-> import numpy as np
(Pdb) num examples: 111000
loss: 5.145186901092529,grad_norm: 0.9999998904990341, iteration: 1
loss: 4.95803165435791,grad_norm: 0.9999999429872446, iteration: 2
loss: 5.307826995849609,grad_norm: 0.9999998931343625, iteration: 3
loss: 5.379916191101074,grad_norm: 0.9999998610241115, iteration: 4
loss: 5.2271599769592285,grad_norm: 0.999999894936629, iteration: 5
loss: 5.059325218200684,grad_norm: 0.999999962755791, iteration: 6
loss: 5.110374450683594,grad_norm: 0.9999999271327573, iteration: 7
loss: 5.104665756225586,grad_norm: 0.9999998961298109, iteration: 8
loss: 5.331845760345459,grad_norm: 0.9999999274959281, iteration: 9
loss: 5.2206830978393555,grad_norm: 0.9999999061394305, iteration: 10
loss: 5.146205902099609,grad_norm: 0.9999999554388257, iteration: 11
loss: 5.1480889320373535,grad_norm: 0.9999998169956464, iteration: 12
loss: 5.325060844421387,grad_norm: 0.9999998365337923, iteration: 13
loss: 5.148538589477539,grad_norm: 0.9999999605466983, iteration: 14
loss: 5.171293258666992,grad_norm: 0.9999999664323469, iteration: 15
loss: 5.223740100860596,grad_norm: 0.9999998646238273, iteration: 16
loss: 5.034583568572998,grad_norm: 0.9999998712933953, iteration: 17
loss: 4.988928318023682,grad_norm: 0.999999961178344, iteration: 18
loss: 5.152886390686035,grad_norm: 0.9999999297956332, iteration: 19
loss: 5.212274074554443,grad_norm: 0.9999999210975347, iteration: 20
loss: 5.046914100646973,grad_norm: 0.9999998891341287, iteration: 21
loss: 5.117532253265381,grad_norm: 0.9999999241802572, iteration: 22
loss: 5.301633358001709,grad_norm: 0.9999999410158711, iteration: 23
loss: 5.091897964477539,grad_norm: 0.9999999309926297, iteration: 24
loss: 5.056726455688477,grad_norm: 0.9999999328045939, iteration: 25
loss: 5.302924156188965,grad_norm: 0.9999999974419294, iteration: 26
loss: 5.004168510437012,grad_norm: 0.9999999793392623, iteration: 27
loss: 5.176860332489014,grad_norm: 0.9999999862363282, iteration: 28
loss: 4.9341254234313965,grad_norm: 0.9999998328484012, iteration: 29
loss: 5.127638339996338,grad_norm: 0.9999998941902722, iteration: 30
loss: 5.060390949249268,grad_norm: 0.9999998305436225, iteration: 31
loss: 4.929332733154297,grad_norm: 0.9999999029717288, iteration: 32
loss: 5.030551433563232,grad_norm: 0.9999998889372119, iteration: 33
loss: 4.9312744140625,grad_norm: 0.999999963612085, iteration: 34
loss: 5.053755283355713,grad_norm: 0.9999999653978019, iteration: 35
loss: 4.887061595916748,grad_norm: 0.999999885801416, iteration: 36
loss: 5.115194320678711,grad_norm: 0.9999999278398698, iteration: 37
loss: 4.989235877990723,grad_norm: 0.9999998586766191, iteration: 38
loss: 5.224584579467773,grad_norm: 0.9999998752078424, iteration: 39
loss: 5.056591510772705,grad_norm: 0.999999867210848, iteration: 40
loss: 4.812046051025391,grad_norm: 0.9999998850964805, iteration: 41
loss: 5.001852989196777,grad_norm: 0.999999871426743, iteration: 42
loss: 5.1537885665893555,grad_norm: 0.9999999318155488, iteration: 43
loss: 4.964253902435303,grad_norm: 0.9999999317234991, iteration: 44
loss: 5.079248905181885,grad_norm: 0.999999973153236, iteration: 45
loss: 4.918365478515625,grad_norm: 0.9999998785755545, iteration: 46
loss: 5.064563274383545,grad_norm: 0.9999999824307804, iteration: 47
loss: 4.970661163330078,grad_norm: 0.9999998369212371, iteration: 48
loss: 4.758288383483887,grad_norm: 1.0000000005926735, iteration: 49
loss: 5.033345699310303,grad_norm: 0.9999998745923531, iteration: 50
loss: 4.842634201049805,grad_norm: 0.9999999228778499, iteration: 51
loss: 4.940277576446533,grad_norm: 0.9999999271257509, iteration: 52
loss: 5.023684024810791,grad_norm: 0.9999998384366648, iteration: 53
loss: 4.995048522949219,grad_norm: 0.9999999615273429, iteration: 54
loss: 4.65927791595459,grad_norm: 0.9999998528891746, iteration: 55
loss: 5.0487284660339355,grad_norm: 0.9999999760890044, iteration: 56
loss: 4.996364593505859,grad_norm: 0.999999882036113, iteration: 57
loss: 4.615342617034912,grad_norm: 0.9999999551852843, iteration: 58
loss: 4.884873390197754,grad_norm: 0.9999999001553973, iteration: 59
loss: 4.797818183898926,grad_norm: 0.9999999134911592, iteration: 60
loss: 4.887596607208252,grad_norm: 0.9999999035395691, iteration: 61
loss: 4.7917914390563965,grad_norm: 0.9999998999743538, iteration: 62
loss: 4.681645393371582,grad_norm: 0.9999999081005203, iteration: 63
loss: 4.979908466339111,grad_norm: 0.9999999458189054, iteration: 64
loss: 4.739100933074951,grad_norm: 0.9999998488765955, iteration: 65
loss: 4.855643272399902,grad_norm: 0.9999999571633469, iteration: 66
loss: 4.7795515060424805,grad_norm: 0.999999922463241, iteration: 67
loss: 4.667335033416748,grad_norm: 0.9999998849387858, iteration: 68
loss: 4.875817775726318,grad_norm: 0.9999998765283713, iteration: 69
loss: 4.717021942138672,grad_norm: 0.9999999029773954, iteration: 70
loss: 4.935865879058838,grad_norm: 0.9999999066356334, iteration: 71
loss: 4.779294967651367,grad_norm: 0.999999984501568, iteration: 72
loss: 4.861382484436035,grad_norm: 0.9999999119997338, iteration: 73
loss: 4.8587422370910645,grad_norm: 1.0000000284749155, iteration: 74
loss: 4.824520587921143,grad_norm: 1.0000000097881527, iteration: 75
loss: 4.593373775482178,grad_norm: 0.9999998798790477, iteration: 76
loss: 4.819086074829102,grad_norm: 0.9999999450650903, iteration: 77
loss: 4.884719371795654,grad_norm: 0.9999999441128882, iteration: 78
loss: 4.592489242553711,grad_norm: 0.9999998864520153, iteration: 79
loss: 4.758527755737305,grad_norm: 0.9999999849154019, iteration: 80
loss: 4.742068767547607,grad_norm: 0.9999999087628714, iteration: 81
loss: 4.774417877197266,grad_norm: 1.0000000035612175, iteration: 82
loss: 4.8764448165893555,grad_norm: 0.999999936255245, iteration: 83
loss: 4.8067755699157715,grad_norm: 0.9999999100665398, iteration: 84
loss: 4.72320032119751,grad_norm: 0.9999999609370944, iteration: 85
loss: 4.807821273803711,grad_norm: 0.9999999422791704, iteration: 86
loss: 4.681207656860352,grad_norm: 0.9999999457189178, iteration: 87
loss: 4.654062747955322,grad_norm: 0.999999987507232, iteration: 88
loss: 4.600102424621582,grad_norm: 0.9999999003710223, iteration: 89
loss: 4.552317142486572,grad_norm: 0.9999998842263798, iteration: 90
loss: 4.567212104797363,grad_norm: 0.9999999568015057, iteration: 91
loss: 4.704402446746826,grad_norm: 0.999999926029521, iteration: 92
loss: 4.43303108215332,grad_norm: 0.9999999076591815, iteration: 93
loss: 4.6546430587768555,grad_norm: 0.9999999910525876, iteration: 94
loss: 4.546759605407715,grad_norm: 0.999999956748045, iteration: 95
loss: 4.68129301071167,grad_norm: 0.9999999112353984, iteration: 96
loss: 4.688084602355957,grad_norm: 0.9999999466795162, iteration: 97
loss: 4.745823860168457,grad_norm: 0.9999998848760707, iteration: 98
loss: 4.691633224487305,grad_norm: 0.9999999738915859, iteration: 99
loss: 4.438426971435547,grad_norm: 0.9999999114125774, iteration: 100
loss: 4.417684555053711,grad_norm: 0.9999999179796594, iteration: 101
loss: 4.746514797210693,grad_norm: 0.999999930371362, iteration: 102
loss: 4.385605812072754,grad_norm: 0.9999998743487611, iteration: 103
loss: 4.461408615112305,grad_norm: 0.9999999322817711, iteration: 104
loss: 4.69069766998291,grad_norm: 0.9999999421434665, iteration: 105
loss: 4.365383148193359,grad_norm: 0.9999999493793222, iteration: 106
loss: 4.5002899169921875,grad_norm: 0.9999999144772573, iteration: 107
loss: 4.500433444976807,grad_norm: 0.9999998739051883, iteration: 108
loss: 4.554624080657959,grad_norm: 0.9999998639846324, iteration: 109
loss: 4.4500322341918945,grad_norm: 0.9999999132682542, iteration: 110
loss: 4.500711917877197,grad_norm: 0.9999999375770855, iteration: 111
loss: 4.515210151672363,grad_norm: 0.9999999685551549, iteration: 112
loss: 4.552913188934326,grad_norm: 0.9999998958504542, iteration: 113
loss: 4.65945291519165,grad_norm: 0.9999998985202828, iteration: 114
loss: 4.373650550842285,grad_norm: 0.999999880800684, iteration: 115
loss: 4.590122222900391,grad_norm: 0.9999998548368861, iteration: 116
loss: 4.435351848602295,grad_norm: 0.9999998996239695, iteration: 117
loss: 4.5636491775512695,grad_norm: 1.00000000282501, iteration: 118
loss: 4.451341152191162,grad_norm: 0.9999999134214707, iteration: 119
loss: 4.4597015380859375,grad_norm: 0.9999999387954891, iteration: 120
loss: 4.4584126472473145,grad_norm: 0.9999999947568603, iteration: 121
loss: 4.536342620849609,grad_norm: 0.9999999020342061, iteration: 122
loss: 4.489536762237549,grad_norm: 0.9999999404417595, iteration: 123
loss: 4.337063789367676,grad_norm: 0.9999998913392363, iteration: 124
loss: 4.322231292724609,grad_norm: 0.9999999135559862, iteration: 125
loss: 4.3654255867004395,grad_norm: 0.9999999378578815, iteration: 126
loss: 4.33677339553833,grad_norm: 0.9999999104847115, iteration: 127
loss: 4.4991679191589355,grad_norm: 0.9999999641347854, iteration: 128
loss: 4.403639316558838,grad_norm: 0.9999998373484459, iteration: 129
loss: 4.435474395751953,grad_norm: 0.9999999722958973, iteration: 130
loss: 4.624222755432129,grad_norm: 0.9999998518364241, iteration: 131
loss: 4.2547454833984375,grad_norm: 0.9999998486561528, iteration: 132
loss: 4.474911689758301,grad_norm: 0.9999998998889276, iteration: 133
loss: 4.431284427642822,grad_norm: 0.9999999083601293, iteration: 134
loss: 4.318924427032471,grad_norm: 0.9999999642202885, iteration: 135
loss: 4.379516124725342,grad_norm: 0.9999999241057679, iteration: 136
loss: 4.286143779754639,grad_norm: 0.9999998847902646, iteration: 137
loss: 4.354089260101318,grad_norm: 0.9999998783888462, iteration: 138
loss: 4.365725040435791,grad_norm: 0.9999998570438325, iteration: 139
loss: 4.4668684005737305,grad_norm: 0.9999999482010498, iteration: 140
loss: 4.526122093200684,grad_norm: 0.9999998558725005, iteration: 141
loss: 4.28798770904541,grad_norm: 0.9999998756450971, iteration: 142
loss: 4.245731830596924,grad_norm: 0.9999999785737893, iteration: 143
loss: 4.303350448608398,grad_norm: 0.9999998642659933, iteration: 144
loss: 4.358432292938232,grad_norm: 0.9999999606959251, iteration: 145
loss: 4.138339519500732,grad_norm: 0.9999999020368436, iteration: 146
loss: 4.402472972869873,grad_norm: 0.999999954624658, iteration: 147
loss: 4.282939434051514,grad_norm: 1.0000000138201555, iteration: 148
loss: 4.348522663116455,grad_norm: 0.9999999161662009, iteration: 149
loss: 4.2869038581848145,grad_norm: 0.9999999017362673, iteration: 150
loss: 4.251153945922852,grad_norm: 0.9999999034313231, iteration: 151
loss: 4.394387245178223,grad_norm: 0.9999999243750549, iteration: 152
loss: 4.501827239990234,grad_norm: 0.999999958372692, iteration: 153
loss: 4.358603477478027,grad_norm: 0.9999999773419436, iteration: 154
loss: 4.2611212730407715,grad_norm: 0.9999999217432964, iteration: 155
loss: 4.315613269805908,grad_norm: 0.9999998355290057, iteration: 156
loss: 4.189026355743408,grad_norm: 0.999999956091864, iteration: 157
loss: 4.31209135055542,grad_norm: 1.0000000224613526, iteration: 158
loss: 4.211429595947266,grad_norm: 0.9999999979339146, iteration: 159
loss: 4.229029178619385,grad_norm: 0.9999999157726274, iteration: 160
loss: 4.356024742126465,grad_norm: 0.9999998152042596, iteration: 161
loss: 4.2937469482421875,grad_norm: 0.9999999449723322, iteration: 162
loss: 4.212235450744629,grad_norm: 0.9999999088315824, iteration: 163
loss: 4.2583112716674805,grad_norm: 0.9999999300081047, iteration: 164
loss: 4.19994592666626,grad_norm: 0.9999998773799732, iteration: 165
loss: 4.192872047424316,grad_norm: 0.9999999454670684, iteration: 166
loss: 4.4049601554870605,grad_norm: 0.999999863055731, iteration: 167
loss: 4.253939151763916,grad_norm: 0.9999999231820773, iteration: 168
loss: 4.314979076385498,grad_norm: 0.9999999400791666, iteration: 169
loss: 4.266975402832031,grad_norm: 0.9999999402225743, iteration: 170
loss: 4.20697546005249,grad_norm: 0.9999998516131602, iteration: 171
loss: 4.198218822479248,grad_norm: 0.999999861927455, iteration: 172
loss: 4.3304667472839355,grad_norm: 0.9999998926620266, iteration: 173
loss: 4.122496128082275,grad_norm: 0.9999998825986848, iteration: 174
loss: 4.134541034698486,grad_norm: 0.9999997844210546, iteration: 175
loss: 4.3247389793396,grad_norm: 0.9999999475444885, iteration: 176
loss: 4.194174289703369,grad_norm: 0.999999866833348, iteration: 177
loss: 4.236432075500488,grad_norm: 0.9999998696972233, iteration: 178
loss: 4.232011795043945,grad_norm: 0.9999999279237561, iteration: 179
loss: 4.225192546844482,grad_norm: 0.9999999336662672, iteration: 180
loss: 4.208765506744385,grad_norm: 0.9999998597304084, iteration: 181
loss: 4.287848949432373,grad_norm: 0.9999999001272659, iteration: 182
loss: 4.06856632232666,grad_norm: 0.9999998374080521, iteration: 183
loss: 4.1530280113220215,grad_norm: 0.9999998556356718, iteration: 184
loss: 4.08380651473999,grad_norm: 0.9999999554695737, iteration: 185
loss: 4.1423516273498535,grad_norm: 0.9999999036575425, iteration: 186
loss: 4.203484058380127,grad_norm: 0.9999998994614635, iteration: 187
loss: 4.239559650421143,grad_norm: 0.9999998856642454, iteration: 188
loss: 4.160285472869873,grad_norm: 0.9999999292543333, iteration: 189
loss: 4.179233551025391,grad_norm: 0.9999998925063567, iteration: 190
loss: 4.151418209075928,grad_norm: 0.999999903240938, iteration: 191
loss: 4.10987663269043,grad_norm: 0.9999998623435797, iteration: 192
loss: 4.229039192199707,grad_norm: 0.9999998399688734, iteration: 193
loss: 4.067734718322754,grad_norm: 0.9999999333510592, iteration: 194
loss: 4.111753940582275,grad_norm: 0.999999922642057, iteration: 195
loss: 4.214545726776123,grad_norm: 0.9999999458237685, iteration: 196
loss: 4.119431495666504,grad_norm: 0.9999999369270888, iteration: 197
loss: 4.228870391845703,grad_norm: 0.9999998573548278, iteration: 198
loss: 4.058801174163818,grad_norm: 0.999999886208355, iteration: 199
loss: 4.091320991516113,grad_norm: 0.9999999015843317, iteration: 200
loss: 4.126826286315918,grad_norm: 0.9999998654889557, iteration: 201
loss: 4.30449914932251,grad_norm: 0.9999999337164921, iteration: 202
loss: 4.253863334655762,grad_norm: 0.9999998894832579, iteration: 203
loss: 4.211269378662109,grad_norm: 0.9999998720511858, iteration: 204
loss: 4.154618740081787,grad_norm: 0.9999998344533856, iteration: 205
loss: 4.12005615234375,grad_norm: 0.9999998682823226, iteration: 206
loss: 4.081786632537842,grad_norm: 0.9999998453250262, iteration: 207
loss: 4.127992153167725,grad_norm: 0.999999895632834, iteration: 208
loss: 4.153850555419922,grad_norm: 0.999999982469136, iteration: 209
loss: 4.1659674644470215,grad_norm: 0.9999998175325537, iteration: 210
loss: 4.131505966186523,grad_norm: 0.9999998400121969, iteration: 211
loss: 4.120192527770996,grad_norm: 0.9999998728291782, iteration: 212
loss: 4.139400482177734,grad_norm: 0.9999999365870675, iteration: 213
loss: 4.12650203704834,grad_norm: 0.999999842579888, iteration: 214
loss: 4.181677341461182,grad_norm: 0.999999910317169, iteration: 215
loss: 4.076742172241211,grad_norm: 0.9999998328224645, iteration: 216
loss: 4.08585262298584,grad_norm: 0.9999998458693686, iteration: 217
loss: 4.228649616241455,grad_norm: 0.9999999024363495, iteration: 218
loss: 4.171082019805908,grad_norm: 0.9999999050309516, iteration: 219
loss: 4.157576560974121,grad_norm: 0.9999999137407374, iteration: 220
loss: 4.207033157348633,grad_norm: 0.9999998606718754, iteration: 221
loss: 4.154508113861084,grad_norm: 0.9999998438353775, iteration: 222
loss: 4.208817005157471,grad_norm: 0.9999998667651663, iteration: 223
loss: 4.157933712005615,grad_norm: 0.9999998584690115, iteration: 224
loss: 4.120482444763184,grad_norm: 0.999999919931699, iteration: 225
loss: 4.184006690979004,grad_norm: 0.9999998606714664, iteration: 226
loss: 4.15860652923584,grad_norm: 0.999999824860256, iteration: 227
loss: 4.177812576293945,grad_norm: 0.999999942806205, iteration: 228
loss: 4.157991409301758,grad_norm: 0.9999999158026919, iteration: 229
loss: 4.061161041259766,grad_norm: 0.9999998135150169, iteration: 230
loss: 4.147021770477295,grad_norm: 0.9999999593617988, iteration: 231
loss: 4.022837162017822,grad_norm: 0.9999998829918569, iteration: 232
loss: 4.066338539123535,grad_norm: 0.9999999100942265, iteration: 233
loss: 4.055342197418213,grad_norm: 0.9999999501073455, iteration: 234
loss: 4.031569480895996,grad_norm: 0.9999999723231152, iteration: 235
loss: 4.2203803062438965,grad_norm: 0.9999998619163906, iteration: 236
loss: 4.014093399047852,grad_norm: 0.9999998659762013, iteration: 237
loss: 4.149621486663818,grad_norm: 0.9999999157945411, iteration: 238
loss: 4.122856616973877,grad_norm: 0.9999998955961007, iteration: 239
loss: 4.09576940536499,grad_norm: 0.9999999162770579, iteration: 240
loss: 4.14110803604126,grad_norm: 0.9999998808295908, iteration: 241
loss: 4.115131378173828,grad_norm: 0.9999999492238664, iteration: 242
loss: 3.9719667434692383,grad_norm: 0.9999998989652559, iteration: 243
loss: 4.125085830688477,grad_norm: 0.999999945049056, iteration: 244
loss: 4.128687381744385,grad_norm: 0.9999998667264802, iteration: 245
loss: 4.222229480743408,grad_norm: 0.9999998690993566, iteration: 246
loss: 4.098514080047607,grad_norm: 0.9999998661122564, iteration: 247
loss: 4.055154800415039,grad_norm: 0.9999998657791032, iteration: 248
loss: 4.104581356048584,grad_norm: 0.9999998451637442, iteration: 249
loss: 4.0752692222595215,grad_norm: 0.9999998666511217, iteration: 250
loss: 4.118051052093506,grad_norm: 0.9999998921369011, iteration: 251
loss: 4.137966632843018,grad_norm: 0.9999999147327705, iteration: 252
loss: 4.058185577392578,grad_norm: 0.9999998629570422, iteration: 253
loss: 4.2026047706604,grad_norm: 0.9999998166601505, iteration: 254
loss: 4.144179821014404,grad_norm: 0.9999998767127823, iteration: 255
loss: 3.982530355453491,grad_norm: 0.9999998923086614, iteration: 256
loss: 4.085280895233154,grad_norm: 0.9999999016634887, iteration: 257
loss: 4.042885780334473,grad_norm: 0.9999999579095556, iteration: 258
loss: 4.069759368896484,grad_norm: 0.9999998746156558, iteration: 259
loss: 4.2130560874938965,grad_norm: 0.9999998692581777, iteration: 260
loss: 4.03562593460083,grad_norm: 0.9999998424423333, iteration: 261
loss: 3.9984638690948486,grad_norm: 0.9999998794696597, iteration: 262
loss: 4.034770488739014,grad_norm: 0.9999998110027805, iteration: 263
loss: 4.1192779541015625,grad_norm: 0.999999908166405, iteration: 264
loss: 4.14413595199585,grad_norm: 0.9999998223723789, iteration: 265
loss: 4.030156135559082,grad_norm: 0.9999998713424825, iteration: 266
loss: 4.049449443817139,grad_norm: 0.9999998505680183, iteration: 267
loss: 4.0784735679626465,grad_norm: 0.9999998787542821, iteration: 268
loss: 4.059215068817139,grad_norm: 0.9999997980938186, iteration: 269
loss: 4.275557994842529,grad_norm: 0.9999999288897541, iteration: 270
loss: 4.014976978302002,grad_norm: 0.9999998702846746, iteration: 271
loss: 4.118715286254883,grad_norm: 0.9999999206047708, iteration: 272
loss: 4.003195285797119,grad_norm: 0.999999838672371, iteration: 273
loss: 4.080618858337402,grad_norm: 0.9999998498549875, iteration: 274
loss: 4.1003594398498535,grad_norm: 0.9999999304704587, iteration: 275
loss: 4.206331253051758,grad_norm: 0.9999999119260945, iteration: 276
loss: 4.048251152038574,grad_norm: 0.9999998551294109, iteration: 277
loss: 4.120999336242676,grad_norm: 0.9999998628344863, iteration: 278
loss: 4.163815498352051,grad_norm: 0.9999999289617029, iteration: 279
loss: 4.0012431144714355,grad_norm: 0.9999999057839902, iteration: 280
loss: 4.204509735107422,grad_norm: 0.9999998710652023, iteration: 281
loss: 4.1203742027282715,grad_norm: 0.9999998334414734, iteration: 282
loss: 4.040576457977295,grad_norm: 0.9999999183298263, iteration: 283
loss: 3.9486844539642334,grad_norm: 0.9999998129583892, iteration: 284
loss: 4.109046936035156,grad_norm: 0.9999998665287811, iteration: 285
loss: 4.136137962341309,grad_norm: 0.9999998032266725, iteration: 286
loss: 4.122503757476807,grad_norm: 0.9999998880832716, iteration: 287
loss: 3.932882785797119,grad_norm: 0.9999998675852583, iteration: 288
loss: 4.086562156677246,grad_norm: 0.999999740575672, iteration: 289
loss: 4.067445755004883,grad_norm: 0.9999998974731874, iteration: 290
loss: 3.9706454277038574,grad_norm: 0.9999998981610956, iteration: 291
loss: 4.055514812469482,grad_norm: 0.9999998546412341, iteration: 292
loss: 4.1975297927856445,grad_norm: 0.9999998943233338, iteration: 293
loss: 4.048206806182861,grad_norm: 0.9999999610431912, iteration: 294
loss: 4.185693264007568,grad_norm: 0.9999998878408689, iteration: 295
loss: 4.0022687911987305,grad_norm: 0.9999998135507313, iteration: 296
loss: 4.115936756134033,grad_norm: 0.9999998991020466, iteration: 297
loss: 4.01103401184082,grad_norm: 0.9999998255473866, iteration: 298
loss: 4.210887908935547,grad_norm: 0.9999998587494859, iteration: 299
loss: 4.0959930419921875,grad_norm: 0.9999998275010501, iteration: 300
loss: 3.9912078380584717,grad_norm: 0.9999998178680145, iteration: 301
loss: 4.080661296844482,grad_norm: 0.9999998276599139, iteration: 302
loss: 4.047914505004883,grad_norm: 0.9999999134544817, iteration: 303
loss: 4.070131301879883,grad_norm: 0.9999998825658405, iteration: 304
loss: 4.121669292449951,grad_norm: 0.9999998248049426, iteration: 305
loss: 4.118167877197266,grad_norm: 0.9999998958985405, iteration: 306
loss: 4.066779136657715,grad_norm: 0.9999998377048926, iteration: 307
loss: 4.137661457061768,grad_norm: 0.9999999062254736, iteration: 308
loss: 4.123294353485107,grad_norm: 0.9999998144005605, iteration: 309
loss: 4.044161319732666,grad_norm: 0.9999998065333272, iteration: 310
loss: 3.942857503890991,grad_norm: 0.9999998564464383, iteration: 311
loss: 4.0433573722839355,grad_norm: 0.9999998261380595, iteration: 312
loss: 4.05468225479126,grad_norm: 0.9999998385924578, iteration: 313
loss: 4.088902473449707,grad_norm: 0.9999998982573957, iteration: 314
loss: 3.9463770389556885,grad_norm: 0.999999867385828, iteration: 315
loss: 3.97570538520813,grad_norm: 0.9999998517402195, iteration: 316
loss: 4.048732280731201,grad_norm: 0.9999998559155259, iteration: 317
loss: 4.073469161987305,grad_norm: 0.9999997789250041, iteration: 318
loss: 4.044766426086426,grad_norm: 0.9999998144781425, iteration: 319
loss: 3.964723587036133,grad_norm: 0.9999998258410931, iteration: 320
loss: 4.100979804992676,grad_norm: 0.9999998532161317, iteration: 321
loss: 3.9627490043640137,grad_norm: 0.9999999017965042, iteration: 322
loss: 4.166131496429443,grad_norm: 0.999999854534426, iteration: 323
loss: 3.9936389923095703,grad_norm: 0.9999998522446489, iteration: 324
loss: 4.124545574188232,grad_norm: 0.9999998690848341, iteration: 325
loss: 4.095064640045166,grad_norm: 0.9999997714468354, iteration: 326
loss: 3.9613678455352783,grad_norm: 0.9999998633741832, iteration: 327
loss: 4.032707691192627,grad_norm: 0.9999998914067163, iteration: 328
loss: 4.022679328918457,grad_norm: 0.9999998947676332, iteration: 329
loss: 4.082589626312256,grad_norm: 0.9999998288299213, iteration: 330
loss: 4.079856872558594,grad_norm: 0.9999998078789966, iteration: 331
loss: 4.1257004737854,grad_norm: 0.9999998678006712, iteration: 332
loss: 3.997006893157959,grad_norm: 0.9999998266816135, iteration: 333
loss: 4.012644290924072,grad_norm: 0.999999805617281, iteration: 334
loss: 3.9992682933807373,grad_norm: 0.9999998211188743, iteration: 335
loss: 4.09398078918457,grad_norm: 0.9999998954549211, iteration: 336
loss: 4.005422115325928,grad_norm: 0.9999999115365152, iteration: 337
loss: 4.023386478424072,grad_norm: 0.9999999403332571, iteration: 338
loss: 4.11444616317749,grad_norm: 0.9999998626410019, iteration: 339
loss: 4.002342700958252,grad_norm: 0.9999998350777958, iteration: 340
loss: 4.073618412017822,grad_norm: 0.9999998258402164, iteration: 341
loss: 3.9940168857574463,grad_norm: 0.9999999183291156, iteration: 342
loss: 4.028186321258545,grad_norm: 0.9999998852928873, iteration: 343
loss: 4.03973388671875,grad_norm: 0.9999999160542489, iteration: 344
loss: 3.959885835647583,grad_norm: 0.9999998386878856, iteration: 345
loss: 3.9935171604156494,grad_norm: 0.9999998980177325, iteration: 346
loss: 3.997309446334839,grad_norm: 0.9999999172445954, iteration: 347
loss: 4.032121658325195,grad_norm: 0.9999998264826724, iteration: 348
loss: 3.99345326423645,grad_norm: 0.9999998165358195, iteration: 349
loss: 4.017726898193359,grad_norm: 0.9999997861435319, iteration: 350
loss: 4.036339282989502,grad_norm: 0.9999998997965007, iteration: 351
loss: 4.058590888977051,grad_norm: 0.9999997890097226, iteration: 352
loss: 4.04904842376709,grad_norm: 0.9999998895061082, iteration: 353
loss: 4.050704479217529,grad_norm: 0.9999998024068214, iteration: 354
loss: 4.057185173034668,grad_norm: 0.999999856932024, iteration: 355
loss: 4.007222652435303,grad_norm: 0.9999998661003128, iteration: 356
loss: 4.025511264801025,grad_norm: 0.9999997918793538, iteration: 357
loss: 3.9200966358184814,grad_norm: 0.9999998475389184, iteration: 358
loss: 4.079366683959961,grad_norm: 0.9999998175670539, iteration: 359
loss: 4.069483757019043,grad_norm: 0.9999997799509425, iteration: 360
loss: 3.989816904067993,grad_norm: 0.9999998159110257, iteration: 361
loss: 3.975127935409546,grad_norm: 0.9999998726865383, iteration: 362
loss: 4.047036170959473,grad_norm: 0.9999998287816058, iteration: 363
loss: 4.1369547843933105,grad_norm: 0.9999999166658798, iteration: 364
loss: 4.065390586853027,grad_norm: 0.9999998758676907, iteration: 365
loss: 4.084334373474121,grad_norm: 0.9999999196547118, iteration: 366
loss: 3.9943974018096924,grad_norm: 0.9999999079309915, iteration: 367
loss: 4.01481819152832,grad_norm: 0.9999998899036558, iteration: 368
loss: 4.069361686706543,grad_norm: 0.999999911479478, iteration: 369
loss: 4.010307788848877,grad_norm: 0.9999998831237936, iteration: 370
loss: 4.013782024383545,grad_norm: 0.9999997915989849, iteration: 371
loss: 4.073570728302002,grad_norm: 0.9999998097244855, iteration: 372
loss: 4.018157005310059,grad_norm: 0.9999998995392848, iteration: 373
loss: 3.9857828617095947,grad_norm: 0.9999998928830283, iteration: 374
loss: 4.066500663757324,grad_norm: 0.9999998177424488, iteration: 375
loss: 3.9481914043426514,grad_norm: 0.9999999209023903, iteration: 376
loss: 4.09687614440918,grad_norm: 0.9999999088700668, iteration: 377
loss: 3.924483299255371,grad_norm: 0.9999998788567297, iteration: 378
loss: 3.981846332550049,grad_norm: 0.9999998072527414, iteration: 379
loss: 4.083309650421143,grad_norm: 0.999999895929946, iteration: 380
loss: 3.953178644180298,grad_norm: 0.9999997924213132, iteration: 381
loss: 3.9678688049316406,grad_norm: 0.9999998199693682, iteration: 382
loss: 4.083194255828857,grad_norm: 0.9999998738633255, iteration: 383
loss: 4.013538837432861,grad_norm: 0.9999998709991255, iteration: 384
loss: 4.091029644012451,grad_norm: 0.9999999311627331, iteration: 385
loss: 4.054237365722656,grad_norm: 0.9999997995825078, iteration: 386
loss: 4.097343921661377,grad_norm: 0.9999998355560238, iteration: 387
loss: 3.9806759357452393,grad_norm: 0.9999998392802146, iteration: 388
loss: 3.9767534732818604,grad_norm: 0.9999998314817721, iteration: 389
loss: 4.099365234375,grad_norm: 0.9999998986143994, iteration: 390
loss: 4.029598236083984,grad_norm: 0.9999998699526472, iteration: 391
loss: 4.001938819885254,grad_norm: 0.9999999093558098, iteration: 392
loss: 3.973010778427124,grad_norm: 0.9999999425943201, iteration: 393
loss: 4.028473377227783,grad_norm: 0.999999810426825, iteration: 394
loss: 4.040422439575195,grad_norm: 0.9999998405349274, iteration: 395
loss: 4.157429218292236,grad_norm: 0.999999940937531, iteration: 396
loss: 4.027502059936523,grad_norm: 0.9999998737642072, iteration: 397
loss: 3.9697723388671875,grad_norm: 0.9999997816812217, iteration: 398
loss: 3.9720630645751953,grad_norm: 0.999999866426257, iteration: 399
loss: 3.9733781814575195,grad_norm: 0.9999998069576351, iteration: 400
loss: 3.933745861053467,grad_norm: 0.999999884698066, iteration: 401
loss: 3.946380376815796,grad_norm: 0.9999998368347717, iteration: 402
loss: 3.9659268856048584,grad_norm: 0.9999998360998626, iteration: 403
loss: 4.076842784881592,grad_norm: 0.9999997986309439, iteration: 404
loss: 3.937270402908325,grad_norm: 0.9999997954159151, iteration: 405
loss: 4.061174392700195,grad_norm: 0.9999998662960125, iteration: 406
loss: 3.9955451488494873,grad_norm: 0.9999999065851193, iteration: 407
loss: 3.9508426189422607,grad_norm: 0.9999998176316106, iteration: 408
loss: 3.8943819999694824,grad_norm: 0.9999999133503037, iteration: 409
loss: 3.9307336807250977,grad_norm: 0.9999998189745812, iteration: 410
loss: 4.019140243530273,grad_norm: 0.9999998613959094, iteration: 411
loss: 4.018837928771973,grad_norm: 0.9999997794515689, iteration: 412
loss: 4.053635120391846,grad_norm: 0.9999998281369328, iteration: 413
loss: 3.9559290409088135,grad_norm: 0.9999997387809839, iteration: 414
loss: 4.0453901290893555,grad_norm: 0.9999998171946799, iteration: 415
loss: 3.9012410640716553,grad_norm: 0.9999997861619025, iteration: 416
loss: 4.040402889251709,grad_norm: 0.9999998317319112, iteration: 417
loss: 3.8949599266052246,grad_norm: 0.9999998773318602, iteration: 418
loss: 4.042322158813477,grad_norm: 0.9999998239101079, iteration: 419
loss: 3.90665864944458,grad_norm: 0.9999998802389891, iteration: 420
loss: 3.901240348815918,grad_norm: 0.9999998196400771, iteration: 421
loss: 3.9637656211853027,grad_norm: 0.999999817158442, iteration: 422
loss: 4.00526762008667,grad_norm: 0.9999998413003754, iteration: 423
loss: 3.976097583770752,grad_norm: 0.9999998109093735, iteration: 424
loss: 3.8900673389434814,grad_norm: 0.999999825729937, iteration: 425
loss: 4.052694797515869,grad_norm: 0.9999998324995519, iteration: 426
loss: 3.9355266094207764,grad_norm: 0.9999998490671849, iteration: 427
loss: 3.880253314971924,grad_norm: 0.9999998145559186, iteration: 428
loss: 3.9827258586883545,grad_norm: 0.999999766130167, iteration: 429
loss: 4.035406112670898,grad_norm: 0.9999998784680314, iteration: 430
loss: 4.047994613647461,grad_norm: 0.9999998480693476, iteration: 431
loss: 3.9945716857910156,grad_norm: 0.9999998843034767, iteration: 432
loss: 3.9899652004241943,grad_norm: 0.9999998203210724, iteration: 433
loss: 3.9145102500915527,grad_norm: 0.9999997900800335, iteration: 434
loss: 3.845975399017334,grad_norm: 0.9999998366036248, iteration: 435
loss: 3.968364953994751,grad_norm: 0.9999998008699592, iteration: 436
loss: 3.9720489978790283,grad_norm: 0.9999999042059521, iteration: 437
loss: 3.9995110034942627,grad_norm: 0.9999998334376639, iteration: 438
loss: 3.9221808910369873,grad_norm: 0.9999998391526795, iteration: 439
loss: 3.9809725284576416,grad_norm: 0.999999928667245, iteration: 440
loss: 4.029359340667725,grad_norm: 0.999999879526826, iteration: 441
loss: 3.9490597248077393,grad_norm: 0.9999998948836623, iteration: 442
loss: 4.018968105316162,grad_norm: 0.9999998799814116, iteration: 443
loss: 4.0448689460754395,grad_norm: 0.999999814821586, iteration: 444
loss: 3.9738216400146484,grad_norm: 0.9999998441419, iteration: 445
loss: 4.04030704498291,grad_norm: 0.9999998646977377, iteration: 446
loss: 4.079193592071533,grad_norm: 0.999999810922524, iteration: 447
loss: 4.043125152587891,grad_norm: 0.9999999715351121, iteration: 448
loss: 3.9452080726623535,grad_norm: 0.999999883063089, iteration: 449
loss: 3.92333722114563,grad_norm: 0.9999998003897788, iteration: 450
loss: 3.995518445968628,grad_norm: 0.9999998125191653, iteration: 451
loss: 3.9577207565307617,grad_norm: 0.999999855632803, iteration: 452
loss: 3.9730656147003174,grad_norm: 0.9999998538271074, iteration: 453
loss: 4.00508975982666,grad_norm: 0.9999998397833537, iteration: 454
loss: 3.97772216796875,grad_norm: 0.9999998539305975, iteration: 455
loss: 3.97857666015625,grad_norm: 0.9999998016714442, iteration: 456
loss: 3.9478752613067627,grad_norm: 0.999999848347065, iteration: 457
loss: 3.9586479663848877,grad_norm: 0.9999998322558634, iteration: 458
loss: 3.8974108695983887,grad_norm: 0.9999998856832228, iteration: 459
loss: 3.9622230529785156,grad_norm: 0.9999998137309967, iteration: 460
loss: 3.9049136638641357,grad_norm: 0.9999998470255111, iteration: 461
loss: 3.9564871788024902,grad_norm: 0.9999998571519007, iteration: 462
loss: 4.0869221687316895,grad_norm: 0.9999998631920135, iteration: 463
loss: 4.024537563323975,grad_norm: 0.9999998825092439, iteration: 464
loss: 3.916120767593384,grad_norm: 0.9999998817328689, iteration: 465
loss: 4.002708911895752,grad_norm: 0.9999997960058713, iteration: 466
loss: 3.948209524154663,grad_norm: 0.9999998708474255, iteration: 467
loss: 3.8997325897216797,grad_norm: 0.9999997722414827, iteration: 468
loss: 3.9290030002593994,grad_norm: 0.9999998464727557, iteration: 469
loss: 3.9800682067871094,grad_norm: 0.9999998822280403, iteration: 470
loss: 3.9788339138031006,grad_norm: 0.9999998868251081, iteration: 471
loss: 3.991121292114258,grad_norm: 0.9999998483346355, iteration: 472
loss: 3.917890787124634,grad_norm: 0.9999998947312521, iteration: 473
loss: 4.013080596923828,grad_norm: 0.9999997308587327, iteration: 474
loss: 3.8897242546081543,grad_norm: 0.9999998233188633, iteration: 475
loss: 3.969041109085083,grad_norm: 0.9999997913345746, iteration: 476
loss: 3.939349412918091,grad_norm: 0.999999774468992, iteration: 477
loss: 3.905869722366333,grad_norm: 0.9999998821282847, iteration: 478
loss: 3.8494033813476562,grad_norm: 0.9999998145549261, iteration: 479
loss: 3.911127805709839,grad_norm: 0.9999998075891229, iteration: 480
loss: 3.9631166458129883,grad_norm: 0.9999997996819301, iteration: 481
loss: 3.890974760055542,grad_norm: 0.9999998090523914, iteration: 482
loss: 3.9639596939086914,grad_norm: 0.9999999096611154, iteration: 483
loss: 3.9713785648345947,grad_norm: 0.9999998238365347, iteration: 484
loss: 3.8707849979400635,grad_norm: 0.99999987655005, iteration: 485
loss: 3.992342948913574,grad_norm: 0.9999997932182964, iteration: 486
loss: 3.9627254009246826,grad_norm: 0.9999998405243344, iteration: 487
loss: 4.039791584014893,grad_norm: 0.9999998342004301, iteration: 488
loss: 3.910719394683838,grad_norm: 0.9999997906524655, iteration: 489
loss: 3.9432387351989746,grad_norm: 0.9999998394201149, iteration: 490
loss: 3.856374979019165,grad_norm: 0.999999878770515, iteration: 491
loss: 3.952890396118164,grad_norm: 0.9999998175609667, iteration: 492
loss: 3.9038333892822266,grad_norm: 0.999999931021525, iteration: 493
loss: 3.961285352706909,grad_norm: 0.9999998148672201, iteration: 494
loss: 3.7372512817382812,grad_norm: 0.99999990048216, iteration: 495
loss: 3.8566999435424805,grad_norm: 0.9999999182920063, iteration: 496
loss: 3.9469029903411865,grad_norm: 0.9999999281718975, iteration: 497
loss: 3.9407734870910645,grad_norm: 0.9999997981360197, iteration: 498
loss: 3.9665515422821045,grad_norm: 0.9999998784220879, iteration: 499
loss: 4.005887508392334,grad_norm: 0.9999998363597948, iteration: 500
loss: 3.9300222396850586,grad_norm: 0.9999998469373972, iteration: 501
loss: 3.8835740089416504,grad_norm: 0.9999999340939352, iteration: 502
loss: 3.7485058307647705,grad_norm: 0.9999998812722526, iteration: 503
loss: 3.9712889194488525,grad_norm: 0.9999998195422142, iteration: 504
loss: 4.011510372161865,grad_norm: 0.9999998783928438, iteration: 505
loss: 3.890176296234131,grad_norm: 0.9999997943319644, iteration: 506
loss: 3.95066237449646,grad_norm: 0.9999998475392827, iteration: 507
loss: 3.957252264022827,grad_norm: 0.9999998078662246, iteration: 508
loss: 3.859957456588745,grad_norm: 0.9999998257668308, iteration: 509
loss: 3.886007785797119,grad_norm: 0.9999998568318567, iteration: 510
loss: 3.951087474822998,grad_norm: 0.9999998119026846, iteration: 511
loss: 3.973905324935913,grad_norm: 0.9999998286925065, iteration: 512
loss: 3.825507640838623,grad_norm: 0.9999998370862913, iteration: 513
loss: 3.871100664138794,grad_norm: 0.9999998373241569, iteration: 514
loss: 3.9039809703826904,grad_norm: 0.9999998098683188, iteration: 515
loss: 3.924592971801758,grad_norm: 0.999999867907877, iteration: 516
loss: 3.902662754058838,grad_norm: 0.9999998473582526, iteration: 517
loss: 3.8082237243652344,grad_norm: 0.9999998424015133, iteration: 518
loss: 4.037992000579834,grad_norm: 0.9999999171897532, iteration: 519
loss: 3.92474365234375,grad_norm: 0.9999998634842497, iteration: 520
loss: 3.8721678256988525,grad_norm: 0.9999998572662532, iteration: 521
loss: 3.890284538269043,grad_norm: 0.9999998819524205, iteration: 522
loss: 4.0333404541015625,grad_norm: 0.9999998566259115, iteration: 523
loss: 3.8563506603240967,grad_norm: 0.9999998093180522, iteration: 524
loss: 3.8761935234069824,grad_norm: 0.9999999159886667, iteration: 525
loss: 3.8828487396240234,grad_norm: 0.9999999323309325, iteration: 526
loss: 3.906628370285034,grad_norm: 0.9999999040898708, iteration: 527
loss: 3.9100396633148193,grad_norm: 0.9999997925823186, iteration: 528
loss: 3.925940990447998,grad_norm: 0.9999998524784659, iteration: 529
loss: 3.8553638458251953,grad_norm: 0.9999998919550699, iteration: 530
loss: 3.965696096420288,grad_norm: 0.9999998881348003, iteration: 531
loss: 4.036532402038574,grad_norm: 0.999999871870171, iteration: 532
loss: 3.8720171451568604,grad_norm: 0.9999998637395705, iteration: 533
loss: 3.9876339435577393,grad_norm: 0.9999998121467986, iteration: 534
loss: 3.8424570560455322,grad_norm: 0.9999998805914675, iteration: 535
loss: 4.008345127105713,grad_norm: 0.999999822043853, iteration: 536
loss: 3.9788706302642822,grad_norm: 0.9999997892579227, iteration: 537
loss: 3.9383914470672607,grad_norm: 0.9999997869092678, iteration: 538
loss: 3.9033405780792236,grad_norm: 0.9999998121585764, iteration: 539
loss: 3.7904956340789795,grad_norm: 0.9999998161563961, iteration: 540
loss: 3.902087450027466,grad_norm: 0.9999997780979235, iteration: 541
loss: 3.9025771617889404,grad_norm: 0.999999845348203, iteration: 542
loss: 3.8750991821289062,grad_norm: 0.9999998826400084, iteration: 543
loss: 3.808457612991333,grad_norm: 0.9999998011566585, iteration: 544
loss: 3.86478590965271,grad_norm: 0.9999998670732918, iteration: 545
loss: 3.836099863052368,grad_norm: 0.9999997530949108, iteration: 546
loss: 3.954907178878784,grad_norm: 0.9999998687510173, iteration: 547
loss: 3.8470635414123535,grad_norm: 0.9999998715119065, iteration: 548
loss: 3.8906002044677734,grad_norm: 0.999999886046588, iteration: 549
loss: 3.894207239151001,grad_norm: 0.9999998798447202, iteration: 550
loss: 3.862558126449585,grad_norm: 0.9999998473770146, iteration: 551
loss: 3.8517415523529053,grad_norm: 0.9999997915882749, iteration: 552
loss: 3.8199715614318848,grad_norm: 0.9999998797340687, iteration: 553
loss: 3.830197334289551,grad_norm: 0.9999998925740686, iteration: 554
loss: 3.8493525981903076,grad_norm: 0.9999998881364395, iteration: 555
loss: 3.893064260482788,grad_norm: 0.9999998662481943, iteration: 556
loss: 3.8299686908721924,grad_norm: 0.9999999068370979, iteration: 557
loss: 3.909982204437256,grad_norm: 0.9999998400224333, iteration: 558
loss: 3.8639862537384033,grad_norm: 0.9999998542868921, iteration: 559
loss: 3.881457805633545,grad_norm: 0.9999999214121172, iteration: 560
loss: 3.872907876968384,grad_norm: 0.9999998057481692, iteration: 561
loss: 3.942988395690918,grad_norm: 0.9999999527793428, iteration: 562
loss: 3.882655620574951,grad_norm: 0.9999997911112294, iteration: 563
loss: 3.9823036193847656,grad_norm: 0.9999997854189109, iteration: 564
loss: 3.996400833129883,grad_norm: 0.9999997807657555, iteration: 565
loss: 3.970958948135376,grad_norm: 0.9999998515570607, iteration: 566
loss: 4.045618534088135,grad_norm: 0.9999998530171639, iteration: 567
loss: 3.87424898147583,grad_norm: 0.9999997657387484, iteration: 568
loss: 3.76169753074646,grad_norm: 0.9999998378724511, iteration: 569
loss: 3.833113193511963,grad_norm: 0.9999998771618791, iteration: 570
loss: 3.762009859085083,grad_norm: 0.9999998420438133, iteration: 571
loss: 3.8887758255004883,grad_norm: 0.9999997551204234, iteration: 572
loss: 3.9083261489868164,grad_norm: 0.9999998784098545, iteration: 573
loss: 3.7821273803710938,grad_norm: 0.9999998983710198, iteration: 574
loss: 3.830808401107788,grad_norm: 0.9999998832342354, iteration: 575
loss: 3.780287981033325,grad_norm: 0.9999998991213074, iteration: 576
loss: 3.856550931930542,grad_norm: 0.9999998448979486, iteration: 577
loss: 3.712735176086426,grad_norm: 0.9999997851800982, iteration: 578
loss: 3.810028314590454,grad_norm: 0.9999998469889344, iteration: 579
loss: 3.8785922527313232,grad_norm: 0.999999848864257, iteration: 580
loss: 3.87825083732605,grad_norm: 0.9999998772574135, iteration: 581
loss: 3.9996402263641357,grad_norm: 0.9999998697142931, iteration: 582
loss: 3.8548707962036133,grad_norm: 0.9999998757770566, iteration: 583
loss: 3.8925576210021973,grad_norm: 0.9999998599208683, iteration: 584
loss: 3.812237024307251,grad_norm: 0.9999998174158404, iteration: 585
loss: 3.7038509845733643,grad_norm: 0.9999998671442929, iteration: 586
loss: 3.9187018871307373,grad_norm: 0.999999874433661, iteration: 587
loss: 3.869044303894043,grad_norm: 0.999999878906019, iteration: 588
loss: 3.8646719455718994,grad_norm: 0.9999998587131416, iteration: 589
loss: 3.737896680831909,grad_norm: 0.999999879108006, iteration: 590
loss: 3.922912836074829,grad_norm: 0.999999889722496, iteration: 591
loss: 3.8611927032470703,grad_norm: 0.9999998747837348, iteration: 592
loss: 3.774238109588623,grad_norm: 0.9999998216000476, iteration: 593
loss: 3.837792158126831,grad_norm: 0.9999998636509536, iteration: 594
loss: 3.8933207988739014,grad_norm: 0.9999997436218276, iteration: 595
loss: 3.85581111907959,grad_norm: 0.9999998621692983, iteration: 596
loss: 3.79441237449646,grad_norm: 0.9999998410720544, iteration: 597
loss: 3.85988450050354,grad_norm: 0.9999998410690368, iteration: 598
loss: 3.70983624458313,grad_norm: 0.9999998362861712, iteration: 599
loss: 3.8955769538879395,grad_norm: 0.9999999222172616, iteration: 600
loss: 3.819864273071289,grad_norm: 0.9999998030654228, iteration: 601
loss: 3.836491823196411,grad_norm: 0.9999997736883378, iteration: 602
loss: 3.822646141052246,grad_norm: 0.9999997935742162, iteration: 603
loss: 3.79107928276062,grad_norm: 0.9999998608942368, iteration: 604
loss: 3.7116498947143555,grad_norm: 0.9999998612237879, iteration: 605
loss: 3.718597888946533,grad_norm: 0.9999998755806816, iteration: 606
loss: 3.7841031551361084,grad_norm: 0.9999998658464689, iteration: 607
loss: 3.905620813369751,grad_norm: 0.9999998979470092, iteration: 608
loss: 3.651350736618042,grad_norm: 0.9999998004707401, iteration: 609
loss: 3.944920539855957,grad_norm: 0.9999998668532109, iteration: 610
loss: 3.939207077026367,grad_norm: 0.9999999175107803, iteration: 611
loss: 3.8166637420654297,grad_norm: 0.9999998231124706, iteration: 612
loss: 3.85244083404541,grad_norm: 0.9999998437615768, iteration: 613
loss: 3.864978790283203,grad_norm: 0.9999997871745441, iteration: 614
loss: 3.7463529109954834,grad_norm: 0.9999998849085162, iteration: 615
loss: 3.8935725688934326,grad_norm: 0.999999837029893, iteration: 616
loss: 3.710312843322754,grad_norm: 0.9999999416936521, iteration: 617
loss: 3.8058202266693115,grad_norm: 0.9999997734115861, iteration: 618
loss: 3.7770795822143555,grad_norm: 0.9999998063600959, iteration: 619
loss: 3.932447671890259,grad_norm: 0.9999999055073691, iteration: 620
loss: 3.7368862628936768,grad_norm: 0.9999998650963827, iteration: 621
loss: 3.7756505012512207,grad_norm: 0.9999998422558302, iteration: 622
loss: 3.761998176574707,grad_norm: 0.9999998594412839, iteration: 623
loss: 3.7835309505462646,grad_norm: 0.9999999199470175, iteration: 624
loss: 3.630302667617798,grad_norm: 0.9999999150082257, iteration: 625
loss: 3.727215051651001,grad_norm: 0.9999999064172923, iteration: 626
loss: 3.7264747619628906,grad_norm: 0.9999998829751655, iteration: 627
loss: 3.7807552814483643,grad_norm: 0.9999998604489292, iteration: 628
loss: 3.8201777935028076,grad_norm: 0.9999998378486383, iteration: 629
loss: 3.768686294555664,grad_norm: 0.9999998222944202, iteration: 630
loss: 3.8287575244903564,grad_norm: 0.9999998717187869, iteration: 631
loss: 3.932227849960327,grad_norm: 0.9999999001363424, iteration: 632
loss: 3.7937381267547607,grad_norm: 0.9999997807531966, iteration: 633
loss: 3.7712862491607666,grad_norm: 0.9999998035410983, iteration: 634
loss: 3.6697628498077393,grad_norm: 0.9999998361163718, iteration: 635
loss: 3.792367935180664,grad_norm: 0.9999998852532339, iteration: 636
loss: 3.783104658126831,grad_norm: 0.9999998199811049, iteration: 637
loss: 3.574439287185669,grad_norm: 0.9999998313013403, iteration: 638
loss: 3.6642515659332275,grad_norm: 0.9999998758244195, iteration: 639
loss: 3.835310697555542,grad_norm: 0.9999998626056074, iteration: 640
loss: 3.8260509967803955,grad_norm: 0.9999998346107176, iteration: 641
loss: 3.6489646434783936,grad_norm: 0.9999998928481952, iteration: 642
loss: 3.688234329223633,grad_norm: 0.9999998791930393, iteration: 643
loss: 3.8479158878326416,grad_norm: 0.9999998310447501, iteration: 644
loss: 3.75809645652771,grad_norm: 0.9999998518068772, iteration: 645
loss: 3.7236077785491943,grad_norm: 0.9999998694088309, iteration: 646
loss: 3.770226240158081,grad_norm: 0.999999960445987, iteration: 647
loss: 3.711045265197754,grad_norm: 0.9999998889453772, iteration: 648
loss: 3.7385096549987793,grad_norm: 0.9999999038041335, iteration: 649
loss: 3.7296042442321777,grad_norm: 0.999999954770094, iteration: 650
loss: 3.7548351287841797,grad_norm: 0.9999998568291533, iteration: 651
loss: 3.778137445449829,grad_norm: 0.9999998433854193, iteration: 652
loss: 3.7536308765411377,grad_norm: 0.9999998031580525, iteration: 653
loss: 3.6654536724090576,grad_norm: 0.9999998524397832, iteration: 654
loss: 3.7276554107666016,grad_norm: 0.9999998869299659, iteration: 655
loss: 3.6777102947235107,grad_norm: 0.9999999817338842, iteration: 656
loss: 3.653303384780884,grad_norm: 0.9999998692135832, iteration: 657
loss: 3.6820931434631348,grad_norm: 0.9999998608675067, iteration: 658
loss: 3.680140733718872,grad_norm: 0.9999998042004966, iteration: 659
loss: 3.778041124343872,grad_norm: 0.9999998176634246, iteration: 660
loss: 3.7405481338500977,grad_norm: 0.999999796550367, iteration: 661
loss: 3.7906808853149414,grad_norm: 0.9999998865917067, iteration: 662
loss: 3.6161584854125977,grad_norm: 0.999999870279853, iteration: 663
loss: 3.724437713623047,grad_norm: 0.9999999208645228, iteration: 664
loss: 3.6520137786865234,grad_norm: 0.9999998616075948, iteration: 665
loss: 3.58284330368042,grad_norm: 0.9999998342007619, iteration: 666
loss: 3.7361981868743896,grad_norm: 0.9999998024665872, iteration: 667
loss: 3.699233293533325,grad_norm: 0.9999998912298382, iteration: 668
loss: 3.6946702003479004,grad_norm: 0.9999999183530508, iteration: 669
loss: 3.600593090057373,grad_norm: 0.9999998995238716, iteration: 670
loss: 3.7083077430725098,grad_norm: 0.9999998957218774, iteration: 671
loss: 3.7595551013946533,grad_norm: 0.999999935086952, iteration: 672
loss: 3.7823405265808105,grad_norm: 0.9999998172710918, iteration: 673
loss: 3.734790325164795,grad_norm: 0.9999998678131363, iteration: 674
loss: 3.685716390609741,grad_norm: 0.9999998608614026, iteration: 675
loss: 3.7036185264587402,grad_norm: 0.9999998674959528, iteration: 676
loss: 3.7009217739105225,grad_norm: 0.9999999334870795, iteration: 677
loss: 3.772935628890991,grad_norm: 0.9999998944698375, iteration: 678
loss: 3.758991241455078,grad_norm: 0.9999997864882982, iteration: 679
loss: 3.7615511417388916,grad_norm: 0.9999998226366105, iteration: 680
loss: 3.581843137741089,grad_norm: 0.9999998443608985, iteration: 681
loss: 3.6551506519317627,grad_norm: 0.9999999029207262, iteration: 682
loss: 3.692316770553589,grad_norm: 0.9999999268408541, iteration: 683
loss: 3.6499295234680176,grad_norm: 0.999999903978143, iteration: 684
loss: 3.695345163345337,grad_norm: 0.9999998870762473, iteration: 685
loss: 3.5370631217956543,grad_norm: 0.9999998628309931, iteration: 686
loss: 3.7119662761688232,grad_norm: 0.9999998585376698, iteration: 687
loss: 3.745509386062622,grad_norm: 0.9999998570247548, iteration: 688
loss: 3.776416063308716,grad_norm: 0.9999998698432611, iteration: 689
loss: 3.763975143432617,grad_norm: 0.9999998348195929, iteration: 690
loss: 3.783320665359497,grad_norm: 0.9999998294303006, iteration: 691
loss: 3.6236016750335693,grad_norm: 0.9999998415157518, iteration: 692
loss: 3.7772586345672607,grad_norm: 0.9999999485138901, iteration: 693
loss: 3.661515951156616,grad_norm: 0.9999999515424831, iteration: 694
loss: 3.5797340869903564,grad_norm: 0.9999998639661826, iteration: 695
loss: 3.632493734359741,grad_norm: 0.9999999311930273, iteration: 696
loss: 3.592155694961548,grad_norm: 0.9999999002868069, iteration: 697
loss: 3.810026168823242,grad_norm: 0.9999998441992339, iteration: 698
loss: 3.6594841480255127,grad_norm: 0.9999998632394194, iteration: 699
loss: 3.6492412090301514,grad_norm: 0.9999998534510561, iteration: 700
loss: 3.7727084159851074,grad_norm: 0.9999997459195181, iteration: 701
loss: 3.5939087867736816,grad_norm: 0.9999999578143741, iteration: 702
loss: 3.687487840652466,grad_norm: 0.9999998587832208, iteration: 703
loss: 3.5787606239318848,grad_norm: 0.9999999383111673, iteration: 704
loss: 3.6823253631591797,grad_norm: 0.999999915200885, iteration: 705
loss: 3.6366238594055176,grad_norm: 0.9999999028406784, iteration: 706
loss: 3.5509297847747803,grad_norm: 0.9999998307295905, iteration: 707
loss: 3.665313720703125,grad_norm: 0.9999999237143352, iteration: 708
loss: 3.577035903930664,grad_norm: 0.9999998453324602, iteration: 709
loss: 3.560373544692993,grad_norm: 0.9999998412961467, iteration: 710
loss: 3.550846576690674,grad_norm: 0.9999998986703517, iteration: 711
loss: 3.6164627075195312,grad_norm: 0.9999998110766217, iteration: 712
loss: 3.4941904544830322,grad_norm: 0.9999999689251451, iteration: 713
loss: 3.549088716506958,grad_norm: 0.9999998882604314, iteration: 714
loss: 3.6243443489074707,grad_norm: 0.9999999426025036, iteration: 715
loss: 3.6221368312835693,grad_norm: 0.9999999565459728, iteration: 716
loss: 3.7095558643341064,grad_norm: 0.9999998238275623, iteration: 717
loss: 3.6189427375793457,grad_norm: 0.999999839196501, iteration: 718
loss: 3.6032111644744873,grad_norm: 0.9999998934119021, iteration: 719
loss: 3.5805678367614746,grad_norm: 0.9999999353236414, iteration: 720
loss: 3.4292690753936768,grad_norm: 0.999999898970883, iteration: 721
loss: 3.5819499492645264,grad_norm: 0.9999999176267799, iteration: 722
loss: 3.587803840637207,grad_norm: 0.9999999671480524, iteration: 723
loss: 3.711869716644287,grad_norm: 0.9999998152771493, iteration: 724
loss: 3.6971335411071777,grad_norm: 0.9999998663947399, iteration: 725
loss: 3.5192253589630127,grad_norm: 0.9999998413920224, iteration: 726
loss: 3.6334893703460693,grad_norm: 0.999999816015956, iteration: 727
loss: 3.7507717609405518,grad_norm: 0.9999998847423089, iteration: 728
loss: 3.55478572845459,grad_norm: 0.9999999356204462, iteration: 729
loss: 3.6982555389404297,grad_norm: 0.9999998064167958, iteration: 730
loss: 3.628239393234253,grad_norm: 0.9999999302427917, iteration: 731
loss: 3.5729246139526367,grad_norm: 0.9999998464356203, iteration: 732
loss: 3.7175869941711426,grad_norm: 0.9999998879402279, iteration: 733
loss: 3.5789361000061035,grad_norm: 0.9999998425367175, iteration: 734
loss: 3.6886913776397705,grad_norm: 0.9999998226099917, iteration: 735
loss: 3.612766981124878,grad_norm: 0.9999998933376391, iteration: 736
loss: 3.642399787902832,grad_norm: 0.9999998534103782, iteration: 737
loss: 3.603428363800049,grad_norm: 0.9999998274974605, iteration: 738
loss: 3.5780880451202393,grad_norm: 0.9999999302716219, iteration: 739
loss: 3.5845229625701904,grad_norm: 0.9999999027823447, iteration: 740
loss: 3.735142707824707,grad_norm: 0.999999834469875, iteration: 741
loss: 3.34257173538208,grad_norm: 0.999999986293583, iteration: 742
loss: 3.622825860977173,grad_norm: 0.999999819602187, iteration: 743
loss: 3.5415291786193848,grad_norm: 0.9999998639038664, iteration: 744
loss: 3.573430061340332,grad_norm: 0.9999998459001421, iteration: 745
loss: 3.6278605461120605,grad_norm: 0.9999998393708948, iteration: 746
loss: 3.5765652656555176,grad_norm: 0.9999997837814334, iteration: 747
loss: 3.515622138977051,grad_norm: 0.9999999352635878, iteration: 748
loss: 3.544520616531372,grad_norm: 0.9999999441573321, iteration: 749
loss: 3.450673818588257,grad_norm: 0.9999998872210576, iteration: 750
loss: 3.597003698348999,grad_norm: 0.9999998024805413, iteration: 751
loss: 3.4940953254699707,grad_norm: 0.9999998442877643, iteration: 752
loss: 3.564612627029419,grad_norm: 0.9999998559484725, iteration: 753
loss: 3.5828957557678223,grad_norm: 0.999999909641172, iteration: 754
loss: 3.6072609424591064,grad_norm: 0.9999998638468853, iteration: 755
loss: 3.380500078201294,grad_norm: 0.9999998696190909, iteration: 756
loss: 3.6056387424468994,grad_norm: 0.9999998897419582, iteration: 757
loss: 3.5767409801483154,grad_norm: 0.9999999583008509, iteration: 758
loss: 3.5605533123016357,grad_norm: 0.9999998728313809, iteration: 759
loss: 3.634384870529175,grad_norm: 0.9999998693030459, iteration: 760
loss: 3.468179941177368,grad_norm: 0.9999998699938775, iteration: 761
loss: 3.6976945400238037,grad_norm: 0.9999999175159124, iteration: 762
loss: 3.558702230453491,grad_norm: 0.9999999028061349, iteration: 763
loss: 3.5612456798553467,grad_norm: 0.9999999673649105, iteration: 764
loss: 3.561415910720825,grad_norm: 0.9999999091737998, iteration: 765
loss: 3.52612566947937,grad_norm: 0.999999900923941, iteration: 766
loss: 3.429429531097412,grad_norm: 0.9999998404737458, iteration: 767
loss: 3.704709529876709,grad_norm: 0.999999848050282, iteration: 768
loss: 3.5610883235931396,grad_norm: 0.9999998777790814, iteration: 769
loss: 3.4321486949920654,grad_norm: 0.9999998734492586, iteration: 770
loss: 3.6779167652130127,grad_norm: 0.999999947915945, iteration: 771
loss: 3.536052703857422,grad_norm: 0.9999998718777487, iteration: 772
loss: 3.5840847492218018,grad_norm: 0.9999998574894524, iteration: 773
loss: 3.5352630615234375,grad_norm: 0.9999999244717204, iteration: 774
loss: 3.393791913986206,grad_norm: 0.9999999436355728, iteration: 775
loss: 3.4014525413513184,grad_norm: 0.9999999081434829, iteration: 776
loss: 3.562389373779297,grad_norm: 0.9999998964834975, iteration: 777
loss: 3.4961249828338623,grad_norm: 0.9999998803621648, iteration: 778
loss: 3.4078831672668457,grad_norm: 0.9999999199543702, iteration: 779
loss: 3.492492437362671,grad_norm: 0.9999998321074246, iteration: 780
loss: 3.646322250366211,grad_norm: 0.9999997949934363, iteration: 781
loss: 3.482654571533203,grad_norm: 0.9999998844700815, iteration: 782
loss: 3.6088457107543945,grad_norm: 0.9999998956918547, iteration: 783
loss: 3.509255886077881,grad_norm: 0.9999999340145461, iteration: 784
loss: 3.4585821628570557,grad_norm: 0.9999999626219838, iteration: 785
loss: 3.4152469635009766,grad_norm: 0.999999797226819, iteration: 786
loss: 3.6274867057800293,grad_norm: 0.9999998697471886, iteration: 787
loss: 3.5021374225616455,grad_norm: 0.9999998317602076, iteration: 788
loss: 3.5082204341888428,grad_norm: 0.9999998002268293, iteration: 789
loss: 3.6045820713043213,grad_norm: 0.9999997860322677, iteration: 790
loss: 3.498547315597534,grad_norm: 0.999999922324536, iteration: 791
loss: 3.5236690044403076,grad_norm: 0.9999998453885286, iteration: 792
loss: 3.5328662395477295,grad_norm: 0.9999998796650814, iteration: 793
loss: 3.560704469680786,grad_norm: 0.9999999200380612, iteration: 794
loss: 3.51369571685791,grad_norm: 0.9999998610589161, iteration: 795
loss: 3.4975907802581787,grad_norm: 0.9999999160243616, iteration: 796
loss: 3.558340072631836,grad_norm: 0.9999999129674099, iteration: 797
loss: 3.5437803268432617,grad_norm: 0.9999999257474821, iteration: 798
loss: 3.6206612586975098,grad_norm: 0.9999998939252415, iteration: 799
loss: 3.5343618392944336,grad_norm: 0.9999998589684701, iteration: 800
loss: 3.3702056407928467,grad_norm: 0.9999998870115352, iteration: 801
loss: 3.519383668899536,grad_norm: 0.999999805229479, iteration: 802
loss: 3.418771266937256,grad_norm: 0.9999999548136106, iteration: 803
loss: 3.5110907554626465,grad_norm: 0.9999998851517615, iteration: 804
loss: 3.4316506385803223,grad_norm: 0.9999998209153242, iteration: 805
loss: 3.459301471710205,grad_norm: 0.9999999543616034, iteration: 806
loss: 3.448032855987549,grad_norm: 0.9999999395850883, iteration: 807
loss: 3.551650285720825,grad_norm: 0.9999999398811179, iteration: 808
loss: 3.5210187435150146,grad_norm: 0.9999998928990026, iteration: 809
loss: 3.447612762451172,grad_norm: 0.9999998933497207, iteration: 810
loss: 3.474252462387085,grad_norm: 0.9999999429943366, iteration: 811
loss: 3.3660106658935547,grad_norm: 0.999999839886077, iteration: 812
loss: 3.591487169265747,grad_norm: 0.9999998405269878, iteration: 813
loss: 3.439732074737549,grad_norm: 0.999999845344085, iteration: 814
loss: 3.5670723915100098,grad_norm: 0.9999999207241448, iteration: 815
loss: 3.4281890392303467,grad_norm: 0.999999871779772, iteration: 816
loss: 3.4702606201171875,grad_norm: 0.9999998052656719, iteration: 817
loss: 3.657186508178711,grad_norm: 0.9999997821746118, iteration: 818
loss: 3.4340884685516357,grad_norm: 0.9999998918808747, iteration: 819
loss: 3.5416197776794434,grad_norm: 0.9999998051579756, iteration: 820
loss: 3.5698182582855225,grad_norm: 0.9999999390764253, iteration: 821
loss: 3.5134658813476562,grad_norm: 0.9999998400474743, iteration: 822
loss: 3.325915575027466,grad_norm: 0.9999998682212163, iteration: 823
loss: 3.49607515335083,grad_norm: 0.9999998266605984, iteration: 824
loss: 3.4730985164642334,grad_norm: 0.9999998582004702, iteration: 825
loss: 3.4176435470581055,grad_norm: 0.9999998841573882, iteration: 826
loss: 3.452829599380493,grad_norm: 0.9999998603407637, iteration: 827
loss: 3.5832016468048096,grad_norm: 0.999999866963324, iteration: 828
loss: 3.463493824005127,grad_norm: 0.9999998702372392, iteration: 829
loss: 3.4396626949310303,grad_norm: 0.9999999265246494, iteration: 830
loss: 3.4020352363586426,grad_norm: 0.9999998629064368, iteration: 831
loss: 3.4144179821014404,grad_norm: 0.9999998893640414, iteration: 832
loss: 3.4594829082489014,grad_norm: 0.9999998024244245, iteration: 833
loss: 3.594637632369995,grad_norm: 0.9999998528821509, iteration: 834
loss: 3.3200137615203857,grad_norm: 0.9999998887108342, iteration: 835
loss: 3.373300075531006,grad_norm: 0.9999998828442647, iteration: 836
loss: 3.504610061645508,grad_norm: 0.9999998036020661, iteration: 837
loss: 3.547672748565674,grad_norm: 0.9999998887313903, iteration: 838
loss: 3.389596939086914,grad_norm: 0.9999999312200758, iteration: 839
loss: 3.3350584506988525,grad_norm: 0.999999876597714, iteration: 840
loss: 3.4621713161468506,grad_norm: 0.9999998862683294, iteration: 841
loss: 3.60465931892395,grad_norm: 0.999999792009445, iteration: 842
loss: 3.3982110023498535,grad_norm: 0.9999998272405931, iteration: 843
loss: 3.576965570449829,grad_norm: 0.9999998139988103, iteration: 844
loss: 3.4305078983306885,grad_norm: 0.9999998715588715, iteration: 845
loss: 3.434868812561035,grad_norm: 0.9999997699168343, iteration: 846
loss: 3.4976606369018555,grad_norm: 0.9999998014825142, iteration: 847
loss: 3.406569719314575,grad_norm: 0.9999998833892163, iteration: 848
loss: 3.4421744346618652,grad_norm: 0.9999998542154225, iteration: 849
loss: 3.392005681991577,grad_norm: 0.9999998865993904, iteration: 850
loss: 3.3043107986450195,grad_norm: 0.9999998886318006, iteration: 851
loss: 3.5396697521209717,grad_norm: 0.9999999165605614, iteration: 852
loss: 3.3903186321258545,grad_norm: 0.9999998788064128, iteration: 853
loss: 3.3906712532043457,grad_norm: 0.9999999153642, iteration: 854
loss: 3.328913688659668,grad_norm: 0.9999998990085818, iteration: 855
loss: 3.2599546909332275,grad_norm: 0.9999998451807511, iteration: 856
loss: 3.286501169204712,grad_norm: 0.9999998991518223, iteration: 857
loss: 3.3876168727874756,grad_norm: 0.9999998033881696, iteration: 858
loss: 3.53166127204895,grad_norm: 0.9999999293147506, iteration: 859
loss: 3.431039571762085,grad_norm: 0.9999998328054992, iteration: 860
loss: 3.3852896690368652,grad_norm: 0.999999858472005, iteration: 861
loss: 3.324173927307129,grad_norm: 0.9999998692872444, iteration: 862
loss: 3.3748064041137695,grad_norm: 0.9999999209825181, iteration: 863
loss: 3.497555732727051,grad_norm: 0.9999998584578501, iteration: 864
loss: 3.3312747478485107,grad_norm: 0.9999998659943513, iteration: 865
loss: 3.1830251216888428,grad_norm: 0.9999999229887873, iteration: 866
loss: 3.484630584716797,grad_norm: 0.9999998286018941, iteration: 867
loss: 3.377354145050049,grad_norm: 0.9999998868093348, iteration: 868
loss: 3.364225149154663,grad_norm: 0.9999999379348339, iteration: 869
loss: 3.305567502975464,grad_norm: 0.9999998755667582, iteration: 870
loss: 3.4142048358917236,grad_norm: 0.9999998306444043, iteration: 871
loss: 3.2583494186401367,grad_norm: 0.9999998574589363, iteration: 872
loss: 3.3751769065856934,grad_norm: 0.9999999187120947, iteration: 873
loss: 3.2074737548828125,grad_norm: 0.9999999086134455, iteration: 874
loss: 3.2489609718322754,grad_norm: 0.999999914575692, iteration: 875
loss: 3.4949164390563965,grad_norm: 0.9999999416384722, iteration: 876
loss: 3.3992316722869873,grad_norm: 0.999999924272037, iteration: 877
loss: 3.3104710578918457,grad_norm: 0.9999999081195969, iteration: 878
loss: 3.523301839828491,grad_norm: 0.9999998597617417, iteration: 879
loss: 3.349889039993286,grad_norm: 0.9999999077621351, iteration: 880
loss: 3.3870294094085693,grad_norm: 0.9999998791675291, iteration: 881
loss: 3.3804080486297607,grad_norm: 0.9999998261285096, iteration: 882
loss: 3.390538454055786,grad_norm: 0.99999991096688, iteration: 883
loss: 3.283299684524536,grad_norm: 0.9999999339926838, iteration: 884
loss: 3.3517370223999023,grad_norm: 0.9999999568113509, iteration: 885
loss: 3.216193675994873,grad_norm: 0.9999999728948233, iteration: 886
loss: 3.262665033340454,grad_norm: 0.9999999021932068, iteration: 887
loss: 3.365363121032715,grad_norm: 0.9999998469745124, iteration: 888
loss: 3.205047607421875,grad_norm: 0.9999999844920011, iteration: 889
loss: 3.352187156677246,grad_norm: 0.9999999015798356, iteration: 890
loss: 3.2315051555633545,grad_norm: 0.9999998867786176, iteration: 891
loss: 3.312406539916992,grad_norm: 0.9999998521715945, iteration: 892
loss: 3.253859519958496,grad_norm: 0.9999999148571151, iteration: 893
loss: 3.1381983757019043,grad_norm: 0.9999999534108304, iteration: 894
loss: 3.2925970554351807,grad_norm: 0.9999999462306767, iteration: 895
loss: 3.2799112796783447,grad_norm: 0.9999999374348693, iteration: 896
loss: 3.2232558727264404,grad_norm: 0.9999998476510548, iteration: 897
loss: 3.294170379638672,grad_norm: 0.9999999220130638, iteration: 898
loss: 3.384282112121582,grad_norm: 0.9999998355296493, iteration: 899
loss: 3.3403069972991943,grad_norm: 0.9999999419648693, iteration: 900
loss: 3.2678465843200684,grad_norm: 0.999999910355811, iteration: 901
loss: 3.242593288421631,grad_norm: 0.999999933950923, iteration: 902
loss: 3.3991689682006836,grad_norm: 1.0000000206095143, iteration: 903
loss: 3.3967337608337402,grad_norm: 0.9999999006895328, iteration: 904
loss: 3.3015518188476562,grad_norm: 0.9999999260102957, iteration: 905
loss: 3.322854995727539,grad_norm: 0.999999930442871, iteration: 906
loss: 3.3440446853637695,grad_norm: 0.9999998636231828, iteration: 907
loss: 3.202024221420288,grad_norm: 0.9999999165260292, iteration: 908
loss: 3.3271522521972656,grad_norm: 0.999999836192632, iteration: 909
loss: 3.31908917427063,grad_norm: 0.9999998166715054, iteration: 910
loss: 3.291289806365967,grad_norm: 0.9999998992046891, iteration: 911
loss: 3.41860032081604,grad_norm: 0.9999998907120901, iteration: 912
loss: 3.278533697128296,grad_norm: 0.9999998362375208, iteration: 913
loss: 3.2729928493499756,grad_norm: 0.9999998659737929, iteration: 914
loss: 3.2783308029174805,grad_norm: 0.9999999703253822, iteration: 915
loss: 3.3370890617370605,grad_norm: 0.9999998880214713, iteration: 916
loss: 3.244417667388916,grad_norm: 0.9999999073767816, iteration: 917
loss: 3.360344409942627,grad_norm: 0.9999998321448649, iteration: 918
loss: 3.3014121055603027,grad_norm: 0.9999998764073794, iteration: 919
loss: 3.2381248474121094,grad_norm: 0.999999890152119, iteration: 920
loss: 3.3268544673919678,grad_norm: 0.9999997971931971, iteration: 921
loss: 3.331080675125122,grad_norm: 0.9999999734374118, iteration: 922
loss: 3.3323256969451904,grad_norm: 0.9999999171715795, iteration: 923
loss: 3.2659616470336914,grad_norm: 0.9999999683782854, iteration: 924
loss: 3.3137264251708984,grad_norm: 0.9999999393153278, iteration: 925
loss: 3.2931573390960693,grad_norm: 0.9999998936937433, iteration: 926
loss: 3.218491554260254,grad_norm: 0.9999998366717807, iteration: 927
loss: 3.320546865463257,grad_norm: 0.9999998122799824, iteration: 928
loss: 3.308027505874634,grad_norm: 0.9999999264594006, iteration: 929
loss: 3.2024736404418945,grad_norm: 0.9999998751663919, iteration: 930
loss: 3.2524373531341553,grad_norm: 0.9999998889848306, iteration: 931
loss: 3.331474781036377,grad_norm: 0.9999998746339788, iteration: 932
loss: 3.156398057937622,grad_norm: 0.9999999389764671, iteration: 933
loss: 3.4185264110565186,grad_norm: 0.999999906234691, iteration: 934
loss: 3.2249956130981445,grad_norm: 0.9999999001560808, iteration: 935
loss: 3.180576801300049,grad_norm: 0.9999998423083867, iteration: 936
loss: 3.2778990268707275,grad_norm: 0.9999998409128893, iteration: 937
loss: 3.216306447982788,grad_norm: 0.9999999415962801, iteration: 938
loss: 3.3654112815856934,grad_norm: 0.9999999161592545, iteration: 939
loss: 3.197890043258667,grad_norm: 0.9999999012245012, iteration: 940
loss: 3.1666767597198486,grad_norm: 0.9999998538682471, iteration: 941
loss: 3.215383529663086,grad_norm: 0.9999999277150328, iteration: 942
loss: 3.1133008003234863,grad_norm: 0.9999999231418532, iteration: 943
loss: 3.174525260925293,grad_norm: 0.9999998841031319, iteration: 944
loss: 3.4043729305267334,grad_norm: 0.9999999884828141, iteration: 945
loss: 3.1657469272613525,grad_norm: 0.9999998578264568, iteration: 946
loss: 3.234156608581543,grad_norm: 0.9999999034742986, iteration: 947
loss: 3.2250800132751465,grad_norm: 0.9999998611489043, iteration: 948
loss: 3.037654161453247,grad_norm: 0.9999998609465903, iteration: 949
loss: 3.0693187713623047,grad_norm: 0.9999998182873554, iteration: 950
loss: 3.198167085647583,grad_norm: 0.9999999979130441, iteration: 951
loss: 3.1331191062927246,grad_norm: 0.9999998209704976, iteration: 952
loss: 3.1786532402038574,grad_norm: 0.9999998671053881, iteration: 953
loss: 3.17364501953125,grad_norm: 0.9999997885821343, iteration: 954
loss: 3.305100917816162,grad_norm: 0.9999999277573476, iteration: 955
loss: 3.366616725921631,grad_norm: 0.9999998519116098, iteration: 956
loss: 3.1796560287475586,grad_norm: 0.9999999333113999, iteration: 957
loss: 3.156545639038086,grad_norm: 0.99999991054308, iteration: 958
loss: 3.3150179386138916,grad_norm: 0.999999840842367, iteration: 959
loss: 3.1809422969818115,grad_norm: 0.9999999265541022, iteration: 960
loss: 3.1660120487213135,grad_norm: 0.9999998978360202, iteration: 961
loss: 3.284034252166748,grad_norm: 0.9999998678660686, iteration: 962
loss: 3.249345541000366,grad_norm: 0.9999997820975951, iteration: 963
loss: 3.217911720275879,grad_norm: 0.999999895751421, iteration: 964
loss: 3.214571952819824,grad_norm: 0.9999997982683057, iteration: 965
loss: 3.0872867107391357,grad_norm: 0.999999933093448, iteration: 966
loss: 3.119495153427124,grad_norm: 0.9999998985834573, iteration: 967
loss: 3.138899803161621,grad_norm: 0.9999998959249586, iteration: 968
loss: 3.112619161605835,grad_norm: 0.9999998580920546, iteration: 969
loss: 3.22513747215271,grad_norm: 0.9999999531597067, iteration: 970
loss: 3.138312578201294,grad_norm: 0.9999998931641909, iteration: 971
loss: 3.266136884689331,grad_norm: 0.9999998962828716, iteration: 972
loss: 3.1014890670776367,grad_norm: 0.9999999237466278, iteration: 973
loss: 3.18394136428833,grad_norm: 0.9999998853555319, iteration: 974
loss: 3.115494728088379,grad_norm: 0.9999998746325822, iteration: 975
loss: 3.041973829269409,grad_norm: 0.9999998651027787, iteration: 976
loss: 3.0151517391204834,grad_norm: 0.9999998560751708, iteration: 977
loss: 3.186208963394165,grad_norm: 0.9999998610426153, iteration: 978
loss: 3.123847723007202,grad_norm: 0.9999999245399002, iteration: 979
loss: 3.2004103660583496,grad_norm: 0.9999999332696059, iteration: 980
loss: 3.231125831604004,grad_norm: 0.9999999513970403, iteration: 981
loss: 3.1519765853881836,grad_norm: 1.0000000010585803, iteration: 982
loss: 3.2630391120910645,grad_norm: 0.9999999461362313, iteration: 983
loss: 3.164121389389038,grad_norm: 0.9999998935792662, iteration: 984
loss: 3.1694095134735107,grad_norm: 0.999999923492724, iteration: 985
loss: 3.197322130203247,grad_norm: 0.9999998057640718, iteration: 986
loss: 3.1103012561798096,grad_norm: 0.999999863363838, iteration: 987
loss: 3.226856231689453,grad_norm: 0.9999998096832363, iteration: 988
loss: 3.1079516410827637,grad_norm: 0.9999999134853931, iteration: 989
loss: 3.120523452758789,grad_norm: 0.9999998956673636, iteration: 990
loss: 2.934985876083374,grad_norm: 0.9999998631412761, iteration: 991
loss: 2.9267847537994385,grad_norm: 0.9999998276530938, iteration: 992
loss: 3.0434868335723877,grad_norm: 0.9999997973650364, iteration: 993
loss: 3.1591808795928955,grad_norm: 0.9999998918221616, iteration: 994
loss: 3.0866875648498535,grad_norm: 0.9999999618966208, iteration: 995
loss: 2.9407575130462646,grad_norm: 0.9999998774933369, iteration: 996
loss: 3.107396125793457,grad_norm: 0.9999999137411117, iteration: 997
loss: 3.029703140258789,grad_norm: 0.9999998405879776, iteration: 998
loss: 2.9504446983337402,grad_norm: 0.9999998256454611, iteration: 999
loss: 3.135160207748413,grad_norm: 1.00000000950171, iteration: 1000
loss: 3.1018483638763428,grad_norm: 0.9999999278888765, iteration: 1001
loss: 3.1750903129577637,grad_norm: 0.9999999540052855, iteration: 1002
loss: 3.040372610092163,grad_norm: 0.999999889152754, iteration: 1003
loss: 3.150106430053711,grad_norm: 0.9999998814089005, iteration: 1004
loss: 2.965033531188965,grad_norm: 0.9999998859990289, iteration: 1005
loss: 3.128099203109741,grad_norm: 0.9999998966084561, iteration: 1006
loss: 3.0871424674987793,grad_norm: 0.9999998307930588, iteration: 1007
loss: 3.162991762161255,grad_norm: 0.9999998586744525, iteration: 1008
loss: 3.0728836059570312,grad_norm: 0.9999998238616514, iteration: 1009
loss: 3.1649012565612793,grad_norm: 0.999999820393263, iteration: 1010
loss: 3.2180449962615967,grad_norm: 0.9999997878511447, iteration: 1011
loss: 3.0752062797546387,grad_norm: 0.9999998684003095, iteration: 1012
loss: 2.9994518756866455,grad_norm: 0.9999998779698914, iteration: 1013
loss: 3.090216636657715,grad_norm: 0.9999999188476665, iteration: 1014
loss: 3.039884567260742,grad_norm: 0.9999998150769686, iteration: 1015
loss: 2.992501974105835,grad_norm: 0.999999947982402, iteration: 1016
loss: 3.0447521209716797,grad_norm: 0.9999998580867882, iteration: 1017
loss: 3.044093370437622,grad_norm: 0.9999999239517863, iteration: 1018
loss: 3.0303211212158203,grad_norm: 0.9999998787988084, iteration: 1019
loss: 3.0563080310821533,grad_norm: 0.9999998928861324, iteration: 1020
loss: 3.193955898284912,grad_norm: 0.9999998850958272, iteration: 1021
loss: 3.089613437652588,grad_norm: 0.9999998858319881, iteration: 1022
loss: 3.168718099594116,grad_norm: 0.999999900212091, iteration: 1023
loss: 2.9774134159088135,grad_norm: 0.9999998176274036, iteration: 1024
loss: 2.8374316692352295,grad_norm: 0.9999999132111781, iteration: 1025
loss: 3.0491855144500732,grad_norm: 0.9999999463611737, iteration: 1026
loss: 3.1371047496795654,grad_norm: 0.9999999090774078, iteration: 1027
loss: 2.953118085861206,grad_norm: 0.9999998375329335, iteration: 1028
loss: 3.0312740802764893,grad_norm: 0.9999998339701436, iteration: 1029
loss: 2.996922731399536,grad_norm: 0.9999998340007427, iteration: 1030
loss: 2.9087579250335693,grad_norm: 0.9999999341158546, iteration: 1031
loss: 3.149141788482666,grad_norm: 0.9999999715241165, iteration: 1032
loss: 3.0235118865966797,grad_norm: 0.9999998926773143, iteration: 1033
loss: 3.0576603412628174,grad_norm: 0.999999880576681, iteration: 1034
loss: 3.1022562980651855,grad_norm: 0.9999999095696808, iteration: 1035
loss: 2.981428861618042,grad_norm: 0.9999998595717681, iteration: 1036
loss: 2.9010632038116455,grad_norm: 0.9999999549505838, iteration: 1037
loss: 3.001891851425171,grad_norm: 0.9999998427142763, iteration: 1038
loss: 2.954411029815674,grad_norm: 0.9999999364542915, iteration: 1039
loss: 2.905428409576416,grad_norm: 0.9999998792480391, iteration: 1040
loss: 2.949186325073242,grad_norm: 0.9999999055197165, iteration: 1041
loss: 3.08150053024292,grad_norm: 0.9999999355005573, iteration: 1042
loss: 3.12233567237854,grad_norm: 0.9999998509683182, iteration: 1043
loss: 3.1113014221191406,grad_norm: 0.9999998652857658, iteration: 1044
loss: 3.109867572784424,grad_norm: 0.9999998738285362, iteration: 1045
loss: 2.9762303829193115,grad_norm: 0.9999998985886177, iteration: 1046
loss: 2.9116549491882324,grad_norm: 0.9999998699133591, iteration: 1047
loss: 2.9147872924804688,grad_norm: 0.9999999693700593, iteration: 1048
loss: 2.9912211894989014,grad_norm: 0.9999998927698858, iteration: 1049
loss: 3.0721077919006348,grad_norm: 0.9999999280475178, iteration: 1050
loss: 3.0031888484954834,grad_norm: 0.9999998637980247, iteration: 1051
loss: 2.9604763984680176,grad_norm: 0.9999998326443591, iteration: 1052
loss: 2.8250763416290283,grad_norm: 0.9999998902902947, iteration: 1053
loss: 2.8672077655792236,grad_norm: 0.9999999327037787, iteration: 1054
loss: 2.9320855140686035,grad_norm: 0.9999999195106608, iteration: 1055
loss: 3.010481119155884,grad_norm: 0.9999999444663723, iteration: 1056
loss: 2.9839842319488525,grad_norm: 0.9999998296659318, iteration: 1057
loss: 2.921703815460205,grad_norm: 0.9999999044056188, iteration: 1058
loss: 2.8877906799316406,grad_norm: 0.9999999522043382, iteration: 1059
loss: 2.9017391204833984,grad_norm: 0.9999998414241692, iteration: 1060
loss: 2.8860576152801514,grad_norm: 0.9999999502640158, iteration: 1061
loss: 2.9089388847351074,grad_norm: 0.9999998415501332, iteration: 1062
loss: 2.814436674118042,grad_norm: 0.9999999063581315, iteration: 1063
loss: 2.96238374710083,grad_norm: 0.9999999619152414, iteration: 1064
loss: 2.954998731613159,grad_norm: 0.9999998311351066, iteration: 1065
loss: 2.9588253498077393,grad_norm: 0.9999998422399718, iteration: 1066
loss: 2.820164203643799,grad_norm: 0.9999998961161204, iteration: 1067
loss: 2.958014965057373,grad_norm: 0.9999998806128061, iteration: 1068
loss: 2.902357339859009,grad_norm: 0.9999999404231357, iteration: 1069
loss: 2.934544801712036,grad_norm: 0.9999998768894124, iteration: 1070
loss: 2.901479959487915,grad_norm: 0.9999999429672252, iteration: 1071
loss: 2.8448493480682373,grad_norm: 0.9999999186915319, iteration: 1072
loss: 2.9363720417022705,grad_norm: 0.9999998355725065, iteration: 1073
loss: 2.8574891090393066,grad_norm: 0.9999999471022533, iteration: 1074
loss: 2.9034743309020996,grad_norm: 0.9999999365818583, iteration: 1075
loss: 2.9246902465820312,grad_norm: 0.9999997463405262, iteration: 1076
loss: 2.8324198722839355,grad_norm: 0.999999904921676, iteration: 1077
loss: 2.7582786083221436,grad_norm: 0.99999984421053, iteration: 1078
loss: 2.952545166015625,grad_norm: 0.9999998057775153, iteration: 1079
loss: 2.848137140274048,grad_norm: 0.999999929864138, iteration: 1080
loss: 3.114027261734009,grad_norm: 0.9999999371526033, iteration: 1081
loss: 2.797346591949463,grad_norm: 0.9999999549715252, iteration: 1082
loss: 2.815321683883667,grad_norm: 0.9999998572250957, iteration: 1083
loss: 2.850322723388672,grad_norm: 0.9999999262821763, iteration: 1084
loss: 2.751041889190674,grad_norm: 0.9999999091692843, iteration: 1085
loss: 2.8720552921295166,grad_norm: 0.9999998446309749, iteration: 1086
loss: 2.9898667335510254,grad_norm: 0.9999997991332372, iteration: 1087
loss: 2.997690200805664,grad_norm: 0.9999999333800519, iteration: 1088
loss: 2.9038166999816895,grad_norm: 0.9999998955906954, iteration: 1089
loss: 2.9359545707702637,grad_norm: 0.9999999060995579, iteration: 1090
loss: 2.822190761566162,grad_norm: 0.9999998903249692, iteration: 1091
loss: 2.8103463649749756,grad_norm: 0.9999999039789075, iteration: 1092
loss: 2.9142868518829346,grad_norm: 0.9999998561151706, iteration: 1093
loss: 2.8650007247924805,grad_norm: 0.9999998353162307, iteration: 1094
loss: 2.912724256515503,grad_norm: 0.9999999393147465, iteration: 1095
loss: 2.8646600246429443,grad_norm: 0.9999998631404181, iteration: 1096
loss: 2.763042449951172,grad_norm: 0.9999998719377536, iteration: 1097
loss: 2.819931983947754,grad_norm: 0.9999998458501989, iteration: 1098
loss: 2.8539764881134033,grad_norm: 0.9999998336516079, iteration: 1099
loss: 2.88948917388916,grad_norm: 0.9999999613234475, iteration: 1100
loss: 2.761807680130005,grad_norm: 0.9999999041694743, iteration: 1101
loss: 2.8383891582489014,grad_norm: 0.9999997816933486, iteration: 1102
loss: 2.9460995197296143,grad_norm: 0.9999998536028673, iteration: 1103
loss: 2.975219964981079,grad_norm: 0.9999999274169021, iteration: 1104
loss: 2.916717529296875,grad_norm: 0.9999999181441885, iteration: 1105
loss: 2.8255298137664795,grad_norm: 0.9999998638697019, iteration: 1106
loss: 2.886448621749878,grad_norm: 0.9999998673096913, iteration: 1107
loss: 2.7899718284606934,grad_norm: 0.9999999255661862, iteration: 1108
loss: 2.8354218006134033,grad_norm: 0.99999988393091, iteration: 1109
loss: 2.7964229583740234,grad_norm: 0.9999998115888535, iteration: 1110
loss: 2.7291085720062256,grad_norm: 0.9999998330197738, iteration: 1111
loss: 2.7159688472747803,grad_norm: 0.9999998730423811, iteration: 1112
loss: 2.630922317504883,grad_norm: 0.9999999004834237, iteration: 1113
loss: 2.7379117012023926,grad_norm: 0.999999931306917, iteration: 1114
loss: 2.8573803901672363,grad_norm: 0.9999998730376839, iteration: 1115
loss: 2.9580767154693604,grad_norm: 0.9999998163215491, iteration: 1116
loss: 2.891242742538452,grad_norm: 0.999999857704269, iteration: 1117
loss: 2.7326290607452393,grad_norm: 0.9999998830766178, iteration: 1118
loss: 2.9042587280273438,grad_norm: 0.9999998410170607, iteration: 1119
loss: 2.723684072494507,grad_norm: 0.9999998488596831, iteration: 1120
loss: 2.5736923217773438,grad_norm: 0.9999999372095455, iteration: 1121
loss: 2.8759145736694336,grad_norm: 0.9999997983728434, iteration: 1122
loss: 2.774899482727051,grad_norm: 0.9999999258025561, iteration: 1123
loss: 2.8733620643615723,grad_norm: 0.9999998405916697, iteration: 1124
loss: 2.7629377841949463,grad_norm: 0.9999999238255006, iteration: 1125
loss: 2.734309434890747,grad_norm: 0.9999999256279792, iteration: 1126
loss: 2.7807791233062744,grad_norm: 0.9999998273086205, iteration: 1127
loss: 2.771989345550537,grad_norm: 0.9999997944610839, iteration: 1128
loss: 2.8995232582092285,grad_norm: 0.9999998385113943, iteration: 1129
loss: 2.8077504634857178,grad_norm: 0.9999998630100166, iteration: 1130
loss: 2.68825364112854,grad_norm: 0.9999999368830692, iteration: 1131
loss: 2.8033201694488525,grad_norm: 0.9999998443827918, iteration: 1132
loss: 2.7973902225494385,grad_norm: 0.9999998249202696, iteration: 1133
loss: 2.8354811668395996,grad_norm: 0.9999999097792005, iteration: 1134
loss: 2.827345609664917,grad_norm: 0.9999997961074355, iteration: 1135
loss: 2.8224799633026123,grad_norm: 0.9999998630434258, iteration: 1136
loss: 2.7622294425964355,grad_norm: 0.9999998759458683, iteration: 1137
loss: 2.754694700241089,grad_norm: 0.9999998790775367, iteration: 1138
loss: 2.768427848815918,grad_norm: 0.999999896384619, iteration: 1139
loss: 2.844219446182251,grad_norm: 0.9999999591932116, iteration: 1140
loss: 2.7457504272460938,grad_norm: 0.9999999348860708, iteration: 1141
loss: 2.686535358428955,grad_norm: 0.9999999119948754, iteration: 1142
loss: 2.635232448577881,grad_norm: 0.9999999175883504, iteration: 1143
loss: 2.75884747505188,grad_norm: 0.9999998967815816, iteration: 1144
loss: 2.7906408309936523,grad_norm: 0.9999999182480099, iteration: 1145
loss: 2.6977834701538086,grad_norm: 0.9999998148719397, iteration: 1146
loss: 2.779320001602173,grad_norm: 0.999999940414015, iteration: 1147
loss: 2.934469223022461,grad_norm: 0.9999999654739935, iteration: 1148
loss: 2.5847158432006836,grad_norm: 0.9999998362531107, iteration: 1149
loss: 2.8103113174438477,grad_norm: 0.9999998436213619, iteration: 1150
loss: 2.662559747695923,grad_norm: 0.9999999611833156, iteration: 1151
loss: 2.8255720138549805,grad_norm: 0.99999991110437, iteration: 1152
loss: 2.8273465633392334,grad_norm: 0.9999999027120755, iteration: 1153
loss: 2.6378979682922363,grad_norm: 0.999999923073796, iteration: 1154
loss: 2.716027021408081,grad_norm: 0.999999850974649, iteration: 1155
loss: 2.636422872543335,grad_norm: 0.9999998907562418, iteration: 1156
loss: 2.6333606243133545,grad_norm: 0.9999998659661121, iteration: 1157
loss: 2.8650636672973633,grad_norm: 0.9999999458831218, iteration: 1158
loss: 2.710177421569824,grad_norm: 0.9999999337632859, iteration: 1159
loss: 2.7122085094451904,grad_norm: 0.9999998649386408, iteration: 1160
loss: 2.8439013957977295,grad_norm: 0.9999998152221985, iteration: 1161
loss: 2.604409694671631,grad_norm: 0.9999998910406301, iteration: 1162
loss: 2.861299753189087,grad_norm: 0.9999998989695588, iteration: 1163
loss: 2.8070461750030518,grad_norm: 0.9999999312222656, iteration: 1164
loss: 2.8267970085144043,grad_norm: 0.9999998394479463, iteration: 1165
loss: 2.807713270187378,grad_norm: 0.9999997775044771, iteration: 1166
loss: 2.5873935222625732,grad_norm: 0.9999998436529538, iteration: 1167
loss: 2.712116241455078,grad_norm: 0.9999999601270928, iteration: 1168
loss: 2.6153831481933594,grad_norm: 0.9999998937526285, iteration: 1169
loss: 2.745340347290039,grad_norm: 0.9999998922788867, iteration: 1170
loss: 2.7751288414001465,grad_norm: 0.9999998862141916, iteration: 1171
loss: 2.5550930500030518,grad_norm: 0.9999998488889282, iteration: 1172
loss: 2.8808252811431885,grad_norm: 0.9999999084665745, iteration: 1173
loss: 2.6115152835845947,grad_norm: 0.999999860113507, iteration: 1174
loss: 2.6349925994873047,grad_norm: 0.9999998889782056, iteration: 1175
loss: 2.633676767349243,grad_norm: 0.9999998593912504, iteration: 1176
loss: 2.8334767818450928,grad_norm: 0.9999998866303802, iteration: 1177
loss: 2.889695167541504,grad_norm: 0.9999999103728733, iteration: 1178
loss: 2.6062815189361572,grad_norm: 0.9999998949860349, iteration: 1179
loss: 2.584113359451294,grad_norm: 0.9999998825786075, iteration: 1180
loss: 2.7359628677368164,grad_norm: 0.9999999077612609, iteration: 1181
loss: 2.695906639099121,grad_norm: 0.9999999647881279, iteration: 1182
loss: 2.7401161193847656,grad_norm: 0.9999998662327282, iteration: 1183
loss: 2.6320571899414062,grad_norm: 0.9999998601117074, iteration: 1184
loss: 2.515392541885376,grad_norm: 0.9999998754444384, iteration: 1185
loss: 2.620373487472534,grad_norm: 0.9999998567885853, iteration: 1186
loss: 2.7522144317626953,grad_norm: 0.9999999318064112, iteration: 1187
loss: 2.6650524139404297,grad_norm: 0.9999998997266846, iteration: 1188
loss: 2.551079750061035,grad_norm: 0.9999999410839352, iteration: 1189
loss: 2.612457513809204,grad_norm: 0.999999853229252, iteration: 1190
loss: 2.5286576747894287,grad_norm: 0.9999998956681783, iteration: 1191
loss: 2.7566471099853516,grad_norm: 0.999999839102539, iteration: 1192
loss: 2.6713385581970215,grad_norm: 0.99999994088562, iteration: 1193
loss: 2.6124022006988525,grad_norm: 0.9999998388985251, iteration: 1194
loss: 2.694817543029785,grad_norm: 0.9999999131381042, iteration: 1195
loss: 2.62025785446167,grad_norm: 0.9999999165033063, iteration: 1196
loss: 2.646995782852173,grad_norm: 0.9999998710734137, iteration: 1197
loss: 2.6935677528381348,grad_norm: 0.9999997974493868, iteration: 1198
loss: 2.6641435623168945,grad_norm: 0.9999997995081699, iteration: 1199
loss: 2.531280755996704,grad_norm: 0.9999998782280579, iteration: 1200
loss: 2.62156081199646,grad_norm: 0.9999999246735474, iteration: 1201
loss: 2.654031991958618,grad_norm: 0.9999998754406231, iteration: 1202
loss: 2.524888515472412,grad_norm: 0.9999998071977788, iteration: 1203
loss: 2.6207547187805176,grad_norm: 0.9999998940793904, iteration: 1204
loss: 2.795944929122925,grad_norm: 0.9999998786492483, iteration: 1205
loss: 2.709869384765625,grad_norm: 0.9999998712985764, iteration: 1206
loss: 2.660897731781006,grad_norm: 0.9999998403403292, iteration: 1207
loss: 2.558532238006592,grad_norm: 0.9999998900585534, iteration: 1208
loss: 2.9046032428741455,grad_norm: 0.9999998561469197, iteration: 1209
loss: 2.417234420776367,grad_norm: 0.9999999023094878, iteration: 1210
loss: 2.713026285171509,grad_norm: 0.9999999162462962, iteration: 1211
loss: 2.7254326343536377,grad_norm: 0.9999998824549995, iteration: 1212
loss: 2.702877998352051,grad_norm: 0.9999998946305333, iteration: 1213
loss: 2.8785901069641113,grad_norm: 0.9999998998548452, iteration: 1214
loss: 2.5552706718444824,grad_norm: 0.9999998731176963, iteration: 1215
loss: 2.693881034851074,grad_norm: 0.9999998879756312, iteration: 1216
loss: 2.5903472900390625,grad_norm: 0.9999998615940155, iteration: 1217
loss: 2.7351789474487305,grad_norm: 0.9999998879229683, iteration: 1218
loss: 2.7924296855926514,grad_norm: 0.9999999484336768, iteration: 1219
loss: 2.767496109008789,grad_norm: 0.9999998898192197, iteration: 1220
loss: 2.702021598815918,grad_norm: 0.9999998833682794, iteration: 1221
loss: 2.593418598175049,grad_norm: 0.9999999741090501, iteration: 1222
loss: 2.590038537979126,grad_norm: 0.9999997949669241, iteration: 1223
loss: 2.6562910079956055,grad_norm: 0.9999999321371641, iteration: 1224
loss: 2.5058677196502686,grad_norm: 0.9999998718717893, iteration: 1225
loss: 2.6059083938598633,grad_norm: 0.9999999489136073, iteration: 1226
loss: 2.6242706775665283,grad_norm: 0.9999999065929984, iteration: 1227
loss: 2.6188738346099854,grad_norm: 0.9999998506051251, iteration: 1228
loss: 2.74824595451355,grad_norm: 0.9999999954244707, iteration: 1229
loss: 2.6659629344940186,grad_norm: 0.9999998317737137, iteration: 1230
loss: 2.6721713542938232,grad_norm: 0.9999998378773881, iteration: 1231
loss: 2.7604446411132812,grad_norm: 0.9999999410491108, iteration: 1232
loss: 2.699089288711548,grad_norm: 0.9999998938215352, iteration: 1233
loss: 2.600782632827759,grad_norm: 0.9999999683768691, iteration: 1234
loss: 2.653087854385376,grad_norm: 0.9999998536600119, iteration: 1235
loss: 2.5628578662872314,grad_norm: 0.9999999087621466, iteration: 1236
loss: 2.5411226749420166,grad_norm: 0.9999997931586551, iteration: 1237
loss: 2.5633132457733154,grad_norm: 0.9999998913534889, iteration: 1238
loss: 2.6445162296295166,grad_norm: 0.9999999165604271, iteration: 1239
loss: 2.6764001846313477,grad_norm: 0.9999998508837062, iteration: 1240
loss: 2.646345376968384,grad_norm: 0.9999999591738636, iteration: 1241
loss: 2.5777645111083984,grad_norm: 0.999999799157852, iteration: 1242
loss: 2.5683341026306152,grad_norm: 0.9999999157791076, iteration: 1243
loss: 2.457606792449951,grad_norm: 0.9999999299351988, iteration: 1244
loss: 2.5170700550079346,grad_norm: 0.999999919617086, iteration: 1245
loss: 2.7722134590148926,grad_norm: 0.9999998773525337, iteration: 1246
loss: 2.4398913383483887,grad_norm: 0.9999999500106116, iteration: 1247
loss: 2.630312204360962,grad_norm: 0.9999999774823587, iteration: 1248
loss: 2.310523748397827,grad_norm: 0.9999998520855865, iteration: 1249
loss: 2.622988224029541,grad_norm: 0.9999998666731287, iteration: 1250
loss: 2.624796152114868,grad_norm: 0.9999999136741363, iteration: 1251
loss: 2.5736963748931885,grad_norm: 0.9999999300868868, iteration: 1252
loss: 2.482637405395508,grad_norm: 0.9999998605200164, iteration: 1253
loss: 2.6454925537109375,grad_norm: 0.9999998882797255, iteration: 1254
loss: 2.683713912963867,grad_norm: 0.9999998493126787, iteration: 1255
loss: 2.5334033966064453,grad_norm: 0.9999998050304414, iteration: 1256
loss: 2.4707155227661133,grad_norm: 0.9999999282903291, iteration: 1257
loss: 2.4676218032836914,grad_norm: 0.999999910076905, iteration: 1258
loss: 2.526616096496582,grad_norm: 0.9999998430360666, iteration: 1259
loss: 2.6270763874053955,grad_norm: 0.9999998636440391, iteration: 1260
loss: 2.5198941230773926,grad_norm: 0.9999999123189243, iteration: 1261
loss: 2.5537619590759277,grad_norm: 0.9999999356454224, iteration: 1262
loss: 2.4421329498291016,grad_norm: 0.9999998521804926, iteration: 1263
loss: 2.435708999633789,grad_norm: 0.9999998441545707, iteration: 1264
loss: 2.515537977218628,grad_norm: 0.9999999108571258, iteration: 1265
loss: 2.6832406520843506,grad_norm: 0.9999999199149903, iteration: 1266
loss: 2.659903049468994,grad_norm: 0.9999998787039633, iteration: 1267
loss: 2.5171775817871094,grad_norm: 0.9999999175252489, iteration: 1268
loss: 2.577664613723755,grad_norm: 0.9999997941212369, iteration: 1269
loss: 2.443965435028076,grad_norm: 0.9999998383297634, iteration: 1270
loss: 2.442687749862671,grad_norm: 0.9999999483057341, iteration: 1271
loss: 2.520507574081421,grad_norm: 0.9999998994415199, iteration: 1272
loss: 2.6346328258514404,grad_norm: 0.9999998344793436, iteration: 1273
loss: 2.5772204399108887,grad_norm: 0.9999999434590926, iteration: 1274
loss: 2.490504741668701,grad_norm: 0.9999999848580939, iteration: 1275
loss: 2.472797155380249,grad_norm: 0.9999999310341677, iteration: 1276
loss: 2.650865316390991,grad_norm: 0.9999998847972245, iteration: 1277
loss: 2.6983864307403564,grad_norm: 0.9999999058951091, iteration: 1278
loss: 2.563885450363159,grad_norm: 0.999999906469398, iteration: 1279
loss: 2.5581037998199463,grad_norm: 0.9999999125399555, iteration: 1280
loss: 2.5629141330718994,grad_norm: 0.99999981949237, iteration: 1281
loss: 2.5048153400421143,grad_norm: 0.9999999324385135, iteration: 1282
loss: 2.4126956462860107,grad_norm: 0.9999998612035322, iteration: 1283
loss: 2.402703046798706,grad_norm: 0.999999850003234, iteration: 1284
loss: 2.7404937744140625,grad_norm: 0.9999998911786088, iteration: 1285
loss: 2.4839494228363037,grad_norm: 0.9999998480128728, iteration: 1286
loss: 2.5316197872161865,grad_norm: 0.9999998457112372, iteration: 1287
loss: 2.445307970046997,grad_norm: 0.9999998265868891, iteration: 1288
loss: 2.587559223175049,grad_norm: 0.9999997866233121, iteration: 1289
loss: 2.456190824508667,grad_norm: 0.9999998502961802, iteration: 1290
loss: 2.5643742084503174,grad_norm: 0.9999998758036419, iteration: 1291
loss: 2.643162727355957,grad_norm: 0.9999998372309417, iteration: 1292
loss: 2.375399351119995,grad_norm: 0.9999998649532772, iteration: 1293
loss: 2.6093947887420654,grad_norm: 0.9999998730531185, iteration: 1294
loss: 2.523480176925659,grad_norm: 0.999999918288753, iteration: 1295
loss: 2.4063727855682373,grad_norm: 0.9999999099846603, iteration: 1296
loss: 2.3258113861083984,grad_norm: 0.9999999110396174, iteration: 1297
loss: 2.57035493850708,grad_norm: 0.999999898305039, iteration: 1298
loss: 2.495859384536743,grad_norm: 0.9999999094526778, iteration: 1299
loss: 2.498493194580078,grad_norm: 0.9999998684733035, iteration: 1300
loss: 2.6044418811798096,grad_norm: 0.9999998881782817, iteration: 1301
loss: 2.485267400741577,grad_norm: 0.9999998971552669, iteration: 1302
loss: 2.33719801902771,grad_norm: 0.99999990601521, iteration: 1303
loss: 2.6433775424957275,grad_norm: 0.9999998949290897, iteration: 1304
loss: 2.635188579559326,grad_norm: 0.9999998217166203, iteration: 1305
loss: 2.449153423309326,grad_norm: 0.999999826834144, iteration: 1306
loss: 2.5495052337646484,grad_norm: 0.999999922599225, iteration: 1307
loss: 2.5774877071380615,grad_norm: 0.9999998586121885, iteration: 1308
loss: 2.628533124923706,grad_norm: 0.999999905737786, iteration: 1309
loss: 2.6090729236602783,grad_norm: 0.9999999211735192, iteration: 1310
loss: 2.57772159576416,grad_norm: 0.9999999484564128, iteration: 1311
loss: 2.4857778549194336,grad_norm: 0.9999998948453046, iteration: 1312
loss: 2.5829904079437256,grad_norm: 0.9999999163774939, iteration: 1313
loss: 2.5084939002990723,grad_norm: 0.9999998778110034, iteration: 1314
loss: 2.6063663959503174,grad_norm: 0.9999998216285667, iteration: 1315
loss: 2.6721651554107666,grad_norm: 0.9999998557084779, iteration: 1316
loss: 2.5836739540100098,grad_norm: 0.999999957969305, iteration: 1317
loss: 2.522820234298706,grad_norm: 0.9999998259846428, iteration: 1318
loss: 2.5877480506896973,grad_norm: 0.999999900605852, iteration: 1319
loss: 2.5068886280059814,grad_norm: 0.9999998950136623, iteration: 1320
loss: 2.642918586730957,grad_norm: 0.9999997913456747, iteration: 1321
loss: 2.6316304206848145,grad_norm: 0.9999999151133492, iteration: 1322
loss: 2.508293867111206,grad_norm: 0.9999998896398679, iteration: 1323
loss: 2.6501262187957764,grad_norm: 0.9999999464955682, iteration: 1324
loss: 2.347825288772583,grad_norm: 0.9999997989755004, iteration: 1325
loss: 2.5697057247161865,grad_norm: 0.9999999812820299, iteration: 1326
loss: 2.454364061355591,grad_norm: 0.9999998104501063, iteration: 1327
loss: 2.463815212249756,grad_norm: 0.9999998195783981, iteration: 1328
loss: 2.4951064586639404,grad_norm: 0.9999998954558678, iteration: 1329
loss: 2.493015766143799,grad_norm: 0.9999999261333875, iteration: 1330
loss: 2.523930311203003,grad_norm: 0.9999997825295684, iteration: 1331
loss: 2.4919204711914062,grad_norm: 0.999999871055745, iteration: 1332
loss: 2.5490128993988037,grad_norm: 0.9999998850741293, iteration: 1333
loss: 2.372826337814331,grad_norm: 0.999999948500231, iteration: 1334
loss: 2.6024534702301025,grad_norm: 0.9999998039994148, iteration: 1335
loss: 2.4460813999176025,grad_norm: 0.9999998405860795, iteration: 1336
loss: 2.334911346435547,grad_norm: 0.9999997976017805, iteration: 1337
loss: 2.60815167427063,grad_norm: 0.9999998701913357, iteration: 1338
loss: 2.524026393890381,grad_norm: 0.9999998986019657, iteration: 1339
loss: 2.4624149799346924,grad_norm: 0.9999999122518258, iteration: 1340
loss: 2.492013931274414,grad_norm: 0.9999998694111849, iteration: 1341
loss: 2.574821710586548,grad_norm: 0.9999999319657004, iteration: 1342
loss: 2.585660696029663,grad_norm: 0.9999998281870579, iteration: 1343
loss: 2.533374071121216,grad_norm: 0.9999999416748964, iteration: 1344
loss: 2.4813380241394043,grad_norm: 0.9999998608912058, iteration: 1345
loss: 2.451756000518799,grad_norm: 0.9999998103378371, iteration: 1346
loss: 2.5889084339141846,grad_norm: 0.9999998600158858, iteration: 1347
loss: 2.359785556793213,grad_norm: 0.9999998678015529, iteration: 1348
loss: 2.5047011375427246,grad_norm: 0.9999999231755469, iteration: 1349
loss: 2.371821403503418,grad_norm: 0.999999923580574, iteration: 1350
loss: 2.5341811180114746,grad_norm: 0.9999998366010928, iteration: 1351
loss: 2.438173532485962,grad_norm: 0.9999998736368615, iteration: 1352
loss: 2.4431650638580322,grad_norm: 0.9999998499721557, iteration: 1353
loss: 2.530766487121582,grad_norm: 0.9999999302906499, iteration: 1354
loss: 2.6594858169555664,grad_norm: 0.9999997989667659, iteration: 1355
loss: 2.3894708156585693,grad_norm: 0.999999894283774, iteration: 1356
loss: 2.6774885654449463,grad_norm: 0.9999998493960566, iteration: 1357
loss: 2.4851417541503906,grad_norm: 0.9999999062747298, iteration: 1358
loss: 2.4127304553985596,grad_norm: 0.9999999181235211, iteration: 1359
loss: 2.558070659637451,grad_norm: 0.9999998343433837, iteration: 1360
loss: 2.4681646823883057,grad_norm: 0.9999998291943445, iteration: 1361
loss: 2.4756791591644287,grad_norm: 0.9999999512779917, iteration: 1362
loss: 2.50281023979187,grad_norm: 0.9999998078634434, iteration: 1363
loss: 2.610929250717163,grad_norm: 0.999999901299646, iteration: 1364
loss: 2.4100492000579834,grad_norm: 0.9999998981937954, iteration: 1365
loss: 2.3081116676330566,grad_norm: 0.99999985309397, iteration: 1366
loss: 2.595808982849121,grad_norm: 0.9999999080082962, iteration: 1367
loss: 2.5050883293151855,grad_norm: 0.9999998577850265, iteration: 1368
loss: 2.500074863433838,grad_norm: 0.9999999034733393, iteration: 1369
loss: 2.4285659790039062,grad_norm: 0.9999998549097074, iteration: 1370
loss: 2.412647247314453,grad_norm: 0.9999998920453123, iteration: 1371
loss: 2.4908504486083984,grad_norm: 0.9999998566838425, iteration: 1372
loss: 2.2377989292144775,grad_norm: 0.9999998447085919, iteration: 1373
loss: 2.463850498199463,grad_norm: 0.9999998955148077, iteration: 1374
loss: 2.2519383430480957,grad_norm: 0.999999863090797, iteration: 1375
loss: 2.4905335903167725,grad_norm: 0.9999999379146729, iteration: 1376
loss: 2.515613555908203,grad_norm: 0.9999998950138103, iteration: 1377
loss: 2.597348213195801,grad_norm: 0.9999999121252796, iteration: 1378
loss: 2.3919973373413086,grad_norm: 0.9999999000167656, iteration: 1379
loss: 2.612586259841919,grad_norm: 0.9999999010175245, iteration: 1380
loss: 2.3989107608795166,grad_norm: 0.9999999484114885, iteration: 1381
loss: 2.401130437850952,grad_norm: 0.9999998599724813, iteration: 1382
loss: 2.2474324703216553,grad_norm: 0.9999998205192028, iteration: 1383
loss: 2.561819076538086,grad_norm: 0.9999998933834094, iteration: 1384
loss: 2.2169461250305176,grad_norm: 0.9999997968433243, iteration: 1385
loss: 2.485205888748169,grad_norm: 0.9999998294166015, iteration: 1386
loss: 2.376802921295166,grad_norm: 0.9999998967404424, iteration: 1387
loss: 2.563328742980957,grad_norm: 0.9999999081724672, iteration: 1388
loss: 2.444990396499634,grad_norm: 0.9999999376810875, iteration: 1389
loss: 2.443397045135498,grad_norm: 0.9999999241961214, iteration: 1390
loss: 2.422833204269409,grad_norm: 0.9999999265615276, iteration: 1391
loss: 2.462780237197876,grad_norm: 0.9999998709569317, iteration: 1392
loss: 2.3815038204193115,grad_norm: 0.9999998739477021, iteration: 1393
loss: 2.5224664211273193,grad_norm: 0.9999998361223591, iteration: 1394
loss: 2.3816752433776855,grad_norm: 0.9999998734496856, iteration: 1395
loss: 2.3731648921966553,grad_norm: 0.9999999025215932, iteration: 1396
loss: 2.391446590423584,grad_norm: 0.9999999064147682, iteration: 1397
loss: 2.482081890106201,grad_norm: 0.9999999559786663, iteration: 1398
loss: 2.3655636310577393,grad_norm: 0.9999999071501425, iteration: 1399
loss: 2.5266811847686768,grad_norm: 0.9999999129593429, iteration: 1400
loss: 2.397120952606201,grad_norm: 0.9999998829256229, iteration: 1401
loss: 2.4425296783447266,grad_norm: 0.9999998202291943, iteration: 1402
loss: 2.5197882652282715,grad_norm: 0.9999998397933194, iteration: 1403
loss: 2.3912253379821777,grad_norm: 0.9999998955170869, iteration: 1404
loss: 2.467867851257324,grad_norm: 0.9999999655942453, iteration: 1405
loss: 2.418707847595215,grad_norm: 0.9999998658547004, iteration: 1406
loss: 2.3290112018585205,grad_norm: 0.9999998894186528, iteration: 1407
loss: 2.4073286056518555,grad_norm: 0.9999998654828082, iteration: 1408
loss: 2.391481637954712,grad_norm: 0.9999998619846556, iteration: 1409
loss: 2.4131877422332764,grad_norm: 0.9999998207089966, iteration: 1410
loss: 2.4846575260162354,grad_norm: 0.9999998983327351, iteration: 1411
loss: 2.3413197994232178,grad_norm: 0.9999998883888663, iteration: 1412
loss: 2.523895263671875,grad_norm: 0.9999998901423435, iteration: 1413
loss: 2.4937050342559814,grad_norm: 0.9999998238480239, iteration: 1414
loss: 2.526196002960205,grad_norm: 0.9999998877338341, iteration: 1415
loss: 2.3544723987579346,grad_norm: 0.999999907862353, iteration: 1416
loss: 2.443056583404541,grad_norm: 0.9999998834404655, iteration: 1417
loss: 2.5424392223358154,grad_norm: 0.9999999428404575, iteration: 1418
loss: 2.489652156829834,grad_norm: 0.9999998290702367, iteration: 1419
loss: 2.4602129459381104,grad_norm: 0.9999999129027318, iteration: 1420
loss: 2.221170663833618,grad_norm: 0.9999999508262953, iteration: 1421
loss: 2.454833507537842,grad_norm: 0.9999998483415588, iteration: 1422
loss: 2.5115714073181152,grad_norm: 0.9999999154648557, iteration: 1423
loss: 2.1834051609039307,grad_norm: 0.9999999312409407, iteration: 1424
loss: 2.2578158378601074,grad_norm: 0.9999999126637187, iteration: 1425
loss: 2.539858818054199,grad_norm: 0.9999998792776877, iteration: 1426
loss: 2.3652119636535645,grad_norm: 0.9999998480427607, iteration: 1427
loss: 2.3985488414764404,grad_norm: 0.999999866178838, iteration: 1428
loss: 2.4962682723999023,grad_norm: 0.9999998829690839, iteration: 1429
loss: 2.4664268493652344,grad_norm: 0.9999997865828364, iteration: 1430
loss: 2.3703153133392334,grad_norm: 0.9999999176514737, iteration: 1431
loss: 2.357591152191162,grad_norm: 0.9999998156430727, iteration: 1432
loss: 2.3701629638671875,grad_norm: 0.9999997893766144, iteration: 1433
loss: 2.4121930599212646,grad_norm: 0.999999860052477, iteration: 1434
loss: 2.4485161304473877,grad_norm: 0.9999998395976395, iteration: 1435
loss: 2.2370221614837646,grad_norm: 0.9999998202224522, iteration: 1436
loss: 2.3757333755493164,grad_norm: 0.9999999249805085, iteration: 1437
loss: 2.297497034072876,grad_norm: 0.9999998672177528, iteration: 1438
loss: 2.356356620788574,grad_norm: 0.9999998809131753, iteration: 1439
loss: 2.6216843128204346,grad_norm: 0.9999998360456256, iteration: 1440
loss: 2.302705764770508,grad_norm: 0.9999999334655397, iteration: 1441
loss: 2.2771739959716797,grad_norm: 0.9999998459244067, iteration: 1442
loss: 2.3419547080993652,grad_norm: 0.9999998926415004, iteration: 1443
loss: 2.1890971660614014,grad_norm: 0.9999998293339452, iteration: 1444
loss: 2.4928486347198486,grad_norm: 0.9999998615630752, iteration: 1445
loss: 2.5454628467559814,grad_norm: 0.9999999144363488, iteration: 1446
loss: 2.4915027618408203,grad_norm: 0.9999998507398753, iteration: 1447
loss: 2.4420900344848633,grad_norm: 0.9999999582502794, iteration: 1448
loss: 2.421152114868164,grad_norm: 0.999999894974569, iteration: 1449
loss: 2.3829009532928467,grad_norm: 0.9999999176104422, iteration: 1450
loss: 2.3124477863311768,grad_norm: 0.9999998367350302, iteration: 1451
loss: 2.393430709838867,grad_norm: 0.9999998185353061, iteration: 1452
loss: 2.1766953468322754,grad_norm: 0.9999998237170357, iteration: 1453
loss: 2.3456149101257324,grad_norm: 0.9999998420385322, iteration: 1454
loss: 2.384521484375,grad_norm: 0.999999810642281, iteration: 1455
loss: 2.3633575439453125,grad_norm: 0.9999998635652168, iteration: 1456
loss: 2.2386269569396973,grad_norm: 0.9999999069995629, iteration: 1457
loss: 2.4238061904907227,grad_norm: 0.9999998369621556, iteration: 1458
loss: 2.4124550819396973,grad_norm: 0.999999921330953, iteration: 1459
loss: 2.4919722080230713,grad_norm: 0.9999998968713305, iteration: 1460
loss: 2.564798355102539,grad_norm: 0.9999998790972268, iteration: 1461
loss: 2.29107666015625,grad_norm: 0.9999999065634773, iteration: 1462
loss: 2.53838849067688,grad_norm: 0.9999999531512821, iteration: 1463
loss: 2.3901500701904297,grad_norm: 0.9999998567411205, iteration: 1464
loss: 2.330591917037964,grad_norm: 0.9999999087848163, iteration: 1465
loss: 2.3039157390594482,grad_norm: 0.999999941575457, iteration: 1466
loss: 2.1898162364959717,grad_norm: 0.9999999463305351, iteration: 1467
loss: 2.344388484954834,grad_norm: 0.9999998402211948, iteration: 1468
loss: 2.2789998054504395,grad_norm: 0.999999881850966, iteration: 1469
loss: 2.369417428970337,grad_norm: 0.999999923773473, iteration: 1470
loss: 2.4116177558898926,grad_norm: 0.9999998170445471, iteration: 1471
loss: 2.3921682834625244,grad_norm: 0.9999999317062217, iteration: 1472
loss: 2.3258893489837646,grad_norm: 0.9999999567508103, iteration: 1473
loss: 2.3872010707855225,grad_norm: 0.9999998555107837, iteration: 1474
loss: 2.3935775756835938,grad_norm: 0.999999932734721, iteration: 1475
loss: 2.418487787246704,grad_norm: 0.9999998247054206, iteration: 1476
loss: 2.35601806640625,grad_norm: 0.9999998353154772, iteration: 1477
loss: 2.294013023376465,grad_norm: 0.9999998549955232, iteration: 1478
loss: 2.474400281906128,grad_norm: 0.9999999319313436, iteration: 1479
loss: 2.254878520965576,grad_norm: 0.9999999149096707, iteration: 1480
loss: 2.3884921073913574,grad_norm: 0.9999998124393388, iteration: 1481
loss: 2.2828633785247803,grad_norm: 0.9999998725818721, iteration: 1482
loss: 2.357421636581421,grad_norm: 0.9999999151825425, iteration: 1483
loss: 2.5214791297912598,grad_norm: 0.9999998027403607, iteration: 1484
loss: 2.280303478240967,grad_norm: 0.9999998589564167, iteration: 1485
loss: 2.3106327056884766,grad_norm: 0.9999998438360296, iteration: 1486
loss: 2.43562650680542,grad_norm: 0.9999998931945653, iteration: 1487
loss: 2.304931879043579,grad_norm: 0.9999999144331972, iteration: 1488
loss: 2.2884833812713623,grad_norm: 0.9999998508153737, iteration: 1489
loss: 2.453373908996582,grad_norm: 0.9999998155873089, iteration: 1490
loss: 2.5185203552246094,grad_norm: 0.999999785831375, iteration: 1491
loss: 2.336608409881592,grad_norm: 0.9999998427511315, iteration: 1492
loss: 2.3179221153259277,grad_norm: 0.999999917303956, iteration: 1493
loss: 2.3889920711517334,grad_norm: 0.9999998715864873, iteration: 1494
loss: 2.4157254695892334,grad_norm: 0.9999998593357547, iteration: 1495
loss: 2.3358542919158936,grad_norm: 0.9999999012021114, iteration: 1496
loss: 2.3230459690093994,grad_norm: 0.9999998295283942, iteration: 1497
loss: 2.3105859756469727,grad_norm: 0.9999998702555739, iteration: 1498
loss: 2.285270929336548,grad_norm: 0.9999998358739005, iteration: 1499
loss: 2.321458101272583,grad_norm: 0.9999999008928494, iteration: 1500
loss: 2.44154953956604,grad_norm: 0.9999998472972199, iteration: 1501
loss: 2.3995790481567383,grad_norm: 0.9999997842746644, iteration: 1502
loss: 2.3669257164001465,grad_norm: 0.9999998695146449, iteration: 1503
loss: 2.4395387172698975,grad_norm: 0.9999998787077311, iteration: 1504
loss: 2.373922824859619,grad_norm: 0.9999998351720637, iteration: 1505
loss: 2.4079694747924805,grad_norm: 0.999999824471909, iteration: 1506
loss: 2.412444591522217,grad_norm: 0.9999998061127425, iteration: 1507
loss: 2.336531639099121,grad_norm: 0.9999999080300096, iteration: 1508
loss: 2.3504483699798584,grad_norm: 0.9999999491456403, iteration: 1509
loss: 2.4067914485931396,grad_norm: 0.9999998511014171, iteration: 1510
loss: 2.272082805633545,grad_norm: 0.9999999492680735, iteration: 1511
loss: 2.276573419570923,grad_norm: 0.99999986989731, iteration: 1512
loss: 2.33778715133667,grad_norm: 0.9999998103006008, iteration: 1513
loss: 2.3401362895965576,grad_norm: 0.9999999073104477, iteration: 1514
loss: 2.4143950939178467,grad_norm: 0.9999998156149302, iteration: 1515
loss: 2.196807622909546,grad_norm: 0.9999998898816487, iteration: 1516
loss: 2.234265089035034,grad_norm: 0.9999998789008272, iteration: 1517
loss: 2.34995698928833,grad_norm: 0.9999998937666956, iteration: 1518
loss: 2.324697494506836,grad_norm: 0.9999998379768998, iteration: 1519
loss: 2.2804477214813232,grad_norm: 0.9999998764049033, iteration: 1520
loss: 2.3891472816467285,grad_norm: 0.9999998308992049, iteration: 1521
loss: 2.3072729110717773,grad_norm: 0.9999998793133358, iteration: 1522
loss: 2.417158842086792,grad_norm: 0.9999998922324073, iteration: 1523
loss: 2.4223804473876953,grad_norm: 0.9999998120347855, iteration: 1524
loss: 2.368037462234497,grad_norm: 0.9999999044160142, iteration: 1525
loss: 2.353760004043579,grad_norm: 0.9999998603353238, iteration: 1526
loss: 2.409836769104004,grad_norm: 0.9999998031437775, iteration: 1527
loss: 2.4819562435150146,grad_norm: 0.9999999153481973, iteration: 1528
loss: 2.279747247695923,grad_norm: 0.9999999264317475, iteration: 1529
loss: 2.3858678340911865,grad_norm: 0.9999998656547131, iteration: 1530
loss: 2.398499011993408,grad_norm: 0.9999998574017205, iteration: 1531
loss: 2.281423330307007,grad_norm: 0.9999998243421723, iteration: 1532
loss: 2.2775626182556152,grad_norm: 0.9999998189587451, iteration: 1533
loss: 2.286029815673828,grad_norm: 0.9999998458714704, iteration: 1534
loss: 2.301500082015991,grad_norm: 0.9999998300206344, iteration: 1535
loss: 2.2484865188598633,grad_norm: 0.9999998758703323, iteration: 1536
loss: 2.347033739089966,grad_norm: 0.9999998610160642, iteration: 1537
loss: 2.4284183979034424,grad_norm: 0.9999999557208287, iteration: 1538
loss: 2.2242271900177,grad_norm: 0.999999867471087, iteration: 1539
loss: 2.219583034515381,grad_norm: 0.999999950744055, iteration: 1540
loss: 2.2402405738830566,grad_norm: 0.9999999007844337, iteration: 1541
loss: 2.2331838607788086,grad_norm: 0.9999999660611615, iteration: 1542
loss: 2.303309440612793,grad_norm: 0.9999998360619585, iteration: 1543
loss: 2.186038017272949,grad_norm: 0.9999998635864885, iteration: 1544
loss: 2.2574784755706787,grad_norm: 0.9999998000358414, iteration: 1545
loss: 2.227294445037842,grad_norm: 0.999999914379929, iteration: 1546
loss: 2.3783559799194336,grad_norm: 0.9999998552275022, iteration: 1547
loss: 2.310624837875366,grad_norm: 0.9999999199420475, iteration: 1548
loss: 2.2091457843780518,grad_norm: 0.9999998678603107, iteration: 1549
loss: 2.4931626319885254,grad_norm: 0.9999998978179044, iteration: 1550
loss: 2.3964145183563232,grad_norm: 0.9999998796584666, iteration: 1551
loss: 2.4081010818481445,grad_norm: 0.9999998298383876, iteration: 1552
loss: 2.329867124557495,grad_norm: 0.9999998914440383, iteration: 1553
loss: 2.235954761505127,grad_norm: 0.9999997706668053, iteration: 1554
loss: 2.3099873065948486,grad_norm: 0.9999999110074294, iteration: 1555
loss: 2.191541910171509,grad_norm: 0.9999999037698919, iteration: 1556
loss: 2.2993173599243164,grad_norm: 0.9999997778227223, iteration: 1557
loss: 2.207258939743042,grad_norm: 0.9999999120844962, iteration: 1558
loss: 2.3470547199249268,grad_norm: 0.9999997806628684, iteration: 1559
loss: 2.3326497077941895,grad_norm: 0.9999998409625104, iteration: 1560
loss: 2.324812889099121,grad_norm: 0.9999999148944734, iteration: 1561
loss: 2.383594512939453,grad_norm: 0.9999997942801869, iteration: 1562
loss: 2.3370065689086914,grad_norm: 0.9999998320238264, iteration: 1563
loss: 2.385758638381958,grad_norm: 0.9999999088487663, iteration: 1564
loss: 2.2298920154571533,grad_norm: 0.9999998796251272, iteration: 1565
loss: 2.1823155879974365,grad_norm: 0.9999998652458696, iteration: 1566
loss: 2.257110357284546,grad_norm: 0.9999998272877165, iteration: 1567
loss: 2.2796361446380615,grad_norm: 0.9999998199320986, iteration: 1568
loss: 2.373903751373291,grad_norm: 0.999999847595049, iteration: 1569
loss: 2.272071361541748,grad_norm: 0.9999999197773219, iteration: 1570
loss: 2.1310551166534424,grad_norm: 0.9999998356696087, iteration: 1571
loss: 2.3118574619293213,grad_norm: 0.9999998205350957, iteration: 1572
loss: 2.457163095474243,grad_norm: 0.9999997890029328, iteration: 1573
loss: 2.381985902786255,grad_norm: 0.9999998595589664, iteration: 1574
loss: 2.33583664894104,grad_norm: 0.9999998890154621, iteration: 1575
loss: 2.221224308013916,grad_norm: 0.9999998910018236, iteration: 1576
loss: 2.165469169616699,grad_norm: 0.9999998514413219, iteration: 1577
loss: 2.2786717414855957,grad_norm: 0.9999999060932956, iteration: 1578
loss: 2.3616771697998047,grad_norm: 0.9999998506747846, iteration: 1579
loss: 2.2784690856933594,grad_norm: 0.9999998773603557, iteration: 1580
loss: 2.2230865955352783,grad_norm: 0.9999997998380753, iteration: 1581
loss: 2.238823890686035,grad_norm: 0.9999998479115195, iteration: 1582
loss: 2.3676109313964844,grad_norm: 0.9999999091721788, iteration: 1583
loss: 2.375234365463257,grad_norm: 0.9999998450927619, iteration: 1584
loss: 2.3545103073120117,grad_norm: 0.9999998309691427, iteration: 1585
loss: 2.2016048431396484,grad_norm: 0.9999999477305662, iteration: 1586
loss: 2.345324754714966,grad_norm: 0.9999998664910172, iteration: 1587
loss: 2.428179979324341,grad_norm: 0.999999903101797, iteration: 1588
loss: 2.305447816848755,grad_norm: 0.9999998734337022, iteration: 1589
loss: 2.2901220321655273,grad_norm: 0.9999999320221142, iteration: 1590
loss: 2.279743194580078,grad_norm: 0.999999834309232, iteration: 1591
loss: 2.226485013961792,grad_norm: 0.9999999357915501, iteration: 1592
loss: 2.3333582878112793,grad_norm: 0.9999998572676974, iteration: 1593
loss: 2.3761813640594482,grad_norm: 0.9999998750986551, iteration: 1594
loss: 2.2781982421875,grad_norm: 0.9999999011600351, iteration: 1595
loss: 2.278682231903076,grad_norm: 0.9999999517843043, iteration: 1596
loss: 2.3416380882263184,grad_norm: 0.9999998497021428, iteration: 1597
loss: 2.4062492847442627,grad_norm: 0.9999998744240118, iteration: 1598
loss: 2.157985210418701,grad_norm: 0.9999997926840248, iteration: 1599
loss: 2.2868189811706543,grad_norm: 0.9999998615171609, iteration: 1600
loss: 2.2024776935577393,grad_norm: 0.9999998372429458, iteration: 1601
loss: 2.313997268676758,grad_norm: 0.9999998588783403, iteration: 1602
loss: 2.2023043632507324,grad_norm: 0.9999998643940659, iteration: 1603
loss: 2.3730218410491943,grad_norm: 0.9999998767283508, iteration: 1604
loss: 2.2571752071380615,grad_norm: 0.9999997928026387, iteration: 1605
loss: 2.283906936645508,grad_norm: 0.9999998565083877, iteration: 1606
loss: 2.3224897384643555,grad_norm: 0.9999999124651816, iteration: 1607
loss: 2.2441368103027344,grad_norm: 0.9999998992993154, iteration: 1608
loss: 2.4182755947113037,grad_norm: 0.9999998790898982, iteration: 1609
loss: 2.2578048706054688,grad_norm: 0.9999998528379481, iteration: 1610
loss: 2.475808620452881,grad_norm: 0.9999998157558433, iteration: 1611
loss: 2.2765884399414062,grad_norm: 0.9999998210245865, iteration: 1612
loss: 2.228740692138672,grad_norm: 0.9999998806220158, iteration: 1613
loss: 2.1656177043914795,grad_norm: 0.9999998829227447, iteration: 1614
loss: 2.4202089309692383,grad_norm: 0.9999998913632089, iteration: 1615
loss: 2.256598711013794,grad_norm: 0.9999998880896706, iteration: 1616
loss: 2.2189881801605225,grad_norm: 0.9999998597033851, iteration: 1617
loss: 2.370415210723877,grad_norm: 0.9999998552139898, iteration: 1618
loss: 2.082810163497925,grad_norm: 0.9999999647533833, iteration: 1619
loss: 2.248298406600952,grad_norm: 0.9999998862561422, iteration: 1620
loss: 2.3394041061401367,grad_norm: 0.9999999067104737, iteration: 1621
loss: 2.311293125152588,grad_norm: 0.9999998229955654, iteration: 1622
loss: 2.184382438659668,grad_norm: 0.9999998917885683, iteration: 1623
loss: 2.2799296379089355,grad_norm: 0.9999998651284041, iteration: 1624
loss: 2.178126096725464,grad_norm: 0.9999998962274911, iteration: 1625
loss: 2.211416721343994,grad_norm: 0.9999998901761123, iteration: 1626
loss: 2.2223966121673584,grad_norm: 0.9999998587260989, iteration: 1627
loss: 2.31141996383667,grad_norm: 0.9999997917959208, iteration: 1628
loss: 2.193704843521118,grad_norm: 0.9999998651048753, iteration: 1629
loss: 2.0890297889709473,grad_norm: 0.9999999151442521, iteration: 1630
loss: 2.314441442489624,grad_norm: 0.9999998564658965, iteration: 1631
loss: 2.308576822280884,grad_norm: 0.9999998374939609, iteration: 1632
loss: 2.321057081222534,grad_norm: 0.9999999409208216, iteration: 1633
loss: 2.2640421390533447,grad_norm: 0.9999998946826406, iteration: 1634
loss: 2.3189749717712402,grad_norm: 0.9999998761646255, iteration: 1635
loss: 2.282402753829956,grad_norm: 0.9999998047628994, iteration: 1636
loss: 2.3540825843811035,grad_norm: 0.9999998879233701, iteration: 1637
loss: 2.213534355163574,grad_norm: 0.999999875035822, iteration: 1638
loss: 2.2332324981689453,grad_norm: 0.9999998563929299, iteration: 1639
loss: 2.206176519393921,grad_norm: 0.9999998273215357, iteration: 1640
loss: 2.267698287963867,grad_norm: 0.9999998706882208, iteration: 1641
loss: 2.273667335510254,grad_norm: 0.9999998931276669, iteration: 1642
loss: 2.195729970932007,grad_norm: 0.9999997657107982, iteration: 1643
loss: 2.2975428104400635,grad_norm: 0.9999997894211107, iteration: 1644
loss: 2.368604898452759,grad_norm: 0.9999998905349464, iteration: 1645
loss: 2.3403971195220947,grad_norm: 0.9999998409567162, iteration: 1646
loss: 2.1838276386260986,grad_norm: 0.9999998761396018, iteration: 1647
loss: 2.2743921279907227,grad_norm: 0.9999998383179339, iteration: 1648
loss: 2.212693214416504,grad_norm: 0.999999867288092, iteration: 1649
loss: 2.1455538272857666,grad_norm: 0.9999998065432488, iteration: 1650
loss: 2.1413867473602295,grad_norm: 0.9999998408697985, iteration: 1651
loss: 2.332606077194214,grad_norm: 0.9999999488488144, iteration: 1652
loss: 2.268986940383911,grad_norm: 0.999999917892054, iteration: 1653
loss: 2.189974069595337,grad_norm: 0.9999998534635469, iteration: 1654
loss: 2.3188276290893555,grad_norm: 0.9999998234375892, iteration: 1655
loss: 2.3063647747039795,grad_norm: 0.9999998669122554, iteration: 1656
loss: 2.2152626514434814,grad_norm: 0.9999998018814603, iteration: 1657
loss: 2.310640335083008,grad_norm: 0.9999997626469879, iteration: 1658
loss: 2.1360702514648438,grad_norm: 0.9999998938757412, iteration: 1659
loss: 2.1308658123016357,grad_norm: 0.9999998488127929, iteration: 1660
loss: 2.1681294441223145,grad_norm: 0.9999997572928133, iteration: 1661
loss: 2.1302616596221924,grad_norm: 0.9999998172046584, iteration: 1662
loss: 2.1500048637390137,grad_norm: 0.9999998270407566, iteration: 1663
loss: 2.1957497596740723,grad_norm: 0.9999998617436268, iteration: 1664
loss: 2.1005594730377197,grad_norm: 0.9999998096129764, iteration: 1665
loss: 2.271591901779175,grad_norm: 0.9999999096525961, iteration: 1666
loss: 2.2096920013427734,grad_norm: 0.9999998184720374, iteration: 1667
loss: 2.2516965866088867,grad_norm: 0.9999998093822121, iteration: 1668
loss: 2.1538586616516113,grad_norm: 0.9999998847444102, iteration: 1669
loss: 2.2269885540008545,grad_norm: 0.9999999253360525, iteration: 1670
loss: 2.318798303604126,grad_norm: 0.9999998583519225, iteration: 1671
loss: 2.2843549251556396,grad_norm: 0.9999998577465791, iteration: 1672
loss: 2.3064446449279785,grad_norm: 0.9999998108744914, iteration: 1673
loss: 2.3123555183410645,grad_norm: 0.9999997934208245, iteration: 1674
loss: 2.054351568222046,grad_norm: 0.9999998912271144, iteration: 1675
loss: 2.2736473083496094,grad_norm: 0.9999997963898084, iteration: 1676
loss: 2.126289129257202,grad_norm: 0.9999998181573817, iteration: 1677
loss: 2.162437677383423,grad_norm: 0.9999997886078822, iteration: 1678
loss: 2.2722322940826416,grad_norm: 0.9999998376675172, iteration: 1679
loss: 2.2166905403137207,grad_norm: 0.9999998941521665, iteration: 1680
loss: 2.3583199977874756,grad_norm: 0.9999998577402248, iteration: 1681
loss: 2.2951831817626953,grad_norm: 0.9999998852406661, iteration: 1682
loss: 2.344926595687866,grad_norm: 0.9999998920648104, iteration: 1683
loss: 2.2327475547790527,grad_norm: 0.9999998239810788, iteration: 1684
loss: 2.3135929107666016,grad_norm: 0.9999999412724012, iteration: 1685
loss: 2.287379026412964,grad_norm: 0.9999999019104536, iteration: 1686
loss: 2.19387149810791,grad_norm: 0.9999998302302454, iteration: 1687
loss: 2.3381617069244385,grad_norm: 0.999999929909498, iteration: 1688
loss: 2.3862197399139404,grad_norm: 0.9999998626270047, iteration: 1689
loss: 2.2868616580963135,grad_norm: 0.9999998361679914, iteration: 1690
loss: 2.1793878078460693,grad_norm: 0.9999998664057805, iteration: 1691
loss: 2.3049468994140625,grad_norm: 0.9999999065182932, iteration: 1692
loss: 2.3801841735839844,grad_norm: 0.9999998382569328, iteration: 1693
loss: 2.1730453968048096,grad_norm: 0.9999998382434514, iteration: 1694
loss: 2.169844150543213,grad_norm: 0.9999998422947538, iteration: 1695
loss: 2.2211577892303467,grad_norm: 0.9999998649626318, iteration: 1696
loss: 2.1274349689483643,grad_norm: 0.999999846774821, iteration: 1697
loss: 2.1247029304504395,grad_norm: 0.9999999095604412, iteration: 1698
loss: 2.1948087215423584,grad_norm: 0.9999998478815786, iteration: 1699
loss: 2.2308387756347656,grad_norm: 0.9999998448409639, iteration: 1700
loss: 2.3213837146759033,grad_norm: 0.9999998386618987, iteration: 1701
loss: 2.3524153232574463,grad_norm: 0.9999998821034843, iteration: 1702
loss: 2.175215244293213,grad_norm: 0.9999998457815844, iteration: 1703
loss: 2.128417730331421,grad_norm: 0.9999999587365258, iteration: 1704
loss: 2.384007692337036,grad_norm: 0.9999998811999665, iteration: 1705
loss: 2.1942331790924072,grad_norm: 0.9999998854690894, iteration: 1706
loss: 2.3216474056243896,grad_norm: 0.9999998249675818, iteration: 1707
loss: 2.1838083267211914,grad_norm: 0.9999997624708316, iteration: 1708
loss: 2.151111125946045,grad_norm: 0.9999998397739638, iteration: 1709
loss: 2.2263383865356445,grad_norm: 0.9999998008916285, iteration: 1710
loss: 2.253788471221924,grad_norm: 0.9999998990413163, iteration: 1711
loss: 2.2569379806518555,grad_norm: 0.9999999643566979, iteration: 1712
loss: 2.2838730812072754,grad_norm: 0.9999997900011358, iteration: 1713
loss: 2.274832248687744,grad_norm: 0.9999998965212158, iteration: 1714
loss: 2.137951374053955,grad_norm: 0.9999998472990668, iteration: 1715
loss: 2.2072160243988037,grad_norm: 0.9999998456700802, iteration: 1716
loss: 2.4078166484832764,grad_norm: 0.9999999239573998, iteration: 1717
loss: 2.30218768119812,grad_norm: 0.9999998401476724, iteration: 1718
loss: 2.1044390201568604,grad_norm: 0.9999998525315849, iteration: 1719
loss: 2.228179693222046,grad_norm: 0.9999998984738286, iteration: 1720
loss: 2.3889148235321045,grad_norm: 0.9999998099563792, iteration: 1721
loss: 2.350365400314331,grad_norm: 0.9999999133601251, iteration: 1722
loss: 2.130833387374878,grad_norm: 0.9999998221132813, iteration: 1723
loss: 2.1940016746520996,grad_norm: 0.9999999473724533, iteration: 1724
loss: 2.1715400218963623,grad_norm: 0.9999999412086928, iteration: 1725
loss: 2.26401948928833,grad_norm: 0.999999865858711, iteration: 1726
loss: 2.2077715396881104,grad_norm: 0.9999998684924778, iteration: 1727
loss: 2.196790933609009,grad_norm: 0.9999999430919776, iteration: 1728
loss: 2.216895818710327,grad_norm: 0.9999997961848149, iteration: 1729
loss: 2.2649667263031006,grad_norm: 0.9999998890366673, iteration: 1730
loss: 2.231316566467285,grad_norm: 0.9999997880005511, iteration: 1731
loss: 2.2559332847595215,grad_norm: 0.999999882385622, iteration: 1732
loss: 2.248211622238159,grad_norm: 0.9999998936757634, iteration: 1733
loss: 2.2473857402801514,grad_norm: 0.9999998700083016, iteration: 1734
loss: 2.2429349422454834,grad_norm: 0.9999998334564429, iteration: 1735
loss: 2.5113308429718018,grad_norm: 0.9999998190467927, iteration: 1736
loss: 2.2228736877441406,grad_norm: 0.9999998631373886, iteration: 1737
loss: 2.4056553840637207,grad_norm: 0.999999811135306, iteration: 1738
loss: 2.249605178833008,grad_norm: 0.9999998626282329, iteration: 1739
loss: 2.2092056274414062,grad_norm: 0.999999842843688, iteration: 1740
loss: 2.2517271041870117,grad_norm: 0.9999998517404599, iteration: 1741
loss: 2.1412463188171387,grad_norm: 0.999999894610267, iteration: 1742
loss: 2.134716033935547,grad_norm: 0.9999997602107725, iteration: 1743
loss: 2.2832133769989014,grad_norm: 0.9999997595867942, iteration: 1744
loss: 2.1520423889160156,grad_norm: 0.9999998880793035, iteration: 1745
loss: 2.190460443496704,grad_norm: 0.9999997960079764, iteration: 1746
loss: 2.1998653411865234,grad_norm: 0.9999997904329496, iteration: 1747
loss: 2.1562201976776123,grad_norm: 0.9999998802697635, iteration: 1748
loss: 2.11969256401062,grad_norm: 0.9999998718790827, iteration: 1749
loss: 2.096106767654419,grad_norm: 0.9999998012741428, iteration: 1750
loss: 2.225639581680298,grad_norm: 0.9999998415652998, iteration: 1751
loss: 2.0772387981414795,grad_norm: 0.9999998968627213, iteration: 1752
loss: 2.0199239253997803,grad_norm: 0.9999998313284039, iteration: 1753
loss: 2.1898610591888428,grad_norm: 0.9999998072947555, iteration: 1754
loss: 2.151339530944824,grad_norm: 0.9999998419413848, iteration: 1755
loss: 2.061079740524292,grad_norm: 0.9999999628769041, iteration: 1756
loss: 2.156367778778076,grad_norm: 0.9999998642544805, iteration: 1757
loss: 2.1887996196746826,grad_norm: 0.999999763528053, iteration: 1758
loss: 2.2528724670410156,grad_norm: 0.9999998395740324, iteration: 1759
loss: 2.355109453201294,grad_norm: 0.9999998350758442, iteration: 1760
loss: 2.2913689613342285,grad_norm: 0.9999999307532337, iteration: 1761
loss: 2.1171226501464844,grad_norm: 0.9999999217451212, iteration: 1762
loss: 2.203418493270874,grad_norm: 0.9999997333962428, iteration: 1763
loss: 2.2503650188446045,grad_norm: 0.9999999297895543, iteration: 1764
loss: 2.1966171264648438,grad_norm: 0.9999997878746285, iteration: 1765
loss: 2.1520721912384033,grad_norm: 0.9999998608998387, iteration: 1766
loss: 2.350253105163574,grad_norm: 0.9999999518321756, iteration: 1767
loss: 2.1355371475219727,grad_norm: 0.9999997688849279, iteration: 1768
loss: 2.1213483810424805,grad_norm: 0.9999999134612473, iteration: 1769
loss: 2.2979860305786133,grad_norm: 0.999999908190695, iteration: 1770
loss: 2.20866322517395,grad_norm: 0.9999998202910602, iteration: 1771
loss: 2.228379964828491,grad_norm: 0.9999998458867901, iteration: 1772
loss: 2.312004566192627,grad_norm: 0.9999999017216513, iteration: 1773
loss: 2.266500949859619,grad_norm: 0.9999998525410724, iteration: 1774
loss: 2.3339788913726807,grad_norm: 0.9999998929842833, iteration: 1775
loss: 2.380213499069214,grad_norm: 0.999999870933964, iteration: 1776
loss: 2.279940366744995,grad_norm: 0.9999998345947657, iteration: 1777
loss: 2.2741732597351074,grad_norm: 0.9999999044682935, iteration: 1778
loss: 2.0278100967407227,grad_norm: 0.9999998773699188, iteration: 1779
loss: 2.150906801223755,grad_norm: 0.9999999089157803, iteration: 1780
loss: 2.101611614227295,grad_norm: 0.9999998172773161, iteration: 1781
loss: 2.1315784454345703,grad_norm: 0.9999998744496653, iteration: 1782
loss: 2.2367360591888428,grad_norm: 0.9999998289498703, iteration: 1783
loss: 2.169161558151245,grad_norm: 0.99999986763565, iteration: 1784
loss: 2.1324214935302734,grad_norm: 0.9999998956478271, iteration: 1785
loss: 2.224510669708252,grad_norm: 0.9999997813751719, iteration: 1786
loss: 2.247497797012329,grad_norm: 0.9999998431870459, iteration: 1787
loss: 2.1538968086242676,grad_norm: 0.9999998774785542, iteration: 1788
loss: 2.1293928623199463,grad_norm: 0.9999998583826933, iteration: 1789
loss: 2.244964122772217,grad_norm: 0.9999998950240689, iteration: 1790
loss: 2.237016201019287,grad_norm: 0.9999998834745886, iteration: 1791
loss: 2.0603859424591064,grad_norm: 0.9999999322277274, iteration: 1792
loss: 2.2722744941711426,grad_norm: 0.9999998909323738, iteration: 1793
loss: 2.136795997619629,grad_norm: 0.9999998293312496, iteration: 1794
loss: 2.237466335296631,grad_norm: 0.9999997890206958, iteration: 1795
loss: 1.8962981700897217,grad_norm: 0.9999998557923698, iteration: 1796
loss: 2.203697919845581,grad_norm: 0.9999998368728035, iteration: 1797
loss: 2.1966800689697266,grad_norm: 0.9999999148322979, iteration: 1798
loss: 2.148427724838257,grad_norm: 0.9999998832250975, iteration: 1799
loss: 2.054259777069092,grad_norm: 0.9999998418648117, iteration: 1800
loss: 2.184077739715576,grad_norm: 0.9999997718970467, iteration: 1801
loss: 2.148425579071045,grad_norm: 0.9999998000739523, iteration: 1802
loss: 2.2276065349578857,grad_norm: 0.9999998669712895, iteration: 1803
loss: 2.172200918197632,grad_norm: 0.9999998738768413, iteration: 1804
loss: 2.099625587463379,grad_norm: 0.9999998274744967, iteration: 1805
loss: 2.134608268737793,grad_norm: 0.999999844042991, iteration: 1806
loss: 2.150782585144043,grad_norm: 0.999999826078985, iteration: 1807
loss: 2.2000181674957275,grad_norm: 0.9999998679279605, iteration: 1808
loss: 2.1591758728027344,grad_norm: 0.9999998303036151, iteration: 1809
loss: 2.197756052017212,grad_norm: 0.9999998967229088, iteration: 1810
loss: 1.9379692077636719,grad_norm: 0.999999815880335, iteration: 1811
loss: 2.016598701477051,grad_norm: 0.9999998617875572, iteration: 1812
loss: 2.203580856323242,grad_norm: 0.9999998414971862, iteration: 1813
loss: 2.180314064025879,grad_norm: 0.9999999012899216, iteration: 1814
loss: 2.156012535095215,grad_norm: 0.9999998544200875, iteration: 1815
loss: 2.1510517597198486,grad_norm: 0.9999998331400872, iteration: 1816
loss: 2.073289632797241,grad_norm: 0.9999999188123679, iteration: 1817
loss: 2.0796427726745605,grad_norm: 0.9999998882465146, iteration: 1818
loss: 2.1955697536468506,grad_norm: 0.9999998602587442, iteration: 1819
loss: 2.210705280303955,grad_norm: 0.9999998382716166, iteration: 1820
loss: 2.218919038772583,grad_norm: 0.9999998475461005, iteration: 1821
loss: 2.037768840789795,grad_norm: 0.9999997936984178, iteration: 1822
loss: 2.1920549869537354,grad_norm: 0.999999774721193, iteration: 1823
loss: 2.1798980236053467,grad_norm: 0.9999998630922898, iteration: 1824
loss: 2.197084426879883,grad_norm: 0.9999998008970039, iteration: 1825
loss: 2.239515542984009,grad_norm: 0.9999998035560558, iteration: 1826
loss: 1.9468936920166016,grad_norm: 0.9999998182233333, iteration: 1827
loss: 2.142716407775879,grad_norm: 0.9999998753274801, iteration: 1828
loss: 2.1837551593780518,grad_norm: 0.9999999083268395, iteration: 1829
loss: 2.1561431884765625,grad_norm: 0.9999998535944986, iteration: 1830
loss: 2.248331308364868,grad_norm: 0.9999998534379104, iteration: 1831
loss: 2.2536988258361816,grad_norm: 0.999999887638211, iteration: 1832
loss: 2.1904335021972656,grad_norm: 0.9999997984331406, iteration: 1833
loss: 2.1394736766815186,grad_norm: 0.9999998305320086, iteration: 1834
loss: 2.1071178913116455,grad_norm: 0.9999998215976582, iteration: 1835
loss: 2.1450107097625732,grad_norm: 0.9999998405327193, iteration: 1836
loss: 2.192992925643921,grad_norm: 0.9999998873332491, iteration: 1837
loss: 2.1425118446350098,grad_norm: 0.999999876293816, iteration: 1838
loss: 2.1548867225646973,grad_norm: 0.9999998692977333, iteration: 1839
loss: 2.18637752532959,grad_norm: 0.9999998390765175, iteration: 1840
loss: 2.215449810028076,grad_norm: 0.9999997727402302, iteration: 1841
loss: 2.0554981231689453,grad_norm: 0.9999998002502047, iteration: 1842
loss: 2.0326931476593018,grad_norm: 0.9999998604092428, iteration: 1843
loss: 2.0381078720092773,grad_norm: 0.9999998240278247, iteration: 1844
loss: 2.1586294174194336,grad_norm: 0.9999998041101554, iteration: 1845
loss: 2.1119813919067383,grad_norm: 0.9999999347730789, iteration: 1846
loss: 2.186054229736328,grad_norm: 0.999999794111635, iteration: 1847
loss: 2.150408983230591,grad_norm: 0.999999892529699, iteration: 1848
loss: 2.2004685401916504,grad_norm: 0.9999998480378075, iteration: 1849
loss: 2.143322467803955,grad_norm: 0.999999896392103, iteration: 1850
loss: 2.1250126361846924,grad_norm: 0.999999856745155, iteration: 1851
loss: 2.138227701187134,grad_norm: 0.9999999293263363, iteration: 1852
loss: 2.3060667514801025,grad_norm: 0.999999823328169, iteration: 1853
loss: 2.1842522621154785,grad_norm: 0.9999998737294462, iteration: 1854
loss: 2.270507335662842,grad_norm: 0.999999895733025, iteration: 1855
loss: 2.119642734527588,grad_norm: 0.9999998934526774, iteration: 1856
loss: 2.0386860370635986,grad_norm: 0.9999998222779447, iteration: 1857
loss: 2.2637524604797363,grad_norm: 0.9999999235186632, iteration: 1858
loss: 2.2631702423095703,grad_norm: 0.9999998063976445, iteration: 1859
loss: 2.125933885574341,grad_norm: 0.9999998229466553, iteration: 1860
loss: 1.9897884130477905,grad_norm: 0.9999997425822639, iteration: 1861
loss: 2.108792781829834,grad_norm: 0.9999997558311593, iteration: 1862
loss: 1.9947171211242676,grad_norm: 0.9999998849100918, iteration: 1863
loss: 2.139808416366577,grad_norm: 0.9999999686432641, iteration: 1864
loss: 2.1315906047821045,grad_norm: 0.9999998092489456, iteration: 1865
loss: 2.1567764282226562,grad_norm: 0.9999998068105181, iteration: 1866
loss: 2.1054465770721436,grad_norm: 0.9999998402159452, iteration: 1867
loss: 2.139523506164551,grad_norm: 0.9999998508233967, iteration: 1868
loss: 2.270577907562256,grad_norm: 0.9999998876434913, iteration: 1869
loss: 2.208682060241699,grad_norm: 0.9999998195113866, iteration: 1870
loss: 2.1242973804473877,grad_norm: 0.9999999235280862, iteration: 1871
loss: 1.9136532545089722,grad_norm: 0.9999998748231919, iteration: 1872
loss: 2.2210915088653564,grad_norm: 0.9999998259123416, iteration: 1873
loss: 2.148905038833618,grad_norm: 0.9999998450983295, iteration: 1874
loss: 2.1945056915283203,grad_norm: 0.9999998195862991, iteration: 1875
loss: 2.104105234146118,grad_norm: 0.9999999238095792, iteration: 1876
loss: 1.9189057350158691,grad_norm: 0.9999998493357366, iteration: 1877
loss: 2.1414260864257812,grad_norm: 0.9999998197861231, iteration: 1878
loss: 2.292701005935669,grad_norm: 0.9999998164217265, iteration: 1879
loss: 2.1129255294799805,grad_norm: 0.9999998449530347, iteration: 1880
loss: 2.0830790996551514,grad_norm: 0.999999862344582, iteration: 1881
loss: 2.072852373123169,grad_norm: 0.9999998821165682, iteration: 1882
loss: 2.1897082328796387,grad_norm: 0.9999998384396921, iteration: 1883
loss: 2.1281208992004395,grad_norm: 0.9999999056634585, iteration: 1884
loss: 2.0296072959899902,grad_norm: 0.9999999186714965, iteration: 1885
loss: 2.1423332691192627,grad_norm: 0.9999999015453951, iteration: 1886
loss: 1.9547260999679565,grad_norm: 0.9999997804194267, iteration: 1887
loss: 2.1213748455047607,grad_norm: 0.9999999007285734, iteration: 1888
loss: 2.2140369415283203,grad_norm: 0.9999998850512758, iteration: 1889
loss: 1.9947376251220703,grad_norm: 0.9999998610972048, iteration: 1890
loss: 2.1961400508880615,grad_norm: 0.999999779130187, iteration: 1891
loss: 2.2088160514831543,grad_norm: 0.9999998347694542, iteration: 1892
loss: 2.2340798377990723,grad_norm: 0.9999998459613938, iteration: 1893
loss: 2.3175854682922363,grad_norm: 0.9999998488591587, iteration: 1894
loss: 2.238093852996826,grad_norm: 0.9999998497931625, iteration: 1895
loss: 1.9808728694915771,grad_norm: 0.9999997797015954, iteration: 1896
loss: 2.1688876152038574,grad_norm: 0.9999998882049908, iteration: 1897
loss: 2.093667984008789,grad_norm: 0.9999998444772811, iteration: 1898
loss: 2.210289716720581,grad_norm: 0.9999998092641773, iteration: 1899
loss: 2.1389729976654053,grad_norm: 0.9999999007528118, iteration: 1900
loss: 2.142801523208618,grad_norm: 0.9999998443482856, iteration: 1901
loss: 2.1306309700012207,grad_norm: 0.99999986831058, iteration: 1902
loss: 2.169642686843872,grad_norm: 0.9999997585948491, iteration: 1903
loss: 2.138530731201172,grad_norm: 0.9999998773509554, iteration: 1904
loss: 2.095669984817505,grad_norm: 0.9999997831081658, iteration: 1905
loss: 2.150891065597534,grad_norm: 0.9999998469860452, iteration: 1906
loss: 2.056471109390259,grad_norm: 0.9999998798882946, iteration: 1907
loss: 2.1772983074188232,grad_norm: 0.9999998936468742, iteration: 1908
loss: 2.278547525405884,grad_norm: 0.9999998226130581, iteration: 1909
loss: 2.1407923698425293,grad_norm: 0.9999998006720657, iteration: 1910
loss: 2.124051094055176,grad_norm: 0.9999997723736491, iteration: 1911
loss: 2.2285122871398926,grad_norm: 0.9999998455016872, iteration: 1912
loss: 2.0074925422668457,grad_norm: 0.999999811534368, iteration: 1913
loss: 2.080193281173706,grad_norm: 0.9999998958690074, iteration: 1914
loss: 2.0050337314605713,grad_norm: 0.9999998635703203, iteration: 1915
loss: 1.9965587854385376,grad_norm: 0.9999998426892381, iteration: 1916
loss: 1.9393284320831299,grad_norm: 0.9999998732932843, iteration: 1917
loss: 2.1961286067962646,grad_norm: 0.9999998632222322, iteration: 1918
loss: 2.2052114009857178,grad_norm: 0.9999999060758141, iteration: 1919
loss: 2.0947563648223877,grad_norm: 0.9999999524924087, iteration: 1920
loss: 2.037487506866455,grad_norm: 0.9999998018368701, iteration: 1921
loss: 2.180478096008301,grad_norm: 0.9999998927296735, iteration: 1922
loss: 2.1147866249084473,grad_norm: 0.9999998492389081, iteration: 1923
loss: 2.000034809112549,grad_norm: 0.999999888897048, iteration: 1924
loss: 2.1680657863616943,grad_norm: 0.9999998588119821, iteration: 1925
loss: 1.856545090675354,grad_norm: 0.9999998847181011, iteration: 1926
loss: 2.0983188152313232,grad_norm: 0.999999840457292, iteration: 1927
loss: 2.0443644523620605,grad_norm: 0.9999999005507281, iteration: 1928
loss: 2.135185956954956,grad_norm: 0.9999999028607152, iteration: 1929
loss: 2.0792975425720215,grad_norm: 0.9999998690991273, iteration: 1930
loss: 2.1368727684020996,grad_norm: 0.999999846610521, iteration: 1931
loss: 2.0688188076019287,grad_norm: 0.9999997992000429, iteration: 1932
loss: 2.2318339347839355,grad_norm: 0.9999999087633875, iteration: 1933
loss: 2.241400718688965,grad_norm: 0.9999999016391491, iteration: 1934
loss: 2.0481250286102295,grad_norm: 0.9999998280908657, iteration: 1935
loss: 2.3036160469055176,grad_norm: 0.9999997959432785, iteration: 1936
loss: 2.1068787574768066,grad_norm: 0.9999998628359446, iteration: 1937
loss: 2.3016340732574463,grad_norm: 0.9999999091412991, iteration: 1938
loss: 2.1569721698760986,grad_norm: 0.9999998087514888, iteration: 1939
loss: 2.1247713565826416,grad_norm: 0.9999998278425418, iteration: 1940
loss: 2.1819190979003906,grad_norm: 0.9999998120245789, iteration: 1941
loss: 1.9311472177505493,grad_norm: 0.9999998426151995, iteration: 1942
loss: 2.0995428562164307,grad_norm: 0.9999998510785213, iteration: 1943
loss: 2.0103509426116943,grad_norm: 0.9999998905884688, iteration: 1944
loss: 2.0857157707214355,grad_norm: 0.9999998862588017, iteration: 1945
loss: 2.032888650894165,grad_norm: 0.9999998973177698, iteration: 1946
loss: 2.0268588066101074,grad_norm: 0.9999998675707544, iteration: 1947
loss: 2.0701940059661865,grad_norm: 0.9999998421974671, iteration: 1948
loss: 2.0864415168762207,grad_norm: 0.9999998508151043, iteration: 1949
loss: 2.145660877227783,grad_norm: 0.999999877809934, iteration: 1950
loss: 2.1650822162628174,grad_norm: 0.9999998575459486, iteration: 1951
loss: 2.0044867992401123,grad_norm: 0.9999999059678334, iteration: 1952
loss: 1.9134138822555542,grad_norm: 0.999999916312836, iteration: 1953
loss: 2.1517791748046875,grad_norm: 0.999999847831801, iteration: 1954
loss: 2.1360762119293213,grad_norm: 0.999999845736342, iteration: 1955
loss: 2.3042385578155518,grad_norm: 0.9999998667678648, iteration: 1956
loss: 2.2674505710601807,grad_norm: 0.9999998731492452, iteration: 1957
loss: 2.1063506603240967,grad_norm: 0.9999998403973545, iteration: 1958
loss: 2.2242720127105713,grad_norm: 0.999999798231123, iteration: 1959
loss: 2.1817128658294678,grad_norm: 0.9999998882594526, iteration: 1960
loss: 1.8538681268692017,grad_norm: 0.9999998692640384, iteration: 1961
loss: 2.0680508613586426,grad_norm: 0.9999997809193651, iteration: 1962
loss: 2.1229209899902344,grad_norm: 0.9999997872839592, iteration: 1963
loss: 2.0554935932159424,grad_norm: 0.9999998371321867, iteration: 1964
loss: 2.153196096420288,grad_norm: 0.9999998386389901, iteration: 1965
loss: 1.8986127376556396,grad_norm: 0.9999997816217288, iteration: 1966
loss: 2.1525213718414307,grad_norm: 0.9999998519736838, iteration: 1967
loss: 2.10945725440979,grad_norm: 0.9999998137306988, iteration: 1968
loss: 2.1859607696533203,grad_norm: 0.9999998491600959, iteration: 1969
loss: 2.1104722023010254,grad_norm: 0.9999997708716285, iteration: 1970
loss: 2.1386160850524902,grad_norm: 0.9999997455749625, iteration: 1971
loss: 2.051532030105591,grad_norm: 0.9999998493063131, iteration: 1972
loss: 2.1839914321899414,grad_norm: 0.9999999771871986, iteration: 1973
loss: 2.1557552814483643,grad_norm: 0.9999997897144827, iteration: 1974
loss: 2.043656826019287,grad_norm: 0.9999999041251196, iteration: 1975
loss: 2.0226516723632812,grad_norm: 0.9999998025690873, iteration: 1976
loss: 2.036562919616699,grad_norm: 0.9999998864224827, iteration: 1977
loss: 2.158766746520996,grad_norm: 0.9999998808454104, iteration: 1978
loss: 2.053083658218384,grad_norm: 0.9999998129948432, iteration: 1979
loss: 2.057497262954712,grad_norm: 0.9999998494772128, iteration: 1980
loss: 2.1593592166900635,grad_norm: 0.9999997987501155, iteration: 1981
loss: 2.1427688598632812,grad_norm: 0.9999997874070022, iteration: 1982
loss: 2.0369670391082764,grad_norm: 0.9999998673257148, iteration: 1983
loss: 1.9684727191925049,grad_norm: 0.9999998966821849, iteration: 1984
loss: 2.1668834686279297,grad_norm: 0.9999998529930212, iteration: 1985
loss: 1.9942834377288818,grad_norm: 0.9999998991784294, iteration: 1986
loss: 2.0646109580993652,grad_norm: 0.9999998917517108, iteration: 1987
loss: 2.0894851684570312,grad_norm: 0.9999998514412725, iteration: 1988
loss: 1.9691561460494995,grad_norm: 0.9999998825053049, iteration: 1989
loss: 2.102024793624878,grad_norm: 0.9999997842863776, iteration: 1990
loss: 2.0927016735076904,grad_norm: 0.9999998657200525, iteration: 1991
loss: 2.01851224899292,grad_norm: 0.9999999102626931, iteration: 1992
loss: 2.071668863296509,grad_norm: 0.9999998493993035, iteration: 1993
loss: 2.105891466140747,grad_norm: 0.9999998281059549, iteration: 1994
loss: 1.9776952266693115,grad_norm: 0.9999997766278776, iteration: 1995
loss: 2.0690951347351074,grad_norm: 0.9999998563043716, iteration: 1996
loss: 2.1853809356689453,grad_norm: 0.9999998658811491, iteration: 1997
loss: 2.01535964012146,grad_norm: 0.999999839330147, iteration: 1998
loss: 1.946861982345581,grad_norm: 0.9999999141958575, iteration: 1999
loss: 2.1358542442321777,grad_norm: 0.9999997877882318, iteration: 2000
loss: 2.0601370334625244,grad_norm: 0.9999998915546812, iteration: 2001
loss: 2.22623610496521,grad_norm: 0.9999998670332887, iteration: 2002
loss: 2.106884479522705,grad_norm: 0.9999998315113421, iteration: 2003
loss: 2.185910940170288,grad_norm: 0.9999999083206602, iteration: 2004
loss: 2.0861265659332275,grad_norm: 0.999999881598027, iteration: 2005
loss: 2.1001009941101074,grad_norm: 0.9999998820192337, iteration: 2006
loss: 2.0999104976654053,grad_norm: 0.999999889932768, iteration: 2007
loss: 1.928636908531189,grad_norm: 0.9999999023695519, iteration: 2008
loss: 2.0150694847106934,grad_norm: 0.9999998091162119, iteration: 2009
loss: 2.0111958980560303,grad_norm: 0.9999998805308882, iteration: 2010
loss: 2.130444049835205,grad_norm: 0.9999998819866224, iteration: 2011
loss: 2.118950128555298,grad_norm: 0.9999998384389465, iteration: 2012
loss: 2.146061420440674,grad_norm: 0.9999998941010738, iteration: 2013
loss: 2.1130640506744385,grad_norm: 0.9999998893705996, iteration: 2014
loss: 1.9903068542480469,grad_norm: 0.9999998587501292, iteration: 2015
loss: 2.1777918338775635,grad_norm: 0.9999997881478524, iteration: 2016
loss: 2.047065019607544,grad_norm: 0.9999997412305128, iteration: 2017
loss: 2.234999418258667,grad_norm: 0.9999998080351246, iteration: 2018
loss: 1.9253578186035156,grad_norm: 0.9999997696332247, iteration: 2019
loss: 1.9876596927642822,grad_norm: 0.9999997590757883, iteration: 2020
loss: 2.0226385593414307,grad_norm: 0.9999999057731621, iteration: 2021
loss: 2.054330825805664,grad_norm: 0.999999868561332, iteration: 2022
loss: 1.9503593444824219,grad_norm: 0.9999998908147469, iteration: 2023
loss: 2.0427300930023193,grad_norm: 0.9999998993937274, iteration: 2024
loss: 2.128725528717041,grad_norm: 0.9999998130994779, iteration: 2025
loss: 2.063260793685913,grad_norm: 0.9999998899993875, iteration: 2026
loss: 2.0992887020111084,grad_norm: 0.9999998938666046, iteration: 2027
loss: 2.1964151859283447,grad_norm: 0.9999998131667998, iteration: 2028
loss: 2.102027177810669,grad_norm: 0.9999998662972897, iteration: 2029
loss: 1.9538886547088623,grad_norm: 0.999999840665064, iteration: 2030
loss: 2.0708775520324707,grad_norm: 0.999999890585442, iteration: 2031
loss: 2.00461745262146,grad_norm: 0.9999998331536096, iteration: 2032
loss: 2.1811912059783936,grad_norm: 0.999999828834047, iteration: 2033
loss: 2.167498826980591,grad_norm: 0.9999998218119813, iteration: 2034
loss: 2.0543859004974365,grad_norm: 0.9999998944616826, iteration: 2035
loss: 2.0423638820648193,grad_norm: 0.9999999244535038, iteration: 2036
loss: 2.1512598991394043,grad_norm: 0.9999998145159139, iteration: 2037
loss: 2.1295411586761475,grad_norm: 0.9999998221171839, iteration: 2038
loss: 2.091372013092041,grad_norm: 0.9999999153454527, iteration: 2039
loss: 2.1856119632720947,grad_norm: 0.999999807809384, iteration: 2040
loss: 2.009591579437256,grad_norm: 0.9999998314067106, iteration: 2041
loss: 2.0732550621032715,grad_norm: 0.999999750063928, iteration: 2042
loss: 2.0522196292877197,grad_norm: 0.9999998273479302, iteration: 2043
loss: 2.1729660034179688,grad_norm: 0.9999998335223532, iteration: 2044
loss: 2.0685782432556152,grad_norm: 0.9999998457007996, iteration: 2045
loss: 2.1532599925994873,grad_norm: 0.9999999054120867, iteration: 2046
loss: 2.1652779579162598,grad_norm: 0.9999999193398299, iteration: 2047
loss: 2.295238494873047,grad_norm: 0.9999999399810829, iteration: 2048
loss: 1.9886136054992676,grad_norm: 0.9999998692245836, iteration: 2049
loss: 1.7974401712417603,grad_norm: 0.9999998778721952, iteration: 2050
loss: 2.0436792373657227,grad_norm: 0.9999998728395121, iteration: 2051
loss: 1.9345662593841553,grad_norm: 0.9999998762215597, iteration: 2052
loss: 2.0967934131622314,grad_norm: 0.9999999229543189, iteration: 2053
loss: 2.1222221851348877,grad_norm: 0.9999998447405254, iteration: 2054
loss: 1.8620855808258057,grad_norm: 0.9999999132458128, iteration: 2055
loss: 1.9097251892089844,grad_norm: 0.9999998220300144, iteration: 2056
loss: 1.9071977138519287,grad_norm: 0.9999998609077225, iteration: 2057
loss: 2.0627541542053223,grad_norm: 0.9999998727621469, iteration: 2058
loss: 2.1180458068847656,grad_norm: 0.9999998201341638, iteration: 2059
loss: 2.247814416885376,grad_norm: 0.9999997408434929, iteration: 2060
loss: 2.029564380645752,grad_norm: 0.9999998925675242, iteration: 2061
loss: 1.8524590730667114,grad_norm: 0.9999997970311362, iteration: 2062
loss: 2.063701868057251,grad_norm: 0.9999999243600735, iteration: 2063
loss: 2.1649303436279297,grad_norm: 0.9999998635801778, iteration: 2064
loss: 2.103422164916992,grad_norm: 0.9999998006559914, iteration: 2065
loss: 2.133793592453003,grad_norm: 0.9999999628954299, iteration: 2066
loss: 2.045469284057617,grad_norm: 0.999999897298679, iteration: 2067
loss: 2.1740853786468506,grad_norm: 0.9999998614042547, iteration: 2068
loss: 1.9450938701629639,grad_norm: 0.9999998324561408, iteration: 2069
loss: 2.125084161758423,grad_norm: 0.9999998508528045, iteration: 2070
loss: 2.090731382369995,grad_norm: 0.9999998343741694, iteration: 2071
loss: 1.904928207397461,grad_norm: 0.9999998338507832, iteration: 2072
loss: 2.0257701873779297,grad_norm: 0.9999998360404239, iteration: 2073
loss: 2.0006988048553467,grad_norm: 0.999999840834346, iteration: 2074
loss: 2.0952768325805664,grad_norm: 0.9999998578874085, iteration: 2075
loss: 2.0548243522644043,grad_norm: 0.9999998899307113, iteration: 2076
loss: 2.172243595123291,grad_norm: 0.9999998968142962, iteration: 2077
loss: 2.155024528503418,grad_norm: 0.9999998427854696, iteration: 2078
loss: 2.1797823905944824,grad_norm: 0.9999998523925655, iteration: 2079
loss: 2.0028316974639893,grad_norm: 0.9999998211170953, iteration: 2080
loss: 2.1486616134643555,grad_norm: 0.9999997790339243, iteration: 2081
loss: 2.148621082305908,grad_norm: 0.9999998419760594, iteration: 2082
loss: 2.1166021823883057,grad_norm: 0.9999998302349803, iteration: 2083
loss: 2.3085920810699463,grad_norm: 0.9999998376088846, iteration: 2084
loss: 2.130756139755249,grad_norm: 0.9999998856090091, iteration: 2085
loss: 2.049535036087036,grad_norm: 0.9999998498125874, iteration: 2086
loss: 2.137795925140381,grad_norm: 0.99999990008733, iteration: 2087
loss: 2.047586441040039,grad_norm: 0.9999998832427901, iteration: 2088
loss: 2.1009719371795654,grad_norm: 0.999999833804106, iteration: 2089
loss: 1.9169303178787231,grad_norm: 0.9999998943244097, iteration: 2090
loss: 2.034621477127075,grad_norm: 0.9999999218649749, iteration: 2091
loss: 2.102588653564453,grad_norm: 0.9999998148270178, iteration: 2092
loss: 2.049159288406372,grad_norm: 0.9999998456622836, iteration: 2093
loss: 2.018021583557129,grad_norm: 0.9999998292881076, iteration: 2094
loss: 2.1411378383636475,grad_norm: 0.9999998813562763, iteration: 2095
loss: 1.9065876007080078,grad_norm: 0.9999998553540125, iteration: 2096
loss: 2.136810779571533,grad_norm: 0.9999998696658833, iteration: 2097
loss: 2.1678502559661865,grad_norm: 0.9999998259553766, iteration: 2098
loss: 2.0034449100494385,grad_norm: 0.9999997914071732, iteration: 2099
loss: 1.9651460647583008,grad_norm: 0.9999998629027649, iteration: 2100
loss: 2.101778984069824,grad_norm: 0.9999999011853627, iteration: 2101
loss: 2.0089426040649414,grad_norm: 0.9999998129175692, iteration: 2102
loss: 1.9649463891983032,grad_norm: 0.9999998256533982, iteration: 2103
loss: 2.1049911975860596,grad_norm: 0.999999848781031, iteration: 2104
loss: 2.24009370803833,grad_norm: 0.999999885900262, iteration: 2105
loss: 1.9930411577224731,grad_norm: 0.9999998347594269, iteration: 2106
loss: 2.1438608169555664,grad_norm: 0.9999998923999639, iteration: 2107
loss: 2.1250319480895996,grad_norm: 0.9999999134427466, iteration: 2108
loss: 1.8011304140090942,grad_norm: 0.9999998594473893, iteration: 2109
loss: 2.0137176513671875,grad_norm: 0.9999998967375567, iteration: 2110
loss: 2.031074285507202,grad_norm: 0.9999999200343824, iteration: 2111
loss: 1.9617912769317627,grad_norm: 0.9999998946894235, iteration: 2112
loss: 2.0936968326568604,grad_norm: 0.999999930360405, iteration: 2113
loss: 2.109055995941162,grad_norm: 0.9999998090499208, iteration: 2114
loss: 1.990001916885376,grad_norm: 0.999999819196715, iteration: 2115
loss: 1.9101790189743042,grad_norm: 0.9999997732737801, iteration: 2116
loss: 2.0954809188842773,grad_norm: 0.9999997908474527, iteration: 2117
loss: 1.940222978591919,grad_norm: 0.9999998849638565, iteration: 2118
loss: 2.037308692932129,grad_norm: 0.9999998774101373, iteration: 2119
loss: 2.0999374389648438,grad_norm: 0.9999999008047386, iteration: 2120
loss: 2.2944798469543457,grad_norm: 0.9999998931303572, iteration: 2121
loss: 1.9064834117889404,grad_norm: 0.9999998941346903, iteration: 2122
loss: 1.8457880020141602,grad_norm: 0.9999998556807204, iteration: 2123
loss: 2.007556200027466,grad_norm: 0.9999998386202851, iteration: 2124
loss: 2.0441505908966064,grad_norm: 0.9999998819807027, iteration: 2125
loss: 1.9017564058303833,grad_norm: 0.9999999068835793, iteration: 2126
loss: 2.1189677715301514,grad_norm: 0.9999998599028546, iteration: 2127
loss: 2.11361026763916,grad_norm: 0.9999998097715543, iteration: 2128
loss: 2.101731538772583,grad_norm: 0.9999998065399954, iteration: 2129
loss: 2.1643598079681396,grad_norm: 0.9999998095682712, iteration: 2130
loss: 1.8112916946411133,grad_norm: 0.9999998999529347, iteration: 2131
loss: 2.048495054244995,grad_norm: 0.9999998435603106, iteration: 2132
loss: 2.0914440155029297,grad_norm: 0.9999997955277243, iteration: 2133
loss: 2.0536625385284424,grad_norm: 0.999999766295201, iteration: 2134
loss: 2.1479036808013916,grad_norm: 0.9999998601121861, iteration: 2135
loss: 2.0464963912963867,grad_norm: 0.9999998331640341, iteration: 2136
loss: 2.126495599746704,grad_norm: 0.9999998433868255, iteration: 2137
loss: 2.0761027336120605,grad_norm: 0.9999998773859731, iteration: 2138
loss: 1.9832161664962769,grad_norm: 0.9999998013140017, iteration: 2139
loss: 2.020411968231201,grad_norm: 0.9999997921635478, iteration: 2140
loss: 2.0565545558929443,grad_norm: 0.9999997853475519, iteration: 2141
loss: 1.9324432611465454,grad_norm: 0.9999998408077913, iteration: 2142
loss: 2.0886359214782715,grad_norm: 0.9999998058580097, iteration: 2143
loss: 2.1269350051879883,grad_norm: 0.999999792890338, iteration: 2144
loss: 1.9618855714797974,grad_norm: 0.9999998361079928, iteration: 2145
loss: 1.9850742816925049,grad_norm: 0.999999787406319, iteration: 2146
loss: 2.0039966106414795,grad_norm: 0.999999778756841, iteration: 2147
loss: 2.0641393661499023,grad_norm: 0.9999998910418272, iteration: 2148
loss: 1.992197036743164,grad_norm: 0.9999998018262772, iteration: 2149
loss: 2.188572406768799,grad_norm: 0.9999998515817882, iteration: 2150
loss: 2.1373584270477295,grad_norm: 0.9999998065600374, iteration: 2151
loss: 1.7817785739898682,grad_norm: 0.9999997435105952, iteration: 2152
loss: 1.9952565431594849,grad_norm: 0.9999998171442064, iteration: 2153
loss: 2.0816407203674316,grad_norm: 0.9999998625481735, iteration: 2154
loss: 2.1508288383483887,grad_norm: 0.999999789510184, iteration: 2155
loss: 2.0128495693206787,grad_norm: 0.9999998309772853, iteration: 2156
loss: 2.0369746685028076,grad_norm: 0.9999998928900304, iteration: 2157
loss: 2.0503528118133545,grad_norm: 0.9999998388410016, iteration: 2158
loss: 1.8172500133514404,grad_norm: 0.9999998243948863, iteration: 2159
loss: 1.9691576957702637,grad_norm: 0.999999847492908, iteration: 2160
loss: 2.181959629058838,grad_norm: 0.999999928751919, iteration: 2161
loss: 1.8922386169433594,grad_norm: 0.9999998466246084, iteration: 2162
loss: 2.1178202629089355,grad_norm: 0.9999998468541861, iteration: 2163
loss: 1.9235774278640747,grad_norm: 0.9999998716224356, iteration: 2164
loss: 1.7705655097961426,grad_norm: 0.9999998051827576, iteration: 2165
loss: 2.0192277431488037,grad_norm: 0.9999998653913628, iteration: 2166
loss: 2.0830440521240234,grad_norm: 0.9999998299077697, iteration: 2167
loss: 1.8489431142807007,grad_norm: 0.9999997640669646, iteration: 2168
loss: 1.9339765310287476,grad_norm: 0.9999997960854147, iteration: 2169
loss: 2.030719757080078,grad_norm: 0.9999998157217818, iteration: 2170
loss: 1.8581520318984985,grad_norm: 0.9999998672710544, iteration: 2171
loss: 1.9091216325759888,grad_norm: 0.9999998282158205, iteration: 2172
loss: 1.9556293487548828,grad_norm: 0.9999998786548101, iteration: 2173
loss: 1.93416428565979,grad_norm: 0.9999998789497677, iteration: 2174
loss: 2.0654711723327637,grad_norm: 0.999999865985118, iteration: 2175
loss: 2.2026398181915283,grad_norm: 0.9999998603800911, iteration: 2176
loss: 2.1038424968719482,grad_norm: 0.999999804587094, iteration: 2177
loss: 2.0223190784454346,grad_norm: 0.999999821879842, iteration: 2178
loss: 1.9911160469055176,grad_norm: 0.9999997987260172, iteration: 2179
loss: 2.1419880390167236,grad_norm: 0.9999998273846374, iteration: 2180
loss: 1.9341776371002197,grad_norm: 0.9999998911683398, iteration: 2181
loss: 1.9871349334716797,grad_norm: 0.9999998069348287, iteration: 2182
loss: 2.0520942211151123,grad_norm: 0.9999998327909548, iteration: 2183
loss: 1.958737850189209,grad_norm: 0.9999998814797361, iteration: 2184
loss: 2.010319471359253,grad_norm: 0.9999997679590658, iteration: 2185
loss: 2.068633794784546,grad_norm: 0.9999998299024884, iteration: 2186
loss: 2.0305840969085693,grad_norm: 0.999999871935834, iteration: 2187
loss: 2.0607521533966064,grad_norm: 0.9999998738112689, iteration: 2188
loss: 2.1183981895446777,grad_norm: 0.9999998434391171, iteration: 2189
loss: 1.9749130010604858,grad_norm: 0.9999998767317005, iteration: 2190
loss: 1.9833292961120605,grad_norm: 0.9999998818144857, iteration: 2191
loss: 2.1101505756378174,grad_norm: 0.999999813882199, iteration: 2192
loss: 2.0900964736938477,grad_norm: 0.9999998183348083, iteration: 2193
loss: 2.073735237121582,grad_norm: 0.9999998896159652, iteration: 2194
loss: 2.027270793914795,grad_norm: 0.9999998797000601, iteration: 2195
loss: 1.9671846628189087,grad_norm: 0.9999999062236137, iteration: 2196
loss: 2.0469913482666016,grad_norm: 0.9999998802315367, iteration: 2197
loss: 1.9692912101745605,grad_norm: 0.9999998676294055, iteration: 2198
loss: 1.9665523767471313,grad_norm: 0.9999998209940038, iteration: 2199
loss: 2.086167812347412,grad_norm: 0.9999998245210592, iteration: 2200
loss: 2.0032925605773926,grad_norm: 0.9999998370216654, iteration: 2201
loss: 1.9840035438537598,grad_norm: 0.9999997933731473, iteration: 2202
loss: 1.709113597869873,grad_norm: 0.9999998573640971, iteration: 2203
loss: 1.895979881286621,grad_norm: 0.9999999270543353, iteration: 2204
loss: 1.945687174797058,grad_norm: 0.999999806706426, iteration: 2205
loss: 2.0149011611938477,grad_norm: 0.9999998654421517, iteration: 2206
loss: 1.9657375812530518,grad_norm: 0.9999998439964195, iteration: 2207
loss: 1.8917219638824463,grad_norm: 0.999999850062847, iteration: 2208
loss: 2.060441017150879,grad_norm: 0.9999998657702726, iteration: 2209
loss: 1.8512320518493652,grad_norm: 0.9999998794912628, iteration: 2210
loss: 1.9403588771820068,grad_norm: 0.9999998583110792, iteration: 2211
loss: 1.9593700170516968,grad_norm: 0.9999997883140019, iteration: 2212
loss: 1.975095272064209,grad_norm: 0.999999900172686, iteration: 2213
loss: 1.7891420125961304,grad_norm: 0.9999998557965428, iteration: 2214
loss: 1.9708168506622314,grad_norm: 0.9999998041414837, iteration: 2215
loss: 2.0507009029388428,grad_norm: 0.9999998571990114, iteration: 2216
loss: 2.0174131393432617,grad_norm: 0.9999998469001903, iteration: 2217
loss: 2.05483078956604,grad_norm: 0.999999814086259, iteration: 2218
loss: 1.988555908203125,grad_norm: 0.9999999254009945, iteration: 2219
loss: 2.0263545513153076,grad_norm: 0.999999783758776, iteration: 2220
loss: 1.9158501625061035,grad_norm: 0.9999999305703857, iteration: 2221
loss: 2.01435923576355,grad_norm: 0.9999998289724491, iteration: 2222
loss: 1.993754267692566,grad_norm: 0.9999998320924594, iteration: 2223
loss: 2.0076797008514404,grad_norm: 0.9999998094840357, iteration: 2224
loss: 1.959753394126892,grad_norm: 0.9999998837169691, iteration: 2225
loss: 2.0113914012908936,grad_norm: 0.9999998853899178, iteration: 2226
loss: 2.12318754196167,grad_norm: 0.9999999368132498, iteration: 2227
loss: 2.023517370223999,grad_norm: 0.9999998050939987, iteration: 2228
loss: 2.106797933578491,grad_norm: 0.9999998539205343, iteration: 2229
loss: 1.877406120300293,grad_norm: 0.9999999357311117, iteration: 2230
loss: 1.9382567405700684,grad_norm: 0.9999999210315412, iteration: 2231
loss: 1.8106738328933716,grad_norm: 0.9999998272243722, iteration: 2232
loss: 2.0587317943573,grad_norm: 0.9999998596881433, iteration: 2233
loss: 2.0117528438568115,grad_norm: 0.9999998069892702, iteration: 2234
loss: 1.8710048198699951,grad_norm: 0.999999805032019, iteration: 2235
loss: 2.0050928592681885,grad_norm: 0.9999998217512496, iteration: 2236
loss: 1.9905694723129272,grad_norm: 0.9999998979181749, iteration: 2237
loss: 1.9464843273162842,grad_norm: 0.9999997985377143, iteration: 2238
loss: 1.9968796968460083,grad_norm: 0.9999997801439768, iteration: 2239
loss: 1.9069859981536865,grad_norm: 0.9999998745414802, iteration: 2240
loss: 1.9597738981246948,grad_norm: 0.9999998094487926, iteration: 2241
loss: 1.9575854539871216,grad_norm: 0.9999998811913294, iteration: 2242
loss: 1.9332594871520996,grad_norm: 0.9999998488259056, iteration: 2243
loss: 1.85920250415802,grad_norm: 0.9999997555095431, iteration: 2244
loss: 1.965225100517273,grad_norm: 0.9999998499215337, iteration: 2245
loss: 1.889855146408081,grad_norm: 0.9999998060183007, iteration: 2246
loss: 1.9035545587539673,grad_norm: 0.999999833170101, iteration: 2247
loss: 2.0160210132598877,grad_norm: 0.999999872040879, iteration: 2248
loss: 2.0489721298217773,grad_norm: 0.9999997955814065, iteration: 2249
loss: 1.972118854522705,grad_norm: 0.9999998390059365, iteration: 2250
loss: 1.9911657571792603,grad_norm: 0.9999998966670199, iteration: 2251
loss: 2.0138652324676514,grad_norm: 0.99999978711284, iteration: 2252
loss: 2.1366143226623535,grad_norm: 0.9999998600401532, iteration: 2253
loss: 1.8695123195648193,grad_norm: 0.9999998484968994, iteration: 2254
loss: 1.8719888925552368,grad_norm: 0.999999827170985, iteration: 2255
loss: 1.918975830078125,grad_norm: 0.9999999259636193, iteration: 2256
loss: 1.9932992458343506,grad_norm: 0.9999997921940282, iteration: 2257
loss: 2.019756317138672,grad_norm: 0.9999998576355851, iteration: 2258
loss: 1.942447304725647,grad_norm: 0.9999998740834716, iteration: 2259
loss: 1.8819708824157715,grad_norm: 0.9999998694194042, iteration: 2260
loss: 1.9761073589324951,grad_norm: 0.9999997704078308, iteration: 2261
loss: 1.9592370986938477,grad_norm: 0.9999998975513875, iteration: 2262
loss: 1.9829366207122803,grad_norm: 0.9999998510935908, iteration: 2263
loss: 1.9668684005737305,grad_norm: 0.9999998506548169, iteration: 2264
loss: 1.9038251638412476,grad_norm: 0.9999998440444501, iteration: 2265
loss: 1.8556240797042847,grad_norm: 0.9999998172536224, iteration: 2266
loss: 2.0449960231781006,grad_norm: 0.999999875345647, iteration: 2267
loss: 2.0248641967773438,grad_norm: 0.9999997934938867, iteration: 2268
loss: 2.095919370651245,grad_norm: 0.9999997973723995, iteration: 2269
loss: 1.9972848892211914,grad_norm: 0.9999999599968598, iteration: 2270
loss: 1.9070559740066528,grad_norm: 0.9999999037626395, iteration: 2271
loss: 1.9293789863586426,grad_norm: 0.9999998785424989, iteration: 2272
loss: 2.0394771099090576,grad_norm: 0.9999998891561013, iteration: 2273
loss: 2.0271403789520264,grad_norm: 0.999999863098638, iteration: 2274
loss: 2.0710105895996094,grad_norm: 0.9999998328426859, iteration: 2275
loss: 1.9048573970794678,grad_norm: 0.9999998823521776, iteration: 2276
loss: 1.925334095954895,grad_norm: 0.9999998987860197, iteration: 2277
loss: 2.200730085372925,grad_norm: 0.9999998174904311, iteration: 2278
loss: 2.0464820861816406,grad_norm: 0.9999997996391274, iteration: 2279
loss: 2.0071043968200684,grad_norm: 0.9999998458178848, iteration: 2280
loss: 1.8265470266342163,grad_norm: 0.9999998438369309, iteration: 2281
loss: 1.9183785915374756,grad_norm: 0.9999997805178908, iteration: 2282
loss: 1.9412773847579956,grad_norm: 0.9999997992959166, iteration: 2283
loss: 1.9911508560180664,grad_norm: 0.9999998685384841, iteration: 2284
loss: 1.9846805334091187,grad_norm: 0.9999998152116034, iteration: 2285
loss: 1.9241219758987427,grad_norm: 0.9999998215668467, iteration: 2286
loss: 1.9362545013427734,grad_norm: 0.9999997859095564, iteration: 2287
loss: 1.9485987424850464,grad_norm: 0.9999998129775161, iteration: 2288
loss: 1.8302700519561768,grad_norm: 0.9999998653044763, iteration: 2289
loss: 2.145888328552246,grad_norm: 0.9999998721438589, iteration: 2290
loss: 1.9579840898513794,grad_norm: 0.9999998914275993, iteration: 2291
loss: 1.9347838163375854,grad_norm: 0.9999998877876096, iteration: 2292
loss: 1.949336051940918,grad_norm: 0.9999998951127714, iteration: 2293
loss: 1.996411919593811,grad_norm: 0.9999998568439192, iteration: 2294
loss: 2.1484365463256836,grad_norm: 0.9999998959975294, iteration: 2295
loss: 1.8281339406967163,grad_norm: 0.999999851791116, iteration: 2296
loss: 1.9142544269561768,grad_norm: 0.9999998551205346, iteration: 2297
loss: 1.8659378290176392,grad_norm: 0.9999999073034335, iteration: 2298
loss: 2.0088117122650146,grad_norm: 0.9999997839521435, iteration: 2299
loss: 1.9876384735107422,grad_norm: 0.9999998662463597, iteration: 2300
loss: 2.001516342163086,grad_norm: 0.9999997760942078, iteration: 2301
loss: 2.111354351043701,grad_norm: 0.9999998376005619, iteration: 2302
loss: 1.9850194454193115,grad_norm: 0.9999998105727042, iteration: 2303
loss: 2.014192819595337,grad_norm: 0.9999998536386561, iteration: 2304
loss: 1.893308162689209,grad_norm: 0.9999998464418137, iteration: 2305
loss: 2.006927251815796,grad_norm: 0.99999983373537, iteration: 2306
loss: 1.7556153535842896,grad_norm: 0.9999998608509804, iteration: 2307
loss: 1.8980789184570312,grad_norm: 0.9999997951281182, iteration: 2308
loss: 2.000892162322998,grad_norm: 0.9999998547893506, iteration: 2309
loss: 1.9216419458389282,grad_norm: 0.9999998435630332, iteration: 2310
loss: 1.8564558029174805,grad_norm: 0.9999998184482359, iteration: 2311
loss: 1.946761131286621,grad_norm: 0.999999814684819, iteration: 2312
loss: 1.9116085767745972,grad_norm: 0.9999997955210687, iteration: 2313
loss: 1.9283064603805542,grad_norm: 0.9999997539642297, iteration: 2314
loss: 2.0459203720092773,grad_norm: 0.9999998244128252, iteration: 2315
loss: 1.707646131515503,grad_norm: 0.9999998344752506, iteration: 2316
loss: 1.9329428672790527,grad_norm: 0.9999998244673831, iteration: 2317
loss: 1.9358973503112793,grad_norm: 0.9999998617553839, iteration: 2318
loss: 2.147709846496582,grad_norm: 0.9999998973379584, iteration: 2319
loss: 1.7983421087265015,grad_norm: 0.9999998798451083, iteration: 2320
loss: 1.989355206489563,grad_norm: 0.9999997526859964, iteration: 2321
loss: 1.9555116891860962,grad_norm: 0.9999998776478183, iteration: 2322
loss: 1.9143065214157104,grad_norm: 0.9999998420812135, iteration: 2323
loss: 1.891572117805481,grad_norm: 0.9999998605883067, iteration: 2324
loss: 2.077857732772827,grad_norm: 0.9999998261020789, iteration: 2325
loss: 1.8491864204406738,grad_norm: 0.9999998429171346, iteration: 2326
loss: 2.05460524559021,grad_norm: 0.9999998630450302, iteration: 2327
loss: 1.926299810409546,grad_norm: 0.9999997888192457, iteration: 2328
loss: 1.978888988494873,grad_norm: 0.9999998578942476, iteration: 2329
loss: 1.856356143951416,grad_norm: 0.999999838134053, iteration: 2330
loss: 1.9070793390274048,grad_norm: 0.9999998373387412, iteration: 2331
loss: 1.967774748802185,grad_norm: 0.9999998357871671, iteration: 2332
loss: 1.9961239099502563,grad_norm: 0.9999998021414125, iteration: 2333
loss: 2.1241984367370605,grad_norm: 0.9999998483554661, iteration: 2334
loss: 2.0024523735046387,grad_norm: 0.9999997890456631, iteration: 2335
loss: 2.00736665725708,grad_norm: 0.9999998526329895, iteration: 2336
loss: 1.7719389200210571,grad_norm: 0.9999997845021054, iteration: 2337
loss: 2.0262746810913086,grad_norm: 0.9999998109889867, iteration: 2338
loss: 1.9343678951263428,grad_norm: 0.9999997842485597, iteration: 2339
loss: 1.899784803390503,grad_norm: 0.99999980915654, iteration: 2340
loss: 1.8381233215332031,grad_norm: 0.9999998062362057, iteration: 2341
loss: 1.8446425199508667,grad_norm: 0.9999998688681262, iteration: 2342
loss: 2.0399465560913086,grad_norm: 0.9999998456578376, iteration: 2343
loss: 1.9589896202087402,grad_norm: 0.999999796316229, iteration: 2344
loss: 1.8770623207092285,grad_norm: 0.9999998148840136, iteration: 2345
loss: 2.0097455978393555,grad_norm: 0.9999998601023687, iteration: 2346
loss: 1.8352328538894653,grad_norm: 0.9999999049552072, iteration: 2347
loss: 1.9475722312927246,grad_norm: 0.9999997650657889, iteration: 2348
loss: 1.8214647769927979,grad_norm: 0.9999998515254126, iteration: 2349
loss: 2.0385003089904785,grad_norm: 0.9999997990581637, iteration: 2350
loss: 1.9345782995224,grad_norm: 0.9999998341903605, iteration: 2351
loss: 1.872015118598938,grad_norm: 0.9999998757659312, iteration: 2352
loss: 1.9416612386703491,grad_norm: 0.9999998033223221, iteration: 2353
loss: 1.98465895652771,grad_norm: 0.9999998667105905, iteration: 2354
loss: 1.8770333528518677,grad_norm: 0.9999999076069553, iteration: 2355
loss: 1.842934489250183,grad_norm: 0.999999768925719, iteration: 2356
loss: 1.8002922534942627,grad_norm: 0.9999999067317864, iteration: 2357
loss: 1.9891538619995117,grad_norm: 0.9999998476954094, iteration: 2358
loss: 1.9980109930038452,grad_norm: 0.9999998497930825, iteration: 2359
loss: 1.8408817052841187,grad_norm: 0.9999998688874523, iteration: 2360
loss: 1.8219938278198242,grad_norm: 0.9999998680291908, iteration: 2361
loss: 2.0572476387023926,grad_norm: 0.9999998399707719, iteration: 2362
loss: 2.0450997352600098,grad_norm: 0.9999998747777205, iteration: 2363
loss: 1.9356284141540527,grad_norm: 0.9999998251128029, iteration: 2364
loss: 2.0236093997955322,grad_norm: 0.9999998704609596, iteration: 2365
loss: 1.9183526039123535,grad_norm: 0.999999809590033, iteration: 2366
loss: 1.9822626113891602,grad_norm: 0.9999999146156718, iteration: 2367
loss: 1.9436331987380981,grad_norm: 0.9999997466617143, iteration: 2368
loss: 1.7587635517120361,grad_norm: 0.9999998330064414, iteration: 2369
loss: 1.8861606121063232,grad_norm: 0.9999997970311365, iteration: 2370
loss: 2.0018956661224365,grad_norm: 0.9999997227480866, iteration: 2371
loss: 2.0319323539733887,grad_norm: 0.9999998817981783, iteration: 2372
loss: 2.1059482097625732,grad_norm: 0.9999998151231339, iteration: 2373
loss: 1.9358725547790527,grad_norm: 0.9999999204609418, iteration: 2374
loss: 1.982714295387268,grad_norm: 0.9999998222666827, iteration: 2375
loss: 1.9259579181671143,grad_norm: 0.9999998181103805, iteration: 2376
loss: 2.109975576400757,grad_norm: 0.9999998067949147, iteration: 2377
loss: 1.9959388971328735,grad_norm: 0.9999997634986794, iteration: 2378
loss: 1.6345816850662231,grad_norm: 0.9999998165027798, iteration: 2379
loss: 1.9651225805282593,grad_norm: 0.9999998371474925, iteration: 2380
loss: 2.049062490463257,grad_norm: 0.9999998309162045, iteration: 2381
loss: 1.8683173656463623,grad_norm: 0.9999998282898409, iteration: 2382
loss: 1.8349436521530151,grad_norm: 0.9999998251204298, iteration: 2383
loss: 2.1075680255889893,grad_norm: 0.9999998724591459, iteration: 2384
loss: 1.7352322340011597,grad_norm: 0.9999999171943342, iteration: 2385
loss: 2.035632610321045,grad_norm: 0.9999998106648769, iteration: 2386
loss: 1.8811062574386597,grad_norm: 0.9999998345179539, iteration: 2387
loss: 1.91773521900177,grad_norm: 0.9999998332450604, iteration: 2388
loss: 2.181432008743286,grad_norm: 0.9999997516486223, iteration: 2389
loss: 1.766544222831726,grad_norm: 0.9999998373179402, iteration: 2390
loss: 1.770154595375061,grad_norm: 0.9999998572327354, iteration: 2391
loss: 1.9328526258468628,grad_norm: 0.9999998367222176, iteration: 2392
loss: 1.7971028089523315,grad_norm: 0.9999999273991104, iteration: 2393
loss: 1.861392617225647,grad_norm: 0.9999998508618407, iteration: 2394
loss: 1.7933459281921387,grad_norm: 0.9999997831681978, iteration: 2395
loss: 1.9071145057678223,grad_norm: 0.9999998377158283, iteration: 2396
loss: 1.8401747941970825,grad_norm: 0.9999998465497391, iteration: 2397
loss: 1.977412223815918,grad_norm: 0.9999998976795902, iteration: 2398
loss: 1.901519775390625,grad_norm: 0.9999998231180349, iteration: 2399
loss: 1.8558223247528076,grad_norm: 0.9999999535431644, iteration: 2400
loss: 2.04640531539917,grad_norm: 0.9999999099061316, iteration: 2401
loss: 2.02605938911438,grad_norm: 0.9999999009011004, iteration: 2402
loss: 2.052189350128174,grad_norm: 0.9999997624494509, iteration: 2403
loss: 1.969346284866333,grad_norm: 0.9999998345503234, iteration: 2404
loss: 1.8880749940872192,grad_norm: 0.9999998604212059, iteration: 2405
loss: 1.9580968618392944,grad_norm: 0.9999998547348777, iteration: 2406
loss: 1.9079288244247437,grad_norm: 0.9999998196424912, iteration: 2407
loss: 2.051426649093628,grad_norm: 0.9999997918712299, iteration: 2408
loss: 1.842460036277771,grad_norm: 0.9999998465728415, iteration: 2409
loss: 1.5760369300842285,grad_norm: 0.9999998944395132, iteration: 2410
loss: 1.7412797212600708,grad_norm: 0.9999998404543232, iteration: 2411
loss: 1.9035618305206299,grad_norm: 0.9999998203390925, iteration: 2412
loss: 1.8353619575500488,grad_norm: 0.9999999046732574, iteration: 2413
loss: 1.9146428108215332,grad_norm: 0.9999998561708461, iteration: 2414
loss: 1.9066084623336792,grad_norm: 0.9999999264315543, iteration: 2415
loss: 1.9669830799102783,grad_norm: 0.9999998071482982, iteration: 2416
loss: 1.7996450662612915,grad_norm: 0.9999998079100301, iteration: 2417
loss: 2.0193898677825928,grad_norm: 0.9999998394146858, iteration: 2418
loss: 1.9680074453353882,grad_norm: 0.999999894268198, iteration: 2419
loss: 1.752121090888977,grad_norm: 0.9999999534033227, iteration: 2420
loss: 1.828263759613037,grad_norm: 0.9999998411631997, iteration: 2421
loss: 1.9374924898147583,grad_norm: 0.9999998544588959, iteration: 2422
loss: 1.9183093309402466,grad_norm: 0.9999998358525767, iteration: 2423
loss: 1.968274712562561,grad_norm: 0.9999997796601198, iteration: 2424
loss: 1.9716978073120117,grad_norm: 0.9999998306512308, iteration: 2425
loss: 1.9299395084381104,grad_norm: 0.9999998756213135, iteration: 2426
loss: 1.9967432022094727,grad_norm: 0.9999998408653737, iteration: 2427
loss: 1.7843040227890015,grad_norm: 0.9999999055638293, iteration: 2428
loss: 1.7809937000274658,grad_norm: 0.999999772504972, iteration: 2429
loss: 2.0592501163482666,grad_norm: 0.9999997736857037, iteration: 2430
loss: 1.6992088556289673,grad_norm: 0.9999999216765363, iteration: 2431
loss: 2.1167149543762207,grad_norm: 0.999999864088238, iteration: 2432
loss: 1.884476661682129,grad_norm: 0.9999998750203551, iteration: 2433
loss: 2.174976110458374,grad_norm: 0.9999998273752584, iteration: 2434
loss: 1.9342812299728394,grad_norm: 0.999999935963569, iteration: 2435
loss: 1.9241584539413452,grad_norm: 0.9999999000696395, iteration: 2436
loss: 1.8502144813537598,grad_norm: 0.9999998336783632, iteration: 2437
loss: 1.9892425537109375,grad_norm: 0.9999997842370321, iteration: 2438
loss: 2.022045612335205,grad_norm: 0.999999855176591, iteration: 2439
loss: 1.8972046375274658,grad_norm: 0.9999998216335273, iteration: 2440
loss: 2.0766894817352295,grad_norm: 0.999999832188133, iteration: 2441
loss: 1.7220659255981445,grad_norm: 0.9999998276628322, iteration: 2442
loss: 1.7265028953552246,grad_norm: 0.9999998390601899, iteration: 2443
loss: 1.8979287147521973,grad_norm: 0.9999998485406341, iteration: 2444
loss: 1.9221293926239014,grad_norm: 0.9999998580782753, iteration: 2445
loss: 1.8118841648101807,grad_norm: 0.9999997700041667, iteration: 2446
loss: 1.8364824056625366,grad_norm: 0.9999998861457596, iteration: 2447
loss: 1.753727912902832,grad_norm: 0.999999858302904, iteration: 2448
loss: 1.9278392791748047,grad_norm: 0.9999998338383401, iteration: 2449
loss: 1.7833826541900635,grad_norm: 0.9999999150794373, iteration: 2450
loss: 2.095395565032959,grad_norm: 0.9999998974390637, iteration: 2451
loss: 1.9491809606552124,grad_norm: 0.9999998494584138, iteration: 2452
loss: 1.9620617628097534,grad_norm: 0.9999998139379433, iteration: 2453
loss: 1.876965045928955,grad_norm: 0.9999998145246743, iteration: 2454
loss: 1.8939839601516724,grad_norm: 0.9999998980206625, iteration: 2455
loss: 1.8011505603790283,grad_norm: 0.9999998187233088, iteration: 2456
loss: 1.9158236980438232,grad_norm: 0.9999998598166457, iteration: 2457
loss: 1.9986201524734497,grad_norm: 0.9999998512479993, iteration: 2458
loss: 1.9097723960876465,grad_norm: 0.9999998808239614, iteration: 2459
loss: 1.8303160667419434,grad_norm: 0.9999998654232932, iteration: 2460
loss: 1.866565227508545,grad_norm: 0.9999998552738928, iteration: 2461
loss: 1.9089293479919434,grad_norm: 0.9999998698826966, iteration: 2462
loss: 1.7785190343856812,grad_norm: 0.9999998540579231, iteration: 2463
loss: 1.8520240783691406,grad_norm: 0.9999997762077347, iteration: 2464
loss: 1.8375569581985474,grad_norm: 0.9999998348865492, iteration: 2465
loss: 1.9278160333633423,grad_norm: 0.9999998743459658, iteration: 2466
loss: 1.9526312351226807,grad_norm: 0.9999999065418574, iteration: 2467
loss: 1.9145220518112183,grad_norm: 0.9999999309848774, iteration: 2468
loss: 1.9147658348083496,grad_norm: 0.9999998875255325, iteration: 2469
loss: 1.8134403228759766,grad_norm: 0.9999997992567866, iteration: 2470
loss: 1.78291654586792,grad_norm: 0.999999782557327, iteration: 2471
loss: 1.684048056602478,grad_norm: 0.9999998486937739, iteration: 2472
loss: 1.902559757232666,grad_norm: 0.9999998509723886, iteration: 2473
loss: 2.0962963104248047,grad_norm: 0.9999998176722734, iteration: 2474
loss: 1.8718842267990112,grad_norm: 0.9999999108047706, iteration: 2475
loss: 1.8259843587875366,grad_norm: 0.9999998316316898, iteration: 2476
loss: 1.8568882942199707,grad_norm: 0.9999998242017042, iteration: 2477
loss: 1.8929051160812378,grad_norm: 0.999999778531854, iteration: 2478
loss: 1.7740381956100464,grad_norm: 0.9999997977212147, iteration: 2479
loss: 1.9434261322021484,grad_norm: 0.9999998273404802, iteration: 2480
loss: 2.0021800994873047,grad_norm: 0.9999998174633081, iteration: 2481
loss: 2.0245862007141113,grad_norm: 0.9999998021324511, iteration: 2482
loss: 1.984195590019226,grad_norm: 0.9999998322673033, iteration: 2483
loss: 1.9465606212615967,grad_norm: 0.9999998530401258, iteration: 2484
loss: 1.986218810081482,grad_norm: 0.9999998381680429, iteration: 2485
loss: 1.9929399490356445,grad_norm: 0.9999998352479039, iteration: 2486
loss: 1.9460172653198242,grad_norm: 0.9999998647865312, iteration: 2487
loss: 1.8858145475387573,grad_norm: 0.9999998402916804, iteration: 2488
loss: 1.724982500076294,grad_norm: 0.9999998534334901, iteration: 2489
loss: 2.0076589584350586,grad_norm: 0.9999997865861805, iteration: 2490
loss: 1.8881561756134033,grad_norm: 0.9999998311762136, iteration: 2491
loss: 1.7605631351470947,grad_norm: 0.9999998716480778, iteration: 2492
loss: 1.8579530715942383,grad_norm: 0.9999998326318458, iteration: 2493
loss: 1.9528800249099731,grad_norm: 0.9999998459901073, iteration: 2494
loss: 1.8648570775985718,grad_norm: 0.9999998398755294, iteration: 2495
loss: 1.8310590982437134,grad_norm: 0.9999999099767718, iteration: 2496
loss: 1.858602523803711,grad_norm: 0.9999998020011021, iteration: 2497
loss: 1.8985506296157837,grad_norm: 0.9999998815554842, iteration: 2498
loss: 1.81721031665802,grad_norm: 0.9999998001719187, iteration: 2499
loss: 1.9581079483032227,grad_norm: 0.9999998373820295, iteration: 2500
loss: 2.0065157413482666,grad_norm: 0.9999998590918083, iteration: 2501
loss: 1.560647964477539,grad_norm: 0.9999999006728447, iteration: 2502
loss: 1.7202913761138916,grad_norm: 0.9999998321660747, iteration: 2503
loss: 1.884254813194275,grad_norm: 0.9999997930444943, iteration: 2504
loss: 1.8337656259536743,grad_norm: 0.999999809362987, iteration: 2505
loss: 1.916006326675415,grad_norm: 0.9999998014013193, iteration: 2506
loss: 1.8095195293426514,grad_norm: 0.9999998696320402, iteration: 2507
loss: 1.9469350576400757,grad_norm: 0.9999998793495242, iteration: 2508
loss: 2.059441089630127,grad_norm: 0.9999998456143744, iteration: 2509
loss: 1.8983299732208252,grad_norm: 0.9999998025106996, iteration: 2510
loss: 1.8047916889190674,grad_norm: 0.9999998594690634, iteration: 2511
loss: 2.1123604774475098,grad_norm: 0.999999817195869, iteration: 2512
loss: 1.974083423614502,grad_norm: 0.9999997525244672, iteration: 2513
loss: 1.9231762886047363,grad_norm: 0.9999998875166292, iteration: 2514
loss: 2.028671979904175,grad_norm: 0.9999997922438666, iteration: 2515
loss: 1.79988694190979,grad_norm: 0.9999998754603737, iteration: 2516
loss: 1.7733310461044312,grad_norm: 0.9999998080551744, iteration: 2517
loss: 1.8620721101760864,grad_norm: 0.9999997667077832, iteration: 2518
loss: 1.9103888273239136,grad_norm: 0.9999998728472418, iteration: 2519
loss: 1.977515697479248,grad_norm: 0.9999998195363905, iteration: 2520
loss: 1.7891209125518799,grad_norm: 0.9999997710721497, iteration: 2521
loss: 1.8636256456375122,grad_norm: 0.9999998409812412, iteration: 2522
loss: 1.917946696281433,grad_norm: 0.9999998522293815, iteration: 2523
loss: 1.8224945068359375,grad_norm: 0.9999997570336504, iteration: 2524
loss: 1.9053939580917358,grad_norm: 0.9999998247552485, iteration: 2525
loss: 2.007521390914917,grad_norm: 0.9999998550654594, iteration: 2526
loss: 1.8284212350845337,grad_norm: 0.9999997792501208, iteration: 2527
loss: 1.8906104564666748,grad_norm: 0.9999997871700826, iteration: 2528
loss: 2.027137041091919,grad_norm: 0.9999998104301159, iteration: 2529
loss: 1.7567812204360962,grad_norm: 0.9999997875197448, iteration: 2530
loss: 1.8373862504959106,grad_norm: 0.9999997946865186, iteration: 2531
loss: 1.841004729270935,grad_norm: 0.9999998772053458, iteration: 2532
loss: 1.997132420539856,grad_norm: 0.9999997833177754, iteration: 2533
loss: 1.7984472513198853,grad_norm: 0.999999819822467, iteration: 2534
loss: 1.8757445812225342,grad_norm: 0.9999998412144524, iteration: 2535
loss: 1.876355528831482,grad_norm: 0.9999998622470486, iteration: 2536
loss: 1.8545894622802734,grad_norm: 0.9999998140227946, iteration: 2537
loss: 1.9553501605987549,grad_norm: 0.9999997926041982, iteration: 2538
loss: 1.832293152809143,grad_norm: 0.9999998777146587, iteration: 2539
loss: 1.9500025510787964,grad_norm: 0.9999998463926367, iteration: 2540
loss: 1.7995986938476562,grad_norm: 0.9999998022325804, iteration: 2541
loss: 1.8748142719268799,grad_norm: 0.99999979122692, iteration: 2542
loss: 2.0098791122436523,grad_norm: 0.9999998025781391, iteration: 2543
loss: 1.8823816776275635,grad_norm: 0.9999999494627397, iteration: 2544
loss: 1.8019180297851562,grad_norm: 0.999999750435741, iteration: 2545
loss: 1.8066678047180176,grad_norm: 0.9999998291544884, iteration: 2546
loss: 1.897241473197937,grad_norm: 0.9999998612641803, iteration: 2547
loss: 1.5847948789596558,grad_norm: 0.9999998746588894, iteration: 2548
loss: 1.7888933420181274,grad_norm: 0.9999998714640999, iteration: 2549
loss: 1.6958011388778687,grad_norm: 0.9999999058093254, iteration: 2550
loss: 1.7370119094848633,grad_norm: 0.9999998224752225, iteration: 2551
loss: 1.9053688049316406,grad_norm: 0.999999822158852, iteration: 2552
loss: 1.8723056316375732,grad_norm: 0.9999998544406091, iteration: 2553
loss: 1.741129755973816,grad_norm: 0.9999998541146257, iteration: 2554
loss: 1.8297911882400513,grad_norm: 0.9999998976423893, iteration: 2555
loss: 2.0044631958007812,grad_norm: 0.9999997757085364, iteration: 2556
loss: 1.7899084091186523,grad_norm: 0.9999998295831906, iteration: 2557
loss: 1.9219671487808228,grad_norm: 0.9999998030279083, iteration: 2558
loss: 1.8432224988937378,grad_norm: 0.999999781354684, iteration: 2559
loss: 1.8367725610733032,grad_norm: 0.9999998291302729, iteration: 2560
loss: 1.7458995580673218,grad_norm: 0.9999998356472223, iteration: 2561
loss: 1.808486819267273,grad_norm: 0.9999998185597953, iteration: 2562
loss: 2.077446222305298,grad_norm: 0.9999998081477602, iteration: 2563
loss: 1.7421990633010864,grad_norm: 0.999999812179914, iteration: 2564
loss: 1.6851953268051147,grad_norm: 0.9999997382807293, iteration: 2565
loss: 1.7763665914535522,grad_norm: 0.9999997512667654, iteration: 2566
loss: 1.8546068668365479,grad_norm: 0.9999998279610058, iteration: 2567
loss: 1.9495457410812378,grad_norm: 0.9999997816863182, iteration: 2568
loss: 1.9684046506881714,grad_norm: 0.9999997541919081, iteration: 2569
loss: 1.6934174299240112,grad_norm: 0.9999997834427458, iteration: 2570
loss: 2.0119240283966064,grad_norm: 0.9999998319127822, iteration: 2571
loss: 1.8049207925796509,grad_norm: 0.9999997793976219, iteration: 2572
loss: 1.9418338537216187,grad_norm: 0.9999997518501805, iteration: 2573
loss: 1.9290682077407837,grad_norm: 0.9999998656669582, iteration: 2574
loss: 1.863956332206726,grad_norm: 0.9999999092847369, iteration: 2575
loss: 1.6635836362838745,grad_norm: 0.9999999126374097, iteration: 2576
loss: 1.7871779203414917,grad_norm: 0.9999998733287171, iteration: 2577
loss: 1.9303555488586426,grad_norm: 0.9999998338104886, iteration: 2578
loss: 1.8576195240020752,grad_norm: 0.9999997936338986, iteration: 2579
loss: 1.7500739097595215,grad_norm: 0.9999997914973019, iteration: 2580
loss: 1.932988166809082,grad_norm: 0.9999997886840771, iteration: 2581
loss: 1.8255860805511475,grad_norm: 0.9999998287574275, iteration: 2582
loss: 1.9010307788848877,grad_norm: 0.9999998596513595, iteration: 2583
loss: 1.9163697957992554,grad_norm: 0.9999997822807172, iteration: 2584
loss: 1.8088880777359009,grad_norm: 0.9999998402601636, iteration: 2585
loss: 1.8474745750427246,grad_norm: 0.99999988486911, iteration: 2586
loss: 1.9257991313934326,grad_norm: 0.9999998035408406, iteration: 2587
loss: 1.9816113710403442,grad_norm: 0.9999997845321476, iteration: 2588
loss: 1.7415965795516968,grad_norm: 0.9999998913011722, iteration: 2589
loss: 2.0261409282684326,grad_norm: 0.9999998460461129, iteration: 2590
loss: 1.8390966653823853,grad_norm: 0.9999997812896061, iteration: 2591
loss: 1.626602292060852,grad_norm: 0.9999998975641399, iteration: 2592
loss: 1.9457330703735352,grad_norm: 0.9999998295505304, iteration: 2593
loss: 1.7908142805099487,grad_norm: 0.9999998135665514, iteration: 2594
loss: 1.9337568283081055,grad_norm: 0.9999998948783293, iteration: 2595
loss: 1.7800527811050415,grad_norm: 0.9999998807907432, iteration: 2596
loss: 1.765516996383667,grad_norm: 0.9999998365634037, iteration: 2597
loss: 1.7208564281463623,grad_norm: 0.9999998431900158, iteration: 2598
loss: 1.966355562210083,grad_norm: 0.9999998865693385, iteration: 2599
loss: 1.7392945289611816,grad_norm: 0.9999998199712893, iteration: 2600
loss: 1.844693660736084,grad_norm: 0.9999998666516996, iteration: 2601
loss: 1.8676669597625732,grad_norm: 0.9999998890840096, iteration: 2602
loss: 1.7110652923583984,grad_norm: 0.9999998104620973, iteration: 2603
loss: 1.668316125869751,grad_norm: 0.9999999339465909, iteration: 2604
loss: 1.9318859577178955,grad_norm: 0.9999997569819613, iteration: 2605
loss: 1.8004664182662964,grad_norm: 0.9999997601632474, iteration: 2606
loss: 1.7735790014266968,grad_norm: 0.9999998039964945, iteration: 2607
loss: 1.7464512586593628,grad_norm: 0.9999998400973603, iteration: 2608
loss: 1.8569117784500122,grad_norm: 0.9999998138180696, iteration: 2609
loss: 1.735381007194519,grad_norm: 0.9999998309686812, iteration: 2610
loss: 1.9464272260665894,grad_norm: 0.9999998387784507, iteration: 2611
loss: 1.8193800449371338,grad_norm: 0.999999854547103, iteration: 2612
loss: 1.9394769668579102,grad_norm: 0.9999998690828014, iteration: 2613
loss: 1.8455369472503662,grad_norm: 0.9999997652252088, iteration: 2614
loss: 1.9442014694213867,grad_norm: 0.9999998541385852, iteration: 2615
loss: 1.8200863599777222,grad_norm: 0.9999998920444796, iteration: 2616
loss: 1.8157483339309692,grad_norm: 0.9999998335847214, iteration: 2617
loss: 1.7555257081985474,grad_norm: 0.9999998089407921, iteration: 2618
loss: 1.8170714378356934,grad_norm: 0.9999999064355849, iteration: 2619
loss: 1.7783030271530151,grad_norm: 0.9999997796682052, iteration: 2620
loss: 1.863080620765686,grad_norm: 0.9999998347077035, iteration: 2621
loss: 1.8922785520553589,grad_norm: 0.9999998080515254, iteration: 2622
loss: 1.9472105503082275,grad_norm: 0.9999998168214277, iteration: 2623
loss: 1.9601439237594604,grad_norm: 0.9999998822343025, iteration: 2624
loss: 1.994803786277771,grad_norm: 0.9999998597556148, iteration: 2625
loss: 1.838937520980835,grad_norm: 0.9999998171247676, iteration: 2626
loss: 1.8485318422317505,grad_norm: 0.9999998375365556, iteration: 2627
loss: 1.838152527809143,grad_norm: 0.99999981098226, iteration: 2628
loss: 1.8793059587478638,grad_norm: 0.9999998121787045, iteration: 2629
loss: 1.807106375694275,grad_norm: 0.99999977898668, iteration: 2630
loss: 1.8687008619308472,grad_norm: 0.9999998105058125, iteration: 2631
loss: 1.9368128776550293,grad_norm: 0.9999998071809985, iteration: 2632
loss: 1.8785964250564575,grad_norm: 0.999999884692834, iteration: 2633
loss: 1.778898000717163,grad_norm: 0.9999998503122733, iteration: 2634
loss: 1.9132039546966553,grad_norm: 0.9999997987190669, iteration: 2635
loss: 2.0362889766693115,grad_norm: 0.9999998738359868, iteration: 2636
loss: 1.812690019607544,grad_norm: 0.9999998321031518, iteration: 2637
loss: 1.935410737991333,grad_norm: 0.999999859909205, iteration: 2638
loss: 1.7110068798065186,grad_norm: 0.9999998041823582, iteration: 2639
loss: 1.8246289491653442,grad_norm: 0.9999998012087421, iteration: 2640
loss: 1.9315927028656006,grad_norm: 0.9999998319945581, iteration: 2641
loss: 1.938428282737732,grad_norm: 0.9999999163654738, iteration: 2642
loss: 1.991905689239502,grad_norm: 0.9999998396144305, iteration: 2643
loss: 1.7889869213104248,grad_norm: 0.9999998324615893, iteration: 2644
loss: 1.8710800409317017,grad_norm: 0.9999998573187614, iteration: 2645
loss: 1.6977063417434692,grad_norm: 0.9999998717954317, iteration: 2646
loss: 1.8929200172424316,grad_norm: 0.9999998713593793, iteration: 2647
loss: 1.8777590990066528,grad_norm: 0.9999998260555868, iteration: 2648
loss: 1.8109630346298218,grad_norm: 0.9999998689586158, iteration: 2649
loss: 1.6462284326553345,grad_norm: 0.9999997596569736, iteration: 2650
loss: 1.818569302558899,grad_norm: 0.9999999590922208, iteration: 2651
loss: 1.8430296182632446,grad_norm: 0.9999998354542518, iteration: 2652
loss: 1.7206571102142334,grad_norm: 0.9999998428563286, iteration: 2653
loss: 1.8049083948135376,grad_norm: 0.9999998396770846, iteration: 2654
loss: 1.7203636169433594,grad_norm: 0.9999998230161506, iteration: 2655
loss: 1.801201581954956,grad_norm: 0.9999998973035141, iteration: 2656
loss: 1.9635405540466309,grad_norm: 0.9999998307705451, iteration: 2657
loss: 1.816787838935852,grad_norm: 0.9999998149867224, iteration: 2658
loss: 1.7622753381729126,grad_norm: 0.9999999525140092, iteration: 2659
loss: 1.8016769886016846,grad_norm: 0.9999998358270148, iteration: 2660
loss: 1.6953678131103516,grad_norm: 0.9999998798020046, iteration: 2661
loss: 1.8323910236358643,grad_norm: 0.9999998151461919, iteration: 2662
loss: 1.9104552268981934,grad_norm: 0.9999998848128915, iteration: 2663
loss: 1.8593645095825195,grad_norm: 0.9999997615830626, iteration: 2664
loss: 1.6646173000335693,grad_norm: 0.9999997873705652, iteration: 2665
loss: 1.874613881111145,grad_norm: 0.9999998759955915, iteration: 2666
loss: 1.8205668926239014,grad_norm: 0.9999997982794772, iteration: 2667
loss: 1.7968722581863403,grad_norm: 0.9999998564765894, iteration: 2668
loss: 1.7666015625,grad_norm: 0.9999998252452474, iteration: 2669
loss: 1.8738526105880737,grad_norm: 0.9999998560742585, iteration: 2670
loss: 1.9123581647872925,grad_norm: 0.9999998004753715, iteration: 2671
loss: 1.6859657764434814,grad_norm: 0.9999999252229843, iteration: 2672
loss: 1.9316548109054565,grad_norm: 0.9999998545763208, iteration: 2673
loss: 1.6035746335983276,grad_norm: 0.9999997754979124, iteration: 2674
loss: 1.9256868362426758,grad_norm: 0.999999880784655, iteration: 2675
loss: 1.9789851903915405,grad_norm: 0.9999998411523379, iteration: 2676
loss: 1.9525771141052246,grad_norm: 0.9999998179373418, iteration: 2677
loss: 1.801956057548523,grad_norm: 0.9999998382871369, iteration: 2678
loss: 1.871314525604248,grad_norm: 0.9999998089949625, iteration: 2679
loss: 1.9671701192855835,grad_norm: 0.9999998578370553, iteration: 2680
loss: 1.8712674379348755,grad_norm: 0.999999737663251, iteration: 2681
loss: 1.7607126235961914,grad_norm: 0.9999998585952795, iteration: 2682
loss: 1.7437609434127808,grad_norm: 0.9999998056508864, iteration: 2683
loss: 1.7157015800476074,grad_norm: 0.9999997484649912, iteration: 2684
loss: 1.971610188484192,grad_norm: 0.9999997986587112, iteration: 2685
loss: 1.7220209836959839,grad_norm: 0.9999997844125922, iteration: 2686
loss: 1.9233332872390747,grad_norm: 0.9999998514869834, iteration: 2687
loss: 1.9274272918701172,grad_norm: 0.9999998154273514, iteration: 2688
loss: 1.970421314239502,grad_norm: 0.999999867078686, iteration: 2689
loss: 1.629900336265564,grad_norm: 0.9999997977578756, iteration: 2690
loss: 1.8189157247543335,grad_norm: 0.9999998779491039, iteration: 2691
loss: 1.8035600185394287,grad_norm: 0.99999982343412, iteration: 2692
loss: 1.8417009115219116,grad_norm: 0.9999997671268458, iteration: 2693
loss: 1.832019567489624,grad_norm: 0.9999997627993593, iteration: 2694
loss: 1.7712395191192627,grad_norm: 0.9999998404939071, iteration: 2695
loss: 1.6658215522766113,grad_norm: 0.9999998717099058, iteration: 2696
loss: 1.636938452720642,grad_norm: 0.9999998323728349, iteration: 2697
loss: 1.7044070959091187,grad_norm: 0.9999998000287872, iteration: 2698
loss: 1.800489902496338,grad_norm: 0.9999997802595774, iteration: 2699
loss: 1.8452614545822144,grad_norm: 0.9999997719858115, iteration: 2700
loss: 1.701493263244629,grad_norm: 0.9999997827163891, iteration: 2701
loss: 1.5857625007629395,grad_norm: 0.9999999054473847, iteration: 2702
loss: 1.6676784753799438,grad_norm: 0.9999998444168051, iteration: 2703
loss: 1.7529330253601074,grad_norm: 0.999999882799952, iteration: 2704
loss: 1.877798318862915,grad_norm: 0.9999998562934439, iteration: 2705
loss: 1.9094905853271484,grad_norm: 0.9999998223980446, iteration: 2706
loss: 1.7270370721817017,grad_norm: 0.9999998250089928, iteration: 2707
loss: 1.645237684249878,grad_norm: 0.9999997964340352, iteration: 2708
loss: 1.780651330947876,grad_norm: 0.9999998205958524, iteration: 2709
loss: 1.845790982246399,grad_norm: 0.9999998286582361, iteration: 2710
loss: 1.9206304550170898,grad_norm: 0.999999877983852, iteration: 2711
loss: 1.8723512887954712,grad_norm: 0.9999997616972052, iteration: 2712
loss: 1.7819464206695557,grad_norm: 0.9999998280950105, iteration: 2713
loss: 1.8730883598327637,grad_norm: 0.9999998323412728, iteration: 2714
loss: 1.8534414768218994,grad_norm: 0.9999998704893053, iteration: 2715
loss: 1.7084879875183105,grad_norm: 0.9999998338692464, iteration: 2716
loss: 1.9847110509872437,grad_norm: 0.9999998788055556, iteration: 2717
loss: 1.959957480430603,grad_norm: 0.9999998273717782, iteration: 2718
loss: 1.8172543048858643,grad_norm: 0.9999998530009463, iteration: 2719
loss: 1.8056721687316895,grad_norm: 0.9999998491865336, iteration: 2720
loss: 1.7332288026809692,grad_norm: 0.9999999123739938, iteration: 2721
loss: 1.858991026878357,grad_norm: 0.9999998528740928, iteration: 2722
loss: 1.8191200494766235,grad_norm: 0.9999998754714143, iteration: 2723
loss: 1.683701992034912,grad_norm: 0.9999998447562053, iteration: 2724
loss: 1.8484220504760742,grad_norm: 0.9999998354567572, iteration: 2725
loss: 1.870029330253601,grad_norm: 0.9999998497679576, iteration: 2726
loss: 1.9106923341751099,grad_norm: 0.9999998403352726, iteration: 2727
loss: 1.789463996887207,grad_norm: 0.9999999083837422, iteration: 2728
loss: 1.81800377368927,grad_norm: 0.9999998943326465, iteration: 2729
loss: 1.6399991512298584,grad_norm: 0.9999998618151303, iteration: 2730
loss: 1.7921470403671265,grad_norm: 0.9999998608691127, iteration: 2731
loss: 1.7443809509277344,grad_norm: 0.9999998022512646, iteration: 2732
loss: 1.656262755393982,grad_norm: 0.9999998402690716, iteration: 2733
loss: 1.7337543964385986,grad_norm: 0.9999998845665354, iteration: 2734
loss: 1.6629016399383545,grad_norm: 0.999999880790441, iteration: 2735
loss: 1.8657466173171997,grad_norm: 0.9999998535819745, iteration: 2736
loss: 1.775926113128662,grad_norm: 0.9999998353234006, iteration: 2737
loss: 1.8519339561462402,grad_norm: 0.9999998507059411, iteration: 2738
loss: 1.923825740814209,grad_norm: 0.999999890754932, iteration: 2739
loss: 1.8145419359207153,grad_norm: 0.9999997815110618, iteration: 2740
loss: 1.969620943069458,grad_norm: 0.9999998980058425, iteration: 2741
loss: 1.7271736860275269,grad_norm: 0.9999998157771018, iteration: 2742
loss: 1.7911885976791382,grad_norm: 0.999999812991019, iteration: 2743
loss: 1.8560266494750977,grad_norm: 0.9999997709814809, iteration: 2744
loss: 1.8924390077590942,grad_norm: 0.9999998377198729, iteration: 2745
loss: 1.8020676374435425,grad_norm: 0.9999997696606279, iteration: 2746
loss: 1.8219966888427734,grad_norm: 0.9999998181681431, iteration: 2747
loss: 1.7326500415802002,grad_norm: 0.9999998613384778, iteration: 2748
loss: 1.7339661121368408,grad_norm: 0.9999998031420477, iteration: 2749
loss: 1.8174914121627808,grad_norm: 0.9999998755936059, iteration: 2750
loss: 1.8331828117370605,grad_norm: 0.9999997830661519, iteration: 2751
loss: 1.7687112092971802,grad_norm: 0.9999998695225369, iteration: 2752
loss: 1.7663193941116333,grad_norm: 0.9999997995367944, iteration: 2753
loss: 1.7599873542785645,grad_norm: 0.9999998859049191, iteration: 2754
loss: 1.7910133600234985,grad_norm: 0.9999998241366119, iteration: 2755
loss: 1.5580594539642334,grad_norm: 0.999999770502579, iteration: 2756
loss: 1.6367539167404175,grad_norm: 0.9999998045125641, iteration: 2757
loss: 1.8442819118499756,grad_norm: 0.9999999019357947, iteration: 2758
loss: 1.9449459314346313,grad_norm: 0.9999998221069882, iteration: 2759
loss: 1.6722080707550049,grad_norm: 0.9999998001378804, iteration: 2760
loss: 1.5916054248809814,grad_norm: 0.9999999133610626, iteration: 2761
loss: 1.8583916425704956,grad_norm: 0.9999998578535931, iteration: 2762
loss: 1.8339734077453613,grad_norm: 0.9999998033162111, iteration: 2763
loss: 1.665910005569458,grad_norm: 0.9999998456143898, iteration: 2764
loss: 1.8165141344070435,grad_norm: 0.9999998662348606, iteration: 2765
loss: 1.9209003448486328,grad_norm: 0.9999998391018515, iteration: 2766
loss: 1.7596406936645508,grad_norm: 0.9999998273062669, iteration: 2767
loss: 1.8981400728225708,grad_norm: 0.9999998451126181, iteration: 2768
loss: 1.823807954788208,grad_norm: 0.9999998099322644, iteration: 2769
loss: 1.7334176301956177,grad_norm: 0.9999997977215334, iteration: 2770
loss: 1.816413402557373,grad_norm: 0.9999998161379633, iteration: 2771
loss: 1.7761174440383911,grad_norm: 0.9999997904360654, iteration: 2772
loss: 1.5953223705291748,grad_norm: 0.9999999162055159, iteration: 2773
loss: 1.738089680671692,grad_norm: 0.999999825231331, iteration: 2774
loss: 1.6855132579803467,grad_norm: 0.9999997596721647, iteration: 2775
loss: 1.9681687355041504,grad_norm: 0.999999867937617, iteration: 2776
loss: 1.6917799711227417,grad_norm: 0.9999997725739307, iteration: 2777
loss: 1.8840749263763428,grad_norm: 0.9999998039317485, iteration: 2778
loss: 1.9164927005767822,grad_norm: 0.999999843477512, iteration: 2779
loss: 1.9342408180236816,grad_norm: 0.99999985789907, iteration: 2780
loss: 1.7040365934371948,grad_norm: 0.9999998218091632, iteration: 2781
loss: 1.9093276262283325,grad_norm: 0.9999998350516187, iteration: 2782
loss: 1.8331972360610962,grad_norm: 0.9999998085162973, iteration: 2783
loss: 1.8676724433898926,grad_norm: 0.9999999084230269, iteration: 2784
loss: 1.7190375328063965,grad_norm: 0.9999998661900075, iteration: 2785
loss: 1.7625261545181274,grad_norm: 0.9999998273067323, iteration: 2786
loss: 1.6960123777389526,grad_norm: 0.9999998322756013, iteration: 2787
loss: 1.7598870992660522,grad_norm: 0.9999998490757184, iteration: 2788
loss: 1.8095792531967163,grad_norm: 0.9999998832481182, iteration: 2789
loss: 1.8521174192428589,grad_norm: 0.9999997827770855, iteration: 2790
loss: 1.6936622858047485,grad_norm: 0.9999998890735027, iteration: 2791
loss: 1.7619274854660034,grad_norm: 0.9999998180150754, iteration: 2792
loss: 1.7242193222045898,grad_norm: 0.9999998384844011, iteration: 2793
loss: 1.8234633207321167,grad_norm: 0.9999999036042959, iteration: 2794
loss: 1.7926865816116333,grad_norm: 0.9999998139113586, iteration: 2795
loss: 1.6721398830413818,grad_norm: 0.999999840657984, iteration: 2796
loss: 1.6907004117965698,grad_norm: 0.9999998807208498, iteration: 2797
loss: 1.7763863801956177,grad_norm: 0.9999998542133922, iteration: 2798
loss: 1.7978203296661377,grad_norm: 0.9999997749909438, iteration: 2799
loss: 1.7434275150299072,grad_norm: 0.9999997859549956, iteration: 2800
loss: 1.6983225345611572,grad_norm: 0.9999998807677103, iteration: 2801
loss: 1.827665090560913,grad_norm: 0.9999998443062699, iteration: 2802
loss: 1.8549691438674927,grad_norm: 0.9999997758732576, iteration: 2803
loss: 1.7187503576278687,grad_norm: 0.9999998682017474, iteration: 2804
loss: 1.7845124006271362,grad_norm: 0.9999998628536017, iteration: 2805
loss: 1.9077290296554565,grad_norm: 0.9999998052213728, iteration: 2806
loss: 1.7546327114105225,grad_norm: 0.999999849576386, iteration: 2807
loss: 1.7249855995178223,grad_norm: 0.9999997980929111, iteration: 2808
loss: 1.7509832382202148,grad_norm: 0.9999998314590817, iteration: 2809
loss: 1.8190449476242065,grad_norm: 0.9999998607751156, iteration: 2810
loss: 1.7032588720321655,grad_norm: 0.9999997318317962, iteration: 2811
loss: 1.8072094917297363,grad_norm: 0.9999997790810253, iteration: 2812
loss: 1.8772497177124023,grad_norm: 0.99999979083286, iteration: 2813
loss: 1.8934377431869507,grad_norm: 0.9999998455121067, iteration: 2814
loss: 1.6800867319107056,grad_norm: 0.9999998567766094, iteration: 2815
loss: 1.5856032371520996,grad_norm: 0.9999998524948814, iteration: 2816
loss: 1.808727502822876,grad_norm: 0.9999998526992728, iteration: 2817
loss: 1.6475024223327637,grad_norm: 0.9999998994142733, iteration: 2818
loss: 1.892643928527832,grad_norm: 0.999999838422839, iteration: 2819
loss: 1.8784304857254028,grad_norm: 0.9999998545917529, iteration: 2820
loss: 1.6250073909759521,grad_norm: 0.9999998745701407, iteration: 2821
loss: 1.7654228210449219,grad_norm: 0.9999998341540273, iteration: 2822
loss: 1.588085651397705,grad_norm: 0.9999998139585853, iteration: 2823
loss: 1.8961572647094727,grad_norm: 0.9999998681176011, iteration: 2824
loss: 1.726603627204895,grad_norm: 0.9999998267172192, iteration: 2825
loss: 1.8285590410232544,grad_norm: 0.999999795512213, iteration: 2826
loss: 1.8741614818572998,grad_norm: 0.9999998327999151, iteration: 2827
loss: 1.6211894750595093,grad_norm: 0.9999998898667279, iteration: 2828
loss: 1.6658977270126343,grad_norm: 0.999999839432511, iteration: 2829
loss: 1.8216603994369507,grad_norm: 0.9999998513128472, iteration: 2830
loss: 1.8322135210037231,grad_norm: 0.9999997833160331, iteration: 2831
loss: 1.8226683139801025,grad_norm: 0.9999998645688242, iteration: 2832
loss: 1.7679280042648315,grad_norm: 0.99999988720139, iteration: 2833
loss: 1.8052035570144653,grad_norm: 0.9999998656781954, iteration: 2834
loss: 1.65312659740448,grad_norm: 0.9999997878439996, iteration: 2835
loss: 1.719072937965393,grad_norm: 0.999999846983124, iteration: 2836
loss: 1.7229971885681152,grad_norm: 0.9999998703288043, iteration: 2837
loss: 1.7232050895690918,grad_norm: 0.9999998015061591, iteration: 2838
loss: 1.7534949779510498,grad_norm: 0.9999998480274072, iteration: 2839
loss: 1.7174551486968994,grad_norm: 0.9999998001052615, iteration: 2840
loss: 1.7569876909255981,grad_norm: 0.9999997829782377, iteration: 2841
loss: 1.8214002847671509,grad_norm: 0.9999997583510638, iteration: 2842
loss: 1.8317173719406128,grad_norm: 0.9999998295270379, iteration: 2843
loss: 1.6376316547393799,grad_norm: 0.9999998106197292, iteration: 2844
loss: 1.881394386291504,grad_norm: 0.999999909356101, iteration: 2845
loss: 1.6326895952224731,grad_norm: 0.9999998728211074, iteration: 2846
loss: 1.6788880825042725,grad_norm: 0.9999998160296452, iteration: 2847
loss: 1.9169518947601318,grad_norm: 0.999999755875724, iteration: 2848
loss: 1.8468838930130005,grad_norm: 0.999999864332697, iteration: 2849
loss: 1.560948133468628,grad_norm: 0.9999997342479915, iteration: 2850
loss: 1.7908695936203003,grad_norm: 0.9999998572122759, iteration: 2851
loss: 1.7117832899093628,grad_norm: 0.9999998505437778, iteration: 2852
loss: 1.7027431726455688,grad_norm: 0.9999997935498718, iteration: 2853
loss: 1.884709358215332,grad_norm: 0.9999998100497149, iteration: 2854
loss: 1.8169766664505005,grad_norm: 0.9999998998822762, iteration: 2855
loss: 1.6047508716583252,grad_norm: 0.9999999056835481, iteration: 2856
loss: 1.7883604764938354,grad_norm: 0.9999998140717394, iteration: 2857
loss: 1.7926948070526123,grad_norm: 0.9999998312881085, iteration: 2858
loss: 1.7308545112609863,grad_norm: 0.9999999276577684, iteration: 2859
loss: 1.652921438217163,grad_norm: 0.9999998193177491, iteration: 2860
loss: 1.7848219871520996,grad_norm: 0.999999802289612, iteration: 2861
loss: 1.634895920753479,grad_norm: 0.9999998859869559, iteration: 2862
loss: 1.8341351747512817,grad_norm: 0.9999998533655896, iteration: 2863
loss: 1.7131149768829346,grad_norm: 0.9999998383437068, iteration: 2864
loss: 1.6327446699142456,grad_norm: 0.9999998021734422, iteration: 2865
loss: 1.711241602897644,grad_norm: 0.9999998458264746, iteration: 2866
loss: 1.6608635187149048,grad_norm: 0.9999998704872011, iteration: 2867
loss: 1.8523443937301636,grad_norm: 0.9999998297486903, iteration: 2868
loss: 1.6847779750823975,grad_norm: 0.9999998875323066, iteration: 2869
loss: 1.5595492124557495,grad_norm: 0.9999999161572516, iteration: 2870
loss: 1.8551177978515625,grad_norm: 0.9999998632809388, iteration: 2871
loss: 1.779060959815979,grad_norm: 0.9999997971544278, iteration: 2872
loss: 1.7931525707244873,grad_norm: 0.9999998321252233, iteration: 2873
loss: 1.7027615308761597,grad_norm: 0.9999998472436396, iteration: 2874
loss: 1.7221014499664307,grad_norm: 0.9999998952780289, iteration: 2875
loss: 1.8212133646011353,grad_norm: 0.9999998232666077, iteration: 2876
loss: 1.7518863677978516,grad_norm: 0.9999998577157055, iteration: 2877
loss: 1.8294943571090698,grad_norm: 0.9999997470381242, iteration: 2878
loss: 1.6889928579330444,grad_norm: 0.999999784761932, iteration: 2879
loss: 1.7739276885986328,grad_norm: 0.9999998849184932, iteration: 2880
loss: 1.7529538869857788,grad_norm: 0.9999998488740595, iteration: 2881
loss: 1.7636345624923706,grad_norm: 0.9999997798063068, iteration: 2882
loss: 1.8286690711975098,grad_norm: 0.999999856412137, iteration: 2883
loss: 1.6515989303588867,grad_norm: 0.9999997698677952, iteration: 2884
loss: 1.730915904045105,grad_norm: 0.9999997775391493, iteration: 2885
loss: 1.7903392314910889,grad_norm: 0.9999998571391054, iteration: 2886
loss: 1.6842347383499146,grad_norm: 0.999999812829721, iteration: 2887
loss: 1.672963261604309,grad_norm: 0.9999997600883987, iteration: 2888
loss: 1.6232370138168335,grad_norm: 0.9999998938808163, iteration: 2889
loss: 1.6080591678619385,grad_norm: 0.9999998938351928, iteration: 2890
loss: 1.6834031343460083,grad_norm: 0.9999998951486201, iteration: 2891
loss: 1.6248198747634888,grad_norm: 0.9999996926233916, iteration: 2892
loss: 1.7380837202072144,grad_norm: 0.9999998246819213, iteration: 2893
loss: 1.7326765060424805,grad_norm: 0.9999998589422275, iteration: 2894
loss: 1.682857632637024,grad_norm: 0.999999871956204, iteration: 2895
loss: 1.7615770101547241,grad_norm: 0.9999998374570275, iteration: 2896
loss: 1.5200715065002441,grad_norm: 0.9999998926397786, iteration: 2897
loss: 1.7759324312210083,grad_norm: 0.9999998987776607, iteration: 2898
loss: 1.7611312866210938,grad_norm: 0.9999998686241077, iteration: 2899
loss: 1.64542555809021,grad_norm: 0.9999997693706496, iteration: 2900
loss: 1.7489891052246094,grad_norm: 0.9999997512287147, iteration: 2901
loss: 1.6873434782028198,grad_norm: 0.9999998479761034, iteration: 2902
loss: 1.8226178884506226,grad_norm: 0.9999998968540286, iteration: 2903
loss: 1.6866265535354614,grad_norm: 0.9999998554435631, iteration: 2904
loss: 1.8112927675247192,grad_norm: 0.9999998195217796, iteration: 2905
loss: 1.6333128213882446,grad_norm: 0.9999998054218902, iteration: 2906
loss: 1.6310988664627075,grad_norm: 0.999999819781819, iteration: 2907
loss: 1.7691878080368042,grad_norm: 0.9999998166546384, iteration: 2908
loss: 1.7880696058273315,grad_norm: 0.9999998235113224, iteration: 2909
loss: 1.705449104309082,grad_norm: 0.9999997625606047, iteration: 2910
loss: 1.7061309814453125,grad_norm: 0.9999998209662965, iteration: 2911
loss: 1.8259155750274658,grad_norm: 0.9999998165094308, iteration: 2912
loss: 1.7116241455078125,grad_norm: 0.9999998815799743, iteration: 2913
loss: 1.6799285411834717,grad_norm: 0.999999808066993, iteration: 2914
loss: 1.738637089729309,grad_norm: 0.9999998571255276, iteration: 2915
loss: 1.8231654167175293,grad_norm: 0.9999998472820333, iteration: 2916
loss: 1.7099568843841553,grad_norm: 0.9999997891709409, iteration: 2917
loss: 1.5978466272354126,grad_norm: 0.9999998581080266, iteration: 2918
loss: 1.729690670967102,grad_norm: 0.999999791383519, iteration: 2919
loss: 1.8313426971435547,grad_norm: 0.9999998239223404, iteration: 2920
loss: 1.7279713153839111,grad_norm: 0.9999998182958513, iteration: 2921
loss: 1.7171154022216797,grad_norm: 0.9999998724596663, iteration: 2922
loss: 1.6841949224472046,grad_norm: 0.9999997580255374, iteration: 2923
loss: 1.620010256767273,grad_norm: 0.999999880099489, iteration: 2924
loss: 1.8923976421356201,grad_norm: 0.9999998783733453, iteration: 2925
loss: 1.5408058166503906,grad_norm: 0.9999997573026802, iteration: 2926
loss: 1.7470561265945435,grad_norm: 0.9999997713154775, iteration: 2927
loss: 1.5736783742904663,grad_norm: 0.9999998047538987, iteration: 2928
loss: 1.7199180126190186,grad_norm: 0.9999997837988674, iteration: 2929
loss: 1.6595473289489746,grad_norm: 0.9999998378659122, iteration: 2930
loss: 1.6620886325836182,grad_norm: 0.9999997605915613, iteration: 2931
loss: 1.5834338665008545,grad_norm: 0.9999998394030584, iteration: 2932
loss: 1.8355005979537964,grad_norm: 0.9999998847758494, iteration: 2933
loss: 1.7299721240997314,grad_norm: 0.9999998570932779, iteration: 2934
loss: 1.6383628845214844,grad_norm: 0.9999997404132404, iteration: 2935
loss: 1.753488302230835,grad_norm: 0.9999998114345681, iteration: 2936
loss: 1.5567386150360107,grad_norm: 0.9999998347826022, iteration: 2937
loss: 1.7356901168823242,grad_norm: 0.9999998314997632, iteration: 2938
loss: 1.5521883964538574,grad_norm: 0.999999753056556, iteration: 2939
loss: 1.7532039880752563,grad_norm: 0.9999998531855478, iteration: 2940
loss: 1.7020326852798462,grad_norm: 0.9999997775679733, iteration: 2941
loss: 1.7150040864944458,grad_norm: 0.9999998685771915, iteration: 2942
loss: 1.803633689880371,grad_norm: 0.9999998724884226, iteration: 2943
loss: 1.7194584608078003,grad_norm: 0.9999998199493141, iteration: 2944
loss: 1.8478209972381592,grad_norm: 0.999999818695042, iteration: 2945
loss: 1.7077244520187378,grad_norm: 0.999999884160445, iteration: 2946
loss: 1.789833426475525,grad_norm: 0.9999998319535899, iteration: 2947
loss: 1.651570200920105,grad_norm: 0.999999823666915, iteration: 2948
loss: 1.6529439687728882,grad_norm: 0.999999825753049, iteration: 2949
loss: 1.7245250940322876,grad_norm: 0.9999998693801591, iteration: 2950
loss: 1.4642467498779297,grad_norm: 0.9999997649249049, iteration: 2951
loss: 1.7360173463821411,grad_norm: 0.9999998732276318, iteration: 2952
loss: 1.7044506072998047,grad_norm: 0.9999997650910691, iteration: 2953
loss: 1.6989898681640625,grad_norm: 0.9999997660449919, iteration: 2954
loss: 1.711301565170288,grad_norm: 0.9999997861201012, iteration: 2955
loss: 1.924766182899475,grad_norm: 0.999999867506983, iteration: 2956
loss: 1.7281907796859741,grad_norm: 0.9999998222598453, iteration: 2957
loss: 1.6853123903274536,grad_norm: 0.9999998181772658, iteration: 2958
loss: 1.7219996452331543,grad_norm: 0.9999998308012246, iteration: 2959
loss: 1.7247694730758667,grad_norm: 0.9999998213496611, iteration: 2960
loss: 1.7202397584915161,grad_norm: 0.9999997853834177, iteration: 2961
loss: 1.8317749500274658,grad_norm: 0.9999998286289543, iteration: 2962
loss: 1.727217435836792,grad_norm: 0.9999998414516937, iteration: 2963
loss: 1.6968556642532349,grad_norm: 0.9999997730383637, iteration: 2964
loss: 1.7077298164367676,grad_norm: 0.9999998142035896, iteration: 2965
loss: 1.5989115238189697,grad_norm: 0.9999998305705293, iteration: 2966
loss: 1.7008129358291626,grad_norm: 0.9999998475151874, iteration: 2967
loss: 1.586342453956604,grad_norm: 0.999999865208901, iteration: 2968
loss: 1.8152178525924683,grad_norm: 0.9999998495315664, iteration: 2969
loss: 1.616819143295288,grad_norm: 0.9999998750778124, iteration: 2970
loss: 1.6167099475860596,grad_norm: 0.9999997588666142, iteration: 2971
loss: 1.5623353719711304,grad_norm: 0.9999997577089685, iteration: 2972
loss: 1.835573673248291,grad_norm: 0.9999998185890903, iteration: 2973
loss: 1.640878677368164,grad_norm: 0.9999999251689545, iteration: 2974
loss: 1.5802221298217773,grad_norm: 0.9999998315307502, iteration: 2975
loss: 1.8324702978134155,grad_norm: 0.9999998867585093, iteration: 2976
loss: 1.6251894235610962,grad_norm: 0.9999998291325098, iteration: 2977
loss: 1.6082128286361694,grad_norm: 0.9999998078580776, iteration: 2978
loss: 1.6191730499267578,grad_norm: 0.9999998600074081, iteration: 2979
loss: 1.716217041015625,grad_norm: 0.9999997958365843, iteration: 2980
loss: 1.6723133325576782,grad_norm: 0.9999999425098247, iteration: 2981
loss: 1.6908369064331055,grad_norm: 0.9999997948742824, iteration: 2982
loss: 1.690333366394043,grad_norm: 0.99999983484078, iteration: 2983
loss: 1.600934386253357,grad_norm: 0.9999997959814201, iteration: 2984
loss: 1.7009679079055786,grad_norm: 0.9999997730547685, iteration: 2985
loss: 1.449173927307129,grad_norm: 0.9999998661814027, iteration: 2986
loss: 1.917250633239746,grad_norm: 0.9999998702200465, iteration: 2987
loss: 1.7709698677062988,grad_norm: 0.999999822787146, iteration: 2988
loss: 1.8169562816619873,grad_norm: 0.9999997948043141, iteration: 2989
loss: 1.6157222986221313,grad_norm: 0.9999998981003924, iteration: 2990
loss: 1.6305856704711914,grad_norm: 0.9999997982798756, iteration: 2991
loss: 1.7479852437973022,grad_norm: 0.999999872817201, iteration: 2992
loss: 1.8273130655288696,grad_norm: 0.9999998940001791, iteration: 2993
loss: 1.679611325263977,grad_norm: 0.9999998183608184, iteration: 2994
loss: 1.6595138311386108,grad_norm: 0.9999999050825696, iteration: 2995
loss: 1.5648590326309204,grad_norm: 0.9999998257824277, iteration: 2996
loss: 1.7685766220092773,grad_norm: 0.9999998901702873, iteration: 2997
loss: 1.6086480617523193,grad_norm: 0.9999998201024831, iteration: 2998
loss: 1.700194239616394,grad_norm: 0.9999997421054867, iteration: 2999
loss: 1.7596956491470337,grad_norm: 0.9999997588894842, iteration: 3000
loss: 1.616493821144104,grad_norm: 0.9999998506231038, iteration: 3001
loss: 1.4973416328430176,grad_norm: 0.9999998808858216, iteration: 3002
loss: 1.734992265701294,grad_norm: 0.9999998582891995, iteration: 3003
loss: 1.5312092304229736,grad_norm: 0.9999998447602361, iteration: 3004
loss: 1.7542078495025635,grad_norm: 0.9999998649562432, iteration: 3005
loss: 1.7736706733703613,grad_norm: 0.9999998967080522, iteration: 3006
loss: 1.7306567430496216,grad_norm: 0.9999998463456251, iteration: 3007
loss: 1.745057225227356,grad_norm: 0.9999998995342811, iteration: 3008
loss: 1.5295007228851318,grad_norm: 0.9999998731227993, iteration: 3009
loss: 1.5153474807739258,grad_norm: 0.9999997464940424, iteration: 3010
loss: 1.704404354095459,grad_norm: 0.999999768792567, iteration: 3011
loss: 1.751875638961792,grad_norm: 0.9999998459474042, iteration: 3012
loss: 1.5824308395385742,grad_norm: 0.9999998060072277, iteration: 3013
loss: 1.8681056499481201,grad_norm: 0.9999998695896617, iteration: 3014
loss: 1.7250478267669678,grad_norm: 0.9999997873287915, iteration: 3015
loss: 1.8552199602127075,grad_norm: 0.9999998909995961, iteration: 3016
loss: 1.6371757984161377,grad_norm: 0.9999997927175168, iteration: 3017
loss: 1.6030566692352295,grad_norm: 0.9999998946460167, iteration: 3018
loss: 1.5721604824066162,grad_norm: 0.999999811115955, iteration: 3019
loss: 1.5633965730667114,grad_norm: 0.999999804972143, iteration: 3020
loss: 1.6391518115997314,grad_norm: 0.9999997856671657, iteration: 3021
loss: 1.78298020362854,grad_norm: 0.9999998117976511, iteration: 3022
loss: 1.7794374227523804,grad_norm: 0.9999997903383634, iteration: 3023
loss: 1.7730032205581665,grad_norm: 0.9999997671160007, iteration: 3024
loss: 1.649435043334961,grad_norm: 0.9999998218444622, iteration: 3025
loss: 1.5886284112930298,grad_norm: 0.9999998509455739, iteration: 3026
loss: 1.6079925298690796,grad_norm: 0.9999998970566957, iteration: 3027
loss: 1.7615660429000854,grad_norm: 0.9999998358196497, iteration: 3028
loss: 1.8438507318496704,grad_norm: 0.9999999045654455, iteration: 3029
loss: 1.6669931411743164,grad_norm: 0.9999997820935895, iteration: 3030
loss: 1.6947698593139648,grad_norm: 0.9999998341311074, iteration: 3031
loss: 1.8289700746536255,grad_norm: 0.9999998796498669, iteration: 3032
loss: 1.7229609489440918,grad_norm: 0.9999998354935818, iteration: 3033
loss: 1.7038753032684326,grad_norm: 0.9999998817628468, iteration: 3034
loss: 1.6119648218154907,grad_norm: 0.9999998776245111, iteration: 3035
loss: 1.7212698459625244,grad_norm: 0.9999998413486848, iteration: 3036
loss: 1.6681653261184692,grad_norm: 0.9999998269320931, iteration: 3037
loss: 1.7114369869232178,grad_norm: 0.9999999004542762, iteration: 3038
loss: 1.6307144165039062,grad_norm: 0.9999998635798611, iteration: 3039
loss: 1.7292399406433105,grad_norm: 0.9999998115247477, iteration: 3040
loss: 1.7038030624389648,grad_norm: 0.9999998064716471, iteration: 3041
loss: 1.6026642322540283,grad_norm: 0.9999997976893803, iteration: 3042
loss: 1.7805744409561157,grad_norm: 0.9999998435238574, iteration: 3043
loss: 1.5822089910507202,grad_norm: 0.9999998295070138, iteration: 3044
loss: 1.7908886671066284,grad_norm: 0.9999998586900667, iteration: 3045
loss: 1.585132122039795,grad_norm: 0.9999998786016101, iteration: 3046
loss: 1.486982822418213,grad_norm: 0.999999833003113, iteration: 3047
loss: 1.7410048246383667,grad_norm: 0.9999998319121417, iteration: 3048
loss: 1.7630449533462524,grad_norm: 0.9999998497168144, iteration: 3049
loss: 1.6555298566818237,grad_norm: 0.9999998147107603, iteration: 3050
loss: 1.6158031225204468,grad_norm: 0.9999997975423622, iteration: 3051
loss: 1.6233127117156982,grad_norm: 0.9999998053124995, iteration: 3052
loss: 1.6391921043395996,grad_norm: 0.9999998565271092, iteration: 3053
loss: 1.7686712741851807,grad_norm: 0.9999998735167197, iteration: 3054
loss: 1.7261751890182495,grad_norm: 0.9999998977551031, iteration: 3055
loss: 1.6355531215667725,grad_norm: 0.999999845671903, iteration: 3056
loss: 1.735769271850586,grad_norm: 0.9999998112911889, iteration: 3057
loss: 1.6599074602127075,grad_norm: 0.9999998024394768, iteration: 3058
loss: 1.503361701965332,grad_norm: 0.9999997388631929, iteration: 3059
loss: 1.5238840579986572,grad_norm: 0.9999998197925928, iteration: 3060
loss: 1.602455735206604,grad_norm: 0.9999997618592944, iteration: 3061
loss: 1.774235486984253,grad_norm: 0.99999984131248, iteration: 3062
loss: 1.7689517736434937,grad_norm: 0.9999998776347582, iteration: 3063
loss: 1.7871297597885132,grad_norm: 0.9999998483957957, iteration: 3064
loss: 1.600020408630371,grad_norm: 0.9999998095767378, iteration: 3065
loss: 1.714646339416504,grad_norm: 0.999999817210478, iteration: 3066
loss: 1.7200263738632202,grad_norm: 0.9999998320993126, iteration: 3067
loss: 1.6767064332962036,grad_norm: 0.9999998370018671, iteration: 3068
loss: 1.5828946828842163,grad_norm: 0.9999997820211701, iteration: 3069
loss: 1.767771601676941,grad_norm: 0.9999998091000843, iteration: 3070
loss: 1.8436118364334106,grad_norm: 0.9999998696079135, iteration: 3071
loss: 1.7178747653961182,grad_norm: 0.9999998193873084, iteration: 3072
loss: 1.698378562927246,grad_norm: 0.9999998621791514, iteration: 3073
loss: 1.8701401948928833,grad_norm: 0.9999998769744707, iteration: 3074
loss: 1.7564904689788818,grad_norm: 0.9999997535764517, iteration: 3075
loss: 1.636957049369812,grad_norm: 0.9999998558436408, iteration: 3076
loss: 1.657090425491333,grad_norm: 0.9999998490079726, iteration: 3077
loss: 1.6344224214553833,grad_norm: 0.99999978790111, iteration: 3078
loss: 1.7236738204956055,grad_norm: 0.9999997677029626, iteration: 3079
loss: 1.7106133699417114,grad_norm: 0.9999997710377801, iteration: 3080
loss: 1.6733648777008057,grad_norm: 0.9999998404674822, iteration: 3081
loss: 1.7333855628967285,grad_norm: 0.9999997927103034, iteration: 3082
loss: 1.7130376100540161,grad_norm: 0.9999998077761035, iteration: 3083
loss: 1.579853892326355,grad_norm: 0.9999997933120321, iteration: 3084
loss: 1.5786240100860596,grad_norm: 0.9999998365712194, iteration: 3085
loss: 1.7590548992156982,grad_norm: 0.9999998510668697, iteration: 3086
loss: 1.6568586826324463,grad_norm: 0.9999998509915806, iteration: 3087
loss: 1.5885725021362305,grad_norm: 0.9999998796985525, iteration: 3088
loss: 1.5714423656463623,grad_norm: 0.9999998350814201, iteration: 3089
loss: 1.8004214763641357,grad_norm: 0.9999997988345259, iteration: 3090
loss: 1.477905035018921,grad_norm: 0.9999998489263735, iteration: 3091
loss: 1.690529227256775,grad_norm: 0.9999998683501505, iteration: 3092
loss: 1.8337758779525757,grad_norm: 0.9999997764045672, iteration: 3093
loss: 1.5571368932724,grad_norm: 0.9999998038012694, iteration: 3094
loss: 1.525762677192688,grad_norm: 0.999999793489033, iteration: 3095
loss: 1.7010853290557861,grad_norm: 0.9999997671671381, iteration: 3096
loss: 1.567647933959961,grad_norm: 0.9999998202990872, iteration: 3097
loss: 1.6943590641021729,grad_norm: 0.999999818805668, iteration: 3098
loss: 1.5722099542617798,grad_norm: 0.9999998598674242, iteration: 3099
loss: 1.6428260803222656,grad_norm: 0.9999998320330118, iteration: 3100
loss: 1.7403925657272339,grad_norm: 0.9999999049070907, iteration: 3101
loss: 1.6329917907714844,grad_norm: 0.9999998767396909, iteration: 3102
loss: 1.7451761960983276,grad_norm: 0.9999998620591051, iteration: 3103
loss: 1.6783254146575928,grad_norm: 0.999999831777477, iteration: 3104
loss: 1.6969109773635864,grad_norm: 0.9999997514409176, iteration: 3105
loss: 1.6171501874923706,grad_norm: 0.9999998155726095, iteration: 3106
loss: 1.7743699550628662,grad_norm: 0.999999900625057, iteration: 3107
loss: 1.5194295644760132,grad_norm: 0.999999827050136, iteration: 3108
loss: 1.6763266324996948,grad_norm: 0.9999998036551653, iteration: 3109
loss: 1.5444777011871338,grad_norm: 0.9999998306848019, iteration: 3110
loss: 1.5531988143920898,grad_norm: 0.999999774977204, iteration: 3111
loss: 1.6762661933898926,grad_norm: 0.9999998086563149, iteration: 3112
loss: 1.5315786600112915,grad_norm: 0.9999997973797885, iteration: 3113
loss: 1.6498990058898926,grad_norm: 0.9999998469480034, iteration: 3114
loss: 1.7049363851547241,grad_norm: 0.9999997962997708, iteration: 3115
loss: 1.6963378190994263,grad_norm: 0.9999998659240323, iteration: 3116
loss: 1.5869792699813843,grad_norm: 0.9999997661351417, iteration: 3117
loss: 1.494748830795288,grad_norm: 0.9999997617532284, iteration: 3118
loss: 1.5823835134506226,grad_norm: 0.999999793683614, iteration: 3119
loss: 1.7305762767791748,grad_norm: 0.9999997723562478, iteration: 3120
loss: 1.6267505884170532,grad_norm: 0.9999998266557685, iteration: 3121
loss: 1.7336375713348389,grad_norm: 0.9999998386233167, iteration: 3122
loss: 1.6757309436798096,grad_norm: 0.9999998758970264, iteration: 3123
loss: 1.6737136840820312,grad_norm: 0.9999998649503218, iteration: 3124
loss: 1.4284353256225586,grad_norm: 0.9999998227058816, iteration: 3125
loss: 1.7765285968780518,grad_norm: 0.999999755297191, iteration: 3126
loss: 1.707579255104065,grad_norm: 0.9999998033086147, iteration: 3127
loss: 1.6303131580352783,grad_norm: 0.9999998085940434, iteration: 3128
loss: 1.6776189804077148,grad_norm: 0.9999998213779154, iteration: 3129
loss: 1.5390479564666748,grad_norm: 0.9999997988551593, iteration: 3130
loss: 1.568395972251892,grad_norm: 0.9999998137479911, iteration: 3131
loss: 1.8135432004928589,grad_norm: 0.999999816705095, iteration: 3132
loss: 1.77214515209198,grad_norm: 0.9999997901131933, iteration: 3133
loss: 1.5565154552459717,grad_norm: 0.9999998201836096, iteration: 3134
loss: 1.7899267673492432,grad_norm: 0.9999998102389459, iteration: 3135
loss: 1.6414092779159546,grad_norm: 0.9999999159855121, iteration: 3136
loss: 1.6436117887496948,grad_norm: 0.9999998234986998, iteration: 3137
loss: 1.641261100769043,grad_norm: 0.9999997856002633, iteration: 3138
loss: 1.9147993326187134,grad_norm: 0.9999998279746765, iteration: 3139
loss: 1.3927767276763916,grad_norm: 0.9999999249999535, iteration: 3140
loss: 1.6108756065368652,grad_norm: 0.9999998094640309, iteration: 3141
loss: 1.5264055728912354,grad_norm: 0.9999998189532933, iteration: 3142
loss: 1.6174075603485107,grad_norm: 0.9999998126703727, iteration: 3143
loss: 1.6264526844024658,grad_norm: 0.9999997404165122, iteration: 3144
loss: 1.6725571155548096,grad_norm: 0.9999998256357653, iteration: 3145
loss: 1.4990363121032715,grad_norm: 0.9999999049809811, iteration: 3146
loss: 1.6920281648635864,grad_norm: 0.9999998525767752, iteration: 3147
loss: 1.5925726890563965,grad_norm: 0.999999825949286, iteration: 3148
loss: 1.59879469871521,grad_norm: 0.9999998674277629, iteration: 3149
loss: 1.7142560482025146,grad_norm: 0.9999998205848507, iteration: 3150
loss: 1.6202205419540405,grad_norm: 0.9999997156982696, iteration: 3151
loss: 1.6080454587936401,grad_norm: 0.9999998453501664, iteration: 3152
loss: 1.7305800914764404,grad_norm: 0.9999998561818705, iteration: 3153
loss: 1.6124290227890015,grad_norm: 0.9999998452977134, iteration: 3154
loss: 1.7871901988983154,grad_norm: 0.9999998448767418, iteration: 3155
loss: 1.7703498601913452,grad_norm: 0.9999999172993405, iteration: 3156
loss: 1.5463776588439941,grad_norm: 0.9999998121071575, iteration: 3157
loss: 1.4254119396209717,grad_norm: 0.9999998182706662, iteration: 3158
loss: 1.711869478225708,grad_norm: 0.9999998412541613, iteration: 3159
loss: 1.685544729232788,grad_norm: 0.9999998978242856, iteration: 3160
loss: 1.5634888410568237,grad_norm: 0.9999998581533147, iteration: 3161
loss: 1.6015461683273315,grad_norm: 0.9999998283311549, iteration: 3162
loss: 1.4202141761779785,grad_norm: 0.9999998153961394, iteration: 3163
loss: 1.7382601499557495,grad_norm: 0.9999998439207387, iteration: 3164
loss: 1.5430539846420288,grad_norm: 0.9999998461013266, iteration: 3165
loss: 1.6653746366500854,grad_norm: 0.9999998305316138, iteration: 3166
loss: 1.5241647958755493,grad_norm: 0.9999998311137117, iteration: 3167
loss: 1.6818081140518188,grad_norm: 0.9999998865651839, iteration: 3168
loss: 1.7012256383895874,grad_norm: 0.9999997992671242, iteration: 3169
loss: 1.5398356914520264,grad_norm: 0.9999998411622134, iteration: 3170
loss: 1.7838213443756104,grad_norm: 0.9999998796413466, iteration: 3171
loss: 1.6627466678619385,grad_norm: 0.9999998197841098, iteration: 3172
loss: 1.7551965713500977,grad_norm: 0.9999998052110737, iteration: 3173
loss: 1.596737265586853,grad_norm: 0.9999998429285404, iteration: 3174
loss: 1.5289732217788696,grad_norm: 0.99999991478866, iteration: 3175
loss: 1.60613214969635,grad_norm: 0.9999998360643733, iteration: 3176
loss: 1.61867094039917,grad_norm: 0.9999997818255859, iteration: 3177
loss: 1.5811927318572998,grad_norm: 0.999999871703911, iteration: 3178
loss: 1.686471700668335,grad_norm: 0.9999998734455683, iteration: 3179
loss: 1.579538106918335,grad_norm: 0.9999997795554239, iteration: 3180
loss: 1.705701470375061,grad_norm: 0.9999997593375882, iteration: 3181
loss: 1.6542344093322754,grad_norm: 0.9999999322512365, iteration: 3182
loss: 1.5330426692962646,grad_norm: 0.9999997879464078, iteration: 3183
loss: 1.8303112983703613,grad_norm: 0.999999822886928, iteration: 3184
loss: 1.5971827507019043,grad_norm: 0.9999999016476407, iteration: 3185
loss: 1.5428175926208496,grad_norm: 0.9999998760483528, iteration: 3186
loss: 1.521833896636963,grad_norm: 0.9999997918470661, iteration: 3187
loss: 1.6339576244354248,grad_norm: 0.9999998754627023, iteration: 3188
loss: 1.7115062475204468,grad_norm: 0.999999781679354, iteration: 3189
loss: 1.7344328165054321,grad_norm: 0.9999997986583197, iteration: 3190
loss: 1.698639988899231,grad_norm: 0.9999998390026231, iteration: 3191
loss: 1.6976772546768188,grad_norm: 0.9999997638652914, iteration: 3192
loss: 1.542360544204712,grad_norm: 0.9999998793143466, iteration: 3193
loss: 1.7063065767288208,grad_norm: 0.9999998636402844, iteration: 3194
loss: 1.7384958267211914,grad_norm: 0.999999849043989, iteration: 3195
loss: 1.47423255443573,grad_norm: 0.9999998592881281, iteration: 3196
loss: 1.593950867652893,grad_norm: 0.9999998363687033, iteration: 3197
loss: 1.6427024602890015,grad_norm: 0.9999999285108512, iteration: 3198
loss: 1.4613323211669922,grad_norm: 0.999999777628666, iteration: 3199
loss: 1.5487060546875,grad_norm: 0.9999997689332127, iteration: 3200
loss: 1.667006254196167,grad_norm: 0.99999976500546, iteration: 3201
loss: 1.5929789543151855,grad_norm: 0.9999997948669033, iteration: 3202
loss: 1.5253715515136719,grad_norm: 0.999999775710929, iteration: 3203
loss: 1.5403187274932861,grad_norm: 0.9999998570038137, iteration: 3204
loss: 1.6488651037216187,grad_norm: 0.9999998433624995, iteration: 3205
loss: 1.7154114246368408,grad_norm: 0.9999998226397812, iteration: 3206
loss: 1.602242350578308,grad_norm: 0.9999998305173208, iteration: 3207
loss: 1.6792224645614624,grad_norm: 0.9999998445813814, iteration: 3208
loss: 1.6231367588043213,grad_norm: 0.9999997935223538, iteration: 3209
loss: 1.6160166263580322,grad_norm: 0.9999997762628927, iteration: 3210
loss: 1.7596534490585327,grad_norm: 0.9999998449721701, iteration: 3211
loss: 1.5117610692977905,grad_norm: 0.9999998440184817, iteration: 3212
loss: 1.5892528295516968,grad_norm: 0.9999998311411423, iteration: 3213
loss: 1.715129017829895,grad_norm: 0.9999997972781993, iteration: 3214
loss: 1.6761937141418457,grad_norm: 0.9999998192407723, iteration: 3215
loss: 1.5987377166748047,grad_norm: 0.9999998242709436, iteration: 3216
loss: 1.7875988483428955,grad_norm: 0.9999998340746157, iteration: 3217
loss: 1.678291916847229,grad_norm: 0.9999998176868045, iteration: 3218
loss: 1.4798098802566528,grad_norm: 0.9999997910919974, iteration: 3219
loss: 1.5876795053482056,grad_norm: 0.9999998279402612, iteration: 3220
loss: 1.5949066877365112,grad_norm: 0.9999999221023648, iteration: 3221
loss: 1.545728325843811,grad_norm: 0.9999998625636344, iteration: 3222
loss: 1.53864586353302,grad_norm: 0.9999998411494634, iteration: 3223
loss: 1.5888091325759888,grad_norm: 0.9999997543790697, iteration: 3224
loss: 1.6732990741729736,grad_norm: 0.9999998243907036, iteration: 3225
loss: 1.484979510307312,grad_norm: 0.9999998127628542, iteration: 3226
loss: 1.5782729387283325,grad_norm: 0.9999998212900185, iteration: 3227
loss: 1.703539252281189,grad_norm: 0.999999806150475, iteration: 3228
loss: 1.4730969667434692,grad_norm: 0.9999998504155925, iteration: 3229
loss: 1.5112731456756592,grad_norm: 0.9999998234352333, iteration: 3230
loss: 1.5870440006256104,grad_norm: 0.9999998446446813, iteration: 3231
loss: 1.4865214824676514,grad_norm: 0.9999997909977282, iteration: 3232
loss: 1.6189253330230713,grad_norm: 0.9999998195776824, iteration: 3233
loss: 1.6595948934555054,grad_norm: 0.9999998162357843, iteration: 3234
loss: 1.495354413986206,grad_norm: 0.9999998428486222, iteration: 3235
loss: 1.5842119455337524,grad_norm: 0.9999998266432751, iteration: 3236
loss: 1.5679535865783691,grad_norm: 0.9999997898493557, iteration: 3237
loss: 1.606655240058899,grad_norm: 0.9999997902549638, iteration: 3238
loss: 1.6686066389083862,grad_norm: 0.999999845692496, iteration: 3239
loss: 1.7067991495132446,grad_norm: 0.9999998370365365, iteration: 3240
loss: 1.4377262592315674,grad_norm: 0.9999997529388427, iteration: 3241
loss: 1.6953939199447632,grad_norm: 0.9999998245585952, iteration: 3242
loss: 1.7258174419403076,grad_norm: 0.9999997839152883, iteration: 3243
loss: 1.6387782096862793,grad_norm: 0.9999998491998704, iteration: 3244
loss: 1.636324167251587,grad_norm: 0.9999998348990116, iteration: 3245
loss: 1.6584948301315308,grad_norm: 0.9999998687154452, iteration: 3246
loss: 1.660433053970337,grad_norm: 0.9999998032242393, iteration: 3247
loss: 1.4713605642318726,grad_norm: 0.9999998981359258, iteration: 3248
loss: 1.4742727279663086,grad_norm: 0.9999998110965524, iteration: 3249
loss: 1.5770947933197021,grad_norm: 0.9999997449496552, iteration: 3250
loss: 1.5145630836486816,grad_norm: 0.9999997498253834, iteration: 3251
loss: 1.6013096570968628,grad_norm: 0.9999997552225026, iteration: 3252
loss: 1.5955222845077515,grad_norm: 0.9999998597308648, iteration: 3253
loss: 1.7951767444610596,grad_norm: 0.9999998339462642, iteration: 3254
loss: 1.5337109565734863,grad_norm: 0.9999997980270432, iteration: 3255
loss: 1.5462628602981567,grad_norm: 0.9999998766545138, iteration: 3256
loss: 1.6268181800842285,grad_norm: 0.9999998636326999, iteration: 3257
loss: 1.6579939126968384,grad_norm: 0.9999998183449518, iteration: 3258
loss: 1.747194528579712,grad_norm: 0.9999998683032499, iteration: 3259
loss: 1.642296552658081,grad_norm: 0.9999998668662688, iteration: 3260
loss: 1.5499404668807983,grad_norm: 0.9999998011107678, iteration: 3261
loss: 1.619234323501587,grad_norm: 0.999999813987968, iteration: 3262
loss: 1.6017584800720215,grad_norm: 0.9999998255961785, iteration: 3263
loss: 1.5856845378875732,grad_norm: 0.9999997946262639, iteration: 3264
loss: 1.4797816276550293,grad_norm: 0.9999998146198149, iteration: 3265
loss: 1.6367013454437256,grad_norm: 0.9999997299354386, iteration: 3266
loss: 1.3925920724868774,grad_norm: 0.9999998244799803, iteration: 3267
loss: 1.494281530380249,grad_norm: 0.9999998503128882, iteration: 3268
loss: 1.6670304536819458,grad_norm: 0.9999997875554204, iteration: 3269
loss: 1.5499615669250488,grad_norm: 0.99999978161889, iteration: 3270
loss: 1.5304126739501953,grad_norm: 0.9999998327895062, iteration: 3271
loss: 1.6639835834503174,grad_norm: 0.9999998234142482, iteration: 3272
loss: 1.489427089691162,grad_norm: 0.9999998561989167, iteration: 3273
loss: 1.5585546493530273,grad_norm: 0.9999998543572423, iteration: 3274
loss: 1.5935657024383545,grad_norm: 0.9999998251271817, iteration: 3275
loss: 1.5329596996307373,grad_norm: 0.9999998208092087, iteration: 3276
loss: 1.549747109413147,grad_norm: 0.9999997978886815, iteration: 3277
loss: 1.6461575031280518,grad_norm: 0.9999998403774992, iteration: 3278
loss: 1.4988908767700195,grad_norm: 0.9999999409230284, iteration: 3279
loss: 1.5640347003936768,grad_norm: 0.9999998123016568, iteration: 3280
loss: 1.5133975744247437,grad_norm: 0.9999997824232423, iteration: 3281
loss: 1.555701494216919,grad_norm: 0.9999998522727981, iteration: 3282
loss: 1.671199083328247,grad_norm: 0.9999998604611606, iteration: 3283
loss: 1.576418399810791,grad_norm: 0.9999997996350312, iteration: 3284
loss: 1.4453445672988892,grad_norm: 0.9999998080291174, iteration: 3285
loss: 1.560699701309204,grad_norm: 0.9999998350867263, iteration: 3286
loss: 1.6457983255386353,grad_norm: 0.9999997913974992, iteration: 3287
loss: 1.4769586324691772,grad_norm: 0.9999998458178905, iteration: 3288
loss: 1.6130579710006714,grad_norm: 0.9999998034455095, iteration: 3289
loss: 1.7432210445404053,grad_norm: 0.9999998192896227, iteration: 3290
loss: 1.645877718925476,grad_norm: 0.9999998566865552, iteration: 3291
loss: 1.6490740776062012,grad_norm: 0.9999998114724823, iteration: 3292
loss: 1.713644027709961,grad_norm: 0.9999998363962783, iteration: 3293
loss: 1.4029051065444946,grad_norm: 0.9999997983352539, iteration: 3294
loss: 1.7071565389633179,grad_norm: 0.9999998134449807, iteration: 3295
loss: 1.5529022216796875,grad_norm: 0.9999997617286097, iteration: 3296
loss: 1.5447489023208618,grad_norm: 0.999999819050141, iteration: 3297
loss: 1.455795168876648,grad_norm: 0.9999998408144044, iteration: 3298
loss: 1.465831995010376,grad_norm: 0.9999998414710357, iteration: 3299
loss: 1.6738340854644775,grad_norm: 0.999999906281755, iteration: 3300
loss: 1.5865120887756348,grad_norm: 0.9999998335879865, iteration: 3301
loss: 1.737947702407837,grad_norm: 0.9999997752773271, iteration: 3302
loss: 1.5372775793075562,grad_norm: 0.9999998449815016, iteration: 3303
loss: 1.5959025621414185,grad_norm: 0.9999998085551125, iteration: 3304
loss: 1.5356062650680542,grad_norm: 0.999999826203972, iteration: 3305
loss: 1.5886482000350952,grad_norm: 0.9999998971254999, iteration: 3306
loss: 1.551571011543274,grad_norm: 0.999999795238495, iteration: 3307
loss: 1.4287370443344116,grad_norm: 0.9999997674476993, iteration: 3308
loss: 1.7125906944274902,grad_norm: 0.9999998958878132, iteration: 3309
loss: 1.4667083024978638,grad_norm: 0.9999998318892438, iteration: 3310
loss: 1.5084398984909058,grad_norm: 0.9999998620479091, iteration: 3311
loss: 1.5639115571975708,grad_norm: 0.9999998660701809, iteration: 3312
loss: 1.6413732767105103,grad_norm: 0.9999997485779544, iteration: 3313
loss: 1.6009167432785034,grad_norm: 0.999999860364285, iteration: 3314
loss: 1.6323518753051758,grad_norm: 0.9999998930211857, iteration: 3315
loss: 1.6239980459213257,grad_norm: 0.999999841241514, iteration: 3316
loss: 1.6024900674819946,grad_norm: 0.9999998019863205, iteration: 3317
loss: 1.5780948400497437,grad_norm: 0.9999997746889082, iteration: 3318
loss: 1.6695996522903442,grad_norm: 0.9999998343826424, iteration: 3319
loss: 1.771726131439209,grad_norm: 0.9999997937615067, iteration: 3320
loss: 1.5767072439193726,grad_norm: 0.9999997481377627, iteration: 3321
loss: 1.5580542087554932,grad_norm: 0.9999998293350268, iteration: 3322
loss: 1.561403512954712,grad_norm: 0.9999998650597491, iteration: 3323
loss: 1.6581617593765259,grad_norm: 0.9999998307506985, iteration: 3324
loss: 1.4473165273666382,grad_norm: 0.9999997593014163, iteration: 3325
loss: 1.5011024475097656,grad_norm: 0.9999997954504964, iteration: 3326
loss: 1.4895566701889038,grad_norm: 0.999999799498729, iteration: 3327
loss: 1.6520792245864868,grad_norm: 0.9999998131437379, iteration: 3328
loss: 1.5816327333450317,grad_norm: 0.9999998635728564, iteration: 3329
loss: 1.4515267610549927,grad_norm: 0.9999998826565416, iteration: 3330
loss: 1.7787580490112305,grad_norm: 0.9999998792712974, iteration: 3331
loss: 1.5954089164733887,grad_norm: 0.9999998508812942, iteration: 3332
loss: 1.3925329446792603,grad_norm: 0.9999998260196131, iteration: 3333
loss: 1.518094539642334,grad_norm: 0.9999997454658478, iteration: 3334
loss: 1.60333251953125,grad_norm: 0.9999998367020879, iteration: 3335
loss: 1.3646587133407593,grad_norm: 0.9999997711695557, iteration: 3336
loss: 1.641933560371399,grad_norm: 0.9999997761632832, iteration: 3337
loss: 1.745182752609253,grad_norm: 0.9999998878418561, iteration: 3338
loss: 1.577704906463623,grad_norm: 0.9999998131355723, iteration: 3339
loss: 1.6690959930419922,grad_norm: 0.9999997735493471, iteration: 3340
loss: 1.5322660207748413,grad_norm: 0.9999998443706769, iteration: 3341
loss: 1.5087429285049438,grad_norm: 0.9999998716698685, iteration: 3342
loss: 1.520284652709961,grad_norm: 0.9999998574655615, iteration: 3343
loss: 1.5869214534759521,grad_norm: 0.9999998403411106, iteration: 3344
loss: 1.582748293876648,grad_norm: 0.9999998538972145, iteration: 3345
loss: 1.6035411357879639,grad_norm: 0.9999998398151306, iteration: 3346
loss: 1.4911785125732422,grad_norm: 0.9999998004467078, iteration: 3347
loss: 1.4878618717193604,grad_norm: 0.9999997633856621, iteration: 3348
loss: 1.4087963104248047,grad_norm: 0.9999998529785821, iteration: 3349
loss: 1.609776496887207,grad_norm: 0.9999998100191027, iteration: 3350
loss: 1.6346001625061035,grad_norm: 0.9999998342495307, iteration: 3351
loss: 1.5207561254501343,grad_norm: 0.999999779067188, iteration: 3352
loss: 1.6483289003372192,grad_norm: 0.9999997743558656, iteration: 3353
loss: 1.7197312116622925,grad_norm: 0.9999998077195833, iteration: 3354
loss: 1.5157418251037598,grad_norm: 0.9999998370600337, iteration: 3355
loss: 1.555302381515503,grad_norm: 0.9999998494577229, iteration: 3356
loss: 1.507829189300537,grad_norm: 0.9999998942289622, iteration: 3357
loss: 1.5634371042251587,grad_norm: 0.9999998455121611, iteration: 3358
loss: 1.6622434854507446,grad_norm: 0.9999998471683073, iteration: 3359
loss: 1.6360571384429932,grad_norm: 0.9999997981979418, iteration: 3360
loss: 1.6148722171783447,grad_norm: 0.9999998430571543, iteration: 3361
loss: 1.656467318534851,grad_norm: 0.9999998312882116, iteration: 3362
loss: 1.5412636995315552,grad_norm: 0.9999998302169487, iteration: 3363
loss: 1.660529613494873,grad_norm: 0.9999997896305615, iteration: 3364
loss: 1.5661485195159912,grad_norm: 0.9999997807400535, iteration: 3365
loss: 1.42116379737854,grad_norm: 0.9999998307833592, iteration: 3366
loss: 1.5074470043182373,grad_norm: 0.9999998083709933, iteration: 3367
loss: 1.5022246837615967,grad_norm: 0.9999998112847522, iteration: 3368
loss: 1.3937795162200928,grad_norm: 0.9999998100995371, iteration: 3369
loss: 1.7307960987091064,grad_norm: 0.9999998075205082, iteration: 3370
loss: 1.6670207977294922,grad_norm: 0.9999998397766208, iteration: 3371
loss: 1.6936737298965454,grad_norm: 0.9999997995154785, iteration: 3372
loss: 1.761468529701233,grad_norm: 0.9999998408280023, iteration: 3373
loss: 1.623120665550232,grad_norm: 0.999999832757077, iteration: 3374
loss: 1.5005638599395752,grad_norm: 0.9999997594394846, iteration: 3375
loss: 1.466963768005371,grad_norm: 0.9999998380752497, iteration: 3376
loss: 1.4234551191329956,grad_norm: 0.9999998136512769, iteration: 3377
loss: 1.5327852964401245,grad_norm: 0.999999861488059, iteration: 3378
loss: 1.635666847229004,grad_norm: 0.9999998992736464, iteration: 3379
loss: 1.619920253753662,grad_norm: 0.9999998202714305, iteration: 3380
loss: 1.5711930990219116,grad_norm: 0.999999866744627, iteration: 3381
loss: 1.5710723400115967,grad_norm: 0.9999998294653701, iteration: 3382
loss: 1.4867639541625977,grad_norm: 0.9999998067771978, iteration: 3383
loss: 1.621860384941101,grad_norm: 0.9999998668542492, iteration: 3384
loss: 1.4605274200439453,grad_norm: 0.9999997409642151, iteration: 3385
loss: 1.57521653175354,grad_norm: 0.999999792716071, iteration: 3386
loss: 1.5124261379241943,grad_norm: 0.9999997956339478, iteration: 3387
loss: 1.5524744987487793,grad_norm: 0.9999998424063796, iteration: 3388
loss: 1.4051191806793213,grad_norm: 0.9999998020057613, iteration: 3389
loss: 1.6038825511932373,grad_norm: 0.9999998645867411, iteration: 3390
loss: 1.4762507677078247,grad_norm: 0.9999998502658369, iteration: 3391
loss: 1.4583606719970703,grad_norm: 0.999999778375084, iteration: 3392
loss: 1.58428955078125,grad_norm: 0.9999998184199823, iteration: 3393
loss: 1.549532413482666,grad_norm: 0.9999997438756295, iteration: 3394
loss: 1.5565695762634277,grad_norm: 0.999999908442552, iteration: 3395
loss: 1.619373083114624,grad_norm: 0.9999998327973861, iteration: 3396
loss: 1.5530043840408325,grad_norm: 0.9999998736594956, iteration: 3397
loss: 1.546627402305603,grad_norm: 0.999999799164572, iteration: 3398
loss: 1.562092661857605,grad_norm: 0.9999997344211581, iteration: 3399
loss: 1.5961625576019287,grad_norm: 0.9999997476712008, iteration: 3400
loss: 1.6682802438735962,grad_norm: 0.9999998021632951, iteration: 3401
loss: 1.5611307621002197,grad_norm: 0.9999997668887961, iteration: 3402
loss: 1.5269725322723389,grad_norm: 0.9999997442486358, iteration: 3403
loss: 1.3770523071289062,grad_norm: 0.9999998302599579, iteration: 3404
loss: 1.4675482511520386,grad_norm: 0.9999997761566045, iteration: 3405
loss: 1.6469100713729858,grad_norm: 0.9999998271622382, iteration: 3406
loss: 1.4912203550338745,grad_norm: 0.9999998329521462, iteration: 3407
loss: 1.551817774772644,grad_norm: 0.9999998023996093, iteration: 3408
loss: 1.4720094203948975,grad_norm: 0.9999998565321347, iteration: 3409
loss: 1.5365303754806519,grad_norm: 0.9999998132480709, iteration: 3410
loss: 1.544931173324585,grad_norm: 0.9999997660882333, iteration: 3411
loss: 1.554598331451416,grad_norm: 0.999999886316626, iteration: 3412
loss: 1.5530740022659302,grad_norm: 0.9999998147678921, iteration: 3413
loss: 1.680953860282898,grad_norm: 0.9999997830733126, iteration: 3414
loss: 1.4117249250411987,grad_norm: 0.9999998059960294, iteration: 3415
loss: 1.5258593559265137,grad_norm: 0.9999997270865425, iteration: 3416
loss: 1.3876081705093384,grad_norm: 0.9999997430993381, iteration: 3417
loss: 1.5815709829330444,grad_norm: 0.9999997888006212, iteration: 3418
loss: 1.5300811529159546,grad_norm: 0.999999759561651, iteration: 3419
loss: 1.5154706239700317,grad_norm: 0.9999997900958192, iteration: 3420
loss: 1.5586506128311157,grad_norm: 0.9999998339752529, iteration: 3421
loss: 1.611415982246399,grad_norm: 0.9999998431161375, iteration: 3422
loss: 1.4526219367980957,grad_norm: 0.9999997731208394, iteration: 3423
loss: 1.4534351825714111,grad_norm: 0.999999808378834, iteration: 3424
loss: 1.5984680652618408,grad_norm: 0.9999997999132614, iteration: 3425
loss: 1.5980719327926636,grad_norm: 0.9999998608791105, iteration: 3426
loss: 1.51765775680542,grad_norm: 0.9999998391118693, iteration: 3427
loss: 1.683268666267395,grad_norm: 0.9999998593782191, iteration: 3428
loss: 1.5994150638580322,grad_norm: 0.9999998364882057, iteration: 3429
loss: 1.589238166809082,grad_norm: 0.9999997897160842, iteration: 3430
loss: 1.4654827117919922,grad_norm: 0.9999998373844892, iteration: 3431
loss: 1.505453109741211,grad_norm: 0.9999997909106559, iteration: 3432
loss: 1.5618565082550049,grad_norm: 0.9999998541412508, iteration: 3433
loss: 1.502738118171692,grad_norm: 0.9999997308148909, iteration: 3434
loss: 1.4314895868301392,grad_norm: 0.99999986684098, iteration: 3435
loss: 1.6120781898498535,grad_norm: 0.9999997985032432, iteration: 3436
loss: 1.551128625869751,grad_norm: 0.9999998202932805, iteration: 3437
loss: 1.5545967817306519,grad_norm: 0.9999998282532262, iteration: 3438
loss: 1.4388285875320435,grad_norm: 0.9999997750145956, iteration: 3439
loss: 1.5245988368988037,grad_norm: 0.9999997723674148, iteration: 3440
loss: 1.5600383281707764,grad_norm: 0.9999998487816857, iteration: 3441
loss: 1.4266670942306519,grad_norm: 0.9999998135488453, iteration: 3442
loss: 1.4467302560806274,grad_norm: 0.9999998597690206, iteration: 3443
loss: 1.6014506816864014,grad_norm: 0.9999998195865, iteration: 3444
loss: 1.7092796564102173,grad_norm: 0.9999999197114251, iteration: 3445
loss: 1.4049854278564453,grad_norm: 0.9999998162831574, iteration: 3446
loss: 1.3757004737854004,grad_norm: 0.999999783458794, iteration: 3447
loss: 1.5621699094772339,grad_norm: 0.9999998378618183, iteration: 3448
loss: 1.4032487869262695,grad_norm: 0.9999998270330808, iteration: 3449
loss: 1.4211803674697876,grad_norm: 0.999999792349524, iteration: 3450
loss: 1.511372685432434,grad_norm: 0.9999998939164102, iteration: 3451
loss: 1.551409363746643,grad_norm: 0.9999997993361794, iteration: 3452
loss: 1.3788708448410034,grad_norm: 0.9999997513703632, iteration: 3453
loss: 1.5593862533569336,grad_norm: 0.999999818685975, iteration: 3454
loss: 1.5414878129959106,grad_norm: 0.9999997419016884, iteration: 3455
loss: 1.5637757778167725,grad_norm: 0.9999997983691712, iteration: 3456
loss: 1.5087506771087646,grad_norm: 0.9999997710891778, iteration: 3457
loss: 1.5063365697860718,grad_norm: 0.9999998455394954, iteration: 3458
loss: 1.5032175779342651,grad_norm: 0.9999998369842208, iteration: 3459
loss: 1.520716667175293,grad_norm: 0.9999997923801356, iteration: 3460
loss: 1.5236170291900635,grad_norm: 0.9999998769125946, iteration: 3461
loss: 1.6162492036819458,grad_norm: 0.999999814591123, iteration: 3462
loss: 1.5792025327682495,grad_norm: 0.9999998721137303, iteration: 3463
loss: 1.4320615530014038,grad_norm: 0.9999998377610543, iteration: 3464
loss: 1.3315638303756714,grad_norm: 0.9999997894233116, iteration: 3465
loss: 1.6434414386749268,grad_norm: 0.9999998441707035, iteration: 3466
loss: 1.5982812643051147,grad_norm: 0.9999998047096141, iteration: 3467
loss: 1.677449345588684,grad_norm: 0.9999997616458427, iteration: 3468
loss: 1.5877623558044434,grad_norm: 0.99999983034755, iteration: 3469
loss: 1.4596096277236938,grad_norm: 0.9999998424098578, iteration: 3470
loss: 1.4502323865890503,grad_norm: 0.9999998226874288, iteration: 3471
loss: 1.5197066068649292,grad_norm: 0.9999998398778609, iteration: 3472
loss: 1.4462252855300903,grad_norm: 0.9999996906247995, iteration: 3473
loss: 1.411774754524231,grad_norm: 0.9999997937433611, iteration: 3474
loss: 1.4880141019821167,grad_norm: 0.9999997916873615, iteration: 3475
loss: 1.552082896232605,grad_norm: 0.9999998678065952, iteration: 3476
loss: 1.4616773128509521,grad_norm: 0.9999997501101434, iteration: 3477
loss: 1.4138457775115967,grad_norm: 0.9999998163692568, iteration: 3478
loss: 1.7388280630111694,grad_norm: 0.9999998354002739, iteration: 3479
loss: 1.595011830329895,grad_norm: 0.9999997800746729, iteration: 3480
loss: 1.589207410812378,grad_norm: 0.9999998338577814, iteration: 3481
loss: 1.5322197675704956,grad_norm: 0.9999997549975321, iteration: 3482
loss: 1.477900505065918,grad_norm: 0.9999997541717125, iteration: 3483
loss: 1.6927021741867065,grad_norm: 0.9999998459638617, iteration: 3484
loss: 1.6085458993911743,grad_norm: 0.9999999228156454, iteration: 3485
loss: 1.5911002159118652,grad_norm: 0.999999834728058, iteration: 3486
loss: 1.6069598197937012,grad_norm: 0.999999810607859, iteration: 3487
loss: 1.6379907131195068,grad_norm: 0.9999998006402293, iteration: 3488
loss: 1.6436607837677002,grad_norm: 0.999999764882996, iteration: 3489
loss: 1.4296579360961914,grad_norm: 0.9999998063231127, iteration: 3490
loss: 1.5812735557556152,grad_norm: 0.9999998179944426, iteration: 3491
loss: 1.5967609882354736,grad_norm: 0.99999974707283, iteration: 3492
loss: 1.5350292921066284,grad_norm: 0.9999997581672528, iteration: 3493
loss: 1.6039197444915771,grad_norm: 0.999999808112364, iteration: 3494
loss: 1.5793532133102417,grad_norm: 0.9999998666888622, iteration: 3495
loss: 1.580612063407898,grad_norm: 0.9999998298164288, iteration: 3496
loss: 1.6400766372680664,grad_norm: 0.9999998502574527, iteration: 3497
loss: 1.5927062034606934,grad_norm: 0.9999998404366418, iteration: 3498
loss: 1.5726009607315063,grad_norm: 0.9999998029246776, iteration: 3499
loss: 1.5558545589447021,grad_norm: 0.9999997967137773, iteration: 3500
loss: 1.3296711444854736,grad_norm: 0.9999998326678695, iteration: 3501
loss: 1.5180305242538452,grad_norm: 0.9999998039152032, iteration: 3502
loss: 1.3852900266647339,grad_norm: 0.9999997952610928, iteration: 3503
loss: 1.6496840715408325,grad_norm: 0.9999999047165253, iteration: 3504
loss: 1.513741135597229,grad_norm: 0.9999998705727365, iteration: 3505
loss: 1.6395204067230225,grad_norm: 0.9999997479474462, iteration: 3506
loss: 1.6405116319656372,grad_norm: 0.9999999149434288, iteration: 3507
loss: 1.507834792137146,grad_norm: 0.999999818526832, iteration: 3508
loss: 1.4241511821746826,grad_norm: 0.9999998228059384, iteration: 3509
loss: 1.4861871004104614,grad_norm: 0.999999814241615, iteration: 3510
loss: 1.3631383180618286,grad_norm: 0.9999998669111666, iteration: 3511
loss: 1.40425443649292,grad_norm: 0.9999998245466826, iteration: 3512
loss: 1.5513826608657837,grad_norm: 0.9999998746545384, iteration: 3513
loss: 1.605386734008789,grad_norm: 0.9999997992172015, iteration: 3514
loss: 1.5333906412124634,grad_norm: 0.9999997577900185, iteration: 3515
loss: 1.4941787719726562,grad_norm: 0.9999997418237159, iteration: 3516
loss: 1.4792407751083374,grad_norm: 0.9999998038730697, iteration: 3517
loss: 1.5456622838974,grad_norm: 0.9999997954584533, iteration: 3518
loss: 1.4919824600219727,grad_norm: 0.9999998577792876, iteration: 3519
loss: 1.516574740409851,grad_norm: 0.999999817886622, iteration: 3520
loss: 1.458400845527649,grad_norm: 0.9999997844436193, iteration: 3521
loss: 1.4776180982589722,grad_norm: 0.9999997299629496, iteration: 3522
loss: 1.438369870185852,grad_norm: 0.9999998266485262, iteration: 3523
loss: 1.5386148691177368,grad_norm: 0.9999998767340549, iteration: 3524
loss: 1.5598105192184448,grad_norm: 0.9999998527346036, iteration: 3525
loss: 1.5699104070663452,grad_norm: 0.9999998669734079, iteration: 3526
loss: 1.3919329643249512,grad_norm: 0.9999997836239769, iteration: 3527
loss: 1.4237167835235596,grad_norm: 0.9999997778058364, iteration: 3528
loss: 1.458741545677185,grad_norm: 0.999999773596672, iteration: 3529
loss: 1.4445290565490723,grad_norm: 0.9999997535803633, iteration: 3530
loss: 1.5767695903778076,grad_norm: 0.9999998436927084, iteration: 3531
loss: 1.4186451435089111,grad_norm: 0.9999998165379693, iteration: 3532
loss: 1.4875861406326294,grad_norm: 0.9999998322961667, iteration: 3533
loss: 1.4440405368804932,grad_norm: 0.9999998316132966, iteration: 3534
loss: 1.4796370267868042,grad_norm: 0.9999998125686835, iteration: 3535
loss: 1.5628306865692139,grad_norm: 0.999999847138533, iteration: 3536
loss: 1.4470349550247192,grad_norm: 0.9999997716079471, iteration: 3537
loss: 1.5220140218734741,grad_norm: 0.9999998042425081, iteration: 3538
loss: 1.4424127340316772,grad_norm: 0.9999998299688366, iteration: 3539
loss: 1.3137658834457397,grad_norm: 0.9999997938516088, iteration: 3540
loss: 1.4561597108840942,grad_norm: 0.9999998086438909, iteration: 3541
loss: 1.467672348022461,grad_norm: 0.9999998178161916, iteration: 3542
loss: 1.5412702560424805,grad_norm: 0.9999998311378184, iteration: 3543
loss: 1.5721741914749146,grad_norm: 0.9999998476645833, iteration: 3544
loss: 1.3591442108154297,grad_norm: 0.9999997154170227, iteration: 3545
loss: 1.5306720733642578,grad_norm: 0.9999997553679232, iteration: 3546
loss: 1.6724754571914673,grad_norm: 0.9999998898861744, iteration: 3547
loss: 1.58413565158844,grad_norm: 0.9999998397449302, iteration: 3548
loss: 1.6721633672714233,grad_norm: 0.9999998406760959, iteration: 3549
loss: 1.6781789064407349,grad_norm: 0.9999998063298031, iteration: 3550
loss: 1.503885269165039,grad_norm: 0.9999998411117184, iteration: 3551
loss: 1.316458821296692,grad_norm: 0.9999998273259405, iteration: 3552
loss: 1.5846481323242188,grad_norm: 0.9999998729646143, iteration: 3553
loss: 1.6299121379852295,grad_norm: 0.9999997869623584, iteration: 3554
loss: 1.5032957792282104,grad_norm: 0.9999997871936217, iteration: 3555
loss: 1.509730339050293,grad_norm: 0.9999998245708935, iteration: 3556
loss: 1.5685063600540161,grad_norm: 0.9999997375776798, iteration: 3557
loss: 1.6612021923065186,grad_norm: 0.9999997924183075, iteration: 3558
loss: 1.4736268520355225,grad_norm: 0.9999998299241405, iteration: 3559
loss: 1.5110245943069458,grad_norm: 0.999999828481965, iteration: 3560
loss: 1.432724118232727,grad_norm: 0.9999998114937697, iteration: 3561
loss: 1.7245210409164429,grad_norm: 0.9999997948536284, iteration: 3562
loss: 1.503912091255188,grad_norm: 0.999999827329648, iteration: 3563
loss: 1.4587023258209229,grad_norm: 0.9999998728097892, iteration: 3564
loss: 1.4862669706344604,grad_norm: 0.9999998420105856, iteration: 3565
loss: 1.4350333213806152,grad_norm: 0.9999997904244841, iteration: 3566
loss: 1.560845136642456,grad_norm: 0.9999998016385988, iteration: 3567
loss: 1.4485416412353516,grad_norm: 0.9999997792807426, iteration: 3568
loss: 1.4717260599136353,grad_norm: 0.9999997773158569, iteration: 3569
loss: 1.5047147274017334,grad_norm: 0.9999998800621653, iteration: 3570
loss: 1.4004448652267456,grad_norm: 0.999999862324817, iteration: 3571
loss: 1.569627285003662,grad_norm: 0.9999997815040734, iteration: 3572
loss: 1.5434181690216064,grad_norm: 0.9999997376542533, iteration: 3573
loss: 1.5606207847595215,grad_norm: 0.99999981574574, iteration: 3574
loss: 1.582079291343689,grad_norm: 0.9999998351716554, iteration: 3575
loss: 1.4006404876708984,grad_norm: 0.9999998813784239, iteration: 3576
loss: 1.3982031345367432,grad_norm: 0.9999998066165529, iteration: 3577
loss: 1.557812213897705,grad_norm: 0.9999998125705676, iteration: 3578
loss: 1.6085047721862793,grad_norm: 0.9999998145737196, iteration: 3579
loss: 1.4210233688354492,grad_norm: 0.9999998485142707, iteration: 3580
loss: 1.566580057144165,grad_norm: 0.9999998043042558, iteration: 3581
loss: 1.4206693172454834,grad_norm: 0.9999998867857708, iteration: 3582
loss: 1.4690163135528564,grad_norm: 0.9999998132274408, iteration: 3583
loss: 1.4636863470077515,grad_norm: 0.9999998093857506, iteration: 3584
loss: 1.4992895126342773,grad_norm: 0.9999998683125612, iteration: 3585
loss: 1.3333276510238647,grad_norm: 0.9999997353824003, iteration: 3586
loss: 1.535030484199524,grad_norm: 0.9999998919723805, iteration: 3587
loss: 1.4363921880722046,grad_norm: 0.9999997669741681, iteration: 3588
loss: 1.5257562398910522,grad_norm: 0.9999997995330095, iteration: 3589
loss: 1.3348578214645386,grad_norm: 0.9999998379707536, iteration: 3590
loss: 1.4638580083847046,grad_norm: 0.9999998983216027, iteration: 3591
loss: 1.4365153312683105,grad_norm: 0.9999997720927357, iteration: 3592
loss: 1.659062147140503,grad_norm: 0.9999998413531608, iteration: 3593
loss: 1.5842673778533936,grad_norm: 0.9999998770159426, iteration: 3594
loss: 1.5506298542022705,grad_norm: 0.9999998499076327, iteration: 3595
loss: 1.4808553457260132,grad_norm: 0.9999998040567439, iteration: 3596
loss: 1.3929662704467773,grad_norm: 0.9999998433447224, iteration: 3597
loss: 1.6711872816085815,grad_norm: 0.999999727926277, iteration: 3598
loss: 1.413127064704895,grad_norm: 0.9999997596835696, iteration: 3599
loss: 1.5234999656677246,grad_norm: 0.9999998608065276, iteration: 3600
loss: 1.5931757688522339,grad_norm: 0.999999762806853, iteration: 3601
loss: 1.3960652351379395,grad_norm: 0.999999822036042, iteration: 3602
loss: 1.4165563583374023,grad_norm: 0.999999867124461, iteration: 3603
loss: 1.4704546928405762,grad_norm: 0.9999997881525385, iteration: 3604
loss: 1.4789763689041138,grad_norm: 0.9999997570452828, iteration: 3605
loss: 1.4988501071929932,grad_norm: 0.9999998283402095, iteration: 3606
loss: 1.4242382049560547,grad_norm: 0.9999997291887324, iteration: 3607
loss: 1.4363073110580444,grad_norm: 0.9999998279762643, iteration: 3608
loss: 1.5273035764694214,grad_norm: 0.9999998392933204, iteration: 3609
loss: 1.6697663068771362,grad_norm: 0.9999997892027481, iteration: 3610
loss: 1.423488974571228,grad_norm: 0.9999997401825734, iteration: 3611
loss: 1.5033962726593018,grad_norm: 0.9999998315326297, iteration: 3612
loss: 1.4082454442977905,grad_norm: 0.9999998369044379, iteration: 3613
loss: 1.5144984722137451,grad_norm: 0.9999998325917006, iteration: 3614
loss: 1.4709190130233765,grad_norm: 0.9999998019309623, iteration: 3615
loss: 1.480945110321045,grad_norm: 0.999999816834795, iteration: 3616
loss: 1.4563499689102173,grad_norm: 0.9999997819677653, iteration: 3617
loss: 1.4020965099334717,grad_norm: 0.9999998411895391, iteration: 3618
loss: 1.37932550907135,grad_norm: 0.9999998360475625, iteration: 3619
loss: 1.534531593322754,grad_norm: 0.9999998129585833, iteration: 3620
loss: 1.418316125869751,grad_norm: 0.999999764123164, iteration: 3621
loss: 1.5117679834365845,grad_norm: 0.9999998596492569, iteration: 3622
loss: 1.582678198814392,grad_norm: 0.9999998340118609, iteration: 3623
loss: 1.408052921295166,grad_norm: 0.999999911542399, iteration: 3624
loss: 1.3319391012191772,grad_norm: 0.9999998415070936, iteration: 3625
loss: 1.393563985824585,grad_norm: 0.9999997479469528, iteration: 3626
loss: 1.4863018989562988,grad_norm: 0.9999997782217896, iteration: 3627
loss: 1.3639004230499268,grad_norm: 0.9999997236808685, iteration: 3628
loss: 1.4760347604751587,grad_norm: 0.9999997924398858, iteration: 3629
loss: 1.5679833889007568,grad_norm: 0.9999997930108032, iteration: 3630
loss: 1.4977046251296997,grad_norm: 0.9999997944258704, iteration: 3631
loss: 1.3751044273376465,grad_norm: 0.9999998383312545, iteration: 3632
loss: 1.4549626111984253,grad_norm: 0.99999982470959, iteration: 3633
loss: 1.3840341567993164,grad_norm: 0.9999998541191277, iteration: 3634
loss: 1.533464789390564,grad_norm: 0.9999998619489416, iteration: 3635
loss: 1.5442172288894653,grad_norm: 0.9999998736699641, iteration: 3636
loss: 1.6923216581344604,grad_norm: 0.9999998541594782, iteration: 3637
loss: 1.5609674453735352,grad_norm: 0.9999997978961627, iteration: 3638
loss: 1.4938925504684448,grad_norm: 0.9999997993407649, iteration: 3639
loss: 1.5241233110427856,grad_norm: 0.9999997938381542, iteration: 3640
loss: 1.7108465433120728,grad_norm: 0.9999998342433236, iteration: 3641
loss: 1.6088544130325317,grad_norm: 0.9999997752368364, iteration: 3642
loss: 1.3490591049194336,grad_norm: 0.9999998518506987, iteration: 3643
loss: 1.4542983770370483,grad_norm: 0.9999998303276114, iteration: 3644
loss: 1.5729875564575195,grad_norm: 0.9999998019895088, iteration: 3645
loss: 1.5288487672805786,grad_norm: 0.9999997720195498, iteration: 3646
loss: 1.5232633352279663,grad_norm: 0.9999998022029031, iteration: 3647
loss: 1.5805397033691406,grad_norm: 0.9999997386494851, iteration: 3648
loss: 1.3802801370620728,grad_norm: 0.9999998229037823, iteration: 3649
loss: 1.6468673944473267,grad_norm: 0.9999998288557076, iteration: 3650
loss: 1.5621585845947266,grad_norm: 0.9999999064939767, iteration: 3651
loss: 1.5033667087554932,grad_norm: 0.9999998607758007, iteration: 3652
loss: 1.3415465354919434,grad_norm: 0.9999998359558822, iteration: 3653
loss: 1.4710146188735962,grad_norm: 0.9999997633895977, iteration: 3654
loss: 1.5849992036819458,grad_norm: 0.9999997914453357, iteration: 3655
loss: 1.5915714502334595,grad_norm: 0.9999998112295766, iteration: 3656
loss: 1.4754716157913208,grad_norm: 0.9999998507164363, iteration: 3657
loss: 1.4506233930587769,grad_norm: 0.999999821982058, iteration: 3658
loss: 1.4279149770736694,grad_norm: 0.9999997406365475, iteration: 3659
loss: 1.6649200916290283,grad_norm: 0.9999998775199838, iteration: 3660
loss: 1.564022421836853,grad_norm: 0.999999786489455, iteration: 3661
loss: 1.5674715042114258,grad_norm: 0.9999998079866558, iteration: 3662
loss: 1.486393690109253,grad_norm: 0.9999998499629376, iteration: 3663
loss: 1.439866304397583,grad_norm: 0.9999997384331611, iteration: 3664
loss: 1.4256384372711182,grad_norm: 0.9999998238287403, iteration: 3665
loss: 1.4094079732894897,grad_norm: 0.999999860922777, iteration: 3666
loss: 1.5856891870498657,grad_norm: 0.9999998483430332, iteration: 3667
loss: 1.5559974908828735,grad_norm: 0.999999788890761, iteration: 3668
loss: 1.4115099906921387,grad_norm: 0.9999997826250184, iteration: 3669
loss: 1.363397240638733,grad_norm: 0.9999998464084661, iteration: 3670
loss: 1.4889519214630127,grad_norm: 0.9999997601251883, iteration: 3671
loss: 1.5121359825134277,grad_norm: 0.9999998595492161, iteration: 3672
loss: 1.263162612915039,grad_norm: 0.9999997840246584, iteration: 3673
loss: 1.5569074153900146,grad_norm: 0.9999998549827775, iteration: 3674
loss: 1.2808789014816284,grad_norm: 0.9999998093081887, iteration: 3675
loss: 1.4072558879852295,grad_norm: 0.9999999006428527, iteration: 3676
loss: 1.4875462055206299,grad_norm: 0.9999998237378003, iteration: 3677
loss: 1.5131083726882935,grad_norm: 0.9999998080099779, iteration: 3678
loss: 1.342883825302124,grad_norm: 0.9999997846489316, iteration: 3679
loss: 1.5192756652832031,grad_norm: 0.9999998076754193, iteration: 3680
loss: 1.5346394777297974,grad_norm: 0.9999998565789013, iteration: 3681
loss: 1.4705144166946411,grad_norm: 0.9999998279123017, iteration: 3682
loss: 1.4728699922561646,grad_norm: 0.9999998344411973, iteration: 3683
loss: 1.5656360387802124,grad_norm: 0.9999998204925221, iteration: 3684
loss: 1.4387164115905762,grad_norm: 0.999999828902834, iteration: 3685
loss: 1.5217537879943848,grad_norm: 0.9999997808071484, iteration: 3686
loss: 1.4498473405838013,grad_norm: 0.9999998392092811, iteration: 3687
loss: 1.4851419925689697,grad_norm: 0.9999998583444081, iteration: 3688
loss: 1.4859473705291748,grad_norm: 0.9999998055163087, iteration: 3689
loss: 1.6623367071151733,grad_norm: 0.9999998176378758, iteration: 3690
loss: 1.7059812545776367,grad_norm: 0.9999998190072963, iteration: 3691
loss: 1.4442442655563354,grad_norm: 0.9999998024458994, iteration: 3692
loss: 1.4513384103775024,grad_norm: 0.9999998651114658, iteration: 3693
loss: 1.4218162298202515,grad_norm: 0.9999998657351847, iteration: 3694
loss: 1.3851395845413208,grad_norm: 0.9999997563306975, iteration: 3695
loss: 1.5162420272827148,grad_norm: 0.9999997653019609, iteration: 3696
loss: 1.3869071006774902,grad_norm: 0.9999997263914212, iteration: 3697
loss: 1.4333584308624268,grad_norm: 0.9999998423611686, iteration: 3698
loss: 1.4671688079833984,grad_norm: 0.9999998546780514, iteration: 3699
loss: 1.3689438104629517,grad_norm: 0.9999998308984231, iteration: 3700
loss: 1.4203269481658936,grad_norm: 0.9999998598713388, iteration: 3701
loss: 1.635143518447876,grad_norm: 0.9999998110982008, iteration: 3702
loss: 1.5981098413467407,grad_norm: 0.9999999002443645, iteration: 3703
loss: 1.5675549507141113,grad_norm: 0.9999998950521585, iteration: 3704
loss: 1.4793384075164795,grad_norm: 0.9999997868384486, iteration: 3705
loss: 1.4901920557022095,grad_norm: 0.9999998393340422, iteration: 3706
loss: 1.4083316326141357,grad_norm: 0.9999998456607893, iteration: 3707
loss: 1.5407847166061401,grad_norm: 0.9999997938495827, iteration: 3708
loss: 1.431193470954895,grad_norm: 0.9999997581133959, iteration: 3709
loss: 1.604000210762024,grad_norm: 0.9999998209398392, iteration: 3710
loss: 1.407454013824463,grad_norm: 0.9999997634445446, iteration: 3711
loss: 1.343822717666626,grad_norm: 0.9999997801847332, iteration: 3712
loss: 1.3823295831680298,grad_norm: 0.999999893006523, iteration: 3713
loss: 1.519687533378601,grad_norm: 0.999999779485492, iteration: 3714
loss: 1.420951008796692,grad_norm: 0.9999997266534054, iteration: 3715
loss: 1.4771612882614136,grad_norm: 0.9999997554160714, iteration: 3716
loss: 1.575648307800293,grad_norm: 0.9999997798508206, iteration: 3717
loss: 1.4392706155776978,grad_norm: 0.999999799027743, iteration: 3718
loss: 1.4161373376846313,grad_norm: 0.9999998735141551, iteration: 3719
loss: 1.3041526079177856,grad_norm: 0.9999997514560884, iteration: 3720
loss: 1.4813565015792847,grad_norm: 0.9999999069704331, iteration: 3721
loss: 1.4507322311401367,grad_norm: 0.9999998197880261, iteration: 3722
loss: 1.4582648277282715,grad_norm: 0.9999998149847363, iteration: 3723
loss: 1.4491205215454102,grad_norm: 0.9999997921345709, iteration: 3724
loss: 1.5055807828903198,grad_norm: 0.9999997105414057, iteration: 3725
loss: 1.3731186389923096,grad_norm: 0.9999998884505817, iteration: 3726
loss: 1.5274585485458374,grad_norm: 0.9999998790812774, iteration: 3727
loss: 1.5363680124282837,grad_norm: 0.9999998196421048, iteration: 3728
loss: 1.3720369338989258,grad_norm: 0.9999998148303879, iteration: 3729
loss: 1.3768277168273926,grad_norm: 0.9999997566948905, iteration: 3730
loss: 1.4994198083877563,grad_norm: 0.9999998820590366, iteration: 3731
loss: 1.444619059562683,grad_norm: 0.9999997915192097, iteration: 3732
loss: 1.420676827430725,grad_norm: 0.99999979285233, iteration: 3733
loss: 1.3767428398132324,grad_norm: 0.9999997389665766, iteration: 3734
loss: 1.388021469116211,grad_norm: 0.9999998027091366, iteration: 3735
loss: 1.5115966796875,grad_norm: 0.9999998238158999, iteration: 3736
loss: 1.4794647693634033,grad_norm: 0.9999997482227718, iteration: 3737
loss: 1.5395365953445435,grad_norm: 0.9999998191892234, iteration: 3738
loss: 1.6147284507751465,grad_norm: 0.9999997677779113, iteration: 3739
loss: 1.3556758165359497,grad_norm: 0.9999998865655073, iteration: 3740
loss: 1.5099574327468872,grad_norm: 0.9999998180708771, iteration: 3741
loss: 1.4204450845718384,grad_norm: 0.9999998369817035, iteration: 3742
loss: 1.4067984819412231,grad_norm: 0.9999997922422905, iteration: 3743
loss: 1.4498451948165894,grad_norm: 0.9999998130804236, iteration: 3744
loss: 1.4169790744781494,grad_norm: 0.999999790847248, iteration: 3745
loss: 1.4971956014633179,grad_norm: 0.9999998478477194, iteration: 3746
loss: 1.6141583919525146,grad_norm: 0.9999998818499466, iteration: 3747
loss: 1.4423598051071167,grad_norm: 0.999999797979989, iteration: 3748
loss: 1.3188555240631104,grad_norm: 0.9999997417348075, iteration: 3749
loss: 1.3654776811599731,grad_norm: 0.9999997815239802, iteration: 3750
loss: 1.4961292743682861,grad_norm: 0.999999733884885, iteration: 3751
loss: 1.3824334144592285,grad_norm: 0.9999997456180489, iteration: 3752
loss: 1.4872808456420898,grad_norm: 0.9999998532375162, iteration: 3753
loss: 1.5953481197357178,grad_norm: 0.9999998065751613, iteration: 3754
loss: 1.3614511489868164,grad_norm: 0.9999997369791216, iteration: 3755
loss: 1.361802577972412,grad_norm: 0.999999857310428, iteration: 3756
loss: 1.4608988761901855,grad_norm: 0.9999997544421484, iteration: 3757
loss: 1.411089301109314,grad_norm: 0.9999998674018263, iteration: 3758
loss: 1.4340250492095947,grad_norm: 0.9999997423379392, iteration: 3759
loss: 1.4166597127914429,grad_norm: 0.9999998779957673, iteration: 3760
loss: 1.4237709045410156,grad_norm: 0.9999998265598484, iteration: 3761
loss: 1.451332449913025,grad_norm: 0.9999998163386163, iteration: 3762
loss: 1.419431447982788,grad_norm: 0.9999998384124745, iteration: 3763
loss: 1.5056201219558716,grad_norm: 0.9999998154667146, iteration: 3764
loss: 1.5191526412963867,grad_norm: 0.9999998891632818, iteration: 3765
loss: 1.5036805868148804,grad_norm: 0.9999998420748741, iteration: 3766
loss: 1.4774876832962036,grad_norm: 0.999999837578361, iteration: 3767
loss: 1.3604117631912231,grad_norm: 0.9999997345749277, iteration: 3768
loss: 1.4665216207504272,grad_norm: 0.9999998194343493, iteration: 3769
loss: 1.4817193746566772,grad_norm: 0.9999998236575184, iteration: 3770
loss: 1.5833230018615723,grad_norm: 0.9999998493362657, iteration: 3771
loss: 1.4424858093261719,grad_norm: 0.999999801215323, iteration: 3772
loss: 1.4720783233642578,grad_norm: 0.9999997970527111, iteration: 3773
loss: 1.4814332723617554,grad_norm: 0.9999997810290611, iteration: 3774
loss: 1.5096726417541504,grad_norm: 0.9999998042869914, iteration: 3775
loss: 1.4041719436645508,grad_norm: 0.9999998132349559, iteration: 3776
loss: 1.4642800092697144,grad_norm: 0.9999998686532011, iteration: 3777
loss: 1.4290095567703247,grad_norm: 0.999999767650218, iteration: 3778
loss: 1.4408717155456543,grad_norm: 0.9999997550743788, iteration: 3779
loss: 1.5472455024719238,grad_norm: 0.9999998332421609, iteration: 3780
loss: 1.4914745092391968,grad_norm: 0.9999997949174727, iteration: 3781
loss: 1.288907527923584,grad_norm: 0.999999741835855, iteration: 3782
loss: 1.410996913909912,grad_norm: 0.999999796289289, iteration: 3783
loss: 1.425032138824463,grad_norm: 0.9999997762980322, iteration: 3784
loss: 1.4260112047195435,grad_norm: 0.9999999091908867, iteration: 3785
loss: 1.3891600370407104,grad_norm: 0.9999998307511534, iteration: 3786
loss: 1.465455174446106,grad_norm: 0.9999998060863706, iteration: 3787
loss: 1.4350266456604004,grad_norm: 0.999999873223477, iteration: 3788
loss: 1.5992014408111572,grad_norm: 0.9999998032783567, iteration: 3789
loss: 1.4381604194641113,grad_norm: 0.999999786519909, iteration: 3790
loss: 1.5169979333877563,grad_norm: 0.9999997692344496, iteration: 3791
loss: 1.3951373100280762,grad_norm: 0.999999809517747, iteration: 3792
loss: 1.3968160152435303,grad_norm: 0.9999998462346362, iteration: 3793
loss: 1.4031133651733398,grad_norm: 0.9999997927485256, iteration: 3794
loss: 1.3682215213775635,grad_norm: 0.9999997950449636, iteration: 3795
loss: 1.4205857515335083,grad_norm: 0.9999997702837415, iteration: 3796
loss: 1.479174017906189,grad_norm: 0.9999998612386721, iteration: 3797
loss: 1.3496534824371338,grad_norm: 0.9999997507582562, iteration: 3798
loss: 1.4352827072143555,grad_norm: 0.9999998254808307, iteration: 3799
loss: 1.397818922996521,grad_norm: 0.9999998131627011, iteration: 3800
loss: 1.4326932430267334,grad_norm: 0.9999998163065085, iteration: 3801
loss: 1.3263243436813354,grad_norm: 0.9999998206176526, iteration: 3802
loss: 1.4620683193206787,grad_norm: 0.9999998104530327, iteration: 3803
loss: 1.3739196062088013,grad_norm: 0.9999998203656961, iteration: 3804
loss: 1.4482630491256714,grad_norm: 0.9999998548007012, iteration: 3805
loss: 1.5329606533050537,grad_norm: 0.9999998694412033, iteration: 3806
loss: 1.649928331375122,grad_norm: 0.9999997690148806, iteration: 3807
loss: 1.2377192974090576,grad_norm: 0.9999998251743709, iteration: 3808
loss: 1.2701796293258667,grad_norm: 0.9999997363948787, iteration: 3809
loss: 1.561102032661438,grad_norm: 0.9999998614706633, iteration: 3810
loss: 1.2111955881118774,grad_norm: 0.9999998159860958, iteration: 3811
loss: 1.4636931419372559,grad_norm: 0.9999998255785648, iteration: 3812
loss: 1.4528238773345947,grad_norm: 0.999999820282409, iteration: 3813
loss: 1.308027744293213,grad_norm: 0.999999833303102, iteration: 3814
loss: 1.2590017318725586,grad_norm: 0.999999837771578, iteration: 3815
loss: 1.2603598833084106,grad_norm: 0.9999997452901914, iteration: 3816
loss: 1.4165630340576172,grad_norm: 0.9999997925472714, iteration: 3817
loss: 1.538069725036621,grad_norm: 0.9999998554216674, iteration: 3818
loss: 1.5493124723434448,grad_norm: 0.9999997555438025, iteration: 3819
loss: 1.539254903793335,grad_norm: 0.9999997602428784, iteration: 3820
loss: 1.4447197914123535,grad_norm: 0.9999999045846714, iteration: 3821
loss: 1.4306145906448364,grad_norm: 0.9999998721183395, iteration: 3822
loss: 1.4634817838668823,grad_norm: 0.9999998273259209, iteration: 3823
loss: 1.4001117944717407,grad_norm: 0.9999998610016564, iteration: 3824
loss: 1.377087116241455,grad_norm: 0.9999997780572516, iteration: 3825
loss: 1.2521549463272095,grad_norm: 0.9999998730106768, iteration: 3826
loss: 1.380844235420227,grad_norm: 0.9999997984166871, iteration: 3827
loss: 1.2071892023086548,grad_norm: 0.9999998409571065, iteration: 3828
loss: 1.602199912071228,grad_norm: 0.999999880801129, iteration: 3829
loss: 1.6483495235443115,grad_norm: 0.999999801019856, iteration: 3830
loss: 1.410638451576233,grad_norm: 0.9999998008949722, iteration: 3831
loss: 1.5696176290512085,grad_norm: 0.999999852687017, iteration: 3832
loss: 1.5330700874328613,grad_norm: 0.999999825135479, iteration: 3833
loss: 1.67507803440094,grad_norm: 0.9999998293244938, iteration: 3834
loss: 1.3644930124282837,grad_norm: 0.9999998848719083, iteration: 3835
loss: 1.3072535991668701,grad_norm: 0.9999997579538015, iteration: 3836
loss: 1.5613592863082886,grad_norm: 0.9999998323844886, iteration: 3837
loss: 1.3418128490447998,grad_norm: 0.9999997875429287, iteration: 3838
loss: 1.2830045223236084,grad_norm: 0.9999998099892057, iteration: 3839
loss: 1.5542199611663818,grad_norm: 0.9999997849884977, iteration: 3840
loss: 1.4077610969543457,grad_norm: 0.9999997824533163, iteration: 3841
loss: 1.4462858438491821,grad_norm: 0.9999998630312508, iteration: 3842
loss: 1.3781291246414185,grad_norm: 0.999999883789108, iteration: 3843
loss: 1.4983181953430176,grad_norm: 0.9999997934654478, iteration: 3844
loss: 1.360701560974121,grad_norm: 0.9999997943304223, iteration: 3845
loss: 1.272944450378418,grad_norm: 0.9999997722818286, iteration: 3846
loss: 1.3243261575698853,grad_norm: 0.9999997861526222, iteration: 3847
loss: 1.5230834484100342,grad_norm: 0.999999842507617, iteration: 3848
loss: 1.4710325002670288,grad_norm: 0.9999998166737875, iteration: 3849
loss: 1.3058949708938599,grad_norm: 0.9999998290195499, iteration: 3850
loss: 1.4743531942367554,grad_norm: 0.999999854751126, iteration: 3851
loss: 1.298263669013977,grad_norm: 0.9999997352175366, iteration: 3852
loss: 1.3840025663375854,grad_norm: 0.9999997574317074, iteration: 3853
loss: 1.5828545093536377,grad_norm: 0.999999852886115, iteration: 3854
loss: 1.4434635639190674,grad_norm: 0.9999998210379959, iteration: 3855
loss: 1.592450499534607,grad_norm: 0.9999998911603972, iteration: 3856
loss: 1.4001538753509521,grad_norm: 0.9999998603095561, iteration: 3857
loss: 1.3972437381744385,grad_norm: 0.999999847195452, iteration: 3858
loss: 1.4815729856491089,grad_norm: 0.9999997935200049, iteration: 3859
loss: 1.462448239326477,grad_norm: 0.9999997025790418, iteration: 3860
loss: 1.5423203706741333,grad_norm: 0.9999998147083803, iteration: 3861
loss: 1.4722753763198853,grad_norm: 0.9999998672862906, iteration: 3862
loss: 1.4097598791122437,grad_norm: 0.9999998029225072, iteration: 3863
loss: 1.324703335762024,grad_norm: 0.9999997496412221, iteration: 3864
loss: 1.2467682361602783,grad_norm: 0.999999797898996, iteration: 3865
loss: 1.463585615158081,grad_norm: 0.9999998109912009, iteration: 3866
loss: 1.4360144138336182,grad_norm: 0.9999999147605387, iteration: 3867
loss: 1.4849259853363037,grad_norm: 0.9999997314734568, iteration: 3868
loss: 1.379791021347046,grad_norm: 0.9999998172339213, iteration: 3869
loss: 1.4612852334976196,grad_norm: 0.9999998138333689, iteration: 3870
loss: 1.4236069917678833,grad_norm: 0.9999998441915334, iteration: 3871
loss: 1.2966198921203613,grad_norm: 0.9999998031752756, iteration: 3872
loss: 1.3802865743637085,grad_norm: 0.9999998659695358, iteration: 3873
loss: 1.3575594425201416,grad_norm: 0.9999998287662845, iteration: 3874
loss: 1.4545849561691284,grad_norm: 0.9999997307943501, iteration: 3875
loss: 1.505136489868164,grad_norm: 0.9999997481596384, iteration: 3876
loss: 1.391595721244812,grad_norm: 0.9999998081670547, iteration: 3877
loss: 1.4026089906692505,grad_norm: 0.9999998105748463, iteration: 3878
loss: 1.3742612600326538,grad_norm: 0.9999998367917803, iteration: 3879
loss: 1.5803158283233643,grad_norm: 0.9999998289324724, iteration: 3880
loss: 1.4733631610870361,grad_norm: 0.9999998687552868, iteration: 3881
loss: 1.3882176876068115,grad_norm: 0.9999997379565027, iteration: 3882
loss: 1.4234939813613892,grad_norm: 0.9999998643298729, iteration: 3883
loss: 1.446165919303894,grad_norm: 0.9999997470900422, iteration: 3884
loss: 1.3515427112579346,grad_norm: 0.9999998262703275, iteration: 3885
loss: 1.2583223581314087,grad_norm: 0.9999997510995887, iteration: 3886
loss: 1.4064092636108398,grad_norm: 0.9999997987896216, iteration: 3887
loss: 1.4637032747268677,grad_norm: 0.9999998900573754, iteration: 3888
loss: 1.3192800283432007,grad_norm: 0.9999998705236852, iteration: 3889
loss: 1.3629627227783203,grad_norm: 0.9999997430673062, iteration: 3890
loss: 1.3007644414901733,grad_norm: 0.9999997725564969, iteration: 3891
loss: 1.522511601448059,grad_norm: 0.999999792544712, iteration: 3892
loss: 1.3242137432098389,grad_norm: 0.9999996807046233, iteration: 3893
loss: 1.3359452486038208,grad_norm: 0.9999997985938821, iteration: 3894
loss: 1.4620410203933716,grad_norm: 0.9999999015521818, iteration: 3895
loss: 1.502458930015564,grad_norm: 0.9999998022670044, iteration: 3896
loss: 1.3627667427062988,grad_norm: 0.9999997344903307, iteration: 3897
loss: 1.3764704465866089,grad_norm: 0.9999997874817158, iteration: 3898
loss: 1.4961776733398438,grad_norm: 0.9999998007961797, iteration: 3899
loss: 1.3854542970657349,grad_norm: 0.9999998083607107, iteration: 3900
loss: 1.3521426916122437,grad_norm: 0.9999997952248931, iteration: 3901
loss: 1.4486358165740967,grad_norm: 0.9999997334743574, iteration: 3902
loss: 1.414426326751709,grad_norm: 0.9999998822820868, iteration: 3903
loss: 1.3680130243301392,grad_norm: 0.9999997852744665, iteration: 3904
loss: 1.3889724016189575,grad_norm: 0.9999998422666732, iteration: 3905
loss: 1.4663327932357788,grad_norm: 0.9999997805813817, iteration: 3906
loss: 1.4795314073562622,grad_norm: 0.9999998880896667, iteration: 3907
loss: 1.5379583835601807,grad_norm: 0.9999997894781293, iteration: 3908
loss: 1.3248566389083862,grad_norm: 0.9999997140929118, iteration: 3909
loss: 1.2973301410675049,grad_norm: 0.9999997622638226, iteration: 3910
loss: 1.398523211479187,grad_norm: 0.9999997604414455, iteration: 3911
loss: 1.5722687244415283,grad_norm: 0.9999998367092993, iteration: 3912
loss: 1.5744431018829346,grad_norm: 0.9999998143474755, iteration: 3913
loss: 1.4551469087600708,grad_norm: 0.999999831607464, iteration: 3914
loss: 1.2192730903625488,grad_norm: 0.9999997850326428, iteration: 3915
loss: 1.3997541666030884,grad_norm: 0.9999998098610644, iteration: 3916
loss: 1.4012168645858765,grad_norm: 0.999999755836823, iteration: 3917
loss: 1.2779937982559204,grad_norm: 0.9999998019298446, iteration: 3918
loss: 1.29066801071167,grad_norm: 0.999999807870076, iteration: 3919
loss: 1.409623384475708,grad_norm: 0.9999998532352012, iteration: 3920
loss: 1.4236228466033936,grad_norm: 0.9999997858882934, iteration: 3921
loss: 1.441433310508728,grad_norm: 0.9999997613405441, iteration: 3922
loss: 1.3447386026382446,grad_norm: 0.9999998653417207, iteration: 3923
loss: 1.5196866989135742,grad_norm: 0.9999998312691328, iteration: 3924
loss: 1.4538004398345947,grad_norm: 0.9999997471276786, iteration: 3925
loss: 1.404010534286499,grad_norm: 0.9999997416455924, iteration: 3926
loss: 1.474372386932373,grad_norm: 0.9999998343321247, iteration: 3927
loss: 1.427146077156067,grad_norm: 0.9999997682224737, iteration: 3928
loss: 1.30830717086792,grad_norm: 0.9999997890704283, iteration: 3929
loss: 1.3831446170806885,grad_norm: 0.9999996374818299, iteration: 3930
loss: 1.2772308588027954,grad_norm: 0.9999998117968075, iteration: 3931
loss: 1.3931052684783936,grad_norm: 0.999999886135551, iteration: 3932
loss: 1.3558529615402222,grad_norm: 0.9999998786414777, iteration: 3933
loss: 1.4218794107437134,grad_norm: 0.9999998779857091, iteration: 3934
loss: 1.2077314853668213,grad_norm: 0.9999998384808966, iteration: 3935
loss: 1.4233953952789307,grad_norm: 0.9999998097366624, iteration: 3936
loss: 1.421226978302002,grad_norm: 0.9999997519686639, iteration: 3937
loss: 1.4838758707046509,grad_norm: 0.9999998250535351, iteration: 3938
loss: 1.443276286125183,grad_norm: 0.9999998000138022, iteration: 3939
loss: 1.2831735610961914,grad_norm: 0.9999997810619453, iteration: 3940
loss: 1.3324036598205566,grad_norm: 0.9999997680562315, iteration: 3941
loss: 1.5080901384353638,grad_norm: 0.9999998618006585, iteration: 3942
loss: 1.4156925678253174,grad_norm: 0.9999998411938795, iteration: 3943
loss: 1.2953969240188599,grad_norm: 0.999999850732481, iteration: 3944
loss: 1.448305368423462,grad_norm: 0.9999998359339413, iteration: 3945
loss: 1.3687009811401367,grad_norm: 0.9999998346780774, iteration: 3946
loss: 1.3900922536849976,grad_norm: 0.9999997753406695, iteration: 3947
loss: 1.398308277130127,grad_norm: 0.9999997623694292, iteration: 3948
loss: 1.2559900283813477,grad_norm: 0.9999998487440318, iteration: 3949
loss: 1.4941917657852173,grad_norm: 0.999999693355202, iteration: 3950
loss: 1.3865216970443726,grad_norm: 0.9999997369801428, iteration: 3951
loss: 1.4514198303222656,grad_norm: 0.9999997769015618, iteration: 3952
loss: 1.3486236333847046,grad_norm: 0.9999997249777858, iteration: 3953
loss: 1.3663170337677002,grad_norm: 0.9999997955968345, iteration: 3954
loss: 1.4342072010040283,grad_norm: 0.9999998694919768, iteration: 3955
loss: 1.5101486444473267,grad_norm: 0.9999998209610151, iteration: 3956
loss: 1.3227264881134033,grad_norm: 0.9999997840719561, iteration: 3957
loss: 1.3424475193023682,grad_norm: 0.9999998246781794, iteration: 3958
loss: 1.4654865264892578,grad_norm: 0.9999997632548238, iteration: 3959
loss: 1.5668565034866333,grad_norm: 0.9999999043860499, iteration: 3960
loss: 1.429661512374878,grad_norm: 0.9999998155930604, iteration: 3961
loss: 1.26943838596344,grad_norm: 0.9999997944739802, iteration: 3962
loss: 1.3198198080062866,grad_norm: 0.9999998592197558, iteration: 3963
loss: 1.4059538841247559,grad_norm: 0.9999998650017065, iteration: 3964
loss: 1.4233368635177612,grad_norm: 0.9999996988692993, iteration: 3965
loss: 1.2680292129516602,grad_norm: 0.999999733555876, iteration: 3966
loss: 1.4191256761550903,grad_norm: 0.9999997896013587, iteration: 3967
loss: 1.593839406967163,grad_norm: 0.9999998147398723, iteration: 3968
loss: 1.3681796789169312,grad_norm: 0.999999755583212, iteration: 3969
loss: 1.3745254278182983,grad_norm: 0.9999997257279996, iteration: 3970
loss: 1.4840283393859863,grad_norm: 0.9999997889745492, iteration: 3971
loss: 1.381474256515503,grad_norm: 0.9999997440816834, iteration: 3972
loss: 1.3186287879943848,grad_norm: 0.9999997983432041, iteration: 3973
loss: 1.3658384084701538,grad_norm: 0.999999827359225, iteration: 3974
loss: 1.3153586387634277,grad_norm: 0.9999998898805977, iteration: 3975
loss: 1.4671101570129395,grad_norm: 0.9999998632372514, iteration: 3976
loss: 1.3403600454330444,grad_norm: 0.99999976091967, iteration: 3977
loss: 1.412752389907837,grad_norm: 0.9999997076349725, iteration: 3978
loss: 1.4610002040863037,grad_norm: 0.9999997893121892, iteration: 3979
loss: 1.3335036039352417,grad_norm: 0.9999998099569362, iteration: 3980
loss: 1.4007010459899902,grad_norm: 0.9999998131072578, iteration: 3981
loss: 1.4317477941513062,grad_norm: 0.9999997766457528, iteration: 3982
loss: 1.302304744720459,grad_norm: 0.999999817481993, iteration: 3983
loss: 1.3551867008209229,grad_norm: 0.99999984317532, iteration: 3984
loss: 1.4453977346420288,grad_norm: 0.9999997749828861, iteration: 3985
loss: 1.2490200996398926,grad_norm: 0.9999997487400828, iteration: 3986
loss: 1.4522217512130737,grad_norm: 0.9999998831658761, iteration: 3987
loss: 1.370659351348877,grad_norm: 0.9999998344783926, iteration: 3988
loss: 1.5021929740905762,grad_norm: 0.9999998583826393, iteration: 3989
loss: 1.4599778652191162,grad_norm: 0.9999997927973471, iteration: 3990
loss: 1.383257269859314,grad_norm: 0.9999997336127898, iteration: 3991
loss: 1.4273792505264282,grad_norm: 0.9999998240964414, iteration: 3992
loss: 1.5063782930374146,grad_norm: 0.9999998454862931, iteration: 3993
loss: 1.50972318649292,grad_norm: 0.9999998036060322, iteration: 3994
loss: 1.4505754709243774,grad_norm: 0.9999998169747508, iteration: 3995
loss: 1.3375725746154785,grad_norm: 0.9999998247618619, iteration: 3996
loss: 1.5083032846450806,grad_norm: 0.999999866118263, iteration: 3997
loss: 1.4404445886611938,grad_norm: 0.9999997693222595, iteration: 3998
loss: 1.4477850198745728,grad_norm: 0.9999997609694236, iteration: 3999
loss: 1.321380853652954,grad_norm: 0.999999718206846, iteration: 4000
loss: 1.378430962562561,grad_norm: 0.9999997399318674, iteration: 4001
loss: 1.477333426475525,grad_norm: 0.9999997430970452, iteration: 4002
loss: 1.23800528049469,grad_norm: 0.9999997881825919, iteration: 4003
loss: 1.4986517429351807,grad_norm: 0.9999997678831555, iteration: 4004
loss: 1.4226356744766235,grad_norm: 0.9999998117639062, iteration: 4005
loss: 1.5335463285446167,grad_norm: 0.9999998116957046, iteration: 4006
loss: 1.4859107732772827,grad_norm: 0.9999998583345073, iteration: 4007
loss: 1.3927041292190552,grad_norm: 0.9999996920147126, iteration: 4008
loss: 1.3717563152313232,grad_norm: 0.9999998222282899, iteration: 4009
loss: 1.467011570930481,grad_norm: 0.9999998574018766, iteration: 4010
loss: 1.2944105863571167,grad_norm: 0.9999997537001335, iteration: 4011
loss: 1.411872386932373,grad_norm: 0.9999998261292212, iteration: 4012
loss: 1.3556653261184692,grad_norm: 0.9999998444454588, iteration: 4013
loss: 1.2429946660995483,grad_norm: 0.9999997868135239, iteration: 4014
loss: 1.4161286354064941,grad_norm: 0.9999998538986739, iteration: 4015
loss: 1.437928557395935,grad_norm: 0.9999998014699384, iteration: 4016
loss: 1.3851428031921387,grad_norm: 0.9999997970945892, iteration: 4017
loss: 1.3148797750473022,grad_norm: 0.9999998128039078, iteration: 4018
loss: 1.323926568031311,grad_norm: 0.9999997913026695, iteration: 4019
loss: 1.3751122951507568,grad_norm: 0.9999999073346644, iteration: 4020
loss: 1.32551908493042,grad_norm: 0.999999805239923, iteration: 4021
loss: 1.279748558998108,grad_norm: 0.9999997742315059, iteration: 4022
loss: 1.55451238155365,grad_norm: 0.9999998385344664, iteration: 4023
loss: 1.3377872705459595,grad_norm: 0.9999997927629241, iteration: 4024
loss: 1.4990922212600708,grad_norm: 0.999999828329753, iteration: 4025
loss: 1.3059417009353638,grad_norm: 0.9999997633200336, iteration: 4026
loss: 1.3387489318847656,grad_norm: 0.9999997062069605, iteration: 4027
loss: 1.5412333011627197,grad_norm: 0.9999998655468765, iteration: 4028
loss: 1.3586167097091675,grad_norm: 0.9999997879159808, iteration: 4029
loss: 1.3903999328613281,grad_norm: 0.9999997151965857, iteration: 4030
loss: 1.5530798435211182,grad_norm: 0.9999998500991328, iteration: 4031
loss: 1.1688013076782227,grad_norm: 0.9999998538626664, iteration: 4032
loss: 1.3776271343231201,grad_norm: 0.9999997674682883, iteration: 4033
loss: 1.4172985553741455,grad_norm: 0.9999998338481318, iteration: 4034
loss: 1.3189674615859985,grad_norm: 0.9999998508059348, iteration: 4035
loss: 1.3056588172912598,grad_norm: 0.9999997252978148, iteration: 4036
loss: 1.4568058252334595,grad_norm: 0.9999997624038893, iteration: 4037
loss: 1.30975341796875,grad_norm: 0.9999997561740639, iteration: 4038
loss: 1.462471604347229,grad_norm: 0.9999997896171899, iteration: 4039
loss: 1.4932332038879395,grad_norm: 0.9999998039530329, iteration: 4040
loss: 1.416348934173584,grad_norm: 0.9999998250765754, iteration: 4041
loss: 1.5393857955932617,grad_norm: 0.9999997841736399, iteration: 4042
loss: 1.4636073112487793,grad_norm: 0.9999997991663072, iteration: 4043
loss: 1.4098559617996216,grad_norm: 0.9999997705984186, iteration: 4044
loss: 1.5769709348678589,grad_norm: 0.9999998751439143, iteration: 4045
loss: 1.4130645990371704,grad_norm: 0.9999998404496535, iteration: 4046
loss: 1.3935372829437256,grad_norm: 0.9999997988284313, iteration: 4047
loss: 1.368937611579895,grad_norm: 0.9999998303225959, iteration: 4048
loss: 1.4571807384490967,grad_norm: 0.9999997893321334, iteration: 4049
loss: 1.6056454181671143,grad_norm: 0.9999998191975172, iteration: 4050
loss: 1.393640398979187,grad_norm: 0.9999998376584307, iteration: 4051
loss: 1.3071303367614746,grad_norm: 0.9999998324251651, iteration: 4052
loss: 1.4418773651123047,grad_norm: 0.9999998664359261, iteration: 4053
loss: 1.4186197519302368,grad_norm: 0.9999997522403363, iteration: 4054
loss: 1.3213527202606201,grad_norm: 0.9999997234802258, iteration: 4055
loss: 1.3877851963043213,grad_norm: 0.9999997711858114, iteration: 4056
loss: 1.4621474742889404,grad_norm: 0.9999998123232022, iteration: 4057
loss: 1.4981142282485962,grad_norm: 0.9999998333870459, iteration: 4058
loss: 1.4384242296218872,grad_norm: 0.9999998573585693, iteration: 4059
loss: 1.2209330797195435,grad_norm: 0.9999997984892578, iteration: 4060
loss: 1.4793819189071655,grad_norm: 0.9999997890471168, iteration: 4061
loss: 1.3680803775787354,grad_norm: 0.999999793363772, iteration: 4062
loss: 1.2731541395187378,grad_norm: 0.9999998169161503, iteration: 4063
loss: 1.5534343719482422,grad_norm: 0.9999998364585168, iteration: 4064
loss: 1.3853827714920044,grad_norm: 0.9999997781291367, iteration: 4065
loss: 1.404386281967163,grad_norm: 0.9999997889774876, iteration: 4066
loss: 1.3437824249267578,grad_norm: 0.9999996933935402, iteration: 4067
loss: 1.4213600158691406,grad_norm: 0.9999998090448818, iteration: 4068
loss: 1.5136679410934448,grad_norm: 0.9999997691668354, iteration: 4069
loss: 1.3261407613754272,grad_norm: 0.9999997711185838, iteration: 4070
loss: 1.4046205282211304,grad_norm: 0.9999997625637942, iteration: 4071
loss: 1.203666090965271,grad_norm: 0.9999997301042027, iteration: 4072
loss: 1.385727047920227,grad_norm: 0.9999997507571596, iteration: 4073
loss: 1.4202836751937866,grad_norm: 0.999999736481595, iteration: 4074
loss: 1.3522382974624634,grad_norm: 0.9999998174042767, iteration: 4075
loss: 1.5684356689453125,grad_norm: 0.9999998330285935, iteration: 4076
loss: 1.1377274990081787,grad_norm: 0.9999997077599369, iteration: 4077
loss: 1.4209705591201782,grad_norm: 0.9999998266706308, iteration: 4078
loss: 1.3393096923828125,grad_norm: 0.9999997869514519, iteration: 4079
loss: 1.2717756032943726,grad_norm: 0.9999997936506475, iteration: 4080
loss: 1.3558611869812012,grad_norm: 0.9999997434085821, iteration: 4081
loss: 1.4357770681381226,grad_norm: 0.9999998003416016, iteration: 4082
loss: 1.4193798303604126,grad_norm: 0.9999997668365141, iteration: 4083
loss: 1.2848119735717773,grad_norm: 0.999999819850915, iteration: 4084
loss: 1.378250002861023,grad_norm: 0.999999788194713, iteration: 4085
loss: 1.4147733449935913,grad_norm: 0.999999814449063, iteration: 4086
loss: 1.338234782218933,grad_norm: 0.9999997564664019, iteration: 4087
loss: 1.3508539199829102,grad_norm: 0.9999997522405847, iteration: 4088
loss: 1.4880104064941406,grad_norm: 0.9999997707573867, iteration: 4089
loss: 1.323507308959961,grad_norm: 0.999999816796395, iteration: 4090
loss: 1.5017576217651367,grad_norm: 0.9999996729242517, iteration: 4091
loss: 1.3504890203475952,grad_norm: 0.9999998313936133, iteration: 4092
loss: 1.5085986852645874,grad_norm: 0.9999998095466961, iteration: 4093
loss: 1.4393492937088013,grad_norm: 0.9999997810738424, iteration: 4094
loss: 1.4728285074234009,grad_norm: 0.999999850895144, iteration: 4095
loss: 1.3077431917190552,grad_norm: 0.9999998096375655, iteration: 4096
loss: 1.491548776626587,grad_norm: 0.999999775233121, iteration: 4097
loss: 1.363381266593933,grad_norm: 0.9999997923332192, iteration: 4098
loss: 1.3012992143630981,grad_norm: 0.9999997935986656, iteration: 4099
loss: 1.3651586771011353,grad_norm: 0.9999997900188795, iteration: 4100
loss: 1.3380392789840698,grad_norm: 0.9999997750690646, iteration: 4101
loss: 1.2616606950759888,grad_norm: 0.9999997732870052, iteration: 4102
loss: 1.2719405889511108,grad_norm: 0.9999997114139403, iteration: 4103
loss: 1.380197286605835,grad_norm: 0.9999998099377061, iteration: 4104
loss: 1.3169599771499634,grad_norm: 0.9999998194460519, iteration: 4105
loss: 1.3467967510223389,grad_norm: 0.999999790644208, iteration: 4106
loss: 1.3735476732254028,grad_norm: 0.9999998261339632, iteration: 4107
loss: 1.3569344282150269,grad_norm: 0.9999998153495737, iteration: 4108
loss: 1.4481700658798218,grad_norm: 0.9999998325866605, iteration: 4109
loss: 1.4637930393218994,grad_norm: 0.9999998556896286, iteration: 4110
loss: 1.5425751209259033,grad_norm: 0.9999998177080641, iteration: 4111
loss: 1.4195027351379395,grad_norm: 0.9999998237870836, iteration: 4112
loss: 1.353591799736023,grad_norm: 0.9999997290754215, iteration: 4113
loss: 1.5020084381103516,grad_norm: 0.9999997483177494, iteration: 4114
loss: 1.347991943359375,grad_norm: 0.9999998246617806, iteration: 4115
loss: 1.2452207803726196,grad_norm: 0.9999997338108275, iteration: 4116
loss: 1.485764980316162,grad_norm: 0.9999998189388118, iteration: 4117
loss: 1.4434285163879395,grad_norm: 0.9999997301586178, iteration: 4118
loss: 1.363240122795105,grad_norm: 0.9999997924525056, iteration: 4119
loss: 1.4432480335235596,grad_norm: 0.9999998248051796, iteration: 4120
loss: 1.4187836647033691,grad_norm: 0.9999997297252368, iteration: 4121
loss: 1.3194435834884644,grad_norm: 0.999999792827825, iteration: 4122
loss: 1.3476582765579224,grad_norm: 0.9999998840426967, iteration: 4123
loss: 1.3776187896728516,grad_norm: 0.9999997522353457, iteration: 4124
loss: 1.4437437057495117,grad_norm: 0.9999997568428204, iteration: 4125
loss: 1.4580367803573608,grad_norm: 0.9999998113891141, iteration: 4126
loss: 1.363033413887024,grad_norm: 0.9999997650201127, iteration: 4127
loss: 1.3377013206481934,grad_norm: 0.9999998336201634, iteration: 4128
loss: 1.2114914655685425,grad_norm: 0.999999842588999, iteration: 4129
loss: 1.3982518911361694,grad_norm: 0.9999998145651223, iteration: 4130
loss: 1.3024098873138428,grad_norm: 0.9999997982308074, iteration: 4131
loss: 1.354522466659546,grad_norm: 0.9999997950558225, iteration: 4132
loss: 1.3893369436264038,grad_norm: 0.9999998026289109, iteration: 4133
loss: 1.3809442520141602,grad_norm: 0.9999997454718886, iteration: 4134
loss: 1.43548583984375,grad_norm: 0.9999998627950204, iteration: 4135
loss: 1.351862907409668,grad_norm: 0.9999996992984639, iteration: 4136
loss: 1.3479450941085815,grad_norm: 0.9999997086367439, iteration: 4137
loss: 1.4505585432052612,grad_norm: 0.9999997116377709, iteration: 4138
loss: 1.2009903192520142,grad_norm: 0.9999997445912376, iteration: 4139
loss: 1.302758812904358,grad_norm: 0.9999997627133335, iteration: 4140
loss: 1.436437964439392,grad_norm: 0.9999997787763175, iteration: 4141
loss: 1.339036464691162,grad_norm: 0.9999997857275981, iteration: 4142
loss: 1.392148733139038,grad_norm: 0.9999998017805394, iteration: 4143
loss: 1.350820541381836,grad_norm: 0.9999998320263028, iteration: 4144
loss: 1.468613624572754,grad_norm: 0.9999998119815074, iteration: 4145
loss: 1.2812755107879639,grad_norm: 0.9999998372673015, iteration: 4146
loss: 1.1877025365829468,grad_norm: 0.9999997002991944, iteration: 4147
loss: 1.3657411336898804,grad_norm: 0.9999996879211414, iteration: 4148
loss: 1.4094104766845703,grad_norm: 0.9999996919336294, iteration: 4149
loss: 1.4918445348739624,grad_norm: 0.9999997801909648, iteration: 4150
loss: 1.3819960355758667,grad_norm: 0.9999998167539708, iteration: 4151
loss: 1.3129782676696777,grad_norm: 0.9999997504690883, iteration: 4152
loss: 1.3899765014648438,grad_norm: 0.9999997857855953, iteration: 4153
loss: 1.2180677652359009,grad_norm: 0.9999998028275743, iteration: 4154
loss: 1.3734019994735718,grad_norm: 0.9999997468923432, iteration: 4155
loss: 1.2971889972686768,grad_norm: 0.9999998197557647, iteration: 4156
loss: 1.3114300966262817,grad_norm: 0.9999998128810115, iteration: 4157
loss: 1.4452286958694458,grad_norm: 0.9999998632627103, iteration: 4158
loss: 1.4041575193405151,grad_norm: 0.9999997965666341, iteration: 4159
loss: 1.3821942806243896,grad_norm: 0.9999997461903292, iteration: 4160
loss: 1.6099894046783447,grad_norm: 0.999999817296172, iteration: 4161
loss: 1.3932712078094482,grad_norm: 0.9999997861720479, iteration: 4162
loss: 1.2917263507843018,grad_norm: 0.9999998515942924, iteration: 4163
loss: 1.3643913269042969,grad_norm: 0.9999998028603463, iteration: 4164
loss: 1.462929606437683,grad_norm: 0.9999997737813657, iteration: 4165
loss: 1.36196768283844,grad_norm: 0.9999998320898407, iteration: 4166
loss: 1.188016653060913,grad_norm: 0.9999997333771166, iteration: 4167
loss: 1.406219482421875,grad_norm: 0.9999997630610432, iteration: 4168
loss: 1.3707150220870972,grad_norm: 0.9999997745634387, iteration: 4169
loss: 1.3244937658309937,grad_norm: 0.9999997819969363, iteration: 4170
loss: 1.4075363874435425,grad_norm: 0.9999998812800688, iteration: 4171
loss: 1.338657259941101,grad_norm: 0.9999997301832132, iteration: 4172
loss: 1.3034753799438477,grad_norm: 0.9999998396551191, iteration: 4173
loss: 1.3664276599884033,grad_norm: 0.9999998156258807, iteration: 4174
loss: 1.4494986534118652,grad_norm: 0.9999998453214165, iteration: 4175
loss: 1.259581208229065,grad_norm: 0.9999997165305429, iteration: 4176
loss: 1.3440167903900146,grad_norm: 0.9999998190331262, iteration: 4177
loss: 1.4241608381271362,grad_norm: 0.9999998110239048, iteration: 4178
loss: 1.303831934928894,grad_norm: 0.9999998159849243, iteration: 4179
loss: 1.3745356798171997,grad_norm: 0.9999997779292794, iteration: 4180
loss: 1.4193329811096191,grad_norm: 0.9999997815976591, iteration: 4181
loss: 1.36589515209198,grad_norm: 0.9999997444847143, iteration: 4182
loss: 1.4468286037445068,grad_norm: 0.9999997707507714, iteration: 4183
loss: 1.4407768249511719,grad_norm: 0.9999998146025144, iteration: 4184
loss: 1.3423473834991455,grad_norm: 0.9999998823050199, iteration: 4185
loss: 1.3472445011138916,grad_norm: 0.9999998326628355, iteration: 4186
loss: 1.225402593612671,grad_norm: 0.9999997716477634, iteration: 4187
loss: 1.4239611625671387,grad_norm: 0.9999997404799005, iteration: 4188
loss: 1.3850045204162598,grad_norm: 0.9999997960748708, iteration: 4189
loss: 1.2274067401885986,grad_norm: 0.999999819158417, iteration: 4190
loss: 1.4467920064926147,grad_norm: 0.9999998045376199, iteration: 4191
loss: 1.2916333675384521,grad_norm: 0.999999760387334, iteration: 4192
loss: 1.3804092407226562,grad_norm: 0.9999999073055448, iteration: 4193
loss: 1.3729795217514038,grad_norm: 0.9999998204813508, iteration: 4194
loss: 1.4042372703552246,grad_norm: 0.9999997703335314, iteration: 4195
loss: 1.2302099466323853,grad_norm: 0.9999997773057572, iteration: 4196
loss: 1.481469750404358,grad_norm: 0.9999998113363, iteration: 4197
loss: 1.2619857788085938,grad_norm: 0.9999998333049623, iteration: 4198
loss: 1.5115212202072144,grad_norm: 0.9999998168619177, iteration: 4199
loss: 1.3319649696350098,grad_norm: 0.9999997972741946, iteration: 4200
loss: 1.3442150354385376,grad_norm: 0.9999998042192578, iteration: 4201
loss: 1.3858636617660522,grad_norm: 0.9999998236765703, iteration: 4202
loss: 1.4252482652664185,grad_norm: 0.9999997295385401, iteration: 4203
loss: 1.2471966743469238,grad_norm: 0.9999996875192197, iteration: 4204
loss: 1.4140325784683228,grad_norm: 0.9999998538206081, iteration: 4205
loss: 1.2626867294311523,grad_norm: 0.9999998571913359, iteration: 4206
loss: 1.2151033878326416,grad_norm: 0.9999998288574371, iteration: 4207
loss: 1.275987148284912,grad_norm: 0.9999997841935956, iteration: 4208
loss: 1.3235588073730469,grad_norm: 0.9999998641038191, iteration: 4209
loss: 1.4042658805847168,grad_norm: 0.9999997908293996, iteration: 4210
loss: 1.2605712413787842,grad_norm: 0.9999997963094999, iteration: 4211
loss: 1.4071300029754639,grad_norm: 0.9999997919563004, iteration: 4212
loss: 1.2148568630218506,grad_norm: 0.9999997007007704, iteration: 4213
loss: 1.284845232963562,grad_norm: 0.9999997562691477, iteration: 4214
loss: 1.4579671621322632,grad_norm: 0.9999997895284748, iteration: 4215
loss: 1.342919111251831,grad_norm: 0.9999997687817096, iteration: 4216
loss: 1.2557026147842407,grad_norm: 0.9999997959478066, iteration: 4217
loss: 1.2194037437438965,grad_norm: 0.9999997976913538, iteration: 4218
loss: 1.3873764276504517,grad_norm: 0.9999997818837995, iteration: 4219
loss: 1.4369889497756958,grad_norm: 0.9999998447327917, iteration: 4220
loss: 1.4147117137908936,grad_norm: 0.9999998512290473, iteration: 4221
loss: 1.1788735389709473,grad_norm: 0.999999764717962, iteration: 4222
loss: 1.3668463230133057,grad_norm: 0.999999784860447, iteration: 4223
loss: 1.3772094249725342,grad_norm: 0.9999998046250073, iteration: 4224
loss: 1.273949384689331,grad_norm: 0.9999997769356569, iteration: 4225
loss: 1.343766212463379,grad_norm: 0.9999997439710637, iteration: 4226
loss: 1.4595739841461182,grad_norm: 0.9999997537574483, iteration: 4227
loss: 1.327165126800537,grad_norm: 0.9999997389533127, iteration: 4228
loss: 1.3541308641433716,grad_norm: 0.9999997730187556, iteration: 4229
loss: 1.4745515584945679,grad_norm: 0.9999998303073963, iteration: 4230
loss: 1.2293180227279663,grad_norm: 0.9999997477575326, iteration: 4231
loss: 1.2613722085952759,grad_norm: 0.9999997031664121, iteration: 4232
loss: 1.271392583847046,grad_norm: 0.9999997445099763, iteration: 4233
loss: 1.4963665008544922,grad_norm: 0.9999996996341229, iteration: 4234
loss: 1.2784396409988403,grad_norm: 0.9999997830673067, iteration: 4235
loss: 1.432145595550537,grad_norm: 0.9999997727838816, iteration: 4236
loss: 1.4312056303024292,grad_norm: 0.99999980800716, iteration: 4237
loss: 1.2031259536743164,grad_norm: 0.9999998300532412, iteration: 4238
loss: 1.3574296236038208,grad_norm: 0.9999998331957962, iteration: 4239
loss: 1.3955641984939575,grad_norm: 0.999999858251301, iteration: 4240
loss: 1.3869398832321167,grad_norm: 0.9999997914366071, iteration: 4241
loss: 1.3645209074020386,grad_norm: 0.9999997364867185, iteration: 4242
loss: 1.3460073471069336,grad_norm: 0.9999998335206559, iteration: 4243
loss: 1.3267364501953125,grad_norm: 0.9999996964516658, iteration: 4244
loss: 1.3972564935684204,grad_norm: 0.9999998526093549, iteration: 4245
loss: 1.3453481197357178,grad_norm: 0.999999881043268, iteration: 4246
loss: 1.3382841348648071,grad_norm: 0.9999997860601968, iteration: 4247
loss: 1.303586721420288,grad_norm: 0.9999997623616701, iteration: 4248
loss: 1.3079887628555298,grad_norm: 0.9999997877113761, iteration: 4249
loss: 1.3692424297332764,grad_norm: 0.9999998220417519, iteration: 4250
loss: 1.398954153060913,grad_norm: 0.9999998385959253, iteration: 4251
loss: 1.3692541122436523,grad_norm: 0.9999997351219615, iteration: 4252
loss: 1.3298817873001099,grad_norm: 0.9999997758735114, iteration: 4253
loss: 1.1890337467193604,grad_norm: 0.9999997145730302, iteration: 4254
loss: 1.4527662992477417,grad_norm: 0.9999997941288045, iteration: 4255
loss: 1.3707807064056396,grad_norm: 0.9999998318333395, iteration: 4256
loss: 1.3235195875167847,grad_norm: 0.999999841717359, iteration: 4257
loss: 1.4645648002624512,grad_norm: 0.9999997328894302, iteration: 4258
loss: 1.3603250980377197,grad_norm: 0.9999997478582306, iteration: 4259
loss: 1.331655740737915,grad_norm: 0.9999997954327992, iteration: 4260
loss: 1.2799023389816284,grad_norm: 0.9999998058468943, iteration: 4261
loss: 1.2099826335906982,grad_norm: 0.9999998478810476, iteration: 4262
loss: 1.2968776226043701,grad_norm: 0.999999747865744, iteration: 4263
loss: 1.3308217525482178,grad_norm: 0.9999997272445544, iteration: 4264
loss: 1.3128814697265625,grad_norm: 0.9999997061308751, iteration: 4265
loss: 1.3512773513793945,grad_norm: 0.9999997338956704, iteration: 4266
loss: 1.5104488134384155,grad_norm: 0.9999998091930699, iteration: 4267
loss: 1.3342094421386719,grad_norm: 0.9999997474243851, iteration: 4268
loss: 1.3410295248031616,grad_norm: 0.999999815433148, iteration: 4269
loss: 1.3352760076522827,grad_norm: 0.9999997619410464, iteration: 4270
loss: 1.3129444122314453,grad_norm: 0.9999998297843741, iteration: 4271
loss: 1.3199107646942139,grad_norm: 0.9999997315087268, iteration: 4272
loss: 1.291841983795166,grad_norm: 0.999999774853307, iteration: 4273
loss: 1.4890246391296387,grad_norm: 0.9999997938334071, iteration: 4274
loss: 1.2563767433166504,grad_norm: 0.9999997390947309, iteration: 4275
loss: 1.3606568574905396,grad_norm: 0.9999997686043521, iteration: 4276
loss: 1.1818907260894775,grad_norm: 0.9999997687605416, iteration: 4277
loss: 1.361271858215332,grad_norm: 0.9999997998317512, iteration: 4278
loss: 1.2668019533157349,grad_norm: 0.9999998378056234, iteration: 4279
loss: 1.5678620338439941,grad_norm: 0.9999997542140108, iteration: 4280
loss: 1.2542672157287598,grad_norm: 0.999999699772956, iteration: 4281
loss: 1.1982178688049316,grad_norm: 0.999999741142748, iteration: 4282
loss: 1.333390474319458,grad_norm: 0.9999998467916068, iteration: 4283
loss: 1.2437316179275513,grad_norm: 0.9999998175360005, iteration: 4284
loss: 1.289840817451477,grad_norm: 0.9999997941935854, iteration: 4285
loss: 1.2623345851898193,grad_norm: 0.9999997897937823, iteration: 4286
loss: 1.3445701599121094,grad_norm: 0.9999997692053224, iteration: 4287
loss: 1.2838597297668457,grad_norm: 0.9999998299292245, iteration: 4288
loss: 1.3285185098648071,grad_norm: 0.9999998700911625, iteration: 4289
loss: 1.4150434732437134,grad_norm: 0.9999997335407265, iteration: 4290
loss: 1.3186008930206299,grad_norm: 0.9999998488871286, iteration: 4291
loss: 1.388796329498291,grad_norm: 0.9999998676924098, iteration: 4292
loss: 1.1791110038757324,grad_norm: 0.999999693964494, iteration: 4293
loss: 1.3654732704162598,grad_norm: 0.9999997977834211, iteration: 4294
loss: 1.393917202949524,grad_norm: 0.9999998526321207, iteration: 4295
loss: 1.1513813734054565,grad_norm: 0.9999997565340094, iteration: 4296
loss: 1.251773715019226,grad_norm: 0.9999998125892269, iteration: 4297
loss: 1.332635521888733,grad_norm: 0.9999998694449932, iteration: 4298
loss: 1.238454818725586,grad_norm: 0.9999997915550106, iteration: 4299
loss: 1.2870582342147827,grad_norm: 0.9999998145473382, iteration: 4300
loss: 1.32027268409729,grad_norm: 0.9999997601986865, iteration: 4301
loss: 1.274233341217041,grad_norm: 0.9999997646617569, iteration: 4302
loss: 1.367294430732727,grad_norm: 0.99999974346653, iteration: 4303
loss: 1.3763551712036133,grad_norm: 0.9999998612782096, iteration: 4304
loss: 1.3203505277633667,grad_norm: 0.9999997540862972, iteration: 4305
loss: 1.4334577322006226,grad_norm: 0.9999997755850654, iteration: 4306
loss: 1.4141786098480225,grad_norm: 0.9999998350096946, iteration: 4307
loss: 1.4961568117141724,grad_norm: 0.9999997189625129, iteration: 4308
loss: 1.3402836322784424,grad_norm: 0.9999998124051919, iteration: 4309
loss: 1.4490692615509033,grad_norm: 0.9999998539797804, iteration: 4310
loss: 1.216634750366211,grad_norm: 0.9999997149174934, iteration: 4311
loss: 1.306532859802246,grad_norm: 0.9999996674177983, iteration: 4312
loss: 1.3443186283111572,grad_norm: 0.999999779276401, iteration: 4313
loss: 1.2884663343429565,grad_norm: 0.9999998119328439, iteration: 4314
loss: 1.2587745189666748,grad_norm: 0.999999776106138, iteration: 4315
loss: 1.4242058992385864,grad_norm: 0.9999997878242689, iteration: 4316
loss: 1.2754316329956055,grad_norm: 0.9999997986220667, iteration: 4317
loss: 1.4219595193862915,grad_norm: 0.9999997828828094, iteration: 4318
loss: 1.279746651649475,grad_norm: 0.9999998291898954, iteration: 4319
loss: 1.237593650817871,grad_norm: 0.999999739454186, iteration: 4320
loss: 1.2162710428237915,grad_norm: 0.9999997964372472, iteration: 4321
loss: 1.305773377418518,grad_norm: 0.9999997780422658, iteration: 4322
loss: 1.2319059371948242,grad_norm: 0.9999997483924256, iteration: 4323
loss: 1.2528656721115112,grad_norm: 0.9999997894114496, iteration: 4324
loss: 1.357389211654663,grad_norm: 0.9999998556717109, iteration: 4325
loss: 1.1696478128433228,grad_norm: 0.9999996557931923, iteration: 4326
loss: 1.4037903547286987,grad_norm: 0.9999998511208649, iteration: 4327
loss: 1.3734556436538696,grad_norm: 0.9999998411599674, iteration: 4328
loss: 1.302198886871338,grad_norm: 0.9999998432633352, iteration: 4329
loss: 1.234404444694519,grad_norm: 0.9999997715346067, iteration: 4330
loss: 1.3272216320037842,grad_norm: 0.9999997795438875, iteration: 4331
loss: 1.4063959121704102,grad_norm: 0.9999998002761196, iteration: 4332
loss: 1.217329978942871,grad_norm: 0.9999998138123996, iteration: 4333
loss: 1.4695829153060913,grad_norm: 0.9999997619645077, iteration: 4334
loss: 1.3381367921829224,grad_norm: 0.9999997490595256, iteration: 4335
loss: 1.3564246892929077,grad_norm: 0.9999997536175405, iteration: 4336
loss: 1.3579816818237305,grad_norm: 0.9999998245576558, iteration: 4337
loss: 1.3460571765899658,grad_norm: 0.9999998417573228, iteration: 4338
loss: 1.2580527067184448,grad_norm: 0.9999997284093561, iteration: 4339
loss: 1.321484923362732,grad_norm: 0.9999997977626617, iteration: 4340
loss: 1.257596492767334,grad_norm: 0.9999998162810977, iteration: 4341
loss: 1.2509527206420898,grad_norm: 0.9999997497727754, iteration: 4342
loss: 1.280320167541504,grad_norm: 0.9999998428906578, iteration: 4343
loss: 1.401303768157959,grad_norm: 0.9999998545284506, iteration: 4344
loss: 1.1565020084381104,grad_norm: 0.9999998741449049, iteration: 4345
loss: 1.4291229248046875,grad_norm: 0.9999998501577273, iteration: 4346
loss: 1.2488702535629272,grad_norm: 0.9999997385929842, iteration: 4347
loss: 1.3954256772994995,grad_norm: 0.999999869215709, iteration: 4348
loss: 1.3272137641906738,grad_norm: 0.9999998261432165, iteration: 4349
loss: 1.3107253313064575,grad_norm: 0.9999997836268619, iteration: 4350
loss: 1.3667824268341064,grad_norm: 0.9999997734424335, iteration: 4351
loss: 1.4094274044036865,grad_norm: 0.9999997862322796, iteration: 4352
loss: 1.286076545715332,grad_norm: 0.9999997175614903, iteration: 4353
loss: 1.3892394304275513,grad_norm: 0.9999997780698574, iteration: 4354
loss: 1.251848816871643,grad_norm: 0.9999997691178883, iteration: 4355
loss: 1.4043712615966797,grad_norm: 0.9999997768319895, iteration: 4356
loss: 1.361922264099121,grad_norm: 0.9999998573731353, iteration: 4357
loss: 1.3034141063690186,grad_norm: 0.9999998070966807, iteration: 4358
loss: 1.329702615737915,grad_norm: 0.9999998439235701, iteration: 4359
loss: 1.376050591468811,grad_norm: 0.9999997878387011, iteration: 4360
loss: 1.2959222793579102,grad_norm: 0.9999998483748732, iteration: 4361
loss: 1.396799087524414,grad_norm: 0.9999997782683983, iteration: 4362
loss: 1.3342047929763794,grad_norm: 0.9999998292647169, iteration: 4363
loss: 1.4146465063095093,grad_norm: 0.9999997588924658, iteration: 4364
loss: 1.241421103477478,grad_norm: 0.9999997927865706, iteration: 4365
loss: 1.2396719455718994,grad_norm: 0.9999997103935432, iteration: 4366
loss: 1.408685326576233,grad_norm: 0.9999998397762134, iteration: 4367
loss: 1.2787905931472778,grad_norm: 0.9999998033300617, iteration: 4368
loss: 1.3873405456542969,grad_norm: 0.9999997541094701, iteration: 4369
loss: 1.377943515777588,grad_norm: 0.9999997877408862, iteration: 4370
loss: 1.3317856788635254,grad_norm: 0.9999997469417174, iteration: 4371
loss: 1.2734191417694092,grad_norm: 0.9999997571170419, iteration: 4372
loss: 1.2283658981323242,grad_norm: 0.9999997660198489, iteration: 4373
loss: 1.273483395576477,grad_norm: 0.9999997013428684, iteration: 4374
loss: 1.3613930940628052,grad_norm: 0.9999998287217216, iteration: 4375
loss: 1.3672540187835693,grad_norm: 0.9999998000296961, iteration: 4376
loss: 1.3189107179641724,grad_norm: 0.9999998078468384, iteration: 4377
loss: 1.4812264442443848,grad_norm: 0.9999998821432015, iteration: 4378
loss: 1.3975814580917358,grad_norm: 0.9999998214609487, iteration: 4379
loss: 1.3487776517868042,grad_norm: 0.9999997539282902, iteration: 4380
loss: 1.2172210216522217,grad_norm: 0.9999997148882059, iteration: 4381
loss: 1.3582855463027954,grad_norm: 0.9999998388355399, iteration: 4382
loss: 1.3151910305023193,grad_norm: 0.9999996938335415, iteration: 4383
loss: 1.2767807245254517,grad_norm: 0.9999997266456181, iteration: 4384
loss: 1.3903416395187378,grad_norm: 0.9999997376072555, iteration: 4385
loss: 1.295697808265686,grad_norm: 0.9999998613236402, iteration: 4386
loss: 1.304038166999817,grad_norm: 0.999999824031787, iteration: 4387
loss: 1.1904963254928589,grad_norm: 0.9999998341889493, iteration: 4388
loss: 1.3466238975524902,grad_norm: 0.9999997462751058, iteration: 4389
loss: 1.1994503736495972,grad_norm: 0.9999997309775854, iteration: 4390
loss: 1.3288706541061401,grad_norm: 0.9999997506342593, iteration: 4391
loss: 1.4167897701263428,grad_norm: 0.9999998004471485, iteration: 4392
loss: 1.1774394512176514,grad_norm: 0.9999996662161506, iteration: 4393
loss: 1.3980265855789185,grad_norm: 0.9999998079216912, iteration: 4394
loss: 1.4054826498031616,grad_norm: 0.9999998526545745, iteration: 4395
loss: 1.3665012121200562,grad_norm: 0.9999998187892017, iteration: 4396
loss: 1.200909972190857,grad_norm: 0.9999997597076649, iteration: 4397
loss: 1.2932878732681274,grad_norm: 0.9999997392611805, iteration: 4398
loss: 1.2214702367782593,grad_norm: 0.9999997086886196, iteration: 4399
loss: 1.3464595079421997,grad_norm: 0.9999998055064878, iteration: 4400
loss: 1.2798514366149902,grad_norm: 0.9999998130490191, iteration: 4401
loss: 1.1445876359939575,grad_norm: 0.9999997272125043, iteration: 4402
loss: 1.2575920820236206,grad_norm: 0.9999997921899202, iteration: 4403
loss: 1.2700892686843872,grad_norm: 0.9999998216565988, iteration: 4404
loss: 1.3703293800354004,grad_norm: 0.9999997371854297, iteration: 4405
loss: 1.166921854019165,grad_norm: 0.9999997903553361, iteration: 4406
loss: 1.3610317707061768,grad_norm: 0.9999997851924239, iteration: 4407
loss: 1.4248946905136108,grad_norm: 0.9999997747124155, iteration: 4408
loss: 1.3796206712722778,grad_norm: 0.9999998763672886, iteration: 4409
loss: 1.272124171257019,grad_norm: 0.9999997539034262, iteration: 4410
loss: 1.3759735822677612,grad_norm: 0.999999748279363, iteration: 4411
loss: 1.3353371620178223,grad_norm: 0.9999997787270346, iteration: 4412
loss: 1.2777044773101807,grad_norm: 0.999999785218377, iteration: 4413
loss: 1.2951245307922363,grad_norm: 0.9999998601198299, iteration: 4414
loss: 1.3923611640930176,grad_norm: 0.9999997989338892, iteration: 4415
loss: 1.323733925819397,grad_norm: 0.9999998328204401, iteration: 4416
loss: 1.2884228229522705,grad_norm: 0.9999997211182331, iteration: 4417
loss: 1.41159987449646,grad_norm: 0.9999997291648469, iteration: 4418
loss: 1.2923465967178345,grad_norm: 0.9999998011989314, iteration: 4419
loss: 1.4904590845108032,grad_norm: 0.9999997996789516, iteration: 4420
loss: 1.2335282564163208,grad_norm: 0.9999997657159421, iteration: 4421
loss: 1.2772687673568726,grad_norm: 0.9999997484531958, iteration: 4422
loss: 1.2067190408706665,grad_norm: 0.9999997167391147, iteration: 4423
loss: 1.1820515394210815,grad_norm: 0.9999998096470047, iteration: 4424
loss: 1.3412739038467407,grad_norm: 0.9999997598097369, iteration: 4425
loss: 1.3862789869308472,grad_norm: 0.9999996884356485, iteration: 4426
loss: 1.288722276687622,grad_norm: 0.9999997150769414, iteration: 4427
loss: 1.3119347095489502,grad_norm: 0.9999998164617363, iteration: 4428
loss: 1.2817251682281494,grad_norm: 0.9999997830893151, iteration: 4429
loss: 1.3281617164611816,grad_norm: 0.9999998264298462, iteration: 4430
loss: 1.192369818687439,grad_norm: 0.9999998195062515, iteration: 4431
loss: 1.3681241273880005,grad_norm: 0.9999998636800878, iteration: 4432
loss: 1.3379192352294922,grad_norm: 0.999999697701833, iteration: 4433
loss: 1.3513103723526,grad_norm: 0.999999789310333, iteration: 4434
loss: 1.3187777996063232,grad_norm: 0.9999997589726349, iteration: 4435
loss: 1.3311492204666138,grad_norm: 0.9999997385338582, iteration: 4436
loss: 1.2701570987701416,grad_norm: 0.9999997498573658, iteration: 4437
loss: 1.3335953950881958,grad_norm: 0.9999998716284048, iteration: 4438
loss: 1.302990436553955,grad_norm: 0.9999997308953041, iteration: 4439
loss: 1.3115767240524292,grad_norm: 0.99999978523011, iteration: 4440
loss: 1.3976161479949951,grad_norm: 0.9999997260738401, iteration: 4441
loss: 1.318759799003601,grad_norm: 0.9999997536277492, iteration: 4442
loss: 1.2735308408737183,grad_norm: 0.9999997560138308, iteration: 4443
loss: 1.2102690935134888,grad_norm: 0.999999796486396, iteration: 4444
loss: 1.3430958986282349,grad_norm: 0.9999998051817052, iteration: 4445
loss: 1.3039525747299194,grad_norm: 0.9999997991541888, iteration: 4446
loss: 1.242788553237915,grad_norm: 0.9999996895949155, iteration: 4447
loss: 1.2928105592727661,grad_norm: 0.9999996920411873, iteration: 4448
loss: 1.4137099981307983,grad_norm: 0.9999998468237039, iteration: 4449
loss: 1.353278636932373,grad_norm: 0.9999997364300248, iteration: 4450
loss: 1.3958855867385864,grad_norm: 0.9999997764458516, iteration: 4451
loss: 1.320976734161377,grad_norm: 0.9999998063581923, iteration: 4452
loss: 1.2747684717178345,grad_norm: 0.9999996898075146, iteration: 4453
loss: 1.3537672758102417,grad_norm: 0.9999998480549778, iteration: 4454
loss: 1.3994845151901245,grad_norm: 0.9999997585736852, iteration: 4455
loss: 1.3254839181900024,grad_norm: 0.9999997833292407, iteration: 4456
loss: 1.228327989578247,grad_norm: 0.9999997464889419, iteration: 4457
loss: 1.2228471040725708,grad_norm: 0.9999997759291361, iteration: 4458
loss: 1.3089349269866943,grad_norm: 0.999999756939522, iteration: 4459
loss: 1.2682645320892334,grad_norm: 0.9999998427898804, iteration: 4460
loss: 1.2967503070831299,grad_norm: 0.9999997956434995, iteration: 4461
loss: 1.4259772300720215,grad_norm: 0.9999998541417902, iteration: 4462
loss: 1.3855183124542236,grad_norm: 0.9999997819371735, iteration: 4463
loss: 1.4159430265426636,grad_norm: 0.9999997937560604, iteration: 4464
loss: 1.324905276298523,grad_norm: 0.9999998237724737, iteration: 4465
loss: 1.1890780925750732,grad_norm: 0.9999998105005847, iteration: 4466
loss: 1.1891262531280518,grad_norm: 0.9999997275676711, iteration: 4467
loss: 1.2786431312561035,grad_norm: 0.9999998160556564, iteration: 4468
loss: 1.3436341285705566,grad_norm: 0.9999998622338396, iteration: 4469
loss: 1.2377755641937256,grad_norm: 0.9999997065325913, iteration: 4470
loss: 1.3704639673233032,grad_norm: 0.9999998292062969, iteration: 4471
loss: 1.3887308835983276,grad_norm: 0.9999998070753267, iteration: 4472
loss: 1.2785152196884155,grad_norm: 0.9999998112338222, iteration: 4473
loss: 1.3883044719696045,grad_norm: 0.9999997902381073, iteration: 4474
loss: 1.3051478862762451,grad_norm: 0.9999997855570976, iteration: 4475
loss: 1.2617777585983276,grad_norm: 0.9999997713312284, iteration: 4476
loss: 1.3393536806106567,grad_norm: 0.9999997523342719, iteration: 4477
loss: 1.3113362789154053,grad_norm: 0.9999998034397999, iteration: 4478
loss: 1.2858967781066895,grad_norm: 0.9999998334728275, iteration: 4479
loss: 1.2676490545272827,grad_norm: 0.9999997772981835, iteration: 4480
loss: 1.1683886051177979,grad_norm: 0.9999997328927545, iteration: 4481
loss: 1.313987374305725,grad_norm: 0.9999998161127798, iteration: 4482
loss: 1.424332857131958,grad_norm: 0.9999998542008839, iteration: 4483
loss: 1.275853157043457,grad_norm: 0.9999998067212916, iteration: 4484
loss: 1.2867653369903564,grad_norm: 0.9999998063709417, iteration: 4485
loss: 1.419259786605835,grad_norm: 0.9999997831335252, iteration: 4486
loss: 1.3298225402832031,grad_norm: 0.9999998114854132, iteration: 4487
loss: 1.196064829826355,grad_norm: 0.9999997412337852, iteration: 4488
loss: 1.3146398067474365,grad_norm: 0.9999997593027293, iteration: 4489
loss: 1.3667128086090088,grad_norm: 0.9999997478393843, iteration: 4490
loss: 1.316551923751831,grad_norm: 0.9999998114337696, iteration: 4491
loss: 1.2142308950424194,grad_norm: 0.9999997421522502, iteration: 4492
loss: 1.350032091140747,grad_norm: 0.9999998166570381, iteration: 4493
loss: 1.367086410522461,grad_norm: 0.9999998237570927, iteration: 4494
loss: 1.291162133216858,grad_norm: 0.9999997823036554, iteration: 4495
loss: 1.3707958459854126,grad_norm: 0.9999997487713375, iteration: 4496
loss: 1.2599633932113647,grad_norm: 0.9999997086901676, iteration: 4497
loss: 1.298963189125061,grad_norm: 0.9999997431399273, iteration: 4498
loss: 1.2023460865020752,grad_norm: 0.9999998100859023, iteration: 4499
loss: 1.2285100221633911,grad_norm: 0.9999997863773896, iteration: 4500
loss: 1.3581069707870483,grad_norm: 0.9999997342875484, iteration: 4501
loss: 1.3304628133773804,grad_norm: 0.9999998370980917, iteration: 4502
loss: 1.4029310941696167,grad_norm: 0.9999997826385897, iteration: 4503
loss: 1.2970277070999146,grad_norm: 0.9999997297810064, iteration: 4504
loss: 1.216957449913025,grad_norm: 0.9999997230834248, iteration: 4505
loss: 1.2820066213607788,grad_norm: 0.9999997868055059, iteration: 4506
loss: 1.3732459545135498,grad_norm: 0.9999997985765794, iteration: 4507
loss: 1.3798599243164062,grad_norm: 0.9999998191236477, iteration: 4508
loss: 1.2512630224227905,grad_norm: 0.9999997516792318, iteration: 4509
loss: 1.2795082330703735,grad_norm: 0.9999997707489445, iteration: 4510
loss: 1.4130430221557617,grad_norm: 0.9999998844706244, iteration: 4511
loss: 1.1846669912338257,grad_norm: 0.999999799511149, iteration: 4512
loss: 1.2011100053787231,grad_norm: 0.9999997695184749, iteration: 4513
loss: 1.3622575998306274,grad_norm: 0.9999997630797257, iteration: 4514
loss: 1.385227084159851,grad_norm: 0.9999998144389961, iteration: 4515
loss: 1.267357587814331,grad_norm: 0.9999997059853074, iteration: 4516
loss: 1.3954135179519653,grad_norm: 0.9999997737159458, iteration: 4517
loss: 1.306778073310852,grad_norm: 0.9999996978981157, iteration: 4518
loss: 1.3063730001449585,grad_norm: 0.9999996940041176, iteration: 4519
loss: 1.446649193763733,grad_norm: 0.9999998520556872, iteration: 4520
loss: 1.3487513065338135,grad_norm: 0.9999998333615079, iteration: 4521
loss: 1.2177222967147827,grad_norm: 0.9999997237412155, iteration: 4522
loss: 1.2534645795822144,grad_norm: 0.9999997491416457, iteration: 4523
loss: 1.2577394247055054,grad_norm: 0.999999840976276, iteration: 4524
loss: 1.367919683456421,grad_norm: 0.9999998737600628, iteration: 4525
loss: 1.3142951726913452,grad_norm: 0.9999998433082904, iteration: 4526
loss: 1.28212308883667,grad_norm: 0.9999997582019745, iteration: 4527
loss: 1.2345305681228638,grad_norm: 0.9999996974849757, iteration: 4528
loss: 1.2765724658966064,grad_norm: 0.9999998095759232, iteration: 4529
loss: 1.258447527885437,grad_norm: 0.9999998246780922, iteration: 4530
loss: 1.2639189958572388,grad_norm: 0.9999998087222377, iteration: 4531
loss: 1.350955843925476,grad_norm: 0.9999998173652338, iteration: 4532
loss: 1.3603193759918213,grad_norm: 0.9999998722285643, iteration: 4533
loss: 1.2797398567199707,grad_norm: 0.9999998351224634, iteration: 4534
loss: 1.2977890968322754,grad_norm: 0.9999997468296566, iteration: 4535
loss: 1.2396187782287598,grad_norm: 0.9999998006802924, iteration: 4536
loss: 1.305770993232727,grad_norm: 0.9999997381317782, iteration: 4537
loss: 1.2652803659439087,grad_norm: 0.9999997916025865, iteration: 4538
loss: 1.3191055059432983,grad_norm: 0.999999799610937, iteration: 4539
loss: 1.2757668495178223,grad_norm: 0.9999997252008025, iteration: 4540
loss: 1.316707968711853,grad_norm: 0.9999997827337829, iteration: 4541
loss: 1.2584172487258911,grad_norm: 0.9999997705955304, iteration: 4542
loss: 1.281132698059082,grad_norm: 0.9999998911304342, iteration: 4543
loss: 1.2913416624069214,grad_norm: 0.9999997416390339, iteration: 4544
loss: 1.1859501600265503,grad_norm: 0.9999998587599407, iteration: 4545
loss: 1.3103361129760742,grad_norm: 0.9999998267721838, iteration: 4546
loss: 1.3031566143035889,grad_norm: 0.9999997157803342, iteration: 4547
loss: 1.3911463022232056,grad_norm: 0.9999997644201489, iteration: 4548
loss: 1.16229248046875,grad_norm: 0.9999997165347786, iteration: 4549
loss: 1.2153306007385254,grad_norm: 0.9999997240941896, iteration: 4550
loss: 1.2312004566192627,grad_norm: 0.9999998752132542, iteration: 4551
loss: 1.227439045906067,grad_norm: 0.9999997310649558, iteration: 4552
loss: 1.2228420972824097,grad_norm: 0.9999997647000499, iteration: 4553
loss: 1.2728596925735474,grad_norm: 0.9999997745544568, iteration: 4554
loss: 1.323979377746582,grad_norm: 0.9999997744459301, iteration: 4555
loss: 1.275584101676941,grad_norm: 0.9999997162223645, iteration: 4556
loss: 1.2380880117416382,grad_norm: 0.9999997406589057, iteration: 4557
loss: 1.3215954303741455,grad_norm: 0.9999998677942965, iteration: 4558
loss: 1.299696445465088,grad_norm: 0.9999997976965915, iteration: 4559
loss: 1.283420205116272,grad_norm: 0.9999997838384387, iteration: 4560
loss: 1.2885665893554688,grad_norm: 0.999999723396895, iteration: 4561
loss: 1.1906160116195679,grad_norm: 0.9999997046872119, iteration: 4562
loss: 1.309509038925171,grad_norm: 0.9999997111383037, iteration: 4563
loss: 1.1550060510635376,grad_norm: 0.9999997268706442, iteration: 4564
loss: 1.225656270980835,grad_norm: 0.9999998238323966, iteration: 4565
loss: 1.269253134727478,grad_norm: 0.9999997467630806, iteration: 4566
loss: 1.1898231506347656,grad_norm: 0.9999997655994608, iteration: 4567
loss: 1.2528070211410522,grad_norm: 0.999999720080345, iteration: 4568
loss: 1.4021347761154175,grad_norm: 0.9999998079300518, iteration: 4569
loss: 1.3102625608444214,grad_norm: 0.9999998097671168, iteration: 4570
loss: 1.2322360277175903,grad_norm: 0.9999998290313812, iteration: 4571
loss: 1.1833314895629883,grad_norm: 0.9999996979567273, iteration: 4572
loss: 1.0775208473205566,grad_norm: 0.9999997104068171, iteration: 4573
loss: 1.3223183155059814,grad_norm: 0.9999997966421872, iteration: 4574
loss: 1.3305772542953491,grad_norm: 0.9999997482769096, iteration: 4575
loss: 1.2598367929458618,grad_norm: 0.999999723763774, iteration: 4576
loss: 1.3230129480361938,grad_norm: 0.99999982053433, iteration: 4577
loss: 1.3050588369369507,grad_norm: 0.9999997754458123, iteration: 4578
loss: 1.201838493347168,grad_norm: 0.9999997499888257, iteration: 4579
loss: 1.3643008470535278,grad_norm: 0.9999997399904547, iteration: 4580
loss: 1.224345088005066,grad_norm: 0.9999998181712171, iteration: 4581
loss: 1.3310773372650146,grad_norm: 0.999999766482494, iteration: 4582
loss: 1.2864419221878052,grad_norm: 0.9999997184560265, iteration: 4583
loss: 1.1739513874053955,grad_norm: 0.9999998082380371, iteration: 4584
loss: 1.3116512298583984,grad_norm: 0.9999997670249512, iteration: 4585
loss: 1.2327207326889038,grad_norm: 0.999999757796743, iteration: 4586
loss: 1.2238447666168213,grad_norm: 0.9999997699174581, iteration: 4587
loss: 1.2540725469589233,grad_norm: 0.9999996962523532, iteration: 4588
loss: 1.277267336845398,grad_norm: 0.9999998395471466, iteration: 4589
loss: 1.3513582944869995,grad_norm: 0.9999997052292158, iteration: 4590
loss: 1.253989577293396,grad_norm: 0.9999997614912777, iteration: 4591
loss: 1.2696219682693481,grad_norm: 0.9999997685075822, iteration: 4592
loss: 1.2922245264053345,grad_norm: 0.9999998219560324, iteration: 4593
loss: 1.3345201015472412,grad_norm: 0.9999997866859744, iteration: 4594
loss: 1.3934119939804077,grad_norm: 0.9999997838648687, iteration: 4595
loss: 1.1990635395050049,grad_norm: 0.9999997065099167, iteration: 4596
loss: 1.280797004699707,grad_norm: 0.9999998616783814, iteration: 4597
loss: 1.3085631132125854,grad_norm: 0.9999997919844169, iteration: 4598
loss: 1.4209327697753906,grad_norm: 0.9999997813686579, iteration: 4599
loss: 1.2359087467193604,grad_norm: 0.9999998009814816, iteration: 4600
loss: 1.25884211063385,grad_norm: 0.9999997516915355, iteration: 4601
loss: 1.322148084640503,grad_norm: 0.9999998628049467, iteration: 4602
loss: 1.3240348100662231,grad_norm: 0.9999996977461107, iteration: 4603
loss: 1.287380576133728,grad_norm: 0.9999997453065067, iteration: 4604
loss: 1.1520488262176514,grad_norm: 0.9999998030100548, iteration: 4605
loss: 1.3084309101104736,grad_norm: 0.9999997683062439, iteration: 4606
loss: 1.3324449062347412,grad_norm: 0.9999997355790581, iteration: 4607
loss: 1.259514570236206,grad_norm: 0.9999996776658292, iteration: 4608
loss: 1.3191728591918945,grad_norm: 0.9999997855953898, iteration: 4609
loss: 1.3339860439300537,grad_norm: 0.9999997302420819, iteration: 4610
loss: 1.2524149417877197,grad_norm: 0.9999997642309572, iteration: 4611
loss: 1.1946347951889038,grad_norm: 0.9999996945785737, iteration: 4612
loss: 1.260480284690857,grad_norm: 0.9999998098167946, iteration: 4613
loss: 1.4666625261306763,grad_norm: 0.9999997665843403, iteration: 4614
loss: 1.23896324634552,grad_norm: 0.9999997238222043, iteration: 4615
loss: 1.339056372642517,grad_norm: 0.9999997946631377, iteration: 4616
loss: 1.2956196069717407,grad_norm: 0.9999997420176433, iteration: 4617
loss: 1.351905345916748,grad_norm: 0.999999760275118, iteration: 4618
loss: 1.3941088914871216,grad_norm: 0.999999731865584, iteration: 4619
loss: 1.3194223642349243,grad_norm: 0.9999998243754324, iteration: 4620
loss: 1.2344326972961426,grad_norm: 0.9999997780162266, iteration: 4621
loss: 1.2799229621887207,grad_norm: 0.9999997265124596, iteration: 4622
loss: 1.3103443384170532,grad_norm: 0.9999997853476643, iteration: 4623
loss: 1.3035907745361328,grad_norm: 0.9999997292226552, iteration: 4624
loss: 1.3834004402160645,grad_norm: 0.9999998449171507, iteration: 4625
loss: 1.2119033336639404,grad_norm: 0.9999997823213949, iteration: 4626
loss: 1.2985037565231323,grad_norm: 0.9999998404667179, iteration: 4627
loss: 1.2741912603378296,grad_norm: 0.9999998141232533, iteration: 4628
loss: 1.2521535158157349,grad_norm: 0.9999998324616103, iteration: 4629
loss: 1.1920324563980103,grad_norm: 0.9999997649252874, iteration: 4630
loss: 1.312809705734253,grad_norm: 0.9999998149610322, iteration: 4631
loss: 1.2583177089691162,grad_norm: 0.9999998092128217, iteration: 4632
loss: 1.2459524869918823,grad_norm: 0.9999998026113048, iteration: 4633
loss: 1.1150615215301514,grad_norm: 0.9999997995603576, iteration: 4634
loss: 1.2592047452926636,grad_norm: 0.9999997579906141, iteration: 4635
loss: 1.2706612348556519,grad_norm: 0.9999997960769451, iteration: 4636
loss: 1.2639472484588623,grad_norm: 0.9999998236995602, iteration: 4637
loss: 1.3560092449188232,grad_norm: 0.999999801797093, iteration: 4638
loss: 1.2385574579238892,grad_norm: 0.9999997387959175, iteration: 4639
loss: 1.2193418741226196,grad_norm: 0.999999732004484, iteration: 4640
loss: 1.2382197380065918,grad_norm: 0.9999997887510138, iteration: 4641
loss: 1.3310201168060303,grad_norm: 0.9999997359567847, iteration: 4642
loss: 1.27728271484375,grad_norm: 0.9999997133848385, iteration: 4643
loss: 1.1895170211791992,grad_norm: 0.9999997293612424, iteration: 4644
loss: 1.2388681173324585,grad_norm: 0.9999997088686167, iteration: 4645
loss: 1.2695844173431396,grad_norm: 0.9999997459788125, iteration: 4646
loss: 1.2733665704727173,grad_norm: 0.9999997713737857, iteration: 4647
loss: 1.339178442955017,grad_norm: 0.9999997489978928, iteration: 4648
loss: 1.3232334852218628,grad_norm: 0.9999997751443718, iteration: 4649
loss: 1.300359845161438,grad_norm: 0.9999998171037553, iteration: 4650
loss: 1.249616026878357,grad_norm: 0.9999997474536375, iteration: 4651
loss: 1.1788393259048462,grad_norm: 0.9999997305466312, iteration: 4652
loss: 1.3214064836502075,grad_norm: 0.9999998021623749, iteration: 4653
loss: 1.307588815689087,grad_norm: 0.9999998264798701, iteration: 4654
loss: 1.2232415676116943,grad_norm: 0.9999996575867047, iteration: 4655
loss: 1.2735800743103027,grad_norm: 0.999999703586609, iteration: 4656
loss: 1.3127472400665283,grad_norm: 0.9999998207363184, iteration: 4657
loss: 1.4710890054702759,grad_norm: 0.9999998268947579, iteration: 4658
loss: 1.2727477550506592,grad_norm: 0.9999997097717466, iteration: 4659
loss: 1.2986081838607788,grad_norm: 0.9999996980996789, iteration: 4660
loss: 1.3219012022018433,grad_norm: 0.9999998321785133, iteration: 4661
loss: 1.2636417150497437,grad_norm: 0.9999997174569102, iteration: 4662
loss: 1.0936784744262695,grad_norm: 0.9999997237799565, iteration: 4663
loss: 1.159948468208313,grad_norm: 0.999999696819733, iteration: 4664
loss: 1.256417989730835,grad_norm: 0.9999997839343931, iteration: 4665
loss: 1.2371912002563477,grad_norm: 0.9999996654789095, iteration: 4666
loss: 1.3607594966888428,grad_norm: 0.9999998409063389, iteration: 4667
loss: 1.2612897157669067,grad_norm: 0.9999997016999577, iteration: 4668
loss: 1.405104637145996,grad_norm: 0.9999997459526447, iteration: 4669
loss: 1.2099374532699585,grad_norm: 0.9999997857388042, iteration: 4670
loss: 1.297908902168274,grad_norm: 0.9999998288282379, iteration: 4671
loss: 1.2689977884292603,grad_norm: 0.999999734811147, iteration: 4672
loss: 1.2736375331878662,grad_norm: 0.9999997816941916, iteration: 4673
loss: 1.3420250415802002,grad_norm: 0.999999751520857, iteration: 4674
loss: 1.258948802947998,grad_norm: 0.9999997896658549, iteration: 4675
loss: 1.232485055923462,grad_norm: 0.999999694994312, iteration: 4676
loss: 1.3135395050048828,grad_norm: 0.9999997375443013, iteration: 4677
loss: 1.2605714797973633,grad_norm: 0.9999997319816851, iteration: 4678
loss: 1.2893891334533691,grad_norm: 0.999999735212447, iteration: 4679
loss: 1.1514829397201538,grad_norm: 0.9999997822666228, iteration: 4680
loss: 1.2904572486877441,grad_norm: 0.999999755152943, iteration: 4681
loss: 1.2037286758422852,grad_norm: 0.9999996713245148, iteration: 4682
loss: 1.3091031312942505,grad_norm: 0.9999998621774451, iteration: 4683
loss: 1.269661545753479,grad_norm: 0.9999997870417533, iteration: 4684
loss: 1.2573078870773315,grad_norm: 0.9999997232840871, iteration: 4685
loss: 1.299815058708191,grad_norm: 0.9999997815828141, iteration: 4686
loss: 1.2581292390823364,grad_norm: 0.999999683308196, iteration: 4687
loss: 1.2789932489395142,grad_norm: 0.9999998175401467, iteration: 4688
loss: 1.1725760698318481,grad_norm: 0.9999997001999494, iteration: 4689
loss: 1.2110227346420288,grad_norm: 0.9999998073417224, iteration: 4690
loss: 1.1207959651947021,grad_norm: 0.9999996431948862, iteration: 4691
loss: 1.3339662551879883,grad_norm: 0.9999998443429577, iteration: 4692
loss: 1.1810219287872314,grad_norm: 0.9999998892787861, iteration: 4693
loss: 1.3122972249984741,grad_norm: 0.9999997604524643, iteration: 4694
loss: 1.3502846956253052,grad_norm: 0.9999997759332242, iteration: 4695
loss: 1.2241029739379883,grad_norm: 0.9999998263319345, iteration: 4696
loss: 1.3419281244277954,grad_norm: 0.9999997769099546, iteration: 4697
loss: 1.1781362295150757,grad_norm: 0.9999998052328807, iteration: 4698
loss: 1.2038968801498413,grad_norm: 0.9999998068444553, iteration: 4699
loss: 1.2913895845413208,grad_norm: 0.9999998145988275, iteration: 4700
loss: 1.1897308826446533,grad_norm: 0.999999761571685, iteration: 4701
loss: 1.434994101524353,grad_norm: 0.999999748078072, iteration: 4702
loss: 1.3866363763809204,grad_norm: 0.9999997493165772, iteration: 4703
loss: 1.3008300065994263,grad_norm: 0.9999997269593308, iteration: 4704
loss: 1.2703572511672974,grad_norm: 0.9999998170455835, iteration: 4705
loss: 1.383173942565918,grad_norm: 0.9999998702532603, iteration: 4706
loss: 1.2350008487701416,grad_norm: 0.9999997717224592, iteration: 4707
loss: 1.259485125541687,grad_norm: 0.999999727592689, iteration: 4708
loss: 1.2755802869796753,grad_norm: 0.9999996593526311, iteration: 4709
loss: 1.2374409437179565,grad_norm: 0.9999997120833465, iteration: 4710
loss: 1.2490413188934326,grad_norm: 0.9999997723892381, iteration: 4711
loss: 1.2059627771377563,grad_norm: 0.9999997488445315, iteration: 4712
loss: 1.151289463043213,grad_norm: 0.9999996763518074, iteration: 4713
loss: 1.1664091348648071,grad_norm: 0.9999997451827679, iteration: 4714
loss: 1.2223864793777466,grad_norm: 0.999999843811872, iteration: 4715
loss: 1.192793846130371,grad_norm: 0.9999997274978374, iteration: 4716
loss: 1.3040891885757446,grad_norm: 0.9999998799468061, iteration: 4717
loss: 1.5391762256622314,grad_norm: 0.9999997735143532, iteration: 4718
loss: 1.2076187133789062,grad_norm: 0.9999997336594515, iteration: 4719
loss: 1.2267117500305176,grad_norm: 0.9999997634889543, iteration: 4720
loss: 1.1753872632980347,grad_norm: 0.9999996046872004, iteration: 4721
loss: 1.1944024562835693,grad_norm: 0.9999997751660347, iteration: 4722
loss: 1.2876675128936768,grad_norm: 0.9999998306692489, iteration: 4723
loss: 1.2643070220947266,grad_norm: 0.9999997141069478, iteration: 4724
loss: 1.2672935724258423,grad_norm: 0.9999998568155196, iteration: 4725
loss: 1.157155990600586,grad_norm: 0.9999998230832775, iteration: 4726
loss: 1.300721287727356,grad_norm: 0.9999997616248001, iteration: 4727
loss: 1.353983759880066,grad_norm: 0.9999997374166907, iteration: 4728
loss: 1.246235966682434,grad_norm: 0.9999998003307339, iteration: 4729
loss: 1.1525286436080933,grad_norm: 0.9999996549258152, iteration: 4730
loss: 1.1496498584747314,grad_norm: 0.9999997555287597, iteration: 4731
loss: 1.2084236145019531,grad_norm: 0.9999997774680683, iteration: 4732
loss: 1.2285583019256592,grad_norm: 0.9999997553312252, iteration: 4733
loss: 1.2413482666015625,grad_norm: 0.9999997640152727, iteration: 4734
loss: 1.3998438119888306,grad_norm: 0.9999997467839142, iteration: 4735
loss: 1.325831651687622,grad_norm: 0.9999998041922025, iteration: 4736
loss: 1.2605513334274292,grad_norm: 0.9999997286322444, iteration: 4737
loss: 1.2147594690322876,grad_norm: 0.9999997305054291, iteration: 4738
loss: 1.2608833312988281,grad_norm: 0.9999996977555352, iteration: 4739
loss: 1.3229793310165405,grad_norm: 0.9999997583586587, iteration: 4740
loss: 1.1540366411209106,grad_norm: 0.9999997603346029, iteration: 4741
loss: 1.4782171249389648,grad_norm: 0.9999997384221706, iteration: 4742
loss: 1.3323768377304077,grad_norm: 0.9999997874709111, iteration: 4743
loss: 1.2485636472702026,grad_norm: 0.9999997805084015, iteration: 4744
loss: 1.2125895023345947,grad_norm: 0.9999997918105139, iteration: 4745
loss: 1.23115074634552,grad_norm: 0.9999998418433905, iteration: 4746
loss: 1.2356549501419067,grad_norm: 0.9999997046271093, iteration: 4747
loss: 1.3316048383712769,grad_norm: 0.9999998243398682, iteration: 4748
loss: 1.3226476907730103,grad_norm: 0.9999998408130918, iteration: 4749
loss: 1.2017264366149902,grad_norm: 0.9999997230548241, iteration: 4750
loss: 1.2173234224319458,grad_norm: 0.9999997780785187, iteration: 4751
loss: 1.1373690366744995,grad_norm: 0.999999760727001, iteration: 4752
loss: 1.2309809923171997,grad_norm: 0.9999998125755717, iteration: 4753
loss: 1.3961334228515625,grad_norm: 0.9999997639782299, iteration: 4754
loss: 1.1441786289215088,grad_norm: 0.9999997651892758, iteration: 4755
loss: 1.2413231134414673,grad_norm: 0.999999815556359, iteration: 4756
loss: 1.3263930082321167,grad_norm: 0.9999998293005524, iteration: 4757
loss: 1.3530131578445435,grad_norm: 0.9999998078927274, iteration: 4758
loss: 1.3127003908157349,grad_norm: 0.9999998434168881, iteration: 4759
loss: 1.209525465965271,grad_norm: 0.9999997072733193, iteration: 4760
loss: 1.1816818714141846,grad_norm: 0.9999996998491133, iteration: 4761
loss: 1.3250837326049805,grad_norm: 0.9999997443043449, iteration: 4762
loss: 1.3415627479553223,grad_norm: 0.9999997858664104, iteration: 4763
loss: 1.152937650680542,grad_norm: 0.9999996550980237, iteration: 4764
loss: 1.1983166933059692,grad_norm: 0.9999997531568534, iteration: 4765
loss: 1.2807397842407227,grad_norm: 0.9999996965349982, iteration: 4766
loss: 1.1733851432800293,grad_norm: 0.9999998570019913, iteration: 4767
loss: 1.225141167640686,grad_norm: 0.9999998012611434, iteration: 4768
loss: 1.3461437225341797,grad_norm: 0.9999998296700339, iteration: 4769
loss: 1.2843034267425537,grad_norm: 0.9999997860626528, iteration: 4770
loss: 1.2611266374588013,grad_norm: 0.99999981621224, iteration: 4771
loss: 1.3230983018875122,grad_norm: 0.9999998300279092, iteration: 4772
loss: 1.236824870109558,grad_norm: 0.9999998098441204, iteration: 4773
loss: 1.3624606132507324,grad_norm: 0.9999997398397958, iteration: 4774
loss: 1.2308934926986694,grad_norm: 0.9999996510955357, iteration: 4775
loss: 1.2562756538391113,grad_norm: 0.9999997667755867, iteration: 4776
loss: 1.3121095895767212,grad_norm: 0.9999998082807199, iteration: 4777
loss: 1.233697772026062,grad_norm: 0.9999997147648331, iteration: 4778
loss: 1.0949108600616455,grad_norm: 0.9999996398915536, iteration: 4779
loss: 1.1938396692276,grad_norm: 0.9999996729793882, iteration: 4780
loss: 1.34276282787323,grad_norm: 0.9999997420088739, iteration: 4781
loss: 1.2550435066223145,grad_norm: 0.9999997549403183, iteration: 4782
loss: 1.228027105331421,grad_norm: 0.9999997441770762, iteration: 4783
loss: 1.2844128608703613,grad_norm: 0.9999996853125614, iteration: 4784
loss: 1.1215379238128662,grad_norm: 0.9999996671847228, iteration: 4785
loss: 1.2345316410064697,grad_norm: 0.9999997216154027, iteration: 4786
loss: 1.3848276138305664,grad_norm: 0.9999998790054019, iteration: 4787
loss: 1.200411081314087,grad_norm: 0.9999998080402321, iteration: 4788
loss: 1.2306418418884277,grad_norm: 0.9999997680128941, iteration: 4789
loss: 1.1709336042404175,grad_norm: 0.9999997737030453, iteration: 4790
loss: 1.1980184316635132,grad_norm: 0.9999997623100358, iteration: 4791
loss: 1.189509391784668,grad_norm: 0.9999997915606709, iteration: 4792
loss: 1.3031635284423828,grad_norm: 0.9999997454230969, iteration: 4793
loss: 1.2237318754196167,grad_norm: 0.9999998143280533, iteration: 4794
loss: 1.0905249118804932,grad_norm: 0.9999997536457764, iteration: 4795
loss: 1.2563953399658203,grad_norm: 0.9999998293565647, iteration: 4796
loss: 1.241593360900879,grad_norm: 0.9999996744483024, iteration: 4797
loss: 1.245895504951477,grad_norm: 0.9999996818181202, iteration: 4798
loss: 1.25154447555542,grad_norm: 0.9999997897532383, iteration: 4799
loss: 1.3048617839813232,grad_norm: 0.9999997963198776, iteration: 4800
loss: 1.2628819942474365,grad_norm: 0.9999998320872037, iteration: 4801
loss: 1.321932315826416,grad_norm: 0.9999997069171008, iteration: 4802
loss: 1.1702628135681152,grad_norm: 0.9999997003726993, iteration: 4803
loss: 1.2279787063598633,grad_norm: 0.9999997030669685, iteration: 4804
loss: 1.2673228979110718,grad_norm: 0.9999998609676749, iteration: 4805
loss: 1.1299065351486206,grad_norm: 0.9999996685909848, iteration: 4806
loss: 1.1389756202697754,grad_norm: 0.9999997135803073, iteration: 4807
loss: 1.1902774572372437,grad_norm: 0.9999997550625992, iteration: 4808
loss: 1.2939724922180176,grad_norm: 0.999999703399891, iteration: 4809
loss: 1.2423079013824463,grad_norm: 0.9999998073548497, iteration: 4810
loss: 1.2665382623672485,grad_norm: 0.9999997412878109, iteration: 4811
loss: 1.2599854469299316,grad_norm: 0.9999996759495735, iteration: 4812
loss: 1.1547313928604126,grad_norm: 0.9999997785706616, iteration: 4813
loss: 1.3860589265823364,grad_norm: 0.9999997615312962, iteration: 4814
loss: 1.2560964822769165,grad_norm: 0.9999997415855365, iteration: 4815
loss: 1.2426974773406982,grad_norm: 0.9999998281506084, iteration: 4816
loss: 1.1570038795471191,grad_norm: 0.9999997841610189, iteration: 4817
loss: 1.2322824001312256,grad_norm: 0.9999997759026715, iteration: 4818
loss: 1.2681622505187988,grad_norm: 0.9999998300179725, iteration: 4819
loss: 1.2883785963058472,grad_norm: 0.999999802820111, iteration: 4820
loss: 1.1415401697158813,grad_norm: 0.9999997012944095, iteration: 4821
loss: 1.2992570400238037,grad_norm: 0.9999998119035097, iteration: 4822
loss: 1.2548688650131226,grad_norm: 0.999999747018059, iteration: 4823
loss: 1.2037811279296875,grad_norm: 0.9999998444579783, iteration: 4824
loss: 1.1929043531417847,grad_norm: 0.9999997954543878, iteration: 4825
loss: 1.157196044921875,grad_norm: 0.9999998041732905, iteration: 4826
loss: 1.2311407327651978,grad_norm: 0.9999996673301207, iteration: 4827
loss: 1.2865409851074219,grad_norm: 0.9999997229318387, iteration: 4828
loss: 1.245871663093567,grad_norm: 0.9999997252262599, iteration: 4829
loss: 1.1921720504760742,grad_norm: 0.9999997131599461, iteration: 4830
loss: 1.2531720399856567,grad_norm: 0.999999692620903, iteration: 4831
loss: 1.1922003030776978,grad_norm: 0.9999997243280826, iteration: 4832
loss: 1.311923861503601,grad_norm: 0.9999997316140812, iteration: 4833
loss: 1.2021576166152954,grad_norm: 0.9999997338036105, iteration: 4834
loss: 1.1243911981582642,grad_norm: 0.9999997234962692, iteration: 4835
loss: 1.2473881244659424,grad_norm: 0.9999998847928694, iteration: 4836
loss: 1.2541005611419678,grad_norm: 0.9999997937683384, iteration: 4837
loss: 1.254826545715332,grad_norm: 0.9999997998706218, iteration: 4838
loss: 1.3418421745300293,grad_norm: 0.9999998775508104, iteration: 4839
loss: 1.0940682888031006,grad_norm: 0.9999996375998647, iteration: 4840
loss: 1.400114893913269,grad_norm: 0.9999997187456351, iteration: 4841
loss: 1.3370182514190674,grad_norm: 0.9999998010914162, iteration: 4842
loss: 1.0909361839294434,grad_norm: 0.9999996758854526, iteration: 4843
loss: 1.2721757888793945,grad_norm: 0.9999998108328991, iteration: 4844
loss: 1.325228214263916,grad_norm: 0.9999997159312104, iteration: 4845
loss: 1.3169710636138916,grad_norm: 0.9999997386468105, iteration: 4846
loss: 1.2579035758972168,grad_norm: 0.9999997554344321, iteration: 4847
loss: 1.1847676038742065,grad_norm: 0.9999996944070435, iteration: 4848
loss: 1.357643723487854,grad_norm: 0.9999997614253393, iteration: 4849
loss: 1.2615926265716553,grad_norm: 0.9999998083353185, iteration: 4850
loss: 1.1950584650039673,grad_norm: 0.9999996771211427, iteration: 4851
loss: 1.168015718460083,grad_norm: 0.9999997261211229, iteration: 4852
loss: 1.224675178527832,grad_norm: 0.9999997262294408, iteration: 4853
loss: 1.2677843570709229,grad_norm: 0.9999997991705494, iteration: 4854
loss: 1.2148921489715576,grad_norm: 0.999999722857916, iteration: 4855
loss: 1.187767505645752,grad_norm: 0.999999756488954, iteration: 4856
loss: 1.2185771465301514,grad_norm: 0.9999997461625543, iteration: 4857
loss: 1.311567783355713,grad_norm: 0.999999716748286, iteration: 4858
loss: 1.2213199138641357,grad_norm: 0.9999997369016629, iteration: 4859
loss: 1.1829867362976074,grad_norm: 0.9999997396372121, iteration: 4860
loss: 1.223745584487915,grad_norm: 0.999999761370674, iteration: 4861
loss: 1.2521289587020874,grad_norm: 0.9999998166919901, iteration: 4862
loss: 1.2831124067306519,grad_norm: 0.999999843679434, iteration: 4863
loss: 1.1534732580184937,grad_norm: 0.9999996288329197, iteration: 4864
loss: 1.2778559923171997,grad_norm: 0.9999998038190119, iteration: 4865
loss: 1.1586016416549683,grad_norm: 0.9999997386932903, iteration: 4866
loss: 1.1995325088500977,grad_norm: 0.9999998038523458, iteration: 4867
loss: 1.1296470165252686,grad_norm: 0.999999723514098, iteration: 4868
loss: 1.3074373006820679,grad_norm: 0.999999806393243, iteration: 4869
loss: 1.2816369533538818,grad_norm: 0.9999997388498703, iteration: 4870
loss: 1.142754077911377,grad_norm: 0.9999997674607187, iteration: 4871
loss: 1.3239985704421997,grad_norm: 0.999999749936465, iteration: 4872
loss: 1.2773188352584839,grad_norm: 0.9999997720922426, iteration: 4873
loss: 1.2074098587036133,grad_norm: 0.9999997579324457, iteration: 4874
loss: 1.1511688232421875,grad_norm: 0.9999997321780019, iteration: 4875
loss: 1.3301565647125244,grad_norm: 0.999999692031749, iteration: 4876
loss: 1.2349635362625122,grad_norm: 0.9999997200585873, iteration: 4877
loss: 1.233899712562561,grad_norm: 0.9999998018184191, iteration: 4878
loss: 1.2026563882827759,grad_norm: 0.9999997792068708, iteration: 4879
loss: 1.0134649276733398,grad_norm: 0.9999997372001171, iteration: 4880
loss: 1.221632719039917,grad_norm: 0.9999997651195146, iteration: 4881
loss: 1.172486424446106,grad_norm: 0.9999997311149476, iteration: 4882
loss: 1.272700548171997,grad_norm: 0.999999790196182, iteration: 4883
loss: 1.3046276569366455,grad_norm: 0.9999998136426342, iteration: 4884
loss: 1.2324697971343994,grad_norm: 0.9999997489983504, iteration: 4885
loss: 1.2356637716293335,grad_norm: 0.9999997734826622, iteration: 4886
loss: 1.212113857269287,grad_norm: 0.99999981129388, iteration: 4887
loss: 1.1819947957992554,grad_norm: 0.9999997866019524, iteration: 4888
loss: 1.199704647064209,grad_norm: 0.9999997944865281, iteration: 4889
loss: 1.2736700773239136,grad_norm: 0.9999997576572691, iteration: 4890
loss: 1.2417691946029663,grad_norm: 0.9999997102574535, iteration: 4891
loss: 1.2111759185791016,grad_norm: 0.9999997310240281, iteration: 4892
loss: 1.322285532951355,grad_norm: 0.9999997687309111, iteration: 4893
loss: 1.149381399154663,grad_norm: 0.9999998495971908, iteration: 4894
loss: 1.2317283153533936,grad_norm: 0.999999728366662, iteration: 4895
loss: 1.2068188190460205,grad_norm: 0.9999997560919662, iteration: 4896
loss: 1.3797634840011597,grad_norm: 0.9999997605868537, iteration: 4897
loss: 1.1650547981262207,grad_norm: 0.9999997814822341, iteration: 4898
loss: 1.2487009763717651,grad_norm: 0.9999998667204261, iteration: 4899
loss: 1.21514093875885,grad_norm: 0.9999997556636794, iteration: 4900
loss: 1.184121012687683,grad_norm: 0.9999997401753957, iteration: 4901
loss: 1.165610671043396,grad_norm: 0.9999997624746789, iteration: 4902
loss: 1.2725080251693726,grad_norm: 0.9999997927017519, iteration: 4903
loss: 1.1990514993667603,grad_norm: 0.9999998015563067, iteration: 4904
loss: 1.3190232515335083,grad_norm: 0.9999997259131467, iteration: 4905
loss: 1.3114275932312012,grad_norm: 0.999999781311665, iteration: 4906
loss: 1.27384614944458,grad_norm: 0.9999997820166615, iteration: 4907
loss: 1.2835086584091187,grad_norm: 0.9999998324510712, iteration: 4908
loss: 1.329674243927002,grad_norm: 0.9999998048139428, iteration: 4909
loss: 1.252111792564392,grad_norm: 0.9999997406315466, iteration: 4910
loss: 1.1909171342849731,grad_norm: 0.9999997299227968, iteration: 4911
loss: 1.2629796266555786,grad_norm: 0.9999997924790885, iteration: 4912
loss: 1.196082353591919,grad_norm: 0.9999997568362774, iteration: 4913
loss: 1.3145703077316284,grad_norm: 0.9999998416211962, iteration: 4914
loss: 1.2890348434448242,grad_norm: 0.9999997115976577, iteration: 4915
loss: 1.2150801420211792,grad_norm: 0.9999997362876437, iteration: 4916
loss: 1.222795844078064,grad_norm: 0.9999996912679566, iteration: 4917
loss: 1.3129959106445312,grad_norm: 0.9999997796179089, iteration: 4918
loss: 1.268587350845337,grad_norm: 0.9999998272054775, iteration: 4919
loss: 1.235795021057129,grad_norm: 0.9999996777113092, iteration: 4920
loss: 1.1648575067520142,grad_norm: 0.9999997312419726, iteration: 4921
loss: 1.2494772672653198,grad_norm: 0.9999997813970556, iteration: 4922
loss: 1.2177246809005737,grad_norm: 0.9999996860294292, iteration: 4923
loss: 1.326474905014038,grad_norm: 0.9999997275177446, iteration: 4924
loss: 1.173616647720337,grad_norm: 0.9999996433472373, iteration: 4925
loss: 1.3162920475006104,grad_norm: 0.9999998342451691, iteration: 4926
loss: 1.1347873210906982,grad_norm: 0.9999997396401928, iteration: 4927
loss: 1.2236111164093018,grad_norm: 0.9999998152274476, iteration: 4928
loss: 1.3830245733261108,grad_norm: 0.9999997470557082, iteration: 4929
loss: 1.2146276235580444,grad_norm: 0.9999997332584273, iteration: 4930
loss: 1.103844404220581,grad_norm: 0.9999996729491898, iteration: 4931
loss: 1.2197446823120117,grad_norm: 0.999999806155681, iteration: 4932
loss: 1.315035104751587,grad_norm: 0.9999997720779357, iteration: 4933
loss: 1.2652195692062378,grad_norm: 0.9999998064649805, iteration: 4934
loss: 1.2206708192825317,grad_norm: 0.9999997177615689, iteration: 4935
loss: 1.1509567499160767,grad_norm: 0.9999997389992273, iteration: 4936
loss: 1.2715483903884888,grad_norm: 0.9999996847250374, iteration: 4937
loss: 1.2065935134887695,grad_norm: 0.9999997094865997, iteration: 4938
loss: 1.2136759757995605,grad_norm: 0.9999998055286672, iteration: 4939
loss: 1.3156650066375732,grad_norm: 0.9999998285302709, iteration: 4940
loss: 1.2509335279464722,grad_norm: 0.9999998440514766, iteration: 4941
loss: 1.2591496706008911,grad_norm: 0.9999997752742825, iteration: 4942
loss: 1.163741946220398,grad_norm: 0.9999997685786227, iteration: 4943
loss: 1.0918433666229248,grad_norm: 0.9999997114963265, iteration: 4944
loss: 1.2663580179214478,grad_norm: 0.9999997547873976, iteration: 4945
loss: 1.1488702297210693,grad_norm: 0.9999997110748566, iteration: 4946
loss: 1.1240317821502686,grad_norm: 0.9999997129389846, iteration: 4947
loss: 1.1833256483078003,grad_norm: 0.9999997986667886, iteration: 4948
loss: 1.226154088973999,grad_norm: 0.9999998935604183, iteration: 4949
loss: 1.211400032043457,grad_norm: 0.9999997585816344, iteration: 4950
loss: 1.146196722984314,grad_norm: 0.9999996676043389, iteration: 4951
loss: 1.1322617530822754,grad_norm: 0.9999997382804602, iteration: 4952
loss: 1.3172475099563599,grad_norm: 0.9999997772429964, iteration: 4953
loss: 1.2864452600479126,grad_norm: 0.9999997863613306, iteration: 4954
loss: 1.1740624904632568,grad_norm: 0.9999997836587704, iteration: 4955
loss: 1.2253551483154297,grad_norm: 0.999999690593576, iteration: 4956
loss: 1.2043256759643555,grad_norm: 0.9999996707660637, iteration: 4957
loss: 1.1738886833190918,grad_norm: 0.9999997246984563, iteration: 4958
loss: 1.1973615884780884,grad_norm: 0.9999996900191407, iteration: 4959
loss: 1.2236437797546387,grad_norm: 0.9999997964295487, iteration: 4960
loss: 1.2034140825271606,grad_norm: 0.9999997506757701, iteration: 4961
loss: 1.213738203048706,grad_norm: 0.9999997530217352, iteration: 4962
loss: 1.2999931573867798,grad_norm: 0.999999850521978, iteration: 4963
loss: 1.2405790090560913,grad_norm: 0.9999998241109602, iteration: 4964
loss: 1.14442777633667,grad_norm: 0.9999997419417365, iteration: 4965
loss: 1.271385669708252,grad_norm: 0.9999997727494631, iteration: 4966
loss: 1.1858818531036377,grad_norm: 0.9999998298135362, iteration: 4967
loss: 1.1921354532241821,grad_norm: 0.9999998343946329, iteration: 4968
loss: 1.1953868865966797,grad_norm: 0.9999997619479432, iteration: 4969
loss: 1.1698797941207886,grad_norm: 0.9999997006026746, iteration: 4970
loss: 1.3151448965072632,grad_norm: 0.9999997168969879, iteration: 4971
loss: 1.1861976385116577,grad_norm: 0.9999996902769523, iteration: 4972
loss: 1.145587682723999,grad_norm: 0.9999997257957828, iteration: 4973
loss: 1.2524346113204956,grad_norm: 0.9999997473559644, iteration: 4974
loss: 1.156304121017456,grad_norm: 0.9999997831982342, iteration: 4975
loss: 1.3196359872817993,grad_norm: 0.9999997416742751, iteration: 4976
loss: 1.2607595920562744,grad_norm: 0.9999997543032768, iteration: 4977
loss: 1.2142266035079956,grad_norm: 0.9999997384098143, iteration: 4978
loss: 1.169185757637024,grad_norm: 0.9999997070285649, iteration: 4979
loss: 1.224872350692749,grad_norm: 0.9999997369161601, iteration: 4980
loss: 1.2430357933044434,grad_norm: 0.9999997217411842, iteration: 4981
loss: 1.1612108945846558,grad_norm: 0.9999996924794651, iteration: 4982
loss: 1.1659908294677734,grad_norm: 0.9999998291202745, iteration: 4983
loss: 1.3610470294952393,grad_norm: 0.9999997536873926, iteration: 4984
loss: 1.2088384628295898,grad_norm: 0.9999998180488159, iteration: 4985
loss: 1.1411200761795044,grad_norm: 0.9999997365394176, iteration: 4986
loss: 1.2743749618530273,grad_norm: 0.9999997593131599, iteration: 4987
loss: 1.2329717874526978,grad_norm: 0.9999998595233878, iteration: 4988
loss: 1.3300197124481201,grad_norm: 0.9999997190936266, iteration: 4989
loss: 1.1772432327270508,grad_norm: 0.9999997971747574, iteration: 4990
loss: 1.1917496919631958,grad_norm: 0.9999998157644195, iteration: 4991
loss: 1.2754075527191162,grad_norm: 0.9999997746512841, iteration: 4992
loss: 1.2600537538528442,grad_norm: 0.9999998389309621, iteration: 4993
loss: 1.0985491275787354,grad_norm: 0.9999997394750456, iteration: 4994
loss: 1.2140296697616577,grad_norm: 0.999999746445287, iteration: 4995
loss: 1.284123182296753,grad_norm: 0.9999998627223685, iteration: 4996
loss: 1.3016183376312256,grad_norm: 0.9999997169851, iteration: 4997
loss: 1.133079171180725,grad_norm: 0.9999996199653819, iteration: 4998
loss: 1.1731568574905396,grad_norm: 0.9999996588606656, iteration: 4999
loss: 1.2813644409179688,grad_norm: 0.9999998402917009, iteration: 5000
loss: 1.15453040599823,grad_norm: 0.9999996493479552, iteration: 5001
loss: 1.205214023590088,grad_norm: 0.9999996864015509, iteration: 5002
loss: 1.2151740789413452,grad_norm: 0.9999996948496516, iteration: 5003
loss: 1.0759086608886719,grad_norm: 0.9999996110369694, iteration: 5004
loss: 1.193468451499939,grad_norm: 0.9999997203752065, iteration: 5005
loss: 1.2138147354125977,grad_norm: 0.9999997076968655, iteration: 5006
loss: 1.175674319267273,grad_norm: 0.9999996959738224, iteration: 5007
loss: 1.2792359590530396,grad_norm: 0.9999996937755634, iteration: 5008
loss: 1.2515888214111328,grad_norm: 0.99999974937713, iteration: 5009
loss: 1.2212855815887451,grad_norm: 0.9999997742076417, iteration: 5010
loss: 1.2632068395614624,grad_norm: 0.9999997029731976, iteration: 5011
loss: 1.2540258169174194,grad_norm: 0.9999997052578635, iteration: 5012
loss: 1.2396501302719116,grad_norm: 0.9999997551889479, iteration: 5013
loss: 1.2285871505737305,grad_norm: 0.9999996655981694, iteration: 5014
loss: 1.2741762399673462,grad_norm: 0.9999997123431188, iteration: 5015
loss: 1.2488514184951782,grad_norm: 0.9999997879654994, iteration: 5016
loss: 1.2017141580581665,grad_norm: 0.9999997827458164, iteration: 5017
loss: 1.1987903118133545,grad_norm: 0.9999997628649897, iteration: 5018
loss: 1.2519811391830444,grad_norm: 0.9999997425187657, iteration: 5019
loss: 1.1943923234939575,grad_norm: 0.9999996757978645, iteration: 5020
loss: 1.1488648653030396,grad_norm: 0.9999997019926864, iteration: 5021
loss: 1.1774951219558716,grad_norm: 0.9999997024421418, iteration: 5022
loss: 1.2609326839447021,grad_norm: 0.9999997958536975, iteration: 5023
loss: 1.2056529521942139,grad_norm: 0.9999996478510497, iteration: 5024
loss: 1.2046083211898804,grad_norm: 0.9999997152631843, iteration: 5025
loss: 1.1151174306869507,grad_norm: 0.9999998341119114, iteration: 5026
loss: 1.3439699411392212,grad_norm: 0.9999998703384804, iteration: 5027
loss: 1.1738271713256836,grad_norm: 0.9999997411081097, iteration: 5028
loss: 1.1480190753936768,grad_norm: 0.9999997180634816, iteration: 5029
loss: 1.1211963891983032,grad_norm: 0.9999996531636022, iteration: 5030
loss: 1.2101432085037231,grad_norm: 0.9999998045367441, iteration: 5031
loss: 1.282448649406433,grad_norm: 0.9999997412058781, iteration: 5032
loss: 1.250579833984375,grad_norm: 0.9999996991167521, iteration: 5033
loss: 1.1277886629104614,grad_norm: 0.9999997300693502, iteration: 5034
loss: 1.2832642793655396,grad_norm: 0.9999998319306331, iteration: 5035
loss: 1.2930762767791748,grad_norm: 0.999999763881889, iteration: 5036
loss: 1.2964719533920288,grad_norm: 0.9999997869523982, iteration: 5037
loss: 1.2079442739486694,grad_norm: 0.9999998055018801, iteration: 5038
loss: 1.1329089403152466,grad_norm: 0.9999997221893011, iteration: 5039
loss: 1.225013256072998,grad_norm: 0.9999996799597236, iteration: 5040
loss: 1.1897305250167847,grad_norm: 0.9999997127634946, iteration: 5041
loss: 1.3004616498947144,grad_norm: 0.9999998643619089, iteration: 5042
loss: 1.2897539138793945,grad_norm: 0.9999996649633018, iteration: 5043
loss: 1.2300212383270264,grad_norm: 0.99999964670441, iteration: 5044
loss: 1.2936848402023315,grad_norm: 0.9999998506985303, iteration: 5045
loss: 1.1569695472717285,grad_norm: 0.999999720081222, iteration: 5046
loss: 1.2070419788360596,grad_norm: 0.9999997284017187, iteration: 5047
loss: 1.2489162683486938,grad_norm: 0.99999979574917, iteration: 5048
loss: 1.1411691904067993,grad_norm: 0.999999706125249, iteration: 5049
loss: 1.225649356842041,grad_norm: 0.9999997703160597, iteration: 5050
loss: 1.2759133577346802,grad_norm: 0.9999997315622285, iteration: 5051
loss: 1.2247276306152344,grad_norm: 0.9999998151770225, iteration: 5052
loss: 1.18495512008667,grad_norm: 0.9999996990554836, iteration: 5053
loss: 1.1810253858566284,grad_norm: 0.9999997247686667, iteration: 5054
loss: 1.226641297340393,grad_norm: 0.9999998058858091, iteration: 5055
loss: 1.2027336359024048,grad_norm: 0.9999996516658358, iteration: 5056
loss: 1.1841166019439697,grad_norm: 0.9999997832034544, iteration: 5057
loss: 1.1173317432403564,grad_norm: 0.9999997610972295, iteration: 5058
loss: 1.2068957090377808,grad_norm: 0.999999873278327, iteration: 5059
loss: 1.3307883739471436,grad_norm: 0.9999998992801784, iteration: 5060
loss: 1.1904112100601196,grad_norm: 0.9999998019050983, iteration: 5061
loss: 1.2799984216690063,grad_norm: 0.999999774284981, iteration: 5062
loss: 1.2728334665298462,grad_norm: 0.9999997330346574, iteration: 5063
loss: 1.2369221448898315,grad_norm: 0.9999998036203026, iteration: 5064
loss: 1.146590232849121,grad_norm: 0.9999996694066414, iteration: 5065
loss: 1.1416637897491455,grad_norm: 0.9999996378959604, iteration: 5066
loss: 1.245537281036377,grad_norm: 0.9999996906241876, iteration: 5067
loss: 1.2265596389770508,grad_norm: 0.9999997781050488, iteration: 5068
loss: 1.1636484861373901,grad_norm: 0.9999997352959463, iteration: 5069
loss: 1.2102051973342896,grad_norm: 0.9999997534198884, iteration: 5070
loss: 1.2047338485717773,grad_norm: 0.9999997548240956, iteration: 5071
loss: 1.1715209484100342,grad_norm: 0.9999998906757561, iteration: 5072
loss: 1.2941099405288696,grad_norm: 0.9999997821450591, iteration: 5073
loss: 1.1917147636413574,grad_norm: 0.9999997661623339, iteration: 5074
loss: 1.1552181243896484,grad_norm: 0.9999997492511936, iteration: 5075
loss: 1.133934736251831,grad_norm: 0.9999997399643903, iteration: 5076
loss: 1.1849666833877563,grad_norm: 0.99999977406279, iteration: 5077
loss: 1.2505817413330078,grad_norm: 0.9999998257064334, iteration: 5078
loss: 1.357345461845398,grad_norm: 0.9999997953069537, iteration: 5079
loss: 1.173842191696167,grad_norm: 0.9999997826252387, iteration: 5080
loss: 1.350038766860962,grad_norm: 0.99999984679544, iteration: 5081
loss: 1.2888104915618896,grad_norm: 0.9999997485436113, iteration: 5082
loss: 1.2992987632751465,grad_norm: 0.9999997101250075, iteration: 5083
loss: 1.3121905326843262,grad_norm: 0.9999997102298948, iteration: 5084
loss: 1.1516038179397583,grad_norm: 0.9999997222813992, iteration: 5085
loss: 1.2408043146133423,grad_norm: 0.9999996485890973, iteration: 5086
loss: 1.1842682361602783,grad_norm: 0.999999761963269, iteration: 5087
loss: 1.1596735715866089,grad_norm: 0.9999998026970699, iteration: 5088
loss: 1.1765773296356201,grad_norm: 0.9999997953153508, iteration: 5089
loss: 1.2661397457122803,grad_norm: 0.9999997218904368, iteration: 5090
loss: 1.2598921060562134,grad_norm: 0.9999997419954879, iteration: 5091
loss: 1.1985018253326416,grad_norm: 0.9999996695256309, iteration: 5092
loss: 1.1815146207809448,grad_norm: 0.9999997056133255, iteration: 5093
loss: 1.2434298992156982,grad_norm: 0.9999998235170556, iteration: 5094
loss: 1.2758756875991821,grad_norm: 0.9999997587047872, iteration: 5095
loss: 1.1614235639572144,grad_norm: 0.9999997743889741, iteration: 5096
loss: 1.2293349504470825,grad_norm: 0.9999997952597635, iteration: 5097
loss: 1.2220125198364258,grad_norm: 0.9999997749030973, iteration: 5098
loss: 1.3231854438781738,grad_norm: 0.9999996389913444, iteration: 5099
loss: 1.1371221542358398,grad_norm: 0.9999997271681074, iteration: 5100
loss: 1.1493092775344849,grad_norm: 0.9999997563387102, iteration: 5101
loss: 1.2649396657943726,grad_norm: 0.999999720194491, iteration: 5102
loss: 1.1517966985702515,grad_norm: 0.9999997515928651, iteration: 5103
loss: 1.260478138923645,grad_norm: 0.9999997895094008, iteration: 5104
loss: 1.1311334371566772,grad_norm: 0.9999997720287993, iteration: 5105
loss: 1.1018133163452148,grad_norm: 0.9999997027226043, iteration: 5106
loss: 1.1806182861328125,grad_norm: 0.9999997708475967, iteration: 5107
loss: 1.18941068649292,grad_norm: 0.9999996923124552, iteration: 5108
loss: 1.2077845335006714,grad_norm: 0.999999755049761, iteration: 5109
loss: 1.2148717641830444,grad_norm: 0.999999764780885, iteration: 5110
loss: 1.220927119255066,grad_norm: 0.9999997433032486, iteration: 5111
loss: 1.285011887550354,grad_norm: 0.9999998445804317, iteration: 5112
loss: 1.1739553213119507,grad_norm: 0.9999996531709033, iteration: 5113
loss: 1.180572271347046,grad_norm: 0.9999997430352171, iteration: 5114
loss: 1.1367415189743042,grad_norm: 0.9999997892799621, iteration: 5115
loss: 1.1256510019302368,grad_norm: 0.999999703854706, iteration: 5116
loss: 1.2093616724014282,grad_norm: 0.9999997620759391, iteration: 5117
loss: 1.1563587188720703,grad_norm: 0.9999996968893321, iteration: 5118
loss: 1.195548415184021,grad_norm: 0.9999998069261388, iteration: 5119
loss: 1.324352502822876,grad_norm: 0.9999997878724709, iteration: 5120
loss: 1.2962238788604736,grad_norm: 0.9999998282343836, iteration: 5121
loss: 1.1803257465362549,grad_norm: 0.9999997234616633, iteration: 5122
loss: 1.1162604093551636,grad_norm: 0.9999997035382286, iteration: 5123
loss: 1.036713719367981,grad_norm: 0.9999995620548322, iteration: 5124
loss: 1.251859426498413,grad_norm: 0.9999998002473152, iteration: 5125
loss: 1.1820704936981201,grad_norm: 0.9999997181435556, iteration: 5126
loss: 1.2603105306625366,grad_norm: 0.9999997451104358, iteration: 5127
loss: 1.1933152675628662,grad_norm: 0.9999997182708379, iteration: 5128
loss: 1.1360894441604614,grad_norm: 0.9999998030707433, iteration: 5129
loss: 1.2074570655822754,grad_norm: 0.9999997360721945, iteration: 5130
loss: 1.195800542831421,grad_norm: 0.9999998481685566, iteration: 5131
loss: 1.2143805027008057,grad_norm: 0.9999997555620596, iteration: 5132
loss: 1.3514446020126343,grad_norm: 0.9999997888898091, iteration: 5133
loss: 1.2141618728637695,grad_norm: 0.9999996909477663, iteration: 5134
loss: 1.1499792337417603,grad_norm: 0.9999996982702714, iteration: 5135
loss: 1.2400920391082764,grad_norm: 0.9999997583186957, iteration: 5136
loss: 1.2780954837799072,grad_norm: 0.9999997785472908, iteration: 5137
loss: 1.1859325170516968,grad_norm: 0.9999997246076092, iteration: 5138
loss: 1.2512787580490112,grad_norm: 0.9999996909778772, iteration: 5139
loss: 1.2454899549484253,grad_norm: 0.9999997710351578, iteration: 5140
loss: 1.1978248357772827,grad_norm: 0.999999718663894, iteration: 5141
loss: 1.231519103050232,grad_norm: 0.9999997410987677, iteration: 5142
loss: 1.079979658126831,grad_norm: 0.9999996230824023, iteration: 5143
loss: 1.1785995960235596,grad_norm: 0.9999997421045278, iteration: 5144
loss: 1.154982089996338,grad_norm: 0.999999729704605, iteration: 5145
loss: 1.2318534851074219,grad_norm: 0.9999998095305261, iteration: 5146
loss: 1.2504099607467651,grad_norm: 0.9999997377356445, iteration: 5147
loss: 1.163641095161438,grad_norm: 0.9999996684476483, iteration: 5148
loss: 1.2625421285629272,grad_norm: 0.9999996066026383, iteration: 5149
loss: 1.164475440979004,grad_norm: 0.999999755465663, iteration: 5150
loss: 1.2139244079589844,grad_norm: 0.9999998302689086, iteration: 5151
loss: 1.2632938623428345,grad_norm: 0.9999996723064448, iteration: 5152
loss: 1.1497710943222046,grad_norm: 0.9999997197658412, iteration: 5153
loss: 1.1108036041259766,grad_norm: 0.9999997139194167, iteration: 5154
loss: 1.1606568098068237,grad_norm: 0.9999998457046785, iteration: 5155
loss: 1.1692055463790894,grad_norm: 0.9999997019051902, iteration: 5156
loss: 1.2588474750518799,grad_norm: 0.9999998467827113, iteration: 5157
loss: 1.3088548183441162,grad_norm: 0.9999997011510277, iteration: 5158
loss: 1.1106706857681274,grad_norm: 0.9999996388483524, iteration: 5159
loss: 1.1164500713348389,grad_norm: 0.9999996635269207, iteration: 5160
loss: 1.2701961994171143,grad_norm: 0.9999997788772697, iteration: 5161
loss: 1.204837441444397,grad_norm: 0.9999996372052972, iteration: 5162
loss: 1.2441720962524414,grad_norm: 0.9999998704398186, iteration: 5163
loss: 1.293330430984497,grad_norm: 0.9999997524808626, iteration: 5164
loss: 1.2275925874710083,grad_norm: 0.9999996772246995, iteration: 5165
loss: 1.208357810974121,grad_norm: 0.9999997813363166, iteration: 5166
loss: 1.0645445585250854,grad_norm: 0.9999996742340667, iteration: 5167
loss: 1.0891906023025513,grad_norm: 0.9999997073869566, iteration: 5168
loss: 1.2630301713943481,grad_norm: 0.9999996921298008, iteration: 5169
loss: 1.2082687616348267,grad_norm: 0.9999996539919517, iteration: 5170
loss: 1.1837512254714966,grad_norm: 0.9999996744760377, iteration: 5171
loss: 1.2145439386367798,grad_norm: 0.9999997522074917, iteration: 5172
loss: 1.2696287631988525,grad_norm: 0.999999816822554, iteration: 5173
loss: 1.140364646911621,grad_norm: 0.9999997134412961, iteration: 5174
loss: 1.1848353147506714,grad_norm: 0.999999660597788, iteration: 5175
loss: 1.1330904960632324,grad_norm: 0.9999996510299363, iteration: 5176
loss: 1.1154594421386719,grad_norm: 0.9999997891116619, iteration: 5177
loss: 1.2356040477752686,grad_norm: 0.9999998253542374, iteration: 5178
loss: 1.218812346458435,grad_norm: 0.9999997840954293, iteration: 5179
loss: 1.2141493558883667,grad_norm: 0.9999997135351044, iteration: 5180
loss: 1.1648679971694946,grad_norm: 0.9999997119246471, iteration: 5181
loss: 1.1622660160064697,grad_norm: 0.999999816956721, iteration: 5182
loss: 1.2035009860992432,grad_norm: 0.9999997381737995, iteration: 5183
loss: 1.14229416847229,grad_norm: 0.99999973413829, iteration: 5184
loss: 1.1830447912216187,grad_norm: 0.9999997675480142, iteration: 5185
loss: 1.2403360605239868,grad_norm: 0.9999997143169641, iteration: 5186
loss: 1.156506061553955,grad_norm: 0.9999998443506709, iteration: 5187
loss: 1.187313437461853,grad_norm: 0.999999737118547, iteration: 5188
loss: 1.1883972883224487,grad_norm: 0.9999996218887364, iteration: 5189
loss: 1.1702371835708618,grad_norm: 0.9999997126932249, iteration: 5190
loss: 1.2278743982315063,grad_norm: 0.9999997522088468, iteration: 5191
loss: 1.1449933052062988,grad_norm: 0.9999997286466947, iteration: 5192
loss: 1.201177716255188,grad_norm: 0.9999997326529053, iteration: 5193
loss: 1.1909738779067993,grad_norm: 0.9999997251563866, iteration: 5194
loss: 1.2112144231796265,grad_norm: 0.9999997402497067, iteration: 5195
loss: 1.215865135192871,grad_norm: 0.9999997756752955, iteration: 5196
loss: 1.226883053779602,grad_norm: 0.9999997826946808, iteration: 5197
loss: 1.1437443494796753,grad_norm: 0.9999997747122348, iteration: 5198
loss: 1.1093415021896362,grad_norm: 0.9999997236519725, iteration: 5199
loss: 1.0938717126846313,grad_norm: 0.9999996992059473, iteration: 5200
loss: 1.1471160650253296,grad_norm: 0.9999996150788691, iteration: 5201
loss: 1.1391681432724,grad_norm: 0.9999996510417145, iteration: 5202
loss: 1.0856072902679443,grad_norm: 0.9999996449578508, iteration: 5203
loss: 1.2098928689956665,grad_norm: 0.9999997038041033, iteration: 5204
loss: 1.2402204275131226,grad_norm: 0.99999988131748, iteration: 5205
loss: 1.1453733444213867,grad_norm: 0.9999996574973944, iteration: 5206
loss: 1.2210966348648071,grad_norm: 0.999999734230283, iteration: 5207
loss: 1.1241451501846313,grad_norm: 0.9999997475516876, iteration: 5208
loss: 1.0945361852645874,grad_norm: 0.9999997478957011, iteration: 5209
loss: 1.2078956365585327,grad_norm: 0.9999998417806583, iteration: 5210
loss: 1.1341228485107422,grad_norm: 0.9999997135106804, iteration: 5211
loss: 1.2652409076690674,grad_norm: 0.9999998439807642, iteration: 5212
loss: 1.2540055513381958,grad_norm: 0.9999997818840745, iteration: 5213
loss: 1.178135633468628,grad_norm: 0.9999997753168113, iteration: 5214
loss: 1.1385786533355713,grad_norm: 0.9999997171947085, iteration: 5215
loss: 1.1724913120269775,grad_norm: 0.9999997364326536, iteration: 5216
loss: 1.263913631439209,grad_norm: 0.9999996900456554, iteration: 5217
loss: 1.2122745513916016,grad_norm: 0.9999996962528762, iteration: 5218
loss: 1.1389923095703125,grad_norm: 0.9999998200378212, iteration: 5219
loss: 1.083372950553894,grad_norm: 0.9999995592345942, iteration: 5220
loss: 1.2363018989562988,grad_norm: 0.9999996977044369, iteration: 5221
loss: 1.275840401649475,grad_norm: 0.9999998659690973, iteration: 5222
loss: 1.2156333923339844,grad_norm: 0.9999998034778044, iteration: 5223
loss: 1.225347876548767,grad_norm: 0.9999998398242965, iteration: 5224
loss: 1.115797758102417,grad_norm: 0.9999997181442122, iteration: 5225
loss: 1.1612420082092285,grad_norm: 0.9999996847804514, iteration: 5226
loss: 1.1226614713668823,grad_norm: 0.9999997070574821, iteration: 5227
loss: 1.1117137670516968,grad_norm: 0.999999751293372, iteration: 5228
loss: 1.0577691793441772,grad_norm: 0.9999996292835078, iteration: 5229
loss: 1.3122657537460327,grad_norm: 0.9999997939044957, iteration: 5230
loss: 1.2460683584213257,grad_norm: 0.9999998026088222, iteration: 5231
loss: 1.1461520195007324,grad_norm: 0.9999997366780576, iteration: 5232
loss: 1.2632583379745483,grad_norm: 0.9999997814310586, iteration: 5233
loss: 1.1694848537445068,grad_norm: 0.9999996705592115, iteration: 5234
loss: 1.2336905002593994,grad_norm: 0.9999997872887608, iteration: 5235
loss: 1.2600163221359253,grad_norm: 0.9999998223962424, iteration: 5236
loss: 1.2026524543762207,grad_norm: 0.9999996762327045, iteration: 5237
loss: 1.0513418912887573,grad_norm: 0.9999997568050727, iteration: 5238
loss: 1.207507848739624,grad_norm: 0.9999998117139514, iteration: 5239
loss: 1.1386008262634277,grad_norm: 0.9999997440210168, iteration: 5240
loss: 1.1091892719268799,grad_norm: 0.999999689008053, iteration: 5241
loss: 1.1510939598083496,grad_norm: 0.9999997664025091, iteration: 5242
loss: 1.2518213987350464,grad_norm: 0.9999997704484886, iteration: 5243
loss: 1.2840296030044556,grad_norm: 0.9999997407104639, iteration: 5244
loss: 1.1658176183700562,grad_norm: 0.9999997689576311, iteration: 5245
loss: 1.1016886234283447,grad_norm: 0.9999996170657277, iteration: 5246
loss: 1.1324211359024048,grad_norm: 0.9999997586296078, iteration: 5247
loss: 1.1220030784606934,grad_norm: 0.9999998314418567, iteration: 5248
loss: 1.0957380533218384,grad_norm: 0.9999997565084893, iteration: 5249
loss: 1.1986361742019653,grad_norm: 0.9999998856760092, iteration: 5250
loss: 1.1935169696807861,grad_norm: 0.9999997406892552, iteration: 5251
loss: 1.2501866817474365,grad_norm: 0.9999997325170363, iteration: 5252
loss: 1.2501237392425537,grad_norm: 0.9999997829486671, iteration: 5253
loss: 1.091218113899231,grad_norm: 0.9999997056106406, iteration: 5254
loss: 1.1864898204803467,grad_norm: 0.9999997443518956, iteration: 5255
loss: 1.1667125225067139,grad_norm: 0.9999996878159608, iteration: 5256
loss: 1.2525328397750854,grad_norm: 0.9999998867714965, iteration: 5257
loss: 1.2244559526443481,grad_norm: 0.9999997275230771, iteration: 5258
loss: 1.2465453147888184,grad_norm: 0.9999997845836553, iteration: 5259
loss: 1.1172723770141602,grad_norm: 0.9999997048520307, iteration: 5260
loss: 1.2853987216949463,grad_norm: 0.999999747369746, iteration: 5261
loss: 1.1772607564926147,grad_norm: 0.9999996891125879, iteration: 5262
loss: 1.1393018960952759,grad_norm: 0.9999997538895221, iteration: 5263
loss: 1.1146427392959595,grad_norm: 0.9999997552709524, iteration: 5264
loss: 1.1069384813308716,grad_norm: 0.9999996963549383, iteration: 5265
loss: 1.1878454685211182,grad_norm: 0.9999997185567334, iteration: 5266
loss: 1.169114589691162,grad_norm: 0.999999744811063, iteration: 5267
loss: 1.1397969722747803,grad_norm: 0.9999996910158983, iteration: 5268
loss: 1.1971477270126343,grad_norm: 0.9999997382144437, iteration: 5269
loss: 1.2308897972106934,grad_norm: 0.999999784777104, iteration: 5270
loss: 1.30892813205719,grad_norm: 0.9999998609288736, iteration: 5271
loss: 1.1837984323501587,grad_norm: 0.9999996670968138, iteration: 5272
loss: 1.1494137048721313,grad_norm: 0.999999730063486, iteration: 5273
loss: 1.1128737926483154,grad_norm: 0.9999996985945577, iteration: 5274
loss: 1.0714001655578613,grad_norm: 0.9999997426607088, iteration: 5275
loss: 1.226974368095398,grad_norm: 0.9999997544995276, iteration: 5276
loss: 1.2452555894851685,grad_norm: 0.9999998045610805, iteration: 5277
loss: 1.2119641304016113,grad_norm: 0.9999997835293314, iteration: 5278
loss: 1.2459144592285156,grad_norm: 0.9999998035078701, iteration: 5279
loss: 1.2172744274139404,grad_norm: 0.9999997542379386, iteration: 5280
loss: 1.1624990701675415,grad_norm: 0.9999998322392403, iteration: 5281
loss: 1.2056701183319092,grad_norm: 0.9999997157894168, iteration: 5282
loss: 1.1168427467346191,grad_norm: 0.9999996999990028, iteration: 5283
loss: 1.1054198741912842,grad_norm: 0.999999706206627, iteration: 5284
loss: 1.2689354419708252,grad_norm: 0.999999736096055, iteration: 5285
loss: 1.1620479822158813,grad_norm: 0.9999997018565406, iteration: 5286
loss: 1.0985186100006104,grad_norm: 0.9999996094750483, iteration: 5287
loss: 1.1677912473678589,grad_norm: 0.9999997012915932, iteration: 5288
loss: 1.3741061687469482,grad_norm: 0.999999806538672, iteration: 5289
loss: 1.25739324092865,grad_norm: 0.9999997462237883, iteration: 5290
loss: 1.063269019126892,grad_norm: 0.9999996897792205, iteration: 5291
loss: 1.2105759382247925,grad_norm: 0.9999997562885936, iteration: 5292
loss: 1.0784813165664673,grad_norm: 0.9999996117901494, iteration: 5293
loss: 1.1634820699691772,grad_norm: 0.9999998251808325, iteration: 5294
loss: 1.2280007600784302,grad_norm: 0.9999997501388213, iteration: 5295
loss: 1.1852914094924927,grad_norm: 0.9999997774100529, iteration: 5296
loss: 1.2164090871810913,grad_norm: 0.9999997365984764, iteration: 5297
loss: 1.174451231956482,grad_norm: 0.9999997811404548, iteration: 5298
loss: 1.2331851720809937,grad_norm: 0.9999997602490907, iteration: 5299
loss: 1.1544681787490845,grad_norm: 0.9999998300632352, iteration: 5300
loss: 1.1660122871398926,grad_norm: 0.9999997851133928, iteration: 5301
loss: 1.2252211570739746,grad_norm: 0.9999997921263213, iteration: 5302
loss: 1.219650387763977,grad_norm: 0.9999997225583094, iteration: 5303
loss: 1.2745013236999512,grad_norm: 0.9999997495423777, iteration: 5304
loss: 1.3268218040466309,grad_norm: 0.9999997911150789, iteration: 5305
loss: 1.1628001928329468,grad_norm: 0.9999997099722827, iteration: 5306
loss: 1.141966700553894,grad_norm: 0.999999768682269, iteration: 5307
loss: 1.139950156211853,grad_norm: 0.9999996759715641, iteration: 5308
loss: 1.1723756790161133,grad_norm: 0.9999997801122559, iteration: 5309
loss: 1.1233264207839966,grad_norm: 0.999999824107489, iteration: 5310
loss: 1.203949213027954,grad_norm: 0.9999997689449716, iteration: 5311
loss: 1.1496365070343018,grad_norm: 0.9999997188184472, iteration: 5312
loss: 1.2833185195922852,grad_norm: 0.9999997571607216, iteration: 5313
loss: 1.214622139930725,grad_norm: 0.9999997281420326, iteration: 5314
loss: 1.24551522731781,grad_norm: 0.9999997373973151, iteration: 5315
loss: 1.1908501386642456,grad_norm: 0.9999997949864243, iteration: 5316
loss: 1.1019799709320068,grad_norm: 0.999999589801498, iteration: 5317
loss: 1.186416745185852,grad_norm: 0.9999997351310921, iteration: 5318
loss: 1.1383988857269287,grad_norm: 0.9999996904831115, iteration: 5319
loss: 1.19273841381073,grad_norm: 0.9999997603439997, iteration: 5320
loss: 1.165695071220398,grad_norm: 0.9999996999052878, iteration: 5321
loss: 1.1832571029663086,grad_norm: 0.9999996481867525, iteration: 5322
loss: 1.259792685508728,grad_norm: 0.9999997704513547, iteration: 5323
loss: 1.0812437534332275,grad_norm: 0.9999997011201435, iteration: 5324
loss: 1.1683073043823242,grad_norm: 0.9999997491199195, iteration: 5325
loss: 1.078384280204773,grad_norm: 0.9999997718725672, iteration: 5326
loss: 1.1185272932052612,grad_norm: 0.9999997430394921, iteration: 5327
loss: 1.1088989973068237,grad_norm: 0.9999996782391792, iteration: 5328
loss: 1.1038137674331665,grad_norm: 0.9999996776192042, iteration: 5329
loss: 1.1621837615966797,grad_norm: 0.9999997536876086, iteration: 5330
loss: 1.1142349243164062,grad_norm: 0.9999996014985192, iteration: 5331
loss: 1.1547126770019531,grad_norm: 0.9999996447885056, iteration: 5332
loss: 1.1776928901672363,grad_norm: 0.9999997614236827, iteration: 5333
loss: 1.1480510234832764,grad_norm: 0.9999996962874812, iteration: 5334
loss: 1.129300832748413,grad_norm: 0.9999997233715141, iteration: 5335
loss: 1.2348116636276245,grad_norm: 0.9999996667261388, iteration: 5336
loss: 1.1654953956604004,grad_norm: 0.9999997131753291, iteration: 5337
loss: 1.085118293762207,grad_norm: 0.9999996481289415, iteration: 5338
loss: 1.2426271438598633,grad_norm: 0.9999997284707244, iteration: 5339
loss: 1.1048446893692017,grad_norm: 0.9999997422193264, iteration: 5340
loss: 1.1352555751800537,grad_norm: 0.9999997346724693, iteration: 5341
loss: 1.195675015449524,grad_norm: 0.9999998020408665, iteration: 5342
loss: 1.0708805322647095,grad_norm: 0.9999996665596479, iteration: 5343
loss: 1.2669224739074707,grad_norm: 0.9999997562280315, iteration: 5344
loss: 1.1738334894180298,grad_norm: 0.9999997034700119, iteration: 5345
loss: 1.108024001121521,grad_norm: 0.9999998233045562, iteration: 5346
loss: 1.1786699295043945,grad_norm: 0.9999997452454656, iteration: 5347
loss: 1.074215292930603,grad_norm: 0.9999996284734386, iteration: 5348
loss: 1.0852993726730347,grad_norm: 0.9999996243070685, iteration: 5349
loss: 1.0906131267547607,grad_norm: 0.9999997156908209, iteration: 5350
loss: 1.2388631105422974,grad_norm: 0.9999997965089036, iteration: 5351
loss: 1.1420196294784546,grad_norm: 0.9999996792125249, iteration: 5352
loss: 1.115780234336853,grad_norm: 0.9999997502688278, iteration: 5353
loss: 1.1624014377593994,grad_norm: 0.9999997512771756, iteration: 5354
loss: 1.0678343772888184,grad_norm: 0.9999996565160819, iteration: 5355
loss: 1.1630326509475708,grad_norm: 0.9999996523125743, iteration: 5356
loss: 1.332173228263855,grad_norm: 0.9999997096250016, iteration: 5357
loss: 1.1467341184616089,grad_norm: 0.9999996688153726, iteration: 5358
loss: 1.108811855316162,grad_norm: 0.9999996650909733, iteration: 5359
loss: 1.127511978149414,grad_norm: 0.9999997337977604, iteration: 5360
loss: 1.1110754013061523,grad_norm: 0.9999996426440446, iteration: 5361
loss: 1.2120332717895508,grad_norm: 0.9999998725560946, iteration: 5362
loss: 1.2202173471450806,grad_norm: 0.9999997601411705, iteration: 5363
loss: 1.210609793663025,grad_norm: 0.9999997048909007, iteration: 5364
loss: 1.1833162307739258,grad_norm: 0.9999996273306192, iteration: 5365
loss: 1.1712185144424438,grad_norm: 0.9999996823725962, iteration: 5366
loss: 1.1205980777740479,grad_norm: 0.9999995815785719, iteration: 5367
loss: 1.2445391416549683,grad_norm: 0.9999997482750773, iteration: 5368
loss: 1.1611132621765137,grad_norm: 0.9999997505965533, iteration: 5369
loss: 1.242139458656311,grad_norm: 0.9999997694479017, iteration: 5370
loss: 1.2956465482711792,grad_norm: 0.999999674575053, iteration: 5371
loss: 1.121833086013794,grad_norm: 0.9999995609885676, iteration: 5372
loss: 1.2297743558883667,grad_norm: 0.9999996384813538, iteration: 5373
loss: 1.149199366569519,grad_norm: 0.9999996947306372, iteration: 5374
loss: 1.2681599855422974,grad_norm: 0.9999998897684774, iteration: 5375
loss: 1.2833008766174316,grad_norm: 0.9999998135423839, iteration: 5376
loss: 1.1740776300430298,grad_norm: 0.9999997843016549, iteration: 5377
loss: 1.0913385152816772,grad_norm: 0.9999996573088957, iteration: 5378
loss: 1.1705418825149536,grad_norm: 0.9999997167340746, iteration: 5379
loss: 1.2322998046875,grad_norm: 0.9999997358941742, iteration: 5380
loss: 1.1300331354141235,grad_norm: 0.9999996673772759, iteration: 5381
loss: 1.0778743028640747,grad_norm: 0.9999996476803992, iteration: 5382
loss: 1.1316136121749878,grad_norm: 0.999999575589197, iteration: 5383
loss: 1.115583062171936,grad_norm: 0.9999996027584387, iteration: 5384
loss: 1.266321063041687,grad_norm: 0.999999825175421, iteration: 5385
loss: 1.2300173044204712,grad_norm: 0.9999997659585114, iteration: 5386
loss: 1.195600152015686,grad_norm: 0.9999997477730421, iteration: 5387
loss: 1.2850574254989624,grad_norm: 0.9999997636512379, iteration: 5388
loss: 1.1785056591033936,grad_norm: 0.9999996817258413, iteration: 5389
loss: 1.1817835569381714,grad_norm: 0.9999996254301892, iteration: 5390
loss: 1.1419341564178467,grad_norm: 0.9999996003818759, iteration: 5391
loss: 1.3182899951934814,grad_norm: 0.9999998168476585, iteration: 5392
loss: 1.188623309135437,grad_norm: 0.9999996847929018, iteration: 5393
loss: 1.1068207025527954,grad_norm: 0.9999997238261209, iteration: 5394
loss: 1.2323147058486938,grad_norm: 0.9999997947627981, iteration: 5395
loss: 1.150041937828064,grad_norm: 0.9999997066632157, iteration: 5396
loss: 1.2064638137817383,grad_norm: 0.9999998533448634, iteration: 5397
loss: 1.160798192024231,grad_norm: 0.9999996395463547, iteration: 5398
loss: 1.1457749605178833,grad_norm: 0.9999996990481207, iteration: 5399
loss: 1.2511930465698242,grad_norm: 0.9999998080851967, iteration: 5400
loss: 1.1242233514785767,grad_norm: 0.9999996474105778, iteration: 5401
loss: 1.2161645889282227,grad_norm: 0.9999997903915874, iteration: 5402
loss: 1.1097283363342285,grad_norm: 0.9999996968164752, iteration: 5403
loss: 1.2124664783477783,grad_norm: 0.9999997356857766, iteration: 5404
loss: 1.0906956195831299,grad_norm: 0.9999997796435106, iteration: 5405
loss: 1.1002401113510132,grad_norm: 0.9999997069766271, iteration: 5406
loss: 1.051425576210022,grad_norm: 0.999999605154313, iteration: 5407
loss: 1.1722276210784912,grad_norm: 0.999999692699847, iteration: 5408
loss: 1.1341208219528198,grad_norm: 0.9999997926805445, iteration: 5409
loss: 1.1515278816223145,grad_norm: 0.9999998314810968, iteration: 5410
loss: 1.0771269798278809,grad_norm: 0.9999998335759557, iteration: 5411
loss: 1.0355685949325562,grad_norm: 0.9999996180196018, iteration: 5412
loss: 1.1736189126968384,grad_norm: 0.9999996397586103, iteration: 5413
loss: 1.1218897104263306,grad_norm: 0.9999997089064764, iteration: 5414
loss: 1.1014513969421387,grad_norm: 0.9999997080122471, iteration: 5415
loss: 1.0309514999389648,grad_norm: 0.9999997210877466, iteration: 5416
loss: 1.199615478515625,grad_norm: 0.9999998015317773, iteration: 5417
loss: 1.0736478567123413,grad_norm: 0.9999997270027453, iteration: 5418
loss: 1.1368085145950317,grad_norm: 0.9999996519834878, iteration: 5419
loss: 1.2194933891296387,grad_norm: 0.9999998243981392, iteration: 5420
loss: 1.1696348190307617,grad_norm: 0.9999997613491979, iteration: 5421
loss: 1.2169930934906006,grad_norm: 0.9999997221170479, iteration: 5422
loss: 1.1176249980926514,grad_norm: 0.9999996582548915, iteration: 5423
loss: 1.1723151206970215,grad_norm: 0.999999705140159, iteration: 5424
loss: 1.2542872428894043,grad_norm: 0.9999998071753751, iteration: 5425
loss: 1.1305513381958008,grad_norm: 0.9999996566123117, iteration: 5426
loss: 1.1622768640518188,grad_norm: 0.999999640345164, iteration: 5427
loss: 1.1427632570266724,grad_norm: 0.999999607431705, iteration: 5428
loss: 1.0470705032348633,grad_norm: 0.9999996440290126, iteration: 5429
loss: 1.096831202507019,grad_norm: 0.9999996832985683, iteration: 5430
loss: 1.144623875617981,grad_norm: 0.999999747110995, iteration: 5431
loss: 1.2203418016433716,grad_norm: 0.9999997887465089, iteration: 5432
loss: 1.2606474161148071,grad_norm: 0.9999998438529654, iteration: 5433
loss: 1.1673184633255005,grad_norm: 0.9999997425207794, iteration: 5434
loss: 1.2413506507873535,grad_norm: 0.9999997318582701, iteration: 5435
loss: 1.2241610288619995,grad_norm: 0.9999998132103624, iteration: 5436
loss: 1.1571422815322876,grad_norm: 0.9999998157096447, iteration: 5437
loss: 1.1278891563415527,grad_norm: 0.9999997211695213, iteration: 5438
loss: 1.2247586250305176,grad_norm: 0.9999996886284443, iteration: 5439
loss: 1.1601866483688354,grad_norm: 0.9999997444770973, iteration: 5440
loss: 1.26961088180542,grad_norm: 0.9999996324418412, iteration: 5441
loss: 1.1847255229949951,grad_norm: 0.9999997146755766, iteration: 5442
loss: 1.1554558277130127,grad_norm: 0.9999997695746303, iteration: 5443
loss: 1.151416540145874,grad_norm: 0.9999997188563546, iteration: 5444
loss: 1.2016866207122803,grad_norm: 0.9999997125765717, iteration: 5445
loss: 1.196229100227356,grad_norm: 0.9999998100201264, iteration: 5446
loss: 1.2120177745819092,grad_norm: 0.9999998208716551, iteration: 5447
loss: 1.1437801122665405,grad_norm: 0.9999997008224755, iteration: 5448
loss: 1.1969696283340454,grad_norm: 0.9999997293678715, iteration: 5449
loss: 1.2196636199951172,grad_norm: 0.9999997672934864, iteration: 5450
loss: 1.1853477954864502,grad_norm: 0.9999997760715769, iteration: 5451
loss: 1.2061907052993774,grad_norm: 0.9999997573132389, iteration: 5452
loss: 1.1543419361114502,grad_norm: 0.9999997042969365, iteration: 5453
loss: 1.1533621549606323,grad_norm: 0.9999997722270815, iteration: 5454
loss: 1.168473482131958,grad_norm: 0.9999997012807472, iteration: 5455
loss: 1.136888861656189,grad_norm: 0.9999997165185565, iteration: 5456
loss: 1.2156680822372437,grad_norm: 0.9999997630498876, iteration: 5457
loss: 1.0659550428390503,grad_norm: 0.9999995916028063, iteration: 5458
loss: 1.0839262008666992,grad_norm: 0.9999995467444027, iteration: 5459
loss: 1.094321608543396,grad_norm: 0.9999996446191808, iteration: 5460
loss: 1.0404887199401855,grad_norm: 0.999999698290087, iteration: 5461
loss: 1.1430845260620117,grad_norm: 0.9999997203971265, iteration: 5462
loss: 1.089590072631836,grad_norm: 0.9999996684311196, iteration: 5463
loss: 1.1432019472122192,grad_norm: 0.9999996839792619, iteration: 5464
loss: 1.1598501205444336,grad_norm: 0.99999970508952, iteration: 5465
loss: 1.3244413137435913,grad_norm: 0.9999998336715281, iteration: 5466
loss: 1.2007877826690674,grad_norm: 0.999999714824321, iteration: 5467
loss: 1.1833438873291016,grad_norm: 0.9999997412804739, iteration: 5468
loss: 1.1511600017547607,grad_norm: 0.9999997213428596, iteration: 5469
loss: 1.1375020742416382,grad_norm: 0.9999997001582773, iteration: 5470
loss: 1.0975797176361084,grad_norm: 0.9999996822600938, iteration: 5471
loss: 1.1611411571502686,grad_norm: 0.9999997743396193, iteration: 5472
loss: 1.2256327867507935,grad_norm: 0.9999997048884207, iteration: 5473
loss: 1.1084272861480713,grad_norm: 0.9999996567052719, iteration: 5474
loss: 1.0749037265777588,grad_norm: 0.9999997135555556, iteration: 5475
loss: 1.2442238330841064,grad_norm: 0.9999997427759642, iteration: 5476
loss: 1.1247879266738892,grad_norm: 0.9999995682176832, iteration: 5477
loss: 1.1360269784927368,grad_norm: 0.9999996896157181, iteration: 5478
loss: 1.1484142541885376,grad_norm: 0.9999996941427339, iteration: 5479
loss: 1.1788071393966675,grad_norm: 0.9999997167424984, iteration: 5480
loss: 1.1608500480651855,grad_norm: 0.9999997832069388, iteration: 5481
loss: 1.118959903717041,grad_norm: 0.999999654276801, iteration: 5482
loss: 1.1832884550094604,grad_norm: 0.9999996862290127, iteration: 5483
loss: 1.1413869857788086,grad_norm: 0.999999798822678, iteration: 5484
loss: 1.1580129861831665,grad_norm: 0.9999998193135214, iteration: 5485
loss: 1.1558729410171509,grad_norm: 0.9999996795898866, iteration: 5486
loss: 1.1560938358306885,grad_norm: 0.9999997198836444, iteration: 5487
loss: 1.1207524538040161,grad_norm: 0.9999997274173148, iteration: 5488
loss: 1.2140204906463623,grad_norm: 0.9999996737535453, iteration: 5489
loss: 1.16618812084198,grad_norm: 0.9999997035949053, iteration: 5490
loss: 1.1392279863357544,grad_norm: 0.9999996260036671, iteration: 5491
loss: 1.202432632446289,grad_norm: 0.9999996648219911, iteration: 5492
loss: 1.0535783767700195,grad_norm: 0.999999608086392, iteration: 5493
loss: 1.0988843441009521,grad_norm: 0.9999996922846863, iteration: 5494
loss: 1.1949301958084106,grad_norm: 0.9999997181838859, iteration: 5495
loss: 1.2461470365524292,grad_norm: 0.9999997558679224, iteration: 5496
loss: 1.0614920854568481,grad_norm: 0.9999997111811637, iteration: 5497
loss: 1.2198982238769531,grad_norm: 0.9999996982974634, iteration: 5498
loss: 1.1723301410675049,grad_norm: 0.9999998418328241, iteration: 5499
loss: 1.1521929502487183,grad_norm: 0.9999996760800218, iteration: 5500
loss: 1.0807504653930664,grad_norm: 0.999999861590769, iteration: 5501
loss: 1.0936813354492188,grad_norm: 0.9999996793732224, iteration: 5502
loss: 1.110535979270935,grad_norm: 0.999999625775808, iteration: 5503
loss: 1.0655347108840942,grad_norm: 0.9999996613575887, iteration: 5504
loss: 1.1718920469284058,grad_norm: 0.999999757140006, iteration: 5505
loss: 1.149533987045288,grad_norm: 0.9999996445795397, iteration: 5506
loss: 1.1722564697265625,grad_norm: 0.9999996600127542, iteration: 5507
loss: 1.22072434425354,grad_norm: 0.9999997865513898, iteration: 5508
loss: 1.0826293230056763,grad_norm: 0.9999997060319554, iteration: 5509
loss: 1.168757677078247,grad_norm: 0.9999997628346624, iteration: 5510
loss: 1.2014224529266357,grad_norm: 0.9999998352353956, iteration: 5511
loss: 1.0673413276672363,grad_norm: 0.9999996390586107, iteration: 5512
loss: 1.1750296354293823,grad_norm: 0.999999748757907, iteration: 5513
loss: 1.1392908096313477,grad_norm: 0.9999997860010389, iteration: 5514
loss: 1.0708205699920654,grad_norm: 0.9999996604349008, iteration: 5515
loss: 1.0624126195907593,grad_norm: 0.9999997897075034, iteration: 5516
loss: 1.2455586194992065,grad_norm: 0.9999997881203472, iteration: 5517
loss: 1.1202441453933716,grad_norm: 0.9999995861478038, iteration: 5518
loss: 1.1354336738586426,grad_norm: 0.999999617075276, iteration: 5519
loss: 1.1196385622024536,grad_norm: 0.9999997357838352, iteration: 5520
loss: 1.1596099138259888,grad_norm: 0.9999996707763541, iteration: 5521
loss: 1.2146440744400024,grad_norm: 0.9999997693902666, iteration: 5522
loss: 1.2293494939804077,grad_norm: 0.9999997177880348, iteration: 5523
loss: 1.1611446142196655,grad_norm: 0.9999996410208839, iteration: 5524
loss: 1.0806210041046143,grad_norm: 0.9999996996837633, iteration: 5525
loss: 1.1593124866485596,grad_norm: 0.9999997030030847, iteration: 5526
loss: 1.208475112915039,grad_norm: 0.9999997434156459, iteration: 5527
loss: 1.1233302354812622,grad_norm: 0.9999997105356062, iteration: 5528
loss: 1.0970112085342407,grad_norm: 0.9999997596425391, iteration: 5529
loss: 1.1815472841262817,grad_norm: 0.9999997463981526, iteration: 5530
loss: 1.1895862817764282,grad_norm: 0.9999997448539517, iteration: 5531
loss: 1.0519224405288696,grad_norm: 0.9999996835584489, iteration: 5532
loss: 1.2167465686798096,grad_norm: 0.9999998300740659, iteration: 5533
loss: 1.122032642364502,grad_norm: 0.9999996726415888, iteration: 5534
loss: 1.2532836198806763,grad_norm: 0.9999998280705181, iteration: 5535
loss: 1.093149185180664,grad_norm: 0.9999996146499835, iteration: 5536
loss: 1.1897895336151123,grad_norm: 0.9999996984287647, iteration: 5537
loss: 1.1543422937393188,grad_norm: 0.9999997597301993, iteration: 5538
loss: 1.1969811916351318,grad_norm: 0.9999997003212416, iteration: 5539
loss: 1.1723334789276123,grad_norm: 0.9999997054092764, iteration: 5540
loss: 1.1544406414031982,grad_norm: 0.9999996399648273, iteration: 5541
loss: 1.1060595512390137,grad_norm: 0.9999996668805969, iteration: 5542
loss: 1.1280450820922852,grad_norm: 0.9999997854900105, iteration: 5543
loss: 1.1212600469589233,grad_norm: 0.9999996723565473, iteration: 5544
loss: 1.1467775106430054,grad_norm: 0.9999997755321735, iteration: 5545
loss: 1.1114318370819092,grad_norm: 0.9999996042409007, iteration: 5546
loss: 1.0764638185501099,grad_norm: 0.9999997430569522, iteration: 5547
loss: 1.2039947509765625,grad_norm: 0.9999997148899289, iteration: 5548
loss: 1.1740872859954834,grad_norm: 0.9999996884220347, iteration: 5549
loss: 1.227501392364502,grad_norm: 0.9999997775889267, iteration: 5550
loss: 1.1892666816711426,grad_norm: 0.999999656685374, iteration: 5551
loss: 1.1945174932479858,grad_norm: 0.9999997145193155, iteration: 5552
loss: 1.0985455513000488,grad_norm: 0.999999636140988, iteration: 5553
loss: 1.0435479879379272,grad_norm: 0.9999996071201881, iteration: 5554
loss: 1.1482082605361938,grad_norm: 0.9999996483653042, iteration: 5555
loss: 1.105570673942566,grad_norm: 0.9999995870046783, iteration: 5556
loss: 1.1340136528015137,grad_norm: 0.9999998134840421, iteration: 5557
loss: 1.16371488571167,grad_norm: 0.9999997219473321, iteration: 5558
loss: 1.1198188066482544,grad_norm: 0.9999997287736988, iteration: 5559
loss: 1.1878130435943604,grad_norm: 0.9999997301625023, iteration: 5560
loss: 1.182456135749817,grad_norm: 0.9999997748963448, iteration: 5561
loss: 1.1417421102523804,grad_norm: 0.9999997468440482, iteration: 5562
loss: 1.1368869543075562,grad_norm: 0.9999996166527999, iteration: 5563
loss: 1.1804320812225342,grad_norm: 0.9999997866108837, iteration: 5564
loss: 1.2965584993362427,grad_norm: 0.9999997955593775, iteration: 5565
loss: 1.1353428363800049,grad_norm: 0.9999996191336238, iteration: 5566
loss: 1.0933738946914673,grad_norm: 0.9999997045157336, iteration: 5567
loss: 1.214869499206543,grad_norm: 0.9999997245763289, iteration: 5568
loss: 1.1103028059005737,grad_norm: 0.9999997066452899, iteration: 5569
loss: 1.1685746908187866,grad_norm: 0.9999996781556093, iteration: 5570
loss: 1.13801109790802,grad_norm: 0.9999996386253838, iteration: 5571
loss: 1.0634260177612305,grad_norm: 0.9999996175696663, iteration: 5572
loss: 1.15903639793396,grad_norm: 0.9999997669343992, iteration: 5573
loss: 1.1976559162139893,grad_norm: 0.9999997077636014, iteration: 5574
loss: 1.1698520183563232,grad_norm: 0.9999997621736733, iteration: 5575
loss: 1.1653475761413574,grad_norm: 0.9999996629718796, iteration: 5576
loss: 1.1551995277404785,grad_norm: 0.9999998369222598, iteration: 5577
loss: 1.2269004583358765,grad_norm: 0.9999998184449915, iteration: 5578
loss: 1.0996952056884766,grad_norm: 0.9999996198595803, iteration: 5579
loss: 1.0795446634292603,grad_norm: 0.9999997940714845, iteration: 5580
loss: 1.1527321338653564,grad_norm: 0.9999997774706753, iteration: 5581
loss: 1.1940466165542603,grad_norm: 0.9999997699850317, iteration: 5582
loss: 1.1179893016815186,grad_norm: 0.9999996265692273, iteration: 5583
loss: 1.1541132926940918,grad_norm: 0.9999996723572724, iteration: 5584
loss: 1.1520240306854248,grad_norm: 0.9999997486493425, iteration: 5585
loss: 1.1054573059082031,grad_norm: 0.9999997902003028, iteration: 5586
loss: 1.1583130359649658,grad_norm: 0.9999997977131801, iteration: 5587
loss: 1.2521055936813354,grad_norm: 0.999999730742536, iteration: 5588
loss: 1.096621036529541,grad_norm: 0.9999997521793468, iteration: 5589
loss: 1.1465539932250977,grad_norm: 0.9999997277010153, iteration: 5590
loss: 1.1724807024002075,grad_norm: 0.9999996869446407, iteration: 5591
loss: 1.0968083143234253,grad_norm: 0.9999997375361421, iteration: 5592
loss: 1.1668024063110352,grad_norm: 0.9999997113062292, iteration: 5593
loss: 1.1897703409194946,grad_norm: 0.9999996763903188, iteration: 5594
loss: 1.2111135721206665,grad_norm: 0.9999998102680662, iteration: 5595
loss: 1.1352007389068604,grad_norm: 0.9999997045481092, iteration: 5596
loss: 1.2057968378067017,grad_norm: 0.9999997415999613, iteration: 5597
loss: 1.1601248979568481,grad_norm: 0.9999997445122852, iteration: 5598
loss: 1.1465634107589722,grad_norm: 0.9999997203540818, iteration: 5599
loss: 1.2203309535980225,grad_norm: 0.9999997382166343, iteration: 5600
loss: 1.118706464767456,grad_norm: 0.999999801203807, iteration: 5601
loss: 1.0572705268859863,grad_norm: 0.9999995977383593, iteration: 5602
loss: 1.1135739088058472,grad_norm: 0.9999996862119801, iteration: 5603
loss: 1.1832414865493774,grad_norm: 0.9999998351069782, iteration: 5604
loss: 1.1561464071273804,grad_norm: 0.9999997304153051, iteration: 5605
loss: 1.1724473237991333,grad_norm: 0.9999996359400893, iteration: 5606
loss: 1.1450319290161133,grad_norm: 0.9999996985142905, iteration: 5607
loss: 1.1121234893798828,grad_norm: 0.9999996895675412, iteration: 5608
loss: 1.054712176322937,grad_norm: 0.9999995828372676, iteration: 5609
loss: 1.0257220268249512,grad_norm: 0.9999996993614394, iteration: 5610
loss: 1.1200928688049316,grad_norm: 0.9999997043132501, iteration: 5611
loss: 1.069922685623169,grad_norm: 0.9999995140137571, iteration: 5612
loss: 1.1560578346252441,grad_norm: 0.999999710995322, iteration: 5613
loss: 1.1746453046798706,grad_norm: 0.9999998200105744, iteration: 5614
loss: 1.2402980327606201,grad_norm: 0.9999998343646598, iteration: 5615
loss: 1.174832820892334,grad_norm: 0.9999996974315177, iteration: 5616
loss: 1.1453123092651367,grad_norm: 0.9999996786490826, iteration: 5617
loss: 1.3235721588134766,grad_norm: 0.9999997468682595, iteration: 5618
loss: 1.124325156211853,grad_norm: 0.9999997451989812, iteration: 5619
loss: 1.1142736673355103,grad_norm: 0.9999996434271315, iteration: 5620
loss: 1.101635217666626,grad_norm: 0.9999996589437302, iteration: 5621
loss: 1.210315227508545,grad_norm: 0.9999997207917076, iteration: 5622
loss: 1.120178461074829,grad_norm: 0.9999997307680293, iteration: 5623
loss: 1.141992211341858,grad_norm: 0.9999996965692971, iteration: 5624
loss: 1.0735957622528076,grad_norm: 0.9999996842278664, iteration: 5625
loss: 1.207410454750061,grad_norm: 0.9999997158133797, iteration: 5626
loss: 1.1053051948547363,grad_norm: 0.9999997945206213, iteration: 5627
loss: 1.2281949520111084,grad_norm: 0.9999997659974436, iteration: 5628
loss: 1.1323620080947876,grad_norm: 0.9999995820903466, iteration: 5629
loss: 1.188774585723877,grad_norm: 0.9999997489053504, iteration: 5630
loss: 1.1351699829101562,grad_norm: 0.9999996663900144, iteration: 5631
loss: 1.2264494895935059,grad_norm: 0.9999998127384176, iteration: 5632
loss: 1.0554859638214111,grad_norm: 0.9999996028531529, iteration: 5633
loss: 1.1704685688018799,grad_norm: 0.9999996551208445, iteration: 5634
loss: 1.1702643632888794,grad_norm: 0.9999996859146532, iteration: 5635
loss: 1.128639578819275,grad_norm: 0.9999997374544347, iteration: 5636
loss: 1.1227816343307495,grad_norm: 0.9999997133025126, iteration: 5637
loss: 1.2373976707458496,grad_norm: 0.999999710251512, iteration: 5638
loss: 1.2080895900726318,grad_norm: 0.9999997448142893, iteration: 5639
loss: 0.99750155210495,grad_norm: 0.999999475127723, iteration: 5640
loss: 1.0346962213516235,grad_norm: 0.9999995684444329, iteration: 5641
loss: 1.122851014137268,grad_norm: 0.9999996676811476, iteration: 5642
loss: 1.166656732559204,grad_norm: 0.9999997535075503, iteration: 5643
loss: 1.055518627166748,grad_norm: 0.999999609427249, iteration: 5644
loss: 1.137957215309143,grad_norm: 0.9999996327086389, iteration: 5645
loss: 1.0959562063217163,grad_norm: 0.9999996541785764, iteration: 5646
loss: 1.094171404838562,grad_norm: 0.9999997450811648, iteration: 5647
loss: 1.1513359546661377,grad_norm: 0.999999823248639, iteration: 5648
loss: 1.216170310974121,grad_norm: 0.9999997165727662, iteration: 5649
loss: 1.3187557458877563,grad_norm: 0.9999998140938852, iteration: 5650
loss: 1.1335166692733765,grad_norm: 0.9999997346427796, iteration: 5651
loss: 1.113648772239685,grad_norm: 0.9999997371310628, iteration: 5652
loss: 1.0816818475723267,grad_norm: 0.9999996269052452, iteration: 5653
loss: 1.1589652299880981,grad_norm: 0.9999998391282918, iteration: 5654
loss: 1.1700117588043213,grad_norm: 0.9999996523050542, iteration: 5655
loss: 1.12908935546875,grad_norm: 0.9999996451213461, iteration: 5656
loss: 1.1055548191070557,grad_norm: 0.9999996407349755, iteration: 5657
loss: 1.132881999015808,grad_norm: 0.9999996509491443, iteration: 5658
loss: 1.1514034271240234,grad_norm: 0.9999996225369308, iteration: 5659
loss: 1.1584147214889526,grad_norm: 0.9999997524741325, iteration: 5660
loss: 1.1917301416397095,grad_norm: 0.9999997402145688, iteration: 5661
loss: 1.2216600179672241,grad_norm: 0.9999998324630526, iteration: 5662
loss: 1.1413079500198364,grad_norm: 0.9999996175172787, iteration: 5663
loss: 1.2151689529418945,grad_norm: 0.9999997371110282, iteration: 5664
loss: 1.112601399421692,grad_norm: 0.9999997033754987, iteration: 5665
loss: 1.0723717212677002,grad_norm: 0.9999997322030081, iteration: 5666
loss: 1.0846014022827148,grad_norm: 0.9999996544777913, iteration: 5667
loss: 1.0373780727386475,grad_norm: 0.9999995549522205, iteration: 5668
loss: 1.0625336170196533,grad_norm: 0.9999996165593451, iteration: 5669
loss: 1.1053580045700073,grad_norm: 0.9999997106136372, iteration: 5670
loss: 1.2186977863311768,grad_norm: 0.99999970391149, iteration: 5671
loss: 1.2298920154571533,grad_norm: 0.9999997795102871, iteration: 5672
loss: 1.190728783607483,grad_norm: 0.9999997499105099, iteration: 5673
loss: 1.0783628225326538,grad_norm: 0.9999996615555699, iteration: 5674
loss: 1.1597143411636353,grad_norm: 0.9999998202083621, iteration: 5675
loss: 1.139186978340149,grad_norm: 0.999999665052851, iteration: 5676
loss: 1.0817084312438965,grad_norm: 0.999999577709729, iteration: 5677
loss: 1.1477092504501343,grad_norm: 0.9999997093098177, iteration: 5678
loss: 1.139517068862915,grad_norm: 0.9999996579148376, iteration: 5679
loss: 1.2562578916549683,grad_norm: 0.9999997387932474, iteration: 5680
loss: 1.1714696884155273,grad_norm: 0.9999996641929464, iteration: 5681
loss: 1.218122124671936,grad_norm: 0.999999779128766, iteration: 5682
loss: 1.1716983318328857,grad_norm: 0.9999996154275078, iteration: 5683
loss: 1.2180708646774292,grad_norm: 0.9999997585212654, iteration: 5684
loss: 1.094138503074646,grad_norm: 0.9999996974335178, iteration: 5685
loss: 1.1601479053497314,grad_norm: 0.9999997824083812, iteration: 5686
loss: 1.0880701541900635,grad_norm: 0.9999995599943339, iteration: 5687
loss: 1.1978728771209717,grad_norm: 0.9999997937279235, iteration: 5688
loss: 1.1832939386367798,grad_norm: 0.9999997148311683, iteration: 5689
loss: 1.1364666223526,grad_norm: 0.9999997141168103, iteration: 5690
loss: 1.1040167808532715,grad_norm: 0.9999996952975935, iteration: 5691
loss: 1.1789171695709229,grad_norm: 0.9999996266618604, iteration: 5692
loss: 1.1101315021514893,grad_norm: 0.9999996093977858, iteration: 5693
loss: 1.175384521484375,grad_norm: 0.9999996085727894, iteration: 5694
loss: 1.1416689157485962,grad_norm: 0.9999996514473333, iteration: 5695
loss: 1.095605731010437,grad_norm: 0.9999996703647214, iteration: 5696
loss: 1.1396206617355347,grad_norm: 0.9999996736981112, iteration: 5697
loss: 1.0508642196655273,grad_norm: 0.9999996389321102, iteration: 5698
loss: 1.089614987373352,grad_norm: 0.9999995975647575, iteration: 5699
loss: 1.1821491718292236,grad_norm: 0.9999997722663733, iteration: 5700
loss: 1.0334826707839966,grad_norm: 0.9999996145263628, iteration: 5701
loss: 1.0353120565414429,grad_norm: 0.9999995745056556, iteration: 5702
loss: 1.299788236618042,grad_norm: 0.9999998155016948, iteration: 5703
loss: 1.1694029569625854,grad_norm: 0.9999997578874611, iteration: 5704
loss: 1.201353907585144,grad_norm: 0.9999997928120158, iteration: 5705
loss: 1.213468313217163,grad_norm: 0.9999998161294864, iteration: 5706
loss: 1.1279972791671753,grad_norm: 0.9999998041081949, iteration: 5707
loss: 1.0843958854675293,grad_norm: 0.999999616745526, iteration: 5708
loss: 1.0653585195541382,grad_norm: 0.9999997174699883, iteration: 5709
loss: 1.1415395736694336,grad_norm: 0.9999996545451414, iteration: 5710
loss: 1.1795679330825806,grad_norm: 0.9999997341803324, iteration: 5711
loss: 1.126360297203064,grad_norm: 0.9999997240318262, iteration: 5712
loss: 1.15364670753479,grad_norm: 0.9999996906094994, iteration: 5713
loss: 1.169843316078186,grad_norm: 0.9999996467552956, iteration: 5714
loss: 1.1667512655258179,grad_norm: 0.999999716881841, iteration: 5715
loss: 1.1341663599014282,grad_norm: 0.999999736455623, iteration: 5716
loss: 1.1103168725967407,grad_norm: 0.9999996694777882, iteration: 5717
loss: 1.1144620180130005,grad_norm: 0.9999996165880247, iteration: 5718
loss: 1.120830774307251,grad_norm: 0.9999996449145064, iteration: 5719
loss: 1.0828996896743774,grad_norm: 0.9999996083994951, iteration: 5720
loss: 1.0743401050567627,grad_norm: 0.999999607714585, iteration: 5721
loss: 1.0440020561218262,grad_norm: 0.9999997415165067, iteration: 5722
loss: 1.0189847946166992,grad_norm: 0.9999996590495465, iteration: 5723
loss: 1.2699835300445557,grad_norm: 0.9999998054089909, iteration: 5724
loss: 1.1601393222808838,grad_norm: 0.9999997263092354, iteration: 5725
loss: 1.1034576892852783,grad_norm: 0.9999996456515554, iteration: 5726
loss: 1.2320749759674072,grad_norm: 0.9999997709312425, iteration: 5727
loss: 1.1145864725112915,grad_norm: 0.9999997260852607, iteration: 5728
loss: 1.1518436670303345,grad_norm: 0.9999996270073107, iteration: 5729
loss: 1.085724115371704,grad_norm: 0.9999996037067368, iteration: 5730
loss: 1.1553500890731812,grad_norm: 0.9999996474566002, iteration: 5731
loss: 1.1248834133148193,grad_norm: 0.9999996765735332, iteration: 5732
loss: 1.0901442766189575,grad_norm: 0.9999995934757819, iteration: 5733
loss: 1.0205800533294678,grad_norm: 0.9999995921535851, iteration: 5734
loss: 1.0488171577453613,grad_norm: 0.9999996074392096, iteration: 5735
loss: 1.0670771598815918,grad_norm: 0.9999996687842613, iteration: 5736
loss: 1.1842073202133179,grad_norm: 0.9999997652466664, iteration: 5737
loss: 1.1474032402038574,grad_norm: 0.9999996886591636, iteration: 5738
loss: 1.1069960594177246,grad_norm: 0.9999996194916622, iteration: 5739
loss: 1.0999513864517212,grad_norm: 0.9999997553780963, iteration: 5740
loss: 1.1384551525115967,grad_norm: 0.9999998113686984, iteration: 5741
loss: 1.127602219581604,grad_norm: 0.9999997119150121, iteration: 5742
loss: 1.2350373268127441,grad_norm: 0.9999997336472202, iteration: 5743
loss: 1.0743062496185303,grad_norm: 0.9999997186550768, iteration: 5744
loss: 1.1522481441497803,grad_norm: 0.9999997390075759, iteration: 5745
loss: 1.1174304485321045,grad_norm: 0.999999681693627, iteration: 5746
loss: 1.213092565536499,grad_norm: 0.9999996901092808, iteration: 5747
loss: 1.1559261083602905,grad_norm: 0.9999996717005485, iteration: 5748
loss: 1.11225163936615,grad_norm: 0.9999996963665675, iteration: 5749
loss: 1.1592321395874023,grad_norm: 0.9999997246633918, iteration: 5750
loss: 1.1129865646362305,grad_norm: 0.9999996337772722, iteration: 5751
loss: 1.1807661056518555,grad_norm: 0.9999996306020146, iteration: 5752
loss: 1.15488600730896,grad_norm: 0.9999997288641245, iteration: 5753
loss: 1.0183607339859009,grad_norm: 0.9999995745055225, iteration: 5754
loss: 1.1585737466812134,grad_norm: 0.9999997360038145, iteration: 5755
loss: 1.1529223918914795,grad_norm: 0.9999997076153804, iteration: 5756
loss: 1.1363306045532227,grad_norm: 0.999999689641036, iteration: 5757
loss: 1.234094500541687,grad_norm: 0.9999997154007805, iteration: 5758
loss: 1.0859203338623047,grad_norm: 0.9999996952812468, iteration: 5759
loss: 1.1178117990493774,grad_norm: 0.9999996463147406, iteration: 5760
loss: 1.1261858940124512,grad_norm: 0.9999996922643495, iteration: 5761
loss: 1.0480527877807617,grad_norm: 0.9999995261765869, iteration: 5762
loss: 1.1394586563110352,grad_norm: 0.9999997199165132, iteration: 5763
loss: 1.1880626678466797,grad_norm: 0.9999997466697718, iteration: 5764
loss: 1.120836853981018,grad_norm: 0.9999996981011309, iteration: 5765
loss: 1.1174137592315674,grad_norm: 0.9999997020056346, iteration: 5766
loss: 1.142897367477417,grad_norm: 0.9999995551239047, iteration: 5767
loss: 1.124531626701355,grad_norm: 0.9999996396148892, iteration: 5768
loss: 1.0793299674987793,grad_norm: 0.9999997132435388, iteration: 5769
loss: 1.0556670427322388,grad_norm: 0.9999995407357586, iteration: 5770
loss: 1.1673990488052368,grad_norm: 0.999999850245855, iteration: 5771
loss: 1.1449276208877563,grad_norm: 0.999999754402582, iteration: 5772
loss: 1.1721802949905396,grad_norm: 0.9999998019484176, iteration: 5773
loss: 1.0776249170303345,grad_norm: 0.9999997132224064, iteration: 5774
loss: 1.1342260837554932,grad_norm: 0.9999997378771825, iteration: 5775
loss: 1.1017236709594727,grad_norm: 0.9999995671332808, iteration: 5776
loss: 1.0546027421951294,grad_norm: 0.999999658616662, iteration: 5777
loss: 1.0703860521316528,grad_norm: 0.9999996805433665, iteration: 5778
loss: 1.2260079383850098,grad_norm: 0.9999997100894658, iteration: 5779
loss: 1.1276246309280396,grad_norm: 0.9999996769562811, iteration: 5780
loss: 1.160675048828125,grad_norm: 0.9999997388254184, iteration: 5781
loss: 1.0597835779190063,grad_norm: 0.9999996288166164, iteration: 5782
loss: 1.1038340330123901,grad_norm: 0.9999995733688559, iteration: 5783
loss: 1.1436762809753418,grad_norm: 0.9999996310472051, iteration: 5784
loss: 1.1717256307601929,grad_norm: 0.9999997479041243, iteration: 5785
loss: 1.146270990371704,grad_norm: 0.9999997012520893, iteration: 5786
loss: 1.0010217428207397,grad_norm: 0.9999996520661857, iteration: 5787
loss: 1.0841342210769653,grad_norm: 0.9999995813659114, iteration: 5788
loss: 1.1167818307876587,grad_norm: 0.999999535117796, iteration: 5789
loss: 1.0562517642974854,grad_norm: 0.999999605503032, iteration: 5790
loss: 1.1000087261199951,grad_norm: 0.9999997050629474, iteration: 5791
loss: 1.1644246578216553,grad_norm: 0.9999997233474219, iteration: 5792
loss: 1.129284381866455,grad_norm: 0.9999996577320095, iteration: 5793
loss: 1.19723379611969,grad_norm: 0.9999996547301113, iteration: 5794
loss: 1.120460867881775,grad_norm: 0.9999997405778601, iteration: 5795
loss: 1.1631267070770264,grad_norm: 0.999999715684781, iteration: 5796
loss: 1.153030276298523,grad_norm: 0.9999997079099726, iteration: 5797
loss: 1.1084742546081543,grad_norm: 0.9999995710625852, iteration: 5798
loss: 1.1698435544967651,grad_norm: 0.9999996316500785, iteration: 5799
loss: 1.1353263854980469,grad_norm: 0.9999997602694172, iteration: 5800
loss: 1.1235448122024536,grad_norm: 0.999999644091085, iteration: 5801
loss: 1.1331404447555542,grad_norm: 0.9999996637059697, iteration: 5802
loss: 1.104027509689331,grad_norm: 0.9999996695447069, iteration: 5803
loss: 1.124937891960144,grad_norm: 0.9999996589598127, iteration: 5804
loss: 1.102534294128418,grad_norm: 0.9999997350336687, iteration: 5805
loss: 1.1127055883407593,grad_norm: 0.9999996900887088, iteration: 5806
loss: 1.2101151943206787,grad_norm: 0.9999997726817003, iteration: 5807
loss: 1.1060760021209717,grad_norm: 0.9999997904693029, iteration: 5808
loss: 1.0706501007080078,grad_norm: 0.9999996792022599, iteration: 5809
loss: 1.1004713773727417,grad_norm: 0.999999670365355, iteration: 5810
loss: 1.1317365169525146,grad_norm: 0.9999996262996323, iteration: 5811
loss: 1.1170494556427002,grad_norm: 0.9999996988788025, iteration: 5812
loss: 1.1027095317840576,grad_norm: 0.9999996296659073, iteration: 5813
loss: 1.039759874343872,grad_norm: 0.9999996976989465, iteration: 5814
loss: 1.1495815515518188,grad_norm: 0.9999996602050443, iteration: 5815
loss: 1.0382753610610962,grad_norm: 0.9999996695530955, iteration: 5816
loss: 1.1629023551940918,grad_norm: 0.9999997652275915, iteration: 5817
loss: 1.132610559463501,grad_norm: 0.9999996960896437, iteration: 5818
loss: 1.1670892238616943,grad_norm: 0.9999996925770217, iteration: 5819
loss: 1.1910059452056885,grad_norm: 0.999999807663967, iteration: 5820
loss: 1.1560378074645996,grad_norm: 0.9999996780405367, iteration: 5821
loss: 1.105562686920166,grad_norm: 0.99999973945849, iteration: 5822
loss: 1.0665874481201172,grad_norm: 0.9999996021167122, iteration: 5823
loss: 1.1001540422439575,grad_norm: 0.9999996902131363, iteration: 5824
loss: 1.0369279384613037,grad_norm: 0.999999680031235, iteration: 5825
loss: 1.0613833665847778,grad_norm: 0.9999996756806754, iteration: 5826
loss: 1.1187167167663574,grad_norm: 0.9999996514209095, iteration: 5827
loss: 1.1457571983337402,grad_norm: 0.9999997081902756, iteration: 5828
loss: 1.077138066291809,grad_norm: 0.999999651847151, iteration: 5829
loss: 1.182873249053955,grad_norm: 0.9999997817227169, iteration: 5830
loss: 1.0631706714630127,grad_norm: 0.9999997393678804, iteration: 5831
loss: 1.0918018817901611,grad_norm: 0.9999996164381512, iteration: 5832
loss: 1.0941044092178345,grad_norm: 0.9999996863194883, iteration: 5833
loss: 1.0963740348815918,grad_norm: 0.9999996529612201, iteration: 5834
loss: 1.1394561529159546,grad_norm: 0.9999997162461386, iteration: 5835
loss: 1.081192970275879,grad_norm: 0.9999996594525358, iteration: 5836
loss: 1.1434955596923828,grad_norm: 0.9999996362904396, iteration: 5837
loss: 1.1494412422180176,grad_norm: 0.9999996961871983, iteration: 5838
loss: 1.0933926105499268,grad_norm: 0.9999996798236108, iteration: 5839
loss: 1.1793407201766968,grad_norm: 0.9999997311526989, iteration: 5840
loss: 1.1776301860809326,grad_norm: 0.9999997384615119, iteration: 5841
loss: 1.084678053855896,grad_norm: 0.9999995614008912, iteration: 5842
loss: 1.225898027420044,grad_norm: 0.9999996895411818, iteration: 5843
loss: 1.1693997383117676,grad_norm: 0.9999997645277611, iteration: 5844
loss: 1.0291186571121216,grad_norm: 0.9999997172165527, iteration: 5845
loss: 1.1003150939941406,grad_norm: 0.9999996553881279, iteration: 5846
loss: 1.1517455577850342,grad_norm: 0.9999995965348741, iteration: 5847
loss: 1.1288270950317383,grad_norm: 0.9999996642361829, iteration: 5848
loss: 1.0771723985671997,grad_norm: 0.9999997256526039, iteration: 5849
loss: 1.1733938455581665,grad_norm: 0.9999998242098227, iteration: 5850
loss: 1.0776005983352661,grad_norm: 0.9999995849174476, iteration: 5851
loss: 1.0510507822036743,grad_norm: 0.9999996662819511, iteration: 5852
loss: 1.2177932262420654,grad_norm: 0.9999997644691674, iteration: 5853
loss: 1.0651415586471558,grad_norm: 0.9999996272813543, iteration: 5854
loss: 1.1115286350250244,grad_norm: 0.999999650300275, iteration: 5855
loss: 1.192812442779541,grad_norm: 0.9999998281684577, iteration: 5856
loss: 1.1157978773117065,grad_norm: 0.9999997237081991, iteration: 5857
loss: 1.1327909231185913,grad_norm: 0.9999995561083574, iteration: 5858
loss: 1.1257976293563843,grad_norm: 0.9999997072923459, iteration: 5859
loss: 1.1606279611587524,grad_norm: 0.999999710219251, iteration: 5860
loss: 1.1019960641860962,grad_norm: 0.9999995551390766, iteration: 5861
loss: 1.1944475173950195,grad_norm: 0.9999996784500266, iteration: 5862
loss: 1.1174263954162598,grad_norm: 0.9999995616456918, iteration: 5863
loss: 1.1067126989364624,grad_norm: 0.9999996289406736, iteration: 5864
loss: 1.1034297943115234,grad_norm: 0.999999603155842, iteration: 5865
loss: 1.2051752805709839,grad_norm: 0.99999970556331, iteration: 5866
loss: 1.1098542213439941,grad_norm: 0.9999996461191284, iteration: 5867
loss: 1.0507255792617798,grad_norm: 0.9999995266459802, iteration: 5868
loss: 1.1072674989700317,grad_norm: 0.9999996450179066, iteration: 5869
loss: 1.1871416568756104,grad_norm: 0.9999997842092379, iteration: 5870
loss: 1.1958580017089844,grad_norm: 0.9999997062106698, iteration: 5871
loss: 1.0731271505355835,grad_norm: 0.9999997836501733, iteration: 5872
loss: 1.2031813859939575,grad_norm: 0.9999996663855328, iteration: 5873
loss: 1.170730471611023,grad_norm: 0.999999628024669, iteration: 5874
loss: 1.1603232622146606,grad_norm: 0.9999996029970908, iteration: 5875
loss: 1.1994096040725708,grad_norm: 0.9999997871370253, iteration: 5876
loss: 1.2017701864242554,grad_norm: 0.9999998245805412, iteration: 5877
loss: 1.1728477478027344,grad_norm: 0.9999998151922739, iteration: 5878
loss: 1.096901535987854,grad_norm: 0.9999997076306677, iteration: 5879
loss: 1.228941559791565,grad_norm: 0.9999997988462742, iteration: 5880
loss: 1.1384999752044678,grad_norm: 0.9999996980401102, iteration: 5881
loss: 1.123651385307312,grad_norm: 0.9999995644172196, iteration: 5882
loss: 1.1391159296035767,grad_norm: 0.9999996996502192, iteration: 5883
loss: 1.1907936334609985,grad_norm: 0.9999997215557914, iteration: 5884
loss: 1.0536541938781738,grad_norm: 0.9999996205109271, iteration: 5885
loss: 1.1561864614486694,grad_norm: 0.9999996121235403, iteration: 5886
loss: 1.1502386331558228,grad_norm: 0.9999997160235735, iteration: 5887
loss: 1.14741051197052,grad_norm: 0.9999995982970699, iteration: 5888
loss: 1.053293228149414,grad_norm: 0.9999996060352397, iteration: 5889
loss: 1.1454898118972778,grad_norm: 0.9999997343963317, iteration: 5890
loss: 1.101340889930725,grad_norm: 0.9999997789246221, iteration: 5891
loss: 1.084188461303711,grad_norm: 0.9999996087363717, iteration: 5892
loss: 1.0773099660873413,grad_norm: 0.9999997049219558, iteration: 5893
loss: 1.092626690864563,grad_norm: 0.9999995789692813, iteration: 5894
loss: 1.0288938283920288,grad_norm: 0.9999995505927273, iteration: 5895
loss: 1.0406395196914673,grad_norm: 0.9999995650357872, iteration: 5896
loss: 1.1083877086639404,grad_norm: 0.9999996734391116, iteration: 5897
loss: 1.1161596775054932,grad_norm: 0.999999527963364, iteration: 5898
loss: 1.1067924499511719,grad_norm: 0.9999998072537316, iteration: 5899
loss: 1.0966956615447998,grad_norm: 0.9999996773659152, iteration: 5900
loss: 1.0570725202560425,grad_norm: 0.9999995343154088, iteration: 5901
loss: 1.072725534439087,grad_norm: 0.9999996237696156, iteration: 5902
loss: 1.1924020051956177,grad_norm: 0.9999997637996506, iteration: 5903
loss: 1.1156895160675049,grad_norm: 0.9999996531763965, iteration: 5904
loss: 1.08845055103302,grad_norm: 0.9999995526308277, iteration: 5905
loss: 1.1100581884384155,grad_norm: 0.9999995705722909, iteration: 5906
loss: 1.0692261457443237,grad_norm: 0.9999997197875496, iteration: 5907
loss: 1.133702039718628,grad_norm: 0.9999997410360277, iteration: 5908
loss: 1.1400785446166992,grad_norm: 0.9999996077214142, iteration: 5909
loss: 1.1102714538574219,grad_norm: 0.9999996277464177, iteration: 5910
loss: 1.228122353553772,grad_norm: 0.9999997725875011, iteration: 5911
loss: 1.1066725254058838,grad_norm: 0.9999996068046652, iteration: 5912
loss: 1.2035995721817017,grad_norm: 0.9999997119282776, iteration: 5913
loss: 1.1490017175674438,grad_norm: 0.99999974196969, iteration: 5914
loss: 1.0829815864562988,grad_norm: 0.9999996680845479, iteration: 5915
loss: 1.1878570318222046,grad_norm: 0.9999997430459368, iteration: 5916
loss: 1.0674848556518555,grad_norm: 0.9999995948770131, iteration: 5917
loss: 1.0966249704360962,grad_norm: 0.9999995437674891, iteration: 5918
loss: 1.1948862075805664,grad_norm: 0.9999997998940569, iteration: 5919
loss: 1.0986779928207397,grad_norm: 0.9999997021653176, iteration: 5920
loss: 1.0946063995361328,grad_norm: 0.9999995794780071, iteration: 5921
loss: 1.1445121765136719,grad_norm: 0.999999654122283, iteration: 5922
loss: 1.0770552158355713,grad_norm: 0.9999997279081383, iteration: 5923
loss: 1.1109464168548584,grad_norm: 0.9999996385491616, iteration: 5924
loss: 1.1747395992279053,grad_norm: 0.9999996928309475, iteration: 5925
loss: 1.0497441291809082,grad_norm: 0.9999996118912559, iteration: 5926
loss: 1.19425630569458,grad_norm: 0.9999997103926209, iteration: 5927
loss: 1.1202001571655273,grad_norm: 0.9999997869136359, iteration: 5928
loss: 1.115210771560669,grad_norm: 0.9999995375711247, iteration: 5929
loss: 1.152782678604126,grad_norm: 0.9999997112928035, iteration: 5930
loss: 1.187485933303833,grad_norm: 0.9999996511420866, iteration: 5931
loss: 1.1801822185516357,grad_norm: 0.9999996160084165, iteration: 5932
loss: 1.0830491781234741,grad_norm: 0.9999997984677157, iteration: 5933
loss: 1.0531301498413086,grad_norm: 0.9999995913004163, iteration: 5934
loss: 1.0348910093307495,grad_norm: 0.9999995771419256, iteration: 5935
loss: 1.1532337665557861,grad_norm: 0.999999627440174, iteration: 5936
loss: 1.0381286144256592,grad_norm: 0.9999996788341539, iteration: 5937
loss: 1.1340850591659546,grad_norm: 0.9999997330805391, iteration: 5938
loss: 1.1634443998336792,grad_norm: 0.9999997043012009, iteration: 5939
loss: 1.0917046070098877,grad_norm: 0.9999996571398584, iteration: 5940
loss: 1.2003633975982666,grad_norm: 0.9999997541890948, iteration: 5941
loss: 1.1312603950500488,grad_norm: 0.9999997199133783, iteration: 5942
loss: 1.1804052591323853,grad_norm: 0.9999996633302043, iteration: 5943
loss: 1.0962685346603394,grad_norm: 0.9999996830495255, iteration: 5944
loss: 1.1615880727767944,grad_norm: 0.9999997414953243, iteration: 5945
loss: 1.1428040266036987,grad_norm: 0.9999996588292355, iteration: 5946
loss: 1.1268054246902466,grad_norm: 0.9999997454330718, iteration: 5947
loss: 1.0374228954315186,grad_norm: 0.9999996437501645, iteration: 5948
loss: 1.0795655250549316,grad_norm: 0.9999996914757541, iteration: 5949
loss: 1.1514629125595093,grad_norm: 0.999999646153409, iteration: 5950
loss: 1.176525354385376,grad_norm: 0.9999995959203587, iteration: 5951
loss: 1.0233371257781982,grad_norm: 0.9999994564900024, iteration: 5952
loss: 1.102015733718872,grad_norm: 0.999999665609396, iteration: 5953
loss: 1.0613255500793457,grad_norm: 0.9999995695046495, iteration: 5954
loss: 1.116346001625061,grad_norm: 0.9999997033937446, iteration: 5955
loss: 1.198890209197998,grad_norm: 0.9999998225148302, iteration: 5956
loss: 1.0731974840164185,grad_norm: 0.999999679973565, iteration: 5957
loss: 1.1317335367202759,grad_norm: 0.9999997007190583, iteration: 5958
loss: 1.1968543529510498,grad_norm: 0.9999997138344225, iteration: 5959
loss: 1.1341830492019653,grad_norm: 0.9999997029406824, iteration: 5960
loss: 1.1090521812438965,grad_norm: 0.9999996729063305, iteration: 5961
loss: 1.1925320625305176,grad_norm: 0.9999997872854325, iteration: 5962
loss: 1.1142240762710571,grad_norm: 0.9999996093847555, iteration: 5963
loss: 1.0563023090362549,grad_norm: 0.9999997175159435, iteration: 5964
loss: 0.9791996479034424,grad_norm: 0.9999995162136553, iteration: 5965
loss: 1.0642973184585571,grad_norm: 0.9999995420388912, iteration: 5966
loss: 1.1113473176956177,grad_norm: 0.999999615056966, iteration: 5967
loss: 1.1777472496032715,grad_norm: 0.9999996801384345, iteration: 5968
loss: 1.0812658071517944,grad_norm: 0.9999995652653678, iteration: 5969
loss: 1.1603814363479614,grad_norm: 0.9999997466479077, iteration: 5970
loss: 1.0642828941345215,grad_norm: 0.9999995415076601, iteration: 5971
loss: 0.967413604259491,grad_norm: 0.999999468214238, iteration: 5972
loss: 1.0971753597259521,grad_norm: 0.999999611339437, iteration: 5973
loss: 1.1515127420425415,grad_norm: 0.9999996576453852, iteration: 5974
loss: 1.111891746520996,grad_norm: 0.999999661982088, iteration: 5975
loss: 1.1065446138381958,grad_norm: 0.9999995922739658, iteration: 5976
loss: 1.0808779001235962,grad_norm: 0.999999715103975, iteration: 5977
loss: 1.1264265775680542,grad_norm: 0.9999996389442656, iteration: 5978
loss: 1.1739970445632935,grad_norm: 0.9999997838944334, iteration: 5979
loss: 1.2051644325256348,grad_norm: 0.999999668339393, iteration: 5980
loss: 1.0831711292266846,grad_norm: 0.9999996338110766, iteration: 5981
loss: 1.1205432415008545,grad_norm: 0.9999997251148113, iteration: 5982
loss: 1.1727045774459839,grad_norm: 0.9999997224009185, iteration: 5983
loss: 1.1639372110366821,grad_norm: 0.999999785863934, iteration: 5984
loss: 1.0757642984390259,grad_norm: 0.9999997031361852, iteration: 5985
loss: 1.065499186515808,grad_norm: 0.9999997432364592, iteration: 5986
loss: 1.0654518604278564,grad_norm: 0.9999995970195993, iteration: 5987
loss: 1.0436933040618896,grad_norm: 0.9999995409620636, iteration: 5988
loss: 1.063612461090088,grad_norm: 0.9999995643100751, iteration: 5989
loss: 1.247249722480774,grad_norm: 0.9999997713200102, iteration: 5990
loss: 1.2315690517425537,grad_norm: 0.99999980688648, iteration: 5991
loss: 1.0401490926742554,grad_norm: 0.999999553304439, iteration: 5992
loss: 1.165574550628662,grad_norm: 0.9999996789955684, iteration: 5993
loss: 1.0926647186279297,grad_norm: 0.9999996357091971, iteration: 5994
loss: 1.1279598474502563,grad_norm: 0.9999996062031545, iteration: 5995
loss: 1.0884637832641602,grad_norm: 0.9999996450382909, iteration: 5996
loss: 1.1276090145111084,grad_norm: 0.9999996360284031, iteration: 5997
loss: 1.1425244808197021,grad_norm: 0.9999996096470135, iteration: 5998
loss: 1.0993324518203735,grad_norm: 0.9999995629683629, iteration: 5999
loss: 1.0529141426086426,grad_norm: 0.999999676227786, iteration: 6000
loss: 1.08674156665802,grad_norm: 0.9999997423318627, iteration: 6001
loss: 1.1013143062591553,grad_norm: 0.9999996021323679, iteration: 6002
loss: 1.0147448778152466,grad_norm: 0.9999995184462573, iteration: 6003
loss: 1.1805087327957153,grad_norm: 0.9999996169777127, iteration: 6004
loss: 1.0579321384429932,grad_norm: 0.9999994815328361, iteration: 6005
loss: 1.1355561017990112,grad_norm: 0.9999996119250975, iteration: 6006
loss: 1.187479019165039,grad_norm: 0.9999997259330641, iteration: 6007
loss: 1.1849366426467896,grad_norm: 0.9999997523901174, iteration: 6008
loss: 1.1498304605484009,grad_norm: 0.9999997214558571, iteration: 6009
loss: 1.0485823154449463,grad_norm: 0.9999996057120181, iteration: 6010
loss: 1.1226329803466797,grad_norm: 0.9999996402887471, iteration: 6011
loss: 1.0295573472976685,grad_norm: 0.9999995475849612, iteration: 6012
loss: 1.1596022844314575,grad_norm: 0.9999997714546935, iteration: 6013
loss: 1.1090900897979736,grad_norm: 0.9999997248502847, iteration: 6014
loss: 1.0956717729568481,grad_norm: 0.9999995231963916, iteration: 6015
loss: 1.0893503427505493,grad_norm: 0.9999996283200838, iteration: 6016
loss: 1.1501845121383667,grad_norm: 0.9999997389595143, iteration: 6017
loss: 1.0411720275878906,grad_norm: 0.9999996795929191, iteration: 6018
loss: 1.0971752405166626,grad_norm: 0.9999995656137405, iteration: 6019
loss: 1.1387263536453247,grad_norm: 0.9999996118413669, iteration: 6020
loss: 1.17158043384552,grad_norm: 0.9999996480368526, iteration: 6021
loss: 1.0740280151367188,grad_norm: 0.999999730242282, iteration: 6022
loss: 1.0456112623214722,grad_norm: 0.9999995637541038, iteration: 6023
loss: 1.0607478618621826,grad_norm: 0.999999588929035, iteration: 6024
loss: 1.151456356048584,grad_norm: 0.9999996582998725, iteration: 6025
loss: 1.0791230201721191,grad_norm: 0.9999996744675886, iteration: 6026
loss: 1.09140145778656,grad_norm: 0.9999997948811021, iteration: 6027
loss: 1.1098999977111816,grad_norm: 0.9999995845298661, iteration: 6028
loss: 1.1157554388046265,grad_norm: 0.9999997637137031, iteration: 6029
loss: 1.0579618215560913,grad_norm: 0.9999995560733126, iteration: 6030
loss: 1.118194818496704,grad_norm: 0.999999822721366, iteration: 6031
loss: 1.1306920051574707,grad_norm: 0.9999997853673485, iteration: 6032
loss: 1.101910948753357,grad_norm: 0.9999997170990588, iteration: 6033
loss: 1.1623876094818115,grad_norm: 0.9999995950598692, iteration: 6034
loss: 1.0936861038208008,grad_norm: 0.9999996681253015, iteration: 6035
loss: 1.1463547945022583,grad_norm: 0.9999996333270813, iteration: 6036
loss: 1.1172711849212646,grad_norm: 0.9999997194255652, iteration: 6037
loss: 1.0489927530288696,grad_norm: 0.9999996509243845, iteration: 6038
loss: 1.246427297592163,grad_norm: 0.9999998371070384, iteration: 6039
loss: 1.0404078960418701,grad_norm: 0.9999995981472909, iteration: 6040
loss: 1.128751516342163,grad_norm: 0.9999996730397531, iteration: 6041
loss: 1.0567219257354736,grad_norm: 0.9999997015433738, iteration: 6042
loss: 1.0858451128005981,grad_norm: 0.9999997561171852, iteration: 6043
loss: 1.1143494844436646,grad_norm: 0.9999997046919903, iteration: 6044
loss: 1.1234016418457031,grad_norm: 0.9999997547136367, iteration: 6045
loss: 1.0864884853363037,grad_norm: 0.9999996874540572, iteration: 6046
loss: 1.0480176210403442,grad_norm: 0.9999995990376588, iteration: 6047
loss: 1.127327561378479,grad_norm: 0.9999996733877922, iteration: 6048
loss: 1.108947992324829,grad_norm: 0.9999996977078239, iteration: 6049
loss: 1.0630733966827393,grad_norm: 0.9999996657404281, iteration: 6050
loss: 1.0609441995620728,grad_norm: 0.9999996495705534, iteration: 6051
loss: 1.1635417938232422,grad_norm: 0.9999997212441771, iteration: 6052
loss: 1.1122547388076782,grad_norm: 0.9999996644204366, iteration: 6053
loss: 1.1374263763427734,grad_norm: 0.9999997196514195, iteration: 6054
loss: 1.1084234714508057,grad_norm: 0.9999996475626991, iteration: 6055
loss: 1.1349995136260986,grad_norm: 0.9999996998233759, iteration: 6056
loss: 1.1475590467453003,grad_norm: 0.9999996776584693, iteration: 6057
loss: 1.1902786493301392,grad_norm: 0.9999997781486459, iteration: 6058
loss: 1.0812633037567139,grad_norm: 0.999999628814931, iteration: 6059
loss: 1.1067413091659546,grad_norm: 0.9999997317107701, iteration: 6060
loss: 1.084097146987915,grad_norm: 0.9999994854974708, iteration: 6061
loss: 1.099308967590332,grad_norm: 0.9999997560091748, iteration: 6062
loss: 1.1273736953735352,grad_norm: 0.9999997829051839, iteration: 6063
loss: 1.1260806322097778,grad_norm: 0.999999736410897, iteration: 6064
loss: 1.0897257328033447,grad_norm: 0.9999996269354443, iteration: 6065
loss: 1.071061372756958,grad_norm: 0.999999634403125, iteration: 6066
loss: 1.1821001768112183,grad_norm: 0.9999996557021259, iteration: 6067
loss: 1.1108943223953247,grad_norm: 0.9999996803637194, iteration: 6068
loss: 1.1089696884155273,grad_norm: 0.9999996891017663, iteration: 6069
loss: 1.2587310075759888,grad_norm: 0.9999997212817171, iteration: 6070
loss: 1.14905846118927,grad_norm: 0.9999998039893586, iteration: 6071
loss: 1.1110860109329224,grad_norm: 0.9999996164282184, iteration: 6072
loss: 1.0415505170822144,grad_norm: 0.9999995586273309, iteration: 6073
loss: 1.0667600631713867,grad_norm: 0.9999995211501572, iteration: 6074
loss: 1.1446716785430908,grad_norm: 0.9999996455210151, iteration: 6075
loss: 1.0729647874832153,grad_norm: 0.9999996363099787, iteration: 6076
loss: 1.1041339635849,grad_norm: 0.9999996789509625, iteration: 6077
loss: 1.0700016021728516,grad_norm: 0.9999996705362431, iteration: 6078
loss: 1.1850275993347168,grad_norm: 0.9999997608093159, iteration: 6079
loss: 1.087852954864502,grad_norm: 0.9999995213333673, iteration: 6080
loss: 1.098116397857666,grad_norm: 0.9999996133339345, iteration: 6081
loss: 1.0823662281036377,grad_norm: 0.9999996644469954, iteration: 6082
loss: 1.1583309173583984,grad_norm: 0.9999996669961239, iteration: 6083
loss: 1.0922820568084717,grad_norm: 0.999999589085897, iteration: 6084
loss: 1.069597601890564,grad_norm: 0.999999611580753, iteration: 6085
loss: 1.1160948276519775,grad_norm: 0.9999996927033107, iteration: 6086
loss: 1.1647419929504395,grad_norm: 0.9999996518273724, iteration: 6087
loss: 1.14299738407135,grad_norm: 0.9999996643664593, iteration: 6088
loss: 1.1346004009246826,grad_norm: 0.9999997001564789, iteration: 6089
loss: 1.0897568464279175,grad_norm: 0.9999995379345445, iteration: 6090
loss: 1.0210837125778198,grad_norm: 0.9999996453170494, iteration: 6091
loss: 1.1040499210357666,grad_norm: 0.9999995530823973, iteration: 6092
loss: 1.0697505474090576,grad_norm: 0.9999996075122479, iteration: 6093
loss: 1.1013845205307007,grad_norm: 0.9999997142242938, iteration: 6094
loss: 1.1646493673324585,grad_norm: 0.9999997020657978, iteration: 6095
loss: 1.1211011409759521,grad_norm: 0.9999996178845083, iteration: 6096
loss: 1.09499192237854,grad_norm: 0.9999994847879053, iteration: 6097
loss: 1.1233683824539185,grad_norm: 0.9999996212998873, iteration: 6098
loss: 1.119248628616333,grad_norm: 0.9999996137393214, iteration: 6099
loss: 1.1687504053115845,grad_norm: 0.9999997825040702, iteration: 6100
loss: 1.1405471563339233,grad_norm: 0.9999997250167143, iteration: 6101
loss: 1.0905718803405762,grad_norm: 0.9999996553771717, iteration: 6102
loss: 1.0843397378921509,grad_norm: 0.9999996337971724, iteration: 6103
loss: 1.123518705368042,grad_norm: 0.9999996400406853, iteration: 6104
loss: 1.1073405742645264,grad_norm: 0.9999996645935179, iteration: 6105
loss: 1.0632175207138062,grad_norm: 0.9999996215261864, iteration: 6106
loss: 1.0657720565795898,grad_norm: 0.9999994860929083, iteration: 6107
loss: 1.164974331855774,grad_norm: 0.9999996195487644, iteration: 6108
loss: 1.0749189853668213,grad_norm: 0.9999996019868297, iteration: 6109
loss: 1.1024444103240967,grad_norm: 0.9999997594533433, iteration: 6110
loss: 1.1015273332595825,grad_norm: 0.9999996378477161, iteration: 6111
loss: 1.116331934928894,grad_norm: 0.9999996021442297, iteration: 6112
loss: 1.0155560970306396,grad_norm: 0.9999995806760585, iteration: 6113
loss: 1.1139010190963745,grad_norm: 0.9999997196441311, iteration: 6114
loss: 1.1229181289672852,grad_norm: 0.9999996485345022, iteration: 6115
loss: 1.0371896028518677,grad_norm: 0.9999993906951773, iteration: 6116
loss: 1.1250077486038208,grad_norm: 0.9999997101676328, iteration: 6117
loss: 1.0919654369354248,grad_norm: 0.9999995621276224, iteration: 6118
loss: 1.0650209188461304,grad_norm: 0.9999994731763249, iteration: 6119
loss: 1.0960111618041992,grad_norm: 0.9999996774037537, iteration: 6120
loss: 1.0674049854278564,grad_norm: 0.9999995869775133, iteration: 6121
loss: 1.0773476362228394,grad_norm: 0.9999996564844055, iteration: 6122
loss: 1.140213966369629,grad_norm: 0.9999997945207347, iteration: 6123
loss: 1.0869784355163574,grad_norm: 0.9999995761933267, iteration: 6124
loss: 1.072214961051941,grad_norm: 0.999999618690165, iteration: 6125
loss: 1.1282052993774414,grad_norm: 0.9999997546269112, iteration: 6126
loss: 1.0767844915390015,grad_norm: 0.9999994725074228, iteration: 6127
loss: 1.0321506261825562,grad_norm: 0.9999996079155491, iteration: 6128
loss: 1.1011618375778198,grad_norm: 0.9999996181773627, iteration: 6129
loss: 1.0757111310958862,grad_norm: 0.999999648493535, iteration: 6130
loss: 1.0805281400680542,grad_norm: 0.9999996751691808, iteration: 6131
loss: 1.1585772037506104,grad_norm: 0.9999997377265761, iteration: 6132
loss: 1.1800694465637207,grad_norm: 0.9999997019971539, iteration: 6133
loss: 1.082970380783081,grad_norm: 0.9999996095905761, iteration: 6134
loss: 1.0770292282104492,grad_norm: 0.999999743585773, iteration: 6135
loss: 1.0625262260437012,grad_norm: 0.9999996710447797, iteration: 6136
loss: 1.0691453218460083,grad_norm: 0.9999998234377432, iteration: 6137
loss: 1.068979263305664,grad_norm: 0.9999995738213147, iteration: 6138
loss: 1.1210983991622925,grad_norm: 0.9999995704961989, iteration: 6139
loss: 1.078438639640808,grad_norm: 0.999999593298314, iteration: 6140
loss: 1.1049988269805908,grad_norm: 0.9999996068179793, iteration: 6141
loss: 1.0306938886642456,grad_norm: 0.9999996100778326, iteration: 6142
loss: 1.157730221748352,grad_norm: 0.9999998240425738, iteration: 6143
loss: 1.077928900718689,grad_norm: 0.9999995386553778, iteration: 6144
loss: 1.0945240259170532,grad_norm: 0.9999997106363476, iteration: 6145
loss: 1.1249076128005981,grad_norm: 0.9999996495495491, iteration: 6146
loss: 1.159261703491211,grad_norm: 0.9999996788761423, iteration: 6147
loss: 1.1145094633102417,grad_norm: 0.9999996612887728, iteration: 6148
loss: 1.0830415487289429,grad_norm: 0.9999996543711037, iteration: 6149
loss: 1.1155158281326294,grad_norm: 0.9999996425268182, iteration: 6150
loss: 1.0940890312194824,grad_norm: 0.9999997854508641, iteration: 6151
loss: 1.1060292720794678,grad_norm: 0.9999995926446271, iteration: 6152
loss: 1.0484102964401245,grad_norm: 0.9999996486812657, iteration: 6153
loss: 1.1030476093292236,grad_norm: 0.9999996267534712, iteration: 6154
loss: 1.138428807258606,grad_norm: 0.9999997758599467, iteration: 6155
loss: 1.1430463790893555,grad_norm: 0.999999586922724, iteration: 6156
loss: 1.0445598363876343,grad_norm: 0.9999995335478654, iteration: 6157
loss: 1.0282344818115234,grad_norm: 0.9999995613043097, iteration: 6158
loss: 1.0018682479858398,grad_norm: 0.9999995345733407, iteration: 6159
loss: 1.1618858575820923,grad_norm: 0.9999997005555813, iteration: 6160
loss: 1.0640745162963867,grad_norm: 0.9999996873539057, iteration: 6161
loss: 1.1179840564727783,grad_norm: 0.9999996575693316, iteration: 6162
loss: 1.1496516466140747,grad_norm: 0.9999997745545343, iteration: 6163
loss: 1.147169589996338,grad_norm: 0.999999694711427, iteration: 6164
loss: 1.0800633430480957,grad_norm: 0.9999996609737983, iteration: 6165
loss: 1.1188814640045166,grad_norm: 0.9999996804439532, iteration: 6166
loss: 1.0810198783874512,grad_norm: 0.9999995059645562, iteration: 6167
loss: 1.1675351858139038,grad_norm: 0.9999996839142652, iteration: 6168
loss: 1.2052587270736694,grad_norm: 0.9999996927172986, iteration: 6169
loss: 1.0039805173873901,grad_norm: 0.9999996913142173, iteration: 6170
loss: 1.1294366121292114,grad_norm: 0.9999996446448047, iteration: 6171
loss: 1.1453486680984497,grad_norm: 0.9999996471665173, iteration: 6172
loss: 1.0514565706253052,grad_norm: 0.9999996920036556, iteration: 6173
loss: 1.0936003923416138,grad_norm: 0.9999996428923058, iteration: 6174
loss: 1.0592396259307861,grad_norm: 0.9999994978699248, iteration: 6175
loss: 1.0889121294021606,grad_norm: 0.9999996679408906, iteration: 6176
loss: 1.1186591386795044,grad_norm: 0.9999995788845685, iteration: 6177
loss: 1.074142336845398,grad_norm: 0.9999995444401825, iteration: 6178
loss: 1.0659269094467163,grad_norm: 0.9999995595673051, iteration: 6179
loss: 1.077991008758545,grad_norm: 0.9999996487626471, iteration: 6180
loss: 1.0906317234039307,grad_norm: 0.9999996823548974, iteration: 6181
loss: 1.1166020631790161,grad_norm: 0.9999996636740865, iteration: 6182
loss: 1.1271750926971436,grad_norm: 0.9999996568932963, iteration: 6183
loss: 1.111141562461853,grad_norm: 0.9999996032422942, iteration: 6184
loss: 1.0746263265609741,grad_norm: 0.9999995378218728, iteration: 6185
loss: 1.1439425945281982,grad_norm: 0.9999997062582013, iteration: 6186
loss: 1.0916035175323486,grad_norm: 0.9999996352428651, iteration: 6187
loss: 1.2020423412322998,grad_norm: 0.9999996425790643, iteration: 6188
loss: 1.1148791313171387,grad_norm: 0.9999997544481071, iteration: 6189
loss: 1.0487020015716553,grad_norm: 0.9999995477186995, iteration: 6190
loss: 1.0498952865600586,grad_norm: 0.999999471519755, iteration: 6191
loss: 1.095350980758667,grad_norm: 0.9999996630944809, iteration: 6192
loss: 1.0856808423995972,grad_norm: 0.9999994621170066, iteration: 6193
loss: 1.1431905031204224,grad_norm: 0.9999996515153139, iteration: 6194
loss: 1.0596630573272705,grad_norm: 0.9999995367521445, iteration: 6195
loss: 1.108469843864441,grad_norm: 0.9999996167993467, iteration: 6196
loss: 1.0982539653778076,grad_norm: 0.9999995246682941, iteration: 6197
loss: 1.0348103046417236,grad_norm: 0.9999994430494876, iteration: 6198
loss: 1.0771404504776,grad_norm: 0.9999995405840882, iteration: 6199
loss: 1.113588809967041,grad_norm: 0.9999997728536919, iteration: 6200
loss: 1.1113754510879517,grad_norm: 0.9999997285649252, iteration: 6201
loss: 1.184000015258789,grad_norm: 0.9999997088967234, iteration: 6202
loss: 1.024349331855774,grad_norm: 0.9999995214447671, iteration: 6203
loss: 1.0217225551605225,grad_norm: 0.9999995935926941, iteration: 6204
loss: 1.019065022468567,grad_norm: 0.9999995234864034, iteration: 6205
loss: 1.0842087268829346,grad_norm: 0.9999997731914423, iteration: 6206
loss: 1.084295392036438,grad_norm: 0.9999995071778666, iteration: 6207
loss: 1.1703996658325195,grad_norm: 0.9999997204238917, iteration: 6208
loss: 1.1181014776229858,grad_norm: 0.999999664855368, iteration: 6209
loss: 1.2200008630752563,grad_norm: 0.9999996878108709, iteration: 6210
loss: 1.0326154232025146,grad_norm: 0.9999995941601567, iteration: 6211
loss: 1.1433918476104736,grad_norm: 0.9999997768815095, iteration: 6212
loss: 1.1419005393981934,grad_norm: 0.9999996653819572, iteration: 6213
loss: 1.0723001956939697,grad_norm: 0.9999994807218023, iteration: 6214
loss: 1.0908852815628052,grad_norm: 0.999999513113192, iteration: 6215
loss: 1.0759706497192383,grad_norm: 0.999999497592879, iteration: 6216
loss: 1.1508803367614746,grad_norm: 0.99999969750011, iteration: 6217
loss: 1.1135218143463135,grad_norm: 0.9999996320331708, iteration: 6218
loss: 1.0433100461959839,grad_norm: 0.9999996499797814, iteration: 6219
loss: 1.0252975225448608,grad_norm: 0.9999994764384877, iteration: 6220
loss: 1.1652565002441406,grad_norm: 0.9999996510554415, iteration: 6221
loss: 1.0939353704452515,grad_norm: 0.9999997783725987, iteration: 6222
loss: 1.144647479057312,grad_norm: 0.9999996001402647, iteration: 6223
loss: 1.1018937826156616,grad_norm: 0.9999996410212605, iteration: 6224
loss: 1.085004210472107,grad_norm: 0.9999996179163126, iteration: 6225
loss: 1.0994956493377686,grad_norm: 0.9999994927731829, iteration: 6226
loss: 1.0785154104232788,grad_norm: 0.9999997396330499, iteration: 6227
loss: 1.058514952659607,grad_norm: 0.9999996010989066, iteration: 6228
loss: 1.1217679977416992,grad_norm: 0.9999996426303129, iteration: 6229
loss: 1.1581586599349976,grad_norm: 0.999999740179321, iteration: 6230
loss: 1.0999163389205933,grad_norm: 0.999999662072706, iteration: 6231
loss: 1.1245216131210327,grad_norm: 0.9999996530089968, iteration: 6232
loss: 1.0696682929992676,grad_norm: 0.9999996082916668, iteration: 6233
loss: 1.0492521524429321,grad_norm: 0.9999995398185229, iteration: 6234
loss: 1.1716625690460205,grad_norm: 0.9999998052027114, iteration: 6235
loss: 1.1248610019683838,grad_norm: 0.999999757169965, iteration: 6236
loss: 1.1433919668197632,grad_norm: 0.9999997983499254, iteration: 6237
loss: 1.0608724355697632,grad_norm: 0.9999996931333168, iteration: 6238
loss: 1.1414974927902222,grad_norm: 0.9999996445830486, iteration: 6239
loss: 1.1528146266937256,grad_norm: 0.9999995501037774, iteration: 6240
loss: 1.130484700202942,grad_norm: 0.9999997827347045, iteration: 6241
loss: 1.056721568107605,grad_norm: 0.9999997761841302, iteration: 6242
loss: 1.1311618089675903,grad_norm: 0.999999651813461, iteration: 6243
loss: 1.0966848134994507,grad_norm: 0.9999997780243702, iteration: 6244
loss: 1.126499891281128,grad_norm: 0.9999998198975798, iteration: 6245
loss: 1.0967857837677002,grad_norm: 0.9999995980674296, iteration: 6246
loss: 1.1342313289642334,grad_norm: 0.9999995945470328, iteration: 6247
loss: 1.1552329063415527,grad_norm: 0.9999996055616732, iteration: 6248
loss: 1.1551134586334229,grad_norm: 0.9999996186311273, iteration: 6249
loss: 1.1217795610427856,grad_norm: 0.9999996470000176, iteration: 6250
loss: 1.0825117826461792,grad_norm: 0.9999996538733698, iteration: 6251
loss: 1.1585525274276733,grad_norm: 0.9999997859875207, iteration: 6252
loss: 1.1601316928863525,grad_norm: 0.9999996689603967, iteration: 6253
loss: 1.1546812057495117,grad_norm: 0.999999643091513, iteration: 6254
loss: 1.1181175708770752,grad_norm: 0.9999996711797159, iteration: 6255
loss: 1.09763503074646,grad_norm: 0.9999995781909694, iteration: 6256
loss: 1.0562409162521362,grad_norm: 0.9999995758026312, iteration: 6257
loss: 1.0892356634140015,grad_norm: 0.9999996320351384, iteration: 6258
loss: 1.09285306930542,grad_norm: 0.9999997380360728, iteration: 6259
loss: 1.0703710317611694,grad_norm: 0.9999996055700991, iteration: 6260
loss: 1.112097978591919,grad_norm: 0.999999689044743, iteration: 6261
loss: 1.0973762273788452,grad_norm: 0.9999997660350037, iteration: 6262
loss: 1.0743740797042847,grad_norm: 0.9999996720026816, iteration: 6263
loss: 1.0604337453842163,grad_norm: 0.9999996226240113, iteration: 6264
loss: 1.0841923952102661,grad_norm: 0.9999997550562959, iteration: 6265
loss: 1.1064847707748413,grad_norm: 0.9999995810051029, iteration: 6266
loss: 1.1898494958877563,grad_norm: 0.9999998075845279, iteration: 6267
loss: 1.0675064325332642,grad_norm: 0.9999997096943211, iteration: 6268
loss: 1.151875615119934,grad_norm: 0.9999998670259957, iteration: 6269
loss: 1.062475562095642,grad_norm: 0.9999995242165779, iteration: 6270
loss: 1.1202641725540161,grad_norm: 0.9999994928789793, iteration: 6271
loss: 1.0040085315704346,grad_norm: 0.9999995263831292, iteration: 6272
loss: 1.0993157625198364,grad_norm: 0.9999996794454121, iteration: 6273
loss: 1.0747041702270508,grad_norm: 0.9999996588030302, iteration: 6274
loss: 1.1139943599700928,grad_norm: 0.9999996904555338, iteration: 6275
loss: 1.0998163223266602,grad_norm: 0.9999995739731573, iteration: 6276
loss: 1.101572036743164,grad_norm: 0.9999996877636463, iteration: 6277
loss: 1.0776852369308472,grad_norm: 0.9999995881673768, iteration: 6278
loss: 1.1498414278030396,grad_norm: 0.9999996622293988, iteration: 6279
loss: 1.1076334714889526,grad_norm: 0.9999995514007823, iteration: 6280
loss: 1.1581733226776123,grad_norm: 0.9999996278574363, iteration: 6281
loss: 1.0711606740951538,grad_norm: 0.9999994664277146, iteration: 6282
loss: 1.1056071519851685,grad_norm: 0.9999996478308105, iteration: 6283
loss: 1.1360602378845215,grad_norm: 0.999999689381326, iteration: 6284
loss: 1.1030775308609009,grad_norm: 0.999999615705926, iteration: 6285
loss: 1.1282620429992676,grad_norm: 0.9999996238658775, iteration: 6286
loss: 1.1321296691894531,grad_norm: 0.9999997033241448, iteration: 6287
loss: 1.0690436363220215,grad_norm: 0.9999994936123442, iteration: 6288
loss: 1.1081092357635498,grad_norm: 0.9999996537676277, iteration: 6289
loss: 1.0787731409072876,grad_norm: 0.999999578306764, iteration: 6290
loss: 1.1284576654434204,grad_norm: 0.9999996895967336, iteration: 6291
loss: 1.0616765022277832,grad_norm: 0.9999994677864233, iteration: 6292
loss: 1.099883794784546,grad_norm: 0.9999997254175893, iteration: 6293
loss: 1.166434645652771,grad_norm: 0.9999996990876063, iteration: 6294
loss: 1.034170389175415,grad_norm: 0.9999995448095417, iteration: 6295
loss: 1.1139272451400757,grad_norm: 0.999999589525949, iteration: 6296
loss: 1.165614128112793,grad_norm: 0.9999996293450526, iteration: 6297
loss: 1.0632513761520386,grad_norm: 0.9999996199630343, iteration: 6298
loss: 1.1666953563690186,grad_norm: 0.9999997335027905, iteration: 6299
loss: 1.082392930984497,grad_norm: 0.9999996638208, iteration: 6300
loss: 1.098703384399414,grad_norm: 0.9999995508399224, iteration: 6301
loss: 1.0749614238739014,grad_norm: 0.9999996916322527, iteration: 6302
loss: 1.1476478576660156,grad_norm: 0.9999994336933794, iteration: 6303
loss: 1.138059139251709,grad_norm: 0.99999969048233, iteration: 6304
loss: 1.1438018083572388,grad_norm: 0.9999996260495031, iteration: 6305
loss: 1.0599145889282227,grad_norm: 0.9999996707377858, iteration: 6306
loss: 1.144590139389038,grad_norm: 0.9999995995461075, iteration: 6307
loss: 1.07669198513031,grad_norm: 0.9999995355855739, iteration: 6308
loss: 1.071692943572998,grad_norm: 0.9999996725483524, iteration: 6309
loss: 1.0770965814590454,grad_norm: 0.9999994472010808, iteration: 6310
loss: 1.0552747249603271,grad_norm: 0.9999995706330266, iteration: 6311
loss: 1.1361545324325562,grad_norm: 0.9999996325037447, iteration: 6312
loss: 1.1301974058151245,grad_norm: 0.9999997383086155, iteration: 6313
loss: 1.159104347229004,grad_norm: 0.9999997290706927, iteration: 6314
loss: 1.0570088624954224,grad_norm: 0.9999998009873837, iteration: 6315
loss: 1.044339656829834,grad_norm: 0.9999996601073806, iteration: 6316
loss: 1.1187783479690552,grad_norm: 0.9999996018478802, iteration: 6317
loss: 1.0644739866256714,grad_norm: 0.9999996222742241, iteration: 6318
loss: 1.1641541719436646,grad_norm: 0.9999996907469529, iteration: 6319
loss: 1.0703063011169434,grad_norm: 0.9999996063592074, iteration: 6320
loss: 1.1235803365707397,grad_norm: 0.9999996627389017, iteration: 6321
loss: 1.150199294090271,grad_norm: 0.999999705151543, iteration: 6322
loss: 1.1038262844085693,grad_norm: 0.9999998264747632, iteration: 6323
loss: 1.1116241216659546,grad_norm: 0.9999996381679019, iteration: 6324
loss: 1.140049934387207,grad_norm: 0.9999996260120193, iteration: 6325
loss: 1.1352638006210327,grad_norm: 0.9999997227542441, iteration: 6326
loss: 1.1095048189163208,grad_norm: 0.9999998012755119, iteration: 6327
loss: 1.0872383117675781,grad_norm: 0.9999995805202979, iteration: 6328
loss: 1.1361150741577148,grad_norm: 0.9999997078334416, iteration: 6329
loss: 1.0584250688552856,grad_norm: 0.9999994437703086, iteration: 6330
loss: 1.0511767864227295,grad_norm: 0.9999995207485172, iteration: 6331
loss: 1.0994873046875,grad_norm: 0.999999631664249, iteration: 6332
loss: 1.1801892518997192,grad_norm: 0.9999996362399024, iteration: 6333
loss: 1.2000980377197266,grad_norm: 0.9999998309266563, iteration: 6334
loss: 0.9887202978134155,grad_norm: 0.9999995041040944, iteration: 6335
loss: 1.033997893333435,grad_norm: 0.9999996423525689, iteration: 6336
loss: 1.034480333328247,grad_norm: 0.9999995502668586, iteration: 6337
loss: 1.0481712818145752,grad_norm: 0.9999994446353945, iteration: 6338
loss: 1.065710425376892,grad_norm: 0.9999996006351576, iteration: 6339
loss: 1.1075036525726318,grad_norm: 0.9999995973849795, iteration: 6340
loss: 1.1800647974014282,grad_norm: 0.999999757432316, iteration: 6341
loss: 1.0817697048187256,grad_norm: 0.9999996221285701, iteration: 6342
loss: 1.0841302871704102,grad_norm: 0.9999994996265589, iteration: 6343
loss: 1.059799075126648,grad_norm: 0.9999997289732097, iteration: 6344
loss: 1.0706437826156616,grad_norm: 0.9999995900091853, iteration: 6345
loss: 1.0763676166534424,grad_norm: 0.9999994744063749, iteration: 6346
loss: 1.1061618328094482,grad_norm: 0.9999995828098843, iteration: 6347
loss: 1.1646004915237427,grad_norm: 0.9999997065723837, iteration: 6348
loss: 1.0567227602005005,grad_norm: 0.9999995979094843, iteration: 6349
loss: 1.1158835887908936,grad_norm: 0.9999995401584599, iteration: 6350
loss: 1.0309662818908691,grad_norm: 0.9999994445054988, iteration: 6351
loss: 1.1441400051116943,grad_norm: 0.9999996616413077, iteration: 6352
loss: 1.0879024267196655,grad_norm: 0.9999995410686949, iteration: 6353
loss: 1.1076377630233765,grad_norm: 0.9999996156375186, iteration: 6354
loss: 1.0127882957458496,grad_norm: 0.9999996689513012, iteration: 6355
loss: 1.0657947063446045,grad_norm: 0.9999994974918146, iteration: 6356
loss: 1.08004629611969,grad_norm: 0.9999995647729304, iteration: 6357
loss: 1.1381784677505493,grad_norm: 0.9999996711970952, iteration: 6358
loss: 1.0659734010696411,grad_norm: 0.9999996021255549, iteration: 6359
loss: 1.0934169292449951,grad_norm: 0.9999996420541337, iteration: 6360
loss: 1.10940682888031,grad_norm: 0.9999996726836277, iteration: 6361
loss: 1.0913141965866089,grad_norm: 0.9999995522970382, iteration: 6362
loss: 1.0618754625320435,grad_norm: 0.9999996421869953, iteration: 6363
loss: 1.117734670639038,grad_norm: 0.9999996994357411, iteration: 6364
loss: 1.0978646278381348,grad_norm: 0.9999995487479828, iteration: 6365
loss: 1.1020007133483887,grad_norm: 0.9999996847375098, iteration: 6366
loss: 1.1857908964157104,grad_norm: 0.9999997381284702, iteration: 6367
loss: 1.0384905338287354,grad_norm: 0.9999994802529153, iteration: 6368
loss: 1.1529147624969482,grad_norm: 0.9999996584580804, iteration: 6369
loss: 1.0217723846435547,grad_norm: 0.9999994842521951, iteration: 6370
loss: 1.0672533512115479,grad_norm: 0.9999995709815815, iteration: 6371
loss: 1.0843861103057861,grad_norm: 0.999999589244297, iteration: 6372
loss: 1.0948487520217896,grad_norm: 0.9999996957018582, iteration: 6373
loss: 1.1993162631988525,grad_norm: 0.9999997895531647, iteration: 6374
loss: 1.023281455039978,grad_norm: 0.9999996418179, iteration: 6375
loss: 1.0202544927597046,grad_norm: 0.9999994613292741, iteration: 6376
loss: 1.0666296482086182,grad_norm: 0.9999995388260021, iteration: 6377
loss: 1.1439354419708252,grad_norm: 0.9999996876583883, iteration: 6378
loss: 1.0219415426254272,grad_norm: 0.9999997066803825, iteration: 6379
loss: 1.0742292404174805,grad_norm: 0.9999997383586638, iteration: 6380
loss: 1.1798450946807861,grad_norm: 0.9999996598916259, iteration: 6381
loss: 1.1565954685211182,grad_norm: 0.999999736064952, iteration: 6382
loss: 1.1388121843338013,grad_norm: 0.9999996188323209, iteration: 6383
loss: 1.0558992624282837,grad_norm: 0.9999995566363016, iteration: 6384
loss: 1.0803143978118896,grad_norm: 0.9999996480786643, iteration: 6385
loss: 1.1188565492630005,grad_norm: 0.9999996802079245, iteration: 6386
loss: 1.0706243515014648,grad_norm: 0.999999749124336, iteration: 6387
loss: 1.0980043411254883,grad_norm: 0.9999996336555976, iteration: 6388
loss: 1.0436882972717285,grad_norm: 0.9999994672899546, iteration: 6389
loss: 1.032839298248291,grad_norm: 0.9999994716519975, iteration: 6390
loss: 1.053790807723999,grad_norm: 0.9999994854417207, iteration: 6391
loss: 1.1148436069488525,grad_norm: 0.9999995890838626, iteration: 6392
loss: 1.1089473962783813,grad_norm: 0.9999997486321639, iteration: 6393
loss: 1.1347516775131226,grad_norm: 0.9999997129249891, iteration: 6394
loss: 1.0717687606811523,grad_norm: 0.9999996130519041, iteration: 6395
loss: 1.0760375261306763,grad_norm: 0.9999995423899907, iteration: 6396
loss: 1.032647967338562,grad_norm: 0.9999994899193001, iteration: 6397
loss: 1.0964068174362183,grad_norm: 0.9999997256595763, iteration: 6398
loss: 1.0745887756347656,grad_norm: 0.9999995438606442, iteration: 6399
loss: 1.1430249214172363,grad_norm: 0.999999826246759, iteration: 6400
loss: 0.9814306497573853,grad_norm: 0.9999995257430369, iteration: 6401
loss: 1.0050431489944458,grad_norm: 0.999999552601877, iteration: 6402
loss: 1.020491600036621,grad_norm: 0.999999652218049, iteration: 6403
loss: 1.098076343536377,grad_norm: 0.999999509074142, iteration: 6404
loss: 1.0837174654006958,grad_norm: 0.9999996232279641, iteration: 6405
loss: 1.0980892181396484,grad_norm: 0.9999996000235942, iteration: 6406
loss: 1.0445973873138428,grad_norm: 0.9999994534734444, iteration: 6407
loss: 1.0770810842514038,grad_norm: 0.9999995476923905, iteration: 6408
loss: 1.1303303241729736,grad_norm: 0.9999996888124473, iteration: 6409
loss: 1.0625805854797363,grad_norm: 0.9999996366485725, iteration: 6410
loss: 1.0927811861038208,grad_norm: 0.9999996590066453, iteration: 6411
loss: 1.1446568965911865,grad_norm: 0.9999997774517753, iteration: 6412
loss: 1.0580806732177734,grad_norm: 0.9999996363831989, iteration: 6413
loss: 1.0572134256362915,grad_norm: 0.999999416402387, iteration: 6414
loss: 1.0826416015625,grad_norm: 0.9999995179394274, iteration: 6415
loss: 1.1382919549942017,grad_norm: 0.9999995487247281, iteration: 6416
loss: 1.1252325773239136,grad_norm: 0.9999997231658982, iteration: 6417
loss: 1.0909850597381592,grad_norm: 0.9999995917677102, iteration: 6418
loss: 1.0396407842636108,grad_norm: 0.9999993665284374, iteration: 6419
loss: 1.0667937994003296,grad_norm: 0.9999995405326289, iteration: 6420
loss: 1.0794280767440796,grad_norm: 0.9999996973115665, iteration: 6421
loss: 1.0302571058273315,grad_norm: 0.9999996903903331, iteration: 6422
loss: 1.0820077657699585,grad_norm: 0.9999995801151942, iteration: 6423
loss: 1.034206748008728,grad_norm: 0.9999995106609166, iteration: 6424
loss: 1.0881333351135254,grad_norm: 0.9999996723714174, iteration: 6425
loss: 0.9848159551620483,grad_norm: 0.9999995414694651, iteration: 6426
loss: 1.0835691690444946,grad_norm: 0.9999996198408808, iteration: 6427
loss: 1.163327932357788,grad_norm: 0.9999997215299903, iteration: 6428
loss: 1.0586025714874268,grad_norm: 0.9999996238241664, iteration: 6429
loss: 1.0455116033554077,grad_norm: 0.9999996087647581, iteration: 6430
loss: 1.099599003791809,grad_norm: 0.9999995253703667, iteration: 6431
loss: 1.062963843345642,grad_norm: 0.9999995185520633, iteration: 6432
loss: 1.0806938409805298,grad_norm: 0.9999995968822157, iteration: 6433
loss: 1.0844500064849854,grad_norm: 0.9999996197750219, iteration: 6434
loss: 1.1055279970169067,grad_norm: 0.999999562483751, iteration: 6435
loss: 1.0162792205810547,grad_norm: 0.9999996813727426, iteration: 6436
loss: 1.110829472541809,grad_norm: 0.9999997148976784, iteration: 6437
loss: 1.1164106130599976,grad_norm: 0.9999996436763305, iteration: 6438
loss: 1.0320266485214233,grad_norm: 0.9999994748301573, iteration: 6439
loss: 1.0691860914230347,grad_norm: 0.9999995841464432, iteration: 6440
loss: 1.1234203577041626,grad_norm: 0.9999997549444769, iteration: 6441
loss: 1.112754225730896,grad_norm: 0.9999995863529799, iteration: 6442
loss: 1.1464797258377075,grad_norm: 0.99999948739648, iteration: 6443
loss: 1.0662904977798462,grad_norm: 0.999999618152135, iteration: 6444
loss: 1.100720763206482,grad_norm: 0.9999996490468566, iteration: 6445
loss: 1.0980011224746704,grad_norm: 0.9999994892566051, iteration: 6446
loss: 1.1366177797317505,grad_norm: 0.9999996517947448, iteration: 6447
loss: 1.1555119752883911,grad_norm: 0.9999997384029148, iteration: 6448
loss: 1.0715008974075317,grad_norm: 0.9999994558065016, iteration: 6449
loss: 1.1108382940292358,grad_norm: 0.9999995368067673, iteration: 6450
loss: 1.0702658891677856,grad_norm: 0.9999997023884595, iteration: 6451
loss: 1.025056004524231,grad_norm: 0.9999993419691738, iteration: 6452
loss: 1.0260519981384277,grad_norm: 0.9999994014946445, iteration: 6453
loss: 1.1301665306091309,grad_norm: 0.9999996335502265, iteration: 6454
loss: 1.1045277118682861,grad_norm: 0.9999997342502107, iteration: 6455
loss: 1.1170282363891602,grad_norm: 0.9999996148436568, iteration: 6456
loss: 1.1364952325820923,grad_norm: 0.9999996285059861, iteration: 6457
loss: 1.0876024961471558,grad_norm: 0.9999996529664942, iteration: 6458
loss: 1.071018934249878,grad_norm: 0.9999994892750248, iteration: 6459
loss: 1.1148086786270142,grad_norm: 0.9999997062411395, iteration: 6460
loss: 1.0744231939315796,grad_norm: 0.9999996924392417, iteration: 6461
loss: 1.0647565126419067,grad_norm: 0.9999995582815029, iteration: 6462
loss: 1.1507912874221802,grad_norm: 0.9999997650597431, iteration: 6463
loss: 1.0767714977264404,grad_norm: 0.9999998366096667, iteration: 6464
loss: 1.1218974590301514,grad_norm: 0.9999997144506166, iteration: 6465
loss: 1.076653242111206,grad_norm: 0.9999995119975073, iteration: 6466
loss: 1.0575997829437256,grad_norm: 0.9999994869914635, iteration: 6467
loss: 1.0665783882141113,grad_norm: 0.9999995226797492, iteration: 6468
loss: 1.1628187894821167,grad_norm: 0.9999997683952897, iteration: 6469
loss: 1.1250637769699097,grad_norm: 0.9999996694369321, iteration: 6470
loss: 1.089401125907898,grad_norm: 0.9999995429698045, iteration: 6471
loss: 1.0719332695007324,grad_norm: 0.9999996190999034, iteration: 6472
loss: 1.143280267715454,grad_norm: 0.9999997024828147, iteration: 6473
loss: 1.134843349456787,grad_norm: 0.9999996649287488, iteration: 6474
loss: 1.0429214239120483,grad_norm: 0.9999995473528299, iteration: 6475
loss: 1.1866716146469116,grad_norm: 0.9999997712061679, iteration: 6476
loss: 1.023598313331604,grad_norm: 0.9999993796843815, iteration: 6477
loss: 0.9978678226470947,grad_norm: 0.9999995006943673, iteration: 6478
loss: 1.17136812210083,grad_norm: 0.9999997342305488, iteration: 6479
loss: 1.140504240989685,grad_norm: 0.9999996259193384, iteration: 6480
loss: 1.139890193939209,grad_norm: 0.9999997096235054, iteration: 6481
loss: 1.0810468196868896,grad_norm: 0.9999996239234301, iteration: 6482
loss: 1.1191585063934326,grad_norm: 0.9999995846724259, iteration: 6483
loss: 1.2026060819625854,grad_norm: 0.9999998309180173, iteration: 6484
loss: 1.09722101688385,grad_norm: 0.999999514974158, iteration: 6485
loss: 1.0451791286468506,grad_norm: 0.9999993921324564, iteration: 6486
loss: 1.1152023077011108,grad_norm: 0.9999997412976757, iteration: 6487
loss: 1.093013882637024,grad_norm: 0.9999995004238479, iteration: 6488
loss: 1.10889732837677,grad_norm: 0.9999995813908078, iteration: 6489
loss: 1.0905910730361938,grad_norm: 0.99999968118618, iteration: 6490
loss: 1.0592973232269287,grad_norm: 0.9999996831684267, iteration: 6491
loss: 1.1231023073196411,grad_norm: 0.9999998134706718, iteration: 6492
loss: 1.0997694730758667,grad_norm: 0.9999997127780167, iteration: 6493
loss: 1.0925081968307495,grad_norm: 0.9999994593095838, iteration: 6494
loss: 1.034494161605835,grad_norm: 0.9999995908637227, iteration: 6495
loss: 1.1225783824920654,grad_norm: 0.9999997354812918, iteration: 6496
loss: 1.1012604236602783,grad_norm: 0.9999995530452865, iteration: 6497
loss: 1.0457288026809692,grad_norm: 0.9999994938559915, iteration: 6498
loss: 1.1349682807922363,grad_norm: 0.9999996724419158, iteration: 6499
loss: 1.0836730003356934,grad_norm: 0.9999996785747596, iteration: 6500
loss: 1.1064941883087158,grad_norm: 0.9999996300489492, iteration: 6501
loss: 1.11324143409729,grad_norm: 0.999999663911572, iteration: 6502
loss: 1.0629228353500366,grad_norm: 0.9999997074738097, iteration: 6503
loss: 1.1050150394439697,grad_norm: 0.9999996953238517, iteration: 6504
loss: 1.067460536956787,grad_norm: 0.9999996784380623, iteration: 6505
loss: 1.0569217205047607,grad_norm: 0.9999994659134084, iteration: 6506
loss: 1.1635239124298096,grad_norm: 0.9999995796322242, iteration: 6507
loss: 1.0730353593826294,grad_norm: 0.99999966015514, iteration: 6508
loss: 1.1610071659088135,grad_norm: 0.9999996910221718, iteration: 6509
loss: 1.0957683324813843,grad_norm: 0.9999998406077056, iteration: 6510
loss: 1.0956363677978516,grad_norm: 0.9999996585239245, iteration: 6511
loss: 1.0359681844711304,grad_norm: 0.9999996571014205, iteration: 6512
loss: 1.0444384813308716,grad_norm: 0.9999994715984, iteration: 6513
loss: 1.1078046560287476,grad_norm: 0.999999610583905, iteration: 6514
loss: 1.052657127380371,grad_norm: 0.9999995300590294, iteration: 6515
loss: 1.0533044338226318,grad_norm: 0.9999995909073748, iteration: 6516
loss: 1.0553594827651978,grad_norm: 0.9999995756661837, iteration: 6517
loss: 1.0920532941818237,grad_norm: 0.9999995345203921, iteration: 6518
loss: 1.022628903388977,grad_norm: 0.9999995233178366, iteration: 6519
loss: 1.0511248111724854,grad_norm: 0.9999994926326115, iteration: 6520
loss: 1.0962880849838257,grad_norm: 0.9999995984447363, iteration: 6521
loss: 1.0236324071884155,grad_norm: 0.9999994583660297, iteration: 6522
loss: 1.0666327476501465,grad_norm: 0.9999996285249915, iteration: 6523
loss: 1.0622296333312988,grad_norm: 0.9999995299683838, iteration: 6524
loss: 1.0104985237121582,grad_norm: 0.999999567973993, iteration: 6525
loss: 1.1068987846374512,grad_norm: 0.999999708791755, iteration: 6526
loss: 1.0866979360580444,grad_norm: 0.9999997091068347, iteration: 6527
loss: 1.120265245437622,grad_norm: 0.9999995566514186, iteration: 6528
loss: 1.105194330215454,grad_norm: 0.9999994922975196, iteration: 6529
loss: 1.1633679866790771,grad_norm: 0.9999998234684769, iteration: 6530
loss: 1.0719285011291504,grad_norm: 0.9999997223528382, iteration: 6531
loss: 1.109724998474121,grad_norm: 0.9999996060802789, iteration: 6532
loss: 1.1044018268585205,grad_norm: 0.9999996149426971, iteration: 6533
loss: 1.023645281791687,grad_norm: 0.9999996144918846, iteration: 6534
loss: 1.0175561904907227,grad_norm: 0.9999996237439842, iteration: 6535
loss: 1.1257688999176025,grad_norm: 0.9999997204560567, iteration: 6536
loss: 1.086010456085205,grad_norm: 0.9999997039719228, iteration: 6537
loss: 1.1000322103500366,grad_norm: 0.9999997012477722, iteration: 6538
loss: 1.0899356603622437,grad_norm: 0.9999996998919645, iteration: 6539
loss: 1.111802577972412,grad_norm: 0.9999995915765207, iteration: 6540
loss: 1.1095064878463745,grad_norm: 0.9999995950097609, iteration: 6541
loss: 1.0511977672576904,grad_norm: 0.9999997251488244, iteration: 6542
loss: 1.0916787385940552,grad_norm: 0.999999551763318, iteration: 6543
loss: 1.1096633672714233,grad_norm: 0.999999651283831, iteration: 6544
loss: 1.099906325340271,grad_norm: 0.9999996879854576, iteration: 6545
loss: 1.030507206916809,grad_norm: 0.9999995718232876, iteration: 6546
loss: 1.0322744846343994,grad_norm: 0.9999994684785797, iteration: 6547
loss: 1.1559410095214844,grad_norm: 0.9999996515429393, iteration: 6548
loss: 1.0888711214065552,grad_norm: 0.9999995810756118, iteration: 6549
loss: 1.0995231866836548,grad_norm: 0.9999995490662377, iteration: 6550
loss: 1.080176830291748,grad_norm: 0.9999997539481407, iteration: 6551
loss: 1.0567413568496704,grad_norm: 0.9999995040387479, iteration: 6552
loss: 1.0807833671569824,grad_norm: 0.9999994993393897, iteration: 6553
loss: 1.1039385795593262,grad_norm: 0.9999996897677298, iteration: 6554
loss: 1.1084589958190918,grad_norm: 0.9999996512334375, iteration: 6555
loss: 1.1487233638763428,grad_norm: 0.9999996541259053, iteration: 6556
loss: 1.0049399137496948,grad_norm: 0.9999995718455184, iteration: 6557
loss: 1.076456069946289,grad_norm: 0.999999450328609, iteration: 6558
loss: 1.152719259262085,grad_norm: 0.9999996872009247, iteration: 6559
loss: 1.1333404779434204,grad_norm: 0.9999997650882959, iteration: 6560
loss: 1.0750861167907715,grad_norm: 0.9999993679734758, iteration: 6561
loss: 1.0817490816116333,grad_norm: 0.9999996651331056, iteration: 6562
loss: 1.053598403930664,grad_norm: 0.9999996507690442, iteration: 6563
loss: 1.1320558786392212,grad_norm: 0.9999997960757998, iteration: 6564
loss: 1.0830395221710205,grad_norm: 0.9999997032241553, iteration: 6565
loss: 1.1239354610443115,grad_norm: 0.9999996075760709, iteration: 6566
loss: 1.0094318389892578,grad_norm: 0.9999995810005805, iteration: 6567
loss: 1.0570448637008667,grad_norm: 0.9999995816566747, iteration: 6568
loss: 1.0955169200897217,grad_norm: 0.9999995229656721, iteration: 6569
loss: 1.1240934133529663,grad_norm: 0.9999995825667303, iteration: 6570
loss: 1.088947057723999,grad_norm: 0.9999996193497972, iteration: 6571
loss: 1.0787845849990845,grad_norm: 0.9999995285474108, iteration: 6572
loss: 1.0713903903961182,grad_norm: 0.9999995606636987, iteration: 6573
loss: 1.0984437465667725,grad_norm: 0.9999996572956353, iteration: 6574
loss: 1.1631712913513184,grad_norm: 0.9999996793091105, iteration: 6575
loss: 1.0625135898590088,grad_norm: 0.999999616983786, iteration: 6576
loss: 1.0919216871261597,grad_norm: 0.9999996230935096, iteration: 6577
loss: 1.0946624279022217,grad_norm: 0.9999994674046119, iteration: 6578
loss: 1.0591181516647339,grad_norm: 0.9999995742591378, iteration: 6579
loss: 1.0967782735824585,grad_norm: 0.9999997906583744, iteration: 6580
loss: 1.0474244356155396,grad_norm: 0.9999995585193322, iteration: 6581
loss: 1.1078850030899048,grad_norm: 0.9999996367858768, iteration: 6582
loss: 1.0632010698318481,grad_norm: 0.9999996238100789, iteration: 6583
loss: 1.046789526939392,grad_norm: 0.9999995992142632, iteration: 6584
loss: 1.0555728673934937,grad_norm: 0.9999995113921053, iteration: 6585
loss: 1.0727657079696655,grad_norm: 0.9999996650717144, iteration: 6586
loss: 0.9976428151130676,grad_norm: 0.9999995326797493, iteration: 6587
loss: 1.041323184967041,grad_norm: 0.9999994867235154, iteration: 6588
loss: 1.102531909942627,grad_norm: 0.9999995666680364, iteration: 6589
loss: 1.1045089960098267,grad_norm: 0.9999996303739955, iteration: 6590
loss: 1.0660978555679321,grad_norm: 0.9999996180907594, iteration: 6591
loss: 1.0827863216400146,grad_norm: 0.9999994311924241, iteration: 6592
loss: 1.0622522830963135,grad_norm: 0.9999996894184303, iteration: 6593
loss: 1.0045933723449707,grad_norm: 0.9999993619763873, iteration: 6594
loss: 1.1009646654129028,grad_norm: 0.9999995272907545, iteration: 6595
loss: 1.1060574054718018,grad_norm: 0.9999995607913069, iteration: 6596
loss: 1.0394595861434937,grad_norm: 0.9999995184569063, iteration: 6597
loss: 1.0919603109359741,grad_norm: 0.999999550002657, iteration: 6598
loss: 1.13497793674469,grad_norm: 0.9999997055917744, iteration: 6599
loss: 1.018105149269104,grad_norm: 0.9999993709662558, iteration: 6600
loss: 1.0887300968170166,grad_norm: 0.999999624931278, iteration: 6601
loss: 1.0588715076446533,grad_norm: 0.9999996230439796, iteration: 6602
loss: 1.1662558317184448,grad_norm: 0.9999997316488575, iteration: 6603
loss: 1.1311076879501343,grad_norm: 0.9999997227012277, iteration: 6604
loss: 1.09532630443573,grad_norm: 0.9999995006578052, iteration: 6605
loss: 1.0345888137817383,grad_norm: 0.9999995850215551, iteration: 6606
loss: 1.0895370244979858,grad_norm: 0.9999995037957636, iteration: 6607
loss: 1.1070899963378906,grad_norm: 0.9999995415677481, iteration: 6608
loss: 1.0602977275848389,grad_norm: 0.9999992362253275, iteration: 6609
loss: 1.0424681901931763,grad_norm: 0.9999994665793164, iteration: 6610
loss: 1.1130800247192383,grad_norm: 0.999999631201351, iteration: 6611
loss: 1.0980722904205322,grad_norm: 0.9999996521689668, iteration: 6612
loss: 1.1115413904190063,grad_norm: 0.9999995215360078, iteration: 6613
loss: 1.1568844318389893,grad_norm: 0.9999997174335347, iteration: 6614
loss: 1.076887845993042,grad_norm: 0.9999996193872291, iteration: 6615
loss: 1.0626921653747559,grad_norm: 0.999999599047552, iteration: 6616
loss: 1.100805640220642,grad_norm: 0.9999996207529386, iteration: 6617
loss: 1.128705382347107,grad_norm: 0.9999996744977265, iteration: 6618
loss: 1.1074572801589966,grad_norm: 0.9999995328626735, iteration: 6619
loss: 1.0895334482192993,grad_norm: 0.9999994982158205, iteration: 6620
loss: 1.0642331838607788,grad_norm: 0.9999994867116365, iteration: 6621
loss: 1.116447925567627,grad_norm: 0.9999996683093222, iteration: 6622
loss: 1.058064341545105,grad_norm: 0.9999994695720875, iteration: 6623
loss: 1.0932040214538574,grad_norm: 0.9999996938080491, iteration: 6624
loss: 1.096082329750061,grad_norm: 0.9999995535813399, iteration: 6625
loss: 1.1014070510864258,grad_norm: 0.999999614196994, iteration: 6626
loss: 1.092793583869934,grad_norm: 0.9999997661549989, iteration: 6627
loss: 1.0836267471313477,grad_norm: 0.9999996578109539, iteration: 6628
loss: 1.0433417558670044,grad_norm: 0.9999994740671365, iteration: 6629
loss: 1.0587971210479736,grad_norm: 0.999999448296727, iteration: 6630
loss: 1.0878218412399292,grad_norm: 0.9999995362766373, iteration: 6631
loss: 1.0531363487243652,grad_norm: 0.9999996667357691, iteration: 6632
loss: 1.0237925052642822,grad_norm: 0.9999996700427312, iteration: 6633
loss: 1.091881275177002,grad_norm: 0.9999997903494136, iteration: 6634
loss: 1.1503500938415527,grad_norm: 0.9999997729087375, iteration: 6635
loss: 1.1218838691711426,grad_norm: 0.9999998217196875, iteration: 6636
loss: 1.082888126373291,grad_norm: 0.9999993341820363, iteration: 6637
loss: 1.1884583234786987,grad_norm: 0.999999795614473, iteration: 6638
loss: 1.076428771018982,grad_norm: 0.9999995817307978, iteration: 6639
loss: 1.1678438186645508,grad_norm: 0.9999997029556442, iteration: 6640
loss: 1.1386127471923828,grad_norm: 0.9999996340376415, iteration: 6641
loss: 1.097035527229309,grad_norm: 0.9999997007050936, iteration: 6642
loss: 1.139976143836975,grad_norm: 0.9999996349551992, iteration: 6643
loss: 1.1566616296768188,grad_norm: 0.9999996800913186, iteration: 6644
loss: 1.0597772598266602,grad_norm: 0.9999996092284736, iteration: 6645
loss: 1.0941795110702515,grad_norm: 0.9999996686489817, iteration: 6646
loss: 1.0907648801803589,grad_norm: 0.9999996068039742, iteration: 6647
loss: 1.096904993057251,grad_norm: 0.9999996827334496, iteration: 6648
loss: 1.1650277376174927,grad_norm: 0.9999996700960996, iteration: 6649
loss: 1.0684205293655396,grad_norm: 0.9999996125752506, iteration: 6650
loss: 1.0526732206344604,grad_norm: 0.9999997640592435, iteration: 6651
loss: 1.0798132419586182,grad_norm: 0.9999994867901436, iteration: 6652
loss: 1.09610116481781,grad_norm: 0.9999996458863231, iteration: 6653
loss: 1.1186531782150269,grad_norm: 0.9999995793722117, iteration: 6654
loss: 1.039530634880066,grad_norm: 0.9999994587758964, iteration: 6655
loss: 1.023154377937317,grad_norm: 0.9999994317321631, iteration: 6656
loss: 1.073141098022461,grad_norm: 0.9999994181960665, iteration: 6657
loss: 1.024538278579712,grad_norm: 0.9999995622927446, iteration: 6658
loss: 1.0496339797973633,grad_norm: 0.999999627267092, iteration: 6659
loss: 1.112839698791504,grad_norm: 0.9999995384235512, iteration: 6660
loss: 1.1679778099060059,grad_norm: 0.9999997685337475, iteration: 6661
loss: 1.0620719194412231,grad_norm: 0.9999996711823936, iteration: 6662
loss: 1.1444987058639526,grad_norm: 0.9999997709080599, iteration: 6663
loss: 1.0617790222167969,grad_norm: 0.9999996172160603, iteration: 6664
loss: 1.1279727220535278,grad_norm: 0.9999995433882399, iteration: 6665
loss: 1.082735300064087,grad_norm: 0.9999996304836644, iteration: 6666
loss: 1.0532902479171753,grad_norm: 0.9999996062098206, iteration: 6667
loss: 1.0648572444915771,grad_norm: 0.9999995133872, iteration: 6668
loss: 1.0299077033996582,grad_norm: 0.9999996102492749, iteration: 6669
loss: 1.069837212562561,grad_norm: 0.9999994482600225, iteration: 6670
loss: 1.1214567422866821,grad_norm: 0.9999996587283868, iteration: 6671
loss: 1.1219866275787354,grad_norm: 0.9999997410564135, iteration: 6672
loss: 1.0936185121536255,grad_norm: 0.9999997552665593, iteration: 6673
loss: 1.1399023532867432,grad_norm: 0.9999996575753692, iteration: 6674
loss: 1.0948420763015747,grad_norm: 0.9999995482764955, iteration: 6675
loss: 1.1024465560913086,grad_norm: 0.9999994128997921, iteration: 6676
loss: 1.0648603439331055,grad_norm: 0.9999995397845544, iteration: 6677
loss: 1.0602777004241943,grad_norm: 0.9999995207559141, iteration: 6678
loss: 1.059189796447754,grad_norm: 0.9999995709518517, iteration: 6679
loss: 1.0808988809585571,grad_norm: 0.9999995586997568, iteration: 6680
loss: 1.0634006261825562,grad_norm: 0.9999995591038048, iteration: 6681
loss: 1.0894471406936646,grad_norm: 0.9999994412268014, iteration: 6682
loss: 1.0684728622436523,grad_norm: 0.9999994109939967, iteration: 6683
loss: 1.0972951650619507,grad_norm: 0.999999678183097, iteration: 6684
loss: 1.0390303134918213,grad_norm: 0.9999994347010901, iteration: 6685
loss: 1.140001893043518,grad_norm: 0.9999996395973904, iteration: 6686
loss: 1.0712180137634277,grad_norm: 0.999999696159604, iteration: 6687
loss: 1.071739673614502,grad_norm: 0.9999995546880398, iteration: 6688
loss: 1.0870269536972046,grad_norm: 0.9999996958813595, iteration: 6689
loss: 1.0960444211959839,grad_norm: 0.9999997837483436, iteration: 6690
loss: 1.0879348516464233,grad_norm: 0.999999590702124, iteration: 6691
loss: 1.1028317213058472,grad_norm: 0.9999995832621046, iteration: 6692
loss: 1.09397292137146,grad_norm: 0.9999996084142767, iteration: 6693
loss: 1.109525203704834,grad_norm: 0.9999996478103778, iteration: 6694
loss: 1.1020106077194214,grad_norm: 0.999999601507853, iteration: 6695
loss: 1.086990475654602,grad_norm: 0.9999995827626672, iteration: 6696
loss: 1.0737062692642212,grad_norm: 0.9999995954082413, iteration: 6697
loss: 1.0455594062805176,grad_norm: 0.9999995686717288, iteration: 6698
loss: 1.0864399671554565,grad_norm: 0.9999997046649561, iteration: 6699
loss: 1.0334091186523438,grad_norm: 0.99999956080744, iteration: 6700
loss: 1.139151930809021,grad_norm: 0.999999714592205, iteration: 6701
loss: 1.104770302772522,grad_norm: 0.9999994163854258, iteration: 6702
loss: 1.0429359674453735,grad_norm: 0.9999995532588128, iteration: 6703
loss: 1.0655105113983154,grad_norm: 0.9999997784146182, iteration: 6704
loss: 1.0649644136428833,grad_norm: 0.9999994650458868, iteration: 6705
loss: 1.0956335067749023,grad_norm: 0.999999621197416, iteration: 6706
loss: 1.1170672178268433,grad_norm: 0.9999994274776589, iteration: 6707
loss: 1.0526453256607056,grad_norm: 0.9999995890862385, iteration: 6708
loss: 1.0342235565185547,grad_norm: 0.9999994230143804, iteration: 6709
loss: 1.0402597188949585,grad_norm: 0.9999995211223952, iteration: 6710
loss: 1.0614573955535889,grad_norm: 0.9999996303391504, iteration: 6711
loss: 1.1235533952713013,grad_norm: 0.9999996520668981, iteration: 6712
loss: 1.0935511589050293,grad_norm: 0.9999995247640344, iteration: 6713
loss: 1.073171615600586,grad_norm: 0.9999995141148846, iteration: 6714
loss: 1.095105767250061,grad_norm: 0.9999997262214779, iteration: 6715
loss: 1.0916078090667725,grad_norm: 0.9999995931845854, iteration: 6716
loss: 1.0151602029800415,grad_norm: 0.9999994427567551, iteration: 6717
loss: 1.0567116737365723,grad_norm: 0.9999995623630644, iteration: 6718
loss: 1.0723724365234375,grad_norm: 0.9999995304116862, iteration: 6719
loss: 1.1061145067214966,grad_norm: 0.9999995648440719, iteration: 6720
loss: 1.0740028619766235,grad_norm: 0.9999994279664223, iteration: 6721
loss: 1.0252398252487183,grad_norm: 0.9999994976570257, iteration: 6722
loss: 1.1044672727584839,grad_norm: 0.9999996870012732, iteration: 6723
loss: 1.0745301246643066,grad_norm: 0.9999995813523677, iteration: 6724
loss: 1.133030891418457,grad_norm: 0.999999639040865, iteration: 6725
loss: 1.0588902235031128,grad_norm: 0.9999997092062185, iteration: 6726
loss: 1.0562193393707275,grad_norm: 0.9999994750123768, iteration: 6727
loss: 1.149892807006836,grad_norm: 0.9999997520895814, iteration: 6728
loss: 1.0522589683532715,grad_norm: 0.9999995001385258, iteration: 6729
loss: 1.0693219900131226,grad_norm: 0.9999996618817795, iteration: 6730
loss: 1.0985431671142578,grad_norm: 0.9999996391948008, iteration: 6731
loss: 1.0667146444320679,grad_norm: 0.999999351896468, iteration: 6732
loss: 1.086564540863037,grad_norm: 0.999999585612781, iteration: 6733
loss: 1.0541883707046509,grad_norm: 0.9999996384533468, iteration: 6734
loss: 1.1431986093521118,grad_norm: 0.9999998532571003, iteration: 6735
loss: 1.0221014022827148,grad_norm: 0.999999480834712, iteration: 6736
loss: 1.0226409435272217,grad_norm: 0.9999993698116518, iteration: 6737
loss: 1.0885876417160034,grad_norm: 0.9999994335135958, iteration: 6738
loss: 1.1037471294403076,grad_norm: 0.9999996423633699, iteration: 6739
loss: 1.043182373046875,grad_norm: 0.9999996311666173, iteration: 6740
loss: 1.0895096063613892,grad_norm: 0.9999996708691086, iteration: 6741
loss: 1.0869799852371216,grad_norm: 0.9999996379278991, iteration: 6742
loss: 1.1068243980407715,grad_norm: 0.999999615799602, iteration: 6743
loss: 1.0863038301467896,grad_norm: 0.9999998107270031, iteration: 6744
loss: 1.0245134830474854,grad_norm: 0.9999996573842518, iteration: 6745
loss: 1.1858909130096436,grad_norm: 0.9999997780109604, iteration: 6746
loss: 1.0089683532714844,grad_norm: 0.9999994869562414, iteration: 6747
loss: 1.0802733898162842,grad_norm: 0.9999996994519114, iteration: 6748
loss: 1.0855836868286133,grad_norm: 0.9999995505701341, iteration: 6749
loss: 1.0646415948867798,grad_norm: 0.9999995346894147, iteration: 6750
loss: 1.089722752571106,grad_norm: 0.9999994713435292, iteration: 6751
loss: 1.0787118673324585,grad_norm: 0.9999995400161621, iteration: 6752
loss: 1.0913403034210205,grad_norm: 0.9999995581367283, iteration: 6753
loss: 1.1263514757156372,grad_norm: 0.999999665105029, iteration: 6754
loss: 1.054627776145935,grad_norm: 0.9999994519029951, iteration: 6755
loss: 1.0636281967163086,grad_norm: 0.9999994127138118, iteration: 6756
loss: 1.105156660079956,grad_norm: 0.9999996070445749, iteration: 6757
loss: 1.0663566589355469,grad_norm: 0.9999994552783026, iteration: 6758
loss: 1.092673420906067,grad_norm: 0.9999996347376875, iteration: 6759
loss: 1.05828857421875,grad_norm: 0.9999994718122959, iteration: 6760
loss: 1.0405877828598022,grad_norm: 0.9999996145940397, iteration: 6761
loss: 1.084560513496399,grad_norm: 0.9999995296709755, iteration: 6762
loss: 1.0865681171417236,grad_norm: 0.9999997109333943, iteration: 6763
loss: 1.0879369974136353,grad_norm: 0.9999994934638502, iteration: 6764
loss: 1.06166410446167,grad_norm: 0.9999995164482693, iteration: 6765
loss: 1.084538459777832,grad_norm: 0.9999994586572687, iteration: 6766
loss: 1.0636812448501587,grad_norm: 0.9999996186853576, iteration: 6767
loss: 1.0452872514724731,grad_norm: 0.9999994708628334, iteration: 6768
loss: 1.0667271614074707,grad_norm: 0.999999442519302, iteration: 6769
loss: 1.1157770156860352,grad_norm: 0.9999996627540054, iteration: 6770
loss: 1.0659765005111694,grad_norm: 0.9999995810506181, iteration: 6771
loss: 1.0572575330734253,grad_norm: 0.9999994280867782, iteration: 6772
loss: 1.11839759349823,grad_norm: 0.9999997121708591, iteration: 6773
loss: 1.041327714920044,grad_norm: 0.9999993061331571, iteration: 6774
loss: 1.0881800651550293,grad_norm: 0.9999994907646309, iteration: 6775
loss: 1.1001662015914917,grad_norm: 0.9999996414125598, iteration: 6776
loss: 1.0013017654418945,grad_norm: 0.9999994641737507, iteration: 6777
loss: 1.0851726531982422,grad_norm: 0.9999996601132197, iteration: 6778
loss: 1.080700159072876,grad_norm: 0.9999996904403873, iteration: 6779
loss: 1.0471824407577515,grad_norm: 0.9999995434709712, iteration: 6780
loss: 1.0957905054092407,grad_norm: 0.9999996209707871, iteration: 6781
loss: 1.1312841176986694,grad_norm: 0.9999996089744683, iteration: 6782
loss: 1.089395523071289,grad_norm: 0.9999995706564454, iteration: 6783
loss: 1.0483946800231934,grad_norm: 0.9999996595901075, iteration: 6784
loss: 1.0420172214508057,grad_norm: 0.9999994803755353, iteration: 6785
loss: 1.0925968885421753,grad_norm: 0.999999490538303, iteration: 6786
loss: 1.0621761083602905,grad_norm: 0.9999995852563844, iteration: 6787
loss: 1.1254223585128784,grad_norm: 0.9999996858470132, iteration: 6788
loss: 1.0528169870376587,grad_norm: 0.9999994949879815, iteration: 6789
loss: 1.1562820672988892,grad_norm: 0.9999996339064411, iteration: 6790
loss: 1.0828356742858887,grad_norm: 0.999999640600356, iteration: 6791
loss: 1.0914831161499023,grad_norm: 0.9999996058776107, iteration: 6792
loss: 1.0904477834701538,grad_norm: 0.9999996739237061, iteration: 6793
loss: 0.9943848252296448,grad_norm: 0.9999995006308066, iteration: 6794
loss: 1.0717222690582275,grad_norm: 0.9999994389486394, iteration: 6795
loss: 1.1421245336532593,grad_norm: 0.9999997038311363, iteration: 6796
loss: 1.092423915863037,grad_norm: 0.9999994603261273, iteration: 6797
loss: 1.0842665433883667,grad_norm: 0.9999995719957028, iteration: 6798
loss: 1.107346773147583,grad_norm: 0.9999995719352023, iteration: 6799
loss: 1.086592435836792,grad_norm: 0.9999995008621807, iteration: 6800
loss: 1.0947744846343994,grad_norm: 0.9999997888252691, iteration: 6801
loss: 1.0917538404464722,grad_norm: 0.9999994606885189, iteration: 6802
loss: 1.0809522867202759,grad_norm: 0.9999995832905284, iteration: 6803
loss: 1.1439013481140137,grad_norm: 0.9999996673275754, iteration: 6804
loss: 1.0367923974990845,grad_norm: 0.9999995658916636, iteration: 6805
loss: 1.0618700981140137,grad_norm: 0.9999995875743467, iteration: 6806
loss: 1.1060842275619507,grad_norm: 0.9999996057103814, iteration: 6807
loss: 1.0832288265228271,grad_norm: 0.9999994997994478, iteration: 6808
loss: 1.0793062448501587,grad_norm: 0.9999994024737036, iteration: 6809
loss: 1.0609989166259766,grad_norm: 0.999999535248891, iteration: 6810
loss: 1.139298915863037,grad_norm: 0.9999996201853601, iteration: 6811
loss: 1.0715044736862183,grad_norm: 0.9999995117769485, iteration: 6812
loss: 1.102419137954712,grad_norm: 0.9999996580495711, iteration: 6813
loss: 1.086135745048523,grad_norm: 0.999999590715524, iteration: 6814
loss: 1.0650964975357056,grad_norm: 0.9999995864079054, iteration: 6815
loss: 1.1013118028640747,grad_norm: 0.9999995269800696, iteration: 6816
loss: 1.068169116973877,grad_norm: 0.9999996260117503, iteration: 6817
loss: 1.032943844795227,grad_norm: 0.9999994727891708, iteration: 6818
loss: 1.0769246816635132,grad_norm: 0.9999996402295158, iteration: 6819
loss: 1.082196593284607,grad_norm: 0.9999995335888032, iteration: 6820
loss: 1.1199488639831543,grad_norm: 0.9999997733069685, iteration: 6821
loss: 1.0901267528533936,grad_norm: 0.9999994545520371, iteration: 6822
loss: 1.0383822917938232,grad_norm: 0.9999995436493481, iteration: 6823
loss: 1.0471044778823853,grad_norm: 0.999999319283279, iteration: 6824
loss: 1.1164315938949585,grad_norm: 0.9999997634231496, iteration: 6825
loss: 1.0743781328201294,grad_norm: 0.9999995762288993, iteration: 6826
loss: 1.0542629957199097,grad_norm: 0.999999531542292, iteration: 6827
loss: 1.0669752359390259,grad_norm: 0.9999995360233239, iteration: 6828
loss: 1.0465917587280273,grad_norm: 0.9999994619317046, iteration: 6829
loss: 1.139164924621582,grad_norm: 0.9999996872168824, iteration: 6830
loss: 1.0526427030563354,grad_norm: 0.9999995463826193, iteration: 6831
loss: 1.0601675510406494,grad_norm: 0.9999998245701986, iteration: 6832
loss: 1.0548850297927856,grad_norm: 0.9999995594074271, iteration: 6833
loss: 1.0705020427703857,grad_norm: 0.9999996329279676, iteration: 6834
loss: 1.1377302408218384,grad_norm: 0.9999997306590312, iteration: 6835
loss: 1.079304575920105,grad_norm: 0.999999529955382, iteration: 6836
loss: 1.1549159288406372,grad_norm: 0.9999996014545756, iteration: 6837
loss: 1.050621509552002,grad_norm: 0.999999488931692, iteration: 6838
loss: 1.1207019090652466,grad_norm: 0.9999996532575847, iteration: 6839
loss: 1.0849113464355469,grad_norm: 0.999999545310306, iteration: 6840
loss: 1.0451310873031616,grad_norm: 0.9999995170512596, iteration: 6841
loss: 1.0762947797775269,grad_norm: 0.9999994419139105, iteration: 6842
loss: 1.083673119544983,grad_norm: 0.999999584531764, iteration: 6843
loss: 1.0656180381774902,grad_norm: 0.9999995783014979, iteration: 6844
loss: 1.074173927307129,grad_norm: 0.9999996705695063, iteration: 6845
loss: 1.0207592248916626,grad_norm: 0.9999995199430258, iteration: 6846
loss: 1.061653733253479,grad_norm: 0.9999995938192809, iteration: 6847
loss: 1.0722556114196777,grad_norm: 0.999999586403393, iteration: 6848
loss: 1.0760844945907593,grad_norm: 0.99999962657436, iteration: 6849
loss: 1.1295394897460938,grad_norm: 0.9999997671344368, iteration: 6850
loss: 1.0613197088241577,grad_norm: 0.9999994032327241, iteration: 6851
loss: 1.0866235494613647,grad_norm: 0.9999994830104738, iteration: 6852
loss: 1.0670931339263916,grad_norm: 0.9999996139938447, iteration: 6853
loss: 1.108777642250061,grad_norm: 0.9999994383905788, iteration: 6854
loss: 1.0456417798995972,grad_norm: 0.999999490943453, iteration: 6855
loss: 1.0864992141723633,grad_norm: 0.9999995906824604, iteration: 6856
loss: 1.0925627946853638,grad_norm: 0.9999995743496014, iteration: 6857
loss: 1.0730746984481812,grad_norm: 0.9999994811441173, iteration: 6858
loss: 1.097886562347412,grad_norm: 0.9999994652419164, iteration: 6859
loss: 1.0779184103012085,grad_norm: 0.9999995367408265, iteration: 6860
loss: 1.120504379272461,grad_norm: 0.9999995770514352, iteration: 6861
loss: 1.0743962526321411,grad_norm: 0.999999645161581, iteration: 6862
loss: 1.0341122150421143,grad_norm: 0.999999743650283, iteration: 6863
loss: 1.1003704071044922,grad_norm: 0.9999995510081854, iteration: 6864
loss: 1.0699506998062134,grad_norm: 0.9999993988547186, iteration: 6865
loss: 1.0651575326919556,grad_norm: 0.9999995815951627, iteration: 6866
loss: 1.0077221393585205,grad_norm: 0.9999994522497033, iteration: 6867
loss: 1.0632013082504272,grad_norm: 0.9999995285484683, iteration: 6868
loss: 1.1213958263397217,grad_norm: 0.999999699917681, iteration: 6869
loss: 1.0591036081314087,grad_norm: 0.9999996473973393, iteration: 6870
loss: 1.1257802248001099,grad_norm: 0.9999996207689525, iteration: 6871
loss: 1.049674391746521,grad_norm: 0.9999995704118688, iteration: 6872
loss: 1.0387388467788696,grad_norm: 0.9999993790765961, iteration: 6873
loss: 1.0473567247390747,grad_norm: 0.9999996824980181, iteration: 6874
loss: 1.0947723388671875,grad_norm: 0.9999997046411511, iteration: 6875
loss: 1.106399416923523,grad_norm: 0.9999996167805951, iteration: 6876
loss: 0.9678114056587219,grad_norm: 0.9999993397182128, iteration: 6877
loss: 1.0411280393600464,grad_norm: 0.9999994104050588, iteration: 6878
loss: 1.07672917842865,grad_norm: 0.9999996656316658, iteration: 6879
loss: 1.0505200624465942,grad_norm: 0.9999994702921641, iteration: 6880
loss: 1.1004126071929932,grad_norm: 0.9999995936755199, iteration: 6881
loss: 1.011872410774231,grad_norm: 0.9999994123508024, iteration: 6882
loss: 1.073684573173523,grad_norm: 0.9999995590677615, iteration: 6883
loss: 1.0999339818954468,grad_norm: 0.9999995664118525, iteration: 6884
loss: 1.0331069231033325,grad_norm: 0.9999993978905479, iteration: 6885
loss: 1.0475493669509888,grad_norm: 0.9999995027691505, iteration: 6886
loss: 1.117579460144043,grad_norm: 0.9999996913815953, iteration: 6887
loss: 1.0572890043258667,grad_norm: 0.9999994877033819, iteration: 6888
loss: 1.0541473627090454,grad_norm: 0.9999993863545421, iteration: 6889
loss: 1.0277959108352661,grad_norm: 0.9999993631291473, iteration: 6890
loss: 1.057198405265808,grad_norm: 0.9999994617810963, iteration: 6891
loss: 1.0809342861175537,grad_norm: 0.9999998487174607, iteration: 6892
loss: 1.0262370109558105,grad_norm: 0.9999995330400084, iteration: 6893
loss: 1.0507984161376953,grad_norm: 0.9999995369015268, iteration: 6894
loss: 1.0813496112823486,grad_norm: 0.999999577759572, iteration: 6895
loss: 1.0902965068817139,grad_norm: 0.9999992922002041, iteration: 6896
loss: 1.0315707921981812,grad_norm: 0.9999996324074056, iteration: 6897
loss: 1.154189109802246,grad_norm: 0.9999996643324677, iteration: 6898
loss: 1.1173378229141235,grad_norm: 0.9999997150832098, iteration: 6899
loss: 1.0779914855957031,grad_norm: 0.9999995900892523, iteration: 6900
loss: 1.0498703718185425,grad_norm: 0.9999994166536758, iteration: 6901
loss: 1.1490219831466675,grad_norm: 0.9999996918241256, iteration: 6902
loss: 1.0836400985717773,grad_norm: 0.9999996915238666, iteration: 6903
loss: 1.069859504699707,grad_norm: 0.9999994828609097, iteration: 6904
loss: 1.051706075668335,grad_norm: 0.9999994278112013, iteration: 6905
loss: 1.0476950407028198,grad_norm: 0.9999994025434992, iteration: 6906
loss: 1.0487961769104004,grad_norm: 0.9999995614109826, iteration: 6907
loss: 1.099924087524414,grad_norm: 0.9999994625299484, iteration: 6908
loss: 1.105607509613037,grad_norm: 0.9999994375647545, iteration: 6909
loss: 1.1028116941452026,grad_norm: 0.9999996066383994, iteration: 6910
loss: 1.1092514991760254,grad_norm: 0.9999995642576054, iteration: 6911
loss: 1.0772254467010498,grad_norm: 0.9999996005347569, iteration: 6912
loss: 1.0722565650939941,grad_norm: 0.9999995930297567, iteration: 6913
loss: 1.061574101448059,grad_norm: 0.9999993834828406, iteration: 6914
loss: 1.0497770309448242,grad_norm: 0.9999994251455958, iteration: 6915
loss: 1.0454444885253906,grad_norm: 0.9999995365245332, iteration: 6916
loss: 1.0289310216903687,grad_norm: 0.9999995062205542, iteration: 6917
loss: 1.039450764656067,grad_norm: 0.9999994760587952, iteration: 6918
loss: 1.097915768623352,grad_norm: 0.9999997455044436, iteration: 6919
loss: 1.066996455192566,grad_norm: 0.9999994572389657, iteration: 6920
loss: 1.1036489009857178,grad_norm: 0.9999996041724789, iteration: 6921
loss: 1.0551966428756714,grad_norm: 0.9999996819029466, iteration: 6922
loss: 1.0634143352508545,grad_norm: 0.9999995705255091, iteration: 6923
loss: 1.0725061893463135,grad_norm: 0.9999994239914202, iteration: 6924
loss: 1.116327166557312,grad_norm: 0.9999995679404637, iteration: 6925
loss: 1.0741620063781738,grad_norm: 0.9999997250479024, iteration: 6926
loss: 1.0529401302337646,grad_norm: 0.9999993848181841, iteration: 6927
loss: 1.0799185037612915,grad_norm: 0.9999994556714472, iteration: 6928
loss: 1.0535962581634521,grad_norm: 0.9999995529768131, iteration: 6929
loss: 1.1138479709625244,grad_norm: 0.9999996809195467, iteration: 6930
loss: 1.0716336965560913,grad_norm: 0.9999994905440213, iteration: 6931
loss: 1.0588880777359009,grad_norm: 0.9999996224064499, iteration: 6932
loss: 1.090301752090454,grad_norm: 0.9999994601310822, iteration: 6933
loss: 1.0019950866699219,grad_norm: 0.9999993311706439, iteration: 6934
loss: 1.1005419492721558,grad_norm: 0.999999726834133, iteration: 6935
loss: 1.1867880821228027,grad_norm: 0.9999997408962348, iteration: 6936
loss: 1.0490912199020386,grad_norm: 0.9999995651992126, iteration: 6937
loss: 1.1538517475128174,grad_norm: 0.9999997443678995, iteration: 6938
loss: 1.0487467050552368,grad_norm: 0.9999994404052228, iteration: 6939
loss: 1.0740201473236084,grad_norm: 0.9999996654469838, iteration: 6940
loss: 1.0537976026535034,grad_norm: 0.9999995938388816, iteration: 6941
loss: 1.0743674039840698,grad_norm: 0.9999995973716925, iteration: 6942
loss: 1.0183186531066895,grad_norm: 0.9999992994971365, iteration: 6943
loss: 1.0657535791397095,grad_norm: 0.9999996507839278, iteration: 6944
loss: 1.082579493522644,grad_norm: 0.99999938485721, iteration: 6945
loss: 1.084911584854126,grad_norm: 0.9999997251587426, iteration: 6946
loss: 1.0857000350952148,grad_norm: 0.9999995435223904, iteration: 6947
loss: 1.0965949296951294,grad_norm: 0.9999994844169685, iteration: 6948
loss: 1.070892095565796,grad_norm: 0.9999998106718813, iteration: 6949
loss: 1.0705690383911133,grad_norm: 0.9999996526668798, iteration: 6950
loss: 1.0517460107803345,grad_norm: 0.9999996051405277, iteration: 6951
loss: 1.0931178331375122,grad_norm: 0.9999996914475546, iteration: 6952
loss: 1.0329713821411133,grad_norm: 0.9999996036685825, iteration: 6953
loss: 1.1068603992462158,grad_norm: 0.999999713686038, iteration: 6954
loss: 1.054916501045227,grad_norm: 0.9999996356990152, iteration: 6955
loss: 1.0346314907073975,grad_norm: 0.9999995293731276, iteration: 6956
loss: 1.038540005683899,grad_norm: 0.9999995658312161, iteration: 6957
loss: 1.0785244703292847,grad_norm: 0.9999994714559504, iteration: 6958
loss: 1.0703269243240356,grad_norm: 0.9999996039586271, iteration: 6959
loss: 1.015092372894287,grad_norm: 0.9999994529205175, iteration: 6960
loss: 1.0338642597198486,grad_norm: 0.9999994268584502, iteration: 6961
loss: 1.055276870727539,grad_norm: 0.9999996704566853, iteration: 6962
loss: 1.1000088453292847,grad_norm: 0.9999995105528814, iteration: 6963
loss: 1.103009819984436,grad_norm: 0.9999995178215941, iteration: 6964
loss: 1.0698403120040894,grad_norm: 0.9999994878652892, iteration: 6965
loss: 1.0741584300994873,grad_norm: 0.9999995110600792, iteration: 6966
loss: 1.1006805896759033,grad_norm: 0.999999570828191, iteration: 6967
loss: 1.08592689037323,grad_norm: 0.9999994137214683, iteration: 6968
loss: 1.10261070728302,grad_norm: 0.9999996848531444, iteration: 6969
loss: 1.085652470588684,grad_norm: 0.9999995760385885, iteration: 6970
loss: 1.0630884170532227,grad_norm: 0.9999996402001131, iteration: 6971
loss: 1.0375341176986694,grad_norm: 0.9999993627890582, iteration: 6972
loss: 1.042283296585083,grad_norm: 0.9999995305123872, iteration: 6973
loss: 1.0804234743118286,grad_norm: 0.999999598708044, iteration: 6974
loss: 1.061616063117981,grad_norm: 0.9999994878569703, iteration: 6975
loss: 1.052291750907898,grad_norm: 0.9999994403065011, iteration: 6976
loss: 1.0594905614852905,grad_norm: 0.999999457284795, iteration: 6977
loss: 0.9804461598396301,grad_norm: 0.9999995018139751, iteration: 6978
loss: 1.0832935571670532,grad_norm: 0.9999995051921063, iteration: 6979
loss: 1.116574764251709,grad_norm: 0.9999996049184352, iteration: 6980
loss: 1.0884636640548706,grad_norm: 0.9999995873654582, iteration: 6981
loss: 1.0914428234100342,grad_norm: 0.9999995087742203, iteration: 6982
loss: 1.0658595561981201,grad_norm: 0.9999994449345162, iteration: 6983
loss: 1.0502455234527588,grad_norm: 0.9999994329616124, iteration: 6984
loss: 1.086858868598938,grad_norm: 0.9999994050165655, iteration: 6985
loss: 1.079869031906128,grad_norm: 0.9999995257127876, iteration: 6986
loss: 0.993898332118988,grad_norm: 0.9999993678789456, iteration: 6987
loss: 1.0826339721679688,grad_norm: 0.9999995651881706, iteration: 6988
loss: 1.0937641859054565,grad_norm: 0.9999997164226874, iteration: 6989
loss: 0.9758126139640808,grad_norm: 0.9999993076908441, iteration: 6990
loss: 1.075945258140564,grad_norm: 0.9999997157294778, iteration: 6991
loss: 1.0293943881988525,grad_norm: 0.9999995030443964, iteration: 6992
loss: 1.0925933122634888,grad_norm: 0.9999994984721817, iteration: 6993
loss: 1.1162844896316528,grad_norm: 0.999999580383407, iteration: 6994
loss: 1.0610630512237549,grad_norm: 0.9999996103272618, iteration: 6995
loss: 1.0432065725326538,grad_norm: 0.9999994990831156, iteration: 6996
loss: 1.012725591659546,grad_norm: 0.9999996275732844, iteration: 6997
loss: 1.0519596338272095,grad_norm: 0.999999377509882, iteration: 6998
loss: 1.108137607574463,grad_norm: 0.9999997666039626, iteration: 6999
loss: 1.0959721803665161,grad_norm: 0.9999994944137212, iteration: 7000
loss: 1.1285842657089233,grad_norm: 0.9999995678344116, iteration: 7001
loss: 1.0709922313690186,grad_norm: 0.9999997085178618, iteration: 7002
loss: 1.0545520782470703,grad_norm: 0.9999993661522327, iteration: 7003
loss: 1.1459649801254272,grad_norm: 0.9999997961929575, iteration: 7004
loss: 1.0709501504898071,grad_norm: 0.9999994531352729, iteration: 7005
loss: 1.0789252519607544,grad_norm: 0.9999993831881357, iteration: 7006
loss: 1.0770037174224854,grad_norm: 0.9999995404406591, iteration: 7007
loss: 1.0790783166885376,grad_norm: 0.9999994218911258, iteration: 7008
loss: 1.0333702564239502,grad_norm: 0.9999993657630732, iteration: 7009
loss: 1.0684007406234741,grad_norm: 0.9999995979112073, iteration: 7010
loss: 1.1367063522338867,grad_norm: 0.9999995925078928, iteration: 7011
loss: 1.0677381753921509,grad_norm: 0.9999996510907292, iteration: 7012
loss: 1.0795965194702148,grad_norm: 0.9999994150036431, iteration: 7013
loss: 1.0054845809936523,grad_norm: 0.9999995520010035, iteration: 7014
loss: 1.116827130317688,grad_norm: 0.9999995870376732, iteration: 7015
loss: 1.108095407485962,grad_norm: 0.9999995564562798, iteration: 7016
loss: 0.988768994808197,grad_norm: 0.999999425369058, iteration: 7017
loss: 1.0316985845565796,grad_norm: 0.9999994300216637, iteration: 7018
loss: 1.085750937461853,grad_norm: 0.9999996364998484, iteration: 7019
loss: 1.113772988319397,grad_norm: 0.9999994301123682, iteration: 7020
loss: 1.1295217275619507,grad_norm: 0.9999996822505131, iteration: 7021
loss: 1.0163536071777344,grad_norm: 0.9999993784154575, iteration: 7022
loss: 1.1088316440582275,grad_norm: 0.9999996108062325, iteration: 7023
loss: 1.1130510568618774,grad_norm: 0.9999995336007682, iteration: 7024
loss: 1.0640449523925781,grad_norm: 0.9999994381823415, iteration: 7025
loss: 1.0943517684936523,grad_norm: 0.9999994917591848, iteration: 7026
loss: 1.0773694515228271,grad_norm: 0.9999997234192076, iteration: 7027
loss: 1.100026249885559,grad_norm: 0.9999997384471705, iteration: 7028
loss: 1.107256531715393,grad_norm: 0.9999997990266903, iteration: 7029
loss: 1.030440092086792,grad_norm: 0.9999994677096328, iteration: 7030
loss: 1.0310797691345215,grad_norm: 0.9999994894099395, iteration: 7031
loss: 1.162804126739502,grad_norm: 0.9999996704405919, iteration: 7032
loss: 1.0671679973602295,grad_norm: 0.9999995740246879, iteration: 7033
loss: 1.0471391677856445,grad_norm: 0.9999994939406243, iteration: 7034
loss: 1.0787694454193115,grad_norm: 0.999999558382622, iteration: 7035
loss: 1.1582348346710205,grad_norm: 0.9999996594240769, iteration: 7036
loss: 1.0922685861587524,grad_norm: 0.9999995843354851, iteration: 7037
loss: 1.0572761297225952,grad_norm: 0.9999993089132042, iteration: 7038
loss: 1.0752861499786377,grad_norm: 0.9999996071909445, iteration: 7039
loss: 1.0315972566604614,grad_norm: 0.9999994753814911, iteration: 7040
loss: 1.129109263420105,grad_norm: 0.99999966372923, iteration: 7041
loss: 1.0421568155288696,grad_norm: 0.9999993358861401, iteration: 7042
loss: 1.0582412481307983,grad_norm: 0.999999391618618, iteration: 7043
loss: 1.1419925689697266,grad_norm: 0.9999997204074285, iteration: 7044
loss: 1.0690655708312988,grad_norm: 0.9999995264836142, iteration: 7045
loss: 0.9726554751396179,grad_norm: 0.9999993243530505, iteration: 7046
loss: 1.0902538299560547,grad_norm: 0.999999328634289, iteration: 7047
loss: 1.0513694286346436,grad_norm: 0.9999998937447493, iteration: 7048
loss: 1.0541613101959229,grad_norm: 0.9999994822793682, iteration: 7049
loss: 1.0226402282714844,grad_norm: 0.9999996687418327, iteration: 7050
loss: 1.1249792575836182,grad_norm: 0.9999996259936909, iteration: 7051
loss: 1.075873613357544,grad_norm: 0.9999995736560108, iteration: 7052
loss: 1.0539158582687378,grad_norm: 0.9999994040080701, iteration: 7053
loss: 1.0923795700073242,grad_norm: 0.999999362370485, iteration: 7054
loss: 1.0496330261230469,grad_norm: 0.9999995843663423, iteration: 7055
loss: 1.0194464921951294,grad_norm: 0.999999481876059, iteration: 7056
loss: 1.0410124063491821,grad_norm: 0.999999690943846, iteration: 7057
loss: 1.0973621606826782,grad_norm: 0.9999996974486524, iteration: 7058
loss: 1.0590218305587769,grad_norm: 0.9999995632323412, iteration: 7059
loss: 1.104786992073059,grad_norm: 0.9999995850744711, iteration: 7060
loss: 1.064271092414856,grad_norm: 0.9999993749523912, iteration: 7061
loss: 1.0967320203781128,grad_norm: 0.9999995415011022, iteration: 7062
loss: 1.0769997835159302,grad_norm: 0.9999996234782651, iteration: 7063
loss: 1.030601978302002,grad_norm: 0.9999994384541557, iteration: 7064
loss: 1.0334006547927856,grad_norm: 0.9999995799486538, iteration: 7065
loss: 1.0947470664978027,grad_norm: 0.9999996064014517, iteration: 7066
loss: 1.0560778379440308,grad_norm: 0.999999602187727, iteration: 7067
loss: 1.0623573064804077,grad_norm: 0.999999480498754, iteration: 7068
loss: 1.0906953811645508,grad_norm: 0.9999995233576632, iteration: 7069
loss: 1.0996739864349365,grad_norm: 0.9999996950596007, iteration: 7070
loss: 1.0782850980758667,grad_norm: 0.9999995445101514, iteration: 7071
loss: 1.1020336151123047,grad_norm: 0.9999996560313247, iteration: 7072
loss: 1.0442814826965332,grad_norm: 0.9999994446939444, iteration: 7073
loss: 1.0974663496017456,grad_norm: 0.9999996871801614, iteration: 7074
loss: 1.1216992139816284,grad_norm: 0.9999995172506692, iteration: 7075
loss: 1.074316382408142,grad_norm: 0.9999994189291228, iteration: 7076
loss: 1.0935161113739014,grad_norm: 0.9999997157799325, iteration: 7077
loss: 1.1154240369796753,grad_norm: 0.999999637329272, iteration: 7078
loss: 1.0900464057922363,grad_norm: 0.9999996502710549, iteration: 7079
loss: 1.102423906326294,grad_norm: 0.9999995676506159, iteration: 7080
loss: 1.063025951385498,grad_norm: 0.9999994126936721, iteration: 7081
loss: 1.0622678995132446,grad_norm: 0.9999994174135456, iteration: 7082
loss: 1.0715886354446411,grad_norm: 0.9999995833833215, iteration: 7083
loss: 1.0274027585983276,grad_norm: 0.9999993452950441, iteration: 7084
loss: 1.0848321914672852,grad_norm: 0.9999994740128938, iteration: 7085
loss: 1.0663148164749146,grad_norm: 0.9999994701842252, iteration: 7086
loss: 1.0978857278823853,grad_norm: 0.999999673930482, iteration: 7087
loss: 1.0643893480300903,grad_norm: 0.9999994773076166, iteration: 7088
loss: 1.037123680114746,grad_norm: 0.9999994391673088, iteration: 7089
loss: 1.0707882642745972,grad_norm: 0.9999994837939552, iteration: 7090
loss: 1.1628470420837402,grad_norm: 0.9999997904132308, iteration: 7091
loss: 1.1272908449172974,grad_norm: 0.9999994513832855, iteration: 7092
loss: 1.0382689237594604,grad_norm: 0.9999994594531086, iteration: 7093
loss: 1.01704740524292,grad_norm: 0.9999993811647252, iteration: 7094
loss: 1.04851496219635,grad_norm: 0.999999533871998, iteration: 7095
loss: 1.0194008350372314,grad_norm: 0.9999995874111864, iteration: 7096
loss: 1.0772161483764648,grad_norm: 0.9999994536576768, iteration: 7097
loss: 1.0652529001235962,grad_norm: 0.9999993949532535, iteration: 7098
loss: 1.1288721561431885,grad_norm: 0.9999995400184887, iteration: 7099
loss: 1.0753765106201172,grad_norm: 0.9999996491622432, iteration: 7100
loss: 1.095712423324585,grad_norm: 0.9999995903024184, iteration: 7101
loss: 1.0494170188903809,grad_norm: 0.999999551064207, iteration: 7102
loss: 0.9794884920120239,grad_norm: 0.99999944968059, iteration: 7103
loss: 1.0581365823745728,grad_norm: 0.9999993667590162, iteration: 7104
loss: 1.0311135053634644,grad_norm: 0.9999995100141517, iteration: 7105
loss: 1.0676552057266235,grad_norm: 0.9999995614744214, iteration: 7106
loss: 1.0231623649597168,grad_norm: 0.9999993489490226, iteration: 7107
loss: 1.0634175539016724,grad_norm: 0.999999435084534, iteration: 7108
loss: 1.0772132873535156,grad_norm: 0.9999994791836454, iteration: 7109
loss: 1.0662274360656738,grad_norm: 0.9999993808184601, iteration: 7110
loss: 1.0519462823867798,grad_norm: 0.9999996200600256, iteration: 7111
loss: 1.0703822374343872,grad_norm: 0.9999994851517922, iteration: 7112
loss: 1.0629271268844604,grad_norm: 0.9999994719490743, iteration: 7113
loss: 1.0567214488983154,grad_norm: 0.9999996085756486, iteration: 7114
loss: 0.992926836013794,grad_norm: 0.9999993947721635, iteration: 7115
loss: 1.065558910369873,grad_norm: 0.9999993664261823, iteration: 7116
loss: 1.077134132385254,grad_norm: 0.9999994706752733, iteration: 7117
loss: 1.0503522157669067,grad_norm: 0.9999995537363064, iteration: 7118
loss: 0.9946310520172119,grad_norm: 0.9999995899858236, iteration: 7119
loss: 1.102165699005127,grad_norm: 0.9999995759000243, iteration: 7120
loss: 0.9753627777099609,grad_norm: 0.9999993921035857, iteration: 7121
loss: 1.1430189609527588,grad_norm: 0.9999997444576343, iteration: 7122
loss: 1.0864249467849731,grad_norm: 0.9999993907910262, iteration: 7123
loss: 1.0552122592926025,grad_norm: 0.999999497166588, iteration: 7124
loss: 1.037305235862732,grad_norm: 0.9999995293993696, iteration: 7125
loss: 1.0537313222885132,grad_norm: 0.999999509032447, iteration: 7126
loss: 1.1091545820236206,grad_norm: 0.99999970243145, iteration: 7127
loss: 1.078873872756958,grad_norm: 0.9999995435194342, iteration: 7128
loss: 1.085601806640625,grad_norm: 0.9999994740244793, iteration: 7129
loss: 1.0255730152130127,grad_norm: 0.9999994124226135, iteration: 7130
loss: 1.0803067684173584,grad_norm: 0.9999995195248634, iteration: 7131
loss: 1.0558303594589233,grad_norm: 0.9999993847002222, iteration: 7132
loss: 1.0056601762771606,grad_norm: 0.9999993509772634, iteration: 7133
loss: 1.1233656406402588,grad_norm: 0.9999996494730798, iteration: 7134
loss: 1.0647656917572021,grad_norm: 0.9999994509809947, iteration: 7135
loss: 1.0598384141921997,grad_norm: 0.9999994178697815, iteration: 7136
loss: 1.0376707315444946,grad_norm: 0.9999996125926869, iteration: 7137
loss: 1.0319234132766724,grad_norm: 0.9999994196620767, iteration: 7138
loss: 1.0311458110809326,grad_norm: 0.9999994970066164, iteration: 7139
loss: 1.0955966711044312,grad_norm: 0.9999995694363806, iteration: 7140
loss: 1.0419645309448242,grad_norm: 0.9999995304495698, iteration: 7141
loss: 1.0728214979171753,grad_norm: 0.9999995373994074, iteration: 7142
loss: 1.0246978998184204,grad_norm: 0.9999995221702089, iteration: 7143
loss: 1.015974521636963,grad_norm: 0.9999994988702946, iteration: 7144
loss: 1.0863940715789795,grad_norm: 0.9999998019073578, iteration: 7145
loss: 1.0949839353561401,grad_norm: 0.9999996150254759, iteration: 7146
loss: 1.0640588998794556,grad_norm: 0.9999995075421133, iteration: 7147
loss: 1.0761717557907104,grad_norm: 0.9999995797639618, iteration: 7148
loss: 1.0484189987182617,grad_norm: 0.9999995896899146, iteration: 7149
loss: 1.1419408321380615,grad_norm: 0.9999995162389809, iteration: 7150
loss: 1.1090317964553833,grad_norm: 0.999999720228554, iteration: 7151
loss: 1.0859171152114868,grad_norm: 0.9999995727879434, iteration: 7152
loss: 1.0362279415130615,grad_norm: 0.9999996088188157, iteration: 7153
loss: 1.0665044784545898,grad_norm: 0.9999995122060769, iteration: 7154
loss: 1.0328654050827026,grad_norm: 0.9999994828724214, iteration: 7155
loss: 1.070778727531433,grad_norm: 0.999999579005104, iteration: 7156
loss: 1.0427309274673462,grad_norm: 0.9999997350673224, iteration: 7157
loss: 1.1210732460021973,grad_norm: 0.9999996695381576, iteration: 7158
loss: 1.092930793762207,grad_norm: 0.9999995720993264, iteration: 7159
loss: 1.0648976564407349,grad_norm: 0.9999996022568594, iteration: 7160
loss: 1.0590685606002808,grad_norm: 0.9999994396732463, iteration: 7161
loss: 1.0798527002334595,grad_norm: 0.9999994695690337, iteration: 7162
loss: 1.0631194114685059,grad_norm: 0.9999996172807687, iteration: 7163
loss: 1.1158326864242554,grad_norm: 0.999999710764375, iteration: 7164
loss: 1.0858478546142578,grad_norm: 0.9999995312492466, iteration: 7165
loss: 1.0715919733047485,grad_norm: 0.9999995912343231, iteration: 7166
loss: 1.081631064414978,grad_norm: 0.999999574529354, iteration: 7167
loss: 1.0812710523605347,grad_norm: 0.9999996324219099, iteration: 7168
loss: 1.0226556062698364,grad_norm: 0.9999996145000872, iteration: 7169
loss: 1.0506372451782227,grad_norm: 0.9999996461259145, iteration: 7170
loss: 1.02727210521698,grad_norm: 0.9999994044738171, iteration: 7171
loss: 1.1116297245025635,grad_norm: 0.9999997555047012, iteration: 7172
loss: 1.10953950881958,grad_norm: 0.9999994995436234, iteration: 7173
loss: 1.049682378768921,grad_norm: 0.9999994359314949, iteration: 7174
loss: 1.0781233310699463,grad_norm: 0.9999996442251746, iteration: 7175
loss: 1.0124740600585938,grad_norm: 0.9999994854870174, iteration: 7176
loss: 1.093532681465149,grad_norm: 0.9999997411737135, iteration: 7177
loss: 1.0662394762039185,grad_norm: 0.9999996053008904, iteration: 7178
loss: 1.1332663297653198,grad_norm: 0.9999997057728219, iteration: 7179
loss: 1.0964813232421875,grad_norm: 0.9999995794939788, iteration: 7180
loss: 1.0960257053375244,grad_norm: 0.999999678793234, iteration: 7181
loss: 1.0628947019577026,grad_norm: 0.9999996929116066, iteration: 7182
loss: 1.0464671850204468,grad_norm: 0.9999995008514373, iteration: 7183
loss: 1.0376421213150024,grad_norm: 0.9999993449709109, iteration: 7184
loss: 1.0250821113586426,grad_norm: 0.9999995414392255, iteration: 7185
loss: 1.127418041229248,grad_norm: 0.9999994955865765, iteration: 7186
loss: 1.0582327842712402,grad_norm: 0.9999994023334223, iteration: 7187
loss: 1.0651129484176636,grad_norm: 0.9999994371914436, iteration: 7188
loss: 1.057356595993042,grad_norm: 0.9999995442437382, iteration: 7189
loss: 1.0533411502838135,grad_norm: 0.9999994286044921, iteration: 7190
loss: 1.1051123142242432,grad_norm: 0.9999997050349665, iteration: 7191
loss: 1.14007568359375,grad_norm: 0.9999996597649695, iteration: 7192
loss: 1.028000831604004,grad_norm: 0.999999524529564, iteration: 7193
loss: 1.1040366888046265,grad_norm: 0.9999997203926407, iteration: 7194
loss: 1.0752809047698975,grad_norm: 0.9999996785384163, iteration: 7195
loss: 1.0255626440048218,grad_norm: 0.9999994524398185, iteration: 7196
loss: 1.0478391647338867,grad_norm: 0.999999444977531, iteration: 7197
loss: 1.0832422971725464,grad_norm: 0.9999996953164806, iteration: 7198
loss: 1.0730739831924438,grad_norm: 0.9999996583120428, iteration: 7199
loss: 1.0903584957122803,grad_norm: 0.9999994462682722, iteration: 7200
loss: 1.1362181901931763,grad_norm: 0.9999995272679875, iteration: 7201
loss: 1.064329743385315,grad_norm: 0.9999994559208666, iteration: 7202
loss: 1.096055030822754,grad_norm: 0.9999994542460175, iteration: 7203
loss: 1.1300230026245117,grad_norm: 0.9999995878115246, iteration: 7204
loss: 1.0865050554275513,grad_norm: 0.9999997208449495, iteration: 7205
loss: 1.080816388130188,grad_norm: 0.9999994197217806, iteration: 7206
loss: 1.007521629333496,grad_norm: 0.9999998193557443, iteration: 7207
loss: 1.0865405797958374,grad_norm: 0.9999995872926865, iteration: 7208
loss: 1.077701210975647,grad_norm: 0.9999996384952351, iteration: 7209
loss: 1.0538249015808105,grad_norm: 0.9999996672702098, iteration: 7210
loss: 1.0966342687606812,grad_norm: 0.9999995349077095, iteration: 7211
loss: 1.1146681308746338,grad_norm: 0.9999994958355395, iteration: 7212
loss: 1.0483360290527344,grad_norm: 0.9999995561722531, iteration: 7213
loss: 1.0775010585784912,grad_norm: 0.9999994469847829, iteration: 7214
loss: 1.0786360502243042,grad_norm: 0.9999995153500033, iteration: 7215
loss: 1.076816201210022,grad_norm: 0.9999992805702052, iteration: 7216
loss: 1.1133500337600708,grad_norm: 0.9999996661036934, iteration: 7217
loss: 1.0781300067901611,grad_norm: 0.9999993671182053, iteration: 7218
loss: 1.1106764078140259,grad_norm: 0.9999997411826659, iteration: 7219
loss: 1.021049976348877,grad_norm: 0.9999994260383412, iteration: 7220
loss: 1.0743746757507324,grad_norm: 0.9999994770407306, iteration: 7221
loss: 1.0717371702194214,grad_norm: 0.9999996316056357, iteration: 7222
loss: 1.1052570343017578,grad_norm: 0.9999995622507833, iteration: 7223
loss: 1.0187636613845825,grad_norm: 0.9999993624367449, iteration: 7224
loss: 1.0934090614318848,grad_norm: 0.9999995351972473, iteration: 7225
loss: 1.1032814979553223,grad_norm: 0.9999996454701348, iteration: 7226
loss: 1.0937820672988892,grad_norm: 0.9999995117481051, iteration: 7227
loss: 1.052791953086853,grad_norm: 0.9999993766134332, iteration: 7228
loss: 1.0138969421386719,grad_norm: 0.9999994528538959, iteration: 7229
loss: 1.1077642440795898,grad_norm: 0.9999993228990819, iteration: 7230
loss: 1.1256966590881348,grad_norm: 0.9999996234030609, iteration: 7231
loss: 1.0671786069869995,grad_norm: 0.9999996604306015, iteration: 7232
loss: 1.0624364614486694,grad_norm: 0.9999994419576277, iteration: 7233
loss: 1.059230089187622,grad_norm: 0.9999993974277621, iteration: 7234
loss: 1.0702855587005615,grad_norm: 0.9999994398616067, iteration: 7235
loss: 1.1482163667678833,grad_norm: 0.9999996657251508, iteration: 7236
loss: 1.0324335098266602,grad_norm: 0.9999995173193769, iteration: 7237
loss: 1.0795828104019165,grad_norm: 0.9999997430221377, iteration: 7238
loss: 1.0323740243911743,grad_norm: 0.999999565959674, iteration: 7239
loss: 1.0441060066223145,grad_norm: 0.9999994620493381, iteration: 7240
loss: 1.0422054529190063,grad_norm: 0.9999993609332111, iteration: 7241
loss: 1.1134599447250366,grad_norm: 0.999999502541906, iteration: 7242
loss: 1.04883873462677,grad_norm: 0.9999995556763789, iteration: 7243
loss: 1.0882912874221802,grad_norm: 0.9999997289537774, iteration: 7244
loss: 1.0548911094665527,grad_norm: 0.9999995004476654, iteration: 7245
loss: 1.046036958694458,grad_norm: 0.9999995663795145, iteration: 7246
loss: 1.0202733278274536,grad_norm: 0.999999357960739, iteration: 7247
loss: 1.0656977891921997,grad_norm: 0.9999994090979798, iteration: 7248
loss: 1.0643290281295776,grad_norm: 0.9999995476582806, iteration: 7249
loss: 1.1014412641525269,grad_norm: 0.9999994119543211, iteration: 7250
loss: 1.0601904392242432,grad_norm: 0.9999995509768109, iteration: 7251
loss: 1.057011604309082,grad_norm: 0.9999993622552668, iteration: 7252
loss: 1.0876483917236328,grad_norm: 0.9999996158234872, iteration: 7253
loss: 1.032066822052002,grad_norm: 0.9999993328012905, iteration: 7254
loss: 1.0101739168167114,grad_norm: 0.9999993168823124, iteration: 7255
loss: 1.045494556427002,grad_norm: 0.9999994755576572, iteration: 7256
loss: 1.078168511390686,grad_norm: 0.9999993355161771, iteration: 7257
loss: 1.1061689853668213,grad_norm: 0.9999997054958869, iteration: 7258
loss: 1.080341100692749,grad_norm: 0.99999961444995, iteration: 7259
loss: 1.0164340734481812,grad_norm: 0.9999995577049133, iteration: 7260
loss: 1.0140936374664307,grad_norm: 0.9999992638660224, iteration: 7261
loss: 1.0810211896896362,grad_norm: 0.9999995280247023, iteration: 7262
loss: 1.1549625396728516,grad_norm: 0.9999995757132013, iteration: 7263
loss: 1.0798094272613525,grad_norm: 0.9999996789235857, iteration: 7264
loss: 1.0035037994384766,grad_norm: 0.9999993014050604, iteration: 7265
loss: 1.0590864419937134,grad_norm: 0.9999994571771184, iteration: 7266
loss: 1.0730576515197754,grad_norm: 0.9999995351128452, iteration: 7267
loss: 1.1052600145339966,grad_norm: 0.9999997217361306, iteration: 7268
loss: 1.027277946472168,grad_norm: 0.9999995108952909, iteration: 7269
loss: 1.0320416688919067,grad_norm: 0.999999499100084, iteration: 7270
loss: 1.1281591653823853,grad_norm: 0.9999996243657308, iteration: 7271
loss: 1.0844827890396118,grad_norm: 0.9999994746282989, iteration: 7272
loss: 1.065197229385376,grad_norm: 0.9999997150979993, iteration: 7273
loss: 1.0454978942871094,grad_norm: 0.9999994981074076, iteration: 7274
loss: 1.0323047637939453,grad_norm: 0.9999993893387715, iteration: 7275
loss: 1.0870782136917114,grad_norm: 0.9999993368568177, iteration: 7276
loss: 1.0659576654434204,grad_norm: 0.9999994083950836, iteration: 7277
loss: 1.0696184635162354,grad_norm: 0.9999994328044934, iteration: 7278
loss: 1.1123441457748413,grad_norm: 0.9999996053156337, iteration: 7279
loss: 1.0478655099868774,grad_norm: 0.9999994197836619, iteration: 7280
loss: 1.0474023818969727,grad_norm: 0.9999995501839963, iteration: 7281
loss: 1.0521537065505981,grad_norm: 0.9999998371544503, iteration: 7282
loss: 1.1383568048477173,grad_norm: 0.9999996791917374, iteration: 7283
loss: 1.0550343990325928,grad_norm: 0.9999994046846618, iteration: 7284
loss: 1.0238699913024902,grad_norm: 0.9999994201153632, iteration: 7285
loss: 1.041999101638794,grad_norm: 0.999999622856178, iteration: 7286
loss: 1.0852887630462646,grad_norm: 0.999999532646487, iteration: 7287
loss: 1.0246684551239014,grad_norm: 0.9999993965495519, iteration: 7288
loss: 1.0547103881835938,grad_norm: 0.9999997829988786, iteration: 7289
loss: 1.075234055519104,grad_norm: 0.9999995493413701, iteration: 7290
loss: 1.0803765058517456,grad_norm: 0.9999996389418867, iteration: 7291
loss: 1.0157417058944702,grad_norm: 0.9999996147562868, iteration: 7292
loss: 1.0542715787887573,grad_norm: 0.999999603283422, iteration: 7293
loss: 1.0517019033432007,grad_norm: 0.9999995411738213, iteration: 7294
loss: 1.0246397256851196,grad_norm: 0.9999994774073067, iteration: 7295
loss: 1.1412286758422852,grad_norm: 0.9999996844367575, iteration: 7296
loss: 1.0887517929077148,grad_norm: 0.9999997181924629, iteration: 7297
loss: 1.0303012132644653,grad_norm: 0.9999994266943889, iteration: 7298
loss: 1.0707330703735352,grad_norm: 0.9999995890098121, iteration: 7299
loss: 1.0696916580200195,grad_norm: 0.9999995331994649, iteration: 7300
loss: 1.083665370941162,grad_norm: 0.9999995013198071, iteration: 7301
loss: 1.075303554534912,grad_norm: 0.9999994283979687, iteration: 7302
loss: 1.0133273601531982,grad_norm: 0.9999993272339586, iteration: 7303
loss: 1.127799391746521,grad_norm: 0.9999996470396818, iteration: 7304
loss: 1.0344690084457397,grad_norm: 0.9999994087665092, iteration: 7305
loss: 1.039976716041565,grad_norm: 0.9999995602351159, iteration: 7306
loss: 1.077778697013855,grad_norm: 0.999999461669189, iteration: 7307
loss: 1.09006929397583,grad_norm: 0.9999996919906227, iteration: 7308
loss: 1.1050223112106323,grad_norm: 0.9999995383253544, iteration: 7309
loss: 1.043099045753479,grad_norm: 0.9999996886925232, iteration: 7310
loss: 1.1413756608963013,grad_norm: 0.9999996577065549, iteration: 7311
loss: 1.0280835628509521,grad_norm: 0.9999992806602743, iteration: 7312
loss: 1.0588877201080322,grad_norm: 0.999999585599464, iteration: 7313
loss: 1.0147982835769653,grad_norm: 0.9999993973702812, iteration: 7314
loss: 1.0173170566558838,grad_norm: 0.9999993978800646, iteration: 7315
loss: 1.060478687286377,grad_norm: 0.9999994850037283, iteration: 7316
loss: 1.061275839805603,grad_norm: 0.999999516630741, iteration: 7317
loss: 1.0271884202957153,grad_norm: 0.9999993503984657, iteration: 7318
loss: 1.0663360357284546,grad_norm: 0.9999996454671, iteration: 7319
loss: 1.073177695274353,grad_norm: 0.9999995113581792, iteration: 7320
loss: 1.0980448722839355,grad_norm: 0.9999994437640355, iteration: 7321
loss: 1.0829098224639893,grad_norm: 0.9999994661918293, iteration: 7322
loss: 1.0291458368301392,grad_norm: 0.9999994573061844, iteration: 7323
loss: 1.0675994157791138,grad_norm: 0.9999998443898137, iteration: 7324
loss: 1.1245872974395752,grad_norm: 0.9999998122775761, iteration: 7325
loss: 1.1033546924591064,grad_norm: 0.999999813928532, iteration: 7326
loss: 1.057342767715454,grad_norm: 0.9999995207538086, iteration: 7327
loss: 1.067216396331787,grad_norm: 0.999999593528991, iteration: 7328
loss: 1.0917980670928955,grad_norm: 0.9999994313598014, iteration: 7329
loss: 1.0586748123168945,grad_norm: 0.9999994175136582, iteration: 7330
loss: 1.0646289587020874,grad_norm: 0.9999993542350192, iteration: 7331
loss: 1.0350197553634644,grad_norm: 0.9999995831967275, iteration: 7332
loss: 1.1032549142837524,grad_norm: 0.9999995880840199, iteration: 7333
loss: 1.1030542850494385,grad_norm: 0.9999995483026575, iteration: 7334
loss: 1.045015573501587,grad_norm: 0.9999995858661546, iteration: 7335
loss: 1.0922682285308838,grad_norm: 0.9999994784004128, iteration: 7336
loss: 1.0301743745803833,grad_norm: 0.9999996296308084, iteration: 7337
loss: 1.0370728969573975,grad_norm: 0.9999993465426168, iteration: 7338
loss: 1.0032252073287964,grad_norm: 0.999999501056722, iteration: 7339
loss: 1.0999382734298706,grad_norm: 0.999999631941478, iteration: 7340
loss: 1.0897592306137085,grad_norm: 0.9999993578916101, iteration: 7341
loss: 1.0309317111968994,grad_norm: 0.9999994085777232, iteration: 7342
loss: 1.0385125875473022,grad_norm: 0.9999994477142743, iteration: 7343
loss: 1.0201467275619507,grad_norm: 0.9999994553826741, iteration: 7344
loss: 1.0420897006988525,grad_norm: 0.9999996866588206, iteration: 7345
loss: 1.0849201679229736,grad_norm: 0.9999996829198748, iteration: 7346
loss: 1.0189597606658936,grad_norm: 0.9999996093408691, iteration: 7347
loss: 1.1085978746414185,grad_norm: 0.9999995956857461, iteration: 7348
loss: 1.1066291332244873,grad_norm: 0.9999994645646814, iteration: 7349
loss: 1.0517162084579468,grad_norm: 0.9999995230115255, iteration: 7350
loss: 1.1281765699386597,grad_norm: 0.9999996312320538, iteration: 7351
loss: 1.0358858108520508,grad_norm: 0.9999996202397154, iteration: 7352
loss: 1.1094348430633545,grad_norm: 0.9999995931609372, iteration: 7353
loss: 1.0865145921707153,grad_norm: 0.9999995000407608, iteration: 7354
loss: 1.0498241186141968,grad_norm: 0.9999995231213891, iteration: 7355
loss: 1.115852952003479,grad_norm: 0.9999996558098238, iteration: 7356
loss: 1.0534911155700684,grad_norm: 0.9999997037972692, iteration: 7357
loss: 1.0463536977767944,grad_norm: 0.9999994062047598, iteration: 7358
loss: 1.0343244075775146,grad_norm: 0.9999993854631829, iteration: 7359
loss: 1.0991681814193726,grad_norm: 0.9999994879963289, iteration: 7360
loss: 1.0864461660385132,grad_norm: 0.9999995694734756, iteration: 7361
loss: 1.0937684774398804,grad_norm: 0.9999996960407611, iteration: 7362
loss: 1.0019512176513672,grad_norm: 0.9999994701754017, iteration: 7363
loss: 1.1147685050964355,grad_norm: 0.9999994958961457, iteration: 7364
loss: 1.0889906883239746,grad_norm: 0.999999667031727, iteration: 7365
loss: 1.0739402770996094,grad_norm: 0.9999992671571083, iteration: 7366
loss: 1.0751646757125854,grad_norm: 0.9999997760396119, iteration: 7367
loss: 1.0498046875,grad_norm: 0.9999995444739204, iteration: 7368
loss: 1.0785497426986694,grad_norm: 0.9999994596299722, iteration: 7369
loss: 1.1113708019256592,grad_norm: 0.9999996343960977, iteration: 7370
loss: 1.051656723022461,grad_norm: 0.9999994473727488, iteration: 7371
loss: 1.061956524848938,grad_norm: 0.9999994776662157, iteration: 7372
loss: 1.0600610971450806,grad_norm: 0.999999383726324, iteration: 7373
loss: 1.0999324321746826,grad_norm: 0.999999627103949, iteration: 7374
loss: 1.0336114168167114,grad_norm: 0.9999995422337624, iteration: 7375
loss: 1.0658454895019531,grad_norm: 0.9999994423664131, iteration: 7376
loss: 1.0645902156829834,grad_norm: 0.9999994784224342, iteration: 7377
loss: 1.0548540353775024,grad_norm: 0.9999996419512869, iteration: 7378
loss: 1.0499324798583984,grad_norm: 0.9999994514883209, iteration: 7379
loss: 1.0320689678192139,grad_norm: 0.9999997692424941, iteration: 7380
loss: 1.123055338859558,grad_norm: 0.9999995563369509, iteration: 7381
loss: 1.0893288850784302,grad_norm: 0.9999995616878559, iteration: 7382
loss: 1.0321545600891113,grad_norm: 0.9999996117567466, iteration: 7383
loss: 1.0335625410079956,grad_norm: 0.9999995005404014, iteration: 7384
loss: 0.9998936653137207,grad_norm: 0.999999548116721, iteration: 7385
loss: 1.038124442100525,grad_norm: 0.9999994087179838, iteration: 7386
loss: 1.0437796115875244,grad_norm: 0.9999996413247251, iteration: 7387
loss: 1.090859055519104,grad_norm: 0.99999938249501, iteration: 7388
loss: 1.0683352947235107,grad_norm: 0.9999994254852291, iteration: 7389
loss: 1.0618224143981934,grad_norm: 0.9999995800833281, iteration: 7390
loss: 1.0617936849594116,grad_norm: 0.9999995096496959, iteration: 7391
loss: 1.1311687231063843,grad_norm: 0.9999997064168976, iteration: 7392
loss: 1.1281609535217285,grad_norm: 0.9999995502255995, iteration: 7393
loss: 1.047767996788025,grad_norm: 0.9999995330902943, iteration: 7394
loss: 1.0565800666809082,grad_norm: 0.9999994159899688, iteration: 7395
loss: 1.0806618928909302,grad_norm: 0.9999994963941928, iteration: 7396
loss: 1.0108176469802856,grad_norm: 0.999999344939404, iteration: 7397
loss: 1.0847554206848145,grad_norm: 0.9999994866725935, iteration: 7398
loss: 1.061942458152771,grad_norm: 0.9999997367498531, iteration: 7399
loss: 1.0803847312927246,grad_norm: 0.9999996992530803, iteration: 7400
loss: 1.1020900011062622,grad_norm: 0.9999997249714111, iteration: 7401
loss: 1.0644831657409668,grad_norm: 0.9999996382231535, iteration: 7402
loss: 1.1274547576904297,grad_norm: 0.9999997804194799, iteration: 7403
loss: 1.0807973146438599,grad_norm: 0.9999996822409541, iteration: 7404
loss: 1.0400649309158325,grad_norm: 0.9999995090013006, iteration: 7405
loss: 1.0521670579910278,grad_norm: 0.9999998235045926, iteration: 7406
loss: 1.0366545915603638,grad_norm: 0.9999993676595575, iteration: 7407
loss: 1.074798345565796,grad_norm: 0.9999996626164254, iteration: 7408
loss: 0.990263044834137,grad_norm: 0.9999993418251109, iteration: 7409
loss: 1.0905205011367798,grad_norm: 0.9999996942149055, iteration: 7410
loss: 1.059837818145752,grad_norm: 0.9999996679023037, iteration: 7411
loss: 1.0609958171844482,grad_norm: 0.9999998282108509, iteration: 7412
loss: 1.076728105545044,grad_norm: 0.9999995118618559, iteration: 7413
loss: 1.063957929611206,grad_norm: 0.9999997687769667, iteration: 7414
loss: 1.0547213554382324,grad_norm: 0.9999995012483207, iteration: 7415
loss: 1.0024610757827759,grad_norm: 0.9999996159498492, iteration: 7416
loss: 1.0414282083511353,grad_norm: 0.9999993512798698, iteration: 7417
loss: 1.1236547231674194,grad_norm: 0.9999996512473379, iteration: 7418
loss: 1.1163161993026733,grad_norm: 0.999999712329777, iteration: 7419
loss: 1.0696156024932861,grad_norm: 0.9999995121874276, iteration: 7420
loss: 1.0835602283477783,grad_norm: 0.9999994110764328, iteration: 7421
loss: 1.0082141160964966,grad_norm: 0.9999994000269317, iteration: 7422
loss: 1.0406726598739624,grad_norm: 0.9999994536408539, iteration: 7423
loss: 1.0796265602111816,grad_norm: 0.9999996737028283, iteration: 7424
loss: 1.0238845348358154,grad_norm: 0.9999998014710282, iteration: 7425
loss: 1.0571702718734741,grad_norm: 0.9999995524541915, iteration: 7426
loss: 1.0160826444625854,grad_norm: 0.9999995232266319, iteration: 7427
loss: 1.0674993991851807,grad_norm: 0.999999501734929, iteration: 7428
loss: 1.0431618690490723,grad_norm: 0.9999995013794765, iteration: 7429
loss: 1.0432127714157104,grad_norm: 0.9999995513438567, iteration: 7430
loss: 1.053019404411316,grad_norm: 0.9999996593860777, iteration: 7431
loss: 0.9994775652885437,grad_norm: 0.9999995039495432, iteration: 7432
loss: 1.0650861263275146,grad_norm: 0.9999995369040616, iteration: 7433
loss: 0.9967913627624512,grad_norm: 0.9999992730827145, iteration: 7434
loss: 1.0774725675582886,grad_norm: 0.9999995431261256, iteration: 7435
loss: 1.0494928359985352,grad_norm: 0.9999995182995345, iteration: 7436
loss: 1.0313129425048828,grad_norm: 0.9999995059378067, iteration: 7437
loss: 1.0193116664886475,grad_norm: 0.9999995403716846, iteration: 7438
loss: 1.0998872518539429,grad_norm: 0.9999997213988026, iteration: 7439
loss: 1.1017874479293823,grad_norm: 0.9999995510770514, iteration: 7440
loss: 1.045688271522522,grad_norm: 0.9999994140394121, iteration: 7441
loss: 1.1235944032669067,grad_norm: 0.9999995619038413, iteration: 7442
loss: 1.0808253288269043,grad_norm: 0.999999523016433, iteration: 7443
loss: 1.0659749507904053,grad_norm: 0.9999994815944707, iteration: 7444
loss: 1.1266223192214966,grad_norm: 0.9999996336008715, iteration: 7445
loss: 1.0494111776351929,grad_norm: 0.9999995513559048, iteration: 7446
loss: 1.0674000978469849,grad_norm: 0.9999996455033338, iteration: 7447
loss: 1.1055505275726318,grad_norm: 0.9999996759250341, iteration: 7448
loss: 1.039635419845581,grad_norm: 0.9999994515354563, iteration: 7449
loss: 1.0303517580032349,grad_norm: 0.9999997177551282, iteration: 7450
loss: 1.0381468534469604,grad_norm: 0.9999995159544249, iteration: 7451
loss: 1.0941773653030396,grad_norm: 0.9999997011607374, iteration: 7452
loss: 1.041681170463562,grad_norm: 0.9999994982399203, iteration: 7453
loss: 1.0768290758132935,grad_norm: 0.9999994825493057, iteration: 7454
loss: 0.9982139468193054,grad_norm: 0.9999995889159061, iteration: 7455
loss: 1.0881505012512207,grad_norm: 0.9999997241152724, iteration: 7456
loss: 1.0941925048828125,grad_norm: 0.9999994560179176, iteration: 7457
loss: 1.0509021282196045,grad_norm: 0.9999995546789712, iteration: 7458
loss: 1.1411490440368652,grad_norm: 0.9999996236315299, iteration: 7459
loss: 0.981856644153595,grad_norm: 0.9999995352500253, iteration: 7460
loss: 1.1020056009292603,grad_norm: 0.9999997592560117, iteration: 7461
loss: 0.9706823229789734,grad_norm: 0.9999993779564429, iteration: 7462
loss: 1.1312522888183594,grad_norm: 0.9999996686688013, iteration: 7463
loss: 1.0794068574905396,grad_norm: 0.9999994668927287, iteration: 7464
loss: 1.10902738571167,grad_norm: 0.9999994821273618, iteration: 7465
loss: 1.1171241998672485,grad_norm: 0.9999996466392918, iteration: 7466
loss: 1.0635508298873901,grad_norm: 0.9999994152155322, iteration: 7467
loss: 1.0736974477767944,grad_norm: 0.9999994915035808, iteration: 7468
loss: 1.0849692821502686,grad_norm: 0.9999994765542733, iteration: 7469
loss: 1.1233290433883667,grad_norm: 0.9999996052287666, iteration: 7470
loss: 1.0194872617721558,grad_norm: 0.9999994405354564, iteration: 7471
loss: 1.0650370121002197,grad_norm: 0.9999994410954732, iteration: 7472
loss: 1.039227843284607,grad_norm: 0.9999993783817112, iteration: 7473
loss: 1.0918883085250854,grad_norm: 0.999999590480979, iteration: 7474
loss: 1.0587801933288574,grad_norm: 0.9999995425926119, iteration: 7475
loss: 1.0012127161026,grad_norm: 0.999999403796101, iteration: 7476
loss: 1.0957000255584717,grad_norm: 0.9999995517072695, iteration: 7477
loss: 1.0922905206680298,grad_norm: 0.9999994414611761, iteration: 7478
loss: 1.030627727508545,grad_norm: 0.9999996716568132, iteration: 7479
loss: 1.016422152519226,grad_norm: 0.9999993970607307, iteration: 7480
loss: 1.0424230098724365,grad_norm: 0.9999994253796336, iteration: 7481
loss: 1.037643551826477,grad_norm: 0.9999993723447725, iteration: 7482
loss: 1.065373420715332,grad_norm: 0.9999995811597819, iteration: 7483
loss: 0.9943640232086182,grad_norm: 0.9999994448232119, iteration: 7484
loss: 1.0550012588500977,grad_norm: 0.9999995870502721, iteration: 7485
loss: 1.1247316598892212,grad_norm: 0.9999996298100418, iteration: 7486
loss: 1.0195591449737549,grad_norm: 0.9999995358310226, iteration: 7487
loss: 1.0774469375610352,grad_norm: 0.999999814143897, iteration: 7488
loss: 1.0840284824371338,grad_norm: 0.9999993739827949, iteration: 7489
loss: 1.0559031963348389,grad_norm: 0.9999995954790826, iteration: 7490
loss: 1.1048556566238403,grad_norm: 0.999999328194773, iteration: 7491
loss: 1.0171353816986084,grad_norm: 0.9999993902723694, iteration: 7492
loss: 1.0493303537368774,grad_norm: 0.9999994006559875, iteration: 7493
loss: 1.0441975593566895,grad_norm: 0.9999996232582428, iteration: 7494
loss: 1.1218194961547852,grad_norm: 0.9999997195631694, iteration: 7495
loss: 1.0194926261901855,grad_norm: 0.9999994858537987, iteration: 7496
loss: 1.040964961051941,grad_norm: 0.9999994394682976, iteration: 7497
loss: 1.0481082201004028,grad_norm: 0.9999993974220677, iteration: 7498
loss: 1.0582923889160156,grad_norm: 0.9999994689285576, iteration: 7499
loss: 1.0704476833343506,grad_norm: 0.9999994646877578, iteration: 7500
loss: 1.027582049369812,grad_norm: 0.99999928745186, iteration: 7501
loss: 1.0718226432800293,grad_norm: 0.9999994373754675, iteration: 7502
loss: 1.0039842128753662,grad_norm: 0.99999927345571, iteration: 7503
loss: 1.0476491451263428,grad_norm: 0.9999993117907487, iteration: 7504
loss: 1.0980373620986938,grad_norm: 0.9999997248286236, iteration: 7505
loss: 1.1064034700393677,grad_norm: 0.9999996083395716, iteration: 7506
loss: 1.0324174165725708,grad_norm: 0.9999995450174698, iteration: 7507
loss: 1.0142860412597656,grad_norm: 0.9999993956935117, iteration: 7508
loss: 1.0653927326202393,grad_norm: 0.9999994430781536, iteration: 7509
loss: 1.0853817462921143,grad_norm: 0.999999484904334, iteration: 7510
loss: 1.085214376449585,grad_norm: 0.9999995751007696, iteration: 7511
loss: 1.0747394561767578,grad_norm: 0.9999996669325762, iteration: 7512
loss: 1.0639067888259888,grad_norm: 0.9999993267450145, iteration: 7513
loss: 1.0789463520050049,grad_norm: 0.999999435065425, iteration: 7514
loss: 1.001952052116394,grad_norm: 0.9999994621574844, iteration: 7515
loss: 1.092679500579834,grad_norm: 0.9999994983194019, iteration: 7516
loss: 1.00634765625,grad_norm: 0.9999992622911973, iteration: 7517
loss: 1.0281133651733398,grad_norm: 0.9999993333660695, iteration: 7518
loss: 1.0593284368515015,grad_norm: 0.9999995599052657, iteration: 7519
loss: 1.0231801271438599,grad_norm: 0.9999993670656708, iteration: 7520
loss: 1.1240026950836182,grad_norm: 0.9999994675545986, iteration: 7521
loss: 1.0951615571975708,grad_norm: 0.999999460268497, iteration: 7522
loss: 1.0200945138931274,grad_norm: 0.999999361173477, iteration: 7523
loss: 1.0975278615951538,grad_norm: 0.9999994329911667, iteration: 7524
loss: 1.0484105348587036,grad_norm: 0.9999993902695739, iteration: 7525
loss: 1.058479905128479,grad_norm: 0.9999994146848233, iteration: 7526
loss: 1.045669436454773,grad_norm: 0.9999996148080844, iteration: 7527
loss: 1.0947144031524658,grad_norm: 0.9999997849011057, iteration: 7528
loss: 1.0519657135009766,grad_norm: 0.9999993595260442, iteration: 7529
loss: 1.0497232675552368,grad_norm: 0.9999993516294311, iteration: 7530
loss: 1.0742661952972412,grad_norm: 0.9999995611494549, iteration: 7531
loss: 1.0445977449417114,grad_norm: 0.9999993297752859, iteration: 7532
loss: 1.0598081350326538,grad_norm: 0.9999995565567301, iteration: 7533
loss: 1.0427409410476685,grad_norm: 0.9999994626615967, iteration: 7534
loss: 1.0953823328018188,grad_norm: 0.9999995748246366, iteration: 7535
loss: 1.0169188976287842,grad_norm: 0.9999993486763844, iteration: 7536
loss: 1.045919418334961,grad_norm: 0.9999994148004665, iteration: 7537
loss: 1.0955564975738525,grad_norm: 0.9999996098319817, iteration: 7538
loss: 1.0060540437698364,grad_norm: 0.9999994241548436, iteration: 7539
loss: 1.050059199333191,grad_norm: 0.9999994498441309, iteration: 7540
loss: 1.0510280132293701,grad_norm: 0.9999996517049317, iteration: 7541
loss: 1.053083062171936,grad_norm: 0.9999997858732481, iteration: 7542
loss: 1.1015580892562866,grad_norm: 0.9999997899472531, iteration: 7543
loss: 1.0104992389678955,grad_norm: 0.9999993959565376, iteration: 7544
loss: 1.041791558265686,grad_norm: 0.9999993966209053, iteration: 7545
loss: 1.0626314878463745,grad_norm: 0.999999354754144, iteration: 7546
loss: 1.1111267805099487,grad_norm: 0.9999997152737238, iteration: 7547
loss: 1.0747623443603516,grad_norm: 0.9999993915673226, iteration: 7548
loss: 1.0416178703308105,grad_norm: 0.9999994201759352, iteration: 7549
loss: 1.0176700353622437,grad_norm: 0.9999994293207226, iteration: 7550
loss: 1.0458184480667114,grad_norm: 0.9999996565552631, iteration: 7551
loss: 1.0642802715301514,grad_norm: 0.9999995208850647, iteration: 7552
loss: 1.1186201572418213,grad_norm: 0.9999995027543698, iteration: 7553
loss: 1.0439980030059814,grad_norm: 0.9999993205552395, iteration: 7554
loss: 1.0539634227752686,grad_norm: 0.9999994678597801, iteration: 7555
loss: 1.097753643989563,grad_norm: 0.9999996409525642, iteration: 7556
loss: 1.063167691230774,grad_norm: 0.9999996020780566, iteration: 7557
loss: 1.136359691619873,grad_norm: 0.9999997786290233, iteration: 7558
loss: 1.0759787559509277,grad_norm: 0.9999994585492358, iteration: 7559
loss: 1.027265191078186,grad_norm: 0.9999994533124785, iteration: 7560
loss: 1.042499303817749,grad_norm: 0.9999996154001767, iteration: 7561
loss: 1.0029003620147705,grad_norm: 0.9999991456470263, iteration: 7562
loss: 1.0927046537399292,grad_norm: 0.9999996399568676, iteration: 7563
loss: 1.070812702178955,grad_norm: 0.9999994907345773, iteration: 7564
loss: 1.06703782081604,grad_norm: 0.9999993784925915, iteration: 7565
loss: 1.0706697702407837,grad_norm: 0.999999278538265, iteration: 7566
loss: 1.07411527633667,grad_norm: 0.9999994552826319, iteration: 7567
loss: 1.0855841636657715,grad_norm: 0.9999995574768364, iteration: 7568
loss: 1.0852901935577393,grad_norm: 0.9999994278512409, iteration: 7569
loss: 1.0734901428222656,grad_norm: 0.9999995929855194, iteration: 7570
loss: 1.0171080827713013,grad_norm: 0.999999365251279, iteration: 7571
loss: 1.014403223991394,grad_norm: 0.999999473867476, iteration: 7572
loss: 1.0701179504394531,grad_norm: 0.9999995159345026, iteration: 7573
loss: 0.9793474078178406,grad_norm: 0.9999993224496609, iteration: 7574
loss: 1.0844565629959106,grad_norm: 0.9999992644600266, iteration: 7575
loss: 1.011451005935669,grad_norm: 0.999999657769262, iteration: 7576
loss: 0.9975167512893677,grad_norm: 0.9999993141984594, iteration: 7577
loss: 1.0124030113220215,grad_norm: 0.9999994886024905, iteration: 7578
loss: 1.0651665925979614,grad_norm: 0.9999995937626006, iteration: 7579
loss: 1.121238112449646,grad_norm: 0.9999995370832113, iteration: 7580
loss: 1.0777753591537476,grad_norm: 0.999999452316927, iteration: 7581
loss: 1.0122281312942505,grad_norm: 0.9999993078649385, iteration: 7582
loss: 1.0596176385879517,grad_norm: 0.9999994317111114, iteration: 7583
loss: 1.0739036798477173,grad_norm: 0.9999993910771376, iteration: 7584
loss: 1.027083396911621,grad_norm: 0.9999995158466298, iteration: 7585
loss: 1.0667977333068848,grad_norm: 0.9999994429767471, iteration: 7586
loss: 1.0834096670150757,grad_norm: 0.9999997061826308, iteration: 7587
loss: 1.0881611108779907,grad_norm: 0.9999996484522877, iteration: 7588
loss: 1.0296728610992432,grad_norm: 0.9999995442207897, iteration: 7589
loss: 1.095375418663025,grad_norm: 0.9999997538762275, iteration: 7590
loss: 1.0178114175796509,grad_norm: 0.9999994600853148, iteration: 7591
loss: 1.146394968032837,grad_norm: 0.9999997243651915, iteration: 7592
loss: 1.1193169355392456,grad_norm: 0.9999995870373308, iteration: 7593
loss: 1.0544039011001587,grad_norm: 0.9999995260324428, iteration: 7594
loss: 1.079041600227356,grad_norm: 0.9999995539265243, iteration: 7595
loss: 1.1196516752243042,grad_norm: 0.9999995592661464, iteration: 7596
loss: 0.9990448951721191,grad_norm: 0.9999993786995175, iteration: 7597
loss: 1.0712039470672607,grad_norm: 0.9999994754066409, iteration: 7598
loss: 1.1323591470718384,grad_norm: 0.9999995975716961, iteration: 7599
loss: 1.0923945903778076,grad_norm: 0.9999993289236957, iteration: 7600
loss: 1.0532530546188354,grad_norm: 0.9999996638405966, iteration: 7601
loss: 1.0500056743621826,grad_norm: 0.9999993546527812, iteration: 7602
loss: 1.0372523069381714,grad_norm: 0.9999993535911708, iteration: 7603
loss: 1.0461761951446533,grad_norm: 0.9999993889698313, iteration: 7604
loss: 1.0755282640457153,grad_norm: 0.9999996969834903, iteration: 7605
loss: 1.0798269510269165,grad_norm: 0.9999995346538835, iteration: 7606
loss: 1.0856479406356812,grad_norm: 0.9999996363036293, iteration: 7607
loss: 0.9557955861091614,grad_norm: 0.9999991579193402, iteration: 7608
loss: 1.0657362937927246,grad_norm: 0.9999995117214111, iteration: 7609
loss: 1.056980013847351,grad_norm: 0.999999445301137, iteration: 7610
loss: 1.0475971698760986,grad_norm: 0.9999993231325912, iteration: 7611
loss: 1.0287948846817017,grad_norm: 0.999999295571223, iteration: 7612
loss: 1.0715357065200806,grad_norm: 0.9999994894565156, iteration: 7613
loss: 1.0782701969146729,grad_norm: 0.9999997099358593, iteration: 7614
loss: 1.077232003211975,grad_norm: 0.9999993767509369, iteration: 7615
loss: 1.0559828281402588,grad_norm: 0.9999994829927851, iteration: 7616
loss: 1.140183448791504,grad_norm: 0.9999996322809467, iteration: 7617
loss: 1.0530123710632324,grad_norm: 0.9999993210560861, iteration: 7618
loss: 1.0280344486236572,grad_norm: 0.9999993560679299, iteration: 7619
loss: 1.058870792388916,grad_norm: 0.9999993399279737, iteration: 7620
loss: 1.082830548286438,grad_norm: 0.9999995735128562, iteration: 7621
loss: 1.067931890487671,grad_norm: 0.9999994286201073, iteration: 7622
loss: 1.0445313453674316,grad_norm: 0.9999994581603274, iteration: 7623
loss: 1.0310806035995483,grad_norm: 0.9999994469777158, iteration: 7624
loss: 1.0724998712539673,grad_norm: 0.9999993710910623, iteration: 7625
loss: 1.0228477716445923,grad_norm: 0.999999771399833, iteration: 7626
loss: 1.191646933555603,grad_norm: 0.9999998775600302, iteration: 7627
loss: 1.0472475290298462,grad_norm: 0.9999995800398893, iteration: 7628
loss: 0.9741237759590149,grad_norm: 0.9999993793887145, iteration: 7629
loss: 1.0153931379318237,grad_norm: 0.9999997134146277, iteration: 7630
loss: 1.0128445625305176,grad_norm: 0.9999994181689804, iteration: 7631
loss: 1.067758560180664,grad_norm: 0.9999994584123646, iteration: 7632
loss: 1.0873414278030396,grad_norm: 0.999999347879141, iteration: 7633
loss: 1.048376202583313,grad_norm: 0.9999996765448763, iteration: 7634
loss: 1.122086763381958,grad_norm: 0.9999994773596704, iteration: 7635
loss: 1.0560160875320435,grad_norm: 0.9999995450945348, iteration: 7636
loss: 1.0396714210510254,grad_norm: 0.9999993421873067, iteration: 7637
loss: 1.0787323713302612,grad_norm: 0.9999993858346077, iteration: 7638
loss: 1.0553187131881714,grad_norm: 0.9999994588273435, iteration: 7639
loss: 1.0941166877746582,grad_norm: 0.9999994460265019, iteration: 7640
loss: 1.0667022466659546,grad_norm: 0.9999996063236625, iteration: 7641
loss: 1.0754234790802002,grad_norm: 0.9999996307480685, iteration: 7642
loss: 1.1226288080215454,grad_norm: 0.9999997426080901, iteration: 7643
loss: 1.080074429512024,grad_norm: 0.9999996837789481, iteration: 7644
loss: 1.0626837015151978,grad_norm: 0.9999994386993051, iteration: 7645
loss: 1.0712977647781372,grad_norm: 0.9999993248054646, iteration: 7646
loss: 1.0048187971115112,grad_norm: 0.999999511969696, iteration: 7647
loss: 1.0555404424667358,grad_norm: 0.9999994703901233, iteration: 7648
loss: 1.1598795652389526,grad_norm: 0.9999997469203986, iteration: 7649
loss: 1.043416142463684,grad_norm: 0.9999994224678285, iteration: 7650
loss: 1.0947601795196533,grad_norm: 0.9999996771846611, iteration: 7651
loss: 1.009272813796997,grad_norm: 0.9999994509391534, iteration: 7652
loss: 1.001968264579773,grad_norm: 0.9999993622632923, iteration: 7653
loss: 1.0522058010101318,grad_norm: 0.9999994573364416, iteration: 7654
loss: 1.0494507551193237,grad_norm: 0.9999994367110586, iteration: 7655
loss: 1.0738976001739502,grad_norm: 0.9999993830228237, iteration: 7656
loss: 1.0776857137680054,grad_norm: 0.9999994035905289, iteration: 7657
loss: 1.0580542087554932,grad_norm: 0.9999993579621591, iteration: 7658
loss: 1.1074837446212769,grad_norm: 0.9999994936617722, iteration: 7659
loss: 1.0383955240249634,grad_norm: 0.9999995644193097, iteration: 7660
loss: 1.1136996746063232,grad_norm: 0.9999996904002595, iteration: 7661
loss: 1.0941553115844727,grad_norm: 0.9999994093502483, iteration: 7662
loss: 1.0876636505126953,grad_norm: 0.9999995708168394, iteration: 7663
loss: 1.0854142904281616,grad_norm: 0.9999994982886697, iteration: 7664
loss: 1.0052536725997925,grad_norm: 0.9999993674820434, iteration: 7665
loss: 1.0182595252990723,grad_norm: 0.999999510595647, iteration: 7666
loss: 1.029966950416565,grad_norm: 0.9999995362269187, iteration: 7667
loss: 1.0826746225357056,grad_norm: 0.9999995103734328, iteration: 7668
loss: 1.0871325731277466,grad_norm: 0.9999995152135044, iteration: 7669
loss: 1.02243173122406,grad_norm: 0.9999994665476707, iteration: 7670
loss: 1.0608530044555664,grad_norm: 0.9999993115293832, iteration: 7671
loss: 1.041046380996704,grad_norm: 0.9999995162656694, iteration: 7672
loss: 1.0805914402008057,grad_norm: 0.9999993187178468, iteration: 7673
loss: 1.0715818405151367,grad_norm: 0.9999994623084448, iteration: 7674
loss: 1.073204517364502,grad_norm: 0.9999995658720151, iteration: 7675
loss: 1.0995672941207886,grad_norm: 0.9999993796644719, iteration: 7676
loss: 1.0480090379714966,grad_norm: 0.9999993537737721, iteration: 7677
loss: 1.1031303405761719,grad_norm: 0.9999994893778176, iteration: 7678
loss: 1.0739232301712036,grad_norm: 0.999999437547879, iteration: 7679
loss: 1.1093705892562866,grad_norm: 0.9999996144199438, iteration: 7680
loss: 1.074037790298462,grad_norm: 0.9999996433569494, iteration: 7681
loss: 1.0314977169036865,grad_norm: 0.9999994770971926, iteration: 7682
loss: 1.0296885967254639,grad_norm: 0.9999992965723409, iteration: 7683
loss: 1.075892448425293,grad_norm: 0.9999995725998592, iteration: 7684
loss: 1.0175126791000366,grad_norm: 0.9999994002812552, iteration: 7685
loss: 1.0723110437393188,grad_norm: 0.9999998394685644, iteration: 7686
loss: 1.0325218439102173,grad_norm: 0.9999993895695092, iteration: 7687
loss: 1.0258285999298096,grad_norm: 0.9999995036670948, iteration: 7688
loss: 1.103103756904602,grad_norm: 0.9999995790342695, iteration: 7689
loss: 1.0424563884735107,grad_norm: 0.9999993531850836, iteration: 7690
loss: 1.0258245468139648,grad_norm: 0.9999995916937144, iteration: 7691
loss: 1.0460129976272583,grad_norm: 0.9999993330387187, iteration: 7692
loss: 1.0221965312957764,grad_norm: 0.9999993777235662, iteration: 7693
loss: 1.0524674654006958,grad_norm: 0.9999993915663782, iteration: 7694
loss: 1.0843356847763062,grad_norm: 0.9999996808985395, iteration: 7695
loss: 1.0610554218292236,grad_norm: 0.9999996009434985, iteration: 7696
loss: 1.0903252363204956,grad_norm: 0.999999672037505, iteration: 7697
loss: 1.0688860416412354,grad_norm: 0.9999996268830386, iteration: 7698
loss: 1.072151780128479,grad_norm: 0.9999994232206065, iteration: 7699
loss: 1.0048120021820068,grad_norm: 0.9999993953501314, iteration: 7700
loss: 1.0639642477035522,grad_norm: 0.9999995149938783, iteration: 7701
loss: 1.0772343873977661,grad_norm: 0.9999996353109141, iteration: 7702
loss: 1.0663758516311646,grad_norm: 0.9999996513731645, iteration: 7703
loss: 1.0624754428863525,grad_norm: 0.9999995382855278, iteration: 7704
loss: 1.068652868270874,grad_norm: 0.9999995123214794, iteration: 7705
loss: 1.049852967262268,grad_norm: 0.9999994408267405, iteration: 7706
loss: 1.094699740409851,grad_norm: 0.9999994770880853, iteration: 7707
loss: 1.0925620794296265,grad_norm: 0.9999995035192814, iteration: 7708
loss: 1.0454610586166382,grad_norm: 0.9999994787992983, iteration: 7709
loss: 1.0347704887390137,grad_norm: 0.9999994418140815, iteration: 7710
loss: 1.0454984903335571,grad_norm: 0.9999995812011185, iteration: 7711
loss: 1.0827670097351074,grad_norm: 0.99999952670713, iteration: 7712
loss: 1.0338757038116455,grad_norm: 0.9999993897378131, iteration: 7713
loss: 1.0814366340637207,grad_norm: 0.9999996778880306, iteration: 7714
loss: 1.0472732782363892,grad_norm: 0.9999994808838931, iteration: 7715
loss: 1.033422589302063,grad_norm: 0.9999994385202013, iteration: 7716
loss: 1.0684983730316162,grad_norm: 0.9999996898109957, iteration: 7717
loss: 1.022818922996521,grad_norm: 0.9999994961667626, iteration: 7718
loss: 1.0715575218200684,grad_norm: 0.9999996513096483, iteration: 7719
loss: 1.1210941076278687,grad_norm: 0.9999997408713122, iteration: 7720
loss: 1.0535458326339722,grad_norm: 0.9999995471707704, iteration: 7721
loss: 1.1193132400512695,grad_norm: 0.9999995462848609, iteration: 7722
loss: 1.0559229850769043,grad_norm: 0.9999995207353224, iteration: 7723
loss: 1.0668833255767822,grad_norm: 0.9999995490844779, iteration: 7724
loss: 1.0746879577636719,grad_norm: 0.999999168523638, iteration: 7725
loss: 1.03487229347229,grad_norm: 0.9999994197303567, iteration: 7726
loss: 1.055546760559082,grad_norm: 0.999999532847983, iteration: 7727
loss: 1.065120816230774,grad_norm: 0.9999995603240358, iteration: 7728
loss: 1.0774893760681152,grad_norm: 0.9999997133790535, iteration: 7729
loss: 1.0743002891540527,grad_norm: 0.9999996459010716, iteration: 7730
loss: 1.0591753721237183,grad_norm: 0.9999996657449474, iteration: 7731
loss: 1.0826902389526367,grad_norm: 0.9999995733078122, iteration: 7732
loss: 1.1438406705856323,grad_norm: 0.9999997768805653, iteration: 7733
loss: 1.0888978242874146,grad_norm: 0.9999995504021763, iteration: 7734
loss: 1.0606869459152222,grad_norm: 0.9999994215735066, iteration: 7735
loss: 1.0398260354995728,grad_norm: 0.9999993938306203, iteration: 7736
loss: 1.0373620986938477,grad_norm: 0.9999993185443781, iteration: 7737
loss: 1.109663963317871,grad_norm: 0.9999994948025324, iteration: 7738
loss: 1.0438274145126343,grad_norm: 0.9999994022463593, iteration: 7739
loss: 1.0487462282180786,grad_norm: 0.9999994822879774, iteration: 7740
loss: 1.0381771326065063,grad_norm: 0.9999995983708743, iteration: 7741
loss: 1.054049015045166,grad_norm: 0.9999995192623998, iteration: 7742
loss: 1.116114616394043,grad_norm: 0.999999596696155, iteration: 7743
loss: 1.0310349464416504,grad_norm: 0.9999994097142006, iteration: 7744
loss: 1.075802206993103,grad_norm: 0.9999994814821742, iteration: 7745
loss: 1.1121537685394287,grad_norm: 0.9999995996941026, iteration: 7746
loss: 1.058293342590332,grad_norm: 0.9999995112278518, iteration: 7747
loss: 1.0125839710235596,grad_norm: 0.9999995566419837, iteration: 7748
loss: 1.0894466638565063,grad_norm: 0.999999711851572, iteration: 7749
loss: 1.0943517684936523,grad_norm: 0.9999996818978987, iteration: 7750
loss: 1.0980528593063354,grad_norm: 0.9999994247891043, iteration: 7751
loss: 1.0919792652130127,grad_norm: 0.9999996853758595, iteration: 7752
loss: 1.1120444536209106,grad_norm: 0.999999754600623, iteration: 7753
loss: 1.0403748750686646,grad_norm: 0.9999995003062451, iteration: 7754
loss: 1.0697232484817505,grad_norm: 0.9999996616331219, iteration: 7755
loss: 1.1172634363174438,grad_norm: 0.9999994834347349, iteration: 7756
loss: 1.110036015510559,grad_norm: 0.9999996918943066, iteration: 7757
loss: 1.0887900590896606,grad_norm: 0.999999565630787, iteration: 7758
loss: 1.0421451330184937,grad_norm: 0.9999996563436535, iteration: 7759
loss: 1.0328363180160522,grad_norm: 0.9999993299736873, iteration: 7760
loss: 1.0390701293945312,grad_norm: 0.9999995761879511, iteration: 7761
loss: 1.0857590436935425,grad_norm: 0.9999995999404, iteration: 7762
loss: 1.0703309774398804,grad_norm: 0.9999996442172516, iteration: 7763
loss: 1.10270357131958,grad_norm: 0.9999999121444453, iteration: 7764
loss: 1.0489795207977295,grad_norm: 0.9999994020044458, iteration: 7765
loss: 1.0332046747207642,grad_norm: 0.9999994565700787, iteration: 7766
loss: 0.9948377013206482,grad_norm: 0.999999336339366, iteration: 7767
loss: 0.9880104064941406,grad_norm: 0.9999994862806604, iteration: 7768
loss: 1.0963634252548218,grad_norm: 0.9999997545007031, iteration: 7769
loss: 1.051127552986145,grad_norm: 0.9999995620647606, iteration: 7770
loss: 1.0652283430099487,grad_norm: 0.9999997869111378, iteration: 7771
loss: 1.0756956338882446,grad_norm: 0.9999995772278548, iteration: 7772
loss: 1.0932679176330566,grad_norm: 0.999999471569039, iteration: 7773
loss: 1.0669606924057007,grad_norm: 0.9999994484396174, iteration: 7774
loss: 1.0116775035858154,grad_norm: 0.9999995852088789, iteration: 7775
loss: 1.0306416749954224,grad_norm: 0.9999995831912002, iteration: 7776
loss: 1.0561789274215698,grad_norm: 0.9999994911078635, iteration: 7777
loss: 1.061169981956482,grad_norm: 0.9999995067247966, iteration: 7778
loss: 1.0195462703704834,grad_norm: 0.9999993579410592, iteration: 7779
loss: 1.057970643043518,grad_norm: 0.999999430670991, iteration: 7780
loss: 1.0315706729888916,grad_norm: 0.9999995170497401, iteration: 7781
loss: 1.0775723457336426,grad_norm: 0.9999994176822152, iteration: 7782
loss: 1.074653148651123,grad_norm: 0.9999995604440306, iteration: 7783
loss: 1.03108811378479,grad_norm: 0.9999995930876577, iteration: 7784
loss: 1.033124327659607,grad_norm: 0.9999995664124713, iteration: 7785
loss: 1.1392947435379028,grad_norm: 0.999999499339272, iteration: 7786
loss: 1.0379948616027832,grad_norm: 0.999999542743826, iteration: 7787
loss: 1.043758511543274,grad_norm: 0.9999993748736334, iteration: 7788
loss: 1.0929571390151978,grad_norm: 0.999999577010064, iteration: 7789
loss: 1.0783971548080444,grad_norm: 0.9999996253395053, iteration: 7790
loss: 1.0921454429626465,grad_norm: 0.9999996245512701, iteration: 7791
loss: 1.099266767501831,grad_norm: 0.9999995399262254, iteration: 7792
loss: 1.041516661643982,grad_norm: 0.9999995297949084, iteration: 7793
loss: 1.0291978120803833,grad_norm: 0.9999993434194842, iteration: 7794
loss: 0.9878619313240051,grad_norm: 0.9999996128595678, iteration: 7795
loss: 1.0219404697418213,grad_norm: 0.9999993940795182, iteration: 7796
loss: 1.0063821077346802,grad_norm: 0.9999994309094731, iteration: 7797
loss: 1.0620927810668945,grad_norm: 0.9999994712399429, iteration: 7798
loss: 1.0888582468032837,grad_norm: 0.9999994302767791, iteration: 7799
loss: 1.0165876150131226,grad_norm: 0.9999994529225125, iteration: 7800
loss: 1.0765511989593506,grad_norm: 0.9999994264811016, iteration: 7801
loss: 1.0344233512878418,grad_norm: 0.9999995479240132, iteration: 7802
loss: 1.00569748878479,grad_norm: 0.9999993548078121, iteration: 7803
loss: 0.9869057536125183,grad_norm: 0.999999322995101, iteration: 7804
loss: 1.0798190832138062,grad_norm: 0.9999994743644897, iteration: 7805
loss: 1.0353107452392578,grad_norm: 0.9999995743724428, iteration: 7806
loss: 1.0250335931777954,grad_norm: 0.9999993617184161, iteration: 7807
loss: 1.0759577751159668,grad_norm: 0.9999995431888141, iteration: 7808
loss: 1.0271259546279907,grad_norm: 0.9999995232683931, iteration: 7809
loss: 1.0161508321762085,grad_norm: 0.9999993231532904, iteration: 7810
loss: 1.085064172744751,grad_norm: 0.9999995397336084, iteration: 7811
loss: 0.9997084140777588,grad_norm: 0.9999994621894467, iteration: 7812
loss: 1.0773279666900635,grad_norm: 0.9999994612394445, iteration: 7813
loss: 1.0535424947738647,grad_norm: 0.9999992763987561, iteration: 7814
loss: 1.1286860704421997,grad_norm: 0.9999997864110625, iteration: 7815
loss: 1.0297000408172607,grad_norm: 0.9999994041064475, iteration: 7816
loss: 1.1175487041473389,grad_norm: 0.9999996904941845, iteration: 7817
loss: 1.0953630208969116,grad_norm: 0.9999996492949277, iteration: 7818
loss: 1.0439563989639282,grad_norm: 0.9999993439326439, iteration: 7819
loss: 1.0735952854156494,grad_norm: 0.999999578972301, iteration: 7820
loss: 1.0712378025054932,grad_norm: 0.9999996965431103, iteration: 7821
loss: 1.0582785606384277,grad_norm: 0.9999994491204298, iteration: 7822
loss: 1.0783997774124146,grad_norm: 0.9999994836809827, iteration: 7823
loss: 1.1579228639602661,grad_norm: 0.999999688441325, iteration: 7824
loss: 1.0098605155944824,grad_norm: 0.999999342616491, iteration: 7825
loss: 1.0477317571640015,grad_norm: 0.9999997029652714, iteration: 7826
loss: 1.08391273021698,grad_norm: 0.9999994618983539, iteration: 7827
loss: 1.0288739204406738,grad_norm: 0.9999996072647557, iteration: 7828
loss: 1.0238862037658691,grad_norm: 0.9999994843989718, iteration: 7829
loss: 1.0473259687423706,grad_norm: 0.9999992962399793, iteration: 7830
loss: 1.1271172761917114,grad_norm: 0.9999997680180025, iteration: 7831
loss: 1.0257490873336792,grad_norm: 0.9999994694404664, iteration: 7832
loss: 1.0099579095840454,grad_norm: 0.9999995882982513, iteration: 7833
loss: 1.006878137588501,grad_norm: 0.9999995541061926, iteration: 7834
loss: 1.0527317523956299,grad_norm: 0.9999994154326444, iteration: 7835
loss: 1.0860122442245483,grad_norm: 0.9999993535702534, iteration: 7836
loss: 1.0715276002883911,grad_norm: 0.9999993341154553, iteration: 7837
loss: 1.0503296852111816,grad_norm: 0.999999485409715, iteration: 7838
loss: 1.001706838607788,grad_norm: 0.9999994298327752, iteration: 7839
loss: 1.0095787048339844,grad_norm: 0.999999636457168, iteration: 7840
loss: 1.0959829092025757,grad_norm: 0.9999995155182618, iteration: 7841
loss: 1.080032467842102,grad_norm: 0.9999996889211894, iteration: 7842
loss: 1.0394641160964966,grad_norm: 0.9999995028974662, iteration: 7843
loss: 1.1822307109832764,grad_norm: 0.9999997989526505, iteration: 7844
loss: 1.0582555532455444,grad_norm: 0.9999996674864516, iteration: 7845
loss: 1.0684614181518555,grad_norm: 0.9999994744037229, iteration: 7846
loss: 1.0951799154281616,grad_norm: 0.9999997597203268, iteration: 7847
loss: 1.0800288915634155,grad_norm: 0.9999994814802505, iteration: 7848
loss: 1.023350477218628,grad_norm: 0.9999997706768735, iteration: 7849
loss: 1.0451680421829224,grad_norm: 0.999999372079615, iteration: 7850
loss: 0.9783942699432373,grad_norm: 0.9999994558686784, iteration: 7851
loss: 1.008820652961731,grad_norm: 0.9999993453217115, iteration: 7852
loss: 1.0737181901931763,grad_norm: 0.9999994140586093, iteration: 7853
loss: 1.096551775932312,grad_norm: 0.9999995260994208, iteration: 7854
loss: 1.0559874773025513,grad_norm: 0.9999994012383929, iteration: 7855
loss: 1.065481185913086,grad_norm: 0.9999993972773202, iteration: 7856
loss: 1.0493323802947998,grad_norm: 0.9999995888210342, iteration: 7857
loss: 1.075007677078247,grad_norm: 0.9999995112185878, iteration: 7858
loss: 1.0208958387374878,grad_norm: 0.9999994120558299, iteration: 7859
loss: 1.0613784790039062,grad_norm: 0.9999994565257683, iteration: 7860
loss: 1.0391063690185547,grad_norm: 0.9999994096086295, iteration: 7861
loss: 1.0782326459884644,grad_norm: 0.999999506511718, iteration: 7862
loss: 1.0855660438537598,grad_norm: 0.9999996122870464, iteration: 7863
loss: 1.0878514051437378,grad_norm: 0.9999992847691438, iteration: 7864
loss: 1.0415221452713013,grad_norm: 0.9999993582851388, iteration: 7865
loss: 1.0596516132354736,grad_norm: 0.9999994330502157, iteration: 7866
loss: 1.0283267498016357,grad_norm: 0.9999993624631804, iteration: 7867
loss: 1.0651243925094604,grad_norm: 0.9999993613599282, iteration: 7868
loss: 1.0371395349502563,grad_norm: 0.9999994477068123, iteration: 7869
loss: 1.0325002670288086,grad_norm: 0.9999993755234678, iteration: 7870
loss: 1.0826210975646973,grad_norm: 0.9999994374865997, iteration: 7871
loss: 1.074769139289856,grad_norm: 0.9999994375606539, iteration: 7872
loss: 1.0240871906280518,grad_norm: 0.9999993737273345, iteration: 7873
loss: 1.042541265487671,grad_norm: 0.9999995376369962, iteration: 7874
loss: 1.0836734771728516,grad_norm: 0.9999993347461196, iteration: 7875
loss: 1.0260419845581055,grad_norm: 0.9999997066291361, iteration: 7876
loss: 1.070139765739441,grad_norm: 0.9999994506753869, iteration: 7877
loss: 1.020424723625183,grad_norm: 0.9999994682618633, iteration: 7878
loss: 1.0309467315673828,grad_norm: 0.9999993190080873, iteration: 7879
loss: 1.0598814487457275,grad_norm: 0.9999993954769837, iteration: 7880
loss: 1.049460768699646,grad_norm: 0.9999993427924269, iteration: 7881
loss: 1.0489754676818848,grad_norm: 0.999999595889606, iteration: 7882
loss: 0.997931182384491,grad_norm: 0.999999419793634, iteration: 7883
loss: 1.0335439443588257,grad_norm: 0.9999997492132107, iteration: 7884
loss: 1.0747063159942627,grad_norm: 0.9999996385433572, iteration: 7885
loss: 1.038798213005066,grad_norm: 0.9999994662197439, iteration: 7886
loss: 1.1053431034088135,grad_norm: 0.9999994093322503, iteration: 7887
loss: 1.0702309608459473,grad_norm: 0.9999994574295066, iteration: 7888
loss: 1.0956560373306274,grad_norm: 0.99999959233476, iteration: 7889
loss: 1.0343977212905884,grad_norm: 0.9999993799488708, iteration: 7890
loss: 1.1456797122955322,grad_norm: 0.9999994779771924, iteration: 7891
loss: 1.0595195293426514,grad_norm: 0.9999995014998111, iteration: 7892
loss: 1.0655688047409058,grad_norm: 0.9999994922624132, iteration: 7893
loss: 1.073899507522583,grad_norm: 0.9999995096067875, iteration: 7894
loss: 1.0260769128799438,grad_norm: 0.9999994879441415, iteration: 7895
loss: 1.0727038383483887,grad_norm: 0.9999994889773344, iteration: 7896
loss: 1.0583122968673706,grad_norm: 0.999999573627974, iteration: 7897
loss: 1.074077844619751,grad_norm: 0.9999992599754004, iteration: 7898
loss: 1.0504132509231567,grad_norm: 0.9999996458505007, iteration: 7899
loss: 1.0747469663619995,grad_norm: 0.9999995417307421, iteration: 7900
loss: 1.0870928764343262,grad_norm: 0.9999994021089696, iteration: 7901
loss: 1.0736439228057861,grad_norm: 0.9999994072267109, iteration: 7902
loss: 1.0677852630615234,grad_norm: 0.999999392385721, iteration: 7903
loss: 1.038375735282898,grad_norm: 0.999999584177678, iteration: 7904
loss: 1.047372817993164,grad_norm: 0.999999572771421, iteration: 7905
loss: 1.0697028636932373,grad_norm: 0.9999996489664504, iteration: 7906
loss: 1.0432378053665161,grad_norm: 0.9999992982742226, iteration: 7907
loss: 1.0522466897964478,grad_norm: 0.9999996796302678, iteration: 7908
loss: 1.0374150276184082,grad_norm: 0.9999993695703422, iteration: 7909
loss: 1.0230791568756104,grad_norm: 0.9999995521615369, iteration: 7910
loss: 1.0873699188232422,grad_norm: 0.9999993430150576, iteration: 7911
loss: 1.084195852279663,grad_norm: 0.9999994620402913, iteration: 7912
loss: 1.1023592948913574,grad_norm: 0.9999996506009986, iteration: 7913
loss: 1.0741838216781616,grad_norm: 0.9999992251547181, iteration: 7914
loss: 1.087307095527649,grad_norm: 0.9999994454884992, iteration: 7915
loss: 1.0252432823181152,grad_norm: 0.9999994074189905, iteration: 7916
loss: 1.1249427795410156,grad_norm: 0.9999996653355671, iteration: 7917
loss: 1.0143799781799316,grad_norm: 0.9999995537332472, iteration: 7918
loss: 1.0685044527053833,grad_norm: 0.9999994838719457, iteration: 7919
loss: 1.04995596408844,grad_norm: 0.9999996815281499, iteration: 7920
loss: 1.0314323902130127,grad_norm: 0.9999995581296699, iteration: 7921
loss: 1.0099836587905884,grad_norm: 0.9999994654616764, iteration: 7922
loss: 1.0587949752807617,grad_norm: 0.9999995079866458, iteration: 7923
loss: 1.0431255102157593,grad_norm: 0.9999995104177423, iteration: 7924
loss: 1.0460270643234253,grad_norm: 0.9999992850955514, iteration: 7925
loss: 1.1239659786224365,grad_norm: 0.9999996313121834, iteration: 7926
loss: 1.0218948125839233,grad_norm: 0.9999993905658644, iteration: 7927
loss: 1.0484315156936646,grad_norm: 0.9999994523570104, iteration: 7928
loss: 1.0131583213806152,grad_norm: 0.9999992685910783, iteration: 7929
loss: 1.035778522491455,grad_norm: 0.9999994481303378, iteration: 7930
loss: 1.089843988418579,grad_norm: 0.9999995421844691, iteration: 7931
loss: 1.052067518234253,grad_norm: 0.9999996690482877, iteration: 7932
loss: 1.0742053985595703,grad_norm: 0.9999995358725162, iteration: 7933
loss: 1.0270005464553833,grad_norm: 0.9999994620059648, iteration: 7934
loss: 1.0620124340057373,grad_norm: 0.9999994043884569, iteration: 7935
loss: 1.0996695756912231,grad_norm: 0.9999997862082527, iteration: 7936
loss: 1.0623629093170166,grad_norm: 0.999999451980982, iteration: 7937
loss: 1.0618400573730469,grad_norm: 0.9999995620380534, iteration: 7938
loss: 1.0448592901229858,grad_norm: 0.9999997492698178, iteration: 7939
loss: 1.0599249601364136,grad_norm: 0.9999995495687918, iteration: 7940
loss: 0.9956753849983215,grad_norm: 0.9999992952752494, iteration: 7941
loss: 1.0077104568481445,grad_norm: 0.9999994497526717, iteration: 7942
loss: 0.9784992337226868,grad_norm: 0.999999181814773, iteration: 7943
loss: 1.0436631441116333,grad_norm: 0.999999428452119, iteration: 7944
loss: 1.0079257488250732,grad_norm: 0.9999992899597544, iteration: 7945
loss: 1.0948796272277832,grad_norm: 0.9999997160675427, iteration: 7946
loss: 1.026940107345581,grad_norm: 0.9999996336446072, iteration: 7947
loss: 1.070008635520935,grad_norm: 0.9999992808503232, iteration: 7948
loss: 1.0975898504257202,grad_norm: 0.9999995720791608, iteration: 7949
loss: 1.057287335395813,grad_norm: 0.9999995415598681, iteration: 7950
loss: 1.0573828220367432,grad_norm: 0.9999993688652877, iteration: 7951
loss: 1.0782426595687866,grad_norm: 0.9999993199890176, iteration: 7952
loss: 0.9943622946739197,grad_norm: 0.9999995216149178, iteration: 7953
loss: 1.1139708757400513,grad_norm: 0.9999997148283104, iteration: 7954
loss: 1.0467936992645264,grad_norm: 0.9999994380560137, iteration: 7955
loss: 1.0418355464935303,grad_norm: 0.9999993034584518, iteration: 7956
loss: 1.0427640676498413,grad_norm: 0.9999993856226679, iteration: 7957
loss: 1.0553596019744873,grad_norm: 0.9999997232948462, iteration: 7958
loss: 1.0757492780685425,grad_norm: 0.9999993815039704, iteration: 7959
loss: 1.0539990663528442,grad_norm: 0.9999993751858383, iteration: 7960
loss: 1.090134859085083,grad_norm: 0.9999996562967525, iteration: 7961
loss: 1.0314310789108276,grad_norm: 0.9999995688764712, iteration: 7962
loss: 1.0327402353286743,grad_norm: 0.9999994701874455, iteration: 7963
loss: 0.9832407832145691,grad_norm: 0.9999995580530171, iteration: 7964
loss: 1.0583347082138062,grad_norm: 0.9999994106280864, iteration: 7965
loss: 1.0545519590377808,grad_norm: 0.999999287234451, iteration: 7966
loss: 0.9700629711151123,grad_norm: 0.9999995889818426, iteration: 7967
loss: 1.0850759744644165,grad_norm: 0.999999523544794, iteration: 7968
loss: 1.0081284046173096,grad_norm: 0.9999996152674288, iteration: 7969
loss: 1.047764778137207,grad_norm: 0.9999994917926169, iteration: 7970
loss: 1.0211422443389893,grad_norm: 0.9999994427784502, iteration: 7971
loss: 1.0531389713287354,grad_norm: 0.9999994731493526, iteration: 7972
loss: 1.084396481513977,grad_norm: 0.9999994888858326, iteration: 7973
loss: 1.0331732034683228,grad_norm: 0.9999996090427938, iteration: 7974
loss: 1.1524543762207031,grad_norm: 0.9999997886637354, iteration: 7975
loss: 1.024660587310791,grad_norm: 0.9999995357176265, iteration: 7976
loss: 1.0189112424850464,grad_norm: 0.9999996134850118, iteration: 7977
loss: 1.053505778312683,grad_norm: 0.9999993846703864, iteration: 7978
loss: 1.015332818031311,grad_norm: 0.999999609663617, iteration: 7979
loss: 1.0797991752624512,grad_norm: 0.9999994393768774, iteration: 7980
loss: 1.038233995437622,grad_norm: 0.9999993985533201, iteration: 7981
loss: 1.068137764930725,grad_norm: 0.9999996761797632, iteration: 7982
loss: 1.0678315162658691,grad_norm: 0.9999993182914456, iteration: 7983
loss: 1.033057451248169,grad_norm: 0.9999993813371635, iteration: 7984
loss: 0.997643768787384,grad_norm: 0.9999992649385029, iteration: 7985
loss: 1.0923246145248413,grad_norm: 0.999999629080899, iteration: 7986
loss: 1.017544150352478,grad_norm: 0.9999993232866433, iteration: 7987
loss: 1.0241702795028687,grad_norm: 0.9999992630418766, iteration: 7988
loss: 1.087498426437378,grad_norm: 0.9999995960276564, iteration: 7989
loss: 1.1157596111297607,grad_norm: 0.9999995993748376, iteration: 7990
loss: 1.076011061668396,grad_norm: 0.9999995804706897, iteration: 7991
loss: 1.0767841339111328,grad_norm: 0.9999993288860698, iteration: 7992
loss: 1.0425739288330078,grad_norm: 0.9999993855509494, iteration: 7993
loss: 0.9867174029350281,grad_norm: 0.9999995125697363, iteration: 7994
loss: 1.0747038125991821,grad_norm: 0.9999997130235052, iteration: 7995
loss: 1.0474313497543335,grad_norm: 0.9999993982169358, iteration: 7996
loss: 1.0402435064315796,grad_norm: 0.9999993709607212, iteration: 7997
loss: 1.0781972408294678,grad_norm: 0.9999992333100115, iteration: 7998
loss: 1.0538963079452515,grad_norm: 0.9999993321026428, iteration: 7999
loss: 1.070772409439087,grad_norm: 0.9999993777055126, iteration: 8000
loss: 1.0821475982666016,grad_norm: 0.9999997025466224, iteration: 8001
loss: 0.992285430431366,grad_norm: 0.9999994840191777, iteration: 8002
loss: 1.0694966316223145,grad_norm: 0.9999993894133531, iteration: 8003
loss: 1.0250134468078613,grad_norm: 0.9999998116535433, iteration: 8004
loss: 1.0554291009902954,grad_norm: 0.9999994691375391, iteration: 8005
loss: 1.1245636940002441,grad_norm: 0.9999996573755526, iteration: 8006
loss: 1.0485905408859253,grad_norm: 0.9999994758555205, iteration: 8007
loss: 1.031267523765564,grad_norm: 0.9999994547435209, iteration: 8008
loss: 1.068589448928833,grad_norm: 0.9999996331444005, iteration: 8009
loss: 1.0435599088668823,grad_norm: 0.9999994505488286, iteration: 8010
loss: 1.0339981317520142,grad_norm: 0.9999993926591961, iteration: 8011
loss: 1.0901143550872803,grad_norm: 0.9999994465587948, iteration: 8012
loss: 1.1283658742904663,grad_norm: 0.9999996300384865, iteration: 8013
loss: 1.0712169408798218,grad_norm: 0.9999996469640806, iteration: 8014
loss: 1.0990854501724243,grad_norm: 0.9999998359518284, iteration: 8015
loss: 1.0553964376449585,grad_norm: 0.9999994977161664, iteration: 8016
loss: 1.0577627420425415,grad_norm: 0.9999998144454976, iteration: 8017
loss: 1.0789862871170044,grad_norm: 0.9999994453100547, iteration: 8018
loss: 1.0429128408432007,grad_norm: 0.9999996697283018, iteration: 8019
loss: 1.1443172693252563,grad_norm: 0.9999995237737639, iteration: 8020
loss: 1.0339120626449585,grad_norm: 0.999999370832052, iteration: 8021
loss: 0.9672823548316956,grad_norm: 0.99999945971413, iteration: 8022
loss: 1.0295685529708862,grad_norm: 0.9999996583309017, iteration: 8023
loss: 1.0260449647903442,grad_norm: 0.9999992919223828, iteration: 8024
loss: 1.0236327648162842,grad_norm: 0.999999191815265, iteration: 8025
loss: 1.0167922973632812,grad_norm: 0.9999992842868807, iteration: 8026
loss: 1.0528018474578857,grad_norm: 0.999999655779993, iteration: 8027
loss: 1.054491639137268,grad_norm: 0.9999996261002437, iteration: 8028
loss: 1.0719555616378784,grad_norm: 0.9999993192912326, iteration: 8029
loss: 1.1703710556030273,grad_norm: 0.9999997361578943, iteration: 8030
loss: 1.1672093868255615,grad_norm: 0.9999996297648942, iteration: 8031
loss: 1.120683193206787,grad_norm: 0.9999997702742854, iteration: 8032
loss: 1.1084036827087402,grad_norm: 0.9999996017518604, iteration: 8033
loss: 1.0880095958709717,grad_norm: 0.999999633326543, iteration: 8034
loss: 1.0965090990066528,grad_norm: 0.9999996049286095, iteration: 8035
loss: 1.0932730436325073,grad_norm: 0.9999997304522683, iteration: 8036
loss: 1.0331636667251587,grad_norm: 0.9999997463598059, iteration: 8037
loss: 1.0594146251678467,grad_norm: 0.99999959580646, iteration: 8038
loss: 1.1254749298095703,grad_norm: 0.9999998195728107, iteration: 8039
loss: 1.0254799127578735,grad_norm: 0.9999995340618449, iteration: 8040
loss: 1.053224802017212,grad_norm: 0.9999992794190882, iteration: 8041
loss: 1.077614665031433,grad_norm: 0.9999993560157107, iteration: 8042
loss: 1.031235694885254,grad_norm: 0.9999997654483365, iteration: 8043
loss: 1.0779362916946411,grad_norm: 0.9999995460931159, iteration: 8044
loss: 1.1018959283828735,grad_norm: 0.9999997740586292, iteration: 8045
loss: 1.0657334327697754,grad_norm: 0.9999996186942112, iteration: 8046
loss: 1.0560740232467651,grad_norm: 0.999999540109206, iteration: 8047
loss: 1.0666851997375488,grad_norm: 0.9999996885188701, iteration: 8048
loss: 1.0982948541641235,grad_norm: 0.9999994219957726, iteration: 8049
loss: 1.1553195714950562,grad_norm: 0.9999997173956161, iteration: 8050
loss: 1.0641380548477173,grad_norm: 0.9999994838092248, iteration: 8051
loss: 1.1269232034683228,grad_norm: 0.9999997391141944, iteration: 8052
loss: 1.039847493171692,grad_norm: 0.9999995868813903, iteration: 8053
loss: 1.0447660684585571,grad_norm: 0.9999993903043761, iteration: 8054
loss: 1.0367616415023804,grad_norm: 0.9999994847556648, iteration: 8055
loss: 1.1024699211120605,grad_norm: 0.9999994562231723, iteration: 8056
loss: 1.0730541944503784,grad_norm: 0.9999997200356786, iteration: 8057
loss: 1.01670241355896,grad_norm: 0.9999993851941579, iteration: 8058
loss: 1.059953212738037,grad_norm: 0.9999997227525116, iteration: 8059
loss: 1.0497353076934814,grad_norm: 0.9999995976278047, iteration: 8060
loss: 1.0658634901046753,grad_norm: 0.9999994788127501, iteration: 8061
loss: 1.0395444631576538,grad_norm: 0.9999994659138759, iteration: 8062
loss: 1.1008954048156738,grad_norm: 0.9999996542283113, iteration: 8063
loss: 0.990839958190918,grad_norm: 0.999999232980586, iteration: 8064
loss: 1.086409330368042,grad_norm: 0.9999996241686369, iteration: 8065
loss: 1.088262677192688,grad_norm: 0.9999994543558299, iteration: 8066
loss: 1.0291736125946045,grad_norm: 0.9999994582192419, iteration: 8067
loss: 1.0636756420135498,grad_norm: 0.999999615646556, iteration: 8068
loss: 1.0554096698760986,grad_norm: 0.9999994628105784, iteration: 8069
loss: 1.0512021780014038,grad_norm: 0.9999996845839144, iteration: 8070
loss: 0.9943793416023254,grad_norm: 0.9999992987428706, iteration: 8071
loss: 1.1082700490951538,grad_norm: 0.9999997300052854, iteration: 8072
loss: 1.0246766805648804,grad_norm: 0.9999993704482786, iteration: 8073
loss: 1.0252741575241089,grad_norm: 0.9999993918943123, iteration: 8074
loss: 1.053856372833252,grad_norm: 0.9999992980920178, iteration: 8075
loss: 1.0821667909622192,grad_norm: 0.9999998087482151, iteration: 8076
loss: 1.064340353012085,grad_norm: 0.9999994991806186, iteration: 8077
loss: 1.08599054813385,grad_norm: 0.9999993646608861, iteration: 8078
loss: 1.0375100374221802,grad_norm: 0.9999997702193671, iteration: 8079
loss: 1.0237001180648804,grad_norm: 0.9999994418256364, iteration: 8080
loss: 1.0597882270812988,grad_norm: 0.9999998391609344, iteration: 8081
loss: 1.0657763481140137,grad_norm: 0.9999996250980653, iteration: 8082
loss: 1.0383085012435913,grad_norm: 0.9999993004868458, iteration: 8083
loss: 1.047839879989624,grad_norm: 0.9999992763361243, iteration: 8084
loss: 1.08396577835083,grad_norm: 0.9999995181977992, iteration: 8085
loss: 1.0545023679733276,grad_norm: 0.9999994622178672, iteration: 8086
loss: 1.0758329629898071,grad_norm: 0.9999998406335513, iteration: 8087
loss: 1.1291747093200684,grad_norm: 0.9999994490551194, iteration: 8088
loss: 1.1341222524642944,grad_norm: 0.9999995522165421, iteration: 8089
loss: 1.102773904800415,grad_norm: 0.9999997711371538, iteration: 8090
loss: 1.0371756553649902,grad_norm: 0.999999677119934, iteration: 8091
loss: 1.0711115598678589,grad_norm: 0.9999995034161203, iteration: 8092
loss: 1.0665102005004883,grad_norm: 0.9999993117498132, iteration: 8093
loss: 1.0781755447387695,grad_norm: 0.9999994072692642, iteration: 8094
loss: 1.0478276014328003,grad_norm: 0.9999995753205105, iteration: 8095
loss: 1.0649292469024658,grad_norm: 0.9999995065814885, iteration: 8096
loss: 1.0320745706558228,grad_norm: 0.9999993746290298, iteration: 8097
loss: 1.0192320346832275,grad_norm: 0.9999996743809569, iteration: 8098
loss: 1.0417218208312988,grad_norm: 0.9999995526370827, iteration: 8099
loss: 1.043595790863037,grad_norm: 0.9999994899065163, iteration: 8100
loss: 1.0647722482681274,grad_norm: 0.999999496531197, iteration: 8101
loss: 1.0558130741119385,grad_norm: 0.9999993600335554, iteration: 8102
loss: 1.0546667575836182,grad_norm: 0.999999438152782, iteration: 8103
loss: 1.0066008567810059,grad_norm: 0.9999993795276365, iteration: 8104
loss: 1.048128366470337,grad_norm: 0.9999994501223786, iteration: 8105
loss: 0.9675485491752625,grad_norm: 0.9999994166656144, iteration: 8106
loss: 1.1052075624465942,grad_norm: 0.9999996998655638, iteration: 8107
loss: 1.0948578119277954,grad_norm: 0.9999997967349847, iteration: 8108
loss: 1.043919324874878,grad_norm: 0.9999993479093645, iteration: 8109
loss: 1.050487995147705,grad_norm: 0.9999995237088937, iteration: 8110
loss: 1.0638506412506104,grad_norm: 0.9999995548442355, iteration: 8111
loss: 1.003085970878601,grad_norm: 0.999999206337907, iteration: 8112
loss: 1.083300232887268,grad_norm: 0.9999997452193775, iteration: 8113
loss: 1.1009360551834106,grad_norm: 0.9999994688192266, iteration: 8114
loss: 1.0233237743377686,grad_norm: 0.9999995295308783, iteration: 8115
loss: 1.086588740348816,grad_norm: 0.999999451605647, iteration: 8116
loss: 1.0714880228042603,grad_norm: 0.9999995565725958, iteration: 8117
loss: 1.0265740156173706,grad_norm: 0.9999994162714159, iteration: 8118
loss: 1.070921540260315,grad_norm: 0.9999996383823102, iteration: 8119
loss: 1.001745581626892,grad_norm: 0.9999996417848144, iteration: 8120
loss: 1.0071460008621216,grad_norm: 0.9999993100619022, iteration: 8121
loss: 1.0478888750076294,grad_norm: 0.999999302230191, iteration: 8122
loss: 1.082520604133606,grad_norm: 0.9999991754294456, iteration: 8123
loss: 1.070109486579895,grad_norm: 0.9999995776596086, iteration: 8124
loss: 1.0623780488967896,grad_norm: 0.9999993710130549, iteration: 8125
loss: 0.991313099861145,grad_norm: 0.9999991933661212, iteration: 8126
loss: 1.0591365098953247,grad_norm: 0.9999993656165178, iteration: 8127
loss: 1.0372411012649536,grad_norm: 0.9999993720812833, iteration: 8128
loss: 1.066640853881836,grad_norm: 0.9999995153851682, iteration: 8129
loss: 1.0534149408340454,grad_norm: 0.9999993061401579, iteration: 8130
loss: 1.0847991704940796,grad_norm: 0.9999994773134496, iteration: 8131
loss: 1.0465213060379028,grad_norm: 0.9999993984820298, iteration: 8132
loss: 1.0380253791809082,grad_norm: 0.9999993113730296, iteration: 8133
loss: 1.0806713104248047,grad_norm: 0.9999997229066515, iteration: 8134
loss: 1.058968186378479,grad_norm: 0.999999450389543, iteration: 8135
loss: 1.094777226448059,grad_norm: 0.9999992864249357, iteration: 8136
loss: 1.0998579263687134,grad_norm: 0.9999994260644098, iteration: 8137
loss: 1.086557388305664,grad_norm: 0.9999996200953937, iteration: 8138
loss: 1.1028690338134766,grad_norm: 0.9999996446628684, iteration: 8139
loss: 1.0454729795455933,grad_norm: 0.9999993287095472, iteration: 8140
loss: 1.1466182470321655,grad_norm: 0.9999997080507208, iteration: 8141
loss: 1.076416254043579,grad_norm: 0.9999993956448657, iteration: 8142
loss: 1.0706572532653809,grad_norm: 0.9999994291435585, iteration: 8143
loss: 0.9745407104492188,grad_norm: 0.9999994073227514, iteration: 8144
loss: 0.9890440106391907,grad_norm: 0.9999993068710792, iteration: 8145
loss: 1.0496159791946411,grad_norm: 0.9999997052674581, iteration: 8146
loss: 1.070562720298767,grad_norm: 0.9999995391922917, iteration: 8147
loss: 1.104276418685913,grad_norm: 0.9999995687545161, iteration: 8148
loss: 1.0920343399047852,grad_norm: 0.9999996278471037, iteration: 8149
loss: 1.0847911834716797,grad_norm: 0.9999993104875511, iteration: 8150
loss: 1.0621825456619263,grad_norm: 0.9999996224930714, iteration: 8151
loss: 1.0131261348724365,grad_norm: 0.9999992654121687, iteration: 8152
loss: 1.0276342630386353,grad_norm: 0.9999993619149076, iteration: 8153
loss: 1.0381423234939575,grad_norm: 0.9999993788689907, iteration: 8154
loss: 1.0329340696334839,grad_norm: 0.9999993432613415, iteration: 8155
loss: 1.0828125476837158,grad_norm: 0.9999993809719887, iteration: 8156
loss: 1.0609763860702515,grad_norm: 0.9999994430367761, iteration: 8157
loss: 0.9811447858810425,grad_norm: 0.9999993724931621, iteration: 8158
loss: 1.0480427742004395,grad_norm: 0.9999993987874986, iteration: 8159
loss: 1.0906614065170288,grad_norm: 0.9999992505654186, iteration: 8160
loss: 1.006847620010376,grad_norm: 0.9999992700879271, iteration: 8161
loss: 1.0884612798690796,grad_norm: 0.9999994344350536, iteration: 8162
loss: 1.085713505744934,grad_norm: 0.9999994059812798, iteration: 8163
loss: 1.04818594455719,grad_norm: 0.9999991836600203, iteration: 8164
loss: 1.0130447149276733,grad_norm: 0.9999995591523498, iteration: 8165
loss: 1.1426715850830078,grad_norm: 0.9999997047436343, iteration: 8166
loss: 1.0567001104354858,grad_norm: 0.9999994554113061, iteration: 8167
loss: 1.012768268585205,grad_norm: 0.9999993288172344, iteration: 8168
loss: 1.0433214902877808,grad_norm: 0.9999993693149597, iteration: 8169
loss: 1.0453165769577026,grad_norm: 0.9999992238727472, iteration: 8170
loss: 1.087380051612854,grad_norm: 0.9999992979067156, iteration: 8171
loss: 1.0666894912719727,grad_norm: 0.9999994083092217, iteration: 8172
loss: 1.0466643571853638,grad_norm: 0.9999997998922251, iteration: 8173
loss: 1.116706132888794,grad_norm: 0.9999994306707446, iteration: 8174
loss: 1.0367194414138794,grad_norm: 0.9999995500425544, iteration: 8175
loss: 1.0995334386825562,grad_norm: 0.9999995499360229, iteration: 8176
loss: 1.036798119544983,grad_norm: 0.9999994912102202, iteration: 8177
loss: 1.0286697149276733,grad_norm: 0.9999994155891981, iteration: 8178
loss: 1.0922054052352905,grad_norm: 0.9999994305113986, iteration: 8179
loss: 1.074611783027649,grad_norm: 0.9999993490446643, iteration: 8180
loss: 1.0510050058364868,grad_norm: 0.999999560478607, iteration: 8181
loss: 1.0430248975753784,grad_norm: 0.9999993683815482, iteration: 8182
loss: 1.0642194747924805,grad_norm: 0.9999994224371067, iteration: 8183
loss: 1.0341945886611938,grad_norm: 0.999999670255505, iteration: 8184
loss: 1.065018892288208,grad_norm: 0.9999992681598832, iteration: 8185
loss: 1.0066876411437988,grad_norm: 0.9999994092769466, iteration: 8186
loss: 1.0398266315460205,grad_norm: 0.9999994087264065, iteration: 8187
loss: 1.0550271272659302,grad_norm: 0.9999994710884145, iteration: 8188
loss: 1.0642378330230713,grad_norm: 0.9999994834734477, iteration: 8189
loss: 1.0654997825622559,grad_norm: 0.9999993023260917, iteration: 8190
loss: 1.0794122219085693,grad_norm: 0.9999993626640583, iteration: 8191
loss: 1.0730409622192383,grad_norm: 0.9999994591079977, iteration: 8192
loss: 1.0320439338684082,grad_norm: 0.9999995188386439, iteration: 8193
loss: 1.0901696681976318,grad_norm: 0.9999995502749521, iteration: 8194
loss: 1.049048662185669,grad_norm: 0.9999993009981126, iteration: 8195
loss: 1.0300854444503784,grad_norm: 0.9999994164557053, iteration: 8196
loss: 1.037866473197937,grad_norm: 0.9999993905133817, iteration: 8197
loss: 1.069892168045044,grad_norm: 0.9999992318404134, iteration: 8198
loss: 1.064736008644104,grad_norm: 0.999999520761612, iteration: 8199
loss: 1.0831363201141357,grad_norm: 0.9999994258819226, iteration: 8200
loss: 1.0983498096466064,grad_norm: 0.9999994684775142, iteration: 8201
loss: 1.0480644702911377,grad_norm: 0.9999992415487237, iteration: 8202
loss: 1.0747125148773193,grad_norm: 0.9999993443023543, iteration: 8203
loss: 1.0416367053985596,grad_norm: 0.9999995475314948, iteration: 8204
loss: 1.0843074321746826,grad_norm: 0.9999992784406188, iteration: 8205
loss: 1.0434437990188599,grad_norm: 0.9999994231416591, iteration: 8206
loss: 1.0264960527420044,grad_norm: 0.9999993199113382, iteration: 8207
loss: 1.0121066570281982,grad_norm: 0.9999993573232878, iteration: 8208
loss: 1.0525656938552856,grad_norm: 0.9999993380756182, iteration: 8209
loss: 1.1191731691360474,grad_norm: 0.9999996034871463, iteration: 8210
loss: 1.0040996074676514,grad_norm: 0.999999415989507, iteration: 8211
loss: 1.0624743700027466,grad_norm: 0.9999993776947729, iteration: 8212
loss: 1.0893549919128418,grad_norm: 0.9999995821687939, iteration: 8213
loss: 1.0988142490386963,grad_norm: 0.9999995289712406, iteration: 8214
loss: 1.0323864221572876,grad_norm: 0.9999994712999415, iteration: 8215
loss: 1.0077118873596191,grad_norm: 0.9999994965466806, iteration: 8216
loss: 1.077004313468933,grad_norm: 0.9999995115355701, iteration: 8217
loss: 1.0031710863113403,grad_norm: 0.9999992944354844, iteration: 8218
loss: 1.061968445777893,grad_norm: 0.9999994386014998, iteration: 8219
loss: 1.108635425567627,grad_norm: 0.9999994463650003, iteration: 8220
loss: 1.0669143199920654,grad_norm: 0.9999994675369654, iteration: 8221
loss: 1.0436241626739502,grad_norm: 0.9999993744660719, iteration: 8222
loss: 1.1004880666732788,grad_norm: 0.9999995228471205, iteration: 8223
loss: 1.0020774602890015,grad_norm: 0.9999994841802338, iteration: 8224
loss: 1.0921576023101807,grad_norm: 0.9999996537978298, iteration: 8225
loss: 1.047586441040039,grad_norm: 0.9999994574965221, iteration: 8226
loss: 1.1537771224975586,grad_norm: 0.9999997775921629, iteration: 8227
loss: 1.013201355934143,grad_norm: 0.9999993368836553, iteration: 8228
loss: 1.0505608320236206,grad_norm: 0.9999994025291882, iteration: 8229
loss: 1.0013729333877563,grad_norm: 0.9999996315322761, iteration: 8230
loss: 1.0581368207931519,grad_norm: 0.9999993962906408, iteration: 8231
loss: 1.0451961755752563,grad_norm: 0.9999994526325219, iteration: 8232
loss: 0.9442534446716309,grad_norm: 0.9999994959505262, iteration: 8233
loss: 1.0600281953811646,grad_norm: 0.9999995129134486, iteration: 8234
loss: 1.0973520278930664,grad_norm: 0.999999591401009, iteration: 8235
loss: 1.068877100944519,grad_norm: 0.9999996651066044, iteration: 8236
loss: 1.0473464727401733,grad_norm: 0.9999993617921484, iteration: 8237
loss: 1.0120424032211304,grad_norm: 0.9999995885625313, iteration: 8238
loss: 1.0297255516052246,grad_norm: 0.9999993230241465, iteration: 8239
loss: 1.0977097749710083,grad_norm: 0.9999994617619411, iteration: 8240
loss: 1.0777157545089722,grad_norm: 0.9999995297967839, iteration: 8241
loss: 1.0324960947036743,grad_norm: 0.9999994308041721, iteration: 8242
loss: 1.0069905519485474,grad_norm: 0.9999995682389431, iteration: 8243
loss: 1.063339114189148,grad_norm: 0.9999993658295634, iteration: 8244
loss: 1.1040973663330078,grad_norm: 0.9999996479813563, iteration: 8245
loss: 1.0688209533691406,grad_norm: 0.9999995089981516, iteration: 8246
loss: 1.0271447896957397,grad_norm: 0.9999995894003254, iteration: 8247
loss: 1.0183223485946655,grad_norm: 0.9999993636108939, iteration: 8248
loss: 1.0802665948867798,grad_norm: 0.9999993108690964, iteration: 8249
loss: 1.0594115257263184,grad_norm: 0.9999992963415818, iteration: 8250
loss: 1.0891516208648682,grad_norm: 0.9999994064212395, iteration: 8251
loss: 1.0272693634033203,grad_norm: 0.9999994975357277, iteration: 8252
loss: 1.0266504287719727,grad_norm: 0.999999656716676, iteration: 8253
loss: 1.116509199142456,grad_norm: 0.9999996003184253, iteration: 8254
loss: 1.0023072957992554,grad_norm: 0.9999992676512968, iteration: 8255
loss: 0.9863142371177673,grad_norm: 0.9999994040075372, iteration: 8256
loss: 1.0529553890228271,grad_norm: 0.999999524365356, iteration: 8257
loss: 1.016190767288208,grad_norm: 0.9999994316319497, iteration: 8258
loss: 1.0964996814727783,grad_norm: 0.999999911528736, iteration: 8259
loss: 1.0616856813430786,grad_norm: 0.9999992807005625, iteration: 8260
loss: 1.0812046527862549,grad_norm: 0.9999993683350067, iteration: 8261
loss: 1.0074764490127563,grad_norm: 0.9999997520225432, iteration: 8262
loss: 1.108193039894104,grad_norm: 0.9999994888692678, iteration: 8263
loss: 1.0798885822296143,grad_norm: 0.9999993913182185, iteration: 8264
loss: 1.0865856409072876,grad_norm: 0.9999993644389057, iteration: 8265
loss: 1.1398842334747314,grad_norm: 0.9999996033506465, iteration: 8266
loss: 1.0524609088897705,grad_norm: 0.9999996495657091, iteration: 8267
loss: 1.0367045402526855,grad_norm: 0.9999993489293538, iteration: 8268
loss: 1.0399466753005981,grad_norm: 0.9999994333630937, iteration: 8269
loss: 1.0540072917938232,grad_norm: 0.9999993039857822, iteration: 8270
loss: 1.0828561782836914,grad_norm: 0.999999477149198, iteration: 8271
loss: 1.0136833190917969,grad_norm: 0.9999994707349126, iteration: 8272
loss: 1.0683625936508179,grad_norm: 0.9999994026123831, iteration: 8273
loss: 1.054707646369934,grad_norm: 0.9999993248669561, iteration: 8274
loss: 1.0311888456344604,grad_norm: 0.9999994502513369, iteration: 8275
loss: 1.0502554178237915,grad_norm: 0.9999993973077441, iteration: 8276
loss: 1.0759587287902832,grad_norm: 0.9999994436548062, iteration: 8277
loss: 1.0267884731292725,grad_norm: 0.9999992815041732, iteration: 8278
loss: 1.019983172416687,grad_norm: 0.9999994834407867, iteration: 8279
loss: 1.1577777862548828,grad_norm: 0.9999997095972283, iteration: 8280
loss: 1.101327657699585,grad_norm: 0.9999995850719607, iteration: 8281
loss: 1.018639087677002,grad_norm: 0.9999994521349065, iteration: 8282
loss: 1.0240254402160645,grad_norm: 0.9999994340899682, iteration: 8283
loss: 1.0453588962554932,grad_norm: 0.9999993164143237, iteration: 8284
loss: 0.9903939962387085,grad_norm: 0.9999992662040288, iteration: 8285
loss: 1.094972014427185,grad_norm: 0.9999995776397653, iteration: 8286
loss: 1.0844534635543823,grad_norm: 0.9999995340461505, iteration: 8287
loss: 0.9988148212432861,grad_norm: 0.9999992808509626, iteration: 8288
loss: 1.1491175889968872,grad_norm: 0.9999996253410661, iteration: 8289
loss: 1.085503339767456,grad_norm: 0.9999995956100803, iteration: 8290
loss: 1.0919148921966553,grad_norm: 0.999999513957177, iteration: 8291
loss: 1.1478686332702637,grad_norm: 0.9999995916145553, iteration: 8292
loss: 1.0442798137664795,grad_norm: 0.9999994878806028, iteration: 8293
loss: 1.060145378112793,grad_norm: 0.9999995678854455, iteration: 8294
loss: 1.0414561033248901,grad_norm: 0.9999993449355958, iteration: 8295
loss: 1.0903083086013794,grad_norm: 0.9999994224409473, iteration: 8296
loss: 1.0273820161819458,grad_norm: 0.9999996195399335, iteration: 8297
loss: 1.0437116622924805,grad_norm: 0.9999996425295276, iteration: 8298
loss: 1.053901195526123,grad_norm: 0.9999994813625745, iteration: 8299
loss: 1.0796928405761719,grad_norm: 0.9999997784109333, iteration: 8300
loss: 1.047699213027954,grad_norm: 0.9999993832871527, iteration: 8301
loss: 1.0644768476486206,grad_norm: 0.9999995171281444, iteration: 8302
loss: 1.1149770021438599,grad_norm: 0.9999994889975959, iteration: 8303
loss: 1.0983240604400635,grad_norm: 0.999999573510764, iteration: 8304
loss: 1.0727369785308838,grad_norm: 0.9999996089227051, iteration: 8305
loss: 1.030356526374817,grad_norm: 0.999999499604447, iteration: 8306
loss: 1.0487871170043945,grad_norm: 0.9999994149606805, iteration: 8307
loss: 0.957262396812439,grad_norm: 0.9999993012107369, iteration: 8308
loss: 1.0330523252487183,grad_norm: 0.9999994505019152, iteration: 8309
loss: 1.0636934041976929,grad_norm: 0.9999993749083635, iteration: 8310
loss: 1.0686677694320679,grad_norm: 0.9999995316425061, iteration: 8311
loss: 1.0190303325653076,grad_norm: 0.999999394514917, iteration: 8312
loss: 1.0849759578704834,grad_norm: 0.9999995156742353, iteration: 8313
loss: 1.0626635551452637,grad_norm: 0.9999994980102478, iteration: 8314
loss: 1.0429514646530151,grad_norm: 0.9999993406109847, iteration: 8315
loss: 1.0666007995605469,grad_norm: 0.9999994946109707, iteration: 8316
loss: 1.05287504196167,grad_norm: 0.9999993191735317, iteration: 8317
loss: 1.035251259803772,grad_norm: 0.9999993768047555, iteration: 8318
loss: 1.0484416484832764,grad_norm: 0.9999994307382247, iteration: 8319
loss: 0.9913535118103027,grad_norm: 0.9999992210232302, iteration: 8320
loss: 1.0108875036239624,grad_norm: 0.9999992356211286, iteration: 8321
loss: 1.0382572412490845,grad_norm: 0.9999993921771431, iteration: 8322
loss: 1.051517128944397,grad_norm: 0.9999995499957054, iteration: 8323
loss: 1.0625407695770264,grad_norm: 0.9999994598468891, iteration: 8324
loss: 1.0221574306488037,grad_norm: 0.9999994772507572, iteration: 8325
loss: 1.0843274593353271,grad_norm: 0.9999993395115594, iteration: 8326
loss: 1.0279042720794678,grad_norm: 0.9999993013658922, iteration: 8327
loss: 1.0828274488449097,grad_norm: 0.9999996837042396, iteration: 8328
loss: 1.0408037900924683,grad_norm: 0.9999993846074066, iteration: 8329
loss: 1.0038622617721558,grad_norm: 0.9999993581612553, iteration: 8330
loss: 1.0286637544631958,grad_norm: 0.999999399598768, iteration: 8331
loss: 1.0709421634674072,grad_norm: 0.9999995936159359, iteration: 8332
loss: 1.017006278038025,grad_norm: 0.9999992896643244, iteration: 8333
loss: 1.0457329750061035,grad_norm: 0.9999995621033655, iteration: 8334
loss: 1.0311589241027832,grad_norm: 0.9999994682462434, iteration: 8335
loss: 1.0603611469268799,grad_norm: 0.9999995425163988, iteration: 8336
loss: 1.1113861799240112,grad_norm: 0.9999994804768544, iteration: 8337
loss: 1.0689736604690552,grad_norm: 0.999999423576433, iteration: 8338
loss: 0.9752961993217468,grad_norm: 0.99999917371744, iteration: 8339
loss: 1.0522449016571045,grad_norm: 0.9999992701673661, iteration: 8340
loss: 1.0173465013504028,grad_norm: 0.9999995318511433, iteration: 8341
loss: 1.0690544843673706,grad_norm: 0.9999995655186368, iteration: 8342
loss: 1.041571021080017,grad_norm: 0.9999992698102708, iteration: 8343
loss: 1.0732678174972534,grad_norm: 0.9999996023730255, iteration: 8344
loss: 1.0601625442504883,grad_norm: 0.9999994384762916, iteration: 8345
loss: 1.0882480144500732,grad_norm: 0.9999995464211034, iteration: 8346
loss: 1.0586453676223755,grad_norm: 0.9999993423423768, iteration: 8347
loss: 1.060828447341919,grad_norm: 0.9999994973118618, iteration: 8348
loss: 1.0232206583023071,grad_norm: 0.999999443972957, iteration: 8349
loss: 1.0842607021331787,grad_norm: 0.9999994569437264, iteration: 8350
loss: 1.0150495767593384,grad_norm: 0.9999993165870112, iteration: 8351
loss: 1.0945899486541748,grad_norm: 0.9999996031542242, iteration: 8352
loss: 1.0684194564819336,grad_norm: 0.9999993546009104, iteration: 8353
loss: 1.0124107599258423,grad_norm: 0.9999994071095243, iteration: 8354
loss: 1.0068953037261963,grad_norm: 0.9999993770903244, iteration: 8355
loss: 1.0687328577041626,grad_norm: 0.9999992009810619, iteration: 8356
loss: 1.0539470911026,grad_norm: 0.9999992331961642, iteration: 8357
loss: 1.0805641412734985,grad_norm: 0.9999995501419338, iteration: 8358
loss: 1.1413036584854126,grad_norm: 0.9999994823932482, iteration: 8359
loss: 1.0669113397598267,grad_norm: 0.9999993934952719, iteration: 8360
loss: 1.0454751253128052,grad_norm: 0.9999993701059876, iteration: 8361
loss: 1.05722177028656,grad_norm: 0.9999993394896517, iteration: 8362
loss: 1.061922550201416,grad_norm: 0.9999994410933076, iteration: 8363
loss: 1.0598069429397583,grad_norm: 0.9999993627281314, iteration: 8364
loss: 1.0456020832061768,grad_norm: 0.9999993276689837, iteration: 8365
loss: 1.023285984992981,grad_norm: 0.9999993805154374, iteration: 8366
loss: 0.9924523234367371,grad_norm: 0.9999994836597603, iteration: 8367
loss: 1.0088672637939453,grad_norm: 0.9999991983606645, iteration: 8368
loss: 1.0453054904937744,grad_norm: 0.9999995299395007, iteration: 8369
loss: 1.0484439134597778,grad_norm: 0.9999994752035937, iteration: 8370
loss: 1.0576292276382446,grad_norm: 0.999999580469807, iteration: 8371
loss: 1.0172300338745117,grad_norm: 0.9999994735043265, iteration: 8372
loss: 1.0443614721298218,grad_norm: 0.9999994212970735, iteration: 8373
loss: 1.0475696325302124,grad_norm: 0.9999993281841297, iteration: 8374
loss: 1.0586328506469727,grad_norm: 0.9999994269148438, iteration: 8375
loss: 1.021166205406189,grad_norm: 0.999999363099378, iteration: 8376
loss: 0.9926925897598267,grad_norm: 0.9999993840696467, iteration: 8377
loss: 1.0863267183303833,grad_norm: 0.9999998578793544, iteration: 8378
loss: 1.0741384029388428,grad_norm: 0.9999994123808633, iteration: 8379
loss: 1.0610113143920898,grad_norm: 0.9999993638959145, iteration: 8380
loss: 1.0169650316238403,grad_norm: 0.9999992696209046, iteration: 8381
loss: 1.021246075630188,grad_norm: 0.9999996070845886, iteration: 8382
loss: 1.0603153705596924,grad_norm: 0.9999994391615713, iteration: 8383
loss: 1.0454891920089722,grad_norm: 0.999999549812639, iteration: 8384
loss: 1.0864081382751465,grad_norm: 0.9999995877761165, iteration: 8385
loss: 1.0519729852676392,grad_norm: 0.9999994842389932, iteration: 8386
loss: 1.0956518650054932,grad_norm: 0.9999997985566857, iteration: 8387
loss: 1.007885217666626,grad_norm: 0.9999992618744825, iteration: 8388
loss: 1.1428496837615967,grad_norm: 0.9999996139576159, iteration: 8389
loss: 1.1121666431427002,grad_norm: 0.9999995307608417, iteration: 8390
loss: 1.0944970846176147,grad_norm: 0.9999995918894689, iteration: 8391
loss: 1.0602949857711792,grad_norm: 0.9999996697685427, iteration: 8392
loss: 1.0962677001953125,grad_norm: 0.9999992199569399, iteration: 8393
loss: 1.127238392829895,grad_norm: 0.9999993786146115, iteration: 8394
loss: 1.0125168561935425,grad_norm: 0.9999993426176845, iteration: 8395
loss: 1.0430907011032104,grad_norm: 0.9999993519492307, iteration: 8396
loss: 1.0270605087280273,grad_norm: 0.9999997020801116, iteration: 8397
loss: 1.0464649200439453,grad_norm: 0.9999996199336211, iteration: 8398
loss: 1.0336196422576904,grad_norm: 0.9999994815042181, iteration: 8399
loss: 1.0626472234725952,grad_norm: 0.9999993025535634, iteration: 8400
loss: 1.0204966068267822,grad_norm: 0.9999991809939867, iteration: 8401
loss: 0.9840131998062134,grad_norm: 0.9999992912908615, iteration: 8402
loss: 1.0710254907608032,grad_norm: 0.9999996831419009, iteration: 8403
loss: 1.0862112045288086,grad_norm: 0.9999995770982664, iteration: 8404
loss: 1.0847666263580322,grad_norm: 0.9999994375016914, iteration: 8405
loss: 1.0709980726242065,grad_norm: 0.999999348095958, iteration: 8406
loss: 1.0602293014526367,grad_norm: 0.9999994630213139, iteration: 8407
loss: 1.0428292751312256,grad_norm: 0.9999993545980994, iteration: 8408
loss: 1.0525399446487427,grad_norm: 0.9999997337094545, iteration: 8409
loss: 1.0249625444412231,grad_norm: 0.9999992038036294, iteration: 8410
loss: 0.9971232414245605,grad_norm: 0.9999994007620419, iteration: 8411
loss: 1.0774836540222168,grad_norm: 0.9999993782960013, iteration: 8412
loss: 1.0383217334747314,grad_norm: 0.9999992997652603, iteration: 8413
loss: 1.0794264078140259,grad_norm: 0.9999997913697186, iteration: 8414
loss: 1.1010276079177856,grad_norm: 0.9999995189559271, iteration: 8415
loss: 1.0359852313995361,grad_norm: 0.9999992981813733, iteration: 8416
loss: 1.0619250535964966,grad_norm: 0.9999992404876893, iteration: 8417
loss: 1.0548218488693237,grad_norm: 0.9999993771705936, iteration: 8418
loss: 1.0292963981628418,grad_norm: 0.9999997393557164, iteration: 8419
loss: 1.1072639226913452,grad_norm: 0.9999994223218648, iteration: 8420
loss: 1.0389206409454346,grad_norm: 0.9999994925302726, iteration: 8421
loss: 1.0157722234725952,grad_norm: 0.9999993485967, iteration: 8422
loss: 0.9922022223472595,grad_norm: 0.999999364902179, iteration: 8423
loss: 1.0679216384887695,grad_norm: 0.9999993605731238, iteration: 8424
loss: 1.0623606443405151,grad_norm: 0.999999520120363, iteration: 8425
loss: 1.0542314052581787,grad_norm: 0.999999541387018, iteration: 8426
loss: 1.024396300315857,grad_norm: 0.9999994202746318, iteration: 8427
loss: 1.0334967374801636,grad_norm: 0.9999993432633998, iteration: 8428
loss: 1.029549241065979,grad_norm: 0.9999992930193139, iteration: 8429
loss: 1.0909881591796875,grad_norm: 0.999999681498373, iteration: 8430
loss: 1.0851210355758667,grad_norm: 0.9999993990626047, iteration: 8431
loss: 1.0067347288131714,grad_norm: 0.9999992417626932, iteration: 8432
loss: 1.0383576154708862,grad_norm: 0.9999992677500932, iteration: 8433
loss: 1.0373090505599976,grad_norm: 0.9999992890808885, iteration: 8434
loss: 1.0126968622207642,grad_norm: 0.9999993181422192, iteration: 8435
loss: 1.0363649129867554,grad_norm: 0.9999993442349502, iteration: 8436
loss: 1.0281366109848022,grad_norm: 0.9999993177627746, iteration: 8437
loss: 1.0697622299194336,grad_norm: 0.9999995267065633, iteration: 8438
loss: 1.0352966785430908,grad_norm: 0.999999536675089, iteration: 8439
loss: 1.1041996479034424,grad_norm: 0.9999994301134059, iteration: 8440
loss: 1.0754835605621338,grad_norm: 0.9999993706321054, iteration: 8441
loss: 0.9771649241447449,grad_norm: 0.9999992266864136, iteration: 8442
loss: 1.0546492338180542,grad_norm: 0.999999536953143, iteration: 8443
loss: 1.1220109462738037,grad_norm: 0.9999994002320267, iteration: 8444
loss: 1.030314326286316,grad_norm: 0.9999998086333258, iteration: 8445
loss: 1.0107054710388184,grad_norm: 0.9999994677407629, iteration: 8446
loss: 0.9915430545806885,grad_norm: 0.9999994936800114, iteration: 8447
loss: 1.0375558137893677,grad_norm: 0.9999995912111289, iteration: 8448
loss: 1.0470479726791382,grad_norm: 0.9999995903122054, iteration: 8449
loss: 1.0044245719909668,grad_norm: 0.9999992235589708, iteration: 8450
loss: 1.0742520093917847,grad_norm: 0.9999994581196436, iteration: 8451
loss: 0.9839335680007935,grad_norm: 0.9999992711872042, iteration: 8452
loss: 1.0656468868255615,grad_norm: 0.9999993780698895, iteration: 8453
loss: 1.0475099086761475,grad_norm: 0.9999993009779272, iteration: 8454
loss: 1.0051865577697754,grad_norm: 0.999999362055253, iteration: 8455
loss: 1.069028615951538,grad_norm: 0.9999993735329419, iteration: 8456
loss: 1.0649592876434326,grad_norm: 0.9999993844120022, iteration: 8457
loss: 1.089608907699585,grad_norm: 0.999999602434367, iteration: 8458
loss: 1.0382084846496582,grad_norm: 0.9999993675376564, iteration: 8459
loss: 1.034404993057251,grad_norm: 0.9999994385566229, iteration: 8460
loss: 1.0448729991912842,grad_norm: 0.9999993248497465, iteration: 8461
loss: 1.023253083229065,grad_norm: 0.9999995723101882, iteration: 8462
loss: 0.9834233522415161,grad_norm: 0.9999993876675695, iteration: 8463
loss: 1.059880256652832,grad_norm: 0.9999995904032847, iteration: 8464
loss: 1.0718613862991333,grad_norm: 0.9999993787782901, iteration: 8465
loss: 1.079741358757019,grad_norm: 0.9999993256844494, iteration: 8466
loss: 1.0574424266815186,grad_norm: 0.9999996454368122, iteration: 8467
loss: 1.0555362701416016,grad_norm: 0.9999997071903942, iteration: 8468
loss: 1.0530617237091064,grad_norm: 0.9999996474677174, iteration: 8469
loss: 1.0163938999176025,grad_norm: 0.9999993742489303, iteration: 8470
loss: 1.1182928085327148,grad_norm: 0.9999993876789104, iteration: 8471
loss: 1.0229464769363403,grad_norm: 0.999999406413453, iteration: 8472
loss: 1.0579948425292969,grad_norm: 0.9999993093972038, iteration: 8473
loss: 1.0438835620880127,grad_norm: 0.9999997285260719, iteration: 8474
loss: 1.013663411140442,grad_norm: 0.9999994906715013, iteration: 8475
loss: 1.0528205633163452,grad_norm: 0.999999573916149, iteration: 8476
loss: 1.1698877811431885,grad_norm: 0.9999998433001935, iteration: 8477
loss: 1.0326521396636963,grad_norm: 0.9999991992118363, iteration: 8478
loss: 1.055154800415039,grad_norm: 0.9999994161806949, iteration: 8479
loss: 1.0426156520843506,grad_norm: 0.999999527602679, iteration: 8480
loss: 0.9999393820762634,grad_norm: 0.9999993848776164, iteration: 8481
loss: 0.9859883785247803,grad_norm: 0.9999994558617484, iteration: 8482
loss: 1.0239150524139404,grad_norm: 0.9999994224368025, iteration: 8483
loss: 1.0316299200057983,grad_norm: 0.9999994181311898, iteration: 8484
loss: 0.954562246799469,grad_norm: 0.9999992828002287, iteration: 8485
loss: 1.0995752811431885,grad_norm: 0.9999995720272618, iteration: 8486
loss: 0.9952437281608582,grad_norm: 0.9999993062083542, iteration: 8487
loss: 1.1162759065628052,grad_norm: 0.9999995564493717, iteration: 8488
loss: 1.0277655124664307,grad_norm: 0.9999992361348979, iteration: 8489
loss: 1.0168366432189941,grad_norm: 0.9999993342317853, iteration: 8490
loss: 1.022571086883545,grad_norm: 0.9999994358148575, iteration: 8491
loss: 1.1129378080368042,grad_norm: 0.9999996236565771, iteration: 8492
loss: 1.0560553073883057,grad_norm: 0.9999994945293769, iteration: 8493
loss: 1.0842598676681519,grad_norm: 0.9999997758333182, iteration: 8494
loss: 1.0734753608703613,grad_norm: 0.9999994396342614, iteration: 8495
loss: 0.9756261706352234,grad_norm: 0.9999993841434305, iteration: 8496
loss: 1.0968990325927734,grad_norm: 0.9999995216819131, iteration: 8497
loss: 1.0761761665344238,grad_norm: 0.9999994605952233, iteration: 8498
loss: 1.0399202108383179,grad_norm: 0.9999996100651747, iteration: 8499
loss: 0.9665045142173767,grad_norm: 0.99999930257944, iteration: 8500
loss: 1.0166006088256836,grad_norm: 0.9999995577212898, iteration: 8501
loss: 1.1217472553253174,grad_norm: 0.999999791783172, iteration: 8502
loss: 1.0099148750305176,grad_norm: 0.999999468782374, iteration: 8503
loss: 1.019092321395874,grad_norm: 0.9999994230992454, iteration: 8504
loss: 1.0109081268310547,grad_norm: 0.9999994130243466, iteration: 8505
loss: 1.0999842882156372,grad_norm: 0.9999994907088202, iteration: 8506
loss: 1.0491875410079956,grad_norm: 0.9999993687836495, iteration: 8507
loss: 1.0753092765808105,grad_norm: 0.9999995893699212, iteration: 8508
loss: 1.0082184076309204,grad_norm: 0.9999993454156321, iteration: 8509
loss: 1.0818599462509155,grad_norm: 0.9999994819156962, iteration: 8510
loss: 1.0817126035690308,grad_norm: 0.9999994553608772, iteration: 8511
loss: 1.0484284162521362,grad_norm: 0.999999405498637, iteration: 8512
loss: 1.0587290525436401,grad_norm: 0.999999469610429, iteration: 8513
loss: 1.0730196237564087,grad_norm: 0.999999635948189, iteration: 8514
loss: 1.114715814590454,grad_norm: 0.9999996021177325, iteration: 8515
loss: 1.0876518487930298,grad_norm: 0.9999995699317966, iteration: 8516
loss: 1.0840157270431519,grad_norm: 0.9999994334548035, iteration: 8517
loss: 1.056276559829712,grad_norm: 0.9999996018512292, iteration: 8518
loss: 0.9856882691383362,grad_norm: 0.9999993298285297, iteration: 8519
loss: 0.9548434019088745,grad_norm: 0.9999992171784615, iteration: 8520
loss: 1.0139286518096924,grad_norm: 0.9999993028391272, iteration: 8521
loss: 1.0466747283935547,grad_norm: 0.9999993101157911, iteration: 8522
loss: 1.0426169633865356,grad_norm: 0.9999993929361354, iteration: 8523
loss: 1.0861988067626953,grad_norm: 0.9999994672976542, iteration: 8524
loss: 1.0002241134643555,grad_norm: 0.9999993754370465, iteration: 8525
loss: 1.024014949798584,grad_norm: 0.9999993839729708, iteration: 8526
loss: 1.0814881324768066,grad_norm: 0.9999995772345835, iteration: 8527
loss: 1.0731494426727295,grad_norm: 0.9999994472666704, iteration: 8528
loss: 1.097296953201294,grad_norm: 0.9999996726736805, iteration: 8529
loss: 1.0645760297775269,grad_norm: 0.9999993647596226, iteration: 8530
loss: 1.0195685625076294,grad_norm: 0.9999993240686782, iteration: 8531
loss: 1.0512710809707642,grad_norm: 0.9999993502150224, iteration: 8532
loss: 1.0076833963394165,grad_norm: 0.9999994111498438, iteration: 8533
loss: 1.0216373205184937,grad_norm: 0.9999994378199206, iteration: 8534
loss: 1.0299928188323975,grad_norm: 0.9999993025054407, iteration: 8535
loss: 1.0529210567474365,grad_norm: 0.9999994326942768, iteration: 8536
loss: 1.0683205127716064,grad_norm: 0.999999556019922, iteration: 8537
loss: 1.1348382234573364,grad_norm: 0.9999998961946152, iteration: 8538
loss: 1.0509607791900635,grad_norm: 0.9999996088049512, iteration: 8539
loss: 1.0524027347564697,grad_norm: 0.9999993976333559, iteration: 8540
loss: 1.059963583946228,grad_norm: 0.9999993508826478, iteration: 8541
loss: 1.1389639377593994,grad_norm: 0.9999997190256198, iteration: 8542
loss: 1.052322506904602,grad_norm: 0.999999623205525, iteration: 8543
loss: 1.0873806476593018,grad_norm: 0.9999997486790604, iteration: 8544
loss: 1.125449299812317,grad_norm: 0.9999996193396249, iteration: 8545
loss: 1.0478522777557373,grad_norm: 0.9999992579356896, iteration: 8546
loss: 1.02865731716156,grad_norm: 0.9999993050247441, iteration: 8547
loss: 1.0735234022140503,grad_norm: 0.9999995577501092, iteration: 8548
loss: 1.0431396961212158,grad_norm: 0.9999994775598514, iteration: 8549
loss: 1.0816612243652344,grad_norm: 0.9999996489436269, iteration: 8550
loss: 1.1085126399993896,grad_norm: 0.9999995464397997, iteration: 8551
loss: 1.1222953796386719,grad_norm: 0.9999996811229964, iteration: 8552
loss: 1.0710456371307373,grad_norm: 0.9999994979091438, iteration: 8553
loss: 1.0553760528564453,grad_norm: 0.9999997311842654, iteration: 8554
loss: 1.0455671548843384,grad_norm: 0.99999944569513, iteration: 8555
loss: 1.034018635749817,grad_norm: 0.9999993882612814, iteration: 8556
loss: 1.017557144165039,grad_norm: 0.9999994844749532, iteration: 8557
loss: 1.0456041097640991,grad_norm: 0.9999993130012084, iteration: 8558
loss: 1.0638703107833862,grad_norm: 0.9999997541137295, iteration: 8559
loss: 1.073274850845337,grad_norm: 0.9999993521877046, iteration: 8560
loss: 1.0943973064422607,grad_norm: 0.9999995363518216, iteration: 8561
loss: 1.014572024345398,grad_norm: 0.9999993662829733, iteration: 8562
loss: 1.0516430139541626,grad_norm: 0.9999996222073066, iteration: 8563
loss: 1.0740073919296265,grad_norm: 0.9999995324590715, iteration: 8564
loss: 1.0924901962280273,grad_norm: 0.9999994126234847, iteration: 8565
loss: 1.0826863050460815,grad_norm: 0.9999996194515951, iteration: 8566
loss: 1.009893536567688,grad_norm: 0.9999995601363816, iteration: 8567
loss: 1.0222636461257935,grad_norm: 0.9999994958995729, iteration: 8568
loss: 1.0335891246795654,grad_norm: 0.9999994675492502, iteration: 8569
loss: 1.0585163831710815,grad_norm: 0.9999994662755071, iteration: 8570
loss: 1.014756441116333,grad_norm: 0.9999994850001479, iteration: 8571
loss: 0.9763350486755371,grad_norm: 0.9999993013749876, iteration: 8572
loss: 1.0618765354156494,grad_norm: 0.9999993481433873, iteration: 8573
loss: 1.0716819763183594,grad_norm: 0.9999993827653997, iteration: 8574
loss: 1.0029919147491455,grad_norm: 0.9999994423428087, iteration: 8575
loss: 1.0890861749649048,grad_norm: 0.9999997381226557, iteration: 8576
loss: 0.9861771464347839,grad_norm: 0.999999412634238, iteration: 8577
loss: 1.1431870460510254,grad_norm: 0.9999996383004292, iteration: 8578
loss: 1.0678421258926392,grad_norm: 0.9999994413724483, iteration: 8579
loss: 1.0254188776016235,grad_norm: 0.9999992945220152, iteration: 8580
loss: 1.0295817852020264,grad_norm: 0.9999992476988406, iteration: 8581
loss: 1.1204973459243774,grad_norm: 0.9999996095974782, iteration: 8582
loss: 1.070569396018982,grad_norm: 0.9999994916707068, iteration: 8583
loss: 1.020452618598938,grad_norm: 0.9999994337711343, iteration: 8584
loss: 1.0134296417236328,grad_norm: 0.9999992585125329, iteration: 8585
loss: 1.0157369375228882,grad_norm: 0.9999995947226763, iteration: 8586
loss: 1.0536293983459473,grad_norm: 0.9999994066856746, iteration: 8587
loss: 1.0657402276992798,grad_norm: 0.999999373018816, iteration: 8588
loss: 1.061859130859375,grad_norm: 0.999999532079734, iteration: 8589
loss: 1.029497504234314,grad_norm: 0.999999536286521, iteration: 8590
loss: 1.057581901550293,grad_norm: 0.9999994432489383, iteration: 8591
loss: 1.1038343906402588,grad_norm: 0.9999997097022412, iteration: 8592
loss: 1.0752891302108765,grad_norm: 0.9999994481719298, iteration: 8593
loss: 1.0701392889022827,grad_norm: 0.9999995620997906, iteration: 8594
loss: 1.030133843421936,grad_norm: 0.9999993804887507, iteration: 8595
loss: 1.0480451583862305,grad_norm: 0.9999994003666547, iteration: 8596
loss: 1.008104920387268,grad_norm: 0.9999994389434363, iteration: 8597
loss: 1.0761196613311768,grad_norm: 0.9999995324473745, iteration: 8598
loss: 1.0657374858856201,grad_norm: 0.999999560235561, iteration: 8599
loss: 1.0057635307312012,grad_norm: 0.9999993918029534, iteration: 8600
loss: 1.0245428085327148,grad_norm: 0.99999981680934, iteration: 8601
loss: 1.061374545097351,grad_norm: 0.9999997053953925, iteration: 8602
loss: 1.0372525453567505,grad_norm: 0.9999995429414612, iteration: 8603
loss: 0.9795541167259216,grad_norm: 0.9999993065104935, iteration: 8604
loss: 0.9915413856506348,grad_norm: 0.9999992718359197, iteration: 8605
loss: 1.0411508083343506,grad_norm: 0.9999995025222718, iteration: 8606
loss: 1.0799087285995483,grad_norm: 0.9999995448031285, iteration: 8607
loss: 1.0222060680389404,grad_norm: 0.9999995309755653, iteration: 8608
loss: 0.9931067228317261,grad_norm: 0.9999991577675912, iteration: 8609
loss: 1.031674861907959,grad_norm: 0.9999992006927255, iteration: 8610
loss: 1.0815752744674683,grad_norm: 0.9999994043814664, iteration: 8611
loss: 1.1024730205535889,grad_norm: 0.9999998326341883, iteration: 8612
loss: 1.071453332901001,grad_norm: 0.9999992778334585, iteration: 8613
loss: 1.0825223922729492,grad_norm: 0.9999994830196888, iteration: 8614
loss: 1.022117257118225,grad_norm: 0.9999991158511787, iteration: 8615
loss: 1.08048415184021,grad_norm: 0.9999994193150555, iteration: 8616
loss: 1.0591802597045898,grad_norm: 0.9999996907345977, iteration: 8617
loss: 1.0193151235580444,grad_norm: 0.9999995175546555, iteration: 8618
loss: 1.0427870750427246,grad_norm: 0.9999994439047054, iteration: 8619
loss: 1.0766929388046265,grad_norm: 0.9999996394974744, iteration: 8620
loss: 1.115674376487732,grad_norm: 0.9999995386823862, iteration: 8621
loss: 1.0876184701919556,grad_norm: 0.9999995107088695, iteration: 8622
loss: 1.0561305284500122,grad_norm: 0.999999186049437, iteration: 8623
loss: 1.009032130241394,grad_norm: 0.999999444195245, iteration: 8624
loss: 1.0149942636489868,grad_norm: 0.9999992011473801, iteration: 8625
loss: 1.0408834218978882,grad_norm: 0.9999993532387838, iteration: 8626
loss: 1.0319825410842896,grad_norm: 0.9999996204639423, iteration: 8627
loss: 1.10251784324646,grad_norm: 0.9999995955665466, iteration: 8628
loss: 1.0901724100112915,grad_norm: 0.9999993968985917, iteration: 8629
loss: 1.06644868850708,grad_norm: 0.9999994950838906, iteration: 8630
loss: 1.0501996278762817,grad_norm: 0.9999994158140082, iteration: 8631
loss: 1.0333808660507202,grad_norm: 0.9999993375255061, iteration: 8632
loss: 1.1001269817352295,grad_norm: 0.9999993337636958, iteration: 8633
loss: 1.0721676349639893,grad_norm: 0.9999992312055691, iteration: 8634
loss: 1.0688222646713257,grad_norm: 0.9999993703948596, iteration: 8635
loss: 1.0365066528320312,grad_norm: 0.9999993471294064, iteration: 8636
loss: 1.0202631950378418,grad_norm: 0.9999995705957192, iteration: 8637
loss: 1.0135923624038696,grad_norm: 0.9999993328220323, iteration: 8638
loss: 1.0546568632125854,grad_norm: 0.9999993638732458, iteration: 8639
loss: 1.0811928510665894,grad_norm: 0.9999994335282155, iteration: 8640
loss: 1.002267837524414,grad_norm: 0.9999994217409343, iteration: 8641
loss: 1.0558241605758667,grad_norm: 0.9999995177384708, iteration: 8642
loss: 1.0439151525497437,grad_norm: 0.9999993631556017, iteration: 8643
loss: 1.03424072265625,grad_norm: 0.9999993120315002, iteration: 8644
loss: 1.079385757446289,grad_norm: 0.9999992654804534, iteration: 8645
loss: 1.1176820993423462,grad_norm: 0.9999995979912184, iteration: 8646
loss: 1.057738184928894,grad_norm: 0.9999995743749507, iteration: 8647
loss: 1.0097404718399048,grad_norm: 0.9999992232212609, iteration: 8648
loss: 1.0478287935256958,grad_norm: 0.9999993599477112, iteration: 8649
loss: 1.0500991344451904,grad_norm: 0.999999355931993, iteration: 8650
loss: 1.0544253587722778,grad_norm: 0.9999994642809514, iteration: 8651
loss: 1.0571752786636353,grad_norm: 0.9999996452191733, iteration: 8652
loss: 1.0451297760009766,grad_norm: 0.9999995093898858, iteration: 8653
loss: 1.098819613456726,grad_norm: 0.9999994484079997, iteration: 8654
loss: 1.0852776765823364,grad_norm: 0.9999994750256348, iteration: 8655
loss: 1.088658094406128,grad_norm: 0.999999686422907, iteration: 8656
loss: 1.0822590589523315,grad_norm: 0.9999993977946169, iteration: 8657
loss: 1.046345591545105,grad_norm: 0.9999993209119965, iteration: 8658
loss: 1.0411394834518433,grad_norm: 0.999999295060897, iteration: 8659
loss: 1.0389683246612549,grad_norm: 0.9999991094396175, iteration: 8660
loss: 0.9779150485992432,grad_norm: 0.9999992986755151, iteration: 8661
loss: 1.0327574014663696,grad_norm: 0.999999278718493, iteration: 8662
loss: 1.039188265800476,grad_norm: 0.9999994074849997, iteration: 8663
loss: 1.0684791803359985,grad_norm: 0.9999998111398238, iteration: 8664
loss: 1.1284366846084595,grad_norm: 0.9999997011818902, iteration: 8665
loss: 1.0401779413223267,grad_norm: 0.9999995530379506, iteration: 8666
loss: 1.0189933776855469,grad_norm: 0.9999993929591908, iteration: 8667
loss: 1.036948323249817,grad_norm: 0.999999278260657, iteration: 8668
loss: 1.0195186138153076,grad_norm: 0.9999993793771595, iteration: 8669
loss: 0.9717035889625549,grad_norm: 0.9999996353456987, iteration: 8670
loss: 1.0670692920684814,grad_norm: 0.9999992991236086, iteration: 8671
loss: 1.132509469985962,grad_norm: 0.9999995908407884, iteration: 8672
loss: 1.0485798120498657,grad_norm: 0.9999991659573483, iteration: 8673
loss: 1.0942728519439697,grad_norm: 0.9999996639098285, iteration: 8674
loss: 1.0503414869308472,grad_norm: 0.9999993492460956, iteration: 8675
loss: 1.0535520315170288,grad_norm: 0.9999993164711489, iteration: 8676
loss: 1.089624285697937,grad_norm: 0.9999994915244228, iteration: 8677
loss: 1.0677772760391235,grad_norm: 0.9999995061860326, iteration: 8678
loss: 1.0960817337036133,grad_norm: 0.9999995171001511, iteration: 8679
loss: 1.0474605560302734,grad_norm: 0.9999994469732086, iteration: 8680
loss: 1.0880969762802124,grad_norm: 0.9999996019484798, iteration: 8681
loss: 1.030121088027954,grad_norm: 0.9999995988861534, iteration: 8682
loss: 0.9941805601119995,grad_norm: 0.9999993352380077, iteration: 8683
loss: 1.0414519309997559,grad_norm: 0.9999993542123358, iteration: 8684
loss: 1.119341254234314,grad_norm: 0.999999644328847, iteration: 8685
loss: 1.0591098070144653,grad_norm: 0.9999995488554918, iteration: 8686
loss: 1.094746708869934,grad_norm: 0.9999994313218173, iteration: 8687
loss: 1.0612064599990845,grad_norm: 0.9999994336225835, iteration: 8688
loss: 1.0512508153915405,grad_norm: 0.9999993225562441, iteration: 8689
loss: 1.0317171812057495,grad_norm: 0.9999993280098786, iteration: 8690
loss: 1.0554378032684326,grad_norm: 0.9999994497603635, iteration: 8691
loss: 1.0594547986984253,grad_norm: 0.9999992991733786, iteration: 8692
loss: 1.0645334720611572,grad_norm: 0.9999993387569132, iteration: 8693
loss: 1.0232200622558594,grad_norm: 0.9999994991881055, iteration: 8694
loss: 1.052903652191162,grad_norm: 0.9999994933450325, iteration: 8695
loss: 1.0775892734527588,grad_norm: 0.9999996738055339, iteration: 8696
loss: 1.0348819494247437,grad_norm: 0.9999995811368929, iteration: 8697
loss: 1.0253885984420776,grad_norm: 0.9999992331946465, iteration: 8698
loss: 0.994249165058136,grad_norm: 0.9999994094081651, iteration: 8699
loss: 1.0482428073883057,grad_norm: 0.9999993699529149, iteration: 8700
loss: 1.0140730142593384,grad_norm: 0.99999960383705, iteration: 8701
loss: 1.0330742597579956,grad_norm: 0.9999994873105832, iteration: 8702
loss: 1.063007116317749,grad_norm: 0.9999995408973374, iteration: 8703
loss: 1.0458970069885254,grad_norm: 0.9999996376368813, iteration: 8704
loss: 1.0213475227355957,grad_norm: 0.999999439309346, iteration: 8705
loss: 1.0453399419784546,grad_norm: 0.9999993196449051, iteration: 8706
loss: 1.0001020431518555,grad_norm: 0.9999992720040471, iteration: 8707
loss: 1.042677402496338,grad_norm: 0.9999994300833456, iteration: 8708
loss: 1.0365655422210693,grad_norm: 0.999999366528415, iteration: 8709
loss: 1.0534510612487793,grad_norm: 0.9999996294073233, iteration: 8710
loss: 1.023755669593811,grad_norm: 0.9999994533718928, iteration: 8711
loss: 1.0353695154190063,grad_norm: 0.999999452685142, iteration: 8712
loss: 1.0724425315856934,grad_norm: 0.9999992613851972, iteration: 8713
loss: 1.0879604816436768,grad_norm: 0.999999326099907, iteration: 8714
loss: 1.0715179443359375,grad_norm: 0.9999993059004861, iteration: 8715
loss: 1.0118305683135986,grad_norm: 0.9999993166138537, iteration: 8716
loss: 1.0880651473999023,grad_norm: 0.9999993245796217, iteration: 8717
loss: 1.069378137588501,grad_norm: 0.9999993253229961, iteration: 8718
loss: 1.0512609481811523,grad_norm: 0.9999992106629035, iteration: 8719
loss: 1.0391297340393066,grad_norm: 0.9999996534346159, iteration: 8720
loss: 1.058539867401123,grad_norm: 0.9999993001921583, iteration: 8721
loss: 0.9953774809837341,grad_norm: 0.9999994593751523, iteration: 8722
loss: 1.0445423126220703,grad_norm: 0.9999994939756783, iteration: 8723
loss: 1.025610089302063,grad_norm: 0.9999993249575603, iteration: 8724
loss: 1.0715965032577515,grad_norm: 0.9999995079225712, iteration: 8725
loss: 0.9864305853843689,grad_norm: 0.999999296024911, iteration: 8726
loss: 1.0924910306930542,grad_norm: 0.9999994970272941, iteration: 8727
loss: 1.0613200664520264,grad_norm: 0.9999996418972728, iteration: 8728
loss: 1.0447840690612793,grad_norm: 0.9999995641363505, iteration: 8729
loss: 1.075720191001892,grad_norm: 0.9999993452267084, iteration: 8730
loss: 1.0715256929397583,grad_norm: 0.9999993273944803, iteration: 8731
loss: 1.0579155683517456,grad_norm: 0.9999992920033295, iteration: 8732
loss: 1.1302460432052612,grad_norm: 0.9999998354336, iteration: 8733
loss: 1.0723137855529785,grad_norm: 0.9999994535866421, iteration: 8734
loss: 1.0724025964736938,grad_norm: 0.9999994853795829, iteration: 8735
loss: 1.030393123626709,grad_norm: 0.9999996085686886, iteration: 8736
loss: 1.0876195430755615,grad_norm: 0.999999721115783, iteration: 8737
loss: 1.0376362800598145,grad_norm: 0.9999994803051553, iteration: 8738
loss: 1.0562934875488281,grad_norm: 0.9999992792314183, iteration: 8739
loss: 1.0745538473129272,grad_norm: 0.9999996921580964, iteration: 8740
loss: 1.0081878900527954,grad_norm: 0.9999993718933518, iteration: 8741
loss: 1.0788155794143677,grad_norm: 0.9999997281438251, iteration: 8742
loss: 1.040761113166809,grad_norm: 0.9999994203022192, iteration: 8743
loss: 1.0367423295974731,grad_norm: 0.9999992756288034, iteration: 8744
loss: 1.0963923931121826,grad_norm: 0.9999996179357534, iteration: 8745
loss: 1.042303204536438,grad_norm: 0.9999993998529205, iteration: 8746
loss: 1.0574918985366821,grad_norm: 0.9999993164456799, iteration: 8747
loss: 1.0521318912506104,grad_norm: 0.9999991038731054, iteration: 8748
loss: 1.026585578918457,grad_norm: 0.999999483806584, iteration: 8749
loss: 1.0572582483291626,grad_norm: 0.9999995966879826, iteration: 8750
loss: 1.0807400941848755,grad_norm: 0.9999994657899149, iteration: 8751
loss: 1.0363391637802124,grad_norm: 0.9999994675020815, iteration: 8752
loss: 1.0009301900863647,grad_norm: 0.9999994391178385, iteration: 8753
loss: 1.093601942062378,grad_norm: 0.9999994324574145, iteration: 8754
loss: 1.045835018157959,grad_norm: 0.9999993094842481, iteration: 8755
loss: 1.0398759841918945,grad_norm: 0.9999993397076128, iteration: 8756
loss: 1.0647988319396973,grad_norm: 0.999999460014888, iteration: 8757
loss: 1.056936264038086,grad_norm: 0.9999994160059364, iteration: 8758
loss: 1.053026556968689,grad_norm: 0.9999994960506295, iteration: 8759
loss: 1.1068449020385742,grad_norm: 0.9999996422220515, iteration: 8760
loss: 1.031903862953186,grad_norm: 0.9999992306402645, iteration: 8761
loss: 1.0494498014450073,grad_norm: 0.9999995121400852, iteration: 8762
loss: 1.0256669521331787,grad_norm: 0.9999993030272936, iteration: 8763
loss: 1.008699655532837,grad_norm: 0.999999496887437, iteration: 8764
loss: 1.0460108518600464,grad_norm: 0.9999991230062615, iteration: 8765
loss: 1.04099702835083,grad_norm: 0.9999997360890301, iteration: 8766
loss: 1.0603100061416626,grad_norm: 0.9999996307080001, iteration: 8767
loss: 1.076093077659607,grad_norm: 0.9999994827687205, iteration: 8768
loss: 0.9871603846549988,grad_norm: 0.9999993295745884, iteration: 8769
loss: 1.038873314857483,grad_norm: 0.9999992538797393, iteration: 8770
loss: 1.091135859489441,grad_norm: 0.9999993273209693, iteration: 8771
loss: 1.0175280570983887,grad_norm: 0.9999992727029997, iteration: 8772
loss: 1.1062570810317993,grad_norm: 0.9999995893929989, iteration: 8773
loss: 1.0303068161010742,grad_norm: 0.9999996150060871, iteration: 8774
loss: 1.0157161951065063,grad_norm: 0.999999566734924, iteration: 8775
loss: 1.0393247604370117,grad_norm: 0.999999655851208, iteration: 8776
loss: 1.0768489837646484,grad_norm: 0.9999994937208676, iteration: 8777
loss: 1.091172218322754,grad_norm: 0.9999994990122053, iteration: 8778
loss: 1.076951265335083,grad_norm: 0.9999993734762972, iteration: 8779
loss: 1.1009317636489868,grad_norm: 0.9999996842561059, iteration: 8780
loss: 0.9791975617408752,grad_norm: 0.9999993140959096, iteration: 8781
loss: 1.0157136917114258,grad_norm: 0.9999992462550007, iteration: 8782
loss: 1.0275007486343384,grad_norm: 0.9999992779481172, iteration: 8783
loss: 1.0268874168395996,grad_norm: 0.9999996516664783, iteration: 8784
loss: 1.1206544637680054,grad_norm: 0.9999997718330786, iteration: 8785
loss: 1.0316630601882935,grad_norm: 0.9999992223837423, iteration: 8786
loss: 1.0598503351211548,grad_norm: 0.9999997025683273, iteration: 8787
loss: 1.009784460067749,grad_norm: 0.9999993120106903, iteration: 8788
loss: 1.0590537786483765,grad_norm: 0.9999993216095276, iteration: 8789
loss: 1.058186411857605,grad_norm: 0.9999995668548411, iteration: 8790
loss: 1.0614187717437744,grad_norm: 0.9999994580343912, iteration: 8791
loss: 1.0565290451049805,grad_norm: 0.9999991862382585, iteration: 8792
loss: 1.0744068622589111,grad_norm: 0.9999994089586498, iteration: 8793
loss: 1.0552927255630493,grad_norm: 0.9999997827630386, iteration: 8794
loss: 1.0473973751068115,grad_norm: 0.9999994117206675, iteration: 8795
loss: 1.0875588655471802,grad_norm: 0.9999994766304672, iteration: 8796
loss: 0.9679375290870667,grad_norm: 0.9999992119417176, iteration: 8797
loss: 1.039966344833374,grad_norm: 0.9999994217381609, iteration: 8798
loss: 1.0412784814834595,grad_norm: 0.9999992082142966, iteration: 8799
loss: 1.0232024192810059,grad_norm: 0.9999994238866284, iteration: 8800
loss: 1.0489726066589355,grad_norm: 0.9999994268167047, iteration: 8801
loss: 1.0254799127578735,grad_norm: 0.9999993821135913, iteration: 8802
loss: 1.090635895729065,grad_norm: 0.9999993280547445, iteration: 8803
loss: 1.0378601551055908,grad_norm: 0.9999992765184609, iteration: 8804
loss: 1.0648525953292847,grad_norm: 0.9999993447060339, iteration: 8805
loss: 1.0545991659164429,grad_norm: 0.9999993042008645, iteration: 8806
loss: 0.959244430065155,grad_norm: 0.9999993184624472, iteration: 8807
loss: 1.070474624633789,grad_norm: 0.9999996489533362, iteration: 8808
loss: 1.0432817935943604,grad_norm: 0.9999993285014763, iteration: 8809
loss: 1.072538137435913,grad_norm: 0.999999603287105, iteration: 8810
loss: 1.0759793519973755,grad_norm: 0.9999994242252647, iteration: 8811
loss: 1.0530544519424438,grad_norm: 0.9999998328596966, iteration: 8812
loss: 1.057314395904541,grad_norm: 0.9999993652860953, iteration: 8813
loss: 1.0558379888534546,grad_norm: 0.9999994035430851, iteration: 8814
loss: 1.0099073648452759,grad_norm: 0.9999995712447873, iteration: 8815
loss: 1.023892879486084,grad_norm: 0.9999992360785467, iteration: 8816
loss: 1.0302248001098633,grad_norm: 0.9999994381493454, iteration: 8817
loss: 1.017470121383667,grad_norm: 0.9999996484323852, iteration: 8818
loss: 1.0701017379760742,grad_norm: 0.999999301032371, iteration: 8819
loss: 1.0083293914794922,grad_norm: 0.9999993627226712, iteration: 8820
loss: 1.0349140167236328,grad_norm: 0.9999994078213171, iteration: 8821
loss: 1.026923418045044,grad_norm: 0.999999216515062, iteration: 8822
loss: 1.0545434951782227,grad_norm: 0.9999996430034582, iteration: 8823
loss: 1.006595253944397,grad_norm: 0.9999992362362004, iteration: 8824
loss: 1.0074342489242554,grad_norm: 0.9999994394346372, iteration: 8825
loss: 1.0538798570632935,grad_norm: 0.9999995295312742, iteration: 8826
loss: 1.0608760118484497,grad_norm: 0.9999998037601452, iteration: 8827
loss: 1.0821912288665771,grad_norm: 0.9999995310475026, iteration: 8828
loss: 1.0931342840194702,grad_norm: 0.9999996348889898, iteration: 8829
loss: 1.042602300643921,grad_norm: 0.999999426905362, iteration: 8830
loss: 1.0127243995666504,grad_norm: 0.9999992672900565, iteration: 8831
loss: 1.0422320365905762,grad_norm: 0.9999991679314675, iteration: 8832
loss: 1.004868507385254,grad_norm: 0.9999994268477947, iteration: 8833
loss: 1.1396112442016602,grad_norm: 0.9999996542626695, iteration: 8834
loss: 1.0235495567321777,grad_norm: 0.9999993918704063, iteration: 8835
loss: 1.0376352071762085,grad_norm: 0.999999279983962, iteration: 8836
loss: 1.0741755962371826,grad_norm: 0.9999996515436889, iteration: 8837
loss: 1.0621323585510254,grad_norm: 0.9999993038082398, iteration: 8838
loss: 0.9954050183296204,grad_norm: 0.9999994436902432, iteration: 8839
loss: 1.0318381786346436,grad_norm: 0.999999517107277, iteration: 8840
loss: 1.014269232749939,grad_norm: 0.9999994687027849, iteration: 8841
loss: 1.0234990119934082,grad_norm: 0.9999993719408218, iteration: 8842
loss: 1.070475697517395,grad_norm: 0.9999996081048443, iteration: 8843
loss: 1.021697998046875,grad_norm: 0.9999993875330223, iteration: 8844
loss: 1.0973162651062012,grad_norm: 0.9999996446841358, iteration: 8845
loss: 1.030664086341858,grad_norm: 0.9999992494615022, iteration: 8846
loss: 1.0653901100158691,grad_norm: 0.9999995321779008, iteration: 8847
loss: 1.0792436599731445,grad_norm: 0.9999995781754967, iteration: 8848
loss: 1.0302892923355103,grad_norm: 0.9999992797825467, iteration: 8849
loss: 1.0609852075576782,grad_norm: 0.9999993304090584, iteration: 8850
loss: 1.0205416679382324,grad_norm: 0.999999765680217, iteration: 8851
loss: 1.031690239906311,grad_norm: 0.9999994613217146, iteration: 8852
loss: 1.0293831825256348,grad_norm: 0.9999994285867737, iteration: 8853
loss: 1.059757947921753,grad_norm: 0.9999995698428219, iteration: 8854
loss: 1.0668818950653076,grad_norm: 0.9999995190021306, iteration: 8855
loss: 1.088025689125061,grad_norm: 0.9999993726085722, iteration: 8856
loss: 0.9946335554122925,grad_norm: 0.9999996530726304, iteration: 8857
loss: 1.031773567199707,grad_norm: 0.9999993624257184, iteration: 8858
loss: 1.0989530086517334,grad_norm: 0.9999994794486708, iteration: 8859
loss: 1.0752753019332886,grad_norm: 0.9999994701028643, iteration: 8860
loss: 1.0244742631912231,grad_norm: 0.9999992644327956, iteration: 8861
loss: 1.1403077840805054,grad_norm: 0.9999997467209208, iteration: 8862
loss: 1.0091865062713623,grad_norm: 0.9999992803533276, iteration: 8863
loss: 1.0002081394195557,grad_norm: 0.9999991926697561, iteration: 8864
loss: 1.0718885660171509,grad_norm: 0.9999994554126592, iteration: 8865
loss: 1.062699317932129,grad_norm: 0.9999993160499002, iteration: 8866
loss: 1.0545225143432617,grad_norm: 0.9999994857606119, iteration: 8867
loss: 1.0494261980056763,grad_norm: 0.9999995403694542, iteration: 8868
loss: 1.0685741901397705,grad_norm: 0.9999996044940662, iteration: 8869
loss: 1.0169668197631836,grad_norm: 0.9999993442317511, iteration: 8870
loss: 1.0177502632141113,grad_norm: 0.9999992857041112, iteration: 8871
loss: 1.0557329654693604,grad_norm: 0.9999992602870944, iteration: 8872
loss: 1.0859084129333496,grad_norm: 0.9999997193840251, iteration: 8873
loss: 1.0616557598114014,grad_norm: 0.9999992276645012, iteration: 8874
loss: 1.0543946027755737,grad_norm: 0.999999438703007, iteration: 8875
loss: 1.0789875984191895,grad_norm: 0.9999993665576306, iteration: 8876
loss: 1.0094701051712036,grad_norm: 0.9999992564766038, iteration: 8877
loss: 1.0490272045135498,grad_norm: 0.9999993420017507, iteration: 8878
loss: 1.0330890417099,grad_norm: 0.9999996049354456, iteration: 8879
loss: 1.043709635734558,grad_norm: 0.9999991215033222, iteration: 8880
loss: 1.0220638513565063,grad_norm: 0.9999994557115885, iteration: 8881
loss: 1.0216034650802612,grad_norm: 0.9999993550871614, iteration: 8882
loss: 1.057377576828003,grad_norm: 0.9999993770015648, iteration: 8883
loss: 0.9955223202705383,grad_norm: 0.9999992505360552, iteration: 8884
loss: 1.1110550165176392,grad_norm: 0.9999996062435165, iteration: 8885
loss: 1.0594121217727661,grad_norm: 0.9999992003653877, iteration: 8886
loss: 0.9963018894195557,grad_norm: 0.9999991359914842, iteration: 8887
loss: 1.0648521184921265,grad_norm: 0.9999993444857173, iteration: 8888
loss: 1.0815255641937256,grad_norm: 0.9999993562044786, iteration: 8889
loss: 1.057530403137207,grad_norm: 0.999999599881458, iteration: 8890
loss: 1.0014561414718628,grad_norm: 0.9999992733344862, iteration: 8891
loss: 1.0476516485214233,grad_norm: 0.9999992362921075, iteration: 8892
loss: 1.0253418684005737,grad_norm: 0.9999993808975564, iteration: 8893
loss: 1.0815850496292114,grad_norm: 0.9999997279237732, iteration: 8894
loss: 1.052287220954895,grad_norm: 0.9999995860574227, iteration: 8895
loss: 1.076926589012146,grad_norm: 0.9999993902923303, iteration: 8896
loss: 1.0958236455917358,grad_norm: 0.9999993714294947, iteration: 8897
loss: 1.0330685377120972,grad_norm: 0.9999993504367566, iteration: 8898
loss: 1.0403082370758057,grad_norm: 0.9999992949950347, iteration: 8899
loss: 1.0501563549041748,grad_norm: 0.9999994170541208, iteration: 8900
loss: 1.030153512954712,grad_norm: 0.9999995172003077, iteration: 8901
loss: 1.0559178590774536,grad_norm: 0.9999993586485535, iteration: 8902
loss: 1.05464506149292,grad_norm: 0.9999993854641414, iteration: 8903
loss: 1.0216501951217651,grad_norm: 0.9999992640346427, iteration: 8904
loss: 1.1213525533676147,grad_norm: 0.9999994092328022, iteration: 8905
loss: 1.0567809343338013,grad_norm: 0.9999995511923642, iteration: 8906
loss: 1.0528984069824219,grad_norm: 0.9999992914431551, iteration: 8907
loss: 1.0410315990447998,grad_norm: 0.999999438787602, iteration: 8908
loss: 1.0389108657836914,grad_norm: 0.9999992993225301, iteration: 8909
loss: 1.0751954317092896,grad_norm: 0.9999995331824835, iteration: 8910
loss: 0.9769100546836853,grad_norm: 0.999999288933893, iteration: 8911
loss: 0.9830271005630493,grad_norm: 0.9999993553817524, iteration: 8912
loss: 1.055281162261963,grad_norm: 0.9999994103672261, iteration: 8913
loss: 1.0351184606552124,grad_norm: 0.9999993808643137, iteration: 8914
loss: 1.1027249097824097,grad_norm: 0.9999998376857604, iteration: 8915
loss: 0.9991891980171204,grad_norm: 0.9999994629066409, iteration: 8916
loss: 1.0867490768432617,grad_norm: 0.9999994482431787, iteration: 8917
loss: 1.0602800846099854,grad_norm: 0.9999993631125806, iteration: 8918
loss: 1.053133487701416,grad_norm: 0.9999992698529433, iteration: 8919
loss: 0.9867759943008423,grad_norm: 0.9999993996888762, iteration: 8920
loss: 1.0054534673690796,grad_norm: 0.9999992764477724, iteration: 8921
loss: 1.083649754524231,grad_norm: 0.9999994168696603, iteration: 8922
loss: 1.038641333580017,grad_norm: 0.9999993816882284, iteration: 8923
loss: 1.0300977230072021,grad_norm: 0.9999993767238002, iteration: 8924
loss: 1.0684185028076172,grad_norm: 0.9999995742434361, iteration: 8925
loss: 1.0475066900253296,grad_norm: 0.9999992219226856, iteration: 8926
loss: 1.0632843971252441,grad_norm: 0.9999995504114984, iteration: 8927
loss: 1.0949467420578003,grad_norm: 0.999999441792809, iteration: 8928
loss: 1.1213129758834839,grad_norm: 0.9999994845614845, iteration: 8929
loss: 1.0430245399475098,grad_norm: 0.999999156280284, iteration: 8930
loss: 1.0357447862625122,grad_norm: 0.9999993709074756, iteration: 8931
loss: 1.0093774795532227,grad_norm: 0.9999992848131383, iteration: 8932
loss: 1.0723373889923096,grad_norm: 0.9999997562421502, iteration: 8933
loss: 1.0261138677597046,grad_norm: 0.9999993667108492, iteration: 8934
loss: 1.0239251852035522,grad_norm: 0.9999995025265926, iteration: 8935
loss: 1.0642119646072388,grad_norm: 0.9999995424033551, iteration: 8936
loss: 1.0343692302703857,grad_norm: 0.9999992682568103, iteration: 8937
loss: 1.0827608108520508,grad_norm: 0.999999540368573, iteration: 8938
loss: 1.0272670984268188,grad_norm: 0.9999995285831487, iteration: 8939
loss: 1.0594009160995483,grad_norm: 0.999999364232443, iteration: 8940
loss: 1.0201442241668701,grad_norm: 0.9999993814616155, iteration: 8941
loss: 0.9968863725662231,grad_norm: 0.9999992827194971, iteration: 8942
loss: 1.0356804132461548,grad_norm: 0.9999994384344537, iteration: 8943
loss: 1.0623111724853516,grad_norm: 0.9999992951490579, iteration: 8944
loss: 1.0650570392608643,grad_norm: 0.9999994389614528, iteration: 8945
loss: 1.0523322820663452,grad_norm: 0.9999993367093838, iteration: 8946
loss: 1.052937388420105,grad_norm: 0.999999287311665, iteration: 8947
loss: 1.0269496440887451,grad_norm: 0.999999288381191, iteration: 8948
loss: 1.0155736207962036,grad_norm: 0.9999992863828864, iteration: 8949
loss: 1.0173346996307373,grad_norm: 0.9999991976300348, iteration: 8950
loss: 0.9884313941001892,grad_norm: 0.9999992581854139, iteration: 8951
loss: 1.0895233154296875,grad_norm: 0.9999994449678586, iteration: 8952
loss: 1.026303768157959,grad_norm: 0.9999993115865572, iteration: 8953
loss: 1.1136630773544312,grad_norm: 0.9999993487011819, iteration: 8954
loss: 1.0293105840682983,grad_norm: 0.999999282089758, iteration: 8955
loss: 1.0371760129928589,grad_norm: 0.9999996086868359, iteration: 8956
loss: 1.0134592056274414,grad_norm: 0.9999993253765257, iteration: 8957
loss: 1.0319445133209229,grad_norm: 0.9999994219081044, iteration: 8958
loss: 1.035342812538147,grad_norm: 0.9999994183468615, iteration: 8959
loss: 0.9948937892913818,grad_norm: 0.9999991889505294, iteration: 8960
loss: 1.0005130767822266,grad_norm: 0.9999996812396497, iteration: 8961
loss: 1.054100751876831,grad_norm: 0.9999993720626019, iteration: 8962
loss: 1.102921485900879,grad_norm: 0.9999995668663368, iteration: 8963
loss: 1.028975009918213,grad_norm: 0.9999992035636553, iteration: 8964
loss: 1.0497772693634033,grad_norm: 0.9999991544178993, iteration: 8965
loss: 1.052984595298767,grad_norm: 0.9999994227093791, iteration: 8966
loss: 1.018794298171997,grad_norm: 0.9999991940299058, iteration: 8967
loss: 1.0655850172042847,grad_norm: 0.9999994119859208, iteration: 8968
loss: 1.061545968055725,grad_norm: 0.9999993333124128, iteration: 8969
loss: 1.0559710264205933,grad_norm: 0.999999303551901, iteration: 8970
loss: 0.9786393046379089,grad_norm: 0.9999993811301056, iteration: 8971
loss: 1.0748331546783447,grad_norm: 0.9999993832052476, iteration: 8972
loss: 1.0666859149932861,grad_norm: 0.9999993364957639, iteration: 8973
loss: 1.087070345878601,grad_norm: 0.9999996204722427, iteration: 8974
loss: 1.026168942451477,grad_norm: 0.9999995782742797, iteration: 8975
loss: 1.0596075057983398,grad_norm: 0.999999361494204, iteration: 8976
loss: 1.1212983131408691,grad_norm: 0.9999999123167606, iteration: 8977
loss: 1.001605749130249,grad_norm: 0.9999997808092118, iteration: 8978
loss: 1.0468473434448242,grad_norm: 0.999999278403589, iteration: 8979
loss: 1.0442465543746948,grad_norm: 0.9999993733320414, iteration: 8980
loss: 1.068712830543518,grad_norm: 0.9999991469361849, iteration: 8981
loss: 0.9944387674331665,grad_norm: 0.9999992455778745, iteration: 8982
loss: 0.9936177730560303,grad_norm: 0.9999993669808888, iteration: 8983
loss: 1.0139297246932983,grad_norm: 0.9999994060188987, iteration: 8984
loss: 1.0391067266464233,grad_norm: 0.9999993142403384, iteration: 8985
loss: 1.04867422580719,grad_norm: 0.999999386605708, iteration: 8986
loss: 1.0699647665023804,grad_norm: 0.9999995098070862, iteration: 8987
loss: 1.0292510986328125,grad_norm: 0.9999992420193817, iteration: 8988
loss: 1.0476298332214355,grad_norm: 0.9999993190093993, iteration: 8989
loss: 1.0676093101501465,grad_norm: 0.9999992256653786, iteration: 8990
loss: 1.0885505676269531,grad_norm: 0.9999994830595595, iteration: 8991
loss: 1.0390263795852661,grad_norm: 0.9999993732100712, iteration: 8992
loss: 1.0582863092422485,grad_norm: 0.9999994863429311, iteration: 8993
loss: 1.076673984527588,grad_norm: 0.9999996090660367, iteration: 8994
loss: 0.9608110189437866,grad_norm: 0.9999995029682587, iteration: 8995
loss: 1.0094621181488037,grad_norm: 0.999999167715325, iteration: 8996
loss: 1.0519388914108276,grad_norm: 0.9999992575851672, iteration: 8997
loss: 1.0818983316421509,grad_norm: 0.9999993943294587, iteration: 8998
loss: 1.0167512893676758,grad_norm: 0.9999996978286001, iteration: 8999
loss: 1.04026460647583,grad_norm: 0.9999994628598192, iteration: 9000
loss: 1.0969936847686768,grad_norm: 0.9999994949812328, iteration: 9001
loss: 1.108893871307373,grad_norm: 0.9999996947774877, iteration: 9002
loss: 1.0230966806411743,grad_norm: 0.9999993722999141, iteration: 9003
loss: 1.0654897689819336,grad_norm: 0.9999993889317196, iteration: 9004
loss: 1.0000274181365967,grad_norm: 0.999999263220246, iteration: 9005
loss: 1.1017347574234009,grad_norm: 0.9999995395783914, iteration: 9006
loss: 1.0189573764801025,grad_norm: 0.9999993295046994, iteration: 9007
loss: 1.0601121187210083,grad_norm: 0.9999994994608592, iteration: 9008
loss: 1.0935579538345337,grad_norm: 0.9999993639701913, iteration: 9009
loss: 1.1068817377090454,grad_norm: 0.9999995380171336, iteration: 9010
loss: 1.0632661581039429,grad_norm: 0.9999991708390171, iteration: 9011
loss: 1.0802664756774902,grad_norm: 0.9999994832540786, iteration: 9012
loss: 1.0315353870391846,grad_norm: 0.9999992571010199, iteration: 9013
loss: 1.092855453491211,grad_norm: 0.9999993975712276, iteration: 9014
loss: 1.0342376232147217,grad_norm: 0.9999994071494456, iteration: 9015
loss: 1.0719281435012817,grad_norm: 0.9999995786575233, iteration: 9016
loss: 1.0744937658309937,grad_norm: 0.9999994217117152, iteration: 9017
loss: 0.9725911021232605,grad_norm: 0.999999275229993, iteration: 9018
loss: 1.06910240650177,grad_norm: 0.9999992658876332, iteration: 9019
loss: 1.0505626201629639,grad_norm: 0.9999993924491886, iteration: 9020
loss: 1.0756595134735107,grad_norm: 0.9999994736942344, iteration: 9021
loss: 1.0765910148620605,grad_norm: 0.9999994108870138, iteration: 9022
loss: 1.0084642171859741,grad_norm: 0.999999356768701, iteration: 9023
loss: 1.0651143789291382,grad_norm: 0.9999993881436022, iteration: 9024
loss: 1.063528060913086,grad_norm: 0.9999995938382582, iteration: 9025
loss: 1.1372125148773193,grad_norm: 0.9999997119400303, iteration: 9026
loss: 1.1072582006454468,grad_norm: 0.9999995865060214, iteration: 9027
loss: 1.0098174810409546,grad_norm: 0.9999992745911547, iteration: 9028
loss: 1.0573056936264038,grad_norm: 0.9999994300605239, iteration: 9029
loss: 1.068243384361267,grad_norm: 0.9999993698424425, iteration: 9030
loss: 0.9859773516654968,grad_norm: 0.9999992363467406, iteration: 9031
loss: 1.055928349494934,grad_norm: 0.9999992493962763, iteration: 9032
loss: 1.0874139070510864,grad_norm: 0.9999993745811921, iteration: 9033
loss: 1.0381510257720947,grad_norm: 0.9999992528529297, iteration: 9034
loss: 0.986781895160675,grad_norm: 0.9999995339468079, iteration: 9035
loss: 1.029674768447876,grad_norm: 0.9999994021023989, iteration: 9036
loss: 1.0402920246124268,grad_norm: 0.9999992506721348, iteration: 9037
loss: 1.0999280214309692,grad_norm: 0.9999993546798888, iteration: 9038
loss: 1.051819920539856,grad_norm: 0.99999927125519, iteration: 9039
loss: 0.960463285446167,grad_norm: 0.9999996076434401, iteration: 9040
loss: 1.006866455078125,grad_norm: 0.9999993440620234, iteration: 9041
loss: 1.024910569190979,grad_norm: 0.9999992290958619, iteration: 9042
loss: 1.034825325012207,grad_norm: 0.9999994957818109, iteration: 9043
loss: 1.057322382926941,grad_norm: 0.9999993798897079, iteration: 9044
loss: 1.0151536464691162,grad_norm: 0.9999996011750714, iteration: 9045
loss: 1.036286473274231,grad_norm: 0.9999991627638579, iteration: 9046
loss: 1.0709680318832397,grad_norm: 0.9999994048535922, iteration: 9047
loss: 1.095333218574524,grad_norm: 0.9999995668591213, iteration: 9048
loss: 1.0921566486358643,grad_norm: 0.9999993383906715, iteration: 9049
loss: 1.0553395748138428,grad_norm: 0.9999993364143811, iteration: 9050
loss: 1.042386770248413,grad_norm: 0.999999655600707, iteration: 9051
loss: 1.0818085670471191,grad_norm: 0.9999993893202437, iteration: 9052
loss: 1.075639009475708,grad_norm: 0.9999996612452192, iteration: 9053
loss: 1.0700165033340454,grad_norm: 0.9999993191643848, iteration: 9054
loss: 1.0823851823806763,grad_norm: 0.9999994838303353, iteration: 9055
loss: 1.0510579347610474,grad_norm: 0.99999975389249, iteration: 9056
loss: 1.0085499286651611,grad_norm: 0.9999997404892307, iteration: 9057
loss: 1.0647637844085693,grad_norm: 0.9999994495852823, iteration: 9058
loss: 1.1055558919906616,grad_norm: 0.9999994242288855, iteration: 9059
loss: 1.0622797012329102,grad_norm: 0.9999997305741694, iteration: 9060
loss: 1.0600764751434326,grad_norm: 0.9999992857772827, iteration: 9061
loss: 1.099563717842102,grad_norm: 0.9999993726519225, iteration: 9062
loss: 1.0430701971054077,grad_norm: 0.9999993829432502, iteration: 9063
loss: 1.068599820137024,grad_norm: 0.9999993476423767, iteration: 9064
loss: 1.0453718900680542,grad_norm: 0.9999994970557189, iteration: 9065
loss: 1.0577712059020996,grad_norm: 0.9999993800898823, iteration: 9066
loss: 1.0093070268630981,grad_norm: 0.9999992794634329, iteration: 9067
loss: 1.0373189449310303,grad_norm: 0.9999993209110223, iteration: 9068
loss: 1.0701953172683716,grad_norm: 0.9999996865808816, iteration: 9069
loss: 1.012563705444336,grad_norm: 0.99999940711354, iteration: 9070
loss: 1.106533169746399,grad_norm: 0.9999995997602855, iteration: 9071
loss: 1.0391157865524292,grad_norm: 0.9999993972992368, iteration: 9072
loss: 1.0458087921142578,grad_norm: 0.999999431849435, iteration: 9073
loss: 1.0343115329742432,grad_norm: 0.9999991733598722, iteration: 9074
loss: 1.0957223176956177,grad_norm: 0.9999994210745738, iteration: 9075
loss: 1.0760935544967651,grad_norm: 0.999999710486704, iteration: 9076
loss: 1.0579688549041748,grad_norm: 0.9999992175490524, iteration: 9077
loss: 1.1223711967468262,grad_norm: 0.9999997297274421, iteration: 9078
loss: 1.0106217861175537,grad_norm: 0.9999992927087814, iteration: 9079
loss: 1.0162742137908936,grad_norm: 0.9999995784486029, iteration: 9080
loss: 1.0154680013656616,grad_norm: 0.9999993598444026, iteration: 9081
loss: 1.0701349973678589,grad_norm: 0.9999992995566754, iteration: 9082
loss: 1.066224455833435,grad_norm: 0.9999994936575519, iteration: 9083
loss: 1.0130349397659302,grad_norm: 0.9999993414485693, iteration: 9084
loss: 0.9698578119277954,grad_norm: 0.9999993191658413, iteration: 9085
loss: 1.008301019668579,grad_norm: 0.9999995699726204, iteration: 9086
loss: 1.0694838762283325,grad_norm: 0.9999994125685129, iteration: 9087
loss: 1.0815495252609253,grad_norm: 0.9999996206603516, iteration: 9088
loss: 1.0501811504364014,grad_norm: 0.9999994194125214, iteration: 9089
loss: 1.0470982789993286,grad_norm: 0.9999992568674952, iteration: 9090
loss: 1.042920708656311,grad_norm: 0.9999996088576764, iteration: 9091
loss: 0.9741997718811035,grad_norm: 0.9999994443920597, iteration: 9092
loss: 0.9481968879699707,grad_norm: 0.9999994042079835, iteration: 9093
loss: 1.0809602737426758,grad_norm: 0.999999505746691, iteration: 9094
loss: 1.0722064971923828,grad_norm: 0.9999994218974223, iteration: 9095
loss: 1.0630232095718384,grad_norm: 0.999999319879398, iteration: 9096
loss: 1.067496418952942,grad_norm: 0.9999993891756725, iteration: 9097
loss: 1.07574462890625,grad_norm: 0.9999996205545154, iteration: 9098
loss: 1.0178841352462769,grad_norm: 0.9999995790920894, iteration: 9099
loss: 1.0771002769470215,grad_norm: 0.9999993517480059, iteration: 9100
loss: 1.0300889015197754,grad_norm: 0.9999994857451817, iteration: 9101
loss: 1.0459157228469849,grad_norm: 0.9999993732302318, iteration: 9102
loss: 1.0564943552017212,grad_norm: 0.999999233916432, iteration: 9103
loss: 1.0498889684677124,grad_norm: 0.9999994892996977, iteration: 9104
loss: 1.032976508140564,grad_norm: 0.9999992014555484, iteration: 9105
loss: 1.067921757698059,grad_norm: 0.9999992943649646, iteration: 9106
loss: 1.0790793895721436,grad_norm: 0.999999274471257, iteration: 9107
loss: 1.0410841703414917,grad_norm: 0.9999995656013531, iteration: 9108
loss: 1.0647163391113281,grad_norm: 0.9999994417718598, iteration: 9109
loss: 0.9867990612983704,grad_norm: 0.9999993576415611, iteration: 9110
loss: 1.0287137031555176,grad_norm: 0.999999477087543, iteration: 9111
loss: 1.0294638872146606,grad_norm: 0.9999993364176809, iteration: 9112
loss: 1.0323970317840576,grad_norm: 0.999999162131326, iteration: 9113
loss: 1.0501105785369873,grad_norm: 0.9999992263937811, iteration: 9114
loss: 1.0821869373321533,grad_norm: 0.9999995056540563, iteration: 9115
loss: 0.9674335718154907,grad_norm: 0.9999992252245661, iteration: 9116
loss: 1.0735347270965576,grad_norm: 0.9999995040455198, iteration: 9117
loss: 1.0888932943344116,grad_norm: 0.9999995805156779, iteration: 9118
loss: 1.0810686349868774,grad_norm: 0.9999993413590482, iteration: 9119
loss: 0.9877193570137024,grad_norm: 0.9999993367400983, iteration: 9120
loss: 1.0601880550384521,grad_norm: 0.9999992181848298, iteration: 9121
loss: 1.027641773223877,grad_norm: 0.999999282138563, iteration: 9122
loss: 0.9978417754173279,grad_norm: 0.9999994630106975, iteration: 9123
loss: 1.0111504793167114,grad_norm: 0.9999993780014614, iteration: 9124
loss: 1.0172351598739624,grad_norm: 0.9999992718048293, iteration: 9125
loss: 1.0180178880691528,grad_norm: 0.999999575971597, iteration: 9126
loss: 1.0772007703781128,grad_norm: 0.9999993624718551, iteration: 9127
loss: 1.0353623628616333,grad_norm: 0.9999993696932983, iteration: 9128
loss: 1.0508626699447632,grad_norm: 0.999999392163069, iteration: 9129
loss: 1.1435682773590088,grad_norm: 0.9999998947646213, iteration: 9130
loss: 1.0903139114379883,grad_norm: 0.9999995632530265, iteration: 9131
loss: 0.9741530418395996,grad_norm: 0.9999993211724775, iteration: 9132
loss: 1.0546709299087524,grad_norm: 0.9999996348188216, iteration: 9133
loss: 1.0120760202407837,grad_norm: 0.9999992744170861, iteration: 9134
loss: 1.0775038003921509,grad_norm: 0.9999995395474578, iteration: 9135
loss: 1.1257688999176025,grad_norm: 0.9999996739717505, iteration: 9136
loss: 1.081978678703308,grad_norm: 0.9999998125865797, iteration: 9137
loss: 1.0934091806411743,grad_norm: 0.9999996960818454, iteration: 9138
loss: 1.0475947856903076,grad_norm: 0.9999996343973963, iteration: 9139
loss: 1.054221510887146,grad_norm: 0.9999994042355748, iteration: 9140
loss: 1.0279858112335205,grad_norm: 0.9999995335095136, iteration: 9141
loss: 0.9714038372039795,grad_norm: 0.9999993911849333, iteration: 9142
loss: 1.0058757066726685,grad_norm: 0.9999992269023775, iteration: 9143
loss: 1.0758148431777954,grad_norm: 0.9999994141551486, iteration: 9144
loss: 0.9880030751228333,grad_norm: 0.9999992544814359, iteration: 9145
loss: 1.106460690498352,grad_norm: 0.9999996860939204, iteration: 9146
loss: 1.1433039903640747,grad_norm: 0.9999996537995168, iteration: 9147
loss: 1.0117473602294922,grad_norm: 0.9999992454439312, iteration: 9148
loss: 1.0609774589538574,grad_norm: 0.9999992362489915, iteration: 9149
loss: 1.0593925714492798,grad_norm: 0.9999991827985527, iteration: 9150
loss: 1.0091015100479126,grad_norm: 0.9999995843253767, iteration: 9151
loss: 1.0283476114273071,grad_norm: 0.999999670016173, iteration: 9152
loss: 1.0822526216506958,grad_norm: 0.9999994636038134, iteration: 9153
loss: 1.058612585067749,grad_norm: 0.9999996177647124, iteration: 9154
loss: 1.0962718725204468,grad_norm: 0.9999997493682039, iteration: 9155
loss: 1.0398691892623901,grad_norm: 0.9999995297859849, iteration: 9156
loss: 1.0218262672424316,grad_norm: 0.9999994941687895, iteration: 9157
loss: 1.00116765499115,grad_norm: 0.9999993955175663, iteration: 9158
loss: 1.0072635412216187,grad_norm: 0.9999994107175916, iteration: 9159
loss: 1.0190494060516357,grad_norm: 0.9999993505305584, iteration: 9160
loss: 1.0085880756378174,grad_norm: 0.9999994308845315, iteration: 9161
loss: 1.0633504390716553,grad_norm: 0.9999993245125461, iteration: 9162
loss: 1.0758671760559082,grad_norm: 0.9999994030699909, iteration: 9163
loss: 1.0222009420394897,grad_norm: 0.9999994812008557, iteration: 9164
loss: 1.0309345722198486,grad_norm: 0.9999996010613131, iteration: 9165
loss: 1.028577208518982,grad_norm: 0.9999992972037168, iteration: 9166
loss: 1.052556037902832,grad_norm: 0.9999994443853312, iteration: 9167
loss: 1.0309048891067505,grad_norm: 0.9999992249081134, iteration: 9168
loss: 1.0367153882980347,grad_norm: 0.9999994128001877, iteration: 9169
loss: 1.021504521369934,grad_norm: 0.9999994670002745, iteration: 9170
loss: 1.0182989835739136,grad_norm: 0.9999996372339687, iteration: 9171
loss: 1.1140576601028442,grad_norm: 0.9999997516406277, iteration: 9172
loss: 1.0614417791366577,grad_norm: 0.9999993238499096, iteration: 9173
loss: 1.039425015449524,grad_norm: 0.9999994982220001, iteration: 9174
loss: 1.1096835136413574,grad_norm: 0.9999994670690799, iteration: 9175
loss: 1.1288408041000366,grad_norm: 0.9999997196809913, iteration: 9176
loss: 1.0747705698013306,grad_norm: 0.9999993511528152, iteration: 9177
loss: 1.0117580890655518,grad_norm: 0.9999993000420447, iteration: 9178
loss: 1.0239781141281128,grad_norm: 0.9999993064751869, iteration: 9179
loss: 1.0351051092147827,grad_norm: 0.9999992544597475, iteration: 9180
loss: 1.0064517259597778,grad_norm: 0.9999992570942804, iteration: 9181
loss: 1.0349475145339966,grad_norm: 0.9999996559354495, iteration: 9182
loss: 1.0793625116348267,grad_norm: 0.9999995490574438, iteration: 9183
loss: 1.0044554471969604,grad_norm: 0.9999994364557573, iteration: 9184
loss: 1.0327059030532837,grad_norm: 0.9999994678732874, iteration: 9185
loss: 1.0695208311080933,grad_norm: 0.9999995289390082, iteration: 9186
loss: 0.9993487000465393,grad_norm: 0.9999993387981064, iteration: 9187
loss: 1.0158002376556396,grad_norm: 0.999999338335632, iteration: 9188
loss: 1.0316393375396729,grad_norm: 0.9999994766483703, iteration: 9189
loss: 1.0610673427581787,grad_norm: 0.9999993723290013, iteration: 9190
loss: 1.0771431922912598,grad_norm: 0.9999993774859051, iteration: 9191
loss: 1.063929557800293,grad_norm: 0.9999996017690262, iteration: 9192
loss: 1.0770100355148315,grad_norm: 0.9999994406077453, iteration: 9193
loss: 1.0347421169281006,grad_norm: 0.9999996009853396, iteration: 9194
loss: 1.0892747640609741,grad_norm: 0.999999399155549, iteration: 9195
loss: 1.0338672399520874,grad_norm: 0.9999992105909964, iteration: 9196
loss: 1.0545951128005981,grad_norm: 0.9999994073374778, iteration: 9197
loss: 1.0717010498046875,grad_norm: 0.999999143952656, iteration: 9198
loss: 0.9783190488815308,grad_norm: 0.9999993166455651, iteration: 9199
loss: 1.0511482954025269,grad_norm: 0.9999994242641754, iteration: 9200
loss: 1.088634967803955,grad_norm: 0.9999995341755368, iteration: 9201
loss: 1.05405592918396,grad_norm: 0.9999993303360941, iteration: 9202
loss: 1.0366814136505127,grad_norm: 0.9999993232972797, iteration: 9203
loss: 1.137877106666565,grad_norm: 0.999999516926716, iteration: 9204
loss: 1.042008399963379,grad_norm: 0.9999996480523371, iteration: 9205
loss: 1.0349247455596924,grad_norm: 0.9999994282419868, iteration: 9206
loss: 1.086930274963379,grad_norm: 0.9999993099224381, iteration: 9207
loss: 1.0440874099731445,grad_norm: 0.9999993883352967, iteration: 9208
loss: 1.0990194082260132,grad_norm: 0.9999995286014136, iteration: 9209
loss: 1.0879566669464111,grad_norm: 0.9999995529336286, iteration: 9210
loss: 1.13523530960083,grad_norm: 0.9999996293069655, iteration: 9211
loss: 1.050407886505127,grad_norm: 0.9999998250062719, iteration: 9212
loss: 1.0546363592147827,grad_norm: 0.9999993573813318, iteration: 9213
loss: 1.0783076286315918,grad_norm: 0.9999991986511191, iteration: 9214
loss: 1.0355827808380127,grad_norm: 0.9999994040585974, iteration: 9215
loss: 1.03786301612854,grad_norm: 0.9999993755042972, iteration: 9216
loss: 1.0365070104599,grad_norm: 0.9999993360680696, iteration: 9217
loss: 1.0890326499938965,grad_norm: 0.9999994689957291, iteration: 9218
loss: 1.0304920673370361,grad_norm: 0.999999359904468, iteration: 9219
loss: 1.0344696044921875,grad_norm: 0.9999993554344883, iteration: 9220
loss: 1.0519887208938599,grad_norm: 0.9999995754909214, iteration: 9221
loss: 1.0538389682769775,grad_norm: 0.9999991420181163, iteration: 9222
loss: 1.0512980222702026,grad_norm: 0.9999994749404022, iteration: 9223
loss: 1.0676909685134888,grad_norm: 0.9999994563050169, iteration: 9224
loss: 1.1089433431625366,grad_norm: 0.9999994707695531, iteration: 9225
loss: 1.0456024408340454,grad_norm: 0.9999993854546966, iteration: 9226
loss: 1.0510057210922241,grad_norm: 0.9999996847942114, iteration: 9227
loss: 1.0163267850875854,grad_norm: 0.9999994584181197, iteration: 9228
loss: 1.061707615852356,grad_norm: 0.9999993694246874, iteration: 9229
loss: 1.085719108581543,grad_norm: 0.9999995050002699, iteration: 9230
loss: 1.0465530157089233,grad_norm: 0.9999992959737635, iteration: 9231
loss: 1.0087890625,grad_norm: 0.9999993461280094, iteration: 9232
loss: 1.0263398885726929,grad_norm: 0.9999993118542193, iteration: 9233
loss: 1.0720343589782715,grad_norm: 0.9999995797074351, iteration: 9234
loss: 1.0445187091827393,grad_norm: 0.9999994633416469, iteration: 9235
loss: 1.0930877923965454,grad_norm: 0.9999995678124816, iteration: 9236
loss: 1.045323371887207,grad_norm: 0.999999200208894, iteration: 9237
loss: 1.0658539533615112,grad_norm: 0.9999993160015632, iteration: 9238
loss: 1.112336277961731,grad_norm: 0.9999995922441762, iteration: 9239
loss: 1.0514893531799316,grad_norm: 0.9999993011300612, iteration: 9240
loss: 1.0545437335968018,grad_norm: 0.9999993981273646, iteration: 9241
loss: 1.0520741939544678,grad_norm: 0.9999994361031544, iteration: 9242
loss: 1.0565698146820068,grad_norm: 0.9999992511465423, iteration: 9243
loss: 1.0388784408569336,grad_norm: 0.9999995150928274, iteration: 9244
loss: 0.9978395104408264,grad_norm: 0.9999993760816233, iteration: 9245
loss: 1.0466303825378418,grad_norm: 0.9999993119337843, iteration: 9246
loss: 1.065467119216919,grad_norm: 0.9999996631271478, iteration: 9247
loss: 1.0837500095367432,grad_norm: 0.9999994248186008, iteration: 9248
loss: 1.0552270412445068,grad_norm: 0.9999993961481033, iteration: 9249
loss: 1.0243335962295532,grad_norm: 0.9999992317420522, iteration: 9250
loss: 1.033551812171936,grad_norm: 0.9999993444933927, iteration: 9251
loss: 0.997953474521637,grad_norm: 0.9999993171990996, iteration: 9252
loss: 1.0082720518112183,grad_norm: 0.9999993220292676, iteration: 9253
loss: 1.0016108751296997,grad_norm: 0.9999994398137944, iteration: 9254
loss: 1.0743578672409058,grad_norm: 0.999999510670766, iteration: 9255
loss: 1.0338504314422607,grad_norm: 0.9999993924787896, iteration: 9256
loss: 1.051292896270752,grad_norm: 0.9999993802800168, iteration: 9257
loss: 1.0282883644104004,grad_norm: 0.9999995340368563, iteration: 9258
loss: 1.030274510383606,grad_norm: 0.9999991523418483, iteration: 9259
loss: 1.035123348236084,grad_norm: 0.9999991903036394, iteration: 9260
loss: 1.0788179636001587,grad_norm: 0.9999995815922231, iteration: 9261
loss: 1.0425857305526733,grad_norm: 0.9999993188688214, iteration: 9262
loss: 1.0191693305969238,grad_norm: 0.9999992477798114, iteration: 9263
loss: 1.0604627132415771,grad_norm: 0.9999995076664203, iteration: 9264
loss: 1.0165942907333374,grad_norm: 0.9999992903838184, iteration: 9265
loss: 1.0234816074371338,grad_norm: 0.9999994373750882, iteration: 9266
loss: 1.06741464138031,grad_norm: 0.9999994566194179, iteration: 9267
loss: 1.0555623769760132,grad_norm: 0.9999993880021532, iteration: 9268
loss: 1.046403408050537,grad_norm: 0.9999996189541686, iteration: 9269
loss: 1.039346694946289,grad_norm: 0.9999993381038458, iteration: 9270
loss: 1.063880205154419,grad_norm: 0.9999995616389, iteration: 9271
loss: 1.0304242372512817,grad_norm: 0.9999992544240862, iteration: 9272
loss: 1.1231509447097778,grad_norm: 0.9999994641816693, iteration: 9273
loss: 1.0163815021514893,grad_norm: 0.9999994044670192, iteration: 9274
loss: 1.0654568672180176,grad_norm: 0.999999299116392, iteration: 9275
loss: 1.0461190938949585,grad_norm: 0.9999991353163501, iteration: 9276
loss: 1.128815770149231,grad_norm: 0.9999997702699068, iteration: 9277
loss: 1.0471370220184326,grad_norm: 0.9999993513733834, iteration: 9278
loss: 1.0602004528045654,grad_norm: 0.9999994149891074, iteration: 9279
loss: 1.0209966897964478,grad_norm: 0.9999995858254429, iteration: 9280
loss: 1.039157509803772,grad_norm: 0.9999993435643805, iteration: 9281
loss: 1.0172137022018433,grad_norm: 0.9999996027662469, iteration: 9282
loss: 1.041083574295044,grad_norm: 0.9999995135098273, iteration: 9283
loss: 1.0525639057159424,grad_norm: 0.9999992938048441, iteration: 9284
loss: 1.0219298601150513,grad_norm: 0.9999993373267436, iteration: 9285
loss: 1.0277979373931885,grad_norm: 0.9999992874016953, iteration: 9286
loss: 1.0317414999008179,grad_norm: 0.9999996225929149, iteration: 9287
loss: 0.9776846766471863,grad_norm: 0.9999992610893371, iteration: 9288
loss: 1.0704783201217651,grad_norm: 0.9999994607050793, iteration: 9289
loss: 1.0399271249771118,grad_norm: 0.9999994092102048, iteration: 9290
loss: 1.0561769008636475,grad_norm: 0.9999994687801365, iteration: 9291
loss: 1.0148776769638062,grad_norm: 0.9999994160583235, iteration: 9292
loss: 1.0290604829788208,grad_norm: 0.9999992193219104, iteration: 9293
loss: 1.0611746311187744,grad_norm: 0.9999998323866517, iteration: 9294
loss: 1.0198909044265747,grad_norm: 0.99999923539728, iteration: 9295
loss: 1.0374456644058228,grad_norm: 0.999999497475584, iteration: 9296
loss: 1.0324805974960327,grad_norm: 0.9999995372158739, iteration: 9297
loss: 1.0512315034866333,grad_norm: 0.9999993709523521, iteration: 9298
loss: 1.0803862810134888,grad_norm: 0.9999994833564793, iteration: 9299
loss: 1.044974684715271,grad_norm: 0.999999553224123, iteration: 9300
loss: 1.026344895362854,grad_norm: 0.9999993598300019, iteration: 9301
loss: 1.0594841241836548,grad_norm: 0.9999992726021376, iteration: 9302
loss: 0.9994393587112427,grad_norm: 0.9999992762001639, iteration: 9303
loss: 1.007223129272461,grad_norm: 0.9999996921716616, iteration: 9304
loss: 1.0509467124938965,grad_norm: 0.9999993566566496, iteration: 9305
loss: 1.073071002960205,grad_norm: 0.9999993942796743, iteration: 9306
loss: 1.0530633926391602,grad_norm: 0.9999996707809419, iteration: 9307
loss: 1.1707779169082642,grad_norm: 0.9999995750251227, iteration: 9308
loss: 1.1136682033538818,grad_norm: 0.999999582890056, iteration: 9309
loss: 1.062943696975708,grad_norm: 0.9999995162738397, iteration: 9310
loss: 1.0384316444396973,grad_norm: 0.9999991745330336, iteration: 9311
loss: 1.0390092134475708,grad_norm: 0.999999291721969, iteration: 9312
loss: 1.0575997829437256,grad_norm: 0.999999575060948, iteration: 9313
loss: 0.9906015396118164,grad_norm: 0.9999992980906767, iteration: 9314
loss: 1.043993353843689,grad_norm: 0.9999994314220109, iteration: 9315
loss: 1.0150518417358398,grad_norm: 0.9999993687976023, iteration: 9316
loss: 1.0068868398666382,grad_norm: 0.9999992150345347, iteration: 9317
loss: 1.0641474723815918,grad_norm: 0.999999609428321, iteration: 9318
loss: 1.0370267629623413,grad_norm: 0.999999290036724, iteration: 9319
loss: 1.0194902420043945,grad_norm: 0.9999993251693119, iteration: 9320
loss: 1.081714153289795,grad_norm: 0.9999992029174337, iteration: 9321
loss: 1.003062129020691,grad_norm: 0.9999994100394975, iteration: 9322
loss: 1.0331106185913086,grad_norm: 0.9999993331626665, iteration: 9323
loss: 1.0104098320007324,grad_norm: 0.9999995334454731, iteration: 9324
loss: 1.0918586254119873,grad_norm: 0.9999996041904741, iteration: 9325
loss: 1.1039056777954102,grad_norm: 0.9999994978716484, iteration: 9326
loss: 1.0637009143829346,grad_norm: 0.9999994830265603, iteration: 9327
loss: 1.0779361724853516,grad_norm: 0.999999322968171, iteration: 9328
loss: 1.0855839252471924,grad_norm: 0.9999996159690502, iteration: 9329
loss: 1.0304678678512573,grad_norm: 0.9999993518775545, iteration: 9330
loss: 1.0983699560165405,grad_norm: 0.9999994191086338, iteration: 9331
loss: 1.0652923583984375,grad_norm: 0.9999994168846348, iteration: 9332
loss: 1.0494792461395264,grad_norm: 0.9999997075750953, iteration: 9333
loss: 1.0601332187652588,grad_norm: 0.9999995523997746, iteration: 9334
loss: 1.0295814275741577,grad_norm: 0.9999992336652461, iteration: 9335
loss: 1.0376648902893066,grad_norm: 0.9999995499876758, iteration: 9336
loss: 0.9974122047424316,grad_norm: 0.9999992786074096, iteration: 9337
loss: 1.065094232559204,grad_norm: 0.9999995169509964, iteration: 9338
loss: 1.0892996788024902,grad_norm: 0.9999993725967263, iteration: 9339
loss: 1.051210880279541,grad_norm: 0.9999992742282728, iteration: 9340
loss: 1.0426479578018188,grad_norm: 0.9999993104834379, iteration: 9341
loss: 1.0388716459274292,grad_norm: 0.9999991942303499, iteration: 9342
loss: 1.0552444458007812,grad_norm: 0.9999995689791149, iteration: 9343
loss: 0.9822522401809692,grad_norm: 0.9999992278541422, iteration: 9344
loss: 1.0480005741119385,grad_norm: 0.9999993082012312, iteration: 9345
loss: 1.0150234699249268,grad_norm: 0.9999992755751634, iteration: 9346
loss: 1.0377089977264404,grad_norm: 0.9999995610039665, iteration: 9347
loss: 1.105168104171753,grad_norm: 0.9999996207636366, iteration: 9348
loss: 1.0943491458892822,grad_norm: 0.9999995286739386, iteration: 9349
loss: 1.0308252573013306,grad_norm: 0.9999995760105038, iteration: 9350
loss: 1.0095536708831787,grad_norm: 0.9999993259134541, iteration: 9351
loss: 1.0807336568832397,grad_norm: 0.9999994823326611, iteration: 9352
loss: 1.0666775703430176,grad_norm: 0.9999992716037798, iteration: 9353
loss: 1.0572800636291504,grad_norm: 0.9999995308879331, iteration: 9354
loss: 1.0327420234680176,grad_norm: 0.9999993456573694, iteration: 9355
loss: 1.0072615146636963,grad_norm: 0.9999993246182798, iteration: 9356
loss: 1.060159683227539,grad_norm: 0.9999993459307758, iteration: 9357
loss: 1.0528030395507812,grad_norm: 0.9999993409513346, iteration: 9358
loss: 0.9992775917053223,grad_norm: 0.9999994496325064, iteration: 9359
loss: 1.0728228092193604,grad_norm: 0.9999991706883945, iteration: 9360
loss: 1.0346457958221436,grad_norm: 0.9999993902536419, iteration: 9361
loss: 1.0737556219100952,grad_norm: 0.9999997518240566, iteration: 9362
loss: 1.0506685972213745,grad_norm: 0.9999994017405719, iteration: 9363
loss: 1.0609983205795288,grad_norm: 0.9999994624183972, iteration: 9364
loss: 1.0355192422866821,grad_norm: 0.9999992194952989, iteration: 9365
loss: 1.0556526184082031,grad_norm: 0.9999994719423927, iteration: 9366
loss: 1.0128004550933838,grad_norm: 0.9999994598119774, iteration: 9367
loss: 1.030324101448059,grad_norm: 0.9999996046301975, iteration: 9368
loss: 1.0881866216659546,grad_norm: 0.999999616109881, iteration: 9369
loss: 1.0361087322235107,grad_norm: 0.999999333200195, iteration: 9370
loss: 1.0532276630401611,grad_norm: 0.9999995404226381, iteration: 9371
loss: 1.0170722007751465,grad_norm: 0.9999996958612034, iteration: 9372
loss: 1.0329219102859497,grad_norm: 0.9999993476826459, iteration: 9373
loss: 1.0202254056930542,grad_norm: 0.9999993047921235, iteration: 9374
loss: 1.0025694370269775,grad_norm: 0.9999993516323333, iteration: 9375
loss: 1.035751461982727,grad_norm: 0.9999992830352734, iteration: 9376
loss: 1.071189045906067,grad_norm: 0.9999996613465009, iteration: 9377
loss: 1.0854804515838623,grad_norm: 0.9999995535736833, iteration: 9378
loss: 1.0525068044662476,grad_norm: 0.9999994030329873, iteration: 9379
loss: 1.0645076036453247,grad_norm: 0.9999994989488027, iteration: 9380
loss: 1.0312004089355469,grad_norm: 0.999999331264197, iteration: 9381
loss: 1.0343953371047974,grad_norm: 0.9999992245586076, iteration: 9382
loss: 0.993751049041748,grad_norm: 0.9999993160815205, iteration: 9383
loss: 1.0531063079833984,grad_norm: 0.9999992965683691, iteration: 9384
loss: 1.037452220916748,grad_norm: 0.9999993712213209, iteration: 9385
loss: 1.0380964279174805,grad_norm: 0.9999995434833227, iteration: 9386
loss: 1.0854488611221313,grad_norm: 0.9999993547080853, iteration: 9387
loss: 1.0519585609436035,grad_norm: 0.9999994060916491, iteration: 9388
loss: 1.0699527263641357,grad_norm: 0.9999994946663304, iteration: 9389
loss: 1.076237440109253,grad_norm: 0.9999994208558065, iteration: 9390
loss: 0.9927013516426086,grad_norm: 0.9999994471512906, iteration: 9391
loss: 1.1057738065719604,grad_norm: 0.9999993605645424, iteration: 9392
loss: 1.0568821430206299,grad_norm: 0.9999991930195856, iteration: 9393
loss: 1.0002167224884033,grad_norm: 0.9999993399965194, iteration: 9394
loss: 1.0259301662445068,grad_norm: 0.9999994657849022, iteration: 9395
loss: 1.0947070121765137,grad_norm: 0.9999993433006182, iteration: 9396
loss: 1.0811457633972168,grad_norm: 0.9999995794879009, iteration: 9397
loss: 1.0380587577819824,grad_norm: 0.9999992253191476, iteration: 9398
loss: 1.0553781986236572,grad_norm: 0.9999995705914454, iteration: 9399
loss: 1.00118887424469,grad_norm: 0.9999993629087205, iteration: 9400
loss: 1.0575746297836304,grad_norm: 0.99999931954065, iteration: 9401
loss: 1.022154450416565,grad_norm: 0.9999994743220553, iteration: 9402
loss: 1.0025107860565186,grad_norm: 0.9999992457483655, iteration: 9403
loss: 1.0348021984100342,grad_norm: 0.9999993078430803, iteration: 9404
loss: 1.0258773565292358,grad_norm: 0.9999994631393705, iteration: 9405
loss: 1.0497674942016602,grad_norm: 0.9999995919454445, iteration: 9406
loss: 1.0191009044647217,grad_norm: 0.9999993578008858, iteration: 9407
loss: 1.032892107963562,grad_norm: 0.9999992708768597, iteration: 9408
loss: 1.0682673454284668,grad_norm: 0.9999994209107278, iteration: 9409
loss: 1.0750179290771484,grad_norm: 0.9999994330900708, iteration: 9410
loss: 0.9951139688491821,grad_norm: 0.9999995920923085, iteration: 9411
loss: 1.0498346090316772,grad_norm: 0.9999994794413519, iteration: 9412
loss: 1.0383621454238892,grad_norm: 0.9999997108342281, iteration: 9413
loss: 0.9954137206077576,grad_norm: 0.9999992365132438, iteration: 9414
loss: 1.029999852180481,grad_norm: 0.9999994164566098, iteration: 9415
loss: 0.9908629059791565,grad_norm: 0.999999352458906, iteration: 9416
loss: 1.0682880878448486,grad_norm: 0.9999995616465066, iteration: 9417
loss: 1.0303999185562134,grad_norm: 0.9999992685420949, iteration: 9418
loss: 1.0556237697601318,grad_norm: 0.9999993147549598, iteration: 9419
loss: 1.0878421068191528,grad_norm: 0.9999996332729248, iteration: 9420
loss: 1.0411161184310913,grad_norm: 0.9999996910067562, iteration: 9421
loss: 1.0344046354293823,grad_norm: 0.9999994804160708, iteration: 9422
loss: 1.0351219177246094,grad_norm: 0.9999992354548425, iteration: 9423
loss: 1.0184892416000366,grad_norm: 0.9999994991255576, iteration: 9424
loss: 1.0839167833328247,grad_norm: 0.9999996100284566, iteration: 9425
loss: 1.0484915971755981,grad_norm: 0.9999996208667875, iteration: 9426
loss: 1.0472955703735352,grad_norm: 0.9999993552500396, iteration: 9427
loss: 1.033224105834961,grad_norm: 0.9999993084282073, iteration: 9428
loss: 1.0634758472442627,grad_norm: 0.999999293057377, iteration: 9429
loss: 1.0555955171585083,grad_norm: 0.9999993810500101, iteration: 9430
loss: 1.0273563861846924,grad_norm: 0.9999993771454706, iteration: 9431
loss: 1.069130301475525,grad_norm: 0.9999992923202798, iteration: 9432
loss: 0.9971574544906616,grad_norm: 0.9999994446382274, iteration: 9433
loss: 1.0827645063400269,grad_norm: 0.9999994578199037, iteration: 9434
loss: 1.0865451097488403,grad_norm: 0.9999997682920152, iteration: 9435
loss: 1.094374656677246,grad_norm: 0.9999995978607031, iteration: 9436
loss: 0.9920839667320251,grad_norm: 0.9999993035329512, iteration: 9437
loss: 1.0387028455734253,grad_norm: 0.9999994093892843, iteration: 9438
loss: 1.0239537954330444,grad_norm: 0.9999992079087019, iteration: 9439
loss: 1.0298548936843872,grad_norm: 0.9999995753636352, iteration: 9440
loss: 1.013276219367981,grad_norm: 0.9999993128424299, iteration: 9441
loss: 1.049556851387024,grad_norm: 0.9999992956239117, iteration: 9442
loss: 1.0279895067214966,grad_norm: 0.9999994599769997, iteration: 9443
loss: 1.0333386659622192,grad_norm: 0.9999993238257084, iteration: 9444
loss: 1.053589105606079,grad_norm: 0.9999995269502949, iteration: 9445
loss: 0.9945775866508484,grad_norm: 0.9999991522649943, iteration: 9446
loss: 1.045572280883789,grad_norm: 0.9999992192221102, iteration: 9447
loss: 1.0953043699264526,grad_norm: 0.9999992872914142, iteration: 9448
loss: 1.03554368019104,grad_norm: 0.9999993705289792, iteration: 9449
loss: 0.9942650198936462,grad_norm: 0.9999993429489324, iteration: 9450
loss: 0.9477462768554688,grad_norm: 0.9999992475585666, iteration: 9451
loss: 1.0566397905349731,grad_norm: 0.9999992495158443, iteration: 9452
loss: 0.9855119585990906,grad_norm: 0.9999992366771767, iteration: 9453
loss: 1.0689074993133545,grad_norm: 0.9999994302759687, iteration: 9454
loss: 1.0535650253295898,grad_norm: 0.9999992253904156, iteration: 9455
loss: 1.0332705974578857,grad_norm: 0.9999993366548665, iteration: 9456
loss: 1.0000311136245728,grad_norm: 0.9999993595546088, iteration: 9457
loss: 0.9953427314758301,grad_norm: 0.9999991295374449, iteration: 9458
loss: 1.0732053518295288,grad_norm: 0.9999992799986769, iteration: 9459
loss: 1.0335465669631958,grad_norm: 0.9999994404822635, iteration: 9460
loss: 1.0581254959106445,grad_norm: 0.999999234534713, iteration: 9461
loss: 1.0478452444076538,grad_norm: 0.9999992758657448, iteration: 9462
loss: 1.0448206663131714,grad_norm: 0.9999995990560367, iteration: 9463
loss: 1.0469331741333008,grad_norm: 0.999999320446713, iteration: 9464
loss: 1.0315786600112915,grad_norm: 0.9999992630586094, iteration: 9465
loss: 1.0132198333740234,grad_norm: 0.9999993283441057, iteration: 9466
loss: 1.0210506916046143,grad_norm: 0.9999992625096401, iteration: 9467
loss: 1.0535374879837036,grad_norm: 0.9999993669919116, iteration: 9468
loss: 1.0346499681472778,grad_norm: 0.9999992205559164, iteration: 9469
loss: 1.058763861656189,grad_norm: 0.9999994395172839, iteration: 9470
loss: 1.073570966720581,grad_norm: 0.9999993734289523, iteration: 9471
loss: 1.0303291082382202,grad_norm: 0.9999993652826578, iteration: 9472
loss: 1.0119085311889648,grad_norm: 0.9999996076767127, iteration: 9473
loss: 1.0561487674713135,grad_norm: 0.9999991858716043, iteration: 9474
loss: 1.026986002922058,grad_norm: 0.9999992958489853, iteration: 9475
loss: 1.0588457584381104,grad_norm: 0.9999995546049293, iteration: 9476
loss: 1.0282505750656128,grad_norm: 0.9999994324376945, iteration: 9477
loss: 1.0144743919372559,grad_norm: 0.9999992353209005, iteration: 9478
loss: 1.004897952079773,grad_norm: 0.9999993359902227, iteration: 9479
loss: 1.0104992389678955,grad_norm: 0.9999993032995759, iteration: 9480
loss: 1.0552059412002563,grad_norm: 0.9999993353640592, iteration: 9481
loss: 1.034587025642395,grad_norm: 0.9999993895577168, iteration: 9482
loss: 1.0722798109054565,grad_norm: 0.9999994566306643, iteration: 9483
loss: 1.0463403463363647,grad_norm: 0.9999994104315372, iteration: 9484
loss: 1.022525429725647,grad_norm: 0.9999993042759135, iteration: 9485
loss: 1.0356825590133667,grad_norm: 0.9999992110376205, iteration: 9486
loss: 1.0423606634140015,grad_norm: 0.9999993772638722, iteration: 9487
loss: 1.0565085411071777,grad_norm: 0.9999992465259966, iteration: 9488
loss: 1.049713373184204,grad_norm: 0.9999992955601006, iteration: 9489
loss: 1.01004958152771,grad_norm: 0.9999992696055752, iteration: 9490
loss: 1.0556690692901611,grad_norm: 0.9999992812478891, iteration: 9491
loss: 1.0901269912719727,grad_norm: 0.9999996329678691, iteration: 9492
loss: 1.0677528381347656,grad_norm: 0.999999622600362, iteration: 9493
loss: 1.0614991188049316,grad_norm: 0.9999993484308386, iteration: 9494
loss: 1.0518327951431274,grad_norm: 0.9999995207594098, iteration: 9495
loss: 1.0496113300323486,grad_norm: 0.9999993544845834, iteration: 9496
loss: 1.0358365774154663,grad_norm: 0.9999991789279276, iteration: 9497
loss: 1.0434435606002808,grad_norm: 0.9999993558650759, iteration: 9498
loss: 1.0551966428756714,grad_norm: 0.9999994549409364, iteration: 9499
loss: 1.0256924629211426,grad_norm: 0.999999142381866, iteration: 9500
loss: 1.0287396907806396,grad_norm: 0.9999993405778578, iteration: 9501
loss: 1.0096044540405273,grad_norm: 0.9999992680310492, iteration: 9502
loss: 1.0618208646774292,grad_norm: 0.999999308809099, iteration: 9503
loss: 1.0462795495986938,grad_norm: 0.9999994735810483, iteration: 9504
loss: 1.062880277633667,grad_norm: 0.9999996653921807, iteration: 9505
loss: 1.064967393875122,grad_norm: 0.9999994548180687, iteration: 9506
loss: 1.0537183284759521,grad_norm: 0.9999994850062324, iteration: 9507
loss: 1.0300594568252563,grad_norm: 0.9999993483901578, iteration: 9508
loss: 1.0705296993255615,grad_norm: 0.9999992454464796, iteration: 9509
loss: 1.0444120168685913,grad_norm: 0.9999996039449817, iteration: 9510
loss: 1.0565820932388306,grad_norm: 0.9999994105117285, iteration: 9511
loss: 1.0341317653656006,grad_norm: 0.9999995336914069, iteration: 9512
loss: 1.1276997327804565,grad_norm: 0.9999994772445291, iteration: 9513
loss: 1.0187265872955322,grad_norm: 0.9999993424717684, iteration: 9514
loss: 0.9979594349861145,grad_norm: 0.9999993228830389, iteration: 9515
loss: 1.086888313293457,grad_norm: 0.999999429351512, iteration: 9516
loss: 1.077710747718811,grad_norm: 0.9999994072330047, iteration: 9517
loss: 1.0338692665100098,grad_norm: 0.9999992282474655, iteration: 9518
loss: 1.0218260288238525,grad_norm: 0.9999991589880588, iteration: 9519
loss: 1.0332154035568237,grad_norm: 0.999999593258622, iteration: 9520
loss: 1.056302547454834,grad_norm: 0.9999995388536719, iteration: 9521
loss: 1.048905372619629,grad_norm: 0.9999996278365143, iteration: 9522
loss: 0.9972614645957947,grad_norm: 0.9999992806373045, iteration: 9523
loss: 1.060409665107727,grad_norm: 0.9999995417843708, iteration: 9524
loss: 1.0707335472106934,grad_norm: 0.9999994615867797, iteration: 9525
loss: 1.0285676717758179,grad_norm: 0.9999995055130237, iteration: 9526
loss: 1.0100452899932861,grad_norm: 0.9999991343367329, iteration: 9527
loss: 1.0162134170532227,grad_norm: 0.9999992607892574, iteration: 9528
loss: 1.0095322132110596,grad_norm: 0.9999995979577594, iteration: 9529
loss: 1.1138356924057007,grad_norm: 0.999999839199155, iteration: 9530
loss: 1.0603914260864258,grad_norm: 0.9999992794982033, iteration: 9531
loss: 1.0397318601608276,grad_norm: 0.9999991780996983, iteration: 9532
loss: 1.0404579639434814,grad_norm: 0.9999993137419656, iteration: 9533
loss: 1.0413278341293335,grad_norm: 0.9999993475541845, iteration: 9534
loss: 1.0144315958023071,grad_norm: 0.9999992947300237, iteration: 9535
loss: 1.0711835622787476,grad_norm: 0.9999997719793192, iteration: 9536
loss: 1.0675768852233887,grad_norm: 0.9999996604747486, iteration: 9537
loss: 1.0929105281829834,grad_norm: 0.9999995414828947, iteration: 9538
loss: 1.1344704627990723,grad_norm: 0.9999993858899437, iteration: 9539
loss: 1.0292783975601196,grad_norm: 0.9999994799600848, iteration: 9540
loss: 1.0017434358596802,grad_norm: 0.9999995609554263, iteration: 9541
loss: 1.0372674465179443,grad_norm: 0.999999363448029, iteration: 9542
loss: 1.0332945585250854,grad_norm: 0.9999994393106973, iteration: 9543
loss: 1.0204802751541138,grad_norm: 0.9999994280568719, iteration: 9544
loss: 1.0553807020187378,grad_norm: 0.999999701285797, iteration: 9545
loss: 1.0309042930603027,grad_norm: 0.9999990797173536, iteration: 9546
loss: 0.9775072336196899,grad_norm: 0.9999991666284539, iteration: 9547
loss: 1.0589447021484375,grad_norm: 0.99999911431656, iteration: 9548
loss: 1.0243388414382935,grad_norm: 0.999999240309545, iteration: 9549
loss: 1.045607328414917,grad_norm: 0.9999992671525126, iteration: 9550
loss: 1.0483338832855225,grad_norm: 0.9999993058101149, iteration: 9551
loss: 1.0103366374969482,grad_norm: 0.9999993472392875, iteration: 9552
loss: 1.0593640804290771,grad_norm: 0.9999993382447221, iteration: 9553
loss: 1.0450947284698486,grad_norm: 0.9999994064234022, iteration: 9554
loss: 1.0801721811294556,grad_norm: 0.9999995683990552, iteration: 9555
loss: 1.0432697534561157,grad_norm: 0.9999991375976625, iteration: 9556
loss: 1.0465718507766724,grad_norm: 0.9999992989794281, iteration: 9557
loss: 1.0334256887435913,grad_norm: 0.9999991481391359, iteration: 9558
loss: 0.9169667363166809,grad_norm: 0.9999992910888309, iteration: 9559
loss: 1.1009608507156372,grad_norm: 0.9999993791484932, iteration: 9560
loss: 1.0604586601257324,grad_norm: 0.999999314178617, iteration: 9561
loss: 0.984879732131958,grad_norm: 0.9999992772650896, iteration: 9562
loss: 1.1101250648498535,grad_norm: 0.9999995129361102, iteration: 9563
loss: 1.0530474185943604,grad_norm: 0.9999994013631476, iteration: 9564
loss: 1.0197124481201172,grad_norm: 0.9999993455824833, iteration: 9565
loss: 1.075603723526001,grad_norm: 0.999999793537782, iteration: 9566
loss: 0.9720815420150757,grad_norm: 0.9999993670358623, iteration: 9567
loss: 1.0738219022750854,grad_norm: 0.9999993439482832, iteration: 9568
loss: 1.0588873624801636,grad_norm: 0.9999992571015794, iteration: 9569
loss: 1.1215168237686157,grad_norm: 0.9999995744279411, iteration: 9570
loss: 1.0783467292785645,grad_norm: 0.9999995245852414, iteration: 9571
loss: 1.0658878087997437,grad_norm: 0.999999476696493, iteration: 9572
loss: 1.042771816253662,grad_norm: 0.9999994340415234, iteration: 9573
loss: 1.0708537101745605,grad_norm: 0.9999994779458048, iteration: 9574
loss: 1.0192692279815674,grad_norm: 0.9999994616735653, iteration: 9575
loss: 1.0758181810379028,grad_norm: 0.9999992121817035, iteration: 9576
loss: 1.0283890962600708,grad_norm: 0.9999993326793467, iteration: 9577
loss: 1.0380754470825195,grad_norm: 0.9999995810957422, iteration: 9578
loss: 1.0372951030731201,grad_norm: 0.9999995339333939, iteration: 9579
loss: 1.0810201168060303,grad_norm: 0.9999996509786655, iteration: 9580
loss: 1.0794949531555176,grad_norm: 0.9999996741957153, iteration: 9581
loss: 1.0157415866851807,grad_norm: 0.9999990806708867, iteration: 9582
loss: 1.0246366262435913,grad_norm: 0.9999994589967698, iteration: 9583
loss: 1.043418526649475,grad_norm: 0.9999995592312537, iteration: 9584
loss: 1.0688154697418213,grad_norm: 0.9999996539826415, iteration: 9585
loss: 1.0715728998184204,grad_norm: 0.9999994513643833, iteration: 9586
loss: 1.0730774402618408,grad_norm: 0.9999995856568515, iteration: 9587
loss: 1.0164070129394531,grad_norm: 0.9999993683224865, iteration: 9588
loss: 0.9871479868888855,grad_norm: 0.9999993175261973, iteration: 9589
loss: 1.0521831512451172,grad_norm: 0.9999993034594976, iteration: 9590
loss: 1.0073394775390625,grad_norm: 0.999999417943316, iteration: 9591
loss: 1.0729401111602783,grad_norm: 0.9999995087308483, iteration: 9592
loss: 1.1497111320495605,grad_norm: 0.99999956874879, iteration: 9593
loss: 1.1470857858657837,grad_norm: 0.9999997976321766, iteration: 9594
loss: 1.068154215812683,grad_norm: 0.9999995742315182, iteration: 9595
loss: 1.0286685228347778,grad_norm: 0.9999992657282092, iteration: 9596
loss: 1.020634651184082,grad_norm: 0.9999992478068637, iteration: 9597
loss: 0.975320041179657,grad_norm: 0.9999992455176283, iteration: 9598
loss: 1.0233515501022339,grad_norm: 0.9999994601439848, iteration: 9599
loss: 1.0545248985290527,grad_norm: 0.9999991358158495, iteration: 9600
loss: 1.0660676956176758,grad_norm: 0.9999994629811038, iteration: 9601
loss: 1.051371455192566,grad_norm: 0.9999995332827174, iteration: 9602
loss: 1.0507822036743164,grad_norm: 0.9999993765330812, iteration: 9603
loss: 1.0586906671524048,grad_norm: 0.9999992612845602, iteration: 9604
loss: 1.0359050035476685,grad_norm: 0.9999995586313878, iteration: 9605
loss: 0.9768249988555908,grad_norm: 0.9999993122873883, iteration: 9606
loss: 1.0350439548492432,grad_norm: 0.999999200831318, iteration: 9607
loss: 1.0559437274932861,grad_norm: 0.9999991590271538, iteration: 9608
loss: 1.0167909860610962,grad_norm: 0.9999993334895546, iteration: 9609
loss: 1.0563889741897583,grad_norm: 0.9999992546957693, iteration: 9610
loss: 1.0295995473861694,grad_norm: 0.9999996319008599, iteration: 9611
loss: 0.9869394898414612,grad_norm: 0.9999992617462394, iteration: 9612
loss: 1.0120398998260498,grad_norm: 0.9999993739283386, iteration: 9613
loss: 1.0947177410125732,grad_norm: 0.9999995743033354, iteration: 9614
loss: 1.1367417573928833,grad_norm: 0.9999999049142229, iteration: 9615
loss: 1.0296913385391235,grad_norm: 0.9999993872681039, iteration: 9616
loss: 1.0499025583267212,grad_norm: 0.9999993201351294, iteration: 9617
loss: 1.0486845970153809,grad_norm: 0.999999727261728, iteration: 9618
loss: 1.0130975246429443,grad_norm: 0.9999994713753148, iteration: 9619
loss: 1.0124255418777466,grad_norm: 0.9999996535218261, iteration: 9620
loss: 1.0492985248565674,grad_norm: 0.9999997915841473, iteration: 9621
loss: 1.0641047954559326,grad_norm: 0.9999993218598153, iteration: 9622
loss: 1.0640366077423096,grad_norm: 0.9999997346292886, iteration: 9623
loss: 1.0010230541229248,grad_norm: 0.9999994994434559, iteration: 9624
loss: 1.0738221406936646,grad_norm: 0.9999995801443973, iteration: 9625
loss: 1.035046100616455,grad_norm: 0.9999994369541383, iteration: 9626
loss: 1.058961272239685,grad_norm: 0.9999994455034308, iteration: 9627
loss: 1.0482817888259888,grad_norm: 0.9999994998073252, iteration: 9628
loss: 1.0748287439346313,grad_norm: 0.9999994370931892, iteration: 9629
loss: 1.0368614196777344,grad_norm: 0.9999996080463694, iteration: 9630
loss: 1.0457550287246704,grad_norm: 0.9999993542549647, iteration: 9631
loss: 1.0220774412155151,grad_norm: 0.9999993182223059, iteration: 9632
loss: 1.026781439781189,grad_norm: 0.9999993541369435, iteration: 9633
loss: 1.0013275146484375,grad_norm: 0.9999992382545833, iteration: 9634
loss: 1.063498616218567,grad_norm: 0.9999995947170554, iteration: 9635
loss: 0.9991849660873413,grad_norm: 0.9999993986709704, iteration: 9636
loss: 1.0187475681304932,grad_norm: 0.9999991815883323, iteration: 9637
loss: 1.062833309173584,grad_norm: 0.9999994984621733, iteration: 9638
loss: 1.0286659002304077,grad_norm: 0.999999355753557, iteration: 9639
loss: 1.0219324827194214,grad_norm: 0.9999995229093231, iteration: 9640
loss: 1.0636790990829468,grad_norm: 0.9999994762492487, iteration: 9641
loss: 1.052231788635254,grad_norm: 0.9999995417156948, iteration: 9642
loss: 1.012160062789917,grad_norm: 0.9999996310830728, iteration: 9643
loss: 1.031362771987915,grad_norm: 0.9999994046391321, iteration: 9644
loss: 0.9735352993011475,grad_norm: 0.9999992902341972, iteration: 9645
loss: 1.0540547370910645,grad_norm: 0.999999443322782, iteration: 9646
loss: 1.0940381288528442,grad_norm: 0.9999997751502957, iteration: 9647
loss: 1.0809733867645264,grad_norm: 0.9999994049665757, iteration: 9648
loss: 1.0457457304000854,grad_norm: 0.9999993351176122, iteration: 9649
loss: 1.0178800821304321,grad_norm: 0.9999993748752674, iteration: 9650
loss: 1.03066086769104,grad_norm: 0.9999993876749022, iteration: 9651
loss: 1.0167882442474365,grad_norm: 0.9999995411026207, iteration: 9652
loss: 0.9722155332565308,grad_norm: 0.9999993709730588, iteration: 9653
loss: 0.9952279329299927,grad_norm: 0.9999992523097218, iteration: 9654
loss: 1.0640889406204224,grad_norm: 0.9999997721057942, iteration: 9655
loss: 1.052626609802246,grad_norm: 0.9999994176120933, iteration: 9656
loss: 1.056600570678711,grad_norm: 0.9999992835704717, iteration: 9657
loss: 1.0077340602874756,grad_norm: 0.9999992156867069, iteration: 9658
loss: 0.9559637308120728,grad_norm: 0.9999994217064931, iteration: 9659
loss: 1.0357335805892944,grad_norm: 0.9999996661881804, iteration: 9660
loss: 0.9944159388542175,grad_norm: 0.9999991248210088, iteration: 9661
loss: 0.9900218844413757,grad_norm: 0.9999992913697906, iteration: 9662
loss: 1.0343745946884155,grad_norm: 0.9999993138318699, iteration: 9663
loss: 1.085291862487793,grad_norm: 0.9999993261149149, iteration: 9664
loss: 1.0393099784851074,grad_norm: 0.9999994073187859, iteration: 9665
loss: 1.0591803789138794,grad_norm: 0.9999992107266267, iteration: 9666
loss: 1.0486233234405518,grad_norm: 0.9999993434534203, iteration: 9667
loss: 1.0844703912734985,grad_norm: 0.9999991621998243, iteration: 9668
loss: 1.0062134265899658,grad_norm: 0.9999991884816217, iteration: 9669
loss: 1.064744234085083,grad_norm: 0.9999993177009947, iteration: 9670
loss: 1.0347297191619873,grad_norm: 0.9999992589072314, iteration: 9671
loss: 1.0229686498641968,grad_norm: 0.9999993377828099, iteration: 9672
loss: 1.0435289144515991,grad_norm: 0.9675735427768859, iteration: 9673
loss: 1.0556360483169556,grad_norm: 0.9999992593042143, iteration: 9674
loss: 1.067880630493164,grad_norm: 0.9999995204016097, iteration: 9675
loss: 0.9997376203536987,grad_norm: 0.999999804181159, iteration: 9676
loss: 1.055815577507019,grad_norm: 0.9999997357433271, iteration: 9677
loss: 1.0602657794952393,grad_norm: 0.999999539262917, iteration: 9678
loss: 1.0593266487121582,grad_norm: 0.9999998041612348, iteration: 9679
loss: 0.9779273271560669,grad_norm: 0.9999993635628389, iteration: 9680
loss: 1.015629768371582,grad_norm: 0.999999467028671, iteration: 9681
loss: 1.0771253108978271,grad_norm: 0.9999997279834684, iteration: 9682
loss: 1.0638453960418701,grad_norm: 0.9999998556068161, iteration: 9683
loss: 1.0189902782440186,grad_norm: 0.999999777124975, iteration: 9684
loss: 0.9989817142486572,grad_norm: 0.9999994707772799, iteration: 9685
loss: 0.9637841582298279,grad_norm: 0.9999993154782226, iteration: 9686
loss: 1.0680938959121704,grad_norm: 0.9999992416893332, iteration: 9687
loss: 1.0240418910980225,grad_norm: 0.9999992719761255, iteration: 9688
loss: 1.0487693548202515,grad_norm: 0.9999991401231527, iteration: 9689
loss: 1.0538259744644165,grad_norm: 0.9999992587722227, iteration: 9690
loss: 1.1242719888687134,grad_norm: 0.9999997743906045, iteration: 9691
loss: 1.0631977319717407,grad_norm: 0.999999386143088, iteration: 9692
loss: 1.0336158275604248,grad_norm: 0.9999992085808718, iteration: 9693
loss: 1.0258190631866455,grad_norm: 0.9999995366473386, iteration: 9694
loss: 1.0317789316177368,grad_norm: 0.999999576692555, iteration: 9695
loss: 1.0481988191604614,grad_norm: 0.9999991476164444, iteration: 9696
loss: 1.074741005897522,grad_norm: 0.9999994800217473, iteration: 9697
loss: 1.0716105699539185,grad_norm: 0.9999994832583119, iteration: 9698
loss: 1.0776041746139526,grad_norm: 0.999999638828351, iteration: 9699
loss: 1.1016048192977905,grad_norm: 0.99999967542985, iteration: 9700
loss: 1.0263317823410034,grad_norm: 0.9999989991594814, iteration: 9701
loss: 1.0420467853546143,grad_norm: 0.9999994335034407, iteration: 9702
loss: 1.1172826290130615,grad_norm: 0.9999993876713132, iteration: 9703
loss: 1.0405951738357544,grad_norm: 0.9999993649791404, iteration: 9704
loss: 1.0819553136825562,grad_norm: 0.9999993698157681, iteration: 9705
loss: 1.0589948892593384,grad_norm: 0.9999994891259296, iteration: 9706
loss: 1.111350178718567,grad_norm: 0.9999996384948382, iteration: 9707
loss: 1.0032103061676025,grad_norm: 0.999999626931762, iteration: 9708
loss: 1.0314334630966187,grad_norm: 0.9999995467112712, iteration: 9709
loss: 1.0386736392974854,grad_norm: 0.9999994718800989, iteration: 9710
loss: 1.0657026767730713,grad_norm: 0.9999995046336828, iteration: 9711
loss: 1.0684621334075928,grad_norm: 0.9999995494552835, iteration: 9712
loss: 1.0719577074050903,grad_norm: 0.9999996408095664, iteration: 9713
loss: 1.0113388299942017,grad_norm: 0.9999992806074582, iteration: 9714
loss: 1.068711519241333,grad_norm: 0.9999994222120053, iteration: 9715
loss: 1.0542538166046143,grad_norm: 0.9999992922557495, iteration: 9716
loss: 1.0369882583618164,grad_norm: 0.9999993869165966, iteration: 9717
loss: 1.0515377521514893,grad_norm: 0.9999991550082689, iteration: 9718
loss: 1.0080063343048096,grad_norm: 0.9999991858798535, iteration: 9719
loss: 1.0255262851715088,grad_norm: 0.9999991894714985, iteration: 9720
loss: 1.1521371603012085,grad_norm: 0.9999997686006243, iteration: 9721
loss: 1.0395872592926025,grad_norm: 0.9999995555185515, iteration: 9722
loss: 0.9970443844795227,grad_norm: 0.999999327406039, iteration: 9723
loss: 1.0708729028701782,grad_norm: 0.9999993613293988, iteration: 9724
loss: 1.0055612325668335,grad_norm: 0.9999995020904613, iteration: 9725
loss: 1.0803766250610352,grad_norm: 0.9999994117199058, iteration: 9726
loss: 1.1136783361434937,grad_norm: 0.9999995669131498, iteration: 9727
loss: 1.0487868785858154,grad_norm: 0.9999996784356917, iteration: 9728
loss: 1.0250755548477173,grad_norm: 0.9999994422915877, iteration: 9729
loss: 1.085911512374878,grad_norm: 0.9999994125466335, iteration: 9730
loss: 1.0268152952194214,grad_norm: 0.9999996967322489, iteration: 9731
loss: 1.018572449684143,grad_norm: 0.9999995549731744, iteration: 9732
loss: 1.0582200288772583,grad_norm: 0.9999991849182407, iteration: 9733
loss: 1.0371617078781128,grad_norm: 0.9999993086646279, iteration: 9734
loss: 1.0329382419586182,grad_norm: 0.9999992666188725, iteration: 9735
loss: 1.0803560018539429,grad_norm: 0.9999994955796846, iteration: 9736
loss: 1.0593429803848267,grad_norm: 0.9999995318240209, iteration: 9737
loss: 1.0367109775543213,grad_norm: 0.999999261412273, iteration: 9738
loss: 1.077999472618103,grad_norm: 0.9999995546780578, iteration: 9739
loss: 0.983510434627533,grad_norm: 0.9999992462867094, iteration: 9740
loss: 1.065424919128418,grad_norm: 0.9999994529768689, iteration: 9741
loss: 1.0631818771362305,grad_norm: 0.9999994108164902, iteration: 9742
loss: 1.055173635482788,grad_norm: 0.9999994959367181, iteration: 9743
loss: 0.986356794834137,grad_norm: 0.9999993372352005, iteration: 9744
loss: 1.075709342956543,grad_norm: 0.9999994333187038, iteration: 9745
loss: 1.0807385444641113,grad_norm: 0.999999517870579, iteration: 9746
loss: 1.0486165285110474,grad_norm: 0.9999994469193064, iteration: 9747
loss: 1.0468013286590576,grad_norm: 0.9999992770963738, iteration: 9748
loss: 1.0749591588974,grad_norm: 0.9999992376666902, iteration: 9749
loss: 1.0011768341064453,grad_norm: 0.9999993264749426, iteration: 9750
loss: 1.0396370887756348,grad_norm: 0.9999996591437295, iteration: 9751
loss: 1.1043801307678223,grad_norm: 0.9999996311082712, iteration: 9752
loss: 1.0439969301223755,grad_norm: 0.9999995239300024, iteration: 9753
loss: 1.0751599073410034,grad_norm: 0.9999995056473103, iteration: 9754
loss: 1.0631011724472046,grad_norm: 0.9999996275935211, iteration: 9755
loss: 1.0091558694839478,grad_norm: 0.9999992995163067, iteration: 9756
loss: 1.162415862083435,grad_norm: 0.9999995895820517, iteration: 9757
loss: 1.1368902921676636,grad_norm: 0.9999996764536319, iteration: 9758
loss: 1.0709974765777588,grad_norm: 0.9999994602933009, iteration: 9759
loss: 1.0879517793655396,grad_norm: 0.9999992657498555, iteration: 9760
loss: 1.0499008893966675,grad_norm: 0.9999996012213717, iteration: 9761
loss: 1.140183448791504,grad_norm: 0.9999995825798504, iteration: 9762
loss: 1.0580511093139648,grad_norm: 0.9999992222660724, iteration: 9763
loss: 1.0833415985107422,grad_norm: 0.9999995403937681, iteration: 9764
loss: 1.07796049118042,grad_norm: 0.9999992920802876, iteration: 9765
loss: 1.0415220260620117,grad_norm: 0.99999934746528, iteration: 9766
loss: 1.0376554727554321,grad_norm: 0.999999218906744, iteration: 9767
loss: 1.0807859897613525,grad_norm: 0.9999996039307054, iteration: 9768
loss: 1.0437911748886108,grad_norm: 0.999999392655805, iteration: 9769
loss: 1.0137659311294556,grad_norm: 0.9999993322625763, iteration: 9770
loss: 1.086759090423584,grad_norm: 0.9999995376656999, iteration: 9771
loss: 1.0690947771072388,grad_norm: 0.9999995653483488, iteration: 9772
loss: 1.1116433143615723,grad_norm: 0.9999997444892663, iteration: 9773
loss: 1.063821792602539,grad_norm: 0.999999412618844, iteration: 9774
loss: 1.018950343132019,grad_norm: 0.9999995241082704, iteration: 9775
loss: 1.0830572843551636,grad_norm: 0.9999996314172463, iteration: 9776
loss: 1.0581505298614502,grad_norm: 0.999999491851668, iteration: 9777
loss: 1.0256388187408447,grad_norm: 0.9999994866206586, iteration: 9778
loss: 1.0586624145507812,grad_norm: 0.9999994752027185, iteration: 9779
loss: 1.0234649181365967,grad_norm: 0.9999995623920237, iteration: 9780
loss: 1.0115845203399658,grad_norm: 0.9999994455572744, iteration: 9781
loss: 1.0570281744003296,grad_norm: 0.9999996782038603, iteration: 9782
loss: 1.0214662551879883,grad_norm: 0.999999333014241, iteration: 9783
loss: 1.0580312013626099,grad_norm: 0.9999995724636092, iteration: 9784
loss: 1.0677820444107056,grad_norm: 0.9999994832131873, iteration: 9785
loss: 1.1383283138275146,grad_norm: 0.9999997104901934, iteration: 9786
loss: 1.0431392192840576,grad_norm: 0.9999993970919826, iteration: 9787
loss: 1.0460330247879028,grad_norm: 0.9999997298382206, iteration: 9788
loss: 1.0161868333816528,grad_norm: 0.999999423309218, iteration: 9789
loss: 1.0542422533035278,grad_norm: 0.9999992698115421, iteration: 9790
loss: 1.0729426145553589,grad_norm: 0.9999996100282289, iteration: 9791
loss: 1.0954629182815552,grad_norm: 0.9999991813985787, iteration: 9792
loss: 1.052451252937317,grad_norm: 0.999999427612585, iteration: 9793
loss: 1.0236331224441528,grad_norm: 0.9999994290531641, iteration: 9794
loss: 1.0176615715026855,grad_norm: 0.999999309811391, iteration: 9795
loss: 1.0678843259811401,grad_norm: 0.9999993283378307, iteration: 9796
loss: 1.0433743000030518,grad_norm: 0.999999382569783, iteration: 9797
loss: 1.0579732656478882,grad_norm: 0.9999995055845645, iteration: 9798
loss: 1.0374451875686646,grad_norm: 0.999999341945845, iteration: 9799
loss: 1.0361539125442505,grad_norm: 0.9999992629278818, iteration: 9800
loss: 1.0120309591293335,grad_norm: 0.9999993560937994, iteration: 9801
loss: 1.053580403327942,grad_norm: 0.9999996257875668, iteration: 9802
loss: 1.0720396041870117,grad_norm: 0.9999995779996802, iteration: 9803
loss: 1.0713552236557007,grad_norm: 0.9999992010964671, iteration: 9804
loss: 1.0079962015151978,grad_norm: 0.999999189756581, iteration: 9805
loss: 1.0173156261444092,grad_norm: 0.9999993273253286, iteration: 9806
loss: 1.0906308889389038,grad_norm: 0.9999994545573293, iteration: 9807
loss: 1.045762062072754,grad_norm: 0.999999451116928, iteration: 9808
loss: 1.0282655954360962,grad_norm: 0.999999341259064, iteration: 9809
loss: 1.1310663223266602,grad_norm: 0.9999994515356303, iteration: 9810
loss: 1.0331201553344727,grad_norm: 0.9999993578916867, iteration: 9811
loss: 1.090669870376587,grad_norm: 0.9999998839381152, iteration: 9812
loss: 1.095228910446167,grad_norm: 0.9999995681174277, iteration: 9813
loss: 1.0175143480300903,grad_norm: 0.9999993281949486, iteration: 9814
loss: 1.1072633266448975,grad_norm: 0.9999996120706044, iteration: 9815
loss: 1.0944066047668457,grad_norm: 0.9999993934478054, iteration: 9816
loss: 1.0737465620040894,grad_norm: 0.9999992937682065, iteration: 9817
loss: 1.0495927333831787,grad_norm: 0.9999994622084836, iteration: 9818
loss: 0.9893662333488464,grad_norm: 0.9999993041864642, iteration: 9819
loss: 1.009801983833313,grad_norm: 0.9999993763090733, iteration: 9820
loss: 0.9703353047370911,grad_norm: 0.9999992382351083, iteration: 9821
loss: 1.0709128379821777,grad_norm: 0.9999994326989572, iteration: 9822
loss: 1.0359004735946655,grad_norm: 0.9999992993949803, iteration: 9823
loss: 1.1037291288375854,grad_norm: 0.999999537514868, iteration: 9824
loss: 0.9983020424842834,grad_norm: 0.9999993700072712, iteration: 9825
loss: 1.031956672668457,grad_norm: 0.9999995777705011, iteration: 9826
loss: 1.0786088705062866,grad_norm: 0.9999995128131236, iteration: 9827
loss: 1.0579867362976074,grad_norm: 0.9999993466138253, iteration: 9828
loss: 1.0466758012771606,grad_norm: 0.9999992468274932, iteration: 9829
loss: 0.9728451371192932,grad_norm: 0.9999992014912455, iteration: 9830
loss: 1.032450556755066,grad_norm: 0.9999994330585683, iteration: 9831
loss: 1.055127739906311,grad_norm: 0.9999992699791285, iteration: 9832
loss: 0.9971450567245483,grad_norm: 0.9999992157786988, iteration: 9833
loss: 1.0901567935943604,grad_norm: 0.9999993654137618, iteration: 9834
loss: 1.0038676261901855,grad_norm: 0.9999992298154106, iteration: 9835
loss: 1.0470701456069946,grad_norm: 0.9999993702179509, iteration: 9836
loss: 1.0663279294967651,grad_norm: 0.9999991930103276, iteration: 9837
loss: 1.0049151182174683,grad_norm: 0.9999993852734873, iteration: 9838
loss: 1.0541507005691528,grad_norm: 0.9999991557125985, iteration: 9839
loss: 1.096228003501892,grad_norm: 0.99999925948861, iteration: 9840
loss: 1.0056674480438232,grad_norm: 0.9999993301974085, iteration: 9841
loss: 1.081665277481079,grad_norm: 0.9999996734324745, iteration: 9842
loss: 0.9904080033302307,grad_norm: 0.9999992854800802, iteration: 9843
loss: 1.0793966054916382,grad_norm: 0.9999996103124971, iteration: 9844
loss: 1.067394495010376,grad_norm: 0.9999994026339618, iteration: 9845
loss: 1.0499225854873657,grad_norm: 0.9999994599435799, iteration: 9846
loss: 1.0928078889846802,grad_norm: 0.999999400798012, iteration: 9847
loss: 1.0632350444793701,grad_norm: 0.9999990972749565, iteration: 9848
loss: 1.0451619625091553,grad_norm: 0.9999993401626937, iteration: 9849
loss: 1.0505545139312744,grad_norm: 0.9999992624463562, iteration: 9850
loss: 1.0591435432434082,grad_norm: 0.9999993984106367, iteration: 9851
loss: 1.0149611234664917,grad_norm: 0.9999993159440376, iteration: 9852
loss: 1.0616761445999146,grad_norm: 0.9999995572393368, iteration: 9853
loss: 1.0111339092254639,grad_norm: 0.9999992303526019, iteration: 9854
loss: 1.053993582725525,grad_norm: 0.9999994182620385, iteration: 9855
loss: 1.0391204357147217,grad_norm: 0.9999993724892942, iteration: 9856
loss: 1.0599420070648193,grad_norm: 0.9999995278751685, iteration: 9857
loss: 1.0074098110198975,grad_norm: 0.9999994285627237, iteration: 9858
loss: 1.0671483278274536,grad_norm: 0.9999994402122905, iteration: 9859
loss: 1.0187416076660156,grad_norm: 0.999999509709627, iteration: 9860
loss: 0.9982047080993652,grad_norm: 0.9999992220194047, iteration: 9861
loss: 1.0480226278305054,grad_norm: 0.9999994197376617, iteration: 9862
loss: 1.0250974893569946,grad_norm: 0.9999995290146264, iteration: 9863
loss: 1.0433622598648071,grad_norm: 0.9999993200820612, iteration: 9864
loss: 1.0187774896621704,grad_norm: 0.999999701704081, iteration: 9865
loss: 0.991566002368927,grad_norm: 0.9999993876583742, iteration: 9866
loss: 1.0114099979400635,grad_norm: 0.9999996019490643, iteration: 9867
loss: 1.018428921699524,grad_norm: 0.9999993967488477, iteration: 9868
loss: 1.0786082744598389,grad_norm: 0.9999992791010243, iteration: 9869
loss: 1.0353690385818481,grad_norm: 0.9999991045501393, iteration: 9870
loss: 1.102818250656128,grad_norm: 0.9999995045690209, iteration: 9871
loss: 1.0863523483276367,grad_norm: 0.9999993087850636, iteration: 9872
loss: 0.9997550845146179,grad_norm: 0.9999991536379059, iteration: 9873
loss: 0.9860942363739014,grad_norm: 0.9999992322498571, iteration: 9874
loss: 1.056147575378418,grad_norm: 0.9999991351434987, iteration: 9875
loss: 1.0924129486083984,grad_norm: 0.9999992549403951, iteration: 9876
loss: 1.0868053436279297,grad_norm: 0.9999994493323551, iteration: 9877
loss: 1.015342354774475,grad_norm: 0.999999174311639, iteration: 9878
loss: 1.0515271425247192,grad_norm: 0.9999995182332345, iteration: 9879
loss: 1.0394214391708374,grad_norm: 0.9999994504190327, iteration: 9880
loss: 1.0452146530151367,grad_norm: 0.9999993350145142, iteration: 9881
loss: 0.983025848865509,grad_norm: 0.9999994014426115, iteration: 9882
loss: 1.0690946578979492,grad_norm: 0.9999994871545798, iteration: 9883
loss: 1.0726193189620972,grad_norm: 0.9999992152809261, iteration: 9884
loss: 1.0410926342010498,grad_norm: 0.9999993893392654, iteration: 9885
loss: 1.057521104812622,grad_norm: 0.9999992975715397, iteration: 9886
loss: 1.0858893394470215,grad_norm: 0.9999995613556886, iteration: 9887
loss: 1.0246647596359253,grad_norm: 0.9999991738185399, iteration: 9888
loss: 1.0199755430221558,grad_norm: 0.9999994170112161, iteration: 9889
loss: 1.0321190357208252,grad_norm: 0.9999993093834091, iteration: 9890
loss: 1.0599966049194336,grad_norm: 0.9999992042863826, iteration: 9891
loss: 1.0142792463302612,grad_norm: 0.9999993640694466, iteration: 9892
loss: 1.0110529661178589,grad_norm: 0.9999996238843565, iteration: 9893
loss: 1.037330985069275,grad_norm: 0.9999992672934795, iteration: 9894
loss: 1.1144169569015503,grad_norm: 0.9999995488591248, iteration: 9895
loss: 1.0162009000778198,grad_norm: 0.9999993015418037, iteration: 9896
loss: 1.001297950744629,grad_norm: 0.9999993619239362, iteration: 9897
loss: 1.0771106481552124,grad_norm: 0.9999994730921411, iteration: 9898
loss: 1.076425313949585,grad_norm: 0.9999992793178909, iteration: 9899
loss: 1.0244877338409424,grad_norm: 0.9999991972164433, iteration: 9900
loss: 1.0536154508590698,grad_norm: 0.999999391312646, iteration: 9901
loss: 1.0687345266342163,grad_norm: 0.9999998940029939, iteration: 9902
loss: 1.012307047843933,grad_norm: 0.9999992054555563, iteration: 9903
loss: 1.011824369430542,grad_norm: 0.9999993219954162, iteration: 9904
loss: 1.0399705171585083,grad_norm: 0.9999994326863563, iteration: 9905
loss: 1.1227372884750366,grad_norm: 0.9999996269047062, iteration: 9906
loss: 1.0289312601089478,grad_norm: 0.9999994366644563, iteration: 9907
loss: 1.1316288709640503,grad_norm: 0.9999994519038615, iteration: 9908
loss: 0.9996047019958496,grad_norm: 0.9999992780743913, iteration: 9909
loss: 1.031050682067871,grad_norm: 0.9999994456762523, iteration: 9910
loss: 0.9514366388320923,grad_norm: 0.9999992157817864, iteration: 9911
loss: 1.0608562231063843,grad_norm: 0.9999992966857935, iteration: 9912
loss: 1.129673719406128,grad_norm: 0.9999993805906364, iteration: 9913
loss: 1.1141234636306763,grad_norm: 0.9999992378837177, iteration: 9914
loss: 1.0640112161636353,grad_norm: 0.9999992769185295, iteration: 9915
loss: 1.1063077449798584,grad_norm: 0.9999997146750473, iteration: 9916
loss: 1.065285325050354,grad_norm: 0.9999994484472742, iteration: 9917
loss: 1.0821151733398438,grad_norm: 0.9999993417085964, iteration: 9918
loss: 1.0410561561584473,grad_norm: 0.9999993052927968, iteration: 9919
loss: 1.0660450458526611,grad_norm: 0.9999993936271886, iteration: 9920
loss: 1.0643236637115479,grad_norm: 0.9999992475123136, iteration: 9921
loss: 1.0391708612442017,grad_norm: 0.9999993884770831, iteration: 9922
loss: 1.033589243888855,grad_norm: 0.9999991858070855, iteration: 9923
loss: 1.0327228307724,grad_norm: 0.9999994435220529, iteration: 9924
loss: 1.0112214088439941,grad_norm: 0.9999991527128925, iteration: 9925
loss: 1.05221426486969,grad_norm: 0.9999993231476102, iteration: 9926
loss: 0.9959996938705444,grad_norm: 0.9999991406726252, iteration: 9927
loss: 1.0386728048324585,grad_norm: 0.999999315169193, iteration: 9928
loss: 1.013723611831665,grad_norm: 0.9999993115376768, iteration: 9929
loss: 1.0520949363708496,grad_norm: 0.9999994335667555, iteration: 9930
loss: 1.0482873916625977,grad_norm: 0.999999398046064, iteration: 9931
loss: 0.9995929002761841,grad_norm: 0.9999991190019265, iteration: 9932
loss: 1.0686419010162354,grad_norm: 0.9999995816246477, iteration: 9933
loss: 1.0190850496292114,grad_norm: 0.9999991921043055, iteration: 9934
loss: 1.048237919807434,grad_norm: 0.9999993289971394, iteration: 9935
loss: 1.0437403917312622,grad_norm: 0.9999992670977292, iteration: 9936
loss: 1.0433632135391235,grad_norm: 0.9999992418719129, iteration: 9937
loss: 1.0504825115203857,grad_norm: 0.9999994562142164, iteration: 9938
loss: 1.0473284721374512,grad_norm: 0.9999992647887432, iteration: 9939
loss: 1.053188443183899,grad_norm: 0.9999992148480612, iteration: 9940
loss: 1.0289453268051147,grad_norm: 0.9999991975396537, iteration: 9941
loss: 1.056646704673767,grad_norm: 0.9999993374357166, iteration: 9942
loss: 1.0677697658538818,grad_norm: 0.9999993397796976, iteration: 9943
loss: 1.0299465656280518,grad_norm: 0.9999993415950699, iteration: 9944
loss: 1.0550947189331055,grad_norm: 0.999999397900202, iteration: 9945
loss: 1.0720258951187134,grad_norm: 0.9999994368810483, iteration: 9946
loss: 1.0181621313095093,grad_norm: 0.9999991898285815, iteration: 9947
loss: 1.0272804498672485,grad_norm: 0.9999992111212143, iteration: 9948
loss: 1.010575294494629,grad_norm: 0.9999992072868401, iteration: 9949
loss: 1.0955822467803955,grad_norm: 0.9999996063565127, iteration: 9950
loss: 1.0586533546447754,grad_norm: 0.9999992649612067, iteration: 9951
loss: 1.0627567768096924,grad_norm: 0.9999995042158323, iteration: 9952
loss: 1.03775954246521,grad_norm: 0.9999993258985306, iteration: 9953
loss: 1.06669020652771,grad_norm: 0.999999273843399, iteration: 9954
loss: 1.0306257009506226,grad_norm: 0.999999341824751, iteration: 9955
loss: 1.0353198051452637,grad_norm: 0.999999436555039, iteration: 9956
loss: 1.0382100343704224,grad_norm: 0.9999992695259738, iteration: 9957
loss: 0.9936811923980713,grad_norm: 0.9999992830744874, iteration: 9958
loss: 1.074122667312622,grad_norm: 0.9999992136896294, iteration: 9959
loss: 1.034591555595398,grad_norm: 0.999999644094465, iteration: 9960
loss: 1.0288212299346924,grad_norm: 0.9999994478691715, iteration: 9961
loss: 1.073439598083496,grad_norm: 0.9999991867179574, iteration: 9962
loss: 1.01954185962677,grad_norm: 0.9999994732534871, iteration: 9963
loss: 1.01811683177948,grad_norm: 0.9999994505823546, iteration: 9964
loss: 1.0180507898330688,grad_norm: 0.999999278809623, iteration: 9965
loss: 1.0854865312576294,grad_norm: 0.9999995371866032, iteration: 9966
loss: 1.0900366306304932,grad_norm: 0.9999996977914533, iteration: 9967
loss: 1.034271001815796,grad_norm: 0.9999992006056271, iteration: 9968
loss: 1.0235539674758911,grad_norm: 0.9999992940260932, iteration: 9969
loss: 1.099381685256958,grad_norm: 0.9999993194427245, iteration: 9970
loss: 1.0516077280044556,grad_norm: 0.9999993430053457, iteration: 9971
loss: 1.0732089281082153,grad_norm: 0.9999993709669478, iteration: 9972
loss: 1.0009437799453735,grad_norm: 0.9999993308014109, iteration: 9973
loss: 1.0710002183914185,grad_norm: 0.9999994804491654, iteration: 9974
loss: 1.042397379875183,grad_norm: 0.9999992541857282, iteration: 9975
loss: 1.018872618675232,grad_norm: 0.9999992419689004, iteration: 9976
loss: 1.034398078918457,grad_norm: 0.9999992397164281, iteration: 9977
loss: 1.052955150604248,grad_norm: 0.9999993542723208, iteration: 9978
loss: 1.0883204936981201,grad_norm: 0.9999994562984078, iteration: 9979
loss: 1.064268708229065,grad_norm: 0.9999992651419928, iteration: 9980
loss: 1.0213853120803833,grad_norm: 0.9999991462929032, iteration: 9981
loss: 1.037277102470398,grad_norm: 0.9999992763682384, iteration: 9982
loss: 0.9945388436317444,grad_norm: 0.9999993488206005, iteration: 9983
loss: 1.0242278575897217,grad_norm: 0.9999992079413401, iteration: 9984
loss: 1.0713310241699219,grad_norm: 0.9999993930281351, iteration: 9985
loss: 1.0286130905151367,grad_norm: 0.999999249970158, iteration: 9986
loss: 1.0104045867919922,grad_norm: 0.9999994594532945, iteration: 9987
loss: 1.0116382837295532,grad_norm: 0.9999990782927195, iteration: 9988
loss: 1.0291109085083008,grad_norm: 0.9999991962862161, iteration: 9989
loss: 1.0889118909835815,grad_norm: 0.9999992035951167, iteration: 9990
loss: 1.053623914718628,grad_norm: 0.999999720043686, iteration: 9991
loss: 1.045781135559082,grad_norm: 0.9999995158153532, iteration: 9992
loss: 1.0636967420578003,grad_norm: 0.9999993442326481, iteration: 9993
loss: 1.0531154870986938,grad_norm: 0.9999992396915678, iteration: 9994
loss: 0.9874342083930969,grad_norm: 0.9999992365969694, iteration: 9995
loss: 1.0265716314315796,grad_norm: 0.9999993366106089, iteration: 9996
loss: 1.0515598058700562,grad_norm: 0.999999374297111, iteration: 9997
loss: 1.0734392404556274,grad_norm: 0.9999993165733613, iteration: 9998
loss: 1.0587158203125,grad_norm: 0.9999993584471518, iteration: 9999
loss: 1.0001519918441772,grad_norm: 0.9999994761614115, iteration: 10000
Evaluating at step 10000
{'val': 1.0138992927968502, 'test': 2.0179202522332678}
loss: 1.0018752813339233,grad_norm: 0.9999994358938257, iteration: 10001
loss: 1.0478832721710205,grad_norm: 0.9999993059706044, iteration: 10002
loss: 1.0842812061309814,grad_norm: 0.9999996215262342, iteration: 10003
loss: 1.0392175912857056,grad_norm: 0.9999991250881689, iteration: 10004
loss: 0.9923625588417053,grad_norm: 0.9999993025742485, iteration: 10005
loss: 1.0356168746948242,grad_norm: 0.9999994916775076, iteration: 10006
loss: 1.06587553024292,grad_norm: 0.9999992552303334, iteration: 10007
loss: 1.0567833185195923,grad_norm: 0.9999992393015499, iteration: 10008
loss: 0.9655545353889465,grad_norm: 0.9999994215471095, iteration: 10009
loss: 1.0484519004821777,grad_norm: 0.999999192583346, iteration: 10010
loss: 1.051194190979004,grad_norm: 0.9999995972086484, iteration: 10011
loss: 1.056377649307251,grad_norm: 0.9999992915256659, iteration: 10012
loss: 1.052994728088379,grad_norm: 0.9999992726211956, iteration: 10013
loss: 1.0849868059158325,grad_norm: 0.9999996278361443, iteration: 10014
loss: 1.0405176877975464,grad_norm: 0.9999993295610211, iteration: 10015
loss: 1.0160351991653442,grad_norm: 0.9999993725270964, iteration: 10016
loss: 1.0061973333358765,grad_norm: 0.9999992017479415, iteration: 10017
loss: 1.1103099584579468,grad_norm: 0.9999995806187283, iteration: 10018
loss: 1.0527255535125732,grad_norm: 0.9999993654448944, iteration: 10019
loss: 1.0163761377334595,grad_norm: 0.9999995098113048, iteration: 10020
loss: 1.0549304485321045,grad_norm: 0.9999991094236416, iteration: 10021
loss: 0.9889193773269653,grad_norm: 0.99999912274834, iteration: 10022
loss: 1.0005247592926025,grad_norm: 0.9999991736381963, iteration: 10023
loss: 1.0408024787902832,grad_norm: 0.9999992289772421, iteration: 10024
loss: 1.0403172969818115,grad_norm: 0.9999991657998106, iteration: 10025
loss: 0.9897401332855225,grad_norm: 0.9999992561847778, iteration: 10026
loss: 1.0825724601745605,grad_norm: 0.9999994005728559, iteration: 10027
loss: 1.0509480237960815,grad_norm: 0.9999993034620162, iteration: 10028
loss: 0.9875127077102661,grad_norm: 0.9999992114856874, iteration: 10029
loss: 1.056311011314392,grad_norm: 0.9999993163719547, iteration: 10030
loss: 1.0120177268981934,grad_norm: 0.9999992979693031, iteration: 10031
loss: 1.0587552785873413,grad_norm: 0.9999993216184894, iteration: 10032
loss: 1.0133284330368042,grad_norm: 0.9999991716107908, iteration: 10033
loss: 1.0489709377288818,grad_norm: 0.999999440073864, iteration: 10034
loss: 1.071756362915039,grad_norm: 0.9999993915635903, iteration: 10035
loss: 1.0321228504180908,grad_norm: 0.9999998013241119, iteration: 10036
loss: 1.0741008520126343,grad_norm: 0.9999994646081789, iteration: 10037
loss: 1.0029582977294922,grad_norm: 0.9999992321978752, iteration: 10038
loss: 1.0461463928222656,grad_norm: 0.9999993844758814, iteration: 10039
loss: 1.06813645362854,grad_norm: 0.9999992569868912, iteration: 10040
loss: 1.1025841236114502,grad_norm: 0.9999996746533747, iteration: 10041
loss: 1.0268535614013672,grad_norm: 0.9999991324584868, iteration: 10042
loss: 1.0503665208816528,grad_norm: 0.9999994029685748, iteration: 10043
loss: 1.024533748626709,grad_norm: 0.999999335913185, iteration: 10044
loss: 1.0576103925704956,grad_norm: 0.9999993359581358, iteration: 10045
loss: 1.0741463899612427,grad_norm: 0.9999999045676801, iteration: 10046
loss: 1.0305800437927246,grad_norm: 0.9999991946189698, iteration: 10047
loss: 1.0801258087158203,grad_norm: 0.9999993847268924, iteration: 10048
loss: 1.0373072624206543,grad_norm: 0.9999992855007578, iteration: 10049
loss: 1.0703198909759521,grad_norm: 0.9999994468072422, iteration: 10050
loss: 1.0197303295135498,grad_norm: 0.9999991539429682, iteration: 10051
loss: 1.0664643049240112,grad_norm: 0.9999992312315492, iteration: 10052
loss: 1.0418401956558228,grad_norm: 0.9999992636434625, iteration: 10053
loss: 1.0584765672683716,grad_norm: 0.9999993806799378, iteration: 10054
loss: 1.0635344982147217,grad_norm: 0.9999994854531179, iteration: 10055
loss: 1.0037370920181274,grad_norm: 0.9999995899324127, iteration: 10056
loss: 1.0252845287322998,grad_norm: 0.9999992169881967, iteration: 10057
loss: 0.994799792766571,grad_norm: 0.9999992879117134, iteration: 10058
loss: 1.0263772010803223,grad_norm: 0.9999994237505553, iteration: 10059
loss: 1.058304786682129,grad_norm: 0.9999993450428775, iteration: 10060
loss: 1.0445486307144165,grad_norm: 0.9999993319683119, iteration: 10061
loss: 1.0570762157440186,grad_norm: 0.9999995424816311, iteration: 10062
loss: 1.0610257387161255,grad_norm: 0.9999993527706139, iteration: 10063
loss: 1.0402199029922485,grad_norm: 0.9999992580517841, iteration: 10064
loss: 1.0158220529556274,grad_norm: 0.9999992449415329, iteration: 10065
loss: 1.001315951347351,grad_norm: 0.9999992575746489, iteration: 10066
loss: 1.0188450813293457,grad_norm: 0.9999993573706286, iteration: 10067
loss: 1.017280101776123,grad_norm: 0.9999992817633819, iteration: 10068
loss: 1.0208289623260498,grad_norm: 0.9999996122593273, iteration: 10069
loss: 1.0260488986968994,grad_norm: 0.999999411312454, iteration: 10070
loss: 1.0148371458053589,grad_norm: 0.999999292263906, iteration: 10071
loss: 1.0340052843093872,grad_norm: 0.9999990296357267, iteration: 10072
loss: 1.0331403017044067,grad_norm: 0.9999992264902251, iteration: 10073
loss: 0.9842624664306641,grad_norm: 0.9999993420150904, iteration: 10074
loss: 1.047173023223877,grad_norm: 0.9999995482671428, iteration: 10075
loss: 1.0593780279159546,grad_norm: 0.999999547048835, iteration: 10076
loss: 1.046067476272583,grad_norm: 0.999999398750638, iteration: 10077
loss: 0.980675220489502,grad_norm: 0.9999994496793302, iteration: 10078
loss: 1.008009910583496,grad_norm: 0.9999991180114881, iteration: 10079
loss: 1.0563695430755615,grad_norm: 0.9999993662258604, iteration: 10080
loss: 1.0227261781692505,grad_norm: 0.9999994466997424, iteration: 10081
loss: 1.1055976152420044,grad_norm: 0.9999992976844897, iteration: 10082
loss: 1.0412050485610962,grad_norm: 0.9999996562522695, iteration: 10083
loss: 1.0676571130752563,grad_norm: 0.9999992708303188, iteration: 10084
loss: 1.081475019454956,grad_norm: 0.9999992988151537, iteration: 10085
loss: 1.0195791721343994,grad_norm: 0.9999992678175521, iteration: 10086
loss: 1.0231746435165405,grad_norm: 0.9999991170301306, iteration: 10087
loss: 1.0271471738815308,grad_norm: 0.9999992002080633, iteration: 10088
loss: 1.0426918268203735,grad_norm: 0.9999991462733689, iteration: 10089
loss: 1.0315252542495728,grad_norm: 0.9999992585195644, iteration: 10090
loss: 1.011812448501587,grad_norm: 0.9999993716136374, iteration: 10091
loss: 1.0722655057907104,grad_norm: 0.9999996328428474, iteration: 10092
loss: 1.0363668203353882,grad_norm: 0.9999995645833472, iteration: 10093
loss: 1.018328070640564,grad_norm: 0.9999995132055962, iteration: 10094
loss: 1.1060173511505127,grad_norm: 0.9999996429308007, iteration: 10095
loss: 1.0032365322113037,grad_norm: 0.999999359902355, iteration: 10096
loss: 1.0559358596801758,grad_norm: 0.9999991756691686, iteration: 10097
loss: 1.042195200920105,grad_norm: 0.9999992759534312, iteration: 10098
loss: 1.0266516208648682,grad_norm: 0.9999994493276445, iteration: 10099
loss: 1.0807405710220337,grad_norm: 0.9999997219280351, iteration: 10100
loss: 1.0169833898544312,grad_norm: 0.9999990945923759, iteration: 10101
loss: 1.0659785270690918,grad_norm: 0.999999204230469, iteration: 10102
loss: 1.0428229570388794,grad_norm: 0.9999993570303717, iteration: 10103
loss: 0.995225727558136,grad_norm: 0.9999990624415332, iteration: 10104
loss: 1.0350011587142944,grad_norm: 0.9999995064471807, iteration: 10105
loss: 1.1277439594268799,grad_norm: 0.9999994401633356, iteration: 10106
loss: 1.058489441871643,grad_norm: 0.9999992821909073, iteration: 10107
loss: 1.0113482475280762,grad_norm: 0.9999994287316852, iteration: 10108
loss: 1.0550106763839722,grad_norm: 0.9999993564315804, iteration: 10109
loss: 1.060484528541565,grad_norm: 0.9999994200734444, iteration: 10110
loss: 1.064856767654419,grad_norm: 0.9999993143525624, iteration: 10111
loss: 1.0171024799346924,grad_norm: 0.9999994383469035, iteration: 10112
loss: 1.0188627243041992,grad_norm: 0.9999994425721641, iteration: 10113
loss: 0.9865149855613708,grad_norm: 0.9999991553318527, iteration: 10114
loss: 1.0583910942077637,grad_norm: 0.9999995908931137, iteration: 10115
loss: 1.0308195352554321,grad_norm: 0.9999997558063504, iteration: 10116
loss: 1.065609335899353,grad_norm: 0.9999996058099921, iteration: 10117
loss: 1.019989252090454,grad_norm: 0.999999323246163, iteration: 10118
loss: 1.024407982826233,grad_norm: 0.999999252609625, iteration: 10119
loss: 1.0205234289169312,grad_norm: 0.9999992399642103, iteration: 10120
loss: 1.0952801704406738,grad_norm: 0.9999992819926643, iteration: 10121
loss: 1.0124588012695312,grad_norm: 0.9999994929864451, iteration: 10122
loss: 1.0567288398742676,grad_norm: 0.9999993696268249, iteration: 10123
loss: 1.0132962465286255,grad_norm: 0.9999993252887555, iteration: 10124
loss: 1.0458098649978638,grad_norm: 0.9999994396410868, iteration: 10125
loss: 1.0722485780715942,grad_norm: 0.999999353380663, iteration: 10126
loss: 1.048317313194275,grad_norm: 0.9999993637836515, iteration: 10127
loss: 1.0372453927993774,grad_norm: 0.9999991956680951, iteration: 10128
loss: 1.0635043382644653,grad_norm: 0.9999997341412434, iteration: 10129
loss: 0.9765347838401794,grad_norm: 0.9999994271681388, iteration: 10130
loss: 1.0733751058578491,grad_norm: 0.9999994951747518, iteration: 10131
loss: 1.0489062070846558,grad_norm: 0.9999994002795471, iteration: 10132
loss: 1.03689444065094,grad_norm: 0.9999993461898788, iteration: 10133
loss: 1.0098177194595337,grad_norm: 0.999999132217729, iteration: 10134
loss: 1.0370012521743774,grad_norm: 0.9999994130131348, iteration: 10135
loss: 1.0944246053695679,grad_norm: 0.9999997205961214, iteration: 10136
loss: 0.9507696628570557,grad_norm: 0.9999991315926222, iteration: 10137
loss: 0.9920198321342468,grad_norm: 0.9999992549143604, iteration: 10138
loss: 1.0779364109039307,grad_norm: 0.999999445488441, iteration: 10139
loss: 1.022078514099121,grad_norm: 0.9999995582877975, iteration: 10140
loss: 1.0677441358566284,grad_norm: 0.9999992683101411, iteration: 10141
loss: 1.035361409187317,grad_norm: 0.9999993990307426, iteration: 10142
loss: 1.1023458242416382,grad_norm: 0.9999996159535269, iteration: 10143
loss: 1.096084475517273,grad_norm: 0.9999998559961085, iteration: 10144
loss: 1.0701731443405151,grad_norm: 0.9999993362869674, iteration: 10145
loss: 1.0597871541976929,grad_norm: 0.999999366733085, iteration: 10146
loss: 1.0531212091445923,grad_norm: 0.9999993607023878, iteration: 10147
loss: 1.0658689737319946,grad_norm: 0.9999993241116705, iteration: 10148
loss: 1.10038161277771,grad_norm: 0.9999998663257571, iteration: 10149
loss: 1.042214035987854,grad_norm: 0.9999994130045112, iteration: 10150
loss: 1.0695713758468628,grad_norm: 0.9999995497025649, iteration: 10151
loss: 1.0569682121276855,grad_norm: 0.9999992711662369, iteration: 10152
loss: 1.1014453172683716,grad_norm: 0.9999997937168286, iteration: 10153
loss: 1.0149338245391846,grad_norm: 0.9999993705348799, iteration: 10154
loss: 1.025709629058838,grad_norm: 0.9999992159504963, iteration: 10155
loss: 0.988089919090271,grad_norm: 0.9999994816664799, iteration: 10156
loss: 1.0477086305618286,grad_norm: 0.9999992766350323, iteration: 10157
loss: 1.0458863973617554,grad_norm: 0.999999252136737, iteration: 10158
loss: 1.0933724641799927,grad_norm: 0.9999994128617505, iteration: 10159
loss: 1.0106347799301147,grad_norm: 0.9999994496041256, iteration: 10160
loss: 1.0477793216705322,grad_norm: 0.9999992274296113, iteration: 10161
loss: 1.082292914390564,grad_norm: 0.9999991856476803, iteration: 10162
loss: 1.0793054103851318,grad_norm: 0.9999992887812917, iteration: 10163
loss: 1.0969927310943604,grad_norm: 0.999999741012882, iteration: 10164
loss: 1.0265148878097534,grad_norm: 0.999999140790652, iteration: 10165
loss: 1.03545081615448,grad_norm: 0.9999992286429773, iteration: 10166
loss: 1.0672249794006348,grad_norm: 0.9999992356579536, iteration: 10167
loss: 1.0494699478149414,grad_norm: 0.9999991482040032, iteration: 10168
loss: 1.0152400732040405,grad_norm: 0.9999993749700137, iteration: 10169
loss: 1.056864857673645,grad_norm: 0.9999992757968981, iteration: 10170
loss: 1.0557864904403687,grad_norm: 0.9999992810033426, iteration: 10171
loss: 1.0168691873550415,grad_norm: 0.9999992979995959, iteration: 10172
loss: 1.045665979385376,grad_norm: 0.9999993940884209, iteration: 10173
loss: 0.9869483709335327,grad_norm: 0.9999993215082276, iteration: 10174
loss: 1.0318409204483032,grad_norm: 0.9999992831473264, iteration: 10175
loss: 1.0387797355651855,grad_norm: 0.9999994138214248, iteration: 10176
loss: 1.0440303087234497,grad_norm: 0.9999995349247803, iteration: 10177
loss: 0.9789721369743347,grad_norm: 0.9999991286344863, iteration: 10178
loss: 1.0443651676177979,grad_norm: 0.9999994248024765, iteration: 10179
loss: 1.151748538017273,grad_norm: 0.9999995939136861, iteration: 10180
loss: 1.0669307708740234,grad_norm: 0.9999992951866625, iteration: 10181
loss: 1.0393450260162354,grad_norm: 0.999999292069146, iteration: 10182
loss: 1.0338624715805054,grad_norm: 0.9999991490551141, iteration: 10183
loss: 1.0433598756790161,grad_norm: 0.99999941506578, iteration: 10184
loss: 1.0500400066375732,grad_norm: 0.9999993010695046, iteration: 10185
loss: 1.0333889722824097,grad_norm: 0.9999993233367471, iteration: 10186
loss: 0.9896720051765442,grad_norm: 0.9999993757369904, iteration: 10187
loss: 0.9899489283561707,grad_norm: 0.999999250353292, iteration: 10188
loss: 1.0193253755569458,grad_norm: 0.9999992363400922, iteration: 10189
loss: 1.0467618703842163,grad_norm: 0.9999994080383455, iteration: 10190
loss: 1.0448944568634033,grad_norm: 0.9999993487690938, iteration: 10191
loss: 0.9953035712242126,grad_norm: 0.99999934783379, iteration: 10192
loss: 1.0455631017684937,grad_norm: 0.9999993724045851, iteration: 10193
loss: 1.0891247987747192,grad_norm: 0.9999993981127672, iteration: 10194
loss: 1.022580862045288,grad_norm: 0.9999994197193333, iteration: 10195
loss: 0.9720215797424316,grad_norm: 0.9999992828658921, iteration: 10196
loss: 1.0531772375106812,grad_norm: 0.9999993515014621, iteration: 10197
loss: 0.9826983213424683,grad_norm: 0.9999994359428879, iteration: 10198
loss: 1.035609245300293,grad_norm: 0.9999992009904609, iteration: 10199
loss: 1.0699658393859863,grad_norm: 0.9999994408606435, iteration: 10200
loss: 1.0800319910049438,grad_norm: 0.9999992739388203, iteration: 10201
loss: 1.011580228805542,grad_norm: 0.9999994033894997, iteration: 10202
loss: 1.0176669359207153,grad_norm: 0.9999991454316849, iteration: 10203
loss: 1.0476624965667725,grad_norm: 0.9999994498500638, iteration: 10204
loss: 0.9995355010032654,grad_norm: 0.999999130942405, iteration: 10205
loss: 1.0110670328140259,grad_norm: 0.9999992313776953, iteration: 10206
loss: 1.0428358316421509,grad_norm: 0.9999991619383036, iteration: 10207
loss: 1.0589369535446167,grad_norm: 0.9999996828877833, iteration: 10208
loss: 1.0666475296020508,grad_norm: 0.9999995254649453, iteration: 10209
loss: 1.0124400854110718,grad_norm: 0.9999991874138245, iteration: 10210
loss: 1.0345803499221802,grad_norm: 0.9999990650194114, iteration: 10211
loss: 1.0546643733978271,grad_norm: 0.9999997978148758, iteration: 10212
loss: 1.0011941194534302,grad_norm: 0.9999995441739502, iteration: 10213
loss: 1.0260872840881348,grad_norm: 0.9999992889725858, iteration: 10214
loss: 1.0330225229263306,grad_norm: 0.9999994288244564, iteration: 10215
loss: 1.0686404705047607,grad_norm: 0.9999994107379291, iteration: 10216
loss: 1.0070607662200928,grad_norm: 0.9999995822409437, iteration: 10217
loss: 1.0946540832519531,grad_norm: 0.9999994095793086, iteration: 10218
loss: 1.0470079183578491,grad_norm: 0.9999993132887036, iteration: 10219
loss: 1.0118870735168457,grad_norm: 0.9999993020841317, iteration: 10220
loss: 1.0859960317611694,grad_norm: 0.9999996290650524, iteration: 10221
loss: 1.003542184829712,grad_norm: 0.9999995290662183, iteration: 10222
loss: 1.0007777214050293,grad_norm: 0.999999476977562, iteration: 10223
loss: 1.0590882301330566,grad_norm: 0.9999994552255261, iteration: 10224
loss: 0.9803310632705688,grad_norm: 0.9999991896263039, iteration: 10225
loss: 1.033429741859436,grad_norm: 0.9999992630601023, iteration: 10226
loss: 1.0521140098571777,grad_norm: 0.9999993006020235, iteration: 10227
loss: 0.9873720407485962,grad_norm: 0.9999992502133124, iteration: 10228
loss: 0.981599748134613,grad_norm: 0.9999992111350605, iteration: 10229
loss: 1.0576997995376587,grad_norm: 0.9999993932228884, iteration: 10230
loss: 1.0295110940933228,grad_norm: 0.9999994209477979, iteration: 10231
loss: 1.0103918313980103,grad_norm: 0.9999993059929695, iteration: 10232
loss: 1.034730315208435,grad_norm: 0.9999994772876348, iteration: 10233
loss: 1.0002425909042358,grad_norm: 0.9999992968522697, iteration: 10234
loss: 1.057261347770691,grad_norm: 0.9999992059162713, iteration: 10235
loss: 1.0173219442367554,grad_norm: 0.9999992812664843, iteration: 10236
loss: 1.0483418703079224,grad_norm: 0.9999993623925173, iteration: 10237
loss: 1.0726373195648193,grad_norm: 0.999999595920859, iteration: 10238
loss: 1.0267561674118042,grad_norm: 0.9999992592058872, iteration: 10239
loss: 1.034652829170227,grad_norm: 0.9999999931382274, iteration: 10240
loss: 1.0916038751602173,grad_norm: 0.9999992567181898, iteration: 10241
loss: 0.9706913828849792,grad_norm: 0.9999992994336063, iteration: 10242
loss: 1.0993674993515015,grad_norm: 0.9999994239122572, iteration: 10243
loss: 1.0529061555862427,grad_norm: 0.9999994267122412, iteration: 10244
loss: 1.0791906118392944,grad_norm: 0.9999994144166258, iteration: 10245
loss: 1.025747537612915,grad_norm: 0.9999993113119159, iteration: 10246
loss: 1.0992108583450317,grad_norm: 0.999999379211888, iteration: 10247
loss: 1.052776575088501,grad_norm: 0.9999992516261063, iteration: 10248
loss: 1.0379040241241455,grad_norm: 0.999999326653689, iteration: 10249
loss: 1.0968376398086548,grad_norm: 0.9999995823749886, iteration: 10250
loss: 0.9820900559425354,grad_norm: 0.9999991546998022, iteration: 10251
loss: 1.0793068408966064,grad_norm: 0.9999993370895592, iteration: 10252
loss: 1.0217519998550415,grad_norm: 0.9999991093693789, iteration: 10253
loss: 1.0439817905426025,grad_norm: 0.9999992392258915, iteration: 10254
loss: 1.0369148254394531,grad_norm: 0.9999992417777118, iteration: 10255
loss: 1.0940996408462524,grad_norm: 0.9999993865139992, iteration: 10256
loss: 1.023850679397583,grad_norm: 0.9999993672613096, iteration: 10257
loss: 1.0268489122390747,grad_norm: 0.9999993097439128, iteration: 10258
loss: 1.061887502670288,grad_norm: 0.9999995512513921, iteration: 10259
loss: 1.0055222511291504,grad_norm: 0.9999992756700069, iteration: 10260
loss: 1.0561842918395996,grad_norm: 0.9999991588713906, iteration: 10261
loss: 0.9830538034439087,grad_norm: 0.9999992954857647, iteration: 10262
loss: 1.0361891984939575,grad_norm: 0.9999992186150637, iteration: 10263
loss: 1.054176926612854,grad_norm: 0.9999996014858777, iteration: 10264
loss: 0.9939525723457336,grad_norm: 0.9999994777095976, iteration: 10265
loss: 0.9853262305259705,grad_norm: 0.999999249894717, iteration: 10266
loss: 0.9513787627220154,grad_norm: 0.9999992092487305, iteration: 10267
loss: 1.005003809928894,grad_norm: 0.9999992853968643, iteration: 10268
loss: 1.046513319015503,grad_norm: 0.9999993058810478, iteration: 10269
loss: 1.1065841913223267,grad_norm: 0.9999997385434366, iteration: 10270
loss: 1.071488857269287,grad_norm: 0.9999991222246366, iteration: 10271
loss: 1.0615673065185547,grad_norm: 0.9999994268168902, iteration: 10272
loss: 0.9887421131134033,grad_norm: 0.9999993047636971, iteration: 10273
loss: 1.0586506128311157,grad_norm: 0.999999335812247, iteration: 10274
loss: 1.0611510276794434,grad_norm: 0.9999993488095599, iteration: 10275
loss: 1.0460216999053955,grad_norm: 0.9999993762856396, iteration: 10276
loss: 1.0478578805923462,grad_norm: 0.9999993659153347, iteration: 10277
loss: 1.039094090461731,grad_norm: 0.9999997501915062, iteration: 10278
loss: 1.028602123260498,grad_norm: 0.9999992333775066, iteration: 10279
loss: 1.0573019981384277,grad_norm: 0.9999992997858463, iteration: 10280
loss: 1.105185627937317,grad_norm: 0.999999656011547, iteration: 10281
loss: 1.0506401062011719,grad_norm: 0.9999995323652912, iteration: 10282
loss: 1.0459071397781372,grad_norm: 0.9999994292368812, iteration: 10283
loss: 1.0531047582626343,grad_norm: 0.9999995317093284, iteration: 10284
loss: 1.0514954328536987,grad_norm: 0.9999994498327535, iteration: 10285
loss: 1.029484748840332,grad_norm: 0.9999992885547987, iteration: 10286
loss: 1.076024055480957,grad_norm: 0.9999995290325546, iteration: 10287
loss: 1.0454025268554688,grad_norm: 0.9999992496171137, iteration: 10288
loss: 1.045261025428772,grad_norm: 0.9999994453505957, iteration: 10289
loss: 1.0761536359786987,grad_norm: 0.9999997284185078, iteration: 10290
loss: 0.9861559271812439,grad_norm: 0.9999993620394535, iteration: 10291
loss: 1.0719720125198364,grad_norm: 0.9999993763454832, iteration: 10292
loss: 1.0260529518127441,grad_norm: 0.9999993200820837, iteration: 10293
loss: 1.018364429473877,grad_norm: 0.9999996183090526, iteration: 10294
loss: 1.015128493309021,grad_norm: 0.9999993043963391, iteration: 10295
loss: 1.0555747747421265,grad_norm: 0.9999995293665019, iteration: 10296
loss: 1.1026474237442017,grad_norm: 0.9999993888463636, iteration: 10297
loss: 1.0722981691360474,grad_norm: 0.9999994541304401, iteration: 10298
loss: 1.0916508436203003,grad_norm: 0.9999996000971683, iteration: 10299
loss: 1.07486093044281,grad_norm: 0.9999994652429506, iteration: 10300
loss: 1.0136140584945679,grad_norm: 0.9999991429294772, iteration: 10301
loss: 1.0479600429534912,grad_norm: 0.9999995256297116, iteration: 10302
loss: 1.0679799318313599,grad_norm: 0.9999994473291408, iteration: 10303
loss: 1.069881796836853,grad_norm: 0.9999991602372255, iteration: 10304
loss: 1.0367199182510376,grad_norm: 0.9999997413735402, iteration: 10305
loss: 1.028898000717163,grad_norm: 0.9999993067579368, iteration: 10306
loss: 1.0209436416625977,grad_norm: 0.9999991411345879, iteration: 10307
loss: 1.0480681657791138,grad_norm: 0.9999996331784193, iteration: 10308
loss: 1.0539450645446777,grad_norm: 0.9999993429811979, iteration: 10309
loss: 1.0306663513183594,grad_norm: 0.9999993784726097, iteration: 10310
loss: 1.05962073802948,grad_norm: 0.9999991943143332, iteration: 10311
loss: 1.0402401685714722,grad_norm: 0.9999991427378031, iteration: 10312
loss: 1.0386751890182495,grad_norm: 0.999999533803754, iteration: 10313
loss: 1.0258293151855469,grad_norm: 0.999999347017354, iteration: 10314
loss: 1.0642704963684082,grad_norm: 0.9999995517698412, iteration: 10315
loss: 0.9960854053497314,grad_norm: 0.9999990973570733, iteration: 10316
loss: 1.036405324935913,grad_norm: 0.9999993960088712, iteration: 10317
loss: 1.0485882759094238,grad_norm: 0.9999992767985464, iteration: 10318
loss: 1.0472222566604614,grad_norm: 0.9999994054422192, iteration: 10319
loss: 0.9985641837120056,grad_norm: 0.9999993482205255, iteration: 10320
loss: 1.0294400453567505,grad_norm: 0.9999993502679704, iteration: 10321
loss: 1.0416321754455566,grad_norm: 0.9999992852225719, iteration: 10322
loss: 1.0033621788024902,grad_norm: 0.9999993521064567, iteration: 10323
loss: 0.9845892786979675,grad_norm: 0.99999919585975, iteration: 10324
loss: 1.0253465175628662,grad_norm: 0.9999992704576856, iteration: 10325
loss: 1.025043249130249,grad_norm: 0.9999995668790558, iteration: 10326
loss: 1.1042284965515137,grad_norm: 0.9999995927359573, iteration: 10327
loss: 0.991811990737915,grad_norm: 0.9999996402641548, iteration: 10328
loss: 1.0508641004562378,grad_norm: 0.9999993179318308, iteration: 10329
loss: 1.0265676975250244,grad_norm: 0.9999993743318277, iteration: 10330
loss: 1.0145976543426514,grad_norm: 0.9999992985102851, iteration: 10331
loss: 1.050559163093567,grad_norm: 0.999999448482303, iteration: 10332
loss: 0.9726254343986511,grad_norm: 0.999999173795158, iteration: 10333
loss: 1.0594922304153442,grad_norm: 0.9999993678464345, iteration: 10334
loss: 1.0304286479949951,grad_norm: 0.9999992260850313, iteration: 10335
loss: 1.0797450542449951,grad_norm: 0.9999996238410965, iteration: 10336
loss: 1.060577630996704,grad_norm: 0.9999995517251892, iteration: 10337
loss: 1.043138861656189,grad_norm: 0.9999992611278596, iteration: 10338
loss: 1.0527188777923584,grad_norm: 0.999999289461209, iteration: 10339
loss: 0.953702986240387,grad_norm: 0.9999993865790878, iteration: 10340
loss: 1.0382835865020752,grad_norm: 0.999999206210928, iteration: 10341
loss: 1.1048554182052612,grad_norm: 0.9999993841150167, iteration: 10342
loss: 1.0568091869354248,grad_norm: 0.9999993117230764, iteration: 10343
loss: 1.0835689306259155,grad_norm: 0.9999994673980528, iteration: 10344
loss: 1.0038368701934814,grad_norm: 0.9999996912760931, iteration: 10345
loss: 1.0701556205749512,grad_norm: 0.9999995742336788, iteration: 10346
loss: 0.992217481136322,grad_norm: 0.9999992506119678, iteration: 10347
loss: 1.0745465755462646,grad_norm: 0.9999998316127131, iteration: 10348
loss: 1.0013507604599,grad_norm: 0.9999993363037512, iteration: 10349
loss: 1.0839016437530518,grad_norm: 0.9999992567512103, iteration: 10350
loss: 1.0681073665618896,grad_norm: 0.9999992914726822, iteration: 10351
loss: 1.006270170211792,grad_norm: 0.9999992875606291, iteration: 10352
loss: 1.0776485204696655,grad_norm: 0.9999995495830099, iteration: 10353
loss: 1.1119728088378906,grad_norm: 0.9999997607952991, iteration: 10354
loss: 1.0019720792770386,grad_norm: 0.9999993808202826, iteration: 10355
loss: 1.0521090030670166,grad_norm: 0.9999992127826142, iteration: 10356
loss: 1.0707100629806519,grad_norm: 0.9999995863937476, iteration: 10357
loss: 1.0898703336715698,grad_norm: 0.9999991970207375, iteration: 10358
loss: 1.083598256111145,grad_norm: 0.9999995481010416, iteration: 10359
loss: 1.0220520496368408,grad_norm: 0.9999991028402788, iteration: 10360
loss: 1.038321852684021,grad_norm: 0.9999994103611742, iteration: 10361
loss: 1.021318793296814,grad_norm: 0.999999482220062, iteration: 10362
loss: 1.0523933172225952,grad_norm: 0.9999992560369951, iteration: 10363
loss: 1.0270168781280518,grad_norm: 0.9999992893892532, iteration: 10364
loss: 1.0815247297286987,grad_norm: 0.9999995955340563, iteration: 10365
loss: 1.053792119026184,grad_norm: 0.9999991147986006, iteration: 10366
loss: 1.0343824625015259,grad_norm: 0.9999991004891176, iteration: 10367
loss: 1.0028626918792725,grad_norm: 0.9999992523950082, iteration: 10368
loss: 1.0316803455352783,grad_norm: 0.9999994664719695, iteration: 10369
loss: 1.0553722381591797,grad_norm: 0.9999991973407575, iteration: 10370
loss: 1.09709894657135,grad_norm: 0.9999996270448194, iteration: 10371
loss: 1.0175837278366089,grad_norm: 0.9999992641162645, iteration: 10372
loss: 1.0633381605148315,grad_norm: 0.9999994513730582, iteration: 10373
loss: 1.0062397718429565,grad_norm: 0.9999992421570497, iteration: 10374
loss: 1.0548467636108398,grad_norm: 0.9999993144494292, iteration: 10375
loss: 1.0123571157455444,grad_norm: 0.9999996893774176, iteration: 10376
loss: 1.0161356925964355,grad_norm: 0.9999994299438798, iteration: 10377
loss: 1.0027629137039185,grad_norm: 0.9999993047506317, iteration: 10378
loss: 1.0461673736572266,grad_norm: 0.999999237746626, iteration: 10379
loss: 1.0794395208358765,grad_norm: 0.9999992016129249, iteration: 10380
loss: 1.0276391506195068,grad_norm: 0.9999994391563941, iteration: 10381
loss: 1.0376664400100708,grad_norm: 0.9999994966599894, iteration: 10382
loss: 1.051654577255249,grad_norm: 0.9999996397324252, iteration: 10383
loss: 1.101258635520935,grad_norm: 0.9999996467018377, iteration: 10384
loss: 1.0327285528182983,grad_norm: 0.9999991887325114, iteration: 10385
loss: 0.9901292324066162,grad_norm: 0.9999990564465951, iteration: 10386
loss: 1.043103814125061,grad_norm: 0.9999995117469945, iteration: 10387
loss: 1.054766058921814,grad_norm: 0.999999290582621, iteration: 10388
loss: 1.0954115390777588,grad_norm: 0.9999992255149518, iteration: 10389
loss: 1.0164172649383545,grad_norm: 0.9999992319452075, iteration: 10390
loss: 0.9950172901153564,grad_norm: 0.9999993379749338, iteration: 10391
loss: 1.0422903299331665,grad_norm: 0.9999995000131389, iteration: 10392
loss: 1.0592060089111328,grad_norm: 0.9999994065595553, iteration: 10393
loss: 1.034815788269043,grad_norm: 0.9999993792842625, iteration: 10394
loss: 1.0901435613632202,grad_norm: 0.9999993538783045, iteration: 10395
loss: 1.0264346599578857,grad_norm: 0.9999991783774578, iteration: 10396
loss: 1.0273900032043457,grad_norm: 0.9999993634387305, iteration: 10397
loss: 1.0575181245803833,grad_norm: 0.9999996184532228, iteration: 10398
loss: 1.0328949689865112,grad_norm: 0.9999992286427933, iteration: 10399
loss: 1.13164222240448,grad_norm: 0.9999997593995587, iteration: 10400
loss: 1.035157322883606,grad_norm: 0.9999993648925771, iteration: 10401
loss: 1.041890263557434,grad_norm: 0.99999947574251, iteration: 10402
loss: 1.0146265029907227,grad_norm: 0.9999992296923927, iteration: 10403
loss: 1.0069918632507324,grad_norm: 0.9999992213918024, iteration: 10404
loss: 1.033750057220459,grad_norm: 0.9999991972019774, iteration: 10405
loss: 1.035576343536377,grad_norm: 0.9999994472355075, iteration: 10406
loss: 1.0403242111206055,grad_norm: 0.9999994802927351, iteration: 10407
loss: 1.0848454236984253,grad_norm: 0.9999993601942577, iteration: 10408
loss: 1.0890172719955444,grad_norm: 0.9999992723564451, iteration: 10409
loss: 1.0273088216781616,grad_norm: 0.9999992534462936, iteration: 10410
loss: 1.0292387008666992,grad_norm: 0.9999991341343846, iteration: 10411
loss: 1.0588324069976807,grad_norm: 0.9999993049070359, iteration: 10412
loss: 1.0745238065719604,grad_norm: 0.9999993131889415, iteration: 10413
loss: 1.0797652006149292,grad_norm: 0.9999992005605416, iteration: 10414
loss: 1.0283044576644897,grad_norm: 0.9999991968541421, iteration: 10415
loss: 1.0994013547897339,grad_norm: 0.9999994084153446, iteration: 10416
loss: 1.042244553565979,grad_norm: 0.999999241719417, iteration: 10417
loss: 1.0013102293014526,grad_norm: 0.9999992168884482, iteration: 10418
loss: 1.0397007465362549,grad_norm: 0.9999992305301579, iteration: 10419
loss: 1.0294666290283203,grad_norm: 0.9999994142014732, iteration: 10420
loss: 1.0547993183135986,grad_norm: 0.9999994769834567, iteration: 10421
loss: 1.0631121397018433,grad_norm: 0.9999995014000709, iteration: 10422
loss: 1.0557385683059692,grad_norm: 0.9999992734215337, iteration: 10423
loss: 1.0293971300125122,grad_norm: 0.9999996528014565, iteration: 10424
loss: 1.058570384979248,grad_norm: 0.9999992726074135, iteration: 10425
loss: 1.0331016778945923,grad_norm: 0.9999998547432088, iteration: 10426
loss: 1.0330655574798584,grad_norm: 0.99999932906144, iteration: 10427
loss: 1.0505954027175903,grad_norm: 0.999999406154241, iteration: 10428
loss: 1.0184025764465332,grad_norm: 0.9999993660237457, iteration: 10429
loss: 1.0633113384246826,grad_norm: 0.9999996422173867, iteration: 10430
loss: 1.054281234741211,grad_norm: 0.9999993153755471, iteration: 10431
loss: 1.0657199621200562,grad_norm: 0.9999995803581188, iteration: 10432
loss: 1.0625572204589844,grad_norm: 0.9999995598579345, iteration: 10433
loss: 1.0475006103515625,grad_norm: 0.9999994934236986, iteration: 10434
loss: 1.0340330600738525,grad_norm: 0.999999171280287, iteration: 10435
loss: 0.999153733253479,grad_norm: 0.9999995054259403, iteration: 10436
loss: 1.0347721576690674,grad_norm: 0.9999993313879627, iteration: 10437
loss: 1.020841360092163,grad_norm: 0.9999995643393911, iteration: 10438
loss: 1.0820866823196411,grad_norm: 0.9999994991880151, iteration: 10439
loss: 1.0656778812408447,grad_norm: 0.9999991411844806, iteration: 10440
loss: 1.0474470853805542,grad_norm: 0.9999991850956327, iteration: 10441
loss: 1.016097903251648,grad_norm: 0.9999991906795984, iteration: 10442
loss: 1.0263252258300781,grad_norm: 0.9999994033265094, iteration: 10443
loss: 1.0690264701843262,grad_norm: 0.9999995295801611, iteration: 10444
loss: 0.9913001656532288,grad_norm: 0.9999991205262448, iteration: 10445
loss: 1.0040518045425415,grad_norm: 0.9999991416262822, iteration: 10446
loss: 1.004345417022705,grad_norm: 0.9999993097678218, iteration: 10447
loss: 1.042596697807312,grad_norm: 0.9999992113144008, iteration: 10448
loss: 1.0351121425628662,grad_norm: 0.9999993250383736, iteration: 10449
loss: 1.0171482563018799,grad_norm: 0.9999992261773619, iteration: 10450
loss: 1.0814306735992432,grad_norm: 0.9999995828697724, iteration: 10451
loss: 1.0351109504699707,grad_norm: 0.9999992881134853, iteration: 10452
loss: 1.0249109268188477,grad_norm: 0.9999993838667142, iteration: 10453
loss: 1.0361849069595337,grad_norm: 0.9999997339069618, iteration: 10454
loss: 1.024366021156311,grad_norm: 0.999999395289489, iteration: 10455
loss: 1.038011074066162,grad_norm: 0.999999356259302, iteration: 10456
loss: 1.0341051816940308,grad_norm: 0.9999994273753032, iteration: 10457
loss: 1.0651586055755615,grad_norm: 0.9999992776889852, iteration: 10458
loss: 1.0262547731399536,grad_norm: 0.9999992239724677, iteration: 10459
loss: 1.044798493385315,grad_norm: 0.9999993960092236, iteration: 10460
loss: 1.020948886871338,grad_norm: 0.9999996659385949, iteration: 10461
loss: 1.026415467262268,grad_norm: 0.9999994330439155, iteration: 10462
loss: 1.0420639514923096,grad_norm: 0.9999994783320333, iteration: 10463
loss: 1.0023818016052246,grad_norm: 0.9999995489009037, iteration: 10464
loss: 1.0344321727752686,grad_norm: 0.9999994294155256, iteration: 10465
loss: 1.0883452892303467,grad_norm: 0.9999997237230568, iteration: 10466
loss: 1.058382511138916,grad_norm: 0.9999990542731236, iteration: 10467
loss: 1.0334721803665161,grad_norm: 0.999999318070553, iteration: 10468
loss: 1.0238590240478516,grad_norm: 0.9999994337293945, iteration: 10469
loss: 1.077128291130066,grad_norm: 0.9999995750770214, iteration: 10470
loss: 1.0486407279968262,grad_norm: 0.9999994101675864, iteration: 10471
loss: 1.0044329166412354,grad_norm: 0.9999993150967595, iteration: 10472
loss: 1.0177024602890015,grad_norm: 0.9999992878799717, iteration: 10473
loss: 1.0787731409072876,grad_norm: 0.9999993337405166, iteration: 10474
loss: 1.0968424081802368,grad_norm: 0.9999998127836511, iteration: 10475
loss: 1.09330415725708,grad_norm: 0.9999992055448073, iteration: 10476
loss: 1.0472952127456665,grad_norm: 0.9999993643456397, iteration: 10477
loss: 1.0953989028930664,grad_norm: 0.999999683570531, iteration: 10478
loss: 1.0094867944717407,grad_norm: 0.9999992799883729, iteration: 10479
loss: 1.0388857126235962,grad_norm: 0.9999992646943379, iteration: 10480
loss: 1.089948296546936,grad_norm: 0.9999992373047988, iteration: 10481
loss: 1.0253727436065674,grad_norm: 0.9999991046980299, iteration: 10482
loss: 1.0530680418014526,grad_norm: 0.9999995596081146, iteration: 10483
loss: 1.0498019456863403,grad_norm: 0.9999992908174461, iteration: 10484
loss: 0.97828209400177,grad_norm: 0.999999267934296, iteration: 10485
loss: 1.0319486856460571,grad_norm: 0.9999992806472742, iteration: 10486
loss: 1.069062352180481,grad_norm: 0.9999994385558368, iteration: 10487
loss: 0.9582515954971313,grad_norm: 0.999999080370119, iteration: 10488
loss: 1.0457404851913452,grad_norm: 0.9999993135643025, iteration: 10489
loss: 1.013183832168579,grad_norm: 0.9999994151776903, iteration: 10490
loss: 0.9791498184204102,grad_norm: 0.9999992345641476, iteration: 10491
loss: 0.9803502559661865,grad_norm: 0.9999992901921835, iteration: 10492
loss: 1.011173963546753,grad_norm: 0.9999991090524785, iteration: 10493
loss: 1.0304890871047974,grad_norm: 0.9999992298657825, iteration: 10494
loss: 0.9855300188064575,grad_norm: 0.9999993911421694, iteration: 10495
loss: 1.0090786218643188,grad_norm: 0.9999994065907453, iteration: 10496
loss: 1.0361480712890625,grad_norm: 0.9999993652946305, iteration: 10497
loss: 1.0153758525848389,grad_norm: 0.9999993036436261, iteration: 10498
loss: 1.0212312936782837,grad_norm: 0.9999992668610956, iteration: 10499
loss: 1.0679521560668945,grad_norm: 0.9999993679864969, iteration: 10500
loss: 1.0539751052856445,grad_norm: 0.9999992438322298, iteration: 10501
loss: 1.084121823310852,grad_norm: 0.9999995089023361, iteration: 10502
loss: 1.068835735321045,grad_norm: 0.9999992677867019, iteration: 10503
loss: 1.0230984687805176,grad_norm: 0.9999993936076218, iteration: 10504
loss: 1.030100703239441,grad_norm: 0.9999992221321609, iteration: 10505
loss: 1.0101507902145386,grad_norm: 0.9999993337948665, iteration: 10506
loss: 1.002210259437561,grad_norm: 0.9999995695216234, iteration: 10507
loss: 1.0778634548187256,grad_norm: 0.9999993992364921, iteration: 10508
loss: 1.0350698232650757,grad_norm: 0.9999992535674835, iteration: 10509
loss: 1.0349806547164917,grad_norm: 0.9999992647601289, iteration: 10510
loss: 0.960351288318634,grad_norm: 0.9999993500175771, iteration: 10511
loss: 1.0494511127471924,grad_norm: 0.9999992862208213, iteration: 10512
loss: 1.0219202041625977,grad_norm: 0.9999992785696357, iteration: 10513
loss: 1.064044713973999,grad_norm: 0.9999993587872763, iteration: 10514
loss: 1.044877290725708,grad_norm: 0.9999994409350219, iteration: 10515
loss: 1.0636355876922607,grad_norm: 0.9999994837425139, iteration: 10516
loss: 1.0550167560577393,grad_norm: 0.9999994209536159, iteration: 10517
loss: 1.013254165649414,grad_norm: 0.9999993077924305, iteration: 10518
loss: 1.026314377784729,grad_norm: 0.9999995019786425, iteration: 10519
loss: 1.0628210306167603,grad_norm: 0.9999993681738086, iteration: 10520
loss: 1.0170906782150269,grad_norm: 0.9999993197450542, iteration: 10521
loss: 1.0215197801589966,grad_norm: 0.9999992965680541, iteration: 10522
loss: 0.9787907600402832,grad_norm: 0.9999993761015572, iteration: 10523
loss: 1.1008597612380981,grad_norm: 0.9999995035510237, iteration: 10524
loss: 1.0274840593338013,grad_norm: 0.9999992113367571, iteration: 10525
loss: 1.0289251804351807,grad_norm: 0.9999995418803315, iteration: 10526
loss: 1.0262584686279297,grad_norm: 0.9999992668443486, iteration: 10527
loss: 1.0533561706542969,grad_norm: 0.9999996758996461, iteration: 10528
loss: 1.0302900075912476,grad_norm: 0.9999991704664183, iteration: 10529
loss: 1.03821861743927,grad_norm: 0.9999993468102508, iteration: 10530
loss: 1.003344178199768,grad_norm: 0.9999992085504631, iteration: 10531
loss: 1.013837218284607,grad_norm: 0.9999994553527458, iteration: 10532
loss: 1.050192952156067,grad_norm: 0.9999992398637624, iteration: 10533
loss: 1.0643521547317505,grad_norm: 0.9999991221126244, iteration: 10534
loss: 1.0012013912200928,grad_norm: 0.9999991830528441, iteration: 10535
loss: 0.9906278848648071,grad_norm: 0.9999994889212969, iteration: 10536
loss: 0.9813980460166931,grad_norm: 0.9999992042411894, iteration: 10537
loss: 1.045356035232544,grad_norm: 0.9999992551470521, iteration: 10538
loss: 0.9822751879692078,grad_norm: 0.9999991861099857, iteration: 10539
loss: 1.0905559062957764,grad_norm: 0.9999995031581573, iteration: 10540
loss: 1.0389541387557983,grad_norm: 0.9999993392089397, iteration: 10541
loss: 1.0637602806091309,grad_norm: 0.9999993765400109, iteration: 10542
loss: 1.0085105895996094,grad_norm: 0.9999994312159364, iteration: 10543
loss: 1.0538893938064575,grad_norm: 0.9999991496075831, iteration: 10544
loss: 1.0040092468261719,grad_norm: 0.9999996362865631, iteration: 10545
loss: 1.0468376874923706,grad_norm: 0.9999990953773967, iteration: 10546
loss: 1.012921690940857,grad_norm: 0.999999164278644, iteration: 10547
loss: 1.0592762231826782,grad_norm: 0.9999993661536594, iteration: 10548
loss: 1.0207679271697998,grad_norm: 0.9999993062765866, iteration: 10549
loss: 0.9793429374694824,grad_norm: 0.9999991559006213, iteration: 10550
loss: 1.0559581518173218,grad_norm: 0.9999994500747287, iteration: 10551
loss: 1.0569300651550293,grad_norm: 0.9999996856245759, iteration: 10552
loss: 1.0704418420791626,grad_norm: 0.9999992838264278, iteration: 10553
loss: 1.043379783630371,grad_norm: 0.9999994049677228, iteration: 10554
loss: 1.0597758293151855,grad_norm: 0.9999992255798343, iteration: 10555
loss: 1.0683246850967407,grad_norm: 0.9999993427239583, iteration: 10556
loss: 1.0492643117904663,grad_norm: 0.9999994262200754, iteration: 10557
loss: 1.0195746421813965,grad_norm: 0.9999995189118916, iteration: 10558
loss: 1.0276278257369995,grad_norm: 0.9999992124915141, iteration: 10559
loss: 1.0446230173110962,grad_norm: 0.9999993755174369, iteration: 10560
loss: 1.081445574760437,grad_norm: 0.9671046691289247, iteration: 10561
loss: 1.071565866470337,grad_norm: 0.999999171018454, iteration: 10562
loss: 1.0679421424865723,grad_norm: 0.9999992432280355, iteration: 10563
loss: 1.1300092935562134,grad_norm: 0.9999997413258145, iteration: 10564
loss: 1.0493448972702026,grad_norm: 0.9999997701309242, iteration: 10565
loss: 1.020675778388977,grad_norm: 0.9999991302347853, iteration: 10566
loss: 1.0568513870239258,grad_norm: 0.9999991466408618, iteration: 10567
loss: 1.0734244585037231,grad_norm: 0.999999240848732, iteration: 10568
loss: 1.0561660528182983,grad_norm: 0.9999993596622598, iteration: 10569
loss: 1.0877537727355957,grad_norm: 0.9999993663198921, iteration: 10570
loss: 1.0211774110794067,grad_norm: 0.9999991466787245, iteration: 10571
loss: 1.027814269065857,grad_norm: 0.9999993046996278, iteration: 10572
loss: 1.0363078117370605,grad_norm: 0.9999992735236938, iteration: 10573
loss: 0.9912298917770386,grad_norm: 0.9999992820462053, iteration: 10574
loss: 1.0487574338912964,grad_norm: 0.9999992342776214, iteration: 10575
loss: 1.0182138681411743,grad_norm: 0.9999993014151656, iteration: 10576
loss: 1.0527608394622803,grad_norm: 0.9999998220229734, iteration: 10577
loss: 1.0355241298675537,grad_norm: 0.9999992419641086, iteration: 10578
loss: 1.0620251893997192,grad_norm: 0.9999992938889231, iteration: 10579
loss: 1.0558415651321411,grad_norm: 0.9999991337357346, iteration: 10580
loss: 1.0402857065200806,grad_norm: 0.9999990572433137, iteration: 10581
loss: 1.0431461334228516,grad_norm: 0.9999993691375895, iteration: 10582
loss: 1.0762909650802612,grad_norm: 0.9999993148808549, iteration: 10583
loss: 1.0722500085830688,grad_norm: 0.9999995203922619, iteration: 10584
loss: 1.0427426099777222,grad_norm: 0.9999994971083529, iteration: 10585
loss: 1.0645591020584106,grad_norm: 0.9999997929288202, iteration: 10586
loss: 1.0679301023483276,grad_norm: 0.9999991607603065, iteration: 10587
loss: 1.0915879011154175,grad_norm: 0.9999994232203745, iteration: 10588
loss: 1.079311728477478,grad_norm: 0.99999929657627, iteration: 10589
loss: 1.0260565280914307,grad_norm: 0.9999997117717679, iteration: 10590
loss: 1.0504010915756226,grad_norm: 0.999999271283804, iteration: 10591
loss: 1.0084165334701538,grad_norm: 0.9999992909280458, iteration: 10592
loss: 1.0554718971252441,grad_norm: 0.9999996930427459, iteration: 10593
loss: 1.0404926538467407,grad_norm: 0.9999994792208489, iteration: 10594
loss: 1.0853832960128784,grad_norm: 0.9999995739681999, iteration: 10595
loss: 1.029166579246521,grad_norm: 0.9999993409948774, iteration: 10596
loss: 1.1031101942062378,grad_norm: 0.9999996491258792, iteration: 10597
loss: 1.0203135013580322,grad_norm: 0.9999996140567528, iteration: 10598
loss: 1.0811917781829834,grad_norm: 0.9999996343813291, iteration: 10599
loss: 1.0294791460037231,grad_norm: 0.999999482373375, iteration: 10600
loss: 1.03177809715271,grad_norm: 0.9999994124228326, iteration: 10601
loss: 1.0341193675994873,grad_norm: 0.9999992624248812, iteration: 10602
loss: 1.0346518754959106,grad_norm: 0.9999994251784119, iteration: 10603
loss: 1.0550215244293213,grad_norm: 0.9999996859862517, iteration: 10604
loss: 1.0363852977752686,grad_norm: 0.9999995899282917, iteration: 10605
loss: 1.0314043760299683,grad_norm: 0.9999995005278292, iteration: 10606
loss: 1.0619096755981445,grad_norm: 0.9999994404573386, iteration: 10607
loss: 1.0478992462158203,grad_norm: 0.9999994661718552, iteration: 10608
loss: 1.0289928913116455,grad_norm: 0.9999992502495229, iteration: 10609
loss: 1.0217304229736328,grad_norm: 0.9999998033597816, iteration: 10610
loss: 0.9947487115859985,grad_norm: 0.999999190968371, iteration: 10611
loss: 1.0799888372421265,grad_norm: 0.9999996828496667, iteration: 10612
loss: 1.0408700704574585,grad_norm: 0.9999993684332432, iteration: 10613
loss: 1.0872689485549927,grad_norm: 0.9999995366226633, iteration: 10614
loss: 1.0599459409713745,grad_norm: 0.9999994176996264, iteration: 10615
loss: 1.0407423973083496,grad_norm: 0.9999996238423362, iteration: 10616
loss: 1.0491315126419067,grad_norm: 0.9999996568691542, iteration: 10617
loss: 1.0317939519882202,grad_norm: 0.9999992947903434, iteration: 10618
loss: 1.0692059993743896,grad_norm: 0.999999250124223, iteration: 10619
loss: 1.0565121173858643,grad_norm: 0.9999993160914061, iteration: 10620
loss: 1.0488433837890625,grad_norm: 0.9999996777944307, iteration: 10621
loss: 1.0586841106414795,grad_norm: 0.9999995177608625, iteration: 10622
loss: 1.0187808275222778,grad_norm: 0.9999993455045785, iteration: 10623
loss: 1.0318583250045776,grad_norm: 0.999999533680979, iteration: 10624
loss: 1.0428651571273804,grad_norm: 0.999999591724198, iteration: 10625
loss: 0.9906096458435059,grad_norm: 0.9999992443046138, iteration: 10626
loss: 1.029025673866272,grad_norm: 0.999999685514793, iteration: 10627
loss: 1.0439878702163696,grad_norm: 0.9999992891894754, iteration: 10628
loss: 1.0367134809494019,grad_norm: 0.9999992169925637, iteration: 10629
loss: 0.9581292271614075,grad_norm: 0.9999993307436387, iteration: 10630
loss: 1.024928092956543,grad_norm: 0.9999990261630709, iteration: 10631
loss: 1.055658221244812,grad_norm: 0.9999993252861276, iteration: 10632
loss: 1.004517912864685,grad_norm: 0.99999926678848, iteration: 10633
loss: 1.0408439636230469,grad_norm: 0.9999994056077278, iteration: 10634
loss: 1.0117626190185547,grad_norm: 0.9999994911351344, iteration: 10635
loss: 1.0383862257003784,grad_norm: 0.9999997838344583, iteration: 10636
loss: 1.0265549421310425,grad_norm: 0.9999994234049219, iteration: 10637
loss: 1.0884569883346558,grad_norm: 0.9999996137083068, iteration: 10638
loss: 1.0541143417358398,grad_norm: 0.9999993340224222, iteration: 10639
loss: 0.9863066673278809,grad_norm: 0.9999994572117384, iteration: 10640
loss: 1.0122628211975098,grad_norm: 0.9999993531282102, iteration: 10641
loss: 1.0604325532913208,grad_norm: 0.9999992140961527, iteration: 10642
loss: 0.9965640306472778,grad_norm: 0.9999992609673395, iteration: 10643
loss: 1.026265263557434,grad_norm: 0.9999996467855912, iteration: 10644
loss: 1.0426336526870728,grad_norm: 0.9999994692138332, iteration: 10645
loss: 1.0626057386398315,grad_norm: 0.9999991856285094, iteration: 10646
loss: 1.0335768461227417,grad_norm: 0.999999374679107, iteration: 10647
loss: 0.9904937744140625,grad_norm: 0.999999294662513, iteration: 10648
loss: 1.0375735759735107,grad_norm: 0.9999991528001212, iteration: 10649
loss: 1.0437276363372803,grad_norm: 0.9999994832256786, iteration: 10650
loss: 1.0022341012954712,grad_norm: 0.9999994064955574, iteration: 10651
loss: 1.0472904443740845,grad_norm: 0.9999993794662848, iteration: 10652
loss: 1.0559277534484863,grad_norm: 0.9999992116225966, iteration: 10653
loss: 1.0131516456604004,grad_norm: 0.9999994689079439, iteration: 10654
loss: 1.0599101781845093,grad_norm: 0.9999995055396969, iteration: 10655
loss: 1.069453477859497,grad_norm: 0.999999528301551, iteration: 10656
loss: 1.0729382038116455,grad_norm: 0.9999994295998254, iteration: 10657
loss: 1.0676189661026,grad_norm: 0.9999995129586549, iteration: 10658
loss: 1.0061675310134888,grad_norm: 0.9999992315095538, iteration: 10659
loss: 1.0268932580947876,grad_norm: 0.9999993530596432, iteration: 10660
loss: 1.1076644659042358,grad_norm: 0.999999707748587, iteration: 10661
loss: 1.0876997709274292,grad_norm: 0.9999992788922851, iteration: 10662
loss: 1.0528172254562378,grad_norm: 0.9999991115180735, iteration: 10663
loss: 1.0593271255493164,grad_norm: 0.9999992919223832, iteration: 10664
loss: 1.0698453187942505,grad_norm: 0.9999998472389474, iteration: 10665
loss: 1.062197208404541,grad_norm: 0.9999997272510831, iteration: 10666
loss: 1.0369700193405151,grad_norm: 0.9999993541053196, iteration: 10667
loss: 1.0259602069854736,grad_norm: 0.9999992769889896, iteration: 10668
loss: 0.9970512390136719,grad_norm: 0.9999992262264812, iteration: 10669
loss: 1.01365065574646,grad_norm: 0.9999990285817101, iteration: 10670
loss: 1.0278455018997192,grad_norm: 0.9999993051448492, iteration: 10671
loss: 1.0403380393981934,grad_norm: 0.9999991759935625, iteration: 10672
loss: 1.0824297666549683,grad_norm: 0.9999993274117592, iteration: 10673
loss: 1.0398385524749756,grad_norm: 0.9999990500936738, iteration: 10674
loss: 1.0514906644821167,grad_norm: 0.9999991592928402, iteration: 10675
loss: 1.023027777671814,grad_norm: 0.9999992265771718, iteration: 10676
loss: 1.0035675764083862,grad_norm: 0.9999992682873369, iteration: 10677
loss: 1.003527283668518,grad_norm: 0.999999120724722, iteration: 10678
loss: 1.0338727235794067,grad_norm: 0.9999993970234172, iteration: 10679
loss: 1.0669198036193848,grad_norm: 0.9999993416237478, iteration: 10680
loss: 1.0176650285720825,grad_norm: 0.9999991231703488, iteration: 10681
loss: 1.0509510040283203,grad_norm: 0.9999997235028151, iteration: 10682
loss: 1.058813452720642,grad_norm: 0.9999993592577823, iteration: 10683
loss: 1.0378824472427368,grad_norm: 0.9999994321385919, iteration: 10684
loss: 0.9929255843162537,grad_norm: 0.9999992008135408, iteration: 10685
loss: 1.0854198932647705,grad_norm: 0.9999994727543552, iteration: 10686
loss: 1.0381826162338257,grad_norm: 0.9999994872529214, iteration: 10687
loss: 1.0127816200256348,grad_norm: 0.9999994541625844, iteration: 10688
loss: 1.0543651580810547,grad_norm: 0.9999992731971667, iteration: 10689
loss: 1.046741247177124,grad_norm: 0.9999993691751314, iteration: 10690
loss: 1.0632543563842773,grad_norm: 0.9999991184157564, iteration: 10691
loss: 1.0467721223831177,grad_norm: 0.9999993303401002, iteration: 10692
loss: 1.0515390634536743,grad_norm: 0.9999992170475026, iteration: 10693
loss: 1.0423610210418701,grad_norm: 0.9999993861887302, iteration: 10694
loss: 1.003377079963684,grad_norm: 0.9999992869922234, iteration: 10695
loss: 1.0545157194137573,grad_norm: 0.9999994139807302, iteration: 10696
loss: 1.0073412656784058,grad_norm: 0.999999267085644, iteration: 10697
loss: 1.0029253959655762,grad_norm: 0.9999991344819789, iteration: 10698
loss: 1.0470918416976929,grad_norm: 0.9999992304000783, iteration: 10699
loss: 1.0481853485107422,grad_norm: 0.9999996364469302, iteration: 10700
loss: 1.0166375637054443,grad_norm: 0.9999992776651696, iteration: 10701
loss: 1.0176289081573486,grad_norm: 0.9999992530068768, iteration: 10702
loss: 1.0298928022384644,grad_norm: 0.999999250552155, iteration: 10703
loss: 1.0678383111953735,grad_norm: 0.9999996191507983, iteration: 10704
loss: 1.0462778806686401,grad_norm: 0.99999916453692, iteration: 10705
loss: 1.038641333580017,grad_norm: 0.9999993157642346, iteration: 10706
loss: 1.0060136318206787,grad_norm: 0.9999993084202403, iteration: 10707
loss: 1.052639365196228,grad_norm: 0.9999992410382312, iteration: 10708
loss: 0.957167387008667,grad_norm: 0.9999991361147998, iteration: 10709
loss: 1.0338658094406128,grad_norm: 0.9999991496095085, iteration: 10710
loss: 1.0414814949035645,grad_norm: 0.9999994563231733, iteration: 10711
loss: 0.9970114827156067,grad_norm: 0.9999992243950329, iteration: 10712
loss: 1.0629841089248657,grad_norm: 0.9999993575960783, iteration: 10713
loss: 1.0266789197921753,grad_norm: 0.9999992555733522, iteration: 10714
loss: 1.033047080039978,grad_norm: 0.9999990825613251, iteration: 10715
loss: 1.0615663528442383,grad_norm: 0.9999993144330461, iteration: 10716
loss: 1.006767988204956,grad_norm: 0.9999990976128861, iteration: 10717
loss: 1.0225213766098022,grad_norm: 0.9999991812470205, iteration: 10718
loss: 1.0051990747451782,grad_norm: 0.9999994839867173, iteration: 10719
loss: 1.003638744354248,grad_norm: 0.999999190834257, iteration: 10720
loss: 1.04645836353302,grad_norm: 0.9999992330461002, iteration: 10721
loss: 1.054980754852295,grad_norm: 0.9999992007786593, iteration: 10722
loss: 1.019456148147583,grad_norm: 0.9999994444294058, iteration: 10723
loss: 1.0365004539489746,grad_norm: 0.9999993133437148, iteration: 10724
loss: 1.0123506784439087,grad_norm: 0.9999996011546978, iteration: 10725
loss: 1.013744831085205,grad_norm: 0.9999993161359154, iteration: 10726
loss: 1.0194785594940186,grad_norm: 0.9999994127970314, iteration: 10727
loss: 1.0282710790634155,grad_norm: 0.9999992333436498, iteration: 10728
loss: 1.0725589990615845,grad_norm: 0.9999990966166907, iteration: 10729
loss: 1.0530245304107666,grad_norm: 0.9999996319508698, iteration: 10730
loss: 1.0127394199371338,grad_norm: 0.9999992229538985, iteration: 10731
loss: 1.0559720993041992,grad_norm: 0.9999993013557266, iteration: 10732
loss: 1.0118602514266968,grad_norm: 0.999999354852806, iteration: 10733
loss: 1.022210955619812,grad_norm: 0.9999993008414427, iteration: 10734
loss: 0.9920475482940674,grad_norm: 0.999999216459681, iteration: 10735
loss: 1.075912356376648,grad_norm: 0.9999997659879535, iteration: 10736
loss: 1.0577558279037476,grad_norm: 0.9999992200732132, iteration: 10737
loss: 1.0007500648498535,grad_norm: 0.9999992233798025, iteration: 10738
loss: 1.0296800136566162,grad_norm: 0.9999992538401792, iteration: 10739
loss: 1.073819875717163,grad_norm: 0.9999994627149223, iteration: 10740
loss: 1.0003376007080078,grad_norm: 0.999999492808312, iteration: 10741
loss: 1.0374630689620972,grad_norm: 0.9999992739007957, iteration: 10742
loss: 1.0433385372161865,grad_norm: 0.9999994015683985, iteration: 10743
loss: 1.0131112337112427,grad_norm: 0.9999993230699691, iteration: 10744
loss: 1.0420883893966675,grad_norm: 0.9999994030413872, iteration: 10745
loss: 1.0472899675369263,grad_norm: 0.9999995770278198, iteration: 10746
loss: 1.01435387134552,grad_norm: 0.9999993280009287, iteration: 10747
loss: 0.9895819425582886,grad_norm: 0.9999992862595024, iteration: 10748
loss: 1.0263303518295288,grad_norm: 0.9999992885086196, iteration: 10749
loss: 1.0308477878570557,grad_norm: 0.9999991553344215, iteration: 10750
loss: 1.0441322326660156,grad_norm: 0.9999992128145722, iteration: 10751
loss: 1.012616753578186,grad_norm: 0.9999993094570507, iteration: 10752
loss: 1.1005831956863403,grad_norm: 0.9999994677299283, iteration: 10753
loss: 1.0475132465362549,grad_norm: 0.9999995589958501, iteration: 10754
loss: 1.0432910919189453,grad_norm: 0.9999994745810269, iteration: 10755
loss: 1.0747321844100952,grad_norm: 0.9999995324839229, iteration: 10756
loss: 1.0265507698059082,grad_norm: 0.9999995812329258, iteration: 10757
loss: 0.9927395582199097,grad_norm: 0.9999992612603027, iteration: 10758
loss: 1.0284936428070068,grad_norm: 0.9999993176391273, iteration: 10759
loss: 1.0586241483688354,grad_norm: 0.9999996628977333, iteration: 10760
loss: 1.0572985410690308,grad_norm: 0.999999277099527, iteration: 10761
loss: 0.9772183299064636,grad_norm: 0.9999992597270534, iteration: 10762
loss: 1.0237644910812378,grad_norm: 0.9999992466065732, iteration: 10763
loss: 1.0580058097839355,grad_norm: 0.9999994320355509, iteration: 10764
loss: 1.1072673797607422,grad_norm: 0.9999996244548912, iteration: 10765
loss: 1.0197203159332275,grad_norm: 0.9999993302907948, iteration: 10766
loss: 1.0282820463180542,grad_norm: 0.9999991801998431, iteration: 10767
loss: 1.0415433645248413,grad_norm: 0.9999992250844226, iteration: 10768
loss: 1.0826071500778198,grad_norm: 0.9999995755958089, iteration: 10769
loss: 1.061446189880371,grad_norm: 0.9999992170765105, iteration: 10770
loss: 1.0267733335494995,grad_norm: 0.9999993768663644, iteration: 10771
loss: 1.0346472263336182,grad_norm: 0.9999995276518248, iteration: 10772
loss: 1.0439554452896118,grad_norm: 0.9999991760926863, iteration: 10773
loss: 1.037316918373108,grad_norm: 0.9999993208294654, iteration: 10774
loss: 1.0260272026062012,grad_norm: 0.9999991995813506, iteration: 10775
loss: 1.055451512336731,grad_norm: 0.9999996517141453, iteration: 10776
loss: 1.0597093105316162,grad_norm: 0.9999992416022166, iteration: 10777
loss: 1.0494130849838257,grad_norm: 0.9999992761891037, iteration: 10778
loss: 1.0295482873916626,grad_norm: 0.9999992542733166, iteration: 10779
loss: 1.0722986459732056,grad_norm: 0.9999993275808273, iteration: 10780
loss: 0.9864798188209534,grad_norm: 0.9999993248317635, iteration: 10781
loss: 0.9952530860900879,grad_norm: 0.9999992096652414, iteration: 10782
loss: 1.0537636280059814,grad_norm: 0.9999992606917161, iteration: 10783
loss: 1.0496206283569336,grad_norm: 0.9999994014633864, iteration: 10784
loss: 1.0088132619857788,grad_norm: 0.9999994402963943, iteration: 10785
loss: 1.044222116470337,grad_norm: 0.9999993037910528, iteration: 10786
loss: 1.0005968809127808,grad_norm: 0.9999995068845636, iteration: 10787
loss: 1.0355626344680786,grad_norm: 0.9999994058464445, iteration: 10788
loss: 1.0510797500610352,grad_norm: 0.9999993754927606, iteration: 10789
loss: 1.042351245880127,grad_norm: 0.9999994304778235, iteration: 10790
loss: 1.0765635967254639,grad_norm: 0.9999994714678546, iteration: 10791
loss: 1.009925365447998,grad_norm: 0.9999992614851857, iteration: 10792
loss: 1.0206234455108643,grad_norm: 0.9999992749361286, iteration: 10793
loss: 1.019456148147583,grad_norm: 0.9999994978211972, iteration: 10794
loss: 1.0310138463974,grad_norm: 0.9999994379935453, iteration: 10795
loss: 1.05524480342865,grad_norm: 0.9999993303337028, iteration: 10796
loss: 1.0250276327133179,grad_norm: 0.9999992696751807, iteration: 10797
loss: 1.042255163192749,grad_norm: 0.9999992237476433, iteration: 10798
loss: 1.0385557413101196,grad_norm: 0.9999993097454912, iteration: 10799
loss: 1.0318772792816162,grad_norm: 0.9999990995990341, iteration: 10800
loss: 1.0427312850952148,grad_norm: 0.9999994307311928, iteration: 10801
loss: 1.0385946035385132,grad_norm: 0.9999995575635133, iteration: 10802
loss: 1.0428276062011719,grad_norm: 0.9999992653471013, iteration: 10803
loss: 1.0700275897979736,grad_norm: 0.9999994148566772, iteration: 10804
loss: 1.0478123426437378,grad_norm: 0.9999992964736286, iteration: 10805
loss: 1.001120686531067,grad_norm: 0.9999992240369573, iteration: 10806
loss: 1.0406248569488525,grad_norm: 0.9999996127922481, iteration: 10807
loss: 1.0064177513122559,grad_norm: 0.9999990351696799, iteration: 10808
loss: 1.0131372213363647,grad_norm: 0.9999991187652615, iteration: 10809
loss: 0.9961128830909729,grad_norm: 0.9999992280557303, iteration: 10810
loss: 1.1027954816818237,grad_norm: 0.999999467709826, iteration: 10811
loss: 1.0323666334152222,grad_norm: 0.9999993201104793, iteration: 10812
loss: 1.0306521654129028,grad_norm: 0.9999993130163304, iteration: 10813
loss: 1.0324225425720215,grad_norm: 0.9999992230174026, iteration: 10814
loss: 0.9762186408042908,grad_norm: 0.9999994850947453, iteration: 10815
loss: 1.0766581296920776,grad_norm: 0.999999599103979, iteration: 10816
loss: 1.052046298980713,grad_norm: 0.9999991850746797, iteration: 10817
loss: 1.0225334167480469,grad_norm: 0.9999993168147419, iteration: 10818
loss: 1.0052707195281982,grad_norm: 0.999999226284265, iteration: 10819
loss: 1.036118984222412,grad_norm: 0.9999992575388118, iteration: 10820
loss: 1.0650379657745361,grad_norm: 0.9999992178392175, iteration: 10821
loss: 1.0836671590805054,grad_norm: 0.9999992208081515, iteration: 10822
loss: 1.081480622291565,grad_norm: 0.9999992933886578, iteration: 10823
loss: 0.9941807985305786,grad_norm: 0.9999992866369373, iteration: 10824
loss: 1.0260529518127441,grad_norm: 0.9999992222811418, iteration: 10825
loss: 1.0572537183761597,grad_norm: 0.9999995312370497, iteration: 10826
loss: 1.0775479078292847,grad_norm: 0.9999995321128328, iteration: 10827
loss: 1.034422516822815,grad_norm: 0.9999992803867997, iteration: 10828
loss: 0.9998853802680969,grad_norm: 0.9925853794816059, iteration: 10829
loss: 1.050065279006958,grad_norm: 0.9999991900248079, iteration: 10830
loss: 0.9731845855712891,grad_norm: 0.9999993130220196, iteration: 10831
loss: 1.0020835399627686,grad_norm: 0.9999992760821227, iteration: 10832
loss: 1.0251741409301758,grad_norm: 0.9999997721450027, iteration: 10833
loss: 1.056408166885376,grad_norm: 0.9999993851852128, iteration: 10834
loss: 1.0452653169631958,grad_norm: 0.9999995601438615, iteration: 10835
loss: 1.028867483139038,grad_norm: 0.9999991985762023, iteration: 10836
loss: 1.0092720985412598,grad_norm: 0.9999992516711179, iteration: 10837
loss: 1.0580837726593018,grad_norm: 0.99999936929098, iteration: 10838
loss: 1.0470887422561646,grad_norm: 0.9999991764917344, iteration: 10839
loss: 1.0070691108703613,grad_norm: 0.9999993100842506, iteration: 10840
loss: 0.9777864217758179,grad_norm: 0.9999991759165964, iteration: 10841
loss: 1.0025895833969116,grad_norm: 0.9999998012016096, iteration: 10842
loss: 1.0516338348388672,grad_norm: 0.9999990865082076, iteration: 10843
loss: 1.0550775527954102,grad_norm: 0.9999995695120728, iteration: 10844
loss: 1.0708565711975098,grad_norm: 0.9999996194423318, iteration: 10845
loss: 1.0135674476623535,grad_norm: 0.9999992576076113, iteration: 10846
loss: 1.0333970785140991,grad_norm: 0.999999715557422, iteration: 10847
loss: 1.0429154634475708,grad_norm: 0.9999992591627496, iteration: 10848
loss: 1.014709234237671,grad_norm: 0.999999273154363, iteration: 10849
loss: 1.0058205127716064,grad_norm: 0.9999992715695529, iteration: 10850
loss: 0.987616777420044,grad_norm: 0.9999990977852556, iteration: 10851
loss: 1.0509324073791504,grad_norm: 0.9999994232067511, iteration: 10852
loss: 1.0223857164382935,grad_norm: 0.9999996013700979, iteration: 10853
loss: 1.0437572002410889,grad_norm: 0.9999992722208022, iteration: 10854
loss: 0.9986389875411987,grad_norm: 0.9999991311430542, iteration: 10855
loss: 1.0283088684082031,grad_norm: 0.999999378235677, iteration: 10856
loss: 0.9880968928337097,grad_norm: 0.9999991653624013, iteration: 10857
loss: 1.0475108623504639,grad_norm: 0.9999993265747769, iteration: 10858
loss: 1.057093620300293,grad_norm: 0.99999930274985, iteration: 10859
loss: 0.9900389909744263,grad_norm: 0.9999992036662025, iteration: 10860
loss: 1.0594816207885742,grad_norm: 0.999999354172068, iteration: 10861
loss: 1.0591325759887695,grad_norm: 0.999999185570745, iteration: 10862
loss: 1.0449252128601074,grad_norm: 0.999999751279854, iteration: 10863
loss: 1.0429754257202148,grad_norm: 0.9999992228418656, iteration: 10864
loss: 1.0219743251800537,grad_norm: 0.9999993786900183, iteration: 10865
loss: 1.0868362188339233,grad_norm: 0.9999995495074743, iteration: 10866
loss: 1.0332921743392944,grad_norm: 0.9999993628372084, iteration: 10867
loss: 1.022253394126892,grad_norm: 0.9999991498239723, iteration: 10868
loss: 1.0978491306304932,grad_norm: 0.9999995171460766, iteration: 10869
loss: 1.0850226879119873,grad_norm: 0.9999995220808914, iteration: 10870
loss: 1.0921522378921509,grad_norm: 0.9999997067881953, iteration: 10871
loss: 1.0383007526397705,grad_norm: 0.9999991103535615, iteration: 10872
loss: 1.1102361679077148,grad_norm: 0.9999995413659741, iteration: 10873
loss: 1.000659704208374,grad_norm: 0.999999314678841, iteration: 10874
loss: 1.0265980958938599,grad_norm: 0.9999994732294295, iteration: 10875
loss: 1.0353174209594727,grad_norm: 0.9999991215432275, iteration: 10876
loss: 0.9949032664299011,grad_norm: 0.9999992173052977, iteration: 10877
loss: 1.040087103843689,grad_norm: 0.9999992329564565, iteration: 10878
loss: 1.0233548879623413,grad_norm: 0.9999992111003824, iteration: 10879
loss: 1.0650928020477295,grad_norm: 0.999999318141033, iteration: 10880
loss: 1.0558890104293823,grad_norm: 0.9999998200913253, iteration: 10881
loss: 1.0537307262420654,grad_norm: 0.9999992134850336, iteration: 10882
loss: 1.025689959526062,grad_norm: 0.9999993441086731, iteration: 10883
loss: 1.0340956449508667,grad_norm: 0.9999994697902793, iteration: 10884
loss: 1.0081301927566528,grad_norm: 0.9999989860406114, iteration: 10885
loss: 1.0648256540298462,grad_norm: 0.9999990399585722, iteration: 10886
loss: 1.0498523712158203,grad_norm: 0.999999770752263, iteration: 10887
loss: 1.0121402740478516,grad_norm: 0.9999992810013575, iteration: 10888
loss: 0.9946945905685425,grad_norm: 0.9999994688694862, iteration: 10889
loss: 1.0178327560424805,grad_norm: 0.9999990954646532, iteration: 10890
loss: 1.0776546001434326,grad_norm: 0.9999998070874215, iteration: 10891
loss: 1.026239275932312,grad_norm: 0.9999992116035846, iteration: 10892
loss: 0.9978825449943542,grad_norm: 0.9999991744074959, iteration: 10893
loss: 1.018297791481018,grad_norm: 0.9999995823409226, iteration: 10894
loss: 1.0306562185287476,grad_norm: 0.9999993175057356, iteration: 10895
loss: 1.0323129892349243,grad_norm: 0.9999991809270528, iteration: 10896
loss: 1.1906529664993286,grad_norm: 0.9999996336174589, iteration: 10897
loss: 1.0160425901412964,grad_norm: 0.9999993165264806, iteration: 10898
loss: 1.05635666847229,grad_norm: 0.9999992680465368, iteration: 10899
loss: 1.0592279434204102,grad_norm: 0.9999994493994921, iteration: 10900
loss: 1.0300809144973755,grad_norm: 0.9999994196366562, iteration: 10901
loss: 1.0271731615066528,grad_norm: 0.9999991112714636, iteration: 10902
loss: 1.0416710376739502,grad_norm: 0.9999994830657918, iteration: 10903
loss: 1.0351099967956543,grad_norm: 0.9999994539353336, iteration: 10904
loss: 1.0337541103363037,grad_norm: 0.9999991030907519, iteration: 10905
loss: 1.0523557662963867,grad_norm: 0.9999994773083561, iteration: 10906
loss: 1.0916881561279297,grad_norm: 0.9999995948774221, iteration: 10907
loss: 0.9949597716331482,grad_norm: 0.999999376340631, iteration: 10908
loss: 1.041684627532959,grad_norm: 0.9999992759095493, iteration: 10909
loss: 1.0141605138778687,grad_norm: 0.9999991670360281, iteration: 10910
loss: 1.0530869960784912,grad_norm: 0.9999991421087134, iteration: 10911
loss: 1.0428849458694458,grad_norm: 0.9999991835252627, iteration: 10912
loss: 1.0134602785110474,grad_norm: 0.9999992986000108, iteration: 10913
loss: 1.1111774444580078,grad_norm: 0.9999996395112905, iteration: 10914
loss: 1.0586780309677124,grad_norm: 0.9999996135650444, iteration: 10915
loss: 1.0753579139709473,grad_norm: 0.9999994687188308, iteration: 10916
loss: 1.1308544874191284,grad_norm: 0.9999994615147918, iteration: 10917
loss: 1.0684075355529785,grad_norm: 0.999999148805357, iteration: 10918
loss: 0.9794563055038452,grad_norm: 0.9999992203978498, iteration: 10919
loss: 1.030430555343628,grad_norm: 0.9999992422513357, iteration: 10920
loss: 1.0597479343414307,grad_norm: 0.9999993843642896, iteration: 10921
loss: 1.068495512008667,grad_norm: 0.999999244225411, iteration: 10922
loss: 1.0544109344482422,grad_norm: 0.999999686067236, iteration: 10923
loss: 1.0536675453186035,grad_norm: 0.9999994127432777, iteration: 10924
loss: 1.044481635093689,grad_norm: 0.9999996902817094, iteration: 10925
loss: 1.0723649263381958,grad_norm: 0.99999956910464, iteration: 10926
loss: 1.0192283391952515,grad_norm: 0.9999994337995965, iteration: 10927
loss: 1.0439039468765259,grad_norm: 0.9999994492212794, iteration: 10928
loss: 1.0915576219558716,grad_norm: 0.9999993399505196, iteration: 10929
loss: 1.0561822652816772,grad_norm: 0.999999378490154, iteration: 10930
loss: 1.111680030822754,grad_norm: 0.999999502243794, iteration: 10931
loss: 1.0406451225280762,grad_norm: 0.9999992196607345, iteration: 10932
loss: 1.0201280117034912,grad_norm: 0.999999253244987, iteration: 10933
loss: 1.0662546157836914,grad_norm: 0.9999992276580497, iteration: 10934
loss: 1.0102952718734741,grad_norm: 0.9999994233728711, iteration: 10935
loss: 1.0248422622680664,grad_norm: 0.9999993182914243, iteration: 10936
loss: 1.0671436786651611,grad_norm: 0.9999993827825427, iteration: 10937
loss: 1.0732624530792236,grad_norm: 0.999999432623317, iteration: 10938
loss: 1.0718649625778198,grad_norm: 0.9999993074600987, iteration: 10939
loss: 1.0511646270751953,grad_norm: 0.9999991492455158, iteration: 10940
loss: 1.0274732112884521,grad_norm: 0.9999994020248653, iteration: 10941
loss: 1.02057683467865,grad_norm: 0.9999994319280474, iteration: 10942
loss: 1.0828512907028198,grad_norm: 0.999999729175405, iteration: 10943
loss: 1.0195902585983276,grad_norm: 0.9999992603501144, iteration: 10944
loss: 1.0266391038894653,grad_norm: 0.9999995296829223, iteration: 10945
loss: 1.0323452949523926,grad_norm: 0.9999991046948445, iteration: 10946
loss: 1.0088709592819214,grad_norm: 0.9999993034333774, iteration: 10947
loss: 1.0127387046813965,grad_norm: 0.9999991593695458, iteration: 10948
loss: 1.0551090240478516,grad_norm: 0.9999992020106054, iteration: 10949
loss: 1.084135890007019,grad_norm: 0.9999994678364924, iteration: 10950
loss: 1.0125207901000977,grad_norm: 0.9999993117308508, iteration: 10951
loss: 1.0626978874206543,grad_norm: 0.9999994910548088, iteration: 10952
loss: 1.0797147750854492,grad_norm: 0.999999097008287, iteration: 10953
loss: 1.0360347032546997,grad_norm: 0.9999994305068012, iteration: 10954
loss: 1.0129095315933228,grad_norm: 0.999999314275811, iteration: 10955
loss: 1.0523062944412231,grad_norm: 0.9999994854680628, iteration: 10956
loss: 1.0384002923965454,grad_norm: 0.9999992775375185, iteration: 10957
loss: 1.0382050275802612,grad_norm: 0.9999991026443698, iteration: 10958
loss: 0.998625636100769,grad_norm: 0.9999991418143309, iteration: 10959
loss: 1.0376466512680054,grad_norm: 0.9999991716903623, iteration: 10960
loss: 1.0517499446868896,grad_norm: 0.999999297132766, iteration: 10961
loss: 1.0138640403747559,grad_norm: 0.9999997986830436, iteration: 10962
loss: 1.0277830362319946,grad_norm: 0.9999994896664675, iteration: 10963
loss: 1.0516929626464844,grad_norm: 0.9999994479312151, iteration: 10964
loss: 1.0680127143859863,grad_norm: 0.9999992880611394, iteration: 10965
loss: 1.0253041982650757,grad_norm: 0.9999992453310766, iteration: 10966
loss: 1.0489407777786255,grad_norm: 0.9999994451704185, iteration: 10967
loss: 0.9913815855979919,grad_norm: 0.9999994812038645, iteration: 10968
loss: 1.004575252532959,grad_norm: 0.9999990811032082, iteration: 10969
loss: 1.0043429136276245,grad_norm: 0.9999991830321643, iteration: 10970
loss: 0.99379962682724,grad_norm: 0.9999991763869388, iteration: 10971
loss: 1.0136330127716064,grad_norm: 0.9999990856571948, iteration: 10972
loss: 0.9728801846504211,grad_norm: 0.9999992837305098, iteration: 10973
loss: 1.0427618026733398,grad_norm: 0.9999992371047733, iteration: 10974
loss: 1.097363829612732,grad_norm: 0.9999993828056462, iteration: 10975
loss: 1.0253695249557495,grad_norm: 0.9999992372217216, iteration: 10976
loss: 1.082053542137146,grad_norm: 0.999999384367011, iteration: 10977
loss: 1.0619229078292847,grad_norm: 0.999999261503638, iteration: 10978
loss: 1.069953203201294,grad_norm: 0.9999994133174788, iteration: 10979
loss: 1.0509694814682007,grad_norm: 0.9999994506454145, iteration: 10980
loss: 1.0506927967071533,grad_norm: 0.999999390122056, iteration: 10981
loss: 1.029924988746643,grad_norm: 0.9999998818120626, iteration: 10982
loss: 1.00615656375885,grad_norm: 0.99999932079954, iteration: 10983
loss: 1.0115443468093872,grad_norm: 0.9999991846363195, iteration: 10984
loss: 1.0252550840377808,grad_norm: 0.9999991422427451, iteration: 10985
loss: 0.9748342633247375,grad_norm: 0.9999992322024692, iteration: 10986
loss: 1.0864489078521729,grad_norm: 0.9999994115409028, iteration: 10987
loss: 1.0045995712280273,grad_norm: 0.9999992013897001, iteration: 10988
loss: 1.0402354001998901,grad_norm: 0.9999993124254798, iteration: 10989
loss: 1.00967276096344,grad_norm: 0.9999996113967595, iteration: 10990
loss: 1.0464162826538086,grad_norm: 0.9999994666109415, iteration: 10991
loss: 1.0714024305343628,grad_norm: 0.9999994191012561, iteration: 10992
loss: 1.0734633207321167,grad_norm: 0.9999991555393197, iteration: 10993
loss: 1.1020554304122925,grad_norm: 0.9999994525125625, iteration: 10994
loss: 1.0345107316970825,grad_norm: 0.9999993171448835, iteration: 10995
loss: 1.0498255491256714,grad_norm: 0.9999993839261614, iteration: 10996
loss: 1.044238805770874,grad_norm: 0.9999991721284895, iteration: 10997
loss: 1.0482257604599,grad_norm: 0.9999993247435691, iteration: 10998
loss: 1.0681830644607544,grad_norm: 0.9999993521604792, iteration: 10999
loss: 1.0521479845046997,grad_norm: 0.9999997940379542, iteration: 11000
loss: 1.0945038795471191,grad_norm: 0.9999995235025386, iteration: 11001
loss: 1.0181913375854492,grad_norm: 0.9999995861561142, iteration: 11002
loss: 1.0533735752105713,grad_norm: 0.9999994345293319, iteration: 11003
loss: 1.048112154006958,grad_norm: 0.9999992007814609, iteration: 11004
loss: 1.0573090314865112,grad_norm: 0.9999998235596086, iteration: 11005
loss: 1.047899603843689,grad_norm: 0.9999994949883922, iteration: 11006
loss: 1.0422428846359253,grad_norm: 0.9999995069560814, iteration: 11007
loss: 1.0864070653915405,grad_norm: 0.9999994084436995, iteration: 11008
loss: 1.0189478397369385,grad_norm: 0.9999994669267633, iteration: 11009
loss: 1.0469369888305664,grad_norm: 0.999999047329711, iteration: 11010
loss: 1.0253911018371582,grad_norm: 0.9999991496308096, iteration: 11011
loss: 1.0212937593460083,grad_norm: 0.9999991938883042, iteration: 11012
loss: 1.0700159072875977,grad_norm: 0.9999992390527882, iteration: 11013
loss: 0.992103636264801,grad_norm: 0.9999992715199238, iteration: 11014
loss: 0.9969629645347595,grad_norm: 0.9999992616603902, iteration: 11015
loss: 1.0756111145019531,grad_norm: 0.9999995817889518, iteration: 11016
loss: 1.0472686290740967,grad_norm: 0.9999991777597792, iteration: 11017
loss: 1.0695890188217163,grad_norm: 0.9999993375106915, iteration: 11018
loss: 1.0324504375457764,grad_norm: 0.9999991952466867, iteration: 11019
loss: 1.0957070589065552,grad_norm: 0.9999995655024454, iteration: 11020
loss: 1.0621379613876343,grad_norm: 0.9999993406760599, iteration: 11021
loss: 1.007494330406189,grad_norm: 0.999999346431437, iteration: 11022
loss: 1.0656450986862183,grad_norm: 0.9999994807773004, iteration: 11023
loss: 1.0669200420379639,grad_norm: 0.9999994124349847, iteration: 11024
loss: 1.1069250106811523,grad_norm: 0.9999996644619068, iteration: 11025
loss: 1.0532387495040894,grad_norm: 0.9999992349313058, iteration: 11026
loss: 1.0300476551055908,grad_norm: 0.9999994129555241, iteration: 11027
loss: 1.0819933414459229,grad_norm: 0.9999991359626452, iteration: 11028
loss: 1.028078317642212,grad_norm: 0.9999994026090361, iteration: 11029
loss: 1.0430785417556763,grad_norm: 0.9999991034335367, iteration: 11030
loss: 1.0133802890777588,grad_norm: 0.9999992806463736, iteration: 11031
loss: 1.0659887790679932,grad_norm: 0.9999997621141654, iteration: 11032
loss: 1.0074607133865356,grad_norm: 0.9999992195197688, iteration: 11033
loss: 1.0418907403945923,grad_norm: 0.999999343951216, iteration: 11034
loss: 1.0254597663879395,grad_norm: 0.9999992502988215, iteration: 11035
loss: 1.0329999923706055,grad_norm: 0.9999991088922672, iteration: 11036
loss: 1.0382121801376343,grad_norm: 0.9999994574730787, iteration: 11037
loss: 0.9723801016807556,grad_norm: 0.9999992290599351, iteration: 11038
loss: 1.0475318431854248,grad_norm: 0.9999993140547715, iteration: 11039
loss: 1.0618451833724976,grad_norm: 0.9999993131255239, iteration: 11040
loss: 1.0231322050094604,grad_norm: 0.9999991492455307, iteration: 11041
loss: 1.051312804222107,grad_norm: 0.9999994426141727, iteration: 11042
loss: 0.9932284951210022,grad_norm: 0.9999995632673809, iteration: 11043
loss: 0.9861588478088379,grad_norm: 0.9999991054714152, iteration: 11044
loss: 1.044359564781189,grad_norm: 0.9999996636600287, iteration: 11045
loss: 1.0100187063217163,grad_norm: 0.9999990948213613, iteration: 11046
loss: 1.032927393913269,grad_norm: 0.9999996867734473, iteration: 11047
loss: 1.0209254026412964,grad_norm: 0.9999993470450359, iteration: 11048
loss: 1.0252267122268677,grad_norm: 0.9999993402513195, iteration: 11049
loss: 1.0666714906692505,grad_norm: 0.9999995096664487, iteration: 11050
loss: 1.0530465841293335,grad_norm: 0.9999991813936793, iteration: 11051
loss: 1.039414405822754,grad_norm: 0.9999991640118288, iteration: 11052
loss: 1.1077518463134766,grad_norm: 0.9999997596649522, iteration: 11053
loss: 1.0099200010299683,grad_norm: 0.9999991881054229, iteration: 11054
loss: 1.045652985572815,grad_norm: 0.9999991973857889, iteration: 11055
loss: 1.0343314409255981,grad_norm: 0.9999992723767994, iteration: 11056
loss: 1.010388731956482,grad_norm: 0.9999991953728619, iteration: 11057
loss: 1.0628987550735474,grad_norm: 0.9999992259701324, iteration: 11058
loss: 1.0493980646133423,grad_norm: 0.9999992277942851, iteration: 11059
loss: 1.0255608558654785,grad_norm: 0.9999993886370118, iteration: 11060
loss: 1.0488837957382202,grad_norm: 0.9999993084722637, iteration: 11061
loss: 1.0299566984176636,grad_norm: 0.9999993430724543, iteration: 11062
loss: 1.0335084199905396,grad_norm: 0.9999992075828502, iteration: 11063
loss: 1.1034988164901733,grad_norm: 0.9999994653547823, iteration: 11064
loss: 1.0537933111190796,grad_norm: 0.9999993597941998, iteration: 11065
loss: 1.0524615049362183,grad_norm: 0.9999993364008144, iteration: 11066
loss: 1.011623501777649,grad_norm: 0.9999994358748141, iteration: 11067
loss: 1.0153077840805054,grad_norm: 0.9999992641252385, iteration: 11068
loss: 1.0418437719345093,grad_norm: 0.9999992568027022, iteration: 11069
loss: 1.0613294839859009,grad_norm: 0.9999992094614387, iteration: 11070
loss: 1.0158486366271973,grad_norm: 0.9999994736365654, iteration: 11071
loss: 1.0180937051773071,grad_norm: 0.999999054854259, iteration: 11072
loss: 1.021260142326355,grad_norm: 0.9999991035258501, iteration: 11073
loss: 1.0109074115753174,grad_norm: 0.9999993137884198, iteration: 11074
loss: 1.0384517908096313,grad_norm: 0.9999991499835025, iteration: 11075
loss: 1.0280568599700928,grad_norm: 0.9999993132457161, iteration: 11076
loss: 0.982150673866272,grad_norm: 0.9999992839787657, iteration: 11077
loss: 1.0489007234573364,grad_norm: 0.9619773451091636, iteration: 11078
loss: 1.0376907587051392,grad_norm: 0.999999130444314, iteration: 11079
loss: 1.034246802330017,grad_norm: 0.9999991070018136, iteration: 11080
loss: 1.0132747888565063,grad_norm: 0.9999994588125439, iteration: 11081
loss: 1.0671463012695312,grad_norm: 0.9771283382076315, iteration: 11082
loss: 1.0589799880981445,grad_norm: 0.9999993066039048, iteration: 11083
loss: 0.9988880753517151,grad_norm: 0.9999995890698006, iteration: 11084
loss: 0.9819595813751221,grad_norm: 0.999999316994296, iteration: 11085
loss: 1.0618879795074463,grad_norm: 0.9999995696520391, iteration: 11086
loss: 1.0016028881072998,grad_norm: 0.9999992364104208, iteration: 11087
loss: 1.0668286085128784,grad_norm: 0.9999996175532835, iteration: 11088
loss: 0.9931678175926208,grad_norm: 0.9999991391644678, iteration: 11089
loss: 1.0506398677825928,grad_norm: 0.9999995774846323, iteration: 11090
loss: 1.03084397315979,grad_norm: 0.9999994058048, iteration: 11091
loss: 1.047019362449646,grad_norm: 0.9999995829312034, iteration: 11092
loss: 1.0551658868789673,grad_norm: 0.9999993395099189, iteration: 11093
loss: 1.0382566452026367,grad_norm: 0.9999992346230957, iteration: 11094
loss: 1.0164573192596436,grad_norm: 0.9999992431534015, iteration: 11095
loss: 0.9734459519386292,grad_norm: 0.9999992364582763, iteration: 11096
loss: 1.0365865230560303,grad_norm: 0.9999992627458338, iteration: 11097
loss: 1.0274903774261475,grad_norm: 0.9999991867503855, iteration: 11098
loss: 1.0313059091567993,grad_norm: 0.9999992406316834, iteration: 11099
loss: 0.9852260947227478,grad_norm: 0.9999991166333697, iteration: 11100
loss: 1.0364916324615479,grad_norm: 0.9999991366015936, iteration: 11101
loss: 1.0421558618545532,grad_norm: 0.9999993086179917, iteration: 11102
loss: 1.032256841659546,grad_norm: 0.9999994023236186, iteration: 11103
loss: 0.999312162399292,grad_norm: 0.9999992192001362, iteration: 11104
loss: 1.0237301588058472,grad_norm: 0.9999990953626605, iteration: 11105
loss: 1.1791940927505493,grad_norm: 0.9999997782771413, iteration: 11106
loss: 1.0662213563919067,grad_norm: 0.9999996825985651, iteration: 11107
loss: 1.0339174270629883,grad_norm: 0.9999990975281893, iteration: 11108
loss: 1.036788821220398,grad_norm: 0.999999242165268, iteration: 11109
loss: 0.9920995831489563,grad_norm: 0.9999992297750769, iteration: 11110
loss: 1.082133173942566,grad_norm: 0.9999995465510791, iteration: 11111
loss: 1.0437710285186768,grad_norm: 0.9999992743841717, iteration: 11112
loss: 1.031495213508606,grad_norm: 0.9999993362808892, iteration: 11113
loss: 1.0172603130340576,grad_norm: 0.9999993499839027, iteration: 11114
loss: 1.0653623342514038,grad_norm: 0.99999924235244, iteration: 11115
loss: 1.014852523803711,grad_norm: 0.9999992320626121, iteration: 11116
loss: 1.0179349184036255,grad_norm: 0.9999992917822271, iteration: 11117
loss: 1.029140591621399,grad_norm: 0.9999992621939152, iteration: 11118
loss: 1.0111457109451294,grad_norm: 0.9999992903758108, iteration: 11119
loss: 1.0126500129699707,grad_norm: 0.999999240768723, iteration: 11120
loss: 1.0085809230804443,grad_norm: 0.9999992892522838, iteration: 11121
loss: 1.0413085222244263,grad_norm: 0.999999381524748, iteration: 11122
loss: 0.9947440028190613,grad_norm: 0.9999991278820359, iteration: 11123
loss: 1.1147024631500244,grad_norm: 0.9999997897109614, iteration: 11124
loss: 1.0577120780944824,grad_norm: 0.9999992632900104, iteration: 11125
loss: 1.0448393821716309,grad_norm: 0.9999996027824275, iteration: 11126
loss: 1.0250104665756226,grad_norm: 0.9999992144562356, iteration: 11127
loss: 1.0764962434768677,grad_norm: 0.9999994891347855, iteration: 11128
loss: 1.001834511756897,grad_norm: 0.9999992495475112, iteration: 11129
loss: 0.9767550230026245,grad_norm: 0.9999992844241238, iteration: 11130
loss: 1.104239821434021,grad_norm: 0.9999995721378172, iteration: 11131
loss: 1.0232268571853638,grad_norm: 0.9999992298796312, iteration: 11132
loss: 1.0090290307998657,grad_norm: 0.9999995991974835, iteration: 11133
loss: 1.069223165512085,grad_norm: 0.9999994528731889, iteration: 11134
loss: 1.0325965881347656,grad_norm: 0.9999990897792563, iteration: 11135
loss: 1.0579742193222046,grad_norm: 0.9999991991785379, iteration: 11136
loss: 1.0369731187820435,grad_norm: 0.9999992612440587, iteration: 11137
loss: 1.0299745798110962,grad_norm: 0.9999993189350331, iteration: 11138
loss: 1.057426929473877,grad_norm: 0.9999991149322843, iteration: 11139
loss: 1.0559558868408203,grad_norm: 0.9999991262266602, iteration: 11140
loss: 1.0700522661209106,grad_norm: 0.9999993160448264, iteration: 11141
loss: 1.0632715225219727,grad_norm: 0.999999486071889, iteration: 11142
loss: 1.0248891115188599,grad_norm: 0.9999994107090479, iteration: 11143
loss: 1.017299771308899,grad_norm: 0.9999991455587304, iteration: 11144
loss: 1.0199545621871948,grad_norm: 0.9999992983995192, iteration: 11145
loss: 1.0154207944869995,grad_norm: 0.9999992535138803, iteration: 11146
loss: 1.0278117656707764,grad_norm: 0.9999993644668753, iteration: 11147
loss: 1.0941983461380005,grad_norm: 0.9999994257262501, iteration: 11148
loss: 1.0223227739334106,grad_norm: 0.9999995168229491, iteration: 11149
loss: 1.0022133588790894,grad_norm: 0.9999993290042698, iteration: 11150
loss: 1.1307322978973389,grad_norm: 0.999999481442016, iteration: 11151
loss: 1.0607143640518188,grad_norm: 0.9999997617141486, iteration: 11152
loss: 1.1183693408966064,grad_norm: 0.9999998501781185, iteration: 11153
loss: 1.0902692079544067,grad_norm: 0.9999996400380385, iteration: 11154
loss: 1.1327927112579346,grad_norm: 0.9999994925231757, iteration: 11155
loss: 1.1429164409637451,grad_norm: 0.9999995363023583, iteration: 11156
loss: 1.0220134258270264,grad_norm: 0.9999992655047687, iteration: 11157
loss: 1.038081407546997,grad_norm: 0.9999993658886415, iteration: 11158
loss: 1.028285026550293,grad_norm: 0.9999992059798785, iteration: 11159
loss: 1.0464259386062622,grad_norm: 0.9999995093602023, iteration: 11160
loss: 1.0034459829330444,grad_norm: 0.9999991538838084, iteration: 11161
loss: 1.0338666439056396,grad_norm: 0.9999991862119395, iteration: 11162
loss: 1.0222785472869873,grad_norm: 0.9999994652023126, iteration: 11163
loss: 1.0097103118896484,grad_norm: 0.9999993633394073, iteration: 11164
loss: 1.0272133350372314,grad_norm: 0.9999992743473178, iteration: 11165
loss: 1.0569463968276978,grad_norm: 0.9999991652717729, iteration: 11166
loss: 1.043708086013794,grad_norm: 0.9999993478826098, iteration: 11167
loss: 1.0667041540145874,grad_norm: 0.999999573505479, iteration: 11168
loss: 1.0074107646942139,grad_norm: 0.9999993179204032, iteration: 11169
loss: 1.018937349319458,grad_norm: 0.9999992092168917, iteration: 11170
loss: 1.0013210773468018,grad_norm: 0.9999993279149295, iteration: 11171
loss: 1.0192610025405884,grad_norm: 0.999999373744551, iteration: 11172
loss: 1.0904901027679443,grad_norm: 0.9999993808380281, iteration: 11173
loss: 0.9975002408027649,grad_norm: 0.9999992288845536, iteration: 11174
loss: 0.9681645035743713,grad_norm: 0.9999991231390781, iteration: 11175
loss: 1.0105005502700806,grad_norm: 0.9999992084439184, iteration: 11176
loss: 1.0924615859985352,grad_norm: 0.9999996299267786, iteration: 11177
loss: 1.0134996175765991,grad_norm: 0.9999993477692344, iteration: 11178
loss: 1.050204873085022,grad_norm: 0.9999998331609933, iteration: 11179
loss: 1.0469954013824463,grad_norm: 0.9999994874106601, iteration: 11180
loss: 1.026458740234375,grad_norm: 0.9999991574800133, iteration: 11181
loss: 1.0175893306732178,grad_norm: 0.9999993893368412, iteration: 11182
loss: 1.0063194036483765,grad_norm: 0.9999993597968786, iteration: 11183
loss: 1.0406427383422852,grad_norm: 0.9999991179045213, iteration: 11184
loss: 1.034256100654602,grad_norm: 0.9999994017032449, iteration: 11185
loss: 0.9999648332595825,grad_norm: 0.9999990729827102, iteration: 11186
loss: 0.994286298751831,grad_norm: 0.9999991339293898, iteration: 11187
loss: 1.019069790840149,grad_norm: 0.9999992628044949, iteration: 11188
loss: 1.0777974128723145,grad_norm: 0.9999992630378403, iteration: 11189
loss: 1.0553852319717407,grad_norm: 0.9999994610230515, iteration: 11190
loss: 1.0888123512268066,grad_norm: 0.999999787141015, iteration: 11191
loss: 1.0237051248550415,grad_norm: 0.9999993214967047, iteration: 11192
loss: 1.0604100227355957,grad_norm: 0.99999914961381, iteration: 11193
loss: 1.0679173469543457,grad_norm: 0.9999993583573936, iteration: 11194
loss: 1.0646708011627197,grad_norm: 0.9999993264467567, iteration: 11195
loss: 1.0654797554016113,grad_norm: 0.9999993295718168, iteration: 11196
loss: 1.116997480392456,grad_norm: 0.9999994488902619, iteration: 11197
loss: 1.0217463970184326,grad_norm: 0.9960273050038225, iteration: 11198
loss: 0.9881218075752258,grad_norm: 0.9999992723220344, iteration: 11199
loss: 1.011892557144165,grad_norm: 0.9999993230886446, iteration: 11200
loss: 1.022565484046936,grad_norm: 0.9999993521732856, iteration: 11201
loss: 1.0598399639129639,grad_norm: 0.9999994063280501, iteration: 11202
loss: 1.0162078142166138,grad_norm: 0.9999994980091904, iteration: 11203
loss: 1.0696542263031006,grad_norm: 0.9999993440816057, iteration: 11204
loss: 0.9974156022071838,grad_norm: 0.9999991738023448, iteration: 11205
loss: 1.0643366575241089,grad_norm: 0.9999995015056238, iteration: 11206
loss: 1.0511268377304077,grad_norm: 0.9999997628489218, iteration: 11207
loss: 1.0538372993469238,grad_norm: 0.9999995209674851, iteration: 11208
loss: 1.1210966110229492,grad_norm: 0.9999997181441769, iteration: 11209
loss: 1.031059741973877,grad_norm: 0.9999990521309039, iteration: 11210
loss: 1.0506234169006348,grad_norm: 0.9999993277961481, iteration: 11211
loss: 1.0979276895523071,grad_norm: 0.9999994143366995, iteration: 11212
loss: 1.03602933883667,grad_norm: 0.9999993079541535, iteration: 11213
loss: 1.0034328699111938,grad_norm: 0.9999991859292938, iteration: 11214
loss: 1.0199276208877563,grad_norm: 0.9999991154844319, iteration: 11215
loss: 1.0318272113800049,grad_norm: 0.9999993910746793, iteration: 11216
loss: 1.0329023599624634,grad_norm: 0.9999992667590417, iteration: 11217
loss: 1.0463827848434448,grad_norm: 0.999999326133075, iteration: 11218
loss: 1.0471305847167969,grad_norm: 0.9999993111956316, iteration: 11219
loss: 1.076566457748413,grad_norm: 0.9999994472824162, iteration: 11220
loss: 1.0576838254928589,grad_norm: 0.9999994186865021, iteration: 11221
loss: 1.0291869640350342,grad_norm: 0.9999992220997448, iteration: 11222
loss: 1.0211668014526367,grad_norm: 0.9999992952837984, iteration: 11223
loss: 1.0690813064575195,grad_norm: 0.9999991783407296, iteration: 11224
loss: 1.016728401184082,grad_norm: 0.9999995560690798, iteration: 11225
loss: 1.0100001096725464,grad_norm: 0.999999262292522, iteration: 11226
loss: 1.0456584692001343,grad_norm: 0.9999994252133851, iteration: 11227
loss: 1.0443165302276611,grad_norm: 0.9999991783332801, iteration: 11228
loss: 1.017919659614563,grad_norm: 0.9999991930868707, iteration: 11229
loss: 1.079052448272705,grad_norm: 0.9999993984665974, iteration: 11230
loss: 0.9904585480690002,grad_norm: 0.999999308414302, iteration: 11231
loss: 0.9943589568138123,grad_norm: 0.9999992744648831, iteration: 11232
loss: 1.052614688873291,grad_norm: 0.9999993763996738, iteration: 11233
loss: 1.053766131401062,grad_norm: 0.9999995676519605, iteration: 11234
loss: 1.0300581455230713,grad_norm: 0.9999992095068675, iteration: 11235
loss: 1.023802638053894,grad_norm: 0.9999992938320382, iteration: 11236
loss: 0.971723198890686,grad_norm: 0.9336805020606572, iteration: 11237
loss: 1.0655978918075562,grad_norm: 0.9999992673405284, iteration: 11238
loss: 1.0201247930526733,grad_norm: 0.9999992464264078, iteration: 11239
loss: 1.0366836786270142,grad_norm: 0.999999577575129, iteration: 11240
loss: 1.0432995557785034,grad_norm: 0.9999996218712578, iteration: 11241
loss: 1.0648528337478638,grad_norm: 0.9999995069014646, iteration: 11242
loss: 0.9972732067108154,grad_norm: 0.9999993271702156, iteration: 11243
loss: 1.0388270616531372,grad_norm: 0.9999992509685018, iteration: 11244
loss: 0.9953029155731201,grad_norm: 0.9999991866287831, iteration: 11245
loss: 1.0930424928665161,grad_norm: 0.9999994341789663, iteration: 11246
loss: 1.0434627532958984,grad_norm: 0.9999992439287602, iteration: 11247
loss: 1.0432692766189575,grad_norm: 0.9999993327901582, iteration: 11248
loss: 1.0351381301879883,grad_norm: 0.9999992209096177, iteration: 11249
loss: 0.9986591339111328,grad_norm: 0.9999993384474353, iteration: 11250
loss: 1.0675898790359497,grad_norm: 0.9999992177097917, iteration: 11251
loss: 1.0473320484161377,grad_norm: 0.999999234958877, iteration: 11252
loss: 0.9786219596862793,grad_norm: 0.9999991297278494, iteration: 11253
loss: 1.0538278818130493,grad_norm: 0.9999991802909759, iteration: 11254
loss: 1.0103873014450073,grad_norm: 0.9999994059671734, iteration: 11255
loss: 1.0169569253921509,grad_norm: 0.9999992433846566, iteration: 11256
loss: 1.0808871984481812,grad_norm: 0.9999990470272203, iteration: 11257
loss: 1.0045970678329468,grad_norm: 0.9999996104215622, iteration: 11258
loss: 1.0705527067184448,grad_norm: 0.9999995468538369, iteration: 11259
loss: 1.0236738920211792,grad_norm: 0.9999991923717207, iteration: 11260
loss: 1.0060316324234009,grad_norm: 0.9999991330767329, iteration: 11261
loss: 1.052228331565857,grad_norm: 0.9999994323828092, iteration: 11262
loss: 1.0221041440963745,grad_norm: 0.999999365866291, iteration: 11263
loss: 1.0724338293075562,grad_norm: 0.9999994754439955, iteration: 11264
loss: 1.0508395433425903,grad_norm: 0.9999993635129845, iteration: 11265
loss: 1.0589799880981445,grad_norm: 0.9999996019361234, iteration: 11266
loss: 1.0251350402832031,grad_norm: 0.9999992130173218, iteration: 11267
loss: 1.0834609270095825,grad_norm: 0.9999992358981805, iteration: 11268
loss: 0.9804607033729553,grad_norm: 0.9999990618089953, iteration: 11269
loss: 1.0229276418685913,grad_norm: 0.9999992567916544, iteration: 11270
loss: 1.0288435220718384,grad_norm: 0.9999992824982847, iteration: 11271
loss: 1.0529202222824097,grad_norm: 0.9999991531745549, iteration: 11272
loss: 1.0052107572555542,grad_norm: 0.9999992727194382, iteration: 11273
loss: 1.0278189182281494,grad_norm: 0.9999994423585102, iteration: 11274
loss: 0.9943677186965942,grad_norm: 0.9999991293129707, iteration: 11275
loss: 1.0661669969558716,grad_norm: 0.9999995462423518, iteration: 11276
loss: 0.9862685799598694,grad_norm: 0.9999992244740068, iteration: 11277
loss: 1.0263501405715942,grad_norm: 0.9999997078390408, iteration: 11278
loss: 1.0216774940490723,grad_norm: 0.9999991547282142, iteration: 11279
loss: 1.0752170085906982,grad_norm: 0.999999304385342, iteration: 11280
loss: 1.0243724584579468,grad_norm: 0.9999993673982073, iteration: 11281
loss: 1.033268928527832,grad_norm: 0.9999992531952697, iteration: 11282
loss: 0.993794858455658,grad_norm: 0.9999991571042672, iteration: 11283
loss: 1.077590823173523,grad_norm: 0.9999998882132934, iteration: 11284
loss: 1.069762945175171,grad_norm: 0.9999992714741538, iteration: 11285
loss: 1.0514841079711914,grad_norm: 0.9999995715703569, iteration: 11286
loss: 1.0769931077957153,grad_norm: 0.9999992225002461, iteration: 11287
loss: 1.0485836267471313,grad_norm: 0.999999359214423, iteration: 11288
loss: 1.1473010778427124,grad_norm: 0.999999861526469, iteration: 11289
loss: 1.0690079927444458,grad_norm: 0.9999995097176486, iteration: 11290
loss: 1.0187973976135254,grad_norm: 0.9999995320887458, iteration: 11291
loss: 1.0596177577972412,grad_norm: 0.9999992513312194, iteration: 11292
loss: 1.0104879140853882,grad_norm: 0.999999266255098, iteration: 11293
loss: 1.025976538658142,grad_norm: 0.9999993577794818, iteration: 11294
loss: 0.9982235431671143,grad_norm: 0.9999991625971792, iteration: 11295
loss: 1.07393217086792,grad_norm: 0.9999993711059134, iteration: 11296
loss: 1.083397626876831,grad_norm: 0.9999994552817213, iteration: 11297
loss: 1.0501466989517212,grad_norm: 0.9999993675044893, iteration: 11298
loss: 1.048625111579895,grad_norm: 0.9999994379503548, iteration: 11299
loss: 1.0557955503463745,grad_norm: 0.9999992820605975, iteration: 11300
loss: 1.0661225318908691,grad_norm: 0.9999992771487536, iteration: 11301
loss: 0.997410237789154,grad_norm: 0.999999353946226, iteration: 11302
loss: 1.034157156944275,grad_norm: 0.9999992157614166, iteration: 11303
loss: 1.059926152229309,grad_norm: 0.9999994776882256, iteration: 11304
loss: 1.0410499572753906,grad_norm: 0.9999990914595147, iteration: 11305
loss: 1.0244419574737549,grad_norm: 0.9999992460697716, iteration: 11306
loss: 1.0508792400360107,grad_norm: 0.9999991425460062, iteration: 11307
loss: 1.0181481838226318,grad_norm: 0.9999991260159135, iteration: 11308
loss: 1.0540449619293213,grad_norm: 0.999999371107226, iteration: 11309
loss: 1.029214859008789,grad_norm: 0.9999991581007514, iteration: 11310
loss: 1.061660647392273,grad_norm: 0.9999991254784094, iteration: 11311
loss: 1.0484119653701782,grad_norm: 0.9999991994381722, iteration: 11312
loss: 1.001057505607605,grad_norm: 0.9999991398854424, iteration: 11313
loss: 1.0529433488845825,grad_norm: 0.9999991125860268, iteration: 11314
loss: 1.0580965280532837,grad_norm: 0.9999992688836158, iteration: 11315
loss: 1.0616480112075806,grad_norm: 0.9999992748385171, iteration: 11316
loss: 1.0549236536026,grad_norm: 0.9999992649454708, iteration: 11317
loss: 1.011322021484375,grad_norm: 0.9999993201128062, iteration: 11318
loss: 0.991786777973175,grad_norm: 0.9999990600871298, iteration: 11319
loss: 1.0147275924682617,grad_norm: 0.9999991515706034, iteration: 11320
loss: 0.9403758645057678,grad_norm: 0.9999992694875476, iteration: 11321
loss: 1.0206115245819092,grad_norm: 0.9999991914885656, iteration: 11322
loss: 1.0771253108978271,grad_norm: 0.9999992812640794, iteration: 11323
loss: 1.0284223556518555,grad_norm: 0.9999992309266349, iteration: 11324
loss: 1.0412789583206177,grad_norm: 0.9999997030401052, iteration: 11325
loss: 1.032172441482544,grad_norm: 0.9999992952973047, iteration: 11326
loss: 1.042944073677063,grad_norm: 0.9999995910583924, iteration: 11327
loss: 1.0239194631576538,grad_norm: 0.9999990992478767, iteration: 11328
loss: 0.9974547028541565,grad_norm: 0.9999991360615288, iteration: 11329
loss: 1.0174767971038818,grad_norm: 0.9999992968234361, iteration: 11330
loss: 1.0897510051727295,grad_norm: 0.999999358321876, iteration: 11331
loss: 1.0241539478302002,grad_norm: 0.9999993250927119, iteration: 11332
loss: 1.0464613437652588,grad_norm: 0.9999992615505555, iteration: 11333
loss: 1.0429868698120117,grad_norm: 0.9999994687663988, iteration: 11334
loss: 1.0490916967391968,grad_norm: 0.999999371471605, iteration: 11335
loss: 1.0332114696502686,grad_norm: 0.9999991579025536, iteration: 11336
loss: 1.030505657196045,grad_norm: 0.9999991192330626, iteration: 11337
loss: 1.0773558616638184,grad_norm: 0.9999995427190641, iteration: 11338
loss: 1.0755841732025146,grad_norm: 0.9999993760740995, iteration: 11339
loss: 0.9949614405632019,grad_norm: 0.9999990833253528, iteration: 11340
loss: 1.0462045669555664,grad_norm: 0.9999991332546908, iteration: 11341
loss: 0.979564905166626,grad_norm: 0.9999993262427778, iteration: 11342
loss: 1.051718831062317,grad_norm: 0.9999994293433886, iteration: 11343
loss: 1.050832748413086,grad_norm: 0.9999993641424081, iteration: 11344
loss: 1.1012771129608154,grad_norm: 0.9999991557696681, iteration: 11345
loss: 1.0271328687667847,grad_norm: 0.9999991845369639, iteration: 11346
loss: 1.053856372833252,grad_norm: 0.9999991849724412, iteration: 11347
loss: 1.0323916673660278,grad_norm: 0.9999991408941852, iteration: 11348
loss: 1.011697769165039,grad_norm: 0.9999992196887045, iteration: 11349
loss: 1.0242003202438354,grad_norm: 0.9999991920679058, iteration: 11350
loss: 1.0352146625518799,grad_norm: 0.999999172742347, iteration: 11351
loss: 1.0265555381774902,grad_norm: 0.99999911622842, iteration: 11352
loss: 1.0232934951782227,grad_norm: 0.9999991472605557, iteration: 11353
loss: 1.0184626579284668,grad_norm: 0.9999995366544111, iteration: 11354
loss: 1.0542770624160767,grad_norm: 0.9999992683020166, iteration: 11355
loss: 1.0482419729232788,grad_norm: 0.9999993506479462, iteration: 11356
loss: 1.0575385093688965,grad_norm: 0.9999998577904234, iteration: 11357
loss: 1.080222725868225,grad_norm: 0.9999996773512716, iteration: 11358
loss: 1.0623998641967773,grad_norm: 0.9999995398923913, iteration: 11359
loss: 1.0958534479141235,grad_norm: 0.9999997959206602, iteration: 11360
loss: 1.0294092893600464,grad_norm: 0.9999992075031316, iteration: 11361
loss: 1.0501424074172974,grad_norm: 0.9999992192746965, iteration: 11362
loss: 1.0148965120315552,grad_norm: 0.9999991173544172, iteration: 11363
loss: 1.039608359336853,grad_norm: 0.9999995876963852, iteration: 11364
loss: 1.0123461484909058,grad_norm: 0.9999991354520587, iteration: 11365
loss: 1.0097707509994507,grad_norm: 0.9999994322263827, iteration: 11366
loss: 1.080283522605896,grad_norm: 0.9999992563671559, iteration: 11367
loss: 1.0566476583480835,grad_norm: 0.9999994373351421, iteration: 11368
loss: 1.0786923170089722,grad_norm: 0.9999994196314815, iteration: 11369
loss: 1.0742838382720947,grad_norm: 0.9999998180891515, iteration: 11370
loss: 1.0000262260437012,grad_norm: 0.9999992698194085, iteration: 11371
loss: 1.0494645833969116,grad_norm: 0.9786275498305224, iteration: 11372
loss: 1.0119073390960693,grad_norm: 0.9999991122426128, iteration: 11373
loss: 1.0997276306152344,grad_norm: 0.9999997508322868, iteration: 11374
loss: 1.010504961013794,grad_norm: 0.9999992671600394, iteration: 11375
loss: 1.0298678874969482,grad_norm: 0.9999992849834499, iteration: 11376
loss: 1.0087977647781372,grad_norm: 0.9999991521047434, iteration: 11377
loss: 1.0045393705368042,grad_norm: 0.9999994447474247, iteration: 11378
loss: 1.0037771463394165,grad_norm: 0.9999992539931938, iteration: 11379
loss: 1.0301109552383423,grad_norm: 0.9999995350515546, iteration: 11380
loss: 1.065878987312317,grad_norm: 0.9999994741094145, iteration: 11381
loss: 1.0743229389190674,grad_norm: 0.9999997121847125, iteration: 11382
loss: 1.0612730979919434,grad_norm: 0.9999996294535296, iteration: 11383
loss: 1.0839354991912842,grad_norm: 0.9999995643616243, iteration: 11384
loss: 1.0318039655685425,grad_norm: 0.9999993601654967, iteration: 11385
loss: 1.0513863563537598,grad_norm: 0.9999998919927677, iteration: 11386
loss: 1.0570563077926636,grad_norm: 0.9999993551052625, iteration: 11387
loss: 1.0741090774536133,grad_norm: 0.9999992252998434, iteration: 11388
loss: 1.0440417528152466,grad_norm: 0.9999991251277773, iteration: 11389
loss: 1.038720965385437,grad_norm: 0.999999503327827, iteration: 11390
loss: 1.0617908239364624,grad_norm: 0.9999992377708036, iteration: 11391
loss: 1.0814359188079834,grad_norm: 0.9999995187157926, iteration: 11392
loss: 1.0062339305877686,grad_norm: 0.9999991395455704, iteration: 11393
loss: 1.024619460105896,grad_norm: 0.999999176719095, iteration: 11394
loss: 1.0575569868087769,grad_norm: 0.9999997556906395, iteration: 11395
loss: 1.0399365425109863,grad_norm: 0.9999992733548985, iteration: 11396
loss: 1.0141257047653198,grad_norm: 0.9999993327549248, iteration: 11397
loss: 1.0181435346603394,grad_norm: 0.9999992297239962, iteration: 11398
loss: 1.0254817008972168,grad_norm: 0.9999992502426526, iteration: 11399
loss: 0.9756243228912354,grad_norm: 0.9999992570537376, iteration: 11400
loss: 1.0338454246520996,grad_norm: 0.9999993929855332, iteration: 11401
loss: 1.0491026639938354,grad_norm: 0.9999991383600401, iteration: 11402
loss: 1.054194450378418,grad_norm: 0.9999992657413922, iteration: 11403
loss: 0.9900966286659241,grad_norm: 0.9999995538187629, iteration: 11404
loss: 1.026244878768921,grad_norm: 0.9784971007126937, iteration: 11405
loss: 1.0241727828979492,grad_norm: 0.9999997797974111, iteration: 11406
loss: 1.0658458471298218,grad_norm: 0.9999991733242926, iteration: 11407
loss: 1.003710389137268,grad_norm: 0.9999990417511144, iteration: 11408
loss: 1.0149638652801514,grad_norm: 0.9999991525747779, iteration: 11409
loss: 1.0608171224594116,grad_norm: 0.9999993402127378, iteration: 11410
loss: 1.0694268941879272,grad_norm: 0.9999991269216788, iteration: 11411
loss: 1.0228339433670044,grad_norm: 0.9999993693813238, iteration: 11412
loss: 1.0424809455871582,grad_norm: 0.9999995291775398, iteration: 11413
loss: 1.067771553993225,grad_norm: 0.9999992930025856, iteration: 11414
loss: 1.003262996673584,grad_norm: 0.9999992125767911, iteration: 11415
loss: 1.0022956132888794,grad_norm: 0.9999991096890734, iteration: 11416
loss: 1.0159738063812256,grad_norm: 0.9999992391676283, iteration: 11417
loss: 1.0370898246765137,grad_norm: 0.9999991399199669, iteration: 11418
loss: 1.0459468364715576,grad_norm: 0.99999967501899, iteration: 11419
loss: 1.0150132179260254,grad_norm: 0.9999993141948205, iteration: 11420
loss: 1.0503368377685547,grad_norm: 0.9999994344126186, iteration: 11421
loss: 1.08748197555542,grad_norm: 0.9999996797414135, iteration: 11422
loss: 1.0256059169769287,grad_norm: 0.9999993403150436, iteration: 11423
loss: 1.0540016889572144,grad_norm: 0.9999998171713558, iteration: 11424
loss: 1.0767736434936523,grad_norm: 0.9999996781914093, iteration: 11425
loss: 1.0851351022720337,grad_norm: 0.999999202961402, iteration: 11426
loss: 1.0624239444732666,grad_norm: 0.9999993797797281, iteration: 11427
loss: 0.9681990742683411,grad_norm: 0.9999992214677437, iteration: 11428
loss: 1.0254446268081665,grad_norm: 0.9999994098691456, iteration: 11429
loss: 1.0174649953842163,grad_norm: 0.9999993146145745, iteration: 11430
loss: 1.0452098846435547,grad_norm: 0.999999183586268, iteration: 11431
loss: 1.0121937990188599,grad_norm: 0.9999996333731359, iteration: 11432
loss: 0.978191614151001,grad_norm: 0.9999993237679269, iteration: 11433
loss: 1.0341299772262573,grad_norm: 0.9999992449837781, iteration: 11434
loss: 1.0737653970718384,grad_norm: 0.9999993626248851, iteration: 11435
loss: 1.0518041849136353,grad_norm: 0.9999993353552407, iteration: 11436
loss: 1.0613207817077637,grad_norm: 0.9999993863348273, iteration: 11437
loss: 0.9856953024864197,grad_norm: 0.9999996256215199, iteration: 11438
loss: 1.0418250560760498,grad_norm: 0.9999997012622434, iteration: 11439
loss: 0.987430989742279,grad_norm: 0.9999992751032144, iteration: 11440
loss: 0.9707853198051453,grad_norm: 0.9999990860120326, iteration: 11441
loss: 1.069321870803833,grad_norm: 0.9999994525926506, iteration: 11442
loss: 0.9903628826141357,grad_norm: 0.9999992179837786, iteration: 11443
loss: 1.041576862335205,grad_norm: 0.9999993299214152, iteration: 11444
loss: 1.0449649095535278,grad_norm: 0.9999993489123108, iteration: 11445
loss: 1.0026166439056396,grad_norm: 0.9999992287382544, iteration: 11446
loss: 1.0865836143493652,grad_norm: 0.9999994103539502, iteration: 11447
loss: 1.0983713865280151,grad_norm: 0.9999994621793856, iteration: 11448
loss: 1.0237877368927002,grad_norm: 0.9999991941757095, iteration: 11449
loss: 1.0009993314743042,grad_norm: 0.9999994749496995, iteration: 11450
loss: 1.0145528316497803,grad_norm: 0.9999995846930224, iteration: 11451
loss: 1.0493720769882202,grad_norm: 0.999999222687751, iteration: 11452
loss: 1.0584876537322998,grad_norm: 0.9999992535859499, iteration: 11453
loss: 1.0556718111038208,grad_norm: 0.99999909521193, iteration: 11454
loss: 1.020769476890564,grad_norm: 0.9999994553818806, iteration: 11455
loss: 1.0620003938674927,grad_norm: 0.9999991406424061, iteration: 11456
loss: 1.0104254484176636,grad_norm: 0.9999993550817262, iteration: 11457
loss: 0.9944879412651062,grad_norm: 0.9999992587522073, iteration: 11458
loss: 1.087856411933899,grad_norm: 0.9999995083463559, iteration: 11459
loss: 1.0666638612747192,grad_norm: 0.9999997297459401, iteration: 11460
loss: 1.0353143215179443,grad_norm: 0.9999991637945783, iteration: 11461
loss: 1.122771978378296,grad_norm: 0.9999994739892115, iteration: 11462
loss: 1.0165141820907593,grad_norm: 0.9999995523177884, iteration: 11463
loss: 1.0783013105392456,grad_norm: 0.9999993185956773, iteration: 11464
loss: 1.0615110397338867,grad_norm: 0.9999992718530538, iteration: 11465
loss: 1.0121086835861206,grad_norm: 0.9999994381845949, iteration: 11466
loss: 1.057004690170288,grad_norm: 0.9999994123142748, iteration: 11467
loss: 1.065854549407959,grad_norm: 0.999999401095965, iteration: 11468
loss: 1.0543187856674194,grad_norm: 0.9999991775711906, iteration: 11469
loss: 1.0093218088150024,grad_norm: 0.9999995954668438, iteration: 11470
loss: 1.0524977445602417,grad_norm: 0.999999464979791, iteration: 11471
loss: 1.0616635084152222,grad_norm: 0.9999992834081433, iteration: 11472
loss: 1.0624374151229858,grad_norm: 0.9999990470209055, iteration: 11473
loss: 1.0663611888885498,grad_norm: 0.9999997837480196, iteration: 11474
loss: 1.0274405479431152,grad_norm: 0.9999994340300173, iteration: 11475
loss: 1.0258103609085083,grad_norm: 0.999999317687229, iteration: 11476
loss: 1.0494908094406128,grad_norm: 0.9999991316191968, iteration: 11477
loss: 1.0090806484222412,grad_norm: 0.9999991717056474, iteration: 11478
loss: 1.0845831632614136,grad_norm: 0.9999993171739804, iteration: 11479
loss: 1.0525143146514893,grad_norm: 0.9999990679705858, iteration: 11480
loss: 1.0025707483291626,grad_norm: 0.9999993030177048, iteration: 11481
loss: 0.9945598244667053,grad_norm: 0.9999992425954822, iteration: 11482
loss: 1.0240387916564941,grad_norm: 0.999999191297662, iteration: 11483
loss: 1.092312216758728,grad_norm: 0.9999990954855392, iteration: 11484
loss: 1.0572402477264404,grad_norm: 0.9999992041834947, iteration: 11485
loss: 1.028885841369629,grad_norm: 0.9999992685668582, iteration: 11486
loss: 1.018778920173645,grad_norm: 0.9999991762871334, iteration: 11487
loss: 0.999752938747406,grad_norm: 0.9999992631844578, iteration: 11488
loss: 1.028259515762329,grad_norm: 0.9999990794205736, iteration: 11489
loss: 1.0672965049743652,grad_norm: 0.999999397524029, iteration: 11490
loss: 1.007786512374878,grad_norm: 0.9999991943890789, iteration: 11491
loss: 1.0534741878509521,grad_norm: 0.9999991838422241, iteration: 11492
loss: 1.0479464530944824,grad_norm: 0.9999992325977751, iteration: 11493
loss: 1.044755220413208,grad_norm: 0.9999992936391042, iteration: 11494
loss: 1.037846565246582,grad_norm: 0.9999995170622629, iteration: 11495
loss: 1.0281254053115845,grad_norm: 0.999999227356121, iteration: 11496
loss: 1.1008901596069336,grad_norm: 0.9999996285015165, iteration: 11497
loss: 1.0183407068252563,grad_norm: 0.9936467662592645, iteration: 11498
loss: 0.9614999890327454,grad_norm: 0.9999991860111809, iteration: 11499
loss: 1.017562985420227,grad_norm: 0.9999992126328594, iteration: 11500
loss: 0.9953653216362,grad_norm: 0.9999992101866527, iteration: 11501
loss: 1.0271434783935547,grad_norm: 0.9999992450789478, iteration: 11502
loss: 1.0429426431655884,grad_norm: 0.9999992895671442, iteration: 11503
loss: 1.1503643989562988,grad_norm: 0.9999995920996837, iteration: 11504
loss: 1.046789288520813,grad_norm: 0.9999992203414829, iteration: 11505
loss: 1.07474684715271,grad_norm: 0.9999993264804747, iteration: 11506
loss: 1.0094906091690063,grad_norm: 0.999999177746264, iteration: 11507
loss: 1.036902666091919,grad_norm: 0.999999578156745, iteration: 11508
loss: 1.1171705722808838,grad_norm: 0.9999998030562434, iteration: 11509
loss: 0.9907689094543457,grad_norm: 0.9999993440710364, iteration: 11510
loss: 1.0119253396987915,grad_norm: 0.9999991729809689, iteration: 11511
loss: 1.045081615447998,grad_norm: 0.9999992879824727, iteration: 11512
loss: 1.0717812776565552,grad_norm: 0.9999992778018321, iteration: 11513
loss: 0.9957499504089355,grad_norm: 0.9999991701697273, iteration: 11514
loss: 1.0480350255966187,grad_norm: 0.9999991279104085, iteration: 11515
loss: 0.9692833423614502,grad_norm: 0.9999993250237041, iteration: 11516
loss: 1.0168498754501343,grad_norm: 0.9999995223229242, iteration: 11517
loss: 1.043332576751709,grad_norm: 0.99999924454432, iteration: 11518
loss: 1.0512372255325317,grad_norm: 0.9999992247794275, iteration: 11519
loss: 1.056420087814331,grad_norm: 0.9999991362838107, iteration: 11520
loss: 1.0145211219787598,grad_norm: 0.9999992708211605, iteration: 11521
loss: 1.0380586385726929,grad_norm: 0.9999993420010138, iteration: 11522
loss: 1.0218323469161987,grad_norm: 0.9999991841984467, iteration: 11523
loss: 1.0234802961349487,grad_norm: 0.9999990976609088, iteration: 11524
loss: 1.0575683116912842,grad_norm: 0.9999991611978403, iteration: 11525
loss: 1.0456658601760864,grad_norm: 0.9999996490469005, iteration: 11526
loss: 1.0335896015167236,grad_norm: 0.9999991970084217, iteration: 11527
loss: 1.0541174411773682,grad_norm: 0.9999993313996499, iteration: 11528
loss: 1.0663032531738281,grad_norm: 0.9999992094944198, iteration: 11529
loss: 1.0366480350494385,grad_norm: 0.999999400061278, iteration: 11530
loss: 1.059579610824585,grad_norm: 0.999999310616882, iteration: 11531
loss: 1.0634011030197144,grad_norm: 0.9999991495461537, iteration: 11532
loss: 1.0409462451934814,grad_norm: 0.9999991700021426, iteration: 11533
loss: 1.0794936418533325,grad_norm: 0.999999596157483, iteration: 11534
loss: 1.0160374641418457,grad_norm: 0.9999993900622522, iteration: 11535
loss: 1.0577462911605835,grad_norm: 0.9999991932493585, iteration: 11536
loss: 1.0117193460464478,grad_norm: 0.9999992499318567, iteration: 11537
loss: 1.0794984102249146,grad_norm: 0.9999994882612184, iteration: 11538
loss: 1.0762866735458374,grad_norm: 0.9999995882456125, iteration: 11539
loss: 1.0352638959884644,grad_norm: 0.9999992508637107, iteration: 11540
loss: 1.0559049844741821,grad_norm: 0.9999994642034432, iteration: 11541
loss: 1.0383597612380981,grad_norm: 0.9999994464579072, iteration: 11542
loss: 1.0665173530578613,grad_norm: 0.9999990592151856, iteration: 11543
loss: 1.0441622734069824,grad_norm: 0.999999264089483, iteration: 11544
loss: 1.0059245824813843,grad_norm: 0.9999993202751972, iteration: 11545
loss: 0.961226761341095,grad_norm: 0.9665985017657216, iteration: 11546
loss: 1.0746748447418213,grad_norm: 0.9999993577834039, iteration: 11547
loss: 1.0485923290252686,grad_norm: 0.99999932310786, iteration: 11548
loss: 1.0144816637039185,grad_norm: 0.9999992609003796, iteration: 11549
loss: 1.059172511100769,grad_norm: 0.999999187894091, iteration: 11550
loss: 1.0183913707733154,grad_norm: 0.9999992002464904, iteration: 11551
loss: 1.0702712535858154,grad_norm: 0.9999993918894792, iteration: 11552
loss: 0.9966918230056763,grad_norm: 0.99999922727038, iteration: 11553
loss: 1.0350565910339355,grad_norm: 0.9999991874727753, iteration: 11554
loss: 1.0251425504684448,grad_norm: 0.9999992445166874, iteration: 11555
loss: 0.9748473167419434,grad_norm: 0.9999994394231112, iteration: 11556
loss: 1.076771855354309,grad_norm: 0.9999993434743453, iteration: 11557
loss: 1.0229737758636475,grad_norm: 0.9999991794063354, iteration: 11558
loss: 1.0503135919570923,grad_norm: 0.9999995247935194, iteration: 11559
loss: 1.022707462310791,grad_norm: 0.9999994686239762, iteration: 11560
loss: 1.0349221229553223,grad_norm: 0.9999992902436582, iteration: 11561
loss: 1.046902060508728,grad_norm: 0.9999995109134248, iteration: 11562
loss: 1.1497094631195068,grad_norm: 0.9999997618073245, iteration: 11563
loss: 1.0603053569793701,grad_norm: 0.9999993518644542, iteration: 11564
loss: 1.0603967905044556,grad_norm: 0.9999994625762945, iteration: 11565
loss: 1.0435374975204468,grad_norm: 0.9999992248398181, iteration: 11566
loss: 1.0755728483200073,grad_norm: 0.9999992440054281, iteration: 11567
loss: 1.0188660621643066,grad_norm: 0.9999993746848814, iteration: 11568
loss: 0.9863259196281433,grad_norm: 0.999999217723064, iteration: 11569
loss: 1.0473382472991943,grad_norm: 0.9999993596558235, iteration: 11570
loss: 1.0031325817108154,grad_norm: 0.9999993860208494, iteration: 11571
loss: 1.0296012163162231,grad_norm: 0.9999992362653971, iteration: 11572
loss: 1.0554760694503784,grad_norm: 0.9999990937225516, iteration: 11573
loss: 1.055429220199585,grad_norm: 0.999999046418105, iteration: 11574
loss: 1.0230900049209595,grad_norm: 0.9116128637675843, iteration: 11575
loss: 0.9947700500488281,grad_norm: 0.9999993036199473, iteration: 11576
loss: 0.9941586256027222,grad_norm: 0.9999992618422605, iteration: 11577
loss: 1.0643231868743896,grad_norm: 0.99999925057724, iteration: 11578
loss: 1.0112133026123047,grad_norm: 0.9999992618398279, iteration: 11579
loss: 1.0339807271957397,grad_norm: 0.9999993547193571, iteration: 11580
loss: 1.0310595035552979,grad_norm: 0.9999991947417768, iteration: 11581
loss: 1.0804251432418823,grad_norm: 0.9999992436776337, iteration: 11582
loss: 1.0171103477478027,grad_norm: 0.9999994793042648, iteration: 11583
loss: 1.0834203958511353,grad_norm: 0.9999997143407221, iteration: 11584
loss: 1.049312710762024,grad_norm: 0.9999991824471538, iteration: 11585
loss: 1.0498746633529663,grad_norm: 0.9999993100531926, iteration: 11586
loss: 1.0218336582183838,grad_norm: 0.9999991855776423, iteration: 11587
loss: 1.0892919301986694,grad_norm: 0.9999996460484497, iteration: 11588
loss: 0.9814808368682861,grad_norm: 0.9999992042932626, iteration: 11589
loss: 1.0031530857086182,grad_norm: 0.9999990725490724, iteration: 11590
loss: 1.0374830961227417,grad_norm: 0.9999993912297993, iteration: 11591
loss: 1.017018437385559,grad_norm: 0.9999993444941582, iteration: 11592
loss: 1.0276676416397095,grad_norm: 0.9999991987278954, iteration: 11593
loss: 1.0003916025161743,grad_norm: 0.9999996523593938, iteration: 11594
loss: 1.042258858680725,grad_norm: 0.9999991161320136, iteration: 11595
loss: 1.0132976770401,grad_norm: 0.9999992284212247, iteration: 11596
loss: 1.0501803159713745,grad_norm: 0.999999139648512, iteration: 11597
loss: 1.0225186347961426,grad_norm: 0.9999992949382678, iteration: 11598
loss: 1.0721080303192139,grad_norm: 0.999999244101562, iteration: 11599
loss: 1.0252069234848022,grad_norm: 0.9999991779667684, iteration: 11600
loss: 1.0512338876724243,grad_norm: 0.9999992450057731, iteration: 11601
loss: 1.0657399892807007,grad_norm: 0.9999992356075041, iteration: 11602
loss: 1.0305086374282837,grad_norm: 0.9999992342833304, iteration: 11603
loss: 1.0364576578140259,grad_norm: 0.9999996152719007, iteration: 11604
loss: 1.028857946395874,grad_norm: 0.9999993120586234, iteration: 11605
loss: 0.985942542552948,grad_norm: 0.9999992732201491, iteration: 11606
loss: 0.99510657787323,grad_norm: 0.9999992400100576, iteration: 11607
loss: 1.0323835611343384,grad_norm: 0.9999994864823257, iteration: 11608
loss: 0.9736103415489197,grad_norm: 0.9999990861449397, iteration: 11609
loss: 1.025750756263733,grad_norm: 0.9999996530611213, iteration: 11610
loss: 0.9879553914070129,grad_norm: 0.9999991783363995, iteration: 11611
loss: 1.0352277755737305,grad_norm: 0.9999993363074805, iteration: 11612
loss: 1.0334707498550415,grad_norm: 0.999999318107507, iteration: 11613
loss: 1.0838245153427124,grad_norm: 0.9999995852252574, iteration: 11614
loss: 1.0592479705810547,grad_norm: 0.9999993992789501, iteration: 11615
loss: 0.9635547995567322,grad_norm: 0.9999990969737961, iteration: 11616
loss: 1.0461281538009644,grad_norm: 0.9999992260783666, iteration: 11617
loss: 1.0043283700942993,grad_norm: 0.9999991935271014, iteration: 11618
loss: 1.0599358081817627,grad_norm: 0.9999992268010547, iteration: 11619
loss: 1.0004198551177979,grad_norm: 0.9999993225702984, iteration: 11620
loss: 1.0429961681365967,grad_norm: 0.9999993209995388, iteration: 11621
loss: 1.0816103219985962,grad_norm: 0.999999536760355, iteration: 11622
loss: 0.9940225481987,grad_norm: 0.999999429981443, iteration: 11623
loss: 1.047698974609375,grad_norm: 0.9999995081888862, iteration: 11624
loss: 1.0383217334747314,grad_norm: 0.9999993027270313, iteration: 11625
loss: 1.080019235610962,grad_norm: 0.9999991400769089, iteration: 11626
loss: 1.0297070741653442,grad_norm: 0.9999996379229281, iteration: 11627
loss: 1.0753637552261353,grad_norm: 0.9999993658328254, iteration: 11628
loss: 0.9991126656532288,grad_norm: 0.9999994006378641, iteration: 11629
loss: 1.0177569389343262,grad_norm: 0.9999990685484085, iteration: 11630
loss: 1.0499337911605835,grad_norm: 0.999999483232358, iteration: 11631
loss: 1.0328305959701538,grad_norm: 0.9999993427745895, iteration: 11632
loss: 1.0513160228729248,grad_norm: 0.999999360340377, iteration: 11633
loss: 1.0397422313690186,grad_norm: 0.9999991633267139, iteration: 11634
loss: 0.9673932194709778,grad_norm: 0.9999994977683282, iteration: 11635
loss: 0.9716543555259705,grad_norm: 0.9999992168454126, iteration: 11636
loss: 1.0638234615325928,grad_norm: 0.9999990583257645, iteration: 11637
loss: 1.052659034729004,grad_norm: 0.9999995871839068, iteration: 11638
loss: 1.04543936252594,grad_norm: 0.9999990945982625, iteration: 11639
loss: 0.9919283986091614,grad_norm: 0.9999990530030985, iteration: 11640
loss: 1.0484070777893066,grad_norm: 0.9999993626707003, iteration: 11641
loss: 1.0785242319107056,grad_norm: 0.9999993466007567, iteration: 11642
loss: 1.0492147207260132,grad_norm: 0.9999992802685675, iteration: 11643
loss: 1.0626510381698608,grad_norm: 0.9999996975129287, iteration: 11644
loss: 1.0037742853164673,grad_norm: 0.9999991018290139, iteration: 11645
loss: 1.0373585224151611,grad_norm: 0.9999989931526958, iteration: 11646
loss: 1.0633642673492432,grad_norm: 0.9999993252894845, iteration: 11647
loss: 1.0257489681243896,grad_norm: 0.9999993779338316, iteration: 11648
loss: 1.0027339458465576,grad_norm: 0.9999992757027143, iteration: 11649
loss: 1.0400538444519043,grad_norm: 0.9999993767250714, iteration: 11650
loss: 0.989287257194519,grad_norm: 0.9999991873275748, iteration: 11651
loss: 1.0220415592193604,grad_norm: 0.9999992565793216, iteration: 11652
loss: 1.0474599599838257,grad_norm: 0.9999994167396407, iteration: 11653
loss: 1.0099334716796875,grad_norm: 0.999999115961799, iteration: 11654
loss: 1.0484659671783447,grad_norm: 0.9999993621735478, iteration: 11655
loss: 1.036689281463623,grad_norm: 0.9999993349228163, iteration: 11656
loss: 1.0311861038208008,grad_norm: 0.9999991655369108, iteration: 11657
loss: 1.0416935682296753,grad_norm: 0.9999992262485284, iteration: 11658
loss: 1.044034481048584,grad_norm: 0.999998931855339, iteration: 11659
loss: 1.0266036987304688,grad_norm: 0.9999992123551968, iteration: 11660
loss: 1.002713680267334,grad_norm: 0.9999992526176256, iteration: 11661
loss: 1.0368831157684326,grad_norm: 0.9999992803970643, iteration: 11662
loss: 1.001421570777893,grad_norm: 0.9999993419081172, iteration: 11663
loss: 1.0162949562072754,grad_norm: 0.9999992932931788, iteration: 11664
loss: 1.0190227031707764,grad_norm: 0.9999991710148295, iteration: 11665
loss: 1.071702480316162,grad_norm: 0.9999992693431309, iteration: 11666
loss: 1.0353754758834839,grad_norm: 0.9999991409435239, iteration: 11667
loss: 1.0922552347183228,grad_norm: 0.9999995022720567, iteration: 11668
loss: 1.0662692785263062,grad_norm: 0.9999991742366133, iteration: 11669
loss: 1.0526467561721802,grad_norm: 0.9999995216654851, iteration: 11670
loss: 1.037528157234192,grad_norm: 0.9999991236198368, iteration: 11671
loss: 1.041516661643982,grad_norm: 0.9999992829027616, iteration: 11672
loss: 1.0277959108352661,grad_norm: 0.9999991765815586, iteration: 11673
loss: 1.0291718244552612,grad_norm: 0.9999992381921168, iteration: 11674
loss: 1.0319023132324219,grad_norm: 0.9999992875308432, iteration: 11675
loss: 1.0468133687973022,grad_norm: 0.9999992959270914, iteration: 11676
loss: 1.0002762079238892,grad_norm: 0.9999992062723138, iteration: 11677
loss: 1.079291820526123,grad_norm: 0.9999991913649277, iteration: 11678
loss: 1.044424057006836,grad_norm: 0.9999994076168595, iteration: 11679
loss: 1.012589693069458,grad_norm: 0.999999041093862, iteration: 11680
loss: 1.066389799118042,grad_norm: 0.9999991460835919, iteration: 11681
loss: 1.0101028680801392,grad_norm: 0.999999193440098, iteration: 11682
loss: 1.0657936334609985,grad_norm: 0.999999390260437, iteration: 11683
loss: 1.0555217266082764,grad_norm: 0.9999993117835846, iteration: 11684
loss: 1.0447163581848145,grad_norm: 0.9999991614294308, iteration: 11685
loss: 0.9996387362480164,grad_norm: 0.9999992051245049, iteration: 11686
loss: 1.0271623134613037,grad_norm: 0.9999992568464123, iteration: 11687
loss: 1.017137050628662,grad_norm: 0.9999992202449849, iteration: 11688
loss: 1.0644724369049072,grad_norm: 0.9999991787645932, iteration: 11689
loss: 1.0490303039550781,grad_norm: 0.9999991433591902, iteration: 11690
loss: 1.0156903266906738,grad_norm: 0.9999992692287423, iteration: 11691
loss: 1.056176781654358,grad_norm: 0.999999117056465, iteration: 11692
loss: 1.114879846572876,grad_norm: 0.9999994086897257, iteration: 11693
loss: 1.034178614616394,grad_norm: 0.9999993057353292, iteration: 11694
loss: 1.0474021434783936,grad_norm: 0.9999994719070063, iteration: 11695
loss: 1.0199220180511475,grad_norm: 0.9999991507133723, iteration: 11696
loss: 1.002597689628601,grad_norm: 0.9999992208986337, iteration: 11697
loss: 0.9800096750259399,grad_norm: 0.99999955414723, iteration: 11698
loss: 1.1028742790222168,grad_norm: 0.9963914339695223, iteration: 11699
loss: 1.0857282876968384,grad_norm: 0.9999993783970706, iteration: 11700
loss: 1.0400364398956299,grad_norm: 0.9999991435875538, iteration: 11701
loss: 1.0107040405273438,grad_norm: 0.9999992748111046, iteration: 11702
loss: 1.0390833616256714,grad_norm: 0.9999993886897829, iteration: 11703
loss: 1.0314013957977295,grad_norm: 0.9999991344273212, iteration: 11704
loss: 1.0456571578979492,grad_norm: 0.9999991991047588, iteration: 11705
loss: 1.0413146018981934,grad_norm: 0.9999992691491064, iteration: 11706
loss: 1.0559126138687134,grad_norm: 0.9999992944685163, iteration: 11707
loss: 1.0353021621704102,grad_norm: 0.9999993422670296, iteration: 11708
loss: 1.005170464515686,grad_norm: 0.9819868213937692, iteration: 11709
loss: 1.0416533946990967,grad_norm: 0.9999994421752447, iteration: 11710
loss: 0.9929169416427612,grad_norm: 0.9999991019449864, iteration: 11711
loss: 1.018829345703125,grad_norm: 0.9999990934619433, iteration: 11712
loss: 1.0871021747589111,grad_norm: 0.9999995763615915, iteration: 11713
loss: 1.0744069814682007,grad_norm: 0.9999992593723173, iteration: 11714
loss: 1.0169739723205566,grad_norm: 0.9999993104404832, iteration: 11715
loss: 1.0110572576522827,grad_norm: 0.9999992929346955, iteration: 11716
loss: 1.0030235052108765,grad_norm: 0.9999990623726733, iteration: 11717
loss: 1.0152817964553833,grad_norm: 0.9999993207932896, iteration: 11718
loss: 0.9792130589485168,grad_norm: 0.9999993741891497, iteration: 11719
loss: 0.9945870637893677,grad_norm: 0.9999991691246967, iteration: 11720
loss: 1.0047003030776978,grad_norm: 0.9999992308490048, iteration: 11721
loss: 1.0018638372421265,grad_norm: 0.9999992025843227, iteration: 11722
loss: 1.0390918254852295,grad_norm: 0.9999993450540492, iteration: 11723
loss: 1.009063720703125,grad_norm: 0.9999993918416242, iteration: 11724
loss: 1.008396863937378,grad_norm: 0.9999996694481427, iteration: 11725
loss: 1.0407060384750366,grad_norm: 0.9999991696302468, iteration: 11726
loss: 1.0513098239898682,grad_norm: 0.999999232153511, iteration: 11727
loss: 1.0309689044952393,grad_norm: 0.9999990238086666, iteration: 11728
loss: 1.0414416790008545,grad_norm: 0.9999994414573219, iteration: 11729
loss: 1.0629215240478516,grad_norm: 0.999999107189532, iteration: 11730
loss: 1.0640373229980469,grad_norm: 0.9999993871203112, iteration: 11731
loss: 1.069701910018921,grad_norm: 0.9999994796063427, iteration: 11732
loss: 1.0314429998397827,grad_norm: 0.999999277273211, iteration: 11733
loss: 1.0540989637374878,grad_norm: 0.9999990862439357, iteration: 11734
loss: 1.0871174335479736,grad_norm: 0.9999995925780246, iteration: 11735
loss: 1.055869698524475,grad_norm: 0.9999996498587912, iteration: 11736
loss: 1.0310465097427368,grad_norm: 0.9999992659859904, iteration: 11737
loss: 0.9789847731590271,grad_norm: 0.9999990871986847, iteration: 11738
loss: 1.0048167705535889,grad_norm: 0.9999992561064874, iteration: 11739
loss: 0.9894535541534424,grad_norm: 0.9999991588987924, iteration: 11740
loss: 1.0120006799697876,grad_norm: 0.9999991246819648, iteration: 11741
loss: 1.0142711400985718,grad_norm: 0.9999993710118564, iteration: 11742
loss: 1.090680480003357,grad_norm: 0.9999996741285643, iteration: 11743
loss: 1.0388545989990234,grad_norm: 0.9999993080450642, iteration: 11744
loss: 1.0853465795516968,grad_norm: 0.9999995350859845, iteration: 11745
loss: 1.0593620538711548,grad_norm: 0.9999992622225232, iteration: 11746
loss: 1.0595852136611938,grad_norm: 0.9999993507558802, iteration: 11747
loss: 1.0638644695281982,grad_norm: 0.999999289017479, iteration: 11748
loss: 1.079606294631958,grad_norm: 0.9999998118938501, iteration: 11749
loss: 1.0743522644042969,grad_norm: 0.9999996135862407, iteration: 11750
loss: 1.0243260860443115,grad_norm: 0.999999112232055, iteration: 11751
loss: 1.0090045928955078,grad_norm: 0.9999991847711736, iteration: 11752
loss: 0.988335907459259,grad_norm: 0.9999990761986833, iteration: 11753
loss: 1.0194556713104248,grad_norm: 0.9999991458787608, iteration: 11754
loss: 1.079948902130127,grad_norm: 0.9999996114850614, iteration: 11755
loss: 1.021024227142334,grad_norm: 0.9999996708549732, iteration: 11756
loss: 0.9980260729789734,grad_norm: 0.9999991730427347, iteration: 11757
loss: 1.0648154020309448,grad_norm: 0.9999994158424185, iteration: 11758
loss: 1.0368373394012451,grad_norm: 0.9999991978490039, iteration: 11759
loss: 0.9938740134239197,grad_norm: 0.9999991090066432, iteration: 11760
loss: 1.020871639251709,grad_norm: 0.9999993312437121, iteration: 11761
loss: 1.0171120166778564,grad_norm: 0.9999992878570858, iteration: 11762
loss: 1.023419737815857,grad_norm: 0.999999131165009, iteration: 11763
loss: 0.9936472773551941,grad_norm: 0.9999991260849561, iteration: 11764
loss: 1.0542678833007812,grad_norm: 0.9999991230085488, iteration: 11765
loss: 1.0312855243682861,grad_norm: 0.9999991167159029, iteration: 11766
loss: 1.0940461158752441,grad_norm: 0.9999997449220969, iteration: 11767
loss: 1.069377064704895,grad_norm: 0.9999993310752927, iteration: 11768
loss: 1.0362768173217773,grad_norm: 0.9999992794228769, iteration: 11769
loss: 1.039709210395813,grad_norm: 0.9999992526364272, iteration: 11770
loss: 1.0613049268722534,grad_norm: 0.999999244610219, iteration: 11771
loss: 1.0577691793441772,grad_norm: 0.9999993581672743, iteration: 11772
loss: 1.0561481714248657,grad_norm: 0.9999994039027577, iteration: 11773
loss: 1.0140935182571411,grad_norm: 0.9999993853809424, iteration: 11774
loss: 1.0659133195877075,grad_norm: 0.9999991607952194, iteration: 11775
loss: 1.0910238027572632,grad_norm: 0.9999994047713815, iteration: 11776
loss: 0.9945288896560669,grad_norm: 0.9947365389284962, iteration: 11777
loss: 1.0017571449279785,grad_norm: 0.9999991435676964, iteration: 11778
loss: 1.0132144689559937,grad_norm: 0.9999991685446773, iteration: 11779
loss: 1.0079314708709717,grad_norm: 0.9999993015286975, iteration: 11780
loss: 1.0554089546203613,grad_norm: 0.9999992623257565, iteration: 11781
loss: 1.0373072624206543,grad_norm: 0.999999613442501, iteration: 11782
loss: 1.0024021863937378,grad_norm: 0.9999992026789255, iteration: 11783
loss: 1.0350358486175537,grad_norm: 0.9999990447259424, iteration: 11784
loss: 1.0616220235824585,grad_norm: 0.9999991391227675, iteration: 11785
loss: 0.9924018979072571,grad_norm: 0.9999990780043363, iteration: 11786
loss: 1.0079718828201294,grad_norm: 0.9999992819629566, iteration: 11787
loss: 1.1061458587646484,grad_norm: 0.9999995474493082, iteration: 11788
loss: 1.037968635559082,grad_norm: 0.9999992084313198, iteration: 11789
loss: 1.0303099155426025,grad_norm: 0.9999995094460895, iteration: 11790
loss: 1.0344061851501465,grad_norm: 0.9999995739320887, iteration: 11791
loss: 0.9846659302711487,grad_norm: 0.9999990919099405, iteration: 11792
loss: 1.0251703262329102,grad_norm: 0.9999992840912805, iteration: 11793
loss: 1.0206959247589111,grad_norm: 0.9999990507257226, iteration: 11794
loss: 1.011863350868225,grad_norm: 0.9999992607664482, iteration: 11795
loss: 1.0491710901260376,grad_norm: 0.9999992231325977, iteration: 11796
loss: 1.025923728942871,grad_norm: 0.9999994485509801, iteration: 11797
loss: 1.0458263158798218,grad_norm: 0.9999991479313942, iteration: 11798
loss: 0.9965076446533203,grad_norm: 0.9999992479653649, iteration: 11799
loss: 1.0037524700164795,grad_norm: 0.9999990935165902, iteration: 11800
loss: 1.0206876993179321,grad_norm: 0.9999991358528444, iteration: 11801
loss: 1.0693386793136597,grad_norm: 0.9999992043490435, iteration: 11802
loss: 0.9797778129577637,grad_norm: 0.9999991998828021, iteration: 11803
loss: 1.0834522247314453,grad_norm: 0.9999992592582565, iteration: 11804
loss: 0.9487376809120178,grad_norm: 0.9999992823860713, iteration: 11805
loss: 1.027557134628296,grad_norm: 0.9999994042639154, iteration: 11806
loss: 0.9924389719963074,grad_norm: 0.9999994938648767, iteration: 11807
loss: 1.0646488666534424,grad_norm: 0.9999997022562557, iteration: 11808
loss: 1.0428763628005981,grad_norm: 0.9999992974351901, iteration: 11809
loss: 0.9994291663169861,grad_norm: 0.9999992656322898, iteration: 11810
loss: 1.0398952960968018,grad_norm: 0.9999997417583005, iteration: 11811
loss: 1.024240493774414,grad_norm: 0.9999996546319009, iteration: 11812
loss: 1.0250331163406372,grad_norm: 0.9999995494837465, iteration: 11813
loss: 1.0112049579620361,grad_norm: 0.9999991048765393, iteration: 11814
loss: 1.0363281965255737,grad_norm: 0.9999991998873198, iteration: 11815
loss: 1.0149943828582764,grad_norm: 0.9999993348877941, iteration: 11816
loss: 1.0403251647949219,grad_norm: 0.9999994453725081, iteration: 11817
loss: 1.006912112236023,grad_norm: 0.9999990759266926, iteration: 11818
loss: 1.0358396768569946,grad_norm: 0.9999994727759413, iteration: 11819
loss: 0.9725310206413269,grad_norm: 0.9999991929173097, iteration: 11820
loss: 1.0638779401779175,grad_norm: 0.99999941068227, iteration: 11821
loss: 1.0585047006607056,grad_norm: 0.9999993364680472, iteration: 11822
loss: 1.0113903284072876,grad_norm: 0.9999991612593225, iteration: 11823
loss: 1.0382747650146484,grad_norm: 0.9999992734956868, iteration: 11824
loss: 1.0237728357315063,grad_norm: 0.9999993347519889, iteration: 11825
loss: 1.06858229637146,grad_norm: 0.9999995495390736, iteration: 11826
loss: 1.0680729150772095,grad_norm: 0.9999992878072657, iteration: 11827
loss: 1.0409274101257324,grad_norm: 0.9999992971244129, iteration: 11828
loss: 1.036392092704773,grad_norm: 0.9999994119375666, iteration: 11829
loss: 1.0278010368347168,grad_norm: 0.9999991396421247, iteration: 11830
loss: 1.0664091110229492,grad_norm: 0.9999992037677263, iteration: 11831
loss: 1.1161127090454102,grad_norm: 0.9999996093072829, iteration: 11832
loss: 1.0044718980789185,grad_norm: 0.9999993996829567, iteration: 11833
loss: 1.0190855264663696,grad_norm: 0.9999993068974434, iteration: 11834
loss: 1.0259493589401245,grad_norm: 0.9999992997663262, iteration: 11835
loss: 1.0500197410583496,grad_norm: 0.9999995979716297, iteration: 11836
loss: 1.085687518119812,grad_norm: 0.9999996847193041, iteration: 11837
loss: 1.0401215553283691,grad_norm: 0.9999991396203496, iteration: 11838
loss: 1.041609525680542,grad_norm: 0.9999990473410825, iteration: 11839
loss: 1.0564051866531372,grad_norm: 0.9999993732471153, iteration: 11840
loss: 1.0059847831726074,grad_norm: 0.9999991643632383, iteration: 11841
loss: 1.0218908786773682,grad_norm: 0.999999246965997, iteration: 11842
loss: 1.0322506427764893,grad_norm: 0.9999992912576883, iteration: 11843
loss: 1.055584192276001,grad_norm: 0.9999990829032656, iteration: 11844
loss: 1.0131595134735107,grad_norm: 0.9999991662521251, iteration: 11845
loss: 1.03082275390625,grad_norm: 0.9999993596641522, iteration: 11846
loss: 1.0372111797332764,grad_norm: 0.999999190751524, iteration: 11847
loss: 1.0272103548049927,grad_norm: 0.9999991402754896, iteration: 11848
loss: 1.0492459535598755,grad_norm: 0.9999994692803075, iteration: 11849
loss: 0.9692091941833496,grad_norm: 0.9999991917572962, iteration: 11850
loss: 1.0728288888931274,grad_norm: 0.9999998699370549, iteration: 11851
loss: 1.0083671808242798,grad_norm: 0.9999992998486248, iteration: 11852
loss: 1.016459345817566,grad_norm: 0.9999994034388603, iteration: 11853
loss: 1.0124677419662476,grad_norm: 0.9999992733003721, iteration: 11854
loss: 1.017347812652588,grad_norm: 0.9999993505847952, iteration: 11855
loss: 1.0251219272613525,grad_norm: 0.9999992682781235, iteration: 11856
loss: 1.0053774118423462,grad_norm: 0.9999993175110233, iteration: 11857
loss: 1.0483046770095825,grad_norm: 0.9999992698400888, iteration: 11858
loss: 1.0431045293807983,grad_norm: 0.9999996705387656, iteration: 11859
loss: 1.0389065742492676,grad_norm: 0.9999995911727889, iteration: 11860
loss: 1.0429686307907104,grad_norm: 0.9999992009561098, iteration: 11861
loss: 1.028266191482544,grad_norm: 0.9999990946067762, iteration: 11862
loss: 1.0552105903625488,grad_norm: 0.9999994015691142, iteration: 11863
loss: 1.0227314233779907,grad_norm: 0.999999307303263, iteration: 11864
loss: 1.0202134847640991,grad_norm: 0.9999991998050565, iteration: 11865
loss: 0.9953659176826477,grad_norm: 0.999999211362228, iteration: 11866
loss: 1.0452003479003906,grad_norm: 0.9999992455508161, iteration: 11867
loss: 1.0219154357910156,grad_norm: 0.9999996192112786, iteration: 11868
loss: 1.0436152219772339,grad_norm: 0.9999992450794299, iteration: 11869
loss: 1.0439597368240356,grad_norm: 0.9999994501220673, iteration: 11870
loss: 1.0517065525054932,grad_norm: 0.9999992625428935, iteration: 11871
loss: 1.0561742782592773,grad_norm: 0.9999993882614575, iteration: 11872
loss: 0.9873111248016357,grad_norm: 0.9999993778704919, iteration: 11873
loss: 1.042725920677185,grad_norm: 0.9999992881069842, iteration: 11874
loss: 1.032306432723999,grad_norm: 0.9999992272395565, iteration: 11875
loss: 1.0378168821334839,grad_norm: 0.9999991832381877, iteration: 11876
loss: 1.0507396459579468,grad_norm: 0.9999990948391198, iteration: 11877
loss: 1.0308470726013184,grad_norm: 0.999999430677136, iteration: 11878
loss: 1.0511517524719238,grad_norm: 0.9999995438247787, iteration: 11879
loss: 1.0318411588668823,grad_norm: 0.999999398104263, iteration: 11880
loss: 1.024532675743103,grad_norm: 0.9999991536843191, iteration: 11881
loss: 1.005948543548584,grad_norm: 0.9999993714239654, iteration: 11882
loss: 1.0171457529067993,grad_norm: 0.9999992100120172, iteration: 11883
loss: 1.015796184539795,grad_norm: 0.999999434322014, iteration: 11884
loss: 1.0646562576293945,grad_norm: 0.9999992357984119, iteration: 11885
loss: 1.0537065267562866,grad_norm: 0.9999991817377228, iteration: 11886
loss: 1.0809303522109985,grad_norm: 0.9999994806565355, iteration: 11887
loss: 1.02317476272583,grad_norm: 0.9999991833195605, iteration: 11888
loss: 1.0463087558746338,grad_norm: 0.9999990874602462, iteration: 11889
loss: 1.0544265508651733,grad_norm: 0.999999284775889, iteration: 11890
loss: 1.0303648710250854,grad_norm: 0.9999991841169223, iteration: 11891
loss: 1.031569242477417,grad_norm: 0.9999994046598126, iteration: 11892
loss: 1.0475479364395142,grad_norm: 0.9655258000011336, iteration: 11893
loss: 1.0286678075790405,grad_norm: 0.999999314776834, iteration: 11894
loss: 1.006443738937378,grad_norm: 0.9999991692611054, iteration: 11895
loss: 1.02838134765625,grad_norm: 0.9999992593435787, iteration: 11896
loss: 1.0855897665023804,grad_norm: 0.9999992060552695, iteration: 11897
loss: 1.0454738140106201,grad_norm: 0.999999441754468, iteration: 11898
loss: 1.0553725957870483,grad_norm: 0.999999213299445, iteration: 11899
loss: 1.0028246641159058,grad_norm: 0.99999917779026, iteration: 11900
loss: 1.0583196878433228,grad_norm: 0.9999993837087285, iteration: 11901
loss: 1.0553433895111084,grad_norm: 0.999999191044318, iteration: 11902
loss: 1.0330098867416382,grad_norm: 0.9999991359166184, iteration: 11903
loss: 1.0445243120193481,grad_norm: 0.9999992545448421, iteration: 11904
loss: 1.02459716796875,grad_norm: 0.9999993845874416, iteration: 11905
loss: 1.0261380672454834,grad_norm: 0.9999992472880447, iteration: 11906
loss: 0.9893192052841187,grad_norm: 0.9999990875187239, iteration: 11907
loss: 1.0152913331985474,grad_norm: 0.9999992010210733, iteration: 11908
loss: 0.9934412837028503,grad_norm: 0.9999992196105353, iteration: 11909
loss: 0.9796105027198792,grad_norm: 0.9999991363606228, iteration: 11910
loss: 0.987453818321228,grad_norm: 0.9999992295365778, iteration: 11911
loss: 1.039494514465332,grad_norm: 0.999999305272618, iteration: 11912
loss: 1.0250086784362793,grad_norm: 0.9999992033723317, iteration: 11913
loss: 0.9937998652458191,grad_norm: 0.9999991072158592, iteration: 11914
loss: 1.0929462909698486,grad_norm: 0.9999994071566515, iteration: 11915
loss: 1.0222564935684204,grad_norm: 0.9999995968607068, iteration: 11916
loss: 0.981037974357605,grad_norm: 0.9999992384598402, iteration: 11917
loss: 0.9928815364837646,grad_norm: 0.9999991589072232, iteration: 11918
loss: 1.0758414268493652,grad_norm: 0.9999992852669995, iteration: 11919
loss: 1.0897449254989624,grad_norm: 0.999999653398029, iteration: 11920
loss: 1.0695781707763672,grad_norm: 0.9999992913095065, iteration: 11921
loss: 1.0560276508331299,grad_norm: 0.99999925891257, iteration: 11922
loss: 1.0596821308135986,grad_norm: 0.9999991410632925, iteration: 11923
loss: 1.0742149353027344,grad_norm: 0.9999993389419286, iteration: 11924
loss: 0.9914937615394592,grad_norm: 0.9999990473213056, iteration: 11925
loss: 1.0591702461242676,grad_norm: 0.9999997057493261, iteration: 11926
loss: 1.0065549612045288,grad_norm: 0.999999283428324, iteration: 11927
loss: 1.000679850578308,grad_norm: 0.9999992867203185, iteration: 11928
loss: 1.0325191020965576,grad_norm: 0.9999993537170583, iteration: 11929
loss: 1.0439311265945435,grad_norm: 0.9999992835298604, iteration: 11930
loss: 1.0066065788269043,grad_norm: 0.9999992741560282, iteration: 11931
loss: 1.0279513597488403,grad_norm: 0.9999994810479129, iteration: 11932
loss: 1.0456174612045288,grad_norm: 0.9999990920671011, iteration: 11933
loss: 1.0384153127670288,grad_norm: 0.9999991287209863, iteration: 11934
loss: 1.0305640697479248,grad_norm: 0.9999993427021396, iteration: 11935
loss: 1.0256478786468506,grad_norm: 0.9999991952280706, iteration: 11936
loss: 1.012773036956787,grad_norm: 0.999999356187345, iteration: 11937
loss: 1.017735242843628,grad_norm: 0.9999990743876304, iteration: 11938
loss: 1.054551124572754,grad_norm: 0.9999993979245034, iteration: 11939
loss: 0.9929120540618896,grad_norm: 0.9150648758678098, iteration: 11940
loss: 1.0617766380310059,grad_norm: 0.999999481361059, iteration: 11941
loss: 1.0426210165023804,grad_norm: 0.9999992151200998, iteration: 11942
loss: 1.043625831604004,grad_norm: 0.9999993113399284, iteration: 11943
loss: 1.0896250009536743,grad_norm: 0.9999994480412467, iteration: 11944
loss: 0.9709091782569885,grad_norm: 0.9999991095797715, iteration: 11945
loss: 1.041050672531128,grad_norm: 0.9999996757594279, iteration: 11946
loss: 1.0304080247879028,grad_norm: 0.9999991979923091, iteration: 11947
loss: 1.0170735120773315,grad_norm: 0.9999992428101645, iteration: 11948
loss: 1.08355712890625,grad_norm: 0.9999991521968223, iteration: 11949
loss: 1.0337200164794922,grad_norm: 0.9999991248382779, iteration: 11950
loss: 1.0721863508224487,grad_norm: 0.9999993449294478, iteration: 11951
loss: 1.0040580034255981,grad_norm: 0.9999991809220804, iteration: 11952
loss: 1.0252299308776855,grad_norm: 0.9999992517559299, iteration: 11953
loss: 1.0385624170303345,grad_norm: 0.9999993087801273, iteration: 11954
loss: 1.043237566947937,grad_norm: 0.9999992247934801, iteration: 11955
loss: 1.0636460781097412,grad_norm: 0.9999993173046887, iteration: 11956
loss: 1.0969675779342651,grad_norm: 0.9999996205036724, iteration: 11957
loss: 1.0305269956588745,grad_norm: 0.999999059549587, iteration: 11958
loss: 1.0226763486862183,grad_norm: 0.9999995433094306, iteration: 11959
loss: 0.9872070550918579,grad_norm: 0.9999991446766698, iteration: 11960
loss: 1.0405864715576172,grad_norm: 0.9999993075946131, iteration: 11961
loss: 1.020554542541504,grad_norm: 0.9999990887742352, iteration: 11962
loss: 1.0045360326766968,grad_norm: 0.9999995786515378, iteration: 11963
loss: 1.051084280014038,grad_norm: 0.9999993349552949, iteration: 11964
loss: 1.0484097003936768,grad_norm: 0.9999992285369983, iteration: 11965
loss: 0.9962328672409058,grad_norm: 0.9999995127418289, iteration: 11966
loss: 1.0289971828460693,grad_norm: 0.9999992916335655, iteration: 11967
loss: 1.0390406847000122,grad_norm: 0.9999992592658782, iteration: 11968
loss: 1.0418072938919067,grad_norm: 0.9999992409322082, iteration: 11969
loss: 0.9976027607917786,grad_norm: 0.9999991915688732, iteration: 11970
loss: 1.065442681312561,grad_norm: 0.9999990886657217, iteration: 11971
loss: 1.0488245487213135,grad_norm: 0.9999992396476094, iteration: 11972
loss: 1.0078431367874146,grad_norm: 0.9999996473488267, iteration: 11973
loss: 1.021601676940918,grad_norm: 0.9999997241458755, iteration: 11974
loss: 1.016298532485962,grad_norm: 0.9999992097691324, iteration: 11975
loss: 1.0336769819259644,grad_norm: 0.9792848184058265, iteration: 11976
loss: 1.0070791244506836,grad_norm: 0.999999293017302, iteration: 11977
loss: 1.0366140604019165,grad_norm: 0.9999991754426494, iteration: 11978
loss: 1.0833775997161865,grad_norm: 0.9999995410459991, iteration: 11979
loss: 0.9450832605361938,grad_norm: 0.9999991658677782, iteration: 11980
loss: 1.0513880252838135,grad_norm: 0.9999992115334724, iteration: 11981
loss: 0.9903720021247864,grad_norm: 0.999999377640436, iteration: 11982
loss: 1.075235366821289,grad_norm: 0.9999995432743033, iteration: 11983
loss: 1.062930703163147,grad_norm: 0.9999995797902346, iteration: 11984
loss: 1.066915512084961,grad_norm: 0.999999278866838, iteration: 11985
loss: 0.9894294738769531,grad_norm: 0.9999991400533821, iteration: 11986
loss: 1.0208032131195068,grad_norm: 0.9999992814556625, iteration: 11987
loss: 0.9653717279434204,grad_norm: 0.9999991303105269, iteration: 11988
loss: 1.0925414562225342,grad_norm: 0.9999993089447806, iteration: 11989
loss: 1.062914252281189,grad_norm: 0.9999992750544979, iteration: 11990
loss: 0.9785511493682861,grad_norm: 0.9999990875232412, iteration: 11991
loss: 1.0309319496154785,grad_norm: 0.9999994170341878, iteration: 11992
loss: 1.0552639961242676,grad_norm: 0.9999993208327517, iteration: 11993
loss: 1.042527675628662,grad_norm: 0.999999171965271, iteration: 11994
loss: 1.0324559211730957,grad_norm: 0.9999992076890193, iteration: 11995
loss: 0.9865468740463257,grad_norm: 0.9999998557669263, iteration: 11996
loss: 1.064559817314148,grad_norm: 0.9999992526602532, iteration: 11997
loss: 1.0435994863510132,grad_norm: 0.9999992433963423, iteration: 11998
loss: 1.060879111289978,grad_norm: 0.999999367580954, iteration: 11999
loss: 1.0614572763442993,grad_norm: 0.9999992384987406, iteration: 12000
loss: 0.9896976351737976,grad_norm: 0.9999991989289361, iteration: 12001
loss: 1.0682742595672607,grad_norm: 0.9999992896744375, iteration: 12002
loss: 0.9780789017677307,grad_norm: 0.9999991859233107, iteration: 12003
loss: 1.1034208536148071,grad_norm: 0.9999998302578407, iteration: 12004
loss: 1.0065573453903198,grad_norm: 0.9999992493335415, iteration: 12005
loss: 1.0165377855300903,grad_norm: 0.9999995962385806, iteration: 12006
loss: 1.0007973909378052,grad_norm: 0.9999994498171707, iteration: 12007
loss: 1.0010581016540527,grad_norm: 0.9999992078653369, iteration: 12008
loss: 1.0328701734542847,grad_norm: 0.9999990957409752, iteration: 12009
loss: 1.0164661407470703,grad_norm: 0.9999993450539196, iteration: 12010
loss: 1.0500017404556274,grad_norm: 0.9999995263192233, iteration: 12011
loss: 0.9995602965354919,grad_norm: 0.9999991315086376, iteration: 12012
loss: 1.0531630516052246,grad_norm: 0.9999993601519803, iteration: 12013
loss: 1.048460602760315,grad_norm: 0.999999328167791, iteration: 12014
loss: 1.0343623161315918,grad_norm: 0.999999246724726, iteration: 12015
loss: 1.0231130123138428,grad_norm: 0.9999995819546931, iteration: 12016
loss: 1.0491515398025513,grad_norm: 0.9999991280460013, iteration: 12017
loss: 0.9996435046195984,grad_norm: 0.9999993735971571, iteration: 12018
loss: 1.0341771841049194,grad_norm: 0.9999993063277283, iteration: 12019
loss: 1.0231837034225464,grad_norm: 0.9999992624607555, iteration: 12020
loss: 0.9972477555274963,grad_norm: 0.9999993014252919, iteration: 12021
loss: 0.9984827637672424,grad_norm: 0.9999991964597228, iteration: 12022
loss: 1.0488470792770386,grad_norm: 0.9999991855157772, iteration: 12023
loss: 1.013657808303833,grad_norm: 0.9999994222709134, iteration: 12024
loss: 0.9922521710395813,grad_norm: 0.9999992185360886, iteration: 12025
loss: 1.0275391340255737,grad_norm: 0.9999993143442899, iteration: 12026
loss: 1.0849322080612183,grad_norm: 0.9999995591057734, iteration: 12027
loss: 1.037060260772705,grad_norm: 0.999999112739003, iteration: 12028
loss: 1.00308096408844,grad_norm: 0.9999991593927163, iteration: 12029
loss: 1.0575708150863647,grad_norm: 0.9999992559435987, iteration: 12030
loss: 1.0291918516159058,grad_norm: 0.978915814189025, iteration: 12031
loss: 0.9988438487052917,grad_norm: 0.9999992786742411, iteration: 12032
loss: 1.085060715675354,grad_norm: 0.9999997119410854, iteration: 12033
loss: 1.024415135383606,grad_norm: 0.9999992882448983, iteration: 12034
loss: 1.0227651596069336,grad_norm: 0.99999935441534, iteration: 12035
loss: 0.9967126250267029,grad_norm: 0.9724717460491809, iteration: 12036
loss: 0.9948481321334839,grad_norm: 0.9999992879603089, iteration: 12037
loss: 1.0461006164550781,grad_norm: 0.9999992356307073, iteration: 12038
loss: 1.0266057252883911,grad_norm: 0.9999992249609257, iteration: 12039
loss: 1.0428845882415771,grad_norm: 0.9999998294176299, iteration: 12040
loss: 1.035099744796753,grad_norm: 0.9999993697060278, iteration: 12041
loss: 1.0521525144577026,grad_norm: 0.999999347285947, iteration: 12042
loss: 1.0042544603347778,grad_norm: 0.9999992005968443, iteration: 12043
loss: 1.05635404586792,grad_norm: 0.9999995199099709, iteration: 12044
loss: 0.9755169153213501,grad_norm: 0.9999994009871374, iteration: 12045
loss: 1.024519920349121,grad_norm: 0.9999995738281494, iteration: 12046
loss: 1.0402917861938477,grad_norm: 0.9999995309334921, iteration: 12047
loss: 0.9811151027679443,grad_norm: 0.9999993270330526, iteration: 12048
loss: 1.0424659252166748,grad_norm: 0.999999243664461, iteration: 12049
loss: 1.001702904701233,grad_norm: 0.9999991245463689, iteration: 12050
loss: 1.029693603515625,grad_norm: 0.9999992173739792, iteration: 12051
loss: 1.0441828966140747,grad_norm: 0.9999992850471627, iteration: 12052
loss: 1.013534665107727,grad_norm: 0.9999997024728862, iteration: 12053
loss: 1.053766131401062,grad_norm: 0.9999991455627254, iteration: 12054
loss: 0.9915621876716614,grad_norm: 0.9999993090044111, iteration: 12055
loss: 1.0603580474853516,grad_norm: 0.9999991306518393, iteration: 12056
loss: 1.0562182664871216,grad_norm: 0.9999992986192269, iteration: 12057
loss: 1.0379844903945923,grad_norm: 0.9999991199139582, iteration: 12058
loss: 1.0250452756881714,grad_norm: 0.9999993711138553, iteration: 12059
loss: 1.0089044570922852,grad_norm: 0.9999992116003522, iteration: 12060
loss: 1.0443538427352905,grad_norm: 0.9999993948763427, iteration: 12061
loss: 1.0445904731750488,grad_norm: 0.9999993615233661, iteration: 12062
loss: 1.005295991897583,grad_norm: 0.9999994808759434, iteration: 12063
loss: 1.0637626647949219,grad_norm: 0.9999998135921638, iteration: 12064
loss: 1.0783655643463135,grad_norm: 0.9999992765323749, iteration: 12065
loss: 1.0827535390853882,grad_norm: 0.9999994862940567, iteration: 12066
loss: 1.0372002124786377,grad_norm: 0.9999990409291685, iteration: 12067
loss: 1.0155186653137207,grad_norm: 0.9999992004933461, iteration: 12068
loss: 1.0233350992202759,grad_norm: 0.9999995724418996, iteration: 12069
loss: 1.0051008462905884,grad_norm: 0.9999992075801161, iteration: 12070
loss: 1.0448096990585327,grad_norm: 0.999999375213242, iteration: 12071
loss: 1.043858289718628,grad_norm: 0.9999991940183864, iteration: 12072
loss: 1.017768383026123,grad_norm: 0.9999997043819743, iteration: 12073
loss: 1.0212386846542358,grad_norm: 0.9999992105532661, iteration: 12074
loss: 1.068753719329834,grad_norm: 0.9999989722789902, iteration: 12075
loss: 1.0019463300704956,grad_norm: 0.9999992423571277, iteration: 12076
loss: 1.0566946268081665,grad_norm: 0.9999993814431043, iteration: 12077
loss: 0.970140814781189,grad_norm: 0.9999991193336879, iteration: 12078
loss: 1.0411553382873535,grad_norm: 0.999999268322037, iteration: 12079
loss: 1.0537316799163818,grad_norm: 0.9999993744661627, iteration: 12080
loss: 1.0390464067459106,grad_norm: 0.9999994409305036, iteration: 12081
loss: 1.0353840589523315,grad_norm: 0.9999992323998619, iteration: 12082
loss: 1.0035337209701538,grad_norm: 0.9999991057201795, iteration: 12083
loss: 0.9956668019294739,grad_norm: 0.9999991642219741, iteration: 12084
loss: 1.0238014459609985,grad_norm: 0.9999997983024062, iteration: 12085
loss: 1.0357375144958496,grad_norm: 0.9999991941024979, iteration: 12086
loss: 0.9985272288322449,grad_norm: 0.9999995108076927, iteration: 12087
loss: 1.0464951992034912,grad_norm: 0.9999996620755923, iteration: 12088
loss: 1.0023467540740967,grad_norm: 0.9999990950434072, iteration: 12089
loss: 1.0288701057434082,grad_norm: 0.9999992580327474, iteration: 12090
loss: 0.9792459011077881,grad_norm: 0.999999387189057, iteration: 12091
loss: 1.071808934211731,grad_norm: 0.999999206653031, iteration: 12092
loss: 1.0234630107879639,grad_norm: 0.9999993079575508, iteration: 12093
loss: 1.013967514038086,grad_norm: 0.9999993324562083, iteration: 12094
loss: 1.0221796035766602,grad_norm: 0.9999995542865658, iteration: 12095
loss: 1.0208370685577393,grad_norm: 0.9999993333492643, iteration: 12096
loss: 1.0746458768844604,grad_norm: 0.9999997780816055, iteration: 12097
loss: 1.0704814195632935,grad_norm: 0.9999996026835583, iteration: 12098
loss: 1.0799477100372314,grad_norm: 0.9999993985911623, iteration: 12099
loss: 1.0648458003997803,grad_norm: 0.9999995836503398, iteration: 12100
loss: 1.0624685287475586,grad_norm: 0.9999992645987025, iteration: 12101
loss: 1.0950030088424683,grad_norm: 0.9999997270881656, iteration: 12102
loss: 1.0434359312057495,grad_norm: 0.9999992698697261, iteration: 12103
loss: 0.9936379790306091,grad_norm: 0.999999634743276, iteration: 12104
loss: 1.0538841485977173,grad_norm: 0.9999995615146887, iteration: 12105
loss: 1.0372837781906128,grad_norm: 0.9999992579140974, iteration: 12106
loss: 1.0029873847961426,grad_norm: 0.9999992446050527, iteration: 12107
loss: 0.9810469746589661,grad_norm: 0.9999993655428598, iteration: 12108
loss: 1.0395536422729492,grad_norm: 0.9999996860538902, iteration: 12109
loss: 1.0424517393112183,grad_norm: 0.9999992548300844, iteration: 12110
loss: 1.0555464029312134,grad_norm: 0.9999992982485703, iteration: 12111
loss: 0.9903913140296936,grad_norm: 0.9999992440527949, iteration: 12112
loss: 1.0753262042999268,grad_norm: 0.9999995973457614, iteration: 12113
loss: 1.0668683052062988,grad_norm: 0.9999994807722107, iteration: 12114
loss: 1.042779564857483,grad_norm: 0.9999994711320604, iteration: 12115
loss: 1.0270189046859741,grad_norm: 0.9999993309127343, iteration: 12116
loss: 1.021811842918396,grad_norm: 0.9999992766998178, iteration: 12117
loss: 1.015022873878479,grad_norm: 0.9999991898812624, iteration: 12118
loss: 1.0361227989196777,grad_norm: 0.9999995338793255, iteration: 12119
loss: 1.0721443891525269,grad_norm: 0.999999440994868, iteration: 12120
loss: 1.0050970315933228,grad_norm: 0.9999992404139675, iteration: 12121
loss: 1.0588874816894531,grad_norm: 0.9999995175095922, iteration: 12122
loss: 1.0202975273132324,grad_norm: 0.9999995630075867, iteration: 12123
loss: 1.0586491823196411,grad_norm: 0.9999991233958931, iteration: 12124
loss: 1.0650643110275269,grad_norm: 0.9999995959138717, iteration: 12125
loss: 0.9734375476837158,grad_norm: 0.9999991775206766, iteration: 12126
loss: 1.075285792350769,grad_norm: 0.9999993716922323, iteration: 12127
loss: 1.0611999034881592,grad_norm: 0.9999996140097546, iteration: 12128
loss: 1.102565050125122,grad_norm: 0.9999992474750662, iteration: 12129
loss: 0.9474760890007019,grad_norm: 0.9999992537381269, iteration: 12130
loss: 1.0412009954452515,grad_norm: 0.9999992207484963, iteration: 12131
loss: 1.0425065755844116,grad_norm: 0.9999996891163674, iteration: 12132
loss: 1.0002535581588745,grad_norm: 0.999999251876423, iteration: 12133
loss: 1.0179749727249146,grad_norm: 0.999999395313965, iteration: 12134
loss: 1.0310640335083008,grad_norm: 0.9999991676918328, iteration: 12135
loss: 1.0009592771530151,grad_norm: 0.9999992448532017, iteration: 12136
loss: 1.0269746780395508,grad_norm: 0.9999994807673959, iteration: 12137
loss: 1.0058351755142212,grad_norm: 0.9999992240708303, iteration: 12138
loss: 1.0258394479751587,grad_norm: 0.9999995272665148, iteration: 12139
loss: 1.0223379135131836,grad_norm: 0.9999993067482423, iteration: 12140
loss: 1.0014317035675049,grad_norm: 0.9999992521504704, iteration: 12141
loss: 1.0084874629974365,grad_norm: 0.9999992350699155, iteration: 12142
loss: 1.0423907041549683,grad_norm: 0.9999993113432207, iteration: 12143
loss: 1.1209253072738647,grad_norm: 0.999999614524209, iteration: 12144
loss: 1.0325347185134888,grad_norm: 0.9999992348027352, iteration: 12145
loss: 1.0304101705551147,grad_norm: 0.9999992557627048, iteration: 12146
loss: 1.0314258337020874,grad_norm: 0.9999991818991798, iteration: 12147
loss: 1.0156993865966797,grad_norm: 0.9999991604518748, iteration: 12148
loss: 1.0687204599380493,grad_norm: 0.9999995868159823, iteration: 12149
loss: 1.0698546171188354,grad_norm: 0.999999245702689, iteration: 12150
loss: 1.0320119857788086,grad_norm: 0.9999992537012967, iteration: 12151
loss: 1.0162699222564697,grad_norm: 0.9999990590531748, iteration: 12152
loss: 1.0029703378677368,grad_norm: 0.9999993258250365, iteration: 12153
loss: 1.041254997253418,grad_norm: 0.9999993703184882, iteration: 12154
loss: 1.0365219116210938,grad_norm: 0.9999991644712626, iteration: 12155
loss: 1.0528297424316406,grad_norm: 0.9999995675844857, iteration: 12156
loss: 1.0061513185501099,grad_norm: 0.9999992328969011, iteration: 12157
loss: 1.0375796556472778,grad_norm: 0.999999093387145, iteration: 12158
loss: 1.0720329284667969,grad_norm: 0.99999973650989, iteration: 12159
loss: 1.0466409921646118,grad_norm: 0.9999993903345865, iteration: 12160
loss: 0.9634492993354797,grad_norm: 0.9999992785299403, iteration: 12161
loss: 1.0782519578933716,grad_norm: 0.9999995301860705, iteration: 12162
loss: 1.043587565422058,grad_norm: 0.999999179762804, iteration: 12163
loss: 1.030186414718628,grad_norm: 0.9999992993564875, iteration: 12164
loss: 1.017536997795105,grad_norm: 0.9999992189893621, iteration: 12165
loss: 1.0166754722595215,grad_norm: 0.9999992482844716, iteration: 12166
loss: 1.008806586265564,grad_norm: 0.9999991905760254, iteration: 12167
loss: 1.049580454826355,grad_norm: 0.9999992917618002, iteration: 12168
loss: 1.0225640535354614,grad_norm: 0.9999991508796888, iteration: 12169
loss: 1.0764063596725464,grad_norm: 0.9999994657781992, iteration: 12170
loss: 1.0388543605804443,grad_norm: 0.9999994183442705, iteration: 12171
loss: 1.081305980682373,grad_norm: 0.99999941786587, iteration: 12172
loss: 1.00751531124115,grad_norm: 0.9999991583994167, iteration: 12173
loss: 1.0284242630004883,grad_norm: 0.9999991454361745, iteration: 12174
loss: 1.0246683359146118,grad_norm: 0.9999996496204735, iteration: 12175
loss: 1.0184717178344727,grad_norm: 0.9999992903324152, iteration: 12176
loss: 1.0963034629821777,grad_norm: 0.9999991505360234, iteration: 12177
loss: 1.0614991188049316,grad_norm: 0.9999991967009328, iteration: 12178
loss: 0.9944791197776794,grad_norm: 0.9999992760426711, iteration: 12179
loss: 1.084408164024353,grad_norm: 0.9999991945870215, iteration: 12180
loss: 1.0186021327972412,grad_norm: 0.9999992146141529, iteration: 12181
loss: 1.0583419799804688,grad_norm: 0.9999993286688726, iteration: 12182
loss: 1.038097858428955,grad_norm: 0.9999996865641649, iteration: 12183
loss: 1.1326605081558228,grad_norm: 0.9999998386283054, iteration: 12184
loss: 1.052680253982544,grad_norm: 0.9999996758114013, iteration: 12185
loss: 1.0293294191360474,grad_norm: 0.9999992986416587, iteration: 12186
loss: 1.027815580368042,grad_norm: 0.9999993734813541, iteration: 12187
loss: 1.1658886671066284,grad_norm: 0.9999997501360417, iteration: 12188
loss: 1.0380879640579224,grad_norm: 0.9999992263485021, iteration: 12189
loss: 1.028163194656372,grad_norm: 0.9999993014211819, iteration: 12190
loss: 1.098161220550537,grad_norm: 0.999999813259882, iteration: 12191
loss: 1.017992615699768,grad_norm: 0.9999998092460466, iteration: 12192
loss: 1.0133774280548096,grad_norm: 0.9999992780243819, iteration: 12193
loss: 1.013738989830017,grad_norm: 0.9999992347285643, iteration: 12194
loss: 1.059781551361084,grad_norm: 0.9999993910046786, iteration: 12195
loss: 1.040196180343628,grad_norm: 0.9999993349487205, iteration: 12196
loss: 1.0742067098617554,grad_norm: 0.999999469364789, iteration: 12197
loss: 0.9938920140266418,grad_norm: 0.9999991816352867, iteration: 12198
loss: 1.0542351007461548,grad_norm: 0.999999120251817, iteration: 12199
loss: 1.0588726997375488,grad_norm: 0.9999996885230419, iteration: 12200
loss: 1.0656845569610596,grad_norm: 0.9999994280797689, iteration: 12201
loss: 1.0402230024337769,grad_norm: 0.9999994495967983, iteration: 12202
loss: 1.0741167068481445,grad_norm: 0.9999998522787631, iteration: 12203
loss: 0.9882028698921204,grad_norm: 0.9999995107673156, iteration: 12204
loss: 1.067087173461914,grad_norm: 0.9999992324262835, iteration: 12205
loss: 1.0129657983779907,grad_norm: 0.9999991427312567, iteration: 12206
loss: 1.0464569330215454,grad_norm: 0.9999993655786407, iteration: 12207
loss: 1.0452128648757935,grad_norm: 0.9999998022646635, iteration: 12208
loss: 0.9833457469940186,grad_norm: 0.9999992962190105, iteration: 12209
loss: 1.1084392070770264,grad_norm: 0.9999996339975112, iteration: 12210
loss: 1.059572458267212,grad_norm: 0.9999994231227469, iteration: 12211
loss: 1.0265891551971436,grad_norm: 0.9999993422039509, iteration: 12212
loss: 1.0658433437347412,grad_norm: 0.9999998242563113, iteration: 12213
loss: 1.0310795307159424,grad_norm: 0.999999161784, iteration: 12214
loss: 1.035744071006775,grad_norm: 0.9999997857616385, iteration: 12215
loss: 1.0885370969772339,grad_norm: 0.9999995570165061, iteration: 12216
loss: 1.0359948873519897,grad_norm: 0.931354652567722, iteration: 12217
loss: 1.0608642101287842,grad_norm: 0.9999991155526268, iteration: 12218
loss: 1.011155366897583,grad_norm: 0.9999991570142928, iteration: 12219
loss: 1.018451452255249,grad_norm: 0.999999426836965, iteration: 12220
loss: 1.0234378576278687,grad_norm: 0.999999361828981, iteration: 12221
loss: 0.9982454180717468,grad_norm: 0.9999993414057539, iteration: 12222
loss: 1.027321457862854,grad_norm: 0.9999990290231464, iteration: 12223
loss: 1.0030893087387085,grad_norm: 0.9999991718603174, iteration: 12224
loss: 1.0120353698730469,grad_norm: 0.9999993039785307, iteration: 12225
loss: 1.0514492988586426,grad_norm: 0.9999993128142902, iteration: 12226
loss: 1.0430912971496582,grad_norm: 0.9999994598422229, iteration: 12227
loss: 1.0096968412399292,grad_norm: 0.9999991601503865, iteration: 12228
loss: 1.0179673433303833,grad_norm: 0.9999993586478347, iteration: 12229
loss: 1.034238576889038,grad_norm: 0.9999997893399383, iteration: 12230
loss: 1.0338550806045532,grad_norm: 0.9999993040770607, iteration: 12231
loss: 0.989048182964325,grad_norm: 0.9999993512677896, iteration: 12232
loss: 1.037223219871521,grad_norm: 0.9999994521805958, iteration: 12233
loss: 1.073195219039917,grad_norm: 0.9999991474326977, iteration: 12234
loss: 0.992448627948761,grad_norm: 0.9999991556810093, iteration: 12235
loss: 1.031859040260315,grad_norm: 0.9999993411535145, iteration: 12236
loss: 1.0470960140228271,grad_norm: 0.9999996471671301, iteration: 12237
loss: 1.0211952924728394,grad_norm: 0.999999416111906, iteration: 12238
loss: 1.0491199493408203,grad_norm: 0.9999993615864098, iteration: 12239
loss: 1.037588119506836,grad_norm: 0.9999992954343003, iteration: 12240
loss: 1.0512360334396362,grad_norm: 0.9999994634414954, iteration: 12241
loss: 1.0354148149490356,grad_norm: 0.9999990459436712, iteration: 12242
loss: 1.0009918212890625,grad_norm: 0.99999927694867, iteration: 12243
loss: 1.0226601362228394,grad_norm: 0.9999990788187958, iteration: 12244
loss: 1.0469167232513428,grad_norm: 0.999999091093427, iteration: 12245
loss: 1.044569492340088,grad_norm: 0.9999992786514975, iteration: 12246
loss: 1.026389241218567,grad_norm: 0.9999992602197344, iteration: 12247
loss: 1.0213429927825928,grad_norm: 0.9999991753681802, iteration: 12248
loss: 1.0348669290542603,grad_norm: 0.999999119847858, iteration: 12249
loss: 0.9738803505897522,grad_norm: 0.999999185552227, iteration: 12250
loss: 0.9982768297195435,grad_norm: 0.999999353615349, iteration: 12251
loss: 1.0368784666061401,grad_norm: 0.999999175513455, iteration: 12252
loss: 1.0587408542633057,grad_norm: 0.9999991559689736, iteration: 12253
loss: 1.039913535118103,grad_norm: 0.9999993694919621, iteration: 12254
loss: 1.0230902433395386,grad_norm: 0.9999994236080233, iteration: 12255
loss: 1.0717699527740479,grad_norm: 0.9999993333505129, iteration: 12256
loss: 1.0005334615707397,grad_norm: 0.9999990809801564, iteration: 12257
loss: 1.0832287073135376,grad_norm: 0.9999994092877307, iteration: 12258
loss: 0.9966824650764465,grad_norm: 0.9999993688986335, iteration: 12259
loss: 1.005454182624817,grad_norm: 0.9999992628017635, iteration: 12260
loss: 1.022179126739502,grad_norm: 0.9999995027300873, iteration: 12261
loss: 1.0135562419891357,grad_norm: 0.9999992901171209, iteration: 12262
loss: 1.0022273063659668,grad_norm: 0.9999991650714595, iteration: 12263
loss: 1.0558626651763916,grad_norm: 0.9999996345439042, iteration: 12264
loss: 1.1068073511123657,grad_norm: 0.9999994452344383, iteration: 12265
loss: 1.0489215850830078,grad_norm: 0.9999992349071105, iteration: 12266
loss: 1.0176939964294434,grad_norm: 0.9999992746436543, iteration: 12267
loss: 1.0142861604690552,grad_norm: 0.9999991433029104, iteration: 12268
loss: 1.0709095001220703,grad_norm: 0.9999993329553881, iteration: 12269
loss: 1.0747336149215698,grad_norm: 0.9999992574603142, iteration: 12270
loss: 1.0202962160110474,grad_norm: 0.9999992377210223, iteration: 12271
loss: 1.0464190244674683,grad_norm: 0.9999991757059812, iteration: 12272
loss: 1.0758928060531616,grad_norm: 0.9999992684550841, iteration: 12273
loss: 1.0194536447525024,grad_norm: 0.9999995923614861, iteration: 12274
loss: 1.0227265357971191,grad_norm: 0.9999995657528526, iteration: 12275
loss: 1.0924863815307617,grad_norm: 0.999999640095226, iteration: 12276
loss: 1.0397748947143555,grad_norm: 0.9999992947756158, iteration: 12277
loss: 1.01203453540802,grad_norm: 0.9999991369832016, iteration: 12278
loss: 1.0902942419052124,grad_norm: 0.9999996897881868, iteration: 12279
loss: 1.006425142288208,grad_norm: 0.9999995059652723, iteration: 12280
loss: 1.0380446910858154,grad_norm: 0.9999994283251409, iteration: 12281
loss: 1.0692412853240967,grad_norm: 0.9999995026819973, iteration: 12282
loss: 1.0213727951049805,grad_norm: 0.9999992502008227, iteration: 12283
loss: 1.1456125974655151,grad_norm: 0.9999995487973566, iteration: 12284
loss: 1.0481780767440796,grad_norm: 0.9999994401754898, iteration: 12285
loss: 1.035079002380371,grad_norm: 0.9999993172349768, iteration: 12286
loss: 1.0606201887130737,grad_norm: 0.999999396345994, iteration: 12287
loss: 1.034615159034729,grad_norm: 0.999999466886457, iteration: 12288
loss: 1.0242849588394165,grad_norm: 0.9999993224081919, iteration: 12289
loss: 0.9912318587303162,grad_norm: 0.999999484139077, iteration: 12290
loss: 1.0272858142852783,grad_norm: 0.9999992855933136, iteration: 12291
loss: 1.0029274225234985,grad_norm: 0.9999991850191537, iteration: 12292
loss: 1.0511373281478882,grad_norm: 0.9999993472710886, iteration: 12293
loss: 1.0688296556472778,grad_norm: 0.9999992915036828, iteration: 12294
loss: 0.9823476672172546,grad_norm: 0.9999992599849353, iteration: 12295
loss: 1.031043291091919,grad_norm: 0.9999991966322598, iteration: 12296
loss: 1.03146493434906,grad_norm: 0.9999990995823539, iteration: 12297
loss: 1.0039572715759277,grad_norm: 0.9999993112691361, iteration: 12298
loss: 1.0579876899719238,grad_norm: 0.9999996581132115, iteration: 12299
loss: 1.0077075958251953,grad_norm: 0.9999994858446463, iteration: 12300
loss: 1.0331759452819824,grad_norm: 0.9999993166717355, iteration: 12301
loss: 1.0661053657531738,grad_norm: 0.9999995115865057, iteration: 12302
loss: 0.9912434220314026,grad_norm: 0.9999994542459688, iteration: 12303
loss: 1.0034942626953125,grad_norm: 0.9999994167360757, iteration: 12304
loss: 1.041343331336975,grad_norm: 0.9999996976238863, iteration: 12305
loss: 1.007188320159912,grad_norm: 0.9999991314248324, iteration: 12306
loss: 1.0161861181259155,grad_norm: 0.9999992690831115, iteration: 12307
loss: 1.0237624645233154,grad_norm: 0.9999994001821396, iteration: 12308
loss: 1.05338454246521,grad_norm: 0.9999993217116299, iteration: 12309
loss: 1.0426925420761108,grad_norm: 0.999999199971026, iteration: 12310
loss: 1.0606621503829956,grad_norm: 0.9999994427081106, iteration: 12311
loss: 1.079937219619751,grad_norm: 0.9999995630227733, iteration: 12312
loss: 1.0639220476150513,grad_norm: 0.9999991939467858, iteration: 12313
loss: 1.0359331369400024,grad_norm: 0.9999992719466624, iteration: 12314
loss: 1.072733759880066,grad_norm: 0.9999993659046391, iteration: 12315
loss: 1.0523841381072998,grad_norm: 0.9793210166865179, iteration: 12316
loss: 1.043269395828247,grad_norm: 0.9999993406782665, iteration: 12317
loss: 1.030777931213379,grad_norm: 0.9999990225345828, iteration: 12318
loss: 1.0648412704467773,grad_norm: 0.9999996073384567, iteration: 12319
loss: 0.9933215379714966,grad_norm: 0.9999990462914845, iteration: 12320
loss: 1.0341559648513794,grad_norm: 0.9999995430680608, iteration: 12321
loss: 1.0592622756958008,grad_norm: 0.9999991075334688, iteration: 12322
loss: 1.015459656715393,grad_norm: 0.9999994938134745, iteration: 12323
loss: 1.0173399448394775,grad_norm: 0.9999990538819444, iteration: 12324
loss: 1.0824458599090576,grad_norm: 0.9999995409384637, iteration: 12325
loss: 1.0653882026672363,grad_norm: 0.9999993908985984, iteration: 12326
loss: 1.0528539419174194,grad_norm: 0.9999992166913703, iteration: 12327
loss: 1.024547815322876,grad_norm: 0.9999991356597403, iteration: 12328
loss: 1.036785364151001,grad_norm: 0.9999991634746356, iteration: 12329
loss: 1.0169333219528198,grad_norm: 0.999999623626885, iteration: 12330
loss: 0.9890192151069641,grad_norm: 0.9999993142418272, iteration: 12331
loss: 1.0581035614013672,grad_norm: 0.9359677402783062, iteration: 12332
loss: 1.0659605264663696,grad_norm: 0.999999253658533, iteration: 12333
loss: 1.0314385890960693,grad_norm: 0.9999991253410195, iteration: 12334
loss: 1.02220618724823,grad_norm: 0.9999990851426135, iteration: 12335
loss: 1.033022165298462,grad_norm: 0.9999994606990709, iteration: 12336
loss: 1.070522665977478,grad_norm: 0.9999992321104655, iteration: 12337
loss: 1.0507463216781616,grad_norm: 0.9999993276014384, iteration: 12338
loss: 1.033055305480957,grad_norm: 0.999999567503812, iteration: 12339
loss: 1.0679124593734741,grad_norm: 0.9999994016402964, iteration: 12340
loss: 1.0299712419509888,grad_norm: 0.9999994645601032, iteration: 12341
loss: 0.9911631345748901,grad_norm: 0.9999989956059018, iteration: 12342
loss: 1.0331038236618042,grad_norm: 0.9999992931700016, iteration: 12343
loss: 1.0159869194030762,grad_norm: 0.9999990161528237, iteration: 12344
loss: 1.0159611701965332,grad_norm: 0.9845596482078923, iteration: 12345
loss: 1.0035293102264404,grad_norm: 0.9999992684605485, iteration: 12346
loss: 1.0111042261123657,grad_norm: 0.999999508867966, iteration: 12347
loss: 1.0590404272079468,grad_norm: 0.9999990930479853, iteration: 12348
loss: 1.0197569131851196,grad_norm: 0.9999993911315503, iteration: 12349
loss: 1.0123050212860107,grad_norm: 0.9685769885420742, iteration: 12350
loss: 1.0011341571807861,grad_norm: 0.9999992691784162, iteration: 12351
loss: 1.0436877012252808,grad_norm: 0.9999992400227001, iteration: 12352
loss: 1.0223366022109985,grad_norm: 0.9999991119663022, iteration: 12353
loss: 1.011542797088623,grad_norm: 0.999999190404038, iteration: 12354
loss: 0.9878352880477905,grad_norm: 0.9999991868845415, iteration: 12355
loss: 1.0836879014968872,grad_norm: 0.9999995485235639, iteration: 12356
loss: 1.0500588417053223,grad_norm: 0.9999992510472248, iteration: 12357
loss: 0.9880514740943909,grad_norm: 0.9801522910478299, iteration: 12358
loss: 1.0420500040054321,grad_norm: 0.999999124628825, iteration: 12359
loss: 1.0737336874008179,grad_norm: 0.9999993187655412, iteration: 12360
loss: 1.0143508911132812,grad_norm: 0.9999991453156775, iteration: 12361
loss: 1.0166268348693848,grad_norm: 0.9999992690854925, iteration: 12362
loss: 0.9959350228309631,grad_norm: 0.9999998369259993, iteration: 12363
loss: 1.0572435855865479,grad_norm: 0.9999991164479635, iteration: 12364
loss: 1.0395575761795044,grad_norm: 0.9999994241444138, iteration: 12365
loss: 0.9831816554069519,grad_norm: 0.9999993301407768, iteration: 12366
loss: 1.0276241302490234,grad_norm: 0.999999145145957, iteration: 12367
loss: 1.028017282485962,grad_norm: 0.9999994186214451, iteration: 12368
loss: 1.0628046989440918,grad_norm: 0.999999132164416, iteration: 12369
loss: 1.051024079322815,grad_norm: 0.9999992866573814, iteration: 12370
loss: 1.0211296081542969,grad_norm: 0.9999996643379407, iteration: 12371
loss: 1.0660300254821777,grad_norm: 0.9999998292034509, iteration: 12372
loss: 1.021796703338623,grad_norm: 0.9999994393738367, iteration: 12373
loss: 0.9950098395347595,grad_norm: 0.9999991405304769, iteration: 12374
loss: 1.0270570516586304,grad_norm: 0.9999992809213835, iteration: 12375
loss: 0.9844701290130615,grad_norm: 0.9999993484020822, iteration: 12376
loss: 0.9884064793586731,grad_norm: 0.9999993059981549, iteration: 12377
loss: 0.9928200840950012,grad_norm: 0.9999995202306406, iteration: 12378
loss: 1.042201280593872,grad_norm: 0.9999990153882102, iteration: 12379
loss: 1.0272258520126343,grad_norm: 0.999999450322052, iteration: 12380
loss: 1.0091434717178345,grad_norm: 0.9999991674053866, iteration: 12381
loss: 0.963073194026947,grad_norm: 0.9999992023473285, iteration: 12382
loss: 1.0544342994689941,grad_norm: 0.9999992308683535, iteration: 12383
loss: 0.950649619102478,grad_norm: 0.9999995180524381, iteration: 12384
loss: 1.0374726057052612,grad_norm: 0.9999992941384502, iteration: 12385
loss: 1.0133033990859985,grad_norm: 0.9999993306698693, iteration: 12386
loss: 1.0591658353805542,grad_norm: 0.9999991912592681, iteration: 12387
loss: 1.0479542016983032,grad_norm: 0.9999991523924023, iteration: 12388
loss: 1.069072961807251,grad_norm: 0.9999996907488128, iteration: 12389
loss: 1.025088906288147,grad_norm: 0.9999993008317845, iteration: 12390
loss: 1.0192195177078247,grad_norm: 0.999999309735621, iteration: 12391
loss: 1.0322461128234863,grad_norm: 0.9999992698851571, iteration: 12392
loss: 1.0194419622421265,grad_norm: 0.9999992483143694, iteration: 12393
loss: 1.0077705383300781,grad_norm: 0.9999992361412186, iteration: 12394
loss: 1.0093187093734741,grad_norm: 0.9999993297848431, iteration: 12395
loss: 1.015620231628418,grad_norm: 0.999999334434105, iteration: 12396
loss: 1.1000146865844727,grad_norm: 0.9999994406222541, iteration: 12397
loss: 1.060571312904358,grad_norm: 0.9999994067961031, iteration: 12398
loss: 1.0203050374984741,grad_norm: 0.9999993144841861, iteration: 12399
loss: 1.0381149053573608,grad_norm: 0.9999991043475198, iteration: 12400
loss: 1.0333564281463623,grad_norm: 0.9999992283129847, iteration: 12401
loss: 1.0669662952423096,grad_norm: 0.9999991742871409, iteration: 12402
loss: 1.1278358697891235,grad_norm: 0.999999822755038, iteration: 12403
loss: 0.9770174622535706,grad_norm: 0.9890119503567383, iteration: 12404
loss: 1.0380406379699707,grad_norm: 0.9999995965008596, iteration: 12405
loss: 1.0746182203292847,grad_norm: 0.9999992013676154, iteration: 12406
loss: 1.0480053424835205,grad_norm: 0.9999991395084297, iteration: 12407
loss: 1.0328129529953003,grad_norm: 0.9999991507779844, iteration: 12408
loss: 1.082532286643982,grad_norm: 0.9999994215186003, iteration: 12409
loss: 0.987490177154541,grad_norm: 0.9120834874974986, iteration: 12410
loss: 1.016595721244812,grad_norm: 0.9999990790678402, iteration: 12411
loss: 1.0679761171340942,grad_norm: 0.9999992791340254, iteration: 12412
loss: 0.9973863363265991,grad_norm: 0.9999991278598636, iteration: 12413
loss: 1.002755045890808,grad_norm: 0.9999992952902091, iteration: 12414
loss: 1.0650660991668701,grad_norm: 0.9999993430955971, iteration: 12415
loss: 1.0334491729736328,grad_norm: 0.9999995331705533, iteration: 12416
loss: 1.0307259559631348,grad_norm: 0.9999991150932586, iteration: 12417
loss: 1.0005544424057007,grad_norm: 0.999999239206791, iteration: 12418
loss: 1.0191763639450073,grad_norm: 0.9999991063793064, iteration: 12419
loss: 1.04324209690094,grad_norm: 0.9999993321452035, iteration: 12420
loss: 1.0605580806732178,grad_norm: 0.999999210770467, iteration: 12421
loss: 0.9716705083847046,grad_norm: 0.9999991421426074, iteration: 12422
loss: 1.0252333879470825,grad_norm: 0.9999994522055771, iteration: 12423
loss: 1.02732253074646,grad_norm: 0.9999990991697422, iteration: 12424
loss: 1.0123482942581177,grad_norm: 0.9999992755924516, iteration: 12425
loss: 1.0105637311935425,grad_norm: 0.9999994351875019, iteration: 12426
loss: 1.0546894073486328,grad_norm: 0.9999992253544951, iteration: 12427
loss: 1.0014464855194092,grad_norm: 0.9999993526714913, iteration: 12428
loss: 0.9910538196563721,grad_norm: 0.9999994950748087, iteration: 12429
loss: 1.0141816139221191,grad_norm: 0.9999991409227563, iteration: 12430
loss: 1.0544524192810059,grad_norm: 0.99999921786389, iteration: 12431
loss: 1.0688586235046387,grad_norm: 0.9999994486298973, iteration: 12432
loss: 0.9918376803398132,grad_norm: 0.9999991373024368, iteration: 12433
loss: 1.0547144412994385,grad_norm: 0.9999991829832946, iteration: 12434
loss: 1.0303313732147217,grad_norm: 0.999999324997619, iteration: 12435
loss: 1.0492830276489258,grad_norm: 0.9999994612951816, iteration: 12436
loss: 1.0102319717407227,grad_norm: 0.9999992742672839, iteration: 12437
loss: 1.051900029182434,grad_norm: 0.9999992375104555, iteration: 12438
loss: 1.0246793031692505,grad_norm: 0.9999992901709356, iteration: 12439
loss: 1.000543475151062,grad_norm: 0.9999991124315137, iteration: 12440
loss: 1.035452127456665,grad_norm: 0.9999991389506194, iteration: 12441
loss: 1.0818356275558472,grad_norm: 0.9999996871122384, iteration: 12442
loss: 1.04350745677948,grad_norm: 0.9999994381528695, iteration: 12443
loss: 1.0254765748977661,grad_norm: 0.9999991419784346, iteration: 12444
loss: 1.0562770366668701,grad_norm: 0.9999994387321707, iteration: 12445
loss: 1.0289427042007446,grad_norm: 0.9999997730051015, iteration: 12446
loss: 1.037490963935852,grad_norm: 0.999999303021467, iteration: 12447
loss: 1.041182041168213,grad_norm: 0.9999993409927209, iteration: 12448
loss: 1.0184406042099,grad_norm: 0.9903465202073871, iteration: 12449
loss: 1.0445119142532349,grad_norm: 0.9999991418133272, iteration: 12450
loss: 1.0465737581253052,grad_norm: 0.9999994468262309, iteration: 12451
loss: 1.03181791305542,grad_norm: 0.999999243481528, iteration: 12452
loss: 1.0133179426193237,grad_norm: 0.999999440007102, iteration: 12453
loss: 1.002632737159729,grad_norm: 0.9999996581947518, iteration: 12454
loss: 0.9745023846626282,grad_norm: 0.9999992281672594, iteration: 12455
loss: 1.0095313787460327,grad_norm: 0.9999993352031273, iteration: 12456
loss: 1.0073977708816528,grad_norm: 0.9999991161148154, iteration: 12457
loss: 1.0649845600128174,grad_norm: 0.9999993590724124, iteration: 12458
loss: 1.0078610181808472,grad_norm: 0.9999991785832569, iteration: 12459
loss: 1.0458823442459106,grad_norm: 0.9999990339272303, iteration: 12460
loss: 1.0159780979156494,grad_norm: 0.9999992077130668, iteration: 12461
loss: 1.0336729288101196,grad_norm: 0.9999993683276267, iteration: 12462
loss: 1.0541045665740967,grad_norm: 0.9999995215051, iteration: 12463
loss: 1.0653557777404785,grad_norm: 0.9999991472006045, iteration: 12464
loss: 0.9975109100341797,grad_norm: 0.9999991301689068, iteration: 12465
loss: 1.0584224462509155,grad_norm: 0.9999994284643215, iteration: 12466
loss: 1.0013422966003418,grad_norm: 0.9999991220583986, iteration: 12467
loss: 1.0477581024169922,grad_norm: 0.9999992967566274, iteration: 12468
loss: 1.0270922183990479,grad_norm: 0.9618055945852303, iteration: 12469
loss: 0.981553852558136,grad_norm: 0.9701483233959242, iteration: 12470
loss: 1.0448663234710693,grad_norm: 0.999999091799705, iteration: 12471
loss: 1.047507405281067,grad_norm: 0.9999990151015933, iteration: 12472
loss: 0.9938356280326843,grad_norm: 0.9999992772317045, iteration: 12473
loss: 0.9923476576805115,grad_norm: 0.9999992549604768, iteration: 12474
loss: 1.020745873451233,grad_norm: 0.9999994608927517, iteration: 12475
loss: 1.0420641899108887,grad_norm: 0.9999991886836862, iteration: 12476
loss: 1.029678225517273,grad_norm: 0.9999990351524978, iteration: 12477
loss: 1.0708743333816528,grad_norm: 0.99999945378072, iteration: 12478
loss: 1.0510661602020264,grad_norm: 0.999999521782893, iteration: 12479
loss: 1.059651255607605,grad_norm: 0.9999991766500992, iteration: 12480
loss: 1.0657578706741333,grad_norm: 0.9999991673532607, iteration: 12481
loss: 1.0297064781188965,grad_norm: 0.9999991418967951, iteration: 12482
loss: 1.0177087783813477,grad_norm: 0.999999183928489, iteration: 12483
loss: 0.9938080310821533,grad_norm: 0.9999992552339583, iteration: 12484
loss: 1.0326550006866455,grad_norm: 0.9999990432784671, iteration: 12485
loss: 1.0269231796264648,grad_norm: 0.9999991887980328, iteration: 12486
loss: 1.0262389183044434,grad_norm: 0.9999992471187189, iteration: 12487
loss: 1.0271190404891968,grad_norm: 0.9999997742909059, iteration: 12488
loss: 1.0145622491836548,grad_norm: 0.9999992547547873, iteration: 12489
loss: 1.0456738471984863,grad_norm: 0.9999995776908078, iteration: 12490
loss: 1.0033259391784668,grad_norm: 0.999999142933631, iteration: 12491
loss: 1.012761116027832,grad_norm: 0.9999992874747855, iteration: 12492
loss: 1.0355108976364136,grad_norm: 0.9999998473762785, iteration: 12493
loss: 1.0818065404891968,grad_norm: 0.9999992560968066, iteration: 12494
loss: 0.9739903211593628,grad_norm: 0.9999991078759219, iteration: 12495
loss: 1.0519341230392456,grad_norm: 0.999999231416223, iteration: 12496
loss: 1.0449371337890625,grad_norm: 0.9999991357719153, iteration: 12497
loss: 1.0265834331512451,grad_norm: 0.9999992778913349, iteration: 12498
loss: 1.015757441520691,grad_norm: 0.9999991564550172, iteration: 12499
loss: 1.0483287572860718,grad_norm: 0.9869782568117219, iteration: 12500
loss: 0.9991875886917114,grad_norm: 0.9999992180485078, iteration: 12501
loss: 1.06076979637146,grad_norm: 0.99999911782356, iteration: 12502
loss: 1.0040879249572754,grad_norm: 0.9999991211355517, iteration: 12503
loss: 1.0317459106445312,grad_norm: 0.9999992862637452, iteration: 12504
loss: 1.0480778217315674,grad_norm: 0.9999991918721108, iteration: 12505
loss: 1.055837631225586,grad_norm: 0.9999994828830838, iteration: 12506
loss: 0.9918532967567444,grad_norm: 0.9999998099250607, iteration: 12507
loss: 1.0227500200271606,grad_norm: 0.9999991943391445, iteration: 12508
loss: 1.0348753929138184,grad_norm: 0.9999992741643302, iteration: 12509
loss: 1.03452467918396,grad_norm: 0.9999991459172435, iteration: 12510
loss: 1.0452799797058105,grad_norm: 0.9999991337429432, iteration: 12511
loss: 1.0110357999801636,grad_norm: 0.9999992492316842, iteration: 12512
loss: 0.9971054196357727,grad_norm: 0.9999991939906369, iteration: 12513
loss: 1.0418497323989868,grad_norm: 0.9999995845941051, iteration: 12514
loss: 1.0039594173431396,grad_norm: 0.9999991872742614, iteration: 12515
loss: 1.0239590406417847,grad_norm: 0.9999993479593668, iteration: 12516
loss: 1.0254967212677002,grad_norm: 0.9999991059577572, iteration: 12517
loss: 1.0032939910888672,grad_norm: 0.9999993032793935, iteration: 12518
loss: 1.0453532934188843,grad_norm: 0.9999992639066434, iteration: 12519
loss: 1.0313252210617065,grad_norm: 0.9999994645426492, iteration: 12520
loss: 1.0495551824569702,grad_norm: 0.9999991877758707, iteration: 12521
loss: 1.0565565824508667,grad_norm: 0.9999991376619557, iteration: 12522
loss: 1.0234910249710083,grad_norm: 0.9999992345928329, iteration: 12523
loss: 1.0554230213165283,grad_norm: 0.9999994425439293, iteration: 12524
loss: 1.0164679288864136,grad_norm: 0.9999994572324155, iteration: 12525
loss: 1.087812900543213,grad_norm: 0.9999991492806711, iteration: 12526
loss: 1.0263549089431763,grad_norm: 0.9999991175622441, iteration: 12527
loss: 1.0783464908599854,grad_norm: 0.9999994113312263, iteration: 12528
loss: 1.063726782798767,grad_norm: 0.999999390317877, iteration: 12529
loss: 1.0111533403396606,grad_norm: 0.9999991250437225, iteration: 12530
loss: 1.0422618389129639,grad_norm: 0.9999993999592445, iteration: 12531
loss: 1.060204029083252,grad_norm: 0.9999992548211829, iteration: 12532
loss: 1.0424710512161255,grad_norm: 0.9999994438862956, iteration: 12533
loss: 1.0367976427078247,grad_norm: 0.9999993613390711, iteration: 12534
loss: 0.9908884167671204,grad_norm: 0.9999993829884466, iteration: 12535
loss: 1.0360053777694702,grad_norm: 0.9999990775759663, iteration: 12536
loss: 1.0550732612609863,grad_norm: 0.9999992524908078, iteration: 12537
loss: 1.0636770725250244,grad_norm: 0.9999993857616366, iteration: 12538
loss: 1.055168628692627,grad_norm: 0.9999993246170896, iteration: 12539
loss: 1.0475693941116333,grad_norm: 0.9999992416690252, iteration: 12540
loss: 1.0338212251663208,grad_norm: 0.9999990556042981, iteration: 12541
loss: 1.052513599395752,grad_norm: 0.9999993219200767, iteration: 12542
loss: 1.0285453796386719,grad_norm: 0.9999992376679157, iteration: 12543
loss: 0.9927712082862854,grad_norm: 0.9999989801367332, iteration: 12544
loss: 0.9894232749938965,grad_norm: 0.9999992857637344, iteration: 12545
loss: 1.0193630456924438,grad_norm: 0.9341186236993004, iteration: 12546
loss: 1.067373514175415,grad_norm: 0.9999992852827662, iteration: 12547
loss: 1.0644707679748535,grad_norm: 0.9999993468623132, iteration: 12548
loss: 1.0033985376358032,grad_norm: 0.9999993529490162, iteration: 12549
loss: 1.0541130304336548,grad_norm: 0.9999993085069383, iteration: 12550
loss: 1.013902187347412,grad_norm: 0.9999992778648072, iteration: 12551
loss: 1.036402940750122,grad_norm: 0.999999269729428, iteration: 12552
loss: 1.0445489883422852,grad_norm: 0.9999992000686629, iteration: 12553
loss: 1.001891016960144,grad_norm: 0.9999991067978914, iteration: 12554
loss: 0.9891716241836548,grad_norm: 0.9999992003762201, iteration: 12555
loss: 1.0156513452529907,grad_norm: 0.9999992033464056, iteration: 12556
loss: 1.0263211727142334,grad_norm: 0.9999992443454299, iteration: 12557
loss: 1.0335148572921753,grad_norm: 0.9999993874173431, iteration: 12558
loss: 1.0486003160476685,grad_norm: 0.9999992532600328, iteration: 12559
loss: 1.0710195302963257,grad_norm: 0.9999992677533278, iteration: 12560
loss: 1.0446052551269531,grad_norm: 0.9999990867127967, iteration: 12561
loss: 1.0221978425979614,grad_norm: 0.9999992078097248, iteration: 12562
loss: 1.053600788116455,grad_norm: 0.999999098024647, iteration: 12563
loss: 0.9873573780059814,grad_norm: 0.9456910014243134, iteration: 12564
loss: 1.054732084274292,grad_norm: 0.9999993227659272, iteration: 12565
loss: 0.9932140111923218,grad_norm: 0.9999993152010054, iteration: 12566
loss: 1.0090268850326538,grad_norm: 0.9999995334829376, iteration: 12567
loss: 1.004226565361023,grad_norm: 0.9999991757629256, iteration: 12568
loss: 1.0446488857269287,grad_norm: 0.9999992969212724, iteration: 12569
loss: 1.0182342529296875,grad_norm: 0.9999991922179547, iteration: 12570
loss: 0.9971964955329895,grad_norm: 0.9999992254031488, iteration: 12571
loss: 1.025669813156128,grad_norm: 0.999999177417232, iteration: 12572
loss: 1.0173317193984985,grad_norm: 0.9999994170586667, iteration: 12573
loss: 1.011092185974121,grad_norm: 0.999999122678952, iteration: 12574
loss: 1.0676380395889282,grad_norm: 0.9999991929801328, iteration: 12575
loss: 1.0672659873962402,grad_norm: 0.9999992578635101, iteration: 12576
loss: 1.0547642707824707,grad_norm: 0.9999992182020165, iteration: 12577
loss: 1.016647219657898,grad_norm: 0.9999991580593562, iteration: 12578
loss: 0.9723260402679443,grad_norm: 0.9999992694239184, iteration: 12579
loss: 1.0197640657424927,grad_norm: 0.9999992018973474, iteration: 12580
loss: 1.0414494276046753,grad_norm: 0.999999169456108, iteration: 12581
loss: 0.98480224609375,grad_norm: 0.9999992963439218, iteration: 12582
loss: 0.9884106516838074,grad_norm: 0.999999082278843, iteration: 12583
loss: 1.0306577682495117,grad_norm: 0.9999993735747079, iteration: 12584
loss: 1.0543791055679321,grad_norm: 0.9999993161311692, iteration: 12585
loss: 1.0108989477157593,grad_norm: 0.9999991901256543, iteration: 12586
loss: 1.0451881885528564,grad_norm: 0.9999990932833466, iteration: 12587
loss: 1.0286579132080078,grad_norm: 0.9999992349454784, iteration: 12588
loss: 1.0650750398635864,grad_norm: 0.9999992299412976, iteration: 12589
loss: 1.0767234563827515,grad_norm: 0.9999996040798211, iteration: 12590
loss: 1.0631566047668457,grad_norm: 0.9999989765289119, iteration: 12591
loss: 1.0633795261383057,grad_norm: 0.9999994043858071, iteration: 12592
loss: 0.9977878332138062,grad_norm: 0.9999991558607872, iteration: 12593
loss: 1.0493367910385132,grad_norm: 0.9999991437621412, iteration: 12594
loss: 1.0056291818618774,grad_norm: 0.9999992851415529, iteration: 12595
loss: 0.9750427007675171,grad_norm: 0.9999991890114115, iteration: 12596
loss: 1.0355345010757446,grad_norm: 0.9999992685625596, iteration: 12597
loss: 1.0647612810134888,grad_norm: 0.9999990987892589, iteration: 12598
loss: 1.0205333232879639,grad_norm: 0.9999992340829289, iteration: 12599
loss: 1.0769767761230469,grad_norm: 0.9999994141648675, iteration: 12600
loss: 0.9937183260917664,grad_norm: 0.9999992053040268, iteration: 12601
loss: 1.07417631149292,grad_norm: 0.9999993040419931, iteration: 12602
loss: 1.0424526929855347,grad_norm: 0.9999993458572212, iteration: 12603
loss: 1.0264569520950317,grad_norm: 0.9999993314790855, iteration: 12604
loss: 1.0017874240875244,grad_norm: 0.9999995895136546, iteration: 12605
loss: 1.015814185142517,grad_norm: 0.9999992185975032, iteration: 12606
loss: 1.028407335281372,grad_norm: 0.9999991463021772, iteration: 12607
loss: 0.995058000087738,grad_norm: 0.9999992667725957, iteration: 12608
loss: 1.0576231479644775,grad_norm: 0.9999990956298275, iteration: 12609
loss: 1.0171520709991455,grad_norm: 0.9999991686033365, iteration: 12610
loss: 1.1086050271987915,grad_norm: 0.9999995671403025, iteration: 12611
loss: 1.040650725364685,grad_norm: 0.9999993276215893, iteration: 12612
loss: 1.0102624893188477,grad_norm: 0.9999991280911531, iteration: 12613
loss: 1.0512256622314453,grad_norm: 0.9999992519323517, iteration: 12614
loss: 1.0279592275619507,grad_norm: 0.9155829178307633, iteration: 12615
loss: 1.0410866737365723,grad_norm: 0.9999992336333232, iteration: 12616
loss: 1.0295634269714355,grad_norm: 0.9999990403850805, iteration: 12617
loss: 1.014119267463684,grad_norm: 0.9999992793576753, iteration: 12618
loss: 0.9857937693595886,grad_norm: 0.9999992810178117, iteration: 12619
loss: 1.022950530052185,grad_norm: 0.9999991691268608, iteration: 12620
loss: 1.034051775932312,grad_norm: 0.999999557640178, iteration: 12621
loss: 1.0240535736083984,grad_norm: 0.9999995387441054, iteration: 12622
loss: 1.0012397766113281,grad_norm: 0.9999990956285383, iteration: 12623
loss: 0.9971617460250854,grad_norm: 0.9999990245355133, iteration: 12624
loss: 1.049918293952942,grad_norm: 0.9999991307017734, iteration: 12625
loss: 1.0171425342559814,grad_norm: 0.9999992857027693, iteration: 12626
loss: 0.9832011461257935,grad_norm: 0.9999993336430506, iteration: 12627
loss: 0.9861024618148804,grad_norm: 0.999999340711169, iteration: 12628
loss: 1.0115662813186646,grad_norm: 0.9999992687534015, iteration: 12629
loss: 0.9699971675872803,grad_norm: 0.9999993081575241, iteration: 12630
loss: 1.0329978466033936,grad_norm: 0.999999232731488, iteration: 12631
loss: 1.0244776010513306,grad_norm: 0.9999990408591295, iteration: 12632
loss: 0.9757575988769531,grad_norm: 0.9999993045394858, iteration: 12633
loss: 1.0516949892044067,grad_norm: 0.9999992123859526, iteration: 12634
loss: 1.0913889408111572,grad_norm: 0.9999993508930928, iteration: 12635
loss: 1.038790225982666,grad_norm: 0.9999991911048803, iteration: 12636
loss: 1.0223952531814575,grad_norm: 0.9999994859261915, iteration: 12637
loss: 1.0232176780700684,grad_norm: 0.9999992504040327, iteration: 12638
loss: 1.0595736503601074,grad_norm: 0.999999130253243, iteration: 12639
loss: 1.0290690660476685,grad_norm: 0.9999991418332681, iteration: 12640
loss: 1.0035954713821411,grad_norm: 0.9999993701043548, iteration: 12641
loss: 1.0168429613113403,grad_norm: 0.9999993111343736, iteration: 12642
loss: 1.0268723964691162,grad_norm: 0.9999991221728812, iteration: 12643
loss: 0.9882411956787109,grad_norm: 0.9999993389041139, iteration: 12644
loss: 1.0087660551071167,grad_norm: 0.9999992435217936, iteration: 12645
loss: 1.0067765712738037,grad_norm: 0.9999994401840127, iteration: 12646
loss: 1.0704187154769897,grad_norm: 0.9999992324239353, iteration: 12647
loss: 1.0597728490829468,grad_norm: 0.9999992130508778, iteration: 12648
loss: 1.0340229272842407,grad_norm: 0.9999991918497004, iteration: 12649
loss: 0.997292697429657,grad_norm: 0.9683618085821457, iteration: 12650
loss: 1.0163925886154175,grad_norm: 0.9999990227130852, iteration: 12651
loss: 1.0411477088928223,grad_norm: 0.999999030916889, iteration: 12652
loss: 1.0277464389801025,grad_norm: 0.9999991714893914, iteration: 12653
loss: 1.030895709991455,grad_norm: 0.9999990928549073, iteration: 12654
loss: 1.0166336297988892,grad_norm: 0.9999996629475523, iteration: 12655
loss: 0.9904587268829346,grad_norm: 0.9999993832392591, iteration: 12656
loss: 1.0294973850250244,grad_norm: 0.9999995192225148, iteration: 12657
loss: 1.0348881483078003,grad_norm: 0.9999991430253558, iteration: 12658
loss: 1.0144411325454712,grad_norm: 0.999999163208164, iteration: 12659
loss: 1.0833109617233276,grad_norm: 0.999999753506303, iteration: 12660
loss: 1.0070146322250366,grad_norm: 0.999999142483392, iteration: 12661
loss: 1.0323530435562134,grad_norm: 0.9999991782036757, iteration: 12662
loss: 1.0382088422775269,grad_norm: 0.9999994167723014, iteration: 12663
loss: 1.0365676879882812,grad_norm: 0.9999993358405886, iteration: 12664
loss: 1.020780086517334,grad_norm: 0.9999992161022329, iteration: 12665
loss: 1.0638420581817627,grad_norm: 0.9999992201777145, iteration: 12666
loss: 1.0170950889587402,grad_norm: 0.9999993199781712, iteration: 12667
loss: 0.9955756664276123,grad_norm: 0.999999085080243, iteration: 12668
loss: 1.0540472269058228,grad_norm: 0.9999994266726809, iteration: 12669
loss: 1.0562241077423096,grad_norm: 0.9999993240184083, iteration: 12670
loss: 0.9782077074050903,grad_norm: 0.9999991491414221, iteration: 12671
loss: 0.9829619526863098,grad_norm: 0.9999992157500731, iteration: 12672
loss: 0.9931392073631287,grad_norm: 0.9999994163328981, iteration: 12673
loss: 1.0215344429016113,grad_norm: 0.9999992499734015, iteration: 12674
loss: 1.0326718091964722,grad_norm: 0.9999991486613754, iteration: 12675
loss: 1.0247902870178223,grad_norm: 0.9999991878594079, iteration: 12676
loss: 1.0414936542510986,grad_norm: 0.9999991639807503, iteration: 12677
loss: 1.0082619190216064,grad_norm: 0.9367121425065135, iteration: 12678
loss: 0.9928998351097107,grad_norm: 0.9999992283956135, iteration: 12679
loss: 1.0053383111953735,grad_norm: 0.9999992133351623, iteration: 12680
loss: 1.013467788696289,grad_norm: 0.9701766034094678, iteration: 12681
loss: 0.9777492880821228,grad_norm: 0.999999346456601, iteration: 12682
loss: 0.9959989190101624,grad_norm: 0.9999995140620562, iteration: 12683
loss: 1.0502078533172607,grad_norm: 0.9999994758183582, iteration: 12684
loss: 1.0815887451171875,grad_norm: 0.9999994365934329, iteration: 12685
loss: 0.9744179844856262,grad_norm: 0.999999056951761, iteration: 12686
loss: 1.009264349937439,grad_norm: 0.9999991462711919, iteration: 12687
loss: 1.0253760814666748,grad_norm: 0.999999277408086, iteration: 12688
loss: 1.0515755414962769,grad_norm: 0.9789698066057196, iteration: 12689
loss: 1.0495731830596924,grad_norm: 0.999999068651747, iteration: 12690
loss: 1.0253753662109375,grad_norm: 0.9999992622246999, iteration: 12691
loss: 1.0792996883392334,grad_norm: 0.9999991394653477, iteration: 12692
loss: 0.9944342374801636,grad_norm: 0.9999991730335278, iteration: 12693
loss: 1.0249335765838623,grad_norm: 0.9999990837700374, iteration: 12694
loss: 1.0241669416427612,grad_norm: 0.9999992411983205, iteration: 12695
loss: 1.0901211500167847,grad_norm: 0.99999924600318, iteration: 12696
loss: 1.0180550813674927,grad_norm: 0.9999993217799391, iteration: 12697
loss: 1.0192248821258545,grad_norm: 0.9999991017943354, iteration: 12698
loss: 1.0076298713684082,grad_norm: 0.9999991647291888, iteration: 12699
loss: 1.035767912864685,grad_norm: 0.9999999078831403, iteration: 12700
loss: 0.9974750280380249,grad_norm: 0.999999290917165, iteration: 12701
loss: 1.0084067583084106,grad_norm: 0.9999990491018289, iteration: 12702
loss: 1.0225882530212402,grad_norm: 0.9999991626062343, iteration: 12703
loss: 1.0455371141433716,grad_norm: 0.9999991176515983, iteration: 12704
loss: 0.9865636825561523,grad_norm: 0.9999991975701427, iteration: 12705
loss: 1.0087485313415527,grad_norm: 0.9999990937561694, iteration: 12706
loss: 1.010632872581482,grad_norm: 0.9999990909245812, iteration: 12707
loss: 0.9980916380882263,grad_norm: 0.9999990721968308, iteration: 12708
loss: 1.0222642421722412,grad_norm: 0.9999992456753586, iteration: 12709
loss: 1.0071669816970825,grad_norm: 0.9999991334204366, iteration: 12710
loss: 1.0350080728530884,grad_norm: 0.9999994782401024, iteration: 12711
loss: 1.0260871648788452,grad_norm: 0.9999992592949084, iteration: 12712
loss: 1.058985710144043,grad_norm: 0.9999990871065014, iteration: 12713
loss: 1.015208125114441,grad_norm: 0.9999997616429928, iteration: 12714
loss: 1.007511019706726,grad_norm: 0.9999994899749719, iteration: 12715
loss: 1.030648946762085,grad_norm: 0.9999992342026196, iteration: 12716
loss: 0.9778574705123901,grad_norm: 0.9999991459738495, iteration: 12717
loss: 1.0134222507476807,grad_norm: 0.9999991894022661, iteration: 12718
loss: 1.0362114906311035,grad_norm: 0.9999990367707917, iteration: 12719
loss: 1.0256654024124146,grad_norm: 0.9999992642877074, iteration: 12720
loss: 1.0186364650726318,grad_norm: 0.9999992179618145, iteration: 12721
loss: 1.058765172958374,grad_norm: 0.9999991988468591, iteration: 12722
loss: 0.9979851841926575,grad_norm: 0.9999992500894643, iteration: 12723
loss: 0.9884209036827087,grad_norm: 0.9999992098722549, iteration: 12724
loss: 0.9880871176719666,grad_norm: 0.9999990514295104, iteration: 12725
loss: 1.0140366554260254,grad_norm: 0.9999992120301614, iteration: 12726
loss: 1.0145013332366943,grad_norm: 0.999999231559901, iteration: 12727
loss: 1.065535068511963,grad_norm: 0.9999993761533599, iteration: 12728
loss: 1.0251697301864624,grad_norm: 0.9999992936871165, iteration: 12729
loss: 1.011147379875183,grad_norm: 0.9999990079159683, iteration: 12730
loss: 1.0271493196487427,grad_norm: 0.999999093984113, iteration: 12731
loss: 1.0263205766677856,grad_norm: 0.9999996201398542, iteration: 12732
loss: 1.0789040327072144,grad_norm: 0.9999992071217322, iteration: 12733
loss: 1.0426323413848877,grad_norm: 0.9999990433021892, iteration: 12734
loss: 1.0259324312210083,grad_norm: 0.9999994284629794, iteration: 12735
loss: 1.0460484027862549,grad_norm: 0.9999993041094114, iteration: 12736
loss: 1.0548077821731567,grad_norm: 0.9999993485848446, iteration: 12737
loss: 1.0328874588012695,grad_norm: 0.963553031572816, iteration: 12738
loss: 1.0575050115585327,grad_norm: 0.9999990778774249, iteration: 12739
loss: 1.0109139680862427,grad_norm: 0.9999991767142007, iteration: 12740
loss: 1.0720866918563843,grad_norm: 0.9999993468291951, iteration: 12741
loss: 1.0225998163223267,grad_norm: 0.9999992458284565, iteration: 12742
loss: 1.0552899837493896,grad_norm: 0.9999992348895755, iteration: 12743
loss: 1.007896065711975,grad_norm: 0.9999992050442442, iteration: 12744
loss: 1.0428096055984497,grad_norm: 0.9999996279800681, iteration: 12745
loss: 0.9890140891075134,grad_norm: 0.9999990456625879, iteration: 12746
loss: 1.013179063796997,grad_norm: 0.9999992590577611, iteration: 12747
loss: 1.058632254600525,grad_norm: 0.9999995456147492, iteration: 12748
loss: 1.0488468408584595,grad_norm: 0.9999992195230384, iteration: 12749
loss: 0.9872680306434631,grad_norm: 0.9999991060844041, iteration: 12750
loss: 1.02279531955719,grad_norm: 0.9999990653806768, iteration: 12751
loss: 1.0262314081192017,grad_norm: 0.9999992656298993, iteration: 12752
loss: 1.0408706665039062,grad_norm: 0.999999428275525, iteration: 12753
loss: 1.0032527446746826,grad_norm: 0.9999991746505336, iteration: 12754
loss: 1.0549272298812866,grad_norm: 0.9999993128100791, iteration: 12755
loss: 1.053335428237915,grad_norm: 0.9999992394537336, iteration: 12756
loss: 1.0296114683151245,grad_norm: 0.999999362205475, iteration: 12757
loss: 0.9720096588134766,grad_norm: 0.999999423632465, iteration: 12758
loss: 1.0031540393829346,grad_norm: 0.9999991743396393, iteration: 12759
loss: 0.9761741757392883,grad_norm: 0.9999990532735846, iteration: 12760
loss: 1.0125459432601929,grad_norm: 0.9999995954981349, iteration: 12761
loss: 1.0046206712722778,grad_norm: 0.9999993269945502, iteration: 12762
loss: 1.03294837474823,grad_norm: 0.9999991634539979, iteration: 12763
loss: 1.0701671838760376,grad_norm: 0.9999994125867955, iteration: 12764
loss: 1.0311883687973022,grad_norm: 0.9999991773553043, iteration: 12765
loss: 0.9640699028968811,grad_norm: 0.9999990870943101, iteration: 12766
loss: 1.0160558223724365,grad_norm: 0.9999991008508574, iteration: 12767
loss: 1.0455119609832764,grad_norm: 0.9999991468829067, iteration: 12768
loss: 1.0388239622116089,grad_norm: 0.9999995396990536, iteration: 12769
loss: 1.0390232801437378,grad_norm: 0.937261435308601, iteration: 12770
loss: 0.9692217707633972,grad_norm: 0.9999992636733966, iteration: 12771
loss: 0.9747908711433411,grad_norm: 0.9999996115129207, iteration: 12772
loss: 1.0144742727279663,grad_norm: 0.999999269931564, iteration: 12773
loss: 1.057108998298645,grad_norm: 0.9999992447345152, iteration: 12774
loss: 0.9770194888114929,grad_norm: 0.9999992765375644, iteration: 12775
loss: 1.018522024154663,grad_norm: 0.9999991779420655, iteration: 12776
loss: 0.962648868560791,grad_norm: 0.9999992018345836, iteration: 12777
loss: 1.0288649797439575,grad_norm: 0.9999992336944346, iteration: 12778
loss: 1.0177956819534302,grad_norm: 0.9999991867555942, iteration: 12779
loss: 1.0145279169082642,grad_norm: 0.9999991615602516, iteration: 12780
loss: 1.0597020387649536,grad_norm: 0.9999991480262617, iteration: 12781
loss: 0.9855779409408569,grad_norm: 0.9999991126448463, iteration: 12782
loss: 1.021602749824524,grad_norm: 0.999999325415158, iteration: 12783
loss: 0.9995795488357544,grad_norm: 0.9999993150510904, iteration: 12784
loss: 1.0713247060775757,grad_norm: 0.999999635561989, iteration: 12785
loss: 1.005942702293396,grad_norm: 0.9999993561237142, iteration: 12786
loss: 0.9743430018424988,grad_norm: 0.9999992005045496, iteration: 12787
loss: 1.047561764717102,grad_norm: 0.9999989928071419, iteration: 12788
loss: 0.9842421412467957,grad_norm: 0.9999992777160879, iteration: 12789
loss: 0.9833341240882874,grad_norm: 0.999999843530754, iteration: 12790
loss: 1.0171934366226196,grad_norm: 0.9999993209291183, iteration: 12791
loss: 0.9908778071403503,grad_norm: 0.9999990736689987, iteration: 12792
loss: 1.0310159921646118,grad_norm: 0.9999991966130314, iteration: 12793
loss: 1.0286247730255127,grad_norm: 0.9999992549311673, iteration: 12794
loss: 1.0086147785186768,grad_norm: 0.9999991434951042, iteration: 12795
loss: 0.9976552128791809,grad_norm: 0.9999992180663737, iteration: 12796
loss: 1.071494698524475,grad_norm: 0.9999994346668296, iteration: 12797
loss: 0.9812569618225098,grad_norm: 0.9999991230002149, iteration: 12798
loss: 1.0687793493270874,grad_norm: 0.9999992404023691, iteration: 12799
loss: 1.0332016944885254,grad_norm: 0.9999992292823007, iteration: 12800
loss: 1.0108720064163208,grad_norm: 0.999999179259256, iteration: 12801
loss: 1.0342990159988403,grad_norm: 0.9999991133633658, iteration: 12802
loss: 0.9848400950431824,grad_norm: 0.9999991935527897, iteration: 12803
loss: 1.0509096384048462,grad_norm: 0.9999996649440243, iteration: 12804
loss: 1.0089510679244995,grad_norm: 0.9999995483600795, iteration: 12805
loss: 1.0497742891311646,grad_norm: 0.9999991208670322, iteration: 12806
loss: 1.0749778747558594,grad_norm: 0.9999994018862828, iteration: 12807
loss: 1.0362730026245117,grad_norm: 0.9999989816953213, iteration: 12808
loss: 1.0183215141296387,grad_norm: 0.999999199723371, iteration: 12809
loss: 1.0093252658843994,grad_norm: 0.9999992918941576, iteration: 12810
loss: 1.0359939336776733,grad_norm: 0.9999997752467241, iteration: 12811
loss: 1.0115433931350708,grad_norm: 0.9999992209830877, iteration: 12812
loss: 1.0696732997894287,grad_norm: 0.9999997388602919, iteration: 12813
loss: 1.0073025226593018,grad_norm: 0.9999992751015279, iteration: 12814
loss: 1.05068039894104,grad_norm: 0.9999990688718153, iteration: 12815
loss: 1.038051724433899,grad_norm: 0.9999992928436672, iteration: 12816
loss: 1.0112138986587524,grad_norm: 0.9999991133987133, iteration: 12817
loss: 1.0042933225631714,grad_norm: 0.9231285712221804, iteration: 12818
loss: 1.0401866436004639,grad_norm: 0.9999994673573208, iteration: 12819
loss: 1.0167183876037598,grad_norm: 0.9999992088337691, iteration: 12820
loss: 1.0278239250183105,grad_norm: 0.9999997562257232, iteration: 12821
loss: 1.0473501682281494,grad_norm: 0.9999992391517978, iteration: 12822
loss: 1.034271240234375,grad_norm: 0.9999990842938263, iteration: 12823
loss: 0.9957358241081238,grad_norm: 0.9999991383159101, iteration: 12824
loss: 1.0652931928634644,grad_norm: 0.9999993173549785, iteration: 12825
loss: 1.0445691347122192,grad_norm: 0.9999992229294358, iteration: 12826
loss: 0.9757561683654785,grad_norm: 0.9999990610641825, iteration: 12827
loss: 1.0512317419052124,grad_norm: 0.9999990681252653, iteration: 12828
loss: 1.0679547786712646,grad_norm: 0.9999992754654468, iteration: 12829
loss: 1.0101113319396973,grad_norm: 0.9999991963518977, iteration: 12830
loss: 1.0069022178649902,grad_norm: 0.9999991822047352, iteration: 12831
loss: 1.0064961910247803,grad_norm: 0.9999992348438458, iteration: 12832
loss: 1.0556827783584595,grad_norm: 0.9999993373785793, iteration: 12833
loss: 0.9730831980705261,grad_norm: 0.9999992196605623, iteration: 12834
loss: 0.9683510065078735,grad_norm: 0.9999993345674225, iteration: 12835
loss: 1.1225095987319946,grad_norm: 0.9999997465276559, iteration: 12836
loss: 1.075932502746582,grad_norm: 0.9999992055153853, iteration: 12837
loss: 1.0368311405181885,grad_norm: 0.9999994352338891, iteration: 12838
loss: 1.0498908758163452,grad_norm: 0.9999994624303887, iteration: 12839
loss: 1.0114352703094482,grad_norm: 0.9999992652042049, iteration: 12840
loss: 1.0192817449569702,grad_norm: 0.9999991172025439, iteration: 12841
loss: 1.016197919845581,grad_norm: 0.9999991115304064, iteration: 12842
loss: 1.034529685974121,grad_norm: 0.9999996095766963, iteration: 12843
loss: 1.0201166868209839,grad_norm: 0.999999164028972, iteration: 12844
loss: 1.057183027267456,grad_norm: 0.9999991498802842, iteration: 12845
loss: 0.9784117937088013,grad_norm: 0.9999994316573066, iteration: 12846
loss: 1.0030697584152222,grad_norm: 0.9999990677164641, iteration: 12847
loss: 1.035994529724121,grad_norm: 0.9969642579133894, iteration: 12848
loss: 1.0715053081512451,grad_norm: 0.9999995971571226, iteration: 12849
loss: 1.0951099395751953,grad_norm: 0.9999991923108303, iteration: 12850
loss: 1.0508939027786255,grad_norm: 0.9999995551160432, iteration: 12851
loss: 1.0281882286071777,grad_norm: 0.9999991284346238, iteration: 12852
loss: 1.029260277748108,grad_norm: 0.999999693707914, iteration: 12853
loss: 0.9942326545715332,grad_norm: 0.9999992601320234, iteration: 12854
loss: 0.9919469356536865,grad_norm: 0.9999992719757888, iteration: 12855
loss: 1.0028940439224243,grad_norm: 0.9999994557606097, iteration: 12856
loss: 1.0628032684326172,grad_norm: 0.9999996418435851, iteration: 12857
loss: 1.0571337938308716,grad_norm: 0.9999993967542777, iteration: 12858
loss: 1.0372803211212158,grad_norm: 0.9999993709651549, iteration: 12859
loss: 1.0517305135726929,grad_norm: 0.9999993665457361, iteration: 12860
loss: 1.0119372606277466,grad_norm: 0.9999991746713445, iteration: 12861
loss: 1.0481452941894531,grad_norm: 0.9999990604025669, iteration: 12862
loss: 0.9974573850631714,grad_norm: 0.9712604955416463, iteration: 12863
loss: 1.0049514770507812,grad_norm: 0.9999994741036535, iteration: 12864
loss: 1.0168753862380981,grad_norm: 0.9999991050823565, iteration: 12865
loss: 1.0566496849060059,grad_norm: 0.999999611674554, iteration: 12866
loss: 0.9731277227401733,grad_norm: 0.9999991735829339, iteration: 12867
loss: 1.0634655952453613,grad_norm: 0.9999994709838305, iteration: 12868
loss: 1.0162981748580933,grad_norm: 0.9999993534554955, iteration: 12869
loss: 0.9826356768608093,grad_norm: 0.9999991727523581, iteration: 12870
loss: 1.003284215927124,grad_norm: 0.9999991609728653, iteration: 12871
loss: 1.0196361541748047,grad_norm: 0.9999991848340312, iteration: 12872
loss: 0.9726593494415283,grad_norm: 0.9999991371738529, iteration: 12873
loss: 1.0510327816009521,grad_norm: 0.9999993262109755, iteration: 12874
loss: 1.0311166048049927,grad_norm: 0.9999991721266906, iteration: 12875
loss: 1.029482364654541,grad_norm: 0.9999992379592649, iteration: 12876
loss: 0.9782459139823914,grad_norm: 0.999999138120754, iteration: 12877
loss: 0.9986457228660583,grad_norm: 0.9999990956751863, iteration: 12878
loss: 1.0097734928131104,grad_norm: 0.9999991878102513, iteration: 12879
loss: 1.0375031232833862,grad_norm: 0.999999099104347, iteration: 12880
loss: 1.0817749500274658,grad_norm: 0.9999991912563932, iteration: 12881
loss: 1.01253080368042,grad_norm: 0.9999993360612645, iteration: 12882
loss: 1.0220450162887573,grad_norm: 0.9999993142552529, iteration: 12883
loss: 1.0391854047775269,grad_norm: 0.9999991579230928, iteration: 12884
loss: 1.060802698135376,grad_norm: 0.9999991068388181, iteration: 12885
loss: 1.0218476057052612,grad_norm: 0.9999990940710308, iteration: 12886
loss: 1.021949291229248,grad_norm: 0.9999992055744784, iteration: 12887
loss: 1.00454580783844,grad_norm: 0.9999993554934496, iteration: 12888
loss: 1.0391565561294556,grad_norm: 0.9999993260694903, iteration: 12889
loss: 0.9940788149833679,grad_norm: 0.9999992063964254, iteration: 12890
loss: 1.0308353900909424,grad_norm: 0.9999992215609524, iteration: 12891
loss: 0.984307050704956,grad_norm: 0.9999993026319182, iteration: 12892
loss: 1.0076773166656494,grad_norm: 0.999999213891266, iteration: 12893
loss: 1.028562307357788,grad_norm: 0.999999129035577, iteration: 12894
loss: 1.042515754699707,grad_norm: 0.999999558083654, iteration: 12895
loss: 1.0456879138946533,grad_norm: 0.9999991992619297, iteration: 12896
loss: 1.0556193590164185,grad_norm: 0.9999993435960239, iteration: 12897
loss: 1.0408889055252075,grad_norm: 0.9999992075521931, iteration: 12898
loss: 1.0345250368118286,grad_norm: 0.9999993996981137, iteration: 12899
loss: 1.030415415763855,grad_norm: 0.9999992217128114, iteration: 12900
loss: 1.0383275747299194,grad_norm: 0.9999992124171537, iteration: 12901
loss: 0.9944936037063599,grad_norm: 0.999999201154636, iteration: 12902
loss: 1.0180225372314453,grad_norm: 0.9999991822256759, iteration: 12903
loss: 1.0215277671813965,grad_norm: 0.9999992966935146, iteration: 12904
loss: 1.0352141857147217,grad_norm: 0.9999994309242073, iteration: 12905
loss: 1.0061160326004028,grad_norm: 0.9999991827453315, iteration: 12906
loss: 1.044287919998169,grad_norm: 0.9999992577916651, iteration: 12907
loss: 1.0611860752105713,grad_norm: 0.9999991161545928, iteration: 12908
loss: 0.9994579553604126,grad_norm: 0.9999992730451064, iteration: 12909
loss: 1.0034208297729492,grad_norm: 0.9999993118740746, iteration: 12910
loss: 1.010438084602356,grad_norm: 0.9999991702042709, iteration: 12911
loss: 1.0296751260757446,grad_norm: 0.9999991680530615, iteration: 12912
loss: 0.9937839508056641,grad_norm: 0.999999205094884, iteration: 12913
loss: 1.0262787342071533,grad_norm: 0.9999992613301997, iteration: 12914
loss: 1.0084530115127563,grad_norm: 0.9999993093144258, iteration: 12915
loss: 1.004574179649353,grad_norm: 0.9999991678680075, iteration: 12916
loss: 0.9993769526481628,grad_norm: 0.9999992685088454, iteration: 12917
loss: 1.064885139465332,grad_norm: 0.9999990854487433, iteration: 12918
loss: 1.0627233982086182,grad_norm: 0.9999992706296573, iteration: 12919
loss: 1.0017648935317993,grad_norm: 0.9999993278411486, iteration: 12920
loss: 1.0213053226470947,grad_norm: 0.9999992150407024, iteration: 12921
loss: 1.025259017944336,grad_norm: 0.9999992200197135, iteration: 12922
loss: 1.0374937057495117,grad_norm: 0.9999994347123565, iteration: 12923
loss: 1.0556565523147583,grad_norm: 0.9999997539411705, iteration: 12924
loss: 1.0082389116287231,grad_norm: 0.9999994252459797, iteration: 12925
loss: 1.026432991027832,grad_norm: 0.9999990771600106, iteration: 12926
loss: 1.0752222537994385,grad_norm: 0.9999992862708479, iteration: 12927
loss: 0.9919472336769104,grad_norm: 0.9999992375970087, iteration: 12928
loss: 1.0423352718353271,grad_norm: 0.999999173058554, iteration: 12929
loss: 1.0298700332641602,grad_norm: 0.9999993195320918, iteration: 12930
loss: 1.044350028038025,grad_norm: 0.999999178874314, iteration: 12931
loss: 1.0114365816116333,grad_norm: 0.9999992901914548, iteration: 12932
loss: 0.9673535227775574,grad_norm: 0.9999991875764858, iteration: 12933
loss: 1.0197893381118774,grad_norm: 0.9999990951566103, iteration: 12934
loss: 0.9983267188072205,grad_norm: 0.9999991049216054, iteration: 12935
loss: 1.0430632829666138,grad_norm: 0.9999992635918221, iteration: 12936
loss: 0.9955746531486511,grad_norm: 0.9999991652326562, iteration: 12937
loss: 1.0319952964782715,grad_norm: 0.9999990922785468, iteration: 12938
loss: 1.0347645282745361,grad_norm: 0.9999996932299675, iteration: 12939
loss: 1.0000478029251099,grad_norm: 0.9999992688449915, iteration: 12940
loss: 1.0798064470291138,grad_norm: 0.999999423664333, iteration: 12941
loss: 1.0178524255752563,grad_norm: 0.9999993796105484, iteration: 12942
loss: 1.0130857229232788,grad_norm: 0.999999212680893, iteration: 12943
loss: 1.0296293497085571,grad_norm: 0.9999997579737883, iteration: 12944
loss: 1.091538906097412,grad_norm: 0.9999994236650829, iteration: 12945
loss: 1.072943091392517,grad_norm: 0.9999990929275594, iteration: 12946
loss: 1.0111037492752075,grad_norm: 0.9999992831129847, iteration: 12947
loss: 1.050486445426941,grad_norm: 0.999999149442529, iteration: 12948
loss: 1.0315239429473877,grad_norm: 0.999999215592421, iteration: 12949
loss: 1.0114763975143433,grad_norm: 0.9999991275291952, iteration: 12950
loss: 1.0312527418136597,grad_norm: 0.9999991258121772, iteration: 12951
loss: 1.0679231882095337,grad_norm: 0.9999992029219921, iteration: 12952
loss: 1.0461018085479736,grad_norm: 0.9999995230930243, iteration: 12953
loss: 1.0677450895309448,grad_norm: 0.9999996276323078, iteration: 12954
loss: 1.0911213159561157,grad_norm: 0.9999997474184706, iteration: 12955
loss: 1.0483862161636353,grad_norm: 0.9999994600008666, iteration: 12956
loss: 1.00432288646698,grad_norm: 0.9999993497487994, iteration: 12957
loss: 1.054344654083252,grad_norm: 0.9999992849612465, iteration: 12958
loss: 1.058770775794983,grad_norm: 0.9999994101764957, iteration: 12959
loss: 1.0453088283538818,grad_norm: 0.9999993150376052, iteration: 12960
loss: 0.9959456920623779,grad_norm: 0.9999993453265139, iteration: 12961
loss: 1.0010080337524414,grad_norm: 0.9999990805147952, iteration: 12962
loss: 1.0494472980499268,grad_norm: 0.9999993748214382, iteration: 12963
loss: 1.049994707107544,grad_norm: 0.9999991387289093, iteration: 12964
loss: 1.003833532333374,grad_norm: 0.9999994260451294, iteration: 12965
loss: 1.04606294631958,grad_norm: 0.9999991527611722, iteration: 12966
loss: 1.0482951402664185,grad_norm: 0.9999995755774962, iteration: 12967
loss: 0.9967991709709167,grad_norm: 0.9999991662132987, iteration: 12968
loss: 1.0072975158691406,grad_norm: 0.9999990574348591, iteration: 12969
loss: 1.0428385734558105,grad_norm: 0.9999992546731478, iteration: 12970
loss: 1.117468237876892,grad_norm: 0.9999994768290842, iteration: 12971
loss: 1.0430879592895508,grad_norm: 0.9999992320662793, iteration: 12972
loss: 1.0492514371871948,grad_norm: 0.9999991886616848, iteration: 12973
loss: 1.0252866744995117,grad_norm: 0.9999992984902486, iteration: 12974
loss: 1.032940149307251,grad_norm: 0.9999992562011039, iteration: 12975
loss: 1.0087915658950806,grad_norm: 0.999999310659308, iteration: 12976
loss: 1.0447559356689453,grad_norm: 0.9999995894462319, iteration: 12977
loss: 1.0482757091522217,grad_norm: 0.9999991174280006, iteration: 12978
loss: 1.03956139087677,grad_norm: 0.9999992686538105, iteration: 12979
loss: 0.9781333208084106,grad_norm: 0.9999992764502961, iteration: 12980
loss: 1.0672394037246704,grad_norm: 0.9999994610549896, iteration: 12981
loss: 0.9779356122016907,grad_norm: 0.9999991238568855, iteration: 12982
loss: 0.9856953024864197,grad_norm: 0.9999992299881327, iteration: 12983
loss: 1.069487452507019,grad_norm: 0.9999992797536903, iteration: 12984
loss: 1.0846530199050903,grad_norm: 0.9999991794864412, iteration: 12985
loss: 1.0369316339492798,grad_norm: 0.9999991701857747, iteration: 12986
loss: 1.0256866216659546,grad_norm: 0.9999993744553561, iteration: 12987
loss: 1.0065268278121948,grad_norm: 0.9999990789526222, iteration: 12988
loss: 0.9858666658401489,grad_norm: 0.9999992194751657, iteration: 12989
loss: 1.010115146636963,grad_norm: 0.9999991569817044, iteration: 12990
loss: 1.0400365591049194,grad_norm: 0.999999141988355, iteration: 12991
loss: 1.018458366394043,grad_norm: 0.9999992562573177, iteration: 12992
loss: 1.0371074676513672,grad_norm: 0.9999991747741559, iteration: 12993
loss: 1.0302221775054932,grad_norm: 0.9999992640265732, iteration: 12994
loss: 1.0531865358352661,grad_norm: 0.9999990645088986, iteration: 12995
loss: 1.0027457475662231,grad_norm: 0.999998987854639, iteration: 12996
loss: 1.0310486555099487,grad_norm: 0.9999992559369076, iteration: 12997
loss: 1.0090481042861938,grad_norm: 0.9999998704444847, iteration: 12998
loss: 1.0530049800872803,grad_norm: 0.9999992294397745, iteration: 12999
loss: 1.0166114568710327,grad_norm: 0.9999992826501822, iteration: 13000
loss: 1.0743685960769653,grad_norm: 0.9999996140171319, iteration: 13001
loss: 1.0115939378738403,grad_norm: 0.9999991492916158, iteration: 13002
loss: 1.0601634979248047,grad_norm: 0.9999997151120462, iteration: 13003
loss: 1.0459562540054321,grad_norm: 0.9999992322552914, iteration: 13004
loss: 1.0220533609390259,grad_norm: 0.9999992930377705, iteration: 13005
loss: 1.0577723979949951,grad_norm: 0.9999996526656509, iteration: 13006
loss: 0.9988772869110107,grad_norm: 0.9639884192987928, iteration: 13007
loss: 1.0672852993011475,grad_norm: 0.9999997030962928, iteration: 13008
loss: 1.0377099514007568,grad_norm: 0.999999236918381, iteration: 13009
loss: 1.0438590049743652,grad_norm: 0.9999992016742896, iteration: 13010
loss: 1.0249688625335693,grad_norm: 0.9999992353796257, iteration: 13011
loss: 0.9976544380187988,grad_norm: 0.9999992658126368, iteration: 13012
loss: 1.0281282663345337,grad_norm: 0.9999993633650557, iteration: 13013
loss: 0.9964779019355774,grad_norm: 0.9999991436925648, iteration: 13014
loss: 1.0269055366516113,grad_norm: 0.9999993224065364, iteration: 13015
loss: 1.0308176279067993,grad_norm: 0.9999996891216734, iteration: 13016
loss: 1.0128850936889648,grad_norm: 0.9999990395808708, iteration: 13017
loss: 1.0608298778533936,grad_norm: 0.9999991630323098, iteration: 13018
loss: 1.0133095979690552,grad_norm: 0.9999992105312487, iteration: 13019
loss: 1.0681604146957397,grad_norm: 0.999999336681962, iteration: 13020
loss: 1.006081461906433,grad_norm: 0.9999993069827856, iteration: 13021
loss: 1.0166006088256836,grad_norm: 0.9999994606351726, iteration: 13022
loss: 1.0307196378707886,grad_norm: 0.9999991380502634, iteration: 13023
loss: 0.9609130620956421,grad_norm: 0.9999991397017346, iteration: 13024
loss: 1.0287132263183594,grad_norm: 0.9999991599541126, iteration: 13025
loss: 1.0265926122665405,grad_norm: 0.9999997157174682, iteration: 13026
loss: 1.021713376045227,grad_norm: 0.9999990498456429, iteration: 13027
loss: 0.9650083780288696,grad_norm: 0.999999234728231, iteration: 13028
loss: 1.037758231163025,grad_norm: 0.9999991989653821, iteration: 13029
loss: 1.0656485557556152,grad_norm: 0.9999992447068191, iteration: 13030
loss: 1.0000276565551758,grad_norm: 0.9999993328952602, iteration: 13031
loss: 1.0243850946426392,grad_norm: 0.9999990761713469, iteration: 13032
loss: 1.0373966693878174,grad_norm: 0.9999993023882954, iteration: 13033
loss: 1.0954504013061523,grad_norm: 0.9999996119326507, iteration: 13034
loss: 1.0681793689727783,grad_norm: 0.9999997192933685, iteration: 13035
loss: 1.0378531217575073,grad_norm: 0.9999993427062344, iteration: 13036
loss: 1.0427637100219727,grad_norm: 0.9999991414209507, iteration: 13037
loss: 1.0448877811431885,grad_norm: 0.9999993408122863, iteration: 13038
loss: 1.031138300895691,grad_norm: 0.9999991119619174, iteration: 13039
loss: 1.0437754392623901,grad_norm: 0.9999991110866744, iteration: 13040
loss: 1.1164246797561646,grad_norm: 0.999999092661357, iteration: 13041
loss: 0.9860747456550598,grad_norm: 0.9999991816941924, iteration: 13042
loss: 1.0207066535949707,grad_norm: 0.999999059777589, iteration: 13043
loss: 1.0219202041625977,grad_norm: 0.9999993511617307, iteration: 13044
loss: 1.0404917001724243,grad_norm: 0.9999998287931161, iteration: 13045
loss: 1.0313864946365356,grad_norm: 0.9999995632893395, iteration: 13046
loss: 1.0467573404312134,grad_norm: 0.9999994907904259, iteration: 13047
loss: 1.0422508716583252,grad_norm: 0.999999124299107, iteration: 13048
loss: 0.9838957190513611,grad_norm: 0.9999992356262326, iteration: 13049
loss: 1.0457199811935425,grad_norm: 0.9999995290707637, iteration: 13050
loss: 0.9951695203781128,grad_norm: 0.9999992051054173, iteration: 13051
loss: 1.0257230997085571,grad_norm: 0.9999990897922937, iteration: 13052
loss: 1.0281468629837036,grad_norm: 0.9615799258133336, iteration: 13053
loss: 0.9759436249732971,grad_norm: 0.9999992605137206, iteration: 13054
loss: 1.0331422090530396,grad_norm: 0.9999994345441786, iteration: 13055
loss: 1.0134351253509521,grad_norm: 0.9999994176506085, iteration: 13056
loss: 1.0192663669586182,grad_norm: 0.9999990981557684, iteration: 13057
loss: 1.0641248226165771,grad_norm: 0.9999992382596524, iteration: 13058
loss: 1.0092765092849731,grad_norm: 0.9999991867430839, iteration: 13059
loss: 1.0353572368621826,grad_norm: 0.9999991409895518, iteration: 13060
loss: 1.0340710878372192,grad_norm: 0.9999995541700777, iteration: 13061
loss: 1.0337871313095093,grad_norm: 0.9999992492132017, iteration: 13062
loss: 0.9676733016967773,grad_norm: 0.9980317129216953, iteration: 13063
loss: 1.0585087537765503,grad_norm: 0.9999994896044794, iteration: 13064
loss: 1.0547404289245605,grad_norm: 0.9999991006459843, iteration: 13065
loss: 1.0952461957931519,grad_norm: 0.9999994179519066, iteration: 13066
loss: 1.0057613849639893,grad_norm: 0.9999991253270955, iteration: 13067
loss: 1.0158774852752686,grad_norm: 0.9999996190734788, iteration: 13068
loss: 0.9872525930404663,grad_norm: 0.9999994163983227, iteration: 13069
loss: 1.014246940612793,grad_norm: 0.9999990760963838, iteration: 13070
loss: 1.0379770994186401,grad_norm: 0.999999152659867, iteration: 13071
loss: 1.0520286560058594,grad_norm: 0.9999990936330271, iteration: 13072
loss: 1.0348680019378662,grad_norm: 0.999999653406468, iteration: 13073
loss: 1.0487767457962036,grad_norm: 0.9999991793149201, iteration: 13074
loss: 1.0581448078155518,grad_norm: 0.9999991982082194, iteration: 13075
loss: 1.0230324268341064,grad_norm: 0.9999992803658302, iteration: 13076
loss: 1.0397647619247437,grad_norm: 0.9999992211001361, iteration: 13077
loss: 1.0501787662506104,grad_norm: 0.9999993990220498, iteration: 13078
loss: 1.0682241916656494,grad_norm: 0.9999994545683196, iteration: 13079
loss: 1.0161844491958618,grad_norm: 0.9538665931188014, iteration: 13080
loss: 1.0604954957962036,grad_norm: 0.9999990907276732, iteration: 13081
loss: 1.0325171947479248,grad_norm: 0.9999992005805771, iteration: 13082
loss: 1.0007740259170532,grad_norm: 0.9999992404257014, iteration: 13083
loss: 1.0363906621932983,grad_norm: 0.9999992271048915, iteration: 13084
loss: 1.02206552028656,grad_norm: 0.999999260407474, iteration: 13085
loss: 1.0095648765563965,grad_norm: 0.9999993834758704, iteration: 13086
loss: 1.050827980041504,grad_norm: 0.9999994242673285, iteration: 13087
loss: 1.051788330078125,grad_norm: 0.9999993370785635, iteration: 13088
loss: 1.0709587335586548,grad_norm: 0.999999356973775, iteration: 13089
loss: 1.0515042543411255,grad_norm: 0.9999996083685438, iteration: 13090
loss: 1.0117311477661133,grad_norm: 0.9999990331514884, iteration: 13091
loss: 1.0203803777694702,grad_norm: 0.9999993653375471, iteration: 13092
loss: 0.9926712512969971,grad_norm: 0.9999992336644106, iteration: 13093
loss: 0.9987248778343201,grad_norm: 0.9999992793734922, iteration: 13094
loss: 1.022483468055725,grad_norm: 0.9999994302093346, iteration: 13095
loss: 1.0317915678024292,grad_norm: 0.999999337776397, iteration: 13096
loss: 0.9971135258674622,grad_norm: 0.9999991916956978, iteration: 13097
loss: 1.0331624746322632,grad_norm: 0.9999992444192787, iteration: 13098
loss: 0.9876638650894165,grad_norm: 0.9999990564708867, iteration: 13099
loss: 1.014032244682312,grad_norm: 0.9999991044477518, iteration: 13100
loss: 1.0081385374069214,grad_norm: 0.9999991843448043, iteration: 13101
loss: 1.0419728755950928,grad_norm: 0.9999994446638529, iteration: 13102
loss: 0.9540175795555115,grad_norm: 0.9999990293235677, iteration: 13103
loss: 1.034140944480896,grad_norm: 0.9999991564001556, iteration: 13104
loss: 1.0581531524658203,grad_norm: 0.9999996187261472, iteration: 13105
loss: 1.015697956085205,grad_norm: 0.9999992764926982, iteration: 13106
loss: 1.0525099039077759,grad_norm: 0.9999992302752563, iteration: 13107
loss: 0.958344042301178,grad_norm: 0.9999993391869413, iteration: 13108
loss: 0.9617440104484558,grad_norm: 0.9999993190666643, iteration: 13109
loss: 0.9880566596984863,grad_norm: 0.9999991197306649, iteration: 13110
loss: 1.0445044040679932,grad_norm: 0.9999994921733222, iteration: 13111
loss: 1.057432770729065,grad_norm: 0.9999992541129301, iteration: 13112
loss: 1.0861785411834717,grad_norm: 0.9999998168697837, iteration: 13113
loss: 1.0343416929244995,grad_norm: 0.9999992464358971, iteration: 13114
loss: 1.0774916410446167,grad_norm: 0.9999993244290508, iteration: 13115
loss: 0.980595588684082,grad_norm: 0.9999991765795438, iteration: 13116
loss: 1.1155887842178345,grad_norm: 0.9999996099127788, iteration: 13117
loss: 1.068419337272644,grad_norm: 0.999999279990091, iteration: 13118
loss: 1.05565345287323,grad_norm: 0.999999268836375, iteration: 13119
loss: 1.0512832403182983,grad_norm: 0.999999270936162, iteration: 13120
loss: 1.0775856971740723,grad_norm: 0.9999990941242802, iteration: 13121
loss: 0.9999894499778748,grad_norm: 0.9999991872807616, iteration: 13122
loss: 1.0489782094955444,grad_norm: 0.9999992959140244, iteration: 13123
loss: 0.9734193086624146,grad_norm: 0.9999991306160767, iteration: 13124
loss: 1.0546836853027344,grad_norm: 0.9999993524305669, iteration: 13125
loss: 1.0086605548858643,grad_norm: 0.9999993047299526, iteration: 13126
loss: 1.0544993877410889,grad_norm: 0.9824431571619694, iteration: 13127
loss: 1.0301727056503296,grad_norm: 0.9999992535787715, iteration: 13128
loss: 1.0005757808685303,grad_norm: 0.9999992978991652, iteration: 13129
loss: 1.0604205131530762,grad_norm: 0.9999994510356903, iteration: 13130
loss: 1.094544529914856,grad_norm: 0.9999992357514301, iteration: 13131
loss: 1.0106992721557617,grad_norm: 0.9999991323141342, iteration: 13132
loss: 1.013057827949524,grad_norm: 0.9878282158561614, iteration: 13133
loss: 1.0235271453857422,grad_norm: 0.9999996713773003, iteration: 13134
loss: 1.0640521049499512,grad_norm: 0.9999993612979178, iteration: 13135
loss: 1.027446985244751,grad_norm: 0.9999993791481391, iteration: 13136
loss: 0.9999444484710693,grad_norm: 0.9999989978847942, iteration: 13137
loss: 1.0246591567993164,grad_norm: 0.999999467373236, iteration: 13138
loss: 1.064584732055664,grad_norm: 0.9999991035497386, iteration: 13139
loss: 1.0336912870407104,grad_norm: 0.9999990594334274, iteration: 13140
loss: 1.0215916633605957,grad_norm: 0.9999994038079086, iteration: 13141
loss: 1.0162993669509888,grad_norm: 0.9999992171972739, iteration: 13142
loss: 0.986487090587616,grad_norm: 0.9999991117055884, iteration: 13143
loss: 1.0267616510391235,grad_norm: 0.9078022769890988, iteration: 13144
loss: 1.0227771997451782,grad_norm: 0.9558797261773853, iteration: 13145
loss: 1.0335383415222168,grad_norm: 0.9999991167461486, iteration: 13146
loss: 1.0561044216156006,grad_norm: 0.9999992942370871, iteration: 13147
loss: 1.0053589344024658,grad_norm: 0.999999233998226, iteration: 13148
loss: 0.9946773052215576,grad_norm: 0.9883352837600515, iteration: 13149
loss: 1.0381361246109009,grad_norm: 0.9999992237827053, iteration: 13150
loss: 1.0466479063034058,grad_norm: 0.99999910031088, iteration: 13151
loss: 1.0116772651672363,grad_norm: 0.9999992537497931, iteration: 13152
loss: 1.0542516708374023,grad_norm: 0.9999995883892268, iteration: 13153
loss: 1.0241907835006714,grad_norm: 0.9999994298488734, iteration: 13154
loss: 1.0320584774017334,grad_norm: 0.9999992388329284, iteration: 13155
loss: 1.0450921058654785,grad_norm: 0.9999996745239262, iteration: 13156
loss: 1.0237960815429688,grad_norm: 0.9999991972192834, iteration: 13157
loss: 1.0507115125656128,grad_norm: 0.9999991360459064, iteration: 13158
loss: 1.0488587617874146,grad_norm: 0.9999991634915998, iteration: 13159
loss: 0.9850353002548218,grad_norm: 0.9999990316902566, iteration: 13160
loss: 1.0453803539276123,grad_norm: 0.999999316158683, iteration: 13161
loss: 1.035267949104309,grad_norm: 0.9999990524559661, iteration: 13162
loss: 1.0222889184951782,grad_norm: 0.9999992279055644, iteration: 13163
loss: 1.0430597066879272,grad_norm: 0.9999993381888073, iteration: 13164
loss: 1.0668333768844604,grad_norm: 0.9999995822618402, iteration: 13165
loss: 1.0058830976486206,grad_norm: 0.9999990483073926, iteration: 13166
loss: 1.0521405935287476,grad_norm: 0.9999991356421299, iteration: 13167
loss: 1.0515638589859009,grad_norm: 0.9999991540233655, iteration: 13168
loss: 0.9881888628005981,grad_norm: 0.9999993301867348, iteration: 13169
loss: 1.0326528549194336,grad_norm: 0.9999992330383315, iteration: 13170
loss: 1.0362741947174072,grad_norm: 0.9999991737446551, iteration: 13171
loss: 1.051095724105835,grad_norm: 0.999999208685194, iteration: 13172
loss: 1.0205557346343994,grad_norm: 0.9999992237768397, iteration: 13173
loss: 0.9932327270507812,grad_norm: 0.9999992808402912, iteration: 13174
loss: 1.021916389465332,grad_norm: 0.9999991516430181, iteration: 13175
loss: 1.0349270105361938,grad_norm: 0.9999992500028342, iteration: 13176
loss: 1.008687138557434,grad_norm: 0.9999995725266793, iteration: 13177
loss: 1.0292494297027588,grad_norm: 0.9999994789133935, iteration: 13178
loss: 0.971113920211792,grad_norm: 0.9999992002191433, iteration: 13179
loss: 1.046744704246521,grad_norm: 0.9999991543611647, iteration: 13180
loss: 1.0372849702835083,grad_norm: 0.9999992303477592, iteration: 13181
loss: 1.0249489545822144,grad_norm: 0.9999992664015821, iteration: 13182
loss: 1.0356820821762085,grad_norm: 0.9999991451034509, iteration: 13183
loss: 0.9955713152885437,grad_norm: 0.9999992545957646, iteration: 13184
loss: 1.038903832435608,grad_norm: 0.9999990538999866, iteration: 13185
loss: 1.0220333337783813,grad_norm: 0.9999991194083706, iteration: 13186
loss: 1.0138169527053833,grad_norm: 0.9999991832889814, iteration: 13187
loss: 1.033707618713379,grad_norm: 0.9999992316839296, iteration: 13188
loss: 0.9762770533561707,grad_norm: 0.9999991846102759, iteration: 13189
loss: 1.0038138628005981,grad_norm: 0.999999121552794, iteration: 13190
loss: 1.0590639114379883,grad_norm: 0.9999991356030211, iteration: 13191
loss: 1.100257396697998,grad_norm: 0.9999997136617775, iteration: 13192
loss: 1.0574519634246826,grad_norm: 0.9999993679390394, iteration: 13193
loss: 1.0270986557006836,grad_norm: 0.9999992431657965, iteration: 13194
loss: 1.0502740144729614,grad_norm: 0.9999992857788087, iteration: 13195
loss: 1.063496470451355,grad_norm: 0.9999993295528521, iteration: 13196
loss: 1.0815236568450928,grad_norm: 0.999999171689163, iteration: 13197
loss: 1.0200984477996826,grad_norm: 0.999999436235532, iteration: 13198
loss: 1.015813946723938,grad_norm: 0.9999992364454116, iteration: 13199
loss: 0.9880578517913818,grad_norm: 0.999999211735634, iteration: 13200
loss: 1.0309383869171143,grad_norm: 0.9999993295707826, iteration: 13201
loss: 0.9986134171485901,grad_norm: 0.9999992537018861, iteration: 13202
loss: 1.0082589387893677,grad_norm: 0.9999993694911238, iteration: 13203
loss: 1.0833653211593628,grad_norm: 0.9999997386467102, iteration: 13204
loss: 1.0322688817977905,grad_norm: 0.9999993646316568, iteration: 13205
loss: 1.0186126232147217,grad_norm: 0.9999991371248018, iteration: 13206
loss: 1.0264195203781128,grad_norm: 0.9999992656101929, iteration: 13207
loss: 1.0314834117889404,grad_norm: 0.99999923821187, iteration: 13208
loss: 1.0166317224502563,grad_norm: 0.9999991384362116, iteration: 13209
loss: 1.047777533531189,grad_norm: 0.9999994468549216, iteration: 13210
loss: 1.0371850728988647,grad_norm: 0.9999991975536112, iteration: 13211
loss: 1.0072726011276245,grad_norm: 0.9999992617162432, iteration: 13212
loss: 1.026206135749817,grad_norm: 0.9999990932776989, iteration: 13213
loss: 1.0245020389556885,grad_norm: 0.9999993320913853, iteration: 13214
loss: 1.027791976928711,grad_norm: 0.9999992788018177, iteration: 13215
loss: 1.0702303647994995,grad_norm: 0.9999991133649885, iteration: 13216
loss: 1.026668906211853,grad_norm: 0.999999085609485, iteration: 13217
loss: 1.0098415613174438,grad_norm: 0.9999990670809651, iteration: 13218
loss: 1.0140602588653564,grad_norm: 0.9999991480972303, iteration: 13219
loss: 1.0967291593551636,grad_norm: 0.9999992296149924, iteration: 13220
loss: 1.0142282247543335,grad_norm: 0.9999992913214004, iteration: 13221
loss: 1.045962929725647,grad_norm: 0.9999993308680917, iteration: 13222
loss: 1.0176880359649658,grad_norm: 0.9631751940525761, iteration: 13223
loss: 1.002814769744873,grad_norm: 0.9999991909212523, iteration: 13224
loss: 1.034450650215149,grad_norm: 0.9999990733830427, iteration: 13225
loss: 0.9953801035881042,grad_norm: 0.9999992414022394, iteration: 13226
loss: 1.0559004545211792,grad_norm: 0.9999992039457779, iteration: 13227
loss: 1.0060404539108276,grad_norm: 0.9999991306322342, iteration: 13228
loss: 1.0365140438079834,grad_norm: 0.9999993813867024, iteration: 13229
loss: 1.0507986545562744,grad_norm: 0.9999992497742288, iteration: 13230
loss: 1.019901156425476,grad_norm: 0.9999990560295257, iteration: 13231
loss: 1.0330288410186768,grad_norm: 0.9999990246228305, iteration: 13232
loss: 1.0530387163162231,grad_norm: 0.9999991372105425, iteration: 13233
loss: 1.0375325679779053,grad_norm: 0.9999990844076878, iteration: 13234
loss: 1.0207172632217407,grad_norm: 0.9999991603087858, iteration: 13235
loss: 1.0387018918991089,grad_norm: 0.999999132404273, iteration: 13236
loss: 1.101007103919983,grad_norm: 0.9999992305426817, iteration: 13237
loss: 1.0075989961624146,grad_norm: 0.9999991562455773, iteration: 13238
loss: 0.9932488799095154,grad_norm: 0.9999991601774599, iteration: 13239
loss: 1.0276154279708862,grad_norm: 0.9999990937726912, iteration: 13240
loss: 1.0273734331130981,grad_norm: 0.9999990284312252, iteration: 13241
loss: 1.0559254884719849,grad_norm: 0.9999993127800668, iteration: 13242
loss: 1.0420818328857422,grad_norm: 0.9999992165904722, iteration: 13243
loss: 1.0317844152450562,grad_norm: 0.9999991480234356, iteration: 13244
loss: 1.0198166370391846,grad_norm: 0.9999990663959615, iteration: 13245
loss: 1.0277408361434937,grad_norm: 0.9999991804823433, iteration: 13246
loss: 1.03620445728302,grad_norm: 0.9106441480993254, iteration: 13247
loss: 1.0267986059188843,grad_norm: 0.9999991686109732, iteration: 13248
loss: 1.0949435234069824,grad_norm: 0.9999996783487652, iteration: 13249
loss: 1.0399566888809204,grad_norm: 0.9999991207242832, iteration: 13250
loss: 1.0072760581970215,grad_norm: 0.9999993514611057, iteration: 13251
loss: 1.0356395244598389,grad_norm: 0.9999990862392633, iteration: 13252
loss: 0.9968637824058533,grad_norm: 0.9999990600725078, iteration: 13253
loss: 1.0392916202545166,grad_norm: 0.9999992160577001, iteration: 13254
loss: 1.0548152923583984,grad_norm: 0.999999219996913, iteration: 13255
loss: 0.9816220998764038,grad_norm: 0.9999992215998366, iteration: 13256
loss: 1.0461492538452148,grad_norm: 0.9999992088876354, iteration: 13257
loss: 0.9864006042480469,grad_norm: 0.9999991643480447, iteration: 13258
loss: 0.9935221672058105,grad_norm: 0.9999992027187741, iteration: 13259
loss: 1.0345611572265625,grad_norm: 0.9999990903248581, iteration: 13260
loss: 0.9986212253570557,grad_norm: 0.9999992029203453, iteration: 13261
loss: 0.9811773896217346,grad_norm: 0.9999991270224577, iteration: 13262
loss: 1.0007058382034302,grad_norm: 0.9999992407554688, iteration: 13263
loss: 1.0021687746047974,grad_norm: 0.9999993030190191, iteration: 13264
loss: 0.979563295841217,grad_norm: 0.999999797180789, iteration: 13265
loss: 0.9736671447753906,grad_norm: 0.999999331087007, iteration: 13266
loss: 1.0488190650939941,grad_norm: 0.9999991314123439, iteration: 13267
loss: 0.9937652349472046,grad_norm: 0.9999993078459639, iteration: 13268
loss: 0.9980632662773132,grad_norm: 0.9999992614937105, iteration: 13269
loss: 1.0284253358840942,grad_norm: 0.999999147738918, iteration: 13270
loss: 0.988490879535675,grad_norm: 0.999999325532327, iteration: 13271
loss: 1.0943862199783325,grad_norm: 0.9999994869039965, iteration: 13272
loss: 1.0285063982009888,grad_norm: 0.999999301753113, iteration: 13273
loss: 1.0405380725860596,grad_norm: 0.9999991556604284, iteration: 13274
loss: 1.0333210229873657,grad_norm: 0.9999992908393901, iteration: 13275
loss: 1.0113054513931274,grad_norm: 0.9999992651191977, iteration: 13276
loss: 0.9962919354438782,grad_norm: 0.9999991987954014, iteration: 13277
loss: 1.0104731321334839,grad_norm: 0.9999991296772109, iteration: 13278
loss: 1.0280675888061523,grad_norm: 0.999999104351494, iteration: 13279
loss: 1.0496811866760254,grad_norm: 0.9999991450968491, iteration: 13280
loss: 1.0343419313430786,grad_norm: 0.9875391607880428, iteration: 13281
loss: 1.097683072090149,grad_norm: 0.9999996268252955, iteration: 13282
loss: 1.0337291955947876,grad_norm: 0.9999991410037394, iteration: 13283
loss: 1.075079321861267,grad_norm: 0.999999317638458, iteration: 13284
loss: 1.0407295227050781,grad_norm: 0.9999992176341858, iteration: 13285
loss: 1.0037270784378052,grad_norm: 0.9999993801126688, iteration: 13286
loss: 1.0333584547042847,grad_norm: 0.9999991123365449, iteration: 13287
loss: 1.0203419923782349,grad_norm: 0.9999993217853421, iteration: 13288
loss: 1.0012470483779907,grad_norm: 0.9999991370680803, iteration: 13289
loss: 1.01975679397583,grad_norm: 0.9999990011440899, iteration: 13290
loss: 0.985310971736908,grad_norm: 0.9999993660457077, iteration: 13291
loss: 1.0972914695739746,grad_norm: 0.9999992367196578, iteration: 13292
loss: 1.0217005014419556,grad_norm: 0.9999991513391092, iteration: 13293
loss: 1.0142027139663696,grad_norm: 0.9999990968103883, iteration: 13294
loss: 1.1159720420837402,grad_norm: 0.9999999493337657, iteration: 13295
loss: 1.040658950805664,grad_norm: 0.9999993216825477, iteration: 13296
loss: 1.071829080581665,grad_norm: 0.9999995991300962, iteration: 13297
loss: 1.0087724924087524,grad_norm: 0.9999995071473168, iteration: 13298
loss: 1.0530176162719727,grad_norm: 0.9999990452876791, iteration: 13299
loss: 0.9692837595939636,grad_norm: 0.995243197682993, iteration: 13300
loss: 1.033414363861084,grad_norm: 0.9999994683423316, iteration: 13301
loss: 1.0352703332901,grad_norm: 0.9999995523066326, iteration: 13302
loss: 1.0254255533218384,grad_norm: 0.9999992332222554, iteration: 13303
loss: 1.0195658206939697,grad_norm: 0.9999992483062988, iteration: 13304
loss: 1.0507715940475464,grad_norm: 0.9999993535770555, iteration: 13305
loss: 1.033564567565918,grad_norm: 0.9999996607269052, iteration: 13306
loss: 1.0568596124649048,grad_norm: 0.9999992168104709, iteration: 13307
loss: 1.0127960443496704,grad_norm: 0.9999992689917874, iteration: 13308
loss: 1.0197898149490356,grad_norm: 0.9999993012135442, iteration: 13309
loss: 1.0149633884429932,grad_norm: 0.9999992335435236, iteration: 13310
loss: 1.0336264371871948,grad_norm: 0.9999992994691391, iteration: 13311
loss: 1.0345985889434814,grad_norm: 0.9999995101543435, iteration: 13312
loss: 0.9468806982040405,grad_norm: 0.999999162937512, iteration: 13313
loss: 1.0168315172195435,grad_norm: 0.9999993580483123, iteration: 13314
loss: 1.053763747215271,grad_norm: 0.9847577613511919, iteration: 13315
loss: 1.0650676488876343,grad_norm: 0.9999993332218997, iteration: 13316
loss: 1.0229026079177856,grad_norm: 0.9999992482039283, iteration: 13317
loss: 1.0191802978515625,grad_norm: 0.9999992441381506, iteration: 13318
loss: 1.0574179887771606,grad_norm: 0.9999994147514013, iteration: 13319
loss: 1.0248664617538452,grad_norm: 0.999999241239993, iteration: 13320
loss: 1.062649130821228,grad_norm: 0.999999462041303, iteration: 13321
loss: 1.067121982574463,grad_norm: 0.9999993134444143, iteration: 13322
loss: 1.0182336568832397,grad_norm: 0.9999990708222488, iteration: 13323
loss: 1.1479238271713257,grad_norm: 0.9999997466201294, iteration: 13324
loss: 1.0834147930145264,grad_norm: 0.9999995287603082, iteration: 13325
loss: 1.0217541456222534,grad_norm: 0.9999994304088481, iteration: 13326
loss: 1.0195685625076294,grad_norm: 0.9999993009230695, iteration: 13327
loss: 1.053214430809021,grad_norm: 0.9999993702426001, iteration: 13328
loss: 1.0004172325134277,grad_norm: 0.999999186087419, iteration: 13329
loss: 1.0334311723709106,grad_norm: 0.9999996594664293, iteration: 13330
loss: 1.0570082664489746,grad_norm: 0.9999995894673241, iteration: 13331
loss: 1.035671353340149,grad_norm: 0.9999995700374139, iteration: 13332
loss: 1.001475214958191,grad_norm: 0.9818845500210525, iteration: 13333
loss: 1.0165491104125977,grad_norm: 0.9768450219275804, iteration: 13334
loss: 1.0342907905578613,grad_norm: 0.999999298826782, iteration: 13335
loss: 1.0033518075942993,grad_norm: 0.9999991682773666, iteration: 13336
loss: 1.0054184198379517,grad_norm: 0.9999991221568582, iteration: 13337
loss: 0.9959973096847534,grad_norm: 0.9999991089065211, iteration: 13338
loss: 1.0274864435195923,grad_norm: 0.9521079018147891, iteration: 13339
loss: 1.0802059173583984,grad_norm: 0.9999994717885593, iteration: 13340
loss: 1.0221431255340576,grad_norm: 0.999999056610709, iteration: 13341
loss: 1.0529049634933472,grad_norm: 0.9999991393428658, iteration: 13342
loss: 1.0369659662246704,grad_norm: 0.9999993354227089, iteration: 13343
loss: 1.0017884969711304,grad_norm: 0.9999994540382652, iteration: 13344
loss: 1.0687257051467896,grad_norm: 0.9999993832947033, iteration: 13345
loss: 0.9976523518562317,grad_norm: 0.9999992754490328, iteration: 13346
loss: 1.0000381469726562,grad_norm: 0.9999992937767671, iteration: 13347
loss: 1.0563441514968872,grad_norm: 0.9999994003318868, iteration: 13348
loss: 1.0296069383621216,grad_norm: 0.9844510983207899, iteration: 13349
loss: 1.0121357440948486,grad_norm: 0.9999992502057595, iteration: 13350
loss: 1.0322412252426147,grad_norm: 0.9753274192201895, iteration: 13351
loss: 1.0146669149398804,grad_norm: 0.999999231394246, iteration: 13352
loss: 1.0201072692871094,grad_norm: 0.9999993252480599, iteration: 13353
loss: 0.9845148324966431,grad_norm: 0.999999260289456, iteration: 13354
loss: 0.9947736859321594,grad_norm: 0.999999213365046, iteration: 13355
loss: 1.0820083618164062,grad_norm: 0.9999995658843667, iteration: 13356
loss: 1.0057430267333984,grad_norm: 0.999999702194368, iteration: 13357
loss: 1.0398834943771362,grad_norm: 0.9999990477994862, iteration: 13358
loss: 1.0246769189834595,grad_norm: 0.9999993546349563, iteration: 13359
loss: 1.0267013311386108,grad_norm: 0.9999993392385758, iteration: 13360
loss: 1.0054829120635986,grad_norm: 0.9999990574495868, iteration: 13361
loss: 1.0079877376556396,grad_norm: 0.9999992838815128, iteration: 13362
loss: 1.0794259309768677,grad_norm: 0.9999993243705042, iteration: 13363
loss: 1.0560163259506226,grad_norm: 0.9999992930598641, iteration: 13364
loss: 1.04185152053833,grad_norm: 0.9999990998939937, iteration: 13365
loss: 0.997894823551178,grad_norm: 0.9999991219238162, iteration: 13366
loss: 1.0398352146148682,grad_norm: 0.999999128162824, iteration: 13367
loss: 1.1318864822387695,grad_norm: 0.9999997793754211, iteration: 13368
loss: 1.0281480550765991,grad_norm: 0.9999991222087046, iteration: 13369
loss: 0.9812760353088379,grad_norm: 0.9999992091513811, iteration: 13370
loss: 0.9149770736694336,grad_norm: 0.9999992219600489, iteration: 13371
loss: 1.036334753036499,grad_norm: 0.9999992994146462, iteration: 13372
loss: 1.0195116996765137,grad_norm: 0.9999991412728911, iteration: 13373
loss: 1.0071057081222534,grad_norm: 0.9999993880759122, iteration: 13374
loss: 1.0387953519821167,grad_norm: 0.9999992034953059, iteration: 13375
loss: 1.0472917556762695,grad_norm: 0.9999993837324787, iteration: 13376
loss: 1.0483880043029785,grad_norm: 0.9999994745013189, iteration: 13377
loss: 1.0151560306549072,grad_norm: 0.9999992096813757, iteration: 13378
loss: 1.0351979732513428,grad_norm: 0.9999991362852128, iteration: 13379
loss: 1.0627610683441162,grad_norm: 0.999999455220671, iteration: 13380
loss: 1.032817006111145,grad_norm: 0.9999992388152915, iteration: 13381
loss: 0.9576048254966736,grad_norm: 0.9999994773397397, iteration: 13382
loss: 1.0092837810516357,grad_norm: 0.9999991663631544, iteration: 13383
loss: 1.0037094354629517,grad_norm: 0.9999990552367528, iteration: 13384
loss: 1.0633004903793335,grad_norm: 0.9999992115047814, iteration: 13385
loss: 1.0219560861587524,grad_norm: 0.9999991541475186, iteration: 13386
loss: 1.0366017818450928,grad_norm: 0.9999993478760848, iteration: 13387
loss: 1.0139456987380981,grad_norm: 0.9999992162425246, iteration: 13388
loss: 0.9675251245498657,grad_norm: 0.9999991867523492, iteration: 13389
loss: 1.012487769126892,grad_norm: 0.9999993880130761, iteration: 13390
loss: 1.0300325155258179,grad_norm: 0.999999297506309, iteration: 13391
loss: 1.0521814823150635,grad_norm: 0.9999995843915882, iteration: 13392
loss: 0.9983802437782288,grad_norm: 0.999999278662798, iteration: 13393
loss: 1.0640699863433838,grad_norm: 0.9999992756102423, iteration: 13394
loss: 1.0444833040237427,grad_norm: 0.9984706719924525, iteration: 13395
loss: 1.0667126178741455,grad_norm: 0.9999993868835173, iteration: 13396
loss: 1.0022658109664917,grad_norm: 0.9999991546200235, iteration: 13397
loss: 1.0016051530838013,grad_norm: 0.9999990164741688, iteration: 13398
loss: 0.9750431180000305,grad_norm: 0.999999280119873, iteration: 13399
loss: 1.030733585357666,grad_norm: 0.9999992175655393, iteration: 13400
loss: 1.0764468908309937,grad_norm: 0.999999798318425, iteration: 13401
loss: 1.0411992073059082,grad_norm: 0.9999991938697192, iteration: 13402
loss: 1.0242644548416138,grad_norm: 0.9999992749016383, iteration: 13403
loss: 1.0104902982711792,grad_norm: 0.999999326750275, iteration: 13404
loss: 1.0575815439224243,grad_norm: 0.9999996759339975, iteration: 13405
loss: 1.000756025314331,grad_norm: 0.9999991575734806, iteration: 13406
loss: 1.031593918800354,grad_norm: 0.9999991850417851, iteration: 13407
loss: 1.0288002490997314,grad_norm: 0.9999994399091441, iteration: 13408
loss: 1.032499074935913,grad_norm: 0.9999991757234066, iteration: 13409
loss: 0.9928706288337708,grad_norm: 0.9999991856534165, iteration: 13410
loss: 0.9936243295669556,grad_norm: 0.9999992067311129, iteration: 13411
loss: 1.0522167682647705,grad_norm: 0.9999990874981454, iteration: 13412
loss: 0.9805707335472107,grad_norm: 0.9999992757257623, iteration: 13413
loss: 1.0304173231124878,grad_norm: 0.9999990554611751, iteration: 13414
loss: 1.0357320308685303,grad_norm: 0.9999993113978074, iteration: 13415
loss: 1.0639137029647827,grad_norm: 0.9999996871769629, iteration: 13416
loss: 1.0295041799545288,grad_norm: 0.9999991601700591, iteration: 13417
loss: 1.0149494409561157,grad_norm: 0.9999991173230992, iteration: 13418
loss: 1.0213055610656738,grad_norm: 0.9999994646437572, iteration: 13419
loss: 1.0292752981185913,grad_norm: 0.9999993465572562, iteration: 13420
loss: 1.0215210914611816,grad_norm: 0.9999993311346976, iteration: 13421
loss: 1.044321060180664,grad_norm: 0.9999990506859359, iteration: 13422
loss: 1.0254675149917603,grad_norm: 0.9999993108647621, iteration: 13423
loss: 1.0336061716079712,grad_norm: 0.9999993057065275, iteration: 13424
loss: 0.9962713122367859,grad_norm: 0.9999992631881792, iteration: 13425
loss: 1.0147738456726074,grad_norm: 0.9999991765260288, iteration: 13426
loss: 1.046699047088623,grad_norm: 0.999999239849995, iteration: 13427
loss: 0.9710554480552673,grad_norm: 0.9999991000151895, iteration: 13428
loss: 0.9939258694648743,grad_norm: 0.9999993999378082, iteration: 13429
loss: 1.0170435905456543,grad_norm: 0.9999991128966907, iteration: 13430
loss: 1.1198443174362183,grad_norm: 0.9999997716398419, iteration: 13431
loss: 1.0176935195922852,grad_norm: 0.9999993697113069, iteration: 13432
loss: 0.9990101456642151,grad_norm: 0.9999991611397918, iteration: 13433
loss: 1.0543599128723145,grad_norm: 0.9999994384340865, iteration: 13434
loss: 1.0714080333709717,grad_norm: 0.9999993176736939, iteration: 13435
loss: 1.008193016052246,grad_norm: 0.9999992646445005, iteration: 13436
loss: 1.0332227945327759,grad_norm: 0.9999990874433786, iteration: 13437
loss: 1.017883062362671,grad_norm: 0.9999992419665198, iteration: 13438
loss: 1.0164730548858643,grad_norm: 0.9999994601001321, iteration: 13439
loss: 1.0283136367797852,grad_norm: 0.9999994010835747, iteration: 13440
loss: 1.0288677215576172,grad_norm: 0.9628100657269786, iteration: 13441
loss: 0.9958266615867615,grad_norm: 0.9999995530056824, iteration: 13442
loss: 1.0106974840164185,grad_norm: 0.9999990274172283, iteration: 13443
loss: 1.0779526233673096,grad_norm: 0.99999930697927, iteration: 13444
loss: 0.9892268180847168,grad_norm: 0.9999990530493986, iteration: 13445
loss: 1.0458080768585205,grad_norm: 0.9999995983240131, iteration: 13446
loss: 1.0591670274734497,grad_norm: 0.9999995269112311, iteration: 13447
loss: 1.0194220542907715,grad_norm: 0.9372471732873182, iteration: 13448
loss: 1.0486119985580444,grad_norm: 0.999999538216078, iteration: 13449
loss: 1.050516963005066,grad_norm: 0.9999993236764007, iteration: 13450
loss: 0.9872958660125732,grad_norm: 0.9999991416796792, iteration: 13451
loss: 1.024213433265686,grad_norm: 0.9054669187410208, iteration: 13452
loss: 0.9823839068412781,grad_norm: 0.9583674188524727, iteration: 13453
loss: 1.0361560583114624,grad_norm: 0.9999992016100833, iteration: 13454
loss: 1.1060301065444946,grad_norm: 0.9999995135806938, iteration: 13455
loss: 1.032735824584961,grad_norm: 0.9341574977700055, iteration: 13456
loss: 1.0444270372390747,grad_norm: 0.999999021613997, iteration: 13457
loss: 1.0769201517105103,grad_norm: 0.9999990144224631, iteration: 13458
loss: 1.0354397296905518,grad_norm: 0.9999990970925597, iteration: 13459
loss: 1.0750502347946167,grad_norm: 0.9999996821677692, iteration: 13460
loss: 1.0406142473220825,grad_norm: 0.9554881812792408, iteration: 13461
loss: 0.9705801010131836,grad_norm: 0.9999991594835335, iteration: 13462
loss: 1.0065994262695312,grad_norm: 0.9999991962993398, iteration: 13463
loss: 1.0079950094223022,grad_norm: 0.9999991206901961, iteration: 13464
loss: 1.0160667896270752,grad_norm: 0.9999992348443091, iteration: 13465
loss: 0.9800178408622742,grad_norm: 0.9999995521899994, iteration: 13466
loss: 1.056384563446045,grad_norm: 0.9999994131972105, iteration: 13467
loss: 1.0374257564544678,grad_norm: 0.9999992683381228, iteration: 13468
loss: 0.9814411401748657,grad_norm: 0.9999993299031248, iteration: 13469
loss: 1.073625922203064,grad_norm: 0.9999993765141394, iteration: 13470
loss: 1.0631731748580933,grad_norm: 0.9999993592028584, iteration: 13471
loss: 1.0074301958084106,grad_norm: 0.9999992921748855, iteration: 13472
loss: 1.0275003910064697,grad_norm: 0.999999322090222, iteration: 13473
loss: 1.0167901515960693,grad_norm: 0.9999995507080387, iteration: 13474
loss: 0.9948456287384033,grad_norm: 0.9999990932409577, iteration: 13475
loss: 1.0317714214324951,grad_norm: 0.99999909560827, iteration: 13476
loss: 1.0556949377059937,grad_norm: 0.9999994351954932, iteration: 13477
loss: 1.0244765281677246,grad_norm: 0.9999991162473499, iteration: 13478
loss: 0.9818181991577148,grad_norm: 0.999999319321809, iteration: 13479
loss: 1.0662288665771484,grad_norm: 0.9999998196585956, iteration: 13480
loss: 1.0441278219223022,grad_norm: 0.9999994414102291, iteration: 13481
loss: 1.0395106077194214,grad_norm: 0.9999993855662249, iteration: 13482
loss: 1.0372720956802368,grad_norm: 0.9999994899374973, iteration: 13483
loss: 0.9771198630332947,grad_norm: 0.9999991592048869, iteration: 13484
loss: 1.006279706954956,grad_norm: 0.9999991224882185, iteration: 13485
loss: 1.0273391008377075,grad_norm: 0.9999991939823533, iteration: 13486
loss: 1.0714191198349,grad_norm: 0.999999291230758, iteration: 13487
loss: 1.0221511125564575,grad_norm: 0.9999991853685692, iteration: 13488
loss: 1.0338850021362305,grad_norm: 0.9999990638425524, iteration: 13489
loss: 0.9836613535881042,grad_norm: 0.9999993172597816, iteration: 13490
loss: 1.0867910385131836,grad_norm: 0.9999990965899277, iteration: 13491
loss: 1.0160647630691528,grad_norm: 0.9999991074016824, iteration: 13492
loss: 1.0585063695907593,grad_norm: 0.9999992909459537, iteration: 13493
loss: 1.0297898054122925,grad_norm: 0.9999991439881362, iteration: 13494
loss: 1.0523650646209717,grad_norm: 0.9999995100538035, iteration: 13495
loss: 1.0373377799987793,grad_norm: 0.9999992110794379, iteration: 13496
loss: 0.9861494302749634,grad_norm: 0.9999991587968934, iteration: 13497
loss: 1.115860939025879,grad_norm: 0.9999997206864094, iteration: 13498
loss: 1.023747444152832,grad_norm: 0.9999992275244878, iteration: 13499
loss: 1.0133663415908813,grad_norm: 0.999999362521472, iteration: 13500
loss: 1.0345349311828613,grad_norm: 0.9999997004195371, iteration: 13501
loss: 1.0314706563949585,grad_norm: 0.9999992129077667, iteration: 13502
loss: 1.01895272731781,grad_norm: 0.9999993415752961, iteration: 13503
loss: 1.0189692974090576,grad_norm: 0.9999994043021058, iteration: 13504
loss: 1.0581164360046387,grad_norm: 0.9999994313229092, iteration: 13505
loss: 1.0518089532852173,grad_norm: 0.9999996907156248, iteration: 13506
loss: 1.0371254682540894,grad_norm: 0.9999991973685154, iteration: 13507
loss: 0.9955548048019409,grad_norm: 0.9999990844176144, iteration: 13508
loss: 1.0533044338226318,grad_norm: 0.9999994166844394, iteration: 13509
loss: 1.0597949028015137,grad_norm: 0.999999610647238, iteration: 13510
loss: 1.036795735359192,grad_norm: 0.9999991654090226, iteration: 13511
loss: 1.0398101806640625,grad_norm: 0.9999992469564103, iteration: 13512
loss: 1.0215741395950317,grad_norm: 0.9999990354877429, iteration: 13513
loss: 1.060324788093567,grad_norm: 0.9999990764122214, iteration: 13514
loss: 1.0154885053634644,grad_norm: 0.9999990667867952, iteration: 13515
loss: 1.0294748544692993,grad_norm: 0.9999993189022729, iteration: 13516
loss: 1.0091078281402588,grad_norm: 0.9999997732296261, iteration: 13517
loss: 0.9378781318664551,grad_norm: 0.9999992853021809, iteration: 13518
loss: 1.053861379623413,grad_norm: 0.9999997468014896, iteration: 13519
loss: 1.004141092300415,grad_norm: 0.9999993084495183, iteration: 13520
loss: 0.9745523929595947,grad_norm: 0.9999991707691794, iteration: 13521
loss: 1.028109073638916,grad_norm: 0.9999992857526374, iteration: 13522
loss: 1.0372226238250732,grad_norm: 0.9999994767084168, iteration: 13523
loss: 1.0209243297576904,grad_norm: 0.9999991295100196, iteration: 13524
loss: 0.9692720174789429,grad_norm: 0.9999995610354239, iteration: 13525
loss: 1.0159038305282593,grad_norm: 0.9999992871198411, iteration: 13526
loss: 1.023297905921936,grad_norm: 0.9999990754269169, iteration: 13527
loss: 1.0252635478973389,grad_norm: 0.999999657577066, iteration: 13528
loss: 1.0780845880508423,grad_norm: 0.9999998714115015, iteration: 13529
loss: 1.0395199060440063,grad_norm: 0.9999996899957029, iteration: 13530
loss: 1.065306544303894,grad_norm: 0.99999908558628, iteration: 13531
loss: 1.1031948328018188,grad_norm: 0.999999754014601, iteration: 13532
loss: 1.0669206380844116,grad_norm: 0.999999109409909, iteration: 13533
loss: 1.0846774578094482,grad_norm: 0.9999991782777597, iteration: 13534
loss: 1.0289256572723389,grad_norm: 0.9999991635056548, iteration: 13535
loss: 1.0861386060714722,grad_norm: 0.9999996435328359, iteration: 13536
loss: 1.022452473640442,grad_norm: 0.999999099220191, iteration: 13537
loss: 1.0347338914871216,grad_norm: 0.9999990628531523, iteration: 13538
loss: 1.0025123357772827,grad_norm: 0.9999992408663585, iteration: 13539
loss: 1.0500127077102661,grad_norm: 0.9999994846494573, iteration: 13540
loss: 0.9854393601417542,grad_norm: 0.9639581895711136, iteration: 13541
loss: 1.0234297513961792,grad_norm: 0.9999997882617854, iteration: 13542
loss: 1.0041974782943726,grad_norm: 0.9999991802288068, iteration: 13543
loss: 0.9921241998672485,grad_norm: 0.999999084993335, iteration: 13544
loss: 1.0719304084777832,grad_norm: 0.9999992151885985, iteration: 13545
loss: 1.0247479677200317,grad_norm: 0.9999991012926366, iteration: 13546
loss: 1.0549391508102417,grad_norm: 0.9999990964597579, iteration: 13547
loss: 1.0428341627120972,grad_norm: 0.9999991410747973, iteration: 13548
loss: 1.0334571599960327,grad_norm: 0.9999992719774712, iteration: 13549
loss: 1.0417184829711914,grad_norm: 0.9999990669971891, iteration: 13550
loss: 1.027969241142273,grad_norm: 0.9999993630611835, iteration: 13551
loss: 1.0221489667892456,grad_norm: 0.9771577882140636, iteration: 13552
loss: 1.0080008506774902,grad_norm: 0.9999997154741763, iteration: 13553
loss: 1.0232828855514526,grad_norm: 0.999999410312413, iteration: 13554
loss: 1.0179479122161865,grad_norm: 0.999999112793799, iteration: 13555
loss: 0.9952543377876282,grad_norm: 0.9999991006276382, iteration: 13556
loss: 1.0181912183761597,grad_norm: 0.9999993450295905, iteration: 13557
loss: 1.044212818145752,grad_norm: 0.9999990809670587, iteration: 13558
loss: 1.053739070892334,grad_norm: 0.9999992457305281, iteration: 13559
loss: 1.0561600923538208,grad_norm: 0.9999991697222422, iteration: 13560
loss: 1.0311239957809448,grad_norm: 0.9999992481777231, iteration: 13561
loss: 1.0195538997650146,grad_norm: 0.9999992461606647, iteration: 13562
loss: 1.0207383632659912,grad_norm: 0.9999990647541139, iteration: 13563
loss: 1.0262171030044556,grad_norm: 0.9999991370705326, iteration: 13564
loss: 1.0604290962219238,grad_norm: 0.9999995121658752, iteration: 13565
loss: 1.068480134010315,grad_norm: 0.9999997638917506, iteration: 13566
loss: 0.9709621667861938,grad_norm: 0.9999991038515698, iteration: 13567
loss: 1.0073838233947754,grad_norm: 0.9999992695258445, iteration: 13568
loss: 1.0316674709320068,grad_norm: 0.9999991663259191, iteration: 13569
loss: 1.038826823234558,grad_norm: 0.9999994632714883, iteration: 13570
loss: 1.027438759803772,grad_norm: 0.9999991055409945, iteration: 13571
loss: 1.0271276235580444,grad_norm: 0.999999544158468, iteration: 13572
loss: 1.0511586666107178,grad_norm: 0.9999997971426596, iteration: 13573
loss: 0.9522091746330261,grad_norm: 0.9999992504175879, iteration: 13574
loss: 1.0649884939193726,grad_norm: 0.9999997814854875, iteration: 13575
loss: 1.0208027362823486,grad_norm: 0.9999994189571214, iteration: 13576
loss: 1.0648895502090454,grad_norm: 0.9999993700899993, iteration: 13577
loss: 1.031465768814087,grad_norm: 0.9999992431831883, iteration: 13578
loss: 1.048351764678955,grad_norm: 0.9999994803561372, iteration: 13579
loss: 0.9940239787101746,grad_norm: 0.9159092760120142, iteration: 13580
loss: 1.0104517936706543,grad_norm: 0.9999994562317245, iteration: 13581
loss: 1.024904727935791,grad_norm: 0.9999995144206055, iteration: 13582
loss: 1.0600597858428955,grad_norm: 0.9999990622437087, iteration: 13583
loss: 1.0764073133468628,grad_norm: 0.9999998807054379, iteration: 13584
loss: 1.064334511756897,grad_norm: 0.9999997247363364, iteration: 13585
loss: 1.088560938835144,grad_norm: 0.9999990539480571, iteration: 13586
loss: 1.0157183408737183,grad_norm: 0.9999992886576065, iteration: 13587
loss: 1.013704776763916,grad_norm: 0.9999991038209283, iteration: 13588
loss: 1.0018163919448853,grad_norm: 0.9999995654547056, iteration: 13589
loss: 0.9969419836997986,grad_norm: 0.9999992097964256, iteration: 13590
loss: 1.0035303831100464,grad_norm: 0.9999993657989262, iteration: 13591
loss: 1.0144686698913574,grad_norm: 0.9999993182401177, iteration: 13592
loss: 1.0273504257202148,grad_norm: 0.9999996648507461, iteration: 13593
loss: 0.9911006689071655,grad_norm: 0.9999991892320281, iteration: 13594
loss: 1.0050538778305054,grad_norm: 0.9999992867813714, iteration: 13595
loss: 1.0644874572753906,grad_norm: 0.9999996502820725, iteration: 13596
loss: 1.002585768699646,grad_norm: 0.9999990416208379, iteration: 13597
loss: 0.9974305629730225,grad_norm: 0.9999993733258856, iteration: 13598
loss: 1.0568045377731323,grad_norm: 0.9999993243259931, iteration: 13599
loss: 1.0076934099197388,grad_norm: 0.9999994063654415, iteration: 13600
loss: 1.052839756011963,grad_norm: 0.9999992174016082, iteration: 13601
loss: 1.0010650157928467,grad_norm: 0.9999992466282708, iteration: 13602
loss: 1.026867151260376,grad_norm: 0.999999183146712, iteration: 13603
loss: 1.0046095848083496,grad_norm: 0.9999992067440118, iteration: 13604
loss: 0.9926272034645081,grad_norm: 0.999999505804187, iteration: 13605
loss: 0.9972658753395081,grad_norm: 0.999999132144587, iteration: 13606
loss: 1.0033884048461914,grad_norm: 0.9999992129444635, iteration: 13607
loss: 1.0037078857421875,grad_norm: 0.9999990127974969, iteration: 13608
loss: 1.007159948348999,grad_norm: 0.9999993340859099, iteration: 13609
loss: 1.0802611112594604,grad_norm: 0.999999206034104, iteration: 13610
loss: 1.0005552768707275,grad_norm: 0.999999202469843, iteration: 13611
loss: 1.0919036865234375,grad_norm: 0.9999998905778708, iteration: 13612
loss: 0.9773412346839905,grad_norm: 0.9999991266338076, iteration: 13613
loss: 1.0467387437820435,grad_norm: 0.9999994961011072, iteration: 13614
loss: 1.0561803579330444,grad_norm: 0.9999992517541891, iteration: 13615
loss: 1.0214372873306274,grad_norm: 0.9999991223774207, iteration: 13616
loss: 1.010025978088379,grad_norm: 0.9999992184286669, iteration: 13617
loss: 0.9531497359275818,grad_norm: 0.9999994349062726, iteration: 13618
loss: 1.054119348526001,grad_norm: 0.999999728891689, iteration: 13619
loss: 1.030578374862671,grad_norm: 0.9999991857254859, iteration: 13620
loss: 1.0552746057510376,grad_norm: 0.9999992948904316, iteration: 13621
loss: 1.0199073553085327,grad_norm: 0.9999992722033129, iteration: 13622
loss: 1.0189521312713623,grad_norm: 0.9999992355696021, iteration: 13623
loss: 0.9772303700447083,grad_norm: 0.9999990501673813, iteration: 13624
loss: 1.0221564769744873,grad_norm: 0.9895269393749383, iteration: 13625
loss: 1.0456849336624146,grad_norm: 0.9999994800941775, iteration: 13626
loss: 1.0103012323379517,grad_norm: 0.999999089515885, iteration: 13627
loss: 1.0127674341201782,grad_norm: 0.9999991007679035, iteration: 13628
loss: 0.9841901063919067,grad_norm: 0.9999990303340869, iteration: 13629
loss: 1.0652142763137817,grad_norm: 0.9999992802819906, iteration: 13630
loss: 1.0227912664413452,grad_norm: 0.9999993180557665, iteration: 13631
loss: 1.0267819166183472,grad_norm: 0.9870536306641978, iteration: 13632
loss: 0.9446308016777039,grad_norm: 0.9999993130285041, iteration: 13633
loss: 1.0093039274215698,grad_norm: 0.999999128744743, iteration: 13634
loss: 0.9719692468643188,grad_norm: 0.9999990811738504, iteration: 13635
loss: 1.0478607416152954,grad_norm: 0.9999994956460213, iteration: 13636
loss: 1.003639578819275,grad_norm: 0.9999991577664704, iteration: 13637
loss: 0.9910523295402527,grad_norm: 0.9999992815214611, iteration: 13638
loss: 1.0340608358383179,grad_norm: 0.999999211696226, iteration: 13639
loss: 1.0566911697387695,grad_norm: 0.9999990767098442, iteration: 13640
loss: 1.0506259202957153,grad_norm: 0.9999991098461903, iteration: 13641
loss: 1.0702213048934937,grad_norm: 0.999999527175651, iteration: 13642
loss: 1.0574098825454712,grad_norm: 0.9999995083037446, iteration: 13643
loss: 1.0132919549942017,grad_norm: 0.9999997133984778, iteration: 13644
loss: 1.0123006105422974,grad_norm: 0.9999992417700483, iteration: 13645
loss: 1.0455830097198486,grad_norm: 0.9999991794729923, iteration: 13646
loss: 0.9991514682769775,grad_norm: 0.9999994530265421, iteration: 13647
loss: 1.01405668258667,grad_norm: 0.9999993549949601, iteration: 13648
loss: 1.0024583339691162,grad_norm: 0.9999993557780709, iteration: 13649
loss: 1.018270492553711,grad_norm: 0.9999991089313858, iteration: 13650
loss: 1.0601118803024292,grad_norm: 0.9999992993530135, iteration: 13651
loss: 1.0312504768371582,grad_norm: 0.9999991328287942, iteration: 13652
loss: 1.004907488822937,grad_norm: 0.9923297430395028, iteration: 13653
loss: 1.054840087890625,grad_norm: 0.9755503263074621, iteration: 13654
loss: 1.021832823753357,grad_norm: 0.9999992291756296, iteration: 13655
loss: 1.043441653251648,grad_norm: 0.9999993753910191, iteration: 13656
loss: 1.0491256713867188,grad_norm: 0.9999990861518641, iteration: 13657
loss: 1.060551643371582,grad_norm: 0.9999993580335476, iteration: 13658
loss: 1.0151221752166748,grad_norm: 0.9999993460947629, iteration: 13659
loss: 1.0450330972671509,grad_norm: 0.999999157927508, iteration: 13660
loss: 0.9560114145278931,grad_norm: 0.9999992912472044, iteration: 13661
loss: 1.0145909786224365,grad_norm: 0.9999995431170523, iteration: 13662
loss: 1.0295991897583008,grad_norm: 0.9999993752363389, iteration: 13663
loss: 1.0349962711334229,grad_norm: 0.9464991712647713, iteration: 13664
loss: 1.0389647483825684,grad_norm: 0.999999273016343, iteration: 13665
loss: 0.9944385290145874,grad_norm: 0.9999991702224024, iteration: 13666
loss: 1.0759942531585693,grad_norm: 0.9999995650489345, iteration: 13667
loss: 1.0132460594177246,grad_norm: 0.9999991430592525, iteration: 13668
loss: 1.0722090005874634,grad_norm: 0.9999994012665676, iteration: 13669
loss: 1.0326429605484009,grad_norm: 0.9999991643017773, iteration: 13670
loss: 1.030771255493164,grad_norm: 0.9999991069174877, iteration: 13671
loss: 1.03482985496521,grad_norm: 0.9999990736537483, iteration: 13672
loss: 1.0005847215652466,grad_norm: 0.999999317694629, iteration: 13673
loss: 1.015649437904358,grad_norm: 0.9999992555509315, iteration: 13674
loss: 1.0446852445602417,grad_norm: 0.9999995264832223, iteration: 13675
loss: 1.013536810874939,grad_norm: 0.9999992276407556, iteration: 13676
loss: 1.0278598070144653,grad_norm: 0.9999994817467633, iteration: 13677
loss: 0.9924557209014893,grad_norm: 0.9999992217078588, iteration: 13678
loss: 1.0860130786895752,grad_norm: 0.9999999066481307, iteration: 13679
loss: 1.0183080434799194,grad_norm: 0.9999990909995583, iteration: 13680
loss: 1.0196006298065186,grad_norm: 0.999999271661744, iteration: 13681
loss: 1.0117402076721191,grad_norm: 0.9999995985910378, iteration: 13682
loss: 0.9983544945716858,grad_norm: 0.9999991605821438, iteration: 13683
loss: 1.0054937601089478,grad_norm: 0.9999991762012138, iteration: 13684
loss: 1.0107184648513794,grad_norm: 0.9999994038040416, iteration: 13685
loss: 1.0694115161895752,grad_norm: 0.9999992393729907, iteration: 13686
loss: 1.041435718536377,grad_norm: 0.9999995584896402, iteration: 13687
loss: 1.086550235748291,grad_norm: 0.9999995390899105, iteration: 13688
loss: 1.0584336519241333,grad_norm: 0.9999992027727809, iteration: 13689
loss: 1.0469125509262085,grad_norm: 0.9999992250509081, iteration: 13690
loss: 1.0170365571975708,grad_norm: 0.9999991936203749, iteration: 13691
loss: 1.0153894424438477,grad_norm: 0.9999995630173356, iteration: 13692
loss: 1.0336573123931885,grad_norm: 0.9999994236663454, iteration: 13693
loss: 1.0318087339401245,grad_norm: 0.9999990790046733, iteration: 13694
loss: 1.041994333267212,grad_norm: 0.9999995115418809, iteration: 13695
loss: 1.044276475906372,grad_norm: 0.9999991692811933, iteration: 13696
loss: 1.0192469358444214,grad_norm: 0.9742853645127759, iteration: 13697
loss: 1.0551809072494507,grad_norm: 0.9999993068322706, iteration: 13698
loss: 1.0528979301452637,grad_norm: 0.9999992593818902, iteration: 13699
loss: 1.0042848587036133,grad_norm: 0.8652244044030158, iteration: 13700
loss: 1.0156676769256592,grad_norm: 0.999999427365553, iteration: 13701
loss: 1.010854959487915,grad_norm: 0.9999992772474754, iteration: 13702
loss: 1.0738943815231323,grad_norm: 0.999999399854604, iteration: 13703
loss: 1.0363168716430664,grad_norm: 0.9999990807605741, iteration: 13704
loss: 1.0099680423736572,grad_norm: 0.9999991511843248, iteration: 13705
loss: 1.014247179031372,grad_norm: 0.9999992807763814, iteration: 13706
loss: 1.0156428813934326,grad_norm: 0.9999990909957106, iteration: 13707
loss: 1.0188422203063965,grad_norm: 0.9999991056459133, iteration: 13708
loss: 1.067883849143982,grad_norm: 0.9999991958857418, iteration: 13709
loss: 1.0122696161270142,grad_norm: 0.9999991930952753, iteration: 13710
loss: 0.9867640137672424,grad_norm: 0.9999991180860676, iteration: 13711
loss: 0.9752969145774841,grad_norm: 0.9999993319286498, iteration: 13712
loss: 1.0385187864303589,grad_norm: 0.9999992175870148, iteration: 13713
loss: 1.01743745803833,grad_norm: 0.9999989876211242, iteration: 13714
loss: 1.041628122329712,grad_norm: 0.9999990829248074, iteration: 13715
loss: 1.0022952556610107,grad_norm: 0.9999991522744878, iteration: 13716
loss: 1.0306915044784546,grad_norm: 0.9999992168023415, iteration: 13717
loss: 0.9865873456001282,grad_norm: 0.9999995341057202, iteration: 13718
loss: 1.0385010242462158,grad_norm: 0.9999992466009722, iteration: 13719
loss: 1.014107346534729,grad_norm: 0.9999992551763384, iteration: 13720
loss: 1.0476386547088623,grad_norm: 0.9999991619137973, iteration: 13721
loss: 1.0088403224945068,grad_norm: 0.9999992946388193, iteration: 13722
loss: 1.0273566246032715,grad_norm: 0.9999992978606218, iteration: 13723
loss: 1.0061613321304321,grad_norm: 0.99999915727299, iteration: 13724
loss: 1.0706756114959717,grad_norm: 0.9999993495942601, iteration: 13725
loss: 1.0206800699234009,grad_norm: 0.9999992146711664, iteration: 13726
loss: 1.0754990577697754,grad_norm: 0.9999996491016876, iteration: 13727
loss: 1.0230447053909302,grad_norm: 0.9999990201687949, iteration: 13728
loss: 1.0804203748703003,grad_norm: 0.9999990611852052, iteration: 13729
loss: 1.0260229110717773,grad_norm: 0.9999991542714054, iteration: 13730
loss: 1.0341037511825562,grad_norm: 0.9999990855670322, iteration: 13731
loss: 1.0113375186920166,grad_norm: 0.9999990624499152, iteration: 13732
loss: 1.0624949932098389,grad_norm: 0.9999992624537022, iteration: 13733
loss: 1.0238242149353027,grad_norm: 0.9999995693667505, iteration: 13734
loss: 1.0051671266555786,grad_norm: 0.9999990218260023, iteration: 13735
loss: 1.0291755199432373,grad_norm: 0.9999997278083849, iteration: 13736
loss: 0.99253249168396,grad_norm: 0.999999111803259, iteration: 13737
loss: 0.9862197041511536,grad_norm: 0.9999992573177372, iteration: 13738
loss: 1.046523928642273,grad_norm: 0.9999991185220443, iteration: 13739
loss: 1.0247291326522827,grad_norm: 0.9999994378026957, iteration: 13740
loss: 0.9982689619064331,grad_norm: 0.9999993341082817, iteration: 13741
loss: 1.0287352800369263,grad_norm: 0.9999992555871932, iteration: 13742
loss: 1.0234479904174805,grad_norm: 0.9999991469670761, iteration: 13743
loss: 1.0285768508911133,grad_norm: 0.9999993670243301, iteration: 13744
loss: 1.0288718938827515,grad_norm: 0.999999241583752, iteration: 13745
loss: 0.9865127801895142,grad_norm: 0.999999136135172, iteration: 13746
loss: 1.021991491317749,grad_norm: 0.9999996226524829, iteration: 13747
loss: 1.0229871273040771,grad_norm: 0.9999991136239306, iteration: 13748
loss: 1.0348668098449707,grad_norm: 0.999999397955512, iteration: 13749
loss: 1.034376621246338,grad_norm: 0.9999993793685806, iteration: 13750
loss: 1.0354353189468384,grad_norm: 0.9999993748234667, iteration: 13751
loss: 0.9948505163192749,grad_norm: 0.9999992145605691, iteration: 13752
loss: 1.0386079549789429,grad_norm: 0.9999991975002932, iteration: 13753
loss: 0.9940583109855652,grad_norm: 0.9999992490230639, iteration: 13754
loss: 0.9743028879165649,grad_norm: 0.9999991481941997, iteration: 13755
loss: 1.0074443817138672,grad_norm: 0.9999996746024306, iteration: 13756
loss: 1.0512351989746094,grad_norm: 0.9999992954376871, iteration: 13757
loss: 1.084835171699524,grad_norm: 0.9999993318940064, iteration: 13758
loss: 1.0694447755813599,grad_norm: 0.9999991938526291, iteration: 13759
loss: 1.005985140800476,grad_norm: 0.9999992423102132, iteration: 13760
loss: 0.9890071749687195,grad_norm: 0.9999992234189571, iteration: 13761
loss: 1.026357650756836,grad_norm: 0.9999992666460271, iteration: 13762
loss: 1.0645945072174072,grad_norm: 0.9999990647435208, iteration: 13763
loss: 1.0216628313064575,grad_norm: 0.9999992560340424, iteration: 13764
loss: 0.9775358438491821,grad_norm: 0.99999934417322, iteration: 13765
loss: 1.0361814498901367,grad_norm: 0.999999417323832, iteration: 13766
loss: 1.0552059412002563,grad_norm: 0.9999993691455336, iteration: 13767
loss: 0.9966602921485901,grad_norm: 0.9999992805138791, iteration: 13768
loss: 1.039404273033142,grad_norm: 0.9999996819521768, iteration: 13769
loss: 1.0678696632385254,grad_norm: 0.9999993528367693, iteration: 13770
loss: 0.9918506145477295,grad_norm: 0.9999991355983175, iteration: 13771
loss: 1.0425828695297241,grad_norm: 0.9999991697404566, iteration: 13772
loss: 0.9767786264419556,grad_norm: 0.9999992194483024, iteration: 13773
loss: 1.0194692611694336,grad_norm: 0.9999990767781239, iteration: 13774
loss: 1.0240435600280762,grad_norm: 0.9999990835218333, iteration: 13775
loss: 1.036184549331665,grad_norm: 0.999999728841789, iteration: 13776
loss: 1.0125629901885986,grad_norm: 0.9999991961634062, iteration: 13777
loss: 1.0913513898849487,grad_norm: 0.9999994810471333, iteration: 13778
loss: 1.0502146482467651,grad_norm: 0.9999991536326354, iteration: 13779
loss: 1.006760597229004,grad_norm: 0.9999992191649447, iteration: 13780
loss: 0.9990360736846924,grad_norm: 0.99999924243905, iteration: 13781
loss: 1.0286000967025757,grad_norm: 0.9999992510303842, iteration: 13782
loss: 1.0452461242675781,grad_norm: 0.9999991673318592, iteration: 13783
loss: 1.0170921087265015,grad_norm: 0.999999273111907, iteration: 13784
loss: 1.0244601964950562,grad_norm: 0.9999992394481796, iteration: 13785
loss: 1.0540642738342285,grad_norm: 0.9999993264130328, iteration: 13786
loss: 1.0239843130111694,grad_norm: 0.9999992513242372, iteration: 13787
loss: 1.02741277217865,grad_norm: 0.9787046477301501, iteration: 13788
loss: 1.0884522199630737,grad_norm: 0.9999993263103354, iteration: 13789
loss: 1.0782997608184814,grad_norm: 0.9999992661322835, iteration: 13790
loss: 1.0885690450668335,grad_norm: 0.9999994671324002, iteration: 13791
loss: 1.000657320022583,grad_norm: 0.9999991577864221, iteration: 13792
loss: 1.0174394845962524,grad_norm: 0.9999992092942075, iteration: 13793
loss: 1.0228519439697266,grad_norm: 0.9999990407699211, iteration: 13794
loss: 1.0349620580673218,grad_norm: 0.9999993781492577, iteration: 13795
loss: 0.9814784526824951,grad_norm: 0.9999991439433564, iteration: 13796
loss: 1.0345885753631592,grad_norm: 0.9999991671484093, iteration: 13797
loss: 1.0511014461517334,grad_norm: 0.9999993402290852, iteration: 13798
loss: 1.0702619552612305,grad_norm: 0.999999394206935, iteration: 13799
loss: 1.0229672193527222,grad_norm: 0.9999991599675492, iteration: 13800
loss: 1.016809344291687,grad_norm: 0.9999991651939694, iteration: 13801
loss: 1.058566689491272,grad_norm: 0.9999993272139902, iteration: 13802
loss: 0.9956429600715637,grad_norm: 0.9999990483883121, iteration: 13803
loss: 1.0481352806091309,grad_norm: 0.9999991549479447, iteration: 13804
loss: 0.9910131096839905,grad_norm: 0.9732094778829077, iteration: 13805
loss: 1.077597975730896,grad_norm: 0.9999994652207239, iteration: 13806
loss: 0.98602294921875,grad_norm: 0.999999075973856, iteration: 13807
loss: 0.9760227203369141,grad_norm: 0.9999992949341312, iteration: 13808
loss: 1.0296504497528076,grad_norm: 0.9999993494923993, iteration: 13809
loss: 1.0341699123382568,grad_norm: 0.9999992381985345, iteration: 13810
loss: 1.0156621932983398,grad_norm: 0.9999993112060772, iteration: 13811
loss: 1.0191729068756104,grad_norm: 0.9999990722444441, iteration: 13812
loss: 1.0400667190551758,grad_norm: 0.9999995725448181, iteration: 13813
loss: 1.0090301036834717,grad_norm: 0.9999991680770896, iteration: 13814
loss: 1.0762889385223389,grad_norm: 0.999999460512882, iteration: 13815
loss: 1.0423763990402222,grad_norm: 0.9999993989710313, iteration: 13816
loss: 1.0154366493225098,grad_norm: 0.9999991209589353, iteration: 13817
loss: 1.0445973873138428,grad_norm: 0.9999992426822076, iteration: 13818
loss: 1.0139212608337402,grad_norm: 0.9999991561485404, iteration: 13819
loss: 1.0634286403656006,grad_norm: 0.9999991185899516, iteration: 13820
loss: 1.0181593894958496,grad_norm: 0.9999991055720796, iteration: 13821
loss: 1.0230032205581665,grad_norm: 0.9999991391437365, iteration: 13822
loss: 1.0255711078643799,grad_norm: 0.999999281164148, iteration: 13823
loss: 1.0201984643936157,grad_norm: 0.9999993101272788, iteration: 13824
loss: 1.068580150604248,grad_norm: 0.9999991797294597, iteration: 13825
loss: 1.0342941284179688,grad_norm: 0.999999049918835, iteration: 13826
loss: 1.0253804922103882,grad_norm: 0.9999991816305795, iteration: 13827
loss: 1.0334528684616089,grad_norm: 0.9999992553653181, iteration: 13828
loss: 1.0284870862960815,grad_norm: 0.9999991530095707, iteration: 13829
loss: 0.9457643032073975,grad_norm: 0.9999992298525364, iteration: 13830
loss: 0.9997869729995728,grad_norm: 0.9999990748132284, iteration: 13831
loss: 1.0471789836883545,grad_norm: 0.999999015280762, iteration: 13832
loss: 1.068765640258789,grad_norm: 0.9999992937342435, iteration: 13833
loss: 1.056593418121338,grad_norm: 0.9999994192445057, iteration: 13834
loss: 0.9967717528343201,grad_norm: 0.9999991800603152, iteration: 13835
loss: 1.0454071760177612,grad_norm: 0.9999993381514554, iteration: 13836
loss: 1.0305126905441284,grad_norm: 0.9999995919130414, iteration: 13837
loss: 1.0573536157608032,grad_norm: 0.9999992151277763, iteration: 13838
loss: 1.0550986528396606,grad_norm: 0.9999991235607553, iteration: 13839
loss: 0.9837097525596619,grad_norm: 0.9999992807407958, iteration: 13840
loss: 1.081229567527771,grad_norm: 0.999999206951086, iteration: 13841
loss: 1.0267442464828491,grad_norm: 0.9999991390187449, iteration: 13842
loss: 1.034464955329895,grad_norm: 0.9978438579924723, iteration: 13843
loss: 1.0327776670455933,grad_norm: 0.9999990897146405, iteration: 13844
loss: 0.9673914313316345,grad_norm: 0.9999992051027816, iteration: 13845
loss: 1.0217704772949219,grad_norm: 0.9999991604507907, iteration: 13846
loss: 1.050969123840332,grad_norm: 0.9999993219941411, iteration: 13847
loss: 1.0326199531555176,grad_norm: 0.9791098840352846, iteration: 13848
loss: 1.0591187477111816,grad_norm: 0.9999994929809725, iteration: 13849
loss: 1.0201420783996582,grad_norm: 0.9999991722742391, iteration: 13850
loss: 1.079106092453003,grad_norm: 0.9999995658702272, iteration: 13851
loss: 1.0195069313049316,grad_norm: 0.9999992122293272, iteration: 13852
loss: 1.0029674768447876,grad_norm: 0.9999990166448801, iteration: 13853
loss: 1.0240530967712402,grad_norm: 0.9999990344992518, iteration: 13854
loss: 0.9777445197105408,grad_norm: 0.9999993803321451, iteration: 13855
loss: 1.010957956314087,grad_norm: 0.9999993757858288, iteration: 13856
loss: 1.0359225273132324,grad_norm: 0.9999990842308294, iteration: 13857
loss: 1.034747838973999,grad_norm: 0.9999990880984272, iteration: 13858
loss: 1.0306257009506226,grad_norm: 0.9999992280433625, iteration: 13859
loss: 1.0785038471221924,grad_norm: 0.999999739651498, iteration: 13860
loss: 1.0551425218582153,grad_norm: 0.9999996477426385, iteration: 13861
loss: 0.9760148525238037,grad_norm: 0.9999994939158969, iteration: 13862
loss: 1.0386816263198853,grad_norm: 0.9999991717611321, iteration: 13863
loss: 1.0275969505310059,grad_norm: 0.9999991726069416, iteration: 13864
loss: 1.0321580171585083,grad_norm: 0.9999990363009659, iteration: 13865
loss: 1.0831369161605835,grad_norm: 0.9999996315010832, iteration: 13866
loss: 1.0168460607528687,grad_norm: 0.9999991260649953, iteration: 13867
loss: 1.0968655347824097,grad_norm: 0.9999995696598099, iteration: 13868
loss: 1.057023286819458,grad_norm: 0.9999996131531954, iteration: 13869
loss: 1.007759928703308,grad_norm: 0.9999992531769535, iteration: 13870
loss: 1.0618234872817993,grad_norm: 0.999999144131547, iteration: 13871
loss: 1.0360296964645386,grad_norm: 0.9999991991600469, iteration: 13872
loss: 1.019680380821228,grad_norm: 0.9999992131954936, iteration: 13873
loss: 1.0236788988113403,grad_norm: 0.9999990887372742, iteration: 13874
loss: 1.0215216875076294,grad_norm: 0.9999994637862444, iteration: 13875
loss: 1.027213215827942,grad_norm: 0.9999990679073901, iteration: 13876
loss: 1.0210051536560059,grad_norm: 0.9931240119354712, iteration: 13877
loss: 1.0121532678604126,grad_norm: 0.9999991911277157, iteration: 13878
loss: 1.0754190683364868,grad_norm: 0.999999274285252, iteration: 13879
loss: 1.011136531829834,grad_norm: 0.9999995272338335, iteration: 13880
loss: 1.045332431793213,grad_norm: 0.9999992691319551, iteration: 13881
loss: 1.038909673690796,grad_norm: 0.9999992104112637, iteration: 13882
loss: 1.0026649236679077,grad_norm: 0.9999991639734425, iteration: 13883
loss: 1.0396395921707153,grad_norm: 0.9999993085622809, iteration: 13884
loss: 0.9934170246124268,grad_norm: 0.9981227477855007, iteration: 13885
loss: 0.9901272058486938,grad_norm: 0.9999993381648287, iteration: 13886
loss: 1.0240964889526367,grad_norm: 0.9999992054514174, iteration: 13887
loss: 1.0598353147506714,grad_norm: 0.9999997764791692, iteration: 13888
loss: 1.0301882028579712,grad_norm: 0.9999991242651867, iteration: 13889
loss: 1.0496991872787476,grad_norm: 0.9999995798657135, iteration: 13890
loss: 1.0440584421157837,grad_norm: 0.9999990439470714, iteration: 13891
loss: 1.0641546249389648,grad_norm: 0.9999993435244102, iteration: 13892
loss: 1.0461736917495728,grad_norm: 0.9274863791454545, iteration: 13893
loss: 0.9937403798103333,grad_norm: 0.9999996924735869, iteration: 13894
loss: 1.0436488389968872,grad_norm: 0.9999996175395547, iteration: 13895
loss: 0.9773946404457092,grad_norm: 0.9999990929951144, iteration: 13896
loss: 1.0588204860687256,grad_norm: 0.9999991303243133, iteration: 13897
loss: 1.050049066543579,grad_norm: 0.9999994123475652, iteration: 13898
loss: 0.9977353811264038,grad_norm: 0.9999990720973457, iteration: 13899
loss: 1.0565299987792969,grad_norm: 0.9999992332423897, iteration: 13900
loss: 1.0636663436889648,grad_norm: 0.9999994429087457, iteration: 13901
loss: 1.0373879671096802,grad_norm: 0.9999992631732918, iteration: 13902
loss: 0.9975994825363159,grad_norm: 0.9999990637399094, iteration: 13903
loss: 1.0202305316925049,grad_norm: 0.9999994141631505, iteration: 13904
loss: 1.0332154035568237,grad_norm: 0.9999992294997514, iteration: 13905
loss: 1.0482780933380127,grad_norm: 0.9999990672978154, iteration: 13906
loss: 1.038846492767334,grad_norm: 0.9999990511296047, iteration: 13907
loss: 1.0459353923797607,grad_norm: 0.9999993257847026, iteration: 13908
loss: 1.0084290504455566,grad_norm: 0.9999990885947524, iteration: 13909
loss: 1.0137701034545898,grad_norm: 0.9965014741067985, iteration: 13910
loss: 1.041247010231018,grad_norm: 0.9999991721691485, iteration: 13911
loss: 1.0024315118789673,grad_norm: 0.9999996829979265, iteration: 13912
loss: 1.0188188552856445,grad_norm: 0.99999917117156, iteration: 13913
loss: 1.0123172998428345,grad_norm: 0.9192674533572819, iteration: 13914
loss: 0.995476484298706,grad_norm: 0.9999994347220437, iteration: 13915
loss: 0.9740251898765564,grad_norm: 0.9999990610770987, iteration: 13916
loss: 1.0311247110366821,grad_norm: 0.9999991975540432, iteration: 13917
loss: 0.9985403418540955,grad_norm: 0.9999991464419229, iteration: 13918
loss: 1.0502967834472656,grad_norm: 0.9999992742370504, iteration: 13919
loss: 1.012728214263916,grad_norm: 0.9999992081442813, iteration: 13920
loss: 1.0523799657821655,grad_norm: 0.9999992529146521, iteration: 13921
loss: 1.0331007242202759,grad_norm: 0.999999560556522, iteration: 13922
loss: 0.9726681113243103,grad_norm: 0.999999157653007, iteration: 13923
loss: 1.0164308547973633,grad_norm: 0.999999122878437, iteration: 13924
loss: 1.0412405729293823,grad_norm: 0.9999991887754343, iteration: 13925
loss: 1.0575108528137207,grad_norm: 0.9999997637045387, iteration: 13926
loss: 1.0190829038619995,grad_norm: 0.9883375488636836, iteration: 13927
loss: 1.0682123899459839,grad_norm: 0.9999991817538266, iteration: 13928
loss: 1.071223497390747,grad_norm: 0.9999993364330172, iteration: 13929
loss: 1.0338345766067505,grad_norm: 0.9978292467485385, iteration: 13930
loss: 1.041841983795166,grad_norm: 0.9999990367119792, iteration: 13931
loss: 1.0060955286026,grad_norm: 0.9999993275781154, iteration: 13932
loss: 1.012379765510559,grad_norm: 0.9999990803304759, iteration: 13933
loss: 1.0074069499969482,grad_norm: 0.9999992818126942, iteration: 13934
loss: 0.9910537004470825,grad_norm: 0.9999991157127399, iteration: 13935
loss: 1.0071243047714233,grad_norm: 0.9999990953136109, iteration: 13936
loss: 1.0006353855133057,grad_norm: 0.9999992516849608, iteration: 13937
loss: 1.0153429508209229,grad_norm: 0.9999992305580977, iteration: 13938
loss: 1.0440253019332886,grad_norm: 0.9999992160026698, iteration: 13939
loss: 1.007088303565979,grad_norm: 0.9999994241998115, iteration: 13940
loss: 1.0372546911239624,grad_norm: 0.9999992811613153, iteration: 13941
loss: 1.0462303161621094,grad_norm: 0.9999995401147295, iteration: 13942
loss: 0.9968117475509644,grad_norm: 0.9999990929331573, iteration: 13943
loss: 1.221459150314331,grad_norm: 0.9999997755091421, iteration: 13944
loss: 1.033301830291748,grad_norm: 0.999999709358453, iteration: 13945
loss: 1.0127582550048828,grad_norm: 0.9999995138016684, iteration: 13946
loss: 1.0205564498901367,grad_norm: 0.9999992234776229, iteration: 13947
loss: 1.0198876857757568,grad_norm: 0.965529606950352, iteration: 13948
loss: 1.0215774774551392,grad_norm: 0.9999992082745018, iteration: 13949
loss: 1.1008265018463135,grad_norm: 0.9999996314973415, iteration: 13950
loss: 1.0269577503204346,grad_norm: 0.9999993883508422, iteration: 13951
loss: 1.0376033782958984,grad_norm: 0.9999996911850918, iteration: 13952
loss: 1.0208320617675781,grad_norm: 0.9999995205704197, iteration: 13953
loss: 1.0595108270645142,grad_norm: 0.9999993465948447, iteration: 13954
loss: 1.0247596502304077,grad_norm: 0.9566539142258974, iteration: 13955
loss: 0.9928383231163025,grad_norm: 0.9999992896361389, iteration: 13956
loss: 1.0368045568466187,grad_norm: 0.9999995061324882, iteration: 13957
loss: 0.9727006554603577,grad_norm: 0.9999992450695298, iteration: 13958
loss: 1.0770152807235718,grad_norm: 0.999999270682248, iteration: 13959
loss: 0.9925072193145752,grad_norm: 0.9999990356398132, iteration: 13960
loss: 1.000535249710083,grad_norm: 0.9999991917264558, iteration: 13961
loss: 1.0477521419525146,grad_norm: 0.9999991861662366, iteration: 13962
loss: 1.0453414916992188,grad_norm: 0.999999169420314, iteration: 13963
loss: 1.0042251348495483,grad_norm: 0.999999494448552, iteration: 13964
loss: 1.0317460298538208,grad_norm: 0.9999992276807711, iteration: 13965
loss: 1.0172137022018433,grad_norm: 0.9999992906267002, iteration: 13966
loss: 0.9580128788948059,grad_norm: 0.9999991872423563, iteration: 13967
loss: 1.0448936223983765,grad_norm: 0.9999992106867557, iteration: 13968
loss: 1.044780969619751,grad_norm: 0.9999990811747761, iteration: 13969
loss: 1.0284007787704468,grad_norm: 0.9999992084690986, iteration: 13970
loss: 1.009696125984192,grad_norm: 0.9999991734846342, iteration: 13971
loss: 1.032412052154541,grad_norm: 0.9999993251943727, iteration: 13972
loss: 1.0155268907546997,grad_norm: 0.9999997610047052, iteration: 13973
loss: 0.9864078164100647,grad_norm: 0.9999990296491751, iteration: 13974
loss: 1.0623078346252441,grad_norm: 0.9999992994497993, iteration: 13975
loss: 1.008127212524414,grad_norm: 0.999999329953966, iteration: 13976
loss: 1.002915382385254,grad_norm: 0.9999989843986092, iteration: 13977
loss: 1.0466516017913818,grad_norm: 0.999999460651348, iteration: 13978
loss: 1.0144298076629639,grad_norm: 0.9999991401233166, iteration: 13979
loss: 1.0663549900054932,grad_norm: 0.9999991391476087, iteration: 13980
loss: 1.056911826133728,grad_norm: 0.9999993223010448, iteration: 13981
loss: 1.0422124862670898,grad_norm: 0.9999990543143523, iteration: 13982
loss: 1.0025781393051147,grad_norm: 0.9999991719636595, iteration: 13983
loss: 1.0024360418319702,grad_norm: 0.9999992000289969, iteration: 13984
loss: 1.0203315019607544,grad_norm: 0.9999991946983178, iteration: 13985
loss: 0.9813521504402161,grad_norm: 0.9999992608007606, iteration: 13986
loss: 1.0226094722747803,grad_norm: 0.9999993392645949, iteration: 13987
loss: 1.0381771326065063,grad_norm: 0.9999997335428175, iteration: 13988
loss: 1.021340250968933,grad_norm: 0.9999995954975935, iteration: 13989
loss: 1.0539276599884033,grad_norm: 0.999999209415084, iteration: 13990
loss: 0.9880614876747131,grad_norm: 0.9546792665739777, iteration: 13991
loss: 1.0107271671295166,grad_norm: 0.9999993742436406, iteration: 13992
loss: 1.0916472673416138,grad_norm: 0.9999995405609597, iteration: 13993
loss: 1.0418096780776978,grad_norm: 0.9999992169301374, iteration: 13994
loss: 1.0440400838851929,grad_norm: 0.999999294222289, iteration: 13995
loss: 1.0113885402679443,grad_norm: 0.9999993156739824, iteration: 13996
loss: 1.0545395612716675,grad_norm: 0.9999991008154409, iteration: 13997
loss: 1.0008238554000854,grad_norm: 0.9999993993341422, iteration: 13998
loss: 1.009851098060608,grad_norm: 0.9999994438865903, iteration: 13999
loss: 1.0003011226654053,grad_norm: 0.9999990732618373, iteration: 14000
loss: 1.025608777999878,grad_norm: 0.9999991913971978, iteration: 14001
loss: 1.017162561416626,grad_norm: 0.9999992842434451, iteration: 14002
loss: 0.9861242771148682,grad_norm: 0.9999990447218623, iteration: 14003
loss: 1.0648492574691772,grad_norm: 0.9999993545758717, iteration: 14004
loss: 1.0393283367156982,grad_norm: 0.9999993866286585, iteration: 14005
loss: 0.9623547792434692,grad_norm: 0.970028798926058, iteration: 14006
loss: 0.9987656474113464,grad_norm: 0.9999992146399738, iteration: 14007
loss: 0.9746583700180054,grad_norm: 0.9999990661470037, iteration: 14008
loss: 1.0484352111816406,grad_norm: 0.9999993817820358, iteration: 14009
loss: 1.0106979608535767,grad_norm: 0.9999993127055191, iteration: 14010
loss: 1.0371156930923462,grad_norm: 0.9999992910327983, iteration: 14011
loss: 1.0395678281784058,grad_norm: 0.9999997739660671, iteration: 14012
loss: 1.0261895656585693,grad_norm: 0.9999991884325028, iteration: 14013
loss: 1.0392069816589355,grad_norm: 0.9999991351538949, iteration: 14014
loss: 1.0207654237747192,grad_norm: 0.9999991294457337, iteration: 14015
loss: 1.0050249099731445,grad_norm: 0.9999993503008707, iteration: 14016
loss: 1.0484155416488647,grad_norm: 0.9999990435587915, iteration: 14017
loss: 0.9986733198165894,grad_norm: 0.9999990974356409, iteration: 14018
loss: 1.0926687717437744,grad_norm: 0.9999994256097869, iteration: 14019
loss: 1.0469670295715332,grad_norm: 0.9999989454955946, iteration: 14020
loss: 1.054855465888977,grad_norm: 0.9999993384870459, iteration: 14021
loss: 1.0025705099105835,grad_norm: 0.9999995070208126, iteration: 14022
loss: 1.1032418012619019,grad_norm: 0.9999997093652694, iteration: 14023
loss: 1.0282469987869263,grad_norm: 0.9999990809945479, iteration: 14024
loss: 1.0094681978225708,grad_norm: 0.9999994905640021, iteration: 14025
loss: 0.9827851057052612,grad_norm: 0.9999992036674163, iteration: 14026
loss: 1.0393650531768799,grad_norm: 0.9999991284219206, iteration: 14027
loss: 1.0532604455947876,grad_norm: 0.9999991432747561, iteration: 14028
loss: 1.049440622329712,grad_norm: 0.9812664593179333, iteration: 14029
loss: 1.0464849472045898,grad_norm: 0.9999992634561568, iteration: 14030
loss: 0.9895042777061462,grad_norm: 0.9999990959403215, iteration: 14031
loss: 1.0536885261535645,grad_norm: 0.9999991679884697, iteration: 14032
loss: 1.0522375106811523,grad_norm: 0.9999995133699064, iteration: 14033
loss: 0.9910668730735779,grad_norm: 0.9999992573321569, iteration: 14034
loss: 1.0346776247024536,grad_norm: 0.9999996542282115, iteration: 14035
loss: 1.0133932828903198,grad_norm: 0.9999992645080518, iteration: 14036
loss: 1.0599019527435303,grad_norm: 0.9999994937820984, iteration: 14037
loss: 1.028200626373291,grad_norm: 0.999999290583999, iteration: 14038
loss: 1.0133148431777954,grad_norm: 0.9999991524726086, iteration: 14039
loss: 1.0090160369873047,grad_norm: 0.9999992608236806, iteration: 14040
loss: 1.0452916622161865,grad_norm: 0.9999991048260062, iteration: 14041
loss: 1.030849575996399,grad_norm: 0.9999990785930748, iteration: 14042
loss: 1.017027497291565,grad_norm: 0.9999993012773616, iteration: 14043
loss: 1.0251637697219849,grad_norm: 0.999999131834513, iteration: 14044
loss: 1.014145016670227,grad_norm: 0.9999990188123129, iteration: 14045
loss: 1.030051589012146,grad_norm: 0.9999990975832269, iteration: 14046
loss: 1.0164536237716675,grad_norm: 0.9999991922393142, iteration: 14047
loss: 1.0704857110977173,grad_norm: 0.9999993997436503, iteration: 14048
loss: 1.0172072649002075,grad_norm: 0.9999992779267839, iteration: 14049
loss: 1.0045921802520752,grad_norm: 0.9618234195311379, iteration: 14050
loss: 0.9795829653739929,grad_norm: 0.9999991116249607, iteration: 14051
loss: 1.0314679145812988,grad_norm: 0.9999991876872759, iteration: 14052
loss: 1.0025042295455933,grad_norm: 0.9999993097345518, iteration: 14053
loss: 1.0163471698760986,grad_norm: 0.9999991629134977, iteration: 14054
loss: 1.0041879415512085,grad_norm: 0.9999990932088216, iteration: 14055
loss: 1.0345168113708496,grad_norm: 0.9999993836296865, iteration: 14056
loss: 1.0396127700805664,grad_norm: 0.9585633529584886, iteration: 14057
loss: 0.9753848910331726,grad_norm: 0.9999995103066254, iteration: 14058
loss: 0.984017550945282,grad_norm: 0.9999998586841791, iteration: 14059
loss: 1.0219789743423462,grad_norm: 0.9999991107041954, iteration: 14060
loss: 1.0479408502578735,grad_norm: 0.9999992949142975, iteration: 14061
loss: 1.006730079650879,grad_norm: 0.9999991084632897, iteration: 14062
loss: 1.0683232545852661,grad_norm: 0.9999993645171613, iteration: 14063
loss: 1.0472344160079956,grad_norm: 0.9999991728658824, iteration: 14064
loss: 1.0098403692245483,grad_norm: 0.9999992022729135, iteration: 14065
loss: 1.021699070930481,grad_norm: 0.9999991058948923, iteration: 14066
loss: 0.9885962009429932,grad_norm: 0.9999992884713227, iteration: 14067
loss: 1.0020365715026855,grad_norm: 0.9600443655620198, iteration: 14068
loss: 1.0155669450759888,grad_norm: 0.9999991992019914, iteration: 14069
loss: 1.037127137184143,grad_norm: 0.9999991746041388, iteration: 14070
loss: 1.0435236692428589,grad_norm: 0.9999991737040328, iteration: 14071
loss: 1.0461715459823608,grad_norm: 0.9999996994597916, iteration: 14072
loss: 0.9847185611724854,grad_norm: 0.9999990529028993, iteration: 14073
loss: 1.0198235511779785,grad_norm: 0.9999991962912489, iteration: 14074
loss: 1.0619772672653198,grad_norm: 0.9999997550623422, iteration: 14075
loss: 1.01441490650177,grad_norm: 0.9999994217943852, iteration: 14076
loss: 1.060144305229187,grad_norm: 0.9999993456893309, iteration: 14077
loss: 1.0354499816894531,grad_norm: 0.9999994865584796, iteration: 14078
loss: 1.05727219581604,grad_norm: 0.9999994881156001, iteration: 14079
loss: 0.9851216673851013,grad_norm: 0.9999992287290238, iteration: 14080
loss: 1.0364437103271484,grad_norm: 0.9999992391162661, iteration: 14081
loss: 1.0044147968292236,grad_norm: 0.999999148690433, iteration: 14082
loss: 1.0720837116241455,grad_norm: 0.999999207315957, iteration: 14083
loss: 1.0347657203674316,grad_norm: 0.9999990071812651, iteration: 14084
loss: 1.0290197134017944,grad_norm: 0.9999993520540222, iteration: 14085
loss: 1.0376605987548828,grad_norm: 0.9999991477803815, iteration: 14086
loss: 1.0353903770446777,grad_norm: 0.9722584191619797, iteration: 14087
loss: 0.98930823802948,grad_norm: 0.9999991010192437, iteration: 14088
loss: 1.0486372709274292,grad_norm: 0.9999993401456776, iteration: 14089
loss: 1.0414890050888062,grad_norm: 0.9999993722483871, iteration: 14090
loss: 1.05592679977417,grad_norm: 0.9999993202345302, iteration: 14091
loss: 1.0151429176330566,grad_norm: 0.9999992368741715, iteration: 14092
loss: 0.982889711856842,grad_norm: 0.9999991824662988, iteration: 14093
loss: 1.0270365476608276,grad_norm: 0.9999992229455736, iteration: 14094
loss: 1.0601348876953125,grad_norm: 0.999999184149755, iteration: 14095
loss: 1.042900562286377,grad_norm: 0.9999991399173215, iteration: 14096
loss: 1.0172364711761475,grad_norm: 0.9999992386529176, iteration: 14097
loss: 1.0474761724472046,grad_norm: 0.999999296432023, iteration: 14098
loss: 1.0388766527175903,grad_norm: 0.9999990664689002, iteration: 14099
loss: 1.0100294351577759,grad_norm: 0.9999992284118838, iteration: 14100
loss: 1.0159953832626343,grad_norm: 0.9389259569387878, iteration: 14101
loss: 1.023322582244873,grad_norm: 0.9999992857487531, iteration: 14102
loss: 1.0092496871948242,grad_norm: 0.9999990686598008, iteration: 14103
loss: 1.1153873205184937,grad_norm: 0.9999996419645847, iteration: 14104
loss: 1.04877769947052,grad_norm: 0.9999993869650404, iteration: 14105
loss: 1.029754400253296,grad_norm: 0.9999992571363836, iteration: 14106
loss: 1.0096522569656372,grad_norm: 0.9999990709770297, iteration: 14107
loss: 1.0448086261749268,grad_norm: 0.9999991579736979, iteration: 14108
loss: 1.0300520658493042,grad_norm: 0.9999994270984492, iteration: 14109
loss: 1.0160200595855713,grad_norm: 0.999999224768254, iteration: 14110
loss: 1.0393648147583008,grad_norm: 0.9999993665105048, iteration: 14111
loss: 1.0330392122268677,grad_norm: 0.9999990565066442, iteration: 14112
loss: 0.9886897206306458,grad_norm: 0.9999991444543769, iteration: 14113
loss: 1.042219877243042,grad_norm: 0.999999169419772, iteration: 14114
loss: 1.016263484954834,grad_norm: 0.9999994142186781, iteration: 14115
loss: 1.006791591644287,grad_norm: 0.9999991805180031, iteration: 14116
loss: 1.0028977394104004,grad_norm: 0.9999991651403035, iteration: 14117
loss: 1.0217233896255493,grad_norm: 0.9999991532297063, iteration: 14118
loss: 1.0009492635726929,grad_norm: 0.9999992139828547, iteration: 14119
loss: 1.083463430404663,grad_norm: 0.9999995207935285, iteration: 14120
loss: 1.0581060647964478,grad_norm: 0.999999061638037, iteration: 14121
loss: 1.0088164806365967,grad_norm: 0.9999990982754765, iteration: 14122
loss: 1.0641173124313354,grad_norm: 0.9858622079470818, iteration: 14123
loss: 0.9800179600715637,grad_norm: 0.999999099956901, iteration: 14124
loss: 1.0077039003372192,grad_norm: 0.999999107802158, iteration: 14125
loss: 1.0205376148223877,grad_norm: 0.999999116269571, iteration: 14126
loss: 1.0229853391647339,grad_norm: 0.9999991625500496, iteration: 14127
loss: 0.9885702133178711,grad_norm: 0.9999995664791089, iteration: 14128
loss: 0.9824724197387695,grad_norm: 0.9999991673315337, iteration: 14129
loss: 1.0420715808868408,grad_norm: 0.999999338526586, iteration: 14130
loss: 1.0355792045593262,grad_norm: 0.9326519789077462, iteration: 14131
loss: 1.0603015422821045,grad_norm: 0.9999991289511744, iteration: 14132
loss: 1.00950288772583,grad_norm: 0.9999990665154572, iteration: 14133
loss: 1.0218684673309326,grad_norm: 0.9999991996476928, iteration: 14134
loss: 1.038454294204712,grad_norm: 0.9999992130355826, iteration: 14135
loss: 0.9646417498588562,grad_norm: 0.9999992315734484, iteration: 14136
loss: 1.004042625427246,grad_norm: 0.9724995929463675, iteration: 14137
loss: 0.9900745153427124,grad_norm: 0.8988987600927978, iteration: 14138
loss: 1.0008846521377563,grad_norm: 0.9999992328809959, iteration: 14139
loss: 1.0325846672058105,grad_norm: 0.9999988942576061, iteration: 14140
loss: 1.009750485420227,grad_norm: 0.9999991291995008, iteration: 14141
loss: 1.022821068763733,grad_norm: 0.9999990587380875, iteration: 14142
loss: 1.01444673538208,grad_norm: 0.9999991679945361, iteration: 14143
loss: 0.9743833541870117,grad_norm: 0.999999150542073, iteration: 14144
loss: 0.9801310896873474,grad_norm: 0.9673606864120659, iteration: 14145
loss: 1.0211923122406006,grad_norm: 0.9999989967244411, iteration: 14146
loss: 1.031957983970642,grad_norm: 0.9999992092312319, iteration: 14147
loss: 1.0149978399276733,grad_norm: 0.9999995244765049, iteration: 14148
loss: 1.049364686012268,grad_norm: 0.9999991938591509, iteration: 14149
loss: 1.0765963792800903,grad_norm: 0.9999990685664895, iteration: 14150
loss: 1.045395851135254,grad_norm: 0.9999990712536649, iteration: 14151
loss: 1.04509699344635,grad_norm: 0.9999993514885159, iteration: 14152
loss: 1.0843148231506348,grad_norm: 0.9999993632700385, iteration: 14153
loss: 1.0517596006393433,grad_norm: 0.9999991753798492, iteration: 14154
loss: 1.039646029472351,grad_norm: 0.9999992923737334, iteration: 14155
loss: 0.9454642534255981,grad_norm: 0.9999991682445359, iteration: 14156
loss: 1.0618609189987183,grad_norm: 0.9999993879229628, iteration: 14157
loss: 1.0401628017425537,grad_norm: 0.9999993750692242, iteration: 14158
loss: 1.051490306854248,grad_norm: 0.9023551258206317, iteration: 14159
loss: 1.0104202032089233,grad_norm: 0.9999990910069281, iteration: 14160
loss: 1.0323195457458496,grad_norm: 0.9999992066279189, iteration: 14161
loss: 1.0221012830734253,grad_norm: 0.9728463396869582, iteration: 14162
loss: 1.0292537212371826,grad_norm: 0.9999992563526633, iteration: 14163
loss: 0.9565625786781311,grad_norm: 0.9999990952338216, iteration: 14164
loss: 1.012479543685913,grad_norm: 0.9999991589648537, iteration: 14165
loss: 1.0383989810943604,grad_norm: 0.9999992363006125, iteration: 14166
loss: 1.056444764137268,grad_norm: 0.9999990670187655, iteration: 14167
loss: 1.033212423324585,grad_norm: 0.9999990734278379, iteration: 14168
loss: 1.0137203931808472,grad_norm: 0.9999992598730416, iteration: 14169
loss: 1.0614923238754272,grad_norm: 0.9999991002173069, iteration: 14170
loss: 1.0007411241531372,grad_norm: 0.9999991008001746, iteration: 14171
loss: 1.0815578699111938,grad_norm: 0.999999352804453, iteration: 14172
loss: 1.0522425174713135,grad_norm: 0.999999153915561, iteration: 14173
loss: 1.00929594039917,grad_norm: 0.9999991080288675, iteration: 14174
loss: 1.0429407358169556,grad_norm: 0.9999991798906704, iteration: 14175
loss: 1.030970811843872,grad_norm: 0.9999993159311729, iteration: 14176
loss: 0.9805590510368347,grad_norm: 0.9999994096683344, iteration: 14177
loss: 0.9846001267433167,grad_norm: 0.9999991769606899, iteration: 14178
loss: 1.0584356784820557,grad_norm: 0.9999992384098119, iteration: 14179
loss: 0.9786503911018372,grad_norm: 0.9999993045226563, iteration: 14180
loss: 1.0511149168014526,grad_norm: 0.9813942802017985, iteration: 14181
loss: 0.9916461706161499,grad_norm: 0.9903304995453043, iteration: 14182
loss: 1.0162142515182495,grad_norm: 0.9999991830180113, iteration: 14183
loss: 0.9666191935539246,grad_norm: 0.9999991086731994, iteration: 14184
loss: 1.0122002363204956,grad_norm: 0.9999992777068423, iteration: 14185
loss: 1.03995943069458,grad_norm: 0.9999993235242702, iteration: 14186
loss: 1.0101966857910156,grad_norm: 0.9999993248142284, iteration: 14187
loss: 1.0454248189926147,grad_norm: 0.999999199880897, iteration: 14188
loss: 0.9994046688079834,grad_norm: 0.9999992578318017, iteration: 14189
loss: 1.039573073387146,grad_norm: 0.9999992156458569, iteration: 14190
loss: 1.0505439043045044,grad_norm: 0.9999989831960563, iteration: 14191
loss: 1.0402650833129883,grad_norm: 0.999999254574261, iteration: 14192
loss: 1.073467493057251,grad_norm: 0.999999228454909, iteration: 14193
loss: 1.0569828748703003,grad_norm: 0.9999991895511859, iteration: 14194
loss: 1.0354770421981812,grad_norm: 0.9999991238751615, iteration: 14195
loss: 0.9759919047355652,grad_norm: 0.9346597477335565, iteration: 14196
loss: 1.0313854217529297,grad_norm: 0.9999992724673816, iteration: 14197
loss: 1.0280559062957764,grad_norm: 0.9999993062920797, iteration: 14198
loss: 1.014998435974121,grad_norm: 0.99999923199757, iteration: 14199
loss: 1.0435376167297363,grad_norm: 0.99999911929592, iteration: 14200
loss: 1.0314863920211792,grad_norm: 0.9999991319694955, iteration: 14201
loss: 1.0495611429214478,grad_norm: 0.9999992653612971, iteration: 14202
loss: 1.0060133934020996,grad_norm: 0.9999991436577582, iteration: 14203
loss: 1.0246946811676025,grad_norm: 0.9666362130057977, iteration: 14204
loss: 1.0073789358139038,grad_norm: 0.9999991881394481, iteration: 14205
loss: 1.0432229042053223,grad_norm: 0.9999991789137537, iteration: 14206
loss: 1.012006163597107,grad_norm: 0.9999991592818201, iteration: 14207
loss: 0.9929990172386169,grad_norm: 0.9239551872088584, iteration: 14208
loss: 1.0020291805267334,grad_norm: 0.9574985544450504, iteration: 14209
loss: 0.9908912777900696,grad_norm: 0.9999991464336928, iteration: 14210
loss: 1.0311118364334106,grad_norm: 0.9999990643685697, iteration: 14211
loss: 1.047474980354309,grad_norm: 0.9999991119806546, iteration: 14212
loss: 1.0304832458496094,grad_norm: 0.9999991704472494, iteration: 14213
loss: 1.0155229568481445,grad_norm: 0.9999993252704128, iteration: 14214
loss: 1.0307600498199463,grad_norm: 0.9999994865154549, iteration: 14215
loss: 0.9988505244255066,grad_norm: 0.9999991433156089, iteration: 14216
loss: 1.0111390352249146,grad_norm: 0.999999246440853, iteration: 14217
loss: 1.0550614595413208,grad_norm: 0.9999991406579575, iteration: 14218
loss: 1.0651886463165283,grad_norm: 0.9999990911911086, iteration: 14219
loss: 0.9899564981460571,grad_norm: 0.9999991412177797, iteration: 14220
loss: 1.0359266996383667,grad_norm: 0.9999989778827729, iteration: 14221
loss: 1.0164822340011597,grad_norm: 0.9999994148147032, iteration: 14222
loss: 1.0268577337265015,grad_norm: 0.851642216047362, iteration: 14223
loss: 1.0258150100708008,grad_norm: 0.9999993238716364, iteration: 14224
loss: 1.0085339546203613,grad_norm: 0.9531274017333705, iteration: 14225
loss: 1.0152896642684937,grad_norm: 0.999999096223756, iteration: 14226
loss: 0.9994441270828247,grad_norm: 0.9999997016015666, iteration: 14227
loss: 1.0569859743118286,grad_norm: 0.9999992012028913, iteration: 14228
loss: 1.0229918956756592,grad_norm: 0.9999991219167703, iteration: 14229
loss: 1.0030230283737183,grad_norm: 0.9942167281863064, iteration: 14230
loss: 1.0195518732070923,grad_norm: 0.999999190263036, iteration: 14231
loss: 1.018542766571045,grad_norm: 0.9999991838771948, iteration: 14232
loss: 1.01259183883667,grad_norm: 0.9225664948751843, iteration: 14233
loss: 1.048083782196045,grad_norm: 0.9999991148259214, iteration: 14234
loss: 1.0338736772537231,grad_norm: 0.9999992694768332, iteration: 14235
loss: 1.0638880729675293,grad_norm: 0.9999992063354628, iteration: 14236
loss: 0.9843940138816833,grad_norm: 0.999999198374216, iteration: 14237
loss: 1.0052154064178467,grad_norm: 0.9999994803387253, iteration: 14238
loss: 1.059444546699524,grad_norm: 0.9999992374295485, iteration: 14239
loss: 1.0163685083389282,grad_norm: 0.999999232127381, iteration: 14240
loss: 1.0210081338882446,grad_norm: 0.9999991818466522, iteration: 14241
loss: 1.0023198127746582,grad_norm: 0.9999991745703358, iteration: 14242
loss: 1.0161042213439941,grad_norm: 0.9999992373595743, iteration: 14243
loss: 1.055240273475647,grad_norm: 0.9999991792931231, iteration: 14244
loss: 1.0440759658813477,grad_norm: 0.9999996515344859, iteration: 14245
loss: 1.0186787843704224,grad_norm: 0.9999993160904259, iteration: 14246
loss: 1.0255860090255737,grad_norm: 0.9999992914738813, iteration: 14247
loss: 1.0491448640823364,grad_norm: 0.9999991574589647, iteration: 14248
loss: 1.0341421365737915,grad_norm: 0.999999602600358, iteration: 14249
loss: 1.0605207681655884,grad_norm: 0.9999992451813533, iteration: 14250
loss: 1.040926218032837,grad_norm: 0.9999990786483739, iteration: 14251
loss: 1.1475383043289185,grad_norm: 0.9999996422413817, iteration: 14252
loss: 1.010010838508606,grad_norm: 0.9999994972523901, iteration: 14253
loss: 1.0775471925735474,grad_norm: 0.999999327545315, iteration: 14254
loss: 1.0076606273651123,grad_norm: 0.9999991407328169, iteration: 14255
loss: 1.0168211460113525,grad_norm: 0.9999993201758118, iteration: 14256
loss: 1.0259034633636475,grad_norm: 0.9999991699042405, iteration: 14257
loss: 1.014270305633545,grad_norm: 0.9999993846504234, iteration: 14258
loss: 1.0567480325698853,grad_norm: 0.9999991110701083, iteration: 14259
loss: 1.012064814567566,grad_norm: 0.9999991022396875, iteration: 14260
loss: 0.9653549194335938,grad_norm: 0.9999993380064549, iteration: 14261
loss: 1.0295852422714233,grad_norm: 0.9999992482560327, iteration: 14262
loss: 1.0902775526046753,grad_norm: 0.9999996381343706, iteration: 14263
loss: 1.0706359148025513,grad_norm: 0.9999993286046039, iteration: 14264
loss: 0.9966254234313965,grad_norm: 0.9999990762850899, iteration: 14265
loss: 1.0310033559799194,grad_norm: 0.9999990856571273, iteration: 14266
loss: 1.0429719686508179,grad_norm: 0.9999993183307857, iteration: 14267
loss: 1.0200649499893188,grad_norm: 0.9999990606699819, iteration: 14268
loss: 1.0065553188323975,grad_norm: 0.9999991847831372, iteration: 14269
loss: 1.0089479684829712,grad_norm: 0.9999990863155506, iteration: 14270
loss: 1.0179203748703003,grad_norm: 0.999999153492095, iteration: 14271
loss: 1.0470044612884521,grad_norm: 0.999999348872933, iteration: 14272
loss: 1.0334285497665405,grad_norm: 0.9999992001284064, iteration: 14273
loss: 1.0305558443069458,grad_norm: 0.9999993783490555, iteration: 14274
loss: 1.0639694929122925,grad_norm: 0.9999995757302916, iteration: 14275
loss: 1.0259910821914673,grad_norm: 0.9999992497704411, iteration: 14276
loss: 1.0440077781677246,grad_norm: 0.9999992005707253, iteration: 14277
loss: 0.9696483612060547,grad_norm: 0.9999992045824879, iteration: 14278
loss: 1.0367040634155273,grad_norm: 0.9999994732934554, iteration: 14279
loss: 1.0544739961624146,grad_norm: 0.9522006275171515, iteration: 14280
loss: 0.9804121851921082,grad_norm: 0.9999992226682426, iteration: 14281
loss: 1.00613272190094,grad_norm: 0.9999992975500257, iteration: 14282
loss: 1.005609154701233,grad_norm: 0.9999991215280281, iteration: 14283
loss: 0.9611913561820984,grad_norm: 0.9999990986873509, iteration: 14284
loss: 1.037742018699646,grad_norm: 0.9999992105434874, iteration: 14285
loss: 1.0552895069122314,grad_norm: 0.9999993875398331, iteration: 14286
loss: 1.0643651485443115,grad_norm: 0.9999994991997722, iteration: 14287
loss: 1.0423258543014526,grad_norm: 0.9999992068329067, iteration: 14288
loss: 1.001552700996399,grad_norm: 0.9999992122915178, iteration: 14289
loss: 1.003239393234253,grad_norm: 0.9999992849210338, iteration: 14290
loss: 0.9979010820388794,grad_norm: 0.9999992822034659, iteration: 14291
loss: 0.9313340783119202,grad_norm: 0.9999990907018805, iteration: 14292
loss: 1.0205893516540527,grad_norm: 0.9999997775729509, iteration: 14293
loss: 1.0396782159805298,grad_norm: 0.9999992105453251, iteration: 14294
loss: 0.9986538887023926,grad_norm: 0.999999383817427, iteration: 14295
loss: 1.0736348628997803,grad_norm: 0.9999997862753671, iteration: 14296
loss: 1.007399320602417,grad_norm: 0.9999990822469013, iteration: 14297
loss: 1.024829387664795,grad_norm: 0.9999991116032532, iteration: 14298
loss: 1.0611896514892578,grad_norm: 0.999999090253149, iteration: 14299
loss: 1.0406967401504517,grad_norm: 0.9999994585804313, iteration: 14300
loss: 1.0081545114517212,grad_norm: 0.9999990639625298, iteration: 14301
loss: 1.0141141414642334,grad_norm: 0.9999992364649207, iteration: 14302
loss: 0.9816629886627197,grad_norm: 0.9689496934425018, iteration: 14303
loss: 1.027336835861206,grad_norm: 0.9999993304544418, iteration: 14304
loss: 1.0295648574829102,grad_norm: 0.9999996776215272, iteration: 14305
loss: 1.014090657234192,grad_norm: 0.9999992326086244, iteration: 14306
loss: 1.0344990491867065,grad_norm: 0.9999992617859457, iteration: 14307
loss: 0.9981703758239746,grad_norm: 0.9999992759493233, iteration: 14308
loss: 0.9940431714057922,grad_norm: 0.9999991195163358, iteration: 14309
loss: 1.052635669708252,grad_norm: 0.9999992438875996, iteration: 14310
loss: 0.998621940612793,grad_norm: 0.9999991780938036, iteration: 14311
loss: 0.9979333281517029,grad_norm: 0.9999992642223551, iteration: 14312
loss: 1.048574447631836,grad_norm: 0.9999991943461807, iteration: 14313
loss: 1.042068362236023,grad_norm: 0.9999992308412912, iteration: 14314
loss: 1.0549523830413818,grad_norm: 0.999999142336848, iteration: 14315
loss: 1.0344239473342896,grad_norm: 0.9999993751952775, iteration: 14316
loss: 1.0008634328842163,grad_norm: 0.9999990292487526, iteration: 14317
loss: 0.9600275754928589,grad_norm: 0.999999711171164, iteration: 14318
loss: 1.0015939474105835,grad_norm: 0.9999990555161168, iteration: 14319
loss: 1.0145646333694458,grad_norm: 0.999999135048809, iteration: 14320
loss: 0.9996203184127808,grad_norm: 0.9999992805098332, iteration: 14321
loss: 1.0529944896697998,grad_norm: 0.999999407529702, iteration: 14322
loss: 1.0191513299942017,grad_norm: 0.9999993999820481, iteration: 14323
loss: 1.0240124464035034,grad_norm: 0.9999991130901789, iteration: 14324
loss: 1.0221024751663208,grad_norm: 0.9999991110082868, iteration: 14325
loss: 1.0250587463378906,grad_norm: 0.9999992381798326, iteration: 14326
loss: 0.9642497897148132,grad_norm: 0.9568570949853575, iteration: 14327
loss: 1.0496551990509033,grad_norm: 0.9999992114932839, iteration: 14328
loss: 1.0590037107467651,grad_norm: 0.999999632154408, iteration: 14329
loss: 1.0432528257369995,grad_norm: 0.999999409219047, iteration: 14330
loss: 1.0254188776016235,grad_norm: 0.9999993860589246, iteration: 14331
loss: 1.0155551433563232,grad_norm: 0.9999994311237074, iteration: 14332
loss: 1.0352386236190796,grad_norm: 0.9999998791409647, iteration: 14333
loss: 1.0509238243103027,grad_norm: 0.9999995005478571, iteration: 14334
loss: 1.0317586660385132,grad_norm: 0.9999994193995175, iteration: 14335
loss: 1.0675729513168335,grad_norm: 0.9999997026442152, iteration: 14336
loss: 1.0367993116378784,grad_norm: 0.9999995143420599, iteration: 14337
loss: 1.0225861072540283,grad_norm: 0.9999990647985671, iteration: 14338
loss: 1.0568522214889526,grad_norm: 0.9999995605771326, iteration: 14339
loss: 1.0665078163146973,grad_norm: 0.9999997557604797, iteration: 14340
loss: 1.0156629085540771,grad_norm: 0.9999991861830589, iteration: 14341
loss: 1.0593960285186768,grad_norm: 0.9999990498144595, iteration: 14342
loss: 1.012001395225525,grad_norm: 0.9999992523200342, iteration: 14343
loss: 1.0127724409103394,grad_norm: 0.9999992213362969, iteration: 14344
loss: 0.9865280985832214,grad_norm: 0.9999994937215111, iteration: 14345
loss: 1.004786491394043,grad_norm: 0.9999991862564405, iteration: 14346
loss: 1.031714677810669,grad_norm: 0.9999992794187859, iteration: 14347
loss: 1.0560604333877563,grad_norm: 0.9999993009848113, iteration: 14348
loss: 1.0531970262527466,grad_norm: 0.9999992521653077, iteration: 14349
loss: 1.04268217086792,grad_norm: 0.9999991608900617, iteration: 14350
loss: 1.0497790575027466,grad_norm: 0.9999990869718107, iteration: 14351
loss: 1.0762856006622314,grad_norm: 0.9999995186058678, iteration: 14352
loss: 1.051912784576416,grad_norm: 0.999999328293753, iteration: 14353
loss: 0.9979085922241211,grad_norm: 0.9999992255556666, iteration: 14354
loss: 1.07154381275177,grad_norm: 0.999999162562997, iteration: 14355
loss: 0.9730204343795776,grad_norm: 0.9999990344222016, iteration: 14356
loss: 1.0364094972610474,grad_norm: 0.9999990503510177, iteration: 14357
loss: 1.0421409606933594,grad_norm: 0.9999995548282495, iteration: 14358
loss: 1.031853437423706,grad_norm: 0.9999991208849722, iteration: 14359
loss: 1.032781958580017,grad_norm: 0.9999991107292521, iteration: 14360
loss: 0.9754564762115479,grad_norm: 0.9983821736309322, iteration: 14361
loss: 1.0352457761764526,grad_norm: 0.9388307927935937, iteration: 14362
loss: 1.0736433267593384,grad_norm: 0.9999992780953365, iteration: 14363
loss: 1.0216740369796753,grad_norm: 0.9999992670728766, iteration: 14364
loss: 1.107797622680664,grad_norm: 0.999999489208452, iteration: 14365
loss: 0.9914782643318176,grad_norm: 0.9999993080464689, iteration: 14366
loss: 0.989916205406189,grad_norm: 0.9999995457598988, iteration: 14367
loss: 1.0027008056640625,grad_norm: 0.9999992981268714, iteration: 14368
loss: 1.017324447631836,grad_norm: 0.999999313132602, iteration: 14369
loss: 1.006162405014038,grad_norm: 0.9999993080979013, iteration: 14370
loss: 1.0279732942581177,grad_norm: 0.9999992756553115, iteration: 14371
loss: 1.0409035682678223,grad_norm: 0.9999990251368129, iteration: 14372
loss: 1.03531014919281,grad_norm: 0.9999995701227989, iteration: 14373
loss: 1.0690271854400635,grad_norm: 0.999999749033745, iteration: 14374
loss: 1.0386707782745361,grad_norm: 0.9999991392965406, iteration: 14375
loss: 1.026626467704773,grad_norm: 0.9999993551834803, iteration: 14376
loss: 0.9618167281150818,grad_norm: 0.9999992269947141, iteration: 14377
loss: 1.0063780546188354,grad_norm: 0.9999990821749162, iteration: 14378
loss: 1.0261961221694946,grad_norm: 0.9999990993564483, iteration: 14379
loss: 1.0463507175445557,grad_norm: 0.9999992048495173, iteration: 14380
loss: 1.0408225059509277,grad_norm: 0.9999991456549892, iteration: 14381
loss: 1.034274935722351,grad_norm: 0.9999992057550295, iteration: 14382
loss: 1.0047190189361572,grad_norm: 0.9999993011864153, iteration: 14383
loss: 1.0713335275650024,grad_norm: 0.9999994598715397, iteration: 14384
loss: 1.0241299867630005,grad_norm: 0.9999992834536768, iteration: 14385
loss: 1.1045840978622437,grad_norm: 0.9999992026060405, iteration: 14386
loss: 1.048093557357788,grad_norm: 0.9999990994170119, iteration: 14387
loss: 1.0274161100387573,grad_norm: 0.9547617935533577, iteration: 14388
loss: 1.057429552078247,grad_norm: 0.9999994553335836, iteration: 14389
loss: 1.042823314666748,grad_norm: 0.9999991347191206, iteration: 14390
loss: 0.9912202954292297,grad_norm: 0.9999992206278193, iteration: 14391
loss: 0.9995215535163879,grad_norm: 0.9999997663043735, iteration: 14392
loss: 1.0315340757369995,grad_norm: 0.9999991883822755, iteration: 14393
loss: 1.0184110403060913,grad_norm: 0.9757339805734724, iteration: 14394
loss: 1.047866940498352,grad_norm: 0.9999992433356931, iteration: 14395
loss: 1.0310945510864258,grad_norm: 0.9999991819260671, iteration: 14396
loss: 1.0712954998016357,grad_norm: 0.9999991635796013, iteration: 14397
loss: 1.043131947517395,grad_norm: 0.9999993727466567, iteration: 14398
loss: 1.0612109899520874,grad_norm: 0.9999996515158334, iteration: 14399
loss: 1.0581631660461426,grad_norm: 0.9999992022835295, iteration: 14400
loss: 1.0713196992874146,grad_norm: 0.9999997672970264, iteration: 14401
loss: 1.061323881149292,grad_norm: 0.9999998058886681, iteration: 14402
loss: 1.0045424699783325,grad_norm: 0.9999992386721841, iteration: 14403
loss: 0.9964255094528198,grad_norm: 0.9999991137105476, iteration: 14404
loss: 1.0441741943359375,grad_norm: 0.9999995546388838, iteration: 14405
loss: 0.9893627166748047,grad_norm: 0.9999991586913489, iteration: 14406
loss: 0.9946649074554443,grad_norm: 0.9999990669980071, iteration: 14407
loss: 1.0063698291778564,grad_norm: 0.9114559378504874, iteration: 14408
loss: 1.0269814729690552,grad_norm: 0.943450983705503, iteration: 14409
loss: 0.9998561143875122,grad_norm: 0.9068171733743469, iteration: 14410
loss: 1.0063378810882568,grad_norm: 0.9999991070177514, iteration: 14411
loss: 1.0458649396896362,grad_norm: 0.9999998891341291, iteration: 14412
loss: 1.0228428840637207,grad_norm: 0.999999401485034, iteration: 14413
loss: 1.069626808166504,grad_norm: 0.9999992281275877, iteration: 14414
loss: 1.0391896963119507,grad_norm: 0.9999990226409903, iteration: 14415
loss: 0.9789615273475647,grad_norm: 0.8908292941200897, iteration: 14416
loss: 1.006503701210022,grad_norm: 0.9999991180001492, iteration: 14417
loss: 1.0665578842163086,grad_norm: 0.9999998202065025, iteration: 14418
loss: 1.0447252988815308,grad_norm: 0.999999482431953, iteration: 14419
loss: 1.0222787857055664,grad_norm: 0.9999994953714625, iteration: 14420
loss: 1.0144234895706177,grad_norm: 0.9999993238512667, iteration: 14421
loss: 1.020450234413147,grad_norm: 0.9999993400592557, iteration: 14422
loss: 1.0521228313446045,grad_norm: 0.9999992099581902, iteration: 14423
loss: 1.0343575477600098,grad_norm: 0.999999144141977, iteration: 14424
loss: 1.0161854028701782,grad_norm: 0.9999991408726306, iteration: 14425
loss: 1.006389856338501,grad_norm: 0.999999320063584, iteration: 14426
loss: 1.074171543121338,grad_norm: 0.9999992832767687, iteration: 14427
loss: 1.0115487575531006,grad_norm: 0.9999993890647683, iteration: 14428
loss: 1.0398386716842651,grad_norm: 0.999999335552542, iteration: 14429
loss: 0.9733447432518005,grad_norm: 0.9999992018427711, iteration: 14430
loss: 1.0765072107315063,grad_norm: 0.9999995041868959, iteration: 14431
loss: 1.0188559293746948,grad_norm: 0.999999244040643, iteration: 14432
loss: 1.0655289888381958,grad_norm: 0.9999994987111728, iteration: 14433
loss: 0.9757913947105408,grad_norm: 0.9999991222862384, iteration: 14434
loss: 1.0207602977752686,grad_norm: 0.9999992794943777, iteration: 14435
loss: 1.0119569301605225,grad_norm: 0.9999992871273005, iteration: 14436
loss: 1.0508912801742554,grad_norm: 0.9999996366013622, iteration: 14437
loss: 1.0688198804855347,grad_norm: 0.9999992635450595, iteration: 14438
loss: 1.0288071632385254,grad_norm: 0.99999916879278, iteration: 14439
loss: 1.0339139699935913,grad_norm: 0.9999996350637693, iteration: 14440
loss: 1.0642781257629395,grad_norm: 0.9999991236520586, iteration: 14441
loss: 1.0674943923950195,grad_norm: 0.9999993359459832, iteration: 14442
loss: 0.9827389121055603,grad_norm: 0.9999991959384287, iteration: 14443
loss: 1.0413585901260376,grad_norm: 0.919850359445572, iteration: 14444
loss: 1.0117871761322021,grad_norm: 0.9999991666274458, iteration: 14445
loss: 1.024691104888916,grad_norm: 0.99999927230124, iteration: 14446
loss: 1.060553789138794,grad_norm: 0.9999994423867026, iteration: 14447
loss: 0.9942028522491455,grad_norm: 0.9188680371959903, iteration: 14448
loss: 1.0209139585494995,grad_norm: 0.9999994318596384, iteration: 14449
loss: 1.0166782140731812,grad_norm: 0.9999995107078206, iteration: 14450
loss: 1.0326985120773315,grad_norm: 0.9999991627682137, iteration: 14451
loss: 1.0244594812393188,grad_norm: 0.999999223487688, iteration: 14452
loss: 1.0126276016235352,grad_norm: 0.9999993296338866, iteration: 14453
loss: 1.027267336845398,grad_norm: 0.9999991617826899, iteration: 14454
loss: 0.9905364513397217,grad_norm: 0.9999991744451919, iteration: 14455
loss: 1.0381840467453003,grad_norm: 0.9999992065703966, iteration: 14456
loss: 1.0203739404678345,grad_norm: 0.99644368985025, iteration: 14457
loss: 0.9868517518043518,grad_norm: 0.9999994423013017, iteration: 14458
loss: 1.003214716911316,grad_norm: 0.9540704360798964, iteration: 14459
loss: 1.0256332159042358,grad_norm: 0.9999992897478494, iteration: 14460
loss: 1.073553442955017,grad_norm: 0.9999996208807994, iteration: 14461
loss: 1.0174031257629395,grad_norm: 0.999999652124247, iteration: 14462
loss: 0.992405891418457,grad_norm: 0.9999991161770787, iteration: 14463
loss: 1.0601284503936768,grad_norm: 0.9999992828204679, iteration: 14464
loss: 1.0079622268676758,grad_norm: 0.9999993168491604, iteration: 14465
loss: 1.0177385807037354,grad_norm: 0.99999921983673, iteration: 14466
loss: 1.036512017250061,grad_norm: 0.9999993515428145, iteration: 14467
loss: 1.0176599025726318,grad_norm: 0.9999992826791502, iteration: 14468
loss: 1.028551697731018,grad_norm: 0.9999991371207232, iteration: 14469
loss: 1.0643315315246582,grad_norm: 0.9999994587147266, iteration: 14470
loss: 1.02969491481781,grad_norm: 0.9999991002004663, iteration: 14471
loss: 1.0319218635559082,grad_norm: 0.9999991304925117, iteration: 14472
loss: 0.9992662072181702,grad_norm: 0.9999991073770546, iteration: 14473
loss: 1.0560303926467896,grad_norm: 0.9999992427052748, iteration: 14474
loss: 0.9640142917633057,grad_norm: 0.9999993677493932, iteration: 14475
loss: 1.0596227645874023,grad_norm: 0.999999364185219, iteration: 14476
loss: 1.0284157991409302,grad_norm: 0.9999992128161791, iteration: 14477
loss: 0.9890751242637634,grad_norm: 0.9999991311860402, iteration: 14478
loss: 1.0228362083435059,grad_norm: 0.9999991878162726, iteration: 14479
loss: 1.0209956169128418,grad_norm: 0.9999991938580456, iteration: 14480
loss: 1.0394586324691772,grad_norm: 0.9999991405140447, iteration: 14481
loss: 0.976546585559845,grad_norm: 0.999999116411119, iteration: 14482
loss: 1.0107284784317017,grad_norm: 0.9999990574088631, iteration: 14483
loss: 1.0242093801498413,grad_norm: 0.9999992077396372, iteration: 14484
loss: 0.9956873655319214,grad_norm: 0.9999992114245917, iteration: 14485
loss: 1.0300555229187012,grad_norm: 0.9999990935963377, iteration: 14486
loss: 1.0348014831542969,grad_norm: 0.9999991629086878, iteration: 14487
loss: 1.058994174003601,grad_norm: 0.9999992229666143, iteration: 14488
loss: 1.0167933702468872,grad_norm: 0.9999992019290223, iteration: 14489
loss: 1.015634298324585,grad_norm: 0.9999995885361946, iteration: 14490
loss: 0.9850438833236694,grad_norm: 0.9999992676375149, iteration: 14491
loss: 1.0511200428009033,grad_norm: 0.999999439193225, iteration: 14492
loss: 1.0479152202606201,grad_norm: 0.9999995611196304, iteration: 14493
loss: 1.0379995107650757,grad_norm: 0.9999990939950751, iteration: 14494
loss: 1.0138591527938843,grad_norm: 0.9999990513642186, iteration: 14495
loss: 0.9896015524864197,grad_norm: 0.999999220518806, iteration: 14496
loss: 1.0121461153030396,grad_norm: 0.9999990611992967, iteration: 14497
loss: 1.0602562427520752,grad_norm: 0.9999991013175178, iteration: 14498
loss: 1.0134841203689575,grad_norm: 0.9999994325237541, iteration: 14499
loss: 1.0101466178894043,grad_norm: 0.9999991921272431, iteration: 14500
loss: 1.0611616373062134,grad_norm: 0.9999995700001764, iteration: 14501
loss: 1.0074125528335571,grad_norm: 0.9999991519137323, iteration: 14502
loss: 1.08512282371521,grad_norm: 0.9999994696998155, iteration: 14503
loss: 1.0667850971221924,grad_norm: 0.9999994037011023, iteration: 14504
loss: 0.9767251014709473,grad_norm: 0.9712341977037199, iteration: 14505
loss: 1.0634013414382935,grad_norm: 0.9999991656157835, iteration: 14506
loss: 1.038586974143982,grad_norm: 0.99999923360519, iteration: 14507
loss: 1.0277066230773926,grad_norm: 0.9999995969675044, iteration: 14508
loss: 1.0562708377838135,grad_norm: 0.9999992575732655, iteration: 14509
loss: 1.0026893615722656,grad_norm: 0.9999990738271085, iteration: 14510
loss: 1.0555555820465088,grad_norm: 0.9999993163262794, iteration: 14511
loss: 1.0048903226852417,grad_norm: 0.9999992322026467, iteration: 14512
loss: 1.040703535079956,grad_norm: 0.9999991068715002, iteration: 14513
loss: 1.0317836999893188,grad_norm: 0.9999990653809826, iteration: 14514
loss: 0.9973044395446777,grad_norm: 0.9999992925229961, iteration: 14515
loss: 1.0378576517105103,grad_norm: 0.9999991193891058, iteration: 14516
loss: 1.0248435735702515,grad_norm: 0.9998045121032381, iteration: 14517
loss: 1.054078221321106,grad_norm: 0.9999993226255566, iteration: 14518
loss: 1.045072078704834,grad_norm: 0.9999996533853184, iteration: 14519
loss: 1.0119950771331787,grad_norm: 0.9999992257777911, iteration: 14520
loss: 1.039092779159546,grad_norm: 0.9999992092480326, iteration: 14521
loss: 1.0256627798080444,grad_norm: 0.9999990809460082, iteration: 14522
loss: 1.0388041734695435,grad_norm: 0.9999995552347594, iteration: 14523
loss: 0.9894868731498718,grad_norm: 0.9999990925177886, iteration: 14524
loss: 0.9778707027435303,grad_norm: 0.9999992064283321, iteration: 14525
loss: 0.9979084134101868,grad_norm: 0.999999053671888, iteration: 14526
loss: 1.0299725532531738,grad_norm: 0.9999990666064718, iteration: 14527
loss: 1.0444048643112183,grad_norm: 0.9999993818144266, iteration: 14528
loss: 1.032387614250183,grad_norm: 0.9999997070798129, iteration: 14529
loss: 1.0173611640930176,grad_norm: 0.999999109274756, iteration: 14530
loss: 1.0709490776062012,grad_norm: 0.9999992426272354, iteration: 14531
loss: 1.0026817321777344,grad_norm: 0.9999990386000354, iteration: 14532
loss: 1.0326128005981445,grad_norm: 0.9999992565679313, iteration: 14533
loss: 0.9929845333099365,grad_norm: 0.999999399419074, iteration: 14534
loss: 1.0256940126419067,grad_norm: 0.9999994714148279, iteration: 14535
loss: 1.0082615613937378,grad_norm: 0.9999992178341377, iteration: 14536
loss: 1.0080535411834717,grad_norm: 0.9999991488897245, iteration: 14537
loss: 1.0055873394012451,grad_norm: 0.9999992581987861, iteration: 14538
loss: 0.9866408705711365,grad_norm: 0.9999991833030489, iteration: 14539
loss: 1.0140990018844604,grad_norm: 0.9999992031879643, iteration: 14540
loss: 1.0314066410064697,grad_norm: 0.9999990253591283, iteration: 14541
loss: 1.009317398071289,grad_norm: 0.9999993160432047, iteration: 14542
loss: 1.0386261940002441,grad_norm: 0.9999994765747922, iteration: 14543
loss: 1.0120676755905151,grad_norm: 0.9999994772909304, iteration: 14544
loss: 0.998146116733551,grad_norm: 0.9999992001435517, iteration: 14545
loss: 1.0310531854629517,grad_norm: 0.9999992602586705, iteration: 14546
loss: 0.9707256555557251,grad_norm: 0.9999993981956855, iteration: 14547
loss: 1.0026572942733765,grad_norm: 0.999999463132867, iteration: 14548
loss: 1.074985146522522,grad_norm: 0.9999993871350075, iteration: 14549
loss: 1.0307908058166504,grad_norm: 0.9999992487676364, iteration: 14550
loss: 1.0694208145141602,grad_norm: 0.9999993064203033, iteration: 14551
loss: 1.002155065536499,grad_norm: 0.9999991514275643, iteration: 14552
loss: 1.0122108459472656,grad_norm: 0.9999991154524133, iteration: 14553
loss: 0.9885991811752319,grad_norm: 0.9713303940368069, iteration: 14554
loss: 1.0379009246826172,grad_norm: 0.9999993968383624, iteration: 14555
loss: 1.040291666984558,grad_norm: 0.9999994131740645, iteration: 14556
loss: 1.0900899171829224,grad_norm: 0.9999991932293719, iteration: 14557
loss: 1.0576165914535522,grad_norm: 0.9999992206919011, iteration: 14558
loss: 1.1035805940628052,grad_norm: 0.999999706237769, iteration: 14559
loss: 1.0183771848678589,grad_norm: 0.9999992989292308, iteration: 14560
loss: 0.9765830636024475,grad_norm: 0.9999992368238422, iteration: 14561
loss: 0.9855846762657166,grad_norm: 0.9999991722160833, iteration: 14562
loss: 1.0094395875930786,grad_norm: 0.9999991890006504, iteration: 14563
loss: 0.9739907383918762,grad_norm: 0.9999992755662744, iteration: 14564
loss: 1.121620774269104,grad_norm: 0.9999991122986389, iteration: 14565
loss: 1.0474169254302979,grad_norm: 0.9999993912317405, iteration: 14566
loss: 1.0503484010696411,grad_norm: 0.9999992602271766, iteration: 14567
loss: 0.9963832497596741,grad_norm: 0.9999990944164934, iteration: 14568
loss: 1.022139549255371,grad_norm: 0.9999991493626311, iteration: 14569
loss: 1.0401278734207153,grad_norm: 0.999999156440875, iteration: 14570
loss: 0.9961166381835938,grad_norm: 0.9999993090090422, iteration: 14571
loss: 1.028809666633606,grad_norm: 0.9999990974048206, iteration: 14572
loss: 1.0691781044006348,grad_norm: 0.9999994303429092, iteration: 14573
loss: 1.0853880643844604,grad_norm: 0.9999993307507176, iteration: 14574
loss: 1.0040299892425537,grad_norm: 0.9999992278723766, iteration: 14575
loss: 1.0016719102859497,grad_norm: 0.9821262196025664, iteration: 14576
loss: 1.0625005960464478,grad_norm: 0.9999992319944602, iteration: 14577
loss: 1.0387052297592163,grad_norm: 0.9574813659073389, iteration: 14578
loss: 1.0240713357925415,grad_norm: 0.9999991693702425, iteration: 14579
loss: 1.071241021156311,grad_norm: 0.9999990916014911, iteration: 14580
loss: 1.0523130893707275,grad_norm: 0.9999991288716917, iteration: 14581
loss: 1.0254977941513062,grad_norm: 0.9999990626688872, iteration: 14582
loss: 0.9724907279014587,grad_norm: 0.9981630255875771, iteration: 14583
loss: 1.051803708076477,grad_norm: 0.9999993854391042, iteration: 14584
loss: 1.0529547929763794,grad_norm: 0.9999989599589701, iteration: 14585
loss: 0.9857311248779297,grad_norm: 0.9999994050656121, iteration: 14586
loss: 0.988514244556427,grad_norm: 0.9845696758953169, iteration: 14587
loss: 1.0241901874542236,grad_norm: 0.9999993168490274, iteration: 14588
loss: 0.9995049834251404,grad_norm: 0.9999994365533774, iteration: 14589
loss: 1.0671272277832031,grad_norm: 0.9999990984660974, iteration: 14590
loss: 1.0033982992172241,grad_norm: 0.9999993321832293, iteration: 14591
loss: 1.0339231491088867,grad_norm: 0.9999994427701677, iteration: 14592
loss: 1.0323002338409424,grad_norm: 0.9999991712532821, iteration: 14593
loss: 0.9995099902153015,grad_norm: 0.9999992113973903, iteration: 14594
loss: 1.0733577013015747,grad_norm: 0.9999992975380321, iteration: 14595
loss: 1.1151334047317505,grad_norm: 0.9999996967280567, iteration: 14596
loss: 0.9880261421203613,grad_norm: 0.9999992983946696, iteration: 14597
loss: 1.04155433177948,grad_norm: 0.9999990349312896, iteration: 14598
loss: 1.0017882585525513,grad_norm: 0.9999996715859519, iteration: 14599
loss: 1.0076130628585815,grad_norm: 0.9999991288750083, iteration: 14600
loss: 1.011887788772583,grad_norm: 0.9999991379467633, iteration: 14601
loss: 1.0226670503616333,grad_norm: 0.9999991775089948, iteration: 14602
loss: 1.0150980949401855,grad_norm: 0.9999994949901314, iteration: 14603
loss: 1.0340074300765991,grad_norm: 0.9999996928134238, iteration: 14604
loss: 0.994415283203125,grad_norm: 0.9999991594842365, iteration: 14605
loss: 0.9960988163948059,grad_norm: 0.9658216410454992, iteration: 14606
loss: 0.9897100329399109,grad_norm: 0.9999995037892755, iteration: 14607
loss: 0.9746543765068054,grad_norm: 0.9999993729362368, iteration: 14608
loss: 1.0196142196655273,grad_norm: 0.9999992413346014, iteration: 14609
loss: 1.0283145904541016,grad_norm: 0.9999994369863461, iteration: 14610
loss: 1.0591819286346436,grad_norm: 0.999999879951033, iteration: 14611
loss: 1.0558760166168213,grad_norm: 0.9999994676118984, iteration: 14612
loss: 1.007549524307251,grad_norm: 0.9999992329449718, iteration: 14613
loss: 0.9858240485191345,grad_norm: 0.9999991906606313, iteration: 14614
loss: 1.012468934059143,grad_norm: 0.9999993271049339, iteration: 14615
loss: 1.0685251951217651,grad_norm: 0.9999994992037297, iteration: 14616
loss: 1.0060601234436035,grad_norm: 0.9999991684724957, iteration: 14617
loss: 1.04709792137146,grad_norm: 0.9999991448658396, iteration: 14618
loss: 1.0173710584640503,grad_norm: 0.9999994949250289, iteration: 14619
loss: 1.0044524669647217,grad_norm: 0.9999991416287882, iteration: 14620
loss: 1.0445119142532349,grad_norm: 0.9999993563452753, iteration: 14621
loss: 1.0406033992767334,grad_norm: 0.9999991691962864, iteration: 14622
loss: 1.0343233346939087,grad_norm: 0.9999992679218836, iteration: 14623
loss: 1.0292882919311523,grad_norm: 0.9999993976747127, iteration: 14624
loss: 1.0796748399734497,grad_norm: 0.9999990507086882, iteration: 14625
loss: 0.9867326617240906,grad_norm: 0.9999991166914662, iteration: 14626
loss: 1.0223479270935059,grad_norm: 0.999999262433141, iteration: 14627
loss: 1.0550469160079956,grad_norm: 0.9999993182316537, iteration: 14628
loss: 1.0538884401321411,grad_norm: 0.9999993901291825, iteration: 14629
loss: 1.0119068622589111,grad_norm: 0.9999992223696064, iteration: 14630
loss: 1.0041612386703491,grad_norm: 0.999999210655446, iteration: 14631
loss: 1.0485193729400635,grad_norm: 0.9999992410590824, iteration: 14632
loss: 1.0342453718185425,grad_norm: 0.9999992817385162, iteration: 14633
loss: 1.0131151676177979,grad_norm: 0.9999991727078374, iteration: 14634
loss: 0.9811360836029053,grad_norm: 0.9999990014311362, iteration: 14635
loss: 1.0122638940811157,grad_norm: 0.9999991072319502, iteration: 14636
loss: 1.0682412385940552,grad_norm: 0.9999996025133194, iteration: 14637
loss: 1.0195151567459106,grad_norm: 0.9999991524693601, iteration: 14638
loss: 1.0185613632202148,grad_norm: 0.9999990751607344, iteration: 14639
loss: 0.9980358481407166,grad_norm: 0.9999991184869617, iteration: 14640
loss: 1.0637601613998413,grad_norm: 0.9999991995020175, iteration: 14641
loss: 1.0840927362442017,grad_norm: 0.9999996635368026, iteration: 14642
loss: 0.9995801448822021,grad_norm: 0.999999207976129, iteration: 14643
loss: 1.013571858406067,grad_norm: 0.9999991684233694, iteration: 14644
loss: 0.9937765598297119,grad_norm: 0.9999991700580674, iteration: 14645
loss: 1.0031605958938599,grad_norm: 0.9999993093376779, iteration: 14646
loss: 0.9896063208580017,grad_norm: 0.9999992542400865, iteration: 14647
loss: 1.033307671546936,grad_norm: 0.9999991370441315, iteration: 14648
loss: 1.0479341745376587,grad_norm: 0.999999234135605, iteration: 14649
loss: 1.0091339349746704,grad_norm: 0.9999994611997838, iteration: 14650
loss: 1.0301133394241333,grad_norm: 0.9999991885395502, iteration: 14651
loss: 0.9341815114021301,grad_norm: 0.9999991113052143, iteration: 14652
loss: 1.021622896194458,grad_norm: 0.9999993076293783, iteration: 14653
loss: 1.0155651569366455,grad_norm: 0.9999992027382284, iteration: 14654
loss: 1.0077342987060547,grad_norm: 0.9999991971382156, iteration: 14655
loss: 1.000512957572937,grad_norm: 0.9999990574517271, iteration: 14656
loss: 1.0071598291397095,grad_norm: 0.9999990678431213, iteration: 14657
loss: 1.0559026002883911,grad_norm: 0.999999170135196, iteration: 14658
loss: 1.0537545680999756,grad_norm: 0.9999993479424614, iteration: 14659
loss: 1.0316755771636963,grad_norm: 0.9999992329831583, iteration: 14660
loss: 1.0343917608261108,grad_norm: 0.9999992522443844, iteration: 14661
loss: 0.9876223802566528,grad_norm: 0.9999991901425583, iteration: 14662
loss: 0.9659703969955444,grad_norm: 0.9999992472458773, iteration: 14663
loss: 1.0109102725982666,grad_norm: 0.9999992694442683, iteration: 14664
loss: 1.023074746131897,grad_norm: 0.9999992906540928, iteration: 14665
loss: 0.973497211933136,grad_norm: 0.9999991513868653, iteration: 14666
loss: 1.0442463159561157,grad_norm: 0.9999990505413404, iteration: 14667
loss: 1.0349880456924438,grad_norm: 0.9999992135272332, iteration: 14668
loss: 1.014479398727417,grad_norm: 0.9999992472159135, iteration: 14669
loss: 1.0097391605377197,grad_norm: 0.9999995373117204, iteration: 14670
loss: 1.040266513824463,grad_norm: 0.9999990628338559, iteration: 14671
loss: 1.0423274040222168,grad_norm: 0.9999995489067466, iteration: 14672
loss: 1.0318167209625244,grad_norm: 0.9999998819043352, iteration: 14673
loss: 1.0034031867980957,grad_norm: 0.9999992777737935, iteration: 14674
loss: 1.044817566871643,grad_norm: 0.9999993540092182, iteration: 14675
loss: 0.9974784255027771,grad_norm: 0.9999990238841365, iteration: 14676
loss: 1.0478448867797852,grad_norm: 0.9999993570593709, iteration: 14677
loss: 0.9748414754867554,grad_norm: 0.9999992447320871, iteration: 14678
loss: 1.0385841131210327,grad_norm: 0.9999992382586175, iteration: 14679
loss: 1.0415122509002686,grad_norm: 0.999999218312346, iteration: 14680
loss: 1.0073802471160889,grad_norm: 0.9999991842570114, iteration: 14681
loss: 0.9598712921142578,grad_norm: 0.9999989835319274, iteration: 14682
loss: 1.0138065814971924,grad_norm: 0.999999164226052, iteration: 14683
loss: 0.9918877482414246,grad_norm: 0.9999991500449775, iteration: 14684
loss: 1.0430617332458496,grad_norm: 0.9999994795207603, iteration: 14685
loss: 0.9853231310844421,grad_norm: 0.9999992170400023, iteration: 14686
loss: 1.0149794816970825,grad_norm: 0.9999995109345323, iteration: 14687
loss: 0.9874362349510193,grad_norm: 0.9999996531482495, iteration: 14688
loss: 1.0016173124313354,grad_norm: 0.9999993212832498, iteration: 14689
loss: 1.0574222803115845,grad_norm: 0.9999996244155965, iteration: 14690
loss: 0.9891417622566223,grad_norm: 0.9999992470465835, iteration: 14691
loss: 1.096801519393921,grad_norm: 0.9999994379257243, iteration: 14692
loss: 1.035315990447998,grad_norm: 0.9999989971309862, iteration: 14693
loss: 1.0647237300872803,grad_norm: 0.9999991061631974, iteration: 14694
loss: 1.0261247158050537,grad_norm: 0.999999865774623, iteration: 14695
loss: 0.9573781490325928,grad_norm: 0.999999215575426, iteration: 14696
loss: 1.001502513885498,grad_norm: 0.9999991516557061, iteration: 14697
loss: 1.0681390762329102,grad_norm: 0.9999992392966945, iteration: 14698
loss: 1.008296012878418,grad_norm: 0.9999990523573675, iteration: 14699
loss: 0.987925112247467,grad_norm: 0.9999991539045439, iteration: 14700
loss: 1.0235559940338135,grad_norm: 0.9716017930049544, iteration: 14701
loss: 1.0122919082641602,grad_norm: 0.9999991988632705, iteration: 14702
loss: 1.0494906902313232,grad_norm: 0.999999175389563, iteration: 14703
loss: 1.0025062561035156,grad_norm: 0.9999998510754399, iteration: 14704
loss: 1.0464004278182983,grad_norm: 0.9814712423092372, iteration: 14705
loss: 1.036959171295166,grad_norm: 0.9999995670436467, iteration: 14706
loss: 1.0367577075958252,grad_norm: 0.9999992075079422, iteration: 14707
loss: 1.02305269241333,grad_norm: 0.9999997173417429, iteration: 14708
loss: 0.9796546697616577,grad_norm: 0.9999991456308125, iteration: 14709
loss: 1.0076829195022583,grad_norm: 0.9999991392483062, iteration: 14710
loss: 1.04046630859375,grad_norm: 0.9999995812904566, iteration: 14711
loss: 1.0618622303009033,grad_norm: 0.9999990776958386, iteration: 14712
loss: 1.029144048690796,grad_norm: 0.9999991739971035, iteration: 14713
loss: 1.0201393365859985,grad_norm: 0.9999993228872507, iteration: 14714
loss: 1.0260285139083862,grad_norm: 0.9999993986807334, iteration: 14715
loss: 1.0257560014724731,grad_norm: 0.9999991546610898, iteration: 14716
loss: 0.9890369176864624,grad_norm: 0.999999141159694, iteration: 14717
loss: 0.9772925972938538,grad_norm: 0.9999992262629096, iteration: 14718
loss: 0.991178035736084,grad_norm: 0.9999990943531472, iteration: 14719
loss: 1.0641052722930908,grad_norm: 0.9999996267757592, iteration: 14720
loss: 1.020965576171875,grad_norm: 0.9999992284802999, iteration: 14721
loss: 1.0018064975738525,grad_norm: 0.9999990972252865, iteration: 14722
loss: 1.0188487768173218,grad_norm: 0.9999995475250552, iteration: 14723
loss: 1.0453332662582397,grad_norm: 0.9999992131976143, iteration: 14724
loss: 0.9956551194190979,grad_norm: 0.9999990512450909, iteration: 14725
loss: 1.0149813890457153,grad_norm: 0.9999993586690965, iteration: 14726
loss: 1.0589593648910522,grad_norm: 0.9999990454270363, iteration: 14727
loss: 1.0075973272323608,grad_norm: 0.9999995785058072, iteration: 14728
loss: 1.0015933513641357,grad_norm: 0.999999127113781, iteration: 14729
loss: 1.0060559511184692,grad_norm: 0.9999990609945641, iteration: 14730
loss: 1.0125017166137695,grad_norm: 0.9999994662066186, iteration: 14731
loss: 1.0456982851028442,grad_norm: 0.9999992237083108, iteration: 14732
loss: 1.0548006296157837,grad_norm: 0.9999993326717784, iteration: 14733
loss: 1.0211604833602905,grad_norm: 0.9999991592341084, iteration: 14734
loss: 1.013121247291565,grad_norm: 0.9999990814663311, iteration: 14735
loss: 1.005408763885498,grad_norm: 0.9516284148960265, iteration: 14736
loss: 1.0230326652526855,grad_norm: 0.9894482370270733, iteration: 14737
loss: 1.0441749095916748,grad_norm: 0.9999992497876227, iteration: 14738
loss: 1.02732253074646,grad_norm: 0.999999297181242, iteration: 14739
loss: 1.0375821590423584,grad_norm: 0.9999991980106707, iteration: 14740
loss: 1.0390077829360962,grad_norm: 0.9999995360714912, iteration: 14741
loss: 1.0046154260635376,grad_norm: 0.9999992535693742, iteration: 14742
loss: 1.0087686777114868,grad_norm: 0.9999990681386445, iteration: 14743
loss: 1.0353169441223145,grad_norm: 0.9999996280112297, iteration: 14744
loss: 1.070417046546936,grad_norm: 0.9999990881935986, iteration: 14745
loss: 1.0098873376846313,grad_norm: 0.9999991618781636, iteration: 14746
loss: 1.0124889612197876,grad_norm: 0.999999185250493, iteration: 14747
loss: 1.0695677995681763,grad_norm: 0.9999995262785331, iteration: 14748
loss: 1.0239256620407104,grad_norm: 0.999999472499737, iteration: 14749
loss: 1.0473793745040894,grad_norm: 0.9999999034334501, iteration: 14750
loss: 1.059963583946228,grad_norm: 0.9999994183448427, iteration: 14751
loss: 1.0499259233474731,grad_norm: 0.9999991421293669, iteration: 14752
loss: 1.0022841691970825,grad_norm: 0.9999990657256538, iteration: 14753
loss: 1.0338549613952637,grad_norm: 0.9999992100376324, iteration: 14754
loss: 0.9992513656616211,grad_norm: 0.9999991090108032, iteration: 14755
loss: 1.051317811012268,grad_norm: 0.9999992580264946, iteration: 14756
loss: 1.0198060274124146,grad_norm: 0.9209464549599842, iteration: 14757
loss: 1.020655632019043,grad_norm: 0.9999990960522639, iteration: 14758
loss: 1.0320789813995361,grad_norm: 0.9999994196674384, iteration: 14759
loss: 1.1081398725509644,grad_norm: 0.9999996129930635, iteration: 14760
loss: 1.006558895111084,grad_norm: 0.9999996548759585, iteration: 14761
loss: 1.0617411136627197,grad_norm: 0.9999991067835986, iteration: 14762
loss: 0.9735241532325745,grad_norm: 0.9799696796470061, iteration: 14763
loss: 1.0887441635131836,grad_norm: 0.9999995270822665, iteration: 14764
loss: 1.010629415512085,grad_norm: 0.9999991266920165, iteration: 14765
loss: 1.0290969610214233,grad_norm: 0.9999994778436392, iteration: 14766
loss: 1.0256444215774536,grad_norm: 0.9999993233641289, iteration: 14767
loss: 1.042495846748352,grad_norm: 0.9795984138043509, iteration: 14768
loss: 1.054329514503479,grad_norm: 0.9999989842843873, iteration: 14769
loss: 1.0378104448318481,grad_norm: 0.9999993821141285, iteration: 14770
loss: 1.045836091041565,grad_norm: 0.9999990990926979, iteration: 14771
loss: 1.0518379211425781,grad_norm: 0.9999992675051352, iteration: 14772
loss: 1.0211355686187744,grad_norm: 0.9999991217797104, iteration: 14773
loss: 1.003880500793457,grad_norm: 0.9999993227128505, iteration: 14774
loss: 1.0395041704177856,grad_norm: 0.999999079572921, iteration: 14775
loss: 1.0256611108779907,grad_norm: 0.9999993007659548, iteration: 14776
loss: 0.9988807439804077,grad_norm: 0.9999991821407229, iteration: 14777
loss: 1.0179020166397095,grad_norm: 0.9999991370187793, iteration: 14778
loss: 1.099319338798523,grad_norm: 0.9999997454852495, iteration: 14779
loss: 0.9938784241676331,grad_norm: 0.9999991010544512, iteration: 14780
loss: 1.0379616022109985,grad_norm: 0.9999992894810719, iteration: 14781
loss: 1.0671583414077759,grad_norm: 0.9999991947615852, iteration: 14782
loss: 1.0537751913070679,grad_norm: 0.9999992857178492, iteration: 14783
loss: 0.9822075963020325,grad_norm: 0.9999991689135068, iteration: 14784
loss: 1.0547726154327393,grad_norm: 0.999999762280043, iteration: 14785
loss: 1.0589721202850342,grad_norm: 0.999999341456067, iteration: 14786
loss: 0.945236325263977,grad_norm: 0.9999992580095313, iteration: 14787
loss: 1.0584571361541748,grad_norm: 0.9999992701014956, iteration: 14788
loss: 1.0577492713928223,grad_norm: 0.920222711598176, iteration: 14789
loss: 1.041509747505188,grad_norm: 0.9999993926347702, iteration: 14790
loss: 1.007145881652832,grad_norm: 0.9999993679214659, iteration: 14791
loss: 0.9899303913116455,grad_norm: 0.972687017973795, iteration: 14792
loss: 1.0516842603683472,grad_norm: 0.9999997468952753, iteration: 14793
loss: 1.0765268802642822,grad_norm: 0.9999997471783628, iteration: 14794
loss: 1.0194154977798462,grad_norm: 0.9999993584325396, iteration: 14795
loss: 1.0241166353225708,grad_norm: 0.9999991736395908, iteration: 14796
loss: 0.9736482501029968,grad_norm: 0.999999087226985, iteration: 14797
loss: 1.0192869901657104,grad_norm: 0.9999993590319257, iteration: 14798
loss: 1.1986693143844604,grad_norm: 0.9999998041284769, iteration: 14799
loss: 1.0048917531967163,grad_norm: 0.9999990993321848, iteration: 14800
loss: 1.0521135330200195,grad_norm: 0.9999992434215328, iteration: 14801
loss: 1.0403800010681152,grad_norm: 0.9999992493268871, iteration: 14802
loss: 0.9887580275535583,grad_norm: 0.8881911158666919, iteration: 14803
loss: 1.0350390672683716,grad_norm: 0.9999992098008922, iteration: 14804
loss: 1.0522642135620117,grad_norm: 0.9999991122917244, iteration: 14805
loss: 1.0136890411376953,grad_norm: 0.9999992632459788, iteration: 14806
loss: 1.0398789644241333,grad_norm: 0.9999992051443387, iteration: 14807
loss: 1.0273959636688232,grad_norm: 0.9507416265110927, iteration: 14808
loss: 1.0466411113739014,grad_norm: 0.9999990600603836, iteration: 14809
loss: 1.0078752040863037,grad_norm: 0.99999945212728, iteration: 14810
loss: 1.036327600479126,grad_norm: 0.9999992243376635, iteration: 14811
loss: 1.024611234664917,grad_norm: 0.9999992772091744, iteration: 14812
loss: 1.0681934356689453,grad_norm: 0.9999992428827186, iteration: 14813
loss: 1.0236791372299194,grad_norm: 0.9999991111431991, iteration: 14814
loss: 0.9906421303749084,grad_norm: 0.9999991695417298, iteration: 14815
loss: 1.0167690515518188,grad_norm: 0.9999990449799419, iteration: 14816
loss: 1.0806193351745605,grad_norm: 0.9999997390045173, iteration: 14817
loss: 1.093021035194397,grad_norm: 0.9999993328201617, iteration: 14818
loss: 1.0034968852996826,grad_norm: 0.9999994325429118, iteration: 14819
loss: 1.0102012157440186,grad_norm: 0.9999992302378594, iteration: 14820
loss: 0.993564248085022,grad_norm: 0.9110351495478075, iteration: 14821
loss: 1.0110483169555664,grad_norm: 0.9999991781067822, iteration: 14822
loss: 1.0374749898910522,grad_norm: 0.9999993393453251, iteration: 14823
loss: 0.9651287794113159,grad_norm: 0.9999995011389587, iteration: 14824
loss: 1.0371936559677124,grad_norm: 0.999999338000298, iteration: 14825
loss: 1.0193655490875244,grad_norm: 0.9999992840307513, iteration: 14826
loss: 0.9914867877960205,grad_norm: 0.999999184708823, iteration: 14827
loss: 1.0537326335906982,grad_norm: 0.9999991395516349, iteration: 14828
loss: 1.0184862613677979,grad_norm: 0.9999994384547752, iteration: 14829
loss: 1.0580114126205444,grad_norm: 0.9999990887521883, iteration: 14830
loss: 0.9702408909797668,grad_norm: 0.9999992913627999, iteration: 14831
loss: 0.9990299940109253,grad_norm: 0.9999991492682405, iteration: 14832
loss: 0.974666953086853,grad_norm: 0.9999991559492121, iteration: 14833
loss: 0.9974908828735352,grad_norm: 0.9999992640973004, iteration: 14834
loss: 0.983630895614624,grad_norm: 0.9999990105526977, iteration: 14835
loss: 1.0423483848571777,grad_norm: 0.9999991203934752, iteration: 14836
loss: 0.9966666102409363,grad_norm: 0.9999995773798178, iteration: 14837
loss: 0.9881264567375183,grad_norm: 0.9999991423970839, iteration: 14838
loss: 1.0231977701187134,grad_norm: 0.9999990475701529, iteration: 14839
loss: 1.017411470413208,grad_norm: 0.9999992302740542, iteration: 14840
loss: 1.046648383140564,grad_norm: 0.9999989641417779, iteration: 14841
loss: 0.9998844265937805,grad_norm: 0.9999992185072935, iteration: 14842
loss: 1.1128309965133667,grad_norm: 0.9999996418570138, iteration: 14843
loss: 1.0491281747817993,grad_norm: 0.9999991853416947, iteration: 14844
loss: 0.9778419137001038,grad_norm: 0.9999992274666589, iteration: 14845
loss: 1.0167531967163086,grad_norm: 0.9999991541870746, iteration: 14846
loss: 1.0852274894714355,grad_norm: 0.9999991142550362, iteration: 14847
loss: 1.0277968645095825,grad_norm: 0.9999991180868898, iteration: 14848
loss: 1.042860746383667,grad_norm: 0.9999995689302258, iteration: 14849
loss: 1.0026092529296875,grad_norm: 0.9999992683189538, iteration: 14850
loss: 0.9906558394432068,grad_norm: 0.9999995403696477, iteration: 14851
loss: 1.059216022491455,grad_norm: 0.995608751422481, iteration: 14852
loss: 1.0124613046646118,grad_norm: 0.999999266838806, iteration: 14853
loss: 1.0336226224899292,grad_norm: 0.9999992608056497, iteration: 14854
loss: 1.0157041549682617,grad_norm: 0.9999993964555306, iteration: 14855
loss: 1.0040931701660156,grad_norm: 0.9130322022415199, iteration: 14856
loss: 1.0630683898925781,grad_norm: 0.9999994154871046, iteration: 14857
loss: 1.0267359018325806,grad_norm: 0.9999992865683469, iteration: 14858
loss: 1.0075956583023071,grad_norm: 0.9999992044424735, iteration: 14859
loss: 1.0273758172988892,grad_norm: 0.9999991070472362, iteration: 14860
loss: 1.1191644668579102,grad_norm: 0.9999996330865054, iteration: 14861
loss: 1.0465235710144043,grad_norm: 0.9999996137963016, iteration: 14862
loss: 0.9978818297386169,grad_norm: 0.9999992857391643, iteration: 14863
loss: 1.042470097541809,grad_norm: 0.9999993079459027, iteration: 14864
loss: 1.0401396751403809,grad_norm: 0.9999990942312379, iteration: 14865
loss: 1.0399415493011475,grad_norm: 0.9999992511347023, iteration: 14866
loss: 0.9886787533760071,grad_norm: 0.9999992849823965, iteration: 14867
loss: 1.0234370231628418,grad_norm: 0.9999991029299987, iteration: 14868
loss: 1.0977377891540527,grad_norm: 0.9999992036092935, iteration: 14869
loss: 1.057105541229248,grad_norm: 0.999999285328038, iteration: 14870
loss: 1.0792347192764282,grad_norm: 0.9999996263495563, iteration: 14871
loss: 1.0580902099609375,grad_norm: 0.9999995005923117, iteration: 14872
loss: 1.027776837348938,grad_norm: 0.9999994185761389, iteration: 14873
loss: 1.0374083518981934,grad_norm: 0.9779451221801598, iteration: 14874
loss: 1.07150137424469,grad_norm: 0.9999998047004727, iteration: 14875
loss: 1.0401432514190674,grad_norm: 0.9999990943447018, iteration: 14876
loss: 1.047945499420166,grad_norm: 0.9999991249806816, iteration: 14877
loss: 1.038695216178894,grad_norm: 0.9999992521389764, iteration: 14878
loss: 1.029981255531311,grad_norm: 0.9999994643576977, iteration: 14879
loss: 1.014841079711914,grad_norm: 0.9999992713772455, iteration: 14880
loss: 1.0207847356796265,grad_norm: 0.9918695795600941, iteration: 14881
loss: 1.026798963546753,grad_norm: 0.9999991373675854, iteration: 14882
loss: 0.9976097941398621,grad_norm: 0.9999991499464145, iteration: 14883
loss: 1.038690209388733,grad_norm: 0.999999160830492, iteration: 14884
loss: 1.0608497858047485,grad_norm: 0.9999993710630607, iteration: 14885
loss: 1.003055453300476,grad_norm: 0.9999992653194473, iteration: 14886
loss: 1.0396569967269897,grad_norm: 0.9940299446127648, iteration: 14887
loss: 1.026528000831604,grad_norm: 0.9999992860022978, iteration: 14888
loss: 1.0585194826126099,grad_norm: 0.9999992071453886, iteration: 14889
loss: 1.0299452543258667,grad_norm: 0.9999998538260875, iteration: 14890
loss: 1.0048376321792603,grad_norm: 0.9730657337568178, iteration: 14891
loss: 1.0061161518096924,grad_norm: 0.9999992460505998, iteration: 14892
loss: 1.0331146717071533,grad_norm: 0.9999992202977118, iteration: 14893
loss: 1.0395842790603638,grad_norm: 0.9999993259792049, iteration: 14894
loss: 1.0309250354766846,grad_norm: 0.9999991543576163, iteration: 14895
loss: 0.995535671710968,grad_norm: 0.9999994702278661, iteration: 14896
loss: 1.0156441926956177,grad_norm: 0.9999991333147878, iteration: 14897
loss: 1.0477019548416138,grad_norm: 0.999999285697308, iteration: 14898
loss: 1.072084903717041,grad_norm: 0.9999992109409911, iteration: 14899
loss: 1.0442174673080444,grad_norm: 0.99999974156191, iteration: 14900
loss: 1.0559927225112915,grad_norm: 0.9999997816686167, iteration: 14901
loss: 0.9942920804023743,grad_norm: 0.9999991265109691, iteration: 14902
loss: 1.0031278133392334,grad_norm: 0.9999991037554083, iteration: 14903
loss: 1.0284911394119263,grad_norm: 0.9999991578531157, iteration: 14904
loss: 0.9969671368598938,grad_norm: 0.9999992732231081, iteration: 14905
loss: 1.008271336555481,grad_norm: 0.9999990708551773, iteration: 14906
loss: 1.0345879793167114,grad_norm: 0.9999991420001305, iteration: 14907
loss: 1.0381369590759277,grad_norm: 0.9999990399310916, iteration: 14908
loss: 1.0240259170532227,grad_norm: 0.9999992305435225, iteration: 14909
loss: 1.009136438369751,grad_norm: 0.9999992045606276, iteration: 14910
loss: 1.048629641532898,grad_norm: 0.9999991150024171, iteration: 14911
loss: 1.0034961700439453,grad_norm: 0.9999991462657193, iteration: 14912
loss: 0.9977912306785583,grad_norm: 0.9999991510909896, iteration: 14913
loss: 1.0596599578857422,grad_norm: 0.9999995583310572, iteration: 14914
loss: 1.026343822479248,grad_norm: 0.9999994713467861, iteration: 14915
loss: 1.04277765750885,grad_norm: 0.9999990081505132, iteration: 14916
loss: 0.9884016513824463,grad_norm: 0.9999991117654219, iteration: 14917
loss: 1.0605112314224243,grad_norm: 0.9999995929542672, iteration: 14918
loss: 1.026228666305542,grad_norm: 0.999999100280556, iteration: 14919
loss: 1.0823713541030884,grad_norm: 0.9999994102822953, iteration: 14920
loss: 1.0666834115982056,grad_norm: 0.999999627498722, iteration: 14921
loss: 0.9836585521697998,grad_norm: 0.9999990300509591, iteration: 14922
loss: 1.0026253461837769,grad_norm: 0.9999991992014051, iteration: 14923
loss: 1.0529786348342896,grad_norm: 0.9999992189035363, iteration: 14924
loss: 1.0166230201721191,grad_norm: 0.9999992094406119, iteration: 14925
loss: 1.0022302865982056,grad_norm: 0.9999992470336523, iteration: 14926
loss: 1.032576322555542,grad_norm: 0.9999993581450812, iteration: 14927
loss: 1.0698051452636719,grad_norm: 0.9999993320913774, iteration: 14928
loss: 1.010529637336731,grad_norm: 0.9999993295217539, iteration: 14929
loss: 0.9789713025093079,grad_norm: 0.9999992570025488, iteration: 14930
loss: 1.0165117979049683,grad_norm: 0.9278265801190161, iteration: 14931
loss: 1.0168442726135254,grad_norm: 0.9999992776126324, iteration: 14932
loss: 0.9921427369117737,grad_norm: 0.9564682473703732, iteration: 14933
loss: 1.0032498836517334,grad_norm: 0.9999992063990457, iteration: 14934
loss: 0.9968302249908447,grad_norm: 0.999999184566691, iteration: 14935
loss: 1.0227091312408447,grad_norm: 0.9999992556492677, iteration: 14936
loss: 1.0151019096374512,grad_norm: 0.9943314979628434, iteration: 14937
loss: 1.0855414867401123,grad_norm: 0.9999991588877819, iteration: 14938
loss: 0.9883387088775635,grad_norm: 0.9999992484856457, iteration: 14939
loss: 1.0506240129470825,grad_norm: 0.9999991628926749, iteration: 14940
loss: 0.9762305617332458,grad_norm: 0.9999992037505265, iteration: 14941
loss: 1.0568177700042725,grad_norm: 0.9999991345246726, iteration: 14942
loss: 1.0045865774154663,grad_norm: 0.999999086170255, iteration: 14943
loss: 0.9756879210472107,grad_norm: 0.999999252688134, iteration: 14944
loss: 1.008336067199707,grad_norm: 0.9999991762181647, iteration: 14945
loss: 0.9988534450531006,grad_norm: 0.9999993089540588, iteration: 14946
loss: 1.0260496139526367,grad_norm: 0.9999989665849518, iteration: 14947
loss: 1.0320744514465332,grad_norm: 0.9999989975603275, iteration: 14948
loss: 1.0056520700454712,grad_norm: 0.9999991035705311, iteration: 14949
loss: 1.0172479152679443,grad_norm: 0.9999991725444893, iteration: 14950
loss: 0.9589863419532776,grad_norm: 0.9999992697170893, iteration: 14951
loss: 1.0042126178741455,grad_norm: 0.9999990807334685, iteration: 14952
loss: 1.020449161529541,grad_norm: 0.9999991995140588, iteration: 14953
loss: 0.9926288723945618,grad_norm: 0.999999651365966, iteration: 14954
loss: 1.0367047786712646,grad_norm: 0.9999991903771472, iteration: 14955
loss: 1.0314127206802368,grad_norm: 0.9999991616938508, iteration: 14956
loss: 1.033825397491455,grad_norm: 0.999999292308498, iteration: 14957
loss: 1.0010319948196411,grad_norm: 0.9999990980955112, iteration: 14958
loss: 1.0475488901138306,grad_norm: 0.8930709605315941, iteration: 14959
loss: 1.0411949157714844,grad_norm: 0.9999992998374881, iteration: 14960
loss: 1.020928144454956,grad_norm: 0.9999991926505759, iteration: 14961
loss: 1.0458526611328125,grad_norm: 0.9152806612667203, iteration: 14962
loss: 1.0102593898773193,grad_norm: 0.9999993296942917, iteration: 14963
loss: 1.0264091491699219,grad_norm: 0.9999990158057448, iteration: 14964
loss: 1.02971613407135,grad_norm: 0.8396486736281558, iteration: 14965
loss: 1.0615732669830322,grad_norm: 0.999999554389022, iteration: 14966
loss: 1.0000264644622803,grad_norm: 0.9999992567534021, iteration: 14967
loss: 1.037369728088379,grad_norm: 0.9999992346424458, iteration: 14968
loss: 1.049363136291504,grad_norm: 0.9999993236705653, iteration: 14969
loss: 1.014762043952942,grad_norm: 0.9999990579776303, iteration: 14970
loss: 1.0373783111572266,grad_norm: 0.9999991138957215, iteration: 14971
loss: 1.0012388229370117,grad_norm: 0.9999992517582613, iteration: 14972
loss: 1.043860912322998,grad_norm: 0.9999990853863324, iteration: 14973
loss: 1.0209195613861084,grad_norm: 0.999999320300985, iteration: 14974
loss: 1.0500468015670776,grad_norm: 0.9999992398916926, iteration: 14975
loss: 1.0038713216781616,grad_norm: 0.9999992634010948, iteration: 14976
loss: 1.029966950416565,grad_norm: 0.9999992172826839, iteration: 14977
loss: 1.0327197313308716,grad_norm: 0.9111043904707136, iteration: 14978
loss: 1.012533187866211,grad_norm: 0.9999990790697503, iteration: 14979
loss: 1.0033602714538574,grad_norm: 0.9992528112859189, iteration: 14980
loss: 1.0508904457092285,grad_norm: 0.9681484512832539, iteration: 14981
loss: 1.0183850526809692,grad_norm: 0.9999990937282861, iteration: 14982
loss: 0.9926730990409851,grad_norm: 0.9999991505394147, iteration: 14983
loss: 1.0460562705993652,grad_norm: 0.9999993419868466, iteration: 14984
loss: 1.0244587659835815,grad_norm: 0.9999993034656108, iteration: 14985
loss: 1.0445353984832764,grad_norm: 0.9999990585767926, iteration: 14986
loss: 0.997075617313385,grad_norm: 0.9999991094375157, iteration: 14987
loss: 1.0470919609069824,grad_norm: 0.9999992865934013, iteration: 14988
loss: 0.9614989161491394,grad_norm: 0.9999990044706609, iteration: 14989
loss: 1.0498064756393433,grad_norm: 0.9999992014122433, iteration: 14990
loss: 1.0405761003494263,grad_norm: 0.9999991097407691, iteration: 14991
loss: 1.0687580108642578,grad_norm: 0.9999998374522456, iteration: 14992
loss: 1.0364153385162354,grad_norm: 0.9999991623597398, iteration: 14993
loss: 1.0223639011383057,grad_norm: 0.9999990308227583, iteration: 14994
loss: 0.9952003955841064,grad_norm: 0.9999992261811037, iteration: 14995
loss: 0.9870335459709167,grad_norm: 0.999999264589505, iteration: 14996
loss: 1.0549712181091309,grad_norm: 0.9999991558283617, iteration: 14997
loss: 1.0387670993804932,grad_norm: 0.9999993396709509, iteration: 14998
loss: 1.0704982280731201,grad_norm: 0.9999991659779836, iteration: 14999
loss: 1.0259352922439575,grad_norm: 0.9999990338566372, iteration: 15000
loss: 1.013048768043518,grad_norm: 0.9999992419085926, iteration: 15001
loss: 1.0241115093231201,grad_norm: 0.9999990723136779, iteration: 15002
loss: 1.0789769887924194,grad_norm: 0.9999994000188521, iteration: 15003
loss: 0.9926087856292725,grad_norm: 0.9999992282675663, iteration: 15004
loss: 1.01435124874115,grad_norm: 0.9999993428759654, iteration: 15005
loss: 1.0114970207214355,grad_norm: 0.9999994030161302, iteration: 15006
loss: 1.000329852104187,grad_norm: 0.9999991255961045, iteration: 15007
loss: 1.0320793390274048,grad_norm: 0.9777968294517069, iteration: 15008
loss: 1.0014408826828003,grad_norm: 0.9999993889872327, iteration: 15009
loss: 1.0496186017990112,grad_norm: 0.9975413192460976, iteration: 15010
loss: 1.0361179113388062,grad_norm: 0.9007548102064394, iteration: 15011
loss: 1.031959891319275,grad_norm: 0.9999993418083384, iteration: 15012
loss: 1.033531904220581,grad_norm: 0.9999989405234159, iteration: 15013
loss: 0.9693929553031921,grad_norm: 0.9999992965916725, iteration: 15014
loss: 1.0413085222244263,grad_norm: 0.9999990348445533, iteration: 15015
loss: 1.0606037378311157,grad_norm: 0.8972427010434377, iteration: 15016
loss: 1.0432215929031372,grad_norm: 0.9999992714127046, iteration: 15017
loss: 0.9944654107093811,grad_norm: 0.9999992291563755, iteration: 15018
loss: 1.0241495370864868,grad_norm: 0.9999993115463625, iteration: 15019
loss: 0.9973426461219788,grad_norm: 0.9999992916363024, iteration: 15020
loss: 1.0496888160705566,grad_norm: 0.9807416871265793, iteration: 15021
loss: 1.0124199390411377,grad_norm: 0.7992148115810157, iteration: 15022
loss: 1.0412838459014893,grad_norm: 0.9999992554056414, iteration: 15023
loss: 0.9532725214958191,grad_norm: 0.8732883150545412, iteration: 15024
loss: 0.9993613362312317,grad_norm: 0.9999990094138207, iteration: 15025
loss: 1.030076026916504,grad_norm: 0.9999993690327091, iteration: 15026
loss: 1.0217026472091675,grad_norm: 0.999999061474707, iteration: 15027
loss: 0.9937939047813416,grad_norm: 0.9839997212653083, iteration: 15028
loss: 1.0568697452545166,grad_norm: 0.999999555807466, iteration: 15029
loss: 1.0760527849197388,grad_norm: 0.9999993887229341, iteration: 15030
loss: 1.075123906135559,grad_norm: 0.9999992881865186, iteration: 15031
loss: 1.0139561891555786,grad_norm: 0.9999992393260003, iteration: 15032
loss: 0.9736006259918213,grad_norm: 0.9999992276721312, iteration: 15033
loss: 1.027904987335205,grad_norm: 0.9999992271983524, iteration: 15034
loss: 1.0517548322677612,grad_norm: 0.9999993973248804, iteration: 15035
loss: 1.0259473323822021,grad_norm: 0.9999991293250176, iteration: 15036
loss: 1.0367910861968994,grad_norm: 0.9999991989413289, iteration: 15037
loss: 1.0340322256088257,grad_norm: 0.9999993658889063, iteration: 15038
loss: 1.0089950561523438,grad_norm: 0.9999992216876764, iteration: 15039
loss: 1.0491793155670166,grad_norm: 0.9999997285826397, iteration: 15040
loss: 0.9907615184783936,grad_norm: 0.999999144014402, iteration: 15041
loss: 1.0306051969528198,grad_norm: 0.999999235667659, iteration: 15042
loss: 1.0456961393356323,grad_norm: 0.999999329936598, iteration: 15043
loss: 1.0220789909362793,grad_norm: 0.999999165212028, iteration: 15044
loss: 1.0255399942398071,grad_norm: 0.9999990903396998, iteration: 15045
loss: 1.0467126369476318,grad_norm: 0.9999993394729985, iteration: 15046
loss: 1.012283205986023,grad_norm: 0.9373253067494904, iteration: 15047
loss: 1.037824034690857,grad_norm: 0.9999992845672924, iteration: 15048
loss: 0.9669971466064453,grad_norm: 0.9999990759493125, iteration: 15049
loss: 1.0429068803787231,grad_norm: 0.9999992443778455, iteration: 15050
loss: 1.0227468013763428,grad_norm: 0.9999990358022504, iteration: 15051
loss: 1.0175800323486328,grad_norm: 0.9999990573400249, iteration: 15052
loss: 1.0236786603927612,grad_norm: 0.9999990882542977, iteration: 15053
loss: 1.0332865715026855,grad_norm: 0.9999996716066918, iteration: 15054
loss: 1.0159460306167603,grad_norm: 0.999999109350677, iteration: 15055
loss: 0.9747719168663025,grad_norm: 0.9999990090847035, iteration: 15056
loss: 1.011122226715088,grad_norm: 0.9999992780319109, iteration: 15057
loss: 1.0447273254394531,grad_norm: 0.999999251380856, iteration: 15058
loss: 0.9918314218521118,grad_norm: 0.9999993492533049, iteration: 15059
loss: 1.0264521837234497,grad_norm: 0.9999992728627064, iteration: 15060
loss: 0.998968243598938,grad_norm: 0.9999991711110308, iteration: 15061
loss: 1.0621719360351562,grad_norm: 0.999999061508427, iteration: 15062
loss: 1.0326155424118042,grad_norm: 0.999999162911545, iteration: 15063
loss: 1.0348546504974365,grad_norm: 0.9999996405761239, iteration: 15064
loss: 0.9844735264778137,grad_norm: 0.9999993049339702, iteration: 15065
loss: 1.0734834671020508,grad_norm: 0.999999521685958, iteration: 15066
loss: 1.0590848922729492,grad_norm: 0.965675552394419, iteration: 15067
loss: 1.0443717241287231,grad_norm: 0.9999995182895512, iteration: 15068
loss: 1.034688949584961,grad_norm: 0.9999991464281568, iteration: 15069
loss: 1.0518282651901245,grad_norm: 0.9999993714239253, iteration: 15070
loss: 1.0335805416107178,grad_norm: 0.9999990176508713, iteration: 15071
loss: 1.056254267692566,grad_norm: 0.999999344902182, iteration: 15072
loss: 1.004830002784729,grad_norm: 0.999999136515747, iteration: 15073
loss: 1.0081787109375,grad_norm: 0.9850033123871533, iteration: 15074
loss: 1.0305036306381226,grad_norm: 0.9999989875477235, iteration: 15075
loss: 1.0349045991897583,grad_norm: 0.99999926900159, iteration: 15076
loss: 1.042388916015625,grad_norm: 0.9999992045991568, iteration: 15077
loss: 0.9846320748329163,grad_norm: 0.9999991406835492, iteration: 15078
loss: 1.0421230792999268,grad_norm: 0.9999995330805372, iteration: 15079
loss: 1.0091384649276733,grad_norm: 0.9999990659020981, iteration: 15080
loss: 1.0625503063201904,grad_norm: 0.9999995688231664, iteration: 15081
loss: 1.0586642026901245,grad_norm: 0.9999996300475996, iteration: 15082
loss: 1.0212150812149048,grad_norm: 0.9999993439159794, iteration: 15083
loss: 1.0421838760375977,grad_norm: 0.9999995633347174, iteration: 15084
loss: 1.035865306854248,grad_norm: 0.9619033822721478, iteration: 15085
loss: 1.039905309677124,grad_norm: 0.999999067643565, iteration: 15086
loss: 1.0369396209716797,grad_norm: 0.999999283223397, iteration: 15087
loss: 1.0591884851455688,grad_norm: 0.9999992280678539, iteration: 15088
loss: 0.9998404383659363,grad_norm: 0.9999994296970051, iteration: 15089
loss: 1.0351414680480957,grad_norm: 0.9886276797816775, iteration: 15090
loss: 1.011842966079712,grad_norm: 0.9999996074483631, iteration: 15091
loss: 1.0378365516662598,grad_norm: 0.9999993493981079, iteration: 15092
loss: 1.0205491781234741,grad_norm: 0.9999990294233724, iteration: 15093
loss: 1.0392426252365112,grad_norm: 0.9999992053080151, iteration: 15094
loss: 0.9979143142700195,grad_norm: 0.9999991638019604, iteration: 15095
loss: 1.0082528591156006,grad_norm: 0.9999991960512942, iteration: 15096
loss: 0.9998628497123718,grad_norm: 0.9999991991960778, iteration: 15097
loss: 1.0036921501159668,grad_norm: 0.999999033499569, iteration: 15098
loss: 1.0509843826293945,grad_norm: 0.9999995350535739, iteration: 15099
loss: 1.036840796470642,grad_norm: 0.9999991941916504, iteration: 15100
loss: 0.9931268095970154,grad_norm: 0.999999198476855, iteration: 15101
loss: 1.0220750570297241,grad_norm: 0.9999990934701324, iteration: 15102
loss: 0.9903895854949951,grad_norm: 0.9654778290543541, iteration: 15103
loss: 1.0006868839263916,grad_norm: 0.9999991448975161, iteration: 15104
loss: 1.0235192775726318,grad_norm: 0.9999992741869935, iteration: 15105
loss: 1.0295730829238892,grad_norm: 0.9999994074294443, iteration: 15106
loss: 1.0455033779144287,grad_norm: 0.9999993017284694, iteration: 15107
loss: 1.0398131608963013,grad_norm: 0.999999344527739, iteration: 15108
loss: 1.0332117080688477,grad_norm: 0.9999992200302715, iteration: 15109
loss: 1.0055112838745117,grad_norm: 0.9999993334382612, iteration: 15110
loss: 1.053787350654602,grad_norm: 0.9999991045365478, iteration: 15111
loss: 1.068706750869751,grad_norm: 0.9999992998409455, iteration: 15112
loss: 1.0568946599960327,grad_norm: 0.9999996857770982, iteration: 15113
loss: 1.0453815460205078,grad_norm: 0.9999992926339899, iteration: 15114
loss: 1.0392874479293823,grad_norm: 0.9999993123608665, iteration: 15115
loss: 1.0040854215621948,grad_norm: 0.9999992484002027, iteration: 15116
loss: 0.9852091073989868,grad_norm: 0.9999992303556205, iteration: 15117
loss: 1.0268197059631348,grad_norm: 0.9999990011243821, iteration: 15118
loss: 1.053972601890564,grad_norm: 0.9999991298814921, iteration: 15119
loss: 1.0097262859344482,grad_norm: 0.9999993169725784, iteration: 15120
loss: 1.0418139696121216,grad_norm: 0.9999993185646109, iteration: 15121
loss: 1.0055686235427856,grad_norm: 0.9999992674429662, iteration: 15122
loss: 1.0210483074188232,grad_norm: 0.9999993949278447, iteration: 15123
loss: 0.9902868270874023,grad_norm: 0.9999991661649293, iteration: 15124
loss: 1.0012304782867432,grad_norm: 0.9999992058467865, iteration: 15125
loss: 0.9929640293121338,grad_norm: 0.9864908114114532, iteration: 15126
loss: 1.0220228433609009,grad_norm: 0.9161893673127716, iteration: 15127
loss: 1.0504279136657715,grad_norm: 0.9999995665734573, iteration: 15128
loss: 1.015214443206787,grad_norm: 0.9999993592454478, iteration: 15129
loss: 0.9936156272888184,grad_norm: 0.9999992962373293, iteration: 15130
loss: 1.0123729705810547,grad_norm: 0.9999991313463328, iteration: 15131
loss: 1.0157179832458496,grad_norm: 0.9999992774914227, iteration: 15132
loss: 1.0285221338272095,grad_norm: 0.9999992268002132, iteration: 15133
loss: 1.0773035287857056,grad_norm: 0.9999993044490151, iteration: 15134
loss: 1.0646735429763794,grad_norm: 0.999999254350402, iteration: 15135
loss: 1.043448805809021,grad_norm: 0.999999153870075, iteration: 15136
loss: 1.0000189542770386,grad_norm: 0.9999990696146254, iteration: 15137
loss: 0.9954427480697632,grad_norm: 0.9999993422184458, iteration: 15138
loss: 0.9499191641807556,grad_norm: 0.9999991950929028, iteration: 15139
loss: 1.0150141716003418,grad_norm: 0.9999990696778558, iteration: 15140
loss: 1.0141706466674805,grad_norm: 0.9999992435683562, iteration: 15141
loss: 1.0272319316864014,grad_norm: 0.9999992223614697, iteration: 15142
loss: 1.0399919748306274,grad_norm: 0.9999994570722522, iteration: 15143
loss: 1.0534985065460205,grad_norm: 0.9999991947798867, iteration: 15144
loss: 0.9802743792533875,grad_norm: 0.9999991279097953, iteration: 15145
loss: 0.9925664663314819,grad_norm: 0.9999993383229, iteration: 15146
loss: 1.0202003717422485,grad_norm: 0.9999991116323452, iteration: 15147
loss: 0.9845104813575745,grad_norm: 0.992951123862256, iteration: 15148
loss: 1.0086827278137207,grad_norm: 0.9999991391954575, iteration: 15149
loss: 0.9955621957778931,grad_norm: 0.999999162587362, iteration: 15150
loss: 1.0004792213439941,grad_norm: 0.999999064491049, iteration: 15151
loss: 0.9962448477745056,grad_norm: 0.9948152663997293, iteration: 15152
loss: 1.0296828746795654,grad_norm: 0.999999242732984, iteration: 15153
loss: 0.9900110363960266,grad_norm: 0.9999990503512903, iteration: 15154
loss: 1.0048140287399292,grad_norm: 0.999999303778184, iteration: 15155
loss: 1.0042777061462402,grad_norm: 0.9999990875902927, iteration: 15156
loss: 0.9957873225212097,grad_norm: 0.8549491210063811, iteration: 15157
loss: 1.0128659009933472,grad_norm: 0.9734954604386928, iteration: 15158
loss: 1.0081787109375,grad_norm: 0.9999991963088708, iteration: 15159
loss: 1.0520048141479492,grad_norm: 0.9999995861607515, iteration: 15160
loss: 0.9966996908187866,grad_norm: 0.9999990820402876, iteration: 15161
loss: 0.9794756174087524,grad_norm: 0.9999991939958426, iteration: 15162
loss: 0.9877414107322693,grad_norm: 0.9999991852179815, iteration: 15163
loss: 1.0397241115570068,grad_norm: 0.9999992469916505, iteration: 15164
loss: 1.0363754034042358,grad_norm: 0.9999991924763746, iteration: 15165
loss: 1.073129653930664,grad_norm: 0.8580160739634625, iteration: 15166
loss: 1.0584194660186768,grad_norm: 0.9999992002062782, iteration: 15167
loss: 1.0559403896331787,grad_norm: 0.9999995219927033, iteration: 15168
loss: 1.0470389127731323,grad_norm: 0.9999990749858735, iteration: 15169
loss: 1.0260004997253418,grad_norm: 0.9999991460155638, iteration: 15170
loss: 1.0442813634872437,grad_norm: 0.9999995852112277, iteration: 15171
loss: 1.0189303159713745,grad_norm: 0.9999989904481074, iteration: 15172
loss: 1.0212774276733398,grad_norm: 0.9999992530362241, iteration: 15173
loss: 1.0019787549972534,grad_norm: 0.9819340764479368, iteration: 15174
loss: 1.0030385255813599,grad_norm: 0.9999991965154512, iteration: 15175
loss: 1.0316574573516846,grad_norm: 0.9999991380904284, iteration: 15176
loss: 1.0326911211013794,grad_norm: 0.9999990105517099, iteration: 15177
loss: 0.9923709630966187,grad_norm: 0.999999209055534, iteration: 15178
loss: 1.0419777631759644,grad_norm: 0.9999989917272418, iteration: 15179
loss: 1.0529018640518188,grad_norm: 0.9999992144620333, iteration: 15180
loss: 1.0339926481246948,grad_norm: 0.9999991468249939, iteration: 15181
loss: 1.0217783451080322,grad_norm: 0.9999992908625006, iteration: 15182
loss: 0.9831082820892334,grad_norm: 0.9658375583175542, iteration: 15183
loss: 1.046210527420044,grad_norm: 0.9999990666761502, iteration: 15184
loss: 0.980702817440033,grad_norm: 0.9999991664561038, iteration: 15185
loss: 1.0005607604980469,grad_norm: 0.9136856245328274, iteration: 15186
loss: 1.045582890510559,grad_norm: 0.9999999042373097, iteration: 15187
loss: 1.021176815032959,grad_norm: 0.9999991987856982, iteration: 15188
loss: 1.0322843790054321,grad_norm: 0.9999990641403104, iteration: 15189
loss: 1.030190348625183,grad_norm: 0.9999993155580073, iteration: 15190
loss: 0.9992038011550903,grad_norm: 0.9999990372014924, iteration: 15191
loss: 0.9894766211509705,grad_norm: 0.9999992104024805, iteration: 15192
loss: 1.0244044065475464,grad_norm: 0.9999991426468817, iteration: 15193
loss: 1.101028561592102,grad_norm: 0.9999996606705432, iteration: 15194
loss: 1.0326197147369385,grad_norm: 0.9999997207873329, iteration: 15195
loss: 1.0022426843643188,grad_norm: 0.9999992317279036, iteration: 15196
loss: 1.0108627080917358,grad_norm: 0.9999994716916965, iteration: 15197
loss: 1.0355359315872192,grad_norm: 0.999999097942458, iteration: 15198
loss: 1.001800537109375,grad_norm: 0.9999990324724222, iteration: 15199
loss: 1.0043034553527832,grad_norm: 0.9697496446616282, iteration: 15200
loss: 1.0182665586471558,grad_norm: 0.9999992769329183, iteration: 15201
loss: 1.0024240016937256,grad_norm: 0.9999991980225862, iteration: 15202
loss: 1.0449351072311401,grad_norm: 0.9999997357414577, iteration: 15203
loss: 0.9673632383346558,grad_norm: 0.9999991589720464, iteration: 15204
loss: 1.0177016258239746,grad_norm: 0.9999991489163274, iteration: 15205
loss: 0.9931190013885498,grad_norm: 0.9999991940599993, iteration: 15206
loss: 0.9775522351264954,grad_norm: 0.9999993729467153, iteration: 15207
loss: 0.980353832244873,grad_norm: 0.9999991263951832, iteration: 15208
loss: 1.0093374252319336,grad_norm: 0.9999991755526675, iteration: 15209
loss: 1.044215440750122,grad_norm: 0.9999990975673134, iteration: 15210
loss: 1.01390540599823,grad_norm: 0.9551944671752597, iteration: 15211
loss: 1.0553685426712036,grad_norm: 0.9999992224291713, iteration: 15212
loss: 1.018239140510559,grad_norm: 0.999999242325045, iteration: 15213
loss: 0.9837245941162109,grad_norm: 0.971901408122097, iteration: 15214
loss: 0.9752183556556702,grad_norm: 0.8559079283117252, iteration: 15215
loss: 1.0377717018127441,grad_norm: 0.9999990057225818, iteration: 15216
loss: 1.061425805091858,grad_norm: 0.9999990992255713, iteration: 15217
loss: 1.0020942687988281,grad_norm: 0.9535913895127637, iteration: 15218
loss: 1.0128881931304932,grad_norm: 0.9999991507228737, iteration: 15219
loss: 0.9952993988990784,grad_norm: 0.9999991535800627, iteration: 15220
loss: 1.0376439094543457,grad_norm: 0.9999990230365698, iteration: 15221
loss: 1.0707167387008667,grad_norm: 0.9999994309107983, iteration: 15222
loss: 0.9746735095977783,grad_norm: 0.8841023159076302, iteration: 15223
loss: 1.0523127317428589,grad_norm: 0.999999114054908, iteration: 15224
loss: 1.0876412391662598,grad_norm: 0.9999994368506946, iteration: 15225
loss: 1.0282349586486816,grad_norm: 0.9999990434078653, iteration: 15226
loss: 1.047995924949646,grad_norm: 0.9999993016150598, iteration: 15227
loss: 1.0117921829223633,grad_norm: 0.9999992399697767, iteration: 15228
loss: 1.0574958324432373,grad_norm: 0.9983963061302675, iteration: 15229
loss: 1.0742216110229492,grad_norm: 0.9999993224505722, iteration: 15230
loss: 1.0262371301651,grad_norm: 0.9999991448647797, iteration: 15231
loss: 0.9953582286834717,grad_norm: 0.9999992060722848, iteration: 15232
loss: 1.0489492416381836,grad_norm: 0.9999992479268082, iteration: 15233
loss: 1.0630035400390625,grad_norm: 0.9999994105303699, iteration: 15234
loss: 1.0588639974594116,grad_norm: 0.9999992990882801, iteration: 15235
loss: 0.987525999546051,grad_norm: 0.999999370918637, iteration: 15236
loss: 0.9681372046470642,grad_norm: 0.8483073688241606, iteration: 15237
loss: 1.0056612491607666,grad_norm: 0.999999227060466, iteration: 15238
loss: 1.0181455612182617,grad_norm: 0.9999993050836773, iteration: 15239
loss: 1.019307017326355,grad_norm: 0.9999991126069894, iteration: 15240
loss: 1.0134183168411255,grad_norm: 0.9999990903551221, iteration: 15241
loss: 1.0512421131134033,grad_norm: 0.999999036722502, iteration: 15242
loss: 1.012103796005249,grad_norm: 0.9999991752838339, iteration: 15243
loss: 1.0385891199111938,grad_norm: 0.999999130867488, iteration: 15244
loss: 1.069822907447815,grad_norm: 0.9999993893494002, iteration: 15245
loss: 1.0341475009918213,grad_norm: 0.9999992260054519, iteration: 15246
loss: 1.0173749923706055,grad_norm: 0.9806067230114225, iteration: 15247
loss: 1.0358017683029175,grad_norm: 0.9999993555719651, iteration: 15248
loss: 1.005623698234558,grad_norm: 0.8621054295780133, iteration: 15249
loss: 1.0261645317077637,grad_norm: 0.9999992257619681, iteration: 15250
loss: 1.0190355777740479,grad_norm: 0.9999992396622241, iteration: 15251
loss: 1.0590649843215942,grad_norm: 0.9999991797723828, iteration: 15252
loss: 1.03245210647583,grad_norm: 0.999999374494257, iteration: 15253
loss: 1.0267413854599,grad_norm: 0.9999992324885469, iteration: 15254
loss: 1.0107605457305908,grad_norm: 0.9999990780175959, iteration: 15255
loss: 1.0767861604690552,grad_norm: 0.9999993157794715, iteration: 15256
loss: 0.9981176257133484,grad_norm: 0.999999008412511, iteration: 15257
loss: 1.02975332736969,grad_norm: 0.9999991003513329, iteration: 15258
loss: 1.010996699333191,grad_norm: 0.9999992757039698, iteration: 15259
loss: 1.0182278156280518,grad_norm: 0.9999994598633106, iteration: 15260
loss: 0.9692784547805786,grad_norm: 0.9999991490875897, iteration: 15261
loss: 0.9397933483123779,grad_norm: 0.9999992259123291, iteration: 15262
loss: 0.9849843978881836,grad_norm: 0.9810194841734263, iteration: 15263
loss: 0.9463851451873779,grad_norm: 0.9999990381685104, iteration: 15264
loss: 1.0514729022979736,grad_norm: 0.9999990792353417, iteration: 15265
loss: 1.0427206754684448,grad_norm: 0.9552491161981578, iteration: 15266
loss: 1.0675323009490967,grad_norm: 0.9999993717398447, iteration: 15267
loss: 1.0191209316253662,grad_norm: 0.953433631297275, iteration: 15268
loss: 0.9872210621833801,grad_norm: 0.9999992873541109, iteration: 15269
loss: 1.011676549911499,grad_norm: 0.9999992198916589, iteration: 15270
loss: 1.0022218227386475,grad_norm: 0.9999994369676938, iteration: 15271
loss: 1.0124422311782837,grad_norm: 0.9964190685117371, iteration: 15272
loss: 1.0119659900665283,grad_norm: 0.999999599412867, iteration: 15273
loss: 1.0056300163269043,grad_norm: 0.9999991804770166, iteration: 15274
loss: 1.033931016921997,grad_norm: 0.9999993888566358, iteration: 15275
loss: 1.0540151596069336,grad_norm: 0.9999991838277794, iteration: 15276
loss: 1.0186150074005127,grad_norm: 0.999999252523131, iteration: 15277
loss: 1.0073314905166626,grad_norm: 0.9999992375182138, iteration: 15278
loss: 1.0178205966949463,grad_norm: 0.9999992557326265, iteration: 15279
loss: 0.979619562625885,grad_norm: 0.9999991509026027, iteration: 15280
loss: 1.0298625230789185,grad_norm: 0.9999990292489963, iteration: 15281
loss: 1.0165133476257324,grad_norm: 0.9999990748819686, iteration: 15282
loss: 1.033672571182251,grad_norm: 0.9937588654533439, iteration: 15283
loss: 0.9970198273658752,grad_norm: 0.9999990540410143, iteration: 15284
loss: 1.0391758680343628,grad_norm: 0.9999990463216922, iteration: 15285
loss: 1.0067296028137207,grad_norm: 0.9999991201805853, iteration: 15286
loss: 1.0059349536895752,grad_norm: 0.9913140511003199, iteration: 15287
loss: 1.0081236362457275,grad_norm: 0.9999991102731196, iteration: 15288
loss: 0.9864705204963684,grad_norm: 0.9999993441183206, iteration: 15289
loss: 1.0525394678115845,grad_norm: 0.999999432818472, iteration: 15290
loss: 0.9694452881813049,grad_norm: 0.9999992436656213, iteration: 15291
loss: 0.9932260513305664,grad_norm: 0.9999991976312541, iteration: 15292
loss: 0.9877176284790039,grad_norm: 0.9999991748797791, iteration: 15293
loss: 1.0176639556884766,grad_norm: 0.9999994076887525, iteration: 15294
loss: 1.0523773431777954,grad_norm: 0.9999993041750802, iteration: 15295
loss: 1.041029691696167,grad_norm: 0.9999996066750172, iteration: 15296
loss: 1.033933162689209,grad_norm: 0.9999991784621767, iteration: 15297
loss: 0.9855506420135498,grad_norm: 0.9999992246292665, iteration: 15298
loss: 0.9641461968421936,grad_norm: 0.9991305700195249, iteration: 15299
loss: 1.0247491598129272,grad_norm: 0.9999990466267061, iteration: 15300
loss: 1.0481101274490356,grad_norm: 0.999999535016686, iteration: 15301
loss: 1.0096724033355713,grad_norm: 0.9999991303217602, iteration: 15302
loss: 1.019555687904358,grad_norm: 0.9999991236627457, iteration: 15303
loss: 1.0075186491012573,grad_norm: 0.9999991388125395, iteration: 15304
loss: 1.0182923078536987,grad_norm: 0.9999991838034694, iteration: 15305
loss: 1.0167828798294067,grad_norm: 0.9999991114637345, iteration: 15306
loss: 1.0122452974319458,grad_norm: 0.9999994140864226, iteration: 15307
loss: 0.9893612265586853,grad_norm: 0.9999991507603245, iteration: 15308
loss: 1.0065549612045288,grad_norm: 0.9999990900312463, iteration: 15309
loss: 1.0603902339935303,grad_norm: 0.9999993580145358, iteration: 15310
loss: 1.0325169563293457,grad_norm: 0.9999989903307214, iteration: 15311
loss: 0.9913666248321533,grad_norm: 0.999999292055429, iteration: 15312
loss: 1.0492950677871704,grad_norm: 0.9999993722029958, iteration: 15313
loss: 0.9915793538093567,grad_norm: 0.9999996018351717, iteration: 15314
loss: 0.9834445714950562,grad_norm: 0.9999991206344903, iteration: 15315
loss: 1.0456632375717163,grad_norm: 0.9999992833113087, iteration: 15316
loss: 0.990366518497467,grad_norm: 0.9999992310642779, iteration: 15317
loss: 1.0327832698822021,grad_norm: 0.9999992168277366, iteration: 15318
loss: 0.9858250021934509,grad_norm: 0.9999991645131215, iteration: 15319
loss: 1.0861411094665527,grad_norm: 0.9999992865484973, iteration: 15320
loss: 0.9889334440231323,grad_norm: 0.9999992629367869, iteration: 15321
loss: 0.9757626056671143,grad_norm: 0.9999992601335924, iteration: 15322
loss: 1.02755868434906,grad_norm: 0.9999990949799431, iteration: 15323
loss: 1.008258581161499,grad_norm: 0.9999991096111682, iteration: 15324
loss: 0.9989902377128601,grad_norm: 0.9999991759540212, iteration: 15325
loss: 1.0122517347335815,grad_norm: 0.9999992537171087, iteration: 15326
loss: 1.0368609428405762,grad_norm: 0.9999994736608477, iteration: 15327
loss: 0.9716903567314148,grad_norm: 0.9999991100650067, iteration: 15328
loss: 0.9939573407173157,grad_norm: 0.9948230944668313, iteration: 15329
loss: 1.0476313829421997,grad_norm: 0.999999418278131, iteration: 15330
loss: 1.0739496946334839,grad_norm: 0.999999408798053, iteration: 15331
loss: 0.9933105707168579,grad_norm: 0.8900088613275108, iteration: 15332
loss: 0.9945680499076843,grad_norm: 0.9999991184530352, iteration: 15333
loss: 1.061092495918274,grad_norm: 0.9999993688697871, iteration: 15334
loss: 1.019855260848999,grad_norm: 0.9999995900626236, iteration: 15335
loss: 1.0287901163101196,grad_norm: 0.9999990985558221, iteration: 15336
loss: 1.019152283668518,grad_norm: 0.999999137311851, iteration: 15337
loss: 1.0231750011444092,grad_norm: 0.9999992519511337, iteration: 15338
loss: 1.0397893190383911,grad_norm: 0.9999990454984489, iteration: 15339
loss: 1.0251123905181885,grad_norm: 0.9999990869288934, iteration: 15340
loss: 1.0535098314285278,grad_norm: 0.9999993343561239, iteration: 15341
loss: 1.0204941034317017,grad_norm: 0.9999994198489485, iteration: 15342
loss: 1.0452100038528442,grad_norm: 0.9999992046185464, iteration: 15343
loss: 0.9816992282867432,grad_norm: 0.9999991674555914, iteration: 15344
loss: 0.9919589757919312,grad_norm: 0.9999992223918526, iteration: 15345
loss: 0.9870200157165527,grad_norm: 0.999999251435213, iteration: 15346
loss: 1.0164992809295654,grad_norm: 0.9999993025329085, iteration: 15347
loss: 0.9775236248970032,grad_norm: 0.9999992693948938, iteration: 15348
loss: 1.021661400794983,grad_norm: 0.9999990640221011, iteration: 15349
loss: 1.0076618194580078,grad_norm: 0.9999996669600635, iteration: 15350
loss: 1.0684148073196411,grad_norm: 0.999999346343148, iteration: 15351
loss: 1.007048487663269,grad_norm: 0.9999992579814283, iteration: 15352
loss: 1.0150198936462402,grad_norm: 0.9999992622148948, iteration: 15353
loss: 1.0333408117294312,grad_norm: 0.9999993792871379, iteration: 15354
loss: 1.015486240386963,grad_norm: 0.9999994608353264, iteration: 15355
loss: 1.043404459953308,grad_norm: 0.9999993218920766, iteration: 15356
loss: 0.9924513101577759,grad_norm: 0.9999992868901629, iteration: 15357
loss: 1.035583257675171,grad_norm: 0.9999991761228487, iteration: 15358
loss: 0.9889945387840271,grad_norm: 0.9999996041605168, iteration: 15359
loss: 1.0541409254074097,grad_norm: 0.9999997824137582, iteration: 15360
loss: 1.0518547296524048,grad_norm: 0.9999997171630011, iteration: 15361
loss: 1.0533212423324585,grad_norm: 0.9999996064164739, iteration: 15362
loss: 1.0140234231948853,grad_norm: 0.9999992908509092, iteration: 15363
loss: 0.9936754703521729,grad_norm: 0.9999993040943516, iteration: 15364
loss: 1.046630620956421,grad_norm: 0.9999994406253504, iteration: 15365
loss: 1.0101724863052368,grad_norm: 0.9999991365291571, iteration: 15366
loss: 1.0042829513549805,grad_norm: 0.9999994995655482, iteration: 15367
loss: 1.027410864830017,grad_norm: 0.9999991575962428, iteration: 15368
loss: 0.9810948967933655,grad_norm: 0.9999990264480326, iteration: 15369
loss: 1.1060171127319336,grad_norm: 0.9999997343604067, iteration: 15370
loss: 1.0228520631790161,grad_norm: 0.9999993216373078, iteration: 15371
loss: 1.0214110612869263,grad_norm: 0.999999284272623, iteration: 15372
loss: 1.0574860572814941,grad_norm: 0.9999992134103133, iteration: 15373
loss: 1.0487197637557983,grad_norm: 0.9999992780253122, iteration: 15374
loss: 1.0078744888305664,grad_norm: 0.9999993209640139, iteration: 15375
loss: 1.0480433702468872,grad_norm: 0.9999991109608815, iteration: 15376
loss: 0.9989596605300903,grad_norm: 0.9999992899429488, iteration: 15377
loss: 1.0412887334823608,grad_norm: 0.9999991867846225, iteration: 15378
loss: 1.0603498220443726,grad_norm: 0.999999354021639, iteration: 15379
loss: 1.0469539165496826,grad_norm: 0.9999997008976415, iteration: 15380
loss: 1.0198408365249634,grad_norm: 0.9999994740424962, iteration: 15381
loss: 1.0404913425445557,grad_norm: 0.9999991698207957, iteration: 15382
loss: 1.0212342739105225,grad_norm: 0.9999992310728034, iteration: 15383
loss: 1.115459680557251,grad_norm: 0.9999998419696948, iteration: 15384
loss: 1.0160179138183594,grad_norm: 0.9999990875908343, iteration: 15385
loss: 1.0808743238449097,grad_norm: 0.9999993760704945, iteration: 15386
loss: 1.0204710960388184,grad_norm: 0.9999996278115516, iteration: 15387
loss: 1.0411407947540283,grad_norm: 0.9999996926076958, iteration: 15388
loss: 1.093398928642273,grad_norm: 0.999999715896847, iteration: 15389
loss: 0.9947646260261536,grad_norm: 0.9999992225551991, iteration: 15390
loss: 1.0237338542938232,grad_norm: 0.9999990340721926, iteration: 15391
loss: 1.053301453590393,grad_norm: 0.9999990729333655, iteration: 15392
loss: 0.9963696599006653,grad_norm: 0.9999993571441138, iteration: 15393
loss: 1.0211093425750732,grad_norm: 0.9999990991179342, iteration: 15394
loss: 1.0277067422866821,grad_norm: 0.9999996367423766, iteration: 15395
loss: 1.093327522277832,grad_norm: 0.9999992821684737, iteration: 15396
loss: 0.987971842288971,grad_norm: 0.9999993057469657, iteration: 15397
loss: 1.0326001644134521,grad_norm: 0.9999995089317634, iteration: 15398
loss: 1.014407992362976,grad_norm: 0.9981141710497162, iteration: 15399
loss: 1.058844804763794,grad_norm: 0.9999993159639808, iteration: 15400
loss: 1.0574520826339722,grad_norm: 0.9999994385425663, iteration: 15401
loss: 1.0186519622802734,grad_norm: 0.9999991680031949, iteration: 15402
loss: 1.0153359174728394,grad_norm: 0.9999993161833317, iteration: 15403
loss: 1.034701943397522,grad_norm: 0.999999547589523, iteration: 15404
loss: 0.9974479079246521,grad_norm: 0.9999991889157563, iteration: 15405
loss: 1.0106395483016968,grad_norm: 0.9999992578636429, iteration: 15406
loss: 1.0265549421310425,grad_norm: 0.999999248261215, iteration: 15407
loss: 1.0504454374313354,grad_norm: 0.9999996610935825, iteration: 15408
loss: 1.079342246055603,grad_norm: 0.9999994389483081, iteration: 15409
loss: 1.0141160488128662,grad_norm: 0.9999992528071681, iteration: 15410
loss: 1.0439988374710083,grad_norm: 0.9999991025674886, iteration: 15411
loss: 0.9974479675292969,grad_norm: 0.9999990644857797, iteration: 15412
loss: 1.0813103914260864,grad_norm: 0.9999995991491142, iteration: 15413
loss: 1.0275472402572632,grad_norm: 0.9999992640080908, iteration: 15414
loss: 1.054009199142456,grad_norm: 0.9999991834937162, iteration: 15415
loss: 1.0384598970413208,grad_norm: 0.999998958554675, iteration: 15416
loss: 1.0513371229171753,grad_norm: 0.999999269159264, iteration: 15417
loss: 1.0134798288345337,grad_norm: 0.9999992779883097, iteration: 15418
loss: 1.028273344039917,grad_norm: 0.9999997851594888, iteration: 15419
loss: 0.9574143886566162,grad_norm: 0.9999993367438437, iteration: 15420
loss: 1.0180693864822388,grad_norm: 0.9999992605075279, iteration: 15421
loss: 1.0162172317504883,grad_norm: 0.9999989902871163, iteration: 15422
loss: 0.982173502445221,grad_norm: 0.999999433912623, iteration: 15423
loss: 0.9914022088050842,grad_norm: 0.9704109736017437, iteration: 15424
loss: 1.0397615432739258,grad_norm: 0.9999993768197043, iteration: 15425
loss: 1.0259168148040771,grad_norm: 0.9999991382290956, iteration: 15426
loss: 1.0230547189712524,grad_norm: 0.9999994163606221, iteration: 15427
loss: 1.009588599205017,grad_norm: 0.9999994875361484, iteration: 15428
loss: 1.0042041540145874,grad_norm: 0.913046005207104, iteration: 15429
loss: 1.0421184301376343,grad_norm: 0.9999992189164237, iteration: 15430
loss: 1.0080841779708862,grad_norm: 0.9999997531978335, iteration: 15431
loss: 1.0452373027801514,grad_norm: 0.859032705155068, iteration: 15432
loss: 1.054460883140564,grad_norm: 0.9999991186626742, iteration: 15433
loss: 0.9397870898246765,grad_norm: 0.9999992925327031, iteration: 15434
loss: 1.0715465545654297,grad_norm: 0.9999991908464885, iteration: 15435
loss: 0.9919189810752869,grad_norm: 0.9999989829809747, iteration: 15436
loss: 1.0216434001922607,grad_norm: 0.9999996914049057, iteration: 15437
loss: 1.0910645723342896,grad_norm: 0.999999533914176, iteration: 15438
loss: 1.006870150566101,grad_norm: 0.9999996103957548, iteration: 15439
loss: 1.057033658027649,grad_norm: 0.9999993458978317, iteration: 15440
loss: 1.0454095602035522,grad_norm: 0.9999996478781056, iteration: 15441
loss: 1.064541220664978,grad_norm: 0.9999992104405285, iteration: 15442
loss: 1.0296831130981445,grad_norm: 0.99999935757947, iteration: 15443
loss: 1.0138037204742432,grad_norm: 0.9999990824332969, iteration: 15444
loss: 1.013435959815979,grad_norm: 0.999999129261888, iteration: 15445
loss: 1.0441153049468994,grad_norm: 0.9999992521414018, iteration: 15446
loss: 1.0509729385375977,grad_norm: 0.99999945949122, iteration: 15447
loss: 1.0419158935546875,grad_norm: 0.9999992538030028, iteration: 15448
loss: 1.0832597017288208,grad_norm: 0.9999997232720154, iteration: 15449
loss: 1.0540947914123535,grad_norm: 0.9999991589980985, iteration: 15450
loss: 1.0126585960388184,grad_norm: 0.9999991979486178, iteration: 15451
loss: 1.02217698097229,grad_norm: 0.999999477800472, iteration: 15452
loss: 1.025317668914795,grad_norm: 0.9999996168455775, iteration: 15453
loss: 1.0358786582946777,grad_norm: 0.9999991421692277, iteration: 15454
loss: 1.0332523584365845,grad_norm: 0.9899691953332851, iteration: 15455
loss: 1.0308356285095215,grad_norm: 0.9999992705648038, iteration: 15456
loss: 1.0110429525375366,grad_norm: 0.9999997673635808, iteration: 15457
loss: 1.0381927490234375,grad_norm: 0.9999994830985012, iteration: 15458
loss: 0.9996146559715271,grad_norm: 0.9999992130599974, iteration: 15459
loss: 1.0245879888534546,grad_norm: 0.9999992871989891, iteration: 15460
loss: 1.029326319694519,grad_norm: 0.9999995125512966, iteration: 15461
loss: 1.0307999849319458,grad_norm: 0.9999992589758885, iteration: 15462
loss: 1.0479234457015991,grad_norm: 0.9999996358419487, iteration: 15463
loss: 1.0458505153656006,grad_norm: 0.9999990055555236, iteration: 15464
loss: 0.9987993240356445,grad_norm: 0.9999993501793585, iteration: 15465
loss: 1.0330007076263428,grad_norm: 0.9999990823339993, iteration: 15466
loss: 1.0446316003799438,grad_norm: 0.9999992349814306, iteration: 15467
loss: 1.1202892065048218,grad_norm: 0.9999996958395577, iteration: 15468
loss: 1.0057460069656372,grad_norm: 0.9999992064694891, iteration: 15469
loss: 1.0339478254318237,grad_norm: 0.9999992252394617, iteration: 15470
loss: 0.9761934280395508,grad_norm: 0.9999992772664672, iteration: 15471
loss: 1.0464787483215332,grad_norm: 0.9999992832591452, iteration: 15472
loss: 1.0150538682937622,grad_norm: 0.9999992402588606, iteration: 15473
loss: 1.0522931814193726,grad_norm: 0.9012948920291776, iteration: 15474
loss: 1.0371448993682861,grad_norm: 0.9999990824372429, iteration: 15475
loss: 1.0102319717407227,grad_norm: 0.9999990628311621, iteration: 15476
loss: 0.9870914816856384,grad_norm: 0.9999990443726523, iteration: 15477
loss: 1.0366719961166382,grad_norm: 0.999999169849395, iteration: 15478
loss: 1.0285389423370361,grad_norm: 0.9999991649795159, iteration: 15479
loss: 1.0641491413116455,grad_norm: 0.969869579164227, iteration: 15480
loss: 1.0485464334487915,grad_norm: 0.9999992198394294, iteration: 15481
loss: 1.0186529159545898,grad_norm: 0.9999992055399004, iteration: 15482
loss: 1.0361756086349487,grad_norm: 0.999999502129208, iteration: 15483
loss: 1.057350754737854,grad_norm: 0.9999992944769744, iteration: 15484
loss: 1.047500491142273,grad_norm: 0.9999991328865244, iteration: 15485
loss: 1.0481603145599365,grad_norm: 0.9999991833193191, iteration: 15486
loss: 1.0247840881347656,grad_norm: 0.9999995870254464, iteration: 15487
loss: 1.0405865907669067,grad_norm: 0.9999992275817364, iteration: 15488
loss: 1.038756251335144,grad_norm: 0.9999991225589139, iteration: 15489
loss: 0.9910283088684082,grad_norm: 0.9999992413571205, iteration: 15490
loss: 0.9940382838249207,grad_norm: 0.9999994841373625, iteration: 15491
loss: 1.0477595329284668,grad_norm: 0.9999994321062601, iteration: 15492
loss: 1.021727204322815,grad_norm: 0.9999994150703257, iteration: 15493
loss: 0.9901242256164551,grad_norm: 0.9999990117038654, iteration: 15494
loss: 1.0180387496948242,grad_norm: 0.9885677512038508, iteration: 15495
loss: 1.001752257347107,grad_norm: 0.9999992012242411, iteration: 15496
loss: 1.0212706327438354,grad_norm: 0.9999994245481447, iteration: 15497
loss: 1.0067753791809082,grad_norm: 0.999999323091954, iteration: 15498
loss: 1.009658694267273,grad_norm: 0.9999991773486627, iteration: 15499
loss: 1.0125762224197388,grad_norm: 0.9999991488360441, iteration: 15500
loss: 0.9896623492240906,grad_norm: 0.9999989908697237, iteration: 15501
loss: 1.02608060836792,grad_norm: 0.9999994430527033, iteration: 15502
loss: 0.9754412174224854,grad_norm: 0.9408909551538971, iteration: 15503
loss: 1.0327929258346558,grad_norm: 0.9999992367411991, iteration: 15504
loss: 1.0649151802062988,grad_norm: 0.9999990888163769, iteration: 15505
loss: 1.0717824697494507,grad_norm: 0.9322150161994465, iteration: 15506
loss: 1.009254813194275,grad_norm: 0.9999991711743728, iteration: 15507
loss: 1.0033513307571411,grad_norm: 0.9999991754871445, iteration: 15508
loss: 1.0085620880126953,grad_norm: 0.9192236839861945, iteration: 15509
loss: 1.030569076538086,grad_norm: 0.9999992167870331, iteration: 15510
loss: 1.0348492860794067,grad_norm: 0.9999992247170515, iteration: 15511
loss: 1.0224149227142334,grad_norm: 0.9999992107095516, iteration: 15512
loss: 1.0091325044631958,grad_norm: 0.9999990249817672, iteration: 15513
loss: 1.019426941871643,grad_norm: 0.9999991968410188, iteration: 15514
loss: 1.0374943017959595,grad_norm: 0.9999992953445772, iteration: 15515
loss: 1.010712742805481,grad_norm: 0.9999992635871712, iteration: 15516
loss: 1.0137449502944946,grad_norm: 0.9999991863961004, iteration: 15517
loss: 1.0478919744491577,grad_norm: 0.9999991518709175, iteration: 15518
loss: 1.0472908020019531,grad_norm: 0.9999990913514385, iteration: 15519
loss: 1.0061105489730835,grad_norm: 0.9999995353132316, iteration: 15520
loss: 0.997936487197876,grad_norm: 0.9923786942508092, iteration: 15521
loss: 1.0096441507339478,grad_norm: 0.999999190937751, iteration: 15522
loss: 1.0607101917266846,grad_norm: 0.9999992066194823, iteration: 15523
loss: 1.0100414752960205,grad_norm: 0.999999127978104, iteration: 15524
loss: 1.0006682872772217,grad_norm: 0.9999991672262111, iteration: 15525
loss: 0.9342341423034668,grad_norm: 0.9999990168902982, iteration: 15526
loss: 1.0074141025543213,grad_norm: 0.9999992585792233, iteration: 15527
loss: 0.9914245009422302,grad_norm: 0.9999991568581026, iteration: 15528
loss: 1.0264593362808228,grad_norm: 0.9999993583969579, iteration: 15529
loss: 0.9872937798500061,grad_norm: 0.9999990339357628, iteration: 15530
loss: 1.0313183069229126,grad_norm: 0.999999112650139, iteration: 15531
loss: 1.0845814943313599,grad_norm: 0.9999992083118463, iteration: 15532
loss: 1.0316288471221924,grad_norm: 0.9999992517098785, iteration: 15533
loss: 1.0849026441574097,grad_norm: 0.9999993709641464, iteration: 15534
loss: 0.9769772887229919,grad_norm: 0.9999991344559653, iteration: 15535
loss: 1.0678826570510864,grad_norm: 0.9999994035798894, iteration: 15536
loss: 1.0237802267074585,grad_norm: 0.9999992561218333, iteration: 15537
loss: 1.0432541370391846,grad_norm: 0.9999993845493746, iteration: 15538
loss: 0.973846435546875,grad_norm: 0.999999167805156, iteration: 15539
loss: 1.023797869682312,grad_norm: 0.9999992350611816, iteration: 15540
loss: 1.021691083908081,grad_norm: 0.9999992706390749, iteration: 15541
loss: 1.0345776081085205,grad_norm: 0.9999992539874147, iteration: 15542
loss: 1.026864767074585,grad_norm: 0.9999993417280018, iteration: 15543
loss: 0.999298095703125,grad_norm: 0.9042540880434996, iteration: 15544
loss: 1.0138975381851196,grad_norm: 0.825521796993939, iteration: 15545
loss: 1.0341812372207642,grad_norm: 0.9999992650925403, iteration: 15546
loss: 1.0600835084915161,grad_norm: 0.999999350550065, iteration: 15547
loss: 1.0191936492919922,grad_norm: 0.9999989915064226, iteration: 15548
loss: 1.024168610572815,grad_norm: 0.9999992421405556, iteration: 15549
loss: 0.9928534626960754,grad_norm: 0.9999990804588768, iteration: 15550
loss: 0.9952517151832581,grad_norm: 0.999999181185977, iteration: 15551
loss: 1.0268032550811768,grad_norm: 0.999999261879733, iteration: 15552
loss: 1.0514661073684692,grad_norm: 0.9999991835476455, iteration: 15553
loss: 1.018526315689087,grad_norm: 0.999999100580173, iteration: 15554
loss: 1.0631135702133179,grad_norm: 0.999999126733298, iteration: 15555
loss: 1.0137481689453125,grad_norm: 0.9999991668463954, iteration: 15556
loss: 0.995694637298584,grad_norm: 0.9999993073928725, iteration: 15557
loss: 1.009513258934021,grad_norm: 0.9999990132726329, iteration: 15558
loss: 0.9970374703407288,grad_norm: 0.9999992012968854, iteration: 15559
loss: 0.9716392755508423,grad_norm: 0.9999990319687384, iteration: 15560
loss: 0.9749694466590881,grad_norm: 0.9999990529247798, iteration: 15561
loss: 1.0448205471038818,grad_norm: 0.9785674287056797, iteration: 15562
loss: 0.9674439430236816,grad_norm: 0.9999992259387633, iteration: 15563
loss: 1.0262373685836792,grad_norm: 0.9999991152503849, iteration: 15564
loss: 1.0191335678100586,grad_norm: 0.9999992736863406, iteration: 15565
loss: 1.0190993547439575,grad_norm: 0.9999991005789272, iteration: 15566
loss: 1.0500305891036987,grad_norm: 0.999999203335341, iteration: 15567
loss: 1.0533943176269531,grad_norm: 0.9999994133314503, iteration: 15568
loss: 0.9951979517936707,grad_norm: 0.9999993625407861, iteration: 15569
loss: 0.997991144657135,grad_norm: 0.9999991841688369, iteration: 15570
loss: 1.0202820301055908,grad_norm: 0.9999992817260219, iteration: 15571
loss: 0.9787136316299438,grad_norm: 0.9999991930471914, iteration: 15572
loss: 1.053914189338684,grad_norm: 0.9999991268502215, iteration: 15573
loss: 0.9627292156219482,grad_norm: 0.9999992137078587, iteration: 15574
loss: 1.082180380821228,grad_norm: 0.9999991749120507, iteration: 15575
loss: 0.9569653868675232,grad_norm: 0.9369737499423081, iteration: 15576
loss: 1.0354398488998413,grad_norm: 0.999999263051971, iteration: 15577
loss: 1.0329796075820923,grad_norm: 0.9999992392992697, iteration: 15578
loss: 1.0528205633163452,grad_norm: 0.999999146671173, iteration: 15579
loss: 1.0245332717895508,grad_norm: 0.9999994196363309, iteration: 15580
loss: 0.9894241690635681,grad_norm: 0.9999990387292444, iteration: 15581
loss: 1.0077348947525024,grad_norm: 0.9999989862285555, iteration: 15582
loss: 1.058056116104126,grad_norm: 0.9999992775955037, iteration: 15583
loss: 1.0476560592651367,grad_norm: 0.9999992743238586, iteration: 15584
loss: 1.021579623222351,grad_norm: 0.9999992148750639, iteration: 15585
loss: 1.0656957626342773,grad_norm: 0.9999990691125046, iteration: 15586
loss: 0.9967445135116577,grad_norm: 0.9544828547167205, iteration: 15587
loss: 0.9937090873718262,grad_norm: 0.9999992081593069, iteration: 15588
loss: 1.0137501955032349,grad_norm: 0.9999992745600167, iteration: 15589
loss: 1.0312530994415283,grad_norm: 0.9999992937187584, iteration: 15590
loss: 1.0231355428695679,grad_norm: 0.9999992651594225, iteration: 15591
loss: 1.0184061527252197,grad_norm: 0.9999992717167128, iteration: 15592
loss: 1.0392709970474243,grad_norm: 0.9999990965578657, iteration: 15593
loss: 1.016472339630127,grad_norm: 0.9999993208388384, iteration: 15594
loss: 1.0425605773925781,grad_norm: 0.9999992041731901, iteration: 15595
loss: 1.0609849691390991,grad_norm: 0.9999991559185953, iteration: 15596
loss: 1.0354399681091309,grad_norm: 0.9999991230002788, iteration: 15597
loss: 1.0003471374511719,grad_norm: 0.99999902487215, iteration: 15598
loss: 0.9575868248939514,grad_norm: 0.9999991840542672, iteration: 15599
loss: 1.0463731288909912,grad_norm: 0.9999991908986753, iteration: 15600
loss: 1.0132800340652466,grad_norm: 0.8853786316063301, iteration: 15601
loss: 1.0109655857086182,grad_norm: 0.9999992096916853, iteration: 15602
loss: 1.0262874364852905,grad_norm: 0.9999991205240212, iteration: 15603
loss: 1.0070472955703735,grad_norm: 0.999999236355435, iteration: 15604
loss: 1.0361814498901367,grad_norm: 0.999999084772463, iteration: 15605
loss: 0.9870646595954895,grad_norm: 0.9595541317898387, iteration: 15606
loss: 1.0168687105178833,grad_norm: 0.999999163322588, iteration: 15607
loss: 1.0317184925079346,grad_norm: 0.9999993599834691, iteration: 15608
loss: 1.0299365520477295,grad_norm: 0.9999993565748644, iteration: 15609
loss: 1.0303418636322021,grad_norm: 0.9999991440727657, iteration: 15610
loss: 1.0273572206497192,grad_norm: 0.999999423986074, iteration: 15611
loss: 0.9660366773605347,grad_norm: 0.9999991972131003, iteration: 15612
loss: 0.9985029697418213,grad_norm: 0.8689775005766149, iteration: 15613
loss: 1.0228569507598877,grad_norm: 0.999999185887992, iteration: 15614
loss: 1.0163466930389404,grad_norm: 0.9999991356734911, iteration: 15615
loss: 1.0302436351776123,grad_norm: 0.999999124707777, iteration: 15616
loss: 1.0350865125656128,grad_norm: 0.9999993170002328, iteration: 15617
loss: 1.0902316570281982,grad_norm: 0.999999059277836, iteration: 15618
loss: 1.033033013343811,grad_norm: 0.9999991024938432, iteration: 15619
loss: 1.0561714172363281,grad_norm: 0.9999992479697892, iteration: 15620
loss: 1.0541545152664185,grad_norm: 0.9999992669706924, iteration: 15621
loss: 1.022587776184082,grad_norm: 0.9999991702633926, iteration: 15622
loss: 0.9865385293960571,grad_norm: 0.999999071571206, iteration: 15623
loss: 1.031445026397705,grad_norm: 0.9646832496308101, iteration: 15624
loss: 1.035377860069275,grad_norm: 0.9874638149648687, iteration: 15625
loss: 1.0691202878952026,grad_norm: 0.9999992482462123, iteration: 15626
loss: 1.0119446516036987,grad_norm: 0.9999989953817133, iteration: 15627
loss: 1.0083677768707275,grad_norm: 0.9999991945234262, iteration: 15628
loss: 1.0069191455841064,grad_norm: 0.9826957354289542, iteration: 15629
loss: 0.9604830145835876,grad_norm: 0.9999991339522968, iteration: 15630
loss: 1.0376884937286377,grad_norm: 0.9999990979537156, iteration: 15631
loss: 1.0018306970596313,grad_norm: 0.9999991086680201, iteration: 15632
loss: 1.0171290636062622,grad_norm: 0.9999991721023592, iteration: 15633
loss: 1.067047119140625,grad_norm: 0.9999993172874254, iteration: 15634
loss: 1.037855863571167,grad_norm: 0.9999991014846477, iteration: 15635
loss: 1.0343284606933594,grad_norm: 0.9682082646319621, iteration: 15636
loss: 1.0078977346420288,grad_norm: 0.999999416690844, iteration: 15637
loss: 1.0673489570617676,grad_norm: 0.9999992194609248, iteration: 15638
loss: 1.040104866027832,grad_norm: 0.9999992519262082, iteration: 15639
loss: 1.0046179294586182,grad_norm: 0.9999992364887857, iteration: 15640
loss: 1.0530123710632324,grad_norm: 0.9233929533214735, iteration: 15641
loss: 1.004058599472046,grad_norm: 0.973785570740875, iteration: 15642
loss: 1.0133638381958008,grad_norm: 0.9999992892437655, iteration: 15643
loss: 1.0235899686813354,grad_norm: 0.9999992439699422, iteration: 15644
loss: 1.0030033588409424,grad_norm: 0.9999990939082044, iteration: 15645
loss: 1.0062888860702515,grad_norm: 0.9431383193220872, iteration: 15646
loss: 1.0470505952835083,grad_norm: 0.9999992078515753, iteration: 15647
loss: 1.0215175151824951,grad_norm: 0.999999166686726, iteration: 15648
loss: 0.9883351922035217,grad_norm: 0.9999991389623035, iteration: 15649
loss: 1.0156153440475464,grad_norm: 0.9999991225324911, iteration: 15650
loss: 1.0264530181884766,grad_norm: 0.9999992400601547, iteration: 15651
loss: 1.0039409399032593,grad_norm: 0.9209436539910072, iteration: 15652
loss: 1.0320159196853638,grad_norm: 0.9999990743929934, iteration: 15653
loss: 1.0092953443527222,grad_norm: 0.9999992548947285, iteration: 15654
loss: 1.0007081031799316,grad_norm: 0.999999119581527, iteration: 15655
loss: 1.019392728805542,grad_norm: 0.9999993237003452, iteration: 15656
loss: 1.028600811958313,grad_norm: 0.9999992080510323, iteration: 15657
loss: 1.035635232925415,grad_norm: 0.9999992767569807, iteration: 15658
loss: 1.033097267150879,grad_norm: 0.9999990876035203, iteration: 15659
loss: 0.9984659552574158,grad_norm: 0.9999991251166443, iteration: 15660
loss: 1.0307549238204956,grad_norm: 0.9999990866926579, iteration: 15661
loss: 1.0393202304840088,grad_norm: 0.9999992189170782, iteration: 15662
loss: 0.9723183512687683,grad_norm: 0.9999992315179924, iteration: 15663
loss: 0.9690123796463013,grad_norm: 0.9999991486842814, iteration: 15664
loss: 0.9943522810935974,grad_norm: 0.9999992346824772, iteration: 15665
loss: 1.026242733001709,grad_norm: 0.9999991219452049, iteration: 15666
loss: 1.048895001411438,grad_norm: 0.999999288991681, iteration: 15667
loss: 0.9944269061088562,grad_norm: 0.9999992728368953, iteration: 15668
loss: 1.0068583488464355,grad_norm: 0.9999992378781365, iteration: 15669
loss: 0.985529363155365,grad_norm: 0.9999991397348162, iteration: 15670
loss: 1.0661839246749878,grad_norm: 0.9999992377479024, iteration: 15671
loss: 1.0348119735717773,grad_norm: 0.9999991694861161, iteration: 15672
loss: 1.016096830368042,grad_norm: 0.9999991587866564, iteration: 15673
loss: 1.0300894975662231,grad_norm: 0.9498474437026272, iteration: 15674
loss: 1.0124577283859253,grad_norm: 0.9999993778813308, iteration: 15675
loss: 1.029502511024475,grad_norm: 0.9999990586824256, iteration: 15676
loss: 1.0344903469085693,grad_norm: 0.9999990825079346, iteration: 15677
loss: 0.9926329255104065,grad_norm: 0.9999990753465661, iteration: 15678
loss: 1.0471229553222656,grad_norm: 0.9999992149632102, iteration: 15679
loss: 1.0394052267074585,grad_norm: 0.9854972893372559, iteration: 15680
loss: 1.0179955959320068,grad_norm: 0.9999990075403731, iteration: 15681
loss: 1.0245170593261719,grad_norm: 0.9999998377708592, iteration: 15682
loss: 1.0129975080490112,grad_norm: 0.9999992241295845, iteration: 15683
loss: 1.019615650177002,grad_norm: 0.9999991298887603, iteration: 15684
loss: 1.0089393854141235,grad_norm: 0.9999991896173902, iteration: 15685
loss: 1.0197808742523193,grad_norm: 0.9999990884283065, iteration: 15686
loss: 0.9855417609214783,grad_norm: 0.9999991812004037, iteration: 15687
loss: 1.0034688711166382,grad_norm: 0.9226788172954011, iteration: 15688
loss: 1.0261144638061523,grad_norm: 0.9999990920683767, iteration: 15689
loss: 1.0403660535812378,grad_norm: 0.9365399270626787, iteration: 15690
loss: 1.0387303829193115,grad_norm: 0.999999289172777, iteration: 15691
loss: 1.0441075563430786,grad_norm: 0.9999991798660148, iteration: 15692
loss: 1.0496115684509277,grad_norm: 0.8747661982289643, iteration: 15693
loss: 0.9768547415733337,grad_norm: 0.9203263987589836, iteration: 15694
loss: 1.0725867748260498,grad_norm: 0.9999992520712877, iteration: 15695
loss: 1.0168015956878662,grad_norm: 0.9999990673524288, iteration: 15696
loss: 0.9921002984046936,grad_norm: 0.999999167691796, iteration: 15697
loss: 1.0462441444396973,grad_norm: 0.9999990028297067, iteration: 15698
loss: 1.0500484704971313,grad_norm: 0.9999990773012976, iteration: 15699
loss: 0.9779474139213562,grad_norm: 0.9999991175586223, iteration: 15700
loss: 1.020703673362732,grad_norm: 0.9999991286530406, iteration: 15701
loss: 1.049972414970398,grad_norm: 0.9999991841839914, iteration: 15702
loss: 0.9856607913970947,grad_norm: 0.9999992453110685, iteration: 15703
loss: 1.0507043600082397,grad_norm: 0.9999990668851054, iteration: 15704
loss: 0.9959624409675598,grad_norm: 0.9999991140282196, iteration: 15705
loss: 1.0007084608078003,grad_norm: 0.9999991386598511, iteration: 15706
loss: 1.006894588470459,grad_norm: 0.9999993191777065, iteration: 15707
loss: 1.0245707035064697,grad_norm: 0.9999991858968104, iteration: 15708
loss: 1.0194907188415527,grad_norm: 0.9999990504165267, iteration: 15709
loss: 0.9810227751731873,grad_norm: 0.999999206991889, iteration: 15710
loss: 1.0067931413650513,grad_norm: 0.9970976718201616, iteration: 15711
loss: 1.0187287330627441,grad_norm: 0.9824207434783693, iteration: 15712
loss: 1.0327950716018677,grad_norm: 0.9122804229016938, iteration: 15713
loss: 1.0442346334457397,grad_norm: 0.9999991464247354, iteration: 15714
loss: 0.9488925933837891,grad_norm: 0.9928209459682367, iteration: 15715
loss: 1.0166996717453003,grad_norm: 0.9999991797076099, iteration: 15716
loss: 1.054512858390808,grad_norm: 0.9999992504081777, iteration: 15717
loss: 1.0130219459533691,grad_norm: 0.9999992579287879, iteration: 15718
loss: 0.9708278179168701,grad_norm: 0.9999991853741226, iteration: 15719
loss: 1.011648178100586,grad_norm: 0.9999991183733339, iteration: 15720
loss: 1.0479689836502075,grad_norm: 0.9999993358896428, iteration: 15721
loss: 0.9755458831787109,grad_norm: 0.9999992255018658, iteration: 15722
loss: 1.0183690786361694,grad_norm: 0.9999992855770876, iteration: 15723
loss: 1.026862621307373,grad_norm: 0.9999992730073961, iteration: 15724
loss: 1.0573347806930542,grad_norm: 0.999999063635682, iteration: 15725
loss: 1.0398274660110474,grad_norm: 0.999999078838053, iteration: 15726
loss: 0.9773440361022949,grad_norm: 0.9553742499042528, iteration: 15727
loss: 1.0361937284469604,grad_norm: 0.9999992442716662, iteration: 15728
loss: 1.0735173225402832,grad_norm: 0.999998997054909, iteration: 15729
loss: 1.0417768955230713,grad_norm: 0.999999089640594, iteration: 15730
loss: 0.9519938230514526,grad_norm: 0.999999072896727, iteration: 15731
loss: 1.0116783380508423,grad_norm: 0.9999991659869006, iteration: 15732
loss: 0.9570502638816833,grad_norm: 0.9999989736751934, iteration: 15733
loss: 0.9911467432975769,grad_norm: 0.9999990933930438, iteration: 15734
loss: 0.996626079082489,grad_norm: 0.9348833385581117, iteration: 15735
loss: 1.0328396558761597,grad_norm: 0.9999992000014107, iteration: 15736
loss: 1.0567890405654907,grad_norm: 0.9999995153960947, iteration: 15737
loss: 0.9755242466926575,grad_norm: 0.9999992251405895, iteration: 15738
loss: 1.014814853668213,grad_norm: 0.9999990207982624, iteration: 15739
loss: 1.0162616968154907,grad_norm: 0.9999991246364685, iteration: 15740
loss: 1.0275310277938843,grad_norm: 0.9999994492238911, iteration: 15741
loss: 1.078048825263977,grad_norm: 0.9999992645122475, iteration: 15742
loss: 1.0059548616409302,grad_norm: 0.9999991191387596, iteration: 15743
loss: 1.0374414920806885,grad_norm: 0.9999992144710581, iteration: 15744
loss: 1.0503077507019043,grad_norm: 0.9999993731891702, iteration: 15745
loss: 1.0155150890350342,grad_norm: 0.9999990853311626, iteration: 15746
loss: 1.0196298360824585,grad_norm: 0.9999992007324358, iteration: 15747
loss: 1.0525392293930054,grad_norm: 0.9999992024460158, iteration: 15748
loss: 1.0183855295181274,grad_norm: 0.9999992536620045, iteration: 15749
loss: 1.0504928827285767,grad_norm: 0.9999991329775995, iteration: 15750
loss: 1.03219735622406,grad_norm: 0.9999991966107545, iteration: 15751
loss: 1.0349946022033691,grad_norm: 0.9816188350942784, iteration: 15752
loss: 0.9893848896026611,grad_norm: 0.9999992911419967, iteration: 15753
loss: 0.9455373883247375,grad_norm: 0.9999990835316875, iteration: 15754
loss: 1.0564820766448975,grad_norm: 0.9999991079796542, iteration: 15755
loss: 1.0135087966918945,grad_norm: 0.9646813817762333, iteration: 15756
loss: 1.0269336700439453,grad_norm: 0.9999990492229232, iteration: 15757
loss: 1.0224229097366333,grad_norm: 0.9999989837507044, iteration: 15758
loss: 1.029358983039856,grad_norm: 0.9999990869639261, iteration: 15759
loss: 1.046960711479187,grad_norm: 0.9999993133831074, iteration: 15760
loss: 1.006298303604126,grad_norm: 0.9999990409292865, iteration: 15761
loss: 1.006127953529358,grad_norm: 0.9999991458813863, iteration: 15762
loss: 0.9652137160301208,grad_norm: 0.9999991612079927, iteration: 15763
loss: 1.0312340259552002,grad_norm: 0.9999996100080775, iteration: 15764
loss: 0.971969723701477,grad_norm: 0.9999991682146808, iteration: 15765
loss: 1.007293939590454,grad_norm: 0.9999990508981424, iteration: 15766
loss: 0.9466183185577393,grad_norm: 0.999999103148362, iteration: 15767
loss: 1.0588202476501465,grad_norm: 0.9999991481007068, iteration: 15768
loss: 1.0103850364685059,grad_norm: 0.9999991035951935, iteration: 15769
loss: 1.0436025857925415,grad_norm: 0.9999994788375086, iteration: 15770
loss: 0.996791660785675,grad_norm: 0.9999990758578708, iteration: 15771
loss: 0.9714571833610535,grad_norm: 0.999999284542452, iteration: 15772
loss: 0.9512677192687988,grad_norm: 0.9893826911428241, iteration: 15773
loss: 0.982675313949585,grad_norm: 0.9999991009982814, iteration: 15774
loss: 1.0077639818191528,grad_norm: 0.9870444405202232, iteration: 15775
loss: 1.005231499671936,grad_norm: 0.9999992749227135, iteration: 15776
loss: 1.0426779985427856,grad_norm: 0.9999992343431414, iteration: 15777
loss: 1.0055694580078125,grad_norm: 0.9999990884323022, iteration: 15778
loss: 1.0369948148727417,grad_norm: 0.9999991532387326, iteration: 15779
loss: 0.9958043098449707,grad_norm: 0.9999991747427696, iteration: 15780
loss: 1.0214842557907104,grad_norm: 0.9999992736056014, iteration: 15781
loss: 1.0332188606262207,grad_norm: 0.9999992046477174, iteration: 15782
loss: 1.0245535373687744,grad_norm: 0.9999994107884722, iteration: 15783
loss: 1.0479437112808228,grad_norm: 0.9999993478563007, iteration: 15784
loss: 1.040644884109497,grad_norm: 0.9999990215538512, iteration: 15785
loss: 1.0134711265563965,grad_norm: 0.9999992751842948, iteration: 15786
loss: 1.0263774394989014,grad_norm: 0.9999991383688855, iteration: 15787
loss: 1.0639371871948242,grad_norm: 0.9999993293697095, iteration: 15788
loss: 0.9803619980812073,grad_norm: 0.9518989130526093, iteration: 15789
loss: 1.0027530193328857,grad_norm: 0.9999993551461004, iteration: 15790
loss: 1.0350173711776733,grad_norm: 0.9999991974669111, iteration: 15791
loss: 1.0637513399124146,grad_norm: 0.9999992325092968, iteration: 15792
loss: 1.0361350774765015,grad_norm: 0.9312074165417049, iteration: 15793
loss: 1.0190445184707642,grad_norm: 0.999999267123312, iteration: 15794
loss: 0.9856722354888916,grad_norm: 0.9999992942961957, iteration: 15795
loss: 0.9977617263793945,grad_norm: 0.9112326624398557, iteration: 15796
loss: 1.05533766746521,grad_norm: 0.9999997239147509, iteration: 15797
loss: 1.013718605041504,grad_norm: 0.9999991258138616, iteration: 15798
loss: 1.0291982889175415,grad_norm: 0.9999991150614279, iteration: 15799
loss: 1.0284373760223389,grad_norm: 0.9999994333313716, iteration: 15800
loss: 1.033767580986023,grad_norm: 0.9999993169551605, iteration: 15801
loss: 1.0310791730880737,grad_norm: 0.9999991220079197, iteration: 15802
loss: 1.0266681909561157,grad_norm: 0.999999018302135, iteration: 15803
loss: 1.0262235403060913,grad_norm: 0.9999992236468263, iteration: 15804
loss: 1.0243343114852905,grad_norm: 0.9999991418186539, iteration: 15805
loss: 1.0177521705627441,grad_norm: 0.9999991223978335, iteration: 15806
loss: 0.9837906956672668,grad_norm: 0.9999993831846334, iteration: 15807
loss: 1.0743905305862427,grad_norm: 0.9999991809843705, iteration: 15808
loss: 0.9887832999229431,grad_norm: 0.999999220434855, iteration: 15809
loss: 1.034692645072937,grad_norm: 0.9999994523998306, iteration: 15810
loss: 1.0204963684082031,grad_norm: 0.9103913030980194, iteration: 15811
loss: 0.9940479397773743,grad_norm: 0.8700222354027145, iteration: 15812
loss: 1.038032054901123,grad_norm: 0.9999991490483529, iteration: 15813
loss: 1.0558029413223267,grad_norm: 0.9999996121664063, iteration: 15814
loss: 1.026286244392395,grad_norm: 0.9999990986251572, iteration: 15815
loss: 1.0024577379226685,grad_norm: 0.9999990588114048, iteration: 15816
loss: 1.0389939546585083,grad_norm: 0.9999994706780115, iteration: 15817
loss: 1.0438870191574097,grad_norm: 0.9999992260317867, iteration: 15818
loss: 1.0317927598953247,grad_norm: 0.9999993841229478, iteration: 15819
loss: 0.9875094294548035,grad_norm: 0.9021318982246985, iteration: 15820
loss: 1.056707501411438,grad_norm: 0.9999990241257309, iteration: 15821
loss: 0.9788262844085693,grad_norm: 0.9999990716421447, iteration: 15822
loss: 1.0273301601409912,grad_norm: 0.9999992206844953, iteration: 15823
loss: 0.9854594469070435,grad_norm: 0.9999991585159316, iteration: 15824
loss: 1.0501747131347656,grad_norm: 0.9999991976439625, iteration: 15825
loss: 1.006135106086731,grad_norm: 0.9999992926187364, iteration: 15826
loss: 0.9907055497169495,grad_norm: 0.9999991193131816, iteration: 15827
loss: 1.0156960487365723,grad_norm: 0.999999414826234, iteration: 15828
loss: 1.0288739204406738,grad_norm: 0.7825285623076851, iteration: 15829
loss: 1.0342167615890503,grad_norm: 0.9999992057884042, iteration: 15830
loss: 1.041454553604126,grad_norm: 0.9999992947484385, iteration: 15831
loss: 1.0576287508010864,grad_norm: 0.9999992428333244, iteration: 15832
loss: 1.044559359550476,grad_norm: 0.9999991491135444, iteration: 15833
loss: 1.0150700807571411,grad_norm: 0.9999991748699375, iteration: 15834
loss: 1.0412205457687378,grad_norm: 0.9999993549617907, iteration: 15835
loss: 1.0479199886322021,grad_norm: 0.9010993672256938, iteration: 15836
loss: 1.019838571548462,grad_norm: 0.934540311500212, iteration: 15837
loss: 1.0392813682556152,grad_norm: 0.999999216530339, iteration: 15838
loss: 1.0593844652175903,grad_norm: 0.9999993414108923, iteration: 15839
loss: 1.0114976167678833,grad_norm: 0.9999991298064216, iteration: 15840
loss: 1.0195952653884888,grad_norm: 0.9999991707544229, iteration: 15841
loss: 1.0959073305130005,grad_norm: 0.9999996605023086, iteration: 15842
loss: 0.9986912608146667,grad_norm: 0.9999990345084149, iteration: 15843
loss: 1.0104572772979736,grad_norm: 0.9999993306934638, iteration: 15844
loss: 1.018254041671753,grad_norm: 0.9999992971402271, iteration: 15845
loss: 0.9855853915214539,grad_norm: 0.9999990179540911, iteration: 15846
loss: 0.9650177955627441,grad_norm: 0.9999990312198526, iteration: 15847
loss: 1.0504575967788696,grad_norm: 0.9995789546478875, iteration: 15848
loss: 1.048146367073059,grad_norm: 0.9999991159848216, iteration: 15849
loss: 1.016069769859314,grad_norm: 0.9999990373141768, iteration: 15850
loss: 1.0034853219985962,grad_norm: 0.9999991329565674, iteration: 15851
loss: 0.9974161982536316,grad_norm: 0.9999991946565203, iteration: 15852
loss: 1.0101933479309082,grad_norm: 0.9999991116912337, iteration: 15853
loss: 1.08700430393219,grad_norm: 0.9999993691018509, iteration: 15854
loss: 1.0351742506027222,grad_norm: 0.9999993598339477, iteration: 15855
loss: 1.0096186399459839,grad_norm: 0.9999993767010505, iteration: 15856
loss: 1.0318381786346436,grad_norm: 0.9999992418581602, iteration: 15857
loss: 0.9885344505310059,grad_norm: 0.9999998127847338, iteration: 15858
loss: 1.0123893022537231,grad_norm: 0.9999991196137253, iteration: 15859
loss: 1.064692497253418,grad_norm: 0.9999990125906707, iteration: 15860
loss: 1.0242282152175903,grad_norm: 0.9999990063476519, iteration: 15861
loss: 1.009500503540039,grad_norm: 0.9999994425373615, iteration: 15862
loss: 1.0170530080795288,grad_norm: 0.9999991513738928, iteration: 15863
loss: 0.9705541729927063,grad_norm: 0.9999992679716772, iteration: 15864
loss: 0.987777829170227,grad_norm: 0.9999990600155994, iteration: 15865
loss: 1.0213003158569336,grad_norm: 0.9999991595904211, iteration: 15866
loss: 1.0012660026550293,grad_norm: 0.99999921247474, iteration: 15867
loss: 0.9944741725921631,grad_norm: 0.9999991648597938, iteration: 15868
loss: 1.0613212585449219,grad_norm: 0.9999994408620742, iteration: 15869
loss: 0.9759128093719482,grad_norm: 0.9581856048655961, iteration: 15870
loss: 0.9987192749977112,grad_norm: 0.9999992042756466, iteration: 15871
loss: 0.9824895858764648,grad_norm: 0.9999991170224942, iteration: 15872
loss: 1.0410743951797485,grad_norm: 0.909834791279581, iteration: 15873
loss: 1.0470765829086304,grad_norm: 0.9750390277951138, iteration: 15874
loss: 1.0000278949737549,grad_norm: 0.8844898713635561, iteration: 15875
loss: 1.0408720970153809,grad_norm: 0.9999991514983688, iteration: 15876
loss: 1.0300482511520386,grad_norm: 0.9999992407938617, iteration: 15877
loss: 0.9937922954559326,grad_norm: 0.9999990910530613, iteration: 15878
loss: 0.9941990375518799,grad_norm: 0.9999990549014621, iteration: 15879
loss: 0.9702577590942383,grad_norm: 0.9999991306857002, iteration: 15880
loss: 0.9995908737182617,grad_norm: 0.999999221048811, iteration: 15881
loss: 0.9884066581726074,grad_norm: 0.9999991409685693, iteration: 15882
loss: 1.0296984910964966,grad_norm: 0.9999991266137118, iteration: 15883
loss: 1.0152894258499146,grad_norm: 0.9999991045203493, iteration: 15884
loss: 1.0167996883392334,grad_norm: 0.999998993638105, iteration: 15885
loss: 1.045662522315979,grad_norm: 0.999999122216704, iteration: 15886
loss: 1.0439893007278442,grad_norm: 0.9999991735240036, iteration: 15887
loss: 0.9929192662239075,grad_norm: 0.9996778488683399, iteration: 15888
loss: 1.0459736585617065,grad_norm: 0.99999907034069, iteration: 15889
loss: 0.9897481203079224,grad_norm: 0.9999994171763644, iteration: 15890
loss: 1.0474944114685059,grad_norm: 0.999999613175816, iteration: 15891
loss: 1.013419270515442,grad_norm: 0.9999990815774703, iteration: 15892
loss: 1.061733603477478,grad_norm: 0.9999992072180733, iteration: 15893
loss: 1.0130563974380493,grad_norm: 0.8678205167454015, iteration: 15894
loss: 1.0202178955078125,grad_norm: 0.9999990376877796, iteration: 15895
loss: 1.0578471422195435,grad_norm: 0.9999992644456362, iteration: 15896
loss: 1.0011440515518188,grad_norm: 0.9829568484507132, iteration: 15897
loss: 1.003458857536316,grad_norm: 0.9999991750224924, iteration: 15898
loss: 1.0144188404083252,grad_norm: 0.9423662505400607, iteration: 15899
loss: 1.0205698013305664,grad_norm: 0.9999990606170813, iteration: 15900
loss: 1.0390657186508179,grad_norm: 0.9999990855315893, iteration: 15901
loss: 1.0061404705047607,grad_norm: 0.9999990491564777, iteration: 15902
loss: 1.004413366317749,grad_norm: 0.9999992308866605, iteration: 15903
loss: 1.0310670137405396,grad_norm: 0.9999991275037496, iteration: 15904
loss: 1.016575574874878,grad_norm: 0.9999993699706309, iteration: 15905
loss: 0.9750431180000305,grad_norm: 0.9999990533232774, iteration: 15906
loss: 1.0807031393051147,grad_norm: 0.9999997946933471, iteration: 15907
loss: 1.0467536449432373,grad_norm: 0.9999991579253906, iteration: 15908
loss: 1.016290307044983,grad_norm: 0.9999993384900397, iteration: 15909
loss: 1.049931526184082,grad_norm: 0.9860754642143646, iteration: 15910
loss: 1.0081464052200317,grad_norm: 0.9999992575840069, iteration: 15911
loss: 1.0889151096343994,grad_norm: 0.9999995762362798, iteration: 15912
loss: 1.0310945510864258,grad_norm: 0.99999911834112, iteration: 15913
loss: 1.0590890645980835,grad_norm: 0.9999992332389656, iteration: 15914
loss: 1.0178204774856567,grad_norm: 0.9999991111715446, iteration: 15915
loss: 1.0474472045898438,grad_norm: 0.9764885137501196, iteration: 15916
loss: 0.9992926120758057,grad_norm: 0.9999990770139618, iteration: 15917
loss: 1.0518718957901,grad_norm: 0.8959344426037457, iteration: 15918
loss: 1.0201270580291748,grad_norm: 0.9999991175361507, iteration: 15919
loss: 0.9903582334518433,grad_norm: 0.9999992744110847, iteration: 15920
loss: 1.0261788368225098,grad_norm: 0.9684481470920041, iteration: 15921
loss: 1.0390673875808716,grad_norm: 0.9999992965472374, iteration: 15922
loss: 1.0081541538238525,grad_norm: 0.9999989381234721, iteration: 15923
loss: 1.0007045269012451,grad_norm: 0.999999094555981, iteration: 15924
loss: 1.0275951623916626,grad_norm: 0.9999991534842958, iteration: 15925
loss: 1.0004996061325073,grad_norm: 0.9999991258810058, iteration: 15926
loss: 1.0316427946090698,grad_norm: 0.9999994278376021, iteration: 15927
loss: 1.0254123210906982,grad_norm: 0.9999991902483218, iteration: 15928
loss: 1.024803638458252,grad_norm: 0.9999992263566218, iteration: 15929
loss: 1.041399598121643,grad_norm: 0.9999995416210707, iteration: 15930
loss: 1.026592493057251,grad_norm: 0.9999993644174874, iteration: 15931
loss: 0.9738468527793884,grad_norm: 0.9999990787434205, iteration: 15932
loss: 1.0836467742919922,grad_norm: 0.9999996718462854, iteration: 15933
loss: 1.0218576192855835,grad_norm: 0.9999997963999582, iteration: 15934
loss: 1.0456137657165527,grad_norm: 0.9999991330867621, iteration: 15935
loss: 1.0515178442001343,grad_norm: 0.9999993599620147, iteration: 15936
loss: 1.0110241174697876,grad_norm: 0.9999992478281021, iteration: 15937
loss: 1.0462640523910522,grad_norm: 0.9999990130534383, iteration: 15938
loss: 1.0430326461791992,grad_norm: 0.999999058987773, iteration: 15939
loss: 1.017391562461853,grad_norm: 0.9999992119732843, iteration: 15940
loss: 1.0869147777557373,grad_norm: 0.9999994061040953, iteration: 15941
loss: 1.0151240825653076,grad_norm: 0.9999995396354322, iteration: 15942
loss: 1.0421468019485474,grad_norm: 0.9999992003593767, iteration: 15943
loss: 1.0125453472137451,grad_norm: 0.9999990560483063, iteration: 15944
loss: 0.990990936756134,grad_norm: 0.9999989910917456, iteration: 15945
loss: 1.0394132137298584,grad_norm: 0.9999992826141524, iteration: 15946
loss: 1.0328874588012695,grad_norm: 0.999999202467582, iteration: 15947
loss: 1.0373303890228271,grad_norm: 0.9999991980721281, iteration: 15948
loss: 1.0436724424362183,grad_norm: 0.9999991831737656, iteration: 15949
loss: 1.0428969860076904,grad_norm: 0.9999996286581779, iteration: 15950
loss: 1.0106545686721802,grad_norm: 0.9999991483397552, iteration: 15951
loss: 0.9945907592773438,grad_norm: 0.9999994090321235, iteration: 15952
loss: 1.0137234926223755,grad_norm: 0.9999991094713626, iteration: 15953
loss: 1.0227500200271606,grad_norm: 0.9999992074858889, iteration: 15954
loss: 0.9695954322814941,grad_norm: 0.9999994024429756, iteration: 15955
loss: 1.019100546836853,grad_norm: 0.9999991348106546, iteration: 15956
loss: 1.116542100906372,grad_norm: 0.9999998731526856, iteration: 15957
loss: 1.0431841611862183,grad_norm: 0.9999996479142605, iteration: 15958
loss: 1.018062949180603,grad_norm: 0.9999992873484765, iteration: 15959
loss: 0.9905822277069092,grad_norm: 0.9999991765108596, iteration: 15960
loss: 1.0170856714248657,grad_norm: 0.9999995656296233, iteration: 15961
loss: 1.082664966583252,grad_norm: 0.9999991452048913, iteration: 15962
loss: 1.0343215465545654,grad_norm: 0.9649883553656681, iteration: 15963
loss: 0.9629859328269958,grad_norm: 0.9999992888379025, iteration: 15964
loss: 1.0092071294784546,grad_norm: 0.9999991197573538, iteration: 15965
loss: 0.9943585991859436,grad_norm: 0.9999992205115609, iteration: 15966
loss: 0.9903622269630432,grad_norm: 0.9999992035732781, iteration: 15967
loss: 1.0471675395965576,grad_norm: 0.9999991778372983, iteration: 15968
loss: 0.9953378438949585,grad_norm: 0.9999992075363916, iteration: 15969
loss: 1.0970135927200317,grad_norm: 0.9999991279896263, iteration: 15970
loss: 1.0479155778884888,grad_norm: 0.9999994037421337, iteration: 15971
loss: 1.062453031539917,grad_norm: 0.9999996021123512, iteration: 15972
loss: 1.0105595588684082,grad_norm: 0.9999995533278089, iteration: 15973
loss: 0.968762218952179,grad_norm: 0.9999992482917424, iteration: 15974
loss: 0.9903262257575989,grad_norm: 0.9999992828165097, iteration: 15975
loss: 1.03352952003479,grad_norm: 0.999999305453735, iteration: 15976
loss: 1.0309269428253174,grad_norm: 0.9999991153770033, iteration: 15977
loss: 0.9779054522514343,grad_norm: 0.9999990811972229, iteration: 15978
loss: 0.991848349571228,grad_norm: 0.9999991033693567, iteration: 15979
loss: 1.0624518394470215,grad_norm: 0.9999990959384616, iteration: 15980
loss: 1.0259991884231567,grad_norm: 0.9999993257099085, iteration: 15981
loss: 1.0419325828552246,grad_norm: 0.9999990713429208, iteration: 15982
loss: 1.033400535583496,grad_norm: 0.9999990360600148, iteration: 15983
loss: 1.0468400716781616,grad_norm: 0.9205684557387211, iteration: 15984
loss: 1.0155699253082275,grad_norm: 0.9999991526601244, iteration: 15985
loss: 1.0907094478607178,grad_norm: 0.9999991674970521, iteration: 15986
loss: 1.0183266401290894,grad_norm: 0.9999992697403521, iteration: 15987
loss: 1.0527851581573486,grad_norm: 0.9999996027876925, iteration: 15988
loss: 1.016902208328247,grad_norm: 0.9999992839098745, iteration: 15989
loss: 0.9980525374412537,grad_norm: 0.9999991515777896, iteration: 15990
loss: 1.039276123046875,grad_norm: 0.9999994232220665, iteration: 15991
loss: 1.0063263177871704,grad_norm: 0.9999992224137729, iteration: 15992
loss: 1.0078890323638916,grad_norm: 0.9999991962871027, iteration: 15993
loss: 1.0053343772888184,grad_norm: 0.999999207991197, iteration: 15994
loss: 0.9844879508018494,grad_norm: 0.9999993032217964, iteration: 15995
loss: 1.0149208307266235,grad_norm: 0.9999990955742412, iteration: 15996
loss: 1.0776921510696411,grad_norm: 0.999999243496242, iteration: 15997
loss: 1.0107609033584595,grad_norm: 0.9999991837707726, iteration: 15998
loss: 1.0369764566421509,grad_norm: 0.999999046106397, iteration: 15999
loss: 1.046629786491394,grad_norm: 0.9999990132563008, iteration: 16000
loss: 1.0363210439682007,grad_norm: 0.9999990953783291, iteration: 16001
loss: 1.0285130739212036,grad_norm: 0.999999265335533, iteration: 16002
loss: 1.0280866622924805,grad_norm: 0.9999992350592114, iteration: 16003
loss: 1.0349981784820557,grad_norm: 0.9999993044730248, iteration: 16004
loss: 1.0520498752593994,grad_norm: 0.999999706507066, iteration: 16005
loss: 0.9923239946365356,grad_norm: 0.9999991976925131, iteration: 16006
loss: 1.0041948556900024,grad_norm: 0.9999997491564305, iteration: 16007
loss: 1.0221854448318481,grad_norm: 0.9999992397449731, iteration: 16008
loss: 0.9831191897392273,grad_norm: 0.9999991378162865, iteration: 16009
loss: 1.0261540412902832,grad_norm: 0.9999992445178861, iteration: 16010
loss: 1.0460186004638672,grad_norm: 0.9999994697716683, iteration: 16011
loss: 0.9851098656654358,grad_norm: 0.9999992912031715, iteration: 16012
loss: 1.0353552103042603,grad_norm: 0.9999992291477763, iteration: 16013
loss: 0.9984995126724243,grad_norm: 0.999999063620827, iteration: 16014
loss: 1.0390479564666748,grad_norm: 0.9999992154459291, iteration: 16015
loss: 1.0598337650299072,grad_norm: 0.9999993502859529, iteration: 16016
loss: 1.0192278623580933,grad_norm: 0.9999991068211868, iteration: 16017
loss: 1.0428097248077393,grad_norm: 0.9999995546168265, iteration: 16018
loss: 0.9982432126998901,grad_norm: 0.9999990749499458, iteration: 16019
loss: 1.0170526504516602,grad_norm: 0.9999991329035589, iteration: 16020
loss: 1.0572445392608643,grad_norm: 0.9918359677004069, iteration: 16021
loss: 1.0636768341064453,grad_norm: 0.9999996015607033, iteration: 16022
loss: 0.990941047668457,grad_norm: 0.9999993250476273, iteration: 16023
loss: 1.0060683488845825,grad_norm: 0.999999150982499, iteration: 16024
loss: 1.022283911705017,grad_norm: 0.9999992824031618, iteration: 16025
loss: 1.0417860746383667,grad_norm: 0.9999991234885244, iteration: 16026
loss: 0.9743401408195496,grad_norm: 0.9999990611029934, iteration: 16027
loss: 0.9864349365234375,grad_norm: 0.9999992955525744, iteration: 16028
loss: 1.0466854572296143,grad_norm: 0.9999990825654261, iteration: 16029
loss: 1.0211941003799438,grad_norm: 0.9999994867411061, iteration: 16030
loss: 1.0298054218292236,grad_norm: 0.9999990997601336, iteration: 16031
loss: 1.0056718587875366,grad_norm: 0.8182291220392179, iteration: 16032
loss: 1.0134412050247192,grad_norm: 0.9999992418775657, iteration: 16033
loss: 0.9904131293296814,grad_norm: 0.9999991381792922, iteration: 16034
loss: 1.0108237266540527,grad_norm: 0.9999990266108206, iteration: 16035
loss: 0.9828581213951111,grad_norm: 0.9999993187225159, iteration: 16036
loss: 0.985263466835022,grad_norm: 0.9092932939676516, iteration: 16037
loss: 1.0165107250213623,grad_norm: 0.8134314668978678, iteration: 16038
loss: 1.0412907600402832,grad_norm: 0.985888513480277, iteration: 16039
loss: 0.9592163562774658,grad_norm: 0.9894709954817832, iteration: 16040
loss: 0.9775276184082031,grad_norm: 0.9999990304183117, iteration: 16041
loss: 1.0774515867233276,grad_norm: 0.9999993912036131, iteration: 16042
loss: 0.9681445360183716,grad_norm: 0.9999991824129872, iteration: 16043
loss: 1.0452879667282104,grad_norm: 0.9999991295854915, iteration: 16044
loss: 1.003202199935913,grad_norm: 0.9999991988366856, iteration: 16045
loss: 1.0483952760696411,grad_norm: 0.9999993540015661, iteration: 16046
loss: 1.0492502450942993,grad_norm: 0.9999996271883137, iteration: 16047
loss: 0.9924078583717346,grad_norm: 0.999999676729036, iteration: 16048
loss: 1.0096355676651,grad_norm: 0.9999990521463066, iteration: 16049
loss: 1.0191255807876587,grad_norm: 0.9999991542536457, iteration: 16050
loss: 1.056605577468872,grad_norm: 0.9999996925606331, iteration: 16051
loss: 1.0081132650375366,grad_norm: 0.9508220054904454, iteration: 16052
loss: 1.0787209272384644,grad_norm: 0.9999991896608132, iteration: 16053
loss: 1.0229672193527222,grad_norm: 0.9999991310846784, iteration: 16054
loss: 1.0051721334457397,grad_norm: 0.9999990411245874, iteration: 16055
loss: 1.0244028568267822,grad_norm: 0.9650078364952759, iteration: 16056
loss: 0.9970327615737915,grad_norm: 0.9999991131868742, iteration: 16057
loss: 0.9915300607681274,grad_norm: 0.9999992723415078, iteration: 16058
loss: 1.0165443420410156,grad_norm: 0.9999991701620935, iteration: 16059
loss: 1.025755763053894,grad_norm: 0.9999993557178133, iteration: 16060
loss: 1.0287089347839355,grad_norm: 0.9999992210156837, iteration: 16061
loss: 1.0210150480270386,grad_norm: 0.9999995823359529, iteration: 16062
loss: 1.0395504236221313,grad_norm: 0.9999992353963271, iteration: 16063
loss: 1.0162745714187622,grad_norm: 0.9999996052953508, iteration: 16064
loss: 0.9717362523078918,grad_norm: 0.809662619862188, iteration: 16065
loss: 1.0368598699569702,grad_norm: 0.9999990967161264, iteration: 16066
loss: 1.0474683046340942,grad_norm: 0.9999992857180937, iteration: 16067
loss: 0.9770694375038147,grad_norm: 0.9999990515803359, iteration: 16068
loss: 1.0420618057250977,grad_norm: 0.9999991570544298, iteration: 16069
loss: 1.0225005149841309,grad_norm: 0.999999107757558, iteration: 16070
loss: 1.0386701822280884,grad_norm: 0.9999991214141771, iteration: 16071
loss: 0.9951114654541016,grad_norm: 0.9999990534973663, iteration: 16072
loss: 1.0745831727981567,grad_norm: 0.9999998133373285, iteration: 16073
loss: 1.0256484746932983,grad_norm: 0.9999993288756638, iteration: 16074
loss: 1.0692012310028076,grad_norm: 0.9999994050361088, iteration: 16075
loss: 1.0483139753341675,grad_norm: 0.9999992256159143, iteration: 16076
loss: 0.9774559736251831,grad_norm: 0.9999993435231626, iteration: 16077
loss: 0.9810603857040405,grad_norm: 0.9999992411164813, iteration: 16078
loss: 1.0212537050247192,grad_norm: 0.9999992040683079, iteration: 16079
loss: 1.0035613775253296,grad_norm: 0.9999989841653782, iteration: 16080
loss: 1.0689561367034912,grad_norm: 0.9999993759256375, iteration: 16081
loss: 1.0408530235290527,grad_norm: 0.9999991020126332, iteration: 16082
loss: 0.995502233505249,grad_norm: 0.9999990495019102, iteration: 16083
loss: 1.046860694885254,grad_norm: 0.9999992563632707, iteration: 16084
loss: 1.0129398107528687,grad_norm: 0.9999991947123457, iteration: 16085
loss: 1.0361827611923218,grad_norm: 0.9999990531384898, iteration: 16086
loss: 1.0004578828811646,grad_norm: 0.9999993711870223, iteration: 16087
loss: 1.0267473459243774,grad_norm: 0.8622425231771274, iteration: 16088
loss: 1.0277529954910278,grad_norm: 0.9999991049834112, iteration: 16089
loss: 1.0250983238220215,grad_norm: 0.9999994726254432, iteration: 16090
loss: 1.025050163269043,grad_norm: 0.9999990864403269, iteration: 16091
loss: 1.0374884605407715,grad_norm: 0.9999995132551012, iteration: 16092
loss: 1.0369094610214233,grad_norm: 0.9999992138545012, iteration: 16093
loss: 1.0469516515731812,grad_norm: 0.9999991360692562, iteration: 16094
loss: 1.023922085762024,grad_norm: 0.9999995989249076, iteration: 16095
loss: 1.0418455600738525,grad_norm: 0.9999994704689953, iteration: 16096
loss: 1.0426043272018433,grad_norm: 0.9999993114203531, iteration: 16097
loss: 1.0647516250610352,grad_norm: 0.999999741505101, iteration: 16098
loss: 1.0354899168014526,grad_norm: 0.9999996704674267, iteration: 16099
loss: 1.0584443807601929,grad_norm: 0.9999992287591163, iteration: 16100
loss: 1.0658464431762695,grad_norm: 0.9999993878371167, iteration: 16101
loss: 0.9737514853477478,grad_norm: 0.9999991408451815, iteration: 16102
loss: 1.0258122682571411,grad_norm: 0.9999990410324833, iteration: 16103
loss: 0.9888055920600891,grad_norm: 0.9999992285811132, iteration: 16104
loss: 1.0413631200790405,grad_norm: 0.9705672539499938, iteration: 16105
loss: 1.0744946002960205,grad_norm: 0.9999996136130201, iteration: 16106
loss: 0.9976714849472046,grad_norm: 0.9999990195020265, iteration: 16107
loss: 1.0298781394958496,grad_norm: 0.9999991080146508, iteration: 16108
loss: 0.9987155199050903,grad_norm: 0.9999991736336672, iteration: 16109
loss: 0.9792265295982361,grad_norm: 0.9999992490187771, iteration: 16110
loss: 1.0324621200561523,grad_norm: 0.9999990245257196, iteration: 16111
loss: 1.0095847845077515,grad_norm: 0.9999991168199361, iteration: 16112
loss: 0.9730300307273865,grad_norm: 0.9999992776360526, iteration: 16113
loss: 1.0145931243896484,grad_norm: 0.9999989859676414, iteration: 16114
loss: 1.0738872289657593,grad_norm: 0.9999992195861181, iteration: 16115
loss: 1.041717290878296,grad_norm: 0.9999993431811767, iteration: 16116
loss: 1.0233997106552124,grad_norm: 0.9999994592366466, iteration: 16117
loss: 1.0292768478393555,grad_norm: 0.9999993301814131, iteration: 16118
loss: 0.995263934135437,grad_norm: 0.9999993332611179, iteration: 16119
loss: 0.9895375967025757,grad_norm: 0.9999989651537139, iteration: 16120
loss: 1.0245988368988037,grad_norm: 0.9999992442952921, iteration: 16121
loss: 1.0047733783721924,grad_norm: 0.9999995544981346, iteration: 16122
loss: 1.0396376848220825,grad_norm: 0.9999992023753239, iteration: 16123
loss: 1.0202360153198242,grad_norm: 0.9999989662398584, iteration: 16124
loss: 1.040463924407959,grad_norm: 0.9999996580399593, iteration: 16125
loss: 1.0168721675872803,grad_norm: 0.9999992216739696, iteration: 16126
loss: 1.0582011938095093,grad_norm: 0.999999929870662, iteration: 16127
loss: 1.0720518827438354,grad_norm: 0.9999991923998627, iteration: 16128
loss: 1.057815670967102,grad_norm: 0.9091837478749035, iteration: 16129
loss: 0.9921981692314148,grad_norm: 0.9999992494316723, iteration: 16130
loss: 1.0272985696792603,grad_norm: 0.9999991544140807, iteration: 16131
loss: 1.0605723857879639,grad_norm: 0.9999996074847527, iteration: 16132
loss: 1.0484440326690674,grad_norm: 0.99999903936822, iteration: 16133
loss: 0.9892550706863403,grad_norm: 0.9768288529746074, iteration: 16134
loss: 0.9784190654754639,grad_norm: 0.9827519772710163, iteration: 16135
loss: 1.021411657333374,grad_norm: 0.9999991124692229, iteration: 16136
loss: 0.9751052856445312,grad_norm: 0.9999992719421748, iteration: 16137
loss: 1.030840516090393,grad_norm: 0.9999992190107881, iteration: 16138
loss: 1.0020192861557007,grad_norm: 0.9999992294377988, iteration: 16139
loss: 1.0318529605865479,grad_norm: 0.9999992809070144, iteration: 16140
loss: 1.0270746946334839,grad_norm: 0.9999991504711964, iteration: 16141
loss: 1.0460599660873413,grad_norm: 0.9999992533746079, iteration: 16142
loss: 0.9815373420715332,grad_norm: 0.999999201358079, iteration: 16143
loss: 1.0284006595611572,grad_norm: 0.9999990562312007, iteration: 16144
loss: 1.002357840538025,grad_norm: 0.9999993120597701, iteration: 16145
loss: 1.07132887840271,grad_norm: 0.9999997623712402, iteration: 16146
loss: 0.9815128445625305,grad_norm: 0.9999990654398067, iteration: 16147
loss: 1.0899046659469604,grad_norm: 0.9999993734103239, iteration: 16148
loss: 1.1124687194824219,grad_norm: 0.9999998248490626, iteration: 16149
loss: 0.9952547550201416,grad_norm: 0.9712953087269537, iteration: 16150
loss: 1.0225266218185425,grad_norm: 0.940891258780987, iteration: 16151
loss: 1.017844796180725,grad_norm: 0.9999991405070423, iteration: 16152
loss: 1.0424882173538208,grad_norm: 0.9999993147721985, iteration: 16153
loss: 1.0261002779006958,grad_norm: 0.9999991599826762, iteration: 16154
loss: 0.9894717931747437,grad_norm: 0.9999992309880023, iteration: 16155
loss: 1.0573036670684814,grad_norm: 0.9999992315247515, iteration: 16156
loss: 0.984080970287323,grad_norm: 0.999999130369248, iteration: 16157
loss: 1.020555019378662,grad_norm: 0.9999991324311825, iteration: 16158
loss: 1.0232211351394653,grad_norm: 0.9999992328956974, iteration: 16159
loss: 0.9997262954711914,grad_norm: 0.9999991123009367, iteration: 16160
loss: 1.0229568481445312,grad_norm: 0.9999991127903461, iteration: 16161
loss: 1.0462208986282349,grad_norm: 0.999999239042514, iteration: 16162
loss: 0.9654752612113953,grad_norm: 0.9999992308428324, iteration: 16163
loss: 1.0566999912261963,grad_norm: 0.9999991513091545, iteration: 16164
loss: 0.9992020726203918,grad_norm: 0.9999991782414336, iteration: 16165
loss: 1.005181074142456,grad_norm: 0.9999991272371785, iteration: 16166
loss: 1.0491759777069092,grad_norm: 0.9999998104083702, iteration: 16167
loss: 1.0518311262130737,grad_norm: 0.9999993123261678, iteration: 16168
loss: 1.0158134698867798,grad_norm: 0.9999991462375614, iteration: 16169
loss: 1.016066074371338,grad_norm: 0.9999990639844931, iteration: 16170
loss: 1.0058687925338745,grad_norm: 0.9999993470230161, iteration: 16171
loss: 1.011715054512024,grad_norm: 0.9999992594824993, iteration: 16172
loss: 1.0510493516921997,grad_norm: 0.9999993645032007, iteration: 16173
loss: 1.015512466430664,grad_norm: 0.999999384512957, iteration: 16174
loss: 1.0045403242111206,grad_norm: 0.9648399899393034, iteration: 16175
loss: 1.0401238203048706,grad_norm: 0.9999992895310017, iteration: 16176
loss: 1.0052121877670288,grad_norm: 0.9999992465608707, iteration: 16177
loss: 1.0028505325317383,grad_norm: 0.9999995546688651, iteration: 16178
loss: 1.0421864986419678,grad_norm: 0.9999998186559645, iteration: 16179
loss: 1.0397690534591675,grad_norm: 0.9999990909185321, iteration: 16180
loss: 1.0053592920303345,grad_norm: 0.9999996121792076, iteration: 16181
loss: 1.0912108421325684,grad_norm: 0.9999997889451872, iteration: 16182
loss: 1.024158239364624,grad_norm: 0.9999991460210057, iteration: 16183
loss: 0.9664918184280396,grad_norm: 0.9282428057015588, iteration: 16184
loss: 1.0418707132339478,grad_norm: 0.9999991828555719, iteration: 16185
loss: 1.0250482559204102,grad_norm: 0.9999993651360883, iteration: 16186
loss: 1.027366042137146,grad_norm: 0.9999993749529643, iteration: 16187
loss: 1.0211195945739746,grad_norm: 0.9999992100411436, iteration: 16188
loss: 1.0221003293991089,grad_norm: 0.9855819544845906, iteration: 16189
loss: 1.0548893213272095,grad_norm: 0.9999990473623916, iteration: 16190
loss: 1.025745153427124,grad_norm: 0.9999991092746339, iteration: 16191
loss: 1.0604608058929443,grad_norm: 0.9999992687256829, iteration: 16192
loss: 0.9930874109268188,grad_norm: 0.9999992541284913, iteration: 16193
loss: 0.9784604907035828,grad_norm: 0.9999991820064831, iteration: 16194
loss: 1.0415147542953491,grad_norm: 0.9999990201253726, iteration: 16195
loss: 1.0314897298812866,grad_norm: 0.9999992967687286, iteration: 16196
loss: 1.042993426322937,grad_norm: 0.9999992546656234, iteration: 16197
loss: 1.0419566631317139,grad_norm: 0.9999991553004564, iteration: 16198
loss: 0.9853627681732178,grad_norm: 0.999999148215095, iteration: 16199
loss: 1.0731587409973145,grad_norm: 0.9999997688254005, iteration: 16200
loss: 1.0194684267044067,grad_norm: 0.9999992787891724, iteration: 16201
loss: 1.007782220840454,grad_norm: 0.9999990749116138, iteration: 16202
loss: 1.0323487520217896,grad_norm: 0.9999992838807796, iteration: 16203
loss: 1.0413751602172852,grad_norm: 0.9999991107346243, iteration: 16204
loss: 0.9764480590820312,grad_norm: 0.9999991207482003, iteration: 16205
loss: 1.0115244388580322,grad_norm: 0.9999992668711367, iteration: 16206
loss: 0.9978844523429871,grad_norm: 0.9999990653836023, iteration: 16207
loss: 0.9790651798248291,grad_norm: 0.9667007281050388, iteration: 16208
loss: 0.9780858755111694,grad_norm: 0.8859669168548925, iteration: 16209
loss: 1.001267433166504,grad_norm: 0.9715898516360559, iteration: 16210
loss: 1.010459065437317,grad_norm: 0.999999676167022, iteration: 16211
loss: 1.039465308189392,grad_norm: 0.9999990138910375, iteration: 16212
loss: 0.97486412525177,grad_norm: 0.999999259708347, iteration: 16213
loss: 1.0335025787353516,grad_norm: 0.9238202259227589, iteration: 16214
loss: 1.037954568862915,grad_norm: 0.9999991157882487, iteration: 16215
loss: 1.0362755060195923,grad_norm: 0.9999991436724412, iteration: 16216
loss: 0.9690364003181458,grad_norm: 0.9999991830657357, iteration: 16217
loss: 1.0437018871307373,grad_norm: 0.9999994665477682, iteration: 16218
loss: 1.0025134086608887,grad_norm: 0.9999991033222781, iteration: 16219
loss: 0.9263546466827393,grad_norm: 0.9999990783891509, iteration: 16220
loss: 0.9700806140899658,grad_norm: 0.9999993992364427, iteration: 16221
loss: 1.0069507360458374,grad_norm: 0.9999991043636242, iteration: 16222
loss: 1.0907195806503296,grad_norm: 0.9999993822185399, iteration: 16223
loss: 1.052249789237976,grad_norm: 0.9999991368265592, iteration: 16224
loss: 0.9927763342857361,grad_norm: 0.999999289714217, iteration: 16225
loss: 1.0215970277786255,grad_norm: 0.9999992628194353, iteration: 16226
loss: 1.0005061626434326,grad_norm: 0.9991080589332636, iteration: 16227
loss: 1.075762391090393,grad_norm: 0.9999992629176285, iteration: 16228
loss: 1.0098313093185425,grad_norm: 0.9999993169198701, iteration: 16229
loss: 0.9724034667015076,grad_norm: 0.9999989841185449, iteration: 16230
loss: 0.9804089069366455,grad_norm: 0.9999996396547132, iteration: 16231
loss: 1.053303837776184,grad_norm: 0.9999991861428392, iteration: 16232
loss: 1.001800298690796,grad_norm: 0.9999991084910433, iteration: 16233
loss: 1.0098084211349487,grad_norm: 0.980978175159518, iteration: 16234
loss: 1.02720308303833,grad_norm: 0.9999992158199551, iteration: 16235
loss: 1.0174974203109741,grad_norm: 0.9999990097229216, iteration: 16236
loss: 1.0267122983932495,grad_norm: 0.9999990702914038, iteration: 16237
loss: 0.9836936593055725,grad_norm: 0.9999991095613354, iteration: 16238
loss: 1.0745071172714233,grad_norm: 0.9999990924377818, iteration: 16239
loss: 0.9819676876068115,grad_norm: 0.9999992883866328, iteration: 16240
loss: 1.0305088758468628,grad_norm: 0.9999996898345237, iteration: 16241
loss: 1.021140217781067,grad_norm: 0.9999992433808537, iteration: 16242
loss: 1.01682710647583,grad_norm: 0.9016543612584262, iteration: 16243
loss: 0.9996049404144287,grad_norm: 0.9999991842713789, iteration: 16244
loss: 1.0988432168960571,grad_norm: 0.9999992473545601, iteration: 16245
loss: 1.037867546081543,grad_norm: 0.9999992268225335, iteration: 16246
loss: 0.9719750285148621,grad_norm: 0.9897967383071904, iteration: 16247
loss: 1.0330756902694702,grad_norm: 0.9999991500463871, iteration: 16248
loss: 1.0087202787399292,grad_norm: 0.9999990724480039, iteration: 16249
loss: 0.9910348653793335,grad_norm: 0.9999990584605893, iteration: 16250
loss: 1.041308045387268,grad_norm: 0.9999997169583335, iteration: 16251
loss: 1.0325461626052856,grad_norm: 0.999999306837774, iteration: 16252
loss: 1.0355111360549927,grad_norm: 0.9578614965211264, iteration: 16253
loss: 1.0504504442214966,grad_norm: 0.9999997131160933, iteration: 16254
loss: 0.9868131875991821,grad_norm: 0.999999438243243, iteration: 16255
loss: 1.0319218635559082,grad_norm: 0.9999991224295197, iteration: 16256
loss: 1.0575541257858276,grad_norm: 0.999999165095317, iteration: 16257
loss: 1.0314068794250488,grad_norm: 0.9794088321735016, iteration: 16258
loss: 1.0269759893417358,grad_norm: 0.9999993446337534, iteration: 16259
loss: 1.0234068632125854,grad_norm: 0.9999990739041004, iteration: 16260
loss: 1.0299633741378784,grad_norm: 0.9999993151233444, iteration: 16261
loss: 1.0242605209350586,grad_norm: 0.9999991574803793, iteration: 16262
loss: 1.0192590951919556,grad_norm: 0.9999991196520951, iteration: 16263
loss: 1.025901198387146,grad_norm: 0.9999990791207403, iteration: 16264
loss: 1.0032353401184082,grad_norm: 0.9999992355768004, iteration: 16265
loss: 1.0065709352493286,grad_norm: 0.9999993889267986, iteration: 16266
loss: 1.0220975875854492,grad_norm: 0.999999208347444, iteration: 16267
loss: 1.0270291566848755,grad_norm: 0.9999992248608938, iteration: 16268
loss: 1.0230903625488281,grad_norm: 0.9999991219808205, iteration: 16269
loss: 0.9935228824615479,grad_norm: 0.8782470652892232, iteration: 16270
loss: 1.0418658256530762,grad_norm: 0.9999991690619913, iteration: 16271
loss: 1.0306576490402222,grad_norm: 0.9999991441070112, iteration: 16272
loss: 1.067984700202942,grad_norm: 0.9999994589644214, iteration: 16273
loss: 1.058843970298767,grad_norm: 0.9999993988928875, iteration: 16274
loss: 1.0466697216033936,grad_norm: 0.999999465115136, iteration: 16275
loss: 1.0201072692871094,grad_norm: 0.9999990261072115, iteration: 16276
loss: 1.0389807224273682,grad_norm: 0.9999994276212223, iteration: 16277
loss: 0.995461106300354,grad_norm: 0.999999344894434, iteration: 16278
loss: 1.0164557695388794,grad_norm: 0.9999993471115007, iteration: 16279
loss: 1.0471768379211426,grad_norm: 0.9999995438189601, iteration: 16280
loss: 1.041704773902893,grad_norm: 0.9999990313658387, iteration: 16281
loss: 1.0767855644226074,grad_norm: 0.9999997299603186, iteration: 16282
loss: 1.0013775825500488,grad_norm: 0.9999991636938467, iteration: 16283
loss: 1.05050528049469,grad_norm: 0.9999992200640464, iteration: 16284
loss: 1.0359536409378052,grad_norm: 0.9999991700510836, iteration: 16285
loss: 0.9729633331298828,grad_norm: 0.9999991326401712, iteration: 16286
loss: 1.0366562604904175,grad_norm: 0.9999990469216317, iteration: 16287
loss: 0.9966346025466919,grad_norm: 0.9999990825856595, iteration: 16288
loss: 1.0576279163360596,grad_norm: 0.9720428440458223, iteration: 16289
loss: 1.05531907081604,grad_norm: 0.999999267955119, iteration: 16290
loss: 1.0124622583389282,grad_norm: 0.9999993019505489, iteration: 16291
loss: 1.051355004310608,grad_norm: 0.9999992195922247, iteration: 16292
loss: 1.0464026927947998,grad_norm: 0.9999995508577545, iteration: 16293
loss: 1.0356782674789429,grad_norm: 0.9999993156386444, iteration: 16294
loss: 1.0319119691848755,grad_norm: 0.9999991147109804, iteration: 16295
loss: 0.9981746077537537,grad_norm: 0.9999991819986742, iteration: 16296
loss: 1.0464093685150146,grad_norm: 0.9999990351204198, iteration: 16297
loss: 1.0329909324645996,grad_norm: 0.9999994910954959, iteration: 16298
loss: 1.0205738544464111,grad_norm: 0.9999990480413983, iteration: 16299
loss: 1.0141661167144775,grad_norm: 0.9999991867362037, iteration: 16300
loss: 1.0756334066390991,grad_norm: 0.9999997497651295, iteration: 16301
loss: 1.0501916408538818,grad_norm: 0.9999990552373771, iteration: 16302
loss: 1.0399912595748901,grad_norm: 0.9999995490855516, iteration: 16303
loss: 1.0090118646621704,grad_norm: 0.9999992244677359, iteration: 16304
loss: 1.0175288915634155,grad_norm: 0.9999990510976847, iteration: 16305
loss: 1.0160807371139526,grad_norm: 0.9999991359119259, iteration: 16306
loss: 0.9825587868690491,grad_norm: 0.9999992070250081, iteration: 16307
loss: 1.028030276298523,grad_norm: 0.9999992029614957, iteration: 16308
loss: 1.0058608055114746,grad_norm: 0.9999991104077666, iteration: 16309
loss: 1.0165140628814697,grad_norm: 0.9410531191364039, iteration: 16310
loss: 1.0248945951461792,grad_norm: 0.9999992796109808, iteration: 16311
loss: 1.0159447193145752,grad_norm: 0.9999995119703141, iteration: 16312
loss: 1.0771055221557617,grad_norm: 0.9999995978506979, iteration: 16313
loss: 1.0367121696472168,grad_norm: 0.9999992943637324, iteration: 16314
loss: 1.0317696332931519,grad_norm: 0.9999991009569033, iteration: 16315
loss: 1.0450960397720337,grad_norm: 0.9999992106473089, iteration: 16316
loss: 0.9949511885643005,grad_norm: 0.999999200623515, iteration: 16317
loss: 1.0012050867080688,grad_norm: 0.9999996734886597, iteration: 16318
loss: 1.0118722915649414,grad_norm: 0.9999996558622416, iteration: 16319
loss: 1.0088075399398804,grad_norm: 0.9999994961915631, iteration: 16320
loss: 0.998799204826355,grad_norm: 0.9999993609765822, iteration: 16321
loss: 1.0022718906402588,grad_norm: 0.9999991322565283, iteration: 16322
loss: 1.0291070938110352,grad_norm: 0.9999991699332725, iteration: 16323
loss: 1.0187797546386719,grad_norm: 0.9954984988559159, iteration: 16324
loss: 0.9926393032073975,grad_norm: 0.9424076834556884, iteration: 16325
loss: 1.060709834098816,grad_norm: 0.9999995579152562, iteration: 16326
loss: 1.0372262001037598,grad_norm: 0.9783681653013907, iteration: 16327
loss: 1.0315228700637817,grad_norm: 0.9999992445243473, iteration: 16328
loss: 1.0036712884902954,grad_norm: 0.9999991205858753, iteration: 16329
loss: 1.002573013305664,grad_norm: 0.894926425699427, iteration: 16330
loss: 1.0491400957107544,grad_norm: 0.9999990463153555, iteration: 16331
loss: 1.0256041288375854,grad_norm: 0.9987850265726362, iteration: 16332
loss: 0.992499828338623,grad_norm: 0.9999989921419022, iteration: 16333
loss: 1.0679265260696411,grad_norm: 0.8776521304628097, iteration: 16334
loss: 1.0145553350448608,grad_norm: 0.9999992222487695, iteration: 16335
loss: 1.0590178966522217,grad_norm: 0.999999675794057, iteration: 16336
loss: 0.9915373921394348,grad_norm: 0.9999991228436499, iteration: 16337
loss: 1.0070184469223022,grad_norm: 0.8760389414550286, iteration: 16338
loss: 1.0076979398727417,grad_norm: 0.9999992401588025, iteration: 16339
loss: 1.0658001899719238,grad_norm: 0.9999992821570666, iteration: 16340
loss: 1.0011556148529053,grad_norm: 0.9999990759139596, iteration: 16341
loss: 1.0220625400543213,grad_norm: 0.9999990651453328, iteration: 16342
loss: 1.0482488870620728,grad_norm: 0.9999997632434582, iteration: 16343
loss: 1.0207605361938477,grad_norm: 0.9999991999983803, iteration: 16344
loss: 1.0291870832443237,grad_norm: 0.9999991911543135, iteration: 16345
loss: 1.0157197713851929,grad_norm: 0.9999990619028517, iteration: 16346
loss: 1.056485891342163,grad_norm: 0.9999993984573771, iteration: 16347
loss: 1.0496128797531128,grad_norm: 0.9999992646261854, iteration: 16348
loss: 1.0502502918243408,grad_norm: 0.9999997852460263, iteration: 16349
loss: 1.0091394186019897,grad_norm: 0.9999990990066384, iteration: 16350
loss: 1.050905466079712,grad_norm: 0.9999994722155648, iteration: 16351
loss: 1.0098457336425781,grad_norm: 0.9999992171691409, iteration: 16352
loss: 1.0434287786483765,grad_norm: 0.9999993283488985, iteration: 16353
loss: 1.020812749862671,grad_norm: 0.9394556049560892, iteration: 16354
loss: 1.0221033096313477,grad_norm: 0.9999991106358233, iteration: 16355
loss: 1.0298759937286377,grad_norm: 0.9999990691234121, iteration: 16356
loss: 0.9699596166610718,grad_norm: 0.9607739288809709, iteration: 16357
loss: 1.0476471185684204,grad_norm: 0.9999995321709405, iteration: 16358
loss: 1.0323359966278076,grad_norm: 0.9999991078746435, iteration: 16359
loss: 0.996269166469574,grad_norm: 0.9999991952402415, iteration: 16360
loss: 1.052573323249817,grad_norm: 0.9999990914775871, iteration: 16361
loss: 1.0667004585266113,grad_norm: 0.999999289458646, iteration: 16362
loss: 1.0312937498092651,grad_norm: 0.9999993401217977, iteration: 16363
loss: 1.0562255382537842,grad_norm: 0.9999992734679329, iteration: 16364
loss: 1.0342363119125366,grad_norm: 0.9999991371853448, iteration: 16365
loss: 1.0846563577651978,grad_norm: 0.9999994063810748, iteration: 16366
loss: 0.9520919919013977,grad_norm: 0.8720002280473191, iteration: 16367
loss: 0.9954649806022644,grad_norm: 0.9999991929725058, iteration: 16368
loss: 0.9785895943641663,grad_norm: 0.9999992201697927, iteration: 16369
loss: 0.9980098605155945,grad_norm: 0.9999992496204372, iteration: 16370
loss: 1.0449286699295044,grad_norm: 0.9999996153016563, iteration: 16371
loss: 0.9927859902381897,grad_norm: 0.9999994722229203, iteration: 16372
loss: 1.0008808374404907,grad_norm: 0.9999991017904801, iteration: 16373
loss: 1.0256340503692627,grad_norm: 0.9999993160319587, iteration: 16374
loss: 1.0483182668685913,grad_norm: 0.9999991902616092, iteration: 16375
loss: 1.0758864879608154,grad_norm: 0.9999994282254004, iteration: 16376
loss: 1.0138458013534546,grad_norm: 0.9999991192371351, iteration: 16377
loss: 1.029715657234192,grad_norm: 0.999999141450389, iteration: 16378
loss: 1.023369312286377,grad_norm: 0.9999991547042737, iteration: 16379
loss: 1.0011961460113525,grad_norm: 0.999999099870822, iteration: 16380
loss: 1.0150489807128906,grad_norm: 0.9999992975759013, iteration: 16381
loss: 1.0466135740280151,grad_norm: 0.9999991143059741, iteration: 16382
loss: 1.0022183656692505,grad_norm: 0.9999990442249814, iteration: 16383
loss: 1.0585761070251465,grad_norm: 0.9999996406942968, iteration: 16384
loss: 0.9982842206954956,grad_norm: 0.9999990885634813, iteration: 16385
loss: 1.0601600408554077,grad_norm: 0.9999991802925062, iteration: 16386
loss: 1.0342556238174438,grad_norm: 0.9999991247482181, iteration: 16387
loss: 1.028304934501648,grad_norm: 0.9999994757892142, iteration: 16388
loss: 1.076949954032898,grad_norm: 0.9999991777747371, iteration: 16389
loss: 1.008842945098877,grad_norm: 0.9999990320603792, iteration: 16390
loss: 1.023032307624817,grad_norm: 0.9999992999572821, iteration: 16391
loss: 1.0140082836151123,grad_norm: 0.9999992645816216, iteration: 16392
loss: 1.028357744216919,grad_norm: 0.9999996070396964, iteration: 16393
loss: 1.067921757698059,grad_norm: 0.9999994927794091, iteration: 16394
loss: 1.0141743421554565,grad_norm: 0.9999992160422926, iteration: 16395
loss: 1.0017178058624268,grad_norm: 0.9450771113018641, iteration: 16396
loss: 1.054244041442871,grad_norm: 0.9999995834988031, iteration: 16397
loss: 1.0602399110794067,grad_norm: 0.9999995738647908, iteration: 16398
loss: 0.9940236210823059,grad_norm: 0.9999990586816145, iteration: 16399
loss: 1.0040380954742432,grad_norm: 0.9999991296835057, iteration: 16400
loss: 1.0092910528182983,grad_norm: 0.9999993565546106, iteration: 16401
loss: 0.9893078804016113,grad_norm: 0.9999991490075749, iteration: 16402
loss: 1.005183458328247,grad_norm: 0.9227452389410015, iteration: 16403
loss: 1.0863176584243774,grad_norm: 0.9999992944465698, iteration: 16404
loss: 1.0594202280044556,grad_norm: 0.9999993535840556, iteration: 16405
loss: 1.0014376640319824,grad_norm: 0.9376811051291809, iteration: 16406
loss: 0.9773722290992737,grad_norm: 0.9999990431634052, iteration: 16407
loss: 1.0251762866973877,grad_norm: 0.999999410736101, iteration: 16408
loss: 1.0338497161865234,grad_norm: 0.9999995965277132, iteration: 16409
loss: 1.0364636182785034,grad_norm: 0.9999994252665997, iteration: 16410
loss: 1.0420845746994019,grad_norm: 0.9999993306783915, iteration: 16411
loss: 1.0877236127853394,grad_norm: 0.9999997288724735, iteration: 16412
loss: 0.9991615414619446,grad_norm: 0.9963459032743353, iteration: 16413
loss: 1.0552030801773071,grad_norm: 0.9999992757755697, iteration: 16414
loss: 1.0270023345947266,grad_norm: 0.9999999358811618, iteration: 16415
loss: 0.9764254689216614,grad_norm: 0.9999992459298553, iteration: 16416
loss: 1.0068739652633667,grad_norm: 0.9999990869632652, iteration: 16417
loss: 1.0677316188812256,grad_norm: 0.9999996172320534, iteration: 16418
loss: 1.080662727355957,grad_norm: 0.9999994282134971, iteration: 16419
loss: 1.0053439140319824,grad_norm: 0.9999990958432963, iteration: 16420
loss: 0.9486097693443298,grad_norm: 0.9999991517896517, iteration: 16421
loss: 1.0198076963424683,grad_norm: 0.9999991102078932, iteration: 16422
loss: 1.0732877254486084,grad_norm: 0.9999996622837726, iteration: 16423
loss: 1.040770411491394,grad_norm: 0.9999993031622004, iteration: 16424
loss: 1.0068053007125854,grad_norm: 0.9999991667583544, iteration: 16425
loss: 1.0066697597503662,grad_norm: 0.9999993156504138, iteration: 16426
loss: 1.0472900867462158,grad_norm: 0.9999994869949085, iteration: 16427
loss: 1.0340431928634644,grad_norm: 0.9999994985160081, iteration: 16428
loss: 1.0141478776931763,grad_norm: 0.9999991581315704, iteration: 16429
loss: 1.0455336570739746,grad_norm: 0.9999998231410525, iteration: 16430
loss: 0.9950668811798096,grad_norm: 0.9999989759349512, iteration: 16431
loss: 1.0307319164276123,grad_norm: 0.9999993376045387, iteration: 16432
loss: 1.0916489362716675,grad_norm: 0.9999995850594394, iteration: 16433
loss: 0.9992261528968811,grad_norm: 0.9973528691634462, iteration: 16434
loss: 1.016861915588379,grad_norm: 0.9999993912112025, iteration: 16435
loss: 0.9761970043182373,grad_norm: 0.9999992566927657, iteration: 16436
loss: 1.0330283641815186,grad_norm: 0.9999994416555671, iteration: 16437
loss: 1.033387303352356,grad_norm: 0.9999993043724857, iteration: 16438
loss: 1.042155146598816,grad_norm: 0.9999991777209486, iteration: 16439
loss: 1.0145262479782104,grad_norm: 0.9999992518777506, iteration: 16440
loss: 1.016574501991272,grad_norm: 0.9999993575702254, iteration: 16441
loss: 1.014278769493103,grad_norm: 0.9999991750023237, iteration: 16442
loss: 0.9990944266319275,grad_norm: 0.9999992276518344, iteration: 16443
loss: 1.017930269241333,grad_norm: 0.9999991395351464, iteration: 16444
loss: 0.9836946129798889,grad_norm: 0.9999991496876626, iteration: 16445
loss: 1.019292950630188,grad_norm: 0.9999994232074825, iteration: 16446
loss: 1.0027657747268677,grad_norm: 0.9999994523196329, iteration: 16447
loss: 1.0120558738708496,grad_norm: 0.9999992858388135, iteration: 16448
loss: 1.0445129871368408,grad_norm: 0.9999989480764149, iteration: 16449
loss: 0.9876804947853088,grad_norm: 0.9999995474263891, iteration: 16450
loss: 1.0793485641479492,grad_norm: 0.999999548258291, iteration: 16451
loss: 1.014258623123169,grad_norm: 0.9999992552130861, iteration: 16452
loss: 1.0042638778686523,grad_norm: 0.999999162753626, iteration: 16453
loss: 1.0118274688720703,grad_norm: 0.9999990309316136, iteration: 16454
loss: 1.0258052349090576,grad_norm: 0.9999994093702957, iteration: 16455
loss: 1.0035696029663086,grad_norm: 0.9999993532248396, iteration: 16456
loss: 1.0160657167434692,grad_norm: 0.99999911350355, iteration: 16457
loss: 1.0161583423614502,grad_norm: 0.9999991533966042, iteration: 16458
loss: 1.0179851055145264,grad_norm: 0.9999990056328262, iteration: 16459
loss: 1.0161137580871582,grad_norm: 0.9999992275904022, iteration: 16460
loss: 0.9962234497070312,grad_norm: 0.9999995409718885, iteration: 16461
loss: 0.9991843700408936,grad_norm: 0.9999993298020344, iteration: 16462
loss: 1.0490444898605347,grad_norm: 0.9999996255246872, iteration: 16463
loss: 1.0270167589187622,grad_norm: 0.9999994044982977, iteration: 16464
loss: 1.0582643747329712,grad_norm: 0.9999993976473232, iteration: 16465
loss: 1.0015161037445068,grad_norm: 0.9999992483731126, iteration: 16466
loss: 1.0557990074157715,grad_norm: 0.9999997093057813, iteration: 16467
loss: 1.0111372470855713,grad_norm: 0.9999992535976321, iteration: 16468
loss: 1.0195890665054321,grad_norm: 0.9999991290184557, iteration: 16469
loss: 1.0410548448562622,grad_norm: 0.9999992533632931, iteration: 16470
loss: 1.0160197019577026,grad_norm: 0.9999992842109039, iteration: 16471
loss: 1.0050426721572876,grad_norm: 0.99999925561298, iteration: 16472
loss: 1.0481796264648438,grad_norm: 0.9999992334150279, iteration: 16473
loss: 0.999748170375824,grad_norm: 0.9990508128265516, iteration: 16474
loss: 1.036814570426941,grad_norm: 0.9999990409643861, iteration: 16475
loss: 1.0239163637161255,grad_norm: 0.9999991731598619, iteration: 16476
loss: 1.0346624851226807,grad_norm: 0.9999992446152622, iteration: 16477
loss: 0.9751589298248291,grad_norm: 0.9999991456241442, iteration: 16478
loss: 1.0033190250396729,grad_norm: 0.9999991899600962, iteration: 16479
loss: 1.013023853302002,grad_norm: 0.9999992200544442, iteration: 16480
loss: 1.0714764595031738,grad_norm: 0.9999993731979886, iteration: 16481
loss: 0.9786117076873779,grad_norm: 0.9999991142469842, iteration: 16482
loss: 1.0224473476409912,grad_norm: 0.9999995872436532, iteration: 16483
loss: 0.9979463815689087,grad_norm: 0.999999069546308, iteration: 16484
loss: 1.0237900018692017,grad_norm: 0.9999991545591628, iteration: 16485
loss: 1.0779070854187012,grad_norm: 0.9999994141289701, iteration: 16486
loss: 1.013507604598999,grad_norm: 0.9999991736218188, iteration: 16487
loss: 1.0063464641571045,grad_norm: 0.9999992632675461, iteration: 16488
loss: 1.0152467489242554,grad_norm: 0.956398883389212, iteration: 16489
loss: 1.0015977621078491,grad_norm: 0.999999136121777, iteration: 16490
loss: 1.0158371925354004,grad_norm: 0.9999995568128587, iteration: 16491
loss: 1.0267053842544556,grad_norm: 0.9999990989126168, iteration: 16492
loss: 1.009103536605835,grad_norm: 0.9999991252862559, iteration: 16493
loss: 1.0331650972366333,grad_norm: 0.9999991782140452, iteration: 16494
loss: 1.0193727016448975,grad_norm: 0.9999992605493268, iteration: 16495
loss: 1.047162652015686,grad_norm: 0.9999992261716687, iteration: 16496
loss: 1.0250884294509888,grad_norm: 0.9999992048335659, iteration: 16497
loss: 0.9587612152099609,grad_norm: 0.9904234261341981, iteration: 16498
loss: 1.0130805969238281,grad_norm: 0.9948776304271908, iteration: 16499
loss: 1.0495260953903198,grad_norm: 0.9999990907317177, iteration: 16500
loss: 0.9965980648994446,grad_norm: 0.9999990961252955, iteration: 16501
loss: 0.9998175501823425,grad_norm: 0.9999991458337717, iteration: 16502
loss: 1.0269876718521118,grad_norm: 0.9999998221330876, iteration: 16503
loss: 1.0355075597763062,grad_norm: 0.9999990295741602, iteration: 16504
loss: 1.0222806930541992,grad_norm: 0.9999994635161377, iteration: 16505
loss: 1.0211626291275024,grad_norm: 0.9784850290387146, iteration: 16506
loss: 0.9948056936264038,grad_norm: 0.9999991969207235, iteration: 16507
loss: 1.043056845664978,grad_norm: 0.9999995216813767, iteration: 16508
loss: 1.010830044746399,grad_norm: 0.9999993792686881, iteration: 16509
loss: 1.0097129344940186,grad_norm: 0.9999993652940381, iteration: 16510
loss: 1.0307399034500122,grad_norm: 0.9999993707637427, iteration: 16511
loss: 1.033016562461853,grad_norm: 0.9999992820829005, iteration: 16512
loss: 1.0457326173782349,grad_norm: 0.9999994996416249, iteration: 16513
loss: 1.0073233842849731,grad_norm: 0.9999990399789731, iteration: 16514
loss: 1.05990731716156,grad_norm: 0.9999993712677555, iteration: 16515
loss: 1.0160796642303467,grad_norm: 0.9812914771279097, iteration: 16516
loss: 1.0058447122573853,grad_norm: 0.9999992075510042, iteration: 16517
loss: 1.0458248853683472,grad_norm: 0.9999996006942279, iteration: 16518
loss: 1.0243237018585205,grad_norm: 0.9999992002790221, iteration: 16519
loss: 1.0345674753189087,grad_norm: 0.9999990536387103, iteration: 16520
loss: 1.0444453954696655,grad_norm: 0.999999555949915, iteration: 16521
loss: 1.0449399948120117,grad_norm: 0.9999991554875154, iteration: 16522
loss: 1.0182976722717285,grad_norm: 0.9999990712795561, iteration: 16523
loss: 1.040434718132019,grad_norm: 0.9999992962351856, iteration: 16524
loss: 1.027910590171814,grad_norm: 0.9999991178940408, iteration: 16525
loss: 1.0237057209014893,grad_norm: 0.9476533137443987, iteration: 16526
loss: 1.0301036834716797,grad_norm: 0.9999993904674624, iteration: 16527
loss: 1.0307530164718628,grad_norm: 0.9999995967136496, iteration: 16528
loss: 1.0528463125228882,grad_norm: 0.9999990549864315, iteration: 16529
loss: 1.029760479927063,grad_norm: 0.9999992529125206, iteration: 16530
loss: 1.0491154193878174,grad_norm: 0.9999995163166181, iteration: 16531
loss: 1.0217974185943604,grad_norm: 0.9999998259732307, iteration: 16532
loss: 1.0334254503250122,grad_norm: 0.9999992021216921, iteration: 16533
loss: 1.03554105758667,grad_norm: 0.9999990610007738, iteration: 16534
loss: 1.0064078569412231,grad_norm: 0.9999991125888997, iteration: 16535
loss: 1.010124683380127,grad_norm: 0.9999997696095138, iteration: 16536
loss: 1.0572443008422852,grad_norm: 0.9999992658990067, iteration: 16537
loss: 1.0378808975219727,grad_norm: 0.9999992939339697, iteration: 16538
loss: 1.0536314249038696,grad_norm: 0.9999997667005713, iteration: 16539
loss: 1.07023024559021,grad_norm: 0.9999996544684134, iteration: 16540
loss: 1.0251080989837646,grad_norm: 0.8331126765585429, iteration: 16541
loss: 1.0988737344741821,grad_norm: 0.9999994218780291, iteration: 16542
loss: 1.045390009880066,grad_norm: 0.9999991703754894, iteration: 16543
loss: 1.0026187896728516,grad_norm: 0.9999992346739008, iteration: 16544
loss: 1.0648788213729858,grad_norm: 0.9999995831976581, iteration: 16545
loss: 0.9932147264480591,grad_norm: 0.9999990181777705, iteration: 16546
loss: 1.0259640216827393,grad_norm: 0.9999991743244403, iteration: 16547
loss: 1.0206104516983032,grad_norm: 0.9999990734560735, iteration: 16548
loss: 1.0068442821502686,grad_norm: 0.9999991226851547, iteration: 16549
loss: 1.0244892835617065,grad_norm: 0.9999998529312674, iteration: 16550
loss: 1.040487289428711,grad_norm: 0.9999992219822355, iteration: 16551
loss: 1.0418903827667236,grad_norm: 0.9999995279711063, iteration: 16552
loss: 1.0184136629104614,grad_norm: 0.9999990962836006, iteration: 16553
loss: 1.0023387670516968,grad_norm: 0.9999992981199111, iteration: 16554
loss: 0.9871277213096619,grad_norm: 0.9999991546650423, iteration: 16555
loss: 1.006728172302246,grad_norm: 0.9999991779136853, iteration: 16556
loss: 1.0394651889801025,grad_norm: 0.9915982023724228, iteration: 16557
loss: 1.0061465501785278,grad_norm: 0.9999994722634997, iteration: 16558
loss: 1.0203546285629272,grad_norm: 0.9999990401374667, iteration: 16559
loss: 1.0443668365478516,grad_norm: 0.9999992543716156, iteration: 16560
loss: 1.0213515758514404,grad_norm: 0.999999193051473, iteration: 16561
loss: 1.0405312776565552,grad_norm: 0.999999503266057, iteration: 16562
loss: 1.0256986618041992,grad_norm: 0.9999991736719568, iteration: 16563
loss: 0.9790852069854736,grad_norm: 0.9813261461087102, iteration: 16564
loss: 1.0114262104034424,grad_norm: 0.9999990347385667, iteration: 16565
loss: 1.077248215675354,grad_norm: 0.9999997469303633, iteration: 16566
loss: 1.0263490676879883,grad_norm: 0.9999992375628947, iteration: 16567
loss: 1.0130605697631836,grad_norm: 0.9999993458962201, iteration: 16568
loss: 1.0180293321609497,grad_norm: 0.9999991116819593, iteration: 16569
loss: 1.0029218196868896,grad_norm: 0.9999991277190552, iteration: 16570
loss: 1.0524667501449585,grad_norm: 0.9999991593964045, iteration: 16571
loss: 1.018536925315857,grad_norm: 0.9651187743307913, iteration: 16572
loss: 0.9912149310112,grad_norm: 0.9999991817230158, iteration: 16573
loss: 1.0730714797973633,grad_norm: 0.9999991537365411, iteration: 16574
loss: 1.0413395166397095,grad_norm: 0.999999406735709, iteration: 16575
loss: 1.1035877466201782,grad_norm: 0.9999998852668104, iteration: 16576
loss: 1.0224113464355469,grad_norm: 0.9999994112494934, iteration: 16577
loss: 1.0710082054138184,grad_norm: 0.9999992934947116, iteration: 16578
loss: 1.0591071844100952,grad_norm: 0.9999992441965269, iteration: 16579
loss: 1.0299066305160522,grad_norm: 0.9999990299271173, iteration: 16580
loss: 1.052314043045044,grad_norm: 0.999999208456294, iteration: 16581
loss: 1.0560705661773682,grad_norm: 0.9999990606384108, iteration: 16582
loss: 1.0384057760238647,grad_norm: 0.9999991600189737, iteration: 16583
loss: 1.0557197332382202,grad_norm: 0.9999996356063662, iteration: 16584
loss: 1.0057882070541382,grad_norm: 0.9999991960484151, iteration: 16585
loss: 1.0024311542510986,grad_norm: 0.9999996481167955, iteration: 16586
loss: 1.0136526823043823,grad_norm: 0.9999991077623923, iteration: 16587
loss: 1.052803874015808,grad_norm: 0.999999586724122, iteration: 16588
loss: 0.9899210929870605,grad_norm: 0.9999993939277017, iteration: 16589
loss: 1.039207100868225,grad_norm: 0.9999995333685012, iteration: 16590
loss: 0.9660437703132629,grad_norm: 0.9999990990512315, iteration: 16591
loss: 0.9843069314956665,grad_norm: 0.9999990279010722, iteration: 16592
loss: 1.0189920663833618,grad_norm: 0.9999996189145669, iteration: 16593
loss: 0.9809455275535583,grad_norm: 0.9999992494394961, iteration: 16594
loss: 1.0536634922027588,grad_norm: 0.9999995588381642, iteration: 16595
loss: 1.0233678817749023,grad_norm: 0.9999997250981993, iteration: 16596
loss: 1.0049225091934204,grad_norm: 0.999999108518428, iteration: 16597
loss: 0.9745148420333862,grad_norm: 0.9999992233659789, iteration: 16598
loss: 1.0832147598266602,grad_norm: 0.9999990465312244, iteration: 16599
loss: 1.0336997509002686,grad_norm: 0.9999993693040246, iteration: 16600
loss: 1.0437276363372803,grad_norm: 0.9999990418161888, iteration: 16601
loss: 1.0083041191101074,grad_norm: 0.9999991007438309, iteration: 16602
loss: 1.0582178831100464,grad_norm: 0.9999996759070459, iteration: 16603
loss: 0.9658528566360474,grad_norm: 0.9999991360746865, iteration: 16604
loss: 0.9701569080352783,grad_norm: 0.9999992407195295, iteration: 16605
loss: 1.0817146301269531,grad_norm: 0.9999996073781706, iteration: 16606
loss: 1.0480366945266724,grad_norm: 0.9999995988425519, iteration: 16607
loss: 1.0482009649276733,grad_norm: 0.9999992084194596, iteration: 16608
loss: 0.9755977988243103,grad_norm: 0.9999992152275207, iteration: 16609
loss: 1.036271572113037,grad_norm: 0.9999992594858451, iteration: 16610
loss: 1.037965178489685,grad_norm: 0.9999990024792216, iteration: 16611
loss: 0.9842857718467712,grad_norm: 0.9725485687301705, iteration: 16612
loss: 0.992914617061615,grad_norm: 0.9999991000215734, iteration: 16613
loss: 1.036301612854004,grad_norm: 0.9999993117286354, iteration: 16614
loss: 1.0255060195922852,grad_norm: 0.9999994785575557, iteration: 16615
loss: 1.0603392124176025,grad_norm: 0.9999994322122563, iteration: 16616
loss: 1.0360727310180664,grad_norm: 0.9999991824219702, iteration: 16617
loss: 1.0347095727920532,grad_norm: 0.9999990037664394, iteration: 16618
loss: 1.0124319791793823,grad_norm: 0.999999618213423, iteration: 16619
loss: 1.0112416744232178,grad_norm: 0.9999994402849347, iteration: 16620
loss: 1.0110594034194946,grad_norm: 0.9999989714304682, iteration: 16621
loss: 1.1245496273040771,grad_norm: 0.9999995950322783, iteration: 16622
loss: 1.0582646131515503,grad_norm: 0.9999993192849915, iteration: 16623
loss: 1.066188097000122,grad_norm: 0.9999991444795177, iteration: 16624
loss: 0.9901974201202393,grad_norm: 0.9999993245381533, iteration: 16625
loss: 1.054344654083252,grad_norm: 0.9999990994160446, iteration: 16626
loss: 1.056622862815857,grad_norm: 0.999999289104982, iteration: 16627
loss: 1.0490530729293823,grad_norm: 0.9999996492016067, iteration: 16628
loss: 1.0461196899414062,grad_norm: 0.9999993939002081, iteration: 16629
loss: 1.0145305395126343,grad_norm: 0.9999993998571161, iteration: 16630
loss: 1.024338722229004,grad_norm: 0.9850630331434178, iteration: 16631
loss: 1.0012197494506836,grad_norm: 0.9999992340046295, iteration: 16632
loss: 0.9873068928718567,grad_norm: 0.9999991159142468, iteration: 16633
loss: 1.017056941986084,grad_norm: 0.9999991646355549, iteration: 16634
loss: 1.0564895868301392,grad_norm: 0.9999995251055709, iteration: 16635
loss: 0.9831957221031189,grad_norm: 0.9999992984267728, iteration: 16636
loss: 1.034925937652588,grad_norm: 0.9999990294274268, iteration: 16637
loss: 1.0308163166046143,grad_norm: 0.9999992236115672, iteration: 16638
loss: 1.0103129148483276,grad_norm: 0.9999991970345682, iteration: 16639
loss: 1.0263547897338867,grad_norm: 0.8594668752564482, iteration: 16640
loss: 1.0160760879516602,grad_norm: 0.9999990710284135, iteration: 16641
loss: 1.0647932291030884,grad_norm: 0.9999992440452666, iteration: 16642
loss: 1.0522013902664185,grad_norm: 0.9999996092747184, iteration: 16643
loss: 1.0028879642486572,grad_norm: 0.9703653802278193, iteration: 16644
loss: 1.0123989582061768,grad_norm: 0.9999992157118135, iteration: 16645
loss: 0.9989630579948425,grad_norm: 0.9999992942442738, iteration: 16646
loss: 1.045644998550415,grad_norm: 0.9999993049074797, iteration: 16647
loss: 1.0202821493148804,grad_norm: 0.9999991655661731, iteration: 16648
loss: 1.0262054204940796,grad_norm: 0.9999994260897225, iteration: 16649
loss: 1.0401700735092163,grad_norm: 0.9999991259448368, iteration: 16650
loss: 1.022943377494812,grad_norm: 0.9301802643626409, iteration: 16651
loss: 1.0505430698394775,grad_norm: 0.9999993425605793, iteration: 16652
loss: 1.0369752645492554,grad_norm: 0.9999994954466741, iteration: 16653
loss: 1.0314973592758179,grad_norm: 0.9999990696137694, iteration: 16654
loss: 1.0067212581634521,grad_norm: 0.9999991420463274, iteration: 16655
loss: 1.0047860145568848,grad_norm: 0.9999991547649579, iteration: 16656
loss: 1.013736605644226,grad_norm: 0.9999992213252612, iteration: 16657
loss: 0.9786276817321777,grad_norm: 0.9999991737154194, iteration: 16658
loss: 1.0089704990386963,grad_norm: 0.9999990766262189, iteration: 16659
loss: 1.0085846185684204,grad_norm: 0.9999991194872524, iteration: 16660
loss: 1.0660570859909058,grad_norm: 0.9999996335185163, iteration: 16661
loss: 1.0662815570831299,grad_norm: 0.9999991603487483, iteration: 16662
loss: 1.061628818511963,grad_norm: 0.9999992085170031, iteration: 16663
loss: 1.0373443365097046,grad_norm: 0.9999989757562344, iteration: 16664
loss: 1.0087971687316895,grad_norm: 0.9999994505807128, iteration: 16665
loss: 0.9856942296028137,grad_norm: 0.999999135760778, iteration: 16666
loss: 1.0043610334396362,grad_norm: 0.9999992942294005, iteration: 16667
loss: 1.0174472332000732,grad_norm: 0.9999992851878052, iteration: 16668
loss: 0.9973745942115784,grad_norm: 0.9822999120739211, iteration: 16669
loss: 1.0229370594024658,grad_norm: 0.9999993017655951, iteration: 16670
loss: 1.0233983993530273,grad_norm: 0.9999991150868185, iteration: 16671
loss: 1.0576213598251343,grad_norm: 0.9999995087797746, iteration: 16672
loss: 1.049914836883545,grad_norm: 0.9999993499406198, iteration: 16673
loss: 1.037393569946289,grad_norm: 0.9999992792561544, iteration: 16674
loss: 1.0426124334335327,grad_norm: 0.9999993690359172, iteration: 16675
loss: 1.0331138372421265,grad_norm: 0.890936550458045, iteration: 16676
loss: 1.031894564628601,grad_norm: 0.9999996250665589, iteration: 16677
loss: 1.017940640449524,grad_norm: 0.9999990889877405, iteration: 16678
loss: 0.9989432096481323,grad_norm: 0.9999990657652478, iteration: 16679
loss: 1.031314730644226,grad_norm: 0.9999990803367172, iteration: 16680
loss: 1.012403964996338,grad_norm: 0.9999992120442682, iteration: 16681
loss: 1.0026509761810303,grad_norm: 0.9999992031771385, iteration: 16682
loss: 1.0399587154388428,grad_norm: 0.9999994650266247, iteration: 16683
loss: 0.9957138895988464,grad_norm: 0.9999993387044113, iteration: 16684
loss: 1.0297852754592896,grad_norm: 0.9999991867931058, iteration: 16685
loss: 1.0083199739456177,grad_norm: 0.9999991713529073, iteration: 16686
loss: 0.9837775826454163,grad_norm: 0.9902587267020232, iteration: 16687
loss: 0.962791919708252,grad_norm: 0.9808060306467562, iteration: 16688
loss: 0.973070502281189,grad_norm: 0.9999992066017642, iteration: 16689
loss: 0.9859437346458435,grad_norm: 0.9999994943854638, iteration: 16690
loss: 1.0217700004577637,grad_norm: 0.9999992323186025, iteration: 16691
loss: 0.9962702393531799,grad_norm: 0.9999993606579987, iteration: 16692
loss: 0.9946159720420837,grad_norm: 0.9985386004099157, iteration: 16693
loss: 1.0399599075317383,grad_norm: 0.9999992251071701, iteration: 16694
loss: 1.0168782472610474,grad_norm: 0.9957631918371785, iteration: 16695
loss: 1.0357242822647095,grad_norm: 0.9999991869728052, iteration: 16696
loss: 0.971997857093811,grad_norm: 0.9999990136066768, iteration: 16697
loss: 1.0645571947097778,grad_norm: 0.9999996242118421, iteration: 16698
loss: 1.0363959074020386,grad_norm: 0.9395085337737729, iteration: 16699
loss: 1.0607812404632568,grad_norm: 0.9999993290052098, iteration: 16700
loss: 1.0089075565338135,grad_norm: 0.999999090262246, iteration: 16701
loss: 0.9821243286132812,grad_norm: 0.9999991682657235, iteration: 16702
loss: 1.004483938217163,grad_norm: 0.9444976055920336, iteration: 16703
loss: 1.0121768712997437,grad_norm: 0.9999990789229206, iteration: 16704
loss: 1.0369418859481812,grad_norm: 0.960879137625265, iteration: 16705
loss: 0.9900672435760498,grad_norm: 0.9999990948128934, iteration: 16706
loss: 1.006307601928711,grad_norm: 0.9999992531991235, iteration: 16707
loss: 1.0208325386047363,grad_norm: 0.999999057789902, iteration: 16708
loss: 1.0404115915298462,grad_norm: 0.9999994315784337, iteration: 16709
loss: 1.0032514333724976,grad_norm: 0.9999993548081825, iteration: 16710
loss: 1.0082957744598389,grad_norm: 0.999999593590994, iteration: 16711
loss: 0.9804627895355225,grad_norm: 0.9999991750426965, iteration: 16712
loss: 1.0219515562057495,grad_norm: 0.9999993094427124, iteration: 16713
loss: 0.9850201606750488,grad_norm: 0.9999989999231772, iteration: 16714
loss: 0.9881200790405273,grad_norm: 0.9999990982355869, iteration: 16715
loss: 1.0204014778137207,grad_norm: 0.9944235350687768, iteration: 16716
loss: 1.025175929069519,grad_norm: 0.9999993548795328, iteration: 16717
loss: 0.9919242262840271,grad_norm: 0.8987564225886688, iteration: 16718
loss: 1.0211350917816162,grad_norm: 0.9566242868207143, iteration: 16719
loss: 1.0469813346862793,grad_norm: 0.999999122532405, iteration: 16720
loss: 1.021852731704712,grad_norm: 0.99999909701122, iteration: 16721
loss: 1.0320942401885986,grad_norm: 0.9999991383712269, iteration: 16722
loss: 1.0319397449493408,grad_norm: 0.9895790029529054, iteration: 16723
loss: 1.0214905738830566,grad_norm: 0.9999988726389287, iteration: 16724
loss: 1.018062710762024,grad_norm: 0.9999990165727616, iteration: 16725
loss: 1.0022594928741455,grad_norm: 0.99999921665872, iteration: 16726
loss: 0.9827149510383606,grad_norm: 0.999999293495714, iteration: 16727
loss: 0.9543966054916382,grad_norm: 0.9999990215968038, iteration: 16728
loss: 0.9702239632606506,grad_norm: 0.9226585616898767, iteration: 16729
loss: 1.0181012153625488,grad_norm: 0.9999991394957395, iteration: 16730
loss: 1.014054536819458,grad_norm: 0.9999991411417992, iteration: 16731
loss: 1.0602319240570068,grad_norm: 0.9999992313889164, iteration: 16732
loss: 0.9931634664535522,grad_norm: 0.9999990655112628, iteration: 16733
loss: 0.9873604774475098,grad_norm: 0.9172212179262876, iteration: 16734
loss: 1.0133872032165527,grad_norm: 0.9999992679646965, iteration: 16735
loss: 1.0195884704589844,grad_norm: 0.9999990979634308, iteration: 16736
loss: 1.019029974937439,grad_norm: 0.9999991805919073, iteration: 16737
loss: 1.0081795454025269,grad_norm: 0.9944359085757882, iteration: 16738
loss: 1.035620927810669,grad_norm: 0.9999994939079946, iteration: 16739
loss: 1.0344706773757935,grad_norm: 0.9999991999532645, iteration: 16740
loss: 1.016865611076355,grad_norm: 0.9999992025133786, iteration: 16741
loss: 1.0346567630767822,grad_norm: 0.9999991028645168, iteration: 16742
loss: 0.9976109862327576,grad_norm: 0.9999991246385646, iteration: 16743
loss: 1.0575003623962402,grad_norm: 0.999999379309664, iteration: 16744
loss: 0.9735338091850281,grad_norm: 0.9999990381234956, iteration: 16745
loss: 1.063857078552246,grad_norm: 0.9999996212521214, iteration: 16746
loss: 0.9555370807647705,grad_norm: 0.999999126956152, iteration: 16747
loss: 1.0052061080932617,grad_norm: 0.9993380181492135, iteration: 16748
loss: 1.044528841972351,grad_norm: 0.9999989868765988, iteration: 16749
loss: 1.036136269569397,grad_norm: 0.999999335145205, iteration: 16750
loss: 1.0221846103668213,grad_norm: 0.9999991705049355, iteration: 16751
loss: 1.0349622964859009,grad_norm: 0.8952020892138107, iteration: 16752
loss: 1.0876401662826538,grad_norm: 0.9999997876683382, iteration: 16753
loss: 1.0318716764450073,grad_norm: 0.9999991249723997, iteration: 16754
loss: 1.0370715856552124,grad_norm: 0.9999990465303779, iteration: 16755
loss: 1.0166908502578735,grad_norm: 0.9999992034638465, iteration: 16756
loss: 1.0218753814697266,grad_norm: 0.9999991070107711, iteration: 16757
loss: 0.9966574311256409,grad_norm: 0.9999990930796222, iteration: 16758
loss: 0.9931600689888,grad_norm: 0.9999991720435211, iteration: 16759
loss: 1.037365436553955,grad_norm: 0.9218836380886096, iteration: 16760
loss: 1.0409787893295288,grad_norm: 0.9999997002804404, iteration: 16761
loss: 1.0467995405197144,grad_norm: 0.9999994066621043, iteration: 16762
loss: 1.04307222366333,grad_norm: 0.9999991079545671, iteration: 16763
loss: 1.0179328918457031,grad_norm: 0.9999993273813162, iteration: 16764
loss: 1.0343526601791382,grad_norm: 0.9376678366310885, iteration: 16765
loss: 1.0059560537338257,grad_norm: 0.9999990180299356, iteration: 16766
loss: 0.959642231464386,grad_norm: 0.9999988912591157, iteration: 16767
loss: 1.0080270767211914,grad_norm: 0.9999991760907486, iteration: 16768
loss: 1.0323987007141113,grad_norm: 0.99999930264467, iteration: 16769
loss: 1.0362433195114136,grad_norm: 0.9809943849232386, iteration: 16770
loss: 1.0008666515350342,grad_norm: 0.9999990844731862, iteration: 16771
loss: 1.0127158164978027,grad_norm: 0.9999990335397506, iteration: 16772
loss: 1.028907299041748,grad_norm: 0.9712504850893902, iteration: 16773
loss: 1.0242434740066528,grad_norm: 0.9999991328785659, iteration: 16774
loss: 1.0292927026748657,grad_norm: 0.9999991970100353, iteration: 16775
loss: 1.0058733224868774,grad_norm: 0.9999993963994273, iteration: 16776
loss: 1.0655944347381592,grad_norm: 0.9999992740163198, iteration: 16777
loss: 0.9992613196372986,grad_norm: 0.999999081863287, iteration: 16778
loss: 1.0245563983917236,grad_norm: 0.9271658735430217, iteration: 16779
loss: 1.0036860704421997,grad_norm: 0.9965206931739397, iteration: 16780
loss: 1.001261830329895,grad_norm: 0.9999993635709808, iteration: 16781
loss: 1.0037941932678223,grad_norm: 0.9999991816040757, iteration: 16782
loss: 1.008845567703247,grad_norm: 0.9999990380943563, iteration: 16783
loss: 1.0038588047027588,grad_norm: 0.9999994022273493, iteration: 16784
loss: 1.0474231243133545,grad_norm: 0.98390115182825, iteration: 16785
loss: 1.0239813327789307,grad_norm: 0.9999991509502228, iteration: 16786
loss: 0.9896238446235657,grad_norm: 0.9999992978662099, iteration: 16787
loss: 1.0185917615890503,grad_norm: 0.9999991107961379, iteration: 16788
loss: 1.048766016960144,grad_norm: 0.9414999682011569, iteration: 16789
loss: 1.0764110088348389,grad_norm: 0.9999995683075495, iteration: 16790
loss: 0.9472512602806091,grad_norm: 0.9831681859562096, iteration: 16791
loss: 0.9935207962989807,grad_norm: 0.99999915995976, iteration: 16792
loss: 0.9935031533241272,grad_norm: 0.9999990583716282, iteration: 16793
loss: 1.0597285032272339,grad_norm: 0.9999993424629264, iteration: 16794
loss: 1.0190277099609375,grad_norm: 0.9999992143122163, iteration: 16795
loss: 1.018599510192871,grad_norm: 0.9999991328767984, iteration: 16796
loss: 1.0217843055725098,grad_norm: 0.9999991219018911, iteration: 16797
loss: 0.9947856068611145,grad_norm: 0.9999991820217078, iteration: 16798
loss: 1.023835301399231,grad_norm: 0.999999350130915, iteration: 16799
loss: 1.0397069454193115,grad_norm: 0.9999991246261598, iteration: 16800
loss: 1.068472146987915,grad_norm: 0.9999996989211933, iteration: 16801
loss: 1.0405644178390503,grad_norm: 0.9999992569315141, iteration: 16802
loss: 1.0532888174057007,grad_norm: 0.9999996136649466, iteration: 16803
loss: 1.1075385808944702,grad_norm: 0.9999996167754136, iteration: 16804
loss: 0.9932122826576233,grad_norm: 0.9999989729029293, iteration: 16805
loss: 1.0512326955795288,grad_norm: 0.9999993397661948, iteration: 16806
loss: 1.0504753589630127,grad_norm: 0.9172851914453498, iteration: 16807
loss: 1.042095422744751,grad_norm: 0.9999993101733161, iteration: 16808
loss: 1.0012495517730713,grad_norm: 0.9999990398475295, iteration: 16809
loss: 1.0245795249938965,grad_norm: 0.9999990360844911, iteration: 16810
loss: 1.0329393148422241,grad_norm: 0.9999991417581304, iteration: 16811
loss: 1.0031179189682007,grad_norm: 0.9757904359028355, iteration: 16812
loss: 1.0521055459976196,grad_norm: 0.9999991672405273, iteration: 16813
loss: 1.02849280834198,grad_norm: 0.9999991460000403, iteration: 16814
loss: 0.9986891150474548,grad_norm: 0.9999992784491125, iteration: 16815
loss: 1.0311031341552734,grad_norm: 0.9701962471625628, iteration: 16816
loss: 1.033219575881958,grad_norm: 0.9999993307427198, iteration: 16817
loss: 1.0184204578399658,grad_norm: 0.9999991017599412, iteration: 16818
loss: 1.0143404006958008,grad_norm: 0.9999990686133938, iteration: 16819
loss: 1.0348924398422241,grad_norm: 0.9999991708632766, iteration: 16820
loss: 1.0021625757217407,grad_norm: 0.9999993966635703, iteration: 16821
loss: 1.0134952068328857,grad_norm: 0.9999990055172829, iteration: 16822
loss: 0.9910595417022705,grad_norm: 0.9739704879734368, iteration: 16823
loss: 1.0349302291870117,grad_norm: 0.9328606784996049, iteration: 16824
loss: 1.0181622505187988,grad_norm: 0.9999991100684071, iteration: 16825
loss: 1.0084223747253418,grad_norm: 0.9999989539407322, iteration: 16826
loss: 0.9940147995948792,grad_norm: 0.9999993544449183, iteration: 16827
loss: 1.0387765169143677,grad_norm: 0.9999992565312804, iteration: 16828
loss: 0.9963268041610718,grad_norm: 0.9999993175769772, iteration: 16829
loss: 1.0334588289260864,grad_norm: 0.9999992945915197, iteration: 16830
loss: 1.0517053604125977,grad_norm: 0.9999992719429448, iteration: 16831
loss: 1.0329488515853882,grad_norm: 0.9999993053094858, iteration: 16832
loss: 1.029618263244629,grad_norm: 0.999999189171682, iteration: 16833
loss: 1.0039308071136475,grad_norm: 0.9999992164512119, iteration: 16834
loss: 1.047995686531067,grad_norm: 0.9999994376331502, iteration: 16835
loss: 0.998778760433197,grad_norm: 0.9999991891028626, iteration: 16836
loss: 1.0285896062850952,grad_norm: 0.9999991813168064, iteration: 16837
loss: 0.9915291666984558,grad_norm: 0.9481316776766784, iteration: 16838
loss: 1.0086508989334106,grad_norm: 0.9912399452023961, iteration: 16839
loss: 1.0388717651367188,grad_norm: 0.9999992216555356, iteration: 16840
loss: 1.031969666481018,grad_norm: 0.9999992325322504, iteration: 16841
loss: 1.0444523096084595,grad_norm: 0.9825107308659666, iteration: 16842
loss: 1.0154783725738525,grad_norm: 0.9706885973900953, iteration: 16843
loss: 1.015491247177124,grad_norm: 0.9999990690336198, iteration: 16844
loss: 1.0607945919036865,grad_norm: 0.9999992285724257, iteration: 16845
loss: 0.9878376722335815,grad_norm: 0.9999993235514595, iteration: 16846
loss: 1.048629879951477,grad_norm: 0.9999991960434509, iteration: 16847
loss: 1.039355754852295,grad_norm: 0.9999991127458432, iteration: 16848
loss: 1.018741488456726,grad_norm: 0.9999991625103062, iteration: 16849
loss: 0.9835348129272461,grad_norm: 0.999999065599516, iteration: 16850
loss: 1.0426942110061646,grad_norm: 0.9999991579788552, iteration: 16851
loss: 0.9447304010391235,grad_norm: 0.9999991989967778, iteration: 16852
loss: 1.0107234716415405,grad_norm: 0.9999992548214467, iteration: 16853
loss: 1.0599595308303833,grad_norm: 0.9999992083667391, iteration: 16854
loss: 0.9649490714073181,grad_norm: 0.9999991253330057, iteration: 16855
loss: 1.0185858011245728,grad_norm: 0.9999990149670709, iteration: 16856
loss: 1.0388226509094238,grad_norm: 0.9999991252065283, iteration: 16857
loss: 1.0394030809402466,grad_norm: 0.9999992038336306, iteration: 16858
loss: 1.0434845685958862,grad_norm: 0.9999991845482593, iteration: 16859
loss: 0.9887909293174744,grad_norm: 0.9999992460927707, iteration: 16860
loss: 0.996157705783844,grad_norm: 0.9999992181063218, iteration: 16861
loss: 1.0262222290039062,grad_norm: 0.9999990455192678, iteration: 16862
loss: 1.0335109233856201,grad_norm: 0.9999992032011895, iteration: 16863
loss: 1.009528636932373,grad_norm: 0.9999991745753611, iteration: 16864
loss: 1.0223674774169922,grad_norm: 0.9999992312961813, iteration: 16865
loss: 1.0663484334945679,grad_norm: 0.9999991728988561, iteration: 16866
loss: 0.9874945878982544,grad_norm: 0.9999989927068826, iteration: 16867
loss: 1.0091657638549805,grad_norm: 0.999999232929101, iteration: 16868
loss: 1.0521589517593384,grad_norm: 0.9999991136805024, iteration: 16869
loss: 1.0340884923934937,grad_norm: 0.999999181807664, iteration: 16870
loss: 0.982041597366333,grad_norm: 0.9999993974673206, iteration: 16871
loss: 0.9982673525810242,grad_norm: 0.9999991523962366, iteration: 16872
loss: 1.0172427892684937,grad_norm: 0.9625062567264849, iteration: 16873
loss: 1.0353143215179443,grad_norm: 0.9999993002114739, iteration: 16874
loss: 1.0037859678268433,grad_norm: 0.9999990328773881, iteration: 16875
loss: 1.0303820371627808,grad_norm: 0.9999992094724397, iteration: 16876
loss: 1.0056661367416382,grad_norm: 0.9999990411466415, iteration: 16877
loss: 1.0004962682724,grad_norm: 0.9999990604484044, iteration: 16878
loss: 1.0334409475326538,grad_norm: 0.9999991840965832, iteration: 16879
loss: 0.9965313673019409,grad_norm: 0.9999991763283342, iteration: 16880
loss: 1.0154306888580322,grad_norm: 0.9602103849283979, iteration: 16881
loss: 0.9997645616531372,grad_norm: 0.9999990610678954, iteration: 16882
loss: 0.9599767923355103,grad_norm: 0.9999990901120485, iteration: 16883
loss: 1.0122756958007812,grad_norm: 0.9999992273026945, iteration: 16884
loss: 1.0122267007827759,grad_norm: 0.9999990957535736, iteration: 16885
loss: 1.000876545906067,grad_norm: 0.9999991705634642, iteration: 16886
loss: 1.0589154958724976,grad_norm: 0.9999997050592034, iteration: 16887
loss: 1.0461536645889282,grad_norm: 0.9999990541637046, iteration: 16888
loss: 0.9794454574584961,grad_norm: 0.9999993202324147, iteration: 16889
loss: 1.0439451932907104,grad_norm: 0.9999989630855484, iteration: 16890
loss: 1.006028652191162,grad_norm: 0.9999992713338187, iteration: 16891
loss: 0.99422287940979,grad_norm: 0.8748500643034558, iteration: 16892
loss: 1.0197874307632446,grad_norm: 0.9201965959949412, iteration: 16893
loss: 1.0514887571334839,grad_norm: 0.999999391799958, iteration: 16894
loss: 1.0136021375656128,grad_norm: 0.9999992178557111, iteration: 16895
loss: 0.9554791450500488,grad_norm: 0.9999990920363216, iteration: 16896
loss: 1.0159573554992676,grad_norm: 0.9999991196425806, iteration: 16897
loss: 1.0610899925231934,grad_norm: 0.9999994454092842, iteration: 16898
loss: 0.9691081643104553,grad_norm: 0.9999993060490527, iteration: 16899
loss: 1.0237945318222046,grad_norm: 0.9999992276879173, iteration: 16900
loss: 1.0096162557601929,grad_norm: 0.8818729894007862, iteration: 16901
loss: 1.0022929906845093,grad_norm: 0.9999993782630855, iteration: 16902
loss: 1.0380504131317139,grad_norm: 0.9999990464740295, iteration: 16903
loss: 1.0210963487625122,grad_norm: 0.9999991464978751, iteration: 16904
loss: 0.96481853723526,grad_norm: 0.9999993575919264, iteration: 16905
loss: 0.962879478931427,grad_norm: 0.9999993178118072, iteration: 16906
loss: 1.0459415912628174,grad_norm: 0.9999991839326717, iteration: 16907
loss: 0.9629474878311157,grad_norm: 0.9999990027968414, iteration: 16908
loss: 1.0308101177215576,grad_norm: 0.9999991226086987, iteration: 16909
loss: 1.0316907167434692,grad_norm: 0.999999114989648, iteration: 16910
loss: 1.0426089763641357,grad_norm: 0.9642764311000751, iteration: 16911
loss: 1.0591473579406738,grad_norm: 0.999999424875806, iteration: 16912
loss: 1.0373162031173706,grad_norm: 0.9999991068673714, iteration: 16913
loss: 1.0255638360977173,grad_norm: 0.9999990312113615, iteration: 16914
loss: 1.0279514789581299,grad_norm: 0.9999991285606898, iteration: 16915
loss: 1.0011037588119507,grad_norm: 0.9999991285638404, iteration: 16916
loss: 1.0098851919174194,grad_norm: 0.9999990951962711, iteration: 16917
loss: 1.013128399848938,grad_norm: 0.9999989931881921, iteration: 16918
loss: 1.0298163890838623,grad_norm: 0.9999991296545189, iteration: 16919
loss: 0.9959198236465454,grad_norm: 0.9999993033092405, iteration: 16920
loss: 1.0095359086990356,grad_norm: 0.9999992967316814, iteration: 16921
loss: 1.0219223499298096,grad_norm: 0.9999991845233169, iteration: 16922
loss: 1.006746768951416,grad_norm: 0.9999990893879728, iteration: 16923
loss: 1.0166616439819336,grad_norm: 0.9999990970293503, iteration: 16924
loss: 0.9949175715446472,grad_norm: 0.9999991750886371, iteration: 16925
loss: 1.0719083547592163,grad_norm: 0.9999993689470497, iteration: 16926
loss: 1.0246837139129639,grad_norm: 0.9999992285352235, iteration: 16927
loss: 0.9999367594718933,grad_norm: 0.9999991177564336, iteration: 16928
loss: 1.002580165863037,grad_norm: 0.9999990024929154, iteration: 16929
loss: 1.0064669847488403,grad_norm: 0.9526167452458555, iteration: 16930
loss: 1.0443137884140015,grad_norm: 0.9999996711836, iteration: 16931
loss: 1.0596734285354614,grad_norm: 0.9397178740178204, iteration: 16932
loss: 0.9988620281219482,grad_norm: 0.925287913014146, iteration: 16933
loss: 1.0167548656463623,grad_norm: 0.970397330358572, iteration: 16934
loss: 1.010984182357788,grad_norm: 0.9999989716449451, iteration: 16935
loss: 1.0186175107955933,grad_norm: 0.9999995969586764, iteration: 16936
loss: 0.9531911611557007,grad_norm: 0.9999990182632712, iteration: 16937
loss: 0.9556930065155029,grad_norm: 0.9999991678436221, iteration: 16938
loss: 1.0442854166030884,grad_norm: 0.9999991159200959, iteration: 16939
loss: 0.9681174159049988,grad_norm: 0.9999991093290163, iteration: 16940
loss: 1.0120773315429688,grad_norm: 0.9999991278116782, iteration: 16941
loss: 1.013676643371582,grad_norm: 0.999999043416274, iteration: 16942
loss: 1.0778611898422241,grad_norm: 0.999999200336582, iteration: 16943
loss: 1.0478922128677368,grad_norm: 0.9999991570957542, iteration: 16944
loss: 1.0269453525543213,grad_norm: 0.971991957256696, iteration: 16945
loss: 0.9947859048843384,grad_norm: 0.9999991515297438, iteration: 16946
loss: 0.9815089702606201,grad_norm: 0.9999991308949733, iteration: 16947
loss: 1.042766809463501,grad_norm: 0.9675602961800428, iteration: 16948
loss: 0.9989224672317505,grad_norm: 0.9425895391048779, iteration: 16949
loss: 1.0206356048583984,grad_norm: 0.9999993052460316, iteration: 16950
loss: 1.0001740455627441,grad_norm: 0.9999991250165988, iteration: 16951
loss: 1.053900122642517,grad_norm: 0.9999992835385475, iteration: 16952
loss: 0.9992697834968567,grad_norm: 0.9999993105107361, iteration: 16953
loss: 1.0232270956039429,grad_norm: 0.9999993706558792, iteration: 16954
loss: 1.031788945198059,grad_norm: 0.9999995337224383, iteration: 16955
loss: 1.020167350769043,grad_norm: 0.9999991498858143, iteration: 16956
loss: 1.0734837055206299,grad_norm: 0.9999994153028255, iteration: 16957
loss: 1.019500970840454,grad_norm: 0.9999990439418004, iteration: 16958
loss: 1.0233861207962036,grad_norm: 0.9999991696405703, iteration: 16959
loss: 1.0230283737182617,grad_norm: 0.9999993674789306, iteration: 16960
loss: 1.04473078250885,grad_norm: 0.9999991381960082, iteration: 16961
loss: 0.9924977421760559,grad_norm: 0.9999992177809427, iteration: 16962
loss: 1.0252858400344849,grad_norm: 0.999999619574197, iteration: 16963
loss: 1.0301307439804077,grad_norm: 0.9999992520997394, iteration: 16964
loss: 0.9991700053215027,grad_norm: 0.9999992866854271, iteration: 16965
loss: 0.9662672877311707,grad_norm: 0.9999990785403086, iteration: 16966
loss: 1.0278784036636353,grad_norm: 0.893943649222023, iteration: 16967
loss: 1.0220677852630615,grad_norm: 0.9999991677881168, iteration: 16968
loss: 1.060606837272644,grad_norm: 0.9999998011909078, iteration: 16969
loss: 1.0485143661499023,grad_norm: 0.9999990763537535, iteration: 16970
loss: 1.0092331171035767,grad_norm: 0.9999991281000732, iteration: 16971
loss: 0.9946413040161133,grad_norm: 0.9999992812944977, iteration: 16972
loss: 1.0217422246932983,grad_norm: 0.9999989930745176, iteration: 16973
loss: 1.0300685167312622,grad_norm: 0.9999991772504206, iteration: 16974
loss: 1.0329515933990479,grad_norm: 0.9999992800927118, iteration: 16975
loss: 1.015235424041748,grad_norm: 0.9999991153319827, iteration: 16976
loss: 1.0232081413269043,grad_norm: 0.9999989830218867, iteration: 16977
loss: 1.0603184700012207,grad_norm: 0.9999993601138024, iteration: 16978
loss: 0.9941856861114502,grad_norm: 0.9999990325888141, iteration: 16979
loss: 1.0539642572402954,grad_norm: 0.9999990049158151, iteration: 16980
loss: 0.9955464005470276,grad_norm: 0.9899551776076261, iteration: 16981
loss: 0.9997609257698059,grad_norm: 0.9999993224586748, iteration: 16982
loss: 1.0204716920852661,grad_norm: 0.9999990947424228, iteration: 16983
loss: 1.004460096359253,grad_norm: 0.999999341939702, iteration: 16984
loss: 1.0256989002227783,grad_norm: 0.9999992008239761, iteration: 16985
loss: 1.024102807044983,grad_norm: 0.9999992870124697, iteration: 16986
loss: 1.0231202840805054,grad_norm: 0.9999991658685354, iteration: 16987
loss: 1.0235333442687988,grad_norm: 0.9999994759845009, iteration: 16988
loss: 1.0185104608535767,grad_norm: 0.999999162671266, iteration: 16989
loss: 1.0097830295562744,grad_norm: 0.9999991639195074, iteration: 16990
loss: 1.0199397802352905,grad_norm: 0.9999991448595401, iteration: 16991
loss: 1.0237146615982056,grad_norm: 0.9999990470863241, iteration: 16992
loss: 1.0135655403137207,grad_norm: 0.999999181591627, iteration: 16993
loss: 1.008933663368225,grad_norm: 0.9999994670678876, iteration: 16994
loss: 1.0290967226028442,grad_norm: 0.9999992858034147, iteration: 16995
loss: 0.9753596186637878,grad_norm: 0.9999992775054263, iteration: 16996
loss: 0.9983863234519958,grad_norm: 0.9999990374398668, iteration: 16997
loss: 1.0219725370407104,grad_norm: 0.9999990807243678, iteration: 16998
loss: 1.0297565460205078,grad_norm: 0.9999992885061888, iteration: 16999
loss: 1.0150893926620483,grad_norm: 0.9999991657177535, iteration: 17000
loss: 1.0020496845245361,grad_norm: 0.8310659758037563, iteration: 17001
loss: 1.033249020576477,grad_norm: 0.9999992339667928, iteration: 17002
loss: 1.0300835371017456,grad_norm: 0.9999995031378041, iteration: 17003
loss: 1.0236825942993164,grad_norm: 0.9999990918811416, iteration: 17004
loss: 1.019068956375122,grad_norm: 0.9999993899598955, iteration: 17005
loss: 1.0057636499404907,grad_norm: 0.9999993889804636, iteration: 17006
loss: 0.9849124550819397,grad_norm: 0.8995509327108195, iteration: 17007
loss: 0.9943872690200806,grad_norm: 0.9999991553980018, iteration: 17008
loss: 1.0187715291976929,grad_norm: 0.9999990508807143, iteration: 17009
loss: 1.0236998796463013,grad_norm: 0.9999990679593627, iteration: 17010
loss: 0.9973012804985046,grad_norm: 0.9999990697689615, iteration: 17011
loss: 1.0291627645492554,grad_norm: 0.9999998123350196, iteration: 17012
loss: 1.0257147550582886,grad_norm: 0.9999991123632673, iteration: 17013
loss: 0.9880505800247192,grad_norm: 0.9999991860888826, iteration: 17014
loss: 1.0447990894317627,grad_norm: 0.9999991055234153, iteration: 17015
loss: 1.03462815284729,grad_norm: 0.9552748039925472, iteration: 17016
loss: 1.0141912698745728,grad_norm: 0.999999243515724, iteration: 17017
loss: 1.0177624225616455,grad_norm: 0.9999990174151229, iteration: 17018
loss: 1.0066155195236206,grad_norm: 0.9999992142384867, iteration: 17019
loss: 1.137795329093933,grad_norm: 0.9999999028743242, iteration: 17020
loss: 1.029768466949463,grad_norm: 0.9999990041814948, iteration: 17021
loss: 1.0836141109466553,grad_norm: 0.9999992054196281, iteration: 17022
loss: 1.0302023887634277,grad_norm: 0.9999990154191012, iteration: 17023
loss: 1.0043952465057373,grad_norm: 0.9320328527000451, iteration: 17024
loss: 1.033098816871643,grad_norm: 0.9999992646550854, iteration: 17025
loss: 1.015568494796753,grad_norm: 0.9999992111822329, iteration: 17026
loss: 0.9881424903869629,grad_norm: 0.9999990884188774, iteration: 17027
loss: 1.0064064264297485,grad_norm: 0.9200554796407754, iteration: 17028
loss: 0.9993755221366882,grad_norm: 0.9954373352511052, iteration: 17029
loss: 0.9937890768051147,grad_norm: 0.9997844919058567, iteration: 17030
loss: 1.0221142768859863,grad_norm: 0.8386538329279766, iteration: 17031
loss: 1.0041435956954956,grad_norm: 0.9999992043184697, iteration: 17032
loss: 1.0238451957702637,grad_norm: 0.9999995248282095, iteration: 17033
loss: 1.034769892692566,grad_norm: 0.9999992270566284, iteration: 17034
loss: 0.9889162182807922,grad_norm: 0.9999992834614894, iteration: 17035
loss: 0.9964644312858582,grad_norm: 0.9999991979122795, iteration: 17036
loss: 1.0127959251403809,grad_norm: 0.9999989699127624, iteration: 17037
loss: 1.0640884637832642,grad_norm: 0.9999990856558234, iteration: 17038
loss: 0.9859588146209717,grad_norm: 0.9999991029061327, iteration: 17039
loss: 1.010695457458496,grad_norm: 0.9999990678677748, iteration: 17040
loss: 1.0205644369125366,grad_norm: 0.9999990658086776, iteration: 17041
loss: 0.9961872696876526,grad_norm: 0.9999993749060394, iteration: 17042
loss: 1.0287284851074219,grad_norm: 0.9838269359137608, iteration: 17043
loss: 1.0065407752990723,grad_norm: 0.9999997055779014, iteration: 17044
loss: 0.9819241166114807,grad_norm: 0.9999993175364289, iteration: 17045
loss: 0.9893801212310791,grad_norm: 0.9999991245581977, iteration: 17046
loss: 1.0130479335784912,grad_norm: 0.999999237444523, iteration: 17047
loss: 0.9823997020721436,grad_norm: 0.9999992405348951, iteration: 17048
loss: 1.0716670751571655,grad_norm: 0.9999991547528236, iteration: 17049
loss: 0.9934806227684021,grad_norm: 0.9999990644607919, iteration: 17050
loss: 1.0111242532730103,grad_norm: 0.999999260058976, iteration: 17051
loss: 1.0319403409957886,grad_norm: 0.9999993553526807, iteration: 17052
loss: 0.9741964340209961,grad_norm: 0.8666017264267096, iteration: 17053
loss: 1.0611600875854492,grad_norm: 0.9199356652022164, iteration: 17054
loss: 1.0536503791809082,grad_norm: 0.9999990756919104, iteration: 17055
loss: 1.0133522748947144,grad_norm: 0.9085467852538743, iteration: 17056
loss: 1.057422399520874,grad_norm: 0.9781294667087798, iteration: 17057
loss: 0.9778905510902405,grad_norm: 0.999999154445306, iteration: 17058
loss: 1.0237195491790771,grad_norm: 0.999999480985029, iteration: 17059
loss: 0.9910156726837158,grad_norm: 0.9999989059215058, iteration: 17060
loss: 1.0194165706634521,grad_norm: 0.9999994030556951, iteration: 17061
loss: 1.0214427709579468,grad_norm: 0.9999991940365173, iteration: 17062
loss: 1.0197972059249878,grad_norm: 0.9999992354762991, iteration: 17063
loss: 0.9975472092628479,grad_norm: 0.9996702803784235, iteration: 17064
loss: 0.9802671670913696,grad_norm: 0.9999990742311521, iteration: 17065
loss: 0.9921162128448486,grad_norm: 0.9999991786789002, iteration: 17066
loss: 0.9983236789703369,grad_norm: 0.8344237612355669, iteration: 17067
loss: 1.103318214416504,grad_norm: 0.9999993691961495, iteration: 17068
loss: 1.0229262113571167,grad_norm: 0.999999211414371, iteration: 17069
loss: 1.0035616159439087,grad_norm: 0.9999991004598353, iteration: 17070
loss: 0.9850097298622131,grad_norm: 0.9999991698511772, iteration: 17071
loss: 0.9594858288764954,grad_norm: 0.9872543182697726, iteration: 17072
loss: 1.0112966299057007,grad_norm: 0.999999233765755, iteration: 17073
loss: 1.0014543533325195,grad_norm: 0.9999992563469915, iteration: 17074
loss: 1.0313482284545898,grad_norm: 0.9999995712499852, iteration: 17075
loss: 0.9943507313728333,grad_norm: 0.9999990592557242, iteration: 17076
loss: 1.0476572513580322,grad_norm: 0.9999996191809223, iteration: 17077
loss: 0.9866132736206055,grad_norm: 0.9999992121290892, iteration: 17078
loss: 1.02250075340271,grad_norm: 0.9999995438148962, iteration: 17079
loss: 0.9872658252716064,grad_norm: 0.999999181056695, iteration: 17080
loss: 0.9356172680854797,grad_norm: 0.9999990487789292, iteration: 17081
loss: 0.9986352920532227,grad_norm: 0.8700798452614595, iteration: 17082
loss: 1.0372148752212524,grad_norm: 0.9999994903331916, iteration: 17083
loss: 0.9933472275733948,grad_norm: 0.9524019553824834, iteration: 17084
loss: 1.039954662322998,grad_norm: 0.9999993891403806, iteration: 17085
loss: 1.0413745641708374,grad_norm: 0.9999990480319466, iteration: 17086
loss: 1.0901445150375366,grad_norm: 0.9999992792680921, iteration: 17087
loss: 1.0503830909729004,grad_norm: 0.9999992330573602, iteration: 17088
loss: 0.9850271344184875,grad_norm: 0.9999991455986913, iteration: 17089
loss: 1.0251290798187256,grad_norm: 0.9999992542283573, iteration: 17090
loss: 0.9680658578872681,grad_norm: 0.9718744390192425, iteration: 17091
loss: 1.019689679145813,grad_norm: 0.9999992878370539, iteration: 17092
loss: 0.9586013555526733,grad_norm: 0.9999992565849604, iteration: 17093
loss: 1.0292317867279053,grad_norm: 0.9999990902956845, iteration: 17094
loss: 1.0670336484909058,grad_norm: 0.9999990328927978, iteration: 17095
loss: 1.0282690525054932,grad_norm: 0.9999991709406534, iteration: 17096
loss: 1.0341849327087402,grad_norm: 0.9999991138006239, iteration: 17097
loss: 1.010652780532837,grad_norm: 0.9999996830052543, iteration: 17098
loss: 1.0241447687149048,grad_norm: 0.9999991429586313, iteration: 17099
loss: 1.030971646308899,grad_norm: 0.9999995188239416, iteration: 17100
loss: 1.0162670612335205,grad_norm: 0.9999992686001078, iteration: 17101
loss: 1.043016791343689,grad_norm: 0.9379165633704434, iteration: 17102
loss: 1.0146607160568237,grad_norm: 0.951908452761981, iteration: 17103
loss: 1.0267300605773926,grad_norm: 0.9327018987212546, iteration: 17104
loss: 1.0300942659378052,grad_norm: 0.9999991966049427, iteration: 17105
loss: 1.0634887218475342,grad_norm: 0.9999991089061685, iteration: 17106
loss: 1.0278104543685913,grad_norm: 0.9999991396082722, iteration: 17107
loss: 1.0561245679855347,grad_norm: 0.9999992371845337, iteration: 17108
loss: 1.0486814975738525,grad_norm: 0.9999990569499818, iteration: 17109
loss: 1.033930778503418,grad_norm: 0.9999991601786823, iteration: 17110
loss: 0.9929869771003723,grad_norm: 0.99999918094096, iteration: 17111
loss: 1.0229506492614746,grad_norm: 0.9999990416651701, iteration: 17112
loss: 1.0337578058242798,grad_norm: 0.979085730872338, iteration: 17113
loss: 1.0355970859527588,grad_norm: 0.9999995526446612, iteration: 17114
loss: 1.0060851573944092,grad_norm: 0.9999991154642978, iteration: 17115
loss: 1.0089887380599976,grad_norm: 0.9999994194721884, iteration: 17116
loss: 1.0424734354019165,grad_norm: 0.9999990776712875, iteration: 17117
loss: 1.0433976650238037,grad_norm: 0.9999991314725765, iteration: 17118
loss: 1.009907603263855,grad_norm: 0.9999992494752526, iteration: 17119
loss: 1.0085411071777344,grad_norm: 0.9999992594607133, iteration: 17120
loss: 1.0124729871749878,grad_norm: 0.9999996347789033, iteration: 17121
loss: 1.0460032224655151,grad_norm: 0.9999992581617795, iteration: 17122
loss: 1.0176095962524414,grad_norm: 0.9999991486250519, iteration: 17123
loss: 1.003191590309143,grad_norm: 0.9999991661412011, iteration: 17124
loss: 1.0316497087478638,grad_norm: 0.9999996108441281, iteration: 17125
loss: 1.0655457973480225,grad_norm: 0.9999990472644398, iteration: 17126
loss: 0.9968019127845764,grad_norm: 0.9999992494213117, iteration: 17127
loss: 0.9930697083473206,grad_norm: 0.9999992222995139, iteration: 17128
loss: 1.0343255996704102,grad_norm: 0.9999994525331362, iteration: 17129
loss: 1.0101209878921509,grad_norm: 0.9999990634050532, iteration: 17130
loss: 1.0398964881896973,grad_norm: 0.9999996899206219, iteration: 17131
loss: 1.0505870580673218,grad_norm: 0.9999990221548372, iteration: 17132
loss: 0.9833231568336487,grad_norm: 0.9999992561750609, iteration: 17133
loss: 1.0516374111175537,grad_norm: 0.9557593537162646, iteration: 17134
loss: 1.044206142425537,grad_norm: 0.9999991631470693, iteration: 17135
loss: 1.0174217224121094,grad_norm: 0.99999927423202, iteration: 17136
loss: 1.011008620262146,grad_norm: 0.9999991935983599, iteration: 17137
loss: 1.028585433959961,grad_norm: 0.9999991703955827, iteration: 17138
loss: 1.039451003074646,grad_norm: 0.9999994642910945, iteration: 17139
loss: 1.036546230316162,grad_norm: 0.9234037429746831, iteration: 17140
loss: 1.0017598867416382,grad_norm: 0.9711373590534432, iteration: 17141
loss: 1.0261491537094116,grad_norm: 0.9999994259271568, iteration: 17142
loss: 1.0198745727539062,grad_norm: 0.996956801395329, iteration: 17143
loss: 1.0528626441955566,grad_norm: 0.9999994127052138, iteration: 17144
loss: 1.038430094718933,grad_norm: 0.9931422711191757, iteration: 17145
loss: 1.0453451871871948,grad_norm: 0.9999990372653977, iteration: 17146
loss: 0.9922898411750793,grad_norm: 0.9999991743605696, iteration: 17147
loss: 1.009758472442627,grad_norm: 0.9999992139819028, iteration: 17148
loss: 1.0357418060302734,grad_norm: 0.9692544488139996, iteration: 17149
loss: 1.0130287408828735,grad_norm: 0.8477920684843893, iteration: 17150
loss: 0.9829061031341553,grad_norm: 0.9454881099546428, iteration: 17151
loss: 0.9950895309448242,grad_norm: 0.9658966868018327, iteration: 17152
loss: 1.013467788696289,grad_norm: 0.9999992779833679, iteration: 17153
loss: 1.063447117805481,grad_norm: 0.9999992460967856, iteration: 17154
loss: 1.0071871280670166,grad_norm: 0.999999098303941, iteration: 17155
loss: 0.9870985746383667,grad_norm: 0.9469913869114919, iteration: 17156
loss: 0.9684745073318481,grad_norm: 0.9999992091478571, iteration: 17157
loss: 1.0336742401123047,grad_norm: 0.9999992705163137, iteration: 17158
loss: 1.039446473121643,grad_norm: 0.9999992974308844, iteration: 17159
loss: 1.031296730041504,grad_norm: 0.9999991837521024, iteration: 17160
loss: 1.0142484903335571,grad_norm: 0.9999990423749276, iteration: 17161
loss: 0.9794020652770996,grad_norm: 0.9999992220686486, iteration: 17162
loss: 1.0102062225341797,grad_norm: 0.9999992416134863, iteration: 17163
loss: 0.9866129755973816,grad_norm: 0.9483725644356947, iteration: 17164
loss: 1.0001555681228638,grad_norm: 0.9999990858538538, iteration: 17165
loss: 1.001613974571228,grad_norm: 0.9999990805729202, iteration: 17166
loss: 0.9781784415245056,grad_norm: 0.9554799259556859, iteration: 17167
loss: 1.009083867073059,grad_norm: 0.9999991871116707, iteration: 17168
loss: 1.0952965021133423,grad_norm: 0.999999780812653, iteration: 17169
loss: 1.0389219522476196,grad_norm: 0.9999992390681761, iteration: 17170
loss: 1.0041276216506958,grad_norm: 0.9999991845553904, iteration: 17171
loss: 1.015832781791687,grad_norm: 0.8917129977544149, iteration: 17172
loss: 1.0296660661697388,grad_norm: 0.970359318539972, iteration: 17173
loss: 1.04257333278656,grad_norm: 0.999999179904373, iteration: 17174
loss: 1.057734727859497,grad_norm: 0.9999992415742766, iteration: 17175
loss: 1.0177594423294067,grad_norm: 0.9999991724057525, iteration: 17176
loss: 1.0075156688690186,grad_norm: 0.9999991352200103, iteration: 17177
loss: 1.083080768585205,grad_norm: 0.9999992529697215, iteration: 17178
loss: 0.9717221856117249,grad_norm: 0.9924731933507394, iteration: 17179
loss: 0.9960536956787109,grad_norm: 0.99999921891016, iteration: 17180
loss: 1.027428388595581,grad_norm: 0.99999907617013, iteration: 17181
loss: 1.0186173915863037,grad_norm: 0.9666403926372091, iteration: 17182
loss: 1.0064514875411987,grad_norm: 0.9999993460541625, iteration: 17183
loss: 1.0528217554092407,grad_norm: 0.9999991532317537, iteration: 17184
loss: 1.0202292203903198,grad_norm: 0.9999992844350163, iteration: 17185
loss: 1.0140717029571533,grad_norm: 0.9999991796983526, iteration: 17186
loss: 1.0560766458511353,grad_norm: 0.9999991556863804, iteration: 17187
loss: 0.9820672273635864,grad_norm: 0.8923907636117333, iteration: 17188
loss: 0.9973204135894775,grad_norm: 0.9659251211999897, iteration: 17189
loss: 0.987334668636322,grad_norm: 0.999999124413204, iteration: 17190
loss: 1.0107346773147583,grad_norm: 0.9999993210982808, iteration: 17191
loss: 1.01992666721344,grad_norm: 0.9999992615765241, iteration: 17192
loss: 1.037090539932251,grad_norm: 0.9999993043479566, iteration: 17193
loss: 0.9963959455490112,grad_norm: 0.9328977300419161, iteration: 17194
loss: 1.0332986116409302,grad_norm: 0.9999992006921529, iteration: 17195
loss: 1.062135934829712,grad_norm: 0.9999991041562819, iteration: 17196
loss: 1.007310152053833,grad_norm: 0.8820320823898069, iteration: 17197
loss: 1.0215847492218018,grad_norm: 0.9999992222313174, iteration: 17198
loss: 1.025283694267273,grad_norm: 0.9999991345460139, iteration: 17199
loss: 1.0219136476516724,grad_norm: 0.9999991659953865, iteration: 17200
loss: 1.0315306186676025,grad_norm: 0.9999992498760463, iteration: 17201
loss: 1.035915493965149,grad_norm: 0.9999993274037133, iteration: 17202
loss: 1.0375092029571533,grad_norm: 0.9999991547430068, iteration: 17203
loss: 1.0422496795654297,grad_norm: 0.9999990875916765, iteration: 17204
loss: 1.0462925434112549,grad_norm: 0.9999991089972224, iteration: 17205
loss: 1.0008171796798706,grad_norm: 0.9999991244766077, iteration: 17206
loss: 0.990844190120697,grad_norm: 0.9999990979727033, iteration: 17207
loss: 1.0505393743515015,grad_norm: 0.999999527925027, iteration: 17208
loss: 1.0191971063613892,grad_norm: 0.9999992001804692, iteration: 17209
loss: 1.0402264595031738,grad_norm: 0.9999993865944896, iteration: 17210
loss: 1.02565598487854,grad_norm: 0.989342846897483, iteration: 17211
loss: 0.9962515830993652,grad_norm: 0.9999990462655793, iteration: 17212
loss: 1.0420514345169067,grad_norm: 0.9999991462636912, iteration: 17213
loss: 1.0245890617370605,grad_norm: 0.9942008863419026, iteration: 17214
loss: 1.0495535135269165,grad_norm: 0.9999992165698014, iteration: 17215
loss: 1.0302395820617676,grad_norm: 0.996666007737409, iteration: 17216
loss: 1.0165705680847168,grad_norm: 0.9999991772492026, iteration: 17217
loss: 1.0303746461868286,grad_norm: 0.9999991849935583, iteration: 17218
loss: 0.9852797985076904,grad_norm: 0.9999991108539935, iteration: 17219
loss: 1.023140549659729,grad_norm: 0.9999994316555608, iteration: 17220
loss: 1.031108021736145,grad_norm: 0.999999148015407, iteration: 17221
loss: 1.0066982507705688,grad_norm: 0.9637111651745369, iteration: 17222
loss: 1.0005545616149902,grad_norm: 0.9999997620872921, iteration: 17223
loss: 0.9968765377998352,grad_norm: 0.9999991992945871, iteration: 17224
loss: 0.9907702207565308,grad_norm: 0.9999991586583133, iteration: 17225
loss: 1.0244059562683105,grad_norm: 0.9999989968351549, iteration: 17226
loss: 0.9772399067878723,grad_norm: 0.9999990532721134, iteration: 17227
loss: 0.9332702159881592,grad_norm: 0.9999991503971387, iteration: 17228
loss: 0.9975355267524719,grad_norm: 0.9999991917693702, iteration: 17229
loss: 1.016756534576416,grad_norm: 0.9999993620363321, iteration: 17230
loss: 0.9841468930244446,grad_norm: 0.999999108086781, iteration: 17231
loss: 1.0077859163284302,grad_norm: 0.9999990692472461, iteration: 17232
loss: 1.0391970872879028,grad_norm: 0.96943977633012, iteration: 17233
loss: 1.0285707712173462,grad_norm: 0.9999992516931121, iteration: 17234
loss: 1.0277798175811768,grad_norm: 0.9999992683330352, iteration: 17235
loss: 0.9827205538749695,grad_norm: 0.9999991886850641, iteration: 17236
loss: 1.026463270187378,grad_norm: 0.9999995932175498, iteration: 17237
loss: 1.0018962621688843,grad_norm: 0.9999993336856887, iteration: 17238
loss: 1.0356271266937256,grad_norm: 0.9999993167808522, iteration: 17239
loss: 1.014763593673706,grad_norm: 0.9999997326167435, iteration: 17240
loss: 1.0286765098571777,grad_norm: 0.9999991095384491, iteration: 17241
loss: 1.036137580871582,grad_norm: 0.9999991781429879, iteration: 17242
loss: 0.9332372546195984,grad_norm: 0.9779470548287456, iteration: 17243
loss: 1.0994254350662231,grad_norm: 0.9999992609599404, iteration: 17244
loss: 1.0432233810424805,grad_norm: 0.9999992871121043, iteration: 17245
loss: 1.0212897062301636,grad_norm: 0.9999990046746356, iteration: 17246
loss: 1.0198249816894531,grad_norm: 0.9999994495625604, iteration: 17247
loss: 0.9630764722824097,grad_norm: 0.9999992100667706, iteration: 17248
loss: 1.0202816724777222,grad_norm: 0.9999993171918387, iteration: 17249
loss: 1.0337148904800415,grad_norm: 0.9999994025740983, iteration: 17250
loss: 1.01603102684021,grad_norm: 0.9999992040676474, iteration: 17251
loss: 0.9916700720787048,grad_norm: 0.9999992704006032, iteration: 17252
loss: 1.030619502067566,grad_norm: 0.9999993539590538, iteration: 17253
loss: 1.0251370668411255,grad_norm: 0.9999990613530073, iteration: 17254
loss: 0.9677286148071289,grad_norm: 0.9999990435084498, iteration: 17255
loss: 1.0171769857406616,grad_norm: 0.9999996919727213, iteration: 17256
loss: 1.0063480138778687,grad_norm: 0.9999991132031164, iteration: 17257
loss: 0.9917077422142029,grad_norm: 0.9999992439191553, iteration: 17258
loss: 1.0287419557571411,grad_norm: 0.9999991650464708, iteration: 17259
loss: 0.9792227149009705,grad_norm: 0.9999991964987491, iteration: 17260
loss: 1.0352563858032227,grad_norm: 0.9999989836413565, iteration: 17261
loss: 0.9922388195991516,grad_norm: 0.9999990855361381, iteration: 17262
loss: 1.0483592748641968,grad_norm: 0.9999992630750192, iteration: 17263
loss: 1.044281005859375,grad_norm: 0.9999991261040847, iteration: 17264
loss: 0.9846662878990173,grad_norm: 0.9999991954081002, iteration: 17265
loss: 0.9844557046890259,grad_norm: 0.9716735604708662, iteration: 17266
loss: 1.000218152999878,grad_norm: 0.9999992171958523, iteration: 17267
loss: 1.0261292457580566,grad_norm: 0.9670346998147883, iteration: 17268
loss: 1.0327386856079102,grad_norm: 0.9999989992231714, iteration: 17269
loss: 0.995341420173645,grad_norm: 0.9999996173623725, iteration: 17270
loss: 1.0313256978988647,grad_norm: 0.9999991280282605, iteration: 17271
loss: 1.046220302581787,grad_norm: 0.9999991567298707, iteration: 17272
loss: 1.0208568572998047,grad_norm: 0.9999991538141623, iteration: 17273
loss: 1.0281058549880981,grad_norm: 0.9999993039386765, iteration: 17274
loss: 0.9922346472740173,grad_norm: 0.9549494385469417, iteration: 17275
loss: 1.0122156143188477,grad_norm: 0.9999992965296249, iteration: 17276
loss: 1.0324491262435913,grad_norm: 0.9999990069664915, iteration: 17277
loss: 1.052406668663025,grad_norm: 0.9999991438790761, iteration: 17278
loss: 1.0002859830856323,grad_norm: 0.999999153211708, iteration: 17279
loss: 1.0071680545806885,grad_norm: 0.9999994125966909, iteration: 17280
loss: 1.024088740348816,grad_norm: 0.9605833572726649, iteration: 17281
loss: 1.0410741567611694,grad_norm: 0.9662024551611924, iteration: 17282
loss: 1.0268146991729736,grad_norm: 0.9999991394149045, iteration: 17283
loss: 1.020705223083496,grad_norm: 0.9999990655109969, iteration: 17284
loss: 0.997344434261322,grad_norm: 0.9999990588320758, iteration: 17285
loss: 1.0199052095413208,grad_norm: 0.9999992140603656, iteration: 17286
loss: 1.0516715049743652,grad_norm: 0.9999991506598456, iteration: 17287
loss: 0.9786580801010132,grad_norm: 0.9999991573466996, iteration: 17288
loss: 1.001283884048462,grad_norm: 0.9999991425198208, iteration: 17289
loss: 1.0136257410049438,grad_norm: 0.999999517910206, iteration: 17290
loss: 1.0357502698898315,grad_norm: 0.9999992886588573, iteration: 17291
loss: 0.9970752596855164,grad_norm: 0.9865251507753654, iteration: 17292
loss: 1.0125513076782227,grad_norm: 0.9999991813828101, iteration: 17293
loss: 0.9735569953918457,grad_norm: 0.9999992686589222, iteration: 17294
loss: 0.9844513535499573,grad_norm: 0.9999991367540809, iteration: 17295
loss: 1.0149799585342407,grad_norm: 0.9999993883417434, iteration: 17296
loss: 1.02775239944458,grad_norm: 0.9999991172256969, iteration: 17297
loss: 1.0082879066467285,grad_norm: 0.8562016934300267, iteration: 17298
loss: 1.0318909883499146,grad_norm: 0.999999052652504, iteration: 17299
loss: 1.0206444263458252,grad_norm: 0.9999992081156621, iteration: 17300
loss: 0.9993467330932617,grad_norm: 0.9999990387895974, iteration: 17301
loss: 1.0179128646850586,grad_norm: 0.9999991505624224, iteration: 17302
loss: 1.0615314245224,grad_norm: 0.999999825165448, iteration: 17303
loss: 1.0396817922592163,grad_norm: 0.9999991479002314, iteration: 17304
loss: 1.0153300762176514,grad_norm: 0.9999991916429315, iteration: 17305
loss: 1.00946044921875,grad_norm: 0.9999990905013568, iteration: 17306
loss: 0.9977917075157166,grad_norm: 0.9999994520863255, iteration: 17307
loss: 0.9860028028488159,grad_norm: 0.9999991839063561, iteration: 17308
loss: 1.0496363639831543,grad_norm: 0.9999993149954106, iteration: 17309
loss: 1.0128300189971924,grad_norm: 0.8695568264989906, iteration: 17310
loss: 1.0162910223007202,grad_norm: 0.9999992132761247, iteration: 17311
loss: 1.0117324590682983,grad_norm: 0.9843559197351682, iteration: 17312
loss: 1.0653129816055298,grad_norm: 0.9999991955685985, iteration: 17313
loss: 0.9878795146942139,grad_norm: 0.9999990919799557, iteration: 17314
loss: 1.0411593914031982,grad_norm: 0.9999990325909626, iteration: 17315
loss: 1.034653663635254,grad_norm: 0.999999268161422, iteration: 17316
loss: 0.9945304989814758,grad_norm: 0.9999990704166237, iteration: 17317
loss: 1.0557435750961304,grad_norm: 0.9999994081155662, iteration: 17318
loss: 1.006966471672058,grad_norm: 0.9999991963784098, iteration: 17319
loss: 1.0120378732681274,grad_norm: 0.9999993012714117, iteration: 17320
loss: 1.020385980606079,grad_norm: 0.9999990342439666, iteration: 17321
loss: 1.0142700672149658,grad_norm: 0.9999992372401963, iteration: 17322
loss: 1.005912184715271,grad_norm: 0.9999992424647617, iteration: 17323
loss: 0.9734959006309509,grad_norm: 0.9919967254456075, iteration: 17324
loss: 0.9979354739189148,grad_norm: 0.9999993686323375, iteration: 17325
loss: 1.0042171478271484,grad_norm: 0.9999990945138237, iteration: 17326
loss: 1.0482549667358398,grad_norm: 0.963898045672519, iteration: 17327
loss: 1.0603594779968262,grad_norm: 0.9999991817712878, iteration: 17328
loss: 1.0155341625213623,grad_norm: 0.9999993997896119, iteration: 17329
loss: 0.9696464538574219,grad_norm: 0.9999990565060964, iteration: 17330
loss: 1.0261939764022827,grad_norm: 0.9999992441375611, iteration: 17331
loss: 1.062042236328125,grad_norm: 0.9999991195853764, iteration: 17332
loss: 1.035468578338623,grad_norm: 0.9999991761801138, iteration: 17333
loss: 1.0081257820129395,grad_norm: 0.9999991445843539, iteration: 17334
loss: 1.0385009050369263,grad_norm: 0.9999993130298783, iteration: 17335
loss: 1.0221422910690308,grad_norm: 0.919795784720584, iteration: 17336
loss: 1.0604650974273682,grad_norm: 0.9999993159525672, iteration: 17337
loss: 1.00692880153656,grad_norm: 0.9999991845175787, iteration: 17338
loss: 0.9895116686820984,grad_norm: 0.9999990697263046, iteration: 17339
loss: 1.0295841693878174,grad_norm: 0.9999990820320042, iteration: 17340
loss: 0.991913914680481,grad_norm: 0.9999989743576431, iteration: 17341
loss: 1.0292478799819946,grad_norm: 0.9999991026223489, iteration: 17342
loss: 0.9968849420547485,grad_norm: 0.9999991806952596, iteration: 17343
loss: 1.029099464416504,grad_norm: 0.9999997684880558, iteration: 17344
loss: 1.0434482097625732,grad_norm: 0.8968931739448254, iteration: 17345
loss: 1.021921157836914,grad_norm: 0.9340588295612546, iteration: 17346
loss: 1.0407286882400513,grad_norm: 0.9999992161928873, iteration: 17347
loss: 1.0141913890838623,grad_norm: 0.9286552406172736, iteration: 17348
loss: 1.035205602645874,grad_norm: 0.9999991311898256, iteration: 17349
loss: 0.9947035908699036,grad_norm: 0.9999991870037712, iteration: 17350
loss: 0.991232693195343,grad_norm: 0.9999991957403296, iteration: 17351
loss: 0.9948708415031433,grad_norm: 0.9999991366450479, iteration: 17352
loss: 0.9902580976486206,grad_norm: 0.9999992869129813, iteration: 17353
loss: 1.023449420928955,grad_norm: 0.9999992257022583, iteration: 17354
loss: 1.0125385522842407,grad_norm: 0.9492984537567376, iteration: 17355
loss: 1.0361274480819702,grad_norm: 0.9999991818256915, iteration: 17356
loss: 1.0701897144317627,grad_norm: 0.9999996511537012, iteration: 17357
loss: 1.0225094556808472,grad_norm: 0.9999991224918315, iteration: 17358
loss: 1.028560996055603,grad_norm: 0.9999991139494496, iteration: 17359
loss: 0.9814539551734924,grad_norm: 0.9999991602227776, iteration: 17360
loss: 1.0336182117462158,grad_norm: 0.9999992546892186, iteration: 17361
loss: 0.9781457185745239,grad_norm: 0.9999991489384966, iteration: 17362
loss: 0.9742935299873352,grad_norm: 0.9999993034788416, iteration: 17363
loss: 1.0343384742736816,grad_norm: 0.9999991484055081, iteration: 17364
loss: 1.0465750694274902,grad_norm: 0.9999994012616754, iteration: 17365
loss: 1.0261366367340088,grad_norm: 0.9865556244312564, iteration: 17366
loss: 1.0136001110076904,grad_norm: 0.9069598340816244, iteration: 17367
loss: 1.0337305068969727,grad_norm: 0.9999992326132429, iteration: 17368
loss: 1.019921064376831,grad_norm: 0.9999991335529682, iteration: 17369
loss: 1.041695237159729,grad_norm: 0.9999991024623509, iteration: 17370
loss: 1.0052889585494995,grad_norm: 0.9999994521225185, iteration: 17371
loss: 1.0350234508514404,grad_norm: 0.9999996430360607, iteration: 17372
loss: 1.0156174898147583,grad_norm: 0.9999990973011239, iteration: 17373
loss: 0.9695748090744019,grad_norm: 0.9999991538754393, iteration: 17374
loss: 1.09336256980896,grad_norm: 0.9999998443480514, iteration: 17375
loss: 1.0421857833862305,grad_norm: 0.9999990279944219, iteration: 17376
loss: 0.9976693391799927,grad_norm: 0.999999204739506, iteration: 17377
loss: 1.0160448551177979,grad_norm: 0.9999991782598224, iteration: 17378
loss: 0.9866350889205933,grad_norm: 0.9794973299333223, iteration: 17379
loss: 1.0174884796142578,grad_norm: 0.9999992744822631, iteration: 17380
loss: 0.9653142094612122,grad_norm: 0.9999990924632665, iteration: 17381
loss: 0.9981116056442261,grad_norm: 0.960911924735638, iteration: 17382
loss: 1.0028213262557983,grad_norm: 0.999999477355843, iteration: 17383
loss: 1.0355244874954224,grad_norm: 0.999999170149707, iteration: 17384
loss: 1.0243816375732422,grad_norm: 0.9742583692907669, iteration: 17385
loss: 1.023214340209961,grad_norm: 0.9999991080332021, iteration: 17386
loss: 1.0233538150787354,grad_norm: 0.9999993514148373, iteration: 17387
loss: 0.9917426705360413,grad_norm: 0.9999991124076124, iteration: 17388
loss: 0.9988794922828674,grad_norm: 0.99999915322106, iteration: 17389
loss: 0.9918689131736755,grad_norm: 0.9999991770242027, iteration: 17390
loss: 1.0102020502090454,grad_norm: 0.9999990660589454, iteration: 17391
loss: 1.004569411277771,grad_norm: 0.9279230667619138, iteration: 17392
loss: 1.0234222412109375,grad_norm: 0.9999991833689624, iteration: 17393
loss: 0.9874995946884155,grad_norm: 0.9999992166231693, iteration: 17394
loss: 1.0004698038101196,grad_norm: 0.9999990333764645, iteration: 17395
loss: 0.9842798709869385,grad_norm: 0.9999990708311628, iteration: 17396
loss: 1.0162696838378906,grad_norm: 0.9999991243426738, iteration: 17397
loss: 1.0467638969421387,grad_norm: 0.9999997021731546, iteration: 17398
loss: 1.0343164205551147,grad_norm: 0.999999247754475, iteration: 17399
loss: 1.0027821063995361,grad_norm: 0.9999991269578012, iteration: 17400
loss: 1.0405535697937012,grad_norm: 0.999999436825548, iteration: 17401
loss: 1.0146976709365845,grad_norm: 0.9619727227336993, iteration: 17402
loss: 1.0614839792251587,grad_norm: 0.9999996406039848, iteration: 17403
loss: 1.0329227447509766,grad_norm: 0.9999990245435657, iteration: 17404
loss: 0.9858151078224182,grad_norm: 0.9635212113184841, iteration: 17405
loss: 1.0357022285461426,grad_norm: 0.9999993384923286, iteration: 17406
loss: 1.0048532485961914,grad_norm: 0.9999991264332294, iteration: 17407
loss: 1.0352143049240112,grad_norm: 0.9999991580541775, iteration: 17408
loss: 1.0692018270492554,grad_norm: 0.9999991763614522, iteration: 17409
loss: 1.0336394309997559,grad_norm: 0.9999990652377747, iteration: 17410
loss: 1.0201023817062378,grad_norm: 0.9999993208656339, iteration: 17411
loss: 1.0120418071746826,grad_norm: 0.9749847490638722, iteration: 17412
loss: 1.034443974494934,grad_norm: 0.9999992024373562, iteration: 17413
loss: 0.9932901263237,grad_norm: 0.9999991928813732, iteration: 17414
loss: 1.0698000192642212,grad_norm: 0.9999992140193799, iteration: 17415
loss: 0.955048680305481,grad_norm: 0.9999990999196142, iteration: 17416
loss: 1.0462327003479004,grad_norm: 0.9999996136642415, iteration: 17417
loss: 1.0042403936386108,grad_norm: 0.9999993180187916, iteration: 17418
loss: 1.0354481935501099,grad_norm: 0.9999991242786432, iteration: 17419
loss: 1.0376538038253784,grad_norm: 0.8483714038680101, iteration: 17420
loss: 0.9964309930801392,grad_norm: 0.9999991056152542, iteration: 17421
loss: 1.0020978450775146,grad_norm: 0.9489867519270363, iteration: 17422
loss: 1.0205849409103394,grad_norm: 0.9999992537327116, iteration: 17423
loss: 1.0243523120880127,grad_norm: 0.9999990674765616, iteration: 17424
loss: 1.0148111581802368,grad_norm: 0.8949908052283748, iteration: 17425
loss: 1.0286439657211304,grad_norm: 0.9999991655025872, iteration: 17426
loss: 0.9893152713775635,grad_norm: 0.984671534927719, iteration: 17427
loss: 1.0674664974212646,grad_norm: 0.9999990850553264, iteration: 17428
loss: 1.0267882347106934,grad_norm: 0.999999075175239, iteration: 17429
loss: 0.9813580513000488,grad_norm: 0.9999993083257718, iteration: 17430
loss: 1.050506591796875,grad_norm: 0.9999989753452326, iteration: 17431
loss: 1.0038421154022217,grad_norm: 0.9935119762712377, iteration: 17432
loss: 1.011558175086975,grad_norm: 0.9999990104451131, iteration: 17433
loss: 1.0395987033843994,grad_norm: 0.9999990346690586, iteration: 17434
loss: 1.0480073690414429,grad_norm: 0.9999992662946139, iteration: 17435
loss: 1.0060399770736694,grad_norm: 0.8606413767146092, iteration: 17436
loss: 1.0180120468139648,grad_norm: 0.9999992573974347, iteration: 17437
loss: 1.0286294221878052,grad_norm: 0.9999993169695911, iteration: 17438
loss: 1.0012344121932983,grad_norm: 0.9999991897665443, iteration: 17439
loss: 1.0421357154846191,grad_norm: 0.9999998430750882, iteration: 17440
loss: 0.9909068942070007,grad_norm: 0.9999992056252895, iteration: 17441
loss: 1.0178555250167847,grad_norm: 0.9999991598929615, iteration: 17442
loss: 0.9964000582695007,grad_norm: 0.9999992340723654, iteration: 17443
loss: 1.0535426139831543,grad_norm: 0.9999998619433875, iteration: 17444
loss: 1.0035604238510132,grad_norm: 0.9755969766602307, iteration: 17445
loss: 1.0602761507034302,grad_norm: 0.9999995216035948, iteration: 17446
loss: 1.0127174854278564,grad_norm: 0.9999992828935999, iteration: 17447
loss: 1.006269097328186,grad_norm: 0.9999989577452318, iteration: 17448
loss: 1.0151796340942383,grad_norm: 0.9999993981796738, iteration: 17449
loss: 1.0097051858901978,grad_norm: 0.9999990786314724, iteration: 17450
loss: 1.0300971269607544,grad_norm: 0.9735284019163595, iteration: 17451
loss: 0.9885990023612976,grad_norm: 0.9999991487663745, iteration: 17452
loss: 1.024221658706665,grad_norm: 0.999999361934063, iteration: 17453
loss: 1.0136750936508179,grad_norm: 0.9999998062960774, iteration: 17454
loss: 1.0196760892868042,grad_norm: 0.9999991208647071, iteration: 17455
loss: 1.0575478076934814,grad_norm: 0.9999996612815313, iteration: 17456
loss: 1.0339072942733765,grad_norm: 0.9999991060940604, iteration: 17457
loss: 1.026747226715088,grad_norm: 0.9999991458789544, iteration: 17458
loss: 1.0255210399627686,grad_norm: 0.9999993137844324, iteration: 17459
loss: 0.9962348341941833,grad_norm: 0.9999993202508662, iteration: 17460
loss: 1.0005590915679932,grad_norm: 0.9999992114801132, iteration: 17461
loss: 1.0022224187850952,grad_norm: 0.9999991462071308, iteration: 17462
loss: 1.0606062412261963,grad_norm: 0.9999996358191623, iteration: 17463
loss: 1.0343090295791626,grad_norm: 0.9999990839888414, iteration: 17464
loss: 0.9896615147590637,grad_norm: 0.9999990486474953, iteration: 17465
loss: 1.0334264039993286,grad_norm: 0.9999989454460118, iteration: 17466
loss: 1.0742183923721313,grad_norm: 0.9999998387673017, iteration: 17467
loss: 1.0589061975479126,grad_norm: 0.9225677851191536, iteration: 17468
loss: 1.0287574529647827,grad_norm: 0.9999990393823206, iteration: 17469
loss: 1.0592204332351685,grad_norm: 0.9999997324097618, iteration: 17470
loss: 1.0521023273468018,grad_norm: 0.9999993258139112, iteration: 17471
loss: 1.0524097681045532,grad_norm: 0.9999997621197749, iteration: 17472
loss: 1.0062304735183716,grad_norm: 0.9999997072780316, iteration: 17473
loss: 0.9914185404777527,grad_norm: 0.9999990888375944, iteration: 17474
loss: 1.0259521007537842,grad_norm: 0.9999992145798666, iteration: 17475
loss: 0.9967144131660461,grad_norm: 0.8634975816037087, iteration: 17476
loss: 0.9979916214942932,grad_norm: 0.8750881190968259, iteration: 17477
loss: 0.9969052076339722,grad_norm: 0.9999991559974417, iteration: 17478
loss: 0.9868027567863464,grad_norm: 0.9802086898145136, iteration: 17479
loss: 1.0318584442138672,grad_norm: 0.9845795508896874, iteration: 17480
loss: 1.0095248222351074,grad_norm: 0.9999990801155206, iteration: 17481
loss: 1.0312402248382568,grad_norm: 0.9645194006006912, iteration: 17482
loss: 0.9995734691619873,grad_norm: 0.9999993609075285, iteration: 17483
loss: 0.9989357590675354,grad_norm: 0.9999991981927623, iteration: 17484
loss: 1.0462191104888916,grad_norm: 0.9999992584949405, iteration: 17485
loss: 0.975801944732666,grad_norm: 0.9999991152224974, iteration: 17486
loss: 1.0242453813552856,grad_norm: 0.9999990871119295, iteration: 17487
loss: 1.0429452657699585,grad_norm: 0.9999993047279877, iteration: 17488
loss: 1.002992868423462,grad_norm: 0.9845375472856889, iteration: 17489
loss: 1.0393439531326294,grad_norm: 0.9999991673889488, iteration: 17490
loss: 1.0044116973876953,grad_norm: 0.9831372840125413, iteration: 17491
loss: 1.0066877603530884,grad_norm: 0.9999991909868121, iteration: 17492
loss: 0.9958900213241577,grad_norm: 0.9999993297174077, iteration: 17493
loss: 1.0129821300506592,grad_norm: 0.9999991465848774, iteration: 17494
loss: 0.9808624386787415,grad_norm: 0.9999990299505025, iteration: 17495
loss: 1.049511432647705,grad_norm: 0.9999991501687048, iteration: 17496
loss: 0.9845340847969055,grad_norm: 0.9999992892268814, iteration: 17497
loss: 1.0305415391921997,grad_norm: 0.9999999079814959, iteration: 17498
loss: 1.0208719968795776,grad_norm: 0.999999806574594, iteration: 17499
loss: 1.0289613008499146,grad_norm: 0.9999992887560655, iteration: 17500
loss: 1.0533909797668457,grad_norm: 0.9653041007304185, iteration: 17501
loss: 1.0286877155303955,grad_norm: 0.9999992040784095, iteration: 17502
loss: 0.9986462593078613,grad_norm: 0.999999242870104, iteration: 17503
loss: 1.0291810035705566,grad_norm: 0.9999991357095027, iteration: 17504
loss: 0.9809625744819641,grad_norm: 0.9999992370495138, iteration: 17505
loss: 0.9981642365455627,grad_norm: 0.999999095770718, iteration: 17506
loss: 1.0835295915603638,grad_norm: 0.9999996870962238, iteration: 17507
loss: 1.0303990840911865,grad_norm: 0.9999997131443689, iteration: 17508
loss: 1.0176750421524048,grad_norm: 0.9549273690320152, iteration: 17509
loss: 1.09844970703125,grad_norm: 0.9999998540094441, iteration: 17510
loss: 1.0161237716674805,grad_norm: 0.9999995724396822, iteration: 17511
loss: 1.043386459350586,grad_norm: 0.9545895060050029, iteration: 17512
loss: 1.0929343700408936,grad_norm: 0.9999996879271676, iteration: 17513
loss: 0.9775305986404419,grad_norm: 0.9139964505588384, iteration: 17514
loss: 1.0082048177719116,grad_norm: 0.9999990763053096, iteration: 17515
loss: 1.039979338645935,grad_norm: 0.9999990512840387, iteration: 17516
loss: 1.014425277709961,grad_norm: 0.9372619801436867, iteration: 17517
loss: 1.0109280347824097,grad_norm: 0.9999993958859424, iteration: 17518
loss: 0.9816722273826599,grad_norm: 0.9999992898462127, iteration: 17519
loss: 1.0135772228240967,grad_norm: 0.999999115745473, iteration: 17520
loss: 1.018129587173462,grad_norm: 0.9999991927880881, iteration: 17521
loss: 1.0356522798538208,grad_norm: 0.9999991420684128, iteration: 17522
loss: 1.0090749263763428,grad_norm: 0.9999996224663802, iteration: 17523
loss: 1.0402216911315918,grad_norm: 0.9999992881757352, iteration: 17524
loss: 1.0235391855239868,grad_norm: 0.9999994325010825, iteration: 17525
loss: 1.0237150192260742,grad_norm: 0.9999997967532488, iteration: 17526
loss: 1.032097578048706,grad_norm: 0.9999992069738441, iteration: 17527
loss: 0.9861494898796082,grad_norm: 0.9999991441305374, iteration: 17528
loss: 1.08090078830719,grad_norm: 0.9999995468765114, iteration: 17529
loss: 1.0059828758239746,grad_norm: 0.975232461722295, iteration: 17530
loss: 1.0033317804336548,grad_norm: 0.9999990927428258, iteration: 17531
loss: 0.9702807068824768,grad_norm: 0.999999314799059, iteration: 17532
loss: 1.013515830039978,grad_norm: 0.9999993223665682, iteration: 17533
loss: 1.0109790563583374,grad_norm: 0.9999990622829896, iteration: 17534
loss: 1.0229289531707764,grad_norm: 0.9999995283362788, iteration: 17535
loss: 1.0193921327590942,grad_norm: 0.9999991393986267, iteration: 17536
loss: 0.9988749623298645,grad_norm: 0.9999991153578004, iteration: 17537
loss: 1.0053679943084717,grad_norm: 0.9999993517827922, iteration: 17538
loss: 0.9937379956245422,grad_norm: 0.886738612656484, iteration: 17539
loss: 1.0123575925827026,grad_norm: 0.9999992790786048, iteration: 17540
loss: 1.0110554695129395,grad_norm: 0.9999991831414112, iteration: 17541
loss: 0.9960866570472717,grad_norm: 0.9748416591797849, iteration: 17542
loss: 0.9651637077331543,grad_norm: 0.999999135291531, iteration: 17543
loss: 1.0531368255615234,grad_norm: 0.999999152404518, iteration: 17544
loss: 0.991686999797821,grad_norm: 0.9920548120633114, iteration: 17545
loss: 0.9929589629173279,grad_norm: 0.9975278981188616, iteration: 17546
loss: 1.001419186592102,grad_norm: 0.9999990435874327, iteration: 17547
loss: 0.9845266938209534,grad_norm: 0.9241394108432615, iteration: 17548
loss: 0.9879931211471558,grad_norm: 0.9999991703722694, iteration: 17549
loss: 1.1075999736785889,grad_norm: 0.9999994856811844, iteration: 17550
loss: 1.0062572956085205,grad_norm: 0.9999992735422283, iteration: 17551
loss: 1.0420650243759155,grad_norm: 0.9999991310352897, iteration: 17552
loss: 1.0349570512771606,grad_norm: 0.9999998806491418, iteration: 17553
loss: 1.06757652759552,grad_norm: 0.993927784218831, iteration: 17554
loss: 1.055371642112732,grad_norm: 0.9576061952093363, iteration: 17555
loss: 1.0054768323898315,grad_norm: 0.9999991401792165, iteration: 17556
loss: 1.0063756704330444,grad_norm: 0.9999991097685638, iteration: 17557
loss: 1.019547700881958,grad_norm: 0.9999993814074699, iteration: 17558
loss: 1.0250804424285889,grad_norm: 0.9999989331054912, iteration: 17559
loss: 1.0295755863189697,grad_norm: 0.9999991643197534, iteration: 17560
loss: 1.0343798398971558,grad_norm: 0.9999990148284286, iteration: 17561
loss: 1.0465307235717773,grad_norm: 0.9999991634682195, iteration: 17562
loss: 1.0042529106140137,grad_norm: 0.9999991304906657, iteration: 17563
loss: 0.9907244443893433,grad_norm: 0.999999045619727, iteration: 17564
loss: 1.0356254577636719,grad_norm: 0.9999994263343245, iteration: 17565
loss: 1.0173290967941284,grad_norm: 0.9999990905431614, iteration: 17566
loss: 1.001434564590454,grad_norm: 0.9999992158733011, iteration: 17567
loss: 0.9930996894836426,grad_norm: 0.9999990929879904, iteration: 17568
loss: 1.0207027196884155,grad_norm: 0.9999991478569629, iteration: 17569
loss: 1.0247693061828613,grad_norm: 0.9999992226167851, iteration: 17570
loss: 1.0027838945388794,grad_norm: 0.9999990676520228, iteration: 17571
loss: 1.0090506076812744,grad_norm: 0.9781374662216687, iteration: 17572
loss: 1.0027827024459839,grad_norm: 0.8904281133391779, iteration: 17573
loss: 0.9947606921195984,grad_norm: 0.999999182006534, iteration: 17574
loss: 1.00528085231781,grad_norm: 0.9999990520321339, iteration: 17575
loss: 0.9691954255104065,grad_norm: 0.9999992375113196, iteration: 17576
loss: 0.9961706399917603,grad_norm: 0.9999992417177741, iteration: 17577
loss: 1.0217703580856323,grad_norm: 0.9999991030913388, iteration: 17578
loss: 1.0492242574691772,grad_norm: 0.9796011216532835, iteration: 17579
loss: 0.9807305932044983,grad_norm: 0.9999990968264724, iteration: 17580
loss: 1.0549721717834473,grad_norm: 0.9999991672750815, iteration: 17581
loss: 1.010244369506836,grad_norm: 0.9999990309081785, iteration: 17582
loss: 1.0088437795639038,grad_norm: 0.9999990448605918, iteration: 17583
loss: 0.9935551881790161,grad_norm: 0.9999991145666678, iteration: 17584
loss: 1.0304558277130127,grad_norm: 0.9999993208909184, iteration: 17585
loss: 1.0106666088104248,grad_norm: 0.9999991121653828, iteration: 17586
loss: 1.0552005767822266,grad_norm: 0.9999993354887448, iteration: 17587
loss: 1.0286866426467896,grad_norm: 0.9999991004629558, iteration: 17588
loss: 0.9942130446434021,grad_norm: 0.990289055771365, iteration: 17589
loss: 0.997930645942688,grad_norm: 0.8735898172469058, iteration: 17590
loss: 1.0471934080123901,grad_norm: 0.9999992936496547, iteration: 17591
loss: 1.0231471061706543,grad_norm: 0.9999994739626343, iteration: 17592
loss: 1.0540424585342407,grad_norm: 0.9999993281112883, iteration: 17593
loss: 1.0212149620056152,grad_norm: 0.9999991837093828, iteration: 17594
loss: 1.0282710790634155,grad_norm: 0.9999991417684256, iteration: 17595
loss: 1.024438500404358,grad_norm: 0.9999992680142736, iteration: 17596
loss: 1.0086140632629395,grad_norm: 0.9999992068634579, iteration: 17597
loss: 1.0488226413726807,grad_norm: 0.9999991870645552, iteration: 17598
loss: 1.0190991163253784,grad_norm: 0.9999990976925649, iteration: 17599
loss: 1.0561434030532837,grad_norm: 0.9999997540410145, iteration: 17600
loss: 1.0542244911193848,grad_norm: 0.9999990745848434, iteration: 17601
loss: 0.9438955187797546,grad_norm: 0.9999993046962142, iteration: 17602
loss: 0.9992483258247375,grad_norm: 0.9806574461824425, iteration: 17603
loss: 1.0296716690063477,grad_norm: 0.9999993043340277, iteration: 17604
loss: 1.0306694507598877,grad_norm: 0.9999991647266211, iteration: 17605
loss: 1.0253276824951172,grad_norm: 0.999999114843741, iteration: 17606
loss: 0.9899042248725891,grad_norm: 0.9999993288683547, iteration: 17607
loss: 1.0122992992401123,grad_norm: 0.9999991359086966, iteration: 17608
loss: 1.0409128665924072,grad_norm: 0.9999992608534595, iteration: 17609
loss: 1.0480036735534668,grad_norm: 0.9999996749002752, iteration: 17610
loss: 0.9839455485343933,grad_norm: 0.9999989698176281, iteration: 17611
loss: 1.016148328781128,grad_norm: 0.9999990347704624, iteration: 17612
loss: 1.0399339199066162,grad_norm: 0.9999993291107713, iteration: 17613
loss: 0.9865362048149109,grad_norm: 0.9999991510160529, iteration: 17614
loss: 1.0116472244262695,grad_norm: 0.9999990634506579, iteration: 17615
loss: 1.0526962280273438,grad_norm: 0.999999210857467, iteration: 17616
loss: 1.0244252681732178,grad_norm: 0.999999517796402, iteration: 17617
loss: 1.0043885707855225,grad_norm: 0.9999993416134119, iteration: 17618
loss: 1.058082938194275,grad_norm: 0.9999994107822733, iteration: 17619
loss: 0.9977786540985107,grad_norm: 0.9999993183266441, iteration: 17620
loss: 1.0139678716659546,grad_norm: 0.9999990838927457, iteration: 17621
loss: 1.013793706893921,grad_norm: 0.9999992504440605, iteration: 17622
loss: 0.9801747798919678,grad_norm: 0.9999990954152512, iteration: 17623
loss: 1.012752890586853,grad_norm: 0.9999991543250355, iteration: 17624
loss: 1.0352647304534912,grad_norm: 0.9999991263118084, iteration: 17625
loss: 1.0313928127288818,grad_norm: 0.9999995140970059, iteration: 17626
loss: 1.0380173921585083,grad_norm: 0.9999991761660157, iteration: 17627
loss: 1.0279186964035034,grad_norm: 0.9933939394468239, iteration: 17628
loss: 1.063880443572998,grad_norm: 0.9999992124672604, iteration: 17629
loss: 1.026007056236267,grad_norm: 0.9999990880339311, iteration: 17630
loss: 1.0123863220214844,grad_norm: 0.9999990665455482, iteration: 17631
loss: 1.017755389213562,grad_norm: 0.9999993011345625, iteration: 17632
loss: 1.0013962984085083,grad_norm: 0.9999991268683601, iteration: 17633
loss: 1.048095703125,grad_norm: 0.9999995211600028, iteration: 17634
loss: 1.0128148794174194,grad_norm: 0.9999995890697302, iteration: 17635
loss: 1.0178059339523315,grad_norm: 0.9999990755393848, iteration: 17636
loss: 1.0212113857269287,grad_norm: 0.9438269565179274, iteration: 17637
loss: 1.0331324338912964,grad_norm: 0.9999994285671517, iteration: 17638
loss: 0.994333028793335,grad_norm: 0.9999991860967177, iteration: 17639
loss: 1.0641664266586304,grad_norm: 0.9999996742686512, iteration: 17640
loss: 1.0090653896331787,grad_norm: 0.9999991878895412, iteration: 17641
loss: 1.0027148723602295,grad_norm: 0.9999991813250133, iteration: 17642
loss: 0.9988133907318115,grad_norm: 0.9999994925729432, iteration: 17643
loss: 0.9809805750846863,grad_norm: 0.9999989694221537, iteration: 17644
loss: 1.031883955001831,grad_norm: 0.9460775171496782, iteration: 17645
loss: 0.9890334606170654,grad_norm: 0.9999990663630828, iteration: 17646
loss: 1.0089894533157349,grad_norm: 0.9999992666254239, iteration: 17647
loss: 1.0367566347122192,grad_norm: 0.9999993617264712, iteration: 17648
loss: 1.0466917753219604,grad_norm: 0.9675544970064601, iteration: 17649
loss: 1.0346899032592773,grad_norm: 0.9999992536888901, iteration: 17650
loss: 1.015952467918396,grad_norm: 0.9999997112526225, iteration: 17651
loss: 1.0103071928024292,grad_norm: 0.9999994067092473, iteration: 17652
loss: 1.0213291645050049,grad_norm: 0.9999993974518153, iteration: 17653
loss: 1.0024820566177368,grad_norm: 0.9999991209378494, iteration: 17654
loss: 1.0563781261444092,grad_norm: 0.9999993368562402, iteration: 17655
loss: 1.0315709114074707,grad_norm: 0.9999990619000209, iteration: 17656
loss: 1.0065330266952515,grad_norm: 0.9927618021489599, iteration: 17657
loss: 1.0281397104263306,grad_norm: 0.9999992246470103, iteration: 17658
loss: 1.0110615491867065,grad_norm: 0.9999992059883088, iteration: 17659
loss: 1.0668269395828247,grad_norm: 0.9999995811796822, iteration: 17660
loss: 0.9873730540275574,grad_norm: 0.9999991198466985, iteration: 17661
loss: 1.0473471879959106,grad_norm: 0.9999991795690589, iteration: 17662
loss: 1.001025676727295,grad_norm: 0.999999108087512, iteration: 17663
loss: 1.0439841747283936,grad_norm: 0.9999992459800435, iteration: 17664
loss: 1.0717227458953857,grad_norm: 0.9999999282934735, iteration: 17665
loss: 1.0270545482635498,grad_norm: 0.9999993018391063, iteration: 17666
loss: 0.9977812767028809,grad_norm: 0.9999993422872702, iteration: 17667
loss: 0.9933498501777649,grad_norm: 0.9999990812742336, iteration: 17668
loss: 0.99553382396698,grad_norm: 0.9999990712852078, iteration: 17669
loss: 1.0735665559768677,grad_norm: 0.9999997007648946, iteration: 17670
loss: 1.071431279182434,grad_norm: 0.9999995711288958, iteration: 17671
loss: 1.0374510288238525,grad_norm: 0.9999991182683041, iteration: 17672
loss: 1.0519633293151855,grad_norm: 0.9999990128088747, iteration: 17673
loss: 0.9844022989273071,grad_norm: 0.9999990080134156, iteration: 17674
loss: 1.0452438592910767,grad_norm: 0.9999994647648809, iteration: 17675
loss: 1.0070387125015259,grad_norm: 0.9654123535333375, iteration: 17676
loss: 0.9978225231170654,grad_norm: 0.9979720208513168, iteration: 17677
loss: 1.0002591609954834,grad_norm: 0.9999990246816963, iteration: 17678
loss: 1.04926335811615,grad_norm: 0.9271643214086963, iteration: 17679
loss: 1.0453425645828247,grad_norm: 0.9999995299844505, iteration: 17680
loss: 1.041924238204956,grad_norm: 0.9999993111565575, iteration: 17681
loss: 1.0373741388320923,grad_norm: 0.9999994747643126, iteration: 17682
loss: 0.9952212572097778,grad_norm: 0.8196495617829713, iteration: 17683
loss: 1.0122489929199219,grad_norm: 0.9999991022354688, iteration: 17684
loss: 1.0082285404205322,grad_norm: 0.9999992223418572, iteration: 17685
loss: 1.0391925573349,grad_norm: 0.9999992543109518, iteration: 17686
loss: 1.0348931550979614,grad_norm: 0.9999996531746667, iteration: 17687
loss: 0.9721044898033142,grad_norm: 0.9526232779519155, iteration: 17688
loss: 0.98099285364151,grad_norm: 0.9704240435502995, iteration: 17689
loss: 1.0383111238479614,grad_norm: 0.9999994848951081, iteration: 17690
loss: 1.0113872289657593,grad_norm: 0.9999992331900861, iteration: 17691
loss: 1.0244334936141968,grad_norm: 0.9999990991219192, iteration: 17692
loss: 1.0093821287155151,grad_norm: 0.9999993519071105, iteration: 17693
loss: 1.03916597366333,grad_norm: 0.9207216855914716, iteration: 17694
loss: 0.9990761280059814,grad_norm: 0.9999992047181555, iteration: 17695
loss: 1.0293200016021729,grad_norm: 0.9999994304000076, iteration: 17696
loss: 1.027232050895691,grad_norm: 0.9999991017699523, iteration: 17697
loss: 1.0138490200042725,grad_norm: 0.9999991230254879, iteration: 17698
loss: 1.0256218910217285,grad_norm: 0.9999993207219459, iteration: 17699
loss: 1.0159776210784912,grad_norm: 0.9999991864138487, iteration: 17700
loss: 1.0181934833526611,grad_norm: 0.9999991771955283, iteration: 17701
loss: 0.9945029616355896,grad_norm: 0.9999993455946805, iteration: 17702
loss: 0.9979522228240967,grad_norm: 0.9999991600984734, iteration: 17703
loss: 1.0327537059783936,grad_norm: 0.9999991498710624, iteration: 17704
loss: 1.018621802330017,grad_norm: 0.9999993138979449, iteration: 17705
loss: 1.0526982545852661,grad_norm: 0.9999992659821658, iteration: 17706
loss: 1.1408029794692993,grad_norm: 0.9999995836539065, iteration: 17707
loss: 1.008292555809021,grad_norm: 0.9999994053909396, iteration: 17708
loss: 1.0218987464904785,grad_norm: 0.9999991072970619, iteration: 17709
loss: 1.1073904037475586,grad_norm: 0.9999997995689615, iteration: 17710
loss: 1.028124451637268,grad_norm: 0.9999994035249495, iteration: 17711
loss: 1.0498007535934448,grad_norm: 0.9999991346024495, iteration: 17712
loss: 0.9622877240180969,grad_norm: 0.9999991009929748, iteration: 17713
loss: 1.084510326385498,grad_norm: 0.9999995560323497, iteration: 17714
loss: 0.9506975412368774,grad_norm: 0.9999989716492346, iteration: 17715
loss: 1.0811067819595337,grad_norm: 0.9999995094568331, iteration: 17716
loss: 1.0479058027267456,grad_norm: 0.9143031478273365, iteration: 17717
loss: 0.9939087629318237,grad_norm: 0.9999993331917362, iteration: 17718
loss: 0.9948356747627258,grad_norm: 0.9999991209147496, iteration: 17719
loss: 0.9861748218536377,grad_norm: 0.9999991891196921, iteration: 17720
loss: 1.0137965679168701,grad_norm: 0.9984381005112661, iteration: 17721
loss: 1.0119071006774902,grad_norm: 0.9999993173023146, iteration: 17722
loss: 1.0121691226959229,grad_norm: 0.9999995012398252, iteration: 17723
loss: 1.0252323150634766,grad_norm: 0.9999992766882483, iteration: 17724
loss: 0.9946997761726379,grad_norm: 0.999999129532244, iteration: 17725
loss: 1.0176275968551636,grad_norm: 0.9999990619266388, iteration: 17726
loss: 1.023240327835083,grad_norm: 0.9999990675306804, iteration: 17727
loss: 0.9942488074302673,grad_norm: 0.9898158232977194, iteration: 17728
loss: 1.0267270803451538,grad_norm: 0.9999990541455237, iteration: 17729
loss: 1.0222015380859375,grad_norm: 0.9999990688035088, iteration: 17730
loss: 1.0356990098953247,grad_norm: 0.9999991826673128, iteration: 17731
loss: 1.0031119585037231,grad_norm: 0.9999992330846265, iteration: 17732
loss: 1.0417417287826538,grad_norm: 0.9999997406156198, iteration: 17733
loss: 1.0307928323745728,grad_norm: 0.9999990321900436, iteration: 17734
loss: 1.011047601699829,grad_norm: 0.9925480205182178, iteration: 17735
loss: 1.0222398042678833,grad_norm: 0.9999993305388835, iteration: 17736
loss: 1.0240269899368286,grad_norm: 0.9999991799560309, iteration: 17737
loss: 1.0675909519195557,grad_norm: 0.9999991514760466, iteration: 17738
loss: 0.9563068747520447,grad_norm: 0.9999990603268113, iteration: 17739
loss: 1.0248574018478394,grad_norm: 0.9999992118133653, iteration: 17740
loss: 1.0916436910629272,grad_norm: 0.9999995477389881, iteration: 17741
loss: 1.0312103033065796,grad_norm: 0.8537122368988198, iteration: 17742
loss: 0.9831655621528625,grad_norm: 0.9372585720004375, iteration: 17743
loss: 1.027504324913025,grad_norm: 0.9999993597323766, iteration: 17744
loss: 1.0089356899261475,grad_norm: 0.9999991460486284, iteration: 17745
loss: 1.0064361095428467,grad_norm: 0.9999991846139362, iteration: 17746
loss: 1.015891432762146,grad_norm: 0.9999991651871498, iteration: 17747
loss: 0.9899609684944153,grad_norm: 0.999999264140039, iteration: 17748
loss: 1.0170940160751343,grad_norm: 0.9698600855361239, iteration: 17749
loss: 1.0202723741531372,grad_norm: 0.9999993038595162, iteration: 17750
loss: 0.994376540184021,grad_norm: 0.9999993579613643, iteration: 17751
loss: 1.0144869089126587,grad_norm: 0.9999991889317175, iteration: 17752
loss: 1.0506246089935303,grad_norm: 0.999998946011217, iteration: 17753
loss: 1.03497314453125,grad_norm: 0.999999184230817, iteration: 17754
loss: 1.0335692167282104,grad_norm: 0.9999991757318717, iteration: 17755
loss: 0.9744911789894104,grad_norm: 0.999999088644194, iteration: 17756
loss: 0.9915757775306702,grad_norm: 0.99999952469501, iteration: 17757
loss: 1.013845443725586,grad_norm: 0.986758933650028, iteration: 17758
loss: 1.0231940746307373,grad_norm: 0.9454671097423998, iteration: 17759
loss: 0.9930717349052429,grad_norm: 0.9837766791129524, iteration: 17760
loss: 1.0115622282028198,grad_norm: 0.9999993112173142, iteration: 17761
loss: 0.9880549907684326,grad_norm: 0.9999990831516703, iteration: 17762
loss: 0.9970423579216003,grad_norm: 0.9999991211287382, iteration: 17763
loss: 1.052017331123352,grad_norm: 0.9999992831271244, iteration: 17764
loss: 1.0114859342575073,grad_norm: 0.9999990535289747, iteration: 17765
loss: 1.031324028968811,grad_norm: 0.9999992735540123, iteration: 17766
loss: 1.020545244216919,grad_norm: 0.9918668872251295, iteration: 17767
loss: 1.048542857170105,grad_norm: 0.9999990746071616, iteration: 17768
loss: 1.015763759613037,grad_norm: 0.9372276649569652, iteration: 17769
loss: 1.015958309173584,grad_norm: 0.9999991353687123, iteration: 17770
loss: 1.004679560661316,grad_norm: 0.9999991504037135, iteration: 17771
loss: 1.0016989707946777,grad_norm: 0.9999991708851638, iteration: 17772
loss: 1.0459024906158447,grad_norm: 0.999999178853069, iteration: 17773
loss: 0.96299147605896,grad_norm: 0.999999125957181, iteration: 17774
loss: 0.9799315929412842,grad_norm: 0.9879089355701389, iteration: 17775
loss: 1.0452154874801636,grad_norm: 0.9919408945720061, iteration: 17776
loss: 0.9898524284362793,grad_norm: 0.999999065322195, iteration: 17777
loss: 1.001935362815857,grad_norm: 0.9999996285259535, iteration: 17778
loss: 1.0098601579666138,grad_norm: 0.9999991830907896, iteration: 17779
loss: 0.9673271179199219,grad_norm: 0.9999991562181667, iteration: 17780
loss: 1.011974811553955,grad_norm: 0.9976950419532209, iteration: 17781
loss: 1.0175824165344238,grad_norm: 0.9999996730162823, iteration: 17782
loss: 1.0240988731384277,grad_norm: 0.9999992837253134, iteration: 17783
loss: 0.9754179120063782,grad_norm: 0.9999992865642485, iteration: 17784
loss: 1.04305100440979,grad_norm: 0.9999990864033046, iteration: 17785
loss: 1.0019580125808716,grad_norm: 0.9999990971325429, iteration: 17786
loss: 0.9818610548973083,grad_norm: 0.8658187947364687, iteration: 17787
loss: 1.030594825744629,grad_norm: 0.999999043781669, iteration: 17788
loss: 1.0500801801681519,grad_norm: 0.9674205379730382, iteration: 17789
loss: 1.0133979320526123,grad_norm: 0.9999995614259105, iteration: 17790
loss: 0.993401050567627,grad_norm: 0.9611222650215133, iteration: 17791
loss: 0.9946407079696655,grad_norm: 0.9999992616444097, iteration: 17792
loss: 0.9860501885414124,grad_norm: 0.9999990042846361, iteration: 17793
loss: 1.0143567323684692,grad_norm: 0.9999990326484582, iteration: 17794
loss: 1.037810206413269,grad_norm: 0.9999990794978976, iteration: 17795
loss: 1.0250298976898193,grad_norm: 0.9999991080541663, iteration: 17796
loss: 1.0186301469802856,grad_norm: 0.9999990273952767, iteration: 17797
loss: 1.0018041133880615,grad_norm: 0.9713148950930217, iteration: 17798
loss: 1.014015793800354,grad_norm: 0.9999993873064386, iteration: 17799
loss: 0.9451419711112976,grad_norm: 0.999999474251363, iteration: 17800
loss: 0.9664747714996338,grad_norm: 0.999999185608653, iteration: 17801
loss: 1.0338726043701172,grad_norm: 0.9999992269838591, iteration: 17802
loss: 0.9983755350112915,grad_norm: 0.9999990793517654, iteration: 17803
loss: 1.0102547407150269,grad_norm: 0.999999128192629, iteration: 17804
loss: 1.025778889656067,grad_norm: 0.9999989646415235, iteration: 17805
loss: 0.9747492074966431,grad_norm: 0.9999992787908037, iteration: 17806
loss: 1.0111619234085083,grad_norm: 0.9999989617584465, iteration: 17807
loss: 1.0117433071136475,grad_norm: 0.9797249537263498, iteration: 17808
loss: 0.9964520931243896,grad_norm: 0.9213528578203166, iteration: 17809
loss: 1.045067548751831,grad_norm: 0.9999993000540442, iteration: 17810
loss: 1.04005765914917,grad_norm: 0.9999991204643327, iteration: 17811
loss: 1.0173934698104858,grad_norm: 0.99999906052, iteration: 17812
loss: 1.0235519409179688,grad_norm: 0.9220702293784562, iteration: 17813
loss: 1.0592632293701172,grad_norm: 0.9999992105273637, iteration: 17814
loss: 0.9960935115814209,grad_norm: 0.9999991670028855, iteration: 17815
loss: 0.9841989278793335,grad_norm: 0.9999992034372209, iteration: 17816
loss: 1.0183532238006592,grad_norm: 0.9999990598813289, iteration: 17817
loss: 1.0336427688598633,grad_norm: 0.9448981916540882, iteration: 17818
loss: 1.032608985900879,grad_norm: 0.9999994656005174, iteration: 17819
loss: 0.9828159809112549,grad_norm: 0.9999992206120006, iteration: 17820
loss: 0.9995371699333191,grad_norm: 0.9557012342057629, iteration: 17821
loss: 1.0370985269546509,grad_norm: 0.9999992056262117, iteration: 17822
loss: 1.013934850692749,grad_norm: 0.9543897653549216, iteration: 17823
loss: 1.0027766227722168,grad_norm: 0.9999991952784313, iteration: 17824
loss: 1.006608486175537,grad_norm: 0.9999992958306658, iteration: 17825
loss: 1.039109230041504,grad_norm: 0.9999991304579487, iteration: 17826
loss: 1.0446689128875732,grad_norm: 0.9999995508122804, iteration: 17827
loss: 1.0349221229553223,grad_norm: 0.9999991614196886, iteration: 17828
loss: 1.006991982460022,grad_norm: 0.999999239270913, iteration: 17829
loss: 1.0278370380401611,grad_norm: 0.9999992334754487, iteration: 17830
loss: 1.0526771545410156,grad_norm: 0.9999996732916498, iteration: 17831
loss: 1.0231839418411255,grad_norm: 0.9999993607192557, iteration: 17832
loss: 1.0338808298110962,grad_norm: 0.9999990584501733, iteration: 17833
loss: 1.0352210998535156,grad_norm: 0.9999990897061782, iteration: 17834
loss: 1.0332002639770508,grad_norm: 0.9999991231940147, iteration: 17835
loss: 1.008297085762024,grad_norm: 0.999999273820352, iteration: 17836
loss: 1.0362849235534668,grad_norm: 0.9999991316605161, iteration: 17837
loss: 0.9892286062240601,grad_norm: 0.9999992353972388, iteration: 17838
loss: 1.027411937713623,grad_norm: 0.9999993782522021, iteration: 17839
loss: 1.0633609294891357,grad_norm: 0.9999991731390733, iteration: 17840
loss: 1.0201482772827148,grad_norm: 0.9999992112764926, iteration: 17841
loss: 0.9957412481307983,grad_norm: 0.9953781638288467, iteration: 17842
loss: 1.0325582027435303,grad_norm: 0.9999992775590945, iteration: 17843
loss: 1.0459725856781006,grad_norm: 0.9999991969262431, iteration: 17844
loss: 1.0335986614227295,grad_norm: 0.9999990983669166, iteration: 17845
loss: 1.0145915746688843,grad_norm: 0.9999994139673958, iteration: 17846
loss: 1.0075966119766235,grad_norm: 0.9999994287874279, iteration: 17847
loss: 1.013076901435852,grad_norm: 0.8958791072128183, iteration: 17848
loss: 0.9976078867912292,grad_norm: 0.9999991249259013, iteration: 17849
loss: 1.0045535564422607,grad_norm: 0.8835649777007412, iteration: 17850
loss: 1.0248123407363892,grad_norm: 0.9999992446961191, iteration: 17851
loss: 1.0136923789978027,grad_norm: 0.9999993088391959, iteration: 17852
loss: 1.0232620239257812,grad_norm: 0.9290279890989104, iteration: 17853
loss: 1.0133674144744873,grad_norm: 0.9884131659833405, iteration: 17854
loss: 1.0412933826446533,grad_norm: 0.958402225349053, iteration: 17855
loss: 1.0629945993423462,grad_norm: 0.9999992699848707, iteration: 17856
loss: 0.9778965711593628,grad_norm: 0.9587594463751191, iteration: 17857
loss: 1.0056167840957642,grad_norm: 0.9999992762473693, iteration: 17858
loss: 1.0544769763946533,grad_norm: 0.9999990292582228, iteration: 17859
loss: 0.9990675449371338,grad_norm: 0.9999998737653673, iteration: 17860
loss: 1.0266488790512085,grad_norm: 0.9999993866105777, iteration: 17861
loss: 1.018795132637024,grad_norm: 0.9999990344923827, iteration: 17862
loss: 1.036830186843872,grad_norm: 0.99999916463885, iteration: 17863
loss: 1.0466084480285645,grad_norm: 0.9999991253541963, iteration: 17864
loss: 1.0196027755737305,grad_norm: 0.9999994847848971, iteration: 17865
loss: 1.034249186515808,grad_norm: 0.9542231073839107, iteration: 17866
loss: 1.0598310232162476,grad_norm: 0.9999998994787335, iteration: 17867
loss: 1.0175317525863647,grad_norm: 0.9999994949832118, iteration: 17868
loss: 1.0390952825546265,grad_norm: 0.9999993546729807, iteration: 17869
loss: 1.0066349506378174,grad_norm: 0.9912617270696957, iteration: 17870
loss: 0.996661365032196,grad_norm: 0.9999990860186332, iteration: 17871
loss: 1.0547785758972168,grad_norm: 0.9999989950926049, iteration: 17872
loss: 1.0246371030807495,grad_norm: 0.999999538830049, iteration: 17873
loss: 1.0406510829925537,grad_norm: 0.9999990586498791, iteration: 17874
loss: 1.0228946208953857,grad_norm: 0.9999991666185558, iteration: 17875
loss: 1.0706579685211182,grad_norm: 0.9999998964557647, iteration: 17876
loss: 1.0611798763275146,grad_norm: 0.9999991024426158, iteration: 17877
loss: 1.0012001991271973,grad_norm: 0.9999993365549872, iteration: 17878
loss: 1.03139066696167,grad_norm: 0.8561730671319784, iteration: 17879
loss: 1.0464708805084229,grad_norm: 0.9999995440248504, iteration: 17880
loss: 1.0442957878112793,grad_norm: 0.9999991524974304, iteration: 17881
loss: 1.0143613815307617,grad_norm: 0.9999990786723809, iteration: 17882
loss: 0.9848681688308716,grad_norm: 0.9999996319468661, iteration: 17883
loss: 1.0362917184829712,grad_norm: 0.9999990103314176, iteration: 17884
loss: 1.0019299983978271,grad_norm: 0.9926369219742086, iteration: 17885
loss: 1.026044487953186,grad_norm: 0.9999990510137639, iteration: 17886
loss: 1.0441755056381226,grad_norm: 0.999998991888093, iteration: 17887
loss: 1.012487530708313,grad_norm: 0.9999991768644959, iteration: 17888
loss: 1.0124107599258423,grad_norm: 0.9999992112487295, iteration: 17889
loss: 1.0232678651809692,grad_norm: 0.9999992161141594, iteration: 17890
loss: 1.0079915523529053,grad_norm: 0.9999990109526818, iteration: 17891
loss: 0.9788469076156616,grad_norm: 0.9999990845256028, iteration: 17892
loss: 1.0417100191116333,grad_norm: 0.9999991902960964, iteration: 17893
loss: 0.9903035163879395,grad_norm: 0.9999996259529468, iteration: 17894
loss: 0.9908028244972229,grad_norm: 0.9999991151452211, iteration: 17895
loss: 1.0298806428909302,grad_norm: 0.9111717373193917, iteration: 17896
loss: 0.988756537437439,grad_norm: 0.9999990338585334, iteration: 17897
loss: 1.0282697677612305,grad_norm: 0.9087700448738846, iteration: 17898
loss: 1.0523066520690918,grad_norm: 0.9999993093843144, iteration: 17899
loss: 0.9930610060691833,grad_norm: 0.9999994829695532, iteration: 17900
loss: 0.9933663010597229,grad_norm: 0.9999991288237636, iteration: 17901
loss: 1.0593757629394531,grad_norm: 0.9999991960211395, iteration: 17902
loss: 1.0261907577514648,grad_norm: 0.999999183406146, iteration: 17903
loss: 0.9929928183555603,grad_norm: 0.999999179391676, iteration: 17904
loss: 1.0010522603988647,grad_norm: 0.9999990571475676, iteration: 17905
loss: 1.0097781419754028,grad_norm: 0.9999991039662631, iteration: 17906
loss: 1.0038625001907349,grad_norm: 0.9999991901581375, iteration: 17907
loss: 1.0374685525894165,grad_norm: 0.99999937928943, iteration: 17908
loss: 1.0297071933746338,grad_norm: 0.9999993260529185, iteration: 17909
loss: 1.023546814918518,grad_norm: 0.9999997842712943, iteration: 17910
loss: 0.9988197088241577,grad_norm: 0.9999991323356374, iteration: 17911
loss: 1.088556170463562,grad_norm: 0.9999992803479266, iteration: 17912
loss: 0.9994140863418579,grad_norm: 0.9999991775388417, iteration: 17913
loss: 1.0213098526000977,grad_norm: 0.9999991250532676, iteration: 17914
loss: 1.011454463005066,grad_norm: 0.9999991540037592, iteration: 17915
loss: 1.0171808004379272,grad_norm: 0.9999990574865472, iteration: 17916
loss: 1.0145988464355469,grad_norm: 0.9999992963696945, iteration: 17917
loss: 1.0446062088012695,grad_norm: 0.9999990806186511, iteration: 17918
loss: 1.0108619928359985,grad_norm: 0.9999990674846793, iteration: 17919
loss: 0.994567334651947,grad_norm: 0.9999992134215826, iteration: 17920
loss: 1.0209565162658691,grad_norm: 0.9999991060121824, iteration: 17921
loss: 1.0398293733596802,grad_norm: 0.9999996833939488, iteration: 17922
loss: 0.9718969464302063,grad_norm: 0.9999992046363854, iteration: 17923
loss: 0.9934569001197815,grad_norm: 0.9999991159510595, iteration: 17924
loss: 1.007622241973877,grad_norm: 0.9999990452113543, iteration: 17925
loss: 1.0197809934616089,grad_norm: 0.9999990665760593, iteration: 17926
loss: 0.9698014259338379,grad_norm: 0.9999990463037789, iteration: 17927
loss: 0.9988263249397278,grad_norm: 0.999999242713003, iteration: 17928
loss: 1.0611135959625244,grad_norm: 0.9604303855164275, iteration: 17929
loss: 1.0181769132614136,grad_norm: 0.9999990811133511, iteration: 17930
loss: 1.0427281856536865,grad_norm: 0.9999992600739516, iteration: 17931
loss: 1.0048922300338745,grad_norm: 0.9999995263947034, iteration: 17932
loss: 1.049591302871704,grad_norm: 0.9999990041666073, iteration: 17933
loss: 1.032495379447937,grad_norm: 0.9999991643199938, iteration: 17934
loss: 1.012211561203003,grad_norm: 0.9299197721633606, iteration: 17935
loss: 1.0144977569580078,grad_norm: 0.9999995477001935, iteration: 17936
loss: 0.9899634718894958,grad_norm: 0.9525167831339016, iteration: 17937
loss: 1.0093047618865967,grad_norm: 0.9999997550639024, iteration: 17938
loss: 1.0218018293380737,grad_norm: 0.9999990486056938, iteration: 17939
loss: 0.9513145685195923,grad_norm: 0.9350402229727139, iteration: 17940
loss: 1.0436046123504639,grad_norm: 0.9999993903752309, iteration: 17941
loss: 1.0365961790084839,grad_norm: 0.9999990134226323, iteration: 17942
loss: 1.0750017166137695,grad_norm: 0.9999995133807043, iteration: 17943
loss: 1.029376745223999,grad_norm: 0.9297427252699593, iteration: 17944
loss: 1.0287641286849976,grad_norm: 0.9616180830359975, iteration: 17945
loss: 1.0277526378631592,grad_norm: 0.9999993285613135, iteration: 17946
loss: 1.0086455345153809,grad_norm: 0.9999992336161433, iteration: 17947
loss: 1.035085678100586,grad_norm: 0.9999995353027298, iteration: 17948
loss: 1.0255392789840698,grad_norm: 0.9999993394927763, iteration: 17949
loss: 1.0274571180343628,grad_norm: 0.9999997854228005, iteration: 17950
loss: 0.974356472492218,grad_norm: 0.9999992470270841, iteration: 17951
loss: 1.013255000114441,grad_norm: 0.9999989832889927, iteration: 17952
loss: 1.0592485666275024,grad_norm: 0.9999996195453904, iteration: 17953
loss: 1.0120645761489868,grad_norm: 0.8944699403392298, iteration: 17954
loss: 1.0428597927093506,grad_norm: 0.9999991519831125, iteration: 17955
loss: 1.0112590789794922,grad_norm: 0.9199828007447046, iteration: 17956
loss: 1.0249284505844116,grad_norm: 0.9999992705580608, iteration: 17957
loss: 1.0725260972976685,grad_norm: 0.9999993273815129, iteration: 17958
loss: 1.0296577215194702,grad_norm: 0.999999367993573, iteration: 17959
loss: 0.9887134432792664,grad_norm: 0.880432421623612, iteration: 17960
loss: 1.0089877843856812,grad_norm: 0.9132935704855455, iteration: 17961
loss: 1.0334452390670776,grad_norm: 0.9999991158893329, iteration: 17962
loss: 1.0083978176116943,grad_norm: 0.9999990845388236, iteration: 17963
loss: 1.0045078992843628,grad_norm: 0.9999991397696577, iteration: 17964
loss: 0.991792619228363,grad_norm: 0.9999990973309519, iteration: 17965
loss: 1.0268162488937378,grad_norm: 0.999999128571751, iteration: 17966
loss: 1.0515977144241333,grad_norm: 0.9999997347881656, iteration: 17967
loss: 0.9813653826713562,grad_norm: 0.9999990072895205, iteration: 17968
loss: 1.036466360092163,grad_norm: 0.9999991928755708, iteration: 17969
loss: 1.018740177154541,grad_norm: 0.8991625076474629, iteration: 17970
loss: 1.0276364088058472,grad_norm: 0.999999398896929, iteration: 17971
loss: 1.023910403251648,grad_norm: 0.9811226144411878, iteration: 17972
loss: 0.9996407628059387,grad_norm: 0.9999993166552855, iteration: 17973
loss: 0.9889591336250305,grad_norm: 0.9999990753352254, iteration: 17974
loss: 1.0013558864593506,grad_norm: 0.9999992129321952, iteration: 17975
loss: 1.035449743270874,grad_norm: 0.9999990615766776, iteration: 17976
loss: 1.0155258178710938,grad_norm: 0.9999990120849692, iteration: 17977
loss: 1.0212018489837646,grad_norm: 0.8999054602410115, iteration: 17978
loss: 1.0121248960494995,grad_norm: 0.9999997216256494, iteration: 17979
loss: 1.0161879062652588,grad_norm: 0.9999991909263454, iteration: 17980
loss: 1.012772560119629,grad_norm: 0.99999965260692, iteration: 17981
loss: 1.0359233617782593,grad_norm: 0.9999991786531074, iteration: 17982
loss: 1.045123815536499,grad_norm: 0.9999992156574615, iteration: 17983
loss: 1.0088999271392822,grad_norm: 0.9999994084916597, iteration: 17984
loss: 0.9989889860153198,grad_norm: 0.9999991090887912, iteration: 17985
loss: 1.0271483659744263,grad_norm: 0.9999991011322468, iteration: 17986
loss: 1.0245811939239502,grad_norm: 0.9999991341119611, iteration: 17987
loss: 0.9638890624046326,grad_norm: 0.9999993599456926, iteration: 17988
loss: 1.0445749759674072,grad_norm: 0.9999991808241139, iteration: 17989
loss: 0.9910921454429626,grad_norm: 0.9999990097795627, iteration: 17990
loss: 1.0151901245117188,grad_norm: 0.9999992481844122, iteration: 17991
loss: 0.9949692487716675,grad_norm: 0.9999990965385888, iteration: 17992
loss: 1.0209546089172363,grad_norm: 0.9999990799760894, iteration: 17993
loss: 1.003851056098938,grad_norm: 0.9999992036497849, iteration: 17994
loss: 1.062955617904663,grad_norm: 0.9999994503543437, iteration: 17995
loss: 1.0022202730178833,grad_norm: 0.9999992477591576, iteration: 17996
loss: 1.018505334854126,grad_norm: 0.9999992892960856, iteration: 17997
loss: 0.9982379674911499,grad_norm: 0.9999991977700733, iteration: 17998
loss: 1.0141102075576782,grad_norm: 0.999998988765759, iteration: 17999
loss: 1.0562669038772583,grad_norm: 0.9999996498633567, iteration: 18000
loss: 0.9811566472053528,grad_norm: 0.9999992826935922, iteration: 18001
loss: 1.0214968919754028,grad_norm: 0.9999991410279241, iteration: 18002
loss: 1.0331112146377563,grad_norm: 0.9999993691453906, iteration: 18003
loss: 0.9824941754341125,grad_norm: 0.8875847725496606, iteration: 18004
loss: 1.0057810544967651,grad_norm: 0.9999990533200135, iteration: 18005
loss: 1.007869005203247,grad_norm: 0.9999991214674495, iteration: 18006
loss: 1.0444947481155396,grad_norm: 0.9999996249630946, iteration: 18007
loss: 0.9899430274963379,grad_norm: 0.9776427054202812, iteration: 18008
loss: 1.027713418006897,grad_norm: 0.9999991656486406, iteration: 18009
loss: 1.0203050374984741,grad_norm: 0.9999993838178607, iteration: 18010
loss: 1.0023083686828613,grad_norm: 0.9999993038016769, iteration: 18011
loss: 1.0140595436096191,grad_norm: 0.9999992814091986, iteration: 18012
loss: 1.0208265781402588,grad_norm: 0.9999991362427583, iteration: 18013
loss: 0.9906799793243408,grad_norm: 0.9999991418920833, iteration: 18014
loss: 1.0252141952514648,grad_norm: 0.9999990031328586, iteration: 18015
loss: 0.9977973103523254,grad_norm: 0.9999990423371439, iteration: 18016
loss: 1.042474389076233,grad_norm: 0.9999992494143268, iteration: 18017
loss: 1.0251904726028442,grad_norm: 0.999999243977558, iteration: 18018
loss: 1.003744125366211,grad_norm: 0.9977632123687892, iteration: 18019
loss: 0.9940131306648254,grad_norm: 0.9999989751281656, iteration: 18020
loss: 1.0466861724853516,grad_norm: 0.9999991655591, iteration: 18021
loss: 1.0311357975006104,grad_norm: 0.9999995880910509, iteration: 18022
loss: 1.0305510759353638,grad_norm: 0.9999990318091941, iteration: 18023
loss: 1.0883166790008545,grad_norm: 0.9999996450146897, iteration: 18024
loss: 1.0434283018112183,grad_norm: 0.9999996533220125, iteration: 18025
loss: 1.0168840885162354,grad_norm: 0.9999993111969292, iteration: 18026
loss: 0.9956220388412476,grad_norm: 0.999999190618909, iteration: 18027
loss: 1.0387345552444458,grad_norm: 0.9999993053168149, iteration: 18028
loss: 1.0413825511932373,grad_norm: 0.9999990550848852, iteration: 18029
loss: 1.022270679473877,grad_norm: 0.9999991883580841, iteration: 18030
loss: 1.0267916917800903,grad_norm: 0.9999994970262643, iteration: 18031
loss: 1.0440027713775635,grad_norm: 0.9999994650121407, iteration: 18032
loss: 0.9815952181816101,grad_norm: 0.9999990502704398, iteration: 18033
loss: 0.9679694175720215,grad_norm: 0.9600935767756085, iteration: 18034
loss: 1.0000358819961548,grad_norm: 0.9999995807999775, iteration: 18035
loss: 1.0253651142120361,grad_norm: 0.9999993433366355, iteration: 18036
loss: 1.0558888912200928,grad_norm: 0.9999992726872083, iteration: 18037
loss: 1.0248252153396606,grad_norm: 0.9999991677034475, iteration: 18038
loss: 1.0053772926330566,grad_norm: 0.9999991861933925, iteration: 18039
loss: 1.009403944015503,grad_norm: 0.9999995628752705, iteration: 18040
loss: 1.0097121000289917,grad_norm: 0.9999991098660109, iteration: 18041
loss: 0.9714163541793823,grad_norm: 0.8930633914073257, iteration: 18042
loss: 1.0290154218673706,grad_norm: 0.9999995807160065, iteration: 18043
loss: 1.0472832918167114,grad_norm: 0.9999996440267932, iteration: 18044
loss: 0.9980719685554504,grad_norm: 0.9999998209734348, iteration: 18045
loss: 1.0264323949813843,grad_norm: 0.999999206229505, iteration: 18046
loss: 1.0602638721466064,grad_norm: 0.9184245807937016, iteration: 18047
loss: 0.9958174824714661,grad_norm: 0.9999991474589439, iteration: 18048
loss: 1.011854648590088,grad_norm: 0.9999991547005148, iteration: 18049
loss: 1.0183675289154053,grad_norm: 0.8697457731308558, iteration: 18050
loss: 1.0577380657196045,grad_norm: 0.999999326315491, iteration: 18051
loss: 1.054824709892273,grad_norm: 0.9999993610178677, iteration: 18052
loss: 1.000275731086731,grad_norm: 0.9999992302234154, iteration: 18053
loss: 0.9932329654693604,grad_norm: 0.9999990753607522, iteration: 18054
loss: 1.0436102151870728,grad_norm: 0.9999993077934555, iteration: 18055
loss: 1.0066967010498047,grad_norm: 0.9999998388582485, iteration: 18056
loss: 0.9999195337295532,grad_norm: 0.9999992488775544, iteration: 18057
loss: 1.0569794178009033,grad_norm: 0.9999992987642918, iteration: 18058
loss: 1.0337640047073364,grad_norm: 0.9999992572763334, iteration: 18059
loss: 0.9959471225738525,grad_norm: 0.9999990597849933, iteration: 18060
loss: 0.9987002015113831,grad_norm: 0.8712333011087685, iteration: 18061
loss: 1.022799015045166,grad_norm: 0.9999991730621867, iteration: 18062
loss: 1.052587628364563,grad_norm: 0.999999665486192, iteration: 18063
loss: 1.0338419675827026,grad_norm: 0.9999990642560916, iteration: 18064
loss: 1.0605088472366333,grad_norm: 0.9999993739926383, iteration: 18065
loss: 1.056311845779419,grad_norm: 0.9999992784876767, iteration: 18066
loss: 0.9635228514671326,grad_norm: 0.9999991546751917, iteration: 18067
loss: 0.9678695201873779,grad_norm: 0.9999992662690477, iteration: 18068
loss: 1.0231342315673828,grad_norm: 0.9999992831944517, iteration: 18069
loss: 1.0189789533615112,grad_norm: 0.9999990934171366, iteration: 18070
loss: 1.0762920379638672,grad_norm: 0.9235143962874306, iteration: 18071
loss: 1.0190677642822266,grad_norm: 0.9999989795732946, iteration: 18072
loss: 1.0186104774475098,grad_norm: 0.9999995034577954, iteration: 18073
loss: 1.0213462114334106,grad_norm: 0.9999992147183651, iteration: 18074
loss: 1.0403746366500854,grad_norm: 0.9144777526412087, iteration: 18075
loss: 1.033327341079712,grad_norm: 0.9999991215254577, iteration: 18076
loss: 1.0258985757827759,grad_norm: 0.9999990524257051, iteration: 18077
loss: 1.0081053972244263,grad_norm: 0.9999990407530049, iteration: 18078
loss: 1.0225157737731934,grad_norm: 0.9999991687748376, iteration: 18079
loss: 0.9996868371963501,grad_norm: 0.9999991860166358, iteration: 18080
loss: 1.0143369436264038,grad_norm: 0.9999992533398533, iteration: 18081
loss: 1.0733810663223267,grad_norm: 0.999999663191511, iteration: 18082
loss: 1.0154298543930054,grad_norm: 0.9999990932419022, iteration: 18083
loss: 1.0352782011032104,grad_norm: 0.8852565869485579, iteration: 18084
loss: 1.000182867050171,grad_norm: 0.9999991137334474, iteration: 18085
loss: 1.028302550315857,grad_norm: 0.9999994312275641, iteration: 18086
loss: 1.0403214693069458,grad_norm: 0.9999990728802612, iteration: 18087
loss: 1.0334066152572632,grad_norm: 0.999999148383853, iteration: 18088
loss: 1.0556272268295288,grad_norm: 0.9999990928339553, iteration: 18089
loss: 1.0268235206604004,grad_norm: 0.9999996534996551, iteration: 18090
loss: 1.0455827713012695,grad_norm: 0.9999997106710642, iteration: 18091
loss: 1.0390745401382446,grad_norm: 0.8145441617564041, iteration: 18092
loss: 1.0030133724212646,grad_norm: 0.9336960947563293, iteration: 18093
loss: 1.0649722814559937,grad_norm: 0.999999083986156, iteration: 18094
loss: 1.0251206159591675,grad_norm: 0.9999991421595119, iteration: 18095
loss: 1.0223292112350464,grad_norm: 0.9999991476249801, iteration: 18096
loss: 0.980793297290802,grad_norm: 0.9210065268841195, iteration: 18097
loss: 1.040668249130249,grad_norm: 0.9999991583529303, iteration: 18098
loss: 1.0400513410568237,grad_norm: 0.9999991167054859, iteration: 18099
loss: 0.9921429753303528,grad_norm: 0.9487662373897885, iteration: 18100
loss: 1.0336265563964844,grad_norm: 0.9999991435438127, iteration: 18101
loss: 1.00664222240448,grad_norm: 0.9999993887675582, iteration: 18102
loss: 1.0022802352905273,grad_norm: 0.9999991646030635, iteration: 18103
loss: 1.0103360414505005,grad_norm: 0.9999991884889642, iteration: 18104
loss: 1.0064924955368042,grad_norm: 0.9999995286220174, iteration: 18105
loss: 1.0354655981063843,grad_norm: 0.9999993088182053, iteration: 18106
loss: 0.9847906827926636,grad_norm: 0.99999915289131, iteration: 18107
loss: 1.0343635082244873,grad_norm: 0.9999992471165473, iteration: 18108
loss: 1.086796760559082,grad_norm: 0.9999992322344483, iteration: 18109
loss: 1.0044647455215454,grad_norm: 0.9999990838479234, iteration: 18110
loss: 1.040162205696106,grad_norm: 0.9356697244342316, iteration: 18111
loss: 1.0325820446014404,grad_norm: 0.9999990982703857, iteration: 18112
loss: 1.0352222919464111,grad_norm: 0.975868369042888, iteration: 18113
loss: 1.045464038848877,grad_norm: 0.9999992050802546, iteration: 18114
loss: 1.0262089967727661,grad_norm: 0.9999991586388315, iteration: 18115
loss: 1.0024937391281128,grad_norm: 0.9999992423341474, iteration: 18116
loss: 0.9399604201316833,grad_norm: 0.9999989862742331, iteration: 18117
loss: 0.9948591589927673,grad_norm: 0.9999992177739868, iteration: 18118
loss: 1.0347106456756592,grad_norm: 0.9716692819276831, iteration: 18119
loss: 1.0044245719909668,grad_norm: 0.9999991214893861, iteration: 18120
loss: 1.032393217086792,grad_norm: 0.9999992104773369, iteration: 18121
loss: 1.0527615547180176,grad_norm: 0.9999997024983559, iteration: 18122
loss: 1.005724310874939,grad_norm: 0.9999990026970467, iteration: 18123
loss: 0.9358634948730469,grad_norm: 0.9999990583808501, iteration: 18124
loss: 1.0149147510528564,grad_norm: 0.9304978015002646, iteration: 18125
loss: 0.9692155718803406,grad_norm: 0.999999123677414, iteration: 18126
loss: 0.9857377409934998,grad_norm: 0.9999995248461737, iteration: 18127
loss: 0.9793397188186646,grad_norm: 0.9999992508379231, iteration: 18128
loss: 0.9664677381515503,grad_norm: 0.9999991762414319, iteration: 18129
loss: 0.986025869846344,grad_norm: 0.9999992207731412, iteration: 18130
loss: 1.0108987092971802,grad_norm: 0.9532331198744246, iteration: 18131
loss: 1.033846378326416,grad_norm: 0.9999991457307762, iteration: 18132
loss: 1.0071110725402832,grad_norm: 0.9999993197160891, iteration: 18133
loss: 1.0586028099060059,grad_norm: 0.999999731249421, iteration: 18134
loss: 1.021274447441101,grad_norm: 0.9775693656574991, iteration: 18135
loss: 1.0489134788513184,grad_norm: 0.9999990994256851, iteration: 18136
loss: 0.9912934899330139,grad_norm: 0.9999991635110089, iteration: 18137
loss: 1.045328140258789,grad_norm: 0.9935399124290214, iteration: 18138
loss: 1.0576592683792114,grad_norm: 0.9999992835314615, iteration: 18139
loss: 1.0111404657363892,grad_norm: 0.9999991034667564, iteration: 18140
loss: 0.9974024295806885,grad_norm: 0.9999991301443684, iteration: 18141
loss: 1.0107202529907227,grad_norm: 0.913883449663014, iteration: 18142
loss: 0.9903371334075928,grad_norm: 0.993809407093764, iteration: 18143
loss: 0.9835049510002136,grad_norm: 0.8293658323661581, iteration: 18144
loss: 1.0039376020431519,grad_norm: 0.9999991201847296, iteration: 18145
loss: 0.9895145893096924,grad_norm: 0.9999991634149729, iteration: 18146
loss: 1.0055934190750122,grad_norm: 0.9636605413600833, iteration: 18147
loss: 1.0368741750717163,grad_norm: 0.9999990725296264, iteration: 18148
loss: 1.0067800283432007,grad_norm: 0.9959926145517551, iteration: 18149
loss: 0.9768856167793274,grad_norm: 0.9999992470445163, iteration: 18150
loss: 1.0198168754577637,grad_norm: 0.9999992213479435, iteration: 18151
loss: 0.9708380699157715,grad_norm: 0.9999991266695799, iteration: 18152
loss: 1.0255279541015625,grad_norm: 0.9999993502563899, iteration: 18153
loss: 1.0118491649627686,grad_norm: 0.9999995177560386, iteration: 18154
loss: 1.083589792251587,grad_norm: 0.999999877933407, iteration: 18155
loss: 1.0395171642303467,grad_norm: 0.9999993302092574, iteration: 18156
loss: 1.0183582305908203,grad_norm: 0.9999995652960207, iteration: 18157
loss: 1.0512579679489136,grad_norm: 0.9999995390687174, iteration: 18158
loss: 1.0410510301589966,grad_norm: 0.999999374567053, iteration: 18159
loss: 1.0221295356750488,grad_norm: 0.9999991507985414, iteration: 18160
loss: 1.031055212020874,grad_norm: 0.878520747457677, iteration: 18161
loss: 1.0279875993728638,grad_norm: 0.8874168750161132, iteration: 18162
loss: 1.0079745054244995,grad_norm: 0.9999990255976677, iteration: 18163
loss: 1.0227876901626587,grad_norm: 0.9999991027427458, iteration: 18164
loss: 1.0526020526885986,grad_norm: 0.9999989824400298, iteration: 18165
loss: 1.0290656089782715,grad_norm: 0.9999992927823437, iteration: 18166
loss: 1.036598801612854,grad_norm: 0.9999998045338049, iteration: 18167
loss: 1.0313827991485596,grad_norm: 0.9999990646071344, iteration: 18168
loss: 1.0343713760375977,grad_norm: 0.9801444006947326, iteration: 18169
loss: 1.0362833738327026,grad_norm: 0.9999991303526913, iteration: 18170
loss: 1.0019960403442383,grad_norm: 0.9999991906765698, iteration: 18171
loss: 0.975959300994873,grad_norm: 0.9846748800916435, iteration: 18172
loss: 1.043865442276001,grad_norm: 0.9999991845838233, iteration: 18173
loss: 1.026734709739685,grad_norm: 0.9999990837287301, iteration: 18174
loss: 1.0523066520690918,grad_norm: 0.9999991542625324, iteration: 18175
loss: 1.0092084407806396,grad_norm: 0.8669695012498557, iteration: 18176
loss: 0.9652128219604492,grad_norm: 0.9999989313455007, iteration: 18177
loss: 1.0728009939193726,grad_norm: 0.999999499212584, iteration: 18178
loss: 1.0207123756408691,grad_norm: 0.9387893692600481, iteration: 18179
loss: 1.020061731338501,grad_norm: 0.9999991347703926, iteration: 18180
loss: 1.0534526109695435,grad_norm: 0.9999991933130463, iteration: 18181
loss: 1.0171834230422974,grad_norm: 0.9999991312501307, iteration: 18182
loss: 0.9797214865684509,grad_norm: 0.9920656784668312, iteration: 18183
loss: 1.0134868621826172,grad_norm: 0.9999991381011131, iteration: 18184
loss: 1.0108603239059448,grad_norm: 0.9074460928104192, iteration: 18185
loss: 1.0300341844558716,grad_norm: 0.8714614590865986, iteration: 18186
loss: 0.9895140528678894,grad_norm: 0.999999074812338, iteration: 18187
loss: 1.0038795471191406,grad_norm: 0.9830418053823524, iteration: 18188
loss: 1.0253214836120605,grad_norm: 0.9999991485992406, iteration: 18189
loss: 1.002153754234314,grad_norm: 0.9999991923726311, iteration: 18190
loss: 0.9766427278518677,grad_norm: 0.9999993308360692, iteration: 18191
loss: 1.0151270627975464,grad_norm: 0.9999991414772208, iteration: 18192
loss: 0.9713496565818787,grad_norm: 0.9999992392218151, iteration: 18193
loss: 0.9844467043876648,grad_norm: 0.9999234218987376, iteration: 18194
loss: 1.0283669233322144,grad_norm: 0.9999996171856516, iteration: 18195
loss: 1.008497714996338,grad_norm: 0.9999990738459187, iteration: 18196
loss: 1.0324316024780273,grad_norm: 0.9999991514779832, iteration: 18197
loss: 1.0104440450668335,grad_norm: 0.999999006599451, iteration: 18198
loss: 0.9870810508728027,grad_norm: 0.9999992728516819, iteration: 18199
loss: 1.0117539167404175,grad_norm: 0.9999993325890904, iteration: 18200
loss: 1.0711101293563843,grad_norm: 0.9802846500760799, iteration: 18201
loss: 1.0140085220336914,grad_norm: 0.9184897317147812, iteration: 18202
loss: 1.0353220701217651,grad_norm: 0.9999990577070194, iteration: 18203
loss: 0.9776468276977539,grad_norm: 0.8267138747225213, iteration: 18204
loss: 1.0731377601623535,grad_norm: 0.99999895902398, iteration: 18205
loss: 1.0524821281433105,grad_norm: 0.9999995103737799, iteration: 18206
loss: 1.1147832870483398,grad_norm: 0.9999992667260834, iteration: 18207
loss: 1.0327250957489014,grad_norm: 0.976666777364294, iteration: 18208
loss: 1.072801947593689,grad_norm: 0.9999996568805353, iteration: 18209
loss: 1.0241942405700684,grad_norm: 0.9387941600790077, iteration: 18210
loss: 1.0243947505950928,grad_norm: 0.9999993497311652, iteration: 18211
loss: 1.0698421001434326,grad_norm: 0.9999993469032038, iteration: 18212
loss: 1.015059232711792,grad_norm: 0.993434011047962, iteration: 18213
loss: 1.0264652967453003,grad_norm: 0.9292997333451033, iteration: 18214
loss: 1.0293923616409302,grad_norm: 0.8573481241330653, iteration: 18215
loss: 1.0503379106521606,grad_norm: 0.9999991086432786, iteration: 18216
loss: 1.0656774044036865,grad_norm: 0.9999992208457761, iteration: 18217
loss: 1.0337257385253906,grad_norm: 0.9999990472133503, iteration: 18218
loss: 1.0102559328079224,grad_norm: 0.9999991852481517, iteration: 18219
loss: 1.052773356437683,grad_norm: 0.9999992032102734, iteration: 18220
loss: 1.0305229425430298,grad_norm: 0.9999997872687194, iteration: 18221
loss: 1.016746163368225,grad_norm: 0.9999992134492268, iteration: 18222
loss: 1.0064454078674316,grad_norm: 0.9999991133899353, iteration: 18223
loss: 0.9951789379119873,grad_norm: 0.9719726841326394, iteration: 18224
loss: 1.0309101343154907,grad_norm: 0.9904318718055298, iteration: 18225
loss: 1.027405858039856,grad_norm: 0.9999991836904689, iteration: 18226
loss: 1.0416150093078613,grad_norm: 0.9656995773443642, iteration: 18227
loss: 1.0305136442184448,grad_norm: 0.9999990788823913, iteration: 18228
loss: 0.9793935418128967,grad_norm: 0.9999993686037404, iteration: 18229
loss: 1.0014792680740356,grad_norm: 0.9999997800758983, iteration: 18230
loss: 0.9775735139846802,grad_norm: 0.9999993691896486, iteration: 18231
loss: 1.0115008354187012,grad_norm: 0.9999991612400383, iteration: 18232
loss: 0.9927971363067627,grad_norm: 0.9999990753797903, iteration: 18233
loss: 1.0302554368972778,grad_norm: 0.9988687883403897, iteration: 18234
loss: 0.9776533246040344,grad_norm: 0.9999991118762217, iteration: 18235
loss: 1.004433512687683,grad_norm: 0.9569641348757982, iteration: 18236
loss: 0.9791275858879089,grad_norm: 0.9575870910178261, iteration: 18237
loss: 0.9710626006126404,grad_norm: 0.9999991218601375, iteration: 18238
loss: 1.040241003036499,grad_norm: 0.9999990612355513, iteration: 18239
loss: 1.0005849599838257,grad_norm: 0.9359022330578266, iteration: 18240
loss: 0.999690055847168,grad_norm: 0.9999991363267096, iteration: 18241
loss: 1.0188493728637695,grad_norm: 0.9999991117203898, iteration: 18242
loss: 0.9803686141967773,grad_norm: 0.9838700127779832, iteration: 18243
loss: 1.0486159324645996,grad_norm: 0.9999992177362254, iteration: 18244
loss: 0.9749749898910522,grad_norm: 0.9999990217683306, iteration: 18245
loss: 0.9950108528137207,grad_norm: 0.9999992284686429, iteration: 18246
loss: 0.9830271601676941,grad_norm: 0.9999994415659544, iteration: 18247
loss: 1.0492947101593018,grad_norm: 0.9999990145662064, iteration: 18248
loss: 0.9616910219192505,grad_norm: 0.9936768458197593, iteration: 18249
loss: 1.0424585342407227,grad_norm: 0.9999991172550914, iteration: 18250
loss: 0.9658612608909607,grad_norm: 0.9594618457639814, iteration: 18251
loss: 0.9615024924278259,grad_norm: 0.999999094543787, iteration: 18252
loss: 0.9877917766571045,grad_norm: 0.9999991858082573, iteration: 18253
loss: 0.9804041385650635,grad_norm: 0.9999992747287553, iteration: 18254
loss: 1.0224305391311646,grad_norm: 0.9999990201728579, iteration: 18255
loss: 0.970862865447998,grad_norm: 0.9999990446869397, iteration: 18256
loss: 0.978719174861908,grad_norm: 0.9999992541567698, iteration: 18257
loss: 1.0576659440994263,grad_norm: 0.9999993570277699, iteration: 18258
loss: 1.0255340337753296,grad_norm: 0.9999993195379968, iteration: 18259
loss: 1.0018706321716309,grad_norm: 0.9999989716212626, iteration: 18260
loss: 1.051155686378479,grad_norm: 0.9016237090464717, iteration: 18261
loss: 0.977647066116333,grad_norm: 0.9999990822105513, iteration: 18262
loss: 1.0259805917739868,grad_norm: 0.9999992280239953, iteration: 18263
loss: 1.0227307081222534,grad_norm: 0.9999992006287025, iteration: 18264
loss: 0.999798595905304,grad_norm: 0.9687775320382185, iteration: 18265
loss: 1.0037872791290283,grad_norm: 0.9999992169941144, iteration: 18266
loss: 1.0047672986984253,grad_norm: 0.9999990766001635, iteration: 18267
loss: 1.0646488666534424,grad_norm: 0.9347891489771109, iteration: 18268
loss: 1.015410304069519,grad_norm: 0.9999990737700828, iteration: 18269
loss: 1.0334961414337158,grad_norm: 0.909847153307936, iteration: 18270
loss: 1.033086895942688,grad_norm: 0.9999993224911127, iteration: 18271
loss: 0.9373030662536621,grad_norm: 0.9999991870686255, iteration: 18272
loss: 1.0087722539901733,grad_norm: 0.916285656434255, iteration: 18273
loss: 1.0262839794158936,grad_norm: 0.999999480113436, iteration: 18274
loss: 1.038078784942627,grad_norm: 0.9999990713391527, iteration: 18275
loss: 1.022810459136963,grad_norm: 0.9999998369610548, iteration: 18276
loss: 1.0220706462860107,grad_norm: 0.9999992569227938, iteration: 18277
loss: 1.0583473443984985,grad_norm: 0.9999990197664801, iteration: 18278
loss: 0.9948747158050537,grad_norm: 0.9999996435087367, iteration: 18279
loss: 1.0421147346496582,grad_norm: 0.999998954540242, iteration: 18280
loss: 0.9748762249946594,grad_norm: 0.9999992270796338, iteration: 18281
loss: 1.008545994758606,grad_norm: 0.9999991384882312, iteration: 18282
loss: 0.9951720833778381,grad_norm: 0.9999990901194122, iteration: 18283
loss: 1.0270577669143677,grad_norm: 0.8903045310806488, iteration: 18284
loss: 1.0228724479675293,grad_norm: 0.9999992032648892, iteration: 18285
loss: 0.9810405373573303,grad_norm: 0.99999904435731, iteration: 18286
loss: 0.9786472916603088,grad_norm: 0.9999993303678885, iteration: 18287
loss: 1.0162100791931152,grad_norm: 0.9999991703593152, iteration: 18288
loss: 1.0340638160705566,grad_norm: 0.999998976837614, iteration: 18289
loss: 0.979763925075531,grad_norm: 0.9276740478304155, iteration: 18290
loss: 1.0172901153564453,grad_norm: 0.9999994631441552, iteration: 18291
loss: 1.0065091848373413,grad_norm: 0.9999991371586411, iteration: 18292
loss: 0.992453932762146,grad_norm: 0.9999990665422471, iteration: 18293
loss: 1.0076534748077393,grad_norm: 0.9999994961542793, iteration: 18294
loss: 1.0443083047866821,grad_norm: 0.9999991213021558, iteration: 18295
loss: 1.014378547668457,grad_norm: 0.9999990377848127, iteration: 18296
loss: 1.0370492935180664,grad_norm: 0.9999990813386687, iteration: 18297
loss: 1.0672909021377563,grad_norm: 0.9999991607872948, iteration: 18298
loss: 1.0170773267745972,grad_norm: 0.9999992452968242, iteration: 18299
loss: 0.9976696372032166,grad_norm: 0.9999991255587095, iteration: 18300
loss: 1.0584890842437744,grad_norm: 0.9999993627276723, iteration: 18301
loss: 1.023491621017456,grad_norm: 0.9999991592415798, iteration: 18302
loss: 1.0230029821395874,grad_norm: 0.9999991586089476, iteration: 18303
loss: 1.0004826784133911,grad_norm: 0.9970817364390956, iteration: 18304
loss: 1.0090839862823486,grad_norm: 0.9999993613688924, iteration: 18305
loss: 1.0601545572280884,grad_norm: 0.9777911106587968, iteration: 18306
loss: 0.949401319026947,grad_norm: 0.9999992858094565, iteration: 18307
loss: 1.0030518770217896,grad_norm: 0.9999991348064408, iteration: 18308
loss: 0.9999887943267822,grad_norm: 0.9999991954835847, iteration: 18309
loss: 1.0096274614334106,grad_norm: 0.9999992304318884, iteration: 18310
loss: 1.0567197799682617,grad_norm: 0.9999991745873715, iteration: 18311
loss: 1.0529699325561523,grad_norm: 0.9698432598010032, iteration: 18312
loss: 1.024671196937561,grad_norm: 0.9999991885333664, iteration: 18313
loss: 1.0135843753814697,grad_norm: 0.9999991163367398, iteration: 18314
loss: 0.9973282814025879,grad_norm: 0.9999990702016553, iteration: 18315
loss: 1.0454983711242676,grad_norm: 0.9063012958475822, iteration: 18316
loss: 1.031800389289856,grad_norm: 0.9999990521003702, iteration: 18317
loss: 1.0033234357833862,grad_norm: 0.9999993069233551, iteration: 18318
loss: 1.04555082321167,grad_norm: 0.9928468181094933, iteration: 18319
loss: 1.0016961097717285,grad_norm: 0.907882949856487, iteration: 18320
loss: 0.99021315574646,grad_norm: 0.9999991494441393, iteration: 18321
loss: 1.0349583625793457,grad_norm: 0.9999997463660159, iteration: 18322
loss: 1.017485499382019,grad_norm: 0.9999991327669787, iteration: 18323
loss: 1.0229018926620483,grad_norm: 0.999999328027692, iteration: 18324
loss: 0.9880509972572327,grad_norm: 0.9999991440331508, iteration: 18325
loss: 0.984704315662384,grad_norm: 0.9999991795576166, iteration: 18326
loss: 1.0240815877914429,grad_norm: 0.9999991347653655, iteration: 18327
loss: 1.0153398513793945,grad_norm: 0.97992895919553, iteration: 18328
loss: 1.012618064880371,grad_norm: 0.9999990956350795, iteration: 18329
loss: 0.984368622303009,grad_norm: 0.9076782336952378, iteration: 18330
loss: 1.0484225749969482,grad_norm: 0.999999285662605, iteration: 18331
loss: 1.0263278484344482,grad_norm: 0.962823553793389, iteration: 18332
loss: 1.0122439861297607,grad_norm: 0.9999990171854239, iteration: 18333
loss: 1.045529842376709,grad_norm: 0.9999990875128789, iteration: 18334
loss: 1.0208861827850342,grad_norm: 0.9999992913931723, iteration: 18335
loss: 0.9843700528144836,grad_norm: 0.9999990937475867, iteration: 18336
loss: 1.0213786363601685,grad_norm: 0.9999992791540996, iteration: 18337
loss: 0.9898836016654968,grad_norm: 0.9999992424551616, iteration: 18338
loss: 1.026946783065796,grad_norm: 0.8860933062675684, iteration: 18339
loss: 1.0102936029434204,grad_norm: 0.8756324070496989, iteration: 18340
loss: 1.024015188217163,grad_norm: 0.9999993889072128, iteration: 18341
loss: 1.0183205604553223,grad_norm: 0.9999991493306911, iteration: 18342
loss: 1.009986400604248,grad_norm: 0.9999989893436876, iteration: 18343
loss: 1.0519442558288574,grad_norm: 0.999999732599233, iteration: 18344
loss: 0.9908803105354309,grad_norm: 0.999999293949841, iteration: 18345
loss: 1.0251779556274414,grad_norm: 0.9999989975853149, iteration: 18346
loss: 1.055269479751587,grad_norm: 0.9999991596089965, iteration: 18347
loss: 1.0137540102005005,grad_norm: 0.9922665595143674, iteration: 18348
loss: 0.9973482489585876,grad_norm: 0.999999113802556, iteration: 18349
loss: 0.9438849091529846,grad_norm: 0.999999139692758, iteration: 18350
loss: 1.01271390914917,grad_norm: 0.9999990424553641, iteration: 18351
loss: 0.9370625019073486,grad_norm: 0.9999991598505015, iteration: 18352
loss: 0.9675446152687073,grad_norm: 0.9999990621120162, iteration: 18353
loss: 1.0272413492202759,grad_norm: 0.9421623046868927, iteration: 18354
loss: 1.0276210308074951,grad_norm: 0.9806415736312509, iteration: 18355
loss: 1.0190731287002563,grad_norm: 0.9833149408611527, iteration: 18356
loss: 1.022111415863037,grad_norm: 0.9999990722935183, iteration: 18357
loss: 0.9894118905067444,grad_norm: 0.9999991975952894, iteration: 18358
loss: 0.9660893082618713,grad_norm: 0.9999990562669062, iteration: 18359
loss: 1.003468632698059,grad_norm: 0.9856185797607767, iteration: 18360
loss: 1.0565122365951538,grad_norm: 0.9999991332240374, iteration: 18361
loss: 1.0586650371551514,grad_norm: 0.9999992134635569, iteration: 18362
loss: 0.9657798409461975,grad_norm: 0.9999991693604611, iteration: 18363
loss: 1.0441433191299438,grad_norm: 0.9999992154179983, iteration: 18364
loss: 0.9965246915817261,grad_norm: 0.9999991809305805, iteration: 18365
loss: 1.0082341432571411,grad_norm: 0.9999990328336622, iteration: 18366
loss: 0.943576991558075,grad_norm: 0.9999992024717537, iteration: 18367
loss: 1.000331997871399,grad_norm: 0.9814996943195948, iteration: 18368
loss: 1.071939468383789,grad_norm: 0.981891341682151, iteration: 18369
loss: 1.015048623085022,grad_norm: 0.999999201110323, iteration: 18370
loss: 1.0511502027511597,grad_norm: 0.9999993100153951, iteration: 18371
loss: 1.0469703674316406,grad_norm: 0.9999997889867552, iteration: 18372
loss: 1.0243693590164185,grad_norm: 0.9987574502976206, iteration: 18373
loss: 1.025280475616455,grad_norm: 0.999999197427489, iteration: 18374
loss: 0.9859617352485657,grad_norm: 0.9999992305628069, iteration: 18375
loss: 1.0320967435836792,grad_norm: 0.9861736787449066, iteration: 18376
loss: 1.0281531810760498,grad_norm: 0.9999991021768667, iteration: 18377
loss: 0.9804906845092773,grad_norm: 0.9921662991812691, iteration: 18378
loss: 1.0329557657241821,grad_norm: 0.9999993680158071, iteration: 18379
loss: 0.9862812757492065,grad_norm: 0.9999990864468387, iteration: 18380
loss: 1.0187398195266724,grad_norm: 0.9499291298785294, iteration: 18381
loss: 0.9982424974441528,grad_norm: 0.9885852841571166, iteration: 18382
loss: 1.007795810699463,grad_norm: 0.9999996798554868, iteration: 18383
loss: 1.0368413925170898,grad_norm: 0.9999994581644434, iteration: 18384
loss: 1.0041252374649048,grad_norm: 0.9999992811894453, iteration: 18385
loss: 1.020248293876648,grad_norm: 0.9999992808046978, iteration: 18386
loss: 1.066068410873413,grad_norm: 0.9999991088188679, iteration: 18387
loss: 1.0445433855056763,grad_norm: 0.9999990835051821, iteration: 18388
loss: 0.9915688037872314,grad_norm: 0.9999991738937436, iteration: 18389
loss: 1.036865472793579,grad_norm: 0.9999991384368488, iteration: 18390
loss: 1.0237451791763306,grad_norm: 0.9999990326436377, iteration: 18391
loss: 1.0340327024459839,grad_norm: 0.9999991944674174, iteration: 18392
loss: 1.011552095413208,grad_norm: 0.8966402975401224, iteration: 18393
loss: 0.967884361743927,grad_norm: 0.9999992511777517, iteration: 18394
loss: 1.0262192487716675,grad_norm: 0.9999993846679829, iteration: 18395
loss: 1.0502526760101318,grad_norm: 0.9999992204557173, iteration: 18396
loss: 1.000993013381958,grad_norm: 0.9999991433205858, iteration: 18397
loss: 1.0592128038406372,grad_norm: 0.9999993918443716, iteration: 18398
loss: 1.0158089399337769,grad_norm: 0.9999994618645421, iteration: 18399
loss: 0.9988223910331726,grad_norm: 0.8408471398839796, iteration: 18400
loss: 1.0569164752960205,grad_norm: 0.9999991744007654, iteration: 18401
loss: 1.0202223062515259,grad_norm: 0.999999140406791, iteration: 18402
loss: 1.028399109840393,grad_norm: 0.9999991481511682, iteration: 18403
loss: 1.0322455167770386,grad_norm: 0.9999989347170638, iteration: 18404
loss: 0.9913681149482727,grad_norm: 0.9898884777934778, iteration: 18405
loss: 1.066919207572937,grad_norm: 0.9999997927489386, iteration: 18406
loss: 1.0152807235717773,grad_norm: 0.9999991895442835, iteration: 18407
loss: 1.0206024646759033,grad_norm: 0.999999087877212, iteration: 18408
loss: 1.027579665184021,grad_norm: 0.999999380833521, iteration: 18409
loss: 1.0374716520309448,grad_norm: 0.9999992610434633, iteration: 18410
loss: 0.9911893606185913,grad_norm: 0.9999992143201824, iteration: 18411
loss: 1.022495985031128,grad_norm: 0.9999992269717465, iteration: 18412
loss: 0.9994072914123535,grad_norm: 0.8959111162651108, iteration: 18413
loss: 1.017126441001892,grad_norm: 0.9999994900133812, iteration: 18414
loss: 0.9777525067329407,grad_norm: 0.9999991267983013, iteration: 18415
loss: 0.9372848272323608,grad_norm: 0.9999991967600051, iteration: 18416
loss: 1.0418438911437988,grad_norm: 0.9999993538424456, iteration: 18417
loss: 1.0059289932250977,grad_norm: 0.9999992326588374, iteration: 18418
loss: 0.9856276512145996,grad_norm: 0.9999991845863452, iteration: 18419
loss: 1.024872064590454,grad_norm: 0.9225487533114854, iteration: 18420
loss: 0.999130368232727,grad_norm: 0.9999991657224506, iteration: 18421
loss: 1.0265226364135742,grad_norm: 0.9999990940846625, iteration: 18422
loss: 0.9571755528450012,grad_norm: 0.9999991639518884, iteration: 18423
loss: 1.0755534172058105,grad_norm: 0.999593880877947, iteration: 18424
loss: 1.0020508766174316,grad_norm: 0.8961296227598682, iteration: 18425
loss: 1.0439413785934448,grad_norm: 0.999999162257257, iteration: 18426
loss: 1.0152883529663086,grad_norm: 0.9999991742094331, iteration: 18427
loss: 0.9845707416534424,grad_norm: 0.9999990841328406, iteration: 18428
loss: 0.9921824336051941,grad_norm: 0.9999991094550835, iteration: 18429
loss: 1.0326296091079712,grad_norm: 0.999999122992846, iteration: 18430
loss: 1.0091403722763062,grad_norm: 0.9965514228578141, iteration: 18431
loss: 1.0343084335327148,grad_norm: 0.9999991919158362, iteration: 18432
loss: 1.052945613861084,grad_norm: 0.9999992160142778, iteration: 18433
loss: 0.9936013221740723,grad_norm: 0.9999991337633019, iteration: 18434
loss: 1.0324838161468506,grad_norm: 0.9999991393144934, iteration: 18435
loss: 1.0126687288284302,grad_norm: 0.9999992574251632, iteration: 18436
loss: 1.0055687427520752,grad_norm: 0.9999993883705294, iteration: 18437
loss: 1.0027729272842407,grad_norm: 0.9999997737906704, iteration: 18438
loss: 1.0414440631866455,grad_norm: 0.9999991804625703, iteration: 18439
loss: 1.0219839811325073,grad_norm: 0.999999220177349, iteration: 18440
loss: 1.016920804977417,grad_norm: 0.9999991664850455, iteration: 18441
loss: 0.9865933060646057,grad_norm: 0.9999992635699576, iteration: 18442
loss: 0.9557744264602661,grad_norm: 0.999999195365562, iteration: 18443
loss: 1.0304884910583496,grad_norm: 0.9549382401229833, iteration: 18444
loss: 0.9957004189491272,grad_norm: 0.9999990376254103, iteration: 18445
loss: 0.9714914560317993,grad_norm: 0.8840155759091114, iteration: 18446
loss: 1.0653446912765503,grad_norm: 0.999999145765974, iteration: 18447
loss: 1.0190969705581665,grad_norm: 0.9527861629086452, iteration: 18448
loss: 1.0646718740463257,grad_norm: 0.9999992262933296, iteration: 18449
loss: 1.0443921089172363,grad_norm: 0.9999991478485609, iteration: 18450
loss: 1.046127438545227,grad_norm: 0.9999992235613191, iteration: 18451
loss: 1.0376968383789062,grad_norm: 0.9999992505327483, iteration: 18452
loss: 0.9969817399978638,grad_norm: 0.9505701754741942, iteration: 18453
loss: 0.9953704476356506,grad_norm: 0.9999989731697244, iteration: 18454
loss: 1.0485668182373047,grad_norm: 0.9173140458524905, iteration: 18455
loss: 1.0040977001190186,grad_norm: 0.9999992313851844, iteration: 18456
loss: 1.0064857006072998,grad_norm: 0.9999993379532098, iteration: 18457
loss: 1.0079983472824097,grad_norm: 0.999999091344308, iteration: 18458
loss: 1.0238001346588135,grad_norm: 0.9999990290005069, iteration: 18459
loss: 0.9935728907585144,grad_norm: 0.9517243489985453, iteration: 18460
loss: 1.012266993522644,grad_norm: 0.9999989806931907, iteration: 18461
loss: 1.0258110761642456,grad_norm: 0.9576702500810278, iteration: 18462
loss: 1.0411826372146606,grad_norm: 0.9999990341664002, iteration: 18463
loss: 0.9997902512550354,grad_norm: 0.9999990912514244, iteration: 18464
loss: 1.0388760566711426,grad_norm: 0.9999992912152276, iteration: 18465
loss: 1.0044430494308472,grad_norm: 0.9999991623061882, iteration: 18466
loss: 1.0119330883026123,grad_norm: 0.9999991376797214, iteration: 18467
loss: 1.0155161619186401,grad_norm: 0.9999991617216742, iteration: 18468
loss: 1.025060772895813,grad_norm: 0.9999991391571935, iteration: 18469
loss: 1.0303746461868286,grad_norm: 0.9999991786940026, iteration: 18470
loss: 0.9839874505996704,grad_norm: 0.9999992973436665, iteration: 18471
loss: 1.0169658660888672,grad_norm: 0.9999992782108399, iteration: 18472
loss: 0.9932822585105896,grad_norm: 0.9999990410328203, iteration: 18473
loss: 1.0581028461456299,grad_norm: 0.9999995810511011, iteration: 18474
loss: 0.996094822883606,grad_norm: 0.9999990577505748, iteration: 18475
loss: 1.037234902381897,grad_norm: 0.9912884450328082, iteration: 18476
loss: 1.0065925121307373,grad_norm: 0.9999991665713767, iteration: 18477
loss: 0.9916228652000427,grad_norm: 0.9666816576076369, iteration: 18478
loss: 1.0342472791671753,grad_norm: 0.99999914387837, iteration: 18479
loss: 1.0760623216629028,grad_norm: 0.9999991347239487, iteration: 18480
loss: 1.098154067993164,grad_norm: 0.9999991461986905, iteration: 18481
loss: 1.0089424848556519,grad_norm: 0.9444983503586627, iteration: 18482
loss: 0.9801887273788452,grad_norm: 0.9999992500964334, iteration: 18483
loss: 1.0249534845352173,grad_norm: 0.9999994932004803, iteration: 18484
loss: 1.0263524055480957,grad_norm: 0.9999991158803346, iteration: 18485
loss: 1.0382075309753418,grad_norm: 0.9999989571528792, iteration: 18486
loss: 1.034966230392456,grad_norm: 0.9999992271211664, iteration: 18487
loss: 0.9843775629997253,grad_norm: 0.9999990725332262, iteration: 18488
loss: 1.0004889965057373,grad_norm: 0.9999991568326534, iteration: 18489
loss: 1.0231598615646362,grad_norm: 0.9999993161542451, iteration: 18490
loss: 1.0314109325408936,grad_norm: 0.9999993296769155, iteration: 18491
loss: 0.9816305637359619,grad_norm: 0.9556829263242121, iteration: 18492
loss: 1.0022501945495605,grad_norm: 0.9999992215253176, iteration: 18493
loss: 0.991417407989502,grad_norm: 0.9999993070991146, iteration: 18494
loss: 1.0192018747329712,grad_norm: 0.9999997359211829, iteration: 18495
loss: 1.032283902168274,grad_norm: 0.9999990195780908, iteration: 18496
loss: 1.019784927368164,grad_norm: 0.9999992009619626, iteration: 18497
loss: 0.9835164546966553,grad_norm: 0.9999990564159325, iteration: 18498
loss: 1.0313385725021362,grad_norm: 0.9999990478657702, iteration: 18499
loss: 1.0430927276611328,grad_norm: 0.8259811047446388, iteration: 18500
loss: 1.0271751880645752,grad_norm: 0.9999992143875546, iteration: 18501
loss: 0.9900704026222229,grad_norm: 0.9457940205090675, iteration: 18502
loss: 1.0200629234313965,grad_norm: 0.9955876669204771, iteration: 18503
loss: 1.0160129070281982,grad_norm: 0.9999991743445328, iteration: 18504
loss: 0.9851126074790955,grad_norm: 0.9999991468084489, iteration: 18505
loss: 1.0019835233688354,grad_norm: 0.9999990797210041, iteration: 18506
loss: 1.0453085899353027,grad_norm: 0.999999428794447, iteration: 18507
loss: 1.0077238082885742,grad_norm: 0.9999990175366379, iteration: 18508
loss: 1.046764850616455,grad_norm: 0.9999991349463262, iteration: 18509
loss: 0.9894648790359497,grad_norm: 0.9999992941675495, iteration: 18510
loss: 0.9912404417991638,grad_norm: 0.9999992086834565, iteration: 18511
loss: 1.0107594728469849,grad_norm: 0.9999990863274313, iteration: 18512
loss: 1.0026837587356567,grad_norm: 0.9254381921353747, iteration: 18513
loss: 1.0090519189834595,grad_norm: 0.9999990342887433, iteration: 18514
loss: 1.0196928977966309,grad_norm: 0.9999990606079494, iteration: 18515
loss: 1.0316896438598633,grad_norm: 0.9470936203958796, iteration: 18516
loss: 1.0140349864959717,grad_norm: 0.9999990858704455, iteration: 18517
loss: 0.9772372245788574,grad_norm: 0.9999991262170329, iteration: 18518
loss: 1.0263930559158325,grad_norm: 0.9219286400345285, iteration: 18519
loss: 1.037147045135498,grad_norm: 0.9999993229388557, iteration: 18520
loss: 0.9840587377548218,grad_norm: 0.9999991636839807, iteration: 18521
loss: 0.9845190644264221,grad_norm: 0.999998999617608, iteration: 18522
loss: 1.0032405853271484,grad_norm: 0.904818365386147, iteration: 18523
loss: 0.981231689453125,grad_norm: 0.9999990992742375, iteration: 18524
loss: 1.0523830652236938,grad_norm: 0.9999995014043096, iteration: 18525
loss: 1.0069736242294312,grad_norm: 0.9999991740903247, iteration: 18526
loss: 1.0136263370513916,grad_norm: 0.9668265624499766, iteration: 18527
loss: 0.958429217338562,grad_norm: 0.9999991409101813, iteration: 18528
loss: 0.9875745177268982,grad_norm: 0.999999050493831, iteration: 18529
loss: 1.0085090398788452,grad_norm: 0.9999992833639727, iteration: 18530
loss: 1.0142438411712646,grad_norm: 0.999999007691024, iteration: 18531
loss: 1.0462617874145508,grad_norm: 0.9999991641208844, iteration: 18532
loss: 1.029045820236206,grad_norm: 0.9609953344280542, iteration: 18533
loss: 1.0315507650375366,grad_norm: 0.9999991152796728, iteration: 18534
loss: 0.9904167652130127,grad_norm: 0.9999993097530335, iteration: 18535
loss: 1.0765529870986938,grad_norm: 0.9999998013736967, iteration: 18536
loss: 1.0877329111099243,grad_norm: 0.9999995684800488, iteration: 18537
loss: 0.9503617286682129,grad_norm: 0.9999991359547674, iteration: 18538
loss: 1.0153629779815674,grad_norm: 0.9999990655165388, iteration: 18539
loss: 1.0219981670379639,grad_norm: 0.962498591616915, iteration: 18540
loss: 0.9848586916923523,grad_norm: 0.9999992708932623, iteration: 18541
loss: 1.0837386846542358,grad_norm: 0.9999992305264781, iteration: 18542
loss: 1.0550843477249146,grad_norm: 0.9999992783605361, iteration: 18543
loss: 1.060045838356018,grad_norm: 0.9999991276576632, iteration: 18544
loss: 1.0093656778335571,grad_norm: 0.9999990827065305, iteration: 18545
loss: 1.0148714780807495,grad_norm: 0.9999991834760236, iteration: 18546
loss: 0.9966655969619751,grad_norm: 0.8426223338815798, iteration: 18547
loss: 0.9985692501068115,grad_norm: 0.9728254032784641, iteration: 18548
loss: 1.0177762508392334,grad_norm: 0.9999996976377041, iteration: 18549
loss: 1.012550950050354,grad_norm: 0.9999992734731146, iteration: 18550
loss: 1.0275888442993164,grad_norm: 0.9827539383677684, iteration: 18551
loss: 1.048445224761963,grad_norm: 0.9999990743770509, iteration: 18552
loss: 1.053790807723999,grad_norm: 0.9889473698313399, iteration: 18553
loss: 1.0283476114273071,grad_norm: 0.9999993812779883, iteration: 18554
loss: 1.024788737297058,grad_norm: 0.9999989532733056, iteration: 18555
loss: 1.0430740118026733,grad_norm: 0.9999991898493268, iteration: 18556
loss: 1.0086487531661987,grad_norm: 0.9999997253481834, iteration: 18557
loss: 1.0429807901382446,grad_norm: 0.9999990255230031, iteration: 18558
loss: 1.0152583122253418,grad_norm: 0.999999150515698, iteration: 18559
loss: 1.0272701978683472,grad_norm: 0.99999914867375, iteration: 18560
loss: 1.0651675462722778,grad_norm: 0.9999992198154113, iteration: 18561
loss: 1.038149118423462,grad_norm: 0.9999991396455101, iteration: 18562
loss: 0.9888439774513245,grad_norm: 0.9999991070166868, iteration: 18563
loss: 1.025099754333496,grad_norm: 0.9999991071701312, iteration: 18564
loss: 1.0354113578796387,grad_norm: 0.9999997644420627, iteration: 18565
loss: 1.0015805959701538,grad_norm: 0.9999991657630444, iteration: 18566
loss: 1.0089908838272095,grad_norm: 0.9999995215562782, iteration: 18567
loss: 0.9850569367408752,grad_norm: 0.999999035525974, iteration: 18568
loss: 0.997211754322052,grad_norm: 0.9919828466322487, iteration: 18569
loss: 1.083216905593872,grad_norm: 0.9999998341165194, iteration: 18570
loss: 1.0402944087982178,grad_norm: 0.9999990719976521, iteration: 18571
loss: 0.9888714551925659,grad_norm: 0.8436616881099995, iteration: 18572
loss: 1.0175023078918457,grad_norm: 0.9999997206603226, iteration: 18573
loss: 0.9871023297309875,grad_norm: 0.9999991765735307, iteration: 18574
loss: 1.0184204578399658,grad_norm: 0.9999995963069993, iteration: 18575
loss: 1.04171884059906,grad_norm: 0.9999994329194642, iteration: 18576
loss: 0.9877700805664062,grad_norm: 0.999999025190118, iteration: 18577
loss: 1.0268396139144897,grad_norm: 0.9999993711401433, iteration: 18578
loss: 0.9902875423431396,grad_norm: 0.9743619336835408, iteration: 18579
loss: 0.994391679763794,grad_norm: 0.9999991121580628, iteration: 18580
loss: 1.1617616415023804,grad_norm: 0.9999997942767958, iteration: 18581
loss: 1.019174337387085,grad_norm: 0.9999993869614381, iteration: 18582
loss: 1.0185208320617676,grad_norm: 0.9693495417006113, iteration: 18583
loss: 1.0362110137939453,grad_norm: 0.9999996278278973, iteration: 18584
loss: 0.9900621175765991,grad_norm: 0.9999993086127266, iteration: 18585
loss: 0.9893313646316528,grad_norm: 0.9999991422746894, iteration: 18586
loss: 1.0016015768051147,grad_norm: 0.9999992940103875, iteration: 18587
loss: 1.0299206972122192,grad_norm: 0.9999991820514834, iteration: 18588
loss: 0.9920766949653625,grad_norm: 0.999999223789542, iteration: 18589
loss: 0.9670677781105042,grad_norm: 0.9999991559060397, iteration: 18590
loss: 0.9909182190895081,grad_norm: 0.9999992026320754, iteration: 18591
loss: 1.0206120014190674,grad_norm: 0.9999995083916787, iteration: 18592
loss: 1.0753945112228394,grad_norm: 0.999999300018453, iteration: 18593
loss: 1.0197124481201172,grad_norm: 0.9999990221814998, iteration: 18594
loss: 1.0163383483886719,grad_norm: 0.9689818005918406, iteration: 18595
loss: 0.9566571712493896,grad_norm: 0.9999991642755939, iteration: 18596
loss: 1.0067226886749268,grad_norm: 0.9999991431694741, iteration: 18597
loss: 0.9891811013221741,grad_norm: 0.9999991680375063, iteration: 18598
loss: 1.0149918794631958,grad_norm: 0.9945921815288844, iteration: 18599
loss: 0.98820561170578,grad_norm: 0.9999991679917378, iteration: 18600
loss: 1.0103570222854614,grad_norm: 0.8579703149284568, iteration: 18601
loss: 1.0179603099822998,grad_norm: 0.9999996257623986, iteration: 18602
loss: 1.0404980182647705,grad_norm: 0.999999192285243, iteration: 18603
loss: 1.0111252069473267,grad_norm: 0.9999991132874981, iteration: 18604
loss: 1.0466772317886353,grad_norm: 0.9999991898808827, iteration: 18605
loss: 1.0252058506011963,grad_norm: 0.9999993953538254, iteration: 18606
loss: 1.0685701370239258,grad_norm: 0.9999995205165245, iteration: 18607
loss: 1.068737268447876,grad_norm: 0.9641133201246969, iteration: 18608
loss: 1.0348020792007446,grad_norm: 0.9999991228790862, iteration: 18609
loss: 1.0055848360061646,grad_norm: 0.9999991700005998, iteration: 18610
loss: 1.035343885421753,grad_norm: 0.99999915656366, iteration: 18611
loss: 1.045749306678772,grad_norm: 0.9999992409065972, iteration: 18612
loss: 1.0057393312454224,grad_norm: 0.9999991790609742, iteration: 18613
loss: 1.0378825664520264,grad_norm: 0.9999991933440192, iteration: 18614
loss: 1.0374647378921509,grad_norm: 0.9999991258212845, iteration: 18615
loss: 1.0428184270858765,grad_norm: 0.9999991858291316, iteration: 18616
loss: 1.0732436180114746,grad_norm: 0.9999992447484681, iteration: 18617
loss: 1.022840976715088,grad_norm: 0.9999991218171377, iteration: 18618
loss: 0.9614204168319702,grad_norm: 0.9999990526284961, iteration: 18619
loss: 1.0118576288223267,grad_norm: 0.9262796513022087, iteration: 18620
loss: 1.0089576244354248,grad_norm: 0.9653950650104651, iteration: 18621
loss: 1.005934238433838,grad_norm: 0.9999991056376176, iteration: 18622
loss: 1.0601266622543335,grad_norm: 0.9999997299720967, iteration: 18623
loss: 1.028427004814148,grad_norm: 0.9999990832406734, iteration: 18624
loss: 1.0526219606399536,grad_norm: 0.9999994169839341, iteration: 18625
loss: 1.0139944553375244,grad_norm: 0.9999991678924111, iteration: 18626
loss: 0.9773309230804443,grad_norm: 0.9999990224978198, iteration: 18627
loss: 0.9981432557106018,grad_norm: 0.9999993035880974, iteration: 18628
loss: 1.002393126487732,grad_norm: 0.9999992190508354, iteration: 18629
loss: 1.0181434154510498,grad_norm: 0.8443367729732733, iteration: 18630
loss: 1.0449491739273071,grad_norm: 0.9568628079503952, iteration: 18631
loss: 1.0181962251663208,grad_norm: 0.9999993117034739, iteration: 18632
loss: 1.00368070602417,grad_norm: 0.9999992286968734, iteration: 18633
loss: 1.0633622407913208,grad_norm: 0.9999996470567067, iteration: 18634
loss: 1.0399752855300903,grad_norm: 0.9999993044573964, iteration: 18635
loss: 0.9902229905128479,grad_norm: 0.9999991160774901, iteration: 18636
loss: 0.9930714964866638,grad_norm: 0.9999996062218588, iteration: 18637
loss: 1.0046992301940918,grad_norm: 0.9999991743087343, iteration: 18638
loss: 0.969046950340271,grad_norm: 0.999999104102971, iteration: 18639
loss: 1.0307393074035645,grad_norm: 0.997105167730199, iteration: 18640
loss: 1.0541448593139648,grad_norm: 0.9999991995217588, iteration: 18641
loss: 1.0339750051498413,grad_norm: 0.9077677355001386, iteration: 18642
loss: 1.0246206521987915,grad_norm: 0.9999990727308045, iteration: 18643
loss: 1.006431221961975,grad_norm: 0.9999991125491471, iteration: 18644
loss: 1.0126283168792725,grad_norm: 0.999999508055434, iteration: 18645
loss: 1.0120491981506348,grad_norm: 0.9999991071922479, iteration: 18646
loss: 1.015318751335144,grad_norm: 0.9999992166329684, iteration: 18647
loss: 1.0146085023880005,grad_norm: 0.9909599533348223, iteration: 18648
loss: 1.0159322023391724,grad_norm: 0.9353367382504805, iteration: 18649
loss: 0.9827277064323425,grad_norm: 0.9999992759567188, iteration: 18650
loss: 1.0301460027694702,grad_norm: 0.9999992658312982, iteration: 18651
loss: 1.0262137651443481,grad_norm: 0.8224733363628254, iteration: 18652
loss: 1.0592609643936157,grad_norm: 0.9999991718280757, iteration: 18653
loss: 1.0475707054138184,grad_norm: 0.9999996060841891, iteration: 18654
loss: 0.9796019792556763,grad_norm: 0.9999992660809086, iteration: 18655
loss: 0.9848806262016296,grad_norm: 0.905235161178076, iteration: 18656
loss: 1.023135781288147,grad_norm: 0.9999991201563784, iteration: 18657
loss: 1.0726242065429688,grad_norm: 0.9999995328410196, iteration: 18658
loss: 1.0493545532226562,grad_norm: 0.927133862203101, iteration: 18659
loss: 0.9852582216262817,grad_norm: 0.9999990268099911, iteration: 18660
loss: 1.0707669258117676,grad_norm: 0.9999991918333684, iteration: 18661
loss: 0.9884448051452637,grad_norm: 0.8655752723639056, iteration: 18662
loss: 1.022273302078247,grad_norm: 0.9999990997512522, iteration: 18663
loss: 1.0020822286605835,grad_norm: 0.9607464445259432, iteration: 18664
loss: 1.0069737434387207,grad_norm: 0.9213534725587286, iteration: 18665
loss: 1.0260002613067627,grad_norm: 0.9999990138219843, iteration: 18666
loss: 0.9757640361785889,grad_norm: 0.9999993674094428, iteration: 18667
loss: 1.0507270097732544,grad_norm: 0.9999992622852697, iteration: 18668
loss: 1.0430514812469482,grad_norm: 0.9915046717343553, iteration: 18669
loss: 0.9999400973320007,grad_norm: 0.9999992415488447, iteration: 18670
loss: 1.0203697681427002,grad_norm: 0.9999992585120421, iteration: 18671
loss: 0.9492334127426147,grad_norm: 0.9999992084522883, iteration: 18672
loss: 1.0195155143737793,grad_norm: 0.997921042986133, iteration: 18673
loss: 0.9878851175308228,grad_norm: 0.9999990619149631, iteration: 18674
loss: 1.040926456451416,grad_norm: 0.9999990810504299, iteration: 18675
loss: 1.066855788230896,grad_norm: 0.9999995214148474, iteration: 18676
loss: 0.9805872440338135,grad_norm: 0.9999993298420817, iteration: 18677
loss: 1.0315483808517456,grad_norm: 0.9999990701549412, iteration: 18678
loss: 1.0212153196334839,grad_norm: 0.9999991136799444, iteration: 18679
loss: 1.0367610454559326,grad_norm: 0.9999993377422826, iteration: 18680
loss: 1.047301173210144,grad_norm: 0.9999994743073664, iteration: 18681
loss: 1.0200639963150024,grad_norm: 0.9997920551431805, iteration: 18682
loss: 1.058733344078064,grad_norm: 0.9999993027725854, iteration: 18683
loss: 1.04654061794281,grad_norm: 0.999999091201415, iteration: 18684
loss: 1.0141297578811646,grad_norm: 0.9999990213454157, iteration: 18685
loss: 1.0392948389053345,grad_norm: 0.9999993312445232, iteration: 18686
loss: 1.0533028841018677,grad_norm: 0.9999994123844407, iteration: 18687
loss: 1.0280661582946777,grad_norm: 0.9999993478940674, iteration: 18688
loss: 1.0394573211669922,grad_norm: 0.9999993068889561, iteration: 18689
loss: 1.0178189277648926,grad_norm: 0.881962870095963, iteration: 18690
loss: 0.9957677721977234,grad_norm: 0.9999991082370623, iteration: 18691
loss: 1.0253019332885742,grad_norm: 0.8732829369218243, iteration: 18692
loss: 0.9861065149307251,grad_norm: 0.999999311818551, iteration: 18693
loss: 1.0244687795639038,grad_norm: 0.9999992112420159, iteration: 18694
loss: 0.9942463636398315,grad_norm: 0.9999991440973542, iteration: 18695
loss: 1.0306891202926636,grad_norm: 0.9999990629446989, iteration: 18696
loss: 1.0085541009902954,grad_norm: 0.9999994132786769, iteration: 18697
loss: 1.0225169658660889,grad_norm: 0.9999995463166313, iteration: 18698
loss: 0.9923088550567627,grad_norm: 0.999999625853091, iteration: 18699
loss: 1.0287328958511353,grad_norm: 0.9999991952106697, iteration: 18700
loss: 1.046270728111267,grad_norm: 0.9999993429132684, iteration: 18701
loss: 1.009717345237732,grad_norm: 0.9999992051883102, iteration: 18702
loss: 1.0303770303726196,grad_norm: 0.9999993286102781, iteration: 18703
loss: 1.0210670232772827,grad_norm: 0.9999989776568237, iteration: 18704
loss: 0.9994184970855713,grad_norm: 0.9999990466794257, iteration: 18705
loss: 1.0370105504989624,grad_norm: 0.999999117257571, iteration: 18706
loss: 0.9968448281288147,grad_norm: 0.9697075859447083, iteration: 18707
loss: 1.0067030191421509,grad_norm: 0.9999995111639566, iteration: 18708
loss: 1.0493212938308716,grad_norm: 0.9999991355129717, iteration: 18709
loss: 1.0146528482437134,grad_norm: 0.9999993807763429, iteration: 18710
loss: 1.0079776048660278,grad_norm: 0.9999992139869904, iteration: 18711
loss: 1.014182686805725,grad_norm: 0.9905590298239627, iteration: 18712
loss: 1.0051380395889282,grad_norm: 0.999999206631033, iteration: 18713
loss: 1.0237828493118286,grad_norm: 0.9999990775935145, iteration: 18714
loss: 1.0263079404830933,grad_norm: 0.9999990400466752, iteration: 18715
loss: 1.0240997076034546,grad_norm: 0.9999998864136265, iteration: 18716
loss: 0.9796062707901001,grad_norm: 0.9999992344007121, iteration: 18717
loss: 1.0425857305526733,grad_norm: 0.9999992650744505, iteration: 18718
loss: 0.9848637580871582,grad_norm: 0.9999991931877944, iteration: 18719
loss: 1.0099310874938965,grad_norm: 0.9999990019017432, iteration: 18720
loss: 1.0604997873306274,grad_norm: 0.9999991480113202, iteration: 18721
loss: 1.0027961730957031,grad_norm: 0.9999989856164402, iteration: 18722
loss: 0.9906594157218933,grad_norm: 0.9999990161880626, iteration: 18723
loss: 1.0667941570281982,grad_norm: 0.9999998310141468, iteration: 18724
loss: 1.0359667539596558,grad_norm: 0.9999989984276092, iteration: 18725
loss: 1.0025922060012817,grad_norm: 0.9999990853728157, iteration: 18726
loss: 1.0019423961639404,grad_norm: 0.999999181008113, iteration: 18727
loss: 0.9983928799629211,grad_norm: 0.9999992926907892, iteration: 18728
loss: 1.0682982206344604,grad_norm: 0.9999991384627371, iteration: 18729
loss: 1.0011354684829712,grad_norm: 0.8710409961366674, iteration: 18730
loss: 1.0348800420761108,grad_norm: 0.9999991795397392, iteration: 18731
loss: 1.0295406579971313,grad_norm: 0.9999993368707308, iteration: 18732
loss: 1.0089260339736938,grad_norm: 0.9999990977762593, iteration: 18733
loss: 1.021750807762146,grad_norm: 0.999999446676022, iteration: 18734
loss: 1.0328010320663452,grad_norm: 0.9999991309275522, iteration: 18735
loss: 1.0250638723373413,grad_norm: 0.9999995960408382, iteration: 18736
loss: 1.049009919166565,grad_norm: 0.9999990431621265, iteration: 18737
loss: 1.0099724531173706,grad_norm: 0.9568496163197774, iteration: 18738
loss: 1.0772204399108887,grad_norm: 0.999999382659267, iteration: 18739
loss: 0.9875249266624451,grad_norm: 0.9785173217122088, iteration: 18740
loss: 1.0110079050064087,grad_norm: 0.9999990813981326, iteration: 18741
loss: 0.9991937279701233,grad_norm: 0.9999992877350159, iteration: 18742
loss: 1.0187231302261353,grad_norm: 0.9999991240059554, iteration: 18743
loss: 1.0360348224639893,grad_norm: 0.999999103915922, iteration: 18744
loss: 0.9791073203086853,grad_norm: 0.9999991907603486, iteration: 18745
loss: 1.0061869621276855,grad_norm: 0.9999996890260457, iteration: 18746
loss: 0.9923871755599976,grad_norm: 0.9740770939153268, iteration: 18747
loss: 0.9827725291252136,grad_norm: 0.9579776863312276, iteration: 18748
loss: 0.986738383769989,grad_norm: 0.9999991169334889, iteration: 18749
loss: 0.9475377202033997,grad_norm: 0.9999990757390126, iteration: 18750
loss: 0.9957984089851379,grad_norm: 0.9999992084079332, iteration: 18751
loss: 1.0364773273468018,grad_norm: 0.9266528473390445, iteration: 18752
loss: 1.0254311561584473,grad_norm: 0.9999995496385342, iteration: 18753
loss: 1.033239722251892,grad_norm: 0.9999994847025175, iteration: 18754
loss: 1.034761905670166,grad_norm: 0.9089651248708327, iteration: 18755
loss: 1.0002188682556152,grad_norm: 0.9902812136773649, iteration: 18756
loss: 1.0289925336837769,grad_norm: 0.9999990625213998, iteration: 18757
loss: 1.0323338508605957,grad_norm: 0.9999990365080437, iteration: 18758
loss: 1.027268648147583,grad_norm: 0.9999996191807639, iteration: 18759
loss: 0.9930091500282288,grad_norm: 0.9999991592097072, iteration: 18760
loss: 1.0059003829956055,grad_norm: 0.9999991256765681, iteration: 18761
loss: 0.9928538203239441,grad_norm: 0.9999993718548404, iteration: 18762
loss: 0.9868387579917908,grad_norm: 0.9999990225065782, iteration: 18763
loss: 1.005049467086792,grad_norm: 0.9999991541560888, iteration: 18764
loss: 1.0226850509643555,grad_norm: 0.9999992243581275, iteration: 18765
loss: 0.9836276769638062,grad_norm: 0.9999990070153121, iteration: 18766
loss: 1.0091865062713623,grad_norm: 0.9637133066901846, iteration: 18767
loss: 1.0602368116378784,grad_norm: 0.9999992148239617, iteration: 18768
loss: 0.9722957015037537,grad_norm: 0.9999991340251242, iteration: 18769
loss: 0.9832946062088013,grad_norm: 0.9999994016782177, iteration: 18770
loss: 0.9940521717071533,grad_norm: 0.9999992231600633, iteration: 18771
loss: 1.0325837135314941,grad_norm: 0.9999991967208907, iteration: 18772
loss: 1.0045685768127441,grad_norm: 0.9999991158990048, iteration: 18773
loss: 1.0606046915054321,grad_norm: 0.9999992896290626, iteration: 18774
loss: 0.9797297716140747,grad_norm: 0.9999988786285591, iteration: 18775
loss: 1.0623996257781982,grad_norm: 0.9999992323574592, iteration: 18776
loss: 1.0374822616577148,grad_norm: 0.9999992298483044, iteration: 18777
loss: 0.9897950291633606,grad_norm: 0.9999993864873467, iteration: 18778
loss: 1.0150684118270874,grad_norm: 0.9999991248239132, iteration: 18779
loss: 0.9927030801773071,grad_norm: 0.9999991201259993, iteration: 18780
loss: 0.9587060809135437,grad_norm: 0.999999129947533, iteration: 18781
loss: 1.000969409942627,grad_norm: 0.9999990650475049, iteration: 18782
loss: 1.0368807315826416,grad_norm: 0.9901997809924622, iteration: 18783
loss: 1.0304805040359497,grad_norm: 0.999999185161943, iteration: 18784
loss: 1.0037466287612915,grad_norm: 0.999999419964125, iteration: 18785
loss: 1.0259915590286255,grad_norm: 0.9999990395930733, iteration: 18786
loss: 0.9977113008499146,grad_norm: 0.9999991895394527, iteration: 18787
loss: 1.046217679977417,grad_norm: 0.9764756589584096, iteration: 18788
loss: 1.0058618783950806,grad_norm: 0.9999990532519389, iteration: 18789
loss: 1.030187726020813,grad_norm: 0.9999992638689096, iteration: 18790
loss: 1.0040581226348877,grad_norm: 0.9999993380001924, iteration: 18791
loss: 0.968975305557251,grad_norm: 0.9485643577996553, iteration: 18792
loss: 0.9840977191925049,grad_norm: 0.8266397454207619, iteration: 18793
loss: 1.0034898519515991,grad_norm: 0.910046267504413, iteration: 18794
loss: 0.9981673955917358,grad_norm: 0.9999991657328311, iteration: 18795
loss: 1.0455178022384644,grad_norm: 0.9999990366414984, iteration: 18796
loss: 1.0172102451324463,grad_norm: 0.9811540019996513, iteration: 18797
loss: 0.9836025834083557,grad_norm: 0.9999992598033192, iteration: 18798
loss: 1.0387202501296997,grad_norm: 0.929205743336832, iteration: 18799
loss: 1.0086561441421509,grad_norm: 0.9999994859932689, iteration: 18800
loss: 1.1003676652908325,grad_norm: 0.9999993363728529, iteration: 18801
loss: 1.0782719850540161,grad_norm: 0.9999996034305817, iteration: 18802
loss: 1.0315513610839844,grad_norm: 0.9999994270615078, iteration: 18803
loss: 1.0033502578735352,grad_norm: 0.9999991980800439, iteration: 18804
loss: 0.9795606732368469,grad_norm: 0.9199253744473262, iteration: 18805
loss: 1.0346944332122803,grad_norm: 0.9999991413093441, iteration: 18806
loss: 0.9903234243392944,grad_norm: 0.9999992824521651, iteration: 18807
loss: 1.00565505027771,grad_norm: 0.9999991869863984, iteration: 18808
loss: 0.9857746362686157,grad_norm: 0.999999207642314, iteration: 18809
loss: 1.0231714248657227,grad_norm: 0.9999993338329628, iteration: 18810
loss: 1.0587915182113647,grad_norm: 0.9999993629298606, iteration: 18811
loss: 0.9867814779281616,grad_norm: 0.9999989872663204, iteration: 18812
loss: 1.0215531587600708,grad_norm: 0.91265424416533, iteration: 18813
loss: 1.0397014617919922,grad_norm: 0.9999992599549471, iteration: 18814
loss: 1.0072427988052368,grad_norm: 0.9999991687974406, iteration: 18815
loss: 0.9950584173202515,grad_norm: 0.9999993619093136, iteration: 18816
loss: 1.0317474603652954,grad_norm: 0.9999991242598711, iteration: 18817
loss: 0.9915693402290344,grad_norm: 0.9845039751949166, iteration: 18818
loss: 1.0918139219284058,grad_norm: 0.9999999207658451, iteration: 18819
loss: 1.0126793384552002,grad_norm: 0.9999990929040987, iteration: 18820
loss: 0.9825875759124756,grad_norm: 0.9958477461167273, iteration: 18821
loss: 1.1106963157653809,grad_norm: 0.9999996220691942, iteration: 18822
loss: 1.0034950971603394,grad_norm: 0.9999990825239946, iteration: 18823
loss: 0.9819427728652954,grad_norm: 0.9999992134872794, iteration: 18824
loss: 0.9805331826210022,grad_norm: 0.9999992677018016, iteration: 18825
loss: 1.0460015535354614,grad_norm: 0.9999995796568026, iteration: 18826
loss: 1.0069465637207031,grad_norm: 0.9999991405620071, iteration: 18827
loss: 1.0477726459503174,grad_norm: 0.9999994689084248, iteration: 18828
loss: 1.022220492362976,grad_norm: 0.8784057561680788, iteration: 18829
loss: 0.9973897933959961,grad_norm: 0.9999992659744258, iteration: 18830
loss: 1.024141788482666,grad_norm: 0.9999991751834882, iteration: 18831
loss: 1.0298552513122559,grad_norm: 0.9999992225889256, iteration: 18832
loss: 1.0065138339996338,grad_norm: 0.9805422053562047, iteration: 18833
loss: 1.0346698760986328,grad_norm: 0.9999990599929746, iteration: 18834
loss: 1.0471187829971313,grad_norm: 0.9999990351545254, iteration: 18835
loss: 1.0435587167739868,grad_norm: 0.8845614827044461, iteration: 18836
loss: 1.0131257772445679,grad_norm: 0.7609916142400165, iteration: 18837
loss: 1.0186266899108887,grad_norm: 0.9661806947183801, iteration: 18838
loss: 1.0190027952194214,grad_norm: 0.9999990799613009, iteration: 18839
loss: 1.0323338508605957,grad_norm: 0.9999991021514907, iteration: 18840
loss: 1.0403690338134766,grad_norm: 0.9999993273390896, iteration: 18841
loss: 1.0230865478515625,grad_norm: 0.99999904742839, iteration: 18842
loss: 1.036224603652954,grad_norm: 0.9999992222993703, iteration: 18843
loss: 1.0405876636505127,grad_norm: 0.9195947558291035, iteration: 18844
loss: 1.053009271621704,grad_norm: 0.9999990831543967, iteration: 18845
loss: 1.0164477825164795,grad_norm: 0.9999991987375465, iteration: 18846
loss: 0.9924810528755188,grad_norm: 0.9896107454222931, iteration: 18847
loss: 0.9800232648849487,grad_norm: 0.9999990732176263, iteration: 18848
loss: 1.0252368450164795,grad_norm: 0.9999993832541737, iteration: 18849
loss: 1.0065889358520508,grad_norm: 0.9999992537915458, iteration: 18850
loss: 1.0067602396011353,grad_norm: 0.9999990808533714, iteration: 18851
loss: 0.9883535504341125,grad_norm: 0.9999991541749995, iteration: 18852
loss: 1.0122489929199219,grad_norm: 0.9588257672243276, iteration: 18853
loss: 0.9902123212814331,grad_norm: 0.9999993052592657, iteration: 18854
loss: 1.0055625438690186,grad_norm: 0.9999991477112855, iteration: 18855
loss: 0.9779917001724243,grad_norm: 0.9999991765742887, iteration: 18856
loss: 1.006692886352539,grad_norm: 0.8871737287340221, iteration: 18857
loss: 1.057877779006958,grad_norm: 0.9999997075425815, iteration: 18858
loss: 0.9988476037979126,grad_norm: 0.9999991210691052, iteration: 18859
loss: 1.0140384435653687,grad_norm: 0.9999991639819079, iteration: 18860
loss: 0.9798275232315063,grad_norm: 0.9999990656309601, iteration: 18861
loss: 0.9774741530418396,grad_norm: 0.9999989817232248, iteration: 18862
loss: 1.0149370431900024,grad_norm: 0.9999990128409931, iteration: 18863
loss: 0.9996688961982727,grad_norm: 0.9999992078986634, iteration: 18864
loss: 0.9970519542694092,grad_norm: 0.9355300588906013, iteration: 18865
loss: 1.02252197265625,grad_norm: 0.9999990124902634, iteration: 18866
loss: 1.0666698217391968,grad_norm: 0.9719208874023604, iteration: 18867
loss: 0.9948111772537231,grad_norm: 0.9999990307825896, iteration: 18868
loss: 1.0185242891311646,grad_norm: 0.9553542833332437, iteration: 18869
loss: 1.0299551486968994,grad_norm: 0.9496161190063586, iteration: 18870
loss: 1.030860185623169,grad_norm: 0.9960499694291027, iteration: 18871
loss: 0.9578408598899841,grad_norm: 0.8986954297619604, iteration: 18872
loss: 0.9694663882255554,grad_norm: 0.9999990752516903, iteration: 18873
loss: 0.9840607643127441,grad_norm: 0.9937057641395324, iteration: 18874
loss: 1.0167001485824585,grad_norm: 0.9999991429248126, iteration: 18875
loss: 0.9754499793052673,grad_norm: 0.9647650809055466, iteration: 18876
loss: 1.0346786975860596,grad_norm: 0.999999162469388, iteration: 18877
loss: 0.9852347373962402,grad_norm: 0.9999990541998982, iteration: 18878
loss: 1.0280117988586426,grad_norm: 0.9805884213174091, iteration: 18879
loss: 1.0076513290405273,grad_norm: 0.9999992869381051, iteration: 18880
loss: 1.0081524848937988,grad_norm: 0.9999991077676429, iteration: 18881
loss: 0.9730526804924011,grad_norm: 0.9999991330138277, iteration: 18882
loss: 1.0510281324386597,grad_norm: 0.9999991352114566, iteration: 18883
loss: 0.9752212166786194,grad_norm: 0.9999990625130009, iteration: 18884
loss: 1.041312575340271,grad_norm: 0.9999992749032565, iteration: 18885
loss: 1.0175946950912476,grad_norm: 0.999999036119293, iteration: 18886
loss: 0.9967378973960876,grad_norm: 0.9999989634796589, iteration: 18887
loss: 1.0261757373809814,grad_norm: 0.971886873568754, iteration: 18888
loss: 1.0459282398223877,grad_norm: 0.9999989908510508, iteration: 18889
loss: 0.9804881811141968,grad_norm: 0.9901548315107658, iteration: 18890
loss: 1.0155304670333862,grad_norm: 0.999999107205119, iteration: 18891
loss: 1.0191470384597778,grad_norm: 0.9999992142766034, iteration: 18892
loss: 1.011778473854065,grad_norm: 0.9044612585565247, iteration: 18893
loss: 1.0303467512130737,grad_norm: 0.9677877421404955, iteration: 18894
loss: 1.0587297677993774,grad_norm: 0.9999992328867064, iteration: 18895
loss: 1.0063267946243286,grad_norm: 0.9432908449315277, iteration: 18896
loss: 1.021636962890625,grad_norm: 0.9999993549276653, iteration: 18897
loss: 1.0268583297729492,grad_norm: 0.9324315713108351, iteration: 18898
loss: 1.0058355331420898,grad_norm: 0.9999991135715883, iteration: 18899
loss: 1.049919843673706,grad_norm: 0.9999991340095248, iteration: 18900
loss: 0.9834346771240234,grad_norm: 0.9247933821568789, iteration: 18901
loss: 1.011334776878357,grad_norm: 0.9999992548735186, iteration: 18902
loss: 1.01984703540802,grad_norm: 0.9999990985314179, iteration: 18903
loss: 1.022333025932312,grad_norm: 0.9999996352100312, iteration: 18904
loss: 0.9961857795715332,grad_norm: 0.8249979190734121, iteration: 18905
loss: 0.9723342061042786,grad_norm: 0.9886430575212487, iteration: 18906
loss: 1.0032907724380493,grad_norm: 0.9999990451898029, iteration: 18907
loss: 1.0308583974838257,grad_norm: 0.9999992068346033, iteration: 18908
loss: 1.1186063289642334,grad_norm: 0.9999996151118167, iteration: 18909
loss: 0.972078800201416,grad_norm: 0.9999991289027057, iteration: 18910
loss: 1.004412293434143,grad_norm: 0.9999992876011579, iteration: 18911
loss: 0.972800612449646,grad_norm: 0.9999991030621244, iteration: 18912
loss: 1.0286437273025513,grad_norm: 0.9999994875929412, iteration: 18913
loss: 1.00239896774292,grad_norm: 0.972078452082188, iteration: 18914
loss: 1.0068368911743164,grad_norm: 0.9999990495233103, iteration: 18915
loss: 1.0139338970184326,grad_norm: 0.9999991828803032, iteration: 18916
loss: 0.9986200332641602,grad_norm: 0.8082201140267047, iteration: 18917
loss: 0.9990525841712952,grad_norm: 0.9999991083084685, iteration: 18918
loss: 1.0526615381240845,grad_norm: 0.9999994595185063, iteration: 18919
loss: 1.007023811340332,grad_norm: 0.8933119106147778, iteration: 18920
loss: 1.048665165901184,grad_norm: 0.9966542879727114, iteration: 18921
loss: 1.0527677536010742,grad_norm: 0.9999990205068586, iteration: 18922
loss: 1.048008680343628,grad_norm: 0.9999992345311706, iteration: 18923
loss: 1.019705891609192,grad_norm: 0.9999992943149466, iteration: 18924
loss: 0.9835032224655151,grad_norm: 0.9091898676152351, iteration: 18925
loss: 1.0834128856658936,grad_norm: 0.9999994943549104, iteration: 18926
loss: 0.994873046875,grad_norm: 0.9999990434144425, iteration: 18927
loss: 1.0490825176239014,grad_norm: 0.9999992285438589, iteration: 18928
loss: 1.0223480463027954,grad_norm: 0.8859363052062692, iteration: 18929
loss: 1.0300391912460327,grad_norm: 0.9999997109769246, iteration: 18930
loss: 1.0736668109893799,grad_norm: 0.9999992613480736, iteration: 18931
loss: 0.9558150768280029,grad_norm: 0.9263863285250686, iteration: 18932
loss: 1.0230637788772583,grad_norm: 0.9999991440648147, iteration: 18933
loss: 1.0088491439819336,grad_norm: 0.7825060654170901, iteration: 18934
loss: 0.9631823897361755,grad_norm: 0.9999991024546305, iteration: 18935
loss: 0.9971839785575867,grad_norm: 0.9999990538796271, iteration: 18936
loss: 1.0063941478729248,grad_norm: 0.9999991451866959, iteration: 18937
loss: 0.9862865805625916,grad_norm: 0.9999994696726535, iteration: 18938
loss: 1.0622552633285522,grad_norm: 0.9999991331794108, iteration: 18939
loss: 1.0260272026062012,grad_norm: 0.9999990653700657, iteration: 18940
loss: 0.992073118686676,grad_norm: 0.9499085633227125, iteration: 18941
loss: 1.0021541118621826,grad_norm: 0.999999200032891, iteration: 18942
loss: 1.0084855556488037,grad_norm: 0.9999992238546641, iteration: 18943
loss: 0.9643480777740479,grad_norm: 0.9999992027062619, iteration: 18944
loss: 0.9814064502716064,grad_norm: 0.9999995123118534, iteration: 18945
loss: 1.0135056972503662,grad_norm: 0.9999991270054357, iteration: 18946
loss: 1.0443366765975952,grad_norm: 0.9999993301886636, iteration: 18947
loss: 1.013399362564087,grad_norm: 0.9512096642026608, iteration: 18948
loss: 0.941567599773407,grad_norm: 0.9999991072774117, iteration: 18949
loss: 1.0090934038162231,grad_norm: 0.8052952069892368, iteration: 18950
loss: 1.0186570882797241,grad_norm: 0.9999991155328538, iteration: 18951
loss: 1.0182923078536987,grad_norm: 0.9999991720351695, iteration: 18952
loss: 0.9961570501327515,grad_norm: 0.9999991645199394, iteration: 18953
loss: 1.0423511266708374,grad_norm: 0.9999991983358429, iteration: 18954
loss: 1.0193246603012085,grad_norm: 0.9999991134132498, iteration: 18955
loss: 1.0825743675231934,grad_norm: 0.9999992528073414, iteration: 18956
loss: 0.9789087772369385,grad_norm: 0.9999998011818481, iteration: 18957
loss: 1.0287206172943115,grad_norm: 0.9999998109686669, iteration: 18958
loss: 0.9972552061080933,grad_norm: 0.7978017138580309, iteration: 18959
loss: 1.0107208490371704,grad_norm: 0.9999992360066221, iteration: 18960
loss: 1.0074177980422974,grad_norm: 0.9999991294170123, iteration: 18961
loss: 0.9944430589675903,grad_norm: 0.9999990416587962, iteration: 18962
loss: 1.0161961317062378,grad_norm: 0.9999991434004352, iteration: 18963
loss: 1.0547598600387573,grad_norm: 0.9957540777209661, iteration: 18964
loss: 1.0185573101043701,grad_norm: 0.9999994057085345, iteration: 18965
loss: 0.9927405714988708,grad_norm: 0.9999990747698446, iteration: 18966
loss: 0.9791067838668823,grad_norm: 0.9999992470709881, iteration: 18967
loss: 1.0650838613510132,grad_norm: 0.999999836201165, iteration: 18968
loss: 0.9918522238731384,grad_norm: 0.9203087514845716, iteration: 18969
loss: 1.0193842649459839,grad_norm: 0.9999991275316257, iteration: 18970
loss: 1.0222017765045166,grad_norm: 0.8826523096185919, iteration: 18971
loss: 1.0488158464431763,grad_norm: 0.9999992071767638, iteration: 18972
loss: 1.013129472732544,grad_norm: 0.9880146308103509, iteration: 18973
loss: 1.0175204277038574,grad_norm: 0.9999999393568569, iteration: 18974
loss: 1.00784432888031,grad_norm: 0.999999050828122, iteration: 18975
loss: 1.004800796508789,grad_norm: 0.9999990885974288, iteration: 18976
loss: 1.0212827920913696,grad_norm: 0.9999993050594962, iteration: 18977
loss: 1.0986829996109009,grad_norm: 0.9999992907572707, iteration: 18978
loss: 1.0045645236968994,grad_norm: 0.9906274120954176, iteration: 18979
loss: 1.0233181715011597,grad_norm: 0.9999995984465128, iteration: 18980
loss: 1.093042016029358,grad_norm: 0.9999993041802995, iteration: 18981
loss: 0.9993838667869568,grad_norm: 0.9756505842315037, iteration: 18982
loss: 0.9936549663543701,grad_norm: 0.9923575602659259, iteration: 18983
loss: 1.0234242677688599,grad_norm: 0.9999991816653286, iteration: 18984
loss: 1.014444351196289,grad_norm: 0.999999268119637, iteration: 18985
loss: 1.0066652297973633,grad_norm: 0.9499207824744993, iteration: 18986
loss: 0.9966661930084229,grad_norm: 0.9679836484659549, iteration: 18987
loss: 1.0195001363754272,grad_norm: 0.9999989929706825, iteration: 18988
loss: 0.990481436252594,grad_norm: 0.9907257692924275, iteration: 18989
loss: 0.9924107789993286,grad_norm: 0.9999991158399153, iteration: 18990
loss: 1.0250324010849,grad_norm: 0.9999992632922908, iteration: 18991
loss: 1.0695593357086182,grad_norm: 0.9999993258673834, iteration: 18992
loss: 0.9986116886138916,grad_norm: 0.9999997774062253, iteration: 18993
loss: 1.0175139904022217,grad_norm: 0.9780456487807346, iteration: 18994
loss: 1.0211728811264038,grad_norm: 0.9411417206762953, iteration: 18995
loss: 0.9623044729232788,grad_norm: 0.999999302175913, iteration: 18996
loss: 1.0014111995697021,grad_norm: 0.9999990877348134, iteration: 18997
loss: 1.014694094657898,grad_norm: 0.9999992662297755, iteration: 18998
loss: 0.9982646703720093,grad_norm: 0.9999990888103878, iteration: 18999
loss: 1.0132510662078857,grad_norm: 0.9999992634383139, iteration: 19000
loss: 1.0661159753799438,grad_norm: 0.9999996994661293, iteration: 19001
loss: 0.9926478862762451,grad_norm: 0.9999995439955724, iteration: 19002
loss: 1.0039721727371216,grad_norm: 0.9999993089956605, iteration: 19003
loss: 1.0028773546218872,grad_norm: 0.9999990696926013, iteration: 19004
loss: 0.9999342560768127,grad_norm: 0.9999991769342838, iteration: 19005
loss: 1.0269018411636353,grad_norm: 0.9999991105823077, iteration: 19006
loss: 1.0313717126846313,grad_norm: 0.9999990528536683, iteration: 19007
loss: 1.0134847164154053,grad_norm: 0.9999989920195539, iteration: 19008
loss: 0.9818767309188843,grad_norm: 0.9454482994541642, iteration: 19009
loss: 1.0350016355514526,grad_norm: 0.9999992655164393, iteration: 19010
loss: 0.9648775458335876,grad_norm: 0.9999990119493734, iteration: 19011
loss: 1.0084874629974365,grad_norm: 0.9999998108756136, iteration: 19012
loss: 0.980778157711029,grad_norm: 0.9090256473331947, iteration: 19013
loss: 1.0392810106277466,grad_norm: 0.999999552493313, iteration: 19014
loss: 1.0301318168640137,grad_norm: 0.9999992038194253, iteration: 19015
loss: 1.0452938079833984,grad_norm: 0.9999991920678237, iteration: 19016
loss: 1.0004487037658691,grad_norm: 0.9999989949882135, iteration: 19017
loss: 1.0157792568206787,grad_norm: 0.8742153933239304, iteration: 19018
loss: 1.0081446170806885,grad_norm: 0.9601270827485741, iteration: 19019
loss: 1.043484091758728,grad_norm: 0.9999993656016103, iteration: 19020
loss: 1.000366449356079,grad_norm: 0.9999990422149211, iteration: 19021
loss: 1.045017957687378,grad_norm: 0.9999991839861129, iteration: 19022
loss: 1.0398122072219849,grad_norm: 0.9999991721318668, iteration: 19023
loss: 0.98127681016922,grad_norm: 0.9999991275932366, iteration: 19024
loss: 1.0704565048217773,grad_norm: 0.9999994900194976, iteration: 19025
loss: 0.9996774792671204,grad_norm: 0.9999991007875251, iteration: 19026
loss: 1.040374755859375,grad_norm: 0.9711061026838244, iteration: 19027
loss: 0.9840919375419617,grad_norm: 0.9999991111637251, iteration: 19028
loss: 1.0265262126922607,grad_norm: 0.9999989965512481, iteration: 19029
loss: 1.0210736989974976,grad_norm: 0.9999991747443367, iteration: 19030
loss: 1.0105479955673218,grad_norm: 0.9999991475179, iteration: 19031
loss: 1.0328000783920288,grad_norm: 0.9999990852582072, iteration: 19032
loss: 0.9997589588165283,grad_norm: 0.9999992505256491, iteration: 19033
loss: 1.037213683128357,grad_norm: 0.999999114444269, iteration: 19034
loss: 1.0008635520935059,grad_norm: 0.999999064629933, iteration: 19035
loss: 1.0378527641296387,grad_norm: 0.9999996504841456, iteration: 19036
loss: 1.0157912969589233,grad_norm: 0.9999990180587681, iteration: 19037
loss: 1.0482193231582642,grad_norm: 0.9999991886390106, iteration: 19038
loss: 1.006854772567749,grad_norm: 0.9999990203567491, iteration: 19039
loss: 1.0157607793807983,grad_norm: 0.9999992234544481, iteration: 19040
loss: 1.0314568281173706,grad_norm: 0.9999990578748524, iteration: 19041
loss: 1.0364582538604736,grad_norm: 0.9999994801190483, iteration: 19042
loss: 1.0034189224243164,grad_norm: 0.9999990247889978, iteration: 19043
loss: 1.001022458076477,grad_norm: 0.8832743259189031, iteration: 19044
loss: 1.026423454284668,grad_norm: 0.9999990903713495, iteration: 19045
loss: 0.9795985221862793,grad_norm: 0.9999990608033252, iteration: 19046
loss: 1.0327807664871216,grad_norm: 0.999999548842361, iteration: 19047
loss: 1.0317543745040894,grad_norm: 0.9999990205686744, iteration: 19048
loss: 1.0304173231124878,grad_norm: 0.9999991342246121, iteration: 19049
loss: 1.054236650466919,grad_norm: 0.9999993292487466, iteration: 19050
loss: 0.9677873253822327,grad_norm: 0.9999990694717747, iteration: 19051
loss: 0.9944109916687012,grad_norm: 0.816236586619319, iteration: 19052
loss: 1.0432591438293457,grad_norm: 0.9755767173266414, iteration: 19053
loss: 1.0069254636764526,grad_norm: 0.9999991925226731, iteration: 19054
loss: 0.9753724932670593,grad_norm: 0.933332512946976, iteration: 19055
loss: 0.9797462224960327,grad_norm: 0.9608526818486506, iteration: 19056
loss: 1.0190409421920776,grad_norm: 0.9999992817912176, iteration: 19057
loss: 0.9963129758834839,grad_norm: 0.9999995193675482, iteration: 19058
loss: 1.0013234615325928,grad_norm: 0.9999991928156388, iteration: 19059
loss: 0.9952226281166077,grad_norm: 0.9999990078482206, iteration: 19060
loss: 0.9699341058731079,grad_norm: 0.9999991283095556, iteration: 19061
loss: 1.0623003244400024,grad_norm: 0.9999993973609009, iteration: 19062
loss: 1.0236610174179077,grad_norm: 0.9999991268529477, iteration: 19063
loss: 1.0572665929794312,grad_norm: 0.9747043490856739, iteration: 19064
loss: 1.0229893922805786,grad_norm: 0.9999990912562305, iteration: 19065
loss: 0.9941830039024353,grad_norm: 0.9999991699653225, iteration: 19066
loss: 1.0226486921310425,grad_norm: 0.9834621711729087, iteration: 19067
loss: 1.0182558298110962,grad_norm: 0.9999991808889986, iteration: 19068
loss: 1.0294196605682373,grad_norm: 0.9999991347713906, iteration: 19069
loss: 1.0047086477279663,grad_norm: 0.9999993919179846, iteration: 19070
loss: 1.0338764190673828,grad_norm: 0.9999990390082778, iteration: 19071
loss: 1.0635333061218262,grad_norm: 0.9999996508806226, iteration: 19072
loss: 1.0504348278045654,grad_norm: 0.9999991400713782, iteration: 19073
loss: 0.9942222237586975,grad_norm: 0.999999106753413, iteration: 19074
loss: 1.0054010152816772,grad_norm: 0.9999991371799274, iteration: 19075
loss: 1.0036566257476807,grad_norm: 0.9999991464478051, iteration: 19076
loss: 0.9701074361801147,grad_norm: 0.9302599461194474, iteration: 19077
loss: 0.9912830591201782,grad_norm: 0.9999992696255491, iteration: 19078
loss: 1.0660624504089355,grad_norm: 0.9999996875867169, iteration: 19079
loss: 1.0272358655929565,grad_norm: 0.9999990565577137, iteration: 19080
loss: 1.0622751712799072,grad_norm: 0.9999991299071839, iteration: 19081
loss: 1.011240839958191,grad_norm: 0.9999993234755357, iteration: 19082
loss: 0.9905180335044861,grad_norm: 0.9999996005073727, iteration: 19083
loss: 1.0243223905563354,grad_norm: 0.999999382278909, iteration: 19084
loss: 0.9898666143417358,grad_norm: 0.999999165532296, iteration: 19085
loss: 1.0530918836593628,grad_norm: 0.999999696783558, iteration: 19086
loss: 1.0345261096954346,grad_norm: 0.9999993993874768, iteration: 19087
loss: 1.0472594499588013,grad_norm: 0.9999991635409152, iteration: 19088
loss: 1.008021354675293,grad_norm: 0.889547609276167, iteration: 19089
loss: 1.0058304071426392,grad_norm: 0.9999990592658904, iteration: 19090
loss: 1.0117802619934082,grad_norm: 0.9999991711675079, iteration: 19091
loss: 1.02208411693573,grad_norm: 0.9999990419906167, iteration: 19092
loss: 1.0430195331573486,grad_norm: 0.9999990932252871, iteration: 19093
loss: 1.0128084421157837,grad_norm: 0.9999989706760783, iteration: 19094
loss: 0.9735311269760132,grad_norm: 0.9999992174019814, iteration: 19095
loss: 1.005593180656433,grad_norm: 0.999999174766372, iteration: 19096
loss: 1.039021372795105,grad_norm: 0.9999992471999024, iteration: 19097
loss: 1.018393874168396,grad_norm: 0.9999992769559239, iteration: 19098
loss: 1.0613218545913696,grad_norm: 0.9999990989303499, iteration: 19099
loss: 0.9958705306053162,grad_norm: 0.9999996206879291, iteration: 19100
loss: 1.023842215538025,grad_norm: 0.9999990022120661, iteration: 19101
loss: 1.009611964225769,grad_norm: 0.9999997282975306, iteration: 19102
loss: 1.0479398965835571,grad_norm: 0.9999994464498891, iteration: 19103
loss: 1.0184990167617798,grad_norm: 0.9846561158613959, iteration: 19104
loss: 1.026275396347046,grad_norm: 0.9999993966873251, iteration: 19105
loss: 1.0513916015625,grad_norm: 0.9999991743348865, iteration: 19106
loss: 0.9594076871871948,grad_norm: 0.9999991645234796, iteration: 19107
loss: 1.0249308347702026,grad_norm: 0.9999995128218124, iteration: 19108
loss: 1.0160586833953857,grad_norm: 0.9999990117422208, iteration: 19109
loss: 0.9982225894927979,grad_norm: 0.9999989676396491, iteration: 19110
loss: 0.9901280999183655,grad_norm: 0.9999990454389764, iteration: 19111
loss: 1.016638994216919,grad_norm: 0.9999992436498119, iteration: 19112
loss: 1.006687879562378,grad_norm: 0.8993354706542798, iteration: 19113
loss: 1.0046656131744385,grad_norm: 0.9448861116193005, iteration: 19114
loss: 1.0009757280349731,grad_norm: 0.993966529430161, iteration: 19115
loss: 0.9294685125350952,grad_norm: 0.999999225556881, iteration: 19116
loss: 1.0375688076019287,grad_norm: 0.999999299122733, iteration: 19117
loss: 1.00917649269104,grad_norm: 0.9999990383252454, iteration: 19118
loss: 1.0555983781814575,grad_norm: 0.9999998347620388, iteration: 19119
loss: 0.9784048199653625,grad_norm: 0.9999992709941032, iteration: 19120
loss: 1.0241594314575195,grad_norm: 0.9999992028826216, iteration: 19121
loss: 0.9991369843482971,grad_norm: 0.9999993637947489, iteration: 19122
loss: 0.9838067889213562,grad_norm: 0.9854478254368959, iteration: 19123
loss: 1.0777095556259155,grad_norm: 0.9999996769631699, iteration: 19124
loss: 1.0834782123565674,grad_norm: 0.9999992588456942, iteration: 19125
loss: 1.0548204183578491,grad_norm: 0.9999991295294904, iteration: 19126
loss: 1.0870660543441772,grad_norm: 0.9999997193480772, iteration: 19127
loss: 1.0272297859191895,grad_norm: 0.9999992617605427, iteration: 19128
loss: 1.0440068244934082,grad_norm: 0.9999997180991083, iteration: 19129
loss: 0.9750210046768188,grad_norm: 0.9999993381557293, iteration: 19130
loss: 1.0400243997573853,grad_norm: 0.9999994530246669, iteration: 19131
loss: 1.0374078750610352,grad_norm: 0.9999995129408686, iteration: 19132
loss: 1.0115041732788086,grad_norm: 0.9999993549324445, iteration: 19133
loss: 1.0195239782333374,grad_norm: 0.9999990395673368, iteration: 19134
loss: 0.994483470916748,grad_norm: 0.9999993654616038, iteration: 19135
loss: 1.0496865510940552,grad_norm: 0.9999997708015482, iteration: 19136
loss: 0.9889192581176758,grad_norm: 0.9999994881863266, iteration: 19137
loss: 1.0002366304397583,grad_norm: 0.999999062783484, iteration: 19138
loss: 1.0321013927459717,grad_norm: 0.9999994146618315, iteration: 19139
loss: 1.0000330209732056,grad_norm: 0.9999991132089626, iteration: 19140
loss: 1.0493011474609375,grad_norm: 0.999999474909271, iteration: 19141
loss: 1.0352920293807983,grad_norm: 0.9999990069457881, iteration: 19142
loss: 0.9877802133560181,grad_norm: 0.9999990030887953, iteration: 19143
loss: 1.040480136871338,grad_norm: 0.9944242380771658, iteration: 19144
loss: 0.9887711405754089,grad_norm: 0.9999992953998154, iteration: 19145
loss: 0.9653528332710266,grad_norm: 0.9673933875301688, iteration: 19146
loss: 0.9940024614334106,grad_norm: 0.9286256215032803, iteration: 19147
loss: 1.0189472436904907,grad_norm: 0.9265105424271148, iteration: 19148
loss: 0.9409607648849487,grad_norm: 0.9999990424240156, iteration: 19149
loss: 1.0450388193130493,grad_norm: 0.892386997916419, iteration: 19150
loss: 1.0150588750839233,grad_norm: 0.9715626798094527, iteration: 19151
loss: 1.053289771080017,grad_norm: 0.9999994067265366, iteration: 19152
loss: 1.0198805332183838,grad_norm: 0.951231666728279, iteration: 19153
loss: 1.0374642610549927,grad_norm: 0.9999989235662144, iteration: 19154
loss: 1.0298231840133667,grad_norm: 0.9999992746203596, iteration: 19155
loss: 1.057663083076477,grad_norm: 0.9999991450466303, iteration: 19156
loss: 1.010956048965454,grad_norm: 0.9999990795883772, iteration: 19157
loss: 1.0379345417022705,grad_norm: 0.9999990701882828, iteration: 19158
loss: 0.9962151050567627,grad_norm: 0.9999990669276151, iteration: 19159
loss: 0.9875750541687012,grad_norm: 0.9999990869705134, iteration: 19160
loss: 1.0300110578536987,grad_norm: 0.9999989869752817, iteration: 19161
loss: 1.0325183868408203,grad_norm: 0.9999991779377996, iteration: 19162
loss: 0.9818213582038879,grad_norm: 0.9354770441533315, iteration: 19163
loss: 0.9842675924301147,grad_norm: 0.9999990966325139, iteration: 19164
loss: 1.12641441822052,grad_norm: 0.9999997076508738, iteration: 19165
loss: 1.000390648841858,grad_norm: 0.9053698253156628, iteration: 19166
loss: 1.0407614707946777,grad_norm: 0.9999992461036163, iteration: 19167
loss: 1.0101654529571533,grad_norm: 0.9999990797521018, iteration: 19168
loss: 0.9972242116928101,grad_norm: 0.9547809535598667, iteration: 19169
loss: 1.0431909561157227,grad_norm: 0.9999992712234097, iteration: 19170
loss: 0.9807820320129395,grad_norm: 0.9999989957982713, iteration: 19171
loss: 1.0275026559829712,grad_norm: 0.898896034864277, iteration: 19172
loss: 1.0214488506317139,grad_norm: 0.9999991706620059, iteration: 19173
loss: 0.9952715635299683,grad_norm: 0.9999992453657898, iteration: 19174
loss: 1.0298230648040771,grad_norm: 0.9999991231744236, iteration: 19175
loss: 1.018011212348938,grad_norm: 0.9999998370003175, iteration: 19176
loss: 0.995188295841217,grad_norm: 0.9077532709955253, iteration: 19177
loss: 1.0296391248703003,grad_norm: 0.9999990263668529, iteration: 19178
loss: 1.0135610103607178,grad_norm: 0.9976324601263705, iteration: 19179
loss: 0.9932918548583984,grad_norm: 0.8985273246106863, iteration: 19180
loss: 1.0358201265335083,grad_norm: 0.9999993275471671, iteration: 19181
loss: 0.9999438524246216,grad_norm: 0.9999989861516461, iteration: 19182
loss: 1.0554537773132324,grad_norm: 0.9999994397680364, iteration: 19183
loss: 1.0002671480178833,grad_norm: 0.9999990066536504, iteration: 19184
loss: 1.0400537252426147,grad_norm: 0.9999990478226907, iteration: 19185
loss: 1.0013536214828491,grad_norm: 0.9999991607070468, iteration: 19186
loss: 0.9983215928077698,grad_norm: 0.9999990154146116, iteration: 19187
loss: 1.0492030382156372,grad_norm: 0.9999990293546904, iteration: 19188
loss: 1.0109758377075195,grad_norm: 0.9999995925250096, iteration: 19189
loss: 1.041358470916748,grad_norm: 0.9999990084465259, iteration: 19190
loss: 1.0128554105758667,grad_norm: 0.9999990175758723, iteration: 19191
loss: 1.0490566492080688,grad_norm: 0.9999991265057366, iteration: 19192
loss: 1.038920283317566,grad_norm: 0.9999990251531669, iteration: 19193
loss: 0.9893245100975037,grad_norm: 0.9999990274712363, iteration: 19194
loss: 1.020065188407898,grad_norm: 0.9844789051630124, iteration: 19195
loss: 1.0215978622436523,grad_norm: 0.9999990674346848, iteration: 19196
loss: 1.0197205543518066,grad_norm: 0.999999379831559, iteration: 19197
loss: 0.9826017022132874,grad_norm: 0.9999993044788895, iteration: 19198
loss: 1.027843713760376,grad_norm: 0.9999996056891008, iteration: 19199
loss: 0.9962813854217529,grad_norm: 0.999999156600744, iteration: 19200
loss: 1.0667229890823364,grad_norm: 0.9999991718323606, iteration: 19201
loss: 1.0435365438461304,grad_norm: 0.9999990979465392, iteration: 19202
loss: 1.000959873199463,grad_norm: 0.9769284035246459, iteration: 19203
loss: 1.029908299446106,grad_norm: 0.9462532139482437, iteration: 19204
loss: 1.022612452507019,grad_norm: 0.99999913984691, iteration: 19205
loss: 0.9993361830711365,grad_norm: 0.963136782903133, iteration: 19206
loss: 1.0203382968902588,grad_norm: 0.9999994536786555, iteration: 19207
loss: 0.9984669089317322,grad_norm: 0.9999991460119484, iteration: 19208
loss: 1.042525053024292,grad_norm: 0.9999991096433299, iteration: 19209
loss: 1.0384016036987305,grad_norm: 0.9999998841448935, iteration: 19210
loss: 1.02689528465271,grad_norm: 0.9999991267249575, iteration: 19211
loss: 1.038845419883728,grad_norm: 0.9999992498688387, iteration: 19212
loss: 1.0005742311477661,grad_norm: 0.9999992908853158, iteration: 19213
loss: 1.0150911808013916,grad_norm: 0.9999991960733654, iteration: 19214
loss: 1.0248464345932007,grad_norm: 0.9717224521080039, iteration: 19215
loss: 1.0223339796066284,grad_norm: 0.9999991303707582, iteration: 19216
loss: 1.028692364692688,grad_norm: 0.9999998032221394, iteration: 19217
loss: 1.0758131742477417,grad_norm: 0.9999992981264313, iteration: 19218
loss: 1.00902259349823,grad_norm: 0.9784154541632808, iteration: 19219
loss: 1.0244011878967285,grad_norm: 0.9999994116032543, iteration: 19220
loss: 0.995135486125946,grad_norm: 0.99999918057895, iteration: 19221
loss: 0.9694210290908813,grad_norm: 0.9526732128975554, iteration: 19222
loss: 1.0269198417663574,grad_norm: 0.9999992265332238, iteration: 19223
loss: 1.0728243589401245,grad_norm: 0.9999997349375748, iteration: 19224
loss: 1.0156352519989014,grad_norm: 0.9999993012838926, iteration: 19225
loss: 1.0205880403518677,grad_norm: 0.9999991223932762, iteration: 19226
loss: 0.9765786528587341,grad_norm: 0.9999991584712468, iteration: 19227
loss: 1.064515233039856,grad_norm: 0.9999991046817176, iteration: 19228
loss: 1.0199553966522217,grad_norm: 0.9586768710599226, iteration: 19229
loss: 1.0067439079284668,grad_norm: 0.999999022348651, iteration: 19230
loss: 1.0002758502960205,grad_norm: 0.9999991360242576, iteration: 19231
loss: 1.0170984268188477,grad_norm: 0.9999993080273931, iteration: 19232
loss: 0.9859596490859985,grad_norm: 0.9584740563016693, iteration: 19233
loss: 1.0343835353851318,grad_norm: 0.9999990735816113, iteration: 19234
loss: 1.0270878076553345,grad_norm: 0.999998986580274, iteration: 19235
loss: 1.0023629665374756,grad_norm: 0.9999989714544288, iteration: 19236
loss: 1.0215622186660767,grad_norm: 0.9365340441077952, iteration: 19237
loss: 1.0132187604904175,grad_norm: 0.9464319959070685, iteration: 19238
loss: 0.9775133728981018,grad_norm: 0.9999992331900951, iteration: 19239
loss: 0.9469031691551208,grad_norm: 0.9999990607652648, iteration: 19240
loss: 1.034233808517456,grad_norm: 0.9999991723333262, iteration: 19241
loss: 0.9685685038566589,grad_norm: 0.9999991089754623, iteration: 19242
loss: 0.9962514042854309,grad_norm: 0.9999991318360995, iteration: 19243
loss: 0.9961779117584229,grad_norm: 0.8967446530495106, iteration: 19244
loss: 1.0132007598876953,grad_norm: 0.9999995266439073, iteration: 19245
loss: 1.0272666215896606,grad_norm: 0.9999991408507021, iteration: 19246
loss: 0.9900074005126953,grad_norm: 0.9999990839354519, iteration: 19247
loss: 1.0314087867736816,grad_norm: 0.9999990332271279, iteration: 19248
loss: 0.985564112663269,grad_norm: 0.9999991185440104, iteration: 19249
loss: 1.0259867906570435,grad_norm: 0.9017533503177853, iteration: 19250
loss: 0.9628263115882874,grad_norm: 0.9999992351999449, iteration: 19251
loss: 1.0462223291397095,grad_norm: 0.9999992566541454, iteration: 19252
loss: 0.9669135212898254,grad_norm: 0.9999990361383934, iteration: 19253
loss: 1.035983920097351,grad_norm: 0.9999991361742249, iteration: 19254
loss: 0.9831196069717407,grad_norm: 0.9999992342192021, iteration: 19255
loss: 1.0136741399765015,grad_norm: 0.9999989459308487, iteration: 19256
loss: 1.0182265043258667,grad_norm: 0.999999107031321, iteration: 19257
loss: 0.9730130434036255,grad_norm: 0.9999993300360911, iteration: 19258
loss: 0.9977982640266418,grad_norm: 0.9755024711883724, iteration: 19259
loss: 1.0205051898956299,grad_norm: 0.9999991387847916, iteration: 19260
loss: 1.0066266059875488,grad_norm: 0.999999110326044, iteration: 19261
loss: 1.017578125,grad_norm: 0.9999991577198175, iteration: 19262
loss: 1.0305055379867554,grad_norm: 0.8731962302992088, iteration: 19263
loss: 1.016719937324524,grad_norm: 0.9999990818713217, iteration: 19264
loss: 1.0068761110305786,grad_norm: 0.8694431262811314, iteration: 19265
loss: 0.9545994997024536,grad_norm: 0.9999991166470993, iteration: 19266
loss: 0.9587141871452332,grad_norm: 0.9999991783654317, iteration: 19267
loss: 1.0471699237823486,grad_norm: 0.9428543510655925, iteration: 19268
loss: 1.0249203443527222,grad_norm: 0.9999996455495056, iteration: 19269
loss: 1.025094747543335,grad_norm: 0.999999411974739, iteration: 19270
loss: 1.068677544593811,grad_norm: 0.9999991362776504, iteration: 19271
loss: 1.0445188283920288,grad_norm: 0.9346953968687777, iteration: 19272
loss: 1.047154188156128,grad_norm: 0.9999992389111484, iteration: 19273
loss: 0.9743903279304504,grad_norm: 0.9999992760696443, iteration: 19274
loss: 0.9802902340888977,grad_norm: 0.9999991413357724, iteration: 19275
loss: 1.0641156435012817,grad_norm: 0.9999991559796132, iteration: 19276
loss: 0.9908003211021423,grad_norm: 0.9999993130016178, iteration: 19277
loss: 0.9911335110664368,grad_norm: 0.9628802057579677, iteration: 19278
loss: 1.0205518007278442,grad_norm: 0.9718711343810044, iteration: 19279
loss: 0.9900063872337341,grad_norm: 0.9999991001281439, iteration: 19280
loss: 1.045698881149292,grad_norm: 0.8949564961523642, iteration: 19281
loss: 0.9873016476631165,grad_norm: 0.7947020026815168, iteration: 19282
loss: 1.0494507551193237,grad_norm: 0.9999990303152239, iteration: 19283
loss: 0.9989200234413147,grad_norm: 0.9999998064393953, iteration: 19284
loss: 1.0328487157821655,grad_norm: 0.9999991336945696, iteration: 19285
loss: 0.9609617590904236,grad_norm: 0.99999926225864, iteration: 19286
loss: 1.019107699394226,grad_norm: 0.999999210095754, iteration: 19287
loss: 0.9799217581748962,grad_norm: 0.9365498414122377, iteration: 19288
loss: 0.9755343794822693,grad_norm: 0.99999913697464, iteration: 19289
loss: 0.9976834058761597,grad_norm: 0.9755183582941249, iteration: 19290
loss: 1.0195093154907227,grad_norm: 0.9999991744405735, iteration: 19291
loss: 1.0487816333770752,grad_norm: 0.9999992186973857, iteration: 19292
loss: 1.0258129835128784,grad_norm: 0.9999991643318316, iteration: 19293
loss: 1.0033246278762817,grad_norm: 0.9999992436824665, iteration: 19294
loss: 1.0027046203613281,grad_norm: 0.9999991000913613, iteration: 19295
loss: 0.9955794811248779,grad_norm: 0.8937251865915384, iteration: 19296
loss: 0.9420841336250305,grad_norm: 0.999999096604699, iteration: 19297
loss: 1.0064949989318848,grad_norm: 0.999999004551092, iteration: 19298
loss: 1.017125129699707,grad_norm: 0.9999990749634553, iteration: 19299
loss: 1.014799952507019,grad_norm: 0.9999993904825337, iteration: 19300
loss: 1.0832668542861938,grad_norm: 0.9999991197351403, iteration: 19301
loss: 1.0323377847671509,grad_norm: 0.999999019848769, iteration: 19302
loss: 1.061181664466858,grad_norm: 0.9999991764126978, iteration: 19303
loss: 1.0179904699325562,grad_norm: 0.9999992092410114, iteration: 19304
loss: 1.0341166257858276,grad_norm: 0.9999996463239962, iteration: 19305
loss: 1.0293893814086914,grad_norm: 0.8237419397187337, iteration: 19306
loss: 1.0557135343551636,grad_norm: 0.9999992705964047, iteration: 19307
loss: 0.9765645265579224,grad_norm: 0.9999991857845709, iteration: 19308
loss: 1.045559287071228,grad_norm: 0.999999471760177, iteration: 19309
loss: 1.032764196395874,grad_norm: 0.9999994623260517, iteration: 19310
loss: 0.9711600542068481,grad_norm: 0.9999990516234212, iteration: 19311
loss: 0.9963569641113281,grad_norm: 0.8644625062128344, iteration: 19312
loss: 0.969896674156189,grad_norm: 0.9999989733765119, iteration: 19313
loss: 1.028083086013794,grad_norm: 0.835191115228722, iteration: 19314
loss: 0.9994968175888062,grad_norm: 0.9116899433603308, iteration: 19315
loss: 0.9825989603996277,grad_norm: 0.9053735707144357, iteration: 19316
loss: 1.0258206129074097,grad_norm: 0.935237574394508, iteration: 19317
loss: 0.9915399551391602,grad_norm: 0.9999990536186255, iteration: 19318
loss: 1.0129114389419556,grad_norm: 0.9545261271603646, iteration: 19319
loss: 1.0315251350402832,grad_norm: 0.9999991340870635, iteration: 19320
loss: 1.0055996179580688,grad_norm: 0.9999991558093946, iteration: 19321
loss: 1.020038366317749,grad_norm: 0.963462181829815, iteration: 19322
loss: 0.9945281744003296,grad_norm: 0.9569109149025011, iteration: 19323
loss: 0.9688202142715454,grad_norm: 0.9999992551323439, iteration: 19324
loss: 1.0062261819839478,grad_norm: 0.9330803422284206, iteration: 19325
loss: 1.0161925554275513,grad_norm: 0.9422267706218335, iteration: 19326
loss: 1.0815900564193726,grad_norm: 0.9999993714321984, iteration: 19327
loss: 0.9829002022743225,grad_norm: 0.99541561075499, iteration: 19328
loss: 1.004256010055542,grad_norm: 0.9999991324407087, iteration: 19329
loss: 1.0178786516189575,grad_norm: 0.8729518123349062, iteration: 19330
loss: 0.9717932343482971,grad_norm: 0.9999990963209195, iteration: 19331
loss: 1.0206929445266724,grad_norm: 0.9999991494054221, iteration: 19332
loss: 1.0112147331237793,grad_norm: 0.9999993894411159, iteration: 19333
loss: 0.9757006168365479,grad_norm: 0.9902004327901777, iteration: 19334
loss: 1.0493671894073486,grad_norm: 0.9999993032728149, iteration: 19335
loss: 1.008873701095581,grad_norm: 0.9289430398514983, iteration: 19336
loss: 1.0425076484680176,grad_norm: 0.9999994427454907, iteration: 19337
loss: 0.9772915840148926,grad_norm: 0.9624285402560285, iteration: 19338
loss: 1.0070784091949463,grad_norm: 0.9999990850391511, iteration: 19339
loss: 1.0320074558258057,grad_norm: 0.9469137981215264, iteration: 19340
loss: 1.059592366218567,grad_norm: 0.9999992867423361, iteration: 19341
loss: 1.0247092247009277,grad_norm: 0.9999996121844048, iteration: 19342
loss: 0.9808701872825623,grad_norm: 0.9999993460945317, iteration: 19343
loss: 0.9912475943565369,grad_norm: 0.99999902811452, iteration: 19344
loss: 0.9708579778671265,grad_norm: 0.9999991835581238, iteration: 19345
loss: 0.9882780313491821,grad_norm: 0.9999990820401572, iteration: 19346
loss: 1.0658676624298096,grad_norm: 0.999999259267588, iteration: 19347
loss: 1.0108102560043335,grad_norm: 0.9999990562550629, iteration: 19348
loss: 0.9658864140510559,grad_norm: 0.9999991080791878, iteration: 19349
loss: 1.0179812908172607,grad_norm: 0.9999991575217746, iteration: 19350
loss: 1.0296716690063477,grad_norm: 0.9999991245864069, iteration: 19351
loss: 1.0006446838378906,grad_norm: 0.9183081938224547, iteration: 19352
loss: 1.0182945728302002,grad_norm: 0.9999993335632293, iteration: 19353
loss: 1.0050877332687378,grad_norm: 0.999999250179689, iteration: 19354
loss: 0.9941796660423279,grad_norm: 0.9999990750132183, iteration: 19355
loss: 1.0681623220443726,grad_norm: 0.9999995363340343, iteration: 19356
loss: 1.0216120481491089,grad_norm: 0.9999990962786636, iteration: 19357
loss: 0.9818411469459534,grad_norm: 0.9726017753380252, iteration: 19358
loss: 0.9632125496864319,grad_norm: 0.977227997698757, iteration: 19359
loss: 0.9919905662536621,grad_norm: 0.9716998455611243, iteration: 19360
loss: 1.0084149837493896,grad_norm: 0.999999179160077, iteration: 19361
loss: 1.0615204572677612,grad_norm: 0.9999991905403345, iteration: 19362
loss: 1.041571021080017,grad_norm: 0.9999991291862599, iteration: 19363
loss: 1.0121692419052124,grad_norm: 0.9999989677238379, iteration: 19364
loss: 1.007599949836731,grad_norm: 0.9999991925480236, iteration: 19365
loss: 1.0765490531921387,grad_norm: 0.9999995104648273, iteration: 19366
loss: 0.9955095648765564,grad_norm: 0.9996045312938006, iteration: 19367
loss: 1.0184531211853027,grad_norm: 0.9999992086781592, iteration: 19368
loss: 1.0272423028945923,grad_norm: 0.9999992514718229, iteration: 19369
loss: 1.0216548442840576,grad_norm: 0.9999992434377987, iteration: 19370
loss: 0.9972385764122009,grad_norm: 0.9999990770450343, iteration: 19371
loss: 1.0359584093093872,grad_norm: 0.9906469902296712, iteration: 19372
loss: 1.0562156438827515,grad_norm: 0.9999993302218865, iteration: 19373
loss: 1.004629135131836,grad_norm: 0.9343848405860393, iteration: 19374
loss: 1.0053719282150269,grad_norm: 0.9140555359427261, iteration: 19375
loss: 1.025214433670044,grad_norm: 0.999999056006972, iteration: 19376
loss: 1.0009105205535889,grad_norm: 0.9650344184113202, iteration: 19377
loss: 1.03437340259552,grad_norm: 0.9817804240587019, iteration: 19378
loss: 1.0425595045089722,grad_norm: 0.9999996111218464, iteration: 19379
loss: 1.0405726432800293,grad_norm: 0.9999994682770685, iteration: 19380
loss: 1.0351868867874146,grad_norm: 0.9999991965904801, iteration: 19381
loss: 1.037096381187439,grad_norm: 0.9999990791091111, iteration: 19382
loss: 1.0219571590423584,grad_norm: 0.9493325274540201, iteration: 19383
loss: 1.030673861503601,grad_norm: 0.9664176573164696, iteration: 19384
loss: 1.0353683233261108,grad_norm: 0.9999993198470694, iteration: 19385
loss: 0.985543429851532,grad_norm: 0.9999990196210925, iteration: 19386
loss: 1.070517659187317,grad_norm: 0.999999279373757, iteration: 19387
loss: 0.9793572425842285,grad_norm: 0.9999993854484528, iteration: 19388
loss: 1.00870943069458,grad_norm: 0.9999991301407569, iteration: 19389
loss: 1.0183364152908325,grad_norm: 0.9999994349985964, iteration: 19390
loss: 1.0227524042129517,grad_norm: 0.9999993689289007, iteration: 19391
loss: 1.0130163431167603,grad_norm: 0.9999990503386718, iteration: 19392
loss: 1.0303394794464111,grad_norm: 0.999999437535792, iteration: 19393
loss: 1.0012273788452148,grad_norm: 0.9999990937414888, iteration: 19394
loss: 1.0371763706207275,grad_norm: 0.9999991101025816, iteration: 19395
loss: 1.0181410312652588,grad_norm: 0.9999992895089549, iteration: 19396
loss: 0.981519341468811,grad_norm: 0.9999991572047827, iteration: 19397
loss: 1.0405761003494263,grad_norm: 0.9999992449125614, iteration: 19398
loss: 1.013805627822876,grad_norm: 0.9999990842568944, iteration: 19399
loss: 1.020709753036499,grad_norm: 0.9697848739922532, iteration: 19400
loss: 1.0204375982284546,grad_norm: 0.9999991578204037, iteration: 19401
loss: 1.001541018486023,grad_norm: 0.9597822719564111, iteration: 19402
loss: 0.9729063510894775,grad_norm: 0.9999991973807956, iteration: 19403
loss: 1.0245692729949951,grad_norm: 0.9999991915060504, iteration: 19404
loss: 1.014210820198059,grad_norm: 0.9999996740734296, iteration: 19405
loss: 0.9895775318145752,grad_norm: 0.9999989873709848, iteration: 19406
loss: 0.9986140131950378,grad_norm: 0.9999991132463785, iteration: 19407
loss: 1.0219861268997192,grad_norm: 0.9999990142936312, iteration: 19408
loss: 1.039319396018982,grad_norm: 0.9544127885358157, iteration: 19409
loss: 0.9825928807258606,grad_norm: 0.9999991989815349, iteration: 19410
loss: 1.0562076568603516,grad_norm: 0.9999990903740634, iteration: 19411
loss: 1.014078140258789,grad_norm: 0.9999990223423534, iteration: 19412
loss: 1.0392482280731201,grad_norm: 0.9886131203103854, iteration: 19413
loss: 1.0060765743255615,grad_norm: 0.9352468363528359, iteration: 19414
loss: 1.014011263847351,grad_norm: 0.9999991809822911, iteration: 19415
loss: 1.0089547634124756,grad_norm: 0.9921833764135216, iteration: 19416
loss: 1.0124258995056152,grad_norm: 0.9999993292481404, iteration: 19417
loss: 0.9727805256843567,grad_norm: 0.9999992113576183, iteration: 19418
loss: 0.9719474911689758,grad_norm: 0.9724706116708012, iteration: 19419
loss: 1.0020575523376465,grad_norm: 0.9135827133469969, iteration: 19420
loss: 1.009779930114746,grad_norm: 0.9999990655968745, iteration: 19421
loss: 1.0452584028244019,grad_norm: 0.8580406853791265, iteration: 19422
loss: 1.0101852416992188,grad_norm: 0.9999991200563845, iteration: 19423
loss: 0.9747945070266724,grad_norm: 0.9999990942208415, iteration: 19424
loss: 1.0312366485595703,grad_norm: 0.999999228110871, iteration: 19425
loss: 1.0900553464889526,grad_norm: 0.9999990738169038, iteration: 19426
loss: 1.0313011407852173,grad_norm: 0.9999990352318276, iteration: 19427
loss: 1.014374852180481,grad_norm: 0.9999990369508153, iteration: 19428
loss: 1.030487298965454,grad_norm: 0.9999992033238361, iteration: 19429
loss: 1.015305519104004,grad_norm: 0.9999991837556423, iteration: 19430
loss: 0.9933151006698608,grad_norm: 0.9999992660190049, iteration: 19431
loss: 1.0222941637039185,grad_norm: 0.9739266847442245, iteration: 19432
loss: 1.0338841676712036,grad_norm: 0.9999992661697165, iteration: 19433
loss: 1.0437034368515015,grad_norm: 0.9349433728098497, iteration: 19434
loss: 1.0017478466033936,grad_norm: 0.9999991215670483, iteration: 19435
loss: 1.0216104984283447,grad_norm: 0.9999990731714974, iteration: 19436
loss: 1.0310031175613403,grad_norm: 0.9999990190759562, iteration: 19437
loss: 1.0502947568893433,grad_norm: 0.9999990459341512, iteration: 19438
loss: 1.0201916694641113,grad_norm: 0.999999105453967, iteration: 19439
loss: 0.9802083373069763,grad_norm: 0.9999990765637939, iteration: 19440
loss: 1.029759407043457,grad_norm: 0.9913335269344185, iteration: 19441
loss: 0.9908962845802307,grad_norm: 0.9876265557601233, iteration: 19442
loss: 1.0294322967529297,grad_norm: 0.9999990781439495, iteration: 19443
loss: 1.0111440420150757,grad_norm: 0.9999996439694032, iteration: 19444
loss: 1.0145224332809448,grad_norm: 0.999999118273976, iteration: 19445
loss: 1.0004839897155762,grad_norm: 0.999999148410203, iteration: 19446
loss: 0.9978524446487427,grad_norm: 0.9999991922560005, iteration: 19447
loss: 0.971014142036438,grad_norm: 0.9999995790381048, iteration: 19448
loss: 1.0191820859909058,grad_norm: 0.9999990640845614, iteration: 19449
loss: 0.9901606440544128,grad_norm: 0.90167771441979, iteration: 19450
loss: 1.0051935911178589,grad_norm: 0.9999996200619314, iteration: 19451
loss: 0.9802680015563965,grad_norm: 0.9507843764406827, iteration: 19452
loss: 1.0446265935897827,grad_norm: 0.9999991088167562, iteration: 19453
loss: 1.0078057050704956,grad_norm: 0.9999992125196767, iteration: 19454
loss: 1.036342978477478,grad_norm: 0.9999991812695378, iteration: 19455
loss: 1.021939754486084,grad_norm: 0.9999990718586259, iteration: 19456
loss: 1.0650042295455933,grad_norm: 0.9999996666026766, iteration: 19457
loss: 0.982690691947937,grad_norm: 0.9999990887510304, iteration: 19458
loss: 1.0282787084579468,grad_norm: 0.9999989019825251, iteration: 19459
loss: 0.9685075879096985,grad_norm: 0.9999990552731794, iteration: 19460
loss: 1.0268981456756592,grad_norm: 0.9999989675540595, iteration: 19461
loss: 1.0485167503356934,grad_norm: 0.9999992276164154, iteration: 19462
loss: 0.9955151677131653,grad_norm: 0.9999989028154459, iteration: 19463
loss: 1.031584620475769,grad_norm: 0.9776874057480711, iteration: 19464
loss: 1.0518269538879395,grad_norm: 0.9999990542189664, iteration: 19465
loss: 1.0098317861557007,grad_norm: 0.9999991767634576, iteration: 19466
loss: 1.010464072227478,grad_norm: 0.9999991911146325, iteration: 19467
loss: 1.1429295539855957,grad_norm: 0.9999996616531918, iteration: 19468
loss: 1.023819923400879,grad_norm: 0.9999991774310788, iteration: 19469
loss: 0.9895420074462891,grad_norm: 0.9999992003775839, iteration: 19470
loss: 0.9999975562095642,grad_norm: 0.9363807229053064, iteration: 19471
loss: 1.0139237642288208,grad_norm: 0.9952772760377797, iteration: 19472
loss: 1.0188599824905396,grad_norm: 0.9698239506738996, iteration: 19473
loss: 0.9513185620307922,grad_norm: 0.9999991515401232, iteration: 19474
loss: 1.0195202827453613,grad_norm: 0.9999992562972466, iteration: 19475
loss: 1.000630259513855,grad_norm: 0.9999990978559749, iteration: 19476
loss: 1.0488553047180176,grad_norm: 0.9999994072547179, iteration: 19477
loss: 1.039031744003296,grad_norm: 0.9999991851590643, iteration: 19478
loss: 1.0354552268981934,grad_norm: 0.9999998324882121, iteration: 19479
loss: 1.0539650917053223,grad_norm: 0.999999308822873, iteration: 19480
loss: 1.017289161682129,grad_norm: 0.9999991064383597, iteration: 19481
loss: 0.9677358269691467,grad_norm: 0.9999991207489866, iteration: 19482
loss: 0.9898853898048401,grad_norm: 0.9999992674591186, iteration: 19483
loss: 1.0687897205352783,grad_norm: 0.9999992808749993, iteration: 19484
loss: 0.9683494567871094,grad_norm: 0.99999930085829, iteration: 19485
loss: 1.0078164339065552,grad_norm: 0.9632708619150597, iteration: 19486
loss: 1.0781649351119995,grad_norm: 0.9999994173759051, iteration: 19487
loss: 1.0168867111206055,grad_norm: 0.9442928720509148, iteration: 19488
loss: 1.0192893743515015,grad_norm: 0.9999992844646383, iteration: 19489
loss: 0.9840339422225952,grad_norm: 0.9999990615656715, iteration: 19490
loss: 1.0639712810516357,grad_norm: 0.999999482893719, iteration: 19491
loss: 1.0008200407028198,grad_norm: 0.9999990151145994, iteration: 19492
loss: 0.9886892437934875,grad_norm: 0.999999237595194, iteration: 19493
loss: 0.998904287815094,grad_norm: 0.9999990928092474, iteration: 19494
loss: 0.9978827834129333,grad_norm: 0.9999990988188926, iteration: 19495
loss: 0.9840732216835022,grad_norm: 0.9999991913175007, iteration: 19496
loss: 1.0162595510482788,grad_norm: 0.9999992871547548, iteration: 19497
loss: 1.0135722160339355,grad_norm: 0.9999993284656037, iteration: 19498
loss: 0.9746003150939941,grad_norm: 0.9999995088320933, iteration: 19499
loss: 1.0506166219711304,grad_norm: 0.9999996876330745, iteration: 19500
loss: 0.9777982831001282,grad_norm: 0.9999991043968134, iteration: 19501
loss: 1.005314826965332,grad_norm: 0.9999991810377966, iteration: 19502
loss: 0.9597530961036682,grad_norm: 0.9999990200920521, iteration: 19503
loss: 1.0185171365737915,grad_norm: 0.9450559919162553, iteration: 19504
loss: 1.019577145576477,grad_norm: 0.9865283521876604, iteration: 19505
loss: 1.001886248588562,grad_norm: 0.9999994074700622, iteration: 19506
loss: 1.0047931671142578,grad_norm: 0.8679126241726777, iteration: 19507
loss: 0.9940412640571594,grad_norm: 0.9999992007986706, iteration: 19508
loss: 0.9713563323020935,grad_norm: 0.9999991815828124, iteration: 19509
loss: 1.0432590246200562,grad_norm: 0.9999993273184542, iteration: 19510
loss: 1.0096116065979004,grad_norm: 0.9291398034511913, iteration: 19511
loss: 0.9927743077278137,grad_norm: 0.9999991573207254, iteration: 19512
loss: 0.9938697218894958,grad_norm: 0.9718979077095456, iteration: 19513
loss: 1.052182674407959,grad_norm: 0.9999990593815524, iteration: 19514
loss: 1.0037100315093994,grad_norm: 0.9999995145883357, iteration: 19515
loss: 1.051835060119629,grad_norm: 0.9999992312008862, iteration: 19516
loss: 1.0566898584365845,grad_norm: 0.9999990774826246, iteration: 19517
loss: 1.0144468545913696,grad_norm: 0.9999991290253663, iteration: 19518
loss: 1.0017304420471191,grad_norm: 0.934417242912757, iteration: 19519
loss: 0.9828078746795654,grad_norm: 0.9999992267070271, iteration: 19520
loss: 1.0798636674880981,grad_norm: 0.9999995606959164, iteration: 19521
loss: 1.0384814739227295,grad_norm: 0.9957239585193274, iteration: 19522
loss: 1.0346721410751343,grad_norm: 0.9999993200464042, iteration: 19523
loss: 0.9710928797721863,grad_norm: 0.9999990332312907, iteration: 19524
loss: 0.9562991857528687,grad_norm: 0.9850657272431589, iteration: 19525
loss: 1.033950924873352,grad_norm: 0.981169893132194, iteration: 19526
loss: 1.040586233139038,grad_norm: 0.999999206065873, iteration: 19527
loss: 1.0163288116455078,grad_norm: 0.9999993971067903, iteration: 19528
loss: 0.976264476776123,grad_norm: 0.9999990971129621, iteration: 19529
loss: 1.032944917678833,grad_norm: 0.9681745689049641, iteration: 19530
loss: 1.0123733282089233,grad_norm: 0.9999993417788885, iteration: 19531
loss: 0.9681410789489746,grad_norm: 0.9999993952462995, iteration: 19532
loss: 0.9643746614456177,grad_norm: 0.9999990410807471, iteration: 19533
loss: 1.062034249305725,grad_norm: 0.9999992395640661, iteration: 19534
loss: 1.0463919639587402,grad_norm: 0.9999995061560664, iteration: 19535
loss: 1.119525671005249,grad_norm: 0.9999992386233891, iteration: 19536
loss: 1.0187751054763794,grad_norm: 0.9999996170659227, iteration: 19537
loss: 1.0417530536651611,grad_norm: 0.9999992463387842, iteration: 19538
loss: 1.0048466920852661,grad_norm: 0.9999992503679315, iteration: 19539
loss: 1.0621912479400635,grad_norm: 0.9999993062371111, iteration: 19540
loss: 1.0364995002746582,grad_norm: 0.9962953829134995, iteration: 19541
loss: 1.0107935667037964,grad_norm: 0.9999993867076046, iteration: 19542
loss: 1.0084151029586792,grad_norm: 0.9999991947729293, iteration: 19543
loss: 1.030947208404541,grad_norm: 0.9999992880163665, iteration: 19544
loss: 1.012523889541626,grad_norm: 0.9999990118701233, iteration: 19545
loss: 0.9847027063369751,grad_norm: 0.9999991108519748, iteration: 19546
loss: 0.997647225856781,grad_norm: 0.9999994098370696, iteration: 19547
loss: 1.0133558511734009,grad_norm: 0.9999993170693717, iteration: 19548
loss: 1.0274882316589355,grad_norm: 0.9999993059881869, iteration: 19549
loss: 1.0031414031982422,grad_norm: 0.999999089454963, iteration: 19550
loss: 0.9937488436698914,grad_norm: 0.9999991349191473, iteration: 19551
loss: 1.0199400186538696,grad_norm: 0.936405083748516, iteration: 19552
loss: 0.9876410961151123,grad_norm: 0.999999240290928, iteration: 19553
loss: 1.0175257921218872,grad_norm: 0.9999991627307367, iteration: 19554
loss: 1.0613536834716797,grad_norm: 0.999999195231178, iteration: 19555
loss: 0.9948490262031555,grad_norm: 0.999999431705788, iteration: 19556
loss: 1.089686632156372,grad_norm: 0.9999997796397335, iteration: 19557
loss: 1.0418312549591064,grad_norm: 0.9999991080797239, iteration: 19558
loss: 1.0132707357406616,grad_norm: 0.8798702948857785, iteration: 19559
loss: 1.0557093620300293,grad_norm: 0.9999995401394514, iteration: 19560
loss: 0.9884809851646423,grad_norm: 0.9999990734285861, iteration: 19561
loss: 1.0025705099105835,grad_norm: 0.9999991057592292, iteration: 19562
loss: 0.9802642464637756,grad_norm: 0.8841298955401846, iteration: 19563
loss: 1.0507017374038696,grad_norm: 0.9585572325060907, iteration: 19564
loss: 1.0102460384368896,grad_norm: 0.9260678248423093, iteration: 19565
loss: 1.017321228981018,grad_norm: 0.999999336916632, iteration: 19566
loss: 0.9821808934211731,grad_norm: 0.9999992756655987, iteration: 19567
loss: 1.0263696908950806,grad_norm: 0.9999991318453, iteration: 19568
loss: 1.004919409751892,grad_norm: 0.9999993522199586, iteration: 19569
loss: 1.0044009685516357,grad_norm: 0.9999990399093553, iteration: 19570
loss: 0.9930226802825928,grad_norm: 0.9036623787402173, iteration: 19571
loss: 1.0324136018753052,grad_norm: 0.9999990069889123, iteration: 19572
loss: 0.9648687243461609,grad_norm: 0.9999992735718745, iteration: 19573
loss: 1.0211243629455566,grad_norm: 0.9999993788569551, iteration: 19574
loss: 0.9851827621459961,grad_norm: 0.9999990778875124, iteration: 19575
loss: 0.9849721789360046,grad_norm: 0.9999995030231684, iteration: 19576
loss: 0.9714753031730652,grad_norm: 0.9437437802365507, iteration: 19577
loss: 1.0109996795654297,grad_norm: 0.999999320420377, iteration: 19578
loss: 1.0250962972640991,grad_norm: 0.9054767758349156, iteration: 19579
loss: 1.0342013835906982,grad_norm: 0.99999903493859, iteration: 19580
loss: 0.964631199836731,grad_norm: 0.99999911856087, iteration: 19581
loss: 0.9915663599967957,grad_norm: 0.9999991262811817, iteration: 19582
loss: 1.0238590240478516,grad_norm: 0.9999991586852784, iteration: 19583
loss: 1.0131152868270874,grad_norm: 0.9999991589335083, iteration: 19584
loss: 1.0136339664459229,grad_norm: 0.8792260305055742, iteration: 19585
loss: 1.00350821018219,grad_norm: 0.9611605715265432, iteration: 19586
loss: 1.0220446586608887,grad_norm: 0.9999992627919883, iteration: 19587
loss: 1.0069360733032227,grad_norm: 0.9999992030779911, iteration: 19588
loss: 1.0560208559036255,grad_norm: 0.9999991628231145, iteration: 19589
loss: 1.0002679824829102,grad_norm: 0.9746390717813311, iteration: 19590
loss: 1.0289287567138672,grad_norm: 0.99999899308037, iteration: 19591
loss: 0.9933494329452515,grad_norm: 0.9378500265314086, iteration: 19592
loss: 1.0360702276229858,grad_norm: 0.9999991661963844, iteration: 19593
loss: 1.0225027799606323,grad_norm: 0.9999997534369288, iteration: 19594
loss: 1.0231612920761108,grad_norm: 0.9999991825532172, iteration: 19595
loss: 0.9885836839675903,grad_norm: 0.9999991743911664, iteration: 19596
loss: 1.0328072309494019,grad_norm: 0.9999992084375491, iteration: 19597
loss: 0.9817959070205688,grad_norm: 0.9999992951777327, iteration: 19598
loss: 1.0393503904342651,grad_norm: 0.9999995255250563, iteration: 19599
loss: 1.039689302444458,grad_norm: 0.9999989502912093, iteration: 19600
loss: 1.0662285089492798,grad_norm: 0.9999992658880095, iteration: 19601
loss: 1.007330298423767,grad_norm: 0.9999990447311872, iteration: 19602
loss: 1.0273646116256714,grad_norm: 0.999998997271545, iteration: 19603
loss: 1.0615607500076294,grad_norm: 0.9999992817968847, iteration: 19604
loss: 1.0167168378829956,grad_norm: 0.921417918414033, iteration: 19605
loss: 1.0367485284805298,grad_norm: 0.9999990302405196, iteration: 19606
loss: 1.0283383131027222,grad_norm: 0.9428979001379993, iteration: 19607
loss: 0.9808472394943237,grad_norm: 0.9999990578376609, iteration: 19608
loss: 0.9916355609893799,grad_norm: 0.99999916235617, iteration: 19609
loss: 1.0306700468063354,grad_norm: 0.9569835013927074, iteration: 19610
loss: 1.0669394731521606,grad_norm: 0.9999991985978819, iteration: 19611
loss: 1.0125685930252075,grad_norm: 0.9999991246455068, iteration: 19612
loss: 1.0149879455566406,grad_norm: 0.9955640654796805, iteration: 19613
loss: 1.0308505296707153,grad_norm: 0.9999996328568417, iteration: 19614
loss: 1.0008076429367065,grad_norm: 0.9999992236088643, iteration: 19615
loss: 0.9726419448852539,grad_norm: 0.9999990840605186, iteration: 19616
loss: 1.0063941478729248,grad_norm: 0.9999989935445688, iteration: 19617
loss: 0.994397759437561,grad_norm: 0.9999991763047793, iteration: 19618
loss: 0.992205798625946,grad_norm: 0.9999990861569339, iteration: 19619
loss: 1.0195906162261963,grad_norm: 0.9999991540181371, iteration: 19620
loss: 1.011136770248413,grad_norm: 0.9551155847970667, iteration: 19621
loss: 0.9824966192245483,grad_norm: 0.9999990881677054, iteration: 19622
loss: 1.0405662059783936,grad_norm: 0.9999991432395443, iteration: 19623
loss: 1.008073329925537,grad_norm: 0.9999992585326762, iteration: 19624
loss: 1.063262939453125,grad_norm: 0.9999995188731347, iteration: 19625
loss: 1.0271883010864258,grad_norm: 0.9999989854685896, iteration: 19626
loss: 1.0560160875320435,grad_norm: 0.9999991200495107, iteration: 19627
loss: 0.9938161969184875,grad_norm: 0.9999991357423124, iteration: 19628
loss: 1.034881591796875,grad_norm: 0.999999109849047, iteration: 19629
loss: 0.9896186590194702,grad_norm: 0.9536004628214891, iteration: 19630
loss: 0.9573302865028381,grad_norm: 0.9899058853960703, iteration: 19631
loss: 1.008085012435913,grad_norm: 0.9999989924503963, iteration: 19632
loss: 1.0028330087661743,grad_norm: 0.9999991780492838, iteration: 19633
loss: 1.0084607601165771,grad_norm: 0.9999991929973011, iteration: 19634
loss: 1.0169507265090942,grad_norm: 0.999999086371159, iteration: 19635
loss: 1.0035828351974487,grad_norm: 0.9938897162938226, iteration: 19636
loss: 1.0277482271194458,grad_norm: 0.999999073430737, iteration: 19637
loss: 0.9961540699005127,grad_norm: 0.9606931799375508, iteration: 19638
loss: 1.02682626247406,grad_norm: 0.9999991993944803, iteration: 19639
loss: 1.0227023363113403,grad_norm: 0.9999990632941594, iteration: 19640
loss: 0.9884142279624939,grad_norm: 0.9053621902758476, iteration: 19641
loss: 1.0286478996276855,grad_norm: 0.9999990560271461, iteration: 19642
loss: 1.022850751876831,grad_norm: 0.9999990932250578, iteration: 19643
loss: 1.0227925777435303,grad_norm: 0.93795681579194, iteration: 19644
loss: 1.0273584127426147,grad_norm: 0.999999014451081, iteration: 19645
loss: 1.030623197555542,grad_norm: 0.9999989443766261, iteration: 19646
loss: 1.031295657157898,grad_norm: 0.9999993425680739, iteration: 19647
loss: 1.0316731929779053,grad_norm: 0.9999992811588088, iteration: 19648
loss: 1.0017623901367188,grad_norm: 0.9999994005432834, iteration: 19649
loss: 1.0156875848770142,grad_norm: 0.9999992239681882, iteration: 19650
loss: 1.002632737159729,grad_norm: 0.9243751176906783, iteration: 19651
loss: 0.9754380583763123,grad_norm: 0.8264967767152771, iteration: 19652
loss: 1.033790111541748,grad_norm: 0.9999991773007146, iteration: 19653
loss: 1.0284866094589233,grad_norm: 0.9999993513665083, iteration: 19654
loss: 0.9871401786804199,grad_norm: 0.9999991748566085, iteration: 19655
loss: 0.9832581281661987,grad_norm: 0.9787754533139812, iteration: 19656
loss: 0.9978561401367188,grad_norm: 0.9999990558036408, iteration: 19657
loss: 1.0385854244232178,grad_norm: 0.9999995848128356, iteration: 19658
loss: 1.0284463167190552,grad_norm: 0.9908648819471856, iteration: 19659
loss: 1.0010876655578613,grad_norm: 0.9999990767602762, iteration: 19660
loss: 1.080102562904358,grad_norm: 0.9999990589201094, iteration: 19661
loss: 1.04762601852417,grad_norm: 0.9999992117965268, iteration: 19662
loss: 1.0126633644104004,grad_norm: 0.8618768708327592, iteration: 19663
loss: 1.0181200504302979,grad_norm: 0.9999994313004273, iteration: 19664
loss: 1.0290045738220215,grad_norm: 0.9999992318946893, iteration: 19665
loss: 1.0394080877304077,grad_norm: 0.9999995740237188, iteration: 19666
loss: 1.0016999244689941,grad_norm: 0.9999992432544661, iteration: 19667
loss: 1.0180054903030396,grad_norm: 0.9999991031970715, iteration: 19668
loss: 1.0122478008270264,grad_norm: 0.9999991140202992, iteration: 19669
loss: 1.0266132354736328,grad_norm: 0.9999990193183991, iteration: 19670
loss: 0.9508760571479797,grad_norm: 0.9852500478786014, iteration: 19671
loss: 1.010582447052002,grad_norm: 0.99541935288217, iteration: 19672
loss: 1.0041605234146118,grad_norm: 0.9582309929905442, iteration: 19673
loss: 1.0347778797149658,grad_norm: 0.9999996732509365, iteration: 19674
loss: 1.0262151956558228,grad_norm: 0.9999992619046388, iteration: 19675
loss: 1.0389466285705566,grad_norm: 0.8844749809548608, iteration: 19676
loss: 1.0294569730758667,grad_norm: 0.8812591105482634, iteration: 19677
loss: 1.0489448308944702,grad_norm: 0.9999994683685116, iteration: 19678
loss: 1.0648244619369507,grad_norm: 0.999999579059012, iteration: 19679
loss: 1.0484585762023926,grad_norm: 0.9999993802541204, iteration: 19680
loss: 0.9979310631752014,grad_norm: 0.9999990581100917, iteration: 19681
loss: 1.00553560256958,grad_norm: 0.9999992524275778, iteration: 19682
loss: 1.0433119535446167,grad_norm: 0.9999992905410292, iteration: 19683
loss: 1.0090620517730713,grad_norm: 0.9999996432247342, iteration: 19684
loss: 1.0004223585128784,grad_norm: 0.9999990660234481, iteration: 19685
loss: 1.0075571537017822,grad_norm: 0.9999990298451448, iteration: 19686
loss: 1.0336604118347168,grad_norm: 0.9999993499418633, iteration: 19687
loss: 1.0310550928115845,grad_norm: 0.9999992473222415, iteration: 19688
loss: 1.0637773275375366,grad_norm: 0.9999993213639439, iteration: 19689
loss: 1.0183051824569702,grad_norm: 0.9999991371771232, iteration: 19690
loss: 1.0057648420333862,grad_norm: 0.8282664003992332, iteration: 19691
loss: 1.0188754796981812,grad_norm: 0.9935161992705343, iteration: 19692
loss: 1.0092041492462158,grad_norm: 0.9999990756252394, iteration: 19693
loss: 1.013292908668518,grad_norm: 0.9637411576834204, iteration: 19694
loss: 1.0311990976333618,grad_norm: 0.9999990742254284, iteration: 19695
loss: 1.0724860429763794,grad_norm: 0.9999991448877535, iteration: 19696
loss: 1.0016298294067383,grad_norm: 0.9999991746442074, iteration: 19697
loss: 0.9695557951927185,grad_norm: 0.9999991764284338, iteration: 19698
loss: 1.0011653900146484,grad_norm: 0.9999991798838427, iteration: 19699
loss: 1.027571439743042,grad_norm: 0.957904745844227, iteration: 19700
loss: 1.0296294689178467,grad_norm: 0.9999990293244564, iteration: 19701
loss: 0.9973646998405457,grad_norm: 0.9999992048018838, iteration: 19702
loss: 1.0405592918395996,grad_norm: 0.9016488057015803, iteration: 19703
loss: 1.0274313688278198,grad_norm: 0.9999994200972314, iteration: 19704
loss: 1.022030234336853,grad_norm: 0.9999990721069606, iteration: 19705
loss: 0.9875101447105408,grad_norm: 0.9270398213042402, iteration: 19706
loss: 1.0322679281234741,grad_norm: 0.9999991048215421, iteration: 19707
loss: 1.0287154912948608,grad_norm: 0.9999991009356718, iteration: 19708
loss: 1.0330926179885864,grad_norm: 0.9999993681429361, iteration: 19709
loss: 0.9717323780059814,grad_norm: 0.9890428861452071, iteration: 19710
loss: 1.0491807460784912,grad_norm: 0.9999997288280142, iteration: 19711
loss: 1.0139169692993164,grad_norm: 0.9787173723658491, iteration: 19712
loss: 1.0252816677093506,grad_norm: 0.9999993078284054, iteration: 19713
loss: 0.992167055606842,grad_norm: 0.9999992494023972, iteration: 19714
loss: 1.0336676836013794,grad_norm: 0.9999991474002732, iteration: 19715
loss: 1.0371265411376953,grad_norm: 0.9999990404788826, iteration: 19716
loss: 1.054866909980774,grad_norm: 0.9999992651556204, iteration: 19717
loss: 1.0535240173339844,grad_norm: 0.9999998119391661, iteration: 19718
loss: 0.9599646329879761,grad_norm: 0.9999991387037334, iteration: 19719
loss: 1.0166943073272705,grad_norm: 0.8657880972240105, iteration: 19720
loss: 1.0253745317459106,grad_norm: 0.9999998455456467, iteration: 19721
loss: 1.083686351776123,grad_norm: 0.9999996539686032, iteration: 19722
loss: 1.028128981590271,grad_norm: 0.999999373798559, iteration: 19723
loss: 1.001164436340332,grad_norm: 0.9999992722246568, iteration: 19724
loss: 1.0074962377548218,grad_norm: 0.976533135426955, iteration: 19725
loss: 1.0557156801223755,grad_norm: 0.9999993713214163, iteration: 19726
loss: 1.009156584739685,grad_norm: 0.9999990887789607, iteration: 19727
loss: 1.0391021966934204,grad_norm: 0.9621745309563309, iteration: 19728
loss: 1.0506048202514648,grad_norm: 0.9999998758567089, iteration: 19729
loss: 0.9843620657920837,grad_norm: 0.8490494992725084, iteration: 19730
loss: 1.0284897089004517,grad_norm: 0.9999994973863182, iteration: 19731
loss: 1.0424373149871826,grad_norm: 0.9999990268159525, iteration: 19732
loss: 1.0246930122375488,grad_norm: 0.9999997452607366, iteration: 19733
loss: 1.0079227685928345,grad_norm: 0.9999992265323481, iteration: 19734
loss: 1.0400041341781616,grad_norm: 0.9953447566037074, iteration: 19735
loss: 0.9855250716209412,grad_norm: 0.8768246268641111, iteration: 19736
loss: 1.0179316997528076,grad_norm: 0.9275867580561142, iteration: 19737
loss: 1.0111793279647827,grad_norm: 0.999999176488571, iteration: 19738
loss: 1.0325809717178345,grad_norm: 0.9999993623778728, iteration: 19739
loss: 1.0079106092453003,grad_norm: 0.8467283604643149, iteration: 19740
loss: 1.1178778409957886,grad_norm: 0.9999999220906123, iteration: 19741
loss: 1.0267624855041504,grad_norm: 0.9999991142220531, iteration: 19742
loss: 0.9979289770126343,grad_norm: 0.9759076392967843, iteration: 19743
loss: 0.9843729734420776,grad_norm: 0.9999991830325068, iteration: 19744
loss: 1.0242373943328857,grad_norm: 0.9999991870165744, iteration: 19745
loss: 1.0223647356033325,grad_norm: 0.9999996666448364, iteration: 19746
loss: 0.9707701206207275,grad_norm: 0.9999992295001338, iteration: 19747
loss: 1.032663345336914,grad_norm: 0.999999202781598, iteration: 19748
loss: 1.0407047271728516,grad_norm: 0.9999996311115915, iteration: 19749
loss: 0.9681828618049622,grad_norm: 0.999998938265041, iteration: 19750
loss: 1.0344487428665161,grad_norm: 0.9999991340974609, iteration: 19751
loss: 1.040269374847412,grad_norm: 0.999999102726231, iteration: 19752
loss: 1.005353569984436,grad_norm: 0.8284833527886084, iteration: 19753
loss: 1.0015631914138794,grad_norm: 0.9999991534287921, iteration: 19754
loss: 1.0712532997131348,grad_norm: 0.999999497701249, iteration: 19755
loss: 1.0329993963241577,grad_norm: 0.99999907849175, iteration: 19756
loss: 1.0342270135879517,grad_norm: 0.999999043123102, iteration: 19757
loss: 1.0308549404144287,grad_norm: 0.9999997766369808, iteration: 19758
loss: 1.031524419784546,grad_norm: 0.9999992130433174, iteration: 19759
loss: 1.0231772661209106,grad_norm: 0.9999992340381744, iteration: 19760
loss: 1.0170025825500488,grad_norm: 0.9999990428118773, iteration: 19761
loss: 1.0733333826065063,grad_norm: 0.9999991709301604, iteration: 19762
loss: 1.0082817077636719,grad_norm: 0.9999991439967351, iteration: 19763
loss: 1.0014160871505737,grad_norm: 0.999999035617821, iteration: 19764
loss: 0.9788057804107666,grad_norm: 0.9999992535463, iteration: 19765
loss: 1.0142728090286255,grad_norm: 0.9999993584422922, iteration: 19766
loss: 1.0376098155975342,grad_norm: 0.9999990770913727, iteration: 19767
loss: 0.9951199889183044,grad_norm: 0.9999992556408741, iteration: 19768
loss: 1.0144699811935425,grad_norm: 0.9999991370074016, iteration: 19769
loss: 0.975230872631073,grad_norm: 0.9999991011147225, iteration: 19770
loss: 1.032880425453186,grad_norm: 0.9999992330758629, iteration: 19771
loss: 0.9734575152397156,grad_norm: 0.99999903979923, iteration: 19772
loss: 1.0455615520477295,grad_norm: 0.9999994813062788, iteration: 19773
loss: 0.973955512046814,grad_norm: 0.8945466700974433, iteration: 19774
loss: 1.0173853635787964,grad_norm: 0.9999990718112014, iteration: 19775
loss: 1.0168299674987793,grad_norm: 0.9999992651755523, iteration: 19776
loss: 1.0172978639602661,grad_norm: 0.9999994361091146, iteration: 19777
loss: 1.007297396659851,grad_norm: 0.9999989827811226, iteration: 19778
loss: 1.0243467092514038,grad_norm: 0.9999992809494133, iteration: 19779
loss: 0.9758080244064331,grad_norm: 0.9999990711953096, iteration: 19780
loss: 0.9903674721717834,grad_norm: 0.9999996730203021, iteration: 19781
loss: 1.0420421361923218,grad_norm: 0.9999991172215128, iteration: 19782
loss: 1.0592674016952515,grad_norm: 0.9999991791254924, iteration: 19783
loss: 1.0661448240280151,grad_norm: 0.9999993543783643, iteration: 19784
loss: 0.9817308187484741,grad_norm: 0.9999991611966152, iteration: 19785
loss: 0.983786940574646,grad_norm: 0.9702575127681563, iteration: 19786
loss: 1.0049177408218384,grad_norm: 0.9999990437314378, iteration: 19787
loss: 0.9974902272224426,grad_norm: 0.9999990820566814, iteration: 19788
loss: 1.0140482187271118,grad_norm: 0.9999989733530155, iteration: 19789
loss: 1.0417888164520264,grad_norm: 0.9999993092219359, iteration: 19790
loss: 1.0439943075180054,grad_norm: 0.999999174139339, iteration: 19791
loss: 1.0038588047027588,grad_norm: 0.8978694884619024, iteration: 19792
loss: 0.9962641596794128,grad_norm: 0.9999991037210892, iteration: 19793
loss: 1.0220335721969604,grad_norm: 0.999999251823132, iteration: 19794
loss: 1.0074126720428467,grad_norm: 0.9999993422606792, iteration: 19795
loss: 0.9872339963912964,grad_norm: 0.9999993893887035, iteration: 19796
loss: 0.9996229410171509,grad_norm: 0.9999991770560211, iteration: 19797
loss: 1.06208336353302,grad_norm: 0.9999992869054363, iteration: 19798
loss: 0.9916549921035767,grad_norm: 0.9946722530001523, iteration: 19799
loss: 0.99078369140625,grad_norm: 0.9999992011933972, iteration: 19800
loss: 0.980068027973175,grad_norm: 0.9696507300752081, iteration: 19801
loss: 0.9495118856430054,grad_norm: 0.9801395715565492, iteration: 19802
loss: 1.006576418876648,grad_norm: 0.9263350171696226, iteration: 19803
loss: 1.0269092321395874,grad_norm: 0.9999989753192317, iteration: 19804
loss: 1.0473397970199585,grad_norm: 0.9999990658797522, iteration: 19805
loss: 1.0404430627822876,grad_norm: 0.9999996827997334, iteration: 19806
loss: 1.0160282850265503,grad_norm: 0.9296765650996598, iteration: 19807
loss: 1.0278769731521606,grad_norm: 0.9999990386403851, iteration: 19808
loss: 1.0489097833633423,grad_norm: 0.999999066318801, iteration: 19809
loss: 0.9826696515083313,grad_norm: 0.9999991274730977, iteration: 19810
loss: 1.0482912063598633,grad_norm: 0.9999998221431929, iteration: 19811
loss: 0.9815632700920105,grad_norm: 0.9999992622501348, iteration: 19812
loss: 1.0387846231460571,grad_norm: 0.9999991229873765, iteration: 19813
loss: 1.0635097026824951,grad_norm: 0.9999993088345573, iteration: 19814
loss: 0.9811116456985474,grad_norm: 0.8930462954792054, iteration: 19815
loss: 1.053911566734314,grad_norm: 0.9999994177072231, iteration: 19816
loss: 1.0268875360488892,grad_norm: 0.9999993526213008, iteration: 19817
loss: 1.028465986251831,grad_norm: 0.9999994476287172, iteration: 19818
loss: 1.0885628461837769,grad_norm: 0.9999993597796009, iteration: 19819
loss: 0.9855051636695862,grad_norm: 0.9937052975228055, iteration: 19820
loss: 1.02057945728302,grad_norm: 0.9999990605144263, iteration: 19821
loss: 1.0322329998016357,grad_norm: 0.9999991585048457, iteration: 19822
loss: 1.0208526849746704,grad_norm: 0.9999991133957478, iteration: 19823
loss: 1.0558992624282837,grad_norm: 0.9999993654764477, iteration: 19824
loss: 1.024813175201416,grad_norm: 0.9999992292274635, iteration: 19825
loss: 1.0187640190124512,grad_norm: 0.9999991356208501, iteration: 19826
loss: 1.0293593406677246,grad_norm: 0.999999098696703, iteration: 19827
loss: 1.0288666486740112,grad_norm: 0.883362251823885, iteration: 19828
loss: 1.0263975858688354,grad_norm: 0.9095653999935186, iteration: 19829
loss: 1.022093653678894,grad_norm: 0.9999998007959119, iteration: 19830
loss: 1.0374433994293213,grad_norm: 0.9999990686697185, iteration: 19831
loss: 1.0363255739212036,grad_norm: 0.9999998005499312, iteration: 19832
loss: 0.9891455769538879,grad_norm: 0.999999192928865, iteration: 19833
loss: 1.025883674621582,grad_norm: 0.9999993132722773, iteration: 19834
loss: 0.9965345859527588,grad_norm: 0.9999991777018604, iteration: 19835
loss: 1.0113859176635742,grad_norm: 0.9155312468427763, iteration: 19836
loss: 1.045535922050476,grad_norm: 0.9999992132215675, iteration: 19837
loss: 1.013124704360962,grad_norm: 0.9999996241853069, iteration: 19838
loss: 1.0124255418777466,grad_norm: 0.9999990985912408, iteration: 19839
loss: 1.010485291481018,grad_norm: 0.9999991361726406, iteration: 19840
loss: 1.007219910621643,grad_norm: 0.9999992384741667, iteration: 19841
loss: 1.0203522443771362,grad_norm: 0.9999992910753426, iteration: 19842
loss: 1.0236985683441162,grad_norm: 0.9999991801205075, iteration: 19843
loss: 1.039723515510559,grad_norm: 0.999999078808444, iteration: 19844
loss: 1.0055104494094849,grad_norm: 0.9999991315890161, iteration: 19845
loss: 0.9997311234474182,grad_norm: 0.8625788557488814, iteration: 19846
loss: 1.0357213020324707,grad_norm: 0.9999990936345374, iteration: 19847
loss: 1.0594332218170166,grad_norm: 0.9999997221949741, iteration: 19848
loss: 0.9697617888450623,grad_norm: 0.9999991195948504, iteration: 19849
loss: 1.0569363832473755,grad_norm: 0.9999994671390392, iteration: 19850
loss: 1.017179250717163,grad_norm: 0.971979005602761, iteration: 19851
loss: 1.0064148902893066,grad_norm: 0.9999991866513853, iteration: 19852
loss: 1.0184721946716309,grad_norm: 0.999999141057119, iteration: 19853
loss: 0.9769411087036133,grad_norm: 0.9999992960165289, iteration: 19854
loss: 1.047608733177185,grad_norm: 0.9999990852242638, iteration: 19855
loss: 1.017784595489502,grad_norm: 0.9999993805971436, iteration: 19856
loss: 0.9811720252037048,grad_norm: 0.9999990891234553, iteration: 19857
loss: 1.0279172658920288,grad_norm: 0.9999991569811977, iteration: 19858
loss: 1.0048191547393799,grad_norm: 0.9999991701230453, iteration: 19859
loss: 0.9816361665725708,grad_norm: 0.9999990686031877, iteration: 19860
loss: 1.0323519706726074,grad_norm: 0.9999991644575643, iteration: 19861
loss: 1.0679142475128174,grad_norm: 0.9999994853382193, iteration: 19862
loss: 1.039345622062683,grad_norm: 0.9999996159439198, iteration: 19863
loss: 0.9859119057655334,grad_norm: 0.9999991104928877, iteration: 19864
loss: 0.9980503916740417,grad_norm: 0.9493801685779014, iteration: 19865
loss: 1.026584267616272,grad_norm: 0.999999101317322, iteration: 19866
loss: 1.031688928604126,grad_norm: 0.9395639021118674, iteration: 19867
loss: 1.0028115510940552,grad_norm: 0.8675738130499867, iteration: 19868
loss: 1.051643967628479,grad_norm: 0.9999991565883307, iteration: 19869
loss: 0.9599994421005249,grad_norm: 0.999999344006792, iteration: 19870
loss: 1.0532456636428833,grad_norm: 0.9999991406822228, iteration: 19871
loss: 0.9834051728248596,grad_norm: 0.9999991971600859, iteration: 19872
loss: 1.0556764602661133,grad_norm: 0.9999990669360156, iteration: 19873
loss: 0.9940251708030701,grad_norm: 0.9999994608961195, iteration: 19874
loss: 1.019903302192688,grad_norm: 0.9999995721311572, iteration: 19875
loss: 1.0111491680145264,grad_norm: 0.9999990462128265, iteration: 19876
loss: 1.0185210704803467,grad_norm: 0.9999991850526173, iteration: 19877
loss: 1.0242770910263062,grad_norm: 0.9999991219846868, iteration: 19878
loss: 0.9970139265060425,grad_norm: 0.9999992238356593, iteration: 19879
loss: 1.0043280124664307,grad_norm: 0.9999991006071156, iteration: 19880
loss: 1.0679212808609009,grad_norm: 0.9999990675949212, iteration: 19881
loss: 1.027220368385315,grad_norm: 0.9999992802116532, iteration: 19882
loss: 1.0433269739151,grad_norm: 0.9804081710891869, iteration: 19883
loss: 0.9562505483627319,grad_norm: 0.9999993517058453, iteration: 19884
loss: 1.0222338438034058,grad_norm: 0.9999993117144345, iteration: 19885
loss: 1.009470820426941,grad_norm: 0.9999993895583641, iteration: 19886
loss: 0.9894918203353882,grad_norm: 0.9999990799403569, iteration: 19887
loss: 1.0182573795318604,grad_norm: 0.9999991632288532, iteration: 19888
loss: 1.0431677103042603,grad_norm: 0.9999991063946309, iteration: 19889
loss: 1.0443952083587646,grad_norm: 0.9999992388575591, iteration: 19890
loss: 1.014290452003479,grad_norm: 0.9999992231851965, iteration: 19891
loss: 0.9702016711235046,grad_norm: 0.9670695588758894, iteration: 19892
loss: 1.0600426197052002,grad_norm: 0.9999991171994481, iteration: 19893
loss: 1.043886423110962,grad_norm: 0.9999993555950701, iteration: 19894
loss: 1.0513871908187866,grad_norm: 0.9338587642524359, iteration: 19895
loss: 1.0090689659118652,grad_norm: 0.9999991787237937, iteration: 19896
loss: 1.0585441589355469,grad_norm: 0.9999994112381536, iteration: 19897
loss: 1.0186998844146729,grad_norm: 0.9999989934898011, iteration: 19898
loss: 1.0547995567321777,grad_norm: 0.9739832823531607, iteration: 19899
loss: 0.9831405878067017,grad_norm: 0.9999991258791529, iteration: 19900
loss: 0.9950644969940186,grad_norm: 0.8973920030213999, iteration: 19901
loss: 1.0100795030593872,grad_norm: 0.9657136769717055, iteration: 19902
loss: 0.9839894771575928,grad_norm: 0.9999991685699262, iteration: 19903
loss: 0.9869391322135925,grad_norm: 0.9999990469048111, iteration: 19904
loss: 1.0475906133651733,grad_norm: 0.9999992816933583, iteration: 19905
loss: 1.0135846138000488,grad_norm: 0.999999209367767, iteration: 19906
loss: 0.9987637400627136,grad_norm: 0.9999991395826935, iteration: 19907
loss: 1.008431315422058,grad_norm: 0.9999990833851704, iteration: 19908
loss: 1.013363003730774,grad_norm: 0.9999993503554382, iteration: 19909
loss: 1.0112528800964355,grad_norm: 0.9205573934294634, iteration: 19910
loss: 1.0019311904907227,grad_norm: 0.9988977407709918, iteration: 19911
loss: 1.0262640714645386,grad_norm: 0.9999992457534732, iteration: 19912
loss: 1.0124974250793457,grad_norm: 0.9999991036349829, iteration: 19913
loss: 1.0158586502075195,grad_norm: 0.9999992650275975, iteration: 19914
loss: 0.9993154406547546,grad_norm: 0.9999992409606855, iteration: 19915
loss: 1.0371191501617432,grad_norm: 0.8582419480811803, iteration: 19916
loss: 1.0041176080703735,grad_norm: 0.9578478369056509, iteration: 19917
loss: 1.028483271598816,grad_norm: 0.9356731291665646, iteration: 19918
loss: 1.0039349794387817,grad_norm: 0.9999990710838497, iteration: 19919
loss: 1.0260933637619019,grad_norm: 0.9999992597141898, iteration: 19920
loss: 1.0164576768875122,grad_norm: 0.9999992870214778, iteration: 19921
loss: 1.0306988954544067,grad_norm: 0.9999990213042972, iteration: 19922
loss: 0.9570654630661011,grad_norm: 0.9999991361041879, iteration: 19923
loss: 1.0719711780548096,grad_norm: 0.9999991676467387, iteration: 19924
loss: 1.0384241342544556,grad_norm: 0.9933885450638952, iteration: 19925
loss: 1.0095961093902588,grad_norm: 0.9999991237130443, iteration: 19926
loss: 1.0230776071548462,grad_norm: 0.9999992223489438, iteration: 19927
loss: 0.9943059086799622,grad_norm: 0.8810835138288073, iteration: 19928
loss: 1.0365859270095825,grad_norm: 0.9999992412768671, iteration: 19929
loss: 1.0212191343307495,grad_norm: 0.8209868313257376, iteration: 19930
loss: 1.004550814628601,grad_norm: 0.9589857959888202, iteration: 19931
loss: 1.0201189517974854,grad_norm: 0.9533866227338488, iteration: 19932
loss: 0.9893466830253601,grad_norm: 0.9999990533953403, iteration: 19933
loss: 1.051835060119629,grad_norm: 0.9081946108262933, iteration: 19934
loss: 1.0240370035171509,grad_norm: 0.9999990186949634, iteration: 19935
loss: 1.0369220972061157,grad_norm: 0.9999998472561457, iteration: 19936
loss: 1.0022773742675781,grad_norm: 0.9792826255118607, iteration: 19937
loss: 0.9950777292251587,grad_norm: 0.9999992591544823, iteration: 19938
loss: 1.0000910758972168,grad_norm: 0.9999991358766471, iteration: 19939
loss: 1.0293781757354736,grad_norm: 0.9160857321799215, iteration: 19940
loss: 0.9962834119796753,grad_norm: 0.9718256547377817, iteration: 19941
loss: 1.0088257789611816,grad_norm: 0.9769065767206296, iteration: 19942
loss: 1.0078392028808594,grad_norm: 0.8908791150241945, iteration: 19943
loss: 1.0312718152999878,grad_norm: 0.9999992334523287, iteration: 19944
loss: 1.013599157333374,grad_norm: 0.862253406385388, iteration: 19945
loss: 1.0215749740600586,grad_norm: 0.8938859409680086, iteration: 19946
loss: 1.0120009183883667,grad_norm: 0.9999990597534334, iteration: 19947
loss: 1.0084104537963867,grad_norm: 0.9999991304015968, iteration: 19948
loss: 0.9943891167640686,grad_norm: 0.999999090273446, iteration: 19949
loss: 1.0123486518859863,grad_norm: 0.9999991798246212, iteration: 19950
loss: 0.9786691069602966,grad_norm: 0.9999990512159737, iteration: 19951
loss: 1.0021929740905762,grad_norm: 0.9999991476700567, iteration: 19952
loss: 0.9905945062637329,grad_norm: 0.9693049807749725, iteration: 19953
loss: 1.0434499979019165,grad_norm: 0.9999991687069366, iteration: 19954
loss: 1.0493957996368408,grad_norm: 0.9999990909375702, iteration: 19955
loss: 1.0410677194595337,grad_norm: 0.9071329554429179, iteration: 19956
loss: 1.029194712638855,grad_norm: 0.950238134126174, iteration: 19957
loss: 1.0170503854751587,grad_norm: 0.9999996638711979, iteration: 19958
loss: 0.9632899165153503,grad_norm: 0.9999990672226899, iteration: 19959
loss: 1.0439517498016357,grad_norm: 0.9317294809159921, iteration: 19960
loss: 1.0490097999572754,grad_norm: 0.9999996065854201, iteration: 19961
loss: 1.0053313970565796,grad_norm: 0.884848389935175, iteration: 19962
loss: 0.9749218821525574,grad_norm: 0.9999990083753623, iteration: 19963
loss: 1.064323902130127,grad_norm: 0.9999992762166416, iteration: 19964
loss: 0.9820156097412109,grad_norm: 0.9999990375192284, iteration: 19965
loss: 0.9618532657623291,grad_norm: 0.999999229543365, iteration: 19966
loss: 0.9811570644378662,grad_norm: 0.9999992961441126, iteration: 19967
loss: 1.0639432668685913,grad_norm: 0.9999998188867586, iteration: 19968
loss: 0.997593343257904,grad_norm: 0.8925621916124308, iteration: 19969
loss: 1.0063596963882446,grad_norm: 0.999999297131847, iteration: 19970
loss: 1.0222856998443604,grad_norm: 0.9999991154290967, iteration: 19971
loss: 1.0149348974227905,grad_norm: 0.9999992640767863, iteration: 19972
loss: 0.9949392080307007,grad_norm: 0.9999991450887186, iteration: 19973
loss: 1.001817226409912,grad_norm: 0.9999991798227738, iteration: 19974
loss: 1.0508922338485718,grad_norm: 0.9999990491782896, iteration: 19975
loss: 0.9918878674507141,grad_norm: 0.9999992481107848, iteration: 19976
loss: 1.011657476425171,grad_norm: 0.9999992378296603, iteration: 19977
loss: 1.0274899005889893,grad_norm: 0.9999993662762111, iteration: 19978
loss: 1.0399532318115234,grad_norm: 0.9999993119604702, iteration: 19979
loss: 1.0408798456192017,grad_norm: 0.999999082907547, iteration: 19980
loss: 1.0020270347595215,grad_norm: 0.9999991762646842, iteration: 19981
loss: 1.0197663307189941,grad_norm: 0.9900890943712121, iteration: 19982
loss: 1.0645954608917236,grad_norm: 0.9999995002960752, iteration: 19983
loss: 1.0129014253616333,grad_norm: 0.999999154888789, iteration: 19984
loss: 1.0628447532653809,grad_norm: 0.9999990444762901, iteration: 19985
loss: 1.0399320125579834,grad_norm: 0.9999992387404091, iteration: 19986
loss: 1.0353184938430786,grad_norm: 0.9999992141912342, iteration: 19987
loss: 1.030683159828186,grad_norm: 0.9999993347295136, iteration: 19988
loss: 1.0645071268081665,grad_norm: 0.9999997526758769, iteration: 19989
loss: 1.0036561489105225,grad_norm: 0.9999992642261469, iteration: 19990
loss: 1.013097882270813,grad_norm: 0.9991969620660308, iteration: 19991
loss: 1.0428138971328735,grad_norm: 0.9999993793509445, iteration: 19992
loss: 0.9999781250953674,grad_norm: 0.9999991032849511, iteration: 19993
loss: 1.0255595445632935,grad_norm: 0.9063346697757253, iteration: 19994
loss: 0.97733473777771,grad_norm: 0.9999992565178429, iteration: 19995
loss: 0.9820348620414734,grad_norm: 0.8755443805443057, iteration: 19996
loss: 1.0158237218856812,grad_norm: 0.9754128212382478, iteration: 19997
loss: 0.9950494766235352,grad_norm: 0.9999991461285942, iteration: 19998
loss: 0.9794001579284668,grad_norm: 0.9999990964096106, iteration: 19999
loss: 1.0154167413711548,grad_norm: 0.9999991747409478, iteration: 20000
Evaluating at step 20000
{'val': 1.0006739925593138, 'test': 2.4286377829865526}
loss: 0.9837443232536316,grad_norm: 0.9999992760846919, iteration: 20001
loss: 1.0122743844985962,grad_norm: 0.9999992882739264, iteration: 20002
loss: 0.9847759008407593,grad_norm: 0.9787432450374831, iteration: 20003
loss: 1.0117579698562622,grad_norm: 0.9742808471868483, iteration: 20004
loss: 1.0059360265731812,grad_norm: 0.9999989966917103, iteration: 20005
loss: 1.002143383026123,grad_norm: 0.9213514487348033, iteration: 20006
loss: 1.0189189910888672,grad_norm: 0.9999992480838523, iteration: 20007
loss: 1.0169248580932617,grad_norm: 0.9999990417351001, iteration: 20008
loss: 1.0105059146881104,grad_norm: 0.9999992599939959, iteration: 20009
loss: 1.0437657833099365,grad_norm: 0.9999991254672109, iteration: 20010
loss: 1.0222299098968506,grad_norm: 0.999999317570364, iteration: 20011
loss: 0.9726769328117371,grad_norm: 0.9999993577448362, iteration: 20012
loss: 1.04081392288208,grad_norm: 0.9999997828436281, iteration: 20013
loss: 1.0171653032302856,grad_norm: 0.982480503329947, iteration: 20014
loss: 0.9674731492996216,grad_norm: 0.9999992381634522, iteration: 20015
loss: 1.0238361358642578,grad_norm: 0.909332704636757, iteration: 20016
loss: 1.0217931270599365,grad_norm: 0.9457916349808779, iteration: 20017
loss: 1.04587984085083,grad_norm: 0.9338236970533227, iteration: 20018
loss: 1.0085746049880981,grad_norm: 0.9999989949142042, iteration: 20019
loss: 0.9901091456413269,grad_norm: 0.9999992894219238, iteration: 20020
loss: 0.9978440403938293,grad_norm: 0.9999994219470013, iteration: 20021
loss: 1.0233930349349976,grad_norm: 0.9999991043803708, iteration: 20022
loss: 1.0283284187316895,grad_norm: 0.970390277621075, iteration: 20023
loss: 0.9556007385253906,grad_norm: 0.9999990557485633, iteration: 20024
loss: 0.9861907958984375,grad_norm: 0.9999990892644495, iteration: 20025
loss: 1.0043946504592896,grad_norm: 0.9999990831384366, iteration: 20026
loss: 1.0275436639785767,grad_norm: 0.9387621435879709, iteration: 20027
loss: 1.0110632181167603,grad_norm: 0.9999992512485969, iteration: 20028
loss: 1.0030741691589355,grad_norm: 0.9999990505057909, iteration: 20029
loss: 1.0269029140472412,grad_norm: 0.9999992059534731, iteration: 20030
loss: 0.9751518368721008,grad_norm: 0.9999991845200739, iteration: 20031
loss: 1.007593035697937,grad_norm: 0.9999992469165093, iteration: 20032
loss: 1.060002088546753,grad_norm: 0.9194493445432933, iteration: 20033
loss: 0.9730009436607361,grad_norm: 0.9324428762004973, iteration: 20034
loss: 1.0476936101913452,grad_norm: 0.9999990625147862, iteration: 20035
loss: 1.024527668952942,grad_norm: 0.9999991354733022, iteration: 20036
loss: 1.045918345451355,grad_norm: 0.9999991192080211, iteration: 20037
loss: 1.0427168607711792,grad_norm: 0.9999990229609393, iteration: 20038
loss: 1.0291767120361328,grad_norm: 0.9999991066142131, iteration: 20039
loss: 1.0270915031433105,grad_norm: 0.9999991814111028, iteration: 20040
loss: 0.9802332520484924,grad_norm: 0.9999990961651226, iteration: 20041
loss: 0.9907442331314087,grad_norm: 0.9999991562235097, iteration: 20042
loss: 1.008064866065979,grad_norm: 0.9999990851624839, iteration: 20043
loss: 1.018993854522705,grad_norm: 0.9999990883514903, iteration: 20044
loss: 1.0147825479507446,grad_norm: 0.9999990776033454, iteration: 20045
loss: 0.9979842901229858,grad_norm: 0.9999991556084589, iteration: 20046
loss: 1.0149179697036743,grad_norm: 0.9691854393281879, iteration: 20047
loss: 1.0373667478561401,grad_norm: 0.940664717473859, iteration: 20048
loss: 0.992199718952179,grad_norm: 0.9402268489066121, iteration: 20049
loss: 0.9927335977554321,grad_norm: 0.9999990089652694, iteration: 20050
loss: 0.9840161204338074,grad_norm: 0.9999991535152504, iteration: 20051
loss: 1.0545036792755127,grad_norm: 0.9404820280437637, iteration: 20052
loss: 1.0602556467056274,grad_norm: 0.9999993559638786, iteration: 20053
loss: 1.0014939308166504,grad_norm: 0.9999990114144772, iteration: 20054
loss: 1.0122169256210327,grad_norm: 0.9999990289803712, iteration: 20055
loss: 1.0222489833831787,grad_norm: 0.9999992313903114, iteration: 20056
loss: 1.010839581489563,grad_norm: 0.999999168104565, iteration: 20057
loss: 1.0043400526046753,grad_norm: 0.9999992946692136, iteration: 20058
loss: 1.0055030584335327,grad_norm: 0.9999989900455509, iteration: 20059
loss: 1.0606112480163574,grad_norm: 0.9999991698657918, iteration: 20060
loss: 1.012255072593689,grad_norm: 0.9999989375179978, iteration: 20061
loss: 1.0267133712768555,grad_norm: 0.9999991089752012, iteration: 20062
loss: 1.0340633392333984,grad_norm: 0.9999993435172455, iteration: 20063
loss: 1.0044399499893188,grad_norm: 0.9605778336768654, iteration: 20064
loss: 1.013196587562561,grad_norm: 0.999999294957956, iteration: 20065
loss: 1.037418007850647,grad_norm: 0.9999991239827518, iteration: 20066
loss: 0.9885671138763428,grad_norm: 0.9999992113492977, iteration: 20067
loss: 1.0402461290359497,grad_norm: 0.9982137102293804, iteration: 20068
loss: 1.052184820175171,grad_norm: 0.9999991054255187, iteration: 20069
loss: 1.0265004634857178,grad_norm: 0.9999990976493233, iteration: 20070
loss: 0.9906570911407471,grad_norm: 0.9999991355645993, iteration: 20071
loss: 1.0363094806671143,grad_norm: 0.9999992952336562, iteration: 20072
loss: 1.0344003438949585,grad_norm: 0.864722683630587, iteration: 20073
loss: 1.0774714946746826,grad_norm: 0.9927493399497546, iteration: 20074
loss: 0.9956696033477783,grad_norm: 0.9999991062283724, iteration: 20075
loss: 1.0291081666946411,grad_norm: 0.9999992543380813, iteration: 20076
loss: 1.0117508172988892,grad_norm: 0.9984652866811422, iteration: 20077
loss: 1.0155689716339111,grad_norm: 0.9999990788278087, iteration: 20078
loss: 1.0124834775924683,grad_norm: 0.9504174998144252, iteration: 20079
loss: 1.0291904211044312,grad_norm: 0.9131048056698229, iteration: 20080
loss: 1.056929588317871,grad_norm: 0.999999127810945, iteration: 20081
loss: 1.00458824634552,grad_norm: 0.8117097687812742, iteration: 20082
loss: 1.021176815032959,grad_norm: 0.9999992555913255, iteration: 20083
loss: 0.9703480005264282,grad_norm: 0.9999992136152386, iteration: 20084
loss: 0.9535560011863708,grad_norm: 0.9999990461544543, iteration: 20085
loss: 1.0380007028579712,grad_norm: 0.9999991905872624, iteration: 20086
loss: 1.0020860433578491,grad_norm: 0.8890114226618322, iteration: 20087
loss: 1.0188870429992676,grad_norm: 0.9999991815280022, iteration: 20088
loss: 1.017139196395874,grad_norm: 0.9999991451727378, iteration: 20089
loss: 1.0295710563659668,grad_norm: 0.9999990683814574, iteration: 20090
loss: 1.041288137435913,grad_norm: 0.9999991254028522, iteration: 20091
loss: 1.0032804012298584,grad_norm: 0.9999992338982966, iteration: 20092
loss: 1.0355618000030518,grad_norm: 0.9155529573812748, iteration: 20093
loss: 0.9934649467468262,grad_norm: 0.8270330341815912, iteration: 20094
loss: 1.008723497390747,grad_norm: 0.9999993251058344, iteration: 20095
loss: 1.0018881559371948,grad_norm: 0.8777601716196084, iteration: 20096
loss: 0.9951903820037842,grad_norm: 0.953188556960893, iteration: 20097
loss: 1.0005476474761963,grad_norm: 0.999999186133122, iteration: 20098
loss: 1.0217854976654053,grad_norm: 0.9999995656959705, iteration: 20099
loss: 1.0183627605438232,grad_norm: 0.9999990244669951, iteration: 20100
loss: 0.9809027314186096,grad_norm: 0.916241468130986, iteration: 20101
loss: 0.9676158428192139,grad_norm: 0.9999992133880216, iteration: 20102
loss: 0.9930762648582458,grad_norm: 0.9630459125299015, iteration: 20103
loss: 1.008893609046936,grad_norm: 0.8721325701561224, iteration: 20104
loss: 0.9912984371185303,grad_norm: 0.9999991067237636, iteration: 20105
loss: 0.9612510204315186,grad_norm: 0.9999992309757588, iteration: 20106
loss: 1.0376968383789062,grad_norm: 0.912104411694479, iteration: 20107
loss: 1.0249981880187988,grad_norm: 0.9999991022845302, iteration: 20108
loss: 1.0322569608688354,grad_norm: 0.9999991017039613, iteration: 20109
loss: 1.0069798231124878,grad_norm: 0.9409529829414337, iteration: 20110
loss: 0.9858382940292358,grad_norm: 0.9999992711972515, iteration: 20111
loss: 0.9704124927520752,grad_norm: 0.9999992100330248, iteration: 20112
loss: 1.0065910816192627,grad_norm: 0.9999991284781636, iteration: 20113
loss: 0.9852855205535889,grad_norm: 0.999999206235704, iteration: 20114
loss: 1.0097498893737793,grad_norm: 0.9999991642048772, iteration: 20115
loss: 0.9975124001502991,grad_norm: 0.8727833634056784, iteration: 20116
loss: 0.9770184755325317,grad_norm: 0.9999991060760622, iteration: 20117
loss: 1.0040761232376099,grad_norm: 0.9999992195826862, iteration: 20118
loss: 1.0332356691360474,grad_norm: 0.9999991452114472, iteration: 20119
loss: 1.015175700187683,grad_norm: 0.9999991457120028, iteration: 20120
loss: 1.0229941606521606,grad_norm: 0.9999992836242373, iteration: 20121
loss: 1.0790979862213135,grad_norm: 0.9999990699660256, iteration: 20122
loss: 1.0126897096633911,grad_norm: 0.9999990760623703, iteration: 20123
loss: 0.9705111384391785,grad_norm: 0.8843129871673826, iteration: 20124
loss: 1.0106130838394165,grad_norm: 0.9881205104230723, iteration: 20125
loss: 1.008219838142395,grad_norm: 0.9999991784319016, iteration: 20126
loss: 0.9914767146110535,grad_norm: 0.9999991497341916, iteration: 20127
loss: 1.0335222482681274,grad_norm: 0.999999240428214, iteration: 20128
loss: 0.9814265370368958,grad_norm: 0.9079555363498356, iteration: 20129
loss: 1.0048667192459106,grad_norm: 0.999999034587799, iteration: 20130
loss: 1.022176742553711,grad_norm: 0.9271812057259157, iteration: 20131
loss: 1.0264298915863037,grad_norm: 0.9999992162137644, iteration: 20132
loss: 1.0248531103134155,grad_norm: 0.9999991940006204, iteration: 20133
loss: 1.0199037790298462,grad_norm: 0.9714586065494472, iteration: 20134
loss: 0.9820132851600647,grad_norm: 0.9999991448323895, iteration: 20135
loss: 1.086818814277649,grad_norm: 0.9999993009220386, iteration: 20136
loss: 1.0240813493728638,grad_norm: 0.9944187504584251, iteration: 20137
loss: 0.9966427087783813,grad_norm: 0.9796759127758145, iteration: 20138
loss: 1.0372235774993896,grad_norm: 0.9999990128136288, iteration: 20139
loss: 1.0270277261734009,grad_norm: 0.9644619935806713, iteration: 20140
loss: 0.9806067943572998,grad_norm: 0.999999093881893, iteration: 20141
loss: 0.9794407486915588,grad_norm: 0.999999040738548, iteration: 20142
loss: 1.0957564115524292,grad_norm: 0.9999995780252634, iteration: 20143
loss: 1.000038743019104,grad_norm: 0.9449347098669894, iteration: 20144
loss: 1.0214625597000122,grad_norm: 0.926267690258341, iteration: 20145
loss: 0.9642150402069092,grad_norm: 0.9999991037498371, iteration: 20146
loss: 1.0134118795394897,grad_norm: 0.9999991850375948, iteration: 20147
loss: 0.9908940196037292,grad_norm: 0.9999991444509158, iteration: 20148
loss: 1.0398560762405396,grad_norm: 0.9999995016169598, iteration: 20149
loss: 0.9929138422012329,grad_norm: 0.9999991960968613, iteration: 20150
loss: 1.0256929397583008,grad_norm: 0.9999991505552615, iteration: 20151
loss: 1.0347692966461182,grad_norm: 0.9999992497550901, iteration: 20152
loss: 1.008290410041809,grad_norm: 0.9999991010833928, iteration: 20153
loss: 1.0070152282714844,grad_norm: 0.9999990731719949, iteration: 20154
loss: 1.0184440612792969,grad_norm: 0.9999991029092248, iteration: 20155
loss: 1.0250306129455566,grad_norm: 0.891791194465892, iteration: 20156
loss: 0.9874541759490967,grad_norm: 0.9999990895885499, iteration: 20157
loss: 1.026397705078125,grad_norm: 0.9642216373773306, iteration: 20158
loss: 1.0419690608978271,grad_norm: 0.9999991605854536, iteration: 20159
loss: 0.9712676405906677,grad_norm: 0.9286026054773524, iteration: 20160
loss: 0.9968085885047913,grad_norm: 0.999998975536499, iteration: 20161
loss: 1.044230580329895,grad_norm: 0.9999990992354043, iteration: 20162
loss: 1.0178622007369995,grad_norm: 0.9999991705262277, iteration: 20163
loss: 1.0181840658187866,grad_norm: 0.9999992888182829, iteration: 20164
loss: 0.9737467765808105,grad_norm: 0.9999993009263215, iteration: 20165
loss: 1.0103782415390015,grad_norm: 0.9999993219400729, iteration: 20166
loss: 0.9788896441459656,grad_norm: 0.999999195235828, iteration: 20167
loss: 1.0330803394317627,grad_norm: 0.9999992594439157, iteration: 20168
loss: 1.0378789901733398,grad_norm: 0.9999991630810544, iteration: 20169
loss: 1.017960548400879,grad_norm: 0.9999990213880884, iteration: 20170
loss: 1.0072382688522339,grad_norm: 0.9600039348245486, iteration: 20171
loss: 1.035319447517395,grad_norm: 0.9999990502945656, iteration: 20172
loss: 0.9730682969093323,grad_norm: 0.9482618100457434, iteration: 20173
loss: 1.0411417484283447,grad_norm: 0.9999991008597874, iteration: 20174
loss: 0.9646898508071899,grad_norm: 0.9545389444574865, iteration: 20175
loss: 1.0100572109222412,grad_norm: 0.9866751433749207, iteration: 20176
loss: 0.9984341263771057,grad_norm: 0.9999990118795973, iteration: 20177
loss: 1.0300532579421997,grad_norm: 0.9999991136791949, iteration: 20178
loss: 1.0360217094421387,grad_norm: 0.9123324710907331, iteration: 20179
loss: 1.0084096193313599,grad_norm: 0.9999991684215077, iteration: 20180
loss: 1.0290755033493042,grad_norm: 0.9999991170378882, iteration: 20181
loss: 1.0336710214614868,grad_norm: 0.9999990172523936, iteration: 20182
loss: 1.0074286460876465,grad_norm: 0.9999991836792734, iteration: 20183
loss: 1.014209508895874,grad_norm: 0.9999992681971698, iteration: 20184
loss: 1.015931487083435,grad_norm: 0.9633074564047341, iteration: 20185
loss: 1.0086829662322998,grad_norm: 0.9290305606440301, iteration: 20186
loss: 1.0496965646743774,grad_norm: 0.9999992935294346, iteration: 20187
loss: 0.992838442325592,grad_norm: 0.9999990983952273, iteration: 20188
loss: 1.034855842590332,grad_norm: 0.9999990094991033, iteration: 20189
loss: 1.0169157981872559,grad_norm: 0.9292995681886487, iteration: 20190
loss: 1.011572003364563,grad_norm: 0.999999491727255, iteration: 20191
loss: 0.9923862814903259,grad_norm: 0.9999990179843425, iteration: 20192
loss: 1.033102035522461,grad_norm: 0.9490227310974235, iteration: 20193
loss: 1.0128426551818848,grad_norm: 0.8580294466599613, iteration: 20194
loss: 1.0055111646652222,grad_norm: 0.9215807983338913, iteration: 20195
loss: 1.0117321014404297,grad_norm: 0.9999990420169287, iteration: 20196
loss: 1.0025871992111206,grad_norm: 0.9999990354129805, iteration: 20197
loss: 1.0474568605422974,grad_norm: 0.9999991363042436, iteration: 20198
loss: 0.9717492461204529,grad_norm: 0.999999123351796, iteration: 20199
loss: 1.0254746675491333,grad_norm: 0.9310367188279329, iteration: 20200
loss: 1.0418483018875122,grad_norm: 0.9613666319259602, iteration: 20201
loss: 1.0068005323410034,grad_norm: 0.9999991474288842, iteration: 20202
loss: 1.039901614189148,grad_norm: 0.999999054217529, iteration: 20203
loss: 0.9615929126739502,grad_norm: 0.9999990921898849, iteration: 20204
loss: 0.992652416229248,grad_norm: 0.9420861744580743, iteration: 20205
loss: 1.0646677017211914,grad_norm: 0.9999997208294785, iteration: 20206
loss: 0.996997058391571,grad_norm: 0.8421950340878298, iteration: 20207
loss: 1.0509439706802368,grad_norm: 0.9999990356438265, iteration: 20208
loss: 1.0133178234100342,grad_norm: 0.9999991032741249, iteration: 20209
loss: 1.0391799211502075,grad_norm: 0.9999990231643918, iteration: 20210
loss: 0.984479546546936,grad_norm: 0.9582908230862377, iteration: 20211
loss: 1.018316626548767,grad_norm: 0.9283289087067167, iteration: 20212
loss: 1.0378761291503906,grad_norm: 0.9999990986349467, iteration: 20213
loss: 1.0103124380111694,grad_norm: 0.8833597480654273, iteration: 20214
loss: 0.9982780814170837,grad_norm: 0.9999995931661095, iteration: 20215
loss: 1.045616865158081,grad_norm: 0.9999991608539228, iteration: 20216
loss: 0.9999542832374573,grad_norm: 0.999999100499064, iteration: 20217
loss: 0.9821762442588806,grad_norm: 0.9999991514802367, iteration: 20218
loss: 1.049340009689331,grad_norm: 0.9999991605420177, iteration: 20219
loss: 0.9846727252006531,grad_norm: 0.9999990557846193, iteration: 20220
loss: 1.0017136335372925,grad_norm: 0.9999990977461408, iteration: 20221
loss: 1.0019031763076782,grad_norm: 0.9999991075620189, iteration: 20222
loss: 1.039852499961853,grad_norm: 0.9999997869551109, iteration: 20223
loss: 1.0022590160369873,grad_norm: 0.9999990912849052, iteration: 20224
loss: 1.0228781700134277,grad_norm: 0.9999992339193292, iteration: 20225
loss: 1.0102622509002686,grad_norm: 0.9554828432534461, iteration: 20226
loss: 0.9999367594718933,grad_norm: 0.9999991620415044, iteration: 20227
loss: 0.9769290685653687,grad_norm: 0.9999994379684571, iteration: 20228
loss: 1.012919545173645,grad_norm: 0.9043496376495858, iteration: 20229
loss: 1.041335105895996,grad_norm: 0.9999991713678303, iteration: 20230
loss: 1.0514365434646606,grad_norm: 0.9999990812168528, iteration: 20231
loss: 0.9957177042961121,grad_norm: 0.9999991972544247, iteration: 20232
loss: 1.0633927583694458,grad_norm: 0.9999990263958385, iteration: 20233
loss: 1.0288975238800049,grad_norm: 0.9999990226610368, iteration: 20234
loss: 1.0494967699050903,grad_norm: 0.999999765195643, iteration: 20235
loss: 1.0275083780288696,grad_norm: 0.9999991856688579, iteration: 20236
loss: 1.0192844867706299,grad_norm: 0.8691943912551624, iteration: 20237
loss: 0.9999716877937317,grad_norm: 0.9199964003679411, iteration: 20238
loss: 0.9579821825027466,grad_norm: 0.999999087378474, iteration: 20239
loss: 1.0581858158111572,grad_norm: 0.9999993118146837, iteration: 20240
loss: 1.040995717048645,grad_norm: 0.999999476422324, iteration: 20241
loss: 0.9894983768463135,grad_norm: 0.9999990491474288, iteration: 20242
loss: 1.0037096738815308,grad_norm: 0.9490669548370324, iteration: 20243
loss: 1.0215468406677246,grad_norm: 0.9600667359987369, iteration: 20244
loss: 0.9981966614723206,grad_norm: 0.9512687318416017, iteration: 20245
loss: 0.9963843822479248,grad_norm: 0.8517191775832357, iteration: 20246
loss: 0.9836545586585999,grad_norm: 0.9394488586809682, iteration: 20247
loss: 1.04153573513031,grad_norm: 0.9999994613067117, iteration: 20248
loss: 1.0068973302841187,grad_norm: 0.9891649591476199, iteration: 20249
loss: 1.036037564277649,grad_norm: 0.9999991670887515, iteration: 20250
loss: 1.0182517766952515,grad_norm: 0.9999991287151516, iteration: 20251
loss: 1.006003975868225,grad_norm: 0.8884340569554551, iteration: 20252
loss: 1.0089802742004395,grad_norm: 0.9461981514913946, iteration: 20253
loss: 0.9923676252365112,grad_norm: 0.9875488982750068, iteration: 20254
loss: 0.996068000793457,grad_norm: 0.9999993007157435, iteration: 20255
loss: 1.0188806056976318,grad_norm: 0.9999990175396075, iteration: 20256
loss: 0.9691128134727478,grad_norm: 0.8517062904153425, iteration: 20257
loss: 1.0137404203414917,grad_norm: 0.9999992169989057, iteration: 20258
loss: 0.9998935461044312,grad_norm: 0.9999993880286836, iteration: 20259
loss: 0.9846109747886658,grad_norm: 0.946559401495046, iteration: 20260
loss: 0.9994941353797913,grad_norm: 0.9999991168141731, iteration: 20261
loss: 0.9908014535903931,grad_norm: 0.99999906776123, iteration: 20262
loss: 1.035494089126587,grad_norm: 0.9999993251925485, iteration: 20263
loss: 1.0060522556304932,grad_norm: 0.9366901148048346, iteration: 20264
loss: 0.9767154455184937,grad_norm: 0.9999992129388282, iteration: 20265
loss: 0.983126699924469,grad_norm: 0.8404248501503294, iteration: 20266
loss: 0.9730671048164368,grad_norm: 0.9200535834945495, iteration: 20267
loss: 0.9610026478767395,grad_norm: 0.9999991127214166, iteration: 20268
loss: 0.9786161780357361,grad_norm: 0.9999992833501036, iteration: 20269
loss: 1.0323286056518555,grad_norm: 0.9999998073831542, iteration: 20270
loss: 1.0027464628219604,grad_norm: 0.9907762252879837, iteration: 20271
loss: 1.017065167427063,grad_norm: 0.9999992976203196, iteration: 20272
loss: 0.9801930785179138,grad_norm: 0.8353147119346223, iteration: 20273
loss: 0.9732317328453064,grad_norm: 0.8950748371987735, iteration: 20274
loss: 1.0575971603393555,grad_norm: 0.999999431005323, iteration: 20275
loss: 0.9823798537254333,grad_norm: 0.9999990338509087, iteration: 20276
loss: 0.9890228509902954,grad_norm: 0.8642268437391161, iteration: 20277
loss: 1.0250071287155151,grad_norm: 0.9999992994579876, iteration: 20278
loss: 1.0265979766845703,grad_norm: 0.9630331901448829, iteration: 20279
loss: 1.0148035287857056,grad_norm: 0.9999992051464787, iteration: 20280
loss: 0.984669029712677,grad_norm: 0.9999992031376032, iteration: 20281
loss: 1.0014487504959106,grad_norm: 0.9619319130569276, iteration: 20282
loss: 1.0347979068756104,grad_norm: 0.9224375193020994, iteration: 20283
loss: 1.003132700920105,grad_norm: 0.9999991069728736, iteration: 20284
loss: 1.0192242860794067,grad_norm: 0.9999990526695288, iteration: 20285
loss: 1.012007474899292,grad_norm: 0.9999992052614971, iteration: 20286
loss: 1.0430457592010498,grad_norm: 0.9999991623194342, iteration: 20287
loss: 1.015379786491394,grad_norm: 0.9999991198007492, iteration: 20288
loss: 0.974831759929657,grad_norm: 0.9999990534890644, iteration: 20289
loss: 0.9533815979957581,grad_norm: 0.9999990127895201, iteration: 20290
loss: 1.0422602891921997,grad_norm: 0.9999995000543953, iteration: 20291
loss: 0.9692900776863098,grad_norm: 0.9999991218815721, iteration: 20292
loss: 1.024847149848938,grad_norm: 0.9999991548570865, iteration: 20293
loss: 1.033051609992981,grad_norm: 0.9999991569145856, iteration: 20294
loss: 1.0027167797088623,grad_norm: 0.9867784453988443, iteration: 20295
loss: 0.9981515407562256,grad_norm: 0.9999992119519024, iteration: 20296
loss: 1.013893723487854,grad_norm: 0.9999992908037609, iteration: 20297
loss: 0.9801459312438965,grad_norm: 0.9999991438629837, iteration: 20298
loss: 1.0182244777679443,grad_norm: 0.999999189231293, iteration: 20299
loss: 1.000783085823059,grad_norm: 0.99999920429309, iteration: 20300
loss: 0.9927746057510376,grad_norm: 0.9999992639870425, iteration: 20301
loss: 1.0240647792816162,grad_norm: 0.999999410579818, iteration: 20302
loss: 1.0832924842834473,grad_norm: 0.9999996760802493, iteration: 20303
loss: 1.0171191692352295,grad_norm: 0.9520934235380243, iteration: 20304
loss: 1.0290669202804565,grad_norm: 0.9999995075577612, iteration: 20305
loss: 1.0507254600524902,grad_norm: 0.9191321762079288, iteration: 20306
loss: 1.0348409414291382,grad_norm: 0.9999994272454586, iteration: 20307
loss: 1.0271245241165161,grad_norm: 0.851503996218792, iteration: 20308
loss: 1.0483167171478271,grad_norm: 0.9759142007348244, iteration: 20309
loss: 0.9813817143440247,grad_norm: 0.9747592478402894, iteration: 20310
loss: 1.0028808116912842,grad_norm: 0.9999990101861805, iteration: 20311
loss: 1.0524699687957764,grad_norm: 0.9999992828123663, iteration: 20312
loss: 1.0265660285949707,grad_norm: 0.9212967083367771, iteration: 20313
loss: 1.0275766849517822,grad_norm: 0.9999993149314487, iteration: 20314
loss: 0.984574019908905,grad_norm: 0.999999038249065, iteration: 20315
loss: 1.038806676864624,grad_norm: 0.9999992611817473, iteration: 20316
loss: 1.0402902364730835,grad_norm: 0.9999991570942105, iteration: 20317
loss: 1.0105692148208618,grad_norm: 0.9331807735462161, iteration: 20318
loss: 1.0518213510513306,grad_norm: 0.9999994723091896, iteration: 20319
loss: 1.00031316280365,grad_norm: 0.9294015393829197, iteration: 20320
loss: 1.003163456916809,grad_norm: 0.9808626551103489, iteration: 20321
loss: 1.0673201084136963,grad_norm: 0.9999993769645191, iteration: 20322
loss: 1.0116978883743286,grad_norm: 0.9567053185797686, iteration: 20323
loss: 0.9848135709762573,grad_norm: 0.9999991540945926, iteration: 20324
loss: 1.0063319206237793,grad_norm: 0.9419558090919102, iteration: 20325
loss: 1.0152451992034912,grad_norm: 0.9596152035380682, iteration: 20326
loss: 1.009207010269165,grad_norm: 0.9999992250125086, iteration: 20327
loss: 1.0001087188720703,grad_norm: 0.9999992324019227, iteration: 20328
loss: 1.031303882598877,grad_norm: 0.9999991999407516, iteration: 20329
loss: 0.9898976683616638,grad_norm: 0.9603581703288098, iteration: 20330
loss: 1.0476330518722534,grad_norm: 0.999999334665677, iteration: 20331
loss: 0.9979822039604187,grad_norm: 0.9938365874636038, iteration: 20332
loss: 1.02191960811615,grad_norm: 0.99999909808104, iteration: 20333
loss: 1.0135555267333984,grad_norm: 0.9999991085174714, iteration: 20334
loss: 0.9932953119277954,grad_norm: 0.9999991740769223, iteration: 20335
loss: 1.040626883506775,grad_norm: 0.9400806417858741, iteration: 20336
loss: 1.0258703231811523,grad_norm: 0.9673501200946556, iteration: 20337
loss: 1.0447118282318115,grad_norm: 0.9999992682377762, iteration: 20338
loss: 1.013195514678955,grad_norm: 0.9999989782028168, iteration: 20339
loss: 0.981235921382904,grad_norm: 0.9999991248993947, iteration: 20340
loss: 0.9939901828765869,grad_norm: 0.9617052158249108, iteration: 20341
loss: 1.0447481870651245,grad_norm: 0.9999991926789099, iteration: 20342
loss: 1.0237306356430054,grad_norm: 0.8919784378084323, iteration: 20343
loss: 0.9992996454238892,grad_norm: 0.9999994945662216, iteration: 20344
loss: 1.0051432847976685,grad_norm: 0.9999990353671364, iteration: 20345
loss: 1.015139102935791,grad_norm: 0.9999990359839996, iteration: 20346
loss: 1.0290027856826782,grad_norm: 0.882902767809382, iteration: 20347
loss: 1.0101633071899414,grad_norm: 0.9999991159205516, iteration: 20348
loss: 1.0477843284606934,grad_norm: 0.9999992810347039, iteration: 20349
loss: 1.0199432373046875,grad_norm: 0.9999994489271851, iteration: 20350
loss: 1.1219615936279297,grad_norm: 0.9999996712953846, iteration: 20351
loss: 1.0323214530944824,grad_norm: 0.9999991540763328, iteration: 20352
loss: 0.9823694825172424,grad_norm: 0.9901224102005901, iteration: 20353
loss: 1.011149525642395,grad_norm: 0.9999991174368681, iteration: 20354
loss: 0.9980918169021606,grad_norm: 0.9999989594449249, iteration: 20355
loss: 1.0370278358459473,grad_norm: 0.9999994116880738, iteration: 20356
loss: 1.0199922323226929,grad_norm: 0.9500715681380367, iteration: 20357
loss: 1.0840343236923218,grad_norm: 0.9999997529084936, iteration: 20358
loss: 1.0203510522842407,grad_norm: 0.9999994225670437, iteration: 20359
loss: 1.010660171508789,grad_norm: 0.8913125979774851, iteration: 20360
loss: 1.0030843019485474,grad_norm: 0.9999994210911446, iteration: 20361
loss: 1.0373061895370483,grad_norm: 0.9999993881724163, iteration: 20362
loss: 0.9820342063903809,grad_norm: 0.9999992386995556, iteration: 20363
loss: 0.9985765218734741,grad_norm: 0.9999994231852032, iteration: 20364
loss: 0.9855290651321411,grad_norm: 0.9999991771659317, iteration: 20365
loss: 1.034040093421936,grad_norm: 0.9999992686321767, iteration: 20366
loss: 0.9794872403144836,grad_norm: 0.9999990784126545, iteration: 20367
loss: 1.0143954753875732,grad_norm: 0.9999990912257202, iteration: 20368
loss: 1.0509296655654907,grad_norm: 0.9772800791719871, iteration: 20369
loss: 1.0588759183883667,grad_norm: 0.9999992844089418, iteration: 20370
loss: 1.0355240106582642,grad_norm: 0.9999996784685495, iteration: 20371
loss: 0.9945589900016785,grad_norm: 0.9725007232443148, iteration: 20372
loss: 0.9780499339103699,grad_norm: 0.9999991836491624, iteration: 20373
loss: 0.9993746280670166,grad_norm: 0.9816369526343595, iteration: 20374
loss: 1.0227481126785278,grad_norm: 0.9999991649347901, iteration: 20375
loss: 1.0144131183624268,grad_norm: 0.9223844730972988, iteration: 20376
loss: 1.022767424583435,grad_norm: 0.8616489730737474, iteration: 20377
loss: 0.9983797669410706,grad_norm: 0.9999992792209454, iteration: 20378
loss: 1.0122339725494385,grad_norm: 0.9999992034618198, iteration: 20379
loss: 1.041146993637085,grad_norm: 0.9999990786231393, iteration: 20380
loss: 1.0242995023727417,grad_norm: 0.9999991383522813, iteration: 20381
loss: 1.017189383506775,grad_norm: 0.9999991615591088, iteration: 20382
loss: 1.0192245244979858,grad_norm: 0.9999991061994254, iteration: 20383
loss: 1.0641831159591675,grad_norm: 0.997833467905832, iteration: 20384
loss: 1.0028376579284668,grad_norm: 0.9475736620462331, iteration: 20385
loss: 1.014802098274231,grad_norm: 0.999999822568753, iteration: 20386
loss: 1.0380380153656006,grad_norm: 0.9355920864921995, iteration: 20387
loss: 1.0194882154464722,grad_norm: 0.9999992324350135, iteration: 20388
loss: 1.0047554969787598,grad_norm: 0.999998953391614, iteration: 20389
loss: 1.0220754146575928,grad_norm: 0.9999992222578749, iteration: 20390
loss: 1.0250424146652222,grad_norm: 0.9999991246460532, iteration: 20391
loss: 1.0245763063430786,grad_norm: 0.9999991847150063, iteration: 20392
loss: 1.0092673301696777,grad_norm: 0.9999990821835016, iteration: 20393
loss: 1.048163890838623,grad_norm: 0.9999990448527085, iteration: 20394
loss: 1.0036426782608032,grad_norm: 0.9999990934782811, iteration: 20395
loss: 0.9983828067779541,grad_norm: 0.9999991266628697, iteration: 20396
loss: 1.0125126838684082,grad_norm: 0.9377815037566443, iteration: 20397
loss: 1.0573816299438477,grad_norm: 0.9999991530113509, iteration: 20398
loss: 0.9924893975257874,grad_norm: 0.9999992967825703, iteration: 20399
loss: 1.0219310522079468,grad_norm: 0.9999992127303037, iteration: 20400
loss: 1.006941318511963,grad_norm: 0.9999990312226666, iteration: 20401
loss: 1.0084428787231445,grad_norm: 0.9999990221034762, iteration: 20402
loss: 0.991596519947052,grad_norm: 0.948478418611418, iteration: 20403
loss: 1.0104072093963623,grad_norm: 0.9582839231345253, iteration: 20404
loss: 1.042889952659607,grad_norm: 0.9999996792058202, iteration: 20405
loss: 1.0070860385894775,grad_norm: 0.9999993808775843, iteration: 20406
loss: 1.006906509399414,grad_norm: 0.9999990406135378, iteration: 20407
loss: 1.0101916790008545,grad_norm: 0.9999991231761091, iteration: 20408
loss: 1.03473699092865,grad_norm: 0.9999994132195891, iteration: 20409
loss: 1.0354111194610596,grad_norm: 0.9999992419400847, iteration: 20410
loss: 1.0292811393737793,grad_norm: 0.9210290548717708, iteration: 20411
loss: 0.9982953071594238,grad_norm: 0.8526306640577028, iteration: 20412
loss: 1.0226349830627441,grad_norm: 0.9999993053830993, iteration: 20413
loss: 0.9961068630218506,grad_norm: 0.9999990753507254, iteration: 20414
loss: 0.9896437525749207,grad_norm: 0.999999255198334, iteration: 20415
loss: 1.0050028562545776,grad_norm: 0.9999990331395423, iteration: 20416
loss: 1.0099120140075684,grad_norm: 0.8689029322344862, iteration: 20417
loss: 1.0246342420578003,grad_norm: 0.9580602745620437, iteration: 20418
loss: 1.0232642889022827,grad_norm: 0.9847560386411841, iteration: 20419
loss: 1.0335856676101685,grad_norm: 0.9999991603020071, iteration: 20420
loss: 1.0050181150436401,grad_norm: 0.9999991058404296, iteration: 20421
loss: 1.0275248289108276,grad_norm: 0.9999991816555416, iteration: 20422
loss: 0.9757856726646423,grad_norm: 0.9669682510552619, iteration: 20423
loss: 1.0163668394088745,grad_norm: 0.9999989748013687, iteration: 20424
loss: 1.0409979820251465,grad_norm: 0.9999990302487249, iteration: 20425
loss: 1.0051323175430298,grad_norm: 0.903372068546834, iteration: 20426
loss: 1.0197633504867554,grad_norm: 0.9999990918518898, iteration: 20427
loss: 0.9697131514549255,grad_norm: 0.9445921768567871, iteration: 20428
loss: 1.0580517053604126,grad_norm: 0.9999990729809382, iteration: 20429
loss: 0.9911626577377319,grad_norm: 0.9687832754601267, iteration: 20430
loss: 1.0436172485351562,grad_norm: 0.9999993130558905, iteration: 20431
loss: 1.035377025604248,grad_norm: 0.9999989745369607, iteration: 20432
loss: 1.0374424457550049,grad_norm: 0.9999995671096331, iteration: 20433
loss: 1.026491641998291,grad_norm: 0.9999992112571444, iteration: 20434
loss: 1.0352903604507446,grad_norm: 0.9999996274255941, iteration: 20435
loss: 1.0084600448608398,grad_norm: 0.9999989631588366, iteration: 20436
loss: 1.0040279626846313,grad_norm: 0.8779779686469418, iteration: 20437
loss: 1.0246118307113647,grad_norm: 0.9999992206191471, iteration: 20438
loss: 1.0272068977355957,grad_norm: 0.9999993053781252, iteration: 20439
loss: 1.0238782167434692,grad_norm: 0.943286781215975, iteration: 20440
loss: 0.9880367517471313,grad_norm: 0.999999263599241, iteration: 20441
loss: 0.9858009815216064,grad_norm: 0.9331654301343327, iteration: 20442
loss: 1.0060800313949585,grad_norm: 0.9999994057651165, iteration: 20443
loss: 1.0101608037948608,grad_norm: 0.9999992942258846, iteration: 20444
loss: 1.025316834449768,grad_norm: 0.9999990994478278, iteration: 20445
loss: 1.039990782737732,grad_norm: 0.9999994274692401, iteration: 20446
loss: 1.0206259489059448,grad_norm: 0.9999990103810935, iteration: 20447
loss: 1.0113232135772705,grad_norm: 0.9999994747327907, iteration: 20448
loss: 1.0181015729904175,grad_norm: 0.9999990500772343, iteration: 20449
loss: 1.0118685960769653,grad_norm: 0.9789775079758947, iteration: 20450
loss: 1.040104866027832,grad_norm: 0.9999991501659845, iteration: 20451
loss: 1.0338287353515625,grad_norm: 0.9999990354581172, iteration: 20452
loss: 1.0176717042922974,grad_norm: 0.999999192782223, iteration: 20453
loss: 1.0455220937728882,grad_norm: 0.9999991687179565, iteration: 20454
loss: 0.9847143888473511,grad_norm: 0.9150481235875045, iteration: 20455
loss: 1.0176869630813599,grad_norm: 0.999999064239269, iteration: 20456
loss: 1.0135080814361572,grad_norm: 0.9999991717611979, iteration: 20457
loss: 1.0374046564102173,grad_norm: 0.9999996334808148, iteration: 20458
loss: 1.0113964080810547,grad_norm: 0.9907893177533513, iteration: 20459
loss: 1.0286438465118408,grad_norm: 0.9299170607880171, iteration: 20460
loss: 0.9957307577133179,grad_norm: 0.9339873247559056, iteration: 20461
loss: 0.9836734533309937,grad_norm: 0.9999991111188896, iteration: 20462
loss: 1.020351767539978,grad_norm: 0.9999991674747216, iteration: 20463
loss: 1.0636632442474365,grad_norm: 0.9999993358393344, iteration: 20464
loss: 0.9730749726295471,grad_norm: 0.9974117346289698, iteration: 20465
loss: 1.0029852390289307,grad_norm: 0.9999991253966466, iteration: 20466
loss: 1.0101758241653442,grad_norm: 0.9151949079640012, iteration: 20467
loss: 1.0156879425048828,grad_norm: 0.9999991156611129, iteration: 20468
loss: 0.9996572136878967,grad_norm: 0.9452964608630718, iteration: 20469
loss: 1.0022172927856445,grad_norm: 0.999999090000711, iteration: 20470
loss: 0.9845311045646667,grad_norm: 0.9877391286047899, iteration: 20471
loss: 1.0152924060821533,grad_norm: 0.9999991143136818, iteration: 20472
loss: 1.001015305519104,grad_norm: 0.9480671013536187, iteration: 20473
loss: 1.0188134908676147,grad_norm: 0.9999992121670073, iteration: 20474
loss: 0.9783547520637512,grad_norm: 0.9999991150745952, iteration: 20475
loss: 1.0170788764953613,grad_norm: 0.9077023073041511, iteration: 20476
loss: 1.005997657775879,grad_norm: 0.9999992609429859, iteration: 20477
loss: 0.9968283772468567,grad_norm: 0.9999991695123038, iteration: 20478
loss: 0.9971668720245361,grad_norm: 0.9630206691742393, iteration: 20479
loss: 1.0026603937149048,grad_norm: 0.9999990593115157, iteration: 20480
loss: 1.034751296043396,grad_norm: 0.9999990422439267, iteration: 20481
loss: 1.051159143447876,grad_norm: 0.9446239296213902, iteration: 20482
loss: 1.0002515316009521,grad_norm: 0.9999993583841974, iteration: 20483
loss: 0.9691762328147888,grad_norm: 0.9365974769802949, iteration: 20484
loss: 0.9953206181526184,grad_norm: 0.9999991958042531, iteration: 20485
loss: 1.0066519975662231,grad_norm: 0.9999991328585914, iteration: 20486
loss: 0.9887081980705261,grad_norm: 0.9999989825575157, iteration: 20487
loss: 1.043848991394043,grad_norm: 0.9999989870231856, iteration: 20488
loss: 1.012061357498169,grad_norm: 0.9999992984034619, iteration: 20489
loss: 1.0138881206512451,grad_norm: 0.944304564731576, iteration: 20490
loss: 1.0058411359786987,grad_norm: 0.9999996993735775, iteration: 20491
loss: 0.9966223835945129,grad_norm: 0.8677953844927139, iteration: 20492
loss: 0.9920340180397034,grad_norm: 0.9999992767341347, iteration: 20493
loss: 0.9697134494781494,grad_norm: 0.9999992888606709, iteration: 20494
loss: 1.0108219385147095,grad_norm: 0.9999992576924158, iteration: 20495
loss: 1.0170810222625732,grad_norm: 0.9999992263647904, iteration: 20496
loss: 1.0406430959701538,grad_norm: 0.9920331299873482, iteration: 20497
loss: 1.005094289779663,grad_norm: 0.9999992418111318, iteration: 20498
loss: 1.0203807353973389,grad_norm: 0.9999991980186974, iteration: 20499
loss: 1.0008894205093384,grad_norm: 0.9990933875709604, iteration: 20500
loss: 1.0141053199768066,grad_norm: 0.9999992338954423, iteration: 20501
loss: 0.9771820306777954,grad_norm: 0.9999994098485852, iteration: 20502
loss: 1.0496501922607422,grad_norm: 0.9999992530444248, iteration: 20503
loss: 0.9580352902412415,grad_norm: 0.9999992339319485, iteration: 20504
loss: 1.0203975439071655,grad_norm: 0.9999990624358895, iteration: 20505
loss: 1.0146597623825073,grad_norm: 0.9999990906228502, iteration: 20506
loss: 0.9882789254188538,grad_norm: 0.9879679328250104, iteration: 20507
loss: 0.9995095133781433,grad_norm: 0.9669528978236589, iteration: 20508
loss: 1.0295578241348267,grad_norm: 0.949084823705522, iteration: 20509
loss: 0.9925959706306458,grad_norm: 0.9999992446859924, iteration: 20510
loss: 0.9845768213272095,grad_norm: 0.9999992013245388, iteration: 20511
loss: 1.0084116458892822,grad_norm: 0.9999991254181023, iteration: 20512
loss: 0.977408230304718,grad_norm: 0.9999992279340023, iteration: 20513
loss: 1.0732111930847168,grad_norm: 0.9999997227560344, iteration: 20514
loss: 1.0355969667434692,grad_norm: 0.9999990864839164, iteration: 20515
loss: 1.0088311433792114,grad_norm: 0.9999992021877367, iteration: 20516
loss: 1.0496997833251953,grad_norm: 0.965675813081103, iteration: 20517
loss: 1.0344548225402832,grad_norm: 0.9999994942095713, iteration: 20518
loss: 1.017633080482483,grad_norm: 0.9999990393766642, iteration: 20519
loss: 1.031542181968689,grad_norm: 0.9999989774194822, iteration: 20520
loss: 1.0032745599746704,grad_norm: 0.9999991173008409, iteration: 20521
loss: 0.995934009552002,grad_norm: 0.9979807007285079, iteration: 20522
loss: 1.00144362449646,grad_norm: 0.999999246709927, iteration: 20523
loss: 1.0037155151367188,grad_norm: 0.9999992681107172, iteration: 20524
loss: 1.0229296684265137,grad_norm: 0.9999992619699876, iteration: 20525
loss: 1.0278124809265137,grad_norm: 0.9999990661026977, iteration: 20526
loss: 1.010888934135437,grad_norm: 0.9999992040596671, iteration: 20527
loss: 1.0522667169570923,grad_norm: 0.9999990098090482, iteration: 20528
loss: 1.012014389038086,grad_norm: 0.9999992835741601, iteration: 20529
loss: 0.9962179064750671,grad_norm: 0.9999993850397126, iteration: 20530
loss: 1.0141268968582153,grad_norm: 0.9354732017191818, iteration: 20531
loss: 1.020005702972412,grad_norm: 0.9615263159163431, iteration: 20532
loss: 1.0446792840957642,grad_norm: 0.9813177036044296, iteration: 20533
loss: 1.0252935886383057,grad_norm: 0.9999994516878118, iteration: 20534
loss: 0.9857490062713623,grad_norm: 0.9999994265126313, iteration: 20535
loss: 1.039516806602478,grad_norm: 0.9999991997975862, iteration: 20536
loss: 1.0010324716567993,grad_norm: 0.9999990617835148, iteration: 20537
loss: 1.006669282913208,grad_norm: 0.8345573564371106, iteration: 20538
loss: 1.0391321182250977,grad_norm: 0.9999991856819616, iteration: 20539
loss: 0.9790439009666443,grad_norm: 0.9999991472305975, iteration: 20540
loss: 0.9835453033447266,grad_norm: 0.9516392323794958, iteration: 20541
loss: 1.0306675434112549,grad_norm: 0.9999992269650798, iteration: 20542
loss: 1.0420470237731934,grad_norm: 0.9999996004484826, iteration: 20543
loss: 0.9991064667701721,grad_norm: 0.961305082491159, iteration: 20544
loss: 1.006423830986023,grad_norm: 0.9999990364051838, iteration: 20545
loss: 1.0800247192382812,grad_norm: 0.9999994528019834, iteration: 20546
loss: 1.0577718019485474,grad_norm: 0.9999991314797373, iteration: 20547
loss: 1.035475254058838,grad_norm: 0.9661585625723621, iteration: 20548
loss: 1.0058048963546753,grad_norm: 0.9999992479537234, iteration: 20549
loss: 1.0423959493637085,grad_norm: 0.9954918857895758, iteration: 20550
loss: 1.0258066654205322,grad_norm: 0.9999990347983724, iteration: 20551
loss: 0.9497931599617004,grad_norm: 0.9699950838568873, iteration: 20552
loss: 1.0161149501800537,grad_norm: 0.9999992734643751, iteration: 20553
loss: 1.026537299156189,grad_norm: 0.8809539436239988, iteration: 20554
loss: 1.0201826095581055,grad_norm: 0.9999992119477897, iteration: 20555
loss: 1.0409177541732788,grad_norm: 0.9999992108375432, iteration: 20556
loss: 0.9947981238365173,grad_norm: 0.9999991312797195, iteration: 20557
loss: 1.0198322534561157,grad_norm: 0.7629912348615233, iteration: 20558
loss: 0.9893651604652405,grad_norm: 0.9931561536912319, iteration: 20559
loss: 0.9567489624023438,grad_norm: 0.9999992200481764, iteration: 20560
loss: 1.0338144302368164,grad_norm: 0.8636309996023616, iteration: 20561
loss: 0.99045729637146,grad_norm: 0.9965846218141662, iteration: 20562
loss: 1.0024797916412354,grad_norm: 0.9999998380166563, iteration: 20563
loss: 1.0160566568374634,grad_norm: 0.9923166886954282, iteration: 20564
loss: 0.990419328212738,grad_norm: 0.9999992425769502, iteration: 20565
loss: 0.9775853753089905,grad_norm: 0.9551115828067731, iteration: 20566
loss: 1.031748652458191,grad_norm: 0.999999180178771, iteration: 20567
loss: 0.9843109846115112,grad_norm: 0.9999991311176846, iteration: 20568
loss: 1.0385890007019043,grad_norm: 0.9999991331470364, iteration: 20569
loss: 0.9688782095909119,grad_norm: 0.9999991371301928, iteration: 20570
loss: 1.0367646217346191,grad_norm: 0.9999993900759966, iteration: 20571
loss: 1.0222201347351074,grad_norm: 0.8888866339422674, iteration: 20572
loss: 1.0129320621490479,grad_norm: 0.9266778112078355, iteration: 20573
loss: 1.0689754486083984,grad_norm: 0.9558148957086868, iteration: 20574
loss: 1.0287621021270752,grad_norm: 0.999999557365316, iteration: 20575
loss: 1.1430728435516357,grad_norm: 0.9999997650116926, iteration: 20576
loss: 1.0196422338485718,grad_norm: 0.9999991107711462, iteration: 20577
loss: 1.0383172035217285,grad_norm: 0.9999990822345309, iteration: 20578
loss: 1.0034743547439575,grad_norm: 0.9999990697049743, iteration: 20579
loss: 1.0103868246078491,grad_norm: 0.9999991959471142, iteration: 20580
loss: 1.0574026107788086,grad_norm: 0.9999998334348186, iteration: 20581
loss: 0.9326333999633789,grad_norm: 0.9999990629469331, iteration: 20582
loss: 1.0175658464431763,grad_norm: 0.9999992672647094, iteration: 20583
loss: 1.0185819864273071,grad_norm: 0.9523182600256418, iteration: 20584
loss: 1.0256701707839966,grad_norm: 0.9807826613043328, iteration: 20585
loss: 0.9856697916984558,grad_norm: 0.9999991314385449, iteration: 20586
loss: 1.045899748802185,grad_norm: 0.9165577930250347, iteration: 20587
loss: 1.036420226097107,grad_norm: 0.9999991126557719, iteration: 20588
loss: 0.9968411922454834,grad_norm: 0.9999990238244663, iteration: 20589
loss: 1.0209887027740479,grad_norm: 0.9999994830391707, iteration: 20590
loss: 1.0111428499221802,grad_norm: 0.9175487605500349, iteration: 20591
loss: 1.0272023677825928,grad_norm: 0.9999992863035394, iteration: 20592
loss: 0.9939795136451721,grad_norm: 0.9999992824567984, iteration: 20593
loss: 1.0152091979980469,grad_norm: 0.9999993149999418, iteration: 20594
loss: 0.959995687007904,grad_norm: 0.878562624434658, iteration: 20595
loss: 0.9978017807006836,grad_norm: 0.9588274974735508, iteration: 20596
loss: 1.0542807579040527,grad_norm: 0.9999990952856972, iteration: 20597
loss: 1.0180104970932007,grad_norm: 0.9932326198177968, iteration: 20598
loss: 0.9960812926292419,grad_norm: 0.999999198302483, iteration: 20599
loss: 1.0317413806915283,grad_norm: 0.9999998741820508, iteration: 20600
loss: 1.0095809698104858,grad_norm: 0.9999993869765734, iteration: 20601
loss: 1.0475958585739136,grad_norm: 0.9999990165922906, iteration: 20602
loss: 1.0023525953292847,grad_norm: 0.9999992068165814, iteration: 20603
loss: 1.0046368837356567,grad_norm: 0.985717283287775, iteration: 20604
loss: 0.977498471736908,grad_norm: 0.9361624980037211, iteration: 20605
loss: 0.9588878154754639,grad_norm: 0.9551162850600022, iteration: 20606
loss: 1.004228949546814,grad_norm: 0.7542464509538707, iteration: 20607
loss: 0.9981645345687866,grad_norm: 0.9469932045243477, iteration: 20608
loss: 1.030185580253601,grad_norm: 0.8418500496992403, iteration: 20609
loss: 0.9732274413108826,grad_norm: 0.9999991359892131, iteration: 20610
loss: 1.0328340530395508,grad_norm: 0.9999991377332073, iteration: 20611
loss: 0.9807277917861938,grad_norm: 0.999999215302193, iteration: 20612
loss: 0.9839721322059631,grad_norm: 0.9999990993389293, iteration: 20613
loss: 1.008873701095581,grad_norm: 0.9602298813159337, iteration: 20614
loss: 0.9591874480247498,grad_norm: 0.9999992115706771, iteration: 20615
loss: 1.0186688899993896,grad_norm: 0.9999990927411209, iteration: 20616
loss: 1.0230756998062134,grad_norm: 0.9999990787104054, iteration: 20617
loss: 1.0219969749450684,grad_norm: 0.9999990418294998, iteration: 20618
loss: 1.0201531648635864,grad_norm: 0.9123317908407507, iteration: 20619
loss: 1.020421028137207,grad_norm: 0.9999990679344344, iteration: 20620
loss: 1.0411816835403442,grad_norm: 0.9987310157515346, iteration: 20621
loss: 1.0268205404281616,grad_norm: 0.9999992172316828, iteration: 20622
loss: 1.0138012170791626,grad_norm: 0.971209210813406, iteration: 20623
loss: 1.0068360567092896,grad_norm: 0.999999227042839, iteration: 20624
loss: 1.0219300985336304,grad_norm: 0.9999994247113934, iteration: 20625
loss: 1.0154602527618408,grad_norm: 0.9999990254173088, iteration: 20626
loss: 1.080979585647583,grad_norm: 0.9999996075276318, iteration: 20627
loss: 1.0002713203430176,grad_norm: 0.9999995368178508, iteration: 20628
loss: 1.0393527746200562,grad_norm: 0.9999989502845915, iteration: 20629
loss: 1.0008070468902588,grad_norm: 0.9075271303492175, iteration: 20630
loss: 0.9894930720329285,grad_norm: 0.9999990416492682, iteration: 20631
loss: 1.0324960947036743,grad_norm: 0.9197872214289033, iteration: 20632
loss: 1.0101195573806763,grad_norm: 0.9469970682737732, iteration: 20633
loss: 1.0289216041564941,grad_norm: 0.9999991637240294, iteration: 20634
loss: 1.023458480834961,grad_norm: 0.999999068037468, iteration: 20635
loss: 1.0175299644470215,grad_norm: 0.9999990543352567, iteration: 20636
loss: 1.055871844291687,grad_norm: 0.9999994387805085, iteration: 20637
loss: 1.0463533401489258,grad_norm: 0.9999992732992873, iteration: 20638
loss: 1.0234110355377197,grad_norm: 0.9999991886771556, iteration: 20639
loss: 1.0157686471939087,grad_norm: 0.9999990942145066, iteration: 20640
loss: 0.978265106678009,grad_norm: 0.9999993832682078, iteration: 20641
loss: 1.0253831148147583,grad_norm: 0.9999992339410237, iteration: 20642
loss: 1.0150578022003174,grad_norm: 0.9531593549085878, iteration: 20643
loss: 1.030745029449463,grad_norm: 0.9593805263639381, iteration: 20644
loss: 0.9791168570518494,grad_norm: 0.9999994433309847, iteration: 20645
loss: 0.9837307333946228,grad_norm: 0.9999995092887599, iteration: 20646
loss: 1.0178468227386475,grad_norm: 0.9999992003115423, iteration: 20647
loss: 1.0356355905532837,grad_norm: 0.9999992111512701, iteration: 20648
loss: 1.044443130493164,grad_norm: 0.8521854151756064, iteration: 20649
loss: 1.079192042350769,grad_norm: 0.999999823566053, iteration: 20650
loss: 1.0260536670684814,grad_norm: 0.9999992561471356, iteration: 20651
loss: 1.045466661453247,grad_norm: 0.9999999128987744, iteration: 20652
loss: 1.058034062385559,grad_norm: 0.9999998204287599, iteration: 20653
loss: 1.030432105064392,grad_norm: 0.9999994368169813, iteration: 20654
loss: 0.9875058531761169,grad_norm: 0.9751155028641252, iteration: 20655
loss: 1.0332995653152466,grad_norm: 0.9999994878931223, iteration: 20656
loss: 0.9990098476409912,grad_norm: 0.9999991617239151, iteration: 20657
loss: 0.9981943368911743,grad_norm: 0.9999990496750809, iteration: 20658
loss: 0.9981845021247864,grad_norm: 0.9999992873927543, iteration: 20659
loss: 0.9661142826080322,grad_norm: 0.999999093935093, iteration: 20660
loss: 1.0409870147705078,grad_norm: 0.9999990807503579, iteration: 20661
loss: 1.060085415840149,grad_norm: 0.9722336339566557, iteration: 20662
loss: 1.0506482124328613,grad_norm: 0.9999992797780624, iteration: 20663
loss: 0.9860499501228333,grad_norm: 0.9999990677800936, iteration: 20664
loss: 0.9874285459518433,grad_norm: 0.9999998361818417, iteration: 20665
loss: 1.0079901218414307,grad_norm: 0.9999990875774859, iteration: 20666
loss: 1.0295847654342651,grad_norm: 0.9225427797780452, iteration: 20667
loss: 1.0371562242507935,grad_norm: 0.9999993579725711, iteration: 20668
loss: 1.0410813093185425,grad_norm: 0.9297696925000524, iteration: 20669
loss: 1.0145559310913086,grad_norm: 0.9999990741606326, iteration: 20670
loss: 1.0111503601074219,grad_norm: 0.9999991703709367, iteration: 20671
loss: 0.9701714515686035,grad_norm: 0.9540720189248222, iteration: 20672
loss: 1.0265991687774658,grad_norm: 0.9851079970050284, iteration: 20673
loss: 0.998327374458313,grad_norm: 0.9999996118963671, iteration: 20674
loss: 1.02397620677948,grad_norm: 0.99999928833012, iteration: 20675
loss: 0.9917301535606384,grad_norm: 0.9742306807816975, iteration: 20676
loss: 1.074149250984192,grad_norm: 0.999999436916063, iteration: 20677
loss: 0.9981047511100769,grad_norm: 0.9999992295323844, iteration: 20678
loss: 1.0494823455810547,grad_norm: 0.9999993613851129, iteration: 20679
loss: 0.9982333183288574,grad_norm: 0.9999994463273549, iteration: 20680
loss: 1.0373433828353882,grad_norm: 0.999999062352842, iteration: 20681
loss: 1.0022025108337402,grad_norm: 0.9999993486118534, iteration: 20682
loss: 1.0162783861160278,grad_norm: 0.9999992687726277, iteration: 20683
loss: 1.0286043882369995,grad_norm: 0.9656134314949689, iteration: 20684
loss: 0.9905361533164978,grad_norm: 0.9999991570871916, iteration: 20685
loss: 0.9963181614875793,grad_norm: 0.9999990943320634, iteration: 20686
loss: 0.9917914867401123,grad_norm: 0.9999992148935127, iteration: 20687
loss: 0.999836802482605,grad_norm: 0.9999996470345611, iteration: 20688
loss: 1.0194650888442993,grad_norm: 0.9986413702689567, iteration: 20689
loss: 1.0879151821136475,grad_norm: 0.9508225557974739, iteration: 20690
loss: 1.0131868124008179,grad_norm: 0.9375535571594339, iteration: 20691
loss: 1.0840874910354614,grad_norm: 0.9999994996879372, iteration: 20692
loss: 1.0217634439468384,grad_norm: 0.9036922000663145, iteration: 20693
loss: 0.9728493094444275,grad_norm: 0.9999993528629886, iteration: 20694
loss: 0.9989977478981018,grad_norm: 0.9269418407406886, iteration: 20695
loss: 1.0362316370010376,grad_norm: 0.9999993701893287, iteration: 20696
loss: 0.9960564970970154,grad_norm: 0.8893427931308115, iteration: 20697
loss: 1.0092521905899048,grad_norm: 0.9162906930307781, iteration: 20698
loss: 1.0096222162246704,grad_norm: 0.9999989584839716, iteration: 20699
loss: 1.0213024616241455,grad_norm: 0.9999991568513642, iteration: 20700
loss: 1.035372018814087,grad_norm: 0.999999657460158, iteration: 20701
loss: 1.0111567974090576,grad_norm: 0.9999991886435209, iteration: 20702
loss: 1.0297456979751587,grad_norm: 0.9999992340456938, iteration: 20703
loss: 0.9937957525253296,grad_norm: 0.8784825482694898, iteration: 20704
loss: 0.9745083451271057,grad_norm: 0.8125418024006036, iteration: 20705
loss: 0.9399738907814026,grad_norm: 0.9999991161804027, iteration: 20706
loss: 0.9892736673355103,grad_norm: 0.99999915430746, iteration: 20707
loss: 1.0238261222839355,grad_norm: 0.9587234126082532, iteration: 20708
loss: 1.0604162216186523,grad_norm: 0.9924330235558663, iteration: 20709
loss: 0.9727492928504944,grad_norm: 0.9624778784245348, iteration: 20710
loss: 0.9833940863609314,grad_norm: 0.8765947229724921, iteration: 20711
loss: 1.008566975593567,grad_norm: 0.99999905275478, iteration: 20712
loss: 1.0021991729736328,grad_norm: 0.9999990920123926, iteration: 20713
loss: 1.030478596687317,grad_norm: 0.9327914928683404, iteration: 20714
loss: 1.0424396991729736,grad_norm: 0.9999997641977039, iteration: 20715
loss: 1.0086548328399658,grad_norm: 0.9999991674262003, iteration: 20716
loss: 1.0098819732666016,grad_norm: 0.9999990559542566, iteration: 20717
loss: 0.9602822065353394,grad_norm: 0.9999990453329498, iteration: 20718
loss: 1.0291556119918823,grad_norm: 0.999999206157596, iteration: 20719
loss: 1.0120571851730347,grad_norm: 0.9338800771939532, iteration: 20720
loss: 0.9870008826255798,grad_norm: 0.9999998560660915, iteration: 20721
loss: 1.018221378326416,grad_norm: 0.9999991630276966, iteration: 20722
loss: 1.0004860162734985,grad_norm: 0.9186830827640488, iteration: 20723
loss: 1.0256973505020142,grad_norm: 0.9192490701898561, iteration: 20724
loss: 1.0220378637313843,grad_norm: 0.9999992952258666, iteration: 20725
loss: 1.0036325454711914,grad_norm: 0.9999991207982935, iteration: 20726
loss: 1.013440489768982,grad_norm: 0.9999993568028228, iteration: 20727
loss: 1.027408480644226,grad_norm: 0.9999989833268261, iteration: 20728
loss: 1.0265063047409058,grad_norm: 0.999999272921096, iteration: 20729
loss: 1.0157296657562256,grad_norm: 0.9999991381605793, iteration: 20730
loss: 1.0542998313903809,grad_norm: 0.9999993637026393, iteration: 20731
loss: 0.9943626523017883,grad_norm: 0.9999991414964751, iteration: 20732
loss: 0.9976886510848999,grad_norm: 0.9999991332266562, iteration: 20733
loss: 1.0426361560821533,grad_norm: 0.837675063570078, iteration: 20734
loss: 1.0046838521957397,grad_norm: 0.9999995713824298, iteration: 20735
loss: 1.021485686302185,grad_norm: 0.9999991322554567, iteration: 20736
loss: 1.0041446685791016,grad_norm: 0.9999991742266972, iteration: 20737
loss: 1.0309951305389404,grad_norm: 0.9999991883384561, iteration: 20738
loss: 0.9989678859710693,grad_norm: 0.9999994916077446, iteration: 20739
loss: 0.9930019378662109,grad_norm: 0.9999992434062605, iteration: 20740
loss: 0.9898163080215454,grad_norm: 0.9999991770672662, iteration: 20741
loss: 0.990017831325531,grad_norm: 0.9999990995685677, iteration: 20742
loss: 1.0507891178131104,grad_norm: 0.9999993868591307, iteration: 20743
loss: 0.9774999618530273,grad_norm: 0.8768662810703277, iteration: 20744
loss: 1.0268827676773071,grad_norm: 0.9999990819880538, iteration: 20745
loss: 1.0226517915725708,grad_norm: 0.9999991670664746, iteration: 20746
loss: 1.0097235441207886,grad_norm: 0.9999990923175368, iteration: 20747
loss: 0.9845020174980164,grad_norm: 0.9999992963397194, iteration: 20748
loss: 0.9962226152420044,grad_norm: 0.9999990418772916, iteration: 20749
loss: 1.0044969320297241,grad_norm: 0.9999992569321566, iteration: 20750
loss: 1.06548273563385,grad_norm: 0.9999991145195818, iteration: 20751
loss: 1.0393811464309692,grad_norm: 0.9999991518966446, iteration: 20752
loss: 1.135715126991272,grad_norm: 0.9999997336874903, iteration: 20753
loss: 1.0430704355239868,grad_norm: 0.9999995856112264, iteration: 20754
loss: 0.9888405799865723,grad_norm: 0.9999990701592183, iteration: 20755
loss: 1.0013282299041748,grad_norm: 0.9703201617074819, iteration: 20756
loss: 0.9700586199760437,grad_norm: 0.9999990713113557, iteration: 20757
loss: 0.9638898968696594,grad_norm: 0.9885377462956689, iteration: 20758
loss: 1.0183318853378296,grad_norm: 0.9999992962425055, iteration: 20759
loss: 1.035534381866455,grad_norm: 0.9999992064474093, iteration: 20760
loss: 0.9998889565467834,grad_norm: 0.9999995186940656, iteration: 20761
loss: 0.9782177805900574,grad_norm: 0.9999990945681942, iteration: 20762
loss: 1.0357437133789062,grad_norm: 0.9216508582514888, iteration: 20763
loss: 1.0554143190383911,grad_norm: 0.9999992450552908, iteration: 20764
loss: 1.023642659187317,grad_norm: 0.999999585890538, iteration: 20765
loss: 1.0313560962677002,grad_norm: 0.9999999154086792, iteration: 20766
loss: 1.0048021078109741,grad_norm: 0.9999995169812593, iteration: 20767
loss: 1.0129334926605225,grad_norm: 0.999999147788739, iteration: 20768
loss: 1.013169765472412,grad_norm: 0.9999991553395315, iteration: 20769
loss: 0.9987121820449829,grad_norm: 0.9999992117397497, iteration: 20770
loss: 1.0246655941009521,grad_norm: 0.9999991311336158, iteration: 20771
loss: 1.0194011926651,grad_norm: 0.9999993444033225, iteration: 20772
loss: 0.9642924070358276,grad_norm: 0.9999991978081174, iteration: 20773
loss: 1.0239521265029907,grad_norm: 0.8693200393519945, iteration: 20774
loss: 0.997699499130249,grad_norm: 0.9999991138493184, iteration: 20775
loss: 0.9760603308677673,grad_norm: 0.999999383154527, iteration: 20776
loss: 1.0147364139556885,grad_norm: 0.9999990101032444, iteration: 20777
loss: 1.0053952932357788,grad_norm: 0.9999990719568064, iteration: 20778
loss: 1.0365228652954102,grad_norm: 0.9999992854346141, iteration: 20779
loss: 0.9876546263694763,grad_norm: 0.999999099775062, iteration: 20780
loss: 1.0721619129180908,grad_norm: 0.9999991395307702, iteration: 20781
loss: 1.0018819570541382,grad_norm: 0.9999993427941481, iteration: 20782
loss: 1.0526782274246216,grad_norm: 0.9932182912019547, iteration: 20783
loss: 0.9930591583251953,grad_norm: 0.9956609996170819, iteration: 20784
loss: 1.0033725500106812,grad_norm: 0.9999992609865307, iteration: 20785
loss: 0.9823409914970398,grad_norm: 0.9999990793679242, iteration: 20786
loss: 1.0333333015441895,grad_norm: 0.9999992631796444, iteration: 20787
loss: 1.039467215538025,grad_norm: 0.9999990877549095, iteration: 20788
loss: 0.9793691039085388,grad_norm: 0.9022313122965279, iteration: 20789
loss: 0.9905955791473389,grad_norm: 0.9999991353631849, iteration: 20790
loss: 1.0223232507705688,grad_norm: 0.9999991092364209, iteration: 20791
loss: 1.116511344909668,grad_norm: 0.9999996050305364, iteration: 20792
loss: 1.0208325386047363,grad_norm: 0.9453923758777358, iteration: 20793
loss: 1.012714147567749,grad_norm: 0.9999992009763113, iteration: 20794
loss: 0.9915146231651306,grad_norm: 0.9323145804113615, iteration: 20795
loss: 0.9970695972442627,grad_norm: 0.9999993029508392, iteration: 20796
loss: 1.0253527164459229,grad_norm: 0.985095713223437, iteration: 20797
loss: 1.0017921924591064,grad_norm: 0.9999991523902857, iteration: 20798
loss: 0.9976860880851746,grad_norm: 0.9999991432441486, iteration: 20799
loss: 0.9667548537254333,grad_norm: 0.9999991168286788, iteration: 20800
loss: 0.9791597723960876,grad_norm: 0.999999192610219, iteration: 20801
loss: 1.0231069326400757,grad_norm: 0.9663514333023907, iteration: 20802
loss: 0.9680078029632568,grad_norm: 0.9999990599050373, iteration: 20803
loss: 1.0127410888671875,grad_norm: 0.9999990114449172, iteration: 20804
loss: 1.0408087968826294,grad_norm: 0.999999336392878, iteration: 20805
loss: 1.044844627380371,grad_norm: 0.9999993479678453, iteration: 20806
loss: 1.018688678741455,grad_norm: 0.9999442987754459, iteration: 20807
loss: 0.9809386134147644,grad_norm: 0.9512110164177675, iteration: 20808
loss: 1.0688539743423462,grad_norm: 0.9999996931015579, iteration: 20809
loss: 0.942736029624939,grad_norm: 0.9999990847391763, iteration: 20810
loss: 1.0052564144134521,grad_norm: 0.9999994904203932, iteration: 20811
loss: 1.0148403644561768,grad_norm: 0.9999992291521504, iteration: 20812
loss: 1.0023409128189087,grad_norm: 0.9999991586133931, iteration: 20813
loss: 1.0293041467666626,grad_norm: 0.9999990613643777, iteration: 20814
loss: 1.006669044494629,grad_norm: 0.999999309169198, iteration: 20815
loss: 1.0993852615356445,grad_norm: 0.999999745493838, iteration: 20816
loss: 0.9927098751068115,grad_norm: 0.9999994297357084, iteration: 20817
loss: 1.0373783111572266,grad_norm: 0.9339706897431612, iteration: 20818
loss: 1.0118545293807983,grad_norm: 0.9132402632859697, iteration: 20819
loss: 1.048635721206665,grad_norm: 0.9999991009636915, iteration: 20820
loss: 1.0591562986373901,grad_norm: 0.9999993746863151, iteration: 20821
loss: 1.012328863143921,grad_norm: 0.9999993089474751, iteration: 20822
loss: 1.0271167755126953,grad_norm: 0.9999992717797539, iteration: 20823
loss: 1.0291773080825806,grad_norm: 0.9092878026395951, iteration: 20824
loss: 1.0202317237854004,grad_norm: 0.9022515265504097, iteration: 20825
loss: 0.9741056561470032,grad_norm: 0.9999990711742258, iteration: 20826
loss: 1.0246609449386597,grad_norm: 0.9999990800873008, iteration: 20827
loss: 1.0520424842834473,grad_norm: 0.9526390223447496, iteration: 20828
loss: 1.0003328323364258,grad_norm: 0.9999995332959521, iteration: 20829
loss: 1.0321018695831299,grad_norm: 0.9999992887740239, iteration: 20830
loss: 0.9925825595855713,grad_norm: 0.9999991256987482, iteration: 20831
loss: 1.000632882118225,grad_norm: 0.9999991500586941, iteration: 20832
loss: 1.0260627269744873,grad_norm: 0.9999994197305079, iteration: 20833
loss: 1.0198725461959839,grad_norm: 0.9999992418986232, iteration: 20834
loss: 1.030400037765503,grad_norm: 0.9999990708009955, iteration: 20835
loss: 1.0320398807525635,grad_norm: 0.9999992486094585, iteration: 20836
loss: 1.0280340909957886,grad_norm: 0.9426268943691536, iteration: 20837
loss: 1.020336389541626,grad_norm: 0.9999991569756485, iteration: 20838
loss: 1.0065455436706543,grad_norm: 0.9433244880978267, iteration: 20839
loss: 1.1141047477722168,grad_norm: 0.9999996477895218, iteration: 20840
loss: 1.0274853706359863,grad_norm: 0.9999990697199062, iteration: 20841
loss: 1.0254260301589966,grad_norm: 0.9539314575952107, iteration: 20842
loss: 1.0074987411499023,grad_norm: 0.9999994101000427, iteration: 20843
loss: 1.0167763233184814,grad_norm: 0.9965128250697578, iteration: 20844
loss: 1.023458480834961,grad_norm: 0.999999228863307, iteration: 20845
loss: 0.9998416900634766,grad_norm: 0.9999991678491078, iteration: 20846
loss: 1.0221366882324219,grad_norm: 0.9999989727791211, iteration: 20847
loss: 0.9824642539024353,grad_norm: 0.93349706583872, iteration: 20848
loss: 1.0236173868179321,grad_norm: 0.8072300778640545, iteration: 20849
loss: 1.0267215967178345,grad_norm: 0.8887308448191565, iteration: 20850
loss: 1.0118082761764526,grad_norm: 0.9999991888500482, iteration: 20851
loss: 1.01558518409729,grad_norm: 0.9999998271343621, iteration: 20852
loss: 1.0394026041030884,grad_norm: 0.9999992005480521, iteration: 20853
loss: 1.0433568954467773,grad_norm: 0.9999992723803786, iteration: 20854
loss: 0.9987573027610779,grad_norm: 0.9999990937327906, iteration: 20855
loss: 1.0506035089492798,grad_norm: 0.999999324350019, iteration: 20856
loss: 1.0704169273376465,grad_norm: 0.9999993269704783, iteration: 20857
loss: 1.0460118055343628,grad_norm: 0.9999991957942007, iteration: 20858
loss: 0.9919947981834412,grad_norm: 0.999999066851553, iteration: 20859
loss: 1.0217844247817993,grad_norm: 0.9999989462431612, iteration: 20860
loss: 1.0121855735778809,grad_norm: 0.9999990070853637, iteration: 20861
loss: 1.0541846752166748,grad_norm: 0.9999995272449215, iteration: 20862
loss: 0.9927284717559814,grad_norm: 0.9858008884429431, iteration: 20863
loss: 0.9808104634284973,grad_norm: 0.999999035075133, iteration: 20864
loss: 1.0122742652893066,grad_norm: 0.79630877797138, iteration: 20865
loss: 1.0182915925979614,grad_norm: 0.9999992399025568, iteration: 20866
loss: 1.003531813621521,grad_norm: 0.9999991649454985, iteration: 20867
loss: 1.0388333797454834,grad_norm: 0.999999165387904, iteration: 20868
loss: 1.0474010705947876,grad_norm: 0.9383803924684901, iteration: 20869
loss: 0.9961010813713074,grad_norm: 0.9999990807526165, iteration: 20870
loss: 0.996704638004303,grad_norm: 0.9999992067748936, iteration: 20871
loss: 0.9978012442588806,grad_norm: 0.9999991252349809, iteration: 20872
loss: 1.017774224281311,grad_norm: 0.9999992272181013, iteration: 20873
loss: 1.0159584283828735,grad_norm: 0.9999990172904277, iteration: 20874
loss: 1.0116682052612305,grad_norm: 0.9999991134423537, iteration: 20875
loss: 0.9915182590484619,grad_norm: 0.9999992227618436, iteration: 20876
loss: 1.0363273620605469,grad_norm: 0.9999993482119941, iteration: 20877
loss: 0.9920209050178528,grad_norm: 0.8424504911011305, iteration: 20878
loss: 0.9870852828025818,grad_norm: 0.907472915548379, iteration: 20879
loss: 1.0124797821044922,grad_norm: 0.9469428707115274, iteration: 20880
loss: 0.9851757287979126,grad_norm: 0.9999992203836746, iteration: 20881
loss: 1.011118769645691,grad_norm: 0.9999991645864853, iteration: 20882
loss: 1.0522419214248657,grad_norm: 0.999999510020169, iteration: 20883
loss: 0.9890090823173523,grad_norm: 0.9998338126711755, iteration: 20884
loss: 0.955116868019104,grad_norm: 0.8988992814837088, iteration: 20885
loss: 0.9972285032272339,grad_norm: 0.9999991350694427, iteration: 20886
loss: 0.9967304468154907,grad_norm: 0.9999991330061313, iteration: 20887
loss: 1.0158439874649048,grad_norm: 0.8729732747458391, iteration: 20888
loss: 1.0032352209091187,grad_norm: 0.8897982564879898, iteration: 20889
loss: 1.009160041809082,grad_norm: 0.9220373038894205, iteration: 20890
loss: 1.0399924516677856,grad_norm: 0.9999990547010297, iteration: 20891
loss: 0.9599319100379944,grad_norm: 0.9999991604026343, iteration: 20892
loss: 1.025027871131897,grad_norm: 0.9313726716665215, iteration: 20893
loss: 1.0276226997375488,grad_norm: 0.9999996093869922, iteration: 20894
loss: 1.0590593814849854,grad_norm: 0.9999990083247396, iteration: 20895
loss: 1.0488042831420898,grad_norm: 0.9999990327609132, iteration: 20896
loss: 1.0041834115982056,grad_norm: 0.9999991282475276, iteration: 20897
loss: 1.0148653984069824,grad_norm: 0.9902857038512527, iteration: 20898
loss: 1.0323188304901123,grad_norm: 0.9999994523335832, iteration: 20899
loss: 0.992734432220459,grad_norm: 0.9999991113075062, iteration: 20900
loss: 1.0238170623779297,grad_norm: 0.993095060665651, iteration: 20901
loss: 1.0072442293167114,grad_norm: 0.8328865866971148, iteration: 20902
loss: 0.9935702681541443,grad_norm: 0.9731778437082049, iteration: 20903
loss: 1.009484887123108,grad_norm: 0.999999190164806, iteration: 20904
loss: 0.9868595004081726,grad_norm: 0.9999992794204772, iteration: 20905
loss: 0.9741824269294739,grad_norm: 0.9499912485336023, iteration: 20906
loss: 1.0223442316055298,grad_norm: 0.999999198938496, iteration: 20907
loss: 1.0098228454589844,grad_norm: 0.9999991821312217, iteration: 20908
loss: 0.9854400157928467,grad_norm: 0.9999991753316597, iteration: 20909
loss: 1.0532172918319702,grad_norm: 0.9999994264877745, iteration: 20910
loss: 0.9931554198265076,grad_norm: 0.8560343286696789, iteration: 20911
loss: 1.0426936149597168,grad_norm: 0.9999992163821325, iteration: 20912
loss: 0.987909197807312,grad_norm: 0.9631750142965928, iteration: 20913
loss: 0.9828489422798157,grad_norm: 0.99999901607167, iteration: 20914
loss: 1.0453161001205444,grad_norm: 0.9999991293833489, iteration: 20915
loss: 1.0200856924057007,grad_norm: 0.9999993852964579, iteration: 20916
loss: 0.9992335438728333,grad_norm: 0.9534418750406106, iteration: 20917
loss: 1.001711368560791,grad_norm: 0.9999993036745565, iteration: 20918
loss: 1.0556859970092773,grad_norm: 0.9999993102258458, iteration: 20919
loss: 1.0350258350372314,grad_norm: 0.99999918187166, iteration: 20920
loss: 1.0484724044799805,grad_norm: 0.999999275515873, iteration: 20921
loss: 1.0496299266815186,grad_norm: 0.9999990400066694, iteration: 20922
loss: 1.0417133569717407,grad_norm: 0.9999995179373184, iteration: 20923
loss: 1.013609528541565,grad_norm: 0.9999991836896234, iteration: 20924
loss: 0.9894894957542419,grad_norm: 0.8743418730155351, iteration: 20925
loss: 1.0282814502716064,grad_norm: 0.9434665376933488, iteration: 20926
loss: 1.0174038410186768,grad_norm: 0.9136294824826472, iteration: 20927
loss: 1.02540123462677,grad_norm: 0.9999992085811791, iteration: 20928
loss: 0.9991849064826965,grad_norm: 0.9999990931703502, iteration: 20929
loss: 0.9863697290420532,grad_norm: 0.9999996851951699, iteration: 20930
loss: 1.010493516921997,grad_norm: 0.9999994957219797, iteration: 20931
loss: 1.0018513202667236,grad_norm: 0.9999990776194152, iteration: 20932
loss: 1.0390559434890747,grad_norm: 0.9909411310941262, iteration: 20933
loss: 0.9957119226455688,grad_norm: 0.9999992322405483, iteration: 20934
loss: 1.0166410207748413,grad_norm: 0.921689243938728, iteration: 20935
loss: 1.0158299207687378,grad_norm: 0.9999991859033883, iteration: 20936
loss: 0.9961769580841064,grad_norm: 0.9999991195042774, iteration: 20937
loss: 1.0137773752212524,grad_norm: 0.9999991317600113, iteration: 20938
loss: 1.0418637990951538,grad_norm: 0.9999992771803887, iteration: 20939
loss: 1.0072696208953857,grad_norm: 0.999999253129569, iteration: 20940
loss: 1.078181266784668,grad_norm: 0.9999991893399096, iteration: 20941
loss: 1.0017749071121216,grad_norm: 0.9682743932218342, iteration: 20942
loss: 1.005651593208313,grad_norm: 0.9999993015926396, iteration: 20943
loss: 1.0218830108642578,grad_norm: 0.9999990537204233, iteration: 20944
loss: 1.0628175735473633,grad_norm: 0.999998951264097, iteration: 20945
loss: 1.0229685306549072,grad_norm: 0.999999025596385, iteration: 20946
loss: 1.0237853527069092,grad_norm: 0.9999991550510183, iteration: 20947
loss: 1.0227775573730469,grad_norm: 0.9999990364439368, iteration: 20948
loss: 1.0024067163467407,grad_norm: 0.9999997185300524, iteration: 20949
loss: 1.0426419973373413,grad_norm: 0.9999995039051432, iteration: 20950
loss: 1.0753483772277832,grad_norm: 0.9999995750451453, iteration: 20951
loss: 1.003819465637207,grad_norm: 0.9999991450968416, iteration: 20952
loss: 1.0199533700942993,grad_norm: 0.9561240243791703, iteration: 20953
loss: 1.0294173955917358,grad_norm: 0.9999995222181577, iteration: 20954
loss: 0.9740080833435059,grad_norm: 0.9999993446085378, iteration: 20955
loss: 1.0256211757659912,grad_norm: 0.9999995130113787, iteration: 20956
loss: 1.0303421020507812,grad_norm: 0.9999991024908198, iteration: 20957
loss: 1.0347672700881958,grad_norm: 0.9999990814088648, iteration: 20958
loss: 0.9903307557106018,grad_norm: 0.9999990820146777, iteration: 20959
loss: 1.0092296600341797,grad_norm: 0.9999990151008467, iteration: 20960
loss: 0.9811630249023438,grad_norm: 0.9999991016035592, iteration: 20961
loss: 1.0178107023239136,grad_norm: 0.9999991297384049, iteration: 20962
loss: 1.0353145599365234,grad_norm: 0.9999998231928742, iteration: 20963
loss: 0.9848785400390625,grad_norm: 0.9634678733021197, iteration: 20964
loss: 0.9880144000053406,grad_norm: 0.9999991345204533, iteration: 20965
loss: 0.9880703687667847,grad_norm: 0.8774673501595232, iteration: 20966
loss: 1.0204098224639893,grad_norm: 0.999999880583722, iteration: 20967
loss: 1.0482923984527588,grad_norm: 0.9999992345985161, iteration: 20968
loss: 1.0293751955032349,grad_norm: 0.9999996271209771, iteration: 20969
loss: 1.0093998908996582,grad_norm: 0.999999650333159, iteration: 20970
loss: 0.9880807995796204,grad_norm: 0.9999992226977731, iteration: 20971
loss: 1.0320725440979004,grad_norm: 0.9999990659732861, iteration: 20972
loss: 0.9979164600372314,grad_norm: 0.8850908473646568, iteration: 20973
loss: 0.9812171459197998,grad_norm: 0.9999991010762036, iteration: 20974
loss: 0.9950548410415649,grad_norm: 0.9999990702156617, iteration: 20975
loss: 1.0580430030822754,grad_norm: 0.925125225523152, iteration: 20976
loss: 1.000138759613037,grad_norm: 0.9999991015689776, iteration: 20977
loss: 1.0617974996566772,grad_norm: 0.9999994695806342, iteration: 20978
loss: 1.0233536958694458,grad_norm: 0.999999084590174, iteration: 20979
loss: 1.0117617845535278,grad_norm: 0.9999991585284271, iteration: 20980
loss: 1.0275226831436157,grad_norm: 0.9999990562314348, iteration: 20981
loss: 1.0157967805862427,grad_norm: 0.9999990837744475, iteration: 20982
loss: 1.001201868057251,grad_norm: 0.999999188342989, iteration: 20983
loss: 1.0019773244857788,grad_norm: 0.9999993174030393, iteration: 20984
loss: 0.9710917472839355,grad_norm: 0.9464440827876882, iteration: 20985
loss: 1.007597804069519,grad_norm: 0.999999065894969, iteration: 20986
loss: 1.0902613401412964,grad_norm: 0.9999994591690922, iteration: 20987
loss: 1.0155417919158936,grad_norm: 0.999999116010955, iteration: 20988
loss: 1.0085086822509766,grad_norm: 0.9999991621584002, iteration: 20989
loss: 0.9938948750495911,grad_norm: 0.8893091059730969, iteration: 20990
loss: 1.0016038417816162,grad_norm: 0.9999991421952971, iteration: 20991
loss: 1.1072993278503418,grad_norm: 0.9999997542405835, iteration: 20992
loss: 0.9975829720497131,grad_norm: 0.9999991646454461, iteration: 20993
loss: 1.0069570541381836,grad_norm: 0.9999992918772431, iteration: 20994
loss: 1.0261906385421753,grad_norm: 0.9999991090775091, iteration: 20995
loss: 1.032752513885498,grad_norm: 0.9999993319214638, iteration: 20996
loss: 0.9939692616462708,grad_norm: 0.8430831859398581, iteration: 20997
loss: 1.0296305418014526,grad_norm: 0.9999990778452957, iteration: 20998
loss: 1.0059587955474854,grad_norm: 0.9999989778073797, iteration: 20999
loss: 0.9993144869804382,grad_norm: 0.9940603727082821, iteration: 21000
loss: 1.0247459411621094,grad_norm: 0.9999992739183348, iteration: 21001
loss: 1.0401452779769897,grad_norm: 0.9191997077590974, iteration: 21002
loss: 1.027319312095642,grad_norm: 0.9932364415628849, iteration: 21003
loss: 0.97048419713974,grad_norm: 0.9784353935033101, iteration: 21004
loss: 0.9813785552978516,grad_norm: 0.9999991272264793, iteration: 21005
loss: 0.9970282912254333,grad_norm: 0.999999131874126, iteration: 21006
loss: 0.9721214771270752,grad_norm: 0.9999990565571886, iteration: 21007
loss: 1.0565459728240967,grad_norm: 0.9999990445630093, iteration: 21008
loss: 1.005144715309143,grad_norm: 0.9999991953337728, iteration: 21009
loss: 1.019149661064148,grad_norm: 0.9997347939876237, iteration: 21010
loss: 0.9988329410552979,grad_norm: 0.9999992036088909, iteration: 21011
loss: 0.9868195652961731,grad_norm: 0.9999991493515104, iteration: 21012
loss: 0.9979539513587952,grad_norm: 0.8449334620098743, iteration: 21013
loss: 1.0181792974472046,grad_norm: 0.9999989066103258, iteration: 21014
loss: 0.9924566745758057,grad_norm: 0.9999990550667839, iteration: 21015
loss: 1.0060060024261475,grad_norm: 0.9999992675346921, iteration: 21016
loss: 1.021364688873291,grad_norm: 0.9999992392231886, iteration: 21017
loss: 1.0535329580307007,grad_norm: 0.9999990255273036, iteration: 21018
loss: 0.9903572201728821,grad_norm: 0.9999990351976568, iteration: 21019
loss: 1.0409249067306519,grad_norm: 0.9999993410067033, iteration: 21020
loss: 1.0266085863113403,grad_norm: 0.9886378034888942, iteration: 21021
loss: 1.0069925785064697,grad_norm: 0.9999992055965969, iteration: 21022
loss: 1.0125906467437744,grad_norm: 0.9999990058262479, iteration: 21023
loss: 0.9957455992698669,grad_norm: 0.9999994483120284, iteration: 21024
loss: 0.9763782620429993,grad_norm: 0.9999993025486508, iteration: 21025
loss: 0.9769426584243774,grad_norm: 0.9717906297337202, iteration: 21026
loss: 1.0497171878814697,grad_norm: 0.9999990703623891, iteration: 21027
loss: 1.0038750171661377,grad_norm: 0.859348570228538, iteration: 21028
loss: 1.0265991687774658,grad_norm: 0.9999991878767805, iteration: 21029
loss: 1.0084108114242554,grad_norm: 0.8853423586685253, iteration: 21030
loss: 0.9996556639671326,grad_norm: 0.8997086925059273, iteration: 21031
loss: 1.0357296466827393,grad_norm: 0.9999990021808304, iteration: 21032
loss: 1.0254565477371216,grad_norm: 0.9123476681958557, iteration: 21033
loss: 1.0074597597122192,grad_norm: 0.9999991734299347, iteration: 21034
loss: 1.0062263011932373,grad_norm: 0.9999991136692512, iteration: 21035
loss: 1.0346554517745972,grad_norm: 0.9999991277169348, iteration: 21036
loss: 1.0138777494430542,grad_norm: 0.999999147072289, iteration: 21037
loss: 0.9993418455123901,grad_norm: 0.9999990084507544, iteration: 21038
loss: 1.0105537176132202,grad_norm: 0.9300577547233693, iteration: 21039
loss: 1.0333212614059448,grad_norm: 0.9537516745266923, iteration: 21040
loss: 1.0662360191345215,grad_norm: 0.9999991116779842, iteration: 21041
loss: 1.0712007284164429,grad_norm: 0.9999990566265068, iteration: 21042
loss: 0.9992099404335022,grad_norm: 0.999999091131151, iteration: 21043
loss: 0.9862632751464844,grad_norm: 0.8850985335376494, iteration: 21044
loss: 1.0059449672698975,grad_norm: 0.9999992242466329, iteration: 21045
loss: 1.0161657333374023,grad_norm: 0.8977310380291629, iteration: 21046
loss: 1.024318814277649,grad_norm: 0.9999992164814594, iteration: 21047
loss: 0.9746485352516174,grad_norm: 0.9443612165346824, iteration: 21048
loss: 1.0233488082885742,grad_norm: 0.9999992795360177, iteration: 21049
loss: 0.9945735931396484,grad_norm: 0.9999992500256306, iteration: 21050
loss: 0.9990493655204773,grad_norm: 0.999999286942541, iteration: 21051
loss: 1.0017660856246948,grad_norm: 0.9289429271210415, iteration: 21052
loss: 1.017863154411316,grad_norm: 0.9942841719482365, iteration: 21053
loss: 1.0098189115524292,grad_norm: 0.9999990686120194, iteration: 21054
loss: 0.9920639395713806,grad_norm: 0.9999990368971141, iteration: 21055
loss: 0.9936429858207703,grad_norm: 0.8894703347927964, iteration: 21056
loss: 1.0799341201782227,grad_norm: 0.9999994711933863, iteration: 21057
loss: 0.9936697483062744,grad_norm: 0.9999993780656279, iteration: 21058
loss: 1.0034335851669312,grad_norm: 0.999999192530501, iteration: 21059
loss: 1.0355055332183838,grad_norm: 0.9513518792380395, iteration: 21060
loss: 1.0049647092819214,grad_norm: 0.9999991183855237, iteration: 21061
loss: 1.0171319246292114,grad_norm: 0.8885592963221781, iteration: 21062
loss: 1.0140645503997803,grad_norm: 0.8016650373481324, iteration: 21063
loss: 1.063174843788147,grad_norm: 0.9999999008444928, iteration: 21064
loss: 1.0446325540542603,grad_norm: 0.9999993693348499, iteration: 21065
loss: 0.9791918396949768,grad_norm: 0.9999989880025949, iteration: 21066
loss: 1.0512754917144775,grad_norm: 0.9999990337751438, iteration: 21067
loss: 0.9987529516220093,grad_norm: 0.9255453923354718, iteration: 21068
loss: 0.9901907444000244,grad_norm: 0.9999995983558463, iteration: 21069
loss: 0.999058187007904,grad_norm: 0.8678034899037432, iteration: 21070
loss: 1.0552958250045776,grad_norm: 0.8361471948966607, iteration: 21071
loss: 0.9863642454147339,grad_norm: 0.9499914464809555, iteration: 21072
loss: 1.0332757234573364,grad_norm: 0.9318140727768175, iteration: 21073
loss: 1.0056285858154297,grad_norm: 0.9999991258682984, iteration: 21074
loss: 1.0240046977996826,grad_norm: 0.9999991583400704, iteration: 21075
loss: 0.9955160021781921,grad_norm: 0.9999990876416743, iteration: 21076
loss: 1.0569320917129517,grad_norm: 0.9506233495272242, iteration: 21077
loss: 1.0052355527877808,grad_norm: 0.8759712808080481, iteration: 21078
loss: 1.001031517982483,grad_norm: 0.9999998141650901, iteration: 21079
loss: 0.9780939221382141,grad_norm: 0.8672202240396161, iteration: 21080
loss: 0.9900832176208496,grad_norm: 0.999999113939077, iteration: 21081
loss: 0.9716178774833679,grad_norm: 0.9207009227534485, iteration: 21082
loss: 1.0357528924942017,grad_norm: 0.9999995841870729, iteration: 21083
loss: 1.0182541608810425,grad_norm: 0.9999993346321011, iteration: 21084
loss: 1.0001637935638428,grad_norm: 0.8960750089738443, iteration: 21085
loss: 0.9832421541213989,grad_norm: 0.8957412338149706, iteration: 21086
loss: 1.0061191320419312,grad_norm: 0.9999991508947003, iteration: 21087
loss: 1.0355454683303833,grad_norm: 0.9999997336518676, iteration: 21088
loss: 1.0142252445220947,grad_norm: 0.999999241830732, iteration: 21089
loss: 1.0560243129730225,grad_norm: 0.9999991505381575, iteration: 21090
loss: 0.9833998084068298,grad_norm: 0.9744381403385708, iteration: 21091
loss: 0.9969497919082642,grad_norm: 0.980312418323384, iteration: 21092
loss: 1.0435415506362915,grad_norm: 0.9999998625692074, iteration: 21093
loss: 1.0404136180877686,grad_norm: 0.9999992085198793, iteration: 21094
loss: 0.9879530668258667,grad_norm: 0.9572614468722772, iteration: 21095
loss: 0.9936794638633728,grad_norm: 0.9839401828663973, iteration: 21096
loss: 0.979187548160553,grad_norm: 0.9999991719155698, iteration: 21097
loss: 1.0323396921157837,grad_norm: 0.9999990988027134, iteration: 21098
loss: 1.0456740856170654,grad_norm: 0.9999990452965813, iteration: 21099
loss: 1.0058246850967407,grad_norm: 0.9999996619871173, iteration: 21100
loss: 0.967747151851654,grad_norm: 0.9522717250928211, iteration: 21101
loss: 1.0588692426681519,grad_norm: 0.9999994800372743, iteration: 21102
loss: 1.0067003965377808,grad_norm: 0.8942544234171943, iteration: 21103
loss: 1.044055700302124,grad_norm: 0.9999990907821198, iteration: 21104
loss: 1.0358916521072388,grad_norm: 0.9114196157387748, iteration: 21105
loss: 1.0037599802017212,grad_norm: 0.9285514315064022, iteration: 21106
loss: 1.0381547212600708,grad_norm: 0.9465949231508118, iteration: 21107
loss: 1.0201584100723267,grad_norm: 0.9999991025847466, iteration: 21108
loss: 0.9635500907897949,grad_norm: 0.9999991555684258, iteration: 21109
loss: 1.0200186967849731,grad_norm: 0.9999990941255626, iteration: 21110
loss: 1.0245323181152344,grad_norm: 0.968669702790495, iteration: 21111
loss: 1.0089784860610962,grad_norm: 0.9999991765555175, iteration: 21112
loss: 1.0042831897735596,grad_norm: 0.9999990925427642, iteration: 21113
loss: 1.0397228002548218,grad_norm: 0.999999116329341, iteration: 21114
loss: 1.0483793020248413,grad_norm: 0.9224107311247626, iteration: 21115
loss: 1.032449722290039,grad_norm: 0.9793328046190529, iteration: 21116
loss: 1.0472800731658936,grad_norm: 0.9999991906897071, iteration: 21117
loss: 0.9616734981536865,grad_norm: 0.908216775592164, iteration: 21118
loss: 1.0660749673843384,grad_norm: 0.9999996069108467, iteration: 21119
loss: 1.0439125299453735,grad_norm: 0.9999997641682704, iteration: 21120
loss: 1.0252652168273926,grad_norm: 0.9999991299785294, iteration: 21121
loss: 1.0557469129562378,grad_norm: 0.9707904911886124, iteration: 21122
loss: 1.0182510614395142,grad_norm: 0.9999990851003115, iteration: 21123
loss: 1.0017364025115967,grad_norm: 0.9999992085858634, iteration: 21124
loss: 0.9764770865440369,grad_norm: 0.9999992226273554, iteration: 21125
loss: 1.0357714891433716,grad_norm: 0.9999991164110912, iteration: 21126
loss: 1.036549687385559,grad_norm: 0.999999104223572, iteration: 21127
loss: 1.0345454216003418,grad_norm: 0.9999991182306647, iteration: 21128
loss: 1.0127962827682495,grad_norm: 0.9500175756393547, iteration: 21129
loss: 0.9830114841461182,grad_norm: 0.9075794311976308, iteration: 21130
loss: 1.0027321577072144,grad_norm: 0.9999991538612345, iteration: 21131
loss: 1.0639863014221191,grad_norm: 0.9999996690497229, iteration: 21132
loss: 0.9940862655639648,grad_norm: 0.9999990966465234, iteration: 21133
loss: 0.986897885799408,grad_norm: 0.9999991931286916, iteration: 21134
loss: 1.0425583124160767,grad_norm: 0.9999997749904476, iteration: 21135
loss: 1.0315558910369873,grad_norm: 0.9999995183153213, iteration: 21136
loss: 1.0002577304840088,grad_norm: 0.99999914437668, iteration: 21137
loss: 1.0441367626190186,grad_norm: 0.9999990636427267, iteration: 21138
loss: 0.988196849822998,grad_norm: 0.9999992386543042, iteration: 21139
loss: 1.0030924081802368,grad_norm: 0.9999990294022219, iteration: 21140
loss: 1.0416676998138428,grad_norm: 0.9066576002499691, iteration: 21141
loss: 1.045931100845337,grad_norm: 0.9999993564612966, iteration: 21142
loss: 1.0192601680755615,grad_norm: 0.9999996782425713, iteration: 21143
loss: 1.0320311784744263,grad_norm: 0.9999990525805258, iteration: 21144
loss: 1.025467038154602,grad_norm: 0.988283052842202, iteration: 21145
loss: 1.0475578308105469,grad_norm: 0.9999997560037381, iteration: 21146
loss: 1.0351866483688354,grad_norm: 0.9999991428586922, iteration: 21147
loss: 1.0015321969985962,grad_norm: 0.9999992458735814, iteration: 21148
loss: 1.027357578277588,grad_norm: 0.9096913463857451, iteration: 21149
loss: 1.0178991556167603,grad_norm: 0.9999992537396613, iteration: 21150
loss: 1.048573613166809,grad_norm: 0.8887020523573506, iteration: 21151
loss: 1.0293400287628174,grad_norm: 0.9999995918909721, iteration: 21152
loss: 1.034426212310791,grad_norm: 0.9999999690169341, iteration: 21153
loss: 1.0583168268203735,grad_norm: 0.9999991016889791, iteration: 21154
loss: 1.0525486469268799,grad_norm: 0.9999991270168298, iteration: 21155
loss: 0.9717422723770142,grad_norm: 0.9999993140775085, iteration: 21156
loss: 1.0179684162139893,grad_norm: 0.9999991434573845, iteration: 21157
loss: 1.0043408870697021,grad_norm: 0.990985525146604, iteration: 21158
loss: 1.028017520904541,grad_norm: 0.9999992471805905, iteration: 21159
loss: 0.9753610491752625,grad_norm: 0.999999250570036, iteration: 21160
loss: 1.0114386081695557,grad_norm: 0.9999993445347672, iteration: 21161
loss: 1.101560354232788,grad_norm: 0.9999993549841636, iteration: 21162
loss: 1.063887596130371,grad_norm: 0.9999991911117438, iteration: 21163
loss: 1.0214605331420898,grad_norm: 0.9999995153711917, iteration: 21164
loss: 1.0555243492126465,grad_norm: 0.9999995329301693, iteration: 21165
loss: 0.9637490510940552,grad_norm: 0.9653741908661362, iteration: 21166
loss: 1.014233946800232,grad_norm: 0.9999997157780414, iteration: 21167
loss: 1.0287824869155884,grad_norm: 0.9999992493807132, iteration: 21168
loss: 1.0091699361801147,grad_norm: 0.9999991338475702, iteration: 21169
loss: 1.0157302618026733,grad_norm: 0.9999991896190302, iteration: 21170
loss: 1.026563048362732,grad_norm: 0.9999991476865762, iteration: 21171
loss: 1.026646375656128,grad_norm: 0.9999995328774525, iteration: 21172
loss: 1.0306203365325928,grad_norm: 0.9999993678516853, iteration: 21173
loss: 1.0668747425079346,grad_norm: 0.9999996930340106, iteration: 21174
loss: 1.0201538801193237,grad_norm: 0.9999993764351297, iteration: 21175
loss: 1.0023671388626099,grad_norm: 0.9459980509095888, iteration: 21176
loss: 1.017998218536377,grad_norm: 0.9572735551720354, iteration: 21177
loss: 1.0257923603057861,grad_norm: 0.8948806034716481, iteration: 21178
loss: 1.0075126886367798,grad_norm: 0.9999993880020529, iteration: 21179
loss: 1.117699384689331,grad_norm: 0.9999997890485235, iteration: 21180
loss: 0.990459680557251,grad_norm: 0.9999992856404765, iteration: 21181
loss: 1.0072091817855835,grad_norm: 0.9999992745456249, iteration: 21182
loss: 1.0247178077697754,grad_norm: 0.9999992557530919, iteration: 21183
loss: 0.974642813205719,grad_norm: 0.9723556438482753, iteration: 21184
loss: 1.0313000679016113,grad_norm: 0.9999994446395586, iteration: 21185
loss: 1.0405540466308594,grad_norm: 0.9999992932569745, iteration: 21186
loss: 0.9884706139564514,grad_norm: 0.9040374781720856, iteration: 21187
loss: 1.0327293872833252,grad_norm: 0.9999992740755229, iteration: 21188
loss: 1.0274062156677246,grad_norm: 0.9999994046138966, iteration: 21189
loss: 1.0048511028289795,grad_norm: 0.9999990974863949, iteration: 21190
loss: 0.988976776599884,grad_norm: 0.9999990365525921, iteration: 21191
loss: 1.0166118144989014,grad_norm: 0.9999991978034724, iteration: 21192
loss: 1.0072057247161865,grad_norm: 0.9999991009188863, iteration: 21193
loss: 1.0391969680786133,grad_norm: 0.9999991111333334, iteration: 21194
loss: 1.0736907720565796,grad_norm: 0.9999998838973341, iteration: 21195
loss: 1.0637730360031128,grad_norm: 0.9999991897723632, iteration: 21196
loss: 1.006544589996338,grad_norm: 0.999999453494971, iteration: 21197
loss: 0.9794109463691711,grad_norm: 0.9323317397991074, iteration: 21198
loss: 1.0243923664093018,grad_norm: 0.9999990590771483, iteration: 21199
loss: 1.0237888097763062,grad_norm: 0.9999996891247286, iteration: 21200
loss: 1.0360476970672607,grad_norm: 0.999999005314689, iteration: 21201
loss: 0.9845924973487854,grad_norm: 0.9274959739332336, iteration: 21202
loss: 1.0257854461669922,grad_norm: 0.9999991282302307, iteration: 21203
loss: 1.051344394683838,grad_norm: 0.9999994725866449, iteration: 21204
loss: 0.9977802634239197,grad_norm: 0.9999990809123331, iteration: 21205
loss: 0.9740297198295593,grad_norm: 0.9999997151848764, iteration: 21206
loss: 1.0415042638778687,grad_norm: 0.9999994260933939, iteration: 21207
loss: 1.0245550870895386,grad_norm: 0.9999994955439803, iteration: 21208
loss: 1.0183675289154053,grad_norm: 0.9999995129056068, iteration: 21209
loss: 1.0507594347000122,grad_norm: 0.9999995787847163, iteration: 21210
loss: 1.015095591545105,grad_norm: 0.919417001114245, iteration: 21211
loss: 1.0095739364624023,grad_norm: 0.9999990824351586, iteration: 21212
loss: 1.0460426807403564,grad_norm: 0.9999992001539234, iteration: 21213
loss: 0.9820389151573181,grad_norm: 0.999999188801575, iteration: 21214
loss: 1.013466715812683,grad_norm: 0.9999990715769832, iteration: 21215
loss: 0.9982354640960693,grad_norm: 0.999999206225521, iteration: 21216
loss: 0.9597875475883484,grad_norm: 0.9999990038160556, iteration: 21217
loss: 1.0355284214019775,grad_norm: 0.9999993352679116, iteration: 21218
loss: 0.9686532616615295,grad_norm: 0.9999991304780499, iteration: 21219
loss: 1.0346944332122803,grad_norm: 0.9999991547066696, iteration: 21220
loss: 1.0985066890716553,grad_norm: 0.9999994914278776, iteration: 21221
loss: 1.0174756050109863,grad_norm: 0.9999992505503684, iteration: 21222
loss: 1.024391531944275,grad_norm: 0.9999990721884217, iteration: 21223
loss: 1.0730723142623901,grad_norm: 0.9999996720222576, iteration: 21224
loss: 1.0254714488983154,grad_norm: 0.9045857629889168, iteration: 21225
loss: 1.0185940265655518,grad_norm: 0.999999248865027, iteration: 21226
loss: 1.0168886184692383,grad_norm: 0.9999991564173283, iteration: 21227
loss: 1.0733033418655396,grad_norm: 0.9999997928281018, iteration: 21228
loss: 0.9949273467063904,grad_norm: 0.9999994136882198, iteration: 21229
loss: 1.0062979459762573,grad_norm: 0.999999059976764, iteration: 21230
loss: 1.0068647861480713,grad_norm: 0.9999991052192642, iteration: 21231
loss: 1.0499992370605469,grad_norm: 0.8754323316907378, iteration: 21232
loss: 1.0441533327102661,grad_norm: 0.8253917878599808, iteration: 21233
loss: 1.030121088027954,grad_norm: 0.9999992617640491, iteration: 21234
loss: 1.0646406412124634,grad_norm: 0.999999904565296, iteration: 21235
loss: 1.0230714082717896,grad_norm: 0.9999996273345817, iteration: 21236
loss: 0.9877411723136902,grad_norm: 0.9999990897270701, iteration: 21237
loss: 1.0096453428268433,grad_norm: 0.9999993397164492, iteration: 21238
loss: 1.044891119003296,grad_norm: 0.9999993001969206, iteration: 21239
loss: 0.9955228567123413,grad_norm: 0.9568179370471221, iteration: 21240
loss: 0.980295717716217,grad_norm: 0.8934249441538052, iteration: 21241
loss: 1.025922179222107,grad_norm: 0.8650468710301586, iteration: 21242
loss: 0.981580376625061,grad_norm: 0.9999990444093174, iteration: 21243
loss: 1.0077494382858276,grad_norm: 0.9999990532440046, iteration: 21244
loss: 1.0525081157684326,grad_norm: 0.999999248679614, iteration: 21245
loss: 0.9790166020393372,grad_norm: 0.9999993038954517, iteration: 21246
loss: 1.0440123081207275,grad_norm: 0.9999994890371254, iteration: 21247
loss: 1.0476750135421753,grad_norm: 0.999999054241795, iteration: 21248
loss: 1.0239416360855103,grad_norm: 0.9999991568112321, iteration: 21249
loss: 0.9938855767250061,grad_norm: 0.9999995160761203, iteration: 21250
loss: 1.013154149055481,grad_norm: 0.9999991307959265, iteration: 21251
loss: 0.9638543128967285,grad_norm: 0.9999991024495118, iteration: 21252
loss: 1.0046817064285278,grad_norm: 0.9773572719802253, iteration: 21253
loss: 1.025091290473938,grad_norm: 0.9999992439458864, iteration: 21254
loss: 0.9588403105735779,grad_norm: 0.9741202169621457, iteration: 21255
loss: 1.0514531135559082,grad_norm: 0.8295468024264409, iteration: 21256
loss: 0.9870632886886597,grad_norm: 0.999999223860753, iteration: 21257
loss: 1.0107998847961426,grad_norm: 0.9999991736879128, iteration: 21258
loss: 1.0016098022460938,grad_norm: 0.9128624409067819, iteration: 21259
loss: 1.0388535261154175,grad_norm: 0.9999993515079121, iteration: 21260
loss: 1.0071437358856201,grad_norm: 0.992218593048948, iteration: 21261
loss: 1.011752963066101,grad_norm: 0.9915513135490317, iteration: 21262
loss: 1.0258903503417969,grad_norm: 0.9802497935060644, iteration: 21263
loss: 1.0010830163955688,grad_norm: 0.9999996640971236, iteration: 21264
loss: 0.9918831586837769,grad_norm: 0.9999989630211428, iteration: 21265
loss: 1.0830894708633423,grad_norm: 0.9999996342838184, iteration: 21266
loss: 1.0078442096710205,grad_norm: 0.9999990272538485, iteration: 21267
loss: 1.0404478311538696,grad_norm: 0.991268517481254, iteration: 21268
loss: 1.0468889474868774,grad_norm: 0.9999992803144331, iteration: 21269
loss: 1.0077532529830933,grad_norm: 0.9999990027976199, iteration: 21270
loss: 0.9998733401298523,grad_norm: 0.999999167432302, iteration: 21271
loss: 1.0169885158538818,grad_norm: 0.9999991589872823, iteration: 21272
loss: 1.0207117795944214,grad_norm: 0.9999993359355466, iteration: 21273
loss: 1.0006017684936523,grad_norm: 0.9999991812316392, iteration: 21274
loss: 1.026686191558838,grad_norm: 0.9999991835980774, iteration: 21275
loss: 1.0269598960876465,grad_norm: 0.9999992164704405, iteration: 21276
loss: 1.0319234132766724,grad_norm: 0.9999991742708956, iteration: 21277
loss: 0.9993791580200195,grad_norm: 0.9397747319989748, iteration: 21278
loss: 1.0155200958251953,grad_norm: 0.9487801239773566, iteration: 21279
loss: 0.997945249080658,grad_norm: 0.9344372686391802, iteration: 21280
loss: 1.0340392589569092,grad_norm: 0.9999997138217525, iteration: 21281
loss: 1.060332179069519,grad_norm: 0.9999992377705198, iteration: 21282
loss: 0.9875714182853699,grad_norm: 0.9999991713558767, iteration: 21283
loss: 1.0214658975601196,grad_norm: 0.9999991902728353, iteration: 21284
loss: 1.009976863861084,grad_norm: 0.999999253822393, iteration: 21285
loss: 1.0327397584915161,grad_norm: 0.9999992949477962, iteration: 21286
loss: 0.9822597503662109,grad_norm: 0.9724042169024776, iteration: 21287
loss: 0.9737179279327393,grad_norm: 0.8948279454144927, iteration: 21288
loss: 1.032160758972168,grad_norm: 0.9456839458706681, iteration: 21289
loss: 0.9859796762466431,grad_norm: 0.8622382376083081, iteration: 21290
loss: 1.001611351966858,grad_norm: 0.9999990022337981, iteration: 21291
loss: 0.9942177534103394,grad_norm: 0.9155522126465598, iteration: 21292
loss: 1.0135226249694824,grad_norm: 0.99999921304486, iteration: 21293
loss: 1.000423550605774,grad_norm: 0.9999991008648601, iteration: 21294
loss: 1.064255714416504,grad_norm: 0.9751072981807091, iteration: 21295
loss: 1.0001276731491089,grad_norm: 0.9999991342331376, iteration: 21296
loss: 1.012382984161377,grad_norm: 0.9999991185137184, iteration: 21297
loss: 1.0204291343688965,grad_norm: 0.9519529311241315, iteration: 21298
loss: 1.0318248271942139,grad_norm: 0.992544437929541, iteration: 21299
loss: 1.043182611465454,grad_norm: 0.99999923392555, iteration: 21300
loss: 1.0286543369293213,grad_norm: 0.9999990495937583, iteration: 21301
loss: 1.056780219078064,grad_norm: 0.9999991227823342, iteration: 21302
loss: 0.9891671538352966,grad_norm: 0.9999990800997298, iteration: 21303
loss: 1.0129075050354004,grad_norm: 0.9999991249349895, iteration: 21304
loss: 1.0108563899993896,grad_norm: 0.999999275592978, iteration: 21305
loss: 1.0077672004699707,grad_norm: 0.9999990678699355, iteration: 21306
loss: 1.0156620740890503,grad_norm: 0.9999990786194913, iteration: 21307
loss: 1.0036156177520752,grad_norm: 0.999999147750558, iteration: 21308
loss: 1.0427203178405762,grad_norm: 0.9144806638389523, iteration: 21309
loss: 0.9993583559989929,grad_norm: 0.9999990822559287, iteration: 21310
loss: 1.0363637208938599,grad_norm: 0.9228052734932611, iteration: 21311
loss: 1.0156562328338623,grad_norm: 0.9999990814197574, iteration: 21312
loss: 1.0570639371871948,grad_norm: 0.9999994160106831, iteration: 21313
loss: 1.0427069664001465,grad_norm: 0.9549091621904378, iteration: 21314
loss: 1.0356385707855225,grad_norm: 0.9999991498445516, iteration: 21315
loss: 0.9979829788208008,grad_norm: 0.9736642046977707, iteration: 21316
loss: 0.97713303565979,grad_norm: 0.9999992315848767, iteration: 21317
loss: 1.0315717458724976,grad_norm: 0.9999991141250014, iteration: 21318
loss: 1.0092966556549072,grad_norm: 0.9999990972410029, iteration: 21319
loss: 1.0108004808425903,grad_norm: 0.9999992325707605, iteration: 21320
loss: 1.0068377256393433,grad_norm: 0.8664169048798597, iteration: 21321
loss: 1.0051206350326538,grad_norm: 0.999999150689252, iteration: 21322
loss: 0.9976632595062256,grad_norm: 0.9999992515271184, iteration: 21323
loss: 1.0461148023605347,grad_norm: 0.9366443774433076, iteration: 21324
loss: 1.0125197172164917,grad_norm: 0.9999989455883141, iteration: 21325
loss: 1.0368258953094482,grad_norm: 0.9999994785105573, iteration: 21326
loss: 0.9813367128372192,grad_norm: 0.9999991991417927, iteration: 21327
loss: 1.0130376815795898,grad_norm: 0.9999990796129875, iteration: 21328
loss: 0.9971956610679626,grad_norm: 0.999999137897805, iteration: 21329
loss: 1.034290075302124,grad_norm: 0.9999994079481515, iteration: 21330
loss: 1.0136370658874512,grad_norm: 0.999999093570318, iteration: 21331
loss: 1.00386381149292,grad_norm: 0.9999990529927588, iteration: 21332
loss: 1.0102492570877075,grad_norm: 0.9999990087937072, iteration: 21333
loss: 1.007925271987915,grad_norm: 0.999999133626266, iteration: 21334
loss: 1.0291693210601807,grad_norm: 0.9999990348463652, iteration: 21335
loss: 1.0209827423095703,grad_norm: 0.9999992352086977, iteration: 21336
loss: 0.9854654669761658,grad_norm: 0.8637516701421165, iteration: 21337
loss: 1.024521827697754,grad_norm: 0.9999990967447645, iteration: 21338
loss: 1.0046385526657104,grad_norm: 0.9641336768247794, iteration: 21339
loss: 1.0363624095916748,grad_norm: 0.997192000550708, iteration: 21340
loss: 1.012721061706543,grad_norm: 0.9999989977625935, iteration: 21341
loss: 1.031607747077942,grad_norm: 0.9999991373220963, iteration: 21342
loss: 1.016064167022705,grad_norm: 0.9306931196980315, iteration: 21343
loss: 0.9864398241043091,grad_norm: 0.9999990902288332, iteration: 21344
loss: 0.9863659143447876,grad_norm: 0.9999990306685197, iteration: 21345
loss: 1.0094186067581177,grad_norm: 0.9156196361523722, iteration: 21346
loss: 1.043466567993164,grad_norm: 0.9999992172323933, iteration: 21347
loss: 0.9959356784820557,grad_norm: 0.9999991722194339, iteration: 21348
loss: 0.981647253036499,grad_norm: 0.999999177830517, iteration: 21349
loss: 1.0005704164505005,grad_norm: 0.999999042008153, iteration: 21350
loss: 1.012689471244812,grad_norm: 0.9978647924323931, iteration: 21351
loss: 0.9906948208808899,grad_norm: 0.9999991390187258, iteration: 21352
loss: 0.9767791628837585,grad_norm: 0.9999991713710638, iteration: 21353
loss: 0.9950322508811951,grad_norm: 0.9999992038149954, iteration: 21354
loss: 1.015782356262207,grad_norm: 0.9999992209815995, iteration: 21355
loss: 1.0240038633346558,grad_norm: 0.9550729602382423, iteration: 21356
loss: 1.0300028324127197,grad_norm: 0.9999989979601098, iteration: 21357
loss: 1.0009586811065674,grad_norm: 0.9999991252424059, iteration: 21358
loss: 0.9986278414726257,grad_norm: 0.9999991512140877, iteration: 21359
loss: 1.0000405311584473,grad_norm: 0.9984495576044063, iteration: 21360
loss: 1.0069392919540405,grad_norm: 0.9999991584266432, iteration: 21361
loss: 1.0109730958938599,grad_norm: 0.9999992263525818, iteration: 21362
loss: 0.989561140537262,grad_norm: 0.9367934216030208, iteration: 21363
loss: 1.0494458675384521,grad_norm: 0.9999993804334469, iteration: 21364
loss: 1.0443519353866577,grad_norm: 0.9999993519370226, iteration: 21365
loss: 1.0116441249847412,grad_norm: 0.9999991918199697, iteration: 21366
loss: 1.0288265943527222,grad_norm: 0.9999992271088073, iteration: 21367
loss: 0.9797406792640686,grad_norm: 0.934169963351311, iteration: 21368
loss: 1.0248111486434937,grad_norm: 0.9999990226500621, iteration: 21369
loss: 1.0195374488830566,grad_norm: 0.9999990425140094, iteration: 21370
loss: 0.9559495449066162,grad_norm: 0.9999989981614735, iteration: 21371
loss: 0.9711740612983704,grad_norm: 0.9999991186600395, iteration: 21372
loss: 1.0203856229782104,grad_norm: 0.9999993938871399, iteration: 21373
loss: 0.9687135219573975,grad_norm: 0.9767216208270217, iteration: 21374
loss: 1.0159627199172974,grad_norm: 0.9999992049777598, iteration: 21375
loss: 1.0298465490341187,grad_norm: 0.9842512880783173, iteration: 21376
loss: 1.0216639041900635,grad_norm: 0.9999991840836062, iteration: 21377
loss: 0.9458198547363281,grad_norm: 0.9999991341223183, iteration: 21378
loss: 1.0526916980743408,grad_norm: 0.9999991392300674, iteration: 21379
loss: 1.0212867259979248,grad_norm: 0.9456761313245106, iteration: 21380
loss: 1.001548171043396,grad_norm: 0.9999991253750169, iteration: 21381
loss: 0.9936700463294983,grad_norm: 0.9994888782798084, iteration: 21382
loss: 1.0168894529342651,grad_norm: 0.9549815058213029, iteration: 21383
loss: 1.02007257938385,grad_norm: 0.9999990928030024, iteration: 21384
loss: 1.0278419256210327,grad_norm: 0.9999991483396625, iteration: 21385
loss: 0.9433543086051941,grad_norm: 0.9436439214691866, iteration: 21386
loss: 1.0330818891525269,grad_norm: 0.9999990170224645, iteration: 21387
loss: 1.038365125656128,grad_norm: 0.9999990335753688, iteration: 21388
loss: 1.0704089403152466,grad_norm: 0.9999997601182029, iteration: 21389
loss: 1.009743094444275,grad_norm: 0.9999990556743694, iteration: 21390
loss: 0.9782097935676575,grad_norm: 0.9999991048706551, iteration: 21391
loss: 0.9951847195625305,grad_norm: 0.9999990960869831, iteration: 21392
loss: 0.960808515548706,grad_norm: 0.9999992243602251, iteration: 21393
loss: 0.9957354068756104,grad_norm: 0.9999990606248301, iteration: 21394
loss: 1.0112173557281494,grad_norm: 0.8278837020657109, iteration: 21395
loss: 1.0112600326538086,grad_norm: 0.9999991264677859, iteration: 21396
loss: 1.076828956604004,grad_norm: 0.999999191945665, iteration: 21397
loss: 1.0148826837539673,grad_norm: 0.9999991643315216, iteration: 21398
loss: 1.0010920763015747,grad_norm: 0.976133099914808, iteration: 21399
loss: 0.9872224926948547,grad_norm: 0.9999990514660996, iteration: 21400
loss: 1.058117151260376,grad_norm: 0.9999991902379378, iteration: 21401
loss: 0.9720224738121033,grad_norm: 0.9630165795928349, iteration: 21402
loss: 1.0174827575683594,grad_norm: 0.9999991648862703, iteration: 21403
loss: 1.0347322225570679,grad_norm: 0.9999991703775002, iteration: 21404
loss: 1.0414890050888062,grad_norm: 0.9999993355608742, iteration: 21405
loss: 0.9987979531288147,grad_norm: 0.9999989993474608, iteration: 21406
loss: 1.0118193626403809,grad_norm: 0.9999990457859648, iteration: 21407
loss: 1.0671085119247437,grad_norm: 0.9999998885760832, iteration: 21408
loss: 0.9866611361503601,grad_norm: 0.9999989991209632, iteration: 21409
loss: 1.042096495628357,grad_norm: 0.999999181297553, iteration: 21410
loss: 1.0234355926513672,grad_norm: 0.9999993479616985, iteration: 21411
loss: 0.997394323348999,grad_norm: 0.7792573458070491, iteration: 21412
loss: 1.0541373491287231,grad_norm: 0.9999998056686175, iteration: 21413
loss: 1.010393500328064,grad_norm: 0.999999195454933, iteration: 21414
loss: 1.0193246603012085,grad_norm: 0.999999131111977, iteration: 21415
loss: 1.0277831554412842,grad_norm: 0.9999991952980792, iteration: 21416
loss: 1.015732765197754,grad_norm: 0.9463536380010587, iteration: 21417
loss: 1.0311914682388306,grad_norm: 0.999999076884902, iteration: 21418
loss: 0.9531408548355103,grad_norm: 0.9999991442367928, iteration: 21419
loss: 0.9978122711181641,grad_norm: 0.8599396051147773, iteration: 21420
loss: 1.01863694190979,grad_norm: 0.9374050550975571, iteration: 21421
loss: 1.0299843549728394,grad_norm: 0.9726774235125147, iteration: 21422
loss: 1.0310559272766113,grad_norm: 0.9999994858469758, iteration: 21423
loss: 0.9746909737586975,grad_norm: 0.9999991309480926, iteration: 21424
loss: 1.0134985446929932,grad_norm: 0.9999991808827094, iteration: 21425
loss: 0.9807611107826233,grad_norm: 0.9999991290578699, iteration: 21426
loss: 1.0069787502288818,grad_norm: 0.9999990779690447, iteration: 21427
loss: 0.9919162392616272,grad_norm: 0.9999990916524223, iteration: 21428
loss: 0.9900600910186768,grad_norm: 0.9125334839496498, iteration: 21429
loss: 1.0083788633346558,grad_norm: 0.9999991700084561, iteration: 21430
loss: 1.0176506042480469,grad_norm: 0.8925638662109106, iteration: 21431
loss: 0.9954395294189453,grad_norm: 0.9999990618576622, iteration: 21432
loss: 1.028955101966858,grad_norm: 0.9487286712236487, iteration: 21433
loss: 1.000179648399353,grad_norm: 0.9999990970388015, iteration: 21434
loss: 1.0646010637283325,grad_norm: 0.9548128654125594, iteration: 21435
loss: 0.9636659622192383,grad_norm: 0.9999991324373656, iteration: 21436
loss: 1.0032267570495605,grad_norm: 0.9999989543967903, iteration: 21437
loss: 1.0416852235794067,grad_norm: 0.9999992050806921, iteration: 21438
loss: 1.0329570770263672,grad_norm: 0.9750006033595879, iteration: 21439
loss: 1.0316481590270996,grad_norm: 0.9442232255870706, iteration: 21440
loss: 1.0336456298828125,grad_norm: 0.9999992509801333, iteration: 21441
loss: 0.9944522976875305,grad_norm: 0.9999990871228465, iteration: 21442
loss: 0.9652385115623474,grad_norm: 0.9999993003264164, iteration: 21443
loss: 1.03065824508667,grad_norm: 0.999998990580864, iteration: 21444
loss: 1.0082648992538452,grad_norm: 0.9999992465719028, iteration: 21445
loss: 0.9991691708564758,grad_norm: 0.999999120676229, iteration: 21446
loss: 1.0204812288284302,grad_norm: 0.9847526574831019, iteration: 21447
loss: 0.956598162651062,grad_norm: 0.8445256215944656, iteration: 21448
loss: 1.0100666284561157,grad_norm: 0.9999992789024649, iteration: 21449
loss: 1.0335749387741089,grad_norm: 0.9999989998992014, iteration: 21450
loss: 0.9699577689170837,grad_norm: 0.8845294979514305, iteration: 21451
loss: 0.9602563977241516,grad_norm: 0.9015082271203432, iteration: 21452
loss: 1.0073751211166382,grad_norm: 0.9999991090302632, iteration: 21453
loss: 0.981783390045166,grad_norm: 0.9754855429441034, iteration: 21454
loss: 1.0339651107788086,grad_norm: 0.999999175626147, iteration: 21455
loss: 1.025580883026123,grad_norm: 0.9999991509543638, iteration: 21456
loss: 1.0157735347747803,grad_norm: 0.9832781410434742, iteration: 21457
loss: 0.9962418079376221,grad_norm: 0.9999991496529391, iteration: 21458
loss: 0.9669917821884155,grad_norm: 0.9999992292462899, iteration: 21459
loss: 1.013660192489624,grad_norm: 0.9999992108958458, iteration: 21460
loss: 1.0431876182556152,grad_norm: 0.9999991200158466, iteration: 21461
loss: 1.040357232093811,grad_norm: 0.9999989566137313, iteration: 21462
loss: 0.997094988822937,grad_norm: 0.9999996978308917, iteration: 21463
loss: 0.9940392971038818,grad_norm: 0.9999995236735878, iteration: 21464
loss: 0.985791027545929,grad_norm: 0.9978001191664939, iteration: 21465
loss: 1.0105180740356445,grad_norm: 0.9999992041867596, iteration: 21466
loss: 0.9840810298919678,grad_norm: 0.9920945224704019, iteration: 21467
loss: 0.9805812239646912,grad_norm: 0.9999990915894885, iteration: 21468
loss: 1.0249825716018677,grad_norm: 0.918284498917584, iteration: 21469
loss: 1.022078275680542,grad_norm: 0.9999990668044338, iteration: 21470
loss: 0.9824737310409546,grad_norm: 0.9242581894377732, iteration: 21471
loss: 1.0017797946929932,grad_norm: 0.9741049375507635, iteration: 21472
loss: 1.0486900806427002,grad_norm: 0.9999995059703055, iteration: 21473
loss: 1.0740541219711304,grad_norm: 0.9999993374150511, iteration: 21474
loss: 0.9685913324356079,grad_norm: 0.9999990307999199, iteration: 21475
loss: 1.0513641834259033,grad_norm: 0.9999992019148218, iteration: 21476
loss: 1.0134572982788086,grad_norm: 0.9999991409038145, iteration: 21477
loss: 1.0055454969406128,grad_norm: 0.9045175555307924, iteration: 21478
loss: 0.9695709347724915,grad_norm: 0.9999991002256373, iteration: 21479
loss: 1.0130643844604492,grad_norm: 0.9999992236668532, iteration: 21480
loss: 0.9866201877593994,grad_norm: 0.8009127841375244, iteration: 21481
loss: 1.026397943496704,grad_norm: 0.9999990896604263, iteration: 21482
loss: 1.0124574899673462,grad_norm: 0.9013652141629661, iteration: 21483
loss: 0.9882773160934448,grad_norm: 0.9658346451748854, iteration: 21484
loss: 0.9940089583396912,grad_norm: 0.9999995808362614, iteration: 21485
loss: 1.0175261497497559,grad_norm: 0.9999992118893923, iteration: 21486
loss: 1.0282492637634277,grad_norm: 0.9438797579397834, iteration: 21487
loss: 0.9923768639564514,grad_norm: 0.9023646767477855, iteration: 21488
loss: 0.9780128002166748,grad_norm: 0.9934737868292445, iteration: 21489
loss: 1.0405381917953491,grad_norm: 0.999999120860791, iteration: 21490
loss: 1.0263093709945679,grad_norm: 0.999999327413116, iteration: 21491
loss: 1.002912163734436,grad_norm: 0.8928411491156526, iteration: 21492
loss: 1.0207947492599487,grad_norm: 0.9273187287643656, iteration: 21493
loss: 1.0221059322357178,grad_norm: 0.9999991470923155, iteration: 21494
loss: 0.9841920137405396,grad_norm: 0.9999990709357786, iteration: 21495
loss: 0.9944050312042236,grad_norm: 0.9652680832415833, iteration: 21496
loss: 1.0365254878997803,grad_norm: 0.9817787346572957, iteration: 21497
loss: 1.0536558628082275,grad_norm: 0.9999991787432279, iteration: 21498
loss: 1.0118699073791504,grad_norm: 0.9521412596195308, iteration: 21499
loss: 1.0084232091903687,grad_norm: 0.9999990782215394, iteration: 21500
loss: 0.9958198070526123,grad_norm: 0.8751290650185006, iteration: 21501
loss: 1.0245367288589478,grad_norm: 0.9999990913541629, iteration: 21502
loss: 0.9993801712989807,grad_norm: 0.8790601803610297, iteration: 21503
loss: 1.0235882997512817,grad_norm: 0.8904717778512097, iteration: 21504
loss: 1.0278174877166748,grad_norm: 0.9999991580542932, iteration: 21505
loss: 0.9948013424873352,grad_norm: 0.9999991197050624, iteration: 21506
loss: 0.992750883102417,grad_norm: 0.9999990590569658, iteration: 21507
loss: 1.0195119380950928,grad_norm: 0.9269387014049277, iteration: 21508
loss: 1.0022790431976318,grad_norm: 0.9999992579474011, iteration: 21509
loss: 0.9829716086387634,grad_norm: 0.9444001596194387, iteration: 21510
loss: 1.0217387676239014,grad_norm: 0.9974981263916943, iteration: 21511
loss: 1.0324972867965698,grad_norm: 0.9999990570855674, iteration: 21512
loss: 1.0540364980697632,grad_norm: 0.9999995070307156, iteration: 21513
loss: 0.9965006113052368,grad_norm: 0.9716364975891034, iteration: 21514
loss: 1.0437027215957642,grad_norm: 0.9999991080376412, iteration: 21515
loss: 1.0022196769714355,grad_norm: 0.9406889102410978, iteration: 21516
loss: 0.9746114611625671,grad_norm: 0.9999992469868927, iteration: 21517
loss: 0.9806908965110779,grad_norm: 0.9999992673851784, iteration: 21518
loss: 1.0498859882354736,grad_norm: 0.9999990231220145, iteration: 21519
loss: 1.0018080472946167,grad_norm: 0.9999992602775637, iteration: 21520
loss: 1.0005079507827759,grad_norm: 0.9999991875091669, iteration: 21521
loss: 0.9885755181312561,grad_norm: 0.9404584003679333, iteration: 21522
loss: 1.0037388801574707,grad_norm: 0.923122934251484, iteration: 21523
loss: 0.9972158074378967,grad_norm: 0.9999990931832831, iteration: 21524
loss: 1.0209959745407104,grad_norm: 0.9999992140377192, iteration: 21525
loss: 0.9901707768440247,grad_norm: 0.9721923001530949, iteration: 21526
loss: 0.9979889988899231,grad_norm: 0.9999990011918292, iteration: 21527
loss: 0.9938309788703918,grad_norm: 0.9999995835226929, iteration: 21528
loss: 1.043260097503662,grad_norm: 0.9999990551032929, iteration: 21529
loss: 1.0409139394760132,grad_norm: 0.9999992601054889, iteration: 21530
loss: 1.0334490537643433,grad_norm: 0.9495546584629777, iteration: 21531
loss: 1.0055919885635376,grad_norm: 0.9999989743083055, iteration: 21532
loss: 0.9960657358169556,grad_norm: 0.9999993184866961, iteration: 21533
loss: 0.9994479417800903,grad_norm: 0.9999991139042116, iteration: 21534
loss: 1.0503591299057007,grad_norm: 0.9662899763117405, iteration: 21535
loss: 1.0347367525100708,grad_norm: 0.988635923809262, iteration: 21536
loss: 0.9575396180152893,grad_norm: 0.9233974429003625, iteration: 21537
loss: 0.9798754453659058,grad_norm: 0.9927506480939947, iteration: 21538
loss: 1.0256669521331787,grad_norm: 0.8871961661048262, iteration: 21539
loss: 1.0285770893096924,grad_norm: 0.9999990901775717, iteration: 21540
loss: 1.031780481338501,grad_norm: 0.9999991888610412, iteration: 21541
loss: 1.033937692642212,grad_norm: 0.9550846414805991, iteration: 21542
loss: 0.9806467294692993,grad_norm: 0.9999990919488079, iteration: 21543
loss: 0.9969275593757629,grad_norm: 0.9599633337246531, iteration: 21544
loss: 1.025394082069397,grad_norm: 0.9999991163845469, iteration: 21545
loss: 1.031333088874817,grad_norm: 0.9999996100223181, iteration: 21546
loss: 1.0202405452728271,grad_norm: 0.999998981918364, iteration: 21547
loss: 1.002869725227356,grad_norm: 0.9795939537736209, iteration: 21548
loss: 1.0162526369094849,grad_norm: 0.9629685515938637, iteration: 21549
loss: 1.0068715810775757,grad_norm: 0.8831881623865612, iteration: 21550
loss: 1.0456106662750244,grad_norm: 0.9999996119893908, iteration: 21551
loss: 1.022995114326477,grad_norm: 0.9999991668060662, iteration: 21552
loss: 1.0126304626464844,grad_norm: 0.9427117761007681, iteration: 21553
loss: 1.022899866104126,grad_norm: 0.9999997526185379, iteration: 21554
loss: 0.9884241819381714,grad_norm: 0.9999993134230637, iteration: 21555
loss: 0.9868385195732117,grad_norm: 0.9999992393540325, iteration: 21556
loss: 1.055765151977539,grad_norm: 0.9999990839827282, iteration: 21557
loss: 1.0583704710006714,grad_norm: 0.9999995124156574, iteration: 21558
loss: 0.9774131774902344,grad_norm: 0.9999990187251416, iteration: 21559
loss: 0.9921468496322632,grad_norm: 0.9999990890282939, iteration: 21560
loss: 1.002211570739746,grad_norm: 0.8953632108820205, iteration: 21561
loss: 1.003695011138916,grad_norm: 0.9112000914186728, iteration: 21562
loss: 0.9965128898620605,grad_norm: 0.883109585419431, iteration: 21563
loss: 1.0040061473846436,grad_norm: 0.99999911656357, iteration: 21564
loss: 1.023422360420227,grad_norm: 0.9999990664569892, iteration: 21565
loss: 1.0342999696731567,grad_norm: 0.8232424442949493, iteration: 21566
loss: 0.989849865436554,grad_norm: 0.9999991677276143, iteration: 21567
loss: 1.0255687236785889,grad_norm: 0.9999992625808711, iteration: 21568
loss: 1.0046122074127197,grad_norm: 0.9999990624268046, iteration: 21569
loss: 1.0259506702423096,grad_norm: 0.9999992873868774, iteration: 21570
loss: 1.0829558372497559,grad_norm: 0.999999526251258, iteration: 21571
loss: 1.0337849855422974,grad_norm: 0.9999994432378719, iteration: 21572
loss: 1.0413284301757812,grad_norm: 0.9999990677781578, iteration: 21573
loss: 0.9867681264877319,grad_norm: 0.9999992503369411, iteration: 21574
loss: 0.9977986812591553,grad_norm: 0.9036523031969028, iteration: 21575
loss: 1.0091944932937622,grad_norm: 0.985497696039873, iteration: 21576
loss: 1.0468189716339111,grad_norm: 0.9999990836335177, iteration: 21577
loss: 0.985373854637146,grad_norm: 0.9116884816617602, iteration: 21578
loss: 1.0372247695922852,grad_norm: 0.9999990638527019, iteration: 21579
loss: 1.0432648658752441,grad_norm: 0.9999990881485217, iteration: 21580
loss: 1.007757544517517,grad_norm: 0.9860418306597469, iteration: 21581
loss: 1.0061320066452026,grad_norm: 0.9999991603649657, iteration: 21582
loss: 1.0302255153656006,grad_norm: 0.9915946488559204, iteration: 21583
loss: 1.0276631116867065,grad_norm: 0.9999991371617777, iteration: 21584
loss: 1.0087422132492065,grad_norm: 0.9920821590222605, iteration: 21585
loss: 0.9813005328178406,grad_norm: 0.9933987327181162, iteration: 21586
loss: 1.052870750427246,grad_norm: 0.9999994832679154, iteration: 21587
loss: 1.0495760440826416,grad_norm: 0.9999995302948459, iteration: 21588
loss: 1.0258021354675293,grad_norm: 0.9999991537865184, iteration: 21589
loss: 1.022325038909912,grad_norm: 0.9999995848113884, iteration: 21590
loss: 1.005817174911499,grad_norm: 0.9999992499448371, iteration: 21591
loss: 1.0126045942306519,grad_norm: 0.9340244974663937, iteration: 21592
loss: 1.0083034038543701,grad_norm: 0.9999996770314263, iteration: 21593
loss: 1.0551682710647583,grad_norm: 0.9999991539424298, iteration: 21594
loss: 1.0440890789031982,grad_norm: 0.9999991747189522, iteration: 21595
loss: 1.0500606298446655,grad_norm: 0.999999453708221, iteration: 21596
loss: 1.028066635131836,grad_norm: 0.9999994778014667, iteration: 21597
loss: 1.0105891227722168,grad_norm: 0.9999990719617028, iteration: 21598
loss: 1.0180240869522095,grad_norm: 0.9999992125334238, iteration: 21599
loss: 0.9786016941070557,grad_norm: 0.8149752434805886, iteration: 21600
loss: 1.0179826021194458,grad_norm: 0.9999991769819004, iteration: 21601
loss: 1.0222632884979248,grad_norm: 0.9006973491281649, iteration: 21602
loss: 1.0417035818099976,grad_norm: 0.8677834313514331, iteration: 21603
loss: 1.032923936843872,grad_norm: 0.9999991168377389, iteration: 21604
loss: 1.0617470741271973,grad_norm: 0.9999991630696677, iteration: 21605
loss: 1.0476678609848022,grad_norm: 0.9999991162636335, iteration: 21606
loss: 1.0335863828659058,grad_norm: 0.9364628078318384, iteration: 21607
loss: 0.9847975969314575,grad_norm: 0.9999990222216544, iteration: 21608
loss: 1.0354810953140259,grad_norm: 0.9169364519364174, iteration: 21609
loss: 1.0478028059005737,grad_norm: 0.9999991347380608, iteration: 21610
loss: 1.0328139066696167,grad_norm: 0.9999999452707855, iteration: 21611
loss: 0.9932588934898376,grad_norm: 0.999999172453012, iteration: 21612
loss: 1.0218626260757446,grad_norm: 0.9999990868945848, iteration: 21613
loss: 1.0070785284042358,grad_norm: 0.9999991399991764, iteration: 21614
loss: 0.9862421751022339,grad_norm: 0.9999992016870287, iteration: 21615
loss: 1.0304417610168457,grad_norm: 0.9999991686938157, iteration: 21616
loss: 1.0633292198181152,grad_norm: 0.9999997860051767, iteration: 21617
loss: 1.0553663969039917,grad_norm: 0.9999991037595731, iteration: 21618
loss: 1.0456278324127197,grad_norm: 0.9999991469874344, iteration: 21619
loss: 1.0546497106552124,grad_norm: 0.8886013022217526, iteration: 21620
loss: 1.0371403694152832,grad_norm: 0.9568620392109699, iteration: 21621
loss: 1.027058482170105,grad_norm: 0.999999498658485, iteration: 21622
loss: 1.0054519176483154,grad_norm: 0.9300365783421891, iteration: 21623
loss: 1.043134331703186,grad_norm: 0.9999997065401423, iteration: 21624
loss: 0.9506744742393494,grad_norm: 0.9999989753055688, iteration: 21625
loss: 1.0292291641235352,grad_norm: 0.9945698484108217, iteration: 21626
loss: 1.0016889572143555,grad_norm: 0.9999991918141931, iteration: 21627
loss: 1.0197792053222656,grad_norm: 0.9999990733932255, iteration: 21628
loss: 1.025545597076416,grad_norm: 0.9631691138068526, iteration: 21629
loss: 1.0002690553665161,grad_norm: 0.9999992088701101, iteration: 21630
loss: 0.9910601377487183,grad_norm: 0.997339782736167, iteration: 21631
loss: 1.0619480609893799,grad_norm: 0.9999995566204607, iteration: 21632
loss: 1.0279669761657715,grad_norm: 0.9999996173528165, iteration: 21633
loss: 1.049586534500122,grad_norm: 0.9999993888247671, iteration: 21634
loss: 1.0001723766326904,grad_norm: 0.9337378057721806, iteration: 21635
loss: 1.025902271270752,grad_norm: 0.9999991000051576, iteration: 21636
loss: 1.0067249536514282,grad_norm: 0.9999990882023888, iteration: 21637
loss: 1.0423295497894287,grad_norm: 0.9285169779390592, iteration: 21638
loss: 1.0005285739898682,grad_norm: 0.8578157775449552, iteration: 21639
loss: 1.022560715675354,grad_norm: 0.9999991876919169, iteration: 21640
loss: 1.0098527669906616,grad_norm: 0.9999991368819798, iteration: 21641
loss: 1.0186848640441895,grad_norm: 0.9999991997820726, iteration: 21642
loss: 0.9941352009773254,grad_norm: 0.9999992739075133, iteration: 21643
loss: 1.0688145160675049,grad_norm: 0.9999995095834393, iteration: 21644
loss: 1.0145409107208252,grad_norm: 0.9170449987610836, iteration: 21645
loss: 1.0383481979370117,grad_norm: 0.9999990424703677, iteration: 21646
loss: 0.9845556020736694,grad_norm: 0.9999995194852572, iteration: 21647
loss: 1.033527135848999,grad_norm: 0.9999991605925252, iteration: 21648
loss: 1.0301172733306885,grad_norm: 0.9999992003948025, iteration: 21649
loss: 1.0144236087799072,grad_norm: 0.9999993938112611, iteration: 21650
loss: 1.0037825107574463,grad_norm: 0.9272113890326068, iteration: 21651
loss: 1.035191297531128,grad_norm: 0.9999989744213276, iteration: 21652
loss: 1.0101373195648193,grad_norm: 0.9999991586753575, iteration: 21653
loss: 1.0445655584335327,grad_norm: 0.999999191374265, iteration: 21654
loss: 1.0435868501663208,grad_norm: 0.9999990458331833, iteration: 21655
loss: 1.0158113241195679,grad_norm: 0.9999992336761591, iteration: 21656
loss: 1.0173715353012085,grad_norm: 0.9999991887947144, iteration: 21657
loss: 0.9864891171455383,grad_norm: 0.999999127630966, iteration: 21658
loss: 0.9875782132148743,grad_norm: 0.9999991339300099, iteration: 21659
loss: 0.9925795793533325,grad_norm: 0.8966953992596177, iteration: 21660
loss: 1.0359457731246948,grad_norm: 0.9334026763730613, iteration: 21661
loss: 0.9729927778244019,grad_norm: 0.9999990655322819, iteration: 21662
loss: 0.9938346147537231,grad_norm: 0.999999270037541, iteration: 21663
loss: 0.9724281430244446,grad_norm: 0.8871346697668997, iteration: 21664
loss: 0.9793816804885864,grad_norm: 0.9999990290971552, iteration: 21665
loss: 1.0072404146194458,grad_norm: 0.9999993808227039, iteration: 21666
loss: 1.0021933317184448,grad_norm: 0.9999992589563851, iteration: 21667
loss: 1.0172382593154907,grad_norm: 0.9999991163405176, iteration: 21668
loss: 0.9840583801269531,grad_norm: 0.9999990972049093, iteration: 21669
loss: 1.00449800491333,grad_norm: 0.9999990992773373, iteration: 21670
loss: 0.9681400060653687,grad_norm: 0.8673442997664838, iteration: 21671
loss: 1.0298975706100464,grad_norm: 0.999999112146584, iteration: 21672
loss: 1.0352914333343506,grad_norm: 0.972213283882149, iteration: 21673
loss: 1.0529698133468628,grad_norm: 0.9295813569202556, iteration: 21674
loss: 1.0461235046386719,grad_norm: 0.9999993961218011, iteration: 21675
loss: 0.9779490232467651,grad_norm: 0.9999991987820638, iteration: 21676
loss: 1.0361731052398682,grad_norm: 0.8988133797158485, iteration: 21677
loss: 1.0397648811340332,grad_norm: 0.9999990666009584, iteration: 21678
loss: 1.0093350410461426,grad_norm: 0.9999992618129507, iteration: 21679
loss: 0.964267909526825,grad_norm: 0.916540297748052, iteration: 21680
loss: 0.9754132032394409,grad_norm: 0.8507505725349989, iteration: 21681
loss: 1.0314408540725708,grad_norm: 0.9999989721549584, iteration: 21682
loss: 0.9771263599395752,grad_norm: 0.9999994178681351, iteration: 21683
loss: 0.997765839099884,grad_norm: 0.999999397006314, iteration: 21684
loss: 1.0146424770355225,grad_norm: 0.999999241260908, iteration: 21685
loss: 1.0119456052780151,grad_norm: 0.9999990228030664, iteration: 21686
loss: 1.0196834802627563,grad_norm: 0.9999993092193514, iteration: 21687
loss: 1.0038241147994995,grad_norm: 0.9930531853906709, iteration: 21688
loss: 1.0082803964614868,grad_norm: 0.9869888663225564, iteration: 21689
loss: 0.9697657823562622,grad_norm: 0.999998993491872, iteration: 21690
loss: 1.0153799057006836,grad_norm: 0.9999990664948389, iteration: 21691
loss: 0.9797967672348022,grad_norm: 0.9999991522406132, iteration: 21692
loss: 1.008690357208252,grad_norm: 0.9157158699751243, iteration: 21693
loss: 1.0366952419281006,grad_norm: 0.9999991638363402, iteration: 21694
loss: 0.9908838272094727,grad_norm: 0.9999992882449135, iteration: 21695
loss: 1.030890703201294,grad_norm: 0.9999993281314642, iteration: 21696
loss: 1.0017942190170288,grad_norm: 0.951082587858798, iteration: 21697
loss: 1.0354887247085571,grad_norm: 0.999999091329114, iteration: 21698
loss: 0.9817581176757812,grad_norm: 0.9999990124981362, iteration: 21699
loss: 1.0025266408920288,grad_norm: 0.9999992859515017, iteration: 21700
loss: 0.9911935925483704,grad_norm: 0.999999148831369, iteration: 21701
loss: 0.9919677972793579,grad_norm: 0.9999991855794974, iteration: 21702
loss: 1.0040384531021118,grad_norm: 0.819323099022788, iteration: 21703
loss: 1.0275088548660278,grad_norm: 0.9999991622940074, iteration: 21704
loss: 0.9852598309516907,grad_norm: 0.999999234945608, iteration: 21705
loss: 0.9969536662101746,grad_norm: 0.9999991418400662, iteration: 21706
loss: 1.014933705329895,grad_norm: 0.9999990755059702, iteration: 21707
loss: 1.0004870891571045,grad_norm: 0.9089108985528409, iteration: 21708
loss: 1.1143996715545654,grad_norm: 0.999999705643134, iteration: 21709
loss: 1.0140575170516968,grad_norm: 0.9999991775054435, iteration: 21710
loss: 1.0227630138397217,grad_norm: 0.999999156197231, iteration: 21711
loss: 1.008009910583496,grad_norm: 0.9517698256913498, iteration: 21712
loss: 1.0081114768981934,grad_norm: 0.8919914348436694, iteration: 21713
loss: 1.0242303609848022,grad_norm: 0.9999990991958749, iteration: 21714
loss: 0.9991734027862549,grad_norm: 0.9999989528759243, iteration: 21715
loss: 0.9983835816383362,grad_norm: 0.9466677387145226, iteration: 21716
loss: 0.983755350112915,grad_norm: 0.9999990466652852, iteration: 21717
loss: 1.0193849802017212,grad_norm: 0.9999993458123431, iteration: 21718
loss: 1.0139939785003662,grad_norm: 0.999999224816593, iteration: 21719
loss: 1.0229830741882324,grad_norm: 0.9999991113165674, iteration: 21720
loss: 1.03102445602417,grad_norm: 0.9999989539287122, iteration: 21721
loss: 1.0486414432525635,grad_norm: 0.9999992525175744, iteration: 21722
loss: 1.0637253522872925,grad_norm: 0.9999993922834574, iteration: 21723
loss: 1.031884789466858,grad_norm: 0.9487028240063041, iteration: 21724
loss: 0.9530293345451355,grad_norm: 0.9999989936131356, iteration: 21725
loss: 0.991768479347229,grad_norm: 0.9999989939948727, iteration: 21726
loss: 0.9875646233558655,grad_norm: 0.9915659872001036, iteration: 21727
loss: 0.9474665522575378,grad_norm: 0.9999990866594216, iteration: 21728
loss: 1.0153707265853882,grad_norm: 0.8178970793972422, iteration: 21729
loss: 0.9946580529212952,grad_norm: 0.8801605532755123, iteration: 21730
loss: 1.0025116205215454,grad_norm: 0.9999994422588742, iteration: 21731
loss: 0.9665671586990356,grad_norm: 0.9999992899394169, iteration: 21732
loss: 1.023886799812317,grad_norm: 0.9999993551918616, iteration: 21733
loss: 1.0301434993743896,grad_norm: 0.9999990947715404, iteration: 21734
loss: 1.0170038938522339,grad_norm: 0.9999991394362506, iteration: 21735
loss: 1.023807168006897,grad_norm: 0.9999991758637565, iteration: 21736
loss: 1.0276840925216675,grad_norm: 0.9582942016874366, iteration: 21737
loss: 1.0278977155685425,grad_norm: 0.9999995646178079, iteration: 21738
loss: 1.0016387701034546,grad_norm: 0.9999993178520935, iteration: 21739
loss: 1.0172258615493774,grad_norm: 0.999999223249109, iteration: 21740
loss: 1.0201772451400757,grad_norm: 0.9999990940355173, iteration: 21741
loss: 1.0043919086456299,grad_norm: 0.9999992104146831, iteration: 21742
loss: 1.0214715003967285,grad_norm: 0.8949899500426861, iteration: 21743
loss: 1.03471040725708,grad_norm: 0.8542528715027589, iteration: 21744
loss: 1.0206321477890015,grad_norm: 0.9138824382298196, iteration: 21745
loss: 1.015760064125061,grad_norm: 0.9844333094481156, iteration: 21746
loss: 0.9918245673179626,grad_norm: 0.9999990194969718, iteration: 21747
loss: 1.0016942024230957,grad_norm: 0.9999222272964933, iteration: 21748
loss: 0.9584543704986572,grad_norm: 0.9999991631925362, iteration: 21749
loss: 1.0123422145843506,grad_norm: 0.9999990234589766, iteration: 21750
loss: 0.9947993159294128,grad_norm: 0.9999995530610364, iteration: 21751
loss: 1.0106768608093262,grad_norm: 0.9999994252979965, iteration: 21752
loss: 0.9719914197921753,grad_norm: 0.9999989671025796, iteration: 21753
loss: 1.026859998703003,grad_norm: 0.9999993049838178, iteration: 21754
loss: 1.051087498664856,grad_norm: 0.9999993077387819, iteration: 21755
loss: 0.9853200912475586,grad_norm: 0.9999992272923734, iteration: 21756
loss: 0.9711081981658936,grad_norm: 0.9161252766139929, iteration: 21757
loss: 1.0506482124328613,grad_norm: 0.9068340227599173, iteration: 21758
loss: 0.9972561001777649,grad_norm: 0.8828377791316802, iteration: 21759
loss: 0.9903242588043213,grad_norm: 0.9999991303004377, iteration: 21760
loss: 0.9879109263420105,grad_norm: 0.9999992555204776, iteration: 21761
loss: 1.0580354928970337,grad_norm: 0.9999992992522416, iteration: 21762
loss: 1.0203666687011719,grad_norm: 0.999999109884268, iteration: 21763
loss: 0.9897755980491638,grad_norm: 0.9999990406492766, iteration: 21764
loss: 0.9457594752311707,grad_norm: 0.9566846926467244, iteration: 21765
loss: 1.0020263195037842,grad_norm: 0.9999993346724512, iteration: 21766
loss: 0.9877273440361023,grad_norm: 0.9999992761911807, iteration: 21767
loss: 1.0259675979614258,grad_norm: 0.9517581980985902, iteration: 21768
loss: 0.9811428785324097,grad_norm: 0.9999991068870806, iteration: 21769
loss: 1.0673258304595947,grad_norm: 0.9999992273034249, iteration: 21770
loss: 1.0034595727920532,grad_norm: 0.9999990116974413, iteration: 21771
loss: 1.0763518810272217,grad_norm: 0.9999990811839783, iteration: 21772
loss: 1.004883885383606,grad_norm: 0.9979583199560352, iteration: 21773
loss: 1.012915849685669,grad_norm: 0.9999992724917734, iteration: 21774
loss: 0.9673744440078735,grad_norm: 0.9999990386528511, iteration: 21775
loss: 0.9896723628044128,grad_norm: 0.9410881558703276, iteration: 21776
loss: 1.0619854927062988,grad_norm: 0.9999990890729911, iteration: 21777
loss: 1.0067923069000244,grad_norm: 0.8762747547937945, iteration: 21778
loss: 1.0450814962387085,grad_norm: 0.9999992445911595, iteration: 21779
loss: 1.0637503862380981,grad_norm: 0.9999990233053426, iteration: 21780
loss: 0.983872652053833,grad_norm: 0.9313145232752491, iteration: 21781
loss: 1.0219568014144897,grad_norm: 0.9999990280463333, iteration: 21782
loss: 1.0291870832443237,grad_norm: 0.9999992742732132, iteration: 21783
loss: 1.0155657529830933,grad_norm: 0.9638192931948516, iteration: 21784
loss: 1.001554250717163,grad_norm: 0.9999990787134709, iteration: 21785
loss: 1.019802212715149,grad_norm: 0.8962654345455002, iteration: 21786
loss: 1.0082651376724243,grad_norm: 0.954197386275668, iteration: 21787
loss: 1.0242798328399658,grad_norm: 0.9582661277882224, iteration: 21788
loss: 1.010916829109192,grad_norm: 0.9999997783731115, iteration: 21789
loss: 1.0425325632095337,grad_norm: 0.9999991341202698, iteration: 21790
loss: 1.0057039260864258,grad_norm: 0.9999990715462815, iteration: 21791
loss: 1.025923252105713,grad_norm: 0.9722406971474328, iteration: 21792
loss: 1.0061008930206299,grad_norm: 0.999998896565168, iteration: 21793
loss: 1.0068873167037964,grad_norm: 0.9825166487600887, iteration: 21794
loss: 1.0150039196014404,grad_norm: 0.999999194287407, iteration: 21795
loss: 0.9718595147132874,grad_norm: 0.9999994572567685, iteration: 21796
loss: 1.0291039943695068,grad_norm: 0.9074962814942291, iteration: 21797
loss: 1.0306236743927002,grad_norm: 0.9811320461621268, iteration: 21798
loss: 1.0405011177062988,grad_norm: 0.9999997141723951, iteration: 21799
loss: 1.0065101385116577,grad_norm: 0.9999990251678188, iteration: 21800
loss: 0.9978566765785217,grad_norm: 0.950023164364092, iteration: 21801
loss: 0.9975452423095703,grad_norm: 0.9976901327411664, iteration: 21802
loss: 0.9889891147613525,grad_norm: 0.9999990307890703, iteration: 21803
loss: 1.0021533966064453,grad_norm: 0.9999990928656356, iteration: 21804
loss: 1.0060089826583862,grad_norm: 0.999999112665862, iteration: 21805
loss: 1.0821144580841064,grad_norm: 0.9797879039349168, iteration: 21806
loss: 1.0797247886657715,grad_norm: 0.9999995093902003, iteration: 21807
loss: 0.9489054679870605,grad_norm: 0.9999990734057694, iteration: 21808
loss: 1.0383176803588867,grad_norm: 0.9999991104039431, iteration: 21809
loss: 1.0047181844711304,grad_norm: 0.9999990006539837, iteration: 21810
loss: 1.027277946472168,grad_norm: 0.9999992821303139, iteration: 21811
loss: 0.9890814423561096,grad_norm: 0.9993545214629673, iteration: 21812
loss: 1.0402470827102661,grad_norm: 0.9999989449782709, iteration: 21813
loss: 1.0347784757614136,grad_norm: 0.9999991874731154, iteration: 21814
loss: 1.0339244604110718,grad_norm: 0.9999991482492275, iteration: 21815
loss: 0.97032231092453,grad_norm: 0.9999991700517709, iteration: 21816
loss: 1.0217691659927368,grad_norm: 0.9999992100266658, iteration: 21817
loss: 0.9734717011451721,grad_norm: 0.9999992241054287, iteration: 21818
loss: 1.029125452041626,grad_norm: 0.9999998442848378, iteration: 21819
loss: 1.0163521766662598,grad_norm: 0.9999991213949313, iteration: 21820
loss: 1.0003217458724976,grad_norm: 0.8546460179617913, iteration: 21821
loss: 1.0069894790649414,grad_norm: 0.9999991543109451, iteration: 21822
loss: 1.021646499633789,grad_norm: 0.9999990961842571, iteration: 21823
loss: 1.0409674644470215,grad_norm: 0.9999993325987474, iteration: 21824
loss: 1.0189013481140137,grad_norm: 0.9370327119527735, iteration: 21825
loss: 0.9478804469108582,grad_norm: 0.9959189876424364, iteration: 21826
loss: 1.0578824281692505,grad_norm: 0.9999997416383438, iteration: 21827
loss: 1.0601567029953003,grad_norm: 0.9999996155488335, iteration: 21828
loss: 1.0276275873184204,grad_norm: 0.9999991136111377, iteration: 21829
loss: 1.031981110572815,grad_norm: 0.993956138078462, iteration: 21830
loss: 1.1703407764434814,grad_norm: 0.9999993333546588, iteration: 21831
loss: 1.0198503732681274,grad_norm: 0.9999990928889385, iteration: 21832
loss: 1.0722352266311646,grad_norm: 0.9652616373914195, iteration: 21833
loss: 1.0202324390411377,grad_norm: 0.9999991103376098, iteration: 21834
loss: 1.0607470273971558,grad_norm: 0.989566083764703, iteration: 21835
loss: 1.0108380317687988,grad_norm: 0.9999992286405209, iteration: 21836
loss: 1.022456169128418,grad_norm: 0.9999992284474778, iteration: 21837
loss: 1.0562503337860107,grad_norm: 0.9999990789514891, iteration: 21838
loss: 1.0256019830703735,grad_norm: 0.933969337192175, iteration: 21839
loss: 0.9939391613006592,grad_norm: 0.9999992254109684, iteration: 21840
loss: 0.9975008964538574,grad_norm: 0.88107521077651, iteration: 21841
loss: 0.9739943146705627,grad_norm: 0.9033946807425837, iteration: 21842
loss: 1.034852385520935,grad_norm: 0.9999993774494341, iteration: 21843
loss: 1.0226138830184937,grad_norm: 0.9999991800026667, iteration: 21844
loss: 0.9780201315879822,grad_norm: 0.9999992308489063, iteration: 21845
loss: 1.005580186843872,grad_norm: 0.9999990874932851, iteration: 21846
loss: 0.9933207631111145,grad_norm: 0.9999990079239545, iteration: 21847
loss: 1.006920337677002,grad_norm: 0.9854101417309805, iteration: 21848
loss: 1.0350474119186401,grad_norm: 0.9999993040758517, iteration: 21849
loss: 1.0184522867202759,grad_norm: 0.9999990462241656, iteration: 21850
loss: 1.0445200204849243,grad_norm: 0.9999992451502627, iteration: 21851
loss: 0.9906364679336548,grad_norm: 0.9999992306404382, iteration: 21852
loss: 1.0222464799880981,grad_norm: 0.99999911015903, iteration: 21853
loss: 1.0165742635726929,grad_norm: 0.9999990624309683, iteration: 21854
loss: 0.9869065284729004,grad_norm: 0.9999990552843666, iteration: 21855
loss: 1.0526008605957031,grad_norm: 0.9539173141127226, iteration: 21856
loss: 1.0596683025360107,grad_norm: 0.989815136440528, iteration: 21857
loss: 1.0021787881851196,grad_norm: 0.9423363457337856, iteration: 21858
loss: 1.005806803703308,grad_norm: 0.8974727390242888, iteration: 21859
loss: 1.023260235786438,grad_norm: 0.9999993833821033, iteration: 21860
loss: 1.0275979042053223,grad_norm: 0.9999990619136824, iteration: 21861
loss: 1.0149099826812744,grad_norm: 0.9999992452853336, iteration: 21862
loss: 0.9801556468009949,grad_norm: 0.999999387681057, iteration: 21863
loss: 1.0361011028289795,grad_norm: 0.9999990768303813, iteration: 21864
loss: 0.999068021774292,grad_norm: 0.9719205578929876, iteration: 21865
loss: 1.0513875484466553,grad_norm: 0.999999343808564, iteration: 21866
loss: 0.9937601685523987,grad_norm: 0.9557207108613823, iteration: 21867
loss: 1.0883363485336304,grad_norm: 0.9999996973019205, iteration: 21868
loss: 1.0412766933441162,grad_norm: 0.9999995220713201, iteration: 21869
loss: 1.002379059791565,grad_norm: 0.971657347524094, iteration: 21870
loss: 1.0196459293365479,grad_norm: 0.887673020589659, iteration: 21871
loss: 1.0088847875595093,grad_norm: 0.9820319818498243, iteration: 21872
loss: 1.0262820720672607,grad_norm: 0.999999024886718, iteration: 21873
loss: 1.0281665325164795,grad_norm: 0.9999991184133299, iteration: 21874
loss: 0.9543537497520447,grad_norm: 0.9999991374383099, iteration: 21875
loss: 1.1510000228881836,grad_norm: 0.9999995418787998, iteration: 21876
loss: 1.025668978691101,grad_norm: 0.999999184566656, iteration: 21877
loss: 1.0108548402786255,grad_norm: 0.9189621620675623, iteration: 21878
loss: 0.9925125241279602,grad_norm: 0.999999201701324, iteration: 21879
loss: 1.0392509698867798,grad_norm: 0.9999991199137191, iteration: 21880
loss: 0.9887805581092834,grad_norm: 0.9999991069580216, iteration: 21881
loss: 0.9985854029655457,grad_norm: 0.9999992074438057, iteration: 21882
loss: 0.9643102288246155,grad_norm: 0.9628260796062008, iteration: 21883
loss: 1.0047999620437622,grad_norm: 0.9999992644595261, iteration: 21884
loss: 0.979732871055603,grad_norm: 0.9999991680713013, iteration: 21885
loss: 0.9893738627433777,grad_norm: 0.9999990680520283, iteration: 21886
loss: 1.0043811798095703,grad_norm: 0.9999991545213855, iteration: 21887
loss: 1.022589087486267,grad_norm: 0.9999993527920634, iteration: 21888
loss: 1.0147225856781006,grad_norm: 0.9893230733210037, iteration: 21889
loss: 1.041306972503662,grad_norm: 0.9999993933169561, iteration: 21890
loss: 1.02681565284729,grad_norm: 0.9999992437243728, iteration: 21891
loss: 1.0132122039794922,grad_norm: 0.9413171448724138, iteration: 21892
loss: 1.0254822969436646,grad_norm: 0.9999991880311528, iteration: 21893
loss: 1.0370409488677979,grad_norm: 0.9999990911065457, iteration: 21894
loss: 1.0411860942840576,grad_norm: 0.9830865611421522, iteration: 21895
loss: 1.0339930057525635,grad_norm: 0.9479846244737317, iteration: 21896
loss: 1.0073727369308472,grad_norm: 0.9999994562235163, iteration: 21897
loss: 1.035812258720398,grad_norm: 0.9999989899039377, iteration: 21898
loss: 1.0020191669464111,grad_norm: 0.9999991855373903, iteration: 21899
loss: 1.0266869068145752,grad_norm: 0.999999249567957, iteration: 21900
loss: 0.9677684903144836,grad_norm: 0.9999992107225124, iteration: 21901
loss: 1.0346816778182983,grad_norm: 0.9999992966638904, iteration: 21902
loss: 1.009568214416504,grad_norm: 0.999999160176285, iteration: 21903
loss: 1.0451347827911377,grad_norm: 0.9999996230172304, iteration: 21904
loss: 1.0427225828170776,grad_norm: 0.9999994414256526, iteration: 21905
loss: 1.0291444063186646,grad_norm: 0.9408458513494471, iteration: 21906
loss: 0.9915969967842102,grad_norm: 0.9999992103321356, iteration: 21907
loss: 1.0064523220062256,grad_norm: 0.9999990105538968, iteration: 21908
loss: 0.9548097252845764,grad_norm: 0.9999992583752455, iteration: 21909
loss: 1.0331348180770874,grad_norm: 0.9999997343462597, iteration: 21910
loss: 0.991642415523529,grad_norm: 0.9679949421208562, iteration: 21911
loss: 1.025080680847168,grad_norm: 0.8869753285214849, iteration: 21912
loss: 1.0280901193618774,grad_norm: 0.9999992348113083, iteration: 21913
loss: 0.995707631111145,grad_norm: 0.9999991457411375, iteration: 21914
loss: 1.0359164476394653,grad_norm: 0.9999990790156116, iteration: 21915
loss: 1.0577061176300049,grad_norm: 0.9999991495410235, iteration: 21916
loss: 0.9945095777511597,grad_norm: 0.9999991356149174, iteration: 21917
loss: 1.0571070909500122,grad_norm: 0.9999991843827795, iteration: 21918
loss: 1.037386417388916,grad_norm: 0.9212961666990831, iteration: 21919
loss: 0.9950684309005737,grad_norm: 0.9999994073417744, iteration: 21920
loss: 1.0351390838623047,grad_norm: 0.8745833207486061, iteration: 21921
loss: 1.0004054307937622,grad_norm: 0.9999991118296717, iteration: 21922
loss: 0.984866201877594,grad_norm: 0.8994288674314819, iteration: 21923
loss: 1.0144832134246826,grad_norm: 0.9999990586789697, iteration: 21924
loss: 1.0077568292617798,grad_norm: 0.9265972855463116, iteration: 21925
loss: 0.9847292900085449,grad_norm: 0.9999990100035508, iteration: 21926
loss: 1.0460916757583618,grad_norm: 0.9999996235613047, iteration: 21927
loss: 0.9583133459091187,grad_norm: 0.9814209667652776, iteration: 21928
loss: 1.0156400203704834,grad_norm: 0.9999991269589338, iteration: 21929
loss: 1.023359775543213,grad_norm: 0.9999992252205668, iteration: 21930
loss: 1.0256125926971436,grad_norm: 0.9999991660122003, iteration: 21931
loss: 1.0173909664154053,grad_norm: 0.9735935911331409, iteration: 21932
loss: 1.0102207660675049,grad_norm: 0.9999991502920564, iteration: 21933
loss: 1.018651008605957,grad_norm: 0.9999990431655289, iteration: 21934
loss: 1.0394549369812012,grad_norm: 0.9999991094769134, iteration: 21935
loss: 1.0102710723876953,grad_norm: 0.9999990524832923, iteration: 21936
loss: 1.010382890701294,grad_norm: 0.9322979820068424, iteration: 21937
loss: 1.0461809635162354,grad_norm: 0.9999992388622896, iteration: 21938
loss: 1.0369359254837036,grad_norm: 0.9999990892846583, iteration: 21939
loss: 1.0063502788543701,grad_norm: 0.9999993126196635, iteration: 21940
loss: 1.0751457214355469,grad_norm: 0.9999993869011847, iteration: 21941
loss: 1.0105650424957275,grad_norm: 0.9999989969373841, iteration: 21942
loss: 1.0109684467315674,grad_norm: 0.9999991672033135, iteration: 21943
loss: 1.012062430381775,grad_norm: 0.9999993324823918, iteration: 21944
loss: 1.0109827518463135,grad_norm: 0.9899640213932328, iteration: 21945
loss: 1.012068748474121,grad_norm: 0.9999993679020305, iteration: 21946
loss: 0.9947534203529358,grad_norm: 0.999999068574935, iteration: 21947
loss: 0.9934861660003662,grad_norm: 0.9999993040916598, iteration: 21948
loss: 1.0147812366485596,grad_norm: 0.9999991572590424, iteration: 21949
loss: 1.0156911611557007,grad_norm: 0.7558266013589583, iteration: 21950
loss: 1.0333590507507324,grad_norm: 0.9999993246343586, iteration: 21951
loss: 1.0337443351745605,grad_norm: 0.9999995114558498, iteration: 21952
loss: 1.0150113105773926,grad_norm: 0.9999991208150872, iteration: 21953
loss: 1.0217970609664917,grad_norm: 0.999998938204073, iteration: 21954
loss: 1.0202867984771729,grad_norm: 0.9999992306985063, iteration: 21955
loss: 1.0563180446624756,grad_norm: 0.9999990943800674, iteration: 21956
loss: 1.0453715324401855,grad_norm: 0.9999995518388124, iteration: 21957
loss: 1.0235074758529663,grad_norm: 0.9925617344573496, iteration: 21958
loss: 1.030293583869934,grad_norm: 0.9999991052155759, iteration: 21959
loss: 1.0073726177215576,grad_norm: 0.9999993147009558, iteration: 21960
loss: 0.9964890480041504,grad_norm: 0.9999997718215964, iteration: 21961
loss: 0.9999445080757141,grad_norm: 0.9999994909258669, iteration: 21962
loss: 1.0086541175842285,grad_norm: 0.9787907410051722, iteration: 21963
loss: 1.0410453081130981,grad_norm: 0.9999996616925204, iteration: 21964
loss: 1.0363765954971313,grad_norm: 0.9308700043974999, iteration: 21965
loss: 1.032555103302002,grad_norm: 0.9999990951005763, iteration: 21966
loss: 1.0246586799621582,grad_norm: 0.8644160602624043, iteration: 21967
loss: 0.9883689880371094,grad_norm: 0.9150424071658908, iteration: 21968
loss: 1.0225858688354492,grad_norm: 0.9999991368459804, iteration: 21969
loss: 1.0322520732879639,grad_norm: 0.8578799468435262, iteration: 21970
loss: 0.9979215860366821,grad_norm: 0.9601111588999234, iteration: 21971
loss: 1.0069080591201782,grad_norm: 0.9264120234791894, iteration: 21972
loss: 0.9898882508277893,grad_norm: 0.9999993899166227, iteration: 21973
loss: 1.0359638929367065,grad_norm: 0.8302539986160115, iteration: 21974
loss: 0.9767265915870667,grad_norm: 0.9999994939784872, iteration: 21975
loss: 1.0172722339630127,grad_norm: 0.9999990858337894, iteration: 21976
loss: 1.002355933189392,grad_norm: 0.9999992153985279, iteration: 21977
loss: 1.0517871379852295,grad_norm: 0.9999991817259102, iteration: 21978
loss: 1.0397082567214966,grad_norm: 0.9999995214379256, iteration: 21979
loss: 1.1040606498718262,grad_norm: 0.9999993430153845, iteration: 21980
loss: 1.0406928062438965,grad_norm: 0.9999992883151585, iteration: 21981
loss: 1.0611642599105835,grad_norm: 0.9999994298801387, iteration: 21982
loss: 1.0295861959457397,grad_norm: 0.999999184003811, iteration: 21983
loss: 0.9892672896385193,grad_norm: 0.9461287078631091, iteration: 21984
loss: 1.0180473327636719,grad_norm: 0.9999990665677118, iteration: 21985
loss: 1.0106563568115234,grad_norm: 0.9999990572924244, iteration: 21986
loss: 1.0161257982254028,grad_norm: 0.9999992569317648, iteration: 21987
loss: 1.085889458656311,grad_norm: 0.9999992590726644, iteration: 21988
loss: 1.0151596069335938,grad_norm: 0.9692753551574191, iteration: 21989
loss: 0.9754371047019958,grad_norm: 0.9036309744522137, iteration: 21990
loss: 1.0519598722457886,grad_norm: 0.9999991917160669, iteration: 21991
loss: 1.016619086265564,grad_norm: 0.999999044097124, iteration: 21992
loss: 0.9855359792709351,grad_norm: 0.9901923127392623, iteration: 21993
loss: 1.019181728363037,grad_norm: 0.9550876396672789, iteration: 21994
loss: 1.0111466646194458,grad_norm: 0.9999992019142911, iteration: 21995
loss: 1.0214192867279053,grad_norm: 0.9959459275156386, iteration: 21996
loss: 1.0391199588775635,grad_norm: 0.8612150022759779, iteration: 21997
loss: 1.042276382446289,grad_norm: 0.9999991017087143, iteration: 21998
loss: 1.0288783311843872,grad_norm: 0.9999990684326159, iteration: 21999
loss: 1.0015523433685303,grad_norm: 0.9905068809559598, iteration: 22000
loss: 1.0037699937820435,grad_norm: 0.9999990778274845, iteration: 22001
loss: 1.0388959646224976,grad_norm: 0.9999989701479443, iteration: 22002
loss: 1.033274531364441,grad_norm: 0.9142640233622835, iteration: 22003
loss: 1.0358842611312866,grad_norm: 0.9999991599158738, iteration: 22004
loss: 0.9881604909896851,grad_norm: 0.999999072284465, iteration: 22005
loss: 0.9669623374938965,grad_norm: 0.9878906934262707, iteration: 22006
loss: 1.054132103919983,grad_norm: 0.935338616510145, iteration: 22007
loss: 1.024428129196167,grad_norm: 0.9875869477822272, iteration: 22008
loss: 1.0102267265319824,grad_norm: 0.9930765383004625, iteration: 22009
loss: 1.0144124031066895,grad_norm: 0.9999992619476993, iteration: 22010
loss: 1.002183198928833,grad_norm: 0.9999991418992187, iteration: 22011
loss: 0.9769829511642456,grad_norm: 0.9999989858414557, iteration: 22012
loss: 1.0359492301940918,grad_norm: 0.8588543848523625, iteration: 22013
loss: 1.0171416997909546,grad_norm: 0.999999262353808, iteration: 22014
loss: 0.9711533784866333,grad_norm: 0.8901620532204944, iteration: 22015
loss: 1.0535223484039307,grad_norm: 0.9999992340319206, iteration: 22016
loss: 1.0241528749465942,grad_norm: 0.9999990591263339, iteration: 22017
loss: 1.0142799615859985,grad_norm: 0.9999991269993285, iteration: 22018
loss: 1.0263328552246094,grad_norm: 0.999999063367648, iteration: 22019
loss: 0.9879435896873474,grad_norm: 0.9999992063021748, iteration: 22020
loss: 1.0421066284179688,grad_norm: 0.9999991284281977, iteration: 22021
loss: 1.028314232826233,grad_norm: 0.999999199379898, iteration: 22022
loss: 1.005419135093689,grad_norm: 0.9777297810261665, iteration: 22023
loss: 1.0191224813461304,grad_norm: 0.9999991746792531, iteration: 22024
loss: 1.0047589540481567,grad_norm: 0.9999992430255027, iteration: 22025
loss: 0.9664638638496399,grad_norm: 0.835778024293874, iteration: 22026
loss: 1.0499076843261719,grad_norm: 0.9723716689253217, iteration: 22027
loss: 0.9877776503562927,grad_norm: 0.9912993314968243, iteration: 22028
loss: 1.0296847820281982,grad_norm: 0.9960932486303731, iteration: 22029
loss: 1.052313208580017,grad_norm: 0.9999992587977193, iteration: 22030
loss: 1.0168997049331665,grad_norm: 0.9999991529174488, iteration: 22031
loss: 1.031818151473999,grad_norm: 0.9999991076095106, iteration: 22032
loss: 0.9922463893890381,grad_norm: 0.9999990741878918, iteration: 22033
loss: 1.0172934532165527,grad_norm: 0.9999991685249258, iteration: 22034
loss: 1.0087674856185913,grad_norm: 0.8127767682309308, iteration: 22035
loss: 0.9621075987815857,grad_norm: 0.8724129473871847, iteration: 22036
loss: 1.0082504749298096,grad_norm: 0.9999993038624617, iteration: 22037
loss: 1.0196137428283691,grad_norm: 0.9417711718080815, iteration: 22038
loss: 1.021491527557373,grad_norm: 0.999999119551663, iteration: 22039
loss: 1.0234694480895996,grad_norm: 0.9999991294318752, iteration: 22040
loss: 0.9875562191009521,grad_norm: 0.9999990576747291, iteration: 22041
loss: 1.0327314138412476,grad_norm: 0.8859246748703903, iteration: 22042
loss: 1.0065038204193115,grad_norm: 0.9999991688064233, iteration: 22043
loss: 1.0429344177246094,grad_norm: 0.9560137990578507, iteration: 22044
loss: 1.0081948041915894,grad_norm: 0.9999991383409701, iteration: 22045
loss: 0.9777479767799377,grad_norm: 0.999999162581231, iteration: 22046
loss: 1.065531611442566,grad_norm: 0.9999990744407317, iteration: 22047
loss: 0.9817425012588501,grad_norm: 0.9999990849551071, iteration: 22048
loss: 0.9791977405548096,grad_norm: 0.9999991719383343, iteration: 22049
loss: 1.0191866159439087,grad_norm: 0.9521133519022777, iteration: 22050
loss: 0.9834515452384949,grad_norm: 0.8992992943925631, iteration: 22051
loss: 1.0058784484863281,grad_norm: 0.9092999967641164, iteration: 22052
loss: 1.0483838319778442,grad_norm: 0.9999991165339409, iteration: 22053
loss: 1.0412975549697876,grad_norm: 0.9999993349520813, iteration: 22054
loss: 1.034599781036377,grad_norm: 0.9789151956861925, iteration: 22055
loss: 0.992281436920166,grad_norm: 0.9842871132010087, iteration: 22056
loss: 0.9927618503570557,grad_norm: 0.81391467936282, iteration: 22057
loss: 0.9980083107948303,grad_norm: 0.9957748139121112, iteration: 22058
loss: 1.0013071298599243,grad_norm: 0.9999992215788737, iteration: 22059
loss: 0.9647476673126221,grad_norm: 0.9449264335289309, iteration: 22060
loss: 1.025477647781372,grad_norm: 0.8167698582359417, iteration: 22061
loss: 0.9988065361976624,grad_norm: 0.8977642269112803, iteration: 22062
loss: 1.0150562524795532,grad_norm: 0.9999991446560481, iteration: 22063
loss: 1.0548609495162964,grad_norm: 0.9999990953200464, iteration: 22064
loss: 0.9703183770179749,grad_norm: 0.9999990551555319, iteration: 22065
loss: 1.0433669090270996,grad_norm: 0.9741478061495157, iteration: 22066
loss: 1.0262537002563477,grad_norm: 0.8515180712562065, iteration: 22067
loss: 0.9581977725028992,grad_norm: 0.9999992379906201, iteration: 22068
loss: 1.016446590423584,grad_norm: 0.9999991257078997, iteration: 22069
loss: 0.9809523224830627,grad_norm: 0.9232857388448518, iteration: 22070
loss: 1.0355021953582764,grad_norm: 0.9959826292726147, iteration: 22071
loss: 1.017478108406067,grad_norm: 0.999999082466028, iteration: 22072
loss: 1.0077332258224487,grad_norm: 0.8412424236028191, iteration: 22073
loss: 0.9658136367797852,grad_norm: 0.9999991459612636, iteration: 22074
loss: 1.0039176940917969,grad_norm: 0.7327372787552613, iteration: 22075
loss: 1.019544243812561,grad_norm: 0.9999989786667703, iteration: 22076
loss: 1.012092113494873,grad_norm: 0.9999992055461234, iteration: 22077
loss: 0.990943431854248,grad_norm: 0.9999992018742216, iteration: 22078
loss: 1.0053260326385498,grad_norm: 0.9999991054567949, iteration: 22079
loss: 0.9696139693260193,grad_norm: 0.957940925739483, iteration: 22080
loss: 1.0089303255081177,grad_norm: 0.9236325539406824, iteration: 22081
loss: 1.0055210590362549,grad_norm: 0.9202267117542099, iteration: 22082
loss: 1.0239717960357666,grad_norm: 0.9999990750515324, iteration: 22083
loss: 1.042056918144226,grad_norm: 0.999999138058726, iteration: 22084
loss: 1.0053012371063232,grad_norm: 0.9920425239492496, iteration: 22085
loss: 0.9856216907501221,grad_norm: 0.9695651716838597, iteration: 22086
loss: 0.9790381193161011,grad_norm: 0.9999991695901064, iteration: 22087
loss: 0.9921464323997498,grad_norm: 0.9999994215012201, iteration: 22088
loss: 0.9834424257278442,grad_norm: 0.9971531774733797, iteration: 22089
loss: 0.9984227418899536,grad_norm: 0.9673330277155998, iteration: 22090
loss: 1.024649739265442,grad_norm: 0.9925145506111452, iteration: 22091
loss: 1.038910150527954,grad_norm: 0.8553696660608792, iteration: 22092
loss: 1.0399688482284546,grad_norm: 0.9999990987757874, iteration: 22093
loss: 1.002219319343567,grad_norm: 0.999999037232092, iteration: 22094
loss: 1.0109851360321045,grad_norm: 0.9999990530993689, iteration: 22095
loss: 1.0236072540283203,grad_norm: 0.9999992923156577, iteration: 22096
loss: 1.0373501777648926,grad_norm: 0.9901517322875993, iteration: 22097
loss: 1.0235157012939453,grad_norm: 0.8458503110528159, iteration: 22098
loss: 1.0399001836776733,grad_norm: 0.9999991382283419, iteration: 22099
loss: 1.022731900215149,grad_norm: 0.9999990395539784, iteration: 22100
loss: 0.9996359944343567,grad_norm: 0.9130290193235797, iteration: 22101
loss: 0.993192732334137,grad_norm: 0.9999993316782502, iteration: 22102
loss: 1.0118329524993896,grad_norm: 0.9999991489094328, iteration: 22103
loss: 1.0246278047561646,grad_norm: 0.8485044737775291, iteration: 22104
loss: 1.0001119375228882,grad_norm: 0.9999991926258984, iteration: 22105
loss: 1.0265928506851196,grad_norm: 0.9305177807683103, iteration: 22106
loss: 1.0320779085159302,grad_norm: 0.9999992466855793, iteration: 22107
loss: 1.012489914894104,grad_norm: 0.9999991634068404, iteration: 22108
loss: 0.9817192554473877,grad_norm: 0.9738440308376253, iteration: 22109
loss: 1.0236443281173706,grad_norm: 0.9420453289954857, iteration: 22110
loss: 1.0252037048339844,grad_norm: 0.977650888884716, iteration: 22111
loss: 1.0161147117614746,grad_norm: 0.8881066533773271, iteration: 22112
loss: 0.9942137002944946,grad_norm: 0.9999996631993168, iteration: 22113
loss: 0.9949005842208862,grad_norm: 0.8766305370867542, iteration: 22114
loss: 1.0116993188858032,grad_norm: 0.9102882951016945, iteration: 22115
loss: 0.9648485779762268,grad_norm: 0.9708591728297679, iteration: 22116
loss: 1.0223156213760376,grad_norm: 0.9999990707211601, iteration: 22117
loss: 1.0130904912948608,grad_norm: 0.9277027361108997, iteration: 22118
loss: 1.0211652517318726,grad_norm: 0.8715324633309849, iteration: 22119
loss: 0.9639421105384827,grad_norm: 0.9076196626761271, iteration: 22120
loss: 0.9990055561065674,grad_norm: 0.9999991070502388, iteration: 22121
loss: 0.9872661828994751,grad_norm: 0.9999991446089608, iteration: 22122
loss: 1.0263458490371704,grad_norm: 0.9999990684186568, iteration: 22123
loss: 0.9969757795333862,grad_norm: 0.8368333805610976, iteration: 22124
loss: 1.0334192514419556,grad_norm: 0.9981301981897442, iteration: 22125
loss: 0.9845700860023499,grad_norm: 0.9856090284917011, iteration: 22126
loss: 0.9864699840545654,grad_norm: 0.9999990637461913, iteration: 22127
loss: 0.9929072856903076,grad_norm: 0.94888827262314, iteration: 22128
loss: 1.0049655437469482,grad_norm: 0.9624162209216902, iteration: 22129
loss: 1.0229697227478027,grad_norm: 0.9999994052919333, iteration: 22130
loss: 0.9840611815452576,grad_norm: 0.8560286219461715, iteration: 22131
loss: 0.9877303242683411,grad_norm: 0.9999990990692084, iteration: 22132
loss: 1.040667176246643,grad_norm: 0.9999989947317892, iteration: 22133
loss: 1.0307912826538086,grad_norm: 0.9999996146465686, iteration: 22134
loss: 1.0575711727142334,grad_norm: 0.9999997701845322, iteration: 22135
loss: 0.9541810154914856,grad_norm: 0.9999994843022625, iteration: 22136
loss: 1.0461457967758179,grad_norm: 0.9999991243179438, iteration: 22137
loss: 1.010429859161377,grad_norm: 0.9999991151687786, iteration: 22138
loss: 1.0396596193313599,grad_norm: 0.9964854106631705, iteration: 22139
loss: 0.9843800663948059,grad_norm: 0.9999993635033456, iteration: 22140
loss: 1.034118890762329,grad_norm: 0.9999990616912956, iteration: 22141
loss: 0.9596886038780212,grad_norm: 0.9999991169017991, iteration: 22142
loss: 1.004624366760254,grad_norm: 0.993607944176775, iteration: 22143
loss: 0.9921005964279175,grad_norm: 0.9999993021742491, iteration: 22144
loss: 0.9948710799217224,grad_norm: 0.9999991977775391, iteration: 22145
loss: 0.9863728880882263,grad_norm: 0.9999990497106703, iteration: 22146
loss: 1.0051943063735962,grad_norm: 0.9999990416249853, iteration: 22147
loss: 0.9892003536224365,grad_norm: 0.9999991646692404, iteration: 22148
loss: 1.0444066524505615,grad_norm: 0.9999992209920929, iteration: 22149
loss: 1.0304101705551147,grad_norm: 0.9999992789822229, iteration: 22150
loss: 1.013230800628662,grad_norm: 0.9999993282586425, iteration: 22151
loss: 1.013864278793335,grad_norm: 0.9999992501110617, iteration: 22152
loss: 1.0266882181167603,grad_norm: 0.934247176228328, iteration: 22153
loss: 1.003261923789978,grad_norm: 0.9810066947320839, iteration: 22154
loss: 1.0114539861679077,grad_norm: 0.909358180629619, iteration: 22155
loss: 1.0237449407577515,grad_norm: 0.9999991646782469, iteration: 22156
loss: 1.0172178745269775,grad_norm: 0.999999292952703, iteration: 22157
loss: 1.019484043121338,grad_norm: 0.9999991446791862, iteration: 22158
loss: 1.0278680324554443,grad_norm: 0.999999034882675, iteration: 22159
loss: 0.9992165565490723,grad_norm: 0.925142101313984, iteration: 22160
loss: 1.0107674598693848,grad_norm: 0.9999989757677813, iteration: 22161
loss: 0.9881831407546997,grad_norm: 0.8880456243177544, iteration: 22162
loss: 0.9883806109428406,grad_norm: 0.9443200012416898, iteration: 22163
loss: 1.0245985984802246,grad_norm: 0.9999995112652629, iteration: 22164
loss: 1.0176314115524292,grad_norm: 0.9999991700267382, iteration: 22165
loss: 1.0149554014205933,grad_norm: 0.9999991151447717, iteration: 22166
loss: 1.0025280714035034,grad_norm: 0.8065488172033877, iteration: 22167
loss: 0.9829484820365906,grad_norm: 0.9999990816555122, iteration: 22168
loss: 1.0206927061080933,grad_norm: 0.9860966667745679, iteration: 22169
loss: 1.022698163986206,grad_norm: 0.9143690934905132, iteration: 22170
loss: 1.011415719985962,grad_norm: 0.9999992042838589, iteration: 22171
loss: 0.9992838501930237,grad_norm: 0.9999990305644769, iteration: 22172
loss: 0.9965071678161621,grad_norm: 0.9999990682776163, iteration: 22173
loss: 0.9859461784362793,grad_norm: 0.9999991499878204, iteration: 22174
loss: 1.0058956146240234,grad_norm: 0.9137173615998421, iteration: 22175
loss: 0.9712548851966858,grad_norm: 0.9999991519887743, iteration: 22176
loss: 1.0274509191513062,grad_norm: 0.9999990052231746, iteration: 22177
loss: 1.0458120107650757,grad_norm: 0.9999991774432093, iteration: 22178
loss: 1.0245838165283203,grad_norm: 0.9999992798452948, iteration: 22179
loss: 1.011991024017334,grad_norm: 0.9999990597609245, iteration: 22180
loss: 0.9942132234573364,grad_norm: 0.9999990183579102, iteration: 22181
loss: 0.9575275182723999,grad_norm: 0.9999991284332667, iteration: 22182
loss: 0.9898920655250549,grad_norm: 0.8320559982346046, iteration: 22183
loss: 1.0080111026763916,grad_norm: 0.8616647177212655, iteration: 22184
loss: 1.0468733310699463,grad_norm: 0.9373543615576357, iteration: 22185
loss: 1.0257518291473389,grad_norm: 0.9999991411844, iteration: 22186
loss: 0.9839255809783936,grad_norm: 0.9626213067783742, iteration: 22187
loss: 1.006346583366394,grad_norm: 0.9615864890574588, iteration: 22188
loss: 1.0077540874481201,grad_norm: 0.9464186435521352, iteration: 22189
loss: 1.0082541704177856,grad_norm: 0.9609434383754737, iteration: 22190
loss: 0.9851539134979248,grad_norm: 0.9999991555551295, iteration: 22191
loss: 1.0171327590942383,grad_norm: 0.8621419690822972, iteration: 22192
loss: 0.9482454061508179,grad_norm: 0.918867832996258, iteration: 22193
loss: 1.0010864734649658,grad_norm: 0.9999990505364079, iteration: 22194
loss: 1.0178406238555908,grad_norm: 0.9999991146172291, iteration: 22195
loss: 0.9808769822120667,grad_norm: 0.9575764079381682, iteration: 22196
loss: 1.0243632793426514,grad_norm: 0.989934742053216, iteration: 22197
loss: 1.0071537494659424,grad_norm: 0.9999993044957864, iteration: 22198
loss: 1.0470178127288818,grad_norm: 0.9999989858061505, iteration: 22199
loss: 0.967006504535675,grad_norm: 0.9999992420162963, iteration: 22200
loss: 0.9948733448982239,grad_norm: 0.9999998172388922, iteration: 22201
loss: 1.017449140548706,grad_norm: 0.981471165603943, iteration: 22202
loss: 1.0349352359771729,grad_norm: 0.8869902133304581, iteration: 22203
loss: 1.0230296850204468,grad_norm: 0.9999990874537529, iteration: 22204
loss: 1.0404636859893799,grad_norm: 0.852181020979032, iteration: 22205
loss: 0.9918766021728516,grad_norm: 0.9218248808323397, iteration: 22206
loss: 1.0374306440353394,grad_norm: 0.999999064603373, iteration: 22207
loss: 1.0506926774978638,grad_norm: 0.9822873538526093, iteration: 22208
loss: 0.9870374798774719,grad_norm: 0.9999991348609861, iteration: 22209
loss: 1.007503867149353,grad_norm: 0.9999991944264265, iteration: 22210
loss: 0.942372739315033,grad_norm: 0.9861623719721909, iteration: 22211
loss: 0.9663170576095581,grad_norm: 0.9999991683552578, iteration: 22212
loss: 0.9894256591796875,grad_norm: 0.9841838980359368, iteration: 22213
loss: 1.001150369644165,grad_norm: 0.7865343777871151, iteration: 22214
loss: 1.0250661373138428,grad_norm: 0.9999991644913115, iteration: 22215
loss: 1.0429491996765137,grad_norm: 0.9999992517800749, iteration: 22216
loss: 0.9675775170326233,grad_norm: 0.9999992039399913, iteration: 22217
loss: 0.992199718952179,grad_norm: 0.8602764674367492, iteration: 22218
loss: 1.0423229932785034,grad_norm: 0.999999254892528, iteration: 22219
loss: 0.9930694699287415,grad_norm: 0.9999991273106915, iteration: 22220
loss: 0.9532946944236755,grad_norm: 0.9999992168743963, iteration: 22221
loss: 1.044395089149475,grad_norm: 0.9832338695195049, iteration: 22222
loss: 1.0117242336273193,grad_norm: 0.9201026049539652, iteration: 22223
loss: 1.0435190200805664,grad_norm: 0.9999991991698511, iteration: 22224
loss: 1.042545199394226,grad_norm: 0.999999336319673, iteration: 22225
loss: 1.021406650543213,grad_norm: 0.9999993236757394, iteration: 22226
loss: 0.981415331363678,grad_norm: 0.9486557824876778, iteration: 22227
loss: 0.9947202801704407,grad_norm: 0.9999993130974285, iteration: 22228
loss: 1.036745309829712,grad_norm: 0.9999992699326492, iteration: 22229
loss: 1.0364248752593994,grad_norm: 0.9999990813564492, iteration: 22230
loss: 0.9912154078483582,grad_norm: 0.9999991133635671, iteration: 22231
loss: 1.0039260387420654,grad_norm: 0.9999992064763628, iteration: 22232
loss: 1.0502967834472656,grad_norm: 0.9999990335749542, iteration: 22233
loss: 0.9927637577056885,grad_norm: 0.9999991352407173, iteration: 22234
loss: 1.0202698707580566,grad_norm: 0.9282501056825241, iteration: 22235
loss: 1.0170117616653442,grad_norm: 0.9999993322567291, iteration: 22236
loss: 1.0154504776000977,grad_norm: 0.9999990798562466, iteration: 22237
loss: 0.9906923174858093,grad_norm: 0.8468756759188557, iteration: 22238
loss: 0.9899340271949768,grad_norm: 0.9998774750364817, iteration: 22239
loss: 1.0183039903640747,grad_norm: 0.9513973160054848, iteration: 22240
loss: 1.0302438735961914,grad_norm: 0.9999990590142237, iteration: 22241
loss: 1.0726953744888306,grad_norm: 0.9999993460995968, iteration: 22242
loss: 1.0033032894134521,grad_norm: 0.9999990579917063, iteration: 22243
loss: 1.0092284679412842,grad_norm: 0.9947677943297565, iteration: 22244
loss: 0.9709815382957458,grad_norm: 0.999999180160933, iteration: 22245
loss: 0.9880297183990479,grad_norm: 0.9822628771250986, iteration: 22246
loss: 1.0032696723937988,grad_norm: 0.9772664077759872, iteration: 22247
loss: 1.0229064226150513,grad_norm: 0.9999995056117282, iteration: 22248
loss: 0.9917320013046265,grad_norm: 0.9999990825607599, iteration: 22249
loss: 0.9911653399467468,grad_norm: 0.9999989181584992, iteration: 22250
loss: 1.046743631362915,grad_norm: 0.9999998643176004, iteration: 22251
loss: 1.0031483173370361,grad_norm: 0.8921120587324171, iteration: 22252
loss: 1.0003533363342285,grad_norm: 0.8776368423327537, iteration: 22253
loss: 1.018479585647583,grad_norm: 0.999999223054534, iteration: 22254
loss: 1.0312374830245972,grad_norm: 0.9999997633995015, iteration: 22255
loss: 1.0671395063400269,grad_norm: 0.9999990346109282, iteration: 22256
loss: 1.013440489768982,grad_norm: 0.999999302841939, iteration: 22257
loss: 1.031060814857483,grad_norm: 0.929475171580174, iteration: 22258
loss: 1.1425144672393799,grad_norm: 0.9999994775773194, iteration: 22259
loss: 0.9795882701873779,grad_norm: 0.9999991987080401, iteration: 22260
loss: 1.054248332977295,grad_norm: 0.9999990538342118, iteration: 22261
loss: 0.9796120524406433,grad_norm: 0.9756625790864493, iteration: 22262
loss: 0.9947978258132935,grad_norm: 0.9988552936466353, iteration: 22263
loss: 0.9863539338111877,grad_norm: 0.8927246893060488, iteration: 22264
loss: 1.0081522464752197,grad_norm: 0.9999991863267093, iteration: 22265
loss: 0.992124080657959,grad_norm: 0.846235963062142, iteration: 22266
loss: 0.9744378328323364,grad_norm: 0.9999990933759443, iteration: 22267
loss: 1.0030726194381714,grad_norm: 0.9999991484034441, iteration: 22268
loss: 0.9897324442863464,grad_norm: 0.9334316571429083, iteration: 22269
loss: 1.0515216588974,grad_norm: 0.9999991278840458, iteration: 22270
loss: 1.054695963859558,grad_norm: 0.9999990684159252, iteration: 22271
loss: 1.0102685689926147,grad_norm: 0.9999991777314939, iteration: 22272
loss: 0.9546217918395996,grad_norm: 0.9705797572032803, iteration: 22273
loss: 1.0691683292388916,grad_norm: 0.9999991088061854, iteration: 22274
loss: 1.0597736835479736,grad_norm: 0.9999997438686682, iteration: 22275
loss: 1.0170931816101074,grad_norm: 0.999999301679533, iteration: 22276
loss: 1.0385786294937134,grad_norm: 0.9999994201268116, iteration: 22277
loss: 1.0356125831604004,grad_norm: 0.9999993836448257, iteration: 22278
loss: 0.9960305690765381,grad_norm: 0.9838040953203406, iteration: 22279
loss: 0.972353994846344,grad_norm: 0.9999991544929822, iteration: 22280
loss: 1.0222560167312622,grad_norm: 0.9999993691114439, iteration: 22281
loss: 1.0029197931289673,grad_norm: 0.9999990699133513, iteration: 22282
loss: 1.0286012887954712,grad_norm: 0.9999993038998227, iteration: 22283
loss: 1.0180583000183105,grad_norm: 0.9999991452781805, iteration: 22284
loss: 0.98948073387146,grad_norm: 0.9999991221392095, iteration: 22285
loss: 1.0307163000106812,grad_norm: 0.999999209620008, iteration: 22286
loss: 1.0114518404006958,grad_norm: 0.9999991898273541, iteration: 22287
loss: 1.0243672132492065,grad_norm: 0.999999246827329, iteration: 22288
loss: 1.0209497213363647,grad_norm: 0.9999990391277642, iteration: 22289
loss: 1.0420594215393066,grad_norm: 0.999999051949829, iteration: 22290
loss: 1.0551302433013916,grad_norm: 0.9999997590443285, iteration: 22291
loss: 1.029789924621582,grad_norm: 0.9999991908219372, iteration: 22292
loss: 1.0011593103408813,grad_norm: 0.9999992006581686, iteration: 22293
loss: 0.9916096925735474,grad_norm: 0.9999991414894798, iteration: 22294
loss: 1.0425406694412231,grad_norm: 0.9999992219358173, iteration: 22295
loss: 1.0243515968322754,grad_norm: 0.8995185687180227, iteration: 22296
loss: 1.0303637981414795,grad_norm: 0.9008240656799685, iteration: 22297
loss: 0.9920659065246582,grad_norm: 0.9999992240168534, iteration: 22298
loss: 1.0296169519424438,grad_norm: 0.9999990269316653, iteration: 22299
loss: 0.9937846660614014,grad_norm: 0.9999990879689978, iteration: 22300
loss: 1.0392959117889404,grad_norm: 0.9999997649794936, iteration: 22301
loss: 1.0180455446243286,grad_norm: 0.9302216238603296, iteration: 22302
loss: 0.9808365702629089,grad_norm: 0.9644721535670152, iteration: 22303
loss: 0.9723179340362549,grad_norm: 0.9999991740011668, iteration: 22304
loss: 1.0129450559616089,grad_norm: 0.9192048946675216, iteration: 22305
loss: 1.0158721208572388,grad_norm: 0.9999991959682591, iteration: 22306
loss: 1.0590182542800903,grad_norm: 0.9999995708268143, iteration: 22307
loss: 0.9928746223449707,grad_norm: 0.9999992554683967, iteration: 22308
loss: 1.0640605688095093,grad_norm: 0.9999994231638242, iteration: 22309
loss: 0.9749864339828491,grad_norm: 0.9999991406641198, iteration: 22310
loss: 0.999392032623291,grad_norm: 0.938621630534099, iteration: 22311
loss: 0.9978256821632385,grad_norm: 0.9999997179214201, iteration: 22312
loss: 0.9630776047706604,grad_norm: 0.9999991284931868, iteration: 22313
loss: 0.982408344745636,grad_norm: 0.9999990457638417, iteration: 22314
loss: 1.0431419610977173,grad_norm: 0.9999991555838469, iteration: 22315
loss: 1.0426585674285889,grad_norm: 0.999999158594543, iteration: 22316
loss: 1.0234966278076172,grad_norm: 0.9999995280198264, iteration: 22317
loss: 0.9695728421211243,grad_norm: 0.8641976981881652, iteration: 22318
loss: 1.0300190448760986,grad_norm: 0.9999990899361864, iteration: 22319
loss: 1.0151019096374512,grad_norm: 0.9246914351256057, iteration: 22320
loss: 1.0226082801818848,grad_norm: 0.9999995845904031, iteration: 22321
loss: 1.0056483745574951,grad_norm: 0.999999283930092, iteration: 22322
loss: 1.0532026290893555,grad_norm: 0.9999993991979652, iteration: 22323
loss: 0.9784652590751648,grad_norm: 0.9999991391753742, iteration: 22324
loss: 1.0147045850753784,grad_norm: 0.886411872139466, iteration: 22325
loss: 1.0091344118118286,grad_norm: 0.9999992912249753, iteration: 22326
loss: 0.9961884021759033,grad_norm: 0.8622152290044114, iteration: 22327
loss: 1.01820707321167,grad_norm: 0.9999998239677143, iteration: 22328
loss: 0.9831959009170532,grad_norm: 0.9999997286579736, iteration: 22329
loss: 1.0123848915100098,grad_norm: 0.9999994919660933, iteration: 22330
loss: 1.0148650407791138,grad_norm: 0.9999993844046756, iteration: 22331
loss: 0.9797080755233765,grad_norm: 0.9833120499185402, iteration: 22332
loss: 0.9924708604812622,grad_norm: 0.8867079772311158, iteration: 22333
loss: 1.0071583986282349,grad_norm: 0.9999991737924414, iteration: 22334
loss: 0.964097261428833,grad_norm: 0.9871602755316845, iteration: 22335
loss: 0.9784249067306519,grad_norm: 0.9999990163909652, iteration: 22336
loss: 1.0785497426986694,grad_norm: 0.9999995355131327, iteration: 22337
loss: 0.9950762987136841,grad_norm: 0.9999991436226778, iteration: 22338
loss: 1.0237120389938354,grad_norm: 0.9999990495732122, iteration: 22339
loss: 0.9788320064544678,grad_norm: 0.9999991774889647, iteration: 22340
loss: 1.0466728210449219,grad_norm: 0.9999990828290914, iteration: 22341
loss: 1.005174160003662,grad_norm: 0.9925612238146502, iteration: 22342
loss: 0.9944765567779541,grad_norm: 0.9999993517192366, iteration: 22343
loss: 0.9847153425216675,grad_norm: 0.927097460845598, iteration: 22344
loss: 1.0928648710250854,grad_norm: 0.9999995307245919, iteration: 22345
loss: 0.9794507026672363,grad_norm: 0.999999117228511, iteration: 22346
loss: 1.0281890630722046,grad_norm: 0.9552975823527455, iteration: 22347
loss: 1.010688066482544,grad_norm: 0.9999992499656879, iteration: 22348
loss: 1.0025182962417603,grad_norm: 0.9999997507962145, iteration: 22349
loss: 0.9919953942298889,grad_norm: 0.9999992144496207, iteration: 22350
loss: 1.0386254787445068,grad_norm: 0.9999997177560634, iteration: 22351
loss: 0.992171585559845,grad_norm: 0.9999999341570621, iteration: 22352
loss: 1.0072795152664185,grad_norm: 0.9999992441743657, iteration: 22353
loss: 1.0344314575195312,grad_norm: 0.9045792751360835, iteration: 22354
loss: 1.0477218627929688,grad_norm: 0.9999995682447793, iteration: 22355
loss: 0.9946216940879822,grad_norm: 0.97867346435988, iteration: 22356
loss: 1.011089563369751,grad_norm: 0.9999993588458602, iteration: 22357
loss: 1.025560975074768,grad_norm: 0.9581185358954918, iteration: 22358
loss: 0.9814965128898621,grad_norm: 0.8886653128159445, iteration: 22359
loss: 1.0443644523620605,grad_norm: 0.9999991872555543, iteration: 22360
loss: 0.9926006197929382,grad_norm: 0.869983616726019, iteration: 22361
loss: 1.0086013078689575,grad_norm: 0.9240421279329409, iteration: 22362
loss: 1.0532888174057007,grad_norm: 0.9999996812005716, iteration: 22363
loss: 1.043682336807251,grad_norm: 0.9999990986517481, iteration: 22364
loss: 1.0391532182693481,grad_norm: 0.9999992387298072, iteration: 22365
loss: 1.022917628288269,grad_norm: 0.9642629308657026, iteration: 22366
loss: 1.0045679807662964,grad_norm: 0.94318473498781, iteration: 22367
loss: 1.0346535444259644,grad_norm: 0.9999990117709836, iteration: 22368
loss: 0.9612893462181091,grad_norm: 0.9999990041471378, iteration: 22369
loss: 1.0131205320358276,grad_norm: 0.9999993181025576, iteration: 22370
loss: 1.003373146057129,grad_norm: 0.9286495780189539, iteration: 22371
loss: 1.0097618103027344,grad_norm: 0.9708484718629011, iteration: 22372
loss: 1.0173401832580566,grad_norm: 0.9999991448649161, iteration: 22373
loss: 0.9769585132598877,grad_norm: 0.9999992166685123, iteration: 22374
loss: 0.9945902824401855,grad_norm: 0.8528898087617982, iteration: 22375
loss: 0.9627155661582947,grad_norm: 0.9999992290175233, iteration: 22376
loss: 0.9795100092887878,grad_norm: 0.7787110951458882, iteration: 22377
loss: 1.0156124830245972,grad_norm: 0.9999990694955131, iteration: 22378
loss: 1.0625038146972656,grad_norm: 0.999999661556984, iteration: 22379
loss: 1.0557788610458374,grad_norm: 0.9999991079162839, iteration: 22380
loss: 1.0094021558761597,grad_norm: 0.9999994500987139, iteration: 22381
loss: 1.016920804977417,grad_norm: 0.9375708676190097, iteration: 22382
loss: 1.0162067413330078,grad_norm: 0.9999990270759962, iteration: 22383
loss: 1.0353584289550781,grad_norm: 0.9906993263810304, iteration: 22384
loss: 0.9649218916893005,grad_norm: 0.9925491055932782, iteration: 22385
loss: 1.0232679843902588,grad_norm: 0.999999087743765, iteration: 22386
loss: 1.0413250923156738,grad_norm: 0.9999995348996843, iteration: 22387
loss: 1.0168708562850952,grad_norm: 0.9911657513299699, iteration: 22388
loss: 0.9879554510116577,grad_norm: 0.9999993163515757, iteration: 22389
loss: 1.0105503797531128,grad_norm: 0.9380168952280786, iteration: 22390
loss: 1.0029687881469727,grad_norm: 0.9928618213361252, iteration: 22391
loss: 1.0370798110961914,grad_norm: 0.9734541403211101, iteration: 22392
loss: 1.0588377714157104,grad_norm: 0.9179503109002413, iteration: 22393
loss: 1.014886498451233,grad_norm: 0.9326385901597849, iteration: 22394
loss: 1.0400850772857666,grad_norm: 0.9999992100637917, iteration: 22395
loss: 1.0041630268096924,grad_norm: 0.9999992823023698, iteration: 22396
loss: 0.9884361624717712,grad_norm: 0.9999991357924847, iteration: 22397
loss: 0.986831784248352,grad_norm: 0.9999993137163791, iteration: 22398
loss: 1.0146965980529785,grad_norm: 0.9900523893359983, iteration: 22399
loss: 1.0353000164031982,grad_norm: 0.9999992622667975, iteration: 22400
loss: 1.0102579593658447,grad_norm: 0.9567511923681622, iteration: 22401
loss: 1.0181411504745483,grad_norm: 0.9818238771743041, iteration: 22402
loss: 1.059165596961975,grad_norm: 0.9999998219750985, iteration: 22403
loss: 0.9904487133026123,grad_norm: 0.9558929818227025, iteration: 22404
loss: 1.0012731552124023,grad_norm: 0.9639829336328549, iteration: 22405
loss: 0.9601297974586487,grad_norm: 0.9999995255158582, iteration: 22406
loss: 1.0186117887496948,grad_norm: 0.9938408422162548, iteration: 22407
loss: 1.0050643682479858,grad_norm: 0.8767930898113566, iteration: 22408
loss: 1.126192569732666,grad_norm: 0.9999996238622328, iteration: 22409
loss: 1.0635145902633667,grad_norm: 0.9999996816975273, iteration: 22410
loss: 0.9935318231582642,grad_norm: 0.9328690749186914, iteration: 22411
loss: 0.977659285068512,grad_norm: 0.8675176504956745, iteration: 22412
loss: 0.9726570844650269,grad_norm: 0.9999990174089987, iteration: 22413
loss: 0.992652416229248,grad_norm: 0.9999989882898895, iteration: 22414
loss: 1.038684368133545,grad_norm: 0.9999992224368536, iteration: 22415
loss: 1.0308881998062134,grad_norm: 0.9270811784177744, iteration: 22416
loss: 1.0076886415481567,grad_norm: 0.811676872716058, iteration: 22417
loss: 0.9991077780723572,grad_norm: 0.9999998041648835, iteration: 22418
loss: 1.0377187728881836,grad_norm: 0.9999998209743772, iteration: 22419
loss: 1.0366177558898926,grad_norm: 0.9999992012061829, iteration: 22420
loss: 0.9776796102523804,grad_norm: 0.999999229556966, iteration: 22421
loss: 0.9947706460952759,grad_norm: 0.9999991898842003, iteration: 22422
loss: 1.027174949645996,grad_norm: 0.9999991193653512, iteration: 22423
loss: 1.0328247547149658,grad_norm: 0.9999991715319025, iteration: 22424
loss: 1.029483675956726,grad_norm: 0.9999992586727443, iteration: 22425
loss: 1.0203992128372192,grad_norm: 0.9999991747855472, iteration: 22426
loss: 1.007643461227417,grad_norm: 0.9236378429314547, iteration: 22427
loss: 1.028491735458374,grad_norm: 0.9999991410637394, iteration: 22428
loss: 0.9934653639793396,grad_norm: 0.9923333716875743, iteration: 22429
loss: 1.0030158758163452,grad_norm: 0.9999993166589037, iteration: 22430
loss: 1.0529847145080566,grad_norm: 0.9999996851346931, iteration: 22431
loss: 1.0035009384155273,grad_norm: 0.9999996250471754, iteration: 22432
loss: 1.0459178686141968,grad_norm: 0.9872037273775337, iteration: 22433
loss: 1.0262444019317627,grad_norm: 0.9549042023856334, iteration: 22434
loss: 1.0080749988555908,grad_norm: 0.9999991053598608, iteration: 22435
loss: 1.0129709243774414,grad_norm: 0.8990674910018739, iteration: 22436
loss: 0.997319221496582,grad_norm: 0.9999996561982095, iteration: 22437
loss: 1.0250418186187744,grad_norm: 0.9999992415260828, iteration: 22438
loss: 1.0141303539276123,grad_norm: 0.9999990505410808, iteration: 22439
loss: 1.0631874799728394,grad_norm: 0.9999992087613679, iteration: 22440
loss: 0.9744113683700562,grad_norm: 0.9999990862172984, iteration: 22441
loss: 0.9937494993209839,grad_norm: 0.9999990095239742, iteration: 22442
loss: 1.011098861694336,grad_norm: 0.9999990992899274, iteration: 22443
loss: 0.9717596769332886,grad_norm: 0.9999991171782551, iteration: 22444
loss: 1.0387544631958008,grad_norm: 0.8512327433182244, iteration: 22445
loss: 0.9972832798957825,grad_norm: 0.9541794833657807, iteration: 22446
loss: 1.0240939855575562,grad_norm: 0.9999997281258451, iteration: 22447
loss: 1.0021333694458008,grad_norm: 0.9999990879577358, iteration: 22448
loss: 1.0135390758514404,grad_norm: 0.9999995548195488, iteration: 22449
loss: 1.019976258277893,grad_norm: 0.9617547871455244, iteration: 22450
loss: 1.009608507156372,grad_norm: 0.8821908928725989, iteration: 22451
loss: 1.0166852474212646,grad_norm: 0.7529726661039787, iteration: 22452
loss: 1.0059174299240112,grad_norm: 0.9999990526156267, iteration: 22453
loss: 0.986183762550354,grad_norm: 0.9999123478050058, iteration: 22454
loss: 1.0086760520935059,grad_norm: 0.9999991871295286, iteration: 22455
loss: 1.0106170177459717,grad_norm: 0.9144876852581889, iteration: 22456
loss: 0.9814249277114868,grad_norm: 0.9999991952078524, iteration: 22457
loss: 1.0198637247085571,grad_norm: 0.9179349614813836, iteration: 22458
loss: 1.0384384393692017,grad_norm: 0.9164676647119648, iteration: 22459
loss: 0.9928112626075745,grad_norm: 0.9999990789956512, iteration: 22460
loss: 1.0413410663604736,grad_norm: 0.999999599477583, iteration: 22461
loss: 1.0100222826004028,grad_norm: 0.9460601488664336, iteration: 22462
loss: 1.0103024244308472,grad_norm: 0.9904173763840858, iteration: 22463
loss: 0.9935789704322815,grad_norm: 0.9999994247304973, iteration: 22464
loss: 0.9798799157142639,grad_norm: 0.9999991280627173, iteration: 22465
loss: 0.9843139052391052,grad_norm: 0.9999989815812532, iteration: 22466
loss: 1.029171347618103,grad_norm: 0.938395221009918, iteration: 22467
loss: 1.0893555879592896,grad_norm: 0.9999994173797284, iteration: 22468
loss: 1.0090463161468506,grad_norm: 0.9999990249404205, iteration: 22469
loss: 1.0097682476043701,grad_norm: 0.9359838791624049, iteration: 22470
loss: 1.0397807359695435,grad_norm: 0.8434292741855174, iteration: 22471
loss: 0.9896053671836853,grad_norm: 0.9092862680006518, iteration: 22472
loss: 1.0219565629959106,grad_norm: 0.9999993870465518, iteration: 22473
loss: 1.016880750656128,grad_norm: 0.9858345732417622, iteration: 22474
loss: 1.0117021799087524,grad_norm: 0.859198435894218, iteration: 22475
loss: 0.9930064082145691,grad_norm: 0.9686872296133185, iteration: 22476
loss: 1.0325452089309692,grad_norm: 0.8924817473240465, iteration: 22477
loss: 1.036790370941162,grad_norm: 0.9060473454122295, iteration: 22478
loss: 1.0138051509857178,grad_norm: 0.9999992198370623, iteration: 22479
loss: 0.9865962862968445,grad_norm: 0.9999990377629041, iteration: 22480
loss: 1.0490740537643433,grad_norm: 0.999998937390705, iteration: 22481
loss: 1.0616285800933838,grad_norm: 0.9999992411070535, iteration: 22482
loss: 1.0192668437957764,grad_norm: 0.9999990716738878, iteration: 22483
loss: 1.0430253744125366,grad_norm: 0.9705916380139403, iteration: 22484
loss: 0.97479248046875,grad_norm: 0.9999991943935272, iteration: 22485
loss: 0.9983749985694885,grad_norm: 0.9999994287008235, iteration: 22486
loss: 0.9856284260749817,grad_norm: 0.9911356961412972, iteration: 22487
loss: 1.003862977027893,grad_norm: 0.9146757545786952, iteration: 22488
loss: 1.0234631299972534,grad_norm: 0.9893358953078851, iteration: 22489
loss: 1.011863112449646,grad_norm: 0.9999991052365007, iteration: 22490
loss: 1.0242642164230347,grad_norm: 0.9999998369304823, iteration: 22491
loss: 1.0017786026000977,grad_norm: 0.9999991127603485, iteration: 22492
loss: 0.9609329700469971,grad_norm: 0.9999992321358777, iteration: 22493
loss: 1.0126962661743164,grad_norm: 0.999999095159456, iteration: 22494
loss: 1.0264657735824585,grad_norm: 0.8921579752129946, iteration: 22495
loss: 1.0366755723953247,grad_norm: 0.9999993072345799, iteration: 22496
loss: 1.016947865486145,grad_norm: 0.9999991814943704, iteration: 22497
loss: 1.0000747442245483,grad_norm: 0.9999991471843974, iteration: 22498
loss: 0.9866320490837097,grad_norm: 0.947590963235936, iteration: 22499
loss: 0.9969909191131592,grad_norm: 0.9999991885995372, iteration: 22500
loss: 0.9894464612007141,grad_norm: 0.999999085307908, iteration: 22501
loss: 0.995868980884552,grad_norm: 0.8804701641690733, iteration: 22502
loss: 1.0090407133102417,grad_norm: 0.9999992475726444, iteration: 22503
loss: 1.0342069864273071,grad_norm: 0.9999991341091145, iteration: 22504
loss: 1.0579737424850464,grad_norm: 0.9999993208324692, iteration: 22505
loss: 1.0235095024108887,grad_norm: 0.9999993552768892, iteration: 22506
loss: 1.0206538438796997,grad_norm: 0.962222679345058, iteration: 22507
loss: 0.9741842150688171,grad_norm: 0.9965095702580031, iteration: 22508
loss: 1.001397728919983,grad_norm: 0.9999991751730254, iteration: 22509
loss: 0.9835956692695618,grad_norm: 0.9999989787149555, iteration: 22510
loss: 1.005147099494934,grad_norm: 0.9999993775146733, iteration: 22511
loss: 0.9891703724861145,grad_norm: 0.9999991761008259, iteration: 22512
loss: 1.0044137239456177,grad_norm: 0.9999993471765899, iteration: 22513
loss: 0.9763491749763489,grad_norm: 0.999999061600107, iteration: 22514
loss: 1.008924126625061,grad_norm: 0.9999993177229853, iteration: 22515
loss: 1.0418275594711304,grad_norm: 0.9999996566226188, iteration: 22516
loss: 1.0243641138076782,grad_norm: 0.9999990410658672, iteration: 22517
loss: 1.0518615245819092,grad_norm: 0.9999992169121593, iteration: 22518
loss: 1.0247646570205688,grad_norm: 0.9999992138493455, iteration: 22519
loss: 1.0141056776046753,grad_norm: 0.9999991181915779, iteration: 22520
loss: 1.0007754564285278,grad_norm: 0.9999992270597633, iteration: 22521
loss: 1.0066355466842651,grad_norm: 0.9885982801229837, iteration: 22522
loss: 1.0293821096420288,grad_norm: 0.9999993325164908, iteration: 22523
loss: 1.0357420444488525,grad_norm: 0.9999991383097377, iteration: 22524
loss: 1.0102084875106812,grad_norm: 0.9999990575261289, iteration: 22525
loss: 1.0109225511550903,grad_norm: 0.9999990638343356, iteration: 22526
loss: 1.0301474332809448,grad_norm: 0.9999991412506767, iteration: 22527
loss: 1.0727877616882324,grad_norm: 0.9999997028587491, iteration: 22528
loss: 1.0085041522979736,grad_norm: 0.9999993057039237, iteration: 22529
loss: 1.0186066627502441,grad_norm: 0.9999991750005665, iteration: 22530
loss: 1.0411633253097534,grad_norm: 0.9999993033315109, iteration: 22531
loss: 0.9715323448181152,grad_norm: 0.9885323697468964, iteration: 22532
loss: 1.0482758283615112,grad_norm: 0.9999989897977138, iteration: 22533
loss: 0.983997642993927,grad_norm: 0.9999990977914428, iteration: 22534
loss: 0.9797928929328918,grad_norm: 0.9999991256664343, iteration: 22535
loss: 0.9959457516670227,grad_norm: 0.8476308968563482, iteration: 22536
loss: 1.029900312423706,grad_norm: 0.9999991145574422, iteration: 22537
loss: 1.0813156366348267,grad_norm: 0.9999995584837861, iteration: 22538
loss: 1.0202223062515259,grad_norm: 0.9999990847465852, iteration: 22539
loss: 0.980501115322113,grad_norm: 0.999999043553618, iteration: 22540
loss: 1.0395641326904297,grad_norm: 0.9274798213654549, iteration: 22541
loss: 1.052450180053711,grad_norm: 0.9058770283069939, iteration: 22542
loss: 1.0261727571487427,grad_norm: 0.9999997219377024, iteration: 22543
loss: 0.9674205183982849,grad_norm: 0.9999990109241087, iteration: 22544
loss: 0.999434769153595,grad_norm: 0.9999991128687891, iteration: 22545
loss: 0.9932347536087036,grad_norm: 0.9999997048674039, iteration: 22546
loss: 1.0107016563415527,grad_norm: 0.9999994184104366, iteration: 22547
loss: 0.9777804017066956,grad_norm: 0.9863360001109769, iteration: 22548
loss: 1.097455620765686,grad_norm: 0.9999991986236314, iteration: 22549
loss: 1.0316064357757568,grad_norm: 0.8873492855536689, iteration: 22550
loss: 1.047125220298767,grad_norm: 0.9999990834225557, iteration: 22551
loss: 0.9868381023406982,grad_norm: 0.9429928283237604, iteration: 22552
loss: 1.0014771223068237,grad_norm: 0.9999990199654909, iteration: 22553
loss: 0.985490620136261,grad_norm: 0.9999994629053409, iteration: 22554
loss: 1.0203155279159546,grad_norm: 0.9999991353695452, iteration: 22555
loss: 0.9676366448402405,grad_norm: 0.9527578415063964, iteration: 22556
loss: 1.00907564163208,grad_norm: 0.9999990783593324, iteration: 22557
loss: 1.0197452306747437,grad_norm: 0.8513158322529998, iteration: 22558
loss: 1.0287266969680786,grad_norm: 0.9999991809727481, iteration: 22559
loss: 1.0230772495269775,grad_norm: 0.999999096117978, iteration: 22560
loss: 1.0148950815200806,grad_norm: 0.999999182968799, iteration: 22561
loss: 0.9905253648757935,grad_norm: 0.9995026815589889, iteration: 22562
loss: 1.0395970344543457,grad_norm: 0.8900152562975736, iteration: 22563
loss: 1.032684087753296,grad_norm: 0.9999989981591103, iteration: 22564
loss: 0.9878578782081604,grad_norm: 0.9794299245968128, iteration: 22565
loss: 1.0107864141464233,grad_norm: 0.9999990499200713, iteration: 22566
loss: 0.9887427687644958,grad_norm: 0.9554229347728274, iteration: 22567
loss: 1.0038379430770874,grad_norm: 0.9999994744894438, iteration: 22568
loss: 1.044590711593628,grad_norm: 0.999999383003133, iteration: 22569
loss: 1.0106399059295654,grad_norm: 0.7965594190216706, iteration: 22570
loss: 1.0035122632980347,grad_norm: 0.9999992888234728, iteration: 22571
loss: 1.0058813095092773,grad_norm: 0.9834456274670171, iteration: 22572
loss: 1.0344667434692383,grad_norm: 0.999999029958749, iteration: 22573
loss: 1.0174168348312378,grad_norm: 0.9999990803617791, iteration: 22574
loss: 1.0565108060836792,grad_norm: 0.961606771089739, iteration: 22575
loss: 1.0060986280441284,grad_norm: 0.9712481068221245, iteration: 22576
loss: 1.0136561393737793,grad_norm: 0.9999990288544344, iteration: 22577
loss: 0.9975408315658569,grad_norm: 0.9999993281065399, iteration: 22578
loss: 1.0323703289031982,grad_norm: 0.9999992365276339, iteration: 22579
loss: 0.9868104457855225,grad_norm: 0.8667787011877647, iteration: 22580
loss: 1.0283615589141846,grad_norm: 0.9999991548631866, iteration: 22581
loss: 1.0191718339920044,grad_norm: 0.999999121569206, iteration: 22582
loss: 1.0151731967926025,grad_norm: 0.9999991177919891, iteration: 22583
loss: 0.968518078327179,grad_norm: 0.999999024297664, iteration: 22584
loss: 1.0089014768600464,grad_norm: 0.961365027699621, iteration: 22585
loss: 1.0356522798538208,grad_norm: 0.9999992642890853, iteration: 22586
loss: 0.9934414029121399,grad_norm: 0.9999991260146186, iteration: 22587
loss: 0.992406964302063,grad_norm: 0.9668567232598356, iteration: 22588
loss: 0.9901708960533142,grad_norm: 0.9999990950689777, iteration: 22589
loss: 0.9999786615371704,grad_norm: 0.9999991980247193, iteration: 22590
loss: 1.0050561428070068,grad_norm: 0.9578891911080507, iteration: 22591
loss: 0.983546257019043,grad_norm: 0.9999992728713187, iteration: 22592
loss: 1.003699541091919,grad_norm: 0.9228910835910841, iteration: 22593
loss: 1.0115108489990234,grad_norm: 0.928710644920605, iteration: 22594
loss: 0.9986296892166138,grad_norm: 0.8813334296723176, iteration: 22595
loss: 1.0459507703781128,grad_norm: 0.9999994685094753, iteration: 22596
loss: 0.9869059920310974,grad_norm: 0.8628077017451481, iteration: 22597
loss: 1.0227510929107666,grad_norm: 0.9198191076816254, iteration: 22598
loss: 0.9880927205085754,grad_norm: 0.8562676732568562, iteration: 22599
loss: 1.0275235176086426,grad_norm: 0.9999992358909072, iteration: 22600
loss: 1.0065213441848755,grad_norm: 0.8582942696789927, iteration: 22601
loss: 1.0462509393692017,grad_norm: 0.9999989677819839, iteration: 22602
loss: 1.0193688869476318,grad_norm: 0.9999991483451507, iteration: 22603
loss: 0.9709670543670654,grad_norm: 0.9999989703372881, iteration: 22604
loss: 0.9902355670928955,grad_norm: 0.9999991348415523, iteration: 22605
loss: 1.0319136381149292,grad_norm: 0.9970214185784504, iteration: 22606
loss: 0.9987112283706665,grad_norm: 0.999999071291724, iteration: 22607
loss: 1.0019025802612305,grad_norm: 0.999999042546257, iteration: 22608
loss: 0.9383753538131714,grad_norm: 0.9999990059770864, iteration: 22609
loss: 1.003018856048584,grad_norm: 0.9428750697293576, iteration: 22610
loss: 1.0364091396331787,grad_norm: 0.9999998057519934, iteration: 22611
loss: 1.0429152250289917,grad_norm: 0.9038020571496749, iteration: 22612
loss: 1.026982307434082,grad_norm: 0.999999112879278, iteration: 22613
loss: 1.0269635915756226,grad_norm: 0.9177442853942985, iteration: 22614
loss: 0.9844755530357361,grad_norm: 0.9999990048600833, iteration: 22615
loss: 1.0469685792922974,grad_norm: 0.9999993743494365, iteration: 22616
loss: 1.04365074634552,grad_norm: 0.9999997179675667, iteration: 22617
loss: 1.0406057834625244,grad_norm: 0.9999991434640552, iteration: 22618
loss: 1.019708275794983,grad_norm: 0.9723795002918836, iteration: 22619
loss: 1.035505771636963,grad_norm: 0.9288116780534453, iteration: 22620
loss: 0.9915610551834106,grad_norm: 0.9545172058876173, iteration: 22621
loss: 1.0099968910217285,grad_norm: 0.9999990866576104, iteration: 22622
loss: 1.0402171611785889,grad_norm: 0.999999055100426, iteration: 22623
loss: 1.098137617111206,grad_norm: 0.9999995730367486, iteration: 22624
loss: 1.0287690162658691,grad_norm: 0.9999990812461758, iteration: 22625
loss: 1.0245637893676758,grad_norm: 0.9999993731480502, iteration: 22626
loss: 1.0302973985671997,grad_norm: 0.9999991042996579, iteration: 22627
loss: 1.007073163986206,grad_norm: 0.9999992564757764, iteration: 22628
loss: 1.0281084775924683,grad_norm: 0.8950599160363484, iteration: 22629
loss: 1.122323989868164,grad_norm: 0.9999995806962926, iteration: 22630
loss: 1.0083452463150024,grad_norm: 0.9999991514015724, iteration: 22631
loss: 0.9904347658157349,grad_norm: 0.9999991144814215, iteration: 22632
loss: 1.0254994630813599,grad_norm: 0.9999989875871136, iteration: 22633
loss: 0.9709553718566895,grad_norm: 0.999999158684021, iteration: 22634
loss: 1.022642731666565,grad_norm: 0.9999993067946471, iteration: 22635
loss: 1.0226609706878662,grad_norm: 0.7766231268318138, iteration: 22636
loss: 1.0310826301574707,grad_norm: 0.8473607437637298, iteration: 22637
loss: 1.0651581287384033,grad_norm: 0.9999990829906377, iteration: 22638
loss: 0.9873706698417664,grad_norm: 0.9999994257969489, iteration: 22639
loss: 0.993396520614624,grad_norm: 0.9063103214267826, iteration: 22640
loss: 1.022810935974121,grad_norm: 0.9999990973769504, iteration: 22641
loss: 0.9927617311477661,grad_norm: 0.9999992483186182, iteration: 22642
loss: 1.0208443403244019,grad_norm: 0.9790376992438158, iteration: 22643
loss: 0.9960845708847046,grad_norm: 0.9999995533963911, iteration: 22644
loss: 1.0184693336486816,grad_norm: 0.9999994093582708, iteration: 22645
loss: 0.9889785051345825,grad_norm: 0.9999990767904127, iteration: 22646
loss: 1.0336179733276367,grad_norm: 0.9999991238439708, iteration: 22647
loss: 1.0197471380233765,grad_norm: 0.8233804712119444, iteration: 22648
loss: 0.9830214381217957,grad_norm: 0.9349747339229427, iteration: 22649
loss: 1.0294106006622314,grad_norm: 0.9999992630003928, iteration: 22650
loss: 1.0108823776245117,grad_norm: 0.9999992052258316, iteration: 22651
loss: 1.0279947519302368,grad_norm: 0.9999991323487595, iteration: 22652
loss: 1.02424955368042,grad_norm: 0.9404220303192754, iteration: 22653
loss: 1.013169527053833,grad_norm: 0.9999990583936653, iteration: 22654
loss: 0.9792565107345581,grad_norm: 0.9999991044011319, iteration: 22655
loss: 1.049282431602478,grad_norm: 0.9999991329313679, iteration: 22656
loss: 1.0091279745101929,grad_norm: 0.9013974200343658, iteration: 22657
loss: 1.009604811668396,grad_norm: 0.9999992439007079, iteration: 22658
loss: 0.981822669506073,grad_norm: 0.9999990956102842, iteration: 22659
loss: 0.9797546863555908,grad_norm: 0.9999991155506486, iteration: 22660
loss: 1.0112348794937134,grad_norm: 0.9999990030353271, iteration: 22661
loss: 1.0308313369750977,grad_norm: 0.9999992389494584, iteration: 22662
loss: 0.9977701306343079,grad_norm: 0.9794835995996428, iteration: 22663
loss: 1.0182147026062012,grad_norm: 0.9999997080102213, iteration: 22664
loss: 1.014412760734558,grad_norm: 0.8980743757712022, iteration: 22665
loss: 0.9925832152366638,grad_norm: 0.9999991096316581, iteration: 22666
loss: 1.0241882801055908,grad_norm: 0.9141639037513618, iteration: 22667
loss: 1.007747769355774,grad_norm: 0.9999991809418545, iteration: 22668
loss: 0.9860756993293762,grad_norm: 0.9999991983577222, iteration: 22669
loss: 1.052327275276184,grad_norm: 0.968960372383906, iteration: 22670
loss: 1.0518403053283691,grad_norm: 0.9999991284146766, iteration: 22671
loss: 1.0201270580291748,grad_norm: 0.9237750687715559, iteration: 22672
loss: 1.008813738822937,grad_norm: 0.9999991055186713, iteration: 22673
loss: 1.0040806531906128,grad_norm: 0.8854509138628182, iteration: 22674
loss: 1.015153169631958,grad_norm: 0.9999991017797133, iteration: 22675
loss: 1.022049069404602,grad_norm: 0.8979081230624485, iteration: 22676
loss: 1.0204942226409912,grad_norm: 0.9858472863940899, iteration: 22677
loss: 1.0166138410568237,grad_norm: 0.9999992217362422, iteration: 22678
loss: 1.0253090858459473,grad_norm: 0.9254520023123458, iteration: 22679
loss: 0.980090320110321,grad_norm: 0.9381828346028037, iteration: 22680
loss: 1.0059499740600586,grad_norm: 0.9999991096142348, iteration: 22681
loss: 1.0046098232269287,grad_norm: 0.9999990229161092, iteration: 22682
loss: 1.0092731714248657,grad_norm: 0.9999992444200875, iteration: 22683
loss: 0.9909164309501648,grad_norm: 0.9999992969616752, iteration: 22684
loss: 1.0632482767105103,grad_norm: 0.9999998427025926, iteration: 22685
loss: 0.9771959781646729,grad_norm: 0.8986493910087188, iteration: 22686
loss: 0.9915830492973328,grad_norm: 0.9999992292297978, iteration: 22687
loss: 1.0188297033309937,grad_norm: 0.9999999109418738, iteration: 22688
loss: 0.9818596243858337,grad_norm: 0.918736852092286, iteration: 22689
loss: 1.0117636919021606,grad_norm: 0.9586021794365424, iteration: 22690
loss: 1.0173453092575073,grad_norm: 0.9999991714812246, iteration: 22691
loss: 0.9945381879806519,grad_norm: 0.9999990911850496, iteration: 22692
loss: 1.0087708234786987,grad_norm: 0.9999991149523465, iteration: 22693
loss: 1.0240750312805176,grad_norm: 0.999999175830345, iteration: 22694
loss: 1.0234895944595337,grad_norm: 0.9274916080071572, iteration: 22695
loss: 0.9953674077987671,grad_norm: 0.9219349431991627, iteration: 22696
loss: 0.9729960560798645,grad_norm: 0.9999990974787724, iteration: 22697
loss: 1.0137871503829956,grad_norm: 0.9999991173303527, iteration: 22698
loss: 1.0145708322525024,grad_norm: 0.9902169996268747, iteration: 22699
loss: 1.0100349187850952,grad_norm: 0.9999991524563672, iteration: 22700
loss: 1.064257264137268,grad_norm: 0.9999994984429468, iteration: 22701
loss: 0.9820563793182373,grad_norm: 0.8481609004094608, iteration: 22702
loss: 1.0191527605056763,grad_norm: 0.9999990237244828, iteration: 22703
loss: 1.0149650573730469,grad_norm: 0.9999994542672793, iteration: 22704
loss: 1.004634976387024,grad_norm: 0.9999991109258485, iteration: 22705
loss: 1.0175635814666748,grad_norm: 0.9107365163808747, iteration: 22706
loss: 1.0061864852905273,grad_norm: 0.9999992103127647, iteration: 22707
loss: 1.0281038284301758,grad_norm: 0.8742593652157143, iteration: 22708
loss: 0.9945344924926758,grad_norm: 0.9496377851646772, iteration: 22709
loss: 1.0387496948242188,grad_norm: 0.9999995115708753, iteration: 22710
loss: 1.0349797010421753,grad_norm: 0.862700258654779, iteration: 22711
loss: 1.018678903579712,grad_norm: 0.9999990494419417, iteration: 22712
loss: 0.9892439842224121,grad_norm: 0.9999991014889738, iteration: 22713
loss: 0.9787997603416443,grad_norm: 0.9665102158729456, iteration: 22714
loss: 0.9945600628852844,grad_norm: 0.9526532670867586, iteration: 22715
loss: 0.9855931997299194,grad_norm: 0.9578382564827699, iteration: 22716
loss: 1.0000319480895996,grad_norm: 0.9699629524207659, iteration: 22717
loss: 0.9959964752197266,grad_norm: 0.9999991407635793, iteration: 22718
loss: 1.0248950719833374,grad_norm: 0.8677915675103265, iteration: 22719
loss: 1.0033732652664185,grad_norm: 0.9999991144817876, iteration: 22720
loss: 1.0325682163238525,grad_norm: 0.9999994466450919, iteration: 22721
loss: 1.0193357467651367,grad_norm: 0.9999990655922307, iteration: 22722
loss: 1.0370985269546509,grad_norm: 0.9999994414606591, iteration: 22723
loss: 0.9991667866706848,grad_norm: 0.956504359475496, iteration: 22724
loss: 1.0068556070327759,grad_norm: 0.9999991985203706, iteration: 22725
loss: 0.9861457347869873,grad_norm: 0.9999993027727184, iteration: 22726
loss: 1.0176464319229126,grad_norm: 0.9999992118860446, iteration: 22727
loss: 1.0502516031265259,grad_norm: 0.9999990055769279, iteration: 22728
loss: 0.9822441935539246,grad_norm: 0.9999991587004422, iteration: 22729
loss: 1.0434526205062866,grad_norm: 0.8350715641638615, iteration: 22730
loss: 1.0159120559692383,grad_norm: 0.9775399891826344, iteration: 22731
loss: 1.0079489946365356,grad_norm: 0.9331021159511288, iteration: 22732
loss: 0.9826233386993408,grad_norm: 0.9543266730268631, iteration: 22733
loss: 1.0006028413772583,grad_norm: 0.9999990088120277, iteration: 22734
loss: 0.9835756421089172,grad_norm: 0.8721419869676885, iteration: 22735
loss: 1.028003454208374,grad_norm: 0.9999991238978615, iteration: 22736
loss: 1.042757272720337,grad_norm: 0.9999990784424657, iteration: 22737
loss: 1.0645036697387695,grad_norm: 0.9999990227037422, iteration: 22738
loss: 0.9983513951301575,grad_norm: 0.9999992452368441, iteration: 22739
loss: 1.0567586421966553,grad_norm: 0.9999999429459501, iteration: 22740
loss: 0.9948179721832275,grad_norm: 0.9469727914726642, iteration: 22741
loss: 0.9852011799812317,grad_norm: 0.8966744095621724, iteration: 22742
loss: 1.0309251546859741,grad_norm: 0.99999912851282, iteration: 22743
loss: 1.0033178329467773,grad_norm: 0.9999992913328499, iteration: 22744
loss: 0.9923005104064941,grad_norm: 0.9999993565293469, iteration: 22745
loss: 0.9840366840362549,grad_norm: 0.9999991356675594, iteration: 22746
loss: 1.0004633665084839,grad_norm: 0.999999090742434, iteration: 22747
loss: 0.9676275253295898,grad_norm: 0.9999993230050729, iteration: 22748
loss: 0.9810817241668701,grad_norm: 0.8580747571978732, iteration: 22749
loss: 1.0039268732070923,grad_norm: 0.9336522722341796, iteration: 22750
loss: 0.9985391497612,grad_norm: 0.9999991304715, iteration: 22751
loss: 1.0615350008010864,grad_norm: 0.9999994467215184, iteration: 22752
loss: 1.0030094385147095,grad_norm: 0.9140317533460413, iteration: 22753
loss: 0.990806519985199,grad_norm: 0.9035314092572739, iteration: 22754
loss: 0.9878519177436829,grad_norm: 0.9155201025831505, iteration: 22755
loss: 0.9762039184570312,grad_norm: 0.9999991816223804, iteration: 22756
loss: 1.0154175758361816,grad_norm: 0.9999992172735608, iteration: 22757
loss: 1.0117483139038086,grad_norm: 0.8127695646815244, iteration: 22758
loss: 1.0121315717697144,grad_norm: 0.873349329435911, iteration: 22759
loss: 1.0769233703613281,grad_norm: 0.9999996005982814, iteration: 22760
loss: 1.0021324157714844,grad_norm: 0.9670640282438736, iteration: 22761
loss: 1.01533043384552,grad_norm: 0.9718101851549342, iteration: 22762
loss: 1.0504838228225708,grad_norm: 0.9999997156722924, iteration: 22763
loss: 1.0105068683624268,grad_norm: 0.9999990576200679, iteration: 22764
loss: 1.002528429031372,grad_norm: 0.9999992054650769, iteration: 22765
loss: 0.997899055480957,grad_norm: 0.9476854296166329, iteration: 22766
loss: 1.0098036527633667,grad_norm: 0.9999990115856022, iteration: 22767
loss: 1.0343137979507446,grad_norm: 0.9999991918210737, iteration: 22768
loss: 1.0336253643035889,grad_norm: 0.9072555010240717, iteration: 22769
loss: 1.0832183361053467,grad_norm: 0.9999997217816542, iteration: 22770
loss: 1.0022385120391846,grad_norm: 0.9999990391658705, iteration: 22771
loss: 1.0188162326812744,grad_norm: 0.9999997675513362, iteration: 22772
loss: 0.9761806726455688,grad_norm: 0.999999101680502, iteration: 22773
loss: 1.01237154006958,grad_norm: 0.8753754943938584, iteration: 22774
loss: 0.9889152646064758,grad_norm: 0.9999991008443294, iteration: 22775
loss: 1.0281951427459717,grad_norm: 0.9855437412163778, iteration: 22776
loss: 0.9800525903701782,grad_norm: 0.999999256395463, iteration: 22777
loss: 1.0107991695404053,grad_norm: 0.9999989651696658, iteration: 22778
loss: 0.9859533905982971,grad_norm: 0.9917085938091675, iteration: 22779
loss: 1.022717833518982,grad_norm: 0.9999990320187068, iteration: 22780
loss: 0.977351725101471,grad_norm: 0.9625300439280094, iteration: 22781
loss: 1.0147507190704346,grad_norm: 0.9999996783160254, iteration: 22782
loss: 1.0260578393936157,grad_norm: 0.9999990025254314, iteration: 22783
loss: 1.0231919288635254,grad_norm: 0.8651055315235053, iteration: 22784
loss: 0.9957901835441589,grad_norm: 0.999999035139728, iteration: 22785
loss: 1.0114140510559082,grad_norm: 0.9999992467358066, iteration: 22786
loss: 1.0344330072402954,grad_norm: 0.9999991384504187, iteration: 22787
loss: 1.0087469816207886,grad_norm: 0.9471600197661804, iteration: 22788
loss: 1.0547720193862915,grad_norm: 0.9707274785578168, iteration: 22789
loss: 1.0689009428024292,grad_norm: 0.9999991065862824, iteration: 22790
loss: 1.025948405265808,grad_norm: 0.9862321919370719, iteration: 22791
loss: 1.0217307806015015,grad_norm: 0.9999994689098674, iteration: 22792
loss: 0.9947185516357422,grad_norm: 0.999999253223768, iteration: 22793
loss: 0.9874148964881897,grad_norm: 0.9999990468363301, iteration: 22794
loss: 1.0107293128967285,grad_norm: 0.999999140297059, iteration: 22795
loss: 1.0037336349487305,grad_norm: 0.7989114876452713, iteration: 22796
loss: 0.987313985824585,grad_norm: 0.9999992350988078, iteration: 22797
loss: 1.001245141029358,grad_norm: 0.9999993105015285, iteration: 22798
loss: 0.9411516785621643,grad_norm: 0.9999990231435021, iteration: 22799
loss: 1.0321078300476074,grad_norm: 0.9999990992297806, iteration: 22800
loss: 0.993581235408783,grad_norm: 0.9999990654049444, iteration: 22801
loss: 0.9913368225097656,grad_norm: 0.949846106899011, iteration: 22802
loss: 1.0110375881195068,grad_norm: 0.9999990948751231, iteration: 22803
loss: 1.0537387132644653,grad_norm: 0.9999991168741923, iteration: 22804
loss: 1.0319157838821411,grad_norm: 0.9350269950357937, iteration: 22805
loss: 1.0272077322006226,grad_norm: 0.9999991386164279, iteration: 22806
loss: 1.048553228378296,grad_norm: 0.9999992107626803, iteration: 22807
loss: 1.0175846815109253,grad_norm: 0.9812930377347645, iteration: 22808
loss: 1.0061101913452148,grad_norm: 0.9999991549014989, iteration: 22809
loss: 1.0088465213775635,grad_norm: 0.9999990109137661, iteration: 22810
loss: 1.036415457725525,grad_norm: 0.999999385294662, iteration: 22811
loss: 1.0250624418258667,grad_norm: 0.9999991686852392, iteration: 22812
loss: 0.9911243915557861,grad_norm: 0.999999068103224, iteration: 22813
loss: 1.0053054094314575,grad_norm: 0.9999990327987589, iteration: 22814
loss: 1.053231954574585,grad_norm: 0.9999991018487144, iteration: 22815
loss: 0.9746248126029968,grad_norm: 0.937301449308265, iteration: 22816
loss: 1.0283435583114624,grad_norm: 0.9999991954070123, iteration: 22817
loss: 1.0363069772720337,grad_norm: 0.9999993467213326, iteration: 22818
loss: 1.0266029834747314,grad_norm: 0.9999991543316112, iteration: 22819
loss: 0.9815351963043213,grad_norm: 0.9965056826760866, iteration: 22820
loss: 1.0273003578186035,grad_norm: 0.9275725223488619, iteration: 22821
loss: 1.0213500261306763,grad_norm: 0.9999992262188093, iteration: 22822
loss: 0.9886854290962219,grad_norm: 0.9250350547730377, iteration: 22823
loss: 1.018515944480896,grad_norm: 0.9162434561226696, iteration: 22824
loss: 1.0271284580230713,grad_norm: 0.9010852645966057, iteration: 22825
loss: 1.0340077877044678,grad_norm: 0.9521559424334888, iteration: 22826
loss: 1.0380032062530518,grad_norm: 0.847136418675863, iteration: 22827
loss: 1.0415031909942627,grad_norm: 0.9218654220237614, iteration: 22828
loss: 1.0303112268447876,grad_norm: 0.9999991531690419, iteration: 22829
loss: 1.0027060508728027,grad_norm: 0.9401184026200395, iteration: 22830
loss: 1.0439571142196655,grad_norm: 0.9999991738579316, iteration: 22831
loss: 1.0276672840118408,grad_norm: 0.9999990199213061, iteration: 22832
loss: 0.9785707592964172,grad_norm: 0.9999991142660616, iteration: 22833
loss: 1.0418362617492676,grad_norm: 0.9999994000351674, iteration: 22834
loss: 1.0311939716339111,grad_norm: 0.9386444798746192, iteration: 22835
loss: 1.027290940284729,grad_norm: 0.9410459154210922, iteration: 22836
loss: 0.9928049445152283,grad_norm: 0.9999989743984394, iteration: 22837
loss: 1.0254745483398438,grad_norm: 0.9999989839609769, iteration: 22838
loss: 1.0265873670578003,grad_norm: 0.8648136532884454, iteration: 22839
loss: 1.0277202129364014,grad_norm: 0.9999990530834003, iteration: 22840
loss: 1.0392422676086426,grad_norm: 0.9999991975184751, iteration: 22841
loss: 1.002018928527832,grad_norm: 0.965460445559492, iteration: 22842
loss: 1.008558750152588,grad_norm: 0.9999990672890446, iteration: 22843
loss: 1.0064228773117065,grad_norm: 0.9861673063725477, iteration: 22844
loss: 0.9992441534996033,grad_norm: 0.9999991240590992, iteration: 22845
loss: 0.9997985363006592,grad_norm: 0.999999072774335, iteration: 22846
loss: 1.0378576517105103,grad_norm: 0.9789405448779454, iteration: 22847
loss: 0.9738885760307312,grad_norm: 0.8959163944634867, iteration: 22848
loss: 1.0190504789352417,grad_norm: 0.9999992003560799, iteration: 22849
loss: 0.9858110547065735,grad_norm: 0.958622627560974, iteration: 22850
loss: 1.0400620698928833,grad_norm: 0.9542796224144214, iteration: 22851
loss: 0.9847304224967957,grad_norm: 0.9999991151472122, iteration: 22852
loss: 0.9952242374420166,grad_norm: 0.9999991069758767, iteration: 22853
loss: 1.0660827159881592,grad_norm: 0.9999991150927152, iteration: 22854
loss: 1.005438208580017,grad_norm: 0.9999990741512407, iteration: 22855
loss: 1.0030180215835571,grad_norm: 0.9999991575153641, iteration: 22856
loss: 1.0328365564346313,grad_norm: 0.986308895739584, iteration: 22857
loss: 1.0458585023880005,grad_norm: 0.9999991331085227, iteration: 22858
loss: 1.0493632555007935,grad_norm: 0.9999991611392799, iteration: 22859
loss: 1.0440200567245483,grad_norm: 0.9250795827797835, iteration: 22860
loss: 1.0490089654922485,grad_norm: 0.9999991231337737, iteration: 22861
loss: 1.0584522485733032,grad_norm: 0.9999992138980159, iteration: 22862
loss: 1.0192463397979736,grad_norm: 0.9914757307729246, iteration: 22863
loss: 1.0092461109161377,grad_norm: 0.9999990572144873, iteration: 22864
loss: 1.011347770690918,grad_norm: 0.9999991052224356, iteration: 22865
loss: 1.0014362335205078,grad_norm: 0.999999161129362, iteration: 22866
loss: 0.9987820982933044,grad_norm: 0.8489790024109822, iteration: 22867
loss: 1.0036183595657349,grad_norm: 0.9994977714254513, iteration: 22868
loss: 1.010240077972412,grad_norm: 0.9583898192491258, iteration: 22869
loss: 0.9953057765960693,grad_norm: 0.8640438659647113, iteration: 22870
loss: 1.023621678352356,grad_norm: 0.9821561033866676, iteration: 22871
loss: 1.0169888734817505,grad_norm: 0.9999990618692897, iteration: 22872
loss: 1.0130506753921509,grad_norm: 0.854820759457108, iteration: 22873
loss: 1.0069557428359985,grad_norm: 0.9999991053903768, iteration: 22874
loss: 0.9890580773353577,grad_norm: 0.9999990445564976, iteration: 22875
loss: 1.0265605449676514,grad_norm: 0.9999994005759953, iteration: 22876
loss: 0.9752748012542725,grad_norm: 0.9999990663607596, iteration: 22877
loss: 0.9934442639350891,grad_norm: 0.9999992373520047, iteration: 22878
loss: 0.9901738166809082,grad_norm: 0.999999228280607, iteration: 22879
loss: 1.017436146736145,grad_norm: 0.9999990579208329, iteration: 22880
loss: 1.0215378999710083,grad_norm: 0.9999993268338753, iteration: 22881
loss: 1.037134051322937,grad_norm: 0.9999991289450973, iteration: 22882
loss: 0.9636833071708679,grad_norm: 0.9999990521647956, iteration: 22883
loss: 1.014007568359375,grad_norm: 0.9999992265958706, iteration: 22884
loss: 0.9987787008285522,grad_norm: 0.8188840676352318, iteration: 22885
loss: 1.0145982503890991,grad_norm: 0.9009312824307865, iteration: 22886
loss: 1.0047898292541504,grad_norm: 0.9620832324622215, iteration: 22887
loss: 1.0023317337036133,grad_norm: 0.9999992355213707, iteration: 22888
loss: 1.0364384651184082,grad_norm: 0.9999990197396836, iteration: 22889
loss: 1.0177762508392334,grad_norm: 0.9999992876270495, iteration: 22890
loss: 1.0125623941421509,grad_norm: 0.8928245416736064, iteration: 22891
loss: 0.9938040375709534,grad_norm: 0.9360670968067112, iteration: 22892
loss: 0.9882968664169312,grad_norm: 0.8178126857475491, iteration: 22893
loss: 1.0009781122207642,grad_norm: 0.9999992386670089, iteration: 22894
loss: 1.0364751815795898,grad_norm: 0.9620486669321864, iteration: 22895
loss: 1.0467203855514526,grad_norm: 0.9999989906190782, iteration: 22896
loss: 1.0161856412887573,grad_norm: 0.9376682539908786, iteration: 22897
loss: 0.9752436876296997,grad_norm: 0.9999990719416497, iteration: 22898
loss: 0.9710740447044373,grad_norm: 0.8505598394795808, iteration: 22899
loss: 1.0333714485168457,grad_norm: 0.9999991860842617, iteration: 22900
loss: 1.0132954120635986,grad_norm: 0.999999162472758, iteration: 22901
loss: 0.9938632845878601,grad_norm: 0.9999991818542563, iteration: 22902
loss: 0.9919153451919556,grad_norm: 0.9751533755257844, iteration: 22903
loss: 1.0194951295852661,grad_norm: 0.8944060719430158, iteration: 22904
loss: 0.9916046261787415,grad_norm: 0.8994891114099735, iteration: 22905
loss: 1.0014654397964478,grad_norm: 0.9999991320095589, iteration: 22906
loss: 1.0338904857635498,grad_norm: 0.999999067365844, iteration: 22907
loss: 0.9930667877197266,grad_norm: 0.999999027596155, iteration: 22908
loss: 0.9943804740905762,grad_norm: 0.999999218832892, iteration: 22909
loss: 1.024299144744873,grad_norm: 0.9091041079874395, iteration: 22910
loss: 0.9746701121330261,grad_norm: 0.9999991230471775, iteration: 22911
loss: 1.0570858716964722,grad_norm: 0.999999127232371, iteration: 22912
loss: 1.0158021450042725,grad_norm: 0.8930710085065252, iteration: 22913
loss: 0.9962968826293945,grad_norm: 0.9655330520954327, iteration: 22914
loss: 1.028091311454773,grad_norm: 0.9999991827724355, iteration: 22915
loss: 1.0020076036453247,grad_norm: 0.971068709013358, iteration: 22916
loss: 1.0471359491348267,grad_norm: 0.8453833522902581, iteration: 22917
loss: 0.9585806727409363,grad_norm: 0.9091985008519389, iteration: 22918
loss: 1.0169790983200073,grad_norm: 0.7741138373915942, iteration: 22919
loss: 1.042219877243042,grad_norm: 0.8940862661379522, iteration: 22920
loss: 0.9968163371086121,grad_norm: 0.9999990089971449, iteration: 22921
loss: 0.9696042537689209,grad_norm: 0.8317278544679967, iteration: 22922
loss: 1.0194158554077148,grad_norm: 0.9117571866048279, iteration: 22923
loss: 0.9953452348709106,grad_norm: 0.9999993778074093, iteration: 22924
loss: 1.0061150789260864,grad_norm: 0.999998965274032, iteration: 22925
loss: 1.0515539646148682,grad_norm: 0.9999991046288446, iteration: 22926
loss: 1.0028074979782104,grad_norm: 0.9158859581403856, iteration: 22927
loss: 1.0412559509277344,grad_norm: 0.9894874982887045, iteration: 22928
loss: 1.021470069885254,grad_norm: 0.9999992548452156, iteration: 22929
loss: 1.0239685773849487,grad_norm: 0.9999991388986762, iteration: 22930
loss: 1.0525546073913574,grad_norm: 0.9999992709715887, iteration: 22931
loss: 1.0060707330703735,grad_norm: 0.9999990559474672, iteration: 22932
loss: 1.0210368633270264,grad_norm: 0.9999993108064972, iteration: 22933
loss: 0.9836329221725464,grad_norm: 0.9999993062736696, iteration: 22934
loss: 0.997369647026062,grad_norm: 0.9999990647188984, iteration: 22935
loss: 1.0228817462921143,grad_norm: 0.9999990527081399, iteration: 22936
loss: 0.9988898634910583,grad_norm: 0.9880544206465993, iteration: 22937
loss: 1.1241108179092407,grad_norm: 0.999999475764431, iteration: 22938
loss: 0.9745023846626282,grad_norm: 0.9747114667254874, iteration: 22939
loss: 0.9532525539398193,grad_norm: 0.9999991797156227, iteration: 22940
loss: 1.0135301351547241,grad_norm: 0.9666065788841104, iteration: 22941
loss: 1.016453504562378,grad_norm: 0.99999909257274, iteration: 22942
loss: 1.001975655555725,grad_norm: 0.9474596225638536, iteration: 22943
loss: 1.0336881875991821,grad_norm: 0.9434333570668487, iteration: 22944
loss: 0.9963883757591248,grad_norm: 0.8387313764593844, iteration: 22945
loss: 1.0280771255493164,grad_norm: 0.9999995008697178, iteration: 22946
loss: 0.9984955787658691,grad_norm: 0.8299758350549208, iteration: 22947
loss: 1.0266822576522827,grad_norm: 0.9466482284826242, iteration: 22948
loss: 1.0149284601211548,grad_norm: 0.9999991085645286, iteration: 22949
loss: 1.0150407552719116,grad_norm: 0.9999991624694542, iteration: 22950
loss: 1.0595097541809082,grad_norm: 0.9999991301515294, iteration: 22951
loss: 1.0243483781814575,grad_norm: 0.9693764948513035, iteration: 22952
loss: 0.9845432043075562,grad_norm: 0.9999989118114514, iteration: 22953
loss: 1.00092613697052,grad_norm: 0.9999991293444529, iteration: 22954
loss: 0.9739297032356262,grad_norm: 0.9999992646776821, iteration: 22955
loss: 0.9933381080627441,grad_norm: 0.9999990334202422, iteration: 22956
loss: 1.0291424989700317,grad_norm: 0.9999992718861945, iteration: 22957
loss: 1.0384267568588257,grad_norm: 0.9255981037064847, iteration: 22958
loss: 1.0326855182647705,grad_norm: 0.9999990710516659, iteration: 22959
loss: 0.9882911443710327,grad_norm: 0.9999992871886841, iteration: 22960
loss: 0.9971646666526794,grad_norm: 0.9507625639474268, iteration: 22961
loss: 1.074419617652893,grad_norm: 0.9449201308995755, iteration: 22962
loss: 1.0254454612731934,grad_norm: 0.987373553430713, iteration: 22963
loss: 1.0095289945602417,grad_norm: 0.9999991922053825, iteration: 22964
loss: 1.019870638847351,grad_norm: 0.9742180611358519, iteration: 22965
loss: 0.9728344082832336,grad_norm: 0.9999991877881099, iteration: 22966
loss: 1.0195749998092651,grad_norm: 0.9999990790089889, iteration: 22967
loss: 0.9925575852394104,grad_norm: 0.9922941854312147, iteration: 22968
loss: 0.989100992679596,grad_norm: 0.9158060054755686, iteration: 22969
loss: 0.9716415405273438,grad_norm: 0.9999989993041427, iteration: 22970
loss: 1.0399417877197266,grad_norm: 0.9999990583218694, iteration: 22971
loss: 1.0063567161560059,grad_norm: 0.8934058181430582, iteration: 22972
loss: 1.0257846117019653,grad_norm: 0.9451791846276574, iteration: 22973
loss: 0.9487876892089844,grad_norm: 0.9826713914104115, iteration: 22974
loss: 0.9677587151527405,grad_norm: 0.7948687985133974, iteration: 22975
loss: 0.9979628920555115,grad_norm: 0.9664855067402844, iteration: 22976
loss: 0.9578835964202881,grad_norm: 0.8350661712921515, iteration: 22977
loss: 1.0235406160354614,grad_norm: 0.8587324973743203, iteration: 22978
loss: 1.003511667251587,grad_norm: 0.9999992521182329, iteration: 22979
loss: 0.9467580914497375,grad_norm: 0.9999991959855278, iteration: 22980
loss: 0.9475733637809753,grad_norm: 0.9999994974252078, iteration: 22981
loss: 1.004787564277649,grad_norm: 0.9999989686787801, iteration: 22982
loss: 1.0186665058135986,grad_norm: 0.9046776500396333, iteration: 22983
loss: 1.0137324333190918,grad_norm: 0.9999991470347359, iteration: 22984
loss: 1.005995512008667,grad_norm: 0.9999991610822729, iteration: 22985
loss: 1.0050832033157349,grad_norm: 0.9019060896213755, iteration: 22986
loss: 1.0019092559814453,grad_norm: 0.9999990560976358, iteration: 22987
loss: 1.0463546514511108,grad_norm: 0.92346906450253, iteration: 22988
loss: 1.0436805486679077,grad_norm: 0.9999990319317862, iteration: 22989
loss: 1.0113438367843628,grad_norm: 0.9545032255592786, iteration: 22990
loss: 1.0325227975845337,grad_norm: 0.9999993880710057, iteration: 22991
loss: 0.9928877353668213,grad_norm: 0.9755694984095723, iteration: 22992
loss: 1.0188262462615967,grad_norm: 0.9999990585213203, iteration: 22993
loss: 1.0016642808914185,grad_norm: 0.9999993927197814, iteration: 22994
loss: 1.030409336090088,grad_norm: 0.8610213268219425, iteration: 22995
loss: 1.0158919095993042,grad_norm: 0.9999993649875993, iteration: 22996
loss: 0.9969388842582703,grad_norm: 0.9999990228507675, iteration: 22997
loss: 1.0452582836151123,grad_norm: 0.9999991422958753, iteration: 22998
loss: 1.0111629962921143,grad_norm: 0.9421305410046835, iteration: 22999
loss: 1.0625174045562744,grad_norm: 0.9653261788785045, iteration: 23000
loss: 1.0117281675338745,grad_norm: 0.9999990919291223, iteration: 23001
loss: 1.0236526727676392,grad_norm: 0.9999990990074221, iteration: 23002
loss: 1.0196788311004639,grad_norm: 0.9999990865266525, iteration: 23003
loss: 0.9906423091888428,grad_norm: 0.9420958607575424, iteration: 23004
loss: 1.0186623334884644,grad_norm: 0.9999992040749164, iteration: 23005
loss: 1.0195578336715698,grad_norm: 0.9846083286351993, iteration: 23006
loss: 1.022276520729065,grad_norm: 0.9503015821059139, iteration: 23007
loss: 1.0005245208740234,grad_norm: 0.9243135616025409, iteration: 23008
loss: 1.019129753112793,grad_norm: 0.9999990038259937, iteration: 23009
loss: 1.0370036363601685,grad_norm: 0.9999990853662248, iteration: 23010
loss: 0.9892066717147827,grad_norm: 0.8226902323834264, iteration: 23011
loss: 1.0330090522766113,grad_norm: 0.967009344613255, iteration: 23012
loss: 1.0135859251022339,grad_norm: 0.9999991902176744, iteration: 23013
loss: 1.0284713506698608,grad_norm: 0.999999185187071, iteration: 23014
loss: 1.0117348432540894,grad_norm: 0.9999990088024452, iteration: 23015
loss: 1.0082228183746338,grad_norm: 0.9410835599234382, iteration: 23016
loss: 0.9782043099403381,grad_norm: 0.8500587695451188, iteration: 23017
loss: 0.9818745851516724,grad_norm: 0.9999991224885975, iteration: 23018
loss: 1.0241529941558838,grad_norm: 0.9999998346732093, iteration: 23019
loss: 0.995341420173645,grad_norm: 0.9999990409458294, iteration: 23020
loss: 0.9836211800575256,grad_norm: 0.9999993316511671, iteration: 23021
loss: 1.0573081970214844,grad_norm: 0.9999989309559776, iteration: 23022
loss: 1.018346905708313,grad_norm: 0.9999991129718211, iteration: 23023
loss: 1.0390280485153198,grad_norm: 0.871727119496581, iteration: 23024
loss: 1.0239125490188599,grad_norm: 0.99999924313094, iteration: 23025
loss: 1.0245665311813354,grad_norm: 0.9743781394874584, iteration: 23026
loss: 1.0052229166030884,grad_norm: 0.923496494650322, iteration: 23027
loss: 1.006642460823059,grad_norm: 0.999999037849001, iteration: 23028
loss: 0.9822997450828552,grad_norm: 0.881760252552545, iteration: 23029
loss: 0.9652995467185974,grad_norm: 0.9999990660019179, iteration: 23030
loss: 1.0057518482208252,grad_norm: 0.9702659527862477, iteration: 23031
loss: 1.016177773475647,grad_norm: 0.9388535062034231, iteration: 23032
loss: 1.0097131729125977,grad_norm: 0.9999990926616921, iteration: 23033
loss: 0.9994714260101318,grad_norm: 0.9999991691222848, iteration: 23034
loss: 1.0108377933502197,grad_norm: 0.9370147684143835, iteration: 23035
loss: 1.017332673072815,grad_norm: 0.9999994815316833, iteration: 23036
loss: 0.9918838143348694,grad_norm: 0.9999991840246908, iteration: 23037
loss: 0.9980571866035461,grad_norm: 0.9999991963763052, iteration: 23038
loss: 1.0728659629821777,grad_norm: 0.9999994404125552, iteration: 23039
loss: 1.0060327053070068,grad_norm: 0.8998040096585285, iteration: 23040
loss: 0.9960792064666748,grad_norm: 0.9999991270233216, iteration: 23041
loss: 0.9838354587554932,grad_norm: 0.9999990680268224, iteration: 23042
loss: 0.9697907567024231,grad_norm: 0.930559299610861, iteration: 23043
loss: 1.000525712966919,grad_norm: 0.8550680298235257, iteration: 23044
loss: 1.0101977586746216,grad_norm: 0.8881220874016275, iteration: 23045
loss: 1.0347031354904175,grad_norm: 0.9839774825036102, iteration: 23046
loss: 1.0156645774841309,grad_norm: 0.9999991287807929, iteration: 23047
loss: 1.0078197717666626,grad_norm: 0.9999990936569036, iteration: 23048
loss: 1.0503818988800049,grad_norm: 0.9999993456753931, iteration: 23049
loss: 1.0188804864883423,grad_norm: 0.9999990977896016, iteration: 23050
loss: 1.0320765972137451,grad_norm: 0.999999138369381, iteration: 23051
loss: 1.0269267559051514,grad_norm: 0.9207096309628447, iteration: 23052
loss: 0.9773839712142944,grad_norm: 0.9341194599678866, iteration: 23053
loss: 1.0053786039352417,grad_norm: 0.9093260624313123, iteration: 23054
loss: 1.0325921773910522,grad_norm: 0.9999999468898427, iteration: 23055
loss: 1.0644797086715698,grad_norm: 0.9999992126189322, iteration: 23056
loss: 1.0463171005249023,grad_norm: 0.9999994943925418, iteration: 23057
loss: 1.0108808279037476,grad_norm: 0.9697071481934735, iteration: 23058
loss: 1.0366579294204712,grad_norm: 0.9680899379337315, iteration: 23059
loss: 1.012751817703247,grad_norm: 0.8592946653004768, iteration: 23060
loss: 1.0718302726745605,grad_norm: 0.9999994927730915, iteration: 23061
loss: 0.9978860020637512,grad_norm: 0.9999992205905034, iteration: 23062
loss: 1.001598596572876,grad_norm: 0.9211938828362203, iteration: 23063
loss: 1.0434225797653198,grad_norm: 0.9999993568973984, iteration: 23064
loss: 1.003345012664795,grad_norm: 0.959839013201757, iteration: 23065
loss: 1.070192813873291,grad_norm: 0.9776334423484828, iteration: 23066
loss: 1.022385835647583,grad_norm: 0.999999176643735, iteration: 23067
loss: 1.049691915512085,grad_norm: 0.9999991203705019, iteration: 23068
loss: 1.0043728351593018,grad_norm: 0.9999990482740893, iteration: 23069
loss: 1.0299121141433716,grad_norm: 0.9999991087630239, iteration: 23070
loss: 1.0089961290359497,grad_norm: 0.9999992286114833, iteration: 23071
loss: 1.0048651695251465,grad_norm: 0.9686043475457147, iteration: 23072
loss: 1.013250708580017,grad_norm: 0.9999990838783498, iteration: 23073
loss: 1.0150386095046997,grad_norm: 0.9999990718095643, iteration: 23074
loss: 0.9966378808021545,grad_norm: 0.9999992098434892, iteration: 23075
loss: 1.0024269819259644,grad_norm: 0.8374864909160982, iteration: 23076
loss: 0.9899234771728516,grad_norm: 0.8923371588571958, iteration: 23077
loss: 0.9740917682647705,grad_norm: 0.9999991758049881, iteration: 23078
loss: 1.0102925300598145,grad_norm: 0.927258511933379, iteration: 23079
loss: 1.0011144876480103,grad_norm: 0.8882749937295573, iteration: 23080
loss: 1.0186388492584229,grad_norm: 0.9873239792862033, iteration: 23081
loss: 1.0193259716033936,grad_norm: 0.9999991080357561, iteration: 23082
loss: 1.0078669786453247,grad_norm: 0.9882538322730311, iteration: 23083
loss: 1.031658411026001,grad_norm: 0.9999992561120455, iteration: 23084
loss: 1.028450608253479,grad_norm: 0.8527221400088802, iteration: 23085
loss: 1.0174574851989746,grad_norm: 0.9999993678159154, iteration: 23086
loss: 0.9932075142860413,grad_norm: 0.9999990106607624, iteration: 23087
loss: 1.0330599546432495,grad_norm: 0.9633232840729438, iteration: 23088
loss: 1.0386908054351807,grad_norm: 0.9999992225296213, iteration: 23089
loss: 0.9827926754951477,grad_norm: 0.9999991200309035, iteration: 23090
loss: 1.0135842561721802,grad_norm: 0.9999995136742637, iteration: 23091
loss: 1.0022104978561401,grad_norm: 0.9999993658254062, iteration: 23092
loss: 0.9902323484420776,grad_norm: 0.9999991151856302, iteration: 23093
loss: 0.9858108162879944,grad_norm: 0.9999990753736206, iteration: 23094
loss: 1.0154094696044922,grad_norm: 0.9999990728341551, iteration: 23095
loss: 1.000914454460144,grad_norm: 0.888982196900232, iteration: 23096
loss: 0.9794084429740906,grad_norm: 0.9170530578952136, iteration: 23097
loss: 1.0437955856323242,grad_norm: 0.9999993754265513, iteration: 23098
loss: 1.0029855966567993,grad_norm: 0.973501617135177, iteration: 23099
loss: 1.0089739561080933,grad_norm: 0.9820096578340171, iteration: 23100
loss: 1.024053692817688,grad_norm: 0.9999991137287702, iteration: 23101
loss: 1.0209541320800781,grad_norm: 0.9999992499807955, iteration: 23102
loss: 0.9908215999603271,grad_norm: 0.9999990258287547, iteration: 23103
loss: 1.0202010869979858,grad_norm: 0.9999991919749739, iteration: 23104
loss: 0.9694789052009583,grad_norm: 0.9769722418306583, iteration: 23105
loss: 0.9663299918174744,grad_norm: 0.8354634008002388, iteration: 23106
loss: 1.0198535919189453,grad_norm: 0.9999990899491801, iteration: 23107
loss: 1.0130019187927246,grad_norm: 0.9481684337852834, iteration: 23108
loss: 1.0060555934906006,grad_norm: 0.8956385620546157, iteration: 23109
loss: 1.0411105155944824,grad_norm: 0.9999992524148059, iteration: 23110
loss: 1.0003626346588135,grad_norm: 0.9511345448318272, iteration: 23111
loss: 1.0170906782150269,grad_norm: 0.8899157793485264, iteration: 23112
loss: 1.0275561809539795,grad_norm: 0.9290111458126931, iteration: 23113
loss: 0.9749953746795654,grad_norm: 0.9999992469664059, iteration: 23114
loss: 0.9719781875610352,grad_norm: 0.8716918370842857, iteration: 23115
loss: 1.0217649936676025,grad_norm: 0.8620669946622405, iteration: 23116
loss: 0.9928544163703918,grad_norm: 0.9687581215090135, iteration: 23117
loss: 0.9848311543464661,grad_norm: 0.8337549857128936, iteration: 23118
loss: 0.9834802746772766,grad_norm: 0.9999990692921849, iteration: 23119
loss: 1.0059397220611572,grad_norm: 0.8738856709988233, iteration: 23120
loss: 1.017903208732605,grad_norm: 0.8785515475199388, iteration: 23121
loss: 1.0260417461395264,grad_norm: 0.9395888506259972, iteration: 23122
loss: 1.0393741130828857,grad_norm: 0.9999989412454324, iteration: 23123
loss: 1.0394432544708252,grad_norm: 0.9999993054085397, iteration: 23124
loss: 1.0013725757598877,grad_norm: 0.9999992162849068, iteration: 23125
loss: 0.967369556427002,grad_norm: 0.9999991110325345, iteration: 23126
loss: 1.0332833528518677,grad_norm: 0.9999990962570509, iteration: 23127
loss: 1.0539093017578125,grad_norm: 0.9999994353256934, iteration: 23128
loss: 1.0022391080856323,grad_norm: 0.9999990602001184, iteration: 23129
loss: 0.9941863417625427,grad_norm: 0.8940410703386532, iteration: 23130
loss: 0.9988683462142944,grad_norm: 0.9999993228629914, iteration: 23131
loss: 0.9774086475372314,grad_norm: 0.9999992030308754, iteration: 23132
loss: 1.036382794380188,grad_norm: 0.9999992304568782, iteration: 23133
loss: 1.0024523735046387,grad_norm: 0.9011974759713616, iteration: 23134
loss: 0.9727568030357361,grad_norm: 0.9999991037408191, iteration: 23135
loss: 1.1341232061386108,grad_norm: 0.9999993235993586, iteration: 23136
loss: 0.9907440543174744,grad_norm: 0.85794598659409, iteration: 23137
loss: 0.9966905117034912,grad_norm: 0.9999991691104753, iteration: 23138
loss: 0.9305984377861023,grad_norm: 0.999999084251199, iteration: 23139
loss: 1.0155553817749023,grad_norm: 0.9111405474289446, iteration: 23140
loss: 1.000934362411499,grad_norm: 0.9999991069810301, iteration: 23141
loss: 1.0063352584838867,grad_norm: 0.9999991883360381, iteration: 23142
loss: 1.0351296663284302,grad_norm: 0.9091340494028977, iteration: 23143
loss: 1.0326341390609741,grad_norm: 0.99999907704911, iteration: 23144
loss: 1.0200283527374268,grad_norm: 0.9999992026354867, iteration: 23145
loss: 0.986385703086853,grad_norm: 0.9999990994184191, iteration: 23146
loss: 1.0681086778640747,grad_norm: 0.9999993120900448, iteration: 23147
loss: 1.013351559638977,grad_norm: 0.9930165588618743, iteration: 23148
loss: 1.0736700296401978,grad_norm: 0.9999993402680248, iteration: 23149
loss: 1.013609766960144,grad_norm: 0.9999990867000706, iteration: 23150
loss: 1.029327630996704,grad_norm: 0.9384526479049962, iteration: 23151
loss: 1.0274571180343628,grad_norm: 0.9999996178720781, iteration: 23152
loss: 1.0384962558746338,grad_norm: 0.9999990480559395, iteration: 23153
loss: 1.0172148942947388,grad_norm: 0.9999991931856888, iteration: 23154
loss: 1.0343412160873413,grad_norm: 0.9999992614883045, iteration: 23155
loss: 0.9831943511962891,grad_norm: 0.868616798717492, iteration: 23156
loss: 0.9965158104896545,grad_norm: 0.9999991394792725, iteration: 23157
loss: 0.9905135631561279,grad_norm: 0.927850719798798, iteration: 23158
loss: 0.9675484895706177,grad_norm: 0.9732870219476905, iteration: 23159
loss: 1.044602870941162,grad_norm: 0.9999995253232681, iteration: 23160
loss: 1.0149011611938477,grad_norm: 0.9999990528236404, iteration: 23161
loss: 1.0155344009399414,grad_norm: 0.8740050700356856, iteration: 23162
loss: 1.0066206455230713,grad_norm: 0.99999922426211, iteration: 23163
loss: 0.998933732509613,grad_norm: 0.9999990414310176, iteration: 23164
loss: 1.0069729089736938,grad_norm: 0.9038737732105961, iteration: 23165
loss: 0.9769841432571411,grad_norm: 0.9693147033843825, iteration: 23166
loss: 1.025576114654541,grad_norm: 0.9446521613521435, iteration: 23167
loss: 0.9967677593231201,grad_norm: 0.9999991454239829, iteration: 23168
loss: 0.9961817860603333,grad_norm: 0.999999128144734, iteration: 23169
loss: 1.0267970561981201,grad_norm: 0.9999994115892162, iteration: 23170
loss: 1.06215500831604,grad_norm: 0.9999991174117017, iteration: 23171
loss: 1.0106253623962402,grad_norm: 0.8411805807614082, iteration: 23172
loss: 1.0132300853729248,grad_norm: 0.9999996036134766, iteration: 23173
loss: 1.017241358757019,grad_norm: 0.9580013198742888, iteration: 23174
loss: 1.0046404600143433,grad_norm: 0.9485077986237507, iteration: 23175
loss: 1.00539231300354,grad_norm: 0.9999991518612917, iteration: 23176
loss: 0.9974093437194824,grad_norm: 0.9999991794566752, iteration: 23177
loss: 1.0041828155517578,grad_norm: 0.9999994794202822, iteration: 23178
loss: 1.0310379266738892,grad_norm: 0.9999991767379318, iteration: 23179
loss: 1.0093845129013062,grad_norm: 0.9999992377620777, iteration: 23180
loss: 1.0178337097167969,grad_norm: 0.9999991985817279, iteration: 23181
loss: 0.9738538861274719,grad_norm: 0.9999991781744695, iteration: 23182
loss: 1.0382227897644043,grad_norm: 0.9999991050448139, iteration: 23183
loss: 0.9867414832115173,grad_norm: 0.9999991203462629, iteration: 23184
loss: 1.038412094116211,grad_norm: 0.9164390738001648, iteration: 23185
loss: 1.0656641721725464,grad_norm: 0.9999990876079176, iteration: 23186
loss: 0.9966293573379517,grad_norm: 0.9999992234317727, iteration: 23187
loss: 0.967382550239563,grad_norm: 0.9999990365145891, iteration: 23188
loss: 0.9547027945518494,grad_norm: 0.9999989170390902, iteration: 23189
loss: 1.0169200897216797,grad_norm: 0.9999991036210559, iteration: 23190
loss: 1.0026816129684448,grad_norm: 0.8521507793698543, iteration: 23191
loss: 1.0781255960464478,grad_norm: 0.9999997502158942, iteration: 23192
loss: 1.0012867450714111,grad_norm: 0.9995380499081332, iteration: 23193
loss: 1.0022562742233276,grad_norm: 0.996730188581685, iteration: 23194
loss: 0.99729984998703,grad_norm: 0.9999991006072905, iteration: 23195
loss: 1.0228090286254883,grad_norm: 0.9843854299549997, iteration: 23196
loss: 1.0521711111068726,grad_norm: 0.9999993618305721, iteration: 23197
loss: 1.0225099325180054,grad_norm: 0.9001371098999692, iteration: 23198
loss: 1.0098329782485962,grad_norm: 0.9999997828048249, iteration: 23199
loss: 1.0038204193115234,grad_norm: 0.9999990156217297, iteration: 23200
loss: 0.9782228469848633,grad_norm: 0.9999991336670755, iteration: 23201
loss: 0.9981209635734558,grad_norm: 0.9835162620698592, iteration: 23202
loss: 1.0010669231414795,grad_norm: 0.9999994367211426, iteration: 23203
loss: 1.0071322917938232,grad_norm: 0.9999990817804499, iteration: 23204
loss: 1.0441776514053345,grad_norm: 0.9999990455535179, iteration: 23205
loss: 1.045227289199829,grad_norm: 0.9999994373459241, iteration: 23206
loss: 1.0097101926803589,grad_norm: 0.9999992828596185, iteration: 23207
loss: 1.0664199590682983,grad_norm: 0.9999995652105161, iteration: 23208
loss: 0.9605833292007446,grad_norm: 0.9999991273180593, iteration: 23209
loss: 0.9992198348045349,grad_norm: 0.999999558249638, iteration: 23210
loss: 1.0129250288009644,grad_norm: 0.8504956766993461, iteration: 23211
loss: 1.011044979095459,grad_norm: 0.9999991306625456, iteration: 23212
loss: 1.001576542854309,grad_norm: 0.9999991109969103, iteration: 23213
loss: 1.0748567581176758,grad_norm: 0.9999992248774731, iteration: 23214
loss: 1.0523067712783813,grad_norm: 0.9999993559247617, iteration: 23215
loss: 1.0453763008117676,grad_norm: 0.9207023208071178, iteration: 23216
loss: 1.0222407579421997,grad_norm: 0.9989659860462552, iteration: 23217
loss: 1.0286173820495605,grad_norm: 0.999999126769273, iteration: 23218
loss: 1.034443974494934,grad_norm: 0.9714418404053867, iteration: 23219
loss: 1.035271167755127,grad_norm: 0.9999989664514212, iteration: 23220
loss: 1.0102710723876953,grad_norm: 0.9999991729179277, iteration: 23221
loss: 0.9986535310745239,grad_norm: 0.9999992394935979, iteration: 23222
loss: 0.996752917766571,grad_norm: 0.9443507025668236, iteration: 23223
loss: 1.0150843858718872,grad_norm: 0.9999990476192373, iteration: 23224
loss: 0.995726466178894,grad_norm: 0.9999990569578929, iteration: 23225
loss: 1.0149767398834229,grad_norm: 0.9999991089017184, iteration: 23226
loss: 1.0296870470046997,grad_norm: 0.9999997036462922, iteration: 23227
loss: 0.991400420665741,grad_norm: 0.9548623716811616, iteration: 23228
loss: 1.0102630853652954,grad_norm: 0.9999994596880919, iteration: 23229
loss: 1.038875699043274,grad_norm: 0.9999991111812118, iteration: 23230
loss: 1.0299996137619019,grad_norm: 0.9827730772984116, iteration: 23231
loss: 1.0067461729049683,grad_norm: 0.8917488650846268, iteration: 23232
loss: 1.0471160411834717,grad_norm: 0.9999997166077842, iteration: 23233
loss: 0.9781630635261536,grad_norm: 0.9999993039167387, iteration: 23234
loss: 0.9724152088165283,grad_norm: 0.9999991930097025, iteration: 23235
loss: 1.0021445751190186,grad_norm: 0.9999991374686925, iteration: 23236
loss: 1.0280544757843018,grad_norm: 0.820852238612224, iteration: 23237
loss: 0.9601390957832336,grad_norm: 0.999999169856104, iteration: 23238
loss: 1.0261297225952148,grad_norm: 0.8499757307441522, iteration: 23239
loss: 1.0098085403442383,grad_norm: 0.8915702887619432, iteration: 23240
loss: 1.0039652585983276,grad_norm: 0.9855782814021588, iteration: 23241
loss: 1.0567126274108887,grad_norm: 0.9999995679736325, iteration: 23242
loss: 1.0214916467666626,grad_norm: 0.9999991369541571, iteration: 23243
loss: 1.0253562927246094,grad_norm: 0.9999993250814733, iteration: 23244
loss: 1.0039745569229126,grad_norm: 0.9089829172393241, iteration: 23245
loss: 1.0179654359817505,grad_norm: 0.9999990714397781, iteration: 23246
loss: 1.0103809833526611,grad_norm: 0.9999996208009124, iteration: 23247
loss: 1.0041531324386597,grad_norm: 0.9951647071377319, iteration: 23248
loss: 1.0546058416366577,grad_norm: 0.9999994847962741, iteration: 23249
loss: 1.0038478374481201,grad_norm: 0.9999992637015398, iteration: 23250
loss: 0.9817381501197815,grad_norm: 0.9653638469779093, iteration: 23251
loss: 1.0204418897628784,grad_norm: 0.9999991554444821, iteration: 23252
loss: 1.0050939321517944,grad_norm: 0.9999993975501678, iteration: 23253
loss: 1.0223721265792847,grad_norm: 0.8322448822865894, iteration: 23254
loss: 1.0385918617248535,grad_norm: 0.9999996049094635, iteration: 23255
loss: 1.0014171600341797,grad_norm: 0.9999990202728631, iteration: 23256
loss: 1.018902063369751,grad_norm: 0.9624153067183899, iteration: 23257
loss: 1.0210248231887817,grad_norm: 0.8607544691456852, iteration: 23258
loss: 0.9749148488044739,grad_norm: 0.9999991835339338, iteration: 23259
loss: 1.0548490285873413,grad_norm: 0.9999990318050855, iteration: 23260
loss: 1.0665940046310425,grad_norm: 0.9999993025338462, iteration: 23261
loss: 1.0119144916534424,grad_norm: 0.9629375369779517, iteration: 23262
loss: 1.0373952388763428,grad_norm: 0.9999990378200975, iteration: 23263
loss: 1.04723060131073,grad_norm: 0.99999896423765, iteration: 23264
loss: 1.0387322902679443,grad_norm: 0.999999202245709, iteration: 23265
loss: 1.0178512334823608,grad_norm: 0.9832564712505474, iteration: 23266
loss: 0.999065101146698,grad_norm: 0.939497608605279, iteration: 23267
loss: 1.0118733644485474,grad_norm: 0.9999995155658968, iteration: 23268
loss: 1.01631760597229,grad_norm: 0.9999990362188902, iteration: 23269
loss: 1.0487815141677856,grad_norm: 0.9999990573833772, iteration: 23270
loss: 1.0369776487350464,grad_norm: 0.9999997170167754, iteration: 23271
loss: 1.0039275884628296,grad_norm: 0.9489302654017662, iteration: 23272
loss: 1.0268102884292603,grad_norm: 0.9999992188361954, iteration: 23273
loss: 1.028859257698059,grad_norm: 0.9999990473242911, iteration: 23274
loss: 1.0086334943771362,grad_norm: 0.9999991010695635, iteration: 23275
loss: 1.02412748336792,grad_norm: 0.9999991026030944, iteration: 23276
loss: 1.0061157941818237,grad_norm: 0.9999990995990017, iteration: 23277
loss: 0.9871204495429993,grad_norm: 0.9999990598352105, iteration: 23278
loss: 1.0984127521514893,grad_norm: 0.9999996923023118, iteration: 23279
loss: 1.0313348770141602,grad_norm: 0.9999996021712062, iteration: 23280
loss: 0.997357964515686,grad_norm: 0.9999990995702638, iteration: 23281
loss: 0.9835909008979797,grad_norm: 0.9999993050232544, iteration: 23282
loss: 1.023079752922058,grad_norm: 0.9999991420928118, iteration: 23283
loss: 1.0489658117294312,grad_norm: 0.9999992518451755, iteration: 23284
loss: 1.0143375396728516,grad_norm: 0.8811380951701923, iteration: 23285
loss: 1.0078679323196411,grad_norm: 0.9999995018270433, iteration: 23286
loss: 0.9983133673667908,grad_norm: 0.9999991149209789, iteration: 23287
loss: 1.0160527229309082,grad_norm: 0.9996777280781636, iteration: 23288
loss: 1.0637538433074951,grad_norm: 0.9999991810700959, iteration: 23289
loss: 0.9869020581245422,grad_norm: 0.9457246514116951, iteration: 23290
loss: 1.0214343070983887,grad_norm: 0.9999990147212329, iteration: 23291
loss: 0.9761836528778076,grad_norm: 0.9999990256794423, iteration: 23292
loss: 1.020116925239563,grad_norm: 0.9999990126969541, iteration: 23293
loss: 1.0047154426574707,grad_norm: 0.9999992370088726, iteration: 23294
loss: 1.0142748355865479,grad_norm: 0.9752037514042438, iteration: 23295
loss: 0.99730384349823,grad_norm: 0.9479442560789118, iteration: 23296
loss: 1.0275402069091797,grad_norm: 0.9999992339305479, iteration: 23297
loss: 1.031876802444458,grad_norm: 0.9999995404145411, iteration: 23298
loss: 0.9962784051895142,grad_norm: 0.9999989890312033, iteration: 23299
loss: 1.004942774772644,grad_norm: 0.9999990467031469, iteration: 23300
loss: 1.035361886024475,grad_norm: 0.9999989827927459, iteration: 23301
loss: 1.0051987171173096,grad_norm: 0.9999990602035934, iteration: 23302
loss: 0.9725995063781738,grad_norm: 0.8771261933496415, iteration: 23303
loss: 1.0278865098953247,grad_norm: 0.9999990412121684, iteration: 23304
loss: 1.007354974746704,grad_norm: 0.9999993056356997, iteration: 23305
loss: 1.0036758184432983,grad_norm: 0.9999998135867277, iteration: 23306
loss: 1.0045223236083984,grad_norm: 0.971197786361097, iteration: 23307
loss: 1.0097134113311768,grad_norm: 0.9999991202394205, iteration: 23308
loss: 1.0491251945495605,grad_norm: 0.8767560426464076, iteration: 23309
loss: 1.0174859762191772,grad_norm: 0.9413440362362067, iteration: 23310
loss: 0.9821042418479919,grad_norm: 0.8940438701637538, iteration: 23311
loss: 1.0168678760528564,grad_norm: 0.9999989871049837, iteration: 23312
loss: 1.005350947380066,grad_norm: 0.882740817054481, iteration: 23313
loss: 1.0338587760925293,grad_norm: 0.9999991368404962, iteration: 23314
loss: 0.9929348230361938,grad_norm: 0.8926407656404681, iteration: 23315
loss: 0.9953271746635437,grad_norm: 0.9999990272605405, iteration: 23316
loss: 0.9835719466209412,grad_norm: 0.9999991064870489, iteration: 23317
loss: 1.042028546333313,grad_norm: 0.9999994583462849, iteration: 23318
loss: 0.9532756805419922,grad_norm: 0.9999991329573638, iteration: 23319
loss: 1.0038949251174927,grad_norm: 0.9575470196563886, iteration: 23320
loss: 1.0301567316055298,grad_norm: 0.9999994225288369, iteration: 23321
loss: 1.0462167263031006,grad_norm: 0.9999995348984928, iteration: 23322
loss: 1.0948677062988281,grad_norm: 0.9999991001958848, iteration: 23323
loss: 1.034745693206787,grad_norm: 0.9999997496984435, iteration: 23324
loss: 1.0266432762145996,grad_norm: 0.9999992131825578, iteration: 23325
loss: 1.0154837369918823,grad_norm: 0.8398194545774007, iteration: 23326
loss: 1.0049324035644531,grad_norm: 0.99999961550444, iteration: 23327
loss: 0.9898281693458557,grad_norm: 0.9999990279068357, iteration: 23328
loss: 1.008463978767395,grad_norm: 0.9256126797650212, iteration: 23329
loss: 1.015993595123291,grad_norm: 0.9999990750041522, iteration: 23330
loss: 1.055803894996643,grad_norm: 0.9999995672384355, iteration: 23331
loss: 0.9997280240058899,grad_norm: 0.999999230993428, iteration: 23332
loss: 1.0358526706695557,grad_norm: 0.9358975393210884, iteration: 23333
loss: 1.0229226350784302,grad_norm: 0.9999993746720619, iteration: 23334
loss: 0.9952502846717834,grad_norm: 0.9999996826857327, iteration: 23335
loss: 1.0097004175186157,grad_norm: 0.864673432038109, iteration: 23336
loss: 0.9889670014381409,grad_norm: 0.8863689287478144, iteration: 23337
loss: 0.9753997325897217,grad_norm: 0.9269027222219196, iteration: 23338
loss: 0.9889833927154541,grad_norm: 0.9999990046945068, iteration: 23339
loss: 1.0022752285003662,grad_norm: 0.9999996361529088, iteration: 23340
loss: 0.9533935785293579,grad_norm: 0.9999990263800629, iteration: 23341
loss: 0.9848065376281738,grad_norm: 0.9999990800006394, iteration: 23342
loss: 1.01195228099823,grad_norm: 0.999999231167873, iteration: 23343
loss: 1.0337955951690674,grad_norm: 0.9999990822743373, iteration: 23344
loss: 1.0219769477844238,grad_norm: 0.9999993029505847, iteration: 23345
loss: 1.0250911712646484,grad_norm: 0.953632426106113, iteration: 23346
loss: 1.0087758302688599,grad_norm: 0.9999995477476267, iteration: 23347
loss: 1.0625168085098267,grad_norm: 0.9999996330955143, iteration: 23348
loss: 1.0030237436294556,grad_norm: 0.9655408613170137, iteration: 23349
loss: 1.0383687019348145,grad_norm: 0.9999995075616628, iteration: 23350
loss: 1.0151287317276,grad_norm: 0.9556712918881062, iteration: 23351
loss: 1.0228378772735596,grad_norm: 0.9999991793596538, iteration: 23352
loss: 1.017756462097168,grad_norm: 0.9999994466388233, iteration: 23353
loss: 0.9885844588279724,grad_norm: 0.7907768562350426, iteration: 23354
loss: 0.9453976154327393,grad_norm: 0.9999990660206257, iteration: 23355
loss: 1.0178122520446777,grad_norm: 0.9999991579295224, iteration: 23356
loss: 1.0392688512802124,grad_norm: 0.9999991403608429, iteration: 23357
loss: 0.972449541091919,grad_norm: 0.9999991418797168, iteration: 23358
loss: 1.0724400281906128,grad_norm: 0.9999994794428677, iteration: 23359
loss: 1.0170170068740845,grad_norm: 0.9563039626475092, iteration: 23360
loss: 1.0341768264770508,grad_norm: 0.8921185156520571, iteration: 23361
loss: 1.0049225091934204,grad_norm: 0.9574294080107001, iteration: 23362
loss: 1.003692626953125,grad_norm: 0.9999992582614481, iteration: 23363
loss: 0.9729194045066833,grad_norm: 0.9971713742309659, iteration: 23364
loss: 0.9825218319892883,grad_norm: 0.999999625687008, iteration: 23365
loss: 0.9884443879127502,grad_norm: 0.9999991809237895, iteration: 23366
loss: 1.0004587173461914,grad_norm: 0.9135584605246391, iteration: 23367
loss: 1.0115127563476562,grad_norm: 0.9999992533594975, iteration: 23368
loss: 0.9998628497123718,grad_norm: 0.9999995278366245, iteration: 23369
loss: 1.0210943222045898,grad_norm: 0.9999990412022484, iteration: 23370
loss: 1.0038727521896362,grad_norm: 0.9999992431261958, iteration: 23371
loss: 1.0267404317855835,grad_norm: 0.9999995285241186, iteration: 23372
loss: 0.9911237359046936,grad_norm: 0.942488451281522, iteration: 23373
loss: 1.0013700723648071,grad_norm: 0.9857321710324053, iteration: 23374
loss: 1.0143855810165405,grad_norm: 0.9999993134007048, iteration: 23375
loss: 1.0433604717254639,grad_norm: 0.942801082042189, iteration: 23376
loss: 1.0301340818405151,grad_norm: 0.9999990855322813, iteration: 23377
loss: 1.01771080493927,grad_norm: 0.9999991046119582, iteration: 23378
loss: 1.0194340944290161,grad_norm: 0.9999991585096557, iteration: 23379
loss: 1.0088214874267578,grad_norm: 0.9999990626274414, iteration: 23380
loss: 1.0208150148391724,grad_norm: 0.9999995120250673, iteration: 23381
loss: 1.0124032497406006,grad_norm: 0.9278179753661583, iteration: 23382
loss: 1.017175555229187,grad_norm: 0.9999992767882719, iteration: 23383
loss: 1.0234205722808838,grad_norm: 0.9437621679643262, iteration: 23384
loss: 1.1089433431625366,grad_norm: 0.9999995926674581, iteration: 23385
loss: 0.9790973663330078,grad_norm: 0.9999991705069359, iteration: 23386
loss: 1.013594150543213,grad_norm: 0.9086662136417233, iteration: 23387
loss: 1.0047358274459839,grad_norm: 0.9951581348860152, iteration: 23388
loss: 1.0251954793930054,grad_norm: 0.9999991911248011, iteration: 23389
loss: 0.9758267998695374,grad_norm: 0.9774744325770306, iteration: 23390
loss: 0.9573929309844971,grad_norm: 0.999999084220709, iteration: 23391
loss: 1.0102695226669312,grad_norm: 0.8980924601271402, iteration: 23392
loss: 1.0162605047225952,grad_norm: 0.9999994640886162, iteration: 23393
loss: 1.0452368259429932,grad_norm: 0.9999993315315029, iteration: 23394
loss: 1.0052326917648315,grad_norm: 0.9999990601433947, iteration: 23395
loss: 0.961966872215271,grad_norm: 0.9460157473103157, iteration: 23396
loss: 1.00272798538208,grad_norm: 0.8974533203936194, iteration: 23397
loss: 0.9874505996704102,grad_norm: 0.9667028241384948, iteration: 23398
loss: 1.020140528678894,grad_norm: 0.8614396270272398, iteration: 23399
loss: 1.0262494087219238,grad_norm: 0.9999992776847799, iteration: 23400
loss: 1.0203849077224731,grad_norm: 0.7685267321131846, iteration: 23401
loss: 0.998157799243927,grad_norm: 0.9160713933273298, iteration: 23402
loss: 1.027680516242981,grad_norm: 0.9232311454161211, iteration: 23403
loss: 0.9844891428947449,grad_norm: 0.9999990792987791, iteration: 23404
loss: 1.0087332725524902,grad_norm: 0.999998974325687, iteration: 23405
loss: 0.9932029843330383,grad_norm: 0.9417522223017756, iteration: 23406
loss: 1.0143839120864868,grad_norm: 0.9555629775172102, iteration: 23407
loss: 1.0421777963638306,grad_norm: 0.99999950879737, iteration: 23408
loss: 1.0210468769073486,grad_norm: 0.9999990863991385, iteration: 23409
loss: 1.0429028272628784,grad_norm: 0.9516778899020498, iteration: 23410
loss: 1.0085498094558716,grad_norm: 0.9999992394713676, iteration: 23411
loss: 0.9913368821144104,grad_norm: 0.9999989422529694, iteration: 23412
loss: 1.0059117078781128,grad_norm: 0.9970639926594455, iteration: 23413
loss: 0.9807682633399963,grad_norm: 0.9917093461509958, iteration: 23414
loss: 1.0135431289672852,grad_norm: 0.9493710344890866, iteration: 23415
loss: 1.0450106859207153,grad_norm: 0.9999993547125285, iteration: 23416
loss: 1.021440029144287,grad_norm: 0.9999991625525587, iteration: 23417
loss: 0.990847647190094,grad_norm: 0.8288168585328882, iteration: 23418
loss: 1.0100783109664917,grad_norm: 0.9999989697040381, iteration: 23419
loss: 1.0390492677688599,grad_norm: 0.9999990944047231, iteration: 23420
loss: 1.0215824842453003,grad_norm: 0.9999989364071347, iteration: 23421
loss: 0.9966694712638855,grad_norm: 0.9999990099116126, iteration: 23422
loss: 1.0112168788909912,grad_norm: 0.9999992666189086, iteration: 23423
loss: 1.0313093662261963,grad_norm: 0.8753189906070821, iteration: 23424
loss: 1.0453461408615112,grad_norm: 0.9999991950950837, iteration: 23425
loss: 1.0303082466125488,grad_norm: 0.9999991887198444, iteration: 23426
loss: 1.040777325630188,grad_norm: 0.9999991150132443, iteration: 23427
loss: 1.0163943767547607,grad_norm: 0.9999993057324047, iteration: 23428
loss: 0.9706308245658875,grad_norm: 0.9510869014251205, iteration: 23429
loss: 1.0069080591201782,grad_norm: 0.9885664694300915, iteration: 23430
loss: 1.0506055355072021,grad_norm: 0.999999297036303, iteration: 23431
loss: 0.9868846535682678,grad_norm: 0.9999991593112547, iteration: 23432
loss: 0.9948687553405762,grad_norm: 0.9513666940022658, iteration: 23433
loss: 0.9890792369842529,grad_norm: 0.8997826684380077, iteration: 23434
loss: 1.004063606262207,grad_norm: 0.9021517264769456, iteration: 23435
loss: 1.022739291191101,grad_norm: 0.9999992669419993, iteration: 23436
loss: 1.0475332736968994,grad_norm: 0.9999992769398744, iteration: 23437
loss: 0.960297703742981,grad_norm: 0.999999029157909, iteration: 23438
loss: 1.0090997219085693,grad_norm: 0.8178692253182559, iteration: 23439
loss: 1.011444330215454,grad_norm: 0.938854515548027, iteration: 23440
loss: 0.9682934284210205,grad_norm: 0.9999992731000389, iteration: 23441
loss: 1.0039039850234985,grad_norm: 0.9999990967504155, iteration: 23442
loss: 1.0129692554473877,grad_norm: 0.9999992297809273, iteration: 23443
loss: 1.0072919130325317,grad_norm: 0.860864826669577, iteration: 23444
loss: 1.085051417350769,grad_norm: 0.9999993647233231, iteration: 23445
loss: 1.01925790309906,grad_norm: 0.9139513354067992, iteration: 23446
loss: 1.0262964963912964,grad_norm: 0.9999993392574349, iteration: 23447
loss: 1.0432559251785278,grad_norm: 0.9999993629143381, iteration: 23448
loss: 0.9602459073066711,grad_norm: 0.9999990167148314, iteration: 23449
loss: 1.0071040391921997,grad_norm: 0.9999991824424064, iteration: 23450
loss: 0.997547447681427,grad_norm: 0.8396111633653456, iteration: 23451
loss: 1.0120947360992432,grad_norm: 0.8625057568648461, iteration: 23452
loss: 1.0291012525558472,grad_norm: 0.9999992757788686, iteration: 23453
loss: 0.978894054889679,grad_norm: 0.9999991491634145, iteration: 23454
loss: 1.0052484273910522,grad_norm: 0.9999991948196715, iteration: 23455
loss: 1.0234085321426392,grad_norm: 0.9128521093619292, iteration: 23456
loss: 1.0526760816574097,grad_norm: 0.9999991974295211, iteration: 23457
loss: 1.0035338401794434,grad_norm: 0.9096778557934628, iteration: 23458
loss: 1.0094314813613892,grad_norm: 0.9999989939029306, iteration: 23459
loss: 0.9776264429092407,grad_norm: 0.9999992596595687, iteration: 23460
loss: 0.9808210134506226,grad_norm: 0.8601849408857817, iteration: 23461
loss: 0.9730015397071838,grad_norm: 0.975339622769631, iteration: 23462
loss: 1.002948522567749,grad_norm: 0.9999991846021704, iteration: 23463
loss: 1.0206897258758545,grad_norm: 0.9999992379356915, iteration: 23464
loss: 0.9926725029945374,grad_norm: 0.9626843927681323, iteration: 23465
loss: 1.0181543827056885,grad_norm: 0.9999992221591111, iteration: 23466
loss: 1.0324290990829468,grad_norm: 0.9941425449956958, iteration: 23467
loss: 1.0695984363555908,grad_norm: 0.787951848579033, iteration: 23468
loss: 1.0347342491149902,grad_norm: 0.9999990432921951, iteration: 23469
loss: 1.0528600215911865,grad_norm: 0.8252030630901558, iteration: 23470
loss: 0.984135091304779,grad_norm: 0.999999009223487, iteration: 23471
loss: 1.000422716140747,grad_norm: 0.9416011538974749, iteration: 23472
loss: 1.0007010698318481,grad_norm: 0.9826858713017178, iteration: 23473
loss: 1.0219532251358032,grad_norm: 0.9999990296557268, iteration: 23474
loss: 1.0072717666625977,grad_norm: 0.9999991657072295, iteration: 23475
loss: 1.0007059574127197,grad_norm: 0.9999992047698413, iteration: 23476
loss: 1.0333600044250488,grad_norm: 0.9926271687165016, iteration: 23477
loss: 1.0399150848388672,grad_norm: 0.999999018108956, iteration: 23478
loss: 1.0331591367721558,grad_norm: 0.9535730209799744, iteration: 23479
loss: 1.0227904319763184,grad_norm: 0.9999990993272942, iteration: 23480
loss: 1.0738168954849243,grad_norm: 0.999999422243538, iteration: 23481
loss: 1.0198543071746826,grad_norm: 0.9425669779162291, iteration: 23482
loss: 1.0352869033813477,grad_norm: 0.8073063271729398, iteration: 23483
loss: 1.0252360105514526,grad_norm: 0.9999991592445675, iteration: 23484
loss: 1.0166107416152954,grad_norm: 0.9999990648851804, iteration: 23485
loss: 1.0304018259048462,grad_norm: 0.9478462347524689, iteration: 23486
loss: 1.0235960483551025,grad_norm: 0.9999993476504879, iteration: 23487
loss: 1.0169450044631958,grad_norm: 0.9966152292188439, iteration: 23488
loss: 1.0160813331604004,grad_norm: 0.9643008850447249, iteration: 23489
loss: 1.008397102355957,grad_norm: 0.9999990863294507, iteration: 23490
loss: 0.9950161576271057,grad_norm: 0.910907871996944, iteration: 23491
loss: 0.977843165397644,grad_norm: 0.9999991011591748, iteration: 23492
loss: 1.0011837482452393,grad_norm: 0.9999991509759777, iteration: 23493
loss: 0.9937558770179749,grad_norm: 0.9999992426611828, iteration: 23494
loss: 0.9776721000671387,grad_norm: 0.9248106698950799, iteration: 23495
loss: 0.9925516247749329,grad_norm: 0.9999990011832919, iteration: 23496
loss: 1.0182712078094482,grad_norm: 0.999999029018975, iteration: 23497
loss: 1.0341771841049194,grad_norm: 0.9999990437839564, iteration: 23498
loss: 1.0229377746582031,grad_norm: 0.9999992896563847, iteration: 23499
loss: 1.004388451576233,grad_norm: 0.9999997632163863, iteration: 23500
loss: 1.0048681497573853,grad_norm: 0.9999990493369229, iteration: 23501
loss: 1.0132253170013428,grad_norm: 0.9999990480802111, iteration: 23502
loss: 1.0301345586776733,grad_norm: 0.9999990976133688, iteration: 23503
loss: 1.0355970859527588,grad_norm: 0.9999994371725567, iteration: 23504
loss: 1.015954613685608,grad_norm: 0.9999992574315314, iteration: 23505
loss: 0.9972315430641174,grad_norm: 0.8285260008230328, iteration: 23506
loss: 1.0294829607009888,grad_norm: 0.9999989968983483, iteration: 23507
loss: 1.038839340209961,grad_norm: 0.9999992536630716, iteration: 23508
loss: 1.0200536251068115,grad_norm: 0.9494903168735876, iteration: 23509
loss: 1.0073515176773071,grad_norm: 0.9532394432632374, iteration: 23510
loss: 0.9780040979385376,grad_norm: 0.9999992401016122, iteration: 23511
loss: 1.0363847017288208,grad_norm: 0.943763077537525, iteration: 23512
loss: 1.0396003723144531,grad_norm: 0.9999995603292131, iteration: 23513
loss: 1.0042129755020142,grad_norm: 0.9066540057744003, iteration: 23514
loss: 1.0158025026321411,grad_norm: 0.9999994400405826, iteration: 23515
loss: 0.9641163349151611,grad_norm: 0.7934294554706609, iteration: 23516
loss: 1.027940273284912,grad_norm: 0.956639777236577, iteration: 23517
loss: 0.9700286984443665,grad_norm: 0.9999992937561336, iteration: 23518
loss: 1.0194463729858398,grad_norm: 0.999999045597542, iteration: 23519
loss: 1.0255478620529175,grad_norm: 0.9999990432166677, iteration: 23520
loss: 1.0139353275299072,grad_norm: 0.9305666317970885, iteration: 23521
loss: 1.0350902080535889,grad_norm: 0.9999995202089779, iteration: 23522
loss: 1.0399296283721924,grad_norm: 0.9999991810514696, iteration: 23523
loss: 1.005718469619751,grad_norm: 0.9999991251291243, iteration: 23524
loss: 0.9849114418029785,grad_norm: 0.9272152998302782, iteration: 23525
loss: 1.0060045719146729,grad_norm: 0.9999992074727115, iteration: 23526
loss: 0.9826191067695618,grad_norm: 0.999999037915766, iteration: 23527
loss: 0.9967557191848755,grad_norm: 0.9999990505922235, iteration: 23528
loss: 1.024271845817566,grad_norm: 0.9999990362993175, iteration: 23529
loss: 1.0098592042922974,grad_norm: 0.9999990282853729, iteration: 23530
loss: 1.0412358045578003,grad_norm: 0.999999542879999, iteration: 23531
loss: 0.9958028197288513,grad_norm: 0.9016798375088341, iteration: 23532
loss: 0.9962651133537292,grad_norm: 0.9999991740002656, iteration: 23533
loss: 0.9860805869102478,grad_norm: 0.8426844069925764, iteration: 23534
loss: 0.9875060319900513,grad_norm: 0.9427024589958809, iteration: 23535
loss: 1.037893533706665,grad_norm: 0.9033062062756623, iteration: 23536
loss: 1.0397052764892578,grad_norm: 0.9999991187507431, iteration: 23537
loss: 1.012463092803955,grad_norm: 0.9999989990793564, iteration: 23538
loss: 1.0216232538223267,grad_norm: 0.9999992649021934, iteration: 23539
loss: 1.03950035572052,grad_norm: 0.9999993157151018, iteration: 23540
loss: 1.0650269985198975,grad_norm: 0.999999141452331, iteration: 23541
loss: 0.9940224885940552,grad_norm: 0.999999164617812, iteration: 23542
loss: 0.9655042886734009,grad_norm: 0.9413206064261699, iteration: 23543
loss: 1.027652382850647,grad_norm: 0.9999995513777801, iteration: 23544
loss: 1.0075595378875732,grad_norm: 0.9999989451102171, iteration: 23545
loss: 1.0115617513656616,grad_norm: 0.9999991605489466, iteration: 23546
loss: 0.975626528263092,grad_norm: 0.9999991497695159, iteration: 23547
loss: 1.0308955907821655,grad_norm: 0.9774492649882516, iteration: 23548
loss: 0.9869402647018433,grad_norm: 0.9999990946171544, iteration: 23549
loss: 1.0316040515899658,grad_norm: 0.9999990469415695, iteration: 23550
loss: 1.0145535469055176,grad_norm: 0.9999991482182938, iteration: 23551
loss: 1.008097767829895,grad_norm: 0.9698623350865588, iteration: 23552
loss: 0.9927471280097961,grad_norm: 0.9999991721807199, iteration: 23553
loss: 1.0176740884780884,grad_norm: 0.8420262458415976, iteration: 23554
loss: 0.9867383241653442,grad_norm: 0.9999989987927007, iteration: 23555
loss: 1.005063772201538,grad_norm: 0.9899043362426483, iteration: 23556
loss: 1.0183172225952148,grad_norm: 0.9430909035362901, iteration: 23557
loss: 1.039393663406372,grad_norm: 0.9575428273833609, iteration: 23558
loss: 1.0004668235778809,grad_norm: 0.8938904855620945, iteration: 23559
loss: 1.0598936080932617,grad_norm: 0.9327919953831704, iteration: 23560
loss: 1.0034478902816772,grad_norm: 0.9999992136202599, iteration: 23561
loss: 1.0064938068389893,grad_norm: 0.9999989928730486, iteration: 23562
loss: 0.9892567992210388,grad_norm: 0.9835311767942753, iteration: 23563
loss: 1.0035070180892944,grad_norm: 0.9999991001798375, iteration: 23564
loss: 1.0165441036224365,grad_norm: 0.8687015481221769, iteration: 23565
loss: 0.9865699410438538,grad_norm: 0.9999993331571004, iteration: 23566
loss: 0.9408218264579773,grad_norm: 0.9098542675806163, iteration: 23567
loss: 0.9926002025604248,grad_norm: 0.8227086165446096, iteration: 23568
loss: 1.0176125764846802,grad_norm: 0.971674027810981, iteration: 23569
loss: 0.9995104074478149,grad_norm: 0.9999991221362389, iteration: 23570
loss: 1.0242540836334229,grad_norm: 0.9999991051173085, iteration: 23571
loss: 1.021633267402649,grad_norm: 0.9094085163713782, iteration: 23572
loss: 1.056716799736023,grad_norm: 0.9999991152471427, iteration: 23573
loss: 1.0067901611328125,grad_norm: 0.9999992323998723, iteration: 23574
loss: 1.0441207885742188,grad_norm: 0.9999993457256, iteration: 23575
loss: 1.0637387037277222,grad_norm: 0.9999990061554912, iteration: 23576
loss: 0.9763143658638,grad_norm: 0.9912529095975571, iteration: 23577
loss: 0.9940236806869507,grad_norm: 0.8800169725080756, iteration: 23578
loss: 0.9736388921737671,grad_norm: 0.9065677461350669, iteration: 23579
loss: 0.9716672301292419,grad_norm: 0.9999991808149186, iteration: 23580
loss: 1.0277713537216187,grad_norm: 0.9999991832626672, iteration: 23581
loss: 1.017799735069275,grad_norm: 0.9999992472906619, iteration: 23582
loss: 0.9966789484024048,grad_norm: 0.9015193587576926, iteration: 23583
loss: 1.0178577899932861,grad_norm: 0.9999991403761389, iteration: 23584
loss: 1.022356390953064,grad_norm: 0.999999196777616, iteration: 23585
loss: 1.0323781967163086,grad_norm: 0.9729162536530339, iteration: 23586
loss: 0.9995195269584656,grad_norm: 0.9857698172884541, iteration: 23587
loss: 1.0459390878677368,grad_norm: 0.9999994484061607, iteration: 23588
loss: 1.0109968185424805,grad_norm: 0.8771651140847293, iteration: 23589
loss: 1.0180025100708008,grad_norm: 0.9597072719271008, iteration: 23590
loss: 0.9848729372024536,grad_norm: 0.9999992828135282, iteration: 23591
loss: 1.0099561214447021,grad_norm: 0.9999993356547139, iteration: 23592
loss: 0.9665001034736633,grad_norm: 0.9379849311170613, iteration: 23593
loss: 0.9855684041976929,grad_norm: 0.9378182883408657, iteration: 23594
loss: 0.9758905172348022,grad_norm: 0.9999991092968391, iteration: 23595
loss: 1.0217970609664917,grad_norm: 0.9084820695821999, iteration: 23596
loss: 1.0153882503509521,grad_norm: 0.9999990373781851, iteration: 23597
loss: 1.0025641918182373,grad_norm: 0.8735567531970169, iteration: 23598
loss: 1.0513403415679932,grad_norm: 0.9807991534678902, iteration: 23599
loss: 1.0584073066711426,grad_norm: 0.9999991768405393, iteration: 23600
loss: 1.0163514614105225,grad_norm: 0.9000290829653246, iteration: 23601
loss: 1.009314775466919,grad_norm: 0.9074098081211577, iteration: 23602
loss: 1.0052145719528198,grad_norm: 0.9999991490003658, iteration: 23603
loss: 1.0151252746582031,grad_norm: 0.9999991403694901, iteration: 23604
loss: 1.0380489826202393,grad_norm: 0.9999991485967258, iteration: 23605
loss: 0.9902023077011108,grad_norm: 0.9548726649779344, iteration: 23606
loss: 1.0206001996994019,grad_norm: 0.989896330013556, iteration: 23607
loss: 0.9704264998435974,grad_norm: 0.9759385121050453, iteration: 23608
loss: 1.0191668272018433,grad_norm: 0.9999989713836567, iteration: 23609
loss: 1.0383052825927734,grad_norm: 0.9999990006336039, iteration: 23610
loss: 0.9983423948287964,grad_norm: 0.9999990852598077, iteration: 23611
loss: 1.0274057388305664,grad_norm: 0.923027092451282, iteration: 23612
loss: 0.998318612575531,grad_norm: 0.8653197731321794, iteration: 23613
loss: 1.0317906141281128,grad_norm: 0.8022443819235495, iteration: 23614
loss: 1.0086885690689087,grad_norm: 0.999999275939773, iteration: 23615
loss: 0.9558865427970886,grad_norm: 0.9999992283556383, iteration: 23616
loss: 1.0398917198181152,grad_norm: 0.9142113579032174, iteration: 23617
loss: 1.0304019451141357,grad_norm: 0.9999989635535095, iteration: 23618
loss: 0.9555748105049133,grad_norm: 0.9999991224111382, iteration: 23619
loss: 0.9921266436576843,grad_norm: 0.9999990859147128, iteration: 23620
loss: 1.025436520576477,grad_norm: 0.9999991698104753, iteration: 23621
loss: 1.0111141204833984,grad_norm: 0.9674338966572842, iteration: 23622
loss: 0.9958534836769104,grad_norm: 0.8785263150823893, iteration: 23623
loss: 1.0303292274475098,grad_norm: 0.9999991858196455, iteration: 23624
loss: 1.027660846710205,grad_norm: 0.9999992338766163, iteration: 23625
loss: 0.9834579825401306,grad_norm: 0.8943365462814761, iteration: 23626
loss: 1.0250662565231323,grad_norm: 0.9561400997660063, iteration: 23627
loss: 1.0053213834762573,grad_norm: 0.9359250710435484, iteration: 23628
loss: 1.0252348184585571,grad_norm: 0.9090797765312999, iteration: 23629
loss: 1.022475004196167,grad_norm: 0.9999992056052808, iteration: 23630
loss: 1.0296393632888794,grad_norm: 0.9999990906177085, iteration: 23631
loss: 1.019729733467102,grad_norm: 0.9343405039196447, iteration: 23632
loss: 0.9840981960296631,grad_norm: 0.9737257188991967, iteration: 23633
loss: 0.9559553265571594,grad_norm: 0.9999991977194408, iteration: 23634
loss: 0.9752699732780457,grad_norm: 0.9999990866444797, iteration: 23635
loss: 1.0097488164901733,grad_norm: 0.9344489874811698, iteration: 23636
loss: 0.9795348048210144,grad_norm: 0.9999991975824154, iteration: 23637
loss: 1.014577031135559,grad_norm: 0.9999991488299638, iteration: 23638
loss: 1.0596144199371338,grad_norm: 0.9999990190245421, iteration: 23639
loss: 1.0355514287948608,grad_norm: 0.8392533352375462, iteration: 23640
loss: 0.9664266705513,grad_norm: 0.9999990932337653, iteration: 23641
loss: 0.9901629686355591,grad_norm: 0.9943049286370219, iteration: 23642
loss: 0.9852988719940186,grad_norm: 0.9999992522294409, iteration: 23643
loss: 0.9982118010520935,grad_norm: 0.8686087134665543, iteration: 23644
loss: 1.0077136754989624,grad_norm: 0.99999909341502, iteration: 23645
loss: 1.0284311771392822,grad_norm: 0.9761040821704348, iteration: 23646
loss: 0.9965465068817139,grad_norm: 0.9999991883070936, iteration: 23647
loss: 1.0262221097946167,grad_norm: 0.9236933043029478, iteration: 23648
loss: 1.0209217071533203,grad_norm: 0.9999992155700429, iteration: 23649
loss: 1.0102207660675049,grad_norm: 0.9999992214467551, iteration: 23650
loss: 1.0198874473571777,grad_norm: 0.999998993074149, iteration: 23651
loss: 0.9763387441635132,grad_norm: 0.9999991099487214, iteration: 23652
loss: 1.0089285373687744,grad_norm: 0.8831610821799681, iteration: 23653
loss: 1.057274580001831,grad_norm: 0.9999993450627702, iteration: 23654
loss: 0.9960694313049316,grad_norm: 0.7968360986292733, iteration: 23655
loss: 1.0212087631225586,grad_norm: 0.9075921799134404, iteration: 23656
loss: 0.9758599400520325,grad_norm: 0.8380201095445973, iteration: 23657
loss: 1.0152937173843384,grad_norm: 0.9999990776624057, iteration: 23658
loss: 1.003291130065918,grad_norm: 0.9999992301916267, iteration: 23659
loss: 1.0023285150527954,grad_norm: 0.9675952468449176, iteration: 23660
loss: 0.9992352724075317,grad_norm: 0.9999992407563766, iteration: 23661
loss: 1.0120670795440674,grad_norm: 0.9999990880174913, iteration: 23662
loss: 1.0115840435028076,grad_norm: 0.9999995077979809, iteration: 23663
loss: 1.006069540977478,grad_norm: 0.9725109533178502, iteration: 23664
loss: 1.0286623239517212,grad_norm: 0.9999996369849953, iteration: 23665
loss: 1.0086780786514282,grad_norm: 0.9344799792630698, iteration: 23666
loss: 1.0050913095474243,grad_norm: 0.9999991271246674, iteration: 23667
loss: 1.034146785736084,grad_norm: 0.9999992927113208, iteration: 23668
loss: 0.9909902215003967,grad_norm: 0.9999990396881576, iteration: 23669
loss: 0.9851236343383789,grad_norm: 0.999999150475235, iteration: 23670
loss: 1.077954888343811,grad_norm: 0.9999990773146559, iteration: 23671
loss: 1.0498594045639038,grad_norm: 0.9999992029744288, iteration: 23672
loss: 1.0471786260604858,grad_norm: 0.9999990422585076, iteration: 23673
loss: 0.9948776364326477,grad_norm: 0.9999991716808294, iteration: 23674
loss: 1.002103567123413,grad_norm: 0.9469796342992963, iteration: 23675
loss: 1.0101442337036133,grad_norm: 0.8448465717590723, iteration: 23676
loss: 0.9945179224014282,grad_norm: 0.9999990795845686, iteration: 23677
loss: 1.0298007726669312,grad_norm: 0.9824662473993936, iteration: 23678
loss: 1.0457262992858887,grad_norm: 0.9999996510985372, iteration: 23679
loss: 0.996417224407196,grad_norm: 0.9999990036972947, iteration: 23680
loss: 1.0102964639663696,grad_norm: 0.9734794121785006, iteration: 23681
loss: 0.9981354475021362,grad_norm: 0.9157250852380355, iteration: 23682
loss: 1.0335476398468018,grad_norm: 0.9723919809778884, iteration: 23683
loss: 1.00403892993927,grad_norm: 0.873591113155306, iteration: 23684
loss: 1.0024833679199219,grad_norm: 0.9999991369182288, iteration: 23685
loss: 1.0208426713943481,grad_norm: 0.9999994098286341, iteration: 23686
loss: 1.037606954574585,grad_norm: 0.9999992606771397, iteration: 23687
loss: 1.0090687274932861,grad_norm: 0.9965435405288706, iteration: 23688
loss: 0.988085925579071,grad_norm: 0.9999993007898093, iteration: 23689
loss: 1.0507091283798218,grad_norm: 0.9999991631671469, iteration: 23690
loss: 0.9810687899589539,grad_norm: 0.9999991146388199, iteration: 23691
loss: 1.0120596885681152,grad_norm: 0.9999990237243692, iteration: 23692
loss: 1.0426239967346191,grad_norm: 0.9999990238908805, iteration: 23693
loss: 1.0069770812988281,grad_norm: 0.9999990743436108, iteration: 23694
loss: 1.0146901607513428,grad_norm: 0.9957904478933711, iteration: 23695
loss: 0.980030357837677,grad_norm: 0.9999992400114411, iteration: 23696
loss: 1.0076767206192017,grad_norm: 0.9668298660449565, iteration: 23697
loss: 0.9676113128662109,grad_norm: 0.8665966312701049, iteration: 23698
loss: 0.9685173034667969,grad_norm: 0.9999991696636841, iteration: 23699
loss: 1.0080633163452148,grad_norm: 0.9077623198410041, iteration: 23700
loss: 0.9796159863471985,grad_norm: 0.8657880631229685, iteration: 23701
loss: 0.9909005761146545,grad_norm: 0.8510556561372672, iteration: 23702
loss: 0.9975976347923279,grad_norm: 0.9605356842125009, iteration: 23703
loss: 1.002221703529358,grad_norm: 0.8948515670773136, iteration: 23704
loss: 1.0318719148635864,grad_norm: 0.9999993258669453, iteration: 23705
loss: 1.0018696784973145,grad_norm: 0.9935643181794117, iteration: 23706
loss: 1.015892505645752,grad_norm: 0.990997168697616, iteration: 23707
loss: 1.0136927366256714,grad_norm: 0.9999991129585892, iteration: 23708
loss: 1.0035489797592163,grad_norm: 0.9128583637849237, iteration: 23709
loss: 1.0228214263916016,grad_norm: 0.9490188342966306, iteration: 23710
loss: 1.0064948797225952,grad_norm: 0.9698792277989812, iteration: 23711
loss: 1.036342740058899,grad_norm: 0.9999990013131972, iteration: 23712
loss: 1.0541306734085083,grad_norm: 0.8995349011293419, iteration: 23713
loss: 1.0172491073608398,grad_norm: 0.999999112058289, iteration: 23714
loss: 0.9982210993766785,grad_norm: 0.9454040344176985, iteration: 23715
loss: 0.9899407625198364,grad_norm: 0.9478684245243463, iteration: 23716
loss: 0.9874149560928345,grad_norm: 0.9999990267492622, iteration: 23717
loss: 0.9955615401268005,grad_norm: 0.9999991704863482, iteration: 23718
loss: 1.0405795574188232,grad_norm: 0.9999995800035851, iteration: 23719
loss: 0.9663410186767578,grad_norm: 0.9999990266563056, iteration: 23720
loss: 1.0059337615966797,grad_norm: 0.9281816199779208, iteration: 23721
loss: 0.9967883229255676,grad_norm: 0.9999990502683187, iteration: 23722
loss: 0.9831877946853638,grad_norm: 0.9191165972277859, iteration: 23723
loss: 0.9980725646018982,grad_norm: 0.8533692957712188, iteration: 23724
loss: 1.0775943994522095,grad_norm: 0.9501079905096984, iteration: 23725
loss: 1.007249355316162,grad_norm: 0.9240165481551781, iteration: 23726
loss: 1.0058069229125977,grad_norm: 0.9999996549318103, iteration: 23727
loss: 0.9697341918945312,grad_norm: 0.9999991174119897, iteration: 23728
loss: 1.009108304977417,grad_norm: 0.9921575448920357, iteration: 23729
loss: 1.0289100408554077,grad_norm: 0.9999991366100496, iteration: 23730
loss: 0.9887387752532959,grad_norm: 0.8549047450098334, iteration: 23731
loss: 1.0518674850463867,grad_norm: 0.9188085152616512, iteration: 23732
loss: 1.0017149448394775,grad_norm: 0.8164133186006954, iteration: 23733
loss: 1.0076242685317993,grad_norm: 0.9297627420919532, iteration: 23734
loss: 0.986786961555481,grad_norm: 0.9969405484493853, iteration: 23735
loss: 1.008023977279663,grad_norm: 0.8241658872214035, iteration: 23736
loss: 1.0272917747497559,grad_norm: 0.9999990810447482, iteration: 23737
loss: 0.9742752313613892,grad_norm: 0.9123033164931534, iteration: 23738
loss: 1.0195097923278809,grad_norm: 0.9637918887231569, iteration: 23739
loss: 1.0418223142623901,grad_norm: 0.9999991056142882, iteration: 23740
loss: 1.0178073644638062,grad_norm: 0.7728914978190075, iteration: 23741
loss: 1.00563383102417,grad_norm: 0.9999990733478757, iteration: 23742
loss: 1.030332088470459,grad_norm: 0.9920243778059649, iteration: 23743
loss: 0.9787222743034363,grad_norm: 0.938083573372122, iteration: 23744
loss: 0.9970738887786865,grad_norm: 0.9999991517382004, iteration: 23745
loss: 1.0331178903579712,grad_norm: 0.9407843472822769, iteration: 23746
loss: 1.0001041889190674,grad_norm: 0.9999990516496893, iteration: 23747
loss: 0.9693273901939392,grad_norm: 0.9999990393701366, iteration: 23748
loss: 0.9889485836029053,grad_norm: 0.9278764325449618, iteration: 23749
loss: 0.9985268115997314,grad_norm: 0.7789401680320015, iteration: 23750
loss: 0.9960783123970032,grad_norm: 0.9708752183361095, iteration: 23751
loss: 0.9913418292999268,grad_norm: 0.9145199235862695, iteration: 23752
loss: 1.0208340883255005,grad_norm: 0.9999990134764151, iteration: 23753
loss: 1.078035593032837,grad_norm: 0.9999991917325309, iteration: 23754
loss: 0.9731433987617493,grad_norm: 0.9999990670917388, iteration: 23755
loss: 1.0073392391204834,grad_norm: 0.9685610422041792, iteration: 23756
loss: 1.0146429538726807,grad_norm: 0.916875304430434, iteration: 23757
loss: 1.0308758020401,grad_norm: 0.8719832211534623, iteration: 23758
loss: 1.0142664909362793,grad_norm: 0.9386566749972904, iteration: 23759
loss: 1.0239477157592773,grad_norm: 0.8875496176139807, iteration: 23760
loss: 0.9937773942947388,grad_norm: 0.9999990994903508, iteration: 23761
loss: 0.9897459149360657,grad_norm: 0.9999991413612593, iteration: 23762
loss: 1.0279887914657593,grad_norm: 0.9845354703585937, iteration: 23763
loss: 1.0388683080673218,grad_norm: 0.8995882826225934, iteration: 23764
loss: 1.0075074434280396,grad_norm: 0.9999990744538249, iteration: 23765
loss: 0.954603910446167,grad_norm: 0.9648881122566166, iteration: 23766
loss: 0.9805352687835693,grad_norm: 0.966189576272444, iteration: 23767
loss: 0.9983627200126648,grad_norm: 0.9999991256939349, iteration: 23768
loss: 1.0266635417938232,grad_norm: 0.8689650483380927, iteration: 23769
loss: 1.0068620443344116,grad_norm: 0.9999990960978796, iteration: 23770
loss: 1.039874792098999,grad_norm: 0.999999025621841, iteration: 23771
loss: 1.0545274019241333,grad_norm: 0.949042036355388, iteration: 23772
loss: 1.0402344465255737,grad_norm: 0.9999992650070165, iteration: 23773
loss: 1.0159658193588257,grad_norm: 0.893451521536397, iteration: 23774
loss: 1.0432833433151245,grad_norm: 0.9389429170834911, iteration: 23775
loss: 1.0056930780410767,grad_norm: 0.9922000372659512, iteration: 23776
loss: 1.0101470947265625,grad_norm: 0.9099117839341435, iteration: 23777
loss: 0.9498898386955261,grad_norm: 0.9999991464367579, iteration: 23778
loss: 0.9686564803123474,grad_norm: 0.9999992669720948, iteration: 23779
loss: 1.0170783996582031,grad_norm: 0.8150485774119405, iteration: 23780
loss: 1.0112613439559937,grad_norm: 0.9466147630662635, iteration: 23781
loss: 1.008658766746521,grad_norm: 0.9999993141542317, iteration: 23782
loss: 1.0643084049224854,grad_norm: 0.9670713464115921, iteration: 23783
loss: 1.0496716499328613,grad_norm: 0.9999992511801679, iteration: 23784
loss: 0.9997702240943909,grad_norm: 0.999999344943574, iteration: 23785
loss: 1.032911777496338,grad_norm: 0.939259721611873, iteration: 23786
loss: 1.001089334487915,grad_norm: 0.9999990434782935, iteration: 23787
loss: 1.0290852785110474,grad_norm: 0.9999993075779362, iteration: 23788
loss: 0.9968143701553345,grad_norm: 0.9715324817869015, iteration: 23789
loss: 1.046839714050293,grad_norm: 0.932268791213402, iteration: 23790
loss: 1.0061155557632446,grad_norm: 0.9999991271825683, iteration: 23791
loss: 0.9923011660575867,grad_norm: 0.9967501995371576, iteration: 23792
loss: 0.9834843873977661,grad_norm: 0.9329913080497791, iteration: 23793
loss: 0.9464894533157349,grad_norm: 0.9826371385945064, iteration: 23794
loss: 1.0181808471679688,grad_norm: 0.9697473898399179, iteration: 23795
loss: 1.0078126192092896,grad_norm: 0.9407585076058861, iteration: 23796
loss: 1.0534393787384033,grad_norm: 0.999999831037753, iteration: 23797
loss: 1.0165337324142456,grad_norm: 0.9978232054086016, iteration: 23798
loss: 1.0311256647109985,grad_norm: 0.9311412073378044, iteration: 23799
loss: 1.0441330671310425,grad_norm: 0.9999991316727748, iteration: 23800
loss: 1.0008103847503662,grad_norm: 0.99999918114884, iteration: 23801
loss: 0.968793511390686,grad_norm: 0.9999991099557183, iteration: 23802
loss: 1.0031006336212158,grad_norm: 0.9999991240099944, iteration: 23803
loss: 0.9929797649383545,grad_norm: 0.9073633946531207, iteration: 23804
loss: 0.9998903870582581,grad_norm: 0.8809647046912816, iteration: 23805
loss: 1.023247241973877,grad_norm: 0.9999990824159328, iteration: 23806
loss: 0.9658109545707703,grad_norm: 0.8846020378581966, iteration: 23807
loss: 1.013088583946228,grad_norm: 0.9734839427009565, iteration: 23808
loss: 1.0088433027267456,grad_norm: 0.9999991803432589, iteration: 23809
loss: 0.9813568592071533,grad_norm: 0.9999990197571118, iteration: 23810
loss: 0.9750348925590515,grad_norm: 0.8575175658545386, iteration: 23811
loss: 1.011472463607788,grad_norm: 0.999999296255804, iteration: 23812
loss: 0.9976636171340942,grad_norm: 0.953379352866417, iteration: 23813
loss: 1.0208529233932495,grad_norm: 0.9999990921172146, iteration: 23814
loss: 1.008241057395935,grad_norm: 0.8114460425977352, iteration: 23815
loss: 1.044719934463501,grad_norm: 0.9999992922073978, iteration: 23816
loss: 1.020249366760254,grad_norm: 0.9239505837003134, iteration: 23817
loss: 1.0494009256362915,grad_norm: 0.9999991420746902, iteration: 23818
loss: 1.0942248106002808,grad_norm: 0.9999992627923439, iteration: 23819
loss: 1.0087858438491821,grad_norm: 0.9999990080644682, iteration: 23820
loss: 1.0035020112991333,grad_norm: 0.9999999117213673, iteration: 23821
loss: 1.0108063220977783,grad_norm: 0.9662948292785911, iteration: 23822
loss: 1.0170462131500244,grad_norm: 0.9181603200094218, iteration: 23823
loss: 1.0176862478256226,grad_norm: 0.9746383437732196, iteration: 23824
loss: 1.0271484851837158,grad_norm: 0.9999990889119849, iteration: 23825
loss: 1.0371031761169434,grad_norm: 0.9337621602503842, iteration: 23826
loss: 1.0440733432769775,grad_norm: 0.912700604728001, iteration: 23827
loss: 0.985655665397644,grad_norm: 0.9999992034619605, iteration: 23828
loss: 1.0253382921218872,grad_norm: 0.9917874955462974, iteration: 23829
loss: 0.9813213348388672,grad_norm: 0.9595394885016505, iteration: 23830
loss: 0.9810240268707275,grad_norm: 0.90933154846419, iteration: 23831
loss: 1.0464450120925903,grad_norm: 0.9999992201335905, iteration: 23832
loss: 0.9554274678230286,grad_norm: 0.999999212734231, iteration: 23833
loss: 1.0234270095825195,grad_norm: 0.9690109563593415, iteration: 23834
loss: 1.0475271940231323,grad_norm: 0.9999990903570748, iteration: 23835
loss: 1.0530495643615723,grad_norm: 0.8617051341993296, iteration: 23836
loss: 0.9788432717323303,grad_norm: 0.9999990249748673, iteration: 23837
loss: 1.019181489944458,grad_norm: 0.999999224763664, iteration: 23838
loss: 1.007874608039856,grad_norm: 0.9999992733052482, iteration: 23839
loss: 0.9866704344749451,grad_norm: 0.9999992067054756, iteration: 23840
loss: 1.0220472812652588,grad_norm: 0.8872916268591567, iteration: 23841
loss: 0.9967555403709412,grad_norm: 0.999999167836735, iteration: 23842
loss: 0.9991469979286194,grad_norm: 0.9999991386242243, iteration: 23843
loss: 1.007717490196228,grad_norm: 0.9999990041684647, iteration: 23844
loss: 1.0439555644989014,grad_norm: 0.9942289990835244, iteration: 23845
loss: 1.0009311437606812,grad_norm: 0.9803046111567894, iteration: 23846
loss: 1.0340756177902222,grad_norm: 0.9999995896738162, iteration: 23847
loss: 1.0387842655181885,grad_norm: 0.9999993109217445, iteration: 23848
loss: 1.023185133934021,grad_norm: 0.8877444979961698, iteration: 23849
loss: 1.0045310258865356,grad_norm: 0.9208719820037944, iteration: 23850
loss: 1.0255228281021118,grad_norm: 0.9999993274290764, iteration: 23851
loss: 1.0521204471588135,grad_norm: 0.999999130906343, iteration: 23852
loss: 1.0205636024475098,grad_norm: 0.9999992446153273, iteration: 23853
loss: 1.0008105039596558,grad_norm: 0.9999996955159598, iteration: 23854
loss: 0.9889233708381653,grad_norm: 0.9606815800734844, iteration: 23855
loss: 0.9813380837440491,grad_norm: 0.9176892690990779, iteration: 23856
loss: 1.022239089012146,grad_norm: 0.8734780717462388, iteration: 23857
loss: 1.0041229724884033,grad_norm: 0.8549036801549519, iteration: 23858
loss: 1.0375487804412842,grad_norm: 0.9999995766500567, iteration: 23859
loss: 1.0027424097061157,grad_norm: 0.9751038751554524, iteration: 23860
loss: 1.038861870765686,grad_norm: 0.9999989629933369, iteration: 23861
loss: 0.9721921682357788,grad_norm: 0.9999990365178058, iteration: 23862
loss: 1.0118193626403809,grad_norm: 0.7909019161862934, iteration: 23863
loss: 1.0034804344177246,grad_norm: 0.9999991183121997, iteration: 23864
loss: 1.022790551185608,grad_norm: 0.8873933584428517, iteration: 23865
loss: 1.0555649995803833,grad_norm: 0.9999992603490387, iteration: 23866
loss: 1.1085023880004883,grad_norm: 0.9999997261965657, iteration: 23867
loss: 1.0072021484375,grad_norm: 0.9999994887193372, iteration: 23868
loss: 0.9899499416351318,grad_norm: 0.9999991487241384, iteration: 23869
loss: 1.0238046646118164,grad_norm: 0.9473628592571067, iteration: 23870
loss: 0.9774015545845032,grad_norm: 0.9877183398245492, iteration: 23871
loss: 0.9876090884208679,grad_norm: 0.9999990255729384, iteration: 23872
loss: 1.0160523653030396,grad_norm: 0.9999991726038171, iteration: 23873
loss: 0.9922747611999512,grad_norm: 0.9999991350215268, iteration: 23874
loss: 1.0401440858840942,grad_norm: 0.9999993961368214, iteration: 23875
loss: 1.0155138969421387,grad_norm: 0.9999992167403997, iteration: 23876
loss: 0.9878877997398376,grad_norm: 0.9999991498868861, iteration: 23877
loss: 1.0161511898040771,grad_norm: 0.999999178787885, iteration: 23878
loss: 1.0121431350708008,grad_norm: 0.9999992044412259, iteration: 23879
loss: 1.0006552934646606,grad_norm: 0.9237215675019818, iteration: 23880
loss: 0.9984696507453918,grad_norm: 0.9426699498452513, iteration: 23881
loss: 1.0030511617660522,grad_norm: 0.9999990707689417, iteration: 23882
loss: 1.0210274457931519,grad_norm: 0.9464826247866159, iteration: 23883
loss: 1.0054659843444824,grad_norm: 0.9227355715301356, iteration: 23884
loss: 1.0381178855895996,grad_norm: 0.999999154144653, iteration: 23885
loss: 0.9948840737342834,grad_norm: 0.9491533621468857, iteration: 23886
loss: 0.9979951977729797,grad_norm: 0.9663873004667378, iteration: 23887
loss: 0.958008885383606,grad_norm: 0.9356274282051921, iteration: 23888
loss: 0.9899070858955383,grad_norm: 0.9387688979952566, iteration: 23889
loss: 0.982655942440033,grad_norm: 0.9999990223903066, iteration: 23890
loss: 1.0687892436981201,grad_norm: 0.9680520402299755, iteration: 23891
loss: 0.9863525629043579,grad_norm: 0.999999166430039, iteration: 23892
loss: 1.0056893825531006,grad_norm: 0.9999997484392643, iteration: 23893
loss: 0.9851082563400269,grad_norm: 0.999999057101748, iteration: 23894
loss: 1.0247480869293213,grad_norm: 0.9999990868367807, iteration: 23895
loss: 1.0070207118988037,grad_norm: 0.9103681044383574, iteration: 23896
loss: 1.0110174417495728,grad_norm: 0.9999990042641073, iteration: 23897
loss: 1.0229897499084473,grad_norm: 0.9999992680604026, iteration: 23898
loss: 0.976149320602417,grad_norm: 0.8802748611124768, iteration: 23899
loss: 0.9667266011238098,grad_norm: 0.9999990562281278, iteration: 23900
loss: 1.0307161808013916,grad_norm: 0.92283291213319, iteration: 23901
loss: 1.0033549070358276,grad_norm: 0.9092430763349846, iteration: 23902
loss: 1.023256540298462,grad_norm: 0.8823428927219287, iteration: 23903
loss: 1.0308493375778198,grad_norm: 0.9661448199550805, iteration: 23904
loss: 0.991828978061676,grad_norm: 0.8072979144299737, iteration: 23905
loss: 0.9668150544166565,grad_norm: 0.9999991240947889, iteration: 23906
loss: 0.9861488938331604,grad_norm: 0.9999993103438759, iteration: 23907
loss: 0.9894287586212158,grad_norm: 0.9999992010832562, iteration: 23908
loss: 0.9647606015205383,grad_norm: 0.9999990911181884, iteration: 23909
loss: 1.0288703441619873,grad_norm: 0.9453405849040423, iteration: 23910
loss: 1.021549105644226,grad_norm: 0.9999991099636001, iteration: 23911
loss: 1.02847158908844,grad_norm: 0.9458779242763585, iteration: 23912
loss: 1.014925479888916,grad_norm: 0.9662532395259374, iteration: 23913
loss: 1.0110878944396973,grad_norm: 0.9999991174100659, iteration: 23914
loss: 0.9964823722839355,grad_norm: 0.9999992981680951, iteration: 23915
loss: 1.014032006263733,grad_norm: 0.9816759836045277, iteration: 23916
loss: 1.019762396812439,grad_norm: 0.9476004526871369, iteration: 23917
loss: 1.0076193809509277,grad_norm: 0.9319415459117713, iteration: 23918
loss: 1.0182902812957764,grad_norm: 0.9999990881795818, iteration: 23919
loss: 1.0171717405319214,grad_norm: 0.9999990534539127, iteration: 23920
loss: 1.0122876167297363,grad_norm: 0.9999991318132235, iteration: 23921
loss: 0.9991660714149475,grad_norm: 0.8855442327911762, iteration: 23922
loss: 0.9809555411338806,grad_norm: 0.9640616561706378, iteration: 23923
loss: 0.9981264472007751,grad_norm: 0.9999991079172358, iteration: 23924
loss: 1.0153671503067017,grad_norm: 0.9023156039966557, iteration: 23925
loss: 0.9830235838890076,grad_norm: 0.8921177000940972, iteration: 23926
loss: 1.0444860458374023,grad_norm: 0.9999993445306004, iteration: 23927
loss: 1.0269743204116821,grad_norm: 0.9999997041411125, iteration: 23928
loss: 1.0110009908676147,grad_norm: 0.9149627498656838, iteration: 23929
loss: 1.0271896123886108,grad_norm: 0.9572814623986152, iteration: 23930
loss: 1.0525010824203491,grad_norm: 0.9999992437324284, iteration: 23931
loss: 0.9920510053634644,grad_norm: 0.9999992548750318, iteration: 23932
loss: 1.0125316381454468,grad_norm: 0.9999990683199163, iteration: 23933
loss: 1.0169414281845093,grad_norm: 0.9999993329911312, iteration: 23934
loss: 1.0417771339416504,grad_norm: 0.999999249569917, iteration: 23935
loss: 1.0532886981964111,grad_norm: 0.9996853033154196, iteration: 23936
loss: 1.0166445970535278,grad_norm: 0.9386612492968057, iteration: 23937
loss: 1.0151807069778442,grad_norm: 0.9999990577060562, iteration: 23938
loss: 1.0289674997329712,grad_norm: 0.9999996552887882, iteration: 23939
loss: 1.0050982236862183,grad_norm: 0.9999993430825032, iteration: 23940
loss: 1.0256645679473877,grad_norm: 0.9999991759917397, iteration: 23941
loss: 1.017635464668274,grad_norm: 0.9457392830140497, iteration: 23942
loss: 1.0085805654525757,grad_norm: 0.9999992576357637, iteration: 23943
loss: 0.9988319277763367,grad_norm: 0.913924516759483, iteration: 23944
loss: 1.038266897201538,grad_norm: 0.9999990341007183, iteration: 23945
loss: 1.0389000177383423,grad_norm: 0.9999990989702617, iteration: 23946
loss: 1.0434410572052002,grad_norm: 0.999999215600653, iteration: 23947
loss: 1.0503380298614502,grad_norm: 0.999999023460688, iteration: 23948
loss: 1.0343067646026611,grad_norm: 0.9999989937115239, iteration: 23949
loss: 1.0521680116653442,grad_norm: 0.9999992936804392, iteration: 23950
loss: 1.0393143892288208,grad_norm: 0.9777935514606183, iteration: 23951
loss: 1.0008293390274048,grad_norm: 0.9999991125171422, iteration: 23952
loss: 1.0984094142913818,grad_norm: 0.9999990857111959, iteration: 23953
loss: 1.040015697479248,grad_norm: 0.8986792770814406, iteration: 23954
loss: 1.0370620489120483,grad_norm: 0.9406179975279294, iteration: 23955
loss: 0.990348219871521,grad_norm: 0.9907438099404081, iteration: 23956
loss: 1.055915355682373,grad_norm: 0.947753316913337, iteration: 23957
loss: 1.004983901977539,grad_norm: 0.9999992831207456, iteration: 23958
loss: 1.0074050426483154,grad_norm: 0.9247247513816346, iteration: 23959
loss: 0.9847241044044495,grad_norm: 0.8601854747374519, iteration: 23960
loss: 1.00040602684021,grad_norm: 0.969426599567803, iteration: 23961
loss: 0.9993763566017151,grad_norm: 0.9884085321927994, iteration: 23962
loss: 1.0417464971542358,grad_norm: 0.9057298982019976, iteration: 23963
loss: 1.0086637735366821,grad_norm: 0.9023846456076959, iteration: 23964
loss: 1.0211104154586792,grad_norm: 0.9999991144986423, iteration: 23965
loss: 1.0095062255859375,grad_norm: 0.9999996115180629, iteration: 23966
loss: 0.9877716302871704,grad_norm: 0.9999992442423266, iteration: 23967
loss: 1.0081086158752441,grad_norm: 0.9999995474949597, iteration: 23968
loss: 1.0359375476837158,grad_norm: 0.9281240778811223, iteration: 23969
loss: 1.002545714378357,grad_norm: 0.8045491935115295, iteration: 23970
loss: 1.0093175172805786,grad_norm: 0.9999992451502692, iteration: 23971
loss: 1.0266828536987305,grad_norm: 0.9731651764196055, iteration: 23972
loss: 1.030657410621643,grad_norm: 0.9999991737149192, iteration: 23973
loss: 1.0474858283996582,grad_norm: 0.9999991278806195, iteration: 23974
loss: 1.0121713876724243,grad_norm: 0.9999992333071994, iteration: 23975
loss: 1.0155845880508423,grad_norm: 0.9999993233398135, iteration: 23976
loss: 0.9786939024925232,grad_norm: 0.9636450907539235, iteration: 23977
loss: 1.0019428730010986,grad_norm: 0.9217776812850174, iteration: 23978
loss: 0.9580503106117249,grad_norm: 0.9999990546254629, iteration: 23979
loss: 1.0058521032333374,grad_norm: 0.9999990580988993, iteration: 23980
loss: 1.0282717943191528,grad_norm: 0.8706952626071341, iteration: 23981
loss: 1.0243335962295532,grad_norm: 0.9999993897999616, iteration: 23982
loss: 1.0234047174453735,grad_norm: 0.9999989471394622, iteration: 23983
loss: 1.0192887783050537,grad_norm: 0.9999990650651536, iteration: 23984
loss: 1.012518048286438,grad_norm: 0.9999991025160226, iteration: 23985
loss: 1.0583113431930542,grad_norm: 0.9999990897889499, iteration: 23986
loss: 1.0214096307754517,grad_norm: 0.8653577718779196, iteration: 23987
loss: 1.0123978853225708,grad_norm: 0.9999994118527437, iteration: 23988
loss: 0.9882554411888123,grad_norm: 0.9865900981352272, iteration: 23989
loss: 0.9857417941093445,grad_norm: 0.9933807077255971, iteration: 23990
loss: 0.9925729632377625,grad_norm: 0.9999991375834456, iteration: 23991
loss: 1.0535600185394287,grad_norm: 0.9554516442093589, iteration: 23992
loss: 0.9531558752059937,grad_norm: 0.999999136021421, iteration: 23993
loss: 1.0252954959869385,grad_norm: 0.9850068413469574, iteration: 23994
loss: 1.0501936674118042,grad_norm: 0.9999991013150971, iteration: 23995
loss: 1.0205384492874146,grad_norm: 0.8191601396777366, iteration: 23996
loss: 1.0080864429473877,grad_norm: 0.9011302418750221, iteration: 23997
loss: 0.9966386556625366,grad_norm: 0.999999728349145, iteration: 23998
loss: 1.0386184453964233,grad_norm: 0.9999989818365955, iteration: 23999
loss: 0.9724047183990479,grad_norm: 0.9807693978408711, iteration: 24000
loss: 1.0774447917938232,grad_norm: 0.9999994305879895, iteration: 24001
loss: 1.008010745048523,grad_norm: 0.9999991091236671, iteration: 24002
loss: 0.98811936378479,grad_norm: 0.9999990168333857, iteration: 24003
loss: 0.9691242575645447,grad_norm: 0.9999991840101711, iteration: 24004
loss: 1.0113698244094849,grad_norm: 0.9999990706867937, iteration: 24005
loss: 1.0135236978530884,grad_norm: 0.9067391599273174, iteration: 24006
loss: 1.0243357419967651,grad_norm: 0.9999992211756582, iteration: 24007
loss: 1.0089757442474365,grad_norm: 0.9999992338961953, iteration: 24008
loss: 0.9712956547737122,grad_norm: 0.9999989161303189, iteration: 24009
loss: 0.9998587369918823,grad_norm: 0.9999991109074011, iteration: 24010
loss: 1.029402732849121,grad_norm: 0.9999992936755004, iteration: 24011
loss: 1.0077301263809204,grad_norm: 0.9173465204881842, iteration: 24012
loss: 1.0892037153244019,grad_norm: 0.9999995774223739, iteration: 24013
loss: 1.0592472553253174,grad_norm: 0.9999992786203594, iteration: 24014
loss: 0.9864492416381836,grad_norm: 0.9999991724413956, iteration: 24015
loss: 1.0957576036453247,grad_norm: 0.9999996652793842, iteration: 24016
loss: 0.9964122772216797,grad_norm: 0.9999990423061595, iteration: 24017
loss: 0.9798257350921631,grad_norm: 0.9831200573681338, iteration: 24018
loss: 1.007704496383667,grad_norm: 0.999999230780001, iteration: 24019
loss: 0.9803511500358582,grad_norm: 0.9999990624252347, iteration: 24020
loss: 0.9974653124809265,grad_norm: 0.9059668001125716, iteration: 24021
loss: 1.0224862098693848,grad_norm: 0.9999990250072497, iteration: 24022
loss: 1.034179925918579,grad_norm: 0.9999991034053788, iteration: 24023
loss: 1.0294625759124756,grad_norm: 0.9999990981100925, iteration: 24024
loss: 1.0147054195404053,grad_norm: 0.8724946838678072, iteration: 24025
loss: 1.0415805578231812,grad_norm: 0.9887904980197972, iteration: 24026
loss: 1.0331041812896729,grad_norm: 0.9999995762682253, iteration: 24027
loss: 0.9973121881484985,grad_norm: 0.9999997532617508, iteration: 24028
loss: 1.0434318780899048,grad_norm: 0.999999559923613, iteration: 24029
loss: 1.0321699380874634,grad_norm: 0.9999993263536308, iteration: 24030
loss: 1.0595662593841553,grad_norm: 0.9999996331176578, iteration: 24031
loss: 1.0410078763961792,grad_norm: 0.999999074004477, iteration: 24032
loss: 1.0485860109329224,grad_norm: 0.9999998927339232, iteration: 24033
loss: 1.0349321365356445,grad_norm: 0.9999990764249757, iteration: 24034
loss: 1.0706381797790527,grad_norm: 0.9999989953058912, iteration: 24035
loss: 1.0162885189056396,grad_norm: 0.9999990592903821, iteration: 24036
loss: 1.0089515447616577,grad_norm: 0.9999990884408221, iteration: 24037
loss: 1.0298436880111694,grad_norm: 0.8646825508828494, iteration: 24038
loss: 1.0266878604888916,grad_norm: 0.9999990453447377, iteration: 24039
loss: 1.0193076133728027,grad_norm: 0.8649247583358928, iteration: 24040
loss: 1.0241663455963135,grad_norm: 0.9462345378749834, iteration: 24041
loss: 0.9817706942558289,grad_norm: 0.9999990114490205, iteration: 24042
loss: 0.9766474366188049,grad_norm: 0.9999991809125554, iteration: 24043
loss: 1.0101194381713867,grad_norm: 0.9999992231492085, iteration: 24044
loss: 1.0182654857635498,grad_norm: 0.9999991252626699, iteration: 24045
loss: 1.0088410377502441,grad_norm: 0.9999996726049338, iteration: 24046
loss: 1.0207889080047607,grad_norm: 0.9999989886219067, iteration: 24047
loss: 1.003615140914917,grad_norm: 0.9999991209101915, iteration: 24048
loss: 1.0196627378463745,grad_norm: 0.9136368283141836, iteration: 24049
loss: 1.0246795415878296,grad_norm: 0.9999991812711614, iteration: 24050
loss: 1.0069408416748047,grad_norm: 0.9999991042474192, iteration: 24051
loss: 1.0237957239151,grad_norm: 0.9999992932896384, iteration: 24052
loss: 0.9916474223136902,grad_norm: 0.9999991220959518, iteration: 24053
loss: 1.0896340608596802,grad_norm: 0.9999998214170017, iteration: 24054
loss: 1.0378129482269287,grad_norm: 0.9999997763700204, iteration: 24055
loss: 1.0064998865127563,grad_norm: 0.9999992380074707, iteration: 24056
loss: 1.0290930271148682,grad_norm: 0.9999992643661324, iteration: 24057
loss: 1.053911566734314,grad_norm: 0.9885119217813119, iteration: 24058
loss: 1.02630615234375,grad_norm: 0.7942407424161578, iteration: 24059
loss: 1.0011680126190186,grad_norm: 0.9999990677280443, iteration: 24060
loss: 0.9958792328834534,grad_norm: 0.9999989555070115, iteration: 24061
loss: 1.0476621389389038,grad_norm: 0.8647241724167771, iteration: 24062
loss: 1.0576467514038086,grad_norm: 0.9999995374126901, iteration: 24063
loss: 0.9622209668159485,grad_norm: 0.9313183883071439, iteration: 24064
loss: 0.9787718057632446,grad_norm: 0.9999991857882221, iteration: 24065
loss: 1.002220869064331,grad_norm: 0.9999990627509246, iteration: 24066
loss: 1.0438610315322876,grad_norm: 0.982658521923378, iteration: 24067
loss: 0.9820007681846619,grad_norm: 0.9596570113611765, iteration: 24068
loss: 0.9910503029823303,grad_norm: 0.9043043809277501, iteration: 24069
loss: 1.0350569486618042,grad_norm: 0.9999990685322061, iteration: 24070
loss: 0.9944683909416199,grad_norm: 0.9999991334302042, iteration: 24071
loss: 1.0578895807266235,grad_norm: 0.9999990190506213, iteration: 24072
loss: 1.0409537553787231,grad_norm: 0.9801232657387998, iteration: 24073
loss: 1.0289090871810913,grad_norm: 0.8677717980852879, iteration: 24074
loss: 1.0288406610488892,grad_norm: 0.9999991700534909, iteration: 24075
loss: 1.0181950330734253,grad_norm: 0.9999991688602855, iteration: 24076
loss: 1.0154972076416016,grad_norm: 0.9999995681506214, iteration: 24077
loss: 1.0429171323776245,grad_norm: 0.9999993214006766, iteration: 24078
loss: 1.03960382938385,grad_norm: 0.9999991072797579, iteration: 24079
loss: 1.0392788648605347,grad_norm: 0.9527676021594415, iteration: 24080
loss: 1.0172843933105469,grad_norm: 0.9864156293077939, iteration: 24081
loss: 0.9882425665855408,grad_norm: 0.9999992791448317, iteration: 24082
loss: 1.0181894302368164,grad_norm: 0.9269351707163198, iteration: 24083
loss: 1.0140362977981567,grad_norm: 0.8355712200176548, iteration: 24084
loss: 1.0164971351623535,grad_norm: 0.9364188079033651, iteration: 24085
loss: 1.0789631605148315,grad_norm: 0.9999996196533375, iteration: 24086
loss: 1.019323706626892,grad_norm: 0.7690236844884315, iteration: 24087
loss: 0.9902106523513794,grad_norm: 0.9999990123156721, iteration: 24088
loss: 1.0722848176956177,grad_norm: 0.9999996335071105, iteration: 24089
loss: 1.0345357656478882,grad_norm: 0.9999991130124268, iteration: 24090
loss: 1.0170156955718994,grad_norm: 0.9999992592155211, iteration: 24091
loss: 0.9835597276687622,grad_norm: 0.9286495844405676, iteration: 24092
loss: 0.9885814785957336,grad_norm: 0.9999991625881541, iteration: 24093
loss: 1.0185797214508057,grad_norm: 0.846784896882395, iteration: 24094
loss: 1.0362886190414429,grad_norm: 0.9999991074430443, iteration: 24095
loss: 1.0430192947387695,grad_norm: 0.9999990734878159, iteration: 24096
loss: 1.0242332220077515,grad_norm: 0.9999990908625465, iteration: 24097
loss: 0.9595232605934143,grad_norm: 0.999999276321332, iteration: 24098
loss: 1.0321215391159058,grad_norm: 0.9999995088801715, iteration: 24099
loss: 0.9788230061531067,grad_norm: 0.8748246527899931, iteration: 24100
loss: 1.0153889656066895,grad_norm: 0.9243090661924137, iteration: 24101
loss: 1.0305602550506592,grad_norm: 0.978474519250547, iteration: 24102
loss: 1.0253320932388306,grad_norm: 0.9999993738522801, iteration: 24103
loss: 1.0164954662322998,grad_norm: 0.8731185341263361, iteration: 24104
loss: 1.0177947282791138,grad_norm: 0.9999993479749107, iteration: 24105
loss: 1.0259860754013062,grad_norm: 0.9999991000781419, iteration: 24106
loss: 0.9883810877799988,grad_norm: 0.7627594786437291, iteration: 24107
loss: 0.9855256676673889,grad_norm: 0.927989547718536, iteration: 24108
loss: 1.0004432201385498,grad_norm: 0.9275255313916438, iteration: 24109
loss: 1.028537392616272,grad_norm: 0.999999076670978, iteration: 24110
loss: 1.0405397415161133,grad_norm: 0.9999992240772215, iteration: 24111
loss: 1.0183099508285522,grad_norm: 0.999999112939372, iteration: 24112
loss: 1.0376478433609009,grad_norm: 0.9282551302044929, iteration: 24113
loss: 1.0002022981643677,grad_norm: 0.9455687909370267, iteration: 24114
loss: 0.9983375072479248,grad_norm: 0.9999991158725489, iteration: 24115
loss: 1.0047414302825928,grad_norm: 0.956299674180628, iteration: 24116
loss: 1.03501558303833,grad_norm: 0.9923161179536908, iteration: 24117
loss: 0.9763093590736389,grad_norm: 0.9999989868086497, iteration: 24118
loss: 1.0395323038101196,grad_norm: 0.9999990316623233, iteration: 24119
loss: 1.0269882678985596,grad_norm: 0.9999990844088906, iteration: 24120
loss: 1.0107425451278687,grad_norm: 0.888753984981099, iteration: 24121
loss: 1.012197732925415,grad_norm: 0.8913845971247207, iteration: 24122
loss: 1.0077260732650757,grad_norm: 0.9999990587899981, iteration: 24123
loss: 1.0372411012649536,grad_norm: 0.9999990817123378, iteration: 24124
loss: 1.0163309574127197,grad_norm: 0.9097223322908706, iteration: 24125
loss: 1.0242037773132324,grad_norm: 0.9999991011969275, iteration: 24126
loss: 1.0038515329360962,grad_norm: 0.9999992767423389, iteration: 24127
loss: 1.0126408338546753,grad_norm: 0.9955986060691221, iteration: 24128
loss: 0.9904283881187439,grad_norm: 0.8214484010384981, iteration: 24129
loss: 0.9761591553688049,grad_norm: 0.9423184784714387, iteration: 24130
loss: 1.0207457542419434,grad_norm: 0.9999998392576656, iteration: 24131
loss: 1.0102888345718384,grad_norm: 0.9061512430350207, iteration: 24132
loss: 0.9749292731285095,grad_norm: 0.946515206127515, iteration: 24133
loss: 1.0233080387115479,grad_norm: 0.8992089702646253, iteration: 24134
loss: 0.9798306226730347,grad_norm: 0.9999996237262441, iteration: 24135
loss: 1.0203044414520264,grad_norm: 0.9755193748939563, iteration: 24136
loss: 0.9987284541130066,grad_norm: 0.99999926552703, iteration: 24137
loss: 0.9562167525291443,grad_norm: 0.9289713172726232, iteration: 24138
loss: 1.0340670347213745,grad_norm: 0.999999296683679, iteration: 24139
loss: 0.9970862865447998,grad_norm: 0.9999991377816801, iteration: 24140
loss: 1.0185989141464233,grad_norm: 0.9999991285004163, iteration: 24141
loss: 1.024129867553711,grad_norm: 0.9999991942152462, iteration: 24142
loss: 0.9731416702270508,grad_norm: 0.9889833702751712, iteration: 24143
loss: 1.0198919773101807,grad_norm: 0.9999991233315665, iteration: 24144
loss: 0.9734999537467957,grad_norm: 0.9999989750817221, iteration: 24145
loss: 1.0573601722717285,grad_norm: 0.9999998575874357, iteration: 24146
loss: 1.0271967649459839,grad_norm: 0.9999991466192792, iteration: 24147
loss: 1.0000455379486084,grad_norm: 0.9999990905631398, iteration: 24148
loss: 1.0609980821609497,grad_norm: 0.9999993629549468, iteration: 24149
loss: 1.104099988937378,grad_norm: 0.9999995335648295, iteration: 24150
loss: 1.0418472290039062,grad_norm: 0.999999035583274, iteration: 24151
loss: 1.038658857345581,grad_norm: 0.9999997528283652, iteration: 24152
loss: 1.0146616697311401,grad_norm: 0.9999990686422744, iteration: 24153
loss: 1.1053873300552368,grad_norm: 0.9999999382697868, iteration: 24154
loss: 1.0454826354980469,grad_norm: 0.9999993920266923, iteration: 24155
loss: 1.001871109008789,grad_norm: 0.8804721186557113, iteration: 24156
loss: 1.048769474029541,grad_norm: 0.9999996627496127, iteration: 24157
loss: 1.0804576873779297,grad_norm: 0.9999996064935212, iteration: 24158
loss: 1.044498085975647,grad_norm: 0.9259727353893427, iteration: 24159
loss: 1.0012757778167725,grad_norm: 0.9023024005385284, iteration: 24160
loss: 1.01852548122406,grad_norm: 0.9999990344451096, iteration: 24161
loss: 1.0042589902877808,grad_norm: 0.9391178645564732, iteration: 24162
loss: 1.0332072973251343,grad_norm: 0.999999601298534, iteration: 24163
loss: 0.9877249598503113,grad_norm: 0.9999994821529783, iteration: 24164
loss: 0.9877570867538452,grad_norm: 0.9999994006821376, iteration: 24165
loss: 0.9903380274772644,grad_norm: 0.9999991955342022, iteration: 24166
loss: 0.9466605186462402,grad_norm: 0.9682650258076351, iteration: 24167
loss: 1.0157530307769775,grad_norm: 0.9999990495857104, iteration: 24168
loss: 1.0699933767318726,grad_norm: 0.9999998877890962, iteration: 24169
loss: 1.0173026323318481,grad_norm: 0.9441595105649201, iteration: 24170
loss: 0.9979586601257324,grad_norm: 0.9999992079880019, iteration: 24171
loss: 0.9743872880935669,grad_norm: 0.9999990664491382, iteration: 24172
loss: 1.034072995185852,grad_norm: 0.9792160995072445, iteration: 24173
loss: 0.9807955026626587,grad_norm: 0.999999871958331, iteration: 24174
loss: 1.014321208000183,grad_norm: 0.9999996955136243, iteration: 24175
loss: 1.0075079202651978,grad_norm: 0.9874235095779526, iteration: 24176
loss: 1.0305148363113403,grad_norm: 0.9999990770513525, iteration: 24177
loss: 1.0170063972473145,grad_norm: 0.9999992526126203, iteration: 24178
loss: 1.0004116296768188,grad_norm: 0.9999990900409348, iteration: 24179
loss: 1.0605682134628296,grad_norm: 0.9593313161985223, iteration: 24180
loss: 1.0135761499404907,grad_norm: 0.9999992509980751, iteration: 24181
loss: 1.0006622076034546,grad_norm: 0.9086183874938355, iteration: 24182
loss: 0.9758972525596619,grad_norm: 0.8786043027148108, iteration: 24183
loss: 0.9686737060546875,grad_norm: 0.999999124925795, iteration: 24184
loss: 1.028649091720581,grad_norm: 0.9999992091284384, iteration: 24185
loss: 1.0126149654388428,grad_norm: 0.9715310358877172, iteration: 24186
loss: 1.0159835815429688,grad_norm: 0.8281652554108685, iteration: 24187
loss: 1.015592336654663,grad_norm: 0.7874454203945409, iteration: 24188
loss: 1.0060603618621826,grad_norm: 0.9999990824660643, iteration: 24189
loss: 1.0011622905731201,grad_norm: 0.8872915718655797, iteration: 24190
loss: 0.9834499955177307,grad_norm: 0.9999991447479641, iteration: 24191
loss: 0.9724867939949036,grad_norm: 0.9968306775278456, iteration: 24192
loss: 1.0113720893859863,grad_norm: 0.9999990226959498, iteration: 24193
loss: 1.034295916557312,grad_norm: 0.9999992442098499, iteration: 24194
loss: 1.0106998682022095,grad_norm: 0.9683330303740088, iteration: 24195
loss: 1.0500481128692627,grad_norm: 0.9999989397649832, iteration: 24196
loss: 0.9944536089897156,grad_norm: 0.9305450611503087, iteration: 24197
loss: 1.0057975053787231,grad_norm: 0.9773544512450738, iteration: 24198
loss: 1.0075596570968628,grad_norm: 0.9999991554051085, iteration: 24199
loss: 1.0193840265274048,grad_norm: 0.9999991295999408, iteration: 24200
loss: 1.059348225593567,grad_norm: 0.9999993130928747, iteration: 24201
loss: 0.9721371531486511,grad_norm: 0.9999990577458373, iteration: 24202
loss: 1.0218628644943237,grad_norm: 0.9999991398641572, iteration: 24203
loss: 1.039842963218689,grad_norm: 0.9999990831939953, iteration: 24204
loss: 1.0150188207626343,grad_norm: 0.9172592528426464, iteration: 24205
loss: 1.023405909538269,grad_norm: 0.9764515285578479, iteration: 24206
loss: 1.011064052581787,grad_norm: 0.9999990375185118, iteration: 24207
loss: 1.0356801748275757,grad_norm: 0.9999991128152634, iteration: 24208
loss: 1.0017540454864502,grad_norm: 0.9999991040184051, iteration: 24209
loss: 0.9993939995765686,grad_norm: 0.9632373274335072, iteration: 24210
loss: 0.9816203713417053,grad_norm: 0.9999991491174898, iteration: 24211
loss: 1.0140306949615479,grad_norm: 0.9999989729693088, iteration: 24212
loss: 1.0044604539871216,grad_norm: 0.9999991789245887, iteration: 24213
loss: 0.9974046945571899,grad_norm: 0.8524973978616838, iteration: 24214
loss: 1.021179437637329,grad_norm: 0.9766340980232164, iteration: 24215
loss: 1.0172538757324219,grad_norm: 0.9999992943093554, iteration: 24216
loss: 1.0302996635437012,grad_norm: 0.8591362324804415, iteration: 24217
loss: 0.9977902770042419,grad_norm: 0.9999990797389238, iteration: 24218
loss: 1.0146269798278809,grad_norm: 0.9241202260685316, iteration: 24219
loss: 1.0284515619277954,grad_norm: 0.998337209161238, iteration: 24220
loss: 1.0299015045166016,grad_norm: 0.9999996835249139, iteration: 24221
loss: 0.9762862920761108,grad_norm: 0.9999990488100564, iteration: 24222
loss: 1.0011392831802368,grad_norm: 0.999999124663468, iteration: 24223
loss: 1.0123648643493652,grad_norm: 0.9999990990512457, iteration: 24224
loss: 0.9925108551979065,grad_norm: 0.8679603749225414, iteration: 24225
loss: 1.0164546966552734,grad_norm: 0.9999990954557514, iteration: 24226
loss: 0.9934431910514832,grad_norm: 0.9999992325621578, iteration: 24227
loss: 1.0802373886108398,grad_norm: 0.9999994138596849, iteration: 24228
loss: 1.0366894006729126,grad_norm: 0.9999991188431017, iteration: 24229
loss: 1.0144706964492798,grad_norm: 0.848296501602494, iteration: 24230
loss: 1.0040926933288574,grad_norm: 0.9082450073242845, iteration: 24231
loss: 0.9894666075706482,grad_norm: 0.9504572012947847, iteration: 24232
loss: 1.0096096992492676,grad_norm: 0.999999062242093, iteration: 24233
loss: 1.0111396312713623,grad_norm: 0.999999021301885, iteration: 24234
loss: 0.9469140768051147,grad_norm: 0.8859586313649404, iteration: 24235
loss: 1.0434281826019287,grad_norm: 0.9999997676964427, iteration: 24236
loss: 0.9782167673110962,grad_norm: 0.9241338519187056, iteration: 24237
loss: 1.0335909128189087,grad_norm: 0.9999990097070505, iteration: 24238
loss: 0.97784024477005,grad_norm: 0.999998947285544, iteration: 24239
loss: 1.021303653717041,grad_norm: 0.9438563343790967, iteration: 24240
loss: 1.0197769403457642,grad_norm: 0.9925356063178846, iteration: 24241
loss: 0.9799031615257263,grad_norm: 0.9170650686213927, iteration: 24242
loss: 1.0093209743499756,grad_norm: 0.9572735141666517, iteration: 24243
loss: 1.0182781219482422,grad_norm: 0.9999993274199814, iteration: 24244
loss: 1.0163447856903076,grad_norm: 0.7963377849930401, iteration: 24245
loss: 0.9866247177124023,grad_norm: 0.9974990216071774, iteration: 24246
loss: 0.959754228591919,grad_norm: 0.9999995043145331, iteration: 24247
loss: 1.0307695865631104,grad_norm: 0.9999990358921775, iteration: 24248
loss: 1.0105665922164917,grad_norm: 0.9041716759966926, iteration: 24249
loss: 1.0557984113693237,grad_norm: 0.9374125773449499, iteration: 24250
loss: 0.9947696328163147,grad_norm: 0.9999992053693153, iteration: 24251
loss: 0.9817207455635071,grad_norm: 0.893402168597998, iteration: 24252
loss: 0.9677808880805969,grad_norm: 0.9838095402068574, iteration: 24253
loss: 0.9860566854476929,grad_norm: 0.9999990732830353, iteration: 24254
loss: 0.9820702075958252,grad_norm: 0.9999990935092977, iteration: 24255
loss: 0.986073911190033,grad_norm: 0.9999992130113354, iteration: 24256
loss: 0.9965120553970337,grad_norm: 0.9999990996464055, iteration: 24257
loss: 1.0145580768585205,grad_norm: 0.9999991730414867, iteration: 24258
loss: 1.002189040184021,grad_norm: 0.9999990940316693, iteration: 24259
loss: 0.9984868168830872,grad_norm: 0.8512586776433327, iteration: 24260
loss: 0.9723560810089111,grad_norm: 0.9999990366818653, iteration: 24261
loss: 1.0047686100006104,grad_norm: 0.9218815949974727, iteration: 24262
loss: 1.0546011924743652,grad_norm: 0.9999995740530533, iteration: 24263
loss: 0.9811903834342957,grad_norm: 0.9999991055732922, iteration: 24264
loss: 1.0066144466400146,grad_norm: 0.9999990998690328, iteration: 24265
loss: 1.0453749895095825,grad_norm: 0.9999991124490276, iteration: 24266
loss: 1.0578999519348145,grad_norm: 0.999999895913643, iteration: 24267
loss: 0.9846336245536804,grad_norm: 0.9999990449884989, iteration: 24268
loss: 0.947380006313324,grad_norm: 0.9771427037843179, iteration: 24269
loss: 0.9821395874023438,grad_norm: 0.9999994652734631, iteration: 24270
loss: 1.0271133184432983,grad_norm: 0.9999990555040941, iteration: 24271
loss: 1.022479772567749,grad_norm: 0.9999990619138082, iteration: 24272
loss: 0.9801518321037292,grad_norm: 0.8121876694076599, iteration: 24273
loss: 0.9903810024261475,grad_norm: 0.990189113227167, iteration: 24274
loss: 1.0256274938583374,grad_norm: 0.9285246227351263, iteration: 24275
loss: 1.0464884042739868,grad_norm: 0.9392199111650718, iteration: 24276
loss: 0.9902977347373962,grad_norm: 0.9999993609868154, iteration: 24277
loss: 1.0379940271377563,grad_norm: 0.9999991570351894, iteration: 24278
loss: 1.0249580144882202,grad_norm: 0.9095895208112201, iteration: 24279
loss: 0.992591917514801,grad_norm: 0.9999989734552817, iteration: 24280
loss: 1.0496724843978882,grad_norm: 0.9554053835252209, iteration: 24281
loss: 1.016379475593567,grad_norm: 0.9999992577224985, iteration: 24282
loss: 0.9728169441223145,grad_norm: 0.984374961864043, iteration: 24283
loss: 1.0218933820724487,grad_norm: 0.9090030330775725, iteration: 24284
loss: 1.0162475109100342,grad_norm: 0.9999989999692565, iteration: 24285
loss: 1.0202662944793701,grad_norm: 0.9999992040534645, iteration: 24286
loss: 0.9975618720054626,grad_norm: 0.9999992316712581, iteration: 24287
loss: 1.0276942253112793,grad_norm: 0.9934313065420153, iteration: 24288
loss: 1.0362824201583862,grad_norm: 0.9999994417334889, iteration: 24289
loss: 0.9723916053771973,grad_norm: 0.9999994080533134, iteration: 24290
loss: 1.013702154159546,grad_norm: 0.9999992702162942, iteration: 24291
loss: 0.9936280250549316,grad_norm: 0.9999992210546202, iteration: 24292
loss: 0.994656503200531,grad_norm: 0.9999991211022222, iteration: 24293
loss: 0.9691246151924133,grad_norm: 0.9643508026364732, iteration: 24294
loss: 1.0379451513290405,grad_norm: 0.9999992520919221, iteration: 24295
loss: 0.9800052046775818,grad_norm: 0.9999990907589836, iteration: 24296
loss: 1.039352536201477,grad_norm: 0.9779140769170949, iteration: 24297
loss: 1.0444941520690918,grad_norm: 0.9999992012374633, iteration: 24298
loss: 1.0306843519210815,grad_norm: 0.9999992626278512, iteration: 24299
loss: 1.0328905582427979,grad_norm: 0.9173141968148525, iteration: 24300
loss: 1.0216273069381714,grad_norm: 0.9588009792419554, iteration: 24301
loss: 1.0386402606964111,grad_norm: 0.9999991618013054, iteration: 24302
loss: 1.0249931812286377,grad_norm: 0.9999991216096201, iteration: 24303
loss: 1.032601237297058,grad_norm: 0.9999990371263201, iteration: 24304
loss: 1.0079295635223389,grad_norm: 0.8771286488814767, iteration: 24305
loss: 1.0076658725738525,grad_norm: 0.9521386547863949, iteration: 24306
loss: 1.0233452320098877,grad_norm: 0.9999991118442143, iteration: 24307
loss: 1.0016194581985474,grad_norm: 0.8247825110946408, iteration: 24308
loss: 0.9578090906143188,grad_norm: 0.9999991424561849, iteration: 24309
loss: 1.0021418333053589,grad_norm: 0.9999991089979641, iteration: 24310
loss: 1.042171597480774,grad_norm: 0.9999991582767782, iteration: 24311
loss: 1.0098187923431396,grad_norm: 0.9999991159423552, iteration: 24312
loss: 1.008277416229248,grad_norm: 0.8424941331534621, iteration: 24313
loss: 0.9978949427604675,grad_norm: 0.9999991080174668, iteration: 24314
loss: 1.0212459564208984,grad_norm: 0.9999997202881254, iteration: 24315
loss: 1.017732858657837,grad_norm: 0.8561291195667446, iteration: 24316
loss: 1.0789999961853027,grad_norm: 0.9999993333430989, iteration: 24317
loss: 0.9654075503349304,grad_norm: 0.9869349316662389, iteration: 24318
loss: 1.007013201713562,grad_norm: 0.9999995189436546, iteration: 24319
loss: 0.9907137155532837,grad_norm: 0.9999989938847181, iteration: 24320
loss: 0.9896417856216431,grad_norm: 0.9977185338143905, iteration: 24321
loss: 1.0015811920166016,grad_norm: 0.9999992248727456, iteration: 24322
loss: 0.9467697143554688,grad_norm: 0.9999990722040395, iteration: 24323
loss: 1.023492455482483,grad_norm: 0.9999994418434988, iteration: 24324
loss: 0.9923985004425049,grad_norm: 0.8556342427363577, iteration: 24325
loss: 0.9885421991348267,grad_norm: 0.9999993757800796, iteration: 24326
loss: 1.0525060892105103,grad_norm: 0.9999990285124646, iteration: 24327
loss: 1.0117857456207275,grad_norm: 0.9999992997703621, iteration: 24328
loss: 0.9674801230430603,grad_norm: 0.9999990840889347, iteration: 24329
loss: 1.0841789245605469,grad_norm: 0.9999991095582671, iteration: 24330
loss: 0.9915149807929993,grad_norm: 0.9999991739560318, iteration: 24331
loss: 1.0651799440383911,grad_norm: 0.9999995053725126, iteration: 24332
loss: 1.0635889768600464,grad_norm: 0.999999330230292, iteration: 24333
loss: 1.002793788909912,grad_norm: 0.9999993388384883, iteration: 24334
loss: 1.0469870567321777,grad_norm: 0.9999995719505168, iteration: 24335
loss: 0.9730971455574036,grad_norm: 0.9999991627576895, iteration: 24336
loss: 1.0035744905471802,grad_norm: 0.9388622311226628, iteration: 24337
loss: 1.025776743888855,grad_norm: 0.9999996665859691, iteration: 24338
loss: 0.9799039959907532,grad_norm: 0.9999993610419289, iteration: 24339
loss: 1.000012993812561,grad_norm: 0.921408585747315, iteration: 24340
loss: 1.0346864461898804,grad_norm: 0.9999990796757535, iteration: 24341
loss: 1.050647258758545,grad_norm: 0.9999993204974263, iteration: 24342
loss: 1.024625539779663,grad_norm: 0.9559577307761681, iteration: 24343
loss: 1.0114502906799316,grad_norm: 0.9999990931259581, iteration: 24344
loss: 1.0406776666641235,grad_norm: 0.9963534620269957, iteration: 24345
loss: 1.0204920768737793,grad_norm: 0.9999994096834434, iteration: 24346
loss: 0.9816429615020752,grad_norm: 0.999999189644468, iteration: 24347
loss: 1.0098515748977661,grad_norm: 0.987161169909512, iteration: 24348
loss: 1.0072988271713257,grad_norm: 0.9999991304921967, iteration: 24349
loss: 1.0313340425491333,grad_norm: 0.9974618721308707, iteration: 24350
loss: 1.0181503295898438,grad_norm: 0.9999992125051951, iteration: 24351
loss: 1.0154542922973633,grad_norm: 0.9228365459348168, iteration: 24352
loss: 1.028238296508789,grad_norm: 0.9999996659299465, iteration: 24353
loss: 1.0018819570541382,grad_norm: 0.8849304274483083, iteration: 24354
loss: 1.0320738554000854,grad_norm: 0.9999997867848184, iteration: 24355
loss: 1.0464141368865967,grad_norm: 0.9999990715365015, iteration: 24356
loss: 1.0055806636810303,grad_norm: 0.9999995321378853, iteration: 24357
loss: 1.0278996229171753,grad_norm: 0.9999990725176748, iteration: 24358
loss: 1.0011534690856934,grad_norm: 0.9999996354471113, iteration: 24359
loss: 1.0068312883377075,grad_norm: 0.8642824142442689, iteration: 24360
loss: 1.0361930131912231,grad_norm: 0.9999992995787501, iteration: 24361
loss: 1.0038584470748901,grad_norm: 0.8951404126028681, iteration: 24362
loss: 1.016777515411377,grad_norm: 0.9819467389255716, iteration: 24363
loss: 1.0286704301834106,grad_norm: 0.9999995270795676, iteration: 24364
loss: 1.0208542346954346,grad_norm: 0.9999991334276129, iteration: 24365
loss: 1.0223606824874878,grad_norm: 0.948069542684965, iteration: 24366
loss: 0.9795727133750916,grad_norm: 0.9441833973143493, iteration: 24367
loss: 1.0123971700668335,grad_norm: 0.9999991654999567, iteration: 24368
loss: 0.9717816710472107,grad_norm: 0.9189243781178648, iteration: 24369
loss: 1.0257552862167358,grad_norm: 0.9881766036620766, iteration: 24370
loss: 1.0326807498931885,grad_norm: 0.9999994342977149, iteration: 24371
loss: 0.9705806374549866,grad_norm: 0.9642490001462131, iteration: 24372
loss: 1.0305191278457642,grad_norm: 0.9999990736651884, iteration: 24373
loss: 1.024271845817566,grad_norm: 0.999999070737806, iteration: 24374
loss: 1.017210602760315,grad_norm: 0.9999991124955941, iteration: 24375
loss: 1.0462965965270996,grad_norm: 0.891004861592941, iteration: 24376
loss: 0.9826505780220032,grad_norm: 0.9298108120659997, iteration: 24377
loss: 1.0164556503295898,grad_norm: 0.9859181722879085, iteration: 24378
loss: 1.0538073778152466,grad_norm: 0.9999993630285391, iteration: 24379
loss: 1.0535832643508911,grad_norm: 0.9999992424917912, iteration: 24380
loss: 1.0433003902435303,grad_norm: 0.9999991340320425, iteration: 24381
loss: 0.9933118224143982,grad_norm: 0.8026162671332143, iteration: 24382
loss: 1.0767014026641846,grad_norm: 0.999999634161963, iteration: 24383
loss: 1.060190200805664,grad_norm: 0.9999991513899694, iteration: 24384
loss: 1.0009450912475586,grad_norm: 0.8827406356975877, iteration: 24385
loss: 1.0077807903289795,grad_norm: 0.9999989883214193, iteration: 24386
loss: 1.0020073652267456,grad_norm: 0.9182024721161005, iteration: 24387
loss: 1.035764455795288,grad_norm: 0.9999991077992668, iteration: 24388
loss: 1.0479342937469482,grad_norm: 0.9999992502417311, iteration: 24389
loss: 1.0137861967086792,grad_norm: 0.9999992195735182, iteration: 24390
loss: 0.9761841893196106,grad_norm: 0.8785171452383652, iteration: 24391
loss: 0.9887679815292358,grad_norm: 0.9999991198650311, iteration: 24392
loss: 0.988189697265625,grad_norm: 0.9999996004002629, iteration: 24393
loss: 0.9876212477684021,grad_norm: 0.9460770873374844, iteration: 24394
loss: 1.0334047079086304,grad_norm: 0.9999991380384993, iteration: 24395
loss: 1.011682391166687,grad_norm: 0.9436000219463235, iteration: 24396
loss: 1.0113998651504517,grad_norm: 0.9999993820939087, iteration: 24397
loss: 1.0121643543243408,grad_norm: 0.999999098323037, iteration: 24398
loss: 1.0259289741516113,grad_norm: 0.9999992603168226, iteration: 24399
loss: 0.9941763877868652,grad_norm: 0.864170415418904, iteration: 24400
loss: 0.9939315319061279,grad_norm: 0.9999996883278546, iteration: 24401
loss: 1.038142204284668,grad_norm: 0.9999993367406687, iteration: 24402
loss: 0.9883203506469727,grad_norm: 0.9999990248904969, iteration: 24403
loss: 1.0297250747680664,grad_norm: 0.9999991302940836, iteration: 24404
loss: 0.9925358891487122,grad_norm: 0.9999995777388376, iteration: 24405
loss: 1.0183717012405396,grad_norm: 0.8941036684747408, iteration: 24406
loss: 0.9772900938987732,grad_norm: 0.9399515985376804, iteration: 24407
loss: 1.0060677528381348,grad_norm: 0.8847704439330369, iteration: 24408
loss: 1.0325863361358643,grad_norm: 0.8687905254953958, iteration: 24409
loss: 1.0111955404281616,grad_norm: 0.9999989718762176, iteration: 24410
loss: 1.0555200576782227,grad_norm: 0.9999991805278776, iteration: 24411
loss: 0.974666953086853,grad_norm: 0.9999990167236885, iteration: 24412
loss: 1.041429877281189,grad_norm: 0.9999992536493449, iteration: 24413
loss: 1.0366511344909668,grad_norm: 0.9870773417240895, iteration: 24414
loss: 1.0203335285186768,grad_norm: 0.9999991195326017, iteration: 24415
loss: 1.01266348361969,grad_norm: 0.9151258317519129, iteration: 24416
loss: 1.0087281465530396,grad_norm: 0.9841349564181038, iteration: 24417
loss: 1.0936830043792725,grad_norm: 0.9999997692215451, iteration: 24418
loss: 1.0277626514434814,grad_norm: 0.9999991476523575, iteration: 24419
loss: 1.0422779321670532,grad_norm: 0.9910852901951029, iteration: 24420
loss: 1.0341980457305908,grad_norm: 0.9999993559954136, iteration: 24421
loss: 1.028588056564331,grad_norm: 0.9534559177160242, iteration: 24422
loss: 0.9921974539756775,grad_norm: 0.9999991560141833, iteration: 24423
loss: 0.9874814748764038,grad_norm: 0.9982757789442005, iteration: 24424
loss: 1.0203683376312256,grad_norm: 0.999999074137438, iteration: 24425
loss: 1.006726861000061,grad_norm: 0.9821353771688752, iteration: 24426
loss: 0.9846510887145996,grad_norm: 0.9999991590196469, iteration: 24427
loss: 1.014524221420288,grad_norm: 0.9999991699980562, iteration: 24428
loss: 1.02333402633667,grad_norm: 0.9999992425834533, iteration: 24429
loss: 1.1646567583084106,grad_norm: 0.9999999033712677, iteration: 24430
loss: 0.9878602623939514,grad_norm: 0.8845931873847375, iteration: 24431
loss: 0.9568125605583191,grad_norm: 0.9999990479315489, iteration: 24432
loss: 1.0013542175292969,grad_norm: 0.992016810634899, iteration: 24433
loss: 0.9722983241081238,grad_norm: 0.9031538710177175, iteration: 24434
loss: 1.0019512176513672,grad_norm: 0.9999995517129158, iteration: 24435
loss: 1.003701090812683,grad_norm: 0.9999991682053644, iteration: 24436
loss: 1.0269795656204224,grad_norm: 0.9999997325577037, iteration: 24437
loss: 0.9901080131530762,grad_norm: 0.9999992945201022, iteration: 24438
loss: 1.0401886701583862,grad_norm: 0.9292458203634731, iteration: 24439
loss: 1.0314676761627197,grad_norm: 0.7906754401773071, iteration: 24440
loss: 1.0391837358474731,grad_norm: 0.999999720179319, iteration: 24441
loss: 1.0419260263442993,grad_norm: 0.9102796136633003, iteration: 24442
loss: 1.0310499668121338,grad_norm: 0.9999990378281324, iteration: 24443
loss: 1.0579794645309448,grad_norm: 0.9999992322125926, iteration: 24444
loss: 1.0060153007507324,grad_norm: 0.9943517154635113, iteration: 24445
loss: 0.9907158613204956,grad_norm: 0.9999990616111597, iteration: 24446
loss: 0.9900187253952026,grad_norm: 0.8912639581280053, iteration: 24447
loss: 0.9965758919715881,grad_norm: 0.9999990340713852, iteration: 24448
loss: 1.052032470703125,grad_norm: 0.9999995891986017, iteration: 24449
loss: 1.005240559577942,grad_norm: 0.9376532331078407, iteration: 24450
loss: 1.0360702276229858,grad_norm: 0.9999990917149569, iteration: 24451
loss: 1.011009693145752,grad_norm: 0.8528535126898195, iteration: 24452
loss: 1.020631194114685,grad_norm: 0.9999991922511315, iteration: 24453
loss: 0.9697384834289551,grad_norm: 0.9739908103450731, iteration: 24454
loss: 0.9878580570220947,grad_norm: 0.9734454710971049, iteration: 24455
loss: 0.9876660704612732,grad_norm: 0.9235021680852685, iteration: 24456
loss: 1.0471359491348267,grad_norm: 0.9999991799281736, iteration: 24457
loss: 0.9927166700363159,grad_norm: 0.9999990762663781, iteration: 24458
loss: 1.0149133205413818,grad_norm: 0.9999993688951035, iteration: 24459
loss: 1.0206854343414307,grad_norm: 0.9280803752784225, iteration: 24460
loss: 1.0118763446807861,grad_norm: 0.999999223729368, iteration: 24461
loss: 1.0037137269973755,grad_norm: 0.9999992989236723, iteration: 24462
loss: 1.0260151624679565,grad_norm: 0.987442112361537, iteration: 24463
loss: 1.006344199180603,grad_norm: 0.8913426534305336, iteration: 24464
loss: 1.0243186950683594,grad_norm: 0.9999994048603448, iteration: 24465
loss: 1.0369117259979248,grad_norm: 0.9999989834969341, iteration: 24466
loss: 1.0255253314971924,grad_norm: 0.9999998887281315, iteration: 24467
loss: 1.0418415069580078,grad_norm: 0.9999990523133002, iteration: 24468
loss: 1.035354733467102,grad_norm: 0.9806412814918524, iteration: 24469
loss: 0.9599856734275818,grad_norm: 0.9654333619982581, iteration: 24470
loss: 1.0027692317962646,grad_norm: 0.7855634614527999, iteration: 24471
loss: 1.037608027458191,grad_norm: 0.9999992192877972, iteration: 24472
loss: 1.0525363683700562,grad_norm: 0.9719402364695595, iteration: 24473
loss: 1.0459574460983276,grad_norm: 0.9999994557815133, iteration: 24474
loss: 1.0194215774536133,grad_norm: 0.9712951124832989, iteration: 24475
loss: 1.015518069267273,grad_norm: 0.9999991018506919, iteration: 24476
loss: 1.0154662132263184,grad_norm: 0.9999991031012135, iteration: 24477
loss: 1.0082541704177856,grad_norm: 0.9999991472392057, iteration: 24478
loss: 1.0213874578475952,grad_norm: 0.9999993099713893, iteration: 24479
loss: 1.0493512153625488,grad_norm: 0.9999997823399145, iteration: 24480
loss: 1.0235042572021484,grad_norm: 0.9999991535991942, iteration: 24481
loss: 1.0369956493377686,grad_norm: 0.8819508224555723, iteration: 24482
loss: 1.006410837173462,grad_norm: 0.9017318627934021, iteration: 24483
loss: 1.0288017988204956,grad_norm: 0.9373587387003433, iteration: 24484
loss: 1.031172513961792,grad_norm: 0.9999992680960854, iteration: 24485
loss: 0.9793463349342346,grad_norm: 0.9999992110756514, iteration: 24486
loss: 1.0431129932403564,grad_norm: 0.9999992034664748, iteration: 24487
loss: 0.9892524480819702,grad_norm: 0.9039248237504761, iteration: 24488
loss: 1.00447416305542,grad_norm: 0.9999991481819617, iteration: 24489
loss: 1.0188610553741455,grad_norm: 0.9999991078080183, iteration: 24490
loss: 0.9919264316558838,grad_norm: 0.8994785897240142, iteration: 24491
loss: 1.0021618604660034,grad_norm: 0.9526243223781181, iteration: 24492
loss: 0.9992981553077698,grad_norm: 0.9692791798159869, iteration: 24493
loss: 1.0012470483779907,grad_norm: 0.9345714030813655, iteration: 24494
loss: 0.9764165282249451,grad_norm: 0.8755809636719062, iteration: 24495
loss: 1.0435341596603394,grad_norm: 0.999999309425877, iteration: 24496
loss: 1.009947657585144,grad_norm: 0.9999991635371843, iteration: 24497
loss: 0.9927862286567688,grad_norm: 0.999999044717039, iteration: 24498
loss: 1.1730303764343262,grad_norm: 0.999999858776988, iteration: 24499
loss: 1.0817251205444336,grad_norm: 0.9999994494605895, iteration: 24500
loss: 1.0510166883468628,grad_norm: 0.915549654979694, iteration: 24501
loss: 1.018931269645691,grad_norm: 0.8347868397553476, iteration: 24502
loss: 1.0454814434051514,grad_norm: 0.9999991545970716, iteration: 24503
loss: 1.005954384803772,grad_norm: 0.9999991154509917, iteration: 24504
loss: 1.014163851737976,grad_norm: 0.9999991837298541, iteration: 24505
loss: 1.0011107921600342,grad_norm: 0.9343089436541524, iteration: 24506
loss: 1.0036356449127197,grad_norm: 0.9999990566562318, iteration: 24507
loss: 1.008201003074646,grad_norm: 0.9999991622497537, iteration: 24508
loss: 1.003022313117981,grad_norm: 0.9999991823083547, iteration: 24509
loss: 1.0670627355575562,grad_norm: 0.9999997552756201, iteration: 24510
loss: 1.0435631275177002,grad_norm: 0.9999990946934413, iteration: 24511
loss: 1.0296165943145752,grad_norm: 0.9999990400603416, iteration: 24512
loss: 0.9816362857818604,grad_norm: 0.9999990362318136, iteration: 24513
loss: 1.025147557258606,grad_norm: 0.856519594684489, iteration: 24514
loss: 0.9778012037277222,grad_norm: 0.8385549020627657, iteration: 24515
loss: 1.0621848106384277,grad_norm: 0.9771741650610984, iteration: 24516
loss: 0.99088054895401,grad_norm: 0.9999990524027593, iteration: 24517
loss: 0.9875710606575012,grad_norm: 0.9939376054951328, iteration: 24518
loss: 1.0171104669570923,grad_norm: 0.9999992397909349, iteration: 24519
loss: 1.0605971813201904,grad_norm: 0.999999260365994, iteration: 24520
loss: 1.0111333131790161,grad_norm: 0.9999990509504784, iteration: 24521
loss: 1.0391261577606201,grad_norm: 0.8348902352504294, iteration: 24522
loss: 1.0222574472427368,grad_norm: 0.9999990736628743, iteration: 24523
loss: 1.055045485496521,grad_norm: 0.9999994798973632, iteration: 24524
loss: 0.992087721824646,grad_norm: 0.9968616570869631, iteration: 24525
loss: 1.0062811374664307,grad_norm: 0.9999994005599089, iteration: 24526
loss: 1.0097196102142334,grad_norm: 0.9211967126408745, iteration: 24527
loss: 1.005022406578064,grad_norm: 0.7904279272407146, iteration: 24528
loss: 0.9749423265457153,grad_norm: 0.9999989981199201, iteration: 24529
loss: 1.0656330585479736,grad_norm: 0.999999234296789, iteration: 24530
loss: 1.0019317865371704,grad_norm: 0.9317048675675041, iteration: 24531
loss: 1.021058440208435,grad_norm: 0.884408474513373, iteration: 24532
loss: 1.0308034420013428,grad_norm: 0.9442898821350747, iteration: 24533
loss: 0.9983586072921753,grad_norm: 0.8246829044695361, iteration: 24534
loss: 0.9707371592521667,grad_norm: 0.9999990568914802, iteration: 24535
loss: 1.0038988590240479,grad_norm: 0.9999991773410309, iteration: 24536
loss: 1.005142331123352,grad_norm: 0.9999990264672761, iteration: 24537
loss: 1.0374506711959839,grad_norm: 0.999999552526486, iteration: 24538
loss: 0.969275712966919,grad_norm: 0.9612675079751322, iteration: 24539
loss: 1.025982141494751,grad_norm: 0.8737718518471882, iteration: 24540
loss: 0.9543875455856323,grad_norm: 0.999999143384796, iteration: 24541
loss: 0.9976695775985718,grad_norm: 0.999999162602933, iteration: 24542
loss: 1.0526609420776367,grad_norm: 0.8688238896147952, iteration: 24543
loss: 1.0283757448196411,grad_norm: 0.9999990256753362, iteration: 24544
loss: 0.999431312084198,grad_norm: 0.9999993664141602, iteration: 24545
loss: 1.0106303691864014,grad_norm: 0.9999990992110981, iteration: 24546
loss: 1.0288212299346924,grad_norm: 0.9999992693970886, iteration: 24547
loss: 0.9619088768959045,grad_norm: 0.9999990474789963, iteration: 24548
loss: 0.996708869934082,grad_norm: 0.9999992508847627, iteration: 24549
loss: 1.0092148780822754,grad_norm: 0.9999997428503665, iteration: 24550
loss: 1.0203262567520142,grad_norm: 0.9999992314607212, iteration: 24551
loss: 1.0308171510696411,grad_norm: 0.969590138676346, iteration: 24552
loss: 0.9740737676620483,grad_norm: 0.9999994740935282, iteration: 24553
loss: 0.9996102452278137,grad_norm: 0.88162398765488, iteration: 24554
loss: 0.9994871020317078,grad_norm: 0.9697603773350426, iteration: 24555
loss: 1.0292540788650513,grad_norm: 0.9210374836532729, iteration: 24556
loss: 1.0521169900894165,grad_norm: 0.9999991091002968, iteration: 24557
loss: 0.9724192023277283,grad_norm: 0.999999005738748, iteration: 24558
loss: 1.0198261737823486,grad_norm: 0.999349700238245, iteration: 24559
loss: 1.0564302206039429,grad_norm: 0.9999997591304326, iteration: 24560
loss: 1.0119539499282837,grad_norm: 0.9900086523985203, iteration: 24561
loss: 0.9971795082092285,grad_norm: 0.9999990890625704, iteration: 24562
loss: 1.003637433052063,grad_norm: 0.9887972496601805, iteration: 24563
loss: 1.0414642095565796,grad_norm: 0.944594857803217, iteration: 24564
loss: 1.039098858833313,grad_norm: 0.9999990710638226, iteration: 24565
loss: 0.9908199906349182,grad_norm: 0.8905494839538394, iteration: 24566
loss: 0.98813396692276,grad_norm: 0.9999990492966357, iteration: 24567
loss: 1.0420591831207275,grad_norm: 0.9999993757869018, iteration: 24568
loss: 1.0003547668457031,grad_norm: 0.9999990742308847, iteration: 24569
loss: 1.0439645051956177,grad_norm: 0.9999990710243608, iteration: 24570
loss: 0.9940469861030579,grad_norm: 0.9999990900001966, iteration: 24571
loss: 1.0010075569152832,grad_norm: 0.999999621608174, iteration: 24572
loss: 1.0191138982772827,grad_norm: 0.9999993608701464, iteration: 24573
loss: 1.0018219947814941,grad_norm: 0.9999991826698487, iteration: 24574
loss: 1.0394278764724731,grad_norm: 0.9999993272935307, iteration: 24575
loss: 1.0138858556747437,grad_norm: 0.9999991085649418, iteration: 24576
loss: 1.024027705192566,grad_norm: 0.9999997469424446, iteration: 24577
loss: 1.0315051078796387,grad_norm: 0.9607888886036091, iteration: 24578
loss: 1.0064160823822021,grad_norm: 0.9999990896494564, iteration: 24579
loss: 1.0135539770126343,grad_norm: 0.999999050693589, iteration: 24580
loss: 1.0143578052520752,grad_norm: 0.9999992362517344, iteration: 24581
loss: 0.9700524210929871,grad_norm: 0.9174102650319081, iteration: 24582
loss: 1.0201137065887451,grad_norm: 0.999999034402188, iteration: 24583
loss: 1.0043812990188599,grad_norm: 0.9999990912422163, iteration: 24584
loss: 1.009939432144165,grad_norm: 0.999999183629553, iteration: 24585
loss: 1.0122309923171997,grad_norm: 0.9622837250159091, iteration: 24586
loss: 0.9859145283699036,grad_norm: 0.9646220079862234, iteration: 24587
loss: 1.0056126117706299,grad_norm: 0.9999991876580467, iteration: 24588
loss: 1.0904372930526733,grad_norm: 0.9999995049957359, iteration: 24589
loss: 1.0105682611465454,grad_norm: 0.9999991714248366, iteration: 24590
loss: 0.9721839427947998,grad_norm: 0.9999997336274874, iteration: 24591
loss: 1.0092254877090454,grad_norm: 0.9999990483676126, iteration: 24592
loss: 1.068955659866333,grad_norm: 0.9699898418115678, iteration: 24593
loss: 1.0312938690185547,grad_norm: 0.9999996283639195, iteration: 24594
loss: 0.990455150604248,grad_norm: 0.8350866488574755, iteration: 24595
loss: 1.0367223024368286,grad_norm: 0.9999997608396323, iteration: 24596
loss: 1.0170923471450806,grad_norm: 0.9999991781810564, iteration: 24597
loss: 0.9467053413391113,grad_norm: 0.9600550390523552, iteration: 24598
loss: 1.0366630554199219,grad_norm: 0.9999995258177101, iteration: 24599
loss: 1.0654079914093018,grad_norm: 0.9999992530006188, iteration: 24600
loss: 1.0513098239898682,grad_norm: 0.999999356317418, iteration: 24601
loss: 0.9874790906906128,grad_norm: 0.999999139105806, iteration: 24602
loss: 1.0601990222930908,grad_norm: 0.9999991144552577, iteration: 24603
loss: 1.0318630933761597,grad_norm: 0.9613196154135442, iteration: 24604
loss: 0.9969192147254944,grad_norm: 0.9999997113771312, iteration: 24605
loss: 1.0463716983795166,grad_norm: 0.9999995093867637, iteration: 24606
loss: 1.0137605667114258,grad_norm: 0.9999990402422133, iteration: 24607
loss: 1.1229839324951172,grad_norm: 0.9999993745151967, iteration: 24608
loss: 1.0178806781768799,grad_norm: 0.8925565893021629, iteration: 24609
loss: 1.07894766330719,grad_norm: 0.9999998350221775, iteration: 24610
loss: 0.9935393929481506,grad_norm: 0.9999991629307537, iteration: 24611
loss: 1.0185892581939697,grad_norm: 0.9999992891268044, iteration: 24612
loss: 1.0818225145339966,grad_norm: 0.9999995081596327, iteration: 24613
loss: 0.9919641017913818,grad_norm: 0.9563882422355376, iteration: 24614
loss: 1.0127677917480469,grad_norm: 0.9999993411951483, iteration: 24615
loss: 1.0043915510177612,grad_norm: 0.995386593331022, iteration: 24616
loss: 1.033918023109436,grad_norm: 0.9999993009220226, iteration: 24617
loss: 0.9903113842010498,grad_norm: 0.9999990955021413, iteration: 24618
loss: 1.0348402261734009,grad_norm: 0.9768865360511957, iteration: 24619
loss: 0.9907777905464172,grad_norm: 0.9458986019547109, iteration: 24620
loss: 1.0141558647155762,grad_norm: 0.8501358470484759, iteration: 24621
loss: 0.9800567030906677,grad_norm: 0.9999995565824751, iteration: 24622
loss: 0.9806900024414062,grad_norm: 0.9999990920054036, iteration: 24623
loss: 1.0027427673339844,grad_norm: 0.9821836494524022, iteration: 24624
loss: 0.9828781485557556,grad_norm: 0.9999990561186605, iteration: 24625
loss: 1.038090705871582,grad_norm: 0.9999995090472162, iteration: 24626
loss: 1.03152334690094,grad_norm: 0.999999127999855, iteration: 24627
loss: 1.0378085374832153,grad_norm: 0.9813981984304443, iteration: 24628
loss: 1.0186153650283813,grad_norm: 0.9999991059204538, iteration: 24629
loss: 0.9936245679855347,grad_norm: 0.9999991762304585, iteration: 24630
loss: 1.0192385911941528,grad_norm: 0.9999993588760329, iteration: 24631
loss: 1.0237212181091309,grad_norm: 0.9999993908599563, iteration: 24632
loss: 1.0032724142074585,grad_norm: 0.9999993584342719, iteration: 24633
loss: 0.9866724610328674,grad_norm: 0.9337150131784289, iteration: 24634
loss: 1.0039210319519043,grad_norm: 0.9999991875773612, iteration: 24635
loss: 1.0083565711975098,grad_norm: 0.9999991323031959, iteration: 24636
loss: 0.9558032155036926,grad_norm: 0.9999992296179637, iteration: 24637
loss: 0.9669778943061829,grad_norm: 0.9999991710602978, iteration: 24638
loss: 1.01243257522583,grad_norm: 0.9999991614977631, iteration: 24639
loss: 0.9652029871940613,grad_norm: 0.9999991627715985, iteration: 24640
loss: 1.0469235181808472,grad_norm: 0.999999170527762, iteration: 24641
loss: 1.016996145248413,grad_norm: 0.9564131189868714, iteration: 24642
loss: 1.022632360458374,grad_norm: 0.9999990211905515, iteration: 24643
loss: 1.0212072134017944,grad_norm: 0.9131891908195875, iteration: 24644
loss: 1.0522180795669556,grad_norm: 0.9999992604233777, iteration: 24645
loss: 1.0356602668762207,grad_norm: 0.9125962177762984, iteration: 24646
loss: 0.9890271425247192,grad_norm: 0.9999990960035963, iteration: 24647
loss: 1.0052504539489746,grad_norm: 0.9333518898216012, iteration: 24648
loss: 1.031076431274414,grad_norm: 0.9555479137330734, iteration: 24649
loss: 0.9906651377677917,grad_norm: 0.9999991500524632, iteration: 24650
loss: 1.047080397605896,grad_norm: 0.9999998323698037, iteration: 24651
loss: 1.0196404457092285,grad_norm: 0.9999991663296935, iteration: 24652
loss: 0.9983808994293213,grad_norm: 0.871196802988317, iteration: 24653
loss: 1.0776779651641846,grad_norm: 0.9999994717907158, iteration: 24654
loss: 1.0232785940170288,grad_norm: 0.9021466717953079, iteration: 24655
loss: 0.976467490196228,grad_norm: 0.8293757705439613, iteration: 24656
loss: 0.9860932230949402,grad_norm: 0.7874467926182471, iteration: 24657
loss: 1.0423083305358887,grad_norm: 0.9020143422662812, iteration: 24658
loss: 0.9867039918899536,grad_norm: 0.9999991235676652, iteration: 24659
loss: 0.9963976740837097,grad_norm: 0.8699920895971193, iteration: 24660
loss: 0.9647188782691956,grad_norm: 0.9999991244525542, iteration: 24661
loss: 0.9785352945327759,grad_norm: 0.9999993318462781, iteration: 24662
loss: 0.9863013625144958,grad_norm: 0.9999991768648487, iteration: 24663
loss: 1.0282797813415527,grad_norm: 0.8888168677719309, iteration: 24664
loss: 0.9661495089530945,grad_norm: 0.9638072942860786, iteration: 24665
loss: 1.0020169019699097,grad_norm: 0.9486450987459957, iteration: 24666
loss: 0.9944311380386353,grad_norm: 0.9999989888365229, iteration: 24667
loss: 1.0742554664611816,grad_norm: 0.9999993675744516, iteration: 24668
loss: 1.0666725635528564,grad_norm: 0.9999993691671079, iteration: 24669
loss: 1.0285342931747437,grad_norm: 0.9658051625005911, iteration: 24670
loss: 0.980546772480011,grad_norm: 0.9857136007755047, iteration: 24671
loss: 1.0057497024536133,grad_norm: 0.9999989889374143, iteration: 24672
loss: 1.0837926864624023,grad_norm: 0.9999989998433176, iteration: 24673
loss: 1.0497686862945557,grad_norm: 0.8868374077761143, iteration: 24674
loss: 1.0135031938552856,grad_norm: 0.99999969443422, iteration: 24675
loss: 0.984933078289032,grad_norm: 0.9999990518824797, iteration: 24676
loss: 1.0152511596679688,grad_norm: 0.9611304440558142, iteration: 24677
loss: 1.0226030349731445,grad_norm: 0.995263572984804, iteration: 24678
loss: 1.0082619190216064,grad_norm: 0.7617623124359993, iteration: 24679
loss: 1.0037126541137695,grad_norm: 0.999999095648772, iteration: 24680
loss: 1.0218712091445923,grad_norm: 0.9370983274127672, iteration: 24681
loss: 1.0043373107910156,grad_norm: 0.9999991192748442, iteration: 24682
loss: 0.989808201789856,grad_norm: 0.9992782626722522, iteration: 24683
loss: 1.0283677577972412,grad_norm: 0.9355565515907126, iteration: 24684
loss: 1.0134161710739136,grad_norm: 0.9925885308660841, iteration: 24685
loss: 1.0076372623443604,grad_norm: 0.9999995806091538, iteration: 24686
loss: 1.0121731758117676,grad_norm: 0.9999992520562774, iteration: 24687
loss: 1.016883134841919,grad_norm: 0.9999992124041904, iteration: 24688
loss: 0.9750624299049377,grad_norm: 0.7805124258891661, iteration: 24689
loss: 1.0093940496444702,grad_norm: 0.9999990086341831, iteration: 24690
loss: 0.9791098237037659,grad_norm: 0.9999991874913927, iteration: 24691
loss: 0.9829956889152527,grad_norm: 0.9999991475861776, iteration: 24692
loss: 1.005730390548706,grad_norm: 0.9935768674237068, iteration: 24693
loss: 1.006004810333252,grad_norm: 0.9999992197587826, iteration: 24694
loss: 1.0030380487442017,grad_norm: 0.8171758244415704, iteration: 24695
loss: 0.9859548807144165,grad_norm: 0.9823062491829323, iteration: 24696
loss: 1.0104492902755737,grad_norm: 0.9999991553125235, iteration: 24697
loss: 1.0125585794448853,grad_norm: 0.884198552704323, iteration: 24698
loss: 1.0604352951049805,grad_norm: 0.9525156508405367, iteration: 24699
loss: 0.979010820388794,grad_norm: 0.959969952096862, iteration: 24700
loss: 0.9945361018180847,grad_norm: 0.9706072771505571, iteration: 24701
loss: 0.9757724404335022,grad_norm: 0.9759529814519388, iteration: 24702
loss: 0.9907461404800415,grad_norm: 0.9999991177328533, iteration: 24703
loss: 0.9884821176528931,grad_norm: 0.9999990288673591, iteration: 24704
loss: 0.967304527759552,grad_norm: 0.7692574064076062, iteration: 24705
loss: 1.0296573638916016,grad_norm: 0.9999992467496495, iteration: 24706
loss: 0.9881252646446228,grad_norm: 0.9999990012421015, iteration: 24707
loss: 0.9931880831718445,grad_norm: 0.9999990309262797, iteration: 24708
loss: 1.0423336029052734,grad_norm: 0.999999160236898, iteration: 24709
loss: 1.0012810230255127,grad_norm: 0.9844882145732636, iteration: 24710
loss: 1.0417978763580322,grad_norm: 0.9887383411132379, iteration: 24711
loss: 1.0688533782958984,grad_norm: 0.9999991884331612, iteration: 24712
loss: 1.025985598564148,grad_norm: 0.9999990062655253, iteration: 24713
loss: 1.0119811296463013,grad_norm: 0.9999991679617247, iteration: 24714
loss: 1.0165284872055054,grad_norm: 0.9999992941982483, iteration: 24715
loss: 1.0311546325683594,grad_norm: 0.955044823789464, iteration: 24716
loss: 1.0056111812591553,grad_norm: 0.9999991669175416, iteration: 24717
loss: 1.0370311737060547,grad_norm: 0.9999991363644501, iteration: 24718
loss: 1.004509449005127,grad_norm: 0.9656659299848314, iteration: 24719
loss: 1.0262017250061035,grad_norm: 0.9589743258018403, iteration: 24720
loss: 1.009076714515686,grad_norm: 0.9999993224359647, iteration: 24721
loss: 1.0193158388137817,grad_norm: 0.9999990509403133, iteration: 24722
loss: 0.9910029172897339,grad_norm: 0.932651792215843, iteration: 24723
loss: 0.9657675623893738,grad_norm: 0.9304783175408943, iteration: 24724
loss: 1.0103157758712769,grad_norm: 0.999999485103105, iteration: 24725
loss: 1.0326489210128784,grad_norm: 0.9007273334633181, iteration: 24726
loss: 1.0117180347442627,grad_norm: 0.9999990732224507, iteration: 24727
loss: 1.0195494890213013,grad_norm: 0.8558721621135787, iteration: 24728
loss: 0.9959189891815186,grad_norm: 0.999999362386772, iteration: 24729
loss: 0.9483464360237122,grad_norm: 0.8856031899741287, iteration: 24730
loss: 1.0182818174362183,grad_norm: 0.9999991309911996, iteration: 24731
loss: 1.028003215789795,grad_norm: 0.9999992063416213, iteration: 24732
loss: 0.9880185723304749,grad_norm: 0.9582365248531434, iteration: 24733
loss: 0.9727333188056946,grad_norm: 0.9999992909295387, iteration: 24734
loss: 1.0202418565750122,grad_norm: 0.9999991886547078, iteration: 24735
loss: 1.0236989259719849,grad_norm: 0.9063278458878758, iteration: 24736
loss: 0.9942098259925842,grad_norm: 0.9854782796078865, iteration: 24737
loss: 1.0334317684173584,grad_norm: 0.9999993039905929, iteration: 24738
loss: 0.9825783967971802,grad_norm: 0.9999990393762954, iteration: 24739
loss: 1.0371724367141724,grad_norm: 0.9166064236829181, iteration: 24740
loss: 0.9759373664855957,grad_norm: 0.999999242245328, iteration: 24741
loss: 1.012818455696106,grad_norm: 0.9910249955826815, iteration: 24742
loss: 0.9916743040084839,grad_norm: 0.8817578400261323, iteration: 24743
loss: 1.0534474849700928,grad_norm: 0.9999993991546451, iteration: 24744
loss: 0.9914173483848572,grad_norm: 0.9999990847108856, iteration: 24745
loss: 1.012330174446106,grad_norm: 0.9999998416434552, iteration: 24746
loss: 1.0188392400741577,grad_norm: 0.9999992352100288, iteration: 24747
loss: 1.0503785610198975,grad_norm: 0.9999997827125177, iteration: 24748
loss: 0.9420533180236816,grad_norm: 0.9999990093841217, iteration: 24749
loss: 0.996335506439209,grad_norm: 0.9999991268676105, iteration: 24750
loss: 1.0559027194976807,grad_norm: 0.9999993208279243, iteration: 24751
loss: 1.0429145097732544,grad_norm: 0.942032662697634, iteration: 24752
loss: 1.037567377090454,grad_norm: 0.8475490302203862, iteration: 24753
loss: 0.9876443147659302,grad_norm: 0.9806129256594044, iteration: 24754
loss: 1.0004185438156128,grad_norm: 0.9999998440007314, iteration: 24755
loss: 1.0086101293563843,grad_norm: 0.9448504218412811, iteration: 24756
loss: 1.015273928642273,grad_norm: 0.9999992177151433, iteration: 24757
loss: 1.0200375318527222,grad_norm: 0.9999992104245286, iteration: 24758
loss: 1.0088107585906982,grad_norm: 0.9999993608636066, iteration: 24759
loss: 1.0438919067382812,grad_norm: 0.9999997307133514, iteration: 24760
loss: 1.0762745141983032,grad_norm: 0.9999992626261047, iteration: 24761
loss: 1.0237761735916138,grad_norm: 0.9999997100942957, iteration: 24762
loss: 1.033112645149231,grad_norm: 0.9246272031769476, iteration: 24763
loss: 1.0213754177093506,grad_norm: 0.9999991940406705, iteration: 24764
loss: 1.0110276937484741,grad_norm: 0.9913350206569255, iteration: 24765
loss: 1.0440974235534668,grad_norm: 0.9999991806730231, iteration: 24766
loss: 1.0299468040466309,grad_norm: 0.9449510621620137, iteration: 24767
loss: 1.023100733757019,grad_norm: 0.9999994023215135, iteration: 24768
loss: 1.0412147045135498,grad_norm: 0.9999997542837172, iteration: 24769
loss: 0.9914147853851318,grad_norm: 0.9496562307490494, iteration: 24770
loss: 0.9807747602462769,grad_norm: 0.9851933912502758, iteration: 24771
loss: 1.007489562034607,grad_norm: 0.9601148647502054, iteration: 24772
loss: 1.0192759037017822,grad_norm: 0.9827156390997902, iteration: 24773
loss: 1.0495104789733887,grad_norm: 0.9999996851067933, iteration: 24774
loss: 1.05451238155365,grad_norm: 0.999999389797902, iteration: 24775
loss: 1.0279330015182495,grad_norm: 0.9999991215152059, iteration: 24776
loss: 1.029822587966919,grad_norm: 0.9999992684309263, iteration: 24777
loss: 1.000439167022705,grad_norm: 0.9999991641801905, iteration: 24778
loss: 0.9844563007354736,grad_norm: 0.9999990729974719, iteration: 24779
loss: 1.0199205875396729,grad_norm: 0.9999992285245005, iteration: 24780
loss: 1.0476267337799072,grad_norm: 0.9960982290855971, iteration: 24781
loss: 0.9893286824226379,grad_norm: 0.9999990794381963, iteration: 24782
loss: 1.024948239326477,grad_norm: 0.9999994248866145, iteration: 24783
loss: 1.02239191532135,grad_norm: 0.9999990767564331, iteration: 24784
loss: 1.0063936710357666,grad_norm: 0.9929746020315998, iteration: 24785
loss: 0.9835287928581238,grad_norm: 0.9999991684908067, iteration: 24786
loss: 1.0229288339614868,grad_norm: 0.9660276803637773, iteration: 24787
loss: 0.9981511235237122,grad_norm: 0.999974973992763, iteration: 24788
loss: 0.9973615407943726,grad_norm: 0.9963575338811357, iteration: 24789
loss: 1.0155538320541382,grad_norm: 0.9382303308207799, iteration: 24790
loss: 1.0498653650283813,grad_norm: 0.9999992199571665, iteration: 24791
loss: 1.0289698839187622,grad_norm: 0.9999995516689104, iteration: 24792
loss: 1.020196557044983,grad_norm: 0.9999992083616723, iteration: 24793
loss: 1.0106122493743896,grad_norm: 0.9999991608277908, iteration: 24794
loss: 1.0426689386367798,grad_norm: 0.9999994668832776, iteration: 24795
loss: 0.9872416853904724,grad_norm: 0.999999090067937, iteration: 24796
loss: 0.985863983631134,grad_norm: 0.9258052222665872, iteration: 24797
loss: 0.9905501008033752,grad_norm: 0.7860495001976862, iteration: 24798
loss: 1.0118895769119263,grad_norm: 0.9896548881271616, iteration: 24799
loss: 1.0023728609085083,grad_norm: 0.8004806000813813, iteration: 24800
loss: 1.0034613609313965,grad_norm: 0.9181606604527361, iteration: 24801
loss: 1.013231873512268,grad_norm: 0.999999403589633, iteration: 24802
loss: 0.9997873902320862,grad_norm: 0.9999988169551258, iteration: 24803
loss: 1.0054936408996582,grad_norm: 0.9999991211536624, iteration: 24804
loss: 1.0253486633300781,grad_norm: 0.9999990724215289, iteration: 24805
loss: 1.0045223236083984,grad_norm: 0.8887539103661536, iteration: 24806
loss: 1.0148688554763794,grad_norm: 0.8669222925837348, iteration: 24807
loss: 1.005360722541809,grad_norm: 0.9478648950657477, iteration: 24808
loss: 1.0026251077651978,grad_norm: 0.9999993506937721, iteration: 24809
loss: 0.9922103881835938,grad_norm: 0.9999994394433688, iteration: 24810
loss: 1.0075799226760864,grad_norm: 0.8420224313732145, iteration: 24811
loss: 0.9773380756378174,grad_norm: 0.9999990796966812, iteration: 24812
loss: 0.9938400387763977,grad_norm: 0.9999991783217081, iteration: 24813
loss: 1.0111587047576904,grad_norm: 0.8806142320576726, iteration: 24814
loss: 1.002916932106018,grad_norm: 0.9999993474374586, iteration: 24815
loss: 0.9781197309494019,grad_norm: 0.9999990852557664, iteration: 24816
loss: 0.9918503761291504,grad_norm: 0.9320556556154567, iteration: 24817
loss: 0.995658278465271,grad_norm: 0.999999331514557, iteration: 24818
loss: 1.0418699979782104,grad_norm: 0.9999997087873603, iteration: 24819
loss: 1.0090703964233398,grad_norm: 0.8984209955681036, iteration: 24820
loss: 1.0335118770599365,grad_norm: 0.894359588750061, iteration: 24821
loss: 1.0260412693023682,grad_norm: 0.9999990586699693, iteration: 24822
loss: 1.0076982975006104,grad_norm: 0.9999992632522554, iteration: 24823
loss: 0.941500186920166,grad_norm: 0.9999991961302415, iteration: 24824
loss: 1.0147452354431152,grad_norm: 0.9999992397577169, iteration: 24825
loss: 1.0207253694534302,grad_norm: 0.9999990174095906, iteration: 24826
loss: 0.9402372241020203,grad_norm: 0.9568383443691352, iteration: 24827
loss: 1.029242753982544,grad_norm: 0.9640629399893073, iteration: 24828
loss: 0.9816550016403198,grad_norm: 0.9175106138499101, iteration: 24829
loss: 1.0347980260849,grad_norm: 0.9999994278184762, iteration: 24830
loss: 0.9751197695732117,grad_norm: 0.9879343478125149, iteration: 24831
loss: 1.0279492139816284,grad_norm: 0.9956506694832413, iteration: 24832
loss: 0.996803343296051,grad_norm: 0.9999991629533345, iteration: 24833
loss: 0.9896501302719116,grad_norm: 0.9345700721492559, iteration: 24834
loss: 1.0235856771469116,grad_norm: 0.800197359688784, iteration: 24835
loss: 0.9782260060310364,grad_norm: 0.8717116991550948, iteration: 24836
loss: 1.0363839864730835,grad_norm: 0.9999989648334999, iteration: 24837
loss: 1.00887131690979,grad_norm: 0.9999995914943831, iteration: 24838
loss: 0.9739344120025635,grad_norm: 0.999999097333654, iteration: 24839
loss: 1.0338793992996216,grad_norm: 0.9207935525283646, iteration: 24840
loss: 1.001712441444397,grad_norm: 0.9999991015356893, iteration: 24841
loss: 1.0502787828445435,grad_norm: 0.9091120006185801, iteration: 24842
loss: 1.0109041929244995,grad_norm: 0.9999990838523176, iteration: 24843
loss: 1.040440320968628,grad_norm: 0.9999991069106091, iteration: 24844
loss: 0.9828354120254517,grad_norm: 0.9999990642318005, iteration: 24845
loss: 0.9961509108543396,grad_norm: 0.9999992451394278, iteration: 24846
loss: 1.0061153173446655,grad_norm: 0.9999997127562124, iteration: 24847
loss: 1.0121678113937378,grad_norm: 0.9999991371003689, iteration: 24848
loss: 1.0068418979644775,grad_norm: 0.999999732489431, iteration: 24849
loss: 1.00136137008667,grad_norm: 0.9999993466648688, iteration: 24850
loss: 0.9803351759910583,grad_norm: 0.8731051083238817, iteration: 24851
loss: 1.052640676498413,grad_norm: 0.9999990648177023, iteration: 24852
loss: 1.0178146362304688,grad_norm: 0.9999989874218062, iteration: 24853
loss: 1.0195316076278687,grad_norm: 0.9475315721942301, iteration: 24854
loss: 0.9994690418243408,grad_norm: 0.9999990971761166, iteration: 24855
loss: 0.9890206456184387,grad_norm: 0.9448781052080842, iteration: 24856
loss: 1.020427942276001,grad_norm: 0.9999992485561056, iteration: 24857
loss: 0.9891654253005981,grad_norm: 0.9999991504359533, iteration: 24858
loss: 0.9745416045188904,grad_norm: 0.9999991945084371, iteration: 24859
loss: 0.9858487248420715,grad_norm: 0.9608560743648087, iteration: 24860
loss: 0.973537802696228,grad_norm: 0.9999991378052822, iteration: 24861
loss: 0.9905216693878174,grad_norm: 0.9999993730044427, iteration: 24862
loss: 0.9847466349601746,grad_norm: 0.9999996549762421, iteration: 24863
loss: 1.0225934982299805,grad_norm: 0.968867084527677, iteration: 24864
loss: 1.0341198444366455,grad_norm: 0.9671277572659193, iteration: 24865
loss: 0.9764239192008972,grad_norm: 0.9999992151760365, iteration: 24866
loss: 1.0043405294418335,grad_norm: 0.9999990153799443, iteration: 24867
loss: 0.9889068603515625,grad_norm: 0.8706328135159906, iteration: 24868
loss: 1.0675288438796997,grad_norm: 0.9999991237063552, iteration: 24869
loss: 1.0067144632339478,grad_norm: 0.9999990331579456, iteration: 24870
loss: 1.011419653892517,grad_norm: 0.9999991320959084, iteration: 24871
loss: 1.0260869264602661,grad_norm: 0.891865048010654, iteration: 24872
loss: 1.0098882913589478,grad_norm: 0.9999991334262545, iteration: 24873
loss: 0.9979534149169922,grad_norm: 0.9999996431067514, iteration: 24874
loss: 0.9985970258712769,grad_norm: 0.9317089884818132, iteration: 24875
loss: 1.0064270496368408,grad_norm: 0.991785503931111, iteration: 24876
loss: 0.9813401103019714,grad_norm: 0.999999248476326, iteration: 24877
loss: 1.0130494832992554,grad_norm: 0.9999991206087393, iteration: 24878
loss: 0.9524376392364502,grad_norm: 0.9974298601111531, iteration: 24879
loss: 1.0068570375442505,grad_norm: 0.8623622109425506, iteration: 24880
loss: 0.9831098914146423,grad_norm: 0.9838088100919373, iteration: 24881
loss: 0.9748100638389587,grad_norm: 0.9999992990693393, iteration: 24882
loss: 1.0853791236877441,grad_norm: 0.9999995710042532, iteration: 24883
loss: 1.0687888860702515,grad_norm: 0.9974439511464592, iteration: 24884
loss: 1.0232996940612793,grad_norm: 0.9999991124571149, iteration: 24885
loss: 1.0245552062988281,grad_norm: 0.9999990827059142, iteration: 24886
loss: 0.981475830078125,grad_norm: 0.9273881210653462, iteration: 24887
loss: 0.9818892478942871,grad_norm: 0.9039368654834982, iteration: 24888
loss: 0.9731994867324829,grad_norm: 0.7897163209827333, iteration: 24889
loss: 1.0338960886001587,grad_norm: 0.9999993815005243, iteration: 24890
loss: 1.0004682540893555,grad_norm: 0.9370404153069143, iteration: 24891
loss: 0.9881969690322876,grad_norm: 0.9999989921019533, iteration: 24892
loss: 0.9927650094032288,grad_norm: 0.9999989977861701, iteration: 24893
loss: 1.055160403251648,grad_norm: 0.9999988963551087, iteration: 24894
loss: 1.0155458450317383,grad_norm: 0.9367935288927307, iteration: 24895
loss: 1.061406135559082,grad_norm: 0.8357176299998698, iteration: 24896
loss: 0.9927449226379395,grad_norm: 0.999999022380646, iteration: 24897
loss: 0.9807650446891785,grad_norm: 0.9999991491149895, iteration: 24898
loss: 1.0396031141281128,grad_norm: 0.9999996545839241, iteration: 24899
loss: 1.033351182937622,grad_norm: 0.9999991463881382, iteration: 24900
loss: 1.0192869901657104,grad_norm: 0.9999991803680466, iteration: 24901
loss: 1.0279207229614258,grad_norm: 0.9999992493590297, iteration: 24902
loss: 1.0649453401565552,grad_norm: 0.9999992191491377, iteration: 24903
loss: 1.0291163921356201,grad_norm: 0.956485828622002, iteration: 24904
loss: 0.9741174578666687,grad_norm: 0.999999074204908, iteration: 24905
loss: 0.9874342083930969,grad_norm: 0.9927731854075951, iteration: 24906
loss: 1.0603408813476562,grad_norm: 0.9999991071152572, iteration: 24907
loss: 1.0059196949005127,grad_norm: 0.888456022062105, iteration: 24908
loss: 1.0476276874542236,grad_norm: 0.9337991721517684, iteration: 24909
loss: 0.9652851223945618,grad_norm: 0.9999992142075028, iteration: 24910
loss: 0.9865795373916626,grad_norm: 0.9999989475000356, iteration: 24911
loss: 1.0296634435653687,grad_norm: 0.9999989864166933, iteration: 24912
loss: 0.9939002394676208,grad_norm: 0.9999990316375713, iteration: 24913
loss: 1.0303263664245605,grad_norm: 0.9659800765014959, iteration: 24914
loss: 1.0210285186767578,grad_norm: 0.9999990135744679, iteration: 24915
loss: 1.0201525688171387,grad_norm: 0.9999990545130032, iteration: 24916
loss: 1.006374716758728,grad_norm: 0.9350119071274556, iteration: 24917
loss: 1.0240567922592163,grad_norm: 0.9999995908914696, iteration: 24918
loss: 1.005744218826294,grad_norm: 0.9999992707111773, iteration: 24919
loss: 1.0320544242858887,grad_norm: 0.9999992672653054, iteration: 24920
loss: 1.0208829641342163,grad_norm: 0.9443900428633277, iteration: 24921
loss: 1.0123534202575684,grad_norm: 0.9999990186583549, iteration: 24922
loss: 1.033437967300415,grad_norm: 0.9316946728366334, iteration: 24923
loss: 1.0212292671203613,grad_norm: 0.9550903451658018, iteration: 24924
loss: 1.0070079565048218,grad_norm: 0.9629927614676567, iteration: 24925
loss: 1.0125319957733154,grad_norm: 0.8877274776154304, iteration: 24926
loss: 1.0100655555725098,grad_norm: 0.9160138789587566, iteration: 24927
loss: 1.0340591669082642,grad_norm: 0.9999993879269867, iteration: 24928
loss: 0.9898124933242798,grad_norm: 0.9999991845662746, iteration: 24929
loss: 1.0129631757736206,grad_norm: 0.9410375585432782, iteration: 24930
loss: 0.9996613264083862,grad_norm: 0.8595287992411745, iteration: 24931
loss: 1.018653392791748,grad_norm: 0.9999998049702326, iteration: 24932
loss: 1.0189038515090942,grad_norm: 0.999999129417714, iteration: 24933
loss: 1.0249220132827759,grad_norm: 0.9999991257443359, iteration: 24934
loss: 1.0021884441375732,grad_norm: 0.9363725932878982, iteration: 24935
loss: 1.028007984161377,grad_norm: 0.8464004112930704, iteration: 24936
loss: 0.982248842716217,grad_norm: 0.7780258280070966, iteration: 24937
loss: 0.9927719235420227,grad_norm: 0.999999102210384, iteration: 24938
loss: 1.0409963130950928,grad_norm: 0.9999991018275523, iteration: 24939
loss: 0.9962926506996155,grad_norm: 0.9999996735198267, iteration: 24940
loss: 1.017749547958374,grad_norm: 0.9368590413588317, iteration: 24941
loss: 1.0414535999298096,grad_norm: 0.999999245352041, iteration: 24942
loss: 0.9886121153831482,grad_norm: 0.9704036439432435, iteration: 24943
loss: 0.9999330043792725,grad_norm: 0.999999062702706, iteration: 24944
loss: 1.0378918647766113,grad_norm: 0.8847255674441749, iteration: 24945
loss: 1.052180528640747,grad_norm: 0.8684682929641023, iteration: 24946
loss: 0.979486346244812,grad_norm: 0.9999997375676472, iteration: 24947
loss: 1.0404913425445557,grad_norm: 0.9442908514353932, iteration: 24948
loss: 1.0410891771316528,grad_norm: 0.9999994223698987, iteration: 24949
loss: 1.0591061115264893,grad_norm: 0.9999993117355551, iteration: 24950
loss: 1.0078843832015991,grad_norm: 0.9999993148464332, iteration: 24951
loss: 1.0316835641860962,grad_norm: 0.9999991517167215, iteration: 24952
loss: 0.9975994825363159,grad_norm: 0.9999989631628593, iteration: 24953
loss: 1.0588130950927734,grad_norm: 0.9999997762649471, iteration: 24954
loss: 0.9871730804443359,grad_norm: 0.9999992669137164, iteration: 24955
loss: 1.0095964670181274,grad_norm: 0.9999993084488162, iteration: 24956
loss: 1.0253111124038696,grad_norm: 0.9999992951621386, iteration: 24957
loss: 1.0202248096466064,grad_norm: 0.9420787627416628, iteration: 24958
loss: 0.9909886717796326,grad_norm: 0.9999994353586242, iteration: 24959
loss: 1.0028330087661743,grad_norm: 0.9897054257720209, iteration: 24960
loss: 1.0069206953048706,grad_norm: 0.9999991859425342, iteration: 24961
loss: 1.0258893966674805,grad_norm: 0.99999912692683, iteration: 24962
loss: 1.0394587516784668,grad_norm: 0.9999994628805468, iteration: 24963
loss: 0.9874003529548645,grad_norm: 0.9999996104928001, iteration: 24964
loss: 1.0536555051803589,grad_norm: 0.9999994376015038, iteration: 24965
loss: 1.0721817016601562,grad_norm: 0.9999994371171321, iteration: 24966
loss: 1.0006431341171265,grad_norm: 0.9212430405363387, iteration: 24967
loss: 1.0338702201843262,grad_norm: 0.9655271815629249, iteration: 24968
loss: 1.0047842264175415,grad_norm: 0.9999991249725407, iteration: 24969
loss: 1.0508215427398682,grad_norm: 0.9795579899143891, iteration: 24970
loss: 1.037498116493225,grad_norm: 0.9999993104111653, iteration: 24971
loss: 0.9984127879142761,grad_norm: 0.9999993599033189, iteration: 24972
loss: 1.0472643375396729,grad_norm: 0.999999072962689, iteration: 24973
loss: 1.0095289945602417,grad_norm: 0.9638774087571557, iteration: 24974
loss: 1.0416299104690552,grad_norm: 0.8683433634414638, iteration: 24975
loss: 1.0105870962142944,grad_norm: 0.9092859004516702, iteration: 24976
loss: 0.99980628490448,grad_norm: 0.999999067521252, iteration: 24977
loss: 0.9854485988616943,grad_norm: 0.9882069539392999, iteration: 24978
loss: 1.016626000404358,grad_norm: 0.9967161677455285, iteration: 24979
loss: 1.0208423137664795,grad_norm: 0.9999992058813622, iteration: 24980
loss: 1.0618245601654053,grad_norm: 0.9999998280380001, iteration: 24981
loss: 1.0126832723617554,grad_norm: 0.9999995479232905, iteration: 24982
loss: 0.9824422001838684,grad_norm: 0.9999991355975524, iteration: 24983
loss: 1.0379481315612793,grad_norm: 0.9999993270280204, iteration: 24984
loss: 0.9950380921363831,grad_norm: 0.9999991745055062, iteration: 24985
loss: 1.0468610525131226,grad_norm: 0.9999992960647504, iteration: 24986
loss: 1.0226529836654663,grad_norm: 0.9756209429126173, iteration: 24987
loss: 1.0372426509857178,grad_norm: 0.9999990719783263, iteration: 24988
loss: 1.047452449798584,grad_norm: 0.9519003862406766, iteration: 24989
loss: 0.9862329959869385,grad_norm: 0.9999991035010695, iteration: 24990
loss: 1.0955090522766113,grad_norm: 0.9999995759963206, iteration: 24991
loss: 1.0256659984588623,grad_norm: 0.9999996721674534, iteration: 24992
loss: 1.0177760124206543,grad_norm: 0.9999990847374378, iteration: 24993
loss: 0.9659819006919861,grad_norm: 0.9788619076559113, iteration: 24994
loss: 0.9543582201004028,grad_norm: 0.9999993145391519, iteration: 24995
loss: 1.0097583532333374,grad_norm: 0.927656976551954, iteration: 24996
loss: 0.9883350133895874,grad_norm: 0.9999992514855598, iteration: 24997
loss: 1.0042140483856201,grad_norm: 0.9999989754331401, iteration: 24998
loss: 1.0124351978302002,grad_norm: 0.9999996067099661, iteration: 24999
loss: 0.9865209460258484,grad_norm: 0.9999991274675569, iteration: 25000
loss: 1.0348881483078003,grad_norm: 0.999999397993049, iteration: 25001
loss: 1.0265100002288818,grad_norm: 0.9999992574075394, iteration: 25002
loss: 1.0216987133026123,grad_norm: 0.9999990428355863, iteration: 25003
loss: 0.9915598034858704,grad_norm: 0.9999991905413624, iteration: 25004
loss: 1.0397480726242065,grad_norm: 0.9999990754994306, iteration: 25005
loss: 1.0318763256072998,grad_norm: 0.9944360326117287, iteration: 25006
loss: 1.0310490131378174,grad_norm: 0.999999069742849, iteration: 25007
loss: 1.0148279666900635,grad_norm: 0.9999992117042389, iteration: 25008
loss: 0.997246503829956,grad_norm: 0.9999992774207271, iteration: 25009
loss: 0.9930343627929688,grad_norm: 0.9924657139696555, iteration: 25010
loss: 1.014443039894104,grad_norm: 0.9549315396658437, iteration: 25011
loss: 1.0459147691726685,grad_norm: 0.999999277561408, iteration: 25012
loss: 0.9874225854873657,grad_norm: 0.9999997701196139, iteration: 25013
loss: 0.99425208568573,grad_norm: 0.9999990227481644, iteration: 25014
loss: 1.0057456493377686,grad_norm: 0.8819404676149363, iteration: 25015
loss: 1.044299840927124,grad_norm: 0.9999994228567083, iteration: 25016
loss: 1.0238103866577148,grad_norm: 0.8417233833684492, iteration: 25017
loss: 1.0021787881851196,grad_norm: 0.9999990783515422, iteration: 25018
loss: 1.0364850759506226,grad_norm: 0.9999991122506651, iteration: 25019
loss: 1.0306966304779053,grad_norm: 0.999999771492319, iteration: 25020
loss: 0.9746253490447998,grad_norm: 0.7865058444939791, iteration: 25021
loss: 0.9988206624984741,grad_norm: 0.9999990822563698, iteration: 25022
loss: 1.0513019561767578,grad_norm: 0.9999990311618412, iteration: 25023
loss: 0.9841347336769104,grad_norm: 0.9149351204834174, iteration: 25024
loss: 0.9640534520149231,grad_norm: 0.9999992352792776, iteration: 25025
loss: 1.035530686378479,grad_norm: 0.9999994642784924, iteration: 25026
loss: 0.9753812551498413,grad_norm: 0.9999990761195768, iteration: 25027
loss: 1.040799856185913,grad_norm: 0.9999997697204533, iteration: 25028
loss: 1.0461288690567017,grad_norm: 0.8093093128684545, iteration: 25029
loss: 1.0124911069869995,grad_norm: 0.9999992123021861, iteration: 25030
loss: 1.0234999656677246,grad_norm: 0.9999990363707134, iteration: 25031
loss: 1.0061726570129395,grad_norm: 0.9688458535251242, iteration: 25032
loss: 1.0475001335144043,grad_norm: 0.9999991353349453, iteration: 25033
loss: 1.016582727432251,grad_norm: 0.999999291110562, iteration: 25034
loss: 1.0104782581329346,grad_norm: 0.9999992782712294, iteration: 25035
loss: 0.9985780119895935,grad_norm: 0.9999993624833196, iteration: 25036
loss: 1.010585904121399,grad_norm: 0.9999990472904848, iteration: 25037
loss: 1.0669984817504883,grad_norm: 0.999999805260415, iteration: 25038
loss: 1.0238256454467773,grad_norm: 0.999999255674465, iteration: 25039
loss: 0.9809615015983582,grad_norm: 0.9999991731868018, iteration: 25040
loss: 1.0047420263290405,grad_norm: 0.99999916914358, iteration: 25041
loss: 0.9347203969955444,grad_norm: 0.9999991337833413, iteration: 25042
loss: 0.9655179381370544,grad_norm: 0.9871491217920133, iteration: 25043
loss: 1.0120445489883423,grad_norm: 0.8836089122428662, iteration: 25044
loss: 1.08656644821167,grad_norm: 0.9999999282667165, iteration: 25045
loss: 1.0014762878417969,grad_norm: 0.8521377461279819, iteration: 25046
loss: 0.9813207387924194,grad_norm: 0.9216607449936904, iteration: 25047
loss: 1.0222617387771606,grad_norm: 0.9859799145799578, iteration: 25048
loss: 0.9840291142463684,grad_norm: 0.7771287751884102, iteration: 25049
loss: 0.9656971096992493,grad_norm: 0.9999991763272383, iteration: 25050
loss: 1.0022751092910767,grad_norm: 0.8664806077389343, iteration: 25051
loss: 1.0961511135101318,grad_norm: 0.9999996817579432, iteration: 25052
loss: 0.9906706213951111,grad_norm: 0.9999990878014385, iteration: 25053
loss: 1.0378245115280151,grad_norm: 0.9999994190918485, iteration: 25054
loss: 1.044306755065918,grad_norm: 0.9999995508935524, iteration: 25055
loss: 1.0096421241760254,grad_norm: 0.9999990072801576, iteration: 25056
loss: 1.0418639183044434,grad_norm: 0.9438036054497493, iteration: 25057
loss: 0.9936780333518982,grad_norm: 0.9823740210663082, iteration: 25058
loss: 0.9820943474769592,grad_norm: 0.9999994759317287, iteration: 25059
loss: 0.9817436337471008,grad_norm: 0.9999992047473756, iteration: 25060
loss: 1.042368769645691,grad_norm: 0.9999991976571865, iteration: 25061
loss: 0.9848198890686035,grad_norm: 0.9999991942475557, iteration: 25062
loss: 1.0003753900527954,grad_norm: 0.9999990937102632, iteration: 25063
loss: 0.9857528209686279,grad_norm: 0.9999993349147125, iteration: 25064
loss: 1.003164291381836,grad_norm: 0.9999993167984551, iteration: 25065
loss: 0.99903404712677,grad_norm: 0.9365569179352259, iteration: 25066
loss: 1.0060919523239136,grad_norm: 0.9999990985227002, iteration: 25067
loss: 1.060086727142334,grad_norm: 0.9992456751217678, iteration: 25068
loss: 1.0084216594696045,grad_norm: 0.9871717781920529, iteration: 25069
loss: 1.003821849822998,grad_norm: 0.9866638900169896, iteration: 25070
loss: 1.021951675415039,grad_norm: 0.9999994991129584, iteration: 25071
loss: 0.9799059629440308,grad_norm: 0.9999989634041977, iteration: 25072
loss: 1.0479236841201782,grad_norm: 0.9999992602384408, iteration: 25073
loss: 1.0116199254989624,grad_norm: 0.9999991125669422, iteration: 25074
loss: 1.0284498929977417,grad_norm: 0.9999996757368874, iteration: 25075
loss: 1.0290201902389526,grad_norm: 0.9999990981862432, iteration: 25076
loss: 0.9540195465087891,grad_norm: 0.9999991406179459, iteration: 25077
loss: 0.9342617392539978,grad_norm: 0.9999992414938055, iteration: 25078
loss: 1.0100146532058716,grad_norm: 0.9999992583298405, iteration: 25079
loss: 0.9923509359359741,grad_norm: 0.8550101357379595, iteration: 25080
loss: 1.0424234867095947,grad_norm: 0.9999991771808769, iteration: 25081
loss: 0.9836567640304565,grad_norm: 0.989523013078046, iteration: 25082
loss: 0.9964920878410339,grad_norm: 0.9133580820850601, iteration: 25083
loss: 1.0308830738067627,grad_norm: 0.9999991413880495, iteration: 25084
loss: 0.9992056488990784,grad_norm: 0.9999996495813015, iteration: 25085
loss: 0.9963627457618713,grad_norm: 0.9266245368752315, iteration: 25086
loss: 0.953035295009613,grad_norm: 0.8971706354849901, iteration: 25087
loss: 1.1088429689407349,grad_norm: 0.9999994056857182, iteration: 25088
loss: 0.9926456212997437,grad_norm: 0.9999992488469078, iteration: 25089
loss: 1.009671688079834,grad_norm: 0.9967240457925396, iteration: 25090
loss: 0.9968940019607544,grad_norm: 0.9727645669271099, iteration: 25091
loss: 1.0271928310394287,grad_norm: 0.9999991106899853, iteration: 25092
loss: 1.0171878337860107,grad_norm: 0.9999992959641132, iteration: 25093
loss: 1.006347417831421,grad_norm: 0.9879103620405161, iteration: 25094
loss: 1.0132092237472534,grad_norm: 0.99999900912866, iteration: 25095
loss: 0.9861301779747009,grad_norm: 0.9999990381114874, iteration: 25096
loss: 0.9762091040611267,grad_norm: 0.9999990242830703, iteration: 25097
loss: 1.016027808189392,grad_norm: 0.8314337799009651, iteration: 25098
loss: 1.025547742843628,grad_norm: 0.9999996914625935, iteration: 25099
loss: 1.0255135297775269,grad_norm: 0.9999991381699558, iteration: 25100
loss: 0.9908390641212463,grad_norm: 0.9999991049799631, iteration: 25101
loss: 1.0121445655822754,grad_norm: 0.9999990057954401, iteration: 25102
loss: 1.0526036024093628,grad_norm: 0.9999991526429203, iteration: 25103
loss: 1.0087554454803467,grad_norm: 0.9999991548120845, iteration: 25104
loss: 0.9721634984016418,grad_norm: 0.999999287581031, iteration: 25105
loss: 0.9935474395751953,grad_norm: 0.8660702783229302, iteration: 25106
loss: 1.0074899196624756,grad_norm: 0.9414423831218304, iteration: 25107
loss: 1.052808165550232,grad_norm: 0.9999994556965724, iteration: 25108
loss: 1.0004206895828247,grad_norm: 0.9999991218543999, iteration: 25109
loss: 1.026839017868042,grad_norm: 0.8927603269306471, iteration: 25110
loss: 1.0383992195129395,grad_norm: 0.9999998684914216, iteration: 25111
loss: 1.0156489610671997,grad_norm: 0.8987322549891947, iteration: 25112
loss: 0.990939736366272,grad_norm: 0.9999999194697159, iteration: 25113
loss: 0.9689968824386597,grad_norm: 0.8976576381296671, iteration: 25114
loss: 1.0009522438049316,grad_norm: 0.8913502512610362, iteration: 25115
loss: 1.0000507831573486,grad_norm: 0.8539437907617722, iteration: 25116
loss: 1.03263521194458,grad_norm: 0.9999990345409613, iteration: 25117
loss: 1.0347530841827393,grad_norm: 0.9999991207642163, iteration: 25118
loss: 1.0589593648910522,grad_norm: 0.9999992380758969, iteration: 25119
loss: 0.9879423975944519,grad_norm: 0.9242645586588236, iteration: 25120
loss: 1.0101947784423828,grad_norm: 0.9999991094318402, iteration: 25121
loss: 1.024834156036377,grad_norm: 0.9998292494857842, iteration: 25122
loss: 1.0442187786102295,grad_norm: 0.9999990958219728, iteration: 25123
loss: 1.0260300636291504,grad_norm: 0.917827284230963, iteration: 25124
loss: 1.0572761297225952,grad_norm: 0.9999991267754667, iteration: 25125
loss: 1.002130389213562,grad_norm: 0.9999990203429538, iteration: 25126
loss: 1.021514892578125,grad_norm: 0.9999991066658067, iteration: 25127
loss: 0.9881172776222229,grad_norm: 0.7824007367991578, iteration: 25128
loss: 1.0248740911483765,grad_norm: 0.9031452398854971, iteration: 25129
loss: 1.0353347063064575,grad_norm: 0.9669855503709687, iteration: 25130
loss: 0.9944658279418945,grad_norm: 0.8113743215698829, iteration: 25131
loss: 1.0177639722824097,grad_norm: 0.8468090078456805, iteration: 25132
loss: 1.0306777954101562,grad_norm: 0.9999990809048879, iteration: 25133
loss: 1.0151644945144653,grad_norm: 0.9999991795443347, iteration: 25134
loss: 0.9936743974685669,grad_norm: 0.999999173591965, iteration: 25135
loss: 1.0105785131454468,grad_norm: 0.9280579664200468, iteration: 25136
loss: 0.9959077835083008,grad_norm: 0.9999990058645504, iteration: 25137
loss: 1.0305589437484741,grad_norm: 0.9999992425227257, iteration: 25138
loss: 0.9893362522125244,grad_norm: 0.9337083957926346, iteration: 25139
loss: 1.0454699993133545,grad_norm: 0.9270776008432633, iteration: 25140
loss: 1.0143866539001465,grad_norm: 0.9999993519034793, iteration: 25141
loss: 1.0290584564208984,grad_norm: 0.8673584496456792, iteration: 25142
loss: 1.009281873703003,grad_norm: 0.9999991034531567, iteration: 25143
loss: 1.0126066207885742,grad_norm: 0.9307412787972085, iteration: 25144
loss: 0.9756812453269958,grad_norm: 0.9999990336787087, iteration: 25145
loss: 1.0383011102676392,grad_norm: 0.9999991386095785, iteration: 25146
loss: 1.0101020336151123,grad_norm: 0.9055996190912143, iteration: 25147
loss: 0.9969977736473083,grad_norm: 0.9426695120478211, iteration: 25148
loss: 0.995262861251831,grad_norm: 0.9641035803769901, iteration: 25149
loss: 1.0107423067092896,grad_norm: 0.8864143226001885, iteration: 25150
loss: 1.0409363508224487,grad_norm: 0.9999989458755318, iteration: 25151
loss: 1.036940336227417,grad_norm: 0.9999992661161113, iteration: 25152
loss: 1.0372284650802612,grad_norm: 0.9999995073580571, iteration: 25153
loss: 1.001430630683899,grad_norm: 0.8940692231895768, iteration: 25154
loss: 0.9972429275512695,grad_norm: 0.9565650152698632, iteration: 25155
loss: 1.0090924501419067,grad_norm: 0.8820622275058151, iteration: 25156
loss: 1.0485562086105347,grad_norm: 0.8640601898459365, iteration: 25157
loss: 0.9841989278793335,grad_norm: 0.8460514579749006, iteration: 25158
loss: 1.0205988883972168,grad_norm: 0.9999991442792032, iteration: 25159
loss: 1.0178804397583008,grad_norm: 0.9999990694451861, iteration: 25160
loss: 1.0017058849334717,grad_norm: 0.9999990205373417, iteration: 25161
loss: 1.0445830821990967,grad_norm: 0.9999991011820445, iteration: 25162
loss: 1.0307365655899048,grad_norm: 0.9999992085293156, iteration: 25163
loss: 1.0379881858825684,grad_norm: 0.9556411372007707, iteration: 25164
loss: 1.0403051376342773,grad_norm: 0.9999993095286993, iteration: 25165
loss: 1.027439832687378,grad_norm: 0.963232713921572, iteration: 25166
loss: 0.9933392405509949,grad_norm: 0.9999990472669373, iteration: 25167
loss: 1.044931411743164,grad_norm: 0.9999991728946307, iteration: 25168
loss: 0.9783090949058533,grad_norm: 0.9927850088605539, iteration: 25169
loss: 1.020272970199585,grad_norm: 0.9614094675025758, iteration: 25170
loss: 0.9800649881362915,grad_norm: 0.9999991030499031, iteration: 25171
loss: 1.004483699798584,grad_norm: 0.9999992076014957, iteration: 25172
loss: 1.0046138763427734,grad_norm: 0.9964127807002912, iteration: 25173
loss: 1.0459283590316772,grad_norm: 0.9487182717342146, iteration: 25174
loss: 0.9948923587799072,grad_norm: 0.9709183728891716, iteration: 25175
loss: 1.0270328521728516,grad_norm: 0.9104974473101709, iteration: 25176
loss: 1.0550994873046875,grad_norm: 0.9999994161159269, iteration: 25177
loss: 0.9913531541824341,grad_norm: 0.9999990505219686, iteration: 25178
loss: 0.9789372086524963,grad_norm: 0.9215880037135564, iteration: 25179
loss: 1.024402379989624,grad_norm: 0.999999219234015, iteration: 25180
loss: 1.0554900169372559,grad_norm: 0.9999996463903393, iteration: 25181
loss: 0.995913565158844,grad_norm: 0.9999993320361853, iteration: 25182
loss: 1.0329155921936035,grad_norm: 0.9999991962212166, iteration: 25183
loss: 1.0529789924621582,grad_norm: 0.9999992930138223, iteration: 25184
loss: 1.0155502557754517,grad_norm: 0.8770286453751333, iteration: 25185
loss: 1.0091525316238403,grad_norm: 0.9999993741194682, iteration: 25186
loss: 0.9803293347358704,grad_norm: 0.8704629613100119, iteration: 25187
loss: 0.9829043745994568,grad_norm: 0.8914768035770019, iteration: 25188
loss: 1.0436475276947021,grad_norm: 0.9999991528150259, iteration: 25189
loss: 0.9852412343025208,grad_norm: 0.9803670763930341, iteration: 25190
loss: 1.0076355934143066,grad_norm: 0.9999993052676825, iteration: 25191
loss: 1.0301762819290161,grad_norm: 0.9999990527911914, iteration: 25192
loss: 1.002398133277893,grad_norm: 0.999999300961655, iteration: 25193
loss: 0.9821156859397888,grad_norm: 0.9999992082062714, iteration: 25194
loss: 0.9972307682037354,grad_norm: 0.8114183436520775, iteration: 25195
loss: 0.9954311847686768,grad_norm: 0.9999993245224488, iteration: 25196
loss: 1.0132499933242798,grad_norm: 0.9497729852610183, iteration: 25197
loss: 0.9944258332252502,grad_norm: 0.9999991232149027, iteration: 25198
loss: 1.0192888975143433,grad_norm: 0.9999990074482614, iteration: 25199
loss: 1.0179692506790161,grad_norm: 0.999999048645269, iteration: 25200
loss: 1.000168800354004,grad_norm: 0.988737829520996, iteration: 25201
loss: 1.0056591033935547,grad_norm: 0.9315782335302591, iteration: 25202
loss: 1.0351396799087524,grad_norm: 0.9663246366477926, iteration: 25203
loss: 1.0183597803115845,grad_norm: 0.9999991856903384, iteration: 25204
loss: 1.0307245254516602,grad_norm: 0.8360371533777977, iteration: 25205
loss: 1.0220832824707031,grad_norm: 0.9999990766481494, iteration: 25206
loss: 0.9971327185630798,grad_norm: 0.9999992211675371, iteration: 25207
loss: 0.9827557802200317,grad_norm: 0.9999992542846717, iteration: 25208
loss: 1.0283228158950806,grad_norm: 0.949298044808873, iteration: 25209
loss: 1.0288540124893188,grad_norm: 0.8601819089650886, iteration: 25210
loss: 1.0187329053878784,grad_norm: 0.8996816176208634, iteration: 25211
loss: 0.9952857494354248,grad_norm: 0.9024584804465415, iteration: 25212
loss: 1.0171481370925903,grad_norm: 0.9999991073105668, iteration: 25213
loss: 0.9852732419967651,grad_norm: 0.9989663299281611, iteration: 25214
loss: 1.0131081342697144,grad_norm: 0.9057825282652611, iteration: 25215
loss: 1.0135602951049805,grad_norm: 0.9416208809793855, iteration: 25216
loss: 1.009380578994751,grad_norm: 0.9890809407152608, iteration: 25217
loss: 1.015352725982666,grad_norm: 0.999101363036499, iteration: 25218
loss: 0.9751977920532227,grad_norm: 0.9999991236346046, iteration: 25219
loss: 1.00148606300354,grad_norm: 0.8403702082396246, iteration: 25220
loss: 1.014062762260437,grad_norm: 0.9640475052492401, iteration: 25221
loss: 1.0267235040664673,grad_norm: 0.9999991634046007, iteration: 25222
loss: 0.9924936294555664,grad_norm: 0.997914295095525, iteration: 25223
loss: 1.012230396270752,grad_norm: 0.9999992218405774, iteration: 25224
loss: 1.0124238729476929,grad_norm: 0.9900137426222921, iteration: 25225
loss: 1.0161577463150024,grad_norm: 0.8951350848308081, iteration: 25226
loss: 0.993803858757019,grad_norm: 0.9999992456913986, iteration: 25227
loss: 1.0030161142349243,grad_norm: 0.999999073927293, iteration: 25228
loss: 1.0296099185943604,grad_norm: 0.9999991083348653, iteration: 25229
loss: 1.0027378797531128,grad_norm: 0.9130088353890903, iteration: 25230
loss: 1.0382893085479736,grad_norm: 0.9999990278160553, iteration: 25231
loss: 1.019243597984314,grad_norm: 0.9999993591574007, iteration: 25232
loss: 1.0604275465011597,grad_norm: 0.935026098700624, iteration: 25233
loss: 0.9897680878639221,grad_norm: 0.8587186437771545, iteration: 25234
loss: 1.016634464263916,grad_norm: 0.999999093846322, iteration: 25235
loss: 1.0040596723556519,grad_norm: 0.944842056192059, iteration: 25236
loss: 0.9870277047157288,grad_norm: 0.999998992394391, iteration: 25237
loss: 1.0114058256149292,grad_norm: 0.9999991305714103, iteration: 25238
loss: 0.9881871342658997,grad_norm: 0.8470131601405462, iteration: 25239
loss: 1.0173245668411255,grad_norm: 0.8935795094898709, iteration: 25240
loss: 1.0532128810882568,grad_norm: 0.9640227901339359, iteration: 25241
loss: 1.0240401029586792,grad_norm: 0.9999990583464943, iteration: 25242
loss: 1.0068899393081665,grad_norm: 0.9999990191363468, iteration: 25243
loss: 0.9858201742172241,grad_norm: 0.9999990826229117, iteration: 25244
loss: 0.9953542947769165,grad_norm: 0.9116321019175624, iteration: 25245
loss: 1.0101162195205688,grad_norm: 0.9999991012306466, iteration: 25246
loss: 1.0147753953933716,grad_norm: 0.9999990197244604, iteration: 25247
loss: 1.0026320219039917,grad_norm: 0.984287917311465, iteration: 25248
loss: 1.0165026187896729,grad_norm: 0.9400822090718438, iteration: 25249
loss: 0.9946907162666321,grad_norm: 0.9581727286298658, iteration: 25250
loss: 1.0215623378753662,grad_norm: 0.9999990667910902, iteration: 25251
loss: 0.9971803426742554,grad_norm: 0.9999990479623866, iteration: 25252
loss: 1.014691948890686,grad_norm: 0.9491907079021028, iteration: 25253
loss: 1.0524916648864746,grad_norm: 0.999999151951829, iteration: 25254
loss: 1.0155551433563232,grad_norm: 0.9325115654699169, iteration: 25255
loss: 1.0205039978027344,grad_norm: 0.8265112943133543, iteration: 25256
loss: 0.978218674659729,grad_norm: 0.9999992594680042, iteration: 25257
loss: 1.0075920820236206,grad_norm: 0.921115784854351, iteration: 25258
loss: 1.0060397386550903,grad_norm: 0.9999992263101886, iteration: 25259
loss: 1.0194505453109741,grad_norm: 0.9999991984715045, iteration: 25260
loss: 0.9817609190940857,grad_norm: 0.9999990492994524, iteration: 25261
loss: 1.0077227354049683,grad_norm: 0.941074719141472, iteration: 25262
loss: 0.9876857995986938,grad_norm: 0.9473435211288136, iteration: 25263
loss: 0.9778198599815369,grad_norm: 0.8438001879556922, iteration: 25264
loss: 1.0469316244125366,grad_norm: 0.9999991664332025, iteration: 25265
loss: 0.9520567059516907,grad_norm: 0.9999989974579374, iteration: 25266
loss: 1.0019387006759644,grad_norm: 0.9984017181514986, iteration: 25267
loss: 1.0058343410491943,grad_norm: 0.9999992545110863, iteration: 25268
loss: 0.9798358678817749,grad_norm: 0.7748882683307708, iteration: 25269
loss: 1.014175534248352,grad_norm: 0.9999991032933458, iteration: 25270
loss: 1.006980061531067,grad_norm: 0.9999993402792146, iteration: 25271
loss: 1.0136438608169556,grad_norm: 0.9999991861073103, iteration: 25272
loss: 1.0517685413360596,grad_norm: 0.8984983253677774, iteration: 25273
loss: 1.021969199180603,grad_norm: 0.9337202098426921, iteration: 25274
loss: 0.986940324306488,grad_norm: 0.8706010026655291, iteration: 25275
loss: 1.0011038780212402,grad_norm: 0.938293957131202, iteration: 25276
loss: 1.076278567314148,grad_norm: 0.9999990343242101, iteration: 25277
loss: 1.0265530347824097,grad_norm: 0.9999991260413518, iteration: 25278
loss: 1.002535104751587,grad_norm: 0.912594089331732, iteration: 25279
loss: 1.0374740362167358,grad_norm: 0.9999991483927431, iteration: 25280
loss: 1.0255632400512695,grad_norm: 0.8228849444482381, iteration: 25281
loss: 1.0077295303344727,grad_norm: 0.9999992861273954, iteration: 25282
loss: 1.002029538154602,grad_norm: 0.8786919572195185, iteration: 25283
loss: 1.0361034870147705,grad_norm: 0.9999990563939481, iteration: 25284
loss: 1.0265015363693237,grad_norm: 0.9999991036435114, iteration: 25285
loss: 1.0134145021438599,grad_norm: 0.891661498451018, iteration: 25286
loss: 1.0404589176177979,grad_norm: 0.9999990678223941, iteration: 25287
loss: 1.017539620399475,grad_norm: 0.9999991565470253, iteration: 25288
loss: 1.0308420658111572,grad_norm: 0.9290027931766681, iteration: 25289
loss: 0.9907589554786682,grad_norm: 0.9959695312472645, iteration: 25290
loss: 0.9921079277992249,grad_norm: 0.9534820057651837, iteration: 25291
loss: 1.0106693506240845,grad_norm: 0.9385914329240339, iteration: 25292
loss: 0.9909374117851257,grad_norm: 0.876160914993426, iteration: 25293
loss: 1.0121678113937378,grad_norm: 0.9999991087302927, iteration: 25294
loss: 0.9981237053871155,grad_norm: 0.9999990241597774, iteration: 25295
loss: 1.0161744356155396,grad_norm: 0.8360478891867698, iteration: 25296
loss: 1.0324193239212036,grad_norm: 0.9999991111140671, iteration: 25297
loss: 1.015588641166687,grad_norm: 0.8007133550586637, iteration: 25298
loss: 1.0285598039627075,grad_norm: 0.9211265907006124, iteration: 25299
loss: 1.0240484476089478,grad_norm: 0.9329483060895264, iteration: 25300
loss: 1.0256321430206299,grad_norm: 0.9999990952997844, iteration: 25301
loss: 1.064231038093567,grad_norm: 0.9999998767493121, iteration: 25302
loss: 0.9771484732627869,grad_norm: 0.8617150526024303, iteration: 25303
loss: 1.0525147914886475,grad_norm: 0.9999993362929308, iteration: 25304
loss: 1.015634536743164,grad_norm: 0.8082879968782097, iteration: 25305
loss: 1.0110902786254883,grad_norm: 0.999999034111932, iteration: 25306
loss: 1.0382754802703857,grad_norm: 0.9999991190959662, iteration: 25307
loss: 1.0324827432632446,grad_norm: 0.9415654879564734, iteration: 25308
loss: 1.0535788536071777,grad_norm: 0.9999992330142888, iteration: 25309
loss: 1.0065325498580933,grad_norm: 0.9999992001283539, iteration: 25310
loss: 1.0019625425338745,grad_norm: 0.9019814317538045, iteration: 25311
loss: 1.0065959692001343,grad_norm: 0.9999991728596807, iteration: 25312
loss: 1.0233542919158936,grad_norm: 0.999999079455893, iteration: 25313
loss: 1.0388009548187256,grad_norm: 0.904132411737128, iteration: 25314
loss: 1.006731629371643,grad_norm: 0.9351171682796866, iteration: 25315
loss: 1.0085142850875854,grad_norm: 0.9714104304073545, iteration: 25316
loss: 1.0223546028137207,grad_norm: 0.9837360101805058, iteration: 25317
loss: 0.9815782904624939,grad_norm: 0.8936016253226595, iteration: 25318
loss: 1.0179309844970703,grad_norm: 0.9999991220599921, iteration: 25319
loss: 0.9966362714767456,grad_norm: 0.9708000549542599, iteration: 25320
loss: 1.0156241655349731,grad_norm: 0.7962087845062207, iteration: 25321
loss: 1.0124950408935547,grad_norm: 0.9999991275519764, iteration: 25322
loss: 1.0239737033843994,grad_norm: 0.812976757784093, iteration: 25323
loss: 0.9849772453308105,grad_norm: 0.9867589993452727, iteration: 25324
loss: 0.9837508797645569,grad_norm: 0.985217004086142, iteration: 25325
loss: 1.0067517757415771,grad_norm: 0.9462766496699938, iteration: 25326
loss: 1.023862361907959,grad_norm: 0.9999990532263637, iteration: 25327
loss: 1.0021653175354004,grad_norm: 0.7591834974454557, iteration: 25328
loss: 1.025360107421875,grad_norm: 0.9999990969154757, iteration: 25329
loss: 0.9933736324310303,grad_norm: 0.8917809587418214, iteration: 25330
loss: 1.0125973224639893,grad_norm: 0.999999049756167, iteration: 25331
loss: 1.002286434173584,grad_norm: 0.8536183088473975, iteration: 25332
loss: 1.0229414701461792,grad_norm: 0.9999990499629547, iteration: 25333
loss: 1.0149786472320557,grad_norm: 0.9999990830310058, iteration: 25334
loss: 1.020751953125,grad_norm: 0.9335541477516939, iteration: 25335
loss: 1.001329779624939,grad_norm: 0.9999992186218044, iteration: 25336
loss: 1.0025070905685425,grad_norm: 0.9630162302496721, iteration: 25337
loss: 1.0197561979293823,grad_norm: 0.9304071287037355, iteration: 25338
loss: 1.0003254413604736,grad_norm: 0.853620269573217, iteration: 25339
loss: 1.0453424453735352,grad_norm: 0.9999994093843279, iteration: 25340
loss: 1.0670247077941895,grad_norm: 0.999999133139086, iteration: 25341
loss: 1.0231963396072388,grad_norm: 0.9292344765596903, iteration: 25342
loss: 0.9773852825164795,grad_norm: 0.9999991259205661, iteration: 25343
loss: 1.0233768224716187,grad_norm: 0.9999992304060327, iteration: 25344
loss: 1.0258091688156128,grad_norm: 0.9999996914916519, iteration: 25345
loss: 1.0199382305145264,grad_norm: 0.9999992129901506, iteration: 25346
loss: 1.004351258277893,grad_norm: 0.9656008063474144, iteration: 25347
loss: 0.9840871691703796,grad_norm: 0.9999989652170579, iteration: 25348
loss: 0.9993342757225037,grad_norm: 0.9999990901861366, iteration: 25349
loss: 0.9823765158653259,grad_norm: 0.8473490415803963, iteration: 25350
loss: 1.0196059942245483,grad_norm: 0.9481034088872724, iteration: 25351
loss: 1.0338995456695557,grad_norm: 0.9999989594987646, iteration: 25352
loss: 1.007946491241455,grad_norm: 0.9999990889847961, iteration: 25353
loss: 1.0211251974105835,grad_norm: 0.9058934710409914, iteration: 25354
loss: 1.021446943283081,grad_norm: 0.9999991685390279, iteration: 25355
loss: 1.008273720741272,grad_norm: 0.9266616314370137, iteration: 25356
loss: 1.0118013620376587,grad_norm: 0.9999992239319805, iteration: 25357
loss: 1.037460446357727,grad_norm: 0.9999993076734535, iteration: 25358
loss: 1.0030362606048584,grad_norm: 0.9999990264740851, iteration: 25359
loss: 1.0076905488967896,grad_norm: 0.8886790689699319, iteration: 25360
loss: 1.0120933055877686,grad_norm: 0.9999993756407467, iteration: 25361
loss: 1.0057103633880615,grad_norm: 0.9999993254985989, iteration: 25362
loss: 1.0136433839797974,grad_norm: 0.9355059815452664, iteration: 25363
loss: 1.0281000137329102,grad_norm: 0.9999992557766719, iteration: 25364
loss: 1.0256987810134888,grad_norm: 0.9999990278382141, iteration: 25365
loss: 0.9943431615829468,grad_norm: 0.9890839362589369, iteration: 25366
loss: 0.9939160943031311,grad_norm: 0.9264668600523814, iteration: 25367
loss: 0.9686294794082642,grad_norm: 0.9383097184144973, iteration: 25368
loss: 1.0043984651565552,grad_norm: 0.9953655408169916, iteration: 25369
loss: 0.9910072088241577,grad_norm: 0.965752177325757, iteration: 25370
loss: 0.9912934899330139,grad_norm: 0.7721859799224063, iteration: 25371
loss: 0.9772414565086365,grad_norm: 0.8231121320504278, iteration: 25372
loss: 0.9832768440246582,grad_norm: 0.9999990269723469, iteration: 25373
loss: 1.0474194288253784,grad_norm: 0.9999998623016214, iteration: 25374
loss: 1.012089490890503,grad_norm: 0.9999991547554222, iteration: 25375
loss: 0.9827872514724731,grad_norm: 0.9359830957375097, iteration: 25376
loss: 0.9800177812576294,grad_norm: 0.7719958330143524, iteration: 25377
loss: 1.0501669645309448,grad_norm: 0.9620248395379508, iteration: 25378
loss: 0.9964112639427185,grad_norm: 0.9999990731010825, iteration: 25379
loss: 1.0226564407348633,grad_norm: 0.9999989998922705, iteration: 25380
loss: 1.018588900566101,grad_norm: 0.9999990184456756, iteration: 25381
loss: 0.9650067090988159,grad_norm: 0.854415868324071, iteration: 25382
loss: 1.0605303049087524,grad_norm: 0.9999992483204438, iteration: 25383
loss: 1.0480042695999146,grad_norm: 0.9729627950267783, iteration: 25384
loss: 0.9661847352981567,grad_norm: 0.9574503394000532, iteration: 25385
loss: 1.0548179149627686,grad_norm: 0.9999990348412928, iteration: 25386
loss: 0.9956997036933899,grad_norm: 0.9635562575224432, iteration: 25387
loss: 0.9958685636520386,grad_norm: 0.9999992407594245, iteration: 25388
loss: 0.9945554137229919,grad_norm: 0.9999991313801229, iteration: 25389
loss: 0.9894430637359619,grad_norm: 0.9999989907518898, iteration: 25390
loss: 0.9962204694747925,grad_norm: 0.9202995528770487, iteration: 25391
loss: 0.9775514006614685,grad_norm: 0.9126372557890184, iteration: 25392
loss: 1.0288506746292114,grad_norm: 0.9460357401570809, iteration: 25393
loss: 1.0020997524261475,grad_norm: 0.9999990512800427, iteration: 25394
loss: 1.0157004594802856,grad_norm: 0.9887829918381721, iteration: 25395
loss: 1.0721983909606934,grad_norm: 0.9999994636359065, iteration: 25396
loss: 1.0201860666275024,grad_norm: 0.99999910283626, iteration: 25397
loss: 0.9624782800674438,grad_norm: 0.9999991951025002, iteration: 25398
loss: 1.0131551027297974,grad_norm: 0.9887524832609251, iteration: 25399
loss: 1.0588274002075195,grad_norm: 0.9999993247129129, iteration: 25400
loss: 0.9885029196739197,grad_norm: 0.9999995504715806, iteration: 25401
loss: 0.9594127535820007,grad_norm: 0.9198609188854837, iteration: 25402
loss: 1.0056201219558716,grad_norm: 0.9805702848771627, iteration: 25403
loss: 1.0145670175552368,grad_norm: 0.8949076384640012, iteration: 25404
loss: 1.0209108591079712,grad_norm: 0.8704050005934113, iteration: 25405
loss: 1.0356671810150146,grad_norm: 0.7771380052939003, iteration: 25406
loss: 1.0017210245132446,grad_norm: 0.9999992790853461, iteration: 25407
loss: 1.0259984731674194,grad_norm: 0.9011431462225151, iteration: 25408
loss: 1.0097917318344116,grad_norm: 0.9999990371201164, iteration: 25409
loss: 1.00089693069458,grad_norm: 0.999999211934152, iteration: 25410
loss: 1.0108227729797363,grad_norm: 0.999999238286597, iteration: 25411
loss: 1.0051544904708862,grad_norm: 0.9999991069008697, iteration: 25412
loss: 0.99998939037323,grad_norm: 0.9999992358840395, iteration: 25413
loss: 1.0616977214813232,grad_norm: 0.9999994055910398, iteration: 25414
loss: 1.002376914024353,grad_norm: 0.8306867295701172, iteration: 25415
loss: 0.9755880236625671,grad_norm: 0.9999990495225026, iteration: 25416
loss: 1.0362941026687622,grad_norm: 0.999999249647095, iteration: 25417
loss: 1.0182188749313354,grad_norm: 0.8373490815298587, iteration: 25418
loss: 1.0169997215270996,grad_norm: 0.9102586745976068, iteration: 25419
loss: 1.0378828048706055,grad_norm: 0.9999990616771565, iteration: 25420
loss: 1.0217416286468506,grad_norm: 0.999999014930785, iteration: 25421
loss: 1.012390375137329,grad_norm: 0.9637599733018672, iteration: 25422
loss: 1.0110656023025513,grad_norm: 0.9999991851414943, iteration: 25423
loss: 0.9832326769828796,grad_norm: 0.9999992041763912, iteration: 25424
loss: 1.0292909145355225,grad_norm: 0.9894219371227757, iteration: 25425
loss: 1.0223037004470825,grad_norm: 0.999999590339449, iteration: 25426
loss: 1.0251725912094116,grad_norm: 0.9999990135900736, iteration: 25427
loss: 1.0354207754135132,grad_norm: 0.9999994950208226, iteration: 25428
loss: 1.0209835767745972,grad_norm: 0.9747398899808725, iteration: 25429
loss: 1.0235062837600708,grad_norm: 0.8523440163869167, iteration: 25430
loss: 0.9923743605613708,grad_norm: 0.8567379036148219, iteration: 25431
loss: 0.99835604429245,grad_norm: 0.9999991268632453, iteration: 25432
loss: 1.0159393548965454,grad_norm: 0.8701148669047246, iteration: 25433
loss: 1.0143518447875977,grad_norm: 0.9794685592299176, iteration: 25434
loss: 1.0236916542053223,grad_norm: 0.9999991977999142, iteration: 25435
loss: 0.9874928593635559,grad_norm: 0.961583196774489, iteration: 25436
loss: 1.0067254304885864,grad_norm: 0.9999991869052383, iteration: 25437
loss: 1.0616945028305054,grad_norm: 0.8878654136386172, iteration: 25438
loss: 1.0157970190048218,grad_norm: 0.825589403990835, iteration: 25439
loss: 1.03365957736969,grad_norm: 0.9153708055594647, iteration: 25440
loss: 0.9994934797286987,grad_norm: 0.9999991931043132, iteration: 25441
loss: 0.9962639212608337,grad_norm: 0.9325615551535773, iteration: 25442
loss: 0.9827964901924133,grad_norm: 0.9999991631901306, iteration: 25443
loss: 0.9753784537315369,grad_norm: 0.999999235415, iteration: 25444
loss: 1.0022655725479126,grad_norm: 0.9999995689796265, iteration: 25445
loss: 0.9894673824310303,grad_norm: 0.8396545806196222, iteration: 25446
loss: 1.016493320465088,grad_norm: 0.9999990896935774, iteration: 25447
loss: 1.0000531673431396,grad_norm: 0.9099887357352114, iteration: 25448
loss: 1.0368839502334595,grad_norm: 0.9999991761281123, iteration: 25449
loss: 1.0555567741394043,grad_norm: 0.9999997908683238, iteration: 25450
loss: 1.0275479555130005,grad_norm: 0.9999992908826547, iteration: 25451
loss: 1.0343165397644043,grad_norm: 0.9999992573403761, iteration: 25452
loss: 1.0126069784164429,grad_norm: 0.9989403630050736, iteration: 25453
loss: 0.969687283039093,grad_norm: 0.9447672156044009, iteration: 25454
loss: 1.0181547403335571,grad_norm: 0.8939751290395848, iteration: 25455
loss: 0.996928870677948,grad_norm: 0.9999991545905632, iteration: 25456
loss: 1.0360697507858276,grad_norm: 0.8192024258708884, iteration: 25457
loss: 1.0138401985168457,grad_norm: 0.9999991762213164, iteration: 25458
loss: 1.0091792345046997,grad_norm: 0.7733969899402259, iteration: 25459
loss: 1.0221978425979614,grad_norm: 0.9999991592920372, iteration: 25460
loss: 1.0249204635620117,grad_norm: 0.9999990622484877, iteration: 25461
loss: 1.0260834693908691,grad_norm: 0.9154256628694855, iteration: 25462
loss: 0.9929417967796326,grad_norm: 0.902744757032774, iteration: 25463
loss: 1.006345510482788,grad_norm: 0.8879508699469006, iteration: 25464
loss: 1.1073201894760132,grad_norm: 0.9999998771252544, iteration: 25465
loss: 0.9927442073822021,grad_norm: 0.9999989032734495, iteration: 25466
loss: 0.9897640347480774,grad_norm: 0.9358385151616485, iteration: 25467
loss: 0.9998543858528137,grad_norm: 0.9999991099800604, iteration: 25468
loss: 1.0714064836502075,grad_norm: 0.8730239035054352, iteration: 25469
loss: 1.0068767070770264,grad_norm: 0.8697361989598779, iteration: 25470
loss: 1.007254719734192,grad_norm: 0.832778774026885, iteration: 25471
loss: 0.9886971116065979,grad_norm: 0.9999992516654469, iteration: 25472
loss: 0.9908304214477539,grad_norm: 0.9999991341285236, iteration: 25473
loss: 1.004390835762024,grad_norm: 0.9999991529174903, iteration: 25474
loss: 0.9988861083984375,grad_norm: 0.9658743342161622, iteration: 25475
loss: 1.0030831098556519,grad_norm: 0.98145531599156, iteration: 25476
loss: 1.013730764389038,grad_norm: 0.8898658987989301, iteration: 25477
loss: 0.9850046634674072,grad_norm: 0.9767725989222213, iteration: 25478
loss: 0.9779170155525208,grad_norm: 0.9336808358733979, iteration: 25479
loss: 1.0162659883499146,grad_norm: 0.9429247336198388, iteration: 25480
loss: 1.0195387601852417,grad_norm: 0.9066371021925504, iteration: 25481
loss: 1.036584734916687,grad_norm: 0.9999994536737952, iteration: 25482
loss: 1.0304179191589355,grad_norm: 0.9999998068292829, iteration: 25483
loss: 1.0370172262191772,grad_norm: 0.9999990016665657, iteration: 25484
loss: 1.086850881576538,grad_norm: 0.9999995857275675, iteration: 25485
loss: 0.986301064491272,grad_norm: 0.9999990763910274, iteration: 25486
loss: 0.9868605732917786,grad_norm: 0.9999991364653672, iteration: 25487
loss: 0.9898332953453064,grad_norm: 0.9999990399217862, iteration: 25488
loss: 0.9929417967796326,grad_norm: 0.9999991048297817, iteration: 25489
loss: 1.000232458114624,grad_norm: 0.9999990402546045, iteration: 25490
loss: 0.9866368770599365,grad_norm: 0.881487397309661, iteration: 25491
loss: 1.03505539894104,grad_norm: 0.9999992271937833, iteration: 25492
loss: 1.034177303314209,grad_norm: 0.9999992678077975, iteration: 25493
loss: 1.0099899768829346,grad_norm: 0.9999996507567926, iteration: 25494
loss: 1.0293996334075928,grad_norm: 0.9999996753377579, iteration: 25495
loss: 1.017264485359192,grad_norm: 0.9081472597464018, iteration: 25496
loss: 1.0461251735687256,grad_norm: 0.9999989921454638, iteration: 25497
loss: 1.0546785593032837,grad_norm: 0.9999990427323628, iteration: 25498
loss: 1.0248241424560547,grad_norm: 0.9332004466998619, iteration: 25499
loss: 0.9923707842826843,grad_norm: 0.8946463052423357, iteration: 25500
loss: 1.0196539163589478,grad_norm: 0.8927232424647212, iteration: 25501
loss: 1.0107074975967407,grad_norm: 0.9988645604144366, iteration: 25502
loss: 1.0236269235610962,grad_norm: 0.9586411143269432, iteration: 25503
loss: 1.0029207468032837,grad_norm: 0.9031719140395376, iteration: 25504
loss: 0.9930434226989746,grad_norm: 0.9999989959549308, iteration: 25505
loss: 1.0223147869110107,grad_norm: 0.9800018337370753, iteration: 25506
loss: 0.984024703502655,grad_norm: 0.999999911568029, iteration: 25507
loss: 1.0147591829299927,grad_norm: 0.9999993501027379, iteration: 25508
loss: 1.0397393703460693,grad_norm: 0.9999991418398296, iteration: 25509
loss: 1.0273149013519287,grad_norm: 0.9999993325270372, iteration: 25510
loss: 1.005505919456482,grad_norm: 0.9481646491772812, iteration: 25511
loss: 1.0035067796707153,grad_norm: 0.9999991621504298, iteration: 25512
loss: 1.0048637390136719,grad_norm: 0.9999992802739822, iteration: 25513
loss: 0.9720322489738464,grad_norm: 0.999999399773106, iteration: 25514
loss: 1.0209259986877441,grad_norm: 0.9772662964218959, iteration: 25515
loss: 0.9907614588737488,grad_norm: 0.9999991741096942, iteration: 25516
loss: 1.0016955137252808,grad_norm: 0.9999995063906705, iteration: 25517
loss: 1.0424200296401978,grad_norm: 0.8430171976676533, iteration: 25518
loss: 1.030259609222412,grad_norm: 0.8955199745614681, iteration: 25519
loss: 1.003907561302185,grad_norm: 0.9999991778475247, iteration: 25520
loss: 1.0121835470199585,grad_norm: 0.9999990267712393, iteration: 25521
loss: 1.030038595199585,grad_norm: 0.999999133001292, iteration: 25522
loss: 0.9967637658119202,grad_norm: 0.9999996205426105, iteration: 25523
loss: 1.0402244329452515,grad_norm: 0.9333124872992185, iteration: 25524
loss: 1.009535551071167,grad_norm: 0.9626131471410838, iteration: 25525
loss: 1.0312135219573975,grad_norm: 0.9070327343544767, iteration: 25526
loss: 1.0297751426696777,grad_norm: 0.8601463610895913, iteration: 25527
loss: 1.0207232236862183,grad_norm: 0.9311529526063264, iteration: 25528
loss: 1.0031155347824097,grad_norm: 0.8559061501265652, iteration: 25529
loss: 1.017967939376831,grad_norm: 0.9999991899434761, iteration: 25530
loss: 1.0246412754058838,grad_norm: 0.9999994626400851, iteration: 25531
loss: 1.0323209762573242,grad_norm: 0.9915571891080809, iteration: 25532
loss: 0.991206705570221,grad_norm: 0.9718702159227058, iteration: 25533
loss: 1.0300508737564087,grad_norm: 0.999999061670692, iteration: 25534
loss: 1.0362794399261475,grad_norm: 0.9999991600350523, iteration: 25535
loss: 1.0261887311935425,grad_norm: 0.999127134243951, iteration: 25536
loss: 1.0287843942642212,grad_norm: 0.9999992970860743, iteration: 25537
loss: 1.021367073059082,grad_norm: 0.8486391533576086, iteration: 25538
loss: 0.9999558329582214,grad_norm: 0.9999990502628607, iteration: 25539
loss: 1.0095449686050415,grad_norm: 0.9369262774487364, iteration: 25540
loss: 0.992504894733429,grad_norm: 0.9999992017835397, iteration: 25541
loss: 1.0105587244033813,grad_norm: 0.9247827699845619, iteration: 25542
loss: 1.0145151615142822,grad_norm: 0.9999993686811275, iteration: 25543
loss: 1.0393656492233276,grad_norm: 0.9999992000104706, iteration: 25544
loss: 1.0224671363830566,grad_norm: 0.9999991803431691, iteration: 25545
loss: 1.0170056819915771,grad_norm: 0.9999992507840846, iteration: 25546
loss: 1.0102951526641846,grad_norm: 0.9999993196932779, iteration: 25547
loss: 1.0150974988937378,grad_norm: 0.9999992767801226, iteration: 25548
loss: 1.0132999420166016,grad_norm: 0.9999995015237558, iteration: 25549
loss: 1.0158207416534424,grad_norm: 0.9781841350388724, iteration: 25550
loss: 0.996161699295044,grad_norm: 0.9995822664550805, iteration: 25551
loss: 1.0141291618347168,grad_norm: 0.8373547269070236, iteration: 25552
loss: 1.016409993171692,grad_norm: 0.8571907496157076, iteration: 25553
loss: 0.9873756766319275,grad_norm: 0.9999991986168453, iteration: 25554
loss: 1.0268012285232544,grad_norm: 0.9999990580827466, iteration: 25555
loss: 1.0100219249725342,grad_norm: 0.9999992967021891, iteration: 25556
loss: 1.000552773475647,grad_norm: 0.9465567906726372, iteration: 25557
loss: 1.013698935508728,grad_norm: 0.9999990729329903, iteration: 25558
loss: 0.99089115858078,grad_norm: 0.9991619600466596, iteration: 25559
loss: 0.9855194091796875,grad_norm: 0.8573674489689465, iteration: 25560
loss: 1.0033047199249268,grad_norm: 0.7897621141187983, iteration: 25561
loss: 1.0455350875854492,grad_norm: 0.999999278258302, iteration: 25562
loss: 0.9810205101966858,grad_norm: 0.9200934863796913, iteration: 25563
loss: 1.0091829299926758,grad_norm: 0.9394445588025901, iteration: 25564
loss: 1.0155962705612183,grad_norm: 0.9999990734389498, iteration: 25565
loss: 1.0071409940719604,grad_norm: 0.9999997081731037, iteration: 25566
loss: 1.0171518325805664,grad_norm: 0.9262780811797956, iteration: 25567
loss: 1.0072033405303955,grad_norm: 0.9550916414626749, iteration: 25568
loss: 1.0307515859603882,grad_norm: 0.999999779675214, iteration: 25569
loss: 0.9570513963699341,grad_norm: 0.8885601820670742, iteration: 25570
loss: 1.019310474395752,grad_norm: 0.9999991258284732, iteration: 25571
loss: 0.9719467163085938,grad_norm: 0.8760452243980982, iteration: 25572
loss: 1.0383342504501343,grad_norm: 0.99999909912791, iteration: 25573
loss: 1.0720728635787964,grad_norm: 0.9999996431454325, iteration: 25574
loss: 1.0473108291625977,grad_norm: 0.9999991151944605, iteration: 25575
loss: 1.015511393547058,grad_norm: 0.8338389250726533, iteration: 25576
loss: 0.9965805411338806,grad_norm: 0.9999991999808673, iteration: 25577
loss: 1.0315824747085571,grad_norm: 0.9999991659455736, iteration: 25578
loss: 0.9619327187538147,grad_norm: 0.9529132770429405, iteration: 25579
loss: 1.0178039073944092,grad_norm: 0.9999996298748509, iteration: 25580
loss: 1.032094955444336,grad_norm: 0.9958034516524413, iteration: 25581
loss: 1.0084227323532104,grad_norm: 0.9963337147676226, iteration: 25582
loss: 1.004257082939148,grad_norm: 0.9999993163800062, iteration: 25583
loss: 1.0165812969207764,grad_norm: 0.9999994838753856, iteration: 25584
loss: 1.0352128744125366,grad_norm: 0.9999991252033029, iteration: 25585
loss: 1.0174057483673096,grad_norm: 0.9999996962801629, iteration: 25586
loss: 1.0006160736083984,grad_norm: 0.9597022011384287, iteration: 25587
loss: 1.0044176578521729,grad_norm: 0.8447477650385606, iteration: 25588
loss: 1.0386477708816528,grad_norm: 0.9999989703544357, iteration: 25589
loss: 1.0061969757080078,grad_norm: 0.960960946835147, iteration: 25590
loss: 1.0586028099060059,grad_norm: 0.9999990794272938, iteration: 25591
loss: 0.9960010647773743,grad_norm: 0.8473968110084789, iteration: 25592
loss: 1.006828784942627,grad_norm: 0.8554004190209601, iteration: 25593
loss: 0.9700808525085449,grad_norm: 0.9999992510450939, iteration: 25594
loss: 1.0150519609451294,grad_norm: 0.9999991563825056, iteration: 25595
loss: 0.9939523339271545,grad_norm: 0.9999991070609546, iteration: 25596
loss: 1.0103132724761963,grad_norm: 0.8153082458389626, iteration: 25597
loss: 1.0422840118408203,grad_norm: 0.9999992442482328, iteration: 25598
loss: 0.9605205655097961,grad_norm: 0.8358656138217175, iteration: 25599
loss: 1.022017240524292,grad_norm: 0.9312763244514792, iteration: 25600
loss: 1.0017551183700562,grad_norm: 0.9255497871256888, iteration: 25601
loss: 1.0052704811096191,grad_norm: 0.9999995682024061, iteration: 25602
loss: 1.0121906995773315,grad_norm: 0.9999992089754883, iteration: 25603
loss: 1.0429303646087646,grad_norm: 0.999999210180307, iteration: 25604
loss: 0.9986791014671326,grad_norm: 0.9999992334691227, iteration: 25605
loss: 1.075931191444397,grad_norm: 0.9999991608008066, iteration: 25606
loss: 1.0169570446014404,grad_norm: 0.999999105625268, iteration: 25607
loss: 1.0380282402038574,grad_norm: 0.9999991294562065, iteration: 25608
loss: 1.0373964309692383,grad_norm: 0.9999992231850626, iteration: 25609
loss: 1.055949091911316,grad_norm: 0.9999992145901186, iteration: 25610
loss: 1.0160720348358154,grad_norm: 0.999999193920856, iteration: 25611
loss: 1.025670051574707,grad_norm: 0.9999992840081072, iteration: 25612
loss: 1.0470572710037231,grad_norm: 0.9999990992599614, iteration: 25613
loss: 0.9728940725326538,grad_norm: 0.9999991277431904, iteration: 25614
loss: 1.0300806760787964,grad_norm: 0.9999993606155044, iteration: 25615
loss: 0.9885171055793762,grad_norm: 0.9999989674379464, iteration: 25616
loss: 1.0012425184249878,grad_norm: 0.999999197450125, iteration: 25617
loss: 1.0341531038284302,grad_norm: 0.9999993445592534, iteration: 25618
loss: 1.0216128826141357,grad_norm: 0.9999991721909879, iteration: 25619
loss: 1.0347899198532104,grad_norm: 0.9315125037871821, iteration: 25620
loss: 1.0128806829452515,grad_norm: 0.983381607718182, iteration: 25621
loss: 1.0042595863342285,grad_norm: 0.999999110784365, iteration: 25622
loss: 0.9872359037399292,grad_norm: 0.9779239924718608, iteration: 25623
loss: 1.043736457824707,grad_norm: 0.9999990808645506, iteration: 25624
loss: 0.9831121563911438,grad_norm: 0.9999991058544391, iteration: 25625
loss: 1.0671359300613403,grad_norm: 0.99999914030026, iteration: 25626
loss: 1.0573738813400269,grad_norm: 0.9201594850171063, iteration: 25627
loss: 1.0590590238571167,grad_norm: 0.9999995830658138, iteration: 25628
loss: 1.0189740657806396,grad_norm: 0.8901958100506111, iteration: 25629
loss: 1.0992364883422852,grad_norm: 0.9999993533363688, iteration: 25630
loss: 0.9895721077919006,grad_norm: 0.9999991342880828, iteration: 25631
loss: 1.0104844570159912,grad_norm: 0.9999990841534315, iteration: 25632
loss: 0.9974169731140137,grad_norm: 0.9999990771616837, iteration: 25633
loss: 1.00566565990448,grad_norm: 0.8212011925102456, iteration: 25634
loss: 0.9908362627029419,grad_norm: 0.8958848855045277, iteration: 25635
loss: 0.9811252951622009,grad_norm: 0.9999990672196335, iteration: 25636
loss: 1.0111068487167358,grad_norm: 0.999999290190454, iteration: 25637
loss: 0.9745979905128479,grad_norm: 0.9940204265041829, iteration: 25638
loss: 1.006234049797058,grad_norm: 0.931609554001394, iteration: 25639
loss: 0.9870594143867493,grad_norm: 0.9999992837740154, iteration: 25640
loss: 0.99895179271698,grad_norm: 0.9999992999858841, iteration: 25641
loss: 1.1571913957595825,grad_norm: 0.99999990670946, iteration: 25642
loss: 1.0066410303115845,grad_norm: 0.9999990422610773, iteration: 25643
loss: 1.0874212980270386,grad_norm: 0.9999995433841203, iteration: 25644
loss: 1.0116891860961914,grad_norm: 0.9999991395268116, iteration: 25645
loss: 1.0346213579177856,grad_norm: 0.9999990102589279, iteration: 25646
loss: 1.0136476755142212,grad_norm: 0.9999990903341236, iteration: 25647
loss: 1.0171319246292114,grad_norm: 0.929493860892201, iteration: 25648
loss: 0.9958418011665344,grad_norm: 0.9999990638580019, iteration: 25649
loss: 1.0765728950500488,grad_norm: 0.9999992755856143, iteration: 25650
loss: 0.9875718355178833,grad_norm: 0.999999106255697, iteration: 25651
loss: 1.0193006992340088,grad_norm: 0.9881070796091067, iteration: 25652
loss: 1.006170392036438,grad_norm: 0.9609551161509219, iteration: 25653
loss: 1.0093638896942139,grad_norm: 0.9813665737484953, iteration: 25654
loss: 0.9611408710479736,grad_norm: 0.9999991737126841, iteration: 25655
loss: 0.9818792939186096,grad_norm: 0.999999302064385, iteration: 25656
loss: 1.0352799892425537,grad_norm: 0.9999990799159922, iteration: 25657
loss: 1.034596562385559,grad_norm: 0.8809011514892935, iteration: 25658
loss: 0.9674695134162903,grad_norm: 0.9999991118180389, iteration: 25659
loss: 0.9941717982292175,grad_norm: 0.9999990795893138, iteration: 25660
loss: 1.0516105890274048,grad_norm: 0.9999994752173205, iteration: 25661
loss: 1.0294395685195923,grad_norm: 0.9999992081412432, iteration: 25662
loss: 0.9792326092720032,grad_norm: 0.9999991142589957, iteration: 25663
loss: 1.0266845226287842,grad_norm: 0.9999994289412245, iteration: 25664
loss: 1.0352630615234375,grad_norm: 0.9999994085674082, iteration: 25665
loss: 0.9929122924804688,grad_norm: 0.9999990805895823, iteration: 25666
loss: 1.0335105657577515,grad_norm: 0.999999037385722, iteration: 25667
loss: 1.0476080179214478,grad_norm: 0.9999991909572681, iteration: 25668
loss: 1.0216736793518066,grad_norm: 0.9999990306867765, iteration: 25669
loss: 1.0567902326583862,grad_norm: 0.9999995821283606, iteration: 25670
loss: 0.9908236861228943,grad_norm: 0.9999990080553485, iteration: 25671
loss: 0.943819522857666,grad_norm: 0.943872651694108, iteration: 25672
loss: 1.0114628076553345,grad_norm: 0.9999990417509007, iteration: 25673
loss: 1.0624057054519653,grad_norm: 0.9999996181445528, iteration: 25674
loss: 1.0191056728363037,grad_norm: 0.9999990282092293, iteration: 25675
loss: 1.0043162107467651,grad_norm: 0.9320155789789135, iteration: 25676
loss: 1.0247787237167358,grad_norm: 0.9999992148913303, iteration: 25677
loss: 1.0314154624938965,grad_norm: 0.9999992474997691, iteration: 25678
loss: 0.9903119206428528,grad_norm: 0.9999992214484649, iteration: 25679
loss: 1.0362164974212646,grad_norm: 0.964633911247921, iteration: 25680
loss: 1.011006474494934,grad_norm: 0.9999991105808957, iteration: 25681
loss: 1.0147899389266968,grad_norm: 0.8498016677280061, iteration: 25682
loss: 0.9940031170845032,grad_norm: 0.9999990656933707, iteration: 25683
loss: 1.014249324798584,grad_norm: 0.9999995497779491, iteration: 25684
loss: 1.022159218788147,grad_norm: 0.999998949226854, iteration: 25685
loss: 1.0329580307006836,grad_norm: 0.9999993921936742, iteration: 25686
loss: 1.0247154235839844,grad_norm: 0.9999995166595175, iteration: 25687
loss: 1.0284234285354614,grad_norm: 0.9999991885407158, iteration: 25688
loss: 1.03443443775177,grad_norm: 0.9999991124215802, iteration: 25689
loss: 0.9977492094039917,grad_norm: 0.9999994687265386, iteration: 25690
loss: 1.0185807943344116,grad_norm: 0.8968148989986042, iteration: 25691
loss: 0.9611239433288574,grad_norm: 0.9999990775922082, iteration: 25692
loss: 1.0155359506607056,grad_norm: 0.9999991258953085, iteration: 25693
loss: 0.968825101852417,grad_norm: 0.9999990467562313, iteration: 25694
loss: 1.0191090106964111,grad_norm: 0.9874500338103575, iteration: 25695
loss: 0.9690531492233276,grad_norm: 0.8005440923552642, iteration: 25696
loss: 1.0348665714263916,grad_norm: 0.8945040507240448, iteration: 25697
loss: 1.0198633670806885,grad_norm: 0.9999992538605076, iteration: 25698
loss: 1.0204540491104126,grad_norm: 0.8847216495125645, iteration: 25699
loss: 1.0312528610229492,grad_norm: 0.9551850999493172, iteration: 25700
loss: 1.0401102304458618,grad_norm: 0.9533266102887702, iteration: 25701
loss: 1.013513207435608,grad_norm: 0.9999991649944067, iteration: 25702
loss: 1.0173190832138062,grad_norm: 0.9999991297114575, iteration: 25703
loss: 1.0292372703552246,grad_norm: 0.999999092073704, iteration: 25704
loss: 1.0197302103042603,grad_norm: 0.853584052012681, iteration: 25705
loss: 1.0025700330734253,grad_norm: 0.9999990504569628, iteration: 25706
loss: 1.0416042804718018,grad_norm: 0.9941101022434922, iteration: 25707
loss: 0.9938161373138428,grad_norm: 0.964395867000052, iteration: 25708
loss: 0.99607914686203,grad_norm: 0.9999991606770852, iteration: 25709
loss: 0.9885760545730591,grad_norm: 0.9999996000621043, iteration: 25710
loss: 1.1057586669921875,grad_norm: 0.9999995606459438, iteration: 25711
loss: 1.0050280094146729,grad_norm: 0.8856774017503181, iteration: 25712
loss: 0.9973678588867188,grad_norm: 0.9781251150061783, iteration: 25713
loss: 0.9965221285820007,grad_norm: 0.9131925521388917, iteration: 25714
loss: 1.0035431385040283,grad_norm: 0.9215270378081346, iteration: 25715
loss: 1.0358344316482544,grad_norm: 0.999999252425705, iteration: 25716
loss: 1.0462106466293335,grad_norm: 0.9999991349649145, iteration: 25717
loss: 1.0522959232330322,grad_norm: 0.9999989563527985, iteration: 25718
loss: 1.0348562002182007,grad_norm: 0.9999991224685185, iteration: 25719
loss: 1.0483007431030273,grad_norm: 0.999999083394465, iteration: 25720
loss: 1.0180201530456543,grad_norm: 0.9999993780852829, iteration: 25721
loss: 1.0284466743469238,grad_norm: 0.9449211901649377, iteration: 25722
loss: 0.9930353760719299,grad_norm: 0.9999991633804589, iteration: 25723
loss: 1.001068115234375,grad_norm: 0.999999032868663, iteration: 25724
loss: 1.0175663232803345,grad_norm: 0.9339019209357541, iteration: 25725
loss: 1.0317989587783813,grad_norm: 0.9999991787174138, iteration: 25726
loss: 1.0142985582351685,grad_norm: 0.999999070716267, iteration: 25727
loss: 1.0471173524856567,grad_norm: 0.9999992969855127, iteration: 25728
loss: 0.9868789315223694,grad_norm: 0.9999991907177055, iteration: 25729
loss: 1.042988657951355,grad_norm: 0.9999990745284524, iteration: 25730
loss: 1.0084965229034424,grad_norm: 0.9525701055849286, iteration: 25731
loss: 1.0170390605926514,grad_norm: 0.9999991603188856, iteration: 25732
loss: 1.014887809753418,grad_norm: 0.9999990596033841, iteration: 25733
loss: 1.0241519212722778,grad_norm: 0.9999990968011246, iteration: 25734
loss: 0.998055100440979,grad_norm: 0.9999991346556837, iteration: 25735
loss: 0.9927486777305603,grad_norm: 0.9999991366365406, iteration: 25736
loss: 0.9959709644317627,grad_norm: 0.9424112919254098, iteration: 25737
loss: 1.0289736986160278,grad_norm: 0.9999993155906822, iteration: 25738
loss: 0.9817544221878052,grad_norm: 0.9999990128582782, iteration: 25739
loss: 0.9929612874984741,grad_norm: 0.9999990998207944, iteration: 25740
loss: 1.1432536840438843,grad_norm: 0.9999994963646872, iteration: 25741
loss: 1.0138185024261475,grad_norm: 0.9582461026520612, iteration: 25742
loss: 1.0225300788879395,grad_norm: 0.9999995838655299, iteration: 25743
loss: 0.9801857471466064,grad_norm: 0.9748489093518442, iteration: 25744
loss: 1.046160101890564,grad_norm: 0.9999995177701797, iteration: 25745
loss: 0.9691627621650696,grad_norm: 0.9999990585079994, iteration: 25746
loss: 1.0293123722076416,grad_norm: 0.9144961890337876, iteration: 25747
loss: 1.0168951749801636,grad_norm: 0.9999996575480292, iteration: 25748
loss: 1.0294269323349,grad_norm: 0.9887660365912393, iteration: 25749
loss: 0.9833172559738159,grad_norm: 0.940527460891087, iteration: 25750
loss: 1.0242795944213867,grad_norm: 0.9999994371397688, iteration: 25751
loss: 0.9998818039894104,grad_norm: 0.9999990575706397, iteration: 25752
loss: 1.0338680744171143,grad_norm: 0.9999991597491144, iteration: 25753
loss: 0.9816288948059082,grad_norm: 0.8890367714401819, iteration: 25754
loss: 1.0306754112243652,grad_norm: 0.999999511598428, iteration: 25755
loss: 1.0256879329681396,grad_norm: 0.9999990327152908, iteration: 25756
loss: 1.0025722980499268,grad_norm: 0.9999996804783243, iteration: 25757
loss: 1.0160998106002808,grad_norm: 0.999999147803055, iteration: 25758
loss: 0.9820534586906433,grad_norm: 0.9999990118177355, iteration: 25759
loss: 1.0404455661773682,grad_norm: 0.922220156328383, iteration: 25760
loss: 0.9715281128883362,grad_norm: 0.9186796483161367, iteration: 25761
loss: 0.9715613722801208,grad_norm: 0.9380566218862799, iteration: 25762
loss: 1.015263557434082,grad_norm: 0.999999159303758, iteration: 25763
loss: 1.0317749977111816,grad_norm: 0.9999989140204535, iteration: 25764
loss: 1.0470972061157227,grad_norm: 0.9999996338080253, iteration: 25765
loss: 1.0567823648452759,grad_norm: 0.9999991944430643, iteration: 25766
loss: 1.0152689218521118,grad_norm: 0.998151292500696, iteration: 25767
loss: 0.9973023533821106,grad_norm: 0.9610219344531986, iteration: 25768
loss: 1.0148345232009888,grad_norm: 0.9999992145479342, iteration: 25769
loss: 1.0265252590179443,grad_norm: 0.9086083704453102, iteration: 25770
loss: 0.9758144021034241,grad_norm: 0.9999992658388069, iteration: 25771
loss: 0.9963547587394714,grad_norm: 0.9134056506385709, iteration: 25772
loss: 1.016202688217163,grad_norm: 0.9117603259585944, iteration: 25773
loss: 1.0731315612792969,grad_norm: 0.9999997025620306, iteration: 25774
loss: 1.0273350477218628,grad_norm: 0.9999992337513989, iteration: 25775
loss: 1.0396027565002441,grad_norm: 0.9999994882435823, iteration: 25776
loss: 1.0354641675949097,grad_norm: 0.9999994583473758, iteration: 25777
loss: 1.0011188983917236,grad_norm: 0.9999992109876723, iteration: 25778
loss: 1.018638014793396,grad_norm: 0.9444580597697657, iteration: 25779
loss: 0.99239182472229,grad_norm: 0.8398939551280719, iteration: 25780
loss: 0.9895263910293579,grad_norm: 0.9999993262753551, iteration: 25781
loss: 1.032273292541504,grad_norm: 0.9999992550522956, iteration: 25782
loss: 0.9844797849655151,grad_norm: 0.9999991153442458, iteration: 25783
loss: 0.9985468983650208,grad_norm: 0.9999992212957385, iteration: 25784
loss: 0.9806535840034485,grad_norm: 0.9999991018193742, iteration: 25785
loss: 0.9790509343147278,grad_norm: 0.9596356691299537, iteration: 25786
loss: 1.0342986583709717,grad_norm: 0.999999288171445, iteration: 25787
loss: 1.0293020009994507,grad_norm: 0.9643278395406203, iteration: 25788
loss: 1.0259958505630493,grad_norm: 0.9221594584186886, iteration: 25789
loss: 0.9844008088111877,grad_norm: 0.9999989638653396, iteration: 25790
loss: 1.020580530166626,grad_norm: 0.999999162566648, iteration: 25791
loss: 1.0329742431640625,grad_norm: 0.9468006291901162, iteration: 25792
loss: 0.9674658179283142,grad_norm: 0.9999991397338167, iteration: 25793
loss: 1.0556371212005615,grad_norm: 0.9476401021095484, iteration: 25794
loss: 0.985210657119751,grad_norm: 0.9890240710823968, iteration: 25795
loss: 1.034746766090393,grad_norm: 0.9999997151886473, iteration: 25796
loss: 1.065798282623291,grad_norm: 0.999999339355139, iteration: 25797
loss: 1.0227134227752686,grad_norm: 0.887155177452016, iteration: 25798
loss: 1.0076481103897095,grad_norm: 0.8487450289136106, iteration: 25799
loss: 1.0029019117355347,grad_norm: 0.9999992037392288, iteration: 25800
loss: 1.0181820392608643,grad_norm: 0.9999991625582219, iteration: 25801
loss: 1.013510823249817,grad_norm: 0.8298962393582154, iteration: 25802
loss: 1.0325552225112915,grad_norm: 0.9612172357531695, iteration: 25803
loss: 1.017228126525879,grad_norm: 0.8322332422089493, iteration: 25804
loss: 0.9748393297195435,grad_norm: 0.9999991630003815, iteration: 25805
loss: 1.0274137258529663,grad_norm: 0.9999992092561525, iteration: 25806
loss: 0.9962045550346375,grad_norm: 0.99999900359255, iteration: 25807
loss: 0.969190776348114,grad_norm: 0.8667902749476089, iteration: 25808
loss: 0.9826522469520569,grad_norm: 0.999999117281048, iteration: 25809
loss: 0.9946295022964478,grad_norm: 0.9734288408294706, iteration: 25810
loss: 1.0462640523910522,grad_norm: 0.9999991930931773, iteration: 25811
loss: 0.991228461265564,grad_norm: 0.9528749896408507, iteration: 25812
loss: 1.033142328262329,grad_norm: 0.9999990734974159, iteration: 25813
loss: 0.9898442625999451,grad_norm: 0.9999991785268776, iteration: 25814
loss: 1.0057917833328247,grad_norm: 0.9975232706138761, iteration: 25815
loss: 1.0085195302963257,grad_norm: 0.9999993429946631, iteration: 25816
loss: 1.0296698808670044,grad_norm: 0.9896931661351557, iteration: 25817
loss: 0.9643577933311462,grad_norm: 0.936440095761426, iteration: 25818
loss: 1.0310683250427246,grad_norm: 0.9999991393141269, iteration: 25819
loss: 1.011487603187561,grad_norm: 0.9999989946942388, iteration: 25820
loss: 1.0403273105621338,grad_norm: 0.9999992343739963, iteration: 25821
loss: 1.0072604417800903,grad_norm: 0.8779669423294619, iteration: 25822
loss: 1.0180764198303223,grad_norm: 0.9999990525401657, iteration: 25823
loss: 0.9859108924865723,grad_norm: 0.9396870739945811, iteration: 25824
loss: 1.0130274295806885,grad_norm: 0.9999995184984396, iteration: 25825
loss: 0.999670684337616,grad_norm: 0.9316757849133089, iteration: 25826
loss: 1.0280531644821167,grad_norm: 0.999999082949691, iteration: 25827
loss: 0.9852479696273804,grad_norm: 0.9999991830809181, iteration: 25828
loss: 1.0373363494873047,grad_norm: 0.9999992019439372, iteration: 25829
loss: 0.9925488233566284,grad_norm: 0.9542108659775038, iteration: 25830
loss: 1.0328636169433594,grad_norm: 0.9072871605685683, iteration: 25831
loss: 1.007805585861206,grad_norm: 0.9999992829150589, iteration: 25832
loss: 1.049372911453247,grad_norm: 0.9999995340517236, iteration: 25833
loss: 0.9751084446907043,grad_norm: 0.9093633470509627, iteration: 25834
loss: 1.0030932426452637,grad_norm: 0.9114107550569748, iteration: 25835
loss: 0.9934016466140747,grad_norm: 0.9999990809932353, iteration: 25836
loss: 1.012795329093933,grad_norm: 0.9999992029942216, iteration: 25837
loss: 1.000717043876648,grad_norm: 0.9881008018249535, iteration: 25838
loss: 1.028275728225708,grad_norm: 0.9999994225431011, iteration: 25839
loss: 1.02472984790802,grad_norm: 0.855112645028001, iteration: 25840
loss: 1.0053009986877441,grad_norm: 0.999999376140182, iteration: 25841
loss: 0.9990808367729187,grad_norm: 0.9999993029183893, iteration: 25842
loss: 0.9926343560218811,grad_norm: 0.9874616378236042, iteration: 25843
loss: 1.0301127433776855,grad_norm: 0.8722956335764517, iteration: 25844
loss: 0.9951459765434265,grad_norm: 0.9940776867230942, iteration: 25845
loss: 0.9727518558502197,grad_norm: 0.999999101517663, iteration: 25846
loss: 1.0133202075958252,grad_norm: 0.9999996771752886, iteration: 25847
loss: 1.0067428350448608,grad_norm: 0.8568254297621201, iteration: 25848
loss: 1.0178910493850708,grad_norm: 0.926063092640266, iteration: 25849
loss: 1.0010894536972046,grad_norm: 0.9999991255216342, iteration: 25850
loss: 1.014500379562378,grad_norm: 0.999999527600579, iteration: 25851
loss: 1.0250558853149414,grad_norm: 0.9999997527173312, iteration: 25852
loss: 0.9831322431564331,grad_norm: 0.999998942658998, iteration: 25853
loss: 1.0409398078918457,grad_norm: 0.9999991721338486, iteration: 25854
loss: 1.0417296886444092,grad_norm: 0.9433389870923091, iteration: 25855
loss: 1.0057231187820435,grad_norm: 0.8578745302311814, iteration: 25856
loss: 1.0507866144180298,grad_norm: 0.9999991364644959, iteration: 25857
loss: 0.9936254024505615,grad_norm: 0.9999991753724693, iteration: 25858
loss: 1.0035208463668823,grad_norm: 0.9999994717870726, iteration: 25859
loss: 1.0009605884552002,grad_norm: 0.7974237210976759, iteration: 25860
loss: 1.0294151306152344,grad_norm: 0.8685655685731919, iteration: 25861
loss: 1.0408991575241089,grad_norm: 0.9999989148456986, iteration: 25862
loss: 1.0017144680023193,grad_norm: 0.999999184952207, iteration: 25863
loss: 0.9790935516357422,grad_norm: 0.773442120147932, iteration: 25864
loss: 1.0146492719650269,grad_norm: 0.8964067158347409, iteration: 25865
loss: 0.998199462890625,grad_norm: 0.9756565267014952, iteration: 25866
loss: 1.0108946561813354,grad_norm: 0.9999997070726574, iteration: 25867
loss: 1.0410884618759155,grad_norm: 0.9999995538725938, iteration: 25868
loss: 1.0610265731811523,grad_norm: 0.9999992219515785, iteration: 25869
loss: 1.0386439561843872,grad_norm: 0.9999997920544829, iteration: 25870
loss: 0.9922508001327515,grad_norm: 0.9504018426172551, iteration: 25871
loss: 1.041340947151184,grad_norm: 0.8604020113801496, iteration: 25872
loss: 1.0262929201126099,grad_norm: 0.9999992280403974, iteration: 25873
loss: 1.0119966268539429,grad_norm: 0.9999993689566306, iteration: 25874
loss: 1.0022501945495605,grad_norm: 0.8379972757842408, iteration: 25875
loss: 1.0354470014572144,grad_norm: 0.999999043997068, iteration: 25876
loss: 0.9705286026000977,grad_norm: 0.966526296822053, iteration: 25877
loss: 1.0331542491912842,grad_norm: 0.9999991763972413, iteration: 25878
loss: 1.0171846151351929,grad_norm: 0.9914860502484092, iteration: 25879
loss: 0.9989460706710815,grad_norm: 0.8786050802498656, iteration: 25880
loss: 0.9986717104911804,grad_norm: 0.9999990247638443, iteration: 25881
loss: 1.0016862154006958,grad_norm: 0.8555989470192001, iteration: 25882
loss: 1.0068466663360596,grad_norm: 0.9860693274505614, iteration: 25883
loss: 1.035431981086731,grad_norm: 0.999999651491, iteration: 25884
loss: 0.997251033782959,grad_norm: 0.9999991183090717, iteration: 25885
loss: 1.0544347763061523,grad_norm: 0.9999994213228782, iteration: 25886
loss: 1.0207005739212036,grad_norm: 0.9273798593078768, iteration: 25887
loss: 0.9889340996742249,grad_norm: 0.8176499035902087, iteration: 25888
loss: 1.0378611087799072,grad_norm: 0.9999990081268638, iteration: 25889
loss: 1.0364776849746704,grad_norm: 0.9733625006395021, iteration: 25890
loss: 1.0112552642822266,grad_norm: 0.999999109197999, iteration: 25891
loss: 1.0323498249053955,grad_norm: 0.8819097886321821, iteration: 25892
loss: 1.0200203657150269,grad_norm: 0.999999575943658, iteration: 25893
loss: 1.0553901195526123,grad_norm: 0.9999990901688312, iteration: 25894
loss: 1.0444499254226685,grad_norm: 0.9999990589346645, iteration: 25895
loss: 1.0091322660446167,grad_norm: 0.9652995312477896, iteration: 25896
loss: 1.0099667310714722,grad_norm: 0.999999118353141, iteration: 25897
loss: 0.9835279583930969,grad_norm: 0.9929792104420934, iteration: 25898
loss: 1.0225157737731934,grad_norm: 0.9999993150548301, iteration: 25899
loss: 1.0110875368118286,grad_norm: 0.9999992721117676, iteration: 25900
loss: 1.0026862621307373,grad_norm: 0.9999992563432311, iteration: 25901
loss: 0.9948817491531372,grad_norm: 0.9383578933927691, iteration: 25902
loss: 0.9914500713348389,grad_norm: 0.999999062123879, iteration: 25903
loss: 1.036975383758545,grad_norm: 0.9999991789757674, iteration: 25904
loss: 0.9760395288467407,grad_norm: 0.8876227288328268, iteration: 25905
loss: 0.9985605478286743,grad_norm: 0.999999254807947, iteration: 25906
loss: 1.0494763851165771,grad_norm: 0.9835702380181117, iteration: 25907
loss: 0.9788479208946228,grad_norm: 0.9879016829251996, iteration: 25908
loss: 0.9896935820579529,grad_norm: 0.9999991011871034, iteration: 25909
loss: 1.058654546737671,grad_norm: 0.9999994139377915, iteration: 25910
loss: 1.0447852611541748,grad_norm: 0.9999992848401275, iteration: 25911
loss: 1.0393866300582886,grad_norm: 0.9999994190322883, iteration: 25912
loss: 0.9814307689666748,grad_norm: 0.9999990526859636, iteration: 25913
loss: 0.9935008883476257,grad_norm: 0.9640973967955168, iteration: 25914
loss: 1.0088709592819214,grad_norm: 0.9999991048719669, iteration: 25915
loss: 1.0730425119400024,grad_norm: 0.9999996603084339, iteration: 25916
loss: 0.9889311194419861,grad_norm: 0.999999197090504, iteration: 25917
loss: 0.9784256815910339,grad_norm: 0.95481314683556, iteration: 25918
loss: 1.024509072303772,grad_norm: 0.9999991246440865, iteration: 25919
loss: 1.023749828338623,grad_norm: 0.9999989793017802, iteration: 25920
loss: 1.0319669246673584,grad_norm: 0.8715938388993588, iteration: 25921
loss: 1.01865816116333,grad_norm: 0.9999993402550083, iteration: 25922
loss: 1.0371845960617065,grad_norm: 0.9999994820602094, iteration: 25923
loss: 1.0188634395599365,grad_norm: 0.9999989384844903, iteration: 25924
loss: 1.0306679010391235,grad_norm: 0.9999993219680765, iteration: 25925
loss: 0.9955754280090332,grad_norm: 0.9408524996947539, iteration: 25926
loss: 1.0099958181381226,grad_norm: 0.9999991553146651, iteration: 25927
loss: 1.0045605897903442,grad_norm: 0.9999990884026887, iteration: 25928
loss: 1.0186432600021362,grad_norm: 0.8998309679850637, iteration: 25929
loss: 0.9907585382461548,grad_norm: 0.9999991435126373, iteration: 25930
loss: 1.027594804763794,grad_norm: 0.8438921882436266, iteration: 25931
loss: 1.039431095123291,grad_norm: 0.9999989993060721, iteration: 25932
loss: 0.9852940440177917,grad_norm: 0.8178123827976068, iteration: 25933
loss: 1.0108675956726074,grad_norm: 0.8911016296392698, iteration: 25934
loss: 1.0121575593948364,grad_norm: 0.9999991663568769, iteration: 25935
loss: 0.9974182844161987,grad_norm: 0.9999991004019475, iteration: 25936
loss: 1.0108935832977295,grad_norm: 0.8883119960813347, iteration: 25937
loss: 1.0058209896087646,grad_norm: 0.9999991872696493, iteration: 25938
loss: 1.008387565612793,grad_norm: 0.8431369856411438, iteration: 25939
loss: 1.0135514736175537,grad_norm: 0.9999990971174106, iteration: 25940
loss: 1.0198557376861572,grad_norm: 0.7947451764526591, iteration: 25941
loss: 0.9988046884536743,grad_norm: 0.9999991537833937, iteration: 25942
loss: 1.0348849296569824,grad_norm: 0.9719490068667116, iteration: 25943
loss: 1.0158469676971436,grad_norm: 0.9999992671286797, iteration: 25944
loss: 1.0368338823318481,grad_norm: 0.9604430806626517, iteration: 25945
loss: 1.0547828674316406,grad_norm: 0.9999989887727729, iteration: 25946
loss: 1.0639092922210693,grad_norm: 0.9999993078725575, iteration: 25947
loss: 0.986296534538269,grad_norm: 0.999999069225827, iteration: 25948
loss: 0.9860219955444336,grad_norm: 0.9858647943351942, iteration: 25949
loss: 1.014970064163208,grad_norm: 0.9999997577342237, iteration: 25950
loss: 1.050864577293396,grad_norm: 0.9999991564999007, iteration: 25951
loss: 0.973962128162384,grad_norm: 0.9999996439834687, iteration: 25952
loss: 1.0444608926773071,grad_norm: 0.9999990025810336, iteration: 25953
loss: 0.9834207892417908,grad_norm: 0.9999991191170483, iteration: 25954
loss: 0.9817364811897278,grad_norm: 0.9672568284074418, iteration: 25955
loss: 1.0377346277236938,grad_norm: 0.9999992639406889, iteration: 25956
loss: 1.014973521232605,grad_norm: 0.9790417033734312, iteration: 25957
loss: 1.0065542459487915,grad_norm: 0.9999992222924573, iteration: 25958
loss: 0.9890443682670593,grad_norm: 0.9999990577072796, iteration: 25959
loss: 1.0024452209472656,grad_norm: 0.9762703599057221, iteration: 25960
loss: 0.9883648753166199,grad_norm: 0.9713039204689641, iteration: 25961
loss: 1.0074228048324585,grad_norm: 0.9999990001517318, iteration: 25962
loss: 0.9903928637504578,grad_norm: 0.9999993447507172, iteration: 25963
loss: 0.9868568778038025,grad_norm: 0.9999989693823864, iteration: 25964
loss: 1.0365769863128662,grad_norm: 0.9571521456205424, iteration: 25965
loss: 0.9810067415237427,grad_norm: 0.8943985568997262, iteration: 25966
loss: 1.0401686429977417,grad_norm: 0.9637596116482252, iteration: 25967
loss: 1.0298240184783936,grad_norm: 0.9999991380560763, iteration: 25968
loss: 1.0223116874694824,grad_norm: 0.9999990813671308, iteration: 25969
loss: 1.0366129875183105,grad_norm: 0.9038849985917173, iteration: 25970
loss: 0.9708652496337891,grad_norm: 0.9999991070824618, iteration: 25971
loss: 1.016237497329712,grad_norm: 0.9261370939295038, iteration: 25972
loss: 1.0267794132232666,grad_norm: 0.9752015466922251, iteration: 25973
loss: 1.0475901365280151,grad_norm: 0.9999992183653665, iteration: 25974
loss: 1.0260225534439087,grad_norm: 0.9999991397162131, iteration: 25975
loss: 1.0181654691696167,grad_norm: 0.8594304699276163, iteration: 25976
loss: 1.005735158920288,grad_norm: 0.9999990352285856, iteration: 25977
loss: 0.9977741241455078,grad_norm: 0.9999992065706923, iteration: 25978
loss: 1.0463647842407227,grad_norm: 0.9999991848369041, iteration: 25979
loss: 1.0278819799423218,grad_norm: 0.8371795725415146, iteration: 25980
loss: 1.0299910306930542,grad_norm: 0.9614311764494949, iteration: 25981
loss: 0.9985023736953735,grad_norm: 0.9963098316002504, iteration: 25982
loss: 0.992960512638092,grad_norm: 0.9999992246981907, iteration: 25983
loss: 1.0251468420028687,grad_norm: 0.9999990915345471, iteration: 25984
loss: 1.0762767791748047,grad_norm: 0.9999996225552575, iteration: 25985
loss: 1.0344946384429932,grad_norm: 0.9293820318940826, iteration: 25986
loss: 1.0089677572250366,grad_norm: 0.9999990361908949, iteration: 25987
loss: 1.0046517848968506,grad_norm: 0.9999990451438943, iteration: 25988
loss: 1.035029411315918,grad_norm: 0.9999992265733685, iteration: 25989
loss: 1.050323724746704,grad_norm: 0.999999374612185, iteration: 25990
loss: 0.987758457660675,grad_norm: 0.9799350319469525, iteration: 25991
loss: 1.0000739097595215,grad_norm: 0.9999990422036499, iteration: 25992
loss: 1.0375081300735474,grad_norm: 0.9999991028567488, iteration: 25993
loss: 1.0467582941055298,grad_norm: 0.999999296403123, iteration: 25994
loss: 0.9989083409309387,grad_norm: 0.9879235894523076, iteration: 25995
loss: 1.0152076482772827,grad_norm: 0.9999990445943714, iteration: 25996
loss: 1.016453742980957,grad_norm: 0.9999992155955946, iteration: 25997
loss: 0.9892139434814453,grad_norm: 0.9999991344540486, iteration: 25998
loss: 0.9989523887634277,grad_norm: 0.9312442633008299, iteration: 25999
loss: 0.9859281182289124,grad_norm: 0.9999992214253896, iteration: 26000
loss: 1.0100054740905762,grad_norm: 0.9999991503236833, iteration: 26001
loss: 1.0070995092391968,grad_norm: 0.9999990632506882, iteration: 26002
loss: 1.0218713283538818,grad_norm: 0.9999991568305049, iteration: 26003
loss: 1.0528478622436523,grad_norm: 0.9999992207710764, iteration: 26004
loss: 1.01572585105896,grad_norm: 0.89434561698991, iteration: 26005
loss: 0.9800639748573303,grad_norm: 0.9144850136624246, iteration: 26006
loss: 1.036698579788208,grad_norm: 0.9807139465793123, iteration: 26007
loss: 1.0401873588562012,grad_norm: 0.9999992212244418, iteration: 26008
loss: 0.9879150390625,grad_norm: 0.9999991772712411, iteration: 26009
loss: 0.9835054278373718,grad_norm: 0.9999993741042073, iteration: 26010
loss: 1.0005730390548706,grad_norm: 0.9999992925555032, iteration: 26011
loss: 1.0058846473693848,grad_norm: 0.9999993791314704, iteration: 26012
loss: 1.0118293762207031,grad_norm: 0.999999221957779, iteration: 26013
loss: 1.0145808458328247,grad_norm: 0.9999990105468878, iteration: 26014
loss: 1.0296343564987183,grad_norm: 0.9124057996184385, iteration: 26015
loss: 1.0135959386825562,grad_norm: 0.9656903531937091, iteration: 26016
loss: 1.046535611152649,grad_norm: 0.9999990645669833, iteration: 26017
loss: 1.046062707901001,grad_norm: 0.8621869732871318, iteration: 26018
loss: 1.0069735050201416,grad_norm: 0.9248270102684503, iteration: 26019
loss: 1.0053606033325195,grad_norm: 0.8343587615935596, iteration: 26020
loss: 1.0947176218032837,grad_norm: 0.9999992648162918, iteration: 26021
loss: 1.0388836860656738,grad_norm: 0.9407341146394762, iteration: 26022
loss: 1.0006294250488281,grad_norm: 0.9922498929363645, iteration: 26023
loss: 0.995857834815979,grad_norm: 0.9503064506428911, iteration: 26024
loss: 1.0318502187728882,grad_norm: 0.9999990814267089, iteration: 26025
loss: 1.0480587482452393,grad_norm: 0.9999991923803422, iteration: 26026
loss: 1.0166959762573242,grad_norm: 0.9999995040857674, iteration: 26027
loss: 1.0302084684371948,grad_norm: 0.8497317887642867, iteration: 26028
loss: 0.997435450553894,grad_norm: 0.9547178891527828, iteration: 26029
loss: 1.0040931701660156,grad_norm: 0.9999992565611493, iteration: 26030
loss: 1.0287539958953857,grad_norm: 0.999999081931324, iteration: 26031
loss: 1.0026640892028809,grad_norm: 0.8550695031497825, iteration: 26032
loss: 1.0092601776123047,grad_norm: 0.9999992592878851, iteration: 26033
loss: 1.0019420385360718,grad_norm: 0.9999990653814727, iteration: 26034
loss: 1.0170656442642212,grad_norm: 0.987997364805801, iteration: 26035
loss: 1.0155037641525269,grad_norm: 0.99999947021524, iteration: 26036
loss: 0.9911237955093384,grad_norm: 0.8825011041028645, iteration: 26037
loss: 0.9702560305595398,grad_norm: 0.999999184036288, iteration: 26038
loss: 1.0059524774551392,grad_norm: 0.8952303018558914, iteration: 26039
loss: 0.9927915930747986,grad_norm: 0.9999997277504685, iteration: 26040
loss: 1.0173375606536865,grad_norm: 0.9611911246204919, iteration: 26041
loss: 1.0372673273086548,grad_norm: 0.9760007666307949, iteration: 26042
loss: 0.9840558171272278,grad_norm: 0.9353190854515631, iteration: 26043
loss: 1.0098978281021118,grad_norm: 0.9999990413056838, iteration: 26044
loss: 1.004331350326538,grad_norm: 0.9595885732030304, iteration: 26045
loss: 0.9832534790039062,grad_norm: 0.9999998940070465, iteration: 26046
loss: 1.0262954235076904,grad_norm: 0.999999118151681, iteration: 26047
loss: 0.9673851132392883,grad_norm: 0.8382184054579521, iteration: 26048
loss: 0.9762877821922302,grad_norm: 0.9999989671654703, iteration: 26049
loss: 1.0302743911743164,grad_norm: 0.9822691616306126, iteration: 26050
loss: 1.0155889987945557,grad_norm: 0.999999167434436, iteration: 26051
loss: 1.0067853927612305,grad_norm: 0.9433055120510927, iteration: 26052
loss: 1.0378724336624146,grad_norm: 0.9891241069514035, iteration: 26053
loss: 1.0323115587234497,grad_norm: 0.9999992497797707, iteration: 26054
loss: 1.0412273406982422,grad_norm: 0.9999990688116491, iteration: 26055
loss: 1.0171302556991577,grad_norm: 0.8154511942869485, iteration: 26056
loss: 1.030734658241272,grad_norm: 0.9999989885265467, iteration: 26057
loss: 0.9829561710357666,grad_norm: 0.9565650420014944, iteration: 26058
loss: 1.0223780870437622,grad_norm: 0.9999992028488742, iteration: 26059
loss: 1.0485460758209229,grad_norm: 0.9999995204087617, iteration: 26060
loss: 1.0252612829208374,grad_norm: 0.9999991115306831, iteration: 26061
loss: 1.0093605518341064,grad_norm: 0.9999992260234168, iteration: 26062
loss: 1.0369105339050293,grad_norm: 0.9999991539291112, iteration: 26063
loss: 0.9828168153762817,grad_norm: 0.9999993033759126, iteration: 26064
loss: 1.0537126064300537,grad_norm: 0.8820090339907873, iteration: 26065
loss: 0.9935768246650696,grad_norm: 0.999999151498381, iteration: 26066
loss: 1.0415027141571045,grad_norm: 0.9102732992191521, iteration: 26067
loss: 0.9609228372573853,grad_norm: 0.9999992047405492, iteration: 26068
loss: 1.0291765928268433,grad_norm: 0.8256231812294494, iteration: 26069
loss: 1.0539265871047974,grad_norm: 0.9999993183283571, iteration: 26070
loss: 1.003549337387085,grad_norm: 0.9417329724215647, iteration: 26071
loss: 1.010872483253479,grad_norm: 0.9999989617479905, iteration: 26072
loss: 1.049646258354187,grad_norm: 0.9999991176639554, iteration: 26073
loss: 1.0018386840820312,grad_norm: 0.99999918548044, iteration: 26074
loss: 0.9999171495437622,grad_norm: 0.999999008243072, iteration: 26075
loss: 0.9913537502288818,grad_norm: 0.8907962334358835, iteration: 26076
loss: 1.011953592300415,grad_norm: 0.8925637655640895, iteration: 26077
loss: 0.9735810160636902,grad_norm: 0.8644616992562565, iteration: 26078
loss: 1.018363356590271,grad_norm: 0.9999990575279541, iteration: 26079
loss: 1.001460313796997,grad_norm: 0.9266784880592749, iteration: 26080
loss: 1.0267157554626465,grad_norm: 0.9288059901597749, iteration: 26081
loss: 0.9844022393226624,grad_norm: 0.7915643947979224, iteration: 26082
loss: 1.0157901048660278,grad_norm: 0.9999993181927902, iteration: 26083
loss: 0.9700392484664917,grad_norm: 0.9999992371102479, iteration: 26084
loss: 1.0026429891586304,grad_norm: 0.9376813160941109, iteration: 26085
loss: 1.0307865142822266,grad_norm: 0.7936052772743051, iteration: 26086
loss: 1.0350608825683594,grad_norm: 0.84281089717695, iteration: 26087
loss: 1.0131431818008423,grad_norm: 0.8742062622316922, iteration: 26088
loss: 1.005030870437622,grad_norm: 0.9974781392704626, iteration: 26089
loss: 1.0199419260025024,grad_norm: 0.999999065953368, iteration: 26090
loss: 0.9902521967887878,grad_norm: 0.870199599173098, iteration: 26091
loss: 1.0609667301177979,grad_norm: 0.9999990737157878, iteration: 26092
loss: 0.9989281892776489,grad_norm: 0.9999996291773516, iteration: 26093
loss: 1.0055608749389648,grad_norm: 0.896755976415276, iteration: 26094
loss: 0.9897329211235046,grad_norm: 0.914396834763383, iteration: 26095
loss: 1.0449672937393188,grad_norm: 0.9999994215096565, iteration: 26096
loss: 1.0295379161834717,grad_norm: 0.9999991700396174, iteration: 26097
loss: 1.02644681930542,grad_norm: 0.9906982227621188, iteration: 26098
loss: 1.037973165512085,grad_norm: 0.8408295527895371, iteration: 26099
loss: 0.9947240352630615,grad_norm: 0.9701732385522585, iteration: 26100
loss: 0.9826279282569885,grad_norm: 0.9280768328488471, iteration: 26101
loss: 1.0378369092941284,grad_norm: 0.9512625655302882, iteration: 26102
loss: 1.0401440858840942,grad_norm: 0.9999993113768195, iteration: 26103
loss: 1.0021626949310303,grad_norm: 0.9231339472643519, iteration: 26104
loss: 1.0069433450698853,grad_norm: 0.9930228873913062, iteration: 26105
loss: 0.9739712476730347,grad_norm: 0.7808231011332887, iteration: 26106
loss: 0.9952943325042725,grad_norm: 0.9218979567722256, iteration: 26107
loss: 1.0193743705749512,grad_norm: 0.9999991634727451, iteration: 26108
loss: 0.9808948040008545,grad_norm: 0.8735596214750874, iteration: 26109
loss: 1.0275931358337402,grad_norm: 0.9999993338769432, iteration: 26110
loss: 0.9897351264953613,grad_norm: 0.9999990678422789, iteration: 26111
loss: 0.9941724538803101,grad_norm: 0.999999290245231, iteration: 26112
loss: 1.0300790071487427,grad_norm: 0.9999991595833917, iteration: 26113
loss: 0.9819982051849365,grad_norm: 0.958108226902332, iteration: 26114
loss: 1.04632568359375,grad_norm: 0.9999991871998096, iteration: 26115
loss: 1.0031238794326782,grad_norm: 0.8943603783425101, iteration: 26116
loss: 1.0065276622772217,grad_norm: 0.8955902303618666, iteration: 26117
loss: 1.0279953479766846,grad_norm: 0.9332404330939853, iteration: 26118
loss: 1.0011099576950073,grad_norm: 0.886061883735031, iteration: 26119
loss: 1.0090287923812866,grad_norm: 0.8871225374021977, iteration: 26120
loss: 0.9751260876655579,grad_norm: 0.9999990651084938, iteration: 26121
loss: 0.982245922088623,grad_norm: 0.999999068347754, iteration: 26122
loss: 1.0688070058822632,grad_norm: 0.9999992766613595, iteration: 26123
loss: 0.9756838083267212,grad_norm: 0.999999109373241, iteration: 26124
loss: 0.9674015045166016,grad_norm: 0.9999992249633738, iteration: 26125
loss: 0.986717700958252,grad_norm: 0.9273546078511075, iteration: 26126
loss: 1.0184612274169922,grad_norm: 0.999999161936666, iteration: 26127
loss: 0.9859875440597534,grad_norm: 0.9899915933179613, iteration: 26128
loss: 0.9942762851715088,grad_norm: 0.920626469399019, iteration: 26129
loss: 0.9672505259513855,grad_norm: 0.999999045306464, iteration: 26130
loss: 0.9987404346466064,grad_norm: 0.9630113791104699, iteration: 26131
loss: 0.9830679893493652,grad_norm: 0.9120021745271276, iteration: 26132
loss: 1.0124036073684692,grad_norm: 0.9999989830210345, iteration: 26133
loss: 0.980017364025116,grad_norm: 0.9999992133854032, iteration: 26134
loss: 0.9811693429946899,grad_norm: 0.9307015322235725, iteration: 26135
loss: 0.9998756647109985,grad_norm: 0.9140619838054497, iteration: 26136
loss: 0.9911069273948669,grad_norm: 0.997380341592777, iteration: 26137
loss: 1.0179970264434814,grad_norm: 0.9274587334428236, iteration: 26138
loss: 1.020361304283142,grad_norm: 0.9789581194694628, iteration: 26139
loss: 0.9609869718551636,grad_norm: 0.9999996631459134, iteration: 26140
loss: 1.0412445068359375,grad_norm: 0.9999992274388861, iteration: 26141
loss: 1.0327931642532349,grad_norm: 0.9999991599994381, iteration: 26142
loss: 1.0696247816085815,grad_norm: 0.9999989945578429, iteration: 26143
loss: 1.0106878280639648,grad_norm: 0.811006207550767, iteration: 26144
loss: 0.9706564545631409,grad_norm: 0.9795259499746011, iteration: 26145
loss: 0.9864785075187683,grad_norm: 0.8916020160587695, iteration: 26146
loss: 0.9978051781654358,grad_norm: 0.999999084670807, iteration: 26147
loss: 1.0085772275924683,grad_norm: 0.9999991477808786, iteration: 26148
loss: 1.0057013034820557,grad_norm: 0.9999990391135273, iteration: 26149
loss: 0.9862428903579712,grad_norm: 0.9999995918276026, iteration: 26150
loss: 0.9954915642738342,grad_norm: 0.9999991173660319, iteration: 26151
loss: 0.998473048210144,grad_norm: 0.8955104881735841, iteration: 26152
loss: 0.9986602663993835,grad_norm: 0.999999153538535, iteration: 26153
loss: 1.044402003288269,grad_norm: 0.9999994825069594, iteration: 26154
loss: 0.9861408472061157,grad_norm: 0.9999993109613098, iteration: 26155
loss: 1.0333337783813477,grad_norm: 0.999998966133599, iteration: 26156
loss: 1.0149861574172974,grad_norm: 0.9999990664365419, iteration: 26157
loss: 0.9911783337593079,grad_norm: 0.9999990137650633, iteration: 26158
loss: 1.0424872636795044,grad_norm: 0.9999996446208287, iteration: 26159
loss: 1.0497041940689087,grad_norm: 0.999999086925874, iteration: 26160
loss: 0.9666619300842285,grad_norm: 0.8807996222682859, iteration: 26161
loss: 1.0064585208892822,grad_norm: 0.99999924800184, iteration: 26162
loss: 0.9760738611221313,grad_norm: 0.9999989649113749, iteration: 26163
loss: 1.0258769989013672,grad_norm: 0.9999990212696873, iteration: 26164
loss: 1.010024905204773,grad_norm: 0.937962156019548, iteration: 26165
loss: 1.0431687831878662,grad_norm: 0.9999991263583048, iteration: 26166
loss: 0.9929631948471069,grad_norm: 0.9999993827279681, iteration: 26167
loss: 1.0047138929367065,grad_norm: 0.9441459059104423, iteration: 26168
loss: 1.0075887441635132,grad_norm: 0.9504162299645521, iteration: 26169
loss: 0.9935505390167236,grad_norm: 0.914060477002617, iteration: 26170
loss: 0.9772589206695557,grad_norm: 0.9879934891482466, iteration: 26171
loss: 1.0339956283569336,grad_norm: 0.9999991566693214, iteration: 26172
loss: 1.0050048828125,grad_norm: 0.9999990798658804, iteration: 26173
loss: 1.063114881515503,grad_norm: 0.9999995141822446, iteration: 26174
loss: 0.9608569741249084,grad_norm: 0.9999990226343592, iteration: 26175
loss: 1.0082404613494873,grad_norm: 0.9999992293846932, iteration: 26176
loss: 1.0287359952926636,grad_norm: 0.9999991310480143, iteration: 26177
loss: 1.0157108306884766,grad_norm: 0.9202460308812181, iteration: 26178
loss: 0.9937244653701782,grad_norm: 0.9840946269171038, iteration: 26179
loss: 1.0223674774169922,grad_norm: 0.8784160869391903, iteration: 26180
loss: 0.9778366684913635,grad_norm: 0.9336979812020758, iteration: 26181
loss: 0.9787455201148987,grad_norm: 0.9420799077807651, iteration: 26182
loss: 0.9766692519187927,grad_norm: 0.8684441084409878, iteration: 26183
loss: 1.0068821907043457,grad_norm: 0.9125445586776176, iteration: 26184
loss: 1.0088859796524048,grad_norm: 0.8164220520664665, iteration: 26185
loss: 1.017383337020874,grad_norm: 0.9999992726804097, iteration: 26186
loss: 0.9391641616821289,grad_norm: 0.8082079231965452, iteration: 26187
loss: 1.0027835369110107,grad_norm: 0.999998980427018, iteration: 26188
loss: 1.021822452545166,grad_norm: 0.9804063167717285, iteration: 26189
loss: 1.0529754161834717,grad_norm: 0.9999992786210642, iteration: 26190
loss: 1.0111244916915894,grad_norm: 0.9999992668356391, iteration: 26191
loss: 0.9639338850975037,grad_norm: 0.9999990551095544, iteration: 26192
loss: 0.9886733293533325,grad_norm: 0.8061325880258571, iteration: 26193
loss: 1.0434868335723877,grad_norm: 0.9999991374126123, iteration: 26194
loss: 1.0093212127685547,grad_norm: 0.9999990307715568, iteration: 26195
loss: 1.0314927101135254,grad_norm: 0.9999992211358097, iteration: 26196
loss: 1.0290848016738892,grad_norm: 0.999999223867118, iteration: 26197
loss: 1.01706862449646,grad_norm: 0.9999991091510774, iteration: 26198
loss: 1.0075217485427856,grad_norm: 0.9999992375988376, iteration: 26199
loss: 1.0504347085952759,grad_norm: 0.9999991131752449, iteration: 26200
loss: 0.9676264524459839,grad_norm: 0.9033842074262144, iteration: 26201
loss: 0.9947356581687927,grad_norm: 0.9031078092138676, iteration: 26202
loss: 1.0347368717193604,grad_norm: 0.9999991072416918, iteration: 26203
loss: 1.0164486169815063,grad_norm: 0.8464130950771036, iteration: 26204
loss: 1.0474114418029785,grad_norm: 0.904700997735851, iteration: 26205
loss: 0.9928735494613647,grad_norm: 0.9999991621804823, iteration: 26206
loss: 1.0119715929031372,grad_norm: 0.9134600053225997, iteration: 26207
loss: 0.9807394742965698,grad_norm: 0.9999990760582916, iteration: 26208
loss: 1.0133668184280396,grad_norm: 0.9999990916176539, iteration: 26209
loss: 0.9661948680877686,grad_norm: 0.8715332506557207, iteration: 26210
loss: 1.0866129398345947,grad_norm: 0.9999995662768109, iteration: 26211
loss: 1.0051642656326294,grad_norm: 0.8150781123605197, iteration: 26212
loss: 1.0103884935379028,grad_norm: 0.9999992474471814, iteration: 26213
loss: 0.981293261051178,grad_norm: 0.9999991353308199, iteration: 26214
loss: 0.9844034314155579,grad_norm: 0.9979212879849257, iteration: 26215
loss: 1.0446152687072754,grad_norm: 0.9369085692107632, iteration: 26216
loss: 1.0043026208877563,grad_norm: 0.9748464891814745, iteration: 26217
loss: 1.0013796091079712,grad_norm: 0.7183854971856263, iteration: 26218
loss: 1.0124608278274536,grad_norm: 0.9999990826469839, iteration: 26219
loss: 1.0044567584991455,grad_norm: 0.9907359919229715, iteration: 26220
loss: 0.9596313834190369,grad_norm: 0.9999990438688183, iteration: 26221
loss: 0.9906702637672424,grad_norm: 0.9066346372610188, iteration: 26222
loss: 1.0242891311645508,grad_norm: 0.9638932564985409, iteration: 26223
loss: 0.9814432263374329,grad_norm: 0.91672898410629, iteration: 26224
loss: 1.0393950939178467,grad_norm: 0.9999991193336538, iteration: 26225
loss: 1.0548100471496582,grad_norm: 0.9999990969658482, iteration: 26226
loss: 0.9797688126564026,grad_norm: 0.9270127087735883, iteration: 26227
loss: 1.010310411453247,grad_norm: 0.9999992195924726, iteration: 26228
loss: 1.0056493282318115,grad_norm: 0.9711245089572073, iteration: 26229
loss: 0.9765167236328125,grad_norm: 0.9250479587018448, iteration: 26230
loss: 1.0195386409759521,grad_norm: 0.9999998069065134, iteration: 26231
loss: 1.027491569519043,grad_norm: 0.9999991878876145, iteration: 26232
loss: 0.9891130924224854,grad_norm: 0.9999991515589557, iteration: 26233
loss: 1.0264384746551514,grad_norm: 0.8673229449184304, iteration: 26234
loss: 1.0153840780258179,grad_norm: 0.8939272958229939, iteration: 26235
loss: 0.975853443145752,grad_norm: 0.9730146622234672, iteration: 26236
loss: 1.0157405138015747,grad_norm: 0.9999990169939521, iteration: 26237
loss: 1.0338889360427856,grad_norm: 0.9999992422586464, iteration: 26238
loss: 0.9974051117897034,grad_norm: 0.9734364075780476, iteration: 26239
loss: 1.0077697038650513,grad_norm: 0.90408225156654, iteration: 26240
loss: 1.0159977674484253,grad_norm: 0.7672320458904041, iteration: 26241
loss: 1.022502064704895,grad_norm: 0.9546001762751606, iteration: 26242
loss: 1.030176043510437,grad_norm: 0.99999916972798, iteration: 26243
loss: 1.0374021530151367,grad_norm: 0.9999991726833429, iteration: 26244
loss: 0.9972286820411682,grad_norm: 0.991233945271206, iteration: 26245
loss: 1.023331880569458,grad_norm: 0.9633986038791521, iteration: 26246
loss: 1.0176986455917358,grad_norm: 0.9999993058571311, iteration: 26247
loss: 0.9871518611907959,grad_norm: 0.9917645199825471, iteration: 26248
loss: 1.0059133768081665,grad_norm: 0.9999989896644828, iteration: 26249
loss: 1.0216102600097656,grad_norm: 0.9999993536148017, iteration: 26250
loss: 1.0165852308273315,grad_norm: 0.9999989815098961, iteration: 26251
loss: 0.982261598110199,grad_norm: 0.8147375057151098, iteration: 26252
loss: 1.00282621383667,grad_norm: 0.9810391302024731, iteration: 26253
loss: 1.0115916728973389,grad_norm: 0.9999989573107411, iteration: 26254
loss: 0.959101676940918,grad_norm: 0.9999990922136243, iteration: 26255
loss: 1.0077546834945679,grad_norm: 0.975547116984732, iteration: 26256
loss: 0.9938159584999084,grad_norm: 0.9907560186969956, iteration: 26257
loss: 1.0114142894744873,grad_norm: 0.9587615705759069, iteration: 26258
loss: 1.0155895948410034,grad_norm: 0.9999991138775948, iteration: 26259
loss: 1.0195789337158203,grad_norm: 0.9008960505176027, iteration: 26260
loss: 1.0383294820785522,grad_norm: 0.8847439570045488, iteration: 26261
loss: 1.0223637819290161,grad_norm: 0.9999992777918003, iteration: 26262
loss: 1.0311542749404907,grad_norm: 0.9999990505685076, iteration: 26263
loss: 0.9963659048080444,grad_norm: 0.9999992131905242, iteration: 26264
loss: 1.0413475036621094,grad_norm: 0.9999990127016869, iteration: 26265
loss: 0.9865430593490601,grad_norm: 0.7504555394839754, iteration: 26266
loss: 1.0196638107299805,grad_norm: 0.9999991161817391, iteration: 26267
loss: 1.0142627954483032,grad_norm: 0.8749878220434837, iteration: 26268
loss: 1.0178412199020386,grad_norm: 0.9999991749932418, iteration: 26269
loss: 0.9886616468429565,grad_norm: 0.8560950621863629, iteration: 26270
loss: 0.9710569381713867,grad_norm: 0.9999990213119583, iteration: 26271
loss: 1.0072404146194458,grad_norm: 0.8595914280148965, iteration: 26272
loss: 1.0023316144943237,grad_norm: 0.9999992004070928, iteration: 26273
loss: 1.0317659378051758,grad_norm: 0.9846407299138825, iteration: 26274
loss: 0.9864143133163452,grad_norm: 0.9999990245119891, iteration: 26275
loss: 1.0136936902999878,grad_norm: 0.9999992053645473, iteration: 26276
loss: 0.9794421195983887,grad_norm: 0.9999992331281204, iteration: 26277
loss: 0.9800927639007568,grad_norm: 0.9017002788820762, iteration: 26278
loss: 0.95560222864151,grad_norm: 0.9999990783370071, iteration: 26279
loss: 1.0143942832946777,grad_norm: 0.8645221379685244, iteration: 26280
loss: 1.0331286191940308,grad_norm: 0.8773410831441087, iteration: 26281
loss: 0.9822937250137329,grad_norm: 0.9999990973705398, iteration: 26282
loss: 1.0318522453308105,grad_norm: 0.9009073712213744, iteration: 26283
loss: 0.9586578011512756,grad_norm: 0.9117662757555487, iteration: 26284
loss: 1.0081990957260132,grad_norm: 0.993744830978929, iteration: 26285
loss: 1.0013569593429565,grad_norm: 0.9999990700176405, iteration: 26286
loss: 1.0051578283309937,grad_norm: 0.9700824322898741, iteration: 26287
loss: 1.0196038484573364,grad_norm: 0.9999989319433745, iteration: 26288
loss: 0.9760646224021912,grad_norm: 0.8909619544913168, iteration: 26289
loss: 1.0434608459472656,grad_norm: 0.9638160214852803, iteration: 26290
loss: 1.0233234167099,grad_norm: 0.999999228278647, iteration: 26291
loss: 0.9801920056343079,grad_norm: 0.9798137368221024, iteration: 26292
loss: 1.0233319997787476,grad_norm: 0.9983397387538308, iteration: 26293
loss: 1.005725383758545,grad_norm: 0.9999992632332628, iteration: 26294
loss: 1.0029312372207642,grad_norm: 0.8857917007032046, iteration: 26295
loss: 0.9869292378425598,grad_norm: 0.9459960783717627, iteration: 26296
loss: 1.030265212059021,grad_norm: 0.9040277331407008, iteration: 26297
loss: 1.0191882848739624,grad_norm: 0.8644206702935111, iteration: 26298
loss: 1.0277680158615112,grad_norm: 0.8503580151026645, iteration: 26299
loss: 1.0142505168914795,grad_norm: 0.9536300221446344, iteration: 26300
loss: 1.0300811529159546,grad_norm: 0.9772355627550192, iteration: 26301
loss: 0.9908407926559448,grad_norm: 0.9582398946369935, iteration: 26302
loss: 1.0027261972427368,grad_norm: 0.9018076932785785, iteration: 26303
loss: 0.9898768067359924,grad_norm: 0.9632344202892993, iteration: 26304
loss: 1.016745686531067,grad_norm: 0.9999990962010934, iteration: 26305
loss: 1.0250115394592285,grad_norm: 0.9999991677273321, iteration: 26306
loss: 1.0292000770568848,grad_norm: 0.9033360318033008, iteration: 26307
loss: 1.0163038969039917,grad_norm: 0.9999992194097125, iteration: 26308
loss: 0.9567075371742249,grad_norm: 0.9999990696621426, iteration: 26309
loss: 0.9807026982307434,grad_norm: 0.9772373685979736, iteration: 26310
loss: 1.03843092918396,grad_norm: 0.999999050834136, iteration: 26311
loss: 1.0184743404388428,grad_norm: 0.942673664068368, iteration: 26312
loss: 1.0207635164260864,grad_norm: 0.9053649228141378, iteration: 26313
loss: 1.0315489768981934,grad_norm: 0.9463506386510232, iteration: 26314
loss: 1.028809666633606,grad_norm: 0.990433507057865, iteration: 26315
loss: 0.9948789477348328,grad_norm: 0.9999990729901598, iteration: 26316
loss: 1.0143626928329468,grad_norm: 0.9999989896516428, iteration: 26317
loss: 1.0295004844665527,grad_norm: 0.9629235564494896, iteration: 26318
loss: 1.0163148641586304,grad_norm: 0.9581484203165919, iteration: 26319
loss: 0.9638930559158325,grad_norm: 0.9999991425124252, iteration: 26320
loss: 0.9986275434494019,grad_norm: 0.8705573419548425, iteration: 26321
loss: 0.9871774911880493,grad_norm: 0.8930923296349086, iteration: 26322
loss: 1.0526100397109985,grad_norm: 0.8905989214715933, iteration: 26323
loss: 1.0391992330551147,grad_norm: 0.9999989376208689, iteration: 26324
loss: 1.0317037105560303,grad_norm: 0.9999991390175209, iteration: 26325
loss: 0.9951075315475464,grad_norm: 0.9999990141428811, iteration: 26326
loss: 1.0057493448257446,grad_norm: 0.9492013194350335, iteration: 26327
loss: 1.016591191291809,grad_norm: 0.9999991218709376, iteration: 26328
loss: 0.971620500087738,grad_norm: 0.9999989960135429, iteration: 26329
loss: 0.9789162278175354,grad_norm: 0.9999992832457906, iteration: 26330
loss: 0.9812600016593933,grad_norm: 0.9752949311830131, iteration: 26331
loss: 1.0518832206726074,grad_norm: 0.9999990074079895, iteration: 26332
loss: 0.9805805683135986,grad_norm: 0.9999990532238334, iteration: 26333
loss: 1.0165519714355469,grad_norm: 0.8982728610210504, iteration: 26334
loss: 1.0391123294830322,grad_norm: 0.9999991836577176, iteration: 26335
loss: 1.0146427154541016,grad_norm: 0.999999096074618, iteration: 26336
loss: 1.0505834817886353,grad_norm: 0.9999993359647599, iteration: 26337
loss: 1.0110142230987549,grad_norm: 0.9999992660457498, iteration: 26338
loss: 1.0174297094345093,grad_norm: 0.922083452297402, iteration: 26339
loss: 1.0067024230957031,grad_norm: 0.9999991535913026, iteration: 26340
loss: 1.0163344144821167,grad_norm: 0.9872764998593703, iteration: 26341
loss: 1.028568983078003,grad_norm: 0.8670194173260868, iteration: 26342
loss: 1.0147149562835693,grad_norm: 0.9999992608399438, iteration: 26343
loss: 0.9699633717536926,grad_norm: 0.9229240239255146, iteration: 26344
loss: 0.9931606650352478,grad_norm: 0.9999990062660288, iteration: 26345
loss: 1.036039113998413,grad_norm: 0.9999988896610408, iteration: 26346
loss: 1.004286766052246,grad_norm: 0.9999991595770825, iteration: 26347
loss: 0.998908281326294,grad_norm: 0.9999990969603514, iteration: 26348
loss: 1.0706993341445923,grad_norm: 0.9999991357825634, iteration: 26349
loss: 1.0518296957015991,grad_norm: 0.9999990964002914, iteration: 26350
loss: 1.0169241428375244,grad_norm: 0.9999991955234198, iteration: 26351
loss: 0.9665142893791199,grad_norm: 0.9999990975002137, iteration: 26352
loss: 1.0142788887023926,grad_norm: 0.9999989436085994, iteration: 26353
loss: 0.9898316264152527,grad_norm: 0.8081458593086509, iteration: 26354
loss: 0.9808219075202942,grad_norm: 0.999999181894014, iteration: 26355
loss: 1.0110070705413818,grad_norm: 0.9911659860561506, iteration: 26356
loss: 1.069495677947998,grad_norm: 0.9999994377287564, iteration: 26357
loss: 1.018896460533142,grad_norm: 0.8804128960483195, iteration: 26358
loss: 0.9901200532913208,grad_norm: 0.9999991837660823, iteration: 26359
loss: 1.0560933351516724,grad_norm: 0.9999992696641377, iteration: 26360
loss: 0.9995197653770447,grad_norm: 0.9270205400524184, iteration: 26361
loss: 1.0064539909362793,grad_norm: 0.9352858394453342, iteration: 26362
loss: 1.0049512386322021,grad_norm: 0.8558430851584523, iteration: 26363
loss: 1.0098766088485718,grad_norm: 0.9867162220091218, iteration: 26364
loss: 1.0044009685516357,grad_norm: 0.9999991848448692, iteration: 26365
loss: 1.01223886013031,grad_norm: 0.979462304298931, iteration: 26366
loss: 1.016752004623413,grad_norm: 0.9351101582786984, iteration: 26367
loss: 1.0258615016937256,grad_norm: 0.9999990466551056, iteration: 26368
loss: 1.0030611753463745,grad_norm: 0.9999990137609077, iteration: 26369
loss: 0.9978696703910828,grad_norm: 0.9079833863346797, iteration: 26370
loss: 0.9765182137489319,grad_norm: 0.999999131260229, iteration: 26371
loss: 0.9990447163581848,grad_norm: 0.8976733932651343, iteration: 26372
loss: 1.0108730792999268,grad_norm: 0.9755664183930078, iteration: 26373
loss: 1.0355161428451538,grad_norm: 0.9999996051313705, iteration: 26374
loss: 0.9859005212783813,grad_norm: 0.9999990813984295, iteration: 26375
loss: 1.0235047340393066,grad_norm: 0.9999991484088386, iteration: 26376
loss: 0.9811597466468811,grad_norm: 0.8889505154885586, iteration: 26377
loss: 1.0248678922653198,grad_norm: 0.8721384825091565, iteration: 26378
loss: 1.0181266069412231,grad_norm: 0.999999282621286, iteration: 26379
loss: 0.9713848233222961,grad_norm: 0.9999990301749612, iteration: 26380
loss: 1.0070006847381592,grad_norm: 0.7837033056572721, iteration: 26381
loss: 1.0281968116760254,grad_norm: 0.999999155960205, iteration: 26382
loss: 1.023583173751831,grad_norm: 0.9999991182010564, iteration: 26383
loss: 1.0302314758300781,grad_norm: 0.9999998759851536, iteration: 26384
loss: 0.9589159488677979,grad_norm: 0.999999046928935, iteration: 26385
loss: 1.0453850030899048,grad_norm: 0.9999990263175087, iteration: 26386
loss: 0.9743028879165649,grad_norm: 0.9902353166120055, iteration: 26387
loss: 0.9623339772224426,grad_norm: 0.814725535897729, iteration: 26388
loss: 1.0249955654144287,grad_norm: 0.9768109049503884, iteration: 26389
loss: 0.9359865188598633,grad_norm: 0.9865034626824719, iteration: 26390
loss: 1.0179628133773804,grad_norm: 0.9999992959723826, iteration: 26391
loss: 0.9993301033973694,grad_norm: 0.972589625365576, iteration: 26392
loss: 0.993485152721405,grad_norm: 0.9999991600198898, iteration: 26393
loss: 1.0211694240570068,grad_norm: 0.9999991084829414, iteration: 26394
loss: 0.976351797580719,grad_norm: 0.8975467771110642, iteration: 26395
loss: 0.9737446904182434,grad_norm: 0.8275635757598057, iteration: 26396
loss: 1.0188632011413574,grad_norm: 0.9999989337330336, iteration: 26397
loss: 0.9965457916259766,grad_norm: 0.9268871756893644, iteration: 26398
loss: 0.9851558208465576,grad_norm: 0.9999991013115886, iteration: 26399
loss: 1.0076996088027954,grad_norm: 0.9999992172336951, iteration: 26400
loss: 0.9691252112388611,grad_norm: 0.9999991566854317, iteration: 26401
loss: 0.9848442077636719,grad_norm: 0.9782577180980264, iteration: 26402
loss: 0.9852866530418396,grad_norm: 0.9999990597600075, iteration: 26403
loss: 0.9843116998672485,grad_norm: 0.8732994206922771, iteration: 26404
loss: 1.016545295715332,grad_norm: 0.9547728955261794, iteration: 26405
loss: 1.0108542442321777,grad_norm: 0.9111677053019578, iteration: 26406
loss: 0.9791692495346069,grad_norm: 0.999999181177767, iteration: 26407
loss: 1.0144855976104736,grad_norm: 0.9999990839129487, iteration: 26408
loss: 1.0458954572677612,grad_norm: 0.9999993997093429, iteration: 26409
loss: 1.0054408311843872,grad_norm: 0.9999990813466436, iteration: 26410
loss: 0.9913323521614075,grad_norm: 0.9315921178638632, iteration: 26411
loss: 0.9941422343254089,grad_norm: 0.9999991678489695, iteration: 26412
loss: 1.0427414178848267,grad_norm: 0.999999135500389, iteration: 26413
loss: 0.9837101697921753,grad_norm: 0.9939883232926277, iteration: 26414
loss: 0.9513708353042603,grad_norm: 0.9999990660431001, iteration: 26415
loss: 1.0673160552978516,grad_norm: 0.9999990691934987, iteration: 26416
loss: 0.9952055811882019,grad_norm: 0.8730967411737924, iteration: 26417
loss: 1.031043529510498,grad_norm: 0.9007478460626825, iteration: 26418
loss: 1.0217156410217285,grad_norm: 0.9999991391523285, iteration: 26419
loss: 0.966090738773346,grad_norm: 0.9999991859723693, iteration: 26420
loss: 0.9721841812133789,grad_norm: 0.9999991150957531, iteration: 26421
loss: 1.0042405128479004,grad_norm: 0.9999992110406508, iteration: 26422
loss: 1.0061099529266357,grad_norm: 0.9999991496829, iteration: 26423
loss: 1.0162585973739624,grad_norm: 0.9448187812247013, iteration: 26424
loss: 1.0101567506790161,grad_norm: 0.8870103819519236, iteration: 26425
loss: 1.0168031454086304,grad_norm: 0.9999991528851802, iteration: 26426
loss: 1.049248218536377,grad_norm: 0.9999991391247611, iteration: 26427
loss: 0.9577456712722778,grad_norm: 0.9966434932265015, iteration: 26428
loss: 0.9730542302131653,grad_norm: 0.9999991747878234, iteration: 26429
loss: 0.9930757880210876,grad_norm: 0.9954812427536796, iteration: 26430
loss: 1.0216113328933716,grad_norm: 0.999999173407099, iteration: 26431
loss: 1.0371426343917847,grad_norm: 0.8741143343538611, iteration: 26432
loss: 1.0079374313354492,grad_norm: 0.9999990625171757, iteration: 26433
loss: 0.9977566599845886,grad_norm: 0.7977953095057988, iteration: 26434
loss: 0.9904221892356873,grad_norm: 0.9999990659882654, iteration: 26435
loss: 0.9972721934318542,grad_norm: 0.8760822437596748, iteration: 26436
loss: 1.0207422971725464,grad_norm: 0.9999990610652136, iteration: 26437
loss: 1.0237596035003662,grad_norm: 0.9999991163773334, iteration: 26438
loss: 1.0294904708862305,grad_norm: 0.8993397064557598, iteration: 26439
loss: 0.9577198624610901,grad_norm: 0.9735391700878537, iteration: 26440
loss: 0.99240642786026,grad_norm: 0.9515859663155702, iteration: 26441
loss: 1.0144885778427124,grad_norm: 0.9999990789364008, iteration: 26442
loss: 0.9737486243247986,grad_norm: 0.999999091876918, iteration: 26443
loss: 1.0323563814163208,grad_norm: 0.9014076893581517, iteration: 26444
loss: 0.9758704304695129,grad_norm: 0.9770039353770673, iteration: 26445
loss: 1.0062013864517212,grad_norm: 0.9902521479050346, iteration: 26446
loss: 1.0136158466339111,grad_norm: 0.9999991598134443, iteration: 26447
loss: 1.0033044815063477,grad_norm: 0.9056545187507128, iteration: 26448
loss: 1.0191541910171509,grad_norm: 0.8616483148262545, iteration: 26449
loss: 1.0277912616729736,grad_norm: 0.9999991723871494, iteration: 26450
loss: 0.9599950313568115,grad_norm: 0.9999990447403928, iteration: 26451
loss: 1.0027129650115967,grad_norm: 0.8356492962652287, iteration: 26452
loss: 1.005031943321228,grad_norm: 0.8355748441110004, iteration: 26453
loss: 0.9343504309654236,grad_norm: 0.8981735439667243, iteration: 26454
loss: 1.0358949899673462,grad_norm: 0.9999991990144925, iteration: 26455
loss: 1.0438668727874756,grad_norm: 0.9999989784216781, iteration: 26456
loss: 0.9897938370704651,grad_norm: 0.9685990661096414, iteration: 26457
loss: 1.0524579286575317,grad_norm: 0.9305404574306294, iteration: 26458
loss: 0.9952743053436279,grad_norm: 0.9999991573070998, iteration: 26459
loss: 0.9892492890357971,grad_norm: 0.9476098981812034, iteration: 26460
loss: 0.9870965480804443,grad_norm: 0.9535912806159555, iteration: 26461
loss: 0.9722743630409241,grad_norm: 0.8912159111644173, iteration: 26462
loss: 1.0018736124038696,grad_norm: 0.9064150289523484, iteration: 26463
loss: 1.0127170085906982,grad_norm: 0.8593764578248734, iteration: 26464
loss: 1.032410979270935,grad_norm: 0.9999997856190178, iteration: 26465
loss: 1.0331615209579468,grad_norm: 0.9999990861507223, iteration: 26466
loss: 1.0029408931732178,grad_norm: 0.9999991949006724, iteration: 26467
loss: 0.9937798380851746,grad_norm: 0.8279508616776218, iteration: 26468
loss: 0.9891456365585327,grad_norm: 0.9436534859440776, iteration: 26469
loss: 1.0052909851074219,grad_norm: 0.9642598437705342, iteration: 26470
loss: 1.0348646640777588,grad_norm: 0.8207951777187568, iteration: 26471
loss: 0.988635778427124,grad_norm: 0.9232805498307385, iteration: 26472
loss: 0.9764559268951416,grad_norm: 0.9661225047479021, iteration: 26473
loss: 0.9679063558578491,grad_norm: 0.9999995173306465, iteration: 26474
loss: 1.004326581954956,grad_norm: 0.9019402918847276, iteration: 26475
loss: 0.9996472001075745,grad_norm: 0.8985165743285827, iteration: 26476
loss: 1.0002182722091675,grad_norm: 0.9999990942025734, iteration: 26477
loss: 1.0417985916137695,grad_norm: 0.8754240634187758, iteration: 26478
loss: 0.9735517501831055,grad_norm: 0.9448114914473379, iteration: 26479
loss: 1.0043190717697144,grad_norm: 0.882329308961562, iteration: 26480
loss: 0.9734480977058411,grad_norm: 0.9999991387112035, iteration: 26481
loss: 1.0050758123397827,grad_norm: 0.9999989621040436, iteration: 26482
loss: 0.9841181635856628,grad_norm: 0.9999992134191463, iteration: 26483
loss: 1.0187934637069702,grad_norm: 0.9578509372918768, iteration: 26484
loss: 1.0192763805389404,grad_norm: 0.9004153428066198, iteration: 26485
loss: 0.9771891832351685,grad_norm: 0.9212734666066446, iteration: 26486
loss: 1.045011043548584,grad_norm: 0.9999990652918018, iteration: 26487
loss: 0.9702293872833252,grad_norm: 0.9861595741675425, iteration: 26488
loss: 1.043984055519104,grad_norm: 0.9999992204664578, iteration: 26489
loss: 1.0234991312026978,grad_norm: 0.8355230449156142, iteration: 26490
loss: 0.9844534993171692,grad_norm: 0.9999989805448922, iteration: 26491
loss: 0.986885130405426,grad_norm: 0.9304476612245851, iteration: 26492
loss: 1.0036756992340088,grad_norm: 0.8819512133294171, iteration: 26493
loss: 0.9773884415626526,grad_norm: 0.9999991475758757, iteration: 26494
loss: 1.019734501838684,grad_norm: 0.9999991527810881, iteration: 26495
loss: 1.0108658075332642,grad_norm: 0.8432819371308125, iteration: 26496
loss: 0.9819082617759705,grad_norm: 0.999999323205177, iteration: 26497
loss: 1.0215595960617065,grad_norm: 0.8733089459710495, iteration: 26498
loss: 0.990930438041687,grad_norm: 0.9999991527679535, iteration: 26499
loss: 1.0126996040344238,grad_norm: 0.997343924356828, iteration: 26500
loss: 0.9889647960662842,grad_norm: 0.9999990851502699, iteration: 26501
loss: 1.0823354721069336,grad_norm: 0.9999990684230197, iteration: 26502
loss: 0.9621177315711975,grad_norm: 0.9999991601250634, iteration: 26503
loss: 0.9869070649147034,grad_norm: 0.9755114627034168, iteration: 26504
loss: 1.0446395874023438,grad_norm: 0.8287343400656736, iteration: 26505
loss: 1.034532070159912,grad_norm: 0.9999996521969029, iteration: 26506
loss: 1.0316561460494995,grad_norm: 0.9720356418051768, iteration: 26507
loss: 0.9909065961837769,grad_norm: 0.9253818497158043, iteration: 26508
loss: 0.9525255560874939,grad_norm: 0.9999990990669617, iteration: 26509
loss: 0.995032548904419,grad_norm: 0.9999992111349294, iteration: 26510
loss: 1.0069810152053833,grad_norm: 0.8719352983353788, iteration: 26511
loss: 1.002568006515503,grad_norm: 0.8868335807705572, iteration: 26512
loss: 1.0010664463043213,grad_norm: 0.9270780814280162, iteration: 26513
loss: 0.9925710558891296,grad_norm: 0.8399106535706321, iteration: 26514
loss: 0.9783880710601807,grad_norm: 0.9971247809616223, iteration: 26515
loss: 1.0375360250473022,grad_norm: 0.9999993320101139, iteration: 26516
loss: 0.9797966480255127,grad_norm: 0.9999993284458948, iteration: 26517
loss: 1.0153813362121582,grad_norm: 0.9999995672961368, iteration: 26518
loss: 1.0079096555709839,grad_norm: 0.8900334534304091, iteration: 26519
loss: 1.0194183588027954,grad_norm: 0.9395255292365529, iteration: 26520
loss: 1.0660077333450317,grad_norm: 0.9999991302506022, iteration: 26521
loss: 1.0362194776535034,grad_norm: 0.9999990599347547, iteration: 26522
loss: 0.9734804630279541,grad_norm: 0.9999990556982695, iteration: 26523
loss: 1.0118002891540527,grad_norm: 0.9999991831605689, iteration: 26524
loss: 1.0299938917160034,grad_norm: 0.8528234892832718, iteration: 26525
loss: 0.9872137308120728,grad_norm: 0.9999990962832085, iteration: 26526
loss: 1.0213512182235718,grad_norm: 0.9999990980905221, iteration: 26527
loss: 1.0261305570602417,grad_norm: 0.9449270501993069, iteration: 26528
loss: 1.0010417699813843,grad_norm: 0.9999991005250543, iteration: 26529
loss: 1.043614149093628,grad_norm: 0.9999992275665885, iteration: 26530
loss: 1.028605580329895,grad_norm: 0.9560375032303832, iteration: 26531
loss: 1.059319257736206,grad_norm: 0.9999993271924357, iteration: 26532
loss: 1.0143492221832275,grad_norm: 0.9492103040177463, iteration: 26533
loss: 1.0298365354537964,grad_norm: 0.9981799666996584, iteration: 26534
loss: 0.9856922030448914,grad_norm: 0.9999990274613996, iteration: 26535
loss: 1.0169291496276855,grad_norm: 0.9999990792407641, iteration: 26536
loss: 1.0260396003723145,grad_norm: 0.9999990987611063, iteration: 26537
loss: 1.0503332614898682,grad_norm: 0.999999178555112, iteration: 26538
loss: 1.0228962898254395,grad_norm: 0.7879407472165914, iteration: 26539
loss: 0.9718347787857056,grad_norm: 0.9999990934799473, iteration: 26540
loss: 0.940932035446167,grad_norm: 0.9559103348617942, iteration: 26541
loss: 1.0108593702316284,grad_norm: 0.9908261716626333, iteration: 26542
loss: 0.9523579478263855,grad_norm: 0.9243435332174937, iteration: 26543
loss: 1.020857572555542,grad_norm: 0.9242132409714968, iteration: 26544
loss: 1.0225017070770264,grad_norm: 0.9042030563947003, iteration: 26545
loss: 0.997392475605011,grad_norm: 0.9999992721080366, iteration: 26546
loss: 0.9994626641273499,grad_norm: 0.9999990165409183, iteration: 26547
loss: 1.0283609628677368,grad_norm: 0.9477128775366636, iteration: 26548
loss: 1.0102052688598633,grad_norm: 0.969129410177389, iteration: 26549
loss: 1.0334898233413696,grad_norm: 0.9999990103076422, iteration: 26550
loss: 1.0179688930511475,grad_norm: 0.8939711058771068, iteration: 26551
loss: 0.9988998770713806,grad_norm: 0.9365603874060126, iteration: 26552
loss: 0.999302864074707,grad_norm: 0.9999991083789667, iteration: 26553
loss: 1.0097196102142334,grad_norm: 0.9089547092980103, iteration: 26554
loss: 1.027274250984192,grad_norm: 0.9081437041300475, iteration: 26555
loss: 1.0061869621276855,grad_norm: 0.9999991150830363, iteration: 26556
loss: 1.0429730415344238,grad_norm: 0.8259573032806916, iteration: 26557
loss: 1.0482465028762817,grad_norm: 0.9810662161397993, iteration: 26558
loss: 0.9897340536117554,grad_norm: 0.9999990627507123, iteration: 26559
loss: 0.9752682447433472,grad_norm: 0.9500594217391114, iteration: 26560
loss: 1.0160905122756958,grad_norm: 0.9999992245974603, iteration: 26561
loss: 0.9965534210205078,grad_norm: 0.8020014400401121, iteration: 26562
loss: 0.9273673295974731,grad_norm: 0.9708184460871392, iteration: 26563
loss: 1.016863226890564,grad_norm: 0.9999991873779065, iteration: 26564
loss: 1.0175458192825317,grad_norm: 0.9999991303537368, iteration: 26565
loss: 0.9825505018234253,grad_norm: 0.9499858603282582, iteration: 26566
loss: 0.9997961521148682,grad_norm: 0.9999991015982898, iteration: 26567
loss: 1.028928279876709,grad_norm: 0.9999991101360095, iteration: 26568
loss: 0.984043300151825,grad_norm: 0.9999990956846551, iteration: 26569
loss: 0.9751953482627869,grad_norm: 0.9092663045188866, iteration: 26570
loss: 1.0103379487991333,grad_norm: 0.9798862618244226, iteration: 26571
loss: 1.039042592048645,grad_norm: 0.9381052554429716, iteration: 26572
loss: 1.0289266109466553,grad_norm: 0.8867837354898965, iteration: 26573
loss: 0.9925848245620728,grad_norm: 0.9999990598290064, iteration: 26574
loss: 1.0167113542556763,grad_norm: 0.9500392696080724, iteration: 26575
loss: 0.9918927550315857,grad_norm: 0.9020832904531783, iteration: 26576
loss: 0.9983761310577393,grad_norm: 0.9778515354112866, iteration: 26577
loss: 1.0037097930908203,grad_norm: 0.9327438494356193, iteration: 26578
loss: 1.0126043558120728,grad_norm: 0.8078996077523769, iteration: 26579
loss: 1.0544472932815552,grad_norm: 0.9999992205634214, iteration: 26580
loss: 1.0408116579055786,grad_norm: 0.9999991403509768, iteration: 26581
loss: 1.018832802772522,grad_norm: 0.9999990416806515, iteration: 26582
loss: 1.027787685394287,grad_norm: 0.9999993687187232, iteration: 26583
loss: 1.0336781740188599,grad_norm: 0.9999992350710104, iteration: 26584
loss: 1.0185341835021973,grad_norm: 0.8840103859992909, iteration: 26585
loss: 0.9936427474021912,grad_norm: 0.925373241610711, iteration: 26586
loss: 1.0057834386825562,grad_norm: 0.8501739918305447, iteration: 26587
loss: 0.9771294593811035,grad_norm: 0.9999991061934296, iteration: 26588
loss: 0.9993906617164612,grad_norm: 0.9999989798445241, iteration: 26589
loss: 1.0560606718063354,grad_norm: 0.9173954107682706, iteration: 26590
loss: 1.0029391050338745,grad_norm: 0.9709962542661454, iteration: 26591
loss: 1.0230756998062134,grad_norm: 0.9384166019727501, iteration: 26592
loss: 1.010705828666687,grad_norm: 0.999998988790918, iteration: 26593
loss: 1.003703236579895,grad_norm: 0.9999989813411687, iteration: 26594
loss: 1.011157512664795,grad_norm: 0.9999991761548952, iteration: 26595
loss: 0.973259449005127,grad_norm: 0.8384905349028547, iteration: 26596
loss: 1.0059237480163574,grad_norm: 0.8767698659165664, iteration: 26597
loss: 1.0207676887512207,grad_norm: 0.9999995382401761, iteration: 26598
loss: 0.9945817589759827,grad_norm: 0.9999991762572357, iteration: 26599
loss: 1.0073286294937134,grad_norm: 0.9640470649892128, iteration: 26600
loss: 0.9902064800262451,grad_norm: 0.9999992932376637, iteration: 26601
loss: 1.0010218620300293,grad_norm: 0.868590147229489, iteration: 26602
loss: 1.0145293474197388,grad_norm: 0.965653753612801, iteration: 26603
loss: 1.0366129875183105,grad_norm: 0.9999989579311636, iteration: 26604
loss: 0.9989569187164307,grad_norm: 0.887725949015074, iteration: 26605
loss: 1.0597221851348877,grad_norm: 0.9972903511671513, iteration: 26606
loss: 1.0003420114517212,grad_norm: 0.9356250614640269, iteration: 26607
loss: 1.007999300956726,grad_norm: 0.9438680325842451, iteration: 26608
loss: 1.0315485000610352,grad_norm: 0.9521441688198783, iteration: 26609
loss: 0.9885304570198059,grad_norm: 0.8728824829515067, iteration: 26610
loss: 1.033841609954834,grad_norm: 0.9140770011930296, iteration: 26611
loss: 1.02142333984375,grad_norm: 0.9881512600812375, iteration: 26612
loss: 1.0426150560379028,grad_norm: 0.9999992995211223, iteration: 26613
loss: 1.043683648109436,grad_norm: 0.9999990487217979, iteration: 26614
loss: 1.0200905799865723,grad_norm: 0.9999990959559004, iteration: 26615
loss: 1.0368084907531738,grad_norm: 0.9999990329446957, iteration: 26616
loss: 1.0318706035614014,grad_norm: 0.97127730957788, iteration: 26617
loss: 1.0049010515213013,grad_norm: 0.9289335430930445, iteration: 26618
loss: 0.988703191280365,grad_norm: 0.9168440978960266, iteration: 26619
loss: 1.0378494262695312,grad_norm: 0.8605811276854636, iteration: 26620
loss: 1.0633995532989502,grad_norm: 0.9999991080550893, iteration: 26621
loss: 0.9770597815513611,grad_norm: 0.8107391012755066, iteration: 26622
loss: 0.9705725908279419,grad_norm: 0.8944688260303154, iteration: 26623
loss: 1.0163437128067017,grad_norm: 0.9807384256510173, iteration: 26624
loss: 1.0335760116577148,grad_norm: 0.999999004599054, iteration: 26625
loss: 1.0311545133590698,grad_norm: 0.9999992419452258, iteration: 26626
loss: 1.0245863199234009,grad_norm: 0.9572909078173739, iteration: 26627
loss: 1.0254992246627808,grad_norm: 0.9999991746259724, iteration: 26628
loss: 0.9852423667907715,grad_norm: 0.9999990884453929, iteration: 26629
loss: 1.0223101377487183,grad_norm: 0.8658510269938572, iteration: 26630
loss: 1.0104436874389648,grad_norm: 0.986999924939685, iteration: 26631
loss: 0.9898738861083984,grad_norm: 0.8727043857936045, iteration: 26632
loss: 1.0229685306549072,grad_norm: 0.9751612892046826, iteration: 26633
loss: 1.0073857307434082,grad_norm: 0.9999991461244392, iteration: 26634
loss: 1.0350799560546875,grad_norm: 0.9999990727128207, iteration: 26635
loss: 0.9463312029838562,grad_norm: 0.9999992692378998, iteration: 26636
loss: 1.0377168655395508,grad_norm: 0.8973222674153547, iteration: 26637
loss: 0.9697769284248352,grad_norm: 0.8661306794784213, iteration: 26638
loss: 1.017472505569458,grad_norm: 0.9269002679892095, iteration: 26639
loss: 0.9947198033332825,grad_norm: 0.9732754585117139, iteration: 26640
loss: 0.9948130249977112,grad_norm: 0.9516262981643369, iteration: 26641
loss: 1.0083582401275635,grad_norm: 0.9999992478701408, iteration: 26642
loss: 0.9744574427604675,grad_norm: 0.9999990313712731, iteration: 26643
loss: 1.0109190940856934,grad_norm: 0.9930396434201169, iteration: 26644
loss: 0.989729642868042,grad_norm: 0.9999991125231182, iteration: 26645
loss: 1.0086543560028076,grad_norm: 0.9999990797900531, iteration: 26646
loss: 1.1304816007614136,grad_norm: 0.9999993057099695, iteration: 26647
loss: 0.9929392337799072,grad_norm: 0.9999990538500585, iteration: 26648
loss: 1.0331379175186157,grad_norm: 0.8272823360288446, iteration: 26649
loss: 1.017006516456604,grad_norm: 0.9979132130768965, iteration: 26650
loss: 1.0102993249893188,grad_norm: 0.879560439771506, iteration: 26651
loss: 0.9943129420280457,grad_norm: 0.896127233996326, iteration: 26652
loss: 1.0163928270339966,grad_norm: 0.9818342616616637, iteration: 26653
loss: 1.0283746719360352,grad_norm: 0.9992046984431265, iteration: 26654
loss: 1.0356348752975464,grad_norm: 0.9280328781630419, iteration: 26655
loss: 0.9959944486618042,grad_norm: 0.9430477816251726, iteration: 26656
loss: 1.0023950338363647,grad_norm: 0.9999990399381674, iteration: 26657
loss: 0.9951456189155579,grad_norm: 0.8590605255647408, iteration: 26658
loss: 1.0131829977035522,grad_norm: 0.9433317098802798, iteration: 26659
loss: 0.9565792083740234,grad_norm: 0.9258018013213242, iteration: 26660
loss: 1.0181756019592285,grad_norm: 0.9999990354749391, iteration: 26661
loss: 1.020866870880127,grad_norm: 0.9999990040069575, iteration: 26662
loss: 1.0223181247711182,grad_norm: 0.9999991189961149, iteration: 26663
loss: 1.0402112007141113,grad_norm: 0.9216417222593422, iteration: 26664
loss: 1.0071609020233154,grad_norm: 0.9249599096121076, iteration: 26665
loss: 1.0616146326065063,grad_norm: 0.9698636109725377, iteration: 26666
loss: 0.9926661849021912,grad_norm: 0.8616762380372031, iteration: 26667
loss: 1.0156779289245605,grad_norm: 0.9999990720865042, iteration: 26668
loss: 1.018079161643982,grad_norm: 0.999999083420272, iteration: 26669
loss: 1.022047758102417,grad_norm: 0.9241417920967366, iteration: 26670
loss: 0.9893420934677124,grad_norm: 0.9999990941421436, iteration: 26671
loss: 1.0196164846420288,grad_norm: 0.9424021696771148, iteration: 26672
loss: 1.022433876991272,grad_norm: 0.9999990327258932, iteration: 26673
loss: 1.013063669204712,grad_norm: 0.9999991585591406, iteration: 26674
loss: 0.9951897859573364,grad_norm: 0.9999991266015067, iteration: 26675
loss: 1.001895546913147,grad_norm: 0.9999993057763616, iteration: 26676
loss: 1.0108532905578613,grad_norm: 0.7866079328933275, iteration: 26677
loss: 1.003821611404419,grad_norm: 0.9999991286472381, iteration: 26678
loss: 1.0187263488769531,grad_norm: 0.9651392452871168, iteration: 26679
loss: 1.0249089002609253,grad_norm: 0.9999992661803269, iteration: 26680
loss: 0.9813447594642639,grad_norm: 0.812879414822774, iteration: 26681
loss: 0.9641961455345154,grad_norm: 0.9853279103635698, iteration: 26682
loss: 1.009184718132019,grad_norm: 0.9505785521843448, iteration: 26683
loss: 0.994853675365448,grad_norm: 0.9404744536826184, iteration: 26684
loss: 1.0161960124969482,grad_norm: 0.9999990081908771, iteration: 26685
loss: 0.9774413108825684,grad_norm: 0.9537326087381596, iteration: 26686
loss: 0.9983654022216797,grad_norm: 0.913605556630776, iteration: 26687
loss: 1.0017403364181519,grad_norm: 0.9999991618158168, iteration: 26688
loss: 1.0284762382507324,grad_norm: 0.9312411947193766, iteration: 26689
loss: 1.0024553537368774,grad_norm: 0.82225215262458, iteration: 26690
loss: 0.990614116191864,grad_norm: 0.9592426853212682, iteration: 26691
loss: 1.0248161554336548,grad_norm: 0.9646795987731739, iteration: 26692
loss: 1.0105732679367065,grad_norm: 0.9999991567565939, iteration: 26693
loss: 0.9995631575584412,grad_norm: 0.9999991452888102, iteration: 26694
loss: 1.0186012983322144,grad_norm: 0.9999989949432871, iteration: 26695
loss: 0.9816597700119019,grad_norm: 0.9999991275217065, iteration: 26696
loss: 1.008470058441162,grad_norm: 0.9332350791787334, iteration: 26697
loss: 1.0304152965545654,grad_norm: 0.9684794465197933, iteration: 26698
loss: 0.9884655475616455,grad_norm: 0.9089967474999914, iteration: 26699
loss: 1.0018309354782104,grad_norm: 0.9999990444980579, iteration: 26700
loss: 0.9904437065124512,grad_norm: 0.9999990897464641, iteration: 26701
loss: 0.9888625741004944,grad_norm: 0.771207315548359, iteration: 26702
loss: 1.0234572887420654,grad_norm: 0.9999991306453923, iteration: 26703
loss: 1.0271207094192505,grad_norm: 0.9999990713023668, iteration: 26704
loss: 0.9877938628196716,grad_norm: 0.913216141709508, iteration: 26705
loss: 0.9839682579040527,grad_norm: 0.9999991660541678, iteration: 26706
loss: 1.0171074867248535,grad_norm: 0.997201321341258, iteration: 26707
loss: 1.0087984800338745,grad_norm: 0.9318657997940618, iteration: 26708
loss: 0.9851517081260681,grad_norm: 0.9999990619154343, iteration: 26709
loss: 1.0082025527954102,grad_norm: 0.9783694539300093, iteration: 26710
loss: 0.9891329407691956,grad_norm: 0.9999991027416274, iteration: 26711
loss: 1.0878827571868896,grad_norm: 0.9999992046694842, iteration: 26712
loss: 1.0185526609420776,grad_norm: 0.9935520631891112, iteration: 26713
loss: 1.0587029457092285,grad_norm: 0.9999995632853199, iteration: 26714
loss: 1.0177972316741943,grad_norm: 0.9949688204806608, iteration: 26715
loss: 1.0351040363311768,grad_norm: 0.9999991592655693, iteration: 26716
loss: 1.0282227993011475,grad_norm: 0.9907591707251505, iteration: 26717
loss: 1.010819911956787,grad_norm: 0.9999991534567889, iteration: 26718
loss: 1.0250682830810547,grad_norm: 0.9999989757449766, iteration: 26719
loss: 1.0002745389938354,grad_norm: 0.8249674373298489, iteration: 26720
loss: 0.9625833034515381,grad_norm: 0.9551008310612213, iteration: 26721
loss: 1.0448641777038574,grad_norm: 0.9151984040865232, iteration: 26722
loss: 1.0196256637573242,grad_norm: 0.9811085615991012, iteration: 26723
loss: 1.0126948356628418,grad_norm: 0.9999989868085327, iteration: 26724
loss: 0.9837552309036255,grad_norm: 0.9735673441468329, iteration: 26725
loss: 0.9623250365257263,grad_norm: 0.9550686836560284, iteration: 26726
loss: 1.023269534111023,grad_norm: 0.9999989860487803, iteration: 26727
loss: 1.0011427402496338,grad_norm: 0.9999994363576274, iteration: 26728
loss: 0.9891759157180786,grad_norm: 0.9064295240066643, iteration: 26729
loss: 0.9904125928878784,grad_norm: 0.8865468068285576, iteration: 26730
loss: 1.0089068412780762,grad_norm: 0.9999998350843664, iteration: 26731
loss: 1.0040873289108276,grad_norm: 0.9721910848281281, iteration: 26732
loss: 1.0293323993682861,grad_norm: 0.7908837627412267, iteration: 26733
loss: 1.012954831123352,grad_norm: 0.977298737754736, iteration: 26734
loss: 1.0201802253723145,grad_norm: 0.8553993714335836, iteration: 26735
loss: 0.9713983535766602,grad_norm: 0.8473596767955852, iteration: 26736
loss: 0.9787312746047974,grad_norm: 0.9069073826392725, iteration: 26737
loss: 0.9909961223602295,grad_norm: 0.999999139576147, iteration: 26738
loss: 1.0255197286605835,grad_norm: 0.9999997008499397, iteration: 26739
loss: 0.9763441681861877,grad_norm: 0.9237642923789865, iteration: 26740
loss: 0.9939473867416382,grad_norm: 0.999999019585379, iteration: 26741
loss: 1.0134592056274414,grad_norm: 0.9999991147876012, iteration: 26742
loss: 1.000476598739624,grad_norm: 0.9999990522746346, iteration: 26743
loss: 1.0527318716049194,grad_norm: 0.9999994635057418, iteration: 26744
loss: 0.9961645603179932,grad_norm: 0.999999159255577, iteration: 26745
loss: 1.0170435905456543,grad_norm: 0.965794364789637, iteration: 26746
loss: 0.9873462915420532,grad_norm: 0.930499546589625, iteration: 26747
loss: 1.0096384286880493,grad_norm: 0.9802339159916538, iteration: 26748
loss: 1.0180072784423828,grad_norm: 0.8990323546064252, iteration: 26749
loss: 1.0244172811508179,grad_norm: 0.8801419621687955, iteration: 26750
loss: 0.9987943768501282,grad_norm: 0.9437737473603619, iteration: 26751
loss: 1.0342013835906982,grad_norm: 0.947013499448791, iteration: 26752
loss: 0.9980809092521667,grad_norm: 0.9165101866945613, iteration: 26753
loss: 0.9711692929267883,grad_norm: 0.9999991134676711, iteration: 26754
loss: 0.9710513353347778,grad_norm: 0.9999990697118384, iteration: 26755
loss: 1.0263817310333252,grad_norm: 0.9999992726784765, iteration: 26756
loss: 1.0413343906402588,grad_norm: 0.999999072280015, iteration: 26757
loss: 0.9884837865829468,grad_norm: 0.934763917454933, iteration: 26758
loss: 0.9792031049728394,grad_norm: 0.9670700224713796, iteration: 26759
loss: 1.0030901432037354,grad_norm: 0.9999991418364939, iteration: 26760
loss: 1.01212739944458,grad_norm: 0.8366232387706033, iteration: 26761
loss: 1.0473976135253906,grad_norm: 0.9999991506040075, iteration: 26762
loss: 0.9971489310264587,grad_norm: 0.932294354701713, iteration: 26763
loss: 0.9975157380104065,grad_norm: 0.9662457163549665, iteration: 26764
loss: 1.0211390256881714,grad_norm: 0.9999991260991328, iteration: 26765
loss: 0.9688173532485962,grad_norm: 0.94479083937183, iteration: 26766
loss: 0.9920307993888855,grad_norm: 0.9999992817669878, iteration: 26767
loss: 1.092210054397583,grad_norm: 0.9999998696388036, iteration: 26768
loss: 1.043940544128418,grad_norm: 0.9942901178017679, iteration: 26769
loss: 0.9605124592781067,grad_norm: 0.9999991719376483, iteration: 26770
loss: 1.0225061178207397,grad_norm: 0.9993508479777307, iteration: 26771
loss: 0.9962295293807983,grad_norm: 0.7484003567157754, iteration: 26772
loss: 1.012688398361206,grad_norm: 0.9111882073257804, iteration: 26773
loss: 1.0177348852157593,grad_norm: 0.9072680889795607, iteration: 26774
loss: 1.0369733572006226,grad_norm: 0.8694555507936186, iteration: 26775
loss: 1.0074721574783325,grad_norm: 0.9963710911191681, iteration: 26776
loss: 1.0153372287750244,grad_norm: 0.9999991694617123, iteration: 26777
loss: 0.9877949953079224,grad_norm: 0.9999990867795359, iteration: 26778
loss: 1.0106923580169678,grad_norm: 0.9999994453615411, iteration: 26779
loss: 1.0096824169158936,grad_norm: 0.9707532818300976, iteration: 26780
loss: 1.0401124954223633,grad_norm: 0.8874485082500307, iteration: 26781
loss: 1.0497021675109863,grad_norm: 0.9196280555505039, iteration: 26782
loss: 0.9802111387252808,grad_norm: 0.9999991646963947, iteration: 26783
loss: 0.9993723630905151,grad_norm: 0.9999990877595402, iteration: 26784
loss: 0.975477933883667,grad_norm: 0.8195957742791424, iteration: 26785
loss: 1.0487534999847412,grad_norm: 0.9999991450038034, iteration: 26786
loss: 0.9982113838195801,grad_norm: 0.8782267075566023, iteration: 26787
loss: 1.0401076078414917,grad_norm: 0.9999991457213377, iteration: 26788
loss: 0.9981983304023743,grad_norm: 0.9993088374632589, iteration: 26789
loss: 1.0233936309814453,grad_norm: 0.9177435000822403, iteration: 26790
loss: 0.9550842046737671,grad_norm: 0.9999990382953602, iteration: 26791
loss: 0.995880663394928,grad_norm: 0.9084016266937663, iteration: 26792
loss: 0.9774155616760254,grad_norm: 0.9774048329367974, iteration: 26793
loss: 0.989170253276825,grad_norm: 0.748982061349359, iteration: 26794
loss: 0.9751161932945251,grad_norm: 0.9999990998885449, iteration: 26795
loss: 0.9838939309120178,grad_norm: 0.8237447484604545, iteration: 26796
loss: 0.9754801392555237,grad_norm: 0.999999068751089, iteration: 26797
loss: 1.0136178731918335,grad_norm: 0.9856907886634818, iteration: 26798
loss: 1.0228536128997803,grad_norm: 0.940578636546106, iteration: 26799
loss: 1.045912265777588,grad_norm: 0.999999140338157, iteration: 26800
loss: 1.014792799949646,grad_norm: 0.9290237849397344, iteration: 26801
loss: 1.0818724632263184,grad_norm: 0.9999991888612989, iteration: 26802
loss: 0.997490406036377,grad_norm: 0.9901874071417947, iteration: 26803
loss: 1.0241870880126953,grad_norm: 0.9999990700035075, iteration: 26804
loss: 1.020845890045166,grad_norm: 0.999999269360005, iteration: 26805
loss: 1.0363829135894775,grad_norm: 0.9999996331700206, iteration: 26806
loss: 1.0078718662261963,grad_norm: 0.9999992094059604, iteration: 26807
loss: 0.9866006970405579,grad_norm: 0.8059243210766126, iteration: 26808
loss: 0.9633669257164001,grad_norm: 0.9337421925731973, iteration: 26809
loss: 1.020780324935913,grad_norm: 0.9999993090032389, iteration: 26810
loss: 1.0284308195114136,grad_norm: 0.9999991466887281, iteration: 26811
loss: 1.0029559135437012,grad_norm: 0.9025943622959691, iteration: 26812
loss: 1.0166455507278442,grad_norm: 0.9598244269650614, iteration: 26813
loss: 0.9918674826622009,grad_norm: 0.9083316365212395, iteration: 26814
loss: 1.0415778160095215,grad_norm: 0.9171314402575488, iteration: 26815
loss: 1.0010387897491455,grad_norm: 0.8656561765211066, iteration: 26816
loss: 0.9882827997207642,grad_norm: 0.9999991028978904, iteration: 26817
loss: 1.0123757123947144,grad_norm: 0.8807555961749055, iteration: 26818
loss: 1.000057578086853,grad_norm: 0.999998953099281, iteration: 26819
loss: 1.0079015493392944,grad_norm: 0.9999991686646612, iteration: 26820
loss: 0.9983121752738953,grad_norm: 0.9999991933024951, iteration: 26821
loss: 0.9859643578529358,grad_norm: 0.9999992143219754, iteration: 26822
loss: 1.040826439857483,grad_norm: 0.9999989689347026, iteration: 26823
loss: 1.0034195184707642,grad_norm: 0.9784335548734748, iteration: 26824
loss: 1.0244886875152588,grad_norm: 0.9938715287668238, iteration: 26825
loss: 1.014633059501648,grad_norm: 0.8449972462968739, iteration: 26826
loss: 0.948641300201416,grad_norm: 0.8553609860775727, iteration: 26827
loss: 0.99235600233078,grad_norm: 0.999999144756037, iteration: 26828
loss: 1.0344350337982178,grad_norm: 0.9800142984243115, iteration: 26829
loss: 1.0042643547058105,grad_norm: 0.9983427979604487, iteration: 26830
loss: 0.9879364371299744,grad_norm: 0.9002989682445156, iteration: 26831
loss: 1.01046621799469,grad_norm: 0.9717553516163119, iteration: 26832
loss: 1.0319386720657349,grad_norm: 0.9999996873744266, iteration: 26833
loss: 0.9730208516120911,grad_norm: 0.830562580973788, iteration: 26834
loss: 1.0211724042892456,grad_norm: 0.8876183919503794, iteration: 26835
loss: 0.9876617789268494,grad_norm: 0.8217516329406008, iteration: 26836
loss: 0.9865655303001404,grad_norm: 0.90937075235896, iteration: 26837
loss: 0.9920914173126221,grad_norm: 0.92583153463458, iteration: 26838
loss: 0.9976744055747986,grad_norm: 0.9999992370652074, iteration: 26839
loss: 1.0059177875518799,grad_norm: 0.8474037654974919, iteration: 26840
loss: 0.9731935858726501,grad_norm: 0.9999991008508631, iteration: 26841
loss: 1.0023332834243774,grad_norm: 0.9781716676354774, iteration: 26842
loss: 1.0236645936965942,grad_norm: 0.9999991129148494, iteration: 26843
loss: 1.0146249532699585,grad_norm: 0.9999988974706013, iteration: 26844
loss: 1.117152214050293,grad_norm: 0.999999340874016, iteration: 26845
loss: 0.9856640696525574,grad_norm: 0.9999992827799193, iteration: 26846
loss: 1.0014137029647827,grad_norm: 0.9999990047330248, iteration: 26847
loss: 0.9918103218078613,grad_norm: 0.9999991589357826, iteration: 26848
loss: 0.9569706320762634,grad_norm: 0.9868068183589535, iteration: 26849
loss: 1.0327305793762207,grad_norm: 0.9999993894735815, iteration: 26850
loss: 0.952046275138855,grad_norm: 0.9999990065377999, iteration: 26851
loss: 1.030396580696106,grad_norm: 0.9999993195866264, iteration: 26852
loss: 0.984018087387085,grad_norm: 0.9999991735775416, iteration: 26853
loss: 1.0547194480895996,grad_norm: 0.9949927921230343, iteration: 26854
loss: 0.9741665124893188,grad_norm: 0.9999991584309965, iteration: 26855
loss: 0.9997527003288269,grad_norm: 0.9999992186860607, iteration: 26856
loss: 0.9807131886482239,grad_norm: 0.9999990774806268, iteration: 26857
loss: 0.9958745241165161,grad_norm: 0.9999990613149864, iteration: 26858
loss: 1.0131913423538208,grad_norm: 0.9999991317760475, iteration: 26859
loss: 1.0426230430603027,grad_norm: 0.9999990399539452, iteration: 26860
loss: 1.0407449007034302,grad_norm: 0.9999993233066109, iteration: 26861
loss: 1.0345059633255005,grad_norm: 0.967598345628028, iteration: 26862
loss: 0.9896444082260132,grad_norm: 0.9327822031019565, iteration: 26863
loss: 1.0637162923812866,grad_norm: 0.999998975096327, iteration: 26864
loss: 0.9775590300559998,grad_norm: 0.9999989647470254, iteration: 26865
loss: 0.9918073415756226,grad_norm: 0.9999992568341771, iteration: 26866
loss: 1.027018666267395,grad_norm: 0.8453589642289877, iteration: 26867
loss: 1.0035972595214844,grad_norm: 0.9999991053689224, iteration: 26868
loss: 0.9454480409622192,grad_norm: 0.9999990600637731, iteration: 26869
loss: 0.9823407530784607,grad_norm: 0.8469088929773995, iteration: 26870
loss: 1.014358401298523,grad_norm: 0.93798190532059, iteration: 26871
loss: 0.9661453366279602,grad_norm: 0.9937452142464047, iteration: 26872
loss: 0.9568678140640259,grad_norm: 0.858964538035612, iteration: 26873
loss: 1.0331215858459473,grad_norm: 0.9999991047500593, iteration: 26874
loss: 0.9880092740058899,grad_norm: 0.9821236640561436, iteration: 26875
loss: 1.0238406658172607,grad_norm: 0.9999990685560409, iteration: 26876
loss: 1.0080792903900146,grad_norm: 0.992183678257644, iteration: 26877
loss: 0.9916046857833862,grad_norm: 0.9999990470245983, iteration: 26878
loss: 1.0483734607696533,grad_norm: 0.9999990586199562, iteration: 26879
loss: 0.9835397601127625,grad_norm: 0.8672899797328859, iteration: 26880
loss: 1.0467875003814697,grad_norm: 0.9999991111008518, iteration: 26881
loss: 0.9791811108589172,grad_norm: 0.9787833760731299, iteration: 26882
loss: 1.0227893590927124,grad_norm: 0.9999989950404228, iteration: 26883
loss: 0.9832790493965149,grad_norm: 0.9919468046004953, iteration: 26884
loss: 1.0364476442337036,grad_norm: 0.9999991399965938, iteration: 26885
loss: 1.0252407789230347,grad_norm: 0.9498853797991842, iteration: 26886
loss: 0.9938815832138062,grad_norm: 0.9099258715239397, iteration: 26887
loss: 1.0244938135147095,grad_norm: 0.8354892610914983, iteration: 26888
loss: 0.9517933130264282,grad_norm: 0.9999991330464498, iteration: 26889
loss: 1.0163536071777344,grad_norm: 0.986524463699502, iteration: 26890
loss: 1.0025590658187866,grad_norm: 0.9999992917396144, iteration: 26891
loss: 0.9986066222190857,grad_norm: 0.9402021130386474, iteration: 26892
loss: 1.0201075077056885,grad_norm: 0.9814392329365482, iteration: 26893
loss: 1.0029833316802979,grad_norm: 0.894316245607037, iteration: 26894
loss: 1.0015840530395508,grad_norm: 0.999999129137899, iteration: 26895
loss: 0.9963819980621338,grad_norm: 0.9999992988054576, iteration: 26896
loss: 0.954851508140564,grad_norm: 0.9999992999994067, iteration: 26897
loss: 1.0063731670379639,grad_norm: 0.9999991175468477, iteration: 26898
loss: 0.9818649291992188,grad_norm: 0.8343357446467187, iteration: 26899
loss: 0.9929221868515015,grad_norm: 0.9125518797594169, iteration: 26900
loss: 0.9826878905296326,grad_norm: 0.8930365030319299, iteration: 26901
loss: 0.9919901490211487,grad_norm: 0.9999990479434042, iteration: 26902
loss: 1.0115607976913452,grad_norm: 0.8692176096838147, iteration: 26903
loss: 1.0068175792694092,grad_norm: 0.9922043623753862, iteration: 26904
loss: 1.0201303958892822,grad_norm: 0.972976422152889, iteration: 26905
loss: 1.0336410999298096,grad_norm: 0.9278004988177039, iteration: 26906
loss: 1.0283104181289673,grad_norm: 0.9179955230894478, iteration: 26907
loss: 1.0239664316177368,grad_norm: 0.9999992631081475, iteration: 26908
loss: 1.0111600160598755,grad_norm: 0.9999991137129944, iteration: 26909
loss: 1.034063458442688,grad_norm: 0.9999990371693314, iteration: 26910
loss: 0.98626309633255,grad_norm: 0.9999991816645497, iteration: 26911
loss: 0.9792422652244568,grad_norm: 0.9102649138957156, iteration: 26912
loss: 0.9892155528068542,grad_norm: 0.9999991675520593, iteration: 26913
loss: 1.0410774946212769,grad_norm: 0.9825272299681161, iteration: 26914
loss: 0.9847154021263123,grad_norm: 0.9703779041303469, iteration: 26915
loss: 1.0012733936309814,grad_norm: 0.9071121307506299, iteration: 26916
loss: 1.015210747718811,grad_norm: 0.9999990850194423, iteration: 26917
loss: 1.0288434028625488,grad_norm: 0.9999992796063317, iteration: 26918
loss: 1.0135977268218994,grad_norm: 0.8468224678356394, iteration: 26919
loss: 0.9634387493133545,grad_norm: 0.9999990137392534, iteration: 26920
loss: 1.000349760055542,grad_norm: 0.9797655157522217, iteration: 26921
loss: 1.0010721683502197,grad_norm: 0.9999990351030765, iteration: 26922
loss: 1.0035841464996338,grad_norm: 0.9846281047503992, iteration: 26923
loss: 0.9498600363731384,grad_norm: 0.8013586932920718, iteration: 26924
loss: 0.9915691614151001,grad_norm: 0.9999991255049051, iteration: 26925
loss: 1.0391309261322021,grad_norm: 0.9999991416703374, iteration: 26926
loss: 1.0273048877716064,grad_norm: 0.9243842834179395, iteration: 26927
loss: 0.9941693544387817,grad_norm: 0.9999990937104155, iteration: 26928
loss: 1.0200061798095703,grad_norm: 0.9871570760140587, iteration: 26929
loss: 1.0282909870147705,grad_norm: 0.9362831630448842, iteration: 26930
loss: 1.0092276334762573,grad_norm: 0.9999990955644523, iteration: 26931
loss: 1.0286576747894287,grad_norm: 0.9922571849047602, iteration: 26932
loss: 1.003982424736023,grad_norm: 0.8409313518001432, iteration: 26933
loss: 1.0023720264434814,grad_norm: 0.7854423042867569, iteration: 26934
loss: 1.0017788410186768,grad_norm: 0.845568997726484, iteration: 26935
loss: 0.9809920191764832,grad_norm: 0.999999101508051, iteration: 26936
loss: 1.0213984251022339,grad_norm: 0.810419383168609, iteration: 26937
loss: 0.9966023564338684,grad_norm: 0.9376953825904689, iteration: 26938
loss: 1.0416083335876465,grad_norm: 0.861285544540527, iteration: 26939
loss: 0.9793433547019958,grad_norm: 0.9999990217317751, iteration: 26940
loss: 0.9831584692001343,grad_norm: 0.9999991745850902, iteration: 26941
loss: 0.9806062579154968,grad_norm: 0.9210344192846843, iteration: 26942
loss: 0.9931524991989136,grad_norm: 0.9999992628637151, iteration: 26943
loss: 0.9960076212882996,grad_norm: 0.9643319224478908, iteration: 26944
loss: 0.9901472330093384,grad_norm: 0.9085196844237473, iteration: 26945
loss: 1.032863736152649,grad_norm: 0.9999179022800753, iteration: 26946
loss: 1.0352380275726318,grad_norm: 0.8094973525993664, iteration: 26947
loss: 1.0209033489227295,grad_norm: 0.959491894316352, iteration: 26948
loss: 0.9956011772155762,grad_norm: 0.9999990691699161, iteration: 26949
loss: 1.0024486780166626,grad_norm: 0.7823965171679036, iteration: 26950
loss: 0.982893705368042,grad_norm: 0.7997779727012133, iteration: 26951
loss: 0.9915322661399841,grad_norm: 0.9999990296140031, iteration: 26952
loss: 0.960079550743103,grad_norm: 0.9528243308044847, iteration: 26953
loss: 1.0109813213348389,grad_norm: 0.8979020896713644, iteration: 26954
loss: 1.019386649131775,grad_norm: 0.9999990320471156, iteration: 26955
loss: 1.0595672130584717,grad_norm: 0.9999991615038853, iteration: 26956
loss: 1.0358686447143555,grad_norm: 0.9999992831521693, iteration: 26957
loss: 0.9911001920700073,grad_norm: 0.9999991257057903, iteration: 26958
loss: 1.0089255571365356,grad_norm: 0.8024313323572811, iteration: 26959
loss: 0.9848098754882812,grad_norm: 0.9810397377003367, iteration: 26960
loss: 1.028435468673706,grad_norm: 0.9999990516339204, iteration: 26961
loss: 0.9995031952857971,grad_norm: 0.9772147267550718, iteration: 26962
loss: 1.0744835138320923,grad_norm: 0.9999989150610303, iteration: 26963
loss: 1.0220669507980347,grad_norm: 0.9619978705191482, iteration: 26964
loss: 0.977236270904541,grad_norm: 0.9999991130631296, iteration: 26965
loss: 0.9755874276161194,grad_norm: 0.9999992318926206, iteration: 26966
loss: 1.0154612064361572,grad_norm: 0.9999990194255276, iteration: 26967
loss: 0.9995275735855103,grad_norm: 0.9337983150181834, iteration: 26968
loss: 1.0453226566314697,grad_norm: 0.9101134139333205, iteration: 26969
loss: 0.9982872605323792,grad_norm: 0.9999990642701625, iteration: 26970
loss: 0.9982635974884033,grad_norm: 0.9999991635202639, iteration: 26971
loss: 0.9951902627944946,grad_norm: 0.8856767127945893, iteration: 26972
loss: 0.9913227558135986,grad_norm: 0.9999991054630801, iteration: 26973
loss: 0.9917932748794556,grad_norm: 0.9999990836317407, iteration: 26974
loss: 1.0100079774856567,grad_norm: 0.9139822825781257, iteration: 26975
loss: 1.0035160779953003,grad_norm: 0.9092117079749246, iteration: 26976
loss: 1.0377166271209717,grad_norm: 0.9999991362297618, iteration: 26977
loss: 1.0193172693252563,grad_norm: 0.9999991788649014, iteration: 26978
loss: 1.0155316591262817,grad_norm: 0.9836643944681962, iteration: 26979
loss: 1.0033843517303467,grad_norm: 0.9160884737404218, iteration: 26980
loss: 0.9882458448410034,grad_norm: 0.9382096060312122, iteration: 26981
loss: 1.0178042650222778,grad_norm: 0.999999144856268, iteration: 26982
loss: 0.9855378270149231,grad_norm: 0.9563065661688817, iteration: 26983
loss: 1.030395269393921,grad_norm: 0.9999989788645106, iteration: 26984
loss: 0.985195517539978,grad_norm: 0.9999990751776612, iteration: 26985
loss: 1.0254658460617065,grad_norm: 0.9526744462507427, iteration: 26986
loss: 0.9744762182235718,grad_norm: 0.9348128595876345, iteration: 26987
loss: 0.9705792665481567,grad_norm: 0.7790654843267597, iteration: 26988
loss: 0.9865334033966064,grad_norm: 0.9508756633929818, iteration: 26989
loss: 0.9876496195793152,grad_norm: 0.9999990595882003, iteration: 26990
loss: 0.9727140069007874,grad_norm: 0.9235394404301411, iteration: 26991
loss: 1.014894962310791,grad_norm: 0.8693913176649631, iteration: 26992
loss: 1.0167477130889893,grad_norm: 0.853280926793434, iteration: 26993
loss: 1.033928632736206,grad_norm: 0.9999991141917662, iteration: 26994
loss: 1.0277332067489624,grad_norm: 0.8329467224637599, iteration: 26995
loss: 0.9927765727043152,grad_norm: 0.9315690339783267, iteration: 26996
loss: 1.0307022333145142,grad_norm: 0.8493733131292356, iteration: 26997
loss: 0.9676024913787842,grad_norm: 0.9879308467137887, iteration: 26998
loss: 0.9949591755867004,grad_norm: 0.9427018788024684, iteration: 26999
loss: 1.0089237689971924,grad_norm: 0.9020707671150554, iteration: 27000
loss: 1.0120174884796143,grad_norm: 0.9792531978345783, iteration: 27001
loss: 1.04293954372406,grad_norm: 0.99949167341624, iteration: 27002
loss: 1.0136370658874512,grad_norm: 0.9999991315631077, iteration: 27003
loss: 1.0046875476837158,grad_norm: 0.9999991158766003, iteration: 27004
loss: 0.94394451379776,grad_norm: 0.9955326497174206, iteration: 27005
loss: 1.018298625946045,grad_norm: 0.9895798383445648, iteration: 27006
loss: 0.9726519584655762,grad_norm: 0.867974539571632, iteration: 27007
loss: 0.9904165267944336,grad_norm: 0.852490921488558, iteration: 27008
loss: 0.9766556024551392,grad_norm: 0.7592871247874663, iteration: 27009
loss: 1.0183396339416504,grad_norm: 0.9999990992780301, iteration: 27010
loss: 0.9978004693984985,grad_norm: 0.9626854808992853, iteration: 27011
loss: 1.014963984489441,grad_norm: 0.8236822491956087, iteration: 27012
loss: 0.9947419762611389,grad_norm: 0.9115836625939018, iteration: 27013
loss: 1.0488361120224,grad_norm: 0.9661104698483002, iteration: 27014
loss: 0.9786203503608704,grad_norm: 0.8867050602241349, iteration: 27015
loss: 0.9974257349967957,grad_norm: 0.9470424438842259, iteration: 27016
loss: 1.0528452396392822,grad_norm: 0.9171787698070969, iteration: 27017
loss: 1.001547932624817,grad_norm: 0.9999990371901397, iteration: 27018
loss: 1.0029518604278564,grad_norm: 0.9999991753814813, iteration: 27019
loss: 1.0358418226242065,grad_norm: 0.9626248926412703, iteration: 27020
loss: 0.9972909688949585,grad_norm: 0.9310010458931259, iteration: 27021
loss: 1.0174050331115723,grad_norm: 0.9999992665610061, iteration: 27022
loss: 0.9902402758598328,grad_norm: 0.9999990090421451, iteration: 27023
loss: 1.0435731410980225,grad_norm: 0.9469813906457952, iteration: 27024
loss: 1.0214693546295166,grad_norm: 0.9999991813577587, iteration: 27025
loss: 1.0356166362762451,grad_norm: 0.9999990965933929, iteration: 27026
loss: 1.0076497793197632,grad_norm: 0.9999989945077505, iteration: 27027
loss: 0.9938238263130188,grad_norm: 0.9611751244535932, iteration: 27028
loss: 1.0269314050674438,grad_norm: 0.9832905678423006, iteration: 27029
loss: 1.0216282606124878,grad_norm: 0.8787563431588485, iteration: 27030
loss: 1.010762095451355,grad_norm: 0.8902767443472271, iteration: 27031
loss: 1.0111966133117676,grad_norm: 0.9599625772078252, iteration: 27032
loss: 1.0629827976226807,grad_norm: 0.9661907973943629, iteration: 27033
loss: 1.0236741304397583,grad_norm: 0.9999990176506192, iteration: 27034
loss: 0.9858561754226685,grad_norm: 0.9999991688742582, iteration: 27035
loss: 1.000550627708435,grad_norm: 0.999999590018961, iteration: 27036
loss: 1.0233914852142334,grad_norm: 0.8594939382929943, iteration: 27037
loss: 1.0069351196289062,grad_norm: 0.8946413509231339, iteration: 27038
loss: 0.9903585314750671,grad_norm: 0.9887167429903638, iteration: 27039
loss: 1.0375922918319702,grad_norm: 0.9999991176706814, iteration: 27040
loss: 1.023565649986267,grad_norm: 0.7804251057590395, iteration: 27041
loss: 1.0219156742095947,grad_norm: 0.9999990547394175, iteration: 27042
loss: 0.9743350148200989,grad_norm: 0.9999990653929959, iteration: 27043
loss: 1.0021823644638062,grad_norm: 0.9223010176077702, iteration: 27044
loss: 0.9834903478622437,grad_norm: 0.9436165207052947, iteration: 27045
loss: 1.0034921169281006,grad_norm: 0.999999695077601, iteration: 27046
loss: 1.0084655284881592,grad_norm: 0.8883566969385623, iteration: 27047
loss: 1.0465495586395264,grad_norm: 0.9999989913252482, iteration: 27048
loss: 1.0242164134979248,grad_norm: 0.9910699535502999, iteration: 27049
loss: 0.9930838942527771,grad_norm: 0.9999990085081653, iteration: 27050
loss: 0.9928733706474304,grad_norm: 0.9164343063754186, iteration: 27051
loss: 1.0360691547393799,grad_norm: 0.8472837103049008, iteration: 27052
loss: 1.0089824199676514,grad_norm: 0.8769086857659911, iteration: 27053
loss: 1.0086220502853394,grad_norm: 0.9524143022515217, iteration: 27054
loss: 1.0227340459823608,grad_norm: 0.999999094366784, iteration: 27055
loss: 1.0546603202819824,grad_norm: 0.9999992173220035, iteration: 27056
loss: 0.9885389804840088,grad_norm: 0.9323269075629741, iteration: 27057
loss: 1.015182375907898,grad_norm: 0.9999991085854573, iteration: 27058
loss: 1.021057367324829,grad_norm: 0.9999991007663576, iteration: 27059
loss: 1.0096415281295776,grad_norm: 0.9999991996936993, iteration: 27060
loss: 0.9930297136306763,grad_norm: 0.8420536570918381, iteration: 27061
loss: 0.9676443934440613,grad_norm: 0.9999990717125121, iteration: 27062
loss: 1.0372531414031982,grad_norm: 0.8475167710503545, iteration: 27063
loss: 1.0057977437973022,grad_norm: 0.9317422577205604, iteration: 27064
loss: 1.0246020555496216,grad_norm: 0.9999990687837754, iteration: 27065
loss: 0.9984608888626099,grad_norm: 0.9753571852367314, iteration: 27066
loss: 1.0152164697647095,grad_norm: 0.9999994442019107, iteration: 27067
loss: 1.010603666305542,grad_norm: 0.8997788124854198, iteration: 27068
loss: 1.032936453819275,grad_norm: 0.9999995994784513, iteration: 27069
loss: 1.0081053972244263,grad_norm: 0.9999992374317292, iteration: 27070
loss: 0.9846362471580505,grad_norm: 0.9132491067754448, iteration: 27071
loss: 1.0420888662338257,grad_norm: 0.9999994964906214, iteration: 27072
loss: 1.0007375478744507,grad_norm: 0.8316352118397093, iteration: 27073
loss: 1.0311444997787476,grad_norm: 0.9631106552965278, iteration: 27074
loss: 0.9780011773109436,grad_norm: 0.9999991745986717, iteration: 27075
loss: 1.0327818393707275,grad_norm: 0.9624119481575348, iteration: 27076
loss: 1.0015137195587158,grad_norm: 0.9999989941544952, iteration: 27077
loss: 1.031380534172058,grad_norm: 0.9541384548082947, iteration: 27078
loss: 0.9841882586479187,grad_norm: 0.9999990942554237, iteration: 27079
loss: 0.9918011426925659,grad_norm: 0.9999990606276434, iteration: 27080
loss: 0.9790368676185608,grad_norm: 0.8923348869913326, iteration: 27081
loss: 1.0123765468597412,grad_norm: 0.9994215338525715, iteration: 27082
loss: 1.003124475479126,grad_norm: 0.9999995307075762, iteration: 27083
loss: 0.9873030185699463,grad_norm: 0.9999996623772127, iteration: 27084
loss: 0.9970070123672485,grad_norm: 0.9600424224654267, iteration: 27085
loss: 1.0000567436218262,grad_norm: 0.9999992250155179, iteration: 27086
loss: 1.0269854068756104,grad_norm: 0.9999992252308585, iteration: 27087
loss: 1.0064629316329956,grad_norm: 0.9831650229900668, iteration: 27088
loss: 1.0163015127182007,grad_norm: 0.9636873879901302, iteration: 27089
loss: 0.9961841106414795,grad_norm: 0.9393067294644303, iteration: 27090
loss: 0.9908846020698547,grad_norm: 0.9276421606281213, iteration: 27091
loss: 1.0158039331436157,grad_norm: 0.9563793244949768, iteration: 27092
loss: 1.0216915607452393,grad_norm: 0.823327751988428, iteration: 27093
loss: 1.002716064453125,grad_norm: 0.9765019231299037, iteration: 27094
loss: 0.9596077799797058,grad_norm: 0.9999992945995705, iteration: 27095
loss: 1.0038464069366455,grad_norm: 0.9190647597793641, iteration: 27096
loss: 0.9762270450592041,grad_norm: 0.9999990765763367, iteration: 27097
loss: 1.0595158338546753,grad_norm: 0.9999995617059948, iteration: 27098
loss: 1.060372233390808,grad_norm: 0.9999997452241659, iteration: 27099
loss: 1.035933256149292,grad_norm: 0.9999991872837392, iteration: 27100
loss: 1.1027954816818237,grad_norm: 0.9999997441021151, iteration: 27101
loss: 0.9988888502120972,grad_norm: 0.8267790084607496, iteration: 27102
loss: 1.0092263221740723,grad_norm: 0.9999992035947091, iteration: 27103
loss: 1.007325530052185,grad_norm: 0.9886806132447195, iteration: 27104
loss: 1.09073007106781,grad_norm: 0.9999991196221875, iteration: 27105
loss: 1.07374107837677,grad_norm: 0.9999998478755134, iteration: 27106
loss: 1.0165789127349854,grad_norm: 0.999999369222847, iteration: 27107
loss: 1.0369298458099365,grad_norm: 0.9999999255259215, iteration: 27108
loss: 1.0039929151535034,grad_norm: 0.8827281015711682, iteration: 27109
loss: 1.0144460201263428,grad_norm: 0.999999103904551, iteration: 27110
loss: 1.004287600517273,grad_norm: 0.9999992360266788, iteration: 27111
loss: 0.9822670817375183,grad_norm: 0.8290787915859903, iteration: 27112
loss: 1.000491976737976,grad_norm: 0.9999993208292419, iteration: 27113
loss: 0.9920610189437866,grad_norm: 0.8797197614271915, iteration: 27114
loss: 1.0352224111557007,grad_norm: 0.9999994535198765, iteration: 27115
loss: 0.9986504912376404,grad_norm: 0.9999991893126087, iteration: 27116
loss: 1.0630656480789185,grad_norm: 0.8571764732006744, iteration: 27117
loss: 1.0065269470214844,grad_norm: 0.9858807354969571, iteration: 27118
loss: 0.9963850378990173,grad_norm: 0.9825047065294388, iteration: 27119
loss: 1.003048062324524,grad_norm: 0.9999991469607974, iteration: 27120
loss: 0.9940097332000732,grad_norm: 0.8492489743525293, iteration: 27121
loss: 1.0220065116882324,grad_norm: 0.9999991949754895, iteration: 27122
loss: 1.0444203615188599,grad_norm: 0.9453062149635271, iteration: 27123
loss: 0.9846263527870178,grad_norm: 0.9218397626433815, iteration: 27124
loss: 0.9997677206993103,grad_norm: 0.9999990512650745, iteration: 27125
loss: 1.0381754636764526,grad_norm: 0.9999993313387303, iteration: 27126
loss: 0.9931460022926331,grad_norm: 0.7945579586393601, iteration: 27127
loss: 1.088374376296997,grad_norm: 0.9999996875024232, iteration: 27128
loss: 1.006937026977539,grad_norm: 0.9850106850011109, iteration: 27129
loss: 1.0382835865020752,grad_norm: 0.9999990516864403, iteration: 27130
loss: 1.0487406253814697,grad_norm: 0.9999991961816, iteration: 27131
loss: 0.9977418780326843,grad_norm: 0.8389806724150157, iteration: 27132
loss: 1.002087116241455,grad_norm: 0.9999990917892433, iteration: 27133
loss: 1.035012125968933,grad_norm: 0.999999041070707, iteration: 27134
loss: 0.977700412273407,grad_norm: 0.9999993721313328, iteration: 27135
loss: 1.0318137407302856,grad_norm: 0.9999990230808096, iteration: 27136
loss: 1.0150412321090698,grad_norm: 0.8857600621345418, iteration: 27137
loss: 0.9661162495613098,grad_norm: 0.9999992425702136, iteration: 27138
loss: 1.007269024848938,grad_norm: 0.9999991583725544, iteration: 27139
loss: 1.0484482049942017,grad_norm: 0.9380530405165679, iteration: 27140
loss: 1.0061297416687012,grad_norm: 0.9999991149775254, iteration: 27141
loss: 1.0023443698883057,grad_norm: 0.999998905682975, iteration: 27142
loss: 1.0262709856033325,grad_norm: 0.9999994578690633, iteration: 27143
loss: 0.9988793134689331,grad_norm: 0.8669866050973135, iteration: 27144
loss: 1.0067377090454102,grad_norm: 0.9999991504382801, iteration: 27145
loss: 0.9978703260421753,grad_norm: 0.9999991615899402, iteration: 27146
loss: 1.0232008695602417,grad_norm: 0.9999994246293687, iteration: 27147
loss: 1.0320689678192139,grad_norm: 0.9999993806622394, iteration: 27148
loss: 1.0481393337249756,grad_norm: 0.999999478380633, iteration: 27149
loss: 1.002956748008728,grad_norm: 0.9999992422009422, iteration: 27150
loss: 0.9805459380149841,grad_norm: 0.9999991473021497, iteration: 27151
loss: 0.9990732669830322,grad_norm: 0.9999992793870243, iteration: 27152
loss: 1.0024027824401855,grad_norm: 0.92078471364965, iteration: 27153
loss: 1.0299267768859863,grad_norm: 0.9779775830354188, iteration: 27154
loss: 1.014090657234192,grad_norm: 0.8819205450968202, iteration: 27155
loss: 1.024344563484192,grad_norm: 0.93329745048068, iteration: 27156
loss: 1.0587769746780396,grad_norm: 0.9999998070103514, iteration: 27157
loss: 0.9991621375083923,grad_norm: 0.9360513922267938, iteration: 27158
loss: 0.9944869875907898,grad_norm: 0.9999990227727898, iteration: 27159
loss: 0.9627681374549866,grad_norm: 0.9999993035583727, iteration: 27160
loss: 1.0691083669662476,grad_norm: 0.9999993244500435, iteration: 27161
loss: 1.0272296667099,grad_norm: 0.9999995467628319, iteration: 27162
loss: 1.004676342010498,grad_norm: 0.9999991152307676, iteration: 27163
loss: 1.0084182024002075,grad_norm: 0.9999995711107612, iteration: 27164
loss: 1.0448720455169678,grad_norm: 0.99999914444162, iteration: 27165
loss: 1.0197207927703857,grad_norm: 0.9572177882614205, iteration: 27166
loss: 1.0237116813659668,grad_norm: 0.9999991626908155, iteration: 27167
loss: 1.0067025423049927,grad_norm: 0.9641407478431074, iteration: 27168
loss: 1.005967617034912,grad_norm: 0.9999989553186086, iteration: 27169
loss: 1.033418893814087,grad_norm: 0.9999993672906443, iteration: 27170
loss: 0.9978915452957153,grad_norm: 0.9999990791353013, iteration: 27171
loss: 1.045811653137207,grad_norm: 0.9194138025124456, iteration: 27172
loss: 0.983476996421814,grad_norm: 0.9712754969361762, iteration: 27173
loss: 1.033697485923767,grad_norm: 0.9999990973260445, iteration: 27174
loss: 1.0260788202285767,grad_norm: 0.9999991360590237, iteration: 27175
loss: 0.998115599155426,grad_norm: 0.8325975379234718, iteration: 27176
loss: 0.9551386833190918,grad_norm: 0.999999057766371, iteration: 27177
loss: 1.0173848867416382,grad_norm: 0.999999759913493, iteration: 27178
loss: 1.0051496028900146,grad_norm: 0.9769587373154442, iteration: 27179
loss: 1.0571790933609009,grad_norm: 0.9999995484958333, iteration: 27180
loss: 0.9992614984512329,grad_norm: 0.9999993032964186, iteration: 27181
loss: 1.0485246181488037,grad_norm: 0.8306321826986303, iteration: 27182
loss: 1.0442720651626587,grad_norm: 0.9999997025272382, iteration: 27183
loss: 1.0206149816513062,grad_norm: 0.9717280086113298, iteration: 27184
loss: 0.9896266460418701,grad_norm: 0.9999990421620386, iteration: 27185
loss: 0.9977108240127563,grad_norm: 0.8969114127271942, iteration: 27186
loss: 0.9950265884399414,grad_norm: 0.8567114481285281, iteration: 27187
loss: 1.0382945537567139,grad_norm: 0.9999994303485564, iteration: 27188
loss: 1.042785882949829,grad_norm: 0.9999998140527369, iteration: 27189
loss: 1.0184036493301392,grad_norm: 0.9999991271109474, iteration: 27190
loss: 1.0303281545639038,grad_norm: 0.9999989646136946, iteration: 27191
loss: 1.021044373512268,grad_norm: 0.9999990710802509, iteration: 27192
loss: 1.0345405340194702,grad_norm: 0.9999993235691479, iteration: 27193
loss: 1.0244014263153076,grad_norm: 0.947096407113135, iteration: 27194
loss: 1.0088605880737305,grad_norm: 0.8711293332796638, iteration: 27195
loss: 1.0331636667251587,grad_norm: 0.9999992716193675, iteration: 27196
loss: 1.0246622562408447,grad_norm: 0.9999991583651903, iteration: 27197
loss: 1.0061488151550293,grad_norm: 0.9999992787216744, iteration: 27198
loss: 1.0403121709823608,grad_norm: 0.9999996717404476, iteration: 27199
loss: 0.9754988551139832,grad_norm: 0.9871910989334439, iteration: 27200
loss: 1.0501513481140137,grad_norm: 0.9999993262007826, iteration: 27201
loss: 1.0110822916030884,grad_norm: 0.8870294551803676, iteration: 27202
loss: 1.020406723022461,grad_norm: 0.9332712252356496, iteration: 27203
loss: 1.000177264213562,grad_norm: 0.9766495179729286, iteration: 27204
loss: 1.0095632076263428,grad_norm: 0.8684552051679978, iteration: 27205
loss: 0.9621040225028992,grad_norm: 0.9999991339119277, iteration: 27206
loss: 0.9894741773605347,grad_norm: 0.9999990729318486, iteration: 27207
loss: 0.9826101064682007,grad_norm: 0.8638544139690694, iteration: 27208
loss: 0.9819201231002808,grad_norm: 0.9999991119658297, iteration: 27209
loss: 0.992655873298645,grad_norm: 0.9999990914994672, iteration: 27210
loss: 0.9859395027160645,grad_norm: 0.9999992344180616, iteration: 27211
loss: 1.0066817998886108,grad_norm: 0.9999991754420301, iteration: 27212
loss: 1.0188952684402466,grad_norm: 0.9999990302183694, iteration: 27213
loss: 1.0136851072311401,grad_norm: 0.9999991784083346, iteration: 27214
loss: 0.9802538752555847,grad_norm: 0.9314660335696566, iteration: 27215
loss: 0.9876816868782043,grad_norm: 0.9999990570344569, iteration: 27216
loss: 1.0162972211837769,grad_norm: 0.999999854958878, iteration: 27217
loss: 1.0035570859909058,grad_norm: 0.9999991669096214, iteration: 27218
loss: 1.0504850149154663,grad_norm: 0.9999990330618134, iteration: 27219
loss: 1.00510573387146,grad_norm: 0.8215822907354808, iteration: 27220
loss: 0.9878119826316833,grad_norm: 0.999999096399311, iteration: 27221
loss: 1.0417945384979248,grad_norm: 0.9999991836251954, iteration: 27222
loss: 1.0360455513000488,grad_norm: 0.9999994699665303, iteration: 27223
loss: 1.0435279607772827,grad_norm: 0.9999996512154983, iteration: 27224
loss: 0.9993318319320679,grad_norm: 0.9999991540911539, iteration: 27225
loss: 1.0396428108215332,grad_norm: 0.9999996572178398, iteration: 27226
loss: 0.9541081190109253,grad_norm: 0.9744630446961502, iteration: 27227
loss: 0.9932739734649658,grad_norm: 0.9999997488811615, iteration: 27228
loss: 1.0179197788238525,grad_norm: 0.9999991236757789, iteration: 27229
loss: 1.0330240726470947,grad_norm: 0.9999990746405402, iteration: 27230
loss: 0.9917683601379395,grad_norm: 0.8789658238675914, iteration: 27231
loss: 1.0539109706878662,grad_norm: 0.9999994842014144, iteration: 27232
loss: 1.030258059501648,grad_norm: 0.9717103468232664, iteration: 27233
loss: 1.0390230417251587,grad_norm: 0.9999998431649477, iteration: 27234
loss: 0.9884058237075806,grad_norm: 0.9999990730860692, iteration: 27235
loss: 1.0088560581207275,grad_norm: 0.8907192746895086, iteration: 27236
loss: 1.0329350233078003,grad_norm: 0.9937395463211516, iteration: 27237
loss: 1.0331724882125854,grad_norm: 0.9725341086534617, iteration: 27238
loss: 1.0212879180908203,grad_norm: 0.9999990764889795, iteration: 27239
loss: 1.0352495908737183,grad_norm: 0.9999992180176271, iteration: 27240
loss: 1.013175368309021,grad_norm: 0.9607100988008521, iteration: 27241
loss: 1.0333073139190674,grad_norm: 0.999999186698372, iteration: 27242
loss: 0.9953661561012268,grad_norm: 0.9999992930438327, iteration: 27243
loss: 0.9968375563621521,grad_norm: 0.887415534647713, iteration: 27244
loss: 1.011150598526001,grad_norm: 0.9999990312725071, iteration: 27245
loss: 1.0498679876327515,grad_norm: 0.8343964214433759, iteration: 27246
loss: 1.0380427837371826,grad_norm: 0.9999993034128302, iteration: 27247
loss: 0.9873387813568115,grad_norm: 0.9999991697599857, iteration: 27248
loss: 1.0218009948730469,grad_norm: 0.9999993191622542, iteration: 27249
loss: 1.0528022050857544,grad_norm: 0.999999740522468, iteration: 27250
loss: 0.9765616655349731,grad_norm: 0.9999990736864149, iteration: 27251
loss: 1.0133953094482422,grad_norm: 0.9999991466522004, iteration: 27252
loss: 1.0186105966567993,grad_norm: 0.9999997073650575, iteration: 27253
loss: 1.0308419466018677,grad_norm: 0.9999997005508892, iteration: 27254
loss: 1.0016059875488281,grad_norm: 0.9999990553950389, iteration: 27255
loss: 1.0263172388076782,grad_norm: 0.9006171700949707, iteration: 27256
loss: 0.9939092993736267,grad_norm: 0.9988274775865142, iteration: 27257
loss: 1.0132455825805664,grad_norm: 0.8471890720972556, iteration: 27258
loss: 1.0244332551956177,grad_norm: 0.9999990549096216, iteration: 27259
loss: 0.9928490519523621,grad_norm: 0.8964434754209815, iteration: 27260
loss: 0.9784945845603943,grad_norm: 0.9999991806326537, iteration: 27261
loss: 0.9988929629325867,grad_norm: 0.9999991036600804, iteration: 27262
loss: 1.0489873886108398,grad_norm: 0.9999992713293796, iteration: 27263
loss: 0.9851504564285278,grad_norm: 0.9688063214756091, iteration: 27264
loss: 0.9566208720207214,grad_norm: 0.8738744913358432, iteration: 27265
loss: 1.0041513442993164,grad_norm: 0.9999995939353811, iteration: 27266
loss: 1.0195726156234741,grad_norm: 0.999999181662806, iteration: 27267
loss: 1.0348135232925415,grad_norm: 0.9948575449481631, iteration: 27268
loss: 1.009413242340088,grad_norm: 0.9999989388850798, iteration: 27269
loss: 0.9965952634811401,grad_norm: 0.8900410455079535, iteration: 27270
loss: 1.0096101760864258,grad_norm: 0.9999990874953171, iteration: 27271
loss: 1.0254688262939453,grad_norm: 0.9999992188394364, iteration: 27272
loss: 0.9937086701393127,grad_norm: 0.874358967912961, iteration: 27273
loss: 0.9413204789161682,grad_norm: 0.9719272122606925, iteration: 27274
loss: 1.0100919008255005,grad_norm: 0.9999990853601345, iteration: 27275
loss: 1.003664255142212,grad_norm: 0.9999996440568364, iteration: 27276
loss: 1.0368642807006836,grad_norm: 0.9999992210821825, iteration: 27277
loss: 1.0242267847061157,grad_norm: 0.999999348450477, iteration: 27278
loss: 1.0001524686813354,grad_norm: 0.9643206968108318, iteration: 27279
loss: 1.0152347087860107,grad_norm: 0.9330239315729619, iteration: 27280
loss: 0.9996061325073242,grad_norm: 0.90902733433258, iteration: 27281
loss: 1.0092941522598267,grad_norm: 0.9999990854145936, iteration: 27282
loss: 1.0050764083862305,grad_norm: 0.999999233021635, iteration: 27283
loss: 1.031017780303955,grad_norm: 0.9874682808336622, iteration: 27284
loss: 0.9875054359436035,grad_norm: 0.9059707061908879, iteration: 27285
loss: 1.0733507871627808,grad_norm: 0.9999996887276058, iteration: 27286
loss: 0.9827793836593628,grad_norm: 0.9828101103935681, iteration: 27287
loss: 1.0732120275497437,grad_norm: 0.9999997102784604, iteration: 27288
loss: 1.0116533041000366,grad_norm: 0.9999990096031667, iteration: 27289
loss: 1.0223124027252197,grad_norm: 0.9999992280729909, iteration: 27290
loss: 1.0495927333831787,grad_norm: 0.9999990577077629, iteration: 27291
loss: 0.9628007411956787,grad_norm: 0.999999246529504, iteration: 27292
loss: 1.0431272983551025,grad_norm: 0.9999993144888756, iteration: 27293
loss: 0.9876472353935242,grad_norm: 0.8928225634498049, iteration: 27294
loss: 1.01340651512146,grad_norm: 0.8581823338809202, iteration: 27295
loss: 1.0242207050323486,grad_norm: 0.9999997348319872, iteration: 27296
loss: 1.0241246223449707,grad_norm: 0.999999241539947, iteration: 27297
loss: 0.9833069443702698,grad_norm: 0.9159501442356709, iteration: 27298
loss: 1.0275791883468628,grad_norm: 0.9457746952219332, iteration: 27299
loss: 0.9973528981208801,grad_norm: 0.9999990388471628, iteration: 27300
loss: 0.9765881896018982,grad_norm: 0.9999991879645342, iteration: 27301
loss: 0.9929808378219604,grad_norm: 0.9721000172779787, iteration: 27302
loss: 1.020765781402588,grad_norm: 0.8363101261995082, iteration: 27303
loss: 1.0063279867172241,grad_norm: 0.9347099237263048, iteration: 27304
loss: 1.0605953931808472,grad_norm: 0.9999993751401538, iteration: 27305
loss: 1.0321091413497925,grad_norm: 0.8900241948740965, iteration: 27306
loss: 1.0071873664855957,grad_norm: 0.8381085496374369, iteration: 27307
loss: 1.0080407857894897,grad_norm: 0.9729187752870362, iteration: 27308
loss: 0.9956250786781311,grad_norm: 0.9999990062258862, iteration: 27309
loss: 1.018392562866211,grad_norm: 0.886611299951513, iteration: 27310
loss: 1.0253032445907593,grad_norm: 0.9999992551116735, iteration: 27311
loss: 0.9861915707588196,grad_norm: 0.9999990700448077, iteration: 27312
loss: 1.0041331052780151,grad_norm: 0.9999990239689357, iteration: 27313
loss: 1.0174826383590698,grad_norm: 0.9999990692009528, iteration: 27314
loss: 1.0105409622192383,grad_norm: 0.9405678069956694, iteration: 27315
loss: 0.9752665758132935,grad_norm: 0.9999991913779547, iteration: 27316
loss: 1.0684155225753784,grad_norm: 0.9999996381233964, iteration: 27317
loss: 0.9894561767578125,grad_norm: 0.9999992090597414, iteration: 27318
loss: 1.0044499635696411,grad_norm: 0.8592052808653124, iteration: 27319
loss: 1.02779221534729,grad_norm: 0.9999996281124442, iteration: 27320
loss: 1.0313665866851807,grad_norm: 0.9999992894427099, iteration: 27321
loss: 1.003773808479309,grad_norm: 0.812428679719828, iteration: 27322
loss: 1.0114332437515259,grad_norm: 0.8819321418530714, iteration: 27323
loss: 1.0068531036376953,grad_norm: 0.9999990321775868, iteration: 27324
loss: 1.0698151588439941,grad_norm: 0.9999992187697874, iteration: 27325
loss: 1.0054110288619995,grad_norm: 0.9999997170204329, iteration: 27326
loss: 0.9958579540252686,grad_norm: 0.9999992170283623, iteration: 27327
loss: 0.9815394878387451,grad_norm: 0.9999990976119553, iteration: 27328
loss: 1.0141277313232422,grad_norm: 0.9999994016445868, iteration: 27329
loss: 1.0163408517837524,grad_norm: 0.9999992021654831, iteration: 27330
loss: 1.031067967414856,grad_norm: 0.9999991905546506, iteration: 27331
loss: 1.0634217262268066,grad_norm: 0.9999993621151385, iteration: 27332
loss: 0.9854537844657898,grad_norm: 0.9999992376448116, iteration: 27333
loss: 0.9863396286964417,grad_norm: 0.9999991028855464, iteration: 27334
loss: 1.0054603815078735,grad_norm: 0.9103393652090082, iteration: 27335
loss: 0.9729981422424316,grad_norm: 0.9825173254277482, iteration: 27336
loss: 0.9999850988388062,grad_norm: 0.94590286855392, iteration: 27337
loss: 1.0055805444717407,grad_norm: 0.9940979009785031, iteration: 27338
loss: 1.0288277864456177,grad_norm: 0.8393919423005102, iteration: 27339
loss: 1.0352662801742554,grad_norm: 0.9999996591707773, iteration: 27340
loss: 1.0314518213272095,grad_norm: 0.9999994597029578, iteration: 27341
loss: 1.019286036491394,grad_norm: 0.9105702612561409, iteration: 27342
loss: 1.0531539916992188,grad_norm: 0.9999992175038802, iteration: 27343
loss: 0.9578036665916443,grad_norm: 0.8715172186271889, iteration: 27344
loss: 1.0272492170333862,grad_norm: 0.852461617082847, iteration: 27345
loss: 1.0104745626449585,grad_norm: 0.9789692660784057, iteration: 27346
loss: 1.129347562789917,grad_norm: 0.9999997120675849, iteration: 27347
loss: 1.0047751665115356,grad_norm: 0.9999990711960169, iteration: 27348
loss: 1.03203284740448,grad_norm: 0.9999993940815716, iteration: 27349
loss: 1.0203676223754883,grad_norm: 0.9999991705883831, iteration: 27350
loss: 0.9832686185836792,grad_norm: 0.9798813380145609, iteration: 27351
loss: 1.046798586845398,grad_norm: 0.9413543224766461, iteration: 27352
loss: 1.0096960067749023,grad_norm: 0.9999990464090862, iteration: 27353
loss: 1.013047218322754,grad_norm: 0.9999996079710344, iteration: 27354
loss: 1.0303740501403809,grad_norm: 0.9999990380188091, iteration: 27355
loss: 0.9874246120452881,grad_norm: 0.999999272861181, iteration: 27356
loss: 1.0411114692687988,grad_norm: 0.9999990848718281, iteration: 27357
loss: 1.0453327894210815,grad_norm: 0.9999991619225207, iteration: 27358
loss: 0.9887414574623108,grad_norm: 0.9999992181577055, iteration: 27359
loss: 0.9998608231544495,grad_norm: 0.9999992794687631, iteration: 27360
loss: 1.044409155845642,grad_norm: 0.999999715053882, iteration: 27361
loss: 1.0172945261001587,grad_norm: 0.9999993233514628, iteration: 27362
loss: 0.9545791149139404,grad_norm: 0.99999909291016, iteration: 27363
loss: 1.0489619970321655,grad_norm: 0.9999996137972302, iteration: 27364
loss: 0.9918586611747742,grad_norm: 0.8669159053661536, iteration: 27365
loss: 1.0236412286758423,grad_norm: 0.8600975192390169, iteration: 27366
loss: 1.0093510150909424,grad_norm: 0.8819731235210229, iteration: 27367
loss: 1.0211081504821777,grad_norm: 0.7789178231363566, iteration: 27368
loss: 1.0738725662231445,grad_norm: 0.9999997267326578, iteration: 27369
loss: 1.013083577156067,grad_norm: 0.9999992956167465, iteration: 27370
loss: 1.005042314529419,grad_norm: 0.9999991618355892, iteration: 27371
loss: 0.9956778883934021,grad_norm: 0.9999990906023813, iteration: 27372
loss: 0.9776473641395569,grad_norm: 0.9999992079701975, iteration: 27373
loss: 1.1112812757492065,grad_norm: 0.9999997113097578, iteration: 27374
loss: 1.0444221496582031,grad_norm: 0.9999993797213655, iteration: 27375
loss: 1.006963849067688,grad_norm: 0.9999992387154654, iteration: 27376
loss: 1.0398160219192505,grad_norm: 0.999999187377893, iteration: 27377
loss: 1.018424153327942,grad_norm: 0.9999991211682432, iteration: 27378
loss: 0.9849323630332947,grad_norm: 0.9999995137128154, iteration: 27379
loss: 0.9953423142433167,grad_norm: 0.9999993124181794, iteration: 27380
loss: 1.0428553819656372,grad_norm: 0.9999991517010324, iteration: 27381
loss: 1.0606566667556763,grad_norm: 0.8671529154594951, iteration: 27382
loss: 0.9891864061355591,grad_norm: 0.9999989371114328, iteration: 27383
loss: 1.0073962211608887,grad_norm: 0.9999991581332424, iteration: 27384
loss: 1.051674723625183,grad_norm: 0.9264510571812444, iteration: 27385
loss: 1.0020689964294434,grad_norm: 0.9999992432847382, iteration: 27386
loss: 0.9945032000541687,grad_norm: 0.9999990342116454, iteration: 27387
loss: 1.075862169265747,grad_norm: 0.9999994147791098, iteration: 27388
loss: 1.0329957008361816,grad_norm: 0.9266391535201058, iteration: 27389
loss: 1.0494041442871094,grad_norm: 0.9621631778753396, iteration: 27390
loss: 1.0028022527694702,grad_norm: 0.9118803861131999, iteration: 27391
loss: 0.9804544448852539,grad_norm: 0.999999480719225, iteration: 27392
loss: 1.0206457376480103,grad_norm: 0.9999991884813649, iteration: 27393
loss: 1.0179028511047363,grad_norm: 0.9999989558506753, iteration: 27394
loss: 0.9813764691352844,grad_norm: 0.999999045794444, iteration: 27395
loss: 0.9765231013298035,grad_norm: 0.9537487791621252, iteration: 27396
loss: 1.0160101652145386,grad_norm: 0.9999992151273354, iteration: 27397
loss: 1.0300713777542114,grad_norm: 0.9999992611539524, iteration: 27398
loss: 0.9976882934570312,grad_norm: 0.9999990208390672, iteration: 27399
loss: 1.0018008947372437,grad_norm: 0.9999992405476632, iteration: 27400
loss: 0.9976099729537964,grad_norm: 0.9999993725128149, iteration: 27401
loss: 0.9802505373954773,grad_norm: 0.9107268216906801, iteration: 27402
loss: 1.008873462677002,grad_norm: 0.7901939405590387, iteration: 27403
loss: 0.9703918099403381,grad_norm: 0.803724565286558, iteration: 27404
loss: 0.9771421551704407,grad_norm: 0.9999990633989388, iteration: 27405
loss: 1.0190138816833496,grad_norm: 0.99999916982037, iteration: 27406
loss: 0.997778058052063,grad_norm: 0.9999991661396269, iteration: 27407
loss: 1.0138957500457764,grad_norm: 0.8597600346896364, iteration: 27408
loss: 0.9867992401123047,grad_norm: 0.9999990010912992, iteration: 27409
loss: 0.9594228863716125,grad_norm: 0.9999992034811332, iteration: 27410
loss: 0.9762322902679443,grad_norm: 0.9999991673221549, iteration: 27411
loss: 1.0359866619110107,grad_norm: 0.9999991501988272, iteration: 27412
loss: 1.0408498048782349,grad_norm: 0.9999993702864836, iteration: 27413
loss: 1.0391254425048828,grad_norm: 0.9999992219185515, iteration: 27414
loss: 0.9802758693695068,grad_norm: 0.884417487860838, iteration: 27415
loss: 1.0081167221069336,grad_norm: 0.9999990852682535, iteration: 27416
loss: 1.0032076835632324,grad_norm: 0.9999997692084637, iteration: 27417
loss: 0.9822528958320618,grad_norm: 0.8506086997923072, iteration: 27418
loss: 1.0187904834747314,grad_norm: 0.9999990650137542, iteration: 27419
loss: 1.0213524103164673,grad_norm: 0.8965582179432431, iteration: 27420
loss: 0.9660571217536926,grad_norm: 0.9999991828488132, iteration: 27421
loss: 1.003569483757019,grad_norm: 0.9999999281908609, iteration: 27422
loss: 0.9691497087478638,grad_norm: 0.8355888862720273, iteration: 27423
loss: 0.9773620963096619,grad_norm: 0.9999989387599444, iteration: 27424
loss: 1.001405954360962,grad_norm: 0.9551709189926332, iteration: 27425
loss: 1.069366455078125,grad_norm: 0.9999992754277156, iteration: 27426
loss: 1.0036702156066895,grad_norm: 0.9125969200238421, iteration: 27427
loss: 0.997208297252655,grad_norm: 0.9782151426880563, iteration: 27428
loss: 1.0112192630767822,grad_norm: 0.9999990399343905, iteration: 27429
loss: 1.0094372034072876,grad_norm: 0.8578813167002564, iteration: 27430
loss: 1.0338882207870483,grad_norm: 0.9108956366724048, iteration: 27431
loss: 0.9879165291786194,grad_norm: 0.9432980566801371, iteration: 27432
loss: 1.044999599456787,grad_norm: 0.9999990247643524, iteration: 27433
loss: 0.9817917943000793,grad_norm: 0.9999990348307314, iteration: 27434
loss: 1.0142558813095093,grad_norm: 0.9999990462019707, iteration: 27435
loss: 0.9873402118682861,grad_norm: 0.9999990904796551, iteration: 27436
loss: 0.9979045987129211,grad_norm: 0.9643866460105275, iteration: 27437
loss: 1.0176855325698853,grad_norm: 0.9999990711622506, iteration: 27438
loss: 1.0237302780151367,grad_norm: 0.9999996421454782, iteration: 27439
loss: 0.9934219121932983,grad_norm: 0.9003276597597968, iteration: 27440
loss: 1.0491595268249512,grad_norm: 0.9999994619739917, iteration: 27441
loss: 1.0334532260894775,grad_norm: 0.9999991911764633, iteration: 27442
loss: 0.9949071407318115,grad_norm: 0.9999991469287086, iteration: 27443
loss: 1.0171059370040894,grad_norm: 0.9237583867140038, iteration: 27444
loss: 0.9951475262641907,grad_norm: 0.9999995074981891, iteration: 27445
loss: 1.0026865005493164,grad_norm: 0.9999991120481414, iteration: 27446
loss: 1.0160129070281982,grad_norm: 0.999999214656192, iteration: 27447
loss: 0.9637560844421387,grad_norm: 0.9952972515481315, iteration: 27448
loss: 0.9900590181350708,grad_norm: 0.8109024734079398, iteration: 27449
loss: 1.0302611589431763,grad_norm: 0.9756013148467862, iteration: 27450
loss: 1.0243617296218872,grad_norm: 0.982786029497111, iteration: 27451
loss: 1.0046906471252441,grad_norm: 0.9999991142946749, iteration: 27452
loss: 1.019085168838501,grad_norm: 0.9744626479249074, iteration: 27453
loss: 0.992275595664978,grad_norm: 0.8713573233121643, iteration: 27454
loss: 1.019297480583191,grad_norm: 0.99999889714198, iteration: 27455
loss: 1.0184234380722046,grad_norm: 0.9815975444323158, iteration: 27456
loss: 1.0206819772720337,grad_norm: 0.9999991463226, iteration: 27457
loss: 1.0621628761291504,grad_norm: 0.9999992872900947, iteration: 27458
loss: 1.0464576482772827,grad_norm: 0.9999991870586307, iteration: 27459
loss: 1.028764009475708,grad_norm: 0.9999994252452876, iteration: 27460
loss: 1.070855736732483,grad_norm: 0.9999996598736809, iteration: 27461
loss: 1.0059444904327393,grad_norm: 0.9182245175181728, iteration: 27462
loss: 0.9948478937149048,grad_norm: 0.9362660907119467, iteration: 27463
loss: 0.9800871014595032,grad_norm: 0.9999991644893851, iteration: 27464
loss: 1.0246381759643555,grad_norm: 0.9999993376353177, iteration: 27465
loss: 0.9935164451599121,grad_norm: 0.9999991310888103, iteration: 27466
loss: 1.030928373336792,grad_norm: 0.8358189318525697, iteration: 27467
loss: 1.009725570678711,grad_norm: 0.8384591480992911, iteration: 27468
loss: 1.0053635835647583,grad_norm: 0.999999136989087, iteration: 27469
loss: 0.9993009567260742,grad_norm: 0.9999992172239114, iteration: 27470
loss: 0.9809107780456543,grad_norm: 0.9999992058205354, iteration: 27471
loss: 1.0276811122894287,grad_norm: 0.9713452277293222, iteration: 27472
loss: 1.0190969705581665,grad_norm: 0.9414355144126194, iteration: 27473
loss: 1.0388422012329102,grad_norm: 0.9008349508818236, iteration: 27474
loss: 1.0292712450027466,grad_norm: 0.9999994740864633, iteration: 27475
loss: 1.0394383668899536,grad_norm: 0.9999989776239567, iteration: 27476
loss: 1.0119355916976929,grad_norm: 0.9285895850630287, iteration: 27477
loss: 1.040541410446167,grad_norm: 0.9999992961446821, iteration: 27478
loss: 1.0287355184555054,grad_norm: 0.9486174563423236, iteration: 27479
loss: 0.9713202714920044,grad_norm: 0.9999991066509764, iteration: 27480
loss: 0.9872430562973022,grad_norm: 0.9412554034826495, iteration: 27481
loss: 1.0699234008789062,grad_norm: 0.9999994611582087, iteration: 27482
loss: 1.000036597251892,grad_norm: 0.9882713030571442, iteration: 27483
loss: 1.013108730316162,grad_norm: 0.9999991372627995, iteration: 27484
loss: 1.0178838968276978,grad_norm: 0.9999991704185216, iteration: 27485
loss: 0.9793131947517395,grad_norm: 0.9999991285550505, iteration: 27486
loss: 1.0299376249313354,grad_norm: 0.9214578331094524, iteration: 27487
loss: 1.003534197807312,grad_norm: 0.9470589983677092, iteration: 27488
loss: 1.021111249923706,grad_norm: 0.9999991478616445, iteration: 27489
loss: 0.9902058243751526,grad_norm: 0.9593989271338647, iteration: 27490
loss: 0.9892463684082031,grad_norm: 0.9529921033811587, iteration: 27491
loss: 1.0253256559371948,grad_norm: 0.8945288968830456, iteration: 27492
loss: 1.0034878253936768,grad_norm: 0.8324644343823342, iteration: 27493
loss: 1.003458857536316,grad_norm: 0.9505765588935827, iteration: 27494
loss: 1.0598342418670654,grad_norm: 0.9999993139095289, iteration: 27495
loss: 1.004012942314148,grad_norm: 0.9999991251522574, iteration: 27496
loss: 1.0319284200668335,grad_norm: 0.9999991645738742, iteration: 27497
loss: 1.0260663032531738,grad_norm: 0.9999990615773131, iteration: 27498
loss: 0.9921074509620667,grad_norm: 0.9999990258694732, iteration: 27499
loss: 1.0171703100204468,grad_norm: 0.8371609212686115, iteration: 27500
loss: 1.0103133916854858,grad_norm: 0.9252886200800126, iteration: 27501
loss: 0.9818172454833984,grad_norm: 0.999999131709197, iteration: 27502
loss: 0.984503448009491,grad_norm: 0.9999990418028448, iteration: 27503
loss: 0.9685202836990356,grad_norm: 0.9999990652049203, iteration: 27504
loss: 1.0454870462417603,grad_norm: 0.9999993148504851, iteration: 27505
loss: 1.041975975036621,grad_norm: 0.999999076233167, iteration: 27506
loss: 1.0396541357040405,grad_norm: 0.9999994916740247, iteration: 27507
loss: 1.0065863132476807,grad_norm: 0.8977960968306079, iteration: 27508
loss: 1.009614109992981,grad_norm: 0.9756009864627673, iteration: 27509
loss: 1.028010606765747,grad_norm: 0.999999238470712, iteration: 27510
loss: 0.9956495761871338,grad_norm: 0.7775999840229678, iteration: 27511
loss: 1.0368455648422241,grad_norm: 0.9999992947109398, iteration: 27512
loss: 0.9999052286148071,grad_norm: 0.9999990852133299, iteration: 27513
loss: 0.9905357956886292,grad_norm: 0.9902549815778455, iteration: 27514
loss: 1.0021876096725464,grad_norm: 0.8779533852575548, iteration: 27515
loss: 0.9979320764541626,grad_norm: 0.9009007798014587, iteration: 27516
loss: 0.9871622323989868,grad_norm: 0.91964554162599, iteration: 27517
loss: 0.9905135631561279,grad_norm: 0.9415822474747342, iteration: 27518
loss: 0.9987234473228455,grad_norm: 0.9820403183785156, iteration: 27519
loss: 1.0777281522750854,grad_norm: 0.9999991387892673, iteration: 27520
loss: 1.0344289541244507,grad_norm: 0.9999992080444762, iteration: 27521
loss: 0.984158456325531,grad_norm: 0.8873362823656252, iteration: 27522
loss: 1.0313339233398438,grad_norm: 0.9319282254386455, iteration: 27523
loss: 0.9785428047180176,grad_norm: 0.9619262536384428, iteration: 27524
loss: 1.0502487421035767,grad_norm: 0.9999991631099644, iteration: 27525
loss: 0.9898011684417725,grad_norm: 0.7788968462965422, iteration: 27526
loss: 0.9613687992095947,grad_norm: 0.9999990945627192, iteration: 27527
loss: 1.036047101020813,grad_norm: 0.8734078955794635, iteration: 27528
loss: 1.0292434692382812,grad_norm: 0.969021892175242, iteration: 27529
loss: 0.9980923533439636,grad_norm: 0.9999989704957524, iteration: 27530
loss: 0.9835881590843201,grad_norm: 0.9999990161335531, iteration: 27531
loss: 1.0201226472854614,grad_norm: 0.9999989944412188, iteration: 27532
loss: 0.9602423906326294,grad_norm: 0.9999992124767065, iteration: 27533
loss: 1.0062742233276367,grad_norm: 0.9999991446019548, iteration: 27534
loss: 1.0037916898727417,grad_norm: 0.8264611258999609, iteration: 27535
loss: 1.0107311010360718,grad_norm: 0.9999990516752136, iteration: 27536
loss: 1.0116565227508545,grad_norm: 0.9761315955410185, iteration: 27537
loss: 1.0042951107025146,grad_norm: 0.986930741901015, iteration: 27538
loss: 0.9928865432739258,grad_norm: 0.8369711729941454, iteration: 27539
loss: 1.0130566358566284,grad_norm: 0.9969892888365836, iteration: 27540
loss: 1.0120022296905518,grad_norm: 0.9999994131471244, iteration: 27541
loss: 1.0033434629440308,grad_norm: 0.9999991922430723, iteration: 27542
loss: 0.9906925559043884,grad_norm: 0.9845861130554712, iteration: 27543
loss: 1.0272051095962524,grad_norm: 0.999999073379295, iteration: 27544
loss: 1.0238330364227295,grad_norm: 0.9999990729774122, iteration: 27545
loss: 1.0513076782226562,grad_norm: 0.9999989647883994, iteration: 27546
loss: 0.9911530017852783,grad_norm: 0.9863235323926304, iteration: 27547
loss: 0.9900760650634766,grad_norm: 0.9999991204361073, iteration: 27548
loss: 1.008298397064209,grad_norm: 0.8984985654088591, iteration: 27549
loss: 1.0252737998962402,grad_norm: 0.9307337553164546, iteration: 27550
loss: 1.0241612195968628,grad_norm: 0.8447690460657087, iteration: 27551
loss: 0.9908522367477417,grad_norm: 0.9773498653075693, iteration: 27552
loss: 0.9602677822113037,grad_norm: 0.7763018262563528, iteration: 27553
loss: 0.958939790725708,grad_norm: 0.9525587254629412, iteration: 27554
loss: 0.9657877087593079,grad_norm: 0.8933106786594509, iteration: 27555
loss: 0.9921470880508423,grad_norm: 0.9999990976940814, iteration: 27556
loss: 1.0110738277435303,grad_norm: 0.999999162063313, iteration: 27557
loss: 1.0127660036087036,grad_norm: 0.8743017263503097, iteration: 27558
loss: 1.0221189260482788,grad_norm: 0.999999161871162, iteration: 27559
loss: 1.001469373703003,grad_norm: 0.9999990778689776, iteration: 27560
loss: 1.030292272567749,grad_norm: 0.9747880777856426, iteration: 27561
loss: 0.9479343891143799,grad_norm: 0.8372222266327018, iteration: 27562
loss: 1.0101892948150635,grad_norm: 0.9029982278345974, iteration: 27563
loss: 1.0118327140808105,grad_norm: 0.9999998767242027, iteration: 27564
loss: 1.003607153892517,grad_norm: 0.9448309003566779, iteration: 27565
loss: 1.0208765268325806,grad_norm: 0.8389311203424669, iteration: 27566
loss: 0.9583658576011658,grad_norm: 0.999998971552693, iteration: 27567
loss: 1.0018001794815063,grad_norm: 0.9999990591031694, iteration: 27568
loss: 1.0232371091842651,grad_norm: 0.8831933995513869, iteration: 27569
loss: 1.0360709428787231,grad_norm: 0.9157989204262652, iteration: 27570
loss: 0.9928827285766602,grad_norm: 0.9400658785377496, iteration: 27571
loss: 1.0074564218521118,grad_norm: 0.9999991635152328, iteration: 27572
loss: 0.9916032552719116,grad_norm: 0.9999990666612806, iteration: 27573
loss: 1.0307103395462036,grad_norm: 0.9663651681799162, iteration: 27574
loss: 0.9989408850669861,grad_norm: 0.9999991500100676, iteration: 27575
loss: 1.0251634120941162,grad_norm: 0.9999991811059473, iteration: 27576
loss: 1.014656662940979,grad_norm: 0.9459333140573888, iteration: 27577
loss: 1.0653001070022583,grad_norm: 0.9999991069227788, iteration: 27578
loss: 1.0274943113327026,grad_norm: 0.9403876514272377, iteration: 27579
loss: 1.0159966945648193,grad_norm: 0.7363161619507732, iteration: 27580
loss: 0.9682216644287109,grad_norm: 0.8865053411675361, iteration: 27581
loss: 1.047508955001831,grad_norm: 0.8220692516501253, iteration: 27582
loss: 1.0022189617156982,grad_norm: 0.9308350969107185, iteration: 27583
loss: 1.023116946220398,grad_norm: 0.9999990942946647, iteration: 27584
loss: 0.9995790123939514,grad_norm: 0.9498208743238182, iteration: 27585
loss: 1.038817048072815,grad_norm: 0.9999991036685771, iteration: 27586
loss: 1.02381432056427,grad_norm: 0.9145114033694971, iteration: 27587
loss: 1.0221214294433594,grad_norm: 0.936837550464278, iteration: 27588
loss: 0.9728128910064697,grad_norm: 0.8834674129256782, iteration: 27589
loss: 1.0202597379684448,grad_norm: 0.9999995149466293, iteration: 27590
loss: 1.005710244178772,grad_norm: 0.9999991215906859, iteration: 27591
loss: 1.0114080905914307,grad_norm: 0.9999996577333232, iteration: 27592
loss: 1.021983027458191,grad_norm: 0.9491251571374705, iteration: 27593
loss: 1.024707317352295,grad_norm: 0.9336640832645777, iteration: 27594
loss: 0.9993526935577393,grad_norm: 0.9815015861740304, iteration: 27595
loss: 0.9852030277252197,grad_norm: 0.9083292997705577, iteration: 27596
loss: 0.9994633793830872,grad_norm: 0.9296877801528627, iteration: 27597
loss: 0.9821563363075256,grad_norm: 0.9999989864862203, iteration: 27598
loss: 0.9844391942024231,grad_norm: 0.8400312995829485, iteration: 27599
loss: 1.0408321619033813,grad_norm: 0.897270652728383, iteration: 27600
loss: 1.0066239833831787,grad_norm: 0.9999991014653641, iteration: 27601
loss: 0.9726139307022095,grad_norm: 0.8490708741039686, iteration: 27602
loss: 1.0349632501602173,grad_norm: 0.9999991794637886, iteration: 27603
loss: 1.0378191471099854,grad_norm: 0.8925903141422089, iteration: 27604
loss: 1.0457741022109985,grad_norm: 0.9999991371750535, iteration: 27605
loss: 1.0348149538040161,grad_norm: 0.999999025608464, iteration: 27606
loss: 0.9838602542877197,grad_norm: 0.9999996224927553, iteration: 27607
loss: 1.005994200706482,grad_norm: 0.9028079334654473, iteration: 27608
loss: 1.0261434316635132,grad_norm: 0.9999991614454717, iteration: 27609
loss: 0.9991697072982788,grad_norm: 0.9027690406409988, iteration: 27610
loss: 1.0105410814285278,grad_norm: 0.9553241931367791, iteration: 27611
loss: 1.0222067832946777,grad_norm: 0.9999991163077399, iteration: 27612
loss: 1.0159162282943726,grad_norm: 0.9999992073517237, iteration: 27613
loss: 1.0228739976882935,grad_norm: 0.892076571526883, iteration: 27614
loss: 1.0049892663955688,grad_norm: 0.8354560225356845, iteration: 27615
loss: 1.067064642906189,grad_norm: 0.999999625713675, iteration: 27616
loss: 1.0026593208312988,grad_norm: 0.9464339258680273, iteration: 27617
loss: 1.0172085762023926,grad_norm: 0.9999990060577304, iteration: 27618
loss: 1.0017755031585693,grad_norm: 0.9821251224106748, iteration: 27619
loss: 0.9977729320526123,grad_norm: 0.9999991915321862, iteration: 27620
loss: 1.0187071561813354,grad_norm: 0.9999995211514843, iteration: 27621
loss: 1.0004184246063232,grad_norm: 0.9999990549424851, iteration: 27622
loss: 1.0238662958145142,grad_norm: 0.9999992530402412, iteration: 27623
loss: 1.0359067916870117,grad_norm: 0.9699946038298803, iteration: 27624
loss: 0.9899306297302246,grad_norm: 0.9289016245236734, iteration: 27625
loss: 1.023688554763794,grad_norm: 0.9823106457544547, iteration: 27626
loss: 0.9955238103866577,grad_norm: 0.9999992785832955, iteration: 27627
loss: 0.9832122325897217,grad_norm: 0.9690971499718938, iteration: 27628
loss: 1.0114384889602661,grad_norm: 0.9999993317467525, iteration: 27629
loss: 0.9905778765678406,grad_norm: 0.9512182325967053, iteration: 27630
loss: 0.9989436268806458,grad_norm: 0.9999990485978436, iteration: 27631
loss: 0.995782732963562,grad_norm: 0.856683361129985, iteration: 27632
loss: 0.9945094585418701,grad_norm: 0.9203943226019425, iteration: 27633
loss: 1.011594533920288,grad_norm: 0.9332246113473561, iteration: 27634
loss: 0.9641543626785278,grad_norm: 0.9999993314780461, iteration: 27635
loss: 1.014142632484436,grad_norm: 0.9180177198186953, iteration: 27636
loss: 1.018481731414795,grad_norm: 0.9962530802873846, iteration: 27637
loss: 1.0089737176895142,grad_norm: 0.9999992002539453, iteration: 27638
loss: 1.019683837890625,grad_norm: 0.8751196054645439, iteration: 27639
loss: 1.006895899772644,grad_norm: 0.8853713175090504, iteration: 27640
loss: 1.0150220394134521,grad_norm: 0.9999992569076978, iteration: 27641
loss: 1.0361052751541138,grad_norm: 0.9999994955000637, iteration: 27642
loss: 1.0029457807540894,grad_norm: 0.9999991332136211, iteration: 27643
loss: 1.0107989311218262,grad_norm: 0.9333545445987635, iteration: 27644
loss: 1.0165468454360962,grad_norm: 0.9999990488865272, iteration: 27645
loss: 0.9834630489349365,grad_norm: 0.9999992131875872, iteration: 27646
loss: 1.0192238092422485,grad_norm: 0.9999995942090489, iteration: 27647
loss: 1.019457459449768,grad_norm: 0.9999992101969205, iteration: 27648
loss: 0.9861999750137329,grad_norm: 0.8144298784105086, iteration: 27649
loss: 0.9978639483451843,grad_norm: 0.9999991391538404, iteration: 27650
loss: 1.0069527626037598,grad_norm: 0.9552953777255973, iteration: 27651
loss: 0.9729928374290466,grad_norm: 0.9999991833312221, iteration: 27652
loss: 1.033760905265808,grad_norm: 0.9999992067003258, iteration: 27653
loss: 1.0093109607696533,grad_norm: 0.9221658050680988, iteration: 27654
loss: 1.0363825559616089,grad_norm: 0.9999992459185189, iteration: 27655
loss: 1.0194354057312012,grad_norm: 0.9999997871366799, iteration: 27656
loss: 1.0258485078811646,grad_norm: 0.964627025935632, iteration: 27657
loss: 1.0053831338882446,grad_norm: 0.9794337256432176, iteration: 27658
loss: 1.0215861797332764,grad_norm: 0.9766072700001142, iteration: 27659
loss: 0.9953336119651794,grad_norm: 0.9587680680148178, iteration: 27660
loss: 1.0130646228790283,grad_norm: 0.9999992465816886, iteration: 27661
loss: 0.9992002248764038,grad_norm: 0.9999992441839115, iteration: 27662
loss: 0.9969053864479065,grad_norm: 0.9999991317309725, iteration: 27663
loss: 0.9765375852584839,grad_norm: 0.9290198523881369, iteration: 27664
loss: 1.0122714042663574,grad_norm: 0.9999992163297217, iteration: 27665
loss: 1.0305628776550293,grad_norm: 0.9289653143215607, iteration: 27666
loss: 1.0088695287704468,grad_norm: 0.8552309447458707, iteration: 27667
loss: 0.9896612167358398,grad_norm: 0.957770138958475, iteration: 27668
loss: 0.9947113394737244,grad_norm: 0.9999990372683084, iteration: 27669
loss: 1.031141996383667,grad_norm: 0.9816436740279958, iteration: 27670
loss: 1.0166163444519043,grad_norm: 0.9915277847729433, iteration: 27671
loss: 1.019389271736145,grad_norm: 0.9999989984469737, iteration: 27672
loss: 1.0783950090408325,grad_norm: 0.9999995560925149, iteration: 27673
loss: 1.014411211013794,grad_norm: 0.9343789431223133, iteration: 27674
loss: 1.0473182201385498,grad_norm: 0.8932665312061402, iteration: 27675
loss: 1.0171436071395874,grad_norm: 0.9324453452042006, iteration: 27676
loss: 0.974902331829071,grad_norm: 0.837185204254792, iteration: 27677
loss: 1.0733453035354614,grad_norm: 0.999999048002522, iteration: 27678
loss: 0.9960218071937561,grad_norm: 0.999999155686012, iteration: 27679
loss: 1.0060276985168457,grad_norm: 0.948349032030348, iteration: 27680
loss: 1.0498671531677246,grad_norm: 0.9999992057988961, iteration: 27681
loss: 1.0367755889892578,grad_norm: 0.994481165838851, iteration: 27682
loss: 1.0219776630401611,grad_norm: 0.8607886541285915, iteration: 27683
loss: 1.033205509185791,grad_norm: 0.9999992459403695, iteration: 27684
loss: 1.0162644386291504,grad_norm: 0.9752716089394389, iteration: 27685
loss: 1.006166696548462,grad_norm: 0.9999991977499545, iteration: 27686
loss: 1.003564476966858,grad_norm: 0.9999990677554066, iteration: 27687
loss: 1.0035406351089478,grad_norm: 0.9999990856416644, iteration: 27688
loss: 1.0101159811019897,grad_norm: 0.999999215717374, iteration: 27689
loss: 1.0301491022109985,grad_norm: 0.9999992436685506, iteration: 27690
loss: 0.969342052936554,grad_norm: 0.8444339729209982, iteration: 27691
loss: 0.9962593913078308,grad_norm: 0.9452837123698605, iteration: 27692
loss: 1.0057486295700073,grad_norm: 0.9999990773807753, iteration: 27693
loss: 1.0200648307800293,grad_norm: 0.9999991009987861, iteration: 27694
loss: 1.0253556966781616,grad_norm: 0.9999992979672111, iteration: 27695
loss: 0.9928300380706787,grad_norm: 0.9999990694122978, iteration: 27696
loss: 0.9667484164237976,grad_norm: 0.9999990131358404, iteration: 27697
loss: 0.9823355078697205,grad_norm: 0.9483994204759356, iteration: 27698
loss: 1.0020676851272583,grad_norm: 0.9999989764254654, iteration: 27699
loss: 0.9687021970748901,grad_norm: 0.8931421148693691, iteration: 27700
loss: 1.0167065858840942,grad_norm: 0.9161915946415169, iteration: 27701
loss: 1.0481805801391602,grad_norm: 0.9999993200556986, iteration: 27702
loss: 1.0114436149597168,grad_norm: 0.9999990482302319, iteration: 27703
loss: 0.9833596348762512,grad_norm: 0.8831402264748643, iteration: 27704
loss: 1.0029412508010864,grad_norm: 0.8531537457383425, iteration: 27705
loss: 0.9817634224891663,grad_norm: 0.9391718601996434, iteration: 27706
loss: 1.0613794326782227,grad_norm: 0.9742602620503124, iteration: 27707
loss: 0.9795311093330383,grad_norm: 0.8895976327565966, iteration: 27708
loss: 1.0186597108840942,grad_norm: 0.8999659142630506, iteration: 27709
loss: 0.9793928265571594,grad_norm: 0.9999989441931834, iteration: 27710
loss: 0.9753106832504272,grad_norm: 0.8753343854663234, iteration: 27711
loss: 1.0004898309707642,grad_norm: 0.9023503806469727, iteration: 27712
loss: 1.0295615196228027,grad_norm: 0.9916805687848225, iteration: 27713
loss: 1.0657471418380737,grad_norm: 0.9999994315702948, iteration: 27714
loss: 0.9791198372840881,grad_norm: 0.9328798318755588, iteration: 27715
loss: 1.0175014734268188,grad_norm: 0.9999991493794284, iteration: 27716
loss: 1.0253299474716187,grad_norm: 0.8966607999492096, iteration: 27717
loss: 0.9887359142303467,grad_norm: 0.9451946705825991, iteration: 27718
loss: 0.9899948835372925,grad_norm: 0.839664215371477, iteration: 27719
loss: 1.0035183429718018,grad_norm: 0.865742160657645, iteration: 27720
loss: 0.9720566868782043,grad_norm: 0.8216139249710003, iteration: 27721
loss: 0.9994352459907532,grad_norm: 0.8852830312434031, iteration: 27722
loss: 1.0531671047210693,grad_norm: 0.9999990635945011, iteration: 27723
loss: 1.0335278511047363,grad_norm: 0.8863431425427981, iteration: 27724
loss: 1.0065505504608154,grad_norm: 0.8229175230461788, iteration: 27725
loss: 1.0326606035232544,grad_norm: 0.8126230100210973, iteration: 27726
loss: 1.0175375938415527,grad_norm: 0.8609063018186152, iteration: 27727
loss: 0.9868714809417725,grad_norm: 0.8986751535367363, iteration: 27728
loss: 0.9652653932571411,grad_norm: 0.9999992031112553, iteration: 27729
loss: 0.976571798324585,grad_norm: 0.9999990952010217, iteration: 27730
loss: 0.979709804058075,grad_norm: 0.9356731070994599, iteration: 27731
loss: 0.9738672971725464,grad_norm: 0.8761093611689332, iteration: 27732
loss: 1.042623519897461,grad_norm: 0.9999989780804086, iteration: 27733
loss: 1.0185834169387817,grad_norm: 0.9738521888340792, iteration: 27734
loss: 1.0228798389434814,grad_norm: 0.9470390435194169, iteration: 27735
loss: 1.0237904787063599,grad_norm: 0.8252918239661154, iteration: 27736
loss: 1.0042859315872192,grad_norm: 0.908745759965853, iteration: 27737
loss: 1.0471259355545044,grad_norm: 0.999999070281478, iteration: 27738
loss: 1.0565749406814575,grad_norm: 0.9999994049183211, iteration: 27739
loss: 0.9823276400566101,grad_norm: 0.9255415747591248, iteration: 27740
loss: 1.013961911201477,grad_norm: 0.924679047491472, iteration: 27741
loss: 0.9947890639305115,grad_norm: 0.9754293185212658, iteration: 27742
loss: 1.0383687019348145,grad_norm: 0.8727734473402378, iteration: 27743
loss: 0.9904781579971313,grad_norm: 0.905151461281172, iteration: 27744
loss: 0.9973596930503845,grad_norm: 0.9999990116341156, iteration: 27745
loss: 1.0084526538848877,grad_norm: 0.9999989671064847, iteration: 27746
loss: 1.0299263000488281,grad_norm: 0.9663297687349243, iteration: 27747
loss: 1.0399940013885498,grad_norm: 0.9999990844648295, iteration: 27748
loss: 1.0411113500595093,grad_norm: 0.9999992974286883, iteration: 27749
loss: 1.0104420185089111,grad_norm: 0.9999990832791966, iteration: 27750
loss: 1.0342774391174316,grad_norm: 0.9999990876578996, iteration: 27751
loss: 1.0168994665145874,grad_norm: 0.9443898719931151, iteration: 27752
loss: 1.0115069150924683,grad_norm: 0.9987618962177457, iteration: 27753
loss: 0.9874259233474731,grad_norm: 0.9999991707672216, iteration: 27754
loss: 0.9951272010803223,grad_norm: 0.9999990408773095, iteration: 27755
loss: 1.0087367296218872,grad_norm: 0.9999990829222326, iteration: 27756
loss: 0.9929108023643494,grad_norm: 0.999999031235538, iteration: 27757
loss: 0.9936110377311707,grad_norm: 0.9999989216264288, iteration: 27758
loss: 1.0121848583221436,grad_norm: 0.8591212508707129, iteration: 27759
loss: 0.980930507183075,grad_norm: 0.9252921433026431, iteration: 27760
loss: 0.9898048043251038,grad_norm: 0.9999992726503102, iteration: 27761
loss: 0.9869653582572937,grad_norm: 0.9999991411637282, iteration: 27762
loss: 0.9752655625343323,grad_norm: 0.860350400203585, iteration: 27763
loss: 0.9764685034751892,grad_norm: 0.9582883000850543, iteration: 27764
loss: 1.0075560808181763,grad_norm: 0.9431456265899676, iteration: 27765
loss: 1.0237501859664917,grad_norm: 0.8563967646734573, iteration: 27766
loss: 1.0099414587020874,grad_norm: 0.7520463044179343, iteration: 27767
loss: 1.0170317888259888,grad_norm: 0.9999989762031521, iteration: 27768
loss: 1.0174702405929565,grad_norm: 0.9713960523295778, iteration: 27769
loss: 0.9910175800323486,grad_norm: 0.9370556195194373, iteration: 27770
loss: 1.016717553138733,grad_norm: 0.9999989906948067, iteration: 27771
loss: 1.0142877101898193,grad_norm: 0.9999990837311457, iteration: 27772
loss: 1.018335223197937,grad_norm: 0.9934496918357277, iteration: 27773
loss: 1.0021365880966187,grad_norm: 0.9823773595694505, iteration: 27774
loss: 0.9518875479698181,grad_norm: 0.9997842385819056, iteration: 27775
loss: 0.9987283945083618,grad_norm: 0.9843238165575253, iteration: 27776
loss: 1.0366638898849487,grad_norm: 0.9999992074363475, iteration: 27777
loss: 1.0541027784347534,grad_norm: 0.999999172070327, iteration: 27778
loss: 1.015021800994873,grad_norm: 0.9999990825362127, iteration: 27779
loss: 1.0207115411758423,grad_norm: 0.9081877885380573, iteration: 27780
loss: 1.0255820751190186,grad_norm: 0.9999992378948913, iteration: 27781
loss: 1.0394636392593384,grad_norm: 0.9999990275276391, iteration: 27782
loss: 1.0236952304840088,grad_norm: 0.9999990940532344, iteration: 27783
loss: 1.0107662677764893,grad_norm: 0.9948952213575575, iteration: 27784
loss: 1.0079071521759033,grad_norm: 0.8501657034106083, iteration: 27785
loss: 1.0133779048919678,grad_norm: 0.9237659147798624, iteration: 27786
loss: 0.9632558822631836,grad_norm: 0.9149741959524408, iteration: 27787
loss: 1.0087287425994873,grad_norm: 0.9582213254923745, iteration: 27788
loss: 0.9808257222175598,grad_norm: 0.8853364442122881, iteration: 27789
loss: 1.032394528388977,grad_norm: 0.8738996377293096, iteration: 27790
loss: 0.9944497346878052,grad_norm: 0.9999990877723165, iteration: 27791
loss: 0.9755175113677979,grad_norm: 0.8970169841160063, iteration: 27792
loss: 1.0555442571640015,grad_norm: 0.8905044770299599, iteration: 27793
loss: 1.009132742881775,grad_norm: 0.9434107765713314, iteration: 27794
loss: 1.0300250053405762,grad_norm: 0.8250295320318262, iteration: 27795
loss: 0.9952551126480103,grad_norm: 0.9999996365398214, iteration: 27796
loss: 0.975243866443634,grad_norm: 0.8584392134452529, iteration: 27797
loss: 1.0546963214874268,grad_norm: 0.9707834895039776, iteration: 27798
loss: 1.004553198814392,grad_norm: 0.98357922304749, iteration: 27799
loss: 1.0088107585906982,grad_norm: 0.9377919949858802, iteration: 27800
loss: 1.0067369937896729,grad_norm: 0.9999989749541777, iteration: 27801
loss: 1.031295895576477,grad_norm: 0.9999991052789787, iteration: 27802
loss: 1.0322829484939575,grad_norm: 0.9999990178455426, iteration: 27803
loss: 1.0180840492248535,grad_norm: 0.7992057545982838, iteration: 27804
loss: 1.0214496850967407,grad_norm: 0.999999622363249, iteration: 27805
loss: 0.9892995357513428,grad_norm: 0.9999990652949937, iteration: 27806
loss: 0.9940648078918457,grad_norm: 0.975857724051594, iteration: 27807
loss: 0.9880167245864868,grad_norm: 0.9999990707586801, iteration: 27808
loss: 0.9925186038017273,grad_norm: 0.9999992130482133, iteration: 27809
loss: 0.9963376522064209,grad_norm: 0.8676533245246723, iteration: 27810
loss: 1.0240076780319214,grad_norm: 0.9420247851030251, iteration: 27811
loss: 0.9412823915481567,grad_norm: 0.9999990497345453, iteration: 27812
loss: 0.9863524436950684,grad_norm: 0.9724665029629839, iteration: 27813
loss: 1.0339481830596924,grad_norm: 0.9863431405796534, iteration: 27814
loss: 0.9920785427093506,grad_norm: 0.8826670449886368, iteration: 27815
loss: 0.997567892074585,grad_norm: 0.8622576306724955, iteration: 27816
loss: 1.0039397478103638,grad_norm: 0.8317142102700519, iteration: 27817
loss: 1.0334335565567017,grad_norm: 0.9999991962377063, iteration: 27818
loss: 1.0286177396774292,grad_norm: 0.9719665999656826, iteration: 27819
loss: 0.9967784285545349,grad_norm: 0.9999991017570846, iteration: 27820
loss: 1.0023683309555054,grad_norm: 0.9343305718635679, iteration: 27821
loss: 1.0082449913024902,grad_norm: 0.9999998357559188, iteration: 27822
loss: 1.0280011892318726,grad_norm: 0.9999993000406355, iteration: 27823
loss: 1.0286529064178467,grad_norm: 0.9112658066934777, iteration: 27824
loss: 0.979316771030426,grad_norm: 0.9824212777487528, iteration: 27825
loss: 1.0077967643737793,grad_norm: 0.9654166304062741, iteration: 27826
loss: 1.0163713693618774,grad_norm: 0.9999992184728602, iteration: 27827
loss: 1.0162667036056519,grad_norm: 0.9999998339978934, iteration: 27828
loss: 0.9717872738838196,grad_norm: 0.9999990334603278, iteration: 27829
loss: 1.0238136053085327,grad_norm: 0.9276282230071176, iteration: 27830
loss: 0.9880672693252563,grad_norm: 0.940741713173951, iteration: 27831
loss: 1.004621982574463,grad_norm: 0.8640879432864875, iteration: 27832
loss: 1.0401034355163574,grad_norm: 0.9999990359687656, iteration: 27833
loss: 1.0065439939498901,grad_norm: 0.9999991185308797, iteration: 27834
loss: 1.0061850547790527,grad_norm: 0.9358833315246533, iteration: 27835
loss: 1.0016371011734009,grad_norm: 0.9999991123509275, iteration: 27836
loss: 1.0834050178527832,grad_norm: 0.9999993755019339, iteration: 27837
loss: 1.030404806137085,grad_norm: 0.9999993198216599, iteration: 27838
loss: 1.018391489982605,grad_norm: 0.9999992271748904, iteration: 27839
loss: 1.0207642316818237,grad_norm: 0.9999991034699522, iteration: 27840
loss: 1.028259038925171,grad_norm: 0.9999991217837032, iteration: 27841
loss: 1.0039951801300049,grad_norm: 0.9999991196192913, iteration: 27842
loss: 0.9646411538124084,grad_norm: 0.9999991230271386, iteration: 27843
loss: 0.9796030521392822,grad_norm: 0.9999992811476592, iteration: 27844
loss: 1.0307443141937256,grad_norm: 0.9999991671754198, iteration: 27845
loss: 1.0151478052139282,grad_norm: 0.999999340505457, iteration: 27846
loss: 1.0388041734695435,grad_norm: 0.9999993049154787, iteration: 27847
loss: 1.0303884744644165,grad_norm: 0.8911623450644467, iteration: 27848
loss: 1.0080353021621704,grad_norm: 0.9999990871251664, iteration: 27849
loss: 1.0361837148666382,grad_norm: 0.9999991647094559, iteration: 27850
loss: 0.9980980753898621,grad_norm: 0.9999991368023396, iteration: 27851
loss: 0.9977395534515381,grad_norm: 0.9999990695290011, iteration: 27852
loss: 1.0351231098175049,grad_norm: 0.9183175542210105, iteration: 27853
loss: 1.0344583988189697,grad_norm: 0.9999990884230707, iteration: 27854
loss: 1.0223864316940308,grad_norm: 0.9999990065397824, iteration: 27855
loss: 1.0136358737945557,grad_norm: 0.9453991424276782, iteration: 27856
loss: 1.0228796005249023,grad_norm: 0.9999993648816533, iteration: 27857
loss: 0.9794134497642517,grad_norm: 0.99999938348183, iteration: 27858
loss: 1.029183268547058,grad_norm: 0.9923744474712818, iteration: 27859
loss: 0.9899481534957886,grad_norm: 0.9999991771238012, iteration: 27860
loss: 1.0321964025497437,grad_norm: 0.9999991046505757, iteration: 27861
loss: 0.9897958040237427,grad_norm: 0.8228719323926323, iteration: 27862
loss: 1.0179128646850586,grad_norm: 0.8706490373320508, iteration: 27863
loss: 1.0242375135421753,grad_norm: 0.9999992200580972, iteration: 27864
loss: 1.0059655904769897,grad_norm: 0.9999990577239318, iteration: 27865
loss: 1.0070995092391968,grad_norm: 0.9999992447274623, iteration: 27866
loss: 1.0054094791412354,grad_norm: 0.9817379687973394, iteration: 27867
loss: 1.0171301364898682,grad_norm: 0.9999992779301375, iteration: 27868
loss: 0.9861815571784973,grad_norm: 0.8674477147499465, iteration: 27869
loss: 0.9825128316879272,grad_norm: 0.9999992777442072, iteration: 27870
loss: 1.0021966695785522,grad_norm: 0.8797537079554719, iteration: 27871
loss: 1.0231627225875854,grad_norm: 0.9999990590289312, iteration: 27872
loss: 0.9524331092834473,grad_norm: 0.97310324252742, iteration: 27873
loss: 0.9849974513053894,grad_norm: 0.8909658794729781, iteration: 27874
loss: 1.016316533088684,grad_norm: 0.9039274020479361, iteration: 27875
loss: 1.0444496870040894,grad_norm: 0.9999993720753632, iteration: 27876
loss: 1.0448811054229736,grad_norm: 0.9999992601862957, iteration: 27877
loss: 0.9859797358512878,grad_norm: 0.9999992230825384, iteration: 27878
loss: 1.0149673223495483,grad_norm: 0.8352211789694935, iteration: 27879
loss: 1.0020345449447632,grad_norm: 0.9242034833365059, iteration: 27880
loss: 0.9955796599388123,grad_norm: 0.9999991572262615, iteration: 27881
loss: 1.000179648399353,grad_norm: 0.9999991884866892, iteration: 27882
loss: 1.0026730298995972,grad_norm: 0.8821615053374995, iteration: 27883
loss: 1.004183292388916,grad_norm: 0.9999990654045219, iteration: 27884
loss: 1.0060210227966309,grad_norm: 0.9007994840696429, iteration: 27885
loss: 0.9431719183921814,grad_norm: 0.9999989697051612, iteration: 27886
loss: 1.0037670135498047,grad_norm: 0.961932896683101, iteration: 27887
loss: 1.0129365921020508,grad_norm: 0.9999991998330751, iteration: 27888
loss: 0.9741230607032776,grad_norm: 0.9999990887660971, iteration: 27889
loss: 1.0152548551559448,grad_norm: 0.9152308033865367, iteration: 27890
loss: 0.9826262593269348,grad_norm: 0.9999990689973383, iteration: 27891
loss: 1.0263268947601318,grad_norm: 0.9708471131898635, iteration: 27892
loss: 1.0068244934082031,grad_norm: 0.9999990937977463, iteration: 27893
loss: 0.9762651324272156,grad_norm: 0.9999990096985439, iteration: 27894
loss: 0.952225387096405,grad_norm: 0.9938480530447861, iteration: 27895
loss: 0.9203059077262878,grad_norm: 0.8960139636419565, iteration: 27896
loss: 1.046764612197876,grad_norm: 0.9999997933952548, iteration: 27897
loss: 0.9937665462493896,grad_norm: 0.9043955306910941, iteration: 27898
loss: 1.0360939502716064,grad_norm: 0.9999989855680465, iteration: 27899
loss: 0.9804673790931702,grad_norm: 0.9144250551449836, iteration: 27900
loss: 0.9979389905929565,grad_norm: 0.952875487943092, iteration: 27901
loss: 1.061277151107788,grad_norm: 0.9999995959813998, iteration: 27902
loss: 0.9841324090957642,grad_norm: 0.9999991873319122, iteration: 27903
loss: 1.059397578239441,grad_norm: 0.9999994083797388, iteration: 27904
loss: 1.0194149017333984,grad_norm: 0.9979497519397251, iteration: 27905
loss: 1.0294785499572754,grad_norm: 0.9947808005149252, iteration: 27906
loss: 1.0029696226119995,grad_norm: 0.9999997814296469, iteration: 27907
loss: 1.03463613986969,grad_norm: 0.8731258267805625, iteration: 27908
loss: 0.9841371178627014,grad_norm: 0.9999992390110647, iteration: 27909
loss: 0.9824392795562744,grad_norm: 0.99999926504211, iteration: 27910
loss: 0.9844400882720947,grad_norm: 0.9999991279832579, iteration: 27911
loss: 1.0123934745788574,grad_norm: 0.9603794840993723, iteration: 27912
loss: 1.0321815013885498,grad_norm: 0.9999990974629588, iteration: 27913
loss: 1.0160163640975952,grad_norm: 0.9906433205122079, iteration: 27914
loss: 0.997196614742279,grad_norm: 0.973767774708856, iteration: 27915
loss: 1.0165399312973022,grad_norm: 0.9999991208440643, iteration: 27916
loss: 1.0126440525054932,grad_norm: 0.9999990757825595, iteration: 27917
loss: 0.9904688000679016,grad_norm: 0.9999992117464119, iteration: 27918
loss: 0.9967511296272278,grad_norm: 0.9519120800235534, iteration: 27919
loss: 0.9997543692588806,grad_norm: 0.9999991925682588, iteration: 27920
loss: 1.030970811843872,grad_norm: 0.9999990005090835, iteration: 27921
loss: 0.988462507724762,grad_norm: 0.999999375304103, iteration: 27922
loss: 1.0516090393066406,grad_norm: 0.9999994881500721, iteration: 27923
loss: 1.0286881923675537,grad_norm: 0.9312809970502468, iteration: 27924
loss: 1.023618459701538,grad_norm: 0.9999992686769624, iteration: 27925
loss: 1.0058711767196655,grad_norm: 0.952094238936814, iteration: 27926
loss: 1.029348611831665,grad_norm: 0.9999992401208398, iteration: 27927
loss: 1.0283881425857544,grad_norm: 0.962019311723351, iteration: 27928
loss: 1.0602396726608276,grad_norm: 0.9999992004112231, iteration: 27929
loss: 0.9909391403198242,grad_norm: 0.9999991963160014, iteration: 27930
loss: 1.0112488269805908,grad_norm: 0.9999991592369459, iteration: 27931
loss: 0.9863866567611694,grad_norm: 0.9999990437246449, iteration: 27932
loss: 1.0512999296188354,grad_norm: 0.9999995938519118, iteration: 27933
loss: 0.9968918561935425,grad_norm: 0.9999991352008996, iteration: 27934
loss: 1.0244951248168945,grad_norm: 0.9999991301853449, iteration: 27935
loss: 1.075563669204712,grad_norm: 0.9999993827123677, iteration: 27936
loss: 1.0179646015167236,grad_norm: 0.9999991504749923, iteration: 27937
loss: 0.9958765506744385,grad_norm: 0.9999991509692647, iteration: 27938
loss: 1.0058039426803589,grad_norm: 0.9999991876078013, iteration: 27939
loss: 1.0317236185073853,grad_norm: 0.9999991185301771, iteration: 27940
loss: 0.9820398688316345,grad_norm: 0.8810446592444242, iteration: 27941
loss: 0.9947240352630615,grad_norm: 0.999999385135025, iteration: 27942
loss: 1.0643423795700073,grad_norm: 0.9999993558435419, iteration: 27943
loss: 0.967191219329834,grad_norm: 0.9397440426838738, iteration: 27944
loss: 0.9848107695579529,grad_norm: 0.9999991889565564, iteration: 27945
loss: 0.9845501780509949,grad_norm: 0.9999991125900464, iteration: 27946
loss: 1.0227406024932861,grad_norm: 0.886424537509594, iteration: 27947
loss: 0.9739689826965332,grad_norm: 0.9824637404743823, iteration: 27948
loss: 1.0607688426971436,grad_norm: 0.9787997364873234, iteration: 27949
loss: 1.0203758478164673,grad_norm: 0.9761240760597549, iteration: 27950
loss: 1.008987307548523,grad_norm: 0.9999994257262209, iteration: 27951
loss: 1.0136284828186035,grad_norm: 0.9779540409139919, iteration: 27952
loss: 0.9932553172111511,grad_norm: 0.9999991801985592, iteration: 27953
loss: 1.029608130455017,grad_norm: 0.99999920265937, iteration: 27954
loss: 0.9922612905502319,grad_norm: 0.9999991129602127, iteration: 27955
loss: 1.0081653594970703,grad_norm: 0.999999001739392, iteration: 27956
loss: 1.0335793495178223,grad_norm: 0.7812935653828014, iteration: 27957
loss: 1.0354231595993042,grad_norm: 0.9672262707394301, iteration: 27958
loss: 1.0055502653121948,grad_norm: 0.9999991717045411, iteration: 27959
loss: 1.009475827217102,grad_norm: 0.9999990835807167, iteration: 27960
loss: 1.0334028005599976,grad_norm: 0.9922158307549158, iteration: 27961
loss: 0.9859580397605896,grad_norm: 0.9999990235415016, iteration: 27962
loss: 1.041001796722412,grad_norm: 0.9999994358994818, iteration: 27963
loss: 1.023821473121643,grad_norm: 0.9999991228650276, iteration: 27964
loss: 1.010884165763855,grad_norm: 0.9999995443645913, iteration: 27965
loss: 1.0271061658859253,grad_norm: 0.9999995579138118, iteration: 27966
loss: 1.0070245265960693,grad_norm: 0.9324550246734172, iteration: 27967
loss: 1.0033024549484253,grad_norm: 0.9999990772632699, iteration: 27968
loss: 0.9943828582763672,grad_norm: 0.98080988664276, iteration: 27969
loss: 1.019966721534729,grad_norm: 0.9999993830613445, iteration: 27970
loss: 0.9967451691627502,grad_norm: 0.999999010385164, iteration: 27971
loss: 1.0311014652252197,grad_norm: 0.9107964115291437, iteration: 27972
loss: 0.989093542098999,grad_norm: 0.9946337726028859, iteration: 27973
loss: 1.039773941040039,grad_norm: 0.9540407285944583, iteration: 27974
loss: 1.0012160539627075,grad_norm: 0.9999991749802039, iteration: 27975
loss: 0.970034658908844,grad_norm: 0.9101135169728278, iteration: 27976
loss: 0.9770516753196716,grad_norm: 0.9999991129371115, iteration: 27977
loss: 1.0067496299743652,grad_norm: 0.9999994726834031, iteration: 27978
loss: 1.0088084936141968,grad_norm: 0.9592690023029287, iteration: 27979
loss: 0.9852505326271057,grad_norm: 0.9999991677393056, iteration: 27980
loss: 1.0371876955032349,grad_norm: 0.9812603694510854, iteration: 27981
loss: 1.0003132820129395,grad_norm: 0.9999993088439879, iteration: 27982
loss: 1.1050138473510742,grad_norm: 0.9999995711376355, iteration: 27983
loss: 1.0063227415084839,grad_norm: 0.941220402570875, iteration: 27984
loss: 1.055397868156433,grad_norm: 0.9999992829691621, iteration: 27985
loss: 1.0568031072616577,grad_norm: 0.9999995183320034, iteration: 27986
loss: 1.0135549306869507,grad_norm: 0.8584362425739929, iteration: 27987
loss: 1.0253021717071533,grad_norm: 0.9999990614687841, iteration: 27988
loss: 0.9935926795005798,grad_norm: 0.8584662386041205, iteration: 27989
loss: 1.0611823797225952,grad_norm: 0.9999994052274276, iteration: 27990
loss: 1.0467700958251953,grad_norm: 0.975765421476861, iteration: 27991
loss: 1.0165472030639648,grad_norm: 0.9679960835380136, iteration: 27992
loss: 1.0287119150161743,grad_norm: 0.8194159818145702, iteration: 27993
loss: 1.0062255859375,grad_norm: 0.9971849470733056, iteration: 27994
loss: 0.9777264595031738,grad_norm: 0.9999990689268277, iteration: 27995
loss: 0.977614164352417,grad_norm: 0.9945614822807921, iteration: 27996
loss: 1.0533454418182373,grad_norm: 0.9502371132839844, iteration: 27997
loss: 0.9927393794059753,grad_norm: 0.9999989803277842, iteration: 27998
loss: 0.9705966114997864,grad_norm: 0.769038850432788, iteration: 27999
loss: 1.0175585746765137,grad_norm: 0.9331085861960685, iteration: 28000
loss: 1.059458613395691,grad_norm: 0.9999990971658715, iteration: 28001
loss: 1.0297025442123413,grad_norm: 0.999999076678731, iteration: 28002
loss: 1.010892391204834,grad_norm: 0.957887061901924, iteration: 28003
loss: 0.9999674558639526,grad_norm: 0.9312169628881048, iteration: 28004
loss: 1.0273115634918213,grad_norm: 0.9999991152720493, iteration: 28005
loss: 1.029593825340271,grad_norm: 0.9999996315663755, iteration: 28006
loss: 1.0039845705032349,grad_norm: 0.9536768722270658, iteration: 28007
loss: 1.0351595878601074,grad_norm: 0.8998190655016538, iteration: 28008
loss: 1.0227702856063843,grad_norm: 0.983355969040611, iteration: 28009
loss: 1.0160738229751587,grad_norm: 0.9298029030607593, iteration: 28010
loss: 1.017102837562561,grad_norm: 0.9999992442578872, iteration: 28011
loss: 0.9982470273971558,grad_norm: 0.8299057854768088, iteration: 28012
loss: 1.006717324256897,grad_norm: 0.9999991194192697, iteration: 28013
loss: 0.9857697486877441,grad_norm: 0.9999991416085057, iteration: 28014
loss: 1.1125502586364746,grad_norm: 0.9999997823193738, iteration: 28015
loss: 1.040586233139038,grad_norm: 0.9999989982523512, iteration: 28016
loss: 1.028896450996399,grad_norm: 0.9999992242352874, iteration: 28017
loss: 1.0154169797897339,grad_norm: 0.9999991423321114, iteration: 28018
loss: 1.021806001663208,grad_norm: 0.999999155512352, iteration: 28019
loss: 1.0357451438903809,grad_norm: 0.9999993738592174, iteration: 28020
loss: 1.017911434173584,grad_norm: 0.9072671340791743, iteration: 28021
loss: 1.0027989149093628,grad_norm: 0.9999992115072545, iteration: 28022
loss: 1.027380108833313,grad_norm: 0.9999998335900899, iteration: 28023
loss: 1.0202058553695679,grad_norm: 0.972628743108949, iteration: 28024
loss: 1.0220098495483398,grad_norm: 0.9999991981606376, iteration: 28025
loss: 1.0346176624298096,grad_norm: 0.9113062383201381, iteration: 28026
loss: 0.9908021092414856,grad_norm: 0.9093634548362702, iteration: 28027
loss: 1.013993263244629,grad_norm: 0.9999991212927042, iteration: 28028
loss: 1.0380346775054932,grad_norm: 0.9999997821306292, iteration: 28029
loss: 1.0194573402404785,grad_norm: 0.9999992704007539, iteration: 28030
loss: 0.9949221014976501,grad_norm: 0.8411676991786016, iteration: 28031
loss: 1.0308321714401245,grad_norm: 0.8487071618557737, iteration: 28032
loss: 0.987093448638916,grad_norm: 0.8936993203618633, iteration: 28033
loss: 1.0208417177200317,grad_norm: 0.9909857697437694, iteration: 28034
loss: 1.0183089971542358,grad_norm: 0.9742419956080942, iteration: 28035
loss: 1.0182875394821167,grad_norm: 0.9999992710623494, iteration: 28036
loss: 1.0375361442565918,grad_norm: 0.9118002194019936, iteration: 28037
loss: 0.9211716055870056,grad_norm: 0.9600573024453577, iteration: 28038
loss: 1.00568425655365,grad_norm: 0.9575604656764114, iteration: 28039
loss: 0.9842369556427002,grad_norm: 0.9601050556219931, iteration: 28040
loss: 1.0908715724945068,grad_norm: 0.9735449979037996, iteration: 28041
loss: 1.0433735847473145,grad_norm: 0.9817072978569016, iteration: 28042
loss: 0.9638888835906982,grad_norm: 0.9319303828516586, iteration: 28043
loss: 1.0145896673202515,grad_norm: 0.9999992263938472, iteration: 28044
loss: 0.9739022850990295,grad_norm: 0.9999991445988011, iteration: 28045
loss: 1.0147897005081177,grad_norm: 0.8893415739476047, iteration: 28046
loss: 1.0464543104171753,grad_norm: 0.9999992495902718, iteration: 28047
loss: 0.9546148777008057,grad_norm: 0.9999990920174888, iteration: 28048
loss: 1.0209136009216309,grad_norm: 0.9999992501765935, iteration: 28049
loss: 0.9620730876922607,grad_norm: 0.9999990689840299, iteration: 28050
loss: 1.026857852935791,grad_norm: 0.9999990774766717, iteration: 28051
loss: 0.9717483520507812,grad_norm: 0.9643933157587559, iteration: 28052
loss: 1.0289751291275024,grad_norm: 0.9993932263139509, iteration: 28053
loss: 0.9927169680595398,grad_norm: 0.9377466909936081, iteration: 28054
loss: 1.0042140483856201,grad_norm: 0.9082254586024331, iteration: 28055
loss: 0.986565887928009,grad_norm: 0.9999990661225606, iteration: 28056
loss: 1.041919469833374,grad_norm: 0.9617038294765903, iteration: 28057
loss: 1.0016010999679565,grad_norm: 0.9614655256239121, iteration: 28058
loss: 1.0134474039077759,grad_norm: 0.9999992267849931, iteration: 28059
loss: 0.9696991443634033,grad_norm: 0.8937049323837594, iteration: 28060
loss: 1.0039445161819458,grad_norm: 0.9999991031668288, iteration: 28061
loss: 0.9926645159721375,grad_norm: 0.8960244116740235, iteration: 28062
loss: 1.006177544593811,grad_norm: 0.9045078214958298, iteration: 28063
loss: 0.9826784133911133,grad_norm: 0.9336296327358644, iteration: 28064
loss: 1.031846284866333,grad_norm: 0.9999990817147423, iteration: 28065
loss: 1.0135639905929565,grad_norm: 0.8862790908209861, iteration: 28066
loss: 0.9437770843505859,grad_norm: 0.861265322741763, iteration: 28067
loss: 1.0001415014266968,grad_norm: 0.999999376847437, iteration: 28068
loss: 0.9868735671043396,grad_norm: 0.9620765252225233, iteration: 28069
loss: 1.0292718410491943,grad_norm: 0.9199371915890208, iteration: 28070
loss: 1.0380033254623413,grad_norm: 0.9999994873967698, iteration: 28071
loss: 0.9489242434501648,grad_norm: 0.9826584536024419, iteration: 28072
loss: 1.0035266876220703,grad_norm: 0.9999993101235911, iteration: 28073
loss: 1.0070159435272217,grad_norm: 0.9341192831652132, iteration: 28074
loss: 1.026094675064087,grad_norm: 0.9424830338940239, iteration: 28075
loss: 1.0179520845413208,grad_norm: 0.854260613517966, iteration: 28076
loss: 1.0120023488998413,grad_norm: 0.9625097815629491, iteration: 28077
loss: 1.0004950761795044,grad_norm: 0.9999991516769368, iteration: 28078
loss: 1.0684853792190552,grad_norm: 0.9999991353804396, iteration: 28079
loss: 1.0396147966384888,grad_norm: 0.9896179611561399, iteration: 28080
loss: 1.0001283884048462,grad_norm: 0.9999990846017276, iteration: 28081
loss: 0.9921663403511047,grad_norm: 0.9999991149104083, iteration: 28082
loss: 1.0173430442810059,grad_norm: 0.8924452529365894, iteration: 28083
loss: 0.9936881065368652,grad_norm: 0.9999992870724314, iteration: 28084
loss: 1.0333409309387207,grad_norm: 0.8716753286790496, iteration: 28085
loss: 1.009225606918335,grad_norm: 0.8283969299064712, iteration: 28086
loss: 0.9930827617645264,grad_norm: 0.9999998917825451, iteration: 28087
loss: 1.0168473720550537,grad_norm: 0.9999992156778311, iteration: 28088
loss: 0.9828683733940125,grad_norm: 0.986369875218252, iteration: 28089
loss: 1.0129914283752441,grad_norm: 0.9999993925775961, iteration: 28090
loss: 0.9927709698677063,grad_norm: 0.9650327273506097, iteration: 28091
loss: 0.9991349577903748,grad_norm: 0.9999991113316677, iteration: 28092
loss: 0.9729026556015015,grad_norm: 0.9999991161051507, iteration: 28093
loss: 1.0058879852294922,grad_norm: 0.9999990489160596, iteration: 28094
loss: 1.012760877609253,grad_norm: 0.8817519408705249, iteration: 28095
loss: 0.9868325591087341,grad_norm: 0.8021961696391794, iteration: 28096
loss: 0.987658679485321,grad_norm: 0.9735105162633843, iteration: 28097
loss: 1.0133486986160278,grad_norm: 0.9538527397269134, iteration: 28098
loss: 1.0186253786087036,grad_norm: 0.9999991362328631, iteration: 28099
loss: 1.0221619606018066,grad_norm: 0.9999996811998024, iteration: 28100
loss: 1.0732063055038452,grad_norm: 0.9999991637274963, iteration: 28101
loss: 1.0184967517852783,grad_norm: 0.9999992286731463, iteration: 28102
loss: 1.0147337913513184,grad_norm: 0.9999989189247761, iteration: 28103
loss: 1.0201908349990845,grad_norm: 0.974758557641804, iteration: 28104
loss: 1.0072473287582397,grad_norm: 0.999999124281034, iteration: 28105
loss: 1.007876992225647,grad_norm: 0.9999996972192978, iteration: 28106
loss: 0.9930636286735535,grad_norm: 0.9999992031823685, iteration: 28107
loss: 0.982653021812439,grad_norm: 0.921514055070617, iteration: 28108
loss: 1.0223808288574219,grad_norm: 0.8929047557427462, iteration: 28109
loss: 1.0114471912384033,grad_norm: 0.8779056851668089, iteration: 28110
loss: 1.0175279378890991,grad_norm: 0.9873418413991489, iteration: 28111
loss: 0.9533769488334656,grad_norm: 0.8896207794706098, iteration: 28112
loss: 0.9898133873939514,grad_norm: 0.9782985696805014, iteration: 28113
loss: 1.0364266633987427,grad_norm: 0.9944215022112122, iteration: 28114
loss: 1.0259560346603394,grad_norm: 0.999999162080718, iteration: 28115
loss: 1.000765323638916,grad_norm: 0.9999996138646509, iteration: 28116
loss: 1.0387314558029175,grad_norm: 0.8752902194929573, iteration: 28117
loss: 1.0434362888336182,grad_norm: 0.9999992436896352, iteration: 28118
loss: 1.004461646080017,grad_norm: 0.9999991487558398, iteration: 28119
loss: 0.9989503026008606,grad_norm: 0.9995966421597198, iteration: 28120
loss: 1.0103434324264526,grad_norm: 0.9999991337372497, iteration: 28121
loss: 1.011803388595581,grad_norm: 0.9999990416026673, iteration: 28122
loss: 1.0508801937103271,grad_norm: 0.9999998495306518, iteration: 28123
loss: 1.0112652778625488,grad_norm: 0.9999990517967027, iteration: 28124
loss: 1.0063717365264893,grad_norm: 0.9999991088872782, iteration: 28125
loss: 1.0453749895095825,grad_norm: 0.9999991857872613, iteration: 28126
loss: 1.0530421733856201,grad_norm: 0.9999990283789046, iteration: 28127
loss: 1.036905288696289,grad_norm: 0.8643059941123958, iteration: 28128
loss: 0.9624956846237183,grad_norm: 0.8660602928722031, iteration: 28129
loss: 1.010703206062317,grad_norm: 0.9999989940814232, iteration: 28130
loss: 0.9986122250556946,grad_norm: 0.9325775917155, iteration: 28131
loss: 1.036489725112915,grad_norm: 0.9844248610154821, iteration: 28132
loss: 1.0231783390045166,grad_norm: 0.9068913501016034, iteration: 28133
loss: 1.0115249156951904,grad_norm: 0.9407780902036914, iteration: 28134
loss: 1.0334086418151855,grad_norm: 0.924633718770192, iteration: 28135
loss: 1.0270090103149414,grad_norm: 0.8870702031208323, iteration: 28136
loss: 1.0257444381713867,grad_norm: 0.7777237636414076, iteration: 28137
loss: 1.0061430931091309,grad_norm: 0.9999992120123625, iteration: 28138
loss: 1.008296251296997,grad_norm: 0.9946051762191643, iteration: 28139
loss: 1.0174771547317505,grad_norm: 0.9167550150254871, iteration: 28140
loss: 1.0117071866989136,grad_norm: 0.9999990095778883, iteration: 28141
loss: 1.026477575302124,grad_norm: 0.9999993469983697, iteration: 28142
loss: 1.0220698118209839,grad_norm: 0.8148243113803654, iteration: 28143
loss: 0.9926052093505859,grad_norm: 0.9999991366094964, iteration: 28144
loss: 0.9969649314880371,grad_norm: 0.9596653717356178, iteration: 28145
loss: 1.001491665840149,grad_norm: 0.9999991493958389, iteration: 28146
loss: 1.0253676176071167,grad_norm: 0.9999990538239882, iteration: 28147
loss: 1.033665657043457,grad_norm: 0.99999908553851, iteration: 28148
loss: 1.009139895439148,grad_norm: 0.9999995003923526, iteration: 28149
loss: 1.0235964059829712,grad_norm: 0.9999990408029691, iteration: 28150
loss: 0.9939743280410767,grad_norm: 0.9404386555180796, iteration: 28151
loss: 1.0008395910263062,grad_norm: 0.97844989162208, iteration: 28152
loss: 1.0400391817092896,grad_norm: 0.9202177191069071, iteration: 28153
loss: 0.969792366027832,grad_norm: 0.9999991429223026, iteration: 28154
loss: 1.0024020671844482,grad_norm: 0.9999992329502922, iteration: 28155
loss: 0.9961371421813965,grad_norm: 0.9999991591705336, iteration: 28156
loss: 1.0050240755081177,grad_norm: 0.9536043382359864, iteration: 28157
loss: 1.0260627269744873,grad_norm: 0.99999960663294, iteration: 28158
loss: 1.0312983989715576,grad_norm: 0.9999990283581344, iteration: 28159
loss: 0.996376633644104,grad_norm: 0.9999989774703035, iteration: 28160
loss: 1.0039622783660889,grad_norm: 0.9016453220220919, iteration: 28161
loss: 0.9942740797996521,grad_norm: 0.9999990118683181, iteration: 28162
loss: 1.050300121307373,grad_norm: 0.8252375493473557, iteration: 28163
loss: 0.9976463913917542,grad_norm: 0.907024803778039, iteration: 28164
loss: 1.0335640907287598,grad_norm: 0.963377827447418, iteration: 28165
loss: 1.0245999097824097,grad_norm: 0.9999991239240897, iteration: 28166
loss: 1.0333119630813599,grad_norm: 0.9999996146765419, iteration: 28167
loss: 0.9817175269126892,grad_norm: 0.9999991487639984, iteration: 28168
loss: 0.9968122243881226,grad_norm: 0.918376637067785, iteration: 28169
loss: 1.0035767555236816,grad_norm: 0.9999991797596147, iteration: 28170
loss: 1.0117665529251099,grad_norm: 0.9100698838309451, iteration: 28171
loss: 1.0267120599746704,grad_norm: 0.9999990894328571, iteration: 28172
loss: 0.9857993125915527,grad_norm: 0.8443587295222232, iteration: 28173
loss: 0.9736514687538147,grad_norm: 0.9999995690303541, iteration: 28174
loss: 1.0057998895645142,grad_norm: 0.9120140759269223, iteration: 28175
loss: 0.9845781922340393,grad_norm: 0.9766616126803426, iteration: 28176
loss: 1.0052034854888916,grad_norm: 0.9737304424307198, iteration: 28177
loss: 1.00621497631073,grad_norm: 0.9530039598756812, iteration: 28178
loss: 1.0026458501815796,grad_norm: 0.9999992035781644, iteration: 28179
loss: 0.9931991100311279,grad_norm: 0.9999991727172314, iteration: 28180
loss: 0.9995235204696655,grad_norm: 0.9101909215112574, iteration: 28181
loss: 0.9921696782112122,grad_norm: 0.9999990913710715, iteration: 28182
loss: 1.0073610544204712,grad_norm: 0.9999990408584537, iteration: 28183
loss: 1.0266857147216797,grad_norm: 0.9999992094055834, iteration: 28184
loss: 1.010504961013794,grad_norm: 0.9675742333472367, iteration: 28185
loss: 1.0127143859863281,grad_norm: 0.9493805707501414, iteration: 28186
loss: 1.046044945716858,grad_norm: 0.9999990811491605, iteration: 28187
loss: 1.0309466123580933,grad_norm: 0.9642696626738135, iteration: 28188
loss: 1.0265803337097168,grad_norm: 0.9999989464575806, iteration: 28189
loss: 1.0328294038772583,grad_norm: 0.9999993965192232, iteration: 28190
loss: 0.9844481945037842,grad_norm: 0.9999991898026169, iteration: 28191
loss: 1.0493615865707397,grad_norm: 0.999999831017482, iteration: 28192
loss: 0.993827223777771,grad_norm: 0.8829431312603057, iteration: 28193
loss: 1.038466453552246,grad_norm: 0.9790606129789071, iteration: 28194
loss: 0.9805066585540771,grad_norm: 0.8336466485171099, iteration: 28195
loss: 1.016093373298645,grad_norm: 0.9142194465113721, iteration: 28196
loss: 0.9672083258628845,grad_norm: 0.9999990790946125, iteration: 28197
loss: 1.0063775777816772,grad_norm: 0.8307050069209821, iteration: 28198
loss: 1.0404844284057617,grad_norm: 0.9138420848204218, iteration: 28199
loss: 0.9998234510421753,grad_norm: 0.9999990578482302, iteration: 28200
loss: 0.9713175892829895,grad_norm: 0.9347395034734195, iteration: 28201
loss: 0.9827773571014404,grad_norm: 0.9999992036000325, iteration: 28202
loss: 0.9684473276138306,grad_norm: 0.8496658312751503, iteration: 28203
loss: 1.0229440927505493,grad_norm: 0.9165754147933606, iteration: 28204
loss: 0.9951501488685608,grad_norm: 0.9999989747969262, iteration: 28205
loss: 1.0042661428451538,grad_norm: 0.7819195310450637, iteration: 28206
loss: 1.0099472999572754,grad_norm: 0.999999258636253, iteration: 28207
loss: 0.9964277744293213,grad_norm: 0.9042739315684458, iteration: 28208
loss: 0.9763467311859131,grad_norm: 0.9843325569941169, iteration: 28209
loss: 1.0107706785202026,grad_norm: 0.9999990108085615, iteration: 28210
loss: 1.0347923040390015,grad_norm: 0.999999130775352, iteration: 28211
loss: 1.0448179244995117,grad_norm: 0.9817781088407461, iteration: 28212
loss: 0.9920657873153687,grad_norm: 0.9999992037310796, iteration: 28213
loss: 0.9984157681465149,grad_norm: 0.8608660852206492, iteration: 28214
loss: 0.9758653044700623,grad_norm: 0.9574812763849715, iteration: 28215
loss: 1.0246323347091675,grad_norm: 0.9999991459446566, iteration: 28216
loss: 0.9985319972038269,grad_norm: 0.9249087324910124, iteration: 28217
loss: 0.9939268231391907,grad_norm: 0.999999114230379, iteration: 28218
loss: 1.0224518775939941,grad_norm: 0.9999991097187764, iteration: 28219
loss: 1.0165915489196777,grad_norm: 0.895598565194498, iteration: 28220
loss: 1.0066347122192383,grad_norm: 0.9067887847677967, iteration: 28221
loss: 0.998261034488678,grad_norm: 0.9772289977220769, iteration: 28222
loss: 1.0002257823944092,grad_norm: 0.999999118860496, iteration: 28223
loss: 0.9876648187637329,grad_norm: 0.9803601311925355, iteration: 28224
loss: 1.021357536315918,grad_norm: 0.9999991535916642, iteration: 28225
loss: 1.0038669109344482,grad_norm: 0.9598875276661549, iteration: 28226
loss: 1.0259473323822021,grad_norm: 0.9999992738062456, iteration: 28227
loss: 0.9865259528160095,grad_norm: 0.9999990923342705, iteration: 28228
loss: 1.008370280265808,grad_norm: 0.9818746822912175, iteration: 28229
loss: 1.0248020887374878,grad_norm: 0.9999990883920084, iteration: 28230
loss: 0.9953486323356628,grad_norm: 0.902419102134041, iteration: 28231
loss: 0.9952095746994019,grad_norm: 0.955198841512068, iteration: 28232
loss: 0.9661270380020142,grad_norm: 0.8139444886665689, iteration: 28233
loss: 1.006583333015442,grad_norm: 0.9999991394572234, iteration: 28234
loss: 0.9889441132545471,grad_norm: 0.9672738479169961, iteration: 28235
loss: 0.9777340292930603,grad_norm: 0.9999995752863428, iteration: 28236
loss: 1.0276985168457031,grad_norm: 0.9999997108560363, iteration: 28237
loss: 1.0322339534759521,grad_norm: 0.9999995870835025, iteration: 28238
loss: 1.013284683227539,grad_norm: 0.9684595110942059, iteration: 28239
loss: 1.0257614850997925,grad_norm: 0.9291399959527655, iteration: 28240
loss: 1.0191004276275635,grad_norm: 0.9999995032184855, iteration: 28241
loss: 1.0005465745925903,grad_norm: 0.9324748713010235, iteration: 28242
loss: 1.0246198177337646,grad_norm: 0.9999991642100748, iteration: 28243
loss: 0.9812168478965759,grad_norm: 0.8797143898116685, iteration: 28244
loss: 1.0292415618896484,grad_norm: 0.9468665992953005, iteration: 28245
loss: 1.0041569471359253,grad_norm: 0.9999989923667697, iteration: 28246
loss: 1.0155102014541626,grad_norm: 0.9999991229911868, iteration: 28247
loss: 0.9996479153633118,grad_norm: 0.8796103644066271, iteration: 28248
loss: 1.0068724155426025,grad_norm: 0.9999999247887335, iteration: 28249
loss: 1.0585354566574097,grad_norm: 0.9999997958072931, iteration: 28250
loss: 1.0094643831253052,grad_norm: 0.9640413250104721, iteration: 28251
loss: 1.0146254301071167,grad_norm: 0.9999994935926321, iteration: 28252
loss: 1.0219658613204956,grad_norm: 0.7424844277691868, iteration: 28253
loss: 0.9618287086486816,grad_norm: 0.9999992127810098, iteration: 28254
loss: 1.0586987733840942,grad_norm: 0.9999990342894564, iteration: 28255
loss: 1.0087032318115234,grad_norm: 0.9999992808420617, iteration: 28256
loss: 1.0470049381256104,grad_norm: 0.999999643550258, iteration: 28257
loss: 1.057945966720581,grad_norm: 0.9999993017070653, iteration: 28258
loss: 0.9848289489746094,grad_norm: 0.809253562750919, iteration: 28259
loss: 1.0056867599487305,grad_norm: 0.999999082913381, iteration: 28260
loss: 1.016037106513977,grad_norm: 0.9999990562341634, iteration: 28261
loss: 0.999741792678833,grad_norm: 0.9999990991479407, iteration: 28262
loss: 1.0199068784713745,grad_norm: 0.9335141022655584, iteration: 28263
loss: 1.01152765750885,grad_norm: 0.9399084498846593, iteration: 28264
loss: 1.0247489213943481,grad_norm: 0.9999997622742224, iteration: 28265
loss: 0.9930960536003113,grad_norm: 0.9999990504849594, iteration: 28266
loss: 1.0063167810440063,grad_norm: 0.9999993077230098, iteration: 28267
loss: 1.005059838294983,grad_norm: 0.9999991798549852, iteration: 28268
loss: 0.9811988472938538,grad_norm: 0.7870612090150252, iteration: 28269
loss: 1.0204824209213257,grad_norm: 0.961492493830511, iteration: 28270
loss: 1.035753607749939,grad_norm: 0.999999122979816, iteration: 28271
loss: 1.0225129127502441,grad_norm: 0.7881495684156888, iteration: 28272
loss: 1.0216691493988037,grad_norm: 0.9487071241581334, iteration: 28273
loss: 1.0197449922561646,grad_norm: 0.9999991068674452, iteration: 28274
loss: 1.018058180809021,grad_norm: 0.9999994358781304, iteration: 28275
loss: 0.9692716002464294,grad_norm: 0.9999996529047932, iteration: 28276
loss: 0.9880419373512268,grad_norm: 0.8819783605143633, iteration: 28277
loss: 1.002663016319275,grad_norm: 0.9628625340922586, iteration: 28278
loss: 0.992781937122345,grad_norm: 0.9999990910097931, iteration: 28279
loss: 0.9760299921035767,grad_norm: 0.9068510298482766, iteration: 28280
loss: 1.0158394575119019,grad_norm: 0.7185178792004636, iteration: 28281
loss: 0.993238627910614,grad_norm: 0.9543128109636445, iteration: 28282
loss: 1.001975178718567,grad_norm: 0.9999990702955073, iteration: 28283
loss: 1.0161104202270508,grad_norm: 0.858611412189572, iteration: 28284
loss: 1.0077613592147827,grad_norm: 0.9999991009202706, iteration: 28285
loss: 1.0091038942337036,grad_norm: 0.8989495632197179, iteration: 28286
loss: 0.9964613318443298,grad_norm: 0.881980032963791, iteration: 28287
loss: 1.0004535913467407,grad_norm: 0.9181310548960114, iteration: 28288
loss: 1.0480155944824219,grad_norm: 0.9999990909389055, iteration: 28289
loss: 1.0164105892181396,grad_norm: 0.8760978498314514, iteration: 28290
loss: 0.9942461252212524,grad_norm: 0.9838853727517269, iteration: 28291
loss: 0.9903307557106018,grad_norm: 0.9999992516228718, iteration: 28292
loss: 0.9997453093528748,grad_norm: 0.9999991442149584, iteration: 28293
loss: 1.0947258472442627,grad_norm: 0.9999996531200833, iteration: 28294
loss: 1.029496192932129,grad_norm: 0.9999993107605722, iteration: 28295
loss: 0.975309431552887,grad_norm: 0.9999995712661767, iteration: 28296
loss: 1.0003446340560913,grad_norm: 0.9999992533905265, iteration: 28297
loss: 1.0025099515914917,grad_norm: 0.9168582909888355, iteration: 28298
loss: 1.01630699634552,grad_norm: 0.9999997193592239, iteration: 28299
loss: 1.0035914182662964,grad_norm: 0.9999990450222017, iteration: 28300
loss: 1.0382462739944458,grad_norm: 0.9934003425352652, iteration: 28301
loss: 1.0052034854888916,grad_norm: 0.9999991228871751, iteration: 28302
loss: 1.0041892528533936,grad_norm: 0.999999454221116, iteration: 28303
loss: 0.9902755618095398,grad_norm: 0.9439183790765546, iteration: 28304
loss: 1.0540400743484497,grad_norm: 0.9999997594892832, iteration: 28305
loss: 1.0377177000045776,grad_norm: 0.9916264628324146, iteration: 28306
loss: 1.0226478576660156,grad_norm: 0.9451243572178909, iteration: 28307
loss: 0.9874318242073059,grad_norm: 0.9265846732307513, iteration: 28308
loss: 1.031421422958374,grad_norm: 0.999999111301136, iteration: 28309
loss: 1.0300757884979248,grad_norm: 0.9999991049339876, iteration: 28310
loss: 1.061213731765747,grad_norm: 0.9999990066987378, iteration: 28311
loss: 0.9979956150054932,grad_norm: 0.9999989907218273, iteration: 28312
loss: 1.0198321342468262,grad_norm: 0.9999990633033169, iteration: 28313
loss: 1.038004994392395,grad_norm: 0.9995935399127878, iteration: 28314
loss: 0.9852409958839417,grad_norm: 0.9999992648635946, iteration: 28315
loss: 1.0017563104629517,grad_norm: 0.9999997815244988, iteration: 28316
loss: 0.9887054562568665,grad_norm: 0.9999991925729488, iteration: 28317
loss: 1.052169919013977,grad_norm: 0.9999997645392642, iteration: 28318
loss: 1.056623101234436,grad_norm: 0.999999604697499, iteration: 28319
loss: 1.0125614404678345,grad_norm: 0.9999991390730365, iteration: 28320
loss: 1.0020958185195923,grad_norm: 0.8649922136783261, iteration: 28321
loss: 1.0265554189682007,grad_norm: 0.9781276085869581, iteration: 28322
loss: 1.0153436660766602,grad_norm: 0.9408911994000202, iteration: 28323
loss: 1.0174314975738525,grad_norm: 0.9999990394076511, iteration: 28324
loss: 1.0118613243103027,grad_norm: 0.9903136097882418, iteration: 28325
loss: 1.035019874572754,grad_norm: 0.9999993941135804, iteration: 28326
loss: 0.9882689714431763,grad_norm: 0.9999993059837896, iteration: 28327
loss: 1.031756043434143,grad_norm: 0.9999990620574379, iteration: 28328
loss: 1.0041486024856567,grad_norm: 0.8555401177072516, iteration: 28329
loss: 0.9702599048614502,grad_norm: 0.939277205265749, iteration: 28330
loss: 1.0138933658599854,grad_norm: 0.9999997894500021, iteration: 28331
loss: 1.0207793712615967,grad_norm: 0.9325757989992871, iteration: 28332
loss: 1.0347963571548462,grad_norm: 0.9999994526942149, iteration: 28333
loss: 1.1097664833068848,grad_norm: 0.9999994363189537, iteration: 28334
loss: 1.0415841341018677,grad_norm: 0.9999996080119112, iteration: 28335
loss: 1.0623619556427002,grad_norm: 0.9999990346745201, iteration: 28336
loss: 1.0113985538482666,grad_norm: 0.9999991301844448, iteration: 28337
loss: 1.0623295307159424,grad_norm: 0.9999994077544733, iteration: 28338
loss: 1.001245141029358,grad_norm: 0.9999991179917955, iteration: 28339
loss: 1.014674186706543,grad_norm: 0.9999989938983309, iteration: 28340
loss: 0.9579097628593445,grad_norm: 0.999998972350908, iteration: 28341
loss: 0.9900970458984375,grad_norm: 0.9419212549188145, iteration: 28342
loss: 1.024827003479004,grad_norm: 0.9999991267806104, iteration: 28343
loss: 1.0162711143493652,grad_norm: 0.9835658081231522, iteration: 28344
loss: 1.0143063068389893,grad_norm: 0.9678113489542564, iteration: 28345
loss: 0.9937939643859863,grad_norm: 0.9999991072980071, iteration: 28346
loss: 1.015717625617981,grad_norm: 0.9999990151241135, iteration: 28347
loss: 1.0276347398757935,grad_norm: 0.8849870840486664, iteration: 28348
loss: 1.0132945775985718,grad_norm: 0.9159103856392045, iteration: 28349
loss: 1.0353021621704102,grad_norm: 0.9028521334198593, iteration: 28350
loss: 1.029486060142517,grad_norm: 0.9633076416216311, iteration: 28351
loss: 0.9853844046592712,grad_norm: 0.9999991316789512, iteration: 28352
loss: 1.0343971252441406,grad_norm: 0.9999991948095048, iteration: 28353
loss: 0.9927502274513245,grad_norm: 0.9015283031021851, iteration: 28354
loss: 0.9989654421806335,grad_norm: 0.9999993230073383, iteration: 28355
loss: 0.9958001375198364,grad_norm: 0.8523210445767887, iteration: 28356
loss: 1.0152076482772827,grad_norm: 0.9999991151017403, iteration: 28357
loss: 1.0004560947418213,grad_norm: 0.9999990606849055, iteration: 28358
loss: 1.0196874141693115,grad_norm: 0.9999991820991901, iteration: 28359
loss: 1.0385733842849731,grad_norm: 0.9999992189413226, iteration: 28360
loss: 0.9917865991592407,grad_norm: 0.9638039887603104, iteration: 28361
loss: 0.9766932725906372,grad_norm: 0.918438728522395, iteration: 28362
loss: 1.0194506645202637,grad_norm: 0.9999994448983642, iteration: 28363
loss: 0.9937182068824768,grad_norm: 0.8558557267140677, iteration: 28364
loss: 1.0095264911651611,grad_norm: 0.9358927652679901, iteration: 28365
loss: 0.9713413119316101,grad_norm: 0.7750652160455256, iteration: 28366
loss: 1.065653920173645,grad_norm: 0.9999991710806057, iteration: 28367
loss: 1.0144593715667725,grad_norm: 0.9999991022469317, iteration: 28368
loss: 1.0716296434402466,grad_norm: 0.9999993040421058, iteration: 28369
loss: 1.0045733451843262,grad_norm: 0.9927819933032636, iteration: 28370
loss: 1.0156002044677734,grad_norm: 0.9999991083913976, iteration: 28371
loss: 1.0284096002578735,grad_norm: 0.999078780424284, iteration: 28372
loss: 1.0587092638015747,grad_norm: 0.9971986997915362, iteration: 28373
loss: 0.9931168556213379,grad_norm: 0.9999991860434879, iteration: 28374
loss: 1.0733338594436646,grad_norm: 0.9999993736477252, iteration: 28375
loss: 1.0211396217346191,grad_norm: 0.9999990250869389, iteration: 28376
loss: 0.992654025554657,grad_norm: 0.9999991661755131, iteration: 28377
loss: 1.0232858657836914,grad_norm: 0.9999995592712239, iteration: 28378
loss: 1.0293684005737305,grad_norm: 0.9999992518165467, iteration: 28379
loss: 1.001677393913269,grad_norm: 0.9999991143178795, iteration: 28380
loss: 1.018135905265808,grad_norm: 0.8916361438941353, iteration: 28381
loss: 1.0479743480682373,grad_norm: 0.9999992596585342, iteration: 28382
loss: 1.0217375755310059,grad_norm: 0.9999995036875878, iteration: 28383
loss: 0.9969502091407776,grad_norm: 0.9999990851510134, iteration: 28384
loss: 0.9931091666221619,grad_norm: 0.9248332404228342, iteration: 28385
loss: 0.99817955493927,grad_norm: 0.8966922287222527, iteration: 28386
loss: 1.0269609689712524,grad_norm: 0.903150571270146, iteration: 28387
loss: 1.0295284986495972,grad_norm: 0.8889728333754134, iteration: 28388
loss: 0.9884593486785889,grad_norm: 0.9924961621706277, iteration: 28389
loss: 1.0177949666976929,grad_norm: 0.9360182315771872, iteration: 28390
loss: 1.0066092014312744,grad_norm: 0.9999992914554517, iteration: 28391
loss: 1.031368613243103,grad_norm: 0.919052338188619, iteration: 28392
loss: 0.9821239709854126,grad_norm: 0.9188464241744947, iteration: 28393
loss: 0.9926835894584656,grad_norm: 0.9999992108986678, iteration: 28394
loss: 0.9954479336738586,grad_norm: 0.9561191918523085, iteration: 28395
loss: 0.9768443703651428,grad_norm: 0.9999998696897484, iteration: 28396
loss: 0.9790825843811035,grad_norm: 0.8173664307212967, iteration: 28397
loss: 0.984595537185669,grad_norm: 0.920380842568886, iteration: 28398
loss: 0.986487865447998,grad_norm: 0.9999991360693392, iteration: 28399
loss: 1.0064289569854736,grad_norm: 0.9372534606424481, iteration: 28400
loss: 1.0257189273834229,grad_norm: 0.9999991390932216, iteration: 28401
loss: 1.061227798461914,grad_norm: 0.9999992668515083, iteration: 28402
loss: 1.019543170928955,grad_norm: 0.9999991246178711, iteration: 28403
loss: 1.0260270833969116,grad_norm: 0.9999992096032752, iteration: 28404
loss: 1.0114248991012573,grad_norm: 0.9999996433119432, iteration: 28405
loss: 1.0092540979385376,grad_norm: 0.9999991802983108, iteration: 28406
loss: 1.0021368265151978,grad_norm: 0.9576514171877657, iteration: 28407
loss: 0.9809854626655579,grad_norm: 0.9999989457837328, iteration: 28408
loss: 0.9878710508346558,grad_norm: 0.9999992925658379, iteration: 28409
loss: 0.9965475797653198,grad_norm: 0.8957144211293147, iteration: 28410
loss: 1.0151385068893433,grad_norm: 0.9966190701967134, iteration: 28411
loss: 1.0157065391540527,grad_norm: 0.9999992379439975, iteration: 28412
loss: 0.9703099131584167,grad_norm: 0.9999991686912013, iteration: 28413
loss: 1.0027488470077515,grad_norm: 0.9428194614252597, iteration: 28414
loss: 1.0010350942611694,grad_norm: 0.9397861844438377, iteration: 28415
loss: 0.9853699803352356,grad_norm: 0.9999991447565171, iteration: 28416
loss: 1.0143678188323975,grad_norm: 0.9999992375772679, iteration: 28417
loss: 1.0191116333007812,grad_norm: 0.9604705761985549, iteration: 28418
loss: 1.032586932182312,grad_norm: 0.9735906731332618, iteration: 28419
loss: 1.0296860933303833,grad_norm: 0.9999992758960171, iteration: 28420
loss: 0.9889682531356812,grad_norm: 0.9999991181984736, iteration: 28421
loss: 1.008561611175537,grad_norm: 0.8774518490567539, iteration: 28422
loss: 0.9855552911758423,grad_norm: 0.9999993643375821, iteration: 28423
loss: 0.9886394143104553,grad_norm: 0.9999998228561151, iteration: 28424
loss: 0.9650676250457764,grad_norm: 0.9395871000681478, iteration: 28425
loss: 0.9839608669281006,grad_norm: 0.938244367838016, iteration: 28426
loss: 1.0454292297363281,grad_norm: 0.8616655478858382, iteration: 28427
loss: 0.9864730834960938,grad_norm: 0.990857795407152, iteration: 28428
loss: 0.9798979759216309,grad_norm: 0.9689106959260543, iteration: 28429
loss: 0.9828653335571289,grad_norm: 0.8991076069555503, iteration: 28430
loss: 1.0132265090942383,grad_norm: 0.9999991042264154, iteration: 28431
loss: 0.9972253441810608,grad_norm: 0.9999989851872106, iteration: 28432
loss: 1.0090649127960205,grad_norm: 0.9999991207386831, iteration: 28433
loss: 1.0123707056045532,grad_norm: 0.9999991017977373, iteration: 28434
loss: 1.0158474445343018,grad_norm: 0.9999992279452219, iteration: 28435
loss: 1.0193490982055664,grad_norm: 0.9999991108044248, iteration: 28436
loss: 0.9971493482589722,grad_norm: 0.9188288927061519, iteration: 28437
loss: 0.9756543636322021,grad_norm: 0.9999989891332016, iteration: 28438
loss: 0.9584759473800659,grad_norm: 0.9584092673909717, iteration: 28439
loss: 1.0234969854354858,grad_norm: 0.9999990798704573, iteration: 28440
loss: 1.0200494527816772,grad_norm: 0.805753635037167, iteration: 28441
loss: 0.9759677648544312,grad_norm: 0.8037499720732061, iteration: 28442
loss: 1.0397671461105347,grad_norm: 0.9999996567851853, iteration: 28443
loss: 1.0023858547210693,grad_norm: 0.9999990667322851, iteration: 28444
loss: 1.056941032409668,grad_norm: 0.8659510346409963, iteration: 28445
loss: 1.0411320924758911,grad_norm: 0.9999991783823664, iteration: 28446
loss: 0.9699183702468872,grad_norm: 0.9285771708502123, iteration: 28447
loss: 1.0496326684951782,grad_norm: 0.999999111096789, iteration: 28448
loss: 0.9845720529556274,grad_norm: 0.9999991113335598, iteration: 28449
loss: 0.9994922876358032,grad_norm: 0.9047519371254994, iteration: 28450
loss: 0.9929312467575073,grad_norm: 0.9999992389774554, iteration: 28451
loss: 0.9772875308990479,grad_norm: 0.9999992304892075, iteration: 28452
loss: 0.9936780333518982,grad_norm: 0.9999993349844285, iteration: 28453
loss: 1.0095477104187012,grad_norm: 0.8728880149846299, iteration: 28454
loss: 1.0048236846923828,grad_norm: 0.999999061614931, iteration: 28455
loss: 1.0135716199874878,grad_norm: 0.9999993515308669, iteration: 28456
loss: 1.0282342433929443,grad_norm: 0.85241302483783, iteration: 28457
loss: 1.0190502405166626,grad_norm: 0.9999990993279456, iteration: 28458
loss: 1.0019854307174683,grad_norm: 0.8908312445746438, iteration: 28459
loss: 0.9808438420295715,grad_norm: 0.9999994825435934, iteration: 28460
loss: 1.0338276624679565,grad_norm: 0.9999990914902299, iteration: 28461
loss: 1.0113582611083984,grad_norm: 0.9390798533664041, iteration: 28462
loss: 1.0318409204483032,grad_norm: 0.9999992979431201, iteration: 28463
loss: 1.006593108177185,grad_norm: 0.7938376979134062, iteration: 28464
loss: 1.0147947072982788,grad_norm: 0.999999116762298, iteration: 28465
loss: 1.0284364223480225,grad_norm: 0.8095847864022756, iteration: 28466
loss: 1.0290813446044922,grad_norm: 0.8648643974848665, iteration: 28467
loss: 0.9516481161117554,grad_norm: 0.9999990377171725, iteration: 28468
loss: 0.9727516770362854,grad_norm: 0.9999991396223156, iteration: 28469
loss: 1.027449607849121,grad_norm: 0.9999992635301473, iteration: 28470
loss: 0.9833529591560364,grad_norm: 0.9999991336871327, iteration: 28471
loss: 1.0086770057678223,grad_norm: 0.9142365447578905, iteration: 28472
loss: 1.0492500066757202,grad_norm: 0.9999992176004319, iteration: 28473
loss: 0.9741993546485901,grad_norm: 0.9092275852722244, iteration: 28474
loss: 0.9719884395599365,grad_norm: 0.954871126918171, iteration: 28475
loss: 0.9655643701553345,grad_norm: 0.9999991131502709, iteration: 28476
loss: 1.0027161836624146,grad_norm: 0.8716623910807868, iteration: 28477
loss: 1.0046254396438599,grad_norm: 0.999999146200911, iteration: 28478
loss: 1.0444910526275635,grad_norm: 0.999999033577581, iteration: 28479
loss: 0.9910711050033569,grad_norm: 0.8025109036554084, iteration: 28480
loss: 1.037882924079895,grad_norm: 0.9999994483203642, iteration: 28481
loss: 1.0157854557037354,grad_norm: 0.7958245807675481, iteration: 28482
loss: 0.9799100160598755,grad_norm: 0.9500776701969844, iteration: 28483
loss: 0.9748846888542175,grad_norm: 0.9999994668679456, iteration: 28484
loss: 1.0281286239624023,grad_norm: 0.8852558770219741, iteration: 28485
loss: 0.9924038648605347,grad_norm: 0.9733952165097255, iteration: 28486
loss: 1.0186740159988403,grad_norm: 0.9999991306244568, iteration: 28487
loss: 1.0319266319274902,grad_norm: 0.9999991149959472, iteration: 28488
loss: 0.9982607960700989,grad_norm: 0.9398216812594006, iteration: 28489
loss: 1.00272798538208,grad_norm: 0.8427165330736843, iteration: 28490
loss: 1.0507477521896362,grad_norm: 0.9999995047174622, iteration: 28491
loss: 0.978464663028717,grad_norm: 0.9410596935870913, iteration: 28492
loss: 0.989770770072937,grad_norm: 0.9999991043791426, iteration: 28493
loss: 0.9761018753051758,grad_norm: 0.9999990956503996, iteration: 28494
loss: 0.9915719032287598,grad_norm: 0.9492405189571786, iteration: 28495
loss: 1.001036286354065,grad_norm: 0.9292671327572992, iteration: 28496
loss: 1.0212202072143555,grad_norm: 0.999999384126125, iteration: 28497
loss: 1.0090582370758057,grad_norm: 0.8228673064125703, iteration: 28498
loss: 1.0127531290054321,grad_norm: 0.8964087186178202, iteration: 28499
loss: 1.0466128587722778,grad_norm: 0.9999994508887936, iteration: 28500
loss: 1.0293117761611938,grad_norm: 0.9999989529650626, iteration: 28501
loss: 1.034843921661377,grad_norm: 0.9999991336199383, iteration: 28502
loss: 1.0220030546188354,grad_norm: 0.9999991689490463, iteration: 28503
loss: 1.015345811843872,grad_norm: 0.9464172097563724, iteration: 28504
loss: 1.0170423984527588,grad_norm: 0.9913557540277327, iteration: 28505
loss: 1.0129421949386597,grad_norm: 0.9999989949811499, iteration: 28506
loss: 1.0230443477630615,grad_norm: 0.999999129938621, iteration: 28507
loss: 1.068044662475586,grad_norm: 0.999999236478912, iteration: 28508
loss: 1.0400254726409912,grad_norm: 0.9999990504036897, iteration: 28509
loss: 1.0491993427276611,grad_norm: 0.9999989941107159, iteration: 28510
loss: 1.0261532068252563,grad_norm: 0.8050638444239099, iteration: 28511
loss: 0.9856336116790771,grad_norm: 0.996369360495407, iteration: 28512
loss: 1.0123533010482788,grad_norm: 0.8887072799552111, iteration: 28513
loss: 1.0008150339126587,grad_norm: 0.7832849283942167, iteration: 28514
loss: 1.0412853956222534,grad_norm: 0.9999992335734553, iteration: 28515
loss: 1.0116478204727173,grad_norm: 0.7840828261003521, iteration: 28516
loss: 1.0227257013320923,grad_norm: 0.9999991709582524, iteration: 28517
loss: 1.0016896724700928,grad_norm: 0.9999991144454118, iteration: 28518
loss: 0.9949222207069397,grad_norm: 0.953411448897313, iteration: 28519
loss: 0.9949630498886108,grad_norm: 0.9999990524128042, iteration: 28520
loss: 1.0021047592163086,grad_norm: 0.9999991836063578, iteration: 28521
loss: 0.9873540997505188,grad_norm: 0.9627695697949828, iteration: 28522
loss: 1.0101512670516968,grad_norm: 0.9999993333247692, iteration: 28523
loss: 1.0320972204208374,grad_norm: 0.9247229250723262, iteration: 28524
loss: 1.0434094667434692,grad_norm: 0.9999990327310293, iteration: 28525
loss: 1.0042108297348022,grad_norm: 0.9999992101610675, iteration: 28526
loss: 0.9729915261268616,grad_norm: 0.999999184153598, iteration: 28527
loss: 0.9884739518165588,grad_norm: 0.9999990191348401, iteration: 28528
loss: 0.9868633151054382,grad_norm: 0.9999991482703449, iteration: 28529
loss: 1.0121989250183105,grad_norm: 0.9029114135386042, iteration: 28530
loss: 1.00883948802948,grad_norm: 0.9999990569400096, iteration: 28531
loss: 1.036088228225708,grad_norm: 0.8543621378515746, iteration: 28532
loss: 0.9855089783668518,grad_norm: 0.9999990703127801, iteration: 28533
loss: 0.9594806432723999,grad_norm: 0.8954593840151914, iteration: 28534
loss: 0.993431568145752,grad_norm: 0.9278283509784319, iteration: 28535
loss: 1.0048675537109375,grad_norm: 0.9999991445866362, iteration: 28536
loss: 0.980394721031189,grad_norm: 0.9831360360795532, iteration: 28537
loss: 1.0004876852035522,grad_norm: 0.8557457241115171, iteration: 28538
loss: 0.9815057516098022,grad_norm: 0.8520612070042777, iteration: 28539
loss: 0.960518479347229,grad_norm: 0.9747341285730196, iteration: 28540
loss: 0.9499174356460571,grad_norm: 0.9129992726203228, iteration: 28541
loss: 1.0066795349121094,grad_norm: 0.9556826056573666, iteration: 28542
loss: 0.9904423356056213,grad_norm: 0.811147157179391, iteration: 28543
loss: 1.04133141040802,grad_norm: 0.9999991107021607, iteration: 28544
loss: 1.0672216415405273,grad_norm: 0.9999997515590726, iteration: 28545
loss: 0.9959805607795715,grad_norm: 0.9999991790700187, iteration: 28546
loss: 0.9824996590614319,grad_norm: 0.9999989369331944, iteration: 28547
loss: 0.986621618270874,grad_norm: 0.9999990674385337, iteration: 28548
loss: 1.0061107873916626,grad_norm: 0.9999991702952726, iteration: 28549
loss: 1.0246765613555908,grad_norm: 0.99999923158007, iteration: 28550
loss: 1.0375022888183594,grad_norm: 0.9999992035189962, iteration: 28551
loss: 0.9802308678627014,grad_norm: 0.999999372635569, iteration: 28552
loss: 1.010628342628479,grad_norm: 0.974215704897557, iteration: 28553
loss: 0.9970725774765015,grad_norm: 0.9999994463209491, iteration: 28554
loss: 1.0070288181304932,grad_norm: 0.9999990344365418, iteration: 28555
loss: 0.9772738218307495,grad_norm: 0.9999992660669251, iteration: 28556
loss: 1.0606193542480469,grad_norm: 0.9999989655757742, iteration: 28557
loss: 0.9845464825630188,grad_norm: 0.9907036106929393, iteration: 28558
loss: 0.9766315817832947,grad_norm: 0.8303053616766555, iteration: 28559
loss: 1.0060793161392212,grad_norm: 0.9999990344659179, iteration: 28560
loss: 1.005185842514038,grad_norm: 0.9999990106608461, iteration: 28561
loss: 1.0058773756027222,grad_norm: 0.8647729164857708, iteration: 28562
loss: 1.0263067483901978,grad_norm: 0.9999993930963716, iteration: 28563
loss: 0.9776804447174072,grad_norm: 0.9999990276654132, iteration: 28564
loss: 1.0190273523330688,grad_norm: 0.9999992509025261, iteration: 28565
loss: 1.0257174968719482,grad_norm: 0.9007520248889285, iteration: 28566
loss: 1.0096837282180786,grad_norm: 0.9758492042353791, iteration: 28567
loss: 0.9833738803863525,grad_norm: 0.8829854124225566, iteration: 28568
loss: 0.9856210350990295,grad_norm: 0.9999993594379264, iteration: 28569
loss: 1.015101671218872,grad_norm: 0.9999990320087211, iteration: 28570
loss: 0.9863160848617554,grad_norm: 0.9233277419405249, iteration: 28571
loss: 1.0398956537246704,grad_norm: 0.9364739621817562, iteration: 28572
loss: 0.9780223965644836,grad_norm: 0.9999992176154372, iteration: 28573
loss: 0.9645808339118958,grad_norm: 0.9849566105949562, iteration: 28574
loss: 1.0159841775894165,grad_norm: 0.9999991004816531, iteration: 28575
loss: 0.9879783391952515,grad_norm: 0.9999997673445321, iteration: 28576
loss: 0.9993718266487122,grad_norm: 0.9999991737668112, iteration: 28577
loss: 0.9906444549560547,grad_norm: 0.994978187271496, iteration: 28578
loss: 0.9842250943183899,grad_norm: 0.9601315074347249, iteration: 28579
loss: 0.9950515031814575,grad_norm: 0.9999990583238705, iteration: 28580
loss: 0.9919227957725525,grad_norm: 0.9999995978659795, iteration: 28581
loss: 1.0610101222991943,grad_norm: 0.9999992065428364, iteration: 28582
loss: 1.0353807210922241,grad_norm: 0.9999998611946617, iteration: 28583
loss: 1.0273314714431763,grad_norm: 0.9999991133837045, iteration: 28584
loss: 1.0290554761886597,grad_norm: 0.9999991231627356, iteration: 28585
loss: 0.9975970387458801,grad_norm: 0.9328479584503425, iteration: 28586
loss: 1.00681734085083,grad_norm: 0.9999991705312801, iteration: 28587
loss: 0.9887537360191345,grad_norm: 0.9631914662143589, iteration: 28588
loss: 1.0322000980377197,grad_norm: 0.9999990840167529, iteration: 28589
loss: 0.9858666062355042,grad_norm: 0.998196080178323, iteration: 28590
loss: 1.031030535697937,grad_norm: 0.9999991624852503, iteration: 28591
loss: 1.0499317646026611,grad_norm: 0.9999991797997445, iteration: 28592
loss: 0.9953718185424805,grad_norm: 0.9774093887021578, iteration: 28593
loss: 1.0297256708145142,grad_norm: 0.9945541011055757, iteration: 28594
loss: 1.053267478942871,grad_norm: 0.9999989970187384, iteration: 28595
loss: 1.0149165391921997,grad_norm: 0.9739265746063237, iteration: 28596
loss: 1.0055807828903198,grad_norm: 0.999999021114921, iteration: 28597
loss: 0.9972435235977173,grad_norm: 0.995709995268627, iteration: 28598
loss: 0.9980160593986511,grad_norm: 0.9999992377819743, iteration: 28599
loss: 0.9959052801132202,grad_norm: 0.9235958775709555, iteration: 28600
loss: 1.0500158071517944,grad_norm: 0.9999995801056991, iteration: 28601
loss: 0.9898760914802551,grad_norm: 0.9999992738100877, iteration: 28602
loss: 0.9771576523780823,grad_norm: 0.9999991458724232, iteration: 28603
loss: 0.9838522672653198,grad_norm: 0.9468090160734248, iteration: 28604
loss: 0.9897386431694031,grad_norm: 0.9999991599622314, iteration: 28605
loss: 0.9858513474464417,grad_norm: 0.9962262260488888, iteration: 28606
loss: 1.0066450834274292,grad_norm: 0.9697642438735949, iteration: 28607
loss: 1.0147818326950073,grad_norm: 0.9999990433468282, iteration: 28608
loss: 1.0299444198608398,grad_norm: 0.9999991005248564, iteration: 28609
loss: 1.0420539379119873,grad_norm: 0.9185853544439824, iteration: 28610
loss: 1.0088356733322144,grad_norm: 0.9851404226218199, iteration: 28611
loss: 1.0138869285583496,grad_norm: 0.9301309598626639, iteration: 28612
loss: 1.0302181243896484,grad_norm: 0.9999995883855475, iteration: 28613
loss: 1.068250298500061,grad_norm: 0.9999997383200347, iteration: 28614
loss: 1.002936840057373,grad_norm: 0.8901452416402766, iteration: 28615
loss: 1.0286571979522705,grad_norm: 0.9999990441179556, iteration: 28616
loss: 1.0206242799758911,grad_norm: 0.9999996219777452, iteration: 28617
loss: 0.9717985987663269,grad_norm: 0.9999991883356896, iteration: 28618
loss: 1.0083719491958618,grad_norm: 0.9860411691944251, iteration: 28619
loss: 1.0249841213226318,grad_norm: 0.9999991154800231, iteration: 28620
loss: 1.0151863098144531,grad_norm: 0.9999989755219043, iteration: 28621
loss: 1.0320401191711426,grad_norm: 0.9999991179445132, iteration: 28622
loss: 1.0105183124542236,grad_norm: 0.999999035396709, iteration: 28623
loss: 1.0494979619979858,grad_norm: 0.9999989562755571, iteration: 28624
loss: 1.0573794841766357,grad_norm: 0.9999993619807372, iteration: 28625
loss: 0.9843250513076782,grad_norm: 0.9645370632526957, iteration: 28626
loss: 1.0277671813964844,grad_norm: 0.8540231068086643, iteration: 28627
loss: 0.9799156188964844,grad_norm: 0.9970689631031238, iteration: 28628
loss: 1.0353323221206665,grad_norm: 0.9999995263040585, iteration: 28629
loss: 0.980568528175354,grad_norm: 0.9999991954450208, iteration: 28630
loss: 1.0120298862457275,grad_norm: 0.9999990708622899, iteration: 28631
loss: 0.9859133958816528,grad_norm: 0.9999995636573281, iteration: 28632
loss: 0.9707351326942444,grad_norm: 0.9239460366227457, iteration: 28633
loss: 1.0323680639266968,grad_norm: 0.999999179667077, iteration: 28634
loss: 1.0273761749267578,grad_norm: 0.8713971980470266, iteration: 28635
loss: 1.0224113464355469,grad_norm: 0.9999997856491831, iteration: 28636
loss: 0.9856417179107666,grad_norm: 0.9666832343910406, iteration: 28637
loss: 0.9906936883926392,grad_norm: 0.9236397895368205, iteration: 28638
loss: 1.0321764945983887,grad_norm: 0.9827459374860792, iteration: 28639
loss: 1.0320017337799072,grad_norm: 0.9297394203273854, iteration: 28640
loss: 1.092514991760254,grad_norm: 0.9999997902803766, iteration: 28641
loss: 1.0014185905456543,grad_norm: 0.8972623253075328, iteration: 28642
loss: 0.9775708317756653,grad_norm: 0.9999992952543825, iteration: 28643
loss: 1.025760293006897,grad_norm: 0.8669906570694964, iteration: 28644
loss: 1.0091232061386108,grad_norm: 0.9132333177218568, iteration: 28645
loss: 0.9830873608589172,grad_norm: 0.999999646020457, iteration: 28646
loss: 0.9735262989997864,grad_norm: 0.9701336210722551, iteration: 28647
loss: 1.006105661392212,grad_norm: 0.9999994000512277, iteration: 28648
loss: 1.0169936418533325,grad_norm: 0.9999992712626288, iteration: 28649
loss: 1.0223100185394287,grad_norm: 0.9999989851558699, iteration: 28650
loss: 0.9684955477714539,grad_norm: 0.9999992166464593, iteration: 28651
loss: 1.010327696800232,grad_norm: 0.9999990623032534, iteration: 28652
loss: 1.0645569562911987,grad_norm: 0.9999993941993898, iteration: 28653
loss: 0.9753885269165039,grad_norm: 0.999999066221466, iteration: 28654
loss: 0.9961404204368591,grad_norm: 0.9622254738479549, iteration: 28655
loss: 1.0043632984161377,grad_norm: 0.9024086961218651, iteration: 28656
loss: 0.9845208525657654,grad_norm: 0.9635500209613064, iteration: 28657
loss: 1.013611078262329,grad_norm: 0.9263788520011559, iteration: 28658
loss: 1.0048465728759766,grad_norm: 0.8902033163136088, iteration: 28659
loss: 1.0481629371643066,grad_norm: 0.9999990824830044, iteration: 28660
loss: 0.9477828145027161,grad_norm: 0.9730991253429123, iteration: 28661
loss: 0.9972823858261108,grad_norm: 0.999999127637778, iteration: 28662
loss: 1.051169514656067,grad_norm: 0.9999993488817026, iteration: 28663
loss: 1.0311834812164307,grad_norm: 0.9999995784902306, iteration: 28664
loss: 1.0234274864196777,grad_norm: 0.932380803528146, iteration: 28665
loss: 1.0410873889923096,grad_norm: 0.9999995822625019, iteration: 28666
loss: 1.0102742910385132,grad_norm: 0.9999991924830867, iteration: 28667
loss: 1.0045973062515259,grad_norm: 0.9334875711444718, iteration: 28668
loss: 1.0003246068954468,grad_norm: 0.9237080637492415, iteration: 28669
loss: 0.987417459487915,grad_norm: 0.9999991238379303, iteration: 28670
loss: 0.9922237992286682,grad_norm: 0.9999990510277846, iteration: 28671
loss: 1.0743927955627441,grad_norm: 0.9999992641003091, iteration: 28672
loss: 1.0047324895858765,grad_norm: 0.9999989469047019, iteration: 28673
loss: 0.9963676333427429,grad_norm: 0.8757759124435989, iteration: 28674
loss: 1.0131831169128418,grad_norm: 0.8859103031885637, iteration: 28675
loss: 0.9822080731391907,grad_norm: 0.7415713555515138, iteration: 28676
loss: 0.9712070822715759,grad_norm: 0.8114505851411105, iteration: 28677
loss: 1.0225131511688232,grad_norm: 0.9999993474556281, iteration: 28678
loss: 0.9755716919898987,grad_norm: 0.9999990774319105, iteration: 28679
loss: 0.9835425019264221,grad_norm: 0.9951993420358526, iteration: 28680
loss: 0.9938378930091858,grad_norm: 0.9999991302724288, iteration: 28681
loss: 0.9936741590499878,grad_norm: 0.9999992047144941, iteration: 28682
loss: 0.9846550226211548,grad_norm: 0.9999991648868422, iteration: 28683
loss: 1.0011783838272095,grad_norm: 0.9999990893171155, iteration: 28684
loss: 1.02109956741333,grad_norm: 0.9999995301474434, iteration: 28685
loss: 1.0295591354370117,grad_norm: 0.9601155561952428, iteration: 28686
loss: 1.020051121711731,grad_norm: 0.9999990145198694, iteration: 28687
loss: 1.0019642114639282,grad_norm: 0.9562770121032427, iteration: 28688
loss: 1.0192149877548218,grad_norm: 0.9999992100550916, iteration: 28689
loss: 0.9864124059677124,grad_norm: 0.9999990293631901, iteration: 28690
loss: 1.0193910598754883,grad_norm: 0.99999910381575, iteration: 28691
loss: 1.0637543201446533,grad_norm: 0.9999992463583521, iteration: 28692
loss: 0.9821904301643372,grad_norm: 0.9563651609329848, iteration: 28693
loss: 1.0089462995529175,grad_norm: 0.9999990633868961, iteration: 28694
loss: 1.0294965505599976,grad_norm: 0.9999992878001509, iteration: 28695
loss: 1.0083179473876953,grad_norm: 0.8427968472481352, iteration: 28696
loss: 0.9526860117912292,grad_norm: 0.9776007439141227, iteration: 28697
loss: 1.0138744115829468,grad_norm: 0.9999992294286355, iteration: 28698
loss: 1.0205539464950562,grad_norm: 0.9999990560147366, iteration: 28699
loss: 0.9898495674133301,grad_norm: 0.92252678998307, iteration: 28700
loss: 1.0321872234344482,grad_norm: 0.9894385077428295, iteration: 28701
loss: 0.9843899011611938,grad_norm: 0.9999991332806234, iteration: 28702
loss: 1.030245304107666,grad_norm: 0.9999992191403149, iteration: 28703
loss: 1.0084716081619263,grad_norm: 0.934297298522937, iteration: 28704
loss: 0.9954091906547546,grad_norm: 0.8431393815942285, iteration: 28705
loss: 1.0410445928573608,grad_norm: 0.9999995018886386, iteration: 28706
loss: 1.0021982192993164,grad_norm: 0.9999991798317223, iteration: 28707
loss: 1.0199276208877563,grad_norm: 0.9999993203972943, iteration: 28708
loss: 1.0143051147460938,grad_norm: 0.999999220295325, iteration: 28709
loss: 0.9887601137161255,grad_norm: 0.9999990588502562, iteration: 28710
loss: 0.9689107537269592,grad_norm: 0.9351673879867692, iteration: 28711
loss: 1.0153237581253052,grad_norm: 0.9175879398388536, iteration: 28712
loss: 0.9939234852790833,grad_norm: 0.9999993602044596, iteration: 28713
loss: 1.0134942531585693,grad_norm: 0.9999990097404438, iteration: 28714
loss: 0.9912051558494568,grad_norm: 0.8857016126883643, iteration: 28715
loss: 0.9931332468986511,grad_norm: 0.9860191012996147, iteration: 28716
loss: 1.0188307762145996,grad_norm: 0.8682880278492386, iteration: 28717
loss: 0.9818321466445923,grad_norm: 0.9533153926905781, iteration: 28718
loss: 1.0299421548843384,grad_norm: 0.9571835959500085, iteration: 28719
loss: 1.0302482843399048,grad_norm: 0.9371530054130564, iteration: 28720
loss: 0.9974708557128906,grad_norm: 0.924977473885151, iteration: 28721
loss: 1.0250682830810547,grad_norm: 0.9999992195062568, iteration: 28722
loss: 0.9706495404243469,grad_norm: 0.9999991240200993, iteration: 28723
loss: 0.996094286441803,grad_norm: 0.9999991048602407, iteration: 28724
loss: 0.9881393909454346,grad_norm: 0.8533277126056302, iteration: 28725
loss: 0.9546216130256653,grad_norm: 0.9943959155992366, iteration: 28726
loss: 1.0268011093139648,grad_norm: 0.934705746982923, iteration: 28727
loss: 0.9944536089897156,grad_norm: 0.882976683096005, iteration: 28728
loss: 1.0074909925460815,grad_norm: 0.9999992362110711, iteration: 28729
loss: 1.0083287954330444,grad_norm: 0.84392624104304, iteration: 28730
loss: 1.0094140768051147,grad_norm: 0.9657844260772879, iteration: 28731
loss: 0.9903120994567871,grad_norm: 0.950299377408724, iteration: 28732
loss: 1.000091552734375,grad_norm: 0.8553312688469404, iteration: 28733
loss: 1.0285242795944214,grad_norm: 0.9999991640709878, iteration: 28734
loss: 1.050992727279663,grad_norm: 0.880090659695702, iteration: 28735
loss: 1.0140042304992676,grad_norm: 0.8249003155010186, iteration: 28736
loss: 1.0158238410949707,grad_norm: 0.913474890020737, iteration: 28737
loss: 0.9945817589759827,grad_norm: 0.7885202361622748, iteration: 28738
loss: 1.0128240585327148,grad_norm: 0.9626169604810181, iteration: 28739
loss: 1.029692530632019,grad_norm: 0.9999990874601151, iteration: 28740
loss: 1.0023066997528076,grad_norm: 0.9999991483048671, iteration: 28741
loss: 1.0300321578979492,grad_norm: 0.9999992177797143, iteration: 28742
loss: 0.9847866892814636,grad_norm: 0.9999990587480898, iteration: 28743
loss: 0.9637035131454468,grad_norm: 0.9999990545273526, iteration: 28744
loss: 0.9972405433654785,grad_norm: 0.9999991468651703, iteration: 28745
loss: 0.9945526123046875,grad_norm: 0.9999991513163149, iteration: 28746
loss: 1.046498417854309,grad_norm: 0.9999992366622814, iteration: 28747
loss: 0.9820636510848999,grad_norm: 0.9999992551226252, iteration: 28748
loss: 1.0044481754302979,grad_norm: 0.9999991587055649, iteration: 28749
loss: 1.0142585039138794,grad_norm: 0.9659039298861336, iteration: 28750
loss: 0.9955217838287354,grad_norm: 0.9999991572304603, iteration: 28751
loss: 1.0023081302642822,grad_norm: 0.9459483070507402, iteration: 28752
loss: 0.9819622039794922,grad_norm: 0.9218131089842554, iteration: 28753
loss: 0.98912513256073,grad_norm: 0.9852563869675675, iteration: 28754
loss: 1.0884519815444946,grad_norm: 0.9999996684352793, iteration: 28755
loss: 0.9907983541488647,grad_norm: 0.9999992683950643, iteration: 28756
loss: 1.0211952924728394,grad_norm: 0.9999996804685799, iteration: 28757
loss: 1.0305297374725342,grad_norm: 0.9868076024195391, iteration: 28758
loss: 0.9581055641174316,grad_norm: 0.910951517517672, iteration: 28759
loss: 0.9996531009674072,grad_norm: 0.9999991512240378, iteration: 28760
loss: 0.9987345933914185,grad_norm: 0.8777496212415977, iteration: 28761
loss: 0.9793012142181396,grad_norm: 0.900928894683534, iteration: 28762
loss: 0.9829980134963989,grad_norm: 0.9999991832040158, iteration: 28763
loss: 1.0076466798782349,grad_norm: 0.9999992450597368, iteration: 28764
loss: 1.003901481628418,grad_norm: 0.9999991331285527, iteration: 28765
loss: 1.0382170677185059,grad_norm: 0.9999991015851695, iteration: 28766
loss: 1.050466775894165,grad_norm: 0.9497623415937718, iteration: 28767
loss: 1.0167689323425293,grad_norm: 0.9999993870220177, iteration: 28768
loss: 1.0129135847091675,grad_norm: 0.9585280790794538, iteration: 28769
loss: 1.016913652420044,grad_norm: 0.9949586076346193, iteration: 28770
loss: 1.0251816511154175,grad_norm: 0.9453753029464911, iteration: 28771
loss: 1.0146923065185547,grad_norm: 0.9999993928549001, iteration: 28772
loss: 0.9537869095802307,grad_norm: 0.8875892538274135, iteration: 28773
loss: 0.990405261516571,grad_norm: 0.9659926304693486, iteration: 28774
loss: 1.0209509134292603,grad_norm: 0.9999991879382977, iteration: 28775
loss: 0.9969468116760254,grad_norm: 0.9999991815449746, iteration: 28776
loss: 1.0381858348846436,grad_norm: 0.786131424986553, iteration: 28777
loss: 1.0159482955932617,grad_norm: 0.9999991834736558, iteration: 28778
loss: 0.9850958585739136,grad_norm: 0.9999992712885991, iteration: 28779
loss: 1.0189268589019775,grad_norm: 0.9978038834739069, iteration: 28780
loss: 0.978566586971283,grad_norm: 0.929777939489096, iteration: 28781
loss: 1.0136022567749023,grad_norm: 0.9999992834125837, iteration: 28782
loss: 0.9892904758453369,grad_norm: 0.9106411177854883, iteration: 28783
loss: 1.0369426012039185,grad_norm: 0.9999990313778343, iteration: 28784
loss: 0.9960705637931824,grad_norm: 0.9999992383436791, iteration: 28785
loss: 1.0269978046417236,grad_norm: 0.9999991482365249, iteration: 28786
loss: 0.9912306070327759,grad_norm: 0.9938231789697355, iteration: 28787
loss: 0.9907614588737488,grad_norm: 0.9999992926936251, iteration: 28788
loss: 1.0117816925048828,grad_norm: 0.9999990736392622, iteration: 28789
loss: 1.0607534646987915,grad_norm: 0.999999198623462, iteration: 28790
loss: 1.0126162767410278,grad_norm: 0.9999991886083135, iteration: 28791
loss: 1.0570790767669678,grad_norm: 0.9999992420672673, iteration: 28792
loss: 0.9830264449119568,grad_norm: 0.9999991225264226, iteration: 28793
loss: 0.9822644591331482,grad_norm: 0.9999990500678607, iteration: 28794
loss: 1.0382732152938843,grad_norm: 0.9536183561254615, iteration: 28795
loss: 0.9637289047241211,grad_norm: 0.9999991170263111, iteration: 28796
loss: 1.0176019668579102,grad_norm: 0.9997675718412992, iteration: 28797
loss: 0.937185525894165,grad_norm: 0.9275276098324078, iteration: 28798
loss: 0.9976396560668945,grad_norm: 0.9999991670116618, iteration: 28799
loss: 0.9955575466156006,grad_norm: 0.9999992204100929, iteration: 28800
loss: 1.01426100730896,grad_norm: 0.9999990193864563, iteration: 28801
loss: 0.9946936964988708,grad_norm: 0.9999990186799225, iteration: 28802
loss: 1.0126103162765503,grad_norm: 0.9999992292585187, iteration: 28803
loss: 0.9898962378501892,grad_norm: 0.9999990683793173, iteration: 28804
loss: 0.9667639136314392,grad_norm: 0.9999991499040916, iteration: 28805
loss: 1.0265635251998901,grad_norm: 0.8844827318822968, iteration: 28806
loss: 1.00908362865448,grad_norm: 0.8710353149349066, iteration: 28807
loss: 1.0286304950714111,grad_norm: 0.9293487771909731, iteration: 28808
loss: 1.0411522388458252,grad_norm: 0.9222757093129043, iteration: 28809
loss: 1.0377280712127686,grad_norm: 0.9999993985544336, iteration: 28810
loss: 1.0148159265518188,grad_norm: 0.9999991606814402, iteration: 28811
loss: 1.025978446006775,grad_norm: 0.9999998303471681, iteration: 28812
loss: 1.0120725631713867,grad_norm: 0.9039975965800017, iteration: 28813
loss: 1.0337117910385132,grad_norm: 0.9999992051652614, iteration: 28814
loss: 1.0210225582122803,grad_norm: 0.999999029216606, iteration: 28815
loss: 1.02729332447052,grad_norm: 0.9999990738980697, iteration: 28816
loss: 0.9998552799224854,grad_norm: 0.9999991853967586, iteration: 28817
loss: 0.9723494648933411,grad_norm: 0.9999990568066454, iteration: 28818
loss: 1.0136570930480957,grad_norm: 0.9999990827807411, iteration: 28819
loss: 0.9854965209960938,grad_norm: 0.9999992780620043, iteration: 28820
loss: 1.0082899332046509,grad_norm: 0.9999991616830339, iteration: 28821
loss: 0.9878346920013428,grad_norm: 0.8587482079569924, iteration: 28822
loss: 1.0104498863220215,grad_norm: 0.988578696711971, iteration: 28823
loss: 1.0215004682540894,grad_norm: 0.9584121714797266, iteration: 28824
loss: 0.9963437914848328,grad_norm: 0.9570094021362257, iteration: 28825
loss: 1.0336594581604004,grad_norm: 0.9511232701908195, iteration: 28826
loss: 0.9524205923080444,grad_norm: 0.9643683483698324, iteration: 28827
loss: 1.0403259992599487,grad_norm: 0.9999989775045062, iteration: 28828
loss: 1.0090782642364502,grad_norm: 0.9571610950479529, iteration: 28829
loss: 1.0428496599197388,grad_norm: 0.9999992146021188, iteration: 28830
loss: 0.9681483507156372,grad_norm: 0.9999988857623392, iteration: 28831
loss: 0.9778323769569397,grad_norm: 0.9586522151530539, iteration: 28832
loss: 1.0413427352905273,grad_norm: 0.9999992003060827, iteration: 28833
loss: 0.9807854890823364,grad_norm: 0.9999991363357932, iteration: 28834
loss: 0.9993627667427063,grad_norm: 0.9999990762026542, iteration: 28835
loss: 1.0179424285888672,grad_norm: 0.9999991890017174, iteration: 28836
loss: 1.038411021232605,grad_norm: 0.9999990528101996, iteration: 28837
loss: 0.996447741985321,grad_norm: 0.9434773402397244, iteration: 28838
loss: 0.9775975346565247,grad_norm: 0.9999992208289501, iteration: 28839
loss: 0.9921663999557495,grad_norm: 0.9999990273372351, iteration: 28840
loss: 0.9660924673080444,grad_norm: 0.9410130974447045, iteration: 28841
loss: 0.9867849349975586,grad_norm: 0.9999991772439537, iteration: 28842
loss: 1.0053701400756836,grad_norm: 0.9999994637989507, iteration: 28843
loss: 1.0040925741195679,grad_norm: 0.7944463013421428, iteration: 28844
loss: 1.042847990989685,grad_norm: 0.9999991600057778, iteration: 28845
loss: 1.0176366567611694,grad_norm: 0.9999992764324156, iteration: 28846
loss: 0.9992787837982178,grad_norm: 0.9999990313135754, iteration: 28847
loss: 1.017762303352356,grad_norm: 0.8597345312552883, iteration: 28848
loss: 1.0576146841049194,grad_norm: 0.8375353505458544, iteration: 28849
loss: 1.0417217016220093,grad_norm: 0.999999967738829, iteration: 28850
loss: 1.006782054901123,grad_norm: 0.894842011808449, iteration: 28851
loss: 1.0052490234375,grad_norm: 0.9999989766318247, iteration: 28852
loss: 1.04853355884552,grad_norm: 0.9930410493949162, iteration: 28853
loss: 1.0082718133926392,grad_norm: 0.9551078838741069, iteration: 28854
loss: 0.9988943934440613,grad_norm: 0.9999991335188033, iteration: 28855
loss: 1.025668740272522,grad_norm: 0.910758931052816, iteration: 28856
loss: 0.9867526888847351,grad_norm: 0.999998949763272, iteration: 28857
loss: 0.9790759682655334,grad_norm: 0.8565834550529801, iteration: 28858
loss: 1.0164239406585693,grad_norm: 0.9999992219540019, iteration: 28859
loss: 0.996880292892456,grad_norm: 0.9061089845418783, iteration: 28860
loss: 1.0034215450286865,grad_norm: 0.8255033361063233, iteration: 28861
loss: 0.9800204634666443,grad_norm: 0.9572283621861538, iteration: 28862
loss: 1.0133014917373657,grad_norm: 0.9116098861947498, iteration: 28863
loss: 0.9872732162475586,grad_norm: 0.8344677663940416, iteration: 28864
loss: 1.0165324211120605,grad_norm: 0.9999991364556111, iteration: 28865
loss: 1.0075409412384033,grad_norm: 0.9298649075773113, iteration: 28866
loss: 1.0106974840164185,grad_norm: 0.7841224098928647, iteration: 28867
loss: 1.0039514303207397,grad_norm: 0.9330186206684555, iteration: 28868
loss: 1.010140299797058,grad_norm: 0.9999990737934771, iteration: 28869
loss: 0.9730425477027893,grad_norm: 0.9710845967443985, iteration: 28870
loss: 0.9998124837875366,grad_norm: 0.999999086027376, iteration: 28871
loss: 1.0258831977844238,grad_norm: 0.9158553218825247, iteration: 28872
loss: 1.0340293645858765,grad_norm: 0.9999998321210719, iteration: 28873
loss: 0.9826407432556152,grad_norm: 0.955253298155255, iteration: 28874
loss: 0.9954926371574402,grad_norm: 0.9999991913108135, iteration: 28875
loss: 1.026755928993225,grad_norm: 0.9349521507955837, iteration: 28876
loss: 1.0433495044708252,grad_norm: 0.9999992725732052, iteration: 28877
loss: 1.0383623838424683,grad_norm: 0.8427215486881131, iteration: 28878
loss: 1.0516024827957153,grad_norm: 0.9999998858034934, iteration: 28879
loss: 1.0018354654312134,grad_norm: 0.9999991820112549, iteration: 28880
loss: 1.012882947921753,grad_norm: 0.887933953268914, iteration: 28881
loss: 1.0245949029922485,grad_norm: 0.9872824176176044, iteration: 28882
loss: 0.9994031190872192,grad_norm: 0.8685491650630807, iteration: 28883
loss: 1.0513825416564941,grad_norm: 0.948489251281488, iteration: 28884
loss: 0.9811955690383911,grad_norm: 0.9999992409676928, iteration: 28885
loss: 1.005223274230957,grad_norm: 0.9535327627847202, iteration: 28886
loss: 1.0126416683197021,grad_norm: 0.9088027209378289, iteration: 28887
loss: 1.0264261960983276,grad_norm: 0.9999990247498843, iteration: 28888
loss: 1.0332602262496948,grad_norm: 0.9999998302262977, iteration: 28889
loss: 1.0331982374191284,grad_norm: 0.9999997779519485, iteration: 28890
loss: 1.0016136169433594,grad_norm: 0.9791261924731326, iteration: 28891
loss: 0.9842216968536377,grad_norm: 0.9165994226885417, iteration: 28892
loss: 0.9952734708786011,grad_norm: 0.9999991428258669, iteration: 28893
loss: 1.0054948329925537,grad_norm: 0.913782505080246, iteration: 28894
loss: 1.017943024635315,grad_norm: 0.8849676233079672, iteration: 28895
loss: 1.0093414783477783,grad_norm: 0.9999993035285571, iteration: 28896
loss: 1.018690824508667,grad_norm: 0.9999991194322113, iteration: 28897
loss: 1.0361489057540894,grad_norm: 0.9909306494586908, iteration: 28898
loss: 0.9966828227043152,grad_norm: 0.9313611594914559, iteration: 28899
loss: 1.0121605396270752,grad_norm: 0.8668606001998713, iteration: 28900
loss: 0.989483654499054,grad_norm: 0.914479375730865, iteration: 28901
loss: 0.9921806454658508,grad_norm: 0.9737248511928193, iteration: 28902
loss: 0.9684146642684937,grad_norm: 0.9167842146310553, iteration: 28903
loss: 1.0185433626174927,grad_norm: 0.9999989793511888, iteration: 28904
loss: 1.0247526168823242,grad_norm: 0.9999991695283926, iteration: 28905
loss: 1.010689377784729,grad_norm: 0.9999993360502816, iteration: 28906
loss: 1.0177485942840576,grad_norm: 0.8770479504669164, iteration: 28907
loss: 0.9651674032211304,grad_norm: 0.9999992101043081, iteration: 28908
loss: 1.0216642618179321,grad_norm: 0.9999990072935566, iteration: 28909
loss: 1.0058308839797974,grad_norm: 0.8700218100174885, iteration: 28910
loss: 1.009167194366455,grad_norm: 0.9999990442734233, iteration: 28911
loss: 0.9961386919021606,grad_norm: 0.9710449556443249, iteration: 28912
loss: 1.0042781829833984,grad_norm: 0.9999991364477703, iteration: 28913
loss: 0.99040287733078,grad_norm: 0.8704316089785625, iteration: 28914
loss: 1.0776689052581787,grad_norm: 0.999999253537978, iteration: 28915
loss: 0.9869210720062256,grad_norm: 0.8833494777654978, iteration: 28916
loss: 1.016477346420288,grad_norm: 0.8553411012396188, iteration: 28917
loss: 0.9948732256889343,grad_norm: 0.8392647924473156, iteration: 28918
loss: 0.9946712255477905,grad_norm: 0.86848856186136, iteration: 28919
loss: 1.0161170959472656,grad_norm: 0.97037760776912, iteration: 28920
loss: 1.0363799333572388,grad_norm: 0.9999995640958002, iteration: 28921
loss: 0.9930477142333984,grad_norm: 0.9999990982376651, iteration: 28922
loss: 0.9980595707893372,grad_norm: 0.9435469486786702, iteration: 28923
loss: 0.9875997304916382,grad_norm: 0.999999253945067, iteration: 28924
loss: 1.0063223838806152,grad_norm: 0.9999991263684215, iteration: 28925
loss: 0.9777553677558899,grad_norm: 0.9641354544517424, iteration: 28926
loss: 1.0511255264282227,grad_norm: 0.9999997531306326, iteration: 28927
loss: 1.0096474885940552,grad_norm: 0.9999990984208458, iteration: 28928
loss: 1.0350091457366943,grad_norm: 0.9999994849674124, iteration: 28929
loss: 1.012318730354309,grad_norm: 0.7826073219237112, iteration: 28930
loss: 0.9934995174407959,grad_norm: 0.8773427781130323, iteration: 28931
loss: 0.9948722720146179,grad_norm: 0.9999990388343305, iteration: 28932
loss: 0.9655614495277405,grad_norm: 0.9999990736659496, iteration: 28933
loss: 1.0404257774353027,grad_norm: 0.9999991717581089, iteration: 28934
loss: 1.0382055044174194,grad_norm: 0.953924124722439, iteration: 28935
loss: 0.9716809988021851,grad_norm: 0.785463444273152, iteration: 28936
loss: 0.9755921959877014,grad_norm: 0.7567352156172839, iteration: 28937
loss: 0.994074821472168,grad_norm: 0.9921820996856549, iteration: 28938
loss: 1.0223913192749023,grad_norm: 0.9999992365201682, iteration: 28939
loss: 0.9805362224578857,grad_norm: 0.9999990613795217, iteration: 28940
loss: 1.0861232280731201,grad_norm: 0.9999996116382114, iteration: 28941
loss: 0.9968621730804443,grad_norm: 0.9636203582918855, iteration: 28942
loss: 1.020737648010254,grad_norm: 0.9999993542080493, iteration: 28943
loss: 1.0334453582763672,grad_norm: 0.9999991640854704, iteration: 28944
loss: 0.9444416761398315,grad_norm: 0.9999991354657461, iteration: 28945
loss: 1.0078753232955933,grad_norm: 0.9318385650193136, iteration: 28946
loss: 0.9863023161888123,grad_norm: 0.9999992281803224, iteration: 28947
loss: 0.9656099081039429,grad_norm: 0.9999991193930872, iteration: 28948
loss: 1.0117868185043335,grad_norm: 0.9999990093887173, iteration: 28949
loss: 0.9860807657241821,grad_norm: 0.9999991944795354, iteration: 28950
loss: 1.0260881185531616,grad_norm: 0.9999990976504808, iteration: 28951
loss: 1.0171514749526978,grad_norm: 0.8748710199078464, iteration: 28952
loss: 0.9681176543235779,grad_norm: 0.8729343271722756, iteration: 28953
loss: 0.9583449363708496,grad_norm: 0.8300152103414412, iteration: 28954
loss: 1.0068786144256592,grad_norm: 0.9244916231967827, iteration: 28955
loss: 1.0392348766326904,grad_norm: 0.9999993060028597, iteration: 28956
loss: 1.03462553024292,grad_norm: 0.9999991596336698, iteration: 28957
loss: 1.0368143320083618,grad_norm: 0.9999992137892305, iteration: 28958
loss: 1.0116733312606812,grad_norm: 0.9999991883904563, iteration: 28959
loss: 1.0114405155181885,grad_norm: 0.9782510553534479, iteration: 28960
loss: 1.0446666479110718,grad_norm: 0.9999996757919452, iteration: 28961
loss: 0.9864009618759155,grad_norm: 0.9999992172808416, iteration: 28962
loss: 1.0241475105285645,grad_norm: 0.9807058097802371, iteration: 28963
loss: 1.0265029668807983,grad_norm: 0.9999991361607791, iteration: 28964
loss: 1.0635862350463867,grad_norm: 0.9706837127673229, iteration: 28965
loss: 1.00468111038208,grad_norm: 0.8602007325939823, iteration: 28966
loss: 1.0139496326446533,grad_norm: 0.8350366964384981, iteration: 28967
loss: 1.0282659530639648,grad_norm: 0.9999990564148874, iteration: 28968
loss: 0.9946528673171997,grad_norm: 0.8030842877877373, iteration: 28969
loss: 1.0407644510269165,grad_norm: 0.9999993299435618, iteration: 28970
loss: 0.9455345869064331,grad_norm: 0.9999991650806659, iteration: 28971
loss: 0.9899938106536865,grad_norm: 0.9160441802343343, iteration: 28972
loss: 1.0016746520996094,grad_norm: 0.7847661031047208, iteration: 28973
loss: 1.0197299718856812,grad_norm: 0.9999989933299628, iteration: 28974
loss: 1.0045989751815796,grad_norm: 0.9999990760656761, iteration: 28975
loss: 0.9858893156051636,grad_norm: 0.9999990876850386, iteration: 28976
loss: 0.9832137227058411,grad_norm: 0.806979729714643, iteration: 28977
loss: 0.9803482890129089,grad_norm: 0.9999991004346902, iteration: 28978
loss: 0.9992135763168335,grad_norm: 0.9999991736356588, iteration: 28979
loss: 0.9770928025245667,grad_norm: 0.9837799906109506, iteration: 28980
loss: 0.9950040578842163,grad_norm: 0.9999991267085974, iteration: 28981
loss: 1.0118203163146973,grad_norm: 0.9999992744260305, iteration: 28982
loss: 1.0247488021850586,grad_norm: 0.986975648185148, iteration: 28983
loss: 0.9590853452682495,grad_norm: 0.9999992603350889, iteration: 28984
loss: 1.0116755962371826,grad_norm: 0.9789406073044064, iteration: 28985
loss: 1.0399895906448364,grad_norm: 0.9216278057626492, iteration: 28986
loss: 0.9911403656005859,grad_norm: 0.938086209008293, iteration: 28987
loss: 1.0195033550262451,grad_norm: 0.999999050241282, iteration: 28988
loss: 1.0034126043319702,grad_norm: 0.999999063679401, iteration: 28989
loss: 1.0531132221221924,grad_norm: 0.9999997949614985, iteration: 28990
loss: 0.9857003688812256,grad_norm: 0.9999991664205434, iteration: 28991
loss: 1.0057549476623535,grad_norm: 0.859513213182926, iteration: 28992
loss: 1.000354290008545,grad_norm: 0.9999991955308712, iteration: 28993
loss: 1.0328449010849,grad_norm: 0.9055621514217347, iteration: 28994
loss: 0.9931786060333252,grad_norm: 0.9999995572777349, iteration: 28995
loss: 1.0122385025024414,grad_norm: 0.938043917377076, iteration: 28996
loss: 1.0297157764434814,grad_norm: 0.9841592179983036, iteration: 28997
loss: 0.978890597820282,grad_norm: 0.918882411556522, iteration: 28998
loss: 0.9736171960830688,grad_norm: 0.8599569360234028, iteration: 28999
loss: 0.9807062149047852,grad_norm: 0.9999992009726907, iteration: 29000
loss: 0.980695366859436,grad_norm: 0.9135752890599541, iteration: 29001
loss: 1.0020856857299805,grad_norm: 0.9357401216835376, iteration: 29002
loss: 1.0336312055587769,grad_norm: 0.9867417089997896, iteration: 29003
loss: 1.0324127674102783,grad_norm: 0.9999990916092449, iteration: 29004
loss: 1.000911831855774,grad_norm: 0.9369528070954964, iteration: 29005
loss: 1.0136057138442993,grad_norm: 0.8203650149703634, iteration: 29006
loss: 1.062558889389038,grad_norm: 0.8581915660808577, iteration: 29007
loss: 1.0032622814178467,grad_norm: 0.9361808823080642, iteration: 29008
loss: 1.0133029222488403,grad_norm: 0.9757783176106385, iteration: 29009
loss: 1.035611867904663,grad_norm: 0.9109977142598991, iteration: 29010
loss: 1.022628903388977,grad_norm: 0.8388524786389459, iteration: 29011
loss: 1.0036908388137817,grad_norm: 0.913734979649264, iteration: 29012
loss: 1.0075761079788208,grad_norm: 0.9999990361432688, iteration: 29013
loss: 1.0021377801895142,grad_norm: 0.9999992674169034, iteration: 29014
loss: 0.9912599325180054,grad_norm: 0.9397542021303604, iteration: 29015
loss: 1.0321128368377686,grad_norm: 0.965341804185661, iteration: 29016
loss: 0.9735064506530762,grad_norm: 0.9999992110726599, iteration: 29017
loss: 1.007131576538086,grad_norm: 0.9999993059036006, iteration: 29018
loss: 1.0190783739089966,grad_norm: 0.9999991948843608, iteration: 29019
loss: 0.9928302764892578,grad_norm: 0.9999990452413091, iteration: 29020
loss: 1.0379799604415894,grad_norm: 0.9999990821290181, iteration: 29021
loss: 0.9742527604103088,grad_norm: 0.918130533905148, iteration: 29022
loss: 1.035221815109253,grad_norm: 0.9718812912416681, iteration: 29023
loss: 1.038633108139038,grad_norm: 0.9999989779675625, iteration: 29024
loss: 1.0345404148101807,grad_norm: 0.9637952802879545, iteration: 29025
loss: 1.0349403619766235,grad_norm: 0.9195452925863886, iteration: 29026
loss: 0.9944745898246765,grad_norm: 0.9277566274859745, iteration: 29027
loss: 1.0001673698425293,grad_norm: 0.8571939419017389, iteration: 29028
loss: 1.0223907232284546,grad_norm: 0.9999991233333426, iteration: 29029
loss: 1.024151086807251,grad_norm: 0.835920991240827, iteration: 29030
loss: 1.033242106437683,grad_norm: 0.9176560490985641, iteration: 29031
loss: 1.0268187522888184,grad_norm: 0.9678956291336595, iteration: 29032
loss: 1.0352517366409302,grad_norm: 0.999999045182166, iteration: 29033
loss: 0.9824354648590088,grad_norm: 0.9012557225935063, iteration: 29034
loss: 1.038809895515442,grad_norm: 0.999999166755365, iteration: 29035
loss: 0.9811409711837769,grad_norm: 0.8198150223620887, iteration: 29036
loss: 1.0426433086395264,grad_norm: 0.9999991365586705, iteration: 29037
loss: 1.023236870765686,grad_norm: 0.9999995416316708, iteration: 29038
loss: 1.00754976272583,grad_norm: 0.9999990847100718, iteration: 29039
loss: 1.0494526624679565,grad_norm: 0.8747754341190664, iteration: 29040
loss: 0.9979892373085022,grad_norm: 0.9999989679360344, iteration: 29041
loss: 1.0141558647155762,grad_norm: 0.999999073724479, iteration: 29042
loss: 1.0077242851257324,grad_norm: 0.9999994452761655, iteration: 29043
loss: 1.0209048986434937,grad_norm: 0.9533012598844339, iteration: 29044
loss: 1.0225032567977905,grad_norm: 0.9999994358409177, iteration: 29045
loss: 1.0039582252502441,grad_norm: 0.999999038386922, iteration: 29046
loss: 0.9610139727592468,grad_norm: 0.9413287982577649, iteration: 29047
loss: 1.0371280908584595,grad_norm: 0.9999990925437897, iteration: 29048
loss: 0.9310851097106934,grad_norm: 0.9999990059973338, iteration: 29049
loss: 0.9827052354812622,grad_norm: 0.9008794974316658, iteration: 29050
loss: 1.0000933408737183,grad_norm: 0.9791844658605101, iteration: 29051
loss: 1.0468045473098755,grad_norm: 0.9999993873690013, iteration: 29052
loss: 1.009185791015625,grad_norm: 0.915881416273663, iteration: 29053
loss: 0.9782087206840515,grad_norm: 0.9133699080630656, iteration: 29054
loss: 1.0466150045394897,grad_norm: 0.999999247087762, iteration: 29055
loss: 0.9724287390708923,grad_norm: 0.9999992379278029, iteration: 29056
loss: 1.0175223350524902,grad_norm: 0.9999991608905328, iteration: 29057
loss: 1.001096248626709,grad_norm: 0.9999992640653774, iteration: 29058
loss: 1.0070316791534424,grad_norm: 0.760321115595567, iteration: 29059
loss: 1.0300076007843018,grad_norm: 0.9253735072485427, iteration: 29060
loss: 1.0415353775024414,grad_norm: 0.9999992524869343, iteration: 29061
loss: 0.9893696308135986,grad_norm: 0.9999990390454755, iteration: 29062
loss: 0.9744871854782104,grad_norm: 0.9999997868336006, iteration: 29063
loss: 1.0248855352401733,grad_norm: 0.9999992368628695, iteration: 29064
loss: 0.9936155080795288,grad_norm: 0.934915852836249, iteration: 29065
loss: 1.0658916234970093,grad_norm: 0.9999997620104345, iteration: 29066
loss: 1.0205005407333374,grad_norm: 0.8767574508800355, iteration: 29067
loss: 1.0222959518432617,grad_norm: 0.9660010971825141, iteration: 29068
loss: 1.0110870599746704,grad_norm: 0.894054950764639, iteration: 29069
loss: 1.0106444358825684,grad_norm: 0.9999991984703248, iteration: 29070
loss: 1.0555939674377441,grad_norm: 0.9999990569850291, iteration: 29071
loss: 1.0320194959640503,grad_norm: 0.9999990517682914, iteration: 29072
loss: 0.99845290184021,grad_norm: 0.9999991214954709, iteration: 29073
loss: 0.9849455952644348,grad_norm: 0.9228197983975145, iteration: 29074
loss: 0.9787099361419678,grad_norm: 0.992231175766789, iteration: 29075
loss: 0.9937620162963867,grad_norm: 0.9999991004282351, iteration: 29076
loss: 1.0171071290969849,grad_norm: 0.9999990955264815, iteration: 29077
loss: 1.014482021331787,grad_norm: 0.8238827687431665, iteration: 29078
loss: 0.9894925355911255,grad_norm: 0.958417272340845, iteration: 29079
loss: 0.9890459179878235,grad_norm: 0.9506958259388888, iteration: 29080
loss: 0.9880803823471069,grad_norm: 0.9999991434876264, iteration: 29081
loss: 1.0084367990493774,grad_norm: 0.9999990786374113, iteration: 29082
loss: 0.9955108761787415,grad_norm: 0.8172077842861991, iteration: 29083
loss: 0.9817784428596497,grad_norm: 0.9999990071592195, iteration: 29084
loss: 1.016455054283142,grad_norm: 0.9999991426064503, iteration: 29085
loss: 1.0143499374389648,grad_norm: 0.9287739470032672, iteration: 29086
loss: 1.014015793800354,grad_norm: 0.9999993465094108, iteration: 29087
loss: 1.031589150428772,grad_norm: 0.8609145749162511, iteration: 29088
loss: 1.0137196779251099,grad_norm: 0.8391203782786607, iteration: 29089
loss: 0.9765001535415649,grad_norm: 0.9722227336646193, iteration: 29090
loss: 0.9737271070480347,grad_norm: 0.9999990872190558, iteration: 29091
loss: 0.996533215045929,grad_norm: 0.9129939117394105, iteration: 29092
loss: 1.0119112730026245,grad_norm: 0.9999990117866195, iteration: 29093
loss: 1.0281254053115845,grad_norm: 0.9999991016942484, iteration: 29094
loss: 0.9814231991767883,grad_norm: 0.8257007733314224, iteration: 29095
loss: 1.0144473314285278,grad_norm: 0.8508152901212566, iteration: 29096
loss: 1.0298818349838257,grad_norm: 0.8205409501165707, iteration: 29097
loss: 0.9955892562866211,grad_norm: 0.9964086666665266, iteration: 29098
loss: 1.0313217639923096,grad_norm: 0.99999974713355, iteration: 29099
loss: 1.0250858068466187,grad_norm: 0.999999421315079, iteration: 29100
loss: 0.9623929858207703,grad_norm: 0.9999990632825977, iteration: 29101
loss: 1.0091897249221802,grad_norm: 0.8674707203665998, iteration: 29102
loss: 1.0320923328399658,grad_norm: 0.9345677007629547, iteration: 29103
loss: 0.9876018762588501,grad_norm: 0.9999993433307067, iteration: 29104
loss: 0.9924470782279968,grad_norm: 0.9999991155567128, iteration: 29105
loss: 1.0323280096054077,grad_norm: 0.9999991324748208, iteration: 29106
loss: 1.0201443433761597,grad_norm: 0.9999994836612526, iteration: 29107
loss: 1.0033882856369019,grad_norm: 0.9106218410991951, iteration: 29108
loss: 1.0059943199157715,grad_norm: 0.9557228678934153, iteration: 29109
loss: 0.9942141771316528,grad_norm: 0.9999995216753994, iteration: 29110
loss: 1.0243582725524902,grad_norm: 0.8369409871574286, iteration: 29111
loss: 0.98917555809021,grad_norm: 0.8856655449428814, iteration: 29112
loss: 1.0664921998977661,grad_norm: 0.9999995278964999, iteration: 29113
loss: 1.0500481128692627,grad_norm: 0.9999991838493776, iteration: 29114
loss: 1.0077006816864014,grad_norm: 0.9999989753696945, iteration: 29115
loss: 1.0068445205688477,grad_norm: 0.9741237395297927, iteration: 29116
loss: 0.9698619246482849,grad_norm: 0.9999991032522072, iteration: 29117
loss: 0.998950719833374,grad_norm: 0.8767652482745434, iteration: 29118
loss: 0.9876602292060852,grad_norm: 0.9999991863002863, iteration: 29119
loss: 0.9997056722640991,grad_norm: 0.8894444262684268, iteration: 29120
loss: 1.02669358253479,grad_norm: 0.9999992579722512, iteration: 29121
loss: 1.0521214008331299,grad_norm: 0.9999995035253803, iteration: 29122
loss: 1.0095405578613281,grad_norm: 0.9015368810723228, iteration: 29123
loss: 0.9763503670692444,grad_norm: 0.999999321530602, iteration: 29124
loss: 1.045751929283142,grad_norm: 0.8782979260700003, iteration: 29125
loss: 1.0138511657714844,grad_norm: 0.9926014717437279, iteration: 29126
loss: 1.0183908939361572,grad_norm: 0.9638454518938077, iteration: 29127
loss: 1.0505307912826538,grad_norm: 0.9999991159313003, iteration: 29128
loss: 0.9886296987533569,grad_norm: 0.9999994278029655, iteration: 29129
loss: 1.0041320323944092,grad_norm: 0.9999996134890559, iteration: 29130
loss: 0.978037416934967,grad_norm: 0.999999096403249, iteration: 29131
loss: 1.0238525867462158,grad_norm: 0.8806876617951889, iteration: 29132
loss: 1.0224610567092896,grad_norm: 0.9999989829149459, iteration: 29133
loss: 1.0073847770690918,grad_norm: 0.9999990234264147, iteration: 29134
loss: 1.0267857313156128,grad_norm: 0.999999223454409, iteration: 29135
loss: 1.03492271900177,grad_norm: 0.9999990483323309, iteration: 29136
loss: 1.2304019927978516,grad_norm: 0.9999997187386079, iteration: 29137
loss: 0.9994489550590515,grad_norm: 0.9371475563541677, iteration: 29138
loss: 1.0193084478378296,grad_norm: 0.9736974161895491, iteration: 29139
loss: 1.0086004734039307,grad_norm: 0.81779585081056, iteration: 29140
loss: 1.017396092414856,grad_norm: 0.9999990401063429, iteration: 29141
loss: 1.0184011459350586,grad_norm: 0.999999776830242, iteration: 29142
loss: 0.9981306791305542,grad_norm: 0.9999990716033919, iteration: 29143
loss: 1.0415167808532715,grad_norm: 0.9448691896726537, iteration: 29144
loss: 0.958778977394104,grad_norm: 0.9637135218477848, iteration: 29145
loss: 1.0939717292785645,grad_norm: 0.9999996818343615, iteration: 29146
loss: 0.981452465057373,grad_norm: 0.9999991532962932, iteration: 29147
loss: 1.0140677690505981,grad_norm: 0.9999989191954397, iteration: 29148
loss: 1.001951813697815,grad_norm: 0.9838696958017921, iteration: 29149
loss: 1.0098551511764526,grad_norm: 0.999999243956431, iteration: 29150
loss: 1.0004117488861084,grad_norm: 0.9621687613969836, iteration: 29151
loss: 1.0327950716018677,grad_norm: 0.9640375310088731, iteration: 29152
loss: 1.0163171291351318,grad_norm: 0.8728161938485719, iteration: 29153
loss: 1.0165462493896484,grad_norm: 0.9999990763560128, iteration: 29154
loss: 0.9944039583206177,grad_norm: 0.9999993413269397, iteration: 29155
loss: 0.9609865546226501,grad_norm: 0.9863392463873716, iteration: 29156
loss: 0.9959454536437988,grad_norm: 0.9634333288856769, iteration: 29157
loss: 0.974628746509552,grad_norm: 0.9999991695370622, iteration: 29158
loss: 1.0023186206817627,grad_norm: 0.9999995916337492, iteration: 29159
loss: 1.0084413290023804,grad_norm: 0.9264437184466682, iteration: 29160
loss: 1.0033258199691772,grad_norm: 0.889408471711721, iteration: 29161
loss: 1.0262101888656616,grad_norm: 0.8374033096408355, iteration: 29162
loss: 0.9988052845001221,grad_norm: 0.9721645608779739, iteration: 29163
loss: 0.9845321178436279,grad_norm: 0.9592927946305269, iteration: 29164
loss: 0.9837064146995544,grad_norm: 0.999999047765381, iteration: 29165
loss: 0.9889515042304993,grad_norm: 0.999999115043541, iteration: 29166
loss: 1.0250790119171143,grad_norm: 0.9999990644538216, iteration: 29167
loss: 0.9745809435844421,grad_norm: 0.8365032668576451, iteration: 29168
loss: 0.9674741625785828,grad_norm: 0.8006637096321243, iteration: 29169
loss: 1.1004087924957275,grad_norm: 0.9999994689886094, iteration: 29170
loss: 1.0348505973815918,grad_norm: 0.8387449134591668, iteration: 29171
loss: 0.9748672246932983,grad_norm: 0.9999989605777243, iteration: 29172
loss: 1.0141392946243286,grad_norm: 0.9999989758867841, iteration: 29173
loss: 1.0322151184082031,grad_norm: 0.9999991681886271, iteration: 29174
loss: 1.0527527332305908,grad_norm: 0.9999992821310392, iteration: 29175
loss: 1.0444961786270142,grad_norm: 0.9999993297594553, iteration: 29176
loss: 1.0552259683609009,grad_norm: 0.9999992685590967, iteration: 29177
loss: 1.0298969745635986,grad_norm: 0.9999995679819138, iteration: 29178
loss: 1.0678999423980713,grad_norm: 0.9999991976469984, iteration: 29179
loss: 1.03131902217865,grad_norm: 0.9074718563064976, iteration: 29180
loss: 1.0079063177108765,grad_norm: 0.9999999590878381, iteration: 29181
loss: 0.9740046262741089,grad_norm: 0.8994766555527435, iteration: 29182
loss: 0.985123872756958,grad_norm: 0.9999992098104767, iteration: 29183
loss: 0.9813894033432007,grad_norm: 0.9999991880867669, iteration: 29184
loss: 1.0331522226333618,grad_norm: 0.999999058179963, iteration: 29185
loss: 1.0053677558898926,grad_norm: 0.9800425239819686, iteration: 29186
loss: 0.9843435287475586,grad_norm: 0.999999192900608, iteration: 29187
loss: 0.9969106912612915,grad_norm: 0.9999991513936495, iteration: 29188
loss: 1.0193068981170654,grad_norm: 0.8949339004705729, iteration: 29189
loss: 1.0515086650848389,grad_norm: 0.9999992588581481, iteration: 29190
loss: 0.9672335386276245,grad_norm: 0.9999990453533841, iteration: 29191
loss: 1.0500376224517822,grad_norm: 0.9999992680935508, iteration: 29192
loss: 0.9914425015449524,grad_norm: 0.999999189769454, iteration: 29193
loss: 1.0524674654006958,grad_norm: 0.8911522034436825, iteration: 29194
loss: 1.0303722620010376,grad_norm: 0.9999991099214924, iteration: 29195
loss: 1.0120465755462646,grad_norm: 0.8462293960269175, iteration: 29196
loss: 0.9911841750144958,grad_norm: 0.8798365328039965, iteration: 29197
loss: 1.075455665588379,grad_norm: 0.9999994338249065, iteration: 29198
loss: 1.027487874031067,grad_norm: 0.9999992031193173, iteration: 29199
loss: 0.978356659412384,grad_norm: 0.9606730762744844, iteration: 29200
loss: 1.0298781394958496,grad_norm: 0.999999159224036, iteration: 29201
loss: 1.0182430744171143,grad_norm: 0.9999990195237414, iteration: 29202
loss: 1.013830304145813,grad_norm: 0.9999995041501193, iteration: 29203
loss: 1.0276654958724976,grad_norm: 0.9999993544279198, iteration: 29204
loss: 1.012912392616272,grad_norm: 0.9685462409175862, iteration: 29205
loss: 0.9972546100616455,grad_norm: 0.9999992621426997, iteration: 29206
loss: 1.0282983779907227,grad_norm: 0.9907106505005779, iteration: 29207
loss: 1.0330432653427124,grad_norm: 0.8719388911125099, iteration: 29208
loss: 0.9744502305984497,grad_norm: 0.9999990922267098, iteration: 29209
loss: 1.0017449855804443,grad_norm: 0.7894531223489771, iteration: 29210
loss: 1.0184879302978516,grad_norm: 0.9291329189677873, iteration: 29211
loss: 0.9762405753135681,grad_norm: 0.837637986606406, iteration: 29212
loss: 0.9832208156585693,grad_norm: 0.848059391646509, iteration: 29213
loss: 0.9766771197319031,grad_norm: 0.9999995594431068, iteration: 29214
loss: 1.0170221328735352,grad_norm: 0.9999991677403122, iteration: 29215
loss: 1.007136583328247,grad_norm: 0.9999991296254774, iteration: 29216
loss: 1.0482490062713623,grad_norm: 0.9999990171863283, iteration: 29217
loss: 0.9788998365402222,grad_norm: 0.8862203085157245, iteration: 29218
loss: 0.9990747570991516,grad_norm: 0.9999995543673189, iteration: 29219
loss: 0.9847692847251892,grad_norm: 0.9999990282822673, iteration: 29220
loss: 1.0563616752624512,grad_norm: 0.9999999847265126, iteration: 29221
loss: 1.0085227489471436,grad_norm: 0.7307325076525175, iteration: 29222
loss: 1.0510257482528687,grad_norm: 0.863333791572603, iteration: 29223
loss: 1.0382685661315918,grad_norm: 0.9999997414105034, iteration: 29224
loss: 0.9456639885902405,grad_norm: 0.9999990417928126, iteration: 29225
loss: 0.9988101124763489,grad_norm: 0.9999991976555214, iteration: 29226
loss: 1.051922082901001,grad_norm: 0.9999991999907893, iteration: 29227
loss: 0.975461483001709,grad_norm: 0.9999990329256901, iteration: 29228
loss: 1.0092214345932007,grad_norm: 0.9999990393417254, iteration: 29229
loss: 1.0962045192718506,grad_norm: 0.9999997686363774, iteration: 29230
loss: 1.0363438129425049,grad_norm: 0.9999989747217213, iteration: 29231
loss: 0.9943011999130249,grad_norm: 0.9999992927676263, iteration: 29232
loss: 1.0253040790557861,grad_norm: 0.9999993316987139, iteration: 29233
loss: 1.0620654821395874,grad_norm: 0.9999994070472988, iteration: 29234
loss: 1.0006874799728394,grad_norm: 0.8528312257059645, iteration: 29235
loss: 0.9996169209480286,grad_norm: 0.9842788827511783, iteration: 29236
loss: 0.9893073439598083,grad_norm: 0.9476424844755525, iteration: 29237
loss: 1.024694561958313,grad_norm: 0.9999993361897359, iteration: 29238
loss: 1.007051706314087,grad_norm: 0.8709158078847776, iteration: 29239
loss: 1.0275990962982178,grad_norm: 0.8684953912764127, iteration: 29240
loss: 1.0300142765045166,grad_norm: 0.9999990714109873, iteration: 29241
loss: 0.9985806345939636,grad_norm: 0.8935741263927327, iteration: 29242
loss: 1.0440270900726318,grad_norm: 0.9999992779740358, iteration: 29243
loss: 0.972949206829071,grad_norm: 0.9487610163837564, iteration: 29244
loss: 1.0451375246047974,grad_norm: 0.9999990899638672, iteration: 29245
loss: 1.0389548540115356,grad_norm: 0.9999992811994952, iteration: 29246
loss: 0.9911116361618042,grad_norm: 0.9999997696193426, iteration: 29247
loss: 1.0072182416915894,grad_norm: 0.9758278560542992, iteration: 29248
loss: 0.9995047450065613,grad_norm: 0.8837222473952372, iteration: 29249
loss: 1.028014898300171,grad_norm: 0.999998993150389, iteration: 29250
loss: 1.0782946348190308,grad_norm: 0.9999997656093818, iteration: 29251
loss: 1.0120798349380493,grad_norm: 0.952409135604208, iteration: 29252
loss: 1.0426976680755615,grad_norm: 0.9999994209516998, iteration: 29253
loss: 1.0024142265319824,grad_norm: 0.9999993172761621, iteration: 29254
loss: 1.0022786855697632,grad_norm: 0.999999127986251, iteration: 29255
loss: 0.9775561094284058,grad_norm: 0.9999992880118759, iteration: 29256
loss: 1.016327142715454,grad_norm: 0.9999991989851893, iteration: 29257
loss: 1.046326756477356,grad_norm: 0.9999992792262034, iteration: 29258
loss: 1.0066273212432861,grad_norm: 0.8765541835892254, iteration: 29259
loss: 0.9853218793869019,grad_norm: 0.8959446186332181, iteration: 29260
loss: 1.1002229452133179,grad_norm: 0.9999998055121736, iteration: 29261
loss: 1.02610182762146,grad_norm: 0.9999998204372151, iteration: 29262
loss: 1.0201096534729004,grad_norm: 0.9999990630167951, iteration: 29263
loss: 0.9822103977203369,grad_norm: 0.9488303687048519, iteration: 29264
loss: 1.0063562393188477,grad_norm: 0.9999991817068236, iteration: 29265
loss: 1.0390037298202515,grad_norm: 0.91303902042114, iteration: 29266
loss: 1.0230283737182617,grad_norm: 0.9999991731750365, iteration: 29267
loss: 1.0162417888641357,grad_norm: 0.9999994783732353, iteration: 29268
loss: 0.9853510856628418,grad_norm: 0.857923981282327, iteration: 29269
loss: 0.9906637072563171,grad_norm: 0.9999992123530513, iteration: 29270
loss: 0.9753627777099609,grad_norm: 0.9999991100087481, iteration: 29271
loss: 1.0050833225250244,grad_norm: 0.999999126042247, iteration: 29272
loss: 1.0429738759994507,grad_norm: 0.9999990571393642, iteration: 29273
loss: 1.014298677444458,grad_norm: 0.9999992413816272, iteration: 29274
loss: 1.0329829454421997,grad_norm: 0.9999992592986093, iteration: 29275
loss: 1.0452018976211548,grad_norm: 0.9999992389206486, iteration: 29276
loss: 1.0067052841186523,grad_norm: 0.9999990552248827, iteration: 29277
loss: 1.0535954236984253,grad_norm: 0.9999997091792507, iteration: 29278
loss: 1.0303632020950317,grad_norm: 0.9999991799438219, iteration: 29279
loss: 0.9513646960258484,grad_norm: 0.9999991888282714, iteration: 29280
loss: 1.0348765850067139,grad_norm: 0.9999992229710989, iteration: 29281
loss: 0.9814949631690979,grad_norm: 0.947317484502553, iteration: 29282
loss: 1.0341304540634155,grad_norm: 0.9445287877176363, iteration: 29283
loss: 0.9889686703681946,grad_norm: 0.9999989873854519, iteration: 29284
loss: 0.9877541065216064,grad_norm: 0.9999994078225546, iteration: 29285
loss: 1.009671688079834,grad_norm: 0.9999995565825979, iteration: 29286
loss: 0.987389326095581,grad_norm: 0.9790000002029103, iteration: 29287
loss: 1.0426334142684937,grad_norm: 0.9999991839874209, iteration: 29288
loss: 1.0152289867401123,grad_norm: 0.9999993386487993, iteration: 29289
loss: 1.0323541164398193,grad_norm: 0.9999991427471678, iteration: 29290
loss: 1.0175902843475342,grad_norm: 0.9999990445592282, iteration: 29291
loss: 1.0229332447052002,grad_norm: 0.9860555129045561, iteration: 29292
loss: 0.9722028970718384,grad_norm: 0.9999993103708756, iteration: 29293
loss: 1.0134313106536865,grad_norm: 0.9609011238472345, iteration: 29294
loss: 1.0063252449035645,grad_norm: 0.9999991140324501, iteration: 29295
loss: 0.9546356201171875,grad_norm: 0.8059453733168304, iteration: 29296
loss: 1.0280042886734009,grad_norm: 0.9999996870206478, iteration: 29297
loss: 1.0035765171051025,grad_norm: 0.9999996359689137, iteration: 29298
loss: 0.9988077878952026,grad_norm: 0.9999991327596968, iteration: 29299
loss: 1.0133388042449951,grad_norm: 0.9803458206053508, iteration: 29300
loss: 1.0216799974441528,grad_norm: 0.9999992622974176, iteration: 29301
loss: 0.9818601608276367,grad_norm: 0.9999990316732206, iteration: 29302
loss: 1.0072529315948486,grad_norm: 0.9999990916082928, iteration: 29303
loss: 0.9919449090957642,grad_norm: 0.9999994631142902, iteration: 29304
loss: 1.0422080755233765,grad_norm: 0.9999996663810284, iteration: 29305
loss: 1.021582007408142,grad_norm: 0.9999993241610368, iteration: 29306
loss: 1.0025956630706787,grad_norm: 0.9810879751069617, iteration: 29307
loss: 1.0281825065612793,grad_norm: 0.9999991867160609, iteration: 29308
loss: 1.0122594833374023,grad_norm: 0.9876699708382909, iteration: 29309
loss: 0.9635019898414612,grad_norm: 0.8941377662534925, iteration: 29310
loss: 1.026947021484375,grad_norm: 0.9999992466257899, iteration: 29311
loss: 0.9789891839027405,grad_norm: 0.9902106346603784, iteration: 29312
loss: 0.9853132367134094,grad_norm: 0.9999996211323905, iteration: 29313
loss: 1.0405001640319824,grad_norm: 0.9999989679650342, iteration: 29314
loss: 1.0153613090515137,grad_norm: 0.9999992492218448, iteration: 29315
loss: 1.0301625728607178,grad_norm: 0.9999992293343418, iteration: 29316
loss: 0.9819161891937256,grad_norm: 0.9617501199893661, iteration: 29317
loss: 1.0281078815460205,grad_norm: 0.9914075984686345, iteration: 29318
loss: 1.0151164531707764,grad_norm: 0.9480817625022181, iteration: 29319
loss: 0.9924126863479614,grad_norm: 0.9999990689244906, iteration: 29320
loss: 0.9633364081382751,grad_norm: 0.9999990219256079, iteration: 29321
loss: 0.9461358785629272,grad_norm: 0.9999989475767526, iteration: 29322
loss: 0.9809534549713135,grad_norm: 0.9999994242590842, iteration: 29323
loss: 1.0713694095611572,grad_norm: 0.9999991359868409, iteration: 29324
loss: 1.0459219217300415,grad_norm: 0.9999992223412858, iteration: 29325
loss: 0.9934912919998169,grad_norm: 0.901883553532774, iteration: 29326
loss: 1.0641865730285645,grad_norm: 0.9999994354306729, iteration: 29327
loss: 1.0136867761611938,grad_norm: 0.8745897586255373, iteration: 29328
loss: 1.0003567934036255,grad_norm: 0.9954961649755144, iteration: 29329
loss: 0.9967992901802063,grad_norm: 0.9787603041356598, iteration: 29330
loss: 1.0383293628692627,grad_norm: 0.9055550102822139, iteration: 29331
loss: 1.0151495933532715,grad_norm: 0.9999990999749514, iteration: 29332
loss: 1.044908881187439,grad_norm: 0.965339769353074, iteration: 29333
loss: 1.0221960544586182,grad_norm: 0.9999992815918843, iteration: 29334
loss: 1.0193818807601929,grad_norm: 0.9999997871072855, iteration: 29335
loss: 0.9940469264984131,grad_norm: 0.9999990586005981, iteration: 29336
loss: 1.0229253768920898,grad_norm: 0.9999990547896567, iteration: 29337
loss: 0.970054030418396,grad_norm: 0.9999994247198611, iteration: 29338
loss: 1.0350449085235596,grad_norm: 0.9999993233972729, iteration: 29339
loss: 0.9824225902557373,grad_norm: 0.9999991598821255, iteration: 29340
loss: 1.0053596496582031,grad_norm: 0.9999991259787392, iteration: 29341
loss: 1.0517629384994507,grad_norm: 0.9999995207573842, iteration: 29342
loss: 1.0039112567901611,grad_norm: 0.9797136512060662, iteration: 29343
loss: 1.038072943687439,grad_norm: 0.999999230712562, iteration: 29344
loss: 0.9995260238647461,grad_norm: 0.9999991490059481, iteration: 29345
loss: 0.9658553004264832,grad_norm: 0.9999990741933049, iteration: 29346
loss: 1.019421100616455,grad_norm: 0.9999989943906072, iteration: 29347
loss: 1.0116641521453857,grad_norm: 0.999999731950214, iteration: 29348
loss: 1.0030248165130615,grad_norm: 0.9597246540902566, iteration: 29349
loss: 1.0085253715515137,grad_norm: 0.9999992180048124, iteration: 29350
loss: 0.9990796446800232,grad_norm: 0.9999993834769999, iteration: 29351
loss: 0.9886277318000793,grad_norm: 0.9457697631648698, iteration: 29352
loss: 1.0373226404190063,grad_norm: 0.908000742173078, iteration: 29353
loss: 0.9611121416091919,grad_norm: 0.999999076421504, iteration: 29354
loss: 1.0218113660812378,grad_norm: 0.9380044998829076, iteration: 29355
loss: 0.9614154696464539,grad_norm: 0.9999992276212791, iteration: 29356
loss: 1.0697695016860962,grad_norm: 0.9999994681055061, iteration: 29357
loss: 1.0096168518066406,grad_norm: 0.9999992719999911, iteration: 29358
loss: 1.0153906345367432,grad_norm: 0.9999990489230701, iteration: 29359
loss: 0.9991552233695984,grad_norm: 0.9999991694057387, iteration: 29360
loss: 1.0096997022628784,grad_norm: 0.9308367283222408, iteration: 29361
loss: 0.966096818447113,grad_norm: 0.9999990162058184, iteration: 29362
loss: 0.9832323789596558,grad_norm: 0.9999991286048859, iteration: 29363
loss: 1.041032075881958,grad_norm: 0.9999990272785663, iteration: 29364
loss: 1.0022934675216675,grad_norm: 0.9999990078861486, iteration: 29365
loss: 1.0073554515838623,grad_norm: 0.9999992748772227, iteration: 29366
loss: 0.9922796487808228,grad_norm: 0.9999989337708952, iteration: 29367
loss: 1.0109059810638428,grad_norm: 0.9999990122433701, iteration: 29368
loss: 0.9951532483100891,grad_norm: 0.883605277556944, iteration: 29369
loss: 1.008086919784546,grad_norm: 0.863374353577505, iteration: 29370
loss: 1.00409996509552,grad_norm: 0.9437423888748491, iteration: 29371
loss: 0.9870604872703552,grad_norm: 0.999999207046953, iteration: 29372
loss: 1.0031321048736572,grad_norm: 0.9999991974505472, iteration: 29373
loss: 1.0139824151992798,grad_norm: 0.9486142140714399, iteration: 29374
loss: 1.0526747703552246,grad_norm: 0.8940522126786518, iteration: 29375
loss: 0.9761247634887695,grad_norm: 0.9999990874495905, iteration: 29376
loss: 1.038604497909546,grad_norm: 0.999999245607026, iteration: 29377
loss: 1.0019553899765015,grad_norm: 0.9999992709466513, iteration: 29378
loss: 0.9757224321365356,grad_norm: 0.9999992291082912, iteration: 29379
loss: 0.9933710694313049,grad_norm: 0.9999991388777478, iteration: 29380
loss: 1.033554196357727,grad_norm: 0.9644995439552403, iteration: 29381
loss: 1.0091041326522827,grad_norm: 0.9484092596609198, iteration: 29382
loss: 1.048749327659607,grad_norm: 0.9999990603565957, iteration: 29383
loss: 0.9925975799560547,grad_norm: 0.9288764629544413, iteration: 29384
loss: 0.9808176755905151,grad_norm: 0.9830843739695763, iteration: 29385
loss: 0.9686731696128845,grad_norm: 0.9999996831097702, iteration: 29386
loss: 0.9775390625,grad_norm: 0.9999990940904193, iteration: 29387
loss: 1.0001300573349,grad_norm: 0.9999989926547759, iteration: 29388
loss: 1.0127270221710205,grad_norm: 0.9999990448171011, iteration: 29389
loss: 1.0361469984054565,grad_norm: 0.999998920438468, iteration: 29390
loss: 1.0258678197860718,grad_norm: 0.9575993600927014, iteration: 29391
loss: 1.0050976276397705,grad_norm: 0.999999207055185, iteration: 29392
loss: 0.9837931394577026,grad_norm: 0.9089477835806234, iteration: 29393
loss: 1.02547287940979,grad_norm: 0.8721329839430702, iteration: 29394
loss: 0.9908273220062256,grad_norm: 0.9999989810200662, iteration: 29395
loss: 1.0100383758544922,grad_norm: 0.9491332311049764, iteration: 29396
loss: 1.0463151931762695,grad_norm: 0.9999998943886014, iteration: 29397
loss: 1.0161681175231934,grad_norm: 0.9999992578699342, iteration: 29398
loss: 1.010880708694458,grad_norm: 0.9967752562271553, iteration: 29399
loss: 1.044804573059082,grad_norm: 0.999999304330143, iteration: 29400
loss: 0.9808754324913025,grad_norm: 0.9999991312907488, iteration: 29401
loss: 0.9973654747009277,grad_norm: 0.931195744747182, iteration: 29402
loss: 0.9456100463867188,grad_norm: 0.9999990229487054, iteration: 29403
loss: 1.0057724714279175,grad_norm: 0.7438793524138972, iteration: 29404
loss: 1.0128720998764038,grad_norm: 0.9999991198562789, iteration: 29405
loss: 1.0138012170791626,grad_norm: 0.9999990226623052, iteration: 29406
loss: 0.9626823663711548,grad_norm: 0.8781698636132103, iteration: 29407
loss: 1.0058039426803589,grad_norm: 0.9099242687053234, iteration: 29408
loss: 0.9474637508392334,grad_norm: 0.8971233102757896, iteration: 29409
loss: 0.9921266436576843,grad_norm: 0.9999990246071343, iteration: 29410
loss: 1.018851637840271,grad_norm: 0.9624304495988154, iteration: 29411
loss: 1.0151584148406982,grad_norm: 0.8916586140584449, iteration: 29412
loss: 1.0503344535827637,grad_norm: 0.9999991718525146, iteration: 29413
loss: 1.0545456409454346,grad_norm: 0.9999995052706678, iteration: 29414
loss: 0.9800904393196106,grad_norm: 0.9577881616153132, iteration: 29415
loss: 1.0036678314208984,grad_norm: 0.9899030296388023, iteration: 29416
loss: 0.9980463981628418,grad_norm: 0.9999989645836233, iteration: 29417
loss: 1.06105375289917,grad_norm: 0.9999991976912634, iteration: 29418
loss: 0.9954657554626465,grad_norm: 0.9227311404865414, iteration: 29419
loss: 0.9910746216773987,grad_norm: 0.9999990897629866, iteration: 29420
loss: 1.0238362550735474,grad_norm: 0.9999992888072119, iteration: 29421
loss: 0.9719802141189575,grad_norm: 0.9999989669566032, iteration: 29422
loss: 1.0175799131393433,grad_norm: 0.999999171391476, iteration: 29423
loss: 1.0329108238220215,grad_norm: 0.9999991523065962, iteration: 29424
loss: 1.0037107467651367,grad_norm: 0.9999991079931565, iteration: 29425
loss: 1.0284600257873535,grad_norm: 0.8978761162570563, iteration: 29426
loss: 1.0221906900405884,grad_norm: 0.9999991118158623, iteration: 29427
loss: 0.9875379800796509,grad_norm: 0.8613373135021479, iteration: 29428
loss: 0.9874734282493591,grad_norm: 0.9999992864163839, iteration: 29429
loss: 0.9916850924491882,grad_norm: 0.892462628992738, iteration: 29430
loss: 1.019586443901062,grad_norm: 0.9999990738605378, iteration: 29431
loss: 0.9821709990501404,grad_norm: 0.9999990255190245, iteration: 29432
loss: 0.9745237827301025,grad_norm: 0.9999989746677577, iteration: 29433
loss: 1.0385775566101074,grad_norm: 0.9999991743589491, iteration: 29434
loss: 1.031589150428772,grad_norm: 0.9645789589279703, iteration: 29435
loss: 1.0276697874069214,grad_norm: 0.8579984288284715, iteration: 29436
loss: 1.0058447122573853,grad_norm: 0.9999995189756117, iteration: 29437
loss: 1.0711530447006226,grad_norm: 0.986879694174953, iteration: 29438
loss: 1.015668272972107,grad_norm: 0.9686774827102627, iteration: 29439
loss: 1.0062525272369385,grad_norm: 0.9122870421892368, iteration: 29440
loss: 1.0202323198318481,grad_norm: 0.9948388444510109, iteration: 29441
loss: 0.9990408420562744,grad_norm: 0.9566605436698379, iteration: 29442
loss: 1.0010175704956055,grad_norm: 0.9999991558134853, iteration: 29443
loss: 1.0253254175186157,grad_norm: 0.9232565517496586, iteration: 29444
loss: 1.0460317134857178,grad_norm: 0.9999990622413191, iteration: 29445
loss: 1.0293587446212769,grad_norm: 0.9999991025541579, iteration: 29446
loss: 1.0051759481430054,grad_norm: 0.9999991568848281, iteration: 29447
loss: 1.007312297821045,grad_norm: 0.991981625340114, iteration: 29448
loss: 1.0261709690093994,grad_norm: 0.999999085711501, iteration: 29449
loss: 1.0023952722549438,grad_norm: 0.9665451338608974, iteration: 29450
loss: 1.025814414024353,grad_norm: 0.99999922025556, iteration: 29451
loss: 1.0184738636016846,grad_norm: 0.8886591917721948, iteration: 29452
loss: 0.9966997504234314,grad_norm: 0.9999991745049974, iteration: 29453
loss: 0.9951027035713196,grad_norm: 0.9454714833661341, iteration: 29454
loss: 1.0268445014953613,grad_norm: 0.9999992974316508, iteration: 29455
loss: 1.026968002319336,grad_norm: 0.9806546111337817, iteration: 29456
loss: 1.015517234802246,grad_norm: 0.9999992261337942, iteration: 29457
loss: 1.0680559873580933,grad_norm: 0.9999992746377292, iteration: 29458
loss: 1.0252479314804077,grad_norm: 0.9999990866524823, iteration: 29459
loss: 0.9738597273826599,grad_norm: 0.9091351025131078, iteration: 29460
loss: 1.003565788269043,grad_norm: 0.9539342710360422, iteration: 29461
loss: 1.0226927995681763,grad_norm: 0.9999991064019204, iteration: 29462
loss: 1.0159251689910889,grad_norm: 0.9999990836663141, iteration: 29463
loss: 0.9798145294189453,grad_norm: 0.9999991149470344, iteration: 29464
loss: 1.0146737098693848,grad_norm: 0.885062123050699, iteration: 29465
loss: 1.0035823583602905,grad_norm: 0.9489250941769175, iteration: 29466
loss: 0.9725446105003357,grad_norm: 0.8909068983008523, iteration: 29467
loss: 1.0006327629089355,grad_norm: 0.9999992212696251, iteration: 29468
loss: 1.0161077976226807,grad_norm: 0.9999991151512457, iteration: 29469
loss: 1.0439743995666504,grad_norm: 0.9999993245253327, iteration: 29470
loss: 0.9891408085823059,grad_norm: 0.9796668085206258, iteration: 29471
loss: 1.0415983200073242,grad_norm: 0.9999991688779133, iteration: 29472
loss: 1.0144662857055664,grad_norm: 0.701182900512974, iteration: 29473
loss: 1.0077934265136719,grad_norm: 0.9528059920946718, iteration: 29474
loss: 0.9932637214660645,grad_norm: 0.9999990807849458, iteration: 29475
loss: 1.0391470193862915,grad_norm: 0.9999991074876161, iteration: 29476
loss: 0.9942054748535156,grad_norm: 0.8826997638566711, iteration: 29477
loss: 1.0090728998184204,grad_norm: 0.9999998426390235, iteration: 29478
loss: 1.023140549659729,grad_norm: 0.9999990599174, iteration: 29479
loss: 1.03848397731781,grad_norm: 0.999999246050547, iteration: 29480
loss: 1.0279194116592407,grad_norm: 0.9999991994967589, iteration: 29481
loss: 0.992527186870575,grad_norm: 0.8462817893764105, iteration: 29482
loss: 1.030280590057373,grad_norm: 0.9028188370990706, iteration: 29483
loss: 0.9852844476699829,grad_norm: 0.999999052114885, iteration: 29484
loss: 1.018789529800415,grad_norm: 0.9999992204522224, iteration: 29485
loss: 1.003319263458252,grad_norm: 0.9999990914895031, iteration: 29486
loss: 0.9884430170059204,grad_norm: 0.999999076629933, iteration: 29487
loss: 1.0484514236450195,grad_norm: 0.9999990191761817, iteration: 29488
loss: 0.9906821846961975,grad_norm: 0.9709748964334987, iteration: 29489
loss: 0.9935449361801147,grad_norm: 0.999999081887978, iteration: 29490
loss: 1.0358654260635376,grad_norm: 0.9999989926927002, iteration: 29491
loss: 1.0215486288070679,grad_norm: 0.9999996149415757, iteration: 29492
loss: 1.0157030820846558,grad_norm: 0.9999990455122211, iteration: 29493
loss: 0.984512984752655,grad_norm: 0.9999990322740405, iteration: 29494
loss: 1.0028812885284424,grad_norm: 0.9198765227549011, iteration: 29495
loss: 0.9776699542999268,grad_norm: 0.9999992087640751, iteration: 29496
loss: 1.108655333518982,grad_norm: 0.999999421589432, iteration: 29497
loss: 0.9913011789321899,grad_norm: 0.9494608043494646, iteration: 29498
loss: 1.0143910646438599,grad_norm: 0.9999991123170493, iteration: 29499
loss: 1.0258772373199463,grad_norm: 0.9311919287682663, iteration: 29500
loss: 1.0051792860031128,grad_norm: 0.8136151409478676, iteration: 29501
loss: 1.0312811136245728,grad_norm: 0.851262868791012, iteration: 29502
loss: 1.0045068264007568,grad_norm: 0.9248006373721039, iteration: 29503
loss: 1.0183371305465698,grad_norm: 0.9999990921558409, iteration: 29504
loss: 1.0076440572738647,grad_norm: 0.9999993080453227, iteration: 29505
loss: 0.9694071412086487,grad_norm: 0.8078999179200265, iteration: 29506
loss: 0.9982423782348633,grad_norm: 0.9999992299994225, iteration: 29507
loss: 1.037354826927185,grad_norm: 0.912734729863198, iteration: 29508
loss: 1.015634298324585,grad_norm: 0.9965032668780094, iteration: 29509
loss: 1.0069050788879395,grad_norm: 0.9999991577710972, iteration: 29510
loss: 1.0231702327728271,grad_norm: 0.9436592437306873, iteration: 29511
loss: 1.0264338254928589,grad_norm: 0.9999992680703327, iteration: 29512
loss: 1.0234487056732178,grad_norm: 0.9999991082602977, iteration: 29513
loss: 1.0141103267669678,grad_norm: 0.9999990374993587, iteration: 29514
loss: 1.049179196357727,grad_norm: 0.9999995290979171, iteration: 29515
loss: 1.0624325275421143,grad_norm: 0.9335618799369575, iteration: 29516
loss: 1.0083117485046387,grad_norm: 0.9999989775549467, iteration: 29517
loss: 1.0107403993606567,grad_norm: 0.9999990575358079, iteration: 29518
loss: 0.9937619566917419,grad_norm: 0.9176287729350813, iteration: 29519
loss: 1.0315312147140503,grad_norm: 0.9999992444879657, iteration: 29520
loss: 1.0448002815246582,grad_norm: 0.9999993450189116, iteration: 29521
loss: 1.0316482782363892,grad_norm: 0.7296806796757208, iteration: 29522
loss: 0.9663683772087097,grad_norm: 0.8842092234732131, iteration: 29523
loss: 1.0047473907470703,grad_norm: 0.9999990696075645, iteration: 29524
loss: 1.0067384243011475,grad_norm: 0.822080716877824, iteration: 29525
loss: 1.0057275295257568,grad_norm: 0.9999994060290048, iteration: 29526
loss: 1.0059959888458252,grad_norm: 0.9999992876189732, iteration: 29527
loss: 0.976280152797699,grad_norm: 0.9107552774429378, iteration: 29528
loss: 0.9950024485588074,grad_norm: 0.9999992468981297, iteration: 29529
loss: 1.0138297080993652,grad_norm: 0.8541300925528044, iteration: 29530
loss: 1.018970012664795,grad_norm: 0.9999992761646804, iteration: 29531
loss: 1.026505947113037,grad_norm: 0.9518892244846732, iteration: 29532
loss: 0.9968956112861633,grad_norm: 0.7671019333710839, iteration: 29533
loss: 0.9764734506607056,grad_norm: 0.999999114927473, iteration: 29534
loss: 1.0389823913574219,grad_norm: 0.999999938698113, iteration: 29535
loss: 1.0290676355361938,grad_norm: 0.9999991178989883, iteration: 29536
loss: 1.0024073123931885,grad_norm: 0.9999989848151958, iteration: 29537
loss: 1.031981348991394,grad_norm: 0.9999990815486135, iteration: 29538
loss: 1.064887523651123,grad_norm: 0.9999991103846778, iteration: 29539
loss: 0.991335391998291,grad_norm: 0.9999989871691243, iteration: 29540
loss: 1.0207033157348633,grad_norm: 0.9999991220655768, iteration: 29541
loss: 1.0029187202453613,grad_norm: 0.9818582844287761, iteration: 29542
loss: 1.044876217842102,grad_norm: 0.9721061162207635, iteration: 29543
loss: 1.0413295030593872,grad_norm: 0.999999155050663, iteration: 29544
loss: 1.0273882150650024,grad_norm: 0.999999531420725, iteration: 29545
loss: 1.0227246284484863,grad_norm: 0.9947524118693362, iteration: 29546
loss: 0.9886618256568909,grad_norm: 0.9999990491697577, iteration: 29547
loss: 1.0203396081924438,grad_norm: 0.9558508074046271, iteration: 29548
loss: 1.02878737449646,grad_norm: 0.9999990668031781, iteration: 29549
loss: 1.0266472101211548,grad_norm: 0.8428072708463955, iteration: 29550
loss: 1.0411187410354614,grad_norm: 0.9831645698375118, iteration: 29551
loss: 0.9992874264717102,grad_norm: 0.9410520889105322, iteration: 29552
loss: 0.9453825354576111,grad_norm: 0.9999991750393679, iteration: 29553
loss: 1.0387790203094482,grad_norm: 0.9652626088173591, iteration: 29554
loss: 1.0102746486663818,grad_norm: 0.9999996789928588, iteration: 29555
loss: 1.0266526937484741,grad_norm: 0.9999995998813369, iteration: 29556
loss: 0.9709603190422058,grad_norm: 0.9999992672537333, iteration: 29557
loss: 1.023557424545288,grad_norm: 0.9999997598018395, iteration: 29558
loss: 0.9707878232002258,grad_norm: 0.9288565918960766, iteration: 29559
loss: 1.0091081857681274,grad_norm: 0.9999990705925887, iteration: 29560
loss: 1.0132144689559937,grad_norm: 0.9999993336247901, iteration: 29561
loss: 1.0118587017059326,grad_norm: 0.999999300562927, iteration: 29562
loss: 1.0317192077636719,grad_norm: 0.9999994200317804, iteration: 29563
loss: 1.0135859251022339,grad_norm: 0.900220215073466, iteration: 29564
loss: 0.9702226519584656,grad_norm: 0.999999202123325, iteration: 29565
loss: 1.0716915130615234,grad_norm: 0.9999998414840813, iteration: 29566
loss: 1.0288721323013306,grad_norm: 0.8722330439130453, iteration: 29567
loss: 0.9991013407707214,grad_norm: 0.9849052481640134, iteration: 29568
loss: 1.005799412727356,grad_norm: 0.9999994950813648, iteration: 29569
loss: 0.9956612586975098,grad_norm: 0.8503648065475551, iteration: 29570
loss: 0.9930551052093506,grad_norm: 0.9999993227775757, iteration: 29571
loss: 1.0135502815246582,grad_norm: 0.9999992800819095, iteration: 29572
loss: 0.950164794921875,grad_norm: 0.9700482010064508, iteration: 29573
loss: 0.978751003742218,grad_norm: 0.9999990457817088, iteration: 29574
loss: 0.9968085885047913,grad_norm: 0.9999990249582552, iteration: 29575
loss: 1.0131406784057617,grad_norm: 0.9999990520454725, iteration: 29576
loss: 1.0258744955062866,grad_norm: 0.9999990196882876, iteration: 29577
loss: 1.0492388010025024,grad_norm: 0.9999996606983447, iteration: 29578
loss: 1.0792101621627808,grad_norm: 0.9999993322600421, iteration: 29579
loss: 0.9992548823356628,grad_norm: 0.9999994907715022, iteration: 29580
loss: 1.0060832500457764,grad_norm: 0.9999992487356453, iteration: 29581
loss: 1.0562856197357178,grad_norm: 0.9999995028635634, iteration: 29582
loss: 0.9618495106697083,grad_norm: 0.8756957559439622, iteration: 29583
loss: 0.9607011675834656,grad_norm: 0.9459174911769119, iteration: 29584
loss: 0.991511344909668,grad_norm: 0.9999989981807978, iteration: 29585
loss: 0.9833567142486572,grad_norm: 0.806245527906231, iteration: 29586
loss: 1.0038633346557617,grad_norm: 0.9999990440886318, iteration: 29587
loss: 0.9792546033859253,grad_norm: 0.8754515595550957, iteration: 29588
loss: 0.988994836807251,grad_norm: 0.7960713339523874, iteration: 29589
loss: 1.0119059085845947,grad_norm: 0.9999990672154708, iteration: 29590
loss: 1.015447735786438,grad_norm: 0.9305788799871099, iteration: 29591
loss: 0.9975193738937378,grad_norm: 0.9999990722192201, iteration: 29592
loss: 0.9797409176826477,grad_norm: 0.999999022292979, iteration: 29593
loss: 1.0316190719604492,grad_norm: 0.9999995454497264, iteration: 29594
loss: 0.9854246377944946,grad_norm: 0.9999991550845778, iteration: 29595
loss: 0.9951740503311157,grad_norm: 0.9999994039348169, iteration: 29596
loss: 1.0269253253936768,grad_norm: 0.9740159595818101, iteration: 29597
loss: 0.9960486888885498,grad_norm: 0.9924388055072315, iteration: 29598
loss: 1.0060104131698608,grad_norm: 0.9999995902597352, iteration: 29599
loss: 1.0175648927688599,grad_norm: 0.9728750911772287, iteration: 29600
loss: 1.0149288177490234,grad_norm: 0.9999990508644919, iteration: 29601
loss: 1.0033589601516724,grad_norm: 0.8808355289363385, iteration: 29602
loss: 1.0278637409210205,grad_norm: 0.9999990580117774, iteration: 29603
loss: 0.9985359311103821,grad_norm: 0.9999991008488651, iteration: 29604
loss: 1.006317377090454,grad_norm: 0.95725660607025, iteration: 29605
loss: 1.0139129161834717,grad_norm: 0.999999019312359, iteration: 29606
loss: 0.9858015775680542,grad_norm: 0.9495114491078451, iteration: 29607
loss: 1.005540132522583,grad_norm: 0.955554850960027, iteration: 29608
loss: 1.0082471370697021,grad_norm: 0.9999992772976094, iteration: 29609
loss: 1.0549489259719849,grad_norm: 0.9999991450389457, iteration: 29610
loss: 1.0117141008377075,grad_norm: 0.9999991858273592, iteration: 29611
loss: 0.9686291813850403,grad_norm: 0.9498208969099246, iteration: 29612
loss: 1.0449665784835815,grad_norm: 0.9999994781114064, iteration: 29613
loss: 0.9881633520126343,grad_norm: 0.9755085072363755, iteration: 29614
loss: 0.9728518128395081,grad_norm: 0.9999995566722986, iteration: 29615
loss: 0.980591893196106,grad_norm: 0.9834927365756932, iteration: 29616
loss: 1.0095661878585815,grad_norm: 0.9999993814858199, iteration: 29617
loss: 1.006015419960022,grad_norm: 0.9999990702634493, iteration: 29618
loss: 0.9966866374015808,grad_norm: 0.999999179211233, iteration: 29619
loss: 1.0098214149475098,grad_norm: 0.9006447191215832, iteration: 29620
loss: 1.0369105339050293,grad_norm: 0.7394886899390963, iteration: 29621
loss: 1.0377111434936523,grad_norm: 0.9999990838472903, iteration: 29622
loss: 0.9969708919525146,grad_norm: 0.8717678585741, iteration: 29623
loss: 1.0538626909255981,grad_norm: 0.9999991013791074, iteration: 29624
loss: 1.0425899028778076,grad_norm: 0.9999992849726864, iteration: 29625
loss: 1.0021389722824097,grad_norm: 0.9441956657373651, iteration: 29626
loss: 1.0968739986419678,grad_norm: 0.9999995168128429, iteration: 29627
loss: 1.0487465858459473,grad_norm: 0.9999998870572732, iteration: 29628
loss: 0.9649516344070435,grad_norm: 0.9999991585243894, iteration: 29629
loss: 0.9861187934875488,grad_norm: 0.9339291810942887, iteration: 29630
loss: 1.034569501876831,grad_norm: 0.999999147612872, iteration: 29631
loss: 1.0058009624481201,grad_norm: 0.9999991546161049, iteration: 29632
loss: 1.0409274101257324,grad_norm: 0.9999991759738984, iteration: 29633
loss: 0.9893537163734436,grad_norm: 0.9999994226201779, iteration: 29634
loss: 1.0139588117599487,grad_norm: 0.9999990742699356, iteration: 29635
loss: 1.033408284187317,grad_norm: 0.9999990427799207, iteration: 29636
loss: 1.0276561975479126,grad_norm: 0.93584523284839, iteration: 29637
loss: 1.0084041357040405,grad_norm: 0.896267739177907, iteration: 29638
loss: 1.0135313272476196,grad_norm: 0.9999992615617895, iteration: 29639
loss: 0.9584590196609497,grad_norm: 0.9999989712015218, iteration: 29640
loss: 0.9960362911224365,grad_norm: 0.9999994739396602, iteration: 29641
loss: 1.0275323390960693,grad_norm: 0.9884221442293196, iteration: 29642
loss: 1.0588221549987793,grad_norm: 0.9999992174407446, iteration: 29643
loss: 1.0015815496444702,grad_norm: 0.9999991027211609, iteration: 29644
loss: 1.0776528120040894,grad_norm: 0.9999992005133461, iteration: 29645
loss: 0.9581335186958313,grad_norm: 0.8528387599557032, iteration: 29646
loss: 1.0148581266403198,grad_norm: 0.9529830357148176, iteration: 29647
loss: 1.0381284952163696,grad_norm: 0.9999996232458385, iteration: 29648
loss: 0.9598872661590576,grad_norm: 0.8592891406487393, iteration: 29649
loss: 1.014174461364746,grad_norm: 0.9999991327392135, iteration: 29650
loss: 1.011351466178894,grad_norm: 0.9999993953977562, iteration: 29651
loss: 1.002493977546692,grad_norm: 0.9999990910917327, iteration: 29652
loss: 1.0326242446899414,grad_norm: 0.9999993223926061, iteration: 29653
loss: 1.0340611934661865,grad_norm: 0.9999991047046746, iteration: 29654
loss: 1.0188535451889038,grad_norm: 0.9999991633352188, iteration: 29655
loss: 0.9862329959869385,grad_norm: 0.8802020494314, iteration: 29656
loss: 0.996555507183075,grad_norm: 0.9711237608123067, iteration: 29657
loss: 1.013979434967041,grad_norm: 0.7868327373921273, iteration: 29658
loss: 1.0201386213302612,grad_norm: 0.9155911945213134, iteration: 29659
loss: 1.0104962587356567,grad_norm: 0.9999991402738505, iteration: 29660
loss: 1.0494589805603027,grad_norm: 0.9999998075369998, iteration: 29661
loss: 1.0707370042800903,grad_norm: 0.9999999181550646, iteration: 29662
loss: 0.9625192284584045,grad_norm: 0.9999991020608998, iteration: 29663
loss: 0.9975976347923279,grad_norm: 0.9176537896346076, iteration: 29664
loss: 1.0078942775726318,grad_norm: 0.8591711816191372, iteration: 29665
loss: 1.004310965538025,grad_norm: 0.9999991812187642, iteration: 29666
loss: 1.107585072517395,grad_norm: 0.9999994951097853, iteration: 29667
loss: 1.0335440635681152,grad_norm: 0.9999991787979781, iteration: 29668
loss: 1.0032613277435303,grad_norm: 0.9999990509484552, iteration: 29669
loss: 0.979583740234375,grad_norm: 0.9615012436048356, iteration: 29670
loss: 0.9806035161018372,grad_norm: 0.823621735371781, iteration: 29671
loss: 1.0166198015213013,grad_norm: 0.9999991875060127, iteration: 29672
loss: 1.0352237224578857,grad_norm: 0.9999991488370331, iteration: 29673
loss: 0.9784835577011108,grad_norm: 0.9999992601290473, iteration: 29674
loss: 0.965756356716156,grad_norm: 0.8769799485172279, iteration: 29675
loss: 1.0035227537155151,grad_norm: 0.9325745391012248, iteration: 29676
loss: 1.0118401050567627,grad_norm: 0.9999992302538184, iteration: 29677
loss: 1.0065568685531616,grad_norm: 0.9999990673804213, iteration: 29678
loss: 1.0192292928695679,grad_norm: 0.9403245733666612, iteration: 29679
loss: 1.022778034210205,grad_norm: 0.9999990950444536, iteration: 29680
loss: 0.9942701458930969,grad_norm: 0.9999991025599123, iteration: 29681
loss: 1.0215065479278564,grad_norm: 0.9999992976241942, iteration: 29682
loss: 1.0320146083831787,grad_norm: 0.999434544541148, iteration: 29683
loss: 0.9442542195320129,grad_norm: 0.9781098909871841, iteration: 29684
loss: 1.024577260017395,grad_norm: 0.992081469030528, iteration: 29685
loss: 0.9962248802185059,grad_norm: 0.9848309833022821, iteration: 29686
loss: 0.9672847986221313,grad_norm: 0.9927113064975709, iteration: 29687
loss: 0.9878833889961243,grad_norm: 0.8815130680320871, iteration: 29688
loss: 1.024540901184082,grad_norm: 0.9999991594477249, iteration: 29689
loss: 0.9721868634223938,grad_norm: 0.9404136520711305, iteration: 29690
loss: 0.9823188185691833,grad_norm: 0.891155603096697, iteration: 29691
loss: 1.0189746618270874,grad_norm: 0.9999989966497874, iteration: 29692
loss: 1.0244114398956299,grad_norm: 0.9999991930790386, iteration: 29693
loss: 1.0691337585449219,grad_norm: 0.9999992160722915, iteration: 29694
loss: 0.9845649600028992,grad_norm: 0.9999990841783981, iteration: 29695
loss: 0.9943639039993286,grad_norm: 0.9664579764265087, iteration: 29696
loss: 0.9703055024147034,grad_norm: 0.9271636471516135, iteration: 29697
loss: 1.0192910432815552,grad_norm: 0.9999992006374687, iteration: 29698
loss: 1.0239369869232178,grad_norm: 0.8831997845491798, iteration: 29699
loss: 1.0417593717575073,grad_norm: 0.999999203826303, iteration: 29700
loss: 1.0094798803329468,grad_norm: 0.9686758955348824, iteration: 29701
loss: 1.0016365051269531,grad_norm: 0.9701147449840227, iteration: 29702
loss: 0.9978238344192505,grad_norm: 0.9999991292447887, iteration: 29703
loss: 1.031353235244751,grad_norm: 0.9999991644436095, iteration: 29704
loss: 0.9977784156799316,grad_norm: 0.9999991760680386, iteration: 29705
loss: 1.0010672807693481,grad_norm: 0.9999992510879158, iteration: 29706
loss: 1.0008773803710938,grad_norm: 0.9586978273591569, iteration: 29707
loss: 0.996036946773529,grad_norm: 0.9974084517125781, iteration: 29708
loss: 1.0454691648483276,grad_norm: 0.999999365163024, iteration: 29709
loss: 1.1098159551620483,grad_norm: 0.9999998046822703, iteration: 29710
loss: 1.0249524116516113,grad_norm: 0.9999990926616891, iteration: 29711
loss: 1.0001169443130493,grad_norm: 0.860035347714911, iteration: 29712
loss: 0.9845703840255737,grad_norm: 0.9999991511888485, iteration: 29713
loss: 0.9936750531196594,grad_norm: 0.732829433744297, iteration: 29714
loss: 0.9968072772026062,grad_norm: 0.8949407959980943, iteration: 29715
loss: 1.00563645362854,grad_norm: 0.98990638638602, iteration: 29716
loss: 1.0034478902816772,grad_norm: 0.9468558824479243, iteration: 29717
loss: 1.0159571170806885,grad_norm: 0.999999154817827, iteration: 29718
loss: 0.9786096811294556,grad_norm: 0.8814788114633735, iteration: 29719
loss: 1.0214476585388184,grad_norm: 0.999999237068423, iteration: 29720
loss: 0.9684378504753113,grad_norm: 0.9361949428759684, iteration: 29721
loss: 0.9992477297782898,grad_norm: 0.9009045783057154, iteration: 29722
loss: 1.0497496128082275,grad_norm: 0.9999989960555208, iteration: 29723
loss: 0.9975423812866211,grad_norm: 0.9999990827681908, iteration: 29724
loss: 1.041200041770935,grad_norm: 0.9999996011497877, iteration: 29725
loss: 1.0065035820007324,grad_norm: 0.9999991034966325, iteration: 29726
loss: 1.0124462842941284,grad_norm: 0.9999990981159528, iteration: 29727
loss: 1.0751476287841797,grad_norm: 0.9999997637645329, iteration: 29728
loss: 1.019863247871399,grad_norm: 0.999999415196088, iteration: 29729
loss: 0.9725368022918701,grad_norm: 0.9051770143249247, iteration: 29730
loss: 1.0670435428619385,grad_norm: 0.9999994965601046, iteration: 29731
loss: 1.0285005569458008,grad_norm: 0.9453796380289534, iteration: 29732
loss: 0.9967896938323975,grad_norm: 0.9999990261009356, iteration: 29733
loss: 1.0147159099578857,grad_norm: 0.9144510720741694, iteration: 29734
loss: 1.0478038787841797,grad_norm: 0.9999991900920105, iteration: 29735
loss: 0.9819574356079102,grad_norm: 0.9999990649774868, iteration: 29736
loss: 1.037047028541565,grad_norm: 0.9999997457289904, iteration: 29737
loss: 1.0245858430862427,grad_norm: 0.9999991317388085, iteration: 29738
loss: 1.0012905597686768,grad_norm: 0.9478633960075009, iteration: 29739
loss: 1.0330392122268677,grad_norm: 0.9999992987922557, iteration: 29740
loss: 1.0269463062286377,grad_norm: 0.7816871022243667, iteration: 29741
loss: 0.9856215715408325,grad_norm: 0.9999991651412666, iteration: 29742
loss: 1.0347826480865479,grad_norm: 0.9999991392821523, iteration: 29743
loss: 1.0285303592681885,grad_norm: 0.9999992655860569, iteration: 29744
loss: 1.0165523290634155,grad_norm: 0.9601930049290418, iteration: 29745
loss: 0.9850305914878845,grad_norm: 0.95773908299593, iteration: 29746
loss: 1.0309566259384155,grad_norm: 0.9164973081678722, iteration: 29747
loss: 1.0024895668029785,grad_norm: 0.9918274428423083, iteration: 29748
loss: 1.0298341512680054,grad_norm: 0.8346375434063178, iteration: 29749
loss: 1.005291223526001,grad_norm: 0.9999991197461321, iteration: 29750
loss: 0.99673992395401,grad_norm: 0.9999991365506719, iteration: 29751
loss: 1.009555697441101,grad_norm: 0.9999992655140815, iteration: 29752
loss: 1.001731514930725,grad_norm: 0.9600443565875274, iteration: 29753
loss: 1.006917953491211,grad_norm: 0.7671563662646681, iteration: 29754
loss: 1.0040704011917114,grad_norm: 0.886876306960913, iteration: 29755
loss: 1.0486050844192505,grad_norm: 0.9804068202972072, iteration: 29756
loss: 1.0283814668655396,grad_norm: 0.9552767331021601, iteration: 29757
loss: 0.9714150428771973,grad_norm: 0.9999990023215894, iteration: 29758
loss: 1.0313948392868042,grad_norm: 0.9834574618919678, iteration: 29759
loss: 0.9958121180534363,grad_norm: 0.9271855916149258, iteration: 29760
loss: 0.9838090538978577,grad_norm: 0.9999990309564588, iteration: 29761
loss: 1.0292803049087524,grad_norm: 0.9999991697312555, iteration: 29762
loss: 0.9731053709983826,grad_norm: 0.9999993004916077, iteration: 29763
loss: 1.0287463665008545,grad_norm: 0.9999992111903062, iteration: 29764
loss: 0.9843893051147461,grad_norm: 0.949938622251957, iteration: 29765
loss: 1.011289119720459,grad_norm: 0.7530637683396488, iteration: 29766
loss: 1.0407525300979614,grad_norm: 0.9999991907902421, iteration: 29767
loss: 1.0030370950698853,grad_norm: 0.9679671772835345, iteration: 29768
loss: 1.0170525312423706,grad_norm: 0.9999992149216186, iteration: 29769
loss: 0.99220210313797,grad_norm: 0.8954124322203907, iteration: 29770
loss: 1.0050400495529175,grad_norm: 0.9999993793017907, iteration: 29771
loss: 1.0079706907272339,grad_norm: 0.9927474543014112, iteration: 29772
loss: 1.0051448345184326,grad_norm: 0.9993163920594317, iteration: 29773
loss: 1.0196529626846313,grad_norm: 0.9999994144320358, iteration: 29774
loss: 1.0483413934707642,grad_norm: 0.999999256657696, iteration: 29775
loss: 1.0137317180633545,grad_norm: 0.9999990768666878, iteration: 29776
loss: 0.9995129704475403,grad_norm: 0.8587992821839165, iteration: 29777
loss: 1.016998291015625,grad_norm: 0.9241703706896061, iteration: 29778
loss: 1.0355746746063232,grad_norm: 0.9999992464828776, iteration: 29779
loss: 1.0069011449813843,grad_norm: 0.969870383212704, iteration: 29780
loss: 1.0215517282485962,grad_norm: 0.9999992015571786, iteration: 29781
loss: 0.9840625524520874,grad_norm: 0.999999043380522, iteration: 29782
loss: 0.968985378742218,grad_norm: 0.9999989670274451, iteration: 29783
loss: 1.0516130924224854,grad_norm: 0.9999991446597204, iteration: 29784
loss: 0.9568225145339966,grad_norm: 0.9999991407469953, iteration: 29785
loss: 1.0412284135818481,grad_norm: 0.9984443605169264, iteration: 29786
loss: 1.0103647708892822,grad_norm: 0.9958599545895193, iteration: 29787
loss: 1.0095449686050415,grad_norm: 0.9999992274931628, iteration: 29788
loss: 1.0168660879135132,grad_norm: 0.9999991723338699, iteration: 29789
loss: 1.018843173980713,grad_norm: 0.9955170633334405, iteration: 29790
loss: 0.989132285118103,grad_norm: 0.8976831040328962, iteration: 29791
loss: 1.000156044960022,grad_norm: 0.9999991335555937, iteration: 29792
loss: 1.0128748416900635,grad_norm: 0.9999989370427685, iteration: 29793
loss: 1.01027512550354,grad_norm: 0.9999991038803732, iteration: 29794
loss: 0.9469634294509888,grad_norm: 0.9999991263225935, iteration: 29795
loss: 1.0226656198501587,grad_norm: 0.8112085202054778, iteration: 29796
loss: 0.9677205681800842,grad_norm: 0.9999991150118592, iteration: 29797
loss: 1.0079721212387085,grad_norm: 0.9929837415241994, iteration: 29798
loss: 1.0369547605514526,grad_norm: 0.9999991948603275, iteration: 29799
loss: 1.029842495918274,grad_norm: 0.99999924363862, iteration: 29800
loss: 0.998042643070221,grad_norm: 0.9999990542398053, iteration: 29801
loss: 1.0192766189575195,grad_norm: 0.8199344651283665, iteration: 29802
loss: 0.9965299367904663,grad_norm: 0.9999990216441933, iteration: 29803
loss: 0.9971204996109009,grad_norm: 0.9999992994732488, iteration: 29804
loss: 0.9734930992126465,grad_norm: 0.9999993147027837, iteration: 29805
loss: 0.9681749939918518,grad_norm: 0.9999991898127252, iteration: 29806
loss: 0.9714548587799072,grad_norm: 0.9999997938410397, iteration: 29807
loss: 1.0161449909210205,grad_norm: 0.9999989981761994, iteration: 29808
loss: 0.9718785881996155,grad_norm: 0.9491841137010778, iteration: 29809
loss: 1.0020561218261719,grad_norm: 0.9999992711620711, iteration: 29810
loss: 0.976192057132721,grad_norm: 0.9999989215546415, iteration: 29811
loss: 0.9860378503799438,grad_norm: 0.945790512280624, iteration: 29812
loss: 1.0125476121902466,grad_norm: 0.8003914803848692, iteration: 29813
loss: 0.9975274801254272,grad_norm: 0.9999991096907654, iteration: 29814
loss: 0.9743155241012573,grad_norm: 0.9999992346845238, iteration: 29815
loss: 1.0089824199676514,grad_norm: 0.9999990724453783, iteration: 29816
loss: 1.0247821807861328,grad_norm: 0.9999991726606813, iteration: 29817
loss: 1.0330010652542114,grad_norm: 0.9999990714474448, iteration: 29818
loss: 1.0445657968521118,grad_norm: 0.9884323291885543, iteration: 29819
loss: 0.9609543085098267,grad_norm: 0.9999992082771199, iteration: 29820
loss: 1.0001457929611206,grad_norm: 0.9203671602100566, iteration: 29821
loss: 0.9881876111030579,grad_norm: 0.9999996481932634, iteration: 29822
loss: 1.0260969400405884,grad_norm: 0.9010544472479928, iteration: 29823
loss: 0.9897571206092834,grad_norm: 0.9402738065866919, iteration: 29824
loss: 1.0372642278671265,grad_norm: 0.9706230701228378, iteration: 29825
loss: 1.0171984434127808,grad_norm: 0.999999110321181, iteration: 29826
loss: 0.9993622303009033,grad_norm: 0.863963615206148, iteration: 29827
loss: 1.0244741439819336,grad_norm: 0.9999991586703177, iteration: 29828
loss: 0.9869688749313354,grad_norm: 0.9999989695944694, iteration: 29829
loss: 1.0095388889312744,grad_norm: 0.9440340395334479, iteration: 29830
loss: 0.9987573623657227,grad_norm: 0.9999991483398333, iteration: 29831
loss: 1.0190465450286865,grad_norm: 0.9999991739915313, iteration: 29832
loss: 1.0385961532592773,grad_norm: 0.9567689394356941, iteration: 29833
loss: 1.0159142017364502,grad_norm: 0.8710147692017366, iteration: 29834
loss: 0.9975317120552063,grad_norm: 0.885535793211841, iteration: 29835
loss: 0.9775827527046204,grad_norm: 0.9999990266345491, iteration: 29836
loss: 0.9940668940544128,grad_norm: 0.8922593818755217, iteration: 29837
loss: 1.0065867900848389,grad_norm: 0.9847906411630056, iteration: 29838
loss: 1.0203779935836792,grad_norm: 0.966398326440475, iteration: 29839
loss: 0.9801643490791321,grad_norm: 0.8754158890020739, iteration: 29840
loss: 1.0071591138839722,grad_norm: 0.8235388648282738, iteration: 29841
loss: 0.9791375398635864,grad_norm: 0.9191611610818501, iteration: 29842
loss: 1.0064008235931396,grad_norm: 0.999999143987643, iteration: 29843
loss: 0.9786596894264221,grad_norm: 0.9999990576574097, iteration: 29844
loss: 0.9979672431945801,grad_norm: 0.9469690899567617, iteration: 29845
loss: 1.035627007484436,grad_norm: 0.8698938469172847, iteration: 29846
loss: 1.012160062789917,grad_norm: 0.901080430169804, iteration: 29847
loss: 0.944266676902771,grad_norm: 0.894828155243749, iteration: 29848
loss: 0.9766989350318909,grad_norm: 0.9999991544401247, iteration: 29849
loss: 1.054258108139038,grad_norm: 0.9999990579208452, iteration: 29850
loss: 1.0124222040176392,grad_norm: 0.9999991147377618, iteration: 29851
loss: 1.0277066230773926,grad_norm: 0.8946868260743279, iteration: 29852
loss: 1.0167044401168823,grad_norm: 0.9716138361073631, iteration: 29853
loss: 1.0205016136169434,grad_norm: 0.999999207901167, iteration: 29854
loss: 1.0033109188079834,grad_norm: 0.999999189144894, iteration: 29855
loss: 0.995324969291687,grad_norm: 0.9999991886426384, iteration: 29856
loss: 1.004986047744751,grad_norm: 0.9999989904606004, iteration: 29857
loss: 1.0085512399673462,grad_norm: 0.9999992080479453, iteration: 29858
loss: 1.0428094863891602,grad_norm: 0.988980270148922, iteration: 29859
loss: 1.0250946283340454,grad_norm: 0.9080371248627924, iteration: 29860
loss: 0.9865724444389343,grad_norm: 0.9999992167646576, iteration: 29861
loss: 1.0217702388763428,grad_norm: 0.9786411367228479, iteration: 29862
loss: 1.1225312948226929,grad_norm: 0.9999995336146883, iteration: 29863
loss: 0.9753666520118713,grad_norm: 0.9999991898297282, iteration: 29864
loss: 1.0234133005142212,grad_norm: 0.9999991467333625, iteration: 29865
loss: 1.0248007774353027,grad_norm: 0.916949432962206, iteration: 29866
loss: 1.024219274520874,grad_norm: 0.9204438688690934, iteration: 29867
loss: 1.0357683897018433,grad_norm: 0.8728504890013937, iteration: 29868
loss: 1.0118762254714966,grad_norm: 0.9999990885476784, iteration: 29869
loss: 1.0121248960494995,grad_norm: 0.9999990621787891, iteration: 29870
loss: 0.9955636262893677,grad_norm: 0.9999992742317545, iteration: 29871
loss: 1.0025321245193481,grad_norm: 0.8433659652676062, iteration: 29872
loss: 0.9676824808120728,grad_norm: 0.9999992948007272, iteration: 29873
loss: 1.022983193397522,grad_norm: 0.9624542767812093, iteration: 29874
loss: 0.9721489548683167,grad_norm: 0.9999990177927793, iteration: 29875
loss: 1.0427249670028687,grad_norm: 0.999999372568999, iteration: 29876
loss: 1.0038540363311768,grad_norm: 0.9999991839010219, iteration: 29877
loss: 0.9971211552619934,grad_norm: 0.9421147812043801, iteration: 29878
loss: 0.9978278875350952,grad_norm: 0.9110928337747164, iteration: 29879
loss: 1.0439534187316895,grad_norm: 0.9694028219861025, iteration: 29880
loss: 0.9898955821990967,grad_norm: 0.9999991842997007, iteration: 29881
loss: 1.0076148509979248,grad_norm: 0.9528354030045738, iteration: 29882
loss: 1.0192571878433228,grad_norm: 0.9999991280652138, iteration: 29883
loss: 1.0285195112228394,grad_norm: 0.9800365387421748, iteration: 29884
loss: 0.9856982827186584,grad_norm: 0.999999188885741, iteration: 29885
loss: 1.0068062543869019,grad_norm: 0.8269689466599097, iteration: 29886
loss: 1.0310393571853638,grad_norm: 0.9429485241669854, iteration: 29887
loss: 1.004530668258667,grad_norm: 0.9200696654199315, iteration: 29888
loss: 1.0326623916625977,grad_norm: 0.9131172521554721, iteration: 29889
loss: 1.0352845191955566,grad_norm: 0.902275224004434, iteration: 29890
loss: 1.0190850496292114,grad_norm: 0.9395554957545649, iteration: 29891
loss: 0.979486882686615,grad_norm: 0.999999095419936, iteration: 29892
loss: 1.0273253917694092,grad_norm: 0.9999995377502191, iteration: 29893
loss: 1.0079014301300049,grad_norm: 0.8997933901579606, iteration: 29894
loss: 1.0082833766937256,grad_norm: 0.9999994181382104, iteration: 29895
loss: 1.033268928527832,grad_norm: 0.9526620081125133, iteration: 29896
loss: 1.0021681785583496,grad_norm: 0.9449010340565775, iteration: 29897
loss: 0.9884902834892273,grad_norm: 0.9593685790658124, iteration: 29898
loss: 0.9962607622146606,grad_norm: 0.8552381673555555, iteration: 29899
loss: 1.0008996725082397,grad_norm: 0.9999992007340353, iteration: 29900
loss: 0.9640746712684631,grad_norm: 0.9872012484462391, iteration: 29901
loss: 1.0051604509353638,grad_norm: 0.9245874764466714, iteration: 29902
loss: 0.9814071655273438,grad_norm: 0.8585281751177842, iteration: 29903
loss: 1.0135958194732666,grad_norm: 0.9999990300242625, iteration: 29904
loss: 0.9843675494194031,grad_norm: 0.9555135890637344, iteration: 29905
loss: 1.0220502614974976,grad_norm: 0.9999992024123825, iteration: 29906
loss: 1.0023720264434814,grad_norm: 0.9999993604929389, iteration: 29907
loss: 0.9484074711799622,grad_norm: 0.9999991029555851, iteration: 29908
loss: 0.9996259808540344,grad_norm: 0.9999990179969799, iteration: 29909
loss: 0.9879223704338074,grad_norm: 0.9999990416369021, iteration: 29910
loss: 1.0471446514129639,grad_norm: 0.9999990485465458, iteration: 29911
loss: 0.9452199935913086,grad_norm: 0.8711291689207367, iteration: 29912
loss: 1.020468831062317,grad_norm: 0.9999991928769588, iteration: 29913
loss: 1.017843246459961,grad_norm: 0.8631693350860401, iteration: 29914
loss: 1.038378119468689,grad_norm: 0.9443669781875172, iteration: 29915
loss: 1.0053021907806396,grad_norm: 0.9999991486747277, iteration: 29916
loss: 1.0067391395568848,grad_norm: 0.9999992688959504, iteration: 29917
loss: 1.0100191831588745,grad_norm: 0.9999990625517172, iteration: 29918
loss: 0.9457642436027527,grad_norm: 0.9677962809302711, iteration: 29919
loss: 1.0097932815551758,grad_norm: 0.9999994926462507, iteration: 29920
loss: 0.9760485887527466,grad_norm: 0.9883471058211892, iteration: 29921
loss: 0.9884142279624939,grad_norm: 0.9005884266457146, iteration: 29922
loss: 1.0201951265335083,grad_norm: 0.999999103104235, iteration: 29923
loss: 0.9973810315132141,grad_norm: 0.9999991021759095, iteration: 29924
loss: 1.0179723501205444,grad_norm: 0.9906131393586304, iteration: 29925
loss: 0.9930513501167297,grad_norm: 0.9999991759235332, iteration: 29926
loss: 1.0171350240707397,grad_norm: 0.9999992332268758, iteration: 29927
loss: 0.9567957520484924,grad_norm: 0.897707131007741, iteration: 29928
loss: 0.9890203475952148,grad_norm: 0.9999991454984715, iteration: 29929
loss: 1.0081144571304321,grad_norm: 0.9585716993512193, iteration: 29930
loss: 1.0019235610961914,grad_norm: 0.9999994736341253, iteration: 29931
loss: 1.0384184122085571,grad_norm: 0.9999990664385153, iteration: 29932
loss: 0.9983518719673157,grad_norm: 0.8462295315194226, iteration: 29933
loss: 1.028293251991272,grad_norm: 0.7904076557291723, iteration: 29934
loss: 1.0316672325134277,grad_norm: 0.927507013408285, iteration: 29935
loss: 1.0447560548782349,grad_norm: 0.9999992766259956, iteration: 29936
loss: 0.9714838266372681,grad_norm: 0.9999991220509774, iteration: 29937
loss: 1.0416134595870972,grad_norm: 0.9999992740688787, iteration: 29938
loss: 1.0235471725463867,grad_norm: 0.9324453769833444, iteration: 29939
loss: 1.054000973701477,grad_norm: 0.999999333742957, iteration: 29940
loss: 1.0048227310180664,grad_norm: 0.9999993414744585, iteration: 29941
loss: 1.0337495803833008,grad_norm: 0.9043406850502873, iteration: 29942
loss: 0.9812532663345337,grad_norm: 0.9462724494115056, iteration: 29943
loss: 1.0152956247329712,grad_norm: 0.9999993002559848, iteration: 29944
loss: 1.00798499584198,grad_norm: 0.9999990852844987, iteration: 29945
loss: 1.0361554622650146,grad_norm: 0.9999991477990114, iteration: 29946
loss: 1.0242480039596558,grad_norm: 0.9265567012339117, iteration: 29947
loss: 1.0054126977920532,grad_norm: 0.8757197855125786, iteration: 29948
loss: 1.0565015077590942,grad_norm: 0.9999996161219529, iteration: 29949
loss: 0.9844779372215271,grad_norm: 0.995001020894272, iteration: 29950
loss: 0.9896456003189087,grad_norm: 0.999999075523236, iteration: 29951
loss: 1.0149617195129395,grad_norm: 0.9999999355505427, iteration: 29952
loss: 1.0091215372085571,grad_norm: 0.999999163088066, iteration: 29953
loss: 1.0260778665542603,grad_norm: 0.999999353679727, iteration: 29954
loss: 0.9806959629058838,grad_norm: 0.8333549403350293, iteration: 29955
loss: 1.0133237838745117,grad_norm: 0.9999989824619362, iteration: 29956
loss: 0.9516677856445312,grad_norm: 0.9067038317250847, iteration: 29957
loss: 1.0365378856658936,grad_norm: 0.9999991546827375, iteration: 29958
loss: 1.0259791612625122,grad_norm: 0.9999990548799148, iteration: 29959
loss: 1.04429292678833,grad_norm: 0.9999990716049524, iteration: 29960
loss: 1.0116902589797974,grad_norm: 0.9999992860564659, iteration: 29961
loss: 0.9837509393692017,grad_norm: 0.9692953492838701, iteration: 29962
loss: 0.9936018586158752,grad_norm: 0.9999990441388424, iteration: 29963
loss: 1.0054271221160889,grad_norm: 0.9999992401518594, iteration: 29964
loss: 1.0813997983932495,grad_norm: 0.9999992136599763, iteration: 29965
loss: 1.018282175064087,grad_norm: 0.7742537496792178, iteration: 29966
loss: 1.0124881267547607,grad_norm: 0.9999990933515196, iteration: 29967
loss: 1.026586651802063,grad_norm: 0.9710962296939947, iteration: 29968
loss: 0.9867997169494629,grad_norm: 0.9999990979409593, iteration: 29969
loss: 1.000678539276123,grad_norm: 0.9999990624215729, iteration: 29970
loss: 1.0308163166046143,grad_norm: 0.9999991973359764, iteration: 29971
loss: 1.0211546421051025,grad_norm: 0.999999037138036, iteration: 29972
loss: 1.000482201576233,grad_norm: 0.9999990857604687, iteration: 29973
loss: 1.0011917352676392,grad_norm: 0.9012033183343793, iteration: 29974
loss: 0.998028039932251,grad_norm: 0.9999991348406687, iteration: 29975
loss: 1.0011308193206787,grad_norm: 0.9054621574780122, iteration: 29976
loss: 0.9952213764190674,grad_norm: 0.9907902449642879, iteration: 29977
loss: 0.9866654872894287,grad_norm: 0.9586449803082869, iteration: 29978
loss: 1.0330713987350464,grad_norm: 0.832538710665235, iteration: 29979
loss: 0.9922996759414673,grad_norm: 0.9623019903414741, iteration: 29980
loss: 0.9651270508766174,grad_norm: 0.9999991181690919, iteration: 29981
loss: 1.0053037405014038,grad_norm: 0.8730828376777152, iteration: 29982
loss: 0.9950982332229614,grad_norm: 0.9999992510192699, iteration: 29983
loss: 1.059822916984558,grad_norm: 0.9214319592310082, iteration: 29984
loss: 1.0116217136383057,grad_norm: 0.9999992015777002, iteration: 29985
loss: 0.9926052689552307,grad_norm: 0.9303638412411005, iteration: 29986
loss: 1.0465885400772095,grad_norm: 0.9999993322111954, iteration: 29987
loss: 0.9963514804840088,grad_norm: 0.8620736628735467, iteration: 29988
loss: 1.0260645151138306,grad_norm: 0.872507579337657, iteration: 29989
loss: 1.0120407342910767,grad_norm: 0.8826137772657746, iteration: 29990
loss: 0.9874864816665649,grad_norm: 0.8325297878784021, iteration: 29991
loss: 0.9869273900985718,grad_norm: 0.9805420989918756, iteration: 29992
loss: 1.001974105834961,grad_norm: 0.873067992779804, iteration: 29993
loss: 0.9792375564575195,grad_norm: 0.9640273391112837, iteration: 29994
loss: 1.026302695274353,grad_norm: 0.9999990669594692, iteration: 29995
loss: 0.9832298159599304,grad_norm: 0.9117493565481195, iteration: 29996
loss: 0.997111976146698,grad_norm: 0.9999992839552061, iteration: 29997
loss: 1.0314686298370361,grad_norm: 0.9999991098178517, iteration: 29998
loss: 1.0244295597076416,grad_norm: 0.9999989834291174, iteration: 29999
loss: 1.0052940845489502,grad_norm: 0.9999990611269427, iteration: 30000
Evaluating at step 30000
{'val': 0.9989889487624168, 'test': 2.7547546209999547}
loss: 1.0109968185424805,grad_norm: 0.9999991145949155, iteration: 30001
loss: 1.0248496532440186,grad_norm: 0.9541938017269951, iteration: 30002
loss: 0.9997978210449219,grad_norm: 0.9999991290536234, iteration: 30003
loss: 1.0358920097351074,grad_norm: 0.9999999538658986, iteration: 30004
loss: 1.019095778465271,grad_norm: 0.9642014221855904, iteration: 30005
loss: 1.0024648904800415,grad_norm: 0.999999174602137, iteration: 30006
loss: 1.0089079141616821,grad_norm: 0.9235960709125682, iteration: 30007
loss: 1.010505199432373,grad_norm: 0.8121893356610559, iteration: 30008
loss: 0.9539688229560852,grad_norm: 0.9525055222510649, iteration: 30009
loss: 1.0152184963226318,grad_norm: 0.7959394523812882, iteration: 30010
loss: 1.0220108032226562,grad_norm: 0.9999990285001579, iteration: 30011
loss: 1.0479580163955688,grad_norm: 0.9999994874063387, iteration: 30012
loss: 0.9981176257133484,grad_norm: 0.9315523172571404, iteration: 30013
loss: 1.0461146831512451,grad_norm: 0.8230413911655193, iteration: 30014
loss: 0.9500018954277039,grad_norm: 0.9999994885849391, iteration: 30015
loss: 1.0414979457855225,grad_norm: 0.9206687285261052, iteration: 30016
loss: 0.9780977964401245,grad_norm: 0.9555050873688887, iteration: 30017
loss: 0.98408043384552,grad_norm: 0.9999990873701979, iteration: 30018
loss: 1.0242937803268433,grad_norm: 0.9876726975304998, iteration: 30019
loss: 0.9866728186607361,grad_norm: 0.9999991360008168, iteration: 30020
loss: 1.0074313879013062,grad_norm: 0.9999990046554434, iteration: 30021
loss: 0.9955974221229553,grad_norm: 0.8837266911608148, iteration: 30022
loss: 1.0333044528961182,grad_norm: 0.9999992100958226, iteration: 30023
loss: 1.040365219116211,grad_norm: 0.9999991033344436, iteration: 30024
loss: 1.0129578113555908,grad_norm: 0.9586252347493752, iteration: 30025
loss: 1.0223299264907837,grad_norm: 0.8241520369732352, iteration: 30026
loss: 1.0129170417785645,grad_norm: 0.935989558218775, iteration: 30027
loss: 1.0287859439849854,grad_norm: 0.9999991745062078, iteration: 30028
loss: 0.9515069127082825,grad_norm: 0.999999256095677, iteration: 30029
loss: 1.0246069431304932,grad_norm: 0.9785329000887644, iteration: 30030
loss: 0.968052864074707,grad_norm: 0.9999990756854622, iteration: 30031
loss: 1.0224952697753906,grad_norm: 0.9999991197440496, iteration: 30032
loss: 0.9421019554138184,grad_norm: 0.9902819145431152, iteration: 30033
loss: 0.995508074760437,grad_norm: 0.9999992939923821, iteration: 30034
loss: 1.0319243669509888,grad_norm: 0.9999992157763172, iteration: 30035
loss: 1.0032777786254883,grad_norm: 0.9999990051580807, iteration: 30036
loss: 1.0543192625045776,grad_norm: 0.999834051687976, iteration: 30037
loss: 1.0054720640182495,grad_norm: 0.9999990877057943, iteration: 30038
loss: 0.982859194278717,grad_norm: 0.9999990869169332, iteration: 30039
loss: 0.9761437177658081,grad_norm: 0.9999990615335762, iteration: 30040
loss: 1.0096235275268555,grad_norm: 0.9999992366942876, iteration: 30041
loss: 0.9438549876213074,grad_norm: 0.9999991296146253, iteration: 30042
loss: 0.9827465415000916,grad_norm: 0.9864226300814184, iteration: 30043
loss: 1.0464069843292236,grad_norm: 0.9394458520003015, iteration: 30044
loss: 1.0057977437973022,grad_norm: 0.9999992166557368, iteration: 30045
loss: 1.0487576723098755,grad_norm: 0.9999992717447722, iteration: 30046
loss: 0.9946198463439941,grad_norm: 0.9438479325372642, iteration: 30047
loss: 1.0115121603012085,grad_norm: 0.9350868634791062, iteration: 30048
loss: 1.0158777236938477,grad_norm: 0.9539978270737095, iteration: 30049
loss: 0.9892089366912842,grad_norm: 0.9999991629323529, iteration: 30050
loss: 1.0392484664916992,grad_norm: 0.9999991316146252, iteration: 30051
loss: 0.9979112148284912,grad_norm: 0.9198084584640887, iteration: 30052
loss: 0.9786167740821838,grad_norm: 0.999999202729543, iteration: 30053
loss: 1.0124014616012573,grad_norm: 0.9999990391182554, iteration: 30054
loss: 0.9866586923599243,grad_norm: 0.9999991137907238, iteration: 30055
loss: 1.0039547681808472,grad_norm: 0.8801451808976896, iteration: 30056
loss: 1.0292762517929077,grad_norm: 0.9999990435430893, iteration: 30057
loss: 1.0134719610214233,grad_norm: 0.9526262231815724, iteration: 30058
loss: 1.0221617221832275,grad_norm: 0.9999992551051101, iteration: 30059
loss: 0.9847575426101685,grad_norm: 0.9999989167563466, iteration: 30060
loss: 0.986366868019104,grad_norm: 0.9999991982417753, iteration: 30061
loss: 0.9888602495193481,grad_norm: 0.8230157721520656, iteration: 30062
loss: 1.0071653127670288,grad_norm: 0.999999188136447, iteration: 30063
loss: 1.0142502784729004,grad_norm: 0.999999123156461, iteration: 30064
loss: 0.9517303705215454,grad_norm: 0.9999991176326933, iteration: 30065
loss: 1.0399796962738037,grad_norm: 0.9999989933500496, iteration: 30066
loss: 1.0307857990264893,grad_norm: 0.9999991335356107, iteration: 30067
loss: 1.0405011177062988,grad_norm: 0.9999991087928871, iteration: 30068
loss: 1.0471315383911133,grad_norm: 0.8864661198303807, iteration: 30069
loss: 0.9862480163574219,grad_norm: 0.8480221075106453, iteration: 30070
loss: 1.0598970651626587,grad_norm: 0.9999996253820606, iteration: 30071
loss: 0.9921634197235107,grad_norm: 0.9999991136854671, iteration: 30072
loss: 0.9970998167991638,grad_norm: 0.9999990867264019, iteration: 30073
loss: 0.9947341680526733,grad_norm: 0.9913116510100737, iteration: 30074
loss: 1.0327672958374023,grad_norm: 0.8327950134566043, iteration: 30075
loss: 0.9943199753761292,grad_norm: 0.9999992327326551, iteration: 30076
loss: 1.0071872472763062,grad_norm: 0.9999991010456142, iteration: 30077
loss: 0.9977819323539734,grad_norm: 0.9238878749447446, iteration: 30078
loss: 1.0460810661315918,grad_norm: 0.9999991300078769, iteration: 30079
loss: 0.9974044561386108,grad_norm: 0.9999991998630384, iteration: 30080
loss: 0.9875193238258362,grad_norm: 0.8914148031741751, iteration: 30081
loss: 0.9858190417289734,grad_norm: 0.9999989751722161, iteration: 30082
loss: 1.0341582298278809,grad_norm: 0.9999991260076833, iteration: 30083
loss: 1.0056809186935425,grad_norm: 0.9999992556218574, iteration: 30084
loss: 1.0046899318695068,grad_norm: 0.9999991595259925, iteration: 30085
loss: 0.9614040851593018,grad_norm: 0.8961188792514636, iteration: 30086
loss: 0.9783366918563843,grad_norm: 0.8855624398632347, iteration: 30087
loss: 1.02880859375,grad_norm: 0.861068093143534, iteration: 30088
loss: 1.0090763568878174,grad_norm: 0.9999991736605057, iteration: 30089
loss: 1.0234992504119873,grad_norm: 0.9999997329672126, iteration: 30090
loss: 1.011035442352295,grad_norm: 0.9999991830232825, iteration: 30091
loss: 1.0476765632629395,grad_norm: 0.9999994480443072, iteration: 30092
loss: 1.025701642036438,grad_norm: 0.9999990413314235, iteration: 30093
loss: 1.0475841760635376,grad_norm: 0.9999993531872609, iteration: 30094
loss: 1.030419945716858,grad_norm: 0.9999992239017241, iteration: 30095
loss: 1.016487717628479,grad_norm: 0.9999990817900699, iteration: 30096
loss: 0.9960840344429016,grad_norm: 0.9999990524274782, iteration: 30097
loss: 1.0153347253799438,grad_norm: 0.9999991295250644, iteration: 30098
loss: 1.0397108793258667,grad_norm: 0.9567629920440536, iteration: 30099
loss: 0.9950167536735535,grad_norm: 0.9999993863174924, iteration: 30100
loss: 1.0072282552719116,grad_norm: 0.875591883191606, iteration: 30101
loss: 0.9840489625930786,grad_norm: 0.94148178070624, iteration: 30102
loss: 1.0260862112045288,grad_norm: 0.9468143154874469, iteration: 30103
loss: 1.040428638458252,grad_norm: 0.9999989984829314, iteration: 30104
loss: 0.9934934973716736,grad_norm: 0.9130341639007561, iteration: 30105
loss: 1.0139012336730957,grad_norm: 0.9999993781779494, iteration: 30106
loss: 0.9942466020584106,grad_norm: 0.9818898294502663, iteration: 30107
loss: 1.0269290208816528,grad_norm: 0.7730967596100429, iteration: 30108
loss: 0.9732509851455688,grad_norm: 0.887255185123153, iteration: 30109
loss: 1.0102423429489136,grad_norm: 0.9999991112833899, iteration: 30110
loss: 0.9610756039619446,grad_norm: 0.9999992455229465, iteration: 30111
loss: 0.978452742099762,grad_norm: 0.9999989993687283, iteration: 30112
loss: 1.0121580362319946,grad_norm: 0.9999991293099211, iteration: 30113
loss: 1.009566307067871,grad_norm: 0.9999990300357483, iteration: 30114
loss: 1.0899815559387207,grad_norm: 0.9999996197816938, iteration: 30115
loss: 1.0063813924789429,grad_norm: 0.999999181180387, iteration: 30116
loss: 0.9756959676742554,grad_norm: 0.8690638219296766, iteration: 30117
loss: 0.9971776604652405,grad_norm: 0.9999991423306352, iteration: 30118
loss: 1.0042619705200195,grad_norm: 0.8338953344427414, iteration: 30119
loss: 1.0156612396240234,grad_norm: 0.9876945731955676, iteration: 30120
loss: 0.9998863339424133,grad_norm: 0.870673881232131, iteration: 30121
loss: 1.0062469244003296,grad_norm: 0.9943421101914516, iteration: 30122
loss: 0.9983899593353271,grad_norm: 0.9402119012950586, iteration: 30123
loss: 1.0022838115692139,grad_norm: 0.8514843866032172, iteration: 30124
loss: 1.0575547218322754,grad_norm: 0.99999975130328, iteration: 30125
loss: 0.9972383379936218,grad_norm: 0.9748757277626431, iteration: 30126
loss: 0.9893969297409058,grad_norm: 0.9651473868555036, iteration: 30127
loss: 0.988598108291626,grad_norm: 0.9999992086184923, iteration: 30128
loss: 1.0132014751434326,grad_norm: 0.8796651138893377, iteration: 30129
loss: 1.0321898460388184,grad_norm: 0.9170425541282241, iteration: 30130
loss: 0.9826241135597229,grad_norm: 0.999999194902078, iteration: 30131
loss: 1.0355339050292969,grad_norm: 0.9999991931920346, iteration: 30132
loss: 1.0197957754135132,grad_norm: 0.9797501802189466, iteration: 30133
loss: 1.0011005401611328,grad_norm: 0.9999993112794048, iteration: 30134
loss: 1.0573325157165527,grad_norm: 0.9999991267870477, iteration: 30135
loss: 0.9802375435829163,grad_norm: 0.9999992363052858, iteration: 30136
loss: 1.010122299194336,grad_norm: 0.9999990073087153, iteration: 30137
loss: 1.0063064098358154,grad_norm: 0.9999990751953725, iteration: 30138
loss: 0.9829412698745728,grad_norm: 0.9999996363452075, iteration: 30139
loss: 0.9897065758705139,grad_norm: 0.9412408596705537, iteration: 30140
loss: 1.0223406553268433,grad_norm: 0.9999991079746937, iteration: 30141
loss: 1.0360407829284668,grad_norm: 0.9999822906334093, iteration: 30142
loss: 1.0270512104034424,grad_norm: 0.9632046788919639, iteration: 30143
loss: 1.0113003253936768,grad_norm: 0.9999991294625842, iteration: 30144
loss: 0.9945234656333923,grad_norm: 0.9999990828533778, iteration: 30145
loss: 1.0173351764678955,grad_norm: 0.9999989921424527, iteration: 30146
loss: 1.008257508277893,grad_norm: 0.9637018463042397, iteration: 30147
loss: 1.0305776596069336,grad_norm: 0.9999990396600241, iteration: 30148
loss: 0.9984990358352661,grad_norm: 0.9999989852024063, iteration: 30149
loss: 1.008756399154663,grad_norm: 0.9999988825435242, iteration: 30150
loss: 1.014359951019287,grad_norm: 0.999999186783575, iteration: 30151
loss: 1.0310741662979126,grad_norm: 0.9060475931900369, iteration: 30152
loss: 1.030218482017517,grad_norm: 0.9999992296217802, iteration: 30153
loss: 0.9983841180801392,grad_norm: 0.9999990604611402, iteration: 30154
loss: 0.9938610196113586,grad_norm: 0.9999991696850519, iteration: 30155
loss: 0.9930408000946045,grad_norm: 0.9999989970210559, iteration: 30156
loss: 0.9879438877105713,grad_norm: 0.9999991233986114, iteration: 30157
loss: 1.0323973894119263,grad_norm: 0.9049951562143772, iteration: 30158
loss: 0.9659499526023865,grad_norm: 0.8434521924616432, iteration: 30159
loss: 0.9796915054321289,grad_norm: 0.9999991019879442, iteration: 30160
loss: 0.9858998656272888,grad_norm: 0.8904385467913285, iteration: 30161
loss: 0.9830831289291382,grad_norm: 0.9058240950638031, iteration: 30162
loss: 0.9825800061225891,grad_norm: 0.8157824757743963, iteration: 30163
loss: 1.0200717449188232,grad_norm: 0.8487702818514823, iteration: 30164
loss: 1.028294324874878,grad_norm: 0.933538856407364, iteration: 30165
loss: 1.0232912302017212,grad_norm: 0.9038715027755982, iteration: 30166
loss: 1.038212776184082,grad_norm: 0.9999998259236494, iteration: 30167
loss: 1.0332392454147339,grad_norm: 0.9634223456164971, iteration: 30168
loss: 1.0226227045059204,grad_norm: 0.9603322219481192, iteration: 30169
loss: 1.028864860534668,grad_norm: 0.8653217999500241, iteration: 30170
loss: 1.008754849433899,grad_norm: 0.9073236939617773, iteration: 30171
loss: 1.0395214557647705,grad_norm: 0.9159268481366524, iteration: 30172
loss: 0.9853145480155945,grad_norm: 0.9999990800203, iteration: 30173
loss: 1.0364121198654175,grad_norm: 0.9999992052680851, iteration: 30174
loss: 1.0044608116149902,grad_norm: 0.9999992077427697, iteration: 30175
loss: 0.9787349104881287,grad_norm: 0.8721553639947696, iteration: 30176
loss: 1.0136427879333496,grad_norm: 0.9224729619255513, iteration: 30177
loss: 1.0142912864685059,grad_norm: 0.999999190902253, iteration: 30178
loss: 1.0342589616775513,grad_norm: 0.9999994797035107, iteration: 30179
loss: 1.0163912773132324,grad_norm: 0.9999991993699747, iteration: 30180
loss: 1.0300976037979126,grad_norm: 0.9999992267812206, iteration: 30181
loss: 1.0408687591552734,grad_norm: 0.9999990972582016, iteration: 30182
loss: 0.9818796515464783,grad_norm: 0.9999991130155956, iteration: 30183
loss: 1.0494612455368042,grad_norm: 0.9999995867917558, iteration: 30184
loss: 1.010861873626709,grad_norm: 0.9999992772792808, iteration: 30185
loss: 0.991942286491394,grad_norm: 0.8606365369037052, iteration: 30186
loss: 1.0185822248458862,grad_norm: 0.8388716499441456, iteration: 30187
loss: 1.0084514617919922,grad_norm: 0.9051397825557757, iteration: 30188
loss: 0.9670766592025757,grad_norm: 0.9522116577900369, iteration: 30189
loss: 0.9980421662330627,grad_norm: 0.8227983266718142, iteration: 30190
loss: 0.9308969974517822,grad_norm: 0.9999990251627939, iteration: 30191
loss: 0.967157781124115,grad_norm: 0.7853710200796962, iteration: 30192
loss: 0.9986971616744995,grad_norm: 0.9999991061629805, iteration: 30193
loss: 1.0194377899169922,grad_norm: 0.9999991349687349, iteration: 30194
loss: 1.0085175037384033,grad_norm: 0.9657605700019612, iteration: 30195
loss: 1.004480242729187,grad_norm: 0.9999990378802055, iteration: 30196
loss: 1.0334594249725342,grad_norm: 0.9999990050464226, iteration: 30197
loss: 1.0084311962127686,grad_norm: 0.9999990834889195, iteration: 30198
loss: 1.0053141117095947,grad_norm: 0.9999991594300962, iteration: 30199
loss: 1.0047006607055664,grad_norm: 0.9999991201893603, iteration: 30200
loss: 1.0016322135925293,grad_norm: 0.9999992483893373, iteration: 30201
loss: 0.9926480054855347,grad_norm: 0.9999991119783646, iteration: 30202
loss: 1.0096802711486816,grad_norm: 0.9921193983149301, iteration: 30203
loss: 0.9589324593544006,grad_norm: 0.999999008632855, iteration: 30204
loss: 1.0345369577407837,grad_norm: 0.8545374769974011, iteration: 30205
loss: 0.9955248832702637,grad_norm: 0.9999992322998092, iteration: 30206
loss: 1.0272045135498047,grad_norm: 0.9693888268830854, iteration: 30207
loss: 1.0100253820419312,grad_norm: 0.9677926477044905, iteration: 30208
loss: 0.9816887974739075,grad_norm: 0.917405500630092, iteration: 30209
loss: 1.0203473567962646,grad_norm: 0.9999991090799805, iteration: 30210
loss: 1.0126115083694458,grad_norm: 0.9539549002353152, iteration: 30211
loss: 0.9900760650634766,grad_norm: 0.9224121981924883, iteration: 30212
loss: 0.9908238053321838,grad_norm: 0.9436426938697986, iteration: 30213
loss: 1.0309537649154663,grad_norm: 0.9999990943935099, iteration: 30214
loss: 0.996529221534729,grad_norm: 0.9999991520239209, iteration: 30215
loss: 0.9804211854934692,grad_norm: 0.9735001092974205, iteration: 30216
loss: 1.053098440170288,grad_norm: 0.9999993857015078, iteration: 30217
loss: 1.048551082611084,grad_norm: 0.999999467140601, iteration: 30218
loss: 1.0110678672790527,grad_norm: 0.9580491208930135, iteration: 30219
loss: 0.9961525797843933,grad_norm: 0.999999054255904, iteration: 30220
loss: 1.0356636047363281,grad_norm: 0.897204003069903, iteration: 30221
loss: 0.9843120574951172,grad_norm: 0.8567810975728622, iteration: 30222
loss: 0.9958458542823792,grad_norm: 0.8287235442492262, iteration: 30223
loss: 1.0564042329788208,grad_norm: 0.9820704707780377, iteration: 30224
loss: 1.013556718826294,grad_norm: 0.9999990589525279, iteration: 30225
loss: 0.983275294303894,grad_norm: 0.8825258959105298, iteration: 30226
loss: 1.0326871871948242,grad_norm: 0.9999990503804663, iteration: 30227
loss: 0.9624317288398743,grad_norm: 0.9999990436367008, iteration: 30228
loss: 1.0170190334320068,grad_norm: 0.9999991680751419, iteration: 30229
loss: 0.9874794483184814,grad_norm: 0.999999264546237, iteration: 30230
loss: 0.9897327423095703,grad_norm: 0.9999992477278747, iteration: 30231
loss: 0.9940816164016724,grad_norm: 0.9999998275917593, iteration: 30232
loss: 1.0132973194122314,grad_norm: 0.9999991743942743, iteration: 30233
loss: 1.0139410495758057,grad_norm: 0.9999992600587501, iteration: 30234
loss: 0.9965806007385254,grad_norm: 0.9999990897256044, iteration: 30235
loss: 0.9908539056777954,grad_norm: 0.9859669889571618, iteration: 30236
loss: 1.018669843673706,grad_norm: 0.951964264523353, iteration: 30237
loss: 0.973497211933136,grad_norm: 0.9416230298471682, iteration: 30238
loss: 0.9890650510787964,grad_norm: 0.9055188990261798, iteration: 30239
loss: 1.006880283355713,grad_norm: 0.8347525863553564, iteration: 30240
loss: 1.0300921201705933,grad_norm: 0.9508816817544243, iteration: 30241
loss: 1.0244495868682861,grad_norm: 0.9999992608494277, iteration: 30242
loss: 1.023075819015503,grad_norm: 0.987708678370613, iteration: 30243
loss: 0.9996564388275146,grad_norm: 0.8945850130948919, iteration: 30244
loss: 1.0150890350341797,grad_norm: 0.9755363761409555, iteration: 30245
loss: 0.9939928650856018,grad_norm: 0.9999991730726463, iteration: 30246
loss: 1.0290087461471558,grad_norm: 0.9999989802064786, iteration: 30247
loss: 1.0087178945541382,grad_norm: 0.9445166479989465, iteration: 30248
loss: 1.0388362407684326,grad_norm: 0.9892829103258322, iteration: 30249
loss: 1.023716926574707,grad_norm: 0.8759002962346685, iteration: 30250
loss: 1.0529359579086304,grad_norm: 0.9999993384925352, iteration: 30251
loss: 0.955457866191864,grad_norm: 0.9999991630370907, iteration: 30252
loss: 0.9657866358757019,grad_norm: 0.9029796738139075, iteration: 30253
loss: 1.0333625078201294,grad_norm: 0.9999990979404544, iteration: 30254
loss: 1.024437427520752,grad_norm: 0.9999992885670625, iteration: 30255
loss: 0.9821796417236328,grad_norm: 0.8568568451791778, iteration: 30256
loss: 1.0088999271392822,grad_norm: 0.999999067689777, iteration: 30257
loss: 1.00355064868927,grad_norm: 0.9732688295698203, iteration: 30258
loss: 1.0141743421554565,grad_norm: 0.9999991784507846, iteration: 30259
loss: 0.9800752401351929,grad_norm: 0.9083997009824768, iteration: 30260
loss: 1.0350539684295654,grad_norm: 0.9999990565771995, iteration: 30261
loss: 0.997409462928772,grad_norm: 0.9533994263049523, iteration: 30262
loss: 0.9724882245063782,grad_norm: 0.9999990656554292, iteration: 30263
loss: 0.9939571619033813,grad_norm: 0.9029975990929822, iteration: 30264
loss: 1.0660344362258911,grad_norm: 0.9999997335658536, iteration: 30265
loss: 1.0084556341171265,grad_norm: 0.8692657205252816, iteration: 30266
loss: 1.0082134008407593,grad_norm: 0.9999991539494102, iteration: 30267
loss: 1.040088415145874,grad_norm: 0.9952824552559758, iteration: 30268
loss: 0.9809789657592773,grad_norm: 0.9999991402923818, iteration: 30269
loss: 1.031581163406372,grad_norm: 0.9999998848599669, iteration: 30270
loss: 1.002081274986267,grad_norm: 0.8970159267450223, iteration: 30271
loss: 1.0016992092132568,grad_norm: 0.9999991032049355, iteration: 30272
loss: 1.0197862386703491,grad_norm: 0.9999991208091102, iteration: 30273
loss: 0.9775263667106628,grad_norm: 0.8902891672004066, iteration: 30274
loss: 1.0140010118484497,grad_norm: 0.999999196517499, iteration: 30275
loss: 1.0461833477020264,grad_norm: 0.9999993225493673, iteration: 30276
loss: 1.0043549537658691,grad_norm: 0.9999989808931474, iteration: 30277
loss: 0.9722306132316589,grad_norm: 0.9999990343902498, iteration: 30278
loss: 1.0158616304397583,grad_norm: 0.9999994356868334, iteration: 30279
loss: 1.0030739307403564,grad_norm: 0.9943146131525863, iteration: 30280
loss: 1.0072181224822998,grad_norm: 0.9999993944961671, iteration: 30281
loss: 1.014298439025879,grad_norm: 0.8211091312074776, iteration: 30282
loss: 0.9758352637290955,grad_norm: 0.9080021863450842, iteration: 30283
loss: 0.9855419397354126,grad_norm: 0.9999991092500798, iteration: 30284
loss: 1.0076916217803955,grad_norm: 0.8113030891409697, iteration: 30285
loss: 0.9802350997924805,grad_norm: 0.9999992120903951, iteration: 30286
loss: 0.9855909943580627,grad_norm: 0.8599277799248264, iteration: 30287
loss: 0.9971234202384949,grad_norm: 0.999999079446252, iteration: 30288
loss: 0.983390748500824,grad_norm: 0.8624692873216794, iteration: 30289
loss: 0.9978088736534119,grad_norm: 0.7945608635575218, iteration: 30290
loss: 1.0328298807144165,grad_norm: 0.9999990120302061, iteration: 30291
loss: 0.9883900880813599,grad_norm: 0.9391394365778523, iteration: 30292
loss: 1.0287796258926392,grad_norm: 0.8159370929845806, iteration: 30293
loss: 1.0443100929260254,grad_norm: 0.8705770120275527, iteration: 30294
loss: 1.0121002197265625,grad_norm: 0.9999990436873907, iteration: 30295
loss: 0.9814810156822205,grad_norm: 0.9999990457235399, iteration: 30296
loss: 0.979133129119873,grad_norm: 0.9999990706461468, iteration: 30297
loss: 1.0249594449996948,grad_norm: 0.7504425605753011, iteration: 30298
loss: 0.9675116539001465,grad_norm: 0.9699499197400387, iteration: 30299
loss: 1.0153813362121582,grad_norm: 0.9247559346119215, iteration: 30300
loss: 0.9684638977050781,grad_norm: 0.9999991861411036, iteration: 30301
loss: 0.9891186356544495,grad_norm: 0.8303928229545121, iteration: 30302
loss: 1.001778244972229,grad_norm: 0.9999989886022195, iteration: 30303
loss: 1.031525731086731,grad_norm: 0.99999925134778, iteration: 30304
loss: 0.9925808906555176,grad_norm: 0.8657484049897641, iteration: 30305
loss: 0.9750851392745972,grad_norm: 0.9125006085377925, iteration: 30306
loss: 0.9982409477233887,grad_norm: 0.9963696877457536, iteration: 30307
loss: 1.0151009559631348,grad_norm: 0.932287089245772, iteration: 30308
loss: 1.025413155555725,grad_norm: 0.9999996212303176, iteration: 30309
loss: 0.9692063331604004,grad_norm: 0.9354507869551928, iteration: 30310
loss: 0.9645509719848633,grad_norm: 0.8051518929196335, iteration: 30311
loss: 1.0140019655227661,grad_norm: 0.9999991030093442, iteration: 30312
loss: 1.0443265438079834,grad_norm: 0.9999990875496735, iteration: 30313
loss: 1.0166057348251343,grad_norm: 0.9476795684920083, iteration: 30314
loss: 0.9609624743461609,grad_norm: 0.9387633801935938, iteration: 30315
loss: 0.9912918210029602,grad_norm: 0.9908268302458214, iteration: 30316
loss: 0.9830076694488525,grad_norm: 0.9755649141799599, iteration: 30317
loss: 0.9925382733345032,grad_norm: 0.9999990997460907, iteration: 30318
loss: 0.9956837892532349,grad_norm: 0.8376918073771242, iteration: 30319
loss: 1.055739402770996,grad_norm: 0.9999992244479193, iteration: 30320
loss: 1.0157880783081055,grad_norm: 0.9346271930237757, iteration: 30321
loss: 0.9703741669654846,grad_norm: 0.9999991565122467, iteration: 30322
loss: 0.9725189208984375,grad_norm: 0.8890590367353246, iteration: 30323
loss: 0.9963462352752686,grad_norm: 0.9999992011688253, iteration: 30324
loss: 1.0315243005752563,grad_norm: 0.8873117048145966, iteration: 30325
loss: 1.015762209892273,grad_norm: 0.863077007445222, iteration: 30326
loss: 0.9894615411758423,grad_norm: 0.9213897245147964, iteration: 30327
loss: 0.9913462400436401,grad_norm: 0.9723843128189575, iteration: 30328
loss: 0.9964529275894165,grad_norm: 0.8276363694989599, iteration: 30329
loss: 1.000673770904541,grad_norm: 0.999999145831326, iteration: 30330
loss: 1.0122376680374146,grad_norm: 0.999999133227201, iteration: 30331
loss: 1.0159826278686523,grad_norm: 0.9999989851278379, iteration: 30332
loss: 1.0286155939102173,grad_norm: 0.7288196721727315, iteration: 30333
loss: 0.9987235069274902,grad_norm: 0.8458947280105358, iteration: 30334
loss: 1.0019123554229736,grad_norm: 0.9999990925492342, iteration: 30335
loss: 1.0252035856246948,grad_norm: 0.9999995517585377, iteration: 30336
loss: 1.0001047849655151,grad_norm: 0.9403645518305649, iteration: 30337
loss: 0.975153386592865,grad_norm: 0.9948774459589258, iteration: 30338
loss: 1.0084232091903687,grad_norm: 0.8260454088527586, iteration: 30339
loss: 1.0305540561676025,grad_norm: 0.9445329513104533, iteration: 30340
loss: 1.0243470668792725,grad_norm: 0.9999990667514057, iteration: 30341
loss: 1.017162799835205,grad_norm: 0.9999992161199792, iteration: 30342
loss: 1.050934076309204,grad_norm: 0.9873714745035792, iteration: 30343
loss: 1.0120866298675537,grad_norm: 0.9999991520926138, iteration: 30344
loss: 1.011497139930725,grad_norm: 0.9999992607176574, iteration: 30345
loss: 0.9654499292373657,grad_norm: 0.9296478393487415, iteration: 30346
loss: 1.0025850534439087,grad_norm: 0.931264070133128, iteration: 30347
loss: 1.0168869495391846,grad_norm: 0.9999989683697532, iteration: 30348
loss: 0.981349527835846,grad_norm: 0.9999994916930559, iteration: 30349
loss: 1.0389862060546875,grad_norm: 0.9999993872901752, iteration: 30350
loss: 0.9825395941734314,grad_norm: 0.8535186396081028, iteration: 30351
loss: 1.0234146118164062,grad_norm: 0.9823983024538762, iteration: 30352
loss: 1.0153377056121826,grad_norm: 0.9152602517028898, iteration: 30353
loss: 0.998679518699646,grad_norm: 0.9999992308231207, iteration: 30354
loss: 1.0542957782745361,grad_norm: 0.9999994690910063, iteration: 30355
loss: 1.0434848070144653,grad_norm: 0.9999990463120145, iteration: 30356
loss: 1.059125542640686,grad_norm: 0.9966176060853633, iteration: 30357
loss: 0.9985217452049255,grad_norm: 0.8581747222974734, iteration: 30358
loss: 1.0166165828704834,grad_norm: 0.9572023812778937, iteration: 30359
loss: 0.9947069883346558,grad_norm: 0.9999993196253143, iteration: 30360
loss: 1.0349223613739014,grad_norm: 0.9882956672140378, iteration: 30361
loss: 1.0006688833236694,grad_norm: 0.9346189944891777, iteration: 30362
loss: 1.0233054161071777,grad_norm: 0.9999994403451764, iteration: 30363
loss: 1.0137438774108887,grad_norm: 0.8548339012039498, iteration: 30364
loss: 1.0345786809921265,grad_norm: 0.9144012153868507, iteration: 30365
loss: 1.013748288154602,grad_norm: 0.9999992057788654, iteration: 30366
loss: 1.0293415784835815,grad_norm: 0.8260878652139697, iteration: 30367
loss: 1.0166014432907104,grad_norm: 0.8299828322821792, iteration: 30368
loss: 0.9755207896232605,grad_norm: 0.9160601495689739, iteration: 30369
loss: 0.9633383750915527,grad_norm: 0.9436640015449894, iteration: 30370
loss: 1.0427907705307007,grad_norm: 0.9999991486129508, iteration: 30371
loss: 1.0216315984725952,grad_norm: 0.8556390283418521, iteration: 30372
loss: 1.0287243127822876,grad_norm: 0.9999990700182632, iteration: 30373
loss: 0.9877219200134277,grad_norm: 0.8713944744705864, iteration: 30374
loss: 0.992957592010498,grad_norm: 0.8620811415224788, iteration: 30375
loss: 0.9711503386497498,grad_norm: 0.900959641822478, iteration: 30376
loss: 1.0258909463882446,grad_norm: 0.9999991708965882, iteration: 30377
loss: 1.035085916519165,grad_norm: 0.9738402315588626, iteration: 30378
loss: 1.03026282787323,grad_norm: 0.9999990430622521, iteration: 30379
loss: 1.000916838645935,grad_norm: 0.999999577401753, iteration: 30380
loss: 1.0015569925308228,grad_norm: 0.9314197626999363, iteration: 30381
loss: 0.9916858077049255,grad_norm: 0.8934582747565886, iteration: 30382
loss: 0.9918374419212341,grad_norm: 0.9999993304550152, iteration: 30383
loss: 1.0149866342544556,grad_norm: 0.9999990401206037, iteration: 30384
loss: 1.0092990398406982,grad_norm: 0.9999990910135852, iteration: 30385
loss: 1.0384942293167114,grad_norm: 0.9999992350168464, iteration: 30386
loss: 1.018295168876648,grad_norm: 0.9874687912912495, iteration: 30387
loss: 0.9972432851791382,grad_norm: 0.9780834307481107, iteration: 30388
loss: 1.028833270072937,grad_norm: 0.9910409273179032, iteration: 30389
loss: 1.059653878211975,grad_norm: 0.9999994810855319, iteration: 30390
loss: 1.0023788213729858,grad_norm: 0.9999993121463846, iteration: 30391
loss: 1.0338340997695923,grad_norm: 0.890058915354897, iteration: 30392
loss: 0.9826963543891907,grad_norm: 0.8897625019569053, iteration: 30393
loss: 1.018478274345398,grad_norm: 0.841394179111062, iteration: 30394
loss: 1.0203031301498413,grad_norm: 0.9999990640794361, iteration: 30395
loss: 1.0160725116729736,grad_norm: 0.9999992435376563, iteration: 30396
loss: 0.9687483310699463,grad_norm: 0.9999990286528786, iteration: 30397
loss: 0.9919883608818054,grad_norm: 0.9999991356320334, iteration: 30398
loss: 1.0454238653182983,grad_norm: 0.8491121197298404, iteration: 30399
loss: 0.9540414214134216,grad_norm: 0.9999993279595369, iteration: 30400
loss: 1.0045784711837769,grad_norm: 0.9999990947994347, iteration: 30401
loss: 1.002524733543396,grad_norm: 0.999998990893422, iteration: 30402
loss: 1.0326300859451294,grad_norm: 0.8943763965529319, iteration: 30403
loss: 1.0702767372131348,grad_norm: 0.9999993691483074, iteration: 30404
loss: 1.0156822204589844,grad_norm: 0.9467585843990067, iteration: 30405
loss: 0.9986557364463806,grad_norm: 0.979939425460148, iteration: 30406
loss: 0.9998295903205872,grad_norm: 0.94165252261764, iteration: 30407
loss: 1.0076689720153809,grad_norm: 0.9999991246725864, iteration: 30408
loss: 1.0512197017669678,grad_norm: 0.9622007789098417, iteration: 30409
loss: 1.0031442642211914,grad_norm: 0.9845718403125421, iteration: 30410
loss: 0.989313542842865,grad_norm: 0.882865616939672, iteration: 30411
loss: 1.0263097286224365,grad_norm: 0.9502483689293295, iteration: 30412
loss: 1.0051467418670654,grad_norm: 0.9999991698873408, iteration: 30413
loss: 1.0356308221817017,grad_norm: 0.9999994314788262, iteration: 30414
loss: 0.9820306301116943,grad_norm: 0.9999991940635581, iteration: 30415
loss: 0.9786607623100281,grad_norm: 0.815651613792045, iteration: 30416
loss: 1.0435566902160645,grad_norm: 0.9622241583095357, iteration: 30417
loss: 0.9843512773513794,grad_norm: 0.9074632675212455, iteration: 30418
loss: 1.0003025531768799,grad_norm: 0.999999086185723, iteration: 30419
loss: 0.9911927580833435,grad_norm: 0.9999990730606931, iteration: 30420
loss: 1.0191988945007324,grad_norm: 0.8928209749903356, iteration: 30421
loss: 1.0060824155807495,grad_norm: 0.8456617390503053, iteration: 30422
loss: 0.9727296829223633,grad_norm: 0.9999990879446886, iteration: 30423
loss: 1.0256388187408447,grad_norm: 0.9999995748228685, iteration: 30424
loss: 1.0082345008850098,grad_norm: 0.9999990051683526, iteration: 30425
loss: 0.9970880746841431,grad_norm: 0.9999991478193746, iteration: 30426
loss: 1.0274620056152344,grad_norm: 0.9999992986728212, iteration: 30427
loss: 0.987857460975647,grad_norm: 0.9770976387212134, iteration: 30428
loss: 1.04776132106781,grad_norm: 0.999999575291335, iteration: 30429
loss: 0.9976297616958618,grad_norm: 0.9999990684360767, iteration: 30430
loss: 1.0085070133209229,grad_norm: 0.9999995271667464, iteration: 30431
loss: 1.031838297843933,grad_norm: 0.9999993858762256, iteration: 30432
loss: 0.9912406802177429,grad_norm: 0.9999989380860991, iteration: 30433
loss: 1.0072238445281982,grad_norm: 0.9999990316304639, iteration: 30434
loss: 0.9796344637870789,grad_norm: 0.9266287941564899, iteration: 30435
loss: 1.0246144533157349,grad_norm: 0.8738239367328243, iteration: 30436
loss: 1.0501654148101807,grad_norm: 0.9999998140621801, iteration: 30437
loss: 1.0084012746810913,grad_norm: 0.9999991104099446, iteration: 30438
loss: 1.0043665170669556,grad_norm: 0.9999993016513881, iteration: 30439
loss: 1.0995742082595825,grad_norm: 0.9999994546385972, iteration: 30440
loss: 0.974460244178772,grad_norm: 0.999999101336624, iteration: 30441
loss: 1.017248511314392,grad_norm: 0.999999204086374, iteration: 30442
loss: 0.9752918481826782,grad_norm: 0.9999990777656762, iteration: 30443
loss: 1.0478732585906982,grad_norm: 0.9999997338882317, iteration: 30444
loss: 0.9915032982826233,grad_norm: 0.9999990733293882, iteration: 30445
loss: 0.9892634153366089,grad_norm: 0.9999992109856761, iteration: 30446
loss: 1.0509768724441528,grad_norm: 0.9999991129866975, iteration: 30447
loss: 1.0158629417419434,grad_norm: 0.8640833899620806, iteration: 30448
loss: 1.0239721536636353,grad_norm: 0.9999990959742756, iteration: 30449
loss: 1.0361192226409912,grad_norm: 0.9999991146826734, iteration: 30450
loss: 1.0575894117355347,grad_norm: 0.9999994256279506, iteration: 30451
loss: 1.023888349533081,grad_norm: 0.9316971381760357, iteration: 30452
loss: 0.9928379058837891,grad_norm: 0.7901301304995954, iteration: 30453
loss: 1.0302048921585083,grad_norm: 0.9999992493485943, iteration: 30454
loss: 0.9984889626502991,grad_norm: 0.9999996356625995, iteration: 30455
loss: 1.0163886547088623,grad_norm: 0.999999102843293, iteration: 30456
loss: 1.0036054849624634,grad_norm: 0.972512801405548, iteration: 30457
loss: 1.0123118162155151,grad_norm: 0.8741233756963874, iteration: 30458
loss: 1.019139289855957,grad_norm: 0.930713721717446, iteration: 30459
loss: 1.087949514389038,grad_norm: 0.9999996679662148, iteration: 30460
loss: 0.976826012134552,grad_norm: 0.9533991332671782, iteration: 30461
loss: 1.0346078872680664,grad_norm: 0.9055003225074301, iteration: 30462
loss: 0.9791685938835144,grad_norm: 0.999999370247219, iteration: 30463
loss: 1.0162830352783203,grad_norm: 0.8927521361638454, iteration: 30464
loss: 1.0230414867401123,grad_norm: 0.9999991410723107, iteration: 30465
loss: 1.0120389461517334,grad_norm: 0.9999995527686013, iteration: 30466
loss: 1.0121595859527588,grad_norm: 0.999999215779434, iteration: 30467
loss: 1.0235872268676758,grad_norm: 0.9999991600580671, iteration: 30468
loss: 1.0367262363433838,grad_norm: 0.8750325382706429, iteration: 30469
loss: 1.004528284072876,grad_norm: 0.9999997513780632, iteration: 30470
loss: 0.9951366782188416,grad_norm: 0.8329031573261346, iteration: 30471
loss: 0.9969570636749268,grad_norm: 0.9336648490075282, iteration: 30472
loss: 1.035538911819458,grad_norm: 0.9999998241936544, iteration: 30473
loss: 0.9995864033699036,grad_norm: 0.9999990721632981, iteration: 30474
loss: 1.0132206678390503,grad_norm: 0.9194002953330443, iteration: 30475
loss: 0.962142288684845,grad_norm: 0.9616109875742168, iteration: 30476
loss: 0.9840685129165649,grad_norm: 0.8365945306466177, iteration: 30477
loss: 1.0144729614257812,grad_norm: 0.9999991427547889, iteration: 30478
loss: 1.0130482912063599,grad_norm: 0.9999989838680846, iteration: 30479
loss: 1.0212939977645874,grad_norm: 0.7871509795403759, iteration: 30480
loss: 1.0103254318237305,grad_norm: 0.9748929036886659, iteration: 30481
loss: 1.0193355083465576,grad_norm: 0.8344789530856475, iteration: 30482
loss: 1.0322717428207397,grad_norm: 0.9999993315546739, iteration: 30483
loss: 1.0130866765975952,grad_norm: 0.9999990906386835, iteration: 30484
loss: 0.9755526781082153,grad_norm: 0.8249410050332376, iteration: 30485
loss: 1.0204277038574219,grad_norm: 0.9753706692477763, iteration: 30486
loss: 1.0013813972473145,grad_norm: 0.8620212354049851, iteration: 30487
loss: 1.0045647621154785,grad_norm: 0.8545676270699597, iteration: 30488
loss: 1.0338735580444336,grad_norm: 0.8828789724783305, iteration: 30489
loss: 0.9940759539604187,grad_norm: 0.7615533361840733, iteration: 30490
loss: 0.9845051765441895,grad_norm: 0.9269554165477316, iteration: 30491
loss: 1.001975655555725,grad_norm: 0.8356747673287661, iteration: 30492
loss: 1.0278376340866089,grad_norm: 0.9998591700847227, iteration: 30493
loss: 1.0096029043197632,grad_norm: 0.9999990904814702, iteration: 30494
loss: 1.0485410690307617,grad_norm: 0.9999999368901892, iteration: 30495
loss: 1.0150374174118042,grad_norm: 0.9999994193735596, iteration: 30496
loss: 1.011522889137268,grad_norm: 0.9197690681909484, iteration: 30497
loss: 1.0224709510803223,grad_norm: 0.9999992843100233, iteration: 30498
loss: 1.0151689052581787,grad_norm: 0.9999990318396519, iteration: 30499
loss: 1.0324956178665161,grad_norm: 0.9649917713481112, iteration: 30500
loss: 1.044366717338562,grad_norm: 0.9634036479788634, iteration: 30501
loss: 1.0159714221954346,grad_norm: 0.9999991768546507, iteration: 30502
loss: 1.0285923480987549,grad_norm: 0.9112289179208751, iteration: 30503
loss: 1.0042611360549927,grad_norm: 0.999998977937916, iteration: 30504
loss: 0.9956303834915161,grad_norm: 0.8473448035786765, iteration: 30505
loss: 1.0188418626785278,grad_norm: 0.8753062309697939, iteration: 30506
loss: 0.9932182431221008,grad_norm: 0.9571503927955346, iteration: 30507
loss: 1.012929916381836,grad_norm: 0.9999991165428952, iteration: 30508
loss: 1.0252245664596558,grad_norm: 0.9999992158007085, iteration: 30509
loss: 1.0202618837356567,grad_norm: 0.9916132992450564, iteration: 30510
loss: 0.9887014627456665,grad_norm: 0.9999990411395909, iteration: 30511
loss: 0.9877970218658447,grad_norm: 0.8809626361081927, iteration: 30512
loss: 1.0038203001022339,grad_norm: 0.8320830926063933, iteration: 30513
loss: 1.0155948400497437,grad_norm: 0.8460265075090521, iteration: 30514
loss: 1.018398404121399,grad_norm: 0.9542884671956419, iteration: 30515
loss: 0.9703594446182251,grad_norm: 0.9999990967470183, iteration: 30516
loss: 1.017449140548706,grad_norm: 0.9999990875354259, iteration: 30517
loss: 0.975349485874176,grad_norm: 0.9961251865040157, iteration: 30518
loss: 1.0020184516906738,grad_norm: 0.9999992739573863, iteration: 30519
loss: 1.005784034729004,grad_norm: 0.9196444157379033, iteration: 30520
loss: 1.002792239189148,grad_norm: 0.9987644987965447, iteration: 30521
loss: 1.0210144519805908,grad_norm: 0.8563830373064893, iteration: 30522
loss: 0.9747708439826965,grad_norm: 0.9495610779939254, iteration: 30523
loss: 1.015462875366211,grad_norm: 0.9519104111852619, iteration: 30524
loss: 1.0037888288497925,grad_norm: 0.9033915001012695, iteration: 30525
loss: 0.9980849623680115,grad_norm: 0.999999322459331, iteration: 30526
loss: 1.0009100437164307,grad_norm: 0.8191465090512522, iteration: 30527
loss: 1.0135210752487183,grad_norm: 0.8898428493676952, iteration: 30528
loss: 1.019433856010437,grad_norm: 0.8778074591816488, iteration: 30529
loss: 1.0167466402053833,grad_norm: 0.8759411962898321, iteration: 30530
loss: 0.9584941267967224,grad_norm: 0.9999990425980265, iteration: 30531
loss: 1.021884799003601,grad_norm: 0.9161743085194444, iteration: 30532
loss: 0.9864799380302429,grad_norm: 0.9683229041172627, iteration: 30533
loss: 0.9737770557403564,grad_norm: 0.9722051167440124, iteration: 30534
loss: 1.0106650590896606,grad_norm: 0.9999993605812335, iteration: 30535
loss: 1.0211269855499268,grad_norm: 0.9579516158256526, iteration: 30536
loss: 1.0152958631515503,grad_norm: 0.8780692720669718, iteration: 30537
loss: 1.022432804107666,grad_norm: 0.9999991228673422, iteration: 30538
loss: 0.9963122606277466,grad_norm: 0.9999991005095435, iteration: 30539
loss: 1.0169121026992798,grad_norm: 0.9999990824585594, iteration: 30540
loss: 0.9899497032165527,grad_norm: 0.9999993279577466, iteration: 30541
loss: 1.0042638778686523,grad_norm: 0.999999034816006, iteration: 30542
loss: 1.0320518016815186,grad_norm: 0.999999584967761, iteration: 30543
loss: 1.0049316883087158,grad_norm: 0.9254398529473193, iteration: 30544
loss: 0.9653158187866211,grad_norm: 0.921288992178546, iteration: 30545
loss: 1.0372967720031738,grad_norm: 0.9999990443408313, iteration: 30546
loss: 1.0328866243362427,grad_norm: 0.911057440615851, iteration: 30547
loss: 1.0114550590515137,grad_norm: 0.9591126434596892, iteration: 30548
loss: 1.027565598487854,grad_norm: 0.9999990259520479, iteration: 30549
loss: 1.0052517652511597,grad_norm: 0.9999993605288834, iteration: 30550
loss: 0.9985385537147522,grad_norm: 0.8152122296593172, iteration: 30551
loss: 0.9938669204711914,grad_norm: 0.9922566678388108, iteration: 30552
loss: 1.006181240081787,grad_norm: 0.9999991136863117, iteration: 30553
loss: 1.0117182731628418,grad_norm: 0.9999992441065, iteration: 30554
loss: 1.0176249742507935,grad_norm: 0.9999991086381418, iteration: 30555
loss: 1.0096741914749146,grad_norm: 0.9999992211779617, iteration: 30556
loss: 0.9917104840278625,grad_norm: 0.9999991680680933, iteration: 30557
loss: 0.9925519227981567,grad_norm: 0.8971623271896383, iteration: 30558
loss: 0.9964074492454529,grad_norm: 0.9504865666788662, iteration: 30559
loss: 0.9968968629837036,grad_norm: 0.9360145801370353, iteration: 30560
loss: 1.0330268144607544,grad_norm: 0.8480480226593208, iteration: 30561
loss: 1.0180168151855469,grad_norm: 0.9336594371223826, iteration: 30562
loss: 1.0179537534713745,grad_norm: 0.7610553354171137, iteration: 30563
loss: 1.025341272354126,grad_norm: 0.999999067365288, iteration: 30564
loss: 0.991532564163208,grad_norm: 0.9999989131943016, iteration: 30565
loss: 1.0065125226974487,grad_norm: 0.9999991095132155, iteration: 30566
loss: 1.0035934448242188,grad_norm: 0.9999990505234018, iteration: 30567
loss: 1.0034081935882568,grad_norm: 0.9999990872295711, iteration: 30568
loss: 1.0092390775680542,grad_norm: 0.9304805384595637, iteration: 30569
loss: 1.0220760107040405,grad_norm: 0.9999993158251129, iteration: 30570
loss: 0.9754437208175659,grad_norm: 0.8944205645575986, iteration: 30571
loss: 1.0390475988388062,grad_norm: 0.999999088626765, iteration: 30572
loss: 1.0466033220291138,grad_norm: 0.8319613414016751, iteration: 30573
loss: 0.9834257960319519,grad_norm: 0.943729673501613, iteration: 30574
loss: 1.0525437593460083,grad_norm: 0.9999996391505227, iteration: 30575
loss: 0.9975435137748718,grad_norm: 0.9999990945073599, iteration: 30576
loss: 1.016276240348816,grad_norm: 0.9999990950648469, iteration: 30577
loss: 1.010900616645813,grad_norm: 0.9097594013184682, iteration: 30578
loss: 1.0538512468338013,grad_norm: 0.9999990667572352, iteration: 30579
loss: 1.0246962308883667,grad_norm: 0.9999990951164428, iteration: 30580
loss: 1.014194369316101,grad_norm: 0.8814804868735816, iteration: 30581
loss: 1.005053162574768,grad_norm: 0.8740928010767042, iteration: 30582
loss: 0.9953770637512207,grad_norm: 0.9999990087711675, iteration: 30583
loss: 1.0098278522491455,grad_norm: 0.938910896468337, iteration: 30584
loss: 1.018174409866333,grad_norm: 0.9381610051843511, iteration: 30585
loss: 1.0139504671096802,grad_norm: 0.9999990267147322, iteration: 30586
loss: 1.0164785385131836,grad_norm: 0.9299216299153142, iteration: 30587
loss: 0.9763472080230713,grad_norm: 0.9360883890884439, iteration: 30588
loss: 1.0078343152999878,grad_norm: 0.9536924648931521, iteration: 30589
loss: 1.045466661453247,grad_norm: 0.99999933355844, iteration: 30590
loss: 1.0087592601776123,grad_norm: 0.993458273747414, iteration: 30591
loss: 1.0143065452575684,grad_norm: 0.9999990573443202, iteration: 30592
loss: 0.9824681878089905,grad_norm: 0.9598520737990613, iteration: 30593
loss: 1.050898790359497,grad_norm: 0.9999993660669778, iteration: 30594
loss: 0.9827105402946472,grad_norm: 0.9999991553316114, iteration: 30595
loss: 1.0268666744232178,grad_norm: 0.9889073975821435, iteration: 30596
loss: 1.0032236576080322,grad_norm: 0.967311821640398, iteration: 30597
loss: 1.0416243076324463,grad_norm: 0.9999996222794699, iteration: 30598
loss: 0.9986990690231323,grad_norm: 0.9999991975398912, iteration: 30599
loss: 1.0119664669036865,grad_norm: 0.79303556584274, iteration: 30600
loss: 1.0287927389144897,grad_norm: 0.8366484503567462, iteration: 30601
loss: 1.0192774534225464,grad_norm: 0.9999990862394169, iteration: 30602
loss: 1.051279902458191,grad_norm: 0.9999992324199299, iteration: 30603
loss: 1.0126019716262817,grad_norm: 0.999999173425636, iteration: 30604
loss: 1.0162289142608643,grad_norm: 0.9999992595134436, iteration: 30605
loss: 1.021507978439331,grad_norm: 0.8801191167098011, iteration: 30606
loss: 1.0065258741378784,grad_norm: 0.9999993306445917, iteration: 30607
loss: 0.9773150682449341,grad_norm: 0.9547800399088215, iteration: 30608
loss: 1.0148686170578003,grad_norm: 0.914339687909345, iteration: 30609
loss: 1.0081113576889038,grad_norm: 0.9337529002275708, iteration: 30610
loss: 1.0133694410324097,grad_norm: 0.9999990522771723, iteration: 30611
loss: 1.0243626832962036,grad_norm: 0.9999990614448993, iteration: 30612
loss: 1.0061163902282715,grad_norm: 0.8912991975998711, iteration: 30613
loss: 1.026693344116211,grad_norm: 0.9999992052877964, iteration: 30614
loss: 1.001724362373352,grad_norm: 0.9780147777879822, iteration: 30615
loss: 0.9514184594154358,grad_norm: 0.9999990924449089, iteration: 30616
loss: 0.9961622953414917,grad_norm: 0.8142643647776626, iteration: 30617
loss: 0.9700665473937988,grad_norm: 0.9999991981825836, iteration: 30618
loss: 1.0184918642044067,grad_norm: 0.9223296693833494, iteration: 30619
loss: 1.0233300924301147,grad_norm: 0.9369007349438797, iteration: 30620
loss: 0.9991397857666016,grad_norm: 0.9999991882646797, iteration: 30621
loss: 0.9807471632957458,grad_norm: 0.9014430869315893, iteration: 30622
loss: 1.0100923776626587,grad_norm: 0.9375114504355255, iteration: 30623
loss: 0.9731169939041138,grad_norm: 0.9833579817439879, iteration: 30624
loss: 0.9921331405639648,grad_norm: 0.9999990828453497, iteration: 30625
loss: 1.0267075300216675,grad_norm: 0.9999992674806815, iteration: 30626
loss: 1.0426380634307861,grad_norm: 0.9999991908820883, iteration: 30627
loss: 1.0055136680603027,grad_norm: 0.9552405314825523, iteration: 30628
loss: 1.0065085887908936,grad_norm: 0.9656138795941264, iteration: 30629
loss: 1.0473780632019043,grad_norm: 0.9999990676278646, iteration: 30630
loss: 1.0428762435913086,grad_norm: 0.9494698694610376, iteration: 30631
loss: 1.0178186893463135,grad_norm: 0.9008615548154738, iteration: 30632
loss: 0.9724728465080261,grad_norm: 0.9920208059029482, iteration: 30633
loss: 1.00471830368042,grad_norm: 0.8505955250460185, iteration: 30634
loss: 1.0311955213546753,grad_norm: 0.9964501761403729, iteration: 30635
loss: 1.0216898918151855,grad_norm: 0.9999989761362312, iteration: 30636
loss: 1.0509755611419678,grad_norm: 0.9999991556422281, iteration: 30637
loss: 1.012882113456726,grad_norm: 0.883240384648542, iteration: 30638
loss: 1.0082803964614868,grad_norm: 0.9590582595820083, iteration: 30639
loss: 0.9754136204719543,grad_norm: 0.9999990768516167, iteration: 30640
loss: 1.0217058658599854,grad_norm: 0.9999992219110784, iteration: 30641
loss: 1.0120686292648315,grad_norm: 0.9999990810590339, iteration: 30642
loss: 1.0258541107177734,grad_norm: 0.9999991027579878, iteration: 30643
loss: 1.001365303993225,grad_norm: 0.9863611791723609, iteration: 30644
loss: 1.0370560884475708,grad_norm: 0.9338362347482844, iteration: 30645
loss: 1.0031180381774902,grad_norm: 0.695523954142407, iteration: 30646
loss: 1.0239286422729492,grad_norm: 0.999999582364387, iteration: 30647
loss: 1.033536434173584,grad_norm: 0.9999995142656236, iteration: 30648
loss: 1.016487717628479,grad_norm: 0.8884822212296098, iteration: 30649
loss: 1.005962610244751,grad_norm: 0.9879716753735356, iteration: 30650
loss: 1.027712106704712,grad_norm: 0.9999992090434615, iteration: 30651
loss: 1.0326189994812012,grad_norm: 0.9999992026823658, iteration: 30652
loss: 1.0039077997207642,grad_norm: 0.9375671703460785, iteration: 30653
loss: 0.9958963394165039,grad_norm: 0.9999992611966847, iteration: 30654
loss: 1.0243370532989502,grad_norm: 0.8957714414640582, iteration: 30655
loss: 1.0346307754516602,grad_norm: 0.9708078728204795, iteration: 30656
loss: 0.9731650948524475,grad_norm: 0.9999992570695799, iteration: 30657
loss: 0.9895676374435425,grad_norm: 0.9024981458037874, iteration: 30658
loss: 1.1741318702697754,grad_norm: 0.9999994267703837, iteration: 30659
loss: 1.000487208366394,grad_norm: 0.999999149320933, iteration: 30660
loss: 1.0119136571884155,grad_norm: 0.9999991699657487, iteration: 30661
loss: 1.0510610342025757,grad_norm: 0.9999990871040709, iteration: 30662
loss: 1.0102155208587646,grad_norm: 0.9494566298894245, iteration: 30663
loss: 1.0025402307510376,grad_norm: 0.9999990725311181, iteration: 30664
loss: 1.0219796895980835,grad_norm: 0.9999992121840127, iteration: 30665
loss: 0.9983581304550171,grad_norm: 0.9999991122059365, iteration: 30666
loss: 1.0118883848190308,grad_norm: 0.8920732911279805, iteration: 30667
loss: 1.0108139514923096,grad_norm: 0.9999991952755901, iteration: 30668
loss: 0.9851879477500916,grad_norm: 0.9999992318523537, iteration: 30669
loss: 0.9921717643737793,grad_norm: 0.9999989905090008, iteration: 30670
loss: 1.0013668537139893,grad_norm: 0.9659437600280187, iteration: 30671
loss: 1.0026696920394897,grad_norm: 0.9557843897617778, iteration: 30672
loss: 0.9587237238883972,grad_norm: 0.9999989905512489, iteration: 30673
loss: 0.9926491379737854,grad_norm: 0.9999992382970075, iteration: 30674
loss: 1.0067260265350342,grad_norm: 0.8748307794977945, iteration: 30675
loss: 1.0126935243606567,grad_norm: 0.9339767381058552, iteration: 30676
loss: 1.0400012731552124,grad_norm: 0.8588757935828824, iteration: 30677
loss: 0.9773002862930298,grad_norm: 0.9427947419409661, iteration: 30678
loss: 0.9853163957595825,grad_norm: 0.9800583608066364, iteration: 30679
loss: 0.9877962470054626,grad_norm: 0.9923928573461572, iteration: 30680
loss: 1.0128107070922852,grad_norm: 0.851549573307275, iteration: 30681
loss: 0.9460585117340088,grad_norm: 0.9999990607765167, iteration: 30682
loss: 0.9999599456787109,grad_norm: 0.8754318810559739, iteration: 30683
loss: 0.99200439453125,grad_norm: 0.8694231316180002, iteration: 30684
loss: 1.0124772787094116,grad_norm: 0.8194521524509052, iteration: 30685
loss: 1.008567214012146,grad_norm: 0.9854160472888333, iteration: 30686
loss: 1.012437343597412,grad_norm: 0.9586393914352511, iteration: 30687
loss: 0.9846734404563904,grad_norm: 0.9665678084331404, iteration: 30688
loss: 0.9809529185295105,grad_norm: 0.8257988600588448, iteration: 30689
loss: 0.9887559413909912,grad_norm: 0.8681560172140145, iteration: 30690
loss: 0.9877708554267883,grad_norm: 0.9772100920355163, iteration: 30691
loss: 0.9848899245262146,grad_norm: 0.8892123128305855, iteration: 30692
loss: 1.0008906126022339,grad_norm: 0.9999992438837221, iteration: 30693
loss: 0.9991641640663147,grad_norm: 0.9709250738769254, iteration: 30694
loss: 0.9789385795593262,grad_norm: 0.9999991349419666, iteration: 30695
loss: 0.9732187390327454,grad_norm: 0.9313521998557764, iteration: 30696
loss: 0.9797198176383972,grad_norm: 0.8627654964222743, iteration: 30697
loss: 1.0071922540664673,grad_norm: 0.9999992130760813, iteration: 30698
loss: 1.0344734191894531,grad_norm: 0.999999174947404, iteration: 30699
loss: 0.9912278056144714,grad_norm: 0.9999992170895965, iteration: 30700
loss: 1.006173849105835,grad_norm: 0.9999990739921065, iteration: 30701
loss: 0.9755533933639526,grad_norm: 0.9999991446708322, iteration: 30702
loss: 1.0381239652633667,grad_norm: 0.9918513667436495, iteration: 30703
loss: 1.0642151832580566,grad_norm: 0.9999994567621947, iteration: 30704
loss: 0.989601194858551,grad_norm: 0.9528540301490068, iteration: 30705
loss: 1.0125606060028076,grad_norm: 0.9999991074534021, iteration: 30706
loss: 0.9926712512969971,grad_norm: 0.9999994294856025, iteration: 30707
loss: 0.9661386609077454,grad_norm: 0.9999990388117737, iteration: 30708
loss: 0.9986622333526611,grad_norm: 0.9999997662006724, iteration: 30709
loss: 1.034670114517212,grad_norm: 0.9999991437716025, iteration: 30710
loss: 0.9950701594352722,grad_norm: 0.9999991295399533, iteration: 30711
loss: 0.9950197339057922,grad_norm: 0.9999990870748078, iteration: 30712
loss: 1.0324898958206177,grad_norm: 0.9318077000706609, iteration: 30713
loss: 1.014330506324768,grad_norm: 0.9999991603278787, iteration: 30714
loss: 1.0167558193206787,grad_norm: 0.9999992067350263, iteration: 30715
loss: 1.0156726837158203,grad_norm: 0.9999991275691622, iteration: 30716
loss: 1.1087623834609985,grad_norm: 0.9999993487878986, iteration: 30717
loss: 1.0110770463943481,grad_norm: 0.9999992062317548, iteration: 30718
loss: 1.0651271343231201,grad_norm: 0.9999991689344913, iteration: 30719
loss: 0.9727708697319031,grad_norm: 0.9999990756884843, iteration: 30720
loss: 0.992185652256012,grad_norm: 0.9020147717167047, iteration: 30721
loss: 1.0011636018753052,grad_norm: 0.9553622567890299, iteration: 30722
loss: 1.0228654146194458,grad_norm: 0.999999005157551, iteration: 30723
loss: 1.0033411979675293,grad_norm: 0.827354594230197, iteration: 30724
loss: 1.0204384326934814,grad_norm: 0.9999991786994203, iteration: 30725
loss: 1.044813871383667,grad_norm: 0.9206623143264175, iteration: 30726
loss: 0.9891024231910706,grad_norm: 0.9999991572501802, iteration: 30727
loss: 1.0053759813308716,grad_norm: 0.9999990736161883, iteration: 30728
loss: 0.9903799891471863,grad_norm: 0.9252009992322846, iteration: 30729
loss: 1.0180704593658447,grad_norm: 0.9316368771947895, iteration: 30730
loss: 0.9914128184318542,grad_norm: 0.8976290973300527, iteration: 30731
loss: 0.992718517780304,grad_norm: 0.9999990157212, iteration: 30732
loss: 1.0211379528045654,grad_norm: 0.8956946035538963, iteration: 30733
loss: 1.0223169326782227,grad_norm: 0.9999990995968507, iteration: 30734
loss: 1.0329946279525757,grad_norm: 0.9999991031105679, iteration: 30735
loss: 1.0922958850860596,grad_norm: 0.9999992282291085, iteration: 30736
loss: 0.9977335333824158,grad_norm: 0.9999990296787935, iteration: 30737
loss: 1.0020928382873535,grad_norm: 0.9467897025312989, iteration: 30738
loss: 0.9841426014900208,grad_norm: 0.8888378337055517, iteration: 30739
loss: 1.0201566219329834,grad_norm: 0.999999331074954, iteration: 30740
loss: 1.025734305381775,grad_norm: 0.9947912568063966, iteration: 30741
loss: 1.0047805309295654,grad_norm: 0.9165145557144474, iteration: 30742
loss: 1.0401402711868286,grad_norm: 0.9168669481143894, iteration: 30743
loss: 1.0294795036315918,grad_norm: 0.8135709866520571, iteration: 30744
loss: 0.9754654765129089,grad_norm: 0.9382512921250954, iteration: 30745
loss: 0.9790596961975098,grad_norm: 0.9999992572251994, iteration: 30746
loss: 0.977836549282074,grad_norm: 0.8418162984193728, iteration: 30747
loss: 0.9931991100311279,grad_norm: 0.7567054559342045, iteration: 30748
loss: 1.0000066757202148,grad_norm: 0.9418877639108922, iteration: 30749
loss: 1.009372353553772,grad_norm: 0.9999992321778111, iteration: 30750
loss: 1.0346035957336426,grad_norm: 0.8798709220863649, iteration: 30751
loss: 1.0175868272781372,grad_norm: 0.8035343721200163, iteration: 30752
loss: 1.0230849981307983,grad_norm: 0.9539477781535068, iteration: 30753
loss: 1.0416072607040405,grad_norm: 0.9999990813995664, iteration: 30754
loss: 1.0031107664108276,grad_norm: 0.8890240191402089, iteration: 30755
loss: 0.9963369369506836,grad_norm: 0.8403046341681526, iteration: 30756
loss: 1.0269153118133545,grad_norm: 0.9898587040772521, iteration: 30757
loss: 0.9947347044944763,grad_norm: 0.7416462407017281, iteration: 30758
loss: 1.002023458480835,grad_norm: 0.8925043773938232, iteration: 30759
loss: 1.0140018463134766,grad_norm: 0.9619886293663295, iteration: 30760
loss: 1.0221799612045288,grad_norm: 0.9725291059469044, iteration: 30761
loss: 1.0251593589782715,grad_norm: 0.9999991084302509, iteration: 30762
loss: 1.0030567646026611,grad_norm: 0.9999992632888528, iteration: 30763
loss: 0.997409999370575,grad_norm: 0.6866087847716796, iteration: 30764
loss: 1.0393941402435303,grad_norm: 0.9871349655353521, iteration: 30765
loss: 1.0394362211227417,grad_norm: 0.9999995487728097, iteration: 30766
loss: 0.9685587882995605,grad_norm: 0.901325768529447, iteration: 30767
loss: 0.9855705499649048,grad_norm: 0.9493703562298549, iteration: 30768
loss: 0.9608583450317383,grad_norm: 0.9509645787687738, iteration: 30769
loss: 1.0286903381347656,grad_norm: 0.9999991084897084, iteration: 30770
loss: 0.9948790669441223,grad_norm: 0.9095002243862046, iteration: 30771
loss: 0.9879798293113708,grad_norm: 0.7935743384854136, iteration: 30772
loss: 1.0105254650115967,grad_norm: 0.9098602629605196, iteration: 30773
loss: 0.9727927446365356,grad_norm: 0.9999992628496479, iteration: 30774
loss: 1.003969669342041,grad_norm: 0.9999990819724389, iteration: 30775
loss: 0.9924911260604858,grad_norm: 0.9020813692695384, iteration: 30776
loss: 0.9673378467559814,grad_norm: 0.8783881467232307, iteration: 30777
loss: 1.0148237943649292,grad_norm: 0.8740941702730598, iteration: 30778
loss: 1.0285372734069824,grad_norm: 0.9999990992712281, iteration: 30779
loss: 1.0078070163726807,grad_norm: 0.9999990490494672, iteration: 30780
loss: 1.0021401643753052,grad_norm: 0.9999991006205885, iteration: 30781
loss: 1.0291436910629272,grad_norm: 0.9999991918620603, iteration: 30782
loss: 1.0234328508377075,grad_norm: 0.886116136790383, iteration: 30783
loss: 0.9971803426742554,grad_norm: 0.9999991223039368, iteration: 30784
loss: 1.0247011184692383,grad_norm: 0.971590926972378, iteration: 30785
loss: 0.9744679927825928,grad_norm: 0.9499368660029245, iteration: 30786
loss: 0.9845603108406067,grad_norm: 0.999999072737034, iteration: 30787
loss: 1.023123860359192,grad_norm: 0.9434999681931208, iteration: 30788
loss: 1.0176615715026855,grad_norm: 0.9999992276937183, iteration: 30789
loss: 0.9681538343429565,grad_norm: 0.9999991648929741, iteration: 30790
loss: 0.9998155236244202,grad_norm: 0.946900404366627, iteration: 30791
loss: 0.9813790321350098,grad_norm: 0.9142159603676695, iteration: 30792
loss: 0.9556958675384521,grad_norm: 0.9999994148213561, iteration: 30793
loss: 1.0678397417068481,grad_norm: 0.9999991032661247, iteration: 30794
loss: 1.0428682565689087,grad_norm: 0.8772597581170803, iteration: 30795
loss: 0.9734717011451721,grad_norm: 0.9096016139710391, iteration: 30796
loss: 0.9424872398376465,grad_norm: 0.9621131777074414, iteration: 30797
loss: 0.9634139537811279,grad_norm: 0.915786881414764, iteration: 30798
loss: 0.9905259609222412,grad_norm: 0.790665362940517, iteration: 30799
loss: 1.0402239561080933,grad_norm: 0.9275531881663752, iteration: 30800
loss: 0.9809941649436951,grad_norm: 0.8601913527017331, iteration: 30801
loss: 0.962042510509491,grad_norm: 0.9999990653248125, iteration: 30802
loss: 1.0113030672073364,grad_norm: 0.9999992157894537, iteration: 30803
loss: 1.024586796760559,grad_norm: 0.9232517728122882, iteration: 30804
loss: 0.9937629699707031,grad_norm: 0.8431747788549998, iteration: 30805
loss: 1.0115245580673218,grad_norm: 0.9999991387956452, iteration: 30806
loss: 0.998986542224884,grad_norm: 0.9999990863860638, iteration: 30807
loss: 1.0484415292739868,grad_norm: 0.9999991957530558, iteration: 30808
loss: 1.0053051710128784,grad_norm: 0.9999989916643486, iteration: 30809
loss: 0.9857528805732727,grad_norm: 0.8246970768630241, iteration: 30810
loss: 1.001021385192871,grad_norm: 0.7949086639583971, iteration: 30811
loss: 1.0034183263778687,grad_norm: 0.9125567523314259, iteration: 30812
loss: 1.0149481296539307,grad_norm: 0.9999990888886304, iteration: 30813
loss: 1.0037860870361328,grad_norm: 0.999999178060652, iteration: 30814
loss: 1.0297881364822388,grad_norm: 0.999999177171101, iteration: 30815
loss: 1.0580512285232544,grad_norm: 0.9999993309553108, iteration: 30816
loss: 1.0123907327651978,grad_norm: 0.8096701642796215, iteration: 30817
loss: 1.0157228708267212,grad_norm: 0.9999991789091006, iteration: 30818
loss: 1.0172394514083862,grad_norm: 0.9999991367238692, iteration: 30819
loss: 1.0609451532363892,grad_norm: 0.999999152746215, iteration: 30820
loss: 0.9985021948814392,grad_norm: 0.9999991728208452, iteration: 30821
loss: 1.0179685354232788,grad_norm: 0.8586666341038165, iteration: 30822
loss: 1.0182321071624756,grad_norm: 0.984598946544088, iteration: 30823
loss: 1.0156171321868896,grad_norm: 0.9752369473253597, iteration: 30824
loss: 1.014036774635315,grad_norm: 0.9823319889109723, iteration: 30825
loss: 1.0161110162734985,grad_norm: 0.9999992440349059, iteration: 30826
loss: 1.059569239616394,grad_norm: 0.9999990299640293, iteration: 30827
loss: 1.019294023513794,grad_norm: 0.9999991619928301, iteration: 30828
loss: 1.0132720470428467,grad_norm: 0.9376357715149162, iteration: 30829
loss: 1.0208158493041992,grad_norm: 0.9629250437606094, iteration: 30830
loss: 1.0521514415740967,grad_norm: 0.9999994491803501, iteration: 30831
loss: 1.0048582553863525,grad_norm: 0.8986812299612718, iteration: 30832
loss: 1.012064814567566,grad_norm: 0.9756516532001431, iteration: 30833
loss: 0.9604445695877075,grad_norm: 0.9374874019439271, iteration: 30834
loss: 0.9805304408073425,grad_norm: 0.8143069108500923, iteration: 30835
loss: 0.9897181391716003,grad_norm: 0.920843622274897, iteration: 30836
loss: 0.9830464720726013,grad_norm: 0.8886372507591725, iteration: 30837
loss: 1.0041041374206543,grad_norm: 0.9550211434309512, iteration: 30838
loss: 0.9703611731529236,grad_norm: 0.9860917676289352, iteration: 30839
loss: 0.9715653657913208,grad_norm: 0.8862306959104832, iteration: 30840
loss: 0.9971754550933838,grad_norm: 0.848223762306454, iteration: 30841
loss: 0.9994803071022034,grad_norm: 0.9423669035531064, iteration: 30842
loss: 1.0369569063186646,grad_norm: 0.8710663748388455, iteration: 30843
loss: 0.9954447746276855,grad_norm: 0.999999080028657, iteration: 30844
loss: 1.0077223777770996,grad_norm: 0.9999990606436613, iteration: 30845
loss: 1.016141653060913,grad_norm: 0.8872829125132655, iteration: 30846
loss: 1.0248454809188843,grad_norm: 0.7938589448488464, iteration: 30847
loss: 0.989600419998169,grad_norm: 0.9655825875235111, iteration: 30848
loss: 0.9814479947090149,grad_norm: 0.9999990570624714, iteration: 30849
loss: 0.9682120680809021,grad_norm: 0.999999158667316, iteration: 30850
loss: 1.0415937900543213,grad_norm: 0.9999991159465244, iteration: 30851
loss: 1.0084534883499146,grad_norm: 0.9999990904789355, iteration: 30852
loss: 1.0300077199935913,grad_norm: 0.9152444171815455, iteration: 30853
loss: 1.0145342350006104,grad_norm: 0.9999989710950675, iteration: 30854
loss: 0.9811866283416748,grad_norm: 0.9999990929434277, iteration: 30855
loss: 1.0275566577911377,grad_norm: 0.9134838881703528, iteration: 30856
loss: 1.0189015865325928,grad_norm: 0.9999993015652495, iteration: 30857
loss: 1.0107816457748413,grad_norm: 0.8978098014042447, iteration: 30858
loss: 1.0202080011367798,grad_norm: 0.9999990362027156, iteration: 30859
loss: 1.0047640800476074,grad_norm: 0.913537786579604, iteration: 30860
loss: 1.0112065076828003,grad_norm: 0.9671678137773093, iteration: 30861
loss: 0.9970542788505554,grad_norm: 0.9999990645444948, iteration: 30862
loss: 1.0100750923156738,grad_norm: 0.9999991280556116, iteration: 30863
loss: 1.0067440271377563,grad_norm: 0.8805397770479263, iteration: 30864
loss: 1.0020076036453247,grad_norm: 0.9962886190572915, iteration: 30865
loss: 1.0119926929473877,grad_norm: 0.9999990324951293, iteration: 30866
loss: 1.0080028772354126,grad_norm: 0.9999989672498923, iteration: 30867
loss: 0.9723352789878845,grad_norm: 0.9999990948601111, iteration: 30868
loss: 1.0401941537857056,grad_norm: 0.9573995200460231, iteration: 30869
loss: 1.0052152872085571,grad_norm: 0.9644610121686317, iteration: 30870
loss: 0.9838959574699402,grad_norm: 0.9999992657074754, iteration: 30871
loss: 0.9923620223999023,grad_norm: 0.9070880214503739, iteration: 30872
loss: 1.026242733001709,grad_norm: 0.9999989776250657, iteration: 30873
loss: 1.0072529315948486,grad_norm: 0.9370175766540352, iteration: 30874
loss: 1.021555781364441,grad_norm: 0.7965389784449157, iteration: 30875
loss: 0.9867137670516968,grad_norm: 0.9999992053758944, iteration: 30876
loss: 0.9841287136077881,grad_norm: 0.9849230996053762, iteration: 30877
loss: 0.9874972105026245,grad_norm: 0.957788973283397, iteration: 30878
loss: 1.0786603689193726,grad_norm: 0.9999994267397642, iteration: 30879
loss: 0.9795508980751038,grad_norm: 0.9461643563574825, iteration: 30880
loss: 1.0016142129898071,grad_norm: 0.9497821781867837, iteration: 30881
loss: 0.9863343834877014,grad_norm: 0.9999990778572393, iteration: 30882
loss: 0.9599942564964294,grad_norm: 0.9999991844529214, iteration: 30883
loss: 1.031236171722412,grad_norm: 0.9092291566726797, iteration: 30884
loss: 1.0448604822158813,grad_norm: 0.7219590277779689, iteration: 30885
loss: 1.0126076936721802,grad_norm: 0.8056150589420347, iteration: 30886
loss: 0.9948904514312744,grad_norm: 0.9999991430813728, iteration: 30887
loss: 1.0170763731002808,grad_norm: 0.9950254264535398, iteration: 30888
loss: 1.0143611431121826,grad_norm: 0.9999992136552176, iteration: 30889
loss: 1.000760555267334,grad_norm: 0.8727728593337873, iteration: 30890
loss: 0.9972386956214905,grad_norm: 0.9364374013622984, iteration: 30891
loss: 0.9853963255882263,grad_norm: 0.9400563874383602, iteration: 30892
loss: 0.9954514503479004,grad_norm: 0.9999990863047726, iteration: 30893
loss: 1.0177067518234253,grad_norm: 0.9999994088801286, iteration: 30894
loss: 0.994816243648529,grad_norm: 0.9875846512819249, iteration: 30895
loss: 1.0061085224151611,grad_norm: 0.9999992279926394, iteration: 30896
loss: 1.0579272508621216,grad_norm: 0.9999994473001595, iteration: 30897
loss: 0.9938555955886841,grad_norm: 0.99999907673867, iteration: 30898
loss: 0.9797912836074829,grad_norm: 0.923133599790235, iteration: 30899
loss: 1.0046892166137695,grad_norm: 0.9999999571552661, iteration: 30900
loss: 1.0013915300369263,grad_norm: 0.9554319401541641, iteration: 30901
loss: 1.0062226057052612,grad_norm: 0.999999390129677, iteration: 30902
loss: 1.0052322149276733,grad_norm: 0.8426747658112411, iteration: 30903
loss: 1.0321838855743408,grad_norm: 0.9999991779876963, iteration: 30904
loss: 1.0079139471054077,grad_norm: 0.8106926783595386, iteration: 30905
loss: 0.9911866188049316,grad_norm: 0.9502474801374962, iteration: 30906
loss: 0.9711514711380005,grad_norm: 0.9411190606205685, iteration: 30907
loss: 1.0114774703979492,grad_norm: 0.9341553063714678, iteration: 30908
loss: 1.0066486597061157,grad_norm: 0.7733968988116389, iteration: 30909
loss: 1.0049389600753784,grad_norm: 0.9925707250494501, iteration: 30910
loss: 0.9812877774238586,grad_norm: 0.9999990977237885, iteration: 30911
loss: 0.9814807772636414,grad_norm: 0.9999990683618687, iteration: 30912
loss: 0.9877112507820129,grad_norm: 0.9920062864842192, iteration: 30913
loss: 0.9767945408821106,grad_norm: 0.9999990763727329, iteration: 30914
loss: 0.9963183403015137,grad_norm: 0.9999994378176966, iteration: 30915
loss: 0.9958157539367676,grad_norm: 0.9999990450480741, iteration: 30916
loss: 0.9494594931602478,grad_norm: 0.9792888700600252, iteration: 30917
loss: 0.9977936744689941,grad_norm: 0.9999991354343466, iteration: 30918
loss: 0.9498359560966492,grad_norm: 0.9929154543372405, iteration: 30919
loss: 0.9556410312652588,grad_norm: 0.9214353656379857, iteration: 30920
loss: 1.0220292806625366,grad_norm: 0.9959525959811627, iteration: 30921
loss: 0.9989671111106873,grad_norm: 0.9999992324468535, iteration: 30922
loss: 1.0211191177368164,grad_norm: 0.9209805869327251, iteration: 30923
loss: 1.027455449104309,grad_norm: 0.9999991317466691, iteration: 30924
loss: 1.031193733215332,grad_norm: 0.9999994972878301, iteration: 30925
loss: 0.9823545217514038,grad_norm: 0.999999288313075, iteration: 30926
loss: 0.9867983460426331,grad_norm: 0.9999992474499013, iteration: 30927
loss: 1.022055983543396,grad_norm: 0.9999993032760244, iteration: 30928
loss: 1.0270189046859741,grad_norm: 0.9999991511526773, iteration: 30929
loss: 0.9725786447525024,grad_norm: 0.8169155429263644, iteration: 30930
loss: 0.9685209393501282,grad_norm: 0.9707464343388059, iteration: 30931
loss: 0.9654094576835632,grad_norm: 0.9999990867209948, iteration: 30932
loss: 1.0074498653411865,grad_norm: 0.9999991882117417, iteration: 30933
loss: 1.0022802352905273,grad_norm: 0.9999990565570493, iteration: 30934
loss: 1.0558288097381592,grad_norm: 0.9999996219659839, iteration: 30935
loss: 1.0443650484085083,grad_norm: 0.9999994598854461, iteration: 30936
loss: 0.9755871295928955,grad_norm: 0.9999991245763946, iteration: 30937
loss: 0.9949391484260559,grad_norm: 0.9383158616819601, iteration: 30938
loss: 1.0044416189193726,grad_norm: 0.9331418222857362, iteration: 30939
loss: 1.0290710926055908,grad_norm: 0.8723541072546771, iteration: 30940
loss: 1.0630055665969849,grad_norm: 0.9999991298473835, iteration: 30941
loss: 0.9986429214477539,grad_norm: 0.977766314590312, iteration: 30942
loss: 1.0381155014038086,grad_norm: 0.9999992734583518, iteration: 30943
loss: 1.0511972904205322,grad_norm: 0.9999997057061346, iteration: 30944
loss: 0.9952443242073059,grad_norm: 0.9999993283105825, iteration: 30945
loss: 1.0147112607955933,grad_norm: 0.9714429352281224, iteration: 30946
loss: 1.0301638841629028,grad_norm: 0.9999990675799387, iteration: 30947
loss: 1.0042532682418823,grad_norm: 0.9999991526952644, iteration: 30948
loss: 0.963603675365448,grad_norm: 0.9999990755632595, iteration: 30949
loss: 0.9974235892295837,grad_norm: 0.9999991209777864, iteration: 30950
loss: 0.9889988303184509,grad_norm: 0.7894577147323381, iteration: 30951
loss: 1.009418249130249,grad_norm: 0.9177302341248217, iteration: 30952
loss: 1.0152031183242798,grad_norm: 0.9661056960864705, iteration: 30953
loss: 1.028751254081726,grad_norm: 0.9068544913885259, iteration: 30954
loss: 1.0324575901031494,grad_norm: 0.8513667193981737, iteration: 30955
loss: 0.9867205619812012,grad_norm: 0.9767401147643483, iteration: 30956
loss: 1.0093663930892944,grad_norm: 0.9999991765380344, iteration: 30957
loss: 1.0293631553649902,grad_norm: 0.9999993460265579, iteration: 30958
loss: 1.01114022731781,grad_norm: 0.9371448511439088, iteration: 30959
loss: 0.9756045937538147,grad_norm: 0.9891416343896227, iteration: 30960
loss: 1.0004273653030396,grad_norm: 0.9946090184909935, iteration: 30961
loss: 0.9643122553825378,grad_norm: 0.8756951336530809, iteration: 30962
loss: 0.9864513874053955,grad_norm: 0.988805714003195, iteration: 30963
loss: 0.9954519867897034,grad_norm: 0.9480961067841857, iteration: 30964
loss: 1.0050212144851685,grad_norm: 0.9999990621923501, iteration: 30965
loss: 1.0761363506317139,grad_norm: 0.9999999171208492, iteration: 30966
loss: 1.0322821140289307,grad_norm: 0.9734711927700965, iteration: 30967
loss: 1.039813756942749,grad_norm: 0.9192457349932001, iteration: 30968
loss: 1.0140819549560547,grad_norm: 0.9999990767035177, iteration: 30969
loss: 1.000983715057373,grad_norm: 0.7993130330711872, iteration: 30970
loss: 0.9948111772537231,grad_norm: 0.9596600395467318, iteration: 30971
loss: 1.0163583755493164,grad_norm: 0.8346297739185895, iteration: 30972
loss: 1.0027227401733398,grad_norm: 0.8747066404459621, iteration: 30973
loss: 1.0133626461029053,grad_norm: 0.9869563232372074, iteration: 30974
loss: 0.9955899119377136,grad_norm: 0.8787598839991658, iteration: 30975
loss: 1.0556668043136597,grad_norm: 0.9999996053918883, iteration: 30976
loss: 0.9698302149772644,grad_norm: 0.9999991525522987, iteration: 30977
loss: 1.013197422027588,grad_norm: 0.9999991076104329, iteration: 30978
loss: 0.954178512096405,grad_norm: 0.8756375361330433, iteration: 30979
loss: 1.040217638015747,grad_norm: 0.9999991410478275, iteration: 30980
loss: 1.0732526779174805,grad_norm: 0.9999995533558173, iteration: 30981
loss: 1.0028103590011597,grad_norm: 0.9921707724169473, iteration: 30982
loss: 1.0434006452560425,grad_norm: 0.9999992184253229, iteration: 30983
loss: 1.0024960041046143,grad_norm: 0.9999991897491182, iteration: 30984
loss: 0.9953937530517578,grad_norm: 0.9234213854917277, iteration: 30985
loss: 1.0472958087921143,grad_norm: 0.9999993494701237, iteration: 30986
loss: 1.0256385803222656,grad_norm: 0.9329274961790058, iteration: 30987
loss: 1.003994107246399,grad_norm: 0.8243984267470451, iteration: 30988
loss: 1.0429115295410156,grad_norm: 0.9999990215277392, iteration: 30989
loss: 0.991639256477356,grad_norm: 0.7754825863428925, iteration: 30990
loss: 0.9942722916603088,grad_norm: 0.9999991463657729, iteration: 30991
loss: 1.0026191473007202,grad_norm: 0.9999993521752046, iteration: 30992
loss: 0.9911267161369324,grad_norm: 0.9899719945892962, iteration: 30993
loss: 0.9718608260154724,grad_norm: 0.7655277069435688, iteration: 30994
loss: 1.0132564306259155,grad_norm: 0.8927885529887101, iteration: 30995
loss: 0.9856100678443909,grad_norm: 0.8668406060791853, iteration: 30996
loss: 1.0041935443878174,grad_norm: 0.9618105559302921, iteration: 30997
loss: 1.0175533294677734,grad_norm: 0.931887472095186, iteration: 30998
loss: 1.0150352716445923,grad_norm: 0.9888471196405351, iteration: 30999
loss: 0.9955374598503113,grad_norm: 0.9999989848703272, iteration: 31000
loss: 1.0185768604278564,grad_norm: 0.9999991879588532, iteration: 31001
loss: 1.0074177980422974,grad_norm: 0.919446792508633, iteration: 31002
loss: 1.0305826663970947,grad_norm: 0.9611214100712678, iteration: 31003
loss: 0.9818999171257019,grad_norm: 0.9999990048738032, iteration: 31004
loss: 0.9945437908172607,grad_norm: 0.9655106337949964, iteration: 31005
loss: 1.0477272272109985,grad_norm: 0.9999990191762216, iteration: 31006
loss: 0.9741360545158386,grad_norm: 0.9999989849986798, iteration: 31007
loss: 1.039076805114746,grad_norm: 0.9999991307778896, iteration: 31008
loss: 1.0128902196884155,grad_norm: 0.8844889003788095, iteration: 31009
loss: 1.0178992748260498,grad_norm: 0.8220536814687536, iteration: 31010
loss: 1.0018004179000854,grad_norm: 0.9455540678163857, iteration: 31011
loss: 1.0220316648483276,grad_norm: 0.9811564840002531, iteration: 31012
loss: 0.9760022163391113,grad_norm: 0.9156739674159693, iteration: 31013
loss: 1.0296156406402588,grad_norm: 0.9999990954371879, iteration: 31014
loss: 1.0298389196395874,grad_norm: 0.9607999045980746, iteration: 31015
loss: 0.9899840950965881,grad_norm: 0.8322503660108561, iteration: 31016
loss: 1.0142364501953125,grad_norm: 0.8624313063516248, iteration: 31017
loss: 1.0706419944763184,grad_norm: 0.999999833817337, iteration: 31018
loss: 1.0308446884155273,grad_norm: 0.835545812996945, iteration: 31019
loss: 1.0224586725234985,grad_norm: 0.8232886820958402, iteration: 31020
loss: 1.0360867977142334,grad_norm: 0.945263142043922, iteration: 31021
loss: 1.0404212474822998,grad_norm: 0.9228666289523183, iteration: 31022
loss: 1.0293185710906982,grad_norm: 0.9999991398695682, iteration: 31023
loss: 0.9969609379768372,grad_norm: 0.9373482264593608, iteration: 31024
loss: 0.9966551661491394,grad_norm: 0.9669826194358034, iteration: 31025
loss: 1.0103250741958618,grad_norm: 0.8153158848806638, iteration: 31026
loss: 0.9970186948776245,grad_norm: 0.9999992734398597, iteration: 31027
loss: 1.0017825365066528,grad_norm: 0.8768221193355249, iteration: 31028
loss: 1.0113986730575562,grad_norm: 0.9660314866124572, iteration: 31029
loss: 0.9907346963882446,grad_norm: 0.9223653684165237, iteration: 31030
loss: 0.9789853692054749,grad_norm: 0.9999990575127098, iteration: 31031
loss: 0.9903079271316528,grad_norm: 0.9999990408902919, iteration: 31032
loss: 1.0346355438232422,grad_norm: 0.8832931901707427, iteration: 31033
loss: 0.9871019721031189,grad_norm: 0.9999993075763807, iteration: 31034
loss: 1.0055984258651733,grad_norm: 0.970876571034771, iteration: 31035
loss: 1.0058234930038452,grad_norm: 0.8194946026337517, iteration: 31036
loss: 1.0117889642715454,grad_norm: 0.9323767019117816, iteration: 31037
loss: 1.000451922416687,grad_norm: 0.9999991166113992, iteration: 31038
loss: 0.9809632301330566,grad_norm: 0.9999990797511719, iteration: 31039
loss: 1.0275880098342896,grad_norm: 0.8464806443857517, iteration: 31040
loss: 1.0180673599243164,grad_norm: 0.9999991357800707, iteration: 31041
loss: 1.012627363204956,grad_norm: 0.9999991620248381, iteration: 31042
loss: 1.0207865238189697,grad_norm: 0.9729103964001964, iteration: 31043
loss: 0.9906768798828125,grad_norm: 0.8699288686355263, iteration: 31044
loss: 1.0200159549713135,grad_norm: 0.9042495525541556, iteration: 31045
loss: 1.0419304370880127,grad_norm: 0.908996632497483, iteration: 31046
loss: 1.0102174282073975,grad_norm: 0.9220386312119446, iteration: 31047
loss: 0.9746416807174683,grad_norm: 0.8941400995093185, iteration: 31048
loss: 0.9989473223686218,grad_norm: 0.878729479422236, iteration: 31049
loss: 0.9969674348831177,grad_norm: 0.9469594774038718, iteration: 31050
loss: 0.9871701598167419,grad_norm: 0.999999113254218, iteration: 31051
loss: 1.0007145404815674,grad_norm: 0.7407864101195989, iteration: 31052
loss: 0.979027509689331,grad_norm: 0.834945841920003, iteration: 31053
loss: 1.010151743888855,grad_norm: 0.9999991651355311, iteration: 31054
loss: 0.9986757636070251,grad_norm: 0.9664828441087692, iteration: 31055
loss: 0.9928932785987854,grad_norm: 0.9997408967979311, iteration: 31056
loss: 1.0187153816223145,grad_norm: 0.9999990234745343, iteration: 31057
loss: 1.0177315473556519,grad_norm: 0.9999992425943967, iteration: 31058
loss: 1.046505331993103,grad_norm: 0.999999456015145, iteration: 31059
loss: 0.9785788655281067,grad_norm: 0.9193504081674393, iteration: 31060
loss: 0.9840860366821289,grad_norm: 0.9999992203074297, iteration: 31061
loss: 1.0243169069290161,grad_norm: 0.8623258686114267, iteration: 31062
loss: 1.0134721994400024,grad_norm: 0.9999992208462505, iteration: 31063
loss: 1.0222855806350708,grad_norm: 0.8825647155497647, iteration: 31064
loss: 1.0153119564056396,grad_norm: 0.9843363032548653, iteration: 31065
loss: 0.9750552177429199,grad_norm: 0.8828783082378483, iteration: 31066
loss: 1.0097888708114624,grad_norm: 0.974018632929401, iteration: 31067
loss: 1.0336744785308838,grad_norm: 0.9999991403680475, iteration: 31068
loss: 1.0115422010421753,grad_norm: 0.9880022103333833, iteration: 31069
loss: 1.044275164604187,grad_norm: 0.9999991617721287, iteration: 31070
loss: 0.9953362345695496,grad_norm: 0.9999990348930802, iteration: 31071
loss: 0.9995524287223816,grad_norm: 0.9042223154392527, iteration: 31072
loss: 1.021748661994934,grad_norm: 0.834927731273639, iteration: 31073
loss: 1.0078768730163574,grad_norm: 0.8248798575318234, iteration: 31074
loss: 0.9894422888755798,grad_norm: 0.8911154016213841, iteration: 31075
loss: 0.9951469898223877,grad_norm: 0.9463225891512702, iteration: 31076
loss: 1.009928822517395,grad_norm: 0.8911829553188353, iteration: 31077
loss: 0.9915771484375,grad_norm: 0.9999991571584178, iteration: 31078
loss: 1.0477662086486816,grad_norm: 0.9300353509370914, iteration: 31079
loss: 1.0269984006881714,grad_norm: 0.9999992075418259, iteration: 31080
loss: 0.9579700231552124,grad_norm: 0.9999990550461707, iteration: 31081
loss: 0.986221969127655,grad_norm: 0.965974981929712, iteration: 31082
loss: 1.0162426233291626,grad_norm: 0.9999990773177772, iteration: 31083
loss: 1.0123509168624878,grad_norm: 0.9999990789598441, iteration: 31084
loss: 1.0033339262008667,grad_norm: 0.9059228778229469, iteration: 31085
loss: 1.0574891567230225,grad_norm: 0.9999998999498378, iteration: 31086
loss: 1.0309821367263794,grad_norm: 0.9670315909734163, iteration: 31087
loss: 0.9918076395988464,grad_norm: 0.9999990539265786, iteration: 31088
loss: 1.0158599615097046,grad_norm: 0.9751662517552004, iteration: 31089
loss: 0.9900672435760498,grad_norm: 0.9999989748916007, iteration: 31090
loss: 1.0160284042358398,grad_norm: 0.9999990556530326, iteration: 31091
loss: 1.0473576784133911,grad_norm: 0.9999990790321409, iteration: 31092
loss: 1.0054374933242798,grad_norm: 0.9999991080589462, iteration: 31093
loss: 1.0206944942474365,grad_norm: 0.9247342342160969, iteration: 31094
loss: 1.0471351146697998,grad_norm: 0.9999999109426629, iteration: 31095
loss: 1.0322351455688477,grad_norm: 0.9999990817131544, iteration: 31096
loss: 1.0188697576522827,grad_norm: 0.9999990771666257, iteration: 31097
loss: 1.0230231285095215,grad_norm: 0.9999992681790939, iteration: 31098
loss: 1.0330560207366943,grad_norm: 0.9999997201581298, iteration: 31099
loss: 0.9665600657463074,grad_norm: 0.9999994767383298, iteration: 31100
loss: 1.014230728149414,grad_norm: 0.9623017467753272, iteration: 31101
loss: 1.0033094882965088,grad_norm: 0.9475413122365566, iteration: 31102
loss: 0.9913733601570129,grad_norm: 0.9999995552164614, iteration: 31103
loss: 1.0712083578109741,grad_norm: 0.9999995212255226, iteration: 31104
loss: 1.0708025693893433,grad_norm: 0.9999998470429718, iteration: 31105
loss: 1.0893328189849854,grad_norm: 0.9999994092928679, iteration: 31106
loss: 0.9675069451332092,grad_norm: 0.9279401776975492, iteration: 31107
loss: 0.9715524911880493,grad_norm: 0.9999993333792818, iteration: 31108
loss: 0.9926681518554688,grad_norm: 0.9999991066248335, iteration: 31109
loss: 0.9901593327522278,grad_norm: 0.999999652988348, iteration: 31110
loss: 1.0252524614334106,grad_norm: 0.9193833534283598, iteration: 31111
loss: 1.013268232345581,grad_norm: 0.9999998511649899, iteration: 31112
loss: 1.0168752670288086,grad_norm: 0.893603403697192, iteration: 31113
loss: 1.0389773845672607,grad_norm: 0.9999991940461908, iteration: 31114
loss: 1.0115331411361694,grad_norm: 0.9999991579814744, iteration: 31115
loss: 1.0021709203720093,grad_norm: 0.9372453816084382, iteration: 31116
loss: 0.988858163356781,grad_norm: 0.9999990402780152, iteration: 31117
loss: 1.032749056816101,grad_norm: 0.9999992071101536, iteration: 31118
loss: 0.9927907586097717,grad_norm: 0.9999993126862229, iteration: 31119
loss: 1.0016319751739502,grad_norm: 0.9999993265896859, iteration: 31120
loss: 1.1085467338562012,grad_norm: 0.9999991145631978, iteration: 31121
loss: 0.9889760613441467,grad_norm: 0.9477150352820439, iteration: 31122
loss: 0.9653095602989197,grad_norm: 0.9999991708431151, iteration: 31123
loss: 1.0153563022613525,grad_norm: 0.9999990315869514, iteration: 31124
loss: 0.9914973974227905,grad_norm: 0.9999991634100645, iteration: 31125
loss: 1.027458667755127,grad_norm: 0.9814348682404836, iteration: 31126
loss: 0.964690625667572,grad_norm: 0.9999991152777398, iteration: 31127
loss: 0.9909924864768982,grad_norm: 0.9999991758905448, iteration: 31128
loss: 1.0077848434448242,grad_norm: 0.999999230142219, iteration: 31129
loss: 0.9956676363945007,grad_norm: 0.8896518420995466, iteration: 31130
loss: 1.0155024528503418,grad_norm: 0.9999992428869723, iteration: 31131
loss: 0.979880690574646,grad_norm: 0.9635764125253087, iteration: 31132
loss: 1.0015246868133545,grad_norm: 0.9999990283010729, iteration: 31133
loss: 0.9888794422149658,grad_norm: 0.9010562665402564, iteration: 31134
loss: 1.0030797719955444,grad_norm: 0.8483898126553407, iteration: 31135
loss: 1.000638723373413,grad_norm: 0.9155756649349217, iteration: 31136
loss: 0.998541533946991,grad_norm: 0.9999990673254769, iteration: 31137
loss: 0.9785385131835938,grad_norm: 0.9999990194564554, iteration: 31138
loss: 0.9937478303909302,grad_norm: 0.9999997359591687, iteration: 31139
loss: 1.0157134532928467,grad_norm: 0.999999178122545, iteration: 31140
loss: 0.9682090282440186,grad_norm: 0.8359679244509437, iteration: 31141
loss: 0.9828148484230042,grad_norm: 0.9999990220855832, iteration: 31142
loss: 1.0011119842529297,grad_norm: 0.9440242740073419, iteration: 31143
loss: 1.0249618291854858,grad_norm: 0.9999996474415839, iteration: 31144
loss: 0.9994756579399109,grad_norm: 0.999999090768226, iteration: 31145
loss: 0.9992392659187317,grad_norm: 0.9999990938093828, iteration: 31146
loss: 1.0151786804199219,grad_norm: 0.999999254482995, iteration: 31147
loss: 1.0003870725631714,grad_norm: 0.9817910662329168, iteration: 31148
loss: 1.0217534303665161,grad_norm: 0.9999991199505961, iteration: 31149
loss: 1.0211505889892578,grad_norm: 0.999999211885217, iteration: 31150
loss: 0.9953823089599609,grad_norm: 0.9336533369960393, iteration: 31151
loss: 1.0345638990402222,grad_norm: 0.9999992885633828, iteration: 31152
loss: 1.0130869150161743,grad_norm: 0.9999993754496012, iteration: 31153
loss: 1.0012141466140747,grad_norm: 0.9999990502393478, iteration: 31154
loss: 1.038503646850586,grad_norm: 0.9999991186686803, iteration: 31155
loss: 1.0208740234375,grad_norm: 0.9279919282417484, iteration: 31156
loss: 1.0243345499038696,grad_norm: 0.9999989852749499, iteration: 31157
loss: 1.0135726928710938,grad_norm: 0.9999991016049479, iteration: 31158
loss: 0.9950991868972778,grad_norm: 0.9999991253079439, iteration: 31159
loss: 0.980757474899292,grad_norm: 0.9999991348261785, iteration: 31160
loss: 0.9912768006324768,grad_norm: 0.9999992848809568, iteration: 31161
loss: 0.9946116805076599,grad_norm: 0.8763937182407622, iteration: 31162
loss: 1.0704442262649536,grad_norm: 0.9999997558193033, iteration: 31163
loss: 1.0084928274154663,grad_norm: 0.9999989540648759, iteration: 31164
loss: 1.0170893669128418,grad_norm: 0.9999991244166828, iteration: 31165
loss: 1.0083980560302734,grad_norm: 0.9999991416104629, iteration: 31166
loss: 0.9909847974777222,grad_norm: 0.8671956807886007, iteration: 31167
loss: 1.002913236618042,grad_norm: 0.8595299669816752, iteration: 31168
loss: 1.0272830724716187,grad_norm: 0.9683672840288902, iteration: 31169
loss: 1.0106079578399658,grad_norm: 0.99999927186052, iteration: 31170
loss: 1.0430063009262085,grad_norm: 0.9999998469805459, iteration: 31171
loss: 1.0438467264175415,grad_norm: 0.9999992984443283, iteration: 31172
loss: 1.0218465328216553,grad_norm: 0.7575664527167142, iteration: 31173
loss: 0.9847348928451538,grad_norm: 0.9999991364567498, iteration: 31174
loss: 1.023119330406189,grad_norm: 0.9999990680005144, iteration: 31175
loss: 1.0440326929092407,grad_norm: 0.8941522373739426, iteration: 31176
loss: 1.048154592514038,grad_norm: 0.9999996606831377, iteration: 31177
loss: 1.0004576444625854,grad_norm: 0.9339421460726435, iteration: 31178
loss: 0.9822503924369812,grad_norm: 0.9999997536708777, iteration: 31179
loss: 1.1258854866027832,grad_norm: 0.9999997667704431, iteration: 31180
loss: 1.0138921737670898,grad_norm: 0.7222475374831372, iteration: 31181
loss: 0.9794420599937439,grad_norm: 0.9999991681927566, iteration: 31182
loss: 1.0122177600860596,grad_norm: 0.9650163468645202, iteration: 31183
loss: 1.027861475944519,grad_norm: 0.9999990525902173, iteration: 31184
loss: 1.012864589691162,grad_norm: 0.8703025474215487, iteration: 31185
loss: 1.0648459196090698,grad_norm: 0.9999996542410646, iteration: 31186
loss: 0.9775090217590332,grad_norm: 0.9999989846451532, iteration: 31187
loss: 1.0035783052444458,grad_norm: 0.8285513109667549, iteration: 31188
loss: 1.005492925643921,grad_norm: 0.8201086050561814, iteration: 31189
loss: 1.000495433807373,grad_norm: 0.9069489320970927, iteration: 31190
loss: 0.9695714712142944,grad_norm: 0.9999990848741235, iteration: 31191
loss: 1.0031622648239136,grad_norm: 0.8149364613392662, iteration: 31192
loss: 1.0060802698135376,grad_norm: 0.9999990630859595, iteration: 31193
loss: 1.0208622217178345,grad_norm: 0.842215590200815, iteration: 31194
loss: 1.0338441133499146,grad_norm: 0.8833137698119966, iteration: 31195
loss: 1.02117121219635,grad_norm: 0.9999994987631279, iteration: 31196
loss: 0.9745513200759888,grad_norm: 0.953363903331634, iteration: 31197
loss: 1.0210124254226685,grad_norm: 0.9999994877871086, iteration: 31198
loss: 1.0256644487380981,grad_norm: 0.9999996324947673, iteration: 31199
loss: 1.0393763780593872,grad_norm: 0.9999993630767469, iteration: 31200
loss: 1.0203546285629272,grad_norm: 0.999999102712211, iteration: 31201
loss: 1.0145574808120728,grad_norm: 0.9289757877179594, iteration: 31202
loss: 1.018632411956787,grad_norm: 0.9999991791847082, iteration: 31203
loss: 1.0045454502105713,grad_norm: 0.8677695517929606, iteration: 31204
loss: 0.9959668517112732,grad_norm: 0.9999992078861812, iteration: 31205
loss: 1.0165941715240479,grad_norm: 0.8764159089450315, iteration: 31206
loss: 0.9919400215148926,grad_norm: 0.9745820176774255, iteration: 31207
loss: 1.0157346725463867,grad_norm: 0.7983566925189621, iteration: 31208
loss: 1.017754316329956,grad_norm: 0.8666584781776713, iteration: 31209
loss: 1.0202161073684692,grad_norm: 0.9470729884283661, iteration: 31210
loss: 1.0208176374435425,grad_norm: 0.999999073285945, iteration: 31211
loss: 1.025536060333252,grad_norm: 0.9999992088853802, iteration: 31212
loss: 1.0136191844940186,grad_norm: 0.963677380205358, iteration: 31213
loss: 1.0086960792541504,grad_norm: 0.9999991283799438, iteration: 31214
loss: 0.9498428106307983,grad_norm: 0.9999991499678331, iteration: 31215
loss: 1.0303164720535278,grad_norm: 0.9999992339523123, iteration: 31216
loss: 0.972247838973999,grad_norm: 0.9999991633781045, iteration: 31217
loss: 1.041110873222351,grad_norm: 0.9999991812556214, iteration: 31218
loss: 0.9633268713951111,grad_norm: 0.9999990802163246, iteration: 31219
loss: 1.008699893951416,grad_norm: 0.9999991220101244, iteration: 31220
loss: 1.0153403282165527,grad_norm: 0.8748609234790575, iteration: 31221
loss: 1.00385582447052,grad_norm: 0.9971657165848419, iteration: 31222
loss: 1.0334645509719849,grad_norm: 0.9999990997848621, iteration: 31223
loss: 0.9873908162117004,grad_norm: 0.9999993777261376, iteration: 31224
loss: 0.9898300170898438,grad_norm: 0.9999990148576644, iteration: 31225
loss: 1.0088051557540894,grad_norm: 0.9631583362886497, iteration: 31226
loss: 1.0514200925827026,grad_norm: 0.9999992047099325, iteration: 31227
loss: 1.0088189840316772,grad_norm: 0.9999992431408027, iteration: 31228
loss: 0.9931609630584717,grad_norm: 0.9635237930196825, iteration: 31229
loss: 0.9903987050056458,grad_norm: 0.9043570770909996, iteration: 31230
loss: 0.9774628281593323,grad_norm: 0.9716382576497921, iteration: 31231
loss: 1.0089045763015747,grad_norm: 0.9999990385423608, iteration: 31232
loss: 0.9796888828277588,grad_norm: 0.9482667885681618, iteration: 31233
loss: 1.0011848211288452,grad_norm: 0.8687645893906525, iteration: 31234
loss: 1.0607060194015503,grad_norm: 0.9999991114620147, iteration: 31235
loss: 1.0078576803207397,grad_norm: 0.9944878097739998, iteration: 31236
loss: 1.0200023651123047,grad_norm: 0.9433441092027236, iteration: 31237
loss: 0.9763113856315613,grad_norm: 0.867583584748237, iteration: 31238
loss: 0.9968898892402649,grad_norm: 0.9565243709870751, iteration: 31239
loss: 1.020251989364624,grad_norm: 0.9965978495328676, iteration: 31240
loss: 1.0411462783813477,grad_norm: 0.9999990653205226, iteration: 31241
loss: 1.0085397958755493,grad_norm: 0.9999990581288242, iteration: 31242
loss: 1.0344030857086182,grad_norm: 0.9731339840419192, iteration: 31243
loss: 0.946746826171875,grad_norm: 0.9999990334514068, iteration: 31244
loss: 0.9787978529930115,grad_norm: 0.999999055697069, iteration: 31245
loss: 0.9960886836051941,grad_norm: 0.911743795539038, iteration: 31246
loss: 1.0094717741012573,grad_norm: 0.8443306250070353, iteration: 31247
loss: 1.0608149766921997,grad_norm: 0.999999696443504, iteration: 31248
loss: 1.0241883993148804,grad_norm: 0.97782803922081, iteration: 31249
loss: 1.047266960144043,grad_norm: 0.9698109238868293, iteration: 31250
loss: 1.0210932493209839,grad_norm: 0.9999990763200389, iteration: 31251
loss: 1.0178570747375488,grad_norm: 0.83916767173751, iteration: 31252
loss: 1.0161763429641724,grad_norm: 0.9999990649215343, iteration: 31253
loss: 0.9752269387245178,grad_norm: 0.9128816252633826, iteration: 31254
loss: 0.9634515047073364,grad_norm: 0.999999270559161, iteration: 31255
loss: 1.0006403923034668,grad_norm: 0.9119107383966565, iteration: 31256
loss: 1.0440741777420044,grad_norm: 0.927429282756859, iteration: 31257
loss: 0.9874515533447266,grad_norm: 0.9074151748595317, iteration: 31258
loss: 1.053297758102417,grad_norm: 0.9999995811383066, iteration: 31259
loss: 1.01373291015625,grad_norm: 0.9999989921521696, iteration: 31260
loss: 1.0851234197616577,grad_norm: 0.9999993701851358, iteration: 31261
loss: 1.0379877090454102,grad_norm: 0.999999052695573, iteration: 31262
loss: 1.0347598791122437,grad_norm: 0.9524288517558596, iteration: 31263
loss: 1.001021146774292,grad_norm: 0.9999998018436657, iteration: 31264
loss: 0.9958433508872986,grad_norm: 0.8875827317413708, iteration: 31265
loss: 0.9749471545219421,grad_norm: 0.8787714203655762, iteration: 31266
loss: 1.0277190208435059,grad_norm: 0.9999992172771911, iteration: 31267
loss: 1.0186641216278076,grad_norm: 0.7523729961629115, iteration: 31268
loss: 1.0090947151184082,grad_norm: 0.9331223839321076, iteration: 31269
loss: 0.9897395372390747,grad_norm: 0.9999991350041955, iteration: 31270
loss: 0.998117208480835,grad_norm: 0.8436777894596984, iteration: 31271
loss: 1.0172480344772339,grad_norm: 0.9999990322565953, iteration: 31272
loss: 1.0269742012023926,grad_norm: 0.9484869591559116, iteration: 31273
loss: 1.0046191215515137,grad_norm: 0.7388921366880684, iteration: 31274
loss: 1.026107668876648,grad_norm: 0.9363275378099314, iteration: 31275
loss: 1.0449563264846802,grad_norm: 0.9999991459874411, iteration: 31276
loss: 0.9888123869895935,grad_norm: 0.9999993668224306, iteration: 31277
loss: 1.030038595199585,grad_norm: 0.9999993464284287, iteration: 31278
loss: 0.991916298866272,grad_norm: 0.9999992979657257, iteration: 31279
loss: 1.0235176086425781,grad_norm: 0.9999991677015911, iteration: 31280
loss: 0.9901224970817566,grad_norm: 0.9999991426932545, iteration: 31281
loss: 1.028918743133545,grad_norm: 0.9999991570764366, iteration: 31282
loss: 1.0040675401687622,grad_norm: 0.9999991187426481, iteration: 31283
loss: 1.0278122425079346,grad_norm: 0.9999992074654291, iteration: 31284
loss: 1.0027227401733398,grad_norm: 0.9999991376585351, iteration: 31285
loss: 0.9853440523147583,grad_norm: 0.9426336517951585, iteration: 31286
loss: 0.9958535432815552,grad_norm: 0.8669512225611039, iteration: 31287
loss: 1.0507258176803589,grad_norm: 0.9999990536705619, iteration: 31288
loss: 1.0423171520233154,grad_norm: 0.9673013215775004, iteration: 31289
loss: 1.095145344734192,grad_norm: 0.9999999055397636, iteration: 31290
loss: 1.0354200601577759,grad_norm: 0.9999991911738487, iteration: 31291
loss: 0.9678502082824707,grad_norm: 0.9999992611833692, iteration: 31292
loss: 1.0050791501998901,grad_norm: 0.9999992422101336, iteration: 31293
loss: 0.9811676740646362,grad_norm: 0.9999991189245585, iteration: 31294
loss: 1.0235264301300049,grad_norm: 0.857181571499052, iteration: 31295
loss: 0.9895428419113159,grad_norm: 0.9235481698918476, iteration: 31296
loss: 1.0128129720687866,grad_norm: 0.9999990887496547, iteration: 31297
loss: 1.0665044784545898,grad_norm: 0.9999996141334588, iteration: 31298
loss: 0.9745076298713684,grad_norm: 0.8856352050474938, iteration: 31299
loss: 1.0199241638183594,grad_norm: 0.9999991291935582, iteration: 31300
loss: 1.012129783630371,grad_norm: 0.9999989863999496, iteration: 31301
loss: 0.9575567245483398,grad_norm: 0.999999197419805, iteration: 31302
loss: 0.99693763256073,grad_norm: 0.9472315551507595, iteration: 31303
loss: 1.0319098234176636,grad_norm: 0.9999992342783599, iteration: 31304
loss: 0.9828574061393738,grad_norm: 0.9999990971731755, iteration: 31305
loss: 0.9743599891662598,grad_norm: 0.928912739746082, iteration: 31306
loss: 1.019702434539795,grad_norm: 0.9955235017184197, iteration: 31307
loss: 1.0105257034301758,grad_norm: 0.971261946403095, iteration: 31308
loss: 1.016032338142395,grad_norm: 0.9999989765646755, iteration: 31309
loss: 1.0192840099334717,grad_norm: 0.9420684131625171, iteration: 31310
loss: 1.0156714916229248,grad_norm: 0.9999992929851923, iteration: 31311
loss: 1.0446876287460327,grad_norm: 0.9999992522448539, iteration: 31312
loss: 0.9810827970504761,grad_norm: 0.9878580818157094, iteration: 31313
loss: 1.0237168073654175,grad_norm: 0.9999990624273676, iteration: 31314
loss: 1.0261448621749878,grad_norm: 0.9211693552102868, iteration: 31315
loss: 1.109470009803772,grad_norm: 0.999999204850954, iteration: 31316
loss: 0.9902665615081787,grad_norm: 0.99999920684156, iteration: 31317
loss: 0.9960249066352844,grad_norm: 0.9999994454365195, iteration: 31318
loss: 0.9932515621185303,grad_norm: 0.9999990770863016, iteration: 31319
loss: 0.9978505373001099,grad_norm: 0.9999991090602159, iteration: 31320
loss: 0.9592099785804749,grad_norm: 0.9865907257341564, iteration: 31321
loss: 0.9882464408874512,grad_norm: 0.9999991774743199, iteration: 31322
loss: 0.9865254759788513,grad_norm: 0.9999991835660181, iteration: 31323
loss: 1.0431406497955322,grad_norm: 0.9999994962978714, iteration: 31324
loss: 1.0297871828079224,grad_norm: 0.7944287258487313, iteration: 31325
loss: 1.0160863399505615,grad_norm: 0.999999227572422, iteration: 31326
loss: 1.0032883882522583,grad_norm: 0.8770192537961554, iteration: 31327
loss: 1.032016396522522,grad_norm: 0.9999992629602503, iteration: 31328
loss: 1.0354164838790894,grad_norm: 0.9999992593720821, iteration: 31329
loss: 1.028069019317627,grad_norm: 0.9202566853612909, iteration: 31330
loss: 1.0081268548965454,grad_norm: 0.9999989850254174, iteration: 31331
loss: 0.9708436131477356,grad_norm: 0.9999991401105633, iteration: 31332
loss: 0.9959147572517395,grad_norm: 0.9999990654426067, iteration: 31333
loss: 1.0172536373138428,grad_norm: 0.9999990322069295, iteration: 31334
loss: 1.0231542587280273,grad_norm: 0.9082145128545035, iteration: 31335
loss: 1.0314545631408691,grad_norm: 0.9999990394997382, iteration: 31336
loss: 1.0572065114974976,grad_norm: 0.9830534874731612, iteration: 31337
loss: 1.0062534809112549,grad_norm: 0.9214560921170917, iteration: 31338
loss: 1.0262371301651,grad_norm: 0.9999990625756323, iteration: 31339
loss: 1.0108554363250732,grad_norm: 0.9999991156046774, iteration: 31340
loss: 1.0018218755722046,grad_norm: 0.9999989532373942, iteration: 31341
loss: 1.0403022766113281,grad_norm: 0.9999991090066801, iteration: 31342
loss: 1.0283150672912598,grad_norm: 0.746600135120729, iteration: 31343
loss: 1.024291753768921,grad_norm: 0.9999991453737223, iteration: 31344
loss: 0.9913718700408936,grad_norm: 0.9999992012996862, iteration: 31345
loss: 1.0267291069030762,grad_norm: 0.9999993276719266, iteration: 31346
loss: 1.0313148498535156,grad_norm: 0.9999990851058191, iteration: 31347
loss: 0.9936951398849487,grad_norm: 0.9975303878016446, iteration: 31348
loss: 0.970764696598053,grad_norm: 0.9999990389254381, iteration: 31349
loss: 1.007226586341858,grad_norm: 0.9999992477246613, iteration: 31350
loss: 1.0023010969161987,grad_norm: 0.9999991260253616, iteration: 31351
loss: 1.000309944152832,grad_norm: 0.9003322802633683, iteration: 31352
loss: 0.9868510961532593,grad_norm: 0.937862301836498, iteration: 31353
loss: 1.0002411603927612,grad_norm: 0.9999992219795073, iteration: 31354
loss: 1.0054597854614258,grad_norm: 0.8774456389068976, iteration: 31355
loss: 0.9983213543891907,grad_norm: 0.9328021362443556, iteration: 31356
loss: 0.9842908382415771,grad_norm: 0.9413201005531551, iteration: 31357
loss: 1.0371438264846802,grad_norm: 0.9999997039614642, iteration: 31358
loss: 0.9704113602638245,grad_norm: 0.999999050537484, iteration: 31359
loss: 0.9789862632751465,grad_norm: 0.9204176818584342, iteration: 31360
loss: 1.0121525526046753,grad_norm: 0.7680373966203532, iteration: 31361
loss: 1.0252137184143066,grad_norm: 0.928302253772969, iteration: 31362
loss: 1.0093634128570557,grad_norm: 0.999999176844702, iteration: 31363
loss: 1.0052396059036255,grad_norm: 0.7603858550406171, iteration: 31364
loss: 0.9996834397315979,grad_norm: 0.8600969869730873, iteration: 31365
loss: 1.037630319595337,grad_norm: 0.999999344350273, iteration: 31366
loss: 1.0386086702346802,grad_norm: 0.9999990891243472, iteration: 31367
loss: 0.9970453977584839,grad_norm: 0.9999992322275478, iteration: 31368
loss: 1.0130772590637207,grad_norm: 0.999999072889651, iteration: 31369
loss: 1.0918254852294922,grad_norm: 0.9999997924092895, iteration: 31370
loss: 0.9748731255531311,grad_norm: 0.9999990427904569, iteration: 31371
loss: 0.99329674243927,grad_norm: 0.8149779839269043, iteration: 31372
loss: 1.042959451675415,grad_norm: 0.9646052668866755, iteration: 31373
loss: 1.0224138498306274,grad_norm: 0.8330719009861007, iteration: 31374
loss: 1.0607010126113892,grad_norm: 0.9999995607241421, iteration: 31375
loss: 1.0150235891342163,grad_norm: 0.9999990595999364, iteration: 31376
loss: 1.0073356628417969,grad_norm: 0.9999990883269987, iteration: 31377
loss: 1.0024341344833374,grad_norm: 0.8297238264001606, iteration: 31378
loss: 1.0014197826385498,grad_norm: 0.9999993589426982, iteration: 31379
loss: 1.0022425651550293,grad_norm: 0.9999990609760935, iteration: 31380
loss: 1.007492184638977,grad_norm: 0.9999989955979609, iteration: 31381
loss: 1.0281455516815186,grad_norm: 0.9999996298571162, iteration: 31382
loss: 1.0605390071868896,grad_norm: 0.9999998295862805, iteration: 31383
loss: 1.0052647590637207,grad_norm: 0.8298789761846859, iteration: 31384
loss: 0.9919204115867615,grad_norm: 0.9999992483819733, iteration: 31385
loss: 1.0392943620681763,grad_norm: 0.95936742292687, iteration: 31386
loss: 1.0060350894927979,grad_norm: 0.8643940885872434, iteration: 31387
loss: 0.9723736047744751,grad_norm: 0.9999463538553616, iteration: 31388
loss: 1.0052680969238281,grad_norm: 0.9999990647022657, iteration: 31389
loss: 1.002081036567688,grad_norm: 0.9131553819603407, iteration: 31390
loss: 1.0159324407577515,grad_norm: 0.9381356180985831, iteration: 31391
loss: 0.9610827565193176,grad_norm: 0.9999991088427675, iteration: 31392
loss: 1.017336130142212,grad_norm: 0.8221144142735772, iteration: 31393
loss: 1.00632643699646,grad_norm: 0.8620246707364351, iteration: 31394
loss: 0.9911430478096008,grad_norm: 0.9999995251272193, iteration: 31395
loss: 1.075382113456726,grad_norm: 0.9999991551130805, iteration: 31396
loss: 0.9749515652656555,grad_norm: 0.9536014698968178, iteration: 31397
loss: 1.0000172853469849,grad_norm: 0.9999994337511828, iteration: 31398
loss: 0.9899577498435974,grad_norm: 0.9933224048469597, iteration: 31399
loss: 1.0327762365341187,grad_norm: 0.9950400459772546, iteration: 31400
loss: 0.9938529133796692,grad_norm: 0.9999989984122966, iteration: 31401
loss: 1.0169568061828613,grad_norm: 0.8332001810750217, iteration: 31402
loss: 0.9797683954238892,grad_norm: 0.999999218720019, iteration: 31403
loss: 0.9968739748001099,grad_norm: 0.9999992525723267, iteration: 31404
loss: 1.027605652809143,grad_norm: 0.8946946074675094, iteration: 31405
loss: 1.0663591623306274,grad_norm: 0.999999752137359, iteration: 31406
loss: 0.9730557799339294,grad_norm: 0.98097283763171, iteration: 31407
loss: 1.0391812324523926,grad_norm: 0.9999991713818213, iteration: 31408
loss: 1.006387710571289,grad_norm: 0.9999995986816247, iteration: 31409
loss: 0.9879082441329956,grad_norm: 0.9999991760778675, iteration: 31410
loss: 1.0223877429962158,grad_norm: 0.9999990217633358, iteration: 31411
loss: 1.0217242240905762,grad_norm: 0.9999994017796728, iteration: 31412
loss: 1.0006664991378784,grad_norm: 0.999999292775455, iteration: 31413
loss: 1.0369046926498413,grad_norm: 0.8409931443055698, iteration: 31414
loss: 0.9909069538116455,grad_norm: 0.9999991917347579, iteration: 31415
loss: 1.0144151449203491,grad_norm: 0.9254339582732944, iteration: 31416
loss: 1.0140362977981567,grad_norm: 0.8641318063903154, iteration: 31417
loss: 1.0131282806396484,grad_norm: 0.9021579625699171, iteration: 31418
loss: 0.96040278673172,grad_norm: 0.9999992847479593, iteration: 31419
loss: 1.0009820461273193,grad_norm: 0.9828419687977744, iteration: 31420
loss: 1.0074195861816406,grad_norm: 0.9999991535157231, iteration: 31421
loss: 1.014418363571167,grad_norm: 0.8754289928059772, iteration: 31422
loss: 1.0096408128738403,grad_norm: 0.9999990031347964, iteration: 31423
loss: 1.0306581258773804,grad_norm: 0.9999992038411837, iteration: 31424
loss: 1.014719843864441,grad_norm: 0.9999997834771577, iteration: 31425
loss: 0.9620503783226013,grad_norm: 0.9748751279804049, iteration: 31426
loss: 0.9696444869041443,grad_norm: 0.9999992149766977, iteration: 31427
loss: 1.0208473205566406,grad_norm: 0.999998996323693, iteration: 31428
loss: 0.9667844176292419,grad_norm: 0.921540737564013, iteration: 31429
loss: 1.0464200973510742,grad_norm: 0.999999615275616, iteration: 31430
loss: 0.9938751459121704,grad_norm: 0.7752446888110984, iteration: 31431
loss: 0.9543085098266602,grad_norm: 0.8999511628443551, iteration: 31432
loss: 0.9879024028778076,grad_norm: 0.9290614750080034, iteration: 31433
loss: 1.033094882965088,grad_norm: 0.9137197755948723, iteration: 31434
loss: 1.013738751411438,grad_norm: 0.9217530020428223, iteration: 31435
loss: 1.026753544807434,grad_norm: 0.9999990090507587, iteration: 31436
loss: 0.9707838296890259,grad_norm: 0.9393333872197531, iteration: 31437
loss: 1.0128902196884155,grad_norm: 0.8085390060308518, iteration: 31438
loss: 0.9826011061668396,grad_norm: 0.9999991142195614, iteration: 31439
loss: 0.99172443151474,grad_norm: 0.8270630672475169, iteration: 31440
loss: 0.9928232431411743,grad_norm: 0.9999991700856856, iteration: 31441
loss: 0.9726019501686096,grad_norm: 0.9999992966344413, iteration: 31442
loss: 1.1457706689834595,grad_norm: 0.9999994894649089, iteration: 31443
loss: 1.0119041204452515,grad_norm: 0.9905814236945091, iteration: 31444
loss: 1.024380087852478,grad_norm: 0.9313622870754259, iteration: 31445
loss: 0.9891340136528015,grad_norm: 0.9999991114786108, iteration: 31446
loss: 1.0124956369400024,grad_norm: 0.9999991092343428, iteration: 31447
loss: 1.0501995086669922,grad_norm: 0.9999996431331962, iteration: 31448
loss: 0.9762185215950012,grad_norm: 0.8135908842099396, iteration: 31449
loss: 1.0098838806152344,grad_norm: 0.9098693224011966, iteration: 31450
loss: 1.016489863395691,grad_norm: 0.8842773429368033, iteration: 31451
loss: 0.9670510292053223,grad_norm: 0.9999989863899857, iteration: 31452
loss: 1.0358797311782837,grad_norm: 0.999999751784822, iteration: 31453
loss: 1.005077600479126,grad_norm: 0.9999996248396977, iteration: 31454
loss: 0.9813255071640015,grad_norm: 0.9999992134080159, iteration: 31455
loss: 0.9832058548927307,grad_norm: 0.9462108008048118, iteration: 31456
loss: 1.0221507549285889,grad_norm: 0.8261144473870692, iteration: 31457
loss: 0.991688072681427,grad_norm: 0.8902374359055836, iteration: 31458
loss: 1.0386700630187988,grad_norm: 0.8888913086693441, iteration: 31459
loss: 1.0032923221588135,grad_norm: 0.958295782217916, iteration: 31460
loss: 1.083183765411377,grad_norm: 0.999999840181064, iteration: 31461
loss: 0.9955125451087952,grad_norm: 0.9320990650749976, iteration: 31462
loss: 1.0085361003875732,grad_norm: 0.999999226987423, iteration: 31463
loss: 1.0411226749420166,grad_norm: 0.9999990339657083, iteration: 31464
loss: 1.013765811920166,grad_norm: 0.9999990842713995, iteration: 31465
loss: 1.0106921195983887,grad_norm: 0.9999992129141807, iteration: 31466
loss: 0.9735116958618164,grad_norm: 0.9999995562051159, iteration: 31467
loss: 1.0305776596069336,grad_norm: 0.9999992505637487, iteration: 31468
loss: 0.9992702603340149,grad_norm: 0.9178109802527081, iteration: 31469
loss: 0.9552774429321289,grad_norm: 0.8980958042525402, iteration: 31470
loss: 1.0137152671813965,grad_norm: 0.9999991993917099, iteration: 31471
loss: 1.0202410221099854,grad_norm: 0.9999993836377111, iteration: 31472
loss: 1.0703418254852295,grad_norm: 0.9999994792568138, iteration: 31473
loss: 0.9883325099945068,grad_norm: 0.9999993042063723, iteration: 31474
loss: 1.0099492073059082,grad_norm: 0.9999991836802214, iteration: 31475
loss: 1.0119067430496216,grad_norm: 0.9999990439317831, iteration: 31476
loss: 1.0494818687438965,grad_norm: 0.9999996656366744, iteration: 31477
loss: 1.020857334136963,grad_norm: 0.9999991814936818, iteration: 31478
loss: 0.9987601041793823,grad_norm: 0.9757586162333778, iteration: 31479
loss: 1.0474070310592651,grad_norm: 0.9080081351505507, iteration: 31480
loss: 1.0115176439285278,grad_norm: 0.9999994338065609, iteration: 31481
loss: 1.0246824026107788,grad_norm: 0.9985963400207019, iteration: 31482
loss: 1.012496829032898,grad_norm: 0.9999992666476576, iteration: 31483
loss: 1.043429970741272,grad_norm: 0.7939462746745224, iteration: 31484
loss: 1.0280107259750366,grad_norm: 0.9999993140834463, iteration: 31485
loss: 0.9365242123603821,grad_norm: 0.8772794721030376, iteration: 31486
loss: 0.9783802032470703,grad_norm: 0.9999990271722704, iteration: 31487
loss: 1.0125055313110352,grad_norm: 0.9999993554414849, iteration: 31488
loss: 0.9619202613830566,grad_norm: 0.977342173907851, iteration: 31489
loss: 1.053084135055542,grad_norm: 0.999999187264997, iteration: 31490
loss: 0.9924009442329407,grad_norm: 0.8924257241036794, iteration: 31491
loss: 1.0184037685394287,grad_norm: 0.9471217669250337, iteration: 31492
loss: 0.9709804058074951,grad_norm: 0.926892535862721, iteration: 31493
loss: 1.0084853172302246,grad_norm: 0.9999990717004615, iteration: 31494
loss: 1.0238080024719238,grad_norm: 0.9392766257600138, iteration: 31495
loss: 0.9820964336395264,grad_norm: 0.9018132374787887, iteration: 31496
loss: 1.0440970659255981,grad_norm: 0.9631059734878668, iteration: 31497
loss: 1.044716477394104,grad_norm: 0.9999993967609168, iteration: 31498
loss: 1.0142525434494019,grad_norm: 0.999999232461045, iteration: 31499
loss: 0.950997531414032,grad_norm: 0.9171979918840177, iteration: 31500
loss: 1.0172157287597656,grad_norm: 0.9573588445029161, iteration: 31501
loss: 0.9802799820899963,grad_norm: 0.9999991235938401, iteration: 31502
loss: 0.9736852049827576,grad_norm: 0.877748139956217, iteration: 31503
loss: 0.9904353618621826,grad_norm: 0.9979093564816464, iteration: 31504
loss: 1.0267679691314697,grad_norm: 0.9606580975130399, iteration: 31505
loss: 0.9912062287330627,grad_norm: 0.9999991817670074, iteration: 31506
loss: 1.0297538042068481,grad_norm: 0.9969237009343767, iteration: 31507
loss: 0.9462371468544006,grad_norm: 0.8244494248023015, iteration: 31508
loss: 1.036481499671936,grad_norm: 0.9999992647030793, iteration: 31509
loss: 1.0080963373184204,grad_norm: 0.9999991643274472, iteration: 31510
loss: 1.0174633264541626,grad_norm: 0.9181717621484495, iteration: 31511
loss: 1.0252768993377686,grad_norm: 0.9999990017275261, iteration: 31512
loss: 1.0060982704162598,grad_norm: 0.9999990697884056, iteration: 31513
loss: 0.9830836057662964,grad_norm: 0.9031564581620531, iteration: 31514
loss: 1.0240753889083862,grad_norm: 0.9999996981968734, iteration: 31515
loss: 1.0079728364944458,grad_norm: 0.8414029480002359, iteration: 31516
loss: 1.0356452465057373,grad_norm: 0.9999993081410387, iteration: 31517
loss: 0.9963571429252625,grad_norm: 0.9671589259735057, iteration: 31518
loss: 1.0300792455673218,grad_norm: 0.9999993280145122, iteration: 31519
loss: 0.9998636245727539,grad_norm: 0.9926442345459874, iteration: 31520
loss: 1.0201441049575806,grad_norm: 0.9372729078592466, iteration: 31521
loss: 1.020512342453003,grad_norm: 0.9999994656889328, iteration: 31522
loss: 1.032802700996399,grad_norm: 0.9999990690203825, iteration: 31523
loss: 1.0263460874557495,grad_norm: 0.9999994966486692, iteration: 31524
loss: 1.0364799499511719,grad_norm: 0.9883931118031994, iteration: 31525
loss: 1.0023642778396606,grad_norm: 0.9989038626251313, iteration: 31526
loss: 1.0324970483779907,grad_norm: 0.999999067988067, iteration: 31527
loss: 1.0273821353912354,grad_norm: 0.9973779341522987, iteration: 31528
loss: 0.9904454350471497,grad_norm: 0.9999991886955404, iteration: 31529
loss: 0.9638448357582092,grad_norm: 0.9999992004526124, iteration: 31530
loss: 1.0076701641082764,grad_norm: 0.9999993026552796, iteration: 31531
loss: 1.0084458589553833,grad_norm: 0.9999996052596093, iteration: 31532
loss: 0.9953303933143616,grad_norm: 0.9999991936057271, iteration: 31533
loss: 0.9980450868606567,grad_norm: 0.9999995900178837, iteration: 31534
loss: 1.0046684741973877,grad_norm: 0.9999991819690722, iteration: 31535
loss: 0.9960300922393799,grad_norm: 0.9999991693195154, iteration: 31536
loss: 1.0316938161849976,grad_norm: 0.9782799514790695, iteration: 31537
loss: 0.9878551959991455,grad_norm: 0.9999993434269043, iteration: 31538
loss: 0.9773067235946655,grad_norm: 0.9999991375430549, iteration: 31539
loss: 1.0325350761413574,grad_norm: 0.9999991055325375, iteration: 31540
loss: 0.9985325932502747,grad_norm: 0.999999263104916, iteration: 31541
loss: 1.017592430114746,grad_norm: 0.9428359455562931, iteration: 31542
loss: 1.0198771953582764,grad_norm: 0.9999992941673412, iteration: 31543
loss: 0.966124951839447,grad_norm: 0.8541685098241226, iteration: 31544
loss: 0.9982867240905762,grad_norm: 0.9109428116299102, iteration: 31545
loss: 1.001854419708252,grad_norm: 0.8379019168677914, iteration: 31546
loss: 1.032691478729248,grad_norm: 0.8490867843916057, iteration: 31547
loss: 0.9975801706314087,grad_norm: 0.8423426191136949, iteration: 31548
loss: 0.9764974117279053,grad_norm: 0.9607811309144589, iteration: 31549
loss: 1.017623782157898,grad_norm: 0.913632678662842, iteration: 31550
loss: 1.0261996984481812,grad_norm: 0.9853467804866133, iteration: 31551
loss: 1.0021083354949951,grad_norm: 0.9001362688400011, iteration: 31552
loss: 1.007702112197876,grad_norm: 0.9999990529747113, iteration: 31553
loss: 1.0066239833831787,grad_norm: 0.9999990535971323, iteration: 31554
loss: 1.0027596950531006,grad_norm: 0.9225230021680856, iteration: 31555
loss: 1.0459011793136597,grad_norm: 0.9291317363250019, iteration: 31556
loss: 1.0679140090942383,grad_norm: 0.9999992977101174, iteration: 31557
loss: 1.0165635347366333,grad_norm: 0.9405292262377943, iteration: 31558
loss: 1.0251408815383911,grad_norm: 0.9999991061570587, iteration: 31559
loss: 1.0198217630386353,grad_norm: 0.8458370481732574, iteration: 31560
loss: 1.028997540473938,grad_norm: 0.9999992224391266, iteration: 31561
loss: 1.02967369556427,grad_norm: 0.9999997080169057, iteration: 31562
loss: 1.019420862197876,grad_norm: 0.9999991451642355, iteration: 31563
loss: 0.9854413270950317,grad_norm: 0.9999990768052976, iteration: 31564
loss: 1.009977102279663,grad_norm: 0.9999994517458525, iteration: 31565
loss: 1.0252559185028076,grad_norm: 0.8347771287491017, iteration: 31566
loss: 0.9505900144577026,grad_norm: 0.9429998333597989, iteration: 31567
loss: 0.971926212310791,grad_norm: 0.8893689210664745, iteration: 31568
loss: 1.0016844272613525,grad_norm: 0.9999990597001637, iteration: 31569
loss: 1.004136085510254,grad_norm: 0.9999990519120349, iteration: 31570
loss: 1.0145955085754395,grad_norm: 0.9999990715794264, iteration: 31571
loss: 0.9733195304870605,grad_norm: 0.9999990812711361, iteration: 31572
loss: 0.9739809036254883,grad_norm: 0.999999215209561, iteration: 31573
loss: 1.0151907205581665,grad_norm: 0.9929562182926065, iteration: 31574
loss: 1.0549112558364868,grad_norm: 0.999999916055625, iteration: 31575
loss: 1.0122737884521484,grad_norm: 0.9999992759886626, iteration: 31576
loss: 1.0481388568878174,grad_norm: 0.9999991203318663, iteration: 31577
loss: 1.0064146518707275,grad_norm: 0.9999991727499252, iteration: 31578
loss: 1.033033847808838,grad_norm: 0.9474018657291332, iteration: 31579
loss: 1.0167793035507202,grad_norm: 0.9923962463574034, iteration: 31580
loss: 0.98992520570755,grad_norm: 0.999999622126568, iteration: 31581
loss: 0.9824419021606445,grad_norm: 0.9999992673013605, iteration: 31582
loss: 1.030063271522522,grad_norm: 0.9999992452612472, iteration: 31583
loss: 1.0780454874038696,grad_norm: 0.9999994180332946, iteration: 31584
loss: 1.013260841369629,grad_norm: 0.9999992259404689, iteration: 31585
loss: 0.9973688721656799,grad_norm: 0.9999991935071495, iteration: 31586
loss: 1.0344816446304321,grad_norm: 0.9999996394471707, iteration: 31587
loss: 1.0220715999603271,grad_norm: 0.9999992078232898, iteration: 31588
loss: 1.0159944295883179,grad_norm: 0.8932672849092472, iteration: 31589
loss: 0.9860982894897461,grad_norm: 0.9159778132228233, iteration: 31590
loss: 1.02289879322052,grad_norm: 0.9015959907298335, iteration: 31591
loss: 0.9525835514068604,grad_norm: 0.9999991077953917, iteration: 31592
loss: 1.0168671607971191,grad_norm: 0.9244103633861657, iteration: 31593
loss: 0.9859493374824524,grad_norm: 0.9999990087629933, iteration: 31594
loss: 1.0186585187911987,grad_norm: 0.9999992168188143, iteration: 31595
loss: 1.0220502614974976,grad_norm: 0.9999990838129854, iteration: 31596
loss: 0.9705452919006348,grad_norm: 0.8919372491901916, iteration: 31597
loss: 0.995623767375946,grad_norm: 0.8666155449226901, iteration: 31598
loss: 1.0150107145309448,grad_norm: 0.9999991648398239, iteration: 31599
loss: 1.0158177614212036,grad_norm: 0.9157262191498802, iteration: 31600
loss: 1.000580906867981,grad_norm: 0.9695463526641939, iteration: 31601
loss: 1.0141854286193848,grad_norm: 0.937581299055645, iteration: 31602
loss: 1.028645396232605,grad_norm: 0.9536782862367873, iteration: 31603
loss: 0.9836850762367249,grad_norm: 0.9336483053930524, iteration: 31604
loss: 1.0333285331726074,grad_norm: 0.9999994962365927, iteration: 31605
loss: 1.0084329843521118,grad_norm: 0.8790174851955966, iteration: 31606
loss: 1.0460617542266846,grad_norm: 0.9999994873059349, iteration: 31607
loss: 0.9700520634651184,grad_norm: 0.999999219295634, iteration: 31608
loss: 0.9794917106628418,grad_norm: 0.9117967060307889, iteration: 31609
loss: 1.0132365226745605,grad_norm: 0.9999991665447217, iteration: 31610
loss: 1.0245774984359741,grad_norm: 0.960637885140905, iteration: 31611
loss: 1.0046968460083008,grad_norm: 0.9845503814150578, iteration: 31612
loss: 0.9703713059425354,grad_norm: 0.8740085127312597, iteration: 31613
loss: 1.0153268575668335,grad_norm: 0.9326910044840399, iteration: 31614
loss: 0.9648302793502808,grad_norm: 0.9880592157308107, iteration: 31615
loss: 1.009228229522705,grad_norm: 0.9999991578364277, iteration: 31616
loss: 0.9915297627449036,grad_norm: 0.9512323134996004, iteration: 31617
loss: 1.0122644901275635,grad_norm: 0.9999990953582033, iteration: 31618
loss: 1.000789761543274,grad_norm: 0.9999993495293931, iteration: 31619
loss: 0.9936856031417847,grad_norm: 0.9999991771937828, iteration: 31620
loss: 1.0017688274383545,grad_norm: 0.8954392414558134, iteration: 31621
loss: 1.0140019655227661,grad_norm: 0.9999990989292237, iteration: 31622
loss: 0.9699668288230896,grad_norm: 0.9837141186611729, iteration: 31623
loss: 0.9985480904579163,grad_norm: 0.9999990534553495, iteration: 31624
loss: 1.0413763523101807,grad_norm: 0.900291066965286, iteration: 31625
loss: 0.992900013923645,grad_norm: 0.9509012960292549, iteration: 31626
loss: 1.0106804370880127,grad_norm: 0.927938794720957, iteration: 31627
loss: 0.9874458909034729,grad_norm: 0.9999990521182377, iteration: 31628
loss: 0.9904587864875793,grad_norm: 0.9097044989872298, iteration: 31629
loss: 0.9980675578117371,grad_norm: 0.9999991255871576, iteration: 31630
loss: 1.0395972728729248,grad_norm: 0.9471815696978988, iteration: 31631
loss: 1.0275030136108398,grad_norm: 0.9139653985731357, iteration: 31632
loss: 1.0230655670166016,grad_norm: 0.999999190494493, iteration: 31633
loss: 1.005826473236084,grad_norm: 0.9999991282754609, iteration: 31634
loss: 1.0173187255859375,grad_norm: 0.8717968737308631, iteration: 31635
loss: 1.0133240222930908,grad_norm: 0.9362353431834125, iteration: 31636
loss: 0.9966042041778564,grad_norm: 0.8896832146171242, iteration: 31637
loss: 1.0032391548156738,grad_norm: 0.9860674697469619, iteration: 31638
loss: 0.9755354523658752,grad_norm: 0.9999991260852434, iteration: 31639
loss: 0.9686104655265808,grad_norm: 0.907555402543905, iteration: 31640
loss: 0.9957717657089233,grad_norm: 0.862324496870644, iteration: 31641
loss: 0.9990823864936829,grad_norm: 0.9999992218248426, iteration: 31642
loss: 1.0230827331542969,grad_norm: 0.8123752041961263, iteration: 31643
loss: 1.0149335861206055,grad_norm: 0.8453443697279928, iteration: 31644
loss: 1.0067378282546997,grad_norm: 0.9869863959813516, iteration: 31645
loss: 1.0791560411453247,grad_norm: 0.9999996732968535, iteration: 31646
loss: 1.0155707597732544,grad_norm: 0.9999990344720417, iteration: 31647
loss: 1.0150221586227417,grad_norm: 0.9999992797366756, iteration: 31648
loss: 1.0129011869430542,grad_norm: 0.9999991056016709, iteration: 31649
loss: 1.03110933303833,grad_norm: 0.9999991652274616, iteration: 31650
loss: 1.0437211990356445,grad_norm: 0.9999992729313404, iteration: 31651
loss: 0.9742347002029419,grad_norm: 0.9466052531807033, iteration: 31652
loss: 1.0316669940948486,grad_norm: 0.8251312041176697, iteration: 31653
loss: 0.9525846838951111,grad_norm: 0.9691684549210418, iteration: 31654
loss: 1.0238975286483765,grad_norm: 0.9999998211375352, iteration: 31655
loss: 0.9920624494552612,grad_norm: 0.9383679057966444, iteration: 31656
loss: 1.0076557397842407,grad_norm: 0.8805258812285669, iteration: 31657
loss: 1.0122168064117432,grad_norm: 0.8023816135230304, iteration: 31658
loss: 1.0027704238891602,grad_norm: 0.9111980337810544, iteration: 31659
loss: 0.9867497086524963,grad_norm: 0.9648006087992493, iteration: 31660
loss: 1.0350583791732788,grad_norm: 0.9999989705599626, iteration: 31661
loss: 0.9884438514709473,grad_norm: 0.9580672869364993, iteration: 31662
loss: 1.039747953414917,grad_norm: 0.9999992534748323, iteration: 31663
loss: 1.0169404745101929,grad_norm: 0.9407850981856207, iteration: 31664
loss: 1.0116215944290161,grad_norm: 0.9624312616160944, iteration: 31665
loss: 1.0195837020874023,grad_norm: 0.9999991352424615, iteration: 31666
loss: 0.992901086807251,grad_norm: 0.8781164032828168, iteration: 31667
loss: 1.0029709339141846,grad_norm: 0.999999018586727, iteration: 31668
loss: 1.0020216703414917,grad_norm: 0.9851360898511822, iteration: 31669
loss: 1.015577793121338,grad_norm: 0.9667113183408048, iteration: 31670
loss: 1.0241518020629883,grad_norm: 0.9066420517918184, iteration: 31671
loss: 1.0289899110794067,grad_norm: 0.999999311065054, iteration: 31672
loss: 1.03327214717865,grad_norm: 0.9999992771208404, iteration: 31673
loss: 0.9967326521873474,grad_norm: 0.8397668524061492, iteration: 31674
loss: 1.029059648513794,grad_norm: 0.9952385092958712, iteration: 31675
loss: 0.9882171154022217,grad_norm: 0.939243638919761, iteration: 31676
loss: 0.9855039715766907,grad_norm: 0.9999991341735334, iteration: 31677
loss: 0.993524432182312,grad_norm: 0.9394709620525801, iteration: 31678
loss: 0.9987754821777344,grad_norm: 0.9108141246530698, iteration: 31679
loss: 0.9802054166793823,grad_norm: 0.9037806200945013, iteration: 31680
loss: 0.9844465851783752,grad_norm: 0.9527577713818602, iteration: 31681
loss: 0.9922786951065063,grad_norm: 0.9999991216004004, iteration: 31682
loss: 1.022523045539856,grad_norm: 0.9607791293775673, iteration: 31683
loss: 0.9528262615203857,grad_norm: 0.9999990109988346, iteration: 31684
loss: 1.0295323133468628,grad_norm: 0.9999990976471359, iteration: 31685
loss: 0.9884544610977173,grad_norm: 0.9999992500578658, iteration: 31686
loss: 0.9698191285133362,grad_norm: 0.9999990652798761, iteration: 31687
loss: 1.0383051633834839,grad_norm: 0.8874086518785116, iteration: 31688
loss: 1.0097548961639404,grad_norm: 0.9999998620567987, iteration: 31689
loss: 0.9903716444969177,grad_norm: 0.8766275716178061, iteration: 31690
loss: 0.9891741275787354,grad_norm: 0.9375363752314141, iteration: 31691
loss: 1.039842128753662,grad_norm: 0.9999990560376596, iteration: 31692
loss: 1.1231708526611328,grad_norm: 0.9999993211236032, iteration: 31693
loss: 1.0135430097579956,grad_norm: 0.9999992481508616, iteration: 31694
loss: 1.0121126174926758,grad_norm: 0.9331265126651062, iteration: 31695
loss: 0.9794920086860657,grad_norm: 0.9580097289262767, iteration: 31696
loss: 1.0123790502548218,grad_norm: 0.9361860190961067, iteration: 31697
loss: 0.9949902296066284,grad_norm: 0.9999992012938052, iteration: 31698
loss: 0.9548960328102112,grad_norm: 0.9882430896338266, iteration: 31699
loss: 1.0067338943481445,grad_norm: 0.90441131862791, iteration: 31700
loss: 1.0621659755706787,grad_norm: 0.9999998603040076, iteration: 31701
loss: 0.9379448890686035,grad_norm: 0.9137140218528654, iteration: 31702
loss: 0.9576241970062256,grad_norm: 0.9999991924059642, iteration: 31703
loss: 1.0091298818588257,grad_norm: 0.9373520502537005, iteration: 31704
loss: 1.0023972988128662,grad_norm: 0.9999991095783123, iteration: 31705
loss: 1.0690563917160034,grad_norm: 1.0000000028456002, iteration: 31706
loss: 0.9780178666114807,grad_norm: 0.9999991445794091, iteration: 31707
loss: 0.9896546006202698,grad_norm: 0.7817354073098792, iteration: 31708
loss: 0.9542678594589233,grad_norm: 0.8960456569229159, iteration: 31709
loss: 1.0198414325714111,grad_norm: 0.9999991690954864, iteration: 31710
loss: 1.00004243850708,grad_norm: 0.9999993693454848, iteration: 31711
loss: 1.0326714515686035,grad_norm: 0.9999990483022538, iteration: 31712
loss: 0.9758092164993286,grad_norm: 0.9699774855188077, iteration: 31713
loss: 1.010266900062561,grad_norm: 0.8198958555365679, iteration: 31714
loss: 1.0084364414215088,grad_norm: 0.9999990274764018, iteration: 31715
loss: 0.9920856356620789,grad_norm: 0.8202028279367463, iteration: 31716
loss: 1.0068930387496948,grad_norm: 0.8888967084331539, iteration: 31717
loss: 1.0168229341506958,grad_norm: 0.9999990643161809, iteration: 31718
loss: 1.034676194190979,grad_norm: 0.9999997604154381, iteration: 31719
loss: 1.0381282567977905,grad_norm: 0.9999992498097802, iteration: 31720
loss: 1.0773677825927734,grad_norm: 0.9999996760634262, iteration: 31721
loss: 0.9866171479225159,grad_norm: 0.8882040104129039, iteration: 31722
loss: 0.9958593249320984,grad_norm: 0.9999991087115275, iteration: 31723
loss: 0.9934549331665039,grad_norm: 0.9999992685418405, iteration: 31724
loss: 1.0002856254577637,grad_norm: 0.9343285568055912, iteration: 31725
loss: 1.0835140943527222,grad_norm: 0.9999998855828703, iteration: 31726
loss: 1.0297832489013672,grad_norm: 0.9978229448865913, iteration: 31727
loss: 0.9968463778495789,grad_norm: 0.9999991878146448, iteration: 31728
loss: 1.0293173789978027,grad_norm: 0.9999991490816406, iteration: 31729
loss: 1.045320987701416,grad_norm: 0.9999994815032738, iteration: 31730
loss: 0.9980836510658264,grad_norm: 0.8544660903102542, iteration: 31731
loss: 1.0007442235946655,grad_norm: 0.8884303602474959, iteration: 31732
loss: 0.9819772839546204,grad_norm: 0.9572804125423562, iteration: 31733
loss: 1.0131266117095947,grad_norm: 0.9999992789853192, iteration: 31734
loss: 1.0319726467132568,grad_norm: 0.8881464403326763, iteration: 31735
loss: 1.0322415828704834,grad_norm: 0.8718445602404601, iteration: 31736
loss: 1.0109565258026123,grad_norm: 0.9999990311514098, iteration: 31737
loss: 1.0386300086975098,grad_norm: 0.9999997435221247, iteration: 31738
loss: 0.9773278832435608,grad_norm: 0.915937898105124, iteration: 31739
loss: 1.0121687650680542,grad_norm: 0.9541461028002702, iteration: 31740
loss: 1.0074212551116943,grad_norm: 0.9403630329839509, iteration: 31741
loss: 1.0284662246704102,grad_norm: 0.9999991737953338, iteration: 31742
loss: 1.0412777662277222,grad_norm: 0.975913193190461, iteration: 31743
loss: 1.0127099752426147,grad_norm: 0.9166047583861986, iteration: 31744
loss: 1.0622045993804932,grad_norm: 0.9999996159090488, iteration: 31745
loss: 1.0506112575531006,grad_norm: 0.9999991938535547, iteration: 31746
loss: 1.0345889329910278,grad_norm: 0.9999997488717852, iteration: 31747
loss: 1.0187700986862183,grad_norm: 0.9999991325958831, iteration: 31748
loss: 1.065314769744873,grad_norm: 0.9999998397468745, iteration: 31749
loss: 1.0011712312698364,grad_norm: 0.9999990795719633, iteration: 31750
loss: 1.0115841627120972,grad_norm: 0.9999990115939129, iteration: 31751
loss: 1.0058369636535645,grad_norm: 0.999999340273189, iteration: 31752
loss: 1.0014344453811646,grad_norm: 0.9644697219415604, iteration: 31753
loss: 0.9921640157699585,grad_norm: 0.9999990347133142, iteration: 31754
loss: 1.0070511102676392,grad_norm: 0.9034619282913201, iteration: 31755
loss: 0.9674463868141174,grad_norm: 0.8470146858256433, iteration: 31756
loss: 1.0025386810302734,grad_norm: 0.9999990533973948, iteration: 31757
loss: 0.9884001612663269,grad_norm: 0.9999991316438424, iteration: 31758
loss: 1.0054659843444824,grad_norm: 0.9999992579090603, iteration: 31759
loss: 1.023157000541687,grad_norm: 0.9999990342483109, iteration: 31760
loss: 1.0026204586029053,grad_norm: 0.8735031784354391, iteration: 31761
loss: 1.0018372535705566,grad_norm: 0.999999064336758, iteration: 31762
loss: 1.0084713697433472,grad_norm: 0.9999990865734874, iteration: 31763
loss: 1.0197792053222656,grad_norm: 0.9731447277332576, iteration: 31764
loss: 0.9990688562393188,grad_norm: 0.9942643366079732, iteration: 31765
loss: 1.0315890312194824,grad_norm: 0.9999998222842666, iteration: 31766
loss: 1.0269670486450195,grad_norm: 0.927737693859861, iteration: 31767
loss: 1.0724371671676636,grad_norm: 0.9999993207672994, iteration: 31768
loss: 0.9982259273529053,grad_norm: 0.9999992010978469, iteration: 31769
loss: 1.0343213081359863,grad_norm: 0.9999991044154514, iteration: 31770
loss: 1.014613389968872,grad_norm: 0.9999990870353627, iteration: 31771
loss: 1.0308892726898193,grad_norm: 0.99999912401839, iteration: 31772
loss: 0.9928914308547974,grad_norm: 0.9338848689740971, iteration: 31773
loss: 1.0754578113555908,grad_norm: 0.9999994911501485, iteration: 31774
loss: 0.9920665621757507,grad_norm: 0.9711718260406478, iteration: 31775
loss: 1.0031030178070068,grad_norm: 0.9999990850414503, iteration: 31776
loss: 1.0275726318359375,grad_norm: 0.99999906457903, iteration: 31777
loss: 1.035118579864502,grad_norm: 0.9999998016146058, iteration: 31778
loss: 1.0090131759643555,grad_norm: 0.8871148815543779, iteration: 31779
loss: 1.0154387950897217,grad_norm: 0.9999992992296196, iteration: 31780
loss: 1.0166707038879395,grad_norm: 0.992683805435964, iteration: 31781
loss: 1.0293155908584595,grad_norm: 0.9481835430777176, iteration: 31782
loss: 1.0135875940322876,grad_norm: 0.8282591374240622, iteration: 31783
loss: 1.0295323133468628,grad_norm: 0.9999992152846472, iteration: 31784
loss: 1.0334553718566895,grad_norm: 0.9300431280494568, iteration: 31785
loss: 1.0243936777114868,grad_norm: 0.8972922963402856, iteration: 31786
loss: 1.0410010814666748,grad_norm: 0.9999993534975193, iteration: 31787
loss: 0.999027669429779,grad_norm: 0.9605870909394496, iteration: 31788
loss: 0.9911029934883118,grad_norm: 0.8974939334401019, iteration: 31789
loss: 1.004346489906311,grad_norm: 0.9999996205732011, iteration: 31790
loss: 1.037779688835144,grad_norm: 0.9568692103557939, iteration: 31791
loss: 0.9887180328369141,grad_norm: 0.9779606883118193, iteration: 31792
loss: 1.03233802318573,grad_norm: 0.9999997799189577, iteration: 31793
loss: 1.0133440494537354,grad_norm: 0.8099251880619316, iteration: 31794
loss: 0.9956097602844238,grad_norm: 0.9999990940370106, iteration: 31795
loss: 1.0036873817443848,grad_norm: 0.9849073148725676, iteration: 31796
loss: 1.0013214349746704,grad_norm: 0.902615115438562, iteration: 31797
loss: 0.9892076253890991,grad_norm: 0.9114674924714773, iteration: 31798
loss: 0.9720101356506348,grad_norm: 0.9999990526993524, iteration: 31799
loss: 0.9959321618080139,grad_norm: 0.999999201954279, iteration: 31800
loss: 1.0150306224822998,grad_norm: 0.8926372153696798, iteration: 31801
loss: 0.9915618896484375,grad_norm: 0.9117236430626785, iteration: 31802
loss: 0.9913236498832703,grad_norm: 0.8879144260659607, iteration: 31803
loss: 1.050068736076355,grad_norm: 0.9999990452714721, iteration: 31804
loss: 0.9959657788276672,grad_norm: 0.9999991366090742, iteration: 31805
loss: 1.0270137786865234,grad_norm: 0.9999990985275311, iteration: 31806
loss: 0.9986100196838379,grad_norm: 0.9999991278213813, iteration: 31807
loss: 1.0162311792373657,grad_norm: 0.920100032542925, iteration: 31808
loss: 1.0035748481750488,grad_norm: 0.9274214667523674, iteration: 31809
loss: 1.0052578449249268,grad_norm: 0.912945012796427, iteration: 31810
loss: 1.0301169157028198,grad_norm: 0.9999990530482221, iteration: 31811
loss: 0.9473424553871155,grad_norm: 0.9106671030681346, iteration: 31812
loss: 1.0194060802459717,grad_norm: 0.9999991239035486, iteration: 31813
loss: 1.0150974988937378,grad_norm: 0.8236541479188447, iteration: 31814
loss: 1.0208059549331665,grad_norm: 0.9999997807039145, iteration: 31815
loss: 0.9978176355361938,grad_norm: 0.9999990940060649, iteration: 31816
loss: 1.0030455589294434,grad_norm: 0.9999990816948231, iteration: 31817
loss: 1.016053557395935,grad_norm: 0.9007945923824464, iteration: 31818
loss: 1.0117628574371338,grad_norm: 0.968315577256859, iteration: 31819
loss: 1.0151091814041138,grad_norm: 0.8855016755536333, iteration: 31820
loss: 1.020133137702942,grad_norm: 0.9999992674351007, iteration: 31821
loss: 0.9710519909858704,grad_norm: 0.978485139939574, iteration: 31822
loss: 1.0177499055862427,grad_norm: 0.9999990173859082, iteration: 31823
loss: 1.011618733406067,grad_norm: 0.9950597044871733, iteration: 31824
loss: 1.0031479597091675,grad_norm: 0.8622605584018481, iteration: 31825
loss: 0.9900580048561096,grad_norm: 0.992735158039442, iteration: 31826
loss: 1.0326861143112183,grad_norm: 0.937166153486416, iteration: 31827
loss: 0.9919067621231079,grad_norm: 0.9999989978238714, iteration: 31828
loss: 0.9643638730049133,grad_norm: 0.9999991140349342, iteration: 31829
loss: 1.03892982006073,grad_norm: 0.8322541513165667, iteration: 31830
loss: 1.0007250308990479,grad_norm: 0.9999990746849697, iteration: 31831
loss: 0.944591760635376,grad_norm: 0.9999990517973713, iteration: 31832
loss: 1.0245258808135986,grad_norm: 0.999999164346769, iteration: 31833
loss: 0.9948990941047668,grad_norm: 0.999999138666986, iteration: 31834
loss: 0.9983381032943726,grad_norm: 0.9999991242334147, iteration: 31835
loss: 1.0138148069381714,grad_norm: 0.9999996841256068, iteration: 31836
loss: 1.0244258642196655,grad_norm: 0.9666611190428305, iteration: 31837
loss: 1.0143697261810303,grad_norm: 0.9999991113622013, iteration: 31838
loss: 1.0105091333389282,grad_norm: 0.7860425373668283, iteration: 31839
loss: 1.0050647258758545,grad_norm: 0.99141499847234, iteration: 31840
loss: 1.012796401977539,grad_norm: 0.9760351208162168, iteration: 31841
loss: 0.99466872215271,grad_norm: 0.9999991778135777, iteration: 31842
loss: 0.9577814936637878,grad_norm: 0.9999991004730933, iteration: 31843
loss: 1.0539220571517944,grad_norm: 0.8743737872910579, iteration: 31844
loss: 0.9876367449760437,grad_norm: 0.8658886191929259, iteration: 31845
loss: 1.009128212928772,grad_norm: 0.9999997723956469, iteration: 31846
loss: 1.034420132637024,grad_norm: 0.9052914534985553, iteration: 31847
loss: 1.0338469743728638,grad_norm: 0.8877516794960493, iteration: 31848
loss: 1.0284847021102905,grad_norm: 0.9999991831330326, iteration: 31849
loss: 1.0056072473526,grad_norm: 0.9384023175709848, iteration: 31850
loss: 0.9884569644927979,grad_norm: 0.9978660281732489, iteration: 31851
loss: 1.002313256263733,grad_norm: 0.9128451256063621, iteration: 31852
loss: 1.0083891153335571,grad_norm: 0.9999990047680358, iteration: 31853
loss: 0.9756637811660767,grad_norm: 0.9999992331710461, iteration: 31854
loss: 1.0127512216567993,grad_norm: 0.9999990955704485, iteration: 31855
loss: 0.9979097843170166,grad_norm: 0.930506994289558, iteration: 31856
loss: 0.9825524687767029,grad_norm: 0.9296691545325749, iteration: 31857
loss: 0.9778819680213928,grad_norm: 0.9999990784761997, iteration: 31858
loss: 1.0456371307373047,grad_norm: 0.9999991244239927, iteration: 31859
loss: 1.0299214124679565,grad_norm: 0.99999913000194, iteration: 31860
loss: 0.9969003796577454,grad_norm: 0.783434303472689, iteration: 31861
loss: 1.047416090965271,grad_norm: 0.9999993817616488, iteration: 31862
loss: 1.0097095966339111,grad_norm: 0.9999991296273427, iteration: 31863
loss: 0.9951682686805725,grad_norm: 0.9778509028848437, iteration: 31864
loss: 1.0278981924057007,grad_norm: 0.9999990721693501, iteration: 31865
loss: 1.0224477052688599,grad_norm: 0.9999991780591948, iteration: 31866
loss: 1.0043504238128662,grad_norm: 0.9999990675055727, iteration: 31867
loss: 1.0213168859481812,grad_norm: 0.9198563853279416, iteration: 31868
loss: 0.9866979718208313,grad_norm: 0.9999991441914783, iteration: 31869
loss: 0.9817993640899658,grad_norm: 0.9758122895380719, iteration: 31870
loss: 0.9905495643615723,grad_norm: 0.7297395391528206, iteration: 31871
loss: 1.0112988948822021,grad_norm: 0.9999994910464711, iteration: 31872
loss: 0.9625552296638489,grad_norm: 0.9999992199129838, iteration: 31873
loss: 0.948775053024292,grad_norm: 0.9343117501419792, iteration: 31874
loss: 0.9706257581710815,grad_norm: 0.9999989458562737, iteration: 31875
loss: 1.036071538925171,grad_norm: 0.9999997134956452, iteration: 31876
loss: 0.9851927757263184,grad_norm: 0.8844375222023189, iteration: 31877
loss: 1.016721487045288,grad_norm: 0.9999991915753231, iteration: 31878
loss: 1.0279719829559326,grad_norm: 0.9999992779741838, iteration: 31879
loss: 0.9692209959030151,grad_norm: 0.9999989816511203, iteration: 31880
loss: 1.0455069541931152,grad_norm: 0.9999991098164489, iteration: 31881
loss: 1.0463194847106934,grad_norm: 0.9999993333856473, iteration: 31882
loss: 1.0135642290115356,grad_norm: 0.9076863103816799, iteration: 31883
loss: 1.0404483079910278,grad_norm: 0.9999992122619478, iteration: 31884
loss: 0.9809744358062744,grad_norm: 0.9999991495925529, iteration: 31885
loss: 1.0290297269821167,grad_norm: 0.9345530279027987, iteration: 31886
loss: 0.9987109303474426,grad_norm: 0.9999990939415383, iteration: 31887
loss: 0.984673261642456,grad_norm: 0.8913054891228165, iteration: 31888
loss: 1.0371708869934082,grad_norm: 0.9999991677112231, iteration: 31889
loss: 1.0203380584716797,grad_norm: 0.9999994597528351, iteration: 31890
loss: 1.0216261148452759,grad_norm: 0.9508507841353939, iteration: 31891
loss: 0.9846083521842957,grad_norm: 0.9999991066579129, iteration: 31892
loss: 1.0340189933776855,grad_norm: 0.9043650686502175, iteration: 31893
loss: 1.0186015367507935,grad_norm: 0.9999997410994803, iteration: 31894
loss: 1.0213390588760376,grad_norm: 0.9999991471118571, iteration: 31895
loss: 0.959617555141449,grad_norm: 0.9117002341852657, iteration: 31896
loss: 1.0452749729156494,grad_norm: 0.8710174114649296, iteration: 31897
loss: 1.0296748876571655,grad_norm: 0.9999994315199381, iteration: 31898
loss: 1.0255932807922363,grad_norm: 0.9357926929828919, iteration: 31899
loss: 1.0261445045471191,grad_norm: 0.9999990944379696, iteration: 31900
loss: 0.9908530116081238,grad_norm: 0.999999043799339, iteration: 31901
loss: 0.9893214106559753,grad_norm: 0.9999990996770074, iteration: 31902
loss: 1.0082820653915405,grad_norm: 0.8485828369411477, iteration: 31903
loss: 1.035394310951233,grad_norm: 0.9999990501523505, iteration: 31904
loss: 1.0071195363998413,grad_norm: 0.9808176591586607, iteration: 31905
loss: 1.0037256479263306,grad_norm: 0.7584811218237535, iteration: 31906
loss: 1.0089102983474731,grad_norm: 0.9547128552027687, iteration: 31907
loss: 0.9857431054115295,grad_norm: 0.9999991005476281, iteration: 31908
loss: 1.03049898147583,grad_norm: 0.9999990999842749, iteration: 31909
loss: 0.9966620206832886,grad_norm: 0.9999990551295624, iteration: 31910
loss: 0.9807136058807373,grad_norm: 0.9999989804028098, iteration: 31911
loss: 1.033512830734253,grad_norm: 0.9999991966795079, iteration: 31912
loss: 1.034106731414795,grad_norm: 0.8858330895876668, iteration: 31913
loss: 0.9931285381317139,grad_norm: 0.9495692421588842, iteration: 31914
loss: 1.0005139112472534,grad_norm: 0.9999990143873034, iteration: 31915
loss: 1.0007532835006714,grad_norm: 0.7833106892087072, iteration: 31916
loss: 1.0190311670303345,grad_norm: 0.9330283462773507, iteration: 31917
loss: 1.0021487474441528,grad_norm: 0.9999990810119216, iteration: 31918
loss: 0.9791074395179749,grad_norm: 0.8612074508629308, iteration: 31919
loss: 0.9907271265983582,grad_norm: 0.9999991249292904, iteration: 31920
loss: 1.0140106678009033,grad_norm: 0.907721127008573, iteration: 31921
loss: 0.9916645288467407,grad_norm: 0.9999991823780044, iteration: 31922
loss: 1.0185881853103638,grad_norm: 0.9999991040217072, iteration: 31923
loss: 1.0236797332763672,grad_norm: 0.9999991492812476, iteration: 31924
loss: 0.993290364742279,grad_norm: 0.896937982996466, iteration: 31925
loss: 1.0314141511917114,grad_norm: 0.9999992059119037, iteration: 31926
loss: 1.0020259618759155,grad_norm: 0.917085272662004, iteration: 31927
loss: 1.0543339252471924,grad_norm: 0.874885301424782, iteration: 31928
loss: 1.003684639930725,grad_norm: 0.9999993519716988, iteration: 31929
loss: 0.9521484375,grad_norm: 0.9999992372460628, iteration: 31930
loss: 0.9969892501831055,grad_norm: 0.9911177337269819, iteration: 31931
loss: 1.0184794664382935,grad_norm: 0.9430106568325791, iteration: 31932
loss: 1.0324585437774658,grad_norm: 0.933360313802032, iteration: 31933
loss: 1.038987159729004,grad_norm: 0.9999992270134533, iteration: 31934
loss: 1.0039128065109253,grad_norm: 0.9999989984090872, iteration: 31935
loss: 0.9732690453529358,grad_norm: 0.9999993648551501, iteration: 31936
loss: 1.0429152250289917,grad_norm: 0.9781973725071327, iteration: 31937
loss: 1.0385349988937378,grad_norm: 0.9999994575568076, iteration: 31938
loss: 0.9934700727462769,grad_norm: 0.8923214987188932, iteration: 31939
loss: 0.9677218198776245,grad_norm: 0.8891891545335031, iteration: 31940
loss: 1.0162824392318726,grad_norm: 0.9686395401688684, iteration: 31941
loss: 1.014615535736084,grad_norm: 0.9260015381321163, iteration: 31942
loss: 1.0357388257980347,grad_norm: 0.9999994853912189, iteration: 31943
loss: 1.0031390190124512,grad_norm: 0.9783088580013392, iteration: 31944
loss: 1.0269920825958252,grad_norm: 0.9801395953031595, iteration: 31945
loss: 1.0475976467132568,grad_norm: 0.9999994814539248, iteration: 31946
loss: 1.0082993507385254,grad_norm: 0.9999991319156568, iteration: 31947
loss: 1.008673071861267,grad_norm: 0.9638754776684957, iteration: 31948
loss: 1.0579947233200073,grad_norm: 0.9660499730761121, iteration: 31949
loss: 0.9921373724937439,grad_norm: 0.9999989594675266, iteration: 31950
loss: 0.975903332233429,grad_norm: 0.8863813064640206, iteration: 31951
loss: 1.0160402059555054,grad_norm: 0.8860972234477585, iteration: 31952
loss: 1.0076076984405518,grad_norm: 0.8870204119435315, iteration: 31953
loss: 0.9818351864814758,grad_norm: 0.8411621778842953, iteration: 31954
loss: 1.0310364961624146,grad_norm: 0.9999991964751358, iteration: 31955
loss: 1.0176042318344116,grad_norm: 0.9999992380454213, iteration: 31956
loss: 1.0352482795715332,grad_norm: 0.9999992802468058, iteration: 31957
loss: 0.9858603477478027,grad_norm: 0.8637680510826716, iteration: 31958
loss: 0.9726693034172058,grad_norm: 0.9999990448939651, iteration: 31959
loss: 1.0016146898269653,grad_norm: 0.9999990870199328, iteration: 31960
loss: 1.0225597620010376,grad_norm: 0.9999992492925264, iteration: 31961
loss: 0.9788680076599121,grad_norm: 0.8818705182961531, iteration: 31962
loss: 0.9883684515953064,grad_norm: 0.9999991662940074, iteration: 31963
loss: 0.9694426655769348,grad_norm: 0.9999991597207324, iteration: 31964
loss: 0.9980343580245972,grad_norm: 0.9999989118085425, iteration: 31965
loss: 0.9845052361488342,grad_norm: 0.8818311355471758, iteration: 31966
loss: 0.991722047328949,grad_norm: 0.8746546899447378, iteration: 31967
loss: 0.986438512802124,grad_norm: 0.9142836168740326, iteration: 31968
loss: 0.9933513402938843,grad_norm: 0.9999992300068861, iteration: 31969
loss: 1.034021019935608,grad_norm: 0.9360967890759515, iteration: 31970
loss: 0.9743309020996094,grad_norm: 0.999998982348464, iteration: 31971
loss: 1.0126391649246216,grad_norm: 0.9956379188228311, iteration: 31972
loss: 1.0249555110931396,grad_norm: 0.9999991233702129, iteration: 31973
loss: 1.019502878189087,grad_norm: 0.9999992396505276, iteration: 31974
loss: 1.0565682649612427,grad_norm: 0.9999993368355773, iteration: 31975
loss: 0.977717936038971,grad_norm: 0.9999991518791129, iteration: 31976
loss: 0.9777911305427551,grad_norm: 0.9999991924022981, iteration: 31977
loss: 1.0302098989486694,grad_norm: 0.9999991204386252, iteration: 31978
loss: 1.0277467966079712,grad_norm: 0.9999989609073082, iteration: 31979
loss: 0.9605361819267273,grad_norm: 0.9999990772531959, iteration: 31980
loss: 1.0097037553787231,grad_norm: 0.9999993098854143, iteration: 31981
loss: 0.9702657461166382,grad_norm: 0.9999990539439644, iteration: 31982
loss: 0.9842458963394165,grad_norm: 0.9999991906841927, iteration: 31983
loss: 1.050060749053955,grad_norm: 0.9999992169654844, iteration: 31984
loss: 1.0616216659545898,grad_norm: 0.9999996201266709, iteration: 31985
loss: 0.9711594581604004,grad_norm: 0.9999992419623746, iteration: 31986
loss: 1.0082296133041382,grad_norm: 0.8042924779437142, iteration: 31987
loss: 0.9994677305221558,grad_norm: 0.9996422606069688, iteration: 31988
loss: 0.992545485496521,grad_norm: 0.8780047815199672, iteration: 31989
loss: 1.0047214031219482,grad_norm: 0.9999992663347816, iteration: 31990
loss: 0.9879480600357056,grad_norm: 0.8975695140986146, iteration: 31991
loss: 1.0147037506103516,grad_norm: 0.9224731460887498, iteration: 31992
loss: 0.990619957447052,grad_norm: 0.9596088020710132, iteration: 31993
loss: 0.9763855338096619,grad_norm: 0.9999991908179628, iteration: 31994
loss: 1.0053008794784546,grad_norm: 0.9999989810697486, iteration: 31995
loss: 0.9923461675643921,grad_norm: 0.9016785368762613, iteration: 31996
loss: 0.9985823035240173,grad_norm: 0.9786639852068194, iteration: 31997
loss: 0.9894804954528809,grad_norm: 0.8923009680141397, iteration: 31998
loss: 0.990293025970459,grad_norm: 0.9117883879756038, iteration: 31999
loss: 1.0219849348068237,grad_norm: 0.9710094421563279, iteration: 32000
loss: 1.118337631225586,grad_norm: 0.9999992522048287, iteration: 32001
loss: 0.9785139560699463,grad_norm: 0.9999991704524137, iteration: 32002
loss: 1.0051182508468628,grad_norm: 0.9208928864105179, iteration: 32003
loss: 0.9857603907585144,grad_norm: 0.9999991055252391, iteration: 32004
loss: 1.0052334070205688,grad_norm: 0.9999991669168349, iteration: 32005
loss: 1.0016608238220215,grad_norm: 0.9723733555401866, iteration: 32006
loss: 1.0168063640594482,grad_norm: 0.9999991222760847, iteration: 32007
loss: 1.0262930393218994,grad_norm: 0.9999991871125046, iteration: 32008
loss: 0.9882739186286926,grad_norm: 0.9814519398866776, iteration: 32009
loss: 0.9604333639144897,grad_norm: 0.9895745006703855, iteration: 32010
loss: 0.988765299320221,grad_norm: 0.9999990662253778, iteration: 32011
loss: 0.983296275138855,grad_norm: 0.9857427041815497, iteration: 32012
loss: 0.9951107501983643,grad_norm: 0.999999253610285, iteration: 32013
loss: 1.0141565799713135,grad_norm: 0.8409362773385465, iteration: 32014
loss: 0.9776824116706848,grad_norm: 0.8592738097135751, iteration: 32015
loss: 0.9635944366455078,grad_norm: 0.8229821289904072, iteration: 32016
loss: 0.9900655150413513,grad_norm: 0.9853518535064283, iteration: 32017
loss: 1.051632285118103,grad_norm: 0.9999990591113657, iteration: 32018
loss: 1.0854039192199707,grad_norm: 0.9999995964781503, iteration: 32019
loss: 1.0027035474777222,grad_norm: 0.9955231341317288, iteration: 32020
loss: 1.0165140628814697,grad_norm: 0.9336581940560962, iteration: 32021
loss: 1.0122904777526855,grad_norm: 0.9999991747009246, iteration: 32022
loss: 0.9826077222824097,grad_norm: 0.9999992727252319, iteration: 32023
loss: 0.9969770312309265,grad_norm: 0.9999991582251929, iteration: 32024
loss: 0.9749380350112915,grad_norm: 0.93386461557255, iteration: 32025
loss: 1.007040023803711,grad_norm: 0.8627936318630527, iteration: 32026
loss: 1.0075600147247314,grad_norm: 0.9649523031411139, iteration: 32027
loss: 0.9972671270370483,grad_norm: 0.9141428020305143, iteration: 32028
loss: 1.0150784254074097,grad_norm: 0.999998903244999, iteration: 32029
loss: 0.9807692170143127,grad_norm: 0.9969225996019311, iteration: 32030
loss: 1.0248068571090698,grad_norm: 0.9999991543875537, iteration: 32031
loss: 1.0025763511657715,grad_norm: 0.9999995182118335, iteration: 32032
loss: 1.0002416372299194,grad_norm: 0.9999991439413862, iteration: 32033
loss: 1.023938536643982,grad_norm: 0.9532000109664388, iteration: 32034
loss: 1.000653862953186,grad_norm: 0.9955290293836978, iteration: 32035
loss: 0.9769273400306702,grad_norm: 0.9999989459168237, iteration: 32036
loss: 1.026338815689087,grad_norm: 0.9988331703276216, iteration: 32037
loss: 1.032804012298584,grad_norm: 0.840603031912645, iteration: 32038
loss: 0.9545863270759583,grad_norm: 0.9999991807528674, iteration: 32039
loss: 1.0739816427230835,grad_norm: 0.9781019613425909, iteration: 32040
loss: 0.9870599508285522,grad_norm: 0.9999991034274678, iteration: 32041
loss: 0.9800138473510742,grad_norm: 0.9072291838931792, iteration: 32042
loss: 1.0066529512405396,grad_norm: 0.9747152033604082, iteration: 32043
loss: 0.9925135374069214,grad_norm: 0.9421625660954625, iteration: 32044
loss: 0.9971945881843567,grad_norm: 0.8813224448817722, iteration: 32045
loss: 1.0389471054077148,grad_norm: 0.9999991209910686, iteration: 32046
loss: 1.0064202547073364,grad_norm: 0.9907620850056509, iteration: 32047
loss: 1.016758680343628,grad_norm: 0.8436005733489063, iteration: 32048
loss: 0.9994125962257385,grad_norm: 0.8137410499161919, iteration: 32049
loss: 1.0563570261001587,grad_norm: 0.9999996013966165, iteration: 32050
loss: 0.9839505553245544,grad_norm: 0.999999286336314, iteration: 32051
loss: 0.9888536930084229,grad_norm: 0.9213154079836822, iteration: 32052
loss: 1.0009946823120117,grad_norm: 0.9999991010163933, iteration: 32053
loss: 0.9566390514373779,grad_norm: 0.9999991798082476, iteration: 32054
loss: 1.0478482246398926,grad_norm: 0.999999717661328, iteration: 32055
loss: 0.9629380702972412,grad_norm: 0.9789872443548303, iteration: 32056
loss: 1.0253992080688477,grad_norm: 0.9999993069495063, iteration: 32057
loss: 1.0300949811935425,grad_norm: 0.9999992278692451, iteration: 32058
loss: 1.01124906539917,grad_norm: 0.6757022099110285, iteration: 32059
loss: 0.965505063533783,grad_norm: 0.9770815387114149, iteration: 32060
loss: 1.0290096998214722,grad_norm: 0.8406415541858903, iteration: 32061
loss: 0.9947081208229065,grad_norm: 0.9999993907478948, iteration: 32062
loss: 1.0218777656555176,grad_norm: 0.9037665053640361, iteration: 32063
loss: 1.016165018081665,grad_norm: 0.9999989728569401, iteration: 32064
loss: 0.9717617630958557,grad_norm: 0.888123449524701, iteration: 32065
loss: 0.9886761903762817,grad_norm: 0.9941031704023933, iteration: 32066
loss: 1.0069679021835327,grad_norm: 0.9999991708579988, iteration: 32067
loss: 0.9719248414039612,grad_norm: 0.9114272270062868, iteration: 32068
loss: 1.0235787630081177,grad_norm: 0.9303472964178705, iteration: 32069
loss: 1.032976508140564,grad_norm: 0.91811247626828, iteration: 32070
loss: 1.0239964723587036,grad_norm: 0.9999989563264349, iteration: 32071
loss: 0.9919261932373047,grad_norm: 0.9999990367816575, iteration: 32072
loss: 1.0211379528045654,grad_norm: 0.9999992154327872, iteration: 32073
loss: 1.0390106439590454,grad_norm: 0.9999992830319268, iteration: 32074
loss: 0.9922499060630798,grad_norm: 0.9107960590896473, iteration: 32075
loss: 1.0288197994232178,grad_norm: 0.9770447632226159, iteration: 32076
loss: 0.9828169941902161,grad_norm: 0.9999990206545682, iteration: 32077
loss: 0.9825629591941833,grad_norm: 0.8497020699393847, iteration: 32078
loss: 0.9939908385276794,grad_norm: 0.8869254763232339, iteration: 32079
loss: 0.9837236404418945,grad_norm: 0.9733304787440457, iteration: 32080
loss: 1.0221621990203857,grad_norm: 0.9652117978788098, iteration: 32081
loss: 1.0232645273208618,grad_norm: 0.9999991025748094, iteration: 32082
loss: 1.014150857925415,grad_norm: 0.9999991560382773, iteration: 32083
loss: 0.9848850965499878,grad_norm: 0.999999129476637, iteration: 32084
loss: 1.0490531921386719,grad_norm: 0.999999861063432, iteration: 32085
loss: 0.9280955791473389,grad_norm: 0.8382690506663886, iteration: 32086
loss: 1.0160012245178223,grad_norm: 0.9999991687144935, iteration: 32087
loss: 0.967349648475647,grad_norm: 0.9999990438670104, iteration: 32088
loss: 1.0026090145111084,grad_norm: 0.9999990408368055, iteration: 32089
loss: 1.0323786735534668,grad_norm: 0.9685694709191615, iteration: 32090
loss: 1.0003118515014648,grad_norm: 0.9999991521577068, iteration: 32091
loss: 1.0347946882247925,grad_norm: 0.9828446632384283, iteration: 32092
loss: 0.9749879240989685,grad_norm: 0.9900602079077062, iteration: 32093
loss: 0.9871267676353455,grad_norm: 0.9999997552595995, iteration: 32094
loss: 0.9894452095031738,grad_norm: 0.8855260472150703, iteration: 32095
loss: 1.0149296522140503,grad_norm: 0.9999990922705537, iteration: 32096
loss: 1.039186954498291,grad_norm: 0.9162765108129457, iteration: 32097
loss: 1.000406265258789,grad_norm: 0.9999990516537564, iteration: 32098
loss: 0.9642496109008789,grad_norm: 0.9999989937143628, iteration: 32099
loss: 0.990241527557373,grad_norm: 0.9999991944126709, iteration: 32100
loss: 1.0770869255065918,grad_norm: 0.999999344402489, iteration: 32101
loss: 1.0010136365890503,grad_norm: 0.9261144584000293, iteration: 32102
loss: 1.0112460851669312,grad_norm: 0.999999168524452, iteration: 32103
loss: 1.0009609460830688,grad_norm: 0.9259757885464118, iteration: 32104
loss: 0.9886142015457153,grad_norm: 0.9078513144351381, iteration: 32105
loss: 0.9696971774101257,grad_norm: 0.942198974288962, iteration: 32106
loss: 1.0046486854553223,grad_norm: 0.9999996063637808, iteration: 32107
loss: 1.0096131563186646,grad_norm: 0.9999992835650189, iteration: 32108
loss: 0.9878154993057251,grad_norm: 0.9999992545231883, iteration: 32109
loss: 1.022764801979065,grad_norm: 0.8282494058911153, iteration: 32110
loss: 1.0154740810394287,grad_norm: 0.9999990968856093, iteration: 32111
loss: 0.9957755208015442,grad_norm: 0.9999991096324939, iteration: 32112
loss: 1.0241502523422241,grad_norm: 0.9999992872265474, iteration: 32113
loss: 1.0526920557022095,grad_norm: 0.9303002208839041, iteration: 32114
loss: 1.0089375972747803,grad_norm: 0.9021388083938271, iteration: 32115
loss: 1.0448185205459595,grad_norm: 0.9999990263332296, iteration: 32116
loss: 1.010359287261963,grad_norm: 0.8806354741970284, iteration: 32117
loss: 1.0282334089279175,grad_norm: 0.9999996459405742, iteration: 32118
loss: 1.0088779926300049,grad_norm: 0.8222569074172532, iteration: 32119
loss: 1.0038399696350098,grad_norm: 0.9999991563371731, iteration: 32120
loss: 0.9998677968978882,grad_norm: 0.9999992784140683, iteration: 32121
loss: 0.9897719025611877,grad_norm: 0.9999990083116889, iteration: 32122
loss: 1.014186143875122,grad_norm: 0.9274640284019269, iteration: 32123
loss: 1.0034079551696777,grad_norm: 0.8550664220262217, iteration: 32124
loss: 1.005553126335144,grad_norm: 0.8176260365658119, iteration: 32125
loss: 0.9718767404556274,grad_norm: 0.9251056781724941, iteration: 32126
loss: 1.0246697664260864,grad_norm: 0.8020518626825672, iteration: 32127
loss: 0.9922497272491455,grad_norm: 0.9761091600635586, iteration: 32128
loss: 0.9986000657081604,grad_norm: 0.9999991652450235, iteration: 32129
loss: 1.0170764923095703,grad_norm: 0.999999432388451, iteration: 32130
loss: 1.0475867986679077,grad_norm: 0.9549289489184128, iteration: 32131
loss: 1.0189141035079956,grad_norm: 0.9999991441973993, iteration: 32132
loss: 0.9881151914596558,grad_norm: 0.9999991006394021, iteration: 32133
loss: 1.0324000120162964,grad_norm: 0.9826052218930084, iteration: 32134
loss: 0.996339738368988,grad_norm: 0.9999990722045422, iteration: 32135
loss: 1.002863883972168,grad_norm: 0.9999991268945492, iteration: 32136
loss: 1.0218546390533447,grad_norm: 0.9750718933573433, iteration: 32137
loss: 1.0262151956558228,grad_norm: 0.8999405219157974, iteration: 32138
loss: 1.007335901260376,grad_norm: 0.9780557833237389, iteration: 32139
loss: 1.0241756439208984,grad_norm: 0.8328371250477484, iteration: 32140
loss: 1.0187040567398071,grad_norm: 0.9999989687046089, iteration: 32141
loss: 0.982032835483551,grad_norm: 0.877603869565475, iteration: 32142
loss: 1.0560661554336548,grad_norm: 0.9999995920536139, iteration: 32143
loss: 0.9871107339859009,grad_norm: 0.9870963244705471, iteration: 32144
loss: 1.026384711265564,grad_norm: 0.9999993319708385, iteration: 32145
loss: 1.0203025341033936,grad_norm: 0.9999995215197395, iteration: 32146
loss: 1.0219695568084717,grad_norm: 0.9249868807393603, iteration: 32147
loss: 1.004746913909912,grad_norm: 0.999999070718128, iteration: 32148
loss: 0.9597986936569214,grad_norm: 0.9999990577318457, iteration: 32149
loss: 1.045156717300415,grad_norm: 0.9999992054837511, iteration: 32150
loss: 1.0059937238693237,grad_norm: 0.9689891414322276, iteration: 32151
loss: 1.0562347173690796,grad_norm: 0.9772414225109299, iteration: 32152
loss: 1.0316472053527832,grad_norm: 0.9999992070970557, iteration: 32153
loss: 1.013474464416504,grad_norm: 0.9999992507587255, iteration: 32154
loss: 1.0104477405548096,grad_norm: 0.9999993207537184, iteration: 32155
loss: 0.9930233359336853,grad_norm: 0.9999990143995682, iteration: 32156
loss: 1.0165891647338867,grad_norm: 0.9327230326254149, iteration: 32157
loss: 1.007339596748352,grad_norm: 0.9999990961773308, iteration: 32158
loss: 0.986577570438385,grad_norm: 0.9999992332617488, iteration: 32159
loss: 1.0615698099136353,grad_norm: 0.9999994492900788, iteration: 32160
loss: 1.0016534328460693,grad_norm: 0.8217614662037481, iteration: 32161
loss: 1.0335952043533325,grad_norm: 0.9999990964097802, iteration: 32162
loss: 0.9679276943206787,grad_norm: 0.9999991130931485, iteration: 32163
loss: 1.0391007661819458,grad_norm: 0.9999995050599371, iteration: 32164
loss: 0.9859921932220459,grad_norm: 0.9999991118357313, iteration: 32165
loss: 0.9914405345916748,grad_norm: 0.9358164201356151, iteration: 32166
loss: 0.9931395649909973,grad_norm: 0.8046739376228781, iteration: 32167
loss: 1.0225675106048584,grad_norm: 0.9999996024827034, iteration: 32168
loss: 1.0139542818069458,grad_norm: 0.9999993618895021, iteration: 32169
loss: 1.0061359405517578,grad_norm: 0.9825155549086748, iteration: 32170
loss: 1.0278528928756714,grad_norm: 0.9999991647404499, iteration: 32171
loss: 0.9844949841499329,grad_norm: 0.9999989889106498, iteration: 32172
loss: 1.010948657989502,grad_norm: 0.9066583229151629, iteration: 32173
loss: 1.0144340991973877,grad_norm: 0.850925831102506, iteration: 32174
loss: 1.0362235307693481,grad_norm: 0.9999991430657528, iteration: 32175
loss: 0.9691006541252136,grad_norm: 0.9999991931256322, iteration: 32176
loss: 1.065246343612671,grad_norm: 0.9237186309099413, iteration: 32177
loss: 1.0060536861419678,grad_norm: 0.917421466549807, iteration: 32178
loss: 1.0073401927947998,grad_norm: 0.9999990678767852, iteration: 32179
loss: 1.0159392356872559,grad_norm: 0.9970657458563398, iteration: 32180
loss: 1.0057939291000366,grad_norm: 0.9999992062269046, iteration: 32181
loss: 1.0233474969863892,grad_norm: 0.9999997513680815, iteration: 32182
loss: 0.9895153045654297,grad_norm: 0.9999991790369712, iteration: 32183
loss: 1.0244600772857666,grad_norm: 0.9999994038462606, iteration: 32184
loss: 1.005527138710022,grad_norm: 0.9999991851333221, iteration: 32185
loss: 1.001619815826416,grad_norm: 0.9332739554716718, iteration: 32186
loss: 0.9958858489990234,grad_norm: 0.9380462856697114, iteration: 32187
loss: 1.0296415090560913,grad_norm: 0.9382457667656046, iteration: 32188
loss: 0.9885779023170471,grad_norm: 0.8902877111665469, iteration: 32189
loss: 1.0163365602493286,grad_norm: 0.9867845003146106, iteration: 32190
loss: 1.0238491296768188,grad_norm: 0.9999992577385213, iteration: 32191
loss: 1.0088332891464233,grad_norm: 0.938785831888457, iteration: 32192
loss: 1.0170320272445679,grad_norm: 0.999999086576063, iteration: 32193
loss: 1.0023871660232544,grad_norm: 0.9999992458719742, iteration: 32194
loss: 1.024282693862915,grad_norm: 0.7932901926194383, iteration: 32195
loss: 1.0322070121765137,grad_norm: 0.8124660523563982, iteration: 32196
loss: 1.1020735502243042,grad_norm: 0.9999994903999595, iteration: 32197
loss: 1.0118218660354614,grad_norm: 0.9999991794179266, iteration: 32198
loss: 1.0243960618972778,grad_norm: 0.9574887369437367, iteration: 32199
loss: 0.9946518540382385,grad_norm: 0.8438875099726674, iteration: 32200
loss: 0.9720271825790405,grad_norm: 0.9351368039651342, iteration: 32201
loss: 1.017897367477417,grad_norm: 0.9999997395582783, iteration: 32202
loss: 0.9618725776672363,grad_norm: 0.9999992379369446, iteration: 32203
loss: 0.9971904158592224,grad_norm: 0.8984210345047634, iteration: 32204
loss: 0.9681116938591003,grad_norm: 0.9999995787642827, iteration: 32205
loss: 0.9977009892463684,grad_norm: 0.9999991719308766, iteration: 32206
loss: 1.0190670490264893,grad_norm: 0.9093918583060518, iteration: 32207
loss: 0.9692957401275635,grad_norm: 0.9999991659122247, iteration: 32208
loss: 1.0296272039413452,grad_norm: 0.9999996422813072, iteration: 32209
loss: 1.0260367393493652,grad_norm: 0.9999991823754988, iteration: 32210
loss: 1.0260273218154907,grad_norm: 0.9884090343874613, iteration: 32211
loss: 1.0004268884658813,grad_norm: 0.9576176054283706, iteration: 32212
loss: 1.0184569358825684,grad_norm: 0.9999991225276013, iteration: 32213
loss: 1.0101869106292725,grad_norm: 0.9999992792733492, iteration: 32214
loss: 0.9992607235908508,grad_norm: 0.9999989862605589, iteration: 32215
loss: 1.0026549100875854,grad_norm: 0.9999990409591087, iteration: 32216
loss: 1.0068284273147583,grad_norm: 0.9065234619948042, iteration: 32217
loss: 1.0200350284576416,grad_norm: 0.999999134570662, iteration: 32218
loss: 0.9912702441215515,grad_norm: 0.8555836092353415, iteration: 32219
loss: 1.0102797746658325,grad_norm: 0.8756840375256391, iteration: 32220
loss: 1.0289825201034546,grad_norm: 0.9582386445923444, iteration: 32221
loss: 0.9651222229003906,grad_norm: 0.999999041427344, iteration: 32222
loss: 0.9885572791099548,grad_norm: 0.999999524408994, iteration: 32223
loss: 1.0281002521514893,grad_norm: 0.9999991158845386, iteration: 32224
loss: 1.0240646600723267,grad_norm: 0.9999991610088718, iteration: 32225
loss: 0.9787670373916626,grad_norm: 0.9999992491225141, iteration: 32226
loss: 1.0340349674224854,grad_norm: 0.9999998341959118, iteration: 32227
loss: 0.9796462655067444,grad_norm: 0.8725580598762845, iteration: 32228
loss: 1.0429964065551758,grad_norm: 0.9999997240892187, iteration: 32229
loss: 0.9902050495147705,grad_norm: 0.9025300705752959, iteration: 32230
loss: 0.9964346289634705,grad_norm: 0.9999994828605533, iteration: 32231
loss: 0.9808842539787292,grad_norm: 0.999999225992519, iteration: 32232
loss: 0.9963240027427673,grad_norm: 0.8341248726001276, iteration: 32233
loss: 1.0175447463989258,grad_norm: 0.9999991126718148, iteration: 32234
loss: 0.9888404011726379,grad_norm: 0.9335148325536745, iteration: 32235
loss: 1.0757750272750854,grad_norm: 0.9999998148540069, iteration: 32236
loss: 0.9922241568565369,grad_norm: 0.8823384098038334, iteration: 32237
loss: 1.0375964641571045,grad_norm: 0.9850396658540967, iteration: 32238
loss: 1.017714023590088,grad_norm: 0.9999990996185633, iteration: 32239
loss: 0.954268217086792,grad_norm: 0.9999990651993458, iteration: 32240
loss: 1.0285727977752686,grad_norm: 0.9752274548070401, iteration: 32241
loss: 1.0367488861083984,grad_norm: 0.9999992765791468, iteration: 32242
loss: 1.0000218152999878,grad_norm: 0.9999990246165253, iteration: 32243
loss: 1.0742528438568115,grad_norm: 0.9999997369046527, iteration: 32244
loss: 1.0318101644515991,grad_norm: 0.9999990846645692, iteration: 32245
loss: 0.9958447217941284,grad_norm: 0.9392478056038231, iteration: 32246
loss: 0.9163401126861572,grad_norm: 0.9954522016443658, iteration: 32247
loss: 1.008568286895752,grad_norm: 0.8744895876295367, iteration: 32248
loss: 1.0085303783416748,grad_norm: 0.9999992922346396, iteration: 32249
loss: 1.016110897064209,grad_norm: 0.9999992225863731, iteration: 32250
loss: 0.9782024025917053,grad_norm: 0.8855320794804448, iteration: 32251
loss: 0.9732838869094849,grad_norm: 0.9999990812347016, iteration: 32252
loss: 1.0127750635147095,grad_norm: 0.9999992369838256, iteration: 32253
loss: 1.000477910041809,grad_norm: 0.9999991445876001, iteration: 32254
loss: 0.9948078393936157,grad_norm: 0.9323974084291565, iteration: 32255
loss: 0.9757285714149475,grad_norm: 0.9797192973153124, iteration: 32256
loss: 1.0515468120574951,grad_norm: 0.9999992445603356, iteration: 32257
loss: 0.9965059757232666,grad_norm: 0.8767087654365601, iteration: 32258
loss: 1.084648609161377,grad_norm: 0.9999997028278312, iteration: 32259
loss: 0.9974561929702759,grad_norm: 0.9706557674563429, iteration: 32260
loss: 1.0039864778518677,grad_norm: 0.9999990873054093, iteration: 32261
loss: 1.0413594245910645,grad_norm: 0.8739480404127739, iteration: 32262
loss: 1.0015064477920532,grad_norm: 0.9589656018604016, iteration: 32263
loss: 1.0071051120758057,grad_norm: 0.9039831604684583, iteration: 32264
loss: 1.0245598554611206,grad_norm: 0.9999991473012553, iteration: 32265
loss: 1.0419138669967651,grad_norm: 0.9999989790720084, iteration: 32266
loss: 1.0097737312316895,grad_norm: 0.944730865273272, iteration: 32267
loss: 1.0112062692642212,grad_norm: 0.999999209678658, iteration: 32268
loss: 0.9948240518569946,grad_norm: 0.7869695518457553, iteration: 32269
loss: 1.0245203971862793,grad_norm: 0.9999994655203301, iteration: 32270
loss: 0.9934492707252502,grad_norm: 0.9444112429833172, iteration: 32271
loss: 1.0113205909729004,grad_norm: 0.9999997182839249, iteration: 32272
loss: 0.9746856093406677,grad_norm: 0.7895877868394826, iteration: 32273
loss: 1.0280016660690308,grad_norm: 0.9999997372306131, iteration: 32274
loss: 1.0256049633026123,grad_norm: 0.9709050859388758, iteration: 32275
loss: 0.967560350894928,grad_norm: 0.9999996104931341, iteration: 32276
loss: 0.9958348870277405,grad_norm: 0.9999999452720968, iteration: 32277
loss: 0.9916895627975464,grad_norm: 0.8822887632534935, iteration: 32278
loss: 0.9692808985710144,grad_norm: 0.9999989757365612, iteration: 32279
loss: 0.9685964584350586,grad_norm: 0.9952606748207797, iteration: 32280
loss: 0.9910071492195129,grad_norm: 0.7663073076056032, iteration: 32281
loss: 1.058910846710205,grad_norm: 0.9999997521506111, iteration: 32282
loss: 1.0032644271850586,grad_norm: 0.9999990411288472, iteration: 32283
loss: 1.0198496580123901,grad_norm: 0.999999462288145, iteration: 32284
loss: 0.9763893485069275,grad_norm: 0.9905604525515526, iteration: 32285
loss: 1.0469163656234741,grad_norm: 0.999999698189983, iteration: 32286
loss: 0.9779435992240906,grad_norm: 0.9720394179568035, iteration: 32287
loss: 1.0321028232574463,grad_norm: 0.9999993157552908, iteration: 32288
loss: 0.9666834473609924,grad_norm: 0.9999990099650091, iteration: 32289
loss: 1.034422755241394,grad_norm: 0.9999993847658463, iteration: 32290
loss: 1.0332180261611938,grad_norm: 0.9999990934336468, iteration: 32291
loss: 1.0010439157485962,grad_norm: 0.8497641001565391, iteration: 32292
loss: 1.0027520656585693,grad_norm: 0.999999162016599, iteration: 32293
loss: 0.9575457572937012,grad_norm: 0.9999882164809897, iteration: 32294
loss: 1.0128039121627808,grad_norm: 0.925524324220845, iteration: 32295
loss: 0.9744740128517151,grad_norm: 0.8224492082079435, iteration: 32296
loss: 0.9735047221183777,grad_norm: 0.9999992146936204, iteration: 32297
loss: 1.078618049621582,grad_norm: 0.9999997142177172, iteration: 32298
loss: 1.0271539688110352,grad_norm: 0.9049012398007217, iteration: 32299
loss: 1.0424790382385254,grad_norm: 0.9915504674464454, iteration: 32300
loss: 1.0121575593948364,grad_norm: 0.9999993629272992, iteration: 32301
loss: 1.0078357458114624,grad_norm: 0.9385962261692049, iteration: 32302
loss: 0.9808588624000549,grad_norm: 0.966784416759882, iteration: 32303
loss: 1.022329568862915,grad_norm: 0.9645665522632847, iteration: 32304
loss: 0.997115433216095,grad_norm: 0.9999991382309037, iteration: 32305
loss: 0.9999215006828308,grad_norm: 0.999999291577132, iteration: 32306
loss: 1.0009403228759766,grad_norm: 0.9999992824331998, iteration: 32307
loss: 1.0169880390167236,grad_norm: 0.9854019368837957, iteration: 32308
loss: 1.0227476358413696,grad_norm: 0.999999137848178, iteration: 32309
loss: 1.0142180919647217,grad_norm: 0.9593994019826203, iteration: 32310
loss: 0.9720433950424194,grad_norm: 0.9999993608938379, iteration: 32311
loss: 1.0056439638137817,grad_norm: 0.9999992796078896, iteration: 32312
loss: 1.0059994459152222,grad_norm: 0.9999991000674657, iteration: 32313
loss: 0.9816077351570129,grad_norm: 0.9002492301448658, iteration: 32314
loss: 0.9856908917427063,grad_norm: 0.9291360025000301, iteration: 32315
loss: 1.0170236825942993,grad_norm: 0.9999992971120552, iteration: 32316
loss: 1.0297329425811768,grad_norm: 0.999999045570234, iteration: 32317
loss: 1.019282579421997,grad_norm: 0.9999994722607354, iteration: 32318
loss: 0.9685125946998596,grad_norm: 0.999999267117767, iteration: 32319
loss: 1.03255295753479,grad_norm: 0.999999153893369, iteration: 32320
loss: 1.0093777179718018,grad_norm: 0.8081986311350138, iteration: 32321
loss: 0.9985595941543579,grad_norm: 0.999999067391655, iteration: 32322
loss: 0.9987461566925049,grad_norm: 0.9999991961810293, iteration: 32323
loss: 0.9912945628166199,grad_norm: 0.9209684869414017, iteration: 32324
loss: 1.0034822225570679,grad_norm: 0.9999991550874909, iteration: 32325
loss: 0.9727287292480469,grad_norm: 0.9706047227027181, iteration: 32326
loss: 0.9878552556037903,grad_norm: 0.9999994021970077, iteration: 32327
loss: 0.9894018769264221,grad_norm: 0.9597576153462974, iteration: 32328
loss: 1.0176892280578613,grad_norm: 0.9448872369385933, iteration: 32329
loss: 0.9828490614891052,grad_norm: 0.9758221157049933, iteration: 32330
loss: 0.989616334438324,grad_norm: 0.9999990851896525, iteration: 32331
loss: 1.013380527496338,grad_norm: 0.9999991023432641, iteration: 32332
loss: 0.986272931098938,grad_norm: 0.999999323704877, iteration: 32333
loss: 1.034316062927246,grad_norm: 0.9999992055689351, iteration: 32334
loss: 1.0348807573318481,grad_norm: 0.9999990957254429, iteration: 32335
loss: 1.0029321908950806,grad_norm: 0.8974994631392842, iteration: 32336
loss: 1.0307819843292236,grad_norm: 0.9870506266260749, iteration: 32337
loss: 0.990219235420227,grad_norm: 0.9490686818814036, iteration: 32338
loss: 1.0305523872375488,grad_norm: 0.9999991362730853, iteration: 32339
loss: 0.9981287717819214,grad_norm: 0.9999992904934121, iteration: 32340
loss: 0.9689033031463623,grad_norm: 0.9257099999064571, iteration: 32341
loss: 0.9630932807922363,grad_norm: 0.9999990328315425, iteration: 32342
loss: 1.0180023908615112,grad_norm: 0.9999991389940343, iteration: 32343
loss: 1.104141116142273,grad_norm: 0.9999993644861713, iteration: 32344
loss: 1.059891939163208,grad_norm: 0.9999991479052411, iteration: 32345
loss: 1.0289669036865234,grad_norm: 0.9576657675191358, iteration: 32346
loss: 1.0093637704849243,grad_norm: 0.8418946867281938, iteration: 32347
loss: 0.9843109250068665,grad_norm: 0.9999991767946828, iteration: 32348
loss: 1.055273413658142,grad_norm: 0.9898842098965528, iteration: 32349
loss: 1.0103614330291748,grad_norm: 0.9999992029255027, iteration: 32350
loss: 0.9779083132743835,grad_norm: 0.9999990669311102, iteration: 32351
loss: 0.995427131652832,grad_norm: 0.9963388629353968, iteration: 32352
loss: 0.998792290687561,grad_norm: 0.8526413098069466, iteration: 32353
loss: 1.0270822048187256,grad_norm: 0.9999991439174171, iteration: 32354
loss: 1.0312997102737427,grad_norm: 0.8386249300027583, iteration: 32355
loss: 1.1165404319763184,grad_norm: 0.9999995690424337, iteration: 32356
loss: 1.0482677221298218,grad_norm: 0.8864412473642671, iteration: 32357
loss: 1.0231794118881226,grad_norm: 0.9449373306177903, iteration: 32358
loss: 1.016000747680664,grad_norm: 0.9999992305834604, iteration: 32359
loss: 1.0454440116882324,grad_norm: 0.9999991806614477, iteration: 32360
loss: 0.9966305494308472,grad_norm: 0.9999988797804562, iteration: 32361
loss: 1.0393493175506592,grad_norm: 0.9942342733476609, iteration: 32362
loss: 1.0232347249984741,grad_norm: 0.8906036333255438, iteration: 32363
loss: 1.0484881401062012,grad_norm: 0.9999993756802963, iteration: 32364
loss: 1.0665181875228882,grad_norm: 0.9999991234268453, iteration: 32365
loss: 0.9875494837760925,grad_norm: 0.9723324219491611, iteration: 32366
loss: 1.0465357303619385,grad_norm: 0.9999990127312618, iteration: 32367
loss: 1.0207375288009644,grad_norm: 0.9040011467062309, iteration: 32368
loss: 1.0327696800231934,grad_norm: 0.9999989448051461, iteration: 32369
loss: 1.0056298971176147,grad_norm: 0.9999995311255792, iteration: 32370
loss: 1.0018327236175537,grad_norm: 0.9999992281190496, iteration: 32371
loss: 0.9978143572807312,grad_norm: 0.946446131684361, iteration: 32372
loss: 1.057938814163208,grad_norm: 0.9827717360485201, iteration: 32373
loss: 1.0633624792099,grad_norm: 0.9999997219372105, iteration: 32374
loss: 1.004304051399231,grad_norm: 0.9316137258785573, iteration: 32375
loss: 0.999017059803009,grad_norm: 0.9999990777677957, iteration: 32376
loss: 1.0212657451629639,grad_norm: 0.9999990174200499, iteration: 32377
loss: 1.0046348571777344,grad_norm: 0.9999993168451399, iteration: 32378
loss: 1.0307306051254272,grad_norm: 0.9999990993433853, iteration: 32379
loss: 0.995108425617218,grad_norm: 0.9999992372032627, iteration: 32380
loss: 1.0293545722961426,grad_norm: 0.9999991710965072, iteration: 32381
loss: 1.0236331224441528,grad_norm: 0.9008051910969459, iteration: 32382
loss: 1.0399165153503418,grad_norm: 0.8819807346514877, iteration: 32383
loss: 1.0594534873962402,grad_norm: 0.9033026627356634, iteration: 32384
loss: 0.9689301252365112,grad_norm: 0.8439580461661715, iteration: 32385
loss: 1.0231081247329712,grad_norm: 0.9999992575732796, iteration: 32386
loss: 0.9643175601959229,grad_norm: 0.9332708566501822, iteration: 32387
loss: 1.0156015157699585,grad_norm: 0.785301614823256, iteration: 32388
loss: 0.9995166063308716,grad_norm: 0.9999995424161441, iteration: 32389
loss: 0.9875336289405823,grad_norm: 0.7555285722486715, iteration: 32390
loss: 1.0169119834899902,grad_norm: 0.9999992361910082, iteration: 32391
loss: 1.029024362564087,grad_norm: 0.9999990398718669, iteration: 32392
loss: 0.9963042140007019,grad_norm: 0.8786106354127786, iteration: 32393
loss: 0.998638927936554,grad_norm: 0.8950195888357585, iteration: 32394
loss: 0.9626627564430237,grad_norm: 0.9999989767643577, iteration: 32395
loss: 0.9858750700950623,grad_norm: 0.96010988932144, iteration: 32396
loss: 1.003662109375,grad_norm: 0.9460656471814008, iteration: 32397
loss: 1.0087178945541382,grad_norm: 0.8166507380272507, iteration: 32398
loss: 1.0077897310256958,grad_norm: 0.7221524332095537, iteration: 32399
loss: 0.9873722791671753,grad_norm: 0.9315280748463419, iteration: 32400
loss: 0.9787036776542664,grad_norm: 0.9772005996028665, iteration: 32401
loss: 0.9649274945259094,grad_norm: 0.9999990939149811, iteration: 32402
loss: 1.0413284301757812,grad_norm: 0.9999992399728831, iteration: 32403
loss: 1.0145559310913086,grad_norm: 0.9999991624308753, iteration: 32404
loss: 0.9988426566123962,grad_norm: 0.8395697367834698, iteration: 32405
loss: 1.0292855501174927,grad_norm: 0.9953678476336617, iteration: 32406
loss: 1.0400913953781128,grad_norm: 0.9830536585747773, iteration: 32407
loss: 1.0177148580551147,grad_norm: 0.8696226233940207, iteration: 32408
loss: 1.0126491785049438,grad_norm: 0.8215748705623657, iteration: 32409
loss: 1.0853917598724365,grad_norm: 0.9999999769123333, iteration: 32410
loss: 0.9786189198493958,grad_norm: 0.9999992942646144, iteration: 32411
loss: 1.0004494190216064,grad_norm: 0.9466148223253364, iteration: 32412
loss: 1.0113439559936523,grad_norm: 0.9544319874714186, iteration: 32413
loss: 0.9930191040039062,grad_norm: 0.8448457573865483, iteration: 32414
loss: 0.9929201006889343,grad_norm: 0.9576480023674344, iteration: 32415
loss: 1.1119554042816162,grad_norm: 0.9999996192063767, iteration: 32416
loss: 0.9942836761474609,grad_norm: 0.999999059804871, iteration: 32417
loss: 0.9799724817276001,grad_norm: 0.9273013053094807, iteration: 32418
loss: 0.9876542687416077,grad_norm: 0.7684795571301655, iteration: 32419
loss: 1.0151636600494385,grad_norm: 0.9903173950540152, iteration: 32420
loss: 1.0188120603561401,grad_norm: 0.9999997499734387, iteration: 32421
loss: 1.0117741823196411,grad_norm: 0.822499519026622, iteration: 32422
loss: 0.9975622296333313,grad_norm: 0.9811446780379723, iteration: 32423
loss: 1.0267478227615356,grad_norm: 0.9999991996975638, iteration: 32424
loss: 0.9637739658355713,grad_norm: 0.9999990675327007, iteration: 32425
loss: 0.9947579503059387,grad_norm: 0.999999044261501, iteration: 32426
loss: 0.998776912689209,grad_norm: 0.8473747896259707, iteration: 32427
loss: 0.9902235865592957,grad_norm: 0.9999992076508678, iteration: 32428
loss: 1.0350041389465332,grad_norm: 0.9999992120065996, iteration: 32429
loss: 0.9798287749290466,grad_norm: 0.9316242725469628, iteration: 32430
loss: 1.0552232265472412,grad_norm: 0.9999991751572558, iteration: 32431
loss: 1.0126093626022339,grad_norm: 0.8492749706745475, iteration: 32432
loss: 1.0212857723236084,grad_norm: 0.8164358249943635, iteration: 32433
loss: 0.9966307282447815,grad_norm: 0.9483156971674566, iteration: 32434
loss: 1.0189499855041504,grad_norm: 0.9999991027149245, iteration: 32435
loss: 1.0231590270996094,grad_norm: 0.9999990309229605, iteration: 32436
loss: 0.9978010654449463,grad_norm: 0.9381043758216208, iteration: 32437
loss: 1.0213268995285034,grad_norm: 0.9999997317511137, iteration: 32438
loss: 0.9858523607254028,grad_norm: 0.9004556966523355, iteration: 32439
loss: 1.0037868022918701,grad_norm: 0.9999990210544916, iteration: 32440
loss: 0.9975826144218445,grad_norm: 0.9958988613984887, iteration: 32441
loss: 1.028743028640747,grad_norm: 0.9778914228010126, iteration: 32442
loss: 1.001063346862793,grad_norm: 0.9999991276235799, iteration: 32443
loss: 1.009042501449585,grad_norm: 0.9242175966558138, iteration: 32444
loss: 1.0196473598480225,grad_norm: 0.9851264640333729, iteration: 32445
loss: 1.0232374668121338,grad_norm: 0.999999220590202, iteration: 32446
loss: 0.9827612042427063,grad_norm: 0.9999993208517515, iteration: 32447
loss: 1.0128040313720703,grad_norm: 0.9999992677401078, iteration: 32448
loss: 1.020046353340149,grad_norm: 0.9255926267042432, iteration: 32449
loss: 1.0146077871322632,grad_norm: 0.9999993727350738, iteration: 32450
loss: 0.9797070026397705,grad_norm: 0.9999992803327553, iteration: 32451
loss: 1.0143213272094727,grad_norm: 0.95584567563411, iteration: 32452
loss: 1.0421005487442017,grad_norm: 0.9999998173227692, iteration: 32453
loss: 1.014068603515625,grad_norm: 0.9784500316765014, iteration: 32454
loss: 1.0007444620132446,grad_norm: 0.9752244578730603, iteration: 32455
loss: 1.0383836030960083,grad_norm: 0.999999098905521, iteration: 32456
loss: 1.0341994762420654,grad_norm: 0.9209188499339419, iteration: 32457
loss: 0.99664306640625,grad_norm: 0.9999992002006864, iteration: 32458
loss: 1.0206310749053955,grad_norm: 0.9663669008780938, iteration: 32459
loss: 0.9925550818443298,grad_norm: 0.9053219663567871, iteration: 32460
loss: 0.9977123141288757,grad_norm: 0.9685123178834867, iteration: 32461
loss: 1.0424813032150269,grad_norm: 0.9999990426683646, iteration: 32462
loss: 0.991859495639801,grad_norm: 0.9296270881715202, iteration: 32463
loss: 0.9819097518920898,grad_norm: 0.9999991936829146, iteration: 32464
loss: 1.0013712644577026,grad_norm: 0.9999989881553201, iteration: 32465
loss: 0.9953599572181702,grad_norm: 0.9739266378180674, iteration: 32466
loss: 1.0053961277008057,grad_norm: 0.9710980094575049, iteration: 32467
loss: 0.9607944488525391,grad_norm: 0.9244115942275919, iteration: 32468
loss: 1.0416089296340942,grad_norm: 0.9999989658844417, iteration: 32469
loss: 0.9702425003051758,grad_norm: 0.8105206556066996, iteration: 32470
loss: 1.0293856859207153,grad_norm: 0.8717163938346145, iteration: 32471
loss: 0.9838371872901917,grad_norm: 0.8378987807792039, iteration: 32472
loss: 1.0164916515350342,grad_norm: 0.9999990253722416, iteration: 32473
loss: 1.027073621749878,grad_norm: 0.9999991255973384, iteration: 32474
loss: 0.9987906813621521,grad_norm: 0.8610593759022258, iteration: 32475
loss: 1.003413438796997,grad_norm: 0.9999992760492895, iteration: 32476
loss: 1.00798499584198,grad_norm: 0.9879288411635607, iteration: 32477
loss: 1.0012272596359253,grad_norm: 0.9396965450493254, iteration: 32478
loss: 1.0282564163208008,grad_norm: 0.8721854296789034, iteration: 32479
loss: 1.0546878576278687,grad_norm: 0.9999995785348648, iteration: 32480
loss: 0.9982733130455017,grad_norm: 0.7755279217523652, iteration: 32481
loss: 0.9863004088401794,grad_norm: 0.8685142141578217, iteration: 32482
loss: 1.0215333700180054,grad_norm: 0.9999993225762093, iteration: 32483
loss: 0.9932446479797363,grad_norm: 0.944825906233204, iteration: 32484
loss: 1.0037974119186401,grad_norm: 0.9999991066560491, iteration: 32485
loss: 0.9855334758758545,grad_norm: 0.9999990792553862, iteration: 32486
loss: 1.0192763805389404,grad_norm: 0.9999990305705018, iteration: 32487
loss: 1.0207552909851074,grad_norm: 0.9961801784089613, iteration: 32488
loss: 1.0036672353744507,grad_norm: 0.9953805064558582, iteration: 32489
loss: 1.021824598312378,grad_norm: 0.9999990609104702, iteration: 32490
loss: 1.0503329038619995,grad_norm: 0.9999993048173216, iteration: 32491
loss: 0.9890729188919067,grad_norm: 0.9999990984070751, iteration: 32492
loss: 1.0073862075805664,grad_norm: 0.9999990853844962, iteration: 32493
loss: 0.9875459671020508,grad_norm: 0.9392449086214759, iteration: 32494
loss: 1.0199639797210693,grad_norm: 0.9999995409567731, iteration: 32495
loss: 1.024565577507019,grad_norm: 0.9999994551259437, iteration: 32496
loss: 0.9775351881980896,grad_norm: 0.999999112414608, iteration: 32497
loss: 1.0048203468322754,grad_norm: 0.9999990879784595, iteration: 32498
loss: 1.155892252922058,grad_norm: 0.9999997701072721, iteration: 32499
loss: 1.0081028938293457,grad_norm: 0.9999989751756684, iteration: 32500
loss: 1.014298439025879,grad_norm: 0.9999990521853972, iteration: 32501
loss: 1.00766122341156,grad_norm: 0.9999989995974957, iteration: 32502
loss: 1.023452639579773,grad_norm: 0.9999993296878653, iteration: 32503
loss: 0.9986703395843506,grad_norm: 0.9677054389214339, iteration: 32504
loss: 1.0157780647277832,grad_norm: 0.9999990952462532, iteration: 32505
loss: 0.9924132227897644,grad_norm: 0.7696186844950853, iteration: 32506
loss: 1.0116227865219116,grad_norm: 0.9999991853528644, iteration: 32507
loss: 0.9865480661392212,grad_norm: 0.9265951190322463, iteration: 32508
loss: 0.9860673546791077,grad_norm: 0.9999991493919234, iteration: 32509
loss: 1.001670479774475,grad_norm: 0.9163351756349577, iteration: 32510
loss: 1.0388139486312866,grad_norm: 0.9545587303345077, iteration: 32511
loss: 1.0120165348052979,grad_norm: 0.9999989271191552, iteration: 32512
loss: 0.9830634593963623,grad_norm: 0.999999144701154, iteration: 32513
loss: 1.0225142240524292,grad_norm: 0.9999991843754954, iteration: 32514
loss: 0.9863667488098145,grad_norm: 0.986369963737435, iteration: 32515
loss: 1.000611662864685,grad_norm: 0.9911640699119472, iteration: 32516
loss: 1.0406677722930908,grad_norm: 0.9216044105341124, iteration: 32517
loss: 1.0344411134719849,grad_norm: 0.9999990219396014, iteration: 32518
loss: 1.0242564678192139,grad_norm: 0.9999990932528181, iteration: 32519
loss: 0.9727707505226135,grad_norm: 0.9999990723489663, iteration: 32520
loss: 1.0030372142791748,grad_norm: 0.9999990618932721, iteration: 32521
loss: 1.0125494003295898,grad_norm: 0.8377331430186759, iteration: 32522
loss: 1.1039015054702759,grad_norm: 0.9999992620858408, iteration: 32523
loss: 1.0196622610092163,grad_norm: 0.9999991254157612, iteration: 32524
loss: 1.0139520168304443,grad_norm: 0.999999099940187, iteration: 32525
loss: 1.0324913263320923,grad_norm: 0.9999990379761692, iteration: 32526
loss: 1.028241753578186,grad_norm: 0.9999992719020458, iteration: 32527
loss: 1.0380825996398926,grad_norm: 0.8959092344390848, iteration: 32528
loss: 1.0058568716049194,grad_norm: 0.9999993871454915, iteration: 32529
loss: 1.0044163465499878,grad_norm: 0.8837949536655187, iteration: 32530
loss: 1.0574151277542114,grad_norm: 0.999999873870078, iteration: 32531
loss: 1.006178855895996,grad_norm: 0.9354664328060538, iteration: 32532
loss: 1.0079901218414307,grad_norm: 0.9781101298447789, iteration: 32533
loss: 0.983788013458252,grad_norm: 0.989638998539144, iteration: 32534
loss: 1.0109779834747314,grad_norm: 0.9999991202223918, iteration: 32535
loss: 0.9943784475326538,grad_norm: 0.9999992103378842, iteration: 32536
loss: 0.9657901525497437,grad_norm: 0.9999992133270611, iteration: 32537
loss: 1.0396260023117065,grad_norm: 0.999999030152642, iteration: 32538
loss: 0.9825780987739563,grad_norm: 0.9999988989902542, iteration: 32539
loss: 0.9716944098472595,grad_norm: 0.7660959934430664, iteration: 32540
loss: 0.9624289870262146,grad_norm: 0.9508240626351305, iteration: 32541
loss: 1.0124790668487549,grad_norm: 0.9999990113304562, iteration: 32542
loss: 0.9705103039741516,grad_norm: 0.9999993059240957, iteration: 32543
loss: 1.0239813327789307,grad_norm: 0.9999991772625118, iteration: 32544
loss: 1.0396414995193481,grad_norm: 0.9999993545666237, iteration: 32545
loss: 0.9825338125228882,grad_norm: 0.9999990737154327, iteration: 32546
loss: 1.037609338760376,grad_norm: 0.9999992306132989, iteration: 32547
loss: 1.0108873844146729,grad_norm: 0.8495797390804186, iteration: 32548
loss: 0.9741236567497253,grad_norm: 0.9999995173755161, iteration: 32549
loss: 0.9738062024116516,grad_norm: 0.9063863364552807, iteration: 32550
loss: 0.9953081607818604,grad_norm: 0.982137344203749, iteration: 32551
loss: 0.9877966642379761,grad_norm: 0.9088648902047033, iteration: 32552
loss: 0.9926748275756836,grad_norm: 0.9022555556798612, iteration: 32553
loss: 1.0961778163909912,grad_norm: 0.9999999086012451, iteration: 32554
loss: 1.0172030925750732,grad_norm: 0.9363648544776847, iteration: 32555
loss: 0.9981914758682251,grad_norm: 0.9999993825293835, iteration: 32556
loss: 1.035510540008545,grad_norm: 0.9999990770956834, iteration: 32557
loss: 1.0059314966201782,grad_norm: 0.9999992611827448, iteration: 32558
loss: 0.9792044162750244,grad_norm: 0.9383284811337368, iteration: 32559
loss: 1.0115487575531006,grad_norm: 0.9999994687282506, iteration: 32560
loss: 1.000760555267334,grad_norm: 0.9999991977635967, iteration: 32561
loss: 1.0512081384658813,grad_norm: 0.9999990910158869, iteration: 32562
loss: 1.006479263305664,grad_norm: 0.8938474371651483, iteration: 32563
loss: 0.9478678107261658,grad_norm: 0.9356968777422872, iteration: 32564
loss: 1.0236847400665283,grad_norm: 0.8256974231945637, iteration: 32565
loss: 1.0802764892578125,grad_norm: 0.9338323919829441, iteration: 32566
loss: 1.0435328483581543,grad_norm: 0.9506615770077826, iteration: 32567
loss: 1.0350736379623413,grad_norm: 0.9999991143661455, iteration: 32568
loss: 1.0909863710403442,grad_norm: 0.999999313765386, iteration: 32569
loss: 1.0425081253051758,grad_norm: 0.9999991599361524, iteration: 32570
loss: 1.1232784986495972,grad_norm: 0.9732782280422412, iteration: 32571
loss: 1.0211509466171265,grad_norm: 0.9999996841051586, iteration: 32572
loss: 0.9820832014083862,grad_norm: 0.999998936267953, iteration: 32573
loss: 1.0670664310455322,grad_norm: 0.99999924852883, iteration: 32574
loss: 0.9942556619644165,grad_norm: 0.9999990707817954, iteration: 32575
loss: 1.0247349739074707,grad_norm: 0.9827059332802104, iteration: 32576
loss: 1.0143182277679443,grad_norm: 0.8956440595970061, iteration: 32577
loss: 1.0419116020202637,grad_norm: 0.9999996764246359, iteration: 32578
loss: 1.0220965147018433,grad_norm: 0.9402147680339751, iteration: 32579
loss: 1.0489258766174316,grad_norm: 0.9999997324088281, iteration: 32580
loss: 0.9996044635772705,grad_norm: 0.999999108540704, iteration: 32581
loss: 1.024237871170044,grad_norm: 0.9495644970555603, iteration: 32582
loss: 0.9604156017303467,grad_norm: 0.9292507423614479, iteration: 32583
loss: 1.0231765508651733,grad_norm: 0.9999990509826315, iteration: 32584
loss: 0.9840947389602661,grad_norm: 0.9999990257899879, iteration: 32585
loss: 1.0233697891235352,grad_norm: 0.8809478618182905, iteration: 32586
loss: 1.040401816368103,grad_norm: 0.9999999032294358, iteration: 32587
loss: 1.02616286277771,grad_norm: 0.9643234617809763, iteration: 32588
loss: 0.999989926815033,grad_norm: 0.9999992751395556, iteration: 32589
loss: 1.0054640769958496,grad_norm: 0.9999989697878221, iteration: 32590
loss: 1.0080604553222656,grad_norm: 0.9999990429660343, iteration: 32591
loss: 1.010709524154663,grad_norm: 0.9999997410549758, iteration: 32592
loss: 0.9997602105140686,grad_norm: 0.9111274722101471, iteration: 32593
loss: 1.0128957033157349,grad_norm: 0.9095142404418434, iteration: 32594
loss: 0.9718422293663025,grad_norm: 0.9118015733827125, iteration: 32595
loss: 1.0464907884597778,grad_norm: 0.9999997326928077, iteration: 32596
loss: 0.999809980392456,grad_norm: 0.94197412897754, iteration: 32597
loss: 1.0194364786148071,grad_norm: 0.7371701885947954, iteration: 32598
loss: 1.0364962816238403,grad_norm: 0.9999990546729962, iteration: 32599
loss: 0.984304666519165,grad_norm: 0.8518953234775043, iteration: 32600
loss: 1.028525710105896,grad_norm: 0.999999172511784, iteration: 32601
loss: 1.0107102394104004,grad_norm: 0.9838279197181853, iteration: 32602
loss: 0.9894502758979797,grad_norm: 0.9443162813754683, iteration: 32603
loss: 0.9914776682853699,grad_norm: 0.904799887618061, iteration: 32604
loss: 0.9867971539497375,grad_norm: 0.9999991608048334, iteration: 32605
loss: 0.9676485061645508,grad_norm: 0.9999991464446369, iteration: 32606
loss: 0.9624069929122925,grad_norm: 0.9999991386704574, iteration: 32607
loss: 1.0179353952407837,grad_norm: 0.9999994022052968, iteration: 32608
loss: 0.9981475472450256,grad_norm: 0.9999991511267703, iteration: 32609
loss: 0.9831942319869995,grad_norm: 0.8654276905060216, iteration: 32610
loss: 1.010945439338684,grad_norm: 0.9076964233378378, iteration: 32611
loss: 1.0288496017456055,grad_norm: 0.8111117211613611, iteration: 32612
loss: 1.0229861736297607,grad_norm: 0.8656465943695252, iteration: 32613
loss: 1.0158796310424805,grad_norm: 0.8682646428756773, iteration: 32614
loss: 1.0180021524429321,grad_norm: 0.8911331050416882, iteration: 32615
loss: 0.9902846813201904,grad_norm: 0.9999995983000399, iteration: 32616
loss: 0.9993677735328674,grad_norm: 0.9999990964687475, iteration: 32617
loss: 1.0464770793914795,grad_norm: 0.9999994367394179, iteration: 32618
loss: 1.0339261293411255,grad_norm: 0.9822661944157832, iteration: 32619
loss: 1.028609037399292,grad_norm: 0.9999991905920101, iteration: 32620
loss: 1.0125476121902466,grad_norm: 0.9266714118943862, iteration: 32621
loss: 1.0713657140731812,grad_norm: 0.9999994230622561, iteration: 32622
loss: 1.013221263885498,grad_norm: 0.9999990282735153, iteration: 32623
loss: 0.9960931539535522,grad_norm: 0.9174062780872654, iteration: 32624
loss: 0.9981409311294556,grad_norm: 0.9999989738378636, iteration: 32625
loss: 1.0400018692016602,grad_norm: 0.9999990907257538, iteration: 32626
loss: 0.9955994486808777,grad_norm: 0.9999989562228834, iteration: 32627
loss: 1.0012208223342896,grad_norm: 0.999998977513373, iteration: 32628
loss: 1.0157907009124756,grad_norm: 0.9275832968763197, iteration: 32629
loss: 1.0067814588546753,grad_norm: 0.942819613020864, iteration: 32630
loss: 0.9973715543746948,grad_norm: 0.9999992377377073, iteration: 32631
loss: 1.0083281993865967,grad_norm: 0.969986314055294, iteration: 32632
loss: 1.056580662727356,grad_norm: 0.9999993374668418, iteration: 32633
loss: 1.0137046575546265,grad_norm: 0.9082110280844241, iteration: 32634
loss: 1.0049924850463867,grad_norm: 0.9999991358192254, iteration: 32635
loss: 1.0165859460830688,grad_norm: 0.8802214939743007, iteration: 32636
loss: 1.019310712814331,grad_norm: 0.9862065741009671, iteration: 32637
loss: 0.9452077150344849,grad_norm: 0.8483346504063656, iteration: 32638
loss: 0.9905691742897034,grad_norm: 0.9999990308675033, iteration: 32639
loss: 0.9986575841903687,grad_norm: 0.9999992631694409, iteration: 32640
loss: 1.0038623809814453,grad_norm: 0.9999990841788462, iteration: 32641
loss: 1.010467290878296,grad_norm: 0.9999992349163724, iteration: 32642
loss: 1.024301528930664,grad_norm: 0.9821915921131006, iteration: 32643
loss: 0.9694479703903198,grad_norm: 0.9999990400138449, iteration: 32644
loss: 1.031481146812439,grad_norm: 0.9999992109539235, iteration: 32645
loss: 1.0071704387664795,grad_norm: 0.9999994926791373, iteration: 32646
loss: 1.0194956064224243,grad_norm: 0.999999248989688, iteration: 32647
loss: 1.0115485191345215,grad_norm: 0.9999994344512939, iteration: 32648
loss: 1.0285512208938599,grad_norm: 0.9999989389399009, iteration: 32649
loss: 1.0205692052841187,grad_norm: 0.9636877078285473, iteration: 32650
loss: 1.007323145866394,grad_norm: 0.9999990191963931, iteration: 32651
loss: 1.0223124027252197,grad_norm: 0.9999992918794779, iteration: 32652
loss: 0.986791729927063,grad_norm: 0.999999216109869, iteration: 32653
loss: 0.9867013692855835,grad_norm: 0.9999989302043951, iteration: 32654
loss: 1.0126100778579712,grad_norm: 0.8840873836954729, iteration: 32655
loss: 1.0046396255493164,grad_norm: 0.9209461979928162, iteration: 32656
loss: 0.9977381229400635,grad_norm: 0.7601922554480965, iteration: 32657
loss: 0.9693383574485779,grad_norm: 0.9352347312985506, iteration: 32658
loss: 0.9737696051597595,grad_norm: 0.868362398445934, iteration: 32659
loss: 0.9999090433120728,grad_norm: 0.999999167803305, iteration: 32660
loss: 1.04128897190094,grad_norm: 0.9999991325484553, iteration: 32661
loss: 0.9728602766990662,grad_norm: 0.9219962845198574, iteration: 32662
loss: 1.0141282081604004,grad_norm: 0.9536394927960397, iteration: 32663
loss: 1.0610098838806152,grad_norm: 0.999999136535099, iteration: 32664
loss: 1.0333377122879028,grad_norm: 0.9268357473138888, iteration: 32665
loss: 1.0255072116851807,grad_norm: 0.8971087456195733, iteration: 32666
loss: 0.9895995855331421,grad_norm: 0.9107882967011057, iteration: 32667
loss: 0.9757557511329651,grad_norm: 0.9296116764621813, iteration: 32668
loss: 1.0158107280731201,grad_norm: 0.9332671407520687, iteration: 32669
loss: 0.9647533297538757,grad_norm: 0.8514535683575027, iteration: 32670
loss: 1.0139580965042114,grad_norm: 0.9698796492438397, iteration: 32671
loss: 1.0168896913528442,grad_norm: 0.9999991607005577, iteration: 32672
loss: 1.0161446332931519,grad_norm: 0.9999992453871465, iteration: 32673
loss: 1.0044664144515991,grad_norm: 0.97968400986976, iteration: 32674
loss: 1.0126079320907593,grad_norm: 0.9999992682061616, iteration: 32675
loss: 0.963055431842804,grad_norm: 0.8801444591531914, iteration: 32676
loss: 0.9549214839935303,grad_norm: 0.9413211969720247, iteration: 32677
loss: 0.9900619387626648,grad_norm: 0.8909685259602033, iteration: 32678
loss: 0.9898796677589417,grad_norm: 0.9267965833974593, iteration: 32679
loss: 1.0002739429473877,grad_norm: 0.9999993775057164, iteration: 32680
loss: 0.9870620965957642,grad_norm: 0.9935548632061877, iteration: 32681
loss: 0.9929656982421875,grad_norm: 0.8470498308846026, iteration: 32682
loss: 1.0109065771102905,grad_norm: 0.9813297483487349, iteration: 32683
loss: 1.0428764820098877,grad_norm: 0.9999992938172979, iteration: 32684
loss: 1.0290535688400269,grad_norm: 0.9999994062684233, iteration: 32685
loss: 0.9819920659065247,grad_norm: 0.8526392701306198, iteration: 32686
loss: 1.022360920906067,grad_norm: 0.9220606449015968, iteration: 32687
loss: 0.9938082695007324,grad_norm: 0.9999991646116382, iteration: 32688
loss: 0.9603599309921265,grad_norm: 0.9999990872646778, iteration: 32689
loss: 0.9865558743476868,grad_norm: 0.9999993088303174, iteration: 32690
loss: 1.0246407985687256,grad_norm: 0.9999990629921539, iteration: 32691
loss: 0.9938628673553467,grad_norm: 0.9883072787921794, iteration: 32692
loss: 0.9963398575782776,grad_norm: 0.907863473747204, iteration: 32693
loss: 1.0287374258041382,grad_norm: 0.802347440293099, iteration: 32694
loss: 0.9913017153739929,grad_norm: 0.9728668154915971, iteration: 32695
loss: 1.070070743560791,grad_norm: 0.9999992712070933, iteration: 32696
loss: 0.9637982249259949,grad_norm: 0.9622604559974359, iteration: 32697
loss: 1.0097376108169556,grad_norm: 0.9986512293542823, iteration: 32698
loss: 1.0077145099639893,grad_norm: 0.9956506353612852, iteration: 32699
loss: 1.006479263305664,grad_norm: 0.8838446804427604, iteration: 32700
loss: 1.009592056274414,grad_norm: 0.9999990588865119, iteration: 32701
loss: 1.0175893306732178,grad_norm: 0.9235745756664638, iteration: 32702
loss: 1.070783019065857,grad_norm: 0.9999991990134947, iteration: 32703
loss: 0.991015613079071,grad_norm: 0.9900062421820889, iteration: 32704
loss: 0.9836439490318298,grad_norm: 0.9999989406594173, iteration: 32705
loss: 1.0164499282836914,grad_norm: 0.9858032903032116, iteration: 32706
loss: 0.9883310198783875,grad_norm: 0.8368661363980449, iteration: 32707
loss: 0.9933869242668152,grad_norm: 0.9999991077630234, iteration: 32708
loss: 0.9505358338356018,grad_norm: 0.9999990164394346, iteration: 32709
loss: 1.00823175907135,grad_norm: 0.9953105926454907, iteration: 32710
loss: 0.9828060269355774,grad_norm: 0.9638638734404503, iteration: 32711
loss: 0.9871091246604919,grad_norm: 0.9999992740582383, iteration: 32712
loss: 0.9704515933990479,grad_norm: 0.9698534264395815, iteration: 32713
loss: 1.0007617473602295,grad_norm: 0.9999993749610964, iteration: 32714
loss: 0.9573292136192322,grad_norm: 0.9999991338844716, iteration: 32715
loss: 1.0397275686264038,grad_norm: 0.958756267949141, iteration: 32716
loss: 0.9776092767715454,grad_norm: 0.9999990683188127, iteration: 32717
loss: 0.9888273477554321,grad_norm: 0.99999921260162, iteration: 32718
loss: 0.9829111695289612,grad_norm: 0.8673439883787523, iteration: 32719
loss: 0.9996912479400635,grad_norm: 0.9999996375061257, iteration: 32720
loss: 0.9964283108711243,grad_norm: 0.9999989425696, iteration: 32721
loss: 1.0109518766403198,grad_norm: 0.9999990708603191, iteration: 32722
loss: 1.0231964588165283,grad_norm: 0.9999991598214112, iteration: 32723
loss: 1.0462805032730103,grad_norm: 0.9404794066649614, iteration: 32724
loss: 0.9692363739013672,grad_norm: 0.9999992507909663, iteration: 32725
loss: 0.9791368246078491,grad_norm: 0.9999991304661489, iteration: 32726
loss: 0.9862003922462463,grad_norm: 0.9272578509785201, iteration: 32727
loss: 1.005255937576294,grad_norm: 0.8896258750116641, iteration: 32728
loss: 0.969609260559082,grad_norm: 0.8144186225280415, iteration: 32729
loss: 0.9886124730110168,grad_norm: 0.8731447995814688, iteration: 32730
loss: 0.994235098361969,grad_norm: 0.790313833473773, iteration: 32731
loss: 1.0011883974075317,grad_norm: 0.9999992638266126, iteration: 32732
loss: 1.0247324705123901,grad_norm: 0.9999995526624019, iteration: 32733
loss: 1.012642741203308,grad_norm: 0.9185576189249751, iteration: 32734
loss: 1.0021214485168457,grad_norm: 0.8736262372341008, iteration: 32735
loss: 1.0192139148712158,grad_norm: 0.9831028879416033, iteration: 32736
loss: 1.0059788227081299,grad_norm: 0.9999991144487518, iteration: 32737
loss: 1.00016188621521,grad_norm: 0.9999990936239446, iteration: 32738
loss: 1.0001388788223267,grad_norm: 0.854186418375679, iteration: 32739
loss: 0.9977945685386658,grad_norm: 0.9324820244237226, iteration: 32740
loss: 1.0534083843231201,grad_norm: 0.9999995966868828, iteration: 32741
loss: 1.054347038269043,grad_norm: 0.9999991824752665, iteration: 32742
loss: 0.9939935207366943,grad_norm: 0.999999190514071, iteration: 32743
loss: 0.9982722997665405,grad_norm: 0.9608248864556713, iteration: 32744
loss: 1.049475073814392,grad_norm: 0.9999993329698039, iteration: 32745
loss: 1.0030096769332886,grad_norm: 0.999999748465951, iteration: 32746
loss: 0.9781131148338318,grad_norm: 0.9612459715195153, iteration: 32747
loss: 0.9753711819648743,grad_norm: 0.9999996642551826, iteration: 32748
loss: 1.0206319093704224,grad_norm: 0.9947456708182081, iteration: 32749
loss: 0.9935919642448425,grad_norm: 0.8880634304217343, iteration: 32750
loss: 0.9930638074874878,grad_norm: 0.9987693218475578, iteration: 32751
loss: 1.026877999305725,grad_norm: 0.9999990598290507, iteration: 32752
loss: 0.9932453632354736,grad_norm: 0.9999991531018729, iteration: 32753
loss: 0.9952247738838196,grad_norm: 0.999999434113492, iteration: 32754
loss: 1.0057001113891602,grad_norm: 0.9999993483481355, iteration: 32755
loss: 0.988631010055542,grad_norm: 0.9059984305963644, iteration: 32756
loss: 1.035441279411316,grad_norm: 0.9999990272730338, iteration: 32757
loss: 1.0063248872756958,grad_norm: 0.8978862397621568, iteration: 32758
loss: 1.0354697704315186,grad_norm: 0.9999996591834246, iteration: 32759
loss: 1.0163352489471436,grad_norm: 0.9999992421672979, iteration: 32760
loss: 0.9979294538497925,grad_norm: 0.9999992304231857, iteration: 32761
loss: 1.0524197816848755,grad_norm: 0.9999992625067022, iteration: 32762
loss: 0.962567150592804,grad_norm: 0.929431329708668, iteration: 32763
loss: 0.9981677532196045,grad_norm: 0.999999057454852, iteration: 32764
loss: 0.9575523734092712,grad_norm: 0.810155703374757, iteration: 32765
loss: 1.0025553703308105,grad_norm: 0.9999989908942023, iteration: 32766
loss: 1.0304936170578003,grad_norm: 0.9999990811363738, iteration: 32767
loss: 0.9688113927841187,grad_norm: 0.9855978417336355, iteration: 32768
loss: 1.001333475112915,grad_norm: 0.9999991135930272, iteration: 32769
loss: 1.0201574563980103,grad_norm: 0.9999991373595892, iteration: 32770
loss: 1.0134906768798828,grad_norm: 0.9424515571388123, iteration: 32771
loss: 0.9969465732574463,grad_norm: 0.9999989995679567, iteration: 32772
loss: 1.0137518644332886,grad_norm: 0.9999992017862296, iteration: 32773
loss: 1.0251449346542358,grad_norm: 0.9400922590924339, iteration: 32774
loss: 0.9982015490531921,grad_norm: 0.9035624400138509, iteration: 32775
loss: 0.9769100546836853,grad_norm: 0.9578509599291274, iteration: 32776
loss: 0.986561119556427,grad_norm: 0.8940945178279253, iteration: 32777
loss: 1.0095759630203247,grad_norm: 0.9834598790561957, iteration: 32778
loss: 1.0208022594451904,grad_norm: 0.807627753596147, iteration: 32779
loss: 1.0028035640716553,grad_norm: 0.999999160551161, iteration: 32780
loss: 1.0665228366851807,grad_norm: 0.8932010494760829, iteration: 32781
loss: 0.9734120965003967,grad_norm: 0.9999990284157373, iteration: 32782
loss: 1.036515712738037,grad_norm: 0.9999995162285298, iteration: 32783
loss: 0.99269700050354,grad_norm: 0.8031357591757708, iteration: 32784
loss: 1.0356292724609375,grad_norm: 0.7810848915380795, iteration: 32785
loss: 1.0072102546691895,grad_norm: 0.9999991559949202, iteration: 32786
loss: 1.0358763933181763,grad_norm: 0.9720908654311489, iteration: 32787
loss: 0.9805343151092529,grad_norm: 0.999999014313557, iteration: 32788
loss: 1.012935757637024,grad_norm: 0.9999991350709978, iteration: 32789
loss: 1.0106842517852783,grad_norm: 0.932416112277285, iteration: 32790
loss: 0.9868271946907043,grad_norm: 0.9999991529157203, iteration: 32791
loss: 0.9534544348716736,grad_norm: 0.9660699107919369, iteration: 32792
loss: 1.048854947090149,grad_norm: 0.9637254841779308, iteration: 32793
loss: 1.009454369544983,grad_norm: 0.8624247962549073, iteration: 32794
loss: 0.9960371255874634,grad_norm: 0.7364008742784797, iteration: 32795
loss: 1.0168594121932983,grad_norm: 0.9644111893359929, iteration: 32796
loss: 1.0041438341140747,grad_norm: 0.9999992799141996, iteration: 32797
loss: 0.9762074947357178,grad_norm: 0.8393305818930874, iteration: 32798
loss: 1.005335807800293,grad_norm: 0.7740978544477465, iteration: 32799
loss: 1.0000325441360474,grad_norm: 0.9999993184863063, iteration: 32800
loss: 1.0939661264419556,grad_norm: 0.9999997079469216, iteration: 32801
loss: 1.0767066478729248,grad_norm: 0.9999995752949224, iteration: 32802
loss: 0.9756149649620056,grad_norm: 0.999999713471881, iteration: 32803
loss: 1.0536223649978638,grad_norm: 0.9999995978698109, iteration: 32804
loss: 1.0011727809906006,grad_norm: 0.9765143208348429, iteration: 32805
loss: 0.9972787499427795,grad_norm: 0.8109444843926132, iteration: 32806
loss: 1.0369751453399658,grad_norm: 0.9999996236345781, iteration: 32807
loss: 0.9982609748840332,grad_norm: 0.8139323933799704, iteration: 32808
loss: 0.9975234866142273,grad_norm: 0.9999990512290429, iteration: 32809
loss: 1.0095065832138062,grad_norm: 0.9999991624202123, iteration: 32810
loss: 0.9765785336494446,grad_norm: 0.9894645900045465, iteration: 32811
loss: 0.9935714602470398,grad_norm: 0.8203912489356237, iteration: 32812
loss: 1.0112557411193848,grad_norm: 0.8558174328074085, iteration: 32813
loss: 0.9966996908187866,grad_norm: 0.9331337946587761, iteration: 32814
loss: 1.001822590827942,grad_norm: 0.9999990368757706, iteration: 32815
loss: 1.0022337436676025,grad_norm: 0.9154921743638915, iteration: 32816
loss: 1.0307222604751587,grad_norm: 0.9999991154173209, iteration: 32817
loss: 1.0663248300552368,grad_norm: 0.9999995301325756, iteration: 32818
loss: 1.032441258430481,grad_norm: 0.818204345008472, iteration: 32819
loss: 0.9979393482208252,grad_norm: 0.8387449003122189, iteration: 32820
loss: 1.006739616394043,grad_norm: 0.9999991395057782, iteration: 32821
loss: 1.0140728950500488,grad_norm: 0.8281643506343481, iteration: 32822
loss: 1.0520023107528687,grad_norm: 0.9999992288807455, iteration: 32823
loss: 1.0523732900619507,grad_norm: 0.999999276526232, iteration: 32824
loss: 1.0064105987548828,grad_norm: 0.9999995067129456, iteration: 32825
loss: 1.0088080167770386,grad_norm: 0.9999993463901251, iteration: 32826
loss: 0.982070803642273,grad_norm: 0.999999359874165, iteration: 32827
loss: 1.0136842727661133,grad_norm: 0.9757733435948839, iteration: 32828
loss: 1.0553430318832397,grad_norm: 0.9242397166912658, iteration: 32829
loss: 0.9580928087234497,grad_norm: 0.8714130363454767, iteration: 32830
loss: 0.9981221556663513,grad_norm: 0.9278278661604852, iteration: 32831
loss: 1.0074269771575928,grad_norm: 0.9999999270728641, iteration: 32832
loss: 1.0729382038116455,grad_norm: 0.99999955662345, iteration: 32833
loss: 1.0014033317565918,grad_norm: 0.9999994604307678, iteration: 32834
loss: 1.010098934173584,grad_norm: 0.9209764051045977, iteration: 32835
loss: 1.0081278085708618,grad_norm: 0.9999991989009248, iteration: 32836
loss: 1.0097397565841675,grad_norm: 0.8920211104198263, iteration: 32837
loss: 0.9706854820251465,grad_norm: 0.9999990270954247, iteration: 32838
loss: 1.0220024585723877,grad_norm: 0.9999990964054614, iteration: 32839
loss: 1.028958797454834,grad_norm: 0.9999992464476624, iteration: 32840
loss: 1.0065290927886963,grad_norm: 0.9999991252844631, iteration: 32841
loss: 0.9910820126533508,grad_norm: 0.9398227132847058, iteration: 32842
loss: 1.0042554140090942,grad_norm: 0.9999999001046299, iteration: 32843
loss: 1.045091152191162,grad_norm: 0.9165796201854816, iteration: 32844
loss: 1.0239505767822266,grad_norm: 0.9432212068789237, iteration: 32845
loss: 1.0352457761764526,grad_norm: 0.9377953746647293, iteration: 32846
loss: 0.9408519268035889,grad_norm: 0.9999990513188795, iteration: 32847
loss: 0.9982877969741821,grad_norm: 0.9999991702999171, iteration: 32848
loss: 1.0083179473876953,grad_norm: 0.9197410767830952, iteration: 32849
loss: 1.0282155275344849,grad_norm: 0.9999993228161805, iteration: 32850
loss: 1.0084640979766846,grad_norm: 0.9999994434150434, iteration: 32851
loss: 1.010072112083435,grad_norm: 0.959281986275764, iteration: 32852
loss: 1.0083376169204712,grad_norm: 0.999999157069399, iteration: 32853
loss: 1.0119627714157104,grad_norm: 0.9700024092097012, iteration: 32854
loss: 1.0052320957183838,grad_norm: 0.899859626593864, iteration: 32855
loss: 0.9840798377990723,grad_norm: 0.9999992277735066, iteration: 32856
loss: 0.9829192757606506,grad_norm: 0.8973541609528882, iteration: 32857
loss: 1.0273555517196655,grad_norm: 0.9325939981461858, iteration: 32858
loss: 0.9777150750160217,grad_norm: 0.9999992292985662, iteration: 32859
loss: 1.008927822113037,grad_norm: 0.9942047277806936, iteration: 32860
loss: 1.004655122756958,grad_norm: 0.9999994066853918, iteration: 32861
loss: 1.006893277168274,grad_norm: 0.9986041687604751, iteration: 32862
loss: 1.0070875883102417,grad_norm: 0.9377509413773846, iteration: 32863
loss: 1.022229552268982,grad_norm: 0.9999991335898717, iteration: 32864
loss: 1.0143147706985474,grad_norm: 0.945505409311843, iteration: 32865
loss: 1.0597723722457886,grad_norm: 0.9999990731504151, iteration: 32866
loss: 1.0233968496322632,grad_norm: 0.8219455066210046, iteration: 32867
loss: 0.9677750468254089,grad_norm: 0.9655402969193777, iteration: 32868
loss: 1.0455840826034546,grad_norm: 0.9999998809227074, iteration: 32869
loss: 0.9832163453102112,grad_norm: 0.8060657339069446, iteration: 32870
loss: 1.0041954517364502,grad_norm: 0.9999991484443089, iteration: 32871
loss: 0.9849671721458435,grad_norm: 0.9999990028836283, iteration: 32872
loss: 1.005839228630066,grad_norm: 0.9999992276887798, iteration: 32873
loss: 0.9793309569358826,grad_norm: 0.9744747157212887, iteration: 32874
loss: 1.033808946609497,grad_norm: 0.9999993840899144, iteration: 32875
loss: 1.0389755964279175,grad_norm: 0.9111921172768431, iteration: 32876
loss: 1.0286650657653809,grad_norm: 0.9999991580917961, iteration: 32877
loss: 0.9881687760353088,grad_norm: 0.8535548153498774, iteration: 32878
loss: 0.9784078598022461,grad_norm: 0.9804271987719072, iteration: 32879
loss: 0.9827431440353394,grad_norm: 0.999999067227096, iteration: 32880
loss: 0.9825258255004883,grad_norm: 0.9169682389637246, iteration: 32881
loss: 1.0250327587127686,grad_norm: 0.9999992213212912, iteration: 32882
loss: 1.037663221359253,grad_norm: 0.8620074819425603, iteration: 32883
loss: 1.0094785690307617,grad_norm: 0.8542613904143276, iteration: 32884
loss: 1.016580581665039,grad_norm: 0.9999991087872024, iteration: 32885
loss: 1.071649432182312,grad_norm: 0.9999998805128152, iteration: 32886
loss: 0.9971648454666138,grad_norm: 0.9999990534428583, iteration: 32887
loss: 1.0556057691574097,grad_norm: 0.9999993797955031, iteration: 32888
loss: 1.019325613975525,grad_norm: 0.9999991010597085, iteration: 32889
loss: 0.9928662776947021,grad_norm: 0.7911670805960158, iteration: 32890
loss: 1.0114028453826904,grad_norm: 0.9999990817682358, iteration: 32891
loss: 1.001495361328125,grad_norm: 0.9268891073137692, iteration: 32892
loss: 1.040641188621521,grad_norm: 0.9174753983373863, iteration: 32893
loss: 0.937932014465332,grad_norm: 0.9999990660607461, iteration: 32894
loss: 1.004371166229248,grad_norm: 0.9999990682380164, iteration: 32895
loss: 0.9901722073554993,grad_norm: 0.9999991713618639, iteration: 32896
loss: 1.0139981508255005,grad_norm: 0.9374679206473419, iteration: 32897
loss: 1.0308400392532349,grad_norm: 0.9084574488851255, iteration: 32898
loss: 0.9974871873855591,grad_norm: 0.7606664964574903, iteration: 32899
loss: 1.0045745372772217,grad_norm: 0.7958434866930113, iteration: 32900
loss: 0.9900393486022949,grad_norm: 0.9999992374642755, iteration: 32901
loss: 1.017332911491394,grad_norm: 0.9999991776915145, iteration: 32902
loss: 1.0210052728652954,grad_norm: 0.9999990880735451, iteration: 32903
loss: 0.9789997935295105,grad_norm: 0.8982227027274314, iteration: 32904
loss: 1.0189106464385986,grad_norm: 0.9999992057036204, iteration: 32905
loss: 1.0191773176193237,grad_norm: 0.9575944992056846, iteration: 32906
loss: 0.9967300295829773,grad_norm: 0.9999991347920677, iteration: 32907
loss: 0.9908636808395386,grad_norm: 0.9835316286923147, iteration: 32908
loss: 1.0068974494934082,grad_norm: 0.8289404455250418, iteration: 32909
loss: 1.0120635032653809,grad_norm: 0.9999990997398768, iteration: 32910
loss: 0.991515040397644,grad_norm: 0.925813398699411, iteration: 32911
loss: 1.0108615159988403,grad_norm: 0.9999990516044038, iteration: 32912
loss: 1.0230449438095093,grad_norm: 0.9999990783335294, iteration: 32913
loss: 0.9968190789222717,grad_norm: 0.9999990576806334, iteration: 32914
loss: 1.0049623250961304,grad_norm: 0.9615666985251715, iteration: 32915
loss: 0.9904915690422058,grad_norm: 0.9921837384969288, iteration: 32916
loss: 0.9972861409187317,grad_norm: 0.9014464264681707, iteration: 32917
loss: 1.0084937810897827,grad_norm: 0.9999992824503909, iteration: 32918
loss: 1.0212278366088867,grad_norm: 0.9699762429848287, iteration: 32919
loss: 1.0006767511367798,grad_norm: 0.9414118866374767, iteration: 32920
loss: 1.030862808227539,grad_norm: 0.9999991557970648, iteration: 32921
loss: 0.9588184952735901,grad_norm: 0.8838047380365981, iteration: 32922
loss: 1.0025660991668701,grad_norm: 0.9893522378802517, iteration: 32923
loss: 0.9975971579551697,grad_norm: 0.8895669082264126, iteration: 32924
loss: 1.0194761753082275,grad_norm: 0.9999989730330207, iteration: 32925
loss: 0.9804251194000244,grad_norm: 0.9419569734905324, iteration: 32926
loss: 0.9905482530593872,grad_norm: 0.9667118588390091, iteration: 32927
loss: 1.0134536027908325,grad_norm: 0.9388103932077941, iteration: 32928
loss: 1.0229912996292114,grad_norm: 0.9793899507370146, iteration: 32929
loss: 1.0707509517669678,grad_norm: 1.0000000038224368, iteration: 32930
loss: 0.9949150681495667,grad_norm: 0.999999179657428, iteration: 32931
loss: 1.0076807737350464,grad_norm: 0.9540856134610033, iteration: 32932
loss: 1.05844247341156,grad_norm: 0.9999993049017813, iteration: 32933
loss: 0.9821175336837769,grad_norm: 0.8771341991256417, iteration: 32934
loss: 0.9861600995063782,grad_norm: 0.9999991195049227, iteration: 32935
loss: 1.0472420454025269,grad_norm: 0.9999991629763985, iteration: 32936
loss: 0.991637110710144,grad_norm: 0.8787980403796358, iteration: 32937
loss: 1.0153220891952515,grad_norm: 0.9999991440032143, iteration: 32938
loss: 1.0310512781143188,grad_norm: 0.8730604975056989, iteration: 32939
loss: 1.0556427240371704,grad_norm: 0.9999989466356136, iteration: 32940
loss: 0.9954842329025269,grad_norm: 0.9780403964983746, iteration: 32941
loss: 0.981634259223938,grad_norm: 0.7731128388288282, iteration: 32942
loss: 1.0230650901794434,grad_norm: 0.999999175090268, iteration: 32943
loss: 1.0038857460021973,grad_norm: 0.9117603700471105, iteration: 32944
loss: 0.978832483291626,grad_norm: 0.91312063285227, iteration: 32945
loss: 0.9938799738883972,grad_norm: 0.8752060886848905, iteration: 32946
loss: 1.048117756843567,grad_norm: 0.8187250136023265, iteration: 32947
loss: 1.0408536195755005,grad_norm: 0.9999991366388377, iteration: 32948
loss: 1.018625259399414,grad_norm: 0.9999989721775312, iteration: 32949
loss: 1.0275150537490845,grad_norm: 0.9999990716923808, iteration: 32950
loss: 1.0117770433425903,grad_norm: 0.9999991065618573, iteration: 32951
loss: 0.9986816644668579,grad_norm: 0.999999180375737, iteration: 32952
loss: 1.0410850048065186,grad_norm: 0.9999990721815897, iteration: 32953
loss: 1.014263391494751,grad_norm: 0.9999993670774746, iteration: 32954
loss: 1.0220550298690796,grad_norm: 0.9999990379118385, iteration: 32955
loss: 1.0129477977752686,grad_norm: 0.9999990577545728, iteration: 32956
loss: 1.0059012174606323,grad_norm: 0.9999990838538246, iteration: 32957
loss: 0.9681475162506104,grad_norm: 0.8444661454378133, iteration: 32958
loss: 1.0185047388076782,grad_norm: 0.8547274791898584, iteration: 32959
loss: 0.9942159056663513,grad_norm: 0.8440707936394032, iteration: 32960
loss: 1.004111647605896,grad_norm: 0.8762432377048026, iteration: 32961
loss: 1.0295703411102295,grad_norm: 0.8471998260199709, iteration: 32962
loss: 0.9990406632423401,grad_norm: 0.9373701457706956, iteration: 32963
loss: 1.0140931606292725,grad_norm: 0.9999991704681646, iteration: 32964
loss: 1.018200397491455,grad_norm: 0.9782920458051584, iteration: 32965
loss: 1.0567667484283447,grad_norm: 0.990118310613817, iteration: 32966
loss: 0.9905971884727478,grad_norm: 0.820656190430138, iteration: 32967
loss: 0.9775161743164062,grad_norm: 0.9337344385202868, iteration: 32968
loss: 1.0092625617980957,grad_norm: 0.9999991471706483, iteration: 32969
loss: 0.9876243472099304,grad_norm: 0.9999992823664146, iteration: 32970
loss: 1.0114847421646118,grad_norm: 0.9999991347742857, iteration: 32971
loss: 1.0215591192245483,grad_norm: 0.9999989596127414, iteration: 32972
loss: 0.9502034783363342,grad_norm: 0.9999990516611665, iteration: 32973
loss: 1.0342779159545898,grad_norm: 0.8880540811004815, iteration: 32974
loss: 0.9668712019920349,grad_norm: 0.9231261285104797, iteration: 32975
loss: 1.0154918432235718,grad_norm: 0.8125395699854474, iteration: 32976
loss: 1.0435110330581665,grad_norm: 0.9999990253673556, iteration: 32977
loss: 1.008919596672058,grad_norm: 0.9999991816114547, iteration: 32978
loss: 0.9684127569198608,grad_norm: 0.9763262678262601, iteration: 32979
loss: 0.9401211738586426,grad_norm: 0.9566213264456176, iteration: 32980
loss: 1.0131523609161377,grad_norm: 0.9999991575666366, iteration: 32981
loss: 1.0071070194244385,grad_norm: 0.8665414700219037, iteration: 32982
loss: 0.9855152368545532,grad_norm: 0.9308801317333854, iteration: 32983
loss: 0.9509692788124084,grad_norm: 0.8523462235476975, iteration: 32984
loss: 1.0360487699508667,grad_norm: 0.9999995413290885, iteration: 32985
loss: 0.9985509514808655,grad_norm: 0.9999990847816101, iteration: 32986
loss: 1.0399061441421509,grad_norm: 0.9999991052969893, iteration: 32987
loss: 1.019619345664978,grad_norm: 0.9999995593722472, iteration: 32988
loss: 1.0011004209518433,grad_norm: 0.8938998078651451, iteration: 32989
loss: 0.9879915118217468,grad_norm: 0.9396040657019578, iteration: 32990
loss: 1.04002845287323,grad_norm: 0.999999178226223, iteration: 32991
loss: 0.9947990775108337,grad_norm: 0.8585813588746908, iteration: 32992
loss: 1.0351014137268066,grad_norm: 0.8661439127334001, iteration: 32993
loss: 0.9904321432113647,grad_norm: 0.9999996334349602, iteration: 32994
loss: 1.0712896585464478,grad_norm: 0.9999993708507213, iteration: 32995
loss: 1.0300880670547485,grad_norm: 0.9999994090555705, iteration: 32996
loss: 1.029101014137268,grad_norm: 0.9278229880328683, iteration: 32997
loss: 0.9736442565917969,grad_norm: 0.9999991515215043, iteration: 32998
loss: 1.0069984197616577,grad_norm: 0.8763109663005195, iteration: 32999
loss: 0.9748657941818237,grad_norm: 0.8370983190923524, iteration: 33000
loss: 1.02953040599823,grad_norm: 0.9999990752558563, iteration: 33001
loss: 1.0164940357208252,grad_norm: 0.9654821878890266, iteration: 33002
loss: 0.9672390222549438,grad_norm: 0.9999992356395032, iteration: 33003
loss: 0.9501248002052307,grad_norm: 0.9999991387656185, iteration: 33004
loss: 0.997863233089447,grad_norm: 0.9999990922836697, iteration: 33005
loss: 1.0044758319854736,grad_norm: 0.7655747420243171, iteration: 33006
loss: 1.0155795812606812,grad_norm: 0.9999991077438874, iteration: 33007
loss: 1.0182781219482422,grad_norm: 0.9999995398980578, iteration: 33008
loss: 0.9823984503746033,grad_norm: 0.9054692714384389, iteration: 33009
loss: 0.9654037356376648,grad_norm: 0.967456547146419, iteration: 33010
loss: 0.9738211631774902,grad_norm: 0.8709768936237543, iteration: 33011
loss: 1.0364078283309937,grad_norm: 0.9514760239402037, iteration: 33012
loss: 1.009962797164917,grad_norm: 0.9610078636635043, iteration: 33013
loss: 1.0149247646331787,grad_norm: 0.940991728995279, iteration: 33014
loss: 1.0040594339370728,grad_norm: 0.999999036014547, iteration: 33015
loss: 0.9966971278190613,grad_norm: 0.9999991401848919, iteration: 33016
loss: 1.0301296710968018,grad_norm: 0.9999991724590603, iteration: 33017
loss: 1.011946678161621,grad_norm: 0.9999990300530003, iteration: 33018
loss: 1.0185914039611816,grad_norm: 0.8463751335460965, iteration: 33019
loss: 0.951954185962677,grad_norm: 0.8875281855218821, iteration: 33020
loss: 1.0028914213180542,grad_norm: 0.8800472337845114, iteration: 33021
loss: 1.0092310905456543,grad_norm: 0.8939515541894514, iteration: 33022
loss: 1.0277658700942993,grad_norm: 0.9999989872989444, iteration: 33023
loss: 0.9786762595176697,grad_norm: 0.7737520311004318, iteration: 33024
loss: 0.9755352735519409,grad_norm: 0.9999991380527729, iteration: 33025
loss: 1.013590931892395,grad_norm: 0.9903665683370658, iteration: 33026
loss: 0.987572193145752,grad_norm: 0.999999153633773, iteration: 33027
loss: 0.982994794845581,grad_norm: 0.8672335225339166, iteration: 33028
loss: 1.029518485069275,grad_norm: 0.9999990240036812, iteration: 33029
loss: 1.0640214681625366,grad_norm: 0.9999990491735419, iteration: 33030
loss: 0.9852760434150696,grad_norm: 0.9999990768575565, iteration: 33031
loss: 0.9979498386383057,grad_norm: 0.9999990492015731, iteration: 33032
loss: 1.0333056449890137,grad_norm: 0.9999994514446843, iteration: 33033
loss: 1.0126769542694092,grad_norm: 0.9356829170119718, iteration: 33034
loss: 1.0178698301315308,grad_norm: 0.84201271914076, iteration: 33035
loss: 0.9726148247718811,grad_norm: 0.9929858543840676, iteration: 33036
loss: 1.0382318496704102,grad_norm: 0.9999990605330938, iteration: 33037
loss: 0.9850128293037415,grad_norm: 0.9896289177087015, iteration: 33038
loss: 1.0018112659454346,grad_norm: 0.9501292532118996, iteration: 33039
loss: 1.0123484134674072,grad_norm: 0.9999990253040831, iteration: 33040
loss: 0.9768646359443665,grad_norm: 0.9999990668674791, iteration: 33041
loss: 1.028429388999939,grad_norm: 0.9999989987277554, iteration: 33042
loss: 0.9797894358634949,grad_norm: 0.9999992584593886, iteration: 33043
loss: 0.9742313623428345,grad_norm: 0.8778356579262668, iteration: 33044
loss: 0.9872717261314392,grad_norm: 0.8783089665535754, iteration: 33045
loss: 0.9884611368179321,grad_norm: 0.9235035313710857, iteration: 33046
loss: 1.0206148624420166,grad_norm: 0.917368040473159, iteration: 33047
loss: 1.0182561874389648,grad_norm: 0.9102469576096991, iteration: 33048
loss: 0.9907858371734619,grad_norm: 0.9607816425174112, iteration: 33049
loss: 1.0361669063568115,grad_norm: 0.9075222451453474, iteration: 33050
loss: 1.0305200815200806,grad_norm: 0.9999991345699584, iteration: 33051
loss: 1.0253477096557617,grad_norm: 0.7956234307975634, iteration: 33052
loss: 1.0345221757888794,grad_norm: 0.8972237120539759, iteration: 33053
loss: 1.0076777935028076,grad_norm: 0.9999990472603382, iteration: 33054
loss: 0.9913622140884399,grad_norm: 0.9919542441418796, iteration: 33055
loss: 1.0000758171081543,grad_norm: 0.964553151619612, iteration: 33056
loss: 0.9923815727233887,grad_norm: 0.8321924692812552, iteration: 33057
loss: 1.0130250453948975,grad_norm: 0.9999998890915579, iteration: 33058
loss: 0.9843511581420898,grad_norm: 0.9999988826896258, iteration: 33059
loss: 1.0174660682678223,grad_norm: 0.9999991711661054, iteration: 33060
loss: 1.016802430152893,grad_norm: 0.9488331740410813, iteration: 33061
loss: 0.9978625178337097,grad_norm: 0.9999991091398047, iteration: 33062
loss: 0.99953293800354,grad_norm: 0.9999990725020342, iteration: 33063
loss: 1.038447618484497,grad_norm: 0.9999991408263734, iteration: 33064
loss: 1.023904800415039,grad_norm: 0.8652199701193062, iteration: 33065
loss: 0.9887849688529968,grad_norm: 0.9619037654519027, iteration: 33066
loss: 1.004095435142517,grad_norm: 0.9999991748063284, iteration: 33067
loss: 1.0131714344024658,grad_norm: 0.9999991315378005, iteration: 33068
loss: 1.0187524557113647,grad_norm: 0.90905451168657, iteration: 33069
loss: 0.9983658194541931,grad_norm: 0.920407496856078, iteration: 33070
loss: 1.023991346359253,grad_norm: 0.9999993308253463, iteration: 33071
loss: 1.0023295879364014,grad_norm: 0.9999992765053893, iteration: 33072
loss: 1.0015407800674438,grad_norm: 0.9548032278431237, iteration: 33073
loss: 1.0214402675628662,grad_norm: 0.9999992814514864, iteration: 33074
loss: 0.9876629114151001,grad_norm: 0.8959058747850835, iteration: 33075
loss: 1.008489727973938,grad_norm: 0.9999991539730466, iteration: 33076
loss: 1.0147830247879028,grad_norm: 0.9999994065520821, iteration: 33077
loss: 0.9798248410224915,grad_norm: 0.9127742418620852, iteration: 33078
loss: 1.0426312685012817,grad_norm: 0.9999991825013794, iteration: 33079
loss: 0.9733964800834656,grad_norm: 0.8394441035413063, iteration: 33080
loss: 1.0084563493728638,grad_norm: 0.9720309752592319, iteration: 33081
loss: 1.0094305276870728,grad_norm: 0.9999990636404699, iteration: 33082
loss: 1.0107802152633667,grad_norm: 0.9114761449287951, iteration: 33083
loss: 0.9680604338645935,grad_norm: 0.9123829775500399, iteration: 33084
loss: 0.9828699231147766,grad_norm: 0.8881891897184301, iteration: 33085
loss: 1.0027556419372559,grad_norm: 0.8975331505079677, iteration: 33086
loss: 1.0110546350479126,grad_norm: 0.8356134563464819, iteration: 33087
loss: 0.972008466720581,grad_norm: 0.9382822058304139, iteration: 33088
loss: 0.9751697778701782,grad_norm: 0.9999992858294636, iteration: 33089
loss: 0.9802679419517517,grad_norm: 0.8694471295549011, iteration: 33090
loss: 0.9923512935638428,grad_norm: 0.9999990398283172, iteration: 33091
loss: 1.0139403343200684,grad_norm: 0.9999991187087196, iteration: 33092
loss: 1.0272092819213867,grad_norm: 0.9999990601806965, iteration: 33093
loss: 1.0027828216552734,grad_norm: 0.8691190237865295, iteration: 33094
loss: 0.9927459359169006,grad_norm: 0.9999995324704627, iteration: 33095
loss: 1.0229251384735107,grad_norm: 0.9999990669953278, iteration: 33096
loss: 1.021590232849121,grad_norm: 0.9999990698052444, iteration: 33097
loss: 0.9975535869598389,grad_norm: 0.9999993771623243, iteration: 33098
loss: 0.9622270464897156,grad_norm: 0.9830865932231075, iteration: 33099
loss: 1.0251773595809937,grad_norm: 0.9999991441420897, iteration: 33100
loss: 0.995089590549469,grad_norm: 0.9999991138437041, iteration: 33101
loss: 0.9738602042198181,grad_norm: 0.9963041158190704, iteration: 33102
loss: 1.0371639728546143,grad_norm: 0.9999992218802448, iteration: 33103
loss: 0.9825939536094666,grad_norm: 0.8568964044522506, iteration: 33104
loss: 1.0368679761886597,grad_norm: 0.9999990946512883, iteration: 33105
loss: 1.043961763381958,grad_norm: 0.9999993007742848, iteration: 33106
loss: 1.0026988983154297,grad_norm: 0.9999992048445907, iteration: 33107
loss: 1.0058181285858154,grad_norm: 0.9999994291832154, iteration: 33108
loss: 0.9978975653648376,grad_norm: 0.9492357193190117, iteration: 33109
loss: 0.9750936627388,grad_norm: 0.9465248039572268, iteration: 33110
loss: 0.9785782694816589,grad_norm: 0.960377463763263, iteration: 33111
loss: 1.0120333433151245,grad_norm: 0.9999990271905377, iteration: 33112
loss: 1.027869462966919,grad_norm: 0.9999990322455545, iteration: 33113
loss: 1.0449084043502808,grad_norm: 0.999999178061146, iteration: 33114
loss: 1.0208215713500977,grad_norm: 0.9472722303868838, iteration: 33115
loss: 0.9926155209541321,grad_norm: 0.7515368645894602, iteration: 33116
loss: 1.0193097591400146,grad_norm: 0.9999994601883939, iteration: 33117
loss: 1.0085750818252563,grad_norm: 0.9281001970275152, iteration: 33118
loss: 1.0248239040374756,grad_norm: 0.999999151426453, iteration: 33119
loss: 1.0080020427703857,grad_norm: 0.9352782674256817, iteration: 33120
loss: 1.035850167274475,grad_norm: 0.9999991720878382, iteration: 33121
loss: 1.011080265045166,grad_norm: 0.9999991061220675, iteration: 33122
loss: 1.0130819082260132,grad_norm: 0.9999993924512763, iteration: 33123
loss: 1.0087826251983643,grad_norm: 0.9625046328531658, iteration: 33124
loss: 1.0107587575912476,grad_norm: 0.9999992138049881, iteration: 33125
loss: 0.985872745513916,grad_norm: 0.9999989394220357, iteration: 33126
loss: 1.0089503526687622,grad_norm: 0.9999992357680085, iteration: 33127
loss: 1.047753930091858,grad_norm: 0.8805178583820283, iteration: 33128
loss: 0.9940927624702454,grad_norm: 0.9999991411474758, iteration: 33129
loss: 1.0213959217071533,grad_norm: 0.9999993604211275, iteration: 33130
loss: 1.0139507055282593,grad_norm: 0.9999992125234468, iteration: 33131
loss: 0.9965018630027771,grad_norm: 0.999999118159051, iteration: 33132
loss: 0.9763304591178894,grad_norm: 0.9199739874058376, iteration: 33133
loss: 0.9977625012397766,grad_norm: 0.9114683246789476, iteration: 33134
loss: 1.0262454748153687,grad_norm: 0.9999991401325709, iteration: 33135
loss: 0.9860996603965759,grad_norm: 0.9999990570005821, iteration: 33136
loss: 0.9916772842407227,grad_norm: 0.9999991031576534, iteration: 33137
loss: 0.9761806726455688,grad_norm: 0.9999991561032088, iteration: 33138
loss: 1.0267302989959717,grad_norm: 0.9999992761466447, iteration: 33139
loss: 0.9917989373207092,grad_norm: 0.9999992097099192, iteration: 33140
loss: 1.0310895442962646,grad_norm: 0.9906898293955297, iteration: 33141
loss: 1.0458182096481323,grad_norm: 0.9999991633384312, iteration: 33142
loss: 1.037053108215332,grad_norm: 0.9999992319472193, iteration: 33143
loss: 0.9842051267623901,grad_norm: 0.9999990260923557, iteration: 33144
loss: 1.0067532062530518,grad_norm: 0.9999991587471376, iteration: 33145
loss: 1.0086160898208618,grad_norm: 0.8234847939826705, iteration: 33146
loss: 1.0152232646942139,grad_norm: 0.8823000587193209, iteration: 33147
loss: 0.9751639366149902,grad_norm: 0.9835752259588124, iteration: 33148
loss: 1.0032001733779907,grad_norm: 0.8136041676675267, iteration: 33149
loss: 0.9936477541923523,grad_norm: 0.9999991017429358, iteration: 33150
loss: 0.9743027687072754,grad_norm: 0.9999990714926654, iteration: 33151
loss: 1.0511363744735718,grad_norm: 0.9960195622839455, iteration: 33152
loss: 1.0082169771194458,grad_norm: 0.9103760827000614, iteration: 33153
loss: 1.01681387424469,grad_norm: 0.7820732412775507, iteration: 33154
loss: 0.985186755657196,grad_norm: 0.999999159014466, iteration: 33155
loss: 1.0145318508148193,grad_norm: 0.9999992230836657, iteration: 33156
loss: 1.0103849172592163,grad_norm: 0.9999992458443343, iteration: 33157
loss: 1.0463052988052368,grad_norm: 0.9999995736499359, iteration: 33158
loss: 1.0047171115875244,grad_norm: 0.9438807153866137, iteration: 33159
loss: 1.0032373666763306,grad_norm: 0.9421049562343238, iteration: 33160
loss: 1.0299327373504639,grad_norm: 0.999999603881108, iteration: 33161
loss: 1.0211490392684937,grad_norm: 0.9999991069908419, iteration: 33162
loss: 1.0006768703460693,grad_norm: 0.8404690810077279, iteration: 33163
loss: 1.0010693073272705,grad_norm: 0.9550429663499294, iteration: 33164
loss: 1.0073785781860352,grad_norm: 0.9999992404575716, iteration: 33165
loss: 1.0106650590896606,grad_norm: 0.9516261093616561, iteration: 33166
loss: 0.9967843294143677,grad_norm: 0.9999990473114533, iteration: 33167
loss: 1.0201208591461182,grad_norm: 0.8500044784628286, iteration: 33168
loss: 1.037062406539917,grad_norm: 0.9999990423980423, iteration: 33169
loss: 0.9938146471977234,grad_norm: 0.8678101330276006, iteration: 33170
loss: 1.0157532691955566,grad_norm: 0.9999989932700775, iteration: 33171
loss: 1.0007004737854004,grad_norm: 0.9547439249641393, iteration: 33172
loss: 1.0622093677520752,grad_norm: 0.9008509651058243, iteration: 33173
loss: 0.9820020794868469,grad_norm: 0.9999993634577063, iteration: 33174
loss: 0.9625656008720398,grad_norm: 0.9999990875744162, iteration: 33175
loss: 0.987654983997345,grad_norm: 0.999999272503673, iteration: 33176
loss: 1.0189495086669922,grad_norm: 0.9224702451689162, iteration: 33177
loss: 0.9965816140174866,grad_norm: 0.9999992346728194, iteration: 33178
loss: 1.0058101415634155,grad_norm: 0.9999989963975624, iteration: 33179
loss: 1.0247958898544312,grad_norm: 0.999999219344605, iteration: 33180
loss: 1.0249868631362915,grad_norm: 0.987566959812435, iteration: 33181
loss: 0.986916184425354,grad_norm: 0.8944965404427798, iteration: 33182
loss: 1.043226957321167,grad_norm: 0.9999991359878263, iteration: 33183
loss: 0.9766006469726562,grad_norm: 0.9537875300217251, iteration: 33184
loss: 1.0112049579620361,grad_norm: 0.999999329716821, iteration: 33185
loss: 1.0029884576797485,grad_norm: 0.9997786242493479, iteration: 33186
loss: 1.0206701755523682,grad_norm: 0.8523546978088983, iteration: 33187
loss: 0.988271176815033,grad_norm: 0.981873202776566, iteration: 33188
loss: 1.005149245262146,grad_norm: 0.9959239960972031, iteration: 33189
loss: 1.0074489116668701,grad_norm: 0.9553454391597392, iteration: 33190
loss: 1.0215529203414917,grad_norm: 0.9999990997859552, iteration: 33191
loss: 1.0087953805923462,grad_norm: 0.844137613317742, iteration: 33192
loss: 0.998458743095398,grad_norm: 0.9456077716602752, iteration: 33193
loss: 1.0292446613311768,grad_norm: 0.9999994975595851, iteration: 33194
loss: 0.9598502516746521,grad_norm: 0.9809282996301272, iteration: 33195
loss: 0.9845831394195557,grad_norm: 0.9952254181486827, iteration: 33196
loss: 1.0201095342636108,grad_norm: 0.9999990999439369, iteration: 33197
loss: 1.045828938484192,grad_norm: 0.9999993660369578, iteration: 33198
loss: 1.0004531145095825,grad_norm: 0.9442430388519715, iteration: 33199
loss: 1.0160062313079834,grad_norm: 0.9753386912680118, iteration: 33200
loss: 1.03434419631958,grad_norm: 0.8787338256727959, iteration: 33201
loss: 0.9516834020614624,grad_norm: 0.9999991894368935, iteration: 33202
loss: 1.022505283355713,grad_norm: 0.9999992753907749, iteration: 33203
loss: 0.9879061579704285,grad_norm: 0.9848099394468649, iteration: 33204
loss: 0.9784722328186035,grad_norm: 0.9776030514467902, iteration: 33205
loss: 1.005386233329773,grad_norm: 0.897304437934689, iteration: 33206
loss: 1.0270414352416992,grad_norm: 0.9956978337704987, iteration: 33207
loss: 1.0317102670669556,grad_norm: 0.9763231253948569, iteration: 33208
loss: 1.0439624786376953,grad_norm: 0.999999549866805, iteration: 33209
loss: 1.0161868333816528,grad_norm: 0.9999990072057215, iteration: 33210
loss: 1.0009478330612183,grad_norm: 0.9999991852037481, iteration: 33211
loss: 1.0117257833480835,grad_norm: 0.999999265145112, iteration: 33212
loss: 1.0052361488342285,grad_norm: 0.8896698288996455, iteration: 33213
loss: 1.0029399394989014,grad_norm: 0.8855910227532109, iteration: 33214
loss: 0.9991256594657898,grad_norm: 0.9804293801871896, iteration: 33215
loss: 1.0145881175994873,grad_norm: 0.9999991243559304, iteration: 33216
loss: 0.9959297776222229,grad_norm: 0.9718622432124641, iteration: 33217
loss: 1.0208115577697754,grad_norm: 0.9037145604034414, iteration: 33218
loss: 0.9830138087272644,grad_norm: 0.9535817356688409, iteration: 33219
loss: 0.9788603186607361,grad_norm: 0.9655514157795848, iteration: 33220
loss: 0.9995867013931274,grad_norm: 0.8602176537688628, iteration: 33221
loss: 1.0030102729797363,grad_norm: 0.9999991072507163, iteration: 33222
loss: 1.0494099855422974,grad_norm: 0.9999992311638692, iteration: 33223
loss: 0.9939513206481934,grad_norm: 0.9999991453268964, iteration: 33224
loss: 0.9685084223747253,grad_norm: 0.9013071548859956, iteration: 33225
loss: 1.0242174863815308,grad_norm: 0.9999990957232541, iteration: 33226
loss: 1.0321696996688843,grad_norm: 0.9019887567129747, iteration: 33227
loss: 1.0053975582122803,grad_norm: 0.827353917347888, iteration: 33228
loss: 0.9852287173271179,grad_norm: 0.9373169350893515, iteration: 33229
loss: 0.9857615232467651,grad_norm: 0.9953697551228063, iteration: 33230
loss: 1.0095322132110596,grad_norm: 0.9175985829559629, iteration: 33231
loss: 1.030369758605957,grad_norm: 0.9998816957549692, iteration: 33232
loss: 1.0109740495681763,grad_norm: 0.956566126563935, iteration: 33233
loss: 1.0109825134277344,grad_norm: 0.8926631492792056, iteration: 33234
loss: 1.0291178226470947,grad_norm: 0.9999992600040813, iteration: 33235
loss: 1.0363914966583252,grad_norm: 0.9683223957775443, iteration: 33236
loss: 0.9950286149978638,grad_norm: 0.8360299328693873, iteration: 33237
loss: 0.9974608421325684,grad_norm: 0.996652105985149, iteration: 33238
loss: 0.9832336902618408,grad_norm: 0.9762057442263371, iteration: 33239
loss: 0.9912349581718445,grad_norm: 0.8962274340666107, iteration: 33240
loss: 0.9981917142868042,grad_norm: 0.9627966942560291, iteration: 33241
loss: 1.0171618461608887,grad_norm: 0.9999990904017313, iteration: 33242
loss: 0.9823191165924072,grad_norm: 0.8681959054270746, iteration: 33243
loss: 1.0052021741867065,grad_norm: 0.9150556011878511, iteration: 33244
loss: 0.9854260087013245,grad_norm: 0.9343925027625267, iteration: 33245
loss: 0.9821122884750366,grad_norm: 0.9999991562535048, iteration: 33246
loss: 1.0189977884292603,grad_norm: 0.9999995708181688, iteration: 33247
loss: 0.9760658144950867,grad_norm: 0.9323520235353862, iteration: 33248
loss: 1.009970784187317,grad_norm: 0.999999131725526, iteration: 33249
loss: 0.9968793988227844,grad_norm: 0.9999990973827708, iteration: 33250
loss: 1.0161879062652588,grad_norm: 0.9837101121454223, iteration: 33251
loss: 1.0248606204986572,grad_norm: 0.8840384831557646, iteration: 33252
loss: 0.9596312046051025,grad_norm: 0.9943892204324973, iteration: 33253
loss: 0.9910033941268921,grad_norm: 0.999999285135979, iteration: 33254
loss: 0.9792380332946777,grad_norm: 0.9999992597377625, iteration: 33255
loss: 1.0106748342514038,grad_norm: 0.7610358589941763, iteration: 33256
loss: 1.011563777923584,grad_norm: 0.9088576953362312, iteration: 33257
loss: 1.0842227935791016,grad_norm: 0.9071483215170854, iteration: 33258
loss: 0.9786520004272461,grad_norm: 0.9999993030228107, iteration: 33259
loss: 1.0254004001617432,grad_norm: 0.9789842549716828, iteration: 33260
loss: 0.966389000415802,grad_norm: 0.8676229253747472, iteration: 33261
loss: 1.0138274431228638,grad_norm: 0.9999991579364766, iteration: 33262
loss: 1.0197807550430298,grad_norm: 0.9999994537384811, iteration: 33263
loss: 1.009735345840454,grad_norm: 0.9999991131148122, iteration: 33264
loss: 1.0397993326187134,grad_norm: 0.9999990470651865, iteration: 33265
loss: 1.0110036134719849,grad_norm: 0.8559440863313481, iteration: 33266
loss: 0.9936793446540833,grad_norm: 0.9999991288759863, iteration: 33267
loss: 0.992309033870697,grad_norm: 0.9999990094361239, iteration: 33268
loss: 0.994653582572937,grad_norm: 0.999999051063484, iteration: 33269
loss: 0.9818981885910034,grad_norm: 0.999999111436703, iteration: 33270
loss: 1.0356299877166748,grad_norm: 0.999999182409095, iteration: 33271
loss: 0.9816213846206665,grad_norm: 0.9752756056929711, iteration: 33272
loss: 0.9495227932929993,grad_norm: 0.9999994179312196, iteration: 33273
loss: 1.0209614038467407,grad_norm: 0.907063012393858, iteration: 33274
loss: 1.000251293182373,grad_norm: 0.8768824555775364, iteration: 33275
loss: 1.0032331943511963,grad_norm: 0.93506150331908, iteration: 33276
loss: 0.9790732264518738,grad_norm: 0.9999990912266123, iteration: 33277
loss: 1.0064314603805542,grad_norm: 0.9807235485014276, iteration: 33278
loss: 1.0141854286193848,grad_norm: 0.9194659219394526, iteration: 33279
loss: 1.0057483911514282,grad_norm: 0.9481268563722105, iteration: 33280
loss: 1.0181232690811157,grad_norm: 0.9999992796792614, iteration: 33281
loss: 1.001208782196045,grad_norm: 0.9999991700732163, iteration: 33282
loss: 0.9823351502418518,grad_norm: 0.8885867021046238, iteration: 33283
loss: 1.0016733407974243,grad_norm: 0.9755068261365644, iteration: 33284
loss: 0.9989874958992004,grad_norm: 0.9999991166133403, iteration: 33285
loss: 1.0005648136138916,grad_norm: 0.9999996817766809, iteration: 33286
loss: 1.0099493265151978,grad_norm: 0.9999991583895439, iteration: 33287
loss: 0.964718759059906,grad_norm: 0.9832183357999548, iteration: 33288
loss: 1.0237691402435303,grad_norm: 0.8450455157161113, iteration: 33289
loss: 1.0209338665008545,grad_norm: 0.9999990179337389, iteration: 33290
loss: 0.977497935295105,grad_norm: 0.9425978828223417, iteration: 33291
loss: 1.0053255558013916,grad_norm: 0.7734148886767808, iteration: 33292
loss: 0.9650350213050842,grad_norm: 0.8931400292154403, iteration: 33293
loss: 0.9939666390419006,grad_norm: 0.9243258204503737, iteration: 33294
loss: 1.0105829238891602,grad_norm: 0.9798260189999222, iteration: 33295
loss: 1.0276862382888794,grad_norm: 0.9999995257809552, iteration: 33296
loss: 0.9921445250511169,grad_norm: 0.9047098067352534, iteration: 33297
loss: 1.0311998128890991,grad_norm: 0.9999992615351476, iteration: 33298
loss: 1.0215522050857544,grad_norm: 0.9999990566188494, iteration: 33299
loss: 0.9530686736106873,grad_norm: 0.9601031510660557, iteration: 33300
loss: 1.0095020532608032,grad_norm: 0.9999992065733777, iteration: 33301
loss: 0.9816861748695374,grad_norm: 0.9999989983104051, iteration: 33302
loss: 1.0879158973693848,grad_norm: 0.999999982321832, iteration: 33303
loss: 0.9694701433181763,grad_norm: 0.9999992507383113, iteration: 33304
loss: 1.0060654878616333,grad_norm: 0.9760859577851858, iteration: 33305
loss: 1.0261003971099854,grad_norm: 0.9802939183771722, iteration: 33306
loss: 0.994099497795105,grad_norm: 0.9264104323775371, iteration: 33307
loss: 1.003098487854004,grad_norm: 0.9250186761023793, iteration: 33308
loss: 0.994545042514801,grad_norm: 0.90576461059142, iteration: 33309
loss: 0.9952337741851807,grad_norm: 0.9999991656895396, iteration: 33310
loss: 1.013142704963684,grad_norm: 0.9999991617827809, iteration: 33311
loss: 1.0374665260314941,grad_norm: 0.999999259124974, iteration: 33312
loss: 1.025158405303955,grad_norm: 0.9830394653811902, iteration: 33313
loss: 1.0218547582626343,grad_norm: 0.9999992393103202, iteration: 33314
loss: 1.0298728942871094,grad_norm: 0.9698465420248455, iteration: 33315
loss: 1.0701237916946411,grad_norm: 0.9999997530235655, iteration: 33316
loss: 0.9917210340499878,grad_norm: 0.999999135559226, iteration: 33317
loss: 1.0152909755706787,grad_norm: 0.9999992515584684, iteration: 33318
loss: 1.014741063117981,grad_norm: 0.9999999611563037, iteration: 33319
loss: 1.002436876296997,grad_norm: 0.9999990296681457, iteration: 33320
loss: 0.9961310029029846,grad_norm: 0.9999991483477727, iteration: 33321
loss: 1.0287549495697021,grad_norm: 0.8363279839042952, iteration: 33322
loss: 1.0148969888687134,grad_norm: 0.9999992917873056, iteration: 33323
loss: 1.0189310312271118,grad_norm: 0.8402750424710264, iteration: 33324
loss: 1.0268609523773193,grad_norm: 0.98149936760459, iteration: 33325
loss: 0.9935951828956604,grad_norm: 0.917118945794058, iteration: 33326
loss: 1.0360751152038574,grad_norm: 0.9597701601841216, iteration: 33327
loss: 1.0076196193695068,grad_norm: 0.9072449934544329, iteration: 33328
loss: 1.0259768962860107,grad_norm: 0.9999994708452121, iteration: 33329
loss: 0.977224588394165,grad_norm: 0.9171326300504721, iteration: 33330
loss: 1.0648548603057861,grad_norm: 0.9155404047739106, iteration: 33331
loss: 1.030137062072754,grad_norm: 0.7695836710237551, iteration: 33332
loss: 0.9974743723869324,grad_norm: 0.8597557602243635, iteration: 33333
loss: 0.9866859316825867,grad_norm: 0.9999995040373844, iteration: 33334
loss: 1.0305074453353882,grad_norm: 0.9999993239014237, iteration: 33335
loss: 0.9882451891899109,grad_norm: 0.9999991077357014, iteration: 33336
loss: 0.9962546825408936,grad_norm: 0.882541466466753, iteration: 33337
loss: 1.0025694370269775,grad_norm: 0.8191751564300718, iteration: 33338
loss: 1.060687780380249,grad_norm: 0.9999997428657373, iteration: 33339
loss: 1.014207124710083,grad_norm: 0.9999992529781401, iteration: 33340
loss: 0.9943196773529053,grad_norm: 0.9999989517307362, iteration: 33341
loss: 1.0303441286087036,grad_norm: 0.8843902263349329, iteration: 33342
loss: 1.0319939851760864,grad_norm: 0.9413235247036202, iteration: 33343
loss: 1.0307775735855103,grad_norm: 0.9192534592842839, iteration: 33344
loss: 1.0414104461669922,grad_norm: 0.822783199190317, iteration: 33345
loss: 1.0101094245910645,grad_norm: 0.978855119383322, iteration: 33346
loss: 1.0347278118133545,grad_norm: 0.9999991165416972, iteration: 33347
loss: 1.0100898742675781,grad_norm: 0.9999991202825058, iteration: 33348
loss: 1.0284501314163208,grad_norm: 0.9999990440175989, iteration: 33349
loss: 0.9807928800582886,grad_norm: 0.9999990422759579, iteration: 33350
loss: 0.984768807888031,grad_norm: 0.9999990405169571, iteration: 33351
loss: 1.01301109790802,grad_norm: 0.999998947045296, iteration: 33352
loss: 1.016693353652954,grad_norm: 0.8131724746824323, iteration: 33353
loss: 1.0125309228897095,grad_norm: 0.9999990295314443, iteration: 33354
loss: 1.0125224590301514,grad_norm: 0.9999990891272904, iteration: 33355
loss: 0.9666794538497925,grad_norm: 0.9999989967994568, iteration: 33356
loss: 1.0017657279968262,grad_norm: 0.9999992260043619, iteration: 33357
loss: 0.9615263938903809,grad_norm: 0.9469807136748531, iteration: 33358
loss: 1.0027447938919067,grad_norm: 0.9999990797836179, iteration: 33359
loss: 1.0037856101989746,grad_norm: 0.9110856815035119, iteration: 33360
loss: 0.9849624037742615,grad_norm: 0.8966696788590075, iteration: 33361
loss: 1.0181108713150024,grad_norm: 0.9239330248948145, iteration: 33362
loss: 0.9841925501823425,grad_norm: 0.8296752562445153, iteration: 33363
loss: 0.9562126398086548,grad_norm: 0.9999991683395523, iteration: 33364
loss: 1.0600793361663818,grad_norm: 0.7783103706255867, iteration: 33365
loss: 1.0102379322052002,grad_norm: 0.9295685684431815, iteration: 33366
loss: 0.9952282309532166,grad_norm: 0.9607677570115214, iteration: 33367
loss: 0.9976792931556702,grad_norm: 0.9773954790600309, iteration: 33368
loss: 1.0540506839752197,grad_norm: 0.9999998607092365, iteration: 33369
loss: 1.0155079364776611,grad_norm: 0.9407463906293785, iteration: 33370
loss: 0.9775018692016602,grad_norm: 0.8118429035363776, iteration: 33371
loss: 1.00296151638031,grad_norm: 0.9999990908987881, iteration: 33372
loss: 0.9856842160224915,grad_norm: 0.8666621078987619, iteration: 33373
loss: 1.0324612855911255,grad_norm: 0.906056502783092, iteration: 33374
loss: 1.052980661392212,grad_norm: 0.9999991094137134, iteration: 33375
loss: 0.9785219430923462,grad_norm: 0.9999990302641851, iteration: 33376
loss: 1.0202199220657349,grad_norm: 0.999999278001511, iteration: 33377
loss: 1.0319734811782837,grad_norm: 0.9999993548363153, iteration: 33378
loss: 1.0244550704956055,grad_norm: 0.9999991509024068, iteration: 33379
loss: 1.029160737991333,grad_norm: 0.9999991076465781, iteration: 33380
loss: 1.0283113718032837,grad_norm: 0.999999121922513, iteration: 33381
loss: 0.9927110075950623,grad_norm: 0.9999991571401595, iteration: 33382
loss: 1.0558934211730957,grad_norm: 0.8685920807026668, iteration: 33383
loss: 1.0293333530426025,grad_norm: 0.9437940904893497, iteration: 33384
loss: 1.0111445188522339,grad_norm: 0.9999991578485357, iteration: 33385
loss: 0.9405661821365356,grad_norm: 0.8539985579737722, iteration: 33386
loss: 1.0184541940689087,grad_norm: 0.9999997308152028, iteration: 33387
loss: 0.96091628074646,grad_norm: 0.9999990617680462, iteration: 33388
loss: 1.0082584619522095,grad_norm: 0.999999417100893, iteration: 33389
loss: 1.0050653219223022,grad_norm: 0.8997797624277518, iteration: 33390
loss: 0.9842407703399658,grad_norm: 0.8169104542890868, iteration: 33391
loss: 1.019506812095642,grad_norm: 0.9999998113494202, iteration: 33392
loss: 0.9991095066070557,grad_norm: 0.7955394212001025, iteration: 33393
loss: 0.9882870316505432,grad_norm: 0.9999990155438367, iteration: 33394
loss: 1.0259356498718262,grad_norm: 0.9999992099582958, iteration: 33395
loss: 1.008873701095581,grad_norm: 0.9456810999097063, iteration: 33396
loss: 1.0428577661514282,grad_norm: 0.8924199293233924, iteration: 33397
loss: 0.9676888585090637,grad_norm: 0.9405312304052245, iteration: 33398
loss: 0.9824814796447754,grad_norm: 0.9999989682984264, iteration: 33399
loss: 0.9681363105773926,grad_norm: 0.9999991278694903, iteration: 33400
loss: 1.0172178745269775,grad_norm: 0.8951159041315092, iteration: 33401
loss: 0.9865256547927856,grad_norm: 0.7833898883071639, iteration: 33402
loss: 1.0324915647506714,grad_norm: 0.9963119458591387, iteration: 33403
loss: 1.0224456787109375,grad_norm: 0.9271953838793852, iteration: 33404
loss: 1.0204976797103882,grad_norm: 0.999999129726421, iteration: 33405
loss: 1.0404657125473022,grad_norm: 0.9999992450836668, iteration: 33406
loss: 1.0124698877334595,grad_norm: 0.8671963400655475, iteration: 33407
loss: 1.0126633644104004,grad_norm: 0.9999992494334582, iteration: 33408
loss: 1.018386960029602,grad_norm: 0.9948864669865448, iteration: 33409
loss: 0.9827523231506348,grad_norm: 0.9999991670222791, iteration: 33410
loss: 1.0105018615722656,grad_norm: 0.9077730133271494, iteration: 33411
loss: 1.0048378705978394,grad_norm: 0.8482915864746065, iteration: 33412
loss: 0.9976567625999451,grad_norm: 0.9999990078410912, iteration: 33413
loss: 1.0305556058883667,grad_norm: 0.8980582365471816, iteration: 33414
loss: 1.0001075267791748,grad_norm: 0.8722892231187723, iteration: 33415
loss: 1.0048917531967163,grad_norm: 0.9795883532134861, iteration: 33416
loss: 1.0178320407867432,grad_norm: 0.968341517716549, iteration: 33417
loss: 0.9922913312911987,grad_norm: 0.9362463534005108, iteration: 33418
loss: 0.9814620614051819,grad_norm: 0.8717724670998175, iteration: 33419
loss: 1.0432560443878174,grad_norm: 0.999999494919227, iteration: 33420
loss: 1.0399428606033325,grad_norm: 0.9659634703377729, iteration: 33421
loss: 0.9846433997154236,grad_norm: 0.8999826559959032, iteration: 33422
loss: 1.008302092552185,grad_norm: 0.9999990066783897, iteration: 33423
loss: 1.0027154684066772,grad_norm: 0.9999990106963937, iteration: 33424
loss: 1.047898530960083,grad_norm: 0.871337898467292, iteration: 33425
loss: 0.9851460456848145,grad_norm: 0.9999992444274641, iteration: 33426
loss: 1.0132912397384644,grad_norm: 0.9999997245842039, iteration: 33427
loss: 1.0125364065170288,grad_norm: 0.9999992152192168, iteration: 33428
loss: 1.0355613231658936,grad_norm: 0.9999996314253143, iteration: 33429
loss: 1.0500742197036743,grad_norm: 0.9999992427138965, iteration: 33430
loss: 0.9925583600997925,grad_norm: 0.9999990418165958, iteration: 33431
loss: 0.9952343702316284,grad_norm: 0.9999990840469525, iteration: 33432
loss: 1.007691740989685,grad_norm: 0.9902023629439262, iteration: 33433
loss: 1.031449794769287,grad_norm: 0.9999990627567231, iteration: 33434
loss: 1.0137450695037842,grad_norm: 0.9741857423063536, iteration: 33435
loss: 1.035610318183899,grad_norm: 0.9999991636674794, iteration: 33436
loss: 0.9843342304229736,grad_norm: 0.9999990223953069, iteration: 33437
loss: 1.041030764579773,grad_norm: 0.967671137687103, iteration: 33438
loss: 0.9875038266181946,grad_norm: 0.9543903188276172, iteration: 33439
loss: 1.0224586725234985,grad_norm: 0.9999990149740849, iteration: 33440
loss: 1.0177499055862427,grad_norm: 0.9999993421580058, iteration: 33441
loss: 1.0062954425811768,grad_norm: 0.9378731168104222, iteration: 33442
loss: 0.9990578293800354,grad_norm: 0.9999991629250816, iteration: 33443
loss: 1.0046155452728271,grad_norm: 0.9999989768179041, iteration: 33444
loss: 0.957358181476593,grad_norm: 0.9999989974651008, iteration: 33445
loss: 1.0190634727478027,grad_norm: 0.9999990034027083, iteration: 33446
loss: 1.0422546863555908,grad_norm: 0.9999989651761197, iteration: 33447
loss: 0.9973028898239136,grad_norm: 0.9356570307020883, iteration: 33448
loss: 0.9801046252250671,grad_norm: 0.9999990393057471, iteration: 33449
loss: 1.0659867525100708,grad_norm: 0.9999996005842666, iteration: 33450
loss: 0.9821606278419495,grad_norm: 0.9999991888408283, iteration: 33451
loss: 1.0057934522628784,grad_norm: 0.9749913979965995, iteration: 33452
loss: 1.0173251628875732,grad_norm: 0.9347579465097198, iteration: 33453
loss: 0.993243932723999,grad_norm: 0.9999993080736239, iteration: 33454
loss: 0.9953502416610718,grad_norm: 0.9999992578763023, iteration: 33455
loss: 1.0393049716949463,grad_norm: 0.9999995638909678, iteration: 33456
loss: 1.0565197467803955,grad_norm: 0.9999994564403217, iteration: 33457
loss: 1.024963617324829,grad_norm: 0.9999991562527297, iteration: 33458
loss: 1.0284616947174072,grad_norm: 0.8888033381079664, iteration: 33459
loss: 1.040395736694336,grad_norm: 0.9999993036988353, iteration: 33460
loss: 1.0324227809906006,grad_norm: 0.9999997639945315, iteration: 33461
loss: 0.9852069616317749,grad_norm: 0.8979428436286357, iteration: 33462
loss: 1.003033995628357,grad_norm: 0.9999992048884947, iteration: 33463
loss: 1.000307321548462,grad_norm: 0.982841991494538, iteration: 33464
loss: 1.0595020055770874,grad_norm: 0.9999998510344537, iteration: 33465
loss: 1.0065962076187134,grad_norm: 0.8910954021237588, iteration: 33466
loss: 0.9861918091773987,grad_norm: 0.9999989680194792, iteration: 33467
loss: 0.989571213722229,grad_norm: 0.9195516658345538, iteration: 33468
loss: 0.9805815815925598,grad_norm: 1.0000000453714313, iteration: 33469
loss: 0.98210209608078,grad_norm: 0.9301175111136006, iteration: 33470
loss: 1.051025390625,grad_norm: 0.9999993557377328, iteration: 33471
loss: 0.9803025126457214,grad_norm: 0.9418979859422604, iteration: 33472
loss: 1.00734281539917,grad_norm: 0.927973043166491, iteration: 33473
loss: 1.001268744468689,grad_norm: 0.9999991978925721, iteration: 33474
loss: 1.0308876037597656,grad_norm: 0.9999994960630116, iteration: 33475
loss: 1.0738351345062256,grad_norm: 0.9999990872576723, iteration: 33476
loss: 0.9824239611625671,grad_norm: 0.9999992595941475, iteration: 33477
loss: 1.0029323101043701,grad_norm: 0.9999990194064828, iteration: 33478
loss: 1.0616908073425293,grad_norm: 0.9258297809949233, iteration: 33479
loss: 1.0028574466705322,grad_norm: 0.9999990124424029, iteration: 33480
loss: 0.9797652959823608,grad_norm: 0.8599695717593023, iteration: 33481
loss: 0.9475383758544922,grad_norm: 0.9999991566531281, iteration: 33482
loss: 1.0150927305221558,grad_norm: 0.9999991792397497, iteration: 33483
loss: 0.958977997303009,grad_norm: 0.8782068824292135, iteration: 33484
loss: 1.0867255926132202,grad_norm: 0.9999994966539866, iteration: 33485
loss: 1.0473926067352295,grad_norm: 0.9999992577175494, iteration: 33486
loss: 1.0496217012405396,grad_norm: 0.999999637810899, iteration: 33487
loss: 1.0357038974761963,grad_norm: 0.8386522200706633, iteration: 33488
loss: 0.9959839582443237,grad_norm: 0.924464168344082, iteration: 33489
loss: 0.9956774115562439,grad_norm: 0.9999990513452987, iteration: 33490
loss: 1.008936882019043,grad_norm: 0.8873764291048858, iteration: 33491
loss: 0.9891376495361328,grad_norm: 0.9693815276552268, iteration: 33492
loss: 0.9968713521957397,grad_norm: 0.9999992195471819, iteration: 33493
loss: 0.9784619808197021,grad_norm: 0.9999991745123625, iteration: 33494
loss: 1.0279536247253418,grad_norm: 0.9999992094877439, iteration: 33495
loss: 0.9975947141647339,grad_norm: 0.9878185297513152, iteration: 33496
loss: 1.0142943859100342,grad_norm: 0.9867030496226786, iteration: 33497
loss: 1.0377097129821777,grad_norm: 0.9999991677877349, iteration: 33498
loss: 1.0343713760375977,grad_norm: 0.9964739538981038, iteration: 33499
loss: 1.0066182613372803,grad_norm: 0.9585752247691777, iteration: 33500
loss: 1.0333034992218018,grad_norm: 0.9999991554125653, iteration: 33501
loss: 1.0188543796539307,grad_norm: 0.9618019133596013, iteration: 33502
loss: 1.0299677848815918,grad_norm: 0.999999196410239, iteration: 33503
loss: 1.0019325017929077,grad_norm: 0.9999990602987835, iteration: 33504
loss: 1.0153841972351074,grad_norm: 0.999999581086298, iteration: 33505
loss: 0.9967535138130188,grad_norm: 0.9129953200770312, iteration: 33506
loss: 1.0351173877716064,grad_norm: 0.8695836305229142, iteration: 33507
loss: 1.005415439605713,grad_norm: 0.9929131987666566, iteration: 33508
loss: 1.0177268981933594,grad_norm: 0.9999991531457393, iteration: 33509
loss: 1.01203453540802,grad_norm: 0.94758183795441, iteration: 33510
loss: 1.0059947967529297,grad_norm: 0.9201827898123118, iteration: 33511
loss: 0.9984103441238403,grad_norm: 0.9999997944884306, iteration: 33512
loss: 1.0037566423416138,grad_norm: 0.9999992215230832, iteration: 33513
loss: 1.0088410377502441,grad_norm: 0.9999991244246971, iteration: 33514
loss: 1.013986349105835,grad_norm: 0.8924060942580351, iteration: 33515
loss: 1.0044130086898804,grad_norm: 0.9622482894068951, iteration: 33516
loss: 1.020390510559082,grad_norm: 0.8428372912554296, iteration: 33517
loss: 1.0265934467315674,grad_norm: 0.9962328053623705, iteration: 33518
loss: 1.0205650329589844,grad_norm: 0.8473591843374491, iteration: 33519
loss: 0.9551445841789246,grad_norm: 0.9458694874939175, iteration: 33520
loss: 1.004194974899292,grad_norm: 0.9999996670728747, iteration: 33521
loss: 1.0252982378005981,grad_norm: 0.9999990833448462, iteration: 33522
loss: 1.001308798789978,grad_norm: 0.9999992981642407, iteration: 33523
loss: 0.9840674996376038,grad_norm: 0.935034593913237, iteration: 33524
loss: 1.0075064897537231,grad_norm: 0.9858158333189729, iteration: 33525
loss: 1.0321435928344727,grad_norm: 0.999999106011083, iteration: 33526
loss: 1.0527622699737549,grad_norm: 0.9999998243991872, iteration: 33527
loss: 1.0136066675186157,grad_norm: 0.9656489537041264, iteration: 33528
loss: 1.0088497400283813,grad_norm: 0.9624338558894809, iteration: 33529
loss: 1.0399101972579956,grad_norm: 0.9999997681141454, iteration: 33530
loss: 0.9955437183380127,grad_norm: 0.9999996987726826, iteration: 33531
loss: 1.0146374702453613,grad_norm: 0.999999148265379, iteration: 33532
loss: 1.0135107040405273,grad_norm: 0.8532626737691885, iteration: 33533
loss: 1.0181370973587036,grad_norm: 0.9979881327013664, iteration: 33534
loss: 0.9875701069831848,grad_norm: 0.8395735962413388, iteration: 33535
loss: 1.0463684797286987,grad_norm: 0.863937283742865, iteration: 33536
loss: 0.9960242509841919,grad_norm: 0.9999996747795104, iteration: 33537
loss: 1.0161811113357544,grad_norm: 0.9999992010908737, iteration: 33538
loss: 1.0006325244903564,grad_norm: 0.9754741963546042, iteration: 33539
loss: 0.9832698106765747,grad_norm: 0.8215105244955656, iteration: 33540
loss: 1.002569556236267,grad_norm: 0.9644916279849762, iteration: 33541
loss: 1.0094714164733887,grad_norm: 0.9999991813102133, iteration: 33542
loss: 1.0406745672225952,grad_norm: 0.9999991202678278, iteration: 33543
loss: 1.0395642518997192,grad_norm: 0.9999996111360225, iteration: 33544
loss: 1.0047446489334106,grad_norm: 0.9999996435371764, iteration: 33545
loss: 1.0352073907852173,grad_norm: 0.9133556208708089, iteration: 33546
loss: 0.9951154589653015,grad_norm: 0.9999993598383379, iteration: 33547
loss: 1.0134543180465698,grad_norm: 0.9206418324668464, iteration: 33548
loss: 1.0221498012542725,grad_norm: 0.9550313344595851, iteration: 33549
loss: 1.03533935546875,grad_norm: 0.9734701732211247, iteration: 33550
loss: 0.9975129961967468,grad_norm: 0.9999994039937934, iteration: 33551
loss: 1.0364588499069214,grad_norm: 0.9131365670396772, iteration: 33552
loss: 0.9933138489723206,grad_norm: 0.8713857309531344, iteration: 33553
loss: 1.0153611898422241,grad_norm: 0.9999990859518932, iteration: 33554
loss: 1.0338505506515503,grad_norm: 0.8956158415291493, iteration: 33555
loss: 0.9906758069992065,grad_norm: 0.9999991578293113, iteration: 33556
loss: 0.9957431554794312,grad_norm: 0.9999990711520634, iteration: 33557
loss: 1.0220277309417725,grad_norm: 0.9999992699101224, iteration: 33558
loss: 0.9920605421066284,grad_norm: 0.950201500200503, iteration: 33559
loss: 0.9546563625335693,grad_norm: 0.9999989853282378, iteration: 33560
loss: 0.991248607635498,grad_norm: 0.9999991548881111, iteration: 33561
loss: 0.9882572293281555,grad_norm: 0.9999994382079058, iteration: 33562
loss: 0.9998465776443481,grad_norm: 0.8245553616656752, iteration: 33563
loss: 0.9888231158256531,grad_norm: 0.9999991508339051, iteration: 33564
loss: 0.984492301940918,grad_norm: 0.9999990430702359, iteration: 33565
loss: 0.9696438908576965,grad_norm: 0.8581448463546227, iteration: 33566
loss: 1.0358929634094238,grad_norm: 0.9999997599807966, iteration: 33567
loss: 0.9673968553543091,grad_norm: 0.9999992529671894, iteration: 33568
loss: 0.9967153072357178,grad_norm: 0.7633813577739356, iteration: 33569
loss: 0.9950070381164551,grad_norm: 0.9619685129634037, iteration: 33570
loss: 0.9982090592384338,grad_norm: 0.9999993709661343, iteration: 33571
loss: 0.9726843237876892,grad_norm: 0.9628967360956251, iteration: 33572
loss: 1.0316754579544067,grad_norm: 0.9999991878246475, iteration: 33573
loss: 1.0073251724243164,grad_norm: 0.9999991470587557, iteration: 33574
loss: 1.0129401683807373,grad_norm: 0.9999993019174237, iteration: 33575
loss: 1.016097068786621,grad_norm: 0.9999992885405221, iteration: 33576
loss: 0.9861272573471069,grad_norm: 0.9725905369922333, iteration: 33577
loss: 1.0229530334472656,grad_norm: 0.9999991372502371, iteration: 33578
loss: 1.0013014078140259,grad_norm: 0.9999996936240227, iteration: 33579
loss: 1.0057076215744019,grad_norm: 0.8547661972351431, iteration: 33580
loss: 1.0585135221481323,grad_norm: 0.9917480749521774, iteration: 33581
loss: 1.0509506464004517,grad_norm: 0.9999996230775069, iteration: 33582
loss: 1.0145213603973389,grad_norm: 0.9999990401486266, iteration: 33583
loss: 0.9922683238983154,grad_norm: 0.9999990786284403, iteration: 33584
loss: 1.0263347625732422,grad_norm: 0.9999997190071751, iteration: 33585
loss: 1.017425537109375,grad_norm: 0.9999992102838441, iteration: 33586
loss: 1.0006500482559204,grad_norm: 0.9494580581556249, iteration: 33587
loss: 1.0081571340560913,grad_norm: 0.9999992365075849, iteration: 33588
loss: 0.9986476302146912,grad_norm: 0.999999253190024, iteration: 33589
loss: 1.0292762517929077,grad_norm: 0.9999991215677135, iteration: 33590
loss: 1.004723310470581,grad_norm: 0.8522232343554338, iteration: 33591
loss: 1.0155349969863892,grad_norm: 0.9532265476084949, iteration: 33592
loss: 1.0069693326950073,grad_norm: 0.9999993769603033, iteration: 33593
loss: 1.0077632665634155,grad_norm: 0.9999989766755284, iteration: 33594
loss: 1.0377079248428345,grad_norm: 0.8173440659917875, iteration: 33595
loss: 1.0401111841201782,grad_norm: 0.9999995219423208, iteration: 33596
loss: 1.0440809726715088,grad_norm: 0.9999995406889512, iteration: 33597
loss: 1.026699423789978,grad_norm: 0.9999989307062539, iteration: 33598
loss: 0.9981476068496704,grad_norm: 0.9999992308170145, iteration: 33599
loss: 1.0069602727890015,grad_norm: 0.999999298921434, iteration: 33600
loss: 1.0153510570526123,grad_norm: 0.9999992370363555, iteration: 33601
loss: 0.9690558314323425,grad_norm: 0.9999992231616374, iteration: 33602
loss: 0.9701486825942993,grad_norm: 0.8642797705761656, iteration: 33603
loss: 1.0241578817367554,grad_norm: 0.9999993442029667, iteration: 33604
loss: 0.9846262335777283,grad_norm: 0.9999991909968572, iteration: 33605
loss: 0.9975186586380005,grad_norm: 0.9291957280501212, iteration: 33606
loss: 1.0107353925704956,grad_norm: 0.9999989337255124, iteration: 33607
loss: 1.0030838251113892,grad_norm: 0.915609954701585, iteration: 33608
loss: 1.0503909587860107,grad_norm: 0.9999991649078548, iteration: 33609
loss: 0.9993969202041626,grad_norm: 0.9999992677512695, iteration: 33610
loss: 1.013302206993103,grad_norm: 0.888347454206163, iteration: 33611
loss: 1.0663074254989624,grad_norm: 0.9999996319329706, iteration: 33612
loss: 1.0347460508346558,grad_norm: 0.9999994148340817, iteration: 33613
loss: 0.964257001876831,grad_norm: 0.9999999294131154, iteration: 33614
loss: 0.9867734313011169,grad_norm: 0.9999991394105994, iteration: 33615
loss: 0.9989835023880005,grad_norm: 0.950705748037567, iteration: 33616
loss: 0.9915757775306702,grad_norm: 0.9999991131234711, iteration: 33617
loss: 0.98846435546875,grad_norm: 0.9382610503935155, iteration: 33618
loss: 1.043148398399353,grad_norm: 0.9999991927587583, iteration: 33619
loss: 1.0257048606872559,grad_norm: 0.9329780260665625, iteration: 33620
loss: 1.0823569297790527,grad_norm: 0.9999992647523465, iteration: 33621
loss: 1.0431491136550903,grad_norm: 0.9999990906527614, iteration: 33622
loss: 1.0168932676315308,grad_norm: 0.9491638605143159, iteration: 33623
loss: 1.0063186883926392,grad_norm: 0.9999990232760194, iteration: 33624
loss: 1.0038337707519531,grad_norm: 0.9999992624336235, iteration: 33625
loss: 1.0374637842178345,grad_norm: 0.9999990636734151, iteration: 33626
loss: 1.0056324005126953,grad_norm: 0.937351574064831, iteration: 33627
loss: 1.0094666481018066,grad_norm: 0.9179988365390581, iteration: 33628
loss: 1.0357040166854858,grad_norm: 0.8919422824087222, iteration: 33629
loss: 1.0003621578216553,grad_norm: 0.9999996334173642, iteration: 33630
loss: 0.9843336939811707,grad_norm: 0.9999992092063986, iteration: 33631
loss: 1.04075026512146,grad_norm: 0.9999992122053375, iteration: 33632
loss: 0.9971315264701843,grad_norm: 0.9999994639039517, iteration: 33633
loss: 0.9963447451591492,grad_norm: 0.7431362672093137, iteration: 33634
loss: 1.0229814052581787,grad_norm: 0.8499369248163402, iteration: 33635
loss: 0.9970754384994507,grad_norm: 0.8566384714812394, iteration: 33636
loss: 0.994392454624176,grad_norm: 0.8701154252374951, iteration: 33637
loss: 1.0610600709915161,grad_norm: 0.9761150893389806, iteration: 33638
loss: 0.9791135191917419,grad_norm: 0.999999029629703, iteration: 33639
loss: 1.0114903450012207,grad_norm: 0.9999989755686516, iteration: 33640
loss: 1.0855865478515625,grad_norm: 0.9999998607541967, iteration: 33641
loss: 1.0025920867919922,grad_norm: 0.8664560291043125, iteration: 33642
loss: 1.0410826206207275,grad_norm: 0.999999747176579, iteration: 33643
loss: 1.0291470289230347,grad_norm: 0.9999992270893128, iteration: 33644
loss: 1.0284755229949951,grad_norm: 0.9999991950964787, iteration: 33645
loss: 0.9998267292976379,grad_norm: 0.9137183749358404, iteration: 33646
loss: 0.9703828692436218,grad_norm: 0.9186631900305403, iteration: 33647
loss: 1.0220751762390137,grad_norm: 0.8657680408826676, iteration: 33648
loss: 1.0226526260375977,grad_norm: 0.9999991648513542, iteration: 33649
loss: 1.0099974870681763,grad_norm: 0.9999989973857013, iteration: 33650
loss: 0.9831645488739014,grad_norm: 0.9999990642377392, iteration: 33651
loss: 0.9778890609741211,grad_norm: 0.9999990152733754, iteration: 33652
loss: 0.9970013499259949,grad_norm: 0.9789229024494979, iteration: 33653
loss: 1.0369685888290405,grad_norm: 0.9999998440754432, iteration: 33654
loss: 1.0116826295852661,grad_norm: 0.9038582944531779, iteration: 33655
loss: 1.0415958166122437,grad_norm: 0.9999992796851359, iteration: 33656
loss: 1.017106056213379,grad_norm: 0.9999990074449473, iteration: 33657
loss: 0.9819467067718506,grad_norm: 0.9999991231377263, iteration: 33658
loss: 1.022743582725525,grad_norm: 0.9999994385338381, iteration: 33659
loss: 1.0025346279144287,grad_norm: 0.8283472155671697, iteration: 33660
loss: 1.0483464002609253,grad_norm: 0.8781614472639917, iteration: 33661
loss: 1.025871753692627,grad_norm: 0.927917733069213, iteration: 33662
loss: 1.002902626991272,grad_norm: 0.8635374289291701, iteration: 33663
loss: 1.0209388732910156,grad_norm: 0.9999993126289296, iteration: 33664
loss: 0.984910249710083,grad_norm: 0.9279261513711036, iteration: 33665
loss: 0.9990144968032837,grad_norm: 0.9999991368522417, iteration: 33666
loss: 0.9744004607200623,grad_norm: 0.971962190774114, iteration: 33667
loss: 1.0084651708602905,grad_norm: 0.9921468324258089, iteration: 33668
loss: 0.9998070001602173,grad_norm: 0.999999512464545, iteration: 33669
loss: 0.9833714365959167,grad_norm: 0.9134195940677383, iteration: 33670
loss: 1.009591817855835,grad_norm: 0.781470461597597, iteration: 33671
loss: 1.0338586568832397,grad_norm: 0.9999992267080108, iteration: 33672
loss: 1.0130484104156494,grad_norm: 0.8698857994402444, iteration: 33673
loss: 0.9861903786659241,grad_norm: 0.9999989820732934, iteration: 33674
loss: 1.0398703813552856,grad_norm: 0.9999991047352094, iteration: 33675
loss: 0.9792332053184509,grad_norm: 0.9999989907162828, iteration: 33676
loss: 1.0434361696243286,grad_norm: 0.9999993670336511, iteration: 33677
loss: 1.0168638229370117,grad_norm: 0.9451344014527445, iteration: 33678
loss: 1.0080244541168213,grad_norm: 0.9999990754795782, iteration: 33679
loss: 1.005638837814331,grad_norm: 0.8899464834717487, iteration: 33680
loss: 1.0354853868484497,grad_norm: 0.9999991900886993, iteration: 33681
loss: 1.0049657821655273,grad_norm: 0.9999991921831202, iteration: 33682
loss: 1.0059349536895752,grad_norm: 0.9783787341106353, iteration: 33683
loss: 1.0353833436965942,grad_norm: 0.9648857008254463, iteration: 33684
loss: 1.0011980533599854,grad_norm: 0.9999995966937739, iteration: 33685
loss: 1.008606195449829,grad_norm: 0.9765073661216863, iteration: 33686
loss: 1.0029715299606323,grad_norm: 0.9574030911346283, iteration: 33687
loss: 0.9874153733253479,grad_norm: 0.9714134448860842, iteration: 33688
loss: 0.9959836602210999,grad_norm: 0.9718416632285128, iteration: 33689
loss: 0.9822630882263184,grad_norm: 0.9739496289279129, iteration: 33690
loss: 1.011817455291748,grad_norm: 0.9999992272443191, iteration: 33691
loss: 0.9958393573760986,grad_norm: 0.9999991489810249, iteration: 33692
loss: 1.0093822479248047,grad_norm: 0.9999991798106451, iteration: 33693
loss: 1.0306882858276367,grad_norm: 0.7610944257651987, iteration: 33694
loss: 0.9841269254684448,grad_norm: 0.9389354496363282, iteration: 33695
loss: 1.060836911201477,grad_norm: 0.9999993947954785, iteration: 33696
loss: 1.0215672254562378,grad_norm: 0.8149075518189559, iteration: 33697
loss: 1.021122694015503,grad_norm: 0.8773322159950822, iteration: 33698
loss: 1.0076582431793213,grad_norm: 0.8732182562055594, iteration: 33699
loss: 0.9921289682388306,grad_norm: 0.9887600714387965, iteration: 33700
loss: 0.9921084642410278,grad_norm: 0.9033526236838976, iteration: 33701
loss: 1.0351675748825073,grad_norm: 0.9999991959052149, iteration: 33702
loss: 1.0596942901611328,grad_norm: 0.9999990096190968, iteration: 33703
loss: 0.9954129457473755,grad_norm: 0.9368516945249692, iteration: 33704
loss: 0.9687761068344116,grad_norm: 0.9415824385168534, iteration: 33705
loss: 1.0504580736160278,grad_norm: 0.9999996940736028, iteration: 33706
loss: 0.9954127073287964,grad_norm: 0.9632153005205469, iteration: 33707
loss: 0.9947978258132935,grad_norm: 0.9999992104723807, iteration: 33708
loss: 1.012757420539856,grad_norm: 0.9999991291106559, iteration: 33709
loss: 1.0090761184692383,grad_norm: 0.9999991660652106, iteration: 33710
loss: 1.0067541599273682,grad_norm: 0.881133996966249, iteration: 33711
loss: 1.0245076417922974,grad_norm: 0.9219859420537289, iteration: 33712
loss: 0.9804071187973022,grad_norm: 0.9999992053053762, iteration: 33713
loss: 0.9798187613487244,grad_norm: 0.8354024847084242, iteration: 33714
loss: 1.0510532855987549,grad_norm: 0.9069796712944812, iteration: 33715
loss: 1.0143269300460815,grad_norm: 0.9999989713434395, iteration: 33716
loss: 1.0285762548446655,grad_norm: 0.9999999589771631, iteration: 33717
loss: 1.005489468574524,grad_norm: 0.9999990211855312, iteration: 33718
loss: 0.999268114566803,grad_norm: 0.9999991108681638, iteration: 33719
loss: 0.9913938641548157,grad_norm: 0.8737429386975528, iteration: 33720
loss: 0.9816113710403442,grad_norm: 0.9902475104085259, iteration: 33721
loss: 0.9978797435760498,grad_norm: 0.8923486807510509, iteration: 33722
loss: 1.0002174377441406,grad_norm: 0.8343768000760998, iteration: 33723
loss: 1.009757399559021,grad_norm: 0.9571729277147923, iteration: 33724
loss: 1.0113966464996338,grad_norm: 0.9999991208834517, iteration: 33725
loss: 1.0176784992218018,grad_norm: 0.9999991099818534, iteration: 33726
loss: 1.0066312551498413,grad_norm: 0.9999991799469035, iteration: 33727
loss: 1.0040303468704224,grad_norm: 0.9425848704841641, iteration: 33728
loss: 1.0212656259536743,grad_norm: 0.7829473063921639, iteration: 33729
loss: 1.0184463262557983,grad_norm: 0.9103658085496303, iteration: 33730
loss: 0.9991223216056824,grad_norm: 0.9999991333073197, iteration: 33731
loss: 1.0269604921340942,grad_norm: 0.999999164563652, iteration: 33732
loss: 0.9867895245552063,grad_norm: 0.9897705706325945, iteration: 33733
loss: 1.0510342121124268,grad_norm: 0.9999990495180064, iteration: 33734
loss: 1.0318740606307983,grad_norm: 0.8623656952171785, iteration: 33735
loss: 1.0219037532806396,grad_norm: 0.8582158407521683, iteration: 33736
loss: 0.9741182327270508,grad_norm: 0.9671894621350208, iteration: 33737
loss: 0.9567947387695312,grad_norm: 0.9999991631817232, iteration: 33738
loss: 0.9908993244171143,grad_norm: 0.972191187194455, iteration: 33739
loss: 0.9647362232208252,grad_norm: 0.9974207512973502, iteration: 33740
loss: 1.0234947204589844,grad_norm: 0.9203391942839573, iteration: 33741
loss: 0.9802252054214478,grad_norm: 0.9200510543381575, iteration: 33742
loss: 1.0187875032424927,grad_norm: 0.9999991975457552, iteration: 33743
loss: 0.9945245981216431,grad_norm: 0.9999991470914783, iteration: 33744
loss: 0.9917824268341064,grad_norm: 0.9221982258911796, iteration: 33745
loss: 0.9670212864875793,grad_norm: 0.8042745161824263, iteration: 33746
loss: 0.9871094822883606,grad_norm: 0.8388829085706806, iteration: 33747
loss: 0.977279543876648,grad_norm: 0.8406314514704124, iteration: 33748
loss: 1.028773307800293,grad_norm: 0.9999999427228273, iteration: 33749
loss: 0.9802826046943665,grad_norm: 0.9032286823692502, iteration: 33750
loss: 1.0354011058807373,grad_norm: 0.9703911762925314, iteration: 33751
loss: 1.0051442384719849,grad_norm: 0.9999990998174964, iteration: 33752
loss: 0.9893090128898621,grad_norm: 0.9999990670344606, iteration: 33753
loss: 1.0361946821212769,grad_norm: 0.9999990863901754, iteration: 33754
loss: 1.0046550035476685,grad_norm: 0.999999140993103, iteration: 33755
loss: 1.0298117399215698,grad_norm: 1.0000000194736127, iteration: 33756
loss: 1.0101805925369263,grad_norm: 0.9150543688176803, iteration: 33757
loss: 1.004499077796936,grad_norm: 0.9702630492746608, iteration: 33758
loss: 1.0231729745864868,grad_norm: 0.9859976994733031, iteration: 33759
loss: 0.9841075539588928,grad_norm: 0.9999990145283787, iteration: 33760
loss: 1.0455880165100098,grad_norm: 0.8974986868446534, iteration: 33761
loss: 0.9719052314758301,grad_norm: 0.9999990442537723, iteration: 33762
loss: 0.9840309619903564,grad_norm: 0.7889567752205885, iteration: 33763
loss: 1.0322890281677246,grad_norm: 0.9788169346574207, iteration: 33764
loss: 1.0133427381515503,grad_norm: 0.9999991458912252, iteration: 33765
loss: 1.0072214603424072,grad_norm: 0.82698192923889, iteration: 33766
loss: 1.010774850845337,grad_norm: 0.9999990593690395, iteration: 33767
loss: 1.0116093158721924,grad_norm: 0.9999990513332352, iteration: 33768
loss: 1.018280029296875,grad_norm: 0.751006817324065, iteration: 33769
loss: 1.0181866884231567,grad_norm: 0.9999990223696487, iteration: 33770
loss: 0.9709255695343018,grad_norm: 0.9999990902541671, iteration: 33771
loss: 0.9863253235816956,grad_norm: 0.8692435525526048, iteration: 33772
loss: 1.0311301946640015,grad_norm: 0.9999991939970564, iteration: 33773
loss: 0.987970769405365,grad_norm: 0.999999149414053, iteration: 33774
loss: 1.0425851345062256,grad_norm: 0.9999993244002471, iteration: 33775
loss: 0.9701390862464905,grad_norm: 0.9999990434873854, iteration: 33776
loss: 1.0140516757965088,grad_norm: 0.9999998688414751, iteration: 33777
loss: 1.0121768712997437,grad_norm: 0.9407731790579122, iteration: 33778
loss: 1.008398175239563,grad_norm: 0.9228825244627135, iteration: 33779
loss: 1.0161819458007812,grad_norm: 0.999999317466304, iteration: 33780
loss: 0.9784169793128967,grad_norm: 0.9458517603600853, iteration: 33781
loss: 1.01860773563385,grad_norm: 0.9999993615949081, iteration: 33782
loss: 1.1221362352371216,grad_norm: 0.9999999197193852, iteration: 33783
loss: 1.01247239112854,grad_norm: 0.9999990932121029, iteration: 33784
loss: 0.9616273045539856,grad_norm: 0.8969124554783763, iteration: 33785
loss: 1.0013763904571533,grad_norm: 0.9999992284381032, iteration: 33786
loss: 1.1615403890609741,grad_norm: 0.9999997940145519, iteration: 33787
loss: 1.0327717065811157,grad_norm: 0.9831833108174438, iteration: 33788
loss: 0.9943814873695374,grad_norm: 0.9714037681322071, iteration: 33789
loss: 0.9713932275772095,grad_norm: 0.943963651999682, iteration: 33790
loss: 1.0316505432128906,grad_norm: 0.9598082416568227, iteration: 33791
loss: 1.0029077529907227,grad_norm: 0.9083777122747492, iteration: 33792
loss: 0.9861523509025574,grad_norm: 0.9999991889468355, iteration: 33793
loss: 1.0315492153167725,grad_norm: 0.9999990691010049, iteration: 33794
loss: 1.0051778554916382,grad_norm: 0.9999991249691227, iteration: 33795
loss: 1.0131069421768188,grad_norm: 0.8294463052351142, iteration: 33796
loss: 1.0070912837982178,grad_norm: 0.9527810172047588, iteration: 33797
loss: 1.0298480987548828,grad_norm: 0.9600163395573367, iteration: 33798
loss: 1.013447880744934,grad_norm: 0.8593493134047732, iteration: 33799
loss: 1.0086957216262817,grad_norm: 0.9999989897500832, iteration: 33800
loss: 0.9907820224761963,grad_norm: 0.9084691885211764, iteration: 33801
loss: 1.0233842134475708,grad_norm: 0.9999997177088634, iteration: 33802
loss: 1.0281362533569336,grad_norm: 0.9999991652282268, iteration: 33803
loss: 0.9839115142822266,grad_norm: 0.9412038008406209, iteration: 33804
loss: 1.0423582792282104,grad_norm: 0.9999989932801306, iteration: 33805
loss: 1.0245023965835571,grad_norm: 0.9424240171355416, iteration: 33806
loss: 1.046644926071167,grad_norm: 0.999999065342121, iteration: 33807
loss: 0.999095618724823,grad_norm: 0.8058309473873907, iteration: 33808
loss: 0.977548360824585,grad_norm: 0.9267752096416052, iteration: 33809
loss: 1.0017189979553223,grad_norm: 0.8714760550218839, iteration: 33810
loss: 1.0025700330734253,grad_norm: 0.8535273687643323, iteration: 33811
loss: 1.0112401247024536,grad_norm: 0.9999990396655563, iteration: 33812
loss: 1.0074609518051147,grad_norm: 0.8890078441878858, iteration: 33813
loss: 1.0100051164627075,grad_norm: 0.9570214551006896, iteration: 33814
loss: 1.0236808061599731,grad_norm: 0.9999993786339346, iteration: 33815
loss: 0.9746203422546387,grad_norm: 0.9195332568092167, iteration: 33816
loss: 1.0178453922271729,grad_norm: 0.9999991283249238, iteration: 33817
loss: 1.0216853618621826,grad_norm: 0.9422725347222951, iteration: 33818
loss: 1.0206363201141357,grad_norm: 0.9496965911009498, iteration: 33819
loss: 1.0293124914169312,grad_norm: 0.9209315472113578, iteration: 33820
loss: 0.9897341728210449,grad_norm: 0.9999990678865076, iteration: 33821
loss: 1.0277892351150513,grad_norm: 0.9326440325469912, iteration: 33822
loss: 1.004710078239441,grad_norm: 0.9999991744026527, iteration: 33823
loss: 1.0267764329910278,grad_norm: 0.9999991636314439, iteration: 33824
loss: 1.008533239364624,grad_norm: 0.9999993047716315, iteration: 33825
loss: 1.0044372081756592,grad_norm: 0.8467696446531868, iteration: 33826
loss: 1.0136387348175049,grad_norm: 0.9527675105912968, iteration: 33827
loss: 1.015844702720642,grad_norm: 0.8685178798825732, iteration: 33828
loss: 1.0205440521240234,grad_norm: 0.9999990460208404, iteration: 33829
loss: 1.0197852849960327,grad_norm: 0.9954917034063223, iteration: 33830
loss: 0.9708640575408936,grad_norm: 0.9999990859820183, iteration: 33831
loss: 0.9931558966636658,grad_norm: 0.8463596381797686, iteration: 33832
loss: 1.0241997241973877,grad_norm: 0.8973915641134629, iteration: 33833
loss: 1.0112004280090332,grad_norm: 0.9329554690931641, iteration: 33834
loss: 1.0214537382125854,grad_norm: 0.936138828995416, iteration: 33835
loss: 0.9835286140441895,grad_norm: 0.8231971518588614, iteration: 33836
loss: 0.998936116695404,grad_norm: 0.9776115904364195, iteration: 33837
loss: 1.0401372909545898,grad_norm: 0.9999990907729417, iteration: 33838
loss: 1.0103706121444702,grad_norm: 0.9999997915997834, iteration: 33839
loss: 1.0192517042160034,grad_norm: 0.9213706225316675, iteration: 33840
loss: 1.017148733139038,grad_norm: 0.8746143817118538, iteration: 33841
loss: 0.9922187924385071,grad_norm: 0.9999989518512297, iteration: 33842
loss: 0.9820373058319092,grad_norm: 0.9405136822135078, iteration: 33843
loss: 1.0506895780563354,grad_norm: 0.9999997427540406, iteration: 33844
loss: 1.0382925271987915,grad_norm: 0.8264351715604576, iteration: 33845
loss: 0.9866823554039001,grad_norm: 0.9275503543536621, iteration: 33846
loss: 0.9913739562034607,grad_norm: 0.9999990241467502, iteration: 33847
loss: 0.9929471015930176,grad_norm: 0.8800907438897055, iteration: 33848
loss: 1.0075042247772217,grad_norm: 0.9627745781987164, iteration: 33849
loss: 1.0276175737380981,grad_norm: 0.9474845493192711, iteration: 33850
loss: 1.0309253931045532,grad_norm: 0.9411833804473135, iteration: 33851
loss: 1.0072662830352783,grad_norm: 0.981287913536483, iteration: 33852
loss: 1.023468017578125,grad_norm: 0.9844910846846378, iteration: 33853
loss: 0.9819695949554443,grad_norm: 0.892888142192778, iteration: 33854
loss: 1.0280451774597168,grad_norm: 0.9999992062416069, iteration: 33855
loss: 1.0286945104599,grad_norm: 0.9420868485596324, iteration: 33856
loss: 0.9949457049369812,grad_norm: 0.9999991934276443, iteration: 33857
loss: 0.9697654247283936,grad_norm: 0.8109574149922023, iteration: 33858
loss: 0.9503082036972046,grad_norm: 0.9323814411212198, iteration: 33859
loss: 1.0168694257736206,grad_norm: 0.9099598624698023, iteration: 33860
loss: 0.9929322004318237,grad_norm: 0.9043247601928859, iteration: 33861
loss: 1.0051774978637695,grad_norm: 0.9999989546564849, iteration: 33862
loss: 0.9915510416030884,grad_norm: 0.9999989329819348, iteration: 33863
loss: 1.0162596702575684,grad_norm: 0.7639814239932688, iteration: 33864
loss: 1.0331122875213623,grad_norm: 0.7718781329184713, iteration: 33865
loss: 0.9901507496833801,grad_norm: 0.9289446852754756, iteration: 33866
loss: 1.0448719263076782,grad_norm: 0.9999992616736922, iteration: 33867
loss: 0.9905574917793274,grad_norm: 0.9860793643326303, iteration: 33868
loss: 1.0278384685516357,grad_norm: 0.9059541110214687, iteration: 33869
loss: 0.9789997935295105,grad_norm: 0.7948389623403849, iteration: 33870
loss: 0.9714149236679077,grad_norm: 0.9999991134977128, iteration: 33871
loss: 1.0319546461105347,grad_norm: 0.9999990996765067, iteration: 33872
loss: 1.0071895122528076,grad_norm: 0.9999993031676231, iteration: 33873
loss: 1.001562237739563,grad_norm: 0.8452311249094473, iteration: 33874
loss: 1.0241925716400146,grad_norm: 0.8195814400979377, iteration: 33875
loss: 1.0104899406433105,grad_norm: 0.9999989829401689, iteration: 33876
loss: 1.0124772787094116,grad_norm: 0.805200981717855, iteration: 33877
loss: 0.9529387354850769,grad_norm: 0.8684005078991746, iteration: 33878
loss: 1.0079069137573242,grad_norm: 0.8639288364516584, iteration: 33879
loss: 0.99126797914505,grad_norm: 0.9999990136992865, iteration: 33880
loss: 1.0176444053649902,grad_norm: 0.9622335093865451, iteration: 33881
loss: 1.0236303806304932,grad_norm: 0.9999989929520602, iteration: 33882
loss: 1.0308140516281128,grad_norm: 0.9767497390123977, iteration: 33883
loss: 0.9823085069656372,grad_norm: 0.9999990677942866, iteration: 33884
loss: 1.0333868265151978,grad_norm: 0.9999994637388123, iteration: 33885
loss: 1.1109637022018433,grad_norm: 0.9999996624374796, iteration: 33886
loss: 0.9673125743865967,grad_norm: 0.9305271565911258, iteration: 33887
loss: 0.9948804378509521,grad_norm: 0.8658005055624061, iteration: 33888
loss: 1.0208570957183838,grad_norm: 0.8867329524266993, iteration: 33889
loss: 1.0243815183639526,grad_norm: 0.9999990527111959, iteration: 33890
loss: 1.014606237411499,grad_norm: 0.9666013134442303, iteration: 33891
loss: 1.021396279335022,grad_norm: 0.9132573447105375, iteration: 33892
loss: 1.0046550035476685,grad_norm: 0.9371812660242806, iteration: 33893
loss: 1.016810655593872,grad_norm: 0.9053936570643901, iteration: 33894
loss: 0.9945268630981445,grad_norm: 0.9585270092523643, iteration: 33895
loss: 1.0201853513717651,grad_norm: 0.9999989957128967, iteration: 33896
loss: 0.9990940690040588,grad_norm: 0.9999991237722154, iteration: 33897
loss: 0.9936406016349792,grad_norm: 0.9999989964255239, iteration: 33898
loss: 1.0065895318984985,grad_norm: 0.999999080766144, iteration: 33899
loss: 1.0160951614379883,grad_norm: 0.9999993408366331, iteration: 33900
loss: 1.0055580139160156,grad_norm: 0.9713308101595969, iteration: 33901
loss: 1.0088107585906982,grad_norm: 0.9999990935029776, iteration: 33902
loss: 1.035456895828247,grad_norm: 0.8751450169793547, iteration: 33903
loss: 1.0459778308868408,grad_norm: 0.9999998736777986, iteration: 33904
loss: 0.9690448641777039,grad_norm: 0.9202009058583149, iteration: 33905
loss: 1.0014877319335938,grad_norm: 0.9876895196461186, iteration: 33906
loss: 1.001038670539856,grad_norm: 0.9999991550622187, iteration: 33907
loss: 0.9699364304542542,grad_norm: 0.7254379903468187, iteration: 33908
loss: 0.9703454375267029,grad_norm: 0.9639204655259119, iteration: 33909
loss: 1.0313513278961182,grad_norm: 0.9398552411368646, iteration: 33910
loss: 0.959716796875,grad_norm: 0.7230573979352171, iteration: 33911
loss: 1.027683138847351,grad_norm: 0.9989909222209369, iteration: 33912
loss: 1.0378247499465942,grad_norm: 0.8292120121341384, iteration: 33913
loss: 0.9742070436477661,grad_norm: 0.9999990577595935, iteration: 33914
loss: 0.9605423808097839,grad_norm: 0.9999991750976374, iteration: 33915
loss: 1.0378834009170532,grad_norm: 0.94135404223988, iteration: 33916
loss: 1.0008971691131592,grad_norm: 0.9999990931097896, iteration: 33917
loss: 1.0430552959442139,grad_norm: 0.9999994473993048, iteration: 33918
loss: 1.0010193586349487,grad_norm: 0.9999990756421574, iteration: 33919
loss: 1.0035545825958252,grad_norm: 0.9769480874083704, iteration: 33920
loss: 0.989831805229187,grad_norm: 0.9954989259255435, iteration: 33921
loss: 0.959014356136322,grad_norm: 0.9409348158566271, iteration: 33922
loss: 0.9952678084373474,grad_norm: 0.8728733970976423, iteration: 33923
loss: 0.9729841351509094,grad_norm: 0.8918851143038596, iteration: 33924
loss: 1.0220996141433716,grad_norm: 0.9999997566579403, iteration: 33925
loss: 1.003828763961792,grad_norm: 0.880211745113143, iteration: 33926
loss: 0.9755011796951294,grad_norm: 0.9227198478333584, iteration: 33927
loss: 1.0247491598129272,grad_norm: 0.9710673116766866, iteration: 33928
loss: 1.0383045673370361,grad_norm: 0.9652296419995549, iteration: 33929
loss: 1.0123233795166016,grad_norm: 0.9494826231007364, iteration: 33930
loss: 1.0149307250976562,grad_norm: 0.9999990630334058, iteration: 33931
loss: 0.9640452265739441,grad_norm: 0.9818488061986067, iteration: 33932
loss: 1.0136425495147705,grad_norm: 0.9999991409822636, iteration: 33933
loss: 0.9644792675971985,grad_norm: 0.9999992062629058, iteration: 33934
loss: 1.006498098373413,grad_norm: 0.9234110727187482, iteration: 33935
loss: 1.0055303573608398,grad_norm: 0.8348703184999074, iteration: 33936
loss: 1.0271583795547485,grad_norm: 0.999999217841877, iteration: 33937
loss: 1.0550906658172607,grad_norm: 0.9999996570554138, iteration: 33938
loss: 1.0182620286941528,grad_norm: 0.7231154168865324, iteration: 33939
loss: 1.0074527263641357,grad_norm: 0.9999993393402464, iteration: 33940
loss: 1.0186606645584106,grad_norm: 0.9797259310367831, iteration: 33941
loss: 0.9967437386512756,grad_norm: 0.9227973853040189, iteration: 33942
loss: 1.0160671472549438,grad_norm: 0.9999990036797131, iteration: 33943
loss: 1.0119229555130005,grad_norm: 0.9999989346446337, iteration: 33944
loss: 0.9969387054443359,grad_norm: 0.9720629446944357, iteration: 33945
loss: 1.0015588998794556,grad_norm: 0.9999990774106549, iteration: 33946
loss: 0.9948768019676208,grad_norm: 0.9999990506396071, iteration: 33947
loss: 1.0350686311721802,grad_norm: 0.9759288050539927, iteration: 33948
loss: 0.9746854901313782,grad_norm: 0.9485066141651177, iteration: 33949
loss: 1.0261157751083374,grad_norm: 0.9999990811251183, iteration: 33950
loss: 1.0194554328918457,grad_norm: 0.9631204327651467, iteration: 33951
loss: 0.9891843795776367,grad_norm: 0.9999990541942706, iteration: 33952
loss: 0.9686072468757629,grad_norm: 0.9949661314574594, iteration: 33953
loss: 0.9598389863967896,grad_norm: 0.9557257034615685, iteration: 33954
loss: 0.9495452642440796,grad_norm: 0.8602692888101775, iteration: 33955
loss: 1.0280373096466064,grad_norm: 0.8578563707789856, iteration: 33956
loss: 1.0291922092437744,grad_norm: 0.999999210698332, iteration: 33957
loss: 1.0477417707443237,grad_norm: 0.8228522795288126, iteration: 33958
loss: 1.0094636678695679,grad_norm: 0.8946220017609595, iteration: 33959
loss: 0.9952940940856934,grad_norm: 0.921139818048957, iteration: 33960
loss: 1.039566159248352,grad_norm: 0.8335033979072619, iteration: 33961
loss: 0.9765989184379578,grad_norm: 0.9364148338796935, iteration: 33962
loss: 0.9988160133361816,grad_norm: 0.9999990367145579, iteration: 33963
loss: 1.0207769870758057,grad_norm: 0.9329655268679414, iteration: 33964
loss: 1.0039817094802856,grad_norm: 0.9401247139789849, iteration: 33965
loss: 1.014984130859375,grad_norm: 0.9724052295244786, iteration: 33966
loss: 1.0037423372268677,grad_norm: 0.9999990240877042, iteration: 33967
loss: 1.0077366828918457,grad_norm: 0.9854128604702471, iteration: 33968
loss: 1.0103211402893066,grad_norm: 0.9999989871124173, iteration: 33969
loss: 0.999706506729126,grad_norm: 0.9999991547861993, iteration: 33970
loss: 0.9964154362678528,grad_norm: 0.9538084495477652, iteration: 33971
loss: 1.0577681064605713,grad_norm: 0.9769628041219608, iteration: 33972
loss: 0.9796294569969177,grad_norm: 0.8894178393056817, iteration: 33973
loss: 1.0083948373794556,grad_norm: 0.7060257365188864, iteration: 33974
loss: 0.9951803684234619,grad_norm: 0.9999990038734422, iteration: 33975
loss: 1.0163543224334717,grad_norm: 0.9999990732043289, iteration: 33976
loss: 0.9757156372070312,grad_norm: 0.9194415111337952, iteration: 33977
loss: 0.9938486218452454,grad_norm: 0.8538583188325098, iteration: 33978
loss: 1.017727017402649,grad_norm: 0.8764946409575025, iteration: 33979
loss: 0.9804195761680603,grad_norm: 0.9999991712675985, iteration: 33980
loss: 1.024945616722107,grad_norm: 0.9999990549672566, iteration: 33981
loss: 0.986459493637085,grad_norm: 0.9999992681580482, iteration: 33982
loss: 1.021829605102539,grad_norm: 0.9999993067055318, iteration: 33983
loss: 1.0048737525939941,grad_norm: 0.9593478317167091, iteration: 33984
loss: 1.0245376825332642,grad_norm: 1.0000000431431053, iteration: 33985
loss: 1.0334510803222656,grad_norm: 0.978845810024618, iteration: 33986
loss: 0.987587571144104,grad_norm: 0.8782911554559681, iteration: 33987
loss: 0.9782029986381531,grad_norm: 0.9197557252948111, iteration: 33988
loss: 0.9995184540748596,grad_norm: 0.9533450606022458, iteration: 33989
loss: 1.030111312866211,grad_norm: 0.8795442188306508, iteration: 33990
loss: 1.0259931087493896,grad_norm: 0.8515096115392286, iteration: 33991
loss: 1.0021733045578003,grad_norm: 0.9999991656520525, iteration: 33992
loss: 0.9919236302375793,grad_norm: 0.8466262333027993, iteration: 33993
loss: 0.9776529669761658,grad_norm: 0.8373247127497512, iteration: 33994
loss: 1.0105152130126953,grad_norm: 0.884514514147275, iteration: 33995
loss: 1.0418031215667725,grad_norm: 0.9429743120197576, iteration: 33996
loss: 0.9803898334503174,grad_norm: 0.8239655608120178, iteration: 33997
loss: 1.0342134237289429,grad_norm: 0.9999995749913134, iteration: 33998
loss: 1.0467513799667358,grad_norm: 0.9872736512614108, iteration: 33999
loss: 1.0131686925888062,grad_norm: 0.9999992280927763, iteration: 34000
loss: 1.002356767654419,grad_norm: 0.9999991241146478, iteration: 34001
loss: 1.0184221267700195,grad_norm: 0.9999991535172257, iteration: 34002
loss: 1.023122787475586,grad_norm: 0.9999991278130608, iteration: 34003
loss: 0.9798010587692261,grad_norm: 0.999999086111533, iteration: 34004
loss: 0.9908163547515869,grad_norm: 0.9030021771967316, iteration: 34005
loss: 1.0005779266357422,grad_norm: 0.7394940449259991, iteration: 34006
loss: 0.9833816885948181,grad_norm: 0.8803991777629225, iteration: 34007
loss: 1.0287442207336426,grad_norm: 0.8657250546970329, iteration: 34008
loss: 1.0354381799697876,grad_norm: 0.9999991960429266, iteration: 34009
loss: 1.002503752708435,grad_norm: 0.9999992135582062, iteration: 34010
loss: 1.0516825914382935,grad_norm: 0.9999991676200768, iteration: 34011
loss: 0.9792754054069519,grad_norm: 0.9999991208537103, iteration: 34012
loss: 1.0170249938964844,grad_norm: 0.9999991503240189, iteration: 34013
loss: 0.9744012951850891,grad_norm: 0.9999991238569451, iteration: 34014
loss: 0.9987725615501404,grad_norm: 0.9867544914820524, iteration: 34015
loss: 1.0020489692687988,grad_norm: 0.900216418070828, iteration: 34016
loss: 1.0646532773971558,grad_norm: 0.999999313813106, iteration: 34017
loss: 0.9828835725784302,grad_norm: 0.8920841011200884, iteration: 34018
loss: 1.0030895471572876,grad_norm: 0.9999991705330965, iteration: 34019
loss: 0.9729083776473999,grad_norm: 0.9999992162449811, iteration: 34020
loss: 1.0124777555465698,grad_norm: 0.9999990255135992, iteration: 34021
loss: 0.9979814291000366,grad_norm: 0.9999992662988068, iteration: 34022
loss: 0.9695723652839661,grad_norm: 0.8884861158019938, iteration: 34023
loss: 0.9974737763404846,grad_norm: 0.9918893038351427, iteration: 34024
loss: 1.0194867849349976,grad_norm: 0.9999990571115482, iteration: 34025
loss: 1.0171277523040771,grad_norm: 0.9999990381238928, iteration: 34026
loss: 1.018592119216919,grad_norm: 0.8823595702788049, iteration: 34027
loss: 0.9839089512825012,grad_norm: 0.7745555126652546, iteration: 34028
loss: 0.9450225830078125,grad_norm: 0.9733523551706572, iteration: 34029
loss: 1.0070570707321167,grad_norm: 0.9999990741858987, iteration: 34030
loss: 0.9968090057373047,grad_norm: 0.9037406947274539, iteration: 34031
loss: 1.0016696453094482,grad_norm: 0.9772336081603478, iteration: 34032
loss: 0.9980670809745789,grad_norm: 0.9478750278872172, iteration: 34033
loss: 1.051381230354309,grad_norm: 0.9688180387526273, iteration: 34034
loss: 1.011584997177124,grad_norm: 0.8912488117021454, iteration: 34035
loss: 1.0388630628585815,grad_norm: 0.9999991618120316, iteration: 34036
loss: 1.0460593700408936,grad_norm: 0.9999995743015503, iteration: 34037
loss: 0.9639075994491577,grad_norm: 0.9999992452119348, iteration: 34038
loss: 0.992091178894043,grad_norm: 0.9121923757232676, iteration: 34039
loss: 0.9896973371505737,grad_norm: 0.93358361605842, iteration: 34040
loss: 0.9869765639305115,grad_norm: 0.8907001229404093, iteration: 34041
loss: 0.9848047494888306,grad_norm: 0.9999992789234917, iteration: 34042
loss: 0.9886770844459534,grad_norm: 0.9999990383192905, iteration: 34043
loss: 1.0313594341278076,grad_norm: 0.9999992227969373, iteration: 34044
loss: 0.973888099193573,grad_norm: 0.9420514313111817, iteration: 34045
loss: 1.064805269241333,grad_norm: 0.9999997282728338, iteration: 34046
loss: 0.9665147662162781,grad_norm: 0.9245961387052587, iteration: 34047
loss: 0.9699296951293945,grad_norm: 0.9999989830764054, iteration: 34048
loss: 1.0278042554855347,grad_norm: 0.9999990882113609, iteration: 34049
loss: 0.9794304966926575,grad_norm: 0.9632772406184171, iteration: 34050
loss: 1.0039409399032593,grad_norm: 0.9999991882557518, iteration: 34051
loss: 1.0146116018295288,grad_norm: 0.8780271595625727, iteration: 34052
loss: 1.055242657661438,grad_norm: 0.7581175339072641, iteration: 34053
loss: 0.9899002909660339,grad_norm: 0.9797896321886705, iteration: 34054
loss: 0.9807010889053345,grad_norm: 0.9221736502883637, iteration: 34055
loss: 0.9974251985549927,grad_norm: 0.9999990902954813, iteration: 34056
loss: 0.9788221716880798,grad_norm: 0.7563540453224955, iteration: 34057
loss: 1.0220822095870972,grad_norm: 0.9470211952814791, iteration: 34058
loss: 1.0579156875610352,grad_norm: 0.9999991075608202, iteration: 34059
loss: 0.983893096446991,grad_norm: 0.9999990196657167, iteration: 34060
loss: 1.009830117225647,grad_norm: 0.9999992388718383, iteration: 34061
loss: 1.0034226179122925,grad_norm: 0.9999990465161293, iteration: 34062
loss: 1.0568586587905884,grad_norm: 0.8773899539052448, iteration: 34063
loss: 1.0003467798233032,grad_norm: 0.9999992655074165, iteration: 34064
loss: 0.9989359974861145,grad_norm: 0.9970872241653148, iteration: 34065
loss: 1.0246281623840332,grad_norm: 0.7917622363231112, iteration: 34066
loss: 1.012999176979065,grad_norm: 0.9999990892227503, iteration: 34067
loss: 0.9876828789710999,grad_norm: 0.8832007376589165, iteration: 34068
loss: 0.9661368727684021,grad_norm: 0.8944513213972636, iteration: 34069
loss: 1.0017591714859009,grad_norm: 0.876025607815906, iteration: 34070
loss: 0.9588799476623535,grad_norm: 0.9999990615900598, iteration: 34071
loss: 1.0402765274047852,grad_norm: 0.9051818874300197, iteration: 34072
loss: 1.059317708015442,grad_norm: 0.999999693432905, iteration: 34073
loss: 0.9888644814491272,grad_norm: 0.9855051029004169, iteration: 34074
loss: 1.0248262882232666,grad_norm: 0.8942065581569788, iteration: 34075
loss: 0.9873360991477966,grad_norm: 0.8635881282619979, iteration: 34076
loss: 0.9915306568145752,grad_norm: 0.9999991534143252, iteration: 34077
loss: 1.0283766984939575,grad_norm: 0.9999989317078795, iteration: 34078
loss: 1.0260467529296875,grad_norm: 0.8151808237458059, iteration: 34079
loss: 1.0256900787353516,grad_norm: 0.8913809742573241, iteration: 34080
loss: 1.0228415727615356,grad_norm: 0.9716496407144962, iteration: 34081
loss: 1.004462480545044,grad_norm: 0.9721321163688643, iteration: 34082
loss: 0.9785705804824829,grad_norm: 0.9149106161914151, iteration: 34083
loss: 1.0312418937683105,grad_norm: 0.8318200175416175, iteration: 34084
loss: 0.9961772561073303,grad_norm: 0.8411320830244219, iteration: 34085
loss: 1.022773265838623,grad_norm: 0.9045785661561749, iteration: 34086
loss: 0.9974005222320557,grad_norm: 0.9999991609282657, iteration: 34087
loss: 1.0175788402557373,grad_norm: 0.9032040287807548, iteration: 34088
loss: 0.9659174680709839,grad_norm: 0.9333683704243468, iteration: 34089
loss: 1.0286763906478882,grad_norm: 0.8775336228988163, iteration: 34090
loss: 0.9989988207817078,grad_norm: 0.8485044820628909, iteration: 34091
loss: 0.9702919125556946,grad_norm: 0.945615570437873, iteration: 34092
loss: 1.026045322418213,grad_norm: 0.9999990168593832, iteration: 34093
loss: 1.0022932291030884,grad_norm: 0.8114585270882685, iteration: 34094
loss: 1.0424739122390747,grad_norm: 0.999999062299549, iteration: 34095
loss: 0.9641917943954468,grad_norm: 0.886812099633116, iteration: 34096
loss: 1.030958890914917,grad_norm: 0.9999991657613263, iteration: 34097
loss: 0.987545907497406,grad_norm: 0.9142608980729036, iteration: 34098
loss: 0.966220498085022,grad_norm: 0.9999990170967915, iteration: 34099
loss: 0.988561749458313,grad_norm: 0.913402934324868, iteration: 34100
loss: 0.9817773103713989,grad_norm: 0.9037533812354791, iteration: 34101
loss: 1.001902461051941,grad_norm: 0.8884346087962336, iteration: 34102
loss: 1.0242971181869507,grad_norm: 0.9999991638362863, iteration: 34103
loss: 1.0086243152618408,grad_norm: 0.941502456484333, iteration: 34104
loss: 0.9805290102958679,grad_norm: 0.9034252896668507, iteration: 34105
loss: 0.9838419556617737,grad_norm: 0.9999993668006655, iteration: 34106
loss: 1.0116623640060425,grad_norm: 0.949725676245042, iteration: 34107
loss: 0.9627932906150818,grad_norm: 0.9999991702032361, iteration: 34108
loss: 1.01755952835083,grad_norm: 0.9999991014732662, iteration: 34109
loss: 0.977543830871582,grad_norm: 0.9999990708551211, iteration: 34110
loss: 0.9943548440933228,grad_norm: 0.9999991559321368, iteration: 34111
loss: 1.011959433555603,grad_norm: 0.9492375951224535, iteration: 34112
loss: 1.0306466817855835,grad_norm: 0.8492819281762647, iteration: 34113
loss: 1.0210288763046265,grad_norm: 0.7789500121066789, iteration: 34114
loss: 1.0153868198394775,grad_norm: 0.8864555703906006, iteration: 34115
loss: 1.005277156829834,grad_norm: 0.8811174002307437, iteration: 34116
loss: 0.9932663440704346,grad_norm: 0.846549853930514, iteration: 34117
loss: 1.0214163064956665,grad_norm: 0.8917651180227285, iteration: 34118
loss: 1.0321646928787231,grad_norm: 0.9999990364596877, iteration: 34119
loss: 1.0189428329467773,grad_norm: 0.9999990430481243, iteration: 34120
loss: 1.0055094957351685,grad_norm: 0.999999074032136, iteration: 34121
loss: 1.0326534509658813,grad_norm: 0.9999990210665304, iteration: 34122
loss: 1.0134727954864502,grad_norm: 0.9389960134047499, iteration: 34123
loss: 1.0181056261062622,grad_norm: 0.8524052248365911, iteration: 34124
loss: 0.990928053855896,grad_norm: 0.9999990875628874, iteration: 34125
loss: 1.0009671449661255,grad_norm: 0.9551553255339205, iteration: 34126
loss: 0.9583410620689392,grad_norm: 0.7379649379180406, iteration: 34127
loss: 1.005153775215149,grad_norm: 0.8761587486410928, iteration: 34128
loss: 0.9451854228973389,grad_norm: 0.9999990728119726, iteration: 34129
loss: 1.0029176473617554,grad_norm: 0.9429171850773086, iteration: 34130
loss: 1.0152415037155151,grad_norm: 0.9763249582009407, iteration: 34131
loss: 0.9515407681465149,grad_norm: 0.8894520734351499, iteration: 34132
loss: 1.0273833274841309,grad_norm: 0.9999992045458171, iteration: 34133
loss: 1.0326597690582275,grad_norm: 0.9191814388387809, iteration: 34134
loss: 1.0005429983139038,grad_norm: 0.803117671153983, iteration: 34135
loss: 0.9958118200302124,grad_norm: 0.9999989870804709, iteration: 34136
loss: 1.00812828540802,grad_norm: 0.8058935196666308, iteration: 34137
loss: 1.0352047681808472,grad_norm: 0.9874296618144432, iteration: 34138
loss: 0.9653991460800171,grad_norm: 0.8925468607584117, iteration: 34139
loss: 0.9815673828125,grad_norm: 0.9399906629617659, iteration: 34140
loss: 0.9910920858383179,grad_norm: 0.9442284252775192, iteration: 34141
loss: 1.0273964405059814,grad_norm: 0.9999990935807052, iteration: 34142
loss: 1.019063949584961,grad_norm: 0.99999910429173, iteration: 34143
loss: 0.9989745020866394,grad_norm: 0.8956868386804293, iteration: 34144
loss: 0.9596238136291504,grad_norm: 0.9999992218219381, iteration: 34145
loss: 1.0321249961853027,grad_norm: 0.82366549756203, iteration: 34146
loss: 0.9600911736488342,grad_norm: 0.867375552236888, iteration: 34147
loss: 1.029529333114624,grad_norm: 0.8788264930457648, iteration: 34148
loss: 1.0013196468353271,grad_norm: 0.9999990670587124, iteration: 34149
loss: 0.990143895149231,grad_norm: 0.999999338819113, iteration: 34150
loss: 1.0127999782562256,grad_norm: 0.9999990659605763, iteration: 34151
loss: 1.0167988538742065,grad_norm: 0.9226363097928248, iteration: 34152
loss: 0.9961910843849182,grad_norm: 0.9244616326473647, iteration: 34153
loss: 1.0083712339401245,grad_norm: 0.9999989908204564, iteration: 34154
loss: 1.015258550643921,grad_norm: 0.7875944070675822, iteration: 34155
loss: 0.9845305681228638,grad_norm: 0.9999992550904014, iteration: 34156
loss: 1.0322786569595337,grad_norm: 0.9999990885617347, iteration: 34157
loss: 0.9804337024688721,grad_norm: 0.9999992471563396, iteration: 34158
loss: 1.0133534669876099,grad_norm: 0.9245545119517601, iteration: 34159
loss: 0.9850519299507141,grad_norm: 0.9391474734794021, iteration: 34160
loss: 1.0052570104599,grad_norm: 0.9999991116644701, iteration: 34161
loss: 1.0069681406021118,grad_norm: 0.9999991715396469, iteration: 34162
loss: 0.9954186081886292,grad_norm: 0.894899418953174, iteration: 34163
loss: 1.0157673358917236,grad_norm: 0.8299265778566979, iteration: 34164
loss: 0.9845905303955078,grad_norm: 0.9927986147799446, iteration: 34165
loss: 0.9820970892906189,grad_norm: 0.999999167816575, iteration: 34166
loss: 1.0369223356246948,grad_norm: 0.8769012936165574, iteration: 34167
loss: 0.9876736998558044,grad_norm: 0.9999991272006212, iteration: 34168
loss: 1.0024088621139526,grad_norm: 0.9162810153283033, iteration: 34169
loss: 1.0303386449813843,grad_norm: 0.9777497467608021, iteration: 34170
loss: 0.9956696033477783,grad_norm: 0.8980928979438022, iteration: 34171
loss: 1.0233254432678223,grad_norm: 0.9999989830397392, iteration: 34172
loss: 1.012829303741455,grad_norm: 0.999999279719992, iteration: 34173
loss: 1.0381115674972534,grad_norm: 0.999999137052821, iteration: 34174
loss: 0.9801039695739746,grad_norm: 0.9305831587690807, iteration: 34175
loss: 1.0470150709152222,grad_norm: 0.999999031683367, iteration: 34176
loss: 1.00772225856781,grad_norm: 0.9595694641958803, iteration: 34177
loss: 1.031538963317871,grad_norm: 0.9439528837316371, iteration: 34178
loss: 1.012281894683838,grad_norm: 0.8035714910364773, iteration: 34179
loss: 1.0156574249267578,grad_norm: 0.9999990437391207, iteration: 34180
loss: 0.9931163191795349,grad_norm: 0.9530388968272465, iteration: 34181
loss: 0.9734373688697815,grad_norm: 0.8769637837377167, iteration: 34182
loss: 0.979963481426239,grad_norm: 0.9881686893979353, iteration: 34183
loss: 1.0392206907272339,grad_norm: 0.9999991329020826, iteration: 34184
loss: 0.9857929944992065,grad_norm: 0.8623383606007153, iteration: 34185
loss: 1.0047093629837036,grad_norm: 0.9276776489692274, iteration: 34186
loss: 1.0048130750656128,grad_norm: 0.88873691425412, iteration: 34187
loss: 1.0098414421081543,grad_norm: 0.9999992321199903, iteration: 34188
loss: 1.0223313570022583,grad_norm: 0.9999991172894271, iteration: 34189
loss: 0.9974838495254517,grad_norm: 0.9999990091213468, iteration: 34190
loss: 0.9909560680389404,grad_norm: 0.9621362908876776, iteration: 34191
loss: 0.9976390600204468,grad_norm: 0.7119321364241931, iteration: 34192
loss: 1.012742519378662,grad_norm: 0.9837485037804724, iteration: 34193
loss: 0.9799920916557312,grad_norm: 0.917423432781231, iteration: 34194
loss: 1.0284925699234009,grad_norm: 0.8785805096711721, iteration: 34195
loss: 0.9913529753684998,grad_norm: 0.9999991245344929, iteration: 34196
loss: 0.9745345115661621,grad_norm: 0.9518450634799567, iteration: 34197
loss: 1.0030661821365356,grad_norm: 0.8796466306059941, iteration: 34198
loss: 0.9667112231254578,grad_norm: 0.9496615854841075, iteration: 34199
loss: 1.0050700902938843,grad_norm: 0.9999990478494295, iteration: 34200
loss: 0.9749610424041748,grad_norm: 0.9999988723239559, iteration: 34201
loss: 0.9781429767608643,grad_norm: 0.9585637223566202, iteration: 34202
loss: 0.98246169090271,grad_norm: 0.938881248081044, iteration: 34203
loss: 1.0256227254867554,grad_norm: 0.9733760082045292, iteration: 34204
loss: 0.976434588432312,grad_norm: 0.8422807323816538, iteration: 34205
loss: 0.9988266229629517,grad_norm: 0.9933641268358556, iteration: 34206
loss: 0.9814826250076294,grad_norm: 0.9514282347643832, iteration: 34207
loss: 1.0183496475219727,grad_norm: 0.9410403515992011, iteration: 34208
loss: 0.9935651421546936,grad_norm: 0.9999991125892445, iteration: 34209
loss: 0.9677225351333618,grad_norm: 0.8538069060838868, iteration: 34210
loss: 0.9681156277656555,grad_norm: 0.9929744708111771, iteration: 34211
loss: 1.0300052165985107,grad_norm: 0.9999990562746708, iteration: 34212
loss: 1.0107089281082153,grad_norm: 0.9736344684103252, iteration: 34213
loss: 1.009562611579895,grad_norm: 0.9690721453418518, iteration: 34214
loss: 0.9697007536888123,grad_norm: 0.7861498010514231, iteration: 34215
loss: 0.9920310974121094,grad_norm: 0.9032592806184339, iteration: 34216
loss: 1.025624394416809,grad_norm: 0.9999991102094742, iteration: 34217
loss: 0.9934137463569641,grad_norm: 0.9782321184541184, iteration: 34218
loss: 1.0384995937347412,grad_norm: 0.9999991123328255, iteration: 34219
loss: 1.0312514305114746,grad_norm: 0.8893286418200889, iteration: 34220
loss: 1.0296424627304077,grad_norm: 0.8723852677855466, iteration: 34221
loss: 0.9882367849349976,grad_norm: 0.9999990202273521, iteration: 34222
loss: 1.0117493867874146,grad_norm: 0.895133996330451, iteration: 34223
loss: 0.9964317083358765,grad_norm: 0.9999991879431082, iteration: 34224
loss: 0.9914025664329529,grad_norm: 0.9556061228860891, iteration: 34225
loss: 1.0060142278671265,grad_norm: 0.871361413647939, iteration: 34226
loss: 0.9731137752532959,grad_norm: 0.9999990294616088, iteration: 34227
loss: 0.9581856727600098,grad_norm: 0.9999990192864683, iteration: 34228
loss: 1.0205955505371094,grad_norm: 0.9684820974868827, iteration: 34229
loss: 1.0067470073699951,grad_norm: 0.8405822118933177, iteration: 34230
loss: 1.0235323905944824,grad_norm: 0.9999991180227408, iteration: 34231
loss: 1.0275154113769531,grad_norm: 0.9364080853767368, iteration: 34232
loss: 1.006941556930542,grad_norm: 0.9999991858888913, iteration: 34233
loss: 1.0203595161437988,grad_norm: 0.9809649244618519, iteration: 34234
loss: 0.9820908904075623,grad_norm: 0.9672420056968569, iteration: 34235
loss: 0.984035074710846,grad_norm: 0.9161977533751332, iteration: 34236
loss: 1.020235300064087,grad_norm: 0.8720416658191426, iteration: 34237
loss: 0.9666371941566467,grad_norm: 0.9887442006419526, iteration: 34238
loss: 0.9728593826293945,grad_norm: 0.9999994112926122, iteration: 34239
loss: 1.0196417570114136,grad_norm: 0.8955877047256642, iteration: 34240
loss: 1.0131423473358154,grad_norm: 0.7743955401854932, iteration: 34241
loss: 0.9963308572769165,grad_norm: 0.89791977015383, iteration: 34242
loss: 0.9812999963760376,grad_norm: 0.9067672648061127, iteration: 34243
loss: 1.0042905807495117,grad_norm: 0.9036791618291169, iteration: 34244
loss: 1.0014528036117554,grad_norm: 0.9486688882556454, iteration: 34245
loss: 1.0117095708847046,grad_norm: 0.9999990401595374, iteration: 34246
loss: 0.9752124547958374,grad_norm: 0.9999995524279186, iteration: 34247
loss: 0.9885717630386353,grad_norm: 0.9999991082212273, iteration: 34248
loss: 1.0131233930587769,grad_norm: 0.905204755856592, iteration: 34249
loss: 1.0331581830978394,grad_norm: 0.9999991106081396, iteration: 34250
loss: 1.031820297241211,grad_norm: 0.9587900241418986, iteration: 34251
loss: 1.0122401714324951,grad_norm: 0.999999098561875, iteration: 34252
loss: 0.9970174431800842,grad_norm: 0.9219562863524645, iteration: 34253
loss: 1.0134220123291016,grad_norm: 0.8651467931816209, iteration: 34254
loss: 1.0036499500274658,grad_norm: 0.851923070969178, iteration: 34255
loss: 1.029271125793457,grad_norm: 0.9787308939114032, iteration: 34256
loss: 1.0271546840667725,grad_norm: 0.8694715835703551, iteration: 34257
loss: 0.9830586314201355,grad_norm: 0.9999992200937916, iteration: 34258
loss: 1.052925944328308,grad_norm: 0.9999996201374503, iteration: 34259
loss: 1.0035585165023804,grad_norm: 0.9917718805553388, iteration: 34260
loss: 1.0042005777359009,grad_norm: 0.8042669937981539, iteration: 34261
loss: 1.012721300125122,grad_norm: 0.9999993165572544, iteration: 34262
loss: 1.0148646831512451,grad_norm: 0.9999996374740712, iteration: 34263
loss: 1.040381669998169,grad_norm: 0.9999992178508773, iteration: 34264
loss: 0.9982285499572754,grad_norm: 0.9999991748867576, iteration: 34265
loss: 1.02668297290802,grad_norm: 0.8843153806624954, iteration: 34266
loss: 1.0141336917877197,grad_norm: 0.9999990855706714, iteration: 34267
loss: 1.002790093421936,grad_norm: 0.938120002410791, iteration: 34268
loss: 1.0253214836120605,grad_norm: 0.959765238946626, iteration: 34269
loss: 0.9796577095985413,grad_norm: 0.9999992752177378, iteration: 34270
loss: 1.0040851831436157,grad_norm: 0.9999990826145768, iteration: 34271
loss: 0.9992454648017883,grad_norm: 0.9999997669071775, iteration: 34272
loss: 0.9793181419372559,grad_norm: 0.9022630644917264, iteration: 34273
loss: 1.014521837234497,grad_norm: 0.9999990323009821, iteration: 34274
loss: 0.9879242777824402,grad_norm: 0.9999991105529324, iteration: 34275
loss: 0.980440616607666,grad_norm: 0.9915920496103271, iteration: 34276
loss: 1.001495599746704,grad_norm: 0.9999993833061704, iteration: 34277
loss: 0.9539910554885864,grad_norm: 0.9669071646418601, iteration: 34278
loss: 1.0157694816589355,grad_norm: 0.936105791971662, iteration: 34279
loss: 0.988946259021759,grad_norm: 0.8816284706368026, iteration: 34280
loss: 1.0274039506912231,grad_norm: 0.9999991705879518, iteration: 34281
loss: 1.0361019372940063,grad_norm: 0.9999990694726693, iteration: 34282
loss: 1.0052598714828491,grad_norm: 0.9506474370295653, iteration: 34283
loss: 1.0162792205810547,grad_norm: 0.9999991404222094, iteration: 34284
loss: 1.0084902048110962,grad_norm: 0.9999991464720671, iteration: 34285
loss: 0.9997813105583191,grad_norm: 0.9687419423331849, iteration: 34286
loss: 0.991719663143158,grad_norm: 0.853931234614162, iteration: 34287
loss: 1.0000500679016113,grad_norm: 0.9337925379164355, iteration: 34288
loss: 1.025073766708374,grad_norm: 0.9999992704189793, iteration: 34289
loss: 0.9710655808448792,grad_norm: 0.8784968480926881, iteration: 34290
loss: 0.9829784035682678,grad_norm: 0.9537950615033661, iteration: 34291
loss: 1.0024694204330444,grad_norm: 0.9999990203723382, iteration: 34292
loss: 1.015063762664795,grad_norm: 0.9999991428698002, iteration: 34293
loss: 1.0245935916900635,grad_norm: 0.999998952711296, iteration: 34294
loss: 0.9671356081962585,grad_norm: 0.7746343273105406, iteration: 34295
loss: 1.0225383043289185,grad_norm: 0.9591823853859587, iteration: 34296
loss: 1.034454584121704,grad_norm: 0.9999993260284793, iteration: 34297
loss: 0.9989181160926819,grad_norm: 0.9860734583218199, iteration: 34298
loss: 1.0072375535964966,grad_norm: 0.8860494494792615, iteration: 34299
loss: 1.0615150928497314,grad_norm: 0.9687081857962829, iteration: 34300
loss: 1.00191330909729,grad_norm: 0.926656648866128, iteration: 34301
loss: 1.0191959142684937,grad_norm: 0.8840963472090974, iteration: 34302
loss: 1.0097019672393799,grad_norm: 0.9118520436750468, iteration: 34303
loss: 0.986253023147583,grad_norm: 0.9999991186486763, iteration: 34304
loss: 0.9974759817123413,grad_norm: 0.9839836147693963, iteration: 34305
loss: 1.0007116794586182,grad_norm: 0.9999991462990986, iteration: 34306
loss: 0.9925346374511719,grad_norm: 0.9999991210010674, iteration: 34307
loss: 0.983367383480072,grad_norm: 0.9448732628512582, iteration: 34308
loss: 0.981659471988678,grad_norm: 0.9630631173491091, iteration: 34309
loss: 0.9392502903938293,grad_norm: 0.983462804823692, iteration: 34310
loss: 1.0158727169036865,grad_norm: 0.9999991991642013, iteration: 34311
loss: 1.03184974193573,grad_norm: 0.9999991853510324, iteration: 34312
loss: 0.9963751435279846,grad_norm: 0.9999990624416907, iteration: 34313
loss: 1.009101152420044,grad_norm: 0.9995192162252459, iteration: 34314
loss: 0.9689818620681763,grad_norm: 0.8887557241238389, iteration: 34315
loss: 1.0097707509994507,grad_norm: 0.9554771913880404, iteration: 34316
loss: 1.0572106838226318,grad_norm: 0.9999994282004374, iteration: 34317
loss: 0.960835337638855,grad_norm: 0.8934855830086783, iteration: 34318
loss: 1.0166682004928589,grad_norm: 0.9999995858636714, iteration: 34319
loss: 0.9614899158477783,grad_norm: 0.7829040265012104, iteration: 34320
loss: 1.019601583480835,grad_norm: 0.9359234140310005, iteration: 34321
loss: 1.0125848054885864,grad_norm: 0.9999989609493944, iteration: 34322
loss: 1.034799575805664,grad_norm: 0.9280617228233445, iteration: 34323
loss: 1.0186710357666016,grad_norm: 0.8839855607190227, iteration: 34324
loss: 1.000327467918396,grad_norm: 0.9675107931170356, iteration: 34325
loss: 0.9940697550773621,grad_norm: 0.9813050052164596, iteration: 34326
loss: 1.0063575506210327,grad_norm: 0.9999994091972332, iteration: 34327
loss: 0.9970289468765259,grad_norm: 0.8494617930226672, iteration: 34328
loss: 0.9771950840950012,grad_norm: 0.9660224595667876, iteration: 34329
loss: 1.032076358795166,grad_norm: 0.9690651180072973, iteration: 34330
loss: 0.997473418712616,grad_norm: 0.9999998159956973, iteration: 34331
loss: 0.9922475218772888,grad_norm: 0.8268560278064075, iteration: 34332
loss: 0.9975225925445557,grad_norm: 0.9999992011327372, iteration: 34333
loss: 0.9835224151611328,grad_norm: 0.9999992647308312, iteration: 34334
loss: 1.0028505325317383,grad_norm: 0.999999193150103, iteration: 34335
loss: 0.9723906517028809,grad_norm: 0.9999990016979382, iteration: 34336
loss: 1.0462510585784912,grad_norm: 0.9751798988672927, iteration: 34337
loss: 1.1751784086227417,grad_norm: 0.9999998323718351, iteration: 34338
loss: 1.0256847143173218,grad_norm: 0.9999991961642207, iteration: 34339
loss: 0.9842290282249451,grad_norm: 0.874849791733492, iteration: 34340
loss: 1.0506678819656372,grad_norm: 0.9999991860293014, iteration: 34341
loss: 0.9810521602630615,grad_norm: 0.9999990715868144, iteration: 34342
loss: 0.9802683591842651,grad_norm: 0.9999992845261797, iteration: 34343
loss: 1.0403650999069214,grad_norm: 0.9499503986957543, iteration: 34344
loss: 1.0027450323104858,grad_norm: 0.9999991194884748, iteration: 34345
loss: 0.9826669096946716,grad_norm: 0.9999998099329046, iteration: 34346
loss: 0.9930292367935181,grad_norm: 0.9850639797123042, iteration: 34347
loss: 0.9772578477859497,grad_norm: 0.9999991127217336, iteration: 34348
loss: 1.0153838396072388,grad_norm: 0.9999991688874874, iteration: 34349
loss: 0.999035120010376,grad_norm: 0.9999990823284028, iteration: 34350
loss: 0.9689090251922607,grad_norm: 0.9999992501993836, iteration: 34351
loss: 1.0679563283920288,grad_norm: 0.9999996971860371, iteration: 34352
loss: 1.005142331123352,grad_norm: 0.9244314756070174, iteration: 34353
loss: 1.0194545984268188,grad_norm: 0.9999996541284938, iteration: 34354
loss: 1.0313136577606201,grad_norm: 0.9999996762133667, iteration: 34355
loss: 1.1372439861297607,grad_norm: 0.9999996483896745, iteration: 34356
loss: 0.9899752736091614,grad_norm: 0.9999993503297502, iteration: 34357
loss: 1.0598323345184326,grad_norm: 0.9999993387247424, iteration: 34358
loss: 1.0294413566589355,grad_norm: 0.9999992713414582, iteration: 34359
loss: 0.9652996063232422,grad_norm: 0.9999990755665843, iteration: 34360
loss: 1.0434426069259644,grad_norm: 0.9999994665935906, iteration: 34361
loss: 1.0314295291900635,grad_norm: 0.9928859710333547, iteration: 34362
loss: 1.0185656547546387,grad_norm: 0.9999992418142448, iteration: 34363
loss: 1.0400218963623047,grad_norm: 0.9999994229577804, iteration: 34364
loss: 1.0475704669952393,grad_norm: 0.9999994118316955, iteration: 34365
loss: 1.0103533267974854,grad_norm: 0.7655363605362271, iteration: 34366
loss: 0.9789100289344788,grad_norm: 0.9999991001580096, iteration: 34367
loss: 1.0015755891799927,grad_norm: 0.9999993878249773, iteration: 34368
loss: 0.9780913591384888,grad_norm: 0.9747635564140777, iteration: 34369
loss: 1.0119940042495728,grad_norm: 0.9999990126074891, iteration: 34370
loss: 0.9751981496810913,grad_norm: 0.9999992113691539, iteration: 34371
loss: 1.0114609003067017,grad_norm: 0.9999998803586239, iteration: 34372
loss: 1.0131535530090332,grad_norm: 0.9999990388815067, iteration: 34373
loss: 0.9877057075500488,grad_norm: 0.9245151792506118, iteration: 34374
loss: 1.0282615423202515,grad_norm: 0.9124622750628038, iteration: 34375
loss: 1.039509654045105,grad_norm: 0.9999998540064101, iteration: 34376
loss: 1.0876014232635498,grad_norm: 0.9999992583034576, iteration: 34377
loss: 0.9837135672569275,grad_norm: 0.9999992030618317, iteration: 34378
loss: 1.0120601654052734,grad_norm: 0.9999989976101097, iteration: 34379
loss: 1.1044169664382935,grad_norm: 0.9999996245450886, iteration: 34380
loss: 1.007789969444275,grad_norm: 0.9999993258520313, iteration: 34381
loss: 1.0072921514511108,grad_norm: 0.9999992507932712, iteration: 34382
loss: 0.9977078437805176,grad_norm: 0.9999995875897149, iteration: 34383
loss: 1.0219488143920898,grad_norm: 0.9799188667267663, iteration: 34384
loss: 1.012724757194519,grad_norm: 0.9999990468411257, iteration: 34385
loss: 1.0178906917572021,grad_norm: 0.9999999420312046, iteration: 34386
loss: 1.002747654914856,grad_norm: 0.999999076253962, iteration: 34387
loss: 1.0263382196426392,grad_norm: 0.9999991850570646, iteration: 34388
loss: 1.0078693628311157,grad_norm: 0.9899979438899857, iteration: 34389
loss: 1.0550493001937866,grad_norm: 0.9999996249692661, iteration: 34390
loss: 1.0134469270706177,grad_norm: 0.999999651876852, iteration: 34391
loss: 1.027144193649292,grad_norm: 0.999999564408082, iteration: 34392
loss: 0.9880565404891968,grad_norm: 0.9695663853636511, iteration: 34393
loss: 0.9890478849411011,grad_norm: 0.9805204323289367, iteration: 34394
loss: 1.0038591623306274,grad_norm: 0.999999075684977, iteration: 34395
loss: 0.9872523546218872,grad_norm: 0.9179572022662297, iteration: 34396
loss: 1.020982265472412,grad_norm: 0.968676949188373, iteration: 34397
loss: 1.008756160736084,grad_norm: 0.9999992760572671, iteration: 34398
loss: 1.0505977869033813,grad_norm: 0.9999991608941591, iteration: 34399
loss: 1.001570701599121,grad_norm: 0.9999991392162227, iteration: 34400
loss: 1.0258851051330566,grad_norm: 0.9999991430330806, iteration: 34401
loss: 1.016280174255371,grad_norm: 0.9999991240207249, iteration: 34402
loss: 1.0593620538711548,grad_norm: 0.9999995429746776, iteration: 34403
loss: 1.0269784927368164,grad_norm: 0.9999992577641353, iteration: 34404
loss: 0.9935094714164734,grad_norm: 0.7547256766648751, iteration: 34405
loss: 1.0303359031677246,grad_norm: 0.9999990986204568, iteration: 34406
loss: 0.9957745671272278,grad_norm: 0.9696485414216223, iteration: 34407
loss: 1.0289794206619263,grad_norm: 0.8671507721844334, iteration: 34408
loss: 1.007644534111023,grad_norm: 0.9999994306750284, iteration: 34409
loss: 1.017203450202942,grad_norm: 0.9999993221685135, iteration: 34410
loss: 1.0327810049057007,grad_norm: 0.9999991115100076, iteration: 34411
loss: 1.0030906200408936,grad_norm: 0.7966149609004313, iteration: 34412
loss: 1.026429295539856,grad_norm: 0.99999919797067, iteration: 34413
loss: 0.9801162481307983,grad_norm: 0.9233975881314335, iteration: 34414
loss: 1.0269925594329834,grad_norm: 0.9999991222256083, iteration: 34415
loss: 1.0076769590377808,grad_norm: 0.9999992787395149, iteration: 34416
loss: 1.0236870050430298,grad_norm: 0.9999990318630624, iteration: 34417
loss: 1.0158480405807495,grad_norm: 0.8415149957748057, iteration: 34418
loss: 1.0271079540252686,grad_norm: 0.9999991977622202, iteration: 34419
loss: 0.9775781035423279,grad_norm: 0.999999336534645, iteration: 34420
loss: 1.0162094831466675,grad_norm: 0.9559596956318991, iteration: 34421
loss: 1.01491117477417,grad_norm: 0.9303117926472655, iteration: 34422
loss: 1.0068303346633911,grad_norm: 0.9999992915987638, iteration: 34423
loss: 1.020004153251648,grad_norm: 0.9999990807764284, iteration: 34424
loss: 1.0790525674819946,grad_norm: 0.9999998982775808, iteration: 34425
loss: 0.9964643716812134,grad_norm: 0.9999990774687713, iteration: 34426
loss: 1.0055409669876099,grad_norm: 0.999999456635053, iteration: 34427
loss: 1.0090992450714111,grad_norm: 0.9999992100349568, iteration: 34428
loss: 0.9940727949142456,grad_norm: 0.9999991798072946, iteration: 34429
loss: 1.017228126525879,grad_norm: 0.9999994549763932, iteration: 34430
loss: 1.0525373220443726,grad_norm: 0.9999995807452702, iteration: 34431
loss: 1.0215238332748413,grad_norm: 0.9066057735738507, iteration: 34432
loss: 0.9887531995773315,grad_norm: 0.9999991328543361, iteration: 34433
loss: 0.9697301387786865,grad_norm: 0.9999990965268728, iteration: 34434
loss: 0.9751875400543213,grad_norm: 0.9999990023996828, iteration: 34435
loss: 1.044463038444519,grad_norm: 0.9999994389801589, iteration: 34436
loss: 1.0106983184814453,grad_norm: 0.8699020140434561, iteration: 34437
loss: 1.0255427360534668,grad_norm: 0.9999994202986919, iteration: 34438
loss: 1.0022861957550049,grad_norm: 0.9999990386779353, iteration: 34439
loss: 1.0232056379318237,grad_norm: 0.9978824138575654, iteration: 34440
loss: 1.003876805305481,grad_norm: 0.8675257010862649, iteration: 34441
loss: 0.9942946434020996,grad_norm: 0.9618361496066121, iteration: 34442
loss: 0.9674394726753235,grad_norm: 0.9999991648958678, iteration: 34443
loss: 0.9824013710021973,grad_norm: 0.9002522406722503, iteration: 34444
loss: 1.0060523748397827,grad_norm: 0.9015074643767119, iteration: 34445
loss: 0.9533892869949341,grad_norm: 0.998688371581091, iteration: 34446
loss: 1.0255542993545532,grad_norm: 0.9999992655374047, iteration: 34447
loss: 1.0197505950927734,grad_norm: 0.9333611262976526, iteration: 34448
loss: 0.9950205683708191,grad_norm: 0.9999992743840556, iteration: 34449
loss: 1.0440378189086914,grad_norm: 0.9999992141802687, iteration: 34450
loss: 1.0229713916778564,grad_norm: 0.9999991534932031, iteration: 34451
loss: 1.073718547821045,grad_norm: 0.9999992575354029, iteration: 34452
loss: 1.002901554107666,grad_norm: 0.9999991630098212, iteration: 34453
loss: 1.004860520362854,grad_norm: 0.9467223130739792, iteration: 34454
loss: 1.0264391899108887,grad_norm: 0.93641973148386, iteration: 34455
loss: 1.0223921537399292,grad_norm: 0.9999990323116444, iteration: 34456
loss: 1.0389320850372314,grad_norm: 0.9740641901100845, iteration: 34457
loss: 1.007170557975769,grad_norm: 0.9999991095858624, iteration: 34458
loss: 0.9767800569534302,grad_norm: 0.9999992246928368, iteration: 34459
loss: 0.9904499053955078,grad_norm: 0.9078455464027817, iteration: 34460
loss: 0.9932042360305786,grad_norm: 0.9999991646947274, iteration: 34461
loss: 0.9977560043334961,grad_norm: 0.9999992265348557, iteration: 34462
loss: 1.0145930051803589,grad_norm: 0.9999991263601528, iteration: 34463
loss: 0.9615456461906433,grad_norm: 0.9999990814945545, iteration: 34464
loss: 1.0224915742874146,grad_norm: 0.9999991073516974, iteration: 34465
loss: 1.029618501663208,grad_norm: 0.9999993273310847, iteration: 34466
loss: 1.0524953603744507,grad_norm: 0.999999711511954, iteration: 34467
loss: 1.0223802328109741,grad_norm: 0.9999994588835961, iteration: 34468
loss: 0.9800778031349182,grad_norm: 0.7771006874020759, iteration: 34469
loss: 1.0607913732528687,grad_norm: 0.999999413321244, iteration: 34470
loss: 1.1126474142074585,grad_norm: 0.999999888982889, iteration: 34471
loss: 1.006833791732788,grad_norm: 0.9413630908580064, iteration: 34472
loss: 1.0481551885604858,grad_norm: 0.9999993423724949, iteration: 34473
loss: 1.0226880311965942,grad_norm: 0.9999991713765022, iteration: 34474
loss: 0.9779521226882935,grad_norm: 0.9999990300303899, iteration: 34475
loss: 1.0220383405685425,grad_norm: 0.8361854613312248, iteration: 34476
loss: 0.9901748299598694,grad_norm: 0.9999990741695838, iteration: 34477
loss: 1.0545365810394287,grad_norm: 0.9652233920189275, iteration: 34478
loss: 1.0553048849105835,grad_norm: 0.9999997685121278, iteration: 34479
loss: 1.011104941368103,grad_norm: 0.955258131334483, iteration: 34480
loss: 1.030992031097412,grad_norm: 0.9999993333332812, iteration: 34481
loss: 1.0007059574127197,grad_norm: 0.9999991322348611, iteration: 34482
loss: 0.9979568719863892,grad_norm: 0.8293683256497068, iteration: 34483
loss: 1.1368387937545776,grad_norm: 0.9999997647310791, iteration: 34484
loss: 0.9859848022460938,grad_norm: 0.933137229057756, iteration: 34485
loss: 1.0267586708068848,grad_norm: 0.9299922764584283, iteration: 34486
loss: 1.0078175067901611,grad_norm: 0.9999991881360959, iteration: 34487
loss: 1.0252677202224731,grad_norm: 0.9478680150723477, iteration: 34488
loss: 1.008919596672058,grad_norm: 0.9999991859928121, iteration: 34489
loss: 0.9889529943466187,grad_norm: 0.9999992034296334, iteration: 34490
loss: 0.9541739821434021,grad_norm: 0.9526522254285436, iteration: 34491
loss: 1.0219882726669312,grad_norm: 0.898312932805456, iteration: 34492
loss: 0.9261543154716492,grad_norm: 0.9999991581568859, iteration: 34493
loss: 1.0984067916870117,grad_norm: 0.9999995641084949, iteration: 34494
loss: 1.0461021661758423,grad_norm: 0.9999990256152342, iteration: 34495
loss: 1.0026658773422241,grad_norm: 0.9053325815073802, iteration: 34496
loss: 1.0322600603103638,grad_norm: 0.9999996483515765, iteration: 34497
loss: 1.0243159532546997,grad_norm: 0.9999993327291048, iteration: 34498
loss: 1.016027808189392,grad_norm: 0.817458769491308, iteration: 34499
loss: 1.0080087184906006,grad_norm: 0.9999991515986931, iteration: 34500
loss: 0.9896129369735718,grad_norm: 0.9999990672257628, iteration: 34501
loss: 1.032657265663147,grad_norm: 0.9999993531820255, iteration: 34502
loss: 1.0170155763626099,grad_norm: 0.999998997862112, iteration: 34503
loss: 0.9879666566848755,grad_norm: 0.9785743209087716, iteration: 34504
loss: 1.0212639570236206,grad_norm: 0.874585947787547, iteration: 34505
loss: 1.0006136894226074,grad_norm: 0.933275287162678, iteration: 34506
loss: 1.0160857439041138,grad_norm: 0.9219002625089519, iteration: 34507
loss: 1.0019792318344116,grad_norm: 0.8297428960401662, iteration: 34508
loss: 1.015933632850647,grad_norm: 0.9999993957212668, iteration: 34509
loss: 1.0407990217208862,grad_norm: 0.9999991749813564, iteration: 34510
loss: 1.0548678636550903,grad_norm: 0.9999991751250578, iteration: 34511
loss: 1.0051920413970947,grad_norm: 0.9999994008508908, iteration: 34512
loss: 0.988997757434845,grad_norm: 0.9999992318989432, iteration: 34513
loss: 1.0089764595031738,grad_norm: 0.9999991273503779, iteration: 34514
loss: 0.970506489276886,grad_norm: 0.9871520423012995, iteration: 34515
loss: 0.9895312786102295,grad_norm: 0.9753421646889826, iteration: 34516
loss: 1.0537638664245605,grad_norm: 0.9999994704518526, iteration: 34517
loss: 1.0336177349090576,grad_norm: 0.9999991780983687, iteration: 34518
loss: 1.0348975658416748,grad_norm: 0.9999996355932051, iteration: 34519
loss: 1.0022916793823242,grad_norm: 0.9999992057605057, iteration: 34520
loss: 1.0041327476501465,grad_norm: 0.9999991285173423, iteration: 34521
loss: 1.0006405115127563,grad_norm: 0.9456245557392796, iteration: 34522
loss: 0.9886099696159363,grad_norm: 0.9999991634055704, iteration: 34523
loss: 1.0506713390350342,grad_norm: 0.9999991506266741, iteration: 34524
loss: 1.0146267414093018,grad_norm: 0.9999991395887463, iteration: 34525
loss: 0.9751987457275391,grad_norm: 0.9999990295049657, iteration: 34526
loss: 1.0274509191513062,grad_norm: 0.9999995137461076, iteration: 34527
loss: 1.0124608278274536,grad_norm: 0.9999991250883097, iteration: 34528
loss: 0.986846387386322,grad_norm: 0.8597111406025045, iteration: 34529
loss: 0.9934147000312805,grad_norm: 0.882845082262318, iteration: 34530
loss: 1.045345425605774,grad_norm: 0.9999998244999841, iteration: 34531
loss: 0.9970871806144714,grad_norm: 0.9999990633714853, iteration: 34532
loss: 1.002074122428894,grad_norm: 0.8491861617790611, iteration: 34533
loss: 1.025986909866333,grad_norm: 0.8911410607004323, iteration: 34534
loss: 1.0435168743133545,grad_norm: 0.9999991214954296, iteration: 34535
loss: 0.9991899728775024,grad_norm: 0.9999990210483506, iteration: 34536
loss: 1.0008800029754639,grad_norm: 0.8622322482768713, iteration: 34537
loss: 1.0181500911712646,grad_norm: 0.9999991793949914, iteration: 34538
loss: 1.023811936378479,grad_norm: 0.9031347939734007, iteration: 34539
loss: 1.0023820400238037,grad_norm: 0.8260794696046336, iteration: 34540
loss: 1.021431565284729,grad_norm: 0.9354708110416354, iteration: 34541
loss: 0.9918736815452576,grad_norm: 0.999999161344215, iteration: 34542
loss: 1.0118987560272217,grad_norm: 0.9999992010677019, iteration: 34543
loss: 1.0517241954803467,grad_norm: 0.9999991302411343, iteration: 34544
loss: 0.946612536907196,grad_norm: 0.9999990687150245, iteration: 34545
loss: 1.0242516994476318,grad_norm: 0.9473532533217548, iteration: 34546
loss: 0.9690693616867065,grad_norm: 0.8646070614927904, iteration: 34547
loss: 1.0250647068023682,grad_norm: 0.9999991152890756, iteration: 34548
loss: 0.9860514402389526,grad_norm: 0.9999991921412522, iteration: 34549
loss: 0.9745660424232483,grad_norm: 0.9999992691574264, iteration: 34550
loss: 0.9775285720825195,grad_norm: 0.9999994461674507, iteration: 34551
loss: 1.0041019916534424,grad_norm: 0.9999991993790863, iteration: 34552
loss: 0.9990770220756531,grad_norm: 0.9035015911956403, iteration: 34553
loss: 0.9941917061805725,grad_norm: 0.9999989691639878, iteration: 34554
loss: 0.9820107221603394,grad_norm: 0.8189408881192414, iteration: 34555
loss: 1.0023993253707886,grad_norm: 0.9504815180393099, iteration: 34556
loss: 0.9735116958618164,grad_norm: 0.9936204861175193, iteration: 34557
loss: 1.0085654258728027,grad_norm: 0.9855670734315495, iteration: 34558
loss: 1.0156073570251465,grad_norm: 0.9999995780999132, iteration: 34559
loss: 0.9852442741394043,grad_norm: 0.9550399546907337, iteration: 34560
loss: 0.9940763115882874,grad_norm: 0.7745565040404465, iteration: 34561
loss: 0.973476231098175,grad_norm: 0.8177833101057419, iteration: 34562
loss: 0.9993125796318054,grad_norm: 0.7882774038636056, iteration: 34563
loss: 1.0410568714141846,grad_norm: 0.9999994479679956, iteration: 34564
loss: 1.0099586248397827,grad_norm: 0.999999040145938, iteration: 34565
loss: 0.9645476937294006,grad_norm: 0.9696850756872946, iteration: 34566
loss: 1.0270805358886719,grad_norm: 0.933263209602448, iteration: 34567
loss: 1.0402497053146362,grad_norm: 0.999999100873035, iteration: 34568
loss: 1.0217124223709106,grad_norm: 0.9999991362139945, iteration: 34569
loss: 1.0122936964035034,grad_norm: 0.9999991196682013, iteration: 34570
loss: 1.0235267877578735,grad_norm: 0.9999989649006937, iteration: 34571
loss: 1.0112780332565308,grad_norm: 0.9156534872450464, iteration: 34572
loss: 1.000148057937622,grad_norm: 0.9687665432657692, iteration: 34573
loss: 1.014730453491211,grad_norm: 0.8981070725373652, iteration: 34574
loss: 1.0096733570098877,grad_norm: 0.9999990248884618, iteration: 34575
loss: 1.0132462978363037,grad_norm: 0.9999993052503399, iteration: 34576
loss: 1.0424938201904297,grad_norm: 0.9999990564722864, iteration: 34577
loss: 1.0010994672775269,grad_norm: 0.8836948134505662, iteration: 34578
loss: 1.0522093772888184,grad_norm: 0.9733858464055786, iteration: 34579
loss: 1.035474181175232,grad_norm: 0.9922795246348475, iteration: 34580
loss: 0.9419428706169128,grad_norm: 0.9999991008359795, iteration: 34581
loss: 1.0374846458435059,grad_norm: 0.9999991994869464, iteration: 34582
loss: 0.9891671538352966,grad_norm: 0.9999992549868095, iteration: 34583
loss: 0.9931783676147461,grad_norm: 0.999999225686893, iteration: 34584
loss: 1.0225698947906494,grad_norm: 0.9999992145025072, iteration: 34585
loss: 1.010175108909607,grad_norm: 0.9999990329283717, iteration: 34586
loss: 1.0144044160842896,grad_norm: 0.99999925205066, iteration: 34587
loss: 1.0490680932998657,grad_norm: 0.856758059686439, iteration: 34588
loss: 0.9661742448806763,grad_norm: 0.8333616011481301, iteration: 34589
loss: 1.0044552087783813,grad_norm: 0.9185864886443961, iteration: 34590
loss: 1.0063115358352661,grad_norm: 0.927271339518441, iteration: 34591
loss: 0.9947423338890076,grad_norm: 0.9409461182802163, iteration: 34592
loss: 0.9934830069541931,grad_norm: 0.9999992247176663, iteration: 34593
loss: 0.9854817986488342,grad_norm: 0.9999996066099425, iteration: 34594
loss: 0.9741972088813782,grad_norm: 0.958148475251436, iteration: 34595
loss: 0.9774575233459473,grad_norm: 0.9999991247847595, iteration: 34596
loss: 1.004105567932129,grad_norm: 0.8200464067899059, iteration: 34597
loss: 1.0116344690322876,grad_norm: 0.9721483953053436, iteration: 34598
loss: 0.9721165895462036,grad_norm: 0.9999991035399701, iteration: 34599
loss: 0.9982931613922119,grad_norm: 0.9355132048343651, iteration: 34600
loss: 0.9907382726669312,grad_norm: 0.9963958878492494, iteration: 34601
loss: 0.9839943647384644,grad_norm: 0.9950828997999064, iteration: 34602
loss: 0.9890324473381042,grad_norm: 0.9010779734154816, iteration: 34603
loss: 0.9858199954032898,grad_norm: 0.9999991316216129, iteration: 34604
loss: 0.9335651397705078,grad_norm: 0.9525624198569326, iteration: 34605
loss: 1.0067261457443237,grad_norm: 0.9334031365685322, iteration: 34606
loss: 0.9849211573600769,grad_norm: 0.9973473156021805, iteration: 34607
loss: 0.994720458984375,grad_norm: 0.9999990162882235, iteration: 34608
loss: 0.985251247882843,grad_norm: 0.8635322388937094, iteration: 34609
loss: 1.0235706567764282,grad_norm: 0.9947698938172445, iteration: 34610
loss: 1.051762342453003,grad_norm: 0.9999994210552902, iteration: 34611
loss: 1.0538723468780518,grad_norm: 0.9999991142830849, iteration: 34612
loss: 0.9927694201469421,grad_norm: 0.960437248928674, iteration: 34613
loss: 1.00089430809021,grad_norm: 0.989046023455956, iteration: 34614
loss: 1.0036975145339966,grad_norm: 0.7531835652428345, iteration: 34615
loss: 1.0182327032089233,grad_norm: 0.9438127009131945, iteration: 34616
loss: 1.0173516273498535,grad_norm: 0.99999914599138, iteration: 34617
loss: 1.0006262063980103,grad_norm: 0.9999990225696399, iteration: 34618
loss: 0.9880269765853882,grad_norm: 0.8667526352738384, iteration: 34619
loss: 1.0093886852264404,grad_norm: 0.8961656680963144, iteration: 34620
loss: 1.0181478261947632,grad_norm: 0.7999933011621656, iteration: 34621
loss: 1.0208485126495361,grad_norm: 0.8619133504690825, iteration: 34622
loss: 1.0061930418014526,grad_norm: 0.986152584981824, iteration: 34623
loss: 1.0201308727264404,grad_norm: 0.9999991175952798, iteration: 34624
loss: 1.035272479057312,grad_norm: 0.962916579310272, iteration: 34625
loss: 0.9963850378990173,grad_norm: 0.9989255629293703, iteration: 34626
loss: 0.988834023475647,grad_norm: 0.9999990578344526, iteration: 34627
loss: 0.9914316534996033,grad_norm: 0.9061039161967577, iteration: 34628
loss: 1.071102499961853,grad_norm: 0.9999997246761284, iteration: 34629
loss: 0.9896019101142883,grad_norm: 0.8806891390416282, iteration: 34630
loss: 1.0503953695297241,grad_norm: 0.8688052424229485, iteration: 34631
loss: 1.0365939140319824,grad_norm: 0.9328693440283748, iteration: 34632
loss: 0.9832635521888733,grad_norm: 0.9999991015301529, iteration: 34633
loss: 0.996678352355957,grad_norm: 0.8576548243868493, iteration: 34634
loss: 1.0046943426132202,grad_norm: 0.9675024251983442, iteration: 34635
loss: 1.0242774486541748,grad_norm: 0.9999992477409041, iteration: 34636
loss: 1.0268326997756958,grad_norm: 0.9999991004100349, iteration: 34637
loss: 0.9842659831047058,grad_norm: 0.8714931575786418, iteration: 34638
loss: 0.994484543800354,grad_norm: 0.9999991805232572, iteration: 34639
loss: 1.0250715017318726,grad_norm: 0.9999991321446428, iteration: 34640
loss: 0.97467440366745,grad_norm: 0.9692708943255982, iteration: 34641
loss: 1.0053595304489136,grad_norm: 0.8737516617866966, iteration: 34642
loss: 0.9999524354934692,grad_norm: 0.91470141942153, iteration: 34643
loss: 0.9747790098190308,grad_norm: 0.9999990811930424, iteration: 34644
loss: 1.0110251903533936,grad_norm: 0.9999991740434551, iteration: 34645
loss: 0.9977267384529114,grad_norm: 0.9999990139781447, iteration: 34646
loss: 0.9827436208724976,grad_norm: 0.9075767656369318, iteration: 34647
loss: 1.0156316757202148,grad_norm: 0.8350110142944515, iteration: 34648
loss: 0.9823927879333496,grad_norm: 0.9999991156021514, iteration: 34649
loss: 1.0455021858215332,grad_norm: 0.9357009798371584, iteration: 34650
loss: 1.0235329866409302,grad_norm: 0.9037507828037881, iteration: 34651
loss: 1.0273298025131226,grad_norm: 0.9999988664153254, iteration: 34652
loss: 1.0013097524642944,grad_norm: 0.9302827926536777, iteration: 34653
loss: 1.0012387037277222,grad_norm: 0.9999993060903777, iteration: 34654
loss: 1.1237863302230835,grad_norm: 0.9999997072959458, iteration: 34655
loss: 1.0013216733932495,grad_norm: 0.9170461172694999, iteration: 34656
loss: 1.0042829513549805,grad_norm: 0.9677823745057322, iteration: 34657
loss: 0.9698503017425537,grad_norm: 0.9999990834654927, iteration: 34658
loss: 1.0221291780471802,grad_norm: 0.9036424552396829, iteration: 34659
loss: 1.0208934545516968,grad_norm: 0.7893961142681599, iteration: 34660
loss: 0.9978847503662109,grad_norm: 0.870586762255595, iteration: 34661
loss: 1.0014641284942627,grad_norm: 0.9400871877908108, iteration: 34662
loss: 0.9853309988975525,grad_norm: 0.9684177052151349, iteration: 34663
loss: 1.0223191976547241,grad_norm: 0.866034723055936, iteration: 34664
loss: 1.002137303352356,grad_norm: 0.8852111040883908, iteration: 34665
loss: 0.9766921401023865,grad_norm: 0.940875065373064, iteration: 34666
loss: 1.001557469367981,grad_norm: 0.8893078518165264, iteration: 34667
loss: 1.013959527015686,grad_norm: 0.9999992736002444, iteration: 34668
loss: 1.0177901983261108,grad_norm: 0.9452125241374701, iteration: 34669
loss: 0.9785548448562622,grad_norm: 0.9393367905601402, iteration: 34670
loss: 0.9959008693695068,grad_norm: 0.9999991827179622, iteration: 34671
loss: 1.043933629989624,grad_norm: 0.999999090468575, iteration: 34672
loss: 1.0072178840637207,grad_norm: 0.965075731688908, iteration: 34673
loss: 0.9938563108444214,grad_norm: 0.7287060274610796, iteration: 34674
loss: 1.042830228805542,grad_norm: 0.8720320418755709, iteration: 34675
loss: 0.9921112060546875,grad_norm: 0.999999108039168, iteration: 34676
loss: 1.0369006395339966,grad_norm: 0.9499612436215504, iteration: 34677
loss: 0.9992006421089172,grad_norm: 0.9999990938799707, iteration: 34678
loss: 0.9937894940376282,grad_norm: 0.9107140631382455, iteration: 34679
loss: 0.9839812517166138,grad_norm: 0.9999988948653149, iteration: 34680
loss: 0.9889000058174133,grad_norm: 0.9872185432795912, iteration: 34681
loss: 0.9702497720718384,grad_norm: 0.9999991607403966, iteration: 34682
loss: 0.9908932447433472,grad_norm: 0.9460895828993687, iteration: 34683
loss: 1.0299983024597168,grad_norm: 0.9999990591155538, iteration: 34684
loss: 0.9674124121665955,grad_norm: 0.9258037283041372, iteration: 34685
loss: 1.0227878093719482,grad_norm: 0.8747111595862039, iteration: 34686
loss: 1.0285695791244507,grad_norm: 0.9999991597572392, iteration: 34687
loss: 1.0251015424728394,grad_norm: 0.8488673441545984, iteration: 34688
loss: 0.9633176922798157,grad_norm: 0.9513973907959526, iteration: 34689
loss: 0.9872574210166931,grad_norm: 0.9999992593932518, iteration: 34690
loss: 0.9618271589279175,grad_norm: 0.9522201666069187, iteration: 34691
loss: 1.046743631362915,grad_norm: 0.9460556095428168, iteration: 34692
loss: 1.0027040243148804,grad_norm: 0.9183567077836522, iteration: 34693
loss: 1.0062637329101562,grad_norm: 0.9482441473741379, iteration: 34694
loss: 0.9878697395324707,grad_norm: 0.9999992141514289, iteration: 34695
loss: 0.9703333377838135,grad_norm: 0.9999991157013743, iteration: 34696
loss: 0.978451669216156,grad_norm: 0.9999990812755772, iteration: 34697
loss: 0.9503856897354126,grad_norm: 0.8431923360425806, iteration: 34698
loss: 1.0210713148117065,grad_norm: 0.9494844404958445, iteration: 34699
loss: 1.0179941654205322,grad_norm: 0.9999990946987161, iteration: 34700
loss: 1.0069959163665771,grad_norm: 0.9999991911390336, iteration: 34701
loss: 1.0151481628417969,grad_norm: 0.8355757109281913, iteration: 34702
loss: 1.026231288909912,grad_norm: 0.9999992434673107, iteration: 34703
loss: 1.0274795293807983,grad_norm: 0.9332214156724096, iteration: 34704
loss: 0.9932332038879395,grad_norm: 0.9515611422312231, iteration: 34705
loss: 0.998948335647583,grad_norm: 0.9931047155611692, iteration: 34706
loss: 1.0242763757705688,grad_norm: 0.9999990482925728, iteration: 34707
loss: 0.9984568953514099,grad_norm: 0.9534832197753804, iteration: 34708
loss: 0.9930011034011841,grad_norm: 0.9484532352064787, iteration: 34709
loss: 0.9671608209609985,grad_norm: 0.8794924759297887, iteration: 34710
loss: 1.0112498998641968,grad_norm: 0.8812493165329023, iteration: 34711
loss: 1.0159578323364258,grad_norm: 0.855199481433899, iteration: 34712
loss: 0.9974642395973206,grad_norm: 0.9436474904070956, iteration: 34713
loss: 1.0096930265426636,grad_norm: 0.9999991991010392, iteration: 34714
loss: 1.0230555534362793,grad_norm: 0.998324576238862, iteration: 34715
loss: 1.0489505529403687,grad_norm: 0.999999329483139, iteration: 34716
loss: 1.0385805368423462,grad_norm: 0.9733583565614279, iteration: 34717
loss: 1.0189764499664307,grad_norm: 0.8052443438417725, iteration: 34718
loss: 0.9928631782531738,grad_norm: 0.999999353222569, iteration: 34719
loss: 0.9907884001731873,grad_norm: 0.8629469203193479, iteration: 34720
loss: 1.0182839632034302,grad_norm: 0.8499166968040175, iteration: 34721
loss: 1.0081923007965088,grad_norm: 0.8349608663009321, iteration: 34722
loss: 1.040846347808838,grad_norm: 0.9999991295716916, iteration: 34723
loss: 1.0268627405166626,grad_norm: 0.9676173002359705, iteration: 34724
loss: 1.0109018087387085,grad_norm: 0.9999994887858458, iteration: 34725
loss: 0.9965320825576782,grad_norm: 0.9248715823281783, iteration: 34726
loss: 0.9826447367668152,grad_norm: 0.8634910360677877, iteration: 34727
loss: 0.9907309412956238,grad_norm: 0.8428748116098995, iteration: 34728
loss: 1.024354100227356,grad_norm: 0.9453296067793491, iteration: 34729
loss: 0.9821218252182007,grad_norm: 0.9999991767211506, iteration: 34730
loss: 0.9917486310005188,grad_norm: 0.9698591970986832, iteration: 34731
loss: 0.9969098567962646,grad_norm: 0.9999991938265355, iteration: 34732
loss: 0.9806976318359375,grad_norm: 0.8787758949146679, iteration: 34733
loss: 0.9647496938705444,grad_norm: 0.9999991197794454, iteration: 34734
loss: 1.0098673105239868,grad_norm: 0.8523090834748643, iteration: 34735
loss: 1.040831208229065,grad_norm: 0.9999991365960983, iteration: 34736
loss: 1.0338717699050903,grad_norm: 0.9366653550425313, iteration: 34737
loss: 1.0122902393341064,grad_norm: 0.9999993907818997, iteration: 34738
loss: 0.995663046836853,grad_norm: 0.9999991683199758, iteration: 34739
loss: 0.9979038834571838,grad_norm: 0.9999989575120717, iteration: 34740
loss: 0.9969499111175537,grad_norm: 0.9202988134116278, iteration: 34741
loss: 0.9716184735298157,grad_norm: 0.9999990657447198, iteration: 34742
loss: 0.967985212802887,grad_norm: 0.9967768797016403, iteration: 34743
loss: 1.011212706565857,grad_norm: 0.9842512233885384, iteration: 34744
loss: 0.9783940315246582,grad_norm: 0.9061563875897463, iteration: 34745
loss: 1.0508021116256714,grad_norm: 0.9999991128939166, iteration: 34746
loss: 1.0514291524887085,grad_norm: 0.9999991248812542, iteration: 34747
loss: 1.0092954635620117,grad_norm: 0.8276875378658403, iteration: 34748
loss: 0.9957770705223083,grad_norm: 0.9649008123690943, iteration: 34749
loss: 0.9794684648513794,grad_norm: 0.9271455018009547, iteration: 34750
loss: 0.9842086434364319,grad_norm: 0.9926301950788183, iteration: 34751
loss: 0.986192524433136,grad_norm: 0.9484088583360591, iteration: 34752
loss: 1.0309003591537476,grad_norm: 0.9999989992254603, iteration: 34753
loss: 1.0027371644973755,grad_norm: 0.999998890173347, iteration: 34754
loss: 1.0487322807312012,grad_norm: 0.999999135601085, iteration: 34755
loss: 1.016257405281067,grad_norm: 0.9999992234875684, iteration: 34756
loss: 1.0113836526870728,grad_norm: 0.9999994115228598, iteration: 34757
loss: 1.0390607118606567,grad_norm: 0.9260566256378749, iteration: 34758
loss: 0.9913017153739929,grad_norm: 0.9999991234253993, iteration: 34759
loss: 0.9964756965637207,grad_norm: 0.8648405922696399, iteration: 34760
loss: 1.0194987058639526,grad_norm: 0.8473437280600371, iteration: 34761
loss: 1.0179860591888428,grad_norm: 0.965185843923686, iteration: 34762
loss: 1.0474281311035156,grad_norm: 0.9999991305273173, iteration: 34763
loss: 1.0074623823165894,grad_norm: 0.8807609603269344, iteration: 34764
loss: 1.021685242652893,grad_norm: 0.9999992343333103, iteration: 34765
loss: 1.005199670791626,grad_norm: 0.9999989797745771, iteration: 34766
loss: 0.9922357201576233,grad_norm: 0.9669209461137118, iteration: 34767
loss: 1.013637900352478,grad_norm: 0.8935418176071672, iteration: 34768
loss: 1.007152795791626,grad_norm: 0.9999990875583183, iteration: 34769
loss: 1.009078025817871,grad_norm: 0.8396231217605759, iteration: 34770
loss: 0.9781268835067749,grad_norm: 0.9999991169680736, iteration: 34771
loss: 0.9894228577613831,grad_norm: 0.9999992742479513, iteration: 34772
loss: 1.0635284185409546,grad_norm: 0.8894390918586327, iteration: 34773
loss: 1.0265390872955322,grad_norm: 0.9809073361083247, iteration: 34774
loss: 0.9617059826850891,grad_norm: 0.8833797519305788, iteration: 34775
loss: 0.9792355895042419,grad_norm: 0.9413900872874121, iteration: 34776
loss: 0.9896436333656311,grad_norm: 0.8667981043465625, iteration: 34777
loss: 1.0317375659942627,grad_norm: 0.8964135270224974, iteration: 34778
loss: 0.9534898400306702,grad_norm: 0.9810938920650207, iteration: 34779
loss: 1.0173007249832153,grad_norm: 0.9999991103822475, iteration: 34780
loss: 1.0358147621154785,grad_norm: 0.9286720241735347, iteration: 34781
loss: 1.0181747674942017,grad_norm: 0.9999989645282352, iteration: 34782
loss: 0.9958001375198364,grad_norm: 0.9709406546449776, iteration: 34783
loss: 0.9624842405319214,grad_norm: 0.931359121208204, iteration: 34784
loss: 0.9963778257369995,grad_norm: 0.9999990048161589, iteration: 34785
loss: 1.0192168951034546,grad_norm: 0.9440457030714104, iteration: 34786
loss: 1.0106171369552612,grad_norm: 0.9846274992213263, iteration: 34787
loss: 0.995698094367981,grad_norm: 0.9828276516084204, iteration: 34788
loss: 1.0311199426651,grad_norm: 0.9999992121751099, iteration: 34789
loss: 1.023210883140564,grad_norm: 0.8857930576623106, iteration: 34790
loss: 0.975043773651123,grad_norm: 0.9999990815932926, iteration: 34791
loss: 0.9874341487884521,grad_norm: 0.9383762108538305, iteration: 34792
loss: 0.9763951897621155,grad_norm: 0.8020389395632389, iteration: 34793
loss: 1.012162208557129,grad_norm: 0.8273746933816403, iteration: 34794
loss: 1.0013846158981323,grad_norm: 0.983058351282172, iteration: 34795
loss: 1.0106996297836304,grad_norm: 0.9011618735160342, iteration: 34796
loss: 1.021101713180542,grad_norm: 0.8027510803281642, iteration: 34797
loss: 1.0308971405029297,grad_norm: 0.9999992088292231, iteration: 34798
loss: 1.0298041105270386,grad_norm: 0.9724333859234633, iteration: 34799
loss: 1.024802803993225,grad_norm: 0.9999991043774302, iteration: 34800
loss: 1.0112216472625732,grad_norm: 0.7319562744991168, iteration: 34801
loss: 0.9688975811004639,grad_norm: 0.8063228517797538, iteration: 34802
loss: 1.009486436843872,grad_norm: 0.9999990754244586, iteration: 34803
loss: 1.007030725479126,grad_norm: 0.8963764263052062, iteration: 34804
loss: 0.9586955904960632,grad_norm: 0.9999991478751095, iteration: 34805
loss: 1.0025207996368408,grad_norm: 0.929807599628238, iteration: 34806
loss: 0.9768474102020264,grad_norm: 0.9959407580272626, iteration: 34807
loss: 1.0373562574386597,grad_norm: 0.9999995128252849, iteration: 34808
loss: 0.9806204438209534,grad_norm: 0.9999992986008994, iteration: 34809
loss: 0.9778963327407837,grad_norm: 0.7960111141555843, iteration: 34810
loss: 1.035573124885559,grad_norm: 0.9999991251860505, iteration: 34811
loss: 0.9912286996841431,grad_norm: 0.9999992484896075, iteration: 34812
loss: 1.0023181438446045,grad_norm: 0.9999990623219388, iteration: 34813
loss: 0.9646120071411133,grad_norm: 0.9999991449476607, iteration: 34814
loss: 1.043525218963623,grad_norm: 0.9999990454863222, iteration: 34815
loss: 0.9573741555213928,grad_norm: 0.86719010366334, iteration: 34816
loss: 1.0173038244247437,grad_norm: 0.9999991681375223, iteration: 34817
loss: 0.9772881269454956,grad_norm: 0.9693246906367309, iteration: 34818
loss: 1.0232982635498047,grad_norm: 0.8348689487376486, iteration: 34819
loss: 1.036782145500183,grad_norm: 0.9450541068722551, iteration: 34820
loss: 1.0136758089065552,grad_norm: 0.9723871635722976, iteration: 34821
loss: 1.025872826576233,grad_norm: 0.9430165867090331, iteration: 34822
loss: 1.0084072351455688,grad_norm: 0.9789556047030286, iteration: 34823
loss: 1.0276503562927246,grad_norm: 0.9537360819399929, iteration: 34824
loss: 1.0163098573684692,grad_norm: 0.945779833460723, iteration: 34825
loss: 0.9442459940910339,grad_norm: 0.8592129126440817, iteration: 34826
loss: 1.043808102607727,grad_norm: 0.8826473244967209, iteration: 34827
loss: 1.0108531713485718,grad_norm: 0.9394396189292933, iteration: 34828
loss: 1.0223716497421265,grad_norm: 0.9999992268534124, iteration: 34829
loss: 0.997112512588501,grad_norm: 0.8850149612535381, iteration: 34830
loss: 0.9639315009117126,grad_norm: 0.8868848827582325, iteration: 34831
loss: 0.9761505126953125,grad_norm: 0.9999992727836965, iteration: 34832
loss: 1.0093791484832764,grad_norm: 0.999999089641469, iteration: 34833
loss: 0.957304835319519,grad_norm: 0.9999991912706139, iteration: 34834
loss: 1.018645167350769,grad_norm: 0.9094913739161146, iteration: 34835
loss: 1.0037789344787598,grad_norm: 0.9999992595180488, iteration: 34836
loss: 0.9775375723838806,grad_norm: 0.989419200709779, iteration: 34837
loss: 1.050413966178894,grad_norm: 0.9999996147691117, iteration: 34838
loss: 0.973552942276001,grad_norm: 0.9999991194941892, iteration: 34839
loss: 0.9829297065734863,grad_norm: 0.873728318497991, iteration: 34840
loss: 0.9927099347114563,grad_norm: 0.9280313972582291, iteration: 34841
loss: 0.95698481798172,grad_norm: 0.8792941798721043, iteration: 34842
loss: 1.0024755001068115,grad_norm: 0.9999990566987151, iteration: 34843
loss: 1.0100960731506348,grad_norm: 0.869969060816059, iteration: 34844
loss: 1.014319896697998,grad_norm: 0.9999990902199932, iteration: 34845
loss: 1.0080654621124268,grad_norm: 0.9811625849194413, iteration: 34846
loss: 0.9858131408691406,grad_norm: 0.9089099965610146, iteration: 34847
loss: 1.0225615501403809,grad_norm: 0.9521155326532369, iteration: 34848
loss: 0.9906926155090332,grad_norm: 0.9999992654010418, iteration: 34849
loss: 0.9789904952049255,grad_norm: 0.9203532956454205, iteration: 34850
loss: 1.0078877210617065,grad_norm: 0.999999173416242, iteration: 34851
loss: 1.025336742401123,grad_norm: 0.9999991067086115, iteration: 34852
loss: 1.0087448358535767,grad_norm: 0.8930396163721179, iteration: 34853
loss: 0.9974375367164612,grad_norm: 0.8045760498552629, iteration: 34854
loss: 1.01109778881073,grad_norm: 0.8990352851517285, iteration: 34855
loss: 1.007603645324707,grad_norm: 0.8262735211574237, iteration: 34856
loss: 0.9933305978775024,grad_norm: 0.9999991063445947, iteration: 34857
loss: 0.999055027961731,grad_norm: 0.9999992645454074, iteration: 34858
loss: 0.9435539841651917,grad_norm: 0.947757810893136, iteration: 34859
loss: 0.9690442681312561,grad_norm: 0.9999992249333347, iteration: 34860
loss: 1.031672477722168,grad_norm: 0.9261457606511239, iteration: 34861
loss: 0.998592734336853,grad_norm: 0.8323820876363964, iteration: 34862
loss: 0.9814214706420898,grad_norm: 0.9426018644464209, iteration: 34863
loss: 1.0025187730789185,grad_norm: 0.9607661825908241, iteration: 34864
loss: 0.9864794015884399,grad_norm: 0.9623189751635, iteration: 34865
loss: 1.0552351474761963,grad_norm: 0.999999025720084, iteration: 34866
loss: 1.045350193977356,grad_norm: 0.9999995982431948, iteration: 34867
loss: 0.9756189584732056,grad_norm: 0.9999991323972908, iteration: 34868
loss: 1.0140304565429688,grad_norm: 0.9999993451915424, iteration: 34869
loss: 1.0383405685424805,grad_norm: 0.9510016040981003, iteration: 34870
loss: 1.0514577627182007,grad_norm: 0.999999019212788, iteration: 34871
loss: 1.028435230255127,grad_norm: 0.8175343094548003, iteration: 34872
loss: 1.0477550029754639,grad_norm: 0.9999991041673704, iteration: 34873
loss: 1.0293116569519043,grad_norm: 0.9999992091143263, iteration: 34874
loss: 1.0084232091903687,grad_norm: 0.7853134267193126, iteration: 34875
loss: 1.0118191242218018,grad_norm: 0.8385686750736431, iteration: 34876
loss: 1.0100507736206055,grad_norm: 0.9833604496243175, iteration: 34877
loss: 0.9880592226982117,grad_norm: 0.8663358075670632, iteration: 34878
loss: 1.0241115093231201,grad_norm: 0.9051213666333611, iteration: 34879
loss: 0.9764726758003235,grad_norm: 0.9999991790448917, iteration: 34880
loss: 1.018258810043335,grad_norm: 0.9999990146339327, iteration: 34881
loss: 1.0116984844207764,grad_norm: 0.8103431114745308, iteration: 34882
loss: 1.0401614904403687,grad_norm: 0.8660374965093194, iteration: 34883
loss: 1.0198544263839722,grad_norm: 0.8509322731695198, iteration: 34884
loss: 1.0006622076034546,grad_norm: 0.8496764763247698, iteration: 34885
loss: 1.0148895978927612,grad_norm: 0.9999990761360196, iteration: 34886
loss: 1.006851077079773,grad_norm: 0.975384764547634, iteration: 34887
loss: 1.0239930152893066,grad_norm: 0.7933747304060242, iteration: 34888
loss: 1.0240776538848877,grad_norm: 0.9524701594437451, iteration: 34889
loss: 0.9935691356658936,grad_norm: 0.8661818068150349, iteration: 34890
loss: 0.9786392450332642,grad_norm: 0.9417237310231505, iteration: 34891
loss: 1.0110349655151367,grad_norm: 0.7643321828869627, iteration: 34892
loss: 1.015621542930603,grad_norm: 0.9455849240663832, iteration: 34893
loss: 1.042890191078186,grad_norm: 0.99999919036995, iteration: 34894
loss: 1.0612283945083618,grad_norm: 0.9999996931504053, iteration: 34895
loss: 1.028908133506775,grad_norm: 0.9239403054555405, iteration: 34896
loss: 0.9831146597862244,grad_norm: 0.9999990801261381, iteration: 34897
loss: 1.042872667312622,grad_norm: 0.9999992742172082, iteration: 34898
loss: 0.9509411454200745,grad_norm: 0.9282443237529666, iteration: 34899
loss: 1.0290837287902832,grad_norm: 0.9999991529121913, iteration: 34900
loss: 1.0146703720092773,grad_norm: 0.9999991653878737, iteration: 34901
loss: 0.9977673888206482,grad_norm: 0.9457028371081859, iteration: 34902
loss: 0.9544274210929871,grad_norm: 0.9220359610294552, iteration: 34903
loss: 1.0126152038574219,grad_norm: 0.992668438230015, iteration: 34904
loss: 1.011497974395752,grad_norm: 0.9999995998027873, iteration: 34905
loss: 1.049803376197815,grad_norm: 0.999999592787641, iteration: 34906
loss: 0.9921115636825562,grad_norm: 0.9999991911421133, iteration: 34907
loss: 1.0097935199737549,grad_norm: 0.9967132297032796, iteration: 34908
loss: 0.9922277331352234,grad_norm: 0.9999994545744969, iteration: 34909
loss: 0.9841853380203247,grad_norm: 0.9999992365281074, iteration: 34910
loss: 0.9865320920944214,grad_norm: 0.9999991811729106, iteration: 34911
loss: 0.9765565395355225,grad_norm: 0.982453613788845, iteration: 34912
loss: 1.0092600584030151,grad_norm: 0.9444446007901154, iteration: 34913
loss: 1.019750952720642,grad_norm: 0.8205947837574444, iteration: 34914
loss: 0.9852772951126099,grad_norm: 0.9587590140615111, iteration: 34915
loss: 0.9930948615074158,grad_norm: 0.8995794447819959, iteration: 34916
loss: 1.028885006904602,grad_norm: 0.9489263376064482, iteration: 34917
loss: 0.9832773804664612,grad_norm: 0.9419901325183753, iteration: 34918
loss: 0.9416446685791016,grad_norm: 0.9999991696454689, iteration: 34919
loss: 1.0075432062149048,grad_norm: 0.921126522522861, iteration: 34920
loss: 1.0202525854110718,grad_norm: 0.999999089165663, iteration: 34921
loss: 0.9853470325469971,grad_norm: 0.8994922578801873, iteration: 34922
loss: 0.9952945709228516,grad_norm: 0.9999990521980346, iteration: 34923
loss: 1.043338418006897,grad_norm: 0.9999998017079088, iteration: 34924
loss: 1.0229562520980835,grad_norm: 0.8405772458589826, iteration: 34925
loss: 0.984107255935669,grad_norm: 0.9999990954992122, iteration: 34926
loss: 1.030092477798462,grad_norm: 0.8772703848719008, iteration: 34927
loss: 1.0140695571899414,grad_norm: 0.9999990509277273, iteration: 34928
loss: 1.0271501541137695,grad_norm: 0.9999989363075723, iteration: 34929
loss: 0.9969706535339355,grad_norm: 0.9999989229315687, iteration: 34930
loss: 1.0180675983428955,grad_norm: 0.9999995597760954, iteration: 34931
loss: 1.0003695487976074,grad_norm: 0.9102711747036297, iteration: 34932
loss: 1.001570224761963,grad_norm: 0.9512265775788935, iteration: 34933
loss: 1.002176284790039,grad_norm: 0.8595940350761342, iteration: 34934
loss: 0.9839367270469666,grad_norm: 0.9999990462341113, iteration: 34935
loss: 1.0046390295028687,grad_norm: 0.9999989897133338, iteration: 34936
loss: 0.9981067180633545,grad_norm: 0.9394863527276239, iteration: 34937
loss: 1.0243226289749146,grad_norm: 0.9458950106668054, iteration: 34938
loss: 1.021225094795227,grad_norm: 0.9856182682385851, iteration: 34939
loss: 1.0339436531066895,grad_norm: 0.99999948352021, iteration: 34940
loss: 0.9929995536804199,grad_norm: 0.9999990768404107, iteration: 34941
loss: 0.9380501508712769,grad_norm: 0.9605413665325119, iteration: 34942
loss: 1.0546897649765015,grad_norm: 0.9999990581015169, iteration: 34943
loss: 0.9711927771568298,grad_norm: 0.9486546384449416, iteration: 34944
loss: 1.0196584463119507,grad_norm: 0.8739641346761111, iteration: 34945
loss: 0.96394282579422,grad_norm: 0.9999991950738313, iteration: 34946
loss: 1.0061942338943481,grad_norm: 0.9786920344521468, iteration: 34947
loss: 0.9928686022758484,grad_norm: 0.9876021250736989, iteration: 34948
loss: 1.0088921785354614,grad_norm: 0.9999992667194189, iteration: 34949
loss: 1.03142511844635,grad_norm: 0.8250769972918787, iteration: 34950
loss: 1.0344200134277344,grad_norm: 0.8832732442909503, iteration: 34951
loss: 1.0116255283355713,grad_norm: 0.9999992438085005, iteration: 34952
loss: 0.979095458984375,grad_norm: 0.9999991011296483, iteration: 34953
loss: 1.0038648843765259,grad_norm: 0.9999991037971582, iteration: 34954
loss: 0.9745968580245972,grad_norm: 0.8626344544055847, iteration: 34955
loss: 1.0119203329086304,grad_norm: 0.9999990835138112, iteration: 34956
loss: 1.0055711269378662,grad_norm: 0.9999993199264834, iteration: 34957
loss: 0.9945815205574036,grad_norm: 0.9999991719268864, iteration: 34958
loss: 1.0260289907455444,grad_norm: 0.9999991311852048, iteration: 34959
loss: 0.993680477142334,grad_norm: 0.999999133480438, iteration: 34960
loss: 1.004429817199707,grad_norm: 0.9816924848718369, iteration: 34961
loss: 0.9758166670799255,grad_norm: 0.975022144324455, iteration: 34962
loss: 0.9769964218139648,grad_norm: 0.9298987729813233, iteration: 34963
loss: 1.0329872369766235,grad_norm: 0.8826644190978487, iteration: 34964
loss: 1.0151585340499878,grad_norm: 0.9999992127361232, iteration: 34965
loss: 0.9939962029457092,grad_norm: 0.9999991558121311, iteration: 34966
loss: 1.0131149291992188,grad_norm: 0.9999991858941952, iteration: 34967
loss: 0.9925616383552551,grad_norm: 0.999999077892627, iteration: 34968
loss: 1.0456210374832153,grad_norm: 0.9334139974784696, iteration: 34969
loss: 1.0304754972457886,grad_norm: 0.9999991135071015, iteration: 34970
loss: 1.0213267803192139,grad_norm: 0.9999991363760325, iteration: 34971
loss: 0.9959999322891235,grad_norm: 0.9987323022149641, iteration: 34972
loss: 0.9907450675964355,grad_norm: 0.9689511253997182, iteration: 34973
loss: 1.0254204273223877,grad_norm: 0.9999989898551409, iteration: 34974
loss: 1.0940121412277222,grad_norm: 0.9999993173903902, iteration: 34975
loss: 0.9935869574546814,grad_norm: 0.857130269195324, iteration: 34976
loss: 0.9843443036079407,grad_norm: 0.8476766342248963, iteration: 34977
loss: 1.048600435256958,grad_norm: 0.99999936235987, iteration: 34978
loss: 1.0202136039733887,grad_norm: 0.9036793967880241, iteration: 34979
loss: 1.0378795862197876,grad_norm: 0.9999993285299938, iteration: 34980
loss: 0.9845688939094543,grad_norm: 0.8241978190507269, iteration: 34981
loss: 1.0288563966751099,grad_norm: 0.9249961743558196, iteration: 34982
loss: 1.0231709480285645,grad_norm: 0.9999989847370201, iteration: 34983
loss: 1.0114191770553589,grad_norm: 0.8468841515134821, iteration: 34984
loss: 1.0264942646026611,grad_norm: 0.9023394987498012, iteration: 34985
loss: 0.9822928309440613,grad_norm: 0.9108075021407188, iteration: 34986
loss: 1.0027788877487183,grad_norm: 0.9999991157355864, iteration: 34987
loss: 0.9938225746154785,grad_norm: 0.9999991618640421, iteration: 34988
loss: 0.9881128072738647,grad_norm: 0.9999992523036734, iteration: 34989
loss: 0.9911711812019348,grad_norm: 0.9103755609078705, iteration: 34990
loss: 1.028731107711792,grad_norm: 0.8974195377521098, iteration: 34991
loss: 1.0244961977005005,grad_norm: 0.9232303358505116, iteration: 34992
loss: 1.0317150354385376,grad_norm: 0.9416089968361245, iteration: 34993
loss: 1.0188997983932495,grad_norm: 0.9999989646731204, iteration: 34994
loss: 1.0261094570159912,grad_norm: 0.9999990905421834, iteration: 34995
loss: 1.0161463022232056,grad_norm: 0.8498163825578724, iteration: 34996
loss: 1.035443902015686,grad_norm: 0.7675159968928952, iteration: 34997
loss: 0.9837315678596497,grad_norm: 0.9999993794873491, iteration: 34998
loss: 1.0396243333816528,grad_norm: 0.9999995692965915, iteration: 34999
loss: 0.9902717471122742,grad_norm: 0.9999990108200026, iteration: 35000
loss: 0.989081859588623,grad_norm: 0.9808696022855122, iteration: 35001
loss: 1.0320405960083008,grad_norm: 0.8191425217927196, iteration: 35002
loss: 0.9831554889678955,grad_norm: 0.85089409779389, iteration: 35003
loss: 1.0544657707214355,grad_norm: 0.9999994112294864, iteration: 35004
loss: 0.9974483847618103,grad_norm: 0.9840672812825383, iteration: 35005
loss: 1.0003373622894287,grad_norm: 0.9329332119804189, iteration: 35006
loss: 0.9991261959075928,grad_norm: 0.9999990053873817, iteration: 35007
loss: 1.0033769607543945,grad_norm: 0.9999995906139433, iteration: 35008
loss: 0.9791429042816162,grad_norm: 0.9999993864099819, iteration: 35009
loss: 0.9980666041374207,grad_norm: 0.9953181076004223, iteration: 35010
loss: 1.019852876663208,grad_norm: 0.999999294924689, iteration: 35011
loss: 1.0019161701202393,grad_norm: 0.9999990115113709, iteration: 35012
loss: 0.976603090763092,grad_norm: 0.9999990699161756, iteration: 35013
loss: 0.999228298664093,grad_norm: 0.8414026926594141, iteration: 35014
loss: 0.9936208128929138,grad_norm: 0.9999991080265161, iteration: 35015
loss: 0.9750766754150391,grad_norm: 0.9654270107254135, iteration: 35016
loss: 1.0306755304336548,grad_norm: 0.9999991696271299, iteration: 35017
loss: 1.0296545028686523,grad_norm: 0.9999991352920338, iteration: 35018
loss: 1.0017540454864502,grad_norm: 0.9786383387160609, iteration: 35019
loss: 0.9971920251846313,grad_norm: 0.9999991763608472, iteration: 35020
loss: 1.0047485828399658,grad_norm: 0.9263364067568763, iteration: 35021
loss: 1.0840996503829956,grad_norm: 0.9999995460648986, iteration: 35022
loss: 0.9699868559837341,grad_norm: 0.9999991033436997, iteration: 35023
loss: 0.9750713109970093,grad_norm: 0.8073548670281929, iteration: 35024
loss: 0.9929462671279907,grad_norm: 0.916164051025472, iteration: 35025
loss: 1.0171473026275635,grad_norm: 0.8908942044821305, iteration: 35026
loss: 0.9479597806930542,grad_norm: 0.9999991711068645, iteration: 35027
loss: 1.009442925453186,grad_norm: 0.892914457508606, iteration: 35028
loss: 0.9631115198135376,grad_norm: 0.9999990862155763, iteration: 35029
loss: 0.9888707399368286,grad_norm: 0.9999994224348538, iteration: 35030
loss: 1.0249050855636597,grad_norm: 0.9999991970405075, iteration: 35031
loss: 0.9678264260292053,grad_norm: 0.9980013110499512, iteration: 35032
loss: 1.0224473476409912,grad_norm: 0.9140089307915366, iteration: 35033
loss: 1.0074236392974854,grad_norm: 0.9890504953247682, iteration: 35034
loss: 1.0195636749267578,grad_norm: 0.733144298678642, iteration: 35035
loss: 1.0010967254638672,grad_norm: 0.9999991148836616, iteration: 35036
loss: 1.030940294265747,grad_norm: 0.9999993378272837, iteration: 35037
loss: 1.0442167520523071,grad_norm: 0.9385360242473069, iteration: 35038
loss: 0.9751772880554199,grad_norm: 0.8067849419727724, iteration: 35039
loss: 1.0272457599639893,grad_norm: 0.9999999505489618, iteration: 35040
loss: 0.9340085983276367,grad_norm: 0.9363282921604508, iteration: 35041
loss: 0.9761179089546204,grad_norm: 0.99999910369264, iteration: 35042
loss: 0.9777051210403442,grad_norm: 0.9479904347742147, iteration: 35043
loss: 1.0254803895950317,grad_norm: 0.9999991922249213, iteration: 35044
loss: 0.9814389944076538,grad_norm: 0.9617320817547845, iteration: 35045
loss: 0.9905004501342773,grad_norm: 0.9999992300878633, iteration: 35046
loss: 1.0463321208953857,grad_norm: 0.9999991117332244, iteration: 35047
loss: 0.996356725692749,grad_norm: 0.9076881852379645, iteration: 35048
loss: 1.0556185245513916,grad_norm: 0.8930289534792105, iteration: 35049
loss: 0.9939305186271667,grad_norm: 0.978419231012083, iteration: 35050
loss: 1.0100183486938477,grad_norm: 0.9320875403657458, iteration: 35051
loss: 0.9884684085845947,grad_norm: 0.9999990707945957, iteration: 35052
loss: 0.993740439414978,grad_norm: 0.9881079447506684, iteration: 35053
loss: 0.9649273157119751,grad_norm: 0.9203020282470977, iteration: 35054
loss: 0.9920912384986877,grad_norm: 0.9999993200279109, iteration: 35055
loss: 1.0616862773895264,grad_norm: 0.9999992400946839, iteration: 35056
loss: 1.0016133785247803,grad_norm: 0.9999990829130693, iteration: 35057
loss: 0.9834731221199036,grad_norm: 0.9999990744043511, iteration: 35058
loss: 1.005159854888916,grad_norm: 0.9999989083018102, iteration: 35059
loss: 1.0346428155899048,grad_norm: 0.9999991719600884, iteration: 35060
loss: 1.0411994457244873,grad_norm: 0.999999179902675, iteration: 35061
loss: 1.011819839477539,grad_norm: 0.9140493039325651, iteration: 35062
loss: 1.005638837814331,grad_norm: 0.8572288581333151, iteration: 35063
loss: 0.968383252620697,grad_norm: 0.8875076848478489, iteration: 35064
loss: 0.9574077129364014,grad_norm: 0.9522906045631021, iteration: 35065
loss: 0.9952703714370728,grad_norm: 0.9999990470672719, iteration: 35066
loss: 0.9851956367492676,grad_norm: 0.9999990999146299, iteration: 35067
loss: 0.9482640624046326,grad_norm: 0.9852301845856287, iteration: 35068
loss: 1.0277711153030396,grad_norm: 0.8438365447189289, iteration: 35069
loss: 1.0019828081130981,grad_norm: 0.985662905701793, iteration: 35070
loss: 0.951088011264801,grad_norm: 0.7883934973042015, iteration: 35071
loss: 1.0338393449783325,grad_norm: 0.9999991854878331, iteration: 35072
loss: 0.9956989288330078,grad_norm: 0.8386780494784136, iteration: 35073
loss: 1.0010929107666016,grad_norm: 0.9999989272260076, iteration: 35074
loss: 0.995805561542511,grad_norm: 0.7865854498298496, iteration: 35075
loss: 1.0054521560668945,grad_norm: 0.9999989305739321, iteration: 35076
loss: 0.997480571269989,grad_norm: 0.9999990582225728, iteration: 35077
loss: 0.9751036763191223,grad_norm: 0.897496518583989, iteration: 35078
loss: 1.03635835647583,grad_norm: 0.8810244421282036, iteration: 35079
loss: 0.9688742160797119,grad_norm: 0.9999990920369016, iteration: 35080
loss: 0.9972782135009766,grad_norm: 0.9999997891511851, iteration: 35081
loss: 1.031083345413208,grad_norm: 0.9999992107883393, iteration: 35082
loss: 1.011771321296692,grad_norm: 0.9999991737862222, iteration: 35083
loss: 0.9833462834358215,grad_norm: 0.8962594640806146, iteration: 35084
loss: 0.972136378288269,grad_norm: 0.9541465567606812, iteration: 35085
loss: 1.0111019611358643,grad_norm: 0.9999989328482022, iteration: 35086
loss: 1.0281282663345337,grad_norm: 0.8814770781488079, iteration: 35087
loss: 1.0374561548233032,grad_norm: 0.9999991390050724, iteration: 35088
loss: 0.9866909384727478,grad_norm: 0.9007913346100572, iteration: 35089
loss: 1.0012282133102417,grad_norm: 0.890124425124488, iteration: 35090
loss: 1.065637469291687,grad_norm: 0.9999997978896773, iteration: 35091
loss: 0.9842774868011475,grad_norm: 0.9999993567050602, iteration: 35092
loss: 1.004713773727417,grad_norm: 0.9999990646797913, iteration: 35093
loss: 0.9984588623046875,grad_norm: 0.8820645300038575, iteration: 35094
loss: 1.0233629941940308,grad_norm: 0.9470517524641787, iteration: 35095
loss: 0.9898057579994202,grad_norm: 0.9019847399446227, iteration: 35096
loss: 0.9719016551971436,grad_norm: 0.9967332034587423, iteration: 35097
loss: 1.0240674018859863,grad_norm: 0.8987515093425199, iteration: 35098
loss: 0.9939695596694946,grad_norm: 0.9999991252735573, iteration: 35099
loss: 0.981817364692688,grad_norm: 0.8988081965044562, iteration: 35100
loss: 0.9567726850509644,grad_norm: 0.9556565072981918, iteration: 35101
loss: 1.00809645652771,grad_norm: 0.9999992576712521, iteration: 35102
loss: 1.0073182582855225,grad_norm: 0.9366406100988129, iteration: 35103
loss: 0.9773707389831543,grad_norm: 0.8172724029326964, iteration: 35104
loss: 0.9818705320358276,grad_norm: 0.8451849410622839, iteration: 35105
loss: 1.227500557899475,grad_norm: 0.9999999210188112, iteration: 35106
loss: 0.9981068968772888,grad_norm: 0.8920683709783342, iteration: 35107
loss: 0.9725888967514038,grad_norm: 0.9999991440539387, iteration: 35108
loss: 1.0024949312210083,grad_norm: 0.999999148401493, iteration: 35109
loss: 0.988600492477417,grad_norm: 0.8901310923263528, iteration: 35110
loss: 1.0214827060699463,grad_norm: 0.9999991269323808, iteration: 35111
loss: 0.9690395593643188,grad_norm: 0.9367404630219467, iteration: 35112
loss: 1.0246376991271973,grad_norm: 0.999999234324108, iteration: 35113
loss: 0.9596213102340698,grad_norm: 0.8650846525463508, iteration: 35114
loss: 0.9495221972465515,grad_norm: 0.9999990686149473, iteration: 35115
loss: 1.0968691110610962,grad_norm: 0.9999998589534568, iteration: 35116
loss: 1.0002055168151855,grad_norm: 0.9999996991744576, iteration: 35117
loss: 0.9758968353271484,grad_norm: 0.8619352006357108, iteration: 35118
loss: 1.006036400794983,grad_norm: 0.9999989494030028, iteration: 35119
loss: 0.9918686151504517,grad_norm: 0.9999991565947235, iteration: 35120
loss: 1.0272588729858398,grad_norm: 0.9577869600248996, iteration: 35121
loss: 1.037256121635437,grad_norm: 0.9999990908883035, iteration: 35122
loss: 1.0224957466125488,grad_norm: 0.920650261845691, iteration: 35123
loss: 1.0023441314697266,grad_norm: 0.9999991229183759, iteration: 35124
loss: 1.035897135734558,grad_norm: 0.9999992251927403, iteration: 35125
loss: 1.0294852256774902,grad_norm: 0.9784889049324426, iteration: 35126
loss: 1.0560623407363892,grad_norm: 0.9106780772345927, iteration: 35127
loss: 0.9770318865776062,grad_norm: 0.9999991677299407, iteration: 35128
loss: 1.0030016899108887,grad_norm: 0.9274136907008307, iteration: 35129
loss: 1.0279114246368408,grad_norm: 0.9999991187099223, iteration: 35130
loss: 1.0158753395080566,grad_norm: 0.7802067273255668, iteration: 35131
loss: 1.0449906587600708,grad_norm: 0.9999992748187932, iteration: 35132
loss: 1.003493309020996,grad_norm: 0.9216021620027303, iteration: 35133
loss: 0.9766706824302673,grad_norm: 0.9241023556540283, iteration: 35134
loss: 0.9701781272888184,grad_norm: 0.9529048384509641, iteration: 35135
loss: 1.0072630643844604,grad_norm: 0.9999989979082742, iteration: 35136
loss: 1.002958059310913,grad_norm: 0.9557018456380497, iteration: 35137
loss: 0.9937351942062378,grad_norm: 0.9756742437714929, iteration: 35138
loss: 1.060416340827942,grad_norm: 0.9999992707102804, iteration: 35139
loss: 1.001604676246643,grad_norm: 0.9415755321149509, iteration: 35140
loss: 0.9791209101676941,grad_norm: 0.9999990924658483, iteration: 35141
loss: 0.9460921287536621,grad_norm: 0.9999990852009599, iteration: 35142
loss: 0.9626734256744385,grad_norm: 0.8814105930016128, iteration: 35143
loss: 1.0127918720245361,grad_norm: 0.9141017674310006, iteration: 35144
loss: 1.0224430561065674,grad_norm: 0.9999991625411176, iteration: 35145
loss: 0.9898310899734497,grad_norm: 0.8187237401332219, iteration: 35146
loss: 0.9806405305862427,grad_norm: 0.9999991741789128, iteration: 35147
loss: 0.9921866655349731,grad_norm: 0.9999991970036364, iteration: 35148
loss: 1.1638484001159668,grad_norm: 0.9999996230411081, iteration: 35149
loss: 0.9696997404098511,grad_norm: 0.9542268804564529, iteration: 35150
loss: 0.9872691035270691,grad_norm: 0.9949400838265201, iteration: 35151
loss: 0.9971017837524414,grad_norm: 0.7999188931024448, iteration: 35152
loss: 0.9607611894607544,grad_norm: 0.9999991688965091, iteration: 35153
loss: 0.9947922229766846,grad_norm: 0.9999991232064768, iteration: 35154
loss: 1.0044525861740112,grad_norm: 0.9999991169931209, iteration: 35155
loss: 0.9944407939910889,grad_norm: 0.932720018363809, iteration: 35156
loss: 1.013259768486023,grad_norm: 0.9087372324794303, iteration: 35157
loss: 1.0178369283676147,grad_norm: 0.9999991337971308, iteration: 35158
loss: 1.0239298343658447,grad_norm: 0.9999990768709028, iteration: 35159
loss: 1.0264204740524292,grad_norm: 0.9999989969996622, iteration: 35160
loss: 0.9749691486358643,grad_norm: 0.8271153010018403, iteration: 35161
loss: 1.0156680345535278,grad_norm: 0.953072746333321, iteration: 35162
loss: 1.004461646080017,grad_norm: 0.9999995268017607, iteration: 35163
loss: 0.9734316468238831,grad_norm: 0.999999138864718, iteration: 35164
loss: 1.0000648498535156,grad_norm: 0.9370623332878095, iteration: 35165
loss: 0.9847332835197449,grad_norm: 0.8585798943051044, iteration: 35166
loss: 1.0135674476623535,grad_norm: 0.9999992337633536, iteration: 35167
loss: 1.0100940465927124,grad_norm: 0.8294735235896629, iteration: 35168
loss: 0.9756655693054199,grad_norm: 0.912536891371542, iteration: 35169
loss: 1.075656771659851,grad_norm: 0.9999996612878037, iteration: 35170
loss: 1.0135445594787598,grad_norm: 0.9671842047016551, iteration: 35171
loss: 0.9939619898796082,grad_norm: 0.827043363089807, iteration: 35172
loss: 1.0120493173599243,grad_norm: 0.9212556578380636, iteration: 35173
loss: 0.9842106103897095,grad_norm: 0.9932247654931962, iteration: 35174
loss: 1.00114107131958,grad_norm: 0.9999991658645468, iteration: 35175
loss: 0.9723470211029053,grad_norm: 0.9999992132350833, iteration: 35176
loss: 0.9936565160751343,grad_norm: 0.9229792047280931, iteration: 35177
loss: 1.0373139381408691,grad_norm: 0.9563033121956928, iteration: 35178
loss: 1.0208660364151,grad_norm: 0.977194951393014, iteration: 35179
loss: 1.063364863395691,grad_norm: 0.9658393840765066, iteration: 35180
loss: 1.0831048488616943,grad_norm: 0.9999995812671799, iteration: 35181
loss: 0.9783101081848145,grad_norm: 0.8728297010317247, iteration: 35182
loss: 1.0387382507324219,grad_norm: 0.8720576312091479, iteration: 35183
loss: 0.9992122650146484,grad_norm: 0.9574155306166355, iteration: 35184
loss: 1.0218573808670044,grad_norm: 0.9999991377514779, iteration: 35185
loss: 1.0261094570159912,grad_norm: 0.9999994369839874, iteration: 35186
loss: 0.9821336269378662,grad_norm: 0.9707330389856266, iteration: 35187
loss: 0.9480438232421875,grad_norm: 0.9999990559911766, iteration: 35188
loss: 1.0097241401672363,grad_norm: 0.9944402699018157, iteration: 35189
loss: 1.0077449083328247,grad_norm: 0.9731035550701673, iteration: 35190
loss: 0.995022177696228,grad_norm: 0.8796712141215804, iteration: 35191
loss: 0.9968023300170898,grad_norm: 0.7430627390357749, iteration: 35192
loss: 1.0038750171661377,grad_norm: 0.9999992998055065, iteration: 35193
loss: 0.9838994145393372,grad_norm: 0.9999991078215963, iteration: 35194
loss: 0.9745045900344849,grad_norm: 0.95236655060245, iteration: 35195
loss: 0.9837706685066223,grad_norm: 0.9988466605152635, iteration: 35196
loss: 1.0226233005523682,grad_norm: 0.9999990633390273, iteration: 35197
loss: 0.9811902046203613,grad_norm: 0.9035823633305619, iteration: 35198
loss: 0.982990562915802,grad_norm: 0.9999992586026899, iteration: 35199
loss: 0.9863934516906738,grad_norm: 0.9118718140368081, iteration: 35200
loss: 0.984023928642273,grad_norm: 0.9999995082578037, iteration: 35201
loss: 1.008366584777832,grad_norm: 0.938106592957443, iteration: 35202
loss: 1.0273240804672241,grad_norm: 0.9476460237022755, iteration: 35203
loss: 0.9771419167518616,grad_norm: 0.9036070681890465, iteration: 35204
loss: 0.9760940670967102,grad_norm: 0.9617158990316497, iteration: 35205
loss: 0.9588280320167542,grad_norm: 0.9999991549021356, iteration: 35206
loss: 1.023957371711731,grad_norm: 0.8713122841137305, iteration: 35207
loss: 1.0033584833145142,grad_norm: 0.9750742342605632, iteration: 35208
loss: 0.9987130761146545,grad_norm: 0.9246961085146693, iteration: 35209
loss: 0.9848875999450684,grad_norm: 0.8586819374109185, iteration: 35210
loss: 0.9979254603385925,grad_norm: 0.9999990572207716, iteration: 35211
loss: 0.9745608568191528,grad_norm: 0.9904163173555296, iteration: 35212
loss: 1.0349431037902832,grad_norm: 0.9999992427406238, iteration: 35213
loss: 1.029673457145691,grad_norm: 0.9999990601515366, iteration: 35214
loss: 0.9838429689407349,grad_norm: 0.8549685870509591, iteration: 35215
loss: 0.9975499510765076,grad_norm: 0.9550249717803969, iteration: 35216
loss: 1.0239372253417969,grad_norm: 0.9999994748076715, iteration: 35217
loss: 1.0097042322158813,grad_norm: 0.9999991765580881, iteration: 35218
loss: 1.0215226411819458,grad_norm: 0.9765617081023024, iteration: 35219
loss: 1.0186443328857422,grad_norm: 0.953474961629487, iteration: 35220
loss: 0.9882232546806335,grad_norm: 0.8997744064490742, iteration: 35221
loss: 0.9789462089538574,grad_norm: 0.8242428980944438, iteration: 35222
loss: 0.9700245261192322,grad_norm: 0.8754633984084645, iteration: 35223
loss: 1.0261125564575195,grad_norm: 0.8354855900529471, iteration: 35224
loss: 1.0068094730377197,grad_norm: 0.8629398420162415, iteration: 35225
loss: 1.0204559564590454,grad_norm: 0.9427019297565878, iteration: 35226
loss: 0.991696298122406,grad_norm: 0.8522440240317539, iteration: 35227
loss: 0.9797573089599609,grad_norm: 0.9373517551342873, iteration: 35228
loss: 1.026928424835205,grad_norm: 0.9999994121234825, iteration: 35229
loss: 1.0031033754348755,grad_norm: 0.804896319475125, iteration: 35230
loss: 1.0146220922470093,grad_norm: 0.9106372367781804, iteration: 35231
loss: 0.9946720004081726,grad_norm: 0.9825611978715129, iteration: 35232
loss: 1.0423266887664795,grad_norm: 0.9999991143242731, iteration: 35233
loss: 0.9800785183906555,grad_norm: 0.9509407019770949, iteration: 35234
loss: 1.0021487474441528,grad_norm: 0.9999992286878057, iteration: 35235
loss: 0.9824527502059937,grad_norm: 0.9273774807693553, iteration: 35236
loss: 0.9636924266815186,grad_norm: 0.9999991515530989, iteration: 35237
loss: 0.9325118064880371,grad_norm: 0.9999991311949028, iteration: 35238
loss: 0.9961856007575989,grad_norm: 0.963451476372092, iteration: 35239
loss: 1.0175317525863647,grad_norm: 0.9999991625228631, iteration: 35240
loss: 1.0206804275512695,grad_norm: 0.9999989340593384, iteration: 35241
loss: 0.9815667867660522,grad_norm: 0.9999990402545743, iteration: 35242
loss: 0.9974814057350159,grad_norm: 0.8635079465539706, iteration: 35243
loss: 1.0136069059371948,grad_norm: 0.8591501593477664, iteration: 35244
loss: 0.9913221597671509,grad_norm: 0.9984463445903938, iteration: 35245
loss: 1.0245158672332764,grad_norm: 0.9999991521555613, iteration: 35246
loss: 0.9894195795059204,grad_norm: 0.95571592590301, iteration: 35247
loss: 1.004045844078064,grad_norm: 0.999999032992941, iteration: 35248
loss: 1.005246877670288,grad_norm: 0.9999991904769775, iteration: 35249
loss: 0.9973629713058472,grad_norm: 0.9999991767009635, iteration: 35250
loss: 1.0129512548446655,grad_norm: 0.9130026898525525, iteration: 35251
loss: 1.0611213445663452,grad_norm: 0.9999991665705913, iteration: 35252
loss: 0.923397958278656,grad_norm: 0.9999990086292434, iteration: 35253
loss: 1.0093077421188354,grad_norm: 0.9274071905212133, iteration: 35254
loss: 1.0129356384277344,grad_norm: 0.9999991897600166, iteration: 35255
loss: 1.055830955505371,grad_norm: 0.9161078960849754, iteration: 35256
loss: 1.0100287199020386,grad_norm: 0.8553295760081219, iteration: 35257
loss: 1.0260181427001953,grad_norm: 0.999999268394947, iteration: 35258
loss: 1.052325963973999,grad_norm: 0.9194250984551283, iteration: 35259
loss: 0.9844262599945068,grad_norm: 0.9999989434691454, iteration: 35260
loss: 0.994860827922821,grad_norm: 0.8441592899350515, iteration: 35261
loss: 1.0313475131988525,grad_norm: 0.8377622735821194, iteration: 35262
loss: 1.0124843120574951,grad_norm: 0.9794609748548021, iteration: 35263
loss: 1.013615369796753,grad_norm: 0.9999993100563666, iteration: 35264
loss: 0.965351939201355,grad_norm: 0.8655066310327695, iteration: 35265
loss: 1.009229302406311,grad_norm: 0.9999991429752234, iteration: 35266
loss: 1.018001914024353,grad_norm: 0.9504882172937238, iteration: 35267
loss: 1.017337679862976,grad_norm: 0.9899104975423315, iteration: 35268
loss: 0.9921594858169556,grad_norm: 0.9999991409234653, iteration: 35269
loss: 1.0522059202194214,grad_norm: 0.9999994534328954, iteration: 35270
loss: 0.9885810613632202,grad_norm: 0.8679983676381583, iteration: 35271
loss: 0.9851409792900085,grad_norm: 0.8769120343680264, iteration: 35272
loss: 1.0051543712615967,grad_norm: 0.9999991118158191, iteration: 35273
loss: 1.0000109672546387,grad_norm: 0.9516497395965211, iteration: 35274
loss: 1.0041821002960205,grad_norm: 0.9999989523626297, iteration: 35275
loss: 0.9954583048820496,grad_norm: 0.9999997325681308, iteration: 35276
loss: 1.0339486598968506,grad_norm: 0.9999994478432767, iteration: 35277
loss: 1.0151137113571167,grad_norm: 0.9999992584459028, iteration: 35278
loss: 0.9898542761802673,grad_norm: 0.7517327428126115, iteration: 35279
loss: 1.0331158638000488,grad_norm: 0.9999991014059715, iteration: 35280
loss: 0.9756454825401306,grad_norm: 0.9999992638203642, iteration: 35281
loss: 0.9784059524536133,grad_norm: 0.9999991487715617, iteration: 35282
loss: 1.0237884521484375,grad_norm: 0.7348386859115157, iteration: 35283
loss: 0.9753941893577576,grad_norm: 0.977392438273741, iteration: 35284
loss: 1.0337815284729004,grad_norm: 0.9701684376219365, iteration: 35285
loss: 0.9978181719779968,grad_norm: 0.9379355999539893, iteration: 35286
loss: 0.959618330001831,grad_norm: 0.9379053091741384, iteration: 35287
loss: 1.0313513278961182,grad_norm: 0.8998617401263407, iteration: 35288
loss: 1.0188515186309814,grad_norm: 0.9999991915906978, iteration: 35289
loss: 1.0240108966827393,grad_norm: 0.9999995433126033, iteration: 35290
loss: 1.012862205505371,grad_norm: 0.9999995143598801, iteration: 35291
loss: 0.9963501691818237,grad_norm: 0.9748062532652555, iteration: 35292
loss: 0.9737553000450134,grad_norm: 0.9751167883139336, iteration: 35293
loss: 1.018551230430603,grad_norm: 0.959238185791534, iteration: 35294
loss: 0.9765703678131104,grad_norm: 0.8606952671780875, iteration: 35295
loss: 1.0185333490371704,grad_norm: 0.9999989827176907, iteration: 35296
loss: 1.0023870468139648,grad_norm: 0.9664304960127513, iteration: 35297
loss: 1.0243202447891235,grad_norm: 0.9999997983614185, iteration: 35298
loss: 1.0063544511795044,grad_norm: 0.9999991003247835, iteration: 35299
loss: 1.0030460357666016,grad_norm: 0.9999992814901388, iteration: 35300
loss: 1.0263822078704834,grad_norm: 0.999999359760462, iteration: 35301
loss: 1.010336995124817,grad_norm: 0.8723302936711004, iteration: 35302
loss: 0.9190302491188049,grad_norm: 0.9999992550609046, iteration: 35303
loss: 1.0061548948287964,grad_norm: 0.9999990385572036, iteration: 35304
loss: 0.9887109398841858,grad_norm: 0.9126351606683315, iteration: 35305
loss: 1.000919222831726,grad_norm: 0.9743381243126742, iteration: 35306
loss: 0.9720267057418823,grad_norm: 0.8143415822205955, iteration: 35307
loss: 1.0036405324935913,grad_norm: 0.9999991486013814, iteration: 35308
loss: 1.0030571222305298,grad_norm: 0.9999991011503746, iteration: 35309
loss: 1.0304769277572632,grad_norm: 0.9999991120967697, iteration: 35310
loss: 1.0371403694152832,grad_norm: 0.8855351063055821, iteration: 35311
loss: 1.016714334487915,grad_norm: 0.8362521782328866, iteration: 35312
loss: 0.992254912853241,grad_norm: 0.999999249280273, iteration: 35313
loss: 1.018889307975769,grad_norm: 0.9999992022052834, iteration: 35314
loss: 0.9970878958702087,grad_norm: 0.956713381033375, iteration: 35315
loss: 1.0497615337371826,grad_norm: 0.9999995691578553, iteration: 35316
loss: 1.034001350402832,grad_norm: 0.9989287295893867, iteration: 35317
loss: 1.0198168754577637,grad_norm: 0.9999995975455448, iteration: 35318
loss: 0.9681867957115173,grad_norm: 0.9999991262854574, iteration: 35319
loss: 0.9741027355194092,grad_norm: 0.9999991136832109, iteration: 35320
loss: 1.026052713394165,grad_norm: 0.9970423550370646, iteration: 35321
loss: 1.0225979089736938,grad_norm: 0.852581536815158, iteration: 35322
loss: 1.0544604063034058,grad_norm: 0.9414420648753572, iteration: 35323
loss: 1.0214283466339111,grad_norm: 0.9738409473372197, iteration: 35324
loss: 0.9863924384117126,grad_norm: 0.9999990541786776, iteration: 35325
loss: 0.9628851413726807,grad_norm: 0.8403254077086169, iteration: 35326
loss: 1.0452446937561035,grad_norm: 0.9999992646336779, iteration: 35327
loss: 1.0030587911605835,grad_norm: 0.9690190119945836, iteration: 35328
loss: 1.0039758682250977,grad_norm: 0.8819757243336832, iteration: 35329
loss: 1.0111109018325806,grad_norm: 0.9999992296814768, iteration: 35330
loss: 1.0099653005599976,grad_norm: 0.9594137521441422, iteration: 35331
loss: 1.0010653734207153,grad_norm: 0.9923548002767554, iteration: 35332
loss: 0.992854654788971,grad_norm: 0.8296022755772322, iteration: 35333
loss: 0.9786819815635681,grad_norm: 0.999999251598685, iteration: 35334
loss: 0.9989198446273804,grad_norm: 0.9999992551676323, iteration: 35335
loss: 1.0152479410171509,grad_norm: 0.9999992674981193, iteration: 35336
loss: 1.0185649394989014,grad_norm: 0.9999992181622617, iteration: 35337
loss: 0.9998689889907837,grad_norm: 0.8187345330779087, iteration: 35338
loss: 0.9896033406257629,grad_norm: 0.9359829806443053, iteration: 35339
loss: 1.0462090969085693,grad_norm: 0.9999992165333272, iteration: 35340
loss: 1.0224658250808716,grad_norm: 0.9739551976733662, iteration: 35341
loss: 0.9669957756996155,grad_norm: 0.9866693091802902, iteration: 35342
loss: 1.049594521522522,grad_norm: 0.8386393837335967, iteration: 35343
loss: 0.9923122525215149,grad_norm: 0.7955492216710542, iteration: 35344
loss: 1.0175315141677856,grad_norm: 0.8378429607747948, iteration: 35345
loss: 1.0186485052108765,grad_norm: 0.8965370149210649, iteration: 35346
loss: 1.0415617227554321,grad_norm: 0.9999995103889632, iteration: 35347
loss: 1.0345097780227661,grad_norm: 0.9999991089450853, iteration: 35348
loss: 1.0251984596252441,grad_norm: 0.9999991471592262, iteration: 35349
loss: 0.9998725652694702,grad_norm: 0.9999993048576402, iteration: 35350
loss: 1.0049545764923096,grad_norm: 0.9110184556034805, iteration: 35351
loss: 0.9740822911262512,grad_norm: 0.8395030979358813, iteration: 35352
loss: 0.9930599927902222,grad_norm: 0.9999993927913512, iteration: 35353
loss: 1.0161837339401245,grad_norm: 0.9963716048604027, iteration: 35354
loss: 0.9997414350509644,grad_norm: 0.9999991189759897, iteration: 35355
loss: 1.0220950841903687,grad_norm: 0.9042448032511577, iteration: 35356
loss: 1.0113935470581055,grad_norm: 0.892958333288448, iteration: 35357
loss: 0.9803112745285034,grad_norm: 0.7592564947587369, iteration: 35358
loss: 0.9944455027580261,grad_norm: 0.9999991257327068, iteration: 35359
loss: 1.0085679292678833,grad_norm: 0.9999989943394357, iteration: 35360
loss: 1.0440655946731567,grad_norm: 0.9144440204612541, iteration: 35361
loss: 0.9883338212966919,grad_norm: 0.9999990932352522, iteration: 35362
loss: 0.9720150232315063,grad_norm: 0.9050068746505125, iteration: 35363
loss: 1.028973937034607,grad_norm: 0.8404805669850672, iteration: 35364
loss: 1.0396783351898193,grad_norm: 0.9999991271548899, iteration: 35365
loss: 0.9677833914756775,grad_norm: 0.860740479852854, iteration: 35366
loss: 1.0251716375350952,grad_norm: 0.9571718502921878, iteration: 35367
loss: 1.0108113288879395,grad_norm: 0.9478545491421239, iteration: 35368
loss: 0.9925246834754944,grad_norm: 0.9999990745456421, iteration: 35369
loss: 1.0394803285598755,grad_norm: 0.9999995015595428, iteration: 35370
loss: 0.9968391060829163,grad_norm: 0.9999990911486, iteration: 35371
loss: 1.0021864175796509,grad_norm: 0.9999991562259904, iteration: 35372
loss: 0.9976545572280884,grad_norm: 0.7305891355336461, iteration: 35373
loss: 0.968718945980072,grad_norm: 0.996838972689309, iteration: 35374
loss: 1.0692918300628662,grad_norm: 0.9999993251832981, iteration: 35375
loss: 1.026383399963379,grad_norm: 0.9515100380488906, iteration: 35376
loss: 1.0030313730239868,grad_norm: 0.9999990870633781, iteration: 35377
loss: 1.0116684436798096,grad_norm: 0.999999341767346, iteration: 35378
loss: 1.0149657726287842,grad_norm: 0.9999997627108793, iteration: 35379
loss: 0.9895079135894775,grad_norm: 0.9999990742041799, iteration: 35380
loss: 1.0049622058868408,grad_norm: 0.9999996270082946, iteration: 35381
loss: 0.9852320551872253,grad_norm: 0.9656628403646628, iteration: 35382
loss: 1.0325324535369873,grad_norm: 0.9502944444225612, iteration: 35383
loss: 1.0205918550491333,grad_norm: 0.999999312888308, iteration: 35384
loss: 0.9747695922851562,grad_norm: 0.999999108037886, iteration: 35385
loss: 1.022294521331787,grad_norm: 0.9999994084911377, iteration: 35386
loss: 1.0033262968063354,grad_norm: 0.9999991266861068, iteration: 35387
loss: 1.0070526599884033,grad_norm: 0.9999990713158731, iteration: 35388
loss: 0.9851831793785095,grad_norm: 0.9999990707034954, iteration: 35389
loss: 1.0130443572998047,grad_norm: 0.9028938111162909, iteration: 35390
loss: 1.0418508052825928,grad_norm: 0.9999997688094786, iteration: 35391
loss: 0.9949775338172913,grad_norm: 0.9999990005597831, iteration: 35392
loss: 1.033350944519043,grad_norm: 0.9999990564564843, iteration: 35393
loss: 0.9797317981719971,grad_norm: 0.9999990094833725, iteration: 35394
loss: 1.0563204288482666,grad_norm: 0.9999997007996979, iteration: 35395
loss: 0.9999255537986755,grad_norm: 0.9999992142739098, iteration: 35396
loss: 1.0922033786773682,grad_norm: 0.9999995710968791, iteration: 35397
loss: 1.01511549949646,grad_norm: 0.9671474775436335, iteration: 35398
loss: 1.0111101865768433,grad_norm: 0.9547478260560636, iteration: 35399
loss: 1.0163893699645996,grad_norm: 0.9999991516098293, iteration: 35400
loss: 1.017435908317566,grad_norm: 0.9257456789740309, iteration: 35401
loss: 1.006690502166748,grad_norm: 0.9389973207561138, iteration: 35402
loss: 1.0999627113342285,grad_norm: 0.9999993870342958, iteration: 35403
loss: 1.017370581626892,grad_norm: 0.9999990609241985, iteration: 35404
loss: 1.0210636854171753,grad_norm: 0.999999019406224, iteration: 35405
loss: 1.0238456726074219,grad_norm: 0.9189901281377268, iteration: 35406
loss: 0.9543015360832214,grad_norm: 0.9999991905171257, iteration: 35407
loss: 0.9999995231628418,grad_norm: 0.8445961973909341, iteration: 35408
loss: 1.0076934099197388,grad_norm: 0.9999995171968445, iteration: 35409
loss: 1.0031265020370483,grad_norm: 0.9999992265579688, iteration: 35410
loss: 0.9913535714149475,grad_norm: 0.9405719682787465, iteration: 35411
loss: 1.0252095460891724,grad_norm: 0.999999003834237, iteration: 35412
loss: 0.9726959466934204,grad_norm: 0.9999991023283568, iteration: 35413
loss: 1.007832407951355,grad_norm: 0.9999991102508363, iteration: 35414
loss: 0.9871011972427368,grad_norm: 0.9999995977567336, iteration: 35415
loss: 1.025400161743164,grad_norm: 0.9069176987785963, iteration: 35416
loss: 0.9415871500968933,grad_norm: 0.9999991992499736, iteration: 35417
loss: 0.998952329158783,grad_norm: 0.9999991302507112, iteration: 35418
loss: 1.0014278888702393,grad_norm: 0.9999991494311916, iteration: 35419
loss: 1.0372394323349,grad_norm: 0.973271186360005, iteration: 35420
loss: 1.1374351978302002,grad_norm: 0.9999996887693113, iteration: 35421
loss: 0.9401296973228455,grad_norm: 0.9999990736810771, iteration: 35422
loss: 0.9804344773292542,grad_norm: 0.9999990567965982, iteration: 35423
loss: 1.029680848121643,grad_norm: 0.8387870857520711, iteration: 35424
loss: 1.0578465461730957,grad_norm: 0.9999997569825922, iteration: 35425
loss: 1.0026932954788208,grad_norm: 0.8688906593140596, iteration: 35426
loss: 1.0008727312088013,grad_norm: 0.8846894109435416, iteration: 35427
loss: 1.0189653635025024,grad_norm: 0.999999390336502, iteration: 35428
loss: 1.0104950666427612,grad_norm: 0.9832828967014416, iteration: 35429
loss: 0.972748875617981,grad_norm: 0.9334138079800401, iteration: 35430
loss: 0.9788969159126282,grad_norm: 0.9999991205569849, iteration: 35431
loss: 1.0390905141830444,grad_norm: 0.9999993836776015, iteration: 35432
loss: 1.103560209274292,grad_norm: 0.9999997751916042, iteration: 35433
loss: 1.0041944980621338,grad_norm: 0.9078900815557875, iteration: 35434
loss: 1.0000077486038208,grad_norm: 0.8993883732189392, iteration: 35435
loss: 1.004502773284912,grad_norm: 0.999998998615589, iteration: 35436
loss: 0.9813014268875122,grad_norm: 0.9999992960761496, iteration: 35437
loss: 1.0218658447265625,grad_norm: 0.8889188617464631, iteration: 35438
loss: 1.0211094617843628,grad_norm: 0.9580602701881625, iteration: 35439
loss: 1.067286729812622,grad_norm: 0.9999994511559293, iteration: 35440
loss: 1.0316543579101562,grad_norm: 0.9261272048890328, iteration: 35441
loss: 1.0533400774002075,grad_norm: 0.9999992228802684, iteration: 35442
loss: 0.988066554069519,grad_norm: 0.9999992142721039, iteration: 35443
loss: 1.1788614988327026,grad_norm: 0.9999997877573849, iteration: 35444
loss: 0.9782538414001465,grad_norm: 0.9828983316028875, iteration: 35445
loss: 1.0681835412979126,grad_norm: 0.9999999247689013, iteration: 35446
loss: 1.0088841915130615,grad_norm: 0.9999992759855304, iteration: 35447
loss: 1.097027063369751,grad_norm: 0.9999994451917137, iteration: 35448
loss: 1.0366196632385254,grad_norm: 0.8418356562518646, iteration: 35449
loss: 1.0093715190887451,grad_norm: 0.9999991346264697, iteration: 35450
loss: 0.9856823086738586,grad_norm: 0.9370224020364036, iteration: 35451
loss: 0.9995969533920288,grad_norm: 0.9999991774047321, iteration: 35452
loss: 0.988127589225769,grad_norm: 0.9999996078663654, iteration: 35453
loss: 1.0095641613006592,grad_norm: 0.9999990399925898, iteration: 35454
loss: 1.0384327173233032,grad_norm: 0.9570139066622816, iteration: 35455
loss: 1.0118913650512695,grad_norm: 0.999999160821526, iteration: 35456
loss: 1.050622820854187,grad_norm: 0.9512874715707481, iteration: 35457
loss: 1.013970136642456,grad_norm: 0.9540752659208012, iteration: 35458
loss: 1.0398403406143188,grad_norm: 0.9999991225561872, iteration: 35459
loss: 1.0161832571029663,grad_norm: 0.8993829465564046, iteration: 35460
loss: 1.0085147619247437,grad_norm: 0.9999991762198044, iteration: 35461
loss: 1.024558186531067,grad_norm: 0.9780685957734673, iteration: 35462
loss: 0.9898858070373535,grad_norm: 0.9999990163050878, iteration: 35463
loss: 1.0026346445083618,grad_norm: 0.9949525920442724, iteration: 35464
loss: 1.049944519996643,grad_norm: 0.9999990908374791, iteration: 35465
loss: 1.047404170036316,grad_norm: 0.9022973233718902, iteration: 35466
loss: 1.0139248371124268,grad_norm: 0.8467971669320818, iteration: 35467
loss: 0.9970968961715698,grad_norm: 0.9772878289454409, iteration: 35468
loss: 0.9905439615249634,grad_norm: 0.9999990756588168, iteration: 35469
loss: 0.9920167326927185,grad_norm: 0.9999990793587488, iteration: 35470
loss: 1.0389137268066406,grad_norm: 0.9466430644718027, iteration: 35471
loss: 1.0388197898864746,grad_norm: 0.9734385029255866, iteration: 35472
loss: 1.0027432441711426,grad_norm: 0.8804800859459591, iteration: 35473
loss: 0.9762982726097107,grad_norm: 0.9999992901511825, iteration: 35474
loss: 1.0190846920013428,grad_norm: 0.9297098891629326, iteration: 35475
loss: 0.9865931868553162,grad_norm: 0.8718225040676956, iteration: 35476
loss: 1.0126157999038696,grad_norm: 0.9709969656921248, iteration: 35477
loss: 1.0341103076934814,grad_norm: 0.9999991135849087, iteration: 35478
loss: 1.03687584400177,grad_norm: 0.9516428931867703, iteration: 35479
loss: 1.041608452796936,grad_norm: 0.9672746598630527, iteration: 35480
loss: 1.0024410486221313,grad_norm: 0.999998999676585, iteration: 35481
loss: 0.9793305993080139,grad_norm: 0.9999991172143913, iteration: 35482
loss: 1.0089761018753052,grad_norm: 0.9999998531881013, iteration: 35483
loss: 0.9660295248031616,grad_norm: 0.8357417949072568, iteration: 35484
loss: 1.012670874595642,grad_norm: 0.9999990341790629, iteration: 35485
loss: 1.0552775859832764,grad_norm: 0.999999082811333, iteration: 35486
loss: 0.9805310368537903,grad_norm: 0.8825563268391343, iteration: 35487
loss: 1.0335978269577026,grad_norm: 0.9999991805122536, iteration: 35488
loss: 1.0177624225616455,grad_norm: 0.9999991157809656, iteration: 35489
loss: 1.015614628791809,grad_norm: 0.982284378410836, iteration: 35490
loss: 1.0189296007156372,grad_norm: 0.919505681536954, iteration: 35491
loss: 1.0072569847106934,grad_norm: 0.9999991199913756, iteration: 35492
loss: 0.985521674156189,grad_norm: 0.9660747320011278, iteration: 35493
loss: 1.0048960447311401,grad_norm: 0.9395724749762716, iteration: 35494
loss: 1.0188871622085571,grad_norm: 0.9999990961121848, iteration: 35495
loss: 1.016458511352539,grad_norm: 0.9758555658062336, iteration: 35496
loss: 1.0216277837753296,grad_norm: 0.872232227758372, iteration: 35497
loss: 1.0418332815170288,grad_norm: 0.9864178607771328, iteration: 35498
loss: 1.0239723920822144,grad_norm: 0.8792218484412292, iteration: 35499
loss: 0.9971154928207397,grad_norm: 0.9999991508675726, iteration: 35500
loss: 1.0042434930801392,grad_norm: 0.9999990552583818, iteration: 35501
loss: 1.004178762435913,grad_norm: 0.9803103641263696, iteration: 35502
loss: 0.992646336555481,grad_norm: 0.8058264313145699, iteration: 35503
loss: 1.0336053371429443,grad_norm: 0.999999153423933, iteration: 35504
loss: 1.0576910972595215,grad_norm: 0.9999996516930948, iteration: 35505
loss: 1.0015077590942383,grad_norm: 0.9999991411530347, iteration: 35506
loss: 1.0259907245635986,grad_norm: 0.9999990797750036, iteration: 35507
loss: 1.0061513185501099,grad_norm: 0.8798973952582216, iteration: 35508
loss: 1.041723370552063,grad_norm: 0.9999990559469395, iteration: 35509
loss: 0.9776829481124878,grad_norm: 0.8923451845216426, iteration: 35510
loss: 1.0154931545257568,grad_norm: 0.9980738695994057, iteration: 35511
loss: 0.9379128217697144,grad_norm: 0.9545167071190139, iteration: 35512
loss: 1.0138640403747559,grad_norm: 0.9622727398352228, iteration: 35513
loss: 1.0112779140472412,grad_norm: 0.8724965256403796, iteration: 35514
loss: 1.0139925479888916,grad_norm: 0.8477998311956235, iteration: 35515
loss: 1.025422215461731,grad_norm: 0.999999061608457, iteration: 35516
loss: 1.0296151638031006,grad_norm: 0.9999996736673147, iteration: 35517
loss: 1.0085242986679077,grad_norm: 0.9999991493144308, iteration: 35518
loss: 0.9846067428588867,grad_norm: 0.9702548467175846, iteration: 35519
loss: 0.9587175250053406,grad_norm: 0.9999991259446106, iteration: 35520
loss: 0.9944486021995544,grad_norm: 0.968396241742147, iteration: 35521
loss: 0.9799681901931763,grad_norm: 0.9999991295050434, iteration: 35522
loss: 1.016281247138977,grad_norm: 0.9999990274202252, iteration: 35523
loss: 1.0054800510406494,grad_norm: 0.8826398682641545, iteration: 35524
loss: 0.9810892939567566,grad_norm: 0.9467255568729634, iteration: 35525
loss: 1.0787749290466309,grad_norm: 0.8803997791618174, iteration: 35526
loss: 1.0180814266204834,grad_norm: 0.9999992679705326, iteration: 35527
loss: 1.0493335723876953,grad_norm: 0.9999992313120307, iteration: 35528
loss: 1.0048125982284546,grad_norm: 0.9999991454953344, iteration: 35529
loss: 1.0054887533187866,grad_norm: 0.9999996598215196, iteration: 35530
loss: 0.974531352519989,grad_norm: 0.9999988865660776, iteration: 35531
loss: 1.0190379619598389,grad_norm: 0.9999991185238644, iteration: 35532
loss: 1.0054148435592651,grad_norm: 0.8866855006350435, iteration: 35533
loss: 0.9954507350921631,grad_norm: 0.999999033858773, iteration: 35534
loss: 0.9973888397216797,grad_norm: 0.8685632089680643, iteration: 35535
loss: 0.9835880994796753,grad_norm: 0.9756065356629676, iteration: 35536
loss: 0.9873622059822083,grad_norm: 0.877294811678604, iteration: 35537
loss: 1.0144627094268799,grad_norm: 0.9627626399246032, iteration: 35538
loss: 1.0055508613586426,grad_norm: 0.9999991807746651, iteration: 35539
loss: 1.02790105342865,grad_norm: 0.976075332416106, iteration: 35540
loss: 1.0132026672363281,grad_norm: 0.9999989870358712, iteration: 35541
loss: 1.0261369943618774,grad_norm: 0.8113840294575986, iteration: 35542
loss: 1.0018988847732544,grad_norm: 0.9999990860513025, iteration: 35543
loss: 1.0219175815582275,grad_norm: 0.9750394306439649, iteration: 35544
loss: 1.0115898847579956,grad_norm: 0.999999625627592, iteration: 35545
loss: 1.0048589706420898,grad_norm: 0.9905220426090598, iteration: 35546
loss: 0.9926769137382507,grad_norm: 0.8691782557170703, iteration: 35547
loss: 1.0344961881637573,grad_norm: 0.999999133407353, iteration: 35548
loss: 0.9938943982124329,grad_norm: 0.9999991056185596, iteration: 35549
loss: 1.0228265523910522,grad_norm: 0.9827278791832258, iteration: 35550
loss: 0.9680408239364624,grad_norm: 0.9393709891892008, iteration: 35551
loss: 1.0172295570373535,grad_norm: 0.9999995383318236, iteration: 35552
loss: 0.9904701113700867,grad_norm: 0.9692167708479952, iteration: 35553
loss: 1.0125455856323242,grad_norm: 0.9425977525002506, iteration: 35554
loss: 0.9966570734977722,grad_norm: 0.9999989766544608, iteration: 35555
loss: 0.9785010814666748,grad_norm: 0.9506439269674605, iteration: 35556
loss: 0.9844520092010498,grad_norm: 0.9999993598458348, iteration: 35557
loss: 0.9744833111763,grad_norm: 0.9085770291992208, iteration: 35558
loss: 0.9801194071769714,grad_norm: 0.9999990407571754, iteration: 35559
loss: 0.9904230833053589,grad_norm: 0.9999990839814965, iteration: 35560
loss: 0.990948498249054,grad_norm: 0.9438384941083395, iteration: 35561
loss: 0.9741788506507874,grad_norm: 0.9999991754237786, iteration: 35562
loss: 1.007887840270996,grad_norm: 0.9866242596752133, iteration: 35563
loss: 0.9930923581123352,grad_norm: 0.9999990756648088, iteration: 35564
loss: 1.0087136030197144,grad_norm: 0.9168511043784165, iteration: 35565
loss: 1.0215511322021484,grad_norm: 0.9906936646636603, iteration: 35566
loss: 0.9887104630470276,grad_norm: 0.9999991323265235, iteration: 35567
loss: 0.9953184127807617,grad_norm: 0.999999057781769, iteration: 35568
loss: 1.0237003564834595,grad_norm: 0.9999995339798092, iteration: 35569
loss: 1.0991101264953613,grad_norm: 0.9999997184802492, iteration: 35570
loss: 1.0094951391220093,grad_norm: 0.9590412046786259, iteration: 35571
loss: 1.1480778455734253,grad_norm: 0.9999992819098813, iteration: 35572
loss: 1.0517150163650513,grad_norm: 0.9733641314546324, iteration: 35573
loss: 1.0295435190200806,grad_norm: 0.9999991877631165, iteration: 35574
loss: 1.0008653402328491,grad_norm: 0.9999990229115768, iteration: 35575
loss: 0.9484380483627319,grad_norm: 0.907956422848566, iteration: 35576
loss: 0.9955129027366638,grad_norm: 0.7111002408993521, iteration: 35577
loss: 1.013658881187439,grad_norm: 0.9999989560526338, iteration: 35578
loss: 1.010959506034851,grad_norm: 0.9005346531347079, iteration: 35579
loss: 0.992041826248169,grad_norm: 0.9999991973001295, iteration: 35580
loss: 0.9955758452415466,grad_norm: 0.9999991411388935, iteration: 35581
loss: 1.009413480758667,grad_norm: 0.9999996271713301, iteration: 35582
loss: 0.9842873215675354,grad_norm: 0.954622926992304, iteration: 35583
loss: 1.0118346214294434,grad_norm: 0.9999996048203708, iteration: 35584
loss: 1.0832854509353638,grad_norm: 0.999999367465521, iteration: 35585
loss: 0.9846718311309814,grad_norm: 0.9842017081512731, iteration: 35586
loss: 1.0170536041259766,grad_norm: 0.9139227921566506, iteration: 35587
loss: 0.9879853129386902,grad_norm: 0.9823769439184907, iteration: 35588
loss: 1.0125658512115479,grad_norm: 0.8863581147863354, iteration: 35589
loss: 1.0024926662445068,grad_norm: 0.9999991199488856, iteration: 35590
loss: 0.9912976622581482,grad_norm: 0.9999993111812988, iteration: 35591
loss: 1.0197592973709106,grad_norm: 0.9999991308314983, iteration: 35592
loss: 0.9669315814971924,grad_norm: 0.8632643088546331, iteration: 35593
loss: 0.9890349507331848,grad_norm: 0.7927853511625713, iteration: 35594
loss: 0.9773517847061157,grad_norm: 0.9999991657664695, iteration: 35595
loss: 1.0679630041122437,grad_norm: 0.999999589257303, iteration: 35596
loss: 1.0356192588806152,grad_norm: 0.9999996298051246, iteration: 35597
loss: 0.9434358477592468,grad_norm: 0.9999992099645226, iteration: 35598
loss: 1.0386322736740112,grad_norm: 0.999999287550095, iteration: 35599
loss: 0.9936938881874084,grad_norm: 0.9999992447318794, iteration: 35600
loss: 0.9969222545623779,grad_norm: 0.8813376461264703, iteration: 35601
loss: 1.0435477495193481,grad_norm: 0.9999999515036481, iteration: 35602
loss: 0.9980096817016602,grad_norm: 0.9584595156217744, iteration: 35603
loss: 0.9475635290145874,grad_norm: 0.9314208046687908, iteration: 35604
loss: 0.9974702000617981,grad_norm: 0.8960197207108624, iteration: 35605
loss: 0.9887508153915405,grad_norm: 0.9223708713831047, iteration: 35606
loss: 1.0131735801696777,grad_norm: 0.78745772458068, iteration: 35607
loss: 1.0134106874465942,grad_norm: 0.9230154388997462, iteration: 35608
loss: 1.0034153461456299,grad_norm: 0.9848146614211646, iteration: 35609
loss: 1.0015959739685059,grad_norm: 0.9999994934290214, iteration: 35610
loss: 0.9456967711448669,grad_norm: 0.935508969976402, iteration: 35611
loss: 1.0188896656036377,grad_norm: 0.9999991594277938, iteration: 35612
loss: 1.038094401359558,grad_norm: 0.9999993336846941, iteration: 35613
loss: 1.0127158164978027,grad_norm: 0.9936380136046604, iteration: 35614
loss: 1.0247440338134766,grad_norm: 0.9999992565623301, iteration: 35615
loss: 0.9873633980751038,grad_norm: 0.9999990303446316, iteration: 35616
loss: 1.1091647148132324,grad_norm: 0.9999997270844068, iteration: 35617
loss: 1.0150409936904907,grad_norm: 0.9608969716826821, iteration: 35618
loss: 0.9831660985946655,grad_norm: 0.904334982757137, iteration: 35619
loss: 1.041990876197815,grad_norm: 0.9999989747492333, iteration: 35620
loss: 0.9954788088798523,grad_norm: 0.9074469795438538, iteration: 35621
loss: 1.021303415298462,grad_norm: 0.9999992460288046, iteration: 35622
loss: 1.0175224542617798,grad_norm: 0.9352764975330148, iteration: 35623
loss: 1.0142360925674438,grad_norm: 0.9999991677930269, iteration: 35624
loss: 0.9871793389320374,grad_norm: 0.8114036677244679, iteration: 35625
loss: 1.015088677406311,grad_norm: 0.9999992805710991, iteration: 35626
loss: 1.007729411125183,grad_norm: 0.9999992154260261, iteration: 35627
loss: 1.01626718044281,grad_norm: 0.999999303176804, iteration: 35628
loss: 1.0222430229187012,grad_norm: 0.941880847964666, iteration: 35629
loss: 1.0158472061157227,grad_norm: 0.9999992446251548, iteration: 35630
loss: 0.9972810745239258,grad_norm: 0.9999991921821021, iteration: 35631
loss: 0.9909543395042419,grad_norm: 0.9999998579974484, iteration: 35632
loss: 1.0062155723571777,grad_norm: 0.8478055306801434, iteration: 35633
loss: 1.0045886039733887,grad_norm: 0.9947496709858777, iteration: 35634
loss: 0.9715098142623901,grad_norm: 0.9999990566349285, iteration: 35635
loss: 0.9805960059165955,grad_norm: 0.9999992333623301, iteration: 35636
loss: 0.988642156124115,grad_norm: 0.9999991686545693, iteration: 35637
loss: 1.000860333442688,grad_norm: 0.9999989321440989, iteration: 35638
loss: 1.023492455482483,grad_norm: 0.9999990325229405, iteration: 35639
loss: 1.0279521942138672,grad_norm: 0.9999990995644668, iteration: 35640
loss: 1.016302227973938,grad_norm: 0.9999994394805451, iteration: 35641
loss: 1.0020959377288818,grad_norm: 0.9999994536367437, iteration: 35642
loss: 1.0092939138412476,grad_norm: 0.8262947967535037, iteration: 35643
loss: 1.015401840209961,grad_norm: 0.9999990613738243, iteration: 35644
loss: 0.993217945098877,grad_norm: 0.9074634687571993, iteration: 35645
loss: 1.0235209465026855,grad_norm: 0.884391456319575, iteration: 35646
loss: 1.026750087738037,grad_norm: 0.9999992114104033, iteration: 35647
loss: 1.0218185186386108,grad_norm: 0.9999990337765691, iteration: 35648
loss: 0.9785668849945068,grad_norm: 0.8384670292397066, iteration: 35649
loss: 1.0233948230743408,grad_norm: 0.955558539957776, iteration: 35650
loss: 1.012610912322998,grad_norm: 0.9999990063155064, iteration: 35651
loss: 1.0055391788482666,grad_norm: 0.9999993436616877, iteration: 35652
loss: 1.0409647226333618,grad_norm: 0.9999991710628587, iteration: 35653
loss: 0.9782368540763855,grad_norm: 0.9315477476833912, iteration: 35654
loss: 1.0104426145553589,grad_norm: 0.9999992242301546, iteration: 35655
loss: 1.0134930610656738,grad_norm: 0.9999996722861424, iteration: 35656
loss: 1.0111801624298096,grad_norm: 0.9134335938156031, iteration: 35657
loss: 1.117937684059143,grad_norm: 0.9999991383041589, iteration: 35658
loss: 1.0098028182983398,grad_norm: 0.9999991891403237, iteration: 35659
loss: 1.020967721939087,grad_norm: 0.9999991012153518, iteration: 35660
loss: 1.0079076290130615,grad_norm: 0.999999204108474, iteration: 35661
loss: 0.9737869501113892,grad_norm: 0.9999993831033744, iteration: 35662
loss: 1.056483507156372,grad_norm: 0.999999680349863, iteration: 35663
loss: 1.0556646585464478,grad_norm: 0.9999990800856823, iteration: 35664
loss: 1.0264403820037842,grad_norm: 0.9728430462722412, iteration: 35665
loss: 1.0053565502166748,grad_norm: 0.9891399470418637, iteration: 35666
loss: 1.0293830633163452,grad_norm: 0.9999991020840786, iteration: 35667
loss: 0.9694728255271912,grad_norm: 0.999999108384719, iteration: 35668
loss: 1.0202059745788574,grad_norm: 0.9999991559606574, iteration: 35669
loss: 1.0456140041351318,grad_norm: 0.9999992450880334, iteration: 35670
loss: 1.0049604177474976,grad_norm: 0.9999996638068493, iteration: 35671
loss: 1.0004843473434448,grad_norm: 0.9999990663307001, iteration: 35672
loss: 1.0175846815109253,grad_norm: 0.999998974830236, iteration: 35673
loss: 0.997749924659729,grad_norm: 0.9999991077778481, iteration: 35674
loss: 1.0150774717330933,grad_norm: 0.9999990321517607, iteration: 35675
loss: 0.9778718948364258,grad_norm: 0.9647496541095011, iteration: 35676
loss: 1.0495123863220215,grad_norm: 0.9267529262579602, iteration: 35677
loss: 1.0107357501983643,grad_norm: 0.9955591463182062, iteration: 35678
loss: 1.020532250404358,grad_norm: 0.9999991894426351, iteration: 35679
loss: 0.9814637303352356,grad_norm: 0.9999989512794393, iteration: 35680
loss: 1.006004810333252,grad_norm: 0.9999992450645028, iteration: 35681
loss: 1.003442406654358,grad_norm: 0.9999991611524214, iteration: 35682
loss: 1.0198887586593628,grad_norm: 0.7815901341228267, iteration: 35683
loss: 0.9757153391838074,grad_norm: 0.9408231895865905, iteration: 35684
loss: 0.9680724143981934,grad_norm: 0.9599836758033438, iteration: 35685
loss: 1.0111815929412842,grad_norm: 0.9310319671077121, iteration: 35686
loss: 1.0142462253570557,grad_norm: 0.999998954263077, iteration: 35687
loss: 1.0006256103515625,grad_norm: 0.8151900870641476, iteration: 35688
loss: 0.9553974866867065,grad_norm: 0.989750210423894, iteration: 35689
loss: 0.977276623249054,grad_norm: 0.9995367445626292, iteration: 35690
loss: 1.0308822393417358,grad_norm: 0.9999991199797077, iteration: 35691
loss: 1.0032402276992798,grad_norm: 0.9999990045581822, iteration: 35692
loss: 0.9833559989929199,grad_norm: 0.8239683796679649, iteration: 35693
loss: 1.0312448740005493,grad_norm: 0.9999993217159413, iteration: 35694
loss: 0.9621729850769043,grad_norm: 0.999999143544481, iteration: 35695
loss: 1.0360180139541626,grad_norm: 0.9999995488957295, iteration: 35696
loss: 0.9703758955001831,grad_norm: 0.9999992098398791, iteration: 35697
loss: 0.99104243516922,grad_norm: 0.9999990404766683, iteration: 35698
loss: 0.9907739162445068,grad_norm: 0.9648460885906964, iteration: 35699
loss: 1.0057388544082642,grad_norm: 0.9999990790352323, iteration: 35700
loss: 0.9988965392112732,grad_norm: 0.9999991584303874, iteration: 35701
loss: 1.0253773927688599,grad_norm: 0.9999996893889418, iteration: 35702
loss: 1.0078314542770386,grad_norm: 0.7707948584303651, iteration: 35703
loss: 1.0129358768463135,grad_norm: 0.8542559895864289, iteration: 35704
loss: 1.0172127485275269,grad_norm: 0.8678586995481608, iteration: 35705
loss: 1.007132887840271,grad_norm: 0.9999990285568854, iteration: 35706
loss: 1.0228321552276611,grad_norm: 0.9999991876098757, iteration: 35707
loss: 1.0171862840652466,grad_norm: 0.9999992442361425, iteration: 35708
loss: 1.0016615390777588,grad_norm: 0.9207049570979179, iteration: 35709
loss: 1.0162540674209595,grad_norm: 0.9554414877484899, iteration: 35710
loss: 1.006365180015564,grad_norm: 0.9854537616997238, iteration: 35711
loss: 1.0347883701324463,grad_norm: 0.9999992917568721, iteration: 35712
loss: 1.0171749591827393,grad_norm: 0.8723350589539909, iteration: 35713
loss: 0.9780603647232056,grad_norm: 0.999999240029025, iteration: 35714
loss: 1.0008689165115356,grad_norm: 0.8349931244294165, iteration: 35715
loss: 0.9624063372612,grad_norm: 0.9672889772630848, iteration: 35716
loss: 0.9715492725372314,grad_norm: 0.8518570865301428, iteration: 35717
loss: 1.0492860078811646,grad_norm: 0.9161758265112421, iteration: 35718
loss: 1.0290641784667969,grad_norm: 0.999999130666356, iteration: 35719
loss: 0.9908447265625,grad_norm: 0.9235867325296089, iteration: 35720
loss: 1.0738561153411865,grad_norm: 0.9999992870891766, iteration: 35721
loss: 1.0235885381698608,grad_norm: 0.9932320789431972, iteration: 35722
loss: 0.9733191728591919,grad_norm: 0.9198401321330435, iteration: 35723
loss: 0.9914801716804504,grad_norm: 0.961124214694033, iteration: 35724
loss: 1.0266131162643433,grad_norm: 0.9999992525213086, iteration: 35725
loss: 1.0503320693969727,grad_norm: 0.9962006135840117, iteration: 35726
loss: 1.0038280487060547,grad_norm: 0.9646666030906672, iteration: 35727
loss: 1.0337294340133667,grad_norm: 0.9999996640204197, iteration: 35728
loss: 0.9940339922904968,grad_norm: 0.9325262672584433, iteration: 35729
loss: 0.9956772327423096,grad_norm: 0.9920372211818703, iteration: 35730
loss: 1.0178475379943848,grad_norm: 0.9999990817814995, iteration: 35731
loss: 1.0272283554077148,grad_norm: 0.9999990321775716, iteration: 35732
loss: 0.9958356022834778,grad_norm: 0.9999992851165574, iteration: 35733
loss: 0.9862226843833923,grad_norm: 0.9615404048340686, iteration: 35734
loss: 0.9763175845146179,grad_norm: 0.8206942302253991, iteration: 35735
loss: 0.9948535561561584,grad_norm: 0.9605721427241929, iteration: 35736
loss: 1.0160207748413086,grad_norm: 0.9999991219912083, iteration: 35737
loss: 1.013656497001648,grad_norm: 0.7999008905677306, iteration: 35738
loss: 1.0082862377166748,grad_norm: 0.9999992385907839, iteration: 35739
loss: 0.95029616355896,grad_norm: 0.9143597130387989, iteration: 35740
loss: 1.0361127853393555,grad_norm: 0.9999993392898173, iteration: 35741
loss: 1.0189266204833984,grad_norm: 0.971924825448695, iteration: 35742
loss: 1.0062289237976074,grad_norm: 0.9468086294354063, iteration: 35743
loss: 1.0559428930282593,grad_norm: 0.9999991153993035, iteration: 35744
loss: 1.0019077062606812,grad_norm: 0.9601214658908823, iteration: 35745
loss: 0.965202808380127,grad_norm: 0.8884720516757513, iteration: 35746
loss: 1.0093332529067993,grad_norm: 0.9331833395723035, iteration: 35747
loss: 1.0885125398635864,grad_norm: 0.9999993838588205, iteration: 35748
loss: 0.9915616512298584,grad_norm: 0.8467098223808761, iteration: 35749
loss: 1.011562705039978,grad_norm: 0.9999992077695051, iteration: 35750
loss: 1.0108025074005127,grad_norm: 0.9299567382640515, iteration: 35751
loss: 1.0212000608444214,grad_norm: 0.9999991546990864, iteration: 35752
loss: 1.08655846118927,grad_norm: 0.9999996681592227, iteration: 35753
loss: 1.0487231016159058,grad_norm: 0.9609782022672104, iteration: 35754
loss: 1.0257760286331177,grad_norm: 0.9352117654952931, iteration: 35755
loss: 0.9991894960403442,grad_norm: 0.922651553311628, iteration: 35756
loss: 1.0106505155563354,grad_norm: 0.9999992713421316, iteration: 35757
loss: 1.007746696472168,grad_norm: 0.9999993226129285, iteration: 35758
loss: 0.9547261595726013,grad_norm: 0.9999992232703232, iteration: 35759
loss: 0.9894261956214905,grad_norm: 0.9436047459595479, iteration: 35760
loss: 1.0538451671600342,grad_norm: 0.9999996526403941, iteration: 35761
loss: 1.0258499383926392,grad_norm: 0.8535889706816001, iteration: 35762
loss: 1.024863600730896,grad_norm: 0.9999991050968692, iteration: 35763
loss: 1.0486027002334595,grad_norm: 0.9999991527096392, iteration: 35764
loss: 1.0209294557571411,grad_norm: 0.9999997577194694, iteration: 35765
loss: 0.9837082028388977,grad_norm: 0.9999989525929077, iteration: 35766
loss: 0.9963903427124023,grad_norm: 0.8952623772105179, iteration: 35767
loss: 1.1014188528060913,grad_norm: 0.999999901101062, iteration: 35768
loss: 0.9926472306251526,grad_norm: 0.8995188174207543, iteration: 35769
loss: 1.033870816230774,grad_norm: 0.9999991831017135, iteration: 35770
loss: 0.9806311130523682,grad_norm: 0.8585889288454582, iteration: 35771
loss: 1.0039021968841553,grad_norm: 0.9103787189484995, iteration: 35772
loss: 1.0054512023925781,grad_norm: 0.9999991235126032, iteration: 35773
loss: 1.0084316730499268,grad_norm: 0.9536794208839404, iteration: 35774
loss: 0.9916606545448303,grad_norm: 0.9271326703770089, iteration: 35775
loss: 0.974253237247467,grad_norm: 0.9999992021102784, iteration: 35776
loss: 1.0329605340957642,grad_norm: 0.999999007425524, iteration: 35777
loss: 1.0150209665298462,grad_norm: 0.8444221231927722, iteration: 35778
loss: 0.9882014989852905,grad_norm: 0.9999993984397815, iteration: 35779
loss: 1.029999017715454,grad_norm: 0.9999992947694777, iteration: 35780
loss: 0.9685885906219482,grad_norm: 0.9999990776753286, iteration: 35781
loss: 0.9685388207435608,grad_norm: 0.9656518035276137, iteration: 35782
loss: 1.0188097953796387,grad_norm: 0.9766748879531814, iteration: 35783
loss: 0.9699092507362366,grad_norm: 0.9705946869243307, iteration: 35784
loss: 0.9938797950744629,grad_norm: 0.8574191468226586, iteration: 35785
loss: 1.0763911008834839,grad_norm: 0.9999998677753722, iteration: 35786
loss: 1.030067801475525,grad_norm: 0.9999990200809155, iteration: 35787
loss: 0.989929735660553,grad_norm: 0.9999989551423348, iteration: 35788
loss: 1.0233958959579468,grad_norm: 0.9999998132916853, iteration: 35789
loss: 0.9891530275344849,grad_norm: 0.9787035555730023, iteration: 35790
loss: 1.0183188915252686,grad_norm: 0.9784616922361687, iteration: 35791
loss: 1.0058149099349976,grad_norm: 0.8612991463943726, iteration: 35792
loss: 1.0085684061050415,grad_norm: 0.999999073835284, iteration: 35793
loss: 0.965859055519104,grad_norm: 0.8005743542459031, iteration: 35794
loss: 0.999616265296936,grad_norm: 0.999999170364504, iteration: 35795
loss: 0.9989929795265198,grad_norm: 0.9842332671919671, iteration: 35796
loss: 1.054049015045166,grad_norm: 0.999999881479341, iteration: 35797
loss: 1.041866421699524,grad_norm: 0.9490046694856195, iteration: 35798
loss: 1.0161924362182617,grad_norm: 0.9999990994212111, iteration: 35799
loss: 1.0203282833099365,grad_norm: 0.9999992520067784, iteration: 35800
loss: 1.0053527355194092,grad_norm: 0.9999990057039206, iteration: 35801
loss: 1.010156273841858,grad_norm: 0.986596466592436, iteration: 35802
loss: 1.0161504745483398,grad_norm: 0.9999991487133685, iteration: 35803
loss: 1.015262246131897,grad_norm: 0.9999992193968615, iteration: 35804
loss: 1.0395457744598389,grad_norm: 0.9999996238379066, iteration: 35805
loss: 0.9918555021286011,grad_norm: 0.8534530942236703, iteration: 35806
loss: 0.99704909324646,grad_norm: 0.9647411502371825, iteration: 35807
loss: 0.9974838495254517,grad_norm: 0.9490966223502316, iteration: 35808
loss: 0.9589071869850159,grad_norm: 0.8795551319126123, iteration: 35809
loss: 0.9879129528999329,grad_norm: 0.7942467313715136, iteration: 35810
loss: 0.9867497682571411,grad_norm: 0.856637624590761, iteration: 35811
loss: 0.9772228598594666,grad_norm: 0.8373443091296267, iteration: 35812
loss: 0.9963077306747437,grad_norm: 0.999999060083057, iteration: 35813
loss: 1.0258150100708008,grad_norm: 0.9025742451366031, iteration: 35814
loss: 1.0005536079406738,grad_norm: 0.999999244496886, iteration: 35815
loss: 0.9780197143554688,grad_norm: 0.865983037024989, iteration: 35816
loss: 0.9597872495651245,grad_norm: 0.959557632183456, iteration: 35817
loss: 0.9671076536178589,grad_norm: 0.9999990249457124, iteration: 35818
loss: 1.0058525800704956,grad_norm: 0.8861437578241419, iteration: 35819
loss: 0.9702839851379395,grad_norm: 0.9410079996359221, iteration: 35820
loss: 1.0262582302093506,grad_norm: 0.9999993731346967, iteration: 35821
loss: 1.0045363903045654,grad_norm: 0.8598531501308855, iteration: 35822
loss: 1.039197564125061,grad_norm: 0.9999992284022174, iteration: 35823
loss: 0.9804747104644775,grad_norm: 0.8784898738692286, iteration: 35824
loss: 0.9817101359367371,grad_norm: 0.9999992472768774, iteration: 35825
loss: 1.0337574481964111,grad_norm: 0.9520275892514037, iteration: 35826
loss: 1.0415682792663574,grad_norm: 0.9999992196222541, iteration: 35827
loss: 0.9916822910308838,grad_norm: 0.9999990083297898, iteration: 35828
loss: 1.0092365741729736,grad_norm: 0.8809071874260116, iteration: 35829
loss: 0.9839739203453064,grad_norm: 0.9999991904202002, iteration: 35830
loss: 1.041854739189148,grad_norm: 0.9999989857611767, iteration: 35831
loss: 0.9884447455406189,grad_norm: 0.9307153502457458, iteration: 35832
loss: 0.9930800795555115,grad_norm: 0.9999990697003571, iteration: 35833
loss: 1.0235775709152222,grad_norm: 0.9810027349986207, iteration: 35834
loss: 1.0211151838302612,grad_norm: 0.999999208070836, iteration: 35835
loss: 0.9796115756034851,grad_norm: 0.8263429387318276, iteration: 35836
loss: 1.0217509269714355,grad_norm: 0.9315579578971439, iteration: 35837
loss: 1.0343519449234009,grad_norm: 0.9999990947962232, iteration: 35838
loss: 1.0139095783233643,grad_norm: 0.9427181856308667, iteration: 35839
loss: 1.0264631509780884,grad_norm: 0.911716712716343, iteration: 35840
loss: 1.0098968744277954,grad_norm: 0.8557815743083709, iteration: 35841
loss: 1.0166292190551758,grad_norm: 0.9809066255050756, iteration: 35842
loss: 0.9885561466217041,grad_norm: 0.8425470968089221, iteration: 35843
loss: 0.9824036359786987,grad_norm: 0.8295157952376081, iteration: 35844
loss: 1.0123182535171509,grad_norm: 0.838897615571678, iteration: 35845
loss: 1.0080139636993408,grad_norm: 0.8275127439372655, iteration: 35846
loss: 1.0068612098693848,grad_norm: 0.9046120636416853, iteration: 35847
loss: 1.0462602376937866,grad_norm: 0.9999999502548877, iteration: 35848
loss: 1.0095595121383667,grad_norm: 0.9999991364996184, iteration: 35849
loss: 1.0244979858398438,grad_norm: 0.7669461909599457, iteration: 35850
loss: 1.001869559288025,grad_norm: 0.9999990390436475, iteration: 35851
loss: 0.9989295601844788,grad_norm: 0.9999990939636988, iteration: 35852
loss: 0.9948179721832275,grad_norm: 0.9351619401790326, iteration: 35853
loss: 1.0136656761169434,grad_norm: 0.7939306173266033, iteration: 35854
loss: 0.9681068658828735,grad_norm: 0.9999992983127337, iteration: 35855
loss: 0.9876661896705627,grad_norm: 0.9218763413187188, iteration: 35856
loss: 1.0209227800369263,grad_norm: 0.9765447741832197, iteration: 35857
loss: 1.0221611261367798,grad_norm: 0.9999990184758419, iteration: 35858
loss: 1.0021346807479858,grad_norm: 0.805907421765276, iteration: 35859
loss: 0.9778249859809875,grad_norm: 0.8587907261452187, iteration: 35860
loss: 1.0416227579116821,grad_norm: 0.9522063222084085, iteration: 35861
loss: 0.9847682118415833,grad_norm: 0.8919543597075584, iteration: 35862
loss: 1.011854648590088,grad_norm: 0.9785111448904058, iteration: 35863
loss: 1.007807970046997,grad_norm: 0.9999991429566397, iteration: 35864
loss: 0.9879279732704163,grad_norm: 0.8526496936182707, iteration: 35865
loss: 1.0418621301651,grad_norm: 0.854975880349247, iteration: 35866
loss: 1.0124598741531372,grad_norm: 0.9999990943005237, iteration: 35867
loss: 1.0497519969940186,grad_norm: 0.9516289653169971, iteration: 35868
loss: 1.0143011808395386,grad_norm: 0.9999990331255556, iteration: 35869
loss: 1.0249255895614624,grad_norm: 0.8284725456512477, iteration: 35870
loss: 1.0210258960723877,grad_norm: 0.9999991334564929, iteration: 35871
loss: 0.9884752035140991,grad_norm: 0.9392241640504847, iteration: 35872
loss: 1.0125579833984375,grad_norm: 0.9999992521621027, iteration: 35873
loss: 0.9940222501754761,grad_norm: 0.9999989644610487, iteration: 35874
loss: 1.0431705713272095,grad_norm: 0.7957558592292987, iteration: 35875
loss: 0.9861128330230713,grad_norm: 0.7979523232033013, iteration: 35876
loss: 1.0198942422866821,grad_norm: 0.8571595489348983, iteration: 35877
loss: 1.040863037109375,grad_norm: 0.9999990730164036, iteration: 35878
loss: 0.9574202299118042,grad_norm: 0.9522571914796146, iteration: 35879
loss: 1.0189651250839233,grad_norm: 0.9999992653345197, iteration: 35880
loss: 0.9836102724075317,grad_norm: 0.894441175906157, iteration: 35881
loss: 0.9829961657524109,grad_norm: 0.9655456292078328, iteration: 35882
loss: 0.9956657886505127,grad_norm: 0.980968127993952, iteration: 35883
loss: 0.9999192357063293,grad_norm: 0.9999990969712405, iteration: 35884
loss: 1.007961392402649,grad_norm: 0.9840234654282808, iteration: 35885
loss: 0.9819254875183105,grad_norm: 0.9999989981420878, iteration: 35886
loss: 0.9995877146720886,grad_norm: 0.9338109566494163, iteration: 35887
loss: 0.9681135416030884,grad_norm: 0.9999990993378937, iteration: 35888
loss: 1.0066441297531128,grad_norm: 0.9072803128549867, iteration: 35889
loss: 1.009049892425537,grad_norm: 0.8800196128746538, iteration: 35890
loss: 1.0080219507217407,grad_norm: 0.999999384822431, iteration: 35891
loss: 0.9832291603088379,grad_norm: 0.8942319511047514, iteration: 35892
loss: 1.0147358179092407,grad_norm: 0.9999991017374793, iteration: 35893
loss: 0.9998697638511658,grad_norm: 0.9985581529291954, iteration: 35894
loss: 1.0020219087600708,grad_norm: 0.9999989725335997, iteration: 35895
loss: 1.0049906969070435,grad_norm: 0.8409729094428589, iteration: 35896
loss: 1.0016300678253174,grad_norm: 0.9924399298000544, iteration: 35897
loss: 0.9738829135894775,grad_norm: 0.8972993993472068, iteration: 35898
loss: 0.9851940870285034,grad_norm: 0.9022166890014867, iteration: 35899
loss: 0.9848922491073608,grad_norm: 0.9999989146151391, iteration: 35900
loss: 1.0415648221969604,grad_norm: 0.9999992622390035, iteration: 35901
loss: 0.9880464673042297,grad_norm: 0.7638756001042013, iteration: 35902
loss: 1.00173020362854,grad_norm: 0.9999992401414894, iteration: 35903
loss: 1.019331693649292,grad_norm: 0.842212608181041, iteration: 35904
loss: 1.0370802879333496,grad_norm: 0.9999993293136124, iteration: 35905
loss: 0.9938036203384399,grad_norm: 0.9999990306936528, iteration: 35906
loss: 0.9600228071212769,grad_norm: 0.9999990363562093, iteration: 35907
loss: 0.94487065076828,grad_norm: 0.8105885024735784, iteration: 35908
loss: 0.9654227495193481,grad_norm: 0.9999989506206727, iteration: 35909
loss: 0.9562898278236389,grad_norm: 0.9345314367368838, iteration: 35910
loss: 0.9945189356803894,grad_norm: 0.9999990910828888, iteration: 35911
loss: 0.9971747994422913,grad_norm: 0.8943710527029756, iteration: 35912
loss: 1.0267735719680786,grad_norm: 0.9024319679217716, iteration: 35913
loss: 1.0171983242034912,grad_norm: 0.9897214326543808, iteration: 35914
loss: 1.048874855041504,grad_norm: 0.9596987392895122, iteration: 35915
loss: 1.013636589050293,grad_norm: 0.9770721778943244, iteration: 35916
loss: 1.0213433504104614,grad_norm: 0.9081628357071555, iteration: 35917
loss: 1.029889464378357,grad_norm: 0.9903932236260238, iteration: 35918
loss: 1.0186195373535156,grad_norm: 0.999999154127551, iteration: 35919
loss: 1.0334995985031128,grad_norm: 0.9999990363946838, iteration: 35920
loss: 1.0279642343521118,grad_norm: 0.9999991564908352, iteration: 35921
loss: 1.057037115097046,grad_norm: 0.9225184333673443, iteration: 35922
loss: 1.0253657102584839,grad_norm: 0.9999990463497083, iteration: 35923
loss: 1.103374719619751,grad_norm: 0.9999998756926097, iteration: 35924
loss: 1.013283133506775,grad_norm: 0.9325122683193436, iteration: 35925
loss: 1.031563639640808,grad_norm: 0.7750328461540759, iteration: 35926
loss: 1.0226291418075562,grad_norm: 0.9999991615586054, iteration: 35927
loss: 1.0315282344818115,grad_norm: 0.8013812960699249, iteration: 35928
loss: 1.0472474098205566,grad_norm: 0.9553679364605119, iteration: 35929
loss: 0.9721745252609253,grad_norm: 0.9999993492691106, iteration: 35930
loss: 0.9908888339996338,grad_norm: 0.9246209632513055, iteration: 35931
loss: 0.9561320543289185,grad_norm: 0.933277020770255, iteration: 35932
loss: 1.0066179037094116,grad_norm: 0.9999990565694133, iteration: 35933
loss: 0.969012975692749,grad_norm: 0.8540920497008017, iteration: 35934
loss: 1.0037692785263062,grad_norm: 0.9999991900245219, iteration: 35935
loss: 0.9836207032203674,grad_norm: 0.9999992899809454, iteration: 35936
loss: 1.0294126272201538,grad_norm: 0.9844971833532188, iteration: 35937
loss: 1.0008395910263062,grad_norm: 0.9055717685981193, iteration: 35938
loss: 0.982828676700592,grad_norm: 0.9857512559043314, iteration: 35939
loss: 1.00397789478302,grad_norm: 0.887865940000974, iteration: 35940
loss: 1.0281503200531006,grad_norm: 0.9999992980414107, iteration: 35941
loss: 1.0011972188949585,grad_norm: 0.9999991069013907, iteration: 35942
loss: 1.0018285512924194,grad_norm: 0.9804239411728256, iteration: 35943
loss: 1.000086784362793,grad_norm: 0.8036103580748246, iteration: 35944
loss: 0.9953563809394836,grad_norm: 0.9121102819995464, iteration: 35945
loss: 1.0080952644348145,grad_norm: 0.9999990437943151, iteration: 35946
loss: 1.021235466003418,grad_norm: 0.928684887233761, iteration: 35947
loss: 1.0393908023834229,grad_norm: 0.9999990339828929, iteration: 35948
loss: 0.9914125204086304,grad_norm: 0.7753689264921946, iteration: 35949
loss: 1.0264571905136108,grad_norm: 0.9999991256362754, iteration: 35950
loss: 0.9741512537002563,grad_norm: 0.9999991390425085, iteration: 35951
loss: 1.0301084518432617,grad_norm: 0.9999991691717384, iteration: 35952
loss: 0.9439201951026917,grad_norm: 0.9365153489422017, iteration: 35953
loss: 1.003025770187378,grad_norm: 0.781998792081417, iteration: 35954
loss: 0.9623200297355652,grad_norm: 0.9195530558669818, iteration: 35955
loss: 1.037654161453247,grad_norm: 0.9999993988508149, iteration: 35956
loss: 1.0432441234588623,grad_norm: 0.9999992407375254, iteration: 35957
loss: 1.0148277282714844,grad_norm: 0.9999992002309792, iteration: 35958
loss: 0.9913743734359741,grad_norm: 0.9270504257938945, iteration: 35959
loss: 0.9925994277000427,grad_norm: 0.9999990470399507, iteration: 35960
loss: 0.9656092524528503,grad_norm: 0.9662225732151833, iteration: 35961
loss: 0.9997798204421997,grad_norm: 0.9999990424445251, iteration: 35962
loss: 1.0119765996932983,grad_norm: 0.9999992322396785, iteration: 35963
loss: 1.0423741340637207,grad_norm: 0.9999992497887804, iteration: 35964
loss: 0.9906328916549683,grad_norm: 0.9999991758965764, iteration: 35965
loss: 1.0107896327972412,grad_norm: 0.8794326776653436, iteration: 35966
loss: 1.0066741704940796,grad_norm: 0.999999079094774, iteration: 35967
loss: 1.0412321090698242,grad_norm: 0.9789779647949027, iteration: 35968
loss: 0.9855511784553528,grad_norm: 0.9357812304289382, iteration: 35969
loss: 1.0123342275619507,grad_norm: 0.9999991287674516, iteration: 35970
loss: 0.9780007600784302,grad_norm: 0.9999992362199833, iteration: 35971
loss: 0.9851924180984497,grad_norm: 0.9299636905202318, iteration: 35972
loss: 1.0061702728271484,grad_norm: 0.8355629337046339, iteration: 35973
loss: 1.0027860403060913,grad_norm: 0.9495240598749528, iteration: 35974
loss: 1.0079233646392822,grad_norm: 0.9999991366329815, iteration: 35975
loss: 1.0116130113601685,grad_norm: 0.9193482366111929, iteration: 35976
loss: 1.0308042764663696,grad_norm: 0.9999989721114498, iteration: 35977
loss: 0.995701789855957,grad_norm: 0.9333483292997384, iteration: 35978
loss: 0.9727765321731567,grad_norm: 0.8096813937247306, iteration: 35979
loss: 1.0060386657714844,grad_norm: 0.9205500743357493, iteration: 35980
loss: 0.9961507320404053,grad_norm: 0.901978430733522, iteration: 35981
loss: 1.035546898841858,grad_norm: 0.9999990440791836, iteration: 35982
loss: 0.9895340204238892,grad_norm: 0.9999989454240752, iteration: 35983
loss: 1.0061086416244507,grad_norm: 0.8670851808445694, iteration: 35984
loss: 0.9982788562774658,grad_norm: 0.9999993986588737, iteration: 35985
loss: 1.021572232246399,grad_norm: 0.9999992157739397, iteration: 35986
loss: 0.9931726455688477,grad_norm: 0.8413028199300485, iteration: 35987
loss: 0.9974769949913025,grad_norm: 0.9300511474929208, iteration: 35988
loss: 1.0430591106414795,grad_norm: 0.9957117578167056, iteration: 35989
loss: 0.994209349155426,grad_norm: 0.9558130719258694, iteration: 35990
loss: 1.0003340244293213,grad_norm: 0.8840221348527376, iteration: 35991
loss: 0.9859456419944763,grad_norm: 0.9999992389508456, iteration: 35992
loss: 0.9841881990432739,grad_norm: 0.9604159090177696, iteration: 35993
loss: 1.0486643314361572,grad_norm: 0.8805784097007867, iteration: 35994
loss: 1.0200319290161133,grad_norm: 0.7599661692540106, iteration: 35995
loss: 0.9789396524429321,grad_norm: 0.9999990120445948, iteration: 35996
loss: 0.9677669405937195,grad_norm: 0.9999992455762455, iteration: 35997
loss: 0.9683348536491394,grad_norm: 0.9999990881398418, iteration: 35998
loss: 0.9925031065940857,grad_norm: 0.9999994323656196, iteration: 35999
loss: 1.0120259523391724,grad_norm: 0.7726472002611962, iteration: 36000
loss: 1.0194040536880493,grad_norm: 0.9577868321165853, iteration: 36001
loss: 1.032408356666565,grad_norm: 0.9960206661450978, iteration: 36002
loss: 1.000427484512329,grad_norm: 0.8976527053578712, iteration: 36003
loss: 0.9942249655723572,grad_norm: 0.8758913570101278, iteration: 36004
loss: 0.9869475364685059,grad_norm: 0.9999992338548432, iteration: 36005
loss: 1.0341730117797852,grad_norm: 0.9999994670787437, iteration: 36006
loss: 1.0132198333740234,grad_norm: 0.9216654443568153, iteration: 36007
loss: 1.0114495754241943,grad_norm: 0.9999990580092969, iteration: 36008
loss: 0.9587286710739136,grad_norm: 0.8459164865185143, iteration: 36009
loss: 1.0287002325057983,grad_norm: 0.9806224259576425, iteration: 36010
loss: 0.9845710396766663,grad_norm: 0.9586272899430952, iteration: 36011
loss: 1.0360336303710938,grad_norm: 0.9737948937744484, iteration: 36012
loss: 1.0379873514175415,grad_norm: 0.9999992080467728, iteration: 36013
loss: 1.022879958152771,grad_norm: 0.972521175810896, iteration: 36014
loss: 1.0101698637008667,grad_norm: 0.8693352824682585, iteration: 36015
loss: 0.9901941418647766,grad_norm: 0.9106025044670628, iteration: 36016
loss: 1.026350975036621,grad_norm: 0.9286417005583503, iteration: 36017
loss: 0.9440044164657593,grad_norm: 0.9999992110906011, iteration: 36018
loss: 1.0287208557128906,grad_norm: 0.9999990152856209, iteration: 36019
loss: 1.0170890092849731,grad_norm: 0.8593389288333986, iteration: 36020
loss: 1.009205937385559,grad_norm: 0.9999991794647453, iteration: 36021
loss: 0.9933257699012756,grad_norm: 0.839215141371602, iteration: 36022
loss: 1.003313422203064,grad_norm: 0.9999991422240377, iteration: 36023
loss: 0.9822483062744141,grad_norm: 0.9150230087612815, iteration: 36024
loss: 0.9988926649093628,grad_norm: 0.9396716439221378, iteration: 36025
loss: 1.0427919626235962,grad_norm: 0.9999994195807936, iteration: 36026
loss: 0.9995748996734619,grad_norm: 0.9709604695117683, iteration: 36027
loss: 1.024220585823059,grad_norm: 0.9999992177403609, iteration: 36028
loss: 0.9776247143745422,grad_norm: 0.9651989332500448, iteration: 36029
loss: 0.980495035648346,grad_norm: 0.8540738350017737, iteration: 36030
loss: 1.0394108295440674,grad_norm: 0.9999991055692025, iteration: 36031
loss: 1.0449213981628418,grad_norm: 0.841202151415023, iteration: 36032
loss: 1.020677924156189,grad_norm: 0.9029133762309651, iteration: 36033
loss: 0.9792273640632629,grad_norm: 0.9999991652519472, iteration: 36034
loss: 0.9952461123466492,grad_norm: 0.8531381075591472, iteration: 36035
loss: 0.9868061542510986,grad_norm: 0.984843107597597, iteration: 36036
loss: 1.0081374645233154,grad_norm: 0.9347041864951928, iteration: 36037
loss: 1.012740969657898,grad_norm: 0.9999992007226082, iteration: 36038
loss: 0.9700390696525574,grad_norm: 0.9999989786356496, iteration: 36039
loss: 0.9869582653045654,grad_norm: 0.9093406164226181, iteration: 36040
loss: 0.9759876728057861,grad_norm: 0.9999991045886201, iteration: 36041
loss: 1.0106154680252075,grad_norm: 0.9999996847285539, iteration: 36042
loss: 1.0125524997711182,grad_norm: 0.9254467965543841, iteration: 36043
loss: 1.0163520574569702,grad_norm: 0.9335719510952026, iteration: 36044
loss: 0.9889448881149292,grad_norm: 0.9999990954789213, iteration: 36045
loss: 1.0067366361618042,grad_norm: 0.9999990528459435, iteration: 36046
loss: 0.9707643389701843,grad_norm: 0.8412819460357126, iteration: 36047
loss: 1.1232997179031372,grad_norm: 0.9999998783034407, iteration: 36048
loss: 1.0255557298660278,grad_norm: 0.9999990933521867, iteration: 36049
loss: 0.9592089056968689,grad_norm: 0.9999989853264869, iteration: 36050
loss: 1.04277765750885,grad_norm: 0.9999990747967709, iteration: 36051
loss: 1.0376194715499878,grad_norm: 0.8325920123313565, iteration: 36052
loss: 1.0084649324417114,grad_norm: 0.8619937265849626, iteration: 36053
loss: 1.0159839391708374,grad_norm: 0.9999995103504334, iteration: 36054
loss: 0.9521210193634033,grad_norm: 0.9453088599849236, iteration: 36055
loss: 1.0114887952804565,grad_norm: 0.9181706508210127, iteration: 36056
loss: 0.9767864942550659,grad_norm: 0.9145085026112872, iteration: 36057
loss: 0.9720757603645325,grad_norm: 0.9999991746796454, iteration: 36058
loss: 0.9987844824790955,grad_norm: 0.9006642852369302, iteration: 36059
loss: 1.01026451587677,grad_norm: 0.6825547908336199, iteration: 36060
loss: 1.001831293106079,grad_norm: 0.9954967308917226, iteration: 36061
loss: 0.9757857322692871,grad_norm: 0.9298222827068375, iteration: 36062
loss: 1.0801494121551514,grad_norm: 0.9999996269660713, iteration: 36063
loss: 1.0288556814193726,grad_norm: 0.8669167238811607, iteration: 36064
loss: 1.0109052658081055,grad_norm: 0.9532269406473413, iteration: 36065
loss: 1.0249788761138916,grad_norm: 0.8811085619616538, iteration: 36066
loss: 1.0037034749984741,grad_norm: 0.9800351107559635, iteration: 36067
loss: 1.0131192207336426,grad_norm: 0.8470375838988508, iteration: 36068
loss: 1.0210124254226685,grad_norm: 0.9999990975881545, iteration: 36069
loss: 0.9899500608444214,grad_norm: 0.9999990446597705, iteration: 36070
loss: 1.0982416868209839,grad_norm: 0.9999995303823728, iteration: 36071
loss: 1.0459294319152832,grad_norm: 0.9999991390515769, iteration: 36072
loss: 1.0335034132003784,grad_norm: 0.9353658436384976, iteration: 36073
loss: 1.007686734199524,grad_norm: 0.9999994066673881, iteration: 36074
loss: 1.0177019834518433,grad_norm: 0.9999991633682052, iteration: 36075
loss: 0.9797526597976685,grad_norm: 0.8968430421371208, iteration: 36076
loss: 1.0321046113967896,grad_norm: 0.9999997899671909, iteration: 36077
loss: 0.9883770942687988,grad_norm: 0.8119437708910185, iteration: 36078
loss: 0.9895336031913757,grad_norm: 0.9486026573200679, iteration: 36079
loss: 0.9750518798828125,grad_norm: 0.9473989554106717, iteration: 36080
loss: 1.0017266273498535,grad_norm: 0.9999992278691514, iteration: 36081
loss: 1.000404953956604,grad_norm: 0.9999991648763021, iteration: 36082
loss: 1.0152076482772827,grad_norm: 0.9686256595286331, iteration: 36083
loss: 1.052535891532898,grad_norm: 0.8899679807494782, iteration: 36084
loss: 1.0097897052764893,grad_norm: 0.9927740042088129, iteration: 36085
loss: 0.9961851835250854,grad_norm: 0.8670814886252635, iteration: 36086
loss: 1.0188262462615967,grad_norm: 0.897319094568472, iteration: 36087
loss: 0.983894944190979,grad_norm: 0.9999990067591037, iteration: 36088
loss: 0.9984471797943115,grad_norm: 0.9999991080522354, iteration: 36089
loss: 1.0213769674301147,grad_norm: 0.9713225305098069, iteration: 36090
loss: 1.0052516460418701,grad_norm: 0.9999991614343617, iteration: 36091
loss: 1.0372341871261597,grad_norm: 0.9999990711140768, iteration: 36092
loss: 0.9942668676376343,grad_norm: 0.9999991099661096, iteration: 36093
loss: 0.9676980972290039,grad_norm: 0.8306351231051662, iteration: 36094
loss: 1.029363989830017,grad_norm: 0.9999992677859009, iteration: 36095
loss: 1.0392310619354248,grad_norm: 0.9999994791248454, iteration: 36096
loss: 0.9934189915657043,grad_norm: 0.9999992002437095, iteration: 36097
loss: 0.991783857345581,grad_norm: 0.8607958741760914, iteration: 36098
loss: 1.0580456256866455,grad_norm: 0.9388715898002137, iteration: 36099
loss: 0.9649271965026855,grad_norm: 0.9463071500006778, iteration: 36100
loss: 1.0472451448440552,grad_norm: 0.9999992958116709, iteration: 36101
loss: 1.0404425859451294,grad_norm: 0.9275127574733713, iteration: 36102
loss: 1.0199975967407227,grad_norm: 0.9999989927781341, iteration: 36103
loss: 0.9961624145507812,grad_norm: 0.9999990357188346, iteration: 36104
loss: 1.0253149271011353,grad_norm: 0.9016128185734411, iteration: 36105
loss: 1.0466283559799194,grad_norm: 0.9999990173369983, iteration: 36106
loss: 0.9655676484107971,grad_norm: 0.9999989579292078, iteration: 36107
loss: 1.015384316444397,grad_norm: 0.9950340481490011, iteration: 36108
loss: 0.9916083216667175,grad_norm: 0.9677812416025893, iteration: 36109
loss: 0.9811801314353943,grad_norm: 0.8890209218906575, iteration: 36110
loss: 0.9711499214172363,grad_norm: 0.9562613614447709, iteration: 36111
loss: 1.0035279989242554,grad_norm: 0.9776835400134088, iteration: 36112
loss: 0.9826354384422302,grad_norm: 0.8740790098696065, iteration: 36113
loss: 0.9939280152320862,grad_norm: 0.824496755827115, iteration: 36114
loss: 0.9887459874153137,grad_norm: 0.9051458984359065, iteration: 36115
loss: 0.9881089329719543,grad_norm: 0.9750572555274329, iteration: 36116
loss: 0.9851733446121216,grad_norm: 0.9999990440731944, iteration: 36117
loss: 0.9776896834373474,grad_norm: 0.9339816840941041, iteration: 36118
loss: 0.9960100650787354,grad_norm: 0.9642748934188906, iteration: 36119
loss: 0.9508873820304871,grad_norm: 0.9999991628500694, iteration: 36120
loss: 1.0188606977462769,grad_norm: 0.999999148665088, iteration: 36121
loss: 1.0599935054779053,grad_norm: 0.9999992430840612, iteration: 36122
loss: 1.0116486549377441,grad_norm: 0.9999990826922736, iteration: 36123
loss: 1.0231398344039917,grad_norm: 0.9999994779805361, iteration: 36124
loss: 1.0065038204193115,grad_norm: 0.8353774024350223, iteration: 36125
loss: 1.0321877002716064,grad_norm: 0.9999991640160715, iteration: 36126
loss: 1.0112545490264893,grad_norm: 0.9999990515536062, iteration: 36127
loss: 1.0214710235595703,grad_norm: 0.9963808597698317, iteration: 36128
loss: 0.9966500997543335,grad_norm: 0.9999989895996064, iteration: 36129
loss: 1.036178708076477,grad_norm: 0.8933332809245569, iteration: 36130
loss: 0.9770542979240417,grad_norm: 0.9999990439496818, iteration: 36131
loss: 1.0814191102981567,grad_norm: 0.999999811640247, iteration: 36132
loss: 1.0426760911941528,grad_norm: 0.9999990833721408, iteration: 36133
loss: 1.0020753145217896,grad_norm: 0.9794622685368969, iteration: 36134
loss: 1.0156720876693726,grad_norm: 0.9999992432598245, iteration: 36135
loss: 0.9903619885444641,grad_norm: 0.8880396935005891, iteration: 36136
loss: 1.0150177478790283,grad_norm: 0.999999363608758, iteration: 36137
loss: 0.9909998178482056,grad_norm: 0.9999996935259174, iteration: 36138
loss: 0.9696035385131836,grad_norm: 0.9923317986027264, iteration: 36139
loss: 0.9771413803100586,grad_norm: 0.8775235811188054, iteration: 36140
loss: 0.9731061458587646,grad_norm: 0.9442454363405155, iteration: 36141
loss: 1.005018949508667,grad_norm: 0.9842411379456806, iteration: 36142
loss: 1.0405120849609375,grad_norm: 0.999999210687257, iteration: 36143
loss: 1.0005818605422974,grad_norm: 0.8437914955620796, iteration: 36144
loss: 1.0450451374053955,grad_norm: 0.990683182679004, iteration: 36145
loss: 1.0011476278305054,grad_norm: 0.8926068792605708, iteration: 36146
loss: 0.9836128354072571,grad_norm: 0.9999990917740195, iteration: 36147
loss: 1.0073531866073608,grad_norm: 0.8590635564850108, iteration: 36148
loss: 1.053749918937683,grad_norm: 0.99999970867772, iteration: 36149
loss: 1.0327036380767822,grad_norm: 0.9999990522503563, iteration: 36150
loss: 0.9758030772209167,grad_norm: 0.8434237352435621, iteration: 36151
loss: 0.9923319816589355,grad_norm: 0.9999990910873331, iteration: 36152
loss: 0.9874077439308167,grad_norm: 0.999999187373917, iteration: 36153
loss: 1.013319730758667,grad_norm: 0.9433609525248613, iteration: 36154
loss: 1.0211236476898193,grad_norm: 0.9999991038327707, iteration: 36155
loss: 0.9779571890830994,grad_norm: 0.933910623727312, iteration: 36156
loss: 1.0215482711791992,grad_norm: 0.970252330806371, iteration: 36157
loss: 1.0074379444122314,grad_norm: 0.9737225687957333, iteration: 36158
loss: 0.9873913526535034,grad_norm: 0.9071684777055804, iteration: 36159
loss: 1.0246095657348633,grad_norm: 0.9999993035114191, iteration: 36160
loss: 0.9939620494842529,grad_norm: 0.9999992101769707, iteration: 36161
loss: 1.050473690032959,grad_norm: 0.9999990476373175, iteration: 36162
loss: 1.023261308670044,grad_norm: 0.8590077914875031, iteration: 36163
loss: 1.0751702785491943,grad_norm: 0.9999990587487805, iteration: 36164
loss: 0.9984743595123291,grad_norm: 0.8843866353691344, iteration: 36165
loss: 0.9964280724525452,grad_norm: 0.9999991082583152, iteration: 36166
loss: 1.046142816543579,grad_norm: 0.843270164482989, iteration: 36167
loss: 0.9899868965148926,grad_norm: 0.9845524785302263, iteration: 36168
loss: 0.9896581172943115,grad_norm: 0.9999990237472779, iteration: 36169
loss: 1.0121735334396362,grad_norm: 0.9481232079878282, iteration: 36170
loss: 1.0307563543319702,grad_norm: 0.9999991453153481, iteration: 36171
loss: 1.0106375217437744,grad_norm: 0.9600786600671031, iteration: 36172
loss: 1.0407742261886597,grad_norm: 0.9999991491632306, iteration: 36173
loss: 1.0326461791992188,grad_norm: 0.9972991760919834, iteration: 36174
loss: 0.9935488104820251,grad_norm: 0.992068048369971, iteration: 36175
loss: 0.9937264323234558,grad_norm: 0.9287009455430819, iteration: 36176
loss: 1.0065935850143433,grad_norm: 0.999999163579678, iteration: 36177
loss: 1.0008320808410645,grad_norm: 0.8780462591587617, iteration: 36178
loss: 1.0175104141235352,grad_norm: 0.9999991880881253, iteration: 36179
loss: 1.040023684501648,grad_norm: 0.9999992950951817, iteration: 36180
loss: 1.010022759437561,grad_norm: 0.839248005116527, iteration: 36181
loss: 1.0041497945785522,grad_norm: 0.9999990867218611, iteration: 36182
loss: 1.0038975477218628,grad_norm: 0.9999991958729619, iteration: 36183
loss: 1.0189000368118286,grad_norm: 0.9999991491637807, iteration: 36184
loss: 1.012596845626831,grad_norm: 0.9995179898132106, iteration: 36185
loss: 1.010645866394043,grad_norm: 0.9999991612982674, iteration: 36186
loss: 1.0041828155517578,grad_norm: 0.8999438949689865, iteration: 36187
loss: 1.0408481359481812,grad_norm: 0.9148089001084284, iteration: 36188
loss: 0.9904211759567261,grad_norm: 0.9999990381652866, iteration: 36189
loss: 0.999407172203064,grad_norm: 0.8488806797451173, iteration: 36190
loss: 0.9998756051063538,grad_norm: 0.9999996962916328, iteration: 36191
loss: 0.9708553552627563,grad_norm: 0.8618134874063681, iteration: 36192
loss: 1.0488539934158325,grad_norm: 0.9999992269160201, iteration: 36193
loss: 0.9590776562690735,grad_norm: 0.9999991724401884, iteration: 36194
loss: 1.020168662071228,grad_norm: 0.9999993154158562, iteration: 36195
loss: 0.9969437718391418,grad_norm: 0.9999989936431264, iteration: 36196
loss: 1.0119274854660034,grad_norm: 0.8273612950833082, iteration: 36197
loss: 0.948117196559906,grad_norm: 0.9999991162504773, iteration: 36198
loss: 1.0111405849456787,grad_norm: 0.9148627810859052, iteration: 36199
loss: 1.0261625051498413,grad_norm: 0.999999736292442, iteration: 36200
loss: 0.9990963935852051,grad_norm: 0.9999990575491785, iteration: 36201
loss: 1.018095850944519,grad_norm: 0.9999990301442991, iteration: 36202
loss: 0.9808940291404724,grad_norm: 0.8246919136368739, iteration: 36203
loss: 1.0090938806533813,grad_norm: 0.9999992019371523, iteration: 36204
loss: 0.9453153014183044,grad_norm: 0.9412639008230036, iteration: 36205
loss: 1.0329959392547607,grad_norm: 0.9086756969673656, iteration: 36206
loss: 0.983244001865387,grad_norm: 0.9252302619470143, iteration: 36207
loss: 0.9962893724441528,grad_norm: 0.9999991339452886, iteration: 36208
loss: 1.031104564666748,grad_norm: 0.9999993714347637, iteration: 36209
loss: 0.9912734627723694,grad_norm: 0.9251772444122849, iteration: 36210
loss: 1.062224268913269,grad_norm: 0.9999996267827524, iteration: 36211
loss: 0.9937490820884705,grad_norm: 0.99999960566049, iteration: 36212
loss: 0.9921680092811584,grad_norm: 0.9444521881154296, iteration: 36213
loss: 0.9627016186714172,grad_norm: 0.8693118409820063, iteration: 36214
loss: 1.0207723379135132,grad_norm: 0.9310047595969347, iteration: 36215
loss: 0.9995812773704529,grad_norm: 0.999999121746577, iteration: 36216
loss: 1.0363410711288452,grad_norm: 0.9999990534525871, iteration: 36217
loss: 1.0235683917999268,grad_norm: 0.918659122062144, iteration: 36218
loss: 1.0022690296173096,grad_norm: 0.9620743120588084, iteration: 36219
loss: 0.9882888793945312,grad_norm: 0.9406358030355418, iteration: 36220
loss: 1.0550787448883057,grad_norm: 0.9999995273705921, iteration: 36221
loss: 1.0090073347091675,grad_norm: 0.999999036950725, iteration: 36222
loss: 0.967431366443634,grad_norm: 0.9933440172568067, iteration: 36223
loss: 1.0164839029312134,grad_norm: 0.9927209095119216, iteration: 36224
loss: 1.0331796407699585,grad_norm: 0.9999993188948358, iteration: 36225
loss: 0.9798027873039246,grad_norm: 0.9999991641954427, iteration: 36226
loss: 0.9902957677841187,grad_norm: 0.9218085376205458, iteration: 36227
loss: 0.9876013994216919,grad_norm: 0.9476293828916087, iteration: 36228
loss: 1.0301884412765503,grad_norm: 0.9999991343451019, iteration: 36229
loss: 1.0340123176574707,grad_norm: 0.9999993101710154, iteration: 36230
loss: 0.9823799133300781,grad_norm: 0.9999991514774829, iteration: 36231
loss: 0.9844584465026855,grad_norm: 0.9493144082869037, iteration: 36232
loss: 1.0095473527908325,grad_norm: 0.9999992914013508, iteration: 36233
loss: 0.9959191679954529,grad_norm: 0.8416888557602313, iteration: 36234
loss: 1.0026313066482544,grad_norm: 0.999999182126489, iteration: 36235
loss: 1.0115106105804443,grad_norm: 0.9999991103953625, iteration: 36236
loss: 0.9788902401924133,grad_norm: 0.9999991395722788, iteration: 36237
loss: 1.0191516876220703,grad_norm: 0.9999990448267472, iteration: 36238
loss: 0.984007716178894,grad_norm: 0.8746707351690515, iteration: 36239
loss: 1.056044340133667,grad_norm: 0.9999995236658062, iteration: 36240
loss: 1.0035133361816406,grad_norm: 0.999998954657039, iteration: 36241
loss: 0.9904160499572754,grad_norm: 0.9237133457971989, iteration: 36242
loss: 1.033045768737793,grad_norm: 0.9999996512529916, iteration: 36243
loss: 1.0116465091705322,grad_norm: 0.9999990622356327, iteration: 36244
loss: 0.9911112189292908,grad_norm: 0.9620742798675941, iteration: 36245
loss: 0.9873504638671875,grad_norm: 0.9999992560291724, iteration: 36246
loss: 0.9999518394470215,grad_norm: 0.9999991362705344, iteration: 36247
loss: 1.0065786838531494,grad_norm: 0.999999223455358, iteration: 36248
loss: 1.00070059299469,grad_norm: 0.9387201276817533, iteration: 36249
loss: 1.0287355184555054,grad_norm: 0.9999991755174323, iteration: 36250
loss: 1.00985586643219,grad_norm: 0.9999991614751154, iteration: 36251
loss: 0.9933273196220398,grad_norm: 0.9534536698626177, iteration: 36252
loss: 1.0006771087646484,grad_norm: 0.9999990889162125, iteration: 36253
loss: 1.007752776145935,grad_norm: 0.8360030831465577, iteration: 36254
loss: 0.9798344373703003,grad_norm: 0.9843104671936073, iteration: 36255
loss: 0.9932572841644287,grad_norm: 0.9968656850401373, iteration: 36256
loss: 1.0026458501815796,grad_norm: 0.9999990324699625, iteration: 36257
loss: 1.0556167364120483,grad_norm: 0.9999992287446902, iteration: 36258
loss: 1.0025755167007446,grad_norm: 0.9747452347033057, iteration: 36259
loss: 1.0084986686706543,grad_norm: 0.9999995954789319, iteration: 36260
loss: 1.0033612251281738,grad_norm: 0.9999994056631228, iteration: 36261
loss: 1.0035077333450317,grad_norm: 0.9752953056383402, iteration: 36262
loss: 1.0007364749908447,grad_norm: 0.999999152809434, iteration: 36263
loss: 1.0053085088729858,grad_norm: 0.8853443217158846, iteration: 36264
loss: 0.9923379421234131,grad_norm: 0.9411491628075666, iteration: 36265
loss: 1.021891713142395,grad_norm: 0.9421513510130832, iteration: 36266
loss: 1.0113251209259033,grad_norm: 0.8855936380751507, iteration: 36267
loss: 1.0296863317489624,grad_norm: 0.9385390853545147, iteration: 36268
loss: 1.0189038515090942,grad_norm: 0.9999992341088343, iteration: 36269
loss: 0.9963329434394836,grad_norm: 0.9842683864527403, iteration: 36270
loss: 1.0393174886703491,grad_norm: 0.9999991436902489, iteration: 36271
loss: 0.9715901017189026,grad_norm: 0.8384418222215833, iteration: 36272
loss: 1.0331363677978516,grad_norm: 0.9999995497424149, iteration: 36273
loss: 1.033716082572937,grad_norm: 0.9999990389800085, iteration: 36274
loss: 1.0002378225326538,grad_norm: 0.9999990702923849, iteration: 36275
loss: 1.0057317018508911,grad_norm: 0.9827383179385791, iteration: 36276
loss: 1.0112884044647217,grad_norm: 0.9999991469598606, iteration: 36277
loss: 0.9708751440048218,grad_norm: 0.9380264936960705, iteration: 36278
loss: 1.0183742046356201,grad_norm: 0.9999992124534158, iteration: 36279
loss: 1.0174167156219482,grad_norm: 0.908686645601434, iteration: 36280
loss: 1.0156511068344116,grad_norm: 0.9999989572797603, iteration: 36281
loss: 1.0348454713821411,grad_norm: 0.9999998959458538, iteration: 36282
loss: 0.9652608633041382,grad_norm: 0.9999991115925867, iteration: 36283
loss: 1.023724913597107,grad_norm: 0.8808129382132732, iteration: 36284
loss: 1.03914213180542,grad_norm: 0.9999990984466979, iteration: 36285
loss: 0.9922041296958923,grad_norm: 0.8073114530604164, iteration: 36286
loss: 0.990922212600708,grad_norm: 0.9241822483837917, iteration: 36287
loss: 1.0496746301651,grad_norm: 0.8618182275856712, iteration: 36288
loss: 1.042412519454956,grad_norm: 0.9999992402851988, iteration: 36289
loss: 0.9971165657043457,grad_norm: 0.8638412533592994, iteration: 36290
loss: 0.9990523457527161,grad_norm: 0.9330721712927006, iteration: 36291
loss: 1.0025447607040405,grad_norm: 0.8805239427811509, iteration: 36292
loss: 1.0103089809417725,grad_norm: 0.9999991043436252, iteration: 36293
loss: 1.0139179229736328,grad_norm: 0.866801236633752, iteration: 36294
loss: 0.9877377152442932,grad_norm: 0.9133623116263088, iteration: 36295
loss: 1.0106879472732544,grad_norm: 0.8292873662587938, iteration: 36296
loss: 1.0249301195144653,grad_norm: 0.9677854175726164, iteration: 36297
loss: 1.0083340406417847,grad_norm: 0.8870927990377669, iteration: 36298
loss: 0.9947373867034912,grad_norm: 0.9999991047882757, iteration: 36299
loss: 1.0524342060089111,grad_norm: 0.9717762161026385, iteration: 36300
loss: 0.9905067086219788,grad_norm: 0.9999991261906255, iteration: 36301
loss: 0.9574021697044373,grad_norm: 0.9999990439543247, iteration: 36302
loss: 0.9971242547035217,grad_norm: 0.9494771784787096, iteration: 36303
loss: 1.0274816751480103,grad_norm: 0.9999991570612033, iteration: 36304
loss: 1.025089979171753,grad_norm: 0.9287766376429328, iteration: 36305
loss: 1.0289196968078613,grad_norm: 0.9736745692567658, iteration: 36306
loss: 0.9950737357139587,grad_norm: 0.9943546323973955, iteration: 36307
loss: 1.0153398513793945,grad_norm: 0.857841151436556, iteration: 36308
loss: 0.9819714426994324,grad_norm: 0.977659611070037, iteration: 36309
loss: 1.0039291381835938,grad_norm: 0.9999993325610324, iteration: 36310
loss: 1.0183346271514893,grad_norm: 0.9841845852594862, iteration: 36311
loss: 1.0170429944992065,grad_norm: 0.9999991448093587, iteration: 36312
loss: 0.9982919692993164,grad_norm: 0.8956021704654412, iteration: 36313
loss: 0.9790186285972595,grad_norm: 0.9016600204622355, iteration: 36314
loss: 1.048511266708374,grad_norm: 0.873115671462296, iteration: 36315
loss: 0.9601083397865295,grad_norm: 0.8079303129246722, iteration: 36316
loss: 0.9914266467094421,grad_norm: 0.8089034032040695, iteration: 36317
loss: 1.0494372844696045,grad_norm: 0.7459522801234784, iteration: 36318
loss: 1.026023268699646,grad_norm: 0.9999992018329283, iteration: 36319
loss: 0.9994457960128784,grad_norm: 0.7505707746320219, iteration: 36320
loss: 0.9722843766212463,grad_norm: 0.8279773982106392, iteration: 36321
loss: 0.9960381388664246,grad_norm: 0.999998985088819, iteration: 36322
loss: 1.0107476711273193,grad_norm: 0.8775422967194024, iteration: 36323
loss: 0.9787086248397827,grad_norm: 0.9999990746658634, iteration: 36324
loss: 0.9850463271141052,grad_norm: 0.9180332896598917, iteration: 36325
loss: 1.0488659143447876,grad_norm: 0.9999989783224258, iteration: 36326
loss: 0.9984205365180969,grad_norm: 0.9129076591664139, iteration: 36327
loss: 1.0321056842803955,grad_norm: 0.9008918017481491, iteration: 36328
loss: 1.0396597385406494,grad_norm: 0.8560531484093991, iteration: 36329
loss: 0.9651545286178589,grad_norm: 0.87459676970651, iteration: 36330
loss: 1.0290888547897339,grad_norm: 0.9999989435725086, iteration: 36331
loss: 1.036527156829834,grad_norm: 0.999999041673429, iteration: 36332
loss: 1.0019848346710205,grad_norm: 0.9999989838211158, iteration: 36333
loss: 0.9859176278114319,grad_norm: 0.9827221480793455, iteration: 36334
loss: 1.0007997751235962,grad_norm: 0.9758220968950209, iteration: 36335
loss: 1.0316075086593628,grad_norm: 0.8948874849774264, iteration: 36336
loss: 1.0091594457626343,grad_norm: 0.9999990494956099, iteration: 36337
loss: 1.0012633800506592,grad_norm: 0.9819636754218058, iteration: 36338
loss: 1.0037531852722168,grad_norm: 0.992739867695367, iteration: 36339
loss: 1.0856000185012817,grad_norm: 0.9999998460828404, iteration: 36340
loss: 1.0334582328796387,grad_norm: 0.9581638253831137, iteration: 36341
loss: 1.0101147890090942,grad_norm: 0.9491281060680863, iteration: 36342
loss: 1.0241646766662598,grad_norm: 0.9612698966251576, iteration: 36343
loss: 1.0110681056976318,grad_norm: 0.8844679797714877, iteration: 36344
loss: 1.0049796104431152,grad_norm: 0.8293017788969611, iteration: 36345
loss: 1.030504584312439,grad_norm: 0.9254684541929836, iteration: 36346
loss: 1.0106514692306519,grad_norm: 0.9163648640890376, iteration: 36347
loss: 0.9887678623199463,grad_norm: 0.9999991170607916, iteration: 36348
loss: 1.0243537425994873,grad_norm: 0.9609102598644362, iteration: 36349
loss: 0.9829949736595154,grad_norm: 0.8501318482139865, iteration: 36350
loss: 1.0139211416244507,grad_norm: 0.9893287615841432, iteration: 36351
loss: 1.083640456199646,grad_norm: 0.9999993651243073, iteration: 36352
loss: 1.0035655498504639,grad_norm: 0.8917187865983217, iteration: 36353
loss: 1.018072247505188,grad_norm: 0.9999991241208026, iteration: 36354
loss: 1.0377930402755737,grad_norm: 0.9999991081629336, iteration: 36355
loss: 1.0417447090148926,grad_norm: 0.918560311617958, iteration: 36356
loss: 1.0132657289505005,grad_norm: 0.9787381056022924, iteration: 36357
loss: 0.9883973598480225,grad_norm: 0.9067741960603327, iteration: 36358
loss: 1.0040768384933472,grad_norm: 0.9999990429110069, iteration: 36359
loss: 0.9944227337837219,grad_norm: 0.9371484045239192, iteration: 36360
loss: 1.016444206237793,grad_norm: 0.9971853730128495, iteration: 36361
loss: 0.9587522149085999,grad_norm: 0.9999991503327734, iteration: 36362
loss: 0.9970356822013855,grad_norm: 0.918646736578033, iteration: 36363
loss: 0.9642038941383362,grad_norm: 0.9999991944464967, iteration: 36364
loss: 1.011582851409912,grad_norm: 0.9124894137386951, iteration: 36365
loss: 1.0300610065460205,grad_norm: 0.85066001563903, iteration: 36366
loss: 1.017660140991211,grad_norm: 0.8318192788853417, iteration: 36367
loss: 1.0136260986328125,grad_norm: 0.999999669848684, iteration: 36368
loss: 1.012449026107788,grad_norm: 0.9162099881854628, iteration: 36369
loss: 1.005778193473816,grad_norm: 0.7955021034495041, iteration: 36370
loss: 1.0108369588851929,grad_norm: 0.9580084314425943, iteration: 36371
loss: 0.9862983226776123,grad_norm: 0.9938772163167344, iteration: 36372
loss: 0.9893459677696228,grad_norm: 0.9711772807518239, iteration: 36373
loss: 1.0304046869277954,grad_norm: 0.9999995534391609, iteration: 36374
loss: 0.9899537563323975,grad_norm: 0.9353170775492229, iteration: 36375
loss: 1.065651774406433,grad_norm: 0.999999105449901, iteration: 36376
loss: 1.0043572187423706,grad_norm: 0.9886049814192381, iteration: 36377
loss: 1.020536184310913,grad_norm: 0.88099656309522, iteration: 36378
loss: 1.012404203414917,grad_norm: 0.8348751909725389, iteration: 36379
loss: 1.0048424005508423,grad_norm: 0.8575568966211342, iteration: 36380
loss: 0.980096697807312,grad_norm: 0.8150573246632016, iteration: 36381
loss: 1.0180076360702515,grad_norm: 0.7764212819417813, iteration: 36382
loss: 0.9896116852760315,grad_norm: 0.8092810967625023, iteration: 36383
loss: 1.0097676515579224,grad_norm: 0.996150661537857, iteration: 36384
loss: 1.0313674211502075,grad_norm: 0.836864200640565, iteration: 36385
loss: 1.0199978351593018,grad_norm: 0.999999134461635, iteration: 36386
loss: 1.029287576675415,grad_norm: 0.9999991218183469, iteration: 36387
loss: 0.9817107915878296,grad_norm: 0.9074833033235533, iteration: 36388
loss: 0.9739909768104553,grad_norm: 0.9490703801014088, iteration: 36389
loss: 1.001164436340332,grad_norm: 0.9146211561142601, iteration: 36390
loss: 1.010556936264038,grad_norm: 0.9884507532162137, iteration: 36391
loss: 1.0196187496185303,grad_norm: 0.9459880050018594, iteration: 36392
loss: 0.9882103800773621,grad_norm: 0.914312236248205, iteration: 36393
loss: 1.0244982242584229,grad_norm: 0.9999991548371618, iteration: 36394
loss: 0.9831670522689819,grad_norm: 0.815676359321387, iteration: 36395
loss: 1.0315300226211548,grad_norm: 0.9999994538804994, iteration: 36396
loss: 1.0170371532440186,grad_norm: 0.8636304013562204, iteration: 36397
loss: 0.9731991291046143,grad_norm: 0.8350040236814015, iteration: 36398
loss: 1.02919340133667,grad_norm: 0.9999990930221121, iteration: 36399
loss: 1.0478142499923706,grad_norm: 0.9572622923597476, iteration: 36400
loss: 0.9528588652610779,grad_norm: 0.9999990581751733, iteration: 36401
loss: 1.0021058320999146,grad_norm: 0.9886731083389269, iteration: 36402
loss: 0.9965347051620483,grad_norm: 0.9251391431153325, iteration: 36403
loss: 0.993108332157135,grad_norm: 0.9123325262077483, iteration: 36404
loss: 1.012940764427185,grad_norm: 0.8792253164338399, iteration: 36405
loss: 1.0230960845947266,grad_norm: 0.9999990324850272, iteration: 36406
loss: 1.02695631980896,grad_norm: 0.9999989781576564, iteration: 36407
loss: 0.9933618903160095,grad_norm: 0.9999992327004656, iteration: 36408
loss: 1.0196996927261353,grad_norm: 0.9999993039786071, iteration: 36409
loss: 1.0114054679870605,grad_norm: 0.9568551869777719, iteration: 36410
loss: 1.0220955610275269,grad_norm: 0.9999991135296448, iteration: 36411
loss: 1.0237327814102173,grad_norm: 0.999999183736315, iteration: 36412
loss: 1.021142601966858,grad_norm: 0.999999156833755, iteration: 36413
loss: 0.9894463419914246,grad_norm: 0.7970289055888156, iteration: 36414
loss: 0.9840150475502014,grad_norm: 0.9999990407900247, iteration: 36415
loss: 0.9765069484710693,grad_norm: 0.8269751393111026, iteration: 36416
loss: 0.9739156365394592,grad_norm: 0.9398958015791234, iteration: 36417
loss: 1.0503523349761963,grad_norm: 0.9999992284443292, iteration: 36418
loss: 1.015709400177002,grad_norm: 0.9999990894651045, iteration: 36419
loss: 1.0507878065109253,grad_norm: 0.9999991963761317, iteration: 36420
loss: 0.9838981628417969,grad_norm: 0.7812766284952123, iteration: 36421
loss: 1.0143147706985474,grad_norm: 0.790056589725053, iteration: 36422
loss: 1.0046268701553345,grad_norm: 0.9999991292137452, iteration: 36423
loss: 1.01918363571167,grad_norm: 0.9999994442542753, iteration: 36424
loss: 1.025154948234558,grad_norm: 0.9133810411021888, iteration: 36425
loss: 0.9900521039962769,grad_norm: 0.9999991527829993, iteration: 36426
loss: 1.0323036909103394,grad_norm: 0.817567593435388, iteration: 36427
loss: 1.0092682838439941,grad_norm: 0.9163865681971455, iteration: 36428
loss: 1.0216819047927856,grad_norm: 0.845974841416161, iteration: 36429
loss: 1.01426100730896,grad_norm: 0.9035553900853913, iteration: 36430
loss: 1.0036629438400269,grad_norm: 0.9999991221646657, iteration: 36431
loss: 1.0114935636520386,grad_norm: 0.9715218060770551, iteration: 36432
loss: 1.0095281600952148,grad_norm: 0.9724179425086542, iteration: 36433
loss: 0.987241804599762,grad_norm: 0.9999992399833284, iteration: 36434
loss: 1.0001503229141235,grad_norm: 0.893903934048732, iteration: 36435
loss: 1.0325080156326294,grad_norm: 0.9157168033735177, iteration: 36436
loss: 1.0144093036651611,grad_norm: 0.8444916384769621, iteration: 36437
loss: 1.0377082824707031,grad_norm: 0.9999997076055777, iteration: 36438
loss: 1.0300743579864502,grad_norm: 0.9999990627664135, iteration: 36439
loss: 0.9818085432052612,grad_norm: 0.8492037617044197, iteration: 36440
loss: 1.0336076021194458,grad_norm: 0.9543872168276076, iteration: 36441
loss: 1.011175513267517,grad_norm: 0.9999992829572198, iteration: 36442
loss: 0.9783479571342468,grad_norm: 0.9999989844256147, iteration: 36443
loss: 0.9742295742034912,grad_norm: 0.9089287909547985, iteration: 36444
loss: 1.0306100845336914,grad_norm: 0.9999991181541728, iteration: 36445
loss: 0.9757689237594604,grad_norm: 0.8941508236903929, iteration: 36446
loss: 0.964178204536438,grad_norm: 0.9999992065333289, iteration: 36447
loss: 1.0087366104125977,grad_norm: 0.9333665347474198, iteration: 36448
loss: 0.9937936663627625,grad_norm: 0.9999990910156344, iteration: 36449
loss: 1.0195999145507812,grad_norm: 0.9999991152657095, iteration: 36450
loss: 1.0343316793441772,grad_norm: 0.9999992598597163, iteration: 36451
loss: 1.0110305547714233,grad_norm: 0.9999989759724169, iteration: 36452
loss: 1.053560733795166,grad_norm: 0.9999995175207481, iteration: 36453
loss: 0.9659078121185303,grad_norm: 0.947774471207579, iteration: 36454
loss: 1.0090868473052979,grad_norm: 0.9186524955620086, iteration: 36455
loss: 0.9830840826034546,grad_norm: 0.9999991068539597, iteration: 36456
loss: 0.9978348612785339,grad_norm: 0.9878017881544527, iteration: 36457
loss: 1.0045316219329834,grad_norm: 0.9999988866214695, iteration: 36458
loss: 1.1501506567001343,grad_norm: 0.9999996000821287, iteration: 36459
loss: 1.0016716718673706,grad_norm: 0.826618115941331, iteration: 36460
loss: 1.006108283996582,grad_norm: 0.8265905547665757, iteration: 36461
loss: 0.95753014087677,grad_norm: 0.9999990176572511, iteration: 36462
loss: 0.9950255751609802,grad_norm: 0.9806187419410423, iteration: 36463
loss: 1.0014214515686035,grad_norm: 0.9999997013983799, iteration: 36464
loss: 1.0311890840530396,grad_norm: 0.9784018647934347, iteration: 36465
loss: 1.0303306579589844,grad_norm: 0.9999989867784331, iteration: 36466
loss: 1.08841872215271,grad_norm: 0.9999999325686749, iteration: 36467
loss: 1.0037004947662354,grad_norm: 0.880650791034626, iteration: 36468
loss: 0.9741597175598145,grad_norm: 0.9903385897034553, iteration: 36469
loss: 0.987889289855957,grad_norm: 0.9999991450267306, iteration: 36470
loss: 1.0009300708770752,grad_norm: 0.9999991318801368, iteration: 36471
loss: 0.9700065851211548,grad_norm: 0.9999991025036951, iteration: 36472
loss: 1.0005141496658325,grad_norm: 0.9999991647683726, iteration: 36473
loss: 1.001842975616455,grad_norm: 0.9999989400274358, iteration: 36474
loss: 1.0037262439727783,grad_norm: 0.8198472456264254, iteration: 36475
loss: 1.0062856674194336,grad_norm: 0.9999992027425325, iteration: 36476
loss: 1.0722829103469849,grad_norm: 0.9999998344642508, iteration: 36477
loss: 0.9884121417999268,grad_norm: 0.9999993584234361, iteration: 36478
loss: 0.9877362251281738,grad_norm: 0.8271079305023992, iteration: 36479
loss: 1.019998550415039,grad_norm: 0.8778403238490821, iteration: 36480
loss: 1.0366706848144531,grad_norm: 0.9432409923954885, iteration: 36481
loss: 1.0063939094543457,grad_norm: 0.8719657984174295, iteration: 36482
loss: 0.9769850373268127,grad_norm: 0.9690063533125876, iteration: 36483
loss: 0.9993692636489868,grad_norm: 0.9381938825719867, iteration: 36484
loss: 1.0294334888458252,grad_norm: 0.8935856755140094, iteration: 36485
loss: 1.009453296661377,grad_norm: 0.9999991066724977, iteration: 36486
loss: 0.9841675162315369,grad_norm: 0.9157256254010714, iteration: 36487
loss: 0.9925991892814636,grad_norm: 0.9559411375933303, iteration: 36488
loss: 1.0298094749450684,grad_norm: 0.8324012685231127, iteration: 36489
loss: 0.9678829908370972,grad_norm: 0.8234549987547894, iteration: 36490
loss: 0.9885044097900391,grad_norm: 0.9999992086882111, iteration: 36491
loss: 1.011323094367981,grad_norm: 0.9999991114384376, iteration: 36492
loss: 0.9867789149284363,grad_norm: 0.999999059237902, iteration: 36493
loss: 1.0555418729782104,grad_norm: 0.999999463178099, iteration: 36494
loss: 0.9936007261276245,grad_norm: 0.9999990245041505, iteration: 36495
loss: 0.9572880268096924,grad_norm: 0.908131314011403, iteration: 36496
loss: 1.012373685836792,grad_norm: 0.9999992384676497, iteration: 36497
loss: 1.0191291570663452,grad_norm: 0.999999773118518, iteration: 36498
loss: 1.0229634046554565,grad_norm: 0.9999993826462961, iteration: 36499
loss: 1.0050249099731445,grad_norm: 0.9999990831717943, iteration: 36500
loss: 0.989193320274353,grad_norm: 0.8391535375516416, iteration: 36501
loss: 1.0254782438278198,grad_norm: 0.7914295444100502, iteration: 36502
loss: 0.9444952607154846,grad_norm: 0.8562536158869958, iteration: 36503
loss: 1.0094695091247559,grad_norm: 0.9300332945246691, iteration: 36504
loss: 1.0361237525939941,grad_norm: 0.9999993927457834, iteration: 36505
loss: 1.0356121063232422,grad_norm: 0.9017965866471758, iteration: 36506
loss: 0.984559178352356,grad_norm: 0.9976866382423774, iteration: 36507
loss: 1.0257947444915771,grad_norm: 0.9999994134533954, iteration: 36508
loss: 0.9889246225357056,grad_norm: 0.8789129592721551, iteration: 36509
loss: 0.9600157141685486,grad_norm: 0.9999991257744151, iteration: 36510
loss: 0.9888760447502136,grad_norm: 0.9999991005021611, iteration: 36511
loss: 0.9893761277198792,grad_norm: 0.9999990067109289, iteration: 36512
loss: 1.0366249084472656,grad_norm: 0.8039306665671003, iteration: 36513
loss: 0.9930707216262817,grad_norm: 0.8215030726300863, iteration: 36514
loss: 0.9957903623580933,grad_norm: 0.9999992690924345, iteration: 36515
loss: 1.0070178508758545,grad_norm: 0.9999990949081639, iteration: 36516
loss: 1.0450679063796997,grad_norm: 0.9999989940993917, iteration: 36517
loss: 1.0249910354614258,grad_norm: 0.8571945933583048, iteration: 36518
loss: 1.011164665222168,grad_norm: 0.999999643873838, iteration: 36519
loss: 0.9526829123497009,grad_norm: 0.999999082612347, iteration: 36520
loss: 1.0412423610687256,grad_norm: 0.8688461697752767, iteration: 36521
loss: 1.0146716833114624,grad_norm: 0.9999991234043086, iteration: 36522
loss: 0.9722961783409119,grad_norm: 0.9999992476463233, iteration: 36523
loss: 1.010377287864685,grad_norm: 0.9091923806667656, iteration: 36524
loss: 0.9929563999176025,grad_norm: 0.9999990798970396, iteration: 36525
loss: 0.979977011680603,grad_norm: 0.8393134057963246, iteration: 36526
loss: 0.9940339922904968,grad_norm: 0.9673054005408158, iteration: 36527
loss: 0.9993767738342285,grad_norm: 0.9693774975195891, iteration: 36528
loss: 0.9717485904693604,grad_norm: 0.9959176564879686, iteration: 36529
loss: 1.0156768560409546,grad_norm: 0.9999993893166279, iteration: 36530
loss: 0.9776723980903625,grad_norm: 0.9999992283614856, iteration: 36531
loss: 1.0656222105026245,grad_norm: 0.9999991316835521, iteration: 36532
loss: 0.9872764348983765,grad_norm: 0.8163012410617149, iteration: 36533
loss: 1.0340917110443115,grad_norm: 0.9999992022868138, iteration: 36534
loss: 0.996300458908081,grad_norm: 0.9999991405806787, iteration: 36535
loss: 0.9781772494316101,grad_norm: 0.9999990871536515, iteration: 36536
loss: 1.017086386680603,grad_norm: 0.9999992523024099, iteration: 36537
loss: 1.020247459411621,grad_norm: 0.9999992812401868, iteration: 36538
loss: 0.9866393804550171,grad_norm: 0.9049967324667388, iteration: 36539
loss: 0.9879416227340698,grad_norm: 0.999999051543724, iteration: 36540
loss: 1.0275123119354248,grad_norm: 0.999999248180255, iteration: 36541
loss: 1.05980384349823,grad_norm: 0.9999991980954317, iteration: 36542
loss: 1.0463627576828003,grad_norm: 0.9999990575255727, iteration: 36543
loss: 1.0507771968841553,grad_norm: 0.9999991475583251, iteration: 36544
loss: 0.9560441374778748,grad_norm: 0.873393273888985, iteration: 36545
loss: 0.9980452060699463,grad_norm: 0.9999990837725287, iteration: 36546
loss: 1.0037410259246826,grad_norm: 0.9510118204652841, iteration: 36547
loss: 0.9702151417732239,grad_norm: 0.9999989787874127, iteration: 36548
loss: 1.0062536001205444,grad_norm: 0.9696323547784647, iteration: 36549
loss: 1.0245862007141113,grad_norm: 0.9999991276319407, iteration: 36550
loss: 0.9911171793937683,grad_norm: 0.9588042693396964, iteration: 36551
loss: 1.0091108083724976,grad_norm: 0.849141633511188, iteration: 36552
loss: 0.9681993722915649,grad_norm: 0.827709950389415, iteration: 36553
loss: 1.0377068519592285,grad_norm: 0.9059274372950524, iteration: 36554
loss: 1.0522464513778687,grad_norm: 0.999999148130972, iteration: 36555
loss: 0.9899755716323853,grad_norm: 0.9999993334412973, iteration: 36556
loss: 1.0138839483261108,grad_norm: 0.8597490412148745, iteration: 36557
loss: 0.9886264801025391,grad_norm: 0.9783030315514145, iteration: 36558
loss: 0.9711212515830994,grad_norm: 0.9128925302431112, iteration: 36559
loss: 0.993818998336792,grad_norm: 0.9999990955796147, iteration: 36560
loss: 1.0157413482666016,grad_norm: 0.8977257364372513, iteration: 36561
loss: 0.9471539258956909,grad_norm: 0.9999991157565589, iteration: 36562
loss: 1.0060685873031616,grad_norm: 0.8811640658647222, iteration: 36563
loss: 0.9733577370643616,grad_norm: 0.8477789358389454, iteration: 36564
loss: 1.023833155632019,grad_norm: 0.9999992851032329, iteration: 36565
loss: 1.0514025688171387,grad_norm: 0.999999188251963, iteration: 36566
loss: 0.987181544303894,grad_norm: 0.9999990366367884, iteration: 36567
loss: 0.9874271750450134,grad_norm: 0.9187065891937438, iteration: 36568
loss: 0.9642647504806519,grad_norm: 0.9999993292785454, iteration: 36569
loss: 0.9981510639190674,grad_norm: 0.9036328886295099, iteration: 36570
loss: 0.9776309132575989,grad_norm: 0.9467004140264137, iteration: 36571
loss: 1.0091164112091064,grad_norm: 0.9196583678031428, iteration: 36572
loss: 0.9969744682312012,grad_norm: 0.9335911699481264, iteration: 36573
loss: 1.0150399208068848,grad_norm: 0.9069143765065596, iteration: 36574
loss: 0.975289523601532,grad_norm: 0.9999989975474676, iteration: 36575
loss: 1.024773359298706,grad_norm: 0.9105441731413407, iteration: 36576
loss: 0.9697288870811462,grad_norm: 0.9999996347409026, iteration: 36577
loss: 0.9934186935424805,grad_norm: 0.9045249071959434, iteration: 36578
loss: 1.0190330743789673,grad_norm: 0.9999991788582778, iteration: 36579
loss: 0.9951155781745911,grad_norm: 0.999999246936813, iteration: 36580
loss: 1.0323090553283691,grad_norm: 0.9759776877798456, iteration: 36581
loss: 1.0074111223220825,grad_norm: 0.9999992687864503, iteration: 36582
loss: 1.008443832397461,grad_norm: 0.942525069881412, iteration: 36583
loss: 1.0394396781921387,grad_norm: 0.9999992067344377, iteration: 36584
loss: 0.9813997149467468,grad_norm: 0.9999991769965839, iteration: 36585
loss: 0.996271014213562,grad_norm: 0.9716061682671152, iteration: 36586
loss: 1.0043894052505493,grad_norm: 0.8464871490407977, iteration: 36587
loss: 0.9800968766212463,grad_norm: 0.8883510929244751, iteration: 36588
loss: 1.0340536832809448,grad_norm: 0.9999991489820955, iteration: 36589
loss: 1.0018547773361206,grad_norm: 0.9999991027578289, iteration: 36590
loss: 1.0302666425704956,grad_norm: 0.9520969933894757, iteration: 36591
loss: 0.9492418169975281,grad_norm: 0.8460215334055624, iteration: 36592
loss: 0.9950182437896729,grad_norm: 0.8776560646364941, iteration: 36593
loss: 1.0176267623901367,grad_norm: 0.8493970843273451, iteration: 36594
loss: 0.9950702786445618,grad_norm: 0.9999991981331787, iteration: 36595
loss: 1.0507900714874268,grad_norm: 0.9999994052054508, iteration: 36596
loss: 1.0520660877227783,grad_norm: 0.9999995997409762, iteration: 36597
loss: 1.0325024127960205,grad_norm: 0.8325126752523234, iteration: 36598
loss: 1.028144121170044,grad_norm: 0.9914358452272869, iteration: 36599
loss: 1.0193015336990356,grad_norm: 0.9999996497220512, iteration: 36600
loss: 1.0215448141098022,grad_norm: 0.8707638859837696, iteration: 36601
loss: 0.9934963583946228,grad_norm: 0.7300107467328772, iteration: 36602
loss: 0.9981927871704102,grad_norm: 0.9999997512621384, iteration: 36603
loss: 1.018688678741455,grad_norm: 0.8673668412985284, iteration: 36604
loss: 1.0055644512176514,grad_norm: 0.9999998813669159, iteration: 36605
loss: 0.9782087206840515,grad_norm: 0.9002136667236418, iteration: 36606
loss: 0.962756335735321,grad_norm: 0.9999990713880226, iteration: 36607
loss: 0.9997170567512512,grad_norm: 0.8899026613195313, iteration: 36608
loss: 1.015633225440979,grad_norm: 0.9469498282492117, iteration: 36609
loss: 1.0059479475021362,grad_norm: 0.9999989861056828, iteration: 36610
loss: 1.012627124786377,grad_norm: 0.826157715898607, iteration: 36611
loss: 0.9964728951454163,grad_norm: 0.9999994254599447, iteration: 36612
loss: 1.0173858404159546,grad_norm: 0.9999991934874063, iteration: 36613
loss: 0.9900137186050415,grad_norm: 0.9584488141565323, iteration: 36614
loss: 1.0127863883972168,grad_norm: 0.9973326026657866, iteration: 36615
loss: 1.031957745552063,grad_norm: 0.9381817993130493, iteration: 36616
loss: 1.0102698802947998,grad_norm: 0.9176269876913337, iteration: 36617
loss: 1.014017939567566,grad_norm: 0.9999996778346386, iteration: 36618
loss: 1.0548229217529297,grad_norm: 0.9999991502115503, iteration: 36619
loss: 1.0048749446868896,grad_norm: 0.9999989105858054, iteration: 36620
loss: 1.0159878730773926,grad_norm: 0.9999990521271062, iteration: 36621
loss: 1.0177295207977295,grad_norm: 0.9563593669043972, iteration: 36622
loss: 0.9980180859565735,grad_norm: 0.8170837719231606, iteration: 36623
loss: 1.0203241109848022,grad_norm: 0.9999990783089969, iteration: 36624
loss: 0.979583740234375,grad_norm: 0.8818595220728619, iteration: 36625
loss: 0.9981198906898499,grad_norm: 0.9999991837740483, iteration: 36626
loss: 1.0325605869293213,grad_norm: 0.947673975362043, iteration: 36627
loss: 1.0131268501281738,grad_norm: 0.9775662386840467, iteration: 36628
loss: 0.9882485270500183,grad_norm: 0.8029879339879682, iteration: 36629
loss: 1.0284088850021362,grad_norm: 0.9999990171046937, iteration: 36630
loss: 1.0172199010849,grad_norm: 0.9458178030775755, iteration: 36631
loss: 0.9833352565765381,grad_norm: 0.9999991490922537, iteration: 36632
loss: 0.9963683485984802,grad_norm: 0.855730996642089, iteration: 36633
loss: 1.0059638023376465,grad_norm: 0.9924969020358563, iteration: 36634
loss: 1.0412896871566772,grad_norm: 0.9999990553530881, iteration: 36635
loss: 1.0024240016937256,grad_norm: 0.9373502507867116, iteration: 36636
loss: 1.0230737924575806,grad_norm: 0.9999989923418797, iteration: 36637
loss: 0.9921360611915588,grad_norm: 0.8565906427953174, iteration: 36638
loss: 0.95295649766922,grad_norm: 0.8863948943835677, iteration: 36639
loss: 1.0358525514602661,grad_norm: 0.9999997473792644, iteration: 36640
loss: 1.0775599479675293,grad_norm: 0.9999995990372372, iteration: 36641
loss: 1.0080875158309937,grad_norm: 0.9999991749017874, iteration: 36642
loss: 0.9838657975196838,grad_norm: 0.9523822692511327, iteration: 36643
loss: 1.098689079284668,grad_norm: 0.9999997118154185, iteration: 36644
loss: 0.9871125817298889,grad_norm: 0.7574792628214341, iteration: 36645
loss: 1.0110206604003906,grad_norm: 0.9999989967276293, iteration: 36646
loss: 1.0419986248016357,grad_norm: 0.9999990700636978, iteration: 36647
loss: 1.0193883180618286,grad_norm: 0.8203652906777309, iteration: 36648
loss: 1.030221700668335,grad_norm: 0.9999991341508101, iteration: 36649
loss: 0.9801620244979858,grad_norm: 0.8659570118390282, iteration: 36650
loss: 1.0089004039764404,grad_norm: 0.9548759125608525, iteration: 36651
loss: 0.9945302605628967,grad_norm: 0.8017609122626794, iteration: 36652
loss: 1.0162944793701172,grad_norm: 0.9928045102556543, iteration: 36653
loss: 0.9674557447433472,grad_norm: 0.9325781142224068, iteration: 36654
loss: 0.9890919923782349,grad_norm: 0.9999991539523708, iteration: 36655
loss: 1.0098638534545898,grad_norm: 0.9999990459986411, iteration: 36656
loss: 0.9496050477027893,grad_norm: 0.9999991248536262, iteration: 36657
loss: 0.9711967706680298,grad_norm: 0.8950568927872357, iteration: 36658
loss: 0.9968530535697937,grad_norm: 0.890886797340086, iteration: 36659
loss: 1.048086166381836,grad_norm: 0.999999307701863, iteration: 36660
loss: 1.0126928091049194,grad_norm: 0.9999991245255615, iteration: 36661
loss: 0.9859814047813416,grad_norm: 0.9999990592789748, iteration: 36662
loss: 1.0374921560287476,grad_norm: 0.9590448310406922, iteration: 36663
loss: 1.0242459774017334,grad_norm: 0.9999991135615992, iteration: 36664
loss: 1.006808876991272,grad_norm: 0.9999990444859803, iteration: 36665
loss: 0.9930192828178406,grad_norm: 0.8013836160689743, iteration: 36666
loss: 0.9976209402084351,grad_norm: 0.9999991679652954, iteration: 36667
loss: 1.0107799768447876,grad_norm: 0.9999990319993283, iteration: 36668
loss: 0.9550811648368835,grad_norm: 0.9362398250098257, iteration: 36669
loss: 1.0142720937728882,grad_norm: 0.9999989288279902, iteration: 36670
loss: 0.997971773147583,grad_norm: 0.9999990952579081, iteration: 36671
loss: 1.0070189237594604,grad_norm: 0.859700383825405, iteration: 36672
loss: 1.0164270401000977,grad_norm: 0.9999991201036067, iteration: 36673
loss: 1.0037230253219604,grad_norm: 0.9999995443218814, iteration: 36674
loss: 1.012161374092102,grad_norm: 0.9999990446126193, iteration: 36675
loss: 1.002951741218567,grad_norm: 0.9999997381903217, iteration: 36676
loss: 1.016422152519226,grad_norm: 0.9999993801034883, iteration: 36677
loss: 1.0059471130371094,grad_norm: 0.9999995893843601, iteration: 36678
loss: 1.001050353050232,grad_norm: 0.9999990354955767, iteration: 36679
loss: 0.9980250000953674,grad_norm: 0.8107239466284896, iteration: 36680
loss: 1.0366860628128052,grad_norm: 0.9999991088106195, iteration: 36681
loss: 1.0072299242019653,grad_norm: 0.9999997126700735, iteration: 36682
loss: 1.0620732307434082,grad_norm: 0.9999992937043285, iteration: 36683
loss: 0.9959121942520142,grad_norm: 0.9999990669867197, iteration: 36684
loss: 0.9880266189575195,grad_norm: 0.9994084895226567, iteration: 36685
loss: 1.0329338312149048,grad_norm: 0.9999992026002148, iteration: 36686
loss: 1.0330555438995361,grad_norm: 0.9999994471166859, iteration: 36687
loss: 1.0046656131744385,grad_norm: 0.9999991072854689, iteration: 36688
loss: 1.0142959356307983,grad_norm: 0.9796237901139936, iteration: 36689
loss: 1.0216255187988281,grad_norm: 0.999999454884774, iteration: 36690
loss: 0.9974166750907898,grad_norm: 0.92418518477747, iteration: 36691
loss: 1.0126947164535522,grad_norm: 0.9999991631874402, iteration: 36692
loss: 1.0498398542404175,grad_norm: 0.9999989657813066, iteration: 36693
loss: 0.9990074634552002,grad_norm: 0.9192218975851437, iteration: 36694
loss: 1.0097085237503052,grad_norm: 0.9577821175385413, iteration: 36695
loss: 1.0037405490875244,grad_norm: 0.9999990851631173, iteration: 36696
loss: 1.0217082500457764,grad_norm: 0.9340245380763013, iteration: 36697
loss: 0.9814599752426147,grad_norm: 0.9999990040463855, iteration: 36698
loss: 0.9672380685806274,grad_norm: 0.9999991989636096, iteration: 36699
loss: 1.0030289888381958,grad_norm: 0.894485412287447, iteration: 36700
loss: 0.9974139928817749,grad_norm: 0.7913914948228683, iteration: 36701
loss: 0.9955925345420837,grad_norm: 0.9217153288829835, iteration: 36702
loss: 0.9930702447891235,grad_norm: 0.9960907234642814, iteration: 36703
loss: 1.0036195516586304,grad_norm: 0.9999990483841594, iteration: 36704
loss: 0.9551882743835449,grad_norm: 0.9439297875219169, iteration: 36705
loss: 0.9856910705566406,grad_norm: 0.9369864906092557, iteration: 36706
loss: 0.9240804314613342,grad_norm: 0.9267707422943084, iteration: 36707
loss: 0.9965086579322815,grad_norm: 0.996400105037743, iteration: 36708
loss: 1.0476117134094238,grad_norm: 0.999998998612238, iteration: 36709
loss: 0.9843416810035706,grad_norm: 0.9999990441548101, iteration: 36710
loss: 0.9852816462516785,grad_norm: 0.9809814786085944, iteration: 36711
loss: 0.9540140628814697,grad_norm: 0.9417905195957097, iteration: 36712
loss: 1.000057578086853,grad_norm: 0.9162023886046486, iteration: 36713
loss: 0.9872485995292664,grad_norm: 0.9995678356641019, iteration: 36714
loss: 1.0322996377944946,grad_norm: 0.9999994149077971, iteration: 36715
loss: 1.0092740058898926,grad_norm: 0.8117259979356827, iteration: 36716
loss: 1.0314749479293823,grad_norm: 0.9999989709489903, iteration: 36717
loss: 1.0064191818237305,grad_norm: 0.9999992457993324, iteration: 36718
loss: 1.0454071760177612,grad_norm: 0.9999995517044615, iteration: 36719
loss: 1.023989200592041,grad_norm: 0.9244034843295521, iteration: 36720
loss: 0.9492570757865906,grad_norm: 0.9999996908877784, iteration: 36721
loss: 1.0036942958831787,grad_norm: 0.9999991447084507, iteration: 36722
loss: 1.0660744905471802,grad_norm: 0.9999992590208853, iteration: 36723
loss: 0.9907424449920654,grad_norm: 0.7981331643339695, iteration: 36724
loss: 0.996991753578186,grad_norm: 0.999999079205958, iteration: 36725
loss: 1.0579694509506226,grad_norm: 0.7904519366783885, iteration: 36726
loss: 1.0001955032348633,grad_norm: 0.9999991291178251, iteration: 36727
loss: 0.9700600504875183,grad_norm: 0.999999210535745, iteration: 36728
loss: 1.0299677848815918,grad_norm: 0.9999990522322081, iteration: 36729
loss: 1.080214500427246,grad_norm: 0.999999825090275, iteration: 36730
loss: 1.039342999458313,grad_norm: 0.9999991707454705, iteration: 36731
loss: 1.0343049764633179,grad_norm: 0.9999992785826487, iteration: 36732
loss: 1.0823346376419067,grad_norm: 0.9180667943997721, iteration: 36733
loss: 1.0362387895584106,grad_norm: 0.9999991213300276, iteration: 36734
loss: 1.0347214937210083,grad_norm: 0.9999991729775097, iteration: 36735
loss: 0.991184651851654,grad_norm: 0.9882212282189734, iteration: 36736
loss: 1.050456166267395,grad_norm: 0.9999992804852234, iteration: 36737
loss: 1.0363028049468994,grad_norm: 0.9999990994034308, iteration: 36738
loss: 1.0353552103042603,grad_norm: 0.9876122859815606, iteration: 36739
loss: 1.0469098091125488,grad_norm: 0.948761416477467, iteration: 36740
loss: 1.0114494562149048,grad_norm: 0.9874473548307452, iteration: 36741
loss: 1.0120701789855957,grad_norm: 0.9283754736438573, iteration: 36742
loss: 1.0058056116104126,grad_norm: 0.999999115009817, iteration: 36743
loss: 1.0141911506652832,grad_norm: 0.9999990578482935, iteration: 36744
loss: 0.9977650046348572,grad_norm: 0.9999996775731053, iteration: 36745
loss: 1.0085455179214478,grad_norm: 0.9999994429320338, iteration: 36746
loss: 0.9889783263206482,grad_norm: 0.9999990251577988, iteration: 36747
loss: 1.1122426986694336,grad_norm: 0.9999999457743259, iteration: 36748
loss: 1.0061298608779907,grad_norm: 0.9332918239017587, iteration: 36749
loss: 1.0009050369262695,grad_norm: 0.8946331945314286, iteration: 36750
loss: 0.9831376075744629,grad_norm: 0.9999992195920956, iteration: 36751
loss: 1.028275489807129,grad_norm: 0.9999992857236671, iteration: 36752
loss: 1.0281975269317627,grad_norm: 0.989211342860111, iteration: 36753
loss: 0.9862349629402161,grad_norm: 0.8947839406237814, iteration: 36754
loss: 1.0255647897720337,grad_norm: 0.9999989780944979, iteration: 36755
loss: 1.0524979829788208,grad_norm: 0.9999993444799611, iteration: 36756
loss: 1.0085227489471436,grad_norm: 0.9999992801741779, iteration: 36757
loss: 1.0223625898361206,grad_norm: 0.9080580081663134, iteration: 36758
loss: 0.9928407073020935,grad_norm: 0.9999991844675047, iteration: 36759
loss: 0.9775888919830322,grad_norm: 0.999999212560019, iteration: 36760
loss: 1.0046577453613281,grad_norm: 0.899541499596457, iteration: 36761
loss: 0.9776656627655029,grad_norm: 0.9999989928589177, iteration: 36762
loss: 1.004313588142395,grad_norm: 0.9601908429633552, iteration: 36763
loss: 0.9986773729324341,grad_norm: 0.9054409457424822, iteration: 36764
loss: 1.014784812927246,grad_norm: 0.822391371056553, iteration: 36765
loss: 0.98098224401474,grad_norm: 0.9999995076570479, iteration: 36766
loss: 0.9988484382629395,grad_norm: 0.9999994180591313, iteration: 36767
loss: 1.0059360265731812,grad_norm: 0.999999492726154, iteration: 36768
loss: 1.01227605342865,grad_norm: 0.913265904317848, iteration: 36769
loss: 1.0215725898742676,grad_norm: 0.9999992116530096, iteration: 36770
loss: 1.0242865085601807,grad_norm: 0.8780478547145657, iteration: 36771
loss: 1.0108662843704224,grad_norm: 0.9265726967596949, iteration: 36772
loss: 1.0068414211273193,grad_norm: 0.8755026709550134, iteration: 36773
loss: 0.9734793305397034,grad_norm: 0.9999994401152168, iteration: 36774
loss: 0.9796481132507324,grad_norm: 0.9320420376064381, iteration: 36775
loss: 1.04558265209198,grad_norm: 0.9999993041758822, iteration: 36776
loss: 1.0059504508972168,grad_norm: 0.9739176029191002, iteration: 36777
loss: 1.0421669483184814,grad_norm: 0.8911098923121149, iteration: 36778
loss: 0.9823402166366577,grad_norm: 0.8751515225956294, iteration: 36779
loss: 1.0119004249572754,grad_norm: 0.9213950862210232, iteration: 36780
loss: 1.0243736505508423,grad_norm: 0.9999992238850789, iteration: 36781
loss: 1.0189470052719116,grad_norm: 0.966531650049647, iteration: 36782
loss: 0.9902666807174683,grad_norm: 0.8529591035446941, iteration: 36783
loss: 0.9473926424980164,grad_norm: 0.9400035412105114, iteration: 36784
loss: 0.9929108619689941,grad_norm: 0.9117624400350255, iteration: 36785
loss: 0.9847376942634583,grad_norm: 0.8913622030637428, iteration: 36786
loss: 0.9935138821601868,grad_norm: 0.9999997921580974, iteration: 36787
loss: 1.1733696460723877,grad_norm: 0.9999998562814197, iteration: 36788
loss: 0.9788913130760193,grad_norm: 0.9519862968273305, iteration: 36789
loss: 1.0090309381484985,grad_norm: 0.9999992320106592, iteration: 36790
loss: 1.018202304840088,grad_norm: 0.9999990641880586, iteration: 36791
loss: 1.1663963794708252,grad_norm: 0.9999996016776431, iteration: 36792
loss: 1.035663366317749,grad_norm: 0.920190391797677, iteration: 36793
loss: 0.9918338060379028,grad_norm: 0.8963341750086297, iteration: 36794
loss: 0.9895302653312683,grad_norm: 0.9999990707955649, iteration: 36795
loss: 1.0100053548812866,grad_norm: 0.9999989761608078, iteration: 36796
loss: 1.0029723644256592,grad_norm: 0.9708427465564718, iteration: 36797
loss: 1.0704442262649536,grad_norm: 0.9999992262238676, iteration: 36798
loss: 1.017812967300415,grad_norm: 0.8860277565728975, iteration: 36799
loss: 1.0093355178833008,grad_norm: 0.9999991388292117, iteration: 36800
loss: 1.0030301809310913,grad_norm: 0.9999991290980902, iteration: 36801
loss: 1.0116971731185913,grad_norm: 0.902580267730672, iteration: 36802
loss: 1.001872181892395,grad_norm: 0.940511588146125, iteration: 36803
loss: 0.9768396615982056,grad_norm: 0.9999991550032067, iteration: 36804
loss: 1.0038152933120728,grad_norm: 0.8088746062471132, iteration: 36805
loss: 1.0481154918670654,grad_norm: 0.9933104979916166, iteration: 36806
loss: 1.00273859500885,grad_norm: 0.8660600984818093, iteration: 36807
loss: 1.0354480743408203,grad_norm: 0.9999991779614922, iteration: 36808
loss: 0.9905720949172974,grad_norm: 0.9999991217367071, iteration: 36809
loss: 1.0739097595214844,grad_norm: 0.9999999101531454, iteration: 36810
loss: 1.0322203636169434,grad_norm: 0.9999991616859905, iteration: 36811
loss: 0.9688124060630798,grad_norm: 0.9999991685833473, iteration: 36812
loss: 1.0205323696136475,grad_norm: 0.9360843121227336, iteration: 36813
loss: 1.0020965337753296,grad_norm: 0.9999991527572899, iteration: 36814
loss: 1.046937108039856,grad_norm: 0.9999991233164741, iteration: 36815
loss: 0.9885290265083313,grad_norm: 0.9999989735893502, iteration: 36816
loss: 0.9689226150512695,grad_norm: 0.8455137697354903, iteration: 36817
loss: 1.0160529613494873,grad_norm: 0.9884605789279806, iteration: 36818
loss: 0.9838030338287354,grad_norm: 0.9999990108870102, iteration: 36819
loss: 0.9779154658317566,grad_norm: 0.9822185411322968, iteration: 36820
loss: 1.034548282623291,grad_norm: 0.9784353737769527, iteration: 36821
loss: 0.9783517122268677,grad_norm: 0.9999991348796279, iteration: 36822
loss: 0.9931471347808838,grad_norm: 0.9999991177889345, iteration: 36823
loss: 0.9716986417770386,grad_norm: 0.9921548252316992, iteration: 36824
loss: 1.0271631479263306,grad_norm: 0.9999996891585079, iteration: 36825
loss: 0.9972356557846069,grad_norm: 0.999999127885852, iteration: 36826
loss: 0.9924314618110657,grad_norm: 0.8872928429465241, iteration: 36827
loss: 1.0258961915969849,grad_norm: 0.9999992668056858, iteration: 36828
loss: 0.9971893429756165,grad_norm: 0.8273211921874811, iteration: 36829
loss: 0.9706054925918579,grad_norm: 0.9358139392159238, iteration: 36830
loss: 0.9905433058738708,grad_norm: 0.886798491486623, iteration: 36831
loss: 0.9757603406906128,grad_norm: 0.9999992327674753, iteration: 36832
loss: 1.017343521118164,grad_norm: 0.893547303492337, iteration: 36833
loss: 0.9945276975631714,grad_norm: 0.9305819183953646, iteration: 36834
loss: 1.0007750988006592,grad_norm: 0.9476301960218695, iteration: 36835
loss: 1.003544569015503,grad_norm: 0.9999993486373343, iteration: 36836
loss: 1.0622366666793823,grad_norm: 0.9999994944435167, iteration: 36837
loss: 0.9979909062385559,grad_norm: 0.999999571747715, iteration: 36838
loss: 0.9797786474227905,grad_norm: 0.8934520690288956, iteration: 36839
loss: 1.0085006952285767,grad_norm: 0.9999990601064915, iteration: 36840
loss: 0.9774907827377319,grad_norm: 0.9999992138055612, iteration: 36841
loss: 1.001158356666565,grad_norm: 0.9999990142941549, iteration: 36842
loss: 0.9715703725814819,grad_norm: 0.9950247875985808, iteration: 36843
loss: 1.0125203132629395,grad_norm: 0.9999991391670737, iteration: 36844
loss: 0.9967072606086731,grad_norm: 0.9742869160200274, iteration: 36845
loss: 0.9654037356376648,grad_norm: 0.9999993326125353, iteration: 36846
loss: 0.9814732670783997,grad_norm: 0.9999991116929282, iteration: 36847
loss: 1.0587258338928223,grad_norm: 0.8889745436492782, iteration: 36848
loss: 1.0642814636230469,grad_norm: 0.8694735454908721, iteration: 36849
loss: 0.9912645220756531,grad_norm: 0.9999993408203309, iteration: 36850
loss: 1.0037094354629517,grad_norm: 0.9075678846666065, iteration: 36851
loss: 1.0054823160171509,grad_norm: 0.9245275739553575, iteration: 36852
loss: 0.9955466389656067,grad_norm: 0.9999990314400362, iteration: 36853
loss: 1.0396908521652222,grad_norm: 0.9310379927325985, iteration: 36854
loss: 1.0475465059280396,grad_norm: 0.9999995002345451, iteration: 36855
loss: 1.0231026411056519,grad_norm: 0.9999991774541229, iteration: 36856
loss: 1.0016484260559082,grad_norm: 0.9999992571101018, iteration: 36857
loss: 1.044140338897705,grad_norm: 0.9999994413454294, iteration: 36858
loss: 0.9929826855659485,grad_norm: 0.9999989590031201, iteration: 36859
loss: 1.0426408052444458,grad_norm: 0.9999992509258785, iteration: 36860
loss: 1.021002173423767,grad_norm: 0.9207569018565704, iteration: 36861
loss: 0.9992643594741821,grad_norm: 0.8702594758354519, iteration: 36862
loss: 1.0199626684188843,grad_norm: 0.9999992329096632, iteration: 36863
loss: 1.0544718503952026,grad_norm: 0.9447999769703542, iteration: 36864
loss: 1.018375277519226,grad_norm: 0.9061635078172033, iteration: 36865
loss: 1.0035409927368164,grad_norm: 0.7943542886784749, iteration: 36866
loss: 0.9886975884437561,grad_norm: 0.7739825742596843, iteration: 36867
loss: 1.0006179809570312,grad_norm: 0.9574906981152507, iteration: 36868
loss: 0.9815125465393066,grad_norm: 0.9599532657855679, iteration: 36869
loss: 1.013663411140442,grad_norm: 0.9565733308455892, iteration: 36870
loss: 1.0104111433029175,grad_norm: 0.9999992592284882, iteration: 36871
loss: 0.9923553466796875,grad_norm: 0.9396813881925796, iteration: 36872
loss: 1.0136816501617432,grad_norm: 0.8312516294899299, iteration: 36873
loss: 0.9902920126914978,grad_norm: 0.9999992067995113, iteration: 36874
loss: 0.974845290184021,grad_norm: 0.9825364750682515, iteration: 36875
loss: 0.9760339856147766,grad_norm: 0.930414059435391, iteration: 36876
loss: 1.0429760217666626,grad_norm: 0.999999327044103, iteration: 36877
loss: 0.994879961013794,grad_norm: 0.9999990215541844, iteration: 36878
loss: 1.0132102966308594,grad_norm: 0.9866773452765659, iteration: 36879
loss: 1.0035873651504517,grad_norm: 0.9933862122365069, iteration: 36880
loss: 0.9921219348907471,grad_norm: 0.9999992324485317, iteration: 36881
loss: 0.9750283360481262,grad_norm: 0.9467353795576904, iteration: 36882
loss: 1.0369222164154053,grad_norm: 0.9907161084480817, iteration: 36883
loss: 1.0208910703659058,grad_norm: 0.9999991707545435, iteration: 36884
loss: 1.153098225593567,grad_norm: 0.9999998571093653, iteration: 36885
loss: 0.9826255440711975,grad_norm: 0.8617575680490178, iteration: 36886
loss: 1.0202707052230835,grad_norm: 0.9999991848885165, iteration: 36887
loss: 1.0184333324432373,grad_norm: 0.9928184085603203, iteration: 36888
loss: 1.0243011713027954,grad_norm: 0.9671118192013332, iteration: 36889
loss: 0.9852697849273682,grad_norm: 0.7633968340915709, iteration: 36890
loss: 0.9843644499778748,grad_norm: 0.8670166970226174, iteration: 36891
loss: 0.9966191649436951,grad_norm: 0.929709175291268, iteration: 36892
loss: 1.0254606008529663,grad_norm: 0.8902374295234499, iteration: 36893
loss: 0.9553486704826355,grad_norm: 0.8957134722050896, iteration: 36894
loss: 1.038156509399414,grad_norm: 0.8911568545666438, iteration: 36895
loss: 1.0086127519607544,grad_norm: 0.9999992860414135, iteration: 36896
loss: 0.983466386795044,grad_norm: 0.8298170687130735, iteration: 36897
loss: 1.0086350440979004,grad_norm: 0.9653649228220973, iteration: 36898
loss: 1.0083603858947754,grad_norm: 0.9999997483685893, iteration: 36899
loss: 0.9996299147605896,grad_norm: 0.9999991469344987, iteration: 36900
loss: 1.0723135471343994,grad_norm: 0.9758675197783813, iteration: 36901
loss: 1.014984130859375,grad_norm: 0.9865782397810997, iteration: 36902
loss: 1.008339762687683,grad_norm: 0.8817250459049661, iteration: 36903
loss: 0.9746124148368835,grad_norm: 0.9999991072123062, iteration: 36904
loss: 1.0273844003677368,grad_norm: 0.7560942114082156, iteration: 36905
loss: 0.9938468337059021,grad_norm: 0.9886211687614098, iteration: 36906
loss: 0.975625216960907,grad_norm: 0.9999990052336287, iteration: 36907
loss: 1.0138698816299438,grad_norm: 0.9999994002626205, iteration: 36908
loss: 1.0343934297561646,grad_norm: 0.8843673014753279, iteration: 36909
loss: 0.9855734705924988,grad_norm: 0.9139652841875162, iteration: 36910
loss: 1.0063046216964722,grad_norm: 0.9571133034328188, iteration: 36911
loss: 0.9827980995178223,grad_norm: 0.9361116177345745, iteration: 36912
loss: 1.0113563537597656,grad_norm: 0.9411244882502198, iteration: 36913
loss: 0.9796692132949829,grad_norm: 0.9999989425630661, iteration: 36914
loss: 0.9979774951934814,grad_norm: 0.8740388352135611, iteration: 36915
loss: 0.9910801649093628,grad_norm: 0.9999993967791638, iteration: 36916
loss: 0.9848514795303345,grad_norm: 0.992214701459798, iteration: 36917
loss: 0.9738566875457764,grad_norm: 0.9999989625131023, iteration: 36918
loss: 1.0319164991378784,grad_norm: 0.8915538028276774, iteration: 36919
loss: 1.0141099691390991,grad_norm: 0.9999991382368649, iteration: 36920
loss: 1.0389996767044067,grad_norm: 0.9225216112657568, iteration: 36921
loss: 1.099061131477356,grad_norm: 0.9999993721910029, iteration: 36922
loss: 1.0045346021652222,grad_norm: 0.9667629459512963, iteration: 36923
loss: 0.9808154106140137,grad_norm: 0.9738219090678238, iteration: 36924
loss: 0.9771431684494019,grad_norm: 0.9999991515616639, iteration: 36925
loss: 1.0090248584747314,grad_norm: 0.9999991357993913, iteration: 36926
loss: 1.0077747106552124,grad_norm: 0.9999991791904498, iteration: 36927
loss: 1.0272475481033325,grad_norm: 0.880796213896462, iteration: 36928
loss: 1.0096244812011719,grad_norm: 0.9136492991460518, iteration: 36929
loss: 0.9991061687469482,grad_norm: 0.9999990382487051, iteration: 36930
loss: 1.0063103437423706,grad_norm: 0.9999990308509822, iteration: 36931
loss: 1.0073277950286865,grad_norm: 0.9090887476520854, iteration: 36932
loss: 1.0291718244552612,grad_norm: 0.9999990422157626, iteration: 36933
loss: 1.0043537616729736,grad_norm: 0.8944090172520626, iteration: 36934
loss: 0.9962279200553894,grad_norm: 0.999999093773921, iteration: 36935
loss: 1.0214322805404663,grad_norm: 0.9471896276220252, iteration: 36936
loss: 0.9821446537971497,grad_norm: 0.9999990731892199, iteration: 36937
loss: 1.0336459875106812,grad_norm: 0.9999989569728539, iteration: 36938
loss: 1.0413492918014526,grad_norm: 0.9999996251756563, iteration: 36939
loss: 1.0035516023635864,grad_norm: 0.9430121071717082, iteration: 36940
loss: 1.0526736974716187,grad_norm: 0.9999993840377374, iteration: 36941
loss: 1.0471817255020142,grad_norm: 0.9999991680694803, iteration: 36942
loss: 1.0343605279922485,grad_norm: 0.9999994139475993, iteration: 36943
loss: 1.015601634979248,grad_norm: 0.8896593669593083, iteration: 36944
loss: 1.0174195766448975,grad_norm: 0.9999990370468512, iteration: 36945
loss: 1.0228908061981201,grad_norm: 0.9097206741408118, iteration: 36946
loss: 1.0000145435333252,grad_norm: 0.9999990229374617, iteration: 36947
loss: 1.025534749031067,grad_norm: 0.999999196296107, iteration: 36948
loss: 1.0035715103149414,grad_norm: 0.9999990934166623, iteration: 36949
loss: 1.0021837949752808,grad_norm: 0.9814822449664468, iteration: 36950
loss: 1.011551856994629,grad_norm: 0.9999991337618408, iteration: 36951
loss: 1.0243693590164185,grad_norm: 0.9999991565159305, iteration: 36952
loss: 0.9989853501319885,grad_norm: 0.8349694474119826, iteration: 36953
loss: 1.0741188526153564,grad_norm: 0.999999314309265, iteration: 36954
loss: 1.012652039527893,grad_norm: 0.9833014491890814, iteration: 36955
loss: 1.0003663301467896,grad_norm: 0.9999991877341284, iteration: 36956
loss: 0.999059796333313,grad_norm: 0.9999992108797247, iteration: 36957
loss: 1.0317102670669556,grad_norm: 0.91335478273217, iteration: 36958
loss: 0.99609375,grad_norm: 0.9172356906706693, iteration: 36959
loss: 0.9771690368652344,grad_norm: 0.9316111246971099, iteration: 36960
loss: 1.008837103843689,grad_norm: 0.9999991048195231, iteration: 36961
loss: 1.0273106098175049,grad_norm: 0.9999989699826345, iteration: 36962
loss: 1.0022002458572388,grad_norm: 0.785017886255075, iteration: 36963
loss: 1.010667085647583,grad_norm: 0.9973081425882554, iteration: 36964
loss: 0.9912651777267456,grad_norm: 0.967174721486326, iteration: 36965
loss: 1.0282920598983765,grad_norm: 0.9999991448759731, iteration: 36966
loss: 1.0444823503494263,grad_norm: 0.9999991949468272, iteration: 36967
loss: 1.090126395225525,grad_norm: 0.999999631696033, iteration: 36968
loss: 0.9837536811828613,grad_norm: 0.9069720344508542, iteration: 36969
loss: 1.008395791053772,grad_norm: 0.9637384215049448, iteration: 36970
loss: 1.0145984888076782,grad_norm: 0.9999997682681019, iteration: 36971
loss: 1.0624037981033325,grad_norm: 0.9999994056369449, iteration: 36972
loss: 1.0304621458053589,grad_norm: 0.9999990893040238, iteration: 36973
loss: 0.9822716116905212,grad_norm: 0.9999996073724639, iteration: 36974
loss: 1.0064042806625366,grad_norm: 0.9005808965484083, iteration: 36975
loss: 1.0146924257278442,grad_norm: 0.8033512849217286, iteration: 36976
loss: 1.0155589580535889,grad_norm: 0.999999195220654, iteration: 36977
loss: 1.0087648630142212,grad_norm: 0.9086145471702126, iteration: 36978
loss: 1.0128231048583984,grad_norm: 0.9811180998772719, iteration: 36979
loss: 0.9788500070571899,grad_norm: 0.9337697793970083, iteration: 36980
loss: 1.0205082893371582,grad_norm: 0.9999991029561993, iteration: 36981
loss: 1.03188157081604,grad_norm: 0.9999991315474211, iteration: 36982
loss: 1.011403203010559,grad_norm: 0.999999103239119, iteration: 36983
loss: 1.0074958801269531,grad_norm: 0.9999992655814848, iteration: 36984
loss: 1.0165437459945679,grad_norm: 0.8769229128825702, iteration: 36985
loss: 1.0190815925598145,grad_norm: 0.9214613965703754, iteration: 36986
loss: 1.0263828039169312,grad_norm: 0.9999993539611322, iteration: 36987
loss: 0.9993681311607361,grad_norm: 0.922893395685427, iteration: 36988
loss: 1.0166714191436768,grad_norm: 0.8296022962624072, iteration: 36989
loss: 1.0725090503692627,grad_norm: 0.9999992142428884, iteration: 36990
loss: 1.014186978340149,grad_norm: 0.8790371710559522, iteration: 36991
loss: 1.0078109502792358,grad_norm: 0.9923089706524241, iteration: 36992
loss: 1.0217151641845703,grad_norm: 0.8456818227080016, iteration: 36993
loss: 1.005477786064148,grad_norm: 0.9941258019118244, iteration: 36994
loss: 0.9938789010047913,grad_norm: 0.9999991081422014, iteration: 36995
loss: 1.0029006004333496,grad_norm: 0.9999992469880381, iteration: 36996
loss: 1.013055443763733,grad_norm: 0.8800445354433698, iteration: 36997
loss: 0.9609325528144836,grad_norm: 0.9894200261575834, iteration: 36998
loss: 1.0172138214111328,grad_norm: 0.9846797090060394, iteration: 36999
loss: 1.0141737461090088,grad_norm: 0.9832058493054686, iteration: 37000
loss: 0.9581448435783386,grad_norm: 0.9954114288208142, iteration: 37001
loss: 0.9996002912521362,grad_norm: 0.9824278019450858, iteration: 37002
loss: 1.024336338043213,grad_norm: 0.9999989776075339, iteration: 37003
loss: 0.973926305770874,grad_norm: 0.9571053486313253, iteration: 37004
loss: 1.0088999271392822,grad_norm: 0.9955171905998378, iteration: 37005
loss: 1.017428994178772,grad_norm: 0.9999991425868434, iteration: 37006
loss: 0.9783869385719299,grad_norm: 0.8540517787127413, iteration: 37007
loss: 1.0118180513381958,grad_norm: 0.9999989979649856, iteration: 37008
loss: 0.9802335500717163,grad_norm: 0.9259116975888874, iteration: 37009
loss: 1.0169106721878052,grad_norm: 0.9752780728187318, iteration: 37010
loss: 1.0304828882217407,grad_norm: 0.9496222310268086, iteration: 37011
loss: 1.0049251317977905,grad_norm: 0.9999991401650755, iteration: 37012
loss: 0.9968857169151306,grad_norm: 0.9999990767223863, iteration: 37013
loss: 1.0085728168487549,grad_norm: 0.8168131902179349, iteration: 37014
loss: 1.0214463472366333,grad_norm: 0.8692421442555744, iteration: 37015
loss: 1.0400323867797852,grad_norm: 0.9716992113411743, iteration: 37016
loss: 0.9877048134803772,grad_norm: 0.9539919341918627, iteration: 37017
loss: 0.9839969277381897,grad_norm: 0.9999992182786603, iteration: 37018
loss: 1.0043879747390747,grad_norm: 0.9844201447518262, iteration: 37019
loss: 1.0203322172164917,grad_norm: 0.9999993049139844, iteration: 37020
loss: 1.0161716938018799,grad_norm: 0.9282653669217364, iteration: 37021
loss: 1.005666732788086,grad_norm: 0.9999990589302348, iteration: 37022
loss: 0.9941270351409912,grad_norm: 0.9999990360788691, iteration: 37023
loss: 1.025069236755371,grad_norm: 0.9999993552407365, iteration: 37024
loss: 1.016601324081421,grad_norm: 0.9999990297730161, iteration: 37025
loss: 1.0131759643554688,grad_norm: 0.8098281098728838, iteration: 37026
loss: 0.9758159518241882,grad_norm: 0.9999992099908688, iteration: 37027
loss: 0.9951492547988892,grad_norm: 0.856332189872887, iteration: 37028
loss: 1.02121102809906,grad_norm: 0.9999996859979364, iteration: 37029
loss: 1.034279227256775,grad_norm: 0.9999997205717076, iteration: 37030
loss: 0.9844485521316528,grad_norm: 0.9999991759641346, iteration: 37031
loss: 1.0477497577667236,grad_norm: 0.8786227915924261, iteration: 37032
loss: 1.1690053939819336,grad_norm: 0.9999998688175624, iteration: 37033
loss: 1.0213338136672974,grad_norm: 0.9666122304366226, iteration: 37034
loss: 1.033944845199585,grad_norm: 0.9999992326995606, iteration: 37035
loss: 1.013317584991455,grad_norm: 0.9999991530592677, iteration: 37036
loss: 1.0296800136566162,grad_norm: 0.9999991754530305, iteration: 37037
loss: 0.9957747459411621,grad_norm: 0.8942137905650042, iteration: 37038
loss: 0.9962253570556641,grad_norm: 0.9396897470355674, iteration: 37039
loss: 0.9801110625267029,grad_norm: 0.9999990569181471, iteration: 37040
loss: 1.050191879272461,grad_norm: 0.9999993025008683, iteration: 37041
loss: 1.0038878917694092,grad_norm: 0.8878133633850288, iteration: 37042
loss: 0.9669649004936218,grad_norm: 0.9999990112037892, iteration: 37043
loss: 1.0367587804794312,grad_norm: 0.9999994555191065, iteration: 37044
loss: 1.047197699546814,grad_norm: 0.9461281498090559, iteration: 37045
loss: 0.9807694554328918,grad_norm: 0.9245033190664955, iteration: 37046
loss: 1.028049111366272,grad_norm: 0.935586967611195, iteration: 37047
loss: 0.9966966509819031,grad_norm: 0.8729718321274265, iteration: 37048
loss: 1.0041865110397339,grad_norm: 0.9571463570942815, iteration: 37049
loss: 1.0253212451934814,grad_norm: 0.9999993491955605, iteration: 37050
loss: 0.9900473952293396,grad_norm: 0.9999991627378019, iteration: 37051
loss: 1.047194480895996,grad_norm: 0.9447758224768221, iteration: 37052
loss: 0.9984021186828613,grad_norm: 0.9999991628209127, iteration: 37053
loss: 0.9682685732841492,grad_norm: 0.999999143334436, iteration: 37054
loss: 1.0196001529693604,grad_norm: 0.9999991280350584, iteration: 37055
loss: 0.9994941353797913,grad_norm: 0.9458839363720807, iteration: 37056
loss: 1.0188335180282593,grad_norm: 0.7895822320764979, iteration: 37057
loss: 1.0213673114776611,grad_norm: 0.9478580276593732, iteration: 37058
loss: 0.9583505392074585,grad_norm: 0.9777245435716394, iteration: 37059
loss: 0.992501974105835,grad_norm: 0.8126561681014584, iteration: 37060
loss: 0.9664181470870972,grad_norm: 0.9999990797511167, iteration: 37061
loss: 0.9752689599990845,grad_norm: 0.9246959330641905, iteration: 37062
loss: 1.0072481632232666,grad_norm: 0.9999992372731115, iteration: 37063
loss: 0.9302558898925781,grad_norm: 0.8545479513230759, iteration: 37064
loss: 1.016799807548523,grad_norm: 0.9340702367579949, iteration: 37065
loss: 0.9951757788658142,grad_norm: 0.8039528955995163, iteration: 37066
loss: 0.9779922962188721,grad_norm: 0.9999991210374953, iteration: 37067
loss: 0.9802595973014832,grad_norm: 0.94398532031675, iteration: 37068
loss: 0.9766327142715454,grad_norm: 0.8612898133136577, iteration: 37069
loss: 0.9987799525260925,grad_norm: 0.9634257675670799, iteration: 37070
loss: 0.9499528408050537,grad_norm: 0.9999988679883245, iteration: 37071
loss: 1.020984411239624,grad_norm: 0.9999990501679393, iteration: 37072
loss: 1.053253412246704,grad_norm: 0.9999994539127861, iteration: 37073
loss: 0.9562991857528687,grad_norm: 0.9954778915676142, iteration: 37074
loss: 0.9962195754051208,grad_norm: 0.830735673853627, iteration: 37075
loss: 0.9972202181816101,grad_norm: 0.9999999130262395, iteration: 37076
loss: 0.9793486595153809,grad_norm: 0.9999991856266128, iteration: 37077
loss: 0.9464023113250732,grad_norm: 0.9999990022018671, iteration: 37078
loss: 0.98781818151474,grad_norm: 0.9675165835652757, iteration: 37079
loss: 1.0050604343414307,grad_norm: 0.9457544813348001, iteration: 37080
loss: 1.0021016597747803,grad_norm: 0.999999081436477, iteration: 37081
loss: 0.9825232028961182,grad_norm: 0.8907352324695721, iteration: 37082
loss: 1.0456327199935913,grad_norm: 0.73057420737267, iteration: 37083
loss: 0.9985827207565308,grad_norm: 0.9999991951842051, iteration: 37084
loss: 0.9773943424224854,grad_norm: 0.9999990578845909, iteration: 37085
loss: 0.9992227554321289,grad_norm: 0.9716506839317264, iteration: 37086
loss: 0.9988377094268799,grad_norm: 0.955892290110616, iteration: 37087
loss: 1.0558445453643799,grad_norm: 0.9999991113729231, iteration: 37088
loss: 0.9850953817367554,grad_norm: 0.9999989958394342, iteration: 37089
loss: 1.0113117694854736,grad_norm: 0.9591558564970463, iteration: 37090
loss: 0.9677231907844543,grad_norm: 0.9833330076015312, iteration: 37091
loss: 1.0094151496887207,grad_norm: 0.9999993007790218, iteration: 37092
loss: 1.0227254629135132,grad_norm: 0.9999990321826717, iteration: 37093
loss: 1.0287740230560303,grad_norm: 0.9191262928937775, iteration: 37094
loss: 1.025437355041504,grad_norm: 0.99999898753803, iteration: 37095
loss: 1.0048600435256958,grad_norm: 0.8454935179890926, iteration: 37096
loss: 1.0036375522613525,grad_norm: 0.9652742002491604, iteration: 37097
loss: 0.9589999914169312,grad_norm: 0.9707010537395641, iteration: 37098
loss: 1.0577051639556885,grad_norm: 0.999999138332382, iteration: 37099
loss: 1.0278476476669312,grad_norm: 0.8851985599642205, iteration: 37100
loss: 0.9898596405982971,grad_norm: 0.9999992545823223, iteration: 37101
loss: 1.0052717924118042,grad_norm: 0.8649709994929065, iteration: 37102
loss: 1.0319586992263794,grad_norm: 0.999999530030368, iteration: 37103
loss: 1.0064650774002075,grad_norm: 0.7845515206885332, iteration: 37104
loss: 1.0328580141067505,grad_norm: 0.8851786814307667, iteration: 37105
loss: 0.9699534773826599,grad_norm: 0.9999992047411873, iteration: 37106
loss: 1.048691749572754,grad_norm: 0.924712191383102, iteration: 37107
loss: 1.0006532669067383,grad_norm: 0.9843600150107633, iteration: 37108
loss: 0.9774594902992249,grad_norm: 0.8714215038589992, iteration: 37109
loss: 1.008317470550537,grad_norm: 0.8073964482585823, iteration: 37110
loss: 1.0103946924209595,grad_norm: 0.9942450839892067, iteration: 37111
loss: 1.0082647800445557,grad_norm: 0.9103101285748378, iteration: 37112
loss: 1.0042930841445923,grad_norm: 0.9373795679706142, iteration: 37113
loss: 1.0016498565673828,grad_norm: 0.9694542488144253, iteration: 37114
loss: 0.9985231161117554,grad_norm: 0.9667918763906063, iteration: 37115
loss: 1.0168601274490356,grad_norm: 0.9972323173018612, iteration: 37116
loss: 1.0028749704360962,grad_norm: 0.9999992113821633, iteration: 37117
loss: 1.0157166719436646,grad_norm: 0.7759456851742886, iteration: 37118
loss: 1.0462430715560913,grad_norm: 0.999999507365824, iteration: 37119
loss: 1.0091227293014526,grad_norm: 0.8885376833197837, iteration: 37120
loss: 1.0003299713134766,grad_norm: 0.8365491118520767, iteration: 37121
loss: 0.9918514490127563,grad_norm: 0.9999992320323998, iteration: 37122
loss: 0.9844974875450134,grad_norm: 0.9999990853196778, iteration: 37123
loss: 0.994615912437439,grad_norm: 0.8651397084507864, iteration: 37124
loss: 0.9909562468528748,grad_norm: 0.9966285327702359, iteration: 37125
loss: 1.0018184185028076,grad_norm: 0.9999990928725541, iteration: 37126
loss: 1.0116292238235474,grad_norm: 0.8372619449752373, iteration: 37127
loss: 1.0144912004470825,grad_norm: 0.9887089474388441, iteration: 37128
loss: 1.0059864521026611,grad_norm: 0.9999991423376774, iteration: 37129
loss: 1.0066838264465332,grad_norm: 0.8693556036477481, iteration: 37130
loss: 1.0121501684188843,grad_norm: 0.9999990564115535, iteration: 37131
loss: 1.0044101476669312,grad_norm: 0.9253400891518074, iteration: 37132
loss: 0.9956912398338318,grad_norm: 0.9999994286470925, iteration: 37133
loss: 1.037239909172058,grad_norm: 0.9991583609440374, iteration: 37134
loss: 1.0102908611297607,grad_norm: 0.9999990640200023, iteration: 37135
loss: 1.0048209428787231,grad_norm: 0.9999992016332833, iteration: 37136
loss: 0.9906145334243774,grad_norm: 0.9999991115879113, iteration: 37137
loss: 0.9819644689559937,grad_norm: 0.9443501025196501, iteration: 37138
loss: 1.0383223295211792,grad_norm: 0.999999093027012, iteration: 37139
loss: 0.9387802481651306,grad_norm: 0.9999991756208388, iteration: 37140
loss: 0.982574462890625,grad_norm: 0.9622048952311711, iteration: 37141
loss: 0.9952743053436279,grad_norm: 0.976696117834873, iteration: 37142
loss: 1.0096104145050049,grad_norm: 0.9862253741115339, iteration: 37143
loss: 1.0225448608398438,grad_norm: 0.9999990991943097, iteration: 37144
loss: 1.0200679302215576,grad_norm: 0.903377602861544, iteration: 37145
loss: 1.003220558166504,grad_norm: 0.9999990874703165, iteration: 37146
loss: 1.003552794456482,grad_norm: 0.9999991678307285, iteration: 37147
loss: 1.0124717950820923,grad_norm: 0.9147862594680961, iteration: 37148
loss: 1.0324028730392456,grad_norm: 0.9999990729772612, iteration: 37149
loss: 1.016988754272461,grad_norm: 0.9999993381245545, iteration: 37150
loss: 1.0014783143997192,grad_norm: 0.9999989886610271, iteration: 37151
loss: 1.0032644271850586,grad_norm: 0.825624675242533, iteration: 37152
loss: 0.9876524209976196,grad_norm: 0.9999990759822587, iteration: 37153
loss: 0.9957022666931152,grad_norm: 0.9353532303010731, iteration: 37154
loss: 1.0286213159561157,grad_norm: 0.9701709263941316, iteration: 37155
loss: 1.0223381519317627,grad_norm: 0.8567139766634129, iteration: 37156
loss: 0.9925548434257507,grad_norm: 0.9059012203417068, iteration: 37157
loss: 1.0239461660385132,grad_norm: 0.85007827146244, iteration: 37158
loss: 1.028253436088562,grad_norm: 0.99999938647652, iteration: 37159
loss: 1.0110440254211426,grad_norm: 0.9999991151183251, iteration: 37160
loss: 1.0049086809158325,grad_norm: 0.9999989999530059, iteration: 37161
loss: 0.9837379455566406,grad_norm: 0.9999991891427096, iteration: 37162
loss: 1.0461252927780151,grad_norm: 0.9999991817879155, iteration: 37163
loss: 1.0083059072494507,grad_norm: 0.8724341192224309, iteration: 37164
loss: 1.0238895416259766,grad_norm: 0.9041288780649147, iteration: 37165
loss: 0.9877281785011292,grad_norm: 0.9644972609317858, iteration: 37166
loss: 1.0295056104660034,grad_norm: 0.9331436602472115, iteration: 37167
loss: 0.969156801700592,grad_norm: 0.9652744231971966, iteration: 37168
loss: 1.0227686166763306,grad_norm: 0.9665217427302911, iteration: 37169
loss: 1.0279474258422852,grad_norm: 0.942821143783946, iteration: 37170
loss: 0.9927581548690796,grad_norm: 0.7582896519751027, iteration: 37171
loss: 1.0295253992080688,grad_norm: 0.8807178494167159, iteration: 37172
loss: 0.9856160879135132,grad_norm: 0.999999119964733, iteration: 37173
loss: 1.0093357563018799,grad_norm: 0.878454167853207, iteration: 37174
loss: 1.0437164306640625,grad_norm: 0.9999992499492018, iteration: 37175
loss: 1.0265281200408936,grad_norm: 0.9999991388480813, iteration: 37176
loss: 1.0319240093231201,grad_norm: 0.8740130743309351, iteration: 37177
loss: 0.9937068819999695,grad_norm: 0.9999990949601035, iteration: 37178
loss: 0.989332377910614,grad_norm: 0.9999990661317385, iteration: 37179
loss: 0.9834283590316772,grad_norm: 0.9999990313600011, iteration: 37180
loss: 0.9944785237312317,grad_norm: 0.7395446808039889, iteration: 37181
loss: 1.0018014907836914,grad_norm: 0.9999989480788398, iteration: 37182
loss: 1.0082354545593262,grad_norm: 0.9999990667931236, iteration: 37183
loss: 1.0375444889068604,grad_norm: 0.9790698484747477, iteration: 37184
loss: 1.011681079864502,grad_norm: 0.9999990924174116, iteration: 37185
loss: 1.0220603942871094,grad_norm: 0.9999991881336002, iteration: 37186
loss: 1.0327156782150269,grad_norm: 0.8261243656515352, iteration: 37187
loss: 0.9892608523368835,grad_norm: 0.9999990298168075, iteration: 37188
loss: 1.0376015901565552,grad_norm: 0.973068894602879, iteration: 37189
loss: 1.0068879127502441,grad_norm: 0.9999990213091202, iteration: 37190
loss: 1.0303137302398682,grad_norm: 0.9700133945131518, iteration: 37191
loss: 1.0311015844345093,grad_norm: 0.9773645956305227, iteration: 37192
loss: 1.0226494073867798,grad_norm: 0.9999992845025611, iteration: 37193
loss: 1.0219281911849976,grad_norm: 0.8310246278480395, iteration: 37194
loss: 1.03301203250885,grad_norm: 0.9125263160887727, iteration: 37195
loss: 1.0323842763900757,grad_norm: 0.9061830620699125, iteration: 37196
loss: 0.9833829998970032,grad_norm: 0.99999923616753, iteration: 37197
loss: 0.9887632131576538,grad_norm: 0.9986278865512804, iteration: 37198
loss: 1.0108537673950195,grad_norm: 0.9999993477402612, iteration: 37199
loss: 1.0183045864105225,grad_norm: 0.9999990312615176, iteration: 37200
loss: 0.9999731779098511,grad_norm: 0.9475029041100931, iteration: 37201
loss: 0.9579134583473206,grad_norm: 0.9999990090765314, iteration: 37202
loss: 0.9874513745307922,grad_norm: 0.903316540246827, iteration: 37203
loss: 1.0073729753494263,grad_norm: 0.9392474648202102, iteration: 37204
loss: 1.0037193298339844,grad_norm: 0.8631447736314457, iteration: 37205
loss: 0.9994388818740845,grad_norm: 0.901611957186793, iteration: 37206
loss: 1.0020853281021118,grad_norm: 0.9885517039139193, iteration: 37207
loss: 1.0051510334014893,grad_norm: 0.9999991159235592, iteration: 37208
loss: 1.0327630043029785,grad_norm: 0.9037449043786479, iteration: 37209
loss: 0.971197247505188,grad_norm: 0.9608994494248059, iteration: 37210
loss: 0.9947556257247925,grad_norm: 0.9999992149413348, iteration: 37211
loss: 0.9724130034446716,grad_norm: 0.9999990748046359, iteration: 37212
loss: 0.9872109889984131,grad_norm: 0.8810405254308724, iteration: 37213
loss: 0.977101743221283,grad_norm: 0.9152309159941705, iteration: 37214
loss: 1.020538330078125,grad_norm: 0.9999990191658864, iteration: 37215
loss: 0.9749439358711243,grad_norm: 0.9999991823762204, iteration: 37216
loss: 1.009655475616455,grad_norm: 0.9398490800265659, iteration: 37217
loss: 1.0205689668655396,grad_norm: 0.999999404919634, iteration: 37218
loss: 0.993139386177063,grad_norm: 0.8320970884590432, iteration: 37219
loss: 1.002143383026123,grad_norm: 0.8450549967804811, iteration: 37220
loss: 0.9852562546730042,grad_norm: 0.9999991897735804, iteration: 37221
loss: 0.9853195548057556,grad_norm: 0.9999992518733014, iteration: 37222
loss: 0.9850562214851379,grad_norm: 0.9556728291505094, iteration: 37223
loss: 0.9744827151298523,grad_norm: 0.8084175164794489, iteration: 37224
loss: 1.0112943649291992,grad_norm: 0.9999990267370812, iteration: 37225
loss: 1.0351173877716064,grad_norm: 0.9720267430600529, iteration: 37226
loss: 1.0021227598190308,grad_norm: 0.9820643843055764, iteration: 37227
loss: 1.0133827924728394,grad_norm: 0.9999990833284527, iteration: 37228
loss: 0.9784510731697083,grad_norm: 0.9999991071723072, iteration: 37229
loss: 0.9752956628799438,grad_norm: 0.9999990406860852, iteration: 37230
loss: 1.0242486000061035,grad_norm: 0.9999992150503294, iteration: 37231
loss: 1.014052152633667,grad_norm: 0.8653484345895823, iteration: 37232
loss: 1.0087306499481201,grad_norm: 0.9704630193454599, iteration: 37233
loss: 1.0391521453857422,grad_norm: 0.9999991086136417, iteration: 37234
loss: 1.000796914100647,grad_norm: 0.88378719717511, iteration: 37235
loss: 1.031502366065979,grad_norm: 0.9730076623881193, iteration: 37236
loss: 1.0032943487167358,grad_norm: 0.953485129984656, iteration: 37237
loss: 1.0216916799545288,grad_norm: 0.9999990646621711, iteration: 37238
loss: 0.9769866466522217,grad_norm: 0.9195042242637851, iteration: 37239
loss: 0.9913196563720703,grad_norm: 0.9999992447286375, iteration: 37240
loss: 0.9940199851989746,grad_norm: 0.9999990662865008, iteration: 37241
loss: 0.9649739861488342,grad_norm: 0.9999990840535102, iteration: 37242
loss: 1.0011553764343262,grad_norm: 0.905851935164567, iteration: 37243
loss: 0.9975402355194092,grad_norm: 0.9278956577297622, iteration: 37244
loss: 1.0103731155395508,grad_norm: 0.9881763716423834, iteration: 37245
loss: 0.993626058101654,grad_norm: 0.9561530958836818, iteration: 37246
loss: 0.981478750705719,grad_norm: 0.9999990039836953, iteration: 37247
loss: 0.9598953723907471,grad_norm: 0.9999993148578389, iteration: 37248
loss: 0.9927518963813782,grad_norm: 0.950362793638872, iteration: 37249
loss: 0.9921720027923584,grad_norm: 0.999999063404712, iteration: 37250
loss: 1.0087769031524658,grad_norm: 0.9999991271338481, iteration: 37251
loss: 1.0054107904434204,grad_norm: 0.9999991267667273, iteration: 37252
loss: 0.9909759759902954,grad_norm: 0.9736211218866444, iteration: 37253
loss: 0.9810013771057129,grad_norm: 0.9387375874733513, iteration: 37254
loss: 1.0240540504455566,grad_norm: 0.9934383314823607, iteration: 37255
loss: 0.9893494248390198,grad_norm: 0.9244259621421611, iteration: 37256
loss: 0.9936796426773071,grad_norm: 0.8815230027946258, iteration: 37257
loss: 1.0276048183441162,grad_norm: 0.9999991390246181, iteration: 37258
loss: 0.9940286874771118,grad_norm: 0.990169887668721, iteration: 37259
loss: 0.9898696541786194,grad_norm: 0.8656061474652769, iteration: 37260
loss: 0.9860147833824158,grad_norm: 0.9639213668812906, iteration: 37261
loss: 0.9931714534759521,grad_norm: 0.758757757999904, iteration: 37262
loss: 0.9964861273765564,grad_norm: 0.9999991050617696, iteration: 37263
loss: 1.0089830160140991,grad_norm: 0.9999991991111838, iteration: 37264
loss: 1.0151716470718384,grad_norm: 0.9999992384873755, iteration: 37265
loss: 0.9765336513519287,grad_norm: 0.9999991500506277, iteration: 37266
loss: 1.0221575498580933,grad_norm: 0.9381559495637528, iteration: 37267
loss: 1.0405969619750977,grad_norm: 0.9477444935803025, iteration: 37268
loss: 1.009870171546936,grad_norm: 0.9999996115635739, iteration: 37269
loss: 1.0169481039047241,grad_norm: 0.9999991096211316, iteration: 37270
loss: 0.9358010292053223,grad_norm: 0.9999990255373338, iteration: 37271
loss: 1.0058658123016357,grad_norm: 0.9999992625596043, iteration: 37272
loss: 0.9887023568153381,grad_norm: 0.9846001917512911, iteration: 37273
loss: 1.0655336380004883,grad_norm: 0.9999989335343366, iteration: 37274
loss: 1.0071245431900024,grad_norm: 0.9999990241963514, iteration: 37275
loss: 1.0056096315383911,grad_norm: 0.9963414560874477, iteration: 37276
loss: 1.0194114446640015,grad_norm: 0.8910524652731332, iteration: 37277
loss: 1.0173203945159912,grad_norm: 0.9999990376431237, iteration: 37278
loss: 1.022762417793274,grad_norm: 0.9486968571227591, iteration: 37279
loss: 1.0075035095214844,grad_norm: 0.9216081572738228, iteration: 37280
loss: 1.021034836769104,grad_norm: 0.7976382085654506, iteration: 37281
loss: 0.99910569190979,grad_norm: 0.8431778800688365, iteration: 37282
loss: 1.0205641984939575,grad_norm: 0.9825400802418186, iteration: 37283
loss: 0.9719294905662537,grad_norm: 0.9452760105108864, iteration: 37284
loss: 1.004902958869934,grad_norm: 0.999999089291989, iteration: 37285
loss: 0.9865514039993286,grad_norm: 0.9515839374582933, iteration: 37286
loss: 1.0222971439361572,grad_norm: 0.9999997621766293, iteration: 37287
loss: 1.0288257598876953,grad_norm: 0.9999991855578823, iteration: 37288
loss: 1.0375460386276245,grad_norm: 0.9999991115373532, iteration: 37289
loss: 1.0052490234375,grad_norm: 0.9999990909698436, iteration: 37290
loss: 0.9955916404724121,grad_norm: 0.8881723012789621, iteration: 37291
loss: 0.9831291437149048,grad_norm: 0.9999991269072853, iteration: 37292
loss: 1.0358134508132935,grad_norm: 0.9999991412736021, iteration: 37293
loss: 1.025064468383789,grad_norm: 0.8715136876153364, iteration: 37294
loss: 1.007720947265625,grad_norm: 0.8410622050153979, iteration: 37295
loss: 1.0024961233139038,grad_norm: 0.9999992836023291, iteration: 37296
loss: 1.0033458471298218,grad_norm: 0.9345239188412674, iteration: 37297
loss: 1.021143913269043,grad_norm: 0.7083412751475483, iteration: 37298
loss: 1.0252094268798828,grad_norm: 0.9999990258886677, iteration: 37299
loss: 0.9893856644630432,grad_norm: 0.9597641759452992, iteration: 37300
loss: 0.9791389107704163,grad_norm: 0.9732030055330113, iteration: 37301
loss: 0.9862189292907715,grad_norm: 0.7782330769596221, iteration: 37302
loss: 0.9993829727172852,grad_norm: 0.9764345826301154, iteration: 37303
loss: 0.9669923186302185,grad_norm: 0.9767222938033693, iteration: 37304
loss: 1.0191490650177002,grad_norm: 0.9053359301284771, iteration: 37305
loss: 1.0086283683776855,grad_norm: 0.9060556838165218, iteration: 37306
loss: 1.0057971477508545,grad_norm: 0.9999991248408022, iteration: 37307
loss: 1.0269182920455933,grad_norm: 0.9999991860489502, iteration: 37308
loss: 0.9993594288825989,grad_norm: 0.9776577844909039, iteration: 37309
loss: 1.0202685594558716,grad_norm: 0.9999991794486357, iteration: 37310
loss: 1.0042062997817993,grad_norm: 0.9999991990908224, iteration: 37311
loss: 1.0177699327468872,grad_norm: 0.9999990400572846, iteration: 37312
loss: 1.0337413549423218,grad_norm: 0.9397791698266634, iteration: 37313
loss: 1.0016282796859741,grad_norm: 0.8150945724137013, iteration: 37314
loss: 1.0032097101211548,grad_norm: 0.8604086511139796, iteration: 37315
loss: 1.016984224319458,grad_norm: 0.8212196167439338, iteration: 37316
loss: 1.0334973335266113,grad_norm: 0.9999991896631516, iteration: 37317
loss: 1.0443329811096191,grad_norm: 0.9032404817230153, iteration: 37318
loss: 0.9678856134414673,grad_norm: 0.9270467710590817, iteration: 37319
loss: 0.9961327314376831,grad_norm: 0.8752843071535753, iteration: 37320
loss: 0.9949162602424622,grad_norm: 0.9596388496203009, iteration: 37321
loss: 1.0088354349136353,grad_norm: 0.9996888537614544, iteration: 37322
loss: 1.0212513208389282,grad_norm: 0.9900022843953625, iteration: 37323
loss: 1.0075803995132446,grad_norm: 0.9999991023181266, iteration: 37324
loss: 1.0207343101501465,grad_norm: 0.9999990649157402, iteration: 37325
loss: 1.0293850898742676,grad_norm: 0.9999994019398226, iteration: 37326
loss: 1.0067856311798096,grad_norm: 0.9999993525643848, iteration: 37327
loss: 0.9838190078735352,grad_norm: 0.999999127519443, iteration: 37328
loss: 1.0088070631027222,grad_norm: 0.8481398115103668, iteration: 37329
loss: 0.9920929074287415,grad_norm: 0.999999099688661, iteration: 37330
loss: 1.0032070875167847,grad_norm: 0.8778464472197177, iteration: 37331
loss: 0.9817167520523071,grad_norm: 0.9999990790584817, iteration: 37332
loss: 1.029057502746582,grad_norm: 0.9999990465656101, iteration: 37333
loss: 1.0295450687408447,grad_norm: 0.8764288769176599, iteration: 37334
loss: 0.9654085040092468,grad_norm: 0.9735413951100156, iteration: 37335
loss: 0.9748643040657043,grad_norm: 0.9777825247224763, iteration: 37336
loss: 1.0156960487365723,grad_norm: 0.9999991668916763, iteration: 37337
loss: 1.0012425184249878,grad_norm: 0.7071910476237522, iteration: 37338
loss: 1.001587152481079,grad_norm: 0.9466740266188522, iteration: 37339
loss: 1.002496600151062,grad_norm: 0.9902790273024389, iteration: 37340
loss: 0.9982419610023499,grad_norm: 0.9999990518144363, iteration: 37341
loss: 1.0190101861953735,grad_norm: 0.9999992037688269, iteration: 37342
loss: 1.034795880317688,grad_norm: 0.7523187975070504, iteration: 37343
loss: 1.005741000175476,grad_norm: 0.9999993180764013, iteration: 37344
loss: 1.0043240785598755,grad_norm: 0.993324895304821, iteration: 37345
loss: 1.0079231262207031,grad_norm: 0.948822596429462, iteration: 37346
loss: 1.0027164220809937,grad_norm: 0.9999989089573247, iteration: 37347
loss: 1.0017106533050537,grad_norm: 0.9566651597825329, iteration: 37348
loss: 0.9591439962387085,grad_norm: 0.9029137860518031, iteration: 37349
loss: 0.9958363771438599,grad_norm: 0.9135059984325553, iteration: 37350
loss: 1.0145456790924072,grad_norm: 0.9999989864946294, iteration: 37351
loss: 1.0179468393325806,grad_norm: 0.9999991873724535, iteration: 37352
loss: 0.9755778312683105,grad_norm: 0.9598551132582641, iteration: 37353
loss: 0.9981634616851807,grad_norm: 0.9333897436735765, iteration: 37354
loss: 0.9929484128952026,grad_norm: 0.8831724966685653, iteration: 37355
loss: 0.994371771812439,grad_norm: 0.973917511731264, iteration: 37356
loss: 1.0143109560012817,grad_norm: 0.7247280537256012, iteration: 37357
loss: 0.9996705055236816,grad_norm: 0.9999991188596734, iteration: 37358
loss: 1.0324994325637817,grad_norm: 0.9885191003191904, iteration: 37359
loss: 0.9813498854637146,grad_norm: 0.7894464188616191, iteration: 37360
loss: 1.013883352279663,grad_norm: 0.9999990339840413, iteration: 37361
loss: 1.0359407663345337,grad_norm: 0.9781031315959132, iteration: 37362
loss: 1.0106382369995117,grad_norm: 0.903162012770505, iteration: 37363
loss: 0.9913521409034729,grad_norm: 0.8959218101646163, iteration: 37364
loss: 1.010961651802063,grad_norm: 0.9999990499048079, iteration: 37365
loss: 0.9981832504272461,grad_norm: 0.9999990966693857, iteration: 37366
loss: 0.9885887503623962,grad_norm: 0.9459668868314988, iteration: 37367
loss: 0.9988804459571838,grad_norm: 0.9889713220151599, iteration: 37368
loss: 1.0351555347442627,grad_norm: 0.8903495586498583, iteration: 37369
loss: 1.0095349550247192,grad_norm: 0.9999994593927635, iteration: 37370
loss: 1.0131536722183228,grad_norm: 0.9999990830028215, iteration: 37371
loss: 1.0004514455795288,grad_norm: 0.9999847739614526, iteration: 37372
loss: 1.0347185134887695,grad_norm: 0.980281531758841, iteration: 37373
loss: 1.0037636756896973,grad_norm: 0.9388334855724102, iteration: 37374
loss: 0.9827932715415955,grad_norm: 0.8118299264650796, iteration: 37375
loss: 1.0027644634246826,grad_norm: 0.9631388183742655, iteration: 37376
loss: 0.9870708584785461,grad_norm: 0.9999990360928194, iteration: 37377
loss: 1.0064154863357544,grad_norm: 0.9881119231752762, iteration: 37378
loss: 0.9885208606719971,grad_norm: 0.9999991189712119, iteration: 37379
loss: 1.027154803276062,grad_norm: 0.9267651814144973, iteration: 37380
loss: 1.011717677116394,grad_norm: 0.86315580397755, iteration: 37381
loss: 0.9844391942024231,grad_norm: 0.8175130218851946, iteration: 37382
loss: 1.0120073556900024,grad_norm: 0.9766568400955417, iteration: 37383
loss: 1.1107903718948364,grad_norm: 0.9999998392486036, iteration: 37384
loss: 1.044268250465393,grad_norm: 0.9999998222905433, iteration: 37385
loss: 1.011396050453186,grad_norm: 0.9999993978838945, iteration: 37386
loss: 0.9906548857688904,grad_norm: 0.8676133168929779, iteration: 37387
loss: 0.9651391506195068,grad_norm: 0.9832979068068335, iteration: 37388
loss: 0.9942403435707092,grad_norm: 0.94748875898928, iteration: 37389
loss: 1.018600583076477,grad_norm: 0.9618152904949466, iteration: 37390
loss: 1.0088449716567993,grad_norm: 0.9156000696230955, iteration: 37391
loss: 1.0378105640411377,grad_norm: 0.9999996125187861, iteration: 37392
loss: 1.0357797145843506,grad_norm: 0.9008280345086123, iteration: 37393
loss: 0.9955242872238159,grad_norm: 0.970885064583132, iteration: 37394
loss: 0.9883859753608704,grad_norm: 0.999999126185328, iteration: 37395
loss: 0.985307514667511,grad_norm: 0.8406897436519775, iteration: 37396
loss: 1.003991961479187,grad_norm: 0.9999992006445018, iteration: 37397
loss: 1.0138413906097412,grad_norm: 0.9999990960477544, iteration: 37398
loss: 1.0127780437469482,grad_norm: 0.9999990065099778, iteration: 37399
loss: 1.0227341651916504,grad_norm: 0.8951202287055603, iteration: 37400
loss: 0.9955894351005554,grad_norm: 0.9369692737397097, iteration: 37401
loss: 1.0168774127960205,grad_norm: 0.9999993954811607, iteration: 37402
loss: 1.0136862993240356,grad_norm: 0.9174286780293889, iteration: 37403
loss: 1.0090439319610596,grad_norm: 0.7857088871915306, iteration: 37404
loss: 1.0136650800704956,grad_norm: 0.9464704437958847, iteration: 37405
loss: 0.9848940968513489,grad_norm: 0.9099937750897644, iteration: 37406
loss: 0.9962461590766907,grad_norm: 0.9466142047471963, iteration: 37407
loss: 1.0029209852218628,grad_norm: 0.9999991358459731, iteration: 37408
loss: 1.0016905069351196,grad_norm: 0.999999229042068, iteration: 37409
loss: 1.0143845081329346,grad_norm: 0.7987264795576002, iteration: 37410
loss: 1.010178804397583,grad_norm: 0.9305449488795723, iteration: 37411
loss: 0.9811232686042786,grad_norm: 0.962144349317834, iteration: 37412
loss: 0.9850520491600037,grad_norm: 0.9999990222519632, iteration: 37413
loss: 0.9936160445213318,grad_norm: 0.9166078219934457, iteration: 37414
loss: 1.025237798690796,grad_norm: 0.9999990750114877, iteration: 37415
loss: 0.958935022354126,grad_norm: 0.9313649013966167, iteration: 37416
loss: 1.0362764596939087,grad_norm: 0.9999990415097871, iteration: 37417
loss: 0.9699690341949463,grad_norm: 0.8921278739395962, iteration: 37418
loss: 0.9764041304588318,grad_norm: 0.9999990021402119, iteration: 37419
loss: 0.9998860359191895,grad_norm: 0.9999994843843729, iteration: 37420
loss: 0.9675708413124084,grad_norm: 0.8403568406793962, iteration: 37421
loss: 1.0621765851974487,grad_norm: 0.999999019049509, iteration: 37422
loss: 1.0149173736572266,grad_norm: 0.9999990469682618, iteration: 37423
loss: 1.0292147397994995,grad_norm: 0.9452804982549988, iteration: 37424
loss: 0.9911214709281921,grad_norm: 0.999999205447369, iteration: 37425
loss: 0.9950780272483826,grad_norm: 0.7958024789750842, iteration: 37426
loss: 1.002685308456421,grad_norm: 0.9790801258615635, iteration: 37427
loss: 1.004662275314331,grad_norm: 0.9683197114504674, iteration: 37428
loss: 1.0037111043930054,grad_norm: 0.9191237568973949, iteration: 37429
loss: 1.0237787961959839,grad_norm: 0.8781062306137215, iteration: 37430
loss: 1.0220298767089844,grad_norm: 0.9999990503017329, iteration: 37431
loss: 1.014074683189392,grad_norm: 0.9999991014382976, iteration: 37432
loss: 0.9872115850448608,grad_norm: 0.8765050123601228, iteration: 37433
loss: 0.9657869338989258,grad_norm: 0.9999992041870843, iteration: 37434
loss: 0.988386332988739,grad_norm: 0.9432509278849293, iteration: 37435
loss: 1.0114766359329224,grad_norm: 0.83402425948094, iteration: 37436
loss: 1.0108579397201538,grad_norm: 0.9999989169719464, iteration: 37437
loss: 1.002015471458435,grad_norm: 0.9509672606293532, iteration: 37438
loss: 0.9903698563575745,grad_norm: 0.9306425349770959, iteration: 37439
loss: 1.0259106159210205,grad_norm: 0.9067633723542297, iteration: 37440
loss: 0.9943615198135376,grad_norm: 0.9999991260780462, iteration: 37441
loss: 1.0316025018692017,grad_norm: 0.8568837570853246, iteration: 37442
loss: 1.0522360801696777,grad_norm: 0.9999997960193299, iteration: 37443
loss: 0.9999797344207764,grad_norm: 0.9925563625263836, iteration: 37444
loss: 0.973275363445282,grad_norm: 0.9605472520173706, iteration: 37445
loss: 1.0005851984024048,grad_norm: 0.9999991309422511, iteration: 37446
loss: 1.0186128616333008,grad_norm: 0.9999989853925526, iteration: 37447
loss: 1.0331088304519653,grad_norm: 0.9229302795609735, iteration: 37448
loss: 1.005250334739685,grad_norm: 0.8908824035531583, iteration: 37449
loss: 1.0203605890274048,grad_norm: 0.9585450285920986, iteration: 37450
loss: 0.9660869240760803,grad_norm: 0.9999991953694687, iteration: 37451
loss: 1.040097951889038,grad_norm: 0.9999989931789773, iteration: 37452
loss: 1.0161793231964111,grad_norm: 0.9895519555939456, iteration: 37453
loss: 1.01198148727417,grad_norm: 0.9999995675367588, iteration: 37454
loss: 1.0456231832504272,grad_norm: 0.9999996859326993, iteration: 37455
loss: 1.0054913759231567,grad_norm: 0.8717576891854356, iteration: 37456
loss: 0.9818123579025269,grad_norm: 0.9999991371984324, iteration: 37457
loss: 0.9930565357208252,grad_norm: 0.8646018555931013, iteration: 37458
loss: 1.026288628578186,grad_norm: 0.999999516362378, iteration: 37459
loss: 1.0348377227783203,grad_norm: 0.9999994849831644, iteration: 37460
loss: 0.997528612613678,grad_norm: 0.9686997198509273, iteration: 37461
loss: 0.9808605313301086,grad_norm: 0.9999990899698411, iteration: 37462
loss: 1.0085638761520386,grad_norm: 0.8179405176418607, iteration: 37463
loss: 1.010136604309082,grad_norm: 0.9828485375855329, iteration: 37464
loss: 0.9839186668395996,grad_norm: 0.99999902562871, iteration: 37465
loss: 1.0092499256134033,grad_norm: 0.9252112584062665, iteration: 37466
loss: 0.9880123734474182,grad_norm: 0.9045229515206696, iteration: 37467
loss: 0.9953687191009521,grad_norm: 0.9520984599913883, iteration: 37468
loss: 0.9915145039558411,grad_norm: 0.9999995115742438, iteration: 37469
loss: 1.0144703388214111,grad_norm: 0.989626933087598, iteration: 37470
loss: 0.9920459985733032,grad_norm: 0.9999990407306886, iteration: 37471
loss: 1.0004751682281494,grad_norm: 0.9099977824765738, iteration: 37472
loss: 1.0136382579803467,grad_norm: 0.9999991030271922, iteration: 37473
loss: 1.0260757207870483,grad_norm: 0.9825843405364686, iteration: 37474
loss: 0.9694163203239441,grad_norm: 0.9259056165101073, iteration: 37475
loss: 0.9930612444877625,grad_norm: 0.9999993804663183, iteration: 37476
loss: 0.990439772605896,grad_norm: 0.8309983227507616, iteration: 37477
loss: 1.0342005491256714,grad_norm: 0.9308334384221962, iteration: 37478
loss: 1.0371425151824951,grad_norm: 0.9999990271777864, iteration: 37479
loss: 1.0037097930908203,grad_norm: 0.9999991274955813, iteration: 37480
loss: 1.0134180784225464,grad_norm: 0.9999990666967082, iteration: 37481
loss: 0.9898461699485779,grad_norm: 0.9702129270713478, iteration: 37482
loss: 1.0177377462387085,grad_norm: 0.8784852851755862, iteration: 37483
loss: 1.0560227632522583,grad_norm: 0.9999996203654047, iteration: 37484
loss: 1.0290759801864624,grad_norm: 0.999999611352843, iteration: 37485
loss: 0.9760885238647461,grad_norm: 0.9999990021150564, iteration: 37486
loss: 1.0226991176605225,grad_norm: 0.8879130632122607, iteration: 37487
loss: 0.9777633547782898,grad_norm: 0.9999991930306884, iteration: 37488
loss: 0.9957926273345947,grad_norm: 0.9999997231620148, iteration: 37489
loss: 0.9839597940444946,grad_norm: 0.9999992361949778, iteration: 37490
loss: 0.9962614178657532,grad_norm: 0.9999990984866615, iteration: 37491
loss: 1.0275964736938477,grad_norm: 0.9999991500451132, iteration: 37492
loss: 0.9940652847290039,grad_norm: 0.9628141939819509, iteration: 37493
loss: 1.0887954235076904,grad_norm: 0.9999995034751095, iteration: 37494
loss: 1.012529730796814,grad_norm: 0.9999990739275486, iteration: 37495
loss: 1.0311223268508911,grad_norm: 0.9716172493598159, iteration: 37496
loss: 1.0039671659469604,grad_norm: 0.8021856163988864, iteration: 37497
loss: 0.9702541828155518,grad_norm: 0.861396524360804, iteration: 37498
loss: 1.0991398096084595,grad_norm: 0.999999937360665, iteration: 37499
loss: 1.0044242143630981,grad_norm: 0.9999990819651113, iteration: 37500
loss: 1.0219827890396118,grad_norm: 0.8577525924263975, iteration: 37501
loss: 0.9928536415100098,grad_norm: 0.999999026646349, iteration: 37502
loss: 1.0544993877410889,grad_norm: 0.9999997028843938, iteration: 37503
loss: 0.9956994652748108,grad_norm: 0.9999990859871067, iteration: 37504
loss: 1.013595461845398,grad_norm: 0.9999991887349629, iteration: 37505
loss: 1.0303559303283691,grad_norm: 0.9999997108178902, iteration: 37506
loss: 1.0356653928756714,grad_norm: 0.999999192962494, iteration: 37507
loss: 0.9939255118370056,grad_norm: 0.9999994833515293, iteration: 37508
loss: 1.0204776525497437,grad_norm: 0.9999990703336122, iteration: 37509
loss: 0.9714978933334351,grad_norm: 0.9999991843975926, iteration: 37510
loss: 1.0121921300888062,grad_norm: 0.971616077748816, iteration: 37511
loss: 1.038071632385254,grad_norm: 0.9999992993603577, iteration: 37512
loss: 1.0323576927185059,grad_norm: 0.8905832993589763, iteration: 37513
loss: 1.0340361595153809,grad_norm: 0.9999995804098922, iteration: 37514
loss: 0.9651293754577637,grad_norm: 0.8762499293604021, iteration: 37515
loss: 0.9804446697235107,grad_norm: 0.7882462598129508, iteration: 37516
loss: 1.0057512521743774,grad_norm: 0.9234443373010496, iteration: 37517
loss: 0.9847844243049622,grad_norm: 0.9042204173680121, iteration: 37518
loss: 1.0034751892089844,grad_norm: 0.8898135547200715, iteration: 37519
loss: 1.030502438545227,grad_norm: 0.8659292616018, iteration: 37520
loss: 1.0426589250564575,grad_norm: 0.9999990666327425, iteration: 37521
loss: 1.0369277000427246,grad_norm: 0.8831163546659867, iteration: 37522
loss: 0.9966663718223572,grad_norm: 0.8423808367835584, iteration: 37523
loss: 0.9998437762260437,grad_norm: 0.9999992675495389, iteration: 37524
loss: 0.981633186340332,grad_norm: 0.9999998386249037, iteration: 37525
loss: 1.0243804454803467,grad_norm: 0.9999993667603987, iteration: 37526
loss: 0.9977113008499146,grad_norm: 0.9999990223012093, iteration: 37527
loss: 1.0252857208251953,grad_norm: 0.9991738609717948, iteration: 37528
loss: 0.9950564503669739,grad_norm: 0.8119607410767931, iteration: 37529
loss: 0.9800307750701904,grad_norm: 0.9999991658284201, iteration: 37530
loss: 1.0145933628082275,grad_norm: 0.9999991178956126, iteration: 37531
loss: 1.0693060159683228,grad_norm: 0.9999992099430137, iteration: 37532
loss: 0.9922612905502319,grad_norm: 0.9999991327814948, iteration: 37533
loss: 1.018292784690857,grad_norm: 0.9999991196275803, iteration: 37534
loss: 1.0379480123519897,grad_norm: 0.9999993820243769, iteration: 37535
loss: 1.0147048234939575,grad_norm: 0.9575343763693722, iteration: 37536
loss: 0.9707202911376953,grad_norm: 0.971379889006204, iteration: 37537
loss: 1.0197714567184448,grad_norm: 0.9999992600704366, iteration: 37538
loss: 0.9980316758155823,grad_norm: 0.8367822187253727, iteration: 37539
loss: 1.0521243810653687,grad_norm: 0.9999992520088523, iteration: 37540
loss: 0.986635684967041,grad_norm: 0.999999149474954, iteration: 37541
loss: 0.9992717504501343,grad_norm: 0.989185113003301, iteration: 37542
loss: 0.9820504784584045,grad_norm: 0.953133281473635, iteration: 37543
loss: 0.9971230030059814,grad_norm: 0.9999992461440181, iteration: 37544
loss: 0.9866881966590881,grad_norm: 0.934131724619544, iteration: 37545
loss: 1.01837956905365,grad_norm: 0.7269049197573668, iteration: 37546
loss: 1.023229956626892,grad_norm: 0.999999157299088, iteration: 37547
loss: 0.9950233101844788,grad_norm: 0.872114984228743, iteration: 37548
loss: 1.0103992223739624,grad_norm: 0.9103140369835118, iteration: 37549
loss: 0.9879392385482788,grad_norm: 0.999999092750935, iteration: 37550
loss: 0.9953967928886414,grad_norm: 0.9999991435829909, iteration: 37551
loss: 1.0265299081802368,grad_norm: 0.8818159191238333, iteration: 37552
loss: 1.0030683279037476,grad_norm: 0.9698147414317446, iteration: 37553
loss: 0.9859613180160522,grad_norm: 0.9773986567099374, iteration: 37554
loss: 0.9935881495475769,grad_norm: 0.9883801738740584, iteration: 37555
loss: 0.9682992696762085,grad_norm: 0.9598071668120777, iteration: 37556
loss: 1.022175908088684,grad_norm: 0.9078726266680044, iteration: 37557
loss: 1.03737211227417,grad_norm: 0.8034583481121085, iteration: 37558
loss: 1.0088123083114624,grad_norm: 0.9999993944058636, iteration: 37559
loss: 1.0066585540771484,grad_norm: 0.9999992772273242, iteration: 37560
loss: 1.0195868015289307,grad_norm: 0.9526280005661811, iteration: 37561
loss: 0.991049587726593,grad_norm: 0.9999990721254727, iteration: 37562
loss: 0.9702301025390625,grad_norm: 0.9999990405939819, iteration: 37563
loss: 0.9854304194450378,grad_norm: 0.9137927644357118, iteration: 37564
loss: 1.031894564628601,grad_norm: 0.9999990999558872, iteration: 37565
loss: 1.0347651243209839,grad_norm: 0.9999996530244006, iteration: 37566
loss: 0.9802485108375549,grad_norm: 0.9999990779320644, iteration: 37567
loss: 1.0406131744384766,grad_norm: 0.9495864257007143, iteration: 37568
loss: 0.9998449683189392,grad_norm: 0.8927818484970996, iteration: 37569
loss: 0.9944065809249878,grad_norm: 0.9264966480680721, iteration: 37570
loss: 0.9618303775787354,grad_norm: 0.8294859097768428, iteration: 37571
loss: 1.010117769241333,grad_norm: 0.8427893328534578, iteration: 37572
loss: 1.008013367652893,grad_norm: 0.919399528393453, iteration: 37573
loss: 1.0318362712860107,grad_norm: 0.9676858799647282, iteration: 37574
loss: 0.9948911666870117,grad_norm: 0.9999994638361136, iteration: 37575
loss: 0.9795008301734924,grad_norm: 0.9345109241633903, iteration: 37576
loss: 0.99648517370224,grad_norm: 0.8877356242957244, iteration: 37577
loss: 1.0048608779907227,grad_norm: 0.9999991692998778, iteration: 37578
loss: 1.0350502729415894,grad_norm: 0.9999991134959103, iteration: 37579
loss: 1.0094799995422363,grad_norm: 0.8547644718380428, iteration: 37580
loss: 1.01416015625,grad_norm: 0.9999991081995123, iteration: 37581
loss: 1.0018398761749268,grad_norm: 0.9327714169529291, iteration: 37582
loss: 1.0093222856521606,grad_norm: 0.9590931884558421, iteration: 37583
loss: 1.0522242784500122,grad_norm: 0.999999552934337, iteration: 37584
loss: 1.0261404514312744,grad_norm: 0.9418726377044587, iteration: 37585
loss: 0.9402210712432861,grad_norm: 0.8828676313493151, iteration: 37586
loss: 1.0088621377944946,grad_norm: 0.9469137954913432, iteration: 37587
loss: 0.966335654258728,grad_norm: 0.999999088714911, iteration: 37588
loss: 0.9746410250663757,grad_norm: 0.9815796601513084, iteration: 37589
loss: 0.9913886189460754,grad_norm: 0.8777702979755485, iteration: 37590
loss: 1.0056893825531006,grad_norm: 0.915511809550267, iteration: 37591
loss: 1.0097098350524902,grad_norm: 0.9999991965803452, iteration: 37592
loss: 1.0025190114974976,grad_norm: 0.9930539481114041, iteration: 37593
loss: 1.002852439880371,grad_norm: 0.9999990586968696, iteration: 37594
loss: 0.9813252687454224,grad_norm: 0.938680583109648, iteration: 37595
loss: 1.014707088470459,grad_norm: 0.9999990663620981, iteration: 37596
loss: 1.0225317478179932,grad_norm: 0.8660535033178453, iteration: 37597
loss: 1.0327575206756592,grad_norm: 0.9999991809950175, iteration: 37598
loss: 1.0016909837722778,grad_norm: 0.9999991105925847, iteration: 37599
loss: 0.9943544268608093,grad_norm: 0.9999990768901059, iteration: 37600
loss: 0.9974711537361145,grad_norm: 0.9999991541601957, iteration: 37601
loss: 0.99177485704422,grad_norm: 0.9999991634372404, iteration: 37602
loss: 1.0157341957092285,grad_norm: 0.9999992490645147, iteration: 37603
loss: 1.015706181526184,grad_norm: 0.9999991169014601, iteration: 37604
loss: 0.9814441204071045,grad_norm: 0.8932221143788587, iteration: 37605
loss: 1.0018917322158813,grad_norm: 0.867167581375638, iteration: 37606
loss: 0.9768809676170349,grad_norm: 0.998616309610055, iteration: 37607
loss: 1.0416241884231567,grad_norm: 0.9078036633954834, iteration: 37608
loss: 1.032873511314392,grad_norm: 0.9999989525036699, iteration: 37609
loss: 1.012626051902771,grad_norm: 0.7393771024066648, iteration: 37610
loss: 0.9823030233383179,grad_norm: 0.9652406882366646, iteration: 37611
loss: 0.9702978730201721,grad_norm: 0.9768487948579386, iteration: 37612
loss: 0.9830447435379028,grad_norm: 0.9923008554699043, iteration: 37613
loss: 1.0031673908233643,grad_norm: 0.9557165367566309, iteration: 37614
loss: 0.9934355616569519,grad_norm: 0.976170642919208, iteration: 37615
loss: 0.9965909719467163,grad_norm: 0.99999914778675, iteration: 37616
loss: 1.023475170135498,grad_norm: 0.9711094623639767, iteration: 37617
loss: 1.0119729042053223,grad_norm: 0.9999991565649564, iteration: 37618
loss: 1.018495798110962,grad_norm: 0.9244010421335611, iteration: 37619
loss: 1.0118101835250854,grad_norm: 0.9999989100975161, iteration: 37620
loss: 0.9902125597000122,grad_norm: 0.9999991135213234, iteration: 37621
loss: 1.0051472187042236,grad_norm: 0.8744467634942046, iteration: 37622
loss: 1.0343897342681885,grad_norm: 0.9999991559070825, iteration: 37623
loss: 1.017499327659607,grad_norm: 0.9402690561421949, iteration: 37624
loss: 1.0691972970962524,grad_norm: 0.9999997556249981, iteration: 37625
loss: 0.9802592396736145,grad_norm: 0.8468744866558032, iteration: 37626
loss: 1.0774147510528564,grad_norm: 0.9999995950474654, iteration: 37627
loss: 0.9811965823173523,grad_norm: 0.8585948372871044, iteration: 37628
loss: 0.9699429273605347,grad_norm: 0.9636169770750069, iteration: 37629
loss: 1.004457950592041,grad_norm: 0.8912419909852664, iteration: 37630
loss: 1.0167521238327026,grad_norm: 0.9357257747764794, iteration: 37631
loss: 1.0228524208068848,grad_norm: 0.9999990159173002, iteration: 37632
loss: 1.0259358882904053,grad_norm: 0.9512382383208771, iteration: 37633
loss: 0.9744786024093628,grad_norm: 0.9287858465091875, iteration: 37634
loss: 0.9901549220085144,grad_norm: 0.8887836787309491, iteration: 37635
loss: 1.0004427433013916,grad_norm: 0.9793284585911705, iteration: 37636
loss: 0.9957285523414612,grad_norm: 0.8260239497082263, iteration: 37637
loss: 0.9854453802108765,grad_norm: 0.9069357430344316, iteration: 37638
loss: 0.9793546795845032,grad_norm: 0.9999990910076957, iteration: 37639
loss: 0.99849534034729,grad_norm: 0.9999992324177653, iteration: 37640
loss: 0.9883963465690613,grad_norm: 0.8882550417787395, iteration: 37641
loss: 1.0297273397445679,grad_norm: 0.9529548464953113, iteration: 37642
loss: 1.019905686378479,grad_norm: 0.9999988686947177, iteration: 37643
loss: 1.0172220468521118,grad_norm: 0.901870100134354, iteration: 37644
loss: 1.116039514541626,grad_norm: 0.9999998286704691, iteration: 37645
loss: 1.0478758811950684,grad_norm: 0.999999191862087, iteration: 37646
loss: 1.0039775371551514,grad_norm: 0.9999991303259449, iteration: 37647
loss: 1.060541033744812,grad_norm: 0.9999991277509102, iteration: 37648
loss: 1.0180318355560303,grad_norm: 0.9999993935346593, iteration: 37649
loss: 0.9880235195159912,grad_norm: 0.9999991078023327, iteration: 37650
loss: 1.0645259618759155,grad_norm: 0.9999992581301996, iteration: 37651
loss: 1.0437599420547485,grad_norm: 0.99727333404677, iteration: 37652
loss: 0.9728990197181702,grad_norm: 0.9999991747445443, iteration: 37653
loss: 1.026170015335083,grad_norm: 0.9999991543319704, iteration: 37654
loss: 1.0246407985687256,grad_norm: 0.7917336600063988, iteration: 37655
loss: 0.9685453772544861,grad_norm: 0.9999990498634359, iteration: 37656
loss: 1.049005389213562,grad_norm: 0.9199406382393521, iteration: 37657
loss: 1.0273157358169556,grad_norm: 0.9908862161191708, iteration: 37658
loss: 1.0309544801712036,grad_norm: 0.9999991395769798, iteration: 37659
loss: 0.9637426137924194,grad_norm: 0.9999995149164589, iteration: 37660
loss: 1.0145211219787598,grad_norm: 0.9999991385343093, iteration: 37661
loss: 0.9861853718757629,grad_norm: 0.999999222939171, iteration: 37662
loss: 1.0301464796066284,grad_norm: 0.999998963855812, iteration: 37663
loss: 1.0506877899169922,grad_norm: 0.9120181449384878, iteration: 37664
loss: 0.9953136444091797,grad_norm: 0.9999991105108983, iteration: 37665
loss: 1.0159459114074707,grad_norm: 0.9999990442921408, iteration: 37666
loss: 1.0008450746536255,grad_norm: 0.9999992504942886, iteration: 37667
loss: 1.000213623046875,grad_norm: 0.9999991946589809, iteration: 37668
loss: 1.04230535030365,grad_norm: 0.8733634206201975, iteration: 37669
loss: 1.0091537237167358,grad_norm: 0.9490092629072611, iteration: 37670
loss: 0.9820932745933533,grad_norm: 0.9999996078368345, iteration: 37671
loss: 0.9925370812416077,grad_norm: 0.9999991339255073, iteration: 37672
loss: 1.043137550354004,grad_norm: 0.9999989927052519, iteration: 37673
loss: 1.002410650253296,grad_norm: 0.9999990674624136, iteration: 37674
loss: 1.0370815992355347,grad_norm: 0.9999993545322114, iteration: 37675
loss: 1.0111842155456543,grad_norm: 0.9999991413857879, iteration: 37676
loss: 0.9913729429244995,grad_norm: 0.9999992242482466, iteration: 37677
loss: 0.987954318523407,grad_norm: 0.9999991208706844, iteration: 37678
loss: 0.9850447773933411,grad_norm: 0.9999990245075494, iteration: 37679
loss: 0.9865462183952332,grad_norm: 0.9532226241229053, iteration: 37680
loss: 1.0190315246582031,grad_norm: 0.99999938375936, iteration: 37681
loss: 1.0403552055358887,grad_norm: 0.9639608355291895, iteration: 37682
loss: 0.9927560687065125,grad_norm: 0.9999990400175313, iteration: 37683
loss: 1.0116888284683228,grad_norm: 0.9999991946856392, iteration: 37684
loss: 1.0045114755630493,grad_norm: 0.9999991617586897, iteration: 37685
loss: 0.9798938035964966,grad_norm: 0.9999996426978204, iteration: 37686
loss: 1.012839674949646,grad_norm: 0.9999990082115798, iteration: 37687
loss: 1.012596607208252,grad_norm: 0.9999991892936411, iteration: 37688
loss: 0.9923148155212402,grad_norm: 0.9284149499647146, iteration: 37689
loss: 0.986158013343811,grad_norm: 0.9999992636997614, iteration: 37690
loss: 0.9866669178009033,grad_norm: 0.9999990858225116, iteration: 37691
loss: 1.012476921081543,grad_norm: 0.9667387810164877, iteration: 37692
loss: 1.0018157958984375,grad_norm: 0.9716611175471637, iteration: 37693
loss: 0.9884303212165833,grad_norm: 0.9999992988363795, iteration: 37694
loss: 0.9831507802009583,grad_norm: 0.9999992519019667, iteration: 37695
loss: 0.9926037192344666,grad_norm: 0.9552995417943436, iteration: 37696
loss: 0.9944008588790894,grad_norm: 0.9999993267685774, iteration: 37697
loss: 1.0265616178512573,grad_norm: 0.9999990432630148, iteration: 37698
loss: 1.055471658706665,grad_norm: 0.9999994350774025, iteration: 37699
loss: 0.9844222068786621,grad_norm: 0.9999991520404165, iteration: 37700
loss: 0.9598073959350586,grad_norm: 0.9999991511416645, iteration: 37701
loss: 1.0260014533996582,grad_norm: 0.938704957883507, iteration: 37702
loss: 1.0300798416137695,grad_norm: 0.9253936934965539, iteration: 37703
loss: 0.9944151043891907,grad_norm: 0.8895978304844102, iteration: 37704
loss: 0.9889392256736755,grad_norm: 0.9999991877910442, iteration: 37705
loss: 0.9900830388069153,grad_norm: 0.9791825007658794, iteration: 37706
loss: 1.0316216945648193,grad_norm: 0.9910438120833529, iteration: 37707
loss: 1.000617504119873,grad_norm: 0.9544845364655083, iteration: 37708
loss: 1.0336815118789673,grad_norm: 0.9999991100123944, iteration: 37709
loss: 1.0747660398483276,grad_norm: 0.9999991810191793, iteration: 37710
loss: 0.9896335601806641,grad_norm: 0.9891645228653378, iteration: 37711
loss: 0.977096676826477,grad_norm: 0.997227629008376, iteration: 37712
loss: 1.01658034324646,grad_norm: 0.9999991608886913, iteration: 37713
loss: 1.0010429620742798,grad_norm: 0.999999145192695, iteration: 37714
loss: 0.9657784104347229,grad_norm: 0.9292122420413031, iteration: 37715
loss: 1.0382496118545532,grad_norm: 0.7839853444134962, iteration: 37716
loss: 0.9906327128410339,grad_norm: 0.9999991202010144, iteration: 37717
loss: 0.9648218154907227,grad_norm: 0.8662370646304451, iteration: 37718
loss: 1.0105458498001099,grad_norm: 0.8045128773698693, iteration: 37719
loss: 0.9754739999771118,grad_norm: 0.9129303872810469, iteration: 37720
loss: 1.005527377128601,grad_norm: 0.9999991937179589, iteration: 37721
loss: 1.0510934591293335,grad_norm: 0.9999991876297163, iteration: 37722
loss: 0.9948316216468811,grad_norm: 0.7967422193486259, iteration: 37723
loss: 1.0132182836532593,grad_norm: 0.8567697683970631, iteration: 37724
loss: 1.032513976097107,grad_norm: 0.9999992780810327, iteration: 37725
loss: 1.0254143476486206,grad_norm: 0.9999992979592859, iteration: 37726
loss: 1.0167651176452637,grad_norm: 0.9999990182594996, iteration: 37727
loss: 0.9966894388198853,grad_norm: 0.9909407519175221, iteration: 37728
loss: 1.0365158319473267,grad_norm: 0.9999991125988809, iteration: 37729
loss: 1.0004807710647583,grad_norm: 0.9929546951220934, iteration: 37730
loss: 1.0094609260559082,grad_norm: 0.8667134576648656, iteration: 37731
loss: 1.0143845081329346,grad_norm: 0.8051638834677052, iteration: 37732
loss: 0.9931182861328125,grad_norm: 0.8443252620813098, iteration: 37733
loss: 0.9888365268707275,grad_norm: 0.9999991663478651, iteration: 37734
loss: 1.0401283502578735,grad_norm: 0.8585625119759424, iteration: 37735
loss: 1.0448787212371826,grad_norm: 0.9999990411411815, iteration: 37736
loss: 1.0122489929199219,grad_norm: 0.9999998586348182, iteration: 37737
loss: 1.0129085779190063,grad_norm: 0.8915869645315486, iteration: 37738
loss: 0.9842529296875,grad_norm: 0.9999989937060758, iteration: 37739
loss: 1.0083281993865967,grad_norm: 0.9999991774904773, iteration: 37740
loss: 0.992689847946167,grad_norm: 0.913075519843362, iteration: 37741
loss: 0.9964377284049988,grad_norm: 0.7670448660108765, iteration: 37742
loss: 1.004620909690857,grad_norm: 0.9999991410341621, iteration: 37743
loss: 1.0051729679107666,grad_norm: 0.9999992189651141, iteration: 37744
loss: 1.00121009349823,grad_norm: 0.999999140034729, iteration: 37745
loss: 1.0264896154403687,grad_norm: 0.9999995536669156, iteration: 37746
loss: 0.9981805682182312,grad_norm: 0.999999146331331, iteration: 37747
loss: 1.0208879709243774,grad_norm: 0.9495161916667905, iteration: 37748
loss: 0.9952073097229004,grad_norm: 0.8813199909871733, iteration: 37749
loss: 1.0491697788238525,grad_norm: 0.9999992720346792, iteration: 37750
loss: 0.9956644773483276,grad_norm: 0.8520837441897028, iteration: 37751
loss: 1.0137038230895996,grad_norm: 0.9999991072319979, iteration: 37752
loss: 1.0329718589782715,grad_norm: 0.9403538824779061, iteration: 37753
loss: 0.9928053021430969,grad_norm: 0.9999990114651542, iteration: 37754
loss: 0.9953931570053101,grad_norm: 0.9999992465570698, iteration: 37755
loss: 1.0131267309188843,grad_norm: 0.8737146868527028, iteration: 37756
loss: 1.0026346445083618,grad_norm: 0.9311780534898272, iteration: 37757
loss: 0.9939846396446228,grad_norm: 0.8627265524236236, iteration: 37758
loss: 1.0095869302749634,grad_norm: 0.9999989657596621, iteration: 37759
loss: 1.026543140411377,grad_norm: 0.9919290469953624, iteration: 37760
loss: 1.0181862115859985,grad_norm: 0.9999992607566573, iteration: 37761
loss: 0.9737691283226013,grad_norm: 0.8761158441826287, iteration: 37762
loss: 1.0572925806045532,grad_norm: 0.9999993827446609, iteration: 37763
loss: 0.9878894686698914,grad_norm: 0.9253156831492011, iteration: 37764
loss: 0.9839656352996826,grad_norm: 0.9999991140018707, iteration: 37765
loss: 0.9680923819541931,grad_norm: 0.9999990731519067, iteration: 37766
loss: 1.0149681568145752,grad_norm: 0.9999992308547169, iteration: 37767
loss: 1.0301513671875,grad_norm: 0.999999719637281, iteration: 37768
loss: 0.9881274104118347,grad_norm: 0.9999991202068204, iteration: 37769
loss: 1.0019662380218506,grad_norm: 0.9999990355443902, iteration: 37770
loss: 1.0255812406539917,grad_norm: 0.9999991630377294, iteration: 37771
loss: 0.9991529583930969,grad_norm: 0.9541018067145413, iteration: 37772
loss: 1.0152422189712524,grad_norm: 0.9999990995850574, iteration: 37773
loss: 0.9833863973617554,grad_norm: 0.8824706843512334, iteration: 37774
loss: 1.03057861328125,grad_norm: 0.9511352652289708, iteration: 37775
loss: 0.9770185351371765,grad_norm: 0.999999287540057, iteration: 37776
loss: 1.04804265499115,grad_norm: 0.9999993137020272, iteration: 37777
loss: 0.9872702360153198,grad_norm: 0.9999991330426747, iteration: 37778
loss: 1.034515619277954,grad_norm: 0.9983786868599706, iteration: 37779
loss: 1.0483543872833252,grad_norm: 0.9999990891952942, iteration: 37780
loss: 1.0152156352996826,grad_norm: 0.9999995076284715, iteration: 37781
loss: 1.0114343166351318,grad_norm: 0.9999991549663759, iteration: 37782
loss: 1.0074920654296875,grad_norm: 0.9999989970028355, iteration: 37783
loss: 1.056402564048767,grad_norm: 0.999999362808286, iteration: 37784
loss: 1.0101855993270874,grad_norm: 0.9999992572260104, iteration: 37785
loss: 1.0421044826507568,grad_norm: 0.9321565052355633, iteration: 37786
loss: 1.029149055480957,grad_norm: 0.9999993181056142, iteration: 37787
loss: 0.993166983127594,grad_norm: 0.920201256469161, iteration: 37788
loss: 1.0282691717147827,grad_norm: 0.9999991590753521, iteration: 37789
loss: 0.9771286249160767,grad_norm: 0.9641757776191081, iteration: 37790
loss: 0.9718644022941589,grad_norm: 0.9382019439471037, iteration: 37791
loss: 0.9955824017524719,grad_norm: 0.942363512297886, iteration: 37792
loss: 0.9726902842521667,grad_norm: 0.9796099267712106, iteration: 37793
loss: 1.019325613975525,grad_norm: 0.9999991592462875, iteration: 37794
loss: 1.0305352210998535,grad_norm: 0.9999991195486275, iteration: 37795
loss: 1.0122967958450317,grad_norm: 0.9473760645659436, iteration: 37796
loss: 0.9856728911399841,grad_norm: 0.8926911582375693, iteration: 37797
loss: 1.0031481981277466,grad_norm: 0.8561782714010838, iteration: 37798
loss: 0.9929856657981873,grad_norm: 0.9999990472825395, iteration: 37799
loss: 1.0362114906311035,grad_norm: 0.9999996904943528, iteration: 37800
loss: 1.0293021202087402,grad_norm: 0.9999990005182438, iteration: 37801
loss: 0.9856560230255127,grad_norm: 0.9765065726397604, iteration: 37802
loss: 0.9884007573127747,grad_norm: 0.938211156869791, iteration: 37803
loss: 1.0258986949920654,grad_norm: 0.9999991450125785, iteration: 37804
loss: 1.0001044273376465,grad_norm: 0.9999990133624018, iteration: 37805
loss: 0.9965219497680664,grad_norm: 0.9743173648593912, iteration: 37806
loss: 1.0131852626800537,grad_norm: 0.9495441461809191, iteration: 37807
loss: 1.0235651731491089,grad_norm: 0.9999991924829508, iteration: 37808
loss: 1.0102208852767944,grad_norm: 0.9999991484217865, iteration: 37809
loss: 0.9925643801689148,grad_norm: 0.8096879696874846, iteration: 37810
loss: 1.0309526920318604,grad_norm: 0.9907441550152379, iteration: 37811
loss: 1.040264368057251,grad_norm: 0.9378033909912558, iteration: 37812
loss: 0.9650309681892395,grad_norm: 0.81817693256033, iteration: 37813
loss: 1.0328336954116821,grad_norm: 0.999999310819498, iteration: 37814
loss: 1.022469401359558,grad_norm: 0.9999991627433307, iteration: 37815
loss: 0.9925184845924377,grad_norm: 0.9008522677115606, iteration: 37816
loss: 1.012778878211975,grad_norm: 0.8784187175776887, iteration: 37817
loss: 0.997187614440918,grad_norm: 0.9978042339180505, iteration: 37818
loss: 1.016059398651123,grad_norm: 0.8737358246874389, iteration: 37819
loss: 0.9927666187286377,grad_norm: 0.9999995058348534, iteration: 37820
loss: 0.9601140022277832,grad_norm: 0.9999991937330569, iteration: 37821
loss: 1.0207487344741821,grad_norm: 0.9999994481525136, iteration: 37822
loss: 1.0124191045761108,grad_norm: 0.9999991267305915, iteration: 37823
loss: 1.001184105873108,grad_norm: 0.8459847727696531, iteration: 37824
loss: 1.0044492483139038,grad_norm: 0.9999992773845048, iteration: 37825
loss: 1.0183354616165161,grad_norm: 0.7653323801897992, iteration: 37826
loss: 0.9784453511238098,grad_norm: 0.999998997706836, iteration: 37827
loss: 1.0289666652679443,grad_norm: 0.8582669184083318, iteration: 37828
loss: 0.9556156992912292,grad_norm: 0.9999990443649338, iteration: 37829
loss: 1.033428430557251,grad_norm: 0.9654890342117931, iteration: 37830
loss: 1.0256571769714355,grad_norm: 0.9999991594923642, iteration: 37831
loss: 0.9929057359695435,grad_norm: 0.9999990800626207, iteration: 37832
loss: 1.0086052417755127,grad_norm: 0.9999994166558316, iteration: 37833
loss: 1.0033032894134521,grad_norm: 0.8975391480536997, iteration: 37834
loss: 1.0325497388839722,grad_norm: 0.9433450073642287, iteration: 37835
loss: 1.0086113214492798,grad_norm: 0.9999992745919276, iteration: 37836
loss: 1.0064706802368164,grad_norm: 0.8548110619982052, iteration: 37837
loss: 1.0303641557693481,grad_norm: 0.9974338208583657, iteration: 37838
loss: 1.0036600828170776,grad_norm: 0.9490460538170331, iteration: 37839
loss: 1.034485936164856,grad_norm: 0.8505013765424253, iteration: 37840
loss: 1.01302170753479,grad_norm: 0.9999990739022586, iteration: 37841
loss: 1.0037122964859009,grad_norm: 0.9999997387519172, iteration: 37842
loss: 0.9730523228645325,grad_norm: 0.8947204161152298, iteration: 37843
loss: 0.9883173108100891,grad_norm: 0.9999992352286816, iteration: 37844
loss: 0.9908525943756104,grad_norm: 0.9903400170564771, iteration: 37845
loss: 1.010077953338623,grad_norm: 0.9128727561347135, iteration: 37846
loss: 1.0033212900161743,grad_norm: 0.866323979337277, iteration: 37847
loss: 0.9971031546592712,grad_norm: 0.8696205588688525, iteration: 37848
loss: 0.9750559329986572,grad_norm: 0.907792740694459, iteration: 37849
loss: 1.0449706315994263,grad_norm: 0.8887562109746379, iteration: 37850
loss: 0.9999935626983643,grad_norm: 0.9999991134828431, iteration: 37851
loss: 0.9825775623321533,grad_norm: 0.9187363603916855, iteration: 37852
loss: 1.0043609142303467,grad_norm: 0.9999991706165584, iteration: 37853
loss: 1.0138123035430908,grad_norm: 0.942305656466425, iteration: 37854
loss: 0.9596190452575684,grad_norm: 0.9999991248267962, iteration: 37855
loss: 0.993194043636322,grad_norm: 0.9999991772425989, iteration: 37856
loss: 0.9878417253494263,grad_norm: 0.9415278369648858, iteration: 37857
loss: 0.9918941855430603,grad_norm: 0.8895056986442929, iteration: 37858
loss: 0.9932705760002136,grad_norm: 0.80234846764212, iteration: 37859
loss: 0.9864038228988647,grad_norm: 0.8808545393644494, iteration: 37860
loss: 1.0005794763565063,grad_norm: 0.8485231824493152, iteration: 37861
loss: 1.0045720338821411,grad_norm: 0.9999991587717662, iteration: 37862
loss: 1.0146830081939697,grad_norm: 0.9999991992737557, iteration: 37863
loss: 0.989842414855957,grad_norm: 0.9940872800097993, iteration: 37864
loss: 0.999122679233551,grad_norm: 0.901045427353025, iteration: 37865
loss: 1.0585241317749023,grad_norm: 0.999999198959407, iteration: 37866
loss: 1.0029165744781494,grad_norm: 0.9999990694058486, iteration: 37867
loss: 1.013524055480957,grad_norm: 0.9999991340116294, iteration: 37868
loss: 1.0299572944641113,grad_norm: 0.9999991769846219, iteration: 37869
loss: 1.0121971368789673,grad_norm: 0.9528314132501859, iteration: 37870
loss: 0.9948632717132568,grad_norm: 0.9879518081049121, iteration: 37871
loss: 0.9767840504646301,grad_norm: 0.986376650413162, iteration: 37872
loss: 1.003115177154541,grad_norm: 0.9999990717169124, iteration: 37873
loss: 0.9901923537254333,grad_norm: 0.9999991070707686, iteration: 37874
loss: 0.9666165709495544,grad_norm: 0.9915643466134592, iteration: 37875
loss: 1.0265350341796875,grad_norm: 0.9999989943983298, iteration: 37876
loss: 1.0126235485076904,grad_norm: 0.9682403015052826, iteration: 37877
loss: 1.0330870151519775,grad_norm: 0.9289904127104446, iteration: 37878
loss: 1.0100594758987427,grad_norm: 0.9393208527147894, iteration: 37879
loss: 0.9947999119758606,grad_norm: 0.8375657945013649, iteration: 37880
loss: 1.000253677368164,grad_norm: 0.9999990859423948, iteration: 37881
loss: 1.0106546878814697,grad_norm: 0.9999995684447501, iteration: 37882
loss: 1.037875771522522,grad_norm: 0.9999997161338745, iteration: 37883
loss: 0.9803399443626404,grad_norm: 0.9579573278440976, iteration: 37884
loss: 0.986158013343811,grad_norm: 0.9364753976335799, iteration: 37885
loss: 1.0154709815979004,grad_norm: 0.9999992004030728, iteration: 37886
loss: 0.9905065894126892,grad_norm: 0.9999990124960928, iteration: 37887
loss: 0.9951682686805725,grad_norm: 0.9493628232241731, iteration: 37888
loss: 1.0174708366394043,grad_norm: 0.9999996843485839, iteration: 37889
loss: 1.0834767818450928,grad_norm: 0.9999994857559801, iteration: 37890
loss: 1.0131876468658447,grad_norm: 0.8477266127185688, iteration: 37891
loss: 0.9905754327774048,grad_norm: 0.9999990386079348, iteration: 37892
loss: 0.9627747535705566,grad_norm: 0.9999990206642698, iteration: 37893
loss: 1.0142537355422974,grad_norm: 0.9999989057498994, iteration: 37894
loss: 1.028956413269043,grad_norm: 0.9999991176072635, iteration: 37895
loss: 1.0088051557540894,grad_norm: 0.8910841553465133, iteration: 37896
loss: 1.0050400495529175,grad_norm: 0.9307482465849582, iteration: 37897
loss: 0.998376190662384,grad_norm: 0.9261483280459185, iteration: 37898
loss: 1.008583903312683,grad_norm: 0.8623900368315719, iteration: 37899
loss: 0.9951507449150085,grad_norm: 0.9999991759163626, iteration: 37900
loss: 0.9763073325157166,grad_norm: 0.9450186404946435, iteration: 37901
loss: 1.0012708902359009,grad_norm: 0.9874542032673373, iteration: 37902
loss: 1.0047804117202759,grad_norm: 0.9999991562908136, iteration: 37903
loss: 1.0083450078964233,grad_norm: 0.9898092032333199, iteration: 37904
loss: 0.9992765784263611,grad_norm: 0.9999990113997425, iteration: 37905
loss: 0.9510329961776733,grad_norm: 0.9303454779875331, iteration: 37906
loss: 0.9733086228370667,grad_norm: 0.9999990883966798, iteration: 37907
loss: 1.000107765197754,grad_norm: 0.8212429607778607, iteration: 37908
loss: 1.0293333530426025,grad_norm: 0.9999992145176226, iteration: 37909
loss: 0.9738486409187317,grad_norm: 0.8444572118011873, iteration: 37910
loss: 0.9841140508651733,grad_norm: 0.9999992365158777, iteration: 37911
loss: 1.0145397186279297,grad_norm: 0.9999991512012517, iteration: 37912
loss: 1.0473812818527222,grad_norm: 0.9691854235799929, iteration: 37913
loss: 0.9977124929428101,grad_norm: 0.9999992511489237, iteration: 37914
loss: 1.0205343961715698,grad_norm: 0.9501565434367746, iteration: 37915
loss: 1.0286966562271118,grad_norm: 0.9999990599879421, iteration: 37916
loss: 1.010810136795044,grad_norm: 0.7637340864398076, iteration: 37917
loss: 1.0054512023925781,grad_norm: 0.953744363102263, iteration: 37918
loss: 0.9945022463798523,grad_norm: 0.9999992812790662, iteration: 37919
loss: 1.0753562450408936,grad_norm: 0.9999992938144372, iteration: 37920
loss: 1.0192677974700928,grad_norm: 0.9999990792962795, iteration: 37921
loss: 1.008691430091858,grad_norm: 0.9320099021629035, iteration: 37922
loss: 1.0206272602081299,grad_norm: 0.9677062245024556, iteration: 37923
loss: 1.0168213844299316,grad_norm: 0.8505097403440836, iteration: 37924
loss: 0.9906620979309082,grad_norm: 0.9999991323816585, iteration: 37925
loss: 1.0480303764343262,grad_norm: 0.9121418700669612, iteration: 37926
loss: 1.0303434133529663,grad_norm: 0.9999992935522553, iteration: 37927
loss: 0.9987174272537231,grad_norm: 0.9999994765389629, iteration: 37928
loss: 0.9928345680236816,grad_norm: 0.9999990723648777, iteration: 37929
loss: 0.9922537207603455,grad_norm: 0.9593878917572792, iteration: 37930
loss: 0.9763268828392029,grad_norm: 0.8210320015590888, iteration: 37931
loss: 1.0225586891174316,grad_norm: 0.9999992212935032, iteration: 37932
loss: 1.04512619972229,grad_norm: 0.999999089321138, iteration: 37933
loss: 0.9947922825813293,grad_norm: 0.8779854388680076, iteration: 37934
loss: 1.0124143362045288,grad_norm: 0.9429225900779059, iteration: 37935
loss: 1.006760835647583,grad_norm: 0.7408153759656423, iteration: 37936
loss: 1.0383931398391724,grad_norm: 0.9421639048493762, iteration: 37937
loss: 1.0284955501556396,grad_norm: 0.9999991042160904, iteration: 37938
loss: 0.9624992609024048,grad_norm: 0.9141530664476518, iteration: 37939
loss: 1.0399507284164429,grad_norm: 0.9999990922321248, iteration: 37940
loss: 0.9978019595146179,grad_norm: 0.981935340934387, iteration: 37941
loss: 0.9884754419326782,grad_norm: 0.9136044245078999, iteration: 37942
loss: 1.0191805362701416,grad_norm: 0.9999997356600712, iteration: 37943
loss: 1.0145782232284546,grad_norm: 0.879724756803043, iteration: 37944
loss: 1.0116243362426758,grad_norm: 0.9999991173296903, iteration: 37945
loss: 1.007659673690796,grad_norm: 0.999999255621155, iteration: 37946
loss: 1.0203804969787598,grad_norm: 0.9999991595115315, iteration: 37947
loss: 1.0028080940246582,grad_norm: 0.9999992025245317, iteration: 37948
loss: 0.9666351675987244,grad_norm: 0.9207932368573255, iteration: 37949
loss: 0.9971436858177185,grad_norm: 0.9371229041997828, iteration: 37950
loss: 0.9867801666259766,grad_norm: 0.9798728487937679, iteration: 37951
loss: 0.9656006097793579,grad_norm: 0.9999991644322386, iteration: 37952
loss: 1.025925874710083,grad_norm: 0.9999995512193769, iteration: 37953
loss: 0.9656721353530884,grad_norm: 0.9999990149424417, iteration: 37954
loss: 1.002365231513977,grad_norm: 0.9613275683973214, iteration: 37955
loss: 0.9827167391777039,grad_norm: 0.9281418543499869, iteration: 37956
loss: 0.9944120049476624,grad_norm: 0.9999993180458222, iteration: 37957
loss: 0.9836894273757935,grad_norm: 0.936025611475805, iteration: 37958
loss: 1.038158655166626,grad_norm: 0.999999122191334, iteration: 37959
loss: 0.9915797114372253,grad_norm: 0.9999993182879643, iteration: 37960
loss: 1.0160865783691406,grad_norm: 0.9999992849987731, iteration: 37961
loss: 1.0081534385681152,grad_norm: 0.9999991791644725, iteration: 37962
loss: 1.0124907493591309,grad_norm: 0.9999990950209978, iteration: 37963
loss: 1.0192614793777466,grad_norm: 0.9630449992125961, iteration: 37964
loss: 1.0126919746398926,grad_norm: 0.9082252049521574, iteration: 37965
loss: 1.0816041231155396,grad_norm: 0.9999996457112525, iteration: 37966
loss: 1.0114967823028564,grad_norm: 0.9503470169487034, iteration: 37967
loss: 0.9682521224021912,grad_norm: 0.9999991925408251, iteration: 37968
loss: 1.0175836086273193,grad_norm: 0.9407952857908114, iteration: 37969
loss: 1.0221894979476929,grad_norm: 0.9923884552475226, iteration: 37970
loss: 1.0421421527862549,grad_norm: 0.9849040629712105, iteration: 37971
loss: 0.9956487417221069,grad_norm: 0.9256203868878902, iteration: 37972
loss: 0.9833149909973145,grad_norm: 0.9999991161272331, iteration: 37973
loss: 1.0189915895462036,grad_norm: 0.9999992839685059, iteration: 37974
loss: 0.9893417358398438,grad_norm: 0.9634806555308013, iteration: 37975
loss: 1.0218244791030884,grad_norm: 0.9134983337102254, iteration: 37976
loss: 0.9893779754638672,grad_norm: 0.9676827592709403, iteration: 37977
loss: 0.9888598322868347,grad_norm: 0.9251813226592976, iteration: 37978
loss: 0.9879766702651978,grad_norm: 0.8649417229231227, iteration: 37979
loss: 1.0114259719848633,grad_norm: 0.8603393343093825, iteration: 37980
loss: 0.964126467704773,grad_norm: 0.9999990077199306, iteration: 37981
loss: 0.9825893044471741,grad_norm: 0.9138988169769295, iteration: 37982
loss: 1.0237888097763062,grad_norm: 0.9999990027805423, iteration: 37983
loss: 1.0060933828353882,grad_norm: 0.9999991477094078, iteration: 37984
loss: 1.0027978420257568,grad_norm: 0.9999990853235712, iteration: 37985
loss: 1.0013779401779175,grad_norm: 0.9999992085051975, iteration: 37986
loss: 1.0136826038360596,grad_norm: 0.9451774054063559, iteration: 37987
loss: 0.9910851716995239,grad_norm: 0.9999991922850702, iteration: 37988
loss: 0.9603245258331299,grad_norm: 0.9999990778827027, iteration: 37989
loss: 0.9753162860870361,grad_norm: 0.9853353832810864, iteration: 37990
loss: 0.9964523911476135,grad_norm: 0.9044790046792043, iteration: 37991
loss: 0.9958657026290894,grad_norm: 0.9999991181200049, iteration: 37992
loss: 0.9646947979927063,grad_norm: 0.9281007583849772, iteration: 37993
loss: 1.0407538414001465,grad_norm: 0.9999997989975192, iteration: 37994
loss: 1.0107622146606445,grad_norm: 0.9127847643160946, iteration: 37995
loss: 1.0334105491638184,grad_norm: 0.9576456301833783, iteration: 37996
loss: 1.0141105651855469,grad_norm: 0.9999991036980397, iteration: 37997
loss: 1.009843111038208,grad_norm: 0.9999990282537228, iteration: 37998
loss: 1.0209614038467407,grad_norm: 0.9999989846440323, iteration: 37999
loss: 0.9768849015235901,grad_norm: 0.8613389022931929, iteration: 38000
loss: 1.0356825590133667,grad_norm: 0.9397816635917576, iteration: 38001
loss: 0.9980018734931946,grad_norm: 0.8867200925234594, iteration: 38002
loss: 1.024315595626831,grad_norm: 0.9226571475884251, iteration: 38003
loss: 1.0017954111099243,grad_norm: 0.9999991429021812, iteration: 38004
loss: 0.963013768196106,grad_norm: 0.9999991399400312, iteration: 38005
loss: 0.98203444480896,grad_norm: 0.9999992315592764, iteration: 38006
loss: 0.9918800592422485,grad_norm: 0.883609840314545, iteration: 38007
loss: 1.0363214015960693,grad_norm: 0.9540055812599589, iteration: 38008
loss: 0.9659555554389954,grad_norm: 0.9480309381990589, iteration: 38009
loss: 0.9978501200675964,grad_norm: 0.9999989845241825, iteration: 38010
loss: 0.9718557596206665,grad_norm: 0.9840620569629008, iteration: 38011
loss: 0.9800671935081482,grad_norm: 0.9999990833652648, iteration: 38012
loss: 1.021194338798523,grad_norm: 0.9999991025297778, iteration: 38013
loss: 1.0177875757217407,grad_norm: 0.9999998125998788, iteration: 38014
loss: 0.9726824760437012,grad_norm: 0.9999991331036846, iteration: 38015
loss: 0.9562119245529175,grad_norm: 0.9999989944758716, iteration: 38016
loss: 1.0544748306274414,grad_norm: 0.9999994446667969, iteration: 38017
loss: 0.9545126557350159,grad_norm: 0.9999990965011998, iteration: 38018
loss: 1.0184332132339478,grad_norm: 0.9316518319543614, iteration: 38019
loss: 0.9655921459197998,grad_norm: 0.953717345124149, iteration: 38020
loss: 1.0009303092956543,grad_norm: 0.9088147081258492, iteration: 38021
loss: 1.004277229309082,grad_norm: 0.9999996044223334, iteration: 38022
loss: 0.9982110857963562,grad_norm: 0.9012350109221471, iteration: 38023
loss: 1.0105774402618408,grad_norm: 0.9999997055247959, iteration: 38024
loss: 0.9576992988586426,grad_norm: 0.9999989519542565, iteration: 38025
loss: 1.002311110496521,grad_norm: 0.9831915546741506, iteration: 38026
loss: 0.9961974620819092,grad_norm: 0.9221407540007626, iteration: 38027
loss: 0.9565214514732361,grad_norm: 0.9999991214407705, iteration: 38028
loss: 0.9843185544013977,grad_norm: 0.9685400965354418, iteration: 38029
loss: 1.0221093893051147,grad_norm: 0.9067013704516032, iteration: 38030
loss: 0.9953866004943848,grad_norm: 0.8673705626934421, iteration: 38031
loss: 0.9659968614578247,grad_norm: 0.9035008900725855, iteration: 38032
loss: 1.0037277936935425,grad_norm: 0.8992933356056982, iteration: 38033
loss: 1.0322140455245972,grad_norm: 0.9127171888862745, iteration: 38034
loss: 1.0062470436096191,grad_norm: 0.9999992526929352, iteration: 38035
loss: 1.0313811302185059,grad_norm: 0.9999994036868931, iteration: 38036
loss: 0.9590819478034973,grad_norm: 0.9514286104571167, iteration: 38037
loss: 0.9927079677581787,grad_norm: 0.9999991664727231, iteration: 38038
loss: 0.967466413974762,grad_norm: 0.999999142912966, iteration: 38039
loss: 1.0412005186080933,grad_norm: 0.9999993083568878, iteration: 38040
loss: 0.9993173480033875,grad_norm: 0.9044617472337501, iteration: 38041
loss: 1.0647989511489868,grad_norm: 0.9999994125840348, iteration: 38042
loss: 1.0255155563354492,grad_norm: 0.9999990482431749, iteration: 38043
loss: 1.011006236076355,grad_norm: 0.9978935165659858, iteration: 38044
loss: 0.9848304390907288,grad_norm: 0.91558506144136, iteration: 38045
loss: 1.0075092315673828,grad_norm: 0.9999993077308316, iteration: 38046
loss: 0.9994428753852844,grad_norm: 0.951257992409368, iteration: 38047
loss: 0.9965896010398865,grad_norm: 0.9427782199009013, iteration: 38048
loss: 0.9954379796981812,grad_norm: 0.9999991284404931, iteration: 38049
loss: 0.9858978986740112,grad_norm: 0.9999991276960613, iteration: 38050
loss: 0.9770851731300354,grad_norm: 0.7470262484428731, iteration: 38051
loss: 0.959590494632721,grad_norm: 0.940515406638787, iteration: 38052
loss: 1.0172388553619385,grad_norm: 0.9999991208172199, iteration: 38053
loss: 0.9844163060188293,grad_norm: 0.999999898518466, iteration: 38054
loss: 0.9978778958320618,grad_norm: 0.9999994022715377, iteration: 38055
loss: 1.0099745988845825,grad_norm: 0.9999990794353985, iteration: 38056
loss: 0.9953728914260864,grad_norm: 0.9999991668234229, iteration: 38057
loss: 1.0235681533813477,grad_norm: 0.9999989751702494, iteration: 38058
loss: 0.9864098429679871,grad_norm: 0.9999991935722501, iteration: 38059
loss: 0.9951255917549133,grad_norm: 0.999999133961666, iteration: 38060
loss: 1.0222482681274414,grad_norm: 0.9999991695902313, iteration: 38061
loss: 0.9874334931373596,grad_norm: 0.9670786639071027, iteration: 38062
loss: 1.0017298460006714,grad_norm: 0.9999989912404779, iteration: 38063
loss: 1.0184531211853027,grad_norm: 0.9370589943488812, iteration: 38064
loss: 1.0408892631530762,grad_norm: 0.9999992081358131, iteration: 38065
loss: 0.9899150133132935,grad_norm: 0.9999991063875077, iteration: 38066
loss: 0.984880805015564,grad_norm: 0.7805996748573997, iteration: 38067
loss: 1.003528356552124,grad_norm: 0.9999991667286077, iteration: 38068
loss: 1.039718508720398,grad_norm: 0.9999996375628407, iteration: 38069
loss: 0.9724966883659363,grad_norm: 0.9999991779088943, iteration: 38070
loss: 0.9926855564117432,grad_norm: 0.9089999399943302, iteration: 38071
loss: 1.0320791006088257,grad_norm: 0.9999990685490519, iteration: 38072
loss: 1.0497602224349976,grad_norm: 0.9999991474831907, iteration: 38073
loss: 1.0241321325302124,grad_norm: 0.9961278389857952, iteration: 38074
loss: 1.00049889087677,grad_norm: 0.9885726078354359, iteration: 38075
loss: 0.9962580800056458,grad_norm: 0.7808474772672045, iteration: 38076
loss: 1.0028517246246338,grad_norm: 0.9248914525143971, iteration: 38077
loss: 1.006428599357605,grad_norm: 0.9999990338922963, iteration: 38078
loss: 0.9870305061340332,grad_norm: 0.9999991425420486, iteration: 38079
loss: 0.990905225276947,grad_norm: 0.9999990850923344, iteration: 38080
loss: 1.0281375646591187,grad_norm: 0.8946012591909058, iteration: 38081
loss: 0.9960062503814697,grad_norm: 0.9999991217206204, iteration: 38082
loss: 1.016266942024231,grad_norm: 0.906493329898919, iteration: 38083
loss: 0.9689759016036987,grad_norm: 0.9999991969008417, iteration: 38084
loss: 0.9838706254959106,grad_norm: 0.9999990600749566, iteration: 38085
loss: 1.0378745794296265,grad_norm: 0.9999996901719942, iteration: 38086
loss: 1.0047717094421387,grad_norm: 0.7922770718498798, iteration: 38087
loss: 0.9632648229598999,grad_norm: 0.9999991049559673, iteration: 38088
loss: 1.0279288291931152,grad_norm: 0.77268220629643, iteration: 38089
loss: 0.9892330169677734,grad_norm: 0.9999990378321978, iteration: 38090
loss: 1.035159945487976,grad_norm: 0.9999996732640377, iteration: 38091
loss: 1.015217661857605,grad_norm: 0.9999991005068154, iteration: 38092
loss: 1.0325846672058105,grad_norm: 0.9045682279493589, iteration: 38093
loss: 1.0133099555969238,grad_norm: 0.9211871942130195, iteration: 38094
loss: 1.012908935546875,grad_norm: 0.9054398422919413, iteration: 38095
loss: 0.9908938407897949,grad_norm: 0.9571465472968708, iteration: 38096
loss: 1.0453275442123413,grad_norm: 0.9999992271196755, iteration: 38097
loss: 1.0015548467636108,grad_norm: 0.9100310092892168, iteration: 38098
loss: 0.989997923374176,grad_norm: 0.8247941731531407, iteration: 38099
loss: 0.9942850470542908,grad_norm: 0.9999989195305407, iteration: 38100
loss: 0.9850126504898071,grad_norm: 0.9999991710675562, iteration: 38101
loss: 0.9970364570617676,grad_norm: 0.9999992011354665, iteration: 38102
loss: 1.0371084213256836,grad_norm: 0.9917521566634663, iteration: 38103
loss: 1.0363622903823853,grad_norm: 0.9824487583833188, iteration: 38104
loss: 1.0156011581420898,grad_norm: 0.9614715547462336, iteration: 38105
loss: 1.0084260702133179,grad_norm: 0.999999163923415, iteration: 38106
loss: 0.9992852210998535,grad_norm: 0.7121168418397876, iteration: 38107
loss: 0.9899709820747375,grad_norm: 0.9999992467856768, iteration: 38108
loss: 0.9768097400665283,grad_norm: 0.841670469400741, iteration: 38109
loss: 0.9964787364006042,grad_norm: 0.939472422308369, iteration: 38110
loss: 0.9775297045707703,grad_norm: 0.9999993355700728, iteration: 38111
loss: 1.0061241388320923,grad_norm: 0.7713385653526531, iteration: 38112
loss: 1.003540277481079,grad_norm: 0.9999991089656096, iteration: 38113
loss: 0.9702620506286621,grad_norm: 0.9999989892390437, iteration: 38114
loss: 1.024330496788025,grad_norm: 0.9599936995167705, iteration: 38115
loss: 1.0536274909973145,grad_norm: 0.9999991745743748, iteration: 38116
loss: 1.0119210481643677,grad_norm: 0.9352870253730132, iteration: 38117
loss: 1.0764325857162476,grad_norm: 0.9999998390858874, iteration: 38118
loss: 0.9824122786521912,grad_norm: 0.8747013928342975, iteration: 38119
loss: 1.037846565246582,grad_norm: 0.908779918695852, iteration: 38120
loss: 1.007049560546875,grad_norm: 0.7961186100984372, iteration: 38121
loss: 1.0289170742034912,grad_norm: 0.9641346001411626, iteration: 38122
loss: 1.025743007659912,grad_norm: 0.9999992476258787, iteration: 38123
loss: 1.0096583366394043,grad_norm: 0.9999991193221116, iteration: 38124
loss: 1.0405405759811401,grad_norm: 0.9999991782909649, iteration: 38125
loss: 1.0077255964279175,grad_norm: 0.9999994166999813, iteration: 38126
loss: 0.9846636652946472,grad_norm: 0.9403827707069076, iteration: 38127
loss: 1.0670456886291504,grad_norm: 0.9999993485523909, iteration: 38128
loss: 1.0070688724517822,grad_norm: 0.8671854357505688, iteration: 38129
loss: 0.928473949432373,grad_norm: 0.9054410093468933, iteration: 38130
loss: 1.0514743328094482,grad_norm: 0.9064791474019954, iteration: 38131
loss: 0.9639202356338501,grad_norm: 0.9298537289094979, iteration: 38132
loss: 1.0323686599731445,grad_norm: 0.9999991544237321, iteration: 38133
loss: 1.0123472213745117,grad_norm: 0.9999992002657009, iteration: 38134
loss: 1.020980715751648,grad_norm: 0.7860087194141715, iteration: 38135
loss: 1.0375001430511475,grad_norm: 0.8557907181120372, iteration: 38136
loss: 1.0057774782180786,grad_norm: 0.9471376480475082, iteration: 38137
loss: 1.0150330066680908,grad_norm: 0.9153544016265193, iteration: 38138
loss: 0.9965118765830994,grad_norm: 0.8907887557200761, iteration: 38139
loss: 1.0078635215759277,grad_norm: 0.9379191461759299, iteration: 38140
loss: 1.0272562503814697,grad_norm: 0.9999997339964842, iteration: 38141
loss: 1.0294853448867798,grad_norm: 0.9113347037272289, iteration: 38142
loss: 0.9835599660873413,grad_norm: 0.9760396559306297, iteration: 38143
loss: 1.0156291723251343,grad_norm: 0.856624275235931, iteration: 38144
loss: 0.9840207099914551,grad_norm: 0.9999990959665417, iteration: 38145
loss: 0.9848184585571289,grad_norm: 0.999999037821678, iteration: 38146
loss: 0.9939320683479309,grad_norm: 0.9973753165839867, iteration: 38147
loss: 0.987562894821167,grad_norm: 0.9135417041349461, iteration: 38148
loss: 0.9743165969848633,grad_norm: 0.9442039880414125, iteration: 38149
loss: 1.0267258882522583,grad_norm: 0.9618161814377255, iteration: 38150
loss: 0.9680470824241638,grad_norm: 0.9999993776965637, iteration: 38151
loss: 1.0160588026046753,grad_norm: 0.9999989624900469, iteration: 38152
loss: 0.9911831617355347,grad_norm: 0.9795928110719844, iteration: 38153
loss: 1.002123475074768,grad_norm: 0.9999990765501191, iteration: 38154
loss: 0.9806484580039978,grad_norm: 0.9394641119371816, iteration: 38155
loss: 1.0017520189285278,grad_norm: 0.9999990465350966, iteration: 38156
loss: 1.0229413509368896,grad_norm: 0.9948266384936386, iteration: 38157
loss: 0.996930718421936,grad_norm: 0.7870032379849965, iteration: 38158
loss: 0.9823946356773376,grad_norm: 0.9999991928556877, iteration: 38159
loss: 1.0427008867263794,grad_norm: 0.999999214784652, iteration: 38160
loss: 1.0183035135269165,grad_norm: 0.9999996070491539, iteration: 38161
loss: 0.9722803235054016,grad_norm: 0.9789024803938075, iteration: 38162
loss: 1.01790189743042,grad_norm: 0.99999911074658, iteration: 38163
loss: 0.9928032159805298,grad_norm: 0.999999191161899, iteration: 38164
loss: 1.000598430633545,grad_norm: 0.9999990125669955, iteration: 38165
loss: 1.0114808082580566,grad_norm: 0.9863759323485419, iteration: 38166
loss: 1.0236738920211792,grad_norm: 0.9999989949064642, iteration: 38167
loss: 1.0080227851867676,grad_norm: 0.9999991059203766, iteration: 38168
loss: 1.0181702375411987,grad_norm: 0.9999992051806678, iteration: 38169
loss: 0.9539146423339844,grad_norm: 0.9999992129510883, iteration: 38170
loss: 0.984498918056488,grad_norm: 0.9335017926814483, iteration: 38171
loss: 0.9585469961166382,grad_norm: 0.9999992961830668, iteration: 38172
loss: 0.9920875430107117,grad_norm: 0.9999995795047196, iteration: 38173
loss: 0.9586807489395142,grad_norm: 0.891833898737701, iteration: 38174
loss: 1.0206451416015625,grad_norm: 0.8532557743325249, iteration: 38175
loss: 1.0261585712432861,grad_norm: 0.9372020295777619, iteration: 38176
loss: 0.9937644600868225,grad_norm: 0.9008324069274901, iteration: 38177
loss: 1.000569224357605,grad_norm: 0.8376248393126394, iteration: 38178
loss: 1.0001819133758545,grad_norm: 0.9473841899225859, iteration: 38179
loss: 1.0496119260787964,grad_norm: 0.9999991725026194, iteration: 38180
loss: 1.0056164264678955,grad_norm: 0.999999112575187, iteration: 38181
loss: 0.9813615083694458,grad_norm: 0.930123219696534, iteration: 38182
loss: 1.0208823680877686,grad_norm: 0.9999990936703563, iteration: 38183
loss: 0.9843349456787109,grad_norm: 0.8581180686426841, iteration: 38184
loss: 1.004266381263733,grad_norm: 0.9741989663361582, iteration: 38185
loss: 1.0161473751068115,grad_norm: 0.9999993350601775, iteration: 38186
loss: 0.9520387053489685,grad_norm: 0.7817192543937479, iteration: 38187
loss: 0.9530543088912964,grad_norm: 0.9999991068757201, iteration: 38188
loss: 1.0085541009902954,grad_norm: 0.840690438954876, iteration: 38189
loss: 1.0237728357315063,grad_norm: 0.9786424614803673, iteration: 38190
loss: 0.9743404984474182,grad_norm: 0.8954950225880095, iteration: 38191
loss: 1.0261191129684448,grad_norm: 0.8264066246585545, iteration: 38192
loss: 0.9932512044906616,grad_norm: 0.9448515613517786, iteration: 38193
loss: 0.9950271844863892,grad_norm: 0.9999990715811948, iteration: 38194
loss: 0.9890439510345459,grad_norm: 0.7720025558569426, iteration: 38195
loss: 0.9865463972091675,grad_norm: 0.9999991864729361, iteration: 38196
loss: 0.9800441265106201,grad_norm: 0.8636335498081167, iteration: 38197
loss: 0.9905431270599365,grad_norm: 0.8911596331248335, iteration: 38198
loss: 0.9520136713981628,grad_norm: 0.9999992416791283, iteration: 38199
loss: 1.0180178880691528,grad_norm: 0.9999991975917972, iteration: 38200
loss: 0.9776406288146973,grad_norm: 0.8975432950509161, iteration: 38201
loss: 1.0276679992675781,grad_norm: 0.9763686833576077, iteration: 38202
loss: 1.0642942190170288,grad_norm: 0.9999996172776966, iteration: 38203
loss: 1.0205093622207642,grad_norm: 0.9467104297307032, iteration: 38204
loss: 1.0089865922927856,grad_norm: 0.8829372123677519, iteration: 38205
loss: 1.0062143802642822,grad_norm: 0.8671267561905033, iteration: 38206
loss: 1.0275852680206299,grad_norm: 0.916228013375713, iteration: 38207
loss: 0.99407559633255,grad_norm: 0.9717388804209695, iteration: 38208
loss: 0.9872068166732788,grad_norm: 0.8900026387670741, iteration: 38209
loss: 1.0134648084640503,grad_norm: 0.9999989720417334, iteration: 38210
loss: 1.0129021406173706,grad_norm: 0.8663743581880142, iteration: 38211
loss: 1.0172994136810303,grad_norm: 0.9999998822914125, iteration: 38212
loss: 1.0222892761230469,grad_norm: 0.9999991447612849, iteration: 38213
loss: 1.0207031965255737,grad_norm: 0.9999994977762067, iteration: 38214
loss: 0.9982441663742065,grad_norm: 0.9999990315542135, iteration: 38215
loss: 1.0108033418655396,grad_norm: 0.999999115270035, iteration: 38216
loss: 1.01229989528656,grad_norm: 0.999999765026051, iteration: 38217
loss: 1.014499545097351,grad_norm: 0.8131103031986378, iteration: 38218
loss: 1.0082935094833374,grad_norm: 0.9999990799659902, iteration: 38219
loss: 1.0315769910812378,grad_norm: 0.9999992604926744, iteration: 38220
loss: 1.0076038837432861,grad_norm: 0.9999996555505628, iteration: 38221
loss: 0.9881502389907837,grad_norm: 0.9999992370534045, iteration: 38222
loss: 1.031833529472351,grad_norm: 0.9999991931479658, iteration: 38223
loss: 1.0434783697128296,grad_norm: 0.9999990363638732, iteration: 38224
loss: 1.0218935012817383,grad_norm: 0.9992753378015498, iteration: 38225
loss: 1.0240260362625122,grad_norm: 0.9999996013663891, iteration: 38226
loss: 1.0363675355911255,grad_norm: 0.9999993176616289, iteration: 38227
loss: 1.0417345762252808,grad_norm: 0.9999991101185213, iteration: 38228
loss: 1.1372162103652954,grad_norm: 0.999999542656812, iteration: 38229
loss: 1.0549832582473755,grad_norm: 0.9999995827751735, iteration: 38230
loss: 0.988645613193512,grad_norm: 0.9999992938779027, iteration: 38231
loss: 1.0439586639404297,grad_norm: 0.9999990603032919, iteration: 38232
loss: 1.0243812799453735,grad_norm: 0.9999991876875922, iteration: 38233
loss: 1.0465126037597656,grad_norm: 0.9999991012867905, iteration: 38234
loss: 1.0074677467346191,grad_norm: 0.9999992052541407, iteration: 38235
loss: 0.9992662668228149,grad_norm: 0.9999992327715241, iteration: 38236
loss: 1.0546666383743286,grad_norm: 0.9999993908881785, iteration: 38237
loss: 0.9873185157775879,grad_norm: 0.7940730620741697, iteration: 38238
loss: 1.0010826587677002,grad_norm: 0.8528745382531214, iteration: 38239
loss: 0.979160726070404,grad_norm: 0.9999991274265206, iteration: 38240
loss: 0.9814678430557251,grad_norm: 0.9999991607103939, iteration: 38241
loss: 1.0245765447616577,grad_norm: 0.9999995008683336, iteration: 38242
loss: 0.9601994752883911,grad_norm: 0.9436169742646404, iteration: 38243
loss: 1.0212959051132202,grad_norm: 0.9999994015585602, iteration: 38244
loss: 1.0671939849853516,grad_norm: 0.999999304422359, iteration: 38245
loss: 1.0365122556686401,grad_norm: 0.9999990584559286, iteration: 38246
loss: 1.0787124633789062,grad_norm: 0.9999996487127792, iteration: 38247
loss: 0.9755135774612427,grad_norm: 0.9999989604741137, iteration: 38248
loss: 1.0009527206420898,grad_norm: 0.9423155355079108, iteration: 38249
loss: 0.9741764664649963,grad_norm: 0.9999992620323656, iteration: 38250
loss: 1.0139803886413574,grad_norm: 0.9999993755904609, iteration: 38251
loss: 0.9982707500457764,grad_norm: 0.9999991062826719, iteration: 38252
loss: 0.9928802251815796,grad_norm: 0.9999990086137199, iteration: 38253
loss: 0.9992125034332275,grad_norm: 0.9999992789456082, iteration: 38254
loss: 1.0371975898742676,grad_norm: 0.9999992385123114, iteration: 38255
loss: 0.9835588335990906,grad_norm: 0.9999991641853343, iteration: 38256
loss: 0.9959345459938049,grad_norm: 0.999999061253849, iteration: 38257
loss: 1.0080039501190186,grad_norm: 0.9999990237873829, iteration: 38258
loss: 1.0206544399261475,grad_norm: 0.9873390476815909, iteration: 38259
loss: 1.004692792892456,grad_norm: 0.9999991446226394, iteration: 38260
loss: 1.0087467432022095,grad_norm: 0.9999991689480422, iteration: 38261
loss: 0.9751812219619751,grad_norm: 0.9999990917231002, iteration: 38262
loss: 1.0327705144882202,grad_norm: 0.9999992348212532, iteration: 38263
loss: 1.0108929872512817,grad_norm: 0.8715903860051527, iteration: 38264
loss: 0.9842089414596558,grad_norm: 0.8613237056566019, iteration: 38265
loss: 1.00810968875885,grad_norm: 0.925202494856981, iteration: 38266
loss: 1.033850908279419,grad_norm: 0.9999993121369828, iteration: 38267
loss: 1.0235012769699097,grad_norm: 0.9999996459686917, iteration: 38268
loss: 1.003252387046814,grad_norm: 0.9887777218403857, iteration: 38269
loss: 1.1711477041244507,grad_norm: 0.9999997568185588, iteration: 38270
loss: 0.9640170335769653,grad_norm: 0.9999996183820627, iteration: 38271
loss: 1.0244762897491455,grad_norm: 0.9008111344058639, iteration: 38272
loss: 0.968970537185669,grad_norm: 0.9376695381726519, iteration: 38273
loss: 1.0335787534713745,grad_norm: 0.8174669891684273, iteration: 38274
loss: 0.9578002095222473,grad_norm: 0.9436908335044949, iteration: 38275
loss: 0.9801154136657715,grad_norm: 0.9361258030409338, iteration: 38276
loss: 0.9889218807220459,grad_norm: 0.8378763348938495, iteration: 38277
loss: 1.2250257730484009,grad_norm: 0.9999998563166902, iteration: 38278
loss: 1.02296781539917,grad_norm: 0.9999991057230306, iteration: 38279
loss: 1.0053821802139282,grad_norm: 0.9999994647358105, iteration: 38280
loss: 0.9511831402778625,grad_norm: 0.9115307883479887, iteration: 38281
loss: 1.0300538539886475,grad_norm: 0.9999992142613948, iteration: 38282
loss: 0.9652333855628967,grad_norm: 0.9999992556309198, iteration: 38283
loss: 1.0146759748458862,grad_norm: 0.8644366375571834, iteration: 38284
loss: 1.0874195098876953,grad_norm: 0.9999991458924571, iteration: 38285
loss: 0.9472681879997253,grad_norm: 0.9694270537872686, iteration: 38286
loss: 1.000527024269104,grad_norm: 0.8813422832633068, iteration: 38287
loss: 1.0253055095672607,grad_norm: 0.9999991900190195, iteration: 38288
loss: 0.9665642976760864,grad_norm: 0.9426066607934471, iteration: 38289
loss: 1.0180302858352661,grad_norm: 0.9545377222406695, iteration: 38290
loss: 1.0438215732574463,grad_norm: 0.9999991964235827, iteration: 38291
loss: 0.9724560379981995,grad_norm: 0.8419000455613992, iteration: 38292
loss: 0.9967508912086487,grad_norm: 0.9999990754431318, iteration: 38293
loss: 1.0088917016983032,grad_norm: 0.999999068137649, iteration: 38294
loss: 1.017223834991455,grad_norm: 0.99999912681185, iteration: 38295
loss: 1.0014697313308716,grad_norm: 0.9304909329023386, iteration: 38296
loss: 1.0372809171676636,grad_norm: 0.9799253898067779, iteration: 38297
loss: 0.992420494556427,grad_norm: 0.9999992617312384, iteration: 38298
loss: 1.0149120092391968,grad_norm: 0.99999913828081, iteration: 38299
loss: 1.0103105306625366,grad_norm: 0.9999991839056733, iteration: 38300
loss: 1.007825255393982,grad_norm: 0.8884407361073416, iteration: 38301
loss: 1.0337218046188354,grad_norm: 0.8831650603111546, iteration: 38302
loss: 1.0240001678466797,grad_norm: 0.9999993614566657, iteration: 38303
loss: 1.008466124534607,grad_norm: 0.9793299230534409, iteration: 38304
loss: 1.0499699115753174,grad_norm: 0.9999994401657798, iteration: 38305
loss: 1.0233936309814453,grad_norm: 0.9999991677313478, iteration: 38306
loss: 1.0087437629699707,grad_norm: 0.922147643799275, iteration: 38307
loss: 1.0183677673339844,grad_norm: 0.9684152240802838, iteration: 38308
loss: 1.0568046569824219,grad_norm: 0.9999997243507402, iteration: 38309
loss: 1.0161148309707642,grad_norm: 0.9999992938741096, iteration: 38310
loss: 1.0900589227676392,grad_norm: 0.9999993854628535, iteration: 38311
loss: 1.0071228742599487,grad_norm: 0.9999990580016489, iteration: 38312
loss: 0.9813563227653503,grad_norm: 0.9999995922141298, iteration: 38313
loss: 1.043642520904541,grad_norm: 0.9286563248687852, iteration: 38314
loss: 1.032563328742981,grad_norm: 0.931639721735786, iteration: 38315
loss: 1.0163646936416626,grad_norm: 0.9392289689854967, iteration: 38316
loss: 1.0191047191619873,grad_norm: 0.9999996116743581, iteration: 38317
loss: 1.0247722864151,grad_norm: 0.8570865903859036, iteration: 38318
loss: 1.0089025497436523,grad_norm: 0.921006287846761, iteration: 38319
loss: 0.9780901074409485,grad_norm: 0.9167544097998394, iteration: 38320
loss: 1.0130343437194824,grad_norm: 0.9999991745301254, iteration: 38321
loss: 1.0227293968200684,grad_norm: 0.9071336607816716, iteration: 38322
loss: 0.9565172791481018,grad_norm: 0.8798648478056751, iteration: 38323
loss: 0.9911726117134094,grad_norm: 0.8478712923104613, iteration: 38324
loss: 0.9948718547821045,grad_norm: 0.9999995630712629, iteration: 38325
loss: 1.0078984498977661,grad_norm: 0.9999991043534546, iteration: 38326
loss: 1.0136897563934326,grad_norm: 0.9634076926996944, iteration: 38327
loss: 0.9709759950637817,grad_norm: 0.9647977877585614, iteration: 38328
loss: 0.9790239930152893,grad_norm: 0.8774358657450114, iteration: 38329
loss: 1.0415403842926025,grad_norm: 0.9999990256254473, iteration: 38330
loss: 1.0120316743850708,grad_norm: 0.9999998235556812, iteration: 38331
loss: 1.0181169509887695,grad_norm: 0.775966650673738, iteration: 38332
loss: 0.9966028332710266,grad_norm: 0.9443872256309195, iteration: 38333
loss: 0.9644715785980225,grad_norm: 0.8840601195431158, iteration: 38334
loss: 1.0055208206176758,grad_norm: 0.9626631387306616, iteration: 38335
loss: 1.054893970489502,grad_norm: 0.9999995692456761, iteration: 38336
loss: 1.0060573816299438,grad_norm: 0.9999991644845404, iteration: 38337
loss: 0.9733102321624756,grad_norm: 0.9999990194358024, iteration: 38338
loss: 0.9953016638755798,grad_norm: 0.999999049521196, iteration: 38339
loss: 0.9911476373672485,grad_norm: 0.885753739932244, iteration: 38340
loss: 0.9979400634765625,grad_norm: 0.8838554131515972, iteration: 38341
loss: 0.9821614027023315,grad_norm: 0.9520581872026529, iteration: 38342
loss: 1.0159244537353516,grad_norm: 0.9035410539142175, iteration: 38343
loss: 1.0116342306137085,grad_norm: 0.999999437637694, iteration: 38344
loss: 0.9934065341949463,grad_norm: 0.8098411993694128, iteration: 38345
loss: 1.0471056699752808,grad_norm: 0.9999991157453129, iteration: 38346
loss: 1.0167295932769775,grad_norm: 0.8536178764680769, iteration: 38347
loss: 1.0268425941467285,grad_norm: 0.9999991932820298, iteration: 38348
loss: 1.0260124206542969,grad_norm: 0.9246076495088483, iteration: 38349
loss: 1.0098119974136353,grad_norm: 0.9999992288664202, iteration: 38350
loss: 0.9950363636016846,grad_norm: 0.9999990066545423, iteration: 38351
loss: 1.051282286643982,grad_norm: 0.9999992105184938, iteration: 38352
loss: 0.9938095808029175,grad_norm: 0.9018183928602973, iteration: 38353
loss: 1.0336588621139526,grad_norm: 0.9364588273071931, iteration: 38354
loss: 0.995351254940033,grad_norm: 0.9999991583324545, iteration: 38355
loss: 1.079525351524353,grad_norm: 0.9999995613804796, iteration: 38356
loss: 0.9811996817588806,grad_norm: 0.9999990888239224, iteration: 38357
loss: 1.0031752586364746,grad_norm: 0.93993026907772, iteration: 38358
loss: 1.0108537673950195,grad_norm: 0.8994019708417614, iteration: 38359
loss: 1.0515825748443604,grad_norm: 0.9999989340341131, iteration: 38360
loss: 0.9690054059028625,grad_norm: 0.9999991474264586, iteration: 38361
loss: 1.0165624618530273,grad_norm: 0.9999993189210175, iteration: 38362
loss: 0.982049822807312,grad_norm: 0.808976954962804, iteration: 38363
loss: 1.0057553052902222,grad_norm: 0.9940625968880801, iteration: 38364
loss: 1.037001132965088,grad_norm: 0.9693796736901604, iteration: 38365
loss: 0.9923505187034607,grad_norm: 0.9999991226337913, iteration: 38366
loss: 1.0321460962295532,grad_norm: 0.9156581857771543, iteration: 38367
loss: 1.0238587856292725,grad_norm: 0.999999144805515, iteration: 38368
loss: 1.0406694412231445,grad_norm: 0.9999996020480779, iteration: 38369
loss: 0.9668723940849304,grad_norm: 0.9999992131529957, iteration: 38370
loss: 1.0005874633789062,grad_norm: 0.9999991185803683, iteration: 38371
loss: 1.009330153465271,grad_norm: 0.9999992421014493, iteration: 38372
loss: 0.9669100642204285,grad_norm: 0.9999990739677332, iteration: 38373
loss: 0.9959098696708679,grad_norm: 0.9999990707006938, iteration: 38374
loss: 0.9638054966926575,grad_norm: 0.9999990609421618, iteration: 38375
loss: 1.0095195770263672,grad_norm: 0.9999992448842009, iteration: 38376
loss: 0.9756317138671875,grad_norm: 0.999999064279502, iteration: 38377
loss: 0.9755471348762512,grad_norm: 0.9999992114222894, iteration: 38378
loss: 1.0228997468948364,grad_norm: 0.8660478093754479, iteration: 38379
loss: 1.027166724205017,grad_norm: 0.9999991134220924, iteration: 38380
loss: 1.0107213258743286,grad_norm: 0.9848928830580607, iteration: 38381
loss: 1.0398812294006348,grad_norm: 0.964327904764631, iteration: 38382
loss: 1.0272393226623535,grad_norm: 0.9999992749690553, iteration: 38383
loss: 1.02483069896698,grad_norm: 0.8463574600057904, iteration: 38384
loss: 1.0062406063079834,grad_norm: 0.7858946392322478, iteration: 38385
loss: 0.9784695506095886,grad_norm: 0.995766227422367, iteration: 38386
loss: 1.0307655334472656,grad_norm: 0.9006445474766884, iteration: 38387
loss: 0.9764048457145691,grad_norm: 0.9638958779407321, iteration: 38388
loss: 1.042052149772644,grad_norm: 0.9999991515540952, iteration: 38389
loss: 0.9520381093025208,grad_norm: 0.9999991005099432, iteration: 38390
loss: 1.0192785263061523,grad_norm: 0.9934367212612579, iteration: 38391
loss: 0.9973544478416443,grad_norm: 0.9999990833141723, iteration: 38392
loss: 0.9989067912101746,grad_norm: 0.9999990593498493, iteration: 38393
loss: 1.026208519935608,grad_norm: 0.9999994285337777, iteration: 38394
loss: 0.9739142060279846,grad_norm: 0.8867715156569431, iteration: 38395
loss: 0.9981132745742798,grad_norm: 0.9999990817779658, iteration: 38396
loss: 1.020155429840088,grad_norm: 0.9999997844018558, iteration: 38397
loss: 0.9803546667098999,grad_norm: 0.9766313618351936, iteration: 38398
loss: 1.0398104190826416,grad_norm: 0.9999997263920343, iteration: 38399
loss: 1.0527620315551758,grad_norm: 0.9999990451729689, iteration: 38400
loss: 1.006787896156311,grad_norm: 0.9999991000746247, iteration: 38401
loss: 1.025407314300537,grad_norm: 0.9999991474666396, iteration: 38402
loss: 0.9952909350395203,grad_norm: 0.9082846273677333, iteration: 38403
loss: 0.9977501034736633,grad_norm: 0.9999991445217312, iteration: 38404
loss: 1.008654236793518,grad_norm: 0.9999990932807612, iteration: 38405
loss: 1.0100786685943604,grad_norm: 0.9999991387955162, iteration: 38406
loss: 0.9900609850883484,grad_norm: 0.8117895249977768, iteration: 38407
loss: 1.0200679302215576,grad_norm: 0.9934608288613546, iteration: 38408
loss: 0.9911297559738159,grad_norm: 0.9955692461612837, iteration: 38409
loss: 0.9949349761009216,grad_norm: 0.9999989874586734, iteration: 38410
loss: 0.9935048818588257,grad_norm: 0.9999991540520028, iteration: 38411
loss: 1.029948353767395,grad_norm: 0.9365397441812698, iteration: 38412
loss: 1.0159040689468384,grad_norm: 0.9208958680534945, iteration: 38413
loss: 1.044196605682373,grad_norm: 0.999999308195286, iteration: 38414
loss: 0.9921970963478088,grad_norm: 0.999999168838615, iteration: 38415
loss: 1.0003321170806885,grad_norm: 0.999999168674677, iteration: 38416
loss: 1.0357714891433716,grad_norm: 0.9701958763875811, iteration: 38417
loss: 0.9963693618774414,grad_norm: 0.9999991332433062, iteration: 38418
loss: 0.9955657124519348,grad_norm: 0.9999991185170065, iteration: 38419
loss: 1.010210633277893,grad_norm: 0.9263167081578556, iteration: 38420
loss: 1.011427879333496,grad_norm: 0.857680679782513, iteration: 38421
loss: 1.0341694355010986,grad_norm: 0.9289543837384556, iteration: 38422
loss: 0.9799922108650208,grad_norm: 0.9478114988549323, iteration: 38423
loss: 1.0313690900802612,grad_norm: 0.9999989490010055, iteration: 38424
loss: 1.081331491470337,grad_norm: 0.9999996925657124, iteration: 38425
loss: 1.0172626972198486,grad_norm: 0.9999994169589136, iteration: 38426
loss: 1.018891453742981,grad_norm: 0.9999990966073259, iteration: 38427
loss: 0.9925736784934998,grad_norm: 0.807208932214738, iteration: 38428
loss: 1.038040041923523,grad_norm: 0.9999992820183575, iteration: 38429
loss: 1.0179458856582642,grad_norm: 0.9624419920735888, iteration: 38430
loss: 1.0171716213226318,grad_norm: 0.8911478361561136, iteration: 38431
loss: 1.0006470680236816,grad_norm: 0.9966762624273253, iteration: 38432
loss: 1.0321662425994873,grad_norm: 0.9215731659304849, iteration: 38433
loss: 1.0016841888427734,grad_norm: 0.8768279125506533, iteration: 38434
loss: 0.9794384837150574,grad_norm: 0.9227866684945345, iteration: 38435
loss: 1.0159661769866943,grad_norm: 0.9907509301326364, iteration: 38436
loss: 0.9980866312980652,grad_norm: 0.9999991642252652, iteration: 38437
loss: 0.9742351174354553,grad_norm: 0.9999990198961118, iteration: 38438
loss: 1.0179353952407837,grad_norm: 0.9999991635969543, iteration: 38439
loss: 1.0881028175354004,grad_norm: 0.9999997218291489, iteration: 38440
loss: 1.0185974836349487,grad_norm: 0.9999991107887651, iteration: 38441
loss: 0.9935357570648193,grad_norm: 0.9999992197715059, iteration: 38442
loss: 1.0251097679138184,grad_norm: 0.9499510754350619, iteration: 38443
loss: 1.0189861059188843,grad_norm: 0.9999990783762085, iteration: 38444
loss: 1.0527106523513794,grad_norm: 0.9036817399424916, iteration: 38445
loss: 1.0121153593063354,grad_norm: 0.9999992949365175, iteration: 38446
loss: 1.029557466506958,grad_norm: 0.999999057220457, iteration: 38447
loss: 0.9970640540122986,grad_norm: 0.8037714823559517, iteration: 38448
loss: 1.0329325199127197,grad_norm: 0.9999991412884977, iteration: 38449
loss: 1.0080806016921997,grad_norm: 0.9999991258490724, iteration: 38450
loss: 0.969668447971344,grad_norm: 0.9429055343550794, iteration: 38451
loss: 0.9754236936569214,grad_norm: 0.9404554010968028, iteration: 38452
loss: 1.0274041891098022,grad_norm: 0.999999329841606, iteration: 38453
loss: 0.9871074557304382,grad_norm: 0.9999990710445563, iteration: 38454
loss: 0.968786358833313,grad_norm: 0.9021083575374694, iteration: 38455
loss: 0.9820538759231567,grad_norm: 0.8243428487754351, iteration: 38456
loss: 1.018082857131958,grad_norm: 0.960256838092813, iteration: 38457
loss: 0.9651070833206177,grad_norm: 0.9999992456583656, iteration: 38458
loss: 1.0445009469985962,grad_norm: 0.9999991457767226, iteration: 38459
loss: 1.041991949081421,grad_norm: 0.9999990988224009, iteration: 38460
loss: 0.9890352487564087,grad_norm: 0.999999183920932, iteration: 38461
loss: 1.0225468873977661,grad_norm: 0.9999992335622762, iteration: 38462
loss: 1.0028921365737915,grad_norm: 0.9486787658263424, iteration: 38463
loss: 1.0239322185516357,grad_norm: 0.9999990947256385, iteration: 38464
loss: 1.0292340517044067,grad_norm: 0.977640981176193, iteration: 38465
loss: 0.9872094392776489,grad_norm: 0.901552254093014, iteration: 38466
loss: 1.006656289100647,grad_norm: 0.9999989190168039, iteration: 38467
loss: 1.0066189765930176,grad_norm: 0.9894315464997814, iteration: 38468
loss: 0.9912376999855042,grad_norm: 0.7966692399179193, iteration: 38469
loss: 1.0217602252960205,grad_norm: 0.9799897687551132, iteration: 38470
loss: 0.979894757270813,grad_norm: 0.9999993633105765, iteration: 38471
loss: 0.9928061366081238,grad_norm: 0.9999991016304969, iteration: 38472
loss: 1.0389217138290405,grad_norm: 0.9999995035402296, iteration: 38473
loss: 0.992414116859436,grad_norm: 0.9999989835534779, iteration: 38474
loss: 1.0086778402328491,grad_norm: 0.9999996603171325, iteration: 38475
loss: 1.0506447553634644,grad_norm: 0.9999995615889221, iteration: 38476
loss: 0.9815219640731812,grad_norm: 0.9999991053697292, iteration: 38477
loss: 0.9852675199508667,grad_norm: 0.9463834983583082, iteration: 38478
loss: 1.0668082237243652,grad_norm: 0.9999992118424098, iteration: 38479
loss: 1.0507495403289795,grad_norm: 0.9999992218114321, iteration: 38480
loss: 1.0120147466659546,grad_norm: 0.8573045314731732, iteration: 38481
loss: 1.0050578117370605,grad_norm: 0.9236467774840564, iteration: 38482
loss: 1.0294848680496216,grad_norm: 0.9999992163828001, iteration: 38483
loss: 1.0093846321105957,grad_norm: 0.9999992019542464, iteration: 38484
loss: 1.0007933378219604,grad_norm: 0.851305473706368, iteration: 38485
loss: 1.0127826929092407,grad_norm: 0.9999992979179431, iteration: 38486
loss: 0.9989638924598694,grad_norm: 0.9841335892085112, iteration: 38487
loss: 1.0833066701889038,grad_norm: 0.9999991978659587, iteration: 38488
loss: 0.9787668585777283,grad_norm: 0.9999991929661111, iteration: 38489
loss: 1.0195567607879639,grad_norm: 0.999998951851574, iteration: 38490
loss: 0.9756725430488586,grad_norm: 0.9779540655504755, iteration: 38491
loss: 1.0452146530151367,grad_norm: 0.9999991245662935, iteration: 38492
loss: 1.0217373371124268,grad_norm: 0.903759055130608, iteration: 38493
loss: 0.98931884765625,grad_norm: 0.9999992000617564, iteration: 38494
loss: 1.0018881559371948,grad_norm: 0.9999991203191356, iteration: 38495
loss: 1.0341410636901855,grad_norm: 0.8117705939587687, iteration: 38496
loss: 1.000862717628479,grad_norm: 0.9999991737894323, iteration: 38497
loss: 0.9697076082229614,grad_norm: 0.9999990476280183, iteration: 38498
loss: 0.9541986584663391,grad_norm: 0.8968689149323872, iteration: 38499
loss: 1.0077346563339233,grad_norm: 0.9999990500912841, iteration: 38500
loss: 0.9899246692657471,grad_norm: 0.9999992343306642, iteration: 38501
loss: 0.9748196601867676,grad_norm: 0.9999990312530416, iteration: 38502
loss: 0.9824332594871521,grad_norm: 0.9999993017942621, iteration: 38503
loss: 0.9782973527908325,grad_norm: 0.9869478645095923, iteration: 38504
loss: 0.9703434109687805,grad_norm: 0.9132913527341768, iteration: 38505
loss: 1.0246978998184204,grad_norm: 0.999999444826973, iteration: 38506
loss: 1.0280632972717285,grad_norm: 0.9583611936717251, iteration: 38507
loss: 0.993141233921051,grad_norm: 0.8529323440905336, iteration: 38508
loss: 1.0281883478164673,grad_norm: 0.999999191447529, iteration: 38509
loss: 1.0062826871871948,grad_norm: 0.9999991835141805, iteration: 38510
loss: 1.0377074480056763,grad_norm: 0.9999993396322466, iteration: 38511
loss: 0.9765709638595581,grad_norm: 0.9019116575165513, iteration: 38512
loss: 0.9773120880126953,grad_norm: 0.9999993246830263, iteration: 38513
loss: 0.9756746292114258,grad_norm: 0.9297990968290003, iteration: 38514
loss: 1.021056890487671,grad_norm: 0.9999991131391388, iteration: 38515
loss: 1.0081692934036255,grad_norm: 0.9070008155309558, iteration: 38516
loss: 0.9885404706001282,grad_norm: 0.8777026259249984, iteration: 38517
loss: 1.0082638263702393,grad_norm: 0.9803217042384222, iteration: 38518
loss: 0.9730672836303711,grad_norm: 0.9238222607513336, iteration: 38519
loss: 0.9950888156890869,grad_norm: 0.9625342944103114, iteration: 38520
loss: 0.9850031733512878,grad_norm: 0.826030413391855, iteration: 38521
loss: 1.1029348373413086,grad_norm: 0.9999993506309354, iteration: 38522
loss: 1.0182058811187744,grad_norm: 0.9584963576643262, iteration: 38523
loss: 0.9948278069496155,grad_norm: 0.9999990734652455, iteration: 38524
loss: 1.0201627016067505,grad_norm: 0.9999998234981841, iteration: 38525
loss: 1.029423475265503,grad_norm: 0.9999991158095766, iteration: 38526
loss: 1.0239319801330566,grad_norm: 0.9999990182053421, iteration: 38527
loss: 1.0308964252471924,grad_norm: 0.9999996649503754, iteration: 38528
loss: 0.9790272116661072,grad_norm: 0.9065820694334833, iteration: 38529
loss: 1.0298019647598267,grad_norm: 0.9999992395129573, iteration: 38530
loss: 0.9952123165130615,grad_norm: 0.9999995276566501, iteration: 38531
loss: 1.0342484712600708,grad_norm: 0.9229842799783119, iteration: 38532
loss: 0.988226056098938,grad_norm: 0.8594618857186728, iteration: 38533
loss: 1.0463552474975586,grad_norm: 0.9999991183857645, iteration: 38534
loss: 1.014270544052124,grad_norm: 0.9999990719808162, iteration: 38535
loss: 1.0502527952194214,grad_norm: 0.9999992275137737, iteration: 38536
loss: 1.0420188903808594,grad_norm: 0.9999992237472887, iteration: 38537
loss: 0.9966613054275513,grad_norm: 0.9058122307138056, iteration: 38538
loss: 1.0097805261611938,grad_norm: 0.9564885110504666, iteration: 38539
loss: 1.0165613889694214,grad_norm: 0.999999160011954, iteration: 38540
loss: 0.9764065742492676,grad_norm: 0.999999470573656, iteration: 38541
loss: 0.9676083922386169,grad_norm: 0.9479180011873191, iteration: 38542
loss: 1.0400609970092773,grad_norm: 0.9999993835917702, iteration: 38543
loss: 0.9951351881027222,grad_norm: 0.878922225555453, iteration: 38544
loss: 0.9934340715408325,grad_norm: 0.9135086184300407, iteration: 38545
loss: 0.9913524985313416,grad_norm: 0.8519283704140185, iteration: 38546
loss: 1.0152361392974854,grad_norm: 0.9960873751182268, iteration: 38547
loss: 0.9647737145423889,grad_norm: 0.999999058785505, iteration: 38548
loss: 1.0216810703277588,grad_norm: 0.9999992375617199, iteration: 38549
loss: 1.028201937675476,grad_norm: 0.9999990646935979, iteration: 38550
loss: 0.9920927286148071,grad_norm: 0.9999991040626537, iteration: 38551
loss: 0.9849634170532227,grad_norm: 0.9999992350698454, iteration: 38552
loss: 1.0141054391860962,grad_norm: 0.9999990048423524, iteration: 38553
loss: 1.0129332542419434,grad_norm: 0.9951036107511113, iteration: 38554
loss: 1.033118486404419,grad_norm: 0.9610578402923088, iteration: 38555
loss: 1.022049903869629,grad_norm: 0.8491954087137483, iteration: 38556
loss: 1.0378742218017578,grad_norm: 0.9999991500498844, iteration: 38557
loss: 1.0121005773544312,grad_norm: 0.9107011465450384, iteration: 38558
loss: 1.016583800315857,grad_norm: 0.9084623011327122, iteration: 38559
loss: 0.9887815117835999,grad_norm: 0.9849392748531672, iteration: 38560
loss: 1.0091464519500732,grad_norm: 0.9074042609392047, iteration: 38561
loss: 1.0239496231079102,grad_norm: 0.999999176422813, iteration: 38562
loss: 0.9930511116981506,grad_norm: 0.9999991222921331, iteration: 38563
loss: 0.9890967011451721,grad_norm: 0.9308430801665397, iteration: 38564
loss: 0.987880527973175,grad_norm: 0.9123470509951234, iteration: 38565
loss: 1.013800859451294,grad_norm: 0.999999292787844, iteration: 38566
loss: 1.0148375034332275,grad_norm: 0.9999992568747328, iteration: 38567
loss: 0.9851732850074768,grad_norm: 0.9999991397473916, iteration: 38568
loss: 0.9822549223899841,grad_norm: 0.9999989832602402, iteration: 38569
loss: 1.0003736019134521,grad_norm: 0.951595150398939, iteration: 38570
loss: 1.0126670598983765,grad_norm: 0.9844813641696362, iteration: 38571
loss: 1.0345935821533203,grad_norm: 0.9999997056578226, iteration: 38572
loss: 1.0262219905853271,grad_norm: 0.9999990822488015, iteration: 38573
loss: 1.004473090171814,grad_norm: 0.9999990676305263, iteration: 38574
loss: 1.0853772163391113,grad_norm: 0.9999992747742484, iteration: 38575
loss: 1.0355122089385986,grad_norm: 0.9062484584028682, iteration: 38576
loss: 0.9917671084403992,grad_norm: 0.9955437099292865, iteration: 38577
loss: 1.0064817667007446,grad_norm: 0.9374599929418607, iteration: 38578
loss: 1.0201878547668457,grad_norm: 0.999999038714312, iteration: 38579
loss: 0.9957081079483032,grad_norm: 0.9999995272176863, iteration: 38580
loss: 1.001198410987854,grad_norm: 0.8190304536444748, iteration: 38581
loss: 1.0498536825180054,grad_norm: 0.9999990964205691, iteration: 38582
loss: 1.0121488571166992,grad_norm: 0.8549748531979924, iteration: 38583
loss: 0.9903537034988403,grad_norm: 0.9999996606310967, iteration: 38584
loss: 0.9537990689277649,grad_norm: 0.999999279730515, iteration: 38585
loss: 1.007143497467041,grad_norm: 0.999999169322391, iteration: 38586
loss: 1.0153917074203491,grad_norm: 0.983565309735884, iteration: 38587
loss: 1.0181108713150024,grad_norm: 0.8387165519367276, iteration: 38588
loss: 0.9729543328285217,grad_norm: 0.8585749819984433, iteration: 38589
loss: 0.9787338972091675,grad_norm: 0.9946483938738512, iteration: 38590
loss: 1.016318917274475,grad_norm: 0.9999991357436896, iteration: 38591
loss: 1.0977344512939453,grad_norm: 0.9999995317433759, iteration: 38592
loss: 1.020706057548523,grad_norm: 0.9999991372932625, iteration: 38593
loss: 0.9745787382125854,grad_norm: 0.9999989761565486, iteration: 38594
loss: 1.008266568183899,grad_norm: 0.9724518191190853, iteration: 38595
loss: 1.0090069770812988,grad_norm: 0.9999990249026018, iteration: 38596
loss: 1.0030725002288818,grad_norm: 0.9999990770971956, iteration: 38597
loss: 1.0101451873779297,grad_norm: 0.9999989447065742, iteration: 38598
loss: 1.1507318019866943,grad_norm: 0.999999859922542, iteration: 38599
loss: 1.0197566747665405,grad_norm: 0.9613143152147593, iteration: 38600
loss: 1.0048723220825195,grad_norm: 0.9914523550507391, iteration: 38601
loss: 1.0180960893630981,grad_norm: 0.9999991103967837, iteration: 38602
loss: 1.0148251056671143,grad_norm: 0.999999185770583, iteration: 38603
loss: 0.9853675961494446,grad_norm: 0.9460922903580338, iteration: 38604
loss: 1.0312626361846924,grad_norm: 0.9113707945889603, iteration: 38605
loss: 1.0009981393814087,grad_norm: 0.8460303244835106, iteration: 38606
loss: 1.0278589725494385,grad_norm: 0.9999992871495157, iteration: 38607
loss: 0.952863335609436,grad_norm: 0.9515220609322921, iteration: 38608
loss: 0.9983384609222412,grad_norm: 0.9999990513077006, iteration: 38609
loss: 0.9849740266799927,grad_norm: 0.9999992468587903, iteration: 38610
loss: 0.9901105761528015,grad_norm: 0.9999991702383556, iteration: 38611
loss: 1.0208935737609863,grad_norm: 0.9999989565328046, iteration: 38612
loss: 0.9718378186225891,grad_norm: 0.9999990818356094, iteration: 38613
loss: 0.9701544046401978,grad_norm: 0.9999993612428193, iteration: 38614
loss: 0.9971562623977661,grad_norm: 0.9364597540346172, iteration: 38615
loss: 0.9808876514434814,grad_norm: 0.8892651167671297, iteration: 38616
loss: 1.0431417226791382,grad_norm: 0.9104751822687825, iteration: 38617
loss: 0.9682700634002686,grad_norm: 0.9679522746819174, iteration: 38618
loss: 0.97945636510849,grad_norm: 0.8020789233771997, iteration: 38619
loss: 1.014971137046814,grad_norm: 0.9999998677846146, iteration: 38620
loss: 1.0258163213729858,grad_norm: 0.9999995906552618, iteration: 38621
loss: 1.0318299531936646,grad_norm: 0.9078962999059532, iteration: 38622
loss: 1.0194555521011353,grad_norm: 0.9808145781715282, iteration: 38623
loss: 0.9968360662460327,grad_norm: 0.9999992284039595, iteration: 38624
loss: 1.0008546113967896,grad_norm: 0.7786470426782606, iteration: 38625
loss: 1.0662963390350342,grad_norm: 0.9999993129716002, iteration: 38626
loss: 0.9995633363723755,grad_norm: 0.9999990514750353, iteration: 38627
loss: 1.0198004245758057,grad_norm: 0.9999991400553966, iteration: 38628
loss: 1.022557258605957,grad_norm: 0.9999992539282664, iteration: 38629
loss: 1.0076905488967896,grad_norm: 0.901950639170767, iteration: 38630
loss: 1.0456770658493042,grad_norm: 0.9999992036720388, iteration: 38631
loss: 0.9734411239624023,grad_norm: 0.9481581582313963, iteration: 38632
loss: 0.9993448257446289,grad_norm: 0.9746826334871015, iteration: 38633
loss: 0.9763933420181274,grad_norm: 0.9966030743112126, iteration: 38634
loss: 0.9908298850059509,grad_norm: 0.8178254321506533, iteration: 38635
loss: 1.0046401023864746,grad_norm: 0.9076393421223082, iteration: 38636
loss: 1.0030155181884766,grad_norm: 0.9845792379954993, iteration: 38637
loss: 0.9594914317131042,grad_norm: 0.9009092380582487, iteration: 38638
loss: 0.9907506108283997,grad_norm: 0.9999989288866726, iteration: 38639
loss: 1.0449318885803223,grad_norm: 0.9999993171225521, iteration: 38640
loss: 1.0497026443481445,grad_norm: 0.8346796232430845, iteration: 38641
loss: 0.993118166923523,grad_norm: 0.9999990102505513, iteration: 38642
loss: 1.0153148174285889,grad_norm: 0.9682883687343462, iteration: 38643
loss: 1.0123121738433838,grad_norm: 0.9999991404425542, iteration: 38644
loss: 1.0121915340423584,grad_norm: 0.99999914871471, iteration: 38645
loss: 1.0063776969909668,grad_norm: 0.9999996796939011, iteration: 38646
loss: 1.0384992361068726,grad_norm: 0.9536887453494418, iteration: 38647
loss: 1.0535204410552979,grad_norm: 0.9542077939455279, iteration: 38648
loss: 1.0121477842330933,grad_norm: 0.8126359718617603, iteration: 38649
loss: 1.0292620658874512,grad_norm: 0.9999991827146009, iteration: 38650
loss: 0.9962921738624573,grad_norm: 0.9999992207603904, iteration: 38651
loss: 1.0231904983520508,grad_norm: 0.9999990808202098, iteration: 38652
loss: 0.9915300607681274,grad_norm: 0.9999991382570668, iteration: 38653
loss: 1.0149005651474,grad_norm: 0.9999993046672742, iteration: 38654
loss: 0.9996181726455688,grad_norm: 0.9999990816647596, iteration: 38655
loss: 0.9771283864974976,grad_norm: 0.9309243454259205, iteration: 38656
loss: 0.972604513168335,grad_norm: 0.9999989233562893, iteration: 38657
loss: 1.0115387439727783,grad_norm: 0.9178205817343864, iteration: 38658
loss: 0.987302839756012,grad_norm: 0.9878731725902508, iteration: 38659
loss: 0.9991357326507568,grad_norm: 0.9761583507711266, iteration: 38660
loss: 0.9924757480621338,grad_norm: 0.9999992749566368, iteration: 38661
loss: 0.9705312252044678,grad_norm: 0.9510764486395008, iteration: 38662
loss: 1.0414177179336548,grad_norm: 0.9999991579950227, iteration: 38663
loss: 1.0157915353775024,grad_norm: 0.9999992534246377, iteration: 38664
loss: 1.0101313591003418,grad_norm: 0.9765514095520437, iteration: 38665
loss: 1.0286654233932495,grad_norm: 0.9999992925521942, iteration: 38666
loss: 1.0254426002502441,grad_norm: 0.9999995313904234, iteration: 38667
loss: 1.0241937637329102,grad_norm: 0.9999991800328445, iteration: 38668
loss: 1.0194058418273926,grad_norm: 0.9999989622107849, iteration: 38669
loss: 0.9826605916023254,grad_norm: 0.9550530249942626, iteration: 38670
loss: 1.0363638401031494,grad_norm: 0.8900015008078377, iteration: 38671
loss: 1.0549439191818237,grad_norm: 0.9999996803872229, iteration: 38672
loss: 0.9845578074455261,grad_norm: 0.9999991814991815, iteration: 38673
loss: 1.0024514198303223,grad_norm: 0.9999991795613662, iteration: 38674
loss: 0.9914957284927368,grad_norm: 0.9999991603063092, iteration: 38675
loss: 0.98112553358078,grad_norm: 0.9999990960537657, iteration: 38676
loss: 1.0009363889694214,grad_norm: 0.9010943629713429, iteration: 38677
loss: 0.9529832005500793,grad_norm: 0.897979609709266, iteration: 38678
loss: 1.0043710470199585,grad_norm: 0.9999993168004082, iteration: 38679
loss: 1.0177959203720093,grad_norm: 0.8529811733623448, iteration: 38680
loss: 1.0523028373718262,grad_norm: 0.9999994856357161, iteration: 38681
loss: 1.0165979862213135,grad_norm: 0.9067679306765111, iteration: 38682
loss: 1.035691499710083,grad_norm: 0.999999183044945, iteration: 38683
loss: 1.0437439680099487,grad_norm: 0.9222273231219228, iteration: 38684
loss: 0.9887664914131165,grad_norm: 0.9999992249484229, iteration: 38685
loss: 0.985351026058197,grad_norm: 0.9999990294218344, iteration: 38686
loss: 0.9971527457237244,grad_norm: 0.900356340610324, iteration: 38687
loss: 1.0099929571151733,grad_norm: 0.8569532844334543, iteration: 38688
loss: 0.9954577088356018,grad_norm: 0.9999993975945033, iteration: 38689
loss: 0.9863667488098145,grad_norm: 0.8083975695331622, iteration: 38690
loss: 0.964213490486145,grad_norm: 0.9012763044601579, iteration: 38691
loss: 1.0075653791427612,grad_norm: 0.999999032195957, iteration: 38692
loss: 0.9916481375694275,grad_norm: 0.8649176654859155, iteration: 38693
loss: 1.0520890951156616,grad_norm: 0.9999996176001255, iteration: 38694
loss: 1.0191372632980347,grad_norm: 0.8847355457742412, iteration: 38695
loss: 0.9974235892295837,grad_norm: 0.9999990478155057, iteration: 38696
loss: 0.9803798794746399,grad_norm: 0.983622312971129, iteration: 38697
loss: 1.0138863325119019,grad_norm: 0.9809975816325561, iteration: 38698
loss: 0.9871983528137207,grad_norm: 0.9999990930201264, iteration: 38699
loss: 0.9779762625694275,grad_norm: 0.8429054065835623, iteration: 38700
loss: 0.9470770955085754,grad_norm: 0.8956586383239307, iteration: 38701
loss: 1.0334370136260986,grad_norm: 0.9999991631493772, iteration: 38702
loss: 0.9697795510292053,grad_norm: 0.9939333160912596, iteration: 38703
loss: 1.042244791984558,grad_norm: 0.9999992748584261, iteration: 38704
loss: 1.057999610900879,grad_norm: 0.9192352049662214, iteration: 38705
loss: 1.0180567502975464,grad_norm: 0.8627310715929768, iteration: 38706
loss: 1.0029127597808838,grad_norm: 0.8214384817114647, iteration: 38707
loss: 1.0030934810638428,grad_norm: 0.7867519132185526, iteration: 38708
loss: 1.020235300064087,grad_norm: 0.9156143661145896, iteration: 38709
loss: 1.0094366073608398,grad_norm: 0.9999992723894636, iteration: 38710
loss: 1.0080482959747314,grad_norm: 0.9531097075624108, iteration: 38711
loss: 0.986942708492279,grad_norm: 0.9999992863612702, iteration: 38712
loss: 1.01749849319458,grad_norm: 0.999999021879379, iteration: 38713
loss: 1.0118619203567505,grad_norm: 0.8664752709353496, iteration: 38714
loss: 1.0743632316589355,grad_norm: 0.9999991135924857, iteration: 38715
loss: 0.9981691837310791,grad_norm: 0.9589834998414024, iteration: 38716
loss: 0.9746332764625549,grad_norm: 0.999999245706674, iteration: 38717
loss: 1.024735450744629,grad_norm: 0.9999991132100079, iteration: 38718
loss: 0.9810401201248169,grad_norm: 0.9999991802656015, iteration: 38719
loss: 0.9854329824447632,grad_norm: 0.8568210651617402, iteration: 38720
loss: 0.9826711416244507,grad_norm: 0.862471016737838, iteration: 38721
loss: 1.0113725662231445,grad_norm: 0.9999990805799356, iteration: 38722
loss: 1.0249855518341064,grad_norm: 0.948334818757688, iteration: 38723
loss: 0.9840437769889832,grad_norm: 0.9770349600436409, iteration: 38724
loss: 1.0063046216964722,grad_norm: 0.8855957356900188, iteration: 38725
loss: 0.969416081905365,grad_norm: 0.9980051263523809, iteration: 38726
loss: 1.0046578645706177,grad_norm: 0.9999991782656952, iteration: 38727
loss: 1.0088850259780884,grad_norm: 0.9937534045315156, iteration: 38728
loss: 0.9924168586730957,grad_norm: 0.9999993807634534, iteration: 38729
loss: 0.9918841123580933,grad_norm: 0.9999990042822441, iteration: 38730
loss: 1.0712542533874512,grad_norm: 0.939937301145939, iteration: 38731
loss: 0.9768422842025757,grad_norm: 0.9999990873012699, iteration: 38732
loss: 1.010059118270874,grad_norm: 0.9726576771015049, iteration: 38733
loss: 1.00730562210083,grad_norm: 0.9856103056854721, iteration: 38734
loss: 0.9731083512306213,grad_norm: 0.9516874747110516, iteration: 38735
loss: 1.014182448387146,grad_norm: 0.9999995945450095, iteration: 38736
loss: 0.9981662034988403,grad_norm: 0.9954483335844917, iteration: 38737
loss: 0.9854611754417419,grad_norm: 0.9999992953577092, iteration: 38738
loss: 1.0033153295516968,grad_norm: 0.8753981512758674, iteration: 38739
loss: 1.0358554124832153,grad_norm: 0.8269812195675634, iteration: 38740
loss: 1.0187932252883911,grad_norm: 0.8694532269283137, iteration: 38741
loss: 1.0173436403274536,grad_norm: 0.9427537176149203, iteration: 38742
loss: 1.0047210454940796,grad_norm: 0.8442447417631239, iteration: 38743
loss: 0.9892060160636902,grad_norm: 0.9001646797462511, iteration: 38744
loss: 1.0296785831451416,grad_norm: 0.8897409528329284, iteration: 38745
loss: 0.9802151322364807,grad_norm: 0.9999990855742344, iteration: 38746
loss: 0.9942861199378967,grad_norm: 0.9999991528522991, iteration: 38747
loss: 1.0170978307724,grad_norm: 0.986637629484878, iteration: 38748
loss: 1.02935791015625,grad_norm: 0.9905780937161375, iteration: 38749
loss: 0.9864059686660767,grad_norm: 0.9401089947132396, iteration: 38750
loss: 1.020235538482666,grad_norm: 0.952097544547468, iteration: 38751
loss: 1.0178743600845337,grad_norm: 0.9999990646103624, iteration: 38752
loss: 1.0214704275131226,grad_norm: 0.9112485620559935, iteration: 38753
loss: 1.0356172323226929,grad_norm: 0.9860783671936216, iteration: 38754
loss: 0.9964486360549927,grad_norm: 0.8112572694472909, iteration: 38755
loss: 1.0094531774520874,grad_norm: 0.9999990459994754, iteration: 38756
loss: 1.0273535251617432,grad_norm: 0.9723785265920444, iteration: 38757
loss: 0.9957669973373413,grad_norm: 0.8942732136646606, iteration: 38758
loss: 1.0239479541778564,grad_norm: 0.8964158023481343, iteration: 38759
loss: 0.9874745607376099,grad_norm: 0.8017659443408889, iteration: 38760
loss: 1.0074644088745117,grad_norm: 0.8992362284388229, iteration: 38761
loss: 0.9857000112533569,grad_norm: 0.9999992428878538, iteration: 38762
loss: 1.0309251546859741,grad_norm: 0.9999998395269313, iteration: 38763
loss: 0.9946697950363159,grad_norm: 0.9999991591126054, iteration: 38764
loss: 0.9750245213508606,grad_norm: 0.9999990283450868, iteration: 38765
loss: 1.0289286375045776,grad_norm: 0.9999991221282577, iteration: 38766
loss: 0.9939733743667603,grad_norm: 0.9292858524547944, iteration: 38767
loss: 1.0306419134140015,grad_norm: 0.9999990422636686, iteration: 38768
loss: 0.9783201217651367,grad_norm: 0.960494152800044, iteration: 38769
loss: 0.9817022681236267,grad_norm: 0.9999990141166565, iteration: 38770
loss: 1.0137505531311035,grad_norm: 0.9999990995484912, iteration: 38771
loss: 0.9754801988601685,grad_norm: 0.9151734407311103, iteration: 38772
loss: 0.9726312160491943,grad_norm: 0.9999992161300874, iteration: 38773
loss: 1.0456968545913696,grad_norm: 0.9999994529789666, iteration: 38774
loss: 1.0392073392868042,grad_norm: 0.9578486180150162, iteration: 38775
loss: 0.9761873483657837,grad_norm: 0.9999990687159409, iteration: 38776
loss: 0.982830822467804,grad_norm: 0.9152124540352806, iteration: 38777
loss: 1.0047193765640259,grad_norm: 0.9999991921579943, iteration: 38778
loss: 0.9847001433372498,grad_norm: 0.9729162215930374, iteration: 38779
loss: 0.9754928946495056,grad_norm: 0.9999999246598641, iteration: 38780
loss: 1.0247271060943604,grad_norm: 0.9076971107808762, iteration: 38781
loss: 1.0203973054885864,grad_norm: 0.919838362157354, iteration: 38782
loss: 1.007184624671936,grad_norm: 0.9735050214829377, iteration: 38783
loss: 0.9766872525215149,grad_norm: 0.9999991932192246, iteration: 38784
loss: 1.0172573328018188,grad_norm: 0.8809856833649655, iteration: 38785
loss: 0.9976766705513,grad_norm: 0.8604681687568739, iteration: 38786
loss: 0.9707288146018982,grad_norm: 0.9999990460001861, iteration: 38787
loss: 1.0347027778625488,grad_norm: 0.9999991104205593, iteration: 38788
loss: 0.9740663170814514,grad_norm: 0.9999991100856923, iteration: 38789
loss: 1.0067843198776245,grad_norm: 0.9999991080846414, iteration: 38790
loss: 0.9954061508178711,grad_norm: 0.999998974325445, iteration: 38791
loss: 0.9828711748123169,grad_norm: 0.9858451780172682, iteration: 38792
loss: 0.9961210489273071,grad_norm: 0.9999991685393016, iteration: 38793
loss: 0.9890539646148682,grad_norm: 0.9999992740079928, iteration: 38794
loss: 1.0890930891036987,grad_norm: 0.9999993552520734, iteration: 38795
loss: 0.9838076829910278,grad_norm: 0.999999085201113, iteration: 38796
loss: 0.9783573150634766,grad_norm: 0.9936567251211643, iteration: 38797
loss: 0.9767496585845947,grad_norm: 0.8626987051707988, iteration: 38798
loss: 1.0107908248901367,grad_norm: 0.9999990347753909, iteration: 38799
loss: 0.9914785623550415,grad_norm: 0.9223022287825712, iteration: 38800
loss: 1.0331069231033325,grad_norm: 0.9999992348954753, iteration: 38801
loss: 1.0327814817428589,grad_norm: 0.8413162330244605, iteration: 38802
loss: 0.9984555244445801,grad_norm: 0.9999992770484751, iteration: 38803
loss: 1.0155305862426758,grad_norm: 0.9999991154011157, iteration: 38804
loss: 0.9981759190559387,grad_norm: 0.9999990683825438, iteration: 38805
loss: 0.9687042236328125,grad_norm: 0.958232357306176, iteration: 38806
loss: 0.9969738125801086,grad_norm: 0.9999992564754776, iteration: 38807
loss: 1.0055456161499023,grad_norm: 0.999999036357623, iteration: 38808
loss: 1.0073764324188232,grad_norm: 0.8920408671988531, iteration: 38809
loss: 1.0433117151260376,grad_norm: 0.9999990924581699, iteration: 38810
loss: 1.0130305290222168,grad_norm: 0.8994888262414149, iteration: 38811
loss: 1.0319132804870605,grad_norm: 0.9544999167657172, iteration: 38812
loss: 1.0029274225234985,grad_norm: 0.9999998856090689, iteration: 38813
loss: 0.9903595447540283,grad_norm: 0.9999991497213943, iteration: 38814
loss: 1.0249848365783691,grad_norm: 0.9999990125607143, iteration: 38815
loss: 1.016603708267212,grad_norm: 0.9195186886150657, iteration: 38816
loss: 1.0184569358825684,grad_norm: 0.8985544069093613, iteration: 38817
loss: 0.9981393814086914,grad_norm: 0.9999990698535592, iteration: 38818
loss: 1.04935884475708,grad_norm: 0.9213667713682835, iteration: 38819
loss: 1.0982908010482788,grad_norm: 0.9999993412947124, iteration: 38820
loss: 1.0229268074035645,grad_norm: 0.999999180662439, iteration: 38821
loss: 1.0429291725158691,grad_norm: 0.9999990213142479, iteration: 38822
loss: 0.9820789098739624,grad_norm: 0.9131002756478237, iteration: 38823
loss: 0.972724974155426,grad_norm: 0.9115854114263722, iteration: 38824
loss: 0.9989638924598694,grad_norm: 0.9999992096506392, iteration: 38825
loss: 0.9974707365036011,grad_norm: 0.9999992327457579, iteration: 38826
loss: 1.0209449529647827,grad_norm: 0.9999991843587495, iteration: 38827
loss: 1.0328582525253296,grad_norm: 0.891307322943034, iteration: 38828
loss: 1.0305068492889404,grad_norm: 0.9999991608084873, iteration: 38829
loss: 0.9665416479110718,grad_norm: 0.9464869953507591, iteration: 38830
loss: 1.0000596046447754,grad_norm: 0.9999991276428808, iteration: 38831
loss: 1.0056239366531372,grad_norm: 0.9999999036705006, iteration: 38832
loss: 0.9820732474327087,grad_norm: 0.8019301385674684, iteration: 38833
loss: 1.0393625497817993,grad_norm: 0.9999997893165995, iteration: 38834
loss: 0.9978764653205872,grad_norm: 0.9999990145983892, iteration: 38835
loss: 1.0098851919174194,grad_norm: 0.9267132464070497, iteration: 38836
loss: 0.9949347972869873,grad_norm: 0.879103984288449, iteration: 38837
loss: 0.9912431240081787,grad_norm: 0.9999995339625084, iteration: 38838
loss: 0.9938667416572571,grad_norm: 0.997267509286173, iteration: 38839
loss: 1.026197075843811,grad_norm: 0.9999993095212169, iteration: 38840
loss: 1.0250957012176514,grad_norm: 0.9999993866638532, iteration: 38841
loss: 1.0239810943603516,grad_norm: 0.9999994892817861, iteration: 38842
loss: 1.035068392753601,grad_norm: 0.817434652884203, iteration: 38843
loss: 1.0040323734283447,grad_norm: 0.9999991161736818, iteration: 38844
loss: 1.0061930418014526,grad_norm: 0.9999990877441435, iteration: 38845
loss: 1.0019376277923584,grad_norm: 0.9999990962603541, iteration: 38846
loss: 0.9862979650497437,grad_norm: 0.9975690496809332, iteration: 38847
loss: 1.0224155187606812,grad_norm: 0.999999865317078, iteration: 38848
loss: 1.0125300884246826,grad_norm: 0.8754473367696877, iteration: 38849
loss: 0.9992231130599976,grad_norm: 0.9963684918972162, iteration: 38850
loss: 1.0203959941864014,grad_norm: 0.9918960523733802, iteration: 38851
loss: 1.0043317079544067,grad_norm: 0.991805310885562, iteration: 38852
loss: 1.0282037258148193,grad_norm: 0.9999989704082538, iteration: 38853
loss: 0.9933536648750305,grad_norm: 0.9999991066848152, iteration: 38854
loss: 0.9678342342376709,grad_norm: 0.8156619795043142, iteration: 38855
loss: 0.9795750379562378,grad_norm: 0.8985321039766581, iteration: 38856
loss: 0.9891243577003479,grad_norm: 0.9999993408879391, iteration: 38857
loss: 1.0364875793457031,grad_norm: 0.9999990017564274, iteration: 38858
loss: 1.0179473161697388,grad_norm: 0.9999993429412808, iteration: 38859
loss: 0.9796538949012756,grad_norm: 0.999999095067266, iteration: 38860
loss: 1.0275790691375732,grad_norm: 0.9999995699632234, iteration: 38861
loss: 0.9940441846847534,grad_norm: 0.9999991340353841, iteration: 38862
loss: 1.076169490814209,grad_norm: 0.9999993990321033, iteration: 38863
loss: 1.052317500114441,grad_norm: 0.9478338887894581, iteration: 38864
loss: 1.0127582550048828,grad_norm: 0.9999991807765244, iteration: 38865
loss: 0.9886118173599243,grad_norm: 0.7958316375513941, iteration: 38866
loss: 0.9966028332710266,grad_norm: 0.9033643615659289, iteration: 38867
loss: 1.0260790586471558,grad_norm: 0.9999994165027394, iteration: 38868
loss: 1.0732637643814087,grad_norm: 0.9999991662904995, iteration: 38869
loss: 1.015587568283081,grad_norm: 0.9999996066791623, iteration: 38870
loss: 1.0144251585006714,grad_norm: 0.999999415895968, iteration: 38871
loss: 0.9904316663742065,grad_norm: 0.9670323075964062, iteration: 38872
loss: 0.9854264855384827,grad_norm: 0.995335812727227, iteration: 38873
loss: 1.061268925666809,grad_norm: 0.9999999429456032, iteration: 38874
loss: 1.026384949684143,grad_norm: 0.9999991195681571, iteration: 38875
loss: 0.9873276352882385,grad_norm: 0.972081973481426, iteration: 38876
loss: 1.008651852607727,grad_norm: 0.99999923097892, iteration: 38877
loss: 1.0632113218307495,grad_norm: 0.9999993126540943, iteration: 38878
loss: 0.9807630181312561,grad_norm: 0.8816056633658174, iteration: 38879
loss: 1.0103687047958374,grad_norm: 0.9999995169848351, iteration: 38880
loss: 1.0019409656524658,grad_norm: 0.9999991154206835, iteration: 38881
loss: 1.0312751531600952,grad_norm: 0.9554559066310794, iteration: 38882
loss: 0.9934061169624329,grad_norm: 0.8715289593726031, iteration: 38883
loss: 0.9998817443847656,grad_norm: 0.9025271120448279, iteration: 38884
loss: 0.984215259552002,grad_norm: 0.97550112678789, iteration: 38885
loss: 1.0268303155899048,grad_norm: 0.9999993521410324, iteration: 38886
loss: 1.0436214208602905,grad_norm: 0.9999990283257231, iteration: 38887
loss: 1.0850648880004883,grad_norm: 0.9999993745407999, iteration: 38888
loss: 0.9767493009567261,grad_norm: 0.8726325218494942, iteration: 38889
loss: 1.0445901155471802,grad_norm: 0.8668280735277514, iteration: 38890
loss: 1.0263097286224365,grad_norm: 0.8122387933753532, iteration: 38891
loss: 0.9830400347709656,grad_norm: 0.958805888288017, iteration: 38892
loss: 1.037630319595337,grad_norm: 0.9999992012070369, iteration: 38893
loss: 1.0007051229476929,grad_norm: 0.9434790241853938, iteration: 38894
loss: 1.0192095041275024,grad_norm: 0.9463948723747235, iteration: 38895
loss: 1.0498988628387451,grad_norm: 0.9999991709536011, iteration: 38896
loss: 1.0293810367584229,grad_norm: 0.9999991330663512, iteration: 38897
loss: 1.0125137567520142,grad_norm: 0.9999989659444916, iteration: 38898
loss: 0.9888463616371155,grad_norm: 0.9012537945614394, iteration: 38899
loss: 0.9831693768501282,grad_norm: 0.9999991968345407, iteration: 38900
loss: 1.0122214555740356,grad_norm: 0.9334781594718754, iteration: 38901
loss: 0.9756567478179932,grad_norm: 0.9999992495960608, iteration: 38902
loss: 1.008302092552185,grad_norm: 0.9999991691911705, iteration: 38903
loss: 1.0092471837997437,grad_norm: 0.9999991709185102, iteration: 38904
loss: 1.0454914569854736,grad_norm: 0.9999996199001888, iteration: 38905
loss: 0.9774292707443237,grad_norm: 0.9754406201072332, iteration: 38906
loss: 1.0258148908615112,grad_norm: 0.9999995710129006, iteration: 38907
loss: 1.0495951175689697,grad_norm: 0.9999993887095403, iteration: 38908
loss: 1.0151793956756592,grad_norm: 0.9999990285755715, iteration: 38909
loss: 1.0139048099517822,grad_norm: 0.9449190588655405, iteration: 38910
loss: 0.9853775501251221,grad_norm: 0.9999992198211487, iteration: 38911
loss: 0.9906806349754333,grad_norm: 0.7637146242237408, iteration: 38912
loss: 0.9726352095603943,grad_norm: 0.9042058902817938, iteration: 38913
loss: 1.0075753927230835,grad_norm: 0.9999992949798688, iteration: 38914
loss: 0.9921471476554871,grad_norm: 0.9027942277761344, iteration: 38915
loss: 0.9808554649353027,grad_norm: 0.8618994841676438, iteration: 38916
loss: 0.9851697087287903,grad_norm: 0.9999990983252212, iteration: 38917
loss: 1.0117970705032349,grad_norm: 0.9947342499617282, iteration: 38918
loss: 0.9858566522598267,grad_norm: 0.8943914382880818, iteration: 38919
loss: 1.0128129720687866,grad_norm: 0.8188019456459957, iteration: 38920
loss: 1.0275051593780518,grad_norm: 0.9567691948539856, iteration: 38921
loss: 1.0138078927993774,grad_norm: 0.9999991915232196, iteration: 38922
loss: 1.0307492017745972,grad_norm: 0.9658579871893711, iteration: 38923
loss: 1.0158783197402954,grad_norm: 0.9999989842507321, iteration: 38924
loss: 1.03515625,grad_norm: 0.8745787159063064, iteration: 38925
loss: 1.0279685258865356,grad_norm: 0.999999669769098, iteration: 38926
loss: 1.0382351875305176,grad_norm: 0.999999270945064, iteration: 38927
loss: 1.034706473350525,grad_norm: 0.9999990616783554, iteration: 38928
loss: 0.9593657851219177,grad_norm: 0.9764790753010009, iteration: 38929
loss: 0.9549384117126465,grad_norm: 0.9999989937625346, iteration: 38930
loss: 1.0532546043395996,grad_norm: 0.9999996788030028, iteration: 38931
loss: 1.0068596601486206,grad_norm: 0.9729451976129698, iteration: 38932
loss: 1.0313363075256348,grad_norm: 0.9999992443208578, iteration: 38933
loss: 1.01174795627594,grad_norm: 0.9999990713908721, iteration: 38934
loss: 1.0222465991973877,grad_norm: 0.8974484919269414, iteration: 38935
loss: 1.0440372228622437,grad_norm: 0.9999990880097123, iteration: 38936
loss: 0.9732098579406738,grad_norm: 0.9025512717464826, iteration: 38937
loss: 1.0148200988769531,grad_norm: 0.999999115304409, iteration: 38938
loss: 0.9972167611122131,grad_norm: 0.9753643489860744, iteration: 38939
loss: 1.0278767347335815,grad_norm: 0.9999992200669571, iteration: 38940
loss: 1.0162403583526611,grad_norm: 0.8189076252865595, iteration: 38941
loss: 1.0257130861282349,grad_norm: 0.9212193228085834, iteration: 38942
loss: 0.9935215711593628,grad_norm: 0.9999991550624292, iteration: 38943
loss: 1.0496830940246582,grad_norm: 0.9999996866222889, iteration: 38944
loss: 0.9969120621681213,grad_norm: 0.8890224513028324, iteration: 38945
loss: 1.0201879739761353,grad_norm: 0.8800963119310502, iteration: 38946
loss: 1.0417304039001465,grad_norm: 0.9999996205768908, iteration: 38947
loss: 1.0107057094573975,grad_norm: 0.9926721990794932, iteration: 38948
loss: 0.9808924794197083,grad_norm: 0.9557351640720597, iteration: 38949
loss: 1.0063799619674683,grad_norm: 0.8981695515833387, iteration: 38950
loss: 0.9853427410125732,grad_norm: 0.9999990373097211, iteration: 38951
loss: 1.01668381690979,grad_norm: 0.999999193008211, iteration: 38952
loss: 1.0151487588882446,grad_norm: 0.8537347729992157, iteration: 38953
loss: 1.0238505601882935,grad_norm: 0.9999991329078076, iteration: 38954
loss: 1.0045303106307983,grad_norm: 0.9999990711717438, iteration: 38955
loss: 1.0112484693527222,grad_norm: 0.9999991875152312, iteration: 38956
loss: 1.026082992553711,grad_norm: 0.9999991509862455, iteration: 38957
loss: 1.042233943939209,grad_norm: 0.9999993567933587, iteration: 38958
loss: 1.0569438934326172,grad_norm: 0.999999467979907, iteration: 38959
loss: 0.9507353901863098,grad_norm: 0.9999991591490963, iteration: 38960
loss: 1.0031256675720215,grad_norm: 0.9061352531268464, iteration: 38961
loss: 0.9963803887367249,grad_norm: 0.9607515579771846, iteration: 38962
loss: 1.0128002166748047,grad_norm: 0.9999991223759847, iteration: 38963
loss: 0.999859094619751,grad_norm: 0.8057804779282132, iteration: 38964
loss: 1.0180572271347046,grad_norm: 0.8209072645845195, iteration: 38965
loss: 0.9783806204795837,grad_norm: 0.9999991674191037, iteration: 38966
loss: 0.9879062175750732,grad_norm: 0.999999573238966, iteration: 38967
loss: 1.0117709636688232,grad_norm: 0.999999099490175, iteration: 38968
loss: 1.0592738389968872,grad_norm: 0.9999992464956687, iteration: 38969
loss: 1.0202761888504028,grad_norm: 0.9690027130364335, iteration: 38970
loss: 1.01996910572052,grad_norm: 0.9999990673699443, iteration: 38971
loss: 0.9997745156288147,grad_norm: 0.999999167178403, iteration: 38972
loss: 1.007073998451233,grad_norm: 0.9999991730758855, iteration: 38973
loss: 1.0442222356796265,grad_norm: 0.9999991223710215, iteration: 38974
loss: 0.9878312945365906,grad_norm: 0.9854973718661327, iteration: 38975
loss: 1.043165683746338,grad_norm: 0.9999995953691561, iteration: 38976
loss: 1.023732304573059,grad_norm: 0.9999997264607555, iteration: 38977
loss: 1.0858490467071533,grad_norm: 0.9999995878137333, iteration: 38978
loss: 0.9673270583152771,grad_norm: 0.983425994648544, iteration: 38979
loss: 1.0177805423736572,grad_norm: 0.9977810407281447, iteration: 38980
loss: 0.9952835440635681,grad_norm: 0.8990424385364939, iteration: 38981
loss: 0.976108193397522,grad_norm: 0.9999993416516901, iteration: 38982
loss: 1.0053783655166626,grad_norm: 0.8331379165064238, iteration: 38983
loss: 1.0139826536178589,grad_norm: 0.9636843182498087, iteration: 38984
loss: 0.9788820147514343,grad_norm: 0.9676732051564989, iteration: 38985
loss: 0.9745373129844666,grad_norm: 0.9915970993473027, iteration: 38986
loss: 1.0122604370117188,grad_norm: 0.9999991154336421, iteration: 38987
loss: 1.020540714263916,grad_norm: 0.9706402503095221, iteration: 38988
loss: 1.0119019746780396,grad_norm: 0.9999991861363734, iteration: 38989
loss: 0.9902105927467346,grad_norm: 0.9999991153604878, iteration: 38990
loss: 1.0227866172790527,grad_norm: 0.8949469811870228, iteration: 38991
loss: 1.0108742713928223,grad_norm: 0.9999995034776065, iteration: 38992
loss: 0.9797189235687256,grad_norm: 0.8811299822354731, iteration: 38993
loss: 1.0040019750595093,grad_norm: 0.9267241605928903, iteration: 38994
loss: 1.0574250221252441,grad_norm: 0.9999992567115631, iteration: 38995
loss: 0.9774073958396912,grad_norm: 0.9309358138787985, iteration: 38996
loss: 0.9927347898483276,grad_norm: 0.86065781504351, iteration: 38997
loss: 1.002122402191162,grad_norm: 0.89713118076651, iteration: 38998
loss: 1.0274587869644165,grad_norm: 0.9999991093152812, iteration: 38999
loss: 1.0024936199188232,grad_norm: 0.8534591171437732, iteration: 39000
loss: 0.9911867380142212,grad_norm: 0.9642767489760116, iteration: 39001
loss: 1.0119060277938843,grad_norm: 0.9244235947572697, iteration: 39002
loss: 1.0728504657745361,grad_norm: 0.9999993486479373, iteration: 39003
loss: 1.0151293277740479,grad_norm: 0.9773405025193788, iteration: 39004
loss: 1.0159133672714233,grad_norm: 0.9999997596467106, iteration: 39005
loss: 1.0169812440872192,grad_norm: 0.94575766979449, iteration: 39006
loss: 1.0426841974258423,grad_norm: 0.9999993361846188, iteration: 39007
loss: 0.992313027381897,grad_norm: 0.8795369387759767, iteration: 39008
loss: 1.050317406654358,grad_norm: 0.9999993448677599, iteration: 39009
loss: 1.0086029767990112,grad_norm: 0.9658202837999308, iteration: 39010
loss: 1.0662567615509033,grad_norm: 0.9999996859723901, iteration: 39011
loss: 1.0078730583190918,grad_norm: 0.9999993955722595, iteration: 39012
loss: 1.0024340152740479,grad_norm: 0.9999990995140106, iteration: 39013
loss: 1.0223333835601807,grad_norm: 0.9548499144825944, iteration: 39014
loss: 0.9880881309509277,grad_norm: 0.7766060945129996, iteration: 39015
loss: 0.9901642799377441,grad_norm: 0.9240277321015515, iteration: 39016
loss: 0.9823538661003113,grad_norm: 0.9898311472571943, iteration: 39017
loss: 1.0132849216461182,grad_norm: 0.8838603767903255, iteration: 39018
loss: 0.9909669756889343,grad_norm: 0.9999991938638687, iteration: 39019
loss: 1.0181584358215332,grad_norm: 0.9999991230694586, iteration: 39020
loss: 0.9828616976737976,grad_norm: 0.9999990883174004, iteration: 39021
loss: 1.0077840089797974,grad_norm: 0.9582042365981766, iteration: 39022
loss: 1.010661005973816,grad_norm: 0.8087140898458105, iteration: 39023
loss: 0.9731636643409729,grad_norm: 0.9355395872707323, iteration: 39024
loss: 1.0153181552886963,grad_norm: 0.9999991828870067, iteration: 39025
loss: 0.9911193251609802,grad_norm: 0.8996470257943958, iteration: 39026
loss: 0.9953711032867432,grad_norm: 0.9999989790731411, iteration: 39027
loss: 1.0127308368682861,grad_norm: 0.9999991052585083, iteration: 39028
loss: 0.9621424078941345,grad_norm: 0.9999991836445407, iteration: 39029
loss: 1.0117366313934326,grad_norm: 0.96639589737818, iteration: 39030
loss: 1.0032413005828857,grad_norm: 0.895851631719191, iteration: 39031
loss: 1.0064674615859985,grad_norm: 0.9999992917572275, iteration: 39032
loss: 1.0465377569198608,grad_norm: 0.9699138864006914, iteration: 39033
loss: 0.9889547228813171,grad_norm: 0.8877611705998139, iteration: 39034
loss: 1.017143964767456,grad_norm: 0.9999993714910501, iteration: 39035
loss: 1.0105602741241455,grad_norm: 0.8824283867033091, iteration: 39036
loss: 0.9821553230285645,grad_norm: 0.9999991672565327, iteration: 39037
loss: 1.0101696252822876,grad_norm: 0.9206928466062492, iteration: 39038
loss: 1.0121358633041382,grad_norm: 0.9999990332998868, iteration: 39039
loss: 1.0043376684188843,grad_norm: 0.9531406208994756, iteration: 39040
loss: 1.001349687576294,grad_norm: 0.9999991139478103, iteration: 39041
loss: 1.0031193494796753,grad_norm: 0.9770137466489995, iteration: 39042
loss: 1.000672698020935,grad_norm: 0.9999992273992756, iteration: 39043
loss: 0.986478328704834,grad_norm: 0.9869923080308556, iteration: 39044
loss: 0.9978707432746887,grad_norm: 0.9479755804013776, iteration: 39045
loss: 1.0124285221099854,grad_norm: 0.9623021273382402, iteration: 39046
loss: 0.9959476590156555,grad_norm: 0.9999994349839837, iteration: 39047
loss: 0.9905551671981812,grad_norm: 0.9987268534168087, iteration: 39048
loss: 1.0257294178009033,grad_norm: 0.9999990040328952, iteration: 39049
loss: 1.0016509294509888,grad_norm: 0.9224410920939956, iteration: 39050
loss: 1.0102438926696777,grad_norm: 0.7167986227325955, iteration: 39051
loss: 0.9850358963012695,grad_norm: 0.9266162003155705, iteration: 39052
loss: 1.006161093711853,grad_norm: 0.9844831240513672, iteration: 39053
loss: 1.0390063524246216,grad_norm: 0.9999990769409745, iteration: 39054
loss: 1.0215569734573364,grad_norm: 0.966627985287397, iteration: 39055
loss: 1.0257198810577393,grad_norm: 0.9999989721431213, iteration: 39056
loss: 0.9953863620758057,grad_norm: 0.9999992279684283, iteration: 39057
loss: 0.9902793765068054,grad_norm: 0.9872658847922613, iteration: 39058
loss: 0.9729889631271362,grad_norm: 0.9148923412918833, iteration: 39059
loss: 0.9795741438865662,grad_norm: 0.9999989912331818, iteration: 39060
loss: 1.0280704498291016,grad_norm: 0.9135175005192314, iteration: 39061
loss: 1.0078953504562378,grad_norm: 0.9999992817138367, iteration: 39062
loss: 1.0128859281539917,grad_norm: 0.9999991086314004, iteration: 39063
loss: 1.0133448839187622,grad_norm: 0.9999990158362627, iteration: 39064
loss: 1.0077043771743774,grad_norm: 0.9999991058993971, iteration: 39065
loss: 1.005873680114746,grad_norm: 0.9304252796719155, iteration: 39066
loss: 0.9792760014533997,grad_norm: 0.9999989848315783, iteration: 39067
loss: 1.0114185810089111,grad_norm: 0.9487213603114594, iteration: 39068
loss: 0.9624016880989075,grad_norm: 0.9999992602780503, iteration: 39069
loss: 0.9860436320304871,grad_norm: 0.9999990476434004, iteration: 39070
loss: 0.9987246990203857,grad_norm: 0.8278848287478909, iteration: 39071
loss: 0.9763412475585938,grad_norm: 0.7945584124220817, iteration: 39072
loss: 1.00642728805542,grad_norm: 0.8781802838410359, iteration: 39073
loss: 1.0122017860412598,grad_norm: 0.8719732101254154, iteration: 39074
loss: 1.0127886533737183,grad_norm: 0.8970066108989337, iteration: 39075
loss: 0.9841671586036682,grad_norm: 0.9999990623498212, iteration: 39076
loss: 0.9936298727989197,grad_norm: 0.9999990399994501, iteration: 39077
loss: 0.9798344969749451,grad_norm: 0.8821099829889059, iteration: 39078
loss: 0.9916874766349792,grad_norm: 0.8367949204679783, iteration: 39079
loss: 0.9627073407173157,grad_norm: 0.9999991420102936, iteration: 39080
loss: 1.0358102321624756,grad_norm: 0.9733275160392382, iteration: 39081
loss: 0.9802996516227722,grad_norm: 0.9999990211769971, iteration: 39082
loss: 1.0216076374053955,grad_norm: 0.8635963413078529, iteration: 39083
loss: 0.9692997336387634,grad_norm: 0.9185388900706348, iteration: 39084
loss: 0.9736940860748291,grad_norm: 0.9999990008463057, iteration: 39085
loss: 1.01933753490448,grad_norm: 0.9545249759773827, iteration: 39086
loss: 1.0188933610916138,grad_norm: 0.840403428731619, iteration: 39087
loss: 1.0415395498275757,grad_norm: 0.9602911793454091, iteration: 39088
loss: 1.0492478609085083,grad_norm: 0.9999990769137268, iteration: 39089
loss: 1.0244203805923462,grad_norm: 0.9999990621874166, iteration: 39090
loss: 0.99695885181427,grad_norm: 0.8489238441723561, iteration: 39091
loss: 0.9928855299949646,grad_norm: 0.9874806083654099, iteration: 39092
loss: 1.0169237852096558,grad_norm: 0.9520536414710958, iteration: 39093
loss: 1.0183820724487305,grad_norm: 0.8715657685697766, iteration: 39094
loss: 1.0279887914657593,grad_norm: 0.9999990683455775, iteration: 39095
loss: 0.9828360676765442,grad_norm: 0.8171902533054608, iteration: 39096
loss: 1.0013443231582642,grad_norm: 0.7976066531201755, iteration: 39097
loss: 0.9909868836402893,grad_norm: 0.9999992466202318, iteration: 39098
loss: 0.98764568567276,grad_norm: 0.9851573930615053, iteration: 39099
loss: 1.0150818824768066,grad_norm: 0.9999989807072845, iteration: 39100
loss: 0.9865872263908386,grad_norm: 0.8028338778970954, iteration: 39101
loss: 1.0084530115127563,grad_norm: 0.9999990415699597, iteration: 39102
loss: 0.9420636892318726,grad_norm: 0.9896782019265981, iteration: 39103
loss: 0.9628626108169556,grad_norm: 0.9526360688105574, iteration: 39104
loss: 0.9782698750495911,grad_norm: 0.8718445029873882, iteration: 39105
loss: 0.9754840731620789,grad_norm: 0.9999990953865268, iteration: 39106
loss: 1.0107762813568115,grad_norm: 0.9999991816946109, iteration: 39107
loss: 0.9928370118141174,grad_norm: 0.9999990897032152, iteration: 39108
loss: 0.9815887808799744,grad_norm: 0.9999993308018288, iteration: 39109
loss: 0.9854400157928467,grad_norm: 0.9999991574985762, iteration: 39110
loss: 0.9837018847465515,grad_norm: 0.9940883389485747, iteration: 39111
loss: 1.0191378593444824,grad_norm: 0.937918364619803, iteration: 39112
loss: 1.0001236200332642,grad_norm: 0.9519197027645524, iteration: 39113
loss: 0.9867199659347534,grad_norm: 0.9435884936558748, iteration: 39114
loss: 0.9978310465812683,grad_norm: 0.891806103065448, iteration: 39115
loss: 0.974175751209259,grad_norm: 0.9410576197056797, iteration: 39116
loss: 1.0076040029525757,grad_norm: 0.9999992973768004, iteration: 39117
loss: 0.9952645301818848,grad_norm: 0.9999989814803322, iteration: 39118
loss: 1.0020151138305664,grad_norm: 0.9559272785539399, iteration: 39119
loss: 0.9994198679924011,grad_norm: 0.9999990935073655, iteration: 39120
loss: 1.0285385847091675,grad_norm: 0.9460560820211528, iteration: 39121
loss: 1.025653600692749,grad_norm: 0.8695957565921175, iteration: 39122
loss: 0.9434875249862671,grad_norm: 0.9932618855946322, iteration: 39123
loss: 0.9831912517547607,grad_norm: 0.7830409736770579, iteration: 39124
loss: 0.966113805770874,grad_norm: 0.9085931649349515, iteration: 39125
loss: 0.9770393967628479,grad_norm: 0.967029935713808, iteration: 39126
loss: 1.007432460784912,grad_norm: 0.9999991027513943, iteration: 39127
loss: 0.9744802117347717,grad_norm: 0.9999989673934218, iteration: 39128
loss: 0.9885604977607727,grad_norm: 0.8782222605515223, iteration: 39129
loss: 1.0319640636444092,grad_norm: 0.9999990672793174, iteration: 39130
loss: 0.9846857190132141,grad_norm: 0.9932447180578798, iteration: 39131
loss: 0.978928804397583,grad_norm: 0.9554666279318904, iteration: 39132
loss: 1.0058083534240723,grad_norm: 0.9615325116533533, iteration: 39133
loss: 0.9848818182945251,grad_norm: 0.9999990261345136, iteration: 39134
loss: 1.0087425708770752,grad_norm: 0.9773862937700843, iteration: 39135
loss: 0.9921582341194153,grad_norm: 0.9999993334441954, iteration: 39136
loss: 0.9675595164299011,grad_norm: 0.9999991447354578, iteration: 39137
loss: 1.0120024681091309,grad_norm: 0.9498514512642188, iteration: 39138
loss: 0.9649298787117004,grad_norm: 0.9999992080656082, iteration: 39139
loss: 0.9920596480369568,grad_norm: 0.9754280538350742, iteration: 39140
loss: 0.9793909192085266,grad_norm: 0.9999994130475586, iteration: 39141
loss: 0.9476958513259888,grad_norm: 0.9999992232039911, iteration: 39142
loss: 0.9983119368553162,grad_norm: 0.9355996951318679, iteration: 39143
loss: 1.0181500911712646,grad_norm: 0.8526007794592803, iteration: 39144
loss: 0.9516200423240662,grad_norm: 0.8369354656721794, iteration: 39145
loss: 1.0511804819107056,grad_norm: 0.9999991586472322, iteration: 39146
loss: 1.0016928911209106,grad_norm: 0.8560997710842012, iteration: 39147
loss: 1.0102225542068481,grad_norm: 0.9402263678969861, iteration: 39148
loss: 0.9836581945419312,grad_norm: 0.9811981788182519, iteration: 39149
loss: 1.0155937671661377,grad_norm: 0.9999990890597745, iteration: 39150
loss: 1.0295581817626953,grad_norm: 0.9999992579339839, iteration: 39151
loss: 1.0074102878570557,grad_norm: 0.9999991595097693, iteration: 39152
loss: 0.9623808264732361,grad_norm: 0.9039538622849784, iteration: 39153
loss: 0.9923152327537537,grad_norm: 0.9376225411357462, iteration: 39154
loss: 1.019335150718689,grad_norm: 0.913720732680189, iteration: 39155
loss: 1.0073591470718384,grad_norm: 0.9999991926013426, iteration: 39156
loss: 0.9733344912528992,grad_norm: 0.9657620347814603, iteration: 39157
loss: 1.0284076929092407,grad_norm: 0.9999992033955585, iteration: 39158
loss: 0.94704270362854,grad_norm: 0.9999990404214671, iteration: 39159
loss: 1.0007903575897217,grad_norm: 0.9361302482548464, iteration: 39160
loss: 1.0219857692718506,grad_norm: 0.9999990340970438, iteration: 39161
loss: 1.0235117673873901,grad_norm: 0.9999990975839051, iteration: 39162
loss: 0.983242392539978,grad_norm: 0.9999990658514176, iteration: 39163
loss: 1.0198291540145874,grad_norm: 0.9999990253722184, iteration: 39164
loss: 0.9845609664916992,grad_norm: 0.8440369506332436, iteration: 39165
loss: 1.000109314918518,grad_norm: 0.7932670071362304, iteration: 39166
loss: 1.0204496383666992,grad_norm: 0.8040685177807827, iteration: 39167
loss: 0.9737856388092041,grad_norm: 0.9999991939170184, iteration: 39168
loss: 1.0008996725082397,grad_norm: 0.9414763004731128, iteration: 39169
loss: 1.0098652839660645,grad_norm: 0.9405714573763923, iteration: 39170
loss: 1.01705801486969,grad_norm: 0.9267463838630244, iteration: 39171
loss: 0.9834045767784119,grad_norm: 0.9999990827063958, iteration: 39172
loss: 0.9782432317733765,grad_norm: 0.9999991403011836, iteration: 39173
loss: 1.050808072090149,grad_norm: 0.999999110402481, iteration: 39174
loss: 1.0160095691680908,grad_norm: 0.835522401072235, iteration: 39175
loss: 1.0205011367797852,grad_norm: 0.9999996236844337, iteration: 39176
loss: 0.9815942049026489,grad_norm: 0.9959473455403808, iteration: 39177
loss: 1.048978567123413,grad_norm: 0.891692639988453, iteration: 39178
loss: 0.9564971923828125,grad_norm: 0.9571785376273605, iteration: 39179
loss: 0.9889085292816162,grad_norm: 0.8383975357466316, iteration: 39180
loss: 0.9945083856582642,grad_norm: 0.998646229339011, iteration: 39181
loss: 0.9651302695274353,grad_norm: 0.9443075766781098, iteration: 39182
loss: 1.0065696239471436,grad_norm: 0.8006903691441422, iteration: 39183
loss: 1.0315957069396973,grad_norm: 0.8368900674062153, iteration: 39184
loss: 1.0153690576553345,grad_norm: 0.9308639176173772, iteration: 39185
loss: 1.0116673707962036,grad_norm: 0.9999991806675903, iteration: 39186
loss: 0.9910628795623779,grad_norm: 0.8899320010644027, iteration: 39187
loss: 0.9710738658905029,grad_norm: 0.988615699130673, iteration: 39188
loss: 0.9762259721755981,grad_norm: 0.9999990750570468, iteration: 39189
loss: 1.0155216455459595,grad_norm: 0.999999418097828, iteration: 39190
loss: 1.0388846397399902,grad_norm: 0.999999023357109, iteration: 39191
loss: 0.9774793982505798,grad_norm: 0.9599527175523878, iteration: 39192
loss: 1.0059326887130737,grad_norm: 0.9999990345807577, iteration: 39193
loss: 1.0513131618499756,grad_norm: 0.8218528602145235, iteration: 39194
loss: 1.000576376914978,grad_norm: 0.8760619117344478, iteration: 39195
loss: 0.9864959716796875,grad_norm: 0.9328377969074851, iteration: 39196
loss: 0.9822235107421875,grad_norm: 0.9777736846057656, iteration: 39197
loss: 0.9886720776557922,grad_norm: 0.8000744560752427, iteration: 39198
loss: 1.0008243322372437,grad_norm: 0.9692928647535076, iteration: 39199
loss: 1.0160013437271118,grad_norm: 0.9598701159032819, iteration: 39200
loss: 1.0091475248336792,grad_norm: 0.9999993990589976, iteration: 39201
loss: 1.0025780200958252,grad_norm: 0.9678218345474237, iteration: 39202
loss: 0.9896556735038757,grad_norm: 0.8616530624409829, iteration: 39203
loss: 0.9950280785560608,grad_norm: 0.99999922769579, iteration: 39204
loss: 1.0090649127960205,grad_norm: 0.9999991491426197, iteration: 39205
loss: 0.984605073928833,grad_norm: 0.9437008710849121, iteration: 39206
loss: 0.9897293448448181,grad_norm: 0.7902214325465211, iteration: 39207
loss: 1.0075196027755737,grad_norm: 0.999999221817844, iteration: 39208
loss: 1.045493721961975,grad_norm: 0.999998970252268, iteration: 39209
loss: 1.028996229171753,grad_norm: 0.9999991494952242, iteration: 39210
loss: 0.9991922378540039,grad_norm: 0.9637708735810474, iteration: 39211
loss: 1.0384522676467896,grad_norm: 0.999999106464946, iteration: 39212
loss: 1.0241143703460693,grad_norm: 0.99999895886454, iteration: 39213
loss: 0.9781584739685059,grad_norm: 0.9083636698772091, iteration: 39214
loss: 0.991353452205658,grad_norm: 0.9493878953456167, iteration: 39215
loss: 0.9803102612495422,grad_norm: 0.9379093233510267, iteration: 39216
loss: 0.9868704080581665,grad_norm: 0.9999991448961927, iteration: 39217
loss: 1.0437270402908325,grad_norm: 0.9999991595042443, iteration: 39218
loss: 0.9884123802185059,grad_norm: 0.9999991837290163, iteration: 39219
loss: 0.9744768142700195,grad_norm: 0.878883302121229, iteration: 39220
loss: 1.016937255859375,grad_norm: 0.8280268411283234, iteration: 39221
loss: 1.0637121200561523,grad_norm: 0.9999992899698714, iteration: 39222
loss: 0.9811123013496399,grad_norm: 0.9445950504730681, iteration: 39223
loss: 0.9754918813705444,grad_norm: 0.9384883960650638, iteration: 39224
loss: 0.9997209906578064,grad_norm: 0.7885324429035059, iteration: 39225
loss: 1.0241960287094116,grad_norm: 0.9709771313073583, iteration: 39226
loss: 1.0028808116912842,grad_norm: 0.9999992505716827, iteration: 39227
loss: 1.0530792474746704,grad_norm: 0.99999932746647, iteration: 39228
loss: 0.972347617149353,grad_norm: 0.8755990262375232, iteration: 39229
loss: 1.0303748846054077,grad_norm: 0.9999992064508538, iteration: 39230
loss: 1.0741771459579468,grad_norm: 0.9999991836415748, iteration: 39231
loss: 0.992359459400177,grad_norm: 0.9371255838443155, iteration: 39232
loss: 0.9796600937843323,grad_norm: 0.8897288501102302, iteration: 39233
loss: 1.0207890272140503,grad_norm: 0.9742522095776336, iteration: 39234
loss: 1.0429279804229736,grad_norm: 0.9999990990562284, iteration: 39235
loss: 0.9722729325294495,grad_norm: 0.9002403111677427, iteration: 39236
loss: 0.963970959186554,grad_norm: 0.9887450890090629, iteration: 39237
loss: 1.017677664756775,grad_norm: 0.9104270473499804, iteration: 39238
loss: 0.9984307289123535,grad_norm: 0.8414487240317078, iteration: 39239
loss: 0.9858372211456299,grad_norm: 0.9191675460495221, iteration: 39240
loss: 0.9563641548156738,grad_norm: 0.9660525820937961, iteration: 39241
loss: 1.0008867979049683,grad_norm: 0.8149917280978999, iteration: 39242
loss: 1.0350420475006104,grad_norm: 0.9999990705304198, iteration: 39243
loss: 0.9796962141990662,grad_norm: 0.9999990382806104, iteration: 39244
loss: 0.9837355017662048,grad_norm: 0.9701580208855101, iteration: 39245
loss: 1.034027099609375,grad_norm: 0.9305448918288823, iteration: 39246
loss: 0.9718084335327148,grad_norm: 0.9045719734834635, iteration: 39247
loss: 0.97535640001297,grad_norm: 0.9999991801577379, iteration: 39248
loss: 0.9700754880905151,grad_norm: 0.999998939096478, iteration: 39249
loss: 1.0206081867218018,grad_norm: 0.9999991333500786, iteration: 39250
loss: 1.030924916267395,grad_norm: 0.8220440550222541, iteration: 39251
loss: 0.992577850818634,grad_norm: 0.9123721913703313, iteration: 39252
loss: 1.0049378871917725,grad_norm: 0.9924783158227308, iteration: 39253
loss: 0.9888206124305725,grad_norm: 0.9133599399934207, iteration: 39254
loss: 0.9848946332931519,grad_norm: 0.9667078471678209, iteration: 39255
loss: 1.0390986204147339,grad_norm: 0.9999990024886128, iteration: 39256
loss: 0.989871084690094,grad_norm: 0.9049891746898923, iteration: 39257
loss: 1.0039565563201904,grad_norm: 0.9999990254310737, iteration: 39258
loss: 1.0269455909729004,grad_norm: 0.8127767354331473, iteration: 39259
loss: 1.0140433311462402,grad_norm: 0.9999992460047894, iteration: 39260
loss: 0.9750973582267761,grad_norm: 0.9990925104888136, iteration: 39261
loss: 1.0196115970611572,grad_norm: 0.9999990132963583, iteration: 39262
loss: 1.0163850784301758,grad_norm: 0.9628613244283982, iteration: 39263
loss: 1.0104403495788574,grad_norm: 0.9999990322116378, iteration: 39264
loss: 0.9675803184509277,grad_norm: 0.9999990059899861, iteration: 39265
loss: 1.032225489616394,grad_norm: 0.8063795130518656, iteration: 39266
loss: 1.0108698606491089,grad_norm: 0.9875089285324734, iteration: 39267
loss: 1.0025122165679932,grad_norm: 0.9999994737749986, iteration: 39268
loss: 1.033104419708252,grad_norm: 0.8851159946002815, iteration: 39269
loss: 0.9865646958351135,grad_norm: 0.9844594914904253, iteration: 39270
loss: 0.9898862242698669,grad_norm: 0.8813235668637989, iteration: 39271
loss: 1.015228271484375,grad_norm: 0.999999271584208, iteration: 39272
loss: 0.9849625825881958,grad_norm: 0.999999268838739, iteration: 39273
loss: 1.0271904468536377,grad_norm: 0.8493023097847903, iteration: 39274
loss: 0.9840419888496399,grad_norm: 0.8840421688101445, iteration: 39275
loss: 1.024240255355835,grad_norm: 0.9999991373722887, iteration: 39276
loss: 1.0197358131408691,grad_norm: 0.9161595718578125, iteration: 39277
loss: 1.0170193910598755,grad_norm: 0.9648067821540258, iteration: 39278
loss: 0.9983320832252502,grad_norm: 0.9999992405924039, iteration: 39279
loss: 0.9792553186416626,grad_norm: 0.9977991315488615, iteration: 39280
loss: 0.9915894269943237,grad_norm: 0.9661895139118293, iteration: 39281
loss: 1.0150976181030273,grad_norm: 0.9091908219672125, iteration: 39282
loss: 1.0102260112762451,grad_norm: 0.9729676732685968, iteration: 39283
loss: 1.0036115646362305,grad_norm: 0.9513205187042956, iteration: 39284
loss: 0.9895080924034119,grad_norm: 0.9999990946607815, iteration: 39285
loss: 1.0140756368637085,grad_norm: 0.9999994370045776, iteration: 39286
loss: 0.9734710454940796,grad_norm: 0.9024087332729597, iteration: 39287
loss: 0.9973978400230408,grad_norm: 0.918133472989367, iteration: 39288
loss: 0.974609375,grad_norm: 0.8249401327137104, iteration: 39289
loss: 0.9359793663024902,grad_norm: 0.9999989444449624, iteration: 39290
loss: 1.0060899257659912,grad_norm: 0.9999991780898534, iteration: 39291
loss: 0.9928631782531738,grad_norm: 0.999998940576535, iteration: 39292
loss: 0.9843946695327759,grad_norm: 0.8709588807484714, iteration: 39293
loss: 0.9996416568756104,grad_norm: 0.8064979811515273, iteration: 39294
loss: 1.0459054708480835,grad_norm: 0.9999992473329058, iteration: 39295
loss: 1.030157446861267,grad_norm: 0.8883541156439925, iteration: 39296
loss: 0.9959192872047424,grad_norm: 0.9251213794650893, iteration: 39297
loss: 0.9950814843177795,grad_norm: 0.9532582030578541, iteration: 39298
loss: 0.9716805219650269,grad_norm: 0.8966672606555196, iteration: 39299
loss: 0.9874299168586731,grad_norm: 0.9787023951585613, iteration: 39300
loss: 0.972379744052887,grad_norm: 0.9431689593046015, iteration: 39301
loss: 0.9397092461585999,grad_norm: 0.9560531319840719, iteration: 39302
loss: 0.9975670576095581,grad_norm: 0.9999994044470714, iteration: 39303
loss: 1.0289119482040405,grad_norm: 0.9262542341575413, iteration: 39304
loss: 0.9832483530044556,grad_norm: 0.9999991016518149, iteration: 39305
loss: 1.0268555879592896,grad_norm: 0.8782739961876507, iteration: 39306
loss: 0.9889112710952759,grad_norm: 0.9040796867321245, iteration: 39307
loss: 0.98152756690979,grad_norm: 0.980539039851844, iteration: 39308
loss: 0.9855657815933228,grad_norm: 0.9891545739738232, iteration: 39309
loss: 1.1656501293182373,grad_norm: 0.999999574088709, iteration: 39310
loss: 1.0151339769363403,grad_norm: 0.8431055710178381, iteration: 39311
loss: 1.0086970329284668,grad_norm: 0.8769294343179407, iteration: 39312
loss: 0.9892300963401794,grad_norm: 0.9999989578195655, iteration: 39313
loss: 0.9960668683052063,grad_norm: 0.8998828416683873, iteration: 39314
loss: 0.9822551012039185,grad_norm: 0.999999284355329, iteration: 39315
loss: 0.9728921055793762,grad_norm: 0.8103205635506195, iteration: 39316
loss: 0.9554935693740845,grad_norm: 0.9999991693726378, iteration: 39317
loss: 1.0146236419677734,grad_norm: 0.8888372888453202, iteration: 39318
loss: 1.0104589462280273,grad_norm: 0.9999990565515114, iteration: 39319
loss: 1.0222179889678955,grad_norm: 0.9668884543272204, iteration: 39320
loss: 1.0434340238571167,grad_norm: 0.9999990393654666, iteration: 39321
loss: 0.9868874549865723,grad_norm: 0.9868467351396896, iteration: 39322
loss: 0.9934846758842468,grad_norm: 0.8348133994107719, iteration: 39323
loss: 1.068304419517517,grad_norm: 0.999999402328117, iteration: 39324
loss: 1.009112000465393,grad_norm: 0.8836927407085343, iteration: 39325
loss: 1.0218364000320435,grad_norm: 0.9999991316062702, iteration: 39326
loss: 1.0189226865768433,grad_norm: 0.9688150861560104, iteration: 39327
loss: 1.0240989923477173,grad_norm: 0.9471958071533054, iteration: 39328
loss: 1.0261383056640625,grad_norm: 0.9970930270793984, iteration: 39329
loss: 1.0348948240280151,grad_norm: 0.9048428097048052, iteration: 39330
loss: 1.0239003896713257,grad_norm: 0.9999991436050377, iteration: 39331
loss: 0.9867990612983704,grad_norm: 0.8898099771666954, iteration: 39332
loss: 1.040614366531372,grad_norm: 0.9224057691739035, iteration: 39333
loss: 0.9905986189842224,grad_norm: 0.9249914103806024, iteration: 39334
loss: 1.015390396118164,grad_norm: 0.9728261373894432, iteration: 39335
loss: 1.0227813720703125,grad_norm: 0.9999991351791265, iteration: 39336
loss: 1.0321965217590332,grad_norm: 0.9999989689620956, iteration: 39337
loss: 0.9850213527679443,grad_norm: 0.9493867298354473, iteration: 39338
loss: 0.9621742367744446,grad_norm: 0.9000505124165157, iteration: 39339
loss: 0.9898223876953125,grad_norm: 0.9999990289246937, iteration: 39340
loss: 0.9887085556983948,grad_norm: 0.8039118103063814, iteration: 39341
loss: 0.988713800907135,grad_norm: 0.9999989638742087, iteration: 39342
loss: 1.0345511436462402,grad_norm: 0.9999990424264646, iteration: 39343
loss: 1.046883225440979,grad_norm: 0.9999991532647405, iteration: 39344
loss: 0.997927725315094,grad_norm: 0.9999991517048072, iteration: 39345
loss: 0.9686358571052551,grad_norm: 0.9999991285764046, iteration: 39346
loss: 1.00603449344635,grad_norm: 0.9999990410885047, iteration: 39347
loss: 0.985822319984436,grad_norm: 0.9273779943565424, iteration: 39348
loss: 0.98990797996521,grad_norm: 0.9999992121507492, iteration: 39349
loss: 1.0059400796890259,grad_norm: 0.9653801057595678, iteration: 39350
loss: 0.9822068214416504,grad_norm: 0.894532082974171, iteration: 39351
loss: 0.9851958751678467,grad_norm: 0.9999990082678989, iteration: 39352
loss: 0.9689823389053345,grad_norm: 0.9999990504508516, iteration: 39353
loss: 0.9987236261367798,grad_norm: 0.8882392635798253, iteration: 39354
loss: 1.034149169921875,grad_norm: 0.9999991630367128, iteration: 39355
loss: 1.0206605195999146,grad_norm: 0.9999990946240548, iteration: 39356
loss: 1.0249196290969849,grad_norm: 0.9924851639230051, iteration: 39357
loss: 1.0068705081939697,grad_norm: 0.9999992323620194, iteration: 39358
loss: 1.0050499439239502,grad_norm: 0.9702422739148048, iteration: 39359
loss: 1.0162256956100464,grad_norm: 0.9999990773734783, iteration: 39360
loss: 0.9710332751274109,grad_norm: 0.9999989965194446, iteration: 39361
loss: 1.0142372846603394,grad_norm: 0.9999992569534177, iteration: 39362
loss: 1.022252082824707,grad_norm: 0.9537093500334464, iteration: 39363
loss: 1.0150141716003418,grad_norm: 0.9999991723955777, iteration: 39364
loss: 0.974574089050293,grad_norm: 0.9154497009467892, iteration: 39365
loss: 1.021263599395752,grad_norm: 0.9203600781070576, iteration: 39366
loss: 1.0445643663406372,grad_norm: 0.9999991992497554, iteration: 39367
loss: 0.9891230463981628,grad_norm: 0.8793143701072069, iteration: 39368
loss: 0.9996294379234314,grad_norm: 0.8556355071723638, iteration: 39369
loss: 0.9792324304580688,grad_norm: 0.9999991012537887, iteration: 39370
loss: 0.9917287826538086,grad_norm: 0.9829362455273064, iteration: 39371
loss: 0.9948614239692688,grad_norm: 0.9610720314717673, iteration: 39372
loss: 1.011546015739441,grad_norm: 0.9301862197080146, iteration: 39373
loss: 1.0397595167160034,grad_norm: 0.8789145547589106, iteration: 39374
loss: 1.0160413980484009,grad_norm: 0.9644145900623935, iteration: 39375
loss: 1.0149251222610474,grad_norm: 0.9999991293420479, iteration: 39376
loss: 1.0330171585083008,grad_norm: 0.9629347575837548, iteration: 39377
loss: 1.0014240741729736,grad_norm: 0.955096799051126, iteration: 39378
loss: 1.0155367851257324,grad_norm: 0.9999990171875432, iteration: 39379
loss: 0.9684183597564697,grad_norm: 0.9999991315011988, iteration: 39380
loss: 0.9924522638320923,grad_norm: 0.951537449120732, iteration: 39381
loss: 1.0051435232162476,grad_norm: 0.9999990569840002, iteration: 39382
loss: 1.0008814334869385,grad_norm: 0.9264131636906788, iteration: 39383
loss: 0.9785757660865784,grad_norm: 0.8894433211855844, iteration: 39384
loss: 0.9868396520614624,grad_norm: 0.8055978413915033, iteration: 39385
loss: 1.0157873630523682,grad_norm: 0.9999989976182444, iteration: 39386
loss: 1.0051324367523193,grad_norm: 0.8564505044476612, iteration: 39387
loss: 1.0168277025222778,grad_norm: 0.9999992122790806, iteration: 39388
loss: 0.9925262928009033,grad_norm: 0.8418398364945026, iteration: 39389
loss: 0.996128499507904,grad_norm: 0.9042854320852319, iteration: 39390
loss: 1.0043823719024658,grad_norm: 0.842129581589929, iteration: 39391
loss: 0.9908690452575684,grad_norm: 0.9189571968920056, iteration: 39392
loss: 1.0337499380111694,grad_norm: 0.9999992321969065, iteration: 39393
loss: 1.0170073509216309,grad_norm: 0.9168995628274804, iteration: 39394
loss: 1.0514434576034546,grad_norm: 0.9999997695081269, iteration: 39395
loss: 1.0285124778747559,grad_norm: 0.8966187105647634, iteration: 39396
loss: 0.9562875628471375,grad_norm: 0.9999989866567176, iteration: 39397
loss: 1.016696572303772,grad_norm: 0.9595959898146736, iteration: 39398
loss: 0.99288409948349,grad_norm: 0.9723034549963251, iteration: 39399
loss: 1.0022289752960205,grad_norm: 0.8850627568784453, iteration: 39400
loss: 0.9701014757156372,grad_norm: 0.9406072071560225, iteration: 39401
loss: 1.0272856950759888,grad_norm: 0.8895265675151808, iteration: 39402
loss: 1.0106698274612427,grad_norm: 0.9999991129666034, iteration: 39403
loss: 0.9983770251274109,grad_norm: 0.8881169205691225, iteration: 39404
loss: 1.0020254850387573,grad_norm: 0.9284808352278462, iteration: 39405
loss: 1.007696270942688,grad_norm: 0.9999991462823261, iteration: 39406
loss: 0.9946316480636597,grad_norm: 0.8761884700699075, iteration: 39407
loss: 1.0025911331176758,grad_norm: 0.9721564683269842, iteration: 39408
loss: 1.0083086490631104,grad_norm: 0.8927281691218757, iteration: 39409
loss: 1.0303747653961182,grad_norm: 0.9999994430991039, iteration: 39410
loss: 0.9738088846206665,grad_norm: 0.9393036578607941, iteration: 39411
loss: 1.0598050355911255,grad_norm: 0.9999998928369314, iteration: 39412
loss: 0.9989113807678223,grad_norm: 0.8717276422658733, iteration: 39413
loss: 0.9801325798034668,grad_norm: 0.9493914965489191, iteration: 39414
loss: 0.9634155035018921,grad_norm: 0.795045437369201, iteration: 39415
loss: 1.0045247077941895,grad_norm: 0.8303735874856333, iteration: 39416
loss: 1.047224760055542,grad_norm: 0.9999990623493183, iteration: 39417
loss: 1.0197288990020752,grad_norm: 0.9999991826497966, iteration: 39418
loss: 0.998199462890625,grad_norm: 0.8638919972125376, iteration: 39419
loss: 0.9862158298492432,grad_norm: 0.8860816158294068, iteration: 39420
loss: 1.0041823387145996,grad_norm: 0.9629965005336079, iteration: 39421
loss: 0.9519616961479187,grad_norm: 0.8653207768056056, iteration: 39422
loss: 0.9665732383728027,grad_norm: 0.8822015085534652, iteration: 39423
loss: 0.9741899967193604,grad_norm: 0.9762173076446913, iteration: 39424
loss: 1.0178157091140747,grad_norm: 0.9651667239736178, iteration: 39425
loss: 0.9851967692375183,grad_norm: 0.9166394598836697, iteration: 39426
loss: 1.0052043199539185,grad_norm: 0.9953003023894422, iteration: 39427
loss: 0.9844290614128113,grad_norm: 0.9030348740373312, iteration: 39428
loss: 1.043117880821228,grad_norm: 0.9999991212052097, iteration: 39429
loss: 1.0058107376098633,grad_norm: 0.9999990059102237, iteration: 39430
loss: 1.0199480056762695,grad_norm: 0.9999997116396668, iteration: 39431
loss: 1.0315942764282227,grad_norm: 0.9551667362197082, iteration: 39432
loss: 1.0033679008483887,grad_norm: 0.9684606928655334, iteration: 39433
loss: 1.0075899362564087,grad_norm: 0.9566113175068384, iteration: 39434
loss: 0.9836103916168213,grad_norm: 0.9883610184197553, iteration: 39435
loss: 1.0001540184020996,grad_norm: 0.999999077127917, iteration: 39436
loss: 1.0009105205535889,grad_norm: 0.9999991553698003, iteration: 39437
loss: 0.9809247255325317,grad_norm: 0.9630501357484048, iteration: 39438
loss: 1.0131806135177612,grad_norm: 0.9999990931616669, iteration: 39439
loss: 0.9834722876548767,grad_norm: 0.8539392504411941, iteration: 39440
loss: 1.0164737701416016,grad_norm: 0.9999991984124568, iteration: 39441
loss: 1.0056078433990479,grad_norm: 0.9492671808490821, iteration: 39442
loss: 1.0126309394836426,grad_norm: 0.8911923593891106, iteration: 39443
loss: 1.0041176080703735,grad_norm: 0.9700258082982557, iteration: 39444
loss: 1.026870608329773,grad_norm: 0.9346430059627214, iteration: 39445
loss: 0.9968583583831787,grad_norm: 0.9704556788748934, iteration: 39446
loss: 1.0026135444641113,grad_norm: 0.9999991808227137, iteration: 39447
loss: 1.013644814491272,grad_norm: 0.9999996179338301, iteration: 39448
loss: 0.9958785772323608,grad_norm: 0.999999114286162, iteration: 39449
loss: 0.9752891659736633,grad_norm: 0.8483547954781929, iteration: 39450
loss: 1.032415509223938,grad_norm: 0.988190313170883, iteration: 39451
loss: 0.9925854802131653,grad_norm: 0.8905048189514204, iteration: 39452
loss: 1.0342217683792114,grad_norm: 0.999999168855202, iteration: 39453
loss: 1.0233420133590698,grad_norm: 0.9926631644599007, iteration: 39454
loss: 1.0033278465270996,grad_norm: 0.9707533012289424, iteration: 39455
loss: 0.9891449809074402,grad_norm: 0.8629186158154458, iteration: 39456
loss: 1.0099116563796997,grad_norm: 0.9011378815431323, iteration: 39457
loss: 0.9765775203704834,grad_norm: 0.9964308555638567, iteration: 39458
loss: 0.9964455962181091,grad_norm: 0.9999991892131462, iteration: 39459
loss: 0.959938645362854,grad_norm: 0.9999991232537201, iteration: 39460
loss: 0.9932329058647156,grad_norm: 0.9734726029450943, iteration: 39461
loss: 0.982204020023346,grad_norm: 0.9999992339724991, iteration: 39462
loss: 1.0353120565414429,grad_norm: 0.9999993349642092, iteration: 39463
loss: 0.998550295829773,grad_norm: 0.99999918392712, iteration: 39464
loss: 1.0450053215026855,grad_norm: 0.9999997053721805, iteration: 39465
loss: 1.0227408409118652,grad_norm: 0.9672050810567956, iteration: 39466
loss: 1.0061651468276978,grad_norm: 0.9999991757477402, iteration: 39467
loss: 0.9909135699272156,grad_norm: 0.8608049413670723, iteration: 39468
loss: 0.9750669598579407,grad_norm: 0.9704042168434306, iteration: 39469
loss: 1.0379784107208252,grad_norm: 0.9999995674746381, iteration: 39470
loss: 1.025213360786438,grad_norm: 0.9999996972083023, iteration: 39471
loss: 0.9389253258705139,grad_norm: 0.9999990955959234, iteration: 39472
loss: 0.9826927781105042,grad_norm: 0.9999989483767247, iteration: 39473
loss: 1.0115753412246704,grad_norm: 0.9180169335727455, iteration: 39474
loss: 1.030391812324524,grad_norm: 0.9999990795647709, iteration: 39475
loss: 0.9646337032318115,grad_norm: 0.9702983851719649, iteration: 39476
loss: 0.9904739856719971,grad_norm: 0.9999990240173601, iteration: 39477
loss: 1.036666750907898,grad_norm: 0.9603657122877095, iteration: 39478
loss: 0.9482822418212891,grad_norm: 0.999998911923628, iteration: 39479
loss: 0.9820988774299622,grad_norm: 0.9061071741886232, iteration: 39480
loss: 0.9886476993560791,grad_norm: 0.8245157299749294, iteration: 39481
loss: 0.9974837899208069,grad_norm: 0.9999993733873184, iteration: 39482
loss: 1.0117464065551758,grad_norm: 0.8533262586642492, iteration: 39483
loss: 0.968874990940094,grad_norm: 0.9999991558821562, iteration: 39484
loss: 1.0335043668746948,grad_norm: 0.8951922033121079, iteration: 39485
loss: 1.0020650625228882,grad_norm: 0.876302388021681, iteration: 39486
loss: 1.0113215446472168,grad_norm: 0.8817031600647145, iteration: 39487
loss: 1.0053237676620483,grad_norm: 0.9999990575634617, iteration: 39488
loss: 0.9813132286071777,grad_norm: 0.9988190491835471, iteration: 39489
loss: 0.9793823957443237,grad_norm: 0.8555211004403046, iteration: 39490
loss: 0.9613091945648193,grad_norm: 0.9812459072352819, iteration: 39491
loss: 1.0194648504257202,grad_norm: 0.999999107792497, iteration: 39492
loss: 1.0170925855636597,grad_norm: 0.9999992912727567, iteration: 39493
loss: 1.0140742063522339,grad_norm: 0.9999991410142861, iteration: 39494
loss: 0.9818638563156128,grad_norm: 0.9999990500251922, iteration: 39495
loss: 0.9905910491943359,grad_norm: 0.9999992160156942, iteration: 39496
loss: 0.949699878692627,grad_norm: 0.8735800981375224, iteration: 39497
loss: 0.9890923500061035,grad_norm: 0.8962020480909563, iteration: 39498
loss: 1.0334904193878174,grad_norm: 0.8868062507117314, iteration: 39499
loss: 0.979166567325592,grad_norm: 0.9999992450796178, iteration: 39500
loss: 0.9642359614372253,grad_norm: 0.9999991891703467, iteration: 39501
loss: 1.0096065998077393,grad_norm: 0.9999994843387385, iteration: 39502
loss: 1.0207713842391968,grad_norm: 0.9999990891294218, iteration: 39503
loss: 0.9717293381690979,grad_norm: 0.7863479097614019, iteration: 39504
loss: 1.0321869850158691,grad_norm: 0.9999992130085719, iteration: 39505
loss: 1.0104773044586182,grad_norm: 0.9999990478464811, iteration: 39506
loss: 1.0096403360366821,grad_norm: 0.9217314668071201, iteration: 39507
loss: 1.065488576889038,grad_norm: 0.9999998738565089, iteration: 39508
loss: 0.9980173110961914,grad_norm: 0.9365692221433788, iteration: 39509
loss: 1.0073895454406738,grad_norm: 0.9999991221115673, iteration: 39510
loss: 0.9808796048164368,grad_norm: 0.9666217815217002, iteration: 39511
loss: 1.0260659456253052,grad_norm: 0.9999990888562891, iteration: 39512
loss: 0.9922890067100525,grad_norm: 0.9999992200155945, iteration: 39513
loss: 1.0406521558761597,grad_norm: 0.9999989637784269, iteration: 39514
loss: 1.0045698881149292,grad_norm: 0.921066056705725, iteration: 39515
loss: 0.9982545375823975,grad_norm: 0.9186099674966268, iteration: 39516
loss: 1.0103836059570312,grad_norm: 0.9651842379867418, iteration: 39517
loss: 1.0104258060455322,grad_norm: 0.9256259829520361, iteration: 39518
loss: 1.0129574537277222,grad_norm: 0.9999991255176699, iteration: 39519
loss: 1.0211235284805298,grad_norm: 0.9999991129216698, iteration: 39520
loss: 1.0274806022644043,grad_norm: 0.9999992270146255, iteration: 39521
loss: 0.9913313388824463,grad_norm: 0.9999991905240427, iteration: 39522
loss: 0.96541428565979,grad_norm: 0.9999991165354547, iteration: 39523
loss: 1.0119792222976685,grad_norm: 0.9999991856547784, iteration: 39524
loss: 0.9610803723335266,grad_norm: 0.9999991232094037, iteration: 39525
loss: 1.0266259908676147,grad_norm: 0.999999758526145, iteration: 39526
loss: 1.010202169418335,grad_norm: 0.9999998740710496, iteration: 39527
loss: 1.0125575065612793,grad_norm: 0.9999991867635225, iteration: 39528
loss: 0.9633437991142273,grad_norm: 0.9999990134207477, iteration: 39529
loss: 1.0404483079910278,grad_norm: 0.9999996447204197, iteration: 39530
loss: 0.9571877717971802,grad_norm: 0.9999991349360072, iteration: 39531
loss: 1.018259882926941,grad_norm: 0.9999992734664417, iteration: 39532
loss: 1.0114288330078125,grad_norm: 0.910734566481687, iteration: 39533
loss: 0.9892061352729797,grad_norm: 0.9231601965145528, iteration: 39534
loss: 0.9888179302215576,grad_norm: 0.9791037223729041, iteration: 39535
loss: 1.0020147562026978,grad_norm: 0.999999208820872, iteration: 39536
loss: 1.0142968893051147,grad_norm: 0.9524143027911354, iteration: 39537
loss: 0.9901366829872131,grad_norm: 0.8543708117169507, iteration: 39538
loss: 1.018653392791748,grad_norm: 0.8781200227298437, iteration: 39539
loss: 1.0147498846054077,grad_norm: 0.9999993280351175, iteration: 39540
loss: 1.0455633401870728,grad_norm: 0.9999994731849521, iteration: 39541
loss: 1.013637661933899,grad_norm: 0.9632465208164789, iteration: 39542
loss: 0.9768528938293457,grad_norm: 0.9999991278010647, iteration: 39543
loss: 1.0077214241027832,grad_norm: 0.9999991175285675, iteration: 39544
loss: 1.0162302255630493,grad_norm: 0.7950341202226221, iteration: 39545
loss: 1.0000141859054565,grad_norm: 0.9057644840422641, iteration: 39546
loss: 0.997730553150177,grad_norm: 0.860320200182151, iteration: 39547
loss: 1.0071847438812256,grad_norm: 0.9877816606362497, iteration: 39548
loss: 1.016489028930664,grad_norm: 0.999999106860521, iteration: 39549
loss: 1.0558096170425415,grad_norm: 0.9999990991523093, iteration: 39550
loss: 1.0087318420410156,grad_norm: 0.9730292396620235, iteration: 39551
loss: 1.0640302896499634,grad_norm: 0.999999332159532, iteration: 39552
loss: 1.0077288150787354,grad_norm: 0.999999052634696, iteration: 39553
loss: 0.9937973618507385,grad_norm: 0.9999991009409904, iteration: 39554
loss: 0.9843534827232361,grad_norm: 0.9150659483103112, iteration: 39555
loss: 0.9907187223434448,grad_norm: 0.9063778824270471, iteration: 39556
loss: 1.0415860414505005,grad_norm: 0.9999990514872987, iteration: 39557
loss: 0.9823172688484192,grad_norm: 0.999999146879625, iteration: 39558
loss: 1.0098190307617188,grad_norm: 0.8474651620644824, iteration: 39559
loss: 0.9961556196212769,grad_norm: 0.9973759344499588, iteration: 39560
loss: 0.9973459839820862,grad_norm: 0.9999992072232091, iteration: 39561
loss: 1.0090621709823608,grad_norm: 0.9999992726067849, iteration: 39562
loss: 1.0343483686447144,grad_norm: 0.9999990918832271, iteration: 39563
loss: 0.9889052510261536,grad_norm: 0.9999990775437425, iteration: 39564
loss: 1.020831823348999,grad_norm: 0.9999988949666748, iteration: 39565
loss: 1.0011625289916992,grad_norm: 0.9999990129801264, iteration: 39566
loss: 1.038750171661377,grad_norm: 0.9999991465526092, iteration: 39567
loss: 1.017573595046997,grad_norm: 0.999999097704539, iteration: 39568
loss: 0.9875813126564026,grad_norm: 0.8013639764471467, iteration: 39569
loss: 0.9932695627212524,grad_norm: 0.9999991093674271, iteration: 39570
loss: 0.9441334009170532,grad_norm: 0.9999990500015711, iteration: 39571
loss: 1.000537395477295,grad_norm: 0.8957892593069452, iteration: 39572
loss: 0.9731029272079468,grad_norm: 0.9999991319338116, iteration: 39573
loss: 1.0005017518997192,grad_norm: 0.9999989752483488, iteration: 39574
loss: 0.9887142777442932,grad_norm: 0.9743122034507385, iteration: 39575
loss: 0.9815783500671387,grad_norm: 0.9999991525991994, iteration: 39576
loss: 1.0116980075836182,grad_norm: 0.9683184321661508, iteration: 39577
loss: 1.0285463333129883,grad_norm: 0.9525882787454713, iteration: 39578
loss: 1.0081051588058472,grad_norm: 0.9999989452105682, iteration: 39579
loss: 1.0369991064071655,grad_norm: 0.9999995787069131, iteration: 39580
loss: 1.005490779876709,grad_norm: 0.8465945564777227, iteration: 39581
loss: 1.0379492044448853,grad_norm: 0.9721657414404242, iteration: 39582
loss: 1.025100827217102,grad_norm: 0.8936400193345304, iteration: 39583
loss: 0.9824178814888,grad_norm: 0.9647854485494192, iteration: 39584
loss: 1.0284392833709717,grad_norm: 0.9074884628379457, iteration: 39585
loss: 0.9666627049446106,grad_norm: 0.9999991927048035, iteration: 39586
loss: 1.0040971040725708,grad_norm: 0.885768447867515, iteration: 39587
loss: 0.9642850160598755,grad_norm: 0.8549853351446046, iteration: 39588
loss: 0.9590803384780884,grad_norm: 0.9478026591697606, iteration: 39589
loss: 1.0013644695281982,grad_norm: 0.9424730617963962, iteration: 39590
loss: 0.9891067743301392,grad_norm: 0.99999905307419, iteration: 39591
loss: 0.9806999564170837,grad_norm: 0.9833809962130984, iteration: 39592
loss: 0.9740273356437683,grad_norm: 0.8877165661616453, iteration: 39593
loss: 0.9986032247543335,grad_norm: 0.9219781165176187, iteration: 39594
loss: 0.9821168780326843,grad_norm: 0.975962823826633, iteration: 39595
loss: 1.0405017137527466,grad_norm: 0.999999054201636, iteration: 39596
loss: 0.9997710585594177,grad_norm: 0.7899189897983149, iteration: 39597
loss: 0.9903026223182678,grad_norm: 0.99183560326517, iteration: 39598
loss: 1.0858269929885864,grad_norm: 0.9999995984708187, iteration: 39599
loss: 0.975793719291687,grad_norm: 0.8653733386690241, iteration: 39600
loss: 1.0077850818634033,grad_norm: 0.9999990283686806, iteration: 39601
loss: 1.0197542905807495,grad_norm: 0.9999991291589005, iteration: 39602
loss: 1.0171064138412476,grad_norm: 0.9644632762187499, iteration: 39603
loss: 1.0463732481002808,grad_norm: 0.9999996291863992, iteration: 39604
loss: 0.9288766384124756,grad_norm: 0.8016172556258455, iteration: 39605
loss: 1.0095343589782715,grad_norm: 0.999999124175954, iteration: 39606
loss: 1.0186858177185059,grad_norm: 0.868054176212171, iteration: 39607
loss: 0.9941484928131104,grad_norm: 0.9198499199414363, iteration: 39608
loss: 0.9903873205184937,grad_norm: 0.9999990961633325, iteration: 39609
loss: 1.0194852352142334,grad_norm: 0.967410443774188, iteration: 39610
loss: 1.0257703065872192,grad_norm: 0.9999990885663419, iteration: 39611
loss: 0.9917387962341309,grad_norm: 0.946095727175786, iteration: 39612
loss: 1.0183427333831787,grad_norm: 0.8032135245135692, iteration: 39613
loss: 1.015921711921692,grad_norm: 0.8581603945157638, iteration: 39614
loss: 0.9900420904159546,grad_norm: 0.9159388257437561, iteration: 39615
loss: 1.0145784616470337,grad_norm: 0.999999047237174, iteration: 39616
loss: 0.9824978709220886,grad_norm: 0.9999990648356417, iteration: 39617
loss: 1.0436733961105347,grad_norm: 0.9999997873303778, iteration: 39618
loss: 0.995369017124176,grad_norm: 0.9908016719323521, iteration: 39619
loss: 1.0338178873062134,grad_norm: 0.994381837538001, iteration: 39620
loss: 1.0093237161636353,grad_norm: 0.9999990973603374, iteration: 39621
loss: 1.0235377550125122,grad_norm: 0.8449456368802226, iteration: 39622
loss: 0.9686034321784973,grad_norm: 0.999999080627542, iteration: 39623
loss: 1.0029351711273193,grad_norm: 0.9999991898688559, iteration: 39624
loss: 1.0121808052062988,grad_norm: 0.9604453857895637, iteration: 39625
loss: 1.017859697341919,grad_norm: 0.9999990272708553, iteration: 39626
loss: 1.0513708591461182,grad_norm: 0.9999993012747913, iteration: 39627
loss: 0.9846394062042236,grad_norm: 0.7684473163271652, iteration: 39628
loss: 1.0888599157333374,grad_norm: 0.9364900360036785, iteration: 39629
loss: 1.0117932558059692,grad_norm: 0.9080485095137781, iteration: 39630
loss: 0.9834277629852295,grad_norm: 0.9999991204569249, iteration: 39631
loss: 1.0456973314285278,grad_norm: 0.9445314804340884, iteration: 39632
loss: 1.0005823373794556,grad_norm: 0.9999990616043297, iteration: 39633
loss: 0.998748242855072,grad_norm: 0.9787698467951272, iteration: 39634
loss: 1.0100520849227905,grad_norm: 0.999999077493428, iteration: 39635
loss: 0.9821142554283142,grad_norm: 0.9568090347541193, iteration: 39636
loss: 0.9476267099380493,grad_norm: 0.9487137510400296, iteration: 39637
loss: 1.0246553421020508,grad_norm: 0.9999991976560597, iteration: 39638
loss: 0.9978926181793213,grad_norm: 0.9999992419277092, iteration: 39639
loss: 1.0073761940002441,grad_norm: 0.8509890276207517, iteration: 39640
loss: 1.0155726671218872,grad_norm: 0.9090875460930419, iteration: 39641
loss: 1.0049095153808594,grad_norm: 0.9788774981610535, iteration: 39642
loss: 1.0407243967056274,grad_norm: 0.9300485425498712, iteration: 39643
loss: 0.99978107213974,grad_norm: 0.9464237956733834, iteration: 39644
loss: 0.9850356578826904,grad_norm: 0.9428443713200019, iteration: 39645
loss: 1.0198885202407837,grad_norm: 0.9999991035348366, iteration: 39646
loss: 1.0120949745178223,grad_norm: 0.9999990584429692, iteration: 39647
loss: 1.0168676376342773,grad_norm: 0.9543101395874511, iteration: 39648
loss: 0.9942099452018738,grad_norm: 0.9765900661634733, iteration: 39649
loss: 0.9922581911087036,grad_norm: 0.9708779785941791, iteration: 39650
loss: 0.9965535998344421,grad_norm: 0.8460743301191993, iteration: 39651
loss: 0.9927493929862976,grad_norm: 0.9999991975401297, iteration: 39652
loss: 0.9874197244644165,grad_norm: 0.9459321761736846, iteration: 39653
loss: 1.0453468561172485,grad_norm: 0.989650095348288, iteration: 39654
loss: 1.0257091522216797,grad_norm: 0.9588312772786199, iteration: 39655
loss: 0.9832267761230469,grad_norm: 0.909868535762619, iteration: 39656
loss: 1.0558826923370361,grad_norm: 0.9999993486468775, iteration: 39657
loss: 1.0363184213638306,grad_norm: 0.999999306690033, iteration: 39658
loss: 1.0168226957321167,grad_norm: 0.9669694661561467, iteration: 39659
loss: 0.9771180152893066,grad_norm: 0.9591323165088258, iteration: 39660
loss: 1.0147947072982788,grad_norm: 0.9876584527345151, iteration: 39661
loss: 1.0138568878173828,grad_norm: 0.9960962435828873, iteration: 39662
loss: 1.0124114751815796,grad_norm: 0.9033223554880891, iteration: 39663
loss: 0.9822874665260315,grad_norm: 0.9999990962190161, iteration: 39664
loss: 0.9907616376876831,grad_norm: 0.9999991815855879, iteration: 39665
loss: 0.9972943067550659,grad_norm: 0.9253469868183047, iteration: 39666
loss: 0.9863497018814087,grad_norm: 0.9653496312262082, iteration: 39667
loss: 1.0426547527313232,grad_norm: 0.9999995966314845, iteration: 39668
loss: 0.9727415442466736,grad_norm: 0.9999990716084247, iteration: 39669
loss: 0.9987494349479675,grad_norm: 0.9999991248067338, iteration: 39670
loss: 0.9850634932518005,grad_norm: 0.999999398370568, iteration: 39671
loss: 1.029831886291504,grad_norm: 0.9519050847633885, iteration: 39672
loss: 1.028028964996338,grad_norm: 0.9999998183833403, iteration: 39673
loss: 0.9355396628379822,grad_norm: 0.9714442238651386, iteration: 39674
loss: 1.0076993703842163,grad_norm: 0.8169935371619483, iteration: 39675
loss: 1.0130451917648315,grad_norm: 0.7977341526574874, iteration: 39676
loss: 1.003713607788086,grad_norm: 0.999999113068637, iteration: 39677
loss: 0.9929612874984741,grad_norm: 0.9492774842268265, iteration: 39678
loss: 1.0224281549453735,grad_norm: 0.9999993189845441, iteration: 39679
loss: 1.0151948928833008,grad_norm: 0.865379690954589, iteration: 39680
loss: 0.983924150466919,grad_norm: 0.9205427635639504, iteration: 39681
loss: 1.0040926933288574,grad_norm: 0.9999992200127531, iteration: 39682
loss: 1.015991449356079,grad_norm: 0.9999996426324124, iteration: 39683
loss: 1.0257807970046997,grad_norm: 0.9367911131461256, iteration: 39684
loss: 1.0541247129440308,grad_norm: 0.9007385201082225, iteration: 39685
loss: 0.9986363649368286,grad_norm: 0.9052459236006154, iteration: 39686
loss: 1.078343152999878,grad_norm: 0.9999994276585542, iteration: 39687
loss: 1.01591157913208,grad_norm: 0.8887795834888568, iteration: 39688
loss: 1.0155121088027954,grad_norm: 0.9785176967029061, iteration: 39689
loss: 1.002427577972412,grad_norm: 0.9936451745214578, iteration: 39690
loss: 0.9681195616722107,grad_norm: 0.9133179006940619, iteration: 39691
loss: 1.022384524345398,grad_norm: 0.9450429987343026, iteration: 39692
loss: 0.9794949293136597,grad_norm: 0.8448344894497339, iteration: 39693
loss: 0.9818994998931885,grad_norm: 0.9999990015554986, iteration: 39694
loss: 0.9881780743598938,grad_norm: 0.763561943026103, iteration: 39695
loss: 1.0083069801330566,grad_norm: 0.9999989873155793, iteration: 39696
loss: 1.0185248851776123,grad_norm: 0.9979249745595822, iteration: 39697
loss: 1.0181461572647095,grad_norm: 0.9999991655906633, iteration: 39698
loss: 0.9916895627975464,grad_norm: 0.9999989519382595, iteration: 39699
loss: 0.9758138060569763,grad_norm: 0.906051464564798, iteration: 39700
loss: 1.0099564790725708,grad_norm: 0.8186331157811464, iteration: 39701
loss: 0.9948334097862244,grad_norm: 0.9511221675135894, iteration: 39702
loss: 0.986547589302063,grad_norm: 0.9027751951051133, iteration: 39703
loss: 1.0071253776550293,grad_norm: 0.9999990878882677, iteration: 39704
loss: 0.9619649648666382,grad_norm: 0.8623723582802278, iteration: 39705
loss: 1.0054574012756348,grad_norm: 0.9629844300850088, iteration: 39706
loss: 0.9951543807983398,grad_norm: 0.9393429585863151, iteration: 39707
loss: 1.0393257141113281,grad_norm: 0.9908533250961463, iteration: 39708
loss: 1.0121006965637207,grad_norm: 0.9973549127190637, iteration: 39709
loss: 0.9992141723632812,grad_norm: 0.8327548515591812, iteration: 39710
loss: 1.0326594114303589,grad_norm: 0.9999990583247625, iteration: 39711
loss: 0.9800128936767578,grad_norm: 0.9530244993023284, iteration: 39712
loss: 0.9703606367111206,grad_norm: 0.9999989046830601, iteration: 39713
loss: 0.9876853227615356,grad_norm: 0.9495082218462049, iteration: 39714
loss: 0.9910402894020081,grad_norm: 0.9043127796815197, iteration: 39715
loss: 0.9823402762413025,grad_norm: 0.8845584725131215, iteration: 39716
loss: 0.9801737666130066,grad_norm: 0.9999992533799452, iteration: 39717
loss: 0.9728835821151733,grad_norm: 0.9081932319663114, iteration: 39718
loss: 0.9661418795585632,grad_norm: 0.9999991440808846, iteration: 39719
loss: 0.9825189709663391,grad_norm: 0.9999991750920106, iteration: 39720
loss: 1.0143165588378906,grad_norm: 0.9818203200803056, iteration: 39721
loss: 0.9780385494232178,grad_norm: 0.9044574831364327, iteration: 39722
loss: 0.9985839128494263,grad_norm: 0.9532656490291511, iteration: 39723
loss: 1.028937578201294,grad_norm: 0.9592133117402588, iteration: 39724
loss: 1.0168336629867554,grad_norm: 0.9999992815195653, iteration: 39725
loss: 1.0096429586410522,grad_norm: 0.9999991909783965, iteration: 39726
loss: 1.034365177154541,grad_norm: 0.927730996993298, iteration: 39727
loss: 1.012783169746399,grad_norm: 0.9999991567157192, iteration: 39728
loss: 0.9672653675079346,grad_norm: 0.9999993217079418, iteration: 39729
loss: 0.9757856130599976,grad_norm: 0.9395092481285284, iteration: 39730
loss: 0.9666419625282288,grad_norm: 0.9999992217682997, iteration: 39731
loss: 1.011772632598877,grad_norm: 0.9744036552882085, iteration: 39732
loss: 0.963760495185852,grad_norm: 0.9451331983348266, iteration: 39733
loss: 1.0213886499404907,grad_norm: 0.9999990119887143, iteration: 39734
loss: 0.9869775176048279,grad_norm: 0.9999992075561694, iteration: 39735
loss: 0.9846704602241516,grad_norm: 0.770787381053205, iteration: 39736
loss: 1.0135332345962524,grad_norm: 0.9180533485890819, iteration: 39737
loss: 1.0074397325515747,grad_norm: 0.7919054173689699, iteration: 39738
loss: 1.0110502243041992,grad_norm: 0.8725193038456918, iteration: 39739
loss: 0.977714478969574,grad_norm: 0.9624441794943637, iteration: 39740
loss: 1.0313994884490967,grad_norm: 0.9999998662273935, iteration: 39741
loss: 0.9894015192985535,grad_norm: 0.8613147128870754, iteration: 39742
loss: 1.0433478355407715,grad_norm: 0.9999992061134878, iteration: 39743
loss: 1.0206246376037598,grad_norm: 0.9999990453930837, iteration: 39744
loss: 1.057410717010498,grad_norm: 0.9133841306269506, iteration: 39745
loss: 0.9737717509269714,grad_norm: 0.9999990655825226, iteration: 39746
loss: 0.9683233499526978,grad_norm: 0.9999989781317244, iteration: 39747
loss: 1.027167558670044,grad_norm: 0.9999990728536512, iteration: 39748
loss: 0.9950714111328125,grad_norm: 0.9999992078587258, iteration: 39749
loss: 1.0279715061187744,grad_norm: 0.999999123517964, iteration: 39750
loss: 1.0236824750900269,grad_norm: 0.9999991167635769, iteration: 39751
loss: 1.0221171379089355,grad_norm: 0.9059104703486245, iteration: 39752
loss: 0.9918883442878723,grad_norm: 0.9465154120707443, iteration: 39753
loss: 1.0261787176132202,grad_norm: 0.8853101252097129, iteration: 39754
loss: 0.9819428324699402,grad_norm: 0.9999991235086904, iteration: 39755
loss: 0.9927658438682556,grad_norm: 0.9999992270533248, iteration: 39756
loss: 1.0211070775985718,grad_norm: 0.9715906479496705, iteration: 39757
loss: 1.0248664617538452,grad_norm: 0.9999990537617187, iteration: 39758
loss: 0.977027416229248,grad_norm: 0.9002897020587632, iteration: 39759
loss: 1.0346180200576782,grad_norm: 0.8666253513869767, iteration: 39760
loss: 1.0691592693328857,grad_norm: 0.9999993724705741, iteration: 39761
loss: 0.9877330660820007,grad_norm: 0.9999990451663524, iteration: 39762
loss: 0.9968085885047913,grad_norm: 0.999999088168202, iteration: 39763
loss: 0.9677290320396423,grad_norm: 0.9916630616588684, iteration: 39764
loss: 0.9875560402870178,grad_norm: 0.9999991388205677, iteration: 39765
loss: 1.0011028051376343,grad_norm: 0.959414476316795, iteration: 39766
loss: 0.9705617427825928,grad_norm: 0.9124840095658945, iteration: 39767
loss: 1.0010589361190796,grad_norm: 0.9061249481210077, iteration: 39768
loss: 1.0034642219543457,grad_norm: 0.9999990784643396, iteration: 39769
loss: 0.9888702034950256,grad_norm: 0.9999992256842247, iteration: 39770
loss: 0.9805819988250732,grad_norm: 0.9305383702080203, iteration: 39771
loss: 1.0153861045837402,grad_norm: 0.8863818017777809, iteration: 39772
loss: 1.005892276763916,grad_norm: 0.9999992214937091, iteration: 39773
loss: 1.0096904039382935,grad_norm: 0.9467695413969105, iteration: 39774
loss: 1.040473222732544,grad_norm: 0.9999991133236265, iteration: 39775
loss: 1.0114561319351196,grad_norm: 0.999999035484596, iteration: 39776
loss: 1.0030615329742432,grad_norm: 0.9999991324609036, iteration: 39777
loss: 1.0129038095474243,grad_norm: 0.9999989630652055, iteration: 39778
loss: 1.0026392936706543,grad_norm: 0.9225352107642535, iteration: 39779
loss: 0.9604191780090332,grad_norm: 0.9999992386682086, iteration: 39780
loss: 1.0258690118789673,grad_norm: 0.8804449162948678, iteration: 39781
loss: 1.0035314559936523,grad_norm: 0.999999252011069, iteration: 39782
loss: 0.9899425506591797,grad_norm: 0.972848643353214, iteration: 39783
loss: 0.9961358904838562,grad_norm: 0.9999989866251114, iteration: 39784
loss: 0.9803635478019714,grad_norm: 0.9340308414312961, iteration: 39785
loss: 0.99112468957901,grad_norm: 0.9757704712728725, iteration: 39786
loss: 1.0038604736328125,grad_norm: 0.9427219631084129, iteration: 39787
loss: 1.0167088508605957,grad_norm: 0.9999990174947193, iteration: 39788
loss: 0.9947744011878967,grad_norm: 0.8995870332882099, iteration: 39789
loss: 0.9894161820411682,grad_norm: 0.9999989583525396, iteration: 39790
loss: 1.0092252492904663,grad_norm: 0.991245153894385, iteration: 39791
loss: 0.9937869310379028,grad_norm: 0.9999990504681606, iteration: 39792
loss: 0.9768071174621582,grad_norm: 0.9779459673515037, iteration: 39793
loss: 0.9946767091751099,grad_norm: 0.7695215520367842, iteration: 39794
loss: 0.993346095085144,grad_norm: 0.999999038742902, iteration: 39795
loss: 1.023232102394104,grad_norm: 0.9999995493692264, iteration: 39796
loss: 1.0093178749084473,grad_norm: 0.898060901176215, iteration: 39797
loss: 1.0252900123596191,grad_norm: 0.9329781784779997, iteration: 39798
loss: 1.043050765991211,grad_norm: 0.9999995418916816, iteration: 39799
loss: 1.029256820678711,grad_norm: 0.9999990936206125, iteration: 39800
loss: 1.0260331630706787,grad_norm: 0.9539614480229851, iteration: 39801
loss: 0.9578943848609924,grad_norm: 0.8973298641538918, iteration: 39802
loss: 0.9933283925056458,grad_norm: 0.9999990744693408, iteration: 39803
loss: 1.011096715927124,grad_norm: 0.9999992456602356, iteration: 39804
loss: 0.9980072379112244,grad_norm: 0.9999989961407006, iteration: 39805
loss: 0.9619689583778381,grad_norm: 0.9999991887846227, iteration: 39806
loss: 0.9710712432861328,grad_norm: 0.9999991834119649, iteration: 39807
loss: 0.9861047863960266,grad_norm: 0.9999991181144348, iteration: 39808
loss: 1.0078885555267334,grad_norm: 0.9999991122597821, iteration: 39809
loss: 1.020369291305542,grad_norm: 0.830291090261242, iteration: 39810
loss: 1.0139209032058716,grad_norm: 0.8204765850465535, iteration: 39811
loss: 1.0279173851013184,grad_norm: 0.9265682839206247, iteration: 39812
loss: 0.978939414024353,grad_norm: 0.9999990575161762, iteration: 39813
loss: 1.004452109336853,grad_norm: 0.9592628661158471, iteration: 39814
loss: 1.0376754999160767,grad_norm: 0.9999994118444743, iteration: 39815
loss: 1.0028249025344849,grad_norm: 0.9999992505157362, iteration: 39816
loss: 0.99349445104599,grad_norm: 0.9999992882302031, iteration: 39817
loss: 1.0459731817245483,grad_norm: 0.9999993810840416, iteration: 39818
loss: 1.0381221771240234,grad_norm: 0.9999991690527923, iteration: 39819
loss: 0.9832088947296143,grad_norm: 0.9569181110777054, iteration: 39820
loss: 1.0196396112442017,grad_norm: 0.9999990349789003, iteration: 39821
loss: 1.0037661790847778,grad_norm: 0.8564802268103077, iteration: 39822
loss: 0.989329993724823,grad_norm: 0.9999991697010427, iteration: 39823
loss: 0.9722617864608765,grad_norm: 0.9999991873467774, iteration: 39824
loss: 1.00175940990448,grad_norm: 0.8989743291113846, iteration: 39825
loss: 0.9705703854560852,grad_norm: 0.9455027804547086, iteration: 39826
loss: 0.967140793800354,grad_norm: 0.830922751880421, iteration: 39827
loss: 0.9781570434570312,grad_norm: 0.9999990544718382, iteration: 39828
loss: 1.0178675651550293,grad_norm: 0.7665372690383728, iteration: 39829
loss: 1.0453050136566162,grad_norm: 0.9999992280133024, iteration: 39830
loss: 0.9999048113822937,grad_norm: 0.9999992005615307, iteration: 39831
loss: 1.0077084302902222,grad_norm: 0.912895316937134, iteration: 39832
loss: 0.9951451420783997,grad_norm: 0.9433153663811209, iteration: 39833
loss: 0.9831540584564209,grad_norm: 0.8798556705152808, iteration: 39834
loss: 1.0263679027557373,grad_norm: 0.9621254051342633, iteration: 39835
loss: 1.00300133228302,grad_norm: 0.999999060269925, iteration: 39836
loss: 1.010386347770691,grad_norm: 0.9999990489323768, iteration: 39837
loss: 1.0012232065200806,grad_norm: 0.9403291481393158, iteration: 39838
loss: 1.0342823266983032,grad_norm: 0.8985800588255421, iteration: 39839
loss: 1.0079394578933716,grad_norm: 0.9999991043742601, iteration: 39840
loss: 0.9680266976356506,grad_norm: 0.9047801300635162, iteration: 39841
loss: 0.9951171875,grad_norm: 0.9999991129752682, iteration: 39842
loss: 1.0111582279205322,grad_norm: 0.9602465325995252, iteration: 39843
loss: 0.9942476153373718,grad_norm: 0.999999140662347, iteration: 39844
loss: 1.0470263957977295,grad_norm: 0.853403118329545, iteration: 39845
loss: 1.0280951261520386,grad_norm: 0.8634622641055983, iteration: 39846
loss: 1.040664792060852,grad_norm: 0.9576372025039537, iteration: 39847
loss: 0.9726877808570862,grad_norm: 0.8966247361558751, iteration: 39848
loss: 0.9966160655021667,grad_norm: 0.9552460593099898, iteration: 39849
loss: 0.9842875003814697,grad_norm: 0.8256549717003023, iteration: 39850
loss: 1.0408610105514526,grad_norm: 0.9570771200710314, iteration: 39851
loss: 1.0242815017700195,grad_norm: 0.9999993172130708, iteration: 39852
loss: 0.9550585150718689,grad_norm: 0.9999990303222955, iteration: 39853
loss: 1.0071661472320557,grad_norm: 0.9999999302116004, iteration: 39854
loss: 0.9852234721183777,grad_norm: 0.9934212653199538, iteration: 39855
loss: 0.994681179523468,grad_norm: 0.9320575918490586, iteration: 39856
loss: 0.9911057353019714,grad_norm: 0.9917455117239184, iteration: 39857
loss: 0.9898273348808289,grad_norm: 0.8565674744786437, iteration: 39858
loss: 0.9979297518730164,grad_norm: 0.9382742958642527, iteration: 39859
loss: 0.9754297733306885,grad_norm: 0.999999057401876, iteration: 39860
loss: 1.028810739517212,grad_norm: 0.9999992826583599, iteration: 39861
loss: 0.9737544655799866,grad_norm: 0.9316192162408372, iteration: 39862
loss: 0.9704106450080872,grad_norm: 0.9999990927761517, iteration: 39863
loss: 0.9836419820785522,grad_norm: 0.8334722345710351, iteration: 39864
loss: 1.0174469947814941,grad_norm: 0.9999991607263444, iteration: 39865
loss: 0.9759211540222168,grad_norm: 0.9999992036814803, iteration: 39866
loss: 1.0029600858688354,grad_norm: 0.9656521135468327, iteration: 39867
loss: 1.029097080230713,grad_norm: 0.9506050221516285, iteration: 39868
loss: 1.0130406618118286,grad_norm: 0.9999991600595538, iteration: 39869
loss: 1.0144871473312378,grad_norm: 0.9999991634467978, iteration: 39870
loss: 0.9888731241226196,grad_norm: 0.9339180962686205, iteration: 39871
loss: 1.0341897010803223,grad_norm: 0.9678180605702681, iteration: 39872
loss: 1.0195742845535278,grad_norm: 0.8000670126940421, iteration: 39873
loss: 1.0052709579467773,grad_norm: 0.9999992558343543, iteration: 39874
loss: 0.9927129149436951,grad_norm: 0.9999991876540124, iteration: 39875
loss: 1.0399280786514282,grad_norm: 0.9877786483901836, iteration: 39876
loss: 0.9618431925773621,grad_norm: 0.9999988844891257, iteration: 39877
loss: 0.9830951690673828,grad_norm: 0.9999991810242506, iteration: 39878
loss: 0.9513276815414429,grad_norm: 0.9999991176283403, iteration: 39879
loss: 1.0628931522369385,grad_norm: 0.9999995773694152, iteration: 39880
loss: 1.0065618753433228,grad_norm: 0.9999991272713872, iteration: 39881
loss: 1.0117019414901733,grad_norm: 0.9999992669708081, iteration: 39882
loss: 1.0237692594528198,grad_norm: 0.9999996296294544, iteration: 39883
loss: 1.0132063627243042,grad_norm: 0.9999989602800977, iteration: 39884
loss: 1.0291303396224976,grad_norm: 0.9713708767219256, iteration: 39885
loss: 0.9914018511772156,grad_norm: 0.9518519488742619, iteration: 39886
loss: 1.0435891151428223,grad_norm: 0.999999237644694, iteration: 39887
loss: 1.0401495695114136,grad_norm: 0.9999998777562563, iteration: 39888
loss: 1.0349441766738892,grad_norm: 0.9999991476014501, iteration: 39889
loss: 1.003156065940857,grad_norm: 0.9999990204601217, iteration: 39890
loss: 1.0164196491241455,grad_norm: 0.9763436348492328, iteration: 39891
loss: 0.9973323941230774,grad_norm: 0.9999991374680121, iteration: 39892
loss: 1.0158300399780273,grad_norm: 0.6835640517508765, iteration: 39893
loss: 0.988542377948761,grad_norm: 0.8750398032064759, iteration: 39894
loss: 1.0470178127288818,grad_norm: 0.924739338505736, iteration: 39895
loss: 1.0065022706985474,grad_norm: 0.8748710866906114, iteration: 39896
loss: 0.9806123971939087,grad_norm: 0.9999992673823561, iteration: 39897
loss: 0.976547122001648,grad_norm: 0.9999991926187971, iteration: 39898
loss: 1.0178968906402588,grad_norm: 0.9999991730826371, iteration: 39899
loss: 1.0079762935638428,grad_norm: 0.9999990823601765, iteration: 39900
loss: 0.9912331700325012,grad_norm: 0.9744947571547158, iteration: 39901
loss: 1.0081290006637573,grad_norm: 0.9701790576613247, iteration: 39902
loss: 0.9982941746711731,grad_norm: 0.9999991088917562, iteration: 39903
loss: 1.0253102779388428,grad_norm: 0.7859790148322298, iteration: 39904
loss: 1.0077723264694214,grad_norm: 0.8473317147313892, iteration: 39905
loss: 0.9816032648086548,grad_norm: 0.999999112033527, iteration: 39906
loss: 1.0151091814041138,grad_norm: 0.8309828419617507, iteration: 39907
loss: 0.9614534378051758,grad_norm: 0.9404910304790399, iteration: 39908
loss: 1.0028934478759766,grad_norm: 0.9999990357667247, iteration: 39909
loss: 1.0071388483047485,grad_norm: 0.8024838864305128, iteration: 39910
loss: 1.0268163681030273,grad_norm: 0.9578814627841137, iteration: 39911
loss: 0.9899260401725769,grad_norm: 0.9618904366143761, iteration: 39912
loss: 1.0315420627593994,grad_norm: 0.9999991361066984, iteration: 39913
loss: 0.9771538972854614,grad_norm: 0.9467427950085496, iteration: 39914
loss: 1.0444896221160889,grad_norm: 0.9104379890631713, iteration: 39915
loss: 0.9795607924461365,grad_norm: 0.989367647326514, iteration: 39916
loss: 0.9949608445167542,grad_norm: 0.8491144451556026, iteration: 39917
loss: 1.0076566934585571,grad_norm: 0.9330510477297322, iteration: 39918
loss: 1.0025315284729004,grad_norm: 0.9412381742136979, iteration: 39919
loss: 1.0562407970428467,grad_norm: 0.9466758738998844, iteration: 39920
loss: 0.9996396899223328,grad_norm: 0.9999995108130184, iteration: 39921
loss: 1.0083411931991577,grad_norm: 0.9213052333233446, iteration: 39922
loss: 1.0313725471496582,grad_norm: 0.9680950264227258, iteration: 39923
loss: 1.0232468843460083,grad_norm: 0.9113102358588832, iteration: 39924
loss: 1.004783034324646,grad_norm: 0.8910224309407156, iteration: 39925
loss: 1.0252983570098877,grad_norm: 0.9275939988851428, iteration: 39926
loss: 0.9952807426452637,grad_norm: 0.9805005589967611, iteration: 39927
loss: 1.0132969617843628,grad_norm: 0.9999990036177803, iteration: 39928
loss: 1.0251635313034058,grad_norm: 0.8129527015662961, iteration: 39929
loss: 0.9522265791893005,grad_norm: 0.9999991462986527, iteration: 39930
loss: 0.9998045563697815,grad_norm: 0.9999990011807467, iteration: 39931
loss: 0.9461877346038818,grad_norm: 0.9265059983408682, iteration: 39932
loss: 0.9760032296180725,grad_norm: 0.9999993000073836, iteration: 39933
loss: 1.0114272832870483,grad_norm: 0.9906528942638624, iteration: 39934
loss: 1.0235453844070435,grad_norm: 0.9432884777278862, iteration: 39935
loss: 0.9712163209915161,grad_norm: 0.971333693696172, iteration: 39936
loss: 0.9923656582832336,grad_norm: 0.999999175925069, iteration: 39937
loss: 0.998957633972168,grad_norm: 0.9999991086445468, iteration: 39938
loss: 0.9927257895469666,grad_norm: 0.999999209508399, iteration: 39939
loss: 1.0130839347839355,grad_norm: 0.9999992730796597, iteration: 39940
loss: 1.0010920763015747,grad_norm: 0.9999992951222245, iteration: 39941
loss: 1.015812873840332,grad_norm: 0.9676040375471373, iteration: 39942
loss: 0.949651837348938,grad_norm: 0.9999991366310915, iteration: 39943
loss: 0.9941037893295288,grad_norm: 0.9999994723799804, iteration: 39944
loss: 1.0486416816711426,grad_norm: 0.9999993066152159, iteration: 39945
loss: 1.0082660913467407,grad_norm: 0.9945074535204002, iteration: 39946
loss: 0.9720847010612488,grad_norm: 0.9168282986410415, iteration: 39947
loss: 1.0570547580718994,grad_norm: 0.9999994797411452, iteration: 39948
loss: 0.9863510727882385,grad_norm: 0.9116891687657531, iteration: 39949
loss: 1.010130763053894,grad_norm: 0.9309066130871598, iteration: 39950
loss: 1.0144281387329102,grad_norm: 0.8368259454529248, iteration: 39951
loss: 1.029669165611267,grad_norm: 0.9999991630609995, iteration: 39952
loss: 1.0496788024902344,grad_norm: 0.9999992750885213, iteration: 39953
loss: 0.9757112860679626,grad_norm: 0.9528672764432374, iteration: 39954
loss: 0.9884048104286194,grad_norm: 0.8105853673902224, iteration: 39955
loss: 1.0230823755264282,grad_norm: 0.9467713985546096, iteration: 39956
loss: 1.0145108699798584,grad_norm: 0.9107699226372314, iteration: 39957
loss: 1.0009111166000366,grad_norm: 0.9999992221546354, iteration: 39958
loss: 0.9959831237792969,grad_norm: 0.9971341886523175, iteration: 39959
loss: 0.9675087928771973,grad_norm: 0.9999990606546062, iteration: 39960
loss: 1.0275830030441284,grad_norm: 0.9699654962000273, iteration: 39961
loss: 1.0165396928787231,grad_norm: 0.9999992904782582, iteration: 39962
loss: 1.0716326236724854,grad_norm: 0.9999992132726178, iteration: 39963
loss: 1.005391240119934,grad_norm: 0.9999993552839135, iteration: 39964
loss: 0.9972336292266846,grad_norm: 0.9999994490167063, iteration: 39965
loss: 0.9882301092147827,grad_norm: 0.9049859959383483, iteration: 39966
loss: 1.009913444519043,grad_norm: 0.9999991657330695, iteration: 39967
loss: 1.0237321853637695,grad_norm: 0.8236787551249605, iteration: 39968
loss: 1.013089656829834,grad_norm: 0.862736362382733, iteration: 39969
loss: 0.9819529056549072,grad_norm: 0.9024404289096383, iteration: 39970
loss: 1.0157228708267212,grad_norm: 0.9999993770694723, iteration: 39971
loss: 0.9892184138298035,grad_norm: 0.9999990789553382, iteration: 39972
loss: 1.0186021327972412,grad_norm: 0.9149771234057625, iteration: 39973
loss: 0.9855782985687256,grad_norm: 0.9487527622295073, iteration: 39974
loss: 0.9793312549591064,grad_norm: 0.9999995934099204, iteration: 39975
loss: 0.994779646396637,grad_norm: 0.9744464668596267, iteration: 39976
loss: 1.024309754371643,grad_norm: 0.9999992915354847, iteration: 39977
loss: 1.0253081321716309,grad_norm: 0.9999991601076762, iteration: 39978
loss: 0.996687114238739,grad_norm: 0.9999991971807308, iteration: 39979
loss: 1.0291638374328613,grad_norm: 0.9999993311960589, iteration: 39980
loss: 1.0243191719055176,grad_norm: 0.9999990579468483, iteration: 39981
loss: 1.0130772590637207,grad_norm: 0.9999992775793335, iteration: 39982
loss: 1.035173773765564,grad_norm: 0.8753775358143123, iteration: 39983
loss: 1.0203408002853394,grad_norm: 0.9749254394448266, iteration: 39984
loss: 1.031995415687561,grad_norm: 0.8265611275228043, iteration: 39985
loss: 1.1264379024505615,grad_norm: 0.9462143329499101, iteration: 39986
loss: 0.9870306253433228,grad_norm: 0.8883342206500151, iteration: 39987
loss: 1.038624882698059,grad_norm: 0.999999210516628, iteration: 39988
loss: 0.9433112144470215,grad_norm: 0.9083169853916069, iteration: 39989
loss: 1.0676040649414062,grad_norm: 0.9999995517309839, iteration: 39990
loss: 0.9552419781684875,grad_norm: 0.9999990350795365, iteration: 39991
loss: 1.0554574728012085,grad_norm: 0.9999998553623223, iteration: 39992
loss: 1.037966251373291,grad_norm: 0.9999993585125032, iteration: 39993
loss: 1.023281455039978,grad_norm: 0.9103206481408901, iteration: 39994
loss: 0.9661597013473511,grad_norm: 0.836138370953036, iteration: 39995
loss: 1.0393290519714355,grad_norm: 0.9999993123777009, iteration: 39996
loss: 1.0011905431747437,grad_norm: 0.9120638917347422, iteration: 39997
loss: 1.009041428565979,grad_norm: 0.9999993379681806, iteration: 39998
loss: 1.00198233127594,grad_norm: 0.9999990460700827, iteration: 39999
loss: 1.026687741279602,grad_norm: 0.999999129989805, iteration: 40000
Evaluating at step 40000
{'val': 0.9988108314573765, 'test': 2.553293075805274}
loss: 0.9953625202178955,grad_norm: 0.909679149991906, iteration: 40001
loss: 1.0085290670394897,grad_norm: 0.9999988360232628, iteration: 40002
loss: 0.9744240045547485,grad_norm: 0.9273444623812255, iteration: 40003
loss: 1.0337015390396118,grad_norm: 0.9999990713085933, iteration: 40004
loss: 1.0424516201019287,grad_norm: 0.999999800556232, iteration: 40005
loss: 1.0944820642471313,grad_norm: 0.9999991596272985, iteration: 40006
loss: 1.0355651378631592,grad_norm: 0.9999999079189471, iteration: 40007
loss: 1.123513102531433,grad_norm: 0.9999999328019568, iteration: 40008
loss: 1.0400922298431396,grad_norm: 0.9999990509109831, iteration: 40009
loss: 1.020828366279602,grad_norm: 0.9999991224249654, iteration: 40010
loss: 1.0497288703918457,grad_norm: 0.9999991361152023, iteration: 40011
loss: 0.9907388091087341,grad_norm: 0.7616519702233427, iteration: 40012
loss: 1.0056257247924805,grad_norm: 0.9497546461746477, iteration: 40013
loss: 1.0225545167922974,grad_norm: 0.9999991479886796, iteration: 40014
loss: 0.9719657301902771,grad_norm: 0.8173942204187474, iteration: 40015
loss: 1.0327160358428955,grad_norm: 0.9999996009316664, iteration: 40016
loss: 1.015464425086975,grad_norm: 0.9999990778357669, iteration: 40017
loss: 1.0118049383163452,grad_norm: 0.9999989442153893, iteration: 40018
loss: 0.9823472499847412,grad_norm: 0.9778621121571333, iteration: 40019
loss: 0.9911021590232849,grad_norm: 0.7785122041463204, iteration: 40020
loss: 0.982822835445404,grad_norm: 0.8985522196945211, iteration: 40021
loss: 0.9966322779655457,grad_norm: 0.9633574094225867, iteration: 40022
loss: 1.0171828269958496,grad_norm: 0.9868649253993179, iteration: 40023
loss: 0.988468587398529,grad_norm: 0.9825098991725717, iteration: 40024
loss: 1.1151031255722046,grad_norm: 0.9999995562293181, iteration: 40025
loss: 1.0082827806472778,grad_norm: 0.9999993391357637, iteration: 40026
loss: 0.9934349656105042,grad_norm: 0.9935649126271593, iteration: 40027
loss: 1.0023460388183594,grad_norm: 0.9999990824393149, iteration: 40028
loss: 0.9901614785194397,grad_norm: 0.9031110591287257, iteration: 40029
loss: 1.1843442916870117,grad_norm: 0.9999998023198727, iteration: 40030
loss: 1.0401407480239868,grad_norm: 0.9999991035951152, iteration: 40031
loss: 1.0295559167861938,grad_norm: 0.9999998191312353, iteration: 40032
loss: 0.963252604007721,grad_norm: 0.9999996689987402, iteration: 40033
loss: 0.976641058921814,grad_norm: 0.9577801283121031, iteration: 40034
loss: 1.0761549472808838,grad_norm: 0.9999991906351388, iteration: 40035
loss: 1.056370735168457,grad_norm: 0.9999995495554443, iteration: 40036
loss: 1.0241410732269287,grad_norm: 0.999999375726423, iteration: 40037
loss: 0.996744692325592,grad_norm: 0.7977761070394015, iteration: 40038
loss: 1.0079773664474487,grad_norm: 0.9874029572766285, iteration: 40039
loss: 1.0044928789138794,grad_norm: 0.8580306355212954, iteration: 40040
loss: 1.0244144201278687,grad_norm: 0.9718805376461713, iteration: 40041
loss: 0.9974861145019531,grad_norm: 0.9709101782688306, iteration: 40042
loss: 1.0197434425354004,grad_norm: 0.9454028816626198, iteration: 40043
loss: 0.9995814561843872,grad_norm: 0.8436550137042794, iteration: 40044
loss: 0.9883081912994385,grad_norm: 0.9999990131517437, iteration: 40045
loss: 1.0127235651016235,grad_norm: 0.9999994827965184, iteration: 40046
loss: 1.0061523914337158,grad_norm: 0.9999992816526669, iteration: 40047
loss: 0.978629469871521,grad_norm: 0.9999997645831952, iteration: 40048
loss: 0.9895721673965454,grad_norm: 0.8246970255517028, iteration: 40049
loss: 1.0036509037017822,grad_norm: 0.999999163702768, iteration: 40050
loss: 0.9948583245277405,grad_norm: 0.9447275991405486, iteration: 40051
loss: 0.998573899269104,grad_norm: 0.9999990807821045, iteration: 40052
loss: 1.036479115486145,grad_norm: 0.9282726407706546, iteration: 40053
loss: 1.020542860031128,grad_norm: 0.9999990451649989, iteration: 40054
loss: 1.0046933889389038,grad_norm: 0.9999991592973605, iteration: 40055
loss: 1.0279995203018188,grad_norm: 0.9999993023678537, iteration: 40056
loss: 1.033268690109253,grad_norm: 0.9667857355735968, iteration: 40057
loss: 1.017094373703003,grad_norm: 0.8291908684451175, iteration: 40058
loss: 0.9673876762390137,grad_norm: 0.9999996171270389, iteration: 40059
loss: 1.0061115026474,grad_norm: 0.9999992805175383, iteration: 40060
loss: 0.9780142903327942,grad_norm: 0.9999992444525299, iteration: 40061
loss: 1.0349751710891724,grad_norm: 0.9826909873862449, iteration: 40062
loss: 1.0018534660339355,grad_norm: 0.9999991412944542, iteration: 40063
loss: 0.9843970537185669,grad_norm: 0.8964416170373751, iteration: 40064
loss: 1.0110900402069092,grad_norm: 0.9999992765800054, iteration: 40065
loss: 1.014878749847412,grad_norm: 0.8079039887572338, iteration: 40066
loss: 1.0432404279708862,grad_norm: 0.9999995776507045, iteration: 40067
loss: 1.0422025918960571,grad_norm: 0.9999996289598939, iteration: 40068
loss: 1.0121617317199707,grad_norm: 0.9619626145877551, iteration: 40069
loss: 0.9994782209396362,grad_norm: 0.8651670512684576, iteration: 40070
loss: 1.0266770124435425,grad_norm: 0.9051516772737208, iteration: 40071
loss: 0.9873645901679993,grad_norm: 0.9999990972928802, iteration: 40072
loss: 1.015089988708496,grad_norm: 0.9999990133077707, iteration: 40073
loss: 0.9945532083511353,grad_norm: 0.9312005508096353, iteration: 40074
loss: 0.9976311922073364,grad_norm: 0.9999991967747743, iteration: 40075
loss: 1.0109347105026245,grad_norm: 0.9999996117346931, iteration: 40076
loss: 1.0313072204589844,grad_norm: 0.999999681677822, iteration: 40077
loss: 0.9991166591644287,grad_norm: 0.9968910117736678, iteration: 40078
loss: 1.0452120304107666,grad_norm: 0.9999990290656052, iteration: 40079
loss: 1.0234875679016113,grad_norm: 0.9999991618736936, iteration: 40080
loss: 1.031090497970581,grad_norm: 0.761131949824038, iteration: 40081
loss: 0.9843955039978027,grad_norm: 0.9981728221265548, iteration: 40082
loss: 0.9990782141685486,grad_norm: 0.9999992198646449, iteration: 40083
loss: 1.0009312629699707,grad_norm: 0.9999991581111217, iteration: 40084
loss: 1.0045653581619263,grad_norm: 0.9352322240083702, iteration: 40085
loss: 0.9902734160423279,grad_norm: 0.9999990962442191, iteration: 40086
loss: 0.9871516823768616,grad_norm: 0.917567584688467, iteration: 40087
loss: 0.9889615774154663,grad_norm: 0.9999991104833458, iteration: 40088
loss: 1.0111464262008667,grad_norm: 0.9999993459625568, iteration: 40089
loss: 1.0090736150741577,grad_norm: 0.8373204546655302, iteration: 40090
loss: 1.0180658102035522,grad_norm: 0.9999992861441761, iteration: 40091
loss: 1.0304697751998901,grad_norm: 0.9051039894451702, iteration: 40092
loss: 0.9943181872367859,grad_norm: 0.999999068138656, iteration: 40093
loss: 0.9907596111297607,grad_norm: 0.9999991385238952, iteration: 40094
loss: 1.068003535270691,grad_norm: 0.9999995383967585, iteration: 40095
loss: 1.0105888843536377,grad_norm: 0.8563926640441901, iteration: 40096
loss: 1.007097601890564,grad_norm: 0.9999991592841713, iteration: 40097
loss: 0.9968226552009583,grad_norm: 0.8060019365564911, iteration: 40098
loss: 1.0030920505523682,grad_norm: 0.9999992457791831, iteration: 40099
loss: 1.0175178050994873,grad_norm: 0.7448195253015688, iteration: 40100
loss: 0.9615007638931274,grad_norm: 0.9999991277175578, iteration: 40101
loss: 0.9581098556518555,grad_norm: 0.9845626398974269, iteration: 40102
loss: 0.9951573014259338,grad_norm: 0.9999990053840933, iteration: 40103
loss: 1.0389617681503296,grad_norm: 0.8642005875116178, iteration: 40104
loss: 0.9787799715995789,grad_norm: 0.8866466073030892, iteration: 40105
loss: 0.9968386292457581,grad_norm: 0.9999992605439607, iteration: 40106
loss: 1.0567806959152222,grad_norm: 0.9999992530887544, iteration: 40107
loss: 1.0175559520721436,grad_norm: 0.9761972915028688, iteration: 40108
loss: 1.0051085948944092,grad_norm: 0.9999991164530287, iteration: 40109
loss: 1.032950520515442,grad_norm: 0.9999993100686738, iteration: 40110
loss: 1.0522873401641846,grad_norm: 0.9999990939320186, iteration: 40111
loss: 0.9848042726516724,grad_norm: 0.9999991725688728, iteration: 40112
loss: 1.0263409614562988,grad_norm: 0.999999148586941, iteration: 40113
loss: 0.9815589189529419,grad_norm: 0.8414634146879398, iteration: 40114
loss: 1.0228806734085083,grad_norm: 0.9999989708792514, iteration: 40115
loss: 0.9840590357780457,grad_norm: 0.994391760528807, iteration: 40116
loss: 1.0064948797225952,grad_norm: 0.9999992294001354, iteration: 40117
loss: 1.021088719367981,grad_norm: 0.9999994564067002, iteration: 40118
loss: 1.041957974433899,grad_norm: 0.9999999219013405, iteration: 40119
loss: 1.0500688552856445,grad_norm: 0.9839790185785079, iteration: 40120
loss: 1.000123381614685,grad_norm: 0.9999991259113424, iteration: 40121
loss: 0.9897672533988953,grad_norm: 0.9263793465754976, iteration: 40122
loss: 1.0135787725448608,grad_norm: 0.9999996685338449, iteration: 40123
loss: 1.0124448537826538,grad_norm: 0.9999990895056545, iteration: 40124
loss: 0.9732218384742737,grad_norm: 0.9472323062027181, iteration: 40125
loss: 1.032778024673462,grad_norm: 0.9123547597056891, iteration: 40126
loss: 1.022505521774292,grad_norm: 0.8604628116592246, iteration: 40127
loss: 0.9994507431983948,grad_norm: 0.9999990481772565, iteration: 40128
loss: 1.0307613611221313,grad_norm: 0.8994669185410554, iteration: 40129
loss: 1.0213463306427002,grad_norm: 0.9630229646068398, iteration: 40130
loss: 1.1396573781967163,grad_norm: 0.999999662951767, iteration: 40131
loss: 1.0243947505950928,grad_norm: 0.9999997748126181, iteration: 40132
loss: 0.9997740387916565,grad_norm: 0.9999992321452404, iteration: 40133
loss: 0.9909898638725281,grad_norm: 0.9999997144568176, iteration: 40134
loss: 1.0315418243408203,grad_norm: 0.8931866649895344, iteration: 40135
loss: 1.0004616975784302,grad_norm: 0.9225129151069977, iteration: 40136
loss: 1.0291448831558228,grad_norm: 0.9999992450238324, iteration: 40137
loss: 1.020607352256775,grad_norm: 0.9999993490445694, iteration: 40138
loss: 0.9915758371353149,grad_norm: 0.9999991069748414, iteration: 40139
loss: 1.0510295629501343,grad_norm: 0.9999991724175364, iteration: 40140
loss: 1.0565028190612793,grad_norm: 0.9999996476774041, iteration: 40141
loss: 0.9929861426353455,grad_norm: 0.9999992979277791, iteration: 40142
loss: 0.9958018660545349,grad_norm: 0.8738877131657784, iteration: 40143
loss: 1.021143913269043,grad_norm: 0.9999990468339763, iteration: 40144
loss: 0.9918482899665833,grad_norm: 0.9999990681319123, iteration: 40145
loss: 1.0403705835342407,grad_norm: 0.9136862626957701, iteration: 40146
loss: 0.991628885269165,grad_norm: 0.8449849351595236, iteration: 40147
loss: 1.0053577423095703,grad_norm: 0.8458658399227481, iteration: 40148
loss: 1.0224226713180542,grad_norm: 0.9999994935764551, iteration: 40149
loss: 0.9987166523933411,grad_norm: 0.9999991446507722, iteration: 40150
loss: 1.0080130100250244,grad_norm: 0.8132653202788435, iteration: 40151
loss: 1.0304335355758667,grad_norm: 0.9999994276111017, iteration: 40152
loss: 1.007914662361145,grad_norm: 0.7613025808184053, iteration: 40153
loss: 1.0157145261764526,grad_norm: 0.8504887357443112, iteration: 40154
loss: 1.095018744468689,grad_norm: 0.9999996714058258, iteration: 40155
loss: 0.9920223951339722,grad_norm: 0.8923457271618507, iteration: 40156
loss: 0.9775177240371704,grad_norm: 0.9347595813787245, iteration: 40157
loss: 1.0065511465072632,grad_norm: 0.9999995030820699, iteration: 40158
loss: 0.9956578016281128,grad_norm: 0.919027509202015, iteration: 40159
loss: 0.9875078797340393,grad_norm: 0.9999991353887736, iteration: 40160
loss: 0.9641557931900024,grad_norm: 0.8107702468598131, iteration: 40161
loss: 1.0003494024276733,grad_norm: 0.999999102652028, iteration: 40162
loss: 0.9912326335906982,grad_norm: 0.9532910057838496, iteration: 40163
loss: 1.0372949838638306,grad_norm: 0.9999995918051995, iteration: 40164
loss: 1.001718521118164,grad_norm: 0.999999054983342, iteration: 40165
loss: 0.9545106291770935,grad_norm: 0.9999991874214971, iteration: 40166
loss: 0.9840954542160034,grad_norm: 0.9999996699658192, iteration: 40167
loss: 0.9995822906494141,grad_norm: 0.9999994730660302, iteration: 40168
loss: 1.036436676979065,grad_norm: 0.9905245010537835, iteration: 40169
loss: 0.9883102774620056,grad_norm: 0.8717123094948593, iteration: 40170
loss: 0.987010657787323,grad_norm: 0.878124104025102, iteration: 40171
loss: 0.9473727941513062,grad_norm: 0.9056098100534372, iteration: 40172
loss: 1.001579761505127,grad_norm: 0.9762025182268628, iteration: 40173
loss: 1.036390781402588,grad_norm: 0.89966638543397, iteration: 40174
loss: 0.9423028826713562,grad_norm: 0.9999990394278913, iteration: 40175
loss: 1.0100619792938232,grad_norm: 0.9999990518100621, iteration: 40176
loss: 0.9656031727790833,grad_norm: 0.9999992597860115, iteration: 40177
loss: 1.0456079244613647,grad_norm: 0.999999488118796, iteration: 40178
loss: 1.0004559755325317,grad_norm: 0.9999992057356604, iteration: 40179
loss: 0.9885867834091187,grad_norm: 0.9594317183658914, iteration: 40180
loss: 1.0274423360824585,grad_norm: 0.9999996205501328, iteration: 40181
loss: 0.9872817397117615,grad_norm: 0.9388012345518535, iteration: 40182
loss: 0.9945256114006042,grad_norm: 0.9573925138232235, iteration: 40183
loss: 1.0067509412765503,grad_norm: 0.9139748432321326, iteration: 40184
loss: 0.9681228399276733,grad_norm: 0.9746840739480951, iteration: 40185
loss: 0.976032018661499,grad_norm: 0.9999991119530415, iteration: 40186
loss: 1.0242805480957031,grad_norm: 0.9999994683600022, iteration: 40187
loss: 1.0067081451416016,grad_norm: 0.9999991917016041, iteration: 40188
loss: 0.9989084601402283,grad_norm: 0.9999991219180124, iteration: 40189
loss: 0.9840368032455444,grad_norm: 0.9999991126940256, iteration: 40190
loss: 1.0073652267456055,grad_norm: 0.99999918896203, iteration: 40191
loss: 1.0159622430801392,grad_norm: 0.8953041384539204, iteration: 40192
loss: 0.9881309866905212,grad_norm: 0.9513095759494277, iteration: 40193
loss: 0.9912753701210022,grad_norm: 0.9966595735721664, iteration: 40194
loss: 0.9807409644126892,grad_norm: 0.8940266084152821, iteration: 40195
loss: 0.9890393018722534,grad_norm: 0.9327726985574353, iteration: 40196
loss: 1.0073716640472412,grad_norm: 0.9999992398966489, iteration: 40197
loss: 0.994673490524292,grad_norm: 0.9358590409819306, iteration: 40198
loss: 0.9963496923446655,grad_norm: 0.872817738214386, iteration: 40199
loss: 1.0095926523208618,grad_norm: 0.9999992386065522, iteration: 40200
loss: 1.027927041053772,grad_norm: 0.9999993554468599, iteration: 40201
loss: 0.9945563673973083,grad_norm: 0.8449153559645206, iteration: 40202
loss: 0.9819884896278381,grad_norm: 0.9999991499980109, iteration: 40203
loss: 1.0102711915969849,grad_norm: 0.9469803058234344, iteration: 40204
loss: 0.9908671379089355,grad_norm: 0.9999990052694946, iteration: 40205
loss: 1.053371787071228,grad_norm: 0.7796003421687324, iteration: 40206
loss: 1.056689977645874,grad_norm: 0.9999991089661242, iteration: 40207
loss: 0.9919719099998474,grad_norm: 0.999999171148399, iteration: 40208
loss: 1.0471243858337402,grad_norm: 0.8947623062965092, iteration: 40209
loss: 0.9705624580383301,grad_norm: 0.9999990711613476, iteration: 40210
loss: 0.9799456000328064,grad_norm: 0.9999990506595827, iteration: 40211
loss: 1.045556902885437,grad_norm: 0.999999804833403, iteration: 40212
loss: 0.9762704968452454,grad_norm: 0.9999990444693, iteration: 40213
loss: 1.0212149620056152,grad_norm: 0.9999990355563855, iteration: 40214
loss: 1.0116950273513794,grad_norm: 0.8942402431114722, iteration: 40215
loss: 1.034372329711914,grad_norm: 0.999999160885268, iteration: 40216
loss: 1.0193403959274292,grad_norm: 0.9729979820050128, iteration: 40217
loss: 0.9877477884292603,grad_norm: 0.9999991277936248, iteration: 40218
loss: 1.0304206609725952,grad_norm: 0.9999991834017511, iteration: 40219
loss: 0.9442129135131836,grad_norm: 0.9425984899446811, iteration: 40220
loss: 0.9923675656318665,grad_norm: 0.9999991533609005, iteration: 40221
loss: 1.027262806892395,grad_norm: 0.9999994908228905, iteration: 40222
loss: 0.9864917993545532,grad_norm: 0.9999989467234233, iteration: 40223
loss: 1.0354076623916626,grad_norm: 0.9999992221566628, iteration: 40224
loss: 1.0054768323898315,grad_norm: 0.8000181018066658, iteration: 40225
loss: 0.9934635758399963,grad_norm: 0.9200743359261043, iteration: 40226
loss: 0.9664997458457947,grad_norm: 0.9707407933547287, iteration: 40227
loss: 1.023190975189209,grad_norm: 0.7727695017086614, iteration: 40228
loss: 0.9931898713111877,grad_norm: 0.9999990371930038, iteration: 40229
loss: 1.008424162864685,grad_norm: 0.9999994797303129, iteration: 40230
loss: 1.020725131034851,grad_norm: 0.9999993543185689, iteration: 40231
loss: 1.0097540616989136,grad_norm: 0.9836773841047317, iteration: 40232
loss: 0.9951861500740051,grad_norm: 0.991299222780602, iteration: 40233
loss: 1.0170540809631348,grad_norm: 0.872992524345855, iteration: 40234
loss: 1.0307507514953613,grad_norm: 0.9999991165615376, iteration: 40235
loss: 1.03567373752594,grad_norm: 0.9999990609126733, iteration: 40236
loss: 0.9748232364654541,grad_norm: 0.8332165997362717, iteration: 40237
loss: 1.0120996236801147,grad_norm: 0.9999996773987545, iteration: 40238
loss: 0.9973694682121277,grad_norm: 0.8514582517786689, iteration: 40239
loss: 1.0129344463348389,grad_norm: 0.9514379753783597, iteration: 40240
loss: 0.9967085719108582,grad_norm: 0.946933296305428, iteration: 40241
loss: 0.9795892834663391,grad_norm: 0.9999991902032056, iteration: 40242
loss: 1.0159339904785156,grad_norm: 0.9292826243156797, iteration: 40243
loss: 1.0031414031982422,grad_norm: 0.9999990718399263, iteration: 40244
loss: 1.0324835777282715,grad_norm: 0.9789132534536332, iteration: 40245
loss: 1.023807168006897,grad_norm: 0.9905168700279999, iteration: 40246
loss: 1.0907433032989502,grad_norm: 0.9999994702473787, iteration: 40247
loss: 0.9848700165748596,grad_norm: 0.7422233354090834, iteration: 40248
loss: 1.024053692817688,grad_norm: 0.9999991139973657, iteration: 40249
loss: 0.9880073070526123,grad_norm: 0.9999992554155385, iteration: 40250
loss: 1.0237817764282227,grad_norm: 0.9153473341026503, iteration: 40251
loss: 0.9712404608726501,grad_norm: 0.9999991206743912, iteration: 40252
loss: 0.9996770024299622,grad_norm: 0.9999988175963015, iteration: 40253
loss: 0.9843202233314514,grad_norm: 0.9999990416897709, iteration: 40254
loss: 1.0273752212524414,grad_norm: 0.8840931131899357, iteration: 40255
loss: 1.0080664157867432,grad_norm: 0.9999995298379105, iteration: 40256
loss: 1.0269991159439087,grad_norm: 0.9999989515285974, iteration: 40257
loss: 0.9962349534034729,grad_norm: 0.9999994147015941, iteration: 40258
loss: 0.9972082376480103,grad_norm: 0.8434113851162106, iteration: 40259
loss: 1.025267481803894,grad_norm: 0.9487036364125954, iteration: 40260
loss: 1.0091755390167236,grad_norm: 0.9999990739811531, iteration: 40261
loss: 1.0328400135040283,grad_norm: 0.9602954410794092, iteration: 40262
loss: 0.9917320013046265,grad_norm: 0.8307948847613015, iteration: 40263
loss: 1.0128700733184814,grad_norm: 0.9999998400913561, iteration: 40264
loss: 1.0393143892288208,grad_norm: 0.9005019859853931, iteration: 40265
loss: 0.9775844812393188,grad_norm: 0.999999295836942, iteration: 40266
loss: 1.0050851106643677,grad_norm: 0.8761100453615053, iteration: 40267
loss: 1.0194816589355469,grad_norm: 0.9999990686427788, iteration: 40268
loss: 0.9603361487388611,grad_norm: 0.9376792642576549, iteration: 40269
loss: 0.9746648073196411,grad_norm: 0.9143268454405882, iteration: 40270
loss: 1.0093003511428833,grad_norm: 0.9415124002366377, iteration: 40271
loss: 1.0256083011627197,grad_norm: 0.876017456605723, iteration: 40272
loss: 1.0014030933380127,grad_norm: 0.8898444400201412, iteration: 40273
loss: 1.0039011240005493,grad_norm: 0.9999994183106786, iteration: 40274
loss: 0.9716646075248718,grad_norm: 0.964969916903825, iteration: 40275
loss: 0.9772529006004333,grad_norm: 0.9293560106806877, iteration: 40276
loss: 0.9684457182884216,grad_norm: 0.9811663491391396, iteration: 40277
loss: 0.9949313998222351,grad_norm: 0.9313520858133851, iteration: 40278
loss: 0.9670652747154236,grad_norm: 0.9999992895410603, iteration: 40279
loss: 1.0120532512664795,grad_norm: 0.7929092818711408, iteration: 40280
loss: 1.0311328172683716,grad_norm: 0.9963371562515848, iteration: 40281
loss: 1.048158884048462,grad_norm: 0.9237363121836459, iteration: 40282
loss: 1.017406702041626,grad_norm: 0.9999992373830952, iteration: 40283
loss: 1.0108261108398438,grad_norm: 0.9999993670670259, iteration: 40284
loss: 1.0476855039596558,grad_norm: 0.9999994747597565, iteration: 40285
loss: 0.998201847076416,grad_norm: 0.9705911713979961, iteration: 40286
loss: 1.0867714881896973,grad_norm: 0.9999992328702475, iteration: 40287
loss: 1.0299237966537476,grad_norm: 0.999999687575725, iteration: 40288
loss: 1.0339597463607788,grad_norm: 0.9233861824292945, iteration: 40289
loss: 0.9551011919975281,grad_norm: 0.9899546284957578, iteration: 40290
loss: 1.0319323539733887,grad_norm: 0.999999734744781, iteration: 40291
loss: 0.9773907661437988,grad_norm: 0.920992690134176, iteration: 40292
loss: 1.020102858543396,grad_norm: 0.9999992502603664, iteration: 40293
loss: 0.9903028011322021,grad_norm: 0.9767795506493768, iteration: 40294
loss: 1.0581036806106567,grad_norm: 0.9999990989593252, iteration: 40295
loss: 0.9897791743278503,grad_norm: 0.970226915622909, iteration: 40296
loss: 0.984130859375,grad_norm: 0.9999993266775365, iteration: 40297
loss: 0.9931282997131348,grad_norm: 0.9974897981793793, iteration: 40298
loss: 0.9459739923477173,grad_norm: 0.9091785705270096, iteration: 40299
loss: 0.9970112442970276,grad_norm: 0.9999991012692497, iteration: 40300
loss: 1.0359915494918823,grad_norm: 0.999999226016142, iteration: 40301
loss: 0.9910158514976501,grad_norm: 0.956083178439868, iteration: 40302
loss: 0.986040472984314,grad_norm: 0.9999991086530173, iteration: 40303
loss: 1.0200047492980957,grad_norm: 0.9999996628232554, iteration: 40304
loss: 0.9896643757820129,grad_norm: 0.8318080096130611, iteration: 40305
loss: 0.9719915390014648,grad_norm: 0.8295558696201327, iteration: 40306
loss: 0.9697712659835815,grad_norm: 0.9116768620395298, iteration: 40307
loss: 0.9671874046325684,grad_norm: 0.9222157798624913, iteration: 40308
loss: 1.027781367301941,grad_norm: 0.9532762952194649, iteration: 40309
loss: 1.0050073862075806,grad_norm: 0.9886362819605604, iteration: 40310
loss: 1.006622314453125,grad_norm: 0.9579409906308223, iteration: 40311
loss: 0.9915117621421814,grad_norm: 0.9156023003075732, iteration: 40312
loss: 1.005258560180664,grad_norm: 0.7767659657837718, iteration: 40313
loss: 1.0027086734771729,grad_norm: 0.999999419698159, iteration: 40314
loss: 1.008896827697754,grad_norm: 0.9999990838633268, iteration: 40315
loss: 0.9413467049598694,grad_norm: 0.9126416898301617, iteration: 40316
loss: 1.0451372861862183,grad_norm: 0.9999994763637299, iteration: 40317
loss: 0.9895810484886169,grad_norm: 0.8825627077989614, iteration: 40318
loss: 1.0100455284118652,grad_norm: 0.8749578064023283, iteration: 40319
loss: 1.0379847288131714,grad_norm: 0.9999993063478567, iteration: 40320
loss: 0.980197012424469,grad_norm: 0.9999991875343494, iteration: 40321
loss: 1.0055774450302124,grad_norm: 0.9246001164789771, iteration: 40322
loss: 1.0080124139785767,grad_norm: 0.9999991336487553, iteration: 40323
loss: 0.9681285619735718,grad_norm: 0.9999992048269098, iteration: 40324
loss: 0.9788793325424194,grad_norm: 0.9999997364893947, iteration: 40325
loss: 0.9972627758979797,grad_norm: 0.8500644747645175, iteration: 40326
loss: 1.0493156909942627,grad_norm: 0.9999995283780502, iteration: 40327
loss: 1.0012370347976685,grad_norm: 0.99999921996439, iteration: 40328
loss: 1.0150067806243896,grad_norm: 0.9999990206050108, iteration: 40329
loss: 0.9813030362129211,grad_norm: 0.999999023473024, iteration: 40330
loss: 1.0131464004516602,grad_norm: 0.9257152188932699, iteration: 40331
loss: 1.0157185792922974,grad_norm: 0.9999991970888162, iteration: 40332
loss: 1.020092487335205,grad_norm: 0.9999990960078904, iteration: 40333
loss: 1.0096739530563354,grad_norm: 0.9999995522729839, iteration: 40334
loss: 0.9617112278938293,grad_norm: 0.99999921359977, iteration: 40335
loss: 1.011467695236206,grad_norm: 0.770498877692358, iteration: 40336
loss: 1.0176496505737305,grad_norm: 0.7987362520131407, iteration: 40337
loss: 0.9930521845817566,grad_norm: 0.8176551617344434, iteration: 40338
loss: 0.9728640913963318,grad_norm: 0.9749126942364874, iteration: 40339
loss: 0.9935358762741089,grad_norm: 0.9999993323722065, iteration: 40340
loss: 1.0038095712661743,grad_norm: 0.9999998192245435, iteration: 40341
loss: 1.0222059488296509,grad_norm: 0.9999992174704012, iteration: 40342
loss: 1.006018877029419,grad_norm: 0.9999991768788707, iteration: 40343
loss: 1.0529695749282837,grad_norm: 0.9999997034886188, iteration: 40344
loss: 0.9809526801109314,grad_norm: 0.9999990057889717, iteration: 40345
loss: 0.9995502233505249,grad_norm: 0.9999991110773451, iteration: 40346
loss: 1.0144214630126953,grad_norm: 0.9448999818848525, iteration: 40347
loss: 1.023256540298462,grad_norm: 0.9999991307378957, iteration: 40348
loss: 0.9915603399276733,grad_norm: 0.9999991871699887, iteration: 40349
loss: 1.0371999740600586,grad_norm: 0.8491379256921127, iteration: 40350
loss: 0.9992156028747559,grad_norm: 0.9999990540226692, iteration: 40351
loss: 1.0000636577606201,grad_norm: 0.8204881211422358, iteration: 40352
loss: 1.0290075540542603,grad_norm: 0.9999991789443226, iteration: 40353
loss: 1.0284754037857056,grad_norm: 0.9999992185908267, iteration: 40354
loss: 0.9801992177963257,grad_norm: 0.9949523371869072, iteration: 40355
loss: 1.0565847158432007,grad_norm: 0.9999993677331306, iteration: 40356
loss: 0.9697773456573486,grad_norm: 0.9909300564468579, iteration: 40357
loss: 0.9979223608970642,grad_norm: 0.9999997497556469, iteration: 40358
loss: 1.0169870853424072,grad_norm: 0.8160674445168538, iteration: 40359
loss: 1.0109857320785522,grad_norm: 0.9999991441300491, iteration: 40360
loss: 0.9679976105690002,grad_norm: 0.9538508106614719, iteration: 40361
loss: 0.9915026426315308,grad_norm: 0.9999991606272675, iteration: 40362
loss: 1.0228992700576782,grad_norm: 0.9999996566005275, iteration: 40363
loss: 1.0224745273590088,grad_norm: 0.9999993142010453, iteration: 40364
loss: 0.9983909130096436,grad_norm: 0.9999992676661077, iteration: 40365
loss: 0.9788743257522583,grad_norm: 0.9999992088869339, iteration: 40366
loss: 1.0081489086151123,grad_norm: 0.9999990851355408, iteration: 40367
loss: 0.9868409037590027,grad_norm: 0.9999992179471774, iteration: 40368
loss: 0.9623055458068848,grad_norm: 0.9096024778520262, iteration: 40369
loss: 1.003878116607666,grad_norm: 0.9999991824048747, iteration: 40370
loss: 1.015211582183838,grad_norm: 0.8723273636043004, iteration: 40371
loss: 0.999297022819519,grad_norm: 0.9999992205808111, iteration: 40372
loss: 1.0635509490966797,grad_norm: 0.9999997265718688, iteration: 40373
loss: 1.0501543283462524,grad_norm: 0.9999995478101932, iteration: 40374
loss: 0.9863463044166565,grad_norm: 0.9622951907288456, iteration: 40375
loss: 1.0082695484161377,grad_norm: 0.955722623448487, iteration: 40376
loss: 1.0092825889587402,grad_norm: 0.9999990635065259, iteration: 40377
loss: 1.011312484741211,grad_norm: 0.9999991964647819, iteration: 40378
loss: 1.0271271467208862,grad_norm: 0.9999992987299514, iteration: 40379
loss: 1.0072177648544312,grad_norm: 0.9492688584483543, iteration: 40380
loss: 1.0211262702941895,grad_norm: 0.9902384765572504, iteration: 40381
loss: 0.9766695499420166,grad_norm: 0.999999087745469, iteration: 40382
loss: 1.0093075037002563,grad_norm: 0.9472472373925988, iteration: 40383
loss: 1.032692790031433,grad_norm: 0.892642559886903, iteration: 40384
loss: 1.0252357721328735,grad_norm: 0.9999990891480368, iteration: 40385
loss: 0.9923247694969177,grad_norm: 0.9002653945916188, iteration: 40386
loss: 1.024483323097229,grad_norm: 0.9999992746417014, iteration: 40387
loss: 1.0164369344711304,grad_norm: 0.9999992443793349, iteration: 40388
loss: 1.0278279781341553,grad_norm: 0.9999989891925316, iteration: 40389
loss: 0.9851229786872864,grad_norm: 0.8277049451737009, iteration: 40390
loss: 1.0374113321304321,grad_norm: 0.8619686393208055, iteration: 40391
loss: 0.9772747159004211,grad_norm: 0.9733740005441287, iteration: 40392
loss: 1.013468861579895,grad_norm: 0.943523881070552, iteration: 40393
loss: 0.9838805198669434,grad_norm: 0.9999990628560375, iteration: 40394
loss: 1.0198566913604736,grad_norm: 0.9999990667062221, iteration: 40395
loss: 0.9966579079627991,grad_norm: 0.9186430752713269, iteration: 40396
loss: 1.0075819492340088,grad_norm: 0.9999991845100792, iteration: 40397
loss: 1.0160794258117676,grad_norm: 0.9999990185843669, iteration: 40398
loss: 1.0150147676467896,grad_norm: 0.9999990852807665, iteration: 40399
loss: 0.9656031727790833,grad_norm: 0.9999990903002544, iteration: 40400
loss: 1.0062147378921509,grad_norm: 0.9649219198717711, iteration: 40401
loss: 0.9853975176811218,grad_norm: 0.9999992295282644, iteration: 40402
loss: 0.9461870193481445,grad_norm: 0.9999991703819552, iteration: 40403
loss: 0.9828264117240906,grad_norm: 0.9999994303108642, iteration: 40404
loss: 1.0245773792266846,grad_norm: 0.9699541098408946, iteration: 40405
loss: 1.0337470769882202,grad_norm: 0.9999992225289934, iteration: 40406
loss: 0.9864881634712219,grad_norm: 0.9790037564539439, iteration: 40407
loss: 1.0189489126205444,grad_norm: 0.9834515917757686, iteration: 40408
loss: 1.0183486938476562,grad_norm: 0.8039702112058934, iteration: 40409
loss: 0.9461110234260559,grad_norm: 0.9383049575089302, iteration: 40410
loss: 0.9805232286453247,grad_norm: 0.9801320585705784, iteration: 40411
loss: 1.0547130107879639,grad_norm: 0.999999150017977, iteration: 40412
loss: 0.998817503452301,grad_norm: 0.994039440318999, iteration: 40413
loss: 1.002292275428772,grad_norm: 0.9770319231450229, iteration: 40414
loss: 1.0311142206192017,grad_norm: 0.9999992991321124, iteration: 40415
loss: 1.0068055391311646,grad_norm: 0.9999991191111284, iteration: 40416
loss: 1.0267486572265625,grad_norm: 0.9188895031684415, iteration: 40417
loss: 0.981311559677124,grad_norm: 0.9999997757528455, iteration: 40418
loss: 0.983160674571991,grad_norm: 0.999998976810091, iteration: 40419
loss: 1.0044288635253906,grad_norm: 0.9639789858835153, iteration: 40420
loss: 0.9759106636047363,grad_norm: 0.9999992202907879, iteration: 40421
loss: 0.9835414290428162,grad_norm: 0.8564012104161861, iteration: 40422
loss: 0.9731774926185608,grad_norm: 0.9999990593881539, iteration: 40423
loss: 0.9947971701622009,grad_norm: 0.9999992727165219, iteration: 40424
loss: 1.0130032300949097,grad_norm: 0.9788209994042093, iteration: 40425
loss: 0.9817264676094055,grad_norm: 0.8514865101450412, iteration: 40426
loss: 1.0281368494033813,grad_norm: 0.9999997618550669, iteration: 40427
loss: 0.9701976776123047,grad_norm: 0.9621052879237971, iteration: 40428
loss: 0.9735547304153442,grad_norm: 0.900276615842732, iteration: 40429
loss: 1.012613296508789,grad_norm: 0.8215156866153455, iteration: 40430
loss: 0.9949550032615662,grad_norm: 0.9852463477576474, iteration: 40431
loss: 1.0102840662002563,grad_norm: 0.9121937832091211, iteration: 40432
loss: 1.015613079071045,grad_norm: 0.9999990946009051, iteration: 40433
loss: 1.052382230758667,grad_norm: 0.9999990948856484, iteration: 40434
loss: 0.9911381006240845,grad_norm: 0.9999989926161977, iteration: 40435
loss: 0.9982365369796753,grad_norm: 0.9999990000183223, iteration: 40436
loss: 0.9858888983726501,grad_norm: 0.99999913863234, iteration: 40437
loss: 1.002421498298645,grad_norm: 0.9999991534979249, iteration: 40438
loss: 0.9931268095970154,grad_norm: 0.8968483931091358, iteration: 40439
loss: 1.014004111289978,grad_norm: 0.8144905635442264, iteration: 40440
loss: 1.0295284986495972,grad_norm: 0.9999989984077253, iteration: 40441
loss: 0.9939776062965393,grad_norm: 0.9762705508687601, iteration: 40442
loss: 1.034623146057129,grad_norm: 0.9999996033705948, iteration: 40443
loss: 1.0337755680084229,grad_norm: 0.99999935053053, iteration: 40444
loss: 1.0109360218048096,grad_norm: 0.9999992351811714, iteration: 40445
loss: 0.973876953125,grad_norm: 0.8420812014338034, iteration: 40446
loss: 0.9938636422157288,grad_norm: 0.999999265947049, iteration: 40447
loss: 1.0025668144226074,grad_norm: 0.9919170754190523, iteration: 40448
loss: 1.0236138105392456,grad_norm: 0.9999992268256555, iteration: 40449
loss: 0.9792249202728271,grad_norm: 0.9999991035987831, iteration: 40450
loss: 0.9751898050308228,grad_norm: 0.9817465111837865, iteration: 40451
loss: 1.0025688409805298,grad_norm: 0.8950607132504635, iteration: 40452
loss: 1.0005524158477783,grad_norm: 0.914066701815371, iteration: 40453
loss: 1.0354275703430176,grad_norm: 0.9999990332232216, iteration: 40454
loss: 0.9754160642623901,grad_norm: 0.9390680391074502, iteration: 40455
loss: 0.9849244952201843,grad_norm: 0.9999996328986412, iteration: 40456
loss: 1.0129531621932983,grad_norm: 0.775001129134622, iteration: 40457
loss: 0.9750576019287109,grad_norm: 0.9377623104255824, iteration: 40458
loss: 1.056311011314392,grad_norm: 0.9999990543377595, iteration: 40459
loss: 1.0035685300827026,grad_norm: 0.7946221870891681, iteration: 40460
loss: 1.039620041847229,grad_norm: 0.9580844902706028, iteration: 40461
loss: 1.0333491563796997,grad_norm: 0.9521413996583467, iteration: 40462
loss: 0.9853459596633911,grad_norm: 0.99999900945069, iteration: 40463
loss: 0.9802825450897217,grad_norm: 0.9039794842931412, iteration: 40464
loss: 0.9852215647697449,grad_norm: 0.9999989913019179, iteration: 40465
loss: 0.9900340437889099,grad_norm: 0.9999991009673775, iteration: 40466
loss: 1.0272632837295532,grad_norm: 0.9525004485291206, iteration: 40467
loss: 0.9940593242645264,grad_norm: 0.9999992357294425, iteration: 40468
loss: 0.9872947335243225,grad_norm: 0.9999990068040713, iteration: 40469
loss: 1.0028835535049438,grad_norm: 0.9999991492985945, iteration: 40470
loss: 0.975347638130188,grad_norm: 0.9741631041949678, iteration: 40471
loss: 0.9999955892562866,grad_norm: 0.8520814790422319, iteration: 40472
loss: 1.0164268016815186,grad_norm: 0.9747855550309777, iteration: 40473
loss: 0.9899849891662598,grad_norm: 0.8850031872611126, iteration: 40474
loss: 1.0070347785949707,grad_norm: 0.8996207839612685, iteration: 40475
loss: 0.9801143407821655,grad_norm: 0.963330008356502, iteration: 40476
loss: 1.0385456085205078,grad_norm: 0.9999992396298105, iteration: 40477
loss: 0.9876389503479004,grad_norm: 0.9181322622450226, iteration: 40478
loss: 1.0163952112197876,grad_norm: 0.9816348994201699, iteration: 40479
loss: 1.0007609128952026,grad_norm: 0.9305730729165332, iteration: 40480
loss: 0.9575151205062866,grad_norm: 0.9999991880325643, iteration: 40481
loss: 0.9980137348175049,grad_norm: 0.9999992465888224, iteration: 40482
loss: 1.140303373336792,grad_norm: 0.9999997894276224, iteration: 40483
loss: 1.0088915824890137,grad_norm: 0.9999991349293409, iteration: 40484
loss: 0.9734188318252563,grad_norm: 0.9836224064076712, iteration: 40485
loss: 0.9621936082839966,grad_norm: 0.9971270524545117, iteration: 40486
loss: 0.9841601848602295,grad_norm: 0.7371607524907987, iteration: 40487
loss: 1.0050474405288696,grad_norm: 0.9999992454500096, iteration: 40488
loss: 0.9757193326950073,grad_norm: 0.9999992758694412, iteration: 40489
loss: 1.0054457187652588,grad_norm: 0.9999992916049972, iteration: 40490
loss: 1.0183353424072266,grad_norm: 0.996015803636698, iteration: 40491
loss: 1.0591814517974854,grad_norm: 0.9999990545808255, iteration: 40492
loss: 1.0041450262069702,grad_norm: 0.8729994265568621, iteration: 40493
loss: 1.0200295448303223,grad_norm: 0.8821526003547694, iteration: 40494
loss: 0.9891446232795715,grad_norm: 0.9999990628941114, iteration: 40495
loss: 1.0168285369873047,grad_norm: 0.9999989963629381, iteration: 40496
loss: 1.0115313529968262,grad_norm: 0.8642745807798144, iteration: 40497
loss: 1.0289809703826904,grad_norm: 0.8080942669984085, iteration: 40498
loss: 0.958112359046936,grad_norm: 0.8467609156955227, iteration: 40499
loss: 0.9917835593223572,grad_norm: 0.9633802691228655, iteration: 40500
loss: 1.0506420135498047,grad_norm: 0.9610353283558766, iteration: 40501
loss: 0.9907265305519104,grad_norm: 0.9445242501890344, iteration: 40502
loss: 1.0096684694290161,grad_norm: 0.9695076919665507, iteration: 40503
loss: 0.9933304786682129,grad_norm: 0.9999992080007769, iteration: 40504
loss: 1.027878999710083,grad_norm: 0.9999992975950861, iteration: 40505
loss: 1.0062592029571533,grad_norm: 0.9794872676355998, iteration: 40506
loss: 0.9794349074363708,grad_norm: 0.9999992158037133, iteration: 40507
loss: 1.005305290222168,grad_norm: 0.9500772643168549, iteration: 40508
loss: 0.9560284614562988,grad_norm: 0.9740239009894093, iteration: 40509
loss: 0.9933174252510071,grad_norm: 0.999999151160796, iteration: 40510
loss: 0.9920303821563721,grad_norm: 0.9999993569633171, iteration: 40511
loss: 1.01010262966156,grad_norm: 0.9315170439746814, iteration: 40512
loss: 1.0006400346755981,grad_norm: 0.8878195782887905, iteration: 40513
loss: 1.0197309255599976,grad_norm: 0.9220108429181708, iteration: 40514
loss: 1.0129848718643188,grad_norm: 0.9999991294632358, iteration: 40515
loss: 0.956870436668396,grad_norm: 0.9512441421903074, iteration: 40516
loss: 0.9909417629241943,grad_norm: 0.8754110945562764, iteration: 40517
loss: 1.0320008993148804,grad_norm: 0.8865554809652235, iteration: 40518
loss: 1.0271644592285156,grad_norm: 0.9999991406483814, iteration: 40519
loss: 0.9746350646018982,grad_norm: 0.976667331521789, iteration: 40520
loss: 0.9814476370811462,grad_norm: 0.9458993376125537, iteration: 40521
loss: 1.0071866512298584,grad_norm: 0.9999991917908161, iteration: 40522
loss: 0.971376895904541,grad_norm: 0.9999991696680672, iteration: 40523
loss: 0.9877040386199951,grad_norm: 0.9889735498510881, iteration: 40524
loss: 0.971738874912262,grad_norm: 0.8739375861270113, iteration: 40525
loss: 1.0011333227157593,grad_norm: 0.8279536171389663, iteration: 40526
loss: 1.0175913572311401,grad_norm: 0.9310944144312315, iteration: 40527
loss: 1.0187560319900513,grad_norm: 0.999999364750018, iteration: 40528
loss: 1.0193027257919312,grad_norm: 0.81846532554014, iteration: 40529
loss: 0.9837530255317688,grad_norm: 0.9999993078391365, iteration: 40530
loss: 1.0590406656265259,grad_norm: 0.9999997845711347, iteration: 40531
loss: 1.0675158500671387,grad_norm: 0.9999997286833289, iteration: 40532
loss: 0.9943981170654297,grad_norm: 0.8748172005278508, iteration: 40533
loss: 0.991435170173645,grad_norm: 0.99999897372585, iteration: 40534
loss: 1.009050965309143,grad_norm: 0.9999990736855435, iteration: 40535
loss: 0.9777701497077942,grad_norm: 0.9999991251651914, iteration: 40536
loss: 0.9811388254165649,grad_norm: 0.9999991420309686, iteration: 40537
loss: 0.9678220748901367,grad_norm: 0.9008603535997772, iteration: 40538
loss: 0.9873148202896118,grad_norm: 0.8482732549550741, iteration: 40539
loss: 0.9936517477035522,grad_norm: 0.9999989930303305, iteration: 40540
loss: 0.9900325536727905,grad_norm: 0.8740610271980823, iteration: 40541
loss: 0.9943159222602844,grad_norm: 0.999998983415515, iteration: 40542
loss: 0.9731702208518982,grad_norm: 0.7733824965309471, iteration: 40543
loss: 0.98731929063797,grad_norm: 0.9999991380510963, iteration: 40544
loss: 0.9774940013885498,grad_norm: 0.9646287477612705, iteration: 40545
loss: 0.9882109761238098,grad_norm: 0.9999994424050137, iteration: 40546
loss: 1.0300171375274658,grad_norm: 0.9999995716170166, iteration: 40547
loss: 0.9886349439620972,grad_norm: 0.9612856932594712, iteration: 40548
loss: 1.0560842752456665,grad_norm: 0.9490358780166752, iteration: 40549
loss: 1.0471274852752686,grad_norm: 0.9439577088145812, iteration: 40550
loss: 1.0387952327728271,grad_norm: 0.9999990819091539, iteration: 40551
loss: 0.9991390109062195,grad_norm: 0.9999991482159503, iteration: 40552
loss: 1.0330451726913452,grad_norm: 0.9999990671039214, iteration: 40553
loss: 0.970177948474884,grad_norm: 0.9024254420600063, iteration: 40554
loss: 0.9565544724464417,grad_norm: 0.9999991091854492, iteration: 40555
loss: 1.0261505842208862,grad_norm: 0.9999990751803294, iteration: 40556
loss: 0.9980112910270691,grad_norm: 0.9435277002244042, iteration: 40557
loss: 1.0709093809127808,grad_norm: 0.9999997058469889, iteration: 40558
loss: 1.0318748950958252,grad_norm: 0.9080161943285235, iteration: 40559
loss: 1.0164785385131836,grad_norm: 0.9999991934077539, iteration: 40560
loss: 1.0293207168579102,grad_norm: 0.9999989788375403, iteration: 40561
loss: 1.0347172021865845,grad_norm: 0.8990690240549862, iteration: 40562
loss: 1.0126646757125854,grad_norm: 0.999999088170507, iteration: 40563
loss: 0.9830217361450195,grad_norm: 0.9999990977307671, iteration: 40564
loss: 1.037675380706787,grad_norm: 0.8920668299416438, iteration: 40565
loss: 0.9950296878814697,grad_norm: 0.9999992400993403, iteration: 40566
loss: 1.0065282583236694,grad_norm: 0.999999285703266, iteration: 40567
loss: 0.9890567064285278,grad_norm: 0.9951555860561819, iteration: 40568
loss: 1.004044771194458,grad_norm: 0.9999990491671762, iteration: 40569
loss: 0.9816913604736328,grad_norm: 0.9999990860969405, iteration: 40570
loss: 1.0177286863327026,grad_norm: 0.8156137298440519, iteration: 40571
loss: 1.1051682233810425,grad_norm: 0.9999993145560374, iteration: 40572
loss: 0.967872142791748,grad_norm: 0.9999992558675906, iteration: 40573
loss: 0.9836301207542419,grad_norm: 0.9999991017233927, iteration: 40574
loss: 1.0194356441497803,grad_norm: 0.9990904480753304, iteration: 40575
loss: 1.0481455326080322,grad_norm: 0.9672029690651539, iteration: 40576
loss: 1.0112742185592651,grad_norm: 0.9999990164197309, iteration: 40577
loss: 0.9736747145652771,grad_norm: 0.8709140096119768, iteration: 40578
loss: 0.9843084812164307,grad_norm: 0.929195956417058, iteration: 40579
loss: 1.0074656009674072,grad_norm: 0.8854129859771539, iteration: 40580
loss: 0.9906898736953735,grad_norm: 0.8864709376760143, iteration: 40581
loss: 0.9988114237785339,grad_norm: 0.999999169966482, iteration: 40582
loss: 1.006255030632019,grad_norm: 0.9293845962221012, iteration: 40583
loss: 0.9428037405014038,grad_norm: 0.9611040368671135, iteration: 40584
loss: 1.0338234901428223,grad_norm: 0.9381123865837747, iteration: 40585
loss: 0.9818632006645203,grad_norm: 0.999998990344454, iteration: 40586
loss: 1.002082109451294,grad_norm: 0.9999989522496423, iteration: 40587
loss: 1.0364536046981812,grad_norm: 0.9999991830328357, iteration: 40588
loss: 0.9670190215110779,grad_norm: 0.9999992907112644, iteration: 40589
loss: 1.02226722240448,grad_norm: 0.7954566434372506, iteration: 40590
loss: 0.9838446974754333,grad_norm: 0.8904066758470419, iteration: 40591
loss: 0.9896342158317566,grad_norm: 0.9999991335184816, iteration: 40592
loss: 1.0046740770339966,grad_norm: 0.9999990957774387, iteration: 40593
loss: 0.9759580492973328,grad_norm: 0.9999989803179665, iteration: 40594
loss: 1.0106115341186523,grad_norm: 0.999999008043676, iteration: 40595
loss: 1.0062611103057861,grad_norm: 0.9999990133162907, iteration: 40596
loss: 0.98397296667099,grad_norm: 0.9441089772038654, iteration: 40597
loss: 1.0201257467269897,grad_norm: 0.8828145869634852, iteration: 40598
loss: 1.0028011798858643,grad_norm: 0.9999990010055161, iteration: 40599
loss: 1.0250509977340698,grad_norm: 0.8856779851240678, iteration: 40600
loss: 1.0724291801452637,grad_norm: 0.9999992157634098, iteration: 40601
loss: 1.03957200050354,grad_norm: 0.9999992195538634, iteration: 40602
loss: 1.0261493921279907,grad_norm: 0.8961236068057553, iteration: 40603
loss: 1.019135594367981,grad_norm: 0.9999991583278502, iteration: 40604
loss: 1.0334843397140503,grad_norm: 0.9999993422659516, iteration: 40605
loss: 1.0362056493759155,grad_norm: 0.9999991927739761, iteration: 40606
loss: 0.992685079574585,grad_norm: 0.9829620851303702, iteration: 40607
loss: 0.9751629829406738,grad_norm: 0.9999992244675571, iteration: 40608
loss: 1.0294746160507202,grad_norm: 0.8451660089988572, iteration: 40609
loss: 0.9777096509933472,grad_norm: 0.8878297828519518, iteration: 40610
loss: 1.0252046585083008,grad_norm: 0.944568467039093, iteration: 40611
loss: 1.0165762901306152,grad_norm: 0.9978291827408644, iteration: 40612
loss: 1.0105338096618652,grad_norm: 0.8800743072501539, iteration: 40613
loss: 0.9999112486839294,grad_norm: 0.8562559959649837, iteration: 40614
loss: 1.0169316530227661,grad_norm: 0.9999991359923257, iteration: 40615
loss: 1.0069488286972046,grad_norm: 0.8916068565460278, iteration: 40616
loss: 0.969269335269928,grad_norm: 0.9158452520937995, iteration: 40617
loss: 1.0081169605255127,grad_norm: 0.8626478909338504, iteration: 40618
loss: 0.9870989918708801,grad_norm: 0.7736949252587241, iteration: 40619
loss: 0.9922345280647278,grad_norm: 0.9999990181462585, iteration: 40620
loss: 1.0311254262924194,grad_norm: 0.9999992329875218, iteration: 40621
loss: 1.021880030632019,grad_norm: 0.905900771512165, iteration: 40622
loss: 1.0025508403778076,grad_norm: 0.9381123420054988, iteration: 40623
loss: 0.9783466458320618,grad_norm: 0.8524722537881834, iteration: 40624
loss: 1.0152472257614136,grad_norm: 0.9355473997215771, iteration: 40625
loss: 0.9830892086029053,grad_norm: 0.911627038220094, iteration: 40626
loss: 0.9852599501609802,grad_norm: 0.8973833408745346, iteration: 40627
loss: 1.0021538734436035,grad_norm: 0.8417632694432229, iteration: 40628
loss: 1.0442733764648438,grad_norm: 0.9195962126751097, iteration: 40629
loss: 1.0236420631408691,grad_norm: 0.996665005004388, iteration: 40630
loss: 1.0105414390563965,grad_norm: 0.8571592316095096, iteration: 40631
loss: 1.0308139324188232,grad_norm: 0.9999991621793966, iteration: 40632
loss: 1.0087641477584839,grad_norm: 0.9999990730899524, iteration: 40633
loss: 1.006503939628601,grad_norm: 0.9999991732791653, iteration: 40634
loss: 1.0179873704910278,grad_norm: 0.999999049947018, iteration: 40635
loss: 1.0487626791000366,grad_norm: 0.9999996957238155, iteration: 40636
loss: 1.0090482234954834,grad_norm: 0.8919955717088212, iteration: 40637
loss: 1.0330039262771606,grad_norm: 0.8048086891788586, iteration: 40638
loss: 1.0410621166229248,grad_norm: 0.9999991455798475, iteration: 40639
loss: 1.0590143203735352,grad_norm: 0.9999990404743605, iteration: 40640
loss: 1.040967583656311,grad_norm: 0.9999992672059952, iteration: 40641
loss: 1.0130904912948608,grad_norm: 0.9449741972812393, iteration: 40642
loss: 1.0426270961761475,grad_norm: 0.9999991096640716, iteration: 40643
loss: 0.9791432619094849,grad_norm: 0.9775429955279651, iteration: 40644
loss: 1.031532645225525,grad_norm: 0.9768322497920063, iteration: 40645
loss: 0.9805834889411926,grad_norm: 0.9999990638538256, iteration: 40646
loss: 1.0067434310913086,grad_norm: 0.9314856716194908, iteration: 40647
loss: 0.9831050038337708,grad_norm: 0.9999992281074945, iteration: 40648
loss: 1.0173636674880981,grad_norm: 0.9796361855860948, iteration: 40649
loss: 0.9927687048912048,grad_norm: 0.9999989577160038, iteration: 40650
loss: 0.9934422373771667,grad_norm: 0.9240856393010343, iteration: 40651
loss: 0.9981355667114258,grad_norm: 0.9330990239893249, iteration: 40652
loss: 1.010771632194519,grad_norm: 0.8798711394954392, iteration: 40653
loss: 0.9956135749816895,grad_norm: 0.8663497761251631, iteration: 40654
loss: 1.0568796396255493,grad_norm: 0.9999993470544363, iteration: 40655
loss: 1.0204055309295654,grad_norm: 0.999999163958677, iteration: 40656
loss: 1.005486011505127,grad_norm: 0.8605729777739747, iteration: 40657
loss: 1.004834771156311,grad_norm: 0.842057428183911, iteration: 40658
loss: 1.0072015523910522,grad_norm: 0.9679808357064458, iteration: 40659
loss: 1.0246347188949585,grad_norm: 0.9999989970321886, iteration: 40660
loss: 0.9920171499252319,grad_norm: 0.9662926235920192, iteration: 40661
loss: 0.989736020565033,grad_norm: 0.999999007250409, iteration: 40662
loss: 1.021316647529602,grad_norm: 0.9746826121561317, iteration: 40663
loss: 1.0166176557540894,grad_norm: 0.9999991582557798, iteration: 40664
loss: 1.012309193611145,grad_norm: 0.999999247663064, iteration: 40665
loss: 1.0136065483093262,grad_norm: 0.9168557987344294, iteration: 40666
loss: 0.9888206720352173,grad_norm: 0.9999990258951411, iteration: 40667
loss: 1.0129058361053467,grad_norm: 0.9999991066544066, iteration: 40668
loss: 0.9975854754447937,grad_norm: 0.999999585624047, iteration: 40669
loss: 1.013784408569336,grad_norm: 0.999999532428471, iteration: 40670
loss: 1.0112172365188599,grad_norm: 0.9470110440981595, iteration: 40671
loss: 0.9958163499832153,grad_norm: 0.9999989028078852, iteration: 40672
loss: 1.014655590057373,grad_norm: 0.9999992407513156, iteration: 40673
loss: 1.007269024848938,grad_norm: 0.9999991146726822, iteration: 40674
loss: 0.9968117475509644,grad_norm: 0.9063679962637301, iteration: 40675
loss: 1.0041148662567139,grad_norm: 0.9116199249412796, iteration: 40676
loss: 1.0306569337844849,grad_norm: 0.823421028477632, iteration: 40677
loss: 1.0156300067901611,grad_norm: 0.9999990882323755, iteration: 40678
loss: 0.987370491027832,grad_norm: 0.9446236043568271, iteration: 40679
loss: 1.0047831535339355,grad_norm: 0.9999992087060752, iteration: 40680
loss: 0.9842251539230347,grad_norm: 0.9999991672744791, iteration: 40681
loss: 1.0022245645523071,grad_norm: 0.9999990229238175, iteration: 40682
loss: 0.9979081749916077,grad_norm: 0.8133233782372484, iteration: 40683
loss: 1.0174649953842163,grad_norm: 0.999999059761293, iteration: 40684
loss: 1.013820767402649,grad_norm: 0.9999992101060474, iteration: 40685
loss: 0.9796658158302307,grad_norm: 0.9999992024448466, iteration: 40686
loss: 1.0120919942855835,grad_norm: 0.9232717657775906, iteration: 40687
loss: 0.9652784466743469,grad_norm: 0.9999990579114272, iteration: 40688
loss: 1.0221976041793823,grad_norm: 0.8705543167690282, iteration: 40689
loss: 1.050602912902832,grad_norm: 0.9856971019048114, iteration: 40690
loss: 1.0364681482315063,grad_norm: 0.999999069188875, iteration: 40691
loss: 1.029211163520813,grad_norm: 0.9999990276164882, iteration: 40692
loss: 0.9939567446708679,grad_norm: 0.9999990450710644, iteration: 40693
loss: 0.9798856377601624,grad_norm: 0.935578079480049, iteration: 40694
loss: 0.9939724802970886,grad_norm: 0.8793284166943361, iteration: 40695
loss: 0.9981778264045715,grad_norm: 0.9999989938555688, iteration: 40696
loss: 1.0184016227722168,grad_norm: 0.9999992547120279, iteration: 40697
loss: 0.9871904253959656,grad_norm: 0.9161095727053419, iteration: 40698
loss: 1.0067098140716553,grad_norm: 0.9999989761720534, iteration: 40699
loss: 0.9939761757850647,grad_norm: 0.9999991215646783, iteration: 40700
loss: 0.989133358001709,grad_norm: 0.960349605221557, iteration: 40701
loss: 0.981971025466919,grad_norm: 0.9095191516198413, iteration: 40702
loss: 1.002547264099121,grad_norm: 0.9999992081461402, iteration: 40703
loss: 1.0157482624053955,grad_norm: 0.8781864363846275, iteration: 40704
loss: 1.0009336471557617,grad_norm: 0.9999992441543967, iteration: 40705
loss: 1.008689522743225,grad_norm: 0.9999992204415966, iteration: 40706
loss: 1.0050106048583984,grad_norm: 0.9999992107704944, iteration: 40707
loss: 0.9704323410987854,grad_norm: 0.9999991783318956, iteration: 40708
loss: 0.9878411889076233,grad_norm: 0.9999990994298842, iteration: 40709
loss: 0.9828854203224182,grad_norm: 0.9306412527679744, iteration: 40710
loss: 0.9953045845031738,grad_norm: 0.9519491746507077, iteration: 40711
loss: 1.030534029006958,grad_norm: 0.9999992069118815, iteration: 40712
loss: 0.9685075283050537,grad_norm: 0.8093418451340797, iteration: 40713
loss: 0.9940131902694702,grad_norm: 0.8541564565279103, iteration: 40714
loss: 1.031040072441101,grad_norm: 0.9999992137593818, iteration: 40715
loss: 0.9918093085289001,grad_norm: 0.9348527153025817, iteration: 40716
loss: 1.020954966545105,grad_norm: 0.9192325550044043, iteration: 40717
loss: 1.002864122390747,grad_norm: 0.8880251792909057, iteration: 40718
loss: 1.0309120416641235,grad_norm: 0.9605570588072809, iteration: 40719
loss: 1.0181481838226318,grad_norm: 0.9999991975128094, iteration: 40720
loss: 1.0607032775878906,grad_norm: 0.999999027318578, iteration: 40721
loss: 0.980019748210907,grad_norm: 0.9999989601402575, iteration: 40722
loss: 0.9938387870788574,grad_norm: 0.8915781736332425, iteration: 40723
loss: 1.0247224569320679,grad_norm: 0.9999993943657001, iteration: 40724
loss: 0.9709816575050354,grad_norm: 0.9999992205464266, iteration: 40725
loss: 0.9762289524078369,grad_norm: 0.9999993604123332, iteration: 40726
loss: 0.9871236681938171,grad_norm: 0.9157807996072148, iteration: 40727
loss: 1.0293374061584473,grad_norm: 0.9999991026147268, iteration: 40728
loss: 1.0086971521377563,grad_norm: 0.9198209207725433, iteration: 40729
loss: 1.048384428024292,grad_norm: 0.9999991136199318, iteration: 40730
loss: 0.9967647790908813,grad_norm: 0.9999941209108683, iteration: 40731
loss: 0.9896535277366638,grad_norm: 0.9999992530212649, iteration: 40732
loss: 0.9831075072288513,grad_norm: 0.99999908358524, iteration: 40733
loss: 0.997502863407135,grad_norm: 0.9999991805325069, iteration: 40734
loss: 1.015421748161316,grad_norm: 0.9999992097902424, iteration: 40735
loss: 1.036741852760315,grad_norm: 0.9999992484685014, iteration: 40736
loss: 0.9665844440460205,grad_norm: 0.9162001277296412, iteration: 40737
loss: 1.0495368242263794,grad_norm: 0.9999992651464878, iteration: 40738
loss: 1.006980538368225,grad_norm: 0.9999989849280432, iteration: 40739
loss: 1.0148955583572388,grad_norm: 0.9112770909127845, iteration: 40740
loss: 1.018561601638794,grad_norm: 0.9999990932609001, iteration: 40741
loss: 1.0108180046081543,grad_norm: 0.9999991289676395, iteration: 40742
loss: 0.9801273345947266,grad_norm: 0.9211214535070316, iteration: 40743
loss: 1.0256211757659912,grad_norm: 0.9999992364359683, iteration: 40744
loss: 1.0261647701263428,grad_norm: 0.8681321736518809, iteration: 40745
loss: 1.0308059453964233,grad_norm: 0.9999990463485589, iteration: 40746
loss: 1.036290168762207,grad_norm: 0.9999992177522817, iteration: 40747
loss: 0.999655544757843,grad_norm: 0.9999992323833007, iteration: 40748
loss: 0.9859252572059631,grad_norm: 0.9999990629692046, iteration: 40749
loss: 1.0525012016296387,grad_norm: 0.9999998768410615, iteration: 40750
loss: 0.993771493434906,grad_norm: 0.9477411381571206, iteration: 40751
loss: 1.0090886354446411,grad_norm: 0.9999992161331549, iteration: 40752
loss: 1.0135507583618164,grad_norm: 0.9999993693697175, iteration: 40753
loss: 1.043107509613037,grad_norm: 0.9999991795134493, iteration: 40754
loss: 1.0304076671600342,grad_norm: 0.8990921403944551, iteration: 40755
loss: 1.0410265922546387,grad_norm: 0.9999995527660913, iteration: 40756
loss: 1.035581350326538,grad_norm: 0.9081462537517804, iteration: 40757
loss: 1.0040615797042847,grad_norm: 0.9999996955940788, iteration: 40758
loss: 1.0269105434417725,grad_norm: 0.9999990934133168, iteration: 40759
loss: 1.0192666053771973,grad_norm: 0.9999991335157183, iteration: 40760
loss: 1.016292691230774,grad_norm: 0.9007477657015694, iteration: 40761
loss: 1.012083888053894,grad_norm: 0.9999992562884197, iteration: 40762
loss: 1.006018877029419,grad_norm: 0.7908734946890211, iteration: 40763
loss: 1.0124915838241577,grad_norm: 0.9788808830336829, iteration: 40764
loss: 1.0197964906692505,grad_norm: 0.9952348110112896, iteration: 40765
loss: 1.0164799690246582,grad_norm: 0.9232803161025391, iteration: 40766
loss: 1.0464820861816406,grad_norm: 0.9999991427414213, iteration: 40767
loss: 1.0122497081756592,grad_norm: 0.9999996779689208, iteration: 40768
loss: 0.9716317057609558,grad_norm: 0.9789373404345934, iteration: 40769
loss: 1.028605341911316,grad_norm: 0.8989686679745472, iteration: 40770
loss: 1.0056952238082886,grad_norm: 0.947925018195924, iteration: 40771
loss: 0.9964038729667664,grad_norm: 0.9999992027364579, iteration: 40772
loss: 1.0662901401519775,grad_norm: 0.9999996630111826, iteration: 40773
loss: 1.0398483276367188,grad_norm: 0.9999990995127627, iteration: 40774
loss: 0.9955686926841736,grad_norm: 0.9872162768631018, iteration: 40775
loss: 1.037576675415039,grad_norm: 0.9999995187112568, iteration: 40776
loss: 1.0209943056106567,grad_norm: 0.9880169634179415, iteration: 40777
loss: 0.9784886837005615,grad_norm: 0.9999990810764638, iteration: 40778
loss: 1.0155525207519531,grad_norm: 0.9999993215364456, iteration: 40779
loss: 1.0214439630508423,grad_norm: 0.9999993942370998, iteration: 40780
loss: 1.023329257965088,grad_norm: 0.889986545623287, iteration: 40781
loss: 1.017335057258606,grad_norm: 0.9999991095748932, iteration: 40782
loss: 0.9860548973083496,grad_norm: 0.9999992598106201, iteration: 40783
loss: 0.9955805540084839,grad_norm: 0.9999991777028887, iteration: 40784
loss: 1.0158506631851196,grad_norm: 0.980686538638797, iteration: 40785
loss: 0.9986863732337952,grad_norm: 0.9759580795334194, iteration: 40786
loss: 1.0106780529022217,grad_norm: 0.9999996635855976, iteration: 40787
loss: 1.0141334533691406,grad_norm: 0.8799152099615007, iteration: 40788
loss: 1.0016614198684692,grad_norm: 0.9999992818747041, iteration: 40789
loss: 1.018761157989502,grad_norm: 0.9428569024837683, iteration: 40790
loss: 1.004009485244751,grad_norm: 0.9524639560586375, iteration: 40791
loss: 1.0175977945327759,grad_norm: 0.9999992828615435, iteration: 40792
loss: 1.0083879232406616,grad_norm: 0.9220278886093428, iteration: 40793
loss: 0.9808661341667175,grad_norm: 0.999999141760773, iteration: 40794
loss: 1.0169047117233276,grad_norm: 0.9999989378302279, iteration: 40795
loss: 0.9930493831634521,grad_norm: 0.9221514824436765, iteration: 40796
loss: 1.0103638172149658,grad_norm: 0.987006719793508, iteration: 40797
loss: 0.959475040435791,grad_norm: 0.9988184067501952, iteration: 40798
loss: 1.021573781967163,grad_norm: 0.9651394549147618, iteration: 40799
loss: 1.0137745141983032,grad_norm: 0.9999990509568871, iteration: 40800
loss: 1.0300095081329346,grad_norm: 0.9999993614852952, iteration: 40801
loss: 1.003840684890747,grad_norm: 0.9615770308320728, iteration: 40802
loss: 1.0291831493377686,grad_norm: 0.9999993092057943, iteration: 40803
loss: 0.9813425540924072,grad_norm: 0.9438568233878939, iteration: 40804
loss: 0.9966400861740112,grad_norm: 0.89739341189739, iteration: 40805
loss: 0.9917146563529968,grad_norm: 0.9946900822576936, iteration: 40806
loss: 0.9916373491287231,grad_norm: 0.999999044275263, iteration: 40807
loss: 0.9812473654747009,grad_norm: 0.9455954596340723, iteration: 40808
loss: 1.0264006853103638,grad_norm: 0.9999990515284054, iteration: 40809
loss: 1.0008867979049683,grad_norm: 0.9999991625987977, iteration: 40810
loss: 0.9838423132896423,grad_norm: 0.9265226455467903, iteration: 40811
loss: 1.044420838356018,grad_norm: 0.9529687627628899, iteration: 40812
loss: 1.0226401090621948,grad_norm: 0.9999992596415463, iteration: 40813
loss: 1.0230001211166382,grad_norm: 0.9999992384858039, iteration: 40814
loss: 1.0158535242080688,grad_norm: 0.9999991753257862, iteration: 40815
loss: 1.0005650520324707,grad_norm: 0.9999992254284619, iteration: 40816
loss: 1.0336401462554932,grad_norm: 0.9999989572031104, iteration: 40817
loss: 0.9978514909744263,grad_norm: 0.9084426489120145, iteration: 40818
loss: 1.0247114896774292,grad_norm: 0.9295146677304416, iteration: 40819
loss: 1.0359457731246948,grad_norm: 0.999999778810809, iteration: 40820
loss: 0.9817034006118774,grad_norm: 0.9128975085828382, iteration: 40821
loss: 1.0349135398864746,grad_norm: 0.9999992019384778, iteration: 40822
loss: 0.9693207144737244,grad_norm: 0.9785574640459924, iteration: 40823
loss: 0.9958654046058655,grad_norm: 0.9999989553649401, iteration: 40824
loss: 1.0107582807540894,grad_norm: 0.9873947893276587, iteration: 40825
loss: 0.9952341914176941,grad_norm: 0.902203454570422, iteration: 40826
loss: 0.9970667362213135,grad_norm: 0.9921591079819428, iteration: 40827
loss: 0.9660536050796509,grad_norm: 0.9999991198266788, iteration: 40828
loss: 0.9960571527481079,grad_norm: 0.8818245850453982, iteration: 40829
loss: 1.0391746759414673,grad_norm: 0.9537554423309769, iteration: 40830
loss: 1.0243496894836426,grad_norm: 0.9819775849820176, iteration: 40831
loss: 1.0175153017044067,grad_norm: 0.9532411452152766, iteration: 40832
loss: 0.9631729125976562,grad_norm: 0.9280250811382282, iteration: 40833
loss: 1.003225326538086,grad_norm: 0.9999991568652852, iteration: 40834
loss: 0.9809687733650208,grad_norm: 0.9270129156538073, iteration: 40835
loss: 1.0194988250732422,grad_norm: 0.9089480488438987, iteration: 40836
loss: 0.9887240529060364,grad_norm: 0.9999990665956843, iteration: 40837
loss: 1.0194756984710693,grad_norm: 0.9999991637324178, iteration: 40838
loss: 1.0115976333618164,grad_norm: 0.986503221421179, iteration: 40839
loss: 0.9983965158462524,grad_norm: 0.891380205800559, iteration: 40840
loss: 1.0274968147277832,grad_norm: 0.999999074010761, iteration: 40841
loss: 0.998453676700592,grad_norm: 0.9079763114717743, iteration: 40842
loss: 0.9886094331741333,grad_norm: 0.9337615101883558, iteration: 40843
loss: 1.028159737586975,grad_norm: 0.9557254892626774, iteration: 40844
loss: 0.9903829097747803,grad_norm: 0.9999991845644398, iteration: 40845
loss: 0.9680585861206055,grad_norm: 0.9999990396392244, iteration: 40846
loss: 0.9825621843338013,grad_norm: 0.9999991120416474, iteration: 40847
loss: 1.0292181968688965,grad_norm: 0.999999121102431, iteration: 40848
loss: 0.9720808863639832,grad_norm: 0.8824590942027559, iteration: 40849
loss: 0.9782634377479553,grad_norm: 0.9999990628461023, iteration: 40850
loss: 1.0163674354553223,grad_norm: 0.9999990784689058, iteration: 40851
loss: 1.023114562034607,grad_norm: 0.9999991226047353, iteration: 40852
loss: 0.9564031362533569,grad_norm: 0.9091202211533282, iteration: 40853
loss: 1.0434629917144775,grad_norm: 0.9999992706279123, iteration: 40854
loss: 0.9722269177436829,grad_norm: 0.9999991965129597, iteration: 40855
loss: 0.9920074343681335,grad_norm: 0.9999991799049601, iteration: 40856
loss: 1.0448096990585327,grad_norm: 0.8233290930256681, iteration: 40857
loss: 0.9956931471824646,grad_norm: 0.9999989527837376, iteration: 40858
loss: 0.9559927582740784,grad_norm: 0.9999991063650612, iteration: 40859
loss: 1.0235424041748047,grad_norm: 0.9383044120268704, iteration: 40860
loss: 0.9633460640907288,grad_norm: 0.9896877146086094, iteration: 40861
loss: 1.0134189128875732,grad_norm: 0.9073116198414335, iteration: 40862
loss: 1.0138392448425293,grad_norm: 0.9999990486373093, iteration: 40863
loss: 1.0225831270217896,grad_norm: 0.9999990305108706, iteration: 40864
loss: 0.9595203995704651,grad_norm: 0.9999991137274028, iteration: 40865
loss: 1.0475181341171265,grad_norm: 0.8732020589717757, iteration: 40866
loss: 0.9883507490158081,grad_norm: 0.8983878845306846, iteration: 40867
loss: 0.9800747632980347,grad_norm: 0.9999990828487696, iteration: 40868
loss: 0.9761494994163513,grad_norm: 0.9138163725650194, iteration: 40869
loss: 1.0660563707351685,grad_norm: 0.9411545066096239, iteration: 40870
loss: 0.9948046207427979,grad_norm: 0.9963587122620006, iteration: 40871
loss: 1.01931631565094,grad_norm: 0.909668719453553, iteration: 40872
loss: 1.007408857345581,grad_norm: 0.8251267277054265, iteration: 40873
loss: 1.0387012958526611,grad_norm: 0.8955664824022276, iteration: 40874
loss: 1.014482855796814,grad_norm: 0.999998996808318, iteration: 40875
loss: 0.9907822608947754,grad_norm: 0.9524758211423223, iteration: 40876
loss: 1.0231070518493652,grad_norm: 0.9999990021316318, iteration: 40877
loss: 1.0205285549163818,grad_norm: 0.9999994424549163, iteration: 40878
loss: 0.983197033405304,grad_norm: 0.8851682410530085, iteration: 40879
loss: 1.0386738777160645,grad_norm: 0.9999990623695326, iteration: 40880
loss: 1.0146031379699707,grad_norm: 0.9999992074172686, iteration: 40881
loss: 0.9742349982261658,grad_norm: 0.942928120955695, iteration: 40882
loss: 1.0501796007156372,grad_norm: 0.9999990951586444, iteration: 40883
loss: 1.0287485122680664,grad_norm: 0.9870713789463611, iteration: 40884
loss: 0.9724547863006592,grad_norm: 0.8644414439922797, iteration: 40885
loss: 1.0009971857070923,grad_norm: 0.9801293997376174, iteration: 40886
loss: 0.9901894927024841,grad_norm: 0.9999994969826956, iteration: 40887
loss: 1.022647500038147,grad_norm: 0.8959406582314988, iteration: 40888
loss: 0.9962636828422546,grad_norm: 0.9999990954164716, iteration: 40889
loss: 1.0305119752883911,grad_norm: 0.999998994486267, iteration: 40890
loss: 0.9835723042488098,grad_norm: 0.9282599009316205, iteration: 40891
loss: 0.962600827217102,grad_norm: 0.9999992149182615, iteration: 40892
loss: 0.9551759362220764,grad_norm: 0.9999991386345769, iteration: 40893
loss: 0.9835525751113892,grad_norm: 0.938814980785036, iteration: 40894
loss: 0.9837279319763184,grad_norm: 0.8111198438314995, iteration: 40895
loss: 0.9977734088897705,grad_norm: 0.9130381194330515, iteration: 40896
loss: 0.9705539345741272,grad_norm: 0.9947126781596225, iteration: 40897
loss: 1.0159964561462402,grad_norm: 0.9145757981477226, iteration: 40898
loss: 0.9937702417373657,grad_norm: 0.8893400513704235, iteration: 40899
loss: 0.9907652735710144,grad_norm: 0.833421608299157, iteration: 40900
loss: 0.9706878066062927,grad_norm: 0.9999991211102398, iteration: 40901
loss: 0.9882374405860901,grad_norm: 0.9436202832438192, iteration: 40902
loss: 1.0126986503601074,grad_norm: 0.9999991098597998, iteration: 40903
loss: 0.9763012528419495,grad_norm: 0.8731233687512185, iteration: 40904
loss: 1.014750361442566,grad_norm: 0.8255499946310384, iteration: 40905
loss: 0.9918085932731628,grad_norm: 0.9999990043911536, iteration: 40906
loss: 1.0089153051376343,grad_norm: 0.946112615374728, iteration: 40907
loss: 1.0088727474212646,grad_norm: 0.9999991774873946, iteration: 40908
loss: 0.9777978658676147,grad_norm: 0.9890390799187058, iteration: 40909
loss: 1.051733374595642,grad_norm: 0.9999990566979907, iteration: 40910
loss: 0.9976102113723755,grad_norm: 0.999999133214477, iteration: 40911
loss: 1.0016937255859375,grad_norm: 0.8716653282834914, iteration: 40912
loss: 0.9648966789245605,grad_norm: 0.905394876141633, iteration: 40913
loss: 0.978049635887146,grad_norm: 0.9999992678096632, iteration: 40914
loss: 1.0137763023376465,grad_norm: 0.9999991500888936, iteration: 40915
loss: 1.0291416645050049,grad_norm: 0.9147466881885833, iteration: 40916
loss: 0.9971076846122742,grad_norm: 0.9292900841129456, iteration: 40917
loss: 0.9803921580314636,grad_norm: 0.9999991782980072, iteration: 40918
loss: 0.9671236872673035,grad_norm: 0.9367804878032366, iteration: 40919
loss: 0.9673202037811279,grad_norm: 0.9999990566334169, iteration: 40920
loss: 1.0187991857528687,grad_norm: 0.9999989629892994, iteration: 40921
loss: 1.0195152759552002,grad_norm: 0.9999993203304729, iteration: 40922
loss: 1.035939335823059,grad_norm: 0.9999993236481802, iteration: 40923
loss: 0.9994086623191833,grad_norm: 0.9999999738706707, iteration: 40924
loss: 0.9913222789764404,grad_norm: 0.9805068755672696, iteration: 40925
loss: 1.013270616531372,grad_norm: 0.8581720803261635, iteration: 40926
loss: 1.002436876296997,grad_norm: 0.9579890584500773, iteration: 40927
loss: 1.0146701335906982,grad_norm: 0.9999991998519132, iteration: 40928
loss: 1.0048466920852661,grad_norm: 0.999998975009301, iteration: 40929
loss: 0.9774481654167175,grad_norm: 0.945834161845586, iteration: 40930
loss: 0.9709277153015137,grad_norm: 0.8562380396052535, iteration: 40931
loss: 0.9737675786018372,grad_norm: 0.8882596392766098, iteration: 40932
loss: 1.0121842622756958,grad_norm: 0.9999864033195855, iteration: 40933
loss: 0.9763146638870239,grad_norm: 0.9653310484767528, iteration: 40934
loss: 1.0232220888137817,grad_norm: 0.9999991151236204, iteration: 40935
loss: 0.951729953289032,grad_norm: 0.9999994045260837, iteration: 40936
loss: 1.0356874465942383,grad_norm: 0.9934529474503329, iteration: 40937
loss: 1.0667318105697632,grad_norm: 0.9999994210398305, iteration: 40938
loss: 0.9704853892326355,grad_norm: 0.9170237759246314, iteration: 40939
loss: 0.9980973601341248,grad_norm: 0.978128885121312, iteration: 40940
loss: 1.032584309577942,grad_norm: 0.9999989396829513, iteration: 40941
loss: 1.0328271389007568,grad_norm: 0.8677560772582339, iteration: 40942
loss: 0.9919657707214355,grad_norm: 0.8067900568737973, iteration: 40943
loss: 1.1142754554748535,grad_norm: 0.9999997605697961, iteration: 40944
loss: 0.9804709553718567,grad_norm: 0.9852923453192773, iteration: 40945
loss: 0.9973608255386353,grad_norm: 0.9999990244510961, iteration: 40946
loss: 1.019195556640625,grad_norm: 0.9805204076799058, iteration: 40947
loss: 0.9986306428909302,grad_norm: 0.8945159398650028, iteration: 40948
loss: 1.005646824836731,grad_norm: 0.9530612474700667, iteration: 40949
loss: 1.0096250772476196,grad_norm: 0.9999991322234044, iteration: 40950
loss: 1.004459023475647,grad_norm: 0.9999990886784073, iteration: 40951
loss: 0.9867479205131531,grad_norm: 0.7714131835377599, iteration: 40952
loss: 1.0037823915481567,grad_norm: 0.9401468939137922, iteration: 40953
loss: 1.018335223197937,grad_norm: 0.9999991349323449, iteration: 40954
loss: 1.0105692148208618,grad_norm: 0.8593105270495747, iteration: 40955
loss: 1.0076524019241333,grad_norm: 0.8406230039049691, iteration: 40956
loss: 1.0488563776016235,grad_norm: 0.9311340310890084, iteration: 40957
loss: 1.004148244857788,grad_norm: 0.9864812481566403, iteration: 40958
loss: 0.9964604377746582,grad_norm: 0.7877160956382883, iteration: 40959
loss: 1.0421866178512573,grad_norm: 0.9999990574739175, iteration: 40960
loss: 1.024341344833374,grad_norm: 0.9999991534490891, iteration: 40961
loss: 0.9915711879730225,grad_norm: 0.9996516949100382, iteration: 40962
loss: 1.00387442111969,grad_norm: 0.9961083690480033, iteration: 40963
loss: 1.0194759368896484,grad_norm: 0.905743532613494, iteration: 40964
loss: 1.0059314966201782,grad_norm: 0.999999353327121, iteration: 40965
loss: 0.9850343465805054,grad_norm: 0.8496025016678999, iteration: 40966
loss: 1.0253734588623047,grad_norm: 0.9999991438734169, iteration: 40967
loss: 1.0139288902282715,grad_norm: 0.9639453870740654, iteration: 40968
loss: 0.992175817489624,grad_norm: 0.9781698472877431, iteration: 40969
loss: 0.9580572247505188,grad_norm: 0.8149512593908604, iteration: 40970
loss: 1.0182749032974243,grad_norm: 0.9616889031372244, iteration: 40971
loss: 1.0216459035873413,grad_norm: 0.9999994161054362, iteration: 40972
loss: 1.0006171464920044,grad_norm: 0.9999989673145808, iteration: 40973
loss: 1.025062918663025,grad_norm: 0.9133670476040776, iteration: 40974
loss: 0.986028254032135,grad_norm: 0.9814651924271682, iteration: 40975
loss: 1.0026979446411133,grad_norm: 0.9362451324216517, iteration: 40976
loss: 1.0049512386322021,grad_norm: 0.8621019949742501, iteration: 40977
loss: 1.0111515522003174,grad_norm: 0.9999990087596866, iteration: 40978
loss: 1.0419939756393433,grad_norm: 0.9534768948428423, iteration: 40979
loss: 0.9797130227088928,grad_norm: 0.9999990826496791, iteration: 40980
loss: 1.0034358501434326,grad_norm: 0.9252940178244735, iteration: 40981
loss: 1.007074236869812,grad_norm: 0.993681127839422, iteration: 40982
loss: 1.0426298379898071,grad_norm: 0.9999998441657035, iteration: 40983
loss: 1.04934823513031,grad_norm: 0.8473785271402242, iteration: 40984
loss: 0.9876136183738708,grad_norm: 0.9999990677765516, iteration: 40985
loss: 0.9709441661834717,grad_norm: 0.9999996036407767, iteration: 40986
loss: 1.0042484998703003,grad_norm: 0.8582756915030524, iteration: 40987
loss: 1.0172089338302612,grad_norm: 0.9999992045748073, iteration: 40988
loss: 1.0041754245758057,grad_norm: 0.9858751246144466, iteration: 40989
loss: 1.0387543439865112,grad_norm: 0.9733456589234291, iteration: 40990
loss: 1.0437332391738892,grad_norm: 0.999999401869376, iteration: 40991
loss: 0.9898881912231445,grad_norm: 0.9360330746354435, iteration: 40992
loss: 0.9986883401870728,grad_norm: 0.8718062058029602, iteration: 40993
loss: 1.0237237215042114,grad_norm: 0.9999992091924006, iteration: 40994
loss: 0.988271176815033,grad_norm: 0.8184187612579014, iteration: 40995
loss: 0.997724175453186,grad_norm: 0.9665577086854282, iteration: 40996
loss: 1.0104029178619385,grad_norm: 0.9095539214343799, iteration: 40997
loss: 0.9589834213256836,grad_norm: 0.851532163664657, iteration: 40998
loss: 0.996074378490448,grad_norm: 0.9999990529836355, iteration: 40999
loss: 1.0119938850402832,grad_norm: 0.8506639282769647, iteration: 41000
loss: 1.01007878780365,grad_norm: 0.8590567602937188, iteration: 41001
loss: 1.002128005027771,grad_norm: 0.8710830153199166, iteration: 41002
loss: 1.0514529943466187,grad_norm: 0.9355523047996993, iteration: 41003
loss: 1.0052192211151123,grad_norm: 0.9612813536983058, iteration: 41004
loss: 0.9965238571166992,grad_norm: 0.9999991002157113, iteration: 41005
loss: 1.0060936212539673,grad_norm: 0.9464088406041659, iteration: 41006
loss: 0.9957751035690308,grad_norm: 0.9957375141097177, iteration: 41007
loss: 1.0079742670059204,grad_norm: 0.9999990999840481, iteration: 41008
loss: 1.029096007347107,grad_norm: 0.9999990080041804, iteration: 41009
loss: 0.9708932042121887,grad_norm: 0.999999013217512, iteration: 41010
loss: 0.97733074426651,grad_norm: 0.9732139011563157, iteration: 41011
loss: 1.0260497331619263,grad_norm: 0.9920774104151886, iteration: 41012
loss: 1.0147554874420166,grad_norm: 0.9235522152267902, iteration: 41013
loss: 1.06352698802948,grad_norm: 0.9999995579462658, iteration: 41014
loss: 1.0122385025024414,grad_norm: 0.9512489548462213, iteration: 41015
loss: 1.0444296598434448,grad_norm: 0.9999990665574721, iteration: 41016
loss: 0.9870627522468567,grad_norm: 0.9999990667584986, iteration: 41017
loss: 0.9802998304367065,grad_norm: 0.9946235273154919, iteration: 41018
loss: 0.9563208818435669,grad_norm: 0.9999991691071163, iteration: 41019
loss: 1.02467679977417,grad_norm: 0.9999990412309872, iteration: 41020
loss: 0.9999969005584717,grad_norm: 0.980463470448393, iteration: 41021
loss: 1.029192566871643,grad_norm: 0.9999991807909011, iteration: 41022
loss: 1.0008172988891602,grad_norm: 0.8561283153485777, iteration: 41023
loss: 1.0580365657806396,grad_norm: 0.9999991082170185, iteration: 41024
loss: 0.9773780107498169,grad_norm: 0.9999991988984788, iteration: 41025
loss: 1.0281649827957153,grad_norm: 0.9999993432078884, iteration: 41026
loss: 1.0473324060440063,grad_norm: 0.9999992403180921, iteration: 41027
loss: 1.0066124200820923,grad_norm: 0.941588530058984, iteration: 41028
loss: 1.006967306137085,grad_norm: 0.9999995715736896, iteration: 41029
loss: 0.9560286402702332,grad_norm: 0.9999992553315139, iteration: 41030
loss: 0.9859065413475037,grad_norm: 0.8436357597129786, iteration: 41031
loss: 1.1367621421813965,grad_norm: 0.9999994506372338, iteration: 41032
loss: 0.9892696142196655,grad_norm: 0.9999992235650259, iteration: 41033
loss: 1.0046664476394653,grad_norm: 0.9999992983405894, iteration: 41034
loss: 0.9926435947418213,grad_norm: 0.8990533344177355, iteration: 41035
loss: 0.997209370136261,grad_norm: 0.9999991653705549, iteration: 41036
loss: 0.9748315215110779,grad_norm: 0.8613261783458684, iteration: 41037
loss: 1.0254828929901123,grad_norm: 0.9999994062350913, iteration: 41038
loss: 0.965531051158905,grad_norm: 0.9999991991400379, iteration: 41039
loss: 0.9964686036109924,grad_norm: 0.8849651453114792, iteration: 41040
loss: 0.976319432258606,grad_norm: 0.999999277982247, iteration: 41041
loss: 0.9791060090065002,grad_norm: 0.9999990513983184, iteration: 41042
loss: 0.9924796223640442,grad_norm: 0.9558921245870491, iteration: 41043
loss: 0.9675882458686829,grad_norm: 0.872253899800521, iteration: 41044
loss: 0.9809207916259766,grad_norm: 0.9999992097583417, iteration: 41045
loss: 1.0114357471466064,grad_norm: 0.9763588312815077, iteration: 41046
loss: 1.0072462558746338,grad_norm: 0.8994800730347503, iteration: 41047
loss: 0.9896166324615479,grad_norm: 0.9924279782628472, iteration: 41048
loss: 0.9710648655891418,grad_norm: 0.999999131597216, iteration: 41049
loss: 1.0435850620269775,grad_norm: 0.9858958055048098, iteration: 41050
loss: 1.0133622884750366,grad_norm: 0.9999997312640957, iteration: 41051
loss: 1.001528263092041,grad_norm: 0.9937728393279291, iteration: 41052
loss: 0.9888864159584045,grad_norm: 0.9999995183893619, iteration: 41053
loss: 1.063191294670105,grad_norm: 0.9999998426549277, iteration: 41054
loss: 1.0286396741867065,grad_norm: 0.9999992281790484, iteration: 41055
loss: 1.0184005498886108,grad_norm: 0.9757877165444866, iteration: 41056
loss: 1.0364152193069458,grad_norm: 0.9999991484422665, iteration: 41057
loss: 0.9668962359428406,grad_norm: 0.9999993079191799, iteration: 41058
loss: 1.01530921459198,grad_norm: 0.9549060079032936, iteration: 41059
loss: 0.9686554670333862,grad_norm: 0.9999994280702769, iteration: 41060
loss: 0.9778926968574524,grad_norm: 0.9999990382848092, iteration: 41061
loss: 0.9838220477104187,grad_norm: 0.9913009300164617, iteration: 41062
loss: 1.0144541263580322,grad_norm: 0.8247844186922587, iteration: 41063
loss: 0.9885315895080566,grad_norm: 0.9999990483919557, iteration: 41064
loss: 0.9614512324333191,grad_norm: 0.999999092847911, iteration: 41065
loss: 1.0370228290557861,grad_norm: 0.9597688688208527, iteration: 41066
loss: 1.0419371128082275,grad_norm: 0.9999991990977042, iteration: 41067
loss: 0.992310643196106,grad_norm: 0.9999989731867597, iteration: 41068
loss: 1.0050607919692993,grad_norm: 0.9999990073637374, iteration: 41069
loss: 0.9764178991317749,grad_norm: 0.9999991002236545, iteration: 41070
loss: 1.017504096031189,grad_norm: 0.9999990977509924, iteration: 41071
loss: 1.0210235118865967,grad_norm: 0.9999995062495174, iteration: 41072
loss: 1.0112594366073608,grad_norm: 0.9999996485576091, iteration: 41073
loss: 1.0191835165023804,grad_norm: 0.8243952488877571, iteration: 41074
loss: 1.0336124897003174,grad_norm: 0.9999991270545985, iteration: 41075
loss: 0.9591832160949707,grad_norm: 0.877253251911671, iteration: 41076
loss: 1.042042851448059,grad_norm: 0.999999048250172, iteration: 41077
loss: 1.0133098363876343,grad_norm: 0.9999993648614006, iteration: 41078
loss: 1.036081314086914,grad_norm: 0.9791451166629275, iteration: 41079
loss: 0.9992541670799255,grad_norm: 0.999999176794406, iteration: 41080
loss: 0.9900344014167786,grad_norm: 0.9653989586840253, iteration: 41081
loss: 1.0797544717788696,grad_norm: 0.9999992155073185, iteration: 41082
loss: 1.029445767402649,grad_norm: 0.99999924632335, iteration: 41083
loss: 1.0325804948806763,grad_norm: 0.9999998062517907, iteration: 41084
loss: 1.0329927206039429,grad_norm: 0.8356563038193091, iteration: 41085
loss: 0.9976407289505005,grad_norm: 0.9999990754278729, iteration: 41086
loss: 0.9896728992462158,grad_norm: 0.9931007612324649, iteration: 41087
loss: 1.0037474632263184,grad_norm: 0.999998992801025, iteration: 41088
loss: 1.0656803846359253,grad_norm: 0.9999995214366292, iteration: 41089
loss: 0.967846691608429,grad_norm: 0.8588631981241122, iteration: 41090
loss: 1.0477256774902344,grad_norm: 0.924311602702078, iteration: 41091
loss: 1.0122348070144653,grad_norm: 0.9270835774367129, iteration: 41092
loss: 0.9931502938270569,grad_norm: 0.9999990068068968, iteration: 41093
loss: 1.0348502397537231,grad_norm: 0.9528037127831154, iteration: 41094
loss: 1.0314377546310425,grad_norm: 0.9999993611889668, iteration: 41095
loss: 1.0054447650909424,grad_norm: 0.9999991974542352, iteration: 41096
loss: 1.0037908554077148,grad_norm: 0.9830844462014752, iteration: 41097
loss: 1.0096479654312134,grad_norm: 0.9659393501782366, iteration: 41098
loss: 0.9720775485038757,grad_norm: 0.9999990191081854, iteration: 41099
loss: 1.0307044982910156,grad_norm: 0.9710242477992554, iteration: 41100
loss: 1.1593245267868042,grad_norm: 0.9999996436844556, iteration: 41101
loss: 1.0323234796524048,grad_norm: 0.9999998356063577, iteration: 41102
loss: 1.005142092704773,grad_norm: 0.979489952995628, iteration: 41103
loss: 0.9888744950294495,grad_norm: 0.9999993709370931, iteration: 41104
loss: 1.0259783267974854,grad_norm: 0.9999992945497163, iteration: 41105
loss: 0.988956093788147,grad_norm: 0.9999992274834697, iteration: 41106
loss: 1.012610912322998,grad_norm: 0.972742284315376, iteration: 41107
loss: 0.9820772409439087,grad_norm: 0.7962749442901859, iteration: 41108
loss: 1.0089592933654785,grad_norm: 0.9999996071760334, iteration: 41109
loss: 1.0274131298065186,grad_norm: 0.9999991959262767, iteration: 41110
loss: 1.0329909324645996,grad_norm: 0.8267021827535023, iteration: 41111
loss: 1.009097695350647,grad_norm: 0.8530962782588603, iteration: 41112
loss: 0.9744871258735657,grad_norm: 0.7998519724694001, iteration: 41113
loss: 1.0135048627853394,grad_norm: 0.987933435419254, iteration: 41114
loss: 0.9756584167480469,grad_norm: 0.8940133323645751, iteration: 41115
loss: 0.9892889261245728,grad_norm: 0.9805708268760368, iteration: 41116
loss: 1.0308732986450195,grad_norm: 0.993451214602777, iteration: 41117
loss: 1.0053882598876953,grad_norm: 0.9999991006948766, iteration: 41118
loss: 1.0385297536849976,grad_norm: 0.999999452376124, iteration: 41119
loss: 1.0146303176879883,grad_norm: 0.8760733309174484, iteration: 41120
loss: 0.9964171648025513,grad_norm: 0.9999990593964774, iteration: 41121
loss: 1.0378203392028809,grad_norm: 0.999999393598016, iteration: 41122
loss: 1.0044851303100586,grad_norm: 0.975424296995806, iteration: 41123
loss: 1.0188015699386597,grad_norm: 0.7545171100804218, iteration: 41124
loss: 0.9927285313606262,grad_norm: 0.9736715337491724, iteration: 41125
loss: 1.0108088254928589,grad_norm: 0.999999240454793, iteration: 41126
loss: 1.026768684387207,grad_norm: 0.9999991861293667, iteration: 41127
loss: 1.0489530563354492,grad_norm: 0.9999991019379438, iteration: 41128
loss: 1.0188006162643433,grad_norm: 0.9999990822645941, iteration: 41129
loss: 0.9929160475730896,grad_norm: 0.9654392681191222, iteration: 41130
loss: 0.9793885946273804,grad_norm: 0.8276737611981199, iteration: 41131
loss: 1.0491770505905151,grad_norm: 0.9999993190894579, iteration: 41132
loss: 1.0212823152542114,grad_norm: 0.9999990178341228, iteration: 41133
loss: 1.0630502700805664,grad_norm: 0.9999994643078476, iteration: 41134
loss: 0.9989417195320129,grad_norm: 0.870604887939949, iteration: 41135
loss: 1.0028201341629028,grad_norm: 0.8896096088094119, iteration: 41136
loss: 0.963670015335083,grad_norm: 0.9307153882303053, iteration: 41137
loss: 0.9843519330024719,grad_norm: 0.9999993905357503, iteration: 41138
loss: 0.963813066482544,grad_norm: 0.9999995124306712, iteration: 41139
loss: 1.0092722177505493,grad_norm: 0.964206798273944, iteration: 41140
loss: 0.9936087131500244,grad_norm: 0.9353088625311056, iteration: 41141
loss: 1.0030381679534912,grad_norm: 0.889785186633188, iteration: 41142
loss: 1.0337671041488647,grad_norm: 0.9999995468328107, iteration: 41143
loss: 0.9973900318145752,grad_norm: 0.9999992722721384, iteration: 41144
loss: 0.9968706965446472,grad_norm: 0.9999990958689385, iteration: 41145
loss: 1.0444064140319824,grad_norm: 0.9999993905162173, iteration: 41146
loss: 0.9965741038322449,grad_norm: 0.9999988220586009, iteration: 41147
loss: 1.0373375415802002,grad_norm: 0.9999993766934511, iteration: 41148
loss: 1.0230469703674316,grad_norm: 0.9763170114645091, iteration: 41149
loss: 1.0345888137817383,grad_norm: 0.8952598128637919, iteration: 41150
loss: 1.0102964639663696,grad_norm: 0.9999994614349036, iteration: 41151
loss: 0.9515849351882935,grad_norm: 0.999803439794817, iteration: 41152
loss: 1.02323579788208,grad_norm: 0.9999993950978605, iteration: 41153
loss: 1.0181877613067627,grad_norm: 0.9300374918766173, iteration: 41154
loss: 0.9920625686645508,grad_norm: 0.9999991835756752, iteration: 41155
loss: 1.011096715927124,grad_norm: 0.9999990513645973, iteration: 41156
loss: 0.9970910549163818,grad_norm: 0.9600108398105841, iteration: 41157
loss: 0.9889469146728516,grad_norm: 0.9535336812508971, iteration: 41158
loss: 1.0455321073532104,grad_norm: 0.9999990542194027, iteration: 41159
loss: 1.0021191835403442,grad_norm: 0.9999991327213139, iteration: 41160
loss: 0.9857170581817627,grad_norm: 0.902336478652698, iteration: 41161
loss: 1.0000300407409668,grad_norm: 0.8075439779288245, iteration: 41162
loss: 1.0124279260635376,grad_norm: 0.999999139820848, iteration: 41163
loss: 1.0397710800170898,grad_norm: 0.9999990299887107, iteration: 41164
loss: 0.9917874336242676,grad_norm: 0.9999994224468199, iteration: 41165
loss: 1.0133329629898071,grad_norm: 0.9999992559043491, iteration: 41166
loss: 0.935505211353302,grad_norm: 0.9999990755560413, iteration: 41167
loss: 1.0796071290969849,grad_norm: 0.9999998805619549, iteration: 41168
loss: 1.0038869380950928,grad_norm: 0.93841790743059, iteration: 41169
loss: 1.0175422430038452,grad_norm: 0.9703286699777582, iteration: 41170
loss: 1.0251030921936035,grad_norm: 0.9999992801799803, iteration: 41171
loss: 0.970429539680481,grad_norm: 0.9132062388279527, iteration: 41172
loss: 0.9708969593048096,grad_norm: 0.9999991416920475, iteration: 41173
loss: 0.9251405000686646,grad_norm: 0.999999083246614, iteration: 41174
loss: 1.0071688890457153,grad_norm: 0.9999990751132186, iteration: 41175
loss: 1.0830082893371582,grad_norm: 0.9999991197577459, iteration: 41176
loss: 0.9794130921363831,grad_norm: 0.8817593008416487, iteration: 41177
loss: 0.9697747826576233,grad_norm: 0.9308681000330552, iteration: 41178
loss: 1.0032254457473755,grad_norm: 0.9489519679412438, iteration: 41179
loss: 0.97446608543396,grad_norm: 0.9281481559609484, iteration: 41180
loss: 1.0303877592086792,grad_norm: 0.8606967686904775, iteration: 41181
loss: 0.997246503829956,grad_norm: 0.9999990122297359, iteration: 41182
loss: 0.9563808441162109,grad_norm: 0.9999992032981272, iteration: 41183
loss: 0.998997688293457,grad_norm: 0.9359601831274958, iteration: 41184
loss: 0.9849122762680054,grad_norm: 0.9999990717061724, iteration: 41185
loss: 1.0097036361694336,grad_norm: 0.9571231695461678, iteration: 41186
loss: 1.0154467821121216,grad_norm: 0.9300688113491958, iteration: 41187
loss: 0.9802883863449097,grad_norm: 0.9999989724895877, iteration: 41188
loss: 1.0345138311386108,grad_norm: 0.9999992471952797, iteration: 41189
loss: 0.9971008896827698,grad_norm: 0.9999991394219985, iteration: 41190
loss: 1.0026042461395264,grad_norm: 0.9999991493791337, iteration: 41191
loss: 1.0140315294265747,grad_norm: 0.999999075562411, iteration: 41192
loss: 0.9661254286766052,grad_norm: 0.9068822290534706, iteration: 41193
loss: 1.0485169887542725,grad_norm: 0.9999990812961332, iteration: 41194
loss: 0.9535894393920898,grad_norm: 0.8137267086549493, iteration: 41195
loss: 1.0143336057662964,grad_norm: 0.9999990737698812, iteration: 41196
loss: 0.995730459690094,grad_norm: 0.9156147459043286, iteration: 41197
loss: 1.0068411827087402,grad_norm: 0.9335790156154636, iteration: 41198
loss: 1.0073468685150146,grad_norm: 0.9999991475268423, iteration: 41199
loss: 0.9903450012207031,grad_norm: 0.8925458638074609, iteration: 41200
loss: 0.9788768291473389,grad_norm: 0.9786899495301614, iteration: 41201
loss: 1.0130161046981812,grad_norm: 0.8625090502754864, iteration: 41202
loss: 1.012821912765503,grad_norm: 0.8634600386216735, iteration: 41203
loss: 0.970981240272522,grad_norm: 0.928771525526034, iteration: 41204
loss: 1.0310940742492676,grad_norm: 0.9999992506661064, iteration: 41205
loss: 0.9631557464599609,grad_norm: 0.9775736375034982, iteration: 41206
loss: 0.9733229875564575,grad_norm: 0.8850167100834289, iteration: 41207
loss: 0.9965717196464539,grad_norm: 0.8781160077486396, iteration: 41208
loss: 0.9808275103569031,grad_norm: 0.956474733697187, iteration: 41209
loss: 0.9628379940986633,grad_norm: 0.9999991912868912, iteration: 41210
loss: 1.0316364765167236,grad_norm: 0.9999990462998413, iteration: 41211
loss: 1.0129268169403076,grad_norm: 0.9299692451133293, iteration: 41212
loss: 0.9999060034751892,grad_norm: 0.9180412052968417, iteration: 41213
loss: 0.9639106392860413,grad_norm: 0.8304824161694958, iteration: 41214
loss: 0.9934743046760559,grad_norm: 0.9690839578988583, iteration: 41215
loss: 0.9975765943527222,grad_norm: 0.950611319619687, iteration: 41216
loss: 0.99649977684021,grad_norm: 0.9029046791690617, iteration: 41217
loss: 1.065461277961731,grad_norm: 0.99999930949401, iteration: 41218
loss: 1.0212510824203491,grad_norm: 0.9999990834397787, iteration: 41219
loss: 0.9947103261947632,grad_norm: 0.8346168543418612, iteration: 41220
loss: 1.0137583017349243,grad_norm: 0.9999992260924934, iteration: 41221
loss: 0.9693100452423096,grad_norm: 0.9999993011374727, iteration: 41222
loss: 1.0258878469467163,grad_norm: 0.9395401758298431, iteration: 41223
loss: 0.9838870763778687,grad_norm: 0.9065596442121467, iteration: 41224
loss: 1.005732536315918,grad_norm: 0.9999991902848985, iteration: 41225
loss: 1.0539512634277344,grad_norm: 0.9999990153768016, iteration: 41226
loss: 0.9959799647331238,grad_norm: 0.9999991439713626, iteration: 41227
loss: 0.9503809213638306,grad_norm: 0.9999990996598033, iteration: 41228
loss: 1.0023149251937866,grad_norm: 0.9999880433479631, iteration: 41229
loss: 0.9879752397537231,grad_norm: 0.9999992387746772, iteration: 41230
loss: 1.0236033201217651,grad_norm: 0.9999990940939103, iteration: 41231
loss: 1.0387052297592163,grad_norm: 0.9999993806397929, iteration: 41232
loss: 0.9970221519470215,grad_norm: 0.9999992477399567, iteration: 41233
loss: 1.0368374586105347,grad_norm: 0.8785642321572824, iteration: 41234
loss: 0.9971508979797363,grad_norm: 0.9877445607137132, iteration: 41235
loss: 1.0147978067398071,grad_norm: 0.8676788389812037, iteration: 41236
loss: 1.0156588554382324,grad_norm: 0.9999990774739524, iteration: 41237
loss: 1.0404353141784668,grad_norm: 0.9362372938581537, iteration: 41238
loss: 1.0109649896621704,grad_norm: 0.9999992008462303, iteration: 41239
loss: 1.0111770629882812,grad_norm: 0.9585912662124954, iteration: 41240
loss: 0.9829485416412354,grad_norm: 0.9067946719121558, iteration: 41241
loss: 1.0496560335159302,grad_norm: 0.999999659339798, iteration: 41242
loss: 0.970468282699585,grad_norm: 0.9999991688881289, iteration: 41243
loss: 0.9876888394355774,grad_norm: 0.9051944098704432, iteration: 41244
loss: 0.98338782787323,grad_norm: 0.9999990621826337, iteration: 41245
loss: 1.0354045629501343,grad_norm: 0.9319290467293785, iteration: 41246
loss: 0.9709680080413818,grad_norm: 0.8646320779304159, iteration: 41247
loss: 0.9817224144935608,grad_norm: 0.9999996388132186, iteration: 41248
loss: 1.0091053247451782,grad_norm: 0.8561855319446632, iteration: 41249
loss: 1.017456293106079,grad_norm: 0.9409040507338947, iteration: 41250
loss: 1.0161195993423462,grad_norm: 0.8320986553901432, iteration: 41251
loss: 1.004889965057373,grad_norm: 0.9999991575325115, iteration: 41252
loss: 1.0390058755874634,grad_norm: 0.8206893558010167, iteration: 41253
loss: 1.0821278095245361,grad_norm: 0.9999996405955573, iteration: 41254
loss: 1.0131794214248657,grad_norm: 0.9999991671244579, iteration: 41255
loss: 0.994148850440979,grad_norm: 0.8983213452835553, iteration: 41256
loss: 0.9929842948913574,grad_norm: 0.9234158776648452, iteration: 41257
loss: 1.0020262002944946,grad_norm: 0.9999990576307856, iteration: 41258
loss: 1.030622959136963,grad_norm: 0.9999996494966741, iteration: 41259
loss: 1.0123248100280762,grad_norm: 0.8715046653580367, iteration: 41260
loss: 0.9925116896629333,grad_norm: 0.9999992150222252, iteration: 41261
loss: 0.9849351048469543,grad_norm: 0.8658214129180228, iteration: 41262
loss: 1.015769600868225,grad_norm: 0.8395750258596971, iteration: 41263
loss: 0.9875596165657043,grad_norm: 0.7822948175198539, iteration: 41264
loss: 1.01112699508667,grad_norm: 0.9999989799761919, iteration: 41265
loss: 1.004568338394165,grad_norm: 0.9999992257707325, iteration: 41266
loss: 0.987180233001709,grad_norm: 0.9442996654138434, iteration: 41267
loss: 1.0044687986373901,grad_norm: 0.8809019908529988, iteration: 41268
loss: 0.9957648515701294,grad_norm: 0.9999990327639222, iteration: 41269
loss: 0.9696155190467834,grad_norm: 0.9572881799566207, iteration: 41270
loss: 1.020257592201233,grad_norm: 0.9999991391868317, iteration: 41271
loss: 0.9787793755531311,grad_norm: 0.8379733742393939, iteration: 41272
loss: 1.0003999471664429,grad_norm: 0.9999990835111545, iteration: 41273
loss: 0.9670689105987549,grad_norm: 0.9999992303058518, iteration: 41274
loss: 1.0420411825180054,grad_norm: 0.9999992242079623, iteration: 41275
loss: 1.0631694793701172,grad_norm: 0.9999994695479437, iteration: 41276
loss: 1.0004605054855347,grad_norm: 0.840443371006924, iteration: 41277
loss: 1.0238215923309326,grad_norm: 0.9999991163946892, iteration: 41278
loss: 0.9842437505722046,grad_norm: 0.9027313097724567, iteration: 41279
loss: 0.9927022457122803,grad_norm: 0.999132574567294, iteration: 41280
loss: 1.007154941558838,grad_norm: 0.9999990594635195, iteration: 41281
loss: 1.078864574432373,grad_norm: 0.9999998438391281, iteration: 41282
loss: 1.012988805770874,grad_norm: 0.8162229394658762, iteration: 41283
loss: 1.0292527675628662,grad_norm: 0.9999992777894476, iteration: 41284
loss: 1.0001349449157715,grad_norm: 0.9209447434773114, iteration: 41285
loss: 0.9922152757644653,grad_norm: 0.9877953309403595, iteration: 41286
loss: 1.0047951936721802,grad_norm: 0.9999991792273073, iteration: 41287
loss: 1.0383825302124023,grad_norm: 0.9999992225006895, iteration: 41288
loss: 1.007926344871521,grad_norm: 0.9999997633781095, iteration: 41289
loss: 1.0338057279586792,grad_norm: 0.999999103219596, iteration: 41290
loss: 1.0204273462295532,grad_norm: 0.9999994513125309, iteration: 41291
loss: 0.979889452457428,grad_norm: 0.8729650386000808, iteration: 41292
loss: 1.0318291187286377,grad_norm: 0.9999991227898541, iteration: 41293
loss: 1.0126969814300537,grad_norm: 0.9999991980275125, iteration: 41294
loss: 1.0281729698181152,grad_norm: 0.9999989395768951, iteration: 41295
loss: 1.0484120845794678,grad_norm: 0.9999992613900389, iteration: 41296
loss: 0.9909282922744751,grad_norm: 0.9999990715740399, iteration: 41297
loss: 1.0201656818389893,grad_norm: 0.9999989926346784, iteration: 41298
loss: 1.0312262773513794,grad_norm: 0.9999991523484089, iteration: 41299
loss: 1.0295456647872925,grad_norm: 0.8911764447236578, iteration: 41300
loss: 1.0113940238952637,grad_norm: 0.8482719704949595, iteration: 41301
loss: 1.0353347063064575,grad_norm: 0.7979129988294936, iteration: 41302
loss: 0.9838650822639465,grad_norm: 0.9999989253186709, iteration: 41303
loss: 1.0225626230239868,grad_norm: 0.9999992414258146, iteration: 41304
loss: 0.9816938042640686,grad_norm: 0.9999992323391886, iteration: 41305
loss: 0.9870527386665344,grad_norm: 0.9243630533341713, iteration: 41306
loss: 1.011129379272461,grad_norm: 0.9068008676046821, iteration: 41307
loss: 1.0052515268325806,grad_norm: 0.902805779620526, iteration: 41308
loss: 1.0303012132644653,grad_norm: 0.9305432428446594, iteration: 41309
loss: 1.0077857971191406,grad_norm: 0.9999991493045028, iteration: 41310
loss: 1.0037156343460083,grad_norm: 0.9999996597197588, iteration: 41311
loss: 0.9931861758232117,grad_norm: 0.9999989666062884, iteration: 41312
loss: 0.9947021007537842,grad_norm: 0.8870500508734709, iteration: 41313
loss: 1.0286006927490234,grad_norm: 0.8680051070359455, iteration: 41314
loss: 1.0015225410461426,grad_norm: 0.9999991714916848, iteration: 41315
loss: 1.016092300415039,grad_norm: 0.999999300492128, iteration: 41316
loss: 0.9812732934951782,grad_norm: 0.8762562813090088, iteration: 41317
loss: 0.9728460907936096,grad_norm: 0.999999035721282, iteration: 41318
loss: 1.041056752204895,grad_norm: 0.802939924618005, iteration: 41319
loss: 0.9528364539146423,grad_norm: 0.9219810785586353, iteration: 41320
loss: 1.0087151527404785,grad_norm: 0.9270045482517862, iteration: 41321
loss: 1.0304075479507446,grad_norm: 0.865320413242979, iteration: 41322
loss: 0.9780883193016052,grad_norm: 0.920129929117485, iteration: 41323
loss: 1.0048422813415527,grad_norm: 0.9905896927142896, iteration: 41324
loss: 1.0082659721374512,grad_norm: 0.8818500989514367, iteration: 41325
loss: 0.9945471286773682,grad_norm: 0.9999989980262102, iteration: 41326
loss: 1.0318737030029297,grad_norm: 0.9999996239889575, iteration: 41327
loss: 1.1149832010269165,grad_norm: 0.9999996152138237, iteration: 41328
loss: 1.0193498134613037,grad_norm: 0.9999994243553182, iteration: 41329
loss: 0.9895298480987549,grad_norm: 0.9069135339128276, iteration: 41330
loss: 1.0094196796417236,grad_norm: 0.9184477159509107, iteration: 41331
loss: 1.0045756101608276,grad_norm: 0.9999989990990354, iteration: 41332
loss: 0.9709071516990662,grad_norm: 0.9999990959352232, iteration: 41333
loss: 0.9936605095863342,grad_norm: 0.9999991793782833, iteration: 41334
loss: 0.9854435324668884,grad_norm: 0.8886980968209548, iteration: 41335
loss: 1.04666006565094,grad_norm: 0.9999989860767082, iteration: 41336
loss: 1.008665919303894,grad_norm: 0.89634147143549, iteration: 41337
loss: 0.9730338454246521,grad_norm: 0.9999991703167009, iteration: 41338
loss: 1.0109765529632568,grad_norm: 0.9999991239434658, iteration: 41339
loss: 1.0091485977172852,grad_norm: 0.9999990201563046, iteration: 41340
loss: 1.0039141178131104,grad_norm: 0.7585669348674366, iteration: 41341
loss: 0.9904476404190063,grad_norm: 0.9605336359263974, iteration: 41342
loss: 1.000149130821228,grad_norm: 0.8963652545981834, iteration: 41343
loss: 1.0202327966690063,grad_norm: 0.9999995900929795, iteration: 41344
loss: 1.0702576637268066,grad_norm: 0.9999996776361042, iteration: 41345
loss: 1.0280539989471436,grad_norm: 0.9510687129489898, iteration: 41346
loss: 1.0900323390960693,grad_norm: 0.9999995033722175, iteration: 41347
loss: 1.061414361000061,grad_norm: 0.9582573911737541, iteration: 41348
loss: 1.0038495063781738,grad_norm: 0.9754792576719273, iteration: 41349
loss: 1.0008080005645752,grad_norm: 0.9999992964671018, iteration: 41350
loss: 1.0143893957138062,grad_norm: 0.9999991042304357, iteration: 41351
loss: 0.9999809861183167,grad_norm: 0.9999997035051914, iteration: 41352
loss: 1.0114446878433228,grad_norm: 0.9999991724781001, iteration: 41353
loss: 1.01358962059021,grad_norm: 0.9063498537221863, iteration: 41354
loss: 0.9691548347473145,grad_norm: 0.9999992449210122, iteration: 41355
loss: 0.9752886891365051,grad_norm: 0.9999990903559645, iteration: 41356
loss: 1.0000052452087402,grad_norm: 0.9999992655405395, iteration: 41357
loss: 1.030831217765808,grad_norm: 0.999999196678689, iteration: 41358
loss: 1.0306495428085327,grad_norm: 0.9999990058808186, iteration: 41359
loss: 1.0020791292190552,grad_norm: 0.9999991739884436, iteration: 41360
loss: 0.9892007112503052,grad_norm: 0.8711788677519368, iteration: 41361
loss: 1.0263559818267822,grad_norm: 0.9999991819035648, iteration: 41362
loss: 1.0066046714782715,grad_norm: 0.9999991627597545, iteration: 41363
loss: 1.0583997964859009,grad_norm: 0.9999996607708899, iteration: 41364
loss: 1.0133119821548462,grad_norm: 0.9999990057166342, iteration: 41365
loss: 0.9847018122673035,grad_norm: 0.8741453039381718, iteration: 41366
loss: 1.0067189931869507,grad_norm: 0.9399215799313797, iteration: 41367
loss: 0.9809074997901917,grad_norm: 0.8558354979212494, iteration: 41368
loss: 1.0098087787628174,grad_norm: 0.9996291197402515, iteration: 41369
loss: 0.9885927438735962,grad_norm: 0.9999991137748531, iteration: 41370
loss: 1.009424090385437,grad_norm: 0.9586874927128308, iteration: 41371
loss: 0.9888380169868469,grad_norm: 0.9614358094115113, iteration: 41372
loss: 0.9766591787338257,grad_norm: 0.8658682918633813, iteration: 41373
loss: 1.0058350563049316,grad_norm: 0.999999123787907, iteration: 41374
loss: 1.033973217010498,grad_norm: 0.9676094222143037, iteration: 41375
loss: 1.0020180940628052,grad_norm: 0.9999993231241536, iteration: 41376
loss: 0.982267439365387,grad_norm: 0.9999992077526589, iteration: 41377
loss: 1.0049363374710083,grad_norm: 0.9999990411748049, iteration: 41378
loss: 1.0192452669143677,grad_norm: 0.8389363059837405, iteration: 41379
loss: 0.9973501563072205,grad_norm: 0.9851890415058954, iteration: 41380
loss: 1.0025588274002075,grad_norm: 0.8725738122844268, iteration: 41381
loss: 1.0418663024902344,grad_norm: 0.9999990798857309, iteration: 41382
loss: 1.009823203086853,grad_norm: 0.9522813265701228, iteration: 41383
loss: 0.9750321507453918,grad_norm: 0.927944921136313, iteration: 41384
loss: 1.0071263313293457,grad_norm: 0.9999991835200411, iteration: 41385
loss: 1.0211505889892578,grad_norm: 0.9999989835992551, iteration: 41386
loss: 1.0558195114135742,grad_norm: 0.9999997809020154, iteration: 41387
loss: 1.0316098928451538,grad_norm: 0.9575321663653616, iteration: 41388
loss: 1.0006701946258545,grad_norm: 0.9318121044171773, iteration: 41389
loss: 0.9946084022521973,grad_norm: 0.9999989844343559, iteration: 41390
loss: 1.0230504274368286,grad_norm: 0.9999995784640612, iteration: 41391
loss: 1.0250755548477173,grad_norm: 0.9999991246214851, iteration: 41392
loss: 1.032158613204956,grad_norm: 0.9999993024963115, iteration: 41393
loss: 0.9977009892463684,grad_norm: 0.9577270120759984, iteration: 41394
loss: 1.0121780633926392,grad_norm: 0.9999995044388414, iteration: 41395
loss: 1.045454978942871,grad_norm: 0.9999989732703694, iteration: 41396
loss: 1.0180965662002563,grad_norm: 0.9999994678091723, iteration: 41397
loss: 1.0249992609024048,grad_norm: 0.9999991654595191, iteration: 41398
loss: 1.0032376050949097,grad_norm: 0.9999991182078005, iteration: 41399
loss: 0.9945435523986816,grad_norm: 0.8537800088973345, iteration: 41400
loss: 1.019728422164917,grad_norm: 0.869031221394927, iteration: 41401
loss: 1.0236033201217651,grad_norm: 0.9787366745239525, iteration: 41402
loss: 0.9419440031051636,grad_norm: 0.8888750145917063, iteration: 41403
loss: 1.0116844177246094,grad_norm: 0.9999992979095127, iteration: 41404
loss: 0.9900697469711304,grad_norm: 0.9814521501526903, iteration: 41405
loss: 0.9807007908821106,grad_norm: 0.9999990555634551, iteration: 41406
loss: 1.0093133449554443,grad_norm: 0.8398402845162048, iteration: 41407
loss: 0.9827225208282471,grad_norm: 0.999999075401944, iteration: 41408
loss: 0.9945482611656189,grad_norm: 0.943678184829394, iteration: 41409
loss: 0.9812573194503784,grad_norm: 0.9027517906211275, iteration: 41410
loss: 0.9858288764953613,grad_norm: 0.999999159561931, iteration: 41411
loss: 1.0157772302627563,grad_norm: 0.9999996675551327, iteration: 41412
loss: 0.9965266585350037,grad_norm: 0.9999995558530433, iteration: 41413
loss: 0.9786396622657776,grad_norm: 0.9546848982303828, iteration: 41414
loss: 0.9744006991386414,grad_norm: 0.9999991422745188, iteration: 41415
loss: 1.0580904483795166,grad_norm: 0.9999991229856874, iteration: 41416
loss: 1.0013362169265747,grad_norm: 0.9684931987204051, iteration: 41417
loss: 1.028943419456482,grad_norm: 0.9999991662785179, iteration: 41418
loss: 1.0247470140457153,grad_norm: 0.9717875280095085, iteration: 41419
loss: 1.0313420295715332,grad_norm: 0.9621488704736718, iteration: 41420
loss: 1.0089863538742065,grad_norm: 0.9999993333961281, iteration: 41421
loss: 1.0026781558990479,grad_norm: 0.9999990301875563, iteration: 41422
loss: 0.9851504564285278,grad_norm: 0.7788282744153304, iteration: 41423
loss: 1.0118335485458374,grad_norm: 0.9999998340850698, iteration: 41424
loss: 1.0234836339950562,grad_norm: 0.9999992981845369, iteration: 41425
loss: 1.0453816652297974,grad_norm: 0.9999995615588073, iteration: 41426
loss: 0.9864087104797363,grad_norm: 0.8302023486774138, iteration: 41427
loss: 0.9881982207298279,grad_norm: 0.9999990519029976, iteration: 41428
loss: 1.055067777633667,grad_norm: 0.9999996011338708, iteration: 41429
loss: 0.9931320548057556,grad_norm: 0.9999991623167249, iteration: 41430
loss: 0.9947465062141418,grad_norm: 0.8554525466704623, iteration: 41431
loss: 1.0365126132965088,grad_norm: 0.9715487428433471, iteration: 41432
loss: 1.017714262008667,grad_norm: 0.9408267909722202, iteration: 41433
loss: 0.9978044033050537,grad_norm: 0.7745530452971102, iteration: 41434
loss: 0.9816825985908508,grad_norm: 0.9999989893053779, iteration: 41435
loss: 1.0295873880386353,grad_norm: 0.9736005046441006, iteration: 41436
loss: 0.9991430640220642,grad_norm: 0.8448171192158597, iteration: 41437
loss: 0.9755866527557373,grad_norm: 0.9999991383842114, iteration: 41438
loss: 0.9920456409454346,grad_norm: 0.999999267251921, iteration: 41439
loss: 0.980099618434906,grad_norm: 0.9093126337580779, iteration: 41440
loss: 0.9860674142837524,grad_norm: 0.998415326160505, iteration: 41441
loss: 0.98027503490448,grad_norm: 0.9999990484726771, iteration: 41442
loss: 0.9951149225234985,grad_norm: 0.9283151605544594, iteration: 41443
loss: 0.9746893644332886,grad_norm: 0.9999989445384337, iteration: 41444
loss: 0.9999350309371948,grad_norm: 0.9668708726052259, iteration: 41445
loss: 0.9933880567550659,grad_norm: 0.9390635675518091, iteration: 41446
loss: 0.9797254204750061,grad_norm: 0.9999992382248213, iteration: 41447
loss: 0.9884111881256104,grad_norm: 0.9312005215767287, iteration: 41448
loss: 0.9567147493362427,grad_norm: 0.8596895229354137, iteration: 41449
loss: 1.011278510093689,grad_norm: 0.9532642360919613, iteration: 41450
loss: 1.005018949508667,grad_norm: 0.905982030773489, iteration: 41451
loss: 1.0033091306686401,grad_norm: 0.9999992892942924, iteration: 41452
loss: 1.0210679769515991,grad_norm: 0.9999990293011902, iteration: 41453
loss: 1.0218042135238647,grad_norm: 0.8509354431411514, iteration: 41454
loss: 1.0345309972763062,grad_norm: 0.9473840001814376, iteration: 41455
loss: 0.9948908090591431,grad_norm: 0.8958598929514429, iteration: 41456
loss: 1.0166987180709839,grad_norm: 0.9999992336402663, iteration: 41457
loss: 1.0157161951065063,grad_norm: 0.9999998734717385, iteration: 41458
loss: 1.0062111616134644,grad_norm: 0.9304291780585705, iteration: 41459
loss: 1.0103017091751099,grad_norm: 0.9999992355300319, iteration: 41460
loss: 1.0106565952301025,grad_norm: 0.9515528985160528, iteration: 41461
loss: 0.9592486023902893,grad_norm: 0.9999990826308903, iteration: 41462
loss: 1.0212918519973755,grad_norm: 0.9999994022533922, iteration: 41463
loss: 1.0060920715332031,grad_norm: 0.9891146721515451, iteration: 41464
loss: 1.0332629680633545,grad_norm: 0.88310148317235, iteration: 41465
loss: 0.9764515161514282,grad_norm: 0.9999990838647157, iteration: 41466
loss: 1.0015267133712769,grad_norm: 0.9999991815878075, iteration: 41467
loss: 0.9699211120605469,grad_norm: 0.9678686715967146, iteration: 41468
loss: 1.0236144065856934,grad_norm: 0.9999991912198578, iteration: 41469
loss: 1.0027717351913452,grad_norm: 0.9999992049095033, iteration: 41470
loss: 1.212990164756775,grad_norm: 0.999999793694882, iteration: 41471
loss: 0.9819778203964233,grad_norm: 0.9181435071181916, iteration: 41472
loss: 0.9908784031867981,grad_norm: 0.8016866984645613, iteration: 41473
loss: 0.9858839511871338,grad_norm: 0.9999990719136808, iteration: 41474
loss: 0.9568514823913574,grad_norm: 0.8780070735614445, iteration: 41475
loss: 1.0036678314208984,grad_norm: 0.9351424566760418, iteration: 41476
loss: 0.9895084500312805,grad_norm: 0.9999992834846778, iteration: 41477
loss: 0.9708109498023987,grad_norm: 0.9999992603974013, iteration: 41478
loss: 0.9567398428916931,grad_norm: 0.9999990939456265, iteration: 41479
loss: 1.0239254236221313,grad_norm: 0.7484863485632612, iteration: 41480
loss: 0.9930450916290283,grad_norm: 0.9999989973406275, iteration: 41481
loss: 1.024064540863037,grad_norm: 0.8745311862375229, iteration: 41482
loss: 0.9776034355163574,grad_norm: 0.9438190410162937, iteration: 41483
loss: 0.9994763135910034,grad_norm: 0.8134431374351369, iteration: 41484
loss: 0.9721270799636841,grad_norm: 0.8633815892700017, iteration: 41485
loss: 0.986262321472168,grad_norm: 0.9999990612616432, iteration: 41486
loss: 0.9678412675857544,grad_norm: 0.9849099534855159, iteration: 41487
loss: 0.9904167056083679,grad_norm: 0.9999991316017152, iteration: 41488
loss: 1.0422531366348267,grad_norm: 0.9999992195867445, iteration: 41489
loss: 1.0210542678833008,grad_norm: 0.9999990369423312, iteration: 41490
loss: 0.9812417030334473,grad_norm: 0.9572290847787194, iteration: 41491
loss: 0.986242949962616,grad_norm: 0.8855545518743918, iteration: 41492
loss: 0.999947726726532,grad_norm: 0.9999991796524939, iteration: 41493
loss: 0.9779722690582275,grad_norm: 0.9484354503528329, iteration: 41494
loss: 1.0115660429000854,grad_norm: 0.8936824520237528, iteration: 41495
loss: 0.9949778914451599,grad_norm: 0.9999990959179084, iteration: 41496
loss: 1.026943325996399,grad_norm: 0.9999990395587203, iteration: 41497
loss: 1.1018766164779663,grad_norm: 0.9999993768589523, iteration: 41498
loss: 1.0195895433425903,grad_norm: 0.9999991240651562, iteration: 41499
loss: 0.9611465334892273,grad_norm: 0.9143950876050685, iteration: 41500
loss: 0.9877690672874451,grad_norm: 0.9999991797696247, iteration: 41501
loss: 0.9788141846656799,grad_norm: 0.9492574586393633, iteration: 41502
loss: 0.9972008466720581,grad_norm: 0.9999990884572931, iteration: 41503
loss: 0.9903318285942078,grad_norm: 0.8861221113052299, iteration: 41504
loss: 0.9573960900306702,grad_norm: 0.8743403497117779, iteration: 41505
loss: 0.9786145687103271,grad_norm: 0.8179631509038933, iteration: 41506
loss: 0.980538010597229,grad_norm: 0.9999991074930544, iteration: 41507
loss: 0.9860068559646606,grad_norm: 0.9999991237743858, iteration: 41508
loss: 1.031301736831665,grad_norm: 0.9821485381921642, iteration: 41509
loss: 0.9713468551635742,grad_norm: 0.9394029042967776, iteration: 41510
loss: 1.024652123451233,grad_norm: 0.9234420378481951, iteration: 41511
loss: 1.0142971277236938,grad_norm: 0.9970292726453777, iteration: 41512
loss: 1.0107280015945435,grad_norm: 0.9999989615276516, iteration: 41513
loss: 1.0139598846435547,grad_norm: 0.9999990556425143, iteration: 41514
loss: 0.9940689206123352,grad_norm: 0.859062883469769, iteration: 41515
loss: 1.0121992826461792,grad_norm: 0.9833495368190402, iteration: 41516
loss: 1.0040408372879028,grad_norm: 0.8137775279714975, iteration: 41517
loss: 1.0300337076187134,grad_norm: 0.9999990828287016, iteration: 41518
loss: 0.9969491362571716,grad_norm: 0.999999114416806, iteration: 41519
loss: 1.008355736732483,grad_norm: 0.9999990305501744, iteration: 41520
loss: 0.9996038675308228,grad_norm: 0.9999990926233562, iteration: 41521
loss: 1.0094748735427856,grad_norm: 0.9999991043301362, iteration: 41522
loss: 0.9747917652130127,grad_norm: 0.861659808294133, iteration: 41523
loss: 0.9678881168365479,grad_norm: 0.8385317581301907, iteration: 41524
loss: 0.9803520441055298,grad_norm: 0.8316015318164678, iteration: 41525
loss: 0.9718151688575745,grad_norm: 0.9750649884817373, iteration: 41526
loss: 1.0006344318389893,grad_norm: 0.956999917193615, iteration: 41527
loss: 1.0284559726715088,grad_norm: 0.8834477932481034, iteration: 41528
loss: 1.023477554321289,grad_norm: 0.8672221798545287, iteration: 41529
loss: 1.004514455795288,grad_norm: 0.9822799385073958, iteration: 41530
loss: 1.0138970613479614,grad_norm: 0.9132294352985896, iteration: 41531
loss: 1.043443202972412,grad_norm: 0.9883602581096832, iteration: 41532
loss: 0.9904897212982178,grad_norm: 0.9999989542791108, iteration: 41533
loss: 0.967632532119751,grad_norm: 0.8935482355476244, iteration: 41534
loss: 1.0218216180801392,grad_norm: 0.896075467775541, iteration: 41535
loss: 0.9883298873901367,grad_norm: 0.9999992348849459, iteration: 41536
loss: 0.9954959154129028,grad_norm: 0.9824477480036626, iteration: 41537
loss: 1.0043678283691406,grad_norm: 0.9211496913818319, iteration: 41538
loss: 0.9394136667251587,grad_norm: 0.9091043597106697, iteration: 41539
loss: 1.0017242431640625,grad_norm: 0.9634742753950312, iteration: 41540
loss: 0.9984174370765686,grad_norm: 0.935574701460508, iteration: 41541
loss: 1.011962890625,grad_norm: 0.9555852088156023, iteration: 41542
loss: 0.9964737892150879,grad_norm: 0.9383567556395834, iteration: 41543
loss: 1.0154746770858765,grad_norm: 0.9827444765874132, iteration: 41544
loss: 0.9840596318244934,grad_norm: 0.982671928758349, iteration: 41545
loss: 0.9790862202644348,grad_norm: 0.9999992258696145, iteration: 41546
loss: 1.0150494575500488,grad_norm: 0.9999992951066211, iteration: 41547
loss: 0.9938897490501404,grad_norm: 0.8860684394890559, iteration: 41548
loss: 0.995221734046936,grad_norm: 0.8645956345489371, iteration: 41549
loss: 1.0048999786376953,grad_norm: 0.9798443955205612, iteration: 41550
loss: 0.9992794990539551,grad_norm: 0.9999993194889358, iteration: 41551
loss: 0.9914600253105164,grad_norm: 0.9999990933541625, iteration: 41552
loss: 0.9948662519454956,grad_norm: 0.999999180418462, iteration: 41553
loss: 0.9748583436012268,grad_norm: 0.9999991981999494, iteration: 41554
loss: 1.087280511856079,grad_norm: 0.9999998451228893, iteration: 41555
loss: 1.0226489305496216,grad_norm: 0.9999991865592422, iteration: 41556
loss: 0.9915992617607117,grad_norm: 0.8978029001049799, iteration: 41557
loss: 0.9896274209022522,grad_norm: 0.9536711581092907, iteration: 41558
loss: 1.004914402961731,grad_norm: 0.9061278307407956, iteration: 41559
loss: 0.9686784148216248,grad_norm: 0.9999989927732335, iteration: 41560
loss: 1.0387258529663086,grad_norm: 0.9159824403574439, iteration: 41561
loss: 1.0094289779663086,grad_norm: 0.8715356174601026, iteration: 41562
loss: 1.0398529767990112,grad_norm: 0.9999992435442853, iteration: 41563
loss: 1.0051980018615723,grad_norm: 0.9999991235296358, iteration: 41564
loss: 0.9530882239341736,grad_norm: 0.8348234042087992, iteration: 41565
loss: 0.9991853833198547,grad_norm: 0.9057761861978341, iteration: 41566
loss: 0.9506645798683167,grad_norm: 0.9895532514971448, iteration: 41567
loss: 0.9959296584129333,grad_norm: 0.9125414406640477, iteration: 41568
loss: 1.0357720851898193,grad_norm: 0.8670263432643247, iteration: 41569
loss: 0.9729506373405457,grad_norm: 0.9999990181201847, iteration: 41570
loss: 0.9887670874595642,grad_norm: 0.9999990743386888, iteration: 41571
loss: 1.0504266023635864,grad_norm: 0.9999995439708037, iteration: 41572
loss: 1.019046664237976,grad_norm: 0.9183412160227961, iteration: 41573
loss: 1.0055060386657715,grad_norm: 0.7505889248035016, iteration: 41574
loss: 1.0064657926559448,grad_norm: 0.9999989856218887, iteration: 41575
loss: 1.0243593454360962,grad_norm: 0.9999991699611118, iteration: 41576
loss: 0.9685561060905457,grad_norm: 0.9248999663976586, iteration: 41577
loss: 0.9681031107902527,grad_norm: 0.9999990900789016, iteration: 41578
loss: 0.9900637865066528,grad_norm: 0.8183600752342273, iteration: 41579
loss: 0.9994223713874817,grad_norm: 0.9638879340663626, iteration: 41580
loss: 0.9777827858924866,grad_norm: 0.9630030837095193, iteration: 41581
loss: 1.0263551473617554,grad_norm: 0.9449610892391533, iteration: 41582
loss: 1.0047492980957031,grad_norm: 0.990045085246187, iteration: 41583
loss: 1.0459020137786865,grad_norm: 0.9635925172507571, iteration: 41584
loss: 1.008231520652771,grad_norm: 0.8306146021706274, iteration: 41585
loss: 0.9679234027862549,grad_norm: 0.8881120949845702, iteration: 41586
loss: 0.99920654296875,grad_norm: 0.8956143001079051, iteration: 41587
loss: 1.0235916376113892,grad_norm: 0.9999992025108466, iteration: 41588
loss: 0.9628186821937561,grad_norm: 0.9999999841747353, iteration: 41589
loss: 1.0123589038848877,grad_norm: 0.9333596987001096, iteration: 41590
loss: 0.9653069972991943,grad_norm: 0.9999991261307217, iteration: 41591
loss: 0.9465281367301941,grad_norm: 0.9999989707012465, iteration: 41592
loss: 0.9885786175727844,grad_norm: 0.8941396525385983, iteration: 41593
loss: 1.0242023468017578,grad_norm: 0.8477694002345919, iteration: 41594
loss: 1.0049097537994385,grad_norm: 0.9999990880367331, iteration: 41595
loss: 1.0126032829284668,grad_norm: 0.7995948634859318, iteration: 41596
loss: 0.9928240776062012,grad_norm: 0.9950692383608511, iteration: 41597
loss: 0.9911081790924072,grad_norm: 0.9229558194202714, iteration: 41598
loss: 0.9950342774391174,grad_norm: 0.9999992760557502, iteration: 41599
loss: 0.9638296365737915,grad_norm: 0.8675006592486386, iteration: 41600
loss: 0.9766384363174438,grad_norm: 0.9367273477322173, iteration: 41601
loss: 1.0315301418304443,grad_norm: 0.9999993136632233, iteration: 41602
loss: 0.9880021810531616,grad_norm: 0.9999991348167449, iteration: 41603
loss: 1.0196592807769775,grad_norm: 0.9999991414334225, iteration: 41604
loss: 0.9968088269233704,grad_norm: 0.9999990992666377, iteration: 41605
loss: 0.9849037528038025,grad_norm: 0.9296310237832337, iteration: 41606
loss: 0.9930477142333984,grad_norm: 0.9888941595748342, iteration: 41607
loss: 0.9861341714859009,grad_norm: 0.9999990401326896, iteration: 41608
loss: 1.020180106163025,grad_norm: 0.9999991338694701, iteration: 41609
loss: 1.0265558958053589,grad_norm: 0.9999990346999023, iteration: 41610
loss: 1.0312422513961792,grad_norm: 0.9999998757333232, iteration: 41611
loss: 1.0071957111358643,grad_norm: 0.9999991174993405, iteration: 41612
loss: 0.961364209651947,grad_norm: 0.9095633560381837, iteration: 41613
loss: 1.0411956310272217,grad_norm: 0.9951721012404456, iteration: 41614
loss: 0.9778509140014648,grad_norm: 0.8288103882868743, iteration: 41615
loss: 0.9972734451293945,grad_norm: 0.9890686441628581, iteration: 41616
loss: 0.9952846169471741,grad_norm: 0.9338687919715147, iteration: 41617
loss: 1.0032801628112793,grad_norm: 0.9999993518889864, iteration: 41618
loss: 1.0199239253997803,grad_norm: 0.9002010247081259, iteration: 41619
loss: 0.9948411583900452,grad_norm: 0.94660707097899, iteration: 41620
loss: 1.0235393047332764,grad_norm: 0.999999084608721, iteration: 41621
loss: 1.0176925659179688,grad_norm: 0.9999991242152816, iteration: 41622
loss: 0.9698412418365479,grad_norm: 0.999999047310393, iteration: 41623
loss: 0.9780271649360657,grad_norm: 0.9999991332426067, iteration: 41624
loss: 1.0254887342453003,grad_norm: 0.8957940641881393, iteration: 41625
loss: 0.9989464282989502,grad_norm: 0.8680974281246969, iteration: 41626
loss: 0.9867875576019287,grad_norm: 0.881148439783149, iteration: 41627
loss: 1.0068572759628296,grad_norm: 0.9999993092577248, iteration: 41628
loss: 0.9755694270133972,grad_norm: 0.8767854226046551, iteration: 41629
loss: 1.0298317670822144,grad_norm: 0.9130240764473281, iteration: 41630
loss: 0.962421178817749,grad_norm: 0.9999990559569646, iteration: 41631
loss: 0.9953588247299194,grad_norm: 0.9999991653780549, iteration: 41632
loss: 1.0240544080734253,grad_norm: 0.9961263460342555, iteration: 41633
loss: 1.0360760688781738,grad_norm: 0.9899227400345407, iteration: 41634
loss: 1.0236501693725586,grad_norm: 0.9123592039960863, iteration: 41635
loss: 1.032636046409607,grad_norm: 0.9999990729973235, iteration: 41636
loss: 0.9992896914482117,grad_norm: 0.999999155721725, iteration: 41637
loss: 0.9776347279548645,grad_norm: 0.9328364996928639, iteration: 41638
loss: 1.0258179903030396,grad_norm: 0.992111533246547, iteration: 41639
loss: 1.0000377893447876,grad_norm: 0.9999990888839376, iteration: 41640
loss: 1.0243929624557495,grad_norm: 0.8486134378809206, iteration: 41641
loss: 1.032353401184082,grad_norm: 0.8718584978403817, iteration: 41642
loss: 1.0012091398239136,grad_norm: 0.9337898855257414, iteration: 41643
loss: 1.0062081813812256,grad_norm: 0.9999991079018425, iteration: 41644
loss: 0.9849331974983215,grad_norm: 0.9552708155886607, iteration: 41645
loss: 0.9835279583930969,grad_norm: 0.9999990083175022, iteration: 41646
loss: 1.0442776679992676,grad_norm: 0.9999989235955388, iteration: 41647
loss: 1.0017389059066772,grad_norm: 0.9393649535385362, iteration: 41648
loss: 0.9651996493339539,grad_norm: 0.9140306736173149, iteration: 41649
loss: 1.03673255443573,grad_norm: 0.8828914131784288, iteration: 41650
loss: 0.9835806488990784,grad_norm: 0.9999991420341884, iteration: 41651
loss: 0.9939249157905579,grad_norm: 0.9781127191913002, iteration: 41652
loss: 1.0220441818237305,grad_norm: 0.9541723544584014, iteration: 41653
loss: 1.0078500509262085,grad_norm: 0.7567259406524123, iteration: 41654
loss: 1.0424094200134277,grad_norm: 0.9999993560428176, iteration: 41655
loss: 0.9916028380393982,grad_norm: 0.8665673870989204, iteration: 41656
loss: 0.9728219509124756,grad_norm: 0.9941624366336792, iteration: 41657
loss: 0.9957041144371033,grad_norm: 0.9999991148480599, iteration: 41658
loss: 1.008044958114624,grad_norm: 0.9999988854873354, iteration: 41659
loss: 0.9614948630332947,grad_norm: 0.8838509821478584, iteration: 41660
loss: 1.0135961771011353,grad_norm: 0.9999991067434253, iteration: 41661
loss: 0.9862959384918213,grad_norm: 0.9577907974944176, iteration: 41662
loss: 0.99668949842453,grad_norm: 0.9999991845349295, iteration: 41663
loss: 0.9984361529350281,grad_norm: 0.99999903339041, iteration: 41664
loss: 0.9897095561027527,grad_norm: 0.999999296501977, iteration: 41665
loss: 0.9470274448394775,grad_norm: 0.9999992227317864, iteration: 41666
loss: 1.0004971027374268,grad_norm: 0.9334499045229218, iteration: 41667
loss: 1.0022525787353516,grad_norm: 0.9837854737596232, iteration: 41668
loss: 1.0152819156646729,grad_norm: 0.9999993149921333, iteration: 41669
loss: 1.0180562734603882,grad_norm: 0.9999993226267688, iteration: 41670
loss: 1.008686900138855,grad_norm: 0.9999990338088782, iteration: 41671
loss: 1.0033138990402222,grad_norm: 0.9283096721313504, iteration: 41672
loss: 0.9977136850357056,grad_norm: 0.9746943995098236, iteration: 41673
loss: 0.982296884059906,grad_norm: 0.9453842758686186, iteration: 41674
loss: 1.0163633823394775,grad_norm: 0.8940179709608651, iteration: 41675
loss: 1.005982518196106,grad_norm: 0.868802685832699, iteration: 41676
loss: 1.0202147960662842,grad_norm: 0.8680572383422235, iteration: 41677
loss: 0.9645967483520508,grad_norm: 0.9359192452495131, iteration: 41678
loss: 0.9994449019432068,grad_norm: 0.9999991366429594, iteration: 41679
loss: 0.9881894588470459,grad_norm: 0.836182117266136, iteration: 41680
loss: 0.9980592727661133,grad_norm: 0.9999990371291215, iteration: 41681
loss: 1.0534833669662476,grad_norm: 0.9999991859254986, iteration: 41682
loss: 1.0078415870666504,grad_norm: 0.8471531421233661, iteration: 41683
loss: 1.023593544960022,grad_norm: 0.915206130735885, iteration: 41684
loss: 1.0161882638931274,grad_norm: 0.8735235853630594, iteration: 41685
loss: 0.9846131205558777,grad_norm: 0.9999990464024058, iteration: 41686
loss: 0.9917765259742737,grad_norm: 0.9738221455262308, iteration: 41687
loss: 1.0262434482574463,grad_norm: 0.9578013994793941, iteration: 41688
loss: 0.9887428283691406,grad_norm: 0.8602741097963185, iteration: 41689
loss: 1.0028080940246582,grad_norm: 0.9999990109575374, iteration: 41690
loss: 0.9991351366043091,grad_norm: 0.8938262997300284, iteration: 41691
loss: 1.001676321029663,grad_norm: 0.9999991954745762, iteration: 41692
loss: 1.042446255683899,grad_norm: 0.9999991134435656, iteration: 41693
loss: 1.0070269107818604,grad_norm: 0.9034631607569508, iteration: 41694
loss: 1.0101447105407715,grad_norm: 0.9999989863558063, iteration: 41695
loss: 0.9890685081481934,grad_norm: 0.9661075911534482, iteration: 41696
loss: 1.013216257095337,grad_norm: 0.8416877362984244, iteration: 41697
loss: 1.039402961730957,grad_norm: 0.8593895659610892, iteration: 41698
loss: 1.0285828113555908,grad_norm: 0.9999989835762337, iteration: 41699
loss: 1.00331449508667,grad_norm: 0.8369720062215973, iteration: 41700
loss: 1.0318686962127686,grad_norm: 0.9383329676768294, iteration: 41701
loss: 1.014588475227356,grad_norm: 0.9387356053061611, iteration: 41702
loss: 1.012459635734558,grad_norm: 0.9834228070576607, iteration: 41703
loss: 0.9787402153015137,grad_norm: 0.9047615561460859, iteration: 41704
loss: 1.024903655052185,grad_norm: 0.9999990500019149, iteration: 41705
loss: 0.9817367792129517,grad_norm: 0.841504821778301, iteration: 41706
loss: 1.0248093605041504,grad_norm: 0.9961617268950008, iteration: 41707
loss: 0.9754666686058044,grad_norm: 0.9535632612103628, iteration: 41708
loss: 0.9827954173088074,grad_norm: 0.8703875733436862, iteration: 41709
loss: 1.0277513265609741,grad_norm: 0.9999991981593375, iteration: 41710
loss: 0.9740776419639587,grad_norm: 0.9999990474343884, iteration: 41711
loss: 0.986703634262085,grad_norm: 0.9418979103516256, iteration: 41712
loss: 1.0056225061416626,grad_norm: 0.7547637476031543, iteration: 41713
loss: 0.9850450158119202,grad_norm: 0.9498915330351462, iteration: 41714
loss: 0.9932954907417297,grad_norm: 0.9432062211425907, iteration: 41715
loss: 1.0031583309173584,grad_norm: 0.9995154366555776, iteration: 41716
loss: 1.0011423826217651,grad_norm: 0.9047983181739493, iteration: 41717
loss: 1.0152689218521118,grad_norm: 0.9999989886487342, iteration: 41718
loss: 1.0234841108322144,grad_norm: 0.93879327131546, iteration: 41719
loss: 0.9989026784896851,grad_norm: 0.9621634512287036, iteration: 41720
loss: 0.9860839247703552,grad_norm: 0.9999990292443889, iteration: 41721
loss: 0.9706276655197144,grad_norm: 0.8306129287917086, iteration: 41722
loss: 0.939831554889679,grad_norm: 0.9892641085537394, iteration: 41723
loss: 1.0089398622512817,grad_norm: 0.9999990027494421, iteration: 41724
loss: 1.016829013824463,grad_norm: 0.999999180150851, iteration: 41725
loss: 0.9890788793563843,grad_norm: 0.9772661991020846, iteration: 41726
loss: 1.0255376100540161,grad_norm: 0.9757584788098679, iteration: 41727
loss: 0.9667497277259827,grad_norm: 0.802395826240617, iteration: 41728
loss: 1.0099886655807495,grad_norm: 0.8341823340816439, iteration: 41729
loss: 1.032500982284546,grad_norm: 0.9999990878796584, iteration: 41730
loss: 0.9859760999679565,grad_norm: 0.9381409148322072, iteration: 41731
loss: 1.0005886554718018,grad_norm: 0.8745643317499396, iteration: 41732
loss: 1.0076593160629272,grad_norm: 0.8320284377801395, iteration: 41733
loss: 0.9711556434631348,grad_norm: 0.8312111644239758, iteration: 41734
loss: 0.9830570816993713,grad_norm: 0.900090304950251, iteration: 41735
loss: 1.0379722118377686,grad_norm: 0.9721486842183976, iteration: 41736
loss: 1.019857406616211,grad_norm: 0.9999991401371007, iteration: 41737
loss: 0.9952789545059204,grad_norm: 0.9480534844715659, iteration: 41738
loss: 1.0354621410369873,grad_norm: 0.8894971641769495, iteration: 41739
loss: 0.9942241311073303,grad_norm: 0.8250235170564975, iteration: 41740
loss: 1.003158688545227,grad_norm: 0.9918225609820317, iteration: 41741
loss: 1.0042518377304077,grad_norm: 0.9999989442204508, iteration: 41742
loss: 0.9584422707557678,grad_norm: 0.9454861063849415, iteration: 41743
loss: 1.0337574481964111,grad_norm: 0.9999991005015766, iteration: 41744
loss: 1.0539647340774536,grad_norm: 0.9999989622410222, iteration: 41745
loss: 0.9870564937591553,grad_norm: 0.9832280812338766, iteration: 41746
loss: 1.0741641521453857,grad_norm: 0.9999991568160032, iteration: 41747
loss: 1.0101361274719238,grad_norm: 0.9405290856393992, iteration: 41748
loss: 0.9984939098358154,grad_norm: 0.8312640327664729, iteration: 41749
loss: 0.9909120202064514,grad_norm: 0.9999990238846721, iteration: 41750
loss: 1.0004709959030151,grad_norm: 0.9271093705282745, iteration: 41751
loss: 1.022787094116211,grad_norm: 0.9999990381465224, iteration: 41752
loss: 1.0053248405456543,grad_norm: 0.8677196687524386, iteration: 41753
loss: 1.000184416770935,grad_norm: 0.9999997808822532, iteration: 41754
loss: 1.0013198852539062,grad_norm: 0.9999991565156828, iteration: 41755
loss: 0.9970697164535522,grad_norm: 0.9999991865197765, iteration: 41756
loss: 1.017835021018982,grad_norm: 0.9999989021302091, iteration: 41757
loss: 0.9987962245941162,grad_norm: 0.9480684874958736, iteration: 41758
loss: 1.0237772464752197,grad_norm: 0.8894543178586689, iteration: 41759
loss: 1.0146665573120117,grad_norm: 0.9408864161542554, iteration: 41760
loss: 0.9940915703773499,grad_norm: 0.8330548433743762, iteration: 41761
loss: 1.0127272605895996,grad_norm: 0.9633643492482166, iteration: 41762
loss: 0.9782854318618774,grad_norm: 0.9170140291334121, iteration: 41763
loss: 0.9793426394462585,grad_norm: 0.9079067641748287, iteration: 41764
loss: 1.0386430025100708,grad_norm: 0.9999990283746419, iteration: 41765
loss: 0.9750643372535706,grad_norm: 0.9243814822374903, iteration: 41766
loss: 1.0292223691940308,grad_norm: 0.8961104625508145, iteration: 41767
loss: 1.017429232597351,grad_norm: 0.9999990475587787, iteration: 41768
loss: 1.032357096672058,grad_norm: 0.9999990936791785, iteration: 41769
loss: 1.0267189741134644,grad_norm: 0.8693474702427603, iteration: 41770
loss: 0.9977148175239563,grad_norm: 0.9110431521423443, iteration: 41771
loss: 0.9909189343452454,grad_norm: 0.8949274041989183, iteration: 41772
loss: 1.0055170059204102,grad_norm: 0.9700852694267513, iteration: 41773
loss: 0.9894092082977295,grad_norm: 0.9999992803259294, iteration: 41774
loss: 0.9871643781661987,grad_norm: 0.9999991251680976, iteration: 41775
loss: 1.0308290719985962,grad_norm: 0.9098359169200941, iteration: 41776
loss: 1.0350114107131958,grad_norm: 0.9999993206049125, iteration: 41777
loss: 0.9844804406166077,grad_norm: 0.8744291442480407, iteration: 41778
loss: 0.9776962995529175,grad_norm: 0.9877995157594882, iteration: 41779
loss: 0.9621469378471375,grad_norm: 0.9596673263683501, iteration: 41780
loss: 0.9940140247344971,grad_norm: 0.9999992261999376, iteration: 41781
loss: 0.9985336661338806,grad_norm: 0.9793348808288522, iteration: 41782
loss: 1.014816403388977,grad_norm: 0.9999990362186734, iteration: 41783
loss: 0.9876437783241272,grad_norm: 0.9296528185973739, iteration: 41784
loss: 0.9755878448486328,grad_norm: 0.8422012752100226, iteration: 41785
loss: 1.004357099533081,grad_norm: 0.9999991972021127, iteration: 41786
loss: 0.9605959057807922,grad_norm: 0.9409308952635154, iteration: 41787
loss: 1.0094672441482544,grad_norm: 0.9659983853151853, iteration: 41788
loss: 1.016188383102417,grad_norm: 0.8616568995242587, iteration: 41789
loss: 1.0134272575378418,grad_norm: 0.9999991676851984, iteration: 41790
loss: 1.0141997337341309,grad_norm: 0.9999991198878656, iteration: 41791
loss: 0.9602487087249756,grad_norm: 0.8887792452498559, iteration: 41792
loss: 1.0354042053222656,grad_norm: 0.9262093811139114, iteration: 41793
loss: 1.0161212682724,grad_norm: 0.9999993195255117, iteration: 41794
loss: 1.0008596181869507,grad_norm: 0.9312026476902295, iteration: 41795
loss: 0.9867057204246521,grad_norm: 0.9999993631237601, iteration: 41796
loss: 0.998777449131012,grad_norm: 0.9144912553355061, iteration: 41797
loss: 1.036458969116211,grad_norm: 0.939845866852495, iteration: 41798
loss: 0.9951947331428528,grad_norm: 0.9649420785196778, iteration: 41799
loss: 0.9811290502548218,grad_norm: 0.9947402396249516, iteration: 41800
loss: 1.0361552238464355,grad_norm: 0.9596884839145496, iteration: 41801
loss: 0.9908460378646851,grad_norm: 0.981744998216979, iteration: 41802
loss: 1.0085608959197998,grad_norm: 0.9427327087186169, iteration: 41803
loss: 0.9725376963615417,grad_norm: 0.9999992940159566, iteration: 41804
loss: 1.0069572925567627,grad_norm: 0.9440929400148318, iteration: 41805
loss: 1.028547763824463,grad_norm: 0.9999999509889446, iteration: 41806
loss: 1.0228710174560547,grad_norm: 0.9999990605222577, iteration: 41807
loss: 1.008313775062561,grad_norm: 0.9493411018574267, iteration: 41808
loss: 1.038857102394104,grad_norm: 0.9999994222706277, iteration: 41809
loss: 1.0168381929397583,grad_norm: 0.9599765475017181, iteration: 41810
loss: 1.0078743696212769,grad_norm: 0.924834812298206, iteration: 41811
loss: 0.9892498254776001,grad_norm: 0.873179461955844, iteration: 41812
loss: 0.9884412288665771,grad_norm: 0.9999992147741503, iteration: 41813
loss: 1.0172792673110962,grad_norm: 0.9996352840455357, iteration: 41814
loss: 1.006725549697876,grad_norm: 0.9999991161715318, iteration: 41815
loss: 1.0102355480194092,grad_norm: 0.9999990422151205, iteration: 41816
loss: 1.0186721086502075,grad_norm: 0.9999990423995434, iteration: 41817
loss: 0.9738025665283203,grad_norm: 0.886037199849868, iteration: 41818
loss: 1.0109632015228271,grad_norm: 0.9999991319114293, iteration: 41819
loss: 1.0027896165847778,grad_norm: 0.9999991090543329, iteration: 41820
loss: 1.0188548564910889,grad_norm: 0.9696626125609408, iteration: 41821
loss: 1.0461939573287964,grad_norm: 0.9999991656912273, iteration: 41822
loss: 0.9934629797935486,grad_norm: 0.7932676528477607, iteration: 41823
loss: 1.0024549961090088,grad_norm: 0.9999989917464928, iteration: 41824
loss: 1.0154792070388794,grad_norm: 0.8376840238575115, iteration: 41825
loss: 0.9852744936943054,grad_norm: 0.8629169213005843, iteration: 41826
loss: 0.9726831316947937,grad_norm: 0.8695067304050855, iteration: 41827
loss: 0.9729490280151367,grad_norm: 0.8018808953666723, iteration: 41828
loss: 1.0211158990859985,grad_norm: 0.9679622090637698, iteration: 41829
loss: 1.0067373514175415,grad_norm: 0.9348411445916249, iteration: 41830
loss: 1.007673978805542,grad_norm: 0.9318825948172488, iteration: 41831
loss: 0.9947496652603149,grad_norm: 0.9049260322493837, iteration: 41832
loss: 1.0379658937454224,grad_norm: 0.9999990203466095, iteration: 41833
loss: 0.9600704312324524,grad_norm: 0.9635046361020104, iteration: 41834
loss: 1.0272212028503418,grad_norm: 0.9999990957213722, iteration: 41835
loss: 1.0306874513626099,grad_norm: 0.9999991978001828, iteration: 41836
loss: 1.0428388118743896,grad_norm: 0.9999991236778506, iteration: 41837
loss: 0.9896230101585388,grad_norm: 0.9066012894675246, iteration: 41838
loss: 1.0272024869918823,grad_norm: 0.9999992038344511, iteration: 41839
loss: 0.949100911617279,grad_norm: 0.9219024044782339, iteration: 41840
loss: 1.0086241960525513,grad_norm: 0.9232788543560799, iteration: 41841
loss: 0.996570348739624,grad_norm: 0.9999992471998741, iteration: 41842
loss: 0.9761090874671936,grad_norm: 0.9999992177771598, iteration: 41843
loss: 1.0092934370040894,grad_norm: 0.9355884206741456, iteration: 41844
loss: 1.0054631233215332,grad_norm: 0.8138522000220706, iteration: 41845
loss: 1.019989013671875,grad_norm: 0.9015375291806439, iteration: 41846
loss: 0.9703560471534729,grad_norm: 0.99999919493841, iteration: 41847
loss: 1.0215628147125244,grad_norm: 0.928579570579313, iteration: 41848
loss: 0.9999321699142456,grad_norm: 0.9999991123131947, iteration: 41849
loss: 1.0191389322280884,grad_norm: 0.9999992179760857, iteration: 41850
loss: 1.0337706804275513,grad_norm: 0.9999991418500486, iteration: 41851
loss: 1.0054782629013062,grad_norm: 0.9999989920848459, iteration: 41852
loss: 1.0163284540176392,grad_norm: 0.9537921887764732, iteration: 41853
loss: 0.988353431224823,grad_norm: 0.970942047472591, iteration: 41854
loss: 0.9907968044281006,grad_norm: 0.9999992015006851, iteration: 41855
loss: 0.9841133952140808,grad_norm: 0.9422692193247366, iteration: 41856
loss: 1.0258492231369019,grad_norm: 0.7893544659146774, iteration: 41857
loss: 0.9947001934051514,grad_norm: 0.8997007692658874, iteration: 41858
loss: 1.0406978130340576,grad_norm: 0.9362687523379373, iteration: 41859
loss: 0.9936575889587402,grad_norm: 0.9532239052154183, iteration: 41860
loss: 1.0263245105743408,grad_norm: 0.8036896092282168, iteration: 41861
loss: 0.9637079834938049,grad_norm: 0.9999991456589804, iteration: 41862
loss: 0.9760351181030273,grad_norm: 0.9999990102958953, iteration: 41863
loss: 0.9524554014205933,grad_norm: 0.9999992981024173, iteration: 41864
loss: 0.996359646320343,grad_norm: 0.9999992429836103, iteration: 41865
loss: 0.9893286824226379,grad_norm: 0.9678996122578035, iteration: 41866
loss: 1.025870680809021,grad_norm: 0.8332049694445521, iteration: 41867
loss: 0.97129225730896,grad_norm: 0.9999991595187693, iteration: 41868
loss: 0.9929799437522888,grad_norm: 0.9999991780733009, iteration: 41869
loss: 1.0084222555160522,grad_norm: 0.8139593092622358, iteration: 41870
loss: 1.0143964290618896,grad_norm: 0.9999993123761853, iteration: 41871
loss: 0.992688775062561,grad_norm: 0.9999991972967227, iteration: 41872
loss: 1.027246117591858,grad_norm: 0.9999991865131962, iteration: 41873
loss: 1.0072985887527466,grad_norm: 0.8534030697110034, iteration: 41874
loss: 1.0156208276748657,grad_norm: 0.8728370168547049, iteration: 41875
loss: 1.0100382566452026,grad_norm: 0.9852585908872361, iteration: 41876
loss: 0.9869950413703918,grad_norm: 0.9930979380581781, iteration: 41877
loss: 0.9764619469642639,grad_norm: 0.8894824715651761, iteration: 41878
loss: 0.9951030015945435,grad_norm: 0.9999991208546144, iteration: 41879
loss: 0.9903755784034729,grad_norm: 0.9999992257250664, iteration: 41880
loss: 0.9396103024482727,grad_norm: 0.911516251312316, iteration: 41881
loss: 1.0137475728988647,grad_norm: 0.9999991556348784, iteration: 41882
loss: 0.9908875823020935,grad_norm: 0.99999913565639, iteration: 41883
loss: 0.969517707824707,grad_norm: 0.9135632804535373, iteration: 41884
loss: 0.9801910519599915,grad_norm: 0.9999990201025908, iteration: 41885
loss: 0.9564468860626221,grad_norm: 0.9999991350386326, iteration: 41886
loss: 1.0031262636184692,grad_norm: 0.8567805440340373, iteration: 41887
loss: 0.9737223386764526,grad_norm: 0.999999065554302, iteration: 41888
loss: 1.0556145906448364,grad_norm: 0.999999137445035, iteration: 41889
loss: 1.0269746780395508,grad_norm: 0.7480513450800338, iteration: 41890
loss: 1.0125771760940552,grad_norm: 0.8947215829152707, iteration: 41891
loss: 1.0185210704803467,grad_norm: 0.7834851886202017, iteration: 41892
loss: 0.9867637157440186,grad_norm: 0.9016159487902459, iteration: 41893
loss: 1.001267433166504,grad_norm: 0.8550853479766305, iteration: 41894
loss: 1.0010457038879395,grad_norm: 0.9999991684785325, iteration: 41895
loss: 0.9749419093132019,grad_norm: 0.9537965014834019, iteration: 41896
loss: 1.0024131536483765,grad_norm: 0.9432043123175912, iteration: 41897
loss: 1.002894639968872,grad_norm: 0.999999230066254, iteration: 41898
loss: 1.0307892560958862,grad_norm: 0.9834030139551736, iteration: 41899
loss: 1.0035969018936157,grad_norm: 0.8917363914023311, iteration: 41900
loss: 0.9580124616622925,grad_norm: 0.9599882990044522, iteration: 41901
loss: 0.9971420168876648,grad_norm: 0.9999993823177454, iteration: 41902
loss: 1.000909686088562,grad_norm: 0.8993114492828828, iteration: 41903
loss: 0.9883332252502441,grad_norm: 0.9999992754826279, iteration: 41904
loss: 1.0024781227111816,grad_norm: 0.9999992085368061, iteration: 41905
loss: 1.0131970643997192,grad_norm: 0.9999990705597029, iteration: 41906
loss: 1.00126051902771,grad_norm: 0.9262914428476994, iteration: 41907
loss: 1.0246107578277588,grad_norm: 0.9999990823863638, iteration: 41908
loss: 1.0011849403381348,grad_norm: 0.9999991660040253, iteration: 41909
loss: 0.982214093208313,grad_norm: 0.9999992515226802, iteration: 41910
loss: 1.0339239835739136,grad_norm: 0.9999990733584309, iteration: 41911
loss: 1.0187780857086182,grad_norm: 0.8706129964932436, iteration: 41912
loss: 0.9930999279022217,grad_norm: 0.9999991141194782, iteration: 41913
loss: 0.9914386868476868,grad_norm: 0.9999989013195407, iteration: 41914
loss: 0.9841563105583191,grad_norm: 0.9750262691047626, iteration: 41915
loss: 1.009321689605713,grad_norm: 0.999999144818766, iteration: 41916
loss: 1.0352516174316406,grad_norm: 0.9999990512927213, iteration: 41917
loss: 1.0401932001113892,grad_norm: 0.9999990985768734, iteration: 41918
loss: 0.9935349822044373,grad_norm: 0.916258294854561, iteration: 41919
loss: 1.02228844165802,grad_norm: 0.9999990957350556, iteration: 41920
loss: 1.0046640634536743,grad_norm: 0.9498510164994707, iteration: 41921
loss: 1.0096931457519531,grad_norm: 0.9999990420541718, iteration: 41922
loss: 1.0027180910110474,grad_norm: 0.9409080991129964, iteration: 41923
loss: 1.0120160579681396,grad_norm: 0.9999992235633351, iteration: 41924
loss: 0.9917560815811157,grad_norm: 0.8864152023146881, iteration: 41925
loss: 0.9954042434692383,grad_norm: 0.936211429246252, iteration: 41926
loss: 1.0281447172164917,grad_norm: 0.9999990929644481, iteration: 41927
loss: 1.0169363021850586,grad_norm: 0.9999991673184879, iteration: 41928
loss: 1.0059537887573242,grad_norm: 0.9820268639016443, iteration: 41929
loss: 0.9709640741348267,grad_norm: 0.9999992495815672, iteration: 41930
loss: 1.023774266242981,grad_norm: 0.9999989489185886, iteration: 41931
loss: 1.0196127891540527,grad_norm: 0.9790563733462205, iteration: 41932
loss: 0.992644727230072,grad_norm: 0.9438019316839945, iteration: 41933
loss: 1.02655827999115,grad_norm: 0.999999703034907, iteration: 41934
loss: 1.063914179801941,grad_norm: 0.9999995157362321, iteration: 41935
loss: 1.0040571689605713,grad_norm: 0.9999990162491712, iteration: 41936
loss: 1.0384546518325806,grad_norm: 0.999999339293046, iteration: 41937
loss: 0.9560644030570984,grad_norm: 0.88073688305646, iteration: 41938
loss: 1.027590036392212,grad_norm: 0.9999991306816298, iteration: 41939
loss: 0.9985170364379883,grad_norm: 0.9887903227377889, iteration: 41940
loss: 1.0077762603759766,grad_norm: 0.9420963321413643, iteration: 41941
loss: 1.0172823667526245,grad_norm: 0.9999990033140537, iteration: 41942
loss: 0.9915874600410461,grad_norm: 0.78646138864484, iteration: 41943
loss: 1.0169481039047241,grad_norm: 0.9934215594685484, iteration: 41944
loss: 0.9824426174163818,grad_norm: 0.9999990604902822, iteration: 41945
loss: 1.0451701879501343,grad_norm: 0.999999522205017, iteration: 41946
loss: 0.9789922833442688,grad_norm: 0.9383769732101952, iteration: 41947
loss: 1.0074518918991089,grad_norm: 0.8861650976536479, iteration: 41948
loss: 0.9815102815628052,grad_norm: 0.9999992187471813, iteration: 41949
loss: 1.0177545547485352,grad_norm: 0.9999989946131266, iteration: 41950
loss: 0.997046947479248,grad_norm: 0.9423188096259365, iteration: 41951
loss: 0.9921201467514038,grad_norm: 0.8927716546783172, iteration: 41952
loss: 1.037663459777832,grad_norm: 0.9999992935352917, iteration: 41953
loss: 1.0207514762878418,grad_norm: 0.9019141450123623, iteration: 41954
loss: 0.9776168465614319,grad_norm: 0.9968739608440076, iteration: 41955
loss: 0.9720442891120911,grad_norm: 0.9412311035336092, iteration: 41956
loss: 1.0053675174713135,grad_norm: 0.9999991712404709, iteration: 41957
loss: 1.021709680557251,grad_norm: 0.9560935578583755, iteration: 41958
loss: 1.020687222480774,grad_norm: 0.999999055970582, iteration: 41959
loss: 0.9999926090240479,grad_norm: 0.9754281540766412, iteration: 41960
loss: 0.986890435218811,grad_norm: 0.9999991598727603, iteration: 41961
loss: 1.0028116703033447,grad_norm: 0.9999989995524898, iteration: 41962
loss: 1.0156737565994263,grad_norm: 0.9999990980220385, iteration: 41963
loss: 1.010932445526123,grad_norm: 0.9999990416247772, iteration: 41964
loss: 0.9932279586791992,grad_norm: 0.9278337367779522, iteration: 41965
loss: 1.0095105171203613,grad_norm: 0.9999991252938488, iteration: 41966
loss: 1.021752953529358,grad_norm: 0.9429514662856879, iteration: 41967
loss: 0.9528120160102844,grad_norm: 0.9846669691011601, iteration: 41968
loss: 1.1000032424926758,grad_norm: 0.9999995605395519, iteration: 41969
loss: 1.0488263368606567,grad_norm: 0.9999997889972054, iteration: 41970
loss: 1.001084327697754,grad_norm: 0.9999989969038398, iteration: 41971
loss: 1.029159426689148,grad_norm: 0.9999991871713376, iteration: 41972
loss: 1.012065052986145,grad_norm: 0.994233109441529, iteration: 41973
loss: 0.9472824931144714,grad_norm: 0.9706273417533852, iteration: 41974
loss: 1.0055009126663208,grad_norm: 0.8440300853009401, iteration: 41975
loss: 1.0514564514160156,grad_norm: 0.9999990273403195, iteration: 41976
loss: 0.9437503218650818,grad_norm: 0.8510311900050468, iteration: 41977
loss: 1.0299872159957886,grad_norm: 0.8648920155399408, iteration: 41978
loss: 1.018060326576233,grad_norm: 0.9999992563554515, iteration: 41979
loss: 1.011644959449768,grad_norm: 0.9999991072395352, iteration: 41980
loss: 1.0001729726791382,grad_norm: 0.755920618776354, iteration: 41981
loss: 0.9864664077758789,grad_norm: 0.966486672098623, iteration: 41982
loss: 0.9963895082473755,grad_norm: 0.8769613021711342, iteration: 41983
loss: 1.0013092756271362,grad_norm: 0.9999991491043614, iteration: 41984
loss: 1.0235432386398315,grad_norm: 0.9999989710581293, iteration: 41985
loss: 1.0049569606781006,grad_norm: 0.9999992095195424, iteration: 41986
loss: 1.006834864616394,grad_norm: 0.9539585074764891, iteration: 41987
loss: 0.9986657500267029,grad_norm: 0.8747241658097386, iteration: 41988
loss: 0.9653652310371399,grad_norm: 0.9999992840638392, iteration: 41989
loss: 1.0043110847473145,grad_norm: 0.9999990885245503, iteration: 41990
loss: 1.024106502532959,grad_norm: 0.9643705278814647, iteration: 41991
loss: 1.0009859800338745,grad_norm: 0.9999990875817046, iteration: 41992
loss: 0.9701852798461914,grad_norm: 0.865375502448713, iteration: 41993
loss: 0.9626070261001587,grad_norm: 0.9801703383879342, iteration: 41994
loss: 1.0854015350341797,grad_norm: 0.9999990867455844, iteration: 41995
loss: 0.9774056077003479,grad_norm: 0.960388131425474, iteration: 41996
loss: 0.9849446415901184,grad_norm: 0.9999989633259725, iteration: 41997
loss: 0.9983882308006287,grad_norm: 0.9999991433038828, iteration: 41998
loss: 0.9902659058570862,grad_norm: 0.9999991096328601, iteration: 41999
loss: 1.022710919380188,grad_norm: 0.9990729824393659, iteration: 42000
loss: 1.0191570520401,grad_norm: 0.9999991435565019, iteration: 42001
loss: 1.0272444486618042,grad_norm: 0.9999995174226439, iteration: 42002
loss: 1.0765434503555298,grad_norm: 0.9999994804541308, iteration: 42003
loss: 0.9956099987030029,grad_norm: 0.9999990910953913, iteration: 42004
loss: 0.9727436304092407,grad_norm: 0.9999990685968531, iteration: 42005
loss: 1.0124038457870483,grad_norm: 0.8688057267489406, iteration: 42006
loss: 1.037474274635315,grad_norm: 0.9999993015205285, iteration: 42007
loss: 0.9650182127952576,grad_norm: 0.9150528690619382, iteration: 42008
loss: 1.0479772090911865,grad_norm: 0.9709788783737476, iteration: 42009
loss: 0.9984315633773804,grad_norm: 0.9999991107533012, iteration: 42010
loss: 0.979884147644043,grad_norm: 0.9999991120788299, iteration: 42011
loss: 1.0225480794906616,grad_norm: 0.9999991828690942, iteration: 42012
loss: 1.0197243690490723,grad_norm: 0.9999991178997003, iteration: 42013
loss: 1.0134365558624268,grad_norm: 0.9999992676253503, iteration: 42014
loss: 1.0170775651931763,grad_norm: 0.8698942721033502, iteration: 42015
loss: 1.0182124376296997,grad_norm: 0.9999990410752314, iteration: 42016
loss: 1.0167042016983032,grad_norm: 0.999999227344006, iteration: 42017
loss: 0.9913159608840942,grad_norm: 0.9999994928028348, iteration: 42018
loss: 1.0039558410644531,grad_norm: 0.8939103929748573, iteration: 42019
loss: 0.984232485294342,grad_norm: 0.9999991507574311, iteration: 42020
loss: 1.0167045593261719,grad_norm: 0.9999991934384983, iteration: 42021
loss: 1.023033618927002,grad_norm: 0.9999994221176911, iteration: 42022
loss: 1.0323079824447632,grad_norm: 0.9999991489884162, iteration: 42023
loss: 0.9825385808944702,grad_norm: 0.9999990066386116, iteration: 42024
loss: 1.0248466730117798,grad_norm: 0.9184889551513059, iteration: 42025
loss: 1.055952548980713,grad_norm: 0.9999991688095985, iteration: 42026
loss: 1.0075923204421997,grad_norm: 0.9999995582216417, iteration: 42027
loss: 0.9940822720527649,grad_norm: 0.9999992847201662, iteration: 42028
loss: 1.0822056531906128,grad_norm: 0.9999997703381551, iteration: 42029
loss: 1.0001471042633057,grad_norm: 0.9999991987280415, iteration: 42030
loss: 0.9901434183120728,grad_norm: 0.9212488178053048, iteration: 42031
loss: 0.9833899140357971,grad_norm: 0.9821424821651641, iteration: 42032
loss: 1.0161712169647217,grad_norm: 0.9999996443286965, iteration: 42033
loss: 0.9929050207138062,grad_norm: 0.9999990646706414, iteration: 42034
loss: 1.0228530168533325,grad_norm: 0.9999990989782255, iteration: 42035
loss: 0.9745017290115356,grad_norm: 0.9804758516351058, iteration: 42036
loss: 0.975392758846283,grad_norm: 0.9999990814754822, iteration: 42037
loss: 0.9818591475486755,grad_norm: 0.9999990225517815, iteration: 42038
loss: 0.9768715500831604,grad_norm: 0.9275919784395622, iteration: 42039
loss: 1.0317342281341553,grad_norm: 0.9999990312947629, iteration: 42040
loss: 1.0093967914581299,grad_norm: 0.9464078664875709, iteration: 42041
loss: 0.9917641282081604,grad_norm: 0.9025261397315273, iteration: 42042
loss: 0.9919983744621277,grad_norm: 0.9999991832364293, iteration: 42043
loss: 1.0081629753112793,grad_norm: 0.9999991145836264, iteration: 42044
loss: 1.043644905090332,grad_norm: 0.9999989559184764, iteration: 42045
loss: 0.9706425666809082,grad_norm: 0.9999990791491925, iteration: 42046
loss: 1.0320875644683838,grad_norm: 0.9017411926980939, iteration: 42047
loss: 0.9999548196792603,grad_norm: 0.9999992999735801, iteration: 42048
loss: 0.9936691522598267,grad_norm: 0.9146440761470738, iteration: 42049
loss: 0.9900156259536743,grad_norm: 0.9999991552841778, iteration: 42050
loss: 1.0270440578460693,grad_norm: 0.9999991109168909, iteration: 42051
loss: 1.044968843460083,grad_norm: 0.9999992221182297, iteration: 42052
loss: 0.9926537871360779,grad_norm: 0.9999993497953138, iteration: 42053
loss: 0.971642255783081,grad_norm: 0.9999991720790682, iteration: 42054
loss: 0.9870759844779968,grad_norm: 0.9999992216007588, iteration: 42055
loss: 1.0055071115493774,grad_norm: 0.8579163584282714, iteration: 42056
loss: 1.0391641855239868,grad_norm: 0.9999989771264675, iteration: 42057
loss: 1.0165940523147583,grad_norm: 0.7493644553072176, iteration: 42058
loss: 1.0139116048812866,grad_norm: 0.8934812644281093, iteration: 42059
loss: 0.9812341928482056,grad_norm: 0.9034555685638903, iteration: 42060
loss: 0.9973987936973572,grad_norm: 0.9999991709423213, iteration: 42061
loss: 1.020349383354187,grad_norm: 0.9999991272872819, iteration: 42062
loss: 0.9844005703926086,grad_norm: 0.9790604219975987, iteration: 42063
loss: 0.9732149839401245,grad_norm: 0.9999992295569498, iteration: 42064
loss: 0.9791491031646729,grad_norm: 0.9999992783708656, iteration: 42065
loss: 1.0023831129074097,grad_norm: 0.9057023207021836, iteration: 42066
loss: 0.9695445895195007,grad_norm: 0.9999996418431516, iteration: 42067
loss: 0.9923306107521057,grad_norm: 0.9523626727719371, iteration: 42068
loss: 0.9972037076950073,grad_norm: 0.9999992586612685, iteration: 42069
loss: 1.0243481397628784,grad_norm: 0.9999992728792299, iteration: 42070
loss: 1.0206475257873535,grad_norm: 0.9999992514928554, iteration: 42071
loss: 1.0480000972747803,grad_norm: 0.9999991129511325, iteration: 42072
loss: 0.9592583179473877,grad_norm: 0.9999989615732825, iteration: 42073
loss: 1.0307761430740356,grad_norm: 0.9999992430147236, iteration: 42074
loss: 0.9888343214988708,grad_norm: 0.9576448528528163, iteration: 42075
loss: 1.0284059047698975,grad_norm: 0.8450830039489272, iteration: 42076
loss: 0.9847996830940247,grad_norm: 0.9999991269656071, iteration: 42077
loss: 0.9918517470359802,grad_norm: 0.999999094184584, iteration: 42078
loss: 1.0282766819000244,grad_norm: 0.8554917184755716, iteration: 42079
loss: 0.9823392629623413,grad_norm: 0.9999997948433597, iteration: 42080
loss: 1.0343745946884155,grad_norm: 0.9999991019274163, iteration: 42081
loss: 1.0130318403244019,grad_norm: 0.9999990652523365, iteration: 42082
loss: 0.992598831653595,grad_norm: 0.9256161856805448, iteration: 42083
loss: 0.9925782084465027,grad_norm: 0.999999117368064, iteration: 42084
loss: 1.0324879884719849,grad_norm: 0.8766581488321378, iteration: 42085
loss: 0.9611430168151855,grad_norm: 0.9999989352819663, iteration: 42086
loss: 0.961912214756012,grad_norm: 0.9537380592617973, iteration: 42087
loss: 1.0178769826889038,grad_norm: 0.9280291447341664, iteration: 42088
loss: 0.9868447780609131,grad_norm: 0.9999991254049452, iteration: 42089
loss: 1.0162960290908813,grad_norm: 0.9999995135961499, iteration: 42090
loss: 1.0222946405410767,grad_norm: 0.9999989762454012, iteration: 42091
loss: 1.014514446258545,grad_norm: 0.9999991641339743, iteration: 42092
loss: 1.052311658859253,grad_norm: 0.9999994747863433, iteration: 42093
loss: 0.9582524299621582,grad_norm: 0.9999991965218877, iteration: 42094
loss: 1.0315361022949219,grad_norm: 0.9999989879108632, iteration: 42095
loss: 1.0132719278335571,grad_norm: 0.9470087946584971, iteration: 42096
loss: 0.9789965748786926,grad_norm: 0.8368549517867772, iteration: 42097
loss: 1.0430248975753784,grad_norm: 0.9999992940237801, iteration: 42098
loss: 0.9877578616142273,grad_norm: 0.8679225507822824, iteration: 42099
loss: 0.950989305973053,grad_norm: 0.9999989977057063, iteration: 42100
loss: 0.9813127517700195,grad_norm: 0.9434800112253179, iteration: 42101
loss: 1.0010724067687988,grad_norm: 0.859182887077962, iteration: 42102
loss: 1.0318248271942139,grad_norm: 0.9999991118814568, iteration: 42103
loss: 1.004453182220459,grad_norm: 0.9999993169664846, iteration: 42104
loss: 1.0041983127593994,grad_norm: 0.9878433469389033, iteration: 42105
loss: 1.0179609060287476,grad_norm: 0.9999990405664392, iteration: 42106
loss: 0.977511465549469,grad_norm: 0.9999992033261966, iteration: 42107
loss: 0.9968796372413635,grad_norm: 0.9999995635475245, iteration: 42108
loss: 1.0231704711914062,grad_norm: 0.9897643510491533, iteration: 42109
loss: 1.0225393772125244,grad_norm: 0.913615329796401, iteration: 42110
loss: 1.0028589963912964,grad_norm: 0.9354199227326385, iteration: 42111
loss: 1.042170763015747,grad_norm: 0.9999990911681662, iteration: 42112
loss: 0.9796693325042725,grad_norm: 0.9241550355923928, iteration: 42113
loss: 1.0429874658584595,grad_norm: 0.9999999340196101, iteration: 42114
loss: 0.9995835423469543,grad_norm: 0.9999992314552338, iteration: 42115
loss: 1.0337508916854858,grad_norm: 0.7869507575613794, iteration: 42116
loss: 1.0282570123672485,grad_norm: 0.9999992070296443, iteration: 42117
loss: 0.9805353879928589,grad_norm: 0.853466299630025, iteration: 42118
loss: 1.0130313634872437,grad_norm: 0.9021815367504658, iteration: 42119
loss: 0.9915659427642822,grad_norm: 0.9999991214457314, iteration: 42120
loss: 0.9969266653060913,grad_norm: 0.865521713306193, iteration: 42121
loss: 0.9942898154258728,grad_norm: 0.9905913163672125, iteration: 42122
loss: 1.0157676935195923,grad_norm: 0.9999992637032128, iteration: 42123
loss: 0.9703810214996338,grad_norm: 0.9933112689296058, iteration: 42124
loss: 0.998824417591095,grad_norm: 0.9307663549432039, iteration: 42125
loss: 1.045150876045227,grad_norm: 0.9999990702140349, iteration: 42126
loss: 1.0042814016342163,grad_norm: 0.9251680627794673, iteration: 42127
loss: 1.004617691040039,grad_norm: 0.9999991790707797, iteration: 42128
loss: 1.008646845817566,grad_norm: 0.9752620336824215, iteration: 42129
loss: 1.0238919258117676,grad_norm: 0.9823355342624975, iteration: 42130
loss: 1.0146305561065674,grad_norm: 0.942487405241536, iteration: 42131
loss: 1.0186692476272583,grad_norm: 0.9999990983736564, iteration: 42132
loss: 1.055842638015747,grad_norm: 0.9999998514513385, iteration: 42133
loss: 1.0016299486160278,grad_norm: 0.9999992868382396, iteration: 42134
loss: 1.0335859060287476,grad_norm: 0.9279689493979116, iteration: 42135
loss: 1.0110729932785034,grad_norm: 0.9246236893449072, iteration: 42136
loss: 0.9914615154266357,grad_norm: 0.9252236443160607, iteration: 42137
loss: 1.0400224924087524,grad_norm: 0.9817150514817565, iteration: 42138
loss: 1.0898058414459229,grad_norm: 0.999999126989432, iteration: 42139
loss: 0.995373010635376,grad_norm: 0.9888283969326261, iteration: 42140
loss: 1.0047649145126343,grad_norm: 0.9128319364966422, iteration: 42141
loss: 0.9727738499641418,grad_norm: 0.8871808461693932, iteration: 42142
loss: 1.0102022886276245,grad_norm: 0.8857328572208861, iteration: 42143
loss: 0.9979493021965027,grad_norm: 0.9851944956827444, iteration: 42144
loss: 0.9938997626304626,grad_norm: 0.892232572567083, iteration: 42145
loss: 0.9758530259132385,grad_norm: 0.9490029084875675, iteration: 42146
loss: 0.9929453730583191,grad_norm: 0.9999991226370635, iteration: 42147
loss: 1.0048470497131348,grad_norm: 0.9234724811485261, iteration: 42148
loss: 0.994968593120575,grad_norm: 0.8468079030192804, iteration: 42149
loss: 1.032651424407959,grad_norm: 0.921326203683846, iteration: 42150
loss: 1.0222796201705933,grad_norm: 0.9999992418750774, iteration: 42151
loss: 0.9854629635810852,grad_norm: 0.8012071386715514, iteration: 42152
loss: 0.9561006426811218,grad_norm: 0.9484006360197621, iteration: 42153
loss: 0.9876710772514343,grad_norm: 0.9999991687474372, iteration: 42154
loss: 1.0014760494232178,grad_norm: 0.9999992040155804, iteration: 42155
loss: 0.9437372088432312,grad_norm: 0.9530888397921138, iteration: 42156
loss: 1.0208429098129272,grad_norm: 0.8678218431821166, iteration: 42157
loss: 1.0098257064819336,grad_norm: 0.999999237608111, iteration: 42158
loss: 1.0288578271865845,grad_norm: 0.999999073482676, iteration: 42159
loss: 1.004520297050476,grad_norm: 0.999999253629149, iteration: 42160
loss: 1.003919005393982,grad_norm: 0.999999114956765, iteration: 42161
loss: 1.0612332820892334,grad_norm: 0.9999990887504796, iteration: 42162
loss: 1.019363284111023,grad_norm: 0.9642044235568769, iteration: 42163
loss: 1.0262000560760498,grad_norm: 0.9999990957857712, iteration: 42164
loss: 1.0121207237243652,grad_norm: 0.9999990830517438, iteration: 42165
loss: 1.010176420211792,grad_norm: 0.9999990658569284, iteration: 42166
loss: 0.9781582951545715,grad_norm: 0.9954195193897193, iteration: 42167
loss: 1.03476881980896,grad_norm: 0.9999990873700481, iteration: 42168
loss: 1.0152873992919922,grad_norm: 0.9516739922612902, iteration: 42169
loss: 1.0207158327102661,grad_norm: 0.9999999085654028, iteration: 42170
loss: 0.9973781704902649,grad_norm: 0.999999032377987, iteration: 42171
loss: 0.9873411655426025,grad_norm: 0.9999993699550025, iteration: 42172
loss: 1.0070900917053223,grad_norm: 0.8656385503045635, iteration: 42173
loss: 1.0107285976409912,grad_norm: 0.885750779047833, iteration: 42174
loss: 1.0047913789749146,grad_norm: 0.878514198259055, iteration: 42175
loss: 0.9974680542945862,grad_norm: 0.8957916273445052, iteration: 42176
loss: 1.04154634475708,grad_norm: 0.8589501128555714, iteration: 42177
loss: 0.9972277283668518,grad_norm: 0.9999990307753374, iteration: 42178
loss: 1.0068851709365845,grad_norm: 0.9999994961672032, iteration: 42179
loss: 1.1162023544311523,grad_norm: 0.99999943606496, iteration: 42180
loss: 1.0103721618652344,grad_norm: 0.97664897013597, iteration: 42181
loss: 1.0072695016860962,grad_norm: 0.9999998562354605, iteration: 42182
loss: 0.9977144002914429,grad_norm: 0.8547301102735267, iteration: 42183
loss: 1.0326471328735352,grad_norm: 0.9999990119854961, iteration: 42184
loss: 1.0033197402954102,grad_norm: 0.9311138096638354, iteration: 42185
loss: 0.985261857509613,grad_norm: 0.9999997649616938, iteration: 42186
loss: 1.0153093338012695,grad_norm: 0.9999989897218394, iteration: 42187
loss: 0.9925466179847717,grad_norm: 0.8815127885488163, iteration: 42188
loss: 0.9907212257385254,grad_norm: 0.9999992876923504, iteration: 42189
loss: 0.9887282252311707,grad_norm: 0.8929495097557779, iteration: 42190
loss: 1.0086489915847778,grad_norm: 0.9999992229828092, iteration: 42191
loss: 1.0329943895339966,grad_norm: 0.8682701388090599, iteration: 42192
loss: 1.0174853801727295,grad_norm: 0.9550039518388359, iteration: 42193
loss: 1.044981598854065,grad_norm: 0.968771028225686, iteration: 42194
loss: 1.0089914798736572,grad_norm: 0.9770268655776999, iteration: 42195
loss: 1.0306740999221802,grad_norm: 0.9999995978026003, iteration: 42196
loss: 0.9820046424865723,grad_norm: 0.9999991579320172, iteration: 42197
loss: 1.0225871801376343,grad_norm: 0.9999991573268171, iteration: 42198
loss: 1.0325011014938354,grad_norm: 0.9999991427512457, iteration: 42199
loss: 1.03900146484375,grad_norm: 0.9999995542004467, iteration: 42200
loss: 1.016621708869934,grad_norm: 0.999999174581526, iteration: 42201
loss: 0.9929841756820679,grad_norm: 0.9569230663214746, iteration: 42202
loss: 1.0198293924331665,grad_norm: 0.88312638193315, iteration: 42203
loss: 1.011899471282959,grad_norm: 0.999999060888225, iteration: 42204
loss: 0.9936420321464539,grad_norm: 0.9999991478452647, iteration: 42205
loss: 1.0171393156051636,grad_norm: 0.9999991336154455, iteration: 42206
loss: 1.0057806968688965,grad_norm: 0.9232593916807136, iteration: 42207
loss: 0.982243001461029,grad_norm: 0.9999991084647738, iteration: 42208
loss: 0.9662961959838867,grad_norm: 0.9999991891949919, iteration: 42209
loss: 1.001373529434204,grad_norm: 0.9999989835668789, iteration: 42210
loss: 1.0281400680541992,grad_norm: 0.8693436434037134, iteration: 42211
loss: 0.9954264760017395,grad_norm: 0.9520672555183585, iteration: 42212
loss: 0.9908324480056763,grad_norm: 0.9999991656950297, iteration: 42213
loss: 0.9918637871742249,grad_norm: 0.8901951739315048, iteration: 42214
loss: 0.9986482262611389,grad_norm: 0.9967518505053794, iteration: 42215
loss: 0.9961780309677124,grad_norm: 0.8948558228491397, iteration: 42216
loss: 1.0090872049331665,grad_norm: 0.9999992044545721, iteration: 42217
loss: 0.9911468625068665,grad_norm: 0.9825901306590453, iteration: 42218
loss: 1.032597303390503,grad_norm: 0.999999161509002, iteration: 42219
loss: 1.040713906288147,grad_norm: 0.9999990702284034, iteration: 42220
loss: 1.0237528085708618,grad_norm: 0.9999992126881201, iteration: 42221
loss: 1.06201171875,grad_norm: 0.9999990965009318, iteration: 42222
loss: 0.9989665150642395,grad_norm: 0.9594115980292279, iteration: 42223
loss: 1.0392142534255981,grad_norm: 0.9556551514016541, iteration: 42224
loss: 0.9725937843322754,grad_norm: 0.9999990916655782, iteration: 42225
loss: 1.0070382356643677,grad_norm: 0.8758015586208602, iteration: 42226
loss: 1.0110509395599365,grad_norm: 0.9878790172236039, iteration: 42227
loss: 1.0296709537506104,grad_norm: 0.999998916282651, iteration: 42228
loss: 0.9560416340827942,grad_norm: 0.9999991638902165, iteration: 42229
loss: 0.9887478947639465,grad_norm: 0.9936135126212721, iteration: 42230
loss: 0.9268832206726074,grad_norm: 0.9775821013782607, iteration: 42231
loss: 0.9763137102127075,grad_norm: 0.9354618230389952, iteration: 42232
loss: 1.0351340770721436,grad_norm: 0.882479416450164, iteration: 42233
loss: 1.0384957790374756,grad_norm: 0.9641819872916788, iteration: 42234
loss: 1.0523754358291626,grad_norm: 0.9999991674427968, iteration: 42235
loss: 0.9874320030212402,grad_norm: 0.9999991062013917, iteration: 42236
loss: 1.0023396015167236,grad_norm: 0.9999992878894712, iteration: 42237
loss: 0.9829792380332947,grad_norm: 0.9999992188431973, iteration: 42238
loss: 1.0041579008102417,grad_norm: 0.9701066411598701, iteration: 42239
loss: 1.0175472497940063,grad_norm: 0.9999994890255867, iteration: 42240
loss: 1.0386375188827515,grad_norm: 0.9999989935653963, iteration: 42241
loss: 1.0027885437011719,grad_norm: 0.9425598381944618, iteration: 42242
loss: 0.9800353646278381,grad_norm: 0.9552013703490942, iteration: 42243
loss: 1.0348221063613892,grad_norm: 0.9999992359153471, iteration: 42244
loss: 0.9582270979881287,grad_norm: 0.9999992064270122, iteration: 42245
loss: 1.0027118921279907,grad_norm: 0.867937724735809, iteration: 42246
loss: 1.0004866123199463,grad_norm: 0.9613750571698568, iteration: 42247
loss: 0.974603533744812,grad_norm: 0.8255825850772539, iteration: 42248
loss: 1.0051723718643188,grad_norm: 0.9999990283963436, iteration: 42249
loss: 1.0272152423858643,grad_norm: 0.8571946694721722, iteration: 42250
loss: 0.9896030426025391,grad_norm: 0.9172508786271373, iteration: 42251
loss: 1.0244274139404297,grad_norm: 0.8457867122300499, iteration: 42252
loss: 0.9869407415390015,grad_norm: 0.9572991665403116, iteration: 42253
loss: 1.0168604850769043,grad_norm: 0.9999990887493354, iteration: 42254
loss: 1.004084587097168,grad_norm: 0.9999993372363343, iteration: 42255
loss: 1.0023702383041382,grad_norm: 0.8671495030438461, iteration: 42256
loss: 1.036969780921936,grad_norm: 0.99999929155503, iteration: 42257
loss: 1.0019229650497437,grad_norm: 0.8355279091695951, iteration: 42258
loss: 1.0313669443130493,grad_norm: 0.762174156480897, iteration: 42259
loss: 0.989109992980957,grad_norm: 0.9054050915918461, iteration: 42260
loss: 0.9907742142677307,grad_norm: 0.9999990418302197, iteration: 42261
loss: 0.9658631682395935,grad_norm: 0.9839692143732504, iteration: 42262
loss: 0.987581729888916,grad_norm: 0.9999991366685366, iteration: 42263
loss: 0.9833294749259949,grad_norm: 0.9336893484902793, iteration: 42264
loss: 1.0185205936431885,grad_norm: 0.9999991695968377, iteration: 42265
loss: 1.016158938407898,grad_norm: 0.7760369215624466, iteration: 42266
loss: 1.0093326568603516,grad_norm: 0.9999991583756662, iteration: 42267
loss: 1.0234005451202393,grad_norm: 0.9999990906268291, iteration: 42268
loss: 1.0112601518630981,grad_norm: 0.885406094950154, iteration: 42269
loss: 1.0296696424484253,grad_norm: 0.9999997520210594, iteration: 42270
loss: 1.00753653049469,grad_norm: 0.9393422596631356, iteration: 42271
loss: 1.006749153137207,grad_norm: 0.7982712638223017, iteration: 42272
loss: 0.9875012636184692,grad_norm: 0.9158743309325889, iteration: 42273
loss: 0.9732789397239685,grad_norm: 0.9624666009962725, iteration: 42274
loss: 0.9808589220046997,grad_norm: 0.9999992062735952, iteration: 42275
loss: 0.9847592711448669,grad_norm: 0.908461409000168, iteration: 42276
loss: 0.9567087292671204,grad_norm: 0.9999993150465036, iteration: 42277
loss: 1.0029178857803345,grad_norm: 0.7778289277927477, iteration: 42278
loss: 1.0471566915512085,grad_norm: 0.9919077605697919, iteration: 42279
loss: 1.025240421295166,grad_norm: 0.999999328099734, iteration: 42280
loss: 1.004164457321167,grad_norm: 0.9999994753860613, iteration: 42281
loss: 0.9429392218589783,grad_norm: 0.806926265184452, iteration: 42282
loss: 1.0641471147537231,grad_norm: 0.9999992187842263, iteration: 42283
loss: 0.9990601539611816,grad_norm: 0.918556006242705, iteration: 42284
loss: 1.031120777130127,grad_norm: 0.904112277624854, iteration: 42285
loss: 1.0210211277008057,grad_norm: 0.9999991377093042, iteration: 42286
loss: 0.9881092309951782,grad_norm: 0.9999993940847839, iteration: 42287
loss: 0.9942920207977295,grad_norm: 0.830839992983138, iteration: 42288
loss: 0.9910563826560974,grad_norm: 0.9999990468960321, iteration: 42289
loss: 1.0165421962738037,grad_norm: 0.9764707174486479, iteration: 42290
loss: 0.992702841758728,grad_norm: 0.9999991366803047, iteration: 42291
loss: 1.0417550802230835,grad_norm: 0.9999990824164676, iteration: 42292
loss: 0.980826735496521,grad_norm: 0.9222261756413247, iteration: 42293
loss: 1.0089643001556396,grad_norm: 0.9769574215486085, iteration: 42294
loss: 0.936157763004303,grad_norm: 0.9999991418931214, iteration: 42295
loss: 0.9866005778312683,grad_norm: 0.9999992539519149, iteration: 42296
loss: 0.9866565465927124,grad_norm: 0.9999991102421382, iteration: 42297
loss: 0.9944238066673279,grad_norm: 0.9218451509621058, iteration: 42298
loss: 1.043926477432251,grad_norm: 0.9999991593823231, iteration: 42299
loss: 0.9888322353363037,grad_norm: 0.9999990448119395, iteration: 42300
loss: 0.9767743349075317,grad_norm: 0.9204281973549329, iteration: 42301
loss: 0.9720706939697266,grad_norm: 0.8979065535128758, iteration: 42302
loss: 1.0054651498794556,grad_norm: 0.9179313907254514, iteration: 42303
loss: 0.9851396083831787,grad_norm: 0.999998893076485, iteration: 42304
loss: 0.9920017719268799,grad_norm: 0.9999991048323703, iteration: 42305
loss: 0.9699632525444031,grad_norm: 0.9188899561273128, iteration: 42306
loss: 1.0035734176635742,grad_norm: 0.8168885794946427, iteration: 42307
loss: 1.0208510160446167,grad_norm: 0.9999992008702207, iteration: 42308
loss: 0.9906287789344788,grad_norm: 0.99999913470569, iteration: 42309
loss: 0.985531210899353,grad_norm: 0.8254454891269145, iteration: 42310
loss: 1.0069947242736816,grad_norm: 0.9783507025702873, iteration: 42311
loss: 1.118499755859375,grad_norm: 0.9999992419274977, iteration: 42312
loss: 1.0030943155288696,grad_norm: 0.9999991245780198, iteration: 42313
loss: 0.9975537061691284,grad_norm: 0.9999991088814815, iteration: 42314
loss: 0.9729351997375488,grad_norm: 0.9395100780690214, iteration: 42315
loss: 0.9949807524681091,grad_norm: 0.9630869420193682, iteration: 42316
loss: 1.0316345691680908,grad_norm: 0.999999102263217, iteration: 42317
loss: 1.0016289949417114,grad_norm: 0.9415577810287831, iteration: 42318
loss: 0.9927146434783936,grad_norm: 0.9907895386699525, iteration: 42319
loss: 0.9789556264877319,grad_norm: 0.9999995732654977, iteration: 42320
loss: 1.001265525817871,grad_norm: 0.932398000718931, iteration: 42321
loss: 1.0267341136932373,grad_norm: 0.8407032923401109, iteration: 42322
loss: 1.0562143325805664,grad_norm: 0.8151557018560316, iteration: 42323
loss: 1.0018126964569092,grad_norm: 0.9717125189364686, iteration: 42324
loss: 1.009029507637024,grad_norm: 0.9999993474396374, iteration: 42325
loss: 0.9855024814605713,grad_norm: 0.7797546253615113, iteration: 42326
loss: 0.9744242429733276,grad_norm: 0.9999990119416577, iteration: 42327
loss: 1.035378336906433,grad_norm: 0.9201718306036213, iteration: 42328
loss: 1.04130220413208,grad_norm: 0.9999991483809754, iteration: 42329
loss: 0.9827356934547424,grad_norm: 0.9999990678188855, iteration: 42330
loss: 1.0194498300552368,grad_norm: 0.7681992032918603, iteration: 42331
loss: 1.06354558467865,grad_norm: 0.9999995286981649, iteration: 42332
loss: 0.9923760890960693,grad_norm: 0.8962141404013714, iteration: 42333
loss: 1.0732462406158447,grad_norm: 0.9999994069789316, iteration: 42334
loss: 0.9809179306030273,grad_norm: 0.9337109393425643, iteration: 42335
loss: 1.023270845413208,grad_norm: 0.7466075826693902, iteration: 42336
loss: 0.9875907301902771,grad_norm: 0.9999994115638775, iteration: 42337
loss: 0.9909765720367432,grad_norm: 0.9553344232542608, iteration: 42338
loss: 1.003728985786438,grad_norm: 0.8961641210146202, iteration: 42339
loss: 0.9955869317054749,grad_norm: 0.9832680579043753, iteration: 42340
loss: 1.0312187671661377,grad_norm: 0.9999992422354711, iteration: 42341
loss: 0.9908069372177124,grad_norm: 0.9999991869512587, iteration: 42342
loss: 0.994818389415741,grad_norm: 0.9999992554456871, iteration: 42343
loss: 1.059110403060913,grad_norm: 0.9999995124151182, iteration: 42344
loss: 0.9956967234611511,grad_norm: 0.9999990911077964, iteration: 42345
loss: 1.0203553438186646,grad_norm: 0.9999993961131229, iteration: 42346
loss: 1.0331084728240967,grad_norm: 0.9999990883726967, iteration: 42347
loss: 1.0209057331085205,grad_norm: 0.9890110382610436, iteration: 42348
loss: 0.9778116941452026,grad_norm: 0.9999991833790234, iteration: 42349
loss: 1.0029178857803345,grad_norm: 0.9157890931653057, iteration: 42350
loss: 1.0124294757843018,grad_norm: 0.95977513588972, iteration: 42351
loss: 1.0081589221954346,grad_norm: 0.999999231535575, iteration: 42352
loss: 0.9873046875,grad_norm: 0.9999991853160085, iteration: 42353
loss: 0.9923332929611206,grad_norm: 0.9622899918932393, iteration: 42354
loss: 1.0129462480545044,grad_norm: 0.9417087770683337, iteration: 42355
loss: 0.9779816269874573,grad_norm: 0.9344656196081054, iteration: 42356
loss: 0.9586538672447205,grad_norm: 0.9999990815027474, iteration: 42357
loss: 1.0246230363845825,grad_norm: 0.9999991340927373, iteration: 42358
loss: 1.0001146793365479,grad_norm: 0.9999994547850252, iteration: 42359
loss: 0.9826246500015259,grad_norm: 0.9715848335940432, iteration: 42360
loss: 0.9935583472251892,grad_norm: 0.9999991424282448, iteration: 42361
loss: 0.9809883832931519,grad_norm: 0.8135747793376576, iteration: 42362
loss: 1.0236988067626953,grad_norm: 0.988653095073524, iteration: 42363
loss: 1.0205804109573364,grad_norm: 0.999999337128453, iteration: 42364
loss: 1.0421721935272217,grad_norm: 0.9449817217965683, iteration: 42365
loss: 0.9912884831428528,grad_norm: 0.9885793505835949, iteration: 42366
loss: 1.0260837078094482,grad_norm: 0.9999990024355946, iteration: 42367
loss: 0.9972214698791504,grad_norm: 0.9999991883118492, iteration: 42368
loss: 1.0182552337646484,grad_norm: 0.8859552677363589, iteration: 42369
loss: 1.0448253154754639,grad_norm: 0.9999996384561901, iteration: 42370
loss: 0.9783077239990234,grad_norm: 0.8815837027737048, iteration: 42371
loss: 0.957776665687561,grad_norm: 0.9392395454713398, iteration: 42372
loss: 1.0434818267822266,grad_norm: 0.9999994238278063, iteration: 42373
loss: 0.9868748784065247,grad_norm: 0.9999989756332277, iteration: 42374
loss: 1.0293079614639282,grad_norm: 0.9230318169367551, iteration: 42375
loss: 1.007089376449585,grad_norm: 0.9576123224926775, iteration: 42376
loss: 1.044762134552002,grad_norm: 0.9914068244758365, iteration: 42377
loss: 0.9916889667510986,grad_norm: 0.9999991676247604, iteration: 42378
loss: 1.0262352228164673,grad_norm: 0.9999992506672024, iteration: 42379
loss: 1.0044975280761719,grad_norm: 0.9999991370194685, iteration: 42380
loss: 1.0235908031463623,grad_norm: 0.9999992098685176, iteration: 42381
loss: 1.002772569656372,grad_norm: 0.9345857215182055, iteration: 42382
loss: 1.0243940353393555,grad_norm: 0.999999129246306, iteration: 42383
loss: 0.9702780246734619,grad_norm: 0.9802099514618399, iteration: 42384
loss: 0.9393926858901978,grad_norm: 0.9999991018812998, iteration: 42385
loss: 0.9513916969299316,grad_norm: 0.8757268946059343, iteration: 42386
loss: 0.9840279221534729,grad_norm: 0.9999990671748125, iteration: 42387
loss: 0.9862995147705078,grad_norm: 0.9028789138758951, iteration: 42388
loss: 0.9972213506698608,grad_norm: 0.9999992620056088, iteration: 42389
loss: 1.037414312362671,grad_norm: 0.9999992592235729, iteration: 42390
loss: 1.0247114896774292,grad_norm: 0.9999992008500673, iteration: 42391
loss: 1.04135000705719,grad_norm: 0.9612828829524379, iteration: 42392
loss: 1.020203709602356,grad_norm: 0.9194560373334925, iteration: 42393
loss: 1.0289853811264038,grad_norm: 0.9999991765137367, iteration: 42394
loss: 0.9506608843803406,grad_norm: 0.9999990722835942, iteration: 42395
loss: 1.031992793083191,grad_norm: 0.9999993965052688, iteration: 42396
loss: 1.0247007608413696,grad_norm: 0.8986212529363471, iteration: 42397
loss: 1.013749361038208,grad_norm: 0.9528184530077513, iteration: 42398
loss: 0.9543925523757935,grad_norm: 0.970407069478876, iteration: 42399
loss: 1.0341793298721313,grad_norm: 0.9999992552676162, iteration: 42400
loss: 0.9983065128326416,grad_norm: 0.9826554166697183, iteration: 42401
loss: 1.0117748975753784,grad_norm: 0.9999990896805627, iteration: 42402
loss: 1.0005172491073608,grad_norm: 0.9999991943777408, iteration: 42403
loss: 1.0222793817520142,grad_norm: 0.9551674681071951, iteration: 42404
loss: 1.0265982151031494,grad_norm: 0.9999991079433578, iteration: 42405
loss: 1.0342286825180054,grad_norm: 0.9999991368972693, iteration: 42406
loss: 0.9756967425346375,grad_norm: 0.9999990137634988, iteration: 42407
loss: 1.0360418558120728,grad_norm: 0.9911993983239598, iteration: 42408
loss: 1.014003038406372,grad_norm: 0.9999990688468272, iteration: 42409
loss: 0.9677015542984009,grad_norm: 0.9165534998030891, iteration: 42410
loss: 1.0150991678237915,grad_norm: 0.9999993154982189, iteration: 42411
loss: 1.0203412771224976,grad_norm: 0.9999991665634299, iteration: 42412
loss: 0.9953234791755676,grad_norm: 0.9635459923640792, iteration: 42413
loss: 1.0246875286102295,grad_norm: 0.8597481552207357, iteration: 42414
loss: 1.0206027030944824,grad_norm: 0.9999992665936938, iteration: 42415
loss: 0.9709380865097046,grad_norm: 0.9999990644229807, iteration: 42416
loss: 1.052142858505249,grad_norm: 0.9999991736333251, iteration: 42417
loss: 1.0091614723205566,grad_norm: 0.9999991447126224, iteration: 42418
loss: 1.0073134899139404,grad_norm: 0.999999147291426, iteration: 42419
loss: 1.0193506479263306,grad_norm: 0.9999991615835633, iteration: 42420
loss: 0.9984574317932129,grad_norm: 0.9180905788976148, iteration: 42421
loss: 1.0541813373565674,grad_norm: 0.9999991054064911, iteration: 42422
loss: 1.021192193031311,grad_norm: 0.727310117044704, iteration: 42423
loss: 1.0246222019195557,grad_norm: 0.928879235730598, iteration: 42424
loss: 0.9997521042823792,grad_norm: 0.9999993915712677, iteration: 42425
loss: 1.0146207809448242,grad_norm: 0.8380653228249205, iteration: 42426
loss: 1.030174732208252,grad_norm: 0.9999990435759873, iteration: 42427
loss: 1.018816590309143,grad_norm: 0.9977678074707, iteration: 42428
loss: 1.0224391222000122,grad_norm: 0.9999991621517174, iteration: 42429
loss: 1.0163267850875854,grad_norm: 0.9999992788926049, iteration: 42430
loss: 0.969090461730957,grad_norm: 0.9468270602200809, iteration: 42431
loss: 1.0213806629180908,grad_norm: 0.9999990429331477, iteration: 42432
loss: 0.9663143754005432,grad_norm: 0.9999991700738521, iteration: 42433
loss: 1.0082708597183228,grad_norm: 0.8676725833932674, iteration: 42434
loss: 1.0194077491760254,grad_norm: 0.9497733106604038, iteration: 42435
loss: 0.9942654371261597,grad_norm: 0.9999991697381471, iteration: 42436
loss: 1.0034016370773315,grad_norm: 0.9999991153743113, iteration: 42437
loss: 0.9761862754821777,grad_norm: 0.9712426159657405, iteration: 42438
loss: 1.0088837146759033,grad_norm: 0.8489089521673197, iteration: 42439
loss: 1.019830584526062,grad_norm: 0.8709670811744733, iteration: 42440
loss: 0.9951249957084656,grad_norm: 0.8184562889953038, iteration: 42441
loss: 1.0007538795471191,grad_norm: 0.9999989620582422, iteration: 42442
loss: 1.0306161642074585,grad_norm: 0.999999672297276, iteration: 42443
loss: 1.0143927335739136,grad_norm: 0.9999991996390529, iteration: 42444
loss: 0.9749180674552917,grad_norm: 0.9819718806589158, iteration: 42445
loss: 1.0256404876708984,grad_norm: 0.9490514509766846, iteration: 42446
loss: 0.9861289262771606,grad_norm: 0.9978214663102208, iteration: 42447
loss: 0.9645739793777466,grad_norm: 0.8679565883810612, iteration: 42448
loss: 0.9756280779838562,grad_norm: 0.8634082226603046, iteration: 42449
loss: 1.0071171522140503,grad_norm: 0.9999990665458461, iteration: 42450
loss: 0.9713767766952515,grad_norm: 0.9999989982929588, iteration: 42451
loss: 0.9838885068893433,grad_norm: 0.9999990356563498, iteration: 42452
loss: 1.0046967267990112,grad_norm: 0.999999199276809, iteration: 42453
loss: 0.9805906414985657,grad_norm: 0.9999992437308372, iteration: 42454
loss: 1.0250184535980225,grad_norm: 0.8307626155119269, iteration: 42455
loss: 0.9852272868156433,grad_norm: 0.9999991411098926, iteration: 42456
loss: 1.028383731842041,grad_norm: 0.9999989659297948, iteration: 42457
loss: 1.008292555809021,grad_norm: 0.9096132945714888, iteration: 42458
loss: 1.0262413024902344,grad_norm: 0.9999994895950022, iteration: 42459
loss: 1.005069613456726,grad_norm: 0.9329653183678162, iteration: 42460
loss: 0.9809926748275757,grad_norm: 0.9999990979534282, iteration: 42461
loss: 1.0349867343902588,grad_norm: 0.9957976280441974, iteration: 42462
loss: 1.0301131010055542,grad_norm: 0.9999992091630399, iteration: 42463
loss: 0.9641162753105164,grad_norm: 0.9999990866290562, iteration: 42464
loss: 1.0151764154434204,grad_norm: 0.9731004903814559, iteration: 42465
loss: 1.0172371864318848,grad_norm: 0.9925334451535948, iteration: 42466
loss: 0.9942302703857422,grad_norm: 0.9237907610847752, iteration: 42467
loss: 1.0000839233398438,grad_norm: 0.992082917258297, iteration: 42468
loss: 0.9790288805961609,grad_norm: 0.9999989731978889, iteration: 42469
loss: 1.0112329721450806,grad_norm: 0.8368221550378923, iteration: 42470
loss: 1.030691385269165,grad_norm: 0.9999992151565446, iteration: 42471
loss: 1.0272579193115234,grad_norm: 0.9999990896004992, iteration: 42472
loss: 1.0086873769760132,grad_norm: 0.9709583807286656, iteration: 42473
loss: 1.021821141242981,grad_norm: 0.9687497925459863, iteration: 42474
loss: 1.0587044954299927,grad_norm: 0.9999992514407883, iteration: 42475
loss: 1.0155378580093384,grad_norm: 0.945610831915548, iteration: 42476
loss: 0.9779634475708008,grad_norm: 0.8426391001402023, iteration: 42477
loss: 1.0346200466156006,grad_norm: 0.9999990440912, iteration: 42478
loss: 1.0413933992385864,grad_norm: 0.9999990491026102, iteration: 42479
loss: 1.0019975900650024,grad_norm: 0.9999992173256933, iteration: 42480
loss: 0.9942811727523804,grad_norm: 0.9999991535858698, iteration: 42481
loss: 0.9912852048873901,grad_norm: 0.9999991013464735, iteration: 42482
loss: 1.0262240171432495,grad_norm: 0.8919764316483214, iteration: 42483
loss: 0.995755672454834,grad_norm: 0.9999991343800936, iteration: 42484
loss: 1.0027990341186523,grad_norm: 0.9999992030220121, iteration: 42485
loss: 1.081714153289795,grad_norm: 0.9999998814013853, iteration: 42486
loss: 1.0092602968215942,grad_norm: 0.9808400365484462, iteration: 42487
loss: 0.9740126132965088,grad_norm: 0.9999992041230872, iteration: 42488
loss: 0.970223605632782,grad_norm: 0.9999990298172703, iteration: 42489
loss: 1.0215799808502197,grad_norm: 0.820653936668728, iteration: 42490
loss: 0.9866063594818115,grad_norm: 0.936684746462289, iteration: 42491
loss: 1.001779317855835,grad_norm: 0.7731351192210261, iteration: 42492
loss: 1.0086262226104736,grad_norm: 0.7933367752308308, iteration: 42493
loss: 1.0233054161071777,grad_norm: 0.9999993573236283, iteration: 42494
loss: 1.0007429122924805,grad_norm: 0.9089852170429117, iteration: 42495
loss: 1.012470006942749,grad_norm: 0.9999990445046882, iteration: 42496
loss: 0.9800938367843628,grad_norm: 0.9218924940333697, iteration: 42497
loss: 0.9976757168769836,grad_norm: 0.8562505749011944, iteration: 42498
loss: 1.0028244256973267,grad_norm: 0.9999989987035784, iteration: 42499
loss: 1.0047883987426758,grad_norm: 0.8962847451342962, iteration: 42500
loss: 0.9999618530273438,grad_norm: 0.8107455144343275, iteration: 42501
loss: 0.9992934465408325,grad_norm: 0.9999989208714031, iteration: 42502
loss: 0.9711505174636841,grad_norm: 0.8874026791292766, iteration: 42503
loss: 0.9853998422622681,grad_norm: 0.8379142357099388, iteration: 42504
loss: 0.9937783479690552,grad_norm: 0.8484185464450084, iteration: 42505
loss: 0.9927141070365906,grad_norm: 0.999999181089266, iteration: 42506
loss: 1.05207097530365,grad_norm: 0.9144526794369583, iteration: 42507
loss: 1.0157302618026733,grad_norm: 0.8757134236492159, iteration: 42508
loss: 0.9920333623886108,grad_norm: 0.9999992482879894, iteration: 42509
loss: 0.9822566509246826,grad_norm: 0.8651759378806648, iteration: 42510
loss: 0.9957358241081238,grad_norm: 0.8432359008858887, iteration: 42511
loss: 0.9672829508781433,grad_norm: 0.877545969072218, iteration: 42512
loss: 0.9926586151123047,grad_norm: 0.9999991800600715, iteration: 42513
loss: 1.0765691995620728,grad_norm: 0.9999998079094764, iteration: 42514
loss: 1.0185080766677856,grad_norm: 0.9999991900849898, iteration: 42515
loss: 1.0047513246536255,grad_norm: 0.9206217800965096, iteration: 42516
loss: 0.9892686009407043,grad_norm: 0.9442501578188341, iteration: 42517
loss: 0.9879331588745117,grad_norm: 0.9287849092413628, iteration: 42518
loss: 1.0034308433532715,grad_norm: 0.9999993738132332, iteration: 42519
loss: 1.0018821954727173,grad_norm: 0.9237075772197275, iteration: 42520
loss: 1.02217435836792,grad_norm: 0.9721435803371665, iteration: 42521
loss: 1.0054715871810913,grad_norm: 0.9999990811815016, iteration: 42522
loss: 1.0080060958862305,grad_norm: 0.9999990224933217, iteration: 42523
loss: 1.0058293342590332,grad_norm: 0.9999990171866096, iteration: 42524
loss: 1.0376818180084229,grad_norm: 0.9291556484442908, iteration: 42525
loss: 1.0118472576141357,grad_norm: 0.9999992484600649, iteration: 42526
loss: 0.9959343075752258,grad_norm: 0.9767277071609863, iteration: 42527
loss: 1.0188884735107422,grad_norm: 0.999999130472355, iteration: 42528
loss: 1.0198708772659302,grad_norm: 0.9999992107482967, iteration: 42529
loss: 0.9742150902748108,grad_norm: 0.9999990386522742, iteration: 42530
loss: 1.0220719575881958,grad_norm: 0.9999989021481168, iteration: 42531
loss: 1.0044203996658325,grad_norm: 0.7752936758538858, iteration: 42532
loss: 0.9963904023170471,grad_norm: 0.9999991006036015, iteration: 42533
loss: 1.0084964036941528,grad_norm: 0.8974182530111805, iteration: 42534
loss: 0.9726189970970154,grad_norm: 0.8600704469093593, iteration: 42535
loss: 1.0079153776168823,grad_norm: 0.9779805380130407, iteration: 42536
loss: 1.0103529691696167,grad_norm: 0.9999991002311711, iteration: 42537
loss: 0.9657870531082153,grad_norm: 0.9577180673397262, iteration: 42538
loss: 1.004481315612793,grad_norm: 0.9999998440314481, iteration: 42539
loss: 1.0315985679626465,grad_norm: 0.9999990608576024, iteration: 42540
loss: 0.9664428234100342,grad_norm: 0.999999219555848, iteration: 42541
loss: 0.9562037587165833,grad_norm: 0.9150865303405372, iteration: 42542
loss: 0.9769686460494995,grad_norm: 0.9999991594349724, iteration: 42543
loss: 0.9989929795265198,grad_norm: 0.8373037734426969, iteration: 42544
loss: 0.9945406913757324,grad_norm: 0.9029941061062229, iteration: 42545
loss: 0.9935148358345032,grad_norm: 0.9999992061760231, iteration: 42546
loss: 0.9797859787940979,grad_norm: 0.922716609009136, iteration: 42547
loss: 1.0181636810302734,grad_norm: 0.9999991412657758, iteration: 42548
loss: 1.0292068719863892,grad_norm: 0.9999996968909879, iteration: 42549
loss: 1.0171475410461426,grad_norm: 0.8285803489367745, iteration: 42550
loss: 1.026452898979187,grad_norm: 0.9999990450437211, iteration: 42551
loss: 0.9917027950286865,grad_norm: 0.9999993105756735, iteration: 42552
loss: 0.9746888875961304,grad_norm: 0.9999991541152397, iteration: 42553
loss: 0.9754087924957275,grad_norm: 0.9636578947272093, iteration: 42554
loss: 1.0328789949417114,grad_norm: 0.999999465323135, iteration: 42555
loss: 0.9958316683769226,grad_norm: 0.9483822139244497, iteration: 42556
loss: 0.979084312915802,grad_norm: 0.9873256917614797, iteration: 42557
loss: 1.009860873222351,grad_norm: 0.9999993442209736, iteration: 42558
loss: 0.9955736398696899,grad_norm: 0.9664942971176151, iteration: 42559
loss: 1.0018264055252075,grad_norm: 0.9508457345443038, iteration: 42560
loss: 1.021868109703064,grad_norm: 0.9098041570853509, iteration: 42561
loss: 0.9518424272537231,grad_norm: 0.8362770894827803, iteration: 42562
loss: 0.9885254502296448,grad_norm: 0.8636631352247146, iteration: 42563
loss: 0.948257565498352,grad_norm: 0.9881635087890228, iteration: 42564
loss: 1.0206074714660645,grad_norm: 0.9704896307525818, iteration: 42565
loss: 0.9908686280250549,grad_norm: 0.9999992049550127, iteration: 42566
loss: 1.0190088748931885,grad_norm: 0.9999990562911885, iteration: 42567
loss: 1.0077779293060303,grad_norm: 0.9603821023094413, iteration: 42568
loss: 1.008768916130066,grad_norm: 0.999999519927284, iteration: 42569
loss: 1.0326662063598633,grad_norm: 0.9943208497482454, iteration: 42570
loss: 1.0095027685165405,grad_norm: 0.9999997634419672, iteration: 42571
loss: 1.0324523448944092,grad_norm: 0.9999989938465521, iteration: 42572
loss: 1.02401864528656,grad_norm: 0.9999994932519229, iteration: 42573
loss: 1.002475380897522,grad_norm: 0.9784772141826531, iteration: 42574
loss: 1.0291422605514526,grad_norm: 0.985730838439601, iteration: 42575
loss: 0.9803825616836548,grad_norm: 0.9999990369214942, iteration: 42576
loss: 0.9842641353607178,grad_norm: 0.9999990224210951, iteration: 42577
loss: 0.9720823168754578,grad_norm: 0.9720131036210268, iteration: 42578
loss: 0.9713180661201477,grad_norm: 0.9999989431316049, iteration: 42579
loss: 1.014445185661316,grad_norm: 0.9375862697009023, iteration: 42580
loss: 1.0004645586013794,grad_norm: 0.97751613684138, iteration: 42581
loss: 1.008340835571289,grad_norm: 0.9586473633860644, iteration: 42582
loss: 1.0450338125228882,grad_norm: 0.9999990850062247, iteration: 42583
loss: 0.9730830192565918,grad_norm: 0.815633719988282, iteration: 42584
loss: 1.0169779062271118,grad_norm: 0.975240621787584, iteration: 42585
loss: 1.0527169704437256,grad_norm: 0.9867098554201962, iteration: 42586
loss: 0.9867496490478516,grad_norm: 0.9368400738488311, iteration: 42587
loss: 1.0283236503601074,grad_norm: 0.8566995752027282, iteration: 42588
loss: 0.9806976318359375,grad_norm: 0.9261037245741162, iteration: 42589
loss: 0.9692487120628357,grad_norm: 0.8591291128872736, iteration: 42590
loss: 1.049136996269226,grad_norm: 0.7534292814317995, iteration: 42591
loss: 0.9993440508842468,grad_norm: 0.9999991282702844, iteration: 42592
loss: 0.980552077293396,grad_norm: 0.8795583846976297, iteration: 42593
loss: 1.0173377990722656,grad_norm: 0.9999993630120904, iteration: 42594
loss: 0.9379359483718872,grad_norm: 0.9999990833582714, iteration: 42595
loss: 1.0565959215164185,grad_norm: 0.9999997613861532, iteration: 42596
loss: 0.9593884348869324,grad_norm: 0.9999992296510222, iteration: 42597
loss: 0.961863100528717,grad_norm: 0.9999990839492069, iteration: 42598
loss: 1.0313042402267456,grad_norm: 0.9999991860905434, iteration: 42599
loss: 0.9856237173080444,grad_norm: 0.9999990051843033, iteration: 42600
loss: 1.000144124031067,grad_norm: 0.9999990057579174, iteration: 42601
loss: 0.9990571737289429,grad_norm: 0.9999991402822227, iteration: 42602
loss: 1.0238507986068726,grad_norm: 0.9999992811332092, iteration: 42603
loss: 0.9831202626228333,grad_norm: 0.8441560232627505, iteration: 42604
loss: 0.9948599338531494,grad_norm: 0.9999993404385815, iteration: 42605
loss: 1.0110442638397217,grad_norm: 0.9999990337704227, iteration: 42606
loss: 0.9761171340942383,grad_norm: 0.9693004381992665, iteration: 42607
loss: 1.0486371517181396,grad_norm: 0.9999993489648709, iteration: 42608
loss: 1.125006079673767,grad_norm: 0.9999996879948887, iteration: 42609
loss: 1.0066282749176025,grad_norm: 0.9999991378857371, iteration: 42610
loss: 1.0039632320404053,grad_norm: 0.9410043743491204, iteration: 42611
loss: 1.000623345375061,grad_norm: 0.9999991257724308, iteration: 42612
loss: 0.9780162572860718,grad_norm: 0.8451712297462207, iteration: 42613
loss: 1.0128744840621948,grad_norm: 0.9456949700082077, iteration: 42614
loss: 1.0069178342819214,grad_norm: 0.9999991387840232, iteration: 42615
loss: 1.0012749433517456,grad_norm: 0.9999993891992776, iteration: 42616
loss: 1.0219649076461792,grad_norm: 0.8936821544135056, iteration: 42617
loss: 0.9781365394592285,grad_norm: 0.8826335348193958, iteration: 42618
loss: 1.0476584434509277,grad_norm: 0.9999994087063092, iteration: 42619
loss: 1.004754900932312,grad_norm: 0.9999993333915265, iteration: 42620
loss: 1.007636547088623,grad_norm: 0.9999991770809563, iteration: 42621
loss: 1.0248926877975464,grad_norm: 0.9549909671441074, iteration: 42622
loss: 0.9987104535102844,grad_norm: 0.8612977167092191, iteration: 42623
loss: 1.0389540195465088,grad_norm: 0.9999996523670662, iteration: 42624
loss: 1.0175501108169556,grad_norm: 0.959478116309728, iteration: 42625
loss: 1.0130939483642578,grad_norm: 0.8585274196495633, iteration: 42626
loss: 1.0134981870651245,grad_norm: 0.9660111786524138, iteration: 42627
loss: 0.9898247718811035,grad_norm: 0.9999990999142672, iteration: 42628
loss: 1.0818887948989868,grad_norm: 0.9999990540392143, iteration: 42629
loss: 1.0145843029022217,grad_norm: 0.930440652028187, iteration: 42630
loss: 1.0097562074661255,grad_norm: 0.9999997080178022, iteration: 42631
loss: 0.9920965433120728,grad_norm: 0.9758273038305272, iteration: 42632
loss: 1.0120645761489868,grad_norm: 0.9999989717983722, iteration: 42633
loss: 1.0003300905227661,grad_norm: 0.9999992626625299, iteration: 42634
loss: 1.065706729888916,grad_norm: 0.9999991471050506, iteration: 42635
loss: 1.004723072052002,grad_norm: 0.9854607340174001, iteration: 42636
loss: 1.0163072347640991,grad_norm: 0.999999178716862, iteration: 42637
loss: 1.0358861684799194,grad_norm: 0.9999998227083403, iteration: 42638
loss: 0.9920772314071655,grad_norm: 0.9999992259790391, iteration: 42639
loss: 0.9926580190658569,grad_norm: 0.9999993608130852, iteration: 42640
loss: 1.0206061601638794,grad_norm: 0.9999995144652978, iteration: 42641
loss: 0.9976527690887451,grad_norm: 0.9999991350301916, iteration: 42642
loss: 1.0065274238586426,grad_norm: 0.931964684941777, iteration: 42643
loss: 0.980010449886322,grad_norm: 0.9999990388571016, iteration: 42644
loss: 1.0096924304962158,grad_norm: 0.9999991468052082, iteration: 42645
loss: 1.0145851373672485,grad_norm: 0.9728822225160491, iteration: 42646
loss: 1.011030673980713,grad_norm: 0.902834666668576, iteration: 42647
loss: 0.9870155453681946,grad_norm: 0.9924543393232215, iteration: 42648
loss: 1.0008941888809204,grad_norm: 0.9999994287683899, iteration: 42649
loss: 0.9864708781242371,grad_norm: 0.9999991429333176, iteration: 42650
loss: 0.9991438984870911,grad_norm: 0.9999992606577027, iteration: 42651
loss: 1.0003232955932617,grad_norm: 0.880242369526682, iteration: 42652
loss: 1.0100634098052979,grad_norm: 0.7940720471439945, iteration: 42653
loss: 0.9827818274497986,grad_norm: 0.9255548952633268, iteration: 42654
loss: 1.0327141284942627,grad_norm: 0.9999994049429208, iteration: 42655
loss: 0.9967545866966248,grad_norm: 0.9999990779935631, iteration: 42656
loss: 1.0306655168533325,grad_norm: 0.9999990166839989, iteration: 42657
loss: 1.1721343994140625,grad_norm: 0.9999992032844867, iteration: 42658
loss: 1.0037392377853394,grad_norm: 0.9999990457150598, iteration: 42659
loss: 0.989507794380188,grad_norm: 0.8818182614121066, iteration: 42660
loss: 1.0354092121124268,grad_norm: 0.9222114015926017, iteration: 42661
loss: 1.0233081579208374,grad_norm: 0.9999992375729125, iteration: 42662
loss: 1.0043288469314575,grad_norm: 0.999999075131358, iteration: 42663
loss: 1.061640977859497,grad_norm: 0.9999997525650398, iteration: 42664
loss: 1.0221096277236938,grad_norm: 0.9999995192918425, iteration: 42665
loss: 1.0157238245010376,grad_norm: 0.9999994417786552, iteration: 42666
loss: 0.9986138939857483,grad_norm: 0.9999991028298848, iteration: 42667
loss: 1.0452778339385986,grad_norm: 0.9999996978467005, iteration: 42668
loss: 0.9939332604408264,grad_norm: 0.9999989841390761, iteration: 42669
loss: 0.9955441355705261,grad_norm: 0.916854788532587, iteration: 42670
loss: 1.0211039781570435,grad_norm: 0.9608489472274018, iteration: 42671
loss: 0.9635546207427979,grad_norm: 0.9751643366324909, iteration: 42672
loss: 0.9931878447532654,grad_norm: 0.8317265489838355, iteration: 42673
loss: 0.9677890539169312,grad_norm: 0.8824526039779368, iteration: 42674
loss: 0.9947795867919922,grad_norm: 0.8879022637119157, iteration: 42675
loss: 0.9953721761703491,grad_norm: 0.9999990041184056, iteration: 42676
loss: 1.0383663177490234,grad_norm: 0.9999997602853143, iteration: 42677
loss: 0.9758231043815613,grad_norm: 0.9999989558721353, iteration: 42678
loss: 1.0282660722732544,grad_norm: 0.9689461911276116, iteration: 42679
loss: 1.0004233121871948,grad_norm: 0.9009526665443941, iteration: 42680
loss: 0.9918686151504517,grad_norm: 0.9999990903806549, iteration: 42681
loss: 1.0682302713394165,grad_norm: 0.9999995420546484, iteration: 42682
loss: 1.006160020828247,grad_norm: 0.9341052604270539, iteration: 42683
loss: 1.0181812047958374,grad_norm: 0.9999991354475444, iteration: 42684
loss: 1.0374059677124023,grad_norm: 0.9352097859006178, iteration: 42685
loss: 1.002331256866455,grad_norm: 0.8378732558775248, iteration: 42686
loss: 1.0002870559692383,grad_norm: 0.9999991569658107, iteration: 42687
loss: 0.9774099588394165,grad_norm: 0.9999991345032345, iteration: 42688
loss: 1.0000030994415283,grad_norm: 0.8282220204706099, iteration: 42689
loss: 0.9826286435127258,grad_norm: 0.9999996185882694, iteration: 42690
loss: 0.9923967123031616,grad_norm: 0.9999990097893267, iteration: 42691
loss: 0.964396595954895,grad_norm: 0.8977891526723125, iteration: 42692
loss: 1.0178226232528687,grad_norm: 0.9765018861151736, iteration: 42693
loss: 1.017439842224121,grad_norm: 0.8454218984466721, iteration: 42694
loss: 1.0244823694229126,grad_norm: 0.9999992298213369, iteration: 42695
loss: 0.9775757193565369,grad_norm: 0.9999992201777234, iteration: 42696
loss: 0.9685122966766357,grad_norm: 0.986419852845401, iteration: 42697
loss: 1.0093532800674438,grad_norm: 0.9299503624864186, iteration: 42698
loss: 1.0248842239379883,grad_norm: 0.9861345350377271, iteration: 42699
loss: 0.9794618487358093,grad_norm: 0.90655678474411, iteration: 42700
loss: 1.0155714750289917,grad_norm: 0.8658275874716203, iteration: 42701
loss: 0.9956489205360413,grad_norm: 0.9999991716610177, iteration: 42702
loss: 1.0129860639572144,grad_norm: 0.9199118300673467, iteration: 42703
loss: 1.0152403116226196,grad_norm: 0.9999992158340876, iteration: 42704
loss: 1.041914701461792,grad_norm: 0.9999993510031712, iteration: 42705
loss: 1.0030584335327148,grad_norm: 0.9933374014179872, iteration: 42706
loss: 1.0313926935195923,grad_norm: 0.9999993893348974, iteration: 42707
loss: 1.0148829221725464,grad_norm: 0.7171537490250285, iteration: 42708
loss: 0.9741641283035278,grad_norm: 0.8495912986862121, iteration: 42709
loss: 0.9842746257781982,grad_norm: 0.9999992463625547, iteration: 42710
loss: 1.0525104999542236,grad_norm: 0.9999992373343108, iteration: 42711
loss: 1.032952904701233,grad_norm: 0.9669186876079785, iteration: 42712
loss: 1.0018197298049927,grad_norm: 0.915457013426483, iteration: 42713
loss: 0.9659996628761292,grad_norm: 0.9999991617507934, iteration: 42714
loss: 1.038270115852356,grad_norm: 0.9950133270671202, iteration: 42715
loss: 1.0165636539459229,grad_norm: 0.9999990779167247, iteration: 42716
loss: 1.0282373428344727,grad_norm: 0.9999997165839656, iteration: 42717
loss: 1.0053850412368774,grad_norm: 0.9144916415638872, iteration: 42718
loss: 0.9881091117858887,grad_norm: 0.9999991322531239, iteration: 42719
loss: 0.9650242328643799,grad_norm: 0.999999121400034, iteration: 42720
loss: 1.0592375993728638,grad_norm: 0.9231463565561444, iteration: 42721
loss: 1.0103137493133545,grad_norm: 0.9999991137971054, iteration: 42722
loss: 1.035691261291504,grad_norm: 0.9600351117801293, iteration: 42723
loss: 1.003263235092163,grad_norm: 0.9999990641329518, iteration: 42724
loss: 1.021411657333374,grad_norm: 0.9999991931708595, iteration: 42725
loss: 1.01340913772583,grad_norm: 0.9999996938940833, iteration: 42726
loss: 1.0077104568481445,grad_norm: 0.9999990925149792, iteration: 42727
loss: 1.017160415649414,grad_norm: 0.9999990522794129, iteration: 42728
loss: 1.0425198078155518,grad_norm: 0.9999990943922716, iteration: 42729
loss: 1.0277665853500366,grad_norm: 0.957803911931139, iteration: 42730
loss: 1.0029174089431763,grad_norm: 0.9999990138246606, iteration: 42731
loss: 0.9892969131469727,grad_norm: 0.9999995318793431, iteration: 42732
loss: 1.0262484550476074,grad_norm: 0.9999992129307776, iteration: 42733
loss: 1.0266386270523071,grad_norm: 0.8631073447552242, iteration: 42734
loss: 1.0109516382217407,grad_norm: 0.9999990991036473, iteration: 42735
loss: 1.0140182971954346,grad_norm: 0.9999998946194427, iteration: 42736
loss: 0.9871096014976501,grad_norm: 0.999999209203229, iteration: 42737
loss: 1.0083082914352417,grad_norm: 0.9999993244327963, iteration: 42738
loss: 1.023066520690918,grad_norm: 0.9999993161611533, iteration: 42739
loss: 0.9797214269638062,grad_norm: 0.9999990181562906, iteration: 42740
loss: 1.0203355550765991,grad_norm: 0.8213455722487601, iteration: 42741
loss: 1.0244947671890259,grad_norm: 0.9999992836437392, iteration: 42742
loss: 0.9837703108787537,grad_norm: 0.807632258408544, iteration: 42743
loss: 1.0783480405807495,grad_norm: 0.9999996618665192, iteration: 42744
loss: 1.0016969442367554,grad_norm: 0.9999992495301893, iteration: 42745
loss: 1.0170238018035889,grad_norm: 0.9999993123815877, iteration: 42746
loss: 0.9995949268341064,grad_norm: 0.9856523294398074, iteration: 42747
loss: 1.0215458869934082,grad_norm: 0.9999994178418113, iteration: 42748
loss: 1.027666687965393,grad_norm: 0.9427244274421732, iteration: 42749
loss: 1.0326058864593506,grad_norm: 0.9999991182263048, iteration: 42750
loss: 1.0149399042129517,grad_norm: 0.8804691656013215, iteration: 42751
loss: 0.9594420194625854,grad_norm: 0.9999993349226984, iteration: 42752
loss: 0.9794702529907227,grad_norm: 0.9307212060015434, iteration: 42753
loss: 0.9874487519264221,grad_norm: 0.7748483347653928, iteration: 42754
loss: 1.0143165588378906,grad_norm: 0.9439719754399399, iteration: 42755
loss: 1.0663483142852783,grad_norm: 0.9999997208544207, iteration: 42756
loss: 0.9911685585975647,grad_norm: 0.9928169109720543, iteration: 42757
loss: 0.960743248462677,grad_norm: 0.9999990413509152, iteration: 42758
loss: 0.9662539958953857,grad_norm: 0.8761894376844827, iteration: 42759
loss: 1.0100573301315308,grad_norm: 0.9454618844601913, iteration: 42760
loss: 1.0005172491073608,grad_norm: 0.9999991646981232, iteration: 42761
loss: 1.0350446701049805,grad_norm: 0.9999991991763534, iteration: 42762
loss: 0.9448791742324829,grad_norm: 0.882316879950278, iteration: 42763
loss: 0.997576117515564,grad_norm: 0.9635939153000487, iteration: 42764
loss: 1.0061959028244019,grad_norm: 0.9999993682670244, iteration: 42765
loss: 1.0271905660629272,grad_norm: 0.9999989987403024, iteration: 42766
loss: 1.0363109111785889,grad_norm: 0.9728000119231985, iteration: 42767
loss: 1.0293546915054321,grad_norm: 0.9072647988089763, iteration: 42768
loss: 1.0448137521743774,grad_norm: 0.999999212895567, iteration: 42769
loss: 1.04441237449646,grad_norm: 0.9641283699865312, iteration: 42770
loss: 0.9816603064537048,grad_norm: 0.9999992119244099, iteration: 42771
loss: 0.9902406930923462,grad_norm: 0.8915477888366989, iteration: 42772
loss: 1.024143934249878,grad_norm: 0.9999990835251716, iteration: 42773
loss: 1.0096797943115234,grad_norm: 0.9130340996923721, iteration: 42774
loss: 1.0111249685287476,grad_norm: 0.9473167327481923, iteration: 42775
loss: 0.9600058794021606,grad_norm: 0.8281760234608607, iteration: 42776
loss: 0.9813100695610046,grad_norm: 0.9999991884312508, iteration: 42777
loss: 1.0096360445022583,grad_norm: 0.9751530442205861, iteration: 42778
loss: 1.01534104347229,grad_norm: 0.9999990790431176, iteration: 42779
loss: 1.0005722045898438,grad_norm: 0.9124617927908188, iteration: 42780
loss: 1.03300142288208,grad_norm: 0.8579854915141991, iteration: 42781
loss: 1.0559450387954712,grad_norm: 0.9999992920279441, iteration: 42782
loss: 1.0194926261901855,grad_norm: 0.9999997957736716, iteration: 42783
loss: 1.0042001008987427,grad_norm: 0.930248432179862, iteration: 42784
loss: 1.0152027606964111,grad_norm: 0.9999991735421272, iteration: 42785
loss: 1.0216926336288452,grad_norm: 0.9999991796037783, iteration: 42786
loss: 1.0059553384780884,grad_norm: 0.9999990933961201, iteration: 42787
loss: 1.0013244152069092,grad_norm: 0.9916881187491565, iteration: 42788
loss: 0.99395352602005,grad_norm: 0.8615201167315105, iteration: 42789
loss: 1.0257867574691772,grad_norm: 0.9999991867848115, iteration: 42790
loss: 1.1123287677764893,grad_norm: 0.9999993208710803, iteration: 42791
loss: 1.0030970573425293,grad_norm: 0.9999990129758544, iteration: 42792
loss: 0.9746857285499573,grad_norm: 0.8995594139882337, iteration: 42793
loss: 1.0186314582824707,grad_norm: 0.8186015906429195, iteration: 42794
loss: 1.0149842500686646,grad_norm: 0.8790052557919193, iteration: 42795
loss: 0.9974355697631836,grad_norm: 0.9999989345879055, iteration: 42796
loss: 0.9884030222892761,grad_norm: 0.8906136994199458, iteration: 42797
loss: 0.990914523601532,grad_norm: 0.9999992514991753, iteration: 42798
loss: 0.9914751648902893,grad_norm: 0.999999100500151, iteration: 42799
loss: 0.9922593235969543,grad_norm: 0.9999990662484549, iteration: 42800
loss: 1.0050932168960571,grad_norm: 0.9999990620628664, iteration: 42801
loss: 0.9901513457298279,grad_norm: 0.9999991031845956, iteration: 42802
loss: 1.0136610269546509,grad_norm: 0.9999991037810237, iteration: 42803
loss: 0.9736379981040955,grad_norm: 0.8274850370350109, iteration: 42804
loss: 1.0245240926742554,grad_norm: 0.9999996117789707, iteration: 42805
loss: 0.9651654362678528,grad_norm: 0.8185273550064923, iteration: 42806
loss: 0.990233302116394,grad_norm: 0.8801395765740156, iteration: 42807
loss: 1.034478783607483,grad_norm: 0.9999991255963713, iteration: 42808
loss: 0.979320228099823,grad_norm: 0.8980448952275043, iteration: 42809
loss: 1.0343337059020996,grad_norm: 0.9157393912808599, iteration: 42810
loss: 1.0084060430526733,grad_norm: 0.9999989972830348, iteration: 42811
loss: 1.0250952243804932,grad_norm: 0.9315822571410487, iteration: 42812
loss: 0.9939200282096863,grad_norm: 0.8342611348490274, iteration: 42813
loss: 0.9679847955703735,grad_norm: 0.9954974412538725, iteration: 42814
loss: 1.0320748090744019,grad_norm: 0.9999992104694082, iteration: 42815
loss: 1.1148420572280884,grad_norm: 0.999999502333945, iteration: 42816
loss: 1.012749433517456,grad_norm: 0.9144484448238385, iteration: 42817
loss: 1.009739875793457,grad_norm: 0.9999991972210457, iteration: 42818
loss: 1.0416007041931152,grad_norm: 0.9999989338386834, iteration: 42819
loss: 0.9668741226196289,grad_norm: 0.9999992189409074, iteration: 42820
loss: 0.9818962812423706,grad_norm: 0.9906701366850771, iteration: 42821
loss: 1.010840892791748,grad_norm: 0.9999993414340523, iteration: 42822
loss: 0.9962286353111267,grad_norm: 0.9999989827206233, iteration: 42823
loss: 0.9698057174682617,grad_norm: 0.9231031504089792, iteration: 42824
loss: 1.0226608514785767,grad_norm: 0.9999992056796493, iteration: 42825
loss: 0.9659016728401184,grad_norm: 0.9999990799451604, iteration: 42826
loss: 1.0122172832489014,grad_norm: 0.937944368065781, iteration: 42827
loss: 0.9662567377090454,grad_norm: 0.9538021304874581, iteration: 42828
loss: 1.0304688215255737,grad_norm: 0.864739674573683, iteration: 42829
loss: 0.972474992275238,grad_norm: 0.9999991768978745, iteration: 42830
loss: 1.0300971269607544,grad_norm: 0.9999992300305323, iteration: 42831
loss: 0.9883161783218384,grad_norm: 0.8968433296763423, iteration: 42832
loss: 1.010809302330017,grad_norm: 0.9967965657264144, iteration: 42833
loss: 0.998716413974762,grad_norm: 0.9999990551111132, iteration: 42834
loss: 0.9618253707885742,grad_norm: 0.9746596012070408, iteration: 42835
loss: 1.0081841945648193,grad_norm: 0.8459524914969712, iteration: 42836
loss: 1.013045310974121,grad_norm: 0.9130955606051662, iteration: 42837
loss: 0.9559856653213501,grad_norm: 0.9999991838668463, iteration: 42838
loss: 1.0078494548797607,grad_norm: 0.9562451939804029, iteration: 42839
loss: 1.02256178855896,grad_norm: 0.9999990027334885, iteration: 42840
loss: 1.0334713459014893,grad_norm: 0.9999994263729557, iteration: 42841
loss: 0.9966820478439331,grad_norm: 0.9999997573949083, iteration: 42842
loss: 1.0149595737457275,grad_norm: 0.9679771697211983, iteration: 42843
loss: 1.0566426515579224,grad_norm: 0.9999993009714738, iteration: 42844
loss: 0.97930908203125,grad_norm: 0.989571691174033, iteration: 42845
loss: 1.0066983699798584,grad_norm: 0.9999992505039247, iteration: 42846
loss: 1.0163851976394653,grad_norm: 0.944120546317694, iteration: 42847
loss: 1.0002381801605225,grad_norm: 0.9999991576649744, iteration: 42848
loss: 1.0163646936416626,grad_norm: 0.9999989897017499, iteration: 42849
loss: 1.0632026195526123,grad_norm: 0.9999992677768756, iteration: 42850
loss: 0.9987643957138062,grad_norm: 0.865381435827853, iteration: 42851
loss: 0.9932719469070435,grad_norm: 0.9202331590728944, iteration: 42852
loss: 0.9984421730041504,grad_norm: 0.9999998378946863, iteration: 42853
loss: 0.9832212924957275,grad_norm: 0.8885240132112149, iteration: 42854
loss: 1.0247055292129517,grad_norm: 0.9999995398511848, iteration: 42855
loss: 0.9861624836921692,grad_norm: 0.9543281113225441, iteration: 42856
loss: 0.9863128066062927,grad_norm: 0.9999992154402699, iteration: 42857
loss: 1.018168330192566,grad_norm: 0.9999991769434208, iteration: 42858
loss: 1.0342758893966675,grad_norm: 0.8074079117312288, iteration: 42859
loss: 1.0016512870788574,grad_norm: 0.9564247016290844, iteration: 42860
loss: 1.0160232782363892,grad_norm: 0.9718998251586078, iteration: 42861
loss: 1.0035390853881836,grad_norm: 0.9999998923557555, iteration: 42862
loss: 0.9804222583770752,grad_norm: 0.9999991135796801, iteration: 42863
loss: 0.9826059937477112,grad_norm: 0.8748320220054743, iteration: 42864
loss: 1.0012545585632324,grad_norm: 0.9961123122449254, iteration: 42865
loss: 0.9769577383995056,grad_norm: 0.9999989639509107, iteration: 42866
loss: 1.0251580476760864,grad_norm: 0.999999212754092, iteration: 42867
loss: 1.0077849626541138,grad_norm: 0.9596610046461571, iteration: 42868
loss: 1.0021781921386719,grad_norm: 0.9335915062172159, iteration: 42869
loss: 1.005942463874817,grad_norm: 0.9999993706695064, iteration: 42870
loss: 0.9877094626426697,grad_norm: 0.9815676404192227, iteration: 42871
loss: 1.0260589122772217,grad_norm: 0.9340833891678458, iteration: 42872
loss: 0.999918520450592,grad_norm: 0.999999269507914, iteration: 42873
loss: 0.9929512143135071,grad_norm: 0.9832202813461125, iteration: 42874
loss: 1.0170700550079346,grad_norm: 0.9999993361247116, iteration: 42875
loss: 1.0156066417694092,grad_norm: 0.951478186953987, iteration: 42876
loss: 1.0225881338119507,grad_norm: 0.9999990936929174, iteration: 42877
loss: 1.0085172653198242,grad_norm: 0.9326541590505221, iteration: 42878
loss: 1.0244252681732178,grad_norm: 0.9457769098410844, iteration: 42879
loss: 1.0048211812973022,grad_norm: 0.8905923349561307, iteration: 42880
loss: 1.0012916326522827,grad_norm: 0.9999991307471241, iteration: 42881
loss: 1.0176892280578613,grad_norm: 0.9999990664611368, iteration: 42882
loss: 1.0216608047485352,grad_norm: 0.9999990696998877, iteration: 42883
loss: 1.000032901763916,grad_norm: 0.9398335766745703, iteration: 42884
loss: 0.9614742398262024,grad_norm: 0.9004033870662854, iteration: 42885
loss: 1.0289965867996216,grad_norm: 0.9999993976864902, iteration: 42886
loss: 1.0204823017120361,grad_norm: 0.8171525684555716, iteration: 42887
loss: 1.0299183130264282,grad_norm: 0.9747761757539506, iteration: 42888
loss: 0.9991459250450134,grad_norm: 0.9292547773851256, iteration: 42889
loss: 0.9975151419639587,grad_norm: 0.963281124614977, iteration: 42890
loss: 1.0217974185943604,grad_norm: 0.9350407067468824, iteration: 42891
loss: 1.019692301750183,grad_norm: 0.9999992091451213, iteration: 42892
loss: 1.0094575881958008,grad_norm: 0.9599443905398325, iteration: 42893
loss: 1.0066633224487305,grad_norm: 0.9130363651346555, iteration: 42894
loss: 0.9894388914108276,grad_norm: 0.9999992925745353, iteration: 42895
loss: 1.0237181186676025,grad_norm: 0.999999291467521, iteration: 42896
loss: 1.0213698148727417,grad_norm: 0.9297664590892563, iteration: 42897
loss: 1.0010498762130737,grad_norm: 0.9999992604583424, iteration: 42898
loss: 0.943824052810669,grad_norm: 0.9999990553688358, iteration: 42899
loss: 1.0401588678359985,grad_norm: 0.7923952905845218, iteration: 42900
loss: 1.0147510766983032,grad_norm: 0.9846300650629468, iteration: 42901
loss: 1.0030027627944946,grad_norm: 0.9999995392401179, iteration: 42902
loss: 0.9899166226387024,grad_norm: 0.8084328326232044, iteration: 42903
loss: 1.001549243927002,grad_norm: 0.9999991320617123, iteration: 42904
loss: 1.00146484375,grad_norm: 0.8871460020193795, iteration: 42905
loss: 0.9877931475639343,grad_norm: 0.9999991168833541, iteration: 42906
loss: 1.1276224851608276,grad_norm: 0.9999999867202847, iteration: 42907
loss: 1.0142420530319214,grad_norm: 0.999998974204437, iteration: 42908
loss: 1.0322238206863403,grad_norm: 0.9999991233830232, iteration: 42909
loss: 1.0017297267913818,grad_norm: 0.8994583894960104, iteration: 42910
loss: 0.9639970660209656,grad_norm: 0.9999991798136898, iteration: 42911
loss: 1.0114020109176636,grad_norm: 0.9798633596659584, iteration: 42912
loss: 1.0017331838607788,grad_norm: 0.9348987901291002, iteration: 42913
loss: 1.0403664112091064,grad_norm: 0.9999994791821317, iteration: 42914
loss: 1.0539131164550781,grad_norm: 0.9999992620986814, iteration: 42915
loss: 1.0076528787612915,grad_norm: 0.9999997565524558, iteration: 42916
loss: 0.9661055207252502,grad_norm: 0.9999991160021432, iteration: 42917
loss: 1.0053184032440186,grad_norm: 0.835028305533973, iteration: 42918
loss: 0.9615525007247925,grad_norm: 0.9616919824413594, iteration: 42919
loss: 1.0064951181411743,grad_norm: 0.9999995152285225, iteration: 42920
loss: 1.022503137588501,grad_norm: 0.8780067916381911, iteration: 42921
loss: 1.0022943019866943,grad_norm: 0.9392013660186737, iteration: 42922
loss: 0.9769753813743591,grad_norm: 0.9612150056194639, iteration: 42923
loss: 1.0035547018051147,grad_norm: 0.838811931655444, iteration: 42924
loss: 1.0005851984024048,grad_norm: 0.9090856689887832, iteration: 42925
loss: 1.0260878801345825,grad_norm: 0.9329552757017212, iteration: 42926
loss: 0.9830595254898071,grad_norm: 0.9478937075307691, iteration: 42927
loss: 1.0149199962615967,grad_norm: 0.9999994617673726, iteration: 42928
loss: 1.024198055267334,grad_norm: 0.9940470959475872, iteration: 42929
loss: 1.034717321395874,grad_norm: 0.9620758590272476, iteration: 42930
loss: 1.0235463380813599,grad_norm: 0.9847803549013645, iteration: 42931
loss: 1.0073047876358032,grad_norm: 0.9275612254439334, iteration: 42932
loss: 1.0066484212875366,grad_norm: 0.9995550976823885, iteration: 42933
loss: 0.9629121422767639,grad_norm: 0.9999996977518287, iteration: 42934
loss: 1.0320518016815186,grad_norm: 0.9999991206044706, iteration: 42935
loss: 0.9746313691139221,grad_norm: 0.9999990585646658, iteration: 42936
loss: 1.0188654661178589,grad_norm: 0.9999998809876415, iteration: 42937
loss: 1.0076946020126343,grad_norm: 0.8495006125845582, iteration: 42938
loss: 1.0199960470199585,grad_norm: 0.9999990684407789, iteration: 42939
loss: 0.9906149506568909,grad_norm: 0.9020629867170704, iteration: 42940
loss: 1.0541437864303589,grad_norm: 0.9999996136504448, iteration: 42941
loss: 1.007979393005371,grad_norm: 0.9799404195535438, iteration: 42942
loss: 1.0013949871063232,grad_norm: 0.9999990060688148, iteration: 42943
loss: 1.044712781906128,grad_norm: 0.9999996502427654, iteration: 42944
loss: 1.0480321645736694,grad_norm: 0.9743386687711901, iteration: 42945
loss: 1.0500340461730957,grad_norm: 0.9999997111565162, iteration: 42946
loss: 1.0670033693313599,grad_norm: 0.999999723727128, iteration: 42947
loss: 0.9885534644126892,grad_norm: 0.9509032493794695, iteration: 42948
loss: 1.0276057720184326,grad_norm: 0.9999990316903365, iteration: 42949
loss: 0.9926743507385254,grad_norm: 0.887437549680548, iteration: 42950
loss: 1.053107738494873,grad_norm: 0.9999991721917546, iteration: 42951
loss: 1.025430679321289,grad_norm: 0.9999995045475158, iteration: 42952
loss: 0.9967104196548462,grad_norm: 0.9999989759650114, iteration: 42953
loss: 1.0241819620132446,grad_norm: 0.9999992245065839, iteration: 42954
loss: 1.0254464149475098,grad_norm: 0.9519787907840462, iteration: 42955
loss: 1.0007379055023193,grad_norm: 0.8550522009447753, iteration: 42956
loss: 1.0127440690994263,grad_norm: 0.9999991900774237, iteration: 42957
loss: 1.0026113986968994,grad_norm: 0.9263604305745129, iteration: 42958
loss: 1.01389741897583,grad_norm: 0.9783848093560363, iteration: 42959
loss: 1.0093423128128052,grad_norm: 0.9634384747459475, iteration: 42960
loss: 0.9906377792358398,grad_norm: 0.874877054852438, iteration: 42961
loss: 0.9938991665840149,grad_norm: 0.9999991193709011, iteration: 42962
loss: 1.0168641805648804,grad_norm: 0.9999990687056394, iteration: 42963
loss: 1.0219535827636719,grad_norm: 0.9999992250342067, iteration: 42964
loss: 1.0220240354537964,grad_norm: 0.9954117212828771, iteration: 42965
loss: 0.9851779937744141,grad_norm: 0.8664140581551957, iteration: 42966
loss: 1.0328933000564575,grad_norm: 0.9489068930420043, iteration: 42967
loss: 1.0536866188049316,grad_norm: 0.9999998789044544, iteration: 42968
loss: 1.0054407119750977,grad_norm: 0.931749275485466, iteration: 42969
loss: 1.0505949258804321,grad_norm: 0.9999994109193197, iteration: 42970
loss: 1.0152983665466309,grad_norm: 0.929751252722833, iteration: 42971
loss: 1.0080021619796753,grad_norm: 0.9999994506443936, iteration: 42972
loss: 0.9905970692634583,grad_norm: 0.8608904494907683, iteration: 42973
loss: 0.9814128279685974,grad_norm: 0.9830787641246641, iteration: 42974
loss: 0.9986123442649841,grad_norm: 0.9999991089418524, iteration: 42975
loss: 1.0443871021270752,grad_norm: 0.9999996528801287, iteration: 42976
loss: 1.0101110935211182,grad_norm: 0.999999055345075, iteration: 42977
loss: 1.0639735460281372,grad_norm: 0.9999999215197514, iteration: 42978
loss: 0.9863445162773132,grad_norm: 0.9999990387023456, iteration: 42979
loss: 1.0059516429901123,grad_norm: 0.9999994096539913, iteration: 42980
loss: 0.9990788698196411,grad_norm: 0.9999990226108113, iteration: 42981
loss: 1.0367584228515625,grad_norm: 0.9999991433423271, iteration: 42982
loss: 1.0102735757827759,grad_norm: 0.9999992127697018, iteration: 42983
loss: 1.0592409372329712,grad_norm: 0.8569907460806495, iteration: 42984
loss: 0.9913203716278076,grad_norm: 0.9394186758414997, iteration: 42985
loss: 1.0178422927856445,grad_norm: 0.8872201679275448, iteration: 42986
loss: 1.0250556468963623,grad_norm: 0.92379130526042, iteration: 42987
loss: 0.983338475227356,grad_norm: 0.9019239615093311, iteration: 42988
loss: 0.9844744205474854,grad_norm: 0.9261886141146672, iteration: 42989
loss: 1.0178958177566528,grad_norm: 0.999999248783861, iteration: 42990
loss: 1.0441206693649292,grad_norm: 0.9999996635458783, iteration: 42991
loss: 0.9919420480728149,grad_norm: 0.9999990914425619, iteration: 42992
loss: 0.9700214862823486,grad_norm: 0.9654335532152168, iteration: 42993
loss: 1.0107945203781128,grad_norm: 0.9887144839747498, iteration: 42994
loss: 1.047677755355835,grad_norm: 0.9999990654273322, iteration: 42995
loss: 1.0108250379562378,grad_norm: 0.9694565581431828, iteration: 42996
loss: 0.9771198630332947,grad_norm: 0.9350046828067388, iteration: 42997
loss: 1.0331463813781738,grad_norm: 0.981748077564105, iteration: 42998
loss: 0.9923214912414551,grad_norm: 0.999999119061697, iteration: 42999
loss: 1.0146056413650513,grad_norm: 0.9989119781593983, iteration: 43000
loss: 0.9892714619636536,grad_norm: 0.927891636022917, iteration: 43001
loss: 1.0078295469284058,grad_norm: 0.9999990393240525, iteration: 43002
loss: 1.0811423063278198,grad_norm: 0.9999999138217482, iteration: 43003
loss: 1.0215401649475098,grad_norm: 0.9483209459255965, iteration: 43004
loss: 1.0386273860931396,grad_norm: 0.9999991927002281, iteration: 43005
loss: 0.9804390072822571,grad_norm: 0.7847532593023671, iteration: 43006
loss: 1.0170598030090332,grad_norm: 0.9247311711957209, iteration: 43007
loss: 0.9989839196205139,grad_norm: 0.9948236957806028, iteration: 43008
loss: 1.0032926797866821,grad_norm: 0.9999993204357663, iteration: 43009
loss: 0.9869914650917053,grad_norm: 0.9999991424821402, iteration: 43010
loss: 1.024172067642212,grad_norm: 0.9571614884080192, iteration: 43011
loss: 0.9737277030944824,grad_norm: 0.9714387803996889, iteration: 43012
loss: 1.0640201568603516,grad_norm: 0.999999368318237, iteration: 43013
loss: 0.9630431532859802,grad_norm: 0.9570604117920881, iteration: 43014
loss: 1.0305149555206299,grad_norm: 0.999999115796371, iteration: 43015
loss: 1.0155326128005981,grad_norm: 0.9999990732864764, iteration: 43016
loss: 1.01544189453125,grad_norm: 0.9111395651439882, iteration: 43017
loss: 0.9927765727043152,grad_norm: 0.9999992423371683, iteration: 43018
loss: 0.9765743017196655,grad_norm: 0.9999990923462901, iteration: 43019
loss: 0.9873583316802979,grad_norm: 0.9999990588720997, iteration: 43020
loss: 1.005170226097107,grad_norm: 0.9999992749073701, iteration: 43021
loss: 1.008394718170166,grad_norm: 0.9999989917579446, iteration: 43022
loss: 1.0025532245635986,grad_norm: 0.9999991382965675, iteration: 43023
loss: 0.95124351978302,grad_norm: 0.9216810357777059, iteration: 43024
loss: 1.0922361612319946,grad_norm: 0.9999991260244314, iteration: 43025
loss: 1.021506667137146,grad_norm: 0.9623210694447666, iteration: 43026
loss: 0.986181914806366,grad_norm: 0.9999992640747458, iteration: 43027
loss: 0.9965518712997437,grad_norm: 0.9999991290863226, iteration: 43028
loss: 1.0144565105438232,grad_norm: 0.9999989094774736, iteration: 43029
loss: 0.9874099493026733,grad_norm: 0.8865783010178668, iteration: 43030
loss: 1.014133334159851,grad_norm: 0.999999286219686, iteration: 43031
loss: 0.9997361302375793,grad_norm: 0.9281617579346587, iteration: 43032
loss: 0.9772919416427612,grad_norm: 0.9999990792849566, iteration: 43033
loss: 1.0174094438552856,grad_norm: 0.9999994372503084, iteration: 43034
loss: 0.9823627471923828,grad_norm: 0.9254297243230908, iteration: 43035
loss: 1.0292919874191284,grad_norm: 0.9999992454601339, iteration: 43036
loss: 1.020758867263794,grad_norm: 0.9999992214022795, iteration: 43037
loss: 1.020735740661621,grad_norm: 0.9574044783558183, iteration: 43038
loss: 1.0468556880950928,grad_norm: 0.9999994326874402, iteration: 43039
loss: 1.0195627212524414,grad_norm: 0.9999991449768011, iteration: 43040
loss: 1.02565336227417,grad_norm: 0.9999988601921255, iteration: 43041
loss: 0.9822115898132324,grad_norm: 0.9999991469160092, iteration: 43042
loss: 0.9966204762458801,grad_norm: 0.9999991391859423, iteration: 43043
loss: 1.0106761455535889,grad_norm: 0.8163738662750106, iteration: 43044
loss: 1.0032399892807007,grad_norm: 0.9999999062747068, iteration: 43045
loss: 0.9584348797798157,grad_norm: 0.9999990986037028, iteration: 43046
loss: 1.0175970792770386,grad_norm: 0.999999073927639, iteration: 43047
loss: 0.9931696653366089,grad_norm: 0.999999179664111, iteration: 43048
loss: 0.9804564118385315,grad_norm: 0.8127020600618199, iteration: 43049
loss: 0.9742705225944519,grad_norm: 0.9631163787558301, iteration: 43050
loss: 1.0124731063842773,grad_norm: 0.9999992246046866, iteration: 43051
loss: 1.0292869806289673,grad_norm: 0.9999991769702907, iteration: 43052
loss: 0.9863165616989136,grad_norm: 0.857417762381941, iteration: 43053
loss: 0.9722899794578552,grad_norm: 0.9326971778627476, iteration: 43054
loss: 1.011299967765808,grad_norm: 0.9235681084411197, iteration: 43055
loss: 0.9537707567214966,grad_norm: 0.9222103592191165, iteration: 43056
loss: 1.0026366710662842,grad_norm: 0.999999264006369, iteration: 43057
loss: 0.958651602268219,grad_norm: 0.9638736660807827, iteration: 43058
loss: 0.9768757820129395,grad_norm: 0.9999992194804407, iteration: 43059
loss: 0.9905865788459778,grad_norm: 0.9999990024507273, iteration: 43060
loss: 1.0147407054901123,grad_norm: 0.9652971548182832, iteration: 43061
loss: 1.0105782747268677,grad_norm: 0.887304534924924, iteration: 43062
loss: 1.0135987997055054,grad_norm: 0.8928240980978502, iteration: 43063
loss: 0.9919228553771973,grad_norm: 0.9473185248363031, iteration: 43064
loss: 0.9808828234672546,grad_norm: 0.877544764572379, iteration: 43065
loss: 1.0373800992965698,grad_norm: 0.9999991610428939, iteration: 43066
loss: 1.0295730829238892,grad_norm: 0.9942474307309677, iteration: 43067
loss: 0.9970574975013733,grad_norm: 0.956077526741879, iteration: 43068
loss: 0.9916126728057861,grad_norm: 0.9163801800551165, iteration: 43069
loss: 1.0432287454605103,grad_norm: 0.9288197354801369, iteration: 43070
loss: 1.033678412437439,grad_norm: 0.9999998826433208, iteration: 43071
loss: 1.0268796682357788,grad_norm: 0.8709777754412492, iteration: 43072
loss: 0.9998461604118347,grad_norm: 0.9999991801361281, iteration: 43073
loss: 1.0187453031539917,grad_norm: 0.8867761671087292, iteration: 43074
loss: 1.040920376777649,grad_norm: 0.9664329474557588, iteration: 43075
loss: 1.0298669338226318,grad_norm: 0.999999167115462, iteration: 43076
loss: 1.0232019424438477,grad_norm: 0.9728036582525063, iteration: 43077
loss: 1.0273600816726685,grad_norm: 0.9999990384169635, iteration: 43078
loss: 0.9878897070884705,grad_norm: 0.9999992471970832, iteration: 43079
loss: 1.0053106546401978,grad_norm: 0.9999990634264183, iteration: 43080
loss: 1.0116914510726929,grad_norm: 0.919695354679107, iteration: 43081
loss: 1.0180811882019043,grad_norm: 0.999998997775501, iteration: 43082
loss: 1.018594741821289,grad_norm: 0.9999991444813672, iteration: 43083
loss: 0.9737803936004639,grad_norm: 0.8128544340441111, iteration: 43084
loss: 1.0408082008361816,grad_norm: 0.9999995355109612, iteration: 43085
loss: 0.9445580244064331,grad_norm: 0.9423378618151659, iteration: 43086
loss: 0.9890058040618896,grad_norm: 0.953034806435442, iteration: 43087
loss: 1.0089759826660156,grad_norm: 0.9999996495726134, iteration: 43088
loss: 0.9738258123397827,grad_norm: 0.9999990500440687, iteration: 43089
loss: 0.9765204787254333,grad_norm: 0.9999990820944494, iteration: 43090
loss: 1.00348699092865,grad_norm: 0.9999990332398495, iteration: 43091
loss: 0.9847626686096191,grad_norm: 0.9268769861914693, iteration: 43092
loss: 0.9845183491706848,grad_norm: 0.9858947478066554, iteration: 43093
loss: 1.0083582401275635,grad_norm: 0.9999991525555868, iteration: 43094
loss: 1.0183161497116089,grad_norm: 0.9999998493157796, iteration: 43095
loss: 0.9672046303749084,grad_norm: 0.9374239164677076, iteration: 43096
loss: 0.9650431275367737,grad_norm: 0.9970233993662158, iteration: 43097
loss: 1.0066232681274414,grad_norm: 0.9999996067057602, iteration: 43098
loss: 0.9886132478713989,grad_norm: 0.9767918043436933, iteration: 43099
loss: 0.9877440333366394,grad_norm: 0.999999272424333, iteration: 43100
loss: 1.0276671648025513,grad_norm: 0.9999990412335696, iteration: 43101
loss: 0.9833893775939941,grad_norm: 0.9999991519923672, iteration: 43102
loss: 0.9920251965522766,grad_norm: 0.7288079438947254, iteration: 43103
loss: 0.9951199889183044,grad_norm: 0.9999990352099941, iteration: 43104
loss: 0.9817391037940979,grad_norm: 0.8834870556201575, iteration: 43105
loss: 1.0018094778060913,grad_norm: 0.9999990947170948, iteration: 43106
loss: 0.9826995730400085,grad_norm: 0.9999991733599944, iteration: 43107
loss: 1.0007518529891968,grad_norm: 0.9999991321357883, iteration: 43108
loss: 1.0170698165893555,grad_norm: 0.999999193192041, iteration: 43109
loss: 1.1021240949630737,grad_norm: 0.999999648126436, iteration: 43110
loss: 1.035082459449768,grad_norm: 0.9999994779253056, iteration: 43111
loss: 1.0159696340560913,grad_norm: 0.9999992017200626, iteration: 43112
loss: 1.0609697103500366,grad_norm: 0.9999994514983098, iteration: 43113
loss: 1.0144401788711548,grad_norm: 0.9004407116814981, iteration: 43114
loss: 1.0227528810501099,grad_norm: 0.9999990965999993, iteration: 43115
loss: 1.0352773666381836,grad_norm: 0.9999993974387974, iteration: 43116
loss: 1.0138808488845825,grad_norm: 0.8812196008081403, iteration: 43117
loss: 1.012842059135437,grad_norm: 0.895794485297137, iteration: 43118
loss: 1.01508629322052,grad_norm: 0.8134735463627654, iteration: 43119
loss: 1.0076199769973755,grad_norm: 0.912518886643322, iteration: 43120
loss: 1.0059834718704224,grad_norm: 0.9999991527450809, iteration: 43121
loss: 1.1270241737365723,grad_norm: 0.9999994599556058, iteration: 43122
loss: 1.0104008913040161,grad_norm: 0.9999993918603568, iteration: 43123
loss: 0.9530587792396545,grad_norm: 0.9999991138859, iteration: 43124
loss: 1.0210148096084595,grad_norm: 0.9424900595729082, iteration: 43125
loss: 0.9937869906425476,grad_norm: 0.8173478670068578, iteration: 43126
loss: 1.0235717296600342,grad_norm: 0.99999929253854, iteration: 43127
loss: 1.0242385864257812,grad_norm: 0.9999993557227242, iteration: 43128
loss: 1.0137733221054077,grad_norm: 0.9999989494656716, iteration: 43129
loss: 0.9997332096099854,grad_norm: 0.9057230411853703, iteration: 43130
loss: 0.9883207082748413,grad_norm: 0.9999990771778704, iteration: 43131
loss: 1.0215551853179932,grad_norm: 0.9999996654255183, iteration: 43132
loss: 1.0033220052719116,grad_norm: 0.9999992426808522, iteration: 43133
loss: 1.0130209922790527,grad_norm: 0.9999991224722806, iteration: 43134
loss: 1.02086341381073,grad_norm: 0.8893389437184447, iteration: 43135
loss: 1.0015088319778442,grad_norm: 0.9999990490939898, iteration: 43136
loss: 0.9692200422286987,grad_norm: 0.9999992396215174, iteration: 43137
loss: 0.9636944532394409,grad_norm: 0.9365356336409276, iteration: 43138
loss: 1.027388334274292,grad_norm: 0.9999993090097714, iteration: 43139
loss: 1.0128310918807983,grad_norm: 0.9999991149724746, iteration: 43140
loss: 1.0500311851501465,grad_norm: 0.999999090415929, iteration: 43141
loss: 1.0015077590942383,grad_norm: 0.9999990679231919, iteration: 43142
loss: 1.02750825881958,grad_norm: 0.999999096669335, iteration: 43143
loss: 1.0297890901565552,grad_norm: 0.897802208633786, iteration: 43144
loss: 0.9903525114059448,grad_norm: 0.9999992067209955, iteration: 43145
loss: 1.0340449810028076,grad_norm: 0.9999990637836813, iteration: 43146
loss: 1.0258535146713257,grad_norm: 0.9999990838610127, iteration: 43147
loss: 0.9883151650428772,grad_norm: 0.9999989843741299, iteration: 43148
loss: 1.0014629364013672,grad_norm: 0.9999989656866769, iteration: 43149
loss: 1.0154392719268799,grad_norm: 0.9894272444850134, iteration: 43150
loss: 1.0002923011779785,grad_norm: 0.9242096838731736, iteration: 43151
loss: 1.0243823528289795,grad_norm: 0.9999994936420501, iteration: 43152
loss: 0.9957851767539978,grad_norm: 0.9733600971606129, iteration: 43153
loss: 1.011006236076355,grad_norm: 0.8756857870424588, iteration: 43154
loss: 1.0252686738967896,grad_norm: 0.9999990617639763, iteration: 43155
loss: 1.0179842710494995,grad_norm: 0.8396244532271759, iteration: 43156
loss: 0.9761117696762085,grad_norm: 0.9999990209080306, iteration: 43157
loss: 1.020523190498352,grad_norm: 0.9999991986750061, iteration: 43158
loss: 0.9711196422576904,grad_norm: 0.8796426524613175, iteration: 43159
loss: 1.029133915901184,grad_norm: 0.9999991509626044, iteration: 43160
loss: 0.9888527989387512,grad_norm: 0.949114130761261, iteration: 43161
loss: 1.0140494108200073,grad_norm: 0.9999996505044596, iteration: 43162
loss: 0.9777492880821228,grad_norm: 0.8110182175440946, iteration: 43163
loss: 0.9910064935684204,grad_norm: 0.9999991070409069, iteration: 43164
loss: 0.9988386034965515,grad_norm: 0.9999992147355591, iteration: 43165
loss: 1.0329952239990234,grad_norm: 0.947629470080642, iteration: 43166
loss: 1.0116708278656006,grad_norm: 0.9999990830253382, iteration: 43167
loss: 1.0284061431884766,grad_norm: 0.9999990431140795, iteration: 43168
loss: 0.9852235913276672,grad_norm: 0.9330191551049317, iteration: 43169
loss: 1.0047986507415771,grad_norm: 0.9999993447033556, iteration: 43170
loss: 0.9960834980010986,grad_norm: 0.9082890389394764, iteration: 43171
loss: 1.014814019203186,grad_norm: 0.9248239683332747, iteration: 43172
loss: 1.0202486515045166,grad_norm: 0.9959932467688322, iteration: 43173
loss: 0.9981451630592346,grad_norm: 0.9573050546113099, iteration: 43174
loss: 0.944190502166748,grad_norm: 0.9999990918710042, iteration: 43175
loss: 1.0329643487930298,grad_norm: 0.9878413366723161, iteration: 43176
loss: 0.993387758731842,grad_norm: 0.9999996946358524, iteration: 43177
loss: 1.0220956802368164,grad_norm: 0.9999991588998606, iteration: 43178
loss: 1.0290709733963013,grad_norm: 0.9999998390465844, iteration: 43179
loss: 1.0243346691131592,grad_norm: 0.828952875258259, iteration: 43180
loss: 0.9867727756500244,grad_norm: 0.8919741845269534, iteration: 43181
loss: 0.9854071736335754,grad_norm: 0.9999995282729361, iteration: 43182
loss: 1.009676456451416,grad_norm: 0.9999991171683777, iteration: 43183
loss: 0.9733585119247437,grad_norm: 0.9231450138830108, iteration: 43184
loss: 1.0042896270751953,grad_norm: 0.9230183059490709, iteration: 43185
loss: 0.9922030568122864,grad_norm: 0.9999990937027873, iteration: 43186
loss: 0.9683998823165894,grad_norm: 0.945331210690106, iteration: 43187
loss: 0.9856228232383728,grad_norm: 0.9605755789852256, iteration: 43188
loss: 0.9841334819793701,grad_norm: 0.9999994197481873, iteration: 43189
loss: 1.0056418180465698,grad_norm: 0.8227262235842537, iteration: 43190
loss: 0.9925358891487122,grad_norm: 0.9999990879731709, iteration: 43191
loss: 1.0137077569961548,grad_norm: 0.8112519110504934, iteration: 43192
loss: 0.9889286160469055,grad_norm: 0.9999991477663149, iteration: 43193
loss: 1.0547178983688354,grad_norm: 0.9999997707668847, iteration: 43194
loss: 0.979414701461792,grad_norm: 0.9137985510323563, iteration: 43195
loss: 0.9771719574928284,grad_norm: 0.999999135035502, iteration: 43196
loss: 0.9908058047294617,grad_norm: 0.9534857354363846, iteration: 43197
loss: 1.0212632417678833,grad_norm: 0.8748331608365992, iteration: 43198
loss: 1.0267741680145264,grad_norm: 0.8589977866838159, iteration: 43199
loss: 0.9897489547729492,grad_norm: 0.9999990852245929, iteration: 43200
loss: 1.0043150186538696,grad_norm: 0.811776817847538, iteration: 43201
loss: 1.0510188341140747,grad_norm: 0.9999998416363687, iteration: 43202
loss: 1.0569394826889038,grad_norm: 0.999999078476638, iteration: 43203
loss: 1.072972297668457,grad_norm: 0.9999990779118983, iteration: 43204
loss: 1.004928708076477,grad_norm: 0.9934544948904537, iteration: 43205
loss: 1.0370793342590332,grad_norm: 0.8216510388910719, iteration: 43206
loss: 0.998755693435669,grad_norm: 0.9999990465043759, iteration: 43207
loss: 1.0059163570404053,grad_norm: 0.9999991126204777, iteration: 43208
loss: 1.0018587112426758,grad_norm: 0.9999990790162926, iteration: 43209
loss: 1.0135418176651,grad_norm: 0.8890113174296932, iteration: 43210
loss: 1.0015050172805786,grad_norm: 0.9999991393039488, iteration: 43211
loss: 0.9887840747833252,grad_norm: 0.9748433164476641, iteration: 43212
loss: 1.0332951545715332,grad_norm: 0.9999992724557474, iteration: 43213
loss: 1.0506635904312134,grad_norm: 0.9999991849926838, iteration: 43214
loss: 1.007529377937317,grad_norm: 0.9999991179968396, iteration: 43215
loss: 1.0165200233459473,grad_norm: 0.9037400409826398, iteration: 43216
loss: 0.996602475643158,grad_norm: 0.9999990912980155, iteration: 43217
loss: 0.9858172535896301,grad_norm: 0.9999993503656218, iteration: 43218
loss: 0.9942817687988281,grad_norm: 0.8361901234635396, iteration: 43219
loss: 1.016566276550293,grad_norm: 0.9999991719642778, iteration: 43220
loss: 1.002068042755127,grad_norm: 0.9999990153434544, iteration: 43221
loss: 1.006231427192688,grad_norm: 0.8149847813606693, iteration: 43222
loss: 0.9929438233375549,grad_norm: 0.860877893577287, iteration: 43223
loss: 0.9729853272438049,grad_norm: 0.9999991885529783, iteration: 43224
loss: 1.0317270755767822,grad_norm: 0.9497320530268013, iteration: 43225
loss: 1.0047025680541992,grad_norm: 0.956258390302259, iteration: 43226
loss: 1.0015062093734741,grad_norm: 0.9543597335875509, iteration: 43227
loss: 0.9854100942611694,grad_norm: 0.9057334696286842, iteration: 43228
loss: 0.9521952867507935,grad_norm: 0.9350093465373212, iteration: 43229
loss: 0.9970100522041321,grad_norm: 0.9057221553111223, iteration: 43230
loss: 1.0122122764587402,grad_norm: 0.9999990545808, iteration: 43231
loss: 1.018249750137329,grad_norm: 0.9086983156248797, iteration: 43232
loss: 1.0230036973953247,grad_norm: 0.9999991666614048, iteration: 43233
loss: 0.9749244451522827,grad_norm: 0.9610023604239214, iteration: 43234
loss: 1.0653172731399536,grad_norm: 0.999999090926826, iteration: 43235
loss: 0.9954402446746826,grad_norm: 0.9628026738799791, iteration: 43236
loss: 0.9965487718582153,grad_norm: 0.9650992151300951, iteration: 43237
loss: 0.9980611801147461,grad_norm: 0.9999991660838683, iteration: 43238
loss: 0.9835795164108276,grad_norm: 0.9999992115306583, iteration: 43239
loss: 0.9999163150787354,grad_norm: 0.9999990215022168, iteration: 43240
loss: 0.993335485458374,grad_norm: 0.999999275063742, iteration: 43241
loss: 1.03987717628479,grad_norm: 0.9781235094125175, iteration: 43242
loss: 0.9738763570785522,grad_norm: 0.9898357957031694, iteration: 43243
loss: 1.0074328184127808,grad_norm: 0.9999991931164344, iteration: 43244
loss: 0.9874879717826843,grad_norm: 0.9910184172965194, iteration: 43245
loss: 1.0024833679199219,grad_norm: 0.9468832766456947, iteration: 43246
loss: 1.0118964910507202,grad_norm: 0.9999991420082878, iteration: 43247
loss: 1.028357744216919,grad_norm: 0.9999993748130663, iteration: 43248
loss: 0.9764126539230347,grad_norm: 0.9264064035242292, iteration: 43249
loss: 1.0318180322647095,grad_norm: 0.9999990460032946, iteration: 43250
loss: 1.0192795991897583,grad_norm: 0.999999255121358, iteration: 43251
loss: 1.010914921760559,grad_norm: 0.999999135193389, iteration: 43252
loss: 1.0313526391983032,grad_norm: 0.9999990548328226, iteration: 43253
loss: 1.0109812021255493,grad_norm: 0.9999992160879718, iteration: 43254
loss: 0.9690455794334412,grad_norm: 0.9287028599929325, iteration: 43255
loss: 0.9752227663993835,grad_norm: 0.8951416806413374, iteration: 43256
loss: 1.026111125946045,grad_norm: 0.999999152875298, iteration: 43257
loss: 1.0179452896118164,grad_norm: 0.9999990522410172, iteration: 43258
loss: 1.0119801759719849,grad_norm: 0.9681529263461435, iteration: 43259
loss: 1.0059915781021118,grad_norm: 0.942130974261679, iteration: 43260
loss: 1.0084271430969238,grad_norm: 0.9482104995731389, iteration: 43261
loss: 1.007951259613037,grad_norm: 0.9999997962127473, iteration: 43262
loss: 0.9819438457489014,grad_norm: 0.9737176834215706, iteration: 43263
loss: 1.0034505128860474,grad_norm: 0.9999992138193733, iteration: 43264
loss: 0.9802653789520264,grad_norm: 0.8436176901480519, iteration: 43265
loss: 1.051505446434021,grad_norm: 0.9999993645676996, iteration: 43266
loss: 0.9856042265892029,grad_norm: 0.9999992424566531, iteration: 43267
loss: 1.0040857791900635,grad_norm: 0.9179647828593014, iteration: 43268
loss: 0.9956721067428589,grad_norm: 0.999999124689421, iteration: 43269
loss: 1.019860029220581,grad_norm: 0.9999991604965379, iteration: 43270
loss: 1.0022103786468506,grad_norm: 0.7952508725876435, iteration: 43271
loss: 1.0306973457336426,grad_norm: 0.9999990907785815, iteration: 43272
loss: 1.0052740573883057,grad_norm: 0.8477789102575789, iteration: 43273
loss: 1.0227359533309937,grad_norm: 0.9999991893803563, iteration: 43274
loss: 1.0000351667404175,grad_norm: 0.9999992382256847, iteration: 43275
loss: 1.0349546670913696,grad_norm: 0.9999991773787499, iteration: 43276
loss: 0.9988066554069519,grad_norm: 0.9539195948401403, iteration: 43277
loss: 1.019357442855835,grad_norm: 0.9999990687770075, iteration: 43278
loss: 0.970539391040802,grad_norm: 0.9999991185404239, iteration: 43279
loss: 0.9638129472732544,grad_norm: 0.9999992998822158, iteration: 43280
loss: 1.0118114948272705,grad_norm: 0.9999991215429106, iteration: 43281
loss: 1.0075232982635498,grad_norm: 0.9167135065685496, iteration: 43282
loss: 1.008556604385376,grad_norm: 0.8807816541822447, iteration: 43283
loss: 0.9770224690437317,grad_norm: 0.9999990161436559, iteration: 43284
loss: 0.9910981059074402,grad_norm: 0.9999992523562978, iteration: 43285
loss: 1.0184071063995361,grad_norm: 0.888170296592141, iteration: 43286
loss: 0.9609277248382568,grad_norm: 0.8182310324927912, iteration: 43287
loss: 1.0259301662445068,grad_norm: 0.8820288654863571, iteration: 43288
loss: 1.0250115394592285,grad_norm: 0.8853373797794447, iteration: 43289
loss: 0.9606297612190247,grad_norm: 0.9999990789924627, iteration: 43290
loss: 1.0622648000717163,grad_norm: 0.9867974885840285, iteration: 43291
loss: 1.0048691034317017,grad_norm: 0.9940228008049251, iteration: 43292
loss: 0.9986040592193604,grad_norm: 0.9878461155249276, iteration: 43293
loss: 0.9922468662261963,grad_norm: 0.7799758655659798, iteration: 43294
loss: 0.9958630204200745,grad_norm: 0.9999992082126345, iteration: 43295
loss: 1.0198578834533691,grad_norm: 0.9999993046655453, iteration: 43296
loss: 1.0538133382797241,grad_norm: 0.9999996959311176, iteration: 43297
loss: 0.9743718504905701,grad_norm: 0.9999989859262692, iteration: 43298
loss: 0.9923719167709351,grad_norm: 0.9999991916773581, iteration: 43299
loss: 1.0270533561706543,grad_norm: 0.9802351830051489, iteration: 43300
loss: 1.0480284690856934,grad_norm: 0.9999990947462525, iteration: 43301
loss: 1.003556728363037,grad_norm: 0.9999993535526385, iteration: 43302
loss: 1.0098440647125244,grad_norm: 0.9999990088182164, iteration: 43303
loss: 0.9915406703948975,grad_norm: 0.9999992702240647, iteration: 43304
loss: 1.0039421319961548,grad_norm: 0.8907227491703826, iteration: 43305
loss: 1.0051333904266357,grad_norm: 0.9670025506499415, iteration: 43306
loss: 0.9813007116317749,grad_norm: 0.9999991087320581, iteration: 43307
loss: 0.9879847764968872,grad_norm: 0.9999181300178412, iteration: 43308
loss: 0.9832736253738403,grad_norm: 0.8912055528809578, iteration: 43309
loss: 0.9761233329772949,grad_norm: 0.9999992080802526, iteration: 43310
loss: 0.9869402647018433,grad_norm: 0.999999124603073, iteration: 43311
loss: 1.0040150880813599,grad_norm: 0.9456024415303187, iteration: 43312
loss: 1.044000506401062,grad_norm: 0.9999991453508376, iteration: 43313
loss: 0.9855300784111023,grad_norm: 0.9761167871359523, iteration: 43314
loss: 1.0293468236923218,grad_norm: 0.9999991371631474, iteration: 43315
loss: 1.0198345184326172,grad_norm: 0.9999992760380614, iteration: 43316
loss: 1.0223535299301147,grad_norm: 0.9999990560759616, iteration: 43317
loss: 0.9737513661384583,grad_norm: 0.9999992185123818, iteration: 43318
loss: 0.9965863227844238,grad_norm: 0.9999997817088938, iteration: 43319
loss: 0.9636909365653992,grad_norm: 0.7928403965958314, iteration: 43320
loss: 1.0352027416229248,grad_norm: 0.9999992579777703, iteration: 43321
loss: 1.0065560340881348,grad_norm: 0.9999991729534614, iteration: 43322
loss: 0.9872350096702576,grad_norm: 0.8051724939058924, iteration: 43323
loss: 0.9862504601478577,grad_norm: 0.9999990870479213, iteration: 43324
loss: 1.001883625984192,grad_norm: 0.9265278861354905, iteration: 43325
loss: 1.000981092453003,grad_norm: 0.999999183106408, iteration: 43326
loss: 0.9880581498146057,grad_norm: 0.9114152922303468, iteration: 43327
loss: 1.0089187622070312,grad_norm: 0.9999991463254639, iteration: 43328
loss: 1.0137879848480225,grad_norm: 0.9999990804949641, iteration: 43329
loss: 1.0397531986236572,grad_norm: 0.9910052643912706, iteration: 43330
loss: 1.051917552947998,grad_norm: 0.9999990380097549, iteration: 43331
loss: 0.9965820908546448,grad_norm: 0.9999991536823742, iteration: 43332
loss: 1.0016047954559326,grad_norm: 0.98245383135364, iteration: 43333
loss: 0.990291953086853,grad_norm: 0.9981821247790359, iteration: 43334
loss: 0.9973182082176208,grad_norm: 0.9217568671722179, iteration: 43335
loss: 0.9796186089515686,grad_norm: 0.9326954764866839, iteration: 43336
loss: 0.9602221846580505,grad_norm: 0.9999990985302687, iteration: 43337
loss: 0.99777752161026,grad_norm: 0.8944787699061139, iteration: 43338
loss: 0.9649236798286438,grad_norm: 0.9999990079348525, iteration: 43339
loss: 1.0059807300567627,grad_norm: 0.99999903644444, iteration: 43340
loss: 0.9615076184272766,grad_norm: 0.9999992591494873, iteration: 43341
loss: 1.0463550090789795,grad_norm: 0.9999991572765038, iteration: 43342
loss: 1.0276846885681152,grad_norm: 0.9578380816351842, iteration: 43343
loss: 1.007380485534668,grad_norm: 0.9560651651594654, iteration: 43344
loss: 0.9552142024040222,grad_norm: 0.9999991471467623, iteration: 43345
loss: 0.9786773324012756,grad_norm: 0.7533987878839603, iteration: 43346
loss: 0.9928856492042542,grad_norm: 0.9999993149087889, iteration: 43347
loss: 1.0038840770721436,grad_norm: 0.9999992251696794, iteration: 43348
loss: 1.0249919891357422,grad_norm: 0.9999991962038627, iteration: 43349
loss: 1.0128350257873535,grad_norm: 0.9999991354525581, iteration: 43350
loss: 0.9728579521179199,grad_norm: 0.9999990381510204, iteration: 43351
loss: 1.0135531425476074,grad_norm: 0.9818474206680569, iteration: 43352
loss: 0.9595342874526978,grad_norm: 0.8707592439488031, iteration: 43353
loss: 1.0008667707443237,grad_norm: 0.9999990140254021, iteration: 43354
loss: 0.9633340835571289,grad_norm: 0.9092693402538379, iteration: 43355
loss: 0.9891465306282043,grad_norm: 0.8973244177955735, iteration: 43356
loss: 1.0341187715530396,grad_norm: 0.9999990756638024, iteration: 43357
loss: 0.9884312152862549,grad_norm: 0.8961204789276216, iteration: 43358
loss: 0.9803029894828796,grad_norm: 0.9999990681201578, iteration: 43359
loss: 1.036432147026062,grad_norm: 0.9999990800015159, iteration: 43360
loss: 1.004671573638916,grad_norm: 0.9794884598491383, iteration: 43361
loss: 1.0264191627502441,grad_norm: 0.9492917613261094, iteration: 43362
loss: 0.9619225859642029,grad_norm: 0.9999991929665581, iteration: 43363
loss: 1.0083085298538208,grad_norm: 0.9999990448165176, iteration: 43364
loss: 0.9941951632499695,grad_norm: 0.9194283290713564, iteration: 43365
loss: 0.9759603142738342,grad_norm: 0.9999992831987011, iteration: 43366
loss: 1.022910475730896,grad_norm: 0.9456797807177767, iteration: 43367
loss: 0.9927303194999695,grad_norm: 0.9218643461678621, iteration: 43368
loss: 1.02408766746521,grad_norm: 0.9999989606158688, iteration: 43369
loss: 1.0090030431747437,grad_norm: 0.9999991243721212, iteration: 43370
loss: 1.0227779150009155,grad_norm: 0.8582588818948129, iteration: 43371
loss: 1.0387779474258423,grad_norm: 0.9999991121026381, iteration: 43372
loss: 0.9575660228729248,grad_norm: 0.999999017146773, iteration: 43373
loss: 1.0421422719955444,grad_norm: 0.935911451996328, iteration: 43374
loss: 1.0145306587219238,grad_norm: 0.999999455874355, iteration: 43375
loss: 1.0172609090805054,grad_norm: 0.9280117470893162, iteration: 43376
loss: 1.028725504875183,grad_norm: 0.9695490491999906, iteration: 43377
loss: 1.0004032850265503,grad_norm: 0.8845082796491811, iteration: 43378
loss: 1.0246837139129639,grad_norm: 0.999999080110997, iteration: 43379
loss: 1.0278960466384888,grad_norm: 0.9137616885564809, iteration: 43380
loss: 1.0058953762054443,grad_norm: 0.9999992949658439, iteration: 43381
loss: 1.0155401229858398,grad_norm: 0.9328317755940345, iteration: 43382
loss: 1.0416340827941895,grad_norm: 0.9999990880112265, iteration: 43383
loss: 0.9812101125717163,grad_norm: 0.9999990615409547, iteration: 43384
loss: 0.9673669934272766,grad_norm: 0.9999992354592522, iteration: 43385
loss: 1.0124081373214722,grad_norm: 0.9999989815642772, iteration: 43386
loss: 1.0285240411758423,grad_norm: 0.8471474755882501, iteration: 43387
loss: 1.029812216758728,grad_norm: 0.9522579600112571, iteration: 43388
loss: 1.0192816257476807,grad_norm: 0.9999990783601093, iteration: 43389
loss: 1.0259225368499756,grad_norm: 0.9999991273696165, iteration: 43390
loss: 1.0213576555252075,grad_norm: 0.999999160917065, iteration: 43391
loss: 1.0061568021774292,grad_norm: 0.999999103586652, iteration: 43392
loss: 1.0067689418792725,grad_norm: 0.9999991443895547, iteration: 43393
loss: 1.0151913166046143,grad_norm: 0.9999990682170246, iteration: 43394
loss: 0.9984691143035889,grad_norm: 0.9999992081443118, iteration: 43395
loss: 0.9951294660568237,grad_norm: 0.8469649653466529, iteration: 43396
loss: 1.0023694038391113,grad_norm: 0.9292281359631055, iteration: 43397
loss: 1.033773422241211,grad_norm: 0.9650272119286571, iteration: 43398
loss: 1.01604425907135,grad_norm: 0.9999991637730062, iteration: 43399
loss: 0.9971409440040588,grad_norm: 0.8977318368991394, iteration: 43400
loss: 0.9943240284919739,grad_norm: 0.9999990531313536, iteration: 43401
loss: 1.000173568725586,grad_norm: 0.9681686167783168, iteration: 43402
loss: 1.038062572479248,grad_norm: 0.8885831417220859, iteration: 43403
loss: 1.0039255619049072,grad_norm: 0.9982224761871284, iteration: 43404
loss: 1.0158089399337769,grad_norm: 0.9999991318743225, iteration: 43405
loss: 1.0039730072021484,grad_norm: 0.8844628074587643, iteration: 43406
loss: 1.0059239864349365,grad_norm: 0.9788590870528068, iteration: 43407
loss: 0.9779062867164612,grad_norm: 0.9553948508909523, iteration: 43408
loss: 0.9979195594787598,grad_norm: 0.9999990455548089, iteration: 43409
loss: 1.0107460021972656,grad_norm: 0.878723530455383, iteration: 43410
loss: 1.0319639444351196,grad_norm: 0.9906811403480232, iteration: 43411
loss: 0.9810622930526733,grad_norm: 0.9999992101953613, iteration: 43412
loss: 1.0084431171417236,grad_norm: 0.9024020328240551, iteration: 43413
loss: 0.9850248694419861,grad_norm: 0.9999990242192842, iteration: 43414
loss: 0.961057722568512,grad_norm: 0.9446650272499276, iteration: 43415
loss: 0.9925873279571533,grad_norm: 0.9208651478185873, iteration: 43416
loss: 0.9940015077590942,grad_norm: 0.8511362047610045, iteration: 43417
loss: 1.0506761074066162,grad_norm: 0.999999186680521, iteration: 43418
loss: 1.0120290517807007,grad_norm: 0.8849773801017622, iteration: 43419
loss: 0.9856905341148376,grad_norm: 0.9999990143936344, iteration: 43420
loss: 0.983893871307373,grad_norm: 0.9999991066273952, iteration: 43421
loss: 0.9767960906028748,grad_norm: 0.9999992099531152, iteration: 43422
loss: 1.017094612121582,grad_norm: 0.8794409112137509, iteration: 43423
loss: 1.0266536474227905,grad_norm: 0.9999990973513349, iteration: 43424
loss: 0.9859345555305481,grad_norm: 0.9307726159805909, iteration: 43425
loss: 1.0002082586288452,grad_norm: 0.9111113837169696, iteration: 43426
loss: 0.9547982215881348,grad_norm: 0.9532996454859359, iteration: 43427
loss: 1.0343468189239502,grad_norm: 0.9999991475327337, iteration: 43428
loss: 0.9953513145446777,grad_norm: 0.846493986483222, iteration: 43429
loss: 1.0556341409683228,grad_norm: 0.8693337440063306, iteration: 43430
loss: 1.027970314025879,grad_norm: 0.9292971995583004, iteration: 43431
loss: 0.9997172951698303,grad_norm: 0.9999991449754997, iteration: 43432
loss: 0.9817015528678894,grad_norm: 0.9887251012936777, iteration: 43433
loss: 1.0526466369628906,grad_norm: 0.9999997711842483, iteration: 43434
loss: 0.9948243498802185,grad_norm: 0.9999990715362719, iteration: 43435
loss: 0.9872432947158813,grad_norm: 0.9999991989577393, iteration: 43436
loss: 1.0073935985565186,grad_norm: 0.9329258846790001, iteration: 43437
loss: 1.0107076168060303,grad_norm: 0.9996932457179117, iteration: 43438
loss: 0.9991885423660278,grad_norm: 0.9999991598206627, iteration: 43439
loss: 1.0049777030944824,grad_norm: 0.9799514664458111, iteration: 43440
loss: 0.9926925301551819,grad_norm: 0.8920177521796631, iteration: 43441
loss: 0.9711149334907532,grad_norm: 0.8812890309898488, iteration: 43442
loss: 0.9949812293052673,grad_norm: 0.9797417517885425, iteration: 43443
loss: 0.9635623097419739,grad_norm: 0.9383430655988562, iteration: 43444
loss: 1.0159434080123901,grad_norm: 0.9649025117993283, iteration: 43445
loss: 1.0344997644424438,grad_norm: 0.9999992436677371, iteration: 43446
loss: 0.989432692527771,grad_norm: 0.9999991067708759, iteration: 43447
loss: 0.9846137166023254,grad_norm: 0.9527007985975577, iteration: 43448
loss: 1.0094064474105835,grad_norm: 0.9999990294850742, iteration: 43449
loss: 0.988467812538147,grad_norm: 0.9228769617869371, iteration: 43450
loss: 1.0204956531524658,grad_norm: 0.99999939167681, iteration: 43451
loss: 1.0719739198684692,grad_norm: 0.9999996167644033, iteration: 43452
loss: 1.014327049255371,grad_norm: 0.9605077246164127, iteration: 43453
loss: 1.1065670251846313,grad_norm: 0.9999991209136267, iteration: 43454
loss: 0.9874541759490967,grad_norm: 0.9999989240371059, iteration: 43455
loss: 1.0260554552078247,grad_norm: 0.9999998664746265, iteration: 43456
loss: 1.0078924894332886,grad_norm: 0.999999230804099, iteration: 43457
loss: 1.0060173273086548,grad_norm: 0.9999991133227502, iteration: 43458
loss: 1.0162835121154785,grad_norm: 0.9999990283556164, iteration: 43459
loss: 1.038895606994629,grad_norm: 0.9999990933008456, iteration: 43460
loss: 1.0249593257904053,grad_norm: 0.9448928873490392, iteration: 43461
loss: 1.0143243074417114,grad_norm: 0.863637500248275, iteration: 43462
loss: 0.9536705613136292,grad_norm: 0.8527816071968642, iteration: 43463
loss: 1.0231330394744873,grad_norm: 0.9999991515834175, iteration: 43464
loss: 1.0503854751586914,grad_norm: 0.999999003500908, iteration: 43465
loss: 1.010478138923645,grad_norm: 0.999999471556455, iteration: 43466
loss: 1.0185976028442383,grad_norm: 0.8948354197787359, iteration: 43467
loss: 1.0291295051574707,grad_norm: 0.9999998379881624, iteration: 43468
loss: 1.0268821716308594,grad_norm: 0.9575078557713446, iteration: 43469
loss: 1.0160151720046997,grad_norm: 0.9999991663827359, iteration: 43470
loss: 1.0299010276794434,grad_norm: 0.9999992073630077, iteration: 43471
loss: 1.0034228563308716,grad_norm: 0.9999990997300083, iteration: 43472
loss: 1.0032750368118286,grad_norm: 0.9999991807172659, iteration: 43473
loss: 0.9835447669029236,grad_norm: 0.9999991688835517, iteration: 43474
loss: 1.0402904748916626,grad_norm: 0.9746026914438164, iteration: 43475
loss: 0.9962265491485596,grad_norm: 0.9044377158344293, iteration: 43476
loss: 1.0084784030914307,grad_norm: 0.9377951706123602, iteration: 43477
loss: 1.004145860671997,grad_norm: 0.9783941000027145, iteration: 43478
loss: 1.020552158355713,grad_norm: 0.9999991965311967, iteration: 43479
loss: 0.9546895623207092,grad_norm: 0.9012904102166024, iteration: 43480
loss: 1.0291630029678345,grad_norm: 0.9402349367797752, iteration: 43481
loss: 0.9775900840759277,grad_norm: 0.9999990564142931, iteration: 43482
loss: 0.9933178424835205,grad_norm: 0.9509602309517683, iteration: 43483
loss: 0.9904234409332275,grad_norm: 0.9999991072562248, iteration: 43484
loss: 0.9810383915901184,grad_norm: 0.9999992214752821, iteration: 43485
loss: 1.0135912895202637,grad_norm: 0.9999992234214077, iteration: 43486
loss: 1.0295121669769287,grad_norm: 0.9999990175293831, iteration: 43487
loss: 1.0371249914169312,grad_norm: 0.9891437808880207, iteration: 43488
loss: 1.0375269651412964,grad_norm: 0.9999994165463856, iteration: 43489
loss: 0.963698148727417,grad_norm: 0.9503829711607202, iteration: 43490
loss: 1.0095231533050537,grad_norm: 0.9268139733017154, iteration: 43491
loss: 0.9923292994499207,grad_norm: 0.9491564824241886, iteration: 43492
loss: 1.048102617263794,grad_norm: 0.999999361576294, iteration: 43493
loss: 0.9880489706993103,grad_norm: 0.9999991143473275, iteration: 43494
loss: 1.0086750984191895,grad_norm: 0.9477091099824159, iteration: 43495
loss: 1.0033464431762695,grad_norm: 0.9023358519669394, iteration: 43496
loss: 1.0167744159698486,grad_norm: 0.9999997443392616, iteration: 43497
loss: 0.9561229348182678,grad_norm: 0.852528797943613, iteration: 43498
loss: 0.975775420665741,grad_norm: 0.999999115108638, iteration: 43499
loss: 1.0054165124893188,grad_norm: 0.9746603075677961, iteration: 43500
loss: 0.9669913053512573,grad_norm: 0.9070822352470637, iteration: 43501
loss: 0.9993727803230286,grad_norm: 0.9999991468768904, iteration: 43502
loss: 1.0156220197677612,grad_norm: 0.9999990001385072, iteration: 43503
loss: 1.0011471509933472,grad_norm: 0.9721989749472922, iteration: 43504
loss: 0.9938420057296753,grad_norm: 0.9999990539475994, iteration: 43505
loss: 1.0411962270736694,grad_norm: 0.999999762296137, iteration: 43506
loss: 0.9831568598747253,grad_norm: 0.9572710599902063, iteration: 43507
loss: 0.9614359736442566,grad_norm: 0.9650390774026278, iteration: 43508
loss: 1.0352916717529297,grad_norm: 0.9999993363268718, iteration: 43509
loss: 1.0302287340164185,grad_norm: 0.9999991367366041, iteration: 43510
loss: 0.9808961749076843,grad_norm: 0.9682350884175068, iteration: 43511
loss: 1.0460166931152344,grad_norm: 0.9999991039080262, iteration: 43512
loss: 0.9577000141143799,grad_norm: 0.99999908695628, iteration: 43513
loss: 0.9985690712928772,grad_norm: 0.9999991281248951, iteration: 43514
loss: 1.0380345582962036,grad_norm: 0.9768509734313615, iteration: 43515
loss: 1.0657976865768433,grad_norm: 0.9753227245425805, iteration: 43516
loss: 1.010424017906189,grad_norm: 0.8461825580052157, iteration: 43517
loss: 0.9909907579421997,grad_norm: 0.9999990677102891, iteration: 43518
loss: 1.0318645238876343,grad_norm: 0.9999991257815551, iteration: 43519
loss: 0.9805932641029358,grad_norm: 0.9766840684139144, iteration: 43520
loss: 0.9837802648544312,grad_norm: 0.873508576063791, iteration: 43521
loss: 0.9975304007530212,grad_norm: 0.9999991163071767, iteration: 43522
loss: 1.0162560939788818,grad_norm: 0.9999990541951024, iteration: 43523
loss: 1.0061440467834473,grad_norm: 0.9415979007016491, iteration: 43524
loss: 0.9958162307739258,grad_norm: 0.9999991043572739, iteration: 43525
loss: 0.9637486934661865,grad_norm: 0.9999990672514267, iteration: 43526
loss: 1.092902660369873,grad_norm: 0.9999993641634658, iteration: 43527
loss: 0.9471569061279297,grad_norm: 0.9999991270242308, iteration: 43528
loss: 0.9836174249649048,grad_norm: 0.9999991634491723, iteration: 43529
loss: 0.9875329732894897,grad_norm: 0.9812834852265383, iteration: 43530
loss: 1.0305968523025513,grad_norm: 0.8910824322106271, iteration: 43531
loss: 0.986027717590332,grad_norm: 0.9999990083235926, iteration: 43532
loss: 1.004324197769165,grad_norm: 0.9999990824544933, iteration: 43533
loss: 1.011098027229309,grad_norm: 0.9999991061965264, iteration: 43534
loss: 0.9599072933197021,grad_norm: 0.9324133302660438, iteration: 43535
loss: 1.0474066734313965,grad_norm: 0.9832741630586648, iteration: 43536
loss: 0.9698138236999512,grad_norm: 0.9725358400887006, iteration: 43537
loss: 0.9911220669746399,grad_norm: 0.9727698925097908, iteration: 43538
loss: 1.0282049179077148,grad_norm: 0.9999991628851813, iteration: 43539
loss: 0.9683005213737488,grad_norm: 0.9999991579758462, iteration: 43540
loss: 0.9961278438568115,grad_norm: 0.9999990883457419, iteration: 43541
loss: 1.0236375331878662,grad_norm: 0.9999993509253693, iteration: 43542
loss: 0.9855182766914368,grad_norm: 0.8712115538564544, iteration: 43543
loss: 0.9608954787254333,grad_norm: 0.8721299489558236, iteration: 43544
loss: 1.0088562965393066,grad_norm: 0.9080416522310806, iteration: 43545
loss: 0.9904699921607971,grad_norm: 0.9999992084362513, iteration: 43546
loss: 1.0257703065872192,grad_norm: 0.9339896554633884, iteration: 43547
loss: 1.026464581489563,grad_norm: 0.9999989715291501, iteration: 43548
loss: 0.9852645993232727,grad_norm: 0.9692655586414967, iteration: 43549
loss: 1.0066511631011963,grad_norm: 0.9004203615366992, iteration: 43550
loss: 1.0066471099853516,grad_norm: 0.9996873377730069, iteration: 43551
loss: 1.0180180072784424,grad_norm: 0.9856293790858187, iteration: 43552
loss: 0.9960240125656128,grad_norm: 0.9999991012691966, iteration: 43553
loss: 0.9850980639457703,grad_norm: 0.9999992652926892, iteration: 43554
loss: 0.963698148727417,grad_norm: 0.9999991212829952, iteration: 43555
loss: 1.002914547920227,grad_norm: 0.9999993421167126, iteration: 43556
loss: 0.9917081594467163,grad_norm: 0.9479886665665327, iteration: 43557
loss: 0.9990071058273315,grad_norm: 0.999999090612391, iteration: 43558
loss: 1.0314233303070068,grad_norm: 0.9273265231554036, iteration: 43559
loss: 0.9771217107772827,grad_norm: 0.9999990671825186, iteration: 43560
loss: 1.0124188661575317,grad_norm: 0.9999990902949937, iteration: 43561
loss: 1.0202206373214722,grad_norm: 0.9605221718669293, iteration: 43562
loss: 0.9925410747528076,grad_norm: 0.9589445147933836, iteration: 43563
loss: 1.0310990810394287,grad_norm: 0.9999991282905524, iteration: 43564
loss: 1.0116084814071655,grad_norm: 0.842692897463708, iteration: 43565
loss: 0.9761300683021545,grad_norm: 0.9999990158915235, iteration: 43566
loss: 1.0274680852890015,grad_norm: 0.9362749278341169, iteration: 43567
loss: 0.9811179041862488,grad_norm: 0.9202526609821718, iteration: 43568
loss: 1.0021392107009888,grad_norm: 0.8798861227355551, iteration: 43569
loss: 0.9724471569061279,grad_norm: 0.9243010140898779, iteration: 43570
loss: 0.9817441701889038,grad_norm: 0.9059099530931454, iteration: 43571
loss: 1.0044662952423096,grad_norm: 0.8916821112836294, iteration: 43572
loss: 1.00546395778656,grad_norm: 0.9999991185470117, iteration: 43573
loss: 1.0111677646636963,grad_norm: 0.9999991812721317, iteration: 43574
loss: 0.9782871007919312,grad_norm: 0.9999991269420909, iteration: 43575
loss: 0.9572780132293701,grad_norm: 0.9999991897021793, iteration: 43576
loss: 1.032485842704773,grad_norm: 0.9999993781962043, iteration: 43577
loss: 0.9960903525352478,grad_norm: 0.9999995303420391, iteration: 43578
loss: 1.0139832496643066,grad_norm: 0.9999992094893949, iteration: 43579
loss: 1.0072940587997437,grad_norm: 0.999999037686773, iteration: 43580
loss: 0.9808869957923889,grad_norm: 0.9999991541320844, iteration: 43581
loss: 1.0240541696548462,grad_norm: 0.76178790032807, iteration: 43582
loss: 0.9683431386947632,grad_norm: 0.999998984630347, iteration: 43583
loss: 0.9888802170753479,grad_norm: 0.9999990951155691, iteration: 43584
loss: 1.0327448844909668,grad_norm: 0.919572771048251, iteration: 43585
loss: 1.0129072666168213,grad_norm: 0.9851271989979288, iteration: 43586
loss: 1.0241775512695312,grad_norm: 0.9574281460146993, iteration: 43587
loss: 0.9784102439880371,grad_norm: 0.9119473021170313, iteration: 43588
loss: 1.0279792547225952,grad_norm: 0.9999997990921508, iteration: 43589
loss: 0.9750013947486877,grad_norm: 0.8949711245487936, iteration: 43590
loss: 1.003660798072815,grad_norm: 0.99067730774754, iteration: 43591
loss: 1.0010093450546265,grad_norm: 0.9999991253887363, iteration: 43592
loss: 0.9668490886688232,grad_norm: 0.9999990077757408, iteration: 43593
loss: 1.0300055742263794,grad_norm: 0.9999990945307243, iteration: 43594
loss: 0.9935734272003174,grad_norm: 0.9574700240279443, iteration: 43595
loss: 1.0014513731002808,grad_norm: 0.9142256249624462, iteration: 43596
loss: 1.0070332288742065,grad_norm: 0.904405629152193, iteration: 43597
loss: 1.0020509958267212,grad_norm: 0.9088921115798294, iteration: 43598
loss: 0.9986605048179626,grad_norm: 0.8177993998683484, iteration: 43599
loss: 1.0497180223464966,grad_norm: 0.9860344490493551, iteration: 43600
loss: 1.0397545099258423,grad_norm: 0.9999990630771931, iteration: 43601
loss: 1.0040199756622314,grad_norm: 0.9999992750899367, iteration: 43602
loss: 1.0303616523742676,grad_norm: 0.999999034448243, iteration: 43603
loss: 1.018849492073059,grad_norm: 0.8836960810006533, iteration: 43604
loss: 1.0045619010925293,grad_norm: 0.9999991731262408, iteration: 43605
loss: 0.986337423324585,grad_norm: 0.9878204948183075, iteration: 43606
loss: 0.9991060495376587,grad_norm: 0.9465348541187418, iteration: 43607
loss: 1.0131239891052246,grad_norm: 0.9999991976799816, iteration: 43608
loss: 1.0195508003234863,grad_norm: 0.8601295127808216, iteration: 43609
loss: 0.9842061996459961,grad_norm: 0.9999991096497941, iteration: 43610
loss: 1.0277284383773804,grad_norm: 0.8855231953029868, iteration: 43611
loss: 1.005981683731079,grad_norm: 0.8917811751381306, iteration: 43612
loss: 1.0267225503921509,grad_norm: 0.9999989498669296, iteration: 43613
loss: 0.9774947166442871,grad_norm: 0.9999992714059984, iteration: 43614
loss: 1.0385425090789795,grad_norm: 0.8854955911841476, iteration: 43615
loss: 1.0108565092086792,grad_norm: 0.9515094491671916, iteration: 43616
loss: 1.0241584777832031,grad_norm: 0.9999991627614346, iteration: 43617
loss: 0.9709871411323547,grad_norm: 0.9999991786688943, iteration: 43618
loss: 0.9782560467720032,grad_norm: 0.9648050240186753, iteration: 43619
loss: 1.0190566778182983,grad_norm: 0.8392179543633529, iteration: 43620
loss: 1.0350902080535889,grad_norm: 0.9999990818759461, iteration: 43621
loss: 1.0250542163848877,grad_norm: 0.9732512056379448, iteration: 43622
loss: 0.9958973526954651,grad_norm: 0.9999991946737699, iteration: 43623
loss: 1.0079271793365479,grad_norm: 0.9999991266002858, iteration: 43624
loss: 1.014272928237915,grad_norm: 0.9999991784369259, iteration: 43625
loss: 0.9903738498687744,grad_norm: 0.9999991052869696, iteration: 43626
loss: 1.0371832847595215,grad_norm: 0.9999993135059874, iteration: 43627
loss: 0.9871270656585693,grad_norm: 0.9588256076790596, iteration: 43628
loss: 1.0011154413223267,grad_norm: 0.9653619672136703, iteration: 43629
loss: 1.001121997833252,grad_norm: 0.8507563835847932, iteration: 43630
loss: 1.0038721561431885,grad_norm: 0.9999990508950003, iteration: 43631
loss: 0.9958900809288025,grad_norm: 0.9159186796484272, iteration: 43632
loss: 0.9713525176048279,grad_norm: 0.999998997516831, iteration: 43633
loss: 1.015548825263977,grad_norm: 0.8061364653896284, iteration: 43634
loss: 1.0054277181625366,grad_norm: 0.9373190050613991, iteration: 43635
loss: 1.009291172027588,grad_norm: 0.9999993681524585, iteration: 43636
loss: 1.000432014465332,grad_norm: 0.9113695575430628, iteration: 43637
loss: 0.9980780482292175,grad_norm: 0.9728660148392135, iteration: 43638
loss: 1.0004096031188965,grad_norm: 0.9969106179055267, iteration: 43639
loss: 0.9915935397148132,grad_norm: 0.9611630211369437, iteration: 43640
loss: 1.009897232055664,grad_norm: 1.000000024923711, iteration: 43641
loss: 1.0188571214675903,grad_norm: 0.9999990696074219, iteration: 43642
loss: 0.989802896976471,grad_norm: 0.894831869822907, iteration: 43643
loss: 0.9924082159996033,grad_norm: 0.9557520467712426, iteration: 43644
loss: 0.9914034605026245,grad_norm: 0.9999991166193493, iteration: 43645
loss: 1.005924105644226,grad_norm: 0.9999990393016659, iteration: 43646
loss: 0.9930028915405273,grad_norm: 0.9517248295020604, iteration: 43647
loss: 0.970770001411438,grad_norm: 0.9999989825280898, iteration: 43648
loss: 1.018011450767517,grad_norm: 0.9999991287986478, iteration: 43649
loss: 0.991240382194519,grad_norm: 0.9942526229831871, iteration: 43650
loss: 1.0520635843276978,grad_norm: 0.9999992807084134, iteration: 43651
loss: 0.9752779006958008,grad_norm: 0.9999990098500685, iteration: 43652
loss: 1.0011121034622192,grad_norm: 0.7803750148675536, iteration: 43653
loss: 1.0056310892105103,grad_norm: 0.9587718191373055, iteration: 43654
loss: 1.0045756101608276,grad_norm: 0.9232844704387339, iteration: 43655
loss: 0.9825361967086792,grad_norm: 0.9291127328589138, iteration: 43656
loss: 1.0487431287765503,grad_norm: 0.9999990963202791, iteration: 43657
loss: 1.0017554759979248,grad_norm: 0.9999990891712871, iteration: 43658
loss: 0.9727442264556885,grad_norm: 0.8748647479069508, iteration: 43659
loss: 1.0523085594177246,grad_norm: 0.9566683891029379, iteration: 43660
loss: 1.0203479528427124,grad_norm: 0.9195010127184728, iteration: 43661
loss: 1.0088999271392822,grad_norm: 0.9714747133667281, iteration: 43662
loss: 0.9864746332168579,grad_norm: 0.7421869705895755, iteration: 43663
loss: 0.9801592826843262,grad_norm: 0.9999990800687472, iteration: 43664
loss: 1.0114638805389404,grad_norm: 0.9999991116286595, iteration: 43665
loss: 0.9741719961166382,grad_norm: 0.8931614750056328, iteration: 43666
loss: 1.0229198932647705,grad_norm: 0.9475255515893964, iteration: 43667
loss: 0.9624252915382385,grad_norm: 0.8142789157310156, iteration: 43668
loss: 1.02773118019104,grad_norm: 0.9719759534398359, iteration: 43669
loss: 1.0033531188964844,grad_norm: 0.9999990832226935, iteration: 43670
loss: 1.0511672496795654,grad_norm: 0.9999997432162062, iteration: 43671
loss: 1.0087658166885376,grad_norm: 0.8373376477594306, iteration: 43672
loss: 0.9812130331993103,grad_norm: 0.9999991526217689, iteration: 43673
loss: 1.0398545265197754,grad_norm: 0.9999990834167978, iteration: 43674
loss: 1.004765510559082,grad_norm: 0.879054622472521, iteration: 43675
loss: 1.0305609703063965,grad_norm: 0.9999990542084142, iteration: 43676
loss: 1.0267095565795898,grad_norm: 0.9999990546215786, iteration: 43677
loss: 1.0246951580047607,grad_norm: 0.8613459482989686, iteration: 43678
loss: 0.9918249249458313,grad_norm: 0.8761249703960806, iteration: 43679
loss: 1.0187307596206665,grad_norm: 0.999999108329705, iteration: 43680
loss: 1.004878044128418,grad_norm: 0.999999121294586, iteration: 43681
loss: 0.969458818435669,grad_norm: 0.9470297463428654, iteration: 43682
loss: 0.998245894908905,grad_norm: 0.9999991904210724, iteration: 43683
loss: 0.9920162558555603,grad_norm: 0.99999908013197, iteration: 43684
loss: 1.0075632333755493,grad_norm: 0.9435661957923274, iteration: 43685
loss: 0.9816492199897766,grad_norm: 0.9474086157032294, iteration: 43686
loss: 1.0395792722702026,grad_norm: 0.9999993850731519, iteration: 43687
loss: 0.9735385775566101,grad_norm: 0.9776878276263473, iteration: 43688
loss: 1.0053105354309082,grad_norm: 0.8954633057851475, iteration: 43689
loss: 1.0185264348983765,grad_norm: 0.9999990689547436, iteration: 43690
loss: 0.9803211092948914,grad_norm: 0.9844829694385302, iteration: 43691
loss: 0.9774479269981384,grad_norm: 0.9227625248896457, iteration: 43692
loss: 1.005520224571228,grad_norm: 0.8677146961106609, iteration: 43693
loss: 1.034874677658081,grad_norm: 0.9859036618987055, iteration: 43694
loss: 1.0216971635818481,grad_norm: 0.7622711078994069, iteration: 43695
loss: 0.9927526116371155,grad_norm: 0.9999989759212152, iteration: 43696
loss: 0.9786463379859924,grad_norm: 0.927569661667838, iteration: 43697
loss: 1.0160377025604248,grad_norm: 0.9987640080905854, iteration: 43698
loss: 1.0444356203079224,grad_norm: 0.9999991689562548, iteration: 43699
loss: 1.0021603107452393,grad_norm: 0.8394599694737582, iteration: 43700
loss: 0.9937717318534851,grad_norm: 0.9999991581716415, iteration: 43701
loss: 0.9893401265144348,grad_norm: 0.9999992403047135, iteration: 43702
loss: 0.9528139233589172,grad_norm: 0.9920732019097016, iteration: 43703
loss: 1.0084575414657593,grad_norm: 0.9999991252075345, iteration: 43704
loss: 1.0347295999526978,grad_norm: 0.9999991446737729, iteration: 43705
loss: 1.0053635835647583,grad_norm: 0.8752090209225871, iteration: 43706
loss: 1.0104957818984985,grad_norm: 0.8055780440470857, iteration: 43707
loss: 1.0191437005996704,grad_norm: 0.9999991997130753, iteration: 43708
loss: 1.013270378112793,grad_norm: 0.9999998680020102, iteration: 43709
loss: 0.9785306453704834,grad_norm: 0.9362998461723959, iteration: 43710
loss: 1.0002861022949219,grad_norm: 0.9999989757934029, iteration: 43711
loss: 1.0026099681854248,grad_norm: 0.9840995949623557, iteration: 43712
loss: 0.9497565627098083,grad_norm: 0.8902528036943919, iteration: 43713
loss: 0.9693286418914795,grad_norm: 0.9795819770892742, iteration: 43714
loss: 0.992872953414917,grad_norm: 0.8692907429225896, iteration: 43715
loss: 0.9985392689704895,grad_norm: 0.9332515879211081, iteration: 43716
loss: 1.003913402557373,grad_norm: 0.9468830268291529, iteration: 43717
loss: 1.0363314151763916,grad_norm: 0.9999990927211339, iteration: 43718
loss: 0.9917755722999573,grad_norm: 0.8810029709669062, iteration: 43719
loss: 0.9539298415184021,grad_norm: 0.8940136218413443, iteration: 43720
loss: 1.0080327987670898,grad_norm: 0.999999165358356, iteration: 43721
loss: 1.012789011001587,grad_norm: 0.9999991813810504, iteration: 43722
loss: 0.9715140461921692,grad_norm: 0.868289998898006, iteration: 43723
loss: 0.9723742604255676,grad_norm: 0.9018428257925113, iteration: 43724
loss: 1.0045561790466309,grad_norm: 0.8814520990292146, iteration: 43725
loss: 1.013593077659607,grad_norm: 0.8905150132671109, iteration: 43726
loss: 1.0093295574188232,grad_norm: 0.999999126065271, iteration: 43727
loss: 1.0334680080413818,grad_norm: 0.8135907386543174, iteration: 43728
loss: 1.0506643056869507,grad_norm: 0.9999993652346766, iteration: 43729
loss: 1.0382648706436157,grad_norm: 0.9999991065573256, iteration: 43730
loss: 1.0468631982803345,grad_norm: 0.9999994616436422, iteration: 43731
loss: 1.0377284288406372,grad_norm: 0.9791900786400611, iteration: 43732
loss: 0.9948759078979492,grad_norm: 0.7548541220697867, iteration: 43733
loss: 1.0043758153915405,grad_norm: 0.9999992615016071, iteration: 43734
loss: 1.0082958936691284,grad_norm: 0.9085638683918218, iteration: 43735
loss: 1.0126941204071045,grad_norm: 0.8182998243182075, iteration: 43736
loss: 1.0260341167449951,grad_norm: 0.9999990217282912, iteration: 43737
loss: 1.0092233419418335,grad_norm: 0.9999992188544189, iteration: 43738
loss: 0.9864252209663391,grad_norm: 0.9741705415242519, iteration: 43739
loss: 1.031663417816162,grad_norm: 0.9999991934226656, iteration: 43740
loss: 1.0346119403839111,grad_norm: 0.9715748398083475, iteration: 43741
loss: 1.0109994411468506,grad_norm: 0.9999989575627657, iteration: 43742
loss: 1.0121850967407227,grad_norm: 0.9999991147339539, iteration: 43743
loss: 0.9785650372505188,grad_norm: 0.9999991293520321, iteration: 43744
loss: 0.9952719211578369,grad_norm: 0.9686984673681411, iteration: 43745
loss: 0.9751964211463928,grad_norm: 0.9999991276385385, iteration: 43746
loss: 1.0071524381637573,grad_norm: 0.9746780976061895, iteration: 43747
loss: 0.9911247491836548,grad_norm: 0.8842748369587087, iteration: 43748
loss: 1.0500290393829346,grad_norm: 0.9999994155704999, iteration: 43749
loss: 0.9700542092323303,grad_norm: 0.9326078635834693, iteration: 43750
loss: 0.9955612421035767,grad_norm: 0.9999990456308977, iteration: 43751
loss: 0.9925249814987183,grad_norm: 0.8169451312080589, iteration: 43752
loss: 1.0055805444717407,grad_norm: 0.9999991614287304, iteration: 43753
loss: 0.9927477240562439,grad_norm: 0.9785702919733636, iteration: 43754
loss: 1.0128018856048584,grad_norm: 0.9501991524918724, iteration: 43755
loss: 1.0237998962402344,grad_norm: 0.9999990922937783, iteration: 43756
loss: 0.9946139454841614,grad_norm: 0.9096346263647745, iteration: 43757
loss: 1.0343565940856934,grad_norm: 0.9999990641084388, iteration: 43758
loss: 1.011401891708374,grad_norm: 0.9999990204493181, iteration: 43759
loss: 1.0114624500274658,grad_norm: 0.8037078943226557, iteration: 43760
loss: 0.9803404211997986,grad_norm: 0.999999150107134, iteration: 43761
loss: 1.0081559419631958,grad_norm: 0.9999991880265878, iteration: 43762
loss: 1.0220824480056763,grad_norm: 0.9999990118348132, iteration: 43763
loss: 1.0184252262115479,grad_norm: 0.911100197339888, iteration: 43764
loss: 1.0122735500335693,grad_norm: 0.9999990630287852, iteration: 43765
loss: 1.0051372051239014,grad_norm: 0.9116136653474942, iteration: 43766
loss: 0.9884560704231262,grad_norm: 0.9999991841651292, iteration: 43767
loss: 0.9883678555488586,grad_norm: 0.9601308492497415, iteration: 43768
loss: 1.0000492334365845,grad_norm: 0.9999990285699187, iteration: 43769
loss: 1.0131397247314453,grad_norm: 0.9999989291784124, iteration: 43770
loss: 0.9476913809776306,grad_norm: 0.9985925499419078, iteration: 43771
loss: 0.9644827842712402,grad_norm: 0.9775320254248765, iteration: 43772
loss: 1.0300935506820679,grad_norm: 0.9656002578396371, iteration: 43773
loss: 1.0254982709884644,grad_norm: 0.9288681311375797, iteration: 43774
loss: 0.9763941168785095,grad_norm: 0.9887192088363437, iteration: 43775
loss: 1.0143017768859863,grad_norm: 0.9999990933268358, iteration: 43776
loss: 1.0453815460205078,grad_norm: 0.9299028468361408, iteration: 43777
loss: 0.9540192484855652,grad_norm: 0.9999990445869748, iteration: 43778
loss: 1.0086863040924072,grad_norm: 0.9999996295800692, iteration: 43779
loss: 1.0170756578445435,grad_norm: 0.9257365657666397, iteration: 43780
loss: 0.9889936447143555,grad_norm: 0.9999990171330158, iteration: 43781
loss: 1.0126867294311523,grad_norm: 0.9686464151870777, iteration: 43782
loss: 1.0016535520553589,grad_norm: 0.9999991610022279, iteration: 43783
loss: 0.9788749814033508,grad_norm: 0.9336126585246635, iteration: 43784
loss: 1.0217690467834473,grad_norm: 0.8639180990680101, iteration: 43785
loss: 1.0094200372695923,grad_norm: 0.9999991784449268, iteration: 43786
loss: 1.057839035987854,grad_norm: 0.999999461118905, iteration: 43787
loss: 1.0209355354309082,grad_norm: 0.8676802000289734, iteration: 43788
loss: 0.9997276663780212,grad_norm: 0.9469672858425205, iteration: 43789
loss: 1.0022895336151123,grad_norm: 0.9962233969569597, iteration: 43790
loss: 1.022242784500122,grad_norm: 0.7899983961043506, iteration: 43791
loss: 0.9659531712532043,grad_norm: 0.9541300748546268, iteration: 43792
loss: 0.9710173606872559,grad_norm: 0.9999990428946911, iteration: 43793
loss: 0.9768505096435547,grad_norm: 0.971550955534344, iteration: 43794
loss: 0.9932495355606079,grad_norm: 0.9999991759852727, iteration: 43795
loss: 0.9898144006729126,grad_norm: 0.9999997325584291, iteration: 43796
loss: 1.0112743377685547,grad_norm: 0.8960203853803741, iteration: 43797
loss: 0.9871787428855896,grad_norm: 0.9007810794381049, iteration: 43798
loss: 0.9925740957260132,grad_norm: 0.9999990104556206, iteration: 43799
loss: 1.0081013441085815,grad_norm: 0.9256061658745236, iteration: 43800
loss: 1.0236924886703491,grad_norm: 0.7537623545789681, iteration: 43801
loss: 0.9958111643791199,grad_norm: 0.9999991614126047, iteration: 43802
loss: 1.0002093315124512,grad_norm: 0.9999992249925023, iteration: 43803
loss: 1.0085676908493042,grad_norm: 0.9999992722711545, iteration: 43804
loss: 0.9892067909240723,grad_norm: 0.9999991122499499, iteration: 43805
loss: 1.0226540565490723,grad_norm: 0.9999992217694265, iteration: 43806
loss: 1.035494327545166,grad_norm: 0.9999991529748962, iteration: 43807
loss: 0.993661105632782,grad_norm: 0.9999992862254297, iteration: 43808
loss: 1.0326433181762695,grad_norm: 0.9999991480220187, iteration: 43809
loss: 0.9920284152030945,grad_norm: 0.846735518331408, iteration: 43810
loss: 1.0336127281188965,grad_norm: 0.9091893614629347, iteration: 43811
loss: 0.9867839813232422,grad_norm: 0.8475110263173158, iteration: 43812
loss: 0.9921269416809082,grad_norm: 0.9866959047780219, iteration: 43813
loss: 0.9766468405723572,grad_norm: 0.9170562668525386, iteration: 43814
loss: 1.0019080638885498,grad_norm: 0.9022125736672212, iteration: 43815
loss: 1.0023568868637085,grad_norm: 0.9804387210207357, iteration: 43816
loss: 0.9639455080032349,grad_norm: 0.9237325446911925, iteration: 43817
loss: 0.9959697127342224,grad_norm: 0.9750898344496177, iteration: 43818
loss: 0.9833963513374329,grad_norm: 0.9999991585048533, iteration: 43819
loss: 1.0347187519073486,grad_norm: 0.9944482261910856, iteration: 43820
loss: 1.0156387090682983,grad_norm: 0.8565236853954548, iteration: 43821
loss: 1.0204997062683105,grad_norm: 0.9999989326969344, iteration: 43822
loss: 0.9529513716697693,grad_norm: 0.9999990090444256, iteration: 43823
loss: 0.99126797914505,grad_norm: 0.9723518806419388, iteration: 43824
loss: 1.0148708820343018,grad_norm: 0.9999989528974793, iteration: 43825
loss: 1.0128495693206787,grad_norm: 0.6846126818217871, iteration: 43826
loss: 1.0152438879013062,grad_norm: 0.9999992467871935, iteration: 43827
loss: 0.9980003833770752,grad_norm: 0.9999990526604966, iteration: 43828
loss: 1.0087343454360962,grad_norm: 0.9999992108747874, iteration: 43829
loss: 1.0159270763397217,grad_norm: 0.9999989765706558, iteration: 43830
loss: 1.024533987045288,grad_norm: 0.999999294910454, iteration: 43831
loss: 0.9777933359146118,grad_norm: 0.8589583039982485, iteration: 43832
loss: 1.0155417919158936,grad_norm: 0.8813441708262489, iteration: 43833
loss: 1.0121053457260132,grad_norm: 0.9999989905949944, iteration: 43834
loss: 0.996833860874176,grad_norm: 0.7887092285512197, iteration: 43835
loss: 1.011938452720642,grad_norm: 0.9028249928515396, iteration: 43836
loss: 1.026586890220642,grad_norm: 0.999999193497076, iteration: 43837
loss: 0.9910643100738525,grad_norm: 0.8512692841127841, iteration: 43838
loss: 0.9566572904586792,grad_norm: 0.9999992403425905, iteration: 43839
loss: 1.0047509670257568,grad_norm: 0.9999991284668259, iteration: 43840
loss: 0.9828361868858337,grad_norm: 0.9999990687698189, iteration: 43841
loss: 1.0582314729690552,grad_norm: 0.9999997102398623, iteration: 43842
loss: 0.9938957691192627,grad_norm: 0.9999990382622863, iteration: 43843
loss: 1.0382072925567627,grad_norm: 0.9685194244035661, iteration: 43844
loss: 1.103577733039856,grad_norm: 0.9999998006713936, iteration: 43845
loss: 0.995689868927002,grad_norm: 0.9999990666371349, iteration: 43846
loss: 0.9837960600852966,grad_norm: 0.9999993257425223, iteration: 43847
loss: 1.0232877731323242,grad_norm: 0.9999992447814204, iteration: 43848
loss: 0.9801020622253418,grad_norm: 0.8225074078413336, iteration: 43849
loss: 0.9971868395805359,grad_norm: 0.7960549270242037, iteration: 43850
loss: 0.9958867430686951,grad_norm: 0.9003766363533805, iteration: 43851
loss: 1.0316553115844727,grad_norm: 0.9999989713523794, iteration: 43852
loss: 1.0305057764053345,grad_norm: 0.977044711917886, iteration: 43853
loss: 0.9996980428695679,grad_norm: 0.9999990834010017, iteration: 43854
loss: 1.0359551906585693,grad_norm: 0.9999989852444918, iteration: 43855
loss: 1.0108015537261963,grad_norm: 0.9999991952259905, iteration: 43856
loss: 0.9919345378875732,grad_norm: 0.9999991720160487, iteration: 43857
loss: 0.9855638742446899,grad_norm: 0.9999992936177791, iteration: 43858
loss: 0.9899309277534485,grad_norm: 0.9999990236281541, iteration: 43859
loss: 1.0139082670211792,grad_norm: 0.9999990808103135, iteration: 43860
loss: 0.9943980574607849,grad_norm: 0.9866560091718104, iteration: 43861
loss: 1.024275302886963,grad_norm: 0.999999028733659, iteration: 43862
loss: 1.0247161388397217,grad_norm: 0.9999993000281092, iteration: 43863
loss: 0.9928439855575562,grad_norm: 0.8390153411237239, iteration: 43864
loss: 1.0631654262542725,grad_norm: 0.9708973783302157, iteration: 43865
loss: 1.0137982368469238,grad_norm: 0.9999992246415518, iteration: 43866
loss: 1.0061568021774292,grad_norm: 0.9438074999263638, iteration: 43867
loss: 0.9815451502799988,grad_norm: 0.9999996084145929, iteration: 43868
loss: 0.9603005647659302,grad_norm: 0.9999992333094143, iteration: 43869
loss: 0.984140932559967,grad_norm: 0.8310411345452936, iteration: 43870
loss: 1.045222282409668,grad_norm: 0.999999244671221, iteration: 43871
loss: 0.9986054301261902,grad_norm: 0.9999991957476377, iteration: 43872
loss: 0.996630072593689,grad_norm: 0.9999991282749079, iteration: 43873
loss: 0.973080039024353,grad_norm: 0.9999991204996262, iteration: 43874
loss: 1.0321412086486816,grad_norm: 0.9999990744986289, iteration: 43875
loss: 1.0082260370254517,grad_norm: 0.9999992135888809, iteration: 43876
loss: 0.993279218673706,grad_norm: 0.9999990390243892, iteration: 43877
loss: 1.1007941961288452,grad_norm: 0.9999992208312453, iteration: 43878
loss: 0.9585788249969482,grad_norm: 0.9824601222602778, iteration: 43879
loss: 1.0175011157989502,grad_norm: 0.9999991189118709, iteration: 43880
loss: 0.9926474690437317,grad_norm: 0.9999998730224443, iteration: 43881
loss: 1.014898657798767,grad_norm: 0.9231469667540038, iteration: 43882
loss: 1.0211440324783325,grad_norm: 0.969379164989667, iteration: 43883
loss: 0.9874555468559265,grad_norm: 0.9999990389336249, iteration: 43884
loss: 0.9992654323577881,grad_norm: 0.9716691915260344, iteration: 43885
loss: 1.0200717449188232,grad_norm: 0.9942528284362787, iteration: 43886
loss: 1.007650375366211,grad_norm: 0.9023159916327121, iteration: 43887
loss: 1.00357985496521,grad_norm: 0.928270034818245, iteration: 43888
loss: 1.0103529691696167,grad_norm: 0.878907983061201, iteration: 43889
loss: 0.9943914413452148,grad_norm: 0.9999991089229012, iteration: 43890
loss: 1.0195043087005615,grad_norm: 0.999999417124211, iteration: 43891
loss: 0.9575466513633728,grad_norm: 0.99980704231722, iteration: 43892
loss: 1.0405603647232056,grad_norm: 0.9157615441276117, iteration: 43893
loss: 1.0229164361953735,grad_norm: 0.9142406104191783, iteration: 43894
loss: 0.9889518022537231,grad_norm: 0.8015013142563081, iteration: 43895
loss: 1.0063695907592773,grad_norm: 0.9999991907956961, iteration: 43896
loss: 0.9852839708328247,grad_norm: 0.9297101724231404, iteration: 43897
loss: 0.9995193481445312,grad_norm: 0.9999993253654242, iteration: 43898
loss: 1.01136314868927,grad_norm: 0.9908561584526855, iteration: 43899
loss: 1.0318793058395386,grad_norm: 0.8033650869439758, iteration: 43900
loss: 1.0109127759933472,grad_norm: 0.9999994899096546, iteration: 43901
loss: 1.0276774168014526,grad_norm: 0.9999989984308609, iteration: 43902
loss: 1.0307317972183228,grad_norm: 0.9999989620381434, iteration: 43903
loss: 1.0242809057235718,grad_norm: 0.807473858461249, iteration: 43904
loss: 1.0369949340820312,grad_norm: 0.9999989636772113, iteration: 43905
loss: 1.0172187089920044,grad_norm: 0.9999990996703525, iteration: 43906
loss: 1.0330241918563843,grad_norm: 0.9999991645284816, iteration: 43907
loss: 0.9998569488525391,grad_norm: 0.999999082654629, iteration: 43908
loss: 0.977608859539032,grad_norm: 0.9999993859268506, iteration: 43909
loss: 1.0435210466384888,grad_norm: 0.999999275815393, iteration: 43910
loss: 1.00041925907135,grad_norm: 0.9999991920137221, iteration: 43911
loss: 1.0291863679885864,grad_norm: 0.9651294280257178, iteration: 43912
loss: 0.9855096340179443,grad_norm: 0.9999998365504368, iteration: 43913
loss: 1.0106205940246582,grad_norm: 0.9127383516317568, iteration: 43914
loss: 1.0046031475067139,grad_norm: 0.9954860147328327, iteration: 43915
loss: 1.0065137147903442,grad_norm: 0.9376361153207079, iteration: 43916
loss: 1.0085409879684448,grad_norm: 0.9999991378763727, iteration: 43917
loss: 0.9974119067192078,grad_norm: 0.9594942023332833, iteration: 43918
loss: 0.9816468358039856,grad_norm: 0.9474384415077458, iteration: 43919
loss: 0.979851484298706,grad_norm: 0.8502879799101217, iteration: 43920
loss: 0.9681093096733093,grad_norm: 0.9910332845426683, iteration: 43921
loss: 0.9827693700790405,grad_norm: 0.9999991512237317, iteration: 43922
loss: 1.002426266670227,grad_norm: 0.9199840702298867, iteration: 43923
loss: 1.0023163557052612,grad_norm: 0.999999182880136, iteration: 43924
loss: 1.0170632600784302,grad_norm: 0.9999990155809984, iteration: 43925
loss: 0.9992697834968567,grad_norm: 0.9714796413601674, iteration: 43926
loss: 1.0104801654815674,grad_norm: 0.806432085015673, iteration: 43927
loss: 0.9685465693473816,grad_norm: 0.9999991481839056, iteration: 43928
loss: 0.9966662526130676,grad_norm: 0.9576159899992653, iteration: 43929
loss: 0.9696784019470215,grad_norm: 0.9146497935227582, iteration: 43930
loss: 0.9770406484603882,grad_norm: 0.9944010997110977, iteration: 43931
loss: 1.0093495845794678,grad_norm: 0.8949123805522495, iteration: 43932
loss: 1.0008862018585205,grad_norm: 0.9999990904080551, iteration: 43933
loss: 1.0070706605911255,grad_norm: 0.9999991081373587, iteration: 43934
loss: 1.0197665691375732,grad_norm: 0.9540322756537525, iteration: 43935
loss: 0.9991269707679749,grad_norm: 0.9416409680660836, iteration: 43936
loss: 1.0019720792770386,grad_norm: 0.9080000254746345, iteration: 43937
loss: 0.9988147616386414,grad_norm: 0.8946696952826753, iteration: 43938
loss: 0.9758322834968567,grad_norm: 0.999999198439277, iteration: 43939
loss: 1.0197193622589111,grad_norm: 0.9999989573693184, iteration: 43940
loss: 0.9758253693580627,grad_norm: 0.9428961013955793, iteration: 43941
loss: 1.00157630443573,grad_norm: 0.9663397538171715, iteration: 43942
loss: 1.0079470872879028,grad_norm: 0.9573976313265568, iteration: 43943
loss: 0.9942478537559509,grad_norm: 0.8016766123269005, iteration: 43944
loss: 1.0657627582550049,grad_norm: 0.9652543935549803, iteration: 43945
loss: 0.9869744181632996,grad_norm: 0.9999991896456806, iteration: 43946
loss: 1.0238510370254517,grad_norm: 0.960405263148467, iteration: 43947
loss: 1.0029420852661133,grad_norm: 0.9999990656965688, iteration: 43948
loss: 1.0117815732955933,grad_norm: 0.998470748496784, iteration: 43949
loss: 0.9852648377418518,grad_norm: 0.781501718276248, iteration: 43950
loss: 0.9877756237983704,grad_norm: 0.975884000109164, iteration: 43951
loss: 1.0429950952529907,grad_norm: 0.9999991338929912, iteration: 43952
loss: 1.0362070798873901,grad_norm: 0.9999995897904864, iteration: 43953
loss: 1.0038493871688843,grad_norm: 0.9571122272960295, iteration: 43954
loss: 0.9938361048698425,grad_norm: 0.8701568314431293, iteration: 43955
loss: 1.0149335861206055,grad_norm: 0.9999992109507375, iteration: 43956
loss: 1.012954592704773,grad_norm: 0.999999085434335, iteration: 43957
loss: 0.9820602536201477,grad_norm: 0.9999992889752953, iteration: 43958
loss: 0.9935868382453918,grad_norm: 0.9217432371667023, iteration: 43959
loss: 1.009558916091919,grad_norm: 0.9887391677120023, iteration: 43960
loss: 1.0198726654052734,grad_norm: 0.9999993561311565, iteration: 43961
loss: 1.0317081212997437,grad_norm: 0.997990950636315, iteration: 43962
loss: 0.9986587762832642,grad_norm: 0.8558577445923522, iteration: 43963
loss: 0.9790988564491272,grad_norm: 0.9999990602911693, iteration: 43964
loss: 1.000093698501587,grad_norm: 0.7459490279371239, iteration: 43965
loss: 1.0220831632614136,grad_norm: 0.8692674307565117, iteration: 43966
loss: 1.0033411979675293,grad_norm: 0.9999990982134023, iteration: 43967
loss: 0.9961715936660767,grad_norm: 0.9515759202584804, iteration: 43968
loss: 1.0339431762695312,grad_norm: 0.9991929278424243, iteration: 43969
loss: 1.0114378929138184,grad_norm: 0.9999992339455023, iteration: 43970
loss: 1.0083742141723633,grad_norm: 0.9999991730860718, iteration: 43971
loss: 0.9952074289321899,grad_norm: 0.9999992701794376, iteration: 43972
loss: 0.9946463108062744,grad_norm: 0.9526801949191314, iteration: 43973
loss: 0.9811400771141052,grad_norm: 0.8043923409968543, iteration: 43974
loss: 0.9834937453269958,grad_norm: 0.9999995724787853, iteration: 43975
loss: 1.018968105316162,grad_norm: 0.9542342761032359, iteration: 43976
loss: 1.014382004737854,grad_norm: 0.9999997453586358, iteration: 43977
loss: 0.9545692801475525,grad_norm: 0.9591503508853197, iteration: 43978
loss: 1.0224018096923828,grad_norm: 0.9999991574168511, iteration: 43979
loss: 1.0251257419586182,grad_norm: 0.999999261357886, iteration: 43980
loss: 1.0206904411315918,grad_norm: 0.999999070445945, iteration: 43981
loss: 1.0140944719314575,grad_norm: 0.9999990970513031, iteration: 43982
loss: 0.9906997680664062,grad_norm: 0.9999991251960189, iteration: 43983
loss: 0.9784785509109497,grad_norm: 0.8280960346097848, iteration: 43984
loss: 0.9961321949958801,grad_norm: 0.9999991294544901, iteration: 43985
loss: 1.0137982368469238,grad_norm: 0.9557956790383348, iteration: 43986
loss: 0.9945665001869202,grad_norm: 0.879693754353797, iteration: 43987
loss: 1.0437933206558228,grad_norm: 0.9999990321231214, iteration: 43988
loss: 0.9684129357337952,grad_norm: 0.9432559569162641, iteration: 43989
loss: 1.0005429983139038,grad_norm: 0.9999990979689725, iteration: 43990
loss: 0.981132447719574,grad_norm: 0.9999990966865244, iteration: 43991
loss: 1.0135847330093384,grad_norm: 0.9751546230058248, iteration: 43992
loss: 1.0116021633148193,grad_norm: 0.9999991089469548, iteration: 43993
loss: 1.0049961805343628,grad_norm: 0.9999991053957877, iteration: 43994
loss: 0.9730287194252014,grad_norm: 0.8294822762714691, iteration: 43995
loss: 1.0090537071228027,grad_norm: 0.9999992183544616, iteration: 43996
loss: 0.9911660552024841,grad_norm: 0.9999991443065236, iteration: 43997
loss: 0.993707001209259,grad_norm: 0.9697787606352185, iteration: 43998
loss: 0.9844908714294434,grad_norm: 0.9024422385156515, iteration: 43999
loss: 1.003667950630188,grad_norm: 0.842348742995105, iteration: 44000
loss: 0.9465744495391846,grad_norm: 0.999999054914179, iteration: 44001
loss: 0.9922966957092285,grad_norm: 0.9355145718208582, iteration: 44002
loss: 0.989297091960907,grad_norm: 0.811136929628033, iteration: 44003
loss: 0.9652720093727112,grad_norm: 0.9548004002056565, iteration: 44004
loss: 1.0229114294052124,grad_norm: 0.9580325202284853, iteration: 44005
loss: 1.0056277513504028,grad_norm: 0.8564464575702634, iteration: 44006
loss: 0.9911708831787109,grad_norm: 0.9390162922907959, iteration: 44007
loss: 1.0312165021896362,grad_norm: 0.9999996874880058, iteration: 44008
loss: 1.019336462020874,grad_norm: 0.9999990057614812, iteration: 44009
loss: 0.9952993988990784,grad_norm: 0.999999140115217, iteration: 44010
loss: 0.9922769069671631,grad_norm: 0.9562918499059685, iteration: 44011
loss: 0.9868196845054626,grad_norm: 0.9733888596779378, iteration: 44012
loss: 1.0539519786834717,grad_norm: 0.9563905705763636, iteration: 44013
loss: 1.0542104244232178,grad_norm: 0.9999997865341722, iteration: 44014
loss: 0.9875985980033875,grad_norm: 0.9999996806972015, iteration: 44015
loss: 1.0064467191696167,grad_norm: 0.9999990266579157, iteration: 44016
loss: 0.9786906838417053,grad_norm: 0.9369193471827444, iteration: 44017
loss: 1.0134249925613403,grad_norm: 0.9999990897239678, iteration: 44018
loss: 1.0148134231567383,grad_norm: 0.9999992448501619, iteration: 44019
loss: 0.9914907217025757,grad_norm: 0.999998986752828, iteration: 44020
loss: 1.0118569135665894,grad_norm: 0.939717517934411, iteration: 44021
loss: 0.9954143166542053,grad_norm: 0.9442830954271496, iteration: 44022
loss: 0.9827452301979065,grad_norm: 0.9999988931776175, iteration: 44023
loss: 0.9880537986755371,grad_norm: 0.9999991613795391, iteration: 44024
loss: 1.0407789945602417,grad_norm: 0.9366656356560279, iteration: 44025
loss: 0.9600136280059814,grad_norm: 0.9999992871873457, iteration: 44026
loss: 1.0247633457183838,grad_norm: 0.9739151027607019, iteration: 44027
loss: 0.9927852749824524,grad_norm: 0.8886144034808902, iteration: 44028
loss: 0.978085458278656,grad_norm: 0.9999990541500019, iteration: 44029
loss: 1.0165390968322754,grad_norm: 0.9147763802371343, iteration: 44030
loss: 1.0128748416900635,grad_norm: 0.9465567156915705, iteration: 44031
loss: 1.0769028663635254,grad_norm: 0.9936195388922676, iteration: 44032
loss: 1.0543583631515503,grad_norm: 0.9999991268671873, iteration: 44033
loss: 1.002478003501892,grad_norm: 0.9387717926887013, iteration: 44034
loss: 1.1600462198257446,grad_norm: 0.9999996974941229, iteration: 44035
loss: 1.0158363580703735,grad_norm: 0.9769012220368833, iteration: 44036
loss: 1.0296244621276855,grad_norm: 0.8177771794635976, iteration: 44037
loss: 1.0065265893936157,grad_norm: 0.9692871604550181, iteration: 44038
loss: 0.9874545931816101,grad_norm: 0.8853433675436235, iteration: 44039
loss: 1.0153383016586304,grad_norm: 0.9999991255985211, iteration: 44040
loss: 1.0061249732971191,grad_norm: 0.9999990421488246, iteration: 44041
loss: 1.014805793762207,grad_norm: 0.9806244717774657, iteration: 44042
loss: 1.0232752561569214,grad_norm: 0.9706336980523252, iteration: 44043
loss: 1.0429081916809082,grad_norm: 0.999999095218781, iteration: 44044
loss: 1.033066987991333,grad_norm: 0.9933038373537216, iteration: 44045
loss: 1.0066936016082764,grad_norm: 0.8958031373168875, iteration: 44046
loss: 0.9901228547096252,grad_norm: 0.9999991165863272, iteration: 44047
loss: 1.0010546445846558,grad_norm: 0.999999104882116, iteration: 44048
loss: 0.9607256054878235,grad_norm: 0.8739317658854053, iteration: 44049
loss: 1.0297046899795532,grad_norm: 0.869618883901901, iteration: 44050
loss: 1.0033421516418457,grad_norm: 0.8811535700432394, iteration: 44051
loss: 1.0005810260772705,grad_norm: 0.8119136516142291, iteration: 44052
loss: 1.026268720626831,grad_norm: 0.9999991127558987, iteration: 44053
loss: 1.016247272491455,grad_norm: 0.9412557066495063, iteration: 44054
loss: 1.013783574104309,grad_norm: 0.9999990279355956, iteration: 44055
loss: 1.0035617351531982,grad_norm: 0.9767201496507398, iteration: 44056
loss: 0.9641023278236389,grad_norm: 0.9833258576162733, iteration: 44057
loss: 1.0048450231552124,grad_norm: 0.7371585638317778, iteration: 44058
loss: 1.0005812644958496,grad_norm: 0.9999992208599436, iteration: 44059
loss: 0.9804329872131348,grad_norm: 0.9999991170617162, iteration: 44060
loss: 1.0138719081878662,grad_norm: 0.9994340552095253, iteration: 44061
loss: 0.9912824034690857,grad_norm: 0.9638185514602585, iteration: 44062
loss: 1.0388381481170654,grad_norm: 0.9999996229288144, iteration: 44063
loss: 1.023147702217102,grad_norm: 0.9999991691556878, iteration: 44064
loss: 0.9906651973724365,grad_norm: 0.9999993496362679, iteration: 44065
loss: 0.9886190295219421,grad_norm: 0.9999989921236376, iteration: 44066
loss: 0.979638934135437,grad_norm: 0.9557977588638048, iteration: 44067
loss: 1.007714867591858,grad_norm: 0.999998964811637, iteration: 44068
loss: 1.0321248769760132,grad_norm: 0.9999993523315336, iteration: 44069
loss: 0.999707043170929,grad_norm: 0.999998866608167, iteration: 44070
loss: 1.00582754611969,grad_norm: 0.8910015700768094, iteration: 44071
loss: 1.0042779445648193,grad_norm: 0.8717133314039592, iteration: 44072
loss: 1.0527353286743164,grad_norm: 0.9999990878376136, iteration: 44073
loss: 0.9996504187583923,grad_norm: 0.9999991480744813, iteration: 44074
loss: 0.9833558797836304,grad_norm: 0.9912838110826259, iteration: 44075
loss: 0.997900128364563,grad_norm: 0.9999990873791353, iteration: 44076
loss: 1.019435167312622,grad_norm: 0.9999991418238852, iteration: 44077
loss: 1.004691481590271,grad_norm: 0.985005651777476, iteration: 44078
loss: 1.0010759830474854,grad_norm: 0.9999992660099879, iteration: 44079
loss: 0.9798418283462524,grad_norm: 0.999999060392801, iteration: 44080
loss: 1.0133910179138184,grad_norm: 0.8833624767261723, iteration: 44081
loss: 1.01462984085083,grad_norm: 0.9237379477683972, iteration: 44082
loss: 0.9912595748901367,grad_norm: 0.9999988413344097, iteration: 44083
loss: 0.9898234009742737,grad_norm: 0.99446117540094, iteration: 44084
loss: 0.9671271443367004,grad_norm: 0.8704786914918606, iteration: 44085
loss: 0.9727072715759277,grad_norm: 0.9056308697091456, iteration: 44086
loss: 1.0033154487609863,grad_norm: 0.9435594341529226, iteration: 44087
loss: 1.0500389337539673,grad_norm: 0.9907954836057343, iteration: 44088
loss: 1.0071760416030884,grad_norm: 0.9999992378972333, iteration: 44089
loss: 0.9892643690109253,grad_norm: 0.9775463524638325, iteration: 44090
loss: 0.9968574643135071,grad_norm: 0.9999990377056115, iteration: 44091
loss: 1.0157021284103394,grad_norm: 0.9099938087863644, iteration: 44092
loss: 0.9916326403617859,grad_norm: 0.8827714668181907, iteration: 44093
loss: 0.9816032648086548,grad_norm: 0.9999990988710049, iteration: 44094
loss: 1.012447476387024,grad_norm: 0.9999990190068938, iteration: 44095
loss: 1.0417062044143677,grad_norm: 0.9475478948086571, iteration: 44096
loss: 1.0165354013442993,grad_norm: 0.9999991702305817, iteration: 44097
loss: 0.9690679907798767,grad_norm: 0.9999990046352087, iteration: 44098
loss: 0.9552022814750671,grad_norm: 0.9999989282947955, iteration: 44099
loss: 0.9606646299362183,grad_norm: 0.9999992731287407, iteration: 44100
loss: 0.9740650653839111,grad_norm: 0.9577189191592652, iteration: 44101
loss: 1.0279806852340698,grad_norm: 0.9812041715437434, iteration: 44102
loss: 1.0214643478393555,grad_norm: 0.9999989783800206, iteration: 44103
loss: 1.027248501777649,grad_norm: 0.9999991105476874, iteration: 44104
loss: 1.0211995840072632,grad_norm: 0.9999991981290768, iteration: 44105
loss: 1.0220688581466675,grad_norm: 0.8334969894409645, iteration: 44106
loss: 0.9979521036148071,grad_norm: 0.9999992828652248, iteration: 44107
loss: 0.995313823223114,grad_norm: 0.9999990782360006, iteration: 44108
loss: 0.9946640133857727,grad_norm: 0.9999991137756793, iteration: 44109
loss: 1.0141782760620117,grad_norm: 0.9749320299228517, iteration: 44110
loss: 0.9468708038330078,grad_norm: 0.9535430378198979, iteration: 44111
loss: 0.9748373031616211,grad_norm: 0.9999991118395284, iteration: 44112
loss: 1.0527578592300415,grad_norm: 0.9999992538937346, iteration: 44113
loss: 0.9962658882141113,grad_norm: 0.9999990262703601, iteration: 44114
loss: 1.0078890323638916,grad_norm: 0.9378645766746725, iteration: 44115
loss: 0.9928674101829529,grad_norm: 0.9455100805649954, iteration: 44116
loss: 0.9599442481994629,grad_norm: 0.9809135993894756, iteration: 44117
loss: 0.9663932919502258,grad_norm: 0.9999990541569421, iteration: 44118
loss: 0.9760828614234924,grad_norm: 0.957811338467595, iteration: 44119
loss: 1.0160466432571411,grad_norm: 0.9999991241276062, iteration: 44120
loss: 1.0231873989105225,grad_norm: 0.9999996760766681, iteration: 44121
loss: 1.048986792564392,grad_norm: 0.9999991451828883, iteration: 44122
loss: 1.0280698537826538,grad_norm: 0.9999991788196084, iteration: 44123
loss: 0.9965775012969971,grad_norm: 0.9999989308828382, iteration: 44124
loss: 0.9754985570907593,grad_norm: 0.9776999573698605, iteration: 44125
loss: 0.9833230376243591,grad_norm: 0.9999992255227528, iteration: 44126
loss: 0.9983682036399841,grad_norm: 0.8651279187605738, iteration: 44127
loss: 1.0192408561706543,grad_norm: 0.9999991134462382, iteration: 44128
loss: 1.0106056928634644,grad_norm: 0.9073721174694015, iteration: 44129
loss: 0.9958235621452332,grad_norm: 0.8331958536328561, iteration: 44130
loss: 0.9974352717399597,grad_norm: 0.9999990870414671, iteration: 44131
loss: 0.9894582033157349,grad_norm: 0.9999990019719426, iteration: 44132
loss: 1.0174680948257446,grad_norm: 0.8475419146055783, iteration: 44133
loss: 0.9877358675003052,grad_norm: 0.9730468359994215, iteration: 44134
loss: 0.9680744409561157,grad_norm: 0.9999990455730337, iteration: 44135
loss: 0.9737718105316162,grad_norm: 0.9566820072623691, iteration: 44136
loss: 0.9910649061203003,grad_norm: 0.9999992122248599, iteration: 44137
loss: 1.0226277112960815,grad_norm: 0.9999992086643313, iteration: 44138
loss: 0.9742528796195984,grad_norm: 0.9999990906799694, iteration: 44139
loss: 1.0086498260498047,grad_norm: 0.9999990284105132, iteration: 44140
loss: 1.0080208778381348,grad_norm: 0.9999992861853686, iteration: 44141
loss: 0.96980881690979,grad_norm: 0.9311895240676582, iteration: 44142
loss: 0.9761268496513367,grad_norm: 0.9059931233964164, iteration: 44143
loss: 0.9523609280586243,grad_norm: 0.9762377832569298, iteration: 44144
loss: 1.0281180143356323,grad_norm: 0.99999910678581, iteration: 44145
loss: 1.003146767616272,grad_norm: 0.9999990635658363, iteration: 44146
loss: 0.9938962459564209,grad_norm: 0.8883105781802443, iteration: 44147
loss: 1.011542558670044,grad_norm: 0.9222814270846715, iteration: 44148
loss: 1.0154824256896973,grad_norm: 0.901877885495523, iteration: 44149
loss: 1.0353931188583374,grad_norm: 0.9999990325790636, iteration: 44150
loss: 0.9862316846847534,grad_norm: 0.9999990322311171, iteration: 44151
loss: 0.9820132255554199,grad_norm: 0.9999989443393946, iteration: 44152
loss: 0.9775951504707336,grad_norm: 0.9999990481592896, iteration: 44153
loss: 1.0332064628601074,grad_norm: 0.9999991600847191, iteration: 44154
loss: 0.9774547815322876,grad_norm: 0.9999998371120614, iteration: 44155
loss: 0.9999626874923706,grad_norm: 0.9999991629233465, iteration: 44156
loss: 1.0304800271987915,grad_norm: 0.9568287604934401, iteration: 44157
loss: 1.0008436441421509,grad_norm: 0.9999990530975309, iteration: 44158
loss: 0.9883154034614563,grad_norm: 0.999999001009146, iteration: 44159
loss: 1.0115940570831299,grad_norm: 0.999999189184505, iteration: 44160
loss: 1.003607153892517,grad_norm: 0.9345975235642815, iteration: 44161
loss: 1.0418492555618286,grad_norm: 0.9702421366107016, iteration: 44162
loss: 0.9663612246513367,grad_norm: 0.9166444037488053, iteration: 44163
loss: 0.9966076612472534,grad_norm: 0.9999990236423072, iteration: 44164
loss: 0.98958420753479,grad_norm: 0.9999992242678656, iteration: 44165
loss: 0.9651215672492981,grad_norm: 0.9999991347798259, iteration: 44166
loss: 0.9753583669662476,grad_norm: 0.999999067969255, iteration: 44167
loss: 1.0688854455947876,grad_norm: 0.999998987728215, iteration: 44168
loss: 0.9681981801986694,grad_norm: 0.9108160974596486, iteration: 44169
loss: 0.9894479513168335,grad_norm: 0.8347107915524136, iteration: 44170
loss: 1.0023748874664307,grad_norm: 0.8794763639986781, iteration: 44171
loss: 1.0157480239868164,grad_norm: 0.9999992094607906, iteration: 44172
loss: 0.993983805179596,grad_norm: 0.8763935332690144, iteration: 44173
loss: 1.002401351928711,grad_norm: 0.9999991672335714, iteration: 44174
loss: 1.0125725269317627,grad_norm: 0.9035181709365513, iteration: 44175
loss: 1.0366017818450928,grad_norm: 0.9999998725011218, iteration: 44176
loss: 1.0113428831100464,grad_norm: 0.9999990708254249, iteration: 44177
loss: 0.9821410179138184,grad_norm: 0.9999991655621454, iteration: 44178
loss: 1.0264919996261597,grad_norm: 0.9999991395449851, iteration: 44179
loss: 1.0298681259155273,grad_norm: 0.8912840297510236, iteration: 44180
loss: 1.046807885169983,grad_norm: 0.99999927764085, iteration: 44181
loss: 1.0318095684051514,grad_norm: 0.9999993176265912, iteration: 44182
loss: 0.9854617118835449,grad_norm: 0.9999990322445621, iteration: 44183
loss: 0.9268583059310913,grad_norm: 0.8745468370762267, iteration: 44184
loss: 1.0104156732559204,grad_norm: 0.9359336281710983, iteration: 44185
loss: 0.9756180644035339,grad_norm: 0.8773184849912088, iteration: 44186
loss: 0.9988546967506409,grad_norm: 0.9999991157235909, iteration: 44187
loss: 1.0471272468566895,grad_norm: 0.9999990133762358, iteration: 44188
loss: 1.0247267484664917,grad_norm: 0.8164809896903585, iteration: 44189
loss: 1.0077544450759888,grad_norm: 0.9999991513052383, iteration: 44190
loss: 1.0183950662612915,grad_norm: 0.9999990284400622, iteration: 44191
loss: 1.0035713911056519,grad_norm: 0.9705492247390449, iteration: 44192
loss: 0.9973570704460144,grad_norm: 0.9999991791079691, iteration: 44193
loss: 1.0528695583343506,grad_norm: 0.9273127261147298, iteration: 44194
loss: 1.036072015762329,grad_norm: 0.9999992195220693, iteration: 44195
loss: 0.9876397848129272,grad_norm: 0.8254879361153455, iteration: 44196
loss: 0.9834396243095398,grad_norm: 0.9999991871059964, iteration: 44197
loss: 0.9661055207252502,grad_norm: 0.9999989370450053, iteration: 44198
loss: 0.9942576289176941,grad_norm: 0.9999992567410969, iteration: 44199
loss: 1.0173624753952026,grad_norm: 0.9349856154687854, iteration: 44200
loss: 1.0040432214736938,grad_norm: 0.999999130535258, iteration: 44201
loss: 1.050671935081482,grad_norm: 0.8107736266690531, iteration: 44202
loss: 1.0179005861282349,grad_norm: 0.928340890858368, iteration: 44203
loss: 1.040427803993225,grad_norm: 0.9999994712942426, iteration: 44204
loss: 1.0236575603485107,grad_norm: 0.9999991154273217, iteration: 44205
loss: 1.0138187408447266,grad_norm: 0.9999991043140474, iteration: 44206
loss: 0.9977011680603027,grad_norm: 0.9999991342421304, iteration: 44207
loss: 0.9559351205825806,grad_norm: 0.9999992643632457, iteration: 44208
loss: 0.9425470232963562,grad_norm: 0.9999990608267256, iteration: 44209
loss: 0.9935652613639832,grad_norm: 0.9999990013299667, iteration: 44210
loss: 0.9978029131889343,grad_norm: 0.9073455357291159, iteration: 44211
loss: 1.0168132781982422,grad_norm: 0.9708059888235184, iteration: 44212
loss: 0.9409234523773193,grad_norm: 0.9730584357869996, iteration: 44213
loss: 1.010237455368042,grad_norm: 0.8708830012227231, iteration: 44214
loss: 1.0301334857940674,grad_norm: 0.886325529406686, iteration: 44215
loss: 1.0065877437591553,grad_norm: 0.9999993625874221, iteration: 44216
loss: 1.0072157382965088,grad_norm: 0.9999990098529165, iteration: 44217
loss: 1.0216964483261108,grad_norm: 0.9999990619657257, iteration: 44218
loss: 1.01718008518219,grad_norm: 0.9999991722873116, iteration: 44219
loss: 0.9850701093673706,grad_norm: 0.8108848310348765, iteration: 44220
loss: 1.0213009119033813,grad_norm: 0.9768276978150959, iteration: 44221
loss: 0.9999557137489319,grad_norm: 0.8563397801881467, iteration: 44222
loss: 0.9927350878715515,grad_norm: 0.9999991383970426, iteration: 44223
loss: 1.0024428367614746,grad_norm: 0.8367684097996161, iteration: 44224
loss: 1.0046929121017456,grad_norm: 0.9999991573345663, iteration: 44225
loss: 0.9936540722846985,grad_norm: 0.9615489669035489, iteration: 44226
loss: 0.9725335240364075,grad_norm: 0.9595180773066223, iteration: 44227
loss: 1.0186071395874023,grad_norm: 0.9279746738781822, iteration: 44228
loss: 1.0380058288574219,grad_norm: 0.8206825993708025, iteration: 44229
loss: 1.0200546979904175,grad_norm: 0.999999364941183, iteration: 44230
loss: 1.000800609588623,grad_norm: 0.9715262753704279, iteration: 44231
loss: 1.0371830463409424,grad_norm: 0.9999990510420737, iteration: 44232
loss: 1.0234628915786743,grad_norm: 0.8102303033996647, iteration: 44233
loss: 1.0098603963851929,grad_norm: 0.9134952826414605, iteration: 44234
loss: 1.0229464769363403,grad_norm: 0.825673620287266, iteration: 44235
loss: 0.9699890613555908,grad_norm: 0.979133559982265, iteration: 44236
loss: 1.0097192525863647,grad_norm: 0.999999118362801, iteration: 44237
loss: 1.0537419319152832,grad_norm: 0.9999991499636733, iteration: 44238
loss: 1.003103494644165,grad_norm: 0.999999057029589, iteration: 44239
loss: 1.0037091970443726,grad_norm: 0.9999993758338711, iteration: 44240
loss: 1.0166147947311401,grad_norm: 0.9999989777559524, iteration: 44241
loss: 0.9647181630134583,grad_norm: 0.9974023696807733, iteration: 44242
loss: 1.0007426738739014,grad_norm: 0.9146622163638554, iteration: 44243
loss: 0.981983482837677,grad_norm: 0.9999992945612866, iteration: 44244
loss: 0.9929565787315369,grad_norm: 0.9999991384294499, iteration: 44245
loss: 0.9886012077331543,grad_norm: 0.9999990565165104, iteration: 44246
loss: 1.0244934558868408,grad_norm: 0.8794262102652338, iteration: 44247
loss: 1.0334049463272095,grad_norm: 0.9541467345364726, iteration: 44248
loss: 0.950664758682251,grad_norm: 0.9999991071486836, iteration: 44249
loss: 1.0165961980819702,grad_norm: 0.9999995782132484, iteration: 44250
loss: 1.0076587200164795,grad_norm: 0.999999138469025, iteration: 44251
loss: 1.0127767324447632,grad_norm: 0.999999232376486, iteration: 44252
loss: 1.072055697441101,grad_norm: 0.9147194070372007, iteration: 44253
loss: 1.0107810497283936,grad_norm: 0.9624119550542938, iteration: 44254
loss: 1.0082018375396729,grad_norm: 0.939501472423292, iteration: 44255
loss: 0.966582179069519,grad_norm: 0.9999992357388091, iteration: 44256
loss: 1.0419567823410034,grad_norm: 0.9579378303759677, iteration: 44257
loss: 0.9956022500991821,grad_norm: 0.9999992653404032, iteration: 44258
loss: 0.9937699437141418,grad_norm: 0.9999991378668578, iteration: 44259
loss: 1.0198071002960205,grad_norm: 0.9999991848834126, iteration: 44260
loss: 1.0022920370101929,grad_norm: 0.9813189899214335, iteration: 44261
loss: 1.013219952583313,grad_norm: 0.9828055435919272, iteration: 44262
loss: 1.022557020187378,grad_norm: 0.9999989547890683, iteration: 44263
loss: 1.0237433910369873,grad_norm: 0.9999991080784452, iteration: 44264
loss: 1.0134937763214111,grad_norm: 0.8927797169001422, iteration: 44265
loss: 0.9804738163948059,grad_norm: 0.8656311377637026, iteration: 44266
loss: 1.0046035051345825,grad_norm: 0.9999992944611444, iteration: 44267
loss: 0.9940291047096252,grad_norm: 0.9999990401609822, iteration: 44268
loss: 1.0066580772399902,grad_norm: 0.9999990532980465, iteration: 44269
loss: 0.9870675802230835,grad_norm: 0.8923634769531169, iteration: 44270
loss: 1.0208097696304321,grad_norm: 0.9514162196130708, iteration: 44271
loss: 1.0140019655227661,grad_norm: 0.9172254209361622, iteration: 44272
loss: 0.9939181804656982,grad_norm: 0.9999990875284647, iteration: 44273
loss: 0.9970628619194031,grad_norm: 0.8869906995351928, iteration: 44274
loss: 1.0183384418487549,grad_norm: 0.9999991194624525, iteration: 44275
loss: 0.9885358810424805,grad_norm: 0.9999991010371952, iteration: 44276
loss: 0.9759253859519958,grad_norm: 0.8187055019236122, iteration: 44277
loss: 1.0117413997650146,grad_norm: 0.7681019970378183, iteration: 44278
loss: 0.9916065335273743,grad_norm: 0.9663863465814153, iteration: 44279
loss: 1.0402696132659912,grad_norm: 0.9999992806218828, iteration: 44280
loss: 1.023809552192688,grad_norm: 0.9999993133901179, iteration: 44281
loss: 1.0197457075119019,grad_norm: 0.9788620438108178, iteration: 44282
loss: 1.0656933784484863,grad_norm: 0.999999186872112, iteration: 44283
loss: 0.9887824654579163,grad_norm: 0.9999990282141384, iteration: 44284
loss: 0.9846675992012024,grad_norm: 0.9999994167958418, iteration: 44285
loss: 1.0130239725112915,grad_norm: 0.9999991719815275, iteration: 44286
loss: 0.9798395037651062,grad_norm: 0.8949611691570812, iteration: 44287
loss: 0.9768088459968567,grad_norm: 0.9999989890822764, iteration: 44288
loss: 0.9993982911109924,grad_norm: 0.9999993006720921, iteration: 44289
loss: 0.9848210215568542,grad_norm: 0.9325263942948033, iteration: 44290
loss: 1.0021848678588867,grad_norm: 0.9790237951241856, iteration: 44291
loss: 1.0190958976745605,grad_norm: 0.9999989100907329, iteration: 44292
loss: 1.0349044799804688,grad_norm: 0.8402268865541391, iteration: 44293
loss: 1.0144926309585571,grad_norm: 0.9372226906583522, iteration: 44294
loss: 1.011925458908081,grad_norm: 0.7887996836730892, iteration: 44295
loss: 1.042624592781067,grad_norm: 0.9999991355754907, iteration: 44296
loss: 0.9853478074073792,grad_norm: 0.9999989590332249, iteration: 44297
loss: 1.0295699834823608,grad_norm: 0.9999992046203502, iteration: 44298
loss: 0.997501790523529,grad_norm: 0.889002565819112, iteration: 44299
loss: 1.0415637493133545,grad_norm: 0.9825586972967528, iteration: 44300
loss: 1.0156495571136475,grad_norm: 0.9999991704875294, iteration: 44301
loss: 1.0187495946884155,grad_norm: 0.9999990532920451, iteration: 44302
loss: 0.9980155825614929,grad_norm: 0.9176446009711777, iteration: 44303
loss: 1.0385791063308716,grad_norm: 0.9999992207021393, iteration: 44304
loss: 1.0220248699188232,grad_norm: 0.9999992271492032, iteration: 44305
loss: 1.0167737007141113,grad_norm: 0.8340779698834, iteration: 44306
loss: 0.9915438294410706,grad_norm: 0.9999990903324063, iteration: 44307
loss: 1.0332952737808228,grad_norm: 0.999999313955154, iteration: 44308
loss: 0.9930627346038818,grad_norm: 0.9999991339632451, iteration: 44309
loss: 1.021427035331726,grad_norm: 0.9312579398740997, iteration: 44310
loss: 0.9827986359596252,grad_norm: 0.9720582741190382, iteration: 44311
loss: 1.0362944602966309,grad_norm: 0.9999991237851165, iteration: 44312
loss: 1.0118355751037598,grad_norm: 0.9011127342861712, iteration: 44313
loss: 1.005907654762268,grad_norm: 0.9999990877274669, iteration: 44314
loss: 0.984359085559845,grad_norm: 0.9270161978954933, iteration: 44315
loss: 0.988053023815155,grad_norm: 0.99999917373533, iteration: 44316
loss: 0.9797619581222534,grad_norm: 0.8379650348888679, iteration: 44317
loss: 1.0153210163116455,grad_norm: 0.9999990156889005, iteration: 44318
loss: 0.9694364070892334,grad_norm: 0.9999991091134007, iteration: 44319
loss: 1.0050852298736572,grad_norm: 0.9999992835916617, iteration: 44320
loss: 1.0124390125274658,grad_norm: 0.9999992299152038, iteration: 44321
loss: 0.9986947774887085,grad_norm: 0.9713790688675515, iteration: 44322
loss: 1.016618013381958,grad_norm: 0.9536430235753648, iteration: 44323
loss: 1.0345213413238525,grad_norm: 0.9999992778173358, iteration: 44324
loss: 0.9726154804229736,grad_norm: 0.8551073027266618, iteration: 44325
loss: 1.0310144424438477,grad_norm: 0.9999999174791566, iteration: 44326
loss: 1.0590527057647705,grad_norm: 0.9999989942953165, iteration: 44327
loss: 0.9870200753211975,grad_norm: 0.9999993892797122, iteration: 44328
loss: 1.021544098854065,grad_norm: 0.9999992495596014, iteration: 44329
loss: 1.0091543197631836,grad_norm: 0.9999990497444878, iteration: 44330
loss: 1.0013339519500732,grad_norm: 0.9999991936450722, iteration: 44331
loss: 1.0133414268493652,grad_norm: 0.9999990550376011, iteration: 44332
loss: 1.0140020847320557,grad_norm: 0.9999989651594663, iteration: 44333
loss: 1.0032069683074951,grad_norm: 0.915058678737803, iteration: 44334
loss: 1.0051542520523071,grad_norm: 0.9999991342622195, iteration: 44335
loss: 0.9648041129112244,grad_norm: 0.9999990547117813, iteration: 44336
loss: 0.9841102361679077,grad_norm: 0.8434196658131228, iteration: 44337
loss: 0.9975786805152893,grad_norm: 0.9999991656621, iteration: 44338
loss: 1.0205270051956177,grad_norm: 0.9999993563259203, iteration: 44339
loss: 1.0233924388885498,grad_norm: 0.9996801272559203, iteration: 44340
loss: 0.9876533150672913,grad_norm: 0.9999994417986252, iteration: 44341
loss: 0.9795515537261963,grad_norm: 0.9999993353309803, iteration: 44342
loss: 1.017532229423523,grad_norm: 0.9801144659977866, iteration: 44343
loss: 0.9667096734046936,grad_norm: 0.9980594923047647, iteration: 44344
loss: 0.9926455616950989,grad_norm: 0.9999990402009059, iteration: 44345
loss: 1.0298619270324707,grad_norm: 0.9999991992283028, iteration: 44346
loss: 1.0076302289962769,grad_norm: 0.9645528330786833, iteration: 44347
loss: 1.0042880773544312,grad_norm: 0.9130168061625603, iteration: 44348
loss: 1.0206986665725708,grad_norm: 0.9999791652551256, iteration: 44349
loss: 0.9713406562805176,grad_norm: 0.9123327423813582, iteration: 44350
loss: 0.9974136352539062,grad_norm: 0.9026837997296995, iteration: 44351
loss: 1.0462281703948975,grad_norm: 0.999999019056583, iteration: 44352
loss: 1.0016765594482422,grad_norm: 0.9999990450525011, iteration: 44353
loss: 1.045164704322815,grad_norm: 0.9999990722647735, iteration: 44354
loss: 1.000043511390686,grad_norm: 0.9533532259496703, iteration: 44355
loss: 0.9957060217857361,grad_norm: 0.9119963540752709, iteration: 44356
loss: 0.9807841777801514,grad_norm: 0.8945911174269883, iteration: 44357
loss: 0.9731498956680298,grad_norm: 0.9999991486547432, iteration: 44358
loss: 0.9907377362251282,grad_norm: 0.9999993363603439, iteration: 44359
loss: 0.9903409481048584,grad_norm: 0.9999991190909351, iteration: 44360
loss: 1.0030946731567383,grad_norm: 0.910821294519014, iteration: 44361
loss: 1.0036431550979614,grad_norm: 0.999998977626899, iteration: 44362
loss: 1.005293607711792,grad_norm: 0.9999992727806504, iteration: 44363
loss: 1.0146814584732056,grad_norm: 0.9999991500653072, iteration: 44364
loss: 1.0390231609344482,grad_norm: 0.9999991535023619, iteration: 44365
loss: 0.9972299933433533,grad_norm: 0.9999991278506075, iteration: 44366
loss: 0.9500225782394409,grad_norm: 0.7375975551676041, iteration: 44367
loss: 0.9883182644844055,grad_norm: 0.9088483892776983, iteration: 44368
loss: 1.0388656854629517,grad_norm: 0.8596059592241342, iteration: 44369
loss: 1.0007442235946655,grad_norm: 0.9971384047584558, iteration: 44370
loss: 0.9762499928474426,grad_norm: 0.9999990008350229, iteration: 44371
loss: 0.9927008748054504,grad_norm: 0.9360481226182594, iteration: 44372
loss: 0.9839191436767578,grad_norm: 0.8984771516250522, iteration: 44373
loss: 1.015307903289795,grad_norm: 0.9198114834114508, iteration: 44374
loss: 0.9992002248764038,grad_norm: 0.9999990849489128, iteration: 44375
loss: 1.047505497932434,grad_norm: 0.9999992069700182, iteration: 44376
loss: 0.9689165353775024,grad_norm: 0.948331329743457, iteration: 44377
loss: 0.9747927188873291,grad_norm: 0.8258444969945716, iteration: 44378
loss: 1.0172759294509888,grad_norm: 0.9999990725946919, iteration: 44379
loss: 1.0006535053253174,grad_norm: 0.9999992431936444, iteration: 44380
loss: 0.991651177406311,grad_norm: 0.9999992891054005, iteration: 44381
loss: 0.996235728263855,grad_norm: 0.9999990662323308, iteration: 44382
loss: 0.9711788296699524,grad_norm: 0.9999991872523198, iteration: 44383
loss: 0.9698575735092163,grad_norm: 0.9999990144165672, iteration: 44384
loss: 1.0314197540283203,grad_norm: 0.9714632408423423, iteration: 44385
loss: 0.9779016375541687,grad_norm: 0.881999697020963, iteration: 44386
loss: 0.9918285608291626,grad_norm: 0.8498503689162118, iteration: 44387
loss: 0.9764540195465088,grad_norm: 0.9740040831095323, iteration: 44388
loss: 0.9794391393661499,grad_norm: 0.999999194426194, iteration: 44389
loss: 1.0093984603881836,grad_norm: 0.9739903564182018, iteration: 44390
loss: 1.0308194160461426,grad_norm: 0.9999990616176326, iteration: 44391
loss: 1.0019371509552002,grad_norm: 0.999999091728451, iteration: 44392
loss: 0.9296954870223999,grad_norm: 0.9999991754802534, iteration: 44393
loss: 1.0814828872680664,grad_norm: 0.9999993167467396, iteration: 44394
loss: 0.9963687062263489,grad_norm: 0.99999902376913, iteration: 44395
loss: 0.9746665358543396,grad_norm: 0.8941137565618605, iteration: 44396
loss: 1.0363712310791016,grad_norm: 0.9999992469013941, iteration: 44397
loss: 0.9812844395637512,grad_norm: 0.9999990307342347, iteration: 44398
loss: 1.0206981897354126,grad_norm: 0.8674328040814241, iteration: 44399
loss: 1.0221003293991089,grad_norm: 0.9999991484447155, iteration: 44400
loss: 1.058469533920288,grad_norm: 0.999999186182375, iteration: 44401
loss: 0.9933345317840576,grad_norm: 0.910298451538147, iteration: 44402
loss: 1.0026122331619263,grad_norm: 0.9999991335747138, iteration: 44403
loss: 1.013049840927124,grad_norm: 0.9999992360958231, iteration: 44404
loss: 1.0143131017684937,grad_norm: 0.9999990403960549, iteration: 44405
loss: 1.0320087671279907,grad_norm: 0.9999990772437223, iteration: 44406
loss: 0.9937150478363037,grad_norm: 0.9882613612619108, iteration: 44407
loss: 1.0296757221221924,grad_norm: 0.9866519377620024, iteration: 44408
loss: 1.027779459953308,grad_norm: 0.8834702343712746, iteration: 44409
loss: 1.0336982011795044,grad_norm: 0.9999990843798696, iteration: 44410
loss: 0.9994786381721497,grad_norm: 0.9999992054479223, iteration: 44411
loss: 1.041465163230896,grad_norm: 1.000000026146792, iteration: 44412
loss: 0.9903701543807983,grad_norm: 0.9338989690191791, iteration: 44413
loss: 1.0097609758377075,grad_norm: 0.99999917199029, iteration: 44414
loss: 0.9790706634521484,grad_norm: 0.99999901447297, iteration: 44415
loss: 1.0083853006362915,grad_norm: 0.9555777998346696, iteration: 44416
loss: 1.014862060546875,grad_norm: 0.9580413923546756, iteration: 44417
loss: 0.9675731062889099,grad_norm: 0.9802013575309854, iteration: 44418
loss: 0.9541826844215393,grad_norm: 0.9849173806936934, iteration: 44419
loss: 0.9844970107078552,grad_norm: 0.8814792636958589, iteration: 44420
loss: 1.0108907222747803,grad_norm: 0.9999990225466868, iteration: 44421
loss: 0.9963220357894897,grad_norm: 0.925246480393263, iteration: 44422
loss: 1.0375657081604004,grad_norm: 0.9171163810788365, iteration: 44423
loss: 1.063530445098877,grad_norm: 0.9999998421092927, iteration: 44424
loss: 0.9647557735443115,grad_norm: 0.9427486005426668, iteration: 44425
loss: 1.0008540153503418,grad_norm: 0.8457699575764024, iteration: 44426
loss: 1.0227864980697632,grad_norm: 0.9999998520551412, iteration: 44427
loss: 1.003264307975769,grad_norm: 0.9999990804004865, iteration: 44428
loss: 1.01003098487854,grad_norm: 0.9999991867304079, iteration: 44429
loss: 1.016668438911438,grad_norm: 0.9999991146381443, iteration: 44430
loss: 0.9621788263320923,grad_norm: 0.9999991362835134, iteration: 44431
loss: 1.0004655122756958,grad_norm: 0.9752640970268203, iteration: 44432
loss: 0.943962812423706,grad_norm: 0.9138816070593754, iteration: 44433
loss: 1.0071320533752441,grad_norm: 0.9999992026939288, iteration: 44434
loss: 1.0130956172943115,grad_norm: 0.9999991398063695, iteration: 44435
loss: 1.000588297843933,grad_norm: 0.9622374627295546, iteration: 44436
loss: 1.0011430978775024,grad_norm: 0.9999994243716825, iteration: 44437
loss: 1.0161545276641846,grad_norm: 0.9631901890354579, iteration: 44438
loss: 1.0083787441253662,grad_norm: 0.9999993310211038, iteration: 44439
loss: 1.0048693418502808,grad_norm: 0.9468024492741962, iteration: 44440
loss: 1.0350421667099,grad_norm: 0.9999999128832466, iteration: 44441
loss: 1.0391490459442139,grad_norm: 0.9999991839108544, iteration: 44442
loss: 1.017913579940796,grad_norm: 0.9999998755812702, iteration: 44443
loss: 1.0281620025634766,grad_norm: 0.9816930482851886, iteration: 44444
loss: 1.0124324560165405,grad_norm: 0.9427780433563382, iteration: 44445
loss: 1.047782301902771,grad_norm: 0.9999990873568187, iteration: 44446
loss: 0.9797658324241638,grad_norm: 0.7628307625528933, iteration: 44447
loss: 0.9733064770698547,grad_norm: 0.8644472509001323, iteration: 44448
loss: 1.017943263053894,grad_norm: 0.9999990185540932, iteration: 44449
loss: 1.0161147117614746,grad_norm: 0.9999990913025649, iteration: 44450
loss: 1.004244327545166,grad_norm: 0.9951620501854946, iteration: 44451
loss: 0.9888211488723755,grad_norm: 0.9956619024412381, iteration: 44452
loss: 0.9977929592132568,grad_norm: 0.9999991309208993, iteration: 44453
loss: 0.9725071787834167,grad_norm: 0.9999991733361371, iteration: 44454
loss: 1.0062717199325562,grad_norm: 0.8976814696919472, iteration: 44455
loss: 1.0508720874786377,grad_norm: 0.9999996292528931, iteration: 44456
loss: 1.0000193119049072,grad_norm: 0.9833823622125207, iteration: 44457
loss: 0.9845812320709229,grad_norm: 0.9219767238618459, iteration: 44458
loss: 1.0434664487838745,grad_norm: 0.9812885648016817, iteration: 44459
loss: 1.0324851274490356,grad_norm: 0.999999028302654, iteration: 44460
loss: 0.9902324080467224,grad_norm: 0.9734052142251582, iteration: 44461
loss: 0.9928510785102844,grad_norm: 0.9023752235756703, iteration: 44462
loss: 1.0186996459960938,grad_norm: 0.9999990982994399, iteration: 44463
loss: 0.9943953156471252,grad_norm: 0.9999990192288435, iteration: 44464
loss: 0.9486070871353149,grad_norm: 0.8073409459476754, iteration: 44465
loss: 1.0225001573562622,grad_norm: 0.9456154869081674, iteration: 44466
loss: 1.1193877458572388,grad_norm: 0.9999993935110189, iteration: 44467
loss: 1.005111575126648,grad_norm: 0.942901803075889, iteration: 44468
loss: 1.030258297920227,grad_norm: 0.9999990270439353, iteration: 44469
loss: 0.9713225364685059,grad_norm: 0.9733654709984272, iteration: 44470
loss: 0.98639315366745,grad_norm: 0.8420859338719268, iteration: 44471
loss: 1.0695505142211914,grad_norm: 0.9999991104015669, iteration: 44472
loss: 1.0012317895889282,grad_norm: 0.99999970728704, iteration: 44473
loss: 0.9954516291618347,grad_norm: 0.9999990720534535, iteration: 44474
loss: 1.0863072872161865,grad_norm: 0.9999992096140725, iteration: 44475
loss: 0.9945662021636963,grad_norm: 0.9999991091164829, iteration: 44476
loss: 0.9959380030632019,grad_norm: 0.9328817075232855, iteration: 44477
loss: 0.9659225940704346,grad_norm: 0.9049205137596923, iteration: 44478
loss: 0.981553852558136,grad_norm: 0.9999990840887916, iteration: 44479
loss: 1.0169247388839722,grad_norm: 0.9999993206118074, iteration: 44480
loss: 1.0116328001022339,grad_norm: 0.9999990240245428, iteration: 44481
loss: 1.0097558498382568,grad_norm: 0.9753080464114045, iteration: 44482
loss: 1.0050772428512573,grad_norm: 0.9476143878163648, iteration: 44483
loss: 1.0132460594177246,grad_norm: 0.9999991673486103, iteration: 44484
loss: 1.0225796699523926,grad_norm: 0.9999991606531035, iteration: 44485
loss: 0.9989137649536133,grad_norm: 0.808680580381741, iteration: 44486
loss: 0.9753251671791077,grad_norm: 0.9999991300860703, iteration: 44487
loss: 0.9948984384536743,grad_norm: 0.9695085510870871, iteration: 44488
loss: 0.9766903519630432,grad_norm: 0.999998951196678, iteration: 44489
loss: 1.023034930229187,grad_norm: 0.9999992341988769, iteration: 44490
loss: 1.0048834085464478,grad_norm: 0.922315532539758, iteration: 44491
loss: 1.0161640644073486,grad_norm: 0.9613121477189775, iteration: 44492
loss: 1.0033708810806274,grad_norm: 0.9999991582225914, iteration: 44493
loss: 1.0334384441375732,grad_norm: 0.8001285148078587, iteration: 44494
loss: 0.9913465976715088,grad_norm: 0.9999993014290794, iteration: 44495
loss: 1.0215015411376953,grad_norm: 0.9999993412776785, iteration: 44496
loss: 0.9866650104522705,grad_norm: 0.878499995938737, iteration: 44497
loss: 0.9930551052093506,grad_norm: 0.9999990537953687, iteration: 44498
loss: 0.9780042171478271,grad_norm: 0.9999990141432021, iteration: 44499
loss: 0.9600306153297424,grad_norm: 0.9828452504327532, iteration: 44500
loss: 1.0414867401123047,grad_norm: 0.9999990696394212, iteration: 44501
loss: 0.999337375164032,grad_norm: 0.9999995458829813, iteration: 44502
loss: 1.014008641242981,grad_norm: 0.9999994591090932, iteration: 44503
loss: 0.9812700152397156,grad_norm: 0.9410176192462996, iteration: 44504
loss: 0.992318868637085,grad_norm: 0.9999990858814302, iteration: 44505
loss: 1.0056291818618774,grad_norm: 0.9880152480410397, iteration: 44506
loss: 0.9911284446716309,grad_norm: 0.9999991082659945, iteration: 44507
loss: 0.9871318936347961,grad_norm: 0.9999990168291494, iteration: 44508
loss: 1.0528192520141602,grad_norm: 0.99999937282468, iteration: 44509
loss: 0.9952619671821594,grad_norm: 0.9999990663944955, iteration: 44510
loss: 1.0173078775405884,grad_norm: 0.8703042448853037, iteration: 44511
loss: 1.012933373451233,grad_norm: 0.9979196154734241, iteration: 44512
loss: 1.0246926546096802,grad_norm: 0.9999992604928953, iteration: 44513
loss: 1.0244934558868408,grad_norm: 0.9999992148724794, iteration: 44514
loss: 1.0170462131500244,grad_norm: 0.9204130148339456, iteration: 44515
loss: 1.0313396453857422,grad_norm: 0.9999992862818661, iteration: 44516
loss: 0.9657279849052429,grad_norm: 0.9999990249716723, iteration: 44517
loss: 0.9999691247940063,grad_norm: 0.7770968724784483, iteration: 44518
loss: 1.0275862216949463,grad_norm: 0.9999991455160997, iteration: 44519
loss: 0.9903832077980042,grad_norm: 0.999999599050488, iteration: 44520
loss: 0.9973976612091064,grad_norm: 0.9999996495034681, iteration: 44521
loss: 0.9985514879226685,grad_norm: 0.9999990389600777, iteration: 44522
loss: 0.9921251535415649,grad_norm: 0.9999993293219335, iteration: 44523
loss: 1.0732685327529907,grad_norm: 0.9999994998492236, iteration: 44524
loss: 0.9510106444358826,grad_norm: 0.9999990681619284, iteration: 44525
loss: 0.9948870539665222,grad_norm: 0.9999994885262815, iteration: 44526
loss: 0.9902341365814209,grad_norm: 0.8721733954077845, iteration: 44527
loss: 1.0382732152938843,grad_norm: 0.9999989944424441, iteration: 44528
loss: 1.0098004341125488,grad_norm: 0.7404452416499281, iteration: 44529
loss: 1.0185248851776123,grad_norm: 0.9959061187412506, iteration: 44530
loss: 1.0020482540130615,grad_norm: 0.999999183581856, iteration: 44531
loss: 1.0136373043060303,grad_norm: 0.9451346340061141, iteration: 44532
loss: 1.0006499290466309,grad_norm: 0.9285325021526484, iteration: 44533
loss: 1.0124422311782837,grad_norm: 0.9999992956410745, iteration: 44534
loss: 0.9916693568229675,grad_norm: 0.8934439613628725, iteration: 44535
loss: 0.9912657141685486,grad_norm: 0.9040215457128561, iteration: 44536
loss: 1.0167773962020874,grad_norm: 0.9999991157132464, iteration: 44537
loss: 1.0061110258102417,grad_norm: 0.9999994388180314, iteration: 44538
loss: 0.9633926153182983,grad_norm: 0.9465956956451453, iteration: 44539
loss: 1.0151512622833252,grad_norm: 0.9999996469126248, iteration: 44540
loss: 0.9915741086006165,grad_norm: 0.9999992235122024, iteration: 44541
loss: 0.9835596084594727,grad_norm: 0.8664377879362184, iteration: 44542
loss: 1.0028893947601318,grad_norm: 0.9999992132541929, iteration: 44543
loss: 1.0087709426879883,grad_norm: 0.8958565828431183, iteration: 44544
loss: 1.0125130414962769,grad_norm: 0.9999990495969729, iteration: 44545
loss: 0.9215537905693054,grad_norm: 0.9999990823573219, iteration: 44546
loss: 0.9655190706253052,grad_norm: 0.9179785298358064, iteration: 44547
loss: 1.0348913669586182,grad_norm: 0.9222623876935601, iteration: 44548
loss: 1.0103950500488281,grad_norm: 0.9999992190667146, iteration: 44549
loss: 1.0088062286376953,grad_norm: 0.8402543982515652, iteration: 44550
loss: 0.9971244931221008,grad_norm: 0.999999233899315, iteration: 44551
loss: 0.9900773167610168,grad_norm: 0.9999994296211904, iteration: 44552
loss: 1.0053423643112183,grad_norm: 0.9838463625324129, iteration: 44553
loss: 1.0285331010818481,grad_norm: 0.9999991469054131, iteration: 44554
loss: 0.9600031971931458,grad_norm: 0.9308703191071388, iteration: 44555
loss: 0.9823201298713684,grad_norm: 0.7877796921737366, iteration: 44556
loss: 1.0073988437652588,grad_norm: 0.999999166789377, iteration: 44557
loss: 0.9911198616027832,grad_norm: 0.8411723258078732, iteration: 44558
loss: 0.9770799279212952,grad_norm: 0.9884931051217483, iteration: 44559
loss: 1.0235031843185425,grad_norm: 0.999999833889704, iteration: 44560
loss: 1.0281647443771362,grad_norm: 0.9999991828669135, iteration: 44561
loss: 0.9916547536849976,grad_norm: 0.9999990499476975, iteration: 44562
loss: 1.0102157592773438,grad_norm: 0.9999991397012843, iteration: 44563
loss: 0.9839123487472534,grad_norm: 0.9999991781919223, iteration: 44564
loss: 1.0084129571914673,grad_norm: 0.8820394035361108, iteration: 44565
loss: 0.9852676391601562,grad_norm: 0.9999991398217338, iteration: 44566
loss: 1.075476884841919,grad_norm: 0.9999995121901429, iteration: 44567
loss: 1.0229579210281372,grad_norm: 0.9999991966092227, iteration: 44568
loss: 0.9878490567207336,grad_norm: 0.9999992250765617, iteration: 44569
loss: 1.025718331336975,grad_norm: 0.9999998185349964, iteration: 44570
loss: 0.9979623556137085,grad_norm: 0.9999992251942086, iteration: 44571
loss: 0.9967071413993835,grad_norm: 0.85901667607547, iteration: 44572
loss: 1.0137501955032349,grad_norm: 0.9999995849553186, iteration: 44573
loss: 0.9632383584976196,grad_norm: 0.9999990685116853, iteration: 44574
loss: 1.0724238157272339,grad_norm: 0.9999997312539415, iteration: 44575
loss: 1.0139192342758179,grad_norm: 0.9999989792375417, iteration: 44576
loss: 1.0382521152496338,grad_norm: 0.9068921560217137, iteration: 44577
loss: 0.9983235597610474,grad_norm: 0.9999990548928291, iteration: 44578
loss: 0.9997287392616272,grad_norm: 0.9999994114908225, iteration: 44579
loss: 0.9653719067573547,grad_norm: 0.7668202355025846, iteration: 44580
loss: 0.9657453298568726,grad_norm: 0.8997239572746504, iteration: 44581
loss: 1.0515660047531128,grad_norm: 0.9999998240266, iteration: 44582
loss: 1.044649600982666,grad_norm: 0.9999996209505717, iteration: 44583
loss: 1.1253446340560913,grad_norm: 0.9999993997105127, iteration: 44584
loss: 1.0134047269821167,grad_norm: 0.9999990219458201, iteration: 44585
loss: 0.9812500476837158,grad_norm: 0.8685299215185338, iteration: 44586
loss: 1.0212129354476929,grad_norm: 0.9590637987033671, iteration: 44587
loss: 1.0182045698165894,grad_norm: 0.9999991218760322, iteration: 44588
loss: 1.004372000694275,grad_norm: 0.9386872547914552, iteration: 44589
loss: 1.0536351203918457,grad_norm: 0.9999992543030785, iteration: 44590
loss: 0.9794719219207764,grad_norm: 0.9764642925161022, iteration: 44591
loss: 1.033742070198059,grad_norm: 0.8751515263888973, iteration: 44592
loss: 0.9823298454284668,grad_norm: 0.9999991899065647, iteration: 44593
loss: 0.985065758228302,grad_norm: 0.9999989994474143, iteration: 44594
loss: 1.0192829370498657,grad_norm: 0.9999991230846099, iteration: 44595
loss: 1.0031284093856812,grad_norm: 0.999998993676572, iteration: 44596
loss: 1.0004596710205078,grad_norm: 0.9999991022360821, iteration: 44597
loss: 0.9751428365707397,grad_norm: 0.9999990999409981, iteration: 44598
loss: 1.0241010189056396,grad_norm: 0.8785047798081542, iteration: 44599
loss: 0.9643093347549438,grad_norm: 0.9999994331987202, iteration: 44600
loss: 0.9837860465049744,grad_norm: 0.8674604333320518, iteration: 44601
loss: 0.983432412147522,grad_norm: 0.9999994307183806, iteration: 44602
loss: 1.023384928703308,grad_norm: 0.9780121417381468, iteration: 44603
loss: 0.9887076020240784,grad_norm: 0.999999292813863, iteration: 44604
loss: 1.0168274641036987,grad_norm: 0.9999994793897318, iteration: 44605
loss: 1.0298248529434204,grad_norm: 0.9999989615816388, iteration: 44606
loss: 1.0198380947113037,grad_norm: 0.9999991295569536, iteration: 44607
loss: 1.0632821321487427,grad_norm: 0.999999460133492, iteration: 44608
loss: 1.0389021635055542,grad_norm: 0.999999277938118, iteration: 44609
loss: 0.9890350699424744,grad_norm: 0.9766506541355812, iteration: 44610
loss: 1.0352650880813599,grad_norm: 0.9999992377125188, iteration: 44611
loss: 1.0360137224197388,grad_norm: 0.9999993639602566, iteration: 44612
loss: 0.9965432286262512,grad_norm: 0.9999989832947369, iteration: 44613
loss: 0.9847488403320312,grad_norm: 0.9820625067867593, iteration: 44614
loss: 0.98956298828125,grad_norm: 0.7320927744361643, iteration: 44615
loss: 1.00348961353302,grad_norm: 0.9933456195787699, iteration: 44616
loss: 1.0127959251403809,grad_norm: 0.9225185597324053, iteration: 44617
loss: 0.9836505651473999,grad_norm: 0.9142546983043117, iteration: 44618
loss: 1.0123461484909058,grad_norm: 0.9999997130503001, iteration: 44619
loss: 1.017787218093872,grad_norm: 0.9291951554185597, iteration: 44620
loss: 0.9538154602050781,grad_norm: 0.9613287175920432, iteration: 44621
loss: 0.9913115501403809,grad_norm: 0.944301701234107, iteration: 44622
loss: 1.0386273860931396,grad_norm: 0.999999149522741, iteration: 44623
loss: 0.9913188815116882,grad_norm: 0.9370352039869043, iteration: 44624
loss: 0.9739392995834351,grad_norm: 0.9999990869123045, iteration: 44625
loss: 1.0002167224884033,grad_norm: 0.9999992241197978, iteration: 44626
loss: 0.9705904126167297,grad_norm: 0.9356708693442665, iteration: 44627
loss: 1.0463844537734985,grad_norm: 0.9558945954859395, iteration: 44628
loss: 1.0052443742752075,grad_norm: 0.8937710770262296, iteration: 44629
loss: 1.0189847946166992,grad_norm: 0.8642538187353682, iteration: 44630
loss: 1.0217361450195312,grad_norm: 0.999999197024801, iteration: 44631
loss: 1.0303922891616821,grad_norm: 0.9999991503544396, iteration: 44632
loss: 0.9693360924720764,grad_norm: 0.9999990766050896, iteration: 44633
loss: 1.0060772895812988,grad_norm: 0.8718609891542851, iteration: 44634
loss: 1.0021955966949463,grad_norm: 0.9128246792871063, iteration: 44635
loss: 0.9952400326728821,grad_norm: 0.9999998638717382, iteration: 44636
loss: 1.033313274383545,grad_norm: 0.9999989247732537, iteration: 44637
loss: 1.0304065942764282,grad_norm: 0.8641538662986067, iteration: 44638
loss: 0.9634192585945129,grad_norm: 0.9808461276347998, iteration: 44639
loss: 1.0025025606155396,grad_norm: 0.9751175790403886, iteration: 44640
loss: 1.026740550994873,grad_norm: 0.9999990389521793, iteration: 44641
loss: 1.0330029726028442,grad_norm: 0.9459126824230306, iteration: 44642
loss: 0.9749786257743835,grad_norm: 0.9587358144061677, iteration: 44643
loss: 1.020866870880127,grad_norm: 0.996736192744611, iteration: 44644
loss: 1.0352954864501953,grad_norm: 0.9281268662975548, iteration: 44645
loss: 0.9900781512260437,grad_norm: 0.9454450522185197, iteration: 44646
loss: 1.0212793350219727,grad_norm: 0.9999991730532368, iteration: 44647
loss: 1.0326039791107178,grad_norm: 0.9999991830166196, iteration: 44648
loss: 1.0105323791503906,grad_norm: 0.999999782231699, iteration: 44649
loss: 1.0235097408294678,grad_norm: 0.9999993842016253, iteration: 44650
loss: 1.0053802728652954,grad_norm: 0.9014904433776417, iteration: 44651
loss: 1.0366111993789673,grad_norm: 0.9999993312412105, iteration: 44652
loss: 1.0348984003067017,grad_norm: 0.9290192339921245, iteration: 44653
loss: 0.9526668787002563,grad_norm: 0.9999990764488709, iteration: 44654
loss: 0.9944056868553162,grad_norm: 0.9479344680482478, iteration: 44655
loss: 0.9955700039863586,grad_norm: 0.906436689944659, iteration: 44656
loss: 0.9573988318443298,grad_norm: 0.8751677505765516, iteration: 44657
loss: 1.0137149095535278,grad_norm: 0.9999991045158921, iteration: 44658
loss: 1.019295573234558,grad_norm: 0.9254860407795769, iteration: 44659
loss: 0.9723083972930908,grad_norm: 0.9999991518071778, iteration: 44660
loss: 1.0040345191955566,grad_norm: 0.9999990955409396, iteration: 44661
loss: 0.9857791662216187,grad_norm: 0.9999990999491102, iteration: 44662
loss: 1.042979121208191,grad_norm: 0.9999993781553053, iteration: 44663
loss: 1.0820770263671875,grad_norm: 0.9999995163844968, iteration: 44664
loss: 1.021132230758667,grad_norm: 0.9999992783179609, iteration: 44665
loss: 0.968169629573822,grad_norm: 0.9999991419025882, iteration: 44666
loss: 1.0457853078842163,grad_norm: 0.9999995734325242, iteration: 44667
loss: 1.0520622730255127,grad_norm: 0.9999992754913719, iteration: 44668
loss: 0.9996078014373779,grad_norm: 0.9332970368904824, iteration: 44669
loss: 1.0160562992095947,grad_norm: 0.99681683587936, iteration: 44670
loss: 1.0221320390701294,grad_norm: 0.9999991588629572, iteration: 44671
loss: 1.0539844036102295,grad_norm: 0.9999990487687481, iteration: 44672
loss: 1.007722020149231,grad_norm: 0.8376274068303894, iteration: 44673
loss: 1.0049506425857544,grad_norm: 0.9999990383112997, iteration: 44674
loss: 0.9936733245849609,grad_norm: 0.9999991570635073, iteration: 44675
loss: 0.992897629737854,grad_norm: 0.9999991730278479, iteration: 44676
loss: 1.0298117399215698,grad_norm: 0.9999990821695532, iteration: 44677
loss: 1.0475081205368042,grad_norm: 0.9999995685223191, iteration: 44678
loss: 1.0014874935150146,grad_norm: 0.9999990339688037, iteration: 44679
loss: 1.0129892826080322,grad_norm: 0.9999990204061515, iteration: 44680
loss: 0.9888074398040771,grad_norm: 0.9364476238543864, iteration: 44681
loss: 1.0333048105239868,grad_norm: 0.9999993250893607, iteration: 44682
loss: 0.9953917860984802,grad_norm: 0.9999991003230422, iteration: 44683
loss: 0.9995900392532349,grad_norm: 0.999999231129928, iteration: 44684
loss: 0.975738525390625,grad_norm: 0.8873396099848402, iteration: 44685
loss: 1.0163222551345825,grad_norm: 0.9085849774806543, iteration: 44686
loss: 0.995891273021698,grad_norm: 0.8853734183541087, iteration: 44687
loss: 0.9735242128372192,grad_norm: 0.9999991132663516, iteration: 44688
loss: 0.9796298146247864,grad_norm: 0.8589419503152179, iteration: 44689
loss: 0.9767544865608215,grad_norm: 0.8316636252333233, iteration: 44690
loss: 1.0005712509155273,grad_norm: 0.8737611528904451, iteration: 44691
loss: 0.9968520998954773,grad_norm: 0.9246596423919725, iteration: 44692
loss: 0.9922056794166565,grad_norm: 0.859930905415843, iteration: 44693
loss: 1.0005149841308594,grad_norm: 0.953219733337855, iteration: 44694
loss: 1.0092891454696655,grad_norm: 0.932946866099085, iteration: 44695
loss: 1.0410728454589844,grad_norm: 0.9999993347681422, iteration: 44696
loss: 0.9921908974647522,grad_norm: 0.8460901009690306, iteration: 44697
loss: 0.9632579684257507,grad_norm: 0.9772021150424676, iteration: 44698
loss: 0.9966471195220947,grad_norm: 0.9457942538859765, iteration: 44699
loss: 1.038469910621643,grad_norm: 0.9999994780444216, iteration: 44700
loss: 1.0055245161056519,grad_norm: 0.8951290461398611, iteration: 44701
loss: 1.0423076152801514,grad_norm: 0.9999992242269063, iteration: 44702
loss: 0.9969677925109863,grad_norm: 0.9560989220574755, iteration: 44703
loss: 0.9485104084014893,grad_norm: 0.9352110530901498, iteration: 44704
loss: 0.9767284393310547,grad_norm: 0.9999991376797343, iteration: 44705
loss: 1.012440800666809,grad_norm: 0.8986941921656001, iteration: 44706
loss: 0.9760411977767944,grad_norm: 0.9979277066141774, iteration: 44707
loss: 0.9918381571769714,grad_norm: 0.9999991183682694, iteration: 44708
loss: 1.0249360799789429,grad_norm: 0.9732749868110888, iteration: 44709
loss: 1.2181967496871948,grad_norm: 0.9999996522263951, iteration: 44710
loss: 0.9790725708007812,grad_norm: 0.9999992280131017, iteration: 44711
loss: 1.0128810405731201,grad_norm: 0.9999992894188223, iteration: 44712
loss: 1.0475704669952393,grad_norm: 0.9999997674401514, iteration: 44713
loss: 0.9982852935791016,grad_norm: 0.8751914629413098, iteration: 44714
loss: 0.980904221534729,grad_norm: 0.9999994686004895, iteration: 44715
loss: 0.9711544513702393,grad_norm: 0.9497455867617179, iteration: 44716
loss: 0.9611522555351257,grad_norm: 0.9999991931463437, iteration: 44717
loss: 0.976848840713501,grad_norm: 0.9531334583097609, iteration: 44718
loss: 1.032710313796997,grad_norm: 0.9838250290331062, iteration: 44719
loss: 1.002339243888855,grad_norm: 0.9999994995298309, iteration: 44720
loss: 0.9829261898994446,grad_norm: 0.9999991222645958, iteration: 44721
loss: 0.9605790376663208,grad_norm: 0.9999991117318502, iteration: 44722
loss: 1.0021196603775024,grad_norm: 0.9041015677915727, iteration: 44723
loss: 1.020837426185608,grad_norm: 0.9999990379907328, iteration: 44724
loss: 1.0013740062713623,grad_norm: 0.9999990482885778, iteration: 44725
loss: 0.9827876687049866,grad_norm: 0.8598249269529826, iteration: 44726
loss: 0.9967471361160278,grad_norm: 0.9920805691718549, iteration: 44727
loss: 1.008277416229248,grad_norm: 0.9999989983283991, iteration: 44728
loss: 0.963034451007843,grad_norm: 0.9613178829305711, iteration: 44729
loss: 1.0344856977462769,grad_norm: 0.9999994298407744, iteration: 44730
loss: 1.0002713203430176,grad_norm: 0.9938970642508869, iteration: 44731
loss: 1.052498459815979,grad_norm: 0.999999400415654, iteration: 44732
loss: 0.9936680793762207,grad_norm: 0.9999992342227854, iteration: 44733
loss: 0.9616484045982361,grad_norm: 0.8859784827835799, iteration: 44734
loss: 0.982389509677887,grad_norm: 0.8640517036723411, iteration: 44735
loss: 1.0083177089691162,grad_norm: 0.9999989763447034, iteration: 44736
loss: 1.0094354152679443,grad_norm: 0.9737284434918134, iteration: 44737
loss: 1.0053399801254272,grad_norm: 0.9999990561318953, iteration: 44738
loss: 0.994077742099762,grad_norm: 0.9842973922452348, iteration: 44739
loss: 1.078698754310608,grad_norm: 0.9999997977284882, iteration: 44740
loss: 0.9414317607879639,grad_norm: 0.9999990942467946, iteration: 44741
loss: 0.9855057001113892,grad_norm: 0.9263128393224586, iteration: 44742
loss: 1.0263490676879883,grad_norm: 0.9999992471506718, iteration: 44743
loss: 1.0101219415664673,grad_norm: 0.9951496200856236, iteration: 44744
loss: 1.0108925104141235,grad_norm: 0.9999998799750041, iteration: 44745
loss: 1.0066324472427368,grad_norm: 0.9838988577483503, iteration: 44746
loss: 1.018561601638794,grad_norm: 0.9999991488767311, iteration: 44747
loss: 0.9758269190788269,grad_norm: 0.9999990391864797, iteration: 44748
loss: 1.03020441532135,grad_norm: 0.9999992248012278, iteration: 44749
loss: 1.032346248626709,grad_norm: 0.9999991098211938, iteration: 44750
loss: 1.0065973997116089,grad_norm: 0.9494870365867684, iteration: 44751
loss: 0.9923978447914124,grad_norm: 0.9999994811763616, iteration: 44752
loss: 0.977230966091156,grad_norm: 0.934673267015535, iteration: 44753
loss: 1.0272668600082397,grad_norm: 0.9721329940515874, iteration: 44754
loss: 0.9973986744880676,grad_norm: 0.9999992230837268, iteration: 44755
loss: 0.9781085252761841,grad_norm: 0.813320304329566, iteration: 44756
loss: 0.9774925708770752,grad_norm: 0.9999990255030979, iteration: 44757
loss: 0.9960767030715942,grad_norm: 0.9999997245615156, iteration: 44758
loss: 1.0068082809448242,grad_norm: 0.9543888726700023, iteration: 44759
loss: 0.9934062361717224,grad_norm: 0.9378700140572752, iteration: 44760
loss: 0.9730406999588013,grad_norm: 0.9999991429302445, iteration: 44761
loss: 1.0058465003967285,grad_norm: 0.999999341926348, iteration: 44762
loss: 0.9946936964988708,grad_norm: 0.9999991277496239, iteration: 44763
loss: 0.9693329334259033,grad_norm: 0.9999992487666061, iteration: 44764
loss: 1.0213861465454102,grad_norm: 0.9999992783939348, iteration: 44765
loss: 0.9924764037132263,grad_norm: 0.9999996047037664, iteration: 44766
loss: 1.0006519556045532,grad_norm: 0.999999203360009, iteration: 44767
loss: 1.013496994972229,grad_norm: 0.8479831638422848, iteration: 44768
loss: 0.9653091430664062,grad_norm: 0.9999991584639386, iteration: 44769
loss: 1.0067038536071777,grad_norm: 0.9888789307312327, iteration: 44770
loss: 0.9931530952453613,grad_norm: 0.9999990458768218, iteration: 44771
loss: 1.0002520084381104,grad_norm: 0.9034020962278858, iteration: 44772
loss: 1.037460446357727,grad_norm: 0.9999991115979604, iteration: 44773
loss: 1.0199167728424072,grad_norm: 0.9999989860648969, iteration: 44774
loss: 0.9872156977653503,grad_norm: 0.9238929148584751, iteration: 44775
loss: 0.9956548810005188,grad_norm: 0.9999992249593769, iteration: 44776
loss: 1.0009928941726685,grad_norm: 0.9537241645862686, iteration: 44777
loss: 1.0157124996185303,grad_norm: 0.9999992266635466, iteration: 44778
loss: 1.0168942213058472,grad_norm: 0.8573244170147889, iteration: 44779
loss: 0.9973931312561035,grad_norm: 0.999999261932168, iteration: 44780
loss: 1.0126595497131348,grad_norm: 0.9999992775963605, iteration: 44781
loss: 0.9928612112998962,grad_norm: 0.9198600999420112, iteration: 44782
loss: 1.0049784183502197,grad_norm: 0.8730216416323762, iteration: 44783
loss: 1.000982403755188,grad_norm: 0.9068611515343895, iteration: 44784
loss: 0.9819656610488892,grad_norm: 0.8122313897511638, iteration: 44785
loss: 1.0272504091262817,grad_norm: 0.9999990615741025, iteration: 44786
loss: 1.0438287258148193,grad_norm: 0.9999994233459064, iteration: 44787
loss: 0.9928244352340698,grad_norm: 0.9999991331157784, iteration: 44788
loss: 0.9968417882919312,grad_norm: 0.9488641016711736, iteration: 44789
loss: 1.0167330503463745,grad_norm: 0.8378863257890075, iteration: 44790
loss: 0.9892092347145081,grad_norm: 0.9999990068619946, iteration: 44791
loss: 0.9941557049751282,grad_norm: 0.9999991094228885, iteration: 44792
loss: 0.9726559519767761,grad_norm: 0.9999991646557622, iteration: 44793
loss: 0.9777037501335144,grad_norm: 0.9999990736846448, iteration: 44794
loss: 1.1598602533340454,grad_norm: 0.999999157154754, iteration: 44795
loss: 0.9913379549980164,grad_norm: 0.9746878643729562, iteration: 44796
loss: 1.0058801174163818,grad_norm: 0.9853902234491196, iteration: 44797
loss: 1.0085649490356445,grad_norm: 0.8825035974335623, iteration: 44798
loss: 1.097718358039856,grad_norm: 0.9999995960660923, iteration: 44799
loss: 1.0068837404251099,grad_norm: 0.9999991435353494, iteration: 44800
loss: 0.9823757410049438,grad_norm: 0.9428335378631262, iteration: 44801
loss: 1.039906620979309,grad_norm: 0.956043985215892, iteration: 44802
loss: 1.0098108053207397,grad_norm: 0.9823826040715482, iteration: 44803
loss: 0.9551498293876648,grad_norm: 0.9999992597071026, iteration: 44804
loss: 1.0249595642089844,grad_norm: 0.9999991388842604, iteration: 44805
loss: 1.0185437202453613,grad_norm: 0.999998935215444, iteration: 44806
loss: 0.9970288276672363,grad_norm: 0.9999990577563258, iteration: 44807
loss: 0.9835567474365234,grad_norm: 0.9999991924049443, iteration: 44808
loss: 0.9976818561553955,grad_norm: 0.9999992926628077, iteration: 44809
loss: 0.9854812622070312,grad_norm: 0.9999991670932793, iteration: 44810
loss: 1.0126112699508667,grad_norm: 0.9999990830706501, iteration: 44811
loss: 0.9970422387123108,grad_norm: 0.9531971802341412, iteration: 44812
loss: 0.9932625889778137,grad_norm: 0.8739780652003928, iteration: 44813
loss: 1.0437684059143066,grad_norm: 0.9999992197346198, iteration: 44814
loss: 1.073997974395752,grad_norm: 0.9999998603948629, iteration: 44815
loss: 1.0179883241653442,grad_norm: 0.9237277460536193, iteration: 44816
loss: 1.017723798751831,grad_norm: 0.9999991339938721, iteration: 44817
loss: 1.0043894052505493,grad_norm: 0.9999999712856412, iteration: 44818
loss: 0.9860358834266663,grad_norm: 0.9999992105201342, iteration: 44819
loss: 1.0103859901428223,grad_norm: 0.999999124998556, iteration: 44820
loss: 1.039459466934204,grad_norm: 0.9999993101190624, iteration: 44821
loss: 1.0264301300048828,grad_norm: 0.9999991208111999, iteration: 44822
loss: 0.9936034083366394,grad_norm: 0.9202446692281918, iteration: 44823
loss: 1.042001724243164,grad_norm: 0.9999992306259309, iteration: 44824
loss: 1.02215576171875,grad_norm: 0.9256943663485859, iteration: 44825
loss: 0.9943144917488098,grad_norm: 0.9999992398400148, iteration: 44826
loss: 1.0365928411483765,grad_norm: 0.9999990641515885, iteration: 44827
loss: 0.9514753222465515,grad_norm: 0.9999992242798252, iteration: 44828
loss: 1.0110228061676025,grad_norm: 0.9975936128302291, iteration: 44829
loss: 0.9977141618728638,grad_norm: 0.8476815522337229, iteration: 44830
loss: 1.022278904914856,grad_norm: 0.9999990715040208, iteration: 44831
loss: 1.0076847076416016,grad_norm: 0.8312013425962345, iteration: 44832
loss: 1.0158343315124512,grad_norm: 0.9709464303884009, iteration: 44833
loss: 0.9823500514030457,grad_norm: 0.9999990948584134, iteration: 44834
loss: 1.0085817575454712,grad_norm: 0.9200518411245834, iteration: 44835
loss: 0.9977758526802063,grad_norm: 0.9999993036291278, iteration: 44836
loss: 1.0164597034454346,grad_norm: 0.9999991073827222, iteration: 44837
loss: 1.0033574104309082,grad_norm: 0.9999990515868783, iteration: 44838
loss: 1.0163894891738892,grad_norm: 0.9999991040024782, iteration: 44839
loss: 1.0234049558639526,grad_norm: 0.9999990573714951, iteration: 44840
loss: 0.9988287091255188,grad_norm: 0.999999100078334, iteration: 44841
loss: 1.0249437093734741,grad_norm: 0.8567856418839935, iteration: 44842
loss: 1.0242128372192383,grad_norm: 0.8985706774374168, iteration: 44843
loss: 1.011518955230713,grad_norm: 0.9999991148598776, iteration: 44844
loss: 1.0243788957595825,grad_norm: 0.9999991549694521, iteration: 44845
loss: 0.9917054772377014,grad_norm: 0.9871274713203833, iteration: 44846
loss: 1.0048320293426514,grad_norm: 0.9590729647885033, iteration: 44847
loss: 0.9766665697097778,grad_norm: 0.9999990461065744, iteration: 44848
loss: 1.0015368461608887,grad_norm: 0.95352203144836, iteration: 44849
loss: 1.0028855800628662,grad_norm: 0.9404959768288956, iteration: 44850
loss: 1.0469956398010254,grad_norm: 0.9999994024360938, iteration: 44851
loss: 0.9768466949462891,grad_norm: 0.960929777196726, iteration: 44852
loss: 1.085493803024292,grad_norm: 0.999999924498217, iteration: 44853
loss: 1.0303984880447388,grad_norm: 0.9999989767573346, iteration: 44854
loss: 0.9818010330200195,grad_norm: 0.9809273640485812, iteration: 44855
loss: 1.016648292541504,grad_norm: 0.9067688491964467, iteration: 44856
loss: 1.0987558364868164,grad_norm: 0.9999996549665049, iteration: 44857
loss: 0.9988354444503784,grad_norm: 0.8189293381040803, iteration: 44858
loss: 1.0119763612747192,grad_norm: 0.8876968289408892, iteration: 44859
loss: 1.1116423606872559,grad_norm: 0.9999995787501226, iteration: 44860
loss: 1.0396205186843872,grad_norm: 0.9999996772807201, iteration: 44861
loss: 1.040076494216919,grad_norm: 1.0000000021852713, iteration: 44862
loss: 0.9770231246948242,grad_norm: 0.9611609498108749, iteration: 44863
loss: 0.9998857975006104,grad_norm: 0.9116931109925231, iteration: 44864
loss: 1.0022127628326416,grad_norm: 0.978656332095405, iteration: 44865
loss: 1.0442203283309937,grad_norm: 0.9999993823267398, iteration: 44866
loss: 1.0454237461090088,grad_norm: 0.933113262869035, iteration: 44867
loss: 1.0130008459091187,grad_norm: 0.999999128516168, iteration: 44868
loss: 1.0062285661697388,grad_norm: 0.9999989925423808, iteration: 44869
loss: 0.9830779433250427,grad_norm: 0.9999991347155185, iteration: 44870
loss: 1.0283223390579224,grad_norm: 0.9112775521400415, iteration: 44871
loss: 1.0066481828689575,grad_norm: 0.7880200107471103, iteration: 44872
loss: 1.0298177003860474,grad_norm: 0.9999991295528451, iteration: 44873
loss: 1.0028624534606934,grad_norm: 0.9691792045456588, iteration: 44874
loss: 1.0075546503067017,grad_norm: 0.8163429481766703, iteration: 44875
loss: 0.9952222108840942,grad_norm: 0.9469887337974778, iteration: 44876
loss: 1.0670087337493896,grad_norm: 0.9999994693761408, iteration: 44877
loss: 1.1354343891143799,grad_norm: 0.999999425452773, iteration: 44878
loss: 0.9908177256584167,grad_norm: 0.8913179980381384, iteration: 44879
loss: 1.0159326791763306,grad_norm: 0.9999991234201854, iteration: 44880
loss: 1.00943124294281,grad_norm: 0.9999994363888443, iteration: 44881
loss: 0.9652475714683533,grad_norm: 0.9229866327038817, iteration: 44882
loss: 1.0016520023345947,grad_norm: 0.9390749481547337, iteration: 44883
loss: 1.0152292251586914,grad_norm: 0.999999087761465, iteration: 44884
loss: 1.0485554933547974,grad_norm: 0.9999993647172079, iteration: 44885
loss: 1.0007199048995972,grad_norm: 0.975314069562256, iteration: 44886
loss: 0.9934377670288086,grad_norm: 0.786870214751285, iteration: 44887
loss: 0.9735432863235474,grad_norm: 0.9999991106832544, iteration: 44888
loss: 1.0092638731002808,grad_norm: 0.9999993267220311, iteration: 44889
loss: 0.9905068874359131,grad_norm: 0.8980257299797679, iteration: 44890
loss: 0.970401406288147,grad_norm: 0.9999997558820383, iteration: 44891
loss: 1.0324667692184448,grad_norm: 0.999999830534389, iteration: 44892
loss: 1.0174485445022583,grad_norm: 0.9999991659363877, iteration: 44893
loss: 1.0049171447753906,grad_norm: 0.9999990609674875, iteration: 44894
loss: 1.008815050125122,grad_norm: 0.9999997800084934, iteration: 44895
loss: 1.039711356163025,grad_norm: 0.9999995973988071, iteration: 44896
loss: 0.9591609239578247,grad_norm: 0.9402518597697976, iteration: 44897
loss: 1.0288465023040771,grad_norm: 0.9999992680527435, iteration: 44898
loss: 0.9841607213020325,grad_norm: 0.9999996838811609, iteration: 44899
loss: 1.0093085765838623,grad_norm: 0.8800235620525708, iteration: 44900
loss: 1.0059336423873901,grad_norm: 0.8227865278449455, iteration: 44901
loss: 1.0091291666030884,grad_norm: 0.9000715402953114, iteration: 44902
loss: 0.9551270604133606,grad_norm: 0.9999991701986699, iteration: 44903
loss: 1.0447980165481567,grad_norm: 0.9999992517399044, iteration: 44904
loss: 0.9830825328826904,grad_norm: 0.9999999199925721, iteration: 44905
loss: 1.0144294500350952,grad_norm: 0.9999991115854022, iteration: 44906
loss: 0.9751484990119934,grad_norm: 0.8417993829786562, iteration: 44907
loss: 1.0147476196289062,grad_norm: 0.9233653235284986, iteration: 44908
loss: 1.005898356437683,grad_norm: 0.9999994949396284, iteration: 44909
loss: 0.9682408571243286,grad_norm: 0.9999991692000321, iteration: 44910
loss: 1.0039176940917969,grad_norm: 0.8904040681947267, iteration: 44911
loss: 1.0241994857788086,grad_norm: 0.9999995541446386, iteration: 44912
loss: 1.0073740482330322,grad_norm: 0.9999992014036011, iteration: 44913
loss: 1.0176488161087036,grad_norm: 0.9999991980229898, iteration: 44914
loss: 0.9953910112380981,grad_norm: 0.9454344640148056, iteration: 44915
loss: 1.0149121284484863,grad_norm: 0.9999989649401788, iteration: 44916
loss: 0.9857694506645203,grad_norm: 0.999999200338356, iteration: 44917
loss: 1.0241552591323853,grad_norm: 0.9971110833370501, iteration: 44918
loss: 1.0358129739761353,grad_norm: 0.9999991279629031, iteration: 44919
loss: 1.013839602470398,grad_norm: 0.9999994726254277, iteration: 44920
loss: 0.9682559967041016,grad_norm: 0.9999992111183124, iteration: 44921
loss: 1.0236155986785889,grad_norm: 0.8655306666774977, iteration: 44922
loss: 0.9443869590759277,grad_norm: 0.9999990587558307, iteration: 44923
loss: 0.9772593975067139,grad_norm: 0.999999198703336, iteration: 44924
loss: 0.9884974360466003,grad_norm: 0.8364233601951019, iteration: 44925
loss: 1.011918067932129,grad_norm: 0.9999994678412494, iteration: 44926
loss: 1.0426373481750488,grad_norm: 0.9999990425538752, iteration: 44927
loss: 1.0114396810531616,grad_norm: 0.999999142238681, iteration: 44928
loss: 1.038746953010559,grad_norm: 0.999999021437792, iteration: 44929
loss: 1.0051203966140747,grad_norm: 0.9999998877302381, iteration: 44930
loss: 1.0068339109420776,grad_norm: 0.8768645240124366, iteration: 44931
loss: 1.106910228729248,grad_norm: 0.9999997526837353, iteration: 44932
loss: 0.9625142216682434,grad_norm: 0.9999991804435805, iteration: 44933
loss: 1.0061826705932617,grad_norm: 0.8905617950522383, iteration: 44934
loss: 1.0574628114700317,grad_norm: 0.9999991942130125, iteration: 44935
loss: 0.9870041012763977,grad_norm: 0.9999997718662809, iteration: 44936
loss: 1.035993218421936,grad_norm: 0.9999998368650898, iteration: 44937
loss: 1.0104377269744873,grad_norm: 0.9376795412796871, iteration: 44938
loss: 1.0121707916259766,grad_norm: 0.982401767790692, iteration: 44939
loss: 1.0174438953399658,grad_norm: 0.9999996282502184, iteration: 44940
loss: 0.9995740652084351,grad_norm: 0.9999992247655527, iteration: 44941
loss: 1.0403566360473633,grad_norm: 0.9999994154288157, iteration: 44942
loss: 1.0031068325042725,grad_norm: 0.8508868469292119, iteration: 44943
loss: 1.0179675817489624,grad_norm: 0.8379875238330583, iteration: 44944
loss: 1.0109306573867798,grad_norm: 0.9585255495261092, iteration: 44945
loss: 1.0224875211715698,grad_norm: 0.9999993821740844, iteration: 44946
loss: 1.0237449407577515,grad_norm: 0.9285488673197227, iteration: 44947
loss: 0.9840202331542969,grad_norm: 0.9999992246322834, iteration: 44948
loss: 0.9846269488334656,grad_norm: 0.9989025620281295, iteration: 44949
loss: 0.9504978060722351,grad_norm: 0.9087087784415718, iteration: 44950
loss: 1.0073152780532837,grad_norm: 0.9482263621144646, iteration: 44951
loss: 1.0084351301193237,grad_norm: 0.9360726411764286, iteration: 44952
loss: 1.0083253383636475,grad_norm: 0.9999990764309278, iteration: 44953
loss: 1.001503348350525,grad_norm: 0.8399595029730285, iteration: 44954
loss: 1.0053799152374268,grad_norm: 0.9999992929480125, iteration: 44955
loss: 0.9998617172241211,grad_norm: 0.9999993437785895, iteration: 44956
loss: 0.9948755502700806,grad_norm: 0.9420794152826313, iteration: 44957
loss: 1.0042246580123901,grad_norm: 0.9999992658456418, iteration: 44958
loss: 1.0162287950515747,grad_norm: 0.99999947444594, iteration: 44959
loss: 1.0332878828048706,grad_norm: 0.9999989688579087, iteration: 44960
loss: 1.041398525238037,grad_norm: 0.9999990195287733, iteration: 44961
loss: 0.9896210432052612,grad_norm: 0.9999989042529056, iteration: 44962
loss: 1.0952394008636475,grad_norm: 0.9999991594506121, iteration: 44963
loss: 0.9751970767974854,grad_norm: 0.9999990836294868, iteration: 44964
loss: 1.0226716995239258,grad_norm: 0.8693349102601474, iteration: 44965
loss: 0.9944316148757935,grad_norm: 0.9999991273706927, iteration: 44966
loss: 1.145919680595398,grad_norm: 0.9999997566898039, iteration: 44967
loss: 1.074474811553955,grad_norm: 0.9999998250323527, iteration: 44968
loss: 1.0285329818725586,grad_norm: 0.9610352013034521, iteration: 44969
loss: 0.9875460863113403,grad_norm: 0.9999995359431912, iteration: 44970
loss: 1.0480332374572754,grad_norm: 0.9999992943104544, iteration: 44971
loss: 1.029226541519165,grad_norm: 0.8582060224303079, iteration: 44972
loss: 1.0069971084594727,grad_norm: 0.8994564525330315, iteration: 44973
loss: 1.0115028619766235,grad_norm: 0.9999991596584094, iteration: 44974
loss: 1.0379492044448853,grad_norm: 0.9999990106609062, iteration: 44975
loss: 1.0336558818817139,grad_norm: 0.999999404583502, iteration: 44976
loss: 1.0274548530578613,grad_norm: 0.9999996243320282, iteration: 44977
loss: 1.0514845848083496,grad_norm: 0.9999994170242337, iteration: 44978
loss: 1.0134458541870117,grad_norm: 0.9999992144867581, iteration: 44979
loss: 0.9973594546318054,grad_norm: 0.999999147533695, iteration: 44980
loss: 0.9779489040374756,grad_norm: 0.9999991282567233, iteration: 44981
loss: 1.0610918998718262,grad_norm: 0.9999998552610532, iteration: 44982
loss: 1.0190517902374268,grad_norm: 0.9999995480344392, iteration: 44983
loss: 1.0219193696975708,grad_norm: 0.9999995168912563, iteration: 44984
loss: 1.0324434041976929,grad_norm: 0.9999993857512557, iteration: 44985
loss: 0.9842842221260071,grad_norm: 0.9999992064537482, iteration: 44986
loss: 0.9720433950424194,grad_norm: 0.9999990829944675, iteration: 44987
loss: 1.0056072473526,grad_norm: 0.9999990514452068, iteration: 44988
loss: 1.0184792280197144,grad_norm: 0.9999990469503689, iteration: 44989
loss: 1.0575940608978271,grad_norm: 0.9999991892338069, iteration: 44990
loss: 1.0240676403045654,grad_norm: 0.9999991416854507, iteration: 44991
loss: 1.0614724159240723,grad_norm: 0.999999739176648, iteration: 44992
loss: 1.0141998529434204,grad_norm: 0.9334574202017697, iteration: 44993
loss: 0.991420567035675,grad_norm: 0.9476510426258267, iteration: 44994
loss: 0.9957921504974365,grad_norm: 0.7890027515952168, iteration: 44995
loss: 1.0049515962600708,grad_norm: 0.9999992438555816, iteration: 44996
loss: 0.9981588125228882,grad_norm: 0.8691757023580343, iteration: 44997
loss: 0.9805006384849548,grad_norm: 0.9999990342853644, iteration: 44998
loss: 0.9968359470367432,grad_norm: 0.9999989674413211, iteration: 44999
loss: 1.0118027925491333,grad_norm: 0.9999991843790242, iteration: 45000
loss: 1.0207297801971436,grad_norm: 0.9999992033745606, iteration: 45001
loss: 1.0717757940292358,grad_norm: 0.9999990672563969, iteration: 45002
loss: 1.0212703943252563,grad_norm: 0.9999990445247501, iteration: 45003
loss: 1.005244493484497,grad_norm: 0.8590699548794559, iteration: 45004
loss: 1.0085057020187378,grad_norm: 0.9999992492071813, iteration: 45005
loss: 1.0579581260681152,grad_norm: 0.999999024564403, iteration: 45006
loss: 1.0415853261947632,grad_norm: 0.9999994031229533, iteration: 45007
loss: 1.0289924144744873,grad_norm: 0.999998983650271, iteration: 45008
loss: 1.0405396223068237,grad_norm: 0.9999999113189274, iteration: 45009
loss: 1.0046881437301636,grad_norm: 0.9999993952176777, iteration: 45010
loss: 1.0126314163208008,grad_norm: 0.9999998819859027, iteration: 45011
loss: 0.9846641421318054,grad_norm: 0.8747979939152071, iteration: 45012
loss: 0.9829003214836121,grad_norm: 0.9999991265330352, iteration: 45013
loss: 0.9693487882614136,grad_norm: 0.9626662123676021, iteration: 45014
loss: 1.008759617805481,grad_norm: 0.7983075136664798, iteration: 45015
loss: 0.9972663521766663,grad_norm: 0.9202314836338431, iteration: 45016
loss: 1.0091779232025146,grad_norm: 0.999999026564314, iteration: 45017
loss: 0.9782682061195374,grad_norm: 0.8918727782623935, iteration: 45018
loss: 1.0109342336654663,grad_norm: 0.9375020292388895, iteration: 45019
loss: 1.0065616369247437,grad_norm: 0.9255585862385904, iteration: 45020
loss: 0.9981655478477478,grad_norm: 0.9999990749762917, iteration: 45021
loss: 0.9672780632972717,grad_norm: 0.8277120964865756, iteration: 45022
loss: 0.9912287592887878,grad_norm: 0.9999991010789998, iteration: 45023
loss: 1.0794116258621216,grad_norm: 0.9999999240067469, iteration: 45024
loss: 0.9997438192367554,grad_norm: 0.9999994686657288, iteration: 45025
loss: 1.044288158416748,grad_norm: 0.9999996915321412, iteration: 45026
loss: 1.0319503545761108,grad_norm: 0.9447964452018902, iteration: 45027
loss: 1.0453152656555176,grad_norm: 0.9999993873216936, iteration: 45028
loss: 1.0288240909576416,grad_norm: 0.9987253435481521, iteration: 45029
loss: 1.011910080909729,grad_norm: 0.9999991573927801, iteration: 45030
loss: 1.0003536939620972,grad_norm: 0.9999991873466743, iteration: 45031
loss: 0.9744182825088501,grad_norm: 0.9999992426377413, iteration: 45032
loss: 0.9866780042648315,grad_norm: 0.9999990718032935, iteration: 45033
loss: 1.0534464120864868,grad_norm: 0.9999989742763647, iteration: 45034
loss: 0.9473273158073425,grad_norm: 0.9579905505007619, iteration: 45035
loss: 1.0402710437774658,grad_norm: 0.9999991606800258, iteration: 45036
loss: 1.0470844507217407,grad_norm: 0.9999997106934924, iteration: 45037
loss: 1.1034150123596191,grad_norm: 0.9999993352575176, iteration: 45038
loss: 1.0117275714874268,grad_norm: 0.9999993485155143, iteration: 45039
loss: 1.0608186721801758,grad_norm: 0.9999998116741854, iteration: 45040
loss: 1.0517481565475464,grad_norm: 0.9999992714222622, iteration: 45041
loss: 1.041977047920227,grad_norm: 0.9999997142375332, iteration: 45042
loss: 1.010719656944275,grad_norm: 0.9999991557043667, iteration: 45043
loss: 0.9641526341438293,grad_norm: 0.9318766069476212, iteration: 45044
loss: 1.0910981893539429,grad_norm: 0.9999995854954374, iteration: 45045
loss: 1.02238929271698,grad_norm: 0.9999990699649114, iteration: 45046
loss: 0.9823330044746399,grad_norm: 0.8770321408144472, iteration: 45047
loss: 0.9611635804176331,grad_norm: 0.9615874561588855, iteration: 45048
loss: 0.974397599697113,grad_norm: 0.9108062677515564, iteration: 45049
loss: 1.0162172317504883,grad_norm: 0.9747357774523134, iteration: 45050
loss: 1.0779212713241577,grad_norm: 0.9999992652755143, iteration: 45051
loss: 1.0251930952072144,grad_norm: 0.9999994722403083, iteration: 45052
loss: 1.0044541358947754,grad_norm: 0.999999264519351, iteration: 45053
loss: 1.0337109565734863,grad_norm: 0.8870232617354543, iteration: 45054
loss: 0.983536422252655,grad_norm: 0.9999995262019837, iteration: 45055
loss: 1.0127389430999756,grad_norm: 0.9910387483647668, iteration: 45056
loss: 1.0333340167999268,grad_norm: 0.9999992095805893, iteration: 45057
loss: 1.0116207599639893,grad_norm: 0.8917329140347576, iteration: 45058
loss: 0.9895860552787781,grad_norm: 0.9999991165478603, iteration: 45059
loss: 1.0108064413070679,grad_norm: 0.9999991011677258, iteration: 45060
loss: 0.9970262050628662,grad_norm: 0.9999995357007756, iteration: 45061
loss: 1.0207972526550293,grad_norm: 0.9999990843981325, iteration: 45062
loss: 1.0012359619140625,grad_norm: 0.9999991591892827, iteration: 45063
loss: 0.9848512411117554,grad_norm: 0.9999993330127988, iteration: 45064
loss: 1.0161089897155762,grad_norm: 0.9999990560252522, iteration: 45065
loss: 1.0048631429672241,grad_norm: 0.999999253900357, iteration: 45066
loss: 1.0040454864501953,grad_norm: 0.9803296052531044, iteration: 45067
loss: 1.0177029371261597,grad_norm: 0.8720456042833235, iteration: 45068
loss: 1.002024531364441,grad_norm: 0.9999988982677389, iteration: 45069
loss: 0.9953941702842712,grad_norm: 0.9999992895130001, iteration: 45070
loss: 0.9944776892662048,grad_norm: 0.9999994256397107, iteration: 45071
loss: 1.0079442262649536,grad_norm: 0.9999991455638733, iteration: 45072
loss: 1.00883150100708,grad_norm: 0.9999989941346696, iteration: 45073
loss: 1.0258623361587524,grad_norm: 0.9999990387573648, iteration: 45074
loss: 1.0162955522537231,grad_norm: 0.9999993786384253, iteration: 45075
loss: 1.0228346586227417,grad_norm: 0.9999990526763317, iteration: 45076
loss: 1.0481441020965576,grad_norm: 0.9999995972536054, iteration: 45077
loss: 1.0041537284851074,grad_norm: 0.8681713605856246, iteration: 45078
loss: 0.9629279375076294,grad_norm: 0.9999992362938193, iteration: 45079
loss: 0.9965866804122925,grad_norm: 0.9980217071997458, iteration: 45080
loss: 1.0015082359313965,grad_norm: 0.7693433345527609, iteration: 45081
loss: 1.0095466375350952,grad_norm: 0.9999990300592612, iteration: 45082
loss: 1.0703009366989136,grad_norm: 0.9999996661114593, iteration: 45083
loss: 1.0120571851730347,grad_norm: 0.8920776210284665, iteration: 45084
loss: 0.9993108510971069,grad_norm: 0.9537779226522869, iteration: 45085
loss: 0.9557029604911804,grad_norm: 0.9999989870616655, iteration: 45086
loss: 1.0118024349212646,grad_norm: 0.9999989812235336, iteration: 45087
loss: 1.0249146223068237,grad_norm: 0.9639388080366619, iteration: 45088
loss: 1.0193172693252563,grad_norm: 0.9999990673112943, iteration: 45089
loss: 1.034333348274231,grad_norm: 0.9999992677205267, iteration: 45090
loss: 1.0019344091415405,grad_norm: 0.9966474636527214, iteration: 45091
loss: 1.0527968406677246,grad_norm: 0.9999991777249322, iteration: 45092
loss: 1.0056190490722656,grad_norm: 0.8187778432028876, iteration: 45093
loss: 1.0013301372528076,grad_norm: 0.9925180228633103, iteration: 45094
loss: 1.0339750051498413,grad_norm: 0.9999996957992522, iteration: 45095
loss: 0.9743094444274902,grad_norm: 0.999999115230735, iteration: 45096
loss: 1.0000125169754028,grad_norm: 0.9452577765148581, iteration: 45097
loss: 1.009136438369751,grad_norm: 0.9999990650338771, iteration: 45098
loss: 0.9970384240150452,grad_norm: 0.9999992404346627, iteration: 45099
loss: 0.9938927292823792,grad_norm: 0.9912001702993811, iteration: 45100
loss: 0.9997847080230713,grad_norm: 0.9999990082270777, iteration: 45101
loss: 0.9957770705223083,grad_norm: 0.9999991142724678, iteration: 45102
loss: 0.9948368668556213,grad_norm: 0.9999999478391146, iteration: 45103
loss: 1.0109233856201172,grad_norm: 0.9999991975119722, iteration: 45104
loss: 1.0132157802581787,grad_norm: 0.9999990580862237, iteration: 45105
loss: 0.9914781451225281,grad_norm: 0.9999994564617359, iteration: 45106
loss: 0.976006805896759,grad_norm: 0.9999991197349716, iteration: 45107
loss: 0.9776360392570496,grad_norm: 0.8632591877489854, iteration: 45108
loss: 0.998871922492981,grad_norm: 0.9166764762019967, iteration: 45109
loss: 0.9934225082397461,grad_norm: 0.9329501539732614, iteration: 45110
loss: 0.9953805804252625,grad_norm: 0.9999991745713539, iteration: 45111
loss: 1.0350587368011475,grad_norm: 0.9999989143802014, iteration: 45112
loss: 1.0570483207702637,grad_norm: 0.9999993259813765, iteration: 45113
loss: 0.9968301057815552,grad_norm: 0.9999989864168601, iteration: 45114
loss: 0.9982860684394836,grad_norm: 0.8308179457704571, iteration: 45115
loss: 0.9956231117248535,grad_norm: 0.9999998292377876, iteration: 45116
loss: 0.9865334630012512,grad_norm: 0.9999991299207036, iteration: 45117
loss: 1.0124198198318481,grad_norm: 0.9999991147652336, iteration: 45118
loss: 1.00203275680542,grad_norm: 0.9999991365786296, iteration: 45119
loss: 0.9814679026603699,grad_norm: 0.9999990296497128, iteration: 45120
loss: 0.9813272356987,grad_norm: 0.8545069069522061, iteration: 45121
loss: 1.017333984375,grad_norm: 0.9999990189235478, iteration: 45122
loss: 1.0193506479263306,grad_norm: 0.9999993167840946, iteration: 45123
loss: 0.9889243245124817,grad_norm: 0.9999992620630441, iteration: 45124
loss: 1.049505352973938,grad_norm: 0.9999992744669001, iteration: 45125
loss: 1.0435017347335815,grad_norm: 0.9999993453454554, iteration: 45126
loss: 1.0225677490234375,grad_norm: 0.9999993067853176, iteration: 45127
loss: 1.0030406713485718,grad_norm: 0.9098777455971141, iteration: 45128
loss: 1.0195744037628174,grad_norm: 0.9831684301048466, iteration: 45129
loss: 1.032455563545227,grad_norm: 0.9999989745222367, iteration: 45130
loss: 0.994647204875946,grad_norm: 0.9999991022339291, iteration: 45131
loss: 0.9958494901657104,grad_norm: 0.9999990622090388, iteration: 45132
loss: 1.0263409614562988,grad_norm: 0.8872976167582232, iteration: 45133
loss: 0.9820793271064758,grad_norm: 0.87943755567059, iteration: 45134
loss: 1.0277255773544312,grad_norm: 0.9999997677498079, iteration: 45135
loss: 1.0337828397750854,grad_norm: 0.9999995237866393, iteration: 45136
loss: 0.9464221000671387,grad_norm: 0.9336723134691951, iteration: 45137
loss: 1.0789917707443237,grad_norm: 0.9999992696738206, iteration: 45138
loss: 1.0093369483947754,grad_norm: 0.999999016908897, iteration: 45139
loss: 1.0441298484802246,grad_norm: 0.9525743368170385, iteration: 45140
loss: 1.0101478099822998,grad_norm: 0.9972210174380896, iteration: 45141
loss: 0.9530694484710693,grad_norm: 0.9999992661110539, iteration: 45142
loss: 1.0172815322875977,grad_norm: 0.8913091180771999, iteration: 45143
loss: 1.0241684913635254,grad_norm: 0.9621224591315715, iteration: 45144
loss: 0.9943214058876038,grad_norm: 0.9999992526235452, iteration: 45145
loss: 0.9784682989120483,grad_norm: 0.9999989822915467, iteration: 45146
loss: 0.9941846132278442,grad_norm: 0.9999990703805383, iteration: 45147
loss: 0.9626260995864868,grad_norm: 0.9717684956748069, iteration: 45148
loss: 1.0206159353256226,grad_norm: 0.9999991280410445, iteration: 45149
loss: 0.9884962439537048,grad_norm: 0.9274293166604223, iteration: 45150
loss: 0.9952641725540161,grad_norm: 0.9999991965866009, iteration: 45151
loss: 1.002323865890503,grad_norm: 0.999999071229486, iteration: 45152
loss: 0.9937506318092346,grad_norm: 0.9999991246022376, iteration: 45153
loss: 0.9878111481666565,grad_norm: 0.9578812068867936, iteration: 45154
loss: 1.0155640840530396,grad_norm: 0.9999991890535244, iteration: 45155
loss: 1.0543028116226196,grad_norm: 0.9112371079314451, iteration: 45156
loss: 0.984103798866272,grad_norm: 0.9146824732366421, iteration: 45157
loss: 1.0288900136947632,grad_norm: 0.9566307490585988, iteration: 45158
loss: 0.999222457408905,grad_norm: 0.9999992980669632, iteration: 45159
loss: 1.0219013690948486,grad_norm: 0.9999995156832491, iteration: 45160
loss: 1.0190010070800781,grad_norm: 0.9999989342789972, iteration: 45161
loss: 0.9795722365379333,grad_norm: 0.9999990960320352, iteration: 45162
loss: 1.0129519701004028,grad_norm: 0.9999993174558093, iteration: 45163
loss: 0.9907788634300232,grad_norm: 0.772311882713317, iteration: 45164
loss: 0.9916437864303589,grad_norm: 0.9778424640284934, iteration: 45165
loss: 0.986351490020752,grad_norm: 0.9999990342100235, iteration: 45166
loss: 1.0057494640350342,grad_norm: 0.999999133104962, iteration: 45167
loss: 0.989695131778717,grad_norm: 0.999999084889651, iteration: 45168
loss: 0.9735221862792969,grad_norm: 0.9679868148131273, iteration: 45169
loss: 0.988745391368866,grad_norm: 0.9655921820562534, iteration: 45170
loss: 1.0114092826843262,grad_norm: 0.8684939047920962, iteration: 45171
loss: 1.0007997751235962,grad_norm: 0.9999992606388036, iteration: 45172
loss: 1.0070626735687256,grad_norm: 0.8807475569804858, iteration: 45173
loss: 0.9864311814308167,grad_norm: 0.868790036726287, iteration: 45174
loss: 1.0332728624343872,grad_norm: 0.9930207643786352, iteration: 45175
loss: 1.005505919456482,grad_norm: 0.9321119942137363, iteration: 45176
loss: 0.9968769550323486,grad_norm: 0.9498277705691792, iteration: 45177
loss: 1.0337605476379395,grad_norm: 0.999999642330407, iteration: 45178
loss: 0.9990876317024231,grad_norm: 0.8500160939885433, iteration: 45179
loss: 0.9820440411567688,grad_norm: 0.9215468943118827, iteration: 45180
loss: 0.986598014831543,grad_norm: 0.7761400304989783, iteration: 45181
loss: 0.9944522380828857,grad_norm: 0.9999991357014358, iteration: 45182
loss: 0.9989490509033203,grad_norm: 0.8202378182306669, iteration: 45183
loss: 1.0292320251464844,grad_norm: 0.9999992489422328, iteration: 45184
loss: 1.0076730251312256,grad_norm: 0.9999994990424047, iteration: 45185
loss: 1.0149707794189453,grad_norm: 0.9733063532311084, iteration: 45186
loss: 0.9615683555603027,grad_norm: 0.9875854255494696, iteration: 45187
loss: 0.978871762752533,grad_norm: 0.9999991024132389, iteration: 45188
loss: 0.9991442561149597,grad_norm: 0.8360392240894012, iteration: 45189
loss: 0.9747166633605957,grad_norm: 0.9098208955384649, iteration: 45190
loss: 0.9902913570404053,grad_norm: 0.9999991065517053, iteration: 45191
loss: 0.9737135171890259,grad_norm: 0.8586986873836749, iteration: 45192
loss: 0.9863417148590088,grad_norm: 0.9999997761621301, iteration: 45193
loss: 1.001326084136963,grad_norm: 0.9999990789500378, iteration: 45194
loss: 0.9806757569313049,grad_norm: 0.9878202355507677, iteration: 45195
loss: 0.9960928559303284,grad_norm: 0.9911114590876691, iteration: 45196
loss: 1.0073820352554321,grad_norm: 0.9999990869178674, iteration: 45197
loss: 1.010276198387146,grad_norm: 0.9999995120781491, iteration: 45198
loss: 1.0280951261520386,grad_norm: 0.8901083224099966, iteration: 45199
loss: 1.0343674421310425,grad_norm: 0.992470220506361, iteration: 45200
loss: 0.9734616279602051,grad_norm: 0.9999989939074116, iteration: 45201
loss: 0.9938555955886841,grad_norm: 0.999999077974834, iteration: 45202
loss: 1.030730962753296,grad_norm: 0.9999990476293017, iteration: 45203
loss: 1.023586392402649,grad_norm: 0.9999991202885273, iteration: 45204
loss: 0.9968876838684082,grad_norm: 0.9887280564073184, iteration: 45205
loss: 1.0088205337524414,grad_norm: 0.999999124914045, iteration: 45206
loss: 0.990215539932251,grad_norm: 0.9999991630594068, iteration: 45207
loss: 0.9672874808311462,grad_norm: 0.954909792948475, iteration: 45208
loss: 1.0219084024429321,grad_norm: 0.9547016324411627, iteration: 45209
loss: 1.0096620321273804,grad_norm: 0.8869613147603068, iteration: 45210
loss: 1.0062040090560913,grad_norm: 0.873015207347401, iteration: 45211
loss: 1.009881615638733,grad_norm: 0.9999990180531462, iteration: 45212
loss: 0.9794741868972778,grad_norm: 0.8820844167641473, iteration: 45213
loss: 1.0375583171844482,grad_norm: 0.9999996871093891, iteration: 45214
loss: 0.9562708735466003,grad_norm: 0.9160459224078777, iteration: 45215
loss: 0.9632389545440674,grad_norm: 0.9127102637570584, iteration: 45216
loss: 0.9828731417655945,grad_norm: 0.9999990335237311, iteration: 45217
loss: 1.012657880783081,grad_norm: 0.9999990760438703, iteration: 45218
loss: 1.0125083923339844,grad_norm: 0.9718372968685971, iteration: 45219
loss: 1.0301601886749268,grad_norm: 0.9999991379462968, iteration: 45220
loss: 1.0068408250808716,grad_norm: 0.9965157763925906, iteration: 45221
loss: 0.9991859793663025,grad_norm: 0.8709874143268628, iteration: 45222
loss: 0.9975157380104065,grad_norm: 0.9092598974659871, iteration: 45223
loss: 0.9726137518882751,grad_norm: 0.999999006803953, iteration: 45224
loss: 0.9980979561805725,grad_norm: 0.9999992619060526, iteration: 45225
loss: 0.9854953289031982,grad_norm: 0.999999050164495, iteration: 45226
loss: 0.9997656941413879,grad_norm: 0.999999279155102, iteration: 45227
loss: 1.0000948905944824,grad_norm: 0.8793552684939581, iteration: 45228
loss: 1.0118577480316162,grad_norm: 0.9002205971454029, iteration: 45229
loss: 0.9792007207870483,grad_norm: 0.894893191773767, iteration: 45230
loss: 1.0087261199951172,grad_norm: 0.9153575944497289, iteration: 45231
loss: 0.9969232678413391,grad_norm: 0.872495286531406, iteration: 45232
loss: 0.9906764030456543,grad_norm: 0.8925278718777476, iteration: 45233
loss: 1.010607123374939,grad_norm: 0.9999991433531513, iteration: 45234
loss: 0.9795003533363342,grad_norm: 0.9017349229368713, iteration: 45235
loss: 0.9879317879676819,grad_norm: 0.9764553201918813, iteration: 45236
loss: 1.0180374383926392,grad_norm: 0.9299320642849848, iteration: 45237
loss: 0.9725494384765625,grad_norm: 0.8727559554566217, iteration: 45238
loss: 1.012423038482666,grad_norm: 0.9999992589933039, iteration: 45239
loss: 1.00905179977417,grad_norm: 0.8216617999277165, iteration: 45240
loss: 1.011082649230957,grad_norm: 0.999999104929417, iteration: 45241
loss: 0.9782230257987976,grad_norm: 0.9999990996698438, iteration: 45242
loss: 1.0009080171585083,grad_norm: 0.9999994850641376, iteration: 45243
loss: 0.997313916683197,grad_norm: 0.9999991448028099, iteration: 45244
loss: 1.0002529621124268,grad_norm: 0.9682051027766811, iteration: 45245
loss: 1.0653835535049438,grad_norm: 0.9999996782991751, iteration: 45246
loss: 1.0144627094268799,grad_norm: 0.7545040353305448, iteration: 45247
loss: 0.9972155094146729,grad_norm: 0.8370070983601385, iteration: 45248
loss: 0.9939318299293518,grad_norm: 0.9655912295402026, iteration: 45249
loss: 1.0287569761276245,grad_norm: 0.9999991383551989, iteration: 45250
loss: 0.9882166385650635,grad_norm: 0.9999992982629894, iteration: 45251
loss: 1.0346041917800903,grad_norm: 0.9999989964845776, iteration: 45252
loss: 1.026283860206604,grad_norm: 0.9999997967295273, iteration: 45253
loss: 1.0320231914520264,grad_norm: 0.9999993462107964, iteration: 45254
loss: 0.9738562107086182,grad_norm: 0.9999993496291038, iteration: 45255
loss: 0.9939904808998108,grad_norm: 0.851530125908464, iteration: 45256
loss: 1.0107723474502563,grad_norm: 0.985294980192527, iteration: 45257
loss: 1.0198378562927246,grad_norm: 0.8602826175377833, iteration: 45258
loss: 1.0385109186172485,grad_norm: 0.8937118378648405, iteration: 45259
loss: 0.9876300096511841,grad_norm: 0.9999991006313177, iteration: 45260
loss: 1.0221673250198364,grad_norm: 0.9999991565553121, iteration: 45261
loss: 1.0114909410476685,grad_norm: 0.9049496377386437, iteration: 45262
loss: 1.0305423736572266,grad_norm: 0.8881330468022058, iteration: 45263
loss: 0.9850650429725647,grad_norm: 0.9999990041590632, iteration: 45264
loss: 0.994942307472229,grad_norm: 0.9133453440319759, iteration: 45265
loss: 1.0074577331542969,grad_norm: 0.8997220886689609, iteration: 45266
loss: 1.0045268535614014,grad_norm: 0.9868491105381517, iteration: 45267
loss: 1.0227066278457642,grad_norm: 0.9999990880130387, iteration: 45268
loss: 1.0199934244155884,grad_norm: 0.9637570683588242, iteration: 45269
loss: 0.986760675907135,grad_norm: 0.9522292650360374, iteration: 45270
loss: 0.9812235832214355,grad_norm: 0.8766685400535585, iteration: 45271
loss: 1.016129493713379,grad_norm: 0.9999989994495774, iteration: 45272
loss: 0.9667244553565979,grad_norm: 0.99999918683111, iteration: 45273
loss: 1.0134413242340088,grad_norm: 0.9582993398796861, iteration: 45274
loss: 1.0393580198287964,grad_norm: 0.9999990450785843, iteration: 45275
loss: 1.0264222621917725,grad_norm: 0.9999992090956172, iteration: 45276
loss: 0.997534990310669,grad_norm: 0.9999990006943807, iteration: 45277
loss: 1.0015379190444946,grad_norm: 0.9999991635519238, iteration: 45278
loss: 0.997809886932373,grad_norm: 0.9999993079218048, iteration: 45279
loss: 1.020646572113037,grad_norm: 0.9999990757615647, iteration: 45280
loss: 1.0196545124053955,grad_norm: 0.9566419358900277, iteration: 45281
loss: 1.013615608215332,grad_norm: 0.9142160761943325, iteration: 45282
loss: 1.0321213006973267,grad_norm: 0.9999990939198097, iteration: 45283
loss: 1.0242605209350586,grad_norm: 0.9999990556968194, iteration: 45284
loss: 1.0188705921173096,grad_norm: 0.9999990264961108, iteration: 45285
loss: 1.0180346965789795,grad_norm: 0.9774422361994445, iteration: 45286
loss: 0.9959798455238342,grad_norm: 0.968778244808088, iteration: 45287
loss: 0.985646665096283,grad_norm: 0.9229528282093756, iteration: 45288
loss: 0.9691116213798523,grad_norm: 0.9132911480991539, iteration: 45289
loss: 1.02894127368927,grad_norm: 0.9914477763867593, iteration: 45290
loss: 1.0166635513305664,grad_norm: 0.904431792470968, iteration: 45291
loss: 0.9595275521278381,grad_norm: 0.9945430782563973, iteration: 45292
loss: 0.9921653270721436,grad_norm: 0.904148226759324, iteration: 45293
loss: 0.9983073472976685,grad_norm: 0.9999992761104609, iteration: 45294
loss: 0.9931929707527161,grad_norm: 0.9999990759328903, iteration: 45295
loss: 1.0224056243896484,grad_norm: 0.9999991376252159, iteration: 45296
loss: 1.0098474025726318,grad_norm: 0.9999996878230808, iteration: 45297
loss: 1.0029722452163696,grad_norm: 0.9999990327196612, iteration: 45298
loss: 1.0505057573318481,grad_norm: 0.9999990913925866, iteration: 45299
loss: 1.007216215133667,grad_norm: 0.9999992448987585, iteration: 45300
loss: 1.0179057121276855,grad_norm: 0.9813072337143461, iteration: 45301
loss: 1.028493046760559,grad_norm: 0.9999996211179917, iteration: 45302
loss: 1.0059750080108643,grad_norm: 0.8997093856886338, iteration: 45303
loss: 0.9831124544143677,grad_norm: 0.9999991477346472, iteration: 45304
loss: 1.0092169046401978,grad_norm: 0.9051578070988372, iteration: 45305
loss: 1.0112247467041016,grad_norm: 0.9999991000034927, iteration: 45306
loss: 1.0016182661056519,grad_norm: 0.9999992328257935, iteration: 45307
loss: 1.0289379358291626,grad_norm: 0.908749677121797, iteration: 45308
loss: 1.0569379329681396,grad_norm: 0.9999997500500992, iteration: 45309
loss: 1.0243176221847534,grad_norm: 0.7954727569621505, iteration: 45310
loss: 1.005058765411377,grad_norm: 0.9999990469286854, iteration: 45311
loss: 0.9824254512786865,grad_norm: 0.9338369401203717, iteration: 45312
loss: 1.035909652709961,grad_norm: 0.9389095839947563, iteration: 45313
loss: 1.0200241804122925,grad_norm: 0.9999991467978931, iteration: 45314
loss: 1.0184980630874634,grad_norm: 0.9999989647478681, iteration: 45315
loss: 1.003066062927246,grad_norm: 0.9673488076147287, iteration: 45316
loss: 0.9880392551422119,grad_norm: 0.9648960953286805, iteration: 45317
loss: 1.0455530881881714,grad_norm: 0.8601629267017317, iteration: 45318
loss: 1.0254838466644287,grad_norm: 0.8507806526883852, iteration: 45319
loss: 1.0229521989822388,grad_norm: 0.9443276138461738, iteration: 45320
loss: 0.9971997737884521,grad_norm: 0.8940196788899133, iteration: 45321
loss: 1.0264692306518555,grad_norm: 0.9913625099709731, iteration: 45322
loss: 0.9655625224113464,grad_norm: 0.8536069649134996, iteration: 45323
loss: 1.0299321413040161,grad_norm: 0.9999991976440671, iteration: 45324
loss: 0.9887049794197083,grad_norm: 0.9999990551195858, iteration: 45325
loss: 1.0030643939971924,grad_norm: 0.8947151893684558, iteration: 45326
loss: 1.039744257926941,grad_norm: 0.9999992259808927, iteration: 45327
loss: 0.9922574758529663,grad_norm: 0.9999992688510566, iteration: 45328
loss: 1.0094077587127686,grad_norm: 0.9999992130016282, iteration: 45329
loss: 1.031792163848877,grad_norm: 0.9999991385754952, iteration: 45330
loss: 0.9529814720153809,grad_norm: 0.9999991186505514, iteration: 45331
loss: 1.015670657157898,grad_norm: 0.9999992225918016, iteration: 45332
loss: 1.001281499862671,grad_norm: 0.9999991008918795, iteration: 45333
loss: 1.0020421743392944,grad_norm: 0.9871935130346744, iteration: 45334
loss: 0.9537038803100586,grad_norm: 0.9138299023900691, iteration: 45335
loss: 0.9907450675964355,grad_norm: 0.9149744785934167, iteration: 45336
loss: 1.0401339530944824,grad_norm: 0.9438611922533682, iteration: 45337
loss: 0.9953346252441406,grad_norm: 0.9999990517287294, iteration: 45338
loss: 0.9701880812644958,grad_norm: 0.9625577406987305, iteration: 45339
loss: 0.9446060657501221,grad_norm: 0.9999993331611998, iteration: 45340
loss: 1.0011926889419556,grad_norm: 0.9077334980329743, iteration: 45341
loss: 0.9974439740180969,grad_norm: 0.9999990478778822, iteration: 45342
loss: 1.0170047283172607,grad_norm: 0.9758404878591628, iteration: 45343
loss: 0.9779723286628723,grad_norm: 0.9999991970169836, iteration: 45344
loss: 1.0121792554855347,grad_norm: 0.9575464613122555, iteration: 45345
loss: 0.9930923581123352,grad_norm: 0.9999991628970731, iteration: 45346
loss: 0.9905191659927368,grad_norm: 0.9384410212251283, iteration: 45347
loss: 0.9923086166381836,grad_norm: 0.9989810411418574, iteration: 45348
loss: 1.0155645608901978,grad_norm: 0.9999992492134363, iteration: 45349
loss: 1.0401314496994019,grad_norm: 0.9999990073272198, iteration: 45350
loss: 0.9946116209030151,grad_norm: 0.999999223281172, iteration: 45351
loss: 0.9985653758049011,grad_norm: 0.8681968206515784, iteration: 45352
loss: 0.9835484027862549,grad_norm: 0.8653095962718148, iteration: 45353
loss: 1.0016934871673584,grad_norm: 0.8054070579325586, iteration: 45354
loss: 0.9787991642951965,grad_norm: 0.9999990786563064, iteration: 45355
loss: 1.0073466300964355,grad_norm: 0.9926426457331617, iteration: 45356
loss: 0.9832742810249329,grad_norm: 0.9999992291559786, iteration: 45357
loss: 0.9808425307273865,grad_norm: 0.8704647661788547, iteration: 45358
loss: 0.9830199480056763,grad_norm: 0.9999991058238086, iteration: 45359
loss: 1.012356162071228,grad_norm: 0.993335433393894, iteration: 45360
loss: 1.010919213294983,grad_norm: 0.9999991829952037, iteration: 45361
loss: 0.9787284731864929,grad_norm: 0.9999991891440978, iteration: 45362
loss: 1.0474659204483032,grad_norm: 0.9999989615455473, iteration: 45363
loss: 1.0341895818710327,grad_norm: 0.9548657616762275, iteration: 45364
loss: 0.9912598729133606,grad_norm: 0.9239034530662832, iteration: 45365
loss: 0.982517421245575,grad_norm: 0.9999991121347801, iteration: 45366
loss: 1.0175400972366333,grad_norm: 0.896174034851909, iteration: 45367
loss: 0.9746271967887878,grad_norm: 0.9680091847540947, iteration: 45368
loss: 1.0046591758728027,grad_norm: 0.999999102398736, iteration: 45369
loss: 1.004443883895874,grad_norm: 0.8207969536093719, iteration: 45370
loss: 1.003942608833313,grad_norm: 0.8911890678028306, iteration: 45371
loss: 1.003846526145935,grad_norm: 0.7769159481853702, iteration: 45372
loss: 1.032035231590271,grad_norm: 0.9999992094223732, iteration: 45373
loss: 1.0130910873413086,grad_norm: 0.999999105617098, iteration: 45374
loss: 1.0365715026855469,grad_norm: 0.8310310498160379, iteration: 45375
loss: 1.0010154247283936,grad_norm: 0.9999990805462712, iteration: 45376
loss: 1.0149693489074707,grad_norm: 0.7900901108869017, iteration: 45377
loss: 0.9564594626426697,grad_norm: 0.9999991818884162, iteration: 45378
loss: 0.9933117032051086,grad_norm: 0.9999990775711157, iteration: 45379
loss: 1.0088602304458618,grad_norm: 0.9999997890742948, iteration: 45380
loss: 0.9746754169464111,grad_norm: 0.9999992289034955, iteration: 45381
loss: 0.9951114058494568,grad_norm: 0.9487109640387644, iteration: 45382
loss: 0.9949907660484314,grad_norm: 0.9489859459977193, iteration: 45383
loss: 0.9709288477897644,grad_norm: 0.8611993207563595, iteration: 45384
loss: 1.0395796298980713,grad_norm: 0.9999990532576505, iteration: 45385
loss: 0.9989327192306519,grad_norm: 0.9999991723576713, iteration: 45386
loss: 0.9887629747390747,grad_norm: 0.999999040603714, iteration: 45387
loss: 1.012069582939148,grad_norm: 0.8863403460369169, iteration: 45388
loss: 0.9894375801086426,grad_norm: 0.9656223005326606, iteration: 45389
loss: 1.003723382949829,grad_norm: 0.9999990624774804, iteration: 45390
loss: 1.0031921863555908,grad_norm: 0.9999992749376321, iteration: 45391
loss: 1.0338534116744995,grad_norm: 0.9864651001965679, iteration: 45392
loss: 1.0218937397003174,grad_norm: 0.9999990680013118, iteration: 45393
loss: 0.9989234805107117,grad_norm: 0.9999992622842617, iteration: 45394
loss: 0.9826768040657043,grad_norm: 0.9999990570430096, iteration: 45395
loss: 1.0031530857086182,grad_norm: 0.9999993418979182, iteration: 45396
loss: 0.9785410761833191,grad_norm: 0.9999992886745869, iteration: 45397
loss: 1.0081136226654053,grad_norm: 0.9999992571963658, iteration: 45398
loss: 1.02747642993927,grad_norm: 0.8396399236651554, iteration: 45399
loss: 1.0376888513565063,grad_norm: 0.9999992167915277, iteration: 45400
loss: 0.9885849952697754,grad_norm: 0.825158667173002, iteration: 45401
loss: 0.9521467089653015,grad_norm: 0.9170045658994168, iteration: 45402
loss: 0.9934275150299072,grad_norm: 0.9999989597256059, iteration: 45403
loss: 1.022926688194275,grad_norm: 0.9999990562920621, iteration: 45404
loss: 1.0239365100860596,grad_norm: 0.9742334904359351, iteration: 45405
loss: 1.0500726699829102,grad_norm: 0.8973118981987943, iteration: 45406
loss: 1.00327467918396,grad_norm: 0.9999990103698159, iteration: 45407
loss: 1.0196006298065186,grad_norm: 0.999998996697561, iteration: 45408
loss: 0.9732295274734497,grad_norm: 0.9717481913353228, iteration: 45409
loss: 1.0353657007217407,grad_norm: 0.9364709683759282, iteration: 45410
loss: 0.9961948990821838,grad_norm: 0.9999990099859788, iteration: 45411
loss: 1.0275083780288696,grad_norm: 0.873844823201343, iteration: 45412
loss: 1.0064935684204102,grad_norm: 0.9618975429669918, iteration: 45413
loss: 1.0141711235046387,grad_norm: 0.9999992236268869, iteration: 45414
loss: 0.9890352487564087,grad_norm: 0.9999990417593264, iteration: 45415
loss: 1.09748113155365,grad_norm: 0.9999993373485718, iteration: 45416
loss: 1.0249637365341187,grad_norm: 0.9999990545331823, iteration: 45417
loss: 1.0076968669891357,grad_norm: 0.9999991736043438, iteration: 45418
loss: 0.9911593198776245,grad_norm: 0.9361595853490154, iteration: 45419
loss: 1.0187503099441528,grad_norm: 0.8961161624972932, iteration: 45420
loss: 1.012834906578064,grad_norm: 0.9999990130851983, iteration: 45421
loss: 1.0246553421020508,grad_norm: 0.9203474186863007, iteration: 45422
loss: 1.0455058813095093,grad_norm: 0.9999992430837021, iteration: 45423
loss: 0.9772173166275024,grad_norm: 0.9748147836909338, iteration: 45424
loss: 1.0104258060455322,grad_norm: 0.9999992732740024, iteration: 45425
loss: 1.0066981315612793,grad_norm: 0.9999991747507133, iteration: 45426
loss: 0.9750758409500122,grad_norm: 0.9999991930228891, iteration: 45427
loss: 1.1840018033981323,grad_norm: 0.9999997735712305, iteration: 45428
loss: 1.0599586963653564,grad_norm: 0.9497441579826033, iteration: 45429
loss: 1.0217006206512451,grad_norm: 0.9999995994928175, iteration: 45430
loss: 0.9538233876228333,grad_norm: 0.9999990446348317, iteration: 45431
loss: 0.9931432604789734,grad_norm: 0.9999989687982954, iteration: 45432
loss: 0.9853807687759399,grad_norm: 0.8524763163989155, iteration: 45433
loss: 1.0031287670135498,grad_norm: 0.9999991190486773, iteration: 45434
loss: 1.0070786476135254,grad_norm: 0.8865688558025291, iteration: 45435
loss: 0.9945505261421204,grad_norm: 0.9999991607508248, iteration: 45436
loss: 1.0804443359375,grad_norm: 0.9999992176434712, iteration: 45437
loss: 0.9877907037734985,grad_norm: 0.999999152117274, iteration: 45438
loss: 0.9858747124671936,grad_norm: 0.9156703715556151, iteration: 45439
loss: 0.9832779765129089,grad_norm: 0.970177914579527, iteration: 45440
loss: 0.9737210869789124,grad_norm: 0.9999991182259351, iteration: 45441
loss: 1.0679421424865723,grad_norm: 0.999999624584154, iteration: 45442
loss: 1.0318865776062012,grad_norm: 0.9999991071783668, iteration: 45443
loss: 1.0166394710540771,grad_norm: 0.8313712097229112, iteration: 45444
loss: 0.9661163687705994,grad_norm: 0.9999990360093499, iteration: 45445
loss: 1.0324217081069946,grad_norm: 0.9837041518570564, iteration: 45446
loss: 1.0012845993041992,grad_norm: 0.9533400384029317, iteration: 45447
loss: 0.9622111320495605,grad_norm: 0.9999990091194795, iteration: 45448
loss: 0.9513833522796631,grad_norm: 0.975233936060364, iteration: 45449
loss: 1.0461266040802002,grad_norm: 0.9999996801906005, iteration: 45450
loss: 1.0199860334396362,grad_norm: 0.9192765543701729, iteration: 45451
loss: 1.0013128519058228,grad_norm: 0.8828555470483107, iteration: 45452
loss: 0.9911239743232727,grad_norm: 0.8726747462917391, iteration: 45453
loss: 0.9730854034423828,grad_norm: 0.9350133999658827, iteration: 45454
loss: 1.0027258396148682,grad_norm: 0.8304172506374106, iteration: 45455
loss: 0.9992504715919495,grad_norm: 0.9999991075435272, iteration: 45456
loss: 1.0398112535476685,grad_norm: 0.9999992771128543, iteration: 45457
loss: 0.9963732361793518,grad_norm: 0.9999992704404953, iteration: 45458
loss: 0.9576222896575928,grad_norm: 0.9999990515345538, iteration: 45459
loss: 1.027268886566162,grad_norm: 0.9999991875490388, iteration: 45460
loss: 1.0432548522949219,grad_norm: 0.9352009629860254, iteration: 45461
loss: 1.02157723903656,grad_norm: 0.9398770404425868, iteration: 45462
loss: 0.9847127795219421,grad_norm: 0.9999992181767738, iteration: 45463
loss: 0.9920108914375305,grad_norm: 0.9951023838846359, iteration: 45464
loss: 1.012560248374939,grad_norm: 0.9999989712459396, iteration: 45465
loss: 0.9869014024734497,grad_norm: 0.9555759701620606, iteration: 45466
loss: 1.0037460327148438,grad_norm: 0.999999123786703, iteration: 45467
loss: 0.9954034090042114,grad_norm: 0.9999995727950876, iteration: 45468
loss: 1.0259191989898682,grad_norm: 0.8456082103173884, iteration: 45469
loss: 1.01361882686615,grad_norm: 0.7867421809975629, iteration: 45470
loss: 1.0058673620224,grad_norm: 0.9999992588040834, iteration: 45471
loss: 0.9984176158905029,grad_norm: 0.999999260960397, iteration: 45472
loss: 0.9913986325263977,grad_norm: 0.9999991559412178, iteration: 45473
loss: 1.0053242444992065,grad_norm: 0.9983786162454465, iteration: 45474
loss: 1.0490909814834595,grad_norm: 0.9999992557190591, iteration: 45475
loss: 1.0305694341659546,grad_norm: 0.9999990294768594, iteration: 45476
loss: 0.9884440898895264,grad_norm: 0.9999991655909979, iteration: 45477
loss: 1.0174719095230103,grad_norm: 0.9012507458962936, iteration: 45478
loss: 0.9648917317390442,grad_norm: 0.9199557842305841, iteration: 45479
loss: 1.0166141986846924,grad_norm: 0.9999991077533268, iteration: 45480
loss: 1.0071231126785278,grad_norm: 0.9933407575409233, iteration: 45481
loss: 1.0238020420074463,grad_norm: 0.9999993824381743, iteration: 45482
loss: 0.983957052230835,grad_norm: 0.9999991931704447, iteration: 45483
loss: 1.0494431257247925,grad_norm: 0.9999996374572523, iteration: 45484
loss: 0.9788585901260376,grad_norm: 0.9127433857365523, iteration: 45485
loss: 1.0303863286972046,grad_norm: 0.9142692675862146, iteration: 45486
loss: 0.9842458963394165,grad_norm: 0.9309942663012645, iteration: 45487
loss: 1.0577696561813354,grad_norm: 0.9341336410798242, iteration: 45488
loss: 1.0243111848831177,grad_norm: 0.9663488674636428, iteration: 45489
loss: 1.0297980308532715,grad_norm: 0.9846932837852345, iteration: 45490
loss: 0.9877976179122925,grad_norm: 0.9709043446978635, iteration: 45491
loss: 1.0122636556625366,grad_norm: 0.9999992491263398, iteration: 45492
loss: 1.014500379562378,grad_norm: 0.9999993834187354, iteration: 45493
loss: 1.0086722373962402,grad_norm: 0.9999991506366462, iteration: 45494
loss: 0.9948728680610657,grad_norm: 0.8256118710711745, iteration: 45495
loss: 0.9934139847755432,grad_norm: 0.9999992632522873, iteration: 45496
loss: 0.9827312231063843,grad_norm: 0.9999989492791445, iteration: 45497
loss: 0.9688068628311157,grad_norm: 0.7489692552387677, iteration: 45498
loss: 0.9730737209320068,grad_norm: 0.9999993166096995, iteration: 45499
loss: 1.0217539072036743,grad_norm: 0.9692073129689683, iteration: 45500
loss: 1.0346019268035889,grad_norm: 0.9631033155394144, iteration: 45501
loss: 0.9815704226493835,grad_norm: 0.8798621648300005, iteration: 45502
loss: 1.003060221672058,grad_norm: 0.9999992952926355, iteration: 45503
loss: 1.005419373512268,grad_norm: 0.9084277713373631, iteration: 45504
loss: 0.9912480711936951,grad_norm: 0.9582302301558241, iteration: 45505
loss: 0.9977306127548218,grad_norm: 0.9999991338686347, iteration: 45506
loss: 0.9600905776023865,grad_norm: 0.841642363740284, iteration: 45507
loss: 1.0284316539764404,grad_norm: 0.9999991905321283, iteration: 45508
loss: 0.978489100933075,grad_norm: 0.7957942287220229, iteration: 45509
loss: 1.1614362001419067,grad_norm: 0.9999991830427375, iteration: 45510
loss: 1.0378412008285522,grad_norm: 0.9999993561420086, iteration: 45511
loss: 0.9993656873703003,grad_norm: 0.9999992546910268, iteration: 45512
loss: 1.1174232959747314,grad_norm: 0.9999999740490607, iteration: 45513
loss: 0.998121440410614,grad_norm: 0.8906867432796401, iteration: 45514
loss: 1.0083158016204834,grad_norm: 0.9999991967915015, iteration: 45515
loss: 1.0138306617736816,grad_norm: 0.897097590541878, iteration: 45516
loss: 1.0169087648391724,grad_norm: 0.9568353992443343, iteration: 45517
loss: 1.0039775371551514,grad_norm: 0.9999991012866966, iteration: 45518
loss: 1.0076923370361328,grad_norm: 0.9999992820783576, iteration: 45519
loss: 1.2323789596557617,grad_norm: 0.9999991861933692, iteration: 45520
loss: 1.0565301179885864,grad_norm: 0.9999999845247922, iteration: 45521
loss: 0.9844717979431152,grad_norm: 0.8919833360820149, iteration: 45522
loss: 0.9820390939712524,grad_norm: 0.8761271097666941, iteration: 45523
loss: 0.9957499504089355,grad_norm: 0.8563486574878219, iteration: 45524
loss: 0.9999578595161438,grad_norm: 0.9721973256613957, iteration: 45525
loss: 0.9490829706192017,grad_norm: 0.964532823827656, iteration: 45526
loss: 0.95709228515625,grad_norm: 0.8453143933847768, iteration: 45527
loss: 1.0000237226486206,grad_norm: 0.9999991819973295, iteration: 45528
loss: 0.9597792625427246,grad_norm: 0.8769139908275684, iteration: 45529
loss: 1.0053519010543823,grad_norm: 0.9999991579566763, iteration: 45530
loss: 0.9756487011909485,grad_norm: 0.9228081581519048, iteration: 45531
loss: 1.0077131986618042,grad_norm: 0.9999991868108994, iteration: 45532
loss: 0.9716700911521912,grad_norm: 0.9999991138008368, iteration: 45533
loss: 1.0351417064666748,grad_norm: 0.9999996770879934, iteration: 45534
loss: 1.0167855024337769,grad_norm: 0.9999991169309245, iteration: 45535
loss: 1.0287541151046753,grad_norm: 0.9302591807539986, iteration: 45536
loss: 0.9990304112434387,grad_norm: 0.977258096177554, iteration: 45537
loss: 1.01406729221344,grad_norm: 0.9146109406140186, iteration: 45538
loss: 1.0071313381195068,grad_norm: 0.87144505265057, iteration: 45539
loss: 1.038570761680603,grad_norm: 0.9284074471706559, iteration: 45540
loss: 1.0020982027053833,grad_norm: 0.9999990645745269, iteration: 45541
loss: 0.9470455646514893,grad_norm: 0.9111657434942816, iteration: 45542
loss: 1.009308934211731,grad_norm: 0.9999990314402594, iteration: 45543
loss: 0.9992819428443909,grad_norm: 0.9999991476392935, iteration: 45544
loss: 1.0124976634979248,grad_norm: 0.9999993497145981, iteration: 45545
loss: 1.0087097883224487,grad_norm: 0.9999991172831535, iteration: 45546
loss: 1.0224512815475464,grad_norm: 0.8817946403742732, iteration: 45547
loss: 1.0057233572006226,grad_norm: 0.9999990847238355, iteration: 45548
loss: 0.994378387928009,grad_norm: 0.9997821592855543, iteration: 45549
loss: 0.9548762440681458,grad_norm: 0.9999991146187627, iteration: 45550
loss: 0.9786086678504944,grad_norm: 0.9899256544596462, iteration: 45551
loss: 0.9805830121040344,grad_norm: 0.926447141933752, iteration: 45552
loss: 1.0592657327651978,grad_norm: 0.9999997182658549, iteration: 45553
loss: 0.9914959669113159,grad_norm: 0.9999991976549224, iteration: 45554
loss: 1.0288718938827515,grad_norm: 0.9999990631264827, iteration: 45555
loss: 1.010671615600586,grad_norm: 0.8893104318226784, iteration: 45556
loss: 1.048240065574646,grad_norm: 0.9410325725845873, iteration: 45557
loss: 0.9867576956748962,grad_norm: 0.9560417346017612, iteration: 45558
loss: 1.0169721841812134,grad_norm: 0.9999990974147336, iteration: 45559
loss: 1.0149437189102173,grad_norm: 0.9999992576364924, iteration: 45560
loss: 1.017736554145813,grad_norm: 0.9954566626939725, iteration: 45561
loss: 0.9907144904136658,grad_norm: 0.979906081509085, iteration: 45562
loss: 1.0314421653747559,grad_norm: 0.8502282920843194, iteration: 45563
loss: 1.0100868940353394,grad_norm: 0.965310567929955, iteration: 45564
loss: 1.0011396408081055,grad_norm: 0.8909224369541554, iteration: 45565
loss: 1.0101454257965088,grad_norm: 0.9611578551730956, iteration: 45566
loss: 1.0318694114685059,grad_norm: 0.9999994646503224, iteration: 45567
loss: 0.9917040467262268,grad_norm: 0.9319317482784133, iteration: 45568
loss: 1.003313660621643,grad_norm: 0.8974788289566964, iteration: 45569
loss: 1.0525352954864502,grad_norm: 0.9632822350102748, iteration: 45570
loss: 1.0118894577026367,grad_norm: 0.9999991680043437, iteration: 45571
loss: 0.9839913249015808,grad_norm: 0.9999991865517092, iteration: 45572
loss: 1.0263975858688354,grad_norm: 0.9999989644567205, iteration: 45573
loss: 1.0010464191436768,grad_norm: 0.9881260251895857, iteration: 45574
loss: 0.9437599778175354,grad_norm: 0.9999990341001448, iteration: 45575
loss: 0.9822385311126709,grad_norm: 0.9999990488254036, iteration: 45576
loss: 1.027458906173706,grad_norm: 0.9642697560217346, iteration: 45577
loss: 1.0416127443313599,grad_norm: 0.9999998181612776, iteration: 45578
loss: 0.9978464245796204,grad_norm: 0.9371647723112382, iteration: 45579
loss: 0.9768136143684387,grad_norm: 0.8935540509483064, iteration: 45580
loss: 1.020368218421936,grad_norm: 0.9907268233216898, iteration: 45581
loss: 0.979585587978363,grad_norm: 0.9999990359991613, iteration: 45582
loss: 1.0047181844711304,grad_norm: 0.9851785572313894, iteration: 45583
loss: 1.0219508409500122,grad_norm: 0.9999990263561034, iteration: 45584
loss: 1.0046172142028809,grad_norm: 0.9499941309273919, iteration: 45585
loss: 0.9838523268699646,grad_norm: 0.8713599053793089, iteration: 45586
loss: 1.004035234451294,grad_norm: 0.999999082255924, iteration: 45587
loss: 0.9916876554489136,grad_norm: 0.9225297072603879, iteration: 45588
loss: 1.040587306022644,grad_norm: 0.9999992445347122, iteration: 45589
loss: 1.0248721837997437,grad_norm: 0.8987402315718654, iteration: 45590
loss: 0.9954053163528442,grad_norm: 0.9360601135109651, iteration: 45591
loss: 0.9727185964584351,grad_norm: 0.9697311371096929, iteration: 45592
loss: 1.0260066986083984,grad_norm: 0.999998986599431, iteration: 45593
loss: 0.9857836961746216,grad_norm: 0.9933497050976283, iteration: 45594
loss: 0.998876690864563,grad_norm: 0.9122469801145522, iteration: 45595
loss: 1.0190476179122925,grad_norm: 0.982525078591544, iteration: 45596
loss: 0.9702287912368774,grad_norm: 0.9441077005811928, iteration: 45597
loss: 0.97561115026474,grad_norm: 0.990164556531614, iteration: 45598
loss: 0.9907689094543457,grad_norm: 0.9999990274976635, iteration: 45599
loss: 1.011002779006958,grad_norm: 0.999999052522144, iteration: 45600
loss: 0.9730579853057861,grad_norm: 0.9057621047245105, iteration: 45601
loss: 1.0093015432357788,grad_norm: 0.9703561516593273, iteration: 45602
loss: 1.0019636154174805,grad_norm: 0.8799940348331153, iteration: 45603
loss: 1.010973572731018,grad_norm: 0.9999991715800595, iteration: 45604
loss: 0.9842031002044678,grad_norm: 0.8294324390427201, iteration: 45605
loss: 0.9905277490615845,grad_norm: 0.9574642799569977, iteration: 45606
loss: 0.9987658858299255,grad_norm: 0.9818537109487917, iteration: 45607
loss: 0.9666314721107483,grad_norm: 0.9408899305210795, iteration: 45608
loss: 1.0285427570343018,grad_norm: 0.9482845590802363, iteration: 45609
loss: 1.0847963094711304,grad_norm: 0.9999993007651219, iteration: 45610
loss: 0.9802442193031311,grad_norm: 0.9705916382772215, iteration: 45611
loss: 1.0122863054275513,grad_norm: 0.9458361362993062, iteration: 45612
loss: 1.0182592868804932,grad_norm: 0.8282352510770964, iteration: 45613
loss: 1.0238261222839355,grad_norm: 0.9999992847924223, iteration: 45614
loss: 1.028567910194397,grad_norm: 0.9999993065361462, iteration: 45615
loss: 0.9868947863578796,grad_norm: 0.8087419784799981, iteration: 45616
loss: 1.003390908241272,grad_norm: 0.9999991057591986, iteration: 45617
loss: 1.0094155073165894,grad_norm: 0.9999989882278114, iteration: 45618
loss: 1.0402177572250366,grad_norm: 0.9999993130351964, iteration: 45619
loss: 1.001867651939392,grad_norm: 0.9131657904635043, iteration: 45620
loss: 0.9688369035720825,grad_norm: 0.967851743435015, iteration: 45621
loss: 1.0324875116348267,grad_norm: 0.9856920709621936, iteration: 45622
loss: 0.9871993660926819,grad_norm: 0.9745656793053906, iteration: 45623
loss: 1.0539765357971191,grad_norm: 0.9999991958385976, iteration: 45624
loss: 1.0014568567276,grad_norm: 0.8101841501248327, iteration: 45625
loss: 0.9698997735977173,grad_norm: 0.9999992472375345, iteration: 45626
loss: 0.991289496421814,grad_norm: 0.9690099584616682, iteration: 45627
loss: 1.0058504343032837,grad_norm: 0.999999152737063, iteration: 45628
loss: 0.9861107468605042,grad_norm: 0.9999990994824965, iteration: 45629
loss: 1.0171849727630615,grad_norm: 0.9999991093984908, iteration: 45630
loss: 0.9752320647239685,grad_norm: 0.9999997667356008, iteration: 45631
loss: 1.0311344861984253,grad_norm: 0.7717845917981072, iteration: 45632
loss: 0.9892128109931946,grad_norm: 0.8825056544153708, iteration: 45633
loss: 1.0052051544189453,grad_norm: 0.9999991645966885, iteration: 45634
loss: 0.9803179502487183,grad_norm: 0.8677218879865998, iteration: 45635
loss: 0.9780219197273254,grad_norm: 0.9999991465625153, iteration: 45636
loss: 1.0108331441879272,grad_norm: 0.9999992769484599, iteration: 45637
loss: 1.0351275205612183,grad_norm: 0.9465941696371781, iteration: 45638
loss: 0.972625195980072,grad_norm: 0.9999991060239728, iteration: 45639
loss: 1.0034511089324951,grad_norm: 0.9999989603490839, iteration: 45640
loss: 0.9612486362457275,grad_norm: 0.9999990851286998, iteration: 45641
loss: 1.022180438041687,grad_norm: 0.9999992912109936, iteration: 45642
loss: 1.0144230127334595,grad_norm: 0.9224732694495043, iteration: 45643
loss: 1.045685052871704,grad_norm: 0.8535936400259801, iteration: 45644
loss: 0.9968587160110474,grad_norm: 0.8881610887130758, iteration: 45645
loss: 0.980945885181427,grad_norm: 0.8848050248023239, iteration: 45646
loss: 1.0486971139907837,grad_norm: 0.9999995817710413, iteration: 45647
loss: 0.9947298765182495,grad_norm: 0.9794238102026945, iteration: 45648
loss: 1.0229158401489258,grad_norm: 0.9814413359519465, iteration: 45649
loss: 1.0302894115447998,grad_norm: 0.9001286986178467, iteration: 45650
loss: 1.012455940246582,grad_norm: 0.9999992317731632, iteration: 45651
loss: 0.9806863069534302,grad_norm: 0.83246415871115, iteration: 45652
loss: 0.9967600703239441,grad_norm: 0.9999990617885037, iteration: 45653
loss: 0.9649242758750916,grad_norm: 0.9999991599485348, iteration: 45654
loss: 0.9922330975532532,grad_norm: 0.9812401314630107, iteration: 45655
loss: 0.9636217951774597,grad_norm: 0.9999989795563519, iteration: 45656
loss: 0.9862184524536133,grad_norm: 0.8455444387599658, iteration: 45657
loss: 0.9671686887741089,grad_norm: 0.9776926074769028, iteration: 45658
loss: 1.0108593702316284,grad_norm: 0.9999992251947275, iteration: 45659
loss: 0.9920837879180908,grad_norm: 0.9785944626476474, iteration: 45660
loss: 0.9897482991218567,grad_norm: 0.9992399930804748, iteration: 45661
loss: 1.016089677810669,grad_norm: 0.9884342823319053, iteration: 45662
loss: 0.9916154146194458,grad_norm: 0.9687437752835137, iteration: 45663
loss: 1.03741455078125,grad_norm: 0.9999996973003931, iteration: 45664
loss: 0.9824740290641785,grad_norm: 0.9999990894762582, iteration: 45665
loss: 1.0121870040893555,grad_norm: 0.8718370718455056, iteration: 45666
loss: 1.0058097839355469,grad_norm: 0.9999990520986359, iteration: 45667
loss: 1.002128005027771,grad_norm: 0.9176353285358582, iteration: 45668
loss: 1.0373406410217285,grad_norm: 0.9564850691704447, iteration: 45669
loss: 1.028855800628662,grad_norm: 0.9999997590280737, iteration: 45670
loss: 1.0311999320983887,grad_norm: 0.9999991387089279, iteration: 45671
loss: 0.9878124594688416,grad_norm: 0.9999992242800668, iteration: 45672
loss: 0.9639776349067688,grad_norm: 0.9999992301354148, iteration: 45673
loss: 1.009235143661499,grad_norm: 0.9999991289423789, iteration: 45674
loss: 1.0348540544509888,grad_norm: 0.981144024544061, iteration: 45675
loss: 1.0432393550872803,grad_norm: 0.9999991806718843, iteration: 45676
loss: 1.0041612386703491,grad_norm: 0.9181234130189846, iteration: 45677
loss: 1.0058715343475342,grad_norm: 0.9340060569631073, iteration: 45678
loss: 1.0044341087341309,grad_norm: 0.9999991519813275, iteration: 45679
loss: 1.0107625722885132,grad_norm: 0.9250846426630188, iteration: 45680
loss: 0.9626676440238953,grad_norm: 0.8163853313606274, iteration: 45681
loss: 0.9645897150039673,grad_norm: 0.9753130231872219, iteration: 45682
loss: 0.9785228371620178,grad_norm: 0.9406301677712123, iteration: 45683
loss: 1.0297644138336182,grad_norm: 0.9705444617325439, iteration: 45684
loss: 1.0334641933441162,grad_norm: 0.8711636237293444, iteration: 45685
loss: 1.0012675523757935,grad_norm: 0.9957172559809186, iteration: 45686
loss: 1.010105013847351,grad_norm: 0.9673296343372608, iteration: 45687
loss: 1.0087589025497437,grad_norm: 0.9764038953190196, iteration: 45688
loss: 1.0237202644348145,grad_norm: 0.999999802703662, iteration: 45689
loss: 0.9880589246749878,grad_norm: 0.9672085244537272, iteration: 45690
loss: 0.9871468544006348,grad_norm: 0.9052084402739321, iteration: 45691
loss: 0.9920717477798462,grad_norm: 0.9999990952104373, iteration: 45692
loss: 0.9315782785415649,grad_norm: 0.9954103004203817, iteration: 45693
loss: 1.0224589109420776,grad_norm: 0.8174643018133163, iteration: 45694
loss: 0.9949641227722168,grad_norm: 0.9999990414009279, iteration: 45695
loss: 0.9635888934135437,grad_norm: 0.9999991600424644, iteration: 45696
loss: 0.9565945267677307,grad_norm: 0.9999992396751818, iteration: 45697
loss: 0.9837812781333923,grad_norm: 0.8993485837579475, iteration: 45698
loss: 1.047100305557251,grad_norm: 0.8176655928872, iteration: 45699
loss: 0.9900644421577454,grad_norm: 0.999999575865312, iteration: 45700
loss: 1.035301923751831,grad_norm: 0.9595751274367706, iteration: 45701
loss: 0.9812320470809937,grad_norm: 0.9687139175054684, iteration: 45702
loss: 1.0095030069351196,grad_norm: 0.9999990487408871, iteration: 45703
loss: 1.0025091171264648,grad_norm: 0.9693214131197362, iteration: 45704
loss: 0.9865972995758057,grad_norm: 0.9075775567255276, iteration: 45705
loss: 1.0289688110351562,grad_norm: 0.976025714068207, iteration: 45706
loss: 0.9984320998191833,grad_norm: 0.8476652094604415, iteration: 45707
loss: 0.9743325114250183,grad_norm: 0.9999990640810158, iteration: 45708
loss: 0.9769558310508728,grad_norm: 0.9999991614402373, iteration: 45709
loss: 1.0012702941894531,grad_norm: 0.8968672129357624, iteration: 45710
loss: 0.9655932784080505,grad_norm: 0.928714812239816, iteration: 45711
loss: 0.97820645570755,grad_norm: 0.9999992160279033, iteration: 45712
loss: 0.9699006676673889,grad_norm: 0.9053617620955076, iteration: 45713
loss: 0.9946035742759705,grad_norm: 0.9321469259729864, iteration: 45714
loss: 1.0133740901947021,grad_norm: 0.999999156551265, iteration: 45715
loss: 1.0090585947036743,grad_norm: 0.9413158355499477, iteration: 45716
loss: 1.0083379745483398,grad_norm: 0.9999991543747663, iteration: 45717
loss: 0.9785920977592468,grad_norm: 0.9424506541007094, iteration: 45718
loss: 1.0246163606643677,grad_norm: 0.9770065006224647, iteration: 45719
loss: 0.9861574172973633,grad_norm: 0.8638200646797649, iteration: 45720
loss: 0.9524104595184326,grad_norm: 0.9999993160420328, iteration: 45721
loss: 0.9844239354133606,grad_norm: 0.8933454876630906, iteration: 45722
loss: 1.0332043170928955,grad_norm: 0.9999991589807723, iteration: 45723
loss: 0.9387191534042358,grad_norm: 0.8938888413204452, iteration: 45724
loss: 1.0065175294876099,grad_norm: 0.9999993876234763, iteration: 45725
loss: 1.005460262298584,grad_norm: 0.9999990864264912, iteration: 45726
loss: 1.0230213403701782,grad_norm: 0.9118832385149073, iteration: 45727
loss: 1.028002142906189,grad_norm: 0.9999993157753545, iteration: 45728
loss: 1.0121941566467285,grad_norm: 0.9999988364507179, iteration: 45729
loss: 0.9801937341690063,grad_norm: 0.9999992176083895, iteration: 45730
loss: 0.9871963262557983,grad_norm: 0.8741917961265937, iteration: 45731
loss: 0.9975845813751221,grad_norm: 0.9067325811647459, iteration: 45732
loss: 0.9874621629714966,grad_norm: 0.8861761267557463, iteration: 45733
loss: 0.9926517605781555,grad_norm: 0.9191631851246365, iteration: 45734
loss: 1.0202231407165527,grad_norm: 0.9343830155190249, iteration: 45735
loss: 0.9615122675895691,grad_norm: 0.8856933388506943, iteration: 45736
loss: 0.9794952869415283,grad_norm: 0.9941503943049017, iteration: 45737
loss: 0.9899680614471436,grad_norm: 0.9957142663528009, iteration: 45738
loss: 1.0416045188903809,grad_norm: 0.99999942195036, iteration: 45739
loss: 1.0169267654418945,grad_norm: 0.9106452825931945, iteration: 45740
loss: 1.013282299041748,grad_norm: 0.845995551180632, iteration: 45741
loss: 1.000685214996338,grad_norm: 0.9337350381879327, iteration: 45742
loss: 0.9851632118225098,grad_norm: 0.9999991565323259, iteration: 45743
loss: 0.997211217880249,grad_norm: 0.9999992537033147, iteration: 45744
loss: 1.0090880393981934,grad_norm: 0.9999990495866045, iteration: 45745
loss: 1.0305474996566772,grad_norm: 0.9999993873650705, iteration: 45746
loss: 1.0287755727767944,grad_norm: 0.9999993062127126, iteration: 45747
loss: 0.9682209491729736,grad_norm: 0.9857319737500789, iteration: 45748
loss: 0.9960880875587463,grad_norm: 0.9999990565096329, iteration: 45749
loss: 0.9973322749137878,grad_norm: 0.8900914359861324, iteration: 45750
loss: 0.9919077157974243,grad_norm: 0.999999043794249, iteration: 45751
loss: 1.002351999282837,grad_norm: 0.928418246176297, iteration: 45752
loss: 0.9905321598052979,grad_norm: 0.8783304404425257, iteration: 45753
loss: 0.9629442691802979,grad_norm: 0.9999991712560004, iteration: 45754
loss: 1.017471194267273,grad_norm: 0.9999991786016156, iteration: 45755
loss: 1.0187437534332275,grad_norm: 0.9999991050358487, iteration: 45756
loss: 1.0223478078842163,grad_norm: 0.9999992686982514, iteration: 45757
loss: 1.0262093544006348,grad_norm: 0.9999991715576966, iteration: 45758
loss: 1.0032685995101929,grad_norm: 0.9798266953811028, iteration: 45759
loss: 0.9924496412277222,grad_norm: 0.8802652985969898, iteration: 45760
loss: 1.0090707540512085,grad_norm: 0.9999991272063244, iteration: 45761
loss: 0.9804871082305908,grad_norm: 0.9999991125953223, iteration: 45762
loss: 0.9752156734466553,grad_norm: 0.9999990728521425, iteration: 45763
loss: 1.0436798334121704,grad_norm: 0.9983357515526131, iteration: 45764
loss: 0.9956024885177612,grad_norm: 0.9999990412446619, iteration: 45765
loss: 0.9476497769355774,grad_norm: 0.8021516258731483, iteration: 45766
loss: 1.0148357152938843,grad_norm: 0.999998997320754, iteration: 45767
loss: 1.0063190460205078,grad_norm: 0.922857176044064, iteration: 45768
loss: 1.018625259399414,grad_norm: 0.9999990815863941, iteration: 45769
loss: 1.0349477529525757,grad_norm: 0.9999997453099221, iteration: 45770
loss: 1.0222723484039307,grad_norm: 0.9999993737604221, iteration: 45771
loss: 1.0640426874160767,grad_norm: 0.9949742017244184, iteration: 45772
loss: 1.028889536857605,grad_norm: 0.9999992033443975, iteration: 45773
loss: 0.970930278301239,grad_norm: 0.7619764511187431, iteration: 45774
loss: 1.0217773914337158,grad_norm: 0.9999991628651902, iteration: 45775
loss: 0.9946044683456421,grad_norm: 0.9999991812545974, iteration: 45776
loss: 1.0136122703552246,grad_norm: 0.969261502145646, iteration: 45777
loss: 1.0039589405059814,grad_norm: 0.999999654393275, iteration: 45778
loss: 0.9568755030632019,grad_norm: 0.9581875292698547, iteration: 45779
loss: 1.0288400650024414,grad_norm: 0.9999990196497582, iteration: 45780
loss: 0.9833954572677612,grad_norm: 0.8866057315462564, iteration: 45781
loss: 1.0295190811157227,grad_norm: 0.9715678477025512, iteration: 45782
loss: 0.9852855205535889,grad_norm: 0.9999990532514649, iteration: 45783
loss: 1.034134030342102,grad_norm: 0.9999993928918677, iteration: 45784
loss: 1.047376036643982,grad_norm: 0.9999989340749449, iteration: 45785
loss: 0.9758558869361877,grad_norm: 0.9364648754443132, iteration: 45786
loss: 1.025418996810913,grad_norm: 0.9579125901486102, iteration: 45787
loss: 1.0047215223312378,grad_norm: 0.8380328329556134, iteration: 45788
loss: 0.9841101765632629,grad_norm: 0.9728390941044434, iteration: 45789
loss: 1.013395071029663,grad_norm: 0.9999991854771964, iteration: 45790
loss: 0.9896253347396851,grad_norm: 0.9999989903745071, iteration: 45791
loss: 1.0020626783370972,grad_norm: 0.9999991506916834, iteration: 45792
loss: 0.9881500601768494,grad_norm: 0.8820265191102596, iteration: 45793
loss: 0.9963866472244263,grad_norm: 0.999999109616584, iteration: 45794
loss: 1.0211889743804932,grad_norm: 0.9206571515291115, iteration: 45795
loss: 1.010507345199585,grad_norm: 0.9030061630275438, iteration: 45796
loss: 0.9870520830154419,grad_norm: 0.9999990828986387, iteration: 45797
loss: 1.0449684858322144,grad_norm: 0.9999990819839298, iteration: 45798
loss: 1.0103131532669067,grad_norm: 0.8763373494971913, iteration: 45799
loss: 0.99582839012146,grad_norm: 0.9999993549627325, iteration: 45800
loss: 1.0027936697006226,grad_norm: 0.9896228021851996, iteration: 45801
loss: 1.0362019538879395,grad_norm: 0.9059492373775224, iteration: 45802
loss: 1.0162971019744873,grad_norm: 0.9014813031683505, iteration: 45803
loss: 0.9809128642082214,grad_norm: 0.941375709289543, iteration: 45804
loss: 1.0098750591278076,grad_norm: 0.9999990583491366, iteration: 45805
loss: 0.9713269472122192,grad_norm: 0.9999991836499843, iteration: 45806
loss: 1.0257539749145508,grad_norm: 0.9999993640823768, iteration: 45807
loss: 0.9814189076423645,grad_norm: 0.9999990926120587, iteration: 45808
loss: 0.9860463738441467,grad_norm: 0.9634335342085361, iteration: 45809
loss: 1.0238969326019287,grad_norm: 0.999999103628447, iteration: 45810
loss: 0.9976299405097961,grad_norm: 0.8016520191395028, iteration: 45811
loss: 1.035598874092102,grad_norm: 0.8596504269685712, iteration: 45812
loss: 1.029259443283081,grad_norm: 0.9999991926192148, iteration: 45813
loss: 0.9649518728256226,grad_norm: 0.8973944685761127, iteration: 45814
loss: 1.0332834720611572,grad_norm: 0.9999990889513278, iteration: 45815
loss: 1.1008785963058472,grad_norm: 0.9999992276218571, iteration: 45816
loss: 1.0306413173675537,grad_norm: 0.9999991919718848, iteration: 45817
loss: 0.9756877422332764,grad_norm: 0.999999377503536, iteration: 45818
loss: 0.975096583366394,grad_norm: 0.9865452516749773, iteration: 45819
loss: 0.9709131717681885,grad_norm: 0.919061312267121, iteration: 45820
loss: 1.0533431768417358,grad_norm: 0.999999035640654, iteration: 45821
loss: 0.9832826852798462,grad_norm: 0.9999990934127915, iteration: 45822
loss: 1.0083186626434326,grad_norm: 0.9999991805239827, iteration: 45823
loss: 0.9813134670257568,grad_norm: 0.9999991247112409, iteration: 45824
loss: 1.0085868835449219,grad_norm: 0.9812397766686349, iteration: 45825
loss: 0.9699728488922119,grad_norm: 0.9999991155505288, iteration: 45826
loss: 1.0160425901412964,grad_norm: 0.9999997910528983, iteration: 45827
loss: 0.9649568796157837,grad_norm: 0.957156198057993, iteration: 45828
loss: 0.9878954291343689,grad_norm: 0.9999991941754166, iteration: 45829
loss: 1.022857904434204,grad_norm: 0.8897494759066186, iteration: 45830
loss: 1.0381757020950317,grad_norm: 0.9401316821887754, iteration: 45831
loss: 0.994718074798584,grad_norm: 0.980328198722228, iteration: 45832
loss: 0.9552481770515442,grad_norm: 0.99999927418709, iteration: 45833
loss: 0.9948186874389648,grad_norm: 0.9999991162245061, iteration: 45834
loss: 1.025215744972229,grad_norm: 0.8200298865815948, iteration: 45835
loss: 1.016420602798462,grad_norm: 0.9999991866346607, iteration: 45836
loss: 1.0085707902908325,grad_norm: 0.9999990529887847, iteration: 45837
loss: 0.9897967576980591,grad_norm: 0.8725332597350526, iteration: 45838
loss: 1.0030239820480347,grad_norm: 0.9999990458082679, iteration: 45839
loss: 1.0126129388809204,grad_norm: 0.9581146306795238, iteration: 45840
loss: 0.9714344143867493,grad_norm: 0.9999991495764853, iteration: 45841
loss: 1.0313152074813843,grad_norm: 0.8465032163671652, iteration: 45842
loss: 1.000260353088379,grad_norm: 0.9999991334736921, iteration: 45843
loss: 1.0116032361984253,grad_norm: 0.9635507622621328, iteration: 45844
loss: 0.9891452789306641,grad_norm: 0.9999990638301839, iteration: 45845
loss: 0.9962672591209412,grad_norm: 0.9999990729342026, iteration: 45846
loss: 0.964469850063324,grad_norm: 0.8380461660729428, iteration: 45847
loss: 0.9673469662666321,grad_norm: 0.8173839357613784, iteration: 45848
loss: 1.025304913520813,grad_norm: 0.9999991467280537, iteration: 45849
loss: 1.026076078414917,grad_norm: 0.9999990403017839, iteration: 45850
loss: 0.9515485167503357,grad_norm: 0.9999991790812878, iteration: 45851
loss: 1.0203694105148315,grad_norm: 0.9594533680736209, iteration: 45852
loss: 1.1872612237930298,grad_norm: 0.9999995140946326, iteration: 45853
loss: 0.9976301789283752,grad_norm: 0.9999991967252915, iteration: 45854
loss: 1.0284905433654785,grad_norm: 0.8933329460635788, iteration: 45855
loss: 0.9827862977981567,grad_norm: 0.9292426679087419, iteration: 45856
loss: 1.0146070718765259,grad_norm: 0.9521907033856569, iteration: 45857
loss: 1.0027775764465332,grad_norm: 0.8726947662123238, iteration: 45858
loss: 0.9735483527183533,grad_norm: 0.9999991775721057, iteration: 45859
loss: 1.0081501007080078,grad_norm: 0.8847489623320696, iteration: 45860
loss: 0.9843135476112366,grad_norm: 0.9999990836212141, iteration: 45861
loss: 1.0130133628845215,grad_norm: 0.8745133143180611, iteration: 45862
loss: 1.0126354694366455,grad_norm: 0.9999995498625405, iteration: 45863
loss: 0.9930902123451233,grad_norm: 0.999999047407195, iteration: 45864
loss: 1.0258022546768188,grad_norm: 0.9999989939420931, iteration: 45865
loss: 1.0184108018875122,grad_norm: 0.9063583677389014, iteration: 45866
loss: 1.0425814390182495,grad_norm: 0.9170300607614391, iteration: 45867
loss: 1.0013858079910278,grad_norm: 0.9999990827375169, iteration: 45868
loss: 0.977741003036499,grad_norm: 0.999999074441975, iteration: 45869
loss: 0.9980502128601074,grad_norm: 0.9999992119602805, iteration: 45870
loss: 1.057848334312439,grad_norm: 0.988986108705214, iteration: 45871
loss: 1.026056170463562,grad_norm: 0.9999991386267282, iteration: 45872
loss: 0.9845272302627563,grad_norm: 0.9999989957483182, iteration: 45873
loss: 1.0378201007843018,grad_norm: 0.9926197824443055, iteration: 45874
loss: 1.0120294094085693,grad_norm: 0.9999991998292229, iteration: 45875
loss: 1.0238770246505737,grad_norm: 0.9999993804816909, iteration: 45876
loss: 0.9983789324760437,grad_norm: 0.8683756886510082, iteration: 45877
loss: 1.0021616220474243,grad_norm: 0.9999989061549114, iteration: 45878
loss: 1.0011283159255981,grad_norm: 0.9999993401488005, iteration: 45879
loss: 1.0166786909103394,grad_norm: 0.999998878317078, iteration: 45880
loss: 1.0080188512802124,grad_norm: 0.9140989671327692, iteration: 45881
loss: 0.9838840365409851,grad_norm: 0.9999991756355926, iteration: 45882
loss: 1.0002555847167969,grad_norm: 0.9999989811090108, iteration: 45883
loss: 1.0013309717178345,grad_norm: 0.8785068509221899, iteration: 45884
loss: 0.9796969890594482,grad_norm: 0.9110260990976549, iteration: 45885
loss: 0.9886422157287598,grad_norm: 0.9661150243243163, iteration: 45886
loss: 0.9932236671447754,grad_norm: 0.9999990082210768, iteration: 45887
loss: 1.0161995887756348,grad_norm: 0.8127223385360416, iteration: 45888
loss: 1.0254895687103271,grad_norm: 0.999999166466812, iteration: 45889
loss: 1.0111037492752075,grad_norm: 0.9884978401563456, iteration: 45890
loss: 0.9990636110305786,grad_norm: 0.9999989212842986, iteration: 45891
loss: 1.0119538307189941,grad_norm: 0.9999991312048242, iteration: 45892
loss: 1.0187244415283203,grad_norm: 0.999999231716739, iteration: 45893
loss: 1.0213661193847656,grad_norm: 0.9999992320730565, iteration: 45894
loss: 0.9970094561576843,grad_norm: 0.9632541599762511, iteration: 45895
loss: 1.0751428604125977,grad_norm: 0.9999994995884098, iteration: 45896
loss: 0.9939972162246704,grad_norm: 0.9999994092415022, iteration: 45897
loss: 1.010968565940857,grad_norm: 0.9999990171038675, iteration: 45898
loss: 1.002798318862915,grad_norm: 0.9999991837183649, iteration: 45899
loss: 0.9979544281959534,grad_norm: 0.9108444105087737, iteration: 45900
loss: 0.9904977679252625,grad_norm: 0.9999992698526781, iteration: 45901
loss: 1.0519585609436035,grad_norm: 0.9999994085848442, iteration: 45902
loss: 1.0240001678466797,grad_norm: 0.999999174870115, iteration: 45903
loss: 0.9955101013183594,grad_norm: 0.9999992729130537, iteration: 45904
loss: 0.9844703674316406,grad_norm: 0.9999992200212103, iteration: 45905
loss: 1.0271055698394775,grad_norm: 0.9999991195601242, iteration: 45906
loss: 1.0149626731872559,grad_norm: 0.9999994633092424, iteration: 45907
loss: 1.0347630977630615,grad_norm: 0.9999991908668422, iteration: 45908
loss: 1.0567305088043213,grad_norm: 0.9986400759226567, iteration: 45909
loss: 0.9919986724853516,grad_norm: 0.9999992842521996, iteration: 45910
loss: 0.9694997668266296,grad_norm: 0.9467635997387394, iteration: 45911
loss: 1.0444142818450928,grad_norm: 0.999999247285129, iteration: 45912
loss: 0.9853314161300659,grad_norm: 0.9999990705497874, iteration: 45913
loss: 0.989450991153717,grad_norm: 0.9975720148869769, iteration: 45914
loss: 0.984951913356781,grad_norm: 0.9999991945344016, iteration: 45915
loss: 1.0031992197036743,grad_norm: 0.9708895694580555, iteration: 45916
loss: 1.0188263654708862,grad_norm: 0.9999993948130359, iteration: 45917
loss: 0.9956541657447815,grad_norm: 0.8647965621401462, iteration: 45918
loss: 0.98676598072052,grad_norm: 0.9722138402535294, iteration: 45919
loss: 1.0543545484542847,grad_norm: 0.9999992243114822, iteration: 45920
loss: 1.0196484327316284,grad_norm: 0.9999991051615249, iteration: 45921
loss: 1.0225317478179932,grad_norm: 0.9999990562461483, iteration: 45922
loss: 0.9542630314826965,grad_norm: 0.896783889264151, iteration: 45923
loss: 0.9969696402549744,grad_norm: 0.9999990892078564, iteration: 45924
loss: 0.9981251358985901,grad_norm: 0.9245444154669835, iteration: 45925
loss: 0.9901825785636902,grad_norm: 0.9999991199619329, iteration: 45926
loss: 1.0059128999710083,grad_norm: 0.9739561754015799, iteration: 45927
loss: 0.9948809146881104,grad_norm: 0.9129757635013238, iteration: 45928
loss: 0.9543905854225159,grad_norm: 0.8955786219115478, iteration: 45929
loss: 1.014111042022705,grad_norm: 0.9524627390528491, iteration: 45930
loss: 1.0121382474899292,grad_norm: 0.9999989767271275, iteration: 45931
loss: 1.04022216796875,grad_norm: 0.9451043867257407, iteration: 45932
loss: 1.0098332166671753,grad_norm: 0.9999991059038368, iteration: 45933
loss: 1.0208290815353394,grad_norm: 0.999999160669728, iteration: 45934
loss: 0.9724559187889099,grad_norm: 0.9133564346214843, iteration: 45935
loss: 1.000089168548584,grad_norm: 0.9999990094829472, iteration: 45936
loss: 1.027328610420227,grad_norm: 0.9032457721180619, iteration: 45937
loss: 1.0284488201141357,grad_norm: 0.9999990081179421, iteration: 45938
loss: 0.976886510848999,grad_norm: 0.89534728946421, iteration: 45939
loss: 1.019160270690918,grad_norm: 0.9060118244432561, iteration: 45940
loss: 0.9730790257453918,grad_norm: 0.9426419511538635, iteration: 45941
loss: 0.9944175481796265,grad_norm: 0.9999991327775839, iteration: 45942
loss: 1.0269739627838135,grad_norm: 0.9999990958406074, iteration: 45943
loss: 1.0147918462753296,grad_norm: 0.9127524808684224, iteration: 45944
loss: 1.0148199796676636,grad_norm: 0.8448405849632189, iteration: 45945
loss: 0.9918560981750488,grad_norm: 0.9999991121685864, iteration: 45946
loss: 0.9734383225440979,grad_norm: 0.9999990227434827, iteration: 45947
loss: 1.0109045505523682,grad_norm: 0.8899604114937995, iteration: 45948
loss: 1.007399559020996,grad_norm: 0.9259829207855503, iteration: 45949
loss: 0.9886062741279602,grad_norm: 0.9999990735069733, iteration: 45950
loss: 0.9621081948280334,grad_norm: 0.978561836742158, iteration: 45951
loss: 1.0294321775436401,grad_norm: 0.9081970476425858, iteration: 45952
loss: 0.9846216440200806,grad_norm: 0.9999991632568326, iteration: 45953
loss: 1.1172951459884644,grad_norm: 0.9999994863613537, iteration: 45954
loss: 1.0532381534576416,grad_norm: 0.9999991422087109, iteration: 45955
loss: 0.9792970418930054,grad_norm: 0.9999991270268606, iteration: 45956
loss: 1.0292171239852905,grad_norm: 0.9272714701135322, iteration: 45957
loss: 1.009658694267273,grad_norm: 0.999999122446264, iteration: 45958
loss: 1.0043929815292358,grad_norm: 0.8844137094412668, iteration: 45959
loss: 0.9630506038665771,grad_norm: 0.8731689688648054, iteration: 45960
loss: 1.004225492477417,grad_norm: 0.9927883590374121, iteration: 45961
loss: 0.9727499485015869,grad_norm: 0.9999990157807731, iteration: 45962
loss: 1.0104857683181763,grad_norm: 0.9892527593279044, iteration: 45963
loss: 1.0064235925674438,grad_norm: 0.9160323214309161, iteration: 45964
loss: 0.996074914932251,grad_norm: 0.9754523867526907, iteration: 45965
loss: 0.9956977963447571,grad_norm: 0.8141319962139808, iteration: 45966
loss: 0.9961255192756653,grad_norm: 0.9999990324150965, iteration: 45967
loss: 0.9890643358230591,grad_norm: 0.9999990076684854, iteration: 45968
loss: 1.0135008096694946,grad_norm: 0.9999992272874627, iteration: 45969
loss: 0.9856012463569641,grad_norm: 0.9999990267002038, iteration: 45970
loss: 1.0195509195327759,grad_norm: 0.9712282885036231, iteration: 45971
loss: 1.002933382987976,grad_norm: 0.9894438421260181, iteration: 45972
loss: 0.9998066425323486,grad_norm: 0.9999991404474192, iteration: 45973
loss: 0.9972594380378723,grad_norm: 0.9669408612515216, iteration: 45974
loss: 0.9861986637115479,grad_norm: 0.9239782173058027, iteration: 45975
loss: 1.037760853767395,grad_norm: 0.94871693237018, iteration: 45976
loss: 1.0157548189163208,grad_norm: 0.9999992341648267, iteration: 45977
loss: 0.9756624102592468,grad_norm: 0.8167368948581442, iteration: 45978
loss: 0.9978853464126587,grad_norm: 0.999999251099717, iteration: 45979
loss: 1.0052201747894287,grad_norm: 0.9685657806071806, iteration: 45980
loss: 0.984596312046051,grad_norm: 1.0000000555720547, iteration: 45981
loss: 0.9703720808029175,grad_norm: 0.8875564394215538, iteration: 45982
loss: 0.9889699220657349,grad_norm: 0.9999990763299723, iteration: 45983
loss: 1.040340542793274,grad_norm: 0.999999081766621, iteration: 45984
loss: 1.0097101926803589,grad_norm: 0.999999022940037, iteration: 45985
loss: 0.9574324488639832,grad_norm: 0.9999990116110782, iteration: 45986
loss: 1.042461633682251,grad_norm: 0.9116234793729528, iteration: 45987
loss: 0.9908684492111206,grad_norm: 0.9276000188299502, iteration: 45988
loss: 0.9973764419555664,grad_norm: 0.9645669400789262, iteration: 45989
loss: 0.9739843010902405,grad_norm: 0.958738559735832, iteration: 45990
loss: 0.9661533236503601,grad_norm: 0.9730036960441825, iteration: 45991
loss: 0.9836345314979553,grad_norm: 0.9889194800101009, iteration: 45992
loss: 0.9739430546760559,grad_norm: 0.8835615781961278, iteration: 45993
loss: 0.9815968871116638,grad_norm: 0.9999992124744852, iteration: 45994
loss: 1.003977656364441,grad_norm: 0.9999990124918651, iteration: 45995
loss: 0.989867627620697,grad_norm: 0.9999991827922587, iteration: 45996
loss: 1.0169588327407837,grad_norm: 0.9354067371598193, iteration: 45997
loss: 1.0311639308929443,grad_norm: 0.9864126211217462, iteration: 45998
loss: 1.0067901611328125,grad_norm: 0.8793645015996754, iteration: 45999
loss: 1.071051001548767,grad_norm: 0.9999991014892532, iteration: 46000
loss: 1.0292569398880005,grad_norm: 0.999999132359735, iteration: 46001
loss: 0.9828983545303345,grad_norm: 0.8562227263270974, iteration: 46002
loss: 0.9741488099098206,grad_norm: 0.9553765522528256, iteration: 46003
loss: 0.9882878065109253,grad_norm: 0.999999125351287, iteration: 46004
loss: 0.9857406616210938,grad_norm: 0.8794944252017881, iteration: 46005
loss: 0.9971548318862915,grad_norm: 0.9999990097079532, iteration: 46006
loss: 1.0168596506118774,grad_norm: 0.9999990943529348, iteration: 46007
loss: 0.9845340251922607,grad_norm: 0.9999991490777497, iteration: 46008
loss: 1.0526193380355835,grad_norm: 0.9999992124362483, iteration: 46009
loss: 0.9985433220863342,grad_norm: 0.9999989690391321, iteration: 46010
loss: 1.029798150062561,grad_norm: 0.9999992977313938, iteration: 46011
loss: 1.0361288785934448,grad_norm: 0.9999991059786985, iteration: 46012
loss: 1.0056157112121582,grad_norm: 0.9004442521320684, iteration: 46013
loss: 1.0061396360397339,grad_norm: 0.9384032923631178, iteration: 46014
loss: 1.0087158679962158,grad_norm: 0.7360271156343403, iteration: 46015
loss: 0.9946966767311096,grad_norm: 0.9999989512419776, iteration: 46016
loss: 1.0117244720458984,grad_norm: 0.8196763691323152, iteration: 46017
loss: 1.0260181427001953,grad_norm: 0.8671980506187867, iteration: 46018
loss: 1.0277565717697144,grad_norm: 0.9999989894434625, iteration: 46019
loss: 0.9732605814933777,grad_norm: 0.9731665460256979, iteration: 46020
loss: 1.0080945491790771,grad_norm: 0.8845342462935433, iteration: 46021
loss: 0.9764091372489929,grad_norm: 0.9999990733481335, iteration: 46022
loss: 0.9670743942260742,grad_norm: 0.9520159472692288, iteration: 46023
loss: 0.9787124395370483,grad_norm: 0.9450460308610824, iteration: 46024
loss: 1.0910989046096802,grad_norm: 0.9999994777251995, iteration: 46025
loss: 0.9999799728393555,grad_norm: 0.9999991765989088, iteration: 46026
loss: 0.9728030562400818,grad_norm: 0.9277519737577469, iteration: 46027
loss: 0.9892880320549011,grad_norm: 0.9999989952260592, iteration: 46028
loss: 1.015297293663025,grad_norm: 0.909681117289063, iteration: 46029
loss: 1.0521838665008545,grad_norm: 0.999999298746971, iteration: 46030
loss: 1.0219309329986572,grad_norm: 0.999999362913932, iteration: 46031
loss: 1.0250575542449951,grad_norm: 0.9999994246717151, iteration: 46032
loss: 0.9711101055145264,grad_norm: 0.9688076975439568, iteration: 46033
loss: 1.0052911043167114,grad_norm: 0.8282363265196512, iteration: 46034
loss: 1.0206780433654785,grad_norm: 0.9999990607798752, iteration: 46035
loss: 1.000576138496399,grad_norm: 0.9999992472510291, iteration: 46036
loss: 1.0453310012817383,grad_norm: 0.9999990218381011, iteration: 46037
loss: 1.001675009727478,grad_norm: 0.9718454065314088, iteration: 46038
loss: 1.0272254943847656,grad_norm: 0.8816437888349168, iteration: 46039
loss: 1.0294013023376465,grad_norm: 0.8979276019287518, iteration: 46040
loss: 1.0110551118850708,grad_norm: 0.9999990481000439, iteration: 46041
loss: 1.0129443407058716,grad_norm: 0.9286089499834013, iteration: 46042
loss: 0.9892371296882629,grad_norm: 0.7729787036786189, iteration: 46043
loss: 1.0258764028549194,grad_norm: 0.9999990214534822, iteration: 46044
loss: 0.9749271273612976,grad_norm: 0.9999991700441859, iteration: 46045
loss: 1.0028630495071411,grad_norm: 0.8244058462771129, iteration: 46046
loss: 0.9902767539024353,grad_norm: 0.9798972758812559, iteration: 46047
loss: 1.0163607597351074,grad_norm: 0.9967185053580189, iteration: 46048
loss: 1.0090653896331787,grad_norm: 0.9890912520928402, iteration: 46049
loss: 1.0228954553604126,grad_norm: 0.9999990582509081, iteration: 46050
loss: 1.0207072496414185,grad_norm: 0.9999992338099313, iteration: 46051
loss: 1.0281344652175903,grad_norm: 0.9999989963540856, iteration: 46052
loss: 1.0197540521621704,grad_norm: 0.9999990258859421, iteration: 46053
loss: 1.067247986793518,grad_norm: 0.9999996835095427, iteration: 46054
loss: 0.9964442253112793,grad_norm: 0.8831284971672548, iteration: 46055
loss: 0.9985246658325195,grad_norm: 0.999999018941404, iteration: 46056
loss: 1.0138152837753296,grad_norm: 0.9162306661336509, iteration: 46057
loss: 1.0027577877044678,grad_norm: 0.9999992006067858, iteration: 46058
loss: 0.9765340685844421,grad_norm: 0.999999056464871, iteration: 46059
loss: 1.0514013767242432,grad_norm: 0.9999989918243659, iteration: 46060
loss: 1.0011119842529297,grad_norm: 0.9999995513906997, iteration: 46061
loss: 1.0255498886108398,grad_norm: 0.9444605561109328, iteration: 46062
loss: 1.0292741060256958,grad_norm: 0.960586897930032, iteration: 46063
loss: 1.0034345388412476,grad_norm: 0.930332420106854, iteration: 46064
loss: 1.1439629793167114,grad_norm: 0.9999998650644012, iteration: 46065
loss: 1.013068675994873,grad_norm: 0.9023156532476257, iteration: 46066
loss: 1.0148096084594727,grad_norm: 0.7939383039439164, iteration: 46067
loss: 1.045567274093628,grad_norm: 0.9615092665076214, iteration: 46068
loss: 0.9743986129760742,grad_norm: 0.9999990709942888, iteration: 46069
loss: 0.9570344686508179,grad_norm: 0.999999105184954, iteration: 46070
loss: 0.9933342933654785,grad_norm: 0.9748037982924699, iteration: 46071
loss: 1.0159283876419067,grad_norm: 0.9973149561455008, iteration: 46072
loss: 0.9880543351173401,grad_norm: 0.9606558722953674, iteration: 46073
loss: 0.9724032878875732,grad_norm: 0.8671782342608828, iteration: 46074
loss: 1.023902416229248,grad_norm: 0.8154898538097078, iteration: 46075
loss: 0.9961212277412415,grad_norm: 0.9693374806550827, iteration: 46076
loss: 1.000143051147461,grad_norm: 0.9978247897229492, iteration: 46077
loss: 0.9767763018608093,grad_norm: 0.9999990801004439, iteration: 46078
loss: 1.0137205123901367,grad_norm: 0.9982735768338931, iteration: 46079
loss: 1.0007007122039795,grad_norm: 0.9999989377588805, iteration: 46080
loss: 1.0230276584625244,grad_norm: 0.9999990860258776, iteration: 46081
loss: 1.0311384201049805,grad_norm: 0.9999991206260482, iteration: 46082
loss: 1.0275799036026,grad_norm: 0.9999991212982979, iteration: 46083
loss: 1.0402905941009521,grad_norm: 0.9999992518449452, iteration: 46084
loss: 0.9938492178916931,grad_norm: 0.9829834300372892, iteration: 46085
loss: 1.0003156661987305,grad_norm: 0.9439227834720153, iteration: 46086
loss: 0.9816449284553528,grad_norm: 0.9999990311264253, iteration: 46087
loss: 1.0051132440567017,grad_norm: 0.8444053485060582, iteration: 46088
loss: 1.0201835632324219,grad_norm: 0.9765815173396685, iteration: 46089
loss: 0.9843553900718689,grad_norm: 0.9894762267237399, iteration: 46090
loss: 1.0165966749191284,grad_norm: 0.9256639136426645, iteration: 46091
loss: 1.0194270610809326,grad_norm: 0.9999994205973236, iteration: 46092
loss: 0.9990869164466858,grad_norm: 0.9999992116227545, iteration: 46093
loss: 0.9979630708694458,grad_norm: 0.9999990696552844, iteration: 46094
loss: 0.9606395959854126,grad_norm: 0.9999995097511603, iteration: 46095
loss: 1.0360736846923828,grad_norm: 0.9999994887364084, iteration: 46096
loss: 1.0059224367141724,grad_norm: 0.9999996957095807, iteration: 46097
loss: 1.0118050575256348,grad_norm: 0.8879847893310309, iteration: 46098
loss: 1.0113238096237183,grad_norm: 0.9042496686877043, iteration: 46099
loss: 1.0226716995239258,grad_norm: 0.9999992431136158, iteration: 46100
loss: 1.0394572019577026,grad_norm: 0.9999991014816226, iteration: 46101
loss: 0.9390608072280884,grad_norm: 0.897745320379687, iteration: 46102
loss: 1.097048282623291,grad_norm: 0.9999996829880297, iteration: 46103
loss: 0.9973846077919006,grad_norm: 0.9999999004720713, iteration: 46104
loss: 1.0126185417175293,grad_norm: 0.9999991899134711, iteration: 46105
loss: 0.9794870615005493,grad_norm: 0.999999281554553, iteration: 46106
loss: 0.9766142964363098,grad_norm: 0.9999992030094557, iteration: 46107
loss: 1.0423630475997925,grad_norm: 0.9999994077283535, iteration: 46108
loss: 0.9955575466156006,grad_norm: 0.999999048695739, iteration: 46109
loss: 0.994501531124115,grad_norm: 0.8836172785860213, iteration: 46110
loss: 1.0360184907913208,grad_norm: 0.9999993923932456, iteration: 46111
loss: 1.0024276971817017,grad_norm: 0.9999990091611394, iteration: 46112
loss: 0.9838094115257263,grad_norm: 0.9999992316291527, iteration: 46113
loss: 0.987786591053009,grad_norm: 0.8297561751144024, iteration: 46114
loss: 1.007382869720459,grad_norm: 0.999999167544259, iteration: 46115
loss: 1.0022327899932861,grad_norm: 0.9999991412546583, iteration: 46116
loss: 1.031087875366211,grad_norm: 0.9856256987876597, iteration: 46117
loss: 0.9685620069503784,grad_norm: 0.9999991747800344, iteration: 46118
loss: 1.018998146057129,grad_norm: 0.9683051591329886, iteration: 46119
loss: 1.0185762643814087,grad_norm: 0.999999048391632, iteration: 46120
loss: 1.0108195543289185,grad_norm: 0.9999991981942754, iteration: 46121
loss: 1.0324338674545288,grad_norm: 0.9999994381677149, iteration: 46122
loss: 1.0165519714355469,grad_norm: 0.9753344594153359, iteration: 46123
loss: 1.1226892471313477,grad_norm: 0.9999994719408349, iteration: 46124
loss: 1.016666293144226,grad_norm: 0.9723916549334033, iteration: 46125
loss: 1.0286693572998047,grad_norm: 0.999999172074428, iteration: 46126
loss: 1.040359377861023,grad_norm: 0.9999993489941539, iteration: 46127
loss: 0.995485246181488,grad_norm: 0.9999992956498782, iteration: 46128
loss: 1.0178388357162476,grad_norm: 0.9999991334046799, iteration: 46129
loss: 1.0049744844436646,grad_norm: 0.9999991352543951, iteration: 46130
loss: 0.9891618490219116,grad_norm: 0.9161401248622024, iteration: 46131
loss: 0.9806631803512573,grad_norm: 0.9966196298810028, iteration: 46132
loss: 1.0819460153579712,grad_norm: 0.9999996742002851, iteration: 46133
loss: 1.0338982343673706,grad_norm: 0.8874858261454975, iteration: 46134
loss: 0.9896900653839111,grad_norm: 0.9351218806094502, iteration: 46135
loss: 0.9849672913551331,grad_norm: 0.9999990708255468, iteration: 46136
loss: 1.00819730758667,grad_norm: 0.9999991599748735, iteration: 46137
loss: 1.0020548105239868,grad_norm: 0.8036638462904121, iteration: 46138
loss: 0.9776009321212769,grad_norm: 0.9999990457640963, iteration: 46139
loss: 0.9691603779792786,grad_norm: 0.9569116964902664, iteration: 46140
loss: 0.9751473665237427,grad_norm: 0.9650419889298443, iteration: 46141
loss: 0.9877471923828125,grad_norm: 0.9024841593875148, iteration: 46142
loss: 1.0126196146011353,grad_norm: 0.9999992368990542, iteration: 46143
loss: 0.9848871231079102,grad_norm: 0.9999991465921282, iteration: 46144
loss: 1.095779299736023,grad_norm: 0.9999994967022584, iteration: 46145
loss: 1.0241607427597046,grad_norm: 0.9591011358314114, iteration: 46146
loss: 1.0164573192596436,grad_norm: 0.9389290775470256, iteration: 46147
loss: 1.0155657529830933,grad_norm: 0.9999990172725229, iteration: 46148
loss: 0.9924958944320679,grad_norm: 0.9999991169838729, iteration: 46149
loss: 1.0369114875793457,grad_norm: 0.9999993442811294, iteration: 46150
loss: 1.02977454662323,grad_norm: 0.999999105798779, iteration: 46151
loss: 1.0493780374526978,grad_norm: 0.9825195229212146, iteration: 46152
loss: 0.984769880771637,grad_norm: 0.9422382127956356, iteration: 46153
loss: 0.9905238151550293,grad_norm: 0.9999990970243403, iteration: 46154
loss: 1.0089328289031982,grad_norm: 0.7994340951107696, iteration: 46155
loss: 0.9825911521911621,grad_norm: 0.9999990342239643, iteration: 46156
loss: 1.0065028667449951,grad_norm: 0.9961652400959679, iteration: 46157
loss: 1.0498393774032593,grad_norm: 0.8466022235928503, iteration: 46158
loss: 0.9923779368400574,grad_norm: 0.999999220522242, iteration: 46159
loss: 1.0388374328613281,grad_norm: 0.90174251132299, iteration: 46160
loss: 0.9717268943786621,grad_norm: 0.8694158299468031, iteration: 46161
loss: 1.025078535079956,grad_norm: 0.8891160812819477, iteration: 46162
loss: 1.0255892276763916,grad_norm: 0.9149078301170676, iteration: 46163
loss: 1.0208492279052734,grad_norm: 0.9315527033853573, iteration: 46164
loss: 0.9901133179664612,grad_norm: 0.9999991554162038, iteration: 46165
loss: 0.9910030364990234,grad_norm: 0.9999990209047398, iteration: 46166
loss: 0.9978134036064148,grad_norm: 0.8738280126023227, iteration: 46167
loss: 0.9786468148231506,grad_norm: 0.9999990324255684, iteration: 46168
loss: 1.0091986656188965,grad_norm: 0.8500934481138428, iteration: 46169
loss: 1.01979398727417,grad_norm: 0.9287027295161244, iteration: 46170
loss: 1.0350632667541504,grad_norm: 0.99999917110302, iteration: 46171
loss: 1.0117379426956177,grad_norm: 0.9999991184496521, iteration: 46172
loss: 0.9921740889549255,grad_norm: 0.999999132211936, iteration: 46173
loss: 1.0189626216888428,grad_norm: 0.9999999530380526, iteration: 46174
loss: 1.0235633850097656,grad_norm: 0.9123011332701203, iteration: 46175
loss: 0.9891303777694702,grad_norm: 0.9509268969590099, iteration: 46176
loss: 1.0209834575653076,grad_norm: 0.9999990658360673, iteration: 46177
loss: 1.016469120979309,grad_norm: 0.8414761450941647, iteration: 46178
loss: 0.993228018283844,grad_norm: 0.8332684135399947, iteration: 46179
loss: 1.0282617807388306,grad_norm: 0.9999991785039601, iteration: 46180
loss: 1.0021418333053589,grad_norm: 0.9327177790004122, iteration: 46181
loss: 1.0311959981918335,grad_norm: 0.9999992187497742, iteration: 46182
loss: 1.0385093688964844,grad_norm: 0.9999994662070439, iteration: 46183
loss: 0.9730175137519836,grad_norm: 0.9999992811456009, iteration: 46184
loss: 1.035279393196106,grad_norm: 0.9999991175281004, iteration: 46185
loss: 0.9969353079795837,grad_norm: 0.9999993309871471, iteration: 46186
loss: 0.9770613312721252,grad_norm: 0.9867668537793564, iteration: 46187
loss: 1.011673927307129,grad_norm: 0.9077492610394826, iteration: 46188
loss: 1.03585946559906,grad_norm: 0.9999996487531763, iteration: 46189
loss: 1.0342905521392822,grad_norm: 0.9999989268702707, iteration: 46190
loss: 0.9836109280586243,grad_norm: 0.9999990661432634, iteration: 46191
loss: 0.9874700307846069,grad_norm: 0.9442779141581288, iteration: 46192
loss: 1.0060147047042847,grad_norm: 0.9999991346100083, iteration: 46193
loss: 0.9565286636352539,grad_norm: 0.8770901292589028, iteration: 46194
loss: 0.9861162900924683,grad_norm: 0.9999989724385064, iteration: 46195
loss: 1.0218888521194458,grad_norm: 0.9040360629324881, iteration: 46196
loss: 0.9758589267730713,grad_norm: 0.999999217629397, iteration: 46197
loss: 1.0204176902770996,grad_norm: 0.967001690676296, iteration: 46198
loss: 1.0211927890777588,grad_norm: 0.9999990254673623, iteration: 46199
loss: 0.9914607405662537,grad_norm: 0.9999990498698976, iteration: 46200
loss: 1.0230985879898071,grad_norm: 0.9950695837153064, iteration: 46201
loss: 1.0324088335037231,grad_norm: 0.9999990860183801, iteration: 46202
loss: 1.0665572881698608,grad_norm: 0.9999993933839724, iteration: 46203
loss: 0.9875347018241882,grad_norm: 0.9128181961519791, iteration: 46204
loss: 1.0100610256195068,grad_norm: 0.8616082739541356, iteration: 46205
loss: 1.034837245941162,grad_norm: 0.9834341429123372, iteration: 46206
loss: 0.9811484813690186,grad_norm: 0.9999991197982467, iteration: 46207
loss: 1.0219382047653198,grad_norm: 0.9999992287729963, iteration: 46208
loss: 1.0085482597351074,grad_norm: 0.9105256049633306, iteration: 46209
loss: 1.026068091392517,grad_norm: 0.9999992565803695, iteration: 46210
loss: 0.9678098559379578,grad_norm: 0.9999990128179951, iteration: 46211
loss: 0.9816096425056458,grad_norm: 0.8901998929936429, iteration: 46212
loss: 0.9971039295196533,grad_norm: 0.885877791031187, iteration: 46213
loss: 1.0164930820465088,grad_norm: 0.9490906362658253, iteration: 46214
loss: 1.0125102996826172,grad_norm: 0.9999989923858487, iteration: 46215
loss: 1.1392414569854736,grad_norm: 0.9999994446218785, iteration: 46216
loss: 0.9853105545043945,grad_norm: 0.9813277510650398, iteration: 46217
loss: 0.9805641174316406,grad_norm: 0.9999991961917865, iteration: 46218
loss: 1.0158392190933228,grad_norm: 0.9009269584723298, iteration: 46219
loss: 1.0080586671829224,grad_norm: 0.9640914413655877, iteration: 46220
loss: 1.0164282321929932,grad_norm: 0.9999989938878125, iteration: 46221
loss: 0.9920538067817688,grad_norm: 0.9999989497932091, iteration: 46222
loss: 0.9932168126106262,grad_norm: 0.9999992954555625, iteration: 46223
loss: 1.031048059463501,grad_norm: 0.9999997200899327, iteration: 46224
loss: 1.0001407861709595,grad_norm: 0.999999162570404, iteration: 46225
loss: 1.014426589012146,grad_norm: 0.9999991906280683, iteration: 46226
loss: 1.0455193519592285,grad_norm: 0.9315054506287089, iteration: 46227
loss: 0.9601244330406189,grad_norm: 0.9999991896049163, iteration: 46228
loss: 1.0049024820327759,grad_norm: 0.9999990831763258, iteration: 46229
loss: 1.0255459547042847,grad_norm: 0.9999991216143911, iteration: 46230
loss: 1.0219628810882568,grad_norm: 0.8839038066813566, iteration: 46231
loss: 1.028605341911316,grad_norm: 0.9999995054988449, iteration: 46232
loss: 1.002427577972412,grad_norm: 0.9206481825173123, iteration: 46233
loss: 1.009785532951355,grad_norm: 0.9999991221565753, iteration: 46234
loss: 0.9921920299530029,grad_norm: 0.8176277978684147, iteration: 46235
loss: 1.0088609457015991,grad_norm: 0.9903051770244322, iteration: 46236
loss: 0.9748281836509705,grad_norm: 0.986771945774522, iteration: 46237
loss: 0.9867434501647949,grad_norm: 0.9782279136006744, iteration: 46238
loss: 0.9996974468231201,grad_norm: 0.9250139543333095, iteration: 46239
loss: 0.9919450283050537,grad_norm: 0.9692853689086771, iteration: 46240
loss: 1.0452356338500977,grad_norm: 0.9836318728243065, iteration: 46241
loss: 1.0017311573028564,grad_norm: 0.9999990502148576, iteration: 46242
loss: 1.0036805868148804,grad_norm: 0.999999030506508, iteration: 46243
loss: 0.9968721866607666,grad_norm: 0.9476797373712417, iteration: 46244
loss: 1.0073883533477783,grad_norm: 0.9999996248555969, iteration: 46245
loss: 0.9742090106010437,grad_norm: 0.7990352588423839, iteration: 46246
loss: 1.0036338567733765,grad_norm: 0.9999997957282826, iteration: 46247
loss: 1.008492112159729,grad_norm: 0.8912979619143083, iteration: 46248
loss: 1.0212376117706299,grad_norm: 0.9537048425949103, iteration: 46249
loss: 1.0283282995224,grad_norm: 0.9966712202804018, iteration: 46250
loss: 1.0128926038742065,grad_norm: 0.9999990307095743, iteration: 46251
loss: 0.9856411218643188,grad_norm: 0.9999991706204434, iteration: 46252
loss: 1.0328706502914429,grad_norm: 0.9999993001745725, iteration: 46253
loss: 1.0341691970825195,grad_norm: 0.9999991810337681, iteration: 46254
loss: 0.996502161026001,grad_norm: 0.9379722762946671, iteration: 46255
loss: 1.0225836038589478,grad_norm: 0.8507346356445288, iteration: 46256
loss: 1.0229579210281372,grad_norm: 0.9076733096156712, iteration: 46257
loss: 0.9771121144294739,grad_norm: 0.9999991255602378, iteration: 46258
loss: 1.0044537782669067,grad_norm: 0.9039566512203941, iteration: 46259
loss: 1.048504114151001,grad_norm: 0.9999990412943689, iteration: 46260
loss: 0.985818088054657,grad_norm: 0.9999992130315575, iteration: 46261
loss: 0.9801168441772461,grad_norm: 0.9999989652354946, iteration: 46262
loss: 1.0164623260498047,grad_norm: 0.9452020065177215, iteration: 46263
loss: 1.0049561262130737,grad_norm: 0.9593371783497988, iteration: 46264
loss: 1.0460615158081055,grad_norm: 0.9999989898223686, iteration: 46265
loss: 1.0245896577835083,grad_norm: 0.947904350159149, iteration: 46266
loss: 1.0058183670043945,grad_norm: 0.9689229926843971, iteration: 46267
loss: 1.036108136177063,grad_norm: 0.9999998160766398, iteration: 46268
loss: 0.9900917410850525,grad_norm: 0.9999992508036775, iteration: 46269
loss: 0.9799141883850098,grad_norm: 0.9999991054858564, iteration: 46270
loss: 0.9892175793647766,grad_norm: 0.9538714640308484, iteration: 46271
loss: 1.0108529329299927,grad_norm: 0.8646209579544875, iteration: 46272
loss: 1.028757929801941,grad_norm: 0.999999636047479, iteration: 46273
loss: 1.0155092477798462,grad_norm: 0.8767009540566493, iteration: 46274
loss: 1.0048798322677612,grad_norm: 0.8870992670483763, iteration: 46275
loss: 0.9894781708717346,grad_norm: 0.9999991587115421, iteration: 46276
loss: 0.9858805537223816,grad_norm: 0.999999156092881, iteration: 46277
loss: 1.0040913820266724,grad_norm: 0.9999990471040419, iteration: 46278
loss: 1.0065048933029175,grad_norm: 0.9999992739392702, iteration: 46279
loss: 1.0149134397506714,grad_norm: 0.9599478164278068, iteration: 46280
loss: 1.0057474374771118,grad_norm: 0.9999990614045264, iteration: 46281
loss: 1.025999665260315,grad_norm: 0.9999994296534522, iteration: 46282
loss: 1.0573229789733887,grad_norm: 0.9999995416766989, iteration: 46283
loss: 1.0252091884613037,grad_norm: 0.9999991280236016, iteration: 46284
loss: 1.0199095010757446,grad_norm: 0.9999991099501616, iteration: 46285
loss: 1.016518473625183,grad_norm: 0.9999998503174236, iteration: 46286
loss: 1.009034276008606,grad_norm: 0.999998958955224, iteration: 46287
loss: 1.0128748416900635,grad_norm: 0.9849257006425851, iteration: 46288
loss: 0.9698465466499329,grad_norm: 0.9483798583415549, iteration: 46289
loss: 1.0244219303131104,grad_norm: 0.9505588477144982, iteration: 46290
loss: 0.9856467843055725,grad_norm: 0.9716507188708198, iteration: 46291
loss: 0.9825268387794495,grad_norm: 0.9741520434153408, iteration: 46292
loss: 0.9942678213119507,grad_norm: 0.9999994643511182, iteration: 46293
loss: 1.0066063404083252,grad_norm: 0.8189778663239441, iteration: 46294
loss: 0.993417501449585,grad_norm: 0.9247165676642647, iteration: 46295
loss: 0.9663044810295105,grad_norm: 0.9999989908313971, iteration: 46296
loss: 0.9744731187820435,grad_norm: 0.9681577695621151, iteration: 46297
loss: 0.9872002601623535,grad_norm: 0.9999991923709837, iteration: 46298
loss: 1.0084083080291748,grad_norm: 0.9999992210445089, iteration: 46299
loss: 0.9990893006324768,grad_norm: 0.9583712066980814, iteration: 46300
loss: 1.035840630531311,grad_norm: 0.9625886498076279, iteration: 46301
loss: 1.0066536664962769,grad_norm: 0.9493941876453288, iteration: 46302
loss: 1.0323882102966309,grad_norm: 0.999999149465781, iteration: 46303
loss: 0.9602259397506714,grad_norm: 0.9016508679746567, iteration: 46304
loss: 1.0466545820236206,grad_norm: 0.9239450066324371, iteration: 46305
loss: 1.0191506147384644,grad_norm: 0.8989465374240622, iteration: 46306
loss: 1.0090076923370361,grad_norm: 0.9999991019434596, iteration: 46307
loss: 0.985420823097229,grad_norm: 0.9999991227017098, iteration: 46308
loss: 1.0132625102996826,grad_norm: 0.9265833525210473, iteration: 46309
loss: 1.0016475915908813,grad_norm: 0.9999992404686985, iteration: 46310
loss: 0.9957315921783447,grad_norm: 0.8984940581916994, iteration: 46311
loss: 0.9539394974708557,grad_norm: 0.884667743119304, iteration: 46312
loss: 0.9590080380439758,grad_norm: 0.9156302699005674, iteration: 46313
loss: 1.0201163291931152,grad_norm: 0.9932022959449556, iteration: 46314
loss: 0.9797917604446411,grad_norm: 0.999999078864558, iteration: 46315
loss: 0.9567680358886719,grad_norm: 0.8556756630616252, iteration: 46316
loss: 0.9951372146606445,grad_norm: 0.9999990753175654, iteration: 46317
loss: 1.0222879648208618,grad_norm: 0.8980737415636436, iteration: 46318
loss: 1.0105748176574707,grad_norm: 0.9449108803925723, iteration: 46319
loss: 0.9679737091064453,grad_norm: 0.9999991409015628, iteration: 46320
loss: 0.9564071893692017,grad_norm: 0.999999172303967, iteration: 46321
loss: 1.0101298093795776,grad_norm: 0.8209506081564616, iteration: 46322
loss: 0.9819175004959106,grad_norm: 0.9999991566071053, iteration: 46323
loss: 1.0120536088943481,grad_norm: 0.9334789712431268, iteration: 46324
loss: 0.9902125000953674,grad_norm: 0.9398478527064613, iteration: 46325
loss: 0.9941604137420654,grad_norm: 0.9445926068721393, iteration: 46326
loss: 1.011594533920288,grad_norm: 0.9142093716798673, iteration: 46327
loss: 1.0012849569320679,grad_norm: 0.9999991378048153, iteration: 46328
loss: 0.9726252555847168,grad_norm: 0.9339317123009155, iteration: 46329
loss: 1.0128682851791382,grad_norm: 0.9999991166169053, iteration: 46330
loss: 1.0008790493011475,grad_norm: 0.8104268024188437, iteration: 46331
loss: 0.988598108291626,grad_norm: 0.9918106090874067, iteration: 46332
loss: 0.998173177242279,grad_norm: 0.999999162196234, iteration: 46333
loss: 0.9999895095825195,grad_norm: 0.9154400293282735, iteration: 46334
loss: 1.0369445085525513,grad_norm: 0.9999998272761356, iteration: 46335
loss: 0.9745758771896362,grad_norm: 0.926179503401829, iteration: 46336
loss: 0.9771198630332947,grad_norm: 0.9999997724704452, iteration: 46337
loss: 1.0195469856262207,grad_norm: 0.8163674333953752, iteration: 46338
loss: 1.0139440298080444,grad_norm: 0.979857527184966, iteration: 46339
loss: 1.0407782793045044,grad_norm: 0.8860302285572164, iteration: 46340
loss: 0.9861208200454712,grad_norm: 0.9064386681426995, iteration: 46341
loss: 0.9828562140464783,grad_norm: 0.9328606551871239, iteration: 46342
loss: 1.0412123203277588,grad_norm: 0.9999990336929584, iteration: 46343
loss: 1.0136183500289917,grad_norm: 0.8563182052536101, iteration: 46344
loss: 0.9750388264656067,grad_norm: 0.8978520495114235, iteration: 46345
loss: 1.013532280921936,grad_norm: 0.9655665462793863, iteration: 46346
loss: 0.9870287775993347,grad_norm: 0.9530585643597684, iteration: 46347
loss: 1.0167505741119385,grad_norm: 0.9999993857546673, iteration: 46348
loss: 1.011833667755127,grad_norm: 0.9870720790245593, iteration: 46349
loss: 0.9779355525970459,grad_norm: 0.8946066589298112, iteration: 46350
loss: 1.0282503366470337,grad_norm: 0.9831399597602022, iteration: 46351
loss: 1.0033639669418335,grad_norm: 0.9723867274341672, iteration: 46352
loss: 1.0165477991104126,grad_norm: 0.9301708768730423, iteration: 46353
loss: 1.0509675741195679,grad_norm: 0.9999989846413312, iteration: 46354
loss: 0.9850874543190002,grad_norm: 0.8743358570261711, iteration: 46355
loss: 0.9936419129371643,grad_norm: 0.9169662236832781, iteration: 46356
loss: 1.0012341737747192,grad_norm: 0.9999990198742716, iteration: 46357
loss: 1.0344781875610352,grad_norm: 0.8706205832427553, iteration: 46358
loss: 0.9973811507225037,grad_norm: 0.8882274549226911, iteration: 46359
loss: 1.0069092512130737,grad_norm: 0.9999989642079823, iteration: 46360
loss: 0.9867818355560303,grad_norm: 0.8918679028491203, iteration: 46361
loss: 0.9992761015892029,grad_norm: 0.8066301624323587, iteration: 46362
loss: 0.9984244108200073,grad_norm: 0.8930354294829094, iteration: 46363
loss: 1.029518723487854,grad_norm: 0.8576997491689708, iteration: 46364
loss: 1.0146318674087524,grad_norm: 0.9999991429273964, iteration: 46365
loss: 0.9954677820205688,grad_norm: 0.9348680465319705, iteration: 46366
loss: 1.0057053565979004,grad_norm: 0.967065366115425, iteration: 46367
loss: 0.9675235748291016,grad_norm: 0.99703471645895, iteration: 46368
loss: 0.9777567982673645,grad_norm: 0.934553097383046, iteration: 46369
loss: 1.0253878831863403,grad_norm: 0.9999990470528894, iteration: 46370
loss: 1.037717342376709,grad_norm: 0.9014009475427069, iteration: 46371
loss: 0.9974496960639954,grad_norm: 0.9999991563616245, iteration: 46372
loss: 0.9945815801620483,grad_norm: 0.7924715798968176, iteration: 46373
loss: 1.0507726669311523,grad_norm: 0.9999990558802554, iteration: 46374
loss: 0.981071949005127,grad_norm: 0.9025901352260648, iteration: 46375
loss: 1.0198553800582886,grad_norm: 0.9999992894944262, iteration: 46376
loss: 1.0296680927276611,grad_norm: 0.999999192628974, iteration: 46377
loss: 0.9929375052452087,grad_norm: 0.9055843933182461, iteration: 46378
loss: 1.0013505220413208,grad_norm: 0.9667766017715572, iteration: 46379
loss: 1.0123112201690674,grad_norm: 0.8190615614601282, iteration: 46380
loss: 0.9721469879150391,grad_norm: 0.8745503184529239, iteration: 46381
loss: 1.0177541971206665,grad_norm: 0.8832808878350598, iteration: 46382
loss: 0.9926750659942627,grad_norm: 0.9770671761094192, iteration: 46383
loss: 1.0478907823562622,grad_norm: 0.9999991698936753, iteration: 46384
loss: 1.0371088981628418,grad_norm: 0.9999991327190435, iteration: 46385
loss: 1.0088412761688232,grad_norm: 0.9999991635344654, iteration: 46386
loss: 1.029654622077942,grad_norm: 0.8729412451511145, iteration: 46387
loss: 1.0035865306854248,grad_norm: 0.9380627275206506, iteration: 46388
loss: 1.0288819074630737,grad_norm: 0.9541643712859268, iteration: 46389
loss: 1.029199242591858,grad_norm: 0.9003907670622862, iteration: 46390
loss: 0.9863452315330505,grad_norm: 0.9999991018188819, iteration: 46391
loss: 1.0089689493179321,grad_norm: 0.9999996280377723, iteration: 46392
loss: 1.0240510702133179,grad_norm: 0.9999992450213451, iteration: 46393
loss: 1.0102533102035522,grad_norm: 0.949733252735279, iteration: 46394
loss: 0.9775905013084412,grad_norm: 0.8805242123521104, iteration: 46395
loss: 1.0058064460754395,grad_norm: 0.9999991001479401, iteration: 46396
loss: 0.9779663681983948,grad_norm: 0.8977497263380512, iteration: 46397
loss: 1.0148513317108154,grad_norm: 0.9999993195301026, iteration: 46398
loss: 1.0131707191467285,grad_norm: 0.8626346740753041, iteration: 46399
loss: 1.0318576097488403,grad_norm: 0.9999992699530346, iteration: 46400
loss: 1.0395103693008423,grad_norm: 0.9999995932582816, iteration: 46401
loss: 0.9878631234169006,grad_norm: 0.9999990680388738, iteration: 46402
loss: 0.9723277688026428,grad_norm: 0.9999991846124976, iteration: 46403
loss: 0.9925124645233154,grad_norm: 0.9412461276338306, iteration: 46404
loss: 0.9824344515800476,grad_norm: 0.8195142126330454, iteration: 46405
loss: 0.996539831161499,grad_norm: 0.979040974205099, iteration: 46406
loss: 1.082382082939148,grad_norm: 0.9999992337808805, iteration: 46407
loss: 0.9858335256576538,grad_norm: 0.855813669974565, iteration: 46408
loss: 0.9983606934547424,grad_norm: 0.9702430586993184, iteration: 46409
loss: 0.9766737818717957,grad_norm: 0.8064796878742154, iteration: 46410
loss: 1.0287216901779175,grad_norm: 0.9999990272757553, iteration: 46411
loss: 1.0039325952529907,grad_norm: 0.9569126546666302, iteration: 46412
loss: 0.9634616374969482,grad_norm: 0.9330067804331573, iteration: 46413
loss: 1.0316320657730103,grad_norm: 0.9999990670060619, iteration: 46414
loss: 0.9995472431182861,grad_norm: 0.9999989011913133, iteration: 46415
loss: 1.030125379562378,grad_norm: 0.9999993050325997, iteration: 46416
loss: 0.9961791634559631,grad_norm: 0.9999993623211515, iteration: 46417
loss: 1.0293097496032715,grad_norm: 0.9999990816236366, iteration: 46418
loss: 1.0239779949188232,grad_norm: 0.952281994058564, iteration: 46419
loss: 0.9664024114608765,grad_norm: 0.854897164920542, iteration: 46420
loss: 1.0063656568527222,grad_norm: 0.8584529297934063, iteration: 46421
loss: 1.045224666595459,grad_norm: 0.9999991104386904, iteration: 46422
loss: 0.98490309715271,grad_norm: 0.8738918759801005, iteration: 46423
loss: 0.9627695083618164,grad_norm: 0.9999990469418729, iteration: 46424
loss: 0.9836430549621582,grad_norm: 0.9417350274351413, iteration: 46425
loss: 0.9983727931976318,grad_norm: 0.9999991765015566, iteration: 46426
loss: 0.977016031742096,grad_norm: 0.9938901358510952, iteration: 46427
loss: 0.9806469082832336,grad_norm: 0.8933954232706858, iteration: 46428
loss: 0.9943016767501831,grad_norm: 0.9999994735738916, iteration: 46429
loss: 1.0120563507080078,grad_norm: 0.9999989327208403, iteration: 46430
loss: 0.9573045372962952,grad_norm: 0.9999991488979916, iteration: 46431
loss: 1.0238889455795288,grad_norm: 0.999999215190861, iteration: 46432
loss: 0.977713406085968,grad_norm: 0.9999990281133436, iteration: 46433
loss: 1.0010626316070557,grad_norm: 0.9894100190313353, iteration: 46434
loss: 0.9944074153900146,grad_norm: 0.9331551742437796, iteration: 46435
loss: 0.9983922839164734,grad_norm: 0.9821438391951296, iteration: 46436
loss: 1.000051736831665,grad_norm: 0.9999992160943915, iteration: 46437
loss: 0.9973047375679016,grad_norm: 0.9999991382087129, iteration: 46438
loss: 1.0279464721679688,grad_norm: 0.944290982684279, iteration: 46439
loss: 1.0115517377853394,grad_norm: 0.9999989961945455, iteration: 46440
loss: 0.9466110467910767,grad_norm: 0.9999989810272071, iteration: 46441
loss: 1.0073373317718506,grad_norm: 0.9999990895031032, iteration: 46442
loss: 1.0154732465744019,grad_norm: 0.999999161625763, iteration: 46443
loss: 1.000747561454773,grad_norm: 0.9999991961443283, iteration: 46444
loss: 0.9929496645927429,grad_norm: 0.8469149276336291, iteration: 46445
loss: 0.9712344408035278,grad_norm: 0.9901841778461752, iteration: 46446
loss: 1.0313400030136108,grad_norm: 0.9999992768278352, iteration: 46447
loss: 1.005326509475708,grad_norm: 0.9278783415700097, iteration: 46448
loss: 1.0103797912597656,grad_norm: 0.8357978211960162, iteration: 46449
loss: 1.0063908100128174,grad_norm: 0.8743939601926531, iteration: 46450
loss: 0.9966508746147156,grad_norm: 0.9999991104644764, iteration: 46451
loss: 1.0011705160140991,grad_norm: 0.9999989999785902, iteration: 46452
loss: 0.9722726345062256,grad_norm: 0.9814578910299347, iteration: 46453
loss: 1.0412790775299072,grad_norm: 0.9999992390552789, iteration: 46454
loss: 0.9908380508422852,grad_norm: 0.9999993518711232, iteration: 46455
loss: 1.002312183380127,grad_norm: 0.9999990939566387, iteration: 46456
loss: 0.9962953925132751,grad_norm: 0.9742575611119051, iteration: 46457
loss: 0.9785370230674744,grad_norm: 0.9999994128359193, iteration: 46458
loss: 0.9707287549972534,grad_norm: 0.9999990841782052, iteration: 46459
loss: 1.0168755054473877,grad_norm: 0.9999990400262175, iteration: 46460
loss: 0.9402048587799072,grad_norm: 0.9999992761243605, iteration: 46461
loss: 1.0007810592651367,grad_norm: 0.8969252054758468, iteration: 46462
loss: 1.016394853591919,grad_norm: 0.9535706750568812, iteration: 46463
loss: 1.0282447338104248,grad_norm: 0.9203921634094728, iteration: 46464
loss: 1.0143018960952759,grad_norm: 0.8755766593648648, iteration: 46465
loss: 1.0000128746032715,grad_norm: 0.9999990971100844, iteration: 46466
loss: 0.998842716217041,grad_norm: 0.9999991171551393, iteration: 46467
loss: 1.0799287557601929,grad_norm: 0.9999993120481766, iteration: 46468
loss: 1.0250190496444702,grad_norm: 0.9999990985691946, iteration: 46469
loss: 1.0125676393508911,grad_norm: 0.9789357443476585, iteration: 46470
loss: 1.0660492181777954,grad_norm: 0.9999991347061575, iteration: 46471
loss: 0.9811580777168274,grad_norm: 0.9999992459600627, iteration: 46472
loss: 1.0114670991897583,grad_norm: 0.9999992800786126, iteration: 46473
loss: 0.9957650899887085,grad_norm: 0.9999991038294344, iteration: 46474
loss: 0.9948468208312988,grad_norm: 0.9615344316157839, iteration: 46475
loss: 0.9671063423156738,grad_norm: 0.9498801543683754, iteration: 46476
loss: 1.007333517074585,grad_norm: 0.9999990146141038, iteration: 46477
loss: 1.0028550624847412,grad_norm: 0.8460707360123213, iteration: 46478
loss: 1.0205111503601074,grad_norm: 0.9999991180981668, iteration: 46479
loss: 1.0153789520263672,grad_norm: 0.9999991090091837, iteration: 46480
loss: 0.9871600270271301,grad_norm: 0.9873807827149481, iteration: 46481
loss: 1.0175179243087769,grad_norm: 0.9999990953023782, iteration: 46482
loss: 1.0511269569396973,grad_norm: 0.9999990274075997, iteration: 46483
loss: 1.0108145475387573,grad_norm: 0.990918939298771, iteration: 46484
loss: 1.0329902172088623,grad_norm: 0.8052687001614325, iteration: 46485
loss: 1.0367845296859741,grad_norm: 0.9999992454489681, iteration: 46486
loss: 0.9594351053237915,grad_norm: 0.9999991913742521, iteration: 46487
loss: 0.9845961928367615,grad_norm: 0.9052233105558232, iteration: 46488
loss: 1.0121679306030273,grad_norm: 0.8721929786951415, iteration: 46489
loss: 1.006865382194519,grad_norm: 0.9838316276942994, iteration: 46490
loss: 0.9820261597633362,grad_norm: 0.9999992576579263, iteration: 46491
loss: 1.003416895866394,grad_norm: 0.9999991442387367, iteration: 46492
loss: 0.9870395660400391,grad_norm: 0.9999990380671094, iteration: 46493
loss: 1.016278862953186,grad_norm: 0.999998943482515, iteration: 46494
loss: 1.0252269506454468,grad_norm: 0.9999992078775044, iteration: 46495
loss: 1.050126075744629,grad_norm: 0.9999996517820415, iteration: 46496
loss: 1.0415921211242676,grad_norm: 0.9542492240828531, iteration: 46497
loss: 1.0255508422851562,grad_norm: 0.9167152384466776, iteration: 46498
loss: 1.011494517326355,grad_norm: 0.9999990855034171, iteration: 46499
loss: 0.9807341694831848,grad_norm: 0.9780775347282012, iteration: 46500
loss: 1.0458958148956299,grad_norm: 0.9537041007568097, iteration: 46501
loss: 1.0322771072387695,grad_norm: 0.9999990298737967, iteration: 46502
loss: 1.0239219665527344,grad_norm: 0.9705729103118348, iteration: 46503
loss: 1.0299322605133057,grad_norm: 0.9433984454870349, iteration: 46504
loss: 1.0042645931243896,grad_norm: 0.9088888292442432, iteration: 46505
loss: 1.0279957056045532,grad_norm: 0.9999990440954535, iteration: 46506
loss: 1.0113933086395264,grad_norm: 0.9999991655681247, iteration: 46507
loss: 1.0407177209854126,grad_norm: 0.929420447450324, iteration: 46508
loss: 1.0146390199661255,grad_norm: 0.8495447429725953, iteration: 46509
loss: 1.0185184478759766,grad_norm: 0.9999997756723666, iteration: 46510
loss: 1.0132567882537842,grad_norm: 0.8847842275196386, iteration: 46511
loss: 0.9815465807914734,grad_norm: 0.9999990479106666, iteration: 46512
loss: 1.0027199983596802,grad_norm: 0.9850322860277403, iteration: 46513
loss: 0.9688190221786499,grad_norm: 0.9629663289027203, iteration: 46514
loss: 1.0111970901489258,grad_norm: 0.9108727335180979, iteration: 46515
loss: 1.002242088317871,grad_norm: 0.9999990338577225, iteration: 46516
loss: 0.9883186221122742,grad_norm: 0.9999990794007662, iteration: 46517
loss: 1.000810146331787,grad_norm: 0.9561070245118038, iteration: 46518
loss: 1.0230042934417725,grad_norm: 0.9999990171767421, iteration: 46519
loss: 0.9415429830551147,grad_norm: 0.985466487895385, iteration: 46520
loss: 0.9839754700660706,grad_norm: 0.9999991731761213, iteration: 46521
loss: 1.0151057243347168,grad_norm: 0.9999991284899641, iteration: 46522
loss: 1.0034912824630737,grad_norm: 0.9999990659580961, iteration: 46523
loss: 1.0060385465621948,grad_norm: 0.9953704301816707, iteration: 46524
loss: 0.9914731979370117,grad_norm: 0.8403207966636973, iteration: 46525
loss: 1.0300356149673462,grad_norm: 0.9496981604428558, iteration: 46526
loss: 1.0306799411773682,grad_norm: 0.9999990709936551, iteration: 46527
loss: 1.0356698036193848,grad_norm: 0.9999992381730071, iteration: 46528
loss: 1.015613317489624,grad_norm: 0.9441928725346682, iteration: 46529
loss: 1.013218879699707,grad_norm: 0.9999991091706874, iteration: 46530
loss: 0.9879791140556335,grad_norm: 0.9213943688670246, iteration: 46531
loss: 0.9858981370925903,grad_norm: 0.9999995852464839, iteration: 46532
loss: 1.0548863410949707,grad_norm: 0.9212852629823095, iteration: 46533
loss: 0.9919947981834412,grad_norm: 0.9999991538615225, iteration: 46534
loss: 1.0257338285446167,grad_norm: 0.9999991361552196, iteration: 46535
loss: 1.0171077251434326,grad_norm: 0.9999991467070273, iteration: 46536
loss: 1.000817894935608,grad_norm: 0.9743514398821841, iteration: 46537
loss: 1.0196425914764404,grad_norm: 0.9999996459463565, iteration: 46538
loss: 0.9894303679466248,grad_norm: 0.9999991708795465, iteration: 46539
loss: 0.9714471101760864,grad_norm: 0.8940582857098877, iteration: 46540
loss: 1.0111089944839478,grad_norm: 0.9493228001862428, iteration: 46541
loss: 0.9928857684135437,grad_norm: 0.9622330783695391, iteration: 46542
loss: 1.0130116939544678,grad_norm: 0.8963492076071605, iteration: 46543
loss: 0.9971942901611328,grad_norm: 0.9862449702634802, iteration: 46544
loss: 0.9942734241485596,grad_norm: 0.9380323884202911, iteration: 46545
loss: 1.012851357460022,grad_norm: 0.9999990985746923, iteration: 46546
loss: 1.0339874029159546,grad_norm: 0.9999995430828867, iteration: 46547
loss: 0.9932618141174316,grad_norm: 0.9999992213207747, iteration: 46548
loss: 0.959025502204895,grad_norm: 0.9999991219497216, iteration: 46549
loss: 1.0590413808822632,grad_norm: 0.9999990633331757, iteration: 46550
loss: 1.0276075601577759,grad_norm: 0.999999244899875, iteration: 46551
loss: 0.9866398572921753,grad_norm: 0.9258380785756095, iteration: 46552
loss: 0.9907535314559937,grad_norm: 0.910837912600566, iteration: 46553
loss: 0.9960156083106995,grad_norm: 0.9999991277704099, iteration: 46554
loss: 0.9908351302146912,grad_norm: 0.9999993604484273, iteration: 46555
loss: 0.9987258911132812,grad_norm: 0.9781056023406379, iteration: 46556
loss: 1.016162395477295,grad_norm: 0.9999990735229959, iteration: 46557
loss: 0.9944044947624207,grad_norm: 0.9999990915177879, iteration: 46558
loss: 0.9774014353752136,grad_norm: 0.9999989989773711, iteration: 46559
loss: 1.019724726676941,grad_norm: 0.779078807540934, iteration: 46560
loss: 1.0111216306686401,grad_norm: 0.9391659083749523, iteration: 46561
loss: 1.0080742835998535,grad_norm: 0.9188014847285879, iteration: 46562
loss: 0.9830811023712158,grad_norm: 0.9999992358669464, iteration: 46563
loss: 0.9782431125640869,grad_norm: 0.7456977414166961, iteration: 46564
loss: 0.9795963764190674,grad_norm: 0.9148961472903654, iteration: 46565
loss: 0.9640586376190186,grad_norm: 0.9999992028555638, iteration: 46566
loss: 1.0073015689849854,grad_norm: 0.9999991339626928, iteration: 46567
loss: 0.9741448760032654,grad_norm: 0.9529437296465557, iteration: 46568
loss: 0.9569050669670105,grad_norm: 0.9999990390199498, iteration: 46569
loss: 0.9885631203651428,grad_norm: 0.9999989948919081, iteration: 46570
loss: 0.9594854116439819,grad_norm: 0.904666732175339, iteration: 46571
loss: 1.0295292139053345,grad_norm: 0.9999991306195457, iteration: 46572
loss: 1.0095421075820923,grad_norm: 0.9907390265364576, iteration: 46573
loss: 0.9918604493141174,grad_norm: 0.9999992286460917, iteration: 46574
loss: 0.9836001396179199,grad_norm: 0.8052611086869902, iteration: 46575
loss: 1.004030466079712,grad_norm: 0.9022117178036207, iteration: 46576
loss: 1.0009052753448486,grad_norm: 0.9999991628598865, iteration: 46577
loss: 0.9886637330055237,grad_norm: 0.9688267034384958, iteration: 46578
loss: 0.9490205645561218,grad_norm: 0.9156985508177696, iteration: 46579
loss: 1.0066313743591309,grad_norm: 0.9894252013249654, iteration: 46580
loss: 0.9912286400794983,grad_norm: 0.9840084910369937, iteration: 46581
loss: 1.0044746398925781,grad_norm: 0.9710910701053076, iteration: 46582
loss: 1.0130982398986816,grad_norm: 0.9999991921349808, iteration: 46583
loss: 0.9958135485649109,grad_norm: 0.9397199599136556, iteration: 46584
loss: 0.9725742340087891,grad_norm: 0.9999992604726091, iteration: 46585
loss: 1.0436476469039917,grad_norm: 0.8584928630654151, iteration: 46586
loss: 1.0038340091705322,grad_norm: 0.9251694775823902, iteration: 46587
loss: 0.9994229674339294,grad_norm: 0.9999991222989079, iteration: 46588
loss: 1.0045713186264038,grad_norm: 0.9999990161733238, iteration: 46589
loss: 0.9612040519714355,grad_norm: 0.9999991362851176, iteration: 46590
loss: 1.0251264572143555,grad_norm: 0.9999991502726487, iteration: 46591
loss: 0.9492205381393433,grad_norm: 0.9048186225994302, iteration: 46592
loss: 1.024492621421814,grad_norm: 0.99999930537391, iteration: 46593
loss: 1.0033067464828491,grad_norm: 0.9999998233324863, iteration: 46594
loss: 1.0295705795288086,grad_norm: 0.9152740374535829, iteration: 46595
loss: 0.9973723888397217,grad_norm: 0.9758560443195619, iteration: 46596
loss: 1.0337682962417603,grad_norm: 0.9999993409083822, iteration: 46597
loss: 0.9768169522285461,grad_norm: 0.8512085007803125, iteration: 46598
loss: 1.02079439163208,grad_norm: 0.9351075951458958, iteration: 46599
loss: 1.0118855237960815,grad_norm: 0.9975561600505632, iteration: 46600
loss: 1.0427900552749634,grad_norm: 0.9999995905638231, iteration: 46601
loss: 1.0048795938491821,grad_norm: 0.9572995808165463, iteration: 46602
loss: 0.9957937598228455,grad_norm: 0.9999991838667699, iteration: 46603
loss: 0.9868113398551941,grad_norm: 0.818540819480243, iteration: 46604
loss: 0.9757353663444519,grad_norm: 0.9999992062676824, iteration: 46605
loss: 1.033951759338379,grad_norm: 0.9571417985751416, iteration: 46606
loss: 0.9807726740837097,grad_norm: 0.9999992005310276, iteration: 46607
loss: 0.9815101027488708,grad_norm: 0.9252268922998619, iteration: 46608
loss: 1.062923789024353,grad_norm: 0.9999995520149629, iteration: 46609
loss: 0.9979729056358337,grad_norm: 0.9777328510172539, iteration: 46610
loss: 0.9908825755119324,grad_norm: 0.8727213851525727, iteration: 46611
loss: 1.0353449583053589,grad_norm: 0.9999996504480783, iteration: 46612
loss: 1.0135202407836914,grad_norm: 0.8470215025376623, iteration: 46613
loss: 0.9672188758850098,grad_norm: 0.9999990882688049, iteration: 46614
loss: 0.9746779799461365,grad_norm: 0.9999989043617824, iteration: 46615
loss: 1.0448869466781616,grad_norm: 0.9999991112478555, iteration: 46616
loss: 0.9781262278556824,grad_norm: 0.9999990403951725, iteration: 46617
loss: 0.9709425568580627,grad_norm: 0.9506897883513837, iteration: 46618
loss: 1.1141756772994995,grad_norm: 0.9999993822675018, iteration: 46619
loss: 1.0645822286605835,grad_norm: 0.9411758270622493, iteration: 46620
loss: 1.0709103345870972,grad_norm: 0.9999991715750686, iteration: 46621
loss: 1.015372633934021,grad_norm: 0.9999991610028784, iteration: 46622
loss: 0.9891340732574463,grad_norm: 0.9199345304511526, iteration: 46623
loss: 1.036317229270935,grad_norm: 0.9999997926836436, iteration: 46624
loss: 1.0411752462387085,grad_norm: 0.9999989483826032, iteration: 46625
loss: 0.9631548523902893,grad_norm: 0.9832623375829511, iteration: 46626
loss: 1.0291199684143066,grad_norm: 0.9999991344599543, iteration: 46627
loss: 0.9931091666221619,grad_norm: 0.9762857643940406, iteration: 46628
loss: 0.9758701324462891,grad_norm: 0.9999989716960738, iteration: 46629
loss: 1.0215486288070679,grad_norm: 0.9999991969265848, iteration: 46630
loss: 1.0032505989074707,grad_norm: 0.9113166259006928, iteration: 46631
loss: 0.9879797697067261,grad_norm: 0.9356409109479717, iteration: 46632
loss: 1.0011948347091675,grad_norm: 0.9999989756023825, iteration: 46633
loss: 0.9838787317276001,grad_norm: 0.9999990845534603, iteration: 46634
loss: 1.0349472761154175,grad_norm: 0.9999990858721333, iteration: 46635
loss: 1.0266821384429932,grad_norm: 0.9454624900223539, iteration: 46636
loss: 1.0013903379440308,grad_norm: 0.9999989819735121, iteration: 46637
loss: 0.980027437210083,grad_norm: 0.9999995519062695, iteration: 46638
loss: 1.0154303312301636,grad_norm: 0.9999991763768445, iteration: 46639
loss: 1.0006097555160522,grad_norm: 0.970086769418996, iteration: 46640
loss: 1.0218006372451782,grad_norm: 0.8578240849211641, iteration: 46641
loss: 1.0221455097198486,grad_norm: 0.9999990988199846, iteration: 46642
loss: 1.0024720430374146,grad_norm: 0.8824895289341472, iteration: 46643
loss: 1.0419148206710815,grad_norm: 0.999999120636713, iteration: 46644
loss: 0.9440937042236328,grad_norm: 0.9999991144321037, iteration: 46645
loss: 1.0144271850585938,grad_norm: 0.999999211496028, iteration: 46646
loss: 1.0137015581130981,grad_norm: 0.8479342931747158, iteration: 46647
loss: 0.979498565196991,grad_norm: 0.9999991781275329, iteration: 46648
loss: 1.0014818906784058,grad_norm: 0.9093053288829669, iteration: 46649
loss: 1.0215480327606201,grad_norm: 0.9868785949473221, iteration: 46650
loss: 0.9888688325881958,grad_norm: 0.999999129774203, iteration: 46651
loss: 1.009594202041626,grad_norm: 0.9999989737098112, iteration: 46652
loss: 1.0047377347946167,grad_norm: 0.9593941862482406, iteration: 46653
loss: 1.0304206609725952,grad_norm: 0.9999992125446117, iteration: 46654
loss: 1.0136680603027344,grad_norm: 0.8117553217868922, iteration: 46655
loss: 0.9829211235046387,grad_norm: 0.978430036113866, iteration: 46656
loss: 0.9601666927337646,grad_norm: 0.9162081100769267, iteration: 46657
loss: 1.0273290872573853,grad_norm: 0.9097613484992981, iteration: 46658
loss: 1.0031977891921997,grad_norm: 0.9999991126711655, iteration: 46659
loss: 0.9960253238677979,grad_norm: 0.9999998125190628, iteration: 46660
loss: 1.0364068746566772,grad_norm: 0.940253317837677, iteration: 46661
loss: 0.9896435141563416,grad_norm: 0.8940407747953018, iteration: 46662
loss: 0.9687458276748657,grad_norm: 0.9999991507160289, iteration: 46663
loss: 1.0039589405059814,grad_norm: 0.9999997239290157, iteration: 46664
loss: 0.9995321035385132,grad_norm: 0.9999990524937097, iteration: 46665
loss: 0.9802553653717041,grad_norm: 0.8896983880936146, iteration: 46666
loss: 0.9932971000671387,grad_norm: 0.8954534812513573, iteration: 46667
loss: 1.0206843614578247,grad_norm: 0.9999990417525092, iteration: 46668
loss: 1.0081853866577148,grad_norm: 0.8915248456102399, iteration: 46669
loss: 0.987949788570404,grad_norm: 0.9379469099178989, iteration: 46670
loss: 0.9976716041564941,grad_norm: 0.9947675267299708, iteration: 46671
loss: 1.0027785301208496,grad_norm: 0.7671999117119703, iteration: 46672
loss: 1.0274851322174072,grad_norm: 1.0000000975861747, iteration: 46673
loss: 0.9547059535980225,grad_norm: 0.974531742051202, iteration: 46674
loss: 1.022141456604004,grad_norm: 0.9999992385164086, iteration: 46675
loss: 1.0658237934112549,grad_norm: 0.9999993907832182, iteration: 46676
loss: 1.0452063083648682,grad_norm: 0.9999997474167883, iteration: 46677
loss: 1.018380880355835,grad_norm: 0.9999990356832182, iteration: 46678
loss: 1.0225082635879517,grad_norm: 0.9999994877301032, iteration: 46679
loss: 1.0054765939712524,grad_norm: 0.9215233594881845, iteration: 46680
loss: 1.0360743999481201,grad_norm: 0.9875578121257659, iteration: 46681
loss: 0.9843040108680725,grad_norm: 0.9999989179871422, iteration: 46682
loss: 1.0261250734329224,grad_norm: 0.9953046001237187, iteration: 46683
loss: 1.0155069828033447,grad_norm: 0.9999991111795754, iteration: 46684
loss: 0.9920818209648132,grad_norm: 0.9999991032510944, iteration: 46685
loss: 1.0186899900436401,grad_norm: 0.9999992499602931, iteration: 46686
loss: 1.032060980796814,grad_norm: 0.9888313126302053, iteration: 46687
loss: 0.987079918384552,grad_norm: 0.9999990412876896, iteration: 46688
loss: 1.0309540033340454,grad_norm: 0.9999990833767405, iteration: 46689
loss: 1.0263863801956177,grad_norm: 0.9999993914567429, iteration: 46690
loss: 1.0112671852111816,grad_norm: 0.9999991790085255, iteration: 46691
loss: 1.000888466835022,grad_norm: 0.9999990757059735, iteration: 46692
loss: 1.026320219039917,grad_norm: 0.9999989599784652, iteration: 46693
loss: 0.9949257969856262,grad_norm: 0.9999989920657439, iteration: 46694
loss: 1.0225805044174194,grad_norm: 0.920006135432711, iteration: 46695
loss: 1.019590139389038,grad_norm: 0.9999992130386707, iteration: 46696
loss: 1.0663083791732788,grad_norm: 0.9696151056263637, iteration: 46697
loss: 1.0131171941757202,grad_norm: 0.9999991835623656, iteration: 46698
loss: 1.0709973573684692,grad_norm: 0.9999995901011797, iteration: 46699
loss: 1.0222465991973877,grad_norm: 0.9674025103171819, iteration: 46700
loss: 1.0307248830795288,grad_norm: 0.999999302139739, iteration: 46701
loss: 1.0057446956634521,grad_norm: 0.9999990831147478, iteration: 46702
loss: 0.9779715538024902,grad_norm: 0.8927287564875166, iteration: 46703
loss: 1.046507716178894,grad_norm: 0.999999297399545, iteration: 46704
loss: 1.0004163980484009,grad_norm: 0.9851557202238972, iteration: 46705
loss: 0.9988685846328735,grad_norm: 0.9157574825157749, iteration: 46706
loss: 1.0330862998962402,grad_norm: 0.9999990371340868, iteration: 46707
loss: 0.954535961151123,grad_norm: 0.9201308950152342, iteration: 46708
loss: 1.0255910158157349,grad_norm: 0.9999989099914378, iteration: 46709
loss: 1.0240696668624878,grad_norm: 0.9999990737467834, iteration: 46710
loss: 1.0101971626281738,grad_norm: 0.999999361866951, iteration: 46711
loss: 1.0216336250305176,grad_norm: 0.9643336123580702, iteration: 46712
loss: 1.0028551816940308,grad_norm: 0.9999991969196885, iteration: 46713
loss: 1.0169757604599,grad_norm: 0.9999990687360595, iteration: 46714
loss: 1.0011667013168335,grad_norm: 0.981280930631454, iteration: 46715
loss: 1.0164794921875,grad_norm: 0.9647283356506374, iteration: 46716
loss: 0.9837104082107544,grad_norm: 0.9999991361814743, iteration: 46717
loss: 1.0288063287734985,grad_norm: 0.9061411793843849, iteration: 46718
loss: 0.9636950492858887,grad_norm: 0.9912452883244302, iteration: 46719
loss: 0.973189651966095,grad_norm: 0.9677484765608841, iteration: 46720
loss: 1.0121829509735107,grad_norm: 0.9529628468623957, iteration: 46721
loss: 1.0051580667495728,grad_norm: 0.9553571547679505, iteration: 46722
loss: 0.9964141249656677,grad_norm: 0.8799179239672243, iteration: 46723
loss: 1.0419137477874756,grad_norm: 0.9786223475939498, iteration: 46724
loss: 0.9969012141227722,grad_norm: 0.9999990040323153, iteration: 46725
loss: 0.9976750016212463,grad_norm: 0.8353906899482072, iteration: 46726
loss: 0.9986306428909302,grad_norm: 0.8911846495892852, iteration: 46727
loss: 1.0255248546600342,grad_norm: 0.9129780433415345, iteration: 46728
loss: 1.0580251216888428,grad_norm: 0.999999344688907, iteration: 46729
loss: 1.0006651878356934,grad_norm: 0.8446033831004268, iteration: 46730
loss: 0.995396077632904,grad_norm: 0.9999990505800966, iteration: 46731
loss: 1.0197933912277222,grad_norm: 0.9999992035033861, iteration: 46732
loss: 1.0588464736938477,grad_norm: 0.9269336002568745, iteration: 46733
loss: 1.0238666534423828,grad_norm: 0.9999992546468508, iteration: 46734
loss: 1.0122981071472168,grad_norm: 0.9640940778772843, iteration: 46735
loss: 1.0018175840377808,grad_norm: 0.9859282606986683, iteration: 46736
loss: 1.0325344800949097,grad_norm: 0.9396686333004189, iteration: 46737
loss: 0.9793698191642761,grad_norm: 0.9735471742473443, iteration: 46738
loss: 1.038187026977539,grad_norm: 0.9999989169720545, iteration: 46739
loss: 0.9905561208724976,grad_norm: 0.8570204116980027, iteration: 46740
loss: 0.9949153661727905,grad_norm: 0.9999991005320685, iteration: 46741
loss: 0.9917624592781067,grad_norm: 0.9999991728120855, iteration: 46742
loss: 0.983307957649231,grad_norm: 0.9957787095414087, iteration: 46743
loss: 0.9810435175895691,grad_norm: 0.8936927472328309, iteration: 46744
loss: 0.9770646691322327,grad_norm: 0.8463681357931574, iteration: 46745
loss: 0.9987590909004211,grad_norm: 0.9999991719614044, iteration: 46746
loss: 1.0024685859680176,grad_norm: 0.9029170715474752, iteration: 46747
loss: 0.9914628267288208,grad_norm: 0.9189084076585116, iteration: 46748
loss: 1.0287537574768066,grad_norm: 0.9999993129116248, iteration: 46749
loss: 0.9973680973052979,grad_norm: 0.8459493642474915, iteration: 46750
loss: 0.9711288213729858,grad_norm: 0.9917165443317129, iteration: 46751
loss: 0.9862615466117859,grad_norm: 0.9750225831276976, iteration: 46752
loss: 0.9880912899971008,grad_norm: 0.9067477660803569, iteration: 46753
loss: 1.0023961067199707,grad_norm: 0.9999989748437472, iteration: 46754
loss: 1.029402732849121,grad_norm: 0.9140911990412065, iteration: 46755
loss: 1.061632752418518,grad_norm: 0.9286546201497474, iteration: 46756
loss: 1.0473555326461792,grad_norm: 0.9621436670025912, iteration: 46757
loss: 1.0061371326446533,grad_norm: 0.9999990730505587, iteration: 46758
loss: 1.0085219144821167,grad_norm: 0.9999991793795551, iteration: 46759
loss: 1.006313443183899,grad_norm: 0.9999991550183589, iteration: 46760
loss: 0.9800786972045898,grad_norm: 0.9999992438770896, iteration: 46761
loss: 1.0294541120529175,grad_norm: 0.9999992571852399, iteration: 46762
loss: 1.0356929302215576,grad_norm: 0.9999990549901842, iteration: 46763
loss: 1.0037577152252197,grad_norm: 0.8831383517144522, iteration: 46764
loss: 1.0117226839065552,grad_norm: 0.9999991197181329, iteration: 46765
loss: 1.0255995988845825,grad_norm: 0.9999997304455368, iteration: 46766
loss: 1.0001296997070312,grad_norm: 0.9013663478027099, iteration: 46767
loss: 1.0258821249008179,grad_norm: 0.891713387184598, iteration: 46768
loss: 0.9764714241027832,grad_norm: 0.9999990303449331, iteration: 46769
loss: 1.021864414215088,grad_norm: 0.9999991450522595, iteration: 46770
loss: 0.9590159058570862,grad_norm: 0.9999990118829981, iteration: 46771
loss: 0.9888989925384521,grad_norm: 0.9999992912092919, iteration: 46772
loss: 0.9852132201194763,grad_norm: 0.9999993715092793, iteration: 46773
loss: 1.0047729015350342,grad_norm: 0.8569164308505027, iteration: 46774
loss: 1.0139987468719482,grad_norm: 0.9999989963339041, iteration: 46775
loss: 1.0201832056045532,grad_norm: 0.9999990335200354, iteration: 46776
loss: 1.028827428817749,grad_norm: 0.9999991688033018, iteration: 46777
loss: 1.0061429738998413,grad_norm: 0.999999132114607, iteration: 46778
loss: 1.000092625617981,grad_norm: 0.9999990845565475, iteration: 46779
loss: 1.0050194263458252,grad_norm: 0.9878034397806911, iteration: 46780
loss: 1.0069501399993896,grad_norm: 0.8422165101530571, iteration: 46781
loss: 1.0002565383911133,grad_norm: 0.9999991668828075, iteration: 46782
loss: 0.9993515014648438,grad_norm: 0.9999991642721161, iteration: 46783
loss: 1.0412708520889282,grad_norm: 0.9999991595756387, iteration: 46784
loss: 0.9368667602539062,grad_norm: 0.9999989805244719, iteration: 46785
loss: 1.0142444372177124,grad_norm: 0.796202537620905, iteration: 46786
loss: 1.0275599956512451,grad_norm: 0.9627836836767072, iteration: 46787
loss: 0.9794763326644897,grad_norm: 0.9995479578992787, iteration: 46788
loss: 1.0258370637893677,grad_norm: 0.9999990708698262, iteration: 46789
loss: 0.9697099328041077,grad_norm: 0.9999992449418996, iteration: 46790
loss: 1.0306692123413086,grad_norm: 0.9999990925896451, iteration: 46791
loss: 1.0597243309020996,grad_norm: 0.9999993260469396, iteration: 46792
loss: 1.0045005083084106,grad_norm: 0.941554753441915, iteration: 46793
loss: 1.0375139713287354,grad_norm: 0.8957565414011625, iteration: 46794
loss: 1.066985011100769,grad_norm: 0.9999993078806045, iteration: 46795
loss: 1.0501295328140259,grad_norm: 0.9753198875963245, iteration: 46796
loss: 0.9954398274421692,grad_norm: 0.9999991657092154, iteration: 46797
loss: 1.0105353593826294,grad_norm: 0.9501403889938661, iteration: 46798
loss: 1.034511685371399,grad_norm: 0.9239133036074599, iteration: 46799
loss: 0.9973562955856323,grad_norm: 0.911908263988973, iteration: 46800
loss: 0.9821931719779968,grad_norm: 0.988331014207137, iteration: 46801
loss: 1.024731159210205,grad_norm: 0.9999989658646183, iteration: 46802
loss: 1.0372697114944458,grad_norm: 0.8873145053402144, iteration: 46803
loss: 0.9752309322357178,grad_norm: 0.8608173996276594, iteration: 46804
loss: 1.0081849098205566,grad_norm: 0.999999126430404, iteration: 46805
loss: 1.017253041267395,grad_norm: 0.9316856068998315, iteration: 46806
loss: 1.0117745399475098,grad_norm: 0.8603947892722644, iteration: 46807
loss: 0.973667562007904,grad_norm: 0.9999989206446109, iteration: 46808
loss: 0.9705507159233093,grad_norm: 0.8873493237159962, iteration: 46809
loss: 0.9972111582756042,grad_norm: 0.9999992489952539, iteration: 46810
loss: 0.9981014728546143,grad_norm: 0.9999991138934426, iteration: 46811
loss: 1.0025968551635742,grad_norm: 0.9999991990939395, iteration: 46812
loss: 0.9914886951446533,grad_norm: 0.9999989583375412, iteration: 46813
loss: 1.0212944746017456,grad_norm: 0.9999991895647585, iteration: 46814
loss: 1.0103448629379272,grad_norm: 0.9445567547230077, iteration: 46815
loss: 0.9526869654655457,grad_norm: 0.9988589991051721, iteration: 46816
loss: 1.0277601480484009,grad_norm: 0.9419711217889879, iteration: 46817
loss: 1.0025818347930908,grad_norm: 0.8754686331334344, iteration: 46818
loss: 1.0430419445037842,grad_norm: 0.9999994309390076, iteration: 46819
loss: 1.0195884704589844,grad_norm: 0.9999992218656221, iteration: 46820
loss: 1.0048702955245972,grad_norm: 0.9999990424831534, iteration: 46821
loss: 1.0322574377059937,grad_norm: 0.9999994406900832, iteration: 46822
loss: 1.0022552013397217,grad_norm: 0.8202771374837929, iteration: 46823
loss: 1.0132036209106445,grad_norm: 0.9999991261901042, iteration: 46824
loss: 1.0046436786651611,grad_norm: 0.8919027755033921, iteration: 46825
loss: 1.041089653968811,grad_norm: 0.9867817096477314, iteration: 46826
loss: 0.9972707033157349,grad_norm: 0.8896646911253852, iteration: 46827
loss: 0.9988189339637756,grad_norm: 0.9999989990283727, iteration: 46828
loss: 0.9928613901138306,grad_norm: 0.9999992439488563, iteration: 46829
loss: 0.9949511289596558,grad_norm: 0.9999990407372829, iteration: 46830
loss: 1.0370194911956787,grad_norm: 0.9077421871183415, iteration: 46831
loss: 1.0022341012954712,grad_norm: 0.9216021898699468, iteration: 46832
loss: 0.9753564596176147,grad_norm: 0.9999991609956302, iteration: 46833
loss: 1.0300689935684204,grad_norm: 0.9999995749486358, iteration: 46834
loss: 1.0114641189575195,grad_norm: 0.9999991506712538, iteration: 46835
loss: 0.9872339963912964,grad_norm: 0.9999988494944191, iteration: 46836
loss: 0.989851176738739,grad_norm: 0.9999992291628054, iteration: 46837
loss: 1.01603364944458,grad_norm: 0.8613230935497687, iteration: 46838
loss: 1.008315920829773,grad_norm: 0.9368970867217444, iteration: 46839
loss: 1.0266389846801758,grad_norm: 0.9999991926739088, iteration: 46840
loss: 1.0062313079833984,grad_norm: 0.999999096811535, iteration: 46841
loss: 1.0148067474365234,grad_norm: 0.9516751830463465, iteration: 46842
loss: 0.9840099215507507,grad_norm: 0.9999990843219253, iteration: 46843
loss: 0.9482757449150085,grad_norm: 0.9999990871608866, iteration: 46844
loss: 1.0212346315383911,grad_norm: 0.9755748886235434, iteration: 46845
loss: 1.0096410512924194,grad_norm: 0.9772145445774638, iteration: 46846
loss: 1.0125699043273926,grad_norm: 0.9843298162550745, iteration: 46847
loss: 0.9980787634849548,grad_norm: 0.8238866412867043, iteration: 46848
loss: 0.9926746487617493,grad_norm: 0.9077036951452694, iteration: 46849
loss: 1.0248064994812012,grad_norm: 0.9815168628916824, iteration: 46850
loss: 1.018075704574585,grad_norm: 0.9578599422723293, iteration: 46851
loss: 1.0722393989562988,grad_norm: 0.9999993255650497, iteration: 46852
loss: 1.021260380744934,grad_norm: 0.9999990755285478, iteration: 46853
loss: 1.0100173950195312,grad_norm: 0.9999990591127861, iteration: 46854
loss: 1.003673791885376,grad_norm: 0.8992131357879996, iteration: 46855
loss: 1.0135910511016846,grad_norm: 0.8544493548946015, iteration: 46856
loss: 0.9988999962806702,grad_norm: 0.9999989407163359, iteration: 46857
loss: 1.0441789627075195,grad_norm: 0.9999989702045303, iteration: 46858
loss: 0.9644228219985962,grad_norm: 0.9769747586819937, iteration: 46859
loss: 1.002427339553833,grad_norm: 0.832566891879562, iteration: 46860
loss: 0.9926719665527344,grad_norm: 0.920666507973841, iteration: 46861
loss: 1.0197476148605347,grad_norm: 0.9999990461119808, iteration: 46862
loss: 1.0262783765792847,grad_norm: 0.9993632135609208, iteration: 46863
loss: 0.9897738099098206,grad_norm: 0.9999992005556789, iteration: 46864
loss: 0.9946295022964478,grad_norm: 0.9656967841176278, iteration: 46865
loss: 0.9696323871612549,grad_norm: 0.9999989939045866, iteration: 46866
loss: 0.9783103466033936,grad_norm: 0.9634945562287653, iteration: 46867
loss: 1.0224913358688354,grad_norm: 0.9999992609227654, iteration: 46868
loss: 0.9962342977523804,grad_norm: 0.9999992193676731, iteration: 46869
loss: 1.0165109634399414,grad_norm: 0.9999993179386748, iteration: 46870
loss: 1.004032015800476,grad_norm: 0.864483164694087, iteration: 46871
loss: 1.038802146911621,grad_norm: 0.8835866108216651, iteration: 46872
loss: 1.0378906726837158,grad_norm: 0.9366681505648088, iteration: 46873
loss: 0.9916332364082336,grad_norm: 0.9677146738863308, iteration: 46874
loss: 1.008834719657898,grad_norm: 0.8343678097168675, iteration: 46875
loss: 1.0325204133987427,grad_norm: 0.9050393395938572, iteration: 46876
loss: 0.9913569092750549,grad_norm: 0.8625738790597323, iteration: 46877
loss: 0.9798071384429932,grad_norm: 0.9142226772175273, iteration: 46878
loss: 1.010699987411499,grad_norm: 0.9955877479282333, iteration: 46879
loss: 1.0295661687850952,grad_norm: 0.9999990593303031, iteration: 46880
loss: 1.0028139352798462,grad_norm: 0.9999991486907026, iteration: 46881
loss: 1.0273082256317139,grad_norm: 0.8678737765586718, iteration: 46882
loss: 1.0281981229782104,grad_norm: 0.9999989794627266, iteration: 46883
loss: 1.0019567012786865,grad_norm: 0.9999989866444922, iteration: 46884
loss: 0.9954517483711243,grad_norm: 0.9138324396733836, iteration: 46885
loss: 0.9916890263557434,grad_norm: 0.8918644742257721, iteration: 46886
loss: 0.981541097164154,grad_norm: 0.9122019163466479, iteration: 46887
loss: 0.9927682280540466,grad_norm: 0.9999990178955552, iteration: 46888
loss: 1.0034221410751343,grad_norm: 0.8545679300403383, iteration: 46889
loss: 1.0069985389709473,grad_norm: 0.9774400319120057, iteration: 46890
loss: 1.0123865604400635,grad_norm: 0.9999991148062274, iteration: 46891
loss: 0.9950477480888367,grad_norm: 0.9619768773010378, iteration: 46892
loss: 1.0011464357376099,grad_norm: 0.9999993600997024, iteration: 46893
loss: 1.0357911586761475,grad_norm: 0.9999991074254433, iteration: 46894
loss: 0.9361473917961121,grad_norm: 0.9354187641941196, iteration: 46895
loss: 0.9848243594169617,grad_norm: 0.9999990677385303, iteration: 46896
loss: 0.967471718788147,grad_norm: 0.9999991008930719, iteration: 46897
loss: 0.986335039138794,grad_norm: 0.9999991519957002, iteration: 46898
loss: 1.024510383605957,grad_norm: 0.9765178911105169, iteration: 46899
loss: 1.0124579668045044,grad_norm: 0.8432235248825324, iteration: 46900
loss: 1.0189789533615112,grad_norm: 0.9659487673047998, iteration: 46901
loss: 0.9855483770370483,grad_norm: 0.964609652474991, iteration: 46902
loss: 1.0163559913635254,grad_norm: 0.9999992034618207, iteration: 46903
loss: 1.007059097290039,grad_norm: 0.9999990277293223, iteration: 46904
loss: 0.9761068224906921,grad_norm: 0.7929393825861861, iteration: 46905
loss: 1.0121508836746216,grad_norm: 0.9272896180763458, iteration: 46906
loss: 0.9702865481376648,grad_norm: 0.9183687403912418, iteration: 46907
loss: 0.9999560713768005,grad_norm: 0.9471385875442683, iteration: 46908
loss: 1.004948377609253,grad_norm: 0.9360223074807182, iteration: 46909
loss: 0.9949979186058044,grad_norm: 0.8846481348151389, iteration: 46910
loss: 1.0422947406768799,grad_norm: 0.9999988870050518, iteration: 46911
loss: 1.0204222202301025,grad_norm: 0.9999991970745149, iteration: 46912
loss: 0.9907302856445312,grad_norm: 0.9974923398157669, iteration: 46913
loss: 0.9917030930519104,grad_norm: 0.9999991152915183, iteration: 46914
loss: 1.0302643775939941,grad_norm: 0.9504739006428816, iteration: 46915
loss: 0.9825079441070557,grad_norm: 0.9252817741519425, iteration: 46916
loss: 1.0280065536499023,grad_norm: 0.9600849326131405, iteration: 46917
loss: 0.994364321231842,grad_norm: 0.8035026892686807, iteration: 46918
loss: 1.013974905014038,grad_norm: 0.9999991187843967, iteration: 46919
loss: 1.011793851852417,grad_norm: 0.9999991243319512, iteration: 46920
loss: 1.025830864906311,grad_norm: 0.9145445255997723, iteration: 46921
loss: 0.9900881052017212,grad_norm: 0.9999992261658781, iteration: 46922
loss: 1.0299452543258667,grad_norm: 0.8205896251017168, iteration: 46923
loss: 1.0177276134490967,grad_norm: 0.9915493089875312, iteration: 46924
loss: 1.0175024271011353,grad_norm: 0.9599291794785726, iteration: 46925
loss: 1.0090209245681763,grad_norm: 0.9999996932414553, iteration: 46926
loss: 1.0275896787643433,grad_norm: 0.999999118487113, iteration: 46927
loss: 0.9578250050544739,grad_norm: 0.9518081504522407, iteration: 46928
loss: 1.025511384010315,grad_norm: 0.9154108854172179, iteration: 46929
loss: 0.997478723526001,grad_norm: 0.9245101663687213, iteration: 46930
loss: 0.9667975306510925,grad_norm: 0.9999991274759484, iteration: 46931
loss: 1.00761079788208,grad_norm: 0.9999991660148351, iteration: 46932
loss: 0.9838317632675171,grad_norm: 0.9458843124134734, iteration: 46933
loss: 1.0180073976516724,grad_norm: 0.8290555822813377, iteration: 46934
loss: 0.9880409240722656,grad_norm: 0.9999991612916432, iteration: 46935
loss: 0.9591234922409058,grad_norm: 0.9627050040470319, iteration: 46936
loss: 0.9839051961898804,grad_norm: 0.8285055141250239, iteration: 46937
loss: 1.0018162727355957,grad_norm: 0.9999991326322482, iteration: 46938
loss: 0.9871775507926941,grad_norm: 0.9294855431597107, iteration: 46939
loss: 0.9843961000442505,grad_norm: 0.9314597704033976, iteration: 46940
loss: 0.9740719795227051,grad_norm: 0.9780431584920153, iteration: 46941
loss: 0.9989320635795593,grad_norm: 0.9342595051898394, iteration: 46942
loss: 1.0229891538619995,grad_norm: 0.9349513760924757, iteration: 46943
loss: 1.01706862449646,grad_norm: 0.9773317709074981, iteration: 46944
loss: 0.9789709448814392,grad_norm: 0.8794524618040542, iteration: 46945
loss: 0.996296226978302,grad_norm: 0.7843649869556862, iteration: 46946
loss: 0.9797967076301575,grad_norm: 0.8250913747258808, iteration: 46947
loss: 1.016492486000061,grad_norm: 0.9776560117773866, iteration: 46948
loss: 0.9941808581352234,grad_norm: 0.8992431351128405, iteration: 46949
loss: 1.008447527885437,grad_norm: 0.9999989736085487, iteration: 46950
loss: 0.9516554474830627,grad_norm: 0.9625660804459496, iteration: 46951
loss: 0.993084728717804,grad_norm: 0.982007359871185, iteration: 46952
loss: 0.9950056076049805,grad_norm: 0.9388892092593297, iteration: 46953
loss: 1.05103600025177,grad_norm: 0.9531434578733907, iteration: 46954
loss: 1.0327435731887817,grad_norm: 0.8938081515587404, iteration: 46955
loss: 0.9900397658348083,grad_norm: 0.7814240818078176, iteration: 46956
loss: 1.0364044904708862,grad_norm: 0.9799585735893195, iteration: 46957
loss: 0.9914182424545288,grad_norm: 0.9899981040867671, iteration: 46958
loss: 1.044245958328247,grad_norm: 0.9698419247421994, iteration: 46959
loss: 1.0066413879394531,grad_norm: 0.9999991797504793, iteration: 46960
loss: 1.0328798294067383,grad_norm: 0.9000572048089365, iteration: 46961
loss: 1.0165996551513672,grad_norm: 0.9340457209758893, iteration: 46962
loss: 1.0376458168029785,grad_norm: 0.9999990823140359, iteration: 46963
loss: 1.0160775184631348,grad_norm: 0.9109076168806902, iteration: 46964
loss: 1.0150372982025146,grad_norm: 0.999999206443698, iteration: 46965
loss: 1.0165164470672607,grad_norm: 0.9439559680321117, iteration: 46966
loss: 1.0163202285766602,grad_norm: 0.9999989431853873, iteration: 46967
loss: 1.001373052597046,grad_norm: 0.9999990465003212, iteration: 46968
loss: 1.0477091073989868,grad_norm: 0.9192690735312263, iteration: 46969
loss: 0.9875190854072571,grad_norm: 0.9126339573071742, iteration: 46970
loss: 1.013386607170105,grad_norm: 0.9922335366537984, iteration: 46971
loss: 1.0043847560882568,grad_norm: 0.9851732264878699, iteration: 46972
loss: 1.021008849143982,grad_norm: 0.9999990923327841, iteration: 46973
loss: 0.981088399887085,grad_norm: 0.8856016258688707, iteration: 46974
loss: 1.006906509399414,grad_norm: 0.9469835880164802, iteration: 46975
loss: 0.992975115776062,grad_norm: 0.9999992311213783, iteration: 46976
loss: 1.0158330202102661,grad_norm: 0.999999075646306, iteration: 46977
loss: 0.9956763982772827,grad_norm: 0.9999990883331515, iteration: 46978
loss: 0.9916828870773315,grad_norm: 0.8787700295452777, iteration: 46979
loss: 0.9410215020179749,grad_norm: 0.9110643500192416, iteration: 46980
loss: 0.9867025017738342,grad_norm: 0.9999991240550274, iteration: 46981
loss: 1.0089396238327026,grad_norm: 0.9999991251898029, iteration: 46982
loss: 1.0128734111785889,grad_norm: 0.9926477488837868, iteration: 46983
loss: 0.9926623702049255,grad_norm: 0.8954880959133126, iteration: 46984
loss: 1.0176602602005005,grad_norm: 0.999999102868785, iteration: 46985
loss: 1.01828134059906,grad_norm: 0.9999992366574563, iteration: 46986
loss: 1.0004849433898926,grad_norm: 0.9576900146257601, iteration: 46987
loss: 1.029184103012085,grad_norm: 0.9873080011574025, iteration: 46988
loss: 1.0183840990066528,grad_norm: 0.9999989080568238, iteration: 46989
loss: 0.9915456771850586,grad_norm: 0.917946851463164, iteration: 46990
loss: 1.0104742050170898,grad_norm: 0.9559479154911372, iteration: 46991
loss: 1.025858998298645,grad_norm: 0.9999991427207402, iteration: 46992
loss: 1.0323117971420288,grad_norm: 0.9999991674826603, iteration: 46993
loss: 1.0089834928512573,grad_norm: 0.9999990948499963, iteration: 46994
loss: 0.9739295840263367,grad_norm: 0.9999991957771122, iteration: 46995
loss: 0.9973912239074707,grad_norm: 0.9999990548611499, iteration: 46996
loss: 0.998802900314331,grad_norm: 0.9912558689443285, iteration: 46997
loss: 1.052039384841919,grad_norm: 0.9029661552877896, iteration: 46998
loss: 1.0330909490585327,grad_norm: 0.9590821947933426, iteration: 46999
loss: 1.0399807691574097,grad_norm: 0.9999991539244645, iteration: 47000
loss: 1.0402823686599731,grad_norm: 0.9808760306723476, iteration: 47001
loss: 1.0310463905334473,grad_norm: 0.9999991045775638, iteration: 47002
loss: 0.9877356290817261,grad_norm: 0.9999992778768283, iteration: 47003
loss: 1.015425682067871,grad_norm: 0.967099750506819, iteration: 47004
loss: 1.0165739059448242,grad_norm: 0.913739029411158, iteration: 47005
loss: 1.004377841949463,grad_norm: 0.9999991339303046, iteration: 47006
loss: 1.0136054754257202,grad_norm: 0.9999990909523446, iteration: 47007
loss: 1.0043808221817017,grad_norm: 0.9628912398822931, iteration: 47008
loss: 0.9934188723564148,grad_norm: 0.9629561204609853, iteration: 47009
loss: 1.0051435232162476,grad_norm: 0.9999991092732723, iteration: 47010
loss: 1.0203273296356201,grad_norm: 0.9714991680216566, iteration: 47011
loss: 0.9753051400184631,grad_norm: 0.999998975571418, iteration: 47012
loss: 1.01605224609375,grad_norm: 0.993558875319025, iteration: 47013
loss: 0.9764357209205627,grad_norm: 0.8894348007778158, iteration: 47014
loss: 1.0232645273208618,grad_norm: 0.9999990599063032, iteration: 47015
loss: 0.9953396320343018,grad_norm: 0.9956437813527501, iteration: 47016
loss: 1.01180100440979,grad_norm: 0.974496713392414, iteration: 47017
loss: 0.9911112785339355,grad_norm: 0.9999992287074225, iteration: 47018
loss: 0.9974521994590759,grad_norm: 0.9417121314061184, iteration: 47019
loss: 0.9794101715087891,grad_norm: 0.9999990055971852, iteration: 47020
loss: 0.9919902682304382,grad_norm: 0.9999990443651128, iteration: 47021
loss: 1.0023014545440674,grad_norm: 0.9999992429377924, iteration: 47022
loss: 1.0229421854019165,grad_norm: 0.9051002134926062, iteration: 47023
loss: 1.008598804473877,grad_norm: 0.9999991243748269, iteration: 47024
loss: 1.0137882232666016,grad_norm: 0.9668163303813472, iteration: 47025
loss: 0.9950169324874878,grad_norm: 0.8926475094188319, iteration: 47026
loss: 0.9910586476325989,grad_norm: 0.984079918289147, iteration: 47027
loss: 0.9914552569389343,grad_norm: 0.9999992149172057, iteration: 47028
loss: 1.0178877115249634,grad_norm: 0.9999991475372436, iteration: 47029
loss: 1.032315969467163,grad_norm: 0.9999991427491037, iteration: 47030
loss: 1.015292763710022,grad_norm: 0.9999990799702316, iteration: 47031
loss: 1.0148522853851318,grad_norm: 0.7912662278481994, iteration: 47032
loss: 0.989704430103302,grad_norm: 0.9999990451340194, iteration: 47033
loss: 0.9879260659217834,grad_norm: 0.9665802507967501, iteration: 47034
loss: 1.0078532695770264,grad_norm: 0.9379733292376774, iteration: 47035
loss: 1.0422087907791138,grad_norm: 0.9999992843999738, iteration: 47036
loss: 0.966492772102356,grad_norm: 0.82092205837417, iteration: 47037
loss: 1.0046346187591553,grad_norm: 0.999999129704853, iteration: 47038
loss: 1.010749101638794,grad_norm: 0.9999990521349186, iteration: 47039
loss: 0.9896736145019531,grad_norm: 0.9999989856423197, iteration: 47040
loss: 1.037662386894226,grad_norm: 0.8608916805801108, iteration: 47041
loss: 0.9935569167137146,grad_norm: 0.7563884878123122, iteration: 47042
loss: 0.9966974854469299,grad_norm: 0.9451077047351935, iteration: 47043
loss: 0.989301323890686,grad_norm: 0.9505115156616128, iteration: 47044
loss: 0.9839950799942017,grad_norm: 0.9433189748283227, iteration: 47045
loss: 1.0369741916656494,grad_norm: 0.9999991819938793, iteration: 47046
loss: 0.9903194904327393,grad_norm: 0.8900210058378981, iteration: 47047
loss: 1.031018853187561,grad_norm: 0.879759755518644, iteration: 47048
loss: 0.9722921252250671,grad_norm: 0.9752345806532571, iteration: 47049
loss: 0.9923935532569885,grad_norm: 0.8991613474572647, iteration: 47050
loss: 0.9664772748947144,grad_norm: 0.9013562458617286, iteration: 47051
loss: 1.017126441001892,grad_norm: 0.8990083894145313, iteration: 47052
loss: 1.0155385732650757,grad_norm: 0.9999990890089334, iteration: 47053
loss: 1.0263361930847168,grad_norm: 0.7509124138909988, iteration: 47054
loss: 1.0140843391418457,grad_norm: 0.9387691952212431, iteration: 47055
loss: 1.0015666484832764,grad_norm: 0.9999992018321818, iteration: 47056
loss: 1.008488416671753,grad_norm: 0.9447278198934588, iteration: 47057
loss: 0.9823153614997864,grad_norm: 0.9794823841800631, iteration: 47058
loss: 0.9891780018806458,grad_norm: 0.9999992780784234, iteration: 47059
loss: 0.9788604378700256,grad_norm: 0.9838448810561384, iteration: 47060
loss: 1.046126365661621,grad_norm: 0.9716303477438929, iteration: 47061
loss: 0.9778790473937988,grad_norm: 0.9333379381183753, iteration: 47062
loss: 0.9719048738479614,grad_norm: 0.9999991781258933, iteration: 47063
loss: 1.0336346626281738,grad_norm: 0.9999991943414351, iteration: 47064
loss: 0.9961990714073181,grad_norm: 0.9591647102236157, iteration: 47065
loss: 0.9855299592018127,grad_norm: 0.9999991426227027, iteration: 47066
loss: 1.0336235761642456,grad_norm: 0.9999991959507232, iteration: 47067
loss: 0.9881278276443481,grad_norm: 0.9468826224961125, iteration: 47068
loss: 1.031562328338623,grad_norm: 0.9999991126762138, iteration: 47069
loss: 1.0152764320373535,grad_norm: 0.9999991548414097, iteration: 47070
loss: 1.0167685747146606,grad_norm: 0.881288487845699, iteration: 47071
loss: 1.0041418075561523,grad_norm: 0.8804023674540584, iteration: 47072
loss: 1.0220284461975098,grad_norm: 0.9743753095812527, iteration: 47073
loss: 0.9992523789405823,grad_norm: 0.8434105564041506, iteration: 47074
loss: 1.0432227849960327,grad_norm: 0.878783041879715, iteration: 47075
loss: 1.0338029861450195,grad_norm: 0.9999991002661327, iteration: 47076
loss: 1.0245630741119385,grad_norm: 0.9999992298204078, iteration: 47077
loss: 1.023552656173706,grad_norm: 0.999999072963943, iteration: 47078
loss: 1.0067189931869507,grad_norm: 0.876738838956446, iteration: 47079
loss: 1.0153719186782837,grad_norm: 0.9999992206786784, iteration: 47080
loss: 0.9904544949531555,grad_norm: 0.9999998984218075, iteration: 47081
loss: 0.974293053150177,grad_norm: 0.8844385174036449, iteration: 47082
loss: 1.0117319822311401,grad_norm: 0.9999990033753836, iteration: 47083
loss: 1.0185785293579102,grad_norm: 0.9670157496258753, iteration: 47084
loss: 1.0368907451629639,grad_norm: 0.9738404434951036, iteration: 47085
loss: 0.9892386794090271,grad_norm: 0.9999990669977885, iteration: 47086
loss: 1.0333664417266846,grad_norm: 0.9999990863796402, iteration: 47087
loss: 0.981532633304596,grad_norm: 0.981805624695309, iteration: 47088
loss: 1.0099178552627563,grad_norm: 0.9187819369565183, iteration: 47089
loss: 0.9837737083435059,grad_norm: 0.9999990657962244, iteration: 47090
loss: 1.0219151973724365,grad_norm: 0.9999992227501507, iteration: 47091
loss: 1.0072935819625854,grad_norm: 0.9356637894128788, iteration: 47092
loss: 1.0291472673416138,grad_norm: 0.9497623372506288, iteration: 47093
loss: 1.005925178527832,grad_norm: 0.9626944685370512, iteration: 47094
loss: 0.9886988401412964,grad_norm: 0.9999992037115234, iteration: 47095
loss: 1.0324186086654663,grad_norm: 0.989629520690985, iteration: 47096
loss: 0.9772132635116577,grad_norm: 0.8702354149292426, iteration: 47097
loss: 0.9649621248245239,grad_norm: 0.9999992670233852, iteration: 47098
loss: 1.0247547626495361,grad_norm: 0.9795064821561131, iteration: 47099
loss: 0.9895728826522827,grad_norm: 0.95381846852827, iteration: 47100
loss: 1.0091981887817383,grad_norm: 0.986272807716777, iteration: 47101
loss: 0.9922026991844177,grad_norm: 0.8339357320199682, iteration: 47102
loss: 0.9936112761497498,grad_norm: 0.9999990608926138, iteration: 47103
loss: 1.004582405090332,grad_norm: 0.9680648701275264, iteration: 47104
loss: 0.9926572442054749,grad_norm: 0.9999991394110781, iteration: 47105
loss: 0.9875090718269348,grad_norm: 0.9238540110856992, iteration: 47106
loss: 0.9997565746307373,grad_norm: 0.9999991453450332, iteration: 47107
loss: 0.9820501208305359,grad_norm: 0.9999991126697765, iteration: 47108
loss: 0.9785671234130859,grad_norm: 0.8387897768897242, iteration: 47109
loss: 0.9962672591209412,grad_norm: 0.9211508654134097, iteration: 47110
loss: 0.9912728667259216,grad_norm: 0.9999990253070796, iteration: 47111
loss: 0.9674011468887329,grad_norm: 0.9728589443424366, iteration: 47112
loss: 0.9975273013114929,grad_norm: 0.9999992536512257, iteration: 47113
loss: 1.0099858045578003,grad_norm: 0.956801942106368, iteration: 47114
loss: 1.0213735103607178,grad_norm: 0.9999991181175532, iteration: 47115
loss: 1.0126274824142456,grad_norm: 0.9946634470627055, iteration: 47116
loss: 1.0099234580993652,grad_norm: 0.9286913139405698, iteration: 47117
loss: 1.0033190250396729,grad_norm: 0.9782470292017867, iteration: 47118
loss: 0.960990309715271,grad_norm: 0.9999989736594718, iteration: 47119
loss: 0.9834532737731934,grad_norm: 0.9999990491190058, iteration: 47120
loss: 0.981908917427063,grad_norm: 0.999999193154962, iteration: 47121
loss: 0.9812524318695068,grad_norm: 0.9999990737875534, iteration: 47122
loss: 0.9956352114677429,grad_norm: 0.9999990965913405, iteration: 47123
loss: 0.9831132292747498,grad_norm: 0.9999989987717073, iteration: 47124
loss: 1.0310211181640625,grad_norm: 0.999999222207271, iteration: 47125
loss: 1.0042189359664917,grad_norm: 0.9999990631564415, iteration: 47126
loss: 0.9713031053543091,grad_norm: 0.8743421853276817, iteration: 47127
loss: 1.0020484924316406,grad_norm: 0.9744817230902877, iteration: 47128
loss: 1.0294333696365356,grad_norm: 0.9999991180318437, iteration: 47129
loss: 0.990445077419281,grad_norm: 0.9999990649411805, iteration: 47130
loss: 0.9914884567260742,grad_norm: 0.9673100932670031, iteration: 47131
loss: 1.0035631656646729,grad_norm: 0.9999990685502008, iteration: 47132
loss: 1.0315220355987549,grad_norm: 0.9274221467991968, iteration: 47133
loss: 1.0403951406478882,grad_norm: 0.9834136269282074, iteration: 47134
loss: 1.0181001424789429,grad_norm: 0.9999993491662356, iteration: 47135
loss: 1.0282821655273438,grad_norm: 0.9571003072298644, iteration: 47136
loss: 1.0454524755477905,grad_norm: 0.9999993309476435, iteration: 47137
loss: 1.0235040187835693,grad_norm: 0.91690579340885, iteration: 47138
loss: 1.0075064897537231,grad_norm: 0.9886876014364765, iteration: 47139
loss: 0.99654221534729,grad_norm: 0.9667094280095596, iteration: 47140
loss: 0.9871096014976501,grad_norm: 0.8126550209205837, iteration: 47141
loss: 1.0134470462799072,grad_norm: 0.9999991356695916, iteration: 47142
loss: 0.9867796301841736,grad_norm: 0.9833670415291296, iteration: 47143
loss: 1.0314587354660034,grad_norm: 0.9999990004057328, iteration: 47144
loss: 0.9660873413085938,grad_norm: 0.999999213032632, iteration: 47145
loss: 1.0085642337799072,grad_norm: 0.923792397277794, iteration: 47146
loss: 0.9867984056472778,grad_norm: 0.9163267746088074, iteration: 47147
loss: 0.9788946509361267,grad_norm: 0.9999990253948515, iteration: 47148
loss: 1.0279803276062012,grad_norm: 0.8784587454631032, iteration: 47149
loss: 1.0036393404006958,grad_norm: 0.885394073328865, iteration: 47150
loss: 0.9934096932411194,grad_norm: 0.9928526885100041, iteration: 47151
loss: 0.9797647595405579,grad_norm: 0.9999991353528661, iteration: 47152
loss: 1.0051591396331787,grad_norm: 0.912482073370146, iteration: 47153
loss: 0.9956543445587158,grad_norm: 0.9999989752125972, iteration: 47154
loss: 1.0460175275802612,grad_norm: 0.9999991343350428, iteration: 47155
loss: 1.0188753604888916,grad_norm: 0.9511924438893097, iteration: 47156
loss: 1.017189621925354,grad_norm: 0.9999997892659669, iteration: 47157
loss: 0.9910922646522522,grad_norm: 0.9940410325420505, iteration: 47158
loss: 0.9704524278640747,grad_norm: 0.9999992510126723, iteration: 47159
loss: 1.021681785583496,grad_norm: 0.8739648251450334, iteration: 47160
loss: 1.0352622270584106,grad_norm: 0.9999989544838763, iteration: 47161
loss: 0.9754809141159058,grad_norm: 0.9498703120098001, iteration: 47162
loss: 1.0062241554260254,grad_norm: 0.9054540134167927, iteration: 47163
loss: 1.0152829885482788,grad_norm: 0.9999991566681471, iteration: 47164
loss: 0.9945071339607239,grad_norm: 0.910610825589867, iteration: 47165
loss: 1.018776297569275,grad_norm: 0.9738391643866077, iteration: 47166
loss: 1.0574166774749756,grad_norm: 0.9983897141277857, iteration: 47167
loss: 0.9525107145309448,grad_norm: 0.9999991406855223, iteration: 47168
loss: 1.0062675476074219,grad_norm: 0.8947126276529848, iteration: 47169
loss: 0.9911821484565735,grad_norm: 0.9999990965564866, iteration: 47170
loss: 0.9830102324485779,grad_norm: 0.9225557325001634, iteration: 47171
loss: 1.002089500427246,grad_norm: 0.8965867039028653, iteration: 47172
loss: 1.0274267196655273,grad_norm: 0.9999990295293782, iteration: 47173
loss: 0.9753996729850769,grad_norm: 0.9999990108315677, iteration: 47174
loss: 1.0064809322357178,grad_norm: 0.9999991838637817, iteration: 47175
loss: 1.0083873271942139,grad_norm: 0.9578460787328311, iteration: 47176
loss: 0.9746913909912109,grad_norm: 0.9999991284468307, iteration: 47177
loss: 0.9887242317199707,grad_norm: 0.9186519235212726, iteration: 47178
loss: 0.9905659556388855,grad_norm: 0.9999990074795604, iteration: 47179
loss: 1.0136387348175049,grad_norm: 0.8764821373242583, iteration: 47180
loss: 0.9549724459648132,grad_norm: 0.8181534328151694, iteration: 47181
loss: 1.0208961963653564,grad_norm: 0.989753432910201, iteration: 47182
loss: 0.9744086861610413,grad_norm: 0.9999991584288985, iteration: 47183
loss: 0.9741432666778564,grad_norm: 0.8908360392891038, iteration: 47184
loss: 1.032638430595398,grad_norm: 0.9999992316494947, iteration: 47185
loss: 0.9838442802429199,grad_norm: 0.8854990052783958, iteration: 47186
loss: 1.0016376972198486,grad_norm: 0.9372719822247851, iteration: 47187
loss: 1.025146245956421,grad_norm: 0.85836730146414, iteration: 47188
loss: 0.9684604406356812,grad_norm: 0.9387468588753741, iteration: 47189
loss: 0.9747613072395325,grad_norm: 0.8255631660865025, iteration: 47190
loss: 0.9930181503295898,grad_norm: 0.9999990869093225, iteration: 47191
loss: 0.973017156124115,grad_norm: 0.999999151276686, iteration: 47192
loss: 0.9870932102203369,grad_norm: 0.9350278365521572, iteration: 47193
loss: 1.0187987089157104,grad_norm: 0.8573450464801037, iteration: 47194
loss: 1.0404982566833496,grad_norm: 0.9999991002247935, iteration: 47195
loss: 0.9752359986305237,grad_norm: 0.9217177383941467, iteration: 47196
loss: 0.9970453977584839,grad_norm: 0.9506561744247968, iteration: 47197
loss: 0.9985851049423218,grad_norm: 0.9999990629613973, iteration: 47198
loss: 0.9927617311477661,grad_norm: 0.9926824916768634, iteration: 47199
loss: 1.03596031665802,grad_norm: 0.9648910872780223, iteration: 47200
loss: 0.981309711933136,grad_norm: 0.9999990750541037, iteration: 47201
loss: 0.9933123588562012,grad_norm: 0.8857679053144984, iteration: 47202
loss: 0.9880408644676208,grad_norm: 0.8551053757684899, iteration: 47203
loss: 0.9710590243339539,grad_norm: 0.9283631730824289, iteration: 47204
loss: 1.0254724025726318,grad_norm: 0.9999991233325238, iteration: 47205
loss: 1.0177268981933594,grad_norm: 0.9999992129405024, iteration: 47206
loss: 1.0121856927871704,grad_norm: 0.9999990583795768, iteration: 47207
loss: 1.0231068134307861,grad_norm: 0.9999990616920335, iteration: 47208
loss: 1.018633246421814,grad_norm: 0.9999991325085182, iteration: 47209
loss: 1.0042150020599365,grad_norm: 0.9999991890591654, iteration: 47210
loss: 0.9989116787910461,grad_norm: 0.9299062765930899, iteration: 47211
loss: 1.0384467840194702,grad_norm: 0.9999991587909312, iteration: 47212
loss: 0.9831438064575195,grad_norm: 0.9999991329299504, iteration: 47213
loss: 0.960911750793457,grad_norm: 0.8256550574388007, iteration: 47214
loss: 0.9856882691383362,grad_norm: 0.9999991700302758, iteration: 47215
loss: 1.018638253211975,grad_norm: 0.9769043944207435, iteration: 47216
loss: 0.9828892350196838,grad_norm: 0.9999990746187069, iteration: 47217
loss: 1.0052707195281982,grad_norm: 0.9999990828168296, iteration: 47218
loss: 1.0267667770385742,grad_norm: 0.9999992151096239, iteration: 47219
loss: 1.002912998199463,grad_norm: 0.9897985624045437, iteration: 47220
loss: 1.0195232629776,grad_norm: 0.9999991528348592, iteration: 47221
loss: 1.0177158117294312,grad_norm: 0.9999990784724605, iteration: 47222
loss: 1.0391972064971924,grad_norm: 0.9999991721362177, iteration: 47223
loss: 0.9995129704475403,grad_norm: 0.9041977322611395, iteration: 47224
loss: 1.0097218751907349,grad_norm: 0.9758959246398125, iteration: 47225
loss: 0.9693617820739746,grad_norm: 0.9957195075376747, iteration: 47226
loss: 0.9996781945228577,grad_norm: 0.9677203099717576, iteration: 47227
loss: 1.0120514631271362,grad_norm: 0.9999990270460115, iteration: 47228
loss: 1.0293004512786865,grad_norm: 0.999999142946589, iteration: 47229
loss: 1.0122514963150024,grad_norm: 0.9534852856380351, iteration: 47230
loss: 1.0232223272323608,grad_norm: 0.9999990691964247, iteration: 47231
loss: 0.9626122713088989,grad_norm: 0.9840501442332454, iteration: 47232
loss: 0.9735608696937561,grad_norm: 0.999999110445624, iteration: 47233
loss: 1.0158929824829102,grad_norm: 0.9024450317135898, iteration: 47234
loss: 1.0203555822372437,grad_norm: 0.8488550311135582, iteration: 47235
loss: 1.0424838066101074,grad_norm: 0.9999991197565099, iteration: 47236
loss: 1.0020794868469238,grad_norm: 0.9999990656821284, iteration: 47237
loss: 1.0243149995803833,grad_norm: 0.9295445025714325, iteration: 47238
loss: 1.0083057880401611,grad_norm: 0.7589648930257591, iteration: 47239
loss: 0.9947217702865601,grad_norm: 0.8298260953996756, iteration: 47240
loss: 1.0011392831802368,grad_norm: 0.9999997529232907, iteration: 47241
loss: 1.0029641389846802,grad_norm: 0.8871319898909813, iteration: 47242
loss: 0.9816324710845947,grad_norm: 0.9143615982670537, iteration: 47243
loss: 0.997481644153595,grad_norm: 0.8692043675385649, iteration: 47244
loss: 0.944435179233551,grad_norm: 0.9999993060226448, iteration: 47245
loss: 1.0062763690948486,grad_norm: 0.8755543955197308, iteration: 47246
loss: 1.0269261598587036,grad_norm: 0.9999989893270183, iteration: 47247
loss: 1.0079517364501953,grad_norm: 0.9999990432033729, iteration: 47248
loss: 0.991562008857727,grad_norm: 0.9999991017322989, iteration: 47249
loss: 0.9866291880607605,grad_norm: 0.9842471191681934, iteration: 47250
loss: 1.014146327972412,grad_norm: 0.999999065880094, iteration: 47251
loss: 0.9816994667053223,grad_norm: 0.94443765570934, iteration: 47252
loss: 0.9973903298377991,grad_norm: 0.9999990433994429, iteration: 47253
loss: 1.0007867813110352,grad_norm: 0.9999991967449685, iteration: 47254
loss: 1.0392552614212036,grad_norm: 0.9617940791401385, iteration: 47255
loss: 1.000104308128357,grad_norm: 0.9999991795201769, iteration: 47256
loss: 1.0325371026992798,grad_norm: 0.9999990155941124, iteration: 47257
loss: 0.9953118562698364,grad_norm: 0.9999991884716164, iteration: 47258
loss: 0.9892207980155945,grad_norm: 0.9999991682645408, iteration: 47259
loss: 1.0150258541107178,grad_norm: 0.8911700838459654, iteration: 47260
loss: 1.022214651107788,grad_norm: 0.9036119596313776, iteration: 47261
loss: 1.0501530170440674,grad_norm: 0.9580957172998138, iteration: 47262
loss: 0.995781660079956,grad_norm: 0.999999238032993, iteration: 47263
loss: 1.019479513168335,grad_norm: 0.9321868367696193, iteration: 47264
loss: 1.01987624168396,grad_norm: 0.9999991654774073, iteration: 47265
loss: 0.9986546039581299,grad_norm: 0.9472385889423169, iteration: 47266
loss: 1.007223129272461,grad_norm: 0.9810348446509675, iteration: 47267
loss: 1.0044816732406616,grad_norm: 0.96454051556273, iteration: 47268
loss: 0.9971103072166443,grad_norm: 0.8374425140180846, iteration: 47269
loss: 0.9935822486877441,grad_norm: 0.9045708073830777, iteration: 47270
loss: 1.003261685371399,grad_norm: 0.9999991183075306, iteration: 47271
loss: 0.9915028810501099,grad_norm: 0.9552851292444673, iteration: 47272
loss: 1.0280135869979858,grad_norm: 0.9999990599534976, iteration: 47273
loss: 0.9546079039573669,grad_norm: 0.9999990601600426, iteration: 47274
loss: 1.0050923824310303,grad_norm: 0.8873437714740995, iteration: 47275
loss: 0.9811644554138184,grad_norm: 0.9168985947156125, iteration: 47276
loss: 1.0082603693008423,grad_norm: 0.8435746659798345, iteration: 47277
loss: 0.9885711073875427,grad_norm: 0.9616115655530048, iteration: 47278
loss: 0.9888947010040283,grad_norm: 0.9999992987175373, iteration: 47279
loss: 1.0128806829452515,grad_norm: 0.9067055217247012, iteration: 47280
loss: 0.9833890199661255,grad_norm: 0.8548989908276048, iteration: 47281
loss: 1.0030348300933838,grad_norm: 0.9380283760358302, iteration: 47282
loss: 1.025307297706604,grad_norm: 0.9999991354714377, iteration: 47283
loss: 1.0124397277832031,grad_norm: 0.999999172451612, iteration: 47284
loss: 0.9812628626823425,grad_norm: 0.9654460069482972, iteration: 47285
loss: 1.0325446128845215,grad_norm: 0.9730724935473898, iteration: 47286
loss: 1.0383740663528442,grad_norm: 0.9917543768488623, iteration: 47287
loss: 1.014210820198059,grad_norm: 0.9999991480862847, iteration: 47288
loss: 0.9801011681556702,grad_norm: 0.9627376996838845, iteration: 47289
loss: 1.0241659879684448,grad_norm: 0.8061687845242969, iteration: 47290
loss: 0.9932601451873779,grad_norm: 0.9642391668243554, iteration: 47291
loss: 0.9768638014793396,grad_norm: 0.9999989441766621, iteration: 47292
loss: 0.9934779405593872,grad_norm: 0.9999991444089367, iteration: 47293
loss: 0.9832932949066162,grad_norm: 0.9999995560665296, iteration: 47294
loss: 1.0124708414077759,grad_norm: 0.9569482309839253, iteration: 47295
loss: 0.9808769226074219,grad_norm: 0.9999991742098407, iteration: 47296
loss: 0.9985488057136536,grad_norm: 0.8469239123186478, iteration: 47297
loss: 1.0064760446548462,grad_norm: 0.9999990933441898, iteration: 47298
loss: 0.9751120805740356,grad_norm: 0.8470685153300305, iteration: 47299
loss: 0.9941838979721069,grad_norm: 0.9999993666725097, iteration: 47300
loss: 0.9573887586593628,grad_norm: 0.9999992487696499, iteration: 47301
loss: 1.001686692237854,grad_norm: 0.9999990476244652, iteration: 47302
loss: 0.9956069588661194,grad_norm: 0.999999026983888, iteration: 47303
loss: 1.0251338481903076,grad_norm: 0.8452106759463585, iteration: 47304
loss: 1.0133014917373657,grad_norm: 0.9628719868769692, iteration: 47305
loss: 0.9912875294685364,grad_norm: 0.9944618856218549, iteration: 47306
loss: 1.0038387775421143,grad_norm: 0.9999991897959988, iteration: 47307
loss: 1.0363471508026123,grad_norm: 0.9999991656026165, iteration: 47308
loss: 1.0178425312042236,grad_norm: 0.9999996287848314, iteration: 47309
loss: 1.0361323356628418,grad_norm: 0.9999990947615567, iteration: 47310
loss: 0.9995630979537964,grad_norm: 0.9999991386353604, iteration: 47311
loss: 0.97495037317276,grad_norm: 0.8238666837951305, iteration: 47312
loss: 0.9560921788215637,grad_norm: 0.9065805261482851, iteration: 47313
loss: 0.9786124229431152,grad_norm: 0.9232009657563217, iteration: 47314
loss: 1.0053938627243042,grad_norm: 0.9999993756692456, iteration: 47315
loss: 1.0933767557144165,grad_norm: 0.9999991864737873, iteration: 47316
loss: 1.0087436437606812,grad_norm: 0.8972831189158287, iteration: 47317
loss: 1.0047426223754883,grad_norm: 0.9456313881367797, iteration: 47318
loss: 0.9965417385101318,grad_norm: 0.9040433251740105, iteration: 47319
loss: 1.0296976566314697,grad_norm: 0.9999991708873958, iteration: 47320
loss: 0.9942409992218018,grad_norm: 0.9239892414657754, iteration: 47321
loss: 0.9988532662391663,grad_norm: 0.9999991110612344, iteration: 47322
loss: 0.9640277624130249,grad_norm: 0.9854133377843152, iteration: 47323
loss: 0.9754617810249329,grad_norm: 0.9331278008609314, iteration: 47324
loss: 0.9772451519966125,grad_norm: 0.7546380633129337, iteration: 47325
loss: 1.0214799642562866,grad_norm: 0.9999991724716717, iteration: 47326
loss: 0.979336678981781,grad_norm: 0.9318804062493907, iteration: 47327
loss: 1.0056418180465698,grad_norm: 0.8802262669365777, iteration: 47328
loss: 0.9991064667701721,grad_norm: 0.882959459326221, iteration: 47329
loss: 0.9900189638137817,grad_norm: 0.993615799142719, iteration: 47330
loss: 0.987328290939331,grad_norm: 0.9070790987450554, iteration: 47331
loss: 1.004869818687439,grad_norm: 0.9431792948822725, iteration: 47332
loss: 1.013190746307373,grad_norm: 0.9999989060906, iteration: 47333
loss: 0.9732242226600647,grad_norm: 0.9999991713662403, iteration: 47334
loss: 1.0647293329238892,grad_norm: 0.9999990497851391, iteration: 47335
loss: 1.0225847959518433,grad_norm: 0.9999990532467561, iteration: 47336
loss: 0.9987000823020935,grad_norm: 0.9999992140556364, iteration: 47337
loss: 1.014370083808899,grad_norm: 0.7339653744903726, iteration: 47338
loss: 0.9832344651222229,grad_norm: 0.8134566476731453, iteration: 47339
loss: 1.0186350345611572,grad_norm: 0.9999990038936407, iteration: 47340
loss: 1.0043588876724243,grad_norm: 0.9999991472878709, iteration: 47341
loss: 1.0108833312988281,grad_norm: 0.999999160549195, iteration: 47342
loss: 0.9775126576423645,grad_norm: 0.9871020461055137, iteration: 47343
loss: 0.974342405796051,grad_norm: 0.999998978213855, iteration: 47344
loss: 1.030608057975769,grad_norm: 0.9999991314132269, iteration: 47345
loss: 0.9917149543762207,grad_norm: 0.999999261090174, iteration: 47346
loss: 0.9854990243911743,grad_norm: 0.8420422778169714, iteration: 47347
loss: 1.0256308317184448,grad_norm: 0.9273593529085983, iteration: 47348
loss: 0.9818862080574036,grad_norm: 0.9934713392123703, iteration: 47349
loss: 0.9932019114494324,grad_norm: 0.8933763069108658, iteration: 47350
loss: 1.0124458074569702,grad_norm: 0.9999990558051013, iteration: 47351
loss: 1.0176624059677124,grad_norm: 0.9941003593131225, iteration: 47352
loss: 1.027042031288147,grad_norm: 0.9999989997172303, iteration: 47353
loss: 1.0060890913009644,grad_norm: 0.9261153846030297, iteration: 47354
loss: 1.0270076990127563,grad_norm: 0.9999991353490593, iteration: 47355
loss: 1.0043437480926514,grad_norm: 0.9999990534636534, iteration: 47356
loss: 0.9811132550239563,grad_norm: 0.9999992251189369, iteration: 47357
loss: 0.9998103976249695,grad_norm: 0.9999991390099505, iteration: 47358
loss: 1.0396530628204346,grad_norm: 0.9999989953055168, iteration: 47359
loss: 0.995897114276886,grad_norm: 0.9628730811462458, iteration: 47360
loss: 1.0392496585845947,grad_norm: 0.991368621358917, iteration: 47361
loss: 1.0186790227890015,grad_norm: 0.9479867316573559, iteration: 47362
loss: 0.9908257126808167,grad_norm: 0.9999990312304265, iteration: 47363
loss: 0.9766640067100525,grad_norm: 0.9102110295158781, iteration: 47364
loss: 1.0126980543136597,grad_norm: 0.9999990124282355, iteration: 47365
loss: 1.0414546728134155,grad_norm: 0.9999992018104519, iteration: 47366
loss: 0.9859369993209839,grad_norm: 0.9999991544062908, iteration: 47367
loss: 1.0156984329223633,grad_norm: 0.8541501073775495, iteration: 47368
loss: 1.0117521286010742,grad_norm: 0.9396111442664674, iteration: 47369
loss: 1.0038838386535645,grad_norm: 0.999999021755575, iteration: 47370
loss: 1.0026572942733765,grad_norm: 0.9999990425676194, iteration: 47371
loss: 1.030834436416626,grad_norm: 0.9999992283113963, iteration: 47372
loss: 1.0016039609909058,grad_norm: 0.9066981404698666, iteration: 47373
loss: 0.9545586705207825,grad_norm: 0.9999992141638107, iteration: 47374
loss: 0.974393904209137,grad_norm: 0.9999991517641724, iteration: 47375
loss: 1.0238336324691772,grad_norm: 0.9999992231687719, iteration: 47376
loss: 1.0299012660980225,grad_norm: 0.8242042338829271, iteration: 47377
loss: 0.9944907426834106,grad_norm: 0.8883496457505279, iteration: 47378
loss: 0.9899526238441467,grad_norm: 0.7653137699606081, iteration: 47379
loss: 1.003820538520813,grad_norm: 0.9999991771923751, iteration: 47380
loss: 1.0210292339324951,grad_norm: 0.8942393368409765, iteration: 47381
loss: 0.9864842295646667,grad_norm: 0.9215699331702216, iteration: 47382
loss: 1.0381968021392822,grad_norm: 0.9999990619113525, iteration: 47383
loss: 1.0081745386123657,grad_norm: 0.9999990760067109, iteration: 47384
loss: 1.0156903266906738,grad_norm: 0.8765436034243954, iteration: 47385
loss: 0.9960982799530029,grad_norm: 0.9999991402212121, iteration: 47386
loss: 1.010072946548462,grad_norm: 0.9999991515186908, iteration: 47387
loss: 1.0241080522537231,grad_norm: 0.9771745172113594, iteration: 47388
loss: 0.9915370345115662,grad_norm: 0.977902302411224, iteration: 47389
loss: 1.0128684043884277,grad_norm: 0.9999991418318899, iteration: 47390
loss: 1.0193372964859009,grad_norm: 0.8960622915994901, iteration: 47391
loss: 1.0054326057434082,grad_norm: 0.74976791732386, iteration: 47392
loss: 0.9825503826141357,grad_norm: 0.9757643720385373, iteration: 47393
loss: 1.0318198204040527,grad_norm: 0.9135938071825678, iteration: 47394
loss: 0.9842204451560974,grad_norm: 0.9999992427380012, iteration: 47395
loss: 1.0005582571029663,grad_norm: 0.8687058165963324, iteration: 47396
loss: 1.0079419612884521,grad_norm: 0.9999989025862926, iteration: 47397
loss: 1.0053566694259644,grad_norm: 0.9153411371820341, iteration: 47398
loss: 1.0066289901733398,grad_norm: 0.9999991957135383, iteration: 47399
loss: 1.0339174270629883,grad_norm: 0.8583076287045722, iteration: 47400
loss: 0.997850775718689,grad_norm: 0.8800436030533338, iteration: 47401
loss: 1.042914628982544,grad_norm: 0.9790598289940281, iteration: 47402
loss: 0.9949241876602173,grad_norm: 0.8133591140593014, iteration: 47403
loss: 0.9963328838348389,grad_norm: 0.9999989766447736, iteration: 47404
loss: 1.0065487623214722,grad_norm: 0.9070926922950981, iteration: 47405
loss: 1.0236341953277588,grad_norm: 0.9999990852388518, iteration: 47406
loss: 1.0084091424942017,grad_norm: 0.9999990829730321, iteration: 47407
loss: 0.9653000831604004,grad_norm: 0.9718871364384787, iteration: 47408
loss: 0.9697239995002747,grad_norm: 0.9999990439056342, iteration: 47409
loss: 1.0048854351043701,grad_norm: 0.9068864322771112, iteration: 47410
loss: 1.010467290878296,grad_norm: 0.9897970587114582, iteration: 47411
loss: 0.9986255764961243,grad_norm: 0.8961478267595808, iteration: 47412
loss: 1.022228717803955,grad_norm: 0.9623455817777643, iteration: 47413
loss: 1.0104948282241821,grad_norm: 0.9999990695189752, iteration: 47414
loss: 1.014168620109558,grad_norm: 0.9999990975436306, iteration: 47415
loss: 0.986156702041626,grad_norm: 0.9871567213105681, iteration: 47416
loss: 1.034630537033081,grad_norm: 0.9594797032195648, iteration: 47417
loss: 1.0182428359985352,grad_norm: 0.9063719851187079, iteration: 47418
loss: 0.9890531301498413,grad_norm: 0.844662716579682, iteration: 47419
loss: 1.0195330381393433,grad_norm: 0.9999992357609497, iteration: 47420
loss: 0.980429470539093,grad_norm: 0.9216748322442154, iteration: 47421
loss: 1.019108533859253,grad_norm: 0.7742575115373842, iteration: 47422
loss: 1.0163028240203857,grad_norm: 0.9575163749930256, iteration: 47423
loss: 0.9603990316390991,grad_norm: 0.9416768373819288, iteration: 47424
loss: 0.9858472347259521,grad_norm: 0.9744651381787942, iteration: 47425
loss: 1.0077680349349976,grad_norm: 0.8021328673244279, iteration: 47426
loss: 0.9947234392166138,grad_norm: 0.9888677279523163, iteration: 47427
loss: 1.0155476331710815,grad_norm: 0.9373839130688671, iteration: 47428
loss: 1.0117762088775635,grad_norm: 0.9814439871523816, iteration: 47429
loss: 0.979651927947998,grad_norm: 0.9999997295050308, iteration: 47430
loss: 1.026869773864746,grad_norm: 0.9999989165797658, iteration: 47431
loss: 0.9617059826850891,grad_norm: 0.9638530648755532, iteration: 47432
loss: 1.032451868057251,grad_norm: 0.8437904937549786, iteration: 47433
loss: 1.0509662628173828,grad_norm: 0.9999993391557275, iteration: 47434
loss: 0.9933372139930725,grad_norm: 0.9999991281793311, iteration: 47435
loss: 0.982841432094574,grad_norm: 0.8909526714703571, iteration: 47436
loss: 0.9953798055648804,grad_norm: 0.9582346785405608, iteration: 47437
loss: 1.0056036710739136,grad_norm: 0.9999991363229601, iteration: 47438
loss: 1.0558490753173828,grad_norm: 0.999999034966988, iteration: 47439
loss: 1.0151876211166382,grad_norm: 0.8906415663423584, iteration: 47440
loss: 0.9665306210517883,grad_norm: 0.9999990216276333, iteration: 47441
loss: 0.973010241985321,grad_norm: 0.9309118348490459, iteration: 47442
loss: 1.003646969795227,grad_norm: 0.9613697955744129, iteration: 47443
loss: 1.0311779975891113,grad_norm: 0.9999991542479768, iteration: 47444
loss: 0.9855729341506958,grad_norm: 0.9999990403283094, iteration: 47445
loss: 1.0033586025238037,grad_norm: 0.9999992002210532, iteration: 47446
loss: 1.0062572956085205,grad_norm: 0.9420542963058394, iteration: 47447
loss: 0.9961157441139221,grad_norm: 0.855296326467861, iteration: 47448
loss: 0.976502001285553,grad_norm: 0.916973204768599, iteration: 47449
loss: 1.0043479204177856,grad_norm: 0.9999992226103944, iteration: 47450
loss: 1.0206788778305054,grad_norm: 0.9358164344316642, iteration: 47451
loss: 1.0179880857467651,grad_norm: 0.8980712582061288, iteration: 47452
loss: 1.0001096725463867,grad_norm: 0.999999294840915, iteration: 47453
loss: 1.007115364074707,grad_norm: 0.9343119250208543, iteration: 47454
loss: 1.0115753412246704,grad_norm: 0.9999990418715409, iteration: 47455
loss: 1.0262517929077148,grad_norm: 0.999999037722112, iteration: 47456
loss: 0.9940847158432007,grad_norm: 0.9999990861133087, iteration: 47457
loss: 0.9528032541275024,grad_norm: 0.9787370526344658, iteration: 47458
loss: 1.0063854455947876,grad_norm: 0.9999991263799796, iteration: 47459
loss: 1.0239382982254028,grad_norm: 0.8389199584668374, iteration: 47460
loss: 0.985298216342926,grad_norm: 0.8800684956184034, iteration: 47461
loss: 1.0023856163024902,grad_norm: 0.9632496869320953, iteration: 47462
loss: 0.969170093536377,grad_norm: 0.9769122735970187, iteration: 47463
loss: 1.0102282762527466,grad_norm: 0.9999991315770974, iteration: 47464
loss: 1.0360010862350464,grad_norm: 0.9467188137193744, iteration: 47465
loss: 1.0049355030059814,grad_norm: 0.9999990521751498, iteration: 47466
loss: 0.9804495573043823,grad_norm: 0.9999991115962582, iteration: 47467
loss: 1.026661992073059,grad_norm: 0.8581003688467554, iteration: 47468
loss: 1.0078070163726807,grad_norm: 0.9999990938800426, iteration: 47469
loss: 1.0194116830825806,grad_norm: 0.9999990291995489, iteration: 47470
loss: 1.0240968465805054,grad_norm: 0.9194096434433199, iteration: 47471
loss: 0.999754786491394,grad_norm: 0.9721880698670475, iteration: 47472
loss: 0.9836952686309814,grad_norm: 0.9999989877265031, iteration: 47473
loss: 0.9404555559158325,grad_norm: 0.9999991478642397, iteration: 47474
loss: 0.997272789478302,grad_norm: 0.9827675598830985, iteration: 47475
loss: 0.985349178314209,grad_norm: 0.7444260335260559, iteration: 47476
loss: 1.0072462558746338,grad_norm: 0.9999989594257996, iteration: 47477
loss: 0.952318549156189,grad_norm: 0.818749073988228, iteration: 47478
loss: 0.98989337682724,grad_norm: 0.8957864620629838, iteration: 47479
loss: 1.0327361822128296,grad_norm: 0.9259360798868485, iteration: 47480
loss: 1.004144549369812,grad_norm: 0.9207925372767052, iteration: 47481
loss: 0.9637816548347473,grad_norm: 0.8701371801788121, iteration: 47482
loss: 0.9969367384910583,grad_norm: 0.9999990683785182, iteration: 47483
loss: 1.0267292261123657,grad_norm: 0.9258067745169567, iteration: 47484
loss: 0.9934296607971191,grad_norm: 0.9999990431856156, iteration: 47485
loss: 1.0062658786773682,grad_norm: 0.8793351903534357, iteration: 47486
loss: 0.9945580363273621,grad_norm: 0.9999991907694499, iteration: 47487
loss: 0.9954720139503479,grad_norm: 0.9195653850946384, iteration: 47488
loss: 1.0062613487243652,grad_norm: 0.8948684502390983, iteration: 47489
loss: 0.9396381974220276,grad_norm: 0.9999990768312714, iteration: 47490
loss: 0.9960215091705322,grad_norm: 0.9999991042668717, iteration: 47491
loss: 0.9910349249839783,grad_norm: 0.9999990376624135, iteration: 47492
loss: 1.0274752378463745,grad_norm: 0.8997094599759262, iteration: 47493
loss: 0.9993334412574768,grad_norm: 0.9999991714786436, iteration: 47494
loss: 1.0206108093261719,grad_norm: 0.99999902974528, iteration: 47495
loss: 0.9660220146179199,grad_norm: 0.9999990386949694, iteration: 47496
loss: 1.0408809185028076,grad_norm: 0.9072958025858565, iteration: 47497
loss: 1.011987328529358,grad_norm: 0.9017099198551597, iteration: 47498
loss: 0.9846635460853577,grad_norm: 0.8832887719612122, iteration: 47499
loss: 1.0187373161315918,grad_norm: 0.9999993737749018, iteration: 47500
loss: 1.050580620765686,grad_norm: 0.9779001834652961, iteration: 47501
loss: 1.0191222429275513,grad_norm: 0.9327306333750432, iteration: 47502
loss: 0.9938359260559082,grad_norm: 0.9999992038199544, iteration: 47503
loss: 1.0077131986618042,grad_norm: 0.9999990984920173, iteration: 47504
loss: 1.041203498840332,grad_norm: 0.9856746366255879, iteration: 47505
loss: 1.0061253309249878,grad_norm: 0.8855642966182261, iteration: 47506
loss: 1.0029646158218384,grad_norm: 0.9999993438417967, iteration: 47507
loss: 1.025480031967163,grad_norm: 0.8955423255882464, iteration: 47508
loss: 0.9918997287750244,grad_norm: 0.9999990899550323, iteration: 47509
loss: 0.9991041421890259,grad_norm: 0.999999110444175, iteration: 47510
loss: 1.0253351926803589,grad_norm: 0.8658633475685582, iteration: 47511
loss: 1.0393942594528198,grad_norm: 0.9999992378354492, iteration: 47512
loss: 1.0015132427215576,grad_norm: 0.8244356614994311, iteration: 47513
loss: 1.00062096118927,grad_norm: 0.8334218928467, iteration: 47514
loss: 0.9999061822891235,grad_norm: 0.9999990567361646, iteration: 47515
loss: 1.000771403312683,grad_norm: 0.9999989697188959, iteration: 47516
loss: 1.0086851119995117,grad_norm: 0.9999990990489805, iteration: 47517
loss: 1.0142686367034912,grad_norm: 0.9999993347220069, iteration: 47518
loss: 0.9981465339660645,grad_norm: 0.9774500025536784, iteration: 47519
loss: 0.9985505938529968,grad_norm: 0.9530221331727767, iteration: 47520
loss: 1.0368226766586304,grad_norm: 0.9999991187085632, iteration: 47521
loss: 0.9989697933197021,grad_norm: 0.9553402577339958, iteration: 47522
loss: 1.0181825160980225,grad_norm: 0.8742102107813747, iteration: 47523
loss: 1.005165696144104,grad_norm: 0.782673045294642, iteration: 47524
loss: 0.9899374842643738,grad_norm: 0.9999992699782227, iteration: 47525
loss: 0.9875304698944092,grad_norm: 0.9999993193341545, iteration: 47526
loss: 0.9835789203643799,grad_norm: 0.9025759312363161, iteration: 47527
loss: 1.0118615627288818,grad_norm: 0.9389173876063336, iteration: 47528
loss: 0.9928116798400879,grad_norm: 0.9999991484116637, iteration: 47529
loss: 1.014678955078125,grad_norm: 0.9999991524726268, iteration: 47530
loss: 0.9819832444190979,grad_norm: 0.9999991218570639, iteration: 47531
loss: 1.005066990852356,grad_norm: 0.9700513670764221, iteration: 47532
loss: 1.0052460432052612,grad_norm: 0.9650658669925847, iteration: 47533
loss: 0.986294150352478,grad_norm: 0.9777470015696673, iteration: 47534
loss: 1.0277870893478394,grad_norm: 0.999999105774976, iteration: 47535
loss: 1.0129106044769287,grad_norm: 0.9999992093303436, iteration: 47536
loss: 1.0053009986877441,grad_norm: 0.9001732603229146, iteration: 47537
loss: 1.0017956495285034,grad_norm: 0.9999991296680336, iteration: 47538
loss: 0.9794886112213135,grad_norm: 0.8226077540930462, iteration: 47539
loss: 1.0165396928787231,grad_norm: 0.9999990555322997, iteration: 47540
loss: 0.9760347604751587,grad_norm: 0.9999991400993805, iteration: 47541
loss: 1.0236454010009766,grad_norm: 0.9999991651532737, iteration: 47542
loss: 1.0033727884292603,grad_norm: 0.9291290827837035, iteration: 47543
loss: 1.0045524835586548,grad_norm: 0.888599434487443, iteration: 47544
loss: 1.0260242223739624,grad_norm: 0.9999993415521856, iteration: 47545
loss: 1.0219745635986328,grad_norm: 0.9999990455119618, iteration: 47546
loss: 1.0419018268585205,grad_norm: 0.999998931594282, iteration: 47547
loss: 0.9977028965950012,grad_norm: 0.9999991233994822, iteration: 47548
loss: 0.9857308268547058,grad_norm: 0.9494283708493894, iteration: 47549
loss: 1.02861487865448,grad_norm: 0.9445974728412645, iteration: 47550
loss: 1.039201021194458,grad_norm: 0.9999996710935917, iteration: 47551
loss: 0.980424702167511,grad_norm: 0.9999989778249994, iteration: 47552
loss: 1.0154224634170532,grad_norm: 0.9999990008622558, iteration: 47553
loss: 0.9973954558372498,grad_norm: 0.9999991714404806, iteration: 47554
loss: 0.989493191242218,grad_norm: 0.9999990940284161, iteration: 47555
loss: 0.966846227645874,grad_norm: 0.9999990475106453, iteration: 47556
loss: 1.0107169151306152,grad_norm: 0.8812032484139335, iteration: 47557
loss: 1.0537678003311157,grad_norm: 0.9999992378272674, iteration: 47558
loss: 0.9786686897277832,grad_norm: 0.999999006859402, iteration: 47559
loss: 0.9962562918663025,grad_norm: 0.9069593160033533, iteration: 47560
loss: 0.9945675134658813,grad_norm: 0.9999990646319606, iteration: 47561
loss: 1.0069304704666138,grad_norm: 0.860412576894287, iteration: 47562
loss: 1.0046340227127075,grad_norm: 0.9358173330331253, iteration: 47563
loss: 0.9824531078338623,grad_norm: 0.9330984230195011, iteration: 47564
loss: 0.9839094281196594,grad_norm: 0.8634856157355324, iteration: 47565
loss: 1.0097956657409668,grad_norm: 0.8978230657775395, iteration: 47566
loss: 0.983972430229187,grad_norm: 0.9152004320778037, iteration: 47567
loss: 1.0632429122924805,grad_norm: 0.9999995942755451, iteration: 47568
loss: 0.9764261841773987,grad_norm: 0.999999248180577, iteration: 47569
loss: 0.9288261532783508,grad_norm: 0.9999991596461106, iteration: 47570
loss: 1.0118720531463623,grad_norm: 0.9999991229263923, iteration: 47571
loss: 1.0213533639907837,grad_norm: 0.9999990907661773, iteration: 47572
loss: 0.9585180282592773,grad_norm: 0.9609715366103367, iteration: 47573
loss: 1.0487254858016968,grad_norm: 0.9565275928695998, iteration: 47574
loss: 1.0027539730072021,grad_norm: 0.8940216674823378, iteration: 47575
loss: 0.9981947541236877,grad_norm: 0.9999990327384773, iteration: 47576
loss: 1.0053423643112183,grad_norm: 0.9672458708132469, iteration: 47577
loss: 0.9925382137298584,grad_norm: 0.9850671362265795, iteration: 47578
loss: 1.0291385650634766,grad_norm: 0.9047032007787558, iteration: 47579
loss: 1.013505220413208,grad_norm: 0.9804333357041853, iteration: 47580
loss: 1.0011988878250122,grad_norm: 0.9999990783676377, iteration: 47581
loss: 0.9657187461853027,grad_norm: 0.9846149763806187, iteration: 47582
loss: 1.0383714437484741,grad_norm: 0.9999990604226993, iteration: 47583
loss: 1.0584967136383057,grad_norm: 0.9542807568708809, iteration: 47584
loss: 0.9929426312446594,grad_norm: 0.976214630547795, iteration: 47585
loss: 1.017824649810791,grad_norm: 0.9490712915342749, iteration: 47586
loss: 0.9762482643127441,grad_norm: 0.8573142080181404, iteration: 47587
loss: 1.0045536756515503,grad_norm: 0.9999991388062114, iteration: 47588
loss: 1.0001968145370483,grad_norm: 0.9999990550641974, iteration: 47589
loss: 0.9935389161109924,grad_norm: 0.9999990987168993, iteration: 47590
loss: 1.0222300291061401,grad_norm: 0.9923311685611961, iteration: 47591
loss: 0.9940974712371826,grad_norm: 0.8372748629066817, iteration: 47592
loss: 0.9996082782745361,grad_norm: 0.9999991287335955, iteration: 47593
loss: 1.016129732131958,grad_norm: 0.9999992173732554, iteration: 47594
loss: 1.0317226648330688,grad_norm: 0.9838821770924064, iteration: 47595
loss: 1.0014582872390747,grad_norm: 0.8500455612872171, iteration: 47596
loss: 1.0460602045059204,grad_norm: 0.9820055167905349, iteration: 47597
loss: 0.9920836687088013,grad_norm: 0.922100736946188, iteration: 47598
loss: 1.0399986505508423,grad_norm: 0.9999991045342423, iteration: 47599
loss: 0.9915520548820496,grad_norm: 0.8005201350732476, iteration: 47600
loss: 1.0024288892745972,grad_norm: 0.9024152869462293, iteration: 47601
loss: 1.016668438911438,grad_norm: 0.9369251942323572, iteration: 47602
loss: 0.9964714050292969,grad_norm: 0.9572381852417242, iteration: 47603
loss: 1.0074381828308105,grad_norm: 0.9833646927319555, iteration: 47604
loss: 0.9792782068252563,grad_norm: 0.8755272095647965, iteration: 47605
loss: 1.0159114599227905,grad_norm: 0.9783543618722267, iteration: 47606
loss: 1.1009607315063477,grad_norm: 0.9999998041980265, iteration: 47607
loss: 0.9881522059440613,grad_norm: 0.9999991995449721, iteration: 47608
loss: 1.0063996315002441,grad_norm: 0.8855389152965929, iteration: 47609
loss: 0.9987664818763733,grad_norm: 0.9389372351209807, iteration: 47610
loss: 0.9641236662864685,grad_norm: 0.9311190123542965, iteration: 47611
loss: 1.0068891048431396,grad_norm: 0.9999990125315904, iteration: 47612
loss: 1.0177366733551025,grad_norm: 0.999999109298645, iteration: 47613
loss: 1.0091935396194458,grad_norm: 0.91776011494699, iteration: 47614
loss: 0.9950992465019226,grad_norm: 0.9999991674197176, iteration: 47615
loss: 1.0140820741653442,grad_norm: 0.9999991540487534, iteration: 47616
loss: 0.980033814907074,grad_norm: 0.9251770665775272, iteration: 47617
loss: 1.058586835861206,grad_norm: 0.9999992671420348, iteration: 47618
loss: 1.0196974277496338,grad_norm: 0.9454680396921679, iteration: 47619
loss: 1.0542551279067993,grad_norm: 0.9999991737675408, iteration: 47620
loss: 0.9825895428657532,grad_norm: 0.9999991202377371, iteration: 47621
loss: 1.0102571249008179,grad_norm: 0.9999990399970138, iteration: 47622
loss: 1.0496729612350464,grad_norm: 0.986125254198752, iteration: 47623
loss: 1.0134806632995605,grad_norm: 0.7436527331674916, iteration: 47624
loss: 0.9728845953941345,grad_norm: 0.9098369394438929, iteration: 47625
loss: 0.9863709807395935,grad_norm: 0.9999989924465199, iteration: 47626
loss: 0.9916819334030151,grad_norm: 0.9678301967986301, iteration: 47627
loss: 1.0106092691421509,grad_norm: 0.8104285557217494, iteration: 47628
loss: 1.0038114786148071,grad_norm: 0.9434706233248982, iteration: 47629
loss: 0.9857094883918762,grad_norm: 0.9999990255053753, iteration: 47630
loss: 0.9971680641174316,grad_norm: 0.9999991957496432, iteration: 47631
loss: 0.9950043559074402,grad_norm: 0.9508941098258981, iteration: 47632
loss: 0.9832536578178406,grad_norm: 0.9256830523562304, iteration: 47633
loss: 0.9647623896598816,grad_norm: 0.999999063925773, iteration: 47634
loss: 1.0288033485412598,grad_norm: 0.999999178349964, iteration: 47635
loss: 1.0440441370010376,grad_norm: 0.9229287003071912, iteration: 47636
loss: 1.003409504890442,grad_norm: 0.9999991226189943, iteration: 47637
loss: 1.017250657081604,grad_norm: 0.9999990110698928, iteration: 47638
loss: 0.9916548132896423,grad_norm: 0.9999991146768272, iteration: 47639
loss: 1.0236519575119019,grad_norm: 0.99999910034888, iteration: 47640
loss: 0.9824810028076172,grad_norm: 0.9999992982340513, iteration: 47641
loss: 1.006529688835144,grad_norm: 0.9999994904776388, iteration: 47642
loss: 1.0101920366287231,grad_norm: 0.9999991250138274, iteration: 47643
loss: 1.0078539848327637,grad_norm: 0.9329550154603863, iteration: 47644
loss: 0.9929323792457581,grad_norm: 0.9484573648553812, iteration: 47645
loss: 0.9532942771911621,grad_norm: 0.9192721169692868, iteration: 47646
loss: 0.9936620593070984,grad_norm: 0.9448161858364972, iteration: 47647
loss: 0.9994305372238159,grad_norm: 0.9999990232208694, iteration: 47648
loss: 1.0196211338043213,grad_norm: 0.9999991664690362, iteration: 47649
loss: 0.9964960813522339,grad_norm: 0.926757831550269, iteration: 47650
loss: 0.9563552141189575,grad_norm: 0.9341823496284434, iteration: 47651
loss: 1.0060056447982788,grad_norm: 0.9999990856606042, iteration: 47652
loss: 1.0178277492523193,grad_norm: 0.9625615120599378, iteration: 47653
loss: 1.041142225265503,grad_norm: 0.9904902278514374, iteration: 47654
loss: 0.9929004907608032,grad_norm: 0.9864642580666085, iteration: 47655
loss: 1.017881989479065,grad_norm: 0.9999993911491712, iteration: 47656
loss: 1.0229984521865845,grad_norm: 0.9999990789060001, iteration: 47657
loss: 1.0074262619018555,grad_norm: 0.8633196044652737, iteration: 47658
loss: 0.9958163499832153,grad_norm: 0.999999782260212, iteration: 47659
loss: 0.994615375995636,grad_norm: 0.999999146473227, iteration: 47660
loss: 0.9983492493629456,grad_norm: 0.9999992535086542, iteration: 47661
loss: 0.9910992383956909,grad_norm: 0.9705483888854003, iteration: 47662
loss: 1.026955485343933,grad_norm: 0.9999990680819313, iteration: 47663
loss: 1.0344080924987793,grad_norm: 0.9246571225179572, iteration: 47664
loss: 0.9942494630813599,grad_norm: 0.9679258548937145, iteration: 47665
loss: 1.022359013557434,grad_norm: 0.9999990336840389, iteration: 47666
loss: 1.0237185955047607,grad_norm: 0.9999994151920175, iteration: 47667
loss: 1.0159351825714111,grad_norm: 0.9999990966644845, iteration: 47668
loss: 0.9983956217765808,grad_norm: 0.9999990514507532, iteration: 47669
loss: 0.9887694120407104,grad_norm: 0.9999991674069104, iteration: 47670
loss: 0.9662045240402222,grad_norm: 0.9999990235856155, iteration: 47671
loss: 1.01064133644104,grad_norm: 0.9999991097397022, iteration: 47672
loss: 1.0105583667755127,grad_norm: 0.9151585677094028, iteration: 47673
loss: 0.9543736577033997,grad_norm: 0.9999990975207028, iteration: 47674
loss: 0.9868841767311096,grad_norm: 0.9999990809475076, iteration: 47675
loss: 0.9793097376823425,grad_norm: 0.8718316798450046, iteration: 47676
loss: 1.0362660884857178,grad_norm: 0.9325497315102973, iteration: 47677
loss: 1.0020167827606201,grad_norm: 0.999999168396965, iteration: 47678
loss: 0.9625614285469055,grad_norm: 0.9480996117045328, iteration: 47679
loss: 0.9777010679244995,grad_norm: 0.9999993133200469, iteration: 47680
loss: 1.0275347232818604,grad_norm: 0.9999991967773547, iteration: 47681
loss: 1.0174424648284912,grad_norm: 0.9999991566935454, iteration: 47682
loss: 1.033227801322937,grad_norm: 0.8969568814804905, iteration: 47683
loss: 0.9764369130134583,grad_norm: 0.9999991868046783, iteration: 47684
loss: 1.0331997871398926,grad_norm: 0.9999991430893417, iteration: 47685
loss: 0.9727373123168945,grad_norm: 0.7912704910072641, iteration: 47686
loss: 1.0198713541030884,grad_norm: 0.9999991696025523, iteration: 47687
loss: 1.0261435508728027,grad_norm: 0.9999991106442416, iteration: 47688
loss: 0.9843751192092896,grad_norm: 0.9999991768408406, iteration: 47689
loss: 0.9789644479751587,grad_norm: 0.9999992018401962, iteration: 47690
loss: 0.9784371852874756,grad_norm: 0.985904954706434, iteration: 47691
loss: 0.9965961575508118,grad_norm: 0.9999989451589946, iteration: 47692
loss: 1.0323454141616821,grad_norm: 0.982648467387788, iteration: 47693
loss: 1.0154273509979248,grad_norm: 0.9999991554999444, iteration: 47694
loss: 1.0164868831634521,grad_norm: 0.957762062120528, iteration: 47695
loss: 0.987814724445343,grad_norm: 0.9999990056473987, iteration: 47696
loss: 0.9898492097854614,grad_norm: 0.9999995104646168, iteration: 47697
loss: 1.0098804235458374,grad_norm: 0.9999990359258935, iteration: 47698
loss: 0.9451982378959656,grad_norm: 0.9999991369143051, iteration: 47699
loss: 0.9989321827888489,grad_norm: 0.8724623020008084, iteration: 47700
loss: 0.9955008625984192,grad_norm: 0.9265445810634176, iteration: 47701
loss: 0.9648906588554382,grad_norm: 0.9999992373270217, iteration: 47702
loss: 0.9825992584228516,grad_norm: 0.9439357376719356, iteration: 47703
loss: 0.9978983998298645,grad_norm: 0.9604283063340568, iteration: 47704
loss: 0.9942018389701843,grad_norm: 0.9411189431360839, iteration: 47705
loss: 0.9843904376029968,grad_norm: 0.9999989738479005, iteration: 47706
loss: 1.024592399597168,grad_norm: 0.9999991587951241, iteration: 47707
loss: 0.9965724945068359,grad_norm: 0.9999991446816804, iteration: 47708
loss: 1.0197268724441528,grad_norm: 0.892549813556548, iteration: 47709
loss: 0.9908766150474548,grad_norm: 0.908607953012969, iteration: 47710
loss: 0.9292401671409607,grad_norm: 0.9999991582348408, iteration: 47711
loss: 1.0173691511154175,grad_norm: 0.9622045363071846, iteration: 47712
loss: 0.9933077692985535,grad_norm: 0.9733463460754935, iteration: 47713
loss: 1.0022423267364502,grad_norm: 0.9868998849993355, iteration: 47714
loss: 1.0321555137634277,grad_norm: 0.9999992038505346, iteration: 47715
loss: 1.0076247453689575,grad_norm: 0.8972923109369464, iteration: 47716
loss: 1.0101374387741089,grad_norm: 0.8560691937481059, iteration: 47717
loss: 0.9519324898719788,grad_norm: 0.9817272899684334, iteration: 47718
loss: 0.9865575432777405,grad_norm: 0.8995134786697685, iteration: 47719
loss: 1.0659393072128296,grad_norm: 0.9999993215979446, iteration: 47720
loss: 1.021275520324707,grad_norm: 0.9953699253250162, iteration: 47721
loss: 1.0188324451446533,grad_norm: 0.9502402701628113, iteration: 47722
loss: 1.0392584800720215,grad_norm: 0.9999994618501626, iteration: 47723
loss: 1.026763916015625,grad_norm: 0.9134977121443557, iteration: 47724
loss: 1.0280287265777588,grad_norm: 0.9947522332960643, iteration: 47725
loss: 1.0130665302276611,grad_norm: 0.9445008956025871, iteration: 47726
loss: 0.9855903387069702,grad_norm: 0.8655197687871222, iteration: 47727
loss: 1.0283024311065674,grad_norm: 0.9721752062158849, iteration: 47728
loss: 0.9771354794502258,grad_norm: 0.9616914612135156, iteration: 47729
loss: 0.9754943251609802,grad_norm: 0.9010227428788665, iteration: 47730
loss: 0.9837599992752075,grad_norm: 0.9999989195268978, iteration: 47731
loss: 1.0319404602050781,grad_norm: 0.9172642128575202, iteration: 47732
loss: 1.015093445777893,grad_norm: 0.8180118000377947, iteration: 47733
loss: 1.003738522529602,grad_norm: 0.999999197536605, iteration: 47734
loss: 1.0159157514572144,grad_norm: 0.9688754255890079, iteration: 47735
loss: 1.0147877931594849,grad_norm: 0.999999283623559, iteration: 47736
loss: 1.0066461563110352,grad_norm: 0.9123523662743142, iteration: 47737
loss: 0.9681966304779053,grad_norm: 0.9999990143239579, iteration: 47738
loss: 0.9981310963630676,grad_norm: 0.9943609313760864, iteration: 47739
loss: 1.0227245092391968,grad_norm: 0.9360353306875625, iteration: 47740
loss: 1.0193378925323486,grad_norm: 0.9999991679269093, iteration: 47741
loss: 1.0316053628921509,grad_norm: 0.9999990946346511, iteration: 47742
loss: 1.0182487964630127,grad_norm: 0.9821850459394863, iteration: 47743
loss: 0.996901273727417,grad_norm: 0.9822902653510268, iteration: 47744
loss: 1.0050475597381592,grad_norm: 0.8390823597155938, iteration: 47745
loss: 1.0154473781585693,grad_norm: 0.9999990887125355, iteration: 47746
loss: 1.005727767944336,grad_norm: 0.9999991554909229, iteration: 47747
loss: 0.9907940626144409,grad_norm: 0.9448988672865121, iteration: 47748
loss: 0.9839954376220703,grad_norm: 0.9999990599981302, iteration: 47749
loss: 0.9785096645355225,grad_norm: 0.9999992566423641, iteration: 47750
loss: 1.0011484622955322,grad_norm: 0.9999989809461992, iteration: 47751
loss: 1.0356615781784058,grad_norm: 0.9999990142552887, iteration: 47752
loss: 0.9934399127960205,grad_norm: 0.9999991043377191, iteration: 47753
loss: 1.0324702262878418,grad_norm: 0.9999995120063037, iteration: 47754
loss: 0.9922093152999878,grad_norm: 0.9999992255096323, iteration: 47755
loss: 1.0005394220352173,grad_norm: 0.9999988804636557, iteration: 47756
loss: 0.9756457209587097,grad_norm: 0.9999992152399423, iteration: 47757
loss: 0.9920420050621033,grad_norm: 0.9999990683157031, iteration: 47758
loss: 0.997527003288269,grad_norm: 0.9985985381797612, iteration: 47759
loss: 1.0034923553466797,grad_norm: 0.8438291382749606, iteration: 47760
loss: 1.0305650234222412,grad_norm: 0.9193989014777045, iteration: 47761
loss: 0.9646100997924805,grad_norm: 0.9999991960373841, iteration: 47762
loss: 0.9756277203559875,grad_norm: 0.9087261790877847, iteration: 47763
loss: 1.004654049873352,grad_norm: 0.9999990081399728, iteration: 47764
loss: 1.043558120727539,grad_norm: 0.937271713275762, iteration: 47765
loss: 1.0182037353515625,grad_norm: 0.9936009305547028, iteration: 47766
loss: 0.9489781260490417,grad_norm: 0.9920162314251462, iteration: 47767
loss: 1.003142237663269,grad_norm: 0.9908210318679224, iteration: 47768
loss: 1.0028899908065796,grad_norm: 0.9346171459939814, iteration: 47769
loss: 1.0231378078460693,grad_norm: 0.9999990963789971, iteration: 47770
loss: 0.9852399230003357,grad_norm: 0.969363592844719, iteration: 47771
loss: 1.0174216032028198,grad_norm: 0.999999021787534, iteration: 47772
loss: 0.9988479614257812,grad_norm: 0.9824906604684833, iteration: 47773
loss: 1.012416124343872,grad_norm: 0.9999992932637023, iteration: 47774
loss: 0.9760705232620239,grad_norm: 0.9999991884194034, iteration: 47775
loss: 1.0378648042678833,grad_norm: 0.9999991233409367, iteration: 47776
loss: 1.0306886434555054,grad_norm: 0.9999995878286011, iteration: 47777
loss: 1.0525161027908325,grad_norm: 0.9999992379146729, iteration: 47778
loss: 0.995583713054657,grad_norm: 0.9426734734026633, iteration: 47779
loss: 0.9942693710327148,grad_norm: 0.9999990273041576, iteration: 47780
loss: 1.1067743301391602,grad_norm: 0.9999994357902542, iteration: 47781
loss: 1.0338062047958374,grad_norm: 0.9999991985322203, iteration: 47782
loss: 1.0039759874343872,grad_norm: 0.9216284024718536, iteration: 47783
loss: 0.9752584099769592,grad_norm: 0.9751868522162038, iteration: 47784
loss: 0.9851422309875488,grad_norm: 0.9593235336645277, iteration: 47785
loss: 0.989571213722229,grad_norm: 0.8485225499539228, iteration: 47786
loss: 0.9785700440406799,grad_norm: 0.8623053554960941, iteration: 47787
loss: 0.9928438067436218,grad_norm: 0.9999990695159463, iteration: 47788
loss: 0.9661509990692139,grad_norm: 0.9347245759678362, iteration: 47789
loss: 1.0760910511016846,grad_norm: 0.9999994604101187, iteration: 47790
loss: 0.9578855633735657,grad_norm: 0.9999991844461834, iteration: 47791
loss: 1.043450117111206,grad_norm: 0.9999989678184699, iteration: 47792
loss: 0.991111159324646,grad_norm: 0.9999991493387512, iteration: 47793
loss: 0.9813390970230103,grad_norm: 0.999999075007047, iteration: 47794
loss: 0.9681974649429321,grad_norm: 0.9927520272639931, iteration: 47795
loss: 0.9823049902915955,grad_norm: 0.9286413824833706, iteration: 47796
loss: 1.0279980897903442,grad_norm: 0.9999996090551159, iteration: 47797
loss: 0.9992150664329529,grad_norm: 0.999999111379199, iteration: 47798
loss: 0.9882455468177795,grad_norm: 0.9096745043638099, iteration: 47799
loss: 1.0048645734786987,grad_norm: 0.9999991175263564, iteration: 47800
loss: 1.0013248920440674,grad_norm: 0.9999991130918461, iteration: 47801
loss: 0.9748870134353638,grad_norm: 0.8105638099537795, iteration: 47802
loss: 0.9877825379371643,grad_norm: 0.9781189109699803, iteration: 47803
loss: 1.001144528388977,grad_norm: 0.8315132286351057, iteration: 47804
loss: 0.9845163226127625,grad_norm: 0.9999991170640239, iteration: 47805
loss: 1.158859372138977,grad_norm: 0.9999997624784956, iteration: 47806
loss: 1.0038959980010986,grad_norm: 0.999999009309166, iteration: 47807
loss: 1.0298773050308228,grad_norm: 0.9999991096555169, iteration: 47808
loss: 1.0263781547546387,grad_norm: 0.945256012623492, iteration: 47809
loss: 1.0156763792037964,grad_norm: 0.8735931905893947, iteration: 47810
loss: 1.0028072595596313,grad_norm: 0.9999992698999388, iteration: 47811
loss: 0.9951214790344238,grad_norm: 0.9999990381068808, iteration: 47812
loss: 0.9980294704437256,grad_norm: 0.9687358657898105, iteration: 47813
loss: 0.9919820427894592,grad_norm: 0.8385848034514913, iteration: 47814
loss: 1.0126228332519531,grad_norm: 0.9999994439087562, iteration: 47815
loss: 0.9810085892677307,grad_norm: 0.9999991160346197, iteration: 47816
loss: 0.9916775822639465,grad_norm: 0.7769463488948896, iteration: 47817
loss: 1.024224877357483,grad_norm: 0.9999991470297617, iteration: 47818
loss: 1.1146882772445679,grad_norm: 0.9999995082342938, iteration: 47819
loss: 1.022108554840088,grad_norm: 0.9709236241671587, iteration: 47820
loss: 1.01805579662323,grad_norm: 0.9079334716569505, iteration: 47821
loss: 1.0057660341262817,grad_norm: 0.9999991227979496, iteration: 47822
loss: 0.9914133548736572,grad_norm: 0.9556783578355159, iteration: 47823
loss: 0.9983583688735962,grad_norm: 0.9638365839163278, iteration: 47824
loss: 0.9499773979187012,grad_norm: 0.8260441658493225, iteration: 47825
loss: 1.0121123790740967,grad_norm: 0.9999990683443403, iteration: 47826
loss: 0.9459391236305237,grad_norm: 0.9735920521829821, iteration: 47827
loss: 1.0200122594833374,grad_norm: 0.9999991076732286, iteration: 47828
loss: 1.0144027471542358,grad_norm: 0.9999990507790995, iteration: 47829
loss: 1.0261307954788208,grad_norm: 0.9999992049797993, iteration: 47830
loss: 1.009407877922058,grad_norm: 0.9999990738388219, iteration: 47831
loss: 1.016283631324768,grad_norm: 0.9999992380703481, iteration: 47832
loss: 0.9762742519378662,grad_norm: 0.9500660532629873, iteration: 47833
loss: 1.0129460096359253,grad_norm: 0.9999992434912536, iteration: 47834
loss: 1.0019216537475586,grad_norm: 0.9417970232005844, iteration: 47835
loss: 1.178311824798584,grad_norm: 0.9999998620174008, iteration: 47836
loss: 0.9947554469108582,grad_norm: 0.875484546112337, iteration: 47837
loss: 1.0243961811065674,grad_norm: 0.9999991180918348, iteration: 47838
loss: 1.0025042295455933,grad_norm: 0.999999276357305, iteration: 47839
loss: 0.9883731603622437,grad_norm: 0.9999990321510835, iteration: 47840
loss: 1.0445696115493774,grad_norm: 0.9999992247512085, iteration: 47841
loss: 1.0023893117904663,grad_norm: 0.9999991124124874, iteration: 47842
loss: 1.024238109588623,grad_norm: 0.9999993920583067, iteration: 47843
loss: 0.9862855672836304,grad_norm: 0.9999991629031674, iteration: 47844
loss: 1.0279223918914795,grad_norm: 0.9999990198493597, iteration: 47845
loss: 0.9713114500045776,grad_norm: 0.9061879940214258, iteration: 47846
loss: 1.0208643674850464,grad_norm: 0.9999990622208536, iteration: 47847
loss: 1.0389432907104492,grad_norm: 0.9999991816452898, iteration: 47848
loss: 1.0429645776748657,grad_norm: 0.9999991101933718, iteration: 47849
loss: 0.9675848484039307,grad_norm: 0.9916860487801823, iteration: 47850
loss: 1.0201624631881714,grad_norm: 0.83390534830386, iteration: 47851
loss: 1.0106096267700195,grad_norm: 0.898344521129865, iteration: 47852
loss: 1.0231784582138062,grad_norm: 0.9690385396384856, iteration: 47853
loss: 1.0195263624191284,grad_norm: 0.95564768639783, iteration: 47854
loss: 0.9983901381492615,grad_norm: 0.9999990218931498, iteration: 47855
loss: 1.0040512084960938,grad_norm: 0.876499013124671, iteration: 47856
loss: 0.9977777004241943,grad_norm: 0.9999991721652647, iteration: 47857
loss: 1.003589153289795,grad_norm: 0.9999992022916692, iteration: 47858
loss: 0.9648076295852661,grad_norm: 0.9039096381699001, iteration: 47859
loss: 0.9979690909385681,grad_norm: 0.9758656666387386, iteration: 47860
loss: 0.986672043800354,grad_norm: 0.9999990586893076, iteration: 47861
loss: 1.0026586055755615,grad_norm: 0.9999991471990262, iteration: 47862
loss: 0.9707968235015869,grad_norm: 0.9999990618883196, iteration: 47863
loss: 1.0176945924758911,grad_norm: 0.9999992638111644, iteration: 47864
loss: 0.9728792905807495,grad_norm: 0.9584155900619045, iteration: 47865
loss: 1.0284738540649414,grad_norm: 0.9368730381427173, iteration: 47866
loss: 0.9877803325653076,grad_norm: 0.9999989451256733, iteration: 47867
loss: 1.0433789491653442,grad_norm: 0.9999993742414283, iteration: 47868
loss: 0.971066415309906,grad_norm: 0.9579082889573579, iteration: 47869
loss: 1.0166773796081543,grad_norm: 0.9999992567106943, iteration: 47870
loss: 0.9728971123695374,grad_norm: 0.971082556795035, iteration: 47871
loss: 0.9989334940910339,grad_norm: 0.9999991855284123, iteration: 47872
loss: 0.9916090965270996,grad_norm: 0.9999990092040704, iteration: 47873
loss: 1.0328859090805054,grad_norm: 0.9716486365140198, iteration: 47874
loss: 1.0284026861190796,grad_norm: 0.9999992279635384, iteration: 47875
loss: 0.9972822666168213,grad_norm: 0.9165196356857476, iteration: 47876
loss: 1.0372881889343262,grad_norm: 0.9405072627875394, iteration: 47877
loss: 0.9905083775520325,grad_norm: 0.999999004944001, iteration: 47878
loss: 1.0305759906768799,grad_norm: 0.9682911999390653, iteration: 47879
loss: 1.0226833820343018,grad_norm: 0.8801058290836847, iteration: 47880
loss: 1.0004019737243652,grad_norm: 0.9234328699617267, iteration: 47881
loss: 1.0321871042251587,grad_norm: 0.9657344116653712, iteration: 47882
loss: 1.0230273008346558,grad_norm: 0.9999993666050899, iteration: 47883
loss: 0.9831712245941162,grad_norm: 0.9457941382238567, iteration: 47884
loss: 1.0301687717437744,grad_norm: 0.9291073408271577, iteration: 47885
loss: 1.0369511842727661,grad_norm: 0.9963071377649996, iteration: 47886
loss: 0.987141489982605,grad_norm: 0.9999990775950166, iteration: 47887
loss: 0.9768900275230408,grad_norm: 0.9077985257933872, iteration: 47888
loss: 0.9849646091461182,grad_norm: 0.9794791407964385, iteration: 47889
loss: 0.9931206703186035,grad_norm: 0.999999431331043, iteration: 47890
loss: 0.9788065552711487,grad_norm: 0.9999992533406771, iteration: 47891
loss: 1.0168911218643188,grad_norm: 0.999999734811181, iteration: 47892
loss: 1.0456162691116333,grad_norm: 0.9999993240196182, iteration: 47893
loss: 1.0196503400802612,grad_norm: 0.9999995690914828, iteration: 47894
loss: 1.0415724515914917,grad_norm: 0.9999990593312544, iteration: 47895
loss: 0.971639096736908,grad_norm: 0.9245047098184136, iteration: 47896
loss: 1.0266939401626587,grad_norm: 0.9999991445811995, iteration: 47897
loss: 0.9807169437408447,grad_norm: 0.8968192908417146, iteration: 47898
loss: 1.0236269235610962,grad_norm: 0.9999993253649347, iteration: 47899
loss: 0.9925851821899414,grad_norm: 0.9999991095586352, iteration: 47900
loss: 1.0175665616989136,grad_norm: 0.9999991205629054, iteration: 47901
loss: 0.9760696887969971,grad_norm: 0.9999992352712176, iteration: 47902
loss: 0.9871234893798828,grad_norm: 0.9772081831914396, iteration: 47903
loss: 1.0067684650421143,grad_norm: 0.9538021857187093, iteration: 47904
loss: 1.0228826999664307,grad_norm: 0.9999991442383909, iteration: 47905
loss: 1.0008187294006348,grad_norm: 0.9999990380042314, iteration: 47906
loss: 1.0168695449829102,grad_norm: 0.8609992008412827, iteration: 47907
loss: 0.9825992584228516,grad_norm: 0.8836195367399733, iteration: 47908
loss: 1.017815113067627,grad_norm: 0.9999990772447088, iteration: 47909
loss: 1.0069931745529175,grad_norm: 0.9210913628226984, iteration: 47910
loss: 1.023574948310852,grad_norm: 0.9999995660638231, iteration: 47911
loss: 1.0235347747802734,grad_norm: 0.9999994610304703, iteration: 47912
loss: 1.114405632019043,grad_norm: 0.9999994904496505, iteration: 47913
loss: 1.0342837572097778,grad_norm: 0.9210170094918383, iteration: 47914
loss: 1.032049536705017,grad_norm: 0.9999991667059211, iteration: 47915
loss: 1.0264545679092407,grad_norm: 0.9999990586298466, iteration: 47916
loss: 0.984241247177124,grad_norm: 0.9026962796787028, iteration: 47917
loss: 0.9792057871818542,grad_norm: 0.9999990154749738, iteration: 47918
loss: 1.020500659942627,grad_norm: 0.999999001532899, iteration: 47919
loss: 1.0099341869354248,grad_norm: 0.9999991822395325, iteration: 47920
loss: 0.9709630608558655,grad_norm: 0.9999991402296277, iteration: 47921
loss: 1.007907748222351,grad_norm: 0.9999991234330411, iteration: 47922
loss: 0.9863851070404053,grad_norm: 0.8873906756914572, iteration: 47923
loss: 1.0339162349700928,grad_norm: 0.9999993428278968, iteration: 47924
loss: 1.0221424102783203,grad_norm: 0.9999991991957881, iteration: 47925
loss: 1.0247911214828491,grad_norm: 0.999999196276938, iteration: 47926
loss: 1.0008498430252075,grad_norm: 0.9999998402698531, iteration: 47927
loss: 1.0242446660995483,grad_norm: 0.9244817100676597, iteration: 47928
loss: 0.9773902297019958,grad_norm: 0.9679973774769053, iteration: 47929
loss: 1.0008167028427124,grad_norm: 0.9520215013906012, iteration: 47930
loss: 1.016454815864563,grad_norm: 0.9991452339931419, iteration: 47931
loss: 1.0045270919799805,grad_norm: 0.9999992271078438, iteration: 47932
loss: 0.9904928207397461,grad_norm: 0.9999992592103323, iteration: 47933
loss: 0.9952117800712585,grad_norm: 0.7153490633460224, iteration: 47934
loss: 0.9835497140884399,grad_norm: 0.9301132203686134, iteration: 47935
loss: 0.9956342577934265,grad_norm: 0.9822506429018865, iteration: 47936
loss: 0.9808608293533325,grad_norm: 0.999999120974588, iteration: 47937
loss: 1.0180177688598633,grad_norm: 0.9999995888360913, iteration: 47938
loss: 1.0116596221923828,grad_norm: 0.9999991996110524, iteration: 47939
loss: 1.018876314163208,grad_norm: 0.9999992018963221, iteration: 47940
loss: 0.9902938008308411,grad_norm: 0.9904714345044973, iteration: 47941
loss: 0.9711290597915649,grad_norm: 0.9999991173761504, iteration: 47942
loss: 0.9964704513549805,grad_norm: 0.9338272542317736, iteration: 47943
loss: 1.0150657892227173,grad_norm: 0.870417345106464, iteration: 47944
loss: 1.007348895072937,grad_norm: 0.8574068788998349, iteration: 47945
loss: 0.9923750162124634,grad_norm: 0.9999990945177356, iteration: 47946
loss: 1.0047749280929565,grad_norm: 0.9208920986615664, iteration: 47947
loss: 1.0326130390167236,grad_norm: 0.9999991242531677, iteration: 47948
loss: 1.0411767959594727,grad_norm: 0.9999991541271392, iteration: 47949
loss: 1.0108164548873901,grad_norm: 0.9999990760025078, iteration: 47950
loss: 0.990634560585022,grad_norm: 0.9999993598322001, iteration: 47951
loss: 1.008616328239441,grad_norm: 0.9906241563317428, iteration: 47952
loss: 1.0182886123657227,grad_norm: 0.9999991565415036, iteration: 47953
loss: 1.017656922340393,grad_norm: 0.9999991190034593, iteration: 47954
loss: 0.9779985547065735,grad_norm: 0.9414117154879968, iteration: 47955
loss: 1.0016281604766846,grad_norm: 0.9999992090122154, iteration: 47956
loss: 1.0254082679748535,grad_norm: 0.9999988788117755, iteration: 47957
loss: 1.0059738159179688,grad_norm: 0.9999989807698114, iteration: 47958
loss: 0.9797624945640564,grad_norm: 0.9999992166916133, iteration: 47959
loss: 1.0265223979949951,grad_norm: 0.9999990368075858, iteration: 47960
loss: 1.0130691528320312,grad_norm: 0.9999992478132722, iteration: 47961
loss: 1.0413140058517456,grad_norm: 0.9999992073892638, iteration: 47962
loss: 1.0044044256210327,grad_norm: 0.9999990858663964, iteration: 47963
loss: 1.0366884469985962,grad_norm: 0.9999991294269814, iteration: 47964
loss: 0.9946975111961365,grad_norm: 0.9500714518949462, iteration: 47965
loss: 0.993247926235199,grad_norm: 0.9480732989109348, iteration: 47966
loss: 0.9876645803451538,grad_norm: 0.9999991019289187, iteration: 47967
loss: 1.0237276554107666,grad_norm: 0.9999990319781283, iteration: 47968
loss: 1.0084986686706543,grad_norm: 0.9999991535524757, iteration: 47969
loss: 1.0082144737243652,grad_norm: 0.9075690848247009, iteration: 47970
loss: 0.9985629320144653,grad_norm: 0.9999991872234223, iteration: 47971
loss: 0.9861583709716797,grad_norm: 0.9999992793841652, iteration: 47972
loss: 1.0167148113250732,grad_norm: 0.8742693470254359, iteration: 47973
loss: 1.0332727432250977,grad_norm: 0.9645782126436458, iteration: 47974
loss: 1.0057239532470703,grad_norm: 0.9526536116181276, iteration: 47975
loss: 1.0001641511917114,grad_norm: 0.9215048453113657, iteration: 47976
loss: 0.982663094997406,grad_norm: 0.8546444837010678, iteration: 47977
loss: 1.0278605222702026,grad_norm: 0.9999991217269826, iteration: 47978
loss: 0.9977381825447083,grad_norm: 0.9999991025667585, iteration: 47979
loss: 1.0073232650756836,grad_norm: 0.9999991641022514, iteration: 47980
loss: 0.9633275866508484,grad_norm: 0.9578696519157213, iteration: 47981
loss: 1.0044598579406738,grad_norm: 0.9999989944810803, iteration: 47982
loss: 1.0527971982955933,grad_norm: 0.885512954378441, iteration: 47983
loss: 1.0105291604995728,grad_norm: 0.9999992045345386, iteration: 47984
loss: 0.9662050008773804,grad_norm: 0.9999989662417708, iteration: 47985
loss: 0.9708501696586609,grad_norm: 0.9999990189072192, iteration: 47986
loss: 1.0197367668151855,grad_norm: 0.9999991661002017, iteration: 47987
loss: 0.9685077667236328,grad_norm: 0.9999990227243633, iteration: 47988
loss: 0.9802420139312744,grad_norm: 0.8120154092765728, iteration: 47989
loss: 1.010146975517273,grad_norm: 0.9999990232864238, iteration: 47990
loss: 1.0094075202941895,grad_norm: 0.9999990296748242, iteration: 47991
loss: 0.9869227409362793,grad_norm: 0.9999991299748989, iteration: 47992
loss: 1.0160682201385498,grad_norm: 0.9999992323371586, iteration: 47993
loss: 1.0313867330551147,grad_norm: 0.9999990967922923, iteration: 47994
loss: 1.034017562866211,grad_norm: 0.9999989872889676, iteration: 47995
loss: 1.0125963687896729,grad_norm: 0.9310351873640698, iteration: 47996
loss: 0.9834553599357605,grad_norm: 0.9999991673229658, iteration: 47997
loss: 1.032798171043396,grad_norm: 0.9999992257740591, iteration: 47998
loss: 1.0085525512695312,grad_norm: 0.8823356952552581, iteration: 47999
loss: 1.006221890449524,grad_norm: 0.9999992703333148, iteration: 48000
loss: 0.9928469061851501,grad_norm: 0.997364014215545, iteration: 48001
loss: 0.9971289038658142,grad_norm: 0.9820581649621275, iteration: 48002
loss: 1.015071153640747,grad_norm: 0.9763291482205273, iteration: 48003
loss: 1.0243494510650635,grad_norm: 0.9999990689185619, iteration: 48004
loss: 1.000288724899292,grad_norm: 0.9999994526626742, iteration: 48005
loss: 0.9967501759529114,grad_norm: 0.9392096705879892, iteration: 48006
loss: 1.004227876663208,grad_norm: 0.999999088949978, iteration: 48007
loss: 1.0312895774841309,grad_norm: 0.999998985740986, iteration: 48008
loss: 1.026232361793518,grad_norm: 0.999999103827197, iteration: 48009
loss: 0.9900625944137573,grad_norm: 0.8929413104903342, iteration: 48010
loss: 0.996412992477417,grad_norm: 0.9376065174601341, iteration: 48011
loss: 0.9874338507652283,grad_norm: 0.9928233353084284, iteration: 48012
loss: 0.9987345933914185,grad_norm: 0.999999148917498, iteration: 48013
loss: 1.0258445739746094,grad_norm: 0.9999993194282087, iteration: 48014
loss: 1.0246033668518066,grad_norm: 0.9281219236706185, iteration: 48015
loss: 0.9880244731903076,grad_norm: 0.9273385968811858, iteration: 48016
loss: 1.0297391414642334,grad_norm: 0.9999992159418984, iteration: 48017
loss: 0.9820803999900818,grad_norm: 0.8180775960088191, iteration: 48018
loss: 0.9929858446121216,grad_norm: 0.9999991397600326, iteration: 48019
loss: 1.033047080039978,grad_norm: 0.9322138591112064, iteration: 48020
loss: 1.0114624500274658,grad_norm: 0.9659392890950425, iteration: 48021
loss: 1.0153313875198364,grad_norm: 0.9903068433692034, iteration: 48022
loss: 1.012967824935913,grad_norm: 0.9706334382774184, iteration: 48023
loss: 1.0140125751495361,grad_norm: 0.8326283065549204, iteration: 48024
loss: 0.9960179328918457,grad_norm: 0.9009552315427907, iteration: 48025
loss: 0.9843283295631409,grad_norm: 0.9999992644436221, iteration: 48026
loss: 0.9660027623176575,grad_norm: 0.9892043279554327, iteration: 48027
loss: 0.9979814291000366,grad_norm: 0.9999996168093926, iteration: 48028
loss: 0.9832594394683838,grad_norm: 0.9042369244118117, iteration: 48029
loss: 0.9661617875099182,grad_norm: 0.9999990984610624, iteration: 48030
loss: 0.976940393447876,grad_norm: 0.9022330766883172, iteration: 48031
loss: 1.0474752187728882,grad_norm: 0.8957728720419783, iteration: 48032
loss: 0.9970794320106506,grad_norm: 0.999999010983233, iteration: 48033
loss: 1.02374267578125,grad_norm: 0.9768538320333461, iteration: 48034
loss: 1.0102118253707886,grad_norm: 0.9607673966005531, iteration: 48035
loss: 1.0074039697647095,grad_norm: 0.8347423904946506, iteration: 48036
loss: 1.0180665254592896,grad_norm: 0.937071441275967, iteration: 48037
loss: 0.9798193573951721,grad_norm: 0.9999990824692693, iteration: 48038
loss: 1.0189228057861328,grad_norm: 0.9999993166464001, iteration: 48039
loss: 0.9976335763931274,grad_norm: 0.9778952979006713, iteration: 48040
loss: 1.0289705991744995,grad_norm: 0.9999992451460914, iteration: 48041
loss: 0.9832828044891357,grad_norm: 0.9403819568650796, iteration: 48042
loss: 1.0021964311599731,grad_norm: 0.9999990958727915, iteration: 48043
loss: 0.9877116084098816,grad_norm: 0.8821560049139, iteration: 48044
loss: 0.9965236186981201,grad_norm: 0.9999991382899251, iteration: 48045
loss: 1.1050779819488525,grad_norm: 0.9999994755017123, iteration: 48046
loss: 0.956976056098938,grad_norm: 0.9999991912399286, iteration: 48047
loss: 0.9656693935394287,grad_norm: 0.9999990674266708, iteration: 48048
loss: 0.9549366235733032,grad_norm: 0.9049022088800422, iteration: 48049
loss: 1.0142267942428589,grad_norm: 0.999999135739995, iteration: 48050
loss: 0.9847585558891296,grad_norm: 0.9156772831898807, iteration: 48051
loss: 0.9797316789627075,grad_norm: 0.9999991534183817, iteration: 48052
loss: 1.0157482624053955,grad_norm: 0.922644633880554, iteration: 48053
loss: 1.0313447713851929,grad_norm: 0.9999990127564371, iteration: 48054
loss: 0.9906436204910278,grad_norm: 0.9851742273992588, iteration: 48055
loss: 0.9861819744110107,grad_norm: 0.980732707616211, iteration: 48056
loss: 1.0226714611053467,grad_norm: 0.8786335253385467, iteration: 48057
loss: 1.076006531715393,grad_norm: 0.999999542820701, iteration: 48058
loss: 1.0332869291305542,grad_norm: 0.9999991496874067, iteration: 48059
loss: 0.9919666051864624,grad_norm: 0.9999992218008249, iteration: 48060
loss: 1.0136253833770752,grad_norm: 0.9999989690867641, iteration: 48061
loss: 1.038800835609436,grad_norm: 0.9999990962817741, iteration: 48062
loss: 1.045356035232544,grad_norm: 0.9999990072904074, iteration: 48063
loss: 1.165470838546753,grad_norm: 0.9999999371811699, iteration: 48064
loss: 0.997694194316864,grad_norm: 0.9999990596089906, iteration: 48065
loss: 1.0339428186416626,grad_norm: 0.9454109565408779, iteration: 48066
loss: 0.959330677986145,grad_norm: 0.999999260899501, iteration: 48067
loss: 0.9991499781608582,grad_norm: 0.9999990408562666, iteration: 48068
loss: 1.020500898361206,grad_norm: 0.9030930753397978, iteration: 48069
loss: 0.9855340123176575,grad_norm: 0.9999993276102509, iteration: 48070
loss: 0.9965382218360901,grad_norm: 0.9999990366434055, iteration: 48071
loss: 1.0001847743988037,grad_norm: 0.9999991742618752, iteration: 48072
loss: 0.9871724247932434,grad_norm: 0.9469739097344956, iteration: 48073
loss: 1.0084456205368042,grad_norm: 0.7996055133613821, iteration: 48074
loss: 1.0315594673156738,grad_norm: 0.9999995933562048, iteration: 48075
loss: 1.010671854019165,grad_norm: 0.9828559910093975, iteration: 48076
loss: 0.9970505833625793,grad_norm: 0.9114973272779129, iteration: 48077
loss: 1.0688464641571045,grad_norm: 0.9999991265204266, iteration: 48078
loss: 0.9924839735031128,grad_norm: 0.9999989704788218, iteration: 48079
loss: 1.0228036642074585,grad_norm: 0.9999993757940139, iteration: 48080
loss: 0.9795985817909241,grad_norm: 0.9122007886203088, iteration: 48081
loss: 0.9999998807907104,grad_norm: 0.953419429155642, iteration: 48082
loss: 0.9794276356697083,grad_norm: 0.8771809485498362, iteration: 48083
loss: 1.012169599533081,grad_norm: 0.9913662443144222, iteration: 48084
loss: 0.9587908983230591,grad_norm: 0.9219653254989262, iteration: 48085
loss: 1.0044697523117065,grad_norm: 0.9268722203773242, iteration: 48086
loss: 1.0039479732513428,grad_norm: 0.8532164266738288, iteration: 48087
loss: 1.0433861017227173,grad_norm: 0.9999991280329699, iteration: 48088
loss: 0.9974546432495117,grad_norm: 0.8606016379476251, iteration: 48089
loss: 0.9905092120170593,grad_norm: 0.89542561726603, iteration: 48090
loss: 1.106109857559204,grad_norm: 0.999999521196332, iteration: 48091
loss: 0.9964532256126404,grad_norm: 0.9999990942712851, iteration: 48092
loss: 1.018000841140747,grad_norm: 0.9760168153601353, iteration: 48093
loss: 0.9747127294540405,grad_norm: 0.8960032385143886, iteration: 48094
loss: 0.9751666784286499,grad_norm: 0.8355263978712203, iteration: 48095
loss: 1.01300847530365,grad_norm: 0.9999996204861152, iteration: 48096
loss: 0.9711061716079712,grad_norm: 0.7917594256219803, iteration: 48097
loss: 1.0091605186462402,grad_norm: 0.9302343579177506, iteration: 48098
loss: 0.9937339425086975,grad_norm: 0.987838651148373, iteration: 48099
loss: 1.0266493558883667,grad_norm: 0.9999991879358497, iteration: 48100
loss: 0.9876925945281982,grad_norm: 0.8475432776566119, iteration: 48101
loss: 1.0259780883789062,grad_norm: 0.9999990731001691, iteration: 48102
loss: 1.0525497198104858,grad_norm: 0.9999991119858599, iteration: 48103
loss: 1.0302785634994507,grad_norm: 0.9999991245627476, iteration: 48104
loss: 0.9756495356559753,grad_norm: 0.9462092712894209, iteration: 48105
loss: 1.0285146236419678,grad_norm: 0.999999208336693, iteration: 48106
loss: 0.9859251976013184,grad_norm: 0.9999990628343198, iteration: 48107
loss: 1.0224659442901611,grad_norm: 0.7891297080590935, iteration: 48108
loss: 0.9876720309257507,grad_norm: 0.9999990546736129, iteration: 48109
loss: 1.02176034450531,grad_norm: 0.9999992295791057, iteration: 48110
loss: 1.0272289514541626,grad_norm: 0.8657131069868139, iteration: 48111
loss: 1.0392093658447266,grad_norm: 0.9999991786026368, iteration: 48112
loss: 1.0261553525924683,grad_norm: 0.9999991501457889, iteration: 48113
loss: 1.0068110227584839,grad_norm: 0.9916681868747992, iteration: 48114
loss: 0.9773141741752625,grad_norm: 0.8796284396968553, iteration: 48115
loss: 0.9995048642158508,grad_norm: 0.9999989820877316, iteration: 48116
loss: 0.9916813373565674,grad_norm: 0.9682427132115972, iteration: 48117
loss: 1.0127075910568237,grad_norm: 0.9999992951937368, iteration: 48118
loss: 1.0134210586547852,grad_norm: 0.9132330558543157, iteration: 48119
loss: 1.0258007049560547,grad_norm: 0.9999990570830833, iteration: 48120
loss: 0.9976407885551453,grad_norm: 0.89746781398514, iteration: 48121
loss: 0.9707031846046448,grad_norm: 0.9798936809166782, iteration: 48122
loss: 1.0064152479171753,grad_norm: 0.999999033762801, iteration: 48123
loss: 0.9916893243789673,grad_norm: 0.9999990778129969, iteration: 48124
loss: 0.98213791847229,grad_norm: 0.9999991243726714, iteration: 48125
loss: 1.0338016748428345,grad_norm: 0.9999990378624477, iteration: 48126
loss: 1.0157533884048462,grad_norm: 0.9999991633358525, iteration: 48127
loss: 1.0504902601242065,grad_norm: 1.000000001087061, iteration: 48128
loss: 1.0150820016860962,grad_norm: 0.9835025200816159, iteration: 48129
loss: 0.9826303124427795,grad_norm: 0.9999991643761237, iteration: 48130
loss: 1.0178937911987305,grad_norm: 0.9999991533112395, iteration: 48131
loss: 1.0006846189498901,grad_norm: 0.8121587763700532, iteration: 48132
loss: 1.0205951929092407,grad_norm: 0.999999051844101, iteration: 48133
loss: 0.9864173531532288,grad_norm: 0.8936554483661253, iteration: 48134
loss: 1.0227912664413452,grad_norm: 0.9054636188541921, iteration: 48135
loss: 1.0259071588516235,grad_norm: 0.9999991335749338, iteration: 48136
loss: 1.0086290836334229,grad_norm: 0.9827547481701103, iteration: 48137
loss: 1.0125046968460083,grad_norm: 0.999999658398115, iteration: 48138
loss: 1.028809666633606,grad_norm: 0.9999992072756441, iteration: 48139
loss: 1.0207136869430542,grad_norm: 0.879717040856628, iteration: 48140
loss: 0.9792012572288513,grad_norm: 0.8045617027481664, iteration: 48141
loss: 1.0276280641555786,grad_norm: 0.9999990753995106, iteration: 48142
loss: 1.0112992525100708,grad_norm: 0.999999229985961, iteration: 48143
loss: 0.9944030046463013,grad_norm: 0.945851142081015, iteration: 48144
loss: 1.0495991706848145,grad_norm: 0.9999990433939162, iteration: 48145
loss: 0.9748515486717224,grad_norm: 0.9999992718820668, iteration: 48146
loss: 1.0607730150222778,grad_norm: 0.8967972326711265, iteration: 48147
loss: 1.0213819742202759,grad_norm: 0.9999993372329177, iteration: 48148
loss: 1.0241690874099731,grad_norm: 0.938313508924896, iteration: 48149
loss: 0.9942216873168945,grad_norm: 0.9999992815435896, iteration: 48150
loss: 1.0088921785354614,grad_norm: 0.865040387455708, iteration: 48151
loss: 1.0376673936843872,grad_norm: 0.9999991174605081, iteration: 48152
loss: 0.984656810760498,grad_norm: 0.930091492645174, iteration: 48153
loss: 1.012982726097107,grad_norm: 0.9999991121923517, iteration: 48154
loss: 0.9980365037918091,grad_norm: 0.9999991896561041, iteration: 48155
loss: 1.000327467918396,grad_norm: 0.9999992361096259, iteration: 48156
loss: 0.9951356649398804,grad_norm: 0.9943014441351827, iteration: 48157
loss: 0.9784529805183411,grad_norm: 0.8483618759250042, iteration: 48158
loss: 0.9777876734733582,grad_norm: 0.9999990420076298, iteration: 48159
loss: 1.0149996280670166,grad_norm: 0.9999992016078122, iteration: 48160
loss: 0.9831677675247192,grad_norm: 0.9999997656163536, iteration: 48161
loss: 1.0019761323928833,grad_norm: 0.9999991826650395, iteration: 48162
loss: 1.005037784576416,grad_norm: 0.8728337551063614, iteration: 48163
loss: 0.9920765161514282,grad_norm: 0.9999992205304166, iteration: 48164
loss: 1.0083342790603638,grad_norm: 0.9999989474336458, iteration: 48165
loss: 1.024893879890442,grad_norm: 0.9931109329470957, iteration: 48166
loss: 1.0248910188674927,grad_norm: 0.9999993599031614, iteration: 48167
loss: 1.0403497219085693,grad_norm: 0.9999999290114941, iteration: 48168
loss: 1.0535389184951782,grad_norm: 0.9999991414171605, iteration: 48169
loss: 0.9922089576721191,grad_norm: 0.9999991378642339, iteration: 48170
loss: 0.9938710331916809,grad_norm: 0.879901092337919, iteration: 48171
loss: 0.9984667897224426,grad_norm: 0.9708019850973241, iteration: 48172
loss: 1.0265874862670898,grad_norm: 0.9999991974282983, iteration: 48173
loss: 0.9884052872657776,grad_norm: 0.9999991207225962, iteration: 48174
loss: 1.0220248699188232,grad_norm: 0.9999990570783661, iteration: 48175
loss: 1.0134086608886719,grad_norm: 0.9999994374979375, iteration: 48176
loss: 0.9836333990097046,grad_norm: 0.9999989937465799, iteration: 48177
loss: 0.996370255947113,grad_norm: 0.9820325137489164, iteration: 48178
loss: 0.9831967949867249,grad_norm: 0.9056846670625397, iteration: 48179
loss: 1.0166913270950317,grad_norm: 0.9999991638179146, iteration: 48180
loss: 0.9968942403793335,grad_norm: 0.8861752094858119, iteration: 48181
loss: 1.011096715927124,grad_norm: 0.9999991222336202, iteration: 48182
loss: 1.022039532661438,grad_norm: 0.9999995689335947, iteration: 48183
loss: 1.0119739770889282,grad_norm: 0.9461182973415572, iteration: 48184
loss: 0.972199559211731,grad_norm: 0.9635123894477884, iteration: 48185
loss: 0.9920207858085632,grad_norm: 0.980406199148352, iteration: 48186
loss: 0.9979734420776367,grad_norm: 0.9999990650172819, iteration: 48187
loss: 1.0017603635787964,grad_norm: 0.908271710036967, iteration: 48188
loss: 0.9589004516601562,grad_norm: 0.9999989940163936, iteration: 48189
loss: 1.0280827283859253,grad_norm: 0.9999996388183184, iteration: 48190
loss: 0.9853731393814087,grad_norm: 0.972267359179892, iteration: 48191
loss: 1.0388646125793457,grad_norm: 0.8657069960916429, iteration: 48192
loss: 1.040602207183838,grad_norm: 0.9999990685641879, iteration: 48193
loss: 1.0419085025787354,grad_norm: 0.9999991029614599, iteration: 48194
loss: 0.9789236187934875,grad_norm: 0.9999990996101538, iteration: 48195
loss: 1.0293184518814087,grad_norm: 0.9999996244225526, iteration: 48196
loss: 1.0315037965774536,grad_norm: 0.883086340433173, iteration: 48197
loss: 0.9398682117462158,grad_norm: 0.9999992569141132, iteration: 48198
loss: 0.9879810810089111,grad_norm: 0.9999991266173713, iteration: 48199
loss: 1.0068995952606201,grad_norm: 0.9089096278245568, iteration: 48200
loss: 0.9926307201385498,grad_norm: 0.9999990834466342, iteration: 48201
loss: 1.0224851369857788,grad_norm: 0.9999990964495815, iteration: 48202
loss: 0.9667828679084778,grad_norm: 0.999999131135384, iteration: 48203
loss: 1.0100977420806885,grad_norm: 0.969849838623204, iteration: 48204
loss: 1.024617314338684,grad_norm: 0.9999990773512537, iteration: 48205
loss: 1.0235909223556519,grad_norm: 0.9743178876500431, iteration: 48206
loss: 0.9928582310676575,grad_norm: 0.9999990250515761, iteration: 48207
loss: 0.9711925387382507,grad_norm: 0.9846039053329563, iteration: 48208
loss: 1.023946762084961,grad_norm: 0.9999992366116135, iteration: 48209
loss: 1.0017681121826172,grad_norm: 0.9999990530392036, iteration: 48210
loss: 1.029962420463562,grad_norm: 0.9277762969721656, iteration: 48211
loss: 1.029232382774353,grad_norm: 0.9999991399789706, iteration: 48212
loss: 1.0074231624603271,grad_norm: 0.9999989865833209, iteration: 48213
loss: 1.0307974815368652,grad_norm: 0.9368381405212222, iteration: 48214
loss: 1.013420581817627,grad_norm: 0.9502701000166109, iteration: 48215
loss: 0.9823467135429382,grad_norm: 0.9746118184402122, iteration: 48216
loss: 1.0163381099700928,grad_norm: 0.9999993678606087, iteration: 48217
loss: 0.9811628460884094,grad_norm: 0.9878458757927261, iteration: 48218
loss: 0.9728077054023743,grad_norm: 0.9050495788505076, iteration: 48219
loss: 1.014987826347351,grad_norm: 0.9999990773813017, iteration: 48220
loss: 1.001934289932251,grad_norm: 0.9682450338104733, iteration: 48221
loss: 1.0407530069351196,grad_norm: 0.9999992687032645, iteration: 48222
loss: 0.9975108504295349,grad_norm: 0.896435081572112, iteration: 48223
loss: 1.0716036558151245,grad_norm: 0.9999992686542749, iteration: 48224
loss: 1.0120505094528198,grad_norm: 0.9999990746385579, iteration: 48225
loss: 1.0275146961212158,grad_norm: 0.9999992030042494, iteration: 48226
loss: 1.012627124786377,grad_norm: 0.99999961497232, iteration: 48227
loss: 1.0488321781158447,grad_norm: 0.9999991719895476, iteration: 48228
loss: 0.9841659069061279,grad_norm: 0.9054634615011817, iteration: 48229
loss: 1.0066580772399902,grad_norm: 0.9999992087995119, iteration: 48230
loss: 1.0174916982650757,grad_norm: 0.999999262478437, iteration: 48231
loss: 0.9884545207023621,grad_norm: 0.9861312419864873, iteration: 48232
loss: 1.0085080862045288,grad_norm: 0.9999990862156917, iteration: 48233
loss: 0.9911676645278931,grad_norm: 0.9999990440931278, iteration: 48234
loss: 0.9910047650337219,grad_norm: 0.9999991389411954, iteration: 48235
loss: 1.0097335577011108,grad_norm: 0.9371790392337797, iteration: 48236
loss: 1.0033900737762451,grad_norm: 0.9907601183181406, iteration: 48237
loss: 0.9705649018287659,grad_norm: 0.9999991366132843, iteration: 48238
loss: 0.9976738691329956,grad_norm: 0.9999990094312621, iteration: 48239
loss: 0.9942064881324768,grad_norm: 0.9383888825420214, iteration: 48240
loss: 0.9688946008682251,grad_norm: 0.9999992207618636, iteration: 48241
loss: 1.001063585281372,grad_norm: 0.8885904647325502, iteration: 48242
loss: 1.0131551027297974,grad_norm: 0.922287622145276, iteration: 48243
loss: 0.987553596496582,grad_norm: 0.9436850824152717, iteration: 48244
loss: 1.0232278108596802,grad_norm: 0.9665193713536916, iteration: 48245
loss: 1.0101081132888794,grad_norm: 0.9999995066494345, iteration: 48246
loss: 1.0091228485107422,grad_norm: 0.9039551223097684, iteration: 48247
loss: 1.001957893371582,grad_norm: 0.9823012578573438, iteration: 48248
loss: 1.0043717622756958,grad_norm: 0.9999990475862212, iteration: 48249
loss: 0.9925593733787537,grad_norm: 0.9175833701905047, iteration: 48250
loss: 1.0187253952026367,grad_norm: 0.9116721575991229, iteration: 48251
loss: 0.9725980758666992,grad_norm: 0.9381267651091715, iteration: 48252
loss: 1.0099918842315674,grad_norm: 0.830693125186754, iteration: 48253
loss: 0.9863507151603699,grad_norm: 0.9999992088018667, iteration: 48254
loss: 1.0371307134628296,grad_norm: 0.9156632646997817, iteration: 48255
loss: 1.0086802244186401,grad_norm: 0.8818822099206132, iteration: 48256
loss: 0.985127329826355,grad_norm: 0.9309669741893416, iteration: 48257
loss: 1.0151238441467285,grad_norm: 0.9999991252126117, iteration: 48258
loss: 1.011479377746582,grad_norm: 0.9999990122792176, iteration: 48259
loss: 1.001847267150879,grad_norm: 0.805312055133113, iteration: 48260
loss: 0.9716233611106873,grad_norm: 0.9858361770761486, iteration: 48261
loss: 1.0352263450622559,grad_norm: 0.9999991466296004, iteration: 48262
loss: 0.99410480260849,grad_norm: 0.9407958208867009, iteration: 48263
loss: 1.0362141132354736,grad_norm: 0.912158384464011, iteration: 48264
loss: 0.9821538925170898,grad_norm: 0.9999988889225341, iteration: 48265
loss: 0.980532705783844,grad_norm: 0.9999990637759387, iteration: 48266
loss: 0.9980441927909851,grad_norm: 0.9094883857365339, iteration: 48267
loss: 0.9770150184631348,grad_norm: 0.8813889097383544, iteration: 48268
loss: 1.0170785188674927,grad_norm: 0.9999991866420875, iteration: 48269
loss: 1.048295259475708,grad_norm: 0.9999990270881622, iteration: 48270
loss: 0.9866487979888916,grad_norm: 0.9823873404559328, iteration: 48271
loss: 0.9699738621711731,grad_norm: 0.9999994121938629, iteration: 48272
loss: 1.0277482271194458,grad_norm: 0.9532594500756859, iteration: 48273
loss: 1.00003981590271,grad_norm: 0.9999996772830704, iteration: 48274
loss: 1.0079821348190308,grad_norm: 0.9374896663097175, iteration: 48275
loss: 0.9952948689460754,grad_norm: 0.9017630131432087, iteration: 48276
loss: 0.951842725276947,grad_norm: 0.9999989331235933, iteration: 48277
loss: 1.0198616981506348,grad_norm: 0.8851714120883788, iteration: 48278
loss: 0.9908367395401001,grad_norm: 0.8808796725931537, iteration: 48279
loss: 1.0304384231567383,grad_norm: 0.9999991475381346, iteration: 48280
loss: 0.9660171270370483,grad_norm: 0.9999991015866878, iteration: 48281
loss: 1.0008078813552856,grad_norm: 0.9029355105397148, iteration: 48282
loss: 0.9958968758583069,grad_norm: 0.925290655101593, iteration: 48283
loss: 1.005288004875183,grad_norm: 0.9999990097391614, iteration: 48284
loss: 1.0222902297973633,grad_norm: 0.9999991811014115, iteration: 48285
loss: 0.9856641888618469,grad_norm: 0.9366673330969074, iteration: 48286
loss: 1.0583653450012207,grad_norm: 0.9999994199056337, iteration: 48287
loss: 0.967440128326416,grad_norm: 0.9999991148683323, iteration: 48288
loss: 1.0164059400558472,grad_norm: 0.9758193504698339, iteration: 48289
loss: 1.0260770320892334,grad_norm: 0.8826198411368592, iteration: 48290
loss: 1.0021777153015137,grad_norm: 0.8753189397662028, iteration: 48291
loss: 1.013038158416748,grad_norm: 0.9915360332684161, iteration: 48292
loss: 0.9998288154602051,grad_norm: 0.9999990903243974, iteration: 48293
loss: 1.0028618574142456,grad_norm: 0.999999051289034, iteration: 48294
loss: 1.0100256204605103,grad_norm: 0.9999991915850458, iteration: 48295
loss: 1.021628975868225,grad_norm: 0.9999989494443743, iteration: 48296
loss: 0.9935392737388611,grad_norm: 0.9999990755912178, iteration: 48297
loss: 1.033889889717102,grad_norm: 0.9999992967166027, iteration: 48298
loss: 1.0123953819274902,grad_norm: 0.9999991806563195, iteration: 48299
loss: 0.9923328161239624,grad_norm: 0.997824952409718, iteration: 48300
loss: 1.0089412927627563,grad_norm: 0.9172966865865304, iteration: 48301
loss: 1.0272674560546875,grad_norm: 0.9323825947400448, iteration: 48302
loss: 0.9793803691864014,grad_norm: 0.9999991000662898, iteration: 48303
loss: 0.9826804995536804,grad_norm: 0.9697603986441274, iteration: 48304
loss: 1.0167262554168701,grad_norm: 0.9999991719113663, iteration: 48305
loss: 1.0236762762069702,grad_norm: 0.9999993654381166, iteration: 48306
loss: 0.9932153224945068,grad_norm: 0.9962353235981709, iteration: 48307
loss: 0.992975115776062,grad_norm: 0.9999991627297155, iteration: 48308
loss: 0.9866339564323425,grad_norm: 0.999999043482707, iteration: 48309
loss: 1.028496503829956,grad_norm: 0.9739157839263461, iteration: 48310
loss: 1.0165208578109741,grad_norm: 0.9565873272703265, iteration: 48311
loss: 1.0262458324432373,grad_norm: 0.9999990416611322, iteration: 48312
loss: 1.0119800567626953,grad_norm: 0.9645282917293574, iteration: 48313
loss: 1.0430736541748047,grad_norm: 0.9999991464592431, iteration: 48314
loss: 0.9713492393493652,grad_norm: 0.9999991386150575, iteration: 48315
loss: 1.0015833377838135,grad_norm: 0.9999992750885041, iteration: 48316
loss: 1.033172845840454,grad_norm: 0.8873933051464931, iteration: 48317
loss: 1.0186407566070557,grad_norm: 0.9102489377738794, iteration: 48318
loss: 0.9994797706604004,grad_norm: 0.8940575115242513, iteration: 48319
loss: 1.0100678205490112,grad_norm: 0.9999990220504498, iteration: 48320
loss: 1.0106093883514404,grad_norm: 0.9736826504811869, iteration: 48321
loss: 0.9999986290931702,grad_norm: 0.9372163240860338, iteration: 48322
loss: 1.029236912727356,grad_norm: 0.9999992677838762, iteration: 48323
loss: 0.9720415472984314,grad_norm: 0.9853986573624607, iteration: 48324
loss: 0.9999572038650513,grad_norm: 0.9813789915333234, iteration: 48325
loss: 1.0047661066055298,grad_norm: 0.999999059811789, iteration: 48326
loss: 1.036576271057129,grad_norm: 0.9999994789802219, iteration: 48327
loss: 0.9723291993141174,grad_norm: 0.9789858242676828, iteration: 48328
loss: 0.9612407088279724,grad_norm: 0.9287639617151671, iteration: 48329
loss: 0.9671463966369629,grad_norm: 0.9999996052227259, iteration: 48330
loss: 0.984072744846344,grad_norm: 0.9999989639505267, iteration: 48331
loss: 0.9797312021255493,grad_norm: 0.99999903740642, iteration: 48332
loss: 1.0076347589492798,grad_norm: 0.9999994660609967, iteration: 48333
loss: 1.0283844470977783,grad_norm: 0.999999071128401, iteration: 48334
loss: 1.0315912961959839,grad_norm: 0.8293388805697565, iteration: 48335
loss: 0.9719167947769165,grad_norm: 0.9048007328677272, iteration: 48336
loss: 0.9995959401130676,grad_norm: 0.8280181146185128, iteration: 48337
loss: 1.0162571668624878,grad_norm: 0.9999990610601354, iteration: 48338
loss: 0.9674720764160156,grad_norm: 0.9999991914411127, iteration: 48339
loss: 1.0097931623458862,grad_norm: 0.9999991370541428, iteration: 48340
loss: 0.9873815774917603,grad_norm: 0.9999991782561591, iteration: 48341
loss: 0.9724411368370056,grad_norm: 0.9718131755525581, iteration: 48342
loss: 1.0353271961212158,grad_norm: 0.8962438331891519, iteration: 48343
loss: 1.0362499952316284,grad_norm: 0.9999991820210211, iteration: 48344
loss: 1.017727017402649,grad_norm: 0.9999991111603604, iteration: 48345
loss: 0.953132152557373,grad_norm: 0.8841237460686052, iteration: 48346
loss: 1.043173909187317,grad_norm: 0.9999993841365995, iteration: 48347
loss: 1.0035512447357178,grad_norm: 0.9999996872322661, iteration: 48348
loss: 1.0340620279312134,grad_norm: 0.944417630478699, iteration: 48349
loss: 1.0156830549240112,grad_norm: 0.9999995175205988, iteration: 48350
loss: 1.0015625953674316,grad_norm: 0.8143649061998598, iteration: 48351
loss: 1.0205638408660889,grad_norm: 0.781339126135923, iteration: 48352
loss: 0.9523523449897766,grad_norm: 0.9999990063007719, iteration: 48353
loss: 1.0245742797851562,grad_norm: 0.9999991230835117, iteration: 48354
loss: 0.9624270796775818,grad_norm: 0.9322950468995271, iteration: 48355
loss: 1.0004216432571411,grad_norm: 0.9999989984265238, iteration: 48356
loss: 0.9808908700942993,grad_norm: 0.9999990615467755, iteration: 48357
loss: 1.033079743385315,grad_norm: 0.932649539616961, iteration: 48358
loss: 1.0308425426483154,grad_norm: 0.9968407148619733, iteration: 48359
loss: 0.9550763964653015,grad_norm: 0.9999990971246913, iteration: 48360
loss: 0.9825229644775391,grad_norm: 0.9999991209789446, iteration: 48361
loss: 0.9733480215072632,grad_norm: 0.999999099427694, iteration: 48362
loss: 1.0149717330932617,grad_norm: 0.9993628352662759, iteration: 48363
loss: 0.9805957674980164,grad_norm: 0.8952187960745376, iteration: 48364
loss: 1.003200888633728,grad_norm: 0.9999991684735714, iteration: 48365
loss: 0.9928769469261169,grad_norm: 0.928522657624782, iteration: 48366
loss: 1.0227251052856445,grad_norm: 0.9999990494224895, iteration: 48367
loss: 0.9954432845115662,grad_norm: 0.9999990569743726, iteration: 48368
loss: 1.028855562210083,grad_norm: 0.9999992569599649, iteration: 48369
loss: 1.0124446153640747,grad_norm: 0.9999990942407383, iteration: 48370
loss: 1.022504210472107,grad_norm: 0.9785863171784337, iteration: 48371
loss: 1.033630609512329,grad_norm: 0.9999989555202526, iteration: 48372
loss: 1.012434482574463,grad_norm: 0.9001731082232205, iteration: 48373
loss: 1.0168966054916382,grad_norm: 0.9999994049946187, iteration: 48374
loss: 1.0286579132080078,grad_norm: 0.9706987209048236, iteration: 48375
loss: 1.0285015106201172,grad_norm: 0.9202793326782793, iteration: 48376
loss: 1.0309796333312988,grad_norm: 0.8822199752568882, iteration: 48377
loss: 1.0210069417953491,grad_norm: 0.9999995077712849, iteration: 48378
loss: 1.0357701778411865,grad_norm: 0.9999991565308937, iteration: 48379
loss: 0.9926334023475647,grad_norm: 0.9999989615972835, iteration: 48380
loss: 1.0053986310958862,grad_norm: 0.9624417116436842, iteration: 48381
loss: 1.046521782875061,grad_norm: 0.9999998712285016, iteration: 48382
loss: 0.9956581592559814,grad_norm: 0.999999049714615, iteration: 48383
loss: 1.0634664297103882,grad_norm: 0.9999994582220423, iteration: 48384
loss: 1.038159728050232,grad_norm: 0.9999993742318993, iteration: 48385
loss: 0.9824296832084656,grad_norm: 0.9428698390017313, iteration: 48386
loss: 0.9616376757621765,grad_norm: 0.8820004999791555, iteration: 48387
loss: 0.9714391827583313,grad_norm: 0.9260998380003156, iteration: 48388
loss: 0.9995086193084717,grad_norm: 0.9999992000512219, iteration: 48389
loss: 0.9812034964561462,grad_norm: 0.9549873167863288, iteration: 48390
loss: 0.9912512302398682,grad_norm: 0.9148751950292443, iteration: 48391
loss: 1.072818636894226,grad_norm: 0.9999991621939672, iteration: 48392
loss: 1.0053904056549072,grad_norm: 0.8876175521518977, iteration: 48393
loss: 1.0061137676239014,grad_norm: 0.9999990395088499, iteration: 48394
loss: 1.0140775442123413,grad_norm: 0.9620882798676429, iteration: 48395
loss: 0.993376612663269,grad_norm: 0.8648331454186055, iteration: 48396
loss: 0.9835222959518433,grad_norm: 0.9814124507907782, iteration: 48397
loss: 1.0173434019088745,grad_norm: 0.940184799786432, iteration: 48398
loss: 0.9926900267601013,grad_norm: 0.9999990671734057, iteration: 48399
loss: 0.9978277683258057,grad_norm: 0.9999990116536933, iteration: 48400
loss: 0.9984765648841858,grad_norm: 0.9999990869499404, iteration: 48401
loss: 0.9946588277816772,grad_norm: 0.9999992572381954, iteration: 48402
loss: 1.0139909982681274,grad_norm: 0.9999990652590623, iteration: 48403
loss: 1.0052931308746338,grad_norm: 0.952383176216832, iteration: 48404
loss: 0.9818300604820251,grad_norm: 0.7827907180520437, iteration: 48405
loss: 0.9757696986198425,grad_norm: 0.9999990664843281, iteration: 48406
loss: 1.0049768686294556,grad_norm: 0.8942829405252232, iteration: 48407
loss: 1.0056508779525757,grad_norm: 0.9901080153677015, iteration: 48408
loss: 1.0084011554718018,grad_norm: 0.9999991836481895, iteration: 48409
loss: 1.0120638608932495,grad_norm: 0.9999995102639051, iteration: 48410
loss: 1.0202513933181763,grad_norm: 0.9521639456555491, iteration: 48411
loss: 1.0616426467895508,grad_norm: 0.9999996492219915, iteration: 48412
loss: 0.9880415201187134,grad_norm: 0.9999990143932522, iteration: 48413
loss: 1.0160064697265625,grad_norm: 0.9209200095113113, iteration: 48414
loss: 1.029625654220581,grad_norm: 0.9999995614218312, iteration: 48415
loss: 1.0479096174240112,grad_norm: 0.9999993553652164, iteration: 48416
loss: 0.9874939322471619,grad_norm: 0.9485895061345242, iteration: 48417
loss: 0.9933955669403076,grad_norm: 0.9999990271973344, iteration: 48418
loss: 0.9902693629264832,grad_norm: 0.9999991273458533, iteration: 48419
loss: 1.0376161336898804,grad_norm: 0.9999993983050647, iteration: 48420
loss: 0.9870243668556213,grad_norm: 0.999999187993398, iteration: 48421
loss: 1.0228883028030396,grad_norm: 0.999999106455218, iteration: 48422
loss: 0.9962795376777649,grad_norm: 0.9999990961167257, iteration: 48423
loss: 1.0133732557296753,grad_norm: 0.9999997477705546, iteration: 48424
loss: 0.9954164624214172,grad_norm: 0.9443442994042571, iteration: 48425
loss: 0.9785165190696716,grad_norm: 0.9999994831177171, iteration: 48426
loss: 0.9794350862503052,grad_norm: 0.9999992273799881, iteration: 48427
loss: 1.0164226293563843,grad_norm: 0.9999996059631938, iteration: 48428
loss: 1.0133637189865112,grad_norm: 0.9999991667278016, iteration: 48429
loss: 1.0816864967346191,grad_norm: 0.9999996037027775, iteration: 48430
loss: 0.967706024646759,grad_norm: 0.999999624081868, iteration: 48431
loss: 1.0517836809158325,grad_norm: 0.9999991338705112, iteration: 48432
loss: 0.9498050808906555,grad_norm: 0.999999182983342, iteration: 48433
loss: 0.9913319945335388,grad_norm: 0.9999991229310816, iteration: 48434
loss: 1.0411489009857178,grad_norm: 0.9999996274058691, iteration: 48435
loss: 1.0217808485031128,grad_norm: 0.8556211819475287, iteration: 48436
loss: 1.054977536201477,grad_norm: 0.9999999085460167, iteration: 48437
loss: 1.0190473794937134,grad_norm: 0.9999993638268254, iteration: 48438
loss: 1.094098687171936,grad_norm: 0.9999999326013432, iteration: 48439
loss: 1.0179064273834229,grad_norm: 0.9790056529369654, iteration: 48440
loss: 0.9886508584022522,grad_norm: 0.8917883889498699, iteration: 48441
loss: 1.0081530809402466,grad_norm: 0.9035234348471779, iteration: 48442
loss: 1.003084421157837,grad_norm: 0.9999990133488268, iteration: 48443
loss: 0.9816016554832458,grad_norm: 0.9999991260320598, iteration: 48444
loss: 1.050136685371399,grad_norm: 0.999999034730139, iteration: 48445
loss: 0.9895623922348022,grad_norm: 0.9999991233267408, iteration: 48446
loss: 0.993540346622467,grad_norm: 0.9999991924423446, iteration: 48447
loss: 1.005774736404419,grad_norm: 0.999999097191111, iteration: 48448
loss: 1.040655255317688,grad_norm: 0.9999995102363161, iteration: 48449
loss: 0.9908732175827026,grad_norm: 0.9999990981979737, iteration: 48450
loss: 1.0312155485153198,grad_norm: 0.9999991849515308, iteration: 48451
loss: 1.012691855430603,grad_norm: 0.9999991955900708, iteration: 48452
loss: 1.0258400440216064,grad_norm: 0.9999992160756647, iteration: 48453
loss: 1.0046314001083374,grad_norm: 0.9771798461630307, iteration: 48454
loss: 1.0105386972427368,grad_norm: 0.9999991326536886, iteration: 48455
loss: 1.0102174282073975,grad_norm: 0.9999990179359486, iteration: 48456
loss: 0.9647514224052429,grad_norm: 0.9875852724921739, iteration: 48457
loss: 1.0340137481689453,grad_norm: 0.9999994817650367, iteration: 48458
loss: 1.0006543397903442,grad_norm: 0.9999998716807313, iteration: 48459
loss: 0.9947450757026672,grad_norm: 0.9999992441312073, iteration: 48460
loss: 1.027744174003601,grad_norm: 0.9764446234417465, iteration: 48461
loss: 1.0263952016830444,grad_norm: 0.9999992095073864, iteration: 48462
loss: 0.9966002702713013,grad_norm: 0.9999992012214188, iteration: 48463
loss: 1.0143777132034302,grad_norm: 0.9251230287149445, iteration: 48464
loss: 0.9900621771812439,grad_norm: 0.9999991624803426, iteration: 48465
loss: 1.0193346738815308,grad_norm: 0.9999994039021902, iteration: 48466
loss: 1.0076225996017456,grad_norm: 0.9999991903358639, iteration: 48467
loss: 1.0272103548049927,grad_norm: 0.9526778272075496, iteration: 48468
loss: 0.9808238744735718,grad_norm: 0.999999177206064, iteration: 48469
loss: 1.0046206712722778,grad_norm: 0.9999990667934407, iteration: 48470
loss: 1.0188783407211304,grad_norm: 0.890519024147277, iteration: 48471
loss: 1.035681962966919,grad_norm: 0.9715617285664194, iteration: 48472
loss: 0.958062469959259,grad_norm: 0.9999991311247347, iteration: 48473
loss: 1.0536013841629028,grad_norm: 0.9999997502430519, iteration: 48474
loss: 0.9747637510299683,grad_norm: 0.9872506019877162, iteration: 48475
loss: 1.004313349723816,grad_norm: 0.9999990081132201, iteration: 48476
loss: 1.0071187019348145,grad_norm: 0.983986109682412, iteration: 48477
loss: 1.0236103534698486,grad_norm: 0.9960972864851445, iteration: 48478
loss: 1.0188552141189575,grad_norm: 0.9410953592492108, iteration: 48479
loss: 0.9752190113067627,grad_norm: 0.9852578919740355, iteration: 48480
loss: 1.0324275493621826,grad_norm: 0.8498519222818846, iteration: 48481
loss: 1.018168568611145,grad_norm: 0.9468040860201539, iteration: 48482
loss: 0.9871617555618286,grad_norm: 0.9999989524871024, iteration: 48483
loss: 0.9938858151435852,grad_norm: 0.8845740831660757, iteration: 48484
loss: 1.014296293258667,grad_norm: 0.9579993058527414, iteration: 48485
loss: 1.007556438446045,grad_norm: 0.9999990606184405, iteration: 48486
loss: 1.0121843814849854,grad_norm: 0.9999993131667899, iteration: 48487
loss: 1.0034592151641846,grad_norm: 0.9912810208831179, iteration: 48488
loss: 1.0366877317428589,grad_norm: 0.9999996424540558, iteration: 48489
loss: 0.991439163684845,grad_norm: 0.9999990972846425, iteration: 48490
loss: 1.0417581796646118,grad_norm: 0.9999990526329131, iteration: 48491
loss: 0.9716416001319885,grad_norm: 0.9607672415260187, iteration: 48492
loss: 0.9921419024467468,grad_norm: 0.999999142141563, iteration: 48493
loss: 1.034264326095581,grad_norm: 0.9999991692090406, iteration: 48494
loss: 0.9772449731826782,grad_norm: 0.9603239329638141, iteration: 48495
loss: 0.9676157832145691,grad_norm: 0.9184666401619486, iteration: 48496
loss: 1.0563198328018188,grad_norm: 0.9999996748584844, iteration: 48497
loss: 1.0163131952285767,grad_norm: 0.999999319506251, iteration: 48498
loss: 0.9918278455734253,grad_norm: 0.9999989933091132, iteration: 48499
loss: 1.0129311084747314,grad_norm: 0.999999233425178, iteration: 48500
loss: 0.9801069498062134,grad_norm: 0.9999990855120034, iteration: 48501
loss: 1.0167678594589233,grad_norm: 0.999999334737023, iteration: 48502
loss: 0.9984239935874939,grad_norm: 0.9999991446108949, iteration: 48503
loss: 0.966044008731842,grad_norm: 0.9999991323508663, iteration: 48504
loss: 1.0186543464660645,grad_norm: 0.9999998015853925, iteration: 48505
loss: 0.9953444004058838,grad_norm: 0.9999990720225215, iteration: 48506
loss: 0.996792733669281,grad_norm: 0.9999994254425307, iteration: 48507
loss: 1.06722891330719,grad_norm: 0.9999994376535088, iteration: 48508
loss: 1.0282105207443237,grad_norm: 0.9999990632736955, iteration: 48509
loss: 1.0068678855895996,grad_norm: 0.9974358481491596, iteration: 48510
loss: 1.00115168094635,grad_norm: 0.9999992871845931, iteration: 48511
loss: 1.030198335647583,grad_norm: 0.9999990765587288, iteration: 48512
loss: 0.9949028491973877,grad_norm: 0.9999992030242524, iteration: 48513
loss: 0.9892163276672363,grad_norm: 0.886400022492637, iteration: 48514
loss: 1.0484154224395752,grad_norm: 0.9999990383563634, iteration: 48515
loss: 1.0219871997833252,grad_norm: 0.9352695108767354, iteration: 48516
loss: 1.0527552366256714,grad_norm: 0.9999991121929971, iteration: 48517
loss: 1.0265400409698486,grad_norm: 0.9239347185968485, iteration: 48518
loss: 1.0115070343017578,grad_norm: 0.9999994249070052, iteration: 48519
loss: 0.9567796587944031,grad_norm: 0.9755856867339636, iteration: 48520
loss: 1.0490418672561646,grad_norm: 0.9999990984331552, iteration: 48521
loss: 1.011749029159546,grad_norm: 0.8296583121649947, iteration: 48522
loss: 1.0013076066970825,grad_norm: 0.9669068297757207, iteration: 48523
loss: 0.9807891845703125,grad_norm: 0.9238345236264122, iteration: 48524
loss: 1.0112111568450928,grad_norm: 0.9999992873377898, iteration: 48525
loss: 1.0172113180160522,grad_norm: 0.9999993015634597, iteration: 48526
loss: 0.9890903234481812,grad_norm: 0.999999138340441, iteration: 48527
loss: 0.9645584225654602,grad_norm: 0.9999991280359123, iteration: 48528
loss: 0.9813252091407776,grad_norm: 0.9884151221405667, iteration: 48529
loss: 0.9809219241142273,grad_norm: 0.8953770256181863, iteration: 48530
loss: 0.9929011464118958,grad_norm: 0.9999991928295155, iteration: 48531
loss: 0.9948699474334717,grad_norm: 0.9999993198976069, iteration: 48532
loss: 1.003125786781311,grad_norm: 0.9999990832165643, iteration: 48533
loss: 1.0784764289855957,grad_norm: 0.9999991985069026, iteration: 48534
loss: 1.0159894227981567,grad_norm: 0.9999991069159893, iteration: 48535
loss: 1.0041522979736328,grad_norm: 0.8883400971050747, iteration: 48536
loss: 1.0257350206375122,grad_norm: 0.999999162927603, iteration: 48537
loss: 1.042800784111023,grad_norm: 0.9999990919533737, iteration: 48538
loss: 1.0064475536346436,grad_norm: 0.9999992784910118, iteration: 48539
loss: 0.996046245098114,grad_norm: 0.9999990416957859, iteration: 48540
loss: 0.9907615780830383,grad_norm: 0.9196284282283627, iteration: 48541
loss: 0.9617306590080261,grad_norm: 0.9999991102016255, iteration: 48542
loss: 0.9836676120758057,grad_norm: 0.99999926287949, iteration: 48543
loss: 1.004296898841858,grad_norm: 0.9999991292666283, iteration: 48544
loss: 1.016219973564148,grad_norm: 0.9999992332649503, iteration: 48545
loss: 1.002886414527893,grad_norm: 0.9806477152681827, iteration: 48546
loss: 1.009634256362915,grad_norm: 0.7571071155324001, iteration: 48547
loss: 0.9979612827301025,grad_norm: 0.9999992503888157, iteration: 48548
loss: 1.0154296159744263,grad_norm: 0.9999993846005775, iteration: 48549
loss: 0.9758815169334412,grad_norm: 0.9929498908076714, iteration: 48550
loss: 0.9581539630889893,grad_norm: 0.9999990528451191, iteration: 48551
loss: 1.0325511693954468,grad_norm: 0.8034930911476051, iteration: 48552
loss: 1.015549898147583,grad_norm: 0.9295493856711036, iteration: 48553
loss: 1.0105422735214233,grad_norm: 0.9999991323903337, iteration: 48554
loss: 0.9979956746101379,grad_norm: 0.9999993049838426, iteration: 48555
loss: 0.9981684684753418,grad_norm: 0.9087761741099457, iteration: 48556
loss: 0.9631158709526062,grad_norm: 0.9999991057984894, iteration: 48557
loss: 1.0056747198104858,grad_norm: 0.9999992530868416, iteration: 48558
loss: 1.0357240438461304,grad_norm: 0.999999107952102, iteration: 48559
loss: 1.0767676830291748,grad_norm: 1.0000000064017338, iteration: 48560
loss: 0.9685943722724915,grad_norm: 0.9177206982897338, iteration: 48561
loss: 0.955856442451477,grad_norm: 0.9447358121915364, iteration: 48562
loss: 0.9678252339363098,grad_norm: 0.9266208662105422, iteration: 48563
loss: 0.9997724890708923,grad_norm: 0.9549697629413769, iteration: 48564
loss: 1.0003942251205444,grad_norm: 0.9999991629538671, iteration: 48565
loss: 1.0312731266021729,grad_norm: 0.9999992661311703, iteration: 48566
loss: 0.9879132509231567,grad_norm: 0.8901774503766596, iteration: 48567
loss: 1.000830054283142,grad_norm: 0.7943605334176261, iteration: 48568
loss: 1.03908371925354,grad_norm: 0.8901644094712301, iteration: 48569
loss: 0.9696041345596313,grad_norm: 0.926821045493599, iteration: 48570
loss: 0.9579887986183167,grad_norm: 0.8655395413660011, iteration: 48571
loss: 0.9804959297180176,grad_norm: 0.9250355644609518, iteration: 48572
loss: 1.0248825550079346,grad_norm: 0.961368278533172, iteration: 48573
loss: 1.0504717826843262,grad_norm: 0.9999995666464998, iteration: 48574
loss: 0.9457792639732361,grad_norm: 0.9008777059749432, iteration: 48575
loss: 0.9636315107345581,grad_norm: 0.9999991352611014, iteration: 48576
loss: 0.9974181056022644,grad_norm: 0.8650682439781952, iteration: 48577
loss: 1.0132153034210205,grad_norm: 0.9136457387848481, iteration: 48578
loss: 1.015397548675537,grad_norm: 0.9999990709480392, iteration: 48579
loss: 1.019350528717041,grad_norm: 0.9999990648058813, iteration: 48580
loss: 0.9829058647155762,grad_norm: 0.9999995402766628, iteration: 48581
loss: 1.0333720445632935,grad_norm: 0.8652354481579638, iteration: 48582
loss: 0.9817629456520081,grad_norm: 0.879654034758165, iteration: 48583
loss: 1.0104960203170776,grad_norm: 0.9999992092245219, iteration: 48584
loss: 1.0096875429153442,grad_norm: 0.9995527882098969, iteration: 48585
loss: 0.9770363569259644,grad_norm: 0.9641004044666338, iteration: 48586
loss: 1.0029091835021973,grad_norm: 0.9999991459853145, iteration: 48587
loss: 0.9911667108535767,grad_norm: 0.9999991456674946, iteration: 48588
loss: 0.9994661808013916,grad_norm: 0.9999996211512179, iteration: 48589
loss: 0.9464945793151855,grad_norm: 0.8753570148485151, iteration: 48590
loss: 1.0199329853057861,grad_norm: 0.89158572568116, iteration: 48591
loss: 0.9863051772117615,grad_norm: 0.9280401404367171, iteration: 48592
loss: 0.9758992195129395,grad_norm: 0.9990511422840443, iteration: 48593
loss: 0.9933473467826843,grad_norm: 0.9999998253592979, iteration: 48594
loss: 0.9819338321685791,grad_norm: 0.9999991441801663, iteration: 48595
loss: 1.0213052034378052,grad_norm: 0.9999992036237106, iteration: 48596
loss: 0.9702939391136169,grad_norm: 0.999999342642517, iteration: 48597
loss: 1.023165225982666,grad_norm: 0.9942157123627362, iteration: 48598
loss: 0.986518383026123,grad_norm: 0.8237175757874996, iteration: 48599
loss: 0.9880610704421997,grad_norm: 0.9661947662324617, iteration: 48600
loss: 1.0170429944992065,grad_norm: 0.9689821926746384, iteration: 48601
loss: 1.0454230308532715,grad_norm: 0.9999995535363909, iteration: 48602
loss: 1.0192043781280518,grad_norm: 0.9214225411008456, iteration: 48603
loss: 0.9955182075500488,grad_norm: 0.999999004960177, iteration: 48604
loss: 1.0103883743286133,grad_norm: 0.9999991619814216, iteration: 48605
loss: 1.0517069101333618,grad_norm: 0.9999995984599854, iteration: 48606
loss: 1.0459189414978027,grad_norm: 0.9096264841343703, iteration: 48607
loss: 1.0066523551940918,grad_norm: 0.999999184535273, iteration: 48608
loss: 0.9953285455703735,grad_norm: 0.9180424602259226, iteration: 48609
loss: 0.9941796064376831,grad_norm: 0.9999996128333968, iteration: 48610
loss: 1.0293046236038208,grad_norm: 0.9999990765247136, iteration: 48611
loss: 0.9911184310913086,grad_norm: 0.8332335109154513, iteration: 48612
loss: 0.9788197875022888,grad_norm: 0.9978435325470257, iteration: 48613
loss: 1.01705801486969,grad_norm: 0.9999990119358285, iteration: 48614
loss: 0.9786787033081055,grad_norm: 0.9999989743630946, iteration: 48615
loss: 1.0343122482299805,grad_norm: 0.9962184593486544, iteration: 48616
loss: 0.9850670695304871,grad_norm: 0.9999992733278713, iteration: 48617
loss: 0.9848886132240295,grad_norm: 0.9687875012015302, iteration: 48618
loss: 0.9825739860534668,grad_norm: 0.9549464635520735, iteration: 48619
loss: 0.9860910177230835,grad_norm: 0.9999991591665037, iteration: 48620
loss: 1.0217647552490234,grad_norm: 0.9825839278400299, iteration: 48621
loss: 1.0179401636123657,grad_norm: 0.8228068221519269, iteration: 48622
loss: 1.0085006952285767,grad_norm: 0.999999296282567, iteration: 48623
loss: 1.0052155256271362,grad_norm: 0.9999996469821929, iteration: 48624
loss: 0.9790300726890564,grad_norm: 0.9999994643847842, iteration: 48625
loss: 1.007949948310852,grad_norm: 0.9490047532227363, iteration: 48626
loss: 0.971622884273529,grad_norm: 0.9999996791819045, iteration: 48627
loss: 0.988072395324707,grad_norm: 0.8500757359058945, iteration: 48628
loss: 0.9905219674110413,grad_norm: 0.999999070088644, iteration: 48629
loss: 0.9747149348258972,grad_norm: 0.8872869282462713, iteration: 48630
loss: 1.0293190479278564,grad_norm: 0.9528201534019417, iteration: 48631
loss: 1.0058789253234863,grad_norm: 0.8026185691594218, iteration: 48632
loss: 0.9987464547157288,grad_norm: 0.9999991324987108, iteration: 48633
loss: 1.0460633039474487,grad_norm: 0.824783386670106, iteration: 48634
loss: 1.0185019969940186,grad_norm: 0.9999992431749077, iteration: 48635
loss: 1.0148824453353882,grad_norm: 0.999999205727818, iteration: 48636
loss: 0.9862847924232483,grad_norm: 0.9999991629735084, iteration: 48637
loss: 0.9740923047065735,grad_norm: 0.9999990925387771, iteration: 48638
loss: 1.0075643062591553,grad_norm: 0.9999996195676321, iteration: 48639
loss: 0.9517471194267273,grad_norm: 0.9259742657813694, iteration: 48640
loss: 0.9823966026306152,grad_norm: 0.9999991032600022, iteration: 48641
loss: 0.9933768510818481,grad_norm: 0.9999990379168158, iteration: 48642
loss: 1.0331648588180542,grad_norm: 0.9999989862812664, iteration: 48643
loss: 0.9988383650779724,grad_norm: 0.9541021905702225, iteration: 48644
loss: 0.9483962059020996,grad_norm: 0.999999070089207, iteration: 48645
loss: 1.0190587043762207,grad_norm: 0.9999992005737742, iteration: 48646
loss: 0.9940107464790344,grad_norm: 0.8365218339398391, iteration: 48647
loss: 1.0140999555587769,grad_norm: 0.9789885407220116, iteration: 48648
loss: 0.979928731918335,grad_norm: 0.9457711500487392, iteration: 48649
loss: 1.074558973312378,grad_norm: 0.9999992573127987, iteration: 48650
loss: 0.9901212453842163,grad_norm: 0.9999993029230941, iteration: 48651
loss: 0.9686728119850159,grad_norm: 0.9999989946286334, iteration: 48652
loss: 0.9828352332115173,grad_norm: 0.9193875435348144, iteration: 48653
loss: 0.9846552014350891,grad_norm: 0.9776631926383603, iteration: 48654
loss: 1.0245375633239746,grad_norm: 0.9999991502459832, iteration: 48655
loss: 1.0162376165390015,grad_norm: 0.9999991094224688, iteration: 48656
loss: 1.0273867845535278,grad_norm: 0.8849364022747123, iteration: 48657
loss: 1.0383731126785278,grad_norm: 0.9858579719815188, iteration: 48658
loss: 1.0132243633270264,grad_norm: 0.8897021044235136, iteration: 48659
loss: 1.0112379789352417,grad_norm: 0.9999992314762237, iteration: 48660
loss: 1.0038926601409912,grad_norm: 0.8066230120675795, iteration: 48661
loss: 0.986560046672821,grad_norm: 0.8988592883830974, iteration: 48662
loss: 1.0114350318908691,grad_norm: 0.9999992510139111, iteration: 48663
loss: 0.9902625679969788,grad_norm: 0.9999989437558233, iteration: 48664
loss: 0.9606403708457947,grad_norm: 0.9999990243409256, iteration: 48665
loss: 1.0048303604125977,grad_norm: 0.9130160128444571, iteration: 48666
loss: 1.0106816291809082,grad_norm: 0.7775402203898041, iteration: 48667
loss: 0.9517149925231934,grad_norm: 0.9999991771628659, iteration: 48668
loss: 0.9806190133094788,grad_norm: 0.869926107549793, iteration: 48669
loss: 1.051231861114502,grad_norm: 0.999999661028941, iteration: 48670
loss: 1.0173792839050293,grad_norm: 0.9458838628655355, iteration: 48671
loss: 1.0298571586608887,grad_norm: 0.8444386822711011, iteration: 48672
loss: 0.952426016330719,grad_norm: 0.9423120997309732, iteration: 48673
loss: 1.0059421062469482,grad_norm: 0.8654266678936058, iteration: 48674
loss: 1.0086396932601929,grad_norm: 0.999999174960227, iteration: 48675
loss: 0.985493004322052,grad_norm: 0.9999991712838631, iteration: 48676
loss: 1.008981704711914,grad_norm: 0.9086995022303025, iteration: 48677
loss: 1.008406400680542,grad_norm: 0.9999990753046251, iteration: 48678
loss: 1.0094223022460938,grad_norm: 0.85946956704387, iteration: 48679
loss: 1.0236049890518188,grad_norm: 0.9999990465319476, iteration: 48680
loss: 0.9985256195068359,grad_norm: 0.9729521415136148, iteration: 48681
loss: 0.9864383339881897,grad_norm: 0.9907735176791095, iteration: 48682
loss: 0.9905794262886047,grad_norm: 0.9892782690361104, iteration: 48683
loss: 0.9697713255882263,grad_norm: 0.9743473852822843, iteration: 48684
loss: 1.0363436937332153,grad_norm: 0.9999991719743663, iteration: 48685
loss: 1.00351083278656,grad_norm: 0.9999990861289834, iteration: 48686
loss: 0.9933254718780518,grad_norm: 0.9999990982526945, iteration: 48687
loss: 1.0332905054092407,grad_norm: 0.9999998955770466, iteration: 48688
loss: 0.9907659888267517,grad_norm: 0.9999990845554129, iteration: 48689
loss: 1.0340896844863892,grad_norm: 0.9369222066115537, iteration: 48690
loss: 1.0117968320846558,grad_norm: 0.9999991251992975, iteration: 48691
loss: 1.004172444343567,grad_norm: 0.9999991987932813, iteration: 48692
loss: 1.0020809173583984,grad_norm: 0.9999989545446334, iteration: 48693
loss: 0.9999200105667114,grad_norm: 0.897680056925917, iteration: 48694
loss: 1.0204087495803833,grad_norm: 0.9492555145929908, iteration: 48695
loss: 1.0324971675872803,grad_norm: 0.956578846660871, iteration: 48696
loss: 1.003153920173645,grad_norm: 0.9999989822805143, iteration: 48697
loss: 1.0123573541641235,grad_norm: 0.999999202919001, iteration: 48698
loss: 0.989136278629303,grad_norm: 0.991992638443508, iteration: 48699
loss: 0.9914544224739075,grad_norm: 0.9921265476666943, iteration: 48700
loss: 0.9676904082298279,grad_norm: 0.8979752902935124, iteration: 48701
loss: 0.9925574064254761,grad_norm: 0.7972481826623214, iteration: 48702
loss: 1.0269378423690796,grad_norm: 0.9999991307716102, iteration: 48703
loss: 1.1148667335510254,grad_norm: 0.9999996611145392, iteration: 48704
loss: 0.9962552785873413,grad_norm: 0.999999599643537, iteration: 48705
loss: 0.9942831993103027,grad_norm: 0.8425335732906126, iteration: 48706
loss: 0.9759261608123779,grad_norm: 0.9542659434855314, iteration: 48707
loss: 1.0097637176513672,grad_norm: 0.9999991930626921, iteration: 48708
loss: 1.0242722034454346,grad_norm: 0.9999990198277087, iteration: 48709
loss: 1.028603196144104,grad_norm: 0.9999993606084557, iteration: 48710
loss: 1.0141466856002808,grad_norm: 0.8480572047539013, iteration: 48711
loss: 1.0189473628997803,grad_norm: 0.9999990687492382, iteration: 48712
loss: 1.0844999551773071,grad_norm: 0.9999996404366656, iteration: 48713
loss: 0.9904054403305054,grad_norm: 0.9938998158480723, iteration: 48714
loss: 1.0213783979415894,grad_norm: 0.9854328783022945, iteration: 48715
loss: 1.0212743282318115,grad_norm: 0.9999991747079061, iteration: 48716
loss: 0.9987398982048035,grad_norm: 0.9999990362224374, iteration: 48717
loss: 1.0251996517181396,grad_norm: 0.9999991205960951, iteration: 48718
loss: 1.0172466039657593,grad_norm: 0.9999990490683227, iteration: 48719
loss: 0.9637805223464966,grad_norm: 0.9370015076013926, iteration: 48720
loss: 0.9800856709480286,grad_norm: 0.9999992669506989, iteration: 48721
loss: 1.0249056816101074,grad_norm: 0.9090335415146621, iteration: 48722
loss: 1.0103285312652588,grad_norm: 0.9999994002855046, iteration: 48723
loss: 0.9865941405296326,grad_norm: 0.8546375226979509, iteration: 48724
loss: 1.0722270011901855,grad_norm: 0.9999992061992913, iteration: 48725
loss: 1.066806435585022,grad_norm: 0.9853253871044115, iteration: 48726
loss: 1.0325130224227905,grad_norm: 0.9999990214165263, iteration: 48727
loss: 1.0525106191635132,grad_norm: 0.9999992064785367, iteration: 48728
loss: 1.0097291469573975,grad_norm: 0.9920978048318414, iteration: 48729
loss: 1.0416640043258667,grad_norm: 0.857896459350434, iteration: 48730
loss: 1.0420366525650024,grad_norm: 0.9999991751897738, iteration: 48731
loss: 1.015687108039856,grad_norm: 0.9999994751261118, iteration: 48732
loss: 1.0337251424789429,grad_norm: 0.9999993987743244, iteration: 48733
loss: 1.0149896144866943,grad_norm: 0.9999990599125432, iteration: 48734
loss: 0.9652543067932129,grad_norm: 0.999999064335451, iteration: 48735
loss: 0.9827548265457153,grad_norm: 0.950845269329846, iteration: 48736
loss: 1.0189390182495117,grad_norm: 0.9999990659325385, iteration: 48737
loss: 0.983941912651062,grad_norm: 0.9999991468433896, iteration: 48738
loss: 1.0234565734863281,grad_norm: 0.9999990895840741, iteration: 48739
loss: 1.0100926160812378,grad_norm: 0.9865573728081874, iteration: 48740
loss: 1.0329663753509521,grad_norm: 0.9653539846864635, iteration: 48741
loss: 0.9920430779457092,grad_norm: 0.999999255277302, iteration: 48742
loss: 1.010694980621338,grad_norm: 0.999999196556699, iteration: 48743
loss: 1.0248051881790161,grad_norm: 0.9999991385033026, iteration: 48744
loss: 1.0115864276885986,grad_norm: 0.9999989458040401, iteration: 48745
loss: 1.0127291679382324,grad_norm: 0.9999991145783577, iteration: 48746
loss: 0.9851159453392029,grad_norm: 0.9999992269672398, iteration: 48747
loss: 0.9842186570167542,grad_norm: 0.9999996713141726, iteration: 48748
loss: 1.0188404321670532,grad_norm: 0.9999999140294199, iteration: 48749
loss: 1.0217875242233276,grad_norm: 0.9647090776673115, iteration: 48750
loss: 1.0158401727676392,grad_norm: 0.9999990719160529, iteration: 48751
loss: 0.9921249747276306,grad_norm: 0.9999990978236523, iteration: 48752
loss: 1.0003421306610107,grad_norm: 0.9300876774364025, iteration: 48753
loss: 0.9684504866600037,grad_norm: 0.999999216923009, iteration: 48754
loss: 0.9896897077560425,grad_norm: 0.999999148836482, iteration: 48755
loss: 1.0309128761291504,grad_norm: 0.9280513768217675, iteration: 48756
loss: 1.113624095916748,grad_norm: 0.9999994355850197, iteration: 48757
loss: 1.032486915588379,grad_norm: 0.9999991225306906, iteration: 48758
loss: 1.0092551708221436,grad_norm: 0.9999991021069008, iteration: 48759
loss: 1.0168483257293701,grad_norm: 0.8786906306971697, iteration: 48760
loss: 1.0371201038360596,grad_norm: 0.9999994194258437, iteration: 48761
loss: 0.9834442734718323,grad_norm: 0.9999990449970338, iteration: 48762
loss: 1.0006091594696045,grad_norm: 0.9375004636703997, iteration: 48763
loss: 0.9783172607421875,grad_norm: 0.9988774785940518, iteration: 48764
loss: 1.0015864372253418,grad_norm: 0.999999193208345, iteration: 48765
loss: 1.0242215394973755,grad_norm: 0.9999995328428791, iteration: 48766
loss: 1.026034951210022,grad_norm: 0.9999992162700879, iteration: 48767
loss: 1.0295087099075317,grad_norm: 0.9559167992365346, iteration: 48768
loss: 1.0079978704452515,grad_norm: 0.8805095559343958, iteration: 48769
loss: 1.0229253768920898,grad_norm: 0.9999990413421636, iteration: 48770
loss: 1.0476001501083374,grad_norm: 0.9999992467290411, iteration: 48771
loss: 1.0304503440856934,grad_norm: 0.8336788958483645, iteration: 48772
loss: 1.012649416923523,grad_norm: 0.9461237720424585, iteration: 48773
loss: 0.9998310208320618,grad_norm: 0.9999992306584075, iteration: 48774
loss: 0.9613022804260254,grad_norm: 0.9152916015136258, iteration: 48775
loss: 1.0400303602218628,grad_norm: 0.9999993621756297, iteration: 48776
loss: 0.983296275138855,grad_norm: 0.9081812085107884, iteration: 48777
loss: 1.0333635807037354,grad_norm: 0.9999993948523741, iteration: 48778
loss: 0.9812232255935669,grad_norm: 0.9999991689943853, iteration: 48779
loss: 1.0070565938949585,grad_norm: 0.8731523038770036, iteration: 48780
loss: 1.00185227394104,grad_norm: 0.9999990946464548, iteration: 48781
loss: 1.0009914636611938,grad_norm: 0.9014330076188544, iteration: 48782
loss: 0.997062623500824,grad_norm: 0.9999992232515544, iteration: 48783
loss: 1.0095824003219604,grad_norm: 0.9145151455449656, iteration: 48784
loss: 1.0043715238571167,grad_norm: 0.9999991971440987, iteration: 48785
loss: 0.9913368225097656,grad_norm: 0.8154238971496113, iteration: 48786
loss: 0.9916135668754578,grad_norm: 0.9999990986344727, iteration: 48787
loss: 0.9989677667617798,grad_norm: 0.9494850956259474, iteration: 48788
loss: 1.0081212520599365,grad_norm: 0.9813165166084343, iteration: 48789
loss: 1.0518897771835327,grad_norm: 0.9999994113711363, iteration: 48790
loss: 1.0834258794784546,grad_norm: 0.9999996968634349, iteration: 48791
loss: 0.9746087789535522,grad_norm: 0.999999050109105, iteration: 48792
loss: 1.0012491941452026,grad_norm: 0.9813954598102992, iteration: 48793
loss: 0.9638962149620056,grad_norm: 0.9416181117496931, iteration: 48794
loss: 1.0087093114852905,grad_norm: 0.8520917509427706, iteration: 48795
loss: 1.0038691759109497,grad_norm: 0.9999990151438048, iteration: 48796
loss: 1.0305651426315308,grad_norm: 0.9389285660606683, iteration: 48797
loss: 0.9982649683952332,grad_norm: 0.8751774950683199, iteration: 48798
loss: 1.0124045610427856,grad_norm: 0.9999990094166049, iteration: 48799
loss: 1.027135968208313,grad_norm: 0.9999993172428633, iteration: 48800
loss: 1.0369610786437988,grad_norm: 0.9999994615426995, iteration: 48801
loss: 0.9894552826881409,grad_norm: 0.9999990696807273, iteration: 48802
loss: 1.0756540298461914,grad_norm: 0.9999990741408277, iteration: 48803
loss: 1.0254671573638916,grad_norm: 0.9179103948194629, iteration: 48804
loss: 0.9952525496482849,grad_norm: 0.9152215640688584, iteration: 48805
loss: 0.9954976439476013,grad_norm: 0.981426133688274, iteration: 48806
loss: 1.0166728496551514,grad_norm: 0.9999995524131496, iteration: 48807
loss: 1.0012918710708618,grad_norm: 0.9698810121664299, iteration: 48808
loss: 1.0468947887420654,grad_norm: 0.9999997774647256, iteration: 48809
loss: 0.972072422504425,grad_norm: 0.9999991268491029, iteration: 48810
loss: 0.99934983253479,grad_norm: 0.9999992708435638, iteration: 48811
loss: 1.0079240798950195,grad_norm: 0.9999991451112631, iteration: 48812
loss: 1.0243357419967651,grad_norm: 0.9999992431865335, iteration: 48813
loss: 1.027733325958252,grad_norm: 0.9999991630531803, iteration: 48814
loss: 0.9741459488868713,grad_norm: 0.9288863839738758, iteration: 48815
loss: 0.9701182842254639,grad_norm: 0.9051176743608274, iteration: 48816
loss: 0.9722183346748352,grad_norm: 0.9999991367701387, iteration: 48817
loss: 0.9983431696891785,grad_norm: 0.9999990334925647, iteration: 48818
loss: 1.0220534801483154,grad_norm: 0.9999994051587545, iteration: 48819
loss: 0.930205762386322,grad_norm: 0.9308556583745962, iteration: 48820
loss: 1.0058480501174927,grad_norm: 0.9999994606081348, iteration: 48821
loss: 0.9943960905075073,grad_norm: 0.9999991706498356, iteration: 48822
loss: 1.0513800382614136,grad_norm: 0.9999992410952027, iteration: 48823
loss: 1.0883029699325562,grad_norm: 0.9999995351723711, iteration: 48824
loss: 1.0119588375091553,grad_norm: 0.8862428231206115, iteration: 48825
loss: 1.024776816368103,grad_norm: 0.9999997389167257, iteration: 48826
loss: 1.0193041563034058,grad_norm: 0.9999996820356097, iteration: 48827
loss: 1.0358725786209106,grad_norm: 0.9893610644039742, iteration: 48828
loss: 0.9898273348808289,grad_norm: 0.9014559331363199, iteration: 48829
loss: 0.9811627268791199,grad_norm: 0.9752606152622587, iteration: 48830
loss: 1.0402103662490845,grad_norm: 0.999999528062178, iteration: 48831
loss: 0.9529507160186768,grad_norm: 0.9999989626837666, iteration: 48832
loss: 1.0100011825561523,grad_norm: 0.9999992138815836, iteration: 48833
loss: 1.0210144519805908,grad_norm: 0.9354540326906674, iteration: 48834
loss: 1.002829670906067,grad_norm: 0.9007288246669749, iteration: 48835
loss: 1.04425048828125,grad_norm: 0.9999989538097219, iteration: 48836
loss: 0.9700648784637451,grad_norm: 0.9999992060371494, iteration: 48837
loss: 1.0339754819869995,grad_norm: 0.9999993194919246, iteration: 48838
loss: 0.9981783032417297,grad_norm: 0.9999991436878874, iteration: 48839
loss: 0.991401731967926,grad_norm: 0.9999993113833047, iteration: 48840
loss: 1.0053213834762573,grad_norm: 0.999999171341745, iteration: 48841
loss: 1.024309515953064,grad_norm: 0.9999991269121169, iteration: 48842
loss: 1.0236496925354004,grad_norm: 0.9999995635591314, iteration: 48843
loss: 0.9915159940719604,grad_norm: 0.9999991867568029, iteration: 48844
loss: 1.020769715309143,grad_norm: 0.9999992181292535, iteration: 48845
loss: 1.0010254383087158,grad_norm: 0.9999994072248644, iteration: 48846
loss: 0.9743430018424988,grad_norm: 0.978137849911983, iteration: 48847
loss: 1.0080081224441528,grad_norm: 0.9999990715455651, iteration: 48848
loss: 1.0123518705368042,grad_norm: 0.9999991242763822, iteration: 48849
loss: 1.03692626953125,grad_norm: 0.9999991100499596, iteration: 48850
loss: 1.0267062187194824,grad_norm: 0.9999989823301098, iteration: 48851
loss: 1.0085759162902832,grad_norm: 0.9185141212197288, iteration: 48852
loss: 0.9830069541931152,grad_norm: 0.9999990489162536, iteration: 48853
loss: 1.0348490476608276,grad_norm: 0.9999990677358418, iteration: 48854
loss: 0.9862154722213745,grad_norm: 0.9245349922224312, iteration: 48855
loss: 1.0426528453826904,grad_norm: 0.9999991410631804, iteration: 48856
loss: 0.9898107051849365,grad_norm: 0.9454546181810983, iteration: 48857
loss: 0.9988241791725159,grad_norm: 0.9999991035900359, iteration: 48858
loss: 0.9921091198921204,grad_norm: 0.9694030591613573, iteration: 48859
loss: 1.0563331842422485,grad_norm: 0.9999992804917057, iteration: 48860
loss: 1.0372178554534912,grad_norm: 0.9999991849273592, iteration: 48861
loss: 1.0463930368423462,grad_norm: 0.9999996605203314, iteration: 48862
loss: 1.0351645946502686,grad_norm: 0.9999991339811265, iteration: 48863
loss: 1.0240521430969238,grad_norm: 0.9160384357893154, iteration: 48864
loss: 0.9929796457290649,grad_norm: 0.9445278675972038, iteration: 48865
loss: 0.9949684739112854,grad_norm: 0.8219944604902134, iteration: 48866
loss: 0.9939523935317993,grad_norm: 0.9374108633449486, iteration: 48867
loss: 1.0287189483642578,grad_norm: 0.9999991561207962, iteration: 48868
loss: 0.9945803880691528,grad_norm: 0.876356722736158, iteration: 48869
loss: 0.9651485085487366,grad_norm: 0.9289796892611649, iteration: 48870
loss: 1.0002470016479492,grad_norm: 0.9999990889264229, iteration: 48871
loss: 1.057948112487793,grad_norm: 0.9999993720058994, iteration: 48872
loss: 0.9788788557052612,grad_norm: 0.9999992142636512, iteration: 48873
loss: 0.9669303894042969,grad_norm: 0.9667506759368141, iteration: 48874
loss: 1.0463013648986816,grad_norm: 0.9999996825556314, iteration: 48875
loss: 1.0121041536331177,grad_norm: 0.9700499192537717, iteration: 48876
loss: 0.9925903081893921,grad_norm: 0.9574339212695798, iteration: 48877
loss: 1.0037333965301514,grad_norm: 0.9999990697383281, iteration: 48878
loss: 1.00023353099823,grad_norm: 0.8564138810428307, iteration: 48879
loss: 1.026660680770874,grad_norm: 0.9999990679662256, iteration: 48880
loss: 1.0094400644302368,grad_norm: 0.9954635787115899, iteration: 48881
loss: 1.0125125646591187,grad_norm: 0.9999992695249531, iteration: 48882
loss: 1.0246678590774536,grad_norm: 0.999999835035958, iteration: 48883
loss: 0.9991949200630188,grad_norm: 0.9999992080808625, iteration: 48884
loss: 0.9859693646430969,grad_norm: 0.9999990409065361, iteration: 48885
loss: 1.0045620203018188,grad_norm: 0.9219136331463401, iteration: 48886
loss: 1.0052913427352905,grad_norm: 0.9999990385726716, iteration: 48887
loss: 1.0021402835845947,grad_norm: 0.9999990909608173, iteration: 48888
loss: 0.9802553653717041,grad_norm: 0.8762249324591187, iteration: 48889
loss: 0.998956561088562,grad_norm: 0.794841107354364, iteration: 48890
loss: 0.9864863753318787,grad_norm: 0.995820878183359, iteration: 48891
loss: 0.9885542392730713,grad_norm: 0.9999991707948259, iteration: 48892
loss: 0.9741318225860596,grad_norm: 0.9999990096774692, iteration: 48893
loss: 1.070030927658081,grad_norm: 0.9999995312040986, iteration: 48894
loss: 1.043971061706543,grad_norm: 0.9999990481752626, iteration: 48895
loss: 1.0119221210479736,grad_norm: 0.8874714900450871, iteration: 48896
loss: 0.9935672879219055,grad_norm: 0.9999991117021596, iteration: 48897
loss: 1.0053343772888184,grad_norm: 0.9819381298561255, iteration: 48898
loss: 1.0018310546875,grad_norm: 0.9088741487482795, iteration: 48899
loss: 1.0167406797409058,grad_norm: 0.9999989218002443, iteration: 48900
loss: 0.9813222885131836,grad_norm: 0.8868905057692197, iteration: 48901
loss: 0.9748458862304688,grad_norm: 0.9999992136103049, iteration: 48902
loss: 1.0187854766845703,grad_norm: 0.8086054181365031, iteration: 48903
loss: 1.0080887079238892,grad_norm: 0.9999993449463456, iteration: 48904
loss: 1.0442249774932861,grad_norm: 0.8519626754629084, iteration: 48905
loss: 0.9884485006332397,grad_norm: 0.9999993115552804, iteration: 48906
loss: 0.9973907470703125,grad_norm: 0.9999990579640741, iteration: 48907
loss: 1.0400441884994507,grad_norm: 0.9957031203442532, iteration: 48908
loss: 1.0244985818862915,grad_norm: 0.8489227430639383, iteration: 48909
loss: 1.0000121593475342,grad_norm: 0.999999052544601, iteration: 48910
loss: 0.9973384141921997,grad_norm: 0.9610668604283539, iteration: 48911
loss: 0.9932964444160461,grad_norm: 0.9007665316254461, iteration: 48912
loss: 0.986103892326355,grad_norm: 0.8594423944286499, iteration: 48913
loss: 1.0276720523834229,grad_norm: 0.9999991993226868, iteration: 48914
loss: 1.0366731882095337,grad_norm: 0.8556271606416035, iteration: 48915
loss: 1.0110456943511963,grad_norm: 0.9999990661591127, iteration: 48916
loss: 1.0211716890335083,grad_norm: 0.9999998892730747, iteration: 48917
loss: 1.0126219987869263,grad_norm: 0.9999991357905151, iteration: 48918
loss: 1.011436939239502,grad_norm: 0.9999993386208432, iteration: 48919
loss: 0.9904578328132629,grad_norm: 0.9999990662142175, iteration: 48920
loss: 1.0330625772476196,grad_norm: 0.9079545171025768, iteration: 48921
loss: 0.9886599779129028,grad_norm: 0.9286315834041301, iteration: 48922
loss: 0.9840304851531982,grad_norm: 0.9999991303260191, iteration: 48923
loss: 1.0065815448760986,grad_norm: 0.999999194469755, iteration: 48924
loss: 1.0210195779800415,grad_norm: 0.9999995777676449, iteration: 48925
loss: 0.9925447702407837,grad_norm: 0.8291016046950538, iteration: 48926
loss: 0.997789740562439,grad_norm: 0.9999990290024267, iteration: 48927
loss: 1.0005816221237183,grad_norm: 0.9492820596381663, iteration: 48928
loss: 1.00119948387146,grad_norm: 0.9764573537750526, iteration: 48929
loss: 1.0194687843322754,grad_norm: 0.9999991247300167, iteration: 48930
loss: 1.0071187019348145,grad_norm: 0.9999991008066142, iteration: 48931
loss: 1.0008881092071533,grad_norm: 0.9999991713944393, iteration: 48932
loss: 0.9898781180381775,grad_norm: 0.9999992166355525, iteration: 48933
loss: 1.100814938545227,grad_norm: 0.9999995611445102, iteration: 48934
loss: 0.9756707549095154,grad_norm: 0.8712694009395879, iteration: 48935
loss: 0.9902393817901611,grad_norm: 0.9999993710488168, iteration: 48936
loss: 1.0076006650924683,grad_norm: 0.9999992619570903, iteration: 48937
loss: 0.9941635727882385,grad_norm: 0.9804677089906468, iteration: 48938
loss: 0.9974305033683777,grad_norm: 0.9999990620500259, iteration: 48939
loss: 0.9727922677993774,grad_norm: 0.8398463608975865, iteration: 48940
loss: 1.0175495147705078,grad_norm: 0.9999995831786143, iteration: 48941
loss: 1.021592378616333,grad_norm: 0.9999990207083189, iteration: 48942
loss: 1.008271336555481,grad_norm: 0.7429841884304431, iteration: 48943
loss: 1.0101797580718994,grad_norm: 0.9015107017433611, iteration: 48944
loss: 1.0154433250427246,grad_norm: 0.9036642770339084, iteration: 48945
loss: 1.019054889678955,grad_norm: 0.9999991252177004, iteration: 48946
loss: 0.9896805286407471,grad_norm: 0.9999992022545727, iteration: 48947
loss: 1.0781266689300537,grad_norm: 0.9999992449927807, iteration: 48948
loss: 0.992335319519043,grad_norm: 0.9450432259945578, iteration: 48949
loss: 1.0169141292572021,grad_norm: 0.9999991438103475, iteration: 48950
loss: 1.0051562786102295,grad_norm: 0.9999991927005123, iteration: 48951
loss: 0.9875596761703491,grad_norm: 0.94834265097293, iteration: 48952
loss: 1.0073522329330444,grad_norm: 0.8764034481830679, iteration: 48953
loss: 1.0087908506393433,grad_norm: 0.9999992748891039, iteration: 48954
loss: 0.9911096692085266,grad_norm: 0.9266700653620557, iteration: 48955
loss: 0.9979885220527649,grad_norm: 0.926787955316805, iteration: 48956
loss: 1.03956139087677,grad_norm: 0.9999991427205916, iteration: 48957
loss: 1.060167908668518,grad_norm: 0.9999992798049042, iteration: 48958
loss: 1.023320198059082,grad_norm: 0.9999991889460471, iteration: 48959
loss: 0.987549901008606,grad_norm: 0.8792288515497381, iteration: 48960
loss: 1.0036693811416626,grad_norm: 0.9999992522034912, iteration: 48961
loss: 0.9921660423278809,grad_norm: 0.9396198933749603, iteration: 48962
loss: 1.011805534362793,grad_norm: 0.9999995497758186, iteration: 48963
loss: 0.984382688999176,grad_norm: 0.9999991597004052, iteration: 48964
loss: 0.956907331943512,grad_norm: 0.9301281393888596, iteration: 48965
loss: 1.0129040479660034,grad_norm: 0.9999990943639986, iteration: 48966
loss: 1.0072451829910278,grad_norm: 0.9618616173700226, iteration: 48967
loss: 0.9663160443305969,grad_norm: 0.8572461099623749, iteration: 48968
loss: 0.9957515001296997,grad_norm: 0.9394453715676881, iteration: 48969
loss: 0.9621649384498596,grad_norm: 0.950624365932161, iteration: 48970
loss: 1.0250611305236816,grad_norm: 0.9999990506318569, iteration: 48971
loss: 1.0092037916183472,grad_norm: 0.9420666051487202, iteration: 48972
loss: 0.9874086976051331,grad_norm: 0.999999112739484, iteration: 48973
loss: 0.9768431186676025,grad_norm: 0.999998959538028, iteration: 48974
loss: 1.0164480209350586,grad_norm: 0.8561183131327019, iteration: 48975
loss: 1.0376688241958618,grad_norm: 0.9999992565905933, iteration: 48976
loss: 0.9982770681381226,grad_norm: 0.9999992098061744, iteration: 48977
loss: 1.0406213998794556,grad_norm: 0.8252884000998, iteration: 48978
loss: 1.0119630098342896,grad_norm: 0.9246377658994176, iteration: 48979
loss: 1.010430097579956,grad_norm: 0.9999990374753529, iteration: 48980
loss: 1.013228416442871,grad_norm: 0.927561662833759, iteration: 48981
loss: 0.9504149556159973,grad_norm: 0.9999994366953266, iteration: 48982
loss: 1.0295897722244263,grad_norm: 0.9999995269325671, iteration: 48983
loss: 1.0075631141662598,grad_norm: 0.9999990892304798, iteration: 48984
loss: 1.0175844430923462,grad_norm: 0.999999103693782, iteration: 48985
loss: 0.9674070477485657,grad_norm: 0.9228196508083083, iteration: 48986
loss: 1.0195071697235107,grad_norm: 0.9004628344045589, iteration: 48987
loss: 0.9990029335021973,grad_norm: 0.9423213926995576, iteration: 48988
loss: 1.0183671712875366,grad_norm: 0.9999995256940628, iteration: 48989
loss: 0.9961762428283691,grad_norm: 0.8534168974122348, iteration: 48990
loss: 1.032734751701355,grad_norm: 0.9427058313277265, iteration: 48991
loss: 0.995762288570404,grad_norm: 0.8599216409227314, iteration: 48992
loss: 1.0596249103546143,grad_norm: 0.9999993079838487, iteration: 48993
loss: 1.0130938291549683,grad_norm: 0.9999993759605413, iteration: 48994
loss: 1.035741925239563,grad_norm: 0.999999140142845, iteration: 48995
loss: 1.0039730072021484,grad_norm: 0.9999992016263357, iteration: 48996
loss: 1.058853030204773,grad_norm: 0.9999995049394651, iteration: 48997
loss: 0.9639437198638916,grad_norm: 0.8910687481665548, iteration: 48998
loss: 1.0048773288726807,grad_norm: 0.9836702152155664, iteration: 48999
loss: 0.9917704463005066,grad_norm: 0.9999990328497287, iteration: 49000
loss: 0.9846583604812622,grad_norm: 0.9999994506723723, iteration: 49001
loss: 1.0247055292129517,grad_norm: 0.9999990278571331, iteration: 49002
loss: 0.9975743889808655,grad_norm: 0.9999991759479498, iteration: 49003
loss: 0.9883034825325012,grad_norm: 0.999999162480354, iteration: 49004
loss: 1.0273497104644775,grad_norm: 0.9999995434557276, iteration: 49005
loss: 1.0302947759628296,grad_norm: 0.9999990932828442, iteration: 49006
loss: 1.0264559984207153,grad_norm: 0.9999991647863377, iteration: 49007
loss: 0.9910985827445984,grad_norm: 0.9999992316949521, iteration: 49008
loss: 1.0170977115631104,grad_norm: 0.9999991748823813, iteration: 49009
loss: 0.9594078660011292,grad_norm: 0.999999008271061, iteration: 49010
loss: 1.036239743232727,grad_norm: 0.9999990899882484, iteration: 49011
loss: 0.9660943746566772,grad_norm: 0.9647816817504697, iteration: 49012
loss: 0.9870052337646484,grad_norm: 0.9951379728908203, iteration: 49013
loss: 1.0463075637817383,grad_norm: 0.9999991701782048, iteration: 49014
loss: 0.9979895353317261,grad_norm: 0.9068473940510359, iteration: 49015
loss: 0.9936801791191101,grad_norm: 0.9999992581679709, iteration: 49016
loss: 1.0257731676101685,grad_norm: 0.9999989568009378, iteration: 49017
loss: 0.9998782873153687,grad_norm: 0.9999991657851549, iteration: 49018
loss: 0.9823307394981384,grad_norm: 0.9999992191574398, iteration: 49019
loss: 0.9878669381141663,grad_norm: 0.9999993215158083, iteration: 49020
loss: 1.010105013847351,grad_norm: 0.9999989710124705, iteration: 49021
loss: 1.0183745622634888,grad_norm: 0.9999991946135017, iteration: 49022
loss: 0.9958245754241943,grad_norm: 0.924459086571972, iteration: 49023
loss: 1.017925500869751,grad_norm: 0.9999991539460857, iteration: 49024
loss: 1.0089962482452393,grad_norm: 0.9999991787228384, iteration: 49025
loss: 0.9840019345283508,grad_norm: 0.9999989975049608, iteration: 49026
loss: 1.0267635583877563,grad_norm: 0.9999995006501076, iteration: 49027
loss: 1.0017602443695068,grad_norm: 0.970976547702837, iteration: 49028
loss: 1.0166317224502563,grad_norm: 0.9999993285438024, iteration: 49029
loss: 1.0483499765396118,grad_norm: 0.9999990966663096, iteration: 49030
loss: 1.0918039083480835,grad_norm: 0.999999300468584, iteration: 49031
loss: 0.9894400238990784,grad_norm: 0.9013147413419988, iteration: 49032
loss: 0.9621192812919617,grad_norm: 0.9999991844726855, iteration: 49033
loss: 1.0285342931747437,grad_norm: 0.9999992391537099, iteration: 49034
loss: 0.9562368988990784,grad_norm: 0.9999990853774512, iteration: 49035
loss: 0.9820953607559204,grad_norm: 0.9999997223413184, iteration: 49036
loss: 1.0412001609802246,grad_norm: 0.9999992072713513, iteration: 49037
loss: 1.0122727155685425,grad_norm: 0.9999990768480926, iteration: 49038
loss: 0.9825878739356995,grad_norm: 0.8851579753578627, iteration: 49039
loss: 1.0041117668151855,grad_norm: 0.9999990859293485, iteration: 49040
loss: 1.0118849277496338,grad_norm: 0.9999991618130151, iteration: 49041
loss: 0.9989418387413025,grad_norm: 0.9999993400925407, iteration: 49042
loss: 1.0558974742889404,grad_norm: 0.9999992521715289, iteration: 49043
loss: 0.9993829131126404,grad_norm: 0.9999990884366704, iteration: 49044
loss: 0.990763247013092,grad_norm: 0.9999992788329285, iteration: 49045
loss: 1.0084376335144043,grad_norm: 0.8930393753367245, iteration: 49046
loss: 1.0268621444702148,grad_norm: 0.9387932195225532, iteration: 49047
loss: 1.0219024419784546,grad_norm: 0.9999989314436996, iteration: 49048
loss: 0.9779669642448425,grad_norm: 0.9500128630343149, iteration: 49049
loss: 1.0365073680877686,grad_norm: 0.7802001873786177, iteration: 49050
loss: 1.0256465673446655,grad_norm: 0.9999990274340663, iteration: 49051
loss: 1.035293698310852,grad_norm: 0.99999949979945, iteration: 49052
loss: 0.9959703087806702,grad_norm: 0.9999990560548204, iteration: 49053
loss: 0.9854221343994141,grad_norm: 0.9999990440127132, iteration: 49054
loss: 1.019284725189209,grad_norm: 0.9999990832456371, iteration: 49055
loss: 1.028746247291565,grad_norm: 0.9999991608237222, iteration: 49056
loss: 1.0400974750518799,grad_norm: 1.0000000333262953, iteration: 49057
loss: 1.0276882648468018,grad_norm: 0.8601952856828409, iteration: 49058
loss: 0.9810641407966614,grad_norm: 0.9999990802247679, iteration: 49059
loss: 1.141090989112854,grad_norm: 0.9999993952304319, iteration: 49060
loss: 0.9803478121757507,grad_norm: 0.9999992153223184, iteration: 49061
loss: 1.0359221696853638,grad_norm: 0.9999991996562941, iteration: 49062
loss: 1.019803524017334,grad_norm: 0.9999992679635384, iteration: 49063
loss: 1.0567421913146973,grad_norm: 0.9999993077277841, iteration: 49064
loss: 1.0101085901260376,grad_norm: 0.9999990510729244, iteration: 49065
loss: 0.9836499094963074,grad_norm: 0.9438029149127047, iteration: 49066
loss: 0.992016613483429,grad_norm: 0.9120183716069994, iteration: 49067
loss: 1.0220832824707031,grad_norm: 0.9999990509070708, iteration: 49068
loss: 1.006591558456421,grad_norm: 0.9999991570030353, iteration: 49069
loss: 1.0326881408691406,grad_norm: 0.889544371730375, iteration: 49070
loss: 1.0263651609420776,grad_norm: 0.9999991324190415, iteration: 49071
loss: 1.0245977640151978,grad_norm: 0.9999992144136586, iteration: 49072
loss: 1.0162019729614258,grad_norm: 0.9999992995223447, iteration: 49073
loss: 0.9672608971595764,grad_norm: 0.9999992229229976, iteration: 49074
loss: 1.0251892805099487,grad_norm: 0.9999992082612611, iteration: 49075
loss: 0.9883309602737427,grad_norm: 0.9999992525212126, iteration: 49076
loss: 1.184382677078247,grad_norm: 0.9999996294207775, iteration: 49077
loss: 0.9586747884750366,grad_norm: 0.8707744219348519, iteration: 49078
loss: 1.0157921314239502,grad_norm: 0.9999993128852515, iteration: 49079
loss: 1.0519992113113403,grad_norm: 0.9999990410072274, iteration: 49080
loss: 1.0384180545806885,grad_norm: 0.9999996637284221, iteration: 49081
loss: 1.0008928775787354,grad_norm: 0.9085604516809042, iteration: 49082
loss: 1.0162789821624756,grad_norm: 0.9999996663979784, iteration: 49083
loss: 1.0122920274734497,grad_norm: 0.8376056217669934, iteration: 49084
loss: 0.9987875819206238,grad_norm: 0.9999990064502758, iteration: 49085
loss: 1.0145959854125977,grad_norm: 0.9161507801847506, iteration: 49086
loss: 1.0149075984954834,grad_norm: 0.9999993046559574, iteration: 49087
loss: 1.0026808977127075,grad_norm: 0.999999077307118, iteration: 49088
loss: 0.9868913888931274,grad_norm: 0.9999991402928379, iteration: 49089
loss: 1.021985411643982,grad_norm: 0.9999992938793504, iteration: 49090
loss: 0.9827220439910889,grad_norm: 0.8198380244277216, iteration: 49091
loss: 0.9877989292144775,grad_norm: 0.9999997438184349, iteration: 49092
loss: 1.009048342704773,grad_norm: 0.9999995454731888, iteration: 49093
loss: 0.9916468858718872,grad_norm: 0.9999992234406744, iteration: 49094
loss: 1.0121996402740479,grad_norm: 0.999998995024149, iteration: 49095
loss: 0.9856709837913513,grad_norm: 0.9863567796975787, iteration: 49096
loss: 0.9851335287094116,grad_norm: 0.9999990408114403, iteration: 49097
loss: 1.0321781635284424,grad_norm: 0.9602356282297078, iteration: 49098
loss: 1.097471833229065,grad_norm: 0.9999994716918191, iteration: 49099
loss: 1.0112286806106567,grad_norm: 0.9999992555107585, iteration: 49100
loss: 1.0040541887283325,grad_norm: 0.9723000213996914, iteration: 49101
loss: 0.9873771667480469,grad_norm: 0.9999989662046752, iteration: 49102
loss: 1.0118123292922974,grad_norm: 0.999999239678429, iteration: 49103
loss: 0.9866945147514343,grad_norm: 0.9999991773934194, iteration: 49104
loss: 0.993404746055603,grad_norm: 0.9999991602508457, iteration: 49105
loss: 1.0217384099960327,grad_norm: 0.9842201815841447, iteration: 49106
loss: 0.9982874989509583,grad_norm: 0.9097795459383312, iteration: 49107
loss: 1.0413151979446411,grad_norm: 0.9999990987809446, iteration: 49108
loss: 1.0760496854782104,grad_norm: 0.9999992360728722, iteration: 49109
loss: 1.0420682430267334,grad_norm: 0.9999991491617087, iteration: 49110
loss: 1.0343756675720215,grad_norm: 0.9373103612086807, iteration: 49111
loss: 1.0137916803359985,grad_norm: 0.9999989194252523, iteration: 49112
loss: 1.0079177618026733,grad_norm: 0.9999991250049115, iteration: 49113
loss: 1.0362757444381714,grad_norm: 0.9999990681827646, iteration: 49114
loss: 0.9997896552085876,grad_norm: 0.9999990385327308, iteration: 49115
loss: 0.9902939796447754,grad_norm: 0.9999989466635623, iteration: 49116
loss: 1.0237623453140259,grad_norm: 0.9999996878626372, iteration: 49117
loss: 1.0164612531661987,grad_norm: 0.9999993644710239, iteration: 49118
loss: 0.994705319404602,grad_norm: 0.9107910091594917, iteration: 49119
loss: 0.9957121014595032,grad_norm: 0.9999990049658027, iteration: 49120
loss: 1.0006731748580933,grad_norm: 0.9641674303494874, iteration: 49121
loss: 1.0305184125900269,grad_norm: 0.9999990397477599, iteration: 49122
loss: 1.0197551250457764,grad_norm: 0.9999991114205272, iteration: 49123
loss: 0.9784271121025085,grad_norm: 0.9999992107286361, iteration: 49124
loss: 0.9822257161140442,grad_norm: 0.8831129406337951, iteration: 49125
loss: 1.0254422426223755,grad_norm: 0.9999998021729967, iteration: 49126
loss: 0.973432183265686,grad_norm: 0.9999989947880656, iteration: 49127
loss: 1.0056166648864746,grad_norm: 0.9538791704623677, iteration: 49128
loss: 1.0051878690719604,grad_norm: 0.9800851597833827, iteration: 49129
loss: 1.0262459516525269,grad_norm: 0.8426996068049633, iteration: 49130
loss: 1.002070426940918,grad_norm: 0.989132746513169, iteration: 49131
loss: 1.0117920637130737,grad_norm: 0.918185481007267, iteration: 49132
loss: 1.0023488998413086,grad_norm: 0.9999994582006776, iteration: 49133
loss: 1.0978339910507202,grad_norm: 0.9999997145112025, iteration: 49134
loss: 0.9938104748725891,grad_norm: 0.9999993703834852, iteration: 49135
loss: 0.9690752625465393,grad_norm: 0.9999992686183672, iteration: 49136
loss: 1.0217843055725098,grad_norm: 0.999998991420327, iteration: 49137
loss: 1.0746254920959473,grad_norm: 0.9999994760021698, iteration: 49138
loss: 1.0357049703598022,grad_norm: 0.907811272132938, iteration: 49139
loss: 1.0214729309082031,grad_norm: 0.9819362386474048, iteration: 49140
loss: 0.9912286996841431,grad_norm: 0.9999991531314725, iteration: 49141
loss: 1.0106936693191528,grad_norm: 0.9050967924020704, iteration: 49142
loss: 1.0094250440597534,grad_norm: 0.9999992255728978, iteration: 49143
loss: 0.9827066659927368,grad_norm: 0.9999991545403134, iteration: 49144
loss: 1.0219382047653198,grad_norm: 0.9999995289651409, iteration: 49145
loss: 0.9667978286743164,grad_norm: 0.9999989660286064, iteration: 49146
loss: 1.0109268426895142,grad_norm: 0.9999993117203616, iteration: 49147
loss: 1.0231422185897827,grad_norm: 0.9999990905557847, iteration: 49148
loss: 1.0118907690048218,grad_norm: 0.9999998039453821, iteration: 49149
loss: 1.0601834058761597,grad_norm: 0.9999995337987405, iteration: 49150
loss: 1.0430514812469482,grad_norm: 0.9829515442905954, iteration: 49151
loss: 1.0219701528549194,grad_norm: 0.9060119134620033, iteration: 49152
loss: 1.0529626607894897,grad_norm: 0.9999995708472472, iteration: 49153
loss: 1.0392988920211792,grad_norm: 0.9999990957040501, iteration: 49154
loss: 1.0328526496887207,grad_norm: 0.9999996872974427, iteration: 49155
loss: 1.0215601921081543,grad_norm: 0.9999990792723201, iteration: 49156
loss: 0.9934549331665039,grad_norm: 0.9999992072059051, iteration: 49157
loss: 1.0403074026107788,grad_norm: 0.9999991586565748, iteration: 49158
loss: 1.0130995512008667,grad_norm: 0.999999192949712, iteration: 49159
loss: 1.0101147890090942,grad_norm: 0.7983491383114631, iteration: 49160
loss: 1.0120848417282104,grad_norm: 0.9999990954995388, iteration: 49161
loss: 1.0145825147628784,grad_norm: 0.9999990465203086, iteration: 49162
loss: 1.0197677612304688,grad_norm: 0.9999991811167362, iteration: 49163
loss: 1.0498378276824951,grad_norm: 0.9999992550476899, iteration: 49164
loss: 0.9958932399749756,grad_norm: 0.9999990851745069, iteration: 49165
loss: 1.0171680450439453,grad_norm: 0.9999991939523032, iteration: 49166
loss: 1.0042881965637207,grad_norm: 0.9999991089405215, iteration: 49167
loss: 1.0329233407974243,grad_norm: 0.9999992440844054, iteration: 49168
loss: 1.0120186805725098,grad_norm: 0.9999991766015691, iteration: 49169
loss: 1.001440167427063,grad_norm: 0.9999993483874694, iteration: 49170
loss: 0.9860643744468689,grad_norm: 0.9747502899322553, iteration: 49171
loss: 1.0202442407608032,grad_norm: 0.9999995780076947, iteration: 49172
loss: 1.0373601913452148,grad_norm: 0.999999459139726, iteration: 49173
loss: 1.0009657144546509,grad_norm: 0.9999993414911439, iteration: 49174
loss: 1.0315262079238892,grad_norm: 0.999998993000304, iteration: 49175
loss: 1.0177061557769775,grad_norm: 0.9999992202846456, iteration: 49176
loss: 0.9634743928909302,grad_norm: 0.999999083864877, iteration: 49177
loss: 1.0218247175216675,grad_norm: 0.9999991806452319, iteration: 49178
loss: 1.0429657697677612,grad_norm: 0.9999999129262304, iteration: 49179
loss: 1.054232120513916,grad_norm: 0.9999992768607354, iteration: 49180
loss: 1.0332212448120117,grad_norm: 0.9999991950110384, iteration: 49181
loss: 1.0073715448379517,grad_norm: 0.8976629392571022, iteration: 49182
loss: 1.0073521137237549,grad_norm: 0.999999116197972, iteration: 49183
loss: 1.0053402185440063,grad_norm: 0.9999991271805667, iteration: 49184
loss: 1.0060555934906006,grad_norm: 0.9999991451013576, iteration: 49185
loss: 1.010499119758606,grad_norm: 0.9999991209282576, iteration: 49186
loss: 1.0021123886108398,grad_norm: 0.9999990585162516, iteration: 49187
loss: 0.9904026389122009,grad_norm: 0.9999991264358582, iteration: 49188
loss: 0.9776319265365601,grad_norm: 0.9999992072510598, iteration: 49189
loss: 0.9980763792991638,grad_norm: 0.9999990333377553, iteration: 49190
loss: 1.0619878768920898,grad_norm: 0.999999613853384, iteration: 49191
loss: 0.9992526769638062,grad_norm: 0.9999992949250283, iteration: 49192
loss: 0.9945818185806274,grad_norm: 0.833133207551475, iteration: 49193
loss: 1.0059187412261963,grad_norm: 0.9058981073419601, iteration: 49194
loss: 1.0342788696289062,grad_norm: 0.9999992055058196, iteration: 49195
loss: 1.0088533163070679,grad_norm: 0.9205326779570056, iteration: 49196
loss: 1.00194251537323,grad_norm: 0.999999074776666, iteration: 49197
loss: 1.0164475440979004,grad_norm: 0.9999990643833259, iteration: 49198
loss: 0.9779868721961975,grad_norm: 0.9999990141596338, iteration: 49199
loss: 0.993335485458374,grad_norm: 0.9999992593084173, iteration: 49200
loss: 1.1799019575119019,grad_norm: 0.999999762962904, iteration: 49201
loss: 1.0250625610351562,grad_norm: 0.9699141999839765, iteration: 49202
loss: 0.9604265689849854,grad_norm: 0.9999992505040788, iteration: 49203
loss: 0.9807180166244507,grad_norm: 0.9999991869916176, iteration: 49204
loss: 1.0320531129837036,grad_norm: 0.9513345196980363, iteration: 49205
loss: 1.0373501777648926,grad_norm: 0.9999996667172619, iteration: 49206
loss: 0.9936584234237671,grad_norm: 0.9059826066312012, iteration: 49207
loss: 1.1691564321517944,grad_norm: 0.9999995742044039, iteration: 49208
loss: 0.9919306039810181,grad_norm: 0.9522235913147276, iteration: 49209
loss: 1.0262855291366577,grad_norm: 0.9999993105361393, iteration: 49210
loss: 0.9935354590415955,grad_norm: 0.8805954466411267, iteration: 49211
loss: 1.0429503917694092,grad_norm: 0.9999992593046289, iteration: 49212
loss: 0.9816063046455383,grad_norm: 0.999999111248997, iteration: 49213
loss: 0.9910693168640137,grad_norm: 0.9844353761390736, iteration: 49214
loss: 0.9843049049377441,grad_norm: 0.9237407392274458, iteration: 49215
loss: 1.010469913482666,grad_norm: 0.9999996151104085, iteration: 49216
loss: 1.0119963884353638,grad_norm: 0.9999991913065843, iteration: 49217
loss: 1.0104820728302002,grad_norm: 0.9999990591450878, iteration: 49218
loss: 0.9395792484283447,grad_norm: 0.9999990704182635, iteration: 49219
loss: 0.9749034643173218,grad_norm: 0.999999179383889, iteration: 49220
loss: 1.0376691818237305,grad_norm: 0.9999993288948505, iteration: 49221
loss: 1.1933420896530151,grad_norm: 0.9999999428428255, iteration: 49222
loss: 1.0127085447311401,grad_norm: 0.99999919681247, iteration: 49223
loss: 1.0025138854980469,grad_norm: 0.9999990895042932, iteration: 49224
loss: 0.9707520008087158,grad_norm: 0.999999170133882, iteration: 49225
loss: 0.9863489270210266,grad_norm: 0.9210160826018011, iteration: 49226
loss: 1.0156277418136597,grad_norm: 0.9198685422434422, iteration: 49227
loss: 1.0206865072250366,grad_norm: 0.9999991810755463, iteration: 49228
loss: 0.9802955389022827,grad_norm: 0.9999991239863923, iteration: 49229
loss: 1.1064584255218506,grad_norm: 0.9999999273473837, iteration: 49230
loss: 1.0226877927780151,grad_norm: 0.9523689019077519, iteration: 49231
loss: 1.0257450342178345,grad_norm: 0.9999993560228851, iteration: 49232
loss: 1.0058190822601318,grad_norm: 0.9999991571659635, iteration: 49233
loss: 1.0085422992706299,grad_norm: 0.9880254110928781, iteration: 49234
loss: 1.058728814125061,grad_norm: 0.9999990711481912, iteration: 49235
loss: 0.9947367906570435,grad_norm: 0.9459874678830555, iteration: 49236
loss: 1.0156296491622925,grad_norm: 0.9999989656107545, iteration: 49237
loss: 1.0227956771850586,grad_norm: 0.9999991043058811, iteration: 49238
loss: 0.9669193029403687,grad_norm: 0.9999991575527559, iteration: 49239
loss: 1.1178315877914429,grad_norm: 0.9999995644398477, iteration: 49240
loss: 0.95639568567276,grad_norm: 0.9999992408794152, iteration: 49241
loss: 0.9833585023880005,grad_norm: 0.9999991255892459, iteration: 49242
loss: 1.1361472606658936,grad_norm: 0.9999997699668953, iteration: 49243
loss: 1.034104347229004,grad_norm: 0.9376713419120185, iteration: 49244
loss: 1.0143507719039917,grad_norm: 0.9999991801248599, iteration: 49245
loss: 0.9561821818351746,grad_norm: 0.9999991405516397, iteration: 49246
loss: 1.024449110031128,grad_norm: 0.9999993685335988, iteration: 49247
loss: 1.0324311256408691,grad_norm: 0.8866440092383242, iteration: 49248
loss: 0.953436553478241,grad_norm: 0.9999990146160352, iteration: 49249
loss: 0.9934011101722717,grad_norm: 0.9731887120006485, iteration: 49250
loss: 1.0335761308670044,grad_norm: 0.9774613528343169, iteration: 49251
loss: 1.0113201141357422,grad_norm: 0.9145400986031937, iteration: 49252
loss: 0.9742498397827148,grad_norm: 0.8874351296560652, iteration: 49253
loss: 1.0084853172302246,grad_norm: 0.9999995298158338, iteration: 49254
loss: 0.9767669439315796,grad_norm: 0.9876792524946527, iteration: 49255
loss: 0.974258303642273,grad_norm: 0.999999097457923, iteration: 49256
loss: 1.0296849012374878,grad_norm: 0.9999994164020446, iteration: 49257
loss: 1.0494998693466187,grad_norm: 0.9999991299441766, iteration: 49258
loss: 0.9680280685424805,grad_norm: 0.9401235439393242, iteration: 49259
loss: 0.9936969876289368,grad_norm: 0.8736920804256113, iteration: 49260
loss: 0.9657117128372192,grad_norm: 0.9999989926124468, iteration: 49261
loss: 0.9948497414588928,grad_norm: 0.9999992873621693, iteration: 49262
loss: 0.9966092109680176,grad_norm: 0.9999991650196592, iteration: 49263
loss: 0.9854440093040466,grad_norm: 0.952983555014992, iteration: 49264
loss: 1.041008710861206,grad_norm: 0.9999991669413135, iteration: 49265
loss: 0.9823771119117737,grad_norm: 0.9999990823594863, iteration: 49266
loss: 1.0096614360809326,grad_norm: 0.9999991440684893, iteration: 49267
loss: 0.9986768960952759,grad_norm: 0.999999017782665, iteration: 49268
loss: 0.9875249862670898,grad_norm: 0.9999990237169469, iteration: 49269
loss: 0.9790263772010803,grad_norm: 0.8312505190182707, iteration: 49270
loss: 0.9727875590324402,grad_norm: 0.7821185249936502, iteration: 49271
loss: 0.9700799584388733,grad_norm: 0.8003134437924802, iteration: 49272
loss: 1.0094274282455444,grad_norm: 0.9720745607791003, iteration: 49273
loss: 0.9816339612007141,grad_norm: 0.9999991609972531, iteration: 49274
loss: 1.0567166805267334,grad_norm: 0.9999996218311357, iteration: 49275
loss: 1.0098726749420166,grad_norm: 0.7932922545632406, iteration: 49276
loss: 1.0145000219345093,grad_norm: 0.99999908271332, iteration: 49277
loss: 1.037522554397583,grad_norm: 0.8818014015619514, iteration: 49278
loss: 0.977867603302002,grad_norm: 0.9999989673552196, iteration: 49279
loss: 1.0313137769699097,grad_norm: 0.999999196744815, iteration: 49280
loss: 1.0258632898330688,grad_norm: 0.8427915718120775, iteration: 49281
loss: 1.0017766952514648,grad_norm: 0.8588668054416043, iteration: 49282
loss: 0.9906883239746094,grad_norm: 0.9999992184449316, iteration: 49283
loss: 1.022517442703247,grad_norm: 0.9999990632118636, iteration: 49284
loss: 1.0310338735580444,grad_norm: 0.9999988350214732, iteration: 49285
loss: 1.0189831256866455,grad_norm: 0.9999992357004699, iteration: 49286
loss: 0.9844993948936462,grad_norm: 0.8816662277239498, iteration: 49287
loss: 0.9812745451927185,grad_norm: 0.9999991589873658, iteration: 49288
loss: 1.0222359895706177,grad_norm: 0.9292098188838415, iteration: 49289
loss: 1.0193657875061035,grad_norm: 0.7533191925737028, iteration: 49290
loss: 1.0236239433288574,grad_norm: 0.9999990386607283, iteration: 49291
loss: 1.0018137693405151,grad_norm: 0.9113798729052228, iteration: 49292
loss: 0.9896124601364136,grad_norm: 0.9999991800629907, iteration: 49293
loss: 1.0284169912338257,grad_norm: 0.9580118338512256, iteration: 49294
loss: 0.9882102608680725,grad_norm: 0.9999991709261643, iteration: 49295
loss: 1.0007137060165405,grad_norm: 0.8613904330141032, iteration: 49296
loss: 0.9662656188011169,grad_norm: 0.9837137448672432, iteration: 49297
loss: 1.0468790531158447,grad_norm: 0.9999993617222793, iteration: 49298
loss: 0.9780254364013672,grad_norm: 0.9895145153829122, iteration: 49299
loss: 0.9940573573112488,grad_norm: 0.930674471906992, iteration: 49300
loss: 0.9964480400085449,grad_norm: 0.9738683966751095, iteration: 49301
loss: 0.9957250356674194,grad_norm: 0.886766535933937, iteration: 49302
loss: 0.9813833236694336,grad_norm: 0.9387914477530542, iteration: 49303
loss: 1.009291648864746,grad_norm: 0.9999990985704655, iteration: 49304
loss: 0.9798831343650818,grad_norm: 0.999999118967633, iteration: 49305
loss: 0.9570484161376953,grad_norm: 0.9348374270274004, iteration: 49306
loss: 1.016243577003479,grad_norm: 0.9999994914966253, iteration: 49307
loss: 1.0467432737350464,grad_norm: 0.9999994604740108, iteration: 49308
loss: 1.031225323677063,grad_norm: 0.9999993383120837, iteration: 49309
loss: 1.010178804397583,grad_norm: 0.9999989349514835, iteration: 49310
loss: 1.0117777585983276,grad_norm: 0.9999992541503013, iteration: 49311
loss: 0.9804224967956543,grad_norm: 0.9397012035195389, iteration: 49312
loss: 1.0011589527130127,grad_norm: 0.9513834913319283, iteration: 49313
loss: 1.0304579734802246,grad_norm: 0.9999991736694366, iteration: 49314
loss: 1.037832260131836,grad_norm: 0.9999991034350877, iteration: 49315
loss: 1.0348460674285889,grad_norm: 0.9440662890171143, iteration: 49316
loss: 1.0049810409545898,grad_norm: 0.9404772854206179, iteration: 49317
loss: 1.006425142288208,grad_norm: 0.9999992494940154, iteration: 49318
loss: 0.990485668182373,grad_norm: 0.9451628753530927, iteration: 49319
loss: 0.9830642938613892,grad_norm: 0.9977105689275838, iteration: 49320
loss: 1.0518770217895508,grad_norm: 0.9999992871947507, iteration: 49321
loss: 1.0192168951034546,grad_norm: 0.9999991562037931, iteration: 49322
loss: 1.0486623048782349,grad_norm: 0.9999994849374906, iteration: 49323
loss: 0.9718053340911865,grad_norm: 0.8787332974924223, iteration: 49324
loss: 0.9936292171478271,grad_norm: 0.999998976692175, iteration: 49325
loss: 0.9962339997291565,grad_norm: 0.9999992950431105, iteration: 49326
loss: 1.027795433998108,grad_norm: 0.9999993594521436, iteration: 49327
loss: 1.027611255645752,grad_norm: 0.9999990846247364, iteration: 49328
loss: 1.0096991062164307,grad_norm: 0.9403131620618329, iteration: 49329
loss: 1.0148190259933472,grad_norm: 0.92091375578756, iteration: 49330
loss: 1.0757414102554321,grad_norm: 0.9999993726657204, iteration: 49331
loss: 1.0011768341064453,grad_norm: 0.9999993978009395, iteration: 49332
loss: 0.9678584337234497,grad_norm: 0.9999989768980234, iteration: 49333
loss: 1.0140783786773682,grad_norm: 0.9999990111517716, iteration: 49334
loss: 1.0210472345352173,grad_norm: 0.9999995364097222, iteration: 49335
loss: 1.0005072355270386,grad_norm: 0.8957963656795269, iteration: 49336
loss: 1.035399317741394,grad_norm: 0.999999622631483, iteration: 49337
loss: 1.0470890998840332,grad_norm: 0.9999990863163216, iteration: 49338
loss: 1.090806245803833,grad_norm: 0.9999996250355655, iteration: 49339
loss: 1.0145372152328491,grad_norm: 0.9999991757036472, iteration: 49340
loss: 0.9738920331001282,grad_norm: 0.9994758064847122, iteration: 49341
loss: 1.015689730644226,grad_norm: 0.9447118504329418, iteration: 49342
loss: 1.014614224433899,grad_norm: 0.9423040889026917, iteration: 49343
loss: 0.965329110622406,grad_norm: 0.999999057965217, iteration: 49344
loss: 1.0013432502746582,grad_norm: 0.8235780959059019, iteration: 49345
loss: 1.0217055082321167,grad_norm: 0.999998966543001, iteration: 49346
loss: 0.9595588445663452,grad_norm: 0.9999991310937592, iteration: 49347
loss: 1.0456013679504395,grad_norm: 0.9999990659194397, iteration: 49348
loss: 1.0018858909606934,grad_norm: 0.9604386455742803, iteration: 49349
loss: 1.018539547920227,grad_norm: 0.8855608739694428, iteration: 49350
loss: 0.9876317977905273,grad_norm: 0.9999991445856345, iteration: 49351
loss: 0.9920204877853394,grad_norm: 0.9373523935462922, iteration: 49352
loss: 1.0101501941680908,grad_norm: 0.9999991141794415, iteration: 49353
loss: 1.0134996175765991,grad_norm: 0.99999911140197, iteration: 49354
loss: 1.064842939376831,grad_norm: 0.9999994974737408, iteration: 49355
loss: 1.0180681943893433,grad_norm: 0.9999996956674335, iteration: 49356
loss: 1.0240421295166016,grad_norm: 0.9999990557233325, iteration: 49357
loss: 1.031839370727539,grad_norm: 0.9536123323613475, iteration: 49358
loss: 0.9840361475944519,grad_norm: 0.8785488998866657, iteration: 49359
loss: 1.0567203760147095,grad_norm: 0.999999193310044, iteration: 49360
loss: 0.9809740781784058,grad_norm: 0.8395710570202444, iteration: 49361
loss: 1.0214159488677979,grad_norm: 0.9126238921154208, iteration: 49362
loss: 1.0182757377624512,grad_norm: 0.9999989178010155, iteration: 49363
loss: 1.0340710878372192,grad_norm: 0.9999989863945983, iteration: 49364
loss: 1.0121955871582031,grad_norm: 0.9999991311343922, iteration: 49365
loss: 0.991573691368103,grad_norm: 0.9999990949673953, iteration: 49366
loss: 0.9870578050613403,grad_norm: 0.9999990004338191, iteration: 49367
loss: 1.02687668800354,grad_norm: 0.9719426616684467, iteration: 49368
loss: 1.0361639261245728,grad_norm: 0.7819669852850011, iteration: 49369
loss: 1.0050007104873657,grad_norm: 0.9999992401090341, iteration: 49370
loss: 1.0064054727554321,grad_norm: 0.9999997076286539, iteration: 49371
loss: 0.9989057779312134,grad_norm: 0.99999910067218, iteration: 49372
loss: 1.0136923789978027,grad_norm: 0.975043919622602, iteration: 49373
loss: 1.0296834707260132,grad_norm: 0.9999996714506486, iteration: 49374
loss: 0.9796550869941711,grad_norm: 0.9999992960802038, iteration: 49375
loss: 1.0146063566207886,grad_norm: 0.9779856373289829, iteration: 49376
loss: 0.975588321685791,grad_norm: 0.9471454003598935, iteration: 49377
loss: 0.9727353453636169,grad_norm: 0.9999990658350014, iteration: 49378
loss: 1.0261173248291016,grad_norm: 0.9999995579025395, iteration: 49379
loss: 0.9870079755783081,grad_norm: 0.9999996032962944, iteration: 49380
loss: 0.9840807914733887,grad_norm: 0.9999991891276975, iteration: 49381
loss: 1.0334997177124023,grad_norm: 0.9999994636318373, iteration: 49382
loss: 0.991108238697052,grad_norm: 0.8356901015163813, iteration: 49383
loss: 0.9894988536834717,grad_norm: 0.9999991612338702, iteration: 49384
loss: 0.9385559558868408,grad_norm: 0.957842241285537, iteration: 49385
loss: 0.9968937039375305,grad_norm: 0.9999990694172343, iteration: 49386
loss: 1.0361379384994507,grad_norm: 0.9334174417863202, iteration: 49387
loss: 1.0393850803375244,grad_norm: 0.9999991045031071, iteration: 49388
loss: 1.0128281116485596,grad_norm: 0.9585793630491231, iteration: 49389
loss: 1.0447473526000977,grad_norm: 0.9999998592125148, iteration: 49390
loss: 1.0138728618621826,grad_norm: 0.9999992522155006, iteration: 49391
loss: 0.9705237746238708,grad_norm: 0.9669258346980214, iteration: 49392
loss: 0.9785673022270203,grad_norm: 0.9999991376225744, iteration: 49393
loss: 0.991389811038971,grad_norm: 0.9158833499073356, iteration: 49394
loss: 1.0880900621414185,grad_norm: 0.9630629716989597, iteration: 49395
loss: 1.0290355682373047,grad_norm: 0.9999991216565584, iteration: 49396
loss: 1.0756196975708008,grad_norm: 0.9999997832694641, iteration: 49397
loss: 1.0573958158493042,grad_norm: 0.9999992528377384, iteration: 49398
loss: 1.237949013710022,grad_norm: 0.9999999079120034, iteration: 49399
loss: 1.1419187784194946,grad_norm: 0.9999997398346839, iteration: 49400
loss: 1.0021880865097046,grad_norm: 0.9999991293619612, iteration: 49401
loss: 0.9906643033027649,grad_norm: 0.919997289360379, iteration: 49402
loss: 1.0364452600479126,grad_norm: 0.9999991275408524, iteration: 49403
loss: 0.9896076917648315,grad_norm: 0.9906896071526888, iteration: 49404
loss: 1.0343396663665771,grad_norm: 0.9133717209437561, iteration: 49405
loss: 1.022653579711914,grad_norm: 0.95902039653051, iteration: 49406
loss: 0.9929483532905579,grad_norm: 0.9999992953040286, iteration: 49407
loss: 0.9700670838356018,grad_norm: 0.8146463264723395, iteration: 49408
loss: 0.9728935360908508,grad_norm: 0.9884158836281414, iteration: 49409
loss: 0.989023745059967,grad_norm: 0.9999990079719662, iteration: 49410
loss: 1.0324965715408325,grad_norm: 0.999999327962937, iteration: 49411
loss: 1.0219752788543701,grad_norm: 0.9999991207342082, iteration: 49412
loss: 0.9732775092124939,grad_norm: 0.9724162525132768, iteration: 49413
loss: 1.0402424335479736,grad_norm: 0.9071327639183432, iteration: 49414
loss: 0.980587899684906,grad_norm: 0.9999991451194272, iteration: 49415
loss: 1.021635890007019,grad_norm: 0.9999996314557729, iteration: 49416
loss: 1.0060255527496338,grad_norm: 0.9999990055643009, iteration: 49417
loss: 0.9868305921554565,grad_norm: 0.9999990417735115, iteration: 49418
loss: 0.9987833499908447,grad_norm: 0.9999989933441342, iteration: 49419
loss: 0.9926964044570923,grad_norm: 0.9999991998746286, iteration: 49420
loss: 1.0436370372772217,grad_norm: 0.9999992711888958, iteration: 49421
loss: 0.9923038482666016,grad_norm: 0.9567405666503511, iteration: 49422
loss: 1.0139797925949097,grad_norm: 0.9840169006736721, iteration: 49423
loss: 0.9431588649749756,grad_norm: 0.9999993789659849, iteration: 49424
loss: 0.9831645488739014,grad_norm: 0.9481484955376746, iteration: 49425
loss: 0.9929793477058411,grad_norm: 0.999999242466615, iteration: 49426
loss: 0.9661580324172974,grad_norm: 0.9999991772618626, iteration: 49427
loss: 0.9894943237304688,grad_norm: 0.8957298700698162, iteration: 49428
loss: 1.1281027793884277,grad_norm: 0.9999993168825378, iteration: 49429
loss: 1.0127040147781372,grad_norm: 0.8983839211540033, iteration: 49430
loss: 0.9976861476898193,grad_norm: 0.8628871937712201, iteration: 49431
loss: 1.0283102989196777,grad_norm: 0.8146998596667547, iteration: 49432
loss: 0.9861736297607422,grad_norm: 0.9999992795741054, iteration: 49433
loss: 0.9697373509407043,grad_norm: 0.9486097743422475, iteration: 49434
loss: 1.0022478103637695,grad_norm: 0.9600822778070127, iteration: 49435
loss: 1.0083084106445312,grad_norm: 0.9999990418393498, iteration: 49436
loss: 0.9998310208320618,grad_norm: 0.9999989226559172, iteration: 49437
loss: 0.9717182517051697,grad_norm: 0.8488983993993086, iteration: 49438
loss: 1.0696853399276733,grad_norm: 0.9999993747339784, iteration: 49439
loss: 0.9642164707183838,grad_norm: 0.9999990454538696, iteration: 49440
loss: 1.00307297706604,grad_norm: 0.9467588209809937, iteration: 49441
loss: 1.009692907333374,grad_norm: 0.9999990851467856, iteration: 49442
loss: 0.9734479188919067,grad_norm: 0.9895364792279273, iteration: 49443
loss: 1.0198276042938232,grad_norm: 0.9999989535066037, iteration: 49444
loss: 0.9898491501808167,grad_norm: 0.9999991137101674, iteration: 49445
loss: 1.0019370317459106,grad_norm: 0.9999992983332886, iteration: 49446
loss: 0.9831709861755371,grad_norm: 0.9999990674953736, iteration: 49447
loss: 0.9942821860313416,grad_norm: 0.9126605665700067, iteration: 49448
loss: 1.0103237628936768,grad_norm: 0.8488363638510849, iteration: 49449
loss: 1.0213618278503418,grad_norm: 0.9347617662727278, iteration: 49450
loss: 0.9908058643341064,grad_norm: 0.9999991702098366, iteration: 49451
loss: 1.0337796211242676,grad_norm: 0.9999992684011576, iteration: 49452
loss: 1.022696852684021,grad_norm: 0.9087245882060407, iteration: 49453
loss: 1.0194647312164307,grad_norm: 0.9999991244123153, iteration: 49454
loss: 0.9809852242469788,grad_norm: 0.9999990451443218, iteration: 49455
loss: 1.0456568002700806,grad_norm: 0.8791750878385672, iteration: 49456
loss: 0.9974346160888672,grad_norm: 0.9999992063964103, iteration: 49457
loss: 1.0304542779922485,grad_norm: 0.9168675260062263, iteration: 49458
loss: 0.9724502563476562,grad_norm: 0.9999992251092805, iteration: 49459
loss: 1.0232056379318237,grad_norm: 0.9999992036907985, iteration: 49460
loss: 0.9798031449317932,grad_norm: 0.9999992633038088, iteration: 49461
loss: 1.0012108087539673,grad_norm: 0.9719143735477227, iteration: 49462
loss: 0.9981054067611694,grad_norm: 0.9999992483108221, iteration: 49463
loss: 1.0243526697158813,grad_norm: 0.9990700308997013, iteration: 49464
loss: 1.0165774822235107,grad_norm: 0.9999991132220659, iteration: 49465
loss: 0.9988957643508911,grad_norm: 0.9167476462225504, iteration: 49466
loss: 1.0134013891220093,grad_norm: 0.9232085189987923, iteration: 49467
loss: 0.9655227065086365,grad_norm: 0.8828320880617114, iteration: 49468
loss: 1.0028314590454102,grad_norm: 0.9999991301777649, iteration: 49469
loss: 1.0269606113433838,grad_norm: 0.9999992367138805, iteration: 49470
loss: 1.0345474481582642,grad_norm: 0.9999990510507938, iteration: 49471
loss: 1.0465333461761475,grad_norm: 0.9999992498858661, iteration: 49472
loss: 0.9791723489761353,grad_norm: 0.8460494812681512, iteration: 49473
loss: 0.9929549098014832,grad_norm: 0.9999992807955391, iteration: 49474
loss: 1.0320571660995483,grad_norm: 0.9999989759168908, iteration: 49475
loss: 0.9943097233772278,grad_norm: 0.9999990677033217, iteration: 49476
loss: 1.0066936016082764,grad_norm: 0.9999994301863059, iteration: 49477
loss: 0.970212459564209,grad_norm: 0.7947169886392893, iteration: 49478
loss: 0.9574109315872192,grad_norm: 0.9999991745705349, iteration: 49479
loss: 1.0271884202957153,grad_norm: 0.9999991826369433, iteration: 49480
loss: 1.064315676689148,grad_norm: 0.999999173199316, iteration: 49481
loss: 1.0313955545425415,grad_norm: 0.9066283615294513, iteration: 49482
loss: 1.002454161643982,grad_norm: 0.8878860253558272, iteration: 49483
loss: 1.028502345085144,grad_norm: 0.9999990452531089, iteration: 49484
loss: 0.9858468770980835,grad_norm: 0.974523100219974, iteration: 49485
loss: 0.9959054589271545,grad_norm: 0.9323029105965, iteration: 49486
loss: 1.027769684791565,grad_norm: 0.9982034301532148, iteration: 49487
loss: 1.0069774389266968,grad_norm: 0.9443885318234083, iteration: 49488
loss: 1.0007615089416504,grad_norm: 0.9999991530965889, iteration: 49489
loss: 1.0099570751190186,grad_norm: 0.9999991536481927, iteration: 49490
loss: 1.0201252698898315,grad_norm: 0.9999992093142286, iteration: 49491
loss: 1.0132842063903809,grad_norm: 0.9999994151499878, iteration: 49492
loss: 1.0096065998077393,grad_norm: 0.9060758007231147, iteration: 49493
loss: 1.0881990194320679,grad_norm: 0.9999990308068157, iteration: 49494
loss: 0.9876333475112915,grad_norm: 0.999999079994236, iteration: 49495
loss: 1.0181360244750977,grad_norm: 0.9999990777195136, iteration: 49496
loss: 0.9827569127082825,grad_norm: 0.9999991787726478, iteration: 49497
loss: 1.020420789718628,grad_norm: 0.9868616204804265, iteration: 49498
loss: 1.0152397155761719,grad_norm: 0.9999994508712375, iteration: 49499
loss: 1.0121698379516602,grad_norm: 0.9987845760695924, iteration: 49500
loss: 1.0020489692687988,grad_norm: 0.999999150587213, iteration: 49501
loss: 0.9858328104019165,grad_norm: 0.9999991397358348, iteration: 49502
loss: 1.0283153057098389,grad_norm: 0.9999992440006177, iteration: 49503
loss: 1.0077427625656128,grad_norm: 0.9999989450720568, iteration: 49504
loss: 0.9986723065376282,grad_norm: 0.9999990428635729, iteration: 49505
loss: 1.033836841583252,grad_norm: 0.9999990844389979, iteration: 49506
loss: 0.9919822216033936,grad_norm: 0.9984738136504755, iteration: 49507
loss: 1.0230783224105835,grad_norm: 0.9619980166030452, iteration: 49508
loss: 1.0073734521865845,grad_norm: 0.9999991086001485, iteration: 49509
loss: 0.9883109331130981,grad_norm: 0.9999991637883038, iteration: 49510
loss: 1.0390921831130981,grad_norm: 0.9400278157124029, iteration: 49511
loss: 0.9937199354171753,grad_norm: 0.9999992107998409, iteration: 49512
loss: 0.9784079194068909,grad_norm: 0.9896042605950158, iteration: 49513
loss: 1.0301223993301392,grad_norm: 0.9711029520085876, iteration: 49514
loss: 1.0062384605407715,grad_norm: 0.9744348783879684, iteration: 49515
loss: 1.0465878248214722,grad_norm: 0.9999990253602894, iteration: 49516
loss: 1.0430582761764526,grad_norm: 0.8857020647003296, iteration: 49517
loss: 1.021921157836914,grad_norm: 0.9999992081280611, iteration: 49518
loss: 1.009584665298462,grad_norm: 0.9806059788794098, iteration: 49519
loss: 1.0107342004776,grad_norm: 0.9641900473564496, iteration: 49520
loss: 1.0137335062026978,grad_norm: 0.9277729790716807, iteration: 49521
loss: 1.02046537399292,grad_norm: 0.9803162058172631, iteration: 49522
loss: 1.0066472291946411,grad_norm: 0.9999991529460132, iteration: 49523
loss: 1.0057096481323242,grad_norm: 0.9326116890319488, iteration: 49524
loss: 1.0048179626464844,grad_norm: 0.9999992268000125, iteration: 49525
loss: 0.9844693541526794,grad_norm: 0.9999992300434686, iteration: 49526
loss: 0.9918631911277771,grad_norm: 0.9999990934125293, iteration: 49527
loss: 1.0029515027999878,grad_norm: 0.9999992192887965, iteration: 49528
loss: 0.9948587417602539,grad_norm: 0.9999990587998595, iteration: 49529
loss: 1.0503649711608887,grad_norm: 0.9999992537948837, iteration: 49530
loss: 0.9969527721405029,grad_norm: 0.9999991195620622, iteration: 49531
loss: 1.0337721109390259,grad_norm: 0.8950855235079428, iteration: 49532
loss: 1.0140068531036377,grad_norm: 0.9851907833067222, iteration: 49533
loss: 1.005544900894165,grad_norm: 0.917892681803338, iteration: 49534
loss: 1.0148894786834717,grad_norm: 0.899842462009784, iteration: 49535
loss: 1.0442053079605103,grad_norm: 0.8853493284252342, iteration: 49536
loss: 0.9683141112327576,grad_norm: 0.9277253329105507, iteration: 49537
loss: 1.042770504951477,grad_norm: 0.9999996648614214, iteration: 49538
loss: 1.08360755443573,grad_norm: 0.999999301111927, iteration: 49539
loss: 1.0135127305984497,grad_norm: 0.8543911825833194, iteration: 49540
loss: 1.0251293182373047,grad_norm: 0.9999992943913265, iteration: 49541
loss: 1.027119517326355,grad_norm: 0.9999991382977982, iteration: 49542
loss: 0.9938091039657593,grad_norm: 0.9075678274417867, iteration: 49543
loss: 1.0333915948867798,grad_norm: 0.875397639261295, iteration: 49544
loss: 0.9955065250396729,grad_norm: 0.9999991222700819, iteration: 49545
loss: 0.9897820949554443,grad_norm: 0.8867448294434143, iteration: 49546
loss: 1.0368653535842896,grad_norm: 0.7997281936245993, iteration: 49547
loss: 1.0108047723770142,grad_norm: 0.8828830927622173, iteration: 49548
loss: 0.9886289834976196,grad_norm: 0.9385562521077221, iteration: 49549
loss: 0.9765774011611938,grad_norm: 0.9331109275236753, iteration: 49550
loss: 1.0034205913543701,grad_norm: 0.9524267877476799, iteration: 49551
loss: 0.9818668365478516,grad_norm: 0.8979430762327312, iteration: 49552
loss: 0.9973286986351013,grad_norm: 0.9999992718182774, iteration: 49553
loss: 0.9673116207122803,grad_norm: 0.9999991119006183, iteration: 49554
loss: 0.9526881575584412,grad_norm: 0.8677388605163231, iteration: 49555
loss: 1.0131022930145264,grad_norm: 0.9999990766376098, iteration: 49556
loss: 0.9932202696800232,grad_norm: 0.9999990697901979, iteration: 49557
loss: 1.0310344696044922,grad_norm: 0.9999990924443669, iteration: 49558
loss: 1.0183182954788208,grad_norm: 0.9999990649276669, iteration: 49559
loss: 0.9737061858177185,grad_norm: 0.9831959974685934, iteration: 49560
loss: 0.9978587031364441,grad_norm: 0.7390789034748387, iteration: 49561
loss: 1.0203027725219727,grad_norm: 0.9999991119167978, iteration: 49562
loss: 0.9727063179016113,grad_norm: 0.9999991815875515, iteration: 49563
loss: 0.9863718748092651,grad_norm: 0.9122960497176418, iteration: 49564
loss: 0.9956785440444946,grad_norm: 0.9999991226548212, iteration: 49565
loss: 1.0130738019943237,grad_norm: 0.999999237412885, iteration: 49566
loss: 0.999172568321228,grad_norm: 0.9999993814652112, iteration: 49567
loss: 1.0142713785171509,grad_norm: 0.9999991559139765, iteration: 49568
loss: 0.9841245412826538,grad_norm: 0.9416472542002355, iteration: 49569
loss: 0.9826281070709229,grad_norm: 0.9015358965064221, iteration: 49570
loss: 1.0140968561172485,grad_norm: 0.8443678848132352, iteration: 49571
loss: 1.0111793279647827,grad_norm: 0.8933822924152577, iteration: 49572
loss: 1.0163838863372803,grad_norm: 0.8557332990939684, iteration: 49573
loss: 1.0398006439208984,grad_norm: 0.9999987761964634, iteration: 49574
loss: 1.0354588031768799,grad_norm: 0.8919327603468215, iteration: 49575
loss: 0.995655357837677,grad_norm: 0.9025376963015815, iteration: 49576
loss: 1.086630940437317,grad_norm: 0.9999996127293433, iteration: 49577
loss: 0.9878722429275513,grad_norm: 0.9305665150871099, iteration: 49578
loss: 1.0139389038085938,grad_norm: 0.9999990346568886, iteration: 49579
loss: 1.009495735168457,grad_norm: 0.9999991195782998, iteration: 49580
loss: 1.0295217037200928,grad_norm: 0.9999990744781904, iteration: 49581
loss: 1.0359795093536377,grad_norm: 0.999999359688976, iteration: 49582
loss: 1.022350788116455,grad_norm: 0.9999992581514896, iteration: 49583
loss: 1.0296841859817505,grad_norm: 0.8997360051649101, iteration: 49584
loss: 1.034611463546753,grad_norm: 0.9846087689080952, iteration: 49585
loss: 1.0504924058914185,grad_norm: 0.9999994280509829, iteration: 49586
loss: 0.9834676384925842,grad_norm: 0.9999991286019191, iteration: 49587
loss: 0.9937333464622498,grad_norm: 0.9999990292496336, iteration: 49588
loss: 0.9865117073059082,grad_norm: 0.9999992840324904, iteration: 49589
loss: 1.0081453323364258,grad_norm: 0.9606056418242124, iteration: 49590
loss: 0.986218273639679,grad_norm: 0.9999992384693732, iteration: 49591
loss: 1.0611730813980103,grad_norm: 0.9909878682297087, iteration: 49592
loss: 0.9987911581993103,grad_norm: 0.9091274362207276, iteration: 49593
loss: 0.9794521331787109,grad_norm: 0.9324005174219469, iteration: 49594
loss: 0.9747443199157715,grad_norm: 0.999999097871295, iteration: 49595
loss: 0.9775766134262085,grad_norm: 0.9999996794884589, iteration: 49596
loss: 0.9977326989173889,grad_norm: 0.8737209165413193, iteration: 49597
loss: 1.0072287321090698,grad_norm: 0.9321466295740636, iteration: 49598
loss: 0.997097909450531,grad_norm: 0.9150001064130333, iteration: 49599
loss: 1.0066337585449219,grad_norm: 0.9999993155941781, iteration: 49600
loss: 1.0051826238632202,grad_norm: 0.9999990650202425, iteration: 49601
loss: 1.0116842985153198,grad_norm: 0.9999991299860217, iteration: 49602
loss: 1.0096194744110107,grad_norm: 0.9999992525633541, iteration: 49603
loss: 1.0122694969177246,grad_norm: 0.9999994533430959, iteration: 49604
loss: 0.9800701141357422,grad_norm: 0.9999990508988402, iteration: 49605
loss: 1.019640326499939,grad_norm: 0.980240676698428, iteration: 49606
loss: 1.0148433446884155,grad_norm: 0.9321023478318825, iteration: 49607
loss: 1.014534592628479,grad_norm: 0.9999993722821368, iteration: 49608
loss: 0.9775431752204895,grad_norm: 0.9999991941115547, iteration: 49609
loss: 1.014979362487793,grad_norm: 0.767605203305227, iteration: 49610
loss: 1.031843662261963,grad_norm: 0.9999991228225154, iteration: 49611
loss: 0.9935947060585022,grad_norm: 0.9999991009830846, iteration: 49612
loss: 0.9949793219566345,grad_norm: 0.9999991216330364, iteration: 49613
loss: 1.032443642616272,grad_norm: 0.9701738531705452, iteration: 49614
loss: 0.9874506592750549,grad_norm: 0.8789831357141438, iteration: 49615
loss: 1.0540437698364258,grad_norm: 0.9999996861995328, iteration: 49616
loss: 1.033840537071228,grad_norm: 0.999999145063573, iteration: 49617
loss: 1.008022665977478,grad_norm: 0.9471597849526889, iteration: 49618
loss: 0.9804542064666748,grad_norm: 0.9162400156629393, iteration: 49619
loss: 1.011972188949585,grad_norm: 0.8826892164686746, iteration: 49620
loss: 1.0102711915969849,grad_norm: 0.8988214785618969, iteration: 49621
loss: 1.0085222721099854,grad_norm: 0.9999993274061674, iteration: 49622
loss: 0.975399911403656,grad_norm: 0.9999990779406676, iteration: 49623
loss: 1.024619460105896,grad_norm: 0.9999991666750685, iteration: 49624
loss: 0.945197582244873,grad_norm: 0.9999990227957344, iteration: 49625
loss: 1.0272871255874634,grad_norm: 0.9999991426744821, iteration: 49626
loss: 0.9991976618766785,grad_norm: 0.9141126986577859, iteration: 49627
loss: 0.9861363172531128,grad_norm: 0.9999990726638563, iteration: 49628
loss: 0.9748282432556152,grad_norm: 0.999999617378288, iteration: 49629
loss: 1.020155429840088,grad_norm: 0.999999032033396, iteration: 49630
loss: 1.0033154487609863,grad_norm: 0.9999990828390629, iteration: 49631
loss: 0.9967784881591797,grad_norm: 0.9999992077923051, iteration: 49632
loss: 1.018133521080017,grad_norm: 0.9999991701582528, iteration: 49633
loss: 0.9829158186912537,grad_norm: 0.975948712682051, iteration: 49634
loss: 0.9945279955863953,grad_norm: 0.9999990579101622, iteration: 49635
loss: 1.0125876665115356,grad_norm: 0.9396627831640744, iteration: 49636
loss: 1.015968918800354,grad_norm: 0.9936170842905359, iteration: 49637
loss: 0.9682338237762451,grad_norm: 0.9999990714141272, iteration: 49638
loss: 1.0067732334136963,grad_norm: 0.9999991674072191, iteration: 49639
loss: 0.9936552047729492,grad_norm: 0.923970101667195, iteration: 49640
loss: 0.978358805179596,grad_norm: 0.9999991691256108, iteration: 49641
loss: 0.983224093914032,grad_norm: 0.9999992892805446, iteration: 49642
loss: 1.0604534149169922,grad_norm: 0.9234697567331446, iteration: 49643
loss: 1.0069143772125244,grad_norm: 0.9480784553085617, iteration: 49644
loss: 0.9994838237762451,grad_norm: 0.9999991127160456, iteration: 49645
loss: 0.9789690971374512,grad_norm: 0.9999992289909296, iteration: 49646
loss: 1.0022987127304077,grad_norm: 0.9652661551157647, iteration: 49647
loss: 1.0294896364212036,grad_norm: 0.9999990820031419, iteration: 49648
loss: 1.0195105075836182,grad_norm: 0.9266759246453352, iteration: 49649
loss: 1.0216690301895142,grad_norm: 0.999999080154848, iteration: 49650
loss: 0.9881195425987244,grad_norm: 0.9999990157684706, iteration: 49651
loss: 1.0218597650527954,grad_norm: 0.9747870701786021, iteration: 49652
loss: 1.0026425123214722,grad_norm: 0.9955903269204887, iteration: 49653
loss: 0.9861221313476562,grad_norm: 0.8872846575446365, iteration: 49654
loss: 1.008223295211792,grad_norm: 0.9999990265531141, iteration: 49655
loss: 1.017143726348877,grad_norm: 0.9999990499516735, iteration: 49656
loss: 0.9936658143997192,grad_norm: 0.9999990694820109, iteration: 49657
loss: 1.0034955739974976,grad_norm: 0.8962027370777795, iteration: 49658
loss: 0.9898828864097595,grad_norm: 0.9999991326466197, iteration: 49659
loss: 0.9950125813484192,grad_norm: 0.897740414975815, iteration: 49660
loss: 1.0023798942565918,grad_norm: 0.8009769333836414, iteration: 49661
loss: 0.9636937975883484,grad_norm: 0.9558946355218882, iteration: 49662
loss: 1.004321575164795,grad_norm: 0.9999991564282145, iteration: 49663
loss: 0.9893205761909485,grad_norm: 0.9999991505195089, iteration: 49664
loss: 0.9982525110244751,grad_norm: 0.9999991278506041, iteration: 49665
loss: 1.0050711631774902,grad_norm: 0.9620318032524113, iteration: 49666
loss: 0.9832221865653992,grad_norm: 0.9999997571100003, iteration: 49667
loss: 1.038949728012085,grad_norm: 0.9999990322620638, iteration: 49668
loss: 0.9868677854537964,grad_norm: 0.999999143958177, iteration: 49669
loss: 0.9979670643806458,grad_norm: 0.9999990460527276, iteration: 49670
loss: 1.0025849342346191,grad_norm: 0.8768426472596283, iteration: 49671
loss: 0.9896084666252136,grad_norm: 0.9999992673414215, iteration: 49672
loss: 1.012005090713501,grad_norm: 0.8822846338864402, iteration: 49673
loss: 0.9839569926261902,grad_norm: 0.9999991210916453, iteration: 49674
loss: 0.9728197455406189,grad_norm: 0.9300154851468193, iteration: 49675
loss: 1.0130454301834106,grad_norm: 0.9999992360060229, iteration: 49676
loss: 1.038780927658081,grad_norm: 0.8619282909922188, iteration: 49677
loss: 1.0021852254867554,grad_norm: 0.9753629994908865, iteration: 49678
loss: 1.036800503730774,grad_norm: 0.9837261182050758, iteration: 49679
loss: 1.0377678871154785,grad_norm: 0.9999991703737147, iteration: 49680
loss: 1.0272873640060425,grad_norm: 0.999999285135469, iteration: 49681
loss: 1.0110749006271362,grad_norm: 0.9798569735257685, iteration: 49682
loss: 1.0207695960998535,grad_norm: 0.9727628409788877, iteration: 49683
loss: 1.0072418451309204,grad_norm: 0.9497629813153922, iteration: 49684
loss: 1.015132188796997,grad_norm: 0.8916153538066094, iteration: 49685
loss: 1.025112509727478,grad_norm: 0.9999990813907595, iteration: 49686
loss: 1.0227251052856445,grad_norm: 0.9999991490465664, iteration: 49687
loss: 1.0198055505752563,grad_norm: 0.9999993528957519, iteration: 49688
loss: 0.9786845445632935,grad_norm: 0.9999991121624574, iteration: 49689
loss: 1.0330541133880615,grad_norm: 0.8411236346649068, iteration: 49690
loss: 0.9978894591331482,grad_norm: 0.9999991769480477, iteration: 49691
loss: 1.006482720375061,grad_norm: 0.9999990447950645, iteration: 49692
loss: 0.986309289932251,grad_norm: 0.856891356882504, iteration: 49693
loss: 0.9942958354949951,grad_norm: 0.9999992085224579, iteration: 49694
loss: 1.0136092901229858,grad_norm: 0.9999991716951917, iteration: 49695
loss: 1.0363550186157227,grad_norm: 0.8941884254228445, iteration: 49696
loss: 1.0270508527755737,grad_norm: 0.999999253447214, iteration: 49697
loss: 0.9777863621711731,grad_norm: 0.8993712766723213, iteration: 49698
loss: 1.0060008764266968,grad_norm: 0.9259476792557529, iteration: 49699
loss: 1.0187187194824219,grad_norm: 0.9999991510606188, iteration: 49700
loss: 1.0162527561187744,grad_norm: 0.9758113119763026, iteration: 49701
loss: 1.0302493572235107,grad_norm: 0.9999991786060518, iteration: 49702
loss: 0.9762212038040161,grad_norm: 0.9999991752559908, iteration: 49703
loss: 0.989103376865387,grad_norm: 0.9999991485876022, iteration: 49704
loss: 0.9753025770187378,grad_norm: 0.9999991243028605, iteration: 49705
loss: 0.9733647108078003,grad_norm: 0.9061506502422327, iteration: 49706
loss: 1.0053507089614868,grad_norm: 0.9146015366784698, iteration: 49707
loss: 1.0331379175186157,grad_norm: 0.9227842664559948, iteration: 49708
loss: 1.018230676651001,grad_norm: 0.8659344957582478, iteration: 49709
loss: 0.9704466462135315,grad_norm: 0.8788729379265736, iteration: 49710
loss: 0.986354410648346,grad_norm: 0.9411166879428823, iteration: 49711
loss: 1.0245295763015747,grad_norm: 0.9999989054570408, iteration: 49712
loss: 1.0028787851333618,grad_norm: 0.8235664471052577, iteration: 49713
loss: 0.9938707947731018,grad_norm: 0.9999990494789205, iteration: 49714
loss: 1.0087584257125854,grad_norm: 0.999999114971136, iteration: 49715
loss: 0.9552238583564758,grad_norm: 0.973025158259062, iteration: 49716
loss: 1.0043591260910034,grad_norm: 0.88981832313405, iteration: 49717
loss: 1.0139009952545166,grad_norm: 0.9999989733663356, iteration: 49718
loss: 0.9343671798706055,grad_norm: 0.9999990827598745, iteration: 49719
loss: 0.9961404800415039,grad_norm: 0.9999991525797023, iteration: 49720
loss: 0.9861860275268555,grad_norm: 0.9999991668600813, iteration: 49721
loss: 0.9943614602088928,grad_norm: 0.9999991127773332, iteration: 49722
loss: 1.0262941122055054,grad_norm: 0.9660496742933601, iteration: 49723
loss: 1.0025001764297485,grad_norm: 0.9999991429874884, iteration: 49724
loss: 1.0154170989990234,grad_norm: 0.9999990169626346, iteration: 49725
loss: 0.9381526112556458,grad_norm: 0.9479976409938822, iteration: 49726
loss: 0.9989042282104492,grad_norm: 0.9999991526485137, iteration: 49727
loss: 0.9629711508750916,grad_norm: 0.928027379021501, iteration: 49728
loss: 1.0007364749908447,grad_norm: 0.9999990686663259, iteration: 49729
loss: 1.044887661933899,grad_norm: 0.9227095036007594, iteration: 49730
loss: 1.0334454774856567,grad_norm: 0.9427101792271884, iteration: 49731
loss: 1.016068696975708,grad_norm: 0.8086152620591588, iteration: 49732
loss: 1.0345407724380493,grad_norm: 0.8982868702849441, iteration: 49733
loss: 1.000378131866455,grad_norm: 0.9587432493415637, iteration: 49734
loss: 1.0215390920639038,grad_norm: 0.8982189584537569, iteration: 49735
loss: 0.9982196688652039,grad_norm: 0.9999991082215445, iteration: 49736
loss: 1.0205049514770508,grad_norm: 0.9999992118630716, iteration: 49737
loss: 1.0245158672332764,grad_norm: 0.999999220719411, iteration: 49738
loss: 0.996906578540802,grad_norm: 0.9999990439903989, iteration: 49739
loss: 1.0005066394805908,grad_norm: 0.9999990693845173, iteration: 49740
loss: 0.9884716272354126,grad_norm: 0.9589180947785102, iteration: 49741
loss: 0.9915505051612854,grad_norm: 0.9999992061087742, iteration: 49742
loss: 0.9948127865791321,grad_norm: 0.9483830298718591, iteration: 49743
loss: 1.0126961469650269,grad_norm: 0.7862711152405146, iteration: 49744
loss: 0.9964275360107422,grad_norm: 0.7972255383367786, iteration: 49745
loss: 0.9887716770172119,grad_norm: 0.9999991424974046, iteration: 49746
loss: 1.0095674991607666,grad_norm: 0.9999991581571726, iteration: 49747
loss: 0.9968646764755249,grad_norm: 0.9999991057734563, iteration: 49748
loss: 1.042342185974121,grad_norm: 0.9999990018060315, iteration: 49749
loss: 1.0162564516067505,grad_norm: 0.8115059697815573, iteration: 49750
loss: 1.0086157321929932,grad_norm: 0.9999990898576466, iteration: 49751
loss: 1.0318485498428345,grad_norm: 0.9999993690028247, iteration: 49752
loss: 1.0069608688354492,grad_norm: 0.999999011021167, iteration: 49753
loss: 1.0221515893936157,grad_norm: 0.8878209687087222, iteration: 49754
loss: 1.0026804208755493,grad_norm: 0.941519392722324, iteration: 49755
loss: 0.9897257089614868,grad_norm: 0.9999991648437605, iteration: 49756
loss: 0.9906612634658813,grad_norm: 0.9999991752169632, iteration: 49757
loss: 1.0020114183425903,grad_norm: 0.9999990852075394, iteration: 49758
loss: 1.0050653219223022,grad_norm: 0.9999991997491572, iteration: 49759
loss: 0.9820375442504883,grad_norm: 0.8771808341196853, iteration: 49760
loss: 1.0537611246109009,grad_norm: 0.9999992007278361, iteration: 49761
loss: 0.9773783087730408,grad_norm: 0.9800538246861309, iteration: 49762
loss: 1.0169299840927124,grad_norm: 0.9999990408146942, iteration: 49763
loss: 1.010374665260315,grad_norm: 0.9999992020000434, iteration: 49764
loss: 0.9817801117897034,grad_norm: 0.9730419430196291, iteration: 49765
loss: 0.9885836243629456,grad_norm: 0.9923189391618815, iteration: 49766
loss: 0.9812253713607788,grad_norm: 0.9999990559976668, iteration: 49767
loss: 1.0038496255874634,grad_norm: 0.9999991289859566, iteration: 49768
loss: 0.9932246804237366,grad_norm: 0.9999990667690644, iteration: 49769
loss: 1.0032963752746582,grad_norm: 0.9999991778541296, iteration: 49770
loss: 1.0101470947265625,grad_norm: 0.9999989511719463, iteration: 49771
loss: 0.9767564535140991,grad_norm: 0.9850223730897885, iteration: 49772
loss: 1.0175037384033203,grad_norm: 0.9999991186298754, iteration: 49773
loss: 1.0126943588256836,grad_norm: 0.9999990367661479, iteration: 49774
loss: 0.9929764866828918,grad_norm: 0.9999990432572733, iteration: 49775
loss: 0.9934669733047485,grad_norm: 0.9999991638255651, iteration: 49776
loss: 1.0017566680908203,grad_norm: 0.8512671457663366, iteration: 49777
loss: 1.0239003896713257,grad_norm: 0.9999990741722684, iteration: 49778
loss: 0.967017650604248,grad_norm: 0.9769165870808677, iteration: 49779
loss: 1.0006884336471558,grad_norm: 0.8629627064929445, iteration: 49780
loss: 1.0251878499984741,grad_norm: 0.8735194284658174, iteration: 49781
loss: 0.9881303906440735,grad_norm: 0.9999991040561688, iteration: 49782
loss: 1.0575436353683472,grad_norm: 0.999999133953979, iteration: 49783
loss: 0.9961265325546265,grad_norm: 0.9999991718039225, iteration: 49784
loss: 0.9988351464271545,grad_norm: 0.9999990671817514, iteration: 49785
loss: 1.0007284879684448,grad_norm: 0.9999993518712716, iteration: 49786
loss: 1.0182937383651733,grad_norm: 0.9999991364788839, iteration: 49787
loss: 1.010504126548767,grad_norm: 0.9999992445403909, iteration: 49788
loss: 1.013433814048767,grad_norm: 0.983323252759943, iteration: 49789
loss: 0.9777989983558655,grad_norm: 0.9983926894220491, iteration: 49790
loss: 0.9949261546134949,grad_norm: 0.8693786204436862, iteration: 49791
loss: 0.9563493728637695,grad_norm: 0.9999989261840448, iteration: 49792
loss: 0.9709198474884033,grad_norm: 0.9490565941924493, iteration: 49793
loss: 1.0492807626724243,grad_norm: 0.9293282356111306, iteration: 49794
loss: 1.002436876296997,grad_norm: 0.9999991468457206, iteration: 49795
loss: 0.9887879490852356,grad_norm: 0.9999990656258669, iteration: 49796
loss: 0.9819246530532837,grad_norm: 0.9469699638607812, iteration: 49797
loss: 0.9751375913619995,grad_norm: 0.9999992589564412, iteration: 49798
loss: 1.0370486974716187,grad_norm: 0.9298220924744846, iteration: 49799
loss: 1.049272060394287,grad_norm: 0.9999996133730384, iteration: 49800
loss: 1.05123770236969,grad_norm: 0.9999991751851529, iteration: 49801
loss: 1.0114527940750122,grad_norm: 0.9999992456853767, iteration: 49802
loss: 0.9847123622894287,grad_norm: 0.8059470272484449, iteration: 49803
loss: 1.0189443826675415,grad_norm: 0.9999990303021568, iteration: 49804
loss: 0.9825225472450256,grad_norm: 0.8554097499663932, iteration: 49805
loss: 1.0148125886917114,grad_norm: 0.9999992148702638, iteration: 49806
loss: 0.9963139295578003,grad_norm: 0.9999990687153676, iteration: 49807
loss: 0.9854868650436401,grad_norm: 0.9999991618782108, iteration: 49808
loss: 0.9856036305427551,grad_norm: 0.9999990843779497, iteration: 49809
loss: 0.9856574535369873,grad_norm: 0.9673072969205254, iteration: 49810
loss: 0.9832580089569092,grad_norm: 0.9785342035039768, iteration: 49811
loss: 0.9811514616012573,grad_norm: 0.9999991675901275, iteration: 49812
loss: 1.0154272317886353,grad_norm: 0.9579300608955239, iteration: 49813
loss: 0.9881765246391296,grad_norm: 0.9456890834661121, iteration: 49814
loss: 1.0098752975463867,grad_norm: 0.9481871374787907, iteration: 49815
loss: 1.0197011232376099,grad_norm: 0.9999998800609692, iteration: 49816
loss: 0.9838162660598755,grad_norm: 0.9999989887941018, iteration: 49817
loss: 1.0201408863067627,grad_norm: 0.9359429640209512, iteration: 49818
loss: 1.0401452779769897,grad_norm: 0.9670396857706534, iteration: 49819
loss: 0.9749987721443176,grad_norm: 0.9999992171962067, iteration: 49820
loss: 0.9869266152381897,grad_norm: 0.9359663620114083, iteration: 49821
loss: 0.9955309629440308,grad_norm: 0.9999992965248268, iteration: 49822
loss: 0.9757591485977173,grad_norm: 0.9999991140293799, iteration: 49823
loss: 1.034691333770752,grad_norm: 0.9999990960359025, iteration: 49824
loss: 0.9474067091941833,grad_norm: 0.8802250346981163, iteration: 49825
loss: 0.997035562992096,grad_norm: 0.9533151005248667, iteration: 49826
loss: 0.9736523628234863,grad_norm: 0.9999995062662906, iteration: 49827
loss: 1.0411016941070557,grad_norm: 0.9999991671341356, iteration: 49828
loss: 0.9874759316444397,grad_norm: 0.9999995648629693, iteration: 49829
loss: 1.0263051986694336,grad_norm: 0.9999990908832879, iteration: 49830
loss: 0.9427903890609741,grad_norm: 0.9408189683808288, iteration: 49831
loss: 0.9548221230506897,grad_norm: 0.9454540253021776, iteration: 49832
loss: 1.0056354999542236,grad_norm: 0.7962082987716479, iteration: 49833
loss: 0.9647011160850525,grad_norm: 0.9492216029262546, iteration: 49834
loss: 0.9605615735054016,grad_norm: 0.9999991796421736, iteration: 49835
loss: 1.003015398979187,grad_norm: 0.9347307454501963, iteration: 49836
loss: 1.0269945859909058,grad_norm: 0.821270162125695, iteration: 49837
loss: 1.0206084251403809,grad_norm: 0.9755940986042524, iteration: 49838
loss: 1.0032099485397339,grad_norm: 0.8310619751400763, iteration: 49839
loss: 0.9666550755500793,grad_norm: 0.9565767791794951, iteration: 49840
loss: 0.9980533123016357,grad_norm: 0.9869860931114024, iteration: 49841
loss: 0.9920604228973389,grad_norm: 0.999999196421565, iteration: 49842
loss: 1.0030215978622437,grad_norm: 0.9999990826242283, iteration: 49843
loss: 0.9335651397705078,grad_norm: 0.9999991663462149, iteration: 49844
loss: 1.0103071928024292,grad_norm: 0.7938698543058972, iteration: 49845
loss: 1.0031681060791016,grad_norm: 0.9999991131801805, iteration: 49846
loss: 0.9577130079269409,grad_norm: 0.9999990468188017, iteration: 49847
loss: 1.0282719135284424,grad_norm: 0.9999991029260049, iteration: 49848
loss: 0.9876338243484497,grad_norm: 0.9999991545543591, iteration: 49849
loss: 1.00581693649292,grad_norm: 0.9885142917647203, iteration: 49850
loss: 1.0065189599990845,grad_norm: 0.9999991744542499, iteration: 49851
loss: 0.999117910861969,grad_norm: 0.9634784296963287, iteration: 49852
loss: 0.9825292229652405,grad_norm: 0.8962292415886806, iteration: 49853
loss: 1.0212342739105225,grad_norm: 0.9999992568815446, iteration: 49854
loss: 1.0253137350082397,grad_norm: 0.8607469718651806, iteration: 49855
loss: 1.0062199831008911,grad_norm: 0.9999990320381964, iteration: 49856
loss: 1.040278434753418,grad_norm: 0.999999082450515, iteration: 49857
loss: 1.0151491165161133,grad_norm: 0.9999991024530214, iteration: 49858
loss: 1.0062601566314697,grad_norm: 0.9999990377517135, iteration: 49859
loss: 0.98921799659729,grad_norm: 0.9999990451213584, iteration: 49860
loss: 1.0091651678085327,grad_norm: 0.9999990367482322, iteration: 49861
loss: 0.9466111063957214,grad_norm: 0.9999990638079762, iteration: 49862
loss: 1.0308177471160889,grad_norm: 0.8106711136801594, iteration: 49863
loss: 1.0166221857070923,grad_norm: 0.9999992336901119, iteration: 49864
loss: 1.031296968460083,grad_norm: 0.8468430719105076, iteration: 49865
loss: 1.0164527893066406,grad_norm: 0.9905640441513867, iteration: 49866
loss: 1.0093365907669067,grad_norm: 0.9999991020008746, iteration: 49867
loss: 0.9830718636512756,grad_norm: 0.9999990555050786, iteration: 49868
loss: 1.0408474206924438,grad_norm: 0.9999990792514679, iteration: 49869
loss: 0.9562171697616577,grad_norm: 0.9999990353591605, iteration: 49870
loss: 0.9658121466636658,grad_norm: 0.9684127298845282, iteration: 49871
loss: 0.9942567348480225,grad_norm: 0.9356802929584118, iteration: 49872
loss: 0.9998232126235962,grad_norm: 0.8730856838249662, iteration: 49873
loss: 0.9870855808258057,grad_norm: 0.9999991949140777, iteration: 49874
loss: 1.0328090190887451,grad_norm: 0.8714181187378512, iteration: 49875
loss: 1.0217176675796509,grad_norm: 0.908798988078382, iteration: 49876
loss: 0.9814886450767517,grad_norm: 0.9622155474518791, iteration: 49877
loss: 1.0100765228271484,grad_norm: 0.9999991370502567, iteration: 49878
loss: 1.0174474716186523,grad_norm: 0.8236905265139634, iteration: 49879
loss: 1.020556926727295,grad_norm: 0.9999992283348665, iteration: 49880
loss: 0.9985742568969727,grad_norm: 0.996160401668928, iteration: 49881
loss: 0.9983561635017395,grad_norm: 0.8733858312543598, iteration: 49882
loss: 0.9824143648147583,grad_norm: 0.9999992815802603, iteration: 49883
loss: 1.0258251428604126,grad_norm: 0.9688077438315996, iteration: 49884
loss: 0.9908391833305359,grad_norm: 0.9616878253771982, iteration: 49885
loss: 0.9748888611793518,grad_norm: 0.9126993261173625, iteration: 49886
loss: 0.9933244585990906,grad_norm: 0.939212319899784, iteration: 49887
loss: 1.0139752626419067,grad_norm: 0.9999991020663025, iteration: 49888
loss: 1.0297185182571411,grad_norm: 0.9999990975072895, iteration: 49889
loss: 0.997007429599762,grad_norm: 0.9999989691874465, iteration: 49890
loss: 1.0352489948272705,grad_norm: 0.9068412554260654, iteration: 49891
loss: 0.9913529753684998,grad_norm: 0.9999991838021989, iteration: 49892
loss: 1.0192182064056396,grad_norm: 0.999999188535702, iteration: 49893
loss: 0.9868677854537964,grad_norm: 0.999999268082382, iteration: 49894
loss: 0.9969996809959412,grad_norm: 0.9166318735444439, iteration: 49895
loss: 0.9484438896179199,grad_norm: 0.9999991918905146, iteration: 49896
loss: 0.9840884208679199,grad_norm: 0.9999992852328965, iteration: 49897
loss: 0.9773285388946533,grad_norm: 0.9999989958195759, iteration: 49898
loss: 0.986504077911377,grad_norm: 0.9907407830980928, iteration: 49899
loss: 0.9851493239402771,grad_norm: 0.8734324772912135, iteration: 49900
loss: 1.0081452131271362,grad_norm: 0.9747342553268756, iteration: 49901
loss: 1.0171058177947998,grad_norm: 0.9993024109356242, iteration: 49902
loss: 1.024970531463623,grad_norm: 0.8438963204238712, iteration: 49903
loss: 0.9464168548583984,grad_norm: 0.8801490635981508, iteration: 49904
loss: 1.0331454277038574,grad_norm: 0.8722968317322655, iteration: 49905
loss: 0.998328685760498,grad_norm: 0.9697193537963067, iteration: 49906
loss: 1.0052118301391602,grad_norm: 0.9999990964104015, iteration: 49907
loss: 1.0389883518218994,grad_norm: 0.9999990205579216, iteration: 49908
loss: 0.9938412308692932,grad_norm: 0.9999990837270175, iteration: 49909
loss: 1.0179990530014038,grad_norm: 0.8958043230041255, iteration: 49910
loss: 0.9916078448295593,grad_norm: 0.9999991898526649, iteration: 49911
loss: 0.9579836130142212,grad_norm: 0.9370154987101694, iteration: 49912
loss: 1.0348747968673706,grad_norm: 0.9999990289651286, iteration: 49913
loss: 1.0005325078964233,grad_norm: 0.8984067006932801, iteration: 49914
loss: 1.0161621570587158,grad_norm: 0.9717256099162184, iteration: 49915
loss: 0.9726819396018982,grad_norm: 0.9999991505670819, iteration: 49916
loss: 0.9806622266769409,grad_norm: 0.8930121639948813, iteration: 49917
loss: 0.972082257270813,grad_norm: 0.9999989886257765, iteration: 49918
loss: 0.9994170069694519,grad_norm: 0.8933602442704833, iteration: 49919
loss: 1.0015639066696167,grad_norm: 0.9649689917943988, iteration: 49920
loss: 0.9957590103149414,grad_norm: 0.9999991848165849, iteration: 49921
loss: 0.994508683681488,grad_norm: 0.9680079101256943, iteration: 49922
loss: 0.9924052953720093,grad_norm: 0.9518523718104555, iteration: 49923
loss: 1.0176368951797485,grad_norm: 0.999999142014087, iteration: 49924
loss: 0.9977525472640991,grad_norm: 0.9999990808964193, iteration: 49925
loss: 0.9919257760047913,grad_norm: 0.9999990835766652, iteration: 49926
loss: 0.9732639193534851,grad_norm: 0.8408930046947874, iteration: 49927
loss: 1.0053189992904663,grad_norm: 0.8856888128105619, iteration: 49928
loss: 0.9928226470947266,grad_norm: 0.9356890188146801, iteration: 49929
loss: 0.9719651341438293,grad_norm: 0.9115093477490626, iteration: 49930
loss: 1.0340389013290405,grad_norm: 0.9951632749089423, iteration: 49931
loss: 1.0626856088638306,grad_norm: 0.9999990667393455, iteration: 49932
loss: 1.0026735067367554,grad_norm: 0.9749495646452727, iteration: 49933
loss: 1.0310691595077515,grad_norm: 0.9364803457532679, iteration: 49934
loss: 1.0140094757080078,grad_norm: 0.9347375896273725, iteration: 49935
loss: 0.9998481273651123,grad_norm: 0.9999991149802848, iteration: 49936
loss: 0.9674669504165649,grad_norm: 0.9471622560874607, iteration: 49937
loss: 0.9760518074035645,grad_norm: 0.9999992703540623, iteration: 49938
loss: 1.0057873725891113,grad_norm: 0.9419938696201288, iteration: 49939
loss: 0.9690483212471008,grad_norm: 0.9999990964735698, iteration: 49940
loss: 1.0056875944137573,grad_norm: 0.9802493950866901, iteration: 49941
loss: 1.0209094285964966,grad_norm: 0.9999991848468163, iteration: 49942
loss: 1.0000706911087036,grad_norm: 0.8345752558567984, iteration: 49943
loss: 0.9771509170532227,grad_norm: 0.9464814315739591, iteration: 49944
loss: 1.004622459411621,grad_norm: 0.9488838079555646, iteration: 49945
loss: 0.9892759323120117,grad_norm: 0.9722748668180409, iteration: 49946
loss: 1.0236494541168213,grad_norm: 0.8804095985362755, iteration: 49947
loss: 0.9891141653060913,grad_norm: 0.9999992985328635, iteration: 49948
loss: 1.0326306819915771,grad_norm: 0.8215154867904536, iteration: 49949
loss: 0.9976963996887207,grad_norm: 0.8512102832573816, iteration: 49950
loss: 0.9869589805603027,grad_norm: 0.999999121353942, iteration: 49951
loss: 1.027243971824646,grad_norm: 0.9999990697953036, iteration: 49952
loss: 1.0444923639297485,grad_norm: 0.8470593684710156, iteration: 49953
loss: 1.000265121459961,grad_norm: 0.8747654360486811, iteration: 49954
loss: 0.9970593452453613,grad_norm: 0.8677099689610539, iteration: 49955
loss: 1.0173430442810059,grad_norm: 0.9999991011921163, iteration: 49956
loss: 0.9776431918144226,grad_norm: 0.9999990972752089, iteration: 49957
loss: 1.0051137208938599,grad_norm: 0.9595014436589845, iteration: 49958
loss: 1.0579981803894043,grad_norm: 0.9999990911317436, iteration: 49959
loss: 1.0118951797485352,grad_norm: 0.9999995444340517, iteration: 49960
loss: 0.9920119643211365,grad_norm: 0.9999990558466895, iteration: 49961
loss: 0.9997687339782715,grad_norm: 0.9999992098897647, iteration: 49962
loss: 0.9816998839378357,grad_norm: 0.9968469809721409, iteration: 49963
loss: 1.00808846950531,grad_norm: 0.9613581464933786, iteration: 49964
loss: 1.0233711004257202,grad_norm: 0.8849466603693535, iteration: 49965
loss: 1.0372949838638306,grad_norm: 0.9999990953368105, iteration: 49966
loss: 0.9749478697776794,grad_norm: 0.9999999266052455, iteration: 49967
loss: 0.9847291111946106,grad_norm: 0.9999990559658902, iteration: 49968
loss: 0.9862539172172546,grad_norm: 0.9999990022263331, iteration: 49969
loss: 1.0439494848251343,grad_norm: 0.9999991992554994, iteration: 49970
loss: 1.028189778327942,grad_norm: 0.8964344884423422, iteration: 49971
loss: 0.984662652015686,grad_norm: 0.9999992160765467, iteration: 49972
loss: 1.0067975521087646,grad_norm: 0.8701841621333783, iteration: 49973
loss: 1.0109206438064575,grad_norm: 0.9999991037605238, iteration: 49974
loss: 0.9820491075515747,grad_norm: 0.9909776013523287, iteration: 49975
loss: 1.0409761667251587,grad_norm: 0.9807596323125841, iteration: 49976
loss: 1.0101791620254517,grad_norm: 0.9999993010719721, iteration: 49977
loss: 1.0146409273147583,grad_norm: 0.898161273174406, iteration: 49978
loss: 1.004141092300415,grad_norm: 0.9999992483091033, iteration: 49979
loss: 1.025913953781128,grad_norm: 0.9127271037004154, iteration: 49980
loss: 1.0042344331741333,grad_norm: 0.9999990744523414, iteration: 49981
loss: 1.0537134408950806,grad_norm: 0.9999991364703128, iteration: 49982
loss: 1.0044453144073486,grad_norm: 0.996159560415733, iteration: 49983
loss: 0.9822438955307007,grad_norm: 0.9999991251931503, iteration: 49984
loss: 0.9878785610198975,grad_norm: 0.9999990350348302, iteration: 49985
loss: 0.9931758642196655,grad_norm: 0.999999093284988, iteration: 49986
loss: 0.9841516017913818,grad_norm: 0.9999991352640208, iteration: 49987
loss: 1.0145314931869507,grad_norm: 0.9521664372172705, iteration: 49988
loss: 1.0380085706710815,grad_norm: 0.9999993784040292, iteration: 49989
loss: 1.0330815315246582,grad_norm: 0.8784436376129511, iteration: 49990
loss: 1.0213181972503662,grad_norm: 0.999999043768068, iteration: 49991
loss: 0.9929807186126709,grad_norm: 0.9999994561554885, iteration: 49992
loss: 0.9752967953681946,grad_norm: 0.9070658213727578, iteration: 49993
loss: 0.9560117125511169,grad_norm: 0.999999168781547, iteration: 49994
loss: 0.9738820791244507,grad_norm: 0.99999939030104, iteration: 49995
loss: 0.9823247790336609,grad_norm: 0.9999992957738278, iteration: 49996
loss: 0.9932898879051208,grad_norm: 0.9366856801722474, iteration: 49997
loss: 1.001900315284729,grad_norm: 0.9462432571951891, iteration: 49998
loss: 0.9900661706924438,grad_norm: 0.9232032246464305, iteration: 49999
loss: 0.9760446548461914,grad_norm: 0.9999990464156613, iteration: 50000
Evaluating at step 50000
{'val': 0.9966089874505997, 'test': 2.993959436782252}
loss: 1.0137008428573608,grad_norm: 0.9999991527564476, iteration: 50001
loss: 1.0128933191299438,grad_norm: 0.9857983845320104, iteration: 50002
loss: 0.9664668440818787,grad_norm: 0.9999991377578495, iteration: 50003
loss: 0.992743194103241,grad_norm: 0.9999990187803633, iteration: 50004
loss: 1.0176024436950684,grad_norm: 0.843673680358251, iteration: 50005
loss: 1.0016357898712158,grad_norm: 0.9738304772442664, iteration: 50006
loss: 1.0156782865524292,grad_norm: 0.9984540685033736, iteration: 50007
loss: 0.9690847396850586,grad_norm: 0.9999991748270373, iteration: 50008
loss: 1.0167521238327026,grad_norm: 0.8672844404670031, iteration: 50009
loss: 0.9527485370635986,grad_norm: 0.9999991998011094, iteration: 50010
loss: 1.0258663892745972,grad_norm: 0.9999992228791915, iteration: 50011
loss: 0.9961653351783752,grad_norm: 0.9783635105199943, iteration: 50012
loss: 0.9824530482292175,grad_norm: 0.8595885756648758, iteration: 50013
loss: 1.0055747032165527,grad_norm: 0.9694875603554886, iteration: 50014
loss: 1.0015593767166138,grad_norm: 0.8664514091316043, iteration: 50015
loss: 1.027295708656311,grad_norm: 0.8144934938454109, iteration: 50016
loss: 1.031777024269104,grad_norm: 0.9999991795601384, iteration: 50017
loss: 1.0102035999298096,grad_norm: 0.9999990576035767, iteration: 50018
loss: 1.0045254230499268,grad_norm: 0.9999992512683936, iteration: 50019
loss: 1.0376334190368652,grad_norm: 0.9999991500617704, iteration: 50020
loss: 0.994239330291748,grad_norm: 0.9999993682277464, iteration: 50021
loss: 1.0102676153182983,grad_norm: 0.9999991969207453, iteration: 50022
loss: 1.013340950012207,grad_norm: 0.8298509994449436, iteration: 50023
loss: 0.9863158464431763,grad_norm: 0.999999225532658, iteration: 50024
loss: 0.9614295959472656,grad_norm: 0.999999120792255, iteration: 50025
loss: 1.030627727508545,grad_norm: 0.9999992220808808, iteration: 50026
loss: 1.0814825296401978,grad_norm: 0.9999996395164975, iteration: 50027
loss: 1.0155987739562988,grad_norm: 0.9999990367852203, iteration: 50028
loss: 1.0180264711380005,grad_norm: 0.9223265680609753, iteration: 50029
loss: 1.012893795967102,grad_norm: 0.9999990313975904, iteration: 50030
loss: 0.9680725336074829,grad_norm: 0.9999992584933759, iteration: 50031
loss: 0.9858174920082092,grad_norm: 0.9999991650950377, iteration: 50032
loss: 1.002731204032898,grad_norm: 0.8465608418286488, iteration: 50033
loss: 0.9853489398956299,grad_norm: 0.9999992123895023, iteration: 50034
loss: 0.9994335770606995,grad_norm: 0.9999990680449885, iteration: 50035
loss: 1.027932047843933,grad_norm: 0.9999992341807725, iteration: 50036
loss: 1.0247875452041626,grad_norm: 0.9999990887619993, iteration: 50037
loss: 1.0231778621673584,grad_norm: 0.9999998901754867, iteration: 50038
loss: 0.9804458618164062,grad_norm: 0.9117114646254352, iteration: 50039
loss: 0.9942611455917358,grad_norm: 0.9751197756413899, iteration: 50040
loss: 0.9998374581336975,grad_norm: 0.999999296533053, iteration: 50041
loss: 0.9988031983375549,grad_norm: 0.9999991252871098, iteration: 50042
loss: 1.027848720550537,grad_norm: 0.8362898299505057, iteration: 50043
loss: 1.0212467908859253,grad_norm: 0.9999992263685332, iteration: 50044
loss: 1.039117455482483,grad_norm: 0.9999989763608833, iteration: 50045
loss: 0.9841174483299255,grad_norm: 0.9081961379110037, iteration: 50046
loss: 0.9849062561988831,grad_norm: 0.9999991626510321, iteration: 50047
loss: 1.0270652770996094,grad_norm: 0.9999990202186484, iteration: 50048
loss: 0.9652884602546692,grad_norm: 0.9999991339702876, iteration: 50049
loss: 0.9998469948768616,grad_norm: 0.9258202676942782, iteration: 50050
loss: 1.0064833164215088,grad_norm: 0.9997119270749766, iteration: 50051
loss: 0.9893008470535278,grad_norm: 0.9513000505221759, iteration: 50052
loss: 0.9904966950416565,grad_norm: 0.9999990761178215, iteration: 50053
loss: 0.9830793142318726,grad_norm: 0.999999339943821, iteration: 50054
loss: 0.9900572299957275,grad_norm: 0.9667651143783212, iteration: 50055
loss: 1.0011557340621948,grad_norm: 0.9732224416331546, iteration: 50056
loss: 0.9706712961196899,grad_norm: 0.9523894322399642, iteration: 50057
loss: 0.9960255026817322,grad_norm: 0.9999991531170701, iteration: 50058
loss: 0.9717912673950195,grad_norm: 0.9999991077275043, iteration: 50059
loss: 0.9867183566093445,grad_norm: 0.9999990120012048, iteration: 50060
loss: 0.981151282787323,grad_norm: 0.9999990956808851, iteration: 50061
loss: 1.0339617729187012,grad_norm: 0.9506516499217537, iteration: 50062
loss: 0.9967641234397888,grad_norm: 0.9999992482111125, iteration: 50063
loss: 0.9894059300422668,grad_norm: 0.9999991236594798, iteration: 50064
loss: 1.0286210775375366,grad_norm: 0.9999991441884131, iteration: 50065
loss: 0.9772952795028687,grad_norm: 0.9999990844295268, iteration: 50066
loss: 0.9827584028244019,grad_norm: 0.9646668401963625, iteration: 50067
loss: 0.9767959117889404,grad_norm: 0.9964658676005478, iteration: 50068
loss: 0.9707657694816589,grad_norm: 0.8567844481137312, iteration: 50069
loss: 0.9729204177856445,grad_norm: 0.9159986139137838, iteration: 50070
loss: 0.9992462992668152,grad_norm: 0.9745161982684455, iteration: 50071
loss: 1.005347728729248,grad_norm: 0.9999990193624397, iteration: 50072
loss: 0.975275993347168,grad_norm: 0.9999992182075834, iteration: 50073
loss: 1.032459020614624,grad_norm: 0.9999991110363187, iteration: 50074
loss: 1.011312484741211,grad_norm: 0.8485975529393909, iteration: 50075
loss: 1.0180720090866089,grad_norm: 0.9999991274172608, iteration: 50076
loss: 0.9999220967292786,grad_norm: 0.9999992831060049, iteration: 50077
loss: 1.0331507921218872,grad_norm: 0.9999990747396569, iteration: 50078
loss: 1.0051941871643066,grad_norm: 0.9999991531850558, iteration: 50079
loss: 1.0167869329452515,grad_norm: 0.9999990005965156, iteration: 50080
loss: 1.0042290687561035,grad_norm: 0.9410952547289342, iteration: 50081
loss: 1.0090477466583252,grad_norm: 0.9999995264456587, iteration: 50082
loss: 0.98124098777771,grad_norm: 0.7930584686042221, iteration: 50083
loss: 0.9791417121887207,grad_norm: 0.9999989845655356, iteration: 50084
loss: 0.9975823163986206,grad_norm: 0.9570179693642067, iteration: 50085
loss: 0.9724242687225342,grad_norm: 0.9999992339226335, iteration: 50086
loss: 0.9712799191474915,grad_norm: 0.9020791127623566, iteration: 50087
loss: 1.0132534503936768,grad_norm: 0.9551830701102779, iteration: 50088
loss: 0.9900220632553101,grad_norm: 0.8843980892640311, iteration: 50089
loss: 1.0176563262939453,grad_norm: 0.7929981442124151, iteration: 50090
loss: 0.9998762607574463,grad_norm: 0.9403098776201625, iteration: 50091
loss: 0.9914740920066833,grad_norm: 0.9911551157256023, iteration: 50092
loss: 0.9715877175331116,grad_norm: 0.8625150355066683, iteration: 50093
loss: 1.0117131471633911,grad_norm: 0.9999990536092068, iteration: 50094
loss: 0.9988456964492798,grad_norm: 0.9757723561274627, iteration: 50095
loss: 1.0246244668960571,grad_norm: 0.9540375863241936, iteration: 50096
loss: 1.0019701719284058,grad_norm: 0.9999991189014859, iteration: 50097
loss: 1.0478103160858154,grad_norm: 0.9999990890072823, iteration: 50098
loss: 1.0242317914962769,grad_norm: 0.9999991398842104, iteration: 50099
loss: 1.0146044492721558,grad_norm: 0.9999989904120387, iteration: 50100
loss: 1.0127215385437012,grad_norm: 0.8764760442326186, iteration: 50101
loss: 1.0070933103561401,grad_norm: 0.9999992545782157, iteration: 50102
loss: 0.9914399981498718,grad_norm: 0.9999991396041662, iteration: 50103
loss: 1.0305042266845703,grad_norm: 0.9999991265687755, iteration: 50104
loss: 0.9977806806564331,grad_norm: 0.8089529282977108, iteration: 50105
loss: 1.0540131330490112,grad_norm: 0.9999993776017297, iteration: 50106
loss: 0.9719440937042236,grad_norm: 0.9573500227625314, iteration: 50107
loss: 1.0078836679458618,grad_norm: 0.9629771451321975, iteration: 50108
loss: 0.9854618310928345,grad_norm: 0.9999992973493179, iteration: 50109
loss: 1.059767723083496,grad_norm: 0.9691041749715664, iteration: 50110
loss: 0.9727888703346252,grad_norm: 0.9999991286890257, iteration: 50111
loss: 0.9994154572486877,grad_norm: 0.9468694246273442, iteration: 50112
loss: 0.9860813021659851,grad_norm: 0.9999990037248517, iteration: 50113
loss: 0.9926549792289734,grad_norm: 0.999999155022902, iteration: 50114
loss: 1.0086281299591064,grad_norm: 0.9999990941348442, iteration: 50115
loss: 1.0101864337921143,grad_norm: 0.988174300896159, iteration: 50116
loss: 0.9989352822303772,grad_norm: 0.9999992361927428, iteration: 50117
loss: 1.0046520233154297,grad_norm: 0.999999431207833, iteration: 50118
loss: 1.0335164070129395,grad_norm: 0.9999990901679207, iteration: 50119
loss: 1.0273815393447876,grad_norm: 0.9999990349503004, iteration: 50120
loss: 1.0021741390228271,grad_norm: 0.9999992802258754, iteration: 50121
loss: 0.974555253982544,grad_norm: 0.8408018084581591, iteration: 50122
loss: 0.9844071865081787,grad_norm: 0.8810612645528886, iteration: 50123
loss: 1.0300201177597046,grad_norm: 0.9485271310347515, iteration: 50124
loss: 1.025901436805725,grad_norm: 0.9999992262174698, iteration: 50125
loss: 0.9931878447532654,grad_norm: 0.7846483905582976, iteration: 50126
loss: 0.9973639845848083,grad_norm: 0.9785933904184546, iteration: 50127
loss: 0.9934170246124268,grad_norm: 0.9999990485814774, iteration: 50128
loss: 1.0080037117004395,grad_norm: 0.9999990791661437, iteration: 50129
loss: 0.9579168558120728,grad_norm: 0.9999991837721653, iteration: 50130
loss: 0.9874427318572998,grad_norm: 0.9999991297826061, iteration: 50131
loss: 0.984229326248169,grad_norm: 0.8484311926681836, iteration: 50132
loss: 1.0027313232421875,grad_norm: 0.9999991814654132, iteration: 50133
loss: 1.0240238904953003,grad_norm: 0.9999991762862414, iteration: 50134
loss: 1.0339497327804565,grad_norm: 0.9999992868909651, iteration: 50135
loss: 1.0098947286605835,grad_norm: 0.9999991766816287, iteration: 50136
loss: 1.0063096284866333,grad_norm: 0.9999997326918676, iteration: 50137
loss: 1.0194547176361084,grad_norm: 0.9999991560266903, iteration: 50138
loss: 0.9935100078582764,grad_norm: 0.999998988099578, iteration: 50139
loss: 1.0193355083465576,grad_norm: 0.9369900062422564, iteration: 50140
loss: 0.9732608199119568,grad_norm: 0.9999991333022967, iteration: 50141
loss: 1.0168616771697998,grad_norm: 0.9999990249820583, iteration: 50142
loss: 0.979257345199585,grad_norm: 0.9714709347990059, iteration: 50143
loss: 1.0565582513809204,grad_norm: 0.9999997904847662, iteration: 50144
loss: 1.007871150970459,grad_norm: 0.9999991103777301, iteration: 50145
loss: 1.0105888843536377,grad_norm: 0.9240023577324725, iteration: 50146
loss: 0.9881173372268677,grad_norm: 0.8465487969816934, iteration: 50147
loss: 1.0196936130523682,grad_norm: 0.9131237437506544, iteration: 50148
loss: 0.9771446585655212,grad_norm: 0.8227084516906366, iteration: 50149
loss: 0.9911417961120605,grad_norm: 0.9999991379844366, iteration: 50150
loss: 0.9931431412696838,grad_norm: 0.8579614858934083, iteration: 50151
loss: 0.9931214451789856,grad_norm: 0.9999995796698188, iteration: 50152
loss: 1.0218194723129272,grad_norm: 0.9999990021631046, iteration: 50153
loss: 0.958629310131073,grad_norm: 0.9999990105103751, iteration: 50154
loss: 1.0371712446212769,grad_norm: 0.9999991380843053, iteration: 50155
loss: 0.9938778877258301,grad_norm: 0.9999992073796486, iteration: 50156
loss: 1.0289462804794312,grad_norm: 0.972680156446573, iteration: 50157
loss: 1.0044299364089966,grad_norm: 0.9999991015608607, iteration: 50158
loss: 0.9988704323768616,grad_norm: 0.8740735669414457, iteration: 50159
loss: 1.0386245250701904,grad_norm: 0.9999991901181402, iteration: 50160
loss: 1.010513424873352,grad_norm: 0.9999991233628741, iteration: 50161
loss: 0.9871999025344849,grad_norm: 0.9318966224448293, iteration: 50162
loss: 0.9331896901130676,grad_norm: 0.9231944436898909, iteration: 50163
loss: 1.0237892866134644,grad_norm: 0.9657338401576685, iteration: 50164
loss: 1.020723581314087,grad_norm: 0.8890801809321921, iteration: 50165
loss: 0.9779658317565918,grad_norm: 0.9515339569036652, iteration: 50166
loss: 1.0036189556121826,grad_norm: 0.9999993963980581, iteration: 50167
loss: 1.024375319480896,grad_norm: 0.999999180300295, iteration: 50168
loss: 0.975418746471405,grad_norm: 0.9809450066426731, iteration: 50169
loss: 0.9945984482765198,grad_norm: 0.9999990971894941, iteration: 50170
loss: 1.014845371246338,grad_norm: 0.9999994655308535, iteration: 50171
loss: 0.9872432947158813,grad_norm: 0.9999992881804871, iteration: 50172
loss: 0.9878013134002686,grad_norm: 0.9999990842874315, iteration: 50173
loss: 1.0274630784988403,grad_norm: 0.8964147255520951, iteration: 50174
loss: 0.9973257184028625,grad_norm: 0.9999991332799001, iteration: 50175
loss: 1.0017203092575073,grad_norm: 0.9117635211640871, iteration: 50176
loss: 0.9954953789710999,grad_norm: 0.9999990591617223, iteration: 50177
loss: 0.9945104718208313,grad_norm: 0.9852880553507584, iteration: 50178
loss: 0.9839768409729004,grad_norm: 0.9687745725902255, iteration: 50179
loss: 0.9984104633331299,grad_norm: 0.9999992998824323, iteration: 50180
loss: 1.0167057514190674,grad_norm: 0.9999991729856703, iteration: 50181
loss: 0.9952844977378845,grad_norm: 0.9228744711813405, iteration: 50182
loss: 0.9788196086883545,grad_norm: 0.8953657789987177, iteration: 50183
loss: 1.0242782831192017,grad_norm: 0.9661257096750885, iteration: 50184
loss: 1.0409709215164185,grad_norm: 0.9999990890458968, iteration: 50185
loss: 0.9909193515777588,grad_norm: 0.9999994037705503, iteration: 50186
loss: 0.9865210056304932,grad_norm: 0.9999992176939909, iteration: 50187
loss: 0.9862438440322876,grad_norm: 0.8671240443859765, iteration: 50188
loss: 1.0243381261825562,grad_norm: 0.9185383465785684, iteration: 50189
loss: 0.9799514412879944,grad_norm: 0.9999991724998556, iteration: 50190
loss: 0.9949681758880615,grad_norm: 0.9909467198185602, iteration: 50191
loss: 1.0165294408798218,grad_norm: 0.9999991948856098, iteration: 50192
loss: 0.9841552376747131,grad_norm: 0.9999990788349306, iteration: 50193
loss: 1.0026450157165527,grad_norm: 0.888088561841824, iteration: 50194
loss: 0.9827278256416321,grad_norm: 0.9999992066128457, iteration: 50195
loss: 0.9904641509056091,grad_norm: 0.999999236425044, iteration: 50196
loss: 1.0114052295684814,grad_norm: 0.9416939947156265, iteration: 50197
loss: 1.0164190530776978,grad_norm: 0.8867913363500043, iteration: 50198
loss: 1.0725740194320679,grad_norm: 0.9999992582027719, iteration: 50199
loss: 0.9969367384910583,grad_norm: 0.9626657032198043, iteration: 50200
loss: 1.0346190929412842,grad_norm: 0.9999991216492294, iteration: 50201
loss: 1.027881145477295,grad_norm: 0.8752784065232488, iteration: 50202
loss: 1.0201106071472168,grad_norm: 0.9999992364165504, iteration: 50203
loss: 0.9811027646064758,grad_norm: 0.9999990358676598, iteration: 50204
loss: 1.0291681289672852,grad_norm: 0.9270290104854589, iteration: 50205
loss: 1.0159962177276611,grad_norm: 0.9463693366369308, iteration: 50206
loss: 0.9940926432609558,grad_norm: 0.9999992228391348, iteration: 50207
loss: 0.9672053456306458,grad_norm: 0.9999990920534731, iteration: 50208
loss: 0.981835663318634,grad_norm: 0.8690803482342226, iteration: 50209
loss: 1.0421504974365234,grad_norm: 0.9999990677714128, iteration: 50210
loss: 1.0011396408081055,grad_norm: 0.9999991763878144, iteration: 50211
loss: 1.0132020711898804,grad_norm: 0.9434460718756188, iteration: 50212
loss: 1.0049614906311035,grad_norm: 0.9999992973115576, iteration: 50213
loss: 0.9886797070503235,grad_norm: 0.9428042020185211, iteration: 50214
loss: 1.0108082294464111,grad_norm: 0.7935489232440939, iteration: 50215
loss: 1.007917881011963,grad_norm: 0.9407596201999832, iteration: 50216
loss: 0.9669070839881897,grad_norm: 0.8966164067592677, iteration: 50217
loss: 1.0061012506484985,grad_norm: 0.9999989930063907, iteration: 50218
loss: 0.9758455753326416,grad_norm: 0.9174063321580209, iteration: 50219
loss: 1.0289645195007324,grad_norm: 0.9999992675049842, iteration: 50220
loss: 1.0021636486053467,grad_norm: 0.9999992376254334, iteration: 50221
loss: 1.006042718887329,grad_norm: 0.9709618482969323, iteration: 50222
loss: 1.0397380590438843,grad_norm: 0.9667707004224442, iteration: 50223
loss: 1.0158406496047974,grad_norm: 0.993487378539422, iteration: 50224
loss: 1.0198886394500732,grad_norm: 0.9731897102916727, iteration: 50225
loss: 0.9793760180473328,grad_norm: 0.999999085520028, iteration: 50226
loss: 0.9932628273963928,grad_norm: 0.8301824756083608, iteration: 50227
loss: 0.9945378303527832,grad_norm: 0.9999990676851361, iteration: 50228
loss: 1.0103998184204102,grad_norm: 0.999999274519972, iteration: 50229
loss: 1.0119670629501343,grad_norm: 0.9999990440498925, iteration: 50230
loss: 1.0076243877410889,grad_norm: 0.9999990845218903, iteration: 50231
loss: 0.9825843572616577,grad_norm: 0.9645235526523016, iteration: 50232
loss: 1.0140082836151123,grad_norm: 0.9999992146724297, iteration: 50233
loss: 0.9867217540740967,grad_norm: 0.9516853169367935, iteration: 50234
loss: 1.0158185958862305,grad_norm: 0.9095450478203102, iteration: 50235
loss: 1.0408637523651123,grad_norm: 0.8783442216678343, iteration: 50236
loss: 0.9874414801597595,grad_norm: 0.9999990614836564, iteration: 50237
loss: 1.0173157453536987,grad_norm: 0.9999993116956437, iteration: 50238
loss: 0.9847835898399353,grad_norm: 0.9999991537415456, iteration: 50239
loss: 1.0284329652786255,grad_norm: 0.9999991351297286, iteration: 50240
loss: 1.025127649307251,grad_norm: 0.9999990257302358, iteration: 50241
loss: 0.9826974868774414,grad_norm: 0.9844375121338976, iteration: 50242
loss: 0.9629442095756531,grad_norm: 0.9401826700406973, iteration: 50243
loss: 0.9812625050544739,grad_norm: 0.9999992200208833, iteration: 50244
loss: 0.9710240364074707,grad_norm: 0.9338018793377287, iteration: 50245
loss: 1.0352951288223267,grad_norm: 0.9999991747184163, iteration: 50246
loss: 1.0402671098709106,grad_norm: 0.9999992309524598, iteration: 50247
loss: 1.0078855752944946,grad_norm: 0.8902485180091559, iteration: 50248
loss: 0.983259916305542,grad_norm: 0.834786056251178, iteration: 50249
loss: 0.9877609014511108,grad_norm: 0.8538229407605045, iteration: 50250
loss: 0.9953476190567017,grad_norm: 0.9999990962367842, iteration: 50251
loss: 0.9568278789520264,grad_norm: 0.9437826186655831, iteration: 50252
loss: 1.0182130336761475,grad_norm: 0.9449065829070273, iteration: 50253
loss: 0.9897164702415466,grad_norm: 0.9800680527652744, iteration: 50254
loss: 1.0329936742782593,grad_norm: 0.9999991545868581, iteration: 50255
loss: 1.0084905624389648,grad_norm: 0.9356965887707713, iteration: 50256
loss: 0.9736046195030212,grad_norm: 0.9999990593168103, iteration: 50257
loss: 0.9787787795066833,grad_norm: 0.9999992023059531, iteration: 50258
loss: 0.9747199416160583,grad_norm: 0.9999990209672664, iteration: 50259
loss: 0.9856635928153992,grad_norm: 0.966620066262367, iteration: 50260
loss: 0.9787626266479492,grad_norm: 0.9293626250092424, iteration: 50261
loss: 1.0306003093719482,grad_norm: 0.9999992022364735, iteration: 50262
loss: 1.0351775884628296,grad_norm: 0.9999990665789865, iteration: 50263
loss: 1.0202617645263672,grad_norm: 0.9450726929502054, iteration: 50264
loss: 1.021043300628662,grad_norm: 0.9702695249354133, iteration: 50265
loss: 0.9720481038093567,grad_norm: 0.8542329459643971, iteration: 50266
loss: 0.9979416131973267,grad_norm: 0.9101739555490839, iteration: 50267
loss: 0.9971985816955566,grad_norm: 0.9999990738915955, iteration: 50268
loss: 0.9783447980880737,grad_norm: 0.9999992029650047, iteration: 50269
loss: 1.0346511602401733,grad_norm: 0.918890979877814, iteration: 50270
loss: 0.9502049684524536,grad_norm: 0.9824179293946824, iteration: 50271
loss: 1.043424367904663,grad_norm: 0.9999991102144966, iteration: 50272
loss: 1.0075377225875854,grad_norm: 0.9999991267222748, iteration: 50273
loss: 1.0027657747268677,grad_norm: 0.9541928855919041, iteration: 50274
loss: 1.0318683385849,grad_norm: 0.9403415210120694, iteration: 50275
loss: 1.0272045135498047,grad_norm: 0.7822347195415859, iteration: 50276
loss: 1.0027375221252441,grad_norm: 0.9025050197287726, iteration: 50277
loss: 1.015519618988037,grad_norm: 0.9999992729305732, iteration: 50278
loss: 1.000659465789795,grad_norm: 0.9930486950301551, iteration: 50279
loss: 1.0199763774871826,grad_norm: 0.999999133391666, iteration: 50280
loss: 1.0096609592437744,grad_norm: 0.9999990065705409, iteration: 50281
loss: 1.0136001110076904,grad_norm: 0.999999087965958, iteration: 50282
loss: 1.0452591180801392,grad_norm: 0.9999989490895291, iteration: 50283
loss: 1.000352144241333,grad_norm: 0.9703192303740759, iteration: 50284
loss: 0.9874088168144226,grad_norm: 0.999999117212266, iteration: 50285
loss: 0.9878533482551575,grad_norm: 0.9869343062163783, iteration: 50286
loss: 1.0533678531646729,grad_norm: 0.9999992486903956, iteration: 50287
loss: 1.013407588005066,grad_norm: 0.9999990827798529, iteration: 50288
loss: 0.965716540813446,grad_norm: 0.9999991357188679, iteration: 50289
loss: 0.9911803603172302,grad_norm: 0.9999989398231468, iteration: 50290
loss: 0.9590848684310913,grad_norm: 0.9905797685664288, iteration: 50291
loss: 1.0131604671478271,grad_norm: 0.9867547674410735, iteration: 50292
loss: 1.012918472290039,grad_norm: 0.9212419449011087, iteration: 50293
loss: 0.9670301675796509,grad_norm: 0.9982245326143078, iteration: 50294
loss: 1.010346531867981,grad_norm: 0.9999991994006147, iteration: 50295
loss: 0.9693744778633118,grad_norm: 0.9999992221112418, iteration: 50296
loss: 1.008978009223938,grad_norm: 0.9999992447399836, iteration: 50297
loss: 0.9611173272132874,grad_norm: 0.9144410364770196, iteration: 50298
loss: 1.024025559425354,grad_norm: 0.9810680821710809, iteration: 50299
loss: 1.0080516338348389,grad_norm: 0.9999997219104347, iteration: 50300
loss: 0.9821743369102478,grad_norm: 0.999998891239996, iteration: 50301
loss: 1.0552968978881836,grad_norm: 0.9999990615581041, iteration: 50302
loss: 1.0549668073654175,grad_norm: 0.9999991551865923, iteration: 50303
loss: 1.002362608909607,grad_norm: 0.9999993730202466, iteration: 50304
loss: 1.0200660228729248,grad_norm: 0.935140864423951, iteration: 50305
loss: 1.011521339416504,grad_norm: 0.999999035640246, iteration: 50306
loss: 1.0015519857406616,grad_norm: 0.9025723911855186, iteration: 50307
loss: 0.978432834148407,grad_norm: 0.9999992062499754, iteration: 50308
loss: 1.0030505657196045,grad_norm: 0.9999989927618584, iteration: 50309
loss: 1.0334835052490234,grad_norm: 0.9999990480485595, iteration: 50310
loss: 1.0509657859802246,grad_norm: 0.9638850776888676, iteration: 50311
loss: 0.9914449453353882,grad_norm: 0.9999990902616503, iteration: 50312
loss: 1.0059024095535278,grad_norm: 0.999999180614831, iteration: 50313
loss: 1.0113049745559692,grad_norm: 0.9999990153637884, iteration: 50314
loss: 1.0074940919876099,grad_norm: 0.9999991200134385, iteration: 50315
loss: 0.976868212223053,grad_norm: 0.9850490182587975, iteration: 50316
loss: 1.060911774635315,grad_norm: 0.9999998416488134, iteration: 50317
loss: 1.012089490890503,grad_norm: 0.9999991338593197, iteration: 50318
loss: 1.002182126045227,grad_norm: 0.9999990573056903, iteration: 50319
loss: 1.0416009426116943,grad_norm: 0.9625207357836365, iteration: 50320
loss: 0.9758434891700745,grad_norm: 0.9999992266029366, iteration: 50321
loss: 1.0302505493164062,grad_norm: 0.9999991387824813, iteration: 50322
loss: 0.9720371961593628,grad_norm: 0.9285846194412872, iteration: 50323
loss: 0.9889605641365051,grad_norm: 0.8932106035723637, iteration: 50324
loss: 0.9615756869316101,grad_norm: 0.9925283129362812, iteration: 50325
loss: 1.0026599168777466,grad_norm: 0.9034746916943777, iteration: 50326
loss: 1.0090506076812744,grad_norm: 0.9999991845882026, iteration: 50327
loss: 1.0121140480041504,grad_norm: 0.9989961518429701, iteration: 50328
loss: 1.0146701335906982,grad_norm: 0.9999993975246935, iteration: 50329
loss: 1.0119708776474,grad_norm: 0.8325337204059935, iteration: 50330
loss: 1.0172621011734009,grad_norm: 0.8846034483402998, iteration: 50331
loss: 1.0086791515350342,grad_norm: 0.999999119683357, iteration: 50332
loss: 1.0320478677749634,grad_norm: 0.9999990643407303, iteration: 50333
loss: 0.9625095129013062,grad_norm: 0.9999991556736301, iteration: 50334
loss: 0.9884179830551147,grad_norm: 0.9944736707497395, iteration: 50335
loss: 1.005999207496643,grad_norm: 0.9999990381662296, iteration: 50336
loss: 0.9624165892601013,grad_norm: 0.9999990606727261, iteration: 50337
loss: 1.00316321849823,grad_norm: 0.9107383530683983, iteration: 50338
loss: 1.0188000202178955,grad_norm: 0.9999989154846712, iteration: 50339
loss: 0.993373692035675,grad_norm: 0.8580033784586829, iteration: 50340
loss: 1.0147546529769897,grad_norm: 0.9601087846171034, iteration: 50341
loss: 1.0094075202941895,grad_norm: 0.9124423270758631, iteration: 50342
loss: 0.9654788374900818,grad_norm: 0.9472677331881939, iteration: 50343
loss: 1.0071483850479126,grad_norm: 0.8349909696384666, iteration: 50344
loss: 0.9677707552909851,grad_norm: 0.999999131528096, iteration: 50345
loss: 0.9674209356307983,grad_norm: 0.9372912714108781, iteration: 50346
loss: 1.0180820226669312,grad_norm: 0.9268403069756161, iteration: 50347
loss: 1.0167337656021118,grad_norm: 0.9486171035232963, iteration: 50348
loss: 1.008293867111206,grad_norm: 0.9999992194795049, iteration: 50349
loss: 0.9631364345550537,grad_norm: 0.9680362672758641, iteration: 50350
loss: 1.0352203845977783,grad_norm: 0.9999992228884703, iteration: 50351
loss: 1.0334826707839966,grad_norm: 0.9985171425900164, iteration: 50352
loss: 0.9752829670906067,grad_norm: 0.9999990236991139, iteration: 50353
loss: 1.0057213306427002,grad_norm: 0.9344459372301493, iteration: 50354
loss: 1.0054725408554077,grad_norm: 0.9999991202377498, iteration: 50355
loss: 0.9579473733901978,grad_norm: 0.9999990552867012, iteration: 50356
loss: 1.158953070640564,grad_norm: 0.999999960991011, iteration: 50357
loss: 0.9779210686683655,grad_norm: 0.9999991335138494, iteration: 50358
loss: 0.9751216769218445,grad_norm: 0.9999989404007416, iteration: 50359
loss: 0.975635290145874,grad_norm: 0.9636189226658503, iteration: 50360
loss: 1.0161643028259277,grad_norm: 0.8135319513232812, iteration: 50361
loss: 0.995674192905426,grad_norm: 0.893857516768474, iteration: 50362
loss: 1.0084871053695679,grad_norm: 0.9626828507466169, iteration: 50363
loss: 0.9797665476799011,grad_norm: 0.999999024789205, iteration: 50364
loss: 1.0365562438964844,grad_norm: 0.9999995564058045, iteration: 50365
loss: 1.0298405885696411,grad_norm: 0.9923326201789917, iteration: 50366
loss: 1.0141078233718872,grad_norm: 0.829196240743873, iteration: 50367
loss: 1.03618323802948,grad_norm: 0.9760504357332557, iteration: 50368
loss: 0.9899300336837769,grad_norm: 0.9999990290649994, iteration: 50369
loss: 1.0078210830688477,grad_norm: 0.9999991725596011, iteration: 50370
loss: 0.9697809219360352,grad_norm: 0.999999335987757, iteration: 50371
loss: 1.090919852256775,grad_norm: 0.9999995987328332, iteration: 50372
loss: 0.9893615245819092,grad_norm: 0.9603676939868381, iteration: 50373
loss: 1.018977165222168,grad_norm: 0.968074991071963, iteration: 50374
loss: 0.9783251285552979,grad_norm: 0.9999991426430054, iteration: 50375
loss: 0.9957212805747986,grad_norm: 0.9999996712691424, iteration: 50376
loss: 0.9987011551856995,grad_norm: 0.999999688286265, iteration: 50377
loss: 1.0174270868301392,grad_norm: 0.9684432883427009, iteration: 50378
loss: 1.0105695724487305,grad_norm: 0.9491459761413042, iteration: 50379
loss: 1.008552074432373,grad_norm: 0.9338423600010357, iteration: 50380
loss: 1.0706539154052734,grad_norm: 0.9999996412350346, iteration: 50381
loss: 0.983604371547699,grad_norm: 0.9999989604851919, iteration: 50382
loss: 1.0263704061508179,grad_norm: 0.9999994377457809, iteration: 50383
loss: 1.0581833124160767,grad_norm: 0.9999994589908343, iteration: 50384
loss: 1.0488489866256714,grad_norm: 0.9999994273670172, iteration: 50385
loss: 0.9864569902420044,grad_norm: 0.9999991970022064, iteration: 50386
loss: 1.0353997945785522,grad_norm: 0.9999993052596163, iteration: 50387
loss: 1.075393795967102,grad_norm: 0.9999998172555039, iteration: 50388
loss: 0.9810159206390381,grad_norm: 0.9999996030979366, iteration: 50389
loss: 1.0349013805389404,grad_norm: 0.9999994412305129, iteration: 50390
loss: 1.0112711191177368,grad_norm: 0.9812650098010044, iteration: 50391
loss: 1.0080763101577759,grad_norm: 0.9297919794488716, iteration: 50392
loss: 1.029191255569458,grad_norm: 0.9999991658617976, iteration: 50393
loss: 1.0228995084762573,grad_norm: 0.9999991339218439, iteration: 50394
loss: 1.0042979717254639,grad_norm: 0.9999990445761654, iteration: 50395
loss: 1.0087374448776245,grad_norm: 0.8785343488906103, iteration: 50396
loss: 1.0070536136627197,grad_norm: 0.9917211041684574, iteration: 50397
loss: 1.0518927574157715,grad_norm: 0.9999990644873491, iteration: 50398
loss: 1.00886070728302,grad_norm: 0.9999995968188805, iteration: 50399
loss: 1.006274700164795,grad_norm: 0.8902524900276955, iteration: 50400
loss: 1.0359023809432983,grad_norm: 0.999999113436086, iteration: 50401
loss: 1.0074406862258911,grad_norm: 0.9999990743186354, iteration: 50402
loss: 0.9886025786399841,grad_norm: 0.9595758572993316, iteration: 50403
loss: 1.011665940284729,grad_norm: 0.9805726632886449, iteration: 50404
loss: 1.0108914375305176,grad_norm: 0.9974723600224558, iteration: 50405
loss: 1.0014995336532593,grad_norm: 0.9432849543664522, iteration: 50406
loss: 0.9958639144897461,grad_norm: 0.9999990632776105, iteration: 50407
loss: 1.0093339681625366,grad_norm: 0.9999991590357546, iteration: 50408
loss: 0.9961171746253967,grad_norm: 0.9999992466790559, iteration: 50409
loss: 0.9946120977401733,grad_norm: 0.9999991753793076, iteration: 50410
loss: 1.0244628190994263,grad_norm: 0.8459181031193181, iteration: 50411
loss: 0.9765656590461731,grad_norm: 0.9999991259788986, iteration: 50412
loss: 1.0343616008758545,grad_norm: 0.9999996447872201, iteration: 50413
loss: 0.9954567551612854,grad_norm: 0.863737813744392, iteration: 50414
loss: 1.0181266069412231,grad_norm: 0.9999999100774807, iteration: 50415
loss: 1.012276291847229,grad_norm: 0.9399354264241871, iteration: 50416
loss: 0.9946950674057007,grad_norm: 0.9999990576232646, iteration: 50417
loss: 1.0308799743652344,grad_norm: 0.9082958754784128, iteration: 50418
loss: 1.0202516317367554,grad_norm: 0.9273498186641188, iteration: 50419
loss: 0.9639837741851807,grad_norm: 0.9999993172990854, iteration: 50420
loss: 1.009042739868164,grad_norm: 0.99999910355833, iteration: 50421
loss: 0.9872408509254456,grad_norm: 0.9999998399987241, iteration: 50422
loss: 1.0169695615768433,grad_norm: 0.9089516748082686, iteration: 50423
loss: 1.0312321186065674,grad_norm: 0.9999991479455548, iteration: 50424
loss: 1.1074601411819458,grad_norm: 0.9999994149837862, iteration: 50425
loss: 1.0109590291976929,grad_norm: 0.9999991917429677, iteration: 50426
loss: 1.0087826251983643,grad_norm: 0.9880543343504307, iteration: 50427
loss: 1.0225344896316528,grad_norm: 0.9030405832879712, iteration: 50428
loss: 1.0193387269973755,grad_norm: 0.9999990615308001, iteration: 50429
loss: 1.0211611986160278,grad_norm: 0.9999991347740603, iteration: 50430
loss: 1.0514905452728271,grad_norm: 0.9999990155091794, iteration: 50431
loss: 1.068098545074463,grad_norm: 0.9999996864683254, iteration: 50432
loss: 0.9758914709091187,grad_norm: 0.9999989737469559, iteration: 50433
loss: 0.9864649772644043,grad_norm: 0.9999992275772261, iteration: 50434
loss: 0.9873387217521667,grad_norm: 0.8814926895281646, iteration: 50435
loss: 1.0036907196044922,grad_norm: 0.9819542266751218, iteration: 50436
loss: 1.0331815481185913,grad_norm: 0.9999991156781239, iteration: 50437
loss: 1.0255794525146484,grad_norm: 0.9999991957251524, iteration: 50438
loss: 1.0333551168441772,grad_norm: 0.9410329455598292, iteration: 50439
loss: 0.980060338973999,grad_norm: 0.9999992099629443, iteration: 50440
loss: 1.0147191286087036,grad_norm: 0.999999801665148, iteration: 50441
loss: 0.9729315042495728,grad_norm: 0.9999991419843987, iteration: 50442
loss: 0.9846919775009155,grad_norm: 0.9999991093792492, iteration: 50443
loss: 0.9868715405464172,grad_norm: 0.9319362549124675, iteration: 50444
loss: 0.9821834564208984,grad_norm: 0.9999997343823912, iteration: 50445
loss: 1.0883978605270386,grad_norm: 0.9999990521849987, iteration: 50446
loss: 1.0158131122589111,grad_norm: 0.8628117276181847, iteration: 50447
loss: 1.0080562829971313,grad_norm: 0.9999990750530418, iteration: 50448
loss: 1.013633131980896,grad_norm: 0.9773366844929081, iteration: 50449
loss: 1.0224292278289795,grad_norm: 0.9999992078875928, iteration: 50450
loss: 0.9944667816162109,grad_norm: 0.9597883324972174, iteration: 50451
loss: 1.0081870555877686,grad_norm: 0.9569706762318886, iteration: 50452
loss: 0.9920380115509033,grad_norm: 0.9999989914812356, iteration: 50453
loss: 1.0394477844238281,grad_norm: 0.9999991316961112, iteration: 50454
loss: 1.0022406578063965,grad_norm: 0.999999197204402, iteration: 50455
loss: 0.9814408421516418,grad_norm: 0.9520521193540283, iteration: 50456
loss: 1.0409772396087646,grad_norm: 0.9795058485731148, iteration: 50457
loss: 1.13572359085083,grad_norm: 0.9999998698155627, iteration: 50458
loss: 0.9530660510063171,grad_norm: 0.9999990610624392, iteration: 50459
loss: 1.0386648178100586,grad_norm: 0.9999989297735858, iteration: 50460
loss: 0.9853124618530273,grad_norm: 0.9881387517485991, iteration: 50461
loss: 0.9855189323425293,grad_norm: 0.9999991063591072, iteration: 50462
loss: 0.9970011711120605,grad_norm: 0.9999989883248971, iteration: 50463
loss: 0.974578857421875,grad_norm: 0.9475662112726606, iteration: 50464
loss: 0.9884886145591736,grad_norm: 0.9563463765097858, iteration: 50465
loss: 1.006488561630249,grad_norm: 0.9257523629462417, iteration: 50466
loss: 0.9764750003814697,grad_norm: 0.9999990486828665, iteration: 50467
loss: 0.9956541657447815,grad_norm: 0.9999991992647117, iteration: 50468
loss: 1.0142968893051147,grad_norm: 0.9999991368630315, iteration: 50469
loss: 0.9849853515625,grad_norm: 0.9999990671131852, iteration: 50470
loss: 1.0281349420547485,grad_norm: 0.9349979145074147, iteration: 50471
loss: 0.989134669303894,grad_norm: 0.9388525897242692, iteration: 50472
loss: 1.010358452796936,grad_norm: 0.8737422899964008, iteration: 50473
loss: 1.0108145475387573,grad_norm: 0.9245468189070936, iteration: 50474
loss: 1.0128238201141357,grad_norm: 0.9246750388319717, iteration: 50475
loss: 0.9790566563606262,grad_norm: 0.9821304774475705, iteration: 50476
loss: 1.051225185394287,grad_norm: 0.9999989492352687, iteration: 50477
loss: 0.9801727533340454,grad_norm: 0.9999991822481308, iteration: 50478
loss: 0.9900981187820435,grad_norm: 0.9297888650005396, iteration: 50479
loss: 1.0264549255371094,grad_norm: 0.8897200074186817, iteration: 50480
loss: 1.0082154273986816,grad_norm: 0.9999990715124454, iteration: 50481
loss: 1.0121243000030518,grad_norm: 0.942842200217103, iteration: 50482
loss: 1.0247535705566406,grad_norm: 0.9549095105342993, iteration: 50483
loss: 0.9796476364135742,grad_norm: 0.9999989390998782, iteration: 50484
loss: 1.0139573812484741,grad_norm: 0.8684619523126645, iteration: 50485
loss: 1.026414155960083,grad_norm: 0.966795943499432, iteration: 50486
loss: 0.9967226982116699,grad_norm: 0.9999993095442304, iteration: 50487
loss: 1.0148828029632568,grad_norm: 0.999999939945635, iteration: 50488
loss: 1.0543978214263916,grad_norm: 0.9395457968029778, iteration: 50489
loss: 0.994723379611969,grad_norm: 0.9581046507560533, iteration: 50490
loss: 1.0094995498657227,grad_norm: 0.9408653889225572, iteration: 50491
loss: 0.9795318245887756,grad_norm: 0.9132035369305704, iteration: 50492
loss: 0.9883810877799988,grad_norm: 0.9999991691525442, iteration: 50493
loss: 0.9753564596176147,grad_norm: 0.999999052265703, iteration: 50494
loss: 0.9965522885322571,grad_norm: 0.9852464442113607, iteration: 50495
loss: 0.9851030111312866,grad_norm: 0.767900016579269, iteration: 50496
loss: 0.9974209666252136,grad_norm: 0.9999990324152499, iteration: 50497
loss: 1.0370830297470093,grad_norm: 0.881514242715796, iteration: 50498
loss: 0.9766412973403931,grad_norm: 0.9013228103970344, iteration: 50499
loss: 0.9883942008018494,grad_norm: 0.999999098234983, iteration: 50500
loss: 0.9797767400741577,grad_norm: 0.9999989299361883, iteration: 50501
loss: 1.0163742303848267,grad_norm: 0.8858698744928445, iteration: 50502
loss: 1.0492327213287354,grad_norm: 0.9999997485096318, iteration: 50503
loss: 0.9762561917304993,grad_norm: 0.9040748041493609, iteration: 50504
loss: 1.0019559860229492,grad_norm: 0.8868299651106445, iteration: 50505
loss: 1.0019785165786743,grad_norm: 0.958581808310522, iteration: 50506
loss: 1.0158928632736206,grad_norm: 0.9429708168569797, iteration: 50507
loss: 1.0116087198257446,grad_norm: 0.9592390438829345, iteration: 50508
loss: 0.9796492457389832,grad_norm: 0.999999054643149, iteration: 50509
loss: 1.0089168548583984,grad_norm: 0.9999992740793997, iteration: 50510
loss: 1.024498462677002,grad_norm: 0.8637465415523152, iteration: 50511
loss: 1.0173847675323486,grad_norm: 0.9999989567359484, iteration: 50512
loss: 1.0229367017745972,grad_norm: 0.9999991432575857, iteration: 50513
loss: 1.0254786014556885,grad_norm: 0.9768299253245948, iteration: 50514
loss: 1.0083253383636475,grad_norm: 0.9999990548621073, iteration: 50515
loss: 1.0175658464431763,grad_norm: 0.9675947072904698, iteration: 50516
loss: 0.9861003160476685,grad_norm: 0.9999992145831953, iteration: 50517
loss: 1.1038535833358765,grad_norm: 0.9999992937058373, iteration: 50518
loss: 0.982819676399231,grad_norm: 0.9409921995343311, iteration: 50519
loss: 1.0067706108093262,grad_norm: 0.9443722859785267, iteration: 50520
loss: 0.9849923849105835,grad_norm: 0.9999997967617348, iteration: 50521
loss: 0.9787735342979431,grad_norm: 0.999999655757262, iteration: 50522
loss: 0.993601381778717,grad_norm: 0.990760750080664, iteration: 50523
loss: 1.0275976657867432,grad_norm: 0.8069336319027136, iteration: 50524
loss: 1.0079480409622192,grad_norm: 0.9999990244569591, iteration: 50525
loss: 0.9807268381118774,grad_norm: 0.7971455036073871, iteration: 50526
loss: 1.024850845336914,grad_norm: 0.9999991796771242, iteration: 50527
loss: 0.9876266121864319,grad_norm: 0.9999998255901975, iteration: 50528
loss: 1.0289649963378906,grad_norm: 0.9999991462229066, iteration: 50529
loss: 1.019019603729248,grad_norm: 0.9999990315593783, iteration: 50530
loss: 1.021773338317871,grad_norm: 0.9999990629749673, iteration: 50531
loss: 0.991093635559082,grad_norm: 0.8898000638386483, iteration: 50532
loss: 1.0256210565567017,grad_norm: 0.9999991950470736, iteration: 50533
loss: 0.9648075103759766,grad_norm: 0.9215031803905193, iteration: 50534
loss: 1.039575219154358,grad_norm: 0.9999990736223572, iteration: 50535
loss: 0.99208664894104,grad_norm: 0.9999993168179764, iteration: 50536
loss: 0.9991158843040466,grad_norm: 0.9999996246458545, iteration: 50537
loss: 1.0021095275878906,grad_norm: 0.9999991474087573, iteration: 50538
loss: 1.0374575853347778,grad_norm: 0.9999990549250583, iteration: 50539
loss: 1.0350641012191772,grad_norm: 0.9999990467433667, iteration: 50540
loss: 1.0054686069488525,grad_norm: 0.9999992199831607, iteration: 50541
loss: 0.984263002872467,grad_norm: 0.9999990376693342, iteration: 50542
loss: 0.9854154586791992,grad_norm: 0.8119911983880853, iteration: 50543
loss: 0.9777154922485352,grad_norm: 0.9999990148127695, iteration: 50544
loss: 0.9825539588928223,grad_norm: 0.999999776378015, iteration: 50545
loss: 0.9786944389343262,grad_norm: 0.9999991903397818, iteration: 50546
loss: 1.0222859382629395,grad_norm: 0.9999991399333578, iteration: 50547
loss: 1.0148460865020752,grad_norm: 0.8905400776406438, iteration: 50548
loss: 1.002213478088379,grad_norm: 0.9999990919262054, iteration: 50549
loss: 1.0191056728363037,grad_norm: 0.9999990982585798, iteration: 50550
loss: 0.9735496044158936,grad_norm: 0.9999991222180434, iteration: 50551
loss: 1.0103564262390137,grad_norm: 0.9999991379823575, iteration: 50552
loss: 1.0184547901153564,grad_norm: 0.9999991645486822, iteration: 50553
loss: 0.996774435043335,grad_norm: 0.8386816469253897, iteration: 50554
loss: 1.010264277458191,grad_norm: 0.9666814006443004, iteration: 50555
loss: 1.00456964969635,grad_norm: 0.999999275510995, iteration: 50556
loss: 1.005938172340393,grad_norm: 0.9999990262872664, iteration: 50557
loss: 0.9462648630142212,grad_norm: 0.9762125906527174, iteration: 50558
loss: 1.0245490074157715,grad_norm: 0.9999994091567587, iteration: 50559
loss: 1.0197924375534058,grad_norm: 0.9999991460561697, iteration: 50560
loss: 0.9964444637298584,grad_norm: 0.9999991751650097, iteration: 50561
loss: 0.9999182820320129,grad_norm: 0.9332449817178269, iteration: 50562
loss: 1.0214382410049438,grad_norm: 0.9999991108948322, iteration: 50563
loss: 0.9966776371002197,grad_norm: 0.8919316345675875, iteration: 50564
loss: 0.9990551471710205,grad_norm: 0.8931833669773216, iteration: 50565
loss: 1.0195332765579224,grad_norm: 0.8764310396306296, iteration: 50566
loss: 0.9932510256767273,grad_norm: 0.9999990026407356, iteration: 50567
loss: 0.9941605925559998,grad_norm: 0.7766581901031225, iteration: 50568
loss: 1.0179495811462402,grad_norm: 0.8869735556527959, iteration: 50569
loss: 0.996558427810669,grad_norm: 0.9403903277112382, iteration: 50570
loss: 0.9911372661590576,grad_norm: 0.891607877359681, iteration: 50571
loss: 0.9574506878852844,grad_norm: 0.9999991148732329, iteration: 50572
loss: 0.9977135062217712,grad_norm: 0.9999990668978865, iteration: 50573
loss: 1.0355530977249146,grad_norm: 0.9697829992114451, iteration: 50574
loss: 1.0227841138839722,grad_norm: 0.9999993012874151, iteration: 50575
loss: 1.0083380937576294,grad_norm: 0.9073033643055094, iteration: 50576
loss: 1.022829532623291,grad_norm: 0.9999991586414199, iteration: 50577
loss: 1.0168031454086304,grad_norm: 0.9999992896401622, iteration: 50578
loss: 0.9978358149528503,grad_norm: 0.9999992530999164, iteration: 50579
loss: 1.0195742845535278,grad_norm: 0.9999989091891862, iteration: 50580
loss: 0.973361611366272,grad_norm: 0.9999991305493857, iteration: 50581
loss: 0.9880058765411377,grad_norm: 0.9999990248580677, iteration: 50582
loss: 0.9838686585426331,grad_norm: 0.999999252348455, iteration: 50583
loss: 1.0249706506729126,grad_norm: 0.9942082435652416, iteration: 50584
loss: 1.0356676578521729,grad_norm: 0.998274779077971, iteration: 50585
loss: 0.9929032921791077,grad_norm: 0.9999992306620109, iteration: 50586
loss: 0.9654673337936401,grad_norm: 0.9999991259273615, iteration: 50587
loss: 1.0092335939407349,grad_norm: 0.9999994218253432, iteration: 50588
loss: 0.974199116230011,grad_norm: 0.9330243992236013, iteration: 50589
loss: 1.0074357986450195,grad_norm: 0.9999991252233896, iteration: 50590
loss: 1.0070158243179321,grad_norm: 0.9491788298402539, iteration: 50591
loss: 0.95499587059021,grad_norm: 0.9427811801967703, iteration: 50592
loss: 0.9636155962944031,grad_norm: 0.9999991360043303, iteration: 50593
loss: 0.9409582018852234,grad_norm: 0.8773104947477202, iteration: 50594
loss: 1.045223355293274,grad_norm: 0.9999995415604754, iteration: 50595
loss: 1.0415462255477905,grad_norm: 0.9368591032257103, iteration: 50596
loss: 0.9773671627044678,grad_norm: 0.9102251779977806, iteration: 50597
loss: 1.0126875638961792,grad_norm: 0.999999341412967, iteration: 50598
loss: 1.013903021812439,grad_norm: 0.9999991196797935, iteration: 50599
loss: 1.0467660427093506,grad_norm: 0.9999992274000351, iteration: 50600
loss: 1.1103209257125854,grad_norm: 0.9999998124425383, iteration: 50601
loss: 1.005616307258606,grad_norm: 0.9999991666854744, iteration: 50602
loss: 0.9871002435684204,grad_norm: 0.9404257655109289, iteration: 50603
loss: 1.020550012588501,grad_norm: 0.92362227488109, iteration: 50604
loss: 1.0098227262496948,grad_norm: 0.8069203471104479, iteration: 50605
loss: 1.0268439054489136,grad_norm: 0.9766535339562867, iteration: 50606
loss: 1.010596752166748,grad_norm: 0.9999992026665043, iteration: 50607
loss: 1.0199583768844604,grad_norm: 0.9999991352652418, iteration: 50608
loss: 0.9982242584228516,grad_norm: 0.9999992731539096, iteration: 50609
loss: 1.0046991109848022,grad_norm: 0.9999989806717622, iteration: 50610
loss: 0.9774121642112732,grad_norm: 0.9999992457824655, iteration: 50611
loss: 1.0661733150482178,grad_norm: 0.999999317014135, iteration: 50612
loss: 1.0382332801818848,grad_norm: 0.999999177771055, iteration: 50613
loss: 1.0235844850540161,grad_norm: 0.9563214409833446, iteration: 50614
loss: 1.0284713506698608,grad_norm: 0.9999991697879866, iteration: 50615
loss: 0.982668399810791,grad_norm: 0.9257926697343659, iteration: 50616
loss: 1.013451099395752,grad_norm: 0.9460670763697068, iteration: 50617
loss: 0.9871886372566223,grad_norm: 0.9999991217719796, iteration: 50618
loss: 0.9850283861160278,grad_norm: 0.95010586835612, iteration: 50619
loss: 1.012006402015686,grad_norm: 0.9999994168347179, iteration: 50620
loss: 1.0292809009552002,grad_norm: 0.976739262849111, iteration: 50621
loss: 1.0177968740463257,grad_norm: 0.8805045798510658, iteration: 50622
loss: 1.000630497932434,grad_norm: 0.9395867586539026, iteration: 50623
loss: 0.9882645010948181,grad_norm: 0.9999990884221087, iteration: 50624
loss: 0.9873819351196289,grad_norm: 0.7953177492130863, iteration: 50625
loss: 1.0621984004974365,grad_norm: 0.9999993153753329, iteration: 50626
loss: 0.9923537969589233,grad_norm: 0.7979242739753343, iteration: 50627
loss: 1.008878469467163,grad_norm: 0.9999991485957226, iteration: 50628
loss: 0.9913396835327148,grad_norm: 0.9910081081229807, iteration: 50629
loss: 0.9649714827537537,grad_norm: 0.9999990298942675, iteration: 50630
loss: 1.0021299123764038,grad_norm: 0.9999989726869735, iteration: 50631
loss: 1.0069113969802856,grad_norm: 0.9761299420892172, iteration: 50632
loss: 1.0069185495376587,grad_norm: 0.9999991036885907, iteration: 50633
loss: 0.9809102416038513,grad_norm: 0.9086091357969515, iteration: 50634
loss: 1.0237596035003662,grad_norm: 0.9999993205650753, iteration: 50635
loss: 0.9713498950004578,grad_norm: 0.9999991392594557, iteration: 50636
loss: 1.031504511833191,grad_norm: 0.999999532788512, iteration: 50637
loss: 1.0248723030090332,grad_norm: 0.999999417352989, iteration: 50638
loss: 0.981634795665741,grad_norm: 0.9999990843478189, iteration: 50639
loss: 0.9952495694160461,grad_norm: 0.8943524368019201, iteration: 50640
loss: 1.015833854675293,grad_norm: 0.9999990159262604, iteration: 50641
loss: 1.0020424127578735,grad_norm: 0.9314627804365071, iteration: 50642
loss: 1.0467561483383179,grad_norm: 0.8952122896970618, iteration: 50643
loss: 1.0236879587173462,grad_norm: 0.9513318451660674, iteration: 50644
loss: 1.0168887376785278,grad_norm: 0.8180710200261784, iteration: 50645
loss: 1.0208967924118042,grad_norm: 0.9999991540203819, iteration: 50646
loss: 0.9965315461158752,grad_norm: 0.9951512035451134, iteration: 50647
loss: 0.9857003092765808,grad_norm: 0.9550108993895738, iteration: 50648
loss: 0.9893932342529297,grad_norm: 0.9875120112413689, iteration: 50649
loss: 0.9977914690971375,grad_norm: 0.9027610261014511, iteration: 50650
loss: 1.062082052230835,grad_norm: 0.990475385150263, iteration: 50651
loss: 0.9937217235565186,grad_norm: 0.9222348882878408, iteration: 50652
loss: 1.0432511568069458,grad_norm: 0.9182668862038771, iteration: 50653
loss: 1.0277680158615112,grad_norm: 0.9743507285727361, iteration: 50654
loss: 1.005847692489624,grad_norm: 0.99999987329043, iteration: 50655
loss: 1.0093488693237305,grad_norm: 0.9999996643240464, iteration: 50656
loss: 1.043764352798462,grad_norm: 0.999999011410127, iteration: 50657
loss: 1.0954482555389404,grad_norm: 0.9999995858313774, iteration: 50658
loss: 1.0328946113586426,grad_norm: 0.9999991265004265, iteration: 50659
loss: 1.0098072290420532,grad_norm: 0.8877556322868663, iteration: 50660
loss: 0.9840071201324463,grad_norm: 0.9999995742641642, iteration: 50661
loss: 1.0367522239685059,grad_norm: 0.999999153304958, iteration: 50662
loss: 0.9873611330986023,grad_norm: 0.9999990796985392, iteration: 50663
loss: 1.0191655158996582,grad_norm: 0.9999991484487363, iteration: 50664
loss: 1.031339168548584,grad_norm: 0.9999993034651846, iteration: 50665
loss: 0.990505576133728,grad_norm: 0.9999991153027207, iteration: 50666
loss: 0.998115599155426,grad_norm: 0.9999991912868338, iteration: 50667
loss: 0.952639102935791,grad_norm: 0.9999991272748895, iteration: 50668
loss: 1.011770248413086,grad_norm: 0.9868464169814513, iteration: 50669
loss: 1.0420902967453003,grad_norm: 0.9999992766579254, iteration: 50670
loss: 1.0437268018722534,grad_norm: 0.9999990926801758, iteration: 50671
loss: 1.0183331966400146,grad_norm: 0.932924192846566, iteration: 50672
loss: 1.0162698030471802,grad_norm: 0.9999992971810439, iteration: 50673
loss: 1.0169906616210938,grad_norm: 0.9308961338119556, iteration: 50674
loss: 1.016316294670105,grad_norm: 0.9018014279902434, iteration: 50675
loss: 1.1319122314453125,grad_norm: 0.9999992924715095, iteration: 50676
loss: 1.009011149406433,grad_norm: 0.9999995001193459, iteration: 50677
loss: 1.0246325731277466,grad_norm: 0.9999990190587188, iteration: 50678
loss: 1.013201117515564,grad_norm: 0.9999991966479771, iteration: 50679
loss: 1.0261588096618652,grad_norm: 0.9999991977145056, iteration: 50680
loss: 0.9844132661819458,grad_norm: 0.9775206082818809, iteration: 50681
loss: 1.029140830039978,grad_norm: 0.9999994480019276, iteration: 50682
loss: 1.0184358358383179,grad_norm: 0.9999991320147946, iteration: 50683
loss: 0.9779726266860962,grad_norm: 0.9483973006894487, iteration: 50684
loss: 1.0293824672698975,grad_norm: 0.9999990180825621, iteration: 50685
loss: 1.0241026878356934,grad_norm: 0.816732329885664, iteration: 50686
loss: 0.9777007102966309,grad_norm: 0.9560219731578009, iteration: 50687
loss: 1.0498926639556885,grad_norm: 0.9999995682147322, iteration: 50688
loss: 1.0071113109588623,grad_norm: 0.9716605212941689, iteration: 50689
loss: 0.9819967150688171,grad_norm: 0.9999992521647791, iteration: 50690
loss: 1.0351648330688477,grad_norm: 0.9584200538713156, iteration: 50691
loss: 1.0277767181396484,grad_norm: 0.9999992614382952, iteration: 50692
loss: 0.9850677251815796,grad_norm: 0.9985818352426551, iteration: 50693
loss: 0.9904283285140991,grad_norm: 0.9999991052904619, iteration: 50694
loss: 1.0387303829193115,grad_norm: 0.9999992269638512, iteration: 50695
loss: 0.9928915500640869,grad_norm: 0.9999990378072044, iteration: 50696
loss: 1.0129368305206299,grad_norm: 0.9622451800672975, iteration: 50697
loss: 0.9876060485839844,grad_norm: 0.9999990613675798, iteration: 50698
loss: 1.0203299522399902,grad_norm: 0.9965439887037139, iteration: 50699
loss: 1.0108829736709595,grad_norm: 0.9999992168760483, iteration: 50700
loss: 1.052068829536438,grad_norm: 0.999999824099192, iteration: 50701
loss: 1.0212335586547852,grad_norm: 0.9289088954856861, iteration: 50702
loss: 1.0046613216400146,grad_norm: 0.8615707485818979, iteration: 50703
loss: 0.9918960928916931,grad_norm: 0.9466960346442184, iteration: 50704
loss: 1.0211421251296997,grad_norm: 0.9999992850989573, iteration: 50705
loss: 1.029668927192688,grad_norm: 0.9317990215687083, iteration: 50706
loss: 0.9984153509140015,grad_norm: 0.9999991541730856, iteration: 50707
loss: 1.0066345930099487,grad_norm: 0.9999993165185586, iteration: 50708
loss: 1.031916618347168,grad_norm: 0.900142855743532, iteration: 50709
loss: 1.0219935178756714,grad_norm: 0.9932402466039996, iteration: 50710
loss: 1.0065265893936157,grad_norm: 0.9999991398406776, iteration: 50711
loss: 1.0277618169784546,grad_norm: 0.9999991492055755, iteration: 50712
loss: 0.9786060452461243,grad_norm: 0.9999992824763354, iteration: 50713
loss: 0.9872502088546753,grad_norm: 0.9999991422717124, iteration: 50714
loss: 1.0333762168884277,grad_norm: 0.9999991244536025, iteration: 50715
loss: 1.0034608840942383,grad_norm: 0.9886378343142865, iteration: 50716
loss: 0.9838424324989319,grad_norm: 0.9168107038634107, iteration: 50717
loss: 0.9845657348632812,grad_norm: 0.9999992639729404, iteration: 50718
loss: 1.0170890092849731,grad_norm: 0.9999992183549811, iteration: 50719
loss: 1.0256677865982056,grad_norm: 0.9999990549013051, iteration: 50720
loss: 1.0495809316635132,grad_norm: 0.9999991407413032, iteration: 50721
loss: 0.9968766570091248,grad_norm: 0.9758836240275004, iteration: 50722
loss: 0.9916788339614868,grad_norm: 0.9999991892214906, iteration: 50723
loss: 1.001071572303772,grad_norm: 0.8301248425929865, iteration: 50724
loss: 1.0110740661621094,grad_norm: 0.8527118886822236, iteration: 50725
loss: 1.011937141418457,grad_norm: 0.9999991075092848, iteration: 50726
loss: 1.0082706212997437,grad_norm: 0.9275989964460589, iteration: 50727
loss: 1.000506043434143,grad_norm: 0.9999990740931968, iteration: 50728
loss: 1.0269947052001953,grad_norm: 0.9999992614817468, iteration: 50729
loss: 1.0016065835952759,grad_norm: 0.9420933297353843, iteration: 50730
loss: 1.0300036668777466,grad_norm: 0.744468026202593, iteration: 50731
loss: 1.010990023612976,grad_norm: 0.9209382614389928, iteration: 50732
loss: 1.0367437601089478,grad_norm: 0.9999989898244059, iteration: 50733
loss: 0.9679162502288818,grad_norm: 0.9999990440755064, iteration: 50734
loss: 1.0109829902648926,grad_norm: 0.8467288342814266, iteration: 50735
loss: 0.9950037002563477,grad_norm: 0.8972171382444785, iteration: 50736
loss: 1.023169994354248,grad_norm: 0.9030508063071309, iteration: 50737
loss: 1.0180885791778564,grad_norm: 0.9999989995425547, iteration: 50738
loss: 0.9970656633377075,grad_norm: 0.9335016979610994, iteration: 50739
loss: 1.0073720216751099,grad_norm: 0.999999088223782, iteration: 50740
loss: 0.9964255690574646,grad_norm: 0.9316208493655784, iteration: 50741
loss: 1.0253113508224487,grad_norm: 0.9882687732932409, iteration: 50742
loss: 1.0029844045639038,grad_norm: 0.9999993066725785, iteration: 50743
loss: 1.0297050476074219,grad_norm: 0.9999993805109412, iteration: 50744
loss: 1.022960901260376,grad_norm: 0.9999991680289698, iteration: 50745
loss: 0.9807722568511963,grad_norm: 0.9999991723434539, iteration: 50746
loss: 1.0118827819824219,grad_norm: 0.9999992588460291, iteration: 50747
loss: 0.9768736362457275,grad_norm: 0.9155835529048681, iteration: 50748
loss: 1.02157461643219,grad_norm: 0.8940038943550729, iteration: 50749
loss: 0.9971010088920593,grad_norm: 0.9999992818021864, iteration: 50750
loss: 1.0346646308898926,grad_norm: 0.9999994785575749, iteration: 50751
loss: 0.9891974329948425,grad_norm: 0.9999991507679985, iteration: 50752
loss: 1.0233346223831177,grad_norm: 0.9790838828934194, iteration: 50753
loss: 1.0181026458740234,grad_norm: 0.9719141049035844, iteration: 50754
loss: 1.0227776765823364,grad_norm: 0.9999991038906759, iteration: 50755
loss: 0.9666276574134827,grad_norm: 0.9999991002112147, iteration: 50756
loss: 1.0114816427230835,grad_norm: 0.9999992447532995, iteration: 50757
loss: 0.9823499321937561,grad_norm: 0.9999991648210953, iteration: 50758
loss: 0.981321394443512,grad_norm: 0.8623392005758947, iteration: 50759
loss: 0.9748901128768921,grad_norm: 0.999999253544663, iteration: 50760
loss: 1.009455919265747,grad_norm: 0.9999991319878633, iteration: 50761
loss: 0.9936391711235046,grad_norm: 0.9999991461073042, iteration: 50762
loss: 1.0003917217254639,grad_norm: 0.9213303225808162, iteration: 50763
loss: 1.0271501541137695,grad_norm: 0.9999991857376274, iteration: 50764
loss: 1.0368764400482178,grad_norm: 0.9999991310557684, iteration: 50765
loss: 0.9661769270896912,grad_norm: 0.980902645236155, iteration: 50766
loss: 1.007067322731018,grad_norm: 0.9999990403793407, iteration: 50767
loss: 0.992422878742218,grad_norm: 0.9072296858977227, iteration: 50768
loss: 0.9866300225257874,grad_norm: 0.9254369771643858, iteration: 50769
loss: 0.9754370450973511,grad_norm: 0.9999991195151178, iteration: 50770
loss: 1.0077770948410034,grad_norm: 0.9822416079202341, iteration: 50771
loss: 1.0496504306793213,grad_norm: 0.9999994756902421, iteration: 50772
loss: 0.9676018357276917,grad_norm: 0.9508310419717536, iteration: 50773
loss: 0.983174204826355,grad_norm: 0.8679334831177413, iteration: 50774
loss: 1.0219916105270386,grad_norm: 0.9528401001345294, iteration: 50775
loss: 0.9656051397323608,grad_norm: 0.96763647624394, iteration: 50776
loss: 0.9837955832481384,grad_norm: 0.999999281004711, iteration: 50777
loss: 1.0243511199951172,grad_norm: 0.9999992766784548, iteration: 50778
loss: 0.9648176431655884,grad_norm: 0.9999994976507094, iteration: 50779
loss: 1.0350722074508667,grad_norm: 0.9999991798361878, iteration: 50780
loss: 1.0326416492462158,grad_norm: 0.9999991518853547, iteration: 50781
loss: 0.9523921608924866,grad_norm: 0.9999992000932788, iteration: 50782
loss: 0.9969322085380554,grad_norm: 0.8592689314098862, iteration: 50783
loss: 1.021716833114624,grad_norm: 0.9479134537161169, iteration: 50784
loss: 0.9971497058868408,grad_norm: 0.999999169659114, iteration: 50785
loss: 1.0083435773849487,grad_norm: 0.9999992391534298, iteration: 50786
loss: 1.0152539014816284,grad_norm: 0.8937416331155066, iteration: 50787
loss: 0.9823330640792847,grad_norm: 0.9999990935678067, iteration: 50788
loss: 1.015695571899414,grad_norm: 0.7878111081849265, iteration: 50789
loss: 1.0185856819152832,grad_norm: 0.9999997898420117, iteration: 50790
loss: 0.9931603670120239,grad_norm: 0.8844720798295951, iteration: 50791
loss: 1.0062220096588135,grad_norm: 0.999998988969835, iteration: 50792
loss: 1.0008949041366577,grad_norm: 0.9999990519411819, iteration: 50793
loss: 0.9805880784988403,grad_norm: 0.9999991955312353, iteration: 50794
loss: 1.0250234603881836,grad_norm: 0.8159631061104484, iteration: 50795
loss: 1.0102418661117554,grad_norm: 0.9999990862323809, iteration: 50796
loss: 0.9997164607048035,grad_norm: 0.9897672549509191, iteration: 50797
loss: 1.0008435249328613,grad_norm: 0.9999996016093441, iteration: 50798
loss: 1.0197094678878784,grad_norm: 0.9597401467158373, iteration: 50799
loss: 0.986224353313446,grad_norm: 0.9479087005151916, iteration: 50800
loss: 1.0221747159957886,grad_norm: 0.9267908330511198, iteration: 50801
loss: 0.9951035380363464,grad_norm: 0.9439049797768541, iteration: 50802
loss: 1.0301127433776855,grad_norm: 0.9999991960667248, iteration: 50803
loss: 0.9883943796157837,grad_norm: 0.99999909019625, iteration: 50804
loss: 1.001800775527954,grad_norm: 0.9999990952354514, iteration: 50805
loss: 1.0984456539154053,grad_norm: 0.9999996766059629, iteration: 50806
loss: 0.9889077544212341,grad_norm: 0.9548174558216833, iteration: 50807
loss: 1.0048506259918213,grad_norm: 0.9253270432997296, iteration: 50808
loss: 0.9927902817726135,grad_norm: 0.9999989235404877, iteration: 50809
loss: 0.9815434813499451,grad_norm: 0.9999991457116502, iteration: 50810
loss: 0.9959219098091125,grad_norm: 0.8531214091721728, iteration: 50811
loss: 0.9751349091529846,grad_norm: 0.999999258394133, iteration: 50812
loss: 1.0232760906219482,grad_norm: 0.9999990274743433, iteration: 50813
loss: 1.1028156280517578,grad_norm: 0.9999996254772248, iteration: 50814
loss: 0.993780791759491,grad_norm: 0.8580718050450197, iteration: 50815
loss: 1.0336936712265015,grad_norm: 0.8344008027304808, iteration: 50816
loss: 1.0148729085922241,grad_norm: 0.9999988988501715, iteration: 50817
loss: 0.9930626153945923,grad_norm: 0.9999989982867339, iteration: 50818
loss: 1.0049892663955688,grad_norm: 0.979535105210976, iteration: 50819
loss: 0.9912664294242859,grad_norm: 0.9999990318332906, iteration: 50820
loss: 1.007964015007019,grad_norm: 0.9999996024933017, iteration: 50821
loss: 1.0149060487747192,grad_norm: 0.99999900172641, iteration: 50822
loss: 0.9975406527519226,grad_norm: 0.9373075100926529, iteration: 50823
loss: 0.9912387728691101,grad_norm: 0.9271393714206463, iteration: 50824
loss: 0.9916119575500488,grad_norm: 0.8512048452765236, iteration: 50825
loss: 0.9967620968818665,grad_norm: 0.9716931782731367, iteration: 50826
loss: 1.1181914806365967,grad_norm: 0.9999999677597913, iteration: 50827
loss: 0.9740201830863953,grad_norm: 0.999999163312134, iteration: 50828
loss: 0.9716870784759521,grad_norm: 0.9521717883084045, iteration: 50829
loss: 1.0042235851287842,grad_norm: 0.9999989886221509, iteration: 50830
loss: 1.0280994176864624,grad_norm: 0.9999990647309129, iteration: 50831
loss: 1.0151770114898682,grad_norm: 0.9999997740076852, iteration: 50832
loss: 1.0094302892684937,grad_norm: 0.9999991328760419, iteration: 50833
loss: 1.0036897659301758,grad_norm: 0.9999990096010216, iteration: 50834
loss: 1.0059868097305298,grad_norm: 0.9999991552932984, iteration: 50835
loss: 0.9820446372032166,grad_norm: 0.9999990478710666, iteration: 50836
loss: 0.9825267195701599,grad_norm: 0.9658759154163846, iteration: 50837
loss: 0.9894019961357117,grad_norm: 0.9999992031953006, iteration: 50838
loss: 1.0269057750701904,grad_norm: 0.9999991201817175, iteration: 50839
loss: 0.9804030060768127,grad_norm: 0.9381921484981649, iteration: 50840
loss: 1.0227712392807007,grad_norm: 0.9999991479271293, iteration: 50841
loss: 1.0880117416381836,grad_norm: 0.9999993183700912, iteration: 50842
loss: 1.015425205230713,grad_norm: 0.9999990235509838, iteration: 50843
loss: 1.0350306034088135,grad_norm: 0.9445658790949344, iteration: 50844
loss: 0.9853466153144836,grad_norm: 0.9999990487481337, iteration: 50845
loss: 0.9912169575691223,grad_norm: 0.9999994403262189, iteration: 50846
loss: 1.0062404870986938,grad_norm: 0.8632458673084094, iteration: 50847
loss: 0.9664133191108704,grad_norm: 0.9431738735767254, iteration: 50848
loss: 0.9998709559440613,grad_norm: 0.9999991075646794, iteration: 50849
loss: 0.9976676106452942,grad_norm: 0.9999991016571034, iteration: 50850
loss: 0.9625059962272644,grad_norm: 0.9999993425327766, iteration: 50851
loss: 0.9915827512741089,grad_norm: 0.9999992448174702, iteration: 50852
loss: 0.9968213438987732,grad_norm: 0.9198513984419969, iteration: 50853
loss: 0.9848913550376892,grad_norm: 0.9676674560315296, iteration: 50854
loss: 1.0096747875213623,grad_norm: 0.9999990653921677, iteration: 50855
loss: 1.0454401969909668,grad_norm: 0.9761216512902672, iteration: 50856
loss: 1.003669023513794,grad_norm: 0.9221957954608166, iteration: 50857
loss: 1.0287840366363525,grad_norm: 0.9999992412993682, iteration: 50858
loss: 0.9875900149345398,grad_norm: 0.9357371643048592, iteration: 50859
loss: 1.0271413326263428,grad_norm: 0.9999992822583439, iteration: 50860
loss: 0.9865046739578247,grad_norm: 0.9999991969506868, iteration: 50861
loss: 1.0448507070541382,grad_norm: 0.9999991029065805, iteration: 50862
loss: 1.036359190940857,grad_norm: 0.9458304289161087, iteration: 50863
loss: 1.0079294443130493,grad_norm: 0.9705673598046405, iteration: 50864
loss: 1.0013868808746338,grad_norm: 0.9367182181774968, iteration: 50865
loss: 1.0101609230041504,grad_norm: 0.9999993048562869, iteration: 50866
loss: 0.9712792038917542,grad_norm: 0.999999140931214, iteration: 50867
loss: 0.9957848787307739,grad_norm: 0.999999064772347, iteration: 50868
loss: 0.9602615833282471,grad_norm: 0.999999125329752, iteration: 50869
loss: 0.998663604259491,grad_norm: 0.7537236077114654, iteration: 50870
loss: 1.0095834732055664,grad_norm: 0.9242685551387438, iteration: 50871
loss: 1.028352975845337,grad_norm: 0.9999989880825447, iteration: 50872
loss: 0.9865556359291077,grad_norm: 0.9999992847407335, iteration: 50873
loss: 0.9862784743309021,grad_norm: 0.9358998660801551, iteration: 50874
loss: 0.992051899433136,grad_norm: 0.9272114893180426, iteration: 50875
loss: 0.9801206588745117,grad_norm: 0.9999992259362195, iteration: 50876
loss: 1.0083569288253784,grad_norm: 0.9999992516625353, iteration: 50877
loss: 0.993772029876709,grad_norm: 0.9179296838026271, iteration: 50878
loss: 1.0160281658172607,grad_norm: 0.9999990940810489, iteration: 50879
loss: 1.016875147819519,grad_norm: 0.9999995194972269, iteration: 50880
loss: 1.0168730020523071,grad_norm: 0.84236476332306, iteration: 50881
loss: 1.0106266736984253,grad_norm: 0.9999989770422761, iteration: 50882
loss: 1.0405583381652832,grad_norm: 0.9759222401549508, iteration: 50883
loss: 0.9747893214225769,grad_norm: 0.9332342859632257, iteration: 50884
loss: 0.9625104665756226,grad_norm: 0.9999990270957763, iteration: 50885
loss: 0.9989545345306396,grad_norm: 0.999999030313443, iteration: 50886
loss: 0.99198317527771,grad_norm: 0.7252794551053326, iteration: 50887
loss: 1.0180537700653076,grad_norm: 0.99999908721112, iteration: 50888
loss: 1.0571542978286743,grad_norm: 0.9499568676724331, iteration: 50889
loss: 0.9814887046813965,grad_norm: 0.987809251523793, iteration: 50890
loss: 1.0303683280944824,grad_norm: 0.9538359322571356, iteration: 50891
loss: 1.0236644744873047,grad_norm: 0.9765951999255383, iteration: 50892
loss: 1.0256367921829224,grad_norm: 0.9314943244179367, iteration: 50893
loss: 1.0002424716949463,grad_norm: 0.9434494481958575, iteration: 50894
loss: 1.012850046157837,grad_norm: 0.9999992425331541, iteration: 50895
loss: 0.9981051087379456,grad_norm: 0.9999991417152093, iteration: 50896
loss: 0.9998039603233337,grad_norm: 0.8960181230868126, iteration: 50897
loss: 0.9707040190696716,grad_norm: 0.9404925431112038, iteration: 50898
loss: 1.0176219940185547,grad_norm: 0.928041657155241, iteration: 50899
loss: 1.034934401512146,grad_norm: 0.8897227549310658, iteration: 50900
loss: 1.0186455249786377,grad_norm: 0.9999989682234794, iteration: 50901
loss: 0.9892604947090149,grad_norm: 0.9999991627054315, iteration: 50902
loss: 0.9858101010322571,grad_norm: 0.9649677272415983, iteration: 50903
loss: 1.0330135822296143,grad_norm: 0.9999991896047982, iteration: 50904
loss: 1.001330852508545,grad_norm: 0.9999992730422989, iteration: 50905
loss: 0.9891204833984375,grad_norm: 0.9999990488045186, iteration: 50906
loss: 1.0092663764953613,grad_norm: 0.9999990857307097, iteration: 50907
loss: 1.0208934545516968,grad_norm: 0.9999992393023488, iteration: 50908
loss: 0.9772692918777466,grad_norm: 0.9999990544140742, iteration: 50909
loss: 0.9911707639694214,grad_norm: 0.9999990192251543, iteration: 50910
loss: 1.0311704874038696,grad_norm: 0.968898701956915, iteration: 50911
loss: 0.9929625391960144,grad_norm: 0.999999160981917, iteration: 50912
loss: 0.9722353219985962,grad_norm: 0.9999992593131037, iteration: 50913
loss: 0.983296275138855,grad_norm: 0.9488635839840424, iteration: 50914
loss: 0.9962146878242493,grad_norm: 0.9472538637946609, iteration: 50915
loss: 1.0205481052398682,grad_norm: 0.9234934090408047, iteration: 50916
loss: 1.0515676736831665,grad_norm: 0.9999991315712642, iteration: 50917
loss: 1.0235029458999634,grad_norm: 0.9999994118132689, iteration: 50918
loss: 1.0109177827835083,grad_norm: 0.9455168884667193, iteration: 50919
loss: 1.0123984813690186,grad_norm: 0.9747083282605171, iteration: 50920
loss: 1.0045815706253052,grad_norm: 0.8927088279525133, iteration: 50921
loss: 0.9616435766220093,grad_norm: 0.9999990708809546, iteration: 50922
loss: 1.0117027759552002,grad_norm: 0.956999224660403, iteration: 50923
loss: 0.9961755871772766,grad_norm: 0.9825114785125958, iteration: 50924
loss: 1.0042210817337036,grad_norm: 0.9999990337606242, iteration: 50925
loss: 1.0174258947372437,grad_norm: 0.9999991772481542, iteration: 50926
loss: 0.9935736656188965,grad_norm: 0.9999991838439124, iteration: 50927
loss: 0.9744971394538879,grad_norm: 0.9999990639271915, iteration: 50928
loss: 1.000407099723816,grad_norm: 0.8155562682960037, iteration: 50929
loss: 0.9970716834068298,grad_norm: 0.9999990707664672, iteration: 50930
loss: 1.0165810585021973,grad_norm: 0.8551747762456949, iteration: 50931
loss: 1.0114686489105225,grad_norm: 0.9999991442652819, iteration: 50932
loss: 0.9963249564170837,grad_norm: 0.9503493288415386, iteration: 50933
loss: 0.9894102215766907,grad_norm: 0.926542221788322, iteration: 50934
loss: 0.9980795979499817,grad_norm: 0.9999991814264317, iteration: 50935
loss: 1.0067600011825562,grad_norm: 0.9999991062109209, iteration: 50936
loss: 0.9916188716888428,grad_norm: 0.9999992037539011, iteration: 50937
loss: 0.9686031341552734,grad_norm: 0.9356429304470854, iteration: 50938
loss: 0.9931397438049316,grad_norm: 0.9861300191561942, iteration: 50939
loss: 0.9832648634910583,grad_norm: 0.8039090459938443, iteration: 50940
loss: 1.0154913663864136,grad_norm: 0.9029493997273548, iteration: 50941
loss: 0.9614023566246033,grad_norm: 0.9349209270918755, iteration: 50942
loss: 0.9988937377929688,grad_norm: 0.8914058946854692, iteration: 50943
loss: 1.0221644639968872,grad_norm: 0.9999991416759427, iteration: 50944
loss: 1.0099055767059326,grad_norm: 0.9999991587253374, iteration: 50945
loss: 1.0194913148880005,grad_norm: 0.8946501875232534, iteration: 50946
loss: 0.9552596807479858,grad_norm: 0.9999990773980011, iteration: 50947
loss: 0.9663796424865723,grad_norm: 0.8267596330457015, iteration: 50948
loss: 0.9590833783149719,grad_norm: 0.9750254732412907, iteration: 50949
loss: 1.0282599925994873,grad_norm: 0.9999990121838572, iteration: 50950
loss: 0.982973575592041,grad_norm: 0.888287190549847, iteration: 50951
loss: 1.011116862297058,grad_norm: 0.9999992514755467, iteration: 50952
loss: 1.022384762763977,grad_norm: 0.9999990958217339, iteration: 50953
loss: 0.968582272529602,grad_norm: 0.9999990752590714, iteration: 50954
loss: 1.0571954250335693,grad_norm: 0.9999991415600741, iteration: 50955
loss: 1.0132545232772827,grad_norm: 0.9999991903523704, iteration: 50956
loss: 1.009443759918213,grad_norm: 0.9507912694261853, iteration: 50957
loss: 0.9911843538284302,grad_norm: 0.9981933426263028, iteration: 50958
loss: 1.005297064781189,grad_norm: 0.9036612626670364, iteration: 50959
loss: 0.9957874417304993,grad_norm: 0.9999999527553926, iteration: 50960
loss: 0.9666756987571716,grad_norm: 0.8351238023800461, iteration: 50961
loss: 1.010974645614624,grad_norm: 0.9999989711304778, iteration: 50962
loss: 1.078590750694275,grad_norm: 0.9999991692492847, iteration: 50963
loss: 1.0224586725234985,grad_norm: 0.9999990009309342, iteration: 50964
loss: 1.0000059604644775,grad_norm: 0.9999991129682091, iteration: 50965
loss: 1.0007295608520508,grad_norm: 0.9454962389314822, iteration: 50966
loss: 0.9846867918968201,grad_norm: 0.9999990685672837, iteration: 50967
loss: 1.0441704988479614,grad_norm: 0.9999992918097049, iteration: 50968
loss: 0.9839978814125061,grad_norm: 0.989517864892351, iteration: 50969
loss: 0.9575772881507874,grad_norm: 0.9999990808457967, iteration: 50970
loss: 1.0548228025436401,grad_norm: 0.9999995083451477, iteration: 50971
loss: 0.9693033695220947,grad_norm: 0.9188646587193043, iteration: 50972
loss: 0.9773160815238953,grad_norm: 0.9999990888170649, iteration: 50973
loss: 0.9929670691490173,grad_norm: 0.9376664504564131, iteration: 50974
loss: 0.9807591438293457,grad_norm: 0.8790179544614645, iteration: 50975
loss: 1.0391558408737183,grad_norm: 0.9999991801993037, iteration: 50976
loss: 1.0024546384811401,grad_norm: 0.9955774532254346, iteration: 50977
loss: 0.967248260974884,grad_norm: 0.9999992208999374, iteration: 50978
loss: 1.0071440935134888,grad_norm: 0.7754835670220604, iteration: 50979
loss: 1.0178133249282837,grad_norm: 0.9737630295880794, iteration: 50980
loss: 1.0305159091949463,grad_norm: 0.9519592026390662, iteration: 50981
loss: 1.0018434524536133,grad_norm: 0.9999991881351216, iteration: 50982
loss: 0.9826481342315674,grad_norm: 0.9999990441256391, iteration: 50983
loss: 1.0189440250396729,grad_norm: 0.9500843720408073, iteration: 50984
loss: 0.9997762441635132,grad_norm: 0.9999995380182306, iteration: 50985
loss: 1.0311963558197021,grad_norm: 0.999999091229109, iteration: 50986
loss: 0.9970792531967163,grad_norm: 0.9999989731182164, iteration: 50987
loss: 1.0070509910583496,grad_norm: 0.9999990532465631, iteration: 50988
loss: 0.9865805506706238,grad_norm: 0.9940233013043316, iteration: 50989
loss: 0.9764127731323242,grad_norm: 0.8747906978543188, iteration: 50990
loss: 0.9895722270011902,grad_norm: 0.9198293909585027, iteration: 50991
loss: 0.9882683157920837,grad_norm: 0.9999991307634356, iteration: 50992
loss: 0.9946621656417847,grad_norm: 0.99999912744247, iteration: 50993
loss: 0.960911750793457,grad_norm: 0.9999991883848839, iteration: 50994
loss: 0.9905040264129639,grad_norm: 0.970508500715194, iteration: 50995
loss: 0.9607645273208618,grad_norm: 0.9725258173182965, iteration: 50996
loss: 0.9966857433319092,grad_norm: 0.999998983674286, iteration: 50997
loss: 0.9801386594772339,grad_norm: 0.9727538360411646, iteration: 50998
loss: 0.9970081448554993,grad_norm: 0.9999993299767386, iteration: 50999
loss: 1.082849383354187,grad_norm: 0.9999994175679845, iteration: 51000
loss: 1.006541132926941,grad_norm: 0.8237263393368205, iteration: 51001
loss: 0.9958584904670715,grad_norm: 0.9606342340719431, iteration: 51002
loss: 1.020293951034546,grad_norm: 0.9999990954957646, iteration: 51003
loss: 1.0158427953720093,grad_norm: 0.9999990418681654, iteration: 51004
loss: 1.0179044008255005,grad_norm: 0.999999205377624, iteration: 51005
loss: 1.0081504583358765,grad_norm: 0.9833968370087034, iteration: 51006
loss: 0.968690812587738,grad_norm: 0.9886076174742172, iteration: 51007
loss: 1.0695834159851074,grad_norm: 0.9999991550274752, iteration: 51008
loss: 0.9735074043273926,grad_norm: 0.9999990948710222, iteration: 51009
loss: 0.9928810596466064,grad_norm: 0.9999992534412241, iteration: 51010
loss: 0.9704004526138306,grad_norm: 0.9199827810815376, iteration: 51011
loss: 0.9842062592506409,grad_norm: 0.9950614580838134, iteration: 51012
loss: 0.9876806139945984,grad_norm: 0.9999989760712359, iteration: 51013
loss: 1.022148847579956,grad_norm: 0.9999990693723753, iteration: 51014
loss: 1.0185515880584717,grad_norm: 0.9563881316540265, iteration: 51015
loss: 1.0115121603012085,grad_norm: 0.907171778274064, iteration: 51016
loss: 1.0004562139511108,grad_norm: 0.9830404480053214, iteration: 51017
loss: 0.9633368849754333,grad_norm: 0.9999989683844304, iteration: 51018
loss: 1.0112507343292236,grad_norm: 0.9786650585173108, iteration: 51019
loss: 0.9880564212799072,grad_norm: 0.999999045270035, iteration: 51020
loss: 1.0035276412963867,grad_norm: 0.9999991732860651, iteration: 51021
loss: 0.9869927763938904,grad_norm: 0.8311484674973509, iteration: 51022
loss: 0.9861674904823303,grad_norm: 0.9769297105056846, iteration: 51023
loss: 0.9974680542945862,grad_norm: 0.9999993506884662, iteration: 51024
loss: 1.0334659814834595,grad_norm: 0.8576833993502375, iteration: 51025
loss: 1.003754734992981,grad_norm: 0.9999995382030237, iteration: 51026
loss: 0.9928662776947021,grad_norm: 0.9999992174749751, iteration: 51027
loss: 1.0194103717803955,grad_norm: 0.999999047629079, iteration: 51028
loss: 1.032604455947876,grad_norm: 0.9999992981355827, iteration: 51029
loss: 0.9638733863830566,grad_norm: 0.9999991695182125, iteration: 51030
loss: 1.0363144874572754,grad_norm: 0.9999992390890774, iteration: 51031
loss: 0.9751799702644348,grad_norm: 0.9622394000147996, iteration: 51032
loss: 0.9914992451667786,grad_norm: 0.999999138803909, iteration: 51033
loss: 1.005854845046997,grad_norm: 0.9792939284153093, iteration: 51034
loss: 1.083424687385559,grad_norm: 0.9999999373011103, iteration: 51035
loss: 1.0091640949249268,grad_norm: 0.9556562424943613, iteration: 51036
loss: 0.9500457048416138,grad_norm: 0.9426231577029879, iteration: 51037
loss: 1.042783498764038,grad_norm: 0.9999994837653762, iteration: 51038
loss: 0.973045289516449,grad_norm: 0.9900601385887211, iteration: 51039
loss: 1.0749051570892334,grad_norm: 0.9999990916236715, iteration: 51040
loss: 1.0286401510238647,grad_norm: 0.9119170489310566, iteration: 51041
loss: 1.005033016204834,grad_norm: 0.9053739356851156, iteration: 51042
loss: 0.9956195950508118,grad_norm: 0.9369886301567508, iteration: 51043
loss: 0.9924092888832092,grad_norm: 0.9999991465995881, iteration: 51044
loss: 1.0051538944244385,grad_norm: 0.999999288497504, iteration: 51045
loss: 1.0065690279006958,grad_norm: 0.8206222809335203, iteration: 51046
loss: 0.9951349496841431,grad_norm: 0.8405538104991486, iteration: 51047
loss: 1.0040242671966553,grad_norm: 0.9999991521852497, iteration: 51048
loss: 1.0017298460006714,grad_norm: 0.9061738741883507, iteration: 51049
loss: 1.0659401416778564,grad_norm: 0.9999992435216821, iteration: 51050
loss: 1.0020277500152588,grad_norm: 0.999999013479355, iteration: 51051
loss: 1.0072746276855469,grad_norm: 0.9649064029213905, iteration: 51052
loss: 1.008546233177185,grad_norm: 0.9753501708932142, iteration: 51053
loss: 0.995414137840271,grad_norm: 0.9208225459821742, iteration: 51054
loss: 1.0100568532943726,grad_norm: 0.90962092176349, iteration: 51055
loss: 1.0228315591812134,grad_norm: 0.9999990872149355, iteration: 51056
loss: 1.0302038192749023,grad_norm: 0.9999990801723644, iteration: 51057
loss: 1.0245026350021362,grad_norm: 0.9821648724992087, iteration: 51058
loss: 0.9788035154342651,grad_norm: 0.9999991699158531, iteration: 51059
loss: 1.018936276435852,grad_norm: 0.9999990780190454, iteration: 51060
loss: 0.9796004891395569,grad_norm: 0.9999991370650813, iteration: 51061
loss: 0.9892726540565491,grad_norm: 0.9999994341966547, iteration: 51062
loss: 1.0460126399993896,grad_norm: 0.9024170194457998, iteration: 51063
loss: 0.9850469827651978,grad_norm: 0.999999061036094, iteration: 51064
loss: 1.0210561752319336,grad_norm: 0.9999996204147705, iteration: 51065
loss: 0.9834488034248352,grad_norm: 0.9999991462655778, iteration: 51066
loss: 0.9945330023765564,grad_norm: 0.918301864677176, iteration: 51067
loss: 1.017456293106079,grad_norm: 0.9999994127098324, iteration: 51068
loss: 1.021024227142334,grad_norm: 0.9999992262919548, iteration: 51069
loss: 0.9869298934936523,grad_norm: 0.9999991347379446, iteration: 51070
loss: 0.9877011775970459,grad_norm: 0.9222408133323943, iteration: 51071
loss: 0.99947190284729,grad_norm: 0.9928643691247185, iteration: 51072
loss: 0.9866764545440674,grad_norm: 0.9999990605874708, iteration: 51073
loss: 1.045718789100647,grad_norm: 0.9066066963092358, iteration: 51074
loss: 1.003404140472412,grad_norm: 0.9999992413568263, iteration: 51075
loss: 1.0400221347808838,grad_norm: 0.9999996549734805, iteration: 51076
loss: 0.9757985472679138,grad_norm: 0.999999121049819, iteration: 51077
loss: 1.0062193870544434,grad_norm: 0.9211713930855304, iteration: 51078
loss: 0.9904359579086304,grad_norm: 0.9999992902375401, iteration: 51079
loss: 0.9874938726425171,grad_norm: 0.9999991341218059, iteration: 51080
loss: 1.0038005113601685,grad_norm: 0.9491680286486289, iteration: 51081
loss: 0.9704126119613647,grad_norm: 0.901200012164863, iteration: 51082
loss: 1.0497043132781982,grad_norm: 0.999999574705408, iteration: 51083
loss: 0.9795933961868286,grad_norm: 0.9999990184551468, iteration: 51084
loss: 0.997265100479126,grad_norm: 0.9032519340074224, iteration: 51085
loss: 0.9880523681640625,grad_norm: 0.9653931890277336, iteration: 51086
loss: 0.9785465002059937,grad_norm: 0.9999990885800575, iteration: 51087
loss: 1.0186516046524048,grad_norm: 0.999999084849427, iteration: 51088
loss: 1.0477086305618286,grad_norm: 0.9999992728453205, iteration: 51089
loss: 0.9972519874572754,grad_norm: 0.98252023470842, iteration: 51090
loss: 0.98106849193573,grad_norm: 0.8939733643863662, iteration: 51091
loss: 1.0021977424621582,grad_norm: 0.8517736962047611, iteration: 51092
loss: 1.0079944133758545,grad_norm: 0.9999989993791008, iteration: 51093
loss: 1.0167572498321533,grad_norm: 0.9945445005613898, iteration: 51094
loss: 0.9629473686218262,grad_norm: 0.9999990694527123, iteration: 51095
loss: 1.0200049877166748,grad_norm: 0.9999993114709559, iteration: 51096
loss: 0.999582052230835,grad_norm: 0.999999126482644, iteration: 51097
loss: 1.0484623908996582,grad_norm: 0.9599391801341971, iteration: 51098
loss: 1.0316964387893677,grad_norm: 0.9999993220732085, iteration: 51099
loss: 0.9783792495727539,grad_norm: 0.9999993168028358, iteration: 51100
loss: 1.024095892906189,grad_norm: 0.999999101521839, iteration: 51101
loss: 1.035671591758728,grad_norm: 0.9800333976860153, iteration: 51102
loss: 1.027842402458191,grad_norm: 0.9999995129857412, iteration: 51103
loss: 1.0153703689575195,grad_norm: 0.9999991795448808, iteration: 51104
loss: 1.032666802406311,grad_norm: 0.999999013473229, iteration: 51105
loss: 1.025333285331726,grad_norm: 0.9999994758098278, iteration: 51106
loss: 0.9766049981117249,grad_norm: 0.999999085571208, iteration: 51107
loss: 1.0500836372375488,grad_norm: 0.9323507126675709, iteration: 51108
loss: 0.9765855073928833,grad_norm: 0.9670454312830824, iteration: 51109
loss: 1.0029730796813965,grad_norm: 0.9999991121823101, iteration: 51110
loss: 1.02052640914917,grad_norm: 0.9999991230818628, iteration: 51111
loss: 0.9889290928840637,grad_norm: 0.9999995447378353, iteration: 51112
loss: 0.9531358480453491,grad_norm: 0.9503502941357576, iteration: 51113
loss: 1.015203833580017,grad_norm: 0.9774976077021086, iteration: 51114
loss: 1.0204564332962036,grad_norm: 0.999999234802773, iteration: 51115
loss: 1.0075864791870117,grad_norm: 0.9999990783028535, iteration: 51116
loss: 0.9713160991668701,grad_norm: 0.9999991156708075, iteration: 51117
loss: 0.9668099284172058,grad_norm: 0.945150644166264, iteration: 51118
loss: 0.9989373683929443,grad_norm: 0.9999991509383725, iteration: 51119
loss: 1.007183313369751,grad_norm: 0.9999991870726087, iteration: 51120
loss: 1.0095715522766113,grad_norm: 0.8920815393891919, iteration: 51121
loss: 0.9744145274162292,grad_norm: 0.9281101490832357, iteration: 51122
loss: 1.0239899158477783,grad_norm: 0.9399469020103616, iteration: 51123
loss: 1.0069992542266846,grad_norm: 0.7982636129207705, iteration: 51124
loss: 1.0098968744277954,grad_norm: 0.9866209721203965, iteration: 51125
loss: 0.9928698539733887,grad_norm: 0.9999992744211783, iteration: 51126
loss: 1.0192714929580688,grad_norm: 0.9422077971102102, iteration: 51127
loss: 0.9795895218849182,grad_norm: 0.9999990453955512, iteration: 51128
loss: 1.0449315309524536,grad_norm: 0.9999990845934641, iteration: 51129
loss: 0.9656518697738647,grad_norm: 0.983868755328479, iteration: 51130
loss: 0.9424758553504944,grad_norm: 0.9999992740646916, iteration: 51131
loss: 0.9962019920349121,grad_norm: 0.9999992292579488, iteration: 51132
loss: 1.0327503681182861,grad_norm: 0.9999989942650499, iteration: 51133
loss: 0.9740980267524719,grad_norm: 0.99999922614459, iteration: 51134
loss: 1.0146887302398682,grad_norm: 0.99999927032666, iteration: 51135
loss: 0.9988089799880981,grad_norm: 0.9999991244973545, iteration: 51136
loss: 0.9812884330749512,grad_norm: 0.9429004802864609, iteration: 51137
loss: 0.9855615496635437,grad_norm: 0.9999990590619854, iteration: 51138
loss: 1.0219401121139526,grad_norm: 0.9999991874452556, iteration: 51139
loss: 1.001743197441101,grad_norm: 0.8754816087476763, iteration: 51140
loss: 1.0167430639266968,grad_norm: 0.9792328319927596, iteration: 51141
loss: 1.0972706079483032,grad_norm: 0.9999997663962655, iteration: 51142
loss: 0.9527830481529236,grad_norm: 0.9999990609975018, iteration: 51143
loss: 1.008657455444336,grad_norm: 0.8600826942627584, iteration: 51144
loss: 0.9990381002426147,grad_norm: 0.9654048129554492, iteration: 51145
loss: 1.034770131111145,grad_norm: 0.8538560749441826, iteration: 51146
loss: 1.0331168174743652,grad_norm: 0.9999991647629791, iteration: 51147
loss: 0.9800352454185486,grad_norm: 0.9999989959424664, iteration: 51148
loss: 1.008381724357605,grad_norm: 0.9999990962886361, iteration: 51149
loss: 1.047065019607544,grad_norm: 0.9899693765856921, iteration: 51150
loss: 0.9696480631828308,grad_norm: 0.9999992007982091, iteration: 51151
loss: 0.9742552042007446,grad_norm: 0.9999990246530809, iteration: 51152
loss: 0.9817765951156616,grad_norm: 0.9999990858986073, iteration: 51153
loss: 0.9557002782821655,grad_norm: 0.9264160451431344, iteration: 51154
loss: 1.0322210788726807,grad_norm: 0.9999991740016346, iteration: 51155
loss: 1.0317023992538452,grad_norm: 0.9999994483900705, iteration: 51156
loss: 0.982031524181366,grad_norm: 0.9736472898686007, iteration: 51157
loss: 0.9941425323486328,grad_norm: 0.8545215610121244, iteration: 51158
loss: 0.988669216632843,grad_norm: 0.9535059951344433, iteration: 51159
loss: 1.0405656099319458,grad_norm: 0.8921664814202555, iteration: 51160
loss: 1.024702548980713,grad_norm: 0.999999322159756, iteration: 51161
loss: 1.0082991123199463,grad_norm: 0.7732567459116116, iteration: 51162
loss: 1.0346170663833618,grad_norm: 0.9999995412641458, iteration: 51163
loss: 0.970154881477356,grad_norm: 0.9221826794911158, iteration: 51164
loss: 1.0053176879882812,grad_norm: 0.9999992644819171, iteration: 51165
loss: 0.9554803967475891,grad_norm: 0.9999992335137762, iteration: 51166
loss: 0.9829522371292114,grad_norm: 0.9969631733657615, iteration: 51167
loss: 0.9920979738235474,grad_norm: 0.9999993782600851, iteration: 51168
loss: 1.020135521888733,grad_norm: 0.999999300960639, iteration: 51169
loss: 1.0429611206054688,grad_norm: 0.999999877563506, iteration: 51170
loss: 1.0223389863967896,grad_norm: 0.9999993128864447, iteration: 51171
loss: 1.0059484243392944,grad_norm: 0.9999991432761345, iteration: 51172
loss: 1.0013844966888428,grad_norm: 0.9999989561079285, iteration: 51173
loss: 1.0408154726028442,grad_norm: 0.970014516178352, iteration: 51174
loss: 1.0296708345413208,grad_norm: 0.9689399466802158, iteration: 51175
loss: 0.9749469757080078,grad_norm: 0.9673009741240718, iteration: 51176
loss: 1.0267860889434814,grad_norm: 0.9999994977865523, iteration: 51177
loss: 0.994379997253418,grad_norm: 0.9999992782591007, iteration: 51178
loss: 1.0114394426345825,grad_norm: 0.9999992213155585, iteration: 51179
loss: 0.9737208485603333,grad_norm: 0.8035607326349791, iteration: 51180
loss: 1.0006687641143799,grad_norm: 0.9999989906118539, iteration: 51181
loss: 0.9846230745315552,grad_norm: 0.9999990563937565, iteration: 51182
loss: 1.0335105657577515,grad_norm: 0.8369978902060268, iteration: 51183
loss: 0.9896562099456787,grad_norm: 0.9864800654128426, iteration: 51184
loss: 0.9995230436325073,grad_norm: 0.9425344239190399, iteration: 51185
loss: 1.0062708854675293,grad_norm: 0.9999991618274691, iteration: 51186
loss: 1.0006225109100342,grad_norm: 0.8683984634340783, iteration: 51187
loss: 0.9866699576377869,grad_norm: 0.9896084220996173, iteration: 51188
loss: 1.0335770845413208,grad_norm: 0.9999993751113722, iteration: 51189
loss: 1.0311145782470703,grad_norm: 0.9916709753452481, iteration: 51190
loss: 1.0231584310531616,grad_norm: 0.9999992359356987, iteration: 51191
loss: 0.9751980900764465,grad_norm: 0.9999991112003451, iteration: 51192
loss: 1.0231099128723145,grad_norm: 0.999999492321648, iteration: 51193
loss: 0.9912715554237366,grad_norm: 0.9999991154154599, iteration: 51194
loss: 1.030187964439392,grad_norm: 0.9999991402998569, iteration: 51195
loss: 0.9766658544540405,grad_norm: 0.9999991501060802, iteration: 51196
loss: 1.0663175582885742,grad_norm: 0.9999997660411895, iteration: 51197
loss: 1.0201019048690796,grad_norm: 0.9999994854839996, iteration: 51198
loss: 1.0235753059387207,grad_norm: 0.9756313986285089, iteration: 51199
loss: 1.0206847190856934,grad_norm: 0.999999214967038, iteration: 51200
loss: 0.9994984269142151,grad_norm: 0.8721748055493044, iteration: 51201
loss: 1.0034278631210327,grad_norm: 0.9999995400100499, iteration: 51202
loss: 0.9768303632736206,grad_norm: 0.9999991418672567, iteration: 51203
loss: 1.0178284645080566,grad_norm: 0.9999990051043478, iteration: 51204
loss: 0.9708234071731567,grad_norm: 0.9999990881994754, iteration: 51205
loss: 1.00404691696167,grad_norm: 0.9999990846874776, iteration: 51206
loss: 1.016068458557129,grad_norm: 0.9999991680716964, iteration: 51207
loss: 1.0285255908966064,grad_norm: 0.9999991764759044, iteration: 51208
loss: 0.9729763865470886,grad_norm: 0.9999995711631473, iteration: 51209
loss: 1.0395989418029785,grad_norm: 0.9999990968276486, iteration: 51210
loss: 1.0505685806274414,grad_norm: 0.9999993140734895, iteration: 51211
loss: 1.004576563835144,grad_norm: 0.9999992123875118, iteration: 51212
loss: 1.005950927734375,grad_norm: 0.89308640183203, iteration: 51213
loss: 1.0102572441101074,grad_norm: 0.805758629327134, iteration: 51214
loss: 1.0239323377609253,grad_norm: 0.9682715033369335, iteration: 51215
loss: 1.0525102615356445,grad_norm: 0.9999990675105845, iteration: 51216
loss: 1.0632096529006958,grad_norm: 0.9999992520452825, iteration: 51217
loss: 0.9869822263717651,grad_norm: 0.9859827980595077, iteration: 51218
loss: 0.9727051854133606,grad_norm: 0.9999992787443905, iteration: 51219
loss: 1.1289312839508057,grad_norm: 1.0000000303995107, iteration: 51220
loss: 0.9828775525093079,grad_norm: 0.9999993984263014, iteration: 51221
loss: 1.0205483436584473,grad_norm: 0.9999992268783764, iteration: 51222
loss: 1.0186333656311035,grad_norm: 0.9999989423702558, iteration: 51223
loss: 1.0004498958587646,grad_norm: 0.9224045411153592, iteration: 51224
loss: 0.9843608140945435,grad_norm: 0.8603737927149169, iteration: 51225
loss: 1.0071020126342773,grad_norm: 0.9999992530029892, iteration: 51226
loss: 1.0167527198791504,grad_norm: 0.9999997893566184, iteration: 51227
loss: 1.0146349668502808,grad_norm: 0.9999990771426341, iteration: 51228
loss: 1.0206633806228638,grad_norm: 0.9999991168332786, iteration: 51229
loss: 1.0194292068481445,grad_norm: 0.8517276060084744, iteration: 51230
loss: 1.004607915878296,grad_norm: 0.9999998741365609, iteration: 51231
loss: 1.0235717296600342,grad_norm: 0.9172722105717491, iteration: 51232
loss: 0.9758366942405701,grad_norm: 0.9999990784861127, iteration: 51233
loss: 1.0089792013168335,grad_norm: 0.9999992820045597, iteration: 51234
loss: 1.0142829418182373,grad_norm: 0.9999997633916076, iteration: 51235
loss: 0.9662588834762573,grad_norm: 0.9999991670866412, iteration: 51236
loss: 0.987784743309021,grad_norm: 0.8115550917970065, iteration: 51237
loss: 1.0201153755187988,grad_norm: 0.9999990688895221, iteration: 51238
loss: 1.0213532447814941,grad_norm: 0.9898654581187963, iteration: 51239
loss: 1.1319960355758667,grad_norm: 0.9999995661299264, iteration: 51240
loss: 0.9850975275039673,grad_norm: 0.9999990804492801, iteration: 51241
loss: 1.0242948532104492,grad_norm: 0.9999992791925011, iteration: 51242
loss: 1.0254396200180054,grad_norm: 0.9008070958564692, iteration: 51243
loss: 0.9647225141525269,grad_norm: 0.9768386757836217, iteration: 51244
loss: 1.0144625902175903,grad_norm: 0.9999991269726322, iteration: 51245
loss: 1.0203731060028076,grad_norm: 0.999998993086963, iteration: 51246
loss: 1.0117278099060059,grad_norm: 0.9661357645264756, iteration: 51247
loss: 1.0000801086425781,grad_norm: 0.9397435106442747, iteration: 51248
loss: 1.008731484413147,grad_norm: 0.9399358566268091, iteration: 51249
loss: 1.0559059381484985,grad_norm: 0.9454088359530722, iteration: 51250
loss: 1.0337616205215454,grad_norm: 0.9999991220228298, iteration: 51251
loss: 1.0383156538009644,grad_norm: 0.9999990706979124, iteration: 51252
loss: 1.0037012100219727,grad_norm: 0.9999991908044946, iteration: 51253
loss: 1.030003547668457,grad_norm: 0.9999996388328729, iteration: 51254
loss: 1.0204732418060303,grad_norm: 0.9248962652405177, iteration: 51255
loss: 1.03927481174469,grad_norm: 0.9999989867253377, iteration: 51256
loss: 0.9940422177314758,grad_norm: 0.9006405230233278, iteration: 51257
loss: 0.9761261940002441,grad_norm: 0.8820786933175141, iteration: 51258
loss: 0.9763414859771729,grad_norm: 0.9999989715647514, iteration: 51259
loss: 0.9903919696807861,grad_norm: 0.999999134273212, iteration: 51260
loss: 1.0066684484481812,grad_norm: 0.9999990767142134, iteration: 51261
loss: 0.9902101159095764,grad_norm: 0.9999992997183946, iteration: 51262
loss: 0.9842864274978638,grad_norm: 0.8003850739025961, iteration: 51263
loss: 0.991902768611908,grad_norm: 0.9999990818477676, iteration: 51264
loss: 1.0317888259887695,grad_norm: 0.9999990838066423, iteration: 51265
loss: 1.004442811012268,grad_norm: 0.9999990467225015, iteration: 51266
loss: 1.0278557538986206,grad_norm: 0.9999994719980398, iteration: 51267
loss: 1.011081576347351,grad_norm: 0.9999992890157661, iteration: 51268
loss: 0.9707425832748413,grad_norm: 0.9999991139843258, iteration: 51269
loss: 1.0157771110534668,grad_norm: 0.9999991585333495, iteration: 51270
loss: 0.996762752532959,grad_norm: 0.9999991188237658, iteration: 51271
loss: 1.0379676818847656,grad_norm: 0.9999990284353639, iteration: 51272
loss: 0.977347195148468,grad_norm: 0.9623118664290076, iteration: 51273
loss: 0.9637812376022339,grad_norm: 0.960655182999453, iteration: 51274
loss: 1.0009348392486572,grad_norm: 0.9999989757040274, iteration: 51275
loss: 0.9805030226707458,grad_norm: 0.9985903018276057, iteration: 51276
loss: 1.019728422164917,grad_norm: 0.9999990354852443, iteration: 51277
loss: 0.9818007946014404,grad_norm: 0.8837980850338576, iteration: 51278
loss: 1.006146788597107,grad_norm: 0.9999991422171091, iteration: 51279
loss: 1.0205045938491821,grad_norm: 0.9702194656446474, iteration: 51280
loss: 0.9922263622283936,grad_norm: 0.9999991383830339, iteration: 51281
loss: 0.9705600738525391,grad_norm: 0.9514480256735609, iteration: 51282
loss: 0.9997285008430481,grad_norm: 0.8284890152778324, iteration: 51283
loss: 1.0043400526046753,grad_norm: 0.9999990753228687, iteration: 51284
loss: 0.9710691571235657,grad_norm: 0.9999992279532284, iteration: 51285
loss: 1.0721981525421143,grad_norm: 0.9999991812027207, iteration: 51286
loss: 0.9712797403335571,grad_norm: 0.9999991801079261, iteration: 51287
loss: 1.017857313156128,grad_norm: 0.850359123907274, iteration: 51288
loss: 0.97691810131073,grad_norm: 0.9999991887960822, iteration: 51289
loss: 0.9764618873596191,grad_norm: 0.8990451151384219, iteration: 51290
loss: 1.0128631591796875,grad_norm: 0.9658386794509211, iteration: 51291
loss: 0.989665150642395,grad_norm: 0.9183479496231313, iteration: 51292
loss: 1.0172080993652344,grad_norm: 0.99999913950412, iteration: 51293
loss: 1.0193181037902832,grad_norm: 0.9999990465033315, iteration: 51294
loss: 1.0036396980285645,grad_norm: 0.9692289592014606, iteration: 51295
loss: 0.9687173366546631,grad_norm: 0.999999280063047, iteration: 51296
loss: 0.9791919589042664,grad_norm: 0.945774828179135, iteration: 51297
loss: 1.0028096437454224,grad_norm: 0.9999991168492773, iteration: 51298
loss: 0.9915233850479126,grad_norm: 0.8780243968814514, iteration: 51299
loss: 1.016396164894104,grad_norm: 0.9999991310094175, iteration: 51300
loss: 0.9897580146789551,grad_norm: 0.9999990709044551, iteration: 51301
loss: 1.0118402242660522,grad_norm: 0.9999991185955813, iteration: 51302
loss: 1.0292494297027588,grad_norm: 0.8613000333181565, iteration: 51303
loss: 0.9763979911804199,grad_norm: 0.8636357504101453, iteration: 51304
loss: 1.0021189451217651,grad_norm: 0.9999992949303508, iteration: 51305
loss: 1.0031768083572388,grad_norm: 0.8962248169780485, iteration: 51306
loss: 0.982524573802948,grad_norm: 0.87352691074368, iteration: 51307
loss: 0.9768663048744202,grad_norm: 0.9999991731013392, iteration: 51308
loss: 1.0342051982879639,grad_norm: 0.9421335658654884, iteration: 51309
loss: 1.0171579122543335,grad_norm: 0.882769373298121, iteration: 51310
loss: 1.0067050457000732,grad_norm: 0.999999090675439, iteration: 51311
loss: 1.0037730932235718,grad_norm: 0.8545044801198843, iteration: 51312
loss: 1.007928490638733,grad_norm: 0.8472065162035749, iteration: 51313
loss: 1.0049031972885132,grad_norm: 0.9999990705058268, iteration: 51314
loss: 1.0290582180023193,grad_norm: 0.9126425369423877, iteration: 51315
loss: 1.007790207862854,grad_norm: 0.9999992055950052, iteration: 51316
loss: 1.0081247091293335,grad_norm: 0.8930930449748863, iteration: 51317
loss: 0.9970961809158325,grad_norm: 0.9999996203161777, iteration: 51318
loss: 0.9637402296066284,grad_norm: 0.9999991382940506, iteration: 51319
loss: 1.0243836641311646,grad_norm: 0.9992809547452276, iteration: 51320
loss: 1.0361735820770264,grad_norm: 0.9125567139263611, iteration: 51321
loss: 0.9969578981399536,grad_norm: 0.999999187949403, iteration: 51322
loss: 0.9337683320045471,grad_norm: 0.9999990165912126, iteration: 51323
loss: 1.008175253868103,grad_norm: 0.9348650916697707, iteration: 51324
loss: 1.0087591409683228,grad_norm: 0.9625377029545408, iteration: 51325
loss: 1.018814206123352,grad_norm: 0.9776269079140602, iteration: 51326
loss: 1.0061876773834229,grad_norm: 0.8733261986449284, iteration: 51327
loss: 1.010010004043579,grad_norm: 0.9227796892766814, iteration: 51328
loss: 1.0272713899612427,grad_norm: 0.9999992004690291, iteration: 51329
loss: 1.0079787969589233,grad_norm: 0.8653471633261904, iteration: 51330
loss: 0.982741117477417,grad_norm: 0.9775641813027867, iteration: 51331
loss: 1.070494532585144,grad_norm: 0.9162681760240311, iteration: 51332
loss: 0.9716140627861023,grad_norm: 0.9402285373565032, iteration: 51333
loss: 0.9983438849449158,grad_norm: 0.8075678362400105, iteration: 51334
loss: 1.0035121440887451,grad_norm: 0.8488999932458796, iteration: 51335
loss: 1.0321437120437622,grad_norm: 0.9999992360607287, iteration: 51336
loss: 1.0245040655136108,grad_norm: 0.9999992434889431, iteration: 51337
loss: 0.9488491415977478,grad_norm: 0.9999990535266731, iteration: 51338
loss: 1.0227904319763184,grad_norm: 0.9999990799233508, iteration: 51339
loss: 0.9966976642608643,grad_norm: 0.8068225402912369, iteration: 51340
loss: 1.0008788108825684,grad_norm: 0.9532769783036175, iteration: 51341
loss: 1.0216923952102661,grad_norm: 0.8185041602751574, iteration: 51342
loss: 1.0165919065475464,grad_norm: 0.9808672431005744, iteration: 51343
loss: 1.0245667695999146,grad_norm: 0.9999993161164874, iteration: 51344
loss: 0.9801653623580933,grad_norm: 0.8452234436489379, iteration: 51345
loss: 1.0332508087158203,grad_norm: 0.999999224148923, iteration: 51346
loss: 0.9760441780090332,grad_norm: 0.9396302196404296, iteration: 51347
loss: 0.9980531930923462,grad_norm: 0.9509783156163667, iteration: 51348
loss: 1.0198355913162231,grad_norm: 0.9808032579781165, iteration: 51349
loss: 1.0229154825210571,grad_norm: 0.9999991169492506, iteration: 51350
loss: 1.0313867330551147,grad_norm: 0.9999991161567438, iteration: 51351
loss: 1.0253032445907593,grad_norm: 0.9999992638515872, iteration: 51352
loss: 0.972114086151123,grad_norm: 0.9999992778463338, iteration: 51353
loss: 1.0000046491622925,grad_norm: 0.9381203459334869, iteration: 51354
loss: 1.107014536857605,grad_norm: 0.9999999816007843, iteration: 51355
loss: 0.9974714517593384,grad_norm: 0.9999992204293959, iteration: 51356
loss: 0.9690066576004028,grad_norm: 0.9720460571532831, iteration: 51357
loss: 1.0081409215927124,grad_norm: 0.9320263082601896, iteration: 51358
loss: 1.018454670906067,grad_norm: 0.8965320809787575, iteration: 51359
loss: 0.9757291674613953,grad_norm: 0.8379638263336567, iteration: 51360
loss: 1.013972520828247,grad_norm: 0.9999988974856592, iteration: 51361
loss: 1.0375727415084839,grad_norm: 0.9999989803120537, iteration: 51362
loss: 0.9779741764068604,grad_norm: 0.853696267417647, iteration: 51363
loss: 1.024326205253601,grad_norm: 0.9357454842875977, iteration: 51364
loss: 0.9783421158790588,grad_norm: 0.8518779632900795, iteration: 51365
loss: 1.010841965675354,grad_norm: 0.9999991726572464, iteration: 51366
loss: 0.97260582447052,grad_norm: 0.9999991562454646, iteration: 51367
loss: 1.0451931953430176,grad_norm: 0.9999990339381001, iteration: 51368
loss: 1.038934588432312,grad_norm: 0.9041529463472169, iteration: 51369
loss: 1.011734127998352,grad_norm: 0.9999991460540688, iteration: 51370
loss: 0.9881410598754883,grad_norm: 0.9999994499130257, iteration: 51371
loss: 1.0249217748641968,grad_norm: 0.8789790093152863, iteration: 51372
loss: 1.0058000087738037,grad_norm: 0.8474680011536619, iteration: 51373
loss: 0.981479287147522,grad_norm: 0.8046681398562491, iteration: 51374
loss: 1.0036848783493042,grad_norm: 0.9756435128589805, iteration: 51375
loss: 0.9813902378082275,grad_norm: 0.9999992294024556, iteration: 51376
loss: 1.022814154624939,grad_norm: 0.9518394125889544, iteration: 51377
loss: 1.00193452835083,grad_norm: 0.9999992642675373, iteration: 51378
loss: 1.0226690769195557,grad_norm: 0.9007208636202582, iteration: 51379
loss: 0.9868271350860596,grad_norm: 0.9652403252091847, iteration: 51380
loss: 1.0058242082595825,grad_norm: 0.8965464004249728, iteration: 51381
loss: 0.9745014309883118,grad_norm: 0.9999991891930827, iteration: 51382
loss: 1.0215857028961182,grad_norm: 0.9867006820087443, iteration: 51383
loss: 1.010107159614563,grad_norm: 0.9999997748675064, iteration: 51384
loss: 1.029296875,grad_norm: 0.9919240744922289, iteration: 51385
loss: 1.015621304512024,grad_norm: 0.999999013821672, iteration: 51386
loss: 1.0037522315979004,grad_norm: 0.9999990207689565, iteration: 51387
loss: 1.0265986919403076,grad_norm: 0.9999991536874705, iteration: 51388
loss: 1.0057244300842285,grad_norm: 0.9980943167880507, iteration: 51389
loss: 1.0386738777160645,grad_norm: 0.9999993440396575, iteration: 51390
loss: 0.9965048432350159,grad_norm: 0.956000369963524, iteration: 51391
loss: 0.9937525987625122,grad_norm: 0.9999991730715483, iteration: 51392
loss: 0.9983235001564026,grad_norm: 0.9475742223045737, iteration: 51393
loss: 0.9661076068878174,grad_norm: 0.9936584808703796, iteration: 51394
loss: 1.0879554748535156,grad_norm: 0.9999996533592641, iteration: 51395
loss: 0.9822779893875122,grad_norm: 0.8900958545349691, iteration: 51396
loss: 0.9763034582138062,grad_norm: 0.8265194597371223, iteration: 51397
loss: 1.0105202198028564,grad_norm: 0.9999991180874842, iteration: 51398
loss: 1.0261876583099365,grad_norm: 0.9999990361164924, iteration: 51399
loss: 0.9953179955482483,grad_norm: 0.987110778402689, iteration: 51400
loss: 1.003079891204834,grad_norm: 0.9534446074102106, iteration: 51401
loss: 1.0130716562271118,grad_norm: 0.9999990546198662, iteration: 51402
loss: 1.0561833381652832,grad_norm: 0.9332751695163588, iteration: 51403
loss: 0.9886768460273743,grad_norm: 0.9416895678263165, iteration: 51404
loss: 0.9997090101242065,grad_norm: 0.9999991632085649, iteration: 51405
loss: 0.9705884456634521,grad_norm: 0.9621948658229328, iteration: 51406
loss: 1.0219390392303467,grad_norm: 0.9572742554302395, iteration: 51407
loss: 1.0069369077682495,grad_norm: 0.9854910808839102, iteration: 51408
loss: 1.0241620540618896,grad_norm: 0.8757958453931359, iteration: 51409
loss: 1.0142748355865479,grad_norm: 0.999999123582561, iteration: 51410
loss: 1.0015192031860352,grad_norm: 0.9999992424733476, iteration: 51411
loss: 1.0060425996780396,grad_norm: 0.999999151596431, iteration: 51412
loss: 1.0191888809204102,grad_norm: 0.899276724723152, iteration: 51413
loss: 1.036728858947754,grad_norm: 0.9999991555355422, iteration: 51414
loss: 0.9989702701568604,grad_norm: 0.9999990816567141, iteration: 51415
loss: 0.9866744875907898,grad_norm: 0.9513704120123092, iteration: 51416
loss: 0.9891289472579956,grad_norm: 0.9999999893464427, iteration: 51417
loss: 1.0063862800598145,grad_norm: 0.9999992447596726, iteration: 51418
loss: 1.0086299180984497,grad_norm: 0.9662974968576588, iteration: 51419
loss: 1.0011762380599976,grad_norm: 0.9999990574987835, iteration: 51420
loss: 1.0105454921722412,grad_norm: 0.9999990144839455, iteration: 51421
loss: 1.0356582403182983,grad_norm: 0.9999991160109912, iteration: 51422
loss: 0.986308217048645,grad_norm: 0.9999989316682836, iteration: 51423
loss: 1.0176150798797607,grad_norm: 0.925839015540966, iteration: 51424
loss: 1.0341535806655884,grad_norm: 0.9999990294747613, iteration: 51425
loss: 1.0364903211593628,grad_norm: 0.9999993652796653, iteration: 51426
loss: 1.04975426197052,grad_norm: 0.9999996376622151, iteration: 51427
loss: 1.0092151165008545,grad_norm: 0.9816991815908671, iteration: 51428
loss: 1.0339908599853516,grad_norm: 0.9999990318951512, iteration: 51429
loss: 0.9965583682060242,grad_norm: 0.9652183125938032, iteration: 51430
loss: 1.0002496242523193,grad_norm: 0.8582974289285124, iteration: 51431
loss: 1.0540554523468018,grad_norm: 0.969617314374981, iteration: 51432
loss: 1.0376511812210083,grad_norm: 0.9259736604468288, iteration: 51433
loss: 1.0095278024673462,grad_norm: 0.8017901491800992, iteration: 51434
loss: 1.0000251531600952,grad_norm: 0.786743425392009, iteration: 51435
loss: 0.9735665917396545,grad_norm: 0.9767580690130654, iteration: 51436
loss: 1.0124176740646362,grad_norm: 0.9077928956784895, iteration: 51437
loss: 1.011980652809143,grad_norm: 0.9989756235735322, iteration: 51438
loss: 1.028262972831726,grad_norm: 0.9999990220916987, iteration: 51439
loss: 0.9549945592880249,grad_norm: 0.9999990861628728, iteration: 51440
loss: 1.0314477682113647,grad_norm: 0.9999991185897855, iteration: 51441
loss: 0.978861391544342,grad_norm: 0.9999993864330677, iteration: 51442
loss: 0.967602014541626,grad_norm: 0.8809734619622244, iteration: 51443
loss: 0.9870499968528748,grad_norm: 0.9793484431202558, iteration: 51444
loss: 0.9805592894554138,grad_norm: 0.9909489488677538, iteration: 51445
loss: 1.014685869216919,grad_norm: 0.9999990546674303, iteration: 51446
loss: 1.0087776184082031,grad_norm: 0.9999993223078071, iteration: 51447
loss: 0.9766286015510559,grad_norm: 0.9999991683304378, iteration: 51448
loss: 0.9889647364616394,grad_norm: 0.9939252726470551, iteration: 51449
loss: 0.9876554608345032,grad_norm: 0.9667109807212368, iteration: 51450
loss: 0.9824674129486084,grad_norm: 0.9415993753753124, iteration: 51451
loss: 0.9835161566734314,grad_norm: 0.8673182670566805, iteration: 51452
loss: 1.0386266708374023,grad_norm: 0.8639547553158083, iteration: 51453
loss: 1.0144275426864624,grad_norm: 0.9999990980413062, iteration: 51454
loss: 1.1685092449188232,grad_norm: 0.9999995112200322, iteration: 51455
loss: 0.9803967475891113,grad_norm: 0.9999991690218906, iteration: 51456
loss: 1.0081461668014526,grad_norm: 0.951867586759143, iteration: 51457
loss: 0.977098822593689,grad_norm: 0.9999990244920267, iteration: 51458
loss: 0.9868059158325195,grad_norm: 0.9617438550578291, iteration: 51459
loss: 1.0048003196716309,grad_norm: 0.9999990469661072, iteration: 51460
loss: 1.049086570739746,grad_norm: 0.9999990238847667, iteration: 51461
loss: 1.0193649530410767,grad_norm: 0.9999994291503385, iteration: 51462
loss: 0.9964465498924255,grad_norm: 0.9905205474624695, iteration: 51463
loss: 1.0314990282058716,grad_norm: 0.9999993273607685, iteration: 51464
loss: 0.9998660087585449,grad_norm: 0.9302851873317131, iteration: 51465
loss: 0.9752500653266907,grad_norm: 0.9999991122630851, iteration: 51466
loss: 1.005280613899231,grad_norm: 0.943590439036641, iteration: 51467
loss: 1.0294182300567627,grad_norm: 0.999999198115383, iteration: 51468
loss: 1.0220757722854614,grad_norm: 0.999999094545396, iteration: 51469
loss: 1.070737600326538,grad_norm: 0.9999993266114293, iteration: 51470
loss: 0.9852381944656372,grad_norm: 0.9999993196108494, iteration: 51471
loss: 1.0372966527938843,grad_norm: 0.9999991478817445, iteration: 51472
loss: 1.0136661529541016,grad_norm: 0.9999990814566825, iteration: 51473
loss: 1.0008641481399536,grad_norm: 0.9098763745541881, iteration: 51474
loss: 1.0324628353118896,grad_norm: 0.9999991954095799, iteration: 51475
loss: 1.00410795211792,grad_norm: 0.9759476587494874, iteration: 51476
loss: 1.0188149213790894,grad_norm: 0.9999992400806863, iteration: 51477
loss: 1.0046641826629639,grad_norm: 0.9634543642649167, iteration: 51478
loss: 0.971419095993042,grad_norm: 0.9203336003358125, iteration: 51479
loss: 1.032497763633728,grad_norm: 0.9999991749754439, iteration: 51480
loss: 1.0083849430084229,grad_norm: 0.9999991295265103, iteration: 51481
loss: 0.9745811820030212,grad_norm: 0.9893931597991117, iteration: 51482
loss: 1.043851613998413,grad_norm: 0.9601350181406909, iteration: 51483
loss: 0.99994957447052,grad_norm: 0.8530487429375103, iteration: 51484
loss: 1.0232480764389038,grad_norm: 0.9369714362239884, iteration: 51485
loss: 0.9663853645324707,grad_norm: 0.8311990821978835, iteration: 51486
loss: 1.0153770446777344,grad_norm: 0.9999990343862365, iteration: 51487
loss: 0.9767853021621704,grad_norm: 0.8695701510642128, iteration: 51488
loss: 1.0031650066375732,grad_norm: 0.9999990905389744, iteration: 51489
loss: 0.9782443046569824,grad_norm: 0.9762608323167751, iteration: 51490
loss: 0.9974040985107422,grad_norm: 0.9999993299084694, iteration: 51491
loss: 1.000765323638916,grad_norm: 0.8684883377639677, iteration: 51492
loss: 1.0146546363830566,grad_norm: 0.929533594910867, iteration: 51493
loss: 0.9832375049591064,grad_norm: 0.9999990710880801, iteration: 51494
loss: 0.9720393419265747,grad_norm: 0.8877345839219876, iteration: 51495
loss: 0.9976866245269775,grad_norm: 0.9201018919318413, iteration: 51496
loss: 0.9649161696434021,grad_norm: 0.9999992779940586, iteration: 51497
loss: 1.051123857498169,grad_norm: 0.9999989836358048, iteration: 51498
loss: 0.9820874333381653,grad_norm: 0.9411458083961673, iteration: 51499
loss: 1.0404980182647705,grad_norm: 0.999998965689855, iteration: 51500
loss: 1.02269446849823,grad_norm: 0.9987415858042672, iteration: 51501
loss: 0.9781490564346313,grad_norm: 0.999999144353761, iteration: 51502
loss: 1.0326744318008423,grad_norm: 0.963169616798541, iteration: 51503
loss: 1.0182691812515259,grad_norm: 0.9999993218221508, iteration: 51504
loss: 0.9812030792236328,grad_norm: 0.8371543350626945, iteration: 51505
loss: 0.9697577953338623,grad_norm: 0.9447479617440626, iteration: 51506
loss: 0.9937375783920288,grad_norm: 0.8998483238523751, iteration: 51507
loss: 0.9577867984771729,grad_norm: 0.9999991774234378, iteration: 51508
loss: 1.038339614868164,grad_norm: 0.9793651809199814, iteration: 51509
loss: 1.0160588026046753,grad_norm: 0.9999991852628283, iteration: 51510
loss: 0.999421238899231,grad_norm: 0.9782404803907038, iteration: 51511
loss: 0.9757927060127258,grad_norm: 0.8931179391861336, iteration: 51512
loss: 0.9757549166679382,grad_norm: 0.9118502532484786, iteration: 51513
loss: 0.988950252532959,grad_norm: 0.8904364395043572, iteration: 51514
loss: 1.0193456411361694,grad_norm: 0.9999991238386402, iteration: 51515
loss: 0.9794188737869263,grad_norm: 0.9999990244282027, iteration: 51516
loss: 1.0155243873596191,grad_norm: 0.9357150232097763, iteration: 51517
loss: 0.9769937992095947,grad_norm: 0.8955780298597179, iteration: 51518
loss: 0.9563074111938477,grad_norm: 0.9362659970395122, iteration: 51519
loss: 1.018398642539978,grad_norm: 0.9023144336270749, iteration: 51520
loss: 1.0351828336715698,grad_norm: 0.9999990759364005, iteration: 51521
loss: 1.0287699699401855,grad_norm: 0.9999992936661093, iteration: 51522
loss: 0.9977825880050659,grad_norm: 0.9999992597459467, iteration: 51523
loss: 0.9940113425254822,grad_norm: 0.9853727784035675, iteration: 51524
loss: 0.9960700869560242,grad_norm: 0.9999990298917883, iteration: 51525
loss: 1.0074620246887207,grad_norm: 0.9264273734178456, iteration: 51526
loss: 0.9702464938163757,grad_norm: 0.9047157625531893, iteration: 51527
loss: 0.9940727353096008,grad_norm: 0.9999991383208278, iteration: 51528
loss: 0.9959880709648132,grad_norm: 0.9999991860514295, iteration: 51529
loss: 0.9990538954734802,grad_norm: 0.9999990139902041, iteration: 51530
loss: 0.9671955108642578,grad_norm: 0.9999990068966598, iteration: 51531
loss: 1.0488470792770386,grad_norm: 0.9999992354497558, iteration: 51532
loss: 1.0506545305252075,grad_norm: 0.9999993798769088, iteration: 51533
loss: 1.011051058769226,grad_norm: 0.9999991754236262, iteration: 51534
loss: 1.024526596069336,grad_norm: 0.8760220846608963, iteration: 51535
loss: 0.9933032989501953,grad_norm: 0.9999990888534787, iteration: 51536
loss: 0.9965936541557312,grad_norm: 0.9999991559492373, iteration: 51537
loss: 0.9414631724357605,grad_norm: 0.9999991007269605, iteration: 51538
loss: 0.9855718612670898,grad_norm: 0.8814442862062797, iteration: 51539
loss: 1.0188490152359009,grad_norm: 0.8171114128824034, iteration: 51540
loss: 0.9811699986457825,grad_norm: 0.9531777708637044, iteration: 51541
loss: 0.953795850276947,grad_norm: 0.9999991282405035, iteration: 51542
loss: 1.0032042264938354,grad_norm: 0.9130646591479078, iteration: 51543
loss: 1.0312563180923462,grad_norm: 0.999999149808286, iteration: 51544
loss: 1.0284485816955566,grad_norm: 0.9670446059828727, iteration: 51545
loss: 1.0026267766952515,grad_norm: 0.9386794520063381, iteration: 51546
loss: 1.002094030380249,grad_norm: 0.9540610426849321, iteration: 51547
loss: 0.9701851010322571,grad_norm: 0.9999990761404183, iteration: 51548
loss: 1.026165246963501,grad_norm: 0.9055261167470012, iteration: 51549
loss: 1.0258517265319824,grad_norm: 0.992976120317237, iteration: 51550
loss: 1.0159562826156616,grad_norm: 0.9999991888853295, iteration: 51551
loss: 0.9785327911376953,grad_norm: 0.9615539336376296, iteration: 51552
loss: 0.9805771708488464,grad_norm: 0.9999990569329593, iteration: 51553
loss: 0.9936830401420593,grad_norm: 0.9133672233009669, iteration: 51554
loss: 1.0305395126342773,grad_norm: 0.999999117154697, iteration: 51555
loss: 0.9802433252334595,grad_norm: 0.9357274436639684, iteration: 51556
loss: 1.0186201333999634,grad_norm: 0.9999991697193048, iteration: 51557
loss: 1.0152838230133057,grad_norm: 0.9599556764194191, iteration: 51558
loss: 0.994920015335083,grad_norm: 0.9999990699837454, iteration: 51559
loss: 1.0142475366592407,grad_norm: 0.9999991228254914, iteration: 51560
loss: 0.9601554274559021,grad_norm: 0.9878487788795274, iteration: 51561
loss: 0.9828209280967712,grad_norm: 0.9999991262353647, iteration: 51562
loss: 0.9655627012252808,grad_norm: 0.9877725658468068, iteration: 51563
loss: 1.045291781425476,grad_norm: 0.9999992146461945, iteration: 51564
loss: 1.0229228734970093,grad_norm: 0.9283561619105162, iteration: 51565
loss: 1.0149400234222412,grad_norm: 0.9192976561000437, iteration: 51566
loss: 1.0272690057754517,grad_norm: 0.8838058265770558, iteration: 51567
loss: 0.9587875604629517,grad_norm: 0.9999996069188195, iteration: 51568
loss: 0.9825333952903748,grad_norm: 0.9937650850738681, iteration: 51569
loss: 1.0051748752593994,grad_norm: 0.9986269670665142, iteration: 51570
loss: 0.9934742450714111,grad_norm: 0.9959282163087576, iteration: 51571
loss: 1.0303095579147339,grad_norm: 0.9999990439989757, iteration: 51572
loss: 1.0092453956604004,grad_norm: 0.8843519366366757, iteration: 51573
loss: 0.9777654409408569,grad_norm: 0.9329094332418437, iteration: 51574
loss: 0.9984688758850098,grad_norm: 0.9999991624256668, iteration: 51575
loss: 0.9761020541191101,grad_norm: 0.8968145336967557, iteration: 51576
loss: 1.0348865985870361,grad_norm: 0.9999996672392468, iteration: 51577
loss: 1.0047776699066162,grad_norm: 0.8981239159772412, iteration: 51578
loss: 0.9990586638450623,grad_norm: 0.9397849558201038, iteration: 51579
loss: 0.9951221942901611,grad_norm: 0.9999992087764702, iteration: 51580
loss: 1.0198957920074463,grad_norm: 0.9999990295883677, iteration: 51581
loss: 1.0198087692260742,grad_norm: 0.9999992268928437, iteration: 51582
loss: 0.9623562693595886,grad_norm: 0.9999990500881065, iteration: 51583
loss: 0.994688868522644,grad_norm: 0.9555463934281394, iteration: 51584
loss: 0.9898363947868347,grad_norm: 0.9999991701592579, iteration: 51585
loss: 1.0091804265975952,grad_norm: 0.9999993560428556, iteration: 51586
loss: 0.9609902501106262,grad_norm: 0.9979654494357614, iteration: 51587
loss: 0.9668158292770386,grad_norm: 0.994718793124551, iteration: 51588
loss: 0.9867063164710999,grad_norm: 0.9999991653638177, iteration: 51589
loss: 1.0191563367843628,grad_norm: 0.9734977925400753, iteration: 51590
loss: 0.9885295033454895,grad_norm: 0.9999990405196273, iteration: 51591
loss: 1.014552116394043,grad_norm: 0.9999993301426934, iteration: 51592
loss: 0.9921818971633911,grad_norm: 0.8950290869909344, iteration: 51593
loss: 1.0255250930786133,grad_norm: 0.999999242607539, iteration: 51594
loss: 0.9943085312843323,grad_norm: 0.8903614727027811, iteration: 51595
loss: 0.9948868751525879,grad_norm: 0.9999991949985827, iteration: 51596
loss: 0.9971714019775391,grad_norm: 0.8344383924682367, iteration: 51597
loss: 0.9739370942115784,grad_norm: 0.9999992324122766, iteration: 51598
loss: 0.9950077533721924,grad_norm: 0.9230761158471295, iteration: 51599
loss: 1.0951957702636719,grad_norm: 0.9999996856296439, iteration: 51600
loss: 1.0209739208221436,grad_norm: 0.9999991353038681, iteration: 51601
loss: 0.9931031465530396,grad_norm: 0.9527712672197552, iteration: 51602
loss: 1.0045299530029297,grad_norm: 0.9999990684470446, iteration: 51603
loss: 1.0113563537597656,grad_norm: 0.9453504428442143, iteration: 51604
loss: 1.005841612815857,grad_norm: 0.8684949816553468, iteration: 51605
loss: 0.9708380103111267,grad_norm: 0.8907398700595752, iteration: 51606
loss: 0.9734476804733276,grad_norm: 0.9999990970407042, iteration: 51607
loss: 1.0193421840667725,grad_norm: 0.9999991390575144, iteration: 51608
loss: 0.9959526062011719,grad_norm: 0.9387726363987521, iteration: 51609
loss: 1.0322405099868774,grad_norm: 0.9999991623156584, iteration: 51610
loss: 1.0085211992263794,grad_norm: 0.9999992501303322, iteration: 51611
loss: 1.0067440271377563,grad_norm: 0.9999992116132523, iteration: 51612
loss: 0.9438871145248413,grad_norm: 0.9999989694963286, iteration: 51613
loss: 1.0087409019470215,grad_norm: 0.9773832542931968, iteration: 51614
loss: 1.0005015134811401,grad_norm: 0.9274302870601839, iteration: 51615
loss: 1.0026735067367554,grad_norm: 0.9999991039078536, iteration: 51616
loss: 1.0261492729187012,grad_norm: 0.999999373398551, iteration: 51617
loss: 0.9874259829521179,grad_norm: 0.999999098652074, iteration: 51618
loss: 0.9861673712730408,grad_norm: 0.9730616980729119, iteration: 51619
loss: 0.992574155330658,grad_norm: 0.909534731684695, iteration: 51620
loss: 0.9951962232589722,grad_norm: 0.8626039834387469, iteration: 51621
loss: 0.9891148209571838,grad_norm: 0.9999991302835685, iteration: 51622
loss: 1.001939296722412,grad_norm: 0.9435459658992172, iteration: 51623
loss: 0.9978481531143188,grad_norm: 0.9999991746235998, iteration: 51624
loss: 1.0508580207824707,grad_norm: 0.999999332815083, iteration: 51625
loss: 0.990952730178833,grad_norm: 0.9666246133844413, iteration: 51626
loss: 0.9876510500907898,grad_norm: 0.8738356241247963, iteration: 51627
loss: 1.0215175151824951,grad_norm: 0.9999990286712616, iteration: 51628
loss: 0.9974692463874817,grad_norm: 0.9999992103360781, iteration: 51629
loss: 0.9762736558914185,grad_norm: 0.9999989943017705, iteration: 51630
loss: 1.0174411535263062,grad_norm: 0.9999992318346693, iteration: 51631
loss: 1.0188114643096924,grad_norm: 0.9246326885344356, iteration: 51632
loss: 0.9783360958099365,grad_norm: 0.9508971961649313, iteration: 51633
loss: 1.0238546133041382,grad_norm: 0.9999997282443702, iteration: 51634
loss: 1.0146338939666748,grad_norm: 0.985180972157029, iteration: 51635
loss: 1.0118941068649292,grad_norm: 0.9999990779365721, iteration: 51636
loss: 1.022904634475708,grad_norm: 0.9510598663670524, iteration: 51637
loss: 1.0475800037384033,grad_norm: 0.9999992402326179, iteration: 51638
loss: 0.9955548048019409,grad_norm: 0.9999991222121062, iteration: 51639
loss: 0.9877521395683289,grad_norm: 0.9081532419607844, iteration: 51640
loss: 1.0001224279403687,grad_norm: 0.9412513994718738, iteration: 51641
loss: 0.995464026927948,grad_norm: 0.9999991992344597, iteration: 51642
loss: 0.9881119132041931,grad_norm: 0.9999991609315714, iteration: 51643
loss: 1.0001990795135498,grad_norm: 0.9999990523317812, iteration: 51644
loss: 1.0124797821044922,grad_norm: 0.9999992905561297, iteration: 51645
loss: 0.9934908747673035,grad_norm: 0.9999992088218461, iteration: 51646
loss: 1.0043903589248657,grad_norm: 0.8188197443430798, iteration: 51647
loss: 0.9843860864639282,grad_norm: 0.9331425638180407, iteration: 51648
loss: 0.9714916944503784,grad_norm: 0.999999067085256, iteration: 51649
loss: 0.9870038628578186,grad_norm: 0.9999991278835528, iteration: 51650
loss: 0.9683289527893066,grad_norm: 0.9999990338571718, iteration: 51651
loss: 1.0010193586349487,grad_norm: 0.9999989944752203, iteration: 51652
loss: 0.9873993992805481,grad_norm: 0.8245867341430271, iteration: 51653
loss: 1.0173559188842773,grad_norm: 0.9096399941297035, iteration: 51654
loss: 1.004701852798462,grad_norm: 0.9999991780535177, iteration: 51655
loss: 1.0194854736328125,grad_norm: 0.9119441563464151, iteration: 51656
loss: 1.0305663347244263,grad_norm: 0.9601446528684691, iteration: 51657
loss: 0.9939821362495422,grad_norm: 0.7761467860927856, iteration: 51658
loss: 1.009873867034912,grad_norm: 0.9999991329394531, iteration: 51659
loss: 1.0148470401763916,grad_norm: 0.9999990847463939, iteration: 51660
loss: 0.9980131983757019,grad_norm: 0.8431477103985147, iteration: 51661
loss: 0.9716420769691467,grad_norm: 0.9999992741135357, iteration: 51662
loss: 1.0207666158676147,grad_norm: 0.9999996977501723, iteration: 51663
loss: 0.961370050907135,grad_norm: 0.9999989218562876, iteration: 51664
loss: 0.9807242751121521,grad_norm: 0.9999991509505911, iteration: 51665
loss: 0.9528765082359314,grad_norm: 0.8353643980259405, iteration: 51666
loss: 0.9415766596794128,grad_norm: 0.9206146677591838, iteration: 51667
loss: 0.9989673495292664,grad_norm: 0.7230219104201858, iteration: 51668
loss: 1.0354993343353271,grad_norm: 0.9999992343311943, iteration: 51669
loss: 0.9953821897506714,grad_norm: 0.8242856437035213, iteration: 51670
loss: 0.9937913417816162,grad_norm: 0.999999093337031, iteration: 51671
loss: 1.0737239122390747,grad_norm: 0.9999990929783613, iteration: 51672
loss: 1.008683681488037,grad_norm: 0.9869211809111861, iteration: 51673
loss: 1.0570412874221802,grad_norm: 0.9999991139556459, iteration: 51674
loss: 1.0305657386779785,grad_norm: 0.9999990555974724, iteration: 51675
loss: 1.0236865282058716,grad_norm: 0.9999995424555165, iteration: 51676
loss: 0.9652155637741089,grad_norm: 0.9999992497336404, iteration: 51677
loss: 1.0321099758148193,grad_norm: 0.9999991208620932, iteration: 51678
loss: 0.9857844710350037,grad_norm: 0.9394847631618755, iteration: 51679
loss: 0.9752947688102722,grad_norm: 0.9999992361785677, iteration: 51680
loss: 1.0025221109390259,grad_norm: 0.9999991663614455, iteration: 51681
loss: 1.0056992769241333,grad_norm: 0.846783060370525, iteration: 51682
loss: 0.9639250040054321,grad_norm: 0.9999991168348088, iteration: 51683
loss: 0.9774132370948792,grad_norm: 0.9999992202419046, iteration: 51684
loss: 0.9997439980506897,grad_norm: 0.9332424253058803, iteration: 51685
loss: 1.0085846185684204,grad_norm: 0.9542503531154058, iteration: 51686
loss: 0.973358690738678,grad_norm: 0.9999990758323068, iteration: 51687
loss: 0.9989365339279175,grad_norm: 0.865448394166389, iteration: 51688
loss: 1.0128211975097656,grad_norm: 0.9999990575842468, iteration: 51689
loss: 1.0427435636520386,grad_norm: 0.9999992736149719, iteration: 51690
loss: 0.9828394055366516,grad_norm: 0.9689755201783218, iteration: 51691
loss: 1.0422176122665405,grad_norm: 0.9999991651192487, iteration: 51692
loss: 0.9757527112960815,grad_norm: 0.9999992004769646, iteration: 51693
loss: 1.0240120887756348,grad_norm: 0.9999991540787201, iteration: 51694
loss: 1.0020813941955566,grad_norm: 0.8989260764000564, iteration: 51695
loss: 0.9889110922813416,grad_norm: 0.9392957341073019, iteration: 51696
loss: 1.0655080080032349,grad_norm: 0.999999204271601, iteration: 51697
loss: 0.9831538200378418,grad_norm: 0.9625071845714208, iteration: 51698
loss: 1.008692979812622,grad_norm: 0.8820784709337371, iteration: 51699
loss: 1.0130923986434937,grad_norm: 0.9999990360637192, iteration: 51700
loss: 0.9950822591781616,grad_norm: 0.9506575188050875, iteration: 51701
loss: 1.0090796947479248,grad_norm: 0.9632042415440172, iteration: 51702
loss: 1.0257726907730103,grad_norm: 0.9999990730087199, iteration: 51703
loss: 1.0104411840438843,grad_norm: 0.9485685046610524, iteration: 51704
loss: 0.9798839092254639,grad_norm: 0.9999992141708617, iteration: 51705
loss: 0.9717554450035095,grad_norm: 0.9999992701726056, iteration: 51706
loss: 1.0053638219833374,grad_norm: 0.999999047885013, iteration: 51707
loss: 1.048120379447937,grad_norm: 0.999999714308284, iteration: 51708
loss: 1.0039616823196411,grad_norm: 0.9999990128690126, iteration: 51709
loss: 1.0018424987792969,grad_norm: 0.9856569917927648, iteration: 51710
loss: 1.0226608514785767,grad_norm: 0.9999992726247203, iteration: 51711
loss: 1.0214754343032837,grad_norm: 0.935563319024437, iteration: 51712
loss: 0.9893668293952942,grad_norm: 0.9999990015140501, iteration: 51713
loss: 0.988445520401001,grad_norm: 0.9759573071296729, iteration: 51714
loss: 0.9690893888473511,grad_norm: 0.9999991331001814, iteration: 51715
loss: 0.9753913879394531,grad_norm: 0.999999186674014, iteration: 51716
loss: 0.9958758354187012,grad_norm: 0.9999991581462337, iteration: 51717
loss: 0.984536349773407,grad_norm: 0.9999993122537115, iteration: 51718
loss: 0.9787793755531311,grad_norm: 0.9999991337475573, iteration: 51719
loss: 1.0256096124649048,grad_norm: 0.9999989771101699, iteration: 51720
loss: 1.033110499382019,grad_norm: 0.9999991448311554, iteration: 51721
loss: 1.0027399063110352,grad_norm: 0.9999990988442691, iteration: 51722
loss: 0.9920997023582458,grad_norm: 0.9999991577351273, iteration: 51723
loss: 0.9731141328811646,grad_norm: 0.9999990810784676, iteration: 51724
loss: 0.9978325366973877,grad_norm: 0.999999109399485, iteration: 51725
loss: 1.0275404453277588,grad_norm: 0.9999995328621119, iteration: 51726
loss: 0.9956281781196594,grad_norm: 0.9623494872308468, iteration: 51727
loss: 0.9990416169166565,grad_norm: 0.9331656302005307, iteration: 51728
loss: 1.0154931545257568,grad_norm: 0.9578857214919293, iteration: 51729
loss: 1.0153053998947144,grad_norm: 0.8379432189710486, iteration: 51730
loss: 0.9959777593612671,grad_norm: 0.9383156933243068, iteration: 51731
loss: 0.9724339842796326,grad_norm: 0.9801445484880354, iteration: 51732
loss: 1.0290746688842773,grad_norm: 0.9270419379138808, iteration: 51733
loss: 1.022432804107666,grad_norm: 0.8466320765438766, iteration: 51734
loss: 1.004245400428772,grad_norm: 0.9999991579423888, iteration: 51735
loss: 0.9939650893211365,grad_norm: 0.9999992158255627, iteration: 51736
loss: 1.0121817588806152,grad_norm: 0.9799589206306053, iteration: 51737
loss: 0.9862735271453857,grad_norm: 0.9374479316011102, iteration: 51738
loss: 1.0165163278579712,grad_norm: 0.9999991507384408, iteration: 51739
loss: 1.0181771516799927,grad_norm: 0.9933048229091183, iteration: 51740
loss: 1.025225043296814,grad_norm: 0.9913961060209044, iteration: 51741
loss: 1.0422736406326294,grad_norm: 0.8675914314401482, iteration: 51742
loss: 0.9911934733390808,grad_norm: 0.9999991123349565, iteration: 51743
loss: 0.9916661977767944,grad_norm: 0.9999991435091599, iteration: 51744
loss: 0.9947928190231323,grad_norm: 0.9999989146953917, iteration: 51745
loss: 1.0210833549499512,grad_norm: 0.9999995052550728, iteration: 51746
loss: 0.9722681045532227,grad_norm: 0.9999990808399408, iteration: 51747
loss: 1.0077674388885498,grad_norm: 0.9999991223199446, iteration: 51748
loss: 1.0355290174484253,grad_norm: 0.9999990643248101, iteration: 51749
loss: 0.9809004068374634,grad_norm: 0.9999991616311029, iteration: 51750
loss: 1.0056196451187134,grad_norm: 0.9999992182795198, iteration: 51751
loss: 1.0126181840896606,grad_norm: 0.9428245674771439, iteration: 51752
loss: 1.0035022497177124,grad_norm: 0.9999991452990241, iteration: 51753
loss: 0.9897832274436951,grad_norm: 0.7896662119113239, iteration: 51754
loss: 1.0122677087783813,grad_norm: 0.9397829890577795, iteration: 51755
loss: 0.9924695491790771,grad_norm: 0.9999990475284178, iteration: 51756
loss: 0.993240237236023,grad_norm: 0.9999989431612353, iteration: 51757
loss: 1.0226562023162842,grad_norm: 0.9999990694477462, iteration: 51758
loss: 0.9958457946777344,grad_norm: 0.9999989925704212, iteration: 51759
loss: 0.9841875433921814,grad_norm: 0.9999991930326046, iteration: 51760
loss: 0.980388343334198,grad_norm: 0.9999990593347441, iteration: 51761
loss: 0.9991412162780762,grad_norm: 0.9999989739827693, iteration: 51762
loss: 1.046756386756897,grad_norm: 0.9999992009917215, iteration: 51763
loss: 1.028757095336914,grad_norm: 0.9999990265191645, iteration: 51764
loss: 1.0507110357284546,grad_norm: 0.9999991004427748, iteration: 51765
loss: 0.9734923243522644,grad_norm: 0.9999990262688052, iteration: 51766
loss: 1.0001262426376343,grad_norm: 0.9999991299376624, iteration: 51767
loss: 1.0125598907470703,grad_norm: 0.9305969621757034, iteration: 51768
loss: 1.009580373764038,grad_norm: 0.809141276267754, iteration: 51769
loss: 0.9925021529197693,grad_norm: 0.8915694431015593, iteration: 51770
loss: 1.0092570781707764,grad_norm: 0.9999992782324163, iteration: 51771
loss: 1.0084514617919922,grad_norm: 0.9999991782675971, iteration: 51772
loss: 0.9785162210464478,grad_norm: 0.9999989681089321, iteration: 51773
loss: 0.9993659257888794,grad_norm: 0.9999991096774336, iteration: 51774
loss: 1.0112190246582031,grad_norm: 0.9999996310078223, iteration: 51775
loss: 0.9758568406105042,grad_norm: 0.9999990540257487, iteration: 51776
loss: 0.9853850603103638,grad_norm: 0.9999991192573703, iteration: 51777
loss: 0.9755414128303528,grad_norm: 0.9627909101285778, iteration: 51778
loss: 0.9978706240653992,grad_norm: 0.9103379650606116, iteration: 51779
loss: 0.9991364479064941,grad_norm: 0.999999177225588, iteration: 51780
loss: 1.0046446323394775,grad_norm: 0.9999991354880355, iteration: 51781
loss: 0.9896958470344543,grad_norm: 0.9999991404178445, iteration: 51782
loss: 1.0155000686645508,grad_norm: 0.9999990982406443, iteration: 51783
loss: 0.9649065136909485,grad_norm: 0.9999990715815916, iteration: 51784
loss: 1.0077450275421143,grad_norm: 0.9999989274560749, iteration: 51785
loss: 0.9878398180007935,grad_norm: 0.9999991880895245, iteration: 51786
loss: 1.015371561050415,grad_norm: 0.9857725269192606, iteration: 51787
loss: 1.037041187286377,grad_norm: 0.9999991882642043, iteration: 51788
loss: 1.0091006755828857,grad_norm: 0.9999991514664578, iteration: 51789
loss: 0.9899446368217468,grad_norm: 0.9441561790976533, iteration: 51790
loss: 0.9882478713989258,grad_norm: 0.8579277490007624, iteration: 51791
loss: 1.023997187614441,grad_norm: 0.8907633962929853, iteration: 51792
loss: 0.9999734163284302,grad_norm: 0.9999991095079309, iteration: 51793
loss: 1.0113381147384644,grad_norm: 0.8042923056358746, iteration: 51794
loss: 1.0733405351638794,grad_norm: 0.9999993307915268, iteration: 51795
loss: 1.0027687549591064,grad_norm: 0.9698271925278781, iteration: 51796
loss: 1.0254945755004883,grad_norm: 0.998865262095364, iteration: 51797
loss: 1.0158164501190186,grad_norm: 0.9999992689730041, iteration: 51798
loss: 1.0072927474975586,grad_norm: 0.9518942503400629, iteration: 51799
loss: 1.0201659202575684,grad_norm: 0.9750024523393007, iteration: 51800
loss: 1.0002155303955078,grad_norm: 0.8130913163434224, iteration: 51801
loss: 0.98796546459198,grad_norm: 0.9999991827772483, iteration: 51802
loss: 1.0162032842636108,grad_norm: 0.9829297816925473, iteration: 51803
loss: 0.9881561994552612,grad_norm: 0.9999990521492906, iteration: 51804
loss: 0.9935994744300842,grad_norm: 0.9875975605608568, iteration: 51805
loss: 1.0186678171157837,grad_norm: 0.9999990675180318, iteration: 51806
loss: 0.9853537678718567,grad_norm: 0.8773238329255532, iteration: 51807
loss: 0.9984728097915649,grad_norm: 0.9999990743061816, iteration: 51808
loss: 1.0420295000076294,grad_norm: 0.9999990728816891, iteration: 51809
loss: 1.0112253427505493,grad_norm: 0.9999990772714763, iteration: 51810
loss: 1.0166988372802734,grad_norm: 0.9999992220440737, iteration: 51811
loss: 0.9717368483543396,grad_norm: 0.8269717201336617, iteration: 51812
loss: 1.029039740562439,grad_norm: 0.9999993103780656, iteration: 51813
loss: 1.0400564670562744,grad_norm: 0.859243184476267, iteration: 51814
loss: 0.9731703996658325,grad_norm: 0.9839728505162204, iteration: 51815
loss: 1.0204464197158813,grad_norm: 0.8685612695291259, iteration: 51816
loss: 1.0045233964920044,grad_norm: 0.89316097955219, iteration: 51817
loss: 1.0355452299118042,grad_norm: 0.8988796214754846, iteration: 51818
loss: 0.9744439721107483,grad_norm: 0.9867608657290377, iteration: 51819
loss: 0.9971756935119629,grad_norm: 0.9387713780765286, iteration: 51820
loss: 0.9652625918388367,grad_norm: 0.9113170837640038, iteration: 51821
loss: 0.9911269545555115,grad_norm: 0.9999991843199166, iteration: 51822
loss: 1.023924469947815,grad_norm: 0.999999011416665, iteration: 51823
loss: 0.9671674966812134,grad_norm: 0.9455544550090849, iteration: 51824
loss: 0.9837350249290466,grad_norm: 0.8342812596429969, iteration: 51825
loss: 0.978775680065155,grad_norm: 0.8964154646816025, iteration: 51826
loss: 1.0059773921966553,grad_norm: 0.8859307587539711, iteration: 51827
loss: 1.0374611616134644,grad_norm: 0.9999990584700458, iteration: 51828
loss: 0.9969884753227234,grad_norm: 0.9999992065259611, iteration: 51829
loss: 1.0110772848129272,grad_norm: 0.8101710396516236, iteration: 51830
loss: 0.9951125979423523,grad_norm: 0.9212846550259497, iteration: 51831
loss: 0.9963602423667908,grad_norm: 0.9999992307494912, iteration: 51832
loss: 1.0273479223251343,grad_norm: 0.9788005311202163, iteration: 51833
loss: 1.0036475658416748,grad_norm: 0.9641862944311675, iteration: 51834
loss: 1.0087628364562988,grad_norm: 0.8577541750435194, iteration: 51835
loss: 0.9594349265098572,grad_norm: 0.9407037289047916, iteration: 51836
loss: 0.9919338226318359,grad_norm: 0.9720715407362076, iteration: 51837
loss: 1.0425020456314087,grad_norm: 0.8741792400926921, iteration: 51838
loss: 1.0193328857421875,grad_norm: 0.9999990079737628, iteration: 51839
loss: 1.0186134576797485,grad_norm: 0.9452067140864817, iteration: 51840
loss: 1.0365948677062988,grad_norm: 0.8834860090354079, iteration: 51841
loss: 1.007800817489624,grad_norm: 0.9999992220497267, iteration: 51842
loss: 1.0392470359802246,grad_norm: 0.9999993439896891, iteration: 51843
loss: 1.0140149593353271,grad_norm: 0.999999114021479, iteration: 51844
loss: 1.0436087846755981,grad_norm: 0.9490589486544997, iteration: 51845
loss: 1.0288032293319702,grad_norm: 0.999999331584193, iteration: 51846
loss: 1.0083339214324951,grad_norm: 0.9999992027241161, iteration: 51847
loss: 1.0222904682159424,grad_norm: 0.9999994150677555, iteration: 51848
loss: 0.9564998745918274,grad_norm: 0.9999991678176674, iteration: 51849
loss: 0.9846709370613098,grad_norm: 0.9060022744820728, iteration: 51850
loss: 1.0198354721069336,grad_norm: 0.9999992011171354, iteration: 51851
loss: 1.026179313659668,grad_norm: 0.9999994723858217, iteration: 51852
loss: 1.0153405666351318,grad_norm: 0.9757017870377089, iteration: 51853
loss: 1.0123624801635742,grad_norm: 0.9436964546819484, iteration: 51854
loss: 0.9856283068656921,grad_norm: 0.9853019816045131, iteration: 51855
loss: 1.0056959390640259,grad_norm: 0.873115840255098, iteration: 51856
loss: 0.9896969795227051,grad_norm: 0.9471075528575226, iteration: 51857
loss: 0.9889183640480042,grad_norm: 0.9999990902659417, iteration: 51858
loss: 1.0137323141098022,grad_norm: 0.9402521770768644, iteration: 51859
loss: 1.0137202739715576,grad_norm: 0.9792308052909094, iteration: 51860
loss: 1.033453345298767,grad_norm: 0.9874315142441106, iteration: 51861
loss: 0.9647712111473083,grad_norm: 0.9010279808775151, iteration: 51862
loss: 0.9941104650497437,grad_norm: 0.8886088857650695, iteration: 51863
loss: 0.9975307583808899,grad_norm: 0.9769776651104821, iteration: 51864
loss: 0.979371964931488,grad_norm: 0.999999000464875, iteration: 51865
loss: 1.018696904182434,grad_norm: 0.8086224756259949, iteration: 51866
loss: 0.9566298127174377,grad_norm: 0.9717463621467953, iteration: 51867
loss: 0.9932491183280945,grad_norm: 0.999999200723625, iteration: 51868
loss: 0.9898526072502136,grad_norm: 0.9999991304664384, iteration: 51869
loss: 1.015631914138794,grad_norm: 0.9336332293169722, iteration: 51870
loss: 1.0161633491516113,grad_norm: 0.999999168625742, iteration: 51871
loss: 1.0165334939956665,grad_norm: 0.9524040952530987, iteration: 51872
loss: 0.9771857857704163,grad_norm: 0.9786979413214711, iteration: 51873
loss: 0.9601006507873535,grad_norm: 0.9999992256326669, iteration: 51874
loss: 0.9482595324516296,grad_norm: 0.9999993338902821, iteration: 51875
loss: 0.9882829189300537,grad_norm: 0.9888349079998945, iteration: 51876
loss: 0.9800496697425842,grad_norm: 0.8688550962307623, iteration: 51877
loss: 1.0365360975265503,grad_norm: 0.9109771743459738, iteration: 51878
loss: 1.0174428224563599,grad_norm: 0.9999991922530163, iteration: 51879
loss: 1.026696801185608,grad_norm: 0.9999991564212678, iteration: 51880
loss: 0.9996682405471802,grad_norm: 0.9978136589873045, iteration: 51881
loss: 1.022736668586731,grad_norm: 0.8395872816463638, iteration: 51882
loss: 1.096961259841919,grad_norm: 0.9999991810289439, iteration: 51883
loss: 0.975395679473877,grad_norm: 0.9491955848313309, iteration: 51884
loss: 0.9971104264259338,grad_norm: 0.9999988946435118, iteration: 51885
loss: 1.0164885520935059,grad_norm: 0.8844453311495699, iteration: 51886
loss: 1.0237631797790527,grad_norm: 0.9999990681472329, iteration: 51887
loss: 1.007470726966858,grad_norm: 0.843800315998216, iteration: 51888
loss: 0.9880486726760864,grad_norm: 0.9770127707186751, iteration: 51889
loss: 0.9713134765625,grad_norm: 0.999999192588229, iteration: 51890
loss: 1.0008519887924194,grad_norm: 0.999999183574731, iteration: 51891
loss: 0.9805096387863159,grad_norm: 0.9999992865373785, iteration: 51892
loss: 0.9864157438278198,grad_norm: 0.9999991673695313, iteration: 51893
loss: 1.0259208679199219,grad_norm: 0.9999991574403029, iteration: 51894
loss: 0.9686269760131836,grad_norm: 0.9996613260157498, iteration: 51895
loss: 1.019879937171936,grad_norm: 0.9089734487247221, iteration: 51896
loss: 1.0386807918548584,grad_norm: 0.9999990684503739, iteration: 51897
loss: 1.015086054801941,grad_norm: 0.9999996315717764, iteration: 51898
loss: 1.0290799140930176,grad_norm: 0.9999994809964766, iteration: 51899
loss: 1.0015348196029663,grad_norm: 0.9445940083604695, iteration: 51900
loss: 1.0389703512191772,grad_norm: 0.9999992825820465, iteration: 51901
loss: 1.0304111242294312,grad_norm: 0.9999991209116895, iteration: 51902
loss: 1.0158569812774658,grad_norm: 0.9516235043843893, iteration: 51903
loss: 0.9839555621147156,grad_norm: 0.9999990883860977, iteration: 51904
loss: 1.018235206604004,grad_norm: 0.8624674811054357, iteration: 51905
loss: 1.0067147016525269,grad_norm: 0.9752476304835127, iteration: 51906
loss: 0.9990270137786865,grad_norm: 0.9999992407121032, iteration: 51907
loss: 1.0343842506408691,grad_norm: 0.9999992603435428, iteration: 51908
loss: 1.0153359174728394,grad_norm: 0.9758094620890589, iteration: 51909
loss: 0.9960769414901733,grad_norm: 0.9999991737408901, iteration: 51910
loss: 1.0740240812301636,grad_norm: 0.9999994212706359, iteration: 51911
loss: 1.1276315450668335,grad_norm: 0.9999994264628487, iteration: 51912
loss: 0.9749500751495361,grad_norm: 0.9999990232201904, iteration: 51913
loss: 0.9950747489929199,grad_norm: 0.9999990657128368, iteration: 51914
loss: 0.9890225529670715,grad_norm: 0.9355696489155164, iteration: 51915
loss: 1.00047767162323,grad_norm: 0.9999990907644883, iteration: 51916
loss: 0.9955391883850098,grad_norm: 0.9999994517633901, iteration: 51917
loss: 0.9856829643249512,grad_norm: 0.9850350192044537, iteration: 51918
loss: 1.022946834564209,grad_norm: 0.9285984379095583, iteration: 51919
loss: 0.9589751958847046,grad_norm: 0.9999990810598696, iteration: 51920
loss: 0.9868782162666321,grad_norm: 0.9815223822139911, iteration: 51921
loss: 0.998580276966095,grad_norm: 0.9999992918929034, iteration: 51922
loss: 0.9975665211677551,grad_norm: 0.999999650624432, iteration: 51923
loss: 0.974041223526001,grad_norm: 0.999999320156886, iteration: 51924
loss: 1.0088564157485962,grad_norm: 0.9999990481740493, iteration: 51925
loss: 1.0192536115646362,grad_norm: 0.9999991801050945, iteration: 51926
loss: 1.038693904876709,grad_norm: 0.9497957909699151, iteration: 51927
loss: 1.0310182571411133,grad_norm: 0.9072610303554562, iteration: 51928
loss: 1.028855800628662,grad_norm: 0.9999994163467345, iteration: 51929
loss: 0.9991627335548401,grad_norm: 0.8989621482297319, iteration: 51930
loss: 1.0196346044540405,grad_norm: 0.9999991514563996, iteration: 51931
loss: 0.9940791130065918,grad_norm: 0.9999994491261057, iteration: 51932
loss: 0.9880815148353577,grad_norm: 0.999999032104406, iteration: 51933
loss: 1.0036633014678955,grad_norm: 0.9745477727357794, iteration: 51934
loss: 0.9920257329940796,grad_norm: 0.9999991276453883, iteration: 51935
loss: 1.0008469820022583,grad_norm: 0.8773027642430953, iteration: 51936
loss: 1.0100483894348145,grad_norm: 0.9999989637513932, iteration: 51937
loss: 0.9991009831428528,grad_norm: 0.8839796224052882, iteration: 51938
loss: 1.0061826705932617,grad_norm: 0.9999992680787487, iteration: 51939
loss: 1.0131222009658813,grad_norm: 0.7797291888629632, iteration: 51940
loss: 0.9983106255531311,grad_norm: 0.999999295287651, iteration: 51941
loss: 0.9743233323097229,grad_norm: 0.9757055153932043, iteration: 51942
loss: 0.9622200727462769,grad_norm: 0.8885954588717988, iteration: 51943
loss: 1.0067559480667114,grad_norm: 0.999999408472398, iteration: 51944
loss: 0.9959122538566589,grad_norm: 0.9999991310099028, iteration: 51945
loss: 0.9904739856719971,grad_norm: 0.9999993271830281, iteration: 51946
loss: 1.0180996656417847,grad_norm: 0.941978657379508, iteration: 51947
loss: 1.0104000568389893,grad_norm: 0.999999146019772, iteration: 51948
loss: 1.0253292322158813,grad_norm: 0.8424500030382959, iteration: 51949
loss: 1.0025992393493652,grad_norm: 0.9999994023537973, iteration: 51950
loss: 1.0256823301315308,grad_norm: 0.9154380068130605, iteration: 51951
loss: 0.970492422580719,grad_norm: 0.95021582006944, iteration: 51952
loss: 0.9847568273544312,grad_norm: 0.8880594073670993, iteration: 51953
loss: 1.088972568511963,grad_norm: 0.9999991799617521, iteration: 51954
loss: 0.9924387335777283,grad_norm: 0.9788215067902026, iteration: 51955
loss: 1.017747163772583,grad_norm: 0.9999992749824402, iteration: 51956
loss: 0.9828251004219055,grad_norm: 0.9457642443229801, iteration: 51957
loss: 1.0157166719436646,grad_norm: 0.8490685536017414, iteration: 51958
loss: 1.0127190351486206,grad_norm: 0.8662271492344731, iteration: 51959
loss: 1.0328857898712158,grad_norm: 0.938446186670208, iteration: 51960
loss: 1.0232291221618652,grad_norm: 0.9999990519931946, iteration: 51961
loss: 1.0174974203109741,grad_norm: 0.9999990912145256, iteration: 51962
loss: 1.0129681825637817,grad_norm: 0.8687719237650339, iteration: 51963
loss: 1.016109824180603,grad_norm: 0.9974472187473826, iteration: 51964
loss: 1.0162492990493774,grad_norm: 0.9899713800487765, iteration: 51965
loss: 1.0071920156478882,grad_norm: 0.8173497637141123, iteration: 51966
loss: 1.0090991258621216,grad_norm: 0.9804876707350225, iteration: 51967
loss: 1.0070821046829224,grad_norm: 0.9999990445723027, iteration: 51968
loss: 0.9649865627288818,grad_norm: 0.963636785346631, iteration: 51969
loss: 1.0108540058135986,grad_norm: 0.9999990704513219, iteration: 51970
loss: 0.9927893877029419,grad_norm: 0.9984807370270101, iteration: 51971
loss: 0.9634987115859985,grad_norm: 0.9999990240540422, iteration: 51972
loss: 1.0200788974761963,grad_norm: 0.9999991838262141, iteration: 51973
loss: 1.0058467388153076,grad_norm: 0.9138822684516172, iteration: 51974
loss: 1.0119450092315674,grad_norm: 0.9522105510254828, iteration: 51975
loss: 0.9719139933586121,grad_norm: 0.9917025074881226, iteration: 51976
loss: 0.9550072550773621,grad_norm: 0.9646222642513081, iteration: 51977
loss: 0.9940967559814453,grad_norm: 0.9111556197249345, iteration: 51978
loss: 1.0018404722213745,grad_norm: 0.9377034639153287, iteration: 51979
loss: 1.014020323753357,grad_norm: 0.9999991868563852, iteration: 51980
loss: 1.0091476440429688,grad_norm: 0.8634141089135653, iteration: 51981
loss: 1.0123786926269531,grad_norm: 0.9999997810273042, iteration: 51982
loss: 1.017500877380371,grad_norm: 0.9999991272549927, iteration: 51983
loss: 0.9783922433853149,grad_norm: 0.8911321670921882, iteration: 51984
loss: 1.0035122632980347,grad_norm: 0.909229871950988, iteration: 51985
loss: 1.0342859029769897,grad_norm: 0.8407949297803793, iteration: 51986
loss: 0.9816097617149353,grad_norm: 0.8831909980676004, iteration: 51987
loss: 1.0278695821762085,grad_norm: 0.9906476291529979, iteration: 51988
loss: 1.010867953300476,grad_norm: 0.9999995385301789, iteration: 51989
loss: 1.032578468322754,grad_norm: 0.9999991185230072, iteration: 51990
loss: 1.024997591972351,grad_norm: 0.9999995702766368, iteration: 51991
loss: 1.0280945301055908,grad_norm: 0.999999197087877, iteration: 51992
loss: 0.9973214864730835,grad_norm: 0.9181379566087482, iteration: 51993
loss: 1.0089482069015503,grad_norm: 0.9999990940872335, iteration: 51994
loss: 0.9876804351806641,grad_norm: 0.9502890869605267, iteration: 51995
loss: 1.012658715248108,grad_norm: 0.7876751887401088, iteration: 51996
loss: 1.0103411674499512,grad_norm: 0.9999991990047358, iteration: 51997
loss: 1.002238392829895,grad_norm: 0.9999992275704626, iteration: 51998
loss: 0.9957589507102966,grad_norm: 0.9419748682477769, iteration: 51999
loss: 0.9822705388069153,grad_norm: 0.9999992688469235, iteration: 52000
loss: 0.9907594323158264,grad_norm: 0.9999991140419344, iteration: 52001
loss: 1.0002161264419556,grad_norm: 0.9999990751208538, iteration: 52002
loss: 1.0156360864639282,grad_norm: 0.9999991032500206, iteration: 52003
loss: 1.0011245012283325,grad_norm: 0.8211354361793275, iteration: 52004
loss: 1.0159823894500732,grad_norm: 0.9999990526024053, iteration: 52005
loss: 0.9966880679130554,grad_norm: 0.9135835635545921, iteration: 52006
loss: 0.9711522459983826,grad_norm: 0.9534750304406793, iteration: 52007
loss: 0.9578260779380798,grad_norm: 0.9999991662353599, iteration: 52008
loss: 0.9905752539634705,grad_norm: 0.8556642160095377, iteration: 52009
loss: 0.9981110095977783,grad_norm: 0.9853239338238124, iteration: 52010
loss: 1.0341711044311523,grad_norm: 0.9999992218633164, iteration: 52011
loss: 1.0272722244262695,grad_norm: 0.9431801539914524, iteration: 52012
loss: 0.949212372303009,grad_norm: 0.9999758521881146, iteration: 52013
loss: 0.9706825017929077,grad_norm: 0.9999990996557012, iteration: 52014
loss: 0.9682193994522095,grad_norm: 0.9999993179173523, iteration: 52015
loss: 1.0091091394424438,grad_norm: 0.9999989277575579, iteration: 52016
loss: 1.0188132524490356,grad_norm: 0.9999990375580511, iteration: 52017
loss: 0.9602531790733337,grad_norm: 0.9999990996265224, iteration: 52018
loss: 0.9976882934570312,grad_norm: 0.9999990414152777, iteration: 52019
loss: 1.026697039604187,grad_norm: 0.8225519931832047, iteration: 52020
loss: 1.0066624879837036,grad_norm: 0.9342476624702768, iteration: 52021
loss: 1.0130021572113037,grad_norm: 0.9999990581485269, iteration: 52022
loss: 1.0005080699920654,grad_norm: 0.9324947287173521, iteration: 52023
loss: 1.017346978187561,grad_norm: 0.9999990639585209, iteration: 52024
loss: 0.967693567276001,grad_norm: 0.9207458096340569, iteration: 52025
loss: 0.9726120233535767,grad_norm: 0.8602341352670122, iteration: 52026
loss: 1.047677755355835,grad_norm: 0.9948006667922716, iteration: 52027
loss: 1.0068895816802979,grad_norm: 0.9999990406827678, iteration: 52028
loss: 0.9878278374671936,grad_norm: 0.9999992000000267, iteration: 52029
loss: 1.0052108764648438,grad_norm: 0.99999920771605, iteration: 52030
loss: 0.990820050239563,grad_norm: 0.9999990356973184, iteration: 52031
loss: 1.012351155281067,grad_norm: 0.9999991051013802, iteration: 52032
loss: 0.9825716614723206,grad_norm: 0.9993907742876643, iteration: 52033
loss: 1.0215405225753784,grad_norm: 0.9999992594122002, iteration: 52034
loss: 1.031383991241455,grad_norm: 0.9999990680993683, iteration: 52035
loss: 1.0474964380264282,grad_norm: 0.9648625191978811, iteration: 52036
loss: 0.9825931191444397,grad_norm: 0.9999991346115654, iteration: 52037
loss: 1.0240051746368408,grad_norm: 0.9375826887488128, iteration: 52038
loss: 1.0364758968353271,grad_norm: 0.9499665910759483, iteration: 52039
loss: 1.0317062139511108,grad_norm: 0.9880079985229048, iteration: 52040
loss: 1.0313154458999634,grad_norm: 0.9999996888697725, iteration: 52041
loss: 1.034430980682373,grad_norm: 0.9999992914130281, iteration: 52042
loss: 1.0111104249954224,grad_norm: 0.999998994539455, iteration: 52043
loss: 1.1861411333084106,grad_norm: 0.999999681193147, iteration: 52044
loss: 0.9728512167930603,grad_norm: 0.932009637063802, iteration: 52045
loss: 0.9910850524902344,grad_norm: 0.9999992884845514, iteration: 52046
loss: 0.9880576133728027,grad_norm: 0.9899089745058689, iteration: 52047
loss: 1.0118420124053955,grad_norm: 0.9999993117153054, iteration: 52048
loss: 0.9931887984275818,grad_norm: 0.9883922269270915, iteration: 52049
loss: 0.9720125794410706,grad_norm: 0.9999991318702605, iteration: 52050
loss: 0.9757287502288818,grad_norm: 0.9999990435975702, iteration: 52051
loss: 0.9739241003990173,grad_norm: 0.9999990282106284, iteration: 52052
loss: 0.9645546078681946,grad_norm: 0.9287043437398802, iteration: 52053
loss: 1.004852533340454,grad_norm: 0.9362960792405152, iteration: 52054
loss: 0.9966226816177368,grad_norm: 0.9999989536943681, iteration: 52055
loss: 0.9627146124839783,grad_norm: 0.9870420086786973, iteration: 52056
loss: 1.0025278329849243,grad_norm: 0.9999990394158143, iteration: 52057
loss: 1.01279616355896,grad_norm: 0.9999992694498141, iteration: 52058
loss: 1.035630464553833,grad_norm: 0.9999992738511022, iteration: 52059
loss: 1.0061627626419067,grad_norm: 0.9941898901110242, iteration: 52060
loss: 1.0208638906478882,grad_norm: 0.9999991137233114, iteration: 52061
loss: 0.97184818983078,grad_norm: 0.9856633721914396, iteration: 52062
loss: 1.0325984954833984,grad_norm: 0.9999993969282533, iteration: 52063
loss: 0.9803034663200378,grad_norm: 0.8931971124251121, iteration: 52064
loss: 1.020878791809082,grad_norm: 0.849317519996138, iteration: 52065
loss: 0.9610017538070679,grad_norm: 0.9923527483058324, iteration: 52066
loss: 0.9929496645927429,grad_norm: 0.9999992134427522, iteration: 52067
loss: 1.0099416971206665,grad_norm: 0.9999990492709138, iteration: 52068
loss: 0.9738316535949707,grad_norm: 0.999999142734778, iteration: 52069
loss: 1.008148431777954,grad_norm: 0.8892776775189873, iteration: 52070
loss: 1.0099416971206665,grad_norm: 0.9999994310303449, iteration: 52071
loss: 1.0311930179595947,grad_norm: 0.8584204954173692, iteration: 52072
loss: 1.0028904676437378,grad_norm: 0.9208713146499957, iteration: 52073
loss: 1.0162241458892822,grad_norm: 0.9999991741300086, iteration: 52074
loss: 0.9909648895263672,grad_norm: 0.9999990126719105, iteration: 52075
loss: 1.0377593040466309,grad_norm: 0.9999990928497423, iteration: 52076
loss: 1.0216480493545532,grad_norm: 0.9999990999322642, iteration: 52077
loss: 1.0130807161331177,grad_norm: 0.9999990453720645, iteration: 52078
loss: 1.0394421815872192,grad_norm: 0.9999992148370118, iteration: 52079
loss: 1.0110441446304321,grad_norm: 0.9826810707441963, iteration: 52080
loss: 1.0082660913467407,grad_norm: 0.9714888383880943, iteration: 52081
loss: 0.9786375164985657,grad_norm: 0.9999989822997164, iteration: 52082
loss: 0.9613857269287109,grad_norm: 0.9999991901415625, iteration: 52083
loss: 0.9908263683319092,grad_norm: 0.8276549385152788, iteration: 52084
loss: 1.0196664333343506,grad_norm: 0.999999630573187, iteration: 52085
loss: 1.006377935409546,grad_norm: 0.9999990355267658, iteration: 52086
loss: 1.0005789995193481,grad_norm: 0.9999991670817032, iteration: 52087
loss: 0.9865903258323669,grad_norm: 0.9201229235718387, iteration: 52088
loss: 0.9822596907615662,grad_norm: 0.982277595639048, iteration: 52089
loss: 0.9619434475898743,grad_norm: 0.9999992690595129, iteration: 52090
loss: 1.0118741989135742,grad_norm: 0.8682832442551474, iteration: 52091
loss: 0.9984748363494873,grad_norm: 0.9999989996678446, iteration: 52092
loss: 0.9665971994400024,grad_norm: 0.9999990917044983, iteration: 52093
loss: 0.9982209205627441,grad_norm: 0.9999990342224943, iteration: 52094
loss: 0.9913825392723083,grad_norm: 0.9999989203701866, iteration: 52095
loss: 1.0124099254608154,grad_norm: 0.999999112889861, iteration: 52096
loss: 1.0115054845809937,grad_norm: 0.9230455853059434, iteration: 52097
loss: 1.0004675388336182,grad_norm: 0.9999991854618555, iteration: 52098
loss: 0.9776597023010254,grad_norm: 0.9999991434443954, iteration: 52099
loss: 0.9850737452507019,grad_norm: 0.9999993230589604, iteration: 52100
loss: 0.9990450739860535,grad_norm: 0.9488212381749583, iteration: 52101
loss: 0.9878059029579163,grad_norm: 0.9999990311464, iteration: 52102
loss: 0.98784339427948,grad_norm: 0.9926078808844497, iteration: 52103
loss: 0.9786188006401062,grad_norm: 0.7907550189215752, iteration: 52104
loss: 0.9995377063751221,grad_norm: 0.9601200926176034, iteration: 52105
loss: 1.0237735509872437,grad_norm: 0.9999991461813531, iteration: 52106
loss: 1.0518999099731445,grad_norm: 0.86552733399858, iteration: 52107
loss: 0.9724761247634888,grad_norm: 0.8331681195108878, iteration: 52108
loss: 1.0349332094192505,grad_norm: 0.9999991223656897, iteration: 52109
loss: 1.0941654443740845,grad_norm: 0.9999998364380428, iteration: 52110
loss: 0.996909499168396,grad_norm: 0.8647615675399616, iteration: 52111
loss: 0.9997124075889587,grad_norm: 0.9029001666128756, iteration: 52112
loss: 0.9825965762138367,grad_norm: 0.9194204543190305, iteration: 52113
loss: 0.9907328486442566,grad_norm: 0.9999990782028206, iteration: 52114
loss: 0.991270899772644,grad_norm: 0.9885991866571515, iteration: 52115
loss: 0.9958174824714661,grad_norm: 0.8840608552326988, iteration: 52116
loss: 1.0042804479599,grad_norm: 0.9236382646581429, iteration: 52117
loss: 0.999883770942688,grad_norm: 0.911356211855203, iteration: 52118
loss: 1.0020917654037476,grad_norm: 0.999999141306385, iteration: 52119
loss: 0.9868564605712891,grad_norm: 0.8067906079126719, iteration: 52120
loss: 1.0330662727355957,grad_norm: 0.9999992900368189, iteration: 52121
loss: 1.0144156217575073,grad_norm: 0.9999995608777731, iteration: 52122
loss: 1.0220437049865723,grad_norm: 0.9077208331151899, iteration: 52123
loss: 0.9988932609558105,grad_norm: 0.9999993600154217, iteration: 52124
loss: 0.9937134385108948,grad_norm: 0.9999990325235193, iteration: 52125
loss: 0.9963414669036865,grad_norm: 0.9909200421211083, iteration: 52126
loss: 1.0365335941314697,grad_norm: 0.9999994405221455, iteration: 52127
loss: 0.9789668321609497,grad_norm: 0.9479328461858262, iteration: 52128
loss: 1.0154194831848145,grad_norm: 0.9433958875890356, iteration: 52129
loss: 1.0112738609313965,grad_norm: 0.9999990274047256, iteration: 52130
loss: 0.9827442169189453,grad_norm: 0.9042638331131014, iteration: 52131
loss: 1.044609546661377,grad_norm: 0.9999990907191778, iteration: 52132
loss: 0.9824316501617432,grad_norm: 0.7864503143153397, iteration: 52133
loss: 1.018183708190918,grad_norm: 0.9671828588331826, iteration: 52134
loss: 1.0186963081359863,grad_norm: 0.9999992372912179, iteration: 52135
loss: 1.015061855316162,grad_norm: 0.9999990028289453, iteration: 52136
loss: 1.0358226299285889,grad_norm: 0.9999996882550495, iteration: 52137
loss: 1.0075517892837524,grad_norm: 0.9296012136789084, iteration: 52138
loss: 1.0093533992767334,grad_norm: 0.9999991247929951, iteration: 52139
loss: 0.9967716336250305,grad_norm: 0.9999989508332692, iteration: 52140
loss: 0.9992040991783142,grad_norm: 0.9551064504073029, iteration: 52141
loss: 1.053117036819458,grad_norm: 0.9999996432686513, iteration: 52142
loss: 1.0097216367721558,grad_norm: 0.9999991189308899, iteration: 52143
loss: 0.9864833354949951,grad_norm: 0.9999990367104554, iteration: 52144
loss: 1.0076082944869995,grad_norm: 0.9794509670640471, iteration: 52145
loss: 0.9780030250549316,grad_norm: 0.981067080082907, iteration: 52146
loss: 0.965890645980835,grad_norm: 0.999999315961989, iteration: 52147
loss: 0.9766532778739929,grad_norm: 0.9999990993147619, iteration: 52148
loss: 0.9945197105407715,grad_norm: 0.8857368498813645, iteration: 52149
loss: 1.0521867275238037,grad_norm: 0.9999999746277707, iteration: 52150
loss: 0.9910435676574707,grad_norm: 0.9999990865811319, iteration: 52151
loss: 0.9819475412368774,grad_norm: 0.9999991807272997, iteration: 52152
loss: 1.0002578496932983,grad_norm: 0.941373143175894, iteration: 52153
loss: 0.9806398153305054,grad_norm: 0.9129905241909166, iteration: 52154
loss: 0.9725003838539124,grad_norm: 0.9647493569535177, iteration: 52155
loss: 1.0351816415786743,grad_norm: 0.9261255295693531, iteration: 52156
loss: 1.003645420074463,grad_norm: 0.9999990649231513, iteration: 52157
loss: 0.9999245405197144,grad_norm: 0.999999065568433, iteration: 52158
loss: 1.0055946111679077,grad_norm: 0.9999990048714184, iteration: 52159
loss: 0.9824627041816711,grad_norm: 0.9999992172564849, iteration: 52160
loss: 1.0189262628555298,grad_norm: 0.9999992055047416, iteration: 52161
loss: 0.9814387559890747,grad_norm: 0.9999997282810897, iteration: 52162
loss: 0.9730843901634216,grad_norm: 0.9999991347014138, iteration: 52163
loss: 0.9999761581420898,grad_norm: 0.9929492452590315, iteration: 52164
loss: 1.0196017026901245,grad_norm: 0.9627317352262312, iteration: 52165
loss: 0.9501022696495056,grad_norm: 0.9997003481563072, iteration: 52166
loss: 1.0052727460861206,grad_norm: 0.9226565601380926, iteration: 52167
loss: 1.027429461479187,grad_norm: 0.9780967791075571, iteration: 52168
loss: 0.979352593421936,grad_norm: 0.9042797221623953, iteration: 52169
loss: 0.9872577786445618,grad_norm: 0.8784051866902981, iteration: 52170
loss: 1.015255331993103,grad_norm: 0.9999991893762011, iteration: 52171
loss: 1.0234187841415405,grad_norm: 0.9999991134761089, iteration: 52172
loss: 0.9871006011962891,grad_norm: 0.9999989873309946, iteration: 52173
loss: 1.0402283668518066,grad_norm: 0.9999992091344723, iteration: 52174
loss: 1.0148131847381592,grad_norm: 0.9999991740872357, iteration: 52175
loss: 0.9981545805931091,grad_norm: 0.9999990596400241, iteration: 52176
loss: 1.0000039339065552,grad_norm: 0.9179956655452769, iteration: 52177
loss: 1.0107262134552002,grad_norm: 0.9999990934708174, iteration: 52178
loss: 1.004235029220581,grad_norm: 0.9999990710213478, iteration: 52179
loss: 0.9804631471633911,grad_norm: 0.9184669515400178, iteration: 52180
loss: 1.0073161125183105,grad_norm: 0.9777394826776555, iteration: 52181
loss: 1.0215781927108765,grad_norm: 0.9999991555244644, iteration: 52182
loss: 0.975352942943573,grad_norm: 0.9675184317594848, iteration: 52183
loss: 1.0124274492263794,grad_norm: 0.923918915702079, iteration: 52184
loss: 0.9960267543792725,grad_norm: 0.9749686317460567, iteration: 52185
loss: 1.010026216506958,grad_norm: 0.8639461419947816, iteration: 52186
loss: 1.046562671661377,grad_norm: 0.9999994774145289, iteration: 52187
loss: 1.004415512084961,grad_norm: 0.9999997865386674, iteration: 52188
loss: 1.03231942653656,grad_norm: 0.9999991887216872, iteration: 52189
loss: 0.9605690836906433,grad_norm: 0.9999990938217649, iteration: 52190
loss: 1.028441071510315,grad_norm: 0.999998868027534, iteration: 52191
loss: 1.0085526704788208,grad_norm: 0.9999990658667584, iteration: 52192
loss: 1.0244178771972656,grad_norm: 0.9999989865335724, iteration: 52193
loss: 0.9826248288154602,grad_norm: 0.99999918609083, iteration: 52194
loss: 1.0348299741744995,grad_norm: 0.9999992415733572, iteration: 52195
loss: 1.0050466060638428,grad_norm: 0.9999990012472882, iteration: 52196
loss: 1.0293605327606201,grad_norm: 0.9999992329811699, iteration: 52197
loss: 0.9922855496406555,grad_norm: 0.9999991310290858, iteration: 52198
loss: 1.0339550971984863,grad_norm: 0.9999991549749228, iteration: 52199
loss: 0.9468007683753967,grad_norm: 0.8949097733927256, iteration: 52200
loss: 1.0106284618377686,grad_norm: 0.9999991537279543, iteration: 52201
loss: 1.0213149785995483,grad_norm: 0.9999991143489221, iteration: 52202
loss: 1.0113425254821777,grad_norm: 0.9999990706418872, iteration: 52203
loss: 0.9642355442047119,grad_norm: 0.9999991603283629, iteration: 52204
loss: 1.0001835823059082,grad_norm: 0.9584842612718389, iteration: 52205
loss: 0.984977662563324,grad_norm: 0.8892643748358325, iteration: 52206
loss: 0.9804644584655762,grad_norm: 0.9833035001117729, iteration: 52207
loss: 1.0091496706008911,grad_norm: 0.8955787346570714, iteration: 52208
loss: 0.9968137145042419,grad_norm: 0.9999990901675558, iteration: 52209
loss: 1.0282886028289795,grad_norm: 0.9427153363238024, iteration: 52210
loss: 1.0025460720062256,grad_norm: 0.888117075081567, iteration: 52211
loss: 1.021506905555725,grad_norm: 0.9999999248225174, iteration: 52212
loss: 0.9903910756111145,grad_norm: 0.9098079722209004, iteration: 52213
loss: 1.0247297286987305,grad_norm: 0.9999990932088986, iteration: 52214
loss: 0.9973056316375732,grad_norm: 0.9999989897043001, iteration: 52215
loss: 1.0232731103897095,grad_norm: 0.9304910686230977, iteration: 52216
loss: 1.0048779249191284,grad_norm: 0.900830768637774, iteration: 52217
loss: 1.019910216331482,grad_norm: 0.9496725287603314, iteration: 52218
loss: 0.9911221861839294,grad_norm: 0.999999126640293, iteration: 52219
loss: 1.0090075731277466,grad_norm: 0.9999991209106223, iteration: 52220
loss: 1.0059748888015747,grad_norm: 0.9999992273861935, iteration: 52221
loss: 0.9888064861297607,grad_norm: 0.9618204112291306, iteration: 52222
loss: 0.9893879294395447,grad_norm: 0.8014165151786591, iteration: 52223
loss: 1.010377287864685,grad_norm: 0.9999991590913487, iteration: 52224
loss: 0.9812151789665222,grad_norm: 0.9999997555474135, iteration: 52225
loss: 0.9848629832267761,grad_norm: 0.8815515929465627, iteration: 52226
loss: 0.9834995865821838,grad_norm: 0.9999992047080156, iteration: 52227
loss: 1.0254589319229126,grad_norm: 0.9588293582286785, iteration: 52228
loss: 0.9601621627807617,grad_norm: 0.8945036530622154, iteration: 52229
loss: 1.017809271812439,grad_norm: 0.9999990495170482, iteration: 52230
loss: 1.0035432577133179,grad_norm: 0.8642537648324286, iteration: 52231
loss: 0.9901378154754639,grad_norm: 0.9952911269313165, iteration: 52232
loss: 1.0202291011810303,grad_norm: 0.999999211465021, iteration: 52233
loss: 1.0069327354431152,grad_norm: 0.8997741104881765, iteration: 52234
loss: 1.011008620262146,grad_norm: 0.7969597981772383, iteration: 52235
loss: 1.0392638444900513,grad_norm: 0.8791108031347091, iteration: 52236
loss: 1.000335693359375,grad_norm: 0.9999990772699389, iteration: 52237
loss: 0.9851032495498657,grad_norm: 0.9999990546427683, iteration: 52238
loss: 0.9969708323478699,grad_norm: 0.9999991977971455, iteration: 52239
loss: 0.9977588653564453,grad_norm: 0.9999991277434156, iteration: 52240
loss: 0.9671507477760315,grad_norm: 0.9999992122936944, iteration: 52241
loss: 1.0116162300109863,grad_norm: 0.999999190963954, iteration: 52242
loss: 1.0008349418640137,grad_norm: 0.999999099647394, iteration: 52243
loss: 1.009421944618225,grad_norm: 0.9999990367051435, iteration: 52244
loss: 0.9799272418022156,grad_norm: 0.9999989688769177, iteration: 52245
loss: 1.0437569618225098,grad_norm: 0.9999998220815205, iteration: 52246
loss: 0.9945089221000671,grad_norm: 0.9101549400955744, iteration: 52247
loss: 1.000268816947937,grad_norm: 0.9999992237822253, iteration: 52248
loss: 1.0429502725601196,grad_norm: 0.9380331804533132, iteration: 52249
loss: 1.032046914100647,grad_norm: 0.999999135277135, iteration: 52250
loss: 0.9949274063110352,grad_norm: 0.9999989979095529, iteration: 52251
loss: 0.9951592087745667,grad_norm: 0.9698554128229583, iteration: 52252
loss: 1.0128744840621948,grad_norm: 0.9999996858396716, iteration: 52253
loss: 1.0260753631591797,grad_norm: 0.9999991313593446, iteration: 52254
loss: 1.0014768838882446,grad_norm: 0.9999997195209775, iteration: 52255
loss: 0.9962805509567261,grad_norm: 0.9444270915169397, iteration: 52256
loss: 1.0018411874771118,grad_norm: 0.9999989486566718, iteration: 52257
loss: 0.9902284741401672,grad_norm: 0.999999092278263, iteration: 52258
loss: 1.0114424228668213,grad_norm: 0.9858204469095097, iteration: 52259
loss: 0.9967257380485535,grad_norm: 0.9999992723979517, iteration: 52260
loss: 1.0207939147949219,grad_norm: 0.9999990426605692, iteration: 52261
loss: 1.0257997512817383,grad_norm: 0.9999989842801764, iteration: 52262
loss: 0.9942286014556885,grad_norm: 0.9999992515776899, iteration: 52263
loss: 1.0169352293014526,grad_norm: 0.895037089503808, iteration: 52264
loss: 1.0131220817565918,grad_norm: 0.953300741127461, iteration: 52265
loss: 0.9921233654022217,grad_norm: 0.9371092853196957, iteration: 52266
loss: 1.005629539489746,grad_norm: 0.977060369287605, iteration: 52267
loss: 1.0205501317977905,grad_norm: 0.9135308163136742, iteration: 52268
loss: 1.021759033203125,grad_norm: 0.9999991773677248, iteration: 52269
loss: 0.9946295619010925,grad_norm: 0.8554999073800124, iteration: 52270
loss: 1.0176864862442017,grad_norm: 0.9375314980257171, iteration: 52271
loss: 0.9989152550697327,grad_norm: 0.9999991078839522, iteration: 52272
loss: 1.0048894882202148,grad_norm: 0.9999991468334352, iteration: 52273
loss: 0.9857774972915649,grad_norm: 0.9999991199836326, iteration: 52274
loss: 0.975803017616272,grad_norm: 0.9345889447419968, iteration: 52275
loss: 0.982754111289978,grad_norm: 0.9285951895453973, iteration: 52276
loss: 0.9862840175628662,grad_norm: 0.9999996776027698, iteration: 52277
loss: 1.018164873123169,grad_norm: 0.9246042678058667, iteration: 52278
loss: 1.0023105144500732,grad_norm: 0.9999992204145199, iteration: 52279
loss: 0.9858596324920654,grad_norm: 0.9999990829548024, iteration: 52280
loss: 1.011354923248291,grad_norm: 0.9175999617455091, iteration: 52281
loss: 0.9933084845542908,grad_norm: 0.9258361632021209, iteration: 52282
loss: 1.0023200511932373,grad_norm: 0.9999991291068575, iteration: 52283
loss: 1.0015698671340942,grad_norm: 0.8795694012715303, iteration: 52284
loss: 1.0219582319259644,grad_norm: 0.9999992366369264, iteration: 52285
loss: 0.9798008799552917,grad_norm: 0.9999991152540809, iteration: 52286
loss: 1.0049822330474854,grad_norm: 0.9077236236527281, iteration: 52287
loss: 0.9531196355819702,grad_norm: 0.9999990155149764, iteration: 52288
loss: 0.9675408005714417,grad_norm: 0.9441911858346949, iteration: 52289
loss: 1.0129836797714233,grad_norm: 0.9999994531701178, iteration: 52290
loss: 1.0067309141159058,grad_norm: 0.9035249701405514, iteration: 52291
loss: 1.05043625831604,grad_norm: 0.9999993283755847, iteration: 52292
loss: 0.9900996088981628,grad_norm: 0.999999201218907, iteration: 52293
loss: 0.9957575798034668,grad_norm: 0.9999991047661846, iteration: 52294
loss: 1.0039366483688354,grad_norm: 0.9209769102838194, iteration: 52295
loss: 1.0244123935699463,grad_norm: 0.9999992925631553, iteration: 52296
loss: 0.9892852306365967,grad_norm: 0.9999993883447018, iteration: 52297
loss: 0.9745187759399414,grad_norm: 0.9999990334026063, iteration: 52298
loss: 0.9925999641418457,grad_norm: 0.9999989385481438, iteration: 52299
loss: 1.0161123275756836,grad_norm: 0.9717719469441308, iteration: 52300
loss: 0.9755471348762512,grad_norm: 0.947150230870046, iteration: 52301
loss: 0.9805586338043213,grad_norm: 0.974261616807661, iteration: 52302
loss: 1.0184667110443115,grad_norm: 0.9999990375351782, iteration: 52303
loss: 0.9801533818244934,grad_norm: 0.9189620850867253, iteration: 52304
loss: 0.9801554083824158,grad_norm: 0.8838758229034249, iteration: 52305
loss: 1.006654977798462,grad_norm: 0.9999990118857077, iteration: 52306
loss: 1.0269275903701782,grad_norm: 0.9999991894367612, iteration: 52307
loss: 1.0435994863510132,grad_norm: 0.9999991493096109, iteration: 52308
loss: 1.034504771232605,grad_norm: 0.9506685583316324, iteration: 52309
loss: 1.008739709854126,grad_norm: 0.9999991225845044, iteration: 52310
loss: 1.033024549484253,grad_norm: 0.9410386804748093, iteration: 52311
loss: 0.9893225431442261,grad_norm: 0.9999991363487702, iteration: 52312
loss: 1.0077290534973145,grad_norm: 0.9999990703262064, iteration: 52313
loss: 1.0723955631256104,grad_norm: 0.9999993932329099, iteration: 52314
loss: 0.995039165019989,grad_norm: 0.9071410963693975, iteration: 52315
loss: 0.9666250348091125,grad_norm: 0.9999991175101355, iteration: 52316
loss: 1.0197244882583618,grad_norm: 0.9999991813450105, iteration: 52317
loss: 0.9746285676956177,grad_norm: 0.9374024298173305, iteration: 52318
loss: 1.0309067964553833,grad_norm: 0.9609351415753337, iteration: 52319
loss: 0.9971239566802979,grad_norm: 0.9999990502533114, iteration: 52320
loss: 0.9990711212158203,grad_norm: 0.9349725066868655, iteration: 52321
loss: 0.9933608174324036,grad_norm: 0.8173929712135454, iteration: 52322
loss: 1.0098719596862793,grad_norm: 0.9999990717507399, iteration: 52323
loss: 1.0023359060287476,grad_norm: 0.9999992499801982, iteration: 52324
loss: 1.003969430923462,grad_norm: 0.8376344995576467, iteration: 52325
loss: 1.0647035837173462,grad_norm: 0.9999999500592014, iteration: 52326
loss: 0.9998874068260193,grad_norm: 0.9217928147450866, iteration: 52327
loss: 1.0192707777023315,grad_norm: 0.9547359633588612, iteration: 52328
loss: 0.9876874089241028,grad_norm: 0.9662508272799657, iteration: 52329
loss: 1.0504956245422363,grad_norm: 0.9999997061610203, iteration: 52330
loss: 0.9758979082107544,grad_norm: 0.979052416279263, iteration: 52331
loss: 1.0015151500701904,grad_norm: 0.9999991577651027, iteration: 52332
loss: 0.975125789642334,grad_norm: 0.9999991446511868, iteration: 52333
loss: 0.9704816341400146,grad_norm: 0.9999990375556332, iteration: 52334
loss: 1.004911184310913,grad_norm: 0.986135773081008, iteration: 52335
loss: 0.9832077026367188,grad_norm: 0.9999992295153483, iteration: 52336
loss: 1.0291892290115356,grad_norm: 0.8684937000323717, iteration: 52337
loss: 0.9843740463256836,grad_norm: 0.9999991664182492, iteration: 52338
loss: 0.9850708246231079,grad_norm: 0.938168283457029, iteration: 52339
loss: 1.0210233926773071,grad_norm: 0.9999992497127274, iteration: 52340
loss: 1.0300383567810059,grad_norm: 0.9816868265714321, iteration: 52341
loss: 0.9943310618400574,grad_norm: 0.9474982858538548, iteration: 52342
loss: 1.0144792795181274,grad_norm: 0.9708161290005208, iteration: 52343
loss: 1.0270003080368042,grad_norm: 0.9569292337920806, iteration: 52344
loss: 1.041123867034912,grad_norm: 0.9999995232602997, iteration: 52345
loss: 0.9780539870262146,grad_norm: 0.8997864480303399, iteration: 52346
loss: 1.036921501159668,grad_norm: 0.9999995745257396, iteration: 52347
loss: 1.048324704170227,grad_norm: 0.9133191893143259, iteration: 52348
loss: 0.9960648417472839,grad_norm: 0.9445767741566462, iteration: 52349
loss: 0.9881204962730408,grad_norm: 0.9149784277300848, iteration: 52350
loss: 1.030165433883667,grad_norm: 0.9999991005797461, iteration: 52351
loss: 0.9813818335533142,grad_norm: 0.8829110431653954, iteration: 52352
loss: 1.0314617156982422,grad_norm: 0.8628643211659341, iteration: 52353
loss: 0.9753251671791077,grad_norm: 0.9999990704028828, iteration: 52354
loss: 0.973405659198761,grad_norm: 0.9999991832945966, iteration: 52355
loss: 0.990280032157898,grad_norm: 0.9999990671315736, iteration: 52356
loss: 1.0044405460357666,grad_norm: 0.9999990574377825, iteration: 52357
loss: 0.9985483884811401,grad_norm: 0.9524622160802478, iteration: 52358
loss: 1.0052717924118042,grad_norm: 0.8061016166856527, iteration: 52359
loss: 0.9884070754051208,grad_norm: 0.8538769035629189, iteration: 52360
loss: 1.036816954612732,grad_norm: 0.9999991315356153, iteration: 52361
loss: 0.9917975068092346,grad_norm: 0.9999992215608928, iteration: 52362
loss: 0.9654774069786072,grad_norm: 0.8198079951197269, iteration: 52363
loss: 0.9779846668243408,grad_norm: 0.9999990257964284, iteration: 52364
loss: 0.9797824025154114,grad_norm: 0.9999990509564992, iteration: 52365
loss: 0.9905213117599487,grad_norm: 0.9241664250044422, iteration: 52366
loss: 1.0008022785186768,grad_norm: 0.9474162135421533, iteration: 52367
loss: 1.002936840057373,grad_norm: 0.9999991850313911, iteration: 52368
loss: 1.0027320384979248,grad_norm: 0.9999992512166422, iteration: 52369
loss: 0.9781144261360168,grad_norm: 0.9999992755250008, iteration: 52370
loss: 1.012731909751892,grad_norm: 0.9999990543452159, iteration: 52371
loss: 0.9982840418815613,grad_norm: 0.9582932344059258, iteration: 52372
loss: 0.965903639793396,grad_norm: 0.7937322333784381, iteration: 52373
loss: 1.010343074798584,grad_norm: 0.9962616347619752, iteration: 52374
loss: 1.0201544761657715,grad_norm: 0.9999993288629859, iteration: 52375
loss: 0.9991720914840698,grad_norm: 0.9838232119565714, iteration: 52376
loss: 0.9990724325180054,grad_norm: 0.9604526544884161, iteration: 52377
loss: 1.0264328718185425,grad_norm: 0.9999992111415624, iteration: 52378
loss: 0.9596548080444336,grad_norm: 0.9999990363210239, iteration: 52379
loss: 0.9983943700790405,grad_norm: 0.9999991356252339, iteration: 52380
loss: 0.9889541268348694,grad_norm: 0.9999991810312544, iteration: 52381
loss: 0.9950007200241089,grad_norm: 0.9999991110986504, iteration: 52382
loss: 1.0149109363555908,grad_norm: 0.8909602493179408, iteration: 52383
loss: 1.0225636959075928,grad_norm: 0.8996517517665226, iteration: 52384
loss: 0.97123122215271,grad_norm: 0.9999996546726595, iteration: 52385
loss: 1.0089255571365356,grad_norm: 0.9671995872288756, iteration: 52386
loss: 1.0049638748168945,grad_norm: 0.9999995823242923, iteration: 52387
loss: 1.021492600440979,grad_norm: 0.9999991005353716, iteration: 52388
loss: 0.9908379316329956,grad_norm: 0.9999992548772983, iteration: 52389
loss: 1.0252463817596436,grad_norm: 0.9999991077566072, iteration: 52390
loss: 0.9911283850669861,grad_norm: 0.999998949671806, iteration: 52391
loss: 0.9969287514686584,grad_norm: 0.9001397067828038, iteration: 52392
loss: 0.9835979342460632,grad_norm: 0.9999991093389959, iteration: 52393
loss: 1.0075098276138306,grad_norm: 0.9939106451473309, iteration: 52394
loss: 1.0215742588043213,grad_norm: 0.9999991442312454, iteration: 52395
loss: 0.9987658858299255,grad_norm: 0.8481336601325873, iteration: 52396
loss: 0.9828414916992188,grad_norm: 0.9999991454387398, iteration: 52397
loss: 1.0040700435638428,grad_norm: 0.990846162731969, iteration: 52398
loss: 0.9822572469711304,grad_norm: 0.9999990465557287, iteration: 52399
loss: 0.9606578350067139,grad_norm: 0.9999990077390936, iteration: 52400
loss: 1.0320401191711426,grad_norm: 0.8860557261831712, iteration: 52401
loss: 0.9936793446540833,grad_norm: 0.9999989654973127, iteration: 52402
loss: 0.9963012337684631,grad_norm: 0.9999991085167877, iteration: 52403
loss: 0.9735731482505798,grad_norm: 0.9999989732251774, iteration: 52404
loss: 0.9881724119186401,grad_norm: 0.9765417621425748, iteration: 52405
loss: 0.9785771369934082,grad_norm: 0.8094496666148244, iteration: 52406
loss: 0.9920075535774231,grad_norm: 0.9999992057780381, iteration: 52407
loss: 1.015998363494873,grad_norm: 0.9999991068279097, iteration: 52408
loss: 1.0017738342285156,grad_norm: 0.9284765407096254, iteration: 52409
loss: 0.9933993816375732,grad_norm: 0.9244313726416609, iteration: 52410
loss: 1.010339617729187,grad_norm: 0.8682437595399468, iteration: 52411
loss: 1.0134778022766113,grad_norm: 0.9785107201127952, iteration: 52412
loss: 0.9860495924949646,grad_norm: 0.878595235990591, iteration: 52413
loss: 0.9727494120597839,grad_norm: 0.9400999027931563, iteration: 52414
loss: 1.0092288255691528,grad_norm: 0.9999990828894507, iteration: 52415
loss: 1.0237681865692139,grad_norm: 0.9999992291467817, iteration: 52416
loss: 1.052198886871338,grad_norm: 0.9999991093636758, iteration: 52417
loss: 1.0056982040405273,grad_norm: 0.9452691991810138, iteration: 52418
loss: 1.0376087427139282,grad_norm: 0.8380874280907825, iteration: 52419
loss: 0.9577839970588684,grad_norm: 0.999999024403971, iteration: 52420
loss: 1.0145882368087769,grad_norm: 0.8481262244614777, iteration: 52421
loss: 0.944351077079773,grad_norm: 0.9051301433642532, iteration: 52422
loss: 0.9955536127090454,grad_norm: 0.9108825662545165, iteration: 52423
loss: 1.0309265851974487,grad_norm: 0.9999993739648237, iteration: 52424
loss: 0.9979011416435242,grad_norm: 0.9999993107023593, iteration: 52425
loss: 1.0177726745605469,grad_norm: 0.8767034465735974, iteration: 52426
loss: 0.9852657318115234,grad_norm: 0.9999991011147871, iteration: 52427
loss: 1.0057262182235718,grad_norm: 0.999999060882297, iteration: 52428
loss: 0.9939820170402527,grad_norm: 0.9619393224898846, iteration: 52429
loss: 0.9757434725761414,grad_norm: 0.9532392315025523, iteration: 52430
loss: 0.9916090369224548,grad_norm: 0.9999989607385957, iteration: 52431
loss: 1.0022320747375488,grad_norm: 0.923002036035361, iteration: 52432
loss: 1.026460886001587,grad_norm: 0.9301773233348195, iteration: 52433
loss: 0.991183340549469,grad_norm: 0.9933637904977064, iteration: 52434
loss: 1.0101161003112793,grad_norm: 0.999999595391295, iteration: 52435
loss: 1.022278904914856,grad_norm: 0.9516914789320347, iteration: 52436
loss: 1.0006444454193115,grad_norm: 0.8210688764912298, iteration: 52437
loss: 1.036005973815918,grad_norm: 0.9654527753112582, iteration: 52438
loss: 0.987868070602417,grad_norm: 0.9595316651275413, iteration: 52439
loss: 0.9875596761703491,grad_norm: 0.9957242634512032, iteration: 52440
loss: 1.0173027515411377,grad_norm: 0.9999991965486409, iteration: 52441
loss: 0.986329972743988,grad_norm: 0.9999993262585537, iteration: 52442
loss: 1.004253625869751,grad_norm: 0.9910222278175888, iteration: 52443
loss: 0.9875437021255493,grad_norm: 0.957573063091961, iteration: 52444
loss: 1.0405259132385254,grad_norm: 0.9999990803145544, iteration: 52445
loss: 0.9695523977279663,grad_norm: 0.931071886637842, iteration: 52446
loss: 1.0005912780761719,grad_norm: 0.9031038689300306, iteration: 52447
loss: 1.0266172885894775,grad_norm: 0.9999992060677886, iteration: 52448
loss: 1.0206642150878906,grad_norm: 0.9999990876390403, iteration: 52449
loss: 1.0063438415527344,grad_norm: 0.999999734892651, iteration: 52450
loss: 1.0238065719604492,grad_norm: 0.9999991290826126, iteration: 52451
loss: 1.0427197217941284,grad_norm: 0.9999990698020161, iteration: 52452
loss: 1.104009747505188,grad_norm: 0.9999998662740593, iteration: 52453
loss: 0.988917887210846,grad_norm: 0.9512138035222929, iteration: 52454
loss: 0.9866706728935242,grad_norm: 0.943087218860856, iteration: 52455
loss: 1.0303575992584229,grad_norm: 0.9999991430043582, iteration: 52456
loss: 1.011254072189331,grad_norm: 0.9999989153301736, iteration: 52457
loss: 1.0246189832687378,grad_norm: 0.9999991788651316, iteration: 52458
loss: 1.0293883085250854,grad_norm: 0.9999991260785681, iteration: 52459
loss: 1.0141255855560303,grad_norm: 0.9523447009922343, iteration: 52460
loss: 1.0189851522445679,grad_norm: 0.9999990913183486, iteration: 52461
loss: 1.001399040222168,grad_norm: 0.9999991464366349, iteration: 52462
loss: 0.9822985529899597,grad_norm: 0.9338512276832018, iteration: 52463
loss: 0.9851021766662598,grad_norm: 0.9999993717508914, iteration: 52464
loss: 1.008982539176941,grad_norm: 0.8960796930471813, iteration: 52465
loss: 1.0113835334777832,grad_norm: 0.9943599332816891, iteration: 52466
loss: 1.0172498226165771,grad_norm: 0.9507326814870521, iteration: 52467
loss: 1.0352833271026611,grad_norm: 0.9999989775015771, iteration: 52468
loss: 0.9963052272796631,grad_norm: 0.7692064295610292, iteration: 52469
loss: 0.9858381748199463,grad_norm: 0.9662051542421544, iteration: 52470
loss: 0.992134153842926,grad_norm: 0.9999995876404032, iteration: 52471
loss: 0.96084064245224,grad_norm: 0.9999991370472805, iteration: 52472
loss: 0.9864747524261475,grad_norm: 0.9418915187350264, iteration: 52473
loss: 1.143874168395996,grad_norm: 0.9999990537028353, iteration: 52474
loss: 1.0081466436386108,grad_norm: 0.9999993615672789, iteration: 52475
loss: 1.0417029857635498,grad_norm: 0.9999991968504909, iteration: 52476
loss: 1.0548584461212158,grad_norm: 0.9999993532491714, iteration: 52477
loss: 1.0260450839996338,grad_norm: 0.9999990588449389, iteration: 52478
loss: 0.9891172647476196,grad_norm: 0.9529549990388164, iteration: 52479
loss: 1.0004972219467163,grad_norm: 0.9999992073610385, iteration: 52480
loss: 1.0049247741699219,grad_norm: 0.9999992570302059, iteration: 52481
loss: 1.0167158842086792,grad_norm: 0.9999991034566867, iteration: 52482
loss: 1.0441566705703735,grad_norm: 0.9999990904768877, iteration: 52483
loss: 1.0056573152542114,grad_norm: 0.906316774309922, iteration: 52484
loss: 0.972723662853241,grad_norm: 0.977057438061903, iteration: 52485
loss: 1.0019487142562866,grad_norm: 0.9999991481356173, iteration: 52486
loss: 0.9904470443725586,grad_norm: 0.9999989686478679, iteration: 52487
loss: 0.998421847820282,grad_norm: 0.9999990905539222, iteration: 52488
loss: 1.0126941204071045,grad_norm: 0.9708671315261989, iteration: 52489
loss: 1.0194376707077026,grad_norm: 0.9997991953794874, iteration: 52490
loss: 0.9914805889129639,grad_norm: 0.9904908750840594, iteration: 52491
loss: 0.9891508221626282,grad_norm: 0.9999997636678387, iteration: 52492
loss: 0.9852991700172424,grad_norm: 0.9629905424020513, iteration: 52493
loss: 1.007817029953003,grad_norm: 0.9999993448063444, iteration: 52494
loss: 0.9756603240966797,grad_norm: 0.9461587778226805, iteration: 52495
loss: 1.041533350944519,grad_norm: 0.9604784906812177, iteration: 52496
loss: 1.0140503644943237,grad_norm: 0.9518549808305847, iteration: 52497
loss: 0.9988463521003723,grad_norm: 0.9774816895194992, iteration: 52498
loss: 1.0056071281433105,grad_norm: 0.9909602273915038, iteration: 52499
loss: 1.0135303735733032,grad_norm: 0.8967768427026218, iteration: 52500
loss: 0.9826733469963074,grad_norm: 0.9999989426433924, iteration: 52501
loss: 0.9736502170562744,grad_norm: 0.9415075338727873, iteration: 52502
loss: 1.0043989419937134,grad_norm: 0.8581025975398678, iteration: 52503
loss: 0.9848787784576416,grad_norm: 0.8741877356878719, iteration: 52504
loss: 0.9646077752113342,grad_norm: 0.9999991991116067, iteration: 52505
loss: 1.0207828283309937,grad_norm: 0.9999995089469427, iteration: 52506
loss: 0.9925031065940857,grad_norm: 0.9999992474088746, iteration: 52507
loss: 1.027146339416504,grad_norm: 0.9999990838671889, iteration: 52508
loss: 1.0064282417297363,grad_norm: 0.9999990245216479, iteration: 52509
loss: 0.9856387376785278,grad_norm: 0.9999989319808059, iteration: 52510
loss: 1.0082151889801025,grad_norm: 0.9999991817018224, iteration: 52511
loss: 1.0020458698272705,grad_norm: 0.9999990601458005, iteration: 52512
loss: 0.9567057490348816,grad_norm: 0.9576692928613076, iteration: 52513
loss: 1.0088785886764526,grad_norm: 0.9999991505878216, iteration: 52514
loss: 1.0372201204299927,grad_norm: 0.9125081796732064, iteration: 52515
loss: 0.9972261786460876,grad_norm: 0.9999991497839964, iteration: 52516
loss: 0.9997828006744385,grad_norm: 0.9999990638549491, iteration: 52517
loss: 0.937892735004425,grad_norm: 0.9999992028709442, iteration: 52518
loss: 0.9796501398086548,grad_norm: 0.9999990823364022, iteration: 52519
loss: 1.0337733030319214,grad_norm: 0.9611168624340181, iteration: 52520
loss: 0.9870139360427856,grad_norm: 0.999999088263095, iteration: 52521
loss: 0.9608628153800964,grad_norm: 0.9766822047409396, iteration: 52522
loss: 0.9813275933265686,grad_norm: 0.9999991951595493, iteration: 52523
loss: 0.9994842410087585,grad_norm: 0.9999998526619399, iteration: 52524
loss: 0.976033627986908,grad_norm: 0.8800756225789617, iteration: 52525
loss: 0.9573622941970825,grad_norm: 0.9999991098108529, iteration: 52526
loss: 0.9679209589958191,grad_norm: 0.9999990765921712, iteration: 52527
loss: 0.971044659614563,grad_norm: 0.9778898893353454, iteration: 52528
loss: 1.0150238275527954,grad_norm: 0.999999184347342, iteration: 52529
loss: 0.9813985824584961,grad_norm: 0.999999061565367, iteration: 52530
loss: 1.042454719543457,grad_norm: 0.9793708946258615, iteration: 52531
loss: 0.9777301549911499,grad_norm: 0.9745497489062672, iteration: 52532
loss: 0.9705896973609924,grad_norm: 0.9999991011887309, iteration: 52533
loss: 1.0004278421401978,grad_norm: 0.8598926537646991, iteration: 52534
loss: 0.9852398633956909,grad_norm: 0.9999990724622548, iteration: 52535
loss: 0.9654357433319092,grad_norm: 0.9609437938399528, iteration: 52536
loss: 1.0385645627975464,grad_norm: 0.9551815672605141, iteration: 52537
loss: 1.0183926820755005,grad_norm: 0.9999991528721193, iteration: 52538
loss: 1.0073251724243164,grad_norm: 0.908710488949777, iteration: 52539
loss: 1.0143884420394897,grad_norm: 0.9448875316512051, iteration: 52540
loss: 1.0177768468856812,grad_norm: 0.8031222149198606, iteration: 52541
loss: 1.007042407989502,grad_norm: 0.9059597609272231, iteration: 52542
loss: 1.0117627382278442,grad_norm: 0.958954172829234, iteration: 52543
loss: 1.0057487487792969,grad_norm: 0.9999992346883209, iteration: 52544
loss: 1.0236271619796753,grad_norm: 0.887520913956231, iteration: 52545
loss: 1.036190152168274,grad_norm: 0.9999990838308161, iteration: 52546
loss: 0.9891981482505798,grad_norm: 0.9075773889467228, iteration: 52547
loss: 0.9879035353660583,grad_norm: 0.9999991017023399, iteration: 52548
loss: 0.997960090637207,grad_norm: 0.8620232917830046, iteration: 52549
loss: 1.0262327194213867,grad_norm: 0.9999990494898234, iteration: 52550
loss: 1.049631118774414,grad_norm: 0.999999649633296, iteration: 52551
loss: 0.9653907418251038,grad_norm: 0.9999990372642273, iteration: 52552
loss: 1.0061310529708862,grad_norm: 0.999999182250785, iteration: 52553
loss: 0.9674562215805054,grad_norm: 0.9999991207700908, iteration: 52554
loss: 1.0103927850723267,grad_norm: 0.9999990794640251, iteration: 52555
loss: 0.997208833694458,grad_norm: 0.9176979822486327, iteration: 52556
loss: 0.9763466715812683,grad_norm: 0.9999991847971307, iteration: 52557
loss: 1.0256608724594116,grad_norm: 0.9424770049700852, iteration: 52558
loss: 1.0513031482696533,grad_norm: 0.9999996747264212, iteration: 52559
loss: 1.0223801136016846,grad_norm: 0.9999992956252644, iteration: 52560
loss: 1.0076478719711304,grad_norm: 0.999998958672437, iteration: 52561
loss: 1.0067195892333984,grad_norm: 0.999999097633121, iteration: 52562
loss: 0.9900213479995728,grad_norm: 0.8794082926064, iteration: 52563
loss: 0.9771530628204346,grad_norm: 0.9999990646488738, iteration: 52564
loss: 1.021129846572876,grad_norm: 0.965179659143733, iteration: 52565
loss: 0.9962019324302673,grad_norm: 0.8788066259827685, iteration: 52566
loss: 1.0034780502319336,grad_norm: 0.9556369287120504, iteration: 52567
loss: 0.9838570952415466,grad_norm: 0.9999990097082048, iteration: 52568
loss: 1.0171942710876465,grad_norm: 0.992146297652701, iteration: 52569
loss: 1.00474214553833,grad_norm: 0.9427640076965564, iteration: 52570
loss: 0.9916775822639465,grad_norm: 0.9091725372187848, iteration: 52571
loss: 0.9650363922119141,grad_norm: 0.9999991256358467, iteration: 52572
loss: 1.0255719423294067,grad_norm: 0.9999991947778475, iteration: 52573
loss: 0.9641063809394836,grad_norm: 0.999999053814259, iteration: 52574
loss: 1.001450777053833,grad_norm: 0.9999990564918967, iteration: 52575
loss: 0.9819746017456055,grad_norm: 0.9895207645570062, iteration: 52576
loss: 1.0177271366119385,grad_norm: 0.8980371055579514, iteration: 52577
loss: 0.9718939065933228,grad_norm: 0.999999061543249, iteration: 52578
loss: 0.9859785437583923,grad_norm: 0.9517374703360354, iteration: 52579
loss: 1.0266050100326538,grad_norm: 0.9999992060601469, iteration: 52580
loss: 1.1205801963806152,grad_norm: 0.999999440268428, iteration: 52581
loss: 1.0214581489562988,grad_norm: 0.9999990691055584, iteration: 52582
loss: 0.9862600564956665,grad_norm: 0.9999990578128323, iteration: 52583
loss: 0.9849754571914673,grad_norm: 0.9999991841665127, iteration: 52584
loss: 0.9947140216827393,grad_norm: 0.9999990280634765, iteration: 52585
loss: 0.9492166638374329,grad_norm: 0.9999991077491915, iteration: 52586
loss: 1.013553261756897,grad_norm: 0.8691463679428449, iteration: 52587
loss: 0.9853118062019348,grad_norm: 0.9244082750665965, iteration: 52588
loss: 0.9964886903762817,grad_norm: 0.9999991510075682, iteration: 52589
loss: 0.9782238006591797,grad_norm: 0.9844585728101898, iteration: 52590
loss: 1.0055155754089355,grad_norm: 0.9999993158363423, iteration: 52591
loss: 1.0304428339004517,grad_norm: 0.9999990881780412, iteration: 52592
loss: 0.9835703372955322,grad_norm: 0.9999993691701627, iteration: 52593
loss: 0.9959302544593811,grad_norm: 0.999999040887642, iteration: 52594
loss: 0.9941575527191162,grad_norm: 0.9732489288362943, iteration: 52595
loss: 1.0355361700057983,grad_norm: 0.9999993040114951, iteration: 52596
loss: 1.0069829225540161,grad_norm: 0.9999989777307999, iteration: 52597
loss: 1.003294587135315,grad_norm: 0.9293950451888721, iteration: 52598
loss: 1.0138522386550903,grad_norm: 0.9999994293210206, iteration: 52599
loss: 0.9821313619613647,grad_norm: 0.981261077563069, iteration: 52600
loss: 0.9827798008918762,grad_norm: 0.9292890049446, iteration: 52601
loss: 1.0170798301696777,grad_norm: 0.8784990218362901, iteration: 52602
loss: 1.0322726964950562,grad_norm: 0.9628844588045166, iteration: 52603
loss: 0.9837108254432678,grad_norm: 0.9999991102553377, iteration: 52604
loss: 0.9915457367897034,grad_norm: 0.9999990658226845, iteration: 52605
loss: 1.000648856163025,grad_norm: 0.9999991779687086, iteration: 52606
loss: 0.9796247482299805,grad_norm: 0.9999990978416365, iteration: 52607
loss: 1.0395063161849976,grad_norm: 0.9999992084866346, iteration: 52608
loss: 0.9773001074790955,grad_norm: 0.9999991194917264, iteration: 52609
loss: 0.9880622029304504,grad_norm: 0.9999990896657448, iteration: 52610
loss: 1.0063393115997314,grad_norm: 0.9999992724736261, iteration: 52611
loss: 0.9868960380554199,grad_norm: 0.8718292052355638, iteration: 52612
loss: 0.990661084651947,grad_norm: 0.9999993073749, iteration: 52613
loss: 0.9962190985679626,grad_norm: 0.9999990380493331, iteration: 52614
loss: 0.9807633757591248,grad_norm: 0.9999991517429139, iteration: 52615
loss: 0.9643175005912781,grad_norm: 0.8754568989459736, iteration: 52616
loss: 0.9855276346206665,grad_norm: 0.9728022465698489, iteration: 52617
loss: 1.0015828609466553,grad_norm: 0.9999993836756677, iteration: 52618
loss: 1.001564621925354,grad_norm: 0.837354371583725, iteration: 52619
loss: 0.9865967631340027,grad_norm: 0.9861653936791617, iteration: 52620
loss: 0.979448139667511,grad_norm: 0.982676210965739, iteration: 52621
loss: 1.004113793373108,grad_norm: 0.8854157464496732, iteration: 52622
loss: 1.002455711364746,grad_norm: 0.9999991114074462, iteration: 52623
loss: 1.013784646987915,grad_norm: 0.9999992853220858, iteration: 52624
loss: 1.0174094438552856,grad_norm: 0.8678375330244631, iteration: 52625
loss: 0.9846438765525818,grad_norm: 0.8760357911424772, iteration: 52626
loss: 0.9903106093406677,grad_norm: 0.9999990395239805, iteration: 52627
loss: 1.001651406288147,grad_norm: 0.9380858187349228, iteration: 52628
loss: 1.0614646673202515,grad_norm: 0.9999991423652449, iteration: 52629
loss: 1.0109176635742188,grad_norm: 0.9999991685022186, iteration: 52630
loss: 0.9503737092018127,grad_norm: 0.999999066908778, iteration: 52631
loss: 0.9533805251121521,grad_norm: 0.9999990034491129, iteration: 52632
loss: 0.9963559508323669,grad_norm: 0.9999991879860948, iteration: 52633
loss: 0.96925950050354,grad_norm: 0.9999989789839678, iteration: 52634
loss: 1.0226707458496094,grad_norm: 0.9999990886383849, iteration: 52635
loss: 1.0168119668960571,grad_norm: 0.7936927381681702, iteration: 52636
loss: 0.9988667368888855,grad_norm: 0.9999991834096738, iteration: 52637
loss: 1.0228462219238281,grad_norm: 0.9999992595941712, iteration: 52638
loss: 0.9778266549110413,grad_norm: 0.9999989650076245, iteration: 52639
loss: 1.0312801599502563,grad_norm: 0.9999990952278038, iteration: 52640
loss: 0.9927046895027161,grad_norm: 0.9999989540305616, iteration: 52641
loss: 0.9768559336662292,grad_norm: 0.9984320297878236, iteration: 52642
loss: 0.9555545449256897,grad_norm: 0.9675577147852646, iteration: 52643
loss: 1.0192158222198486,grad_norm: 0.9999991982447833, iteration: 52644
loss: 0.9825811982154846,grad_norm: 0.9687554032261607, iteration: 52645
loss: 1.0377593040466309,grad_norm: 0.999999209333636, iteration: 52646
loss: 0.965254545211792,grad_norm: 0.9999990693243621, iteration: 52647
loss: 1.0119589567184448,grad_norm: 0.7981167242200267, iteration: 52648
loss: 0.9820476174354553,grad_norm: 0.978440360285463, iteration: 52649
loss: 1.0216784477233887,grad_norm: 0.9999990814739076, iteration: 52650
loss: 0.9493891596794128,grad_norm: 0.9999992925798538, iteration: 52651
loss: 1.018018364906311,grad_norm: 0.999999149762107, iteration: 52652
loss: 1.019119381904602,grad_norm: 0.9999994739863665, iteration: 52653
loss: 1.0504940748214722,grad_norm: 0.999999162789998, iteration: 52654
loss: 0.9786203503608704,grad_norm: 0.9837268972382497, iteration: 52655
loss: 0.9623539447784424,grad_norm: 0.9744981541423848, iteration: 52656
loss: 0.9953281283378601,grad_norm: 0.9999990231403822, iteration: 52657
loss: 1.0474462509155273,grad_norm: 0.999999068726626, iteration: 52658
loss: 1.0419951677322388,grad_norm: 0.990837702872224, iteration: 52659
loss: 1.0111207962036133,grad_norm: 0.9999992206776931, iteration: 52660
loss: 0.9906688332557678,grad_norm: 0.9843054706264851, iteration: 52661
loss: 0.9862822890281677,grad_norm: 0.9356754741908304, iteration: 52662
loss: 1.0365184545516968,grad_norm: 0.9999990545381514, iteration: 52663
loss: 0.955994725227356,grad_norm: 0.9999992070067625, iteration: 52664
loss: 0.967545747756958,grad_norm: 0.9725610847993199, iteration: 52665
loss: 1.001336693763733,grad_norm: 0.999999039960298, iteration: 52666
loss: 1.011475920677185,grad_norm: 0.9999990833796758, iteration: 52667
loss: 1.0521137714385986,grad_norm: 0.9870128996767732, iteration: 52668
loss: 0.973052978515625,grad_norm: 0.9999992899933439, iteration: 52669
loss: 0.9763443470001221,grad_norm: 0.9209187954783509, iteration: 52670
loss: 0.9873015880584717,grad_norm: 0.9999998714006398, iteration: 52671
loss: 1.0280146598815918,grad_norm: 0.9999991851976279, iteration: 52672
loss: 1.0191911458969116,grad_norm: 0.8703730592564384, iteration: 52673
loss: 1.0448946952819824,grad_norm: 0.7996536358415892, iteration: 52674
loss: 0.9931496977806091,grad_norm: 0.8211318044576459, iteration: 52675
loss: 0.9940061569213867,grad_norm: 0.9690286039817078, iteration: 52676
loss: 0.9994361400604248,grad_norm: 0.9999990994810285, iteration: 52677
loss: 1.0431166887283325,grad_norm: 0.9999990409615903, iteration: 52678
loss: 1.0065395832061768,grad_norm: 0.8988775136937763, iteration: 52679
loss: 1.0139222145080566,grad_norm: 0.9999991618537994, iteration: 52680
loss: 0.9708988666534424,grad_norm: 0.9999992088964215, iteration: 52681
loss: 1.030558705329895,grad_norm: 0.9999990680232472, iteration: 52682
loss: 0.9891064167022705,grad_norm: 0.9999992272272789, iteration: 52683
loss: 0.9888050556182861,grad_norm: 0.977701250225332, iteration: 52684
loss: 0.9985817074775696,grad_norm: 0.9999990568391378, iteration: 52685
loss: 0.9947821497917175,grad_norm: 0.9999990847225882, iteration: 52686
loss: 0.9726913571357727,grad_norm: 0.7712678216696086, iteration: 52687
loss: 0.9927619695663452,grad_norm: 0.9646698666252648, iteration: 52688
loss: 0.9497556686401367,grad_norm: 0.9999992229348479, iteration: 52689
loss: 1.0231764316558838,grad_norm: 0.9999991046401225, iteration: 52690
loss: 1.0345784425735474,grad_norm: 0.9999992252525349, iteration: 52691
loss: 1.0112305879592896,grad_norm: 0.8964558777691098, iteration: 52692
loss: 0.9971947073936462,grad_norm: 0.9999991935886943, iteration: 52693
loss: 0.9949501752853394,grad_norm: 0.965819325233802, iteration: 52694
loss: 1.018829584121704,grad_norm: 0.9999990559401069, iteration: 52695
loss: 0.9828627109527588,grad_norm: 0.9999991691820695, iteration: 52696
loss: 1.0559744834899902,grad_norm: 0.9999997710480067, iteration: 52697
loss: 0.976764976978302,grad_norm: 0.9999991656322694, iteration: 52698
loss: 1.0327329635620117,grad_norm: 0.8421194133854043, iteration: 52699
loss: 1.0129066705703735,grad_norm: 0.9999992360562686, iteration: 52700
loss: 1.0183171033859253,grad_norm: 0.9999999235717103, iteration: 52701
loss: 1.0259771347045898,grad_norm: 0.9783235572486697, iteration: 52702
loss: 1.0385395288467407,grad_norm: 0.9999992216669001, iteration: 52703
loss: 0.9738723039627075,grad_norm: 0.9085678922699141, iteration: 52704
loss: 0.9873661994934082,grad_norm: 0.9999989639570739, iteration: 52705
loss: 1.0099185705184937,grad_norm: 0.9910609048096534, iteration: 52706
loss: 0.9993929266929626,grad_norm: 0.9999991426207842, iteration: 52707
loss: 1.005689263343811,grad_norm: 0.931468644857957, iteration: 52708
loss: 0.9630514979362488,grad_norm: 0.9560956451681171, iteration: 52709
loss: 0.9539657235145569,grad_norm: 0.9999991646532225, iteration: 52710
loss: 0.9815464615821838,grad_norm: 0.9999991901828148, iteration: 52711
loss: 1.0076411962509155,grad_norm: 0.9999991571367385, iteration: 52712
loss: 0.9961169958114624,grad_norm: 0.9245962219139292, iteration: 52713
loss: 0.9919601082801819,grad_norm: 0.8775968513511141, iteration: 52714
loss: 1.0220369100570679,grad_norm: 0.9999995762094106, iteration: 52715
loss: 1.0652128458023071,grad_norm: 0.9999990932104287, iteration: 52716
loss: 1.0202338695526123,grad_norm: 0.8299445529827468, iteration: 52717
loss: 0.9719228148460388,grad_norm: 0.9609640123848514, iteration: 52718
loss: 0.9887683987617493,grad_norm: 0.9014918918207813, iteration: 52719
loss: 1.0018329620361328,grad_norm: 0.9475057487232906, iteration: 52720
loss: 0.9682267904281616,grad_norm: 0.9999989980208825, iteration: 52721
loss: 1.0240739583969116,grad_norm: 0.9999990504600212, iteration: 52722
loss: 1.0262489318847656,grad_norm: 0.9999989377187413, iteration: 52723
loss: 1.0428544282913208,grad_norm: 0.8883594254807525, iteration: 52724
loss: 1.0096876621246338,grad_norm: 0.9999991775500631, iteration: 52725
loss: 0.9687943458557129,grad_norm: 0.747346013740047, iteration: 52726
loss: 1.006556510925293,grad_norm: 0.9999990418221874, iteration: 52727
loss: 1.013750433921814,grad_norm: 0.9675425669244296, iteration: 52728
loss: 1.0344246625900269,grad_norm: 0.9999990597926438, iteration: 52729
loss: 1.006341814994812,grad_norm: 0.9999999193318955, iteration: 52730
loss: 0.9841167330741882,grad_norm: 0.9035749685987579, iteration: 52731
loss: 0.9988844394683838,grad_norm: 0.9999992401294361, iteration: 52732
loss: 1.0278387069702148,grad_norm: 0.9999990470574328, iteration: 52733
loss: 0.9953004717826843,grad_norm: 0.9999992169421061, iteration: 52734
loss: 1.0048918724060059,grad_norm: 0.9348767567600553, iteration: 52735
loss: 0.9898699522018433,grad_norm: 0.9999990530287903, iteration: 52736
loss: 0.9732517004013062,grad_norm: 0.9435094069783787, iteration: 52737
loss: 1.002882719039917,grad_norm: 0.9999990654693632, iteration: 52738
loss: 1.0185577869415283,grad_norm: 0.9999996276031672, iteration: 52739
loss: 0.9895790219306946,grad_norm: 0.9999991832185061, iteration: 52740
loss: 1.0367435216903687,grad_norm: 0.9999989926993175, iteration: 52741
loss: 0.9844395518302917,grad_norm: 0.9999991535642037, iteration: 52742
loss: 0.9919329881668091,grad_norm: 0.8243795616754426, iteration: 52743
loss: 0.9725611805915833,grad_norm: 0.8567091381226329, iteration: 52744
loss: 0.960283100605011,grad_norm: 0.9999992072764506, iteration: 52745
loss: 0.993024468421936,grad_norm: 0.7979825436272746, iteration: 52746
loss: 1.003011703491211,grad_norm: 0.9994317666470168, iteration: 52747
loss: 1.017159104347229,grad_norm: 0.9840714798855003, iteration: 52748
loss: 0.9905094504356384,grad_norm: 0.9999990144574615, iteration: 52749
loss: 1.002608060836792,grad_norm: 0.8392575280796237, iteration: 52750
loss: 1.023475170135498,grad_norm: 0.9999990210728072, iteration: 52751
loss: 1.0199038982391357,grad_norm: 0.999999017879081, iteration: 52752
loss: 0.9656267762184143,grad_norm: 0.9436113854637105, iteration: 52753
loss: 1.0131016969680786,grad_norm: 0.967621015416484, iteration: 52754
loss: 0.9559504389762878,grad_norm: 0.9169885426689806, iteration: 52755
loss: 1.0004346370697021,grad_norm: 0.9088812902823945, iteration: 52756
loss: 1.002920389175415,grad_norm: 0.9999991916543496, iteration: 52757
loss: 0.9973534345626831,grad_norm: 0.9999992206548939, iteration: 52758
loss: 0.9825465679168701,grad_norm: 0.9999991160353142, iteration: 52759
loss: 1.0273903608322144,grad_norm: 0.9135636968287859, iteration: 52760
loss: 1.032802700996399,grad_norm: 0.8709434471281741, iteration: 52761
loss: 1.0378576517105103,grad_norm: 0.9466952496959901, iteration: 52762
loss: 0.9964630603790283,grad_norm: 0.9999991395361373, iteration: 52763
loss: 0.9991644024848938,grad_norm: 0.967898567800522, iteration: 52764
loss: 0.9915823936462402,grad_norm: 0.8759835133219261, iteration: 52765
loss: 0.9673345685005188,grad_norm: 0.9612281814248063, iteration: 52766
loss: 0.9927350878715515,grad_norm: 0.916236116017399, iteration: 52767
loss: 0.9760308265686035,grad_norm: 0.9999989329268362, iteration: 52768
loss: 1.0020769834518433,grad_norm: 0.9721344017592587, iteration: 52769
loss: 0.9969446659088135,grad_norm: 0.9999996460697758, iteration: 52770
loss: 1.0078245401382446,grad_norm: 0.9391008353442444, iteration: 52771
loss: 1.0128213167190552,grad_norm: 0.9999993029570489, iteration: 52772
loss: 1.0479247570037842,grad_norm: 0.9568556633657277, iteration: 52773
loss: 1.0078123807907104,grad_norm: 0.924858412434629, iteration: 52774
loss: 1.0169377326965332,grad_norm: 0.8894624207801229, iteration: 52775
loss: 1.008723497390747,grad_norm: 0.999999759196642, iteration: 52776
loss: 1.035273790359497,grad_norm: 0.8546319691663519, iteration: 52777
loss: 0.9909165501594543,grad_norm: 0.9685187971565684, iteration: 52778
loss: 0.990153968334198,grad_norm: 0.9826895021637004, iteration: 52779
loss: 1.0304796695709229,grad_norm: 0.9999993409169519, iteration: 52780
loss: 0.9995707869529724,grad_norm: 0.9999990352433193, iteration: 52781
loss: 0.9657255411148071,grad_norm: 0.9068744269682014, iteration: 52782
loss: 0.9908866882324219,grad_norm: 0.9999992630213862, iteration: 52783
loss: 1.0195796489715576,grad_norm: 0.9999992775145388, iteration: 52784
loss: 1.017910122871399,grad_norm: 0.9999992149182103, iteration: 52785
loss: 0.9804076552391052,grad_norm: 0.9999991795212231, iteration: 52786
loss: 1.035145878791809,grad_norm: 0.9999990819624424, iteration: 52787
loss: 1.018316388130188,grad_norm: 0.9999991592810095, iteration: 52788
loss: 0.9837404489517212,grad_norm: 0.9999990775800288, iteration: 52789
loss: 0.9877002835273743,grad_norm: 0.8029229169881648, iteration: 52790
loss: 0.9967640042304993,grad_norm: 0.9999990408227483, iteration: 52791
loss: 1.0081580877304077,grad_norm: 0.9999991081250279, iteration: 52792
loss: 1.0091474056243896,grad_norm: 0.9999992465116905, iteration: 52793
loss: 0.9803710579872131,grad_norm: 0.9104636644779873, iteration: 52794
loss: 1.047594428062439,grad_norm: 0.999999048039758, iteration: 52795
loss: 1.023730993270874,grad_norm: 0.9665787709819899, iteration: 52796
loss: 1.0197725296020508,grad_norm: 0.9999993147545551, iteration: 52797
loss: 1.034030795097351,grad_norm: 0.9654410223258989, iteration: 52798
loss: 1.0208923816680908,grad_norm: 0.9359152160813236, iteration: 52799
loss: 0.9552600979804993,grad_norm: 0.9552541993820852, iteration: 52800
loss: 0.9961714744567871,grad_norm: 0.9999992433471517, iteration: 52801
loss: 0.9877534508705139,grad_norm: 0.9919736280431155, iteration: 52802
loss: 1.0016241073608398,grad_norm: 0.7639514726482832, iteration: 52803
loss: 0.9832100868225098,grad_norm: 0.9746033971597926, iteration: 52804
loss: 1.0427792072296143,grad_norm: 0.9517421084201061, iteration: 52805
loss: 1.0335537195205688,grad_norm: 0.9999996955815302, iteration: 52806
loss: 0.9969484210014343,grad_norm: 0.8803035400940492, iteration: 52807
loss: 0.9714287519454956,grad_norm: 0.9475156426063447, iteration: 52808
loss: 1.036259412765503,grad_norm: 0.8870892334343866, iteration: 52809
loss: 0.9793236255645752,grad_norm: 0.9999991136129565, iteration: 52810
loss: 0.9981130957603455,grad_norm: 0.9999992640079023, iteration: 52811
loss: 1.05177903175354,grad_norm: 0.9999998811782806, iteration: 52812
loss: 0.9772882461547852,grad_norm: 0.9999991699723675, iteration: 52813
loss: 0.988383948802948,grad_norm: 0.9220775496464815, iteration: 52814
loss: 1.000283122062683,grad_norm: 0.9999991595293867, iteration: 52815
loss: 0.984362781047821,grad_norm: 0.9186652343153382, iteration: 52816
loss: 1.0231188535690308,grad_norm: 0.8193981757716874, iteration: 52817
loss: 1.017032504081726,grad_norm: 0.9999991819533149, iteration: 52818
loss: 1.0116101503372192,grad_norm: 0.9766900309226973, iteration: 52819
loss: 1.0136306285858154,grad_norm: 0.8745235793352689, iteration: 52820
loss: 0.9726231098175049,grad_norm: 0.9999990141929446, iteration: 52821
loss: 0.9916015267372131,grad_norm: 0.9854400629748589, iteration: 52822
loss: 1.0065488815307617,grad_norm: 0.8698636657270893, iteration: 52823
loss: 1.004009485244751,grad_norm: 0.7936031436801505, iteration: 52824
loss: 0.9910096526145935,grad_norm: 0.9999992641416388, iteration: 52825
loss: 1.0203319787979126,grad_norm: 0.9999991908333987, iteration: 52826
loss: 1.0067890882492065,grad_norm: 0.9999992042156498, iteration: 52827
loss: 1.0083013772964478,grad_norm: 0.8762150874344492, iteration: 52828
loss: 1.007841944694519,grad_norm: 0.7884515728662218, iteration: 52829
loss: 1.0187426805496216,grad_norm: 0.9999995315572147, iteration: 52830
loss: 0.9973639845848083,grad_norm: 0.9918373905558444, iteration: 52831
loss: 0.9851725101470947,grad_norm: 0.9095850010164466, iteration: 52832
loss: 1.100163221359253,grad_norm: 0.9999993977570864, iteration: 52833
loss: 0.9677894711494446,grad_norm: 0.9999991268862439, iteration: 52834
loss: 1.0108085870742798,grad_norm: 0.9999990232343221, iteration: 52835
loss: 1.0380501747131348,grad_norm: 0.9999990482953046, iteration: 52836
loss: 1.0231666564941406,grad_norm: 0.9999990976828662, iteration: 52837
loss: 1.0433815717697144,grad_norm: 0.8925981524143244, iteration: 52838
loss: 1.0025756359100342,grad_norm: 0.9999989926953016, iteration: 52839
loss: 0.9759426712989807,grad_norm: 0.9999990862353573, iteration: 52840
loss: 0.9578098058700562,grad_norm: 0.8621804668381533, iteration: 52841
loss: 1.0289102792739868,grad_norm: 0.9999990780683892, iteration: 52842
loss: 0.9911975264549255,grad_norm: 0.9999992041262721, iteration: 52843
loss: 1.0063860416412354,grad_norm: 0.999999258511562, iteration: 52844
loss: 1.0241905450820923,grad_norm: 0.8510204983324501, iteration: 52845
loss: 0.9843825101852417,grad_norm: 0.9808807428351793, iteration: 52846
loss: 1.036104440689087,grad_norm: 0.9999992870341008, iteration: 52847
loss: 0.9619361162185669,grad_norm: 0.9966947440084468, iteration: 52848
loss: 1.0082502365112305,grad_norm: 0.9806789890571493, iteration: 52849
loss: 0.9790029525756836,grad_norm: 0.9867243661075525, iteration: 52850
loss: 0.9932600259780884,grad_norm: 0.8807218249595227, iteration: 52851
loss: 1.0271128416061401,grad_norm: 0.9999992133140001, iteration: 52852
loss: 0.9842759370803833,grad_norm: 0.9719137801477742, iteration: 52853
loss: 0.988466203212738,grad_norm: 0.9732667872187168, iteration: 52854
loss: 0.9876100420951843,grad_norm: 0.9999991949497054, iteration: 52855
loss: 0.99217289686203,grad_norm: 0.9999990592975572, iteration: 52856
loss: 1.0323994159698486,grad_norm: 0.9999991723640225, iteration: 52857
loss: 0.9929661154747009,grad_norm: 0.9999989916548397, iteration: 52858
loss: 1.013479471206665,grad_norm: 0.9126331944367753, iteration: 52859
loss: 0.9901815056800842,grad_norm: 0.9999990776971902, iteration: 52860
loss: 1.0007903575897217,grad_norm: 0.9999991042174793, iteration: 52861
loss: 0.9995622038841248,grad_norm: 0.9614734170101737, iteration: 52862
loss: 1.0000756978988647,grad_norm: 0.9865983113524524, iteration: 52863
loss: 0.9824132919311523,grad_norm: 0.8484889766628786, iteration: 52864
loss: 1.041896939277649,grad_norm: 0.9795652002636587, iteration: 52865
loss: 1.098524808883667,grad_norm: 0.9999991592329661, iteration: 52866
loss: 0.9825388193130493,grad_norm: 0.9580875539334545, iteration: 52867
loss: 1.0965349674224854,grad_norm: 0.9999998813636727, iteration: 52868
loss: 1.0240094661712646,grad_norm: 0.9038762981841301, iteration: 52869
loss: 1.017974615097046,grad_norm: 0.9999990962321671, iteration: 52870
loss: 1.021278977394104,grad_norm: 0.9999992380672718, iteration: 52871
loss: 0.9727374911308289,grad_norm: 0.9472636489627176, iteration: 52872
loss: 1.012261152267456,grad_norm: 0.9999991553059197, iteration: 52873
loss: 1.0012861490249634,grad_norm: 0.9999992450532906, iteration: 52874
loss: 0.9919006824493408,grad_norm: 0.9999992049078109, iteration: 52875
loss: 1.0096278190612793,grad_norm: 0.999999248641004, iteration: 52876
loss: 0.9820985198020935,grad_norm: 0.817444647908806, iteration: 52877
loss: 1.0082484483718872,grad_norm: 0.9734381945735044, iteration: 52878
loss: 1.0030183792114258,grad_norm: 0.9999990604640889, iteration: 52879
loss: 0.9841682314872742,grad_norm: 0.947478008987981, iteration: 52880
loss: 0.9981436133384705,grad_norm: 0.9959518270904196, iteration: 52881
loss: 1.0048620700836182,grad_norm: 0.9999990773773482, iteration: 52882
loss: 1.0005558729171753,grad_norm: 0.9387591116794252, iteration: 52883
loss: 0.9730559587478638,grad_norm: 0.9999992209362729, iteration: 52884
loss: 0.9699220061302185,grad_norm: 0.8890389588872154, iteration: 52885
loss: 1.0614255666732788,grad_norm: 0.9999993924545375, iteration: 52886
loss: 1.0181211233139038,grad_norm: 0.9981006341023934, iteration: 52887
loss: 1.0206516981124878,grad_norm: 0.9999990982642128, iteration: 52888
loss: 1.0227240324020386,grad_norm: 0.8668671540262193, iteration: 52889
loss: 0.9827109575271606,grad_norm: 0.8870453489120482, iteration: 52890
loss: 1.023990511894226,grad_norm: 0.9298325511812232, iteration: 52891
loss: 1.031757116317749,grad_norm: 0.8632775105563265, iteration: 52892
loss: 1.0160144567489624,grad_norm: 0.9999991094414241, iteration: 52893
loss: 1.0212706327438354,grad_norm: 0.9999991236482431, iteration: 52894
loss: 0.9956252574920654,grad_norm: 0.987466269047607, iteration: 52895
loss: 0.9768631458282471,grad_norm: 0.9552570706590133, iteration: 52896
loss: 0.9705278873443604,grad_norm: 0.9485209413840809, iteration: 52897
loss: 1.0453017950057983,grad_norm: 0.9999992012892215, iteration: 52898
loss: 1.0101889371871948,grad_norm: 0.9999991808176746, iteration: 52899
loss: 0.9772028923034668,grad_norm: 0.9999991857013394, iteration: 52900
loss: 0.9921364784240723,grad_norm: 0.9874735234970435, iteration: 52901
loss: 0.9946398735046387,grad_norm: 0.90591246713414, iteration: 52902
loss: 0.9601495862007141,grad_norm: 0.963615391894099, iteration: 52903
loss: 1.013056993484497,grad_norm: 0.9999991854654614, iteration: 52904
loss: 1.0229512453079224,grad_norm: 0.9246337655006873, iteration: 52905
loss: 1.0059638023376465,grad_norm: 0.9999993134720402, iteration: 52906
loss: 0.9517870545387268,grad_norm: 0.9999992179427254, iteration: 52907
loss: 0.9804861545562744,grad_norm: 0.9519070764975659, iteration: 52908
loss: 1.005179524421692,grad_norm: 0.8094375693357873, iteration: 52909
loss: 1.0186253786087036,grad_norm: 0.9772666741306749, iteration: 52910
loss: 0.9906970858573914,grad_norm: 0.9622094008979766, iteration: 52911
loss: 0.9995222091674805,grad_norm: 0.8866579410181397, iteration: 52912
loss: 1.0318193435668945,grad_norm: 0.9502874860384288, iteration: 52913
loss: 1.0274994373321533,grad_norm: 0.8766979343396846, iteration: 52914
loss: 0.9863242506980896,grad_norm: 0.9033594361073018, iteration: 52915
loss: 1.0366113185882568,grad_norm: 0.9999992290316451, iteration: 52916
loss: 1.0067236423492432,grad_norm: 0.9617317640287016, iteration: 52917
loss: 1.0216046571731567,grad_norm: 0.9801357880759095, iteration: 52918
loss: 1.0417262315750122,grad_norm: 0.9999995021125414, iteration: 52919
loss: 1.0132216215133667,grad_norm: 0.9999992226586516, iteration: 52920
loss: 0.9513117671012878,grad_norm: 0.8724508647181699, iteration: 52921
loss: 0.9772061705589294,grad_norm: 0.9057689374779861, iteration: 52922
loss: 0.9789583086967468,grad_norm: 0.9999991230255965, iteration: 52923
loss: 1.0128936767578125,grad_norm: 0.992164293765323, iteration: 52924
loss: 0.9913649559020996,grad_norm: 0.9999991279464407, iteration: 52925
loss: 1.0013542175292969,grad_norm: 0.999999031743467, iteration: 52926
loss: 0.9687374830245972,grad_norm: 0.965731194806573, iteration: 52927
loss: 1.0387722253799438,grad_norm: 0.9999991345048959, iteration: 52928
loss: 0.9957128167152405,grad_norm: 0.999999126031811, iteration: 52929
loss: 0.9996517896652222,grad_norm: 0.9999989952356713, iteration: 52930
loss: 1.0308383703231812,grad_norm: 0.9999990311250477, iteration: 52931
loss: 0.9874156713485718,grad_norm: 0.9999990982038554, iteration: 52932
loss: 0.9621248841285706,grad_norm: 0.9999993486137027, iteration: 52933
loss: 1.0319169759750366,grad_norm: 0.9999990457740934, iteration: 52934
loss: 0.9729666709899902,grad_norm: 0.9122604354215648, iteration: 52935
loss: 0.9966445565223694,grad_norm: 0.9999997826590062, iteration: 52936
loss: 1.0240442752838135,grad_norm: 0.7975116457701446, iteration: 52937
loss: 1.0157216787338257,grad_norm: 0.7919090538088335, iteration: 52938
loss: 1.0355188846588135,grad_norm: 0.9999990011601447, iteration: 52939
loss: 0.9898435473442078,grad_norm: 0.9999993054443497, iteration: 52940
loss: 1.0365526676177979,grad_norm: 0.9999990865944881, iteration: 52941
loss: 1.013435959815979,grad_norm: 0.999999268578863, iteration: 52942
loss: 0.9831775426864624,grad_norm: 0.9674041638380024, iteration: 52943
loss: 1.0002185106277466,grad_norm: 0.8442953797907685, iteration: 52944
loss: 1.0113933086395264,grad_norm: 0.8637007028956539, iteration: 52945
loss: 1.0029898881912231,grad_norm: 0.9999991012875509, iteration: 52946
loss: 1.0015697479248047,grad_norm: 0.8663504882680428, iteration: 52947
loss: 0.985872745513916,grad_norm: 0.9999991983757616, iteration: 52948
loss: 0.9959525465965271,grad_norm: 0.9999992193472815, iteration: 52949
loss: 0.9845455288887024,grad_norm: 0.929113994616886, iteration: 52950
loss: 1.0262070894241333,grad_norm: 0.9999991062130199, iteration: 52951
loss: 0.9982506632804871,grad_norm: 0.9999991540520182, iteration: 52952
loss: 1.0182886123657227,grad_norm: 0.9765685424704674, iteration: 52953
loss: 0.9566712379455566,grad_norm: 0.9734105165930146, iteration: 52954
loss: 1.0019478797912598,grad_norm: 0.9923654835358661, iteration: 52955
loss: 1.0239957571029663,grad_norm: 0.9999991993562917, iteration: 52956
loss: 0.988410472869873,grad_norm: 0.9999990624113918, iteration: 52957
loss: 1.020591139793396,grad_norm: 0.8891573118704332, iteration: 52958
loss: 1.0526366233825684,grad_norm: 0.9999992518139166, iteration: 52959
loss: 0.9705458283424377,grad_norm: 0.866960048590059, iteration: 52960
loss: 1.0021075010299683,grad_norm: 0.9860115561988625, iteration: 52961
loss: 0.9694127440452576,grad_norm: 0.9999991594726289, iteration: 52962
loss: 0.9860067367553711,grad_norm: 0.9999990615206554, iteration: 52963
loss: 1.0438271760940552,grad_norm: 0.9574995560936249, iteration: 52964
loss: 1.0061911344528198,grad_norm: 0.9999991372202511, iteration: 52965
loss: 0.9790642261505127,grad_norm: 0.9833342141956053, iteration: 52966
loss: 1.0020864009857178,grad_norm: 0.9999991240366733, iteration: 52967
loss: 0.9717247486114502,grad_norm: 0.9799951281820006, iteration: 52968
loss: 1.0234254598617554,grad_norm: 0.9999991120667063, iteration: 52969
loss: 0.9703467488288879,grad_norm: 0.9292974357972469, iteration: 52970
loss: 1.0152915716171265,grad_norm: 0.99544245387828, iteration: 52971
loss: 1.0263569355010986,grad_norm: 0.9999993949207877, iteration: 52972
loss: 1.0042065382003784,grad_norm: 0.9094235311366021, iteration: 52973
loss: 1.0811609029769897,grad_norm: 0.9999995009717381, iteration: 52974
loss: 1.0230402946472168,grad_norm: 0.9999991062549162, iteration: 52975
loss: 1.005367398262024,grad_norm: 0.9619717118817233, iteration: 52976
loss: 0.9929724931716919,grad_norm: 0.7774298868856129, iteration: 52977
loss: 0.9531314373016357,grad_norm: 0.9999989547112944, iteration: 52978
loss: 0.9472845792770386,grad_norm: 0.9067737828372111, iteration: 52979
loss: 1.0232025384902954,grad_norm: 0.9999991624465677, iteration: 52980
loss: 1.0580402612686157,grad_norm: 0.9999991691828546, iteration: 52981
loss: 0.9811841249465942,grad_norm: 0.9999990734391245, iteration: 52982
loss: 1.0221959352493286,grad_norm: 0.9999990158325959, iteration: 52983
loss: 1.0168620347976685,grad_norm: 0.9462312040000966, iteration: 52984
loss: 0.9939782023429871,grad_norm: 0.999999109512601, iteration: 52985
loss: 1.0190569162368774,grad_norm: 0.9853914858590769, iteration: 52986
loss: 0.9857433438301086,grad_norm: 0.9999990203535013, iteration: 52987
loss: 1.0337231159210205,grad_norm: 0.9999990968367625, iteration: 52988
loss: 1.0069977045059204,grad_norm: 0.999999013753932, iteration: 52989
loss: 0.9742467999458313,grad_norm: 0.8025167622195343, iteration: 52990
loss: 1.0063549280166626,grad_norm: 0.9999990027462354, iteration: 52991
loss: 1.0435490608215332,grad_norm: 0.9999990881487183, iteration: 52992
loss: 0.9807647466659546,grad_norm: 0.9460697617961664, iteration: 52993
loss: 1.0367401838302612,grad_norm: 0.9424353785668274, iteration: 52994
loss: 1.0063556432724,grad_norm: 0.9040786077334514, iteration: 52995
loss: 0.9913833737373352,grad_norm: 0.9999993900663753, iteration: 52996
loss: 1.0004230737686157,grad_norm: 0.9439195305894545, iteration: 52997
loss: 1.0484808683395386,grad_norm: 0.9999994895814729, iteration: 52998
loss: 1.01852285861969,grad_norm: 0.9999992645426835, iteration: 52999
loss: 1.0044066905975342,grad_norm: 0.9686585631680156, iteration: 53000
loss: 0.9750696420669556,grad_norm: 0.9760639520048965, iteration: 53001
loss: 0.9828100800514221,grad_norm: 0.999999140487941, iteration: 53002
loss: 0.983041524887085,grad_norm: 0.9367815714555969, iteration: 53003
loss: 1.019310712814331,grad_norm: 0.960308179553709, iteration: 53004
loss: 1.021256685256958,grad_norm: 0.9999991405653192, iteration: 53005
loss: 0.9961341023445129,grad_norm: 0.7715387660313546, iteration: 53006
loss: 0.9983430504798889,grad_norm: 0.9999991282558497, iteration: 53007
loss: 0.9955736398696899,grad_norm: 0.9416424793025925, iteration: 53008
loss: 1.0055687427520752,grad_norm: 0.999999214763328, iteration: 53009
loss: 0.9793598651885986,grad_norm: 0.9999989964346849, iteration: 53010
loss: 0.9800837635993958,grad_norm: 0.8265312121870109, iteration: 53011
loss: 1.0413087606430054,grad_norm: 0.9999995480828877, iteration: 53012
loss: 1.0099992752075195,grad_norm: 0.8940522102259837, iteration: 53013
loss: 1.0054740905761719,grad_norm: 0.8888273464358772, iteration: 53014
loss: 1.031741976737976,grad_norm: 0.985752071719809, iteration: 53015
loss: 1.0129436254501343,grad_norm: 0.9999991012191697, iteration: 53016
loss: 1.016445279121399,grad_norm: 0.9999991771283824, iteration: 53017
loss: 0.9842607378959656,grad_norm: 0.999999081270562, iteration: 53018
loss: 0.997558057308197,grad_norm: 0.7512110495342511, iteration: 53019
loss: 1.0175801515579224,grad_norm: 0.8850472397800816, iteration: 53020
loss: 0.9986100792884827,grad_norm: 0.9999992280927679, iteration: 53021
loss: 1.0004549026489258,grad_norm: 0.9276057835453062, iteration: 53022
loss: 1.0431503057479858,grad_norm: 0.9999990517156228, iteration: 53023
loss: 1.0153521299362183,grad_norm: 0.9630222453319192, iteration: 53024
loss: 1.0806241035461426,grad_norm: 0.9999993449980437, iteration: 53025
loss: 1.0274873971939087,grad_norm: 0.9402008590339048, iteration: 53026
loss: 0.9952442049980164,grad_norm: 0.999999040421033, iteration: 53027
loss: 1.011560320854187,grad_norm: 0.8475825745477115, iteration: 53028
loss: 1.041990876197815,grad_norm: 0.8972082065857001, iteration: 53029
loss: 0.9966561794281006,grad_norm: 0.9008747270109724, iteration: 53030
loss: 1.0101443529129028,grad_norm: 0.9999992176460165, iteration: 53031
loss: 1.0123121738433838,grad_norm: 0.9075993998790908, iteration: 53032
loss: 1.0475115776062012,grad_norm: 0.9999993694940974, iteration: 53033
loss: 1.0262532234191895,grad_norm: 0.9999990997617678, iteration: 53034
loss: 0.9691227078437805,grad_norm: 0.9999992405561449, iteration: 53035
loss: 1.0065016746520996,grad_norm: 0.9886116802929781, iteration: 53036
loss: 1.0096203088760376,grad_norm: 0.9999990033649941, iteration: 53037
loss: 0.9772660732269287,grad_norm: 0.9326782063679641, iteration: 53038
loss: 1.0150272846221924,grad_norm: 0.9999992212229409, iteration: 53039
loss: 1.033140778541565,grad_norm: 0.9999992621144349, iteration: 53040
loss: 1.0178823471069336,grad_norm: 0.9999992401868861, iteration: 53041
loss: 1.019876480102539,grad_norm: 0.9943769857092567, iteration: 53042
loss: 1.0111732482910156,grad_norm: 0.9999992551031253, iteration: 53043
loss: 1.0004656314849854,grad_norm: 0.9999995001063001, iteration: 53044
loss: 1.0428993701934814,grad_norm: 0.9462942043483621, iteration: 53045
loss: 0.9977913498878479,grad_norm: 0.9999991595928394, iteration: 53046
loss: 1.0085252523422241,grad_norm: 0.9999993976183179, iteration: 53047
loss: 0.9850659966468811,grad_norm: 0.8613544511418416, iteration: 53048
loss: 1.021958589553833,grad_norm: 0.9999990896250802, iteration: 53049
loss: 0.9862057566642761,grad_norm: 0.9731755582748586, iteration: 53050
loss: 0.9931878447532654,grad_norm: 0.9116762543148703, iteration: 53051
loss: 1.0129133462905884,grad_norm: 0.9999991515353854, iteration: 53052
loss: 0.9826046228408813,grad_norm: 0.7659971481342044, iteration: 53053
loss: 1.009006381034851,grad_norm: 0.999999111228026, iteration: 53054
loss: 0.983898937702179,grad_norm: 0.9281069910635567, iteration: 53055
loss: 1.0448540449142456,grad_norm: 0.9999989807045111, iteration: 53056
loss: 1.0337085723876953,grad_norm: 0.9840395247362211, iteration: 53057
loss: 1.0207533836364746,grad_norm: 0.9999992724063774, iteration: 53058
loss: 0.9909799695014954,grad_norm: 0.99999927765443, iteration: 53059
loss: 1.0057250261306763,grad_norm: 0.8941449338082266, iteration: 53060
loss: 0.995374858379364,grad_norm: 0.9999989780036395, iteration: 53061
loss: 1.030644416809082,grad_norm: 0.9999990098080846, iteration: 53062
loss: 0.9948872923851013,grad_norm: 0.9415060444151283, iteration: 53063
loss: 1.0205048322677612,grad_norm: 0.9684558938812108, iteration: 53064
loss: 0.9959251880645752,grad_norm: 0.9999990863932268, iteration: 53065
loss: 0.9890913367271423,grad_norm: 0.9999990714727389, iteration: 53066
loss: 1.0039845705032349,grad_norm: 0.9641995192142966, iteration: 53067
loss: 1.0043299198150635,grad_norm: 0.9773140848187524, iteration: 53068
loss: 1.0112942457199097,grad_norm: 0.868165199310041, iteration: 53069
loss: 0.9946747422218323,grad_norm: 0.9999990470659956, iteration: 53070
loss: 1.0015437602996826,grad_norm: 0.9956781890792339, iteration: 53071
loss: 1.02145254611969,grad_norm: 0.9525693951152328, iteration: 53072
loss: 0.9732282161712646,grad_norm: 0.9044302050487109, iteration: 53073
loss: 0.9843016266822815,grad_norm: 0.9999991538414121, iteration: 53074
loss: 0.986354649066925,grad_norm: 0.999999306146663, iteration: 53075
loss: 1.021820306777954,grad_norm: 0.9468244629802837, iteration: 53076
loss: 0.9804415106773376,grad_norm: 0.8814891338000982, iteration: 53077
loss: 1.000186562538147,grad_norm: 0.999999185062382, iteration: 53078
loss: 1.0074199438095093,grad_norm: 0.9999990224779832, iteration: 53079
loss: 1.0328439474105835,grad_norm: 0.9999999898283931, iteration: 53080
loss: 0.9787143468856812,grad_norm: 0.9999990876762246, iteration: 53081
loss: 1.0004609823226929,grad_norm: 0.9511438300865468, iteration: 53082
loss: 1.0236811637878418,grad_norm: 0.9999991173070762, iteration: 53083
loss: 1.0083659887313843,grad_norm: 0.9970411913538015, iteration: 53084
loss: 0.9900147318840027,grad_norm: 0.8547616847495176, iteration: 53085
loss: 1.0024588108062744,grad_norm: 0.9660136351349846, iteration: 53086
loss: 1.0196532011032104,grad_norm: 0.9999990272232139, iteration: 53087
loss: 1.0707519054412842,grad_norm: 0.9999991327789246, iteration: 53088
loss: 0.9973767995834351,grad_norm: 0.9999992551611738, iteration: 53089
loss: 1.0374218225479126,grad_norm: 0.9999990017029063, iteration: 53090
loss: 1.0077569484710693,grad_norm: 0.9999991132002015, iteration: 53091
loss: 1.0183197259902954,grad_norm: 0.999999164806075, iteration: 53092
loss: 0.972745418548584,grad_norm: 0.9101711276045575, iteration: 53093
loss: 0.9822208285331726,grad_norm: 0.998302022294822, iteration: 53094
loss: 1.0018351078033447,grad_norm: 0.9594524946597395, iteration: 53095
loss: 1.0076549053192139,grad_norm: 0.9999990917707438, iteration: 53096
loss: 1.0054153203964233,grad_norm: 0.888482406521218, iteration: 53097
loss: 1.0048412084579468,grad_norm: 0.999998927138448, iteration: 53098
loss: 1.0185784101486206,grad_norm: 0.9999995245504656, iteration: 53099
loss: 0.9719317555427551,grad_norm: 0.9999992272962422, iteration: 53100
loss: 1.0482211112976074,grad_norm: 0.9999995050724934, iteration: 53101
loss: 1.009716272354126,grad_norm: 0.9373653906124536, iteration: 53102
loss: 1.002760887145996,grad_norm: 0.9999989571902407, iteration: 53103
loss: 0.9979283809661865,grad_norm: 0.9983413157062613, iteration: 53104
loss: 1.0047369003295898,grad_norm: 0.9999990599033552, iteration: 53105
loss: 1.0100458860397339,grad_norm: 0.9999991693072087, iteration: 53106
loss: 0.9999814033508301,grad_norm: 0.9999991004296865, iteration: 53107
loss: 1.0309308767318726,grad_norm: 0.9999990486214612, iteration: 53108
loss: 0.9768437743186951,grad_norm: 0.8852390701037715, iteration: 53109
loss: 1.0204315185546875,grad_norm: 0.9433971557331404, iteration: 53110
loss: 1.046139121055603,grad_norm: 0.8544396527653764, iteration: 53111
loss: 1.039536476135254,grad_norm: 0.9999993316661815, iteration: 53112
loss: 0.9939306378364563,grad_norm: 0.8118933127337914, iteration: 53113
loss: 1.0232479572296143,grad_norm: 0.8759252482338297, iteration: 53114
loss: 1.0037238597869873,grad_norm: 0.8955707183656788, iteration: 53115
loss: 1.0056743621826172,grad_norm: 0.9999991835391604, iteration: 53116
loss: 0.981788158416748,grad_norm: 0.9422784904958649, iteration: 53117
loss: 1.0008238554000854,grad_norm: 0.9394707744966646, iteration: 53118
loss: 1.0309460163116455,grad_norm: 0.876433997464957, iteration: 53119
loss: 1.0341315269470215,grad_norm: 0.9410507403986937, iteration: 53120
loss: 0.9865442514419556,grad_norm: 0.8838268789737398, iteration: 53121
loss: 1.0302261114120483,grad_norm: 0.9999997988900018, iteration: 53122
loss: 0.9917953610420227,grad_norm: 0.9690817359091899, iteration: 53123
loss: 1.0123450756072998,grad_norm: 0.9999991502207457, iteration: 53124
loss: 0.969892144203186,grad_norm: 0.9999990880258989, iteration: 53125
loss: 1.0173765420913696,grad_norm: 0.9999991631151971, iteration: 53126
loss: 0.9764519333839417,grad_norm: 0.9574933853368869, iteration: 53127
loss: 0.9707068800926208,grad_norm: 0.9896426000764403, iteration: 53128
loss: 1.0258198976516724,grad_norm: 0.9475962118148026, iteration: 53129
loss: 1.0218292474746704,grad_norm: 0.9999990692775931, iteration: 53130
loss: 0.9709643721580505,grad_norm: 0.9999989861540829, iteration: 53131
loss: 0.9895747303962708,grad_norm: 0.9999991738719394, iteration: 53132
loss: 1.0190415382385254,grad_norm: 0.9467040418533166, iteration: 53133
loss: 0.9787455797195435,grad_norm: 0.9578698114406158, iteration: 53134
loss: 0.9773147106170654,grad_norm: 0.9051738998838593, iteration: 53135
loss: 1.0311596393585205,grad_norm: 0.9773090085125565, iteration: 53136
loss: 1.0032916069030762,grad_norm: 0.9999993197418308, iteration: 53137
loss: 0.9746607542037964,grad_norm: 0.9652939157482938, iteration: 53138
loss: 0.9725790619850159,grad_norm: 0.9999992282848761, iteration: 53139
loss: 1.0060142278671265,grad_norm: 0.999999125790907, iteration: 53140
loss: 1.0097287893295288,grad_norm: 0.9999991620688428, iteration: 53141
loss: 0.9992743134498596,grad_norm: 0.910097550236998, iteration: 53142
loss: 1.0141552686691284,grad_norm: 0.97522300888868, iteration: 53143
loss: 0.9962312579154968,grad_norm: 0.9999990593976283, iteration: 53144
loss: 1.0314971208572388,grad_norm: 0.9533585634783693, iteration: 53145
loss: 1.0009998083114624,grad_norm: 0.9597877201247312, iteration: 53146
loss: 0.9857613444328308,grad_norm: 0.9378446659733406, iteration: 53147
loss: 0.980835497379303,grad_norm: 0.9221237454232338, iteration: 53148
loss: 1.06195068359375,grad_norm: 0.9999993246555446, iteration: 53149
loss: 0.9888851046562195,grad_norm: 0.8777442577552779, iteration: 53150
loss: 0.991314172744751,grad_norm: 0.9999991181980883, iteration: 53151
loss: 1.0254018306732178,grad_norm: 0.9999992216989169, iteration: 53152
loss: 1.0206345319747925,grad_norm: 0.9544759983609408, iteration: 53153
loss: 1.0040065050125122,grad_norm: 0.9999990057431788, iteration: 53154
loss: 1.036647081375122,grad_norm: 0.9452040702367417, iteration: 53155
loss: 0.9937744736671448,grad_norm: 0.9233536339200046, iteration: 53156
loss: 1.0080879926681519,grad_norm: 0.9999990823162092, iteration: 53157
loss: 1.0017417669296265,grad_norm: 0.9999989039105754, iteration: 53158
loss: 0.9878822565078735,grad_norm: 0.7913345523821707, iteration: 53159
loss: 1.0126608610153198,grad_norm: 0.8809732039460333, iteration: 53160
loss: 0.9847152829170227,grad_norm: 0.9999990409245337, iteration: 53161
loss: 1.0042777061462402,grad_norm: 0.9762618190713763, iteration: 53162
loss: 1.027373194694519,grad_norm: 0.9999992365540616, iteration: 53163
loss: 0.9923862218856812,grad_norm: 0.9505269531335919, iteration: 53164
loss: 0.9705175161361694,grad_norm: 0.8726823106732348, iteration: 53165
loss: 0.9786450862884521,grad_norm: 0.9673961266677986, iteration: 53166
loss: 0.9976140260696411,grad_norm: 0.9574751726276736, iteration: 53167
loss: 1.0278891324996948,grad_norm: 0.9999991248101314, iteration: 53168
loss: 1.007183313369751,grad_norm: 0.9587859299583724, iteration: 53169
loss: 0.9996349811553955,grad_norm: 0.8149661146610023, iteration: 53170
loss: 1.025213599205017,grad_norm: 0.9999990460061553, iteration: 53171
loss: 1.04204523563385,grad_norm: 0.9999998586875767, iteration: 53172
loss: 1.0284149646759033,grad_norm: 0.999999904272191, iteration: 53173
loss: 1.0141994953155518,grad_norm: 0.9807574465964736, iteration: 53174
loss: 0.9647271633148193,grad_norm: 0.999999190046, iteration: 53175
loss: 0.9903287887573242,grad_norm: 0.9999991047455477, iteration: 53176
loss: 0.9617268443107605,grad_norm: 0.9396787895135397, iteration: 53177
loss: 1.0074247121810913,grad_norm: 0.9999990924979096, iteration: 53178
loss: 1.0427345037460327,grad_norm: 0.985881753889056, iteration: 53179
loss: 0.9873088002204895,grad_norm: 0.9999990721011367, iteration: 53180
loss: 1.0171856880187988,grad_norm: 0.9216698007140776, iteration: 53181
loss: 1.0369142293930054,grad_norm: 0.9652572652367652, iteration: 53182
loss: 1.0148303508758545,grad_norm: 0.9999990411378995, iteration: 53183
loss: 1.0042020082473755,grad_norm: 0.9103073848268205, iteration: 53184
loss: 1.0078706741333008,grad_norm: 0.9999989825883129, iteration: 53185
loss: 1.0467948913574219,grad_norm: 0.9999993100146222, iteration: 53186
loss: 1.0103363990783691,grad_norm: 0.8102072493292756, iteration: 53187
loss: 0.9951927065849304,grad_norm: 0.9142708049867059, iteration: 53188
loss: 1.0150401592254639,grad_norm: 0.9760148132855057, iteration: 53189
loss: 1.0326896905899048,grad_norm: 0.8663606903785028, iteration: 53190
loss: 0.9900725483894348,grad_norm: 0.999999065487851, iteration: 53191
loss: 0.9748831987380981,grad_norm: 0.9999991716324113, iteration: 53192
loss: 0.9972033500671387,grad_norm: 0.9999992247706762, iteration: 53193
loss: 1.006708025932312,grad_norm: 0.9999992062185771, iteration: 53194
loss: 0.973881721496582,grad_norm: 0.8444318162093654, iteration: 53195
loss: 0.976701021194458,grad_norm: 0.9999990782448986, iteration: 53196
loss: 1.019320011138916,grad_norm: 0.9999993244540133, iteration: 53197
loss: 1.0119410753250122,grad_norm: 0.999999205895595, iteration: 53198
loss: 0.9804072380065918,grad_norm: 0.999999177325468, iteration: 53199
loss: 0.9876518845558167,grad_norm: 0.982616248926406, iteration: 53200
loss: 1.0334628820419312,grad_norm: 0.9999991382508798, iteration: 53201
loss: 0.994342565536499,grad_norm: 0.9940568213081774, iteration: 53202
loss: 0.9879874587059021,grad_norm: 0.9728712057319212, iteration: 53203
loss: 1.0358209609985352,grad_norm: 0.9999996915830497, iteration: 53204
loss: 0.9714468121528625,grad_norm: 0.9999992229437608, iteration: 53205
loss: 1.0135035514831543,grad_norm: 0.9999990689712217, iteration: 53206
loss: 1.0022612810134888,grad_norm: 0.9999990984812699, iteration: 53207
loss: 1.030779242515564,grad_norm: 0.9534427771736568, iteration: 53208
loss: 1.0002351999282837,grad_norm: 0.9348276752768055, iteration: 53209
loss: 0.9976434707641602,grad_norm: 0.9999992525852434, iteration: 53210
loss: 1.0291632413864136,grad_norm: 0.9999997278642339, iteration: 53211
loss: 1.0368834733963013,grad_norm: 0.9999989966101969, iteration: 53212
loss: 0.9933105707168579,grad_norm: 0.9158651134015476, iteration: 53213
loss: 0.9948285222053528,grad_norm: 0.8417587319969586, iteration: 53214
loss: 0.9835695028305054,grad_norm: 0.9999992274407735, iteration: 53215
loss: 1.0013452768325806,grad_norm: 0.9451520146000665, iteration: 53216
loss: 0.99062579870224,grad_norm: 0.9999991343055749, iteration: 53217
loss: 1.0145655870437622,grad_norm: 0.9703937359957948, iteration: 53218
loss: 0.9909364581108093,grad_norm: 0.8126176949357856, iteration: 53219
loss: 0.9823011755943298,grad_norm: 0.9999991073293848, iteration: 53220
loss: 0.9841064214706421,grad_norm: 0.9999991663676949, iteration: 53221
loss: 1.0277620553970337,grad_norm: 0.9999992470401973, iteration: 53222
loss: 1.0121005773544312,grad_norm: 0.9999991763309314, iteration: 53223
loss: 0.9817323684692383,grad_norm: 0.9999992056305993, iteration: 53224
loss: 1.0192499160766602,grad_norm: 0.9999989874853197, iteration: 53225
loss: 0.9792943596839905,grad_norm: 0.9999991327236302, iteration: 53226
loss: 1.004989743232727,grad_norm: 0.8493588143668124, iteration: 53227
loss: 0.9721323847770691,grad_norm: 0.9999992119141452, iteration: 53228
loss: 0.9767175316810608,grad_norm: 0.9874636511725127, iteration: 53229
loss: 1.0212098360061646,grad_norm: 0.9454992770274223, iteration: 53230
loss: 0.9871054291725159,grad_norm: 0.999999080126945, iteration: 53231
loss: 0.9821571707725525,grad_norm: 0.9999990874402872, iteration: 53232
loss: 1.014449954032898,grad_norm: 0.9958151988929487, iteration: 53233
loss: 0.9876744747161865,grad_norm: 0.9001869525850562, iteration: 53234
loss: 1.0263553857803345,grad_norm: 0.9999992368656807, iteration: 53235
loss: 1.0222373008728027,grad_norm: 0.9999992661426903, iteration: 53236
loss: 1.0109519958496094,grad_norm: 0.9999992173929948, iteration: 53237
loss: 1.015365719795227,grad_norm: 0.9633249933296194, iteration: 53238
loss: 0.9847196936607361,grad_norm: 0.9739040363624608, iteration: 53239
loss: 0.9716773629188538,grad_norm: 0.9330463299767098, iteration: 53240
loss: 1.019225001335144,grad_norm: 0.9999991977414578, iteration: 53241
loss: 1.0140671730041504,grad_norm: 0.9325983621511248, iteration: 53242
loss: 1.0243902206420898,grad_norm: 0.9999991108771782, iteration: 53243
loss: 1.0322937965393066,grad_norm: 0.9450530482626206, iteration: 53244
loss: 1.0011932849884033,grad_norm: 0.9009013675740077, iteration: 53245
loss: 0.9951491355895996,grad_norm: 0.9999990773029268, iteration: 53246
loss: 1.0197300910949707,grad_norm: 0.9659005092200706, iteration: 53247
loss: 0.9772052764892578,grad_norm: 0.8954751467236813, iteration: 53248
loss: 1.0206574201583862,grad_norm: 0.9999991198960715, iteration: 53249
loss: 0.9517844319343567,grad_norm: 0.8474195876319046, iteration: 53250
loss: 1.0123449563980103,grad_norm: 0.9999998877920304, iteration: 53251
loss: 0.9837345480918884,grad_norm: 0.9357022836119268, iteration: 53252
loss: 0.9899999499320984,grad_norm: 0.9999990148280028, iteration: 53253
loss: 1.0512162446975708,grad_norm: 0.9804187880486157, iteration: 53254
loss: 1.03330397605896,grad_norm: 0.9490238487608064, iteration: 53255
loss: 1.0320123434066772,grad_norm: 0.999999063663179, iteration: 53256
loss: 0.9805859327316284,grad_norm: 0.9999991605466352, iteration: 53257
loss: 1.033994197845459,grad_norm: 0.999998978528981, iteration: 53258
loss: 0.989874541759491,grad_norm: 0.9999993474664814, iteration: 53259
loss: 1.014388918876648,grad_norm: 0.8724757656389898, iteration: 53260
loss: 0.966864287853241,grad_norm: 0.9999990848697197, iteration: 53261
loss: 1.0150502920150757,grad_norm: 0.9999991413327941, iteration: 53262
loss: 0.9972836375236511,grad_norm: 0.9999991978238816, iteration: 53263
loss: 0.9814416766166687,grad_norm: 0.8821257580124265, iteration: 53264
loss: 1.0379416942596436,grad_norm: 0.9854771839095208, iteration: 53265
loss: 0.9887619614601135,grad_norm: 0.9999992247639422, iteration: 53266
loss: 1.0037482976913452,grad_norm: 0.9999990330395024, iteration: 53267
loss: 0.9979456663131714,grad_norm: 0.9999991855128145, iteration: 53268
loss: 0.9958031177520752,grad_norm: 0.9999990289457805, iteration: 53269
loss: 1.01297926902771,grad_norm: 0.9999995764957054, iteration: 53270
loss: 1.003349781036377,grad_norm: 0.9999991388451357, iteration: 53271
loss: 0.9804365038871765,grad_norm: 0.9999990513142414, iteration: 53272
loss: 1.0481367111206055,grad_norm: 0.9999992313255892, iteration: 53273
loss: 1.0175365209579468,grad_norm: 0.999999048179593, iteration: 53274
loss: 1.030457615852356,grad_norm: 0.808421003602361, iteration: 53275
loss: 0.9942935705184937,grad_norm: 0.9999991633174554, iteration: 53276
loss: 1.0384293794631958,grad_norm: 0.9999999226855407, iteration: 53277
loss: 0.9964386820793152,grad_norm: 0.99999910345527, iteration: 53278
loss: 1.0155922174453735,grad_norm: 0.9999989308035183, iteration: 53279
loss: 0.9939378499984741,grad_norm: 0.9999991157333182, iteration: 53280
loss: 0.9773033857345581,grad_norm: 0.9411211670016298, iteration: 53281
loss: 0.9911805987358093,grad_norm: 0.9999990487816158, iteration: 53282
loss: 0.9718666672706604,grad_norm: 0.9999991642568459, iteration: 53283
loss: 0.98248690366745,grad_norm: 0.9439830201601994, iteration: 53284
loss: 0.9839726090431213,grad_norm: 0.9999992370393316, iteration: 53285
loss: 1.0649555921554565,grad_norm: 0.9999993931730649, iteration: 53286
loss: 0.9921726584434509,grad_norm: 0.999999166713702, iteration: 53287
loss: 1.0586944818496704,grad_norm: 0.9999991835966031, iteration: 53288
loss: 0.9971110224723816,grad_norm: 0.9999991772275703, iteration: 53289
loss: 1.0155751705169678,grad_norm: 0.9999990648175021, iteration: 53290
loss: 0.9862233400344849,grad_norm: 0.9999991375457609, iteration: 53291
loss: 1.0508742332458496,grad_norm: 0.9999991470014358, iteration: 53292
loss: 1.0148062705993652,grad_norm: 0.9999991748117468, iteration: 53293
loss: 0.9795762300491333,grad_norm: 0.9999993728294106, iteration: 53294
loss: 0.9891833662986755,grad_norm: 0.89887830600895, iteration: 53295
loss: 1.0045480728149414,grad_norm: 0.9999991029759695, iteration: 53296
loss: 0.9942034482955933,grad_norm: 0.9946240451334533, iteration: 53297
loss: 0.9853331446647644,grad_norm: 0.9999991559172894, iteration: 53298
loss: 0.9866514801979065,grad_norm: 0.9064790252397179, iteration: 53299
loss: 0.9935336709022522,grad_norm: 0.9999990640608853, iteration: 53300
loss: 0.9844210147857666,grad_norm: 0.999998976623668, iteration: 53301
loss: 1.03611159324646,grad_norm: 0.9999991930939077, iteration: 53302
loss: 0.9730175733566284,grad_norm: 0.9999991132465191, iteration: 53303
loss: 0.9982070922851562,grad_norm: 0.9999990120584855, iteration: 53304
loss: 1.045088768005371,grad_norm: 0.9999997629778287, iteration: 53305
loss: 1.0194413661956787,grad_norm: 0.9999992512738478, iteration: 53306
loss: 1.0048614740371704,grad_norm: 0.9999990690058849, iteration: 53307
loss: 1.0134494304656982,grad_norm: 0.999999069088542, iteration: 53308
loss: 0.9757278561592102,grad_norm: 0.9999991213242816, iteration: 53309
loss: 0.9948118329048157,grad_norm: 0.9999992814897833, iteration: 53310
loss: 1.0054229497909546,grad_norm: 0.9999988818152814, iteration: 53311
loss: 1.005351185798645,grad_norm: 0.9999991129774066, iteration: 53312
loss: 0.976709246635437,grad_norm: 0.9843391607916383, iteration: 53313
loss: 0.976100742816925,grad_norm: 0.7869594279344907, iteration: 53314
loss: 0.9972879886627197,grad_norm: 0.9999992999403056, iteration: 53315
loss: 0.950737476348877,grad_norm: 0.8879544664339564, iteration: 53316
loss: 0.953603208065033,grad_norm: 0.8250801225228738, iteration: 53317
loss: 1.008543610572815,grad_norm: 0.936484399454775, iteration: 53318
loss: 1.0155384540557861,grad_norm: 0.9999999697247854, iteration: 53319
loss: 0.994949996471405,grad_norm: 0.9999990935394298, iteration: 53320
loss: 0.9916757941246033,grad_norm: 0.8923570179336802, iteration: 53321
loss: 0.9751321077346802,grad_norm: 0.9999989541019697, iteration: 53322
loss: 0.99057537317276,grad_norm: 0.8770848175012789, iteration: 53323
loss: 1.0054550170898438,grad_norm: 0.9999991370422439, iteration: 53324
loss: 0.9907751083374023,grad_norm: 0.8898070116474481, iteration: 53325
loss: 0.9781723022460938,grad_norm: 0.9999992202177683, iteration: 53326
loss: 0.95682293176651,grad_norm: 0.9999990028976782, iteration: 53327
loss: 0.9872369766235352,grad_norm: 0.9024127823946071, iteration: 53328
loss: 0.9642978310585022,grad_norm: 0.958440896752486, iteration: 53329
loss: 0.9870142936706543,grad_norm: 0.8771040267505571, iteration: 53330
loss: 0.9519529342651367,grad_norm: 0.9999991672812928, iteration: 53331
loss: 0.9677416682243347,grad_norm: 0.9999992813805354, iteration: 53332
loss: 0.9975166916847229,grad_norm: 0.9974226281724812, iteration: 53333
loss: 1.02383553981781,grad_norm: 0.9034304636031143, iteration: 53334
loss: 1.0507714748382568,grad_norm: 0.9999989777008531, iteration: 53335
loss: 0.9971838593482971,grad_norm: 0.8022135482119541, iteration: 53336
loss: 1.001107931137085,grad_norm: 0.9315115546561537, iteration: 53337
loss: 1.0271004438400269,grad_norm: 0.9999991384687644, iteration: 53338
loss: 1.0298064947128296,grad_norm: 0.9373619760133579, iteration: 53339
loss: 0.9775989055633545,grad_norm: 0.9999989770731872, iteration: 53340
loss: 0.9816940426826477,grad_norm: 0.849154155819823, iteration: 53341
loss: 1.0156787633895874,grad_norm: 0.9999991327067524, iteration: 53342
loss: 0.9962345361709595,grad_norm: 0.876155387338466, iteration: 53343
loss: 1.0393751859664917,grad_norm: 0.7538529344181211, iteration: 53344
loss: 0.9808748960494995,grad_norm: 0.9487553026043434, iteration: 53345
loss: 1.0201966762542725,grad_norm: 0.9417818534149353, iteration: 53346
loss: 1.0063554048538208,grad_norm: 0.999999237804916, iteration: 53347
loss: 1.0077816247940063,grad_norm: 0.8644653148248441, iteration: 53348
loss: 0.9892347455024719,grad_norm: 0.9999991542417053, iteration: 53349
loss: 1.009028434753418,grad_norm: 0.9999988967247239, iteration: 53350
loss: 0.9844757914543152,grad_norm: 0.9735667371988816, iteration: 53351
loss: 1.0094014406204224,grad_norm: 0.9999991719574497, iteration: 53352
loss: 0.9972421526908875,grad_norm: 0.9726697356347823, iteration: 53353
loss: 0.9706745147705078,grad_norm: 0.9891556951421052, iteration: 53354
loss: 0.9824258089065552,grad_norm: 0.9999990143573184, iteration: 53355
loss: 0.9750423431396484,grad_norm: 0.9999990445756318, iteration: 53356
loss: 1.0328289270401,grad_norm: 0.9840136417947123, iteration: 53357
loss: 1.0177122354507446,grad_norm: 0.9999991794922158, iteration: 53358
loss: 1.0952790975570679,grad_norm: 0.9999998278084538, iteration: 53359
loss: 0.9845231175422668,grad_norm: 0.9699077554458511, iteration: 53360
loss: 0.9914048910140991,grad_norm: 0.9999990178435724, iteration: 53361
loss: 1.0024675130844116,grad_norm: 0.9549664427553677, iteration: 53362
loss: 1.0435153245925903,grad_norm: 0.9494363116352035, iteration: 53363
loss: 1.0400338172912598,grad_norm: 0.948915729007261, iteration: 53364
loss: 0.9895458221435547,grad_norm: 0.981976875158825, iteration: 53365
loss: 0.9913672804832458,grad_norm: 0.9999990170353873, iteration: 53366
loss: 1.0283397436141968,grad_norm: 0.9999991992705927, iteration: 53367
loss: 0.9910443425178528,grad_norm: 0.9469395252592753, iteration: 53368
loss: 1.0247323513031006,grad_norm: 0.9999991549515806, iteration: 53369
loss: 1.0104180574417114,grad_norm: 0.9055325509255416, iteration: 53370
loss: 1.0088635683059692,grad_norm: 0.9999989946350423, iteration: 53371
loss: 0.9801812767982483,grad_norm: 0.9999992509650348, iteration: 53372
loss: 0.976860761642456,grad_norm: 0.9968911984557981, iteration: 53373
loss: 1.0262432098388672,grad_norm: 0.9999992242897916, iteration: 53374
loss: 1.0173259973526,grad_norm: 0.9750422406677774, iteration: 53375
loss: 0.9945913553237915,grad_norm: 0.9199770553436202, iteration: 53376
loss: 0.9999516010284424,grad_norm: 0.9872425374004894, iteration: 53377
loss: 0.9630576968193054,grad_norm: 0.99999909578609, iteration: 53378
loss: 1.0068806409835815,grad_norm: 0.9999990582310303, iteration: 53379
loss: 1.023384690284729,grad_norm: 0.9411527861443906, iteration: 53380
loss: 0.9853967428207397,grad_norm: 0.9536167246458523, iteration: 53381
loss: 1.0814446210861206,grad_norm: 0.9999993893844784, iteration: 53382
loss: 1.0543572902679443,grad_norm: 0.9999990800414165, iteration: 53383
loss: 1.0150625705718994,grad_norm: 0.945434767783391, iteration: 53384
loss: 0.9804205298423767,grad_norm: 0.8640782437528607, iteration: 53385
loss: 0.9497163891792297,grad_norm: 0.9999992240356496, iteration: 53386
loss: 1.0118038654327393,grad_norm: 0.9999991290630714, iteration: 53387
loss: 0.9681382179260254,grad_norm: 0.9986169613160774, iteration: 53388
loss: 1.0198665857315063,grad_norm: 0.9999991125259716, iteration: 53389
loss: 1.012953758239746,grad_norm: 0.9999996017367605, iteration: 53390
loss: 1.0284749269485474,grad_norm: 0.9941137687172331, iteration: 53391
loss: 1.0356318950653076,grad_norm: 0.9999994440086896, iteration: 53392
loss: 0.9845855236053467,grad_norm: 0.9999992127118412, iteration: 53393
loss: 1.0011405944824219,grad_norm: 0.986153565754822, iteration: 53394
loss: 0.9843016862869263,grad_norm: 0.9999990520724752, iteration: 53395
loss: 0.9794895648956299,grad_norm: 0.9999992032191174, iteration: 53396
loss: 0.959100604057312,grad_norm: 0.9999990943554865, iteration: 53397
loss: 1.0073215961456299,grad_norm: 0.9523724149281007, iteration: 53398
loss: 0.9823249578475952,grad_norm: 0.9999989809163333, iteration: 53399
loss: 1.0220606327056885,grad_norm: 0.9999990467685356, iteration: 53400
loss: 1.0210250616073608,grad_norm: 0.9282182682981258, iteration: 53401
loss: 0.9809259176254272,grad_norm: 0.9999990091538914, iteration: 53402
loss: 0.9974587559700012,grad_norm: 0.9999990850905314, iteration: 53403
loss: 0.9822872281074524,grad_norm: 0.9999992207801713, iteration: 53404
loss: 0.9903551340103149,grad_norm: 0.9674458806744779, iteration: 53405
loss: 1.0638662576675415,grad_norm: 0.999999614855099, iteration: 53406
loss: 0.9970865249633789,grad_norm: 0.9999991325446779, iteration: 53407
loss: 1.035696029663086,grad_norm: 0.9627889107812178, iteration: 53408
loss: 1.0164204835891724,grad_norm: 0.8027882739151313, iteration: 53409
loss: 0.9877534508705139,grad_norm: 0.9773452569432736, iteration: 53410
loss: 0.9813957810401917,grad_norm: 0.9999991078065968, iteration: 53411
loss: 0.9506227970123291,grad_norm: 0.9921933844557465, iteration: 53412
loss: 1.0370076894760132,grad_norm: 0.9999991796538522, iteration: 53413
loss: 0.9924337267875671,grad_norm: 0.9999991576565349, iteration: 53414
loss: 0.991438627243042,grad_norm: 0.927353323196033, iteration: 53415
loss: 1.0280519723892212,grad_norm: 0.9739410965044353, iteration: 53416
loss: 1.0233044624328613,grad_norm: 0.8241161637879126, iteration: 53417
loss: 0.9712203145027161,grad_norm: 0.9999992040661678, iteration: 53418
loss: 0.9712691307067871,grad_norm: 0.9999992143388132, iteration: 53419
loss: 0.998010516166687,grad_norm: 0.9999990755326779, iteration: 53420
loss: 1.0295342206954956,grad_norm: 0.9999998686242811, iteration: 53421
loss: 0.9969416260719299,grad_norm: 0.9999989974884604, iteration: 53422
loss: 1.0107897520065308,grad_norm: 0.9999991704324607, iteration: 53423
loss: 1.0263553857803345,grad_norm: 0.993483669948429, iteration: 53424
loss: 1.0422050952911377,grad_norm: 0.9999990965306116, iteration: 53425
loss: 0.9856459498405457,grad_norm: 0.9999990985297199, iteration: 53426
loss: 1.0227195024490356,grad_norm: 0.9999991686361271, iteration: 53427
loss: 0.9648642539978027,grad_norm: 0.8993378934737638, iteration: 53428
loss: 1.0256292819976807,grad_norm: 0.9999991797069533, iteration: 53429
loss: 0.9960333108901978,grad_norm: 0.9369950067012451, iteration: 53430
loss: 1.0176390409469604,grad_norm: 0.9999991681780872, iteration: 53431
loss: 1.013790488243103,grad_norm: 0.8537693280014651, iteration: 53432
loss: 1.0254451036453247,grad_norm: 0.9963370278971999, iteration: 53433
loss: 1.0460008382797241,grad_norm: 0.9999996592268071, iteration: 53434
loss: 0.9649710655212402,grad_norm: 0.9366948530729576, iteration: 53435
loss: 1.003539800643921,grad_norm: 0.9999991668131757, iteration: 53436
loss: 0.9339426755905151,grad_norm: 0.9650866800191997, iteration: 53437
loss: 1.03449547290802,grad_norm: 0.9999991265548188, iteration: 53438
loss: 1.0068613290786743,grad_norm: 0.9999990835801017, iteration: 53439
loss: 0.9937275052070618,grad_norm: 0.9999989994302746, iteration: 53440
loss: 0.9827952980995178,grad_norm: 0.9141164678390279, iteration: 53441
loss: 0.9673324227333069,grad_norm: 0.9963177309903354, iteration: 53442
loss: 1.0234882831573486,grad_norm: 0.9494603220505266, iteration: 53443
loss: 1.0067538022994995,grad_norm: 0.9999991190388873, iteration: 53444
loss: 0.995728075504303,grad_norm: 0.9999993470253336, iteration: 53445
loss: 1.0382369756698608,grad_norm: 0.9999995171989832, iteration: 53446
loss: 1.0247231721878052,grad_norm: 0.9575999392523796, iteration: 53447
loss: 0.9868572950363159,grad_norm: 0.9543871294183445, iteration: 53448
loss: 0.9957236051559448,grad_norm: 0.9999990302714289, iteration: 53449
loss: 1.0290855169296265,grad_norm: 0.9999991085225022, iteration: 53450
loss: 1.012163519859314,grad_norm: 0.9999996093733561, iteration: 53451
loss: 1.02780020236969,grad_norm: 0.9999990547432397, iteration: 53452
loss: 1.0138698816299438,grad_norm: 0.8782876001168161, iteration: 53453
loss: 0.9785858988761902,grad_norm: 0.9457293792762895, iteration: 53454
loss: 0.9847269058227539,grad_norm: 0.9999990143462474, iteration: 53455
loss: 1.0200352668762207,grad_norm: 0.9139496817207118, iteration: 53456
loss: 1.0153149366378784,grad_norm: 0.9639678832339377, iteration: 53457
loss: 1.0012565851211548,grad_norm: 0.999999079759308, iteration: 53458
loss: 1.024428367614746,grad_norm: 0.9999991867611302, iteration: 53459
loss: 1.0149869918823242,grad_norm: 0.9999995384117941, iteration: 53460
loss: 0.997003972530365,grad_norm: 0.9993905435657899, iteration: 53461
loss: 1.0178675651550293,grad_norm: 0.9999997434889412, iteration: 53462
loss: 1.0014938116073608,grad_norm: 0.9999991709966259, iteration: 53463
loss: 0.9919883012771606,grad_norm: 0.7870070483969601, iteration: 53464
loss: 1.0811771154403687,grad_norm: 0.9999995314958758, iteration: 53465
loss: 0.9849355220794678,grad_norm: 0.9999990085387865, iteration: 53466
loss: 1.0218067169189453,grad_norm: 0.8439035890477198, iteration: 53467
loss: 0.9853696227073669,grad_norm: 0.9999994631808935, iteration: 53468
loss: 1.0255625247955322,grad_norm: 0.9999992377519108, iteration: 53469
loss: 0.9846598505973816,grad_norm: 0.9999992147695095, iteration: 53470
loss: 1.0501043796539307,grad_norm: 0.9999991706908194, iteration: 53471
loss: 0.9669734239578247,grad_norm: 0.8756891980793168, iteration: 53472
loss: 0.9984970688819885,grad_norm: 0.9999991286014747, iteration: 53473
loss: 1.0034879446029663,grad_norm: 0.9999990650881097, iteration: 53474
loss: 0.998909056186676,grad_norm: 0.9999991084712219, iteration: 53475
loss: 1.0230931043624878,grad_norm: 0.9999990328229346, iteration: 53476
loss: 1.0248804092407227,grad_norm: 0.99999961669316, iteration: 53477
loss: 0.9882263541221619,grad_norm: 0.9999994249950579, iteration: 53478
loss: 1.0041269063949585,grad_norm: 0.9141730542736557, iteration: 53479
loss: 1.0234183073043823,grad_norm: 0.9999990948099777, iteration: 53480
loss: 1.0300133228302002,grad_norm: 0.9999994602409182, iteration: 53481
loss: 0.9837377667427063,grad_norm: 0.9999990799620007, iteration: 53482
loss: 0.9664273858070374,grad_norm: 0.9999990535617111, iteration: 53483
loss: 1.0337271690368652,grad_norm: 0.9999992151707041, iteration: 53484
loss: 0.993781328201294,grad_norm: 0.9999991565968382, iteration: 53485
loss: 1.0143221616744995,grad_norm: 0.9999996529595201, iteration: 53486
loss: 1.0227422714233398,grad_norm: 0.9999992604995688, iteration: 53487
loss: 0.9985931515693665,grad_norm: 0.9066507598595628, iteration: 53488
loss: 0.9799731373786926,grad_norm: 0.9630777658382514, iteration: 53489
loss: 1.0429048538208008,grad_norm: 0.9433906255387133, iteration: 53490
loss: 1.0181154012680054,grad_norm: 0.9063382210193873, iteration: 53491
loss: 0.9981285333633423,grad_norm: 0.9999991493198355, iteration: 53492
loss: 1.0190824270248413,grad_norm: 0.999999346896564, iteration: 53493
loss: 0.9742791056632996,grad_norm: 0.9634423757562174, iteration: 53494
loss: 1.0075422525405884,grad_norm: 0.9999992053754228, iteration: 53495
loss: 0.9569140076637268,grad_norm: 0.9849151186303442, iteration: 53496
loss: 0.9817689657211304,grad_norm: 0.9583784908679435, iteration: 53497
loss: 1.0167388916015625,grad_norm: 0.9999994681545671, iteration: 53498
loss: 0.9710702896118164,grad_norm: 0.9348743999644441, iteration: 53499
loss: 0.972966194152832,grad_norm: 0.9999990424306399, iteration: 53500
loss: 1.001851201057434,grad_norm: 0.999999166324674, iteration: 53501
loss: 0.9646828174591064,grad_norm: 0.999999029683404, iteration: 53502
loss: 0.9953097105026245,grad_norm: 0.8971966362999027, iteration: 53503
loss: 1.014591097831726,grad_norm: 0.9750204563378383, iteration: 53504
loss: 0.9998670220375061,grad_norm: 0.9999989903520067, iteration: 53505
loss: 1.0474073886871338,grad_norm: 0.9999997535716654, iteration: 53506
loss: 0.9861500263214111,grad_norm: 0.9999990201204814, iteration: 53507
loss: 1.0186301469802856,grad_norm: 0.9999993791671866, iteration: 53508
loss: 1.0320860147476196,grad_norm: 0.9999990608222689, iteration: 53509
loss: 0.9763899445533752,grad_norm: 0.9688933334893084, iteration: 53510
loss: 0.9949719309806824,grad_norm: 0.8750238529726132, iteration: 53511
loss: 1.0319044589996338,grad_norm: 0.9999992585684299, iteration: 53512
loss: 1.0688464641571045,grad_norm: 0.999999119966109, iteration: 53513
loss: 1.0076699256896973,grad_norm: 0.9999990488018531, iteration: 53514
loss: 1.012296199798584,grad_norm: 0.9249849673723447, iteration: 53515
loss: 1.0383994579315186,grad_norm: 0.9999993651936969, iteration: 53516
loss: 1.0402319431304932,grad_norm: 0.9999990743724768, iteration: 53517
loss: 0.9843689203262329,grad_norm: 0.9999991604011127, iteration: 53518
loss: 0.9716792106628418,grad_norm: 0.9999991424942133, iteration: 53519
loss: 0.9940839409828186,grad_norm: 0.9999990551435906, iteration: 53520
loss: 1.012540340423584,grad_norm: 0.9999994828949964, iteration: 53521
loss: 1.0239202976226807,grad_norm: 0.9999992125711248, iteration: 53522
loss: 0.9764756560325623,grad_norm: 0.9796041987106713, iteration: 53523
loss: 1.0349081754684448,grad_norm: 0.9999998111863644, iteration: 53524
loss: 1.0661852359771729,grad_norm: 0.9999993698082186, iteration: 53525
loss: 0.9917718172073364,grad_norm: 0.999999177639697, iteration: 53526
loss: 0.9774526953697205,grad_norm: 0.9999990505941341, iteration: 53527
loss: 1.005135416984558,grad_norm: 0.9999991111229368, iteration: 53528
loss: 1.030807614326477,grad_norm: 0.9212064817825731, iteration: 53529
loss: 1.0255756378173828,grad_norm: 0.8615549714979477, iteration: 53530
loss: 0.9966012239456177,grad_norm: 0.978204575547204, iteration: 53531
loss: 1.0715570449829102,grad_norm: 0.9999996019993529, iteration: 53532
loss: 1.0299609899520874,grad_norm: 0.9999993047975233, iteration: 53533
loss: 1.0079071521759033,grad_norm: 0.9941620651682292, iteration: 53534
loss: 0.9967176914215088,grad_norm: 0.999999189946664, iteration: 53535
loss: 0.9713612794876099,grad_norm: 0.9345359432601744, iteration: 53536
loss: 1.0343173742294312,grad_norm: 0.9963844672713527, iteration: 53537
loss: 1.0213502645492554,grad_norm: 0.8971050148305307, iteration: 53538
loss: 1.0364861488342285,grad_norm: 0.9999996127928054, iteration: 53539
loss: 1.0385452508926392,grad_norm: 0.9999991954954062, iteration: 53540
loss: 0.979966938495636,grad_norm: 0.863478379131811, iteration: 53541
loss: 1.0118211507797241,grad_norm: 0.9273698649606859, iteration: 53542
loss: 0.9910281896591187,grad_norm: 0.9999992161345309, iteration: 53543
loss: 1.0179246664047241,grad_norm: 0.9999996098861518, iteration: 53544
loss: 0.9928486347198486,grad_norm: 0.9999991560760481, iteration: 53545
loss: 1.0062696933746338,grad_norm: 0.8635940275514963, iteration: 53546
loss: 1.00850248336792,grad_norm: 0.9988976317761784, iteration: 53547
loss: 1.0036749839782715,grad_norm: 0.999998969573738, iteration: 53548
loss: 1.0507742166519165,grad_norm: 0.9999990786161149, iteration: 53549
loss: 0.9752880930900574,grad_norm: 0.9999991558462907, iteration: 53550
loss: 0.9765541553497314,grad_norm: 0.9999993487650287, iteration: 53551
loss: 1.034140706062317,grad_norm: 0.999999180750151, iteration: 53552
loss: 0.9889253973960876,grad_norm: 0.9999991594185611, iteration: 53553
loss: 1.0217901468276978,grad_norm: 0.999999170426836, iteration: 53554
loss: 1.0294160842895508,grad_norm: 0.9892027163641108, iteration: 53555
loss: 1.002518653869629,grad_norm: 0.9999992778439106, iteration: 53556
loss: 0.9708688259124756,grad_norm: 0.9999992250225606, iteration: 53557
loss: 1.0819315910339355,grad_norm: 0.9999991744161265, iteration: 53558
loss: 1.0495038032531738,grad_norm: 0.9999994891310573, iteration: 53559
loss: 1.0129398107528687,grad_norm: 0.9999991021195971, iteration: 53560
loss: 0.9957788586616516,grad_norm: 0.9999990040544172, iteration: 53561
loss: 0.9740637540817261,grad_norm: 0.9999990428596491, iteration: 53562
loss: 1.0192278623580933,grad_norm: 0.9698401483486847, iteration: 53563
loss: 0.9830261468887329,grad_norm: 0.9999992264931324, iteration: 53564
loss: 1.0455830097198486,grad_norm: 0.999999308216281, iteration: 53565
loss: 0.96497642993927,grad_norm: 0.9048236125621721, iteration: 53566
loss: 0.9543472528457642,grad_norm: 0.9625388532512565, iteration: 53567
loss: 1.0391734838485718,grad_norm: 0.9481546097634929, iteration: 53568
loss: 1.0154145956039429,grad_norm: 0.9999990586964201, iteration: 53569
loss: 1.0008430480957031,grad_norm: 0.9999992388572898, iteration: 53570
loss: 1.0224615335464478,grad_norm: 0.9999991565965279, iteration: 53571
loss: 0.9850842356681824,grad_norm: 0.9999991883094594, iteration: 53572
loss: 1.019753098487854,grad_norm: 0.98361003868413, iteration: 53573
loss: 0.9915236830711365,grad_norm: 0.9999992551978298, iteration: 53574
loss: 0.9850836992263794,grad_norm: 0.9604648100757673, iteration: 53575
loss: 0.9979477524757385,grad_norm: 0.9653412684592472, iteration: 53576
loss: 1.0300779342651367,grad_norm: 0.9738236228139576, iteration: 53577
loss: 0.9672353267669678,grad_norm: 0.9999995381964301, iteration: 53578
loss: 0.9473286867141724,grad_norm: 0.999999239965866, iteration: 53579
loss: 1.0200880765914917,grad_norm: 0.9509670598749188, iteration: 53580
loss: 1.0215990543365479,grad_norm: 0.9999990517725135, iteration: 53581
loss: 1.0184482336044312,grad_norm: 0.9999991167455005, iteration: 53582
loss: 1.0058047771453857,grad_norm: 0.9126540399866848, iteration: 53583
loss: 1.0055758953094482,grad_norm: 0.9295565666440263, iteration: 53584
loss: 1.032296061515808,grad_norm: 0.9999993413550958, iteration: 53585
loss: 0.9889284372329712,grad_norm: 0.9999992388056814, iteration: 53586
loss: 1.011213779449463,grad_norm: 0.9999991276922108, iteration: 53587
loss: 1.0110033750534058,grad_norm: 0.9975835194720217, iteration: 53588
loss: 1.049585223197937,grad_norm: 0.9999992525035591, iteration: 53589
loss: 1.029837727546692,grad_norm: 0.9847507799669036, iteration: 53590
loss: 1.0370031595230103,grad_norm: 0.9999994903958339, iteration: 53591
loss: 0.998412013053894,grad_norm: 0.999999206679135, iteration: 53592
loss: 1.039683222770691,grad_norm: 0.9999991128563556, iteration: 53593
loss: 1.0110468864440918,grad_norm: 0.8164560821913573, iteration: 53594
loss: 0.9993250966072083,grad_norm: 0.9999991303875775, iteration: 53595
loss: 1.0219452381134033,grad_norm: 0.8832776933514445, iteration: 53596
loss: 0.9819113612174988,grad_norm: 0.9999994263990212, iteration: 53597
loss: 1.0246608257293701,grad_norm: 0.9999990608424907, iteration: 53598
loss: 0.9989033341407776,grad_norm: 0.9802594693016833, iteration: 53599
loss: 1.023298740386963,grad_norm: 0.9999990245044118, iteration: 53600
loss: 1.0236215591430664,grad_norm: 0.9985744936640406, iteration: 53601
loss: 1.0261906385421753,grad_norm: 0.9999990456201265, iteration: 53602
loss: 1.053124189376831,grad_norm: 0.9999993660597379, iteration: 53603
loss: 1.0327814817428589,grad_norm: 0.8523048220076692, iteration: 53604
loss: 1.003104329109192,grad_norm: 0.9999990795539776, iteration: 53605
loss: 1.0085618495941162,grad_norm: 0.9999992341255008, iteration: 53606
loss: 1.0530744791030884,grad_norm: 0.9999994489656125, iteration: 53607
loss: 0.9968712329864502,grad_norm: 0.882448718782057, iteration: 53608
loss: 0.9995241761207581,grad_norm: 0.9999991103229721, iteration: 53609
loss: 0.9712232947349548,grad_norm: 0.8754577969985164, iteration: 53610
loss: 0.999503493309021,grad_norm: 0.999999140711573, iteration: 53611
loss: 1.0321433544158936,grad_norm: 0.9999991992051227, iteration: 53612
loss: 1.0094207525253296,grad_norm: 0.9322785849003408, iteration: 53613
loss: 0.9905621409416199,grad_norm: 0.9999991287110246, iteration: 53614
loss: 1.011751651763916,grad_norm: 0.9312486230590908, iteration: 53615
loss: 0.9867649078369141,grad_norm: 0.9713293276361752, iteration: 53616
loss: 1.0120043754577637,grad_norm: 0.9999990057463752, iteration: 53617
loss: 1.0371057987213135,grad_norm: 0.9999991164199409, iteration: 53618
loss: 1.00835120677948,grad_norm: 0.9999991334800862, iteration: 53619
loss: 0.9692696332931519,grad_norm: 0.9999990492542283, iteration: 53620
loss: 1.025937557220459,grad_norm: 0.999999030654114, iteration: 53621
loss: 0.9828727841377258,grad_norm: 0.9999992243055598, iteration: 53622
loss: 0.9979755878448486,grad_norm: 0.9518280485282239, iteration: 53623
loss: 0.9991971850395203,grad_norm: 0.9883867438844192, iteration: 53624
loss: 1.0194541215896606,grad_norm: 0.999999819060408, iteration: 53625
loss: 0.9969707727432251,grad_norm: 0.862822956134423, iteration: 53626
loss: 0.9870798587799072,grad_norm: 0.9999992771641872, iteration: 53627
loss: 0.9859429001808167,grad_norm: 0.9713097306092932, iteration: 53628
loss: 1.0022526979446411,grad_norm: 0.9327510508175245, iteration: 53629
loss: 0.9986565113067627,grad_norm: 0.9999991528511415, iteration: 53630
loss: 1.0202507972717285,grad_norm: 0.953847422107085, iteration: 53631
loss: 0.9790520668029785,grad_norm: 0.7853022098407684, iteration: 53632
loss: 1.027157187461853,grad_norm: 0.9999990861938012, iteration: 53633
loss: 0.964318573474884,grad_norm: 0.9999992054963671, iteration: 53634
loss: 1.0037193298339844,grad_norm: 0.9999990568160776, iteration: 53635
loss: 1.0068327188491821,grad_norm: 0.9999991026102704, iteration: 53636
loss: 0.9860624670982361,grad_norm: 0.9999991521318468, iteration: 53637
loss: 0.9957022070884705,grad_norm: 0.9463778997753596, iteration: 53638
loss: 1.0049322843551636,grad_norm: 0.9999991616118408, iteration: 53639
loss: 1.0068174600601196,grad_norm: 0.946176970459576, iteration: 53640
loss: 1.0014203786849976,grad_norm: 0.9645401398859925, iteration: 53641
loss: 1.0777658224105835,grad_norm: 0.9999995091829218, iteration: 53642
loss: 1.004064679145813,grad_norm: 0.9999992111433114, iteration: 53643
loss: 0.9811595678329468,grad_norm: 0.9999992431263404, iteration: 53644
loss: 1.0103734731674194,grad_norm: 0.9999990652075532, iteration: 53645
loss: 1.0023623704910278,grad_norm: 0.9982876605061348, iteration: 53646
loss: 0.9871553778648376,grad_norm: 0.8794153833132443, iteration: 53647
loss: 1.0006089210510254,grad_norm: 0.9999992081824206, iteration: 53648
loss: 1.0152400732040405,grad_norm: 0.9999994704459951, iteration: 53649
loss: 1.0191407203674316,grad_norm: 0.8795650234482457, iteration: 53650
loss: 0.9748877882957458,grad_norm: 0.9999990243240549, iteration: 53651
loss: 0.9921066164970398,grad_norm: 0.999999189453405, iteration: 53652
loss: 0.9923738241195679,grad_norm: 0.9410873845870019, iteration: 53653
loss: 0.9849335551261902,grad_norm: 0.9999992094865764, iteration: 53654
loss: 0.9791799783706665,grad_norm: 0.9999991492951263, iteration: 53655
loss: 1.0482585430145264,grad_norm: 0.9999989761820585, iteration: 53656
loss: 0.996402382850647,grad_norm: 0.9999990610558286, iteration: 53657
loss: 0.9997624754905701,grad_norm: 0.9999990926073444, iteration: 53658
loss: 0.9919869303703308,grad_norm: 0.9364200020476754, iteration: 53659
loss: 0.9781495928764343,grad_norm: 0.9548781003471253, iteration: 53660
loss: 1.020501971244812,grad_norm: 0.9387748806168276, iteration: 53661
loss: 1.0052428245544434,grad_norm: 0.9999995356604805, iteration: 53662
loss: 1.0161426067352295,grad_norm: 0.9999998280057892, iteration: 53663
loss: 1.008797287940979,grad_norm: 0.9999991404878938, iteration: 53664
loss: 0.9906924366950989,grad_norm: 0.9999992198300227, iteration: 53665
loss: 0.9745774269104004,grad_norm: 0.9999991517675717, iteration: 53666
loss: 0.9860159158706665,grad_norm: 0.9413206774949542, iteration: 53667
loss: 1.006186604499817,grad_norm: 0.8910387835930142, iteration: 53668
loss: 1.0033103227615356,grad_norm: 0.9217209374925847, iteration: 53669
loss: 0.999741792678833,grad_norm: 0.9999990908370788, iteration: 53670
loss: 0.9952809810638428,grad_norm: 0.9924789427236592, iteration: 53671
loss: 0.9664326310157776,grad_norm: 0.9999990079645793, iteration: 53672
loss: 1.0318944454193115,grad_norm: 0.9838946125287006, iteration: 53673
loss: 1.0378965139389038,grad_norm: 0.9999990337211246, iteration: 53674
loss: 1.0511064529418945,grad_norm: 0.9999990134098428, iteration: 53675
loss: 0.980694055557251,grad_norm: 0.9999990423913794, iteration: 53676
loss: 0.9752106666564941,grad_norm: 0.9999990343335662, iteration: 53677
loss: 0.9820389151573181,grad_norm: 0.9999992349049455, iteration: 53678
loss: 1.0500446557998657,grad_norm: 0.9999992705494543, iteration: 53679
loss: 1.0320327281951904,grad_norm: 0.9999991058524109, iteration: 53680
loss: 0.9587868452072144,grad_norm: 0.9999990602967779, iteration: 53681
loss: 1.0118743181228638,grad_norm: 0.950667001227554, iteration: 53682
loss: 0.9878658056259155,grad_norm: 0.8256660631157541, iteration: 53683
loss: 1.0242654085159302,grad_norm: 0.9999992229970042, iteration: 53684
loss: 1.020546555519104,grad_norm: 0.9999990529293906, iteration: 53685
loss: 1.001851201057434,grad_norm: 0.9999992106259055, iteration: 53686
loss: 1.0378518104553223,grad_norm: 0.9667054418354998, iteration: 53687
loss: 0.9929234385490417,grad_norm: 0.9967747013019308, iteration: 53688
loss: 1.016128659248352,grad_norm: 0.9999992739127105, iteration: 53689
loss: 0.9958120584487915,grad_norm: 0.9578153351022864, iteration: 53690
loss: 1.0195847749710083,grad_norm: 0.9999991126392458, iteration: 53691
loss: 0.9659276008605957,grad_norm: 0.9999991186919488, iteration: 53692
loss: 0.9938902854919434,grad_norm: 0.9999991853343447, iteration: 53693
loss: 1.005165696144104,grad_norm: 0.8240711299245563, iteration: 53694
loss: 1.0145543813705444,grad_norm: 0.9575566161232814, iteration: 53695
loss: 1.0317806005477905,grad_norm: 0.9291785314516449, iteration: 53696
loss: 0.9722213745117188,grad_norm: 0.9999992477065516, iteration: 53697
loss: 1.0255193710327148,grad_norm: 0.8875926141384027, iteration: 53698
loss: 0.996044397354126,grad_norm: 0.9999989606027162, iteration: 53699
loss: 1.0012571811676025,grad_norm: 0.9999991793740939, iteration: 53700
loss: 0.95166015625,grad_norm: 0.9049501166374588, iteration: 53701
loss: 0.9846752882003784,grad_norm: 0.9387929889234463, iteration: 53702
loss: 1.0010143518447876,grad_norm: 0.9999991605232421, iteration: 53703
loss: 0.9798617362976074,grad_norm: 0.9999991017095673, iteration: 53704
loss: 0.9782487750053406,grad_norm: 0.9999992173491582, iteration: 53705
loss: 0.9856086373329163,grad_norm: 0.9059219820711192, iteration: 53706
loss: 1.0390163660049438,grad_norm: 0.9999994158487233, iteration: 53707
loss: 1.024641752243042,grad_norm: 0.8577613598001481, iteration: 53708
loss: 0.9746264219284058,grad_norm: 0.9095974119120904, iteration: 53709
loss: 1.0013086795806885,grad_norm: 0.9999990383311654, iteration: 53710
loss: 0.9955599904060364,grad_norm: 0.9999990675028128, iteration: 53711
loss: 0.971078634262085,grad_norm: 0.9999992001274759, iteration: 53712
loss: 0.9952585101127625,grad_norm: 0.9760247220440637, iteration: 53713
loss: 0.9385896921157837,grad_norm: 0.9330097618416294, iteration: 53714
loss: 0.9933690428733826,grad_norm: 0.9999991003520027, iteration: 53715
loss: 0.9674543738365173,grad_norm: 0.9999991316992178, iteration: 53716
loss: 1.0262062549591064,grad_norm: 0.963882072311444, iteration: 53717
loss: 0.9836577773094177,grad_norm: 0.9745419655288838, iteration: 53718
loss: 1.0008238554000854,grad_norm: 0.9999991703450419, iteration: 53719
loss: 1.0323879718780518,grad_norm: 0.9999990633710012, iteration: 53720
loss: 1.0093293190002441,grad_norm: 0.8190681078476115, iteration: 53721
loss: 0.9817399978637695,grad_norm: 0.8270585407640052, iteration: 53722
loss: 0.9655676484107971,grad_norm: 0.8553345661973363, iteration: 53723
loss: 1.0039403438568115,grad_norm: 0.8359780077862846, iteration: 53724
loss: 1.0094709396362305,grad_norm: 0.9155217102557561, iteration: 53725
loss: 1.056186318397522,grad_norm: 0.9999993809847004, iteration: 53726
loss: 1.003313422203064,grad_norm: 0.9999991073085406, iteration: 53727
loss: 1.0096511840820312,grad_norm: 0.8708105739753221, iteration: 53728
loss: 1.0013872385025024,grad_norm: 0.9999995667465936, iteration: 53729
loss: 0.9948330521583557,grad_norm: 0.978328228709096, iteration: 53730
loss: 1.007838487625122,grad_norm: 0.9442452805051816, iteration: 53731
loss: 0.9741232991218567,grad_norm: 0.9999989447851415, iteration: 53732
loss: 1.0146484375,grad_norm: 0.9999991057055249, iteration: 53733
loss: 0.9949512481689453,grad_norm: 0.999999138031511, iteration: 53734
loss: 0.9415456652641296,grad_norm: 0.9506869343078811, iteration: 53735
loss: 1.0231823921203613,grad_norm: 0.9482783438551242, iteration: 53736
loss: 1.0361547470092773,grad_norm: 0.9667383987645606, iteration: 53737
loss: 0.9933125972747803,grad_norm: 0.875136248487948, iteration: 53738
loss: 0.9825780987739563,grad_norm: 0.8816710721373968, iteration: 53739
loss: 0.9931350946426392,grad_norm: 0.9873859441129187, iteration: 53740
loss: 0.9860967397689819,grad_norm: 0.9999992258634451, iteration: 53741
loss: 0.9974972605705261,grad_norm: 0.999999273793978, iteration: 53742
loss: 0.9674559831619263,grad_norm: 0.99999924927004, iteration: 53743
loss: 0.9977964162826538,grad_norm: 0.8495628006395151, iteration: 53744
loss: 1.0135424137115479,grad_norm: 0.999998973497447, iteration: 53745
loss: 1.0315353870391846,grad_norm: 0.8311636396824481, iteration: 53746
loss: 0.9782281517982483,grad_norm: 0.9749550698233178, iteration: 53747
loss: 1.0264033079147339,grad_norm: 0.9999990820557584, iteration: 53748
loss: 0.983855128288269,grad_norm: 0.9999990385019442, iteration: 53749
loss: 0.9837949872016907,grad_norm: 0.9408214656258522, iteration: 53750
loss: 1.03577721118927,grad_norm: 0.9999993749255126, iteration: 53751
loss: 1.005955696105957,grad_norm: 0.9999993559890379, iteration: 53752
loss: 0.985928475856781,grad_norm: 0.8495510791628362, iteration: 53753
loss: 0.9756132364273071,grad_norm: 0.9999994889903288, iteration: 53754
loss: 1.010387897491455,grad_norm: 0.9307183354407442, iteration: 53755
loss: 0.9783938527107239,grad_norm: 0.9999991736458382, iteration: 53756
loss: 0.9917828440666199,grad_norm: 0.99999898643103, iteration: 53757
loss: 0.9349163174629211,grad_norm: 0.9999991651503118, iteration: 53758
loss: 0.9726614952087402,grad_norm: 0.9999991111934571, iteration: 53759
loss: 0.948104202747345,grad_norm: 0.8374666127012499, iteration: 53760
loss: 1.0250369310379028,grad_norm: 0.9999991328201413, iteration: 53761
loss: 1.0409456491470337,grad_norm: 0.9999998590577702, iteration: 53762
loss: 1.0028842687606812,grad_norm: 0.956501766643681, iteration: 53763
loss: 1.0659605264663696,grad_norm: 0.9999993954893338, iteration: 53764
loss: 0.9981684684753418,grad_norm: 0.9887275394153213, iteration: 53765
loss: 0.9901612997055054,grad_norm: 0.9888383497390092, iteration: 53766
loss: 1.0047250986099243,grad_norm: 0.9999990074799662, iteration: 53767
loss: 1.0259453058242798,grad_norm: 0.9999990565585616, iteration: 53768
loss: 0.9972120523452759,grad_norm: 0.999999127239547, iteration: 53769
loss: 0.9912763237953186,grad_norm: 0.9999994529315696, iteration: 53770
loss: 0.9867194294929504,grad_norm: 0.9999990608622341, iteration: 53771
loss: 1.0237523317337036,grad_norm: 0.9999993370687087, iteration: 53772
loss: 0.9690735936164856,grad_norm: 0.8433344307128956, iteration: 53773
loss: 0.992551326751709,grad_norm: 0.9890750044374114, iteration: 53774
loss: 0.9962396621704102,grad_norm: 0.9999990458700482, iteration: 53775
loss: 1.0035390853881836,grad_norm: 0.9686053006898978, iteration: 53776
loss: 0.9646475315093994,grad_norm: 0.9999992672596278, iteration: 53777
loss: 0.9755522012710571,grad_norm: 0.999998906961733, iteration: 53778
loss: 1.019989252090454,grad_norm: 0.9319056174758351, iteration: 53779
loss: 0.9837485551834106,grad_norm: 0.9825225043440203, iteration: 53780
loss: 1.0034176111221313,grad_norm: 0.9999992467039047, iteration: 53781
loss: 1.0392719507217407,grad_norm: 0.9999996190132313, iteration: 53782
loss: 1.048587441444397,grad_norm: 0.9341772402742348, iteration: 53783
loss: 1.0193862915039062,grad_norm: 0.9627667325166308, iteration: 53784
loss: 1.0288341045379639,grad_norm: 0.9999994340068236, iteration: 53785
loss: 0.9991182684898376,grad_norm: 0.9999990023706601, iteration: 53786
loss: 1.0044537782669067,grad_norm: 0.9999990941188946, iteration: 53787
loss: 1.0115361213684082,grad_norm: 0.999999839976365, iteration: 53788
loss: 1.043446660041809,grad_norm: 0.9999997478074656, iteration: 53789
loss: 1.0225976705551147,grad_norm: 0.9999994315325322, iteration: 53790
loss: 1.0285727977752686,grad_norm: 0.9999991802512218, iteration: 53791
loss: 1.0054974555969238,grad_norm: 0.8686523930285552, iteration: 53792
loss: 1.0521914958953857,grad_norm: 0.9999989967009466, iteration: 53793
loss: 1.0011076927185059,grad_norm: 0.9555209944573202, iteration: 53794
loss: 1.0277408361434937,grad_norm: 0.9376019144603295, iteration: 53795
loss: 1.03822660446167,grad_norm: 0.999999147375679, iteration: 53796
loss: 1.007346749305725,grad_norm: 0.8956726113272867, iteration: 53797
loss: 1.0065606832504272,grad_norm: 0.9999990453560933, iteration: 53798
loss: 1.0321784019470215,grad_norm: 0.9999994861624927, iteration: 53799
loss: 0.961776852607727,grad_norm: 0.9999995790393248, iteration: 53800
loss: 1.0195366144180298,grad_norm: 0.9999995791783496, iteration: 53801
loss: 1.0122610330581665,grad_norm: 0.9999992938945275, iteration: 53802
loss: 0.9926889538764954,grad_norm: 0.944642342887449, iteration: 53803
loss: 1.0894217491149902,grad_norm: 0.9999998065678771, iteration: 53804
loss: 0.9979975819587708,grad_norm: 0.9312931121991497, iteration: 53805
loss: 1.0021297931671143,grad_norm: 0.999999156694027, iteration: 53806
loss: 0.9976475834846497,grad_norm: 0.9999994076616989, iteration: 53807
loss: 0.9971193075180054,grad_norm: 0.9999990302884426, iteration: 53808
loss: 0.9822197556495667,grad_norm: 0.9999992444362994, iteration: 53809
loss: 1.0052622556686401,grad_norm: 0.9999996670672128, iteration: 53810
loss: 1.0231573581695557,grad_norm: 0.976147269839809, iteration: 53811
loss: 1.031462550163269,grad_norm: 0.9999995880965311, iteration: 53812
loss: 0.9962713718414307,grad_norm: 0.9999991728717542, iteration: 53813
loss: 1.0250853300094604,grad_norm: 0.9999993316721018, iteration: 53814
loss: 1.007603645324707,grad_norm: 0.9999992506461016, iteration: 53815
loss: 0.9750893115997314,grad_norm: 0.9342929747687744, iteration: 53816
loss: 1.0929397344589233,grad_norm: 0.9999991851678315, iteration: 53817
loss: 0.9665834307670593,grad_norm: 0.9999992244607416, iteration: 53818
loss: 0.9930034279823303,grad_norm: 0.9999991573207554, iteration: 53819
loss: 0.9727218151092529,grad_norm: 0.9837799738684258, iteration: 53820
loss: 1.0293277502059937,grad_norm: 0.9999996553739974, iteration: 53821
loss: 1.0041756629943848,grad_norm: 0.9999992200217942, iteration: 53822
loss: 1.0409910678863525,grad_norm: 0.9999991950736756, iteration: 53823
loss: 0.9821744561195374,grad_norm: 0.9999990578389757, iteration: 53824
loss: 1.0012465715408325,grad_norm: 0.9999991198460364, iteration: 53825
loss: 1.0361778736114502,grad_norm: 0.9999993144535478, iteration: 53826
loss: 1.000080943107605,grad_norm: 0.9999993416033438, iteration: 53827
loss: 1.007875919342041,grad_norm: 0.8545523003141638, iteration: 53828
loss: 1.0341535806655884,grad_norm: 0.9999993050219196, iteration: 53829
loss: 1.0137187242507935,grad_norm: 0.9895324703230842, iteration: 53830
loss: 0.9878332018852234,grad_norm: 0.999999045016081, iteration: 53831
loss: 1.0471091270446777,grad_norm: 0.9999992392283433, iteration: 53832
loss: 0.9863664507865906,grad_norm: 0.9999990545485122, iteration: 53833
loss: 1.0125364065170288,grad_norm: 0.999999164981228, iteration: 53834
loss: 1.021788239479065,grad_norm: 0.8653464869205724, iteration: 53835
loss: 0.996824324131012,grad_norm: 0.9999990690086032, iteration: 53836
loss: 1.0076044797897339,grad_norm: 0.9999993468005062, iteration: 53837
loss: 1.0426303148269653,grad_norm: 0.9999996215759869, iteration: 53838
loss: 0.9784143567085266,grad_norm: 0.9843642699496368, iteration: 53839
loss: 1.012866735458374,grad_norm: 0.9999991356532522, iteration: 53840
loss: 0.985596776008606,grad_norm: 0.999998845633105, iteration: 53841
loss: 0.9984074234962463,grad_norm: 0.9712380286528459, iteration: 53842
loss: 0.9997878670692444,grad_norm: 0.9999990814910288, iteration: 53843
loss: 0.9778047204017639,grad_norm: 0.9691290967845491, iteration: 53844
loss: 1.0226197242736816,grad_norm: 0.9999998365295997, iteration: 53845
loss: 1.0131605863571167,grad_norm: 0.9642461354706034, iteration: 53846
loss: 1.0028414726257324,grad_norm: 0.9999990401490256, iteration: 53847
loss: 1.0505212545394897,grad_norm: 0.9999996185835174, iteration: 53848
loss: 0.9580124020576477,grad_norm: 0.869801306411219, iteration: 53849
loss: 1.0135548114776611,grad_norm: 0.988386166522154, iteration: 53850
loss: 1.0513275861740112,grad_norm: 0.9999991964927825, iteration: 53851
loss: 0.9702574610710144,grad_norm: 0.9999991417156603, iteration: 53852
loss: 0.9982572197914124,grad_norm: 0.9677781687846269, iteration: 53853
loss: 1.0028153657913208,grad_norm: 0.9863822147600679, iteration: 53854
loss: 1.0057522058486938,grad_norm: 0.9999990915619758, iteration: 53855
loss: 0.9812270998954773,grad_norm: 0.9999991479189326, iteration: 53856
loss: 1.005889654159546,grad_norm: 0.999999669303893, iteration: 53857
loss: 0.9802155494689941,grad_norm: 0.9541654322852549, iteration: 53858
loss: 1.0268524885177612,grad_norm: 1.0000000114889824, iteration: 53859
loss: 1.0909687280654907,grad_norm: 0.9999992090730463, iteration: 53860
loss: 1.0170989036560059,grad_norm: 0.9664392888180973, iteration: 53861
loss: 0.9691877961158752,grad_norm: 0.9473884841661971, iteration: 53862
loss: 0.9799237251281738,grad_norm: 0.8935907784648636, iteration: 53863
loss: 1.0350215435028076,grad_norm: 0.9999993601581735, iteration: 53864
loss: 1.0085769891738892,grad_norm: 0.8338893788345402, iteration: 53865
loss: 1.007190465927124,grad_norm: 0.9999992349605528, iteration: 53866
loss: 0.9991611242294312,grad_norm: 0.9999991159954251, iteration: 53867
loss: 0.980594277381897,grad_norm: 0.9999997243177214, iteration: 53868
loss: 1.0150552988052368,grad_norm: 0.9473065897242555, iteration: 53869
loss: 0.9817962646484375,grad_norm: 0.9335059211165545, iteration: 53870
loss: 1.0252423286437988,grad_norm: 0.9999991383574676, iteration: 53871
loss: 1.018479585647583,grad_norm: 0.9363778379323227, iteration: 53872
loss: 1.032351016998291,grad_norm: 0.9999992691839455, iteration: 53873
loss: 0.9785252809524536,grad_norm: 0.9999993605487247, iteration: 53874
loss: 1.0088551044464111,grad_norm: 0.9999990973307868, iteration: 53875
loss: 0.9914694428443909,grad_norm: 0.9999992551095926, iteration: 53876
loss: 1.00535249710083,grad_norm: 0.9999994861476328, iteration: 53877
loss: 0.9609124064445496,grad_norm: 0.999998989121803, iteration: 53878
loss: 1.0520944595336914,grad_norm: 0.9999995114689251, iteration: 53879
loss: 1.0250712633132935,grad_norm: 0.999999160589897, iteration: 53880
loss: 1.0451531410217285,grad_norm: 0.9999990898779523, iteration: 53881
loss: 1.0867559909820557,grad_norm: 0.9999993166909932, iteration: 53882
loss: 0.9957129955291748,grad_norm: 0.8061329608932669, iteration: 53883
loss: 0.985791027545929,grad_norm: 0.9999990826883959, iteration: 53884
loss: 0.9980702996253967,grad_norm: 0.9079647800938132, iteration: 53885
loss: 1.011130690574646,grad_norm: 0.9999996490037425, iteration: 53886
loss: 1.059396743774414,grad_norm: 0.9999993353908396, iteration: 53887
loss: 0.989945650100708,grad_norm: 0.8793823443779385, iteration: 53888
loss: 1.0032060146331787,grad_norm: 0.9999991542754574, iteration: 53889
loss: 0.991088330745697,grad_norm: 0.977836315999542, iteration: 53890
loss: 0.9714986681938171,grad_norm: 0.9562858424529064, iteration: 53891
loss: 1.1843665838241577,grad_norm: 0.9999993451199407, iteration: 53892
loss: 1.0176596641540527,grad_norm: 0.9999990943834869, iteration: 53893
loss: 0.9731959104537964,grad_norm: 0.925875316981934, iteration: 53894
loss: 1.0183366537094116,grad_norm: 0.9999990335392438, iteration: 53895
loss: 1.0645055770874023,grad_norm: 0.9999992870542672, iteration: 53896
loss: 1.0152242183685303,grad_norm: 0.9734655223795867, iteration: 53897
loss: 0.9883512854576111,grad_norm: 0.9999990060315754, iteration: 53898
loss: 0.9869096875190735,grad_norm: 0.9999991362169106, iteration: 53899
loss: 1.0120303630828857,grad_norm: 0.9950191269227041, iteration: 53900
loss: 1.0249030590057373,grad_norm: 0.9804461391413082, iteration: 53901
loss: 0.9547259211540222,grad_norm: 0.9999990714362117, iteration: 53902
loss: 0.985499382019043,grad_norm: 0.9999989808756975, iteration: 53903
loss: 0.9959146976470947,grad_norm: 0.9089333239788063, iteration: 53904
loss: 1.0147567987442017,grad_norm: 0.8721045989547203, iteration: 53905
loss: 1.0260695219039917,grad_norm: 0.9999991558931814, iteration: 53906
loss: 0.9883903861045837,grad_norm: 0.9999990963816029, iteration: 53907
loss: 1.183321475982666,grad_norm: 0.9999994384934816, iteration: 53908
loss: 0.949425220489502,grad_norm: 0.9999991331621741, iteration: 53909
loss: 0.996931791305542,grad_norm: 0.9999991188568561, iteration: 53910
loss: 0.9963474273681641,grad_norm: 0.9828440201861577, iteration: 53911
loss: 1.0009615421295166,grad_norm: 0.9999993128975609, iteration: 53912
loss: 0.9502015709877014,grad_norm: 0.9999992088524251, iteration: 53913
loss: 0.9821935892105103,grad_norm: 0.9999989184713528, iteration: 53914
loss: 1.018445611000061,grad_norm: 0.999999786574507, iteration: 53915
loss: 1.037203311920166,grad_norm: 0.9999996656250865, iteration: 53916
loss: 1.0296573638916016,grad_norm: 0.9999995502158329, iteration: 53917
loss: 1.0108277797698975,grad_norm: 0.9999993721404142, iteration: 53918
loss: 1.0615462064743042,grad_norm: 0.9999993557246359, iteration: 53919
loss: 1.0069774389266968,grad_norm: 0.9916564166916201, iteration: 53920
loss: 1.0493109226226807,grad_norm: 0.9999993006787551, iteration: 53921
loss: 1.0075269937515259,grad_norm: 0.9999990891599951, iteration: 53922
loss: 1.0066014528274536,grad_norm: 0.9001552708480798, iteration: 53923
loss: 1.0071569681167603,grad_norm: 0.9999990127467292, iteration: 53924
loss: 1.0144277811050415,grad_norm: 0.9488749684721483, iteration: 53925
loss: 0.9710943698883057,grad_norm: 0.9999990021540716, iteration: 53926
loss: 0.9950928092002869,grad_norm: 0.9999999392960818, iteration: 53927
loss: 1.0189677476882935,grad_norm: 0.9273057944272792, iteration: 53928
loss: 1.007724642753601,grad_norm: 0.9999999345186374, iteration: 53929
loss: 0.9645934104919434,grad_norm: 0.9323172658733891, iteration: 53930
loss: 1.0081381797790527,grad_norm: 0.9999991641564466, iteration: 53931
loss: 0.9850330352783203,grad_norm: 0.9999992455287791, iteration: 53932
loss: 0.9782028794288635,grad_norm: 0.9999990475907079, iteration: 53933
loss: 1.0303245782852173,grad_norm: 0.9999991680595672, iteration: 53934
loss: 0.9803361892700195,grad_norm: 0.9999991777028933, iteration: 53935
loss: 1.032668948173523,grad_norm: 0.9999991938964512, iteration: 53936
loss: 0.9664295315742493,grad_norm: 0.9999991494402181, iteration: 53937
loss: 1.0140464305877686,grad_norm: 0.849158155249005, iteration: 53938
loss: 0.9640455842018127,grad_norm: 0.9999989453990936, iteration: 53939
loss: 0.9926062226295471,grad_norm: 0.9999992093207972, iteration: 53940
loss: 1.0314562320709229,grad_norm: 0.9999994541649473, iteration: 53941
loss: 0.9862332344055176,grad_norm: 0.8965084446767814, iteration: 53942
loss: 1.0048907995224,grad_norm: 0.9999991430210321, iteration: 53943
loss: 0.9754572510719299,grad_norm: 0.9247843757404192, iteration: 53944
loss: 1.0582979917526245,grad_norm: 0.9999991584666235, iteration: 53945
loss: 1.0149905681610107,grad_norm: 0.9999994123675507, iteration: 53946
loss: 0.9995172619819641,grad_norm: 0.9999991255151568, iteration: 53947
loss: 0.9761399626731873,grad_norm: 0.947164429308962, iteration: 53948
loss: 1.013798475265503,grad_norm: 0.9439229357837159, iteration: 53949
loss: 1.0266613960266113,grad_norm: 0.9999991896276289, iteration: 53950
loss: 0.9882021546363831,grad_norm: 0.9999990535919203, iteration: 53951
loss: 1.03850257396698,grad_norm: 0.9999993870023713, iteration: 53952
loss: 1.0076967477798462,grad_norm: 0.9999990972094575, iteration: 53953
loss: 1.08047616481781,grad_norm: 0.999999509909213, iteration: 53954
loss: 0.9874302744865417,grad_norm: 0.9999992151909537, iteration: 53955
loss: 1.0447196960449219,grad_norm: 0.9999992738042993, iteration: 53956
loss: 0.9786126613616943,grad_norm: 0.9592047065996971, iteration: 53957
loss: 1.019963026046753,grad_norm: 0.9999997763464912, iteration: 53958
loss: 1.013237476348877,grad_norm: 0.9999991435573921, iteration: 53959
loss: 0.9766073822975159,grad_norm: 0.9999990675778311, iteration: 53960
loss: 1.026130199432373,grad_norm: 0.9999989798164, iteration: 53961
loss: 0.9719841480255127,grad_norm: 0.9999992588217675, iteration: 53962
loss: 0.9895251393318176,grad_norm: 0.9999988493688801, iteration: 53963
loss: 1.002172589302063,grad_norm: 0.9999996712109547, iteration: 53964
loss: 1.0106703042984009,grad_norm: 0.9256286176028805, iteration: 53965
loss: 1.0092664957046509,grad_norm: 0.9323328939634873, iteration: 53966
loss: 0.9925334453582764,grad_norm: 0.9098068671071737, iteration: 53967
loss: 0.999572217464447,grad_norm: 0.9999991992887348, iteration: 53968
loss: 0.9924376010894775,grad_norm: 0.999999166107607, iteration: 53969
loss: 0.9893806576728821,grad_norm: 0.9197527218080391, iteration: 53970
loss: 1.023793339729309,grad_norm: 0.950372319684395, iteration: 53971
loss: 0.988658607006073,grad_norm: 0.9999990471731244, iteration: 53972
loss: 1.0284842252731323,grad_norm: 0.9302905184985221, iteration: 53973
loss: 1.0137180089950562,grad_norm: 0.9999993762765093, iteration: 53974
loss: 0.9808416962623596,grad_norm: 0.9916369433154035, iteration: 53975
loss: 1.0562862157821655,grad_norm: 0.9977265445684728, iteration: 53976
loss: 1.0108734369277954,grad_norm: 0.9999990418677269, iteration: 53977
loss: 1.0338197946548462,grad_norm: 0.9380939474491862, iteration: 53978
loss: 1.006941556930542,grad_norm: 0.9418579396460123, iteration: 53979
loss: 0.9994506239891052,grad_norm: 0.9341512920721791, iteration: 53980
loss: 0.9986718893051147,grad_norm: 0.9999989264449249, iteration: 53981
loss: 0.9935891628265381,grad_norm: 0.9999991696892773, iteration: 53982
loss: 0.9875144958496094,grad_norm: 0.999999194350147, iteration: 53983
loss: 1.006643533706665,grad_norm: 0.9999994162233414, iteration: 53984
loss: 1.008881688117981,grad_norm: 0.9999992500992372, iteration: 53985
loss: 0.9919819831848145,grad_norm: 0.9999991627439322, iteration: 53986
loss: 1.0085946321487427,grad_norm: 0.9980328823134563, iteration: 53987
loss: 1.0092412233352661,grad_norm: 0.9674877607321462, iteration: 53988
loss: 1.0041913986206055,grad_norm: 0.9999992299644626, iteration: 53989
loss: 1.0394847393035889,grad_norm: 0.9999991407591657, iteration: 53990
loss: 0.9733914732933044,grad_norm: 0.9833228408724299, iteration: 53991
loss: 1.0250532627105713,grad_norm: 0.9999991005844053, iteration: 53992
loss: 0.9922893047332764,grad_norm: 0.999999205886295, iteration: 53993
loss: 0.9975919127464294,grad_norm: 0.8985546066038735, iteration: 53994
loss: 0.9727093577384949,grad_norm: 0.9757713447172608, iteration: 53995
loss: 1.0157071352005005,grad_norm: 0.9081470149077956, iteration: 53996
loss: 1.0388797521591187,grad_norm: 0.9367130597281059, iteration: 53997
loss: 1.0108619928359985,grad_norm: 0.9927111075229336, iteration: 53998
loss: 1.0307284593582153,grad_norm: 0.9999993450518915, iteration: 53999
loss: 1.0003122091293335,grad_norm: 0.942170218030184, iteration: 54000
loss: 0.9933656454086304,grad_norm: 0.9929855751095377, iteration: 54001
loss: 1.0205219984054565,grad_norm: 0.9999992275445146, iteration: 54002
loss: 1.0575904846191406,grad_norm: 0.9999998028551323, iteration: 54003
loss: 0.9808064699172974,grad_norm: 0.9514936095854297, iteration: 54004
loss: 1.018804907798767,grad_norm: 0.9999991892458899, iteration: 54005
loss: 1.1633479595184326,grad_norm: 0.9999997605330548, iteration: 54006
loss: 1.0210480690002441,grad_norm: 0.9999990932480486, iteration: 54007
loss: 0.9768686890602112,grad_norm: 0.9999989884811264, iteration: 54008
loss: 1.0238806009292603,grad_norm: 0.9995622486919251, iteration: 54009
loss: 1.0229566097259521,grad_norm: 0.857385055073142, iteration: 54010
loss: 1.0051311254501343,grad_norm: 0.9999989659832731, iteration: 54011
loss: 1.0087506771087646,grad_norm: 0.7984902170010852, iteration: 54012
loss: 0.9733780026435852,grad_norm: 0.9269164155297659, iteration: 54013
loss: 1.0422922372817993,grad_norm: 0.999999599076874, iteration: 54014
loss: 1.022089958190918,grad_norm: 0.9999991001627384, iteration: 54015
loss: 1.0461394786834717,grad_norm: 0.926636047528823, iteration: 54016
loss: 0.967194676399231,grad_norm: 0.9060753968744574, iteration: 54017
loss: 1.0149568319320679,grad_norm: 0.8263282106740394, iteration: 54018
loss: 1.0413779020309448,grad_norm: 0.9999996786025865, iteration: 54019
loss: 1.0533859729766846,grad_norm: 0.9999990304277969, iteration: 54020
loss: 0.9757482409477234,grad_norm: 0.9999991626220709, iteration: 54021
loss: 1.0120058059692383,grad_norm: 0.9999992032082538, iteration: 54022
loss: 1.0032552480697632,grad_norm: 0.9999990666684778, iteration: 54023
loss: 1.0384846925735474,grad_norm: 0.9764983242119545, iteration: 54024
loss: 1.002398133277893,grad_norm: 0.9019993828841314, iteration: 54025
loss: 1.020151138305664,grad_norm: 0.9999990415610508, iteration: 54026
loss: 0.9949073195457458,grad_norm: 0.9488111616199668, iteration: 54027
loss: 1.011118769645691,grad_norm: 0.9999992935928391, iteration: 54028
loss: 1.0483520030975342,grad_norm: 0.9999991722680359, iteration: 54029
loss: 1.0053433179855347,grad_norm: 0.9999993511581312, iteration: 54030
loss: 1.0049468278884888,grad_norm: 0.9999992138799906, iteration: 54031
loss: 0.9976669549942017,grad_norm: 0.9999991377474083, iteration: 54032
loss: 0.9803181290626526,grad_norm: 0.92891335380423, iteration: 54033
loss: 0.9397423267364502,grad_norm: 0.9999991197241721, iteration: 54034
loss: 0.9873846173286438,grad_norm: 0.9999990488128437, iteration: 54035
loss: 0.9924923777580261,grad_norm: 0.9999989854026096, iteration: 54036
loss: 1.0172748565673828,grad_norm: 0.9999991111853421, iteration: 54037
loss: 0.9966415166854858,grad_norm: 0.8448779709841163, iteration: 54038
loss: 0.9983627796173096,grad_norm: 0.9095409091743099, iteration: 54039
loss: 0.953748345375061,grad_norm: 0.8747421434399641, iteration: 54040
loss: 0.9989300966262817,grad_norm: 0.9999990676111149, iteration: 54041
loss: 1.0088425874710083,grad_norm: 0.9818595055294782, iteration: 54042
loss: 1.0039424896240234,grad_norm: 0.9113870885393589, iteration: 54043
loss: 0.965380072593689,grad_norm: 0.9999991738306867, iteration: 54044
loss: 0.9732643365859985,grad_norm: 0.999999198096189, iteration: 54045
loss: 0.9585593938827515,grad_norm: 0.9714979682819083, iteration: 54046
loss: 0.9998478889465332,grad_norm: 0.9999992416237373, iteration: 54047
loss: 1.0275936126708984,grad_norm: 0.8430288715201958, iteration: 54048
loss: 0.9780594706535339,grad_norm: 0.9646183096655165, iteration: 54049
loss: 0.9581025838851929,grad_norm: 0.9392067588699741, iteration: 54050
loss: 1.001571536064148,grad_norm: 0.9373328580850856, iteration: 54051
loss: 0.9786556363105774,grad_norm: 0.9999990937828827, iteration: 54052
loss: 0.9795677065849304,grad_norm: 0.953117612821159, iteration: 54053
loss: 0.9965233206748962,grad_norm: 0.999999251733223, iteration: 54054
loss: 0.9760645627975464,grad_norm: 0.9999992082792335, iteration: 54055
loss: 1.0126956701278687,grad_norm: 0.987298234209491, iteration: 54056
loss: 1.0275847911834717,grad_norm: 0.9999992474475969, iteration: 54057
loss: 0.9761391878128052,grad_norm: 0.9999990073504053, iteration: 54058
loss: 1.028937578201294,grad_norm: 0.9797160611031065, iteration: 54059
loss: 0.97954922914505,grad_norm: 0.9581492337089146, iteration: 54060
loss: 0.9993104934692383,grad_norm: 0.8799781265550772, iteration: 54061
loss: 1.007239580154419,grad_norm: 0.7916693026558023, iteration: 54062
loss: 1.0015366077423096,grad_norm: 0.9526376482820857, iteration: 54063
loss: 1.0140211582183838,grad_norm: 0.9999990317515126, iteration: 54064
loss: 1.0166336297988892,grad_norm: 0.8484710074013725, iteration: 54065
loss: 1.0130071640014648,grad_norm: 0.7959501234524863, iteration: 54066
loss: 1.0492868423461914,grad_norm: 0.999999810334974, iteration: 54067
loss: 1.0229227542877197,grad_norm: 0.9999989582639962, iteration: 54068
loss: 0.9649056196212769,grad_norm: 0.999999093085938, iteration: 54069
loss: 1.0496690273284912,grad_norm: 0.9932918071181628, iteration: 54070
loss: 1.0137490034103394,grad_norm: 0.8888109441455317, iteration: 54071
loss: 0.982929527759552,grad_norm: 0.8631749318188146, iteration: 54072
loss: 1.0197798013687134,grad_norm: 0.9999995706792147, iteration: 54073
loss: 1.0368205308914185,grad_norm: 0.9999990177636561, iteration: 54074
loss: 1.0137805938720703,grad_norm: 0.9965248671604963, iteration: 54075
loss: 0.9448899030685425,grad_norm: 0.9999990373547273, iteration: 54076
loss: 1.037559986114502,grad_norm: 0.9999991456606357, iteration: 54077
loss: 1.0301984548568726,grad_norm: 0.9772239862869554, iteration: 54078
loss: 1.0226815938949585,grad_norm: 0.9478430513292416, iteration: 54079
loss: 1.0035630464553833,grad_norm: 0.9265519047933833, iteration: 54080
loss: 0.9827677607536316,grad_norm: 0.9831745381141715, iteration: 54081
loss: 0.9798979759216309,grad_norm: 0.9999989919836377, iteration: 54082
loss: 0.988341212272644,grad_norm: 0.9239718008254207, iteration: 54083
loss: 0.9758509397506714,grad_norm: 0.9770037653348163, iteration: 54084
loss: 0.9981266260147095,grad_norm: 0.94375482502865, iteration: 54085
loss: 1.0063822269439697,grad_norm: 0.9943583409942708, iteration: 54086
loss: 1.0033440589904785,grad_norm: 0.9999996167255546, iteration: 54087
loss: 0.9955667853355408,grad_norm: 0.9999991859943611, iteration: 54088
loss: 0.9919523596763611,grad_norm: 0.9999990253070198, iteration: 54089
loss: 0.9967244863510132,grad_norm: 0.9999990459137932, iteration: 54090
loss: 1.0080629587173462,grad_norm: 0.999999042283036, iteration: 54091
loss: 1.0057601928710938,grad_norm: 0.9999992148295394, iteration: 54092
loss: 0.9951258897781372,grad_norm: 0.8598871555347906, iteration: 54093
loss: 0.9792397618293762,grad_norm: 0.9999991499067632, iteration: 54094
loss: 1.0359582901000977,grad_norm: 0.9931407992806462, iteration: 54095
loss: 1.0211347341537476,grad_norm: 0.9173359003446989, iteration: 54096
loss: 0.984367311000824,grad_norm: 0.9999990818684694, iteration: 54097
loss: 1.034785509109497,grad_norm: 0.9999990599364782, iteration: 54098
loss: 0.9867658615112305,grad_norm: 0.9468069514089688, iteration: 54099
loss: 0.9989253282546997,grad_norm: 0.9999990144674543, iteration: 54100
loss: 1.0096622705459595,grad_norm: 0.8311820866962123, iteration: 54101
loss: 0.9859676957130432,grad_norm: 0.9999991402812383, iteration: 54102
loss: 1.0424001216888428,grad_norm: 0.9999991549132706, iteration: 54103
loss: 1.0344849824905396,grad_norm: 0.9999992083858218, iteration: 54104
loss: 0.9651188850402832,grad_norm: 0.9932183277132589, iteration: 54105
loss: 0.9933534264564514,grad_norm: 0.8604040005390466, iteration: 54106
loss: 1.0236760377883911,grad_norm: 0.9821902942211774, iteration: 54107
loss: 0.9725190997123718,grad_norm: 0.9337756233231196, iteration: 54108
loss: 1.011560082435608,grad_norm: 0.9999990644801885, iteration: 54109
loss: 1.0066862106323242,grad_norm: 0.9449004242366826, iteration: 54110
loss: 1.0311745405197144,grad_norm: 0.9880538212755392, iteration: 54111
loss: 0.968893826007843,grad_norm: 0.9004611483957, iteration: 54112
loss: 1.0047821998596191,grad_norm: 0.9999990472147929, iteration: 54113
loss: 1.0217856168746948,grad_norm: 0.999999204857172, iteration: 54114
loss: 0.9881582856178284,grad_norm: 0.9999990967322827, iteration: 54115
loss: 0.9581523537635803,grad_norm: 0.8546505364236675, iteration: 54116
loss: 1.0471646785736084,grad_norm: 0.9444162677908509, iteration: 54117
loss: 1.0132551193237305,grad_norm: 0.999999174032133, iteration: 54118
loss: 0.989837646484375,grad_norm: 0.99999906773399, iteration: 54119
loss: 1.0128304958343506,grad_norm: 0.9999990941160938, iteration: 54120
loss: 1.0166213512420654,grad_norm: 0.7694759072517744, iteration: 54121
loss: 1.0288708209991455,grad_norm: 0.9999992624373693, iteration: 54122
loss: 1.0154969692230225,grad_norm: 0.9999990700816372, iteration: 54123
loss: 0.9830954670906067,grad_norm: 0.9335527392415603, iteration: 54124
loss: 0.9940612316131592,grad_norm: 0.8678322961035373, iteration: 54125
loss: 0.9962546825408936,grad_norm: 0.99999899412519, iteration: 54126
loss: 1.0314617156982422,grad_norm: 0.8648553854068569, iteration: 54127
loss: 1.0403491258621216,grad_norm: 0.9999990775221443, iteration: 54128
loss: 1.0105299949645996,grad_norm: 0.9156596136194181, iteration: 54129
loss: 0.9771272540092468,grad_norm: 0.9999990898631743, iteration: 54130
loss: 0.9975844621658325,grad_norm: 0.9811482130805502, iteration: 54131
loss: 1.0094670057296753,grad_norm: 0.9999991238851514, iteration: 54132
loss: 0.9741437435150146,grad_norm: 0.9999991364626515, iteration: 54133
loss: 0.9904046058654785,grad_norm: 0.9824973792335556, iteration: 54134
loss: 1.0319558382034302,grad_norm: 0.9999995822633206, iteration: 54135
loss: 0.9894461631774902,grad_norm: 0.9999991018890225, iteration: 54136
loss: 1.0023287534713745,grad_norm: 0.9999991151859164, iteration: 54137
loss: 1.0400738716125488,grad_norm: 0.9999991062508973, iteration: 54138
loss: 0.9810677170753479,grad_norm: 0.9087028915439992, iteration: 54139
loss: 0.9982315897941589,grad_norm: 0.9999992016585231, iteration: 54140
loss: 0.9967117309570312,grad_norm: 0.9999992647145701, iteration: 54141
loss: 1.0129555463790894,grad_norm: 0.8826316843041051, iteration: 54142
loss: 1.0476678609848022,grad_norm: 0.9999990768450203, iteration: 54143
loss: 1.016141414642334,grad_norm: 0.9912815646349884, iteration: 54144
loss: 1.0290297269821167,grad_norm: 0.9101299004197007, iteration: 54145
loss: 1.0187128782272339,grad_norm: 0.975377514755496, iteration: 54146
loss: 1.01669180393219,grad_norm: 0.99999914135564, iteration: 54147
loss: 1.024795651435852,grad_norm: 0.9447427166829077, iteration: 54148
loss: 1.0020992755889893,grad_norm: 0.9999995947439996, iteration: 54149
loss: 1.0027567148208618,grad_norm: 0.8688779349897273, iteration: 54150
loss: 0.9719818830490112,grad_norm: 0.999999636554864, iteration: 54151
loss: 0.9919566512107849,grad_norm: 0.9548757049549975, iteration: 54152
loss: 1.0370827913284302,grad_norm: 0.9515390100822889, iteration: 54153
loss: 1.0249451398849487,grad_norm: 0.8919516808358889, iteration: 54154
loss: 1.041695475578308,grad_norm: 0.9648827606636946, iteration: 54155
loss: 1.0026302337646484,grad_norm: 0.9999991114231287, iteration: 54156
loss: 1.0210518836975098,grad_norm: 0.9999991809088742, iteration: 54157
loss: 0.9796763062477112,grad_norm: 0.912947369802101, iteration: 54158
loss: 1.0118638277053833,grad_norm: 0.9999989288690622, iteration: 54159
loss: 1.007924199104309,grad_norm: 0.9175999671070643, iteration: 54160
loss: 1.005993366241455,grad_norm: 0.9999992005470383, iteration: 54161
loss: 0.9937232732772827,grad_norm: 0.9404329011942055, iteration: 54162
loss: 1.0419425964355469,grad_norm: 0.9999991597750383, iteration: 54163
loss: 1.0150396823883057,grad_norm: 0.9999991713429619, iteration: 54164
loss: 1.065026879310608,grad_norm: 0.999999170834898, iteration: 54165
loss: 0.9540440440177917,grad_norm: 0.9999991032105312, iteration: 54166
loss: 0.9627034068107605,grad_norm: 0.9999991736533286, iteration: 54167
loss: 1.0181169509887695,grad_norm: 0.9999994792780486, iteration: 54168
loss: 1.0001412630081177,grad_norm: 0.9332993006109769, iteration: 54169
loss: 1.0002330541610718,grad_norm: 0.9999991859856802, iteration: 54170
loss: 1.0031640529632568,grad_norm: 0.9582570362903801, iteration: 54171
loss: 0.9726019501686096,grad_norm: 0.9999989816745612, iteration: 54172
loss: 0.9961845874786377,grad_norm: 0.9999989940744433, iteration: 54173
loss: 1.013615608215332,grad_norm: 0.9999990809778663, iteration: 54174
loss: 1.0333584547042847,grad_norm: 0.9999990675239806, iteration: 54175
loss: 1.0177233219146729,grad_norm: 0.9999991070270016, iteration: 54176
loss: 1.007346510887146,grad_norm: 0.9523089088948693, iteration: 54177
loss: 0.9802900552749634,grad_norm: 0.9773967784250881, iteration: 54178
loss: 0.9783501625061035,grad_norm: 0.9999989704181397, iteration: 54179
loss: 1.0106208324432373,grad_norm: 0.9886080729949017, iteration: 54180
loss: 0.9827771186828613,grad_norm: 0.9999991620072536, iteration: 54181
loss: 0.9919917583465576,grad_norm: 0.9914710104321941, iteration: 54182
loss: 0.9808117151260376,grad_norm: 0.9999990653316213, iteration: 54183
loss: 0.9860638976097107,grad_norm: 0.999999098063436, iteration: 54184
loss: 1.0002142190933228,grad_norm: 0.9957417571271215, iteration: 54185
loss: 0.9792087078094482,grad_norm: 0.9999996753748116, iteration: 54186
loss: 1.0127545595169067,grad_norm: 0.9999991840172734, iteration: 54187
loss: 1.012961745262146,grad_norm: 0.9999992632805973, iteration: 54188
loss: 1.0189738273620605,grad_norm: 0.9999991029503649, iteration: 54189
loss: 0.9999772906303406,grad_norm: 0.9784143512562591, iteration: 54190
loss: 1.0027081966400146,grad_norm: 0.9999991295314923, iteration: 54191
loss: 1.0132863521575928,grad_norm: 0.8407227717861223, iteration: 54192
loss: 0.9773145914077759,grad_norm: 0.9635844336218996, iteration: 54193
loss: 1.0137083530426025,grad_norm: 0.8401609836024088, iteration: 54194
loss: 0.9956294298171997,grad_norm: 0.9999990606235835, iteration: 54195
loss: 1.041832447052002,grad_norm: 0.9999990437268971, iteration: 54196
loss: 1.0075674057006836,grad_norm: 0.9999994794942497, iteration: 54197
loss: 0.9837490320205688,grad_norm: 0.9999991495734337, iteration: 54198
loss: 0.9916746616363525,grad_norm: 0.9999990832321187, iteration: 54199
loss: 0.985754668712616,grad_norm: 0.9755516551038135, iteration: 54200
loss: 0.9964487552642822,grad_norm: 0.8924525760414436, iteration: 54201
loss: 1.0045475959777832,grad_norm: 0.844547786965146, iteration: 54202
loss: 0.9683142304420471,grad_norm: 0.9999990384668005, iteration: 54203
loss: 0.9587681293487549,grad_norm: 0.9404046547863766, iteration: 54204
loss: 1.0031061172485352,grad_norm: 0.9999994877798039, iteration: 54205
loss: 1.017890453338623,grad_norm: 0.9999991483049261, iteration: 54206
loss: 1.002610445022583,grad_norm: 0.9999992126148803, iteration: 54207
loss: 0.9823923707008362,grad_norm: 0.9999990855764234, iteration: 54208
loss: 0.9948533177375793,grad_norm: 0.9672515643725696, iteration: 54209
loss: 1.0009198188781738,grad_norm: 0.9999990285469026, iteration: 54210
loss: 0.9706451296806335,grad_norm: 0.9999991642855461, iteration: 54211
loss: 1.0973117351531982,grad_norm: 0.9999993035529414, iteration: 54212
loss: 0.9928765296936035,grad_norm: 0.9678838255087827, iteration: 54213
loss: 1.0135339498519897,grad_norm: 0.8779827692417814, iteration: 54214
loss: 0.9994064569473267,grad_norm: 0.9999990976643831, iteration: 54215
loss: 1.0175366401672363,grad_norm: 0.9999991107464865, iteration: 54216
loss: 0.9843975305557251,grad_norm: 0.8633622826282172, iteration: 54217
loss: 1.0694564580917358,grad_norm: 0.999999750807915, iteration: 54218
loss: 1.0043599605560303,grad_norm: 0.8809703067400384, iteration: 54219
loss: 0.9960188865661621,grad_norm: 0.9376236163950449, iteration: 54220
loss: 0.9954757690429688,grad_norm: 0.999999128233152, iteration: 54221
loss: 0.9924865961074829,grad_norm: 0.9270374724703038, iteration: 54222
loss: 1.0466103553771973,grad_norm: 0.9999992546127289, iteration: 54223
loss: 0.996239960193634,grad_norm: 0.9999990573710733, iteration: 54224
loss: 0.9969194531440735,grad_norm: 0.9999992060237392, iteration: 54225
loss: 1.0186481475830078,grad_norm: 0.9999996112515801, iteration: 54226
loss: 0.9955520629882812,grad_norm: 0.9999989864861097, iteration: 54227
loss: 1.04841947555542,grad_norm: 0.9999993632480201, iteration: 54228
loss: 0.9749770164489746,grad_norm: 0.9547773839573163, iteration: 54229
loss: 0.9977328777313232,grad_norm: 0.9617789612494804, iteration: 54230
loss: 0.9990518689155579,grad_norm: 0.8947965380778747, iteration: 54231
loss: 1.0112887620925903,grad_norm: 0.9999990432642321, iteration: 54232
loss: 1.034212350845337,grad_norm: 0.9999991796195294, iteration: 54233
loss: 1.0038586854934692,grad_norm: 0.9999990417923336, iteration: 54234
loss: 1.0110098123550415,grad_norm: 0.9999991224208793, iteration: 54235
loss: 1.0262731313705444,grad_norm: 0.9053233831587012, iteration: 54236
loss: 1.012228012084961,grad_norm: 0.9999990139765331, iteration: 54237
loss: 0.9395372271537781,grad_norm: 0.9999991639618748, iteration: 54238
loss: 1.0113575458526611,grad_norm: 0.8653152761577503, iteration: 54239
loss: 1.0045180320739746,grad_norm: 0.9999990624072651, iteration: 54240
loss: 0.9957327246665955,grad_norm: 0.9320840789555408, iteration: 54241
loss: 0.9873859882354736,grad_norm: 0.9223034300816243, iteration: 54242
loss: 1.0164049863815308,grad_norm: 0.920696386803134, iteration: 54243
loss: 1.018488883972168,grad_norm: 0.9262719918183117, iteration: 54244
loss: 1.0047138929367065,grad_norm: 0.9999989951601869, iteration: 54245
loss: 0.9954725503921509,grad_norm: 0.9578525666347608, iteration: 54246
loss: 0.9509257078170776,grad_norm: 0.9999991140541519, iteration: 54247
loss: 0.99351966381073,grad_norm: 0.999999178862902, iteration: 54248
loss: 1.0561773777008057,grad_norm: 0.9999991888172504, iteration: 54249
loss: 0.9761234521865845,grad_norm: 0.9137701106013969, iteration: 54250
loss: 0.9754474759101868,grad_norm: 0.9191660263899838, iteration: 54251
loss: 0.9694969654083252,grad_norm: 0.919353336150062, iteration: 54252
loss: 1.0384105443954468,grad_norm: 0.9999990154671269, iteration: 54253
loss: 1.0158779621124268,grad_norm: 0.9924267900602195, iteration: 54254
loss: 1.0070596933364868,grad_norm: 0.999999164676959, iteration: 54255
loss: 1.0062049627304077,grad_norm: 0.999999064167959, iteration: 54256
loss: 0.988935112953186,grad_norm: 0.9999991283683939, iteration: 54257
loss: 0.9797493815422058,grad_norm: 0.9999991103300317, iteration: 54258
loss: 1.032801866531372,grad_norm: 0.969035768192959, iteration: 54259
loss: 0.9693203568458557,grad_norm: 0.9261232880113994, iteration: 54260
loss: 1.0070263147354126,grad_norm: 0.9999991049840156, iteration: 54261
loss: 0.9953750371932983,grad_norm: 0.9999992613384893, iteration: 54262
loss: 1.0182112455368042,grad_norm: 0.9042779100517702, iteration: 54263
loss: 1.005123496055603,grad_norm: 0.8404596271996949, iteration: 54264
loss: 1.0003775358200073,grad_norm: 0.9999992021160096, iteration: 54265
loss: 0.9964277148246765,grad_norm: 0.8535569003903176, iteration: 54266
loss: 0.9827391505241394,grad_norm: 0.9999990850141451, iteration: 54267
loss: 0.9870826601982117,grad_norm: 0.9999990317973237, iteration: 54268
loss: 1.00729501247406,grad_norm: 0.9850664228326597, iteration: 54269
loss: 0.9775147438049316,grad_norm: 0.9999991566509224, iteration: 54270
loss: 1.0275688171386719,grad_norm: 0.9999990218112695, iteration: 54271
loss: 0.9891721606254578,grad_norm: 0.8463743988897964, iteration: 54272
loss: 1.0349538326263428,grad_norm: 0.9999994682576587, iteration: 54273
loss: 1.0117073059082031,grad_norm: 0.9999991160781816, iteration: 54274
loss: 1.0327551364898682,grad_norm: 0.9999989994164595, iteration: 54275
loss: 0.9331012964248657,grad_norm: 0.9999991450308042, iteration: 54276
loss: 1.0717744827270508,grad_norm: 0.999999512282595, iteration: 54277
loss: 1.006411075592041,grad_norm: 0.9999990397506473, iteration: 54278
loss: 1.0222432613372803,grad_norm: 0.9999990287286279, iteration: 54279
loss: 1.0064082145690918,grad_norm: 0.8136210588097643, iteration: 54280
loss: 0.9881308674812317,grad_norm: 0.9999991450610876, iteration: 54281
loss: 1.0089951753616333,grad_norm: 0.9999991081218441, iteration: 54282
loss: 0.9887555241584778,grad_norm: 0.9999991618220839, iteration: 54283
loss: 1.008160948753357,grad_norm: 0.8322547647749496, iteration: 54284
loss: 0.9932664632797241,grad_norm: 0.9172219627085212, iteration: 54285
loss: 1.0129581689834595,grad_norm: 0.9999994306169879, iteration: 54286
loss: 1.0241888761520386,grad_norm: 0.9999992136761746, iteration: 54287
loss: 1.027025580406189,grad_norm: 0.9999997736793994, iteration: 54288
loss: 1.0155941247940063,grad_norm: 0.9270151460184833, iteration: 54289
loss: 1.0100632905960083,grad_norm: 0.9999998716548135, iteration: 54290
loss: 1.026382327079773,grad_norm: 0.9999990871648006, iteration: 54291
loss: 0.9889501333236694,grad_norm: 0.99999934896699, iteration: 54292
loss: 0.9997225403785706,grad_norm: 0.9999990300616525, iteration: 54293
loss: 1.0307323932647705,grad_norm: 0.9999991119684944, iteration: 54294
loss: 0.9771881103515625,grad_norm: 0.9999992336952208, iteration: 54295
loss: 1.0221483707427979,grad_norm: 0.990102492768629, iteration: 54296
loss: 0.9898019433021545,grad_norm: 0.9106818638726726, iteration: 54297
loss: 0.9787002205848694,grad_norm: 0.9999991818008404, iteration: 54298
loss: 1.0033929347991943,grad_norm: 0.9999990871271093, iteration: 54299
loss: 0.9886492490768433,grad_norm: 0.9580388392106775, iteration: 54300
loss: 0.9832284450531006,grad_norm: 0.9999991515037708, iteration: 54301
loss: 1.0175789594650269,grad_norm: 0.9766389809239283, iteration: 54302
loss: 1.0196280479431152,grad_norm: 0.9999991052047119, iteration: 54303
loss: 1.0021144151687622,grad_norm: 0.9999991278586088, iteration: 54304
loss: 1.0101361274719238,grad_norm: 0.9999992777968317, iteration: 54305
loss: 0.9682000279426575,grad_norm: 0.9982167341654609, iteration: 54306
loss: 1.015300989151001,grad_norm: 0.9999993013708199, iteration: 54307
loss: 1.0139381885528564,grad_norm: 0.9999993210174815, iteration: 54308
loss: 1.0204222202301025,grad_norm: 0.9999995853452862, iteration: 54309
loss: 0.9904993176460266,grad_norm: 0.9999990340041746, iteration: 54310
loss: 1.0053164958953857,grad_norm: 0.9627416027221697, iteration: 54311
loss: 0.9636086821556091,grad_norm: 0.99999896580532, iteration: 54312
loss: 1.035063624382019,grad_norm: 0.9143995235427681, iteration: 54313
loss: 1.0229579210281372,grad_norm: 0.9999992715394245, iteration: 54314
loss: 1.0152183771133423,grad_norm: 0.9529313603301882, iteration: 54315
loss: 1.0150991678237915,grad_norm: 0.9999990554333406, iteration: 54316
loss: 0.9874317049980164,grad_norm: 0.9030967672798085, iteration: 54317
loss: 1.0045496225357056,grad_norm: 0.9999991868331276, iteration: 54318
loss: 0.9756173491477966,grad_norm: 0.9845085392963894, iteration: 54319
loss: 1.0076502561569214,grad_norm: 0.9398211427810047, iteration: 54320
loss: 1.0103710889816284,grad_norm: 0.8893043485661111, iteration: 54321
loss: 0.9815410375595093,grad_norm: 0.9999989779967764, iteration: 54322
loss: 0.9940987229347229,grad_norm: 0.998695859535005, iteration: 54323
loss: 0.9955117702484131,grad_norm: 0.8968773983560141, iteration: 54324
loss: 0.9977529644966125,grad_norm: 0.9999993807938568, iteration: 54325
loss: 1.0064021348953247,grad_norm: 0.9999991677211524, iteration: 54326
loss: 0.9834064841270447,grad_norm: 0.936991699397965, iteration: 54327
loss: 1.0003801584243774,grad_norm: 0.7946947101082373, iteration: 54328
loss: 0.9984645843505859,grad_norm: 0.9758634677060423, iteration: 54329
loss: 0.9602051377296448,grad_norm: 0.9999991532135443, iteration: 54330
loss: 1.0177499055862427,grad_norm: 0.8871695162851025, iteration: 54331
loss: 1.0305140018463135,grad_norm: 0.9999992368943756, iteration: 54332
loss: 1.003189206123352,grad_norm: 0.9999991520467494, iteration: 54333
loss: 1.010561466217041,grad_norm: 0.8670631338734129, iteration: 54334
loss: 1.000399112701416,grad_norm: 0.9977432648099521, iteration: 54335
loss: 0.9725358486175537,grad_norm: 0.8227325252852943, iteration: 54336
loss: 0.9734327793121338,grad_norm: 0.945779381045772, iteration: 54337
loss: 1.0139765739440918,grad_norm: 0.9999988914513372, iteration: 54338
loss: 1.0143659114837646,grad_norm: 0.9999991572177526, iteration: 54339
loss: 0.9662237167358398,grad_norm: 0.999999040634114, iteration: 54340
loss: 1.0397974252700806,grad_norm: 0.9999996568086875, iteration: 54341
loss: 0.998611330986023,grad_norm: 0.958310550125488, iteration: 54342
loss: 0.9741668701171875,grad_norm: 0.9999991850245952, iteration: 54343
loss: 1.043340802192688,grad_norm: 0.999999176638348, iteration: 54344
loss: 0.9639045596122742,grad_norm: 0.999999063106568, iteration: 54345
loss: 1.0182522535324097,grad_norm: 0.9999992295540318, iteration: 54346
loss: 1.011530876159668,grad_norm: 0.9999993283730076, iteration: 54347
loss: 0.9552937746047974,grad_norm: 0.8570015537448136, iteration: 54348
loss: 1.0196329355239868,grad_norm: 0.9999992391575151, iteration: 54349
loss: 1.0079087018966675,grad_norm: 0.8942140673875683, iteration: 54350
loss: 0.9603308439254761,grad_norm: 0.9999989417529256, iteration: 54351
loss: 1.033035159111023,grad_norm: 0.8485402778714697, iteration: 54352
loss: 1.0276297330856323,grad_norm: 0.9999998337939832, iteration: 54353
loss: 1.0403382778167725,grad_norm: 0.9989878114526883, iteration: 54354
loss: 1.0194212198257446,grad_norm: 0.9483272368194426, iteration: 54355
loss: 1.0021743774414062,grad_norm: 0.9999994060847979, iteration: 54356
loss: 1.0033005475997925,grad_norm: 0.9303074560926622, iteration: 54357
loss: 0.9930717349052429,grad_norm: 0.9999991122443248, iteration: 54358
loss: 0.9984759092330933,grad_norm: 0.9489908741401992, iteration: 54359
loss: 1.0437359809875488,grad_norm: 0.9905403341892993, iteration: 54360
loss: 0.9702321887016296,grad_norm: 0.8909565589695917, iteration: 54361
loss: 0.989268958568573,grad_norm: 0.9504343466972739, iteration: 54362
loss: 1.0073838233947754,grad_norm: 0.9185645709850839, iteration: 54363
loss: 0.9806926846504211,grad_norm: 0.9106044467420856, iteration: 54364
loss: 1.0101820230484009,grad_norm: 0.9999992269721921, iteration: 54365
loss: 1.0392423868179321,grad_norm: 0.9999992446046081, iteration: 54366
loss: 0.9892001748085022,grad_norm: 0.982196914464134, iteration: 54367
loss: 1.0132310390472412,grad_norm: 0.9999990743136632, iteration: 54368
loss: 1.03586745262146,grad_norm: 0.9999990436472258, iteration: 54369
loss: 0.9673087000846863,grad_norm: 0.9999991952538543, iteration: 54370
loss: 0.9900403618812561,grad_norm: 0.9999991097196761, iteration: 54371
loss: 0.9889642000198364,grad_norm: 0.9491839772888664, iteration: 54372
loss: 0.9934641122817993,grad_norm: 0.9084254070883758, iteration: 54373
loss: 0.9953154921531677,grad_norm: 0.8839145531297263, iteration: 54374
loss: 1.0176687240600586,grad_norm: 0.8519825750417822, iteration: 54375
loss: 1.0049710273742676,grad_norm: 0.8447033425707375, iteration: 54376
loss: 1.0183935165405273,grad_norm: 0.9999991919498034, iteration: 54377
loss: 1.004396915435791,grad_norm: 0.933986693071504, iteration: 54378
loss: 0.9896236062049866,grad_norm: 0.9996228831092451, iteration: 54379
loss: 0.9755085110664368,grad_norm: 0.9999990203511628, iteration: 54380
loss: 0.9709528088569641,grad_norm: 0.9448115038191616, iteration: 54381
loss: 0.9800773859024048,grad_norm: 0.9999991638742304, iteration: 54382
loss: 1.0169042348861694,grad_norm: 0.9619547384528274, iteration: 54383
loss: 1.0274103879928589,grad_norm: 0.9999990862747622, iteration: 54384
loss: 1.0085517168045044,grad_norm: 0.9927746263588637, iteration: 54385
loss: 0.9820050001144409,grad_norm: 0.9794593433321933, iteration: 54386
loss: 1.0090854167938232,grad_norm: 0.9999991390469294, iteration: 54387
loss: 1.009162425994873,grad_norm: 0.9038932061142317, iteration: 54388
loss: 0.9513209462165833,grad_norm: 0.8337106994840334, iteration: 54389
loss: 1.0333778858184814,grad_norm: 0.9999991326501902, iteration: 54390
loss: 0.9844875335693359,grad_norm: 0.9155487436007583, iteration: 54391
loss: 1.046124815940857,grad_norm: 0.9417892853937361, iteration: 54392
loss: 1.017844796180725,grad_norm: 0.9999990939187042, iteration: 54393
loss: 1.028943657875061,grad_norm: 0.9910194846825583, iteration: 54394
loss: 0.9820347428321838,grad_norm: 0.9999991045966089, iteration: 54395
loss: 0.9965553879737854,grad_norm: 0.9999990963219187, iteration: 54396
loss: 0.9879783987998962,grad_norm: 0.9676775082440444, iteration: 54397
loss: 0.9753203988075256,grad_norm: 0.9999993711652767, iteration: 54398
loss: 0.9914153218269348,grad_norm: 0.9522352259633018, iteration: 54399
loss: 1.0219143629074097,grad_norm: 0.9577230586859152, iteration: 54400
loss: 0.9963985681533813,grad_norm: 0.999999351781754, iteration: 54401
loss: 1.0440315008163452,grad_norm: 0.9999993928514584, iteration: 54402
loss: 0.9788303375244141,grad_norm: 0.9999992624278148, iteration: 54403
loss: 0.9774026274681091,grad_norm: 0.9179157132196342, iteration: 54404
loss: 1.0179284811019897,grad_norm: 0.9767848022182517, iteration: 54405
loss: 1.0320625305175781,grad_norm: 0.9714456012883078, iteration: 54406
loss: 1.0459799766540527,grad_norm: 0.9718303777424135, iteration: 54407
loss: 1.01297128200531,grad_norm: 0.9999991047720524, iteration: 54408
loss: 1.0198215246200562,grad_norm: 0.8527307447979734, iteration: 54409
loss: 1.012817621231079,grad_norm: 0.8682598738658286, iteration: 54410
loss: 0.9523894190788269,grad_norm: 0.9999990374409201, iteration: 54411
loss: 0.9586347937583923,grad_norm: 0.9736807987996849, iteration: 54412
loss: 1.0144938230514526,grad_norm: 0.9999991810465463, iteration: 54413
loss: 1.0151262283325195,grad_norm: 0.9999990799320766, iteration: 54414
loss: 1.0105633735656738,grad_norm: 0.9894647687276865, iteration: 54415
loss: 1.005759358406067,grad_norm: 0.9911851840203983, iteration: 54416
loss: 1.0338804721832275,grad_norm: 0.999999195688045, iteration: 54417
loss: 1.0298898220062256,grad_norm: 0.9999993580557389, iteration: 54418
loss: 1.0206164121627808,grad_norm: 0.9883005886971492, iteration: 54419
loss: 1.0160465240478516,grad_norm: 0.9132156081840341, iteration: 54420
loss: 1.0003384351730347,grad_norm: 0.9999990977909421, iteration: 54421
loss: 0.9793172478675842,grad_norm: 0.9999991874357843, iteration: 54422
loss: 1.0505272150039673,grad_norm: 0.9999994782525023, iteration: 54423
loss: 1.0007179975509644,grad_norm: 0.9999990858181016, iteration: 54424
loss: 0.9647979736328125,grad_norm: 0.9999989673225682, iteration: 54425
loss: 1.0003186464309692,grad_norm: 0.8896733877227813, iteration: 54426
loss: 1.0275872945785522,grad_norm: 0.9999990424044949, iteration: 54427
loss: 1.0253289937973022,grad_norm: 0.9217242420873251, iteration: 54428
loss: 0.9973351359367371,grad_norm: 0.9513367300348099, iteration: 54429
loss: 1.0207350254058838,grad_norm: 0.9999997508049424, iteration: 54430
loss: 1.0089486837387085,grad_norm: 0.9999990770919902, iteration: 54431
loss: 0.9628838896751404,grad_norm: 0.9987052309527104, iteration: 54432
loss: 1.0097346305847168,grad_norm: 0.9999991606632647, iteration: 54433
loss: 0.9843732714653015,grad_norm: 0.9999991447309162, iteration: 54434
loss: 1.0233625173568726,grad_norm: 0.9999990770108349, iteration: 54435
loss: 0.9995217323303223,grad_norm: 0.9999994212806173, iteration: 54436
loss: 0.9603225588798523,grad_norm: 0.9999992635105257, iteration: 54437
loss: 1.0467867851257324,grad_norm: 0.9999992263480416, iteration: 54438
loss: 1.0015888214111328,grad_norm: 0.9999991822786832, iteration: 54439
loss: 0.9956769347190857,grad_norm: 0.825382213910928, iteration: 54440
loss: 0.9842424988746643,grad_norm: 0.9667117166751902, iteration: 54441
loss: 1.0062806606292725,grad_norm: 0.9999996282390374, iteration: 54442
loss: 0.9827495813369751,grad_norm: 0.9999991900278491, iteration: 54443
loss: 1.0302785634994507,grad_norm: 0.999999164169372, iteration: 54444
loss: 0.9683254957199097,grad_norm: 0.9999990442389208, iteration: 54445
loss: 0.9766221046447754,grad_norm: 0.9999989205573169, iteration: 54446
loss: 0.9962192177772522,grad_norm: 0.9637660892608332, iteration: 54447
loss: 0.9803075790405273,grad_norm: 0.9727694261504621, iteration: 54448
loss: 0.9746479988098145,grad_norm: 0.9999990135911586, iteration: 54449
loss: 1.0024548768997192,grad_norm: 0.9999993240319031, iteration: 54450
loss: 0.9754791855812073,grad_norm: 0.8884472466342005, iteration: 54451
loss: 1.005170464515686,grad_norm: 0.8343826056819473, iteration: 54452
loss: 0.9642847180366516,grad_norm: 0.9999992835491478, iteration: 54453
loss: 0.9781110882759094,grad_norm: 0.9590353867736039, iteration: 54454
loss: 1.0170648097991943,grad_norm: 0.9999990589267337, iteration: 54455
loss: 1.0106312036514282,grad_norm: 0.8262650647261099, iteration: 54456
loss: 0.9656726121902466,grad_norm: 0.9999990210035393, iteration: 54457
loss: 0.9999628067016602,grad_norm: 0.9728718990751457, iteration: 54458
loss: 0.980664849281311,grad_norm: 0.999999040356602, iteration: 54459
loss: 0.9948480725288391,grad_norm: 0.9999990661453962, iteration: 54460
loss: 1.0285977125167847,grad_norm: 0.8724617136604237, iteration: 54461
loss: 0.9683048725128174,grad_norm: 0.9261893796580712, iteration: 54462
loss: 0.991513192653656,grad_norm: 0.8530607862155306, iteration: 54463
loss: 1.0139728784561157,grad_norm: 0.999999191258994, iteration: 54464
loss: 1.0198851823806763,grad_norm: 0.9999995040289834, iteration: 54465
loss: 1.040827989578247,grad_norm: 0.9999990887048635, iteration: 54466
loss: 1.0085583925247192,grad_norm: 0.97743006060332, iteration: 54467
loss: 0.9895067811012268,grad_norm: 0.9999991701503266, iteration: 54468
loss: 1.028959035873413,grad_norm: 0.9999994498307091, iteration: 54469
loss: 0.9900532364845276,grad_norm: 0.9999991477759845, iteration: 54470
loss: 0.99833083152771,grad_norm: 0.9883996641273641, iteration: 54471
loss: 1.0115809440612793,grad_norm: 0.999999095161402, iteration: 54472
loss: 0.9946423172950745,grad_norm: 0.9999989900542919, iteration: 54473
loss: 0.9780228137969971,grad_norm: 0.9999994678440128, iteration: 54474
loss: 1.0108627080917358,grad_norm: 0.8016363167377906, iteration: 54475
loss: 1.0089646577835083,grad_norm: 0.8587812339397405, iteration: 54476
loss: 1.0217407941818237,grad_norm: 0.9999992366238754, iteration: 54477
loss: 0.9939715266227722,grad_norm: 0.9999989037993823, iteration: 54478
loss: 1.1038885116577148,grad_norm: 0.9999995958081485, iteration: 54479
loss: 0.9944797158241272,grad_norm: 0.9999992309709104, iteration: 54480
loss: 0.963055431842804,grad_norm: 0.999999141586879, iteration: 54481
loss: 1.0098485946655273,grad_norm: 0.9167906623815987, iteration: 54482
loss: 1.0174145698547363,grad_norm: 0.9999992286364021, iteration: 54483
loss: 0.9931736588478088,grad_norm: 0.9999992320558135, iteration: 54484
loss: 1.0196868181228638,grad_norm: 0.9999991474543104, iteration: 54485
loss: 0.9896945953369141,grad_norm: 0.8936392169147304, iteration: 54486
loss: 0.9994884729385376,grad_norm: 0.9667471009927219, iteration: 54487
loss: 1.0209084749221802,grad_norm: 0.9999991609179857, iteration: 54488
loss: 1.0233582258224487,grad_norm: 0.794801760933709, iteration: 54489
loss: 1.011027216911316,grad_norm: 0.9999991430112863, iteration: 54490
loss: 0.9775671362876892,grad_norm: 0.9999992216064318, iteration: 54491
loss: 0.9853954911231995,grad_norm: 0.9999990388026384, iteration: 54492
loss: 1.059908390045166,grad_norm: 0.9999990527918204, iteration: 54493
loss: 0.9601678848266602,grad_norm: 0.9999992749089786, iteration: 54494
loss: 1.0087428092956543,grad_norm: 0.999999119553785, iteration: 54495
loss: 0.9876940250396729,grad_norm: 0.9999990144665547, iteration: 54496
loss: 0.9669643640518188,grad_norm: 0.9051639355676816, iteration: 54497
loss: 1.0129297971725464,grad_norm: 0.9264628999899098, iteration: 54498
loss: 1.0051196813583374,grad_norm: 0.9202214924549013, iteration: 54499
loss: 1.0197209119796753,grad_norm: 0.9999990725404371, iteration: 54500
loss: 0.9637325406074524,grad_norm: 0.9999990869201536, iteration: 54501
loss: 0.9994818568229675,grad_norm: 0.9801878350993252, iteration: 54502
loss: 1.0176243782043457,grad_norm: 0.9999990777397046, iteration: 54503
loss: 0.9871087074279785,grad_norm: 0.9999990664451031, iteration: 54504
loss: 0.9905089735984802,grad_norm: 0.9999990737572274, iteration: 54505
loss: 0.9817107319831848,grad_norm: 0.8832404217157881, iteration: 54506
loss: 1.021465539932251,grad_norm: 0.9572822264962302, iteration: 54507
loss: 1.0055269002914429,grad_norm: 0.9812819788452626, iteration: 54508
loss: 0.998412549495697,grad_norm: 0.9010076081151887, iteration: 54509
loss: 0.9789137244224548,grad_norm: 0.9690964347074732, iteration: 54510
loss: 0.9903078675270081,grad_norm: 0.9999992386831427, iteration: 54511
loss: 0.9875349402427673,grad_norm: 0.9689704989485035, iteration: 54512
loss: 0.9455227851867676,grad_norm: 0.9643435392024011, iteration: 54513
loss: 1.027826189994812,grad_norm: 0.9942535381189654, iteration: 54514
loss: 1.0150851011276245,grad_norm: 0.9564974565663618, iteration: 54515
loss: 1.0093557834625244,grad_norm: 0.9413647386590964, iteration: 54516
loss: 1.0146127939224243,grad_norm: 0.9999990653291159, iteration: 54517
loss: 1.0000925064086914,grad_norm: 0.9999992297626831, iteration: 54518
loss: 0.9946661591529846,grad_norm: 0.965505962182816, iteration: 54519
loss: 1.024000644683838,grad_norm: 0.9999991191552308, iteration: 54520
loss: 0.9854157567024231,grad_norm: 0.9946988341162549, iteration: 54521
loss: 0.9932906031608582,grad_norm: 0.9999991794385866, iteration: 54522
loss: 1.0216292142868042,grad_norm: 0.9999991155238829, iteration: 54523
loss: 1.0089032649993896,grad_norm: 0.9999991115453912, iteration: 54524
loss: 0.9879896640777588,grad_norm: 0.9393589028497696, iteration: 54525
loss: 1.0512053966522217,grad_norm: 0.9999997123344401, iteration: 54526
loss: 0.9805268049240112,grad_norm: 0.8370944004109483, iteration: 54527
loss: 1.0150364637374878,grad_norm: 0.9262646259658982, iteration: 54528
loss: 0.9712293148040771,grad_norm: 0.99999912272574, iteration: 54529
loss: 0.9826095104217529,grad_norm: 0.8546748029868521, iteration: 54530
loss: 0.9849350452423096,grad_norm: 0.9999990957197838, iteration: 54531
loss: 0.9488512873649597,grad_norm: 0.9334677118094888, iteration: 54532
loss: 0.9770070910453796,grad_norm: 0.9999992433812164, iteration: 54533
loss: 1.0825867652893066,grad_norm: 0.9999995930939539, iteration: 54534
loss: 1.0122395753860474,grad_norm: 0.8959670895388411, iteration: 54535
loss: 0.9769734740257263,grad_norm: 0.81693765154319, iteration: 54536
loss: 0.9888806939125061,grad_norm: 0.9999992246775249, iteration: 54537
loss: 1.0292490720748901,grad_norm: 0.9026698101806948, iteration: 54538
loss: 0.994067907333374,grad_norm: 0.9999990754977937, iteration: 54539
loss: 1.004894733428955,grad_norm: 0.9161385060253924, iteration: 54540
loss: 1.00680673122406,grad_norm: 0.7885268969526665, iteration: 54541
loss: 0.986869215965271,grad_norm: 0.8635285991405125, iteration: 54542
loss: 0.993410050868988,grad_norm: 0.9999991375311897, iteration: 54543
loss: 0.9828686714172363,grad_norm: 0.9999991718103755, iteration: 54544
loss: 0.9886131286621094,grad_norm: 0.9999991470845978, iteration: 54545
loss: 1.0410369634628296,grad_norm: 0.9999992912270902, iteration: 54546
loss: 0.994047999382019,grad_norm: 0.999999148180188, iteration: 54547
loss: 0.9768857955932617,grad_norm: 0.8936152300510136, iteration: 54548
loss: 1.0119898319244385,grad_norm: 0.8985033302103563, iteration: 54549
loss: 0.998464822769165,grad_norm: 0.9473631382726282, iteration: 54550
loss: 1.0235236883163452,grad_norm: 0.963683498669472, iteration: 54551
loss: 0.9926260113716125,grad_norm: 0.8762889409295307, iteration: 54552
loss: 1.0502595901489258,grad_norm: 0.9999992639415558, iteration: 54553
loss: 0.9732335209846497,grad_norm: 0.9580802033624191, iteration: 54554
loss: 1.0157554149627686,grad_norm: 0.9999992679840677, iteration: 54555
loss: 1.038254976272583,grad_norm: 0.9999991548625499, iteration: 54556
loss: 0.9782954454421997,grad_norm: 0.9999991918397991, iteration: 54557
loss: 0.9958093762397766,grad_norm: 0.9999990582195153, iteration: 54558
loss: 1.02536141872406,grad_norm: 0.99999932280593, iteration: 54559
loss: 0.9930906891822815,grad_norm: 0.999998954007268, iteration: 54560
loss: 1.0083600282669067,grad_norm: 0.9999990413153721, iteration: 54561
loss: 1.0234875679016113,grad_norm: 0.9999989879642582, iteration: 54562
loss: 1.0098644495010376,grad_norm: 0.9999990897947265, iteration: 54563
loss: 0.9918614029884338,grad_norm: 0.9226580384985145, iteration: 54564
loss: 1.0276650190353394,grad_norm: 0.9593215454334827, iteration: 54565
loss: 0.985369861125946,grad_norm: 0.9999990251244585, iteration: 54566
loss: 0.9527959227561951,grad_norm: 0.9999990237859187, iteration: 54567
loss: 1.0322909355163574,grad_norm: 0.9999992477798944, iteration: 54568
loss: 0.9695947170257568,grad_norm: 0.9306308050876775, iteration: 54569
loss: 0.9980401396751404,grad_norm: 0.9118121501361763, iteration: 54570
loss: 0.9961534142494202,grad_norm: 0.999999097529007, iteration: 54571
loss: 1.0273287296295166,grad_norm: 0.9567800814265202, iteration: 54572
loss: 1.0026357173919678,grad_norm: 0.9999997499443001, iteration: 54573
loss: 1.0090795755386353,grad_norm: 0.9763728670589362, iteration: 54574
loss: 1.0476185083389282,grad_norm: 0.9999992259647834, iteration: 54575
loss: 0.9941639304161072,grad_norm: 0.8632719559134656, iteration: 54576
loss: 0.9999229311943054,grad_norm: 0.9593724427969649, iteration: 54577
loss: 1.0400058031082153,grad_norm: 0.9999996996585585, iteration: 54578
loss: 1.0067795515060425,grad_norm: 0.9999992347079808, iteration: 54579
loss: 1.0255961418151855,grad_norm: 0.9999990902184906, iteration: 54580
loss: 1.0287548303604126,grad_norm: 0.7825900358987217, iteration: 54581
loss: 0.962904155254364,grad_norm: 0.9786160113243162, iteration: 54582
loss: 0.9988507628440857,grad_norm: 0.9999992024556068, iteration: 54583
loss: 1.0286866426467896,grad_norm: 0.9999991923858024, iteration: 54584
loss: 0.9980730414390564,grad_norm: 0.9999991771316975, iteration: 54585
loss: 1.0251575708389282,grad_norm: 0.9999990597241708, iteration: 54586
loss: 0.9952253103256226,grad_norm: 0.9999990799714678, iteration: 54587
loss: 0.9950031042098999,grad_norm: 0.9337758002469219, iteration: 54588
loss: 1.0143855810165405,grad_norm: 0.942430358088571, iteration: 54589
loss: 1.008875846862793,grad_norm: 0.9712003014384331, iteration: 54590
loss: 1.0333502292633057,grad_norm: 0.9179460013327274, iteration: 54591
loss: 0.9791340827941895,grad_norm: 0.9999991356237998, iteration: 54592
loss: 0.9848254323005676,grad_norm: 0.9999990977178953, iteration: 54593
loss: 1.0215977430343628,grad_norm: 0.987471886874437, iteration: 54594
loss: 0.9945099353790283,grad_norm: 0.9038659248596675, iteration: 54595
loss: 1.0184621810913086,grad_norm: 0.9999991643735939, iteration: 54596
loss: 1.0013902187347412,grad_norm: 0.9941936315135932, iteration: 54597
loss: 1.0409531593322754,grad_norm: 0.9999999283973892, iteration: 54598
loss: 0.9653820991516113,grad_norm: 0.7973253235412655, iteration: 54599
loss: 1.014296293258667,grad_norm: 0.9999992604889915, iteration: 54600
loss: 0.9825140833854675,grad_norm: 0.9999990118853155, iteration: 54601
loss: 0.9921442866325378,grad_norm: 0.9999990533207159, iteration: 54602
loss: 0.9590603113174438,grad_norm: 0.9999991253723242, iteration: 54603
loss: 1.0559331178665161,grad_norm: 0.9999997124825386, iteration: 54604
loss: 1.0114291906356812,grad_norm: 0.999999694653484, iteration: 54605
loss: 0.9569026231765747,grad_norm: 0.999999058775742, iteration: 54606
loss: 0.9962311387062073,grad_norm: 0.9276330998436818, iteration: 54607
loss: 0.9887251853942871,grad_norm: 0.9508844640659619, iteration: 54608
loss: 1.0054610967636108,grad_norm: 0.9898598997067655, iteration: 54609
loss: 0.9973218441009521,grad_norm: 0.9999990770517224, iteration: 54610
loss: 1.0200083255767822,grad_norm: 0.9999990224550599, iteration: 54611
loss: 1.0304174423217773,grad_norm: 0.9504275500311983, iteration: 54612
loss: 1.0210955142974854,grad_norm: 0.9999990245691937, iteration: 54613
loss: 1.066519021987915,grad_norm: 0.9999997147326947, iteration: 54614
loss: 1.032098650932312,grad_norm: 0.9999993200155586, iteration: 54615
loss: 1.0076128244400024,grad_norm: 0.9999990547918687, iteration: 54616
loss: 1.0397310256958008,grad_norm: 0.9999990570365497, iteration: 54617
loss: 1.004477858543396,grad_norm: 0.9999994536798827, iteration: 54618
loss: 1.0127450227737427,grad_norm: 0.9999992268799942, iteration: 54619
loss: 1.0218881368637085,grad_norm: 0.9999994317780113, iteration: 54620
loss: 1.0218405723571777,grad_norm: 0.999999128776575, iteration: 54621
loss: 0.9947733879089355,grad_norm: 0.8402999417064588, iteration: 54622
loss: 0.9891685843467712,grad_norm: 0.981542394136437, iteration: 54623
loss: 1.0257753133773804,grad_norm: 0.9999992534541268, iteration: 54624
loss: 0.9802703261375427,grad_norm: 0.8145851739481993, iteration: 54625
loss: 0.9779095649719238,grad_norm: 0.9188463667556135, iteration: 54626
loss: 0.9885575771331787,grad_norm: 0.944849683119963, iteration: 54627
loss: 0.961286723613739,grad_norm: 0.9999991055611128, iteration: 54628
loss: 1.0079987049102783,grad_norm: 0.9407578916135141, iteration: 54629
loss: 1.0164711475372314,grad_norm: 0.9999992251096215, iteration: 54630
loss: 1.0231083631515503,grad_norm: 0.9823553115776773, iteration: 54631
loss: 0.9834643006324768,grad_norm: 0.9999993666585584, iteration: 54632
loss: 0.9956715106964111,grad_norm: 0.99999909889449, iteration: 54633
loss: 0.9673173427581787,grad_norm: 0.9999991000984412, iteration: 54634
loss: 1.002870798110962,grad_norm: 0.9999989804925059, iteration: 54635
loss: 0.9539659023284912,grad_norm: 0.9999991641153151, iteration: 54636
loss: 1.0493547916412354,grad_norm: 0.8023495985566207, iteration: 54637
loss: 1.0210586786270142,grad_norm: 0.9999992508509624, iteration: 54638
loss: 0.9816628098487854,grad_norm: 0.9194772171514096, iteration: 54639
loss: 0.9730265140533447,grad_norm: 0.8976325591670036, iteration: 54640
loss: 0.9785919785499573,grad_norm: 0.9999990617493881, iteration: 54641
loss: 1.0296111106872559,grad_norm: 0.999999178745737, iteration: 54642
loss: 1.0100531578063965,grad_norm: 0.9999991582591164, iteration: 54643
loss: 0.9988657832145691,grad_norm: 0.9999991093258954, iteration: 54644
loss: 0.9509162306785583,grad_norm: 0.8530363629240018, iteration: 54645
loss: 1.0282906293869019,grad_norm: 0.9692093768458743, iteration: 54646
loss: 1.0265705585479736,grad_norm: 0.9999991169248484, iteration: 54647
loss: 1.0072938203811646,grad_norm: 0.9158508518994068, iteration: 54648
loss: 1.019964575767517,grad_norm: 0.9999996406775998, iteration: 54649
loss: 1.0606460571289062,grad_norm: 0.9500052460614601, iteration: 54650
loss: 1.0036391019821167,grad_norm: 0.9999991179442917, iteration: 54651
loss: 0.9970505833625793,grad_norm: 0.9999991531659507, iteration: 54652
loss: 0.998478353023529,grad_norm: 0.9999990699889872, iteration: 54653
loss: 0.9897273182868958,grad_norm: 0.9036448276579732, iteration: 54654
loss: 1.0156137943267822,grad_norm: 0.9864500882964926, iteration: 54655
loss: 0.9979186654090881,grad_norm: 0.960235527942517, iteration: 54656
loss: 0.9874823689460754,grad_norm: 0.9999991072750164, iteration: 54657
loss: 0.9787311553955078,grad_norm: 0.9999991330358878, iteration: 54658
loss: 1.0007649660110474,grad_norm: 0.9782528932697426, iteration: 54659
loss: 1.0093857049942017,grad_norm: 0.9999990189642803, iteration: 54660
loss: 1.0274728536605835,grad_norm: 0.9999993694624282, iteration: 54661
loss: 1.0159509181976318,grad_norm: 0.9999990635193386, iteration: 54662
loss: 0.9941505789756775,grad_norm: 0.9760320061888902, iteration: 54663
loss: 0.9606525897979736,grad_norm: 0.9999991319424653, iteration: 54664
loss: 1.0366336107254028,grad_norm: 0.9916687907451223, iteration: 54665
loss: 0.9859690070152283,grad_norm: 0.9960256557073943, iteration: 54666
loss: 1.0039818286895752,grad_norm: 0.9999989997497898, iteration: 54667
loss: 1.0045571327209473,grad_norm: 0.9684612207420721, iteration: 54668
loss: 1.0006684064865112,grad_norm: 0.8766518419059197, iteration: 54669
loss: 1.0316909551620483,grad_norm: 0.9999993014569167, iteration: 54670
loss: 0.9813402891159058,grad_norm: 0.999999178884961, iteration: 54671
loss: 0.9964083433151245,grad_norm: 0.9999992243658873, iteration: 54672
loss: 1.0241576433181763,grad_norm: 0.8941382933130504, iteration: 54673
loss: 0.9716364145278931,grad_norm: 0.9631649932609765, iteration: 54674
loss: 0.9985647201538086,grad_norm: 0.9999991840583516, iteration: 54675
loss: 1.0036146640777588,grad_norm: 0.9249402890211329, iteration: 54676
loss: 0.9973853230476379,grad_norm: 0.9155799220865166, iteration: 54677
loss: 0.9971718192100525,grad_norm: 0.9432651889447523, iteration: 54678
loss: 1.0241608619689941,grad_norm: 0.8426601587883318, iteration: 54679
loss: 0.9960613250732422,grad_norm: 0.9999992182193583, iteration: 54680
loss: 1.0025941133499146,grad_norm: 0.9999990591121759, iteration: 54681
loss: 1.0175306797027588,grad_norm: 0.8352485854848161, iteration: 54682
loss: 0.9598150849342346,grad_norm: 0.9999995188241045, iteration: 54683
loss: 0.949522852897644,grad_norm: 0.9787058792691787, iteration: 54684
loss: 1.0013593435287476,grad_norm: 0.9010568182067522, iteration: 54685
loss: 0.9964790940284729,grad_norm: 0.8333171617433375, iteration: 54686
loss: 0.9802976250648499,grad_norm: 0.9373086133849047, iteration: 54687
loss: 1.0191383361816406,grad_norm: 0.9999991361561944, iteration: 54688
loss: 0.9882525205612183,grad_norm: 0.9993073200442824, iteration: 54689
loss: 1.0610052347183228,grad_norm: 0.9999992949410004, iteration: 54690
loss: 0.9734814763069153,grad_norm: 0.999999138426905, iteration: 54691
loss: 0.989136278629303,grad_norm: 0.9999995629715668, iteration: 54692
loss: 1.0094621181488037,grad_norm: 0.9665778659934706, iteration: 54693
loss: 1.01141357421875,grad_norm: 0.996205056619169, iteration: 54694
loss: 0.9720854759216309,grad_norm: 0.8375135566642066, iteration: 54695
loss: 0.9705671072006226,grad_norm: 0.907490033501282, iteration: 54696
loss: 0.9857603311538696,grad_norm: 0.8849557780089314, iteration: 54697
loss: 0.9839566349983215,grad_norm: 0.999998952670108, iteration: 54698
loss: 0.9710992574691772,grad_norm: 0.9999992195957, iteration: 54699
loss: 0.980175793170929,grad_norm: 0.9999995354899047, iteration: 54700
loss: 0.9998719096183777,grad_norm: 0.9999992823816548, iteration: 54701
loss: 0.9914236664772034,grad_norm: 0.7510619890316328, iteration: 54702
loss: 1.047512173652649,grad_norm: 0.9999991738327456, iteration: 54703
loss: 0.9994652271270752,grad_norm: 0.9999991005381973, iteration: 54704
loss: 1.019752025604248,grad_norm: 0.9999990405692768, iteration: 54705
loss: 1.0137124061584473,grad_norm: 0.9999991899804628, iteration: 54706
loss: 0.9935395121574402,grad_norm: 0.9016312308432464, iteration: 54707
loss: 0.9962442517280579,grad_norm: 0.9716757014659134, iteration: 54708
loss: 1.0090446472167969,grad_norm: 0.9239237846317301, iteration: 54709
loss: 1.009973168373108,grad_norm: 0.999999103116021, iteration: 54710
loss: 0.9892846345901489,grad_norm: 0.8971052168056738, iteration: 54711
loss: 1.0191733837127686,grad_norm: 0.9322859907515794, iteration: 54712
loss: 1.0198131799697876,grad_norm: 0.9234089748455143, iteration: 54713
loss: 1.0566202402114868,grad_norm: 0.9999997195163954, iteration: 54714
loss: 1.0569888353347778,grad_norm: 0.9707843576924128, iteration: 54715
loss: 1.0307831764221191,grad_norm: 0.902832730918792, iteration: 54716
loss: 1.0168302059173584,grad_norm: 0.999999285821256, iteration: 54717
loss: 0.9679118990898132,grad_norm: 0.9999991301885348, iteration: 54718
loss: 0.9889194965362549,grad_norm: 0.9315600207824111, iteration: 54719
loss: 0.9668877720832825,grad_norm: 0.9999991061312392, iteration: 54720
loss: 1.011548399925232,grad_norm: 0.999999053311907, iteration: 54721
loss: 0.9824603796005249,grad_norm: 0.9999989650986149, iteration: 54722
loss: 0.9628819823265076,grad_norm: 0.9815565763624818, iteration: 54723
loss: 0.9745985865592957,grad_norm: 0.8484025149717298, iteration: 54724
loss: 1.008767008781433,grad_norm: 0.8986772706258177, iteration: 54725
loss: 1.0079067945480347,grad_norm: 0.9999990757277355, iteration: 54726
loss: 1.031532645225525,grad_norm: 0.9999992487343612, iteration: 54727
loss: 0.997699499130249,grad_norm: 0.9999991438405972, iteration: 54728
loss: 0.9673056602478027,grad_norm: 0.8934266067233145, iteration: 54729
loss: 1.0271342992782593,grad_norm: 0.9187989917534152, iteration: 54730
loss: 1.0130562782287598,grad_norm: 0.9961407103177586, iteration: 54731
loss: 1.029947280883789,grad_norm: 0.9999990250420049, iteration: 54732
loss: 1.0237910747528076,grad_norm: 0.920755705292658, iteration: 54733
loss: 1.0107446908950806,grad_norm: 0.9999990601967405, iteration: 54734
loss: 1.0071228742599487,grad_norm: 0.9499916836884926, iteration: 54735
loss: 1.0268268585205078,grad_norm: 0.9999991896481051, iteration: 54736
loss: 1.0031652450561523,grad_norm: 0.9634470261732999, iteration: 54737
loss: 1.0177292823791504,grad_norm: 0.9999989982186157, iteration: 54738
loss: 0.9877117872238159,grad_norm: 0.9999991680457866, iteration: 54739
loss: 1.0180063247680664,grad_norm: 0.9163180208025844, iteration: 54740
loss: 0.9809200167655945,grad_norm: 0.9999990955493177, iteration: 54741
loss: 1.0220298767089844,grad_norm: 0.9999990427306206, iteration: 54742
loss: 1.0094552040100098,grad_norm: 0.8934620386465468, iteration: 54743
loss: 0.9420793652534485,grad_norm: 0.9999991761688306, iteration: 54744
loss: 0.9684211015701294,grad_norm: 0.8696010356419602, iteration: 54745
loss: 0.987809956073761,grad_norm: 0.9494035385647782, iteration: 54746
loss: 0.9597638845443726,grad_norm: 0.9999989179058452, iteration: 54747
loss: 0.9895926117897034,grad_norm: 0.917480266899427, iteration: 54748
loss: 1.0248695611953735,grad_norm: 0.886453405449257, iteration: 54749
loss: 0.9826977849006653,grad_norm: 0.9203391135967361, iteration: 54750
loss: 1.0103832483291626,grad_norm: 0.9464352105502903, iteration: 54751
loss: 1.0348988771438599,grad_norm: 0.9325706670903887, iteration: 54752
loss: 1.0140787363052368,grad_norm: 0.9999989750256554, iteration: 54753
loss: 1.0223119258880615,grad_norm: 0.9238143214028732, iteration: 54754
loss: 1.0017274618148804,grad_norm: 0.9999991821906407, iteration: 54755
loss: 0.9992608428001404,grad_norm: 0.9288177586876475, iteration: 54756
loss: 0.9826368689537048,grad_norm: 0.9999991199350812, iteration: 54757
loss: 1.0184857845306396,grad_norm: 0.9661210938961291, iteration: 54758
loss: 0.9715700745582581,grad_norm: 0.9999992375060487, iteration: 54759
loss: 1.0077711343765259,grad_norm: 0.9999991657696358, iteration: 54760
loss: 1.0424354076385498,grad_norm: 0.9999991185022732, iteration: 54761
loss: 1.0193791389465332,grad_norm: 0.9912524394600968, iteration: 54762
loss: 1.0227774381637573,grad_norm: 0.9292006367542245, iteration: 54763
loss: 1.0050032138824463,grad_norm: 0.9999991607584608, iteration: 54764
loss: 1.0012423992156982,grad_norm: 0.9299535995423465, iteration: 54765
loss: 0.9586833119392395,grad_norm: 0.9913543055866961, iteration: 54766
loss: 1.0151653289794922,grad_norm: 0.9999992214542472, iteration: 54767
loss: 1.0194810628890991,grad_norm: 0.9999992258579571, iteration: 54768
loss: 0.9812919497489929,grad_norm: 0.9161998955929941, iteration: 54769
loss: 1.0075327157974243,grad_norm: 0.9999990683802504, iteration: 54770
loss: 1.0101919174194336,grad_norm: 0.9999991873901538, iteration: 54771
loss: 0.970754861831665,grad_norm: 0.9999992021192063, iteration: 54772
loss: 0.9989416599273682,grad_norm: 0.9999991830091831, iteration: 54773
loss: 0.9828467965126038,grad_norm: 0.9999991540351592, iteration: 54774
loss: 0.9964523315429688,grad_norm: 0.988936287596257, iteration: 54775
loss: 0.983055830001831,grad_norm: 0.8839389539379668, iteration: 54776
loss: 0.9729925990104675,grad_norm: 0.9761639985857935, iteration: 54777
loss: 1.0060549974441528,grad_norm: 0.9760314982528798, iteration: 54778
loss: 0.9984713196754456,grad_norm: 0.9999991583461573, iteration: 54779
loss: 1.0150874853134155,grad_norm: 0.9999996356438748, iteration: 54780
loss: 0.9844917058944702,grad_norm: 0.8572982368198365, iteration: 54781
loss: 1.0165319442749023,grad_norm: 0.9749719168466807, iteration: 54782
loss: 0.9947931170463562,grad_norm: 0.9097604417946309, iteration: 54783
loss: 1.0423758029937744,grad_norm: 0.9999994930322212, iteration: 54784
loss: 1.0287338495254517,grad_norm: 0.9950810240906726, iteration: 54785
loss: 0.9884911179542542,grad_norm: 0.999999143480958, iteration: 54786
loss: 0.9962542653083801,grad_norm: 0.999998967390669, iteration: 54787
loss: 1.0321036577224731,grad_norm: 0.9999992162484813, iteration: 54788
loss: 1.0100246667861938,grad_norm: 0.9999994452879647, iteration: 54789
loss: 1.0257660150527954,grad_norm: 0.9999995512353935, iteration: 54790
loss: 1.0340348482131958,grad_norm: 0.9999995350854508, iteration: 54791
loss: 1.03518545627594,grad_norm: 0.9999990160521784, iteration: 54792
loss: 0.948927104473114,grad_norm: 0.9863442030521212, iteration: 54793
loss: 1.0127211809158325,grad_norm: 0.9999991456015032, iteration: 54794
loss: 1.0218960046768188,grad_norm: 0.9999991532088413, iteration: 54795
loss: 0.9991893768310547,grad_norm: 0.9539695884198159, iteration: 54796
loss: 1.0230951309204102,grad_norm: 0.9999991158220433, iteration: 54797
loss: 1.036362648010254,grad_norm: 0.8902207464125261, iteration: 54798
loss: 1.0249316692352295,grad_norm: 0.9999990430946536, iteration: 54799
loss: 0.9603380560874939,grad_norm: 0.8950201009883586, iteration: 54800
loss: 1.0202052593231201,grad_norm: 0.9999990382579809, iteration: 54801
loss: 1.012961506843567,grad_norm: 0.9999990769961662, iteration: 54802
loss: 0.9725614786148071,grad_norm: 0.9887568295711193, iteration: 54803
loss: 0.962887167930603,grad_norm: 0.999999246821703, iteration: 54804
loss: 1.0326818227767944,grad_norm: 0.9999990189257865, iteration: 54805
loss: 1.000154972076416,grad_norm: 0.9239156412991428, iteration: 54806
loss: 0.9758864045143127,grad_norm: 0.9546941757058334, iteration: 54807
loss: 1.0118393898010254,grad_norm: 0.9999989687964596, iteration: 54808
loss: 1.0357085466384888,grad_norm: 0.9999991314602557, iteration: 54809
loss: 0.9769726991653442,grad_norm: 0.9999995267838201, iteration: 54810
loss: 0.9996371269226074,grad_norm: 0.9801217518006679, iteration: 54811
loss: 0.9887271523475647,grad_norm: 0.9739532678886252, iteration: 54812
loss: 1.0107862949371338,grad_norm: 0.9481676318594654, iteration: 54813
loss: 0.9718677401542664,grad_norm: 0.9999991272802871, iteration: 54814
loss: 0.9960188269615173,grad_norm: 0.9999991051316867, iteration: 54815
loss: 1.0335310697555542,grad_norm: 0.9367497216486771, iteration: 54816
loss: 0.9999740719795227,grad_norm: 0.9999990666775759, iteration: 54817
loss: 1.0340269804000854,grad_norm: 0.9581936366531636, iteration: 54818
loss: 1.0301438570022583,grad_norm: 0.999999094924762, iteration: 54819
loss: 1.009871244430542,grad_norm: 0.9573002749349541, iteration: 54820
loss: 1.0140137672424316,grad_norm: 0.9271646367847453, iteration: 54821
loss: 0.9921107292175293,grad_norm: 0.9999991716315038, iteration: 54822
loss: 0.967663049697876,grad_norm: 0.8998557464464185, iteration: 54823
loss: 1.0128633975982666,grad_norm: 0.9999991611661616, iteration: 54824
loss: 0.9844720959663391,grad_norm: 0.9999993128536068, iteration: 54825
loss: 1.0429751873016357,grad_norm: 0.9484408150551604, iteration: 54826
loss: 0.9710486531257629,grad_norm: 0.9999990279068318, iteration: 54827
loss: 0.9694536328315735,grad_norm: 0.9402897010644765, iteration: 54828
loss: 0.972055971622467,grad_norm: 0.8645827134951561, iteration: 54829
loss: 1.0065760612487793,grad_norm: 0.961307048433154, iteration: 54830
loss: 0.9715994596481323,grad_norm: 0.9999992258375664, iteration: 54831
loss: 1.0179377794265747,grad_norm: 0.9631941176436375, iteration: 54832
loss: 0.9659368395805359,grad_norm: 0.841541926876889, iteration: 54833
loss: 1.0283559560775757,grad_norm: 0.9999992870986995, iteration: 54834
loss: 0.9982551336288452,grad_norm: 0.9999991644035164, iteration: 54835
loss: 0.9696641564369202,grad_norm: 0.8371941149222065, iteration: 54836
loss: 1.0090487003326416,grad_norm: 0.999999188094353, iteration: 54837
loss: 0.9935920834541321,grad_norm: 0.9999992355143109, iteration: 54838
loss: 1.0255382061004639,grad_norm: 0.9999989069530298, iteration: 54839
loss: 0.9978656768798828,grad_norm: 0.9999990635359509, iteration: 54840
loss: 0.974795401096344,grad_norm: 0.9999991374198133, iteration: 54841
loss: 1.019544005393982,grad_norm: 0.9999990595792102, iteration: 54842
loss: 1.0020833015441895,grad_norm: 0.8965916962199733, iteration: 54843
loss: 0.9788020849227905,grad_norm: 0.9249795254050125, iteration: 54844
loss: 1.03913152217865,grad_norm: 0.9999991408933872, iteration: 54845
loss: 1.004347324371338,grad_norm: 0.999999119775707, iteration: 54846
loss: 1.0107300281524658,grad_norm: 0.9952800301195676, iteration: 54847
loss: 1.0274217128753662,grad_norm: 0.9999990808121302, iteration: 54848
loss: 0.9919571876525879,grad_norm: 0.9999992644333954, iteration: 54849
loss: 1.0224506855010986,grad_norm: 0.999999072090344, iteration: 54850
loss: 0.9826131463050842,grad_norm: 0.9999993293593431, iteration: 54851
loss: 0.9885938167572021,grad_norm: 0.9289487105016998, iteration: 54852
loss: 1.0044975280761719,grad_norm: 0.9840039730609749, iteration: 54853
loss: 1.01900315284729,grad_norm: 0.9297005000539643, iteration: 54854
loss: 1.0424845218658447,grad_norm: 0.9999991730453717, iteration: 54855
loss: 0.9838974475860596,grad_norm: 0.999999230262993, iteration: 54856
loss: 1.0117489099502563,grad_norm: 0.9999990978541151, iteration: 54857
loss: 1.0032429695129395,grad_norm: 0.9999991293020573, iteration: 54858
loss: 1.022204875946045,grad_norm: 0.9999990014695618, iteration: 54859
loss: 1.014926552772522,grad_norm: 0.903777075268646, iteration: 54860
loss: 0.9616758227348328,grad_norm: 0.8896031484367903, iteration: 54861
loss: 0.9991456270217896,grad_norm: 0.9999991063627254, iteration: 54862
loss: 1.02104914188385,grad_norm: 0.9995486799831274, iteration: 54863
loss: 0.9840436577796936,grad_norm: 0.9999991012978489, iteration: 54864
loss: 0.989327073097229,grad_norm: 0.944589243535963, iteration: 54865
loss: 0.9921850562095642,grad_norm: 0.9453006358275821, iteration: 54866
loss: 1.0042376518249512,grad_norm: 0.9999991156835673, iteration: 54867
loss: 0.9833794832229614,grad_norm: 0.9999991614284047, iteration: 54868
loss: 1.0137134790420532,grad_norm: 0.9999990328514989, iteration: 54869
loss: 0.9933409690856934,grad_norm: 0.9375525352586211, iteration: 54870
loss: 1.0498985052108765,grad_norm: 0.9999991424960137, iteration: 54871
loss: 1.0235224962234497,grad_norm: 0.9593796005057528, iteration: 54872
loss: 1.0479189157485962,grad_norm: 0.999999188017862, iteration: 54873
loss: 0.9768680334091187,grad_norm: 0.9597286590098538, iteration: 54874
loss: 0.9917462468147278,grad_norm: 0.8765947813799245, iteration: 54875
loss: 0.9882137775421143,grad_norm: 0.9999990369571096, iteration: 54876
loss: 1.023081660270691,grad_norm: 0.9999990652324886, iteration: 54877
loss: 1.013180136680603,grad_norm: 0.9999990805765624, iteration: 54878
loss: 0.9960632920265198,grad_norm: 0.8651140743485553, iteration: 54879
loss: 0.990232527256012,grad_norm: 0.9613110609395943, iteration: 54880
loss: 0.9897421002388,grad_norm: 0.9400517325327147, iteration: 54881
loss: 0.9599761962890625,grad_norm: 0.9999990935014188, iteration: 54882
loss: 0.9842569828033447,grad_norm: 0.9999990630389386, iteration: 54883
loss: 0.9985037446022034,grad_norm: 0.9999990423467225, iteration: 54884
loss: 1.0030096769332886,grad_norm: 0.9999993213446801, iteration: 54885
loss: 1.0114103555679321,grad_norm: 0.999999184646349, iteration: 54886
loss: 1.0219589471817017,grad_norm: 0.9766221511803788, iteration: 54887
loss: 1.0268490314483643,grad_norm: 0.9999990739768988, iteration: 54888
loss: 0.9812506437301636,grad_norm: 0.999999256471061, iteration: 54889
loss: 0.97860187292099,grad_norm: 0.8464571496879114, iteration: 54890
loss: 0.975851833820343,grad_norm: 0.9036638713414015, iteration: 54891
loss: 0.9752521514892578,grad_norm: 0.9999991347191974, iteration: 54892
loss: 0.9992091655731201,grad_norm: 0.9999991320134327, iteration: 54893
loss: 1.0093361139297485,grad_norm: 0.9999990932616811, iteration: 54894
loss: 0.9981822967529297,grad_norm: 0.9924356127685517, iteration: 54895
loss: 0.9733583331108093,grad_norm: 0.9999991232888743, iteration: 54896
loss: 0.9541419148445129,grad_norm: 0.916549063230447, iteration: 54897
loss: 1.0407291650772095,grad_norm: 0.9999991855853283, iteration: 54898
loss: 1.0166624784469604,grad_norm: 0.92239145424113, iteration: 54899
loss: 0.9925718903541565,grad_norm: 0.9999989997773432, iteration: 54900
loss: 1.0057332515716553,grad_norm: 0.8718230372293571, iteration: 54901
loss: 0.9678139686584473,grad_norm: 0.9999990913418818, iteration: 54902
loss: 1.0371880531311035,grad_norm: 0.9999991845785858, iteration: 54903
loss: 0.9989028573036194,grad_norm: 0.9999994437056299, iteration: 54904
loss: 0.9426649808883667,grad_norm: 0.9999991059204326, iteration: 54905
loss: 1.0278756618499756,grad_norm: 0.999999097498839, iteration: 54906
loss: 1.0226683616638184,grad_norm: 0.9099760007568819, iteration: 54907
loss: 0.9948310852050781,grad_norm: 0.9999992149349438, iteration: 54908
loss: 1.0331170558929443,grad_norm: 0.9531528084625519, iteration: 54909
loss: 0.9758756160736084,grad_norm: 0.9865993427905897, iteration: 54910
loss: 0.9893560409545898,grad_norm: 0.9999991459020964, iteration: 54911
loss: 1.0503361225128174,grad_norm: 0.9999991682881858, iteration: 54912
loss: 1.0385024547576904,grad_norm: 0.9975109440046456, iteration: 54913
loss: 1.0191974639892578,grad_norm: 0.9999990744020248, iteration: 54914
loss: 0.9764480590820312,grad_norm: 0.9999989335611117, iteration: 54915
loss: 0.9923710227012634,grad_norm: 0.9999991299235046, iteration: 54916
loss: 1.0174741744995117,grad_norm: 0.9343219685794699, iteration: 54917
loss: 0.9921733140945435,grad_norm: 0.981709334708454, iteration: 54918
loss: 0.9662812948226929,grad_norm: 0.9999990811125098, iteration: 54919
loss: 0.9796851873397827,grad_norm: 0.8866611887605313, iteration: 54920
loss: 0.9818806648254395,grad_norm: 0.9999990917885765, iteration: 54921
loss: 1.0126869678497314,grad_norm: 0.9923419025715713, iteration: 54922
loss: 1.0017846822738647,grad_norm: 0.9999991773963296, iteration: 54923
loss: 0.972355306148529,grad_norm: 0.9999992408725698, iteration: 54924
loss: 1.0038089752197266,grad_norm: 0.8968627699265507, iteration: 54925
loss: 0.9707487225532532,grad_norm: 0.9999991574741537, iteration: 54926
loss: 1.0127586126327515,grad_norm: 0.9965030845593275, iteration: 54927
loss: 1.016638994216919,grad_norm: 0.9999993955016745, iteration: 54928
loss: 0.9698861241340637,grad_norm: 0.9999991611679484, iteration: 54929
loss: 0.9771032333374023,grad_norm: 0.9138469456549512, iteration: 54930
loss: 1.0086824893951416,grad_norm: 0.9999992598167607, iteration: 54931
loss: 1.0271118879318237,grad_norm: 0.966499774218617, iteration: 54932
loss: 0.9797738194465637,grad_norm: 0.9999990619189456, iteration: 54933
loss: 1.0415852069854736,grad_norm: 0.9999990970512949, iteration: 54934
loss: 1.0108320713043213,grad_norm: 0.9999997855419391, iteration: 54935
loss: 1.0390666723251343,grad_norm: 0.9999994043320619, iteration: 54936
loss: 1.014997959136963,grad_norm: 0.9999988847936334, iteration: 54937
loss: 1.0062294006347656,grad_norm: 0.9582424299136076, iteration: 54938
loss: 0.9797028303146362,grad_norm: 0.9999989530057564, iteration: 54939
loss: 1.0120030641555786,grad_norm: 0.9262220710485805, iteration: 54940
loss: 1.0214177370071411,grad_norm: 0.8909496177635017, iteration: 54941
loss: 0.9859654903411865,grad_norm: 0.9999992477957942, iteration: 54942
loss: 0.9718285202980042,grad_norm: 0.9033454366358902, iteration: 54943
loss: 1.0396593809127808,grad_norm: 0.9999992975904504, iteration: 54944
loss: 1.031471610069275,grad_norm: 0.9999991555137884, iteration: 54945
loss: 1.022623896598816,grad_norm: 0.999999142848361, iteration: 54946
loss: 0.9988794326782227,grad_norm: 0.9999991085608376, iteration: 54947
loss: 0.9955642223358154,grad_norm: 0.9999990786133754, iteration: 54948
loss: 0.9390504956245422,grad_norm: 0.9999990321504225, iteration: 54949
loss: 1.1141655445098877,grad_norm: 0.999999479743517, iteration: 54950
loss: 1.001908302307129,grad_norm: 0.8701061958625441, iteration: 54951
loss: 0.991946280002594,grad_norm: 0.8042353614467842, iteration: 54952
loss: 0.9704633355140686,grad_norm: 0.9999991806079941, iteration: 54953
loss: 1.0453590154647827,grad_norm: 0.9999991977205036, iteration: 54954
loss: 1.0077253580093384,grad_norm: 0.9167981488439216, iteration: 54955
loss: 0.9912542700767517,grad_norm: 0.8305101674929275, iteration: 54956
loss: 1.007666826248169,grad_norm: 0.9999991099030097, iteration: 54957
loss: 1.007676362991333,grad_norm: 0.8548741639781234, iteration: 54958
loss: 1.0396240949630737,grad_norm: 0.9028038111238192, iteration: 54959
loss: 0.9809979200363159,grad_norm: 0.9999992221288658, iteration: 54960
loss: 1.0079939365386963,grad_norm: 0.9999992236247314, iteration: 54961
loss: 1.041296362876892,grad_norm: 0.9999994629579037, iteration: 54962
loss: 1.0068151950836182,grad_norm: 0.9999993221493751, iteration: 54963
loss: 0.9926683902740479,grad_norm: 0.9337907854946054, iteration: 54964
loss: 1.043229579925537,grad_norm: 0.9999991359230287, iteration: 54965
loss: 1.0177876949310303,grad_norm: 0.970981933310852, iteration: 54966
loss: 1.0147403478622437,grad_norm: 0.9999993077582637, iteration: 54967
loss: 1.0049875974655151,grad_norm: 0.9999991227432518, iteration: 54968
loss: 1.026259183883667,grad_norm: 0.9019723172133012, iteration: 54969
loss: 0.972919762134552,grad_norm: 0.9276069570470522, iteration: 54970
loss: 1.0239568948745728,grad_norm: 0.9999993121714977, iteration: 54971
loss: 0.9829979538917542,grad_norm: 0.8981721767782449, iteration: 54972
loss: 0.992978572845459,grad_norm: 0.9891626338559648, iteration: 54973
loss: 0.9923327565193176,grad_norm: 0.9950560206191795, iteration: 54974
loss: 0.9919447302818298,grad_norm: 0.9922175569050922, iteration: 54975
loss: 1.009279727935791,grad_norm: 0.9999991064616375, iteration: 54976
loss: 1.0205169916152954,grad_norm: 0.9999990477756373, iteration: 54977
loss: 0.9873362183570862,grad_norm: 0.9335598274900057, iteration: 54978
loss: 1.0065290927886963,grad_norm: 0.9999989319159928, iteration: 54979
loss: 1.016606330871582,grad_norm: 0.9999991243901254, iteration: 54980
loss: 0.9775148034095764,grad_norm: 0.8679030553251402, iteration: 54981
loss: 0.9989244341850281,grad_norm: 0.9999991849392157, iteration: 54982
loss: 0.9792649149894714,grad_norm: 0.9999991647607134, iteration: 54983
loss: 1.0032159090042114,grad_norm: 0.9840804006365128, iteration: 54984
loss: 0.949597179889679,grad_norm: 0.9745364874697523, iteration: 54985
loss: 1.0294948816299438,grad_norm: 0.9776168729457452, iteration: 54986
loss: 0.965968906879425,grad_norm: 0.9999991240907987, iteration: 54987
loss: 1.0027927160263062,grad_norm: 0.9999992153509822, iteration: 54988
loss: 0.9934878349304199,grad_norm: 0.9999992454969769, iteration: 54989
loss: 1.0192924737930298,grad_norm: 0.9999990172937387, iteration: 54990
loss: 1.013696551322937,grad_norm: 0.9999991267105985, iteration: 54991
loss: 1.0047048330307007,grad_norm: 0.878218066313438, iteration: 54992
loss: 0.9816805124282837,grad_norm: 0.9655222623088213, iteration: 54993
loss: 0.9987077116966248,grad_norm: 0.9090267957120401, iteration: 54994
loss: 1.002072811126709,grad_norm: 0.9936883476265562, iteration: 54995
loss: 0.9537433385848999,grad_norm: 0.9999992689714695, iteration: 54996
loss: 0.9847612977027893,grad_norm: 0.9999992172201925, iteration: 54997
loss: 1.001447319984436,grad_norm: 0.877023408819685, iteration: 54998
loss: 1.0148696899414062,grad_norm: 0.8622602754169865, iteration: 54999
loss: 0.9829283952713013,grad_norm: 0.9999991657321482, iteration: 55000
loss: 1.0435478687286377,grad_norm: 0.9999992029475236, iteration: 55001
loss: 1.0081849098205566,grad_norm: 0.9999991389992801, iteration: 55002
loss: 1.0128676891326904,grad_norm: 0.999999096791982, iteration: 55003
loss: 0.9851450324058533,grad_norm: 0.9999995675357125, iteration: 55004
loss: 0.9960238933563232,grad_norm: 0.9999991282207159, iteration: 55005
loss: 1.0822867155075073,grad_norm: 0.9999996219968882, iteration: 55006
loss: 1.014953851699829,grad_norm: 0.8430088770287165, iteration: 55007
loss: 0.9846693873405457,grad_norm: 0.8885896633292996, iteration: 55008
loss: 0.9973413944244385,grad_norm: 0.8318955785483858, iteration: 55009
loss: 0.998927891254425,grad_norm: 0.9284060418398998, iteration: 55010
loss: 1.0237408876419067,grad_norm: 0.9999991052565176, iteration: 55011
loss: 1.0337562561035156,grad_norm: 0.999999184681873, iteration: 55012
loss: 0.9704893231391907,grad_norm: 0.9999991685068943, iteration: 55013
loss: 1.0242384672164917,grad_norm: 0.999999143892196, iteration: 55014
loss: 1.0110026597976685,grad_norm: 0.9999991418560523, iteration: 55015
loss: 1.0046788454055786,grad_norm: 0.8436985238257853, iteration: 55016
loss: 1.0134849548339844,grad_norm: 0.8689902387345979, iteration: 55017
loss: 0.9863503575325012,grad_norm: 0.8929100037406542, iteration: 55018
loss: 0.9903026223182678,grad_norm: 0.9999991034362344, iteration: 55019
loss: 1.0486886501312256,grad_norm: 0.9999990928423754, iteration: 55020
loss: 1.008320927619934,grad_norm: 0.9598928551430055, iteration: 55021
loss: 1.0040494203567505,grad_norm: 0.9999992551586371, iteration: 55022
loss: 0.9861918687820435,grad_norm: 0.9999990847668843, iteration: 55023
loss: 1.027490496635437,grad_norm: 0.9999991202217505, iteration: 55024
loss: 1.0108063220977783,grad_norm: 0.9999990671759453, iteration: 55025
loss: 0.9949286580085754,grad_norm: 0.9999989711613796, iteration: 55026
loss: 1.0009218454360962,grad_norm: 0.9999989916038696, iteration: 55027
loss: 1.0836055278778076,grad_norm: 0.9999997882980888, iteration: 55028
loss: 1.0264993906021118,grad_norm: 0.9948574136866785, iteration: 55029
loss: 1.01078200340271,grad_norm: 0.9999993320402315, iteration: 55030
loss: 1.010010004043579,grad_norm: 0.9749537116638184, iteration: 55031
loss: 0.9940068125724792,grad_norm: 0.9999991189576244, iteration: 55032
loss: 0.9908459782600403,grad_norm: 0.9999991881339728, iteration: 55033
loss: 0.9618596434593201,grad_norm: 0.9433750242436931, iteration: 55034
loss: 1.0006412267684937,grad_norm: 0.9999990389131952, iteration: 55035
loss: 0.9775185585021973,grad_norm: 0.9999992061450541, iteration: 55036
loss: 0.986085057258606,grad_norm: 0.9786437191130478, iteration: 55037
loss: 0.9915236234664917,grad_norm: 0.9999993305698849, iteration: 55038
loss: 1.0027841329574585,grad_norm: 0.9590548963647514, iteration: 55039
loss: 0.9668889045715332,grad_norm: 0.9999991894664549, iteration: 55040
loss: 0.9791562557220459,grad_norm: 0.8683342035630173, iteration: 55041
loss: 0.9968414306640625,grad_norm: 0.9999991541685342, iteration: 55042
loss: 0.9962815642356873,grad_norm: 0.9999990536492949, iteration: 55043
loss: 1.0229724645614624,grad_norm: 0.9999990573100178, iteration: 55044
loss: 1.0241590738296509,grad_norm: 0.9999991825949559, iteration: 55045
loss: 0.9902647733688354,grad_norm: 0.9999993814987508, iteration: 55046
loss: 0.9909813404083252,grad_norm: 0.9999992700304953, iteration: 55047
loss: 1.04585599899292,grad_norm: 0.9999990938292015, iteration: 55048
loss: 0.9546765089035034,grad_norm: 0.9999990219034338, iteration: 55049
loss: 1.0193480253219604,grad_norm: 0.9094721338673604, iteration: 55050
loss: 1.0197538137435913,grad_norm: 0.9999993618852473, iteration: 55051
loss: 0.9855965971946716,grad_norm: 0.9999991955887836, iteration: 55052
loss: 1.0760608911514282,grad_norm: 0.9999998420675867, iteration: 55053
loss: 1.0234694480895996,grad_norm: 0.9999991872974375, iteration: 55054
loss: 1.026290774345398,grad_norm: 0.918977656637057, iteration: 55055
loss: 0.9784114956855774,grad_norm: 0.995578358732687, iteration: 55056
loss: 0.9932355284690857,grad_norm: 0.9678781537198317, iteration: 55057
loss: 0.9565470814704895,grad_norm: 0.9385732462765901, iteration: 55058
loss: 0.9963542819023132,grad_norm: 0.9999990655695733, iteration: 55059
loss: 0.9865975379943848,grad_norm: 0.9999992162846122, iteration: 55060
loss: 1.0160988569259644,grad_norm: 0.9394857361817863, iteration: 55061
loss: 1.0088521242141724,grad_norm: 0.9999990959263622, iteration: 55062
loss: 0.9952819347381592,grad_norm: 0.9117516620254194, iteration: 55063
loss: 0.9985756874084473,grad_norm: 0.9999992397110944, iteration: 55064
loss: 0.9985257983207703,grad_norm: 0.7997069006851056, iteration: 55065
loss: 1.003519892692566,grad_norm: 0.9999992238257732, iteration: 55066
loss: 1.065589189529419,grad_norm: 0.999999593966021, iteration: 55067
loss: 0.9793739914894104,grad_norm: 0.9999992649459105, iteration: 55068
loss: 0.9862031936645508,grad_norm: 0.9527614414004927, iteration: 55069
loss: 1.0828999280929565,grad_norm: 0.9999996144692795, iteration: 55070
loss: 1.081386685371399,grad_norm: 0.9999997387343413, iteration: 55071
loss: 0.9819037914276123,grad_norm: 0.9999991814033297, iteration: 55072
loss: 1.0872377157211304,grad_norm: 0.9999997086883086, iteration: 55073
loss: 0.9890568852424622,grad_norm: 0.9999990425270944, iteration: 55074
loss: 1.0198748111724854,grad_norm: 1.0000000426606719, iteration: 55075
loss: 0.9632534980773926,grad_norm: 0.8426554830948272, iteration: 55076
loss: 1.0657731294631958,grad_norm: 0.9999992563078383, iteration: 55077
loss: 1.0398632287979126,grad_norm: 0.9999991118769647, iteration: 55078
loss: 1.021363377571106,grad_norm: 0.9999996798225805, iteration: 55079
loss: 1.0158830881118774,grad_norm: 0.9648709080662585, iteration: 55080
loss: 1.0229849815368652,grad_norm: 0.9999992596417708, iteration: 55081
loss: 1.0325380563735962,grad_norm: 0.95984575737911, iteration: 55082
loss: 0.9911320209503174,grad_norm: 0.8606774082166408, iteration: 55083
loss: 1.002758502960205,grad_norm: 0.9865076628903628, iteration: 55084
loss: 0.9883219599723816,grad_norm: 0.9198874256145639, iteration: 55085
loss: 1.018053650856018,grad_norm: 0.9999990106842778, iteration: 55086
loss: 1.0109333992004395,grad_norm: 0.9457432459732273, iteration: 55087
loss: 0.9600809216499329,grad_norm: 0.9796692685400684, iteration: 55088
loss: 0.9858070611953735,grad_norm: 0.9999990683559818, iteration: 55089
loss: 0.9890565276145935,grad_norm: 0.9999991616163653, iteration: 55090
loss: 1.0101959705352783,grad_norm: 0.9944492066296403, iteration: 55091
loss: 0.973875105381012,grad_norm: 0.9999992263386613, iteration: 55092
loss: 1.015290379524231,grad_norm: 0.9805531952370189, iteration: 55093
loss: 1.0137134790420532,grad_norm: 0.9074103515196419, iteration: 55094
loss: 0.9590846300125122,grad_norm: 0.9528119280315711, iteration: 55095
loss: 1.0479824542999268,grad_norm: 0.9999994485395048, iteration: 55096
loss: 1.020868182182312,grad_norm: 0.9471229630166761, iteration: 55097
loss: 1.012389063835144,grad_norm: 0.999999312959487, iteration: 55098
loss: 0.9944493174552917,grad_norm: 0.9697217506963364, iteration: 55099
loss: 1.0035812854766846,grad_norm: 0.812469348721228, iteration: 55100
loss: 1.000291109085083,grad_norm: 0.9999992723732295, iteration: 55101
loss: 1.0319085121154785,grad_norm: 0.9999990027361326, iteration: 55102
loss: 0.9920759201049805,grad_norm: 0.9999993067609421, iteration: 55103
loss: 1.1803746223449707,grad_norm: 0.9999998681299235, iteration: 55104
loss: 1.042287826538086,grad_norm: 0.9688989626492599, iteration: 55105
loss: 1.0005977153778076,grad_norm: 0.9999990002795979, iteration: 55106
loss: 1.0117535591125488,grad_norm: 0.9556737437453158, iteration: 55107
loss: 1.0032848119735718,grad_norm: 0.9258985996262794, iteration: 55108
loss: 0.9782330989837646,grad_norm: 0.9999996811256895, iteration: 55109
loss: 1.0429378747940063,grad_norm: 0.999999318937665, iteration: 55110
loss: 1.244329810142517,grad_norm: 0.9999998975100556, iteration: 55111
loss: 1.057405710220337,grad_norm: 0.9999996900069814, iteration: 55112
loss: 1.0056333541870117,grad_norm: 0.999999037457658, iteration: 55113
loss: 0.9848729372024536,grad_norm: 0.7758467948576316, iteration: 55114
loss: 1.0336819887161255,grad_norm: 0.9999995503850717, iteration: 55115
loss: 1.0355535745620728,grad_norm: 0.9023301143412924, iteration: 55116
loss: 0.994217038154602,grad_norm: 0.9999992149593794, iteration: 55117
loss: 1.147959589958191,grad_norm: 1.0000000083229394, iteration: 55118
loss: 0.9838160276412964,grad_norm: 0.9999991228062589, iteration: 55119
loss: 1.093053936958313,grad_norm: 0.9999991300648289, iteration: 55120
loss: 1.0132136344909668,grad_norm: 0.8387041544364324, iteration: 55121
loss: 1.01596999168396,grad_norm: 0.9999992502021444, iteration: 55122
loss: 0.9735950827598572,grad_norm: 0.999999225130718, iteration: 55123
loss: 0.9761809706687927,grad_norm: 0.9781653570759522, iteration: 55124
loss: 1.0546972751617432,grad_norm: 0.9999993893347131, iteration: 55125
loss: 1.0165122747421265,grad_norm: 0.9999994826656354, iteration: 55126
loss: 1.017231822013855,grad_norm: 0.9999992551286376, iteration: 55127
loss: 0.9922619462013245,grad_norm: 0.9999990840610734, iteration: 55128
loss: 0.9825171232223511,grad_norm: 0.8772907514134356, iteration: 55129
loss: 1.036447525024414,grad_norm: 0.8866569281882204, iteration: 55130
loss: 0.9858469367027283,grad_norm: 0.961539718523165, iteration: 55131
loss: 0.982796847820282,grad_norm: 0.937223275424494, iteration: 55132
loss: 0.999807596206665,grad_norm: 0.8865567440540634, iteration: 55133
loss: 0.9869484305381775,grad_norm: 0.978836178180969, iteration: 55134
loss: 0.9893050193786621,grad_norm: 0.9762204503217062, iteration: 55135
loss: 1.0511165857315063,grad_norm: 0.9999996928355532, iteration: 55136
loss: 1.001214623451233,grad_norm: 0.9999995661856239, iteration: 55137
loss: 0.9972158670425415,grad_norm: 0.940172394872258, iteration: 55138
loss: 1.0126636028289795,grad_norm: 0.9433057823064885, iteration: 55139
loss: 1.0326588153839111,grad_norm: 0.97283522021039, iteration: 55140
loss: 1.0110894441604614,grad_norm: 0.9999992496677754, iteration: 55141
loss: 0.9799497723579407,grad_norm: 0.9999990008251464, iteration: 55142
loss: 0.9779955148696899,grad_norm: 0.9999990113984465, iteration: 55143
loss: 1.0254724025726318,grad_norm: 0.9999779809031071, iteration: 55144
loss: 1.0103724002838135,grad_norm: 0.9999993906591588, iteration: 55145
loss: 0.9919412136077881,grad_norm: 0.9999990347464178, iteration: 55146
loss: 0.9741858839988708,grad_norm: 0.9999993002334532, iteration: 55147
loss: 1.2962942123413086,grad_norm: 1.0000000405588194, iteration: 55148
loss: 0.9898237586021423,grad_norm: 0.8821374927783894, iteration: 55149
loss: 0.9882599711418152,grad_norm: 0.9046363585517608, iteration: 55150
loss: 0.9579389691352844,grad_norm: 0.9999991262955311, iteration: 55151
loss: 0.9638522267341614,grad_norm: 0.9999991291228033, iteration: 55152
loss: 0.9758836627006531,grad_norm: 0.9999991741397078, iteration: 55153
loss: 0.9738823175430298,grad_norm: 0.9999992159144235, iteration: 55154
loss: 1.0880171060562134,grad_norm: 0.999999986353345, iteration: 55155
loss: 1.0320026874542236,grad_norm: 0.9999998738511736, iteration: 55156
loss: 1.0332809686660767,grad_norm: 0.9999991552138684, iteration: 55157
loss: 0.9832689166069031,grad_norm: 0.8697188999280849, iteration: 55158
loss: 0.9888038039207458,grad_norm: 0.9999991175361195, iteration: 55159
loss: 0.9778977632522583,grad_norm: 0.9999990756455945, iteration: 55160
loss: 1.0746612548828125,grad_norm: 0.9999992627428471, iteration: 55161
loss: 1.0253210067749023,grad_norm: 0.988324906937545, iteration: 55162
loss: 1.0208581686019897,grad_norm: 0.9520444656444738, iteration: 55163
loss: 0.9941473007202148,grad_norm: 0.9999992782682088, iteration: 55164
loss: 1.0146160125732422,grad_norm: 0.999999177218248, iteration: 55165
loss: 1.0583604574203491,grad_norm: 0.9999991363106193, iteration: 55166
loss: 0.9829040169715881,grad_norm: 0.9999991783862686, iteration: 55167
loss: 1.019086480140686,grad_norm: 0.999999232762968, iteration: 55168
loss: 0.9866373538970947,grad_norm: 0.9999992157216322, iteration: 55169
loss: 1.0287423133850098,grad_norm: 0.9999991210619198, iteration: 55170
loss: 1.0038832426071167,grad_norm: 0.9999991494028058, iteration: 55171
loss: 1.0061091184616089,grad_norm: 0.9999991828915039, iteration: 55172
loss: 0.9766421318054199,grad_norm: 0.9999990745635955, iteration: 55173
loss: 1.0698059797286987,grad_norm: 0.9999989820464545, iteration: 55174
loss: 1.0134973526000977,grad_norm: 0.9999990651137369, iteration: 55175
loss: 0.9980548024177551,grad_norm: 0.999999208715272, iteration: 55176
loss: 1.029680848121643,grad_norm: 0.9999996370012433, iteration: 55177
loss: 1.0750852823257446,grad_norm: 0.9999993120574034, iteration: 55178
loss: 0.9951616525650024,grad_norm: 0.9999991063683109, iteration: 55179
loss: 1.0029183626174927,grad_norm: 0.999999203868304, iteration: 55180
loss: 0.9764811992645264,grad_norm: 0.9999990292392908, iteration: 55181
loss: 0.9931701421737671,grad_norm: 0.7917638644536381, iteration: 55182
loss: 0.9770649075508118,grad_norm: 0.9784862362545574, iteration: 55183
loss: 0.9990502595901489,grad_norm: 0.8601826434509008, iteration: 55184
loss: 0.9998469352722168,grad_norm: 0.9999997246315903, iteration: 55185
loss: 0.9995630383491516,grad_norm: 0.9999990288218211, iteration: 55186
loss: 1.0235791206359863,grad_norm: 0.8926164169106467, iteration: 55187
loss: 0.9914998412132263,grad_norm: 0.9999991317070712, iteration: 55188
loss: 1.0092707872390747,grad_norm: 0.9999992284492797, iteration: 55189
loss: 1.0042625665664673,grad_norm: 0.9462441450622204, iteration: 55190
loss: 1.0151292085647583,grad_norm: 0.9999991580063374, iteration: 55191
loss: 1.0021402835845947,grad_norm: 0.9999991823689405, iteration: 55192
loss: 0.9990732669830322,grad_norm: 0.9271101881005658, iteration: 55193
loss: 0.9840550422668457,grad_norm: 0.902293584369169, iteration: 55194
loss: 1.0202363729476929,grad_norm: 0.9999989026111298, iteration: 55195
loss: 0.9869927763938904,grad_norm: 0.9999990661183461, iteration: 55196
loss: 0.9915270209312439,grad_norm: 0.9999990089555384, iteration: 55197
loss: 1.0117586851119995,grad_norm: 0.9474786684624378, iteration: 55198
loss: 0.9652427434921265,grad_norm: 0.9622634678879484, iteration: 55199
loss: 1.0168083906173706,grad_norm: 0.9999990564500882, iteration: 55200
loss: 0.9986173510551453,grad_norm: 0.9081818551432045, iteration: 55201
loss: 0.9845083355903625,grad_norm: 0.903242160936786, iteration: 55202
loss: 0.9878870844841003,grad_norm: 0.932713202456546, iteration: 55203
loss: 0.9817196130752563,grad_norm: 0.9999990230787522, iteration: 55204
loss: 0.9668906927108765,grad_norm: 0.9999993995622707, iteration: 55205
loss: 0.9819470643997192,grad_norm: 0.9999991550003648, iteration: 55206
loss: 0.984276533126831,grad_norm: 0.7565552476070728, iteration: 55207
loss: 0.9560574293136597,grad_norm: 0.9999990715007904, iteration: 55208
loss: 0.9930310249328613,grad_norm: 0.9020493039807693, iteration: 55209
loss: 1.0054354667663574,grad_norm: 0.8811723949692651, iteration: 55210
loss: 0.9902648329734802,grad_norm: 0.9017545705755666, iteration: 55211
loss: 1.0552587509155273,grad_norm: 0.9999989362875379, iteration: 55212
loss: 1.0562961101531982,grad_norm: 0.9999996417775012, iteration: 55213
loss: 1.022118091583252,grad_norm: 0.9999989686226195, iteration: 55214
loss: 1.0340653657913208,grad_norm: 0.9899202708991413, iteration: 55215
loss: 0.9941717386245728,grad_norm: 0.9999998471795423, iteration: 55216
loss: 1.0262385606765747,grad_norm: 0.9999994291216496, iteration: 55217
loss: 1.044270634651184,grad_norm: 0.9999990536166183, iteration: 55218
loss: 0.9876654148101807,grad_norm: 0.9999991778317697, iteration: 55219
loss: 1.002602219581604,grad_norm: 0.9457527752879781, iteration: 55220
loss: 0.9982596635818481,grad_norm: 0.8762745770582498, iteration: 55221
loss: 0.9982577562332153,grad_norm: 0.9001993555402001, iteration: 55222
loss: 0.9521716833114624,grad_norm: 0.9693742833414511, iteration: 55223
loss: 0.982081949710846,grad_norm: 0.9949043283205778, iteration: 55224
loss: 1.0098495483398438,grad_norm: 0.9999991796635487, iteration: 55225
loss: 1.0067161321640015,grad_norm: 0.9999992644113689, iteration: 55226
loss: 0.9819881916046143,grad_norm: 0.9999991954941579, iteration: 55227
loss: 0.9953171610832214,grad_norm: 0.9999991118255902, iteration: 55228
loss: 1.002692461013794,grad_norm: 0.9999992681393904, iteration: 55229
loss: 1.0332868099212646,grad_norm: 0.9331980690701587, iteration: 55230
loss: 1.0106769800186157,grad_norm: 0.8847926726106068, iteration: 55231
loss: 1.009535551071167,grad_norm: 0.9999996585546689, iteration: 55232
loss: 0.9907528758049011,grad_norm: 0.9999991844266111, iteration: 55233
loss: 1.0325194597244263,grad_norm: 0.9738021989623834, iteration: 55234
loss: 1.0379631519317627,grad_norm: 0.9999990779374224, iteration: 55235
loss: 1.0086222887039185,grad_norm: 0.9999991800867827, iteration: 55236
loss: 0.9846593141555786,grad_norm: 0.9999992849616335, iteration: 55237
loss: 1.060829520225525,grad_norm: 0.9999991659038319, iteration: 55238
loss: 1.036089301109314,grad_norm: 0.9999994082760737, iteration: 55239
loss: 0.972868800163269,grad_norm: 0.9280208127079317, iteration: 55240
loss: 1.0166668891906738,grad_norm: 0.9999991703071918, iteration: 55241
loss: 1.0089284181594849,grad_norm: 0.9359603574140908, iteration: 55242
loss: 1.0127497911453247,grad_norm: 0.9999988968960155, iteration: 55243
loss: 0.9763687252998352,grad_norm: 0.91850000096942, iteration: 55244
loss: 0.9508860111236572,grad_norm: 0.9929166355514472, iteration: 55245
loss: 0.9943174123764038,grad_norm: 0.9624492498413636, iteration: 55246
loss: 1.0564100742340088,grad_norm: 0.9999994403939809, iteration: 55247
loss: 0.9745690226554871,grad_norm: 0.9999990724281234, iteration: 55248
loss: 1.0099132061004639,grad_norm: 0.9999991835543259, iteration: 55249
loss: 0.9539826512336731,grad_norm: 0.9999991669862546, iteration: 55250
loss: 0.9391107559204102,grad_norm: 0.992289162021708, iteration: 55251
loss: 1.0390467643737793,grad_norm: 0.999998995063063, iteration: 55252
loss: 0.9827714562416077,grad_norm: 0.9999988837706184, iteration: 55253
loss: 1.0225799083709717,grad_norm: 0.9999996005233989, iteration: 55254
loss: 1.0314643383026123,grad_norm: 0.9999998408282984, iteration: 55255
loss: 0.9982296228408813,grad_norm: 0.9999995489840189, iteration: 55256
loss: 1.0225242376327515,grad_norm: 0.999999093086952, iteration: 55257
loss: 0.9864110946655273,grad_norm: 0.9971623619261809, iteration: 55258
loss: 1.0203429460525513,grad_norm: 0.9495225220108683, iteration: 55259
loss: 1.0261366367340088,grad_norm: 0.9546565486759261, iteration: 55260
loss: 1.0236601829528809,grad_norm: 0.999999182020219, iteration: 55261
loss: 0.9397979974746704,grad_norm: 0.9913179380491111, iteration: 55262
loss: 1.0259062051773071,grad_norm: 0.9129797258733047, iteration: 55263
loss: 0.9847840070724487,grad_norm: 0.9999991252823411, iteration: 55264
loss: 1.0065624713897705,grad_norm: 0.9999990767677848, iteration: 55265
loss: 1.0167982578277588,grad_norm: 0.8700184999353998, iteration: 55266
loss: 1.0050816535949707,grad_norm: 0.8409757027762421, iteration: 55267
loss: 1.0346711874008179,grad_norm: 0.9999989555604962, iteration: 55268
loss: 1.0397690534591675,grad_norm: 0.8992368247377801, iteration: 55269
loss: 1.1102681159973145,grad_norm: 0.9999993052579148, iteration: 55270
loss: 0.9869321584701538,grad_norm: 0.999999140618189, iteration: 55271
loss: 0.9657755494117737,grad_norm: 0.9999992166258849, iteration: 55272
loss: 0.965251624584198,grad_norm: 0.8752117167544746, iteration: 55273
loss: 1.0155233144760132,grad_norm: 0.9999990930323289, iteration: 55274
loss: 1.0061066150665283,grad_norm: 0.9872267275159219, iteration: 55275
loss: 1.0092883110046387,grad_norm: 0.9663819686117338, iteration: 55276
loss: 1.0170423984527588,grad_norm: 0.9999992360864329, iteration: 55277
loss: 0.973013162612915,grad_norm: 0.999999256016727, iteration: 55278
loss: 0.9637714624404907,grad_norm: 0.9338215664428304, iteration: 55279
loss: 0.9697923064231873,grad_norm: 0.9999992124953259, iteration: 55280
loss: 0.9661152362823486,grad_norm: 0.9999989485822943, iteration: 55281
loss: 1.0094863176345825,grad_norm: 0.9725156260820207, iteration: 55282
loss: 0.985274612903595,grad_norm: 0.9563044226765599, iteration: 55283
loss: 1.0193549394607544,grad_norm: 0.9932743176592497, iteration: 55284
loss: 1.0460054874420166,grad_norm: 0.9974339262567358, iteration: 55285
loss: 1.006523609161377,grad_norm: 0.8372327937673838, iteration: 55286
loss: 1.0201715230941772,grad_norm: 0.9853176249619864, iteration: 55287
loss: 0.9819363355636597,grad_norm: 0.9603318015091268, iteration: 55288
loss: 1.0075833797454834,grad_norm: 0.999999429479153, iteration: 55289
loss: 1.0143245458602905,grad_norm: 0.9999998848719659, iteration: 55290
loss: 1.0338127613067627,grad_norm: 0.9175710133925131, iteration: 55291
loss: 0.9760192632675171,grad_norm: 0.8679526154959166, iteration: 55292
loss: 1.01106858253479,grad_norm: 0.8453272355589755, iteration: 55293
loss: 1.0294582843780518,grad_norm: 0.7605854216753986, iteration: 55294
loss: 1.0279829502105713,grad_norm: 0.9130731302242652, iteration: 55295
loss: 1.002947449684143,grad_norm: 0.945753139998395, iteration: 55296
loss: 0.9987769722938538,grad_norm: 0.9999991392649684, iteration: 55297
loss: 1.0521938800811768,grad_norm: 0.9787166410819313, iteration: 55298
loss: 0.9966323971748352,grad_norm: 0.9999991774801971, iteration: 55299
loss: 1.0148109197616577,grad_norm: 0.9118454299546029, iteration: 55300
loss: 1.0199476480484009,grad_norm: 0.8098004329093577, iteration: 55301
loss: 1.0001908540725708,grad_norm: 0.9999992500893007, iteration: 55302
loss: 0.9906390905380249,grad_norm: 0.9999991359119405, iteration: 55303
loss: 1.0104972124099731,grad_norm: 0.9634830629195945, iteration: 55304
loss: 0.9464809894561768,grad_norm: 0.8328070211839913, iteration: 55305
loss: 0.9690579771995544,grad_norm: 0.9416114472258447, iteration: 55306
loss: 0.9851363301277161,grad_norm: 0.8749923212587819, iteration: 55307
loss: 1.0185251235961914,grad_norm: 0.9999991246781479, iteration: 55308
loss: 1.0161519050598145,grad_norm: 0.9757578940544184, iteration: 55309
loss: 0.9817932844161987,grad_norm: 0.7397175393088529, iteration: 55310
loss: 0.9985972046852112,grad_norm: 0.95359848924025, iteration: 55311
loss: 0.9956773519515991,grad_norm: 0.9498683408217194, iteration: 55312
loss: 0.9990022778511047,grad_norm: 0.9464768801890626, iteration: 55313
loss: 1.0224674940109253,grad_norm: 0.9999993459037081, iteration: 55314
loss: 0.9931930303573608,grad_norm: 0.9999991778723718, iteration: 55315
loss: 0.9270598888397217,grad_norm: 0.9999990258835246, iteration: 55316
loss: 1.0215227603912354,grad_norm: 0.9999990372346031, iteration: 55317
loss: 1.0319234132766724,grad_norm: 0.8291514925829203, iteration: 55318
loss: 1.0039845705032349,grad_norm: 0.9501133669915799, iteration: 55319
loss: 0.9886470437049866,grad_norm: 0.9321601512294648, iteration: 55320
loss: 1.0281122922897339,grad_norm: 0.959571147730972, iteration: 55321
loss: 1.1273401975631714,grad_norm: 0.999999544845976, iteration: 55322
loss: 0.9953328967094421,grad_norm: 0.9426774910811323, iteration: 55323
loss: 1.0156049728393555,grad_norm: 0.999999158655208, iteration: 55324
loss: 0.9765118956565857,grad_norm: 0.930359189801184, iteration: 55325
loss: 0.9777478575706482,grad_norm: 0.9999989894109385, iteration: 55326
loss: 0.9854084253311157,grad_norm: 0.9999991183697301, iteration: 55327
loss: 0.9940047860145569,grad_norm: 0.9999992792581913, iteration: 55328
loss: 0.9943514466285706,grad_norm: 0.8988946615845973, iteration: 55329
loss: 0.9760489463806152,grad_norm: 0.9550458393110818, iteration: 55330
loss: 0.9753261208534241,grad_norm: 0.9828152209105521, iteration: 55331
loss: 1.010575294494629,grad_norm: 0.9761336795097691, iteration: 55332
loss: 1.0259636640548706,grad_norm: 0.9999993125527974, iteration: 55333
loss: 1.0555200576782227,grad_norm: 0.9999997432434343, iteration: 55334
loss: 1.047073245048523,grad_norm: 0.9999991416381747, iteration: 55335
loss: 1.032091498374939,grad_norm: 0.972727817191787, iteration: 55336
loss: 0.9940098524093628,grad_norm: 0.8688666309121794, iteration: 55337
loss: 0.9888114929199219,grad_norm: 0.8513955269478353, iteration: 55338
loss: 0.991411030292511,grad_norm: 0.9999991636585425, iteration: 55339
loss: 0.9963805079460144,grad_norm: 0.9999991845564601, iteration: 55340
loss: 1.0014384984970093,grad_norm: 0.9999991961542607, iteration: 55341
loss: 1.0135728120803833,grad_norm: 0.9488424969891147, iteration: 55342
loss: 1.0023714303970337,grad_norm: 0.9999990566669053, iteration: 55343
loss: 1.0248116254806519,grad_norm: 0.9999993034456363, iteration: 55344
loss: 0.9818987846374512,grad_norm: 0.9999992375976011, iteration: 55345
loss: 1.0421007871627808,grad_norm: 0.9999991287332914, iteration: 55346
loss: 1.0121058225631714,grad_norm: 0.8910445193969104, iteration: 55347
loss: 1.0144453048706055,grad_norm: 0.9264547381370017, iteration: 55348
loss: 1.0276168584823608,grad_norm: 0.999999175563745, iteration: 55349
loss: 0.9853352904319763,grad_norm: 0.9999992401036315, iteration: 55350
loss: 1.0067987442016602,grad_norm: 0.9999992077479237, iteration: 55351
loss: 1.0145074129104614,grad_norm: 0.9999992383928058, iteration: 55352
loss: 1.0364735126495361,grad_norm: 0.999999300462548, iteration: 55353
loss: 1.0513185262680054,grad_norm: 0.9999997922739603, iteration: 55354
loss: 1.0278011560440063,grad_norm: 0.9999990136988265, iteration: 55355
loss: 1.0744367837905884,grad_norm: 0.9698554813625901, iteration: 55356
loss: 1.0275169610977173,grad_norm: 0.9111035760569471, iteration: 55357
loss: 0.9917701482772827,grad_norm: 0.9999990312594255, iteration: 55358
loss: 1.037192463874817,grad_norm: 0.9999991482684407, iteration: 55359
loss: 0.9977356195449829,grad_norm: 0.9999991143102729, iteration: 55360
loss: 0.9622639417648315,grad_norm: 0.9999990963960346, iteration: 55361
loss: 1.0589935779571533,grad_norm: 0.9996570994920294, iteration: 55362
loss: 0.9967358112335205,grad_norm: 0.9999990710168276, iteration: 55363
loss: 0.9548313021659851,grad_norm: 0.9629619203025344, iteration: 55364
loss: 1.0314350128173828,grad_norm: 0.9729982500835123, iteration: 55365
loss: 0.9872991442680359,grad_norm: 0.950428551602887, iteration: 55366
loss: 1.0452123880386353,grad_norm: 0.9999991456780751, iteration: 55367
loss: 1.0236637592315674,grad_norm: 0.99999918164334, iteration: 55368
loss: 0.9947053790092468,grad_norm: 0.8777845891849894, iteration: 55369
loss: 0.9847144484519958,grad_norm: 0.8226966184529069, iteration: 55370
loss: 1.0023144483566284,grad_norm: 0.9884202796654837, iteration: 55371
loss: 0.9991865754127502,grad_norm: 0.9999993953959865, iteration: 55372
loss: 0.9857445359230042,grad_norm: 0.9874092170781367, iteration: 55373
loss: 0.9856453537940979,grad_norm: 0.9999992525946566, iteration: 55374
loss: 0.9832992553710938,grad_norm: 0.9497600444556374, iteration: 55375
loss: 1.0089681148529053,grad_norm: 0.8946100712084932, iteration: 55376
loss: 0.9808480143547058,grad_norm: 0.884086743904287, iteration: 55377
loss: 1.0120948553085327,grad_norm: 0.8443743022791415, iteration: 55378
loss: 1.0132852792739868,grad_norm: 0.9999990789601249, iteration: 55379
loss: 1.0214264392852783,grad_norm: 0.9999991704357883, iteration: 55380
loss: 0.9947441816329956,grad_norm: 0.9911244245661092, iteration: 55381
loss: 0.9812521934509277,grad_norm: 0.9999991297878736, iteration: 55382
loss: 1.0352164506912231,grad_norm: 0.999998983697371, iteration: 55383
loss: 0.9920731782913208,grad_norm: 0.9999991957464117, iteration: 55384
loss: 0.9914048910140991,grad_norm: 0.9853114554489332, iteration: 55385
loss: 0.9953223466873169,grad_norm: 0.8397225663884733, iteration: 55386
loss: 1.0313317775726318,grad_norm: 0.9402355737111401, iteration: 55387
loss: 1.0192592144012451,grad_norm: 0.9073503829359151, iteration: 55388
loss: 0.9794203639030457,grad_norm: 0.999999131504447, iteration: 55389
loss: 1.0306575298309326,grad_norm: 0.999999230309093, iteration: 55390
loss: 1.0106754302978516,grad_norm: 0.8504775849349606, iteration: 55391
loss: 0.9737062454223633,grad_norm: 0.95324278512373, iteration: 55392
loss: 0.9897928833961487,grad_norm: 0.9999990978855192, iteration: 55393
loss: 1.0409965515136719,grad_norm: 0.9999991677254412, iteration: 55394
loss: 1.0250667333602905,grad_norm: 0.9999990671430344, iteration: 55395
loss: 0.9929397106170654,grad_norm: 0.9906575571922842, iteration: 55396
loss: 1.0162228345870972,grad_norm: 0.9999998516533158, iteration: 55397
loss: 0.9891692399978638,grad_norm: 0.9999992220843409, iteration: 55398
loss: 0.9711453318595886,grad_norm: 0.9999992325014633, iteration: 55399
loss: 1.0266954898834229,grad_norm: 0.9999991659350473, iteration: 55400
loss: 0.9633792638778687,grad_norm: 0.9999990876769467, iteration: 55401
loss: 1.0310394763946533,grad_norm: 0.9999991105391879, iteration: 55402
loss: 1.0372562408447266,grad_norm: 0.9618451564378945, iteration: 55403
loss: 0.9760738015174866,grad_norm: 0.9014235756304684, iteration: 55404
loss: 0.9770960211753845,grad_norm: 0.9999989980199055, iteration: 55405
loss: 1.0109323263168335,grad_norm: 0.9949225347773177, iteration: 55406
loss: 0.9930418133735657,grad_norm: 0.9999990947868197, iteration: 55407
loss: 1.0602954626083374,grad_norm: 0.9999996059925729, iteration: 55408
loss: 1.0087699890136719,grad_norm: 0.9999990954210789, iteration: 55409
loss: 1.01911199092865,grad_norm: 0.9999995675249302, iteration: 55410
loss: 0.9824386239051819,grad_norm: 0.9999990621838135, iteration: 55411
loss: 0.993241012096405,grad_norm: 0.9999992511495079, iteration: 55412
loss: 0.9802729487419128,grad_norm: 0.9999992654498915, iteration: 55413
loss: 0.9856393933296204,grad_norm: 0.8971252814924502, iteration: 55414
loss: 0.9998458623886108,grad_norm: 0.9999997529465152, iteration: 55415
loss: 1.0335546731948853,grad_norm: 0.9999992135267559, iteration: 55416
loss: 1.0021151304244995,grad_norm: 0.939688944198933, iteration: 55417
loss: 1.0034252405166626,grad_norm: 0.9005861190106493, iteration: 55418
loss: 1.0183109045028687,grad_norm: 0.9999992121203608, iteration: 55419
loss: 0.9733015298843384,grad_norm: 0.9819723082116991, iteration: 55420
loss: 1.0329279899597168,grad_norm: 0.999999511763, iteration: 55421
loss: 0.9895357489585876,grad_norm: 0.9999992392904088, iteration: 55422
loss: 1.035381555557251,grad_norm: 0.9999993623277998, iteration: 55423
loss: 0.9747244715690613,grad_norm: 0.9999990373841344, iteration: 55424
loss: 0.9946022629737854,grad_norm: 0.9709530997471946, iteration: 55425
loss: 1.0093952417373657,grad_norm: 0.9999990368603583, iteration: 55426
loss: 1.008303165435791,grad_norm: 0.9712420281431897, iteration: 55427
loss: 0.9949573874473572,grad_norm: 0.9999991538893427, iteration: 55428
loss: 1.010144591331482,grad_norm: 0.9999991584786242, iteration: 55429
loss: 0.9972642660140991,grad_norm: 0.9999992719129916, iteration: 55430
loss: 0.9904425740242004,grad_norm: 0.999999684814337, iteration: 55431
loss: 1.0702708959579468,grad_norm: 0.9999993538737225, iteration: 55432
loss: 1.0419753789901733,grad_norm: 0.9999992576179708, iteration: 55433
loss: 1.0111583471298218,grad_norm: 0.9999990853772177, iteration: 55434
loss: 0.9865467548370361,grad_norm: 0.9538626698648933, iteration: 55435
loss: 1.0317105054855347,grad_norm: 0.8821144674651062, iteration: 55436
loss: 1.0172781944274902,grad_norm: 0.9999990780578063, iteration: 55437
loss: 1.0834685564041138,grad_norm: 0.9999996788308119, iteration: 55438
loss: 1.0328165292739868,grad_norm: 0.8894505208878373, iteration: 55439
loss: 1.0061149597167969,grad_norm: 0.9999990998728241, iteration: 55440
loss: 1.0269228219985962,grad_norm: 0.9999990493668729, iteration: 55441
loss: 1.01273512840271,grad_norm: 0.9999991193593135, iteration: 55442
loss: 0.9935535788536072,grad_norm: 0.9999991119654723, iteration: 55443
loss: 1.0014939308166504,grad_norm: 0.9779316779377447, iteration: 55444
loss: 0.9793197512626648,grad_norm: 0.9452523514272217, iteration: 55445
loss: 1.0094987154006958,grad_norm: 0.9156151064733833, iteration: 55446
loss: 1.0473275184631348,grad_norm: 0.9880715213828002, iteration: 55447
loss: 1.0017973184585571,grad_norm: 0.9498357130458008, iteration: 55448
loss: 1.0992542505264282,grad_norm: 0.9999995227052831, iteration: 55449
loss: 0.9861888289451599,grad_norm: 0.9999992367177214, iteration: 55450
loss: 1.0221518278121948,grad_norm: 0.9999992703286105, iteration: 55451
loss: 0.9683083891868591,grad_norm: 0.9650295415589235, iteration: 55452
loss: 1.0329606533050537,grad_norm: 0.9164877330230575, iteration: 55453
loss: 1.0039304494857788,grad_norm: 0.7933385784369942, iteration: 55454
loss: 1.0087671279907227,grad_norm: 0.9999991124600199, iteration: 55455
loss: 1.0277756452560425,grad_norm: 0.9999994265728982, iteration: 55456
loss: 1.0447018146514893,grad_norm: 0.8181518678702562, iteration: 55457
loss: 1.0436540842056274,grad_norm: 0.9999999312749922, iteration: 55458
loss: 1.056390404701233,grad_norm: 0.9999999650096016, iteration: 55459
loss: 1.0326743125915527,grad_norm: 0.9456398184109255, iteration: 55460
loss: 0.989818811416626,grad_norm: 0.9885671535601822, iteration: 55461
loss: 1.0360642671585083,grad_norm: 0.9999989862530883, iteration: 55462
loss: 1.015030026435852,grad_norm: 0.8542717152915161, iteration: 55463
loss: 1.0283360481262207,grad_norm: 0.9940195536614653, iteration: 55464
loss: 0.9899963736534119,grad_norm: 0.9641075899358297, iteration: 55465
loss: 0.9980780482292175,grad_norm: 0.8471703376445452, iteration: 55466
loss: 1.0002886056900024,grad_norm: 0.999999211353932, iteration: 55467
loss: 0.9871971011161804,grad_norm: 0.942152482314221, iteration: 55468
loss: 1.0191282033920288,grad_norm: 0.9999991755398884, iteration: 55469
loss: 1.0127557516098022,grad_norm: 0.9999990039009022, iteration: 55470
loss: 1.0092650651931763,grad_norm: 0.9044467413601855, iteration: 55471
loss: 0.9997983574867249,grad_norm: 0.8657068362790026, iteration: 55472
loss: 1.0006227493286133,grad_norm: 0.9868808181229655, iteration: 55473
loss: 1.034984827041626,grad_norm: 0.9999994559094597, iteration: 55474
loss: 0.9996495246887207,grad_norm: 0.9999990975069563, iteration: 55475
loss: 1.0299738645553589,grad_norm: 0.9999992158665124, iteration: 55476
loss: 1.010416865348816,grad_norm: 0.9999991336547676, iteration: 55477
loss: 1.0320208072662354,grad_norm: 0.9999992148212452, iteration: 55478
loss: 0.9897029399871826,grad_norm: 0.8767505399435076, iteration: 55479
loss: 0.9825531840324402,grad_norm: 0.9589168782213667, iteration: 55480
loss: 0.9581665396690369,grad_norm: 0.9507748028154589, iteration: 55481
loss: 0.9489117860794067,grad_norm: 0.9075911916164199, iteration: 55482
loss: 0.9936392307281494,grad_norm: 0.9829476791576086, iteration: 55483
loss: 0.9940700531005859,grad_norm: 0.9999992138942744, iteration: 55484
loss: 0.9790469408035278,grad_norm: 0.9999990662770029, iteration: 55485
loss: 0.987257719039917,grad_norm: 0.981109528019411, iteration: 55486
loss: 1.004777431488037,grad_norm: 0.991167288466788, iteration: 55487
loss: 1.0233532190322876,grad_norm: 0.9411502534148309, iteration: 55488
loss: 1.0686969757080078,grad_norm: 0.9999992156014359, iteration: 55489
loss: 1.0403509140014648,grad_norm: 0.9287405126888788, iteration: 55490
loss: 1.0356526374816895,grad_norm: 0.9999993473207226, iteration: 55491
loss: 1.0017539262771606,grad_norm: 0.9999991687865044, iteration: 55492
loss: 0.9866185784339905,grad_norm: 0.877873557924055, iteration: 55493
loss: 0.9902192950248718,grad_norm: 0.999999077082405, iteration: 55494
loss: 0.9889942407608032,grad_norm: 0.9999990807851408, iteration: 55495
loss: 0.9856911897659302,grad_norm: 0.9765792217785375, iteration: 55496
loss: 1.019423484802246,grad_norm: 0.9999992673418084, iteration: 55497
loss: 0.9727234840393066,grad_norm: 0.8928528613874144, iteration: 55498
loss: 1.020868182182312,grad_norm: 0.812579061211585, iteration: 55499
loss: 0.9927248358726501,grad_norm: 0.9999992301363014, iteration: 55500
loss: 0.9784314632415771,grad_norm: 0.9076534389423819, iteration: 55501
loss: 1.0353554487228394,grad_norm: 0.9001333670036794, iteration: 55502
loss: 1.0093902349472046,grad_norm: 0.91011416012964, iteration: 55503
loss: 1.0234695672988892,grad_norm: 0.9872501019807188, iteration: 55504
loss: 1.007541537284851,grad_norm: 0.9999990952493993, iteration: 55505
loss: 1.0452829599380493,grad_norm: 0.9999990409438805, iteration: 55506
loss: 1.0009734630584717,grad_norm: 0.999999155219777, iteration: 55507
loss: 0.9993581175804138,grad_norm: 0.9999992634994952, iteration: 55508
loss: 1.00380277633667,grad_norm: 0.9392622139694199, iteration: 55509
loss: 0.974459171295166,grad_norm: 0.9999991512388394, iteration: 55510
loss: 0.9810826182365417,grad_norm: 0.9978222479217448, iteration: 55511
loss: 0.9670370817184448,grad_norm: 0.9999990742983668, iteration: 55512
loss: 0.9775901436805725,grad_norm: 0.9999992150438514, iteration: 55513
loss: 0.999860942363739,grad_norm: 0.9999992238207679, iteration: 55514
loss: 0.9914875626564026,grad_norm: 0.9999991913015954, iteration: 55515
loss: 0.9870008826255798,grad_norm: 0.9999990459265478, iteration: 55516
loss: 1.0502607822418213,grad_norm: 0.9999997071972864, iteration: 55517
loss: 0.9735908508300781,grad_norm: 0.9999991774300349, iteration: 55518
loss: 1.000354528427124,grad_norm: 0.9999990654145853, iteration: 55519
loss: 0.9748572111129761,grad_norm: 0.9999990329774838, iteration: 55520
loss: 0.9939605593681335,grad_norm: 0.9999991324442868, iteration: 55521
loss: 0.9968196749687195,grad_norm: 0.9063903323569376, iteration: 55522
loss: 1.0084389448165894,grad_norm: 0.9999991418167983, iteration: 55523
loss: 1.0094808340072632,grad_norm: 0.8944575576867371, iteration: 55524
loss: 1.0049614906311035,grad_norm: 0.9412208860766879, iteration: 55525
loss: 0.9896019697189331,grad_norm: 0.9841735202714696, iteration: 55526
loss: 0.9997602701187134,grad_norm: 0.9653222104915034, iteration: 55527
loss: 1.0284535884857178,grad_norm: 0.9999991281962798, iteration: 55528
loss: 0.9727961421012878,grad_norm: 0.9999990653882243, iteration: 55529
loss: 1.0074936151504517,grad_norm: 0.9999991502841513, iteration: 55530
loss: 1.011997103691101,grad_norm: 0.999999132141497, iteration: 55531
loss: 1.0096793174743652,grad_norm: 0.8857335637314467, iteration: 55532
loss: 0.9993287920951843,grad_norm: 0.9999993172680175, iteration: 55533
loss: 1.040447473526001,grad_norm: 0.9666138221150642, iteration: 55534
loss: 0.9893491268157959,grad_norm: 0.9999992229181129, iteration: 55535
loss: 0.9927582740783691,grad_norm: 0.987130609040954, iteration: 55536
loss: 1.02023446559906,grad_norm: 0.861398133194941, iteration: 55537
loss: 0.9947358965873718,grad_norm: 0.9999991518657421, iteration: 55538
loss: 1.0539377927780151,grad_norm: 0.9999994772925273, iteration: 55539
loss: 1.019130825996399,grad_norm: 0.9251993957632367, iteration: 55540
loss: 0.9959587454795837,grad_norm: 0.9999989343556822, iteration: 55541
loss: 1.0074797868728638,grad_norm: 0.9999991169123179, iteration: 55542
loss: 1.0445829629898071,grad_norm: 0.9999991443332777, iteration: 55543
loss: 1.0029089450836182,grad_norm: 0.9999993251984314, iteration: 55544
loss: 1.0196799039840698,grad_norm: 0.9999990640416333, iteration: 55545
loss: 1.004629135131836,grad_norm: 0.9999990979886967, iteration: 55546
loss: 1.0109697580337524,grad_norm: 0.946945604765827, iteration: 55547
loss: 0.9607949256896973,grad_norm: 0.8775730568677615, iteration: 55548
loss: 1.0044499635696411,grad_norm: 0.9999989454112542, iteration: 55549
loss: 0.9940165877342224,grad_norm: 0.7652307694484837, iteration: 55550
loss: 1.0001589059829712,grad_norm: 0.9999989281166362, iteration: 55551
loss: 0.9880634546279907,grad_norm: 0.9116395659928225, iteration: 55552
loss: 0.9972435235977173,grad_norm: 0.905710415098998, iteration: 55553
loss: 0.9904360175132751,grad_norm: 0.9999991035903595, iteration: 55554
loss: 0.9997865557670593,grad_norm: 0.9999990352827088, iteration: 55555
loss: 1.0140718221664429,grad_norm: 0.9999991535256585, iteration: 55556
loss: 1.0115764141082764,grad_norm: 0.999999132147186, iteration: 55557
loss: 1.0296435356140137,grad_norm: 0.9999996176625912, iteration: 55558
loss: 1.0146068334579468,grad_norm: 0.995509425450015, iteration: 55559
loss: 1.0067567825317383,grad_norm: 0.916772077065409, iteration: 55560
loss: 1.0116407871246338,grad_norm: 0.8692213915785243, iteration: 55561
loss: 1.073972225189209,grad_norm: 0.9999996081188282, iteration: 55562
loss: 0.9989307522773743,grad_norm: 0.9439060829337733, iteration: 55563
loss: 1.0231118202209473,grad_norm: 0.9999990227052947, iteration: 55564
loss: 0.9932873845100403,grad_norm: 0.9999990808337936, iteration: 55565
loss: 1.0212293863296509,grad_norm: 0.8535443468218655, iteration: 55566
loss: 1.0076438188552856,grad_norm: 0.9999991714857028, iteration: 55567
loss: 0.9988159537315369,grad_norm: 0.821408060769761, iteration: 55568
loss: 1.0036436319351196,grad_norm: 0.9999991160892854, iteration: 55569
loss: 1.027700424194336,grad_norm: 0.9769818886507968, iteration: 55570
loss: 0.9870783090591431,grad_norm: 0.9154617258803033, iteration: 55571
loss: 0.9741549491882324,grad_norm: 0.9318877473848785, iteration: 55572
loss: 1.0137673616409302,grad_norm: 0.9999990835488103, iteration: 55573
loss: 1.0296282768249512,grad_norm: 0.8466987980797663, iteration: 55574
loss: 0.9868099689483643,grad_norm: 0.9999993234914821, iteration: 55575
loss: 1.018060326576233,grad_norm: 0.9566548457664259, iteration: 55576
loss: 0.9885685443878174,grad_norm: 0.9549458116868171, iteration: 55577
loss: 1.027453064918518,grad_norm: 0.9071328547335364, iteration: 55578
loss: 1.0176975727081299,grad_norm: 0.9611013266192631, iteration: 55579
loss: 1.030211329460144,grad_norm: 0.9240912768561166, iteration: 55580
loss: 1.0474921464920044,grad_norm: 0.93099520641396, iteration: 55581
loss: 0.9921679496765137,grad_norm: 0.943233247255269, iteration: 55582
loss: 0.992671549320221,grad_norm: 0.9999993589251651, iteration: 55583
loss: 0.9717561602592468,grad_norm: 0.9999992538487434, iteration: 55584
loss: 1.0109164714813232,grad_norm: 0.999999171713851, iteration: 55585
loss: 1.0244888067245483,grad_norm: 0.9999992031685188, iteration: 55586
loss: 1.0122947692871094,grad_norm: 0.9042493753325715, iteration: 55587
loss: 0.9768692851066589,grad_norm: 0.9999991163422007, iteration: 55588
loss: 0.9453868865966797,grad_norm: 0.9999991510799267, iteration: 55589
loss: 0.9681687355041504,grad_norm: 0.9999991491974299, iteration: 55590
loss: 0.9799486994743347,grad_norm: 0.9999991216163032, iteration: 55591
loss: 0.9794845581054688,grad_norm: 0.9860543564213103, iteration: 55592
loss: 1.0095696449279785,grad_norm: 0.9999989835339778, iteration: 55593
loss: 0.9646238088607788,grad_norm: 0.9862972383603597, iteration: 55594
loss: 0.9941151142120361,grad_norm: 0.9999992021108961, iteration: 55595
loss: 1.0250108242034912,grad_norm: 0.9999990752965318, iteration: 55596
loss: 1.0199153423309326,grad_norm: 0.9999990135007837, iteration: 55597
loss: 1.0537524223327637,grad_norm: 0.9999991096478806, iteration: 55598
loss: 0.9953804612159729,grad_norm: 0.9999988980691625, iteration: 55599
loss: 0.9759725332260132,grad_norm: 0.9465040643849271, iteration: 55600
loss: 1.022355318069458,grad_norm: 0.907693020154032, iteration: 55601
loss: 0.9945297241210938,grad_norm: 0.9903914434896766, iteration: 55602
loss: 1.0166319608688354,grad_norm: 0.9999990859728608, iteration: 55603
loss: 1.009164810180664,grad_norm: 0.9999990829686481, iteration: 55604
loss: 1.0177605152130127,grad_norm: 0.9999991261647053, iteration: 55605
loss: 1.0172474384307861,grad_norm: 0.9999992710232479, iteration: 55606
loss: 0.9983270168304443,grad_norm: 0.9999992266912131, iteration: 55607
loss: 0.9904493093490601,grad_norm: 0.9319239164331538, iteration: 55608
loss: 0.9748319983482361,grad_norm: 0.999999216435528, iteration: 55609
loss: 1.0099470615386963,grad_norm: 0.9841226413124113, iteration: 55610
loss: 0.9648670554161072,grad_norm: 0.9722792582166088, iteration: 55611
loss: 0.9969106912612915,grad_norm: 0.9787470335944504, iteration: 55612
loss: 0.9814876317977905,grad_norm: 0.9999991387481485, iteration: 55613
loss: 0.9873936176300049,grad_norm: 0.9999991925826703, iteration: 55614
loss: 0.9814790487289429,grad_norm: 0.999999006738096, iteration: 55615
loss: 0.9825357794761658,grad_norm: 0.999999126549184, iteration: 55616
loss: 1.0212440490722656,grad_norm: 0.8916417782835563, iteration: 55617
loss: 1.0370014905929565,grad_norm: 0.9406410366490695, iteration: 55618
loss: 0.9899078011512756,grad_norm: 0.9999992439263601, iteration: 55619
loss: 1.0062779188156128,grad_norm: 0.9801794075755608, iteration: 55620
loss: 0.9912796020507812,grad_norm: 0.99999917717632, iteration: 55621
loss: 0.9544000029563904,grad_norm: 0.9999990820843309, iteration: 55622
loss: 0.9876740574836731,grad_norm: 0.9897931553110733, iteration: 55623
loss: 1.0096838474273682,grad_norm: 0.9999993207829526, iteration: 55624
loss: 1.0022399425506592,grad_norm: 0.9523199224612003, iteration: 55625
loss: 0.9991622567176819,grad_norm: 0.9999991039470479, iteration: 55626
loss: 0.9946892261505127,grad_norm: 0.9999990837343673, iteration: 55627
loss: 1.0150318145751953,grad_norm: 0.9999994701768797, iteration: 55628
loss: 1.0309593677520752,grad_norm: 0.9999989676806906, iteration: 55629
loss: 1.0110924243927002,grad_norm: 0.9215685354558113, iteration: 55630
loss: 1.0136221647262573,grad_norm: 0.9999997098398135, iteration: 55631
loss: 1.0068131685256958,grad_norm: 0.9781046160446984, iteration: 55632
loss: 1.0011718273162842,grad_norm: 0.909306570328054, iteration: 55633
loss: 1.0102646350860596,grad_norm: 0.9999993418786267, iteration: 55634
loss: 0.9972328543663025,grad_norm: 0.9999991018958786, iteration: 55635
loss: 0.9809891581535339,grad_norm: 0.8603124839561719, iteration: 55636
loss: 0.9933549165725708,grad_norm: 0.9999991430623644, iteration: 55637
loss: 0.9884413480758667,grad_norm: 0.9999992098713344, iteration: 55638
loss: 1.0531773567199707,grad_norm: 0.9999992308720261, iteration: 55639
loss: 0.9994953274726868,grad_norm: 0.99999902550836, iteration: 55640
loss: 1.0048669576644897,grad_norm: 0.8152397908972854, iteration: 55641
loss: 1.0086387395858765,grad_norm: 0.9999990688955281, iteration: 55642
loss: 0.9846413731575012,grad_norm: 0.9999990955182617, iteration: 55643
loss: 0.9960584044456482,grad_norm: 0.9463343144508963, iteration: 55644
loss: 1.0292006731033325,grad_norm: 0.9999994793337661, iteration: 55645
loss: 0.9732154011726379,grad_norm: 0.999999198888653, iteration: 55646
loss: 0.9891799688339233,grad_norm: 0.9883299120912998, iteration: 55647
loss: 1.0426722764968872,grad_norm: 0.9999991987762543, iteration: 55648
loss: 0.9760145545005798,grad_norm: 0.9771993435800467, iteration: 55649
loss: 1.0267982482910156,grad_norm: 0.9999990328752918, iteration: 55650
loss: 0.9848726391792297,grad_norm: 0.9718018603187566, iteration: 55651
loss: 1.0189419984817505,grad_norm: 0.9999991004727679, iteration: 55652
loss: 0.9747712016105652,grad_norm: 0.9832090830384335, iteration: 55653
loss: 1.0036195516586304,grad_norm: 0.9999991483480348, iteration: 55654
loss: 0.969353973865509,grad_norm: 0.9894996768075425, iteration: 55655
loss: 0.9944850206375122,grad_norm: 0.9999991908065754, iteration: 55656
loss: 0.971354603767395,grad_norm: 0.868332878557105, iteration: 55657
loss: 1.034014105796814,grad_norm: 0.9150453327169573, iteration: 55658
loss: 0.9957454800605774,grad_norm: 0.9999991821666928, iteration: 55659
loss: 1.0404678583145142,grad_norm: 0.999999466702587, iteration: 55660
loss: 1.0010360479354858,grad_norm: 0.9999994394611699, iteration: 55661
loss: 0.9953603148460388,grad_norm: 0.9999989382879609, iteration: 55662
loss: 1.0190709829330444,grad_norm: 0.9999991068008052, iteration: 55663
loss: 0.9798887372016907,grad_norm: 0.9999989494211455, iteration: 55664
loss: 1.0951701402664185,grad_norm: 0.999999282593157, iteration: 55665
loss: 1.0027867555618286,grad_norm: 0.9164471436339451, iteration: 55666
loss: 0.9731716513633728,grad_norm: 0.9999990773885783, iteration: 55667
loss: 1.0354125499725342,grad_norm: 0.9999991937216212, iteration: 55668
loss: 1.012351393699646,grad_norm: 0.9724904711108059, iteration: 55669
loss: 1.0024113655090332,grad_norm: 0.999999231023323, iteration: 55670
loss: 1.0330890417099,grad_norm: 0.9999990197098045, iteration: 55671
loss: 0.9936997890472412,grad_norm: 0.9114434455373535, iteration: 55672
loss: 0.9816017150878906,grad_norm: 0.9999992151831792, iteration: 55673
loss: 1.0040239095687866,grad_norm: 0.9999990583539528, iteration: 55674
loss: 1.0290507078170776,grad_norm: 0.9032663589523799, iteration: 55675
loss: 0.9588654041290283,grad_norm: 0.8975525883338471, iteration: 55676
loss: 1.0109806060791016,grad_norm: 0.9999993545770288, iteration: 55677
loss: 0.9959608316421509,grad_norm: 0.9999991960316708, iteration: 55678
loss: 1.024569034576416,grad_norm: 0.9999991949169567, iteration: 55679
loss: 0.9759470820426941,grad_norm: 0.9999990865990862, iteration: 55680
loss: 0.9988003969192505,grad_norm: 0.9999989737936302, iteration: 55681
loss: 1.0142295360565186,grad_norm: 0.9604238883388633, iteration: 55682
loss: 1.0189220905303955,grad_norm: 0.99999918230881, iteration: 55683
loss: 0.9871275424957275,grad_norm: 0.9999990729631854, iteration: 55684
loss: 1.0133165121078491,grad_norm: 0.9999994221571584, iteration: 55685
loss: 0.971048891544342,grad_norm: 0.9999991179527171, iteration: 55686
loss: 0.9549481272697449,grad_norm: 0.9250941538397945, iteration: 55687
loss: 1.0199724435806274,grad_norm: 0.8983666372488258, iteration: 55688
loss: 1.0370659828186035,grad_norm: 0.9999991168606778, iteration: 55689
loss: 0.9955807328224182,grad_norm: 0.9999993219993704, iteration: 55690
loss: 1.0274289846420288,grad_norm: 0.999999396156881, iteration: 55691
loss: 0.9883292317390442,grad_norm: 0.9999989636707705, iteration: 55692
loss: 1.0156145095825195,grad_norm: 0.9978143636273852, iteration: 55693
loss: 1.0107835531234741,grad_norm: 0.9909069119292081, iteration: 55694
loss: 0.9765143990516663,grad_norm: 0.9127268699563111, iteration: 55695
loss: 0.9637627601623535,grad_norm: 0.9999990503110682, iteration: 55696
loss: 0.975636899471283,grad_norm: 0.9782877795344009, iteration: 55697
loss: 1.0186923742294312,grad_norm: 0.9769135589313583, iteration: 55698
loss: 1.0254658460617065,grad_norm: 0.9768011417841088, iteration: 55699
loss: 1.0267024040222168,grad_norm: 0.9999990301327134, iteration: 55700
loss: 1.0303075313568115,grad_norm: 0.999999190299191, iteration: 55701
loss: 1.0177816152572632,grad_norm: 0.9999989260070874, iteration: 55702
loss: 1.0004163980484009,grad_norm: 0.9425407466539593, iteration: 55703
loss: 1.050384521484375,grad_norm: 0.9999998637125526, iteration: 55704
loss: 1.0261660814285278,grad_norm: 0.9999992312202983, iteration: 55705
loss: 1.0403752326965332,grad_norm: 0.848165787031743, iteration: 55706
loss: 1.0299617052078247,grad_norm: 0.87316903845379, iteration: 55707
loss: 1.0155348777770996,grad_norm: 0.9465674551471328, iteration: 55708
loss: 1.0261974334716797,grad_norm: 0.9999990330838707, iteration: 55709
loss: 1.020499587059021,grad_norm: 0.9999991297530125, iteration: 55710
loss: 1.058663010597229,grad_norm: 0.9616530011147212, iteration: 55711
loss: 1.0057040452957153,grad_norm: 0.93061305623421, iteration: 55712
loss: 1.014463186264038,grad_norm: 0.999998944037563, iteration: 55713
loss: 0.9958404898643494,grad_norm: 0.9999992325024671, iteration: 55714
loss: 0.9705382585525513,grad_norm: 0.99999911071193, iteration: 55715
loss: 0.9718218445777893,grad_norm: 0.9999990385558875, iteration: 55716
loss: 0.9920005798339844,grad_norm: 0.9999990547831058, iteration: 55717
loss: 1.0218440294265747,grad_norm: 0.9999992642074398, iteration: 55718
loss: 1.0039819478988647,grad_norm: 0.814957852345228, iteration: 55719
loss: 1.0008885860443115,grad_norm: 0.999999068478556, iteration: 55720
loss: 1.024950623512268,grad_norm: 0.9666557103890626, iteration: 55721
loss: 1.0314931869506836,grad_norm: 0.999999367798984, iteration: 55722
loss: 0.9741400480270386,grad_norm: 0.99999911084342, iteration: 55723
loss: 1.006087303161621,grad_norm: 0.999999177781919, iteration: 55724
loss: 1.0291301012039185,grad_norm: 0.888181396183343, iteration: 55725
loss: 1.0466042757034302,grad_norm: 0.9687600193210127, iteration: 55726
loss: 0.9965575933456421,grad_norm: 0.9944933295893561, iteration: 55727
loss: 1.0212358236312866,grad_norm: 0.9999991885793758, iteration: 55728
loss: 1.0230238437652588,grad_norm: 0.9477218752047083, iteration: 55729
loss: 0.9993507862091064,grad_norm: 0.917855867421852, iteration: 55730
loss: 1.0089681148529053,grad_norm: 0.9999990390351378, iteration: 55731
loss: 1.0132473707199097,grad_norm: 0.972081834106238, iteration: 55732
loss: 1.0094503164291382,grad_norm: 0.9531620927103325, iteration: 55733
loss: 1.0459717512130737,grad_norm: 0.999999833556785, iteration: 55734
loss: 1.008535385131836,grad_norm: 0.9999989797417266, iteration: 55735
loss: 1.0032672882080078,grad_norm: 0.8886287967821392, iteration: 55736
loss: 1.0166668891906738,grad_norm: 0.9999991055105034, iteration: 55737
loss: 0.9816330075263977,grad_norm: 0.9999992280692301, iteration: 55738
loss: 0.9769802093505859,grad_norm: 0.9999991583537455, iteration: 55739
loss: 0.9996643662452698,grad_norm: 0.9999992043449193, iteration: 55740
loss: 1.014501690864563,grad_norm: 0.9999990516491217, iteration: 55741
loss: 0.9753140211105347,grad_norm: 0.9400447644564031, iteration: 55742
loss: 0.9825404286384583,grad_norm: 0.9999990935439238, iteration: 55743
loss: 0.9917587041854858,grad_norm: 0.99999902040801, iteration: 55744
loss: 1.017637848854065,grad_norm: 0.9999991644714995, iteration: 55745
loss: 1.0262333154678345,grad_norm: 0.9999991692224944, iteration: 55746
loss: 1.0006061792373657,grad_norm: 0.9999990744107283, iteration: 55747
loss: 0.9685827493667603,grad_norm: 0.9278933237080865, iteration: 55748
loss: 1.0105335712432861,grad_norm: 0.9272458126152462, iteration: 55749
loss: 0.9740094542503357,grad_norm: 0.9095809065765194, iteration: 55750
loss: 0.9609819650650024,grad_norm: 0.9999993148495437, iteration: 55751
loss: 1.0250041484832764,grad_norm: 0.9256834757265636, iteration: 55752
loss: 0.9828018546104431,grad_norm: 0.9774366926066506, iteration: 55753
loss: 0.9903149604797363,grad_norm: 0.9782499904562945, iteration: 55754
loss: 1.0058155059814453,grad_norm: 0.9999994399399704, iteration: 55755
loss: 0.9637081623077393,grad_norm: 0.9999991728759489, iteration: 55756
loss: 0.9762366414070129,grad_norm: 0.9857126931705624, iteration: 55757
loss: 0.9840277433395386,grad_norm: 0.9999989966188848, iteration: 55758
loss: 1.0382500886917114,grad_norm: 0.9999990969187277, iteration: 55759
loss: 1.0251778364181519,grad_norm: 0.9733483112486228, iteration: 55760
loss: 0.9962465763092041,grad_norm: 0.8915160515471792, iteration: 55761
loss: 0.9712691307067871,grad_norm: 0.9147898614514354, iteration: 55762
loss: 1.0027437210083008,grad_norm: 0.9999991380506569, iteration: 55763
loss: 1.022976279258728,grad_norm: 0.9999994199426665, iteration: 55764
loss: 0.9730048775672913,grad_norm: 0.9030360037464652, iteration: 55765
loss: 0.9967760443687439,grad_norm: 0.9999990614367853, iteration: 55766
loss: 1.0062859058380127,grad_norm: 0.9669595841084448, iteration: 55767
loss: 1.0055110454559326,grad_norm: 0.9652355031239948, iteration: 55768
loss: 1.0392546653747559,grad_norm: 0.9850279835361629, iteration: 55769
loss: 0.9867979884147644,grad_norm: 0.9999991789109575, iteration: 55770
loss: 1.0442503690719604,grad_norm: 0.9999995712044163, iteration: 55771
loss: 1.0185070037841797,grad_norm: 0.9772256527442836, iteration: 55772
loss: 1.0021113157272339,grad_norm: 0.985335851740325, iteration: 55773
loss: 1.0018154382705688,grad_norm: 0.9148827941635536, iteration: 55774
loss: 0.9806740880012512,grad_norm: 0.9999991045468586, iteration: 55775
loss: 1.0243237018585205,grad_norm: 0.9999991019150749, iteration: 55776
loss: 1.04685378074646,grad_norm: 0.9999990801679162, iteration: 55777
loss: 1.0313764810562134,grad_norm: 0.9999992067545418, iteration: 55778
loss: 0.9847137331962585,grad_norm: 0.9550207311710726, iteration: 55779
loss: 1.021187424659729,grad_norm: 0.9999991502571564, iteration: 55780
loss: 0.9666346311569214,grad_norm: 0.9999991919294573, iteration: 55781
loss: 0.9860493540763855,grad_norm: 0.9999988532184757, iteration: 55782
loss: 1.0082365274429321,grad_norm: 0.8744042893442466, iteration: 55783
loss: 1.002061128616333,grad_norm: 0.9999990999367391, iteration: 55784
loss: 1.004915475845337,grad_norm: 0.9999992392803158, iteration: 55785
loss: 0.9696292281150818,grad_norm: 0.9124937466388128, iteration: 55786
loss: 1.0143176317214966,grad_norm: 0.9999991489347011, iteration: 55787
loss: 0.99411940574646,grad_norm: 0.999999219562983, iteration: 55788
loss: 1.024379849433899,grad_norm: 0.9999991725720985, iteration: 55789
loss: 0.9842957258224487,grad_norm: 0.9999991023545931, iteration: 55790
loss: 0.9916285276412964,grad_norm: 0.9730951219306921, iteration: 55791
loss: 1.0394468307495117,grad_norm: 0.999998994233446, iteration: 55792
loss: 1.0081937313079834,grad_norm: 0.9999991025271857, iteration: 55793
loss: 0.9902752041816711,grad_norm: 0.9309290988251502, iteration: 55794
loss: 1.012571096420288,grad_norm: 0.9293864244223151, iteration: 55795
loss: 1.0311280488967896,grad_norm: 0.9332673858112007, iteration: 55796
loss: 0.9987772703170776,grad_norm: 0.7411616275420179, iteration: 55797
loss: 0.9768338203430176,grad_norm: 0.9768204559515757, iteration: 55798
loss: 1.0243170261383057,grad_norm: 0.9925961067889536, iteration: 55799
loss: 1.0171144008636475,grad_norm: 0.8702122653196193, iteration: 55800
loss: 1.0042790174484253,grad_norm: 0.9999990708939913, iteration: 55801
loss: 0.9958762526512146,grad_norm: 0.9999992001495858, iteration: 55802
loss: 0.9757941365242004,grad_norm: 0.955552449262405, iteration: 55803
loss: 1.000945806503296,grad_norm: 0.8933850363830494, iteration: 55804
loss: 0.9979281425476074,grad_norm: 0.8543918646221609, iteration: 55805
loss: 0.9834052324295044,grad_norm: 0.9999990463682512, iteration: 55806
loss: 0.9689806699752808,grad_norm: 0.9999991836220672, iteration: 55807
loss: 1.0200951099395752,grad_norm: 0.8844717199377107, iteration: 55808
loss: 1.0203766822814941,grad_norm: 0.8163218219143695, iteration: 55809
loss: 0.9906314611434937,grad_norm: 0.9520002289295286, iteration: 55810
loss: 1.0274964570999146,grad_norm: 0.9999990595888059, iteration: 55811
loss: 0.9905095100402832,grad_norm: 0.8674645307023308, iteration: 55812
loss: 1.0069780349731445,grad_norm: 0.9999989861896562, iteration: 55813
loss: 1.050334095954895,grad_norm: 0.9628658320061659, iteration: 55814
loss: 0.9646233320236206,grad_norm: 0.9999992936682535, iteration: 55815
loss: 0.998393177986145,grad_norm: 0.9730769763215995, iteration: 55816
loss: 1.012837529182434,grad_norm: 0.9999990454328119, iteration: 55817
loss: 1.019044280052185,grad_norm: 0.931014152624604, iteration: 55818
loss: 1.0106916427612305,grad_norm: 0.9999989755291125, iteration: 55819
loss: 1.0156813859939575,grad_norm: 0.8816607353737407, iteration: 55820
loss: 0.9758113026618958,grad_norm: 0.999998984784138, iteration: 55821
loss: 1.0002001523971558,grad_norm: 0.9999993254859993, iteration: 55822
loss: 1.038069725036621,grad_norm: 0.9999989189187201, iteration: 55823
loss: 1.0080657005310059,grad_norm: 0.9999989984195065, iteration: 55824
loss: 1.0035853385925293,grad_norm: 0.9999990470306129, iteration: 55825
loss: 0.9872655272483826,grad_norm: 0.999999091303081, iteration: 55826
loss: 1.0167025327682495,grad_norm: 0.9999992376553564, iteration: 55827
loss: 1.000043272972107,grad_norm: 0.9999990047346499, iteration: 55828
loss: 0.9721133708953857,grad_norm: 0.9999992486391897, iteration: 55829
loss: 1.0223356485366821,grad_norm: 0.947231141455614, iteration: 55830
loss: 1.0412970781326294,grad_norm: 0.8946947299151491, iteration: 55831
loss: 1.0037648677825928,grad_norm: 0.9999991965310256, iteration: 55832
loss: 0.9838227033615112,grad_norm: 0.9059406039769845, iteration: 55833
loss: 1.026459813117981,grad_norm: 0.9538377971812615, iteration: 55834
loss: 0.9873249530792236,grad_norm: 0.9520232912760038, iteration: 55835
loss: 1.00746750831604,grad_norm: 0.9999992061357788, iteration: 55836
loss: 1.0047144889831543,grad_norm: 0.9999991697127437, iteration: 55837
loss: 1.026559829711914,grad_norm: 0.9999992070463417, iteration: 55838
loss: 1.0063163042068481,grad_norm: 0.95328747029572, iteration: 55839
loss: 1.0044957399368286,grad_norm: 0.9999989926261681, iteration: 55840
loss: 1.049468994140625,grad_norm: 0.99999941066048, iteration: 55841
loss: 0.9706906080245972,grad_norm: 0.9999991824826597, iteration: 55842
loss: 0.9893892407417297,grad_norm: 0.9999991170104069, iteration: 55843
loss: 0.9735124707221985,grad_norm: 0.999999163985523, iteration: 55844
loss: 1.0444782972335815,grad_norm: 0.9999998315859652, iteration: 55845
loss: 0.9687376618385315,grad_norm: 0.9501554765977874, iteration: 55846
loss: 1.0062700510025024,grad_norm: 0.9999995369407202, iteration: 55847
loss: 1.0092772245407104,grad_norm: 0.9999990878652808, iteration: 55848
loss: 0.9519162774085999,grad_norm: 0.9822885910077147, iteration: 55849
loss: 0.9701823592185974,grad_norm: 0.9999991889972069, iteration: 55850
loss: 0.9871703386306763,grad_norm: 0.8803798005428951, iteration: 55851
loss: 1.0010864734649658,grad_norm: 0.9998184475424995, iteration: 55852
loss: 0.9949352145195007,grad_norm: 0.9999992131766565, iteration: 55853
loss: 0.9798868894577026,grad_norm: 0.9362474196804327, iteration: 55854
loss: 0.9941551089286804,grad_norm: 0.9999991770450187, iteration: 55855
loss: 1.0180302858352661,grad_norm: 0.9868085732558982, iteration: 55856
loss: 1.0105199813842773,grad_norm: 0.9999991643589801, iteration: 55857
loss: 0.9758810997009277,grad_norm: 0.8325104607044445, iteration: 55858
loss: 1.0281493663787842,grad_norm: 0.9941375728370928, iteration: 55859
loss: 0.9894489049911499,grad_norm: 0.9689907820043855, iteration: 55860
loss: 1.0010298490524292,grad_norm: 0.999999242313961, iteration: 55861
loss: 1.0350866317749023,grad_norm: 0.9659439486964587, iteration: 55862
loss: 0.9942336678504944,grad_norm: 0.9999991772989, iteration: 55863
loss: 0.9965730905532837,grad_norm: 0.9999996952126737, iteration: 55864
loss: 0.9893895387649536,grad_norm: 0.9985545503511298, iteration: 55865
loss: 1.01427161693573,grad_norm: 0.9087019364844776, iteration: 55866
loss: 0.9623013138771057,grad_norm: 0.9999990389816669, iteration: 55867
loss: 1.022492527961731,grad_norm: 0.9005373305818237, iteration: 55868
loss: 1.0063502788543701,grad_norm: 0.9999992586725002, iteration: 55869
loss: 0.9329273700714111,grad_norm: 0.9999992631268811, iteration: 55870
loss: 0.9927218556404114,grad_norm: 0.9999992525537895, iteration: 55871
loss: 1.0121910572052002,grad_norm: 0.999999032144688, iteration: 55872
loss: 1.0330619812011719,grad_norm: 0.9466081515313731, iteration: 55873
loss: 1.0302376747131348,grad_norm: 0.9375575542504756, iteration: 55874
loss: 1.040088415145874,grad_norm: 0.9999994417109256, iteration: 55875
loss: 0.9615846872329712,grad_norm: 0.9999991339528004, iteration: 55876
loss: 1.0104515552520752,grad_norm: 0.9999990465267661, iteration: 55877
loss: 0.9987604022026062,grad_norm: 0.8294436848519599, iteration: 55878
loss: 0.9901866912841797,grad_norm: 0.9323257292492799, iteration: 55879
loss: 0.9886072278022766,grad_norm: 0.9805013094279359, iteration: 55880
loss: 1.0405635833740234,grad_norm: 0.9999993357449751, iteration: 55881
loss: 1.0176594257354736,grad_norm: 0.9999991158322523, iteration: 55882
loss: 0.9911187887191772,grad_norm: 0.9188339229959471, iteration: 55883
loss: 0.9922546744346619,grad_norm: 0.999999029715502, iteration: 55884
loss: 0.9884111881256104,grad_norm: 0.9999990993184598, iteration: 55885
loss: 0.9896576404571533,grad_norm: 0.9382954525117908, iteration: 55886
loss: 1.0020983219146729,grad_norm: 0.9999992841574059, iteration: 55887
loss: 1.032659649848938,grad_norm: 0.9999992510562474, iteration: 55888
loss: 0.983471691608429,grad_norm: 0.999999420643642, iteration: 55889
loss: 1.0028634071350098,grad_norm: 0.9999992449384634, iteration: 55890
loss: 0.9748584032058716,grad_norm: 0.9497487543359902, iteration: 55891
loss: 1.026629090309143,grad_norm: 0.9999990586049952, iteration: 55892
loss: 0.9929251074790955,grad_norm: 0.9806728291704707, iteration: 55893
loss: 0.9911720156669617,grad_norm: 0.9412685906211192, iteration: 55894
loss: 0.9837858080863953,grad_norm: 0.9999990920289287, iteration: 55895
loss: 0.9660857915878296,grad_norm: 0.92182772210746, iteration: 55896
loss: 1.053017258644104,grad_norm: 0.9999994088554015, iteration: 55897
loss: 0.9506847858428955,grad_norm: 0.880645815715951, iteration: 55898
loss: 1.016829252243042,grad_norm: 0.9999991651257187, iteration: 55899
loss: 1.016500473022461,grad_norm: 0.9999994015537305, iteration: 55900
loss: 1.0229591131210327,grad_norm: 0.9999992604126199, iteration: 55901
loss: 1.01095712184906,grad_norm: 0.9999991289822552, iteration: 55902
loss: 1.0425386428833008,grad_norm: 0.9999990618392084, iteration: 55903
loss: 1.0143895149230957,grad_norm: 0.8841451772075878, iteration: 55904
loss: 1.0227177143096924,grad_norm: 0.9999992352752233, iteration: 55905
loss: 1.0173475742340088,grad_norm: 0.999999112906347, iteration: 55906
loss: 1.042277455329895,grad_norm: 0.9999990761111743, iteration: 55907
loss: 0.9277005195617676,grad_norm: 0.9999989916081661, iteration: 55908
loss: 1.000864863395691,grad_norm: 0.9999992019454949, iteration: 55909
loss: 1.0361547470092773,grad_norm: 0.9999997860144709, iteration: 55910
loss: 0.9681769013404846,grad_norm: 0.909694761343724, iteration: 55911
loss: 0.9914314150810242,grad_norm: 0.9999992309796286, iteration: 55912
loss: 1.0038679838180542,grad_norm: 0.974572625316335, iteration: 55913
loss: 0.9990919828414917,grad_norm: 0.9999990492152584, iteration: 55914
loss: 0.986760139465332,grad_norm: 0.9999991747746937, iteration: 55915
loss: 0.9893236756324768,grad_norm: 0.9999992017200512, iteration: 55916
loss: 0.9880905747413635,grad_norm: 0.9446800363364838, iteration: 55917
loss: 0.9864910244941711,grad_norm: 0.920232841551149, iteration: 55918
loss: 1.0183309316635132,grad_norm: 0.9999990587724987, iteration: 55919
loss: 1.0145909786224365,grad_norm: 0.9200068186783418, iteration: 55920
loss: 0.9791232347488403,grad_norm: 0.8103898176461659, iteration: 55921
loss: 0.9878625869750977,grad_norm: 0.999999233390904, iteration: 55922
loss: 1.0309560298919678,grad_norm: 0.9999991646641134, iteration: 55923
loss: 0.9639869332313538,grad_norm: 0.8860301484878311, iteration: 55924
loss: 1.0392946004867554,grad_norm: 0.9999991263393212, iteration: 55925
loss: 1.0144082307815552,grad_norm: 0.9999991979546866, iteration: 55926
loss: 1.0084139108657837,grad_norm: 0.9999997518812643, iteration: 55927
loss: 1.0085111856460571,grad_norm: 0.9999992094835725, iteration: 55928
loss: 0.9618775248527527,grad_norm: 0.9999990342336157, iteration: 55929
loss: 0.9991130232810974,grad_norm: 0.8852872788332838, iteration: 55930
loss: 0.9514349699020386,grad_norm: 0.9999990924381348, iteration: 55931
loss: 1.0328823328018188,grad_norm: 0.9999991143800884, iteration: 55932
loss: 0.979668915271759,grad_norm: 0.9854920331415565, iteration: 55933
loss: 1.0384318828582764,grad_norm: 0.8734355884218101, iteration: 55934
loss: 0.9637765288352966,grad_norm: 0.9999991484746737, iteration: 55935
loss: 0.992600679397583,grad_norm: 0.9999991334614607, iteration: 55936
loss: 1.0390591621398926,grad_norm: 0.9999991956342673, iteration: 55937
loss: 0.9873465895652771,grad_norm: 0.9204487456725087, iteration: 55938
loss: 0.9916170239448547,grad_norm: 0.9999991859206245, iteration: 55939
loss: 1.0259552001953125,grad_norm: 0.9661757986172154, iteration: 55940
loss: 0.9603551030158997,grad_norm: 0.9999990844345815, iteration: 55941
loss: 1.0320709943771362,grad_norm: 0.9999990401324698, iteration: 55942
loss: 1.004141092300415,grad_norm: 0.9999991713214198, iteration: 55943
loss: 1.0206259489059448,grad_norm: 0.9999991257345936, iteration: 55944
loss: 0.9985880255699158,grad_norm: 0.9999992686981286, iteration: 55945
loss: 0.9916269779205322,grad_norm: 0.9738005658046228, iteration: 55946
loss: 0.9903576374053955,grad_norm: 0.999999329521525, iteration: 55947
loss: 0.9649319052696228,grad_norm: 0.999999024030825, iteration: 55948
loss: 0.9947614669799805,grad_norm: 0.9999991394668668, iteration: 55949
loss: 0.9577746391296387,grad_norm: 0.9999990681683856, iteration: 55950
loss: 0.969806432723999,grad_norm: 0.9591212275886769, iteration: 55951
loss: 1.057335376739502,grad_norm: 0.9999991639487227, iteration: 55952
loss: 1.0036778450012207,grad_norm: 0.9670980514082052, iteration: 55953
loss: 1.0550990104675293,grad_norm: 0.9999999048214983, iteration: 55954
loss: 1.0004312992095947,grad_norm: 0.9999990715009648, iteration: 55955
loss: 0.9978817105293274,grad_norm: 0.9537157734999404, iteration: 55956
loss: 0.9897237420082092,grad_norm: 0.8283855161570788, iteration: 55957
loss: 0.9867777228355408,grad_norm: 0.999999038514595, iteration: 55958
loss: 1.0023980140686035,grad_norm: 0.9999992762356693, iteration: 55959
loss: 0.9845311641693115,grad_norm: 0.988310414110122, iteration: 55960
loss: 0.9824167490005493,grad_norm: 0.93725120171, iteration: 55961
loss: 0.9746466875076294,grad_norm: 0.9999990909430495, iteration: 55962
loss: 1.013918161392212,grad_norm: 0.9999991185941529, iteration: 55963
loss: 1.0075631141662598,grad_norm: 0.9307706867373837, iteration: 55964
loss: 1.0081655979156494,grad_norm: 0.9999990063397312, iteration: 55965
loss: 1.0297389030456543,grad_norm: 0.9999991594400169, iteration: 55966
loss: 1.003759741783142,grad_norm: 0.9999990974952547, iteration: 55967
loss: 1.022603988647461,grad_norm: 0.9999993055229729, iteration: 55968
loss: 0.9946702122688293,grad_norm: 0.9999994561730551, iteration: 55969
loss: 1.0245252847671509,grad_norm: 0.9999993089539072, iteration: 55970
loss: 1.0200237035751343,grad_norm: 0.9999991494976423, iteration: 55971
loss: 0.9944186210632324,grad_norm: 0.8730743560411505, iteration: 55972
loss: 1.010925054550171,grad_norm: 0.99999928563773, iteration: 55973
loss: 0.9945335984230042,grad_norm: 0.8897062952913778, iteration: 55974
loss: 1.0358816385269165,grad_norm: 0.9999990186284796, iteration: 55975
loss: 0.9896328449249268,grad_norm: 0.9999991315233874, iteration: 55976
loss: 1.0255365371704102,grad_norm: 0.9999994594886826, iteration: 55977
loss: 1.007446527481079,grad_norm: 0.9999997366910579, iteration: 55978
loss: 0.9609079957008362,grad_norm: 0.8834414226084345, iteration: 55979
loss: 1.0127182006835938,grad_norm: 0.9999990427399384, iteration: 55980
loss: 1.0092277526855469,grad_norm: 0.865361844642252, iteration: 55981
loss: 1.0186043977737427,grad_norm: 0.8717865697625113, iteration: 55982
loss: 0.9695827960968018,grad_norm: 0.999999222194994, iteration: 55983
loss: 1.0267475843429565,grad_norm: 0.9999990365729129, iteration: 55984
loss: 0.9865260720252991,grad_norm: 0.7593648752135004, iteration: 55985
loss: 1.022200584411621,grad_norm: 0.9266972826327541, iteration: 55986
loss: 0.9787436127662659,grad_norm: 0.9999990642576266, iteration: 55987
loss: 1.0467947721481323,grad_norm: 0.999999182154556, iteration: 55988
loss: 0.9810203313827515,grad_norm: 0.9999991148788897, iteration: 55989
loss: 0.9777839183807373,grad_norm: 0.9999992749722268, iteration: 55990
loss: 0.9622828364372253,grad_norm: 0.9742339649552736, iteration: 55991
loss: 0.9849531650543213,grad_norm: 0.999999531566417, iteration: 55992
loss: 1.0058598518371582,grad_norm: 0.9999991751997239, iteration: 55993
loss: 1.0245522260665894,grad_norm: 0.9999991738918349, iteration: 55994
loss: 0.999778151512146,grad_norm: 0.9278295253266449, iteration: 55995
loss: 1.0156883001327515,grad_norm: 0.9914766148256458, iteration: 55996
loss: 0.9889928102493286,grad_norm: 0.9418022889744652, iteration: 55997
loss: 0.9776798486709595,grad_norm: 0.9864056761259735, iteration: 55998
loss: 1.0606675148010254,grad_norm: 0.9999996099033457, iteration: 55999
loss: 0.9867141842842102,grad_norm: 0.9208555278865868, iteration: 56000
loss: 0.9873225688934326,grad_norm: 0.9999991770617002, iteration: 56001
loss: 0.9857193827629089,grad_norm: 0.8813020614538775, iteration: 56002
loss: 1.0320920944213867,grad_norm: 0.9999994135812365, iteration: 56003
loss: 1.023939847946167,grad_norm: 0.999999201347386, iteration: 56004
loss: 0.9698770642280579,grad_norm: 0.9592677044831193, iteration: 56005
loss: 1.0235984325408936,grad_norm: 0.9999992178712529, iteration: 56006
loss: 0.9897180795669556,grad_norm: 0.902249722140269, iteration: 56007
loss: 0.9923376441001892,grad_norm: 0.8828864361052632, iteration: 56008
loss: 1.0244542360305786,grad_norm: 0.9999991347967194, iteration: 56009
loss: 1.0571173429489136,grad_norm: 0.9999992876323669, iteration: 56010
loss: 1.0195263624191284,grad_norm: 0.9999990264066975, iteration: 56011
loss: 0.9895723462104797,grad_norm: 0.9999991870993427, iteration: 56012
loss: 0.9744510650634766,grad_norm: 0.999999082090297, iteration: 56013
loss: 1.081958293914795,grad_norm: 0.9999995539825325, iteration: 56014
loss: 0.9643607139587402,grad_norm: 0.999999262573425, iteration: 56015
loss: 1.0082298517227173,grad_norm: 0.9999996386845247, iteration: 56016
loss: 1.0122257471084595,grad_norm: 0.9627248907131966, iteration: 56017
loss: 1.0568634271621704,grad_norm: 0.9999996065946888, iteration: 56018
loss: 1.0404469966888428,grad_norm: 0.9266998863747699, iteration: 56019
loss: 0.9749846458435059,grad_norm: 0.999999243421821, iteration: 56020
loss: 1.03011953830719,grad_norm: 0.9269933875625678, iteration: 56021
loss: 1.0098891258239746,grad_norm: 0.999999489940361, iteration: 56022
loss: 1.0045180320739746,grad_norm: 0.9507274578769537, iteration: 56023
loss: 1.0351651906967163,grad_norm: 0.9999991675710155, iteration: 56024
loss: 1.0335959196090698,grad_norm: 0.9999990480252117, iteration: 56025
loss: 1.0097758769989014,grad_norm: 0.9999991953162866, iteration: 56026
loss: 1.0329281091690063,grad_norm: 0.9464878334232933, iteration: 56027
loss: 1.0664448738098145,grad_norm: 0.9999993266025466, iteration: 56028
loss: 0.9774590730667114,grad_norm: 0.9676434816666067, iteration: 56029
loss: 1.009128451347351,grad_norm: 0.9216972240070023, iteration: 56030
loss: 0.9730952978134155,grad_norm: 0.9999991202044234, iteration: 56031
loss: 0.9729748964309692,grad_norm: 0.9999989518799558, iteration: 56032
loss: 1.0201221704483032,grad_norm: 0.9999991253459083, iteration: 56033
loss: 1.0314325094223022,grad_norm: 0.9999991795677765, iteration: 56034
loss: 1.0069531202316284,grad_norm: 0.9999991641780884, iteration: 56035
loss: 0.9649068117141724,grad_norm: 0.9999991072402727, iteration: 56036
loss: 1.0344141721725464,grad_norm: 0.9999991097679819, iteration: 56037
loss: 1.0233601331710815,grad_norm: 0.9999993587239104, iteration: 56038
loss: 1.0013039112091064,grad_norm: 0.9104059868387557, iteration: 56039
loss: 0.9901658296585083,grad_norm: 0.9999990595866796, iteration: 56040
loss: 0.994479775428772,grad_norm: 0.9999989848223364, iteration: 56041
loss: 1.0144401788711548,grad_norm: 0.9999993441914373, iteration: 56042
loss: 1.0228352546691895,grad_norm: 0.999999491966219, iteration: 56043
loss: 1.008621096611023,grad_norm: 0.9363542226380297, iteration: 56044
loss: 0.9838188886642456,grad_norm: 0.978595701364572, iteration: 56045
loss: 1.0226402282714844,grad_norm: 0.9999994665165796, iteration: 56046
loss: 1.0071197748184204,grad_norm: 0.999999034513448, iteration: 56047
loss: 1.0001797676086426,grad_norm: 0.9999991008418231, iteration: 56048
loss: 1.0405585765838623,grad_norm: 0.9949817501322317, iteration: 56049
loss: 0.9768700003623962,grad_norm: 0.9999990524701745, iteration: 56050
loss: 0.996311604976654,grad_norm: 0.9999992208754653, iteration: 56051
loss: 1.0323618650436401,grad_norm: 0.9800629212773099, iteration: 56052
loss: 1.0079187154769897,grad_norm: 0.9355612732139419, iteration: 56053
loss: 1.0067381858825684,grad_norm: 0.9999992662486964, iteration: 56054
loss: 1.0108860731124878,grad_norm: 0.9326045520607034, iteration: 56055
loss: 1.0267407894134521,grad_norm: 0.9999998921424915, iteration: 56056
loss: 0.9690774083137512,grad_norm: 0.9000583637098486, iteration: 56057
loss: 1.0203479528427124,grad_norm: 0.9895397516598754, iteration: 56058
loss: 0.9960002899169922,grad_norm: 0.9999991371338292, iteration: 56059
loss: 1.0135306119918823,grad_norm: 0.9999991162386531, iteration: 56060
loss: 0.9843552708625793,grad_norm: 0.9999992030918572, iteration: 56061
loss: 1.0349833965301514,grad_norm: 0.9999990504379958, iteration: 56062
loss: 0.9979950189590454,grad_norm: 0.9619295869025718, iteration: 56063
loss: 0.9946044683456421,grad_norm: 0.9999992923102994, iteration: 56064
loss: 0.9961022138595581,grad_norm: 0.871629179673126, iteration: 56065
loss: 1.007187008857727,grad_norm: 0.9093026052491491, iteration: 56066
loss: 1.0004984140396118,grad_norm: 0.9999991694997321, iteration: 56067
loss: 1.0083593130111694,grad_norm: 0.9999992332024444, iteration: 56068
loss: 1.0173543691635132,grad_norm: 0.82051236798582, iteration: 56069
loss: 1.009519100189209,grad_norm: 0.9999991291436932, iteration: 56070
loss: 0.9934828281402588,grad_norm: 0.9999990901102392, iteration: 56071
loss: 1.029457688331604,grad_norm: 0.9701423630283952, iteration: 56072
loss: 0.9935175776481628,grad_norm: 0.8520297571687518, iteration: 56073
loss: 0.9775409698486328,grad_norm: 0.8432581328966874, iteration: 56074
loss: 1.0017656087875366,grad_norm: 0.8830513504558865, iteration: 56075
loss: 0.9824308753013611,grad_norm: 0.9293396527673444, iteration: 56076
loss: 1.0272786617279053,grad_norm: 0.8621085284583676, iteration: 56077
loss: 0.9641473889350891,grad_norm: 0.9239232821729223, iteration: 56078
loss: 0.973588228225708,grad_norm: 0.9999990207931583, iteration: 56079
loss: 1.0303481817245483,grad_norm: 0.9968836664379784, iteration: 56080
loss: 1.0119669437408447,grad_norm: 0.9635639475000801, iteration: 56081
loss: 1.0275540351867676,grad_norm: 0.9334184687184788, iteration: 56082
loss: 1.0201741456985474,grad_norm: 0.9931967331698675, iteration: 56083
loss: 1.0065968036651611,grad_norm: 0.9999990250644195, iteration: 56084
loss: 0.9487167000770569,grad_norm: 0.8531379990169007, iteration: 56085
loss: 1.0161962509155273,grad_norm: 0.9417237397338792, iteration: 56086
loss: 1.012380599975586,grad_norm: 0.9986680610540224, iteration: 56087
loss: 0.985130786895752,grad_norm: 0.8586240989986875, iteration: 56088
loss: 1.0146400928497314,grad_norm: 0.9831832994514318, iteration: 56089
loss: 0.9933381080627441,grad_norm: 0.9999992054362435, iteration: 56090
loss: 1.018072247505188,grad_norm: 0.8790005555375555, iteration: 56091
loss: 1.0235974788665771,grad_norm: 0.9999995128156426, iteration: 56092
loss: 1.033447265625,grad_norm: 0.9999991853245722, iteration: 56093
loss: 0.9989769458770752,grad_norm: 0.9999999359688204, iteration: 56094
loss: 1.0143144130706787,grad_norm: 0.9999991021623663, iteration: 56095
loss: 0.9958423376083374,grad_norm: 0.9999990343812177, iteration: 56096
loss: 1.0315914154052734,grad_norm: 0.9999991348662615, iteration: 56097
loss: 0.9626340270042419,grad_norm: 0.8998858459877814, iteration: 56098
loss: 0.9944181442260742,grad_norm: 0.9999992207506075, iteration: 56099
loss: 1.0340800285339355,grad_norm: 0.9905841465055718, iteration: 56100
loss: 0.989385187625885,grad_norm: 0.9708115165687609, iteration: 56101
loss: 0.9753647446632385,grad_norm: 0.9999990070368313, iteration: 56102
loss: 1.016005516052246,grad_norm: 0.999999162285458, iteration: 56103
loss: 1.0357892513275146,grad_norm: 0.9999992761357036, iteration: 56104
loss: 1.0104368925094604,grad_norm: 0.9705021415034734, iteration: 56105
loss: 0.958952009677887,grad_norm: 0.9579538899782228, iteration: 56106
loss: 1.0291721820831299,grad_norm: 0.9999991067939169, iteration: 56107
loss: 1.042952537536621,grad_norm: 0.9999991954781676, iteration: 56108
loss: 0.9892272353172302,grad_norm: 0.9728970886099059, iteration: 56109
loss: 0.9536659121513367,grad_norm: 0.9999992592884447, iteration: 56110
loss: 0.9888894557952881,grad_norm: 0.9608135266753611, iteration: 56111
loss: 0.9927046298980713,grad_norm: 0.9999992394858624, iteration: 56112
loss: 0.9845364093780518,grad_norm: 0.9999990131251815, iteration: 56113
loss: 0.9713361859321594,grad_norm: 0.9999993053079729, iteration: 56114
loss: 1.0153785943984985,grad_norm: 0.9999995247000056, iteration: 56115
loss: 1.025688886642456,grad_norm: 0.9999994100890535, iteration: 56116
loss: 1.005778193473816,grad_norm: 0.9999989801395381, iteration: 56117
loss: 0.9750158190727234,grad_norm: 0.9086781438069514, iteration: 56118
loss: 1.0100055932998657,grad_norm: 0.9999990907685221, iteration: 56119
loss: 0.9970299005508423,grad_norm: 0.9999990294300474, iteration: 56120
loss: 1.0103720426559448,grad_norm: 0.9653692829595416, iteration: 56121
loss: 0.9953688979148865,grad_norm: 0.9999989795480592, iteration: 56122
loss: 1.0299382209777832,grad_norm: 0.920416936262222, iteration: 56123
loss: 0.9747681617736816,grad_norm: 0.9063527141849109, iteration: 56124
loss: 1.016294002532959,grad_norm: 0.9999990908598027, iteration: 56125
loss: 1.020005464553833,grad_norm: 0.9999992592673286, iteration: 56126
loss: 0.9939159154891968,grad_norm: 0.9065646821175836, iteration: 56127
loss: 0.9742901921272278,grad_norm: 0.9999992885096728, iteration: 56128
loss: 1.0096911191940308,grad_norm: 0.8690186805856702, iteration: 56129
loss: 0.9534913301467896,grad_norm: 0.9999991576766085, iteration: 56130
loss: 1.0217331647872925,grad_norm: 0.9981967467422888, iteration: 56131
loss: 1.0054914951324463,grad_norm: 0.945762193195321, iteration: 56132
loss: 1.118960976600647,grad_norm: 0.9999994960527387, iteration: 56133
loss: 1.0131754875183105,grad_norm: 0.9913334435847551, iteration: 56134
loss: 0.9849994778633118,grad_norm: 0.99999919126489, iteration: 56135
loss: 1.0471775531768799,grad_norm: 0.9999993737849541, iteration: 56136
loss: 1.0668779611587524,grad_norm: 0.9999996275150873, iteration: 56137
loss: 1.0295255184173584,grad_norm: 0.9999991649972738, iteration: 56138
loss: 1.0177727937698364,grad_norm: 0.908205388712236, iteration: 56139
loss: 0.9863783121109009,grad_norm: 0.9999990228540021, iteration: 56140
loss: 1.011045217514038,grad_norm: 0.9999990608939926, iteration: 56141
loss: 1.0379363298416138,grad_norm: 0.9754998267606071, iteration: 56142
loss: 1.0035583972930908,grad_norm: 0.9999990807023587, iteration: 56143
loss: 1.0180250406265259,grad_norm: 0.9391294178308082, iteration: 56144
loss: 1.0594056844711304,grad_norm: 0.9974022301885388, iteration: 56145
loss: 0.9983875155448914,grad_norm: 0.9999990641923653, iteration: 56146
loss: 1.0422742366790771,grad_norm: 0.921020382616511, iteration: 56147
loss: 1.0105348825454712,grad_norm: 0.9999990094630701, iteration: 56148
loss: 1.0199997425079346,grad_norm: 0.878539130103824, iteration: 56149
loss: 1.0136771202087402,grad_norm: 0.9999990941069683, iteration: 56150
loss: 0.9924437403678894,grad_norm: 0.9999991160537489, iteration: 56151
loss: 0.9645538330078125,grad_norm: 0.9999992332691587, iteration: 56152
loss: 0.9938681721687317,grad_norm: 0.9161800956347428, iteration: 56153
loss: 0.9576652646064758,grad_norm: 0.996905277771601, iteration: 56154
loss: 0.9885098934173584,grad_norm: 0.9547937344602154, iteration: 56155
loss: 1.0263259410858154,grad_norm: 0.9999991346909634, iteration: 56156
loss: 1.0130325555801392,grad_norm: 0.9999991749985495, iteration: 56157
loss: 1.0216392278671265,grad_norm: 0.9301214709414265, iteration: 56158
loss: 0.9939988851547241,grad_norm: 0.9818913157319789, iteration: 56159
loss: 0.9961265325546265,grad_norm: 0.9566400755905338, iteration: 56160
loss: 1.002535104751587,grad_norm: 0.9642578368018215, iteration: 56161
loss: 0.9963422417640686,grad_norm: 0.9999993413798839, iteration: 56162
loss: 1.032608985900879,grad_norm: 0.9860732609416742, iteration: 56163
loss: 1.0205659866333008,grad_norm: 0.9999992428982541, iteration: 56164
loss: 1.039927363395691,grad_norm: 0.9933725484357369, iteration: 56165
loss: 0.9840288758277893,grad_norm: 0.9999990181925799, iteration: 56166
loss: 0.9733004570007324,grad_norm: 0.9698274616844598, iteration: 56167
loss: 1.0068726539611816,grad_norm: 0.9232387821683592, iteration: 56168
loss: 0.99253249168396,grad_norm: 0.9999990683528593, iteration: 56169
loss: 0.9946447014808655,grad_norm: 0.9999991833849694, iteration: 56170
loss: 0.9754005074501038,grad_norm: 0.9999990466159802, iteration: 56171
loss: 1.0157865285873413,grad_norm: 0.999999263314966, iteration: 56172
loss: 0.9659613966941833,grad_norm: 0.9999991673908971, iteration: 56173
loss: 0.9499672651290894,grad_norm: 0.9999993584695749, iteration: 56174
loss: 0.9808390140533447,grad_norm: 0.9392047059460806, iteration: 56175
loss: 1.0338035821914673,grad_norm: 0.9444513115581437, iteration: 56176
loss: 0.9942582249641418,grad_norm: 0.8447014502364695, iteration: 56177
loss: 1.0163862705230713,grad_norm: 0.9922557870396337, iteration: 56178
loss: 0.9693382978439331,grad_norm: 0.9999991421236959, iteration: 56179
loss: 0.9960737228393555,grad_norm: 0.9999991153462564, iteration: 56180
loss: 0.9654808044433594,grad_norm: 0.9705264005457793, iteration: 56181
loss: 1.039463758468628,grad_norm: 0.9999990920493877, iteration: 56182
loss: 1.025464415550232,grad_norm: 0.9789257367228941, iteration: 56183
loss: 0.9966468811035156,grad_norm: 0.867122549926664, iteration: 56184
loss: 0.98273104429245,grad_norm: 0.999999131450466, iteration: 56185
loss: 1.063822865486145,grad_norm: 0.9999992731810463, iteration: 56186
loss: 0.9783865809440613,grad_norm: 0.9654623393980513, iteration: 56187
loss: 0.9889841079711914,grad_norm: 0.9999992980681126, iteration: 56188
loss: 0.9893499612808228,grad_norm: 0.8549475922136177, iteration: 56189
loss: 0.99556565284729,grad_norm: 0.9999989840526207, iteration: 56190
loss: 1.0223160982131958,grad_norm: 0.8515810159739003, iteration: 56191
loss: 0.9887171387672424,grad_norm: 0.9497104971311701, iteration: 56192
loss: 0.9879338145256042,grad_norm: 0.9999991116247147, iteration: 56193
loss: 0.9786465167999268,grad_norm: 0.8638449207847476, iteration: 56194
loss: 0.9988493323326111,grad_norm: 0.9816741070011203, iteration: 56195
loss: 1.0008281469345093,grad_norm: 0.9820851847250576, iteration: 56196
loss: 1.0054081678390503,grad_norm: 0.9611548713087361, iteration: 56197
loss: 1.002320647239685,grad_norm: 0.9646131056807286, iteration: 56198
loss: 0.9922382831573486,grad_norm: 0.9547021803742483, iteration: 56199
loss: 1.0153474807739258,grad_norm: 0.9999991517501842, iteration: 56200
loss: 1.0107181072235107,grad_norm: 0.9999990244743309, iteration: 56201
loss: 0.9844015836715698,grad_norm: 0.9999990546361293, iteration: 56202
loss: 1.0677751302719116,grad_norm: 0.9999992377126564, iteration: 56203
loss: 1.0085195302963257,grad_norm: 0.8978611288832247, iteration: 56204
loss: 0.9994006156921387,grad_norm: 0.9190367555138487, iteration: 56205
loss: 0.9980037808418274,grad_norm: 0.9186732112329798, iteration: 56206
loss: 0.9916060566902161,grad_norm: 0.9999990466963347, iteration: 56207
loss: 1.0014064311981201,grad_norm: 0.8527536390096281, iteration: 56208
loss: 0.9803733229637146,grad_norm: 0.9613038407281914, iteration: 56209
loss: 0.9882750511169434,grad_norm: 0.999999071069162, iteration: 56210
loss: 0.9702308177947998,grad_norm: 0.9999990733394736, iteration: 56211
loss: 1.0262867212295532,grad_norm: 0.9999990864905438, iteration: 56212
loss: 1.0526139736175537,grad_norm: 0.9999992211706046, iteration: 56213
loss: 1.008715271949768,grad_norm: 0.9999991159451839, iteration: 56214
loss: 1.0089733600616455,grad_norm: 0.999999100458608, iteration: 56215
loss: 0.9728134870529175,grad_norm: 0.9999990356364512, iteration: 56216
loss: 1.0035316944122314,grad_norm: 0.9999990455489907, iteration: 56217
loss: 1.0198296308517456,grad_norm: 0.9999989332980769, iteration: 56218
loss: 1.0043613910675049,grad_norm: 0.9501489982419681, iteration: 56219
loss: 0.9711427688598633,grad_norm: 0.9999991322805466, iteration: 56220
loss: 0.952742338180542,grad_norm: 0.9954885133720152, iteration: 56221
loss: 1.0621659755706787,grad_norm: 0.9999991551037447, iteration: 56222
loss: 0.9864984154701233,grad_norm: 0.9719068302553979, iteration: 56223
loss: 0.9842135310173035,grad_norm: 0.9195028755837411, iteration: 56224
loss: 1.0156561136245728,grad_norm: 0.981058511477615, iteration: 56225
loss: 1.006898283958435,grad_norm: 0.9999990335823368, iteration: 56226
loss: 1.0277113914489746,grad_norm: 0.99999930845322, iteration: 56227
loss: 1.015790581703186,grad_norm: 0.9999990860363871, iteration: 56228
loss: 0.9482603073120117,grad_norm: 0.9294842564256637, iteration: 56229
loss: 0.9822978377342224,grad_norm: 0.9039064076125193, iteration: 56230
loss: 1.010059118270874,grad_norm: 0.9999994548905855, iteration: 56231
loss: 0.9777984619140625,grad_norm: 0.971147528672784, iteration: 56232
loss: 0.9769159555435181,grad_norm: 0.9999992665032896, iteration: 56233
loss: 0.9815067052841187,grad_norm: 0.9999990493367887, iteration: 56234
loss: 0.9854685068130493,grad_norm: 0.9999989598214749, iteration: 56235
loss: 1.006687879562378,grad_norm: 0.9999990965969868, iteration: 56236
loss: 1.0605731010437012,grad_norm: 0.9999991891925365, iteration: 56237
loss: 1.0500792264938354,grad_norm: 0.9999991998043813, iteration: 56238
loss: 1.0115336179733276,grad_norm: 0.9999990852333276, iteration: 56239
loss: 1.0465327501296997,grad_norm: 0.9504600133544864, iteration: 56240
loss: 1.017729640007019,grad_norm: 0.9698163799812621, iteration: 56241
loss: 1.0146751403808594,grad_norm: 0.9466709084688943, iteration: 56242
loss: 1.0156432390213013,grad_norm: 0.834266641367644, iteration: 56243
loss: 0.9748455286026001,grad_norm: 0.9999990215850386, iteration: 56244
loss: 0.9951612949371338,grad_norm: 0.9999990541628179, iteration: 56245
loss: 0.9987326264381409,grad_norm: 0.9999992750918401, iteration: 56246
loss: 0.9779183864593506,grad_norm: 0.9731600482050939, iteration: 56247
loss: 0.9972976446151733,grad_norm: 0.9085644115418581, iteration: 56248
loss: 0.9983706474304199,grad_norm: 0.9223377082462767, iteration: 56249
loss: 1.0233135223388672,grad_norm: 0.9999992770569598, iteration: 56250
loss: 0.9741989970207214,grad_norm: 0.9999992272865346, iteration: 56251
loss: 0.9721575975418091,grad_norm: 0.9999990893411524, iteration: 56252
loss: 1.0337471961975098,grad_norm: 0.9999991760874224, iteration: 56253
loss: 1.0374706983566284,grad_norm: 0.9255785305962956, iteration: 56254
loss: 0.9831088781356812,grad_norm: 0.9999991731793137, iteration: 56255
loss: 0.9931878447532654,grad_norm: 0.9999992838914707, iteration: 56256
loss: 0.9916547536849976,grad_norm: 0.9312324509976504, iteration: 56257
loss: 1.0492173433303833,grad_norm: 0.9999999652154393, iteration: 56258
loss: 0.9950937628746033,grad_norm: 0.9120376427454662, iteration: 56259
loss: 0.9997823238372803,grad_norm: 0.9640977325016663, iteration: 56260
loss: 1.003206729888916,grad_norm: 0.9999991097128286, iteration: 56261
loss: 1.019170880317688,grad_norm: 0.9999991256523563, iteration: 56262
loss: 0.9875958561897278,grad_norm: 0.9787878822847784, iteration: 56263
loss: 1.0098751783370972,grad_norm: 0.9546716904513353, iteration: 56264
loss: 1.0058530569076538,grad_norm: 0.8480369738762437, iteration: 56265
loss: 1.0135693550109863,grad_norm: 0.967443833463668, iteration: 56266
loss: 1.0050581693649292,grad_norm: 0.9918347673540332, iteration: 56267
loss: 1.0096818208694458,grad_norm: 0.999999236251607, iteration: 56268
loss: 1.020880937576294,grad_norm: 0.9999990370109816, iteration: 56269
loss: 1.0000361204147339,grad_norm: 0.9330496337515868, iteration: 56270
loss: 0.9734355211257935,grad_norm: 0.9879696433365375, iteration: 56271
loss: 0.9914035797119141,grad_norm: 0.9680419545854871, iteration: 56272
loss: 0.9653008580207825,grad_norm: 0.9579584253979596, iteration: 56273
loss: 1.0310910940170288,grad_norm: 0.9999991156490039, iteration: 56274
loss: 1.012213945388794,grad_norm: 0.9999990978494528, iteration: 56275
loss: 1.0383492708206177,grad_norm: 0.9999989786404075, iteration: 56276
loss: 0.9893586039543152,grad_norm: 0.9999990642602418, iteration: 56277
loss: 0.9784927368164062,grad_norm: 0.955735304429351, iteration: 56278
loss: 1.0001603364944458,grad_norm: 0.8646662664984555, iteration: 56279
loss: 1.0683168172836304,grad_norm: 0.9999992348493713, iteration: 56280
loss: 1.0136005878448486,grad_norm: 0.9999993528926671, iteration: 56281
loss: 0.9760019779205322,grad_norm: 0.9797374786430324, iteration: 56282
loss: 1.014412522315979,grad_norm: 0.9937702200214673, iteration: 56283
loss: 0.982445240020752,grad_norm: 0.9999990030960052, iteration: 56284
loss: 0.9734286069869995,grad_norm: 0.9999991549864984, iteration: 56285
loss: 1.0289809703826904,grad_norm: 0.9999992380554337, iteration: 56286
loss: 1.0786806344985962,grad_norm: 0.9999990935985612, iteration: 56287
loss: 1.0354540348052979,grad_norm: 0.9274685335058122, iteration: 56288
loss: 0.9750215411186218,grad_norm: 0.993649411955372, iteration: 56289
loss: 1.0028132200241089,grad_norm: 0.9999990582403367, iteration: 56290
loss: 0.9663335084915161,grad_norm: 0.9999989736859108, iteration: 56291
loss: 1.017637848854065,grad_norm: 0.9999990406607967, iteration: 56292
loss: 1.0241206884384155,grad_norm: 0.9999992302560358, iteration: 56293
loss: 1.0092799663543701,grad_norm: 0.9999991635763597, iteration: 56294
loss: 1.005391001701355,grad_norm: 0.8710244575976739, iteration: 56295
loss: 1.0216336250305176,grad_norm: 0.9115401816606431, iteration: 56296
loss: 0.9738372564315796,grad_norm: 0.9177812173641561, iteration: 56297
loss: 1.0057357549667358,grad_norm: 0.9512296313310862, iteration: 56298
loss: 0.9984071850776672,grad_norm: 0.9999992772894233, iteration: 56299
loss: 0.9776723384857178,grad_norm: 0.9999989969312536, iteration: 56300
loss: 1.0058903694152832,grad_norm: 0.9999992445366959, iteration: 56301
loss: 0.9502730369567871,grad_norm: 0.9577771437642508, iteration: 56302
loss: 1.0165891647338867,grad_norm: 0.9999991501384987, iteration: 56303
loss: 1.0366575717926025,grad_norm: 0.9999989935361531, iteration: 56304
loss: 0.9672083854675293,grad_norm: 0.9999990384461758, iteration: 56305
loss: 0.9993207454681396,grad_norm: 0.9999992385882857, iteration: 56306
loss: 1.0003963708877563,grad_norm: 0.9999991673043223, iteration: 56307
loss: 0.9996436238288879,grad_norm: 0.9999991488139744, iteration: 56308
loss: 0.9933308959007263,grad_norm: 0.7619362749208696, iteration: 56309
loss: 1.0255659818649292,grad_norm: 0.9999991397293159, iteration: 56310
loss: 0.9686054587364197,grad_norm: 0.9999992867554427, iteration: 56311
loss: 1.0501240491867065,grad_norm: 0.9999992295981813, iteration: 56312
loss: 1.030380129814148,grad_norm: 0.9999990297733704, iteration: 56313
loss: 0.9895564317703247,grad_norm: 0.8613680547702922, iteration: 56314
loss: 1.0134958028793335,grad_norm: 0.9999990676398661, iteration: 56315
loss: 0.9840021729469299,grad_norm: 0.9999990967727422, iteration: 56316
loss: 1.0597892999649048,grad_norm: 0.999999176246735, iteration: 56317
loss: 0.9688166975975037,grad_norm: 0.999999272331188, iteration: 56318
loss: 0.9783101081848145,grad_norm: 0.9196096913946766, iteration: 56319
loss: 1.0335475206375122,grad_norm: 0.9999994079521406, iteration: 56320
loss: 0.98162841796875,grad_norm: 0.9347190694446141, iteration: 56321
loss: 0.999498188495636,grad_norm: 0.9999991934342889, iteration: 56322
loss: 0.9871904253959656,grad_norm: 0.9987014876195229, iteration: 56323
loss: 1.0001568794250488,grad_norm: 0.9803995944712146, iteration: 56324
loss: 0.9924893975257874,grad_norm: 0.9147237438441301, iteration: 56325
loss: 0.995988130569458,grad_norm: 0.9999990249248527, iteration: 56326
loss: 1.0157530307769775,grad_norm: 0.9999990440723074, iteration: 56327
loss: 0.9844490885734558,grad_norm: 0.9999990696939653, iteration: 56328
loss: 1.0055540800094604,grad_norm: 0.9646424031380101, iteration: 56329
loss: 0.9971911311149597,grad_norm: 0.9085884066535332, iteration: 56330
loss: 1.0456335544586182,grad_norm: 0.9999992188619988, iteration: 56331
loss: 0.9850553274154663,grad_norm: 0.9999991412897172, iteration: 56332
loss: 0.9867608547210693,grad_norm: 0.9416793599490447, iteration: 56333
loss: 1.023952603340149,grad_norm: 0.9431402711644851, iteration: 56334
loss: 1.0077654123306274,grad_norm: 0.9650487008479189, iteration: 56335
loss: 0.9680205583572388,grad_norm: 0.9999990626859513, iteration: 56336
loss: 0.9633200168609619,grad_norm: 0.9999990919729187, iteration: 56337
loss: 1.0200251340866089,grad_norm: 0.9999991412726893, iteration: 56338
loss: 0.9929302930831909,grad_norm: 0.8757636184037892, iteration: 56339
loss: 0.9729982614517212,grad_norm: 0.9999989823415352, iteration: 56340
loss: 1.00472092628479,grad_norm: 0.9087290977278202, iteration: 56341
loss: 1.0180922746658325,grad_norm: 0.9999991373491305, iteration: 56342
loss: 0.9951649904251099,grad_norm: 0.978316558575184, iteration: 56343
loss: 1.0159190893173218,grad_norm: 0.9999993609306954, iteration: 56344
loss: 0.988440752029419,grad_norm: 0.920095979304298, iteration: 56345
loss: 0.9842506647109985,grad_norm: 0.9855387610720823, iteration: 56346
loss: 0.9876000881195068,grad_norm: 0.993744658266658, iteration: 56347
loss: 1.0232915878295898,grad_norm: 0.9333637088301544, iteration: 56348
loss: 0.9858515858650208,grad_norm: 0.9999992230859773, iteration: 56349
loss: 0.9792096614837646,grad_norm: 0.9999991408216967, iteration: 56350
loss: 1.0022035837173462,grad_norm: 0.9999991648180051, iteration: 56351
loss: 1.0047719478607178,grad_norm: 0.9348571847249091, iteration: 56352
loss: 1.0070469379425049,grad_norm: 0.9999990378594394, iteration: 56353
loss: 0.9683003425598145,grad_norm: 0.9999992985547942, iteration: 56354
loss: 0.9889048337936401,grad_norm: 0.9999990557514474, iteration: 56355
loss: 1.012772798538208,grad_norm: 0.9999990479221399, iteration: 56356
loss: 0.9805706739425659,grad_norm: 0.9999990687081365, iteration: 56357
loss: 1.0296525955200195,grad_norm: 0.9999990150086739, iteration: 56358
loss: 0.9852297306060791,grad_norm: 0.9913513994418722, iteration: 56359
loss: 1.0085346698760986,grad_norm: 0.8981379532228714, iteration: 56360
loss: 0.9835319519042969,grad_norm: 0.9999992103516145, iteration: 56361
loss: 0.9902499914169312,grad_norm: 0.9999994971675633, iteration: 56362
loss: 1.0162307024002075,grad_norm: 0.98496213454017, iteration: 56363
loss: 1.0426170825958252,grad_norm: 0.9356190414769363, iteration: 56364
loss: 0.999204158782959,grad_norm: 0.9999990922978113, iteration: 56365
loss: 0.9748950004577637,grad_norm: 0.9999990830422317, iteration: 56366
loss: 0.9888511896133423,grad_norm: 0.9516210552730364, iteration: 56367
loss: 1.0331816673278809,grad_norm: 0.9999993689854766, iteration: 56368
loss: 0.9574273228645325,grad_norm: 0.9236794187863819, iteration: 56369
loss: 0.9724597930908203,grad_norm: 0.8553248111297822, iteration: 56370
loss: 0.9837820529937744,grad_norm: 0.9999991729771578, iteration: 56371
loss: 1.040856957435608,grad_norm: 0.9999992924561465, iteration: 56372
loss: 0.9775095582008362,grad_norm: 0.9999991040837184, iteration: 56373
loss: 1.0040091276168823,grad_norm: 0.8933549482581983, iteration: 56374
loss: 0.9909282326698303,grad_norm: 0.9331649311510773, iteration: 56375
loss: 0.9802895784378052,grad_norm: 0.999999165595991, iteration: 56376
loss: 0.9708773493766785,grad_norm: 0.902213448610032, iteration: 56377
loss: 0.9881116151809692,grad_norm: 0.8370076226028655, iteration: 56378
loss: 0.9969794154167175,grad_norm: 0.943103940028667, iteration: 56379
loss: 0.9986950755119324,grad_norm: 0.9999992244916217, iteration: 56380
loss: 0.9827227592468262,grad_norm: 0.9999991716893792, iteration: 56381
loss: 0.9879234433174133,grad_norm: 0.9999990283367581, iteration: 56382
loss: 1.0206769704818726,grad_norm: 0.9901899403739035, iteration: 56383
loss: 1.0131242275238037,grad_norm: 0.9999995912667353, iteration: 56384
loss: 1.0368505716323853,grad_norm: 0.9999992929300792, iteration: 56385
loss: 1.061771035194397,grad_norm: 0.9999991618203601, iteration: 56386
loss: 1.0260796546936035,grad_norm: 0.9999992745043654, iteration: 56387
loss: 0.9480276703834534,grad_norm: 0.999999013787366, iteration: 56388
loss: 1.01707923412323,grad_norm: 0.9999991276809336, iteration: 56389
loss: 0.9937437772750854,grad_norm: 0.999999306688811, iteration: 56390
loss: 1.007583498954773,grad_norm: 0.9999990501239813, iteration: 56391
loss: 1.0449453592300415,grad_norm: 0.9999993695803678, iteration: 56392
loss: 1.0114935636520386,grad_norm: 0.999999414764246, iteration: 56393
loss: 1.0088417530059814,grad_norm: 0.9597275156794044, iteration: 56394
loss: 1.0165870189666748,grad_norm: 0.9999991986910841, iteration: 56395
loss: 1.0166032314300537,grad_norm: 0.9999991924909872, iteration: 56396
loss: 1.0486623048782349,grad_norm: 0.9999990314204626, iteration: 56397
loss: 0.9676982164382935,grad_norm: 0.9999990308192497, iteration: 56398
loss: 0.987755298614502,grad_norm: 0.9999990092863453, iteration: 56399
loss: 0.9847490191459656,grad_norm: 0.9999989737303251, iteration: 56400
loss: 1.0507715940475464,grad_norm: 0.9999995044623278, iteration: 56401
loss: 0.9387515783309937,grad_norm: 0.9466585914665476, iteration: 56402
loss: 1.0069005489349365,grad_norm: 0.9999992806204202, iteration: 56403
loss: 1.0537785291671753,grad_norm: 0.9999994437495939, iteration: 56404
loss: 0.9841995239257812,grad_norm: 0.9373294478670537, iteration: 56405
loss: 0.9611663222312927,grad_norm: 0.9033801866588411, iteration: 56406
loss: 1.024788737297058,grad_norm: 0.9531930602673406, iteration: 56407
loss: 1.0248382091522217,grad_norm: 0.952524675890359, iteration: 56408
loss: 1.013665795326233,grad_norm: 0.9483306228356494, iteration: 56409
loss: 0.9831786155700684,grad_norm: 0.9999990133263127, iteration: 56410
loss: 0.965324878692627,grad_norm: 0.9049665703631163, iteration: 56411
loss: 0.9982913136482239,grad_norm: 0.9464599663100055, iteration: 56412
loss: 0.9903993010520935,grad_norm: 0.9999989804555001, iteration: 56413
loss: 0.966338574886322,grad_norm: 0.9642959831879473, iteration: 56414
loss: 1.011440634727478,grad_norm: 0.9662627960183788, iteration: 56415
loss: 1.0155624151229858,grad_norm: 0.9999992331156111, iteration: 56416
loss: 1.0162720680236816,grad_norm: 0.9999997695347653, iteration: 56417
loss: 0.9774900674819946,grad_norm: 0.9999995290514988, iteration: 56418
loss: 1.003603458404541,grad_norm: 0.9527285886958105, iteration: 56419
loss: 1.0360163450241089,grad_norm: 0.9999991762059901, iteration: 56420
loss: 1.0145725011825562,grad_norm: 0.9475939944001917, iteration: 56421
loss: 0.993057906627655,grad_norm: 0.9999990300774372, iteration: 56422
loss: 0.9679877758026123,grad_norm: 0.9999990222565087, iteration: 56423
loss: 1.0112674236297607,grad_norm: 0.9681619794552385, iteration: 56424
loss: 1.0013989210128784,grad_norm: 0.8510354950583385, iteration: 56425
loss: 0.9653975367546082,grad_norm: 0.9999992947765487, iteration: 56426
loss: 0.989142119884491,grad_norm: 0.9999991603803129, iteration: 56427
loss: 0.9984742999076843,grad_norm: 0.9999992176513054, iteration: 56428
loss: 1.0166661739349365,grad_norm: 0.8100553895493774, iteration: 56429
loss: 0.989301860332489,grad_norm: 0.985273034536002, iteration: 56430
loss: 1.0020689964294434,grad_norm: 0.8981937484863491, iteration: 56431
loss: 1.0276108980178833,grad_norm: 0.9854992030206579, iteration: 56432
loss: 1.0343705415725708,grad_norm: 0.9999991572188736, iteration: 56433
loss: 0.9841055274009705,grad_norm: 0.9226728274893117, iteration: 56434
loss: 0.9831238389015198,grad_norm: 0.9401296467259265, iteration: 56435
loss: 0.9607531428337097,grad_norm: 0.999999191245339, iteration: 56436
loss: 1.0240283012390137,grad_norm: 0.9999992143017006, iteration: 56437
loss: 1.0141408443450928,grad_norm: 0.9999991435855349, iteration: 56438
loss: 0.9809102416038513,grad_norm: 0.9468434929365095, iteration: 56439
loss: 1.0183438062667847,grad_norm: 0.9967264944752883, iteration: 56440
loss: 0.9818386435508728,grad_norm: 0.9251185994108219, iteration: 56441
loss: 0.9953103065490723,grad_norm: 0.9999989767099688, iteration: 56442
loss: 1.0009585618972778,grad_norm: 0.9252288615258444, iteration: 56443
loss: 0.9899999499320984,grad_norm: 0.9999992109524575, iteration: 56444
loss: 1.0097733736038208,grad_norm: 0.9779508508672281, iteration: 56445
loss: 1.0222409963607788,grad_norm: 0.9999994803660323, iteration: 56446
loss: 1.0302950143814087,grad_norm: 0.925933325071624, iteration: 56447
loss: 0.9891956448554993,grad_norm: 0.9999991749422833, iteration: 56448
loss: 1.0186035633087158,grad_norm: 0.8338627922614273, iteration: 56449
loss: 1.0521897077560425,grad_norm: 0.9999992849172719, iteration: 56450
loss: 1.047799825668335,grad_norm: 0.9999994186052342, iteration: 56451
loss: 0.962196409702301,grad_norm: 0.9999989691819756, iteration: 56452
loss: 1.0197293758392334,grad_norm: 0.9999995471375462, iteration: 56453
loss: 0.967346727848053,grad_norm: 0.8706197374778784, iteration: 56454
loss: 1.00236976146698,grad_norm: 0.9999990952411861, iteration: 56455
loss: 0.9810683727264404,grad_norm: 0.999999098101605, iteration: 56456
loss: 0.9811156392097473,grad_norm: 0.952872008211409, iteration: 56457
loss: 0.9762564301490784,grad_norm: 0.9370555597413571, iteration: 56458
loss: 1.0045604705810547,grad_norm: 0.9999990496161586, iteration: 56459
loss: 1.0116021633148193,grad_norm: 0.9999990646622365, iteration: 56460
loss: 1.0224217176437378,grad_norm: 0.99999925390593, iteration: 56461
loss: 0.9435293078422546,grad_norm: 0.9999990641721154, iteration: 56462
loss: 0.9741215705871582,grad_norm: 0.9848238592626416, iteration: 56463
loss: 1.0109336376190186,grad_norm: 0.9688419824422282, iteration: 56464
loss: 0.9626036882400513,grad_norm: 0.9999990984572831, iteration: 56465
loss: 0.9891347289085388,grad_norm: 0.9999991374776825, iteration: 56466
loss: 0.9635521769523621,grad_norm: 0.9999991965873032, iteration: 56467
loss: 0.9835540652275085,grad_norm: 0.9999990873402339, iteration: 56468
loss: 1.0193966627120972,grad_norm: 0.9999992724578651, iteration: 56469
loss: 0.9825860261917114,grad_norm: 0.9912096502123915, iteration: 56470
loss: 1.0190824270248413,grad_norm: 0.9999991921684167, iteration: 56471
loss: 1.0641424655914307,grad_norm: 0.9999995307562918, iteration: 56472
loss: 1.004296898841858,grad_norm: 0.999999049306025, iteration: 56473
loss: 0.9949171543121338,grad_norm: 0.9999992978848227, iteration: 56474
loss: 0.9937094449996948,grad_norm: 0.9999991741443068, iteration: 56475
loss: 0.9692944884300232,grad_norm: 0.9999992473785003, iteration: 56476
loss: 0.9746630191802979,grad_norm: 0.9506313678860231, iteration: 56477
loss: 1.0094517469406128,grad_norm: 0.9965912984688817, iteration: 56478
loss: 0.9958939552307129,grad_norm: 0.9999990101682751, iteration: 56479
loss: 1.0067247152328491,grad_norm: 0.8496073386162397, iteration: 56480
loss: 0.9695529937744141,grad_norm: 0.9742947238591847, iteration: 56481
loss: 1.0342001914978027,grad_norm: 0.9999989634374684, iteration: 56482
loss: 1.0030983686447144,grad_norm: 0.9999990622069359, iteration: 56483
loss: 1.0044958591461182,grad_norm: 0.9999992597825019, iteration: 56484
loss: 1.0279557704925537,grad_norm: 0.9379435992158863, iteration: 56485
loss: 0.9753628373146057,grad_norm: 0.9999992550383107, iteration: 56486
loss: 0.9682786464691162,grad_norm: 0.935594057784294, iteration: 56487
loss: 1.010446548461914,grad_norm: 0.9999992112107148, iteration: 56488
loss: 0.9805651307106018,grad_norm: 0.9999991263677815, iteration: 56489
loss: 0.9948421716690063,grad_norm: 0.8788390650966709, iteration: 56490
loss: 0.9701957106590271,grad_norm: 0.9293187829667571, iteration: 56491
loss: 0.9935819506645203,grad_norm: 0.8835705564331575, iteration: 56492
loss: 1.0085655450820923,grad_norm: 0.9999989636507528, iteration: 56493
loss: 1.0101213455200195,grad_norm: 0.9041496815398911, iteration: 56494
loss: 1.0002764463424683,grad_norm: 0.8123799838998185, iteration: 56495
loss: 0.9973466992378235,grad_norm: 0.9999991821392296, iteration: 56496
loss: 0.999060869216919,grad_norm: 0.9526406112370657, iteration: 56497
loss: 0.9905237555503845,grad_norm: 0.8315309475706942, iteration: 56498
loss: 0.9644622802734375,grad_norm: 0.9999991768424502, iteration: 56499
loss: 1.0153923034667969,grad_norm: 0.999999118121603, iteration: 56500
loss: 1.0047606229782104,grad_norm: 0.9897522096990135, iteration: 56501
loss: 1.0113790035247803,grad_norm: 0.9628016385192195, iteration: 56502
loss: 1.017669916152954,grad_norm: 0.9999990910468315, iteration: 56503
loss: 1.0031834840774536,grad_norm: 0.8343795678856841, iteration: 56504
loss: 0.9693554639816284,grad_norm: 0.9398902739174345, iteration: 56505
loss: 0.9944301843643188,grad_norm: 0.9781848138755345, iteration: 56506
loss: 1.002023696899414,grad_norm: 0.9866029406191024, iteration: 56507
loss: 0.9770784974098206,grad_norm: 0.9999991421418614, iteration: 56508
loss: 0.979832112789154,grad_norm: 0.999999078437196, iteration: 56509
loss: 1.0196017026901245,grad_norm: 0.9999990868911254, iteration: 56510
loss: 1.0088434219360352,grad_norm: 0.9295470259190801, iteration: 56511
loss: 0.9620187878608704,grad_norm: 0.9999990281387108, iteration: 56512
loss: 1.000619888305664,grad_norm: 0.9999990471789092, iteration: 56513
loss: 0.9959083795547485,grad_norm: 0.9378924934744859, iteration: 56514
loss: 1.018739104270935,grad_norm: 0.9920870887311296, iteration: 56515
loss: 1.0437456369400024,grad_norm: 0.9999997178042513, iteration: 56516
loss: 0.9877112507820129,grad_norm: 0.9330028695146715, iteration: 56517
loss: 0.929459273815155,grad_norm: 0.9831011445748795, iteration: 56518
loss: 1.0267764329910278,grad_norm: 0.9488157891541059, iteration: 56519
loss: 1.0194538831710815,grad_norm: 0.7748605911969885, iteration: 56520
loss: 0.9498651027679443,grad_norm: 0.9999990421158798, iteration: 56521
loss: 0.9758467674255371,grad_norm: 0.828621284343768, iteration: 56522
loss: 1.0000195503234863,grad_norm: 0.9999989886686732, iteration: 56523
loss: 0.9969846606254578,grad_norm: 0.9999993985992168, iteration: 56524
loss: 1.0359511375427246,grad_norm: 0.999999339448124, iteration: 56525
loss: 1.0156954526901245,grad_norm: 0.9575161048605849, iteration: 56526
loss: 1.03322434425354,grad_norm: 0.9656063575134005, iteration: 56527
loss: 0.9952200055122375,grad_norm: 0.8249765407647379, iteration: 56528
loss: 1.0566985607147217,grad_norm: 0.9999992264866887, iteration: 56529
loss: 1.0175833702087402,grad_norm: 0.9999993153259644, iteration: 56530
loss: 0.9766542315483093,grad_norm: 0.9999994483655033, iteration: 56531
loss: 0.9886811375617981,grad_norm: 0.9401619824876841, iteration: 56532
loss: 1.0090209245681763,grad_norm: 0.9999990227008, iteration: 56533
loss: 0.9814316630363464,grad_norm: 0.9999992196074683, iteration: 56534
loss: 0.9857993721961975,grad_norm: 0.9999990436857266, iteration: 56535
loss: 0.975922167301178,grad_norm: 0.9633173242790704, iteration: 56536
loss: 0.9852553606033325,grad_norm: 0.9885648059241597, iteration: 56537
loss: 1.0142802000045776,grad_norm: 0.9999990073633487, iteration: 56538
loss: 1.0386877059936523,grad_norm: 0.8193654483226949, iteration: 56539
loss: 1.0063896179199219,grad_norm: 0.9999991864183249, iteration: 56540
loss: 1.0151978731155396,grad_norm: 0.9999991119011316, iteration: 56541
loss: 0.9570664167404175,grad_norm: 0.9448222536070371, iteration: 56542
loss: 0.9583225846290588,grad_norm: 0.9563756312945202, iteration: 56543
loss: 0.9826481342315674,grad_norm: 0.9410535754422023, iteration: 56544
loss: 1.012697696685791,grad_norm: 0.8840887266081915, iteration: 56545
loss: 0.99226975440979,grad_norm: 0.9999993953803906, iteration: 56546
loss: 0.9910420775413513,grad_norm: 0.9999990566225436, iteration: 56547
loss: 1.003113031387329,grad_norm: 0.9999990943582758, iteration: 56548
loss: 1.005382776260376,grad_norm: 0.9499451295055128, iteration: 56549
loss: 0.986616313457489,grad_norm: 0.9011880473984573, iteration: 56550
loss: 1.0393680334091187,grad_norm: 0.9999990520045413, iteration: 56551
loss: 0.9784643054008484,grad_norm: 0.9999999234559, iteration: 56552
loss: 0.9859046339988708,grad_norm: 0.9999991203018004, iteration: 56553
loss: 1.0345208644866943,grad_norm: 0.9808342017099326, iteration: 56554
loss: 1.0293314456939697,grad_norm: 0.999999235110035, iteration: 56555
loss: 1.0273176431655884,grad_norm: 0.9016269254762616, iteration: 56556
loss: 1.0202744007110596,grad_norm: 0.9721662060589444, iteration: 56557
loss: 1.0438177585601807,grad_norm: 0.9094973135008934, iteration: 56558
loss: 0.9841998815536499,grad_norm: 0.9999992868345081, iteration: 56559
loss: 1.0167200565338135,grad_norm: 0.9999991049162417, iteration: 56560
loss: 0.9672176241874695,grad_norm: 0.9999993170550433, iteration: 56561
loss: 0.9645135998725891,grad_norm: 0.8655901254949012, iteration: 56562
loss: 0.9635984301567078,grad_norm: 0.999999119891588, iteration: 56563
loss: 1.0277531147003174,grad_norm: 0.9999989563610839, iteration: 56564
loss: 0.9756594300270081,grad_norm: 0.9999992395249692, iteration: 56565
loss: 0.9930732250213623,grad_norm: 0.99999906551199, iteration: 56566
loss: 1.0004501342773438,grad_norm: 0.9999990962615639, iteration: 56567
loss: 1.0061088800430298,grad_norm: 0.9999990418767, iteration: 56568
loss: 1.0373077392578125,grad_norm: 0.9964580185356684, iteration: 56569
loss: 0.9988168478012085,grad_norm: 0.9963450766423644, iteration: 56570
loss: 0.9649113416671753,grad_norm: 0.9999990855579809, iteration: 56571
loss: 1.007677674293518,grad_norm: 0.9999990070125891, iteration: 56572
loss: 0.9956135749816895,grad_norm: 0.9999992779583865, iteration: 56573
loss: 0.9741110801696777,grad_norm: 0.9456437141109847, iteration: 56574
loss: 0.9970458745956421,grad_norm: 0.9999990432886308, iteration: 56575
loss: 0.9740509986877441,grad_norm: 0.9332391899272606, iteration: 56576
loss: 0.9782615303993225,grad_norm: 0.9051355433599428, iteration: 56577
loss: 0.9988561868667603,grad_norm: 0.9999990745919125, iteration: 56578
loss: 1.0029830932617188,grad_norm: 0.9999991423299573, iteration: 56579
loss: 0.9943137168884277,grad_norm: 0.9667555047547121, iteration: 56580
loss: 0.968485951423645,grad_norm: 0.9954836473049756, iteration: 56581
loss: 1.0030792951583862,grad_norm: 0.9999992431098108, iteration: 56582
loss: 0.995351254940033,grad_norm: 0.9787600010071525, iteration: 56583
loss: 0.9755492806434631,grad_norm: 0.930263963733827, iteration: 56584
loss: 0.9817725419998169,grad_norm: 0.9914086041616311, iteration: 56585
loss: 0.9671796560287476,grad_norm: 0.9999991659367031, iteration: 56586
loss: 0.969782292842865,grad_norm: 0.9747462622373547, iteration: 56587
loss: 0.9795656204223633,grad_norm: 0.9999994125074325, iteration: 56588
loss: 1.0119673013687134,grad_norm: 0.9999991977445538, iteration: 56589
loss: 1.0009171962738037,grad_norm: 0.9999991511716156, iteration: 56590
loss: 0.9981926083564758,grad_norm: 0.9184210219989697, iteration: 56591
loss: 1.041451334953308,grad_norm: 0.9999990971920736, iteration: 56592
loss: 1.1152137517929077,grad_norm: 0.9999995077564826, iteration: 56593
loss: 0.937573254108429,grad_norm: 0.8955929528218398, iteration: 56594
loss: 0.9865322113037109,grad_norm: 0.9861178809282872, iteration: 56595
loss: 1.0091402530670166,grad_norm: 0.9618532652785496, iteration: 56596
loss: 1.0120912790298462,grad_norm: 0.9999991245403375, iteration: 56597
loss: 1.0023494958877563,grad_norm: 0.9999989640466072, iteration: 56598
loss: 0.9874250888824463,grad_norm: 0.9999990728996305, iteration: 56599
loss: 0.9791868329048157,grad_norm: 0.9999990340214585, iteration: 56600
loss: 0.9981535077095032,grad_norm: 0.9999992612102064, iteration: 56601
loss: 1.033001184463501,grad_norm: 0.9999991056353155, iteration: 56602
loss: 1.0686556100845337,grad_norm: 0.9999993661005693, iteration: 56603
loss: 1.0030139684677124,grad_norm: 0.7714974226055041, iteration: 56604
loss: 0.9995580911636353,grad_norm: 0.9999993023725076, iteration: 56605
loss: 0.9830297231674194,grad_norm: 0.97580814862969, iteration: 56606
loss: 0.9814572930335999,grad_norm: 0.9339302892237052, iteration: 56607
loss: 0.9873002767562866,grad_norm: 0.9539888974423539, iteration: 56608
loss: 0.998093843460083,grad_norm: 0.9999991344115011, iteration: 56609
loss: 1.0346341133117676,grad_norm: 0.9999992855412427, iteration: 56610
loss: 0.9810975790023804,grad_norm: 0.965850083466092, iteration: 56611
loss: 1.011898398399353,grad_norm: 0.9961046735082535, iteration: 56612
loss: 0.9848622679710388,grad_norm: 0.9999993311770218, iteration: 56613
loss: 1.0016865730285645,grad_norm: 0.9999990869077352, iteration: 56614
loss: 1.025535225868225,grad_norm: 0.9999991356432271, iteration: 56615
loss: 1.0531851053237915,grad_norm: 0.9398007749629055, iteration: 56616
loss: 1.0023634433746338,grad_norm: 0.9999991689164682, iteration: 56617
loss: 0.9890090823173523,grad_norm: 0.882422669538675, iteration: 56618
loss: 1.0194758176803589,grad_norm: 0.9999992233255303, iteration: 56619
loss: 1.0342170000076294,grad_norm: 0.9777569433056897, iteration: 56620
loss: 0.9833348393440247,grad_norm: 0.7105874771884646, iteration: 56621
loss: 0.9873632192611694,grad_norm: 0.9999990894169338, iteration: 56622
loss: 0.9885892271995544,grad_norm: 0.9989389188600895, iteration: 56623
loss: 1.0200276374816895,grad_norm: 0.9999991854807394, iteration: 56624
loss: 1.0188255310058594,grad_norm: 0.8847849489046082, iteration: 56625
loss: 0.9734798669815063,grad_norm: 0.999999063247714, iteration: 56626
loss: 1.0060224533081055,grad_norm: 0.8220459831899908, iteration: 56627
loss: 0.980842113494873,grad_norm: 0.999999307476796, iteration: 56628
loss: 0.9933940172195435,grad_norm: 0.999999175361485, iteration: 56629
loss: 1.0215027332305908,grad_norm: 0.909760500413394, iteration: 56630
loss: 1.0270590782165527,grad_norm: 0.8969669075471297, iteration: 56631
loss: 1.0198988914489746,grad_norm: 0.8868289315991452, iteration: 56632
loss: 0.9884676337242126,grad_norm: 0.9348060221845483, iteration: 56633
loss: 0.9902187585830688,grad_norm: 0.8827860466691808, iteration: 56634
loss: 0.9916805624961853,grad_norm: 0.9999990228796483, iteration: 56635
loss: 1.018554449081421,grad_norm: 0.9999990896973452, iteration: 56636
loss: 1.0009177923202515,grad_norm: 0.9999989926927348, iteration: 56637
loss: 1.030348539352417,grad_norm: 0.999999012054314, iteration: 56638
loss: 1.0398612022399902,grad_norm: 0.9999990285738609, iteration: 56639
loss: 0.97337806224823,grad_norm: 0.9999992227061013, iteration: 56640
loss: 1.0050334930419922,grad_norm: 0.9213542633794976, iteration: 56641
loss: 1.0139847993850708,grad_norm: 0.9345086304918733, iteration: 56642
loss: 1.068064570426941,grad_norm: 0.9999995742971567, iteration: 56643
loss: 0.968061625957489,grad_norm: 0.9922553891308209, iteration: 56644
loss: 1.0146769285202026,grad_norm: 0.9999991235772676, iteration: 56645
loss: 1.017261266708374,grad_norm: 0.9999991977281875, iteration: 56646
loss: 0.9818647503852844,grad_norm: 0.9999992005304136, iteration: 56647
loss: 1.0406074523925781,grad_norm: 0.9609634471015727, iteration: 56648
loss: 1.0135703086853027,grad_norm: 0.9482532588404556, iteration: 56649
loss: 1.0250916481018066,grad_norm: 0.9999990077460779, iteration: 56650
loss: 1.0200748443603516,grad_norm: 0.9999994412769839, iteration: 56651
loss: 0.9934158325195312,grad_norm: 0.999999132880307, iteration: 56652
loss: 1.1992182731628418,grad_norm: 0.9999999976732366, iteration: 56653
loss: 1.0156580209732056,grad_norm: 0.9999990643734459, iteration: 56654
loss: 1.0081876516342163,grad_norm: 0.9999990044656283, iteration: 56655
loss: 1.0173438787460327,grad_norm: 0.9018069967392396, iteration: 56656
loss: 1.0165244340896606,grad_norm: 0.9398269865789536, iteration: 56657
loss: 0.9749369621276855,grad_norm: 0.9999991819632412, iteration: 56658
loss: 1.001067042350769,grad_norm: 0.9999990659059357, iteration: 56659
loss: 1.0350170135498047,grad_norm: 0.854083216714402, iteration: 56660
loss: 1.0165345668792725,grad_norm: 0.9999989803667423, iteration: 56661
loss: 0.9731717705726624,grad_norm: 0.9999990625644818, iteration: 56662
loss: 1.0304113626480103,grad_norm: 0.9999990959321114, iteration: 56663
loss: 1.0212880373001099,grad_norm: 0.999999176473841, iteration: 56664
loss: 1.008853793144226,grad_norm: 0.999999267406073, iteration: 56665
loss: 1.0065057277679443,grad_norm: 0.8569730026649509, iteration: 56666
loss: 1.038001537322998,grad_norm: 0.9999990234089525, iteration: 56667
loss: 0.9979659914970398,grad_norm: 0.8460236861050703, iteration: 56668
loss: 0.9924414753913879,grad_norm: 0.8465028609936451, iteration: 56669
loss: 0.9939422607421875,grad_norm: 0.999999153604864, iteration: 56670
loss: 1.020308017730713,grad_norm: 0.8787172604045821, iteration: 56671
loss: 0.9809330701828003,grad_norm: 0.9999989105703642, iteration: 56672
loss: 1.0030174255371094,grad_norm: 0.9999989684933295, iteration: 56673
loss: 0.9709676504135132,grad_norm: 0.9254399969558801, iteration: 56674
loss: 0.9868248701095581,grad_norm: 0.8850084596237906, iteration: 56675
loss: 0.9829601049423218,grad_norm: 0.9999990569549344, iteration: 56676
loss: 1.0014055967330933,grad_norm: 0.9999990433888404, iteration: 56677
loss: 0.9902271032333374,grad_norm: 0.9753378024519176, iteration: 56678
loss: 1.0599331855773926,grad_norm: 0.9999994117006198, iteration: 56679
loss: 1.0021559000015259,grad_norm: 0.9999991000921843, iteration: 56680
loss: 1.0025854110717773,grad_norm: 0.9373351282938345, iteration: 56681
loss: 1.010045051574707,grad_norm: 0.8472763601795719, iteration: 56682
loss: 1.0137524604797363,grad_norm: 0.9770779465248374, iteration: 56683
loss: 0.9669756889343262,grad_norm: 0.9999989448842469, iteration: 56684
loss: 1.0000580549240112,grad_norm: 0.9999990940393343, iteration: 56685
loss: 0.9785621762275696,grad_norm: 0.9999990874648902, iteration: 56686
loss: 1.0054413080215454,grad_norm: 0.9862872155194007, iteration: 56687
loss: 1.0074154138565063,grad_norm: 0.9999991911238136, iteration: 56688
loss: 1.037238359451294,grad_norm: 0.9999993959876393, iteration: 56689
loss: 1.0038697719573975,grad_norm: 0.9999998539751846, iteration: 56690
loss: 1.0219151973724365,grad_norm: 0.9999990283593444, iteration: 56691
loss: 1.0208014249801636,grad_norm: 0.8213768057276957, iteration: 56692
loss: 1.0044605731964111,grad_norm: 0.7984336813625256, iteration: 56693
loss: 0.9472953081130981,grad_norm: 0.9999991817707976, iteration: 56694
loss: 0.9960551857948303,grad_norm: 0.9999990992064761, iteration: 56695
loss: 0.9632364511489868,grad_norm: 0.9999989567420754, iteration: 56696
loss: 1.0040130615234375,grad_norm: 0.9999991630945986, iteration: 56697
loss: 1.0109002590179443,grad_norm: 0.9875017444258664, iteration: 56698
loss: 0.9408036470413208,grad_norm: 0.9999991858530454, iteration: 56699
loss: 0.9984492063522339,grad_norm: 0.9999998110023295, iteration: 56700
loss: 0.9731516242027283,grad_norm: 0.8033319538756792, iteration: 56701
loss: 1.0146909952163696,grad_norm: 0.9999991109293735, iteration: 56702
loss: 1.0020496845245361,grad_norm: 0.9999991895252618, iteration: 56703
loss: 1.030005931854248,grad_norm: 0.9152978883338944, iteration: 56704
loss: 0.9649738073348999,grad_norm: 0.999999366686108, iteration: 56705
loss: 0.9955869913101196,grad_norm: 0.9999994754397316, iteration: 56706
loss: 1.0001128911972046,grad_norm: 0.9571789780483391, iteration: 56707
loss: 0.9503356218338013,grad_norm: 0.9959553598792088, iteration: 56708
loss: 1.0182759761810303,grad_norm: 0.9344383822525892, iteration: 56709
loss: 0.9990463852882385,grad_norm: 0.9999990068430357, iteration: 56710
loss: 1.0588257312774658,grad_norm: 0.9999996892956805, iteration: 56711
loss: 0.9994130730628967,grad_norm: 0.9812691034841404, iteration: 56712
loss: 1.0048681497573853,grad_norm: 0.8365731956556157, iteration: 56713
loss: 0.9998645186424255,grad_norm: 0.8831860243089202, iteration: 56714
loss: 1.006268858909607,grad_norm: 0.9999991285654191, iteration: 56715
loss: 0.9985250234603882,grad_norm: 0.9999992167445512, iteration: 56716
loss: 1.0259233713150024,grad_norm: 0.9999991009771517, iteration: 56717
loss: 1.0560822486877441,grad_norm: 0.9999992482985708, iteration: 56718
loss: 1.0578396320343018,grad_norm: 0.9999996420705395, iteration: 56719
loss: 0.9958108067512512,grad_norm: 0.9250768502008548, iteration: 56720
loss: 0.9744921326637268,grad_norm: 0.9892566109079964, iteration: 56721
loss: 0.9797214269638062,grad_norm: 0.9999990859034094, iteration: 56722
loss: 0.9832992553710938,grad_norm: 0.9588742130577835, iteration: 56723
loss: 0.9320210814476013,grad_norm: 0.9999990906467175, iteration: 56724
loss: 0.9793567061424255,grad_norm: 0.9834927212876529, iteration: 56725
loss: 0.9567142724990845,grad_norm: 0.9518156158264117, iteration: 56726
loss: 1.0285931825637817,grad_norm: 0.9999991447761591, iteration: 56727
loss: 0.9948091506958008,grad_norm: 0.9425308809117021, iteration: 56728
loss: 0.999750554561615,grad_norm: 0.8558457949480553, iteration: 56729
loss: 1.0168777704238892,grad_norm: 0.9999990994296828, iteration: 56730
loss: 0.9884418845176697,grad_norm: 0.9676778219509662, iteration: 56731
loss: 1.0088417530059814,grad_norm: 0.9164685066579675, iteration: 56732
loss: 1.0273382663726807,grad_norm: 0.9999990291858573, iteration: 56733
loss: 1.029032826423645,grad_norm: 0.9241912887786073, iteration: 56734
loss: 1.0479735136032104,grad_norm: 0.9999991270684009, iteration: 56735
loss: 1.020743489265442,grad_norm: 0.9808125287607582, iteration: 56736
loss: 1.0061843395233154,grad_norm: 0.9999992859424517, iteration: 56737
loss: 0.9901695251464844,grad_norm: 0.8386723039603434, iteration: 56738
loss: 1.009520173072815,grad_norm: 0.9999990453724366, iteration: 56739
loss: 0.9313539266586304,grad_norm: 0.9999991415458601, iteration: 56740
loss: 0.9758330583572388,grad_norm: 0.9662706188015528, iteration: 56741
loss: 1.0294837951660156,grad_norm: 0.9999996893858721, iteration: 56742
loss: 1.0015581846237183,grad_norm: 0.9999990867109836, iteration: 56743
loss: 0.9806433320045471,grad_norm: 0.9059672323964053, iteration: 56744
loss: 0.9651630520820618,grad_norm: 0.9284378557701382, iteration: 56745
loss: 1.0261846780776978,grad_norm: 0.9999989751638585, iteration: 56746
loss: 0.9939066767692566,grad_norm: 0.9999991550108481, iteration: 56747
loss: 1.0001932382583618,grad_norm: 0.9552624604690807, iteration: 56748
loss: 1.0370051860809326,grad_norm: 0.8281467957176798, iteration: 56749
loss: 0.9842971563339233,grad_norm: 0.9531046127517605, iteration: 56750
loss: 0.9914736747741699,grad_norm: 0.9999989956962508, iteration: 56751
loss: 1.0171416997909546,grad_norm: 0.9999991629297997, iteration: 56752
loss: 1.024022102355957,grad_norm: 0.9999991231746549, iteration: 56753
loss: 0.9542422890663147,grad_norm: 0.9999991535154639, iteration: 56754
loss: 1.0064723491668701,grad_norm: 0.9999990971425985, iteration: 56755
loss: 1.0258642435073853,grad_norm: 0.9169981612743494, iteration: 56756
loss: 0.9976468086242676,grad_norm: 0.9195325231279049, iteration: 56757
loss: 0.9800733327865601,grad_norm: 0.9279927374850692, iteration: 56758
loss: 0.9987821578979492,grad_norm: 0.999999171721456, iteration: 56759
loss: 0.999824583530426,grad_norm: 0.9999990261909768, iteration: 56760
loss: 1.0143336057662964,grad_norm: 0.9236403426118709, iteration: 56761
loss: 1.0294982194900513,grad_norm: 0.887984107170532, iteration: 56762
loss: 1.002834677696228,grad_norm: 0.9462523365440668, iteration: 56763
loss: 0.9818936586380005,grad_norm: 0.9640404489748098, iteration: 56764
loss: 0.9633784890174866,grad_norm: 0.9999991594105728, iteration: 56765
loss: 0.9763458371162415,grad_norm: 0.9999990815899075, iteration: 56766
loss: 0.9995203018188477,grad_norm: 0.999999160642134, iteration: 56767
loss: 1.0079224109649658,grad_norm: 0.8689530576606734, iteration: 56768
loss: 1.0212523937225342,grad_norm: 0.8852524898362603, iteration: 56769
loss: 0.9760552048683167,grad_norm: 0.9999992254771424, iteration: 56770
loss: 1.0566660165786743,grad_norm: 0.99999921012697, iteration: 56771
loss: 1.029159426689148,grad_norm: 0.9807840838493076, iteration: 56772
loss: 1.0320669412612915,grad_norm: 0.999999341259668, iteration: 56773
loss: 0.9909346699714661,grad_norm: 0.9999991114407504, iteration: 56774
loss: 1.0036511421203613,grad_norm: 0.9999993442288605, iteration: 56775
loss: 1.0303983688354492,grad_norm: 0.9999992278561263, iteration: 56776
loss: 0.9715293049812317,grad_norm: 0.9745351935611862, iteration: 56777
loss: 0.9966058731079102,grad_norm: 0.9923025747415575, iteration: 56778
loss: 1.0259751081466675,grad_norm: 0.9481686038997627, iteration: 56779
loss: 0.986198365688324,grad_norm: 0.9015387603801923, iteration: 56780
loss: 1.0213702917099,grad_norm: 0.9999990892777116, iteration: 56781
loss: 0.9855008125305176,grad_norm: 0.9999990658028731, iteration: 56782
loss: 1.0450794696807861,grad_norm: 0.9999989280485092, iteration: 56783
loss: 1.0417338609695435,grad_norm: 0.9999996135940792, iteration: 56784
loss: 1.0132578611373901,grad_norm: 0.9193513064087989, iteration: 56785
loss: 0.9718971848487854,grad_norm: 0.9360032824506628, iteration: 56786
loss: 1.0566855669021606,grad_norm: 0.9787718080984974, iteration: 56787
loss: 1.0152156352996826,grad_norm: 0.9735869647547005, iteration: 56788
loss: 0.9841864109039307,grad_norm: 0.8442817098483748, iteration: 56789
loss: 1.020740270614624,grad_norm: 0.9999993606254751, iteration: 56790
loss: 1.0051603317260742,grad_norm: 0.9999990626223799, iteration: 56791
loss: 0.9830885529518127,grad_norm: 0.9189296222517868, iteration: 56792
loss: 0.9703890085220337,grad_norm: 0.9999992317384727, iteration: 56793
loss: 0.9989199638366699,grad_norm: 0.999999144139644, iteration: 56794
loss: 0.9977760314941406,grad_norm: 0.9999991106715562, iteration: 56795
loss: 1.0067453384399414,grad_norm: 0.9999992945413625, iteration: 56796
loss: 1.0104601383209229,grad_norm: 0.9999992390089144, iteration: 56797
loss: 1.0096518993377686,grad_norm: 0.9999991089615173, iteration: 56798
loss: 1.009901762008667,grad_norm: 0.9999990414851072, iteration: 56799
loss: 1.0344059467315674,grad_norm: 0.9999990794688602, iteration: 56800
loss: 0.9887439012527466,grad_norm: 0.9999990983476761, iteration: 56801
loss: 1.0065398216247559,grad_norm: 0.9999990882317953, iteration: 56802
loss: 0.9892212748527527,grad_norm: 0.8691022955269718, iteration: 56803
loss: 1.0065066814422607,grad_norm: 0.9999992222221477, iteration: 56804
loss: 1.0233445167541504,grad_norm: 0.9999997230095998, iteration: 56805
loss: 1.0264581441879272,grad_norm: 0.8337886648215902, iteration: 56806
loss: 0.9524374604225159,grad_norm: 0.9920828935887932, iteration: 56807
loss: 0.9981356859207153,grad_norm: 0.9999990321507956, iteration: 56808
loss: 1.0067449808120728,grad_norm: 0.901413540075622, iteration: 56809
loss: 0.9876806735992432,grad_norm: 0.9999991860915564, iteration: 56810
loss: 1.019529104232788,grad_norm: 0.9999991679409527, iteration: 56811
loss: 1.0151705741882324,grad_norm: 0.9999992636228785, iteration: 56812
loss: 1.0124943256378174,grad_norm: 0.9999990520571642, iteration: 56813
loss: 0.999840497970581,grad_norm: 0.9719736850446561, iteration: 56814
loss: 0.977294921875,grad_norm: 0.9999993653194004, iteration: 56815
loss: 0.9894252419471741,grad_norm: 0.8239322583628725, iteration: 56816
loss: 1.0199476480484009,grad_norm: 0.9999991331747289, iteration: 56817
loss: 1.0868752002716064,grad_norm: 0.9999996504782867, iteration: 56818
loss: 0.9846147298812866,grad_norm: 0.9999991233256185, iteration: 56819
loss: 0.9630492925643921,grad_norm: 0.9467232505122051, iteration: 56820
loss: 1.0294584035873413,grad_norm: 0.9999990418486908, iteration: 56821
loss: 0.9869263768196106,grad_norm: 0.9999991473973934, iteration: 56822
loss: 1.031607985496521,grad_norm: 0.8306735508430136, iteration: 56823
loss: 1.000950813293457,grad_norm: 0.9999989556320181, iteration: 56824
loss: 1.0131657123565674,grad_norm: 0.9999995097610489, iteration: 56825
loss: 1.0188164710998535,grad_norm: 0.9999993736685928, iteration: 56826
loss: 0.9611973762512207,grad_norm: 0.9868612359527003, iteration: 56827
loss: 1.0089361667633057,grad_norm: 0.9843021011335162, iteration: 56828
loss: 0.9692177176475525,grad_norm: 0.9999991244598927, iteration: 56829
loss: 1.0067955255508423,grad_norm: 0.9144102775885046, iteration: 56830
loss: 1.028704047203064,grad_norm: 0.8659596255679481, iteration: 56831
loss: 1.0696862936019897,grad_norm: 0.9760797105969176, iteration: 56832
loss: 0.9834380745887756,grad_norm: 0.9999992502472651, iteration: 56833
loss: 1.0577616691589355,grad_norm: 0.9999992147365563, iteration: 56834
loss: 1.0208719968795776,grad_norm: 0.9999993544594044, iteration: 56835
loss: 1.0172871351242065,grad_norm: 0.9386996423571302, iteration: 56836
loss: 0.9677820801734924,grad_norm: 0.9999991950017536, iteration: 56837
loss: 0.9639312028884888,grad_norm: 0.9466043099134837, iteration: 56838
loss: 1.1309341192245483,grad_norm: 0.9999998768869991, iteration: 56839
loss: 1.003430724143982,grad_norm: 0.9165115030621241, iteration: 56840
loss: 1.058855414390564,grad_norm: 0.999999197165016, iteration: 56841
loss: 0.9705001711845398,grad_norm: 0.9999991245809344, iteration: 56842
loss: 1.0320549011230469,grad_norm: 0.9999991589175042, iteration: 56843
loss: 1.0215587615966797,grad_norm: 0.9999991498063173, iteration: 56844
loss: 1.042209267616272,grad_norm: 0.999999520952117, iteration: 56845
loss: 0.977364718914032,grad_norm: 0.96575219734038, iteration: 56846
loss: 1.0334278345108032,grad_norm: 0.8948331935909927, iteration: 56847
loss: 1.0311909914016724,grad_norm: 0.9999992044014234, iteration: 56848
loss: 1.0240639448165894,grad_norm: 0.8526227323457578, iteration: 56849
loss: 1.032731294631958,grad_norm: 0.9999991718929347, iteration: 56850
loss: 0.9776325225830078,grad_norm: 0.9531127778798582, iteration: 56851
loss: 1.003944754600525,grad_norm: 0.9400252907689627, iteration: 56852
loss: 1.0059388875961304,grad_norm: 0.999999041831682, iteration: 56853
loss: 1.0013132095336914,grad_norm: 0.9008825706901255, iteration: 56854
loss: 1.0871541500091553,grad_norm: 0.999998948030796, iteration: 56855
loss: 1.0322848558425903,grad_norm: 0.9999990914987954, iteration: 56856
loss: 1.0343204736709595,grad_norm: 0.9999991046426014, iteration: 56857
loss: 0.980063796043396,grad_norm: 0.9999989972632252, iteration: 56858
loss: 0.9934820532798767,grad_norm: 0.8513638638218965, iteration: 56859
loss: 0.9739776849746704,grad_norm: 0.9999990855291603, iteration: 56860
loss: 1.0401934385299683,grad_norm: 0.9448537198961323, iteration: 56861
loss: 1.0071429014205933,grad_norm: 0.9859084069692592, iteration: 56862
loss: 1.0179768800735474,grad_norm: 0.9999993271036252, iteration: 56863
loss: 0.9882920980453491,grad_norm: 0.8988461232662914, iteration: 56864
loss: 0.996931254863739,grad_norm: 0.9999992235663782, iteration: 56865
loss: 1.0089565515518188,grad_norm: 0.9542132583685339, iteration: 56866
loss: 1.009626865386963,grad_norm: 0.9999992224776918, iteration: 56867
loss: 1.014676809310913,grad_norm: 0.9720053312262715, iteration: 56868
loss: 1.0068384408950806,grad_norm: 0.9510495784134522, iteration: 56869
loss: 1.0551670789718628,grad_norm: 0.9813270357653727, iteration: 56870
loss: 0.9725216031074524,grad_norm: 0.9416377230277259, iteration: 56871
loss: 1.002748966217041,grad_norm: 0.999999363276124, iteration: 56872
loss: 1.0059170722961426,grad_norm: 0.8456224369926428, iteration: 56873
loss: 1.0462255477905273,grad_norm: 0.9210024658150864, iteration: 56874
loss: 0.994616687297821,grad_norm: 0.9999991083792834, iteration: 56875
loss: 1.0019371509552002,grad_norm: 0.9773012711629082, iteration: 56876
loss: 1.0214481353759766,grad_norm: 0.7896816223586589, iteration: 56877
loss: 0.9948513507843018,grad_norm: 0.8781517036648853, iteration: 56878
loss: 0.9975951313972473,grad_norm: 0.9999991190217392, iteration: 56879
loss: 0.9946072697639465,grad_norm: 0.8876567385123106, iteration: 56880
loss: 0.9855002164840698,grad_norm: 0.9999992504695747, iteration: 56881
loss: 0.9777103662490845,grad_norm: 0.9911851060599588, iteration: 56882
loss: 0.9966751337051392,grad_norm: 0.9441009494227126, iteration: 56883
loss: 0.9737436771392822,grad_norm: 0.9925959358418369, iteration: 56884
loss: 0.9776049256324768,grad_norm: 0.9999990100057368, iteration: 56885
loss: 1.0146517753601074,grad_norm: 0.999999080465444, iteration: 56886
loss: 0.9990251660346985,grad_norm: 0.999999441087446, iteration: 56887
loss: 1.0250684022903442,grad_norm: 0.999999189571524, iteration: 56888
loss: 1.0034860372543335,grad_norm: 0.9999990651994443, iteration: 56889
loss: 0.9752164483070374,grad_norm: 0.9347916634597272, iteration: 56890
loss: 0.9459514617919922,grad_norm: 0.9999991842302581, iteration: 56891
loss: 0.965766191482544,grad_norm: 0.9511235055435696, iteration: 56892
loss: 0.975131630897522,grad_norm: 0.9999991530835755, iteration: 56893
loss: 1.00912606716156,grad_norm: 0.9999992725307385, iteration: 56894
loss: 1.042811632156372,grad_norm: 0.9999989976725326, iteration: 56895
loss: 1.0122926235198975,grad_norm: 0.9999992035441146, iteration: 56896
loss: 1.0269659757614136,grad_norm: 0.9999991608374941, iteration: 56897
loss: 1.0025557279586792,grad_norm: 0.8015282626154704, iteration: 56898
loss: 1.0044571161270142,grad_norm: 0.9999991483130684, iteration: 56899
loss: 1.022234559059143,grad_norm: 0.9999992114780744, iteration: 56900
loss: 0.9925130009651184,grad_norm: 0.9999991167957326, iteration: 56901
loss: 0.9977569580078125,grad_norm: 0.9999991941395028, iteration: 56902
loss: 0.9929320216178894,grad_norm: 0.9999991921236825, iteration: 56903
loss: 0.9808074235916138,grad_norm: 0.9999993419764946, iteration: 56904
loss: 0.9676951766014099,grad_norm: 0.9765037790479592, iteration: 56905
loss: 0.9903028011322021,grad_norm: 0.8981720218260669, iteration: 56906
loss: 1.0042660236358643,grad_norm: 0.8870553823516019, iteration: 56907
loss: 1.0218738317489624,grad_norm: 0.9999991640392909, iteration: 56908
loss: 0.964685320854187,grad_norm: 0.8908786681119581, iteration: 56909
loss: 0.9749388694763184,grad_norm: 0.9773563600325328, iteration: 56910
loss: 0.9851562976837158,grad_norm: 0.9999991110919133, iteration: 56911
loss: 1.0116496086120605,grad_norm: 0.8583847673897286, iteration: 56912
loss: 0.949880063533783,grad_norm: 0.9961952180504214, iteration: 56913
loss: 1.0379703044891357,grad_norm: 0.9999990044942972, iteration: 56914
loss: 1.029289722442627,grad_norm: 0.8033701683577581, iteration: 56915
loss: 0.9913009405136108,grad_norm: 0.9999990436957902, iteration: 56916
loss: 1.0105838775634766,grad_norm: 0.9931976967208251, iteration: 56917
loss: 1.0207624435424805,grad_norm: 0.9999989688313146, iteration: 56918
loss: 0.9621982574462891,grad_norm: 0.9999992084728512, iteration: 56919
loss: 1.2435659170150757,grad_norm: 0.9999999362737036, iteration: 56920
loss: 0.9945282340049744,grad_norm: 0.9620569149524002, iteration: 56921
loss: 1.004007339477539,grad_norm: 0.9999992032632592, iteration: 56922
loss: 0.9966827630996704,grad_norm: 0.9113965037823007, iteration: 56923
loss: 0.9747792482376099,grad_norm: 0.9999989921696539, iteration: 56924
loss: 0.9908068180084229,grad_norm: 0.999998971581702, iteration: 56925
loss: 0.9773666858673096,grad_norm: 0.9999992546610376, iteration: 56926
loss: 1.0012387037277222,grad_norm: 0.9999991589046844, iteration: 56927
loss: 1.0085902214050293,grad_norm: 0.8656888177857794, iteration: 56928
loss: 1.0191924571990967,grad_norm: 0.8502736569133436, iteration: 56929
loss: 1.0160545110702515,grad_norm: 0.9999997192483355, iteration: 56930
loss: 1.2815533876419067,grad_norm: 0.9999998331588847, iteration: 56931
loss: 1.036246657371521,grad_norm: 0.9641145383988894, iteration: 56932
loss: 0.9958446621894836,grad_norm: 0.8625350214701883, iteration: 56933
loss: 0.9979826807975769,grad_norm: 0.9703854448613494, iteration: 56934
loss: 1.0031205415725708,grad_norm: 0.9523178951426242, iteration: 56935
loss: 1.0025726556777954,grad_norm: 0.9999996059613806, iteration: 56936
loss: 0.9961205720901489,grad_norm: 0.9999991896327818, iteration: 56937
loss: 0.9794562458992004,grad_norm: 0.9999992695864127, iteration: 56938
loss: 1.0083657503128052,grad_norm: 0.9533247428987749, iteration: 56939
loss: 0.9578410387039185,grad_norm: 0.9999990803347067, iteration: 56940
loss: 0.9802107214927673,grad_norm: 0.9999998518010591, iteration: 56941
loss: 0.9917778968811035,grad_norm: 0.9999991069103045, iteration: 56942
loss: 1.0023539066314697,grad_norm: 0.9999992289719271, iteration: 56943
loss: 1.0122950077056885,grad_norm: 0.9768010541410943, iteration: 56944
loss: 0.9836453795433044,grad_norm: 0.9999990523309944, iteration: 56945
loss: 1.0029749870300293,grad_norm: 0.9999991282180546, iteration: 56946
loss: 1.002971887588501,grad_norm: 0.9999992504941877, iteration: 56947
loss: 0.9659932851791382,grad_norm: 0.9999989261322578, iteration: 56948
loss: 0.9784120917320251,grad_norm: 0.9999991075232258, iteration: 56949
loss: 1.0386009216308594,grad_norm: 0.9472922549491717, iteration: 56950
loss: 0.9774548411369324,grad_norm: 0.9999991888545021, iteration: 56951
loss: 1.0233757495880127,grad_norm: 0.9999995109953839, iteration: 56952
loss: 1.1133527755737305,grad_norm: 0.9999998618407357, iteration: 56953
loss: 1.0597244501113892,grad_norm: 0.9999992498822794, iteration: 56954
loss: 1.0130897760391235,grad_norm: 0.9702614138850189, iteration: 56955
loss: 0.9850167632102966,grad_norm: 0.9999991669922808, iteration: 56956
loss: 0.9805543422698975,grad_norm: 0.8863294377764535, iteration: 56957
loss: 1.021659016609192,grad_norm: 0.999999452608371, iteration: 56958
loss: 0.9819095730781555,grad_norm: 0.9235539627950823, iteration: 56959
loss: 1.0728552341461182,grad_norm: 0.999999580936194, iteration: 56960
loss: 1.0274407863616943,grad_norm: 0.9999992182028218, iteration: 56961
loss: 0.9874328374862671,grad_norm: 0.9914465032190323, iteration: 56962
loss: 1.0112618207931519,grad_norm: 0.8941512910532088, iteration: 56963
loss: 0.9980312585830688,grad_norm: 0.9999992524648853, iteration: 56964
loss: 1.0460015535354614,grad_norm: 0.9999990758561901, iteration: 56965
loss: 1.0017108917236328,grad_norm: 0.999999233257395, iteration: 56966
loss: 1.043491244316101,grad_norm: 0.9999994496026311, iteration: 56967
loss: 1.037068247795105,grad_norm: 0.9326230991904696, iteration: 56968
loss: 1.0073528289794922,grad_norm: 0.9491894922849632, iteration: 56969
loss: 0.995571494102478,grad_norm: 0.9154946363351864, iteration: 56970
loss: 0.9977086186408997,grad_norm: 0.9999990910471992, iteration: 56971
loss: 0.9856534600257874,grad_norm: 0.9606020142693664, iteration: 56972
loss: 1.0046049356460571,grad_norm: 0.9999992931498426, iteration: 56973
loss: 1.0528122186660767,grad_norm: 0.9999998819352782, iteration: 56974
loss: 1.024143099784851,grad_norm: 0.9999990127302764, iteration: 56975
loss: 0.9809882640838623,grad_norm: 0.9999991979925777, iteration: 56976
loss: 1.006490707397461,grad_norm: 0.9999993704996386, iteration: 56977
loss: 0.9927322864532471,grad_norm: 0.8604005161849441, iteration: 56978
loss: 1.0170847177505493,grad_norm: 0.8963055463782486, iteration: 56979
loss: 1.0085821151733398,grad_norm: 0.9625138087303811, iteration: 56980
loss: 1.0133262872695923,grad_norm: 0.9999991807001026, iteration: 56981
loss: 0.9643465280532837,grad_norm: 0.9999996851650339, iteration: 56982
loss: 1.0099908113479614,grad_norm: 0.9999993187583253, iteration: 56983
loss: 1.0034018754959106,grad_norm: 0.9999989914124702, iteration: 56984
loss: 1.0192897319793701,grad_norm: 0.9999997105811864, iteration: 56985
loss: 1.0504250526428223,grad_norm: 0.999999158902388, iteration: 56986
loss: 0.9804717302322388,grad_norm: 0.9722676929234652, iteration: 56987
loss: 1.019274115562439,grad_norm: 0.9934150707371032, iteration: 56988
loss: 1.0117664337158203,grad_norm: 0.9139124931017404, iteration: 56989
loss: 1.0031381845474243,grad_norm: 0.999999020162081, iteration: 56990
loss: 1.0143253803253174,grad_norm: 0.9347185761719639, iteration: 56991
loss: 1.009031057357788,grad_norm: 0.9999991326245472, iteration: 56992
loss: 0.9778327345848083,grad_norm: 0.9847269304359693, iteration: 56993
loss: 1.015553593635559,grad_norm: 0.999999158675848, iteration: 56994
loss: 0.9901875257492065,grad_norm: 0.9534447198169795, iteration: 56995
loss: 0.9939125180244446,grad_norm: 0.9205448036853486, iteration: 56996
loss: 0.9912993311882019,grad_norm: 0.8393008830861259, iteration: 56997
loss: 0.9756220579147339,grad_norm: 0.9326543486102812, iteration: 56998
loss: 1.0128413438796997,grad_norm: 0.8016577053682774, iteration: 56999
loss: 0.9820165038108826,grad_norm: 0.9999990264944314, iteration: 57000
loss: 1.01025390625,grad_norm: 0.9999993134143116, iteration: 57001
loss: 0.9814856648445129,grad_norm: 0.9999990139195664, iteration: 57002
loss: 1.0464320182800293,grad_norm: 0.9999997267219632, iteration: 57003
loss: 0.9885119795799255,grad_norm: 0.9999990142555718, iteration: 57004
loss: 0.9947095513343811,grad_norm: 0.93332020552751, iteration: 57005
loss: 1.0169082880020142,grad_norm: 0.9999992229200322, iteration: 57006
loss: 1.0152828693389893,grad_norm: 0.9999990714614738, iteration: 57007
loss: 1.082046627998352,grad_norm: 0.9999995584236194, iteration: 57008
loss: 1.0580681562423706,grad_norm: 0.9999999124581971, iteration: 57009
loss: 0.9884732961654663,grad_norm: 0.8442076156161751, iteration: 57010
loss: 0.9789140224456787,grad_norm: 0.9999993578204421, iteration: 57011
loss: 0.9944793581962585,grad_norm: 0.9485839710367647, iteration: 57012
loss: 0.992540180683136,grad_norm: 0.9999993014272, iteration: 57013
loss: 0.9862240552902222,grad_norm: 0.7804296511682917, iteration: 57014
loss: 1.0305131673812866,grad_norm: 0.9999991084052967, iteration: 57015
loss: 1.0246742963790894,grad_norm: 0.9608900646804901, iteration: 57016
loss: 1.0263869762420654,grad_norm: 0.8890379041644612, iteration: 57017
loss: 1.0147632360458374,grad_norm: 0.9718653558541905, iteration: 57018
loss: 1.0274382829666138,grad_norm: 0.9999990855400207, iteration: 57019
loss: 1.0236095190048218,grad_norm: 0.999998973623757, iteration: 57020
loss: 0.988153874874115,grad_norm: 0.9268006920691854, iteration: 57021
loss: 1.0028553009033203,grad_norm: 0.9999990942692477, iteration: 57022
loss: 1.0013986825942993,grad_norm: 0.99999910438138, iteration: 57023
loss: 0.999722421169281,grad_norm: 0.9999991263885456, iteration: 57024
loss: 0.9880493879318237,grad_norm: 0.9537916921795329, iteration: 57025
loss: 1.031931757926941,grad_norm: 0.8888365483420471, iteration: 57026
loss: 0.9966924786567688,grad_norm: 0.9999992506017994, iteration: 57027
loss: 0.9860122203826904,grad_norm: 0.9903645311208392, iteration: 57028
loss: 0.9941167235374451,grad_norm: 0.9999992130925801, iteration: 57029
loss: 0.9993979334831238,grad_norm: 0.8823135399884209, iteration: 57030
loss: 0.9797402620315552,grad_norm: 0.9674643042311599, iteration: 57031
loss: 1.0178508758544922,grad_norm: 0.9999992413105498, iteration: 57032
loss: 0.9583215713500977,grad_norm: 0.9999989816910538, iteration: 57033
loss: 1.0070528984069824,grad_norm: 0.999999248902111, iteration: 57034
loss: 1.0106792449951172,grad_norm: 0.8830322541281296, iteration: 57035
loss: 0.9956846833229065,grad_norm: 0.9999990778483275, iteration: 57036
loss: 1.0085723400115967,grad_norm: 0.9999992469587509, iteration: 57037
loss: 1.0340723991394043,grad_norm: 0.9580919617349698, iteration: 57038
loss: 0.9942348003387451,grad_norm: 0.8958677430233033, iteration: 57039
loss: 1.0036362409591675,grad_norm: 0.9999989928243831, iteration: 57040
loss: 1.010019302368164,grad_norm: 0.777517302233392, iteration: 57041
loss: 1.0074894428253174,grad_norm: 0.9786194261136647, iteration: 57042
loss: 0.9887948632240295,grad_norm: 0.9999991040923202, iteration: 57043
loss: 0.9642981290817261,grad_norm: 0.9628519193594488, iteration: 57044
loss: 0.9882947206497192,grad_norm: 0.9999991885879401, iteration: 57045
loss: 0.9769787192344666,grad_norm: 0.9999990857604881, iteration: 57046
loss: 1.010640025138855,grad_norm: 0.9999991462115425, iteration: 57047
loss: 1.0004050731658936,grad_norm: 0.9999992229145424, iteration: 57048
loss: 0.9600063562393188,grad_norm: 0.8530292242360072, iteration: 57049
loss: 1.0098177194595337,grad_norm: 0.9999991337328732, iteration: 57050
loss: 0.9995716214179993,grad_norm: 0.9369303370028181, iteration: 57051
loss: 0.985805332660675,grad_norm: 0.9999990144121014, iteration: 57052
loss: 1.0404611825942993,grad_norm: 0.9480579817601833, iteration: 57053
loss: 1.0202971696853638,grad_norm: 0.9736580195046292, iteration: 57054
loss: 0.9865552186965942,grad_norm: 0.89454039080854, iteration: 57055
loss: 1.0226166248321533,grad_norm: 0.9519226152364265, iteration: 57056
loss: 1.0315909385681152,grad_norm: 0.9999997165317243, iteration: 57057
loss: 1.004902958869934,grad_norm: 0.9999991421228931, iteration: 57058
loss: 0.9976479411125183,grad_norm: 0.9999991111907681, iteration: 57059
loss: 0.9796640276908875,grad_norm: 0.9999991677404431, iteration: 57060
loss: 0.9858768582344055,grad_norm: 0.9774208167081962, iteration: 57061
loss: 0.9653134942054749,grad_norm: 0.9999990900553652, iteration: 57062
loss: 0.9486172199249268,grad_norm: 0.9999990414904915, iteration: 57063
loss: 1.0081830024719238,grad_norm: 0.9999994104365842, iteration: 57064
loss: 0.9863211512565613,grad_norm: 0.9999992939722693, iteration: 57065
loss: 0.9800141453742981,grad_norm: 0.9999989468053577, iteration: 57066
loss: 1.0045983791351318,grad_norm: 0.8902270380341638, iteration: 57067
loss: 1.0172858238220215,grad_norm: 0.9999997360180447, iteration: 57068
loss: 1.0023466348648071,grad_norm: 0.9999995273631689, iteration: 57069
loss: 1.000495195388794,grad_norm: 0.9174261636427742, iteration: 57070
loss: 0.9687691926956177,grad_norm: 0.9999990751784514, iteration: 57071
loss: 0.9816818237304688,grad_norm: 0.9999991237861185, iteration: 57072
loss: 1.0260926485061646,grad_norm: 0.9534191336148431, iteration: 57073
loss: 0.9980275630950928,grad_norm: 0.9999993948513656, iteration: 57074
loss: 0.9543324112892151,grad_norm: 0.9618337191626074, iteration: 57075
loss: 1.0087213516235352,grad_norm: 0.9805411655555334, iteration: 57076
loss: 0.9933984875679016,grad_norm: 0.9646805252037779, iteration: 57077
loss: 0.989221453666687,grad_norm: 0.976859507231748, iteration: 57078
loss: 0.9560200572013855,grad_norm: 0.9999990913310822, iteration: 57079
loss: 1.0132051706314087,grad_norm: 0.9999989629241337, iteration: 57080
loss: 0.9775539636611938,grad_norm: 0.9999989475197293, iteration: 57081
loss: 0.9700794816017151,grad_norm: 0.9150786795609361, iteration: 57082
loss: 0.9828688502311707,grad_norm: 0.9999988687856705, iteration: 57083
loss: 1.0107829570770264,grad_norm: 0.999999140981387, iteration: 57084
loss: 1.0038878917694092,grad_norm: 0.9999991503915484, iteration: 57085
loss: 1.0321508646011353,grad_norm: 0.9279724483727216, iteration: 57086
loss: 1.0255299806594849,grad_norm: 0.9999993502132336, iteration: 57087
loss: 1.0008891820907593,grad_norm: 0.8907350108916181, iteration: 57088
loss: 0.9908842444419861,grad_norm: 0.9999990944468644, iteration: 57089
loss: 0.9866949319839478,grad_norm: 0.9555925499585926, iteration: 57090
loss: 0.9819561839103699,grad_norm: 0.999999218995154, iteration: 57091
loss: 1.0131338834762573,grad_norm: 0.9999989815155519, iteration: 57092
loss: 0.9856711626052856,grad_norm: 0.9999991393029325, iteration: 57093
loss: 0.9652946591377258,grad_norm: 0.9999991962483818, iteration: 57094
loss: 1.0043692588806152,grad_norm: 0.9999993561544974, iteration: 57095
loss: 1.014689564704895,grad_norm: 0.9276448843699026, iteration: 57096
loss: 0.9855692982673645,grad_norm: 0.9999992659446255, iteration: 57097
loss: 1.0196248292922974,grad_norm: 0.9999992371021429, iteration: 57098
loss: 1.012940526008606,grad_norm: 0.999999312380876, iteration: 57099
loss: 0.9965453743934631,grad_norm: 0.9338053376034231, iteration: 57100
loss: 0.9675108194351196,grad_norm: 0.9315685048856233, iteration: 57101
loss: 0.9665758013725281,grad_norm: 0.9999989639492813, iteration: 57102
loss: 1.0060209035873413,grad_norm: 0.9999991944296034, iteration: 57103
loss: 0.97982257604599,grad_norm: 0.8308494632324502, iteration: 57104
loss: 1.0055429935455322,grad_norm: 0.940220966640444, iteration: 57105
loss: 0.9945007562637329,grad_norm: 0.9999991283913515, iteration: 57106
loss: 1.0161176919937134,grad_norm: 0.9999991834093231, iteration: 57107
loss: 1.0098209381103516,grad_norm: 0.9999993916248218, iteration: 57108
loss: 0.9692392945289612,grad_norm: 0.9999992028645144, iteration: 57109
loss: 1.0515801906585693,grad_norm: 0.9999990267337703, iteration: 57110
loss: 0.9868839979171753,grad_norm: 0.911481811304494, iteration: 57111
loss: 1.040868878364563,grad_norm: 0.999999045797852, iteration: 57112
loss: 1.0508323907852173,grad_norm: 0.9999991050378825, iteration: 57113
loss: 0.9956094622612,grad_norm: 0.8864083901207538, iteration: 57114
loss: 0.9960227012634277,grad_norm: 0.9999997810666484, iteration: 57115
loss: 1.0252996683120728,grad_norm: 0.9999991724704386, iteration: 57116
loss: 0.9957001209259033,grad_norm: 0.9999996057009202, iteration: 57117
loss: 1.002016305923462,grad_norm: 0.9774547275926341, iteration: 57118
loss: 1.0331424474716187,grad_norm: 0.9999991824227562, iteration: 57119
loss: 0.9857545495033264,grad_norm: 0.9999991662026151, iteration: 57120
loss: 0.9959417581558228,grad_norm: 0.9999991323088678, iteration: 57121
loss: 1.0066717863082886,grad_norm: 0.9999990466827501, iteration: 57122
loss: 1.0031518936157227,grad_norm: 0.9999990649569412, iteration: 57123
loss: 1.0199640989303589,grad_norm: 0.9999991909915078, iteration: 57124
loss: 1.0122780799865723,grad_norm: 0.9351070825351926, iteration: 57125
loss: 1.0011570453643799,grad_norm: 0.9999989886747143, iteration: 57126
loss: 0.9728108644485474,grad_norm: 0.9999991289303509, iteration: 57127
loss: 0.98675936460495,grad_norm: 0.9999991846650604, iteration: 57128
loss: 1.0102661848068237,grad_norm: 0.9999992669304466, iteration: 57129
loss: 1.0304787158966064,grad_norm: 0.9701626349669505, iteration: 57130
loss: 0.9675117135047913,grad_norm: 0.8958503358620349, iteration: 57131
loss: 1.0178885459899902,grad_norm: 0.9999990381113545, iteration: 57132
loss: 1.0113797187805176,grad_norm: 0.947134085766262, iteration: 57133
loss: 0.9770799279212952,grad_norm: 0.9999991015500086, iteration: 57134
loss: 1.0082517862319946,grad_norm: 0.9999991616685658, iteration: 57135
loss: 1.0030245780944824,grad_norm: 0.999999091834027, iteration: 57136
loss: 1.012694239616394,grad_norm: 0.9999993911104575, iteration: 57137
loss: 1.091347575187683,grad_norm: 0.9999994070393546, iteration: 57138
loss: 0.9919614195823669,grad_norm: 0.9999991344549798, iteration: 57139
loss: 1.0021870136260986,grad_norm: 0.9999990011449769, iteration: 57140
loss: 0.9680851697921753,grad_norm: 0.9999991703945408, iteration: 57141
loss: 1.0511666536331177,grad_norm: 0.9999994576324507, iteration: 57142
loss: 0.9963974356651306,grad_norm: 0.9554502997410427, iteration: 57143
loss: 0.9941927194595337,grad_norm: 0.9999989957069234, iteration: 57144
loss: 0.9933616518974304,grad_norm: 0.9999991200009121, iteration: 57145
loss: 1.0277073383331299,grad_norm: 0.9999993416343441, iteration: 57146
loss: 1.0438714027404785,grad_norm: 0.9999991146109732, iteration: 57147
loss: 0.9940868020057678,grad_norm: 0.9910467771701319, iteration: 57148
loss: 1.0041148662567139,grad_norm: 0.9626569071662411, iteration: 57149
loss: 0.9938079714775085,grad_norm: 0.9793727280176887, iteration: 57150
loss: 0.996379017829895,grad_norm: 0.999999025816776, iteration: 57151
loss: 1.0202782154083252,grad_norm: 0.9999989639027786, iteration: 57152
loss: 1.0039911270141602,grad_norm: 0.9547551440350434, iteration: 57153
loss: 1.0010170936584473,grad_norm: 0.9562702676116765, iteration: 57154
loss: 1.0099639892578125,grad_norm: 0.9999991814475309, iteration: 57155
loss: 1.0001351833343506,grad_norm: 0.9411455894322096, iteration: 57156
loss: 1.0066719055175781,grad_norm: 0.9999994766924495, iteration: 57157
loss: 1.0342881679534912,grad_norm: 0.9999991577724546, iteration: 57158
loss: 1.023494839668274,grad_norm: 0.9999991139450463, iteration: 57159
loss: 1.0066509246826172,grad_norm: 0.9999993161643187, iteration: 57160
loss: 1.034033179283142,grad_norm: 0.9999991403068161, iteration: 57161
loss: 0.9865636229515076,grad_norm: 0.8556050429049031, iteration: 57162
loss: 0.9993736743927002,grad_norm: 0.8965726493886366, iteration: 57163
loss: 1.0133522748947144,grad_norm: 0.9999990611352803, iteration: 57164
loss: 1.0434861183166504,grad_norm: 0.9999990864715252, iteration: 57165
loss: 1.0343947410583496,grad_norm: 0.9276680252607034, iteration: 57166
loss: 1.0065585374832153,grad_norm: 0.9999990989813308, iteration: 57167
loss: 1.009286642074585,grad_norm: 0.9999991988272988, iteration: 57168
loss: 1.0080381631851196,grad_norm: 0.9999990152797951, iteration: 57169
loss: 1.008690595626831,grad_norm: 0.9179531586644046, iteration: 57170
loss: 1.0224334001541138,grad_norm: 0.9999993059387532, iteration: 57171
loss: 0.9845852255821228,grad_norm: 0.9999993200342582, iteration: 57172
loss: 1.0579638481140137,grad_norm: 0.9999991441641786, iteration: 57173
loss: 1.014390468597412,grad_norm: 0.8092505687693228, iteration: 57174
loss: 1.0428677797317505,grad_norm: 0.999999586695022, iteration: 57175
loss: 0.9931964874267578,grad_norm: 0.9999991269731238, iteration: 57176
loss: 1.0700562000274658,grad_norm: 0.9999995665380615, iteration: 57177
loss: 1.003972053527832,grad_norm: 0.9298600927575035, iteration: 57178
loss: 1.0283681154251099,grad_norm: 0.9999998483202143, iteration: 57179
loss: 1.0062657594680786,grad_norm: 0.9898463279406334, iteration: 57180
loss: 0.9934129118919373,grad_norm: 0.9999993170733009, iteration: 57181
loss: 0.994361937046051,grad_norm: 0.9525477445928556, iteration: 57182
loss: 1.0398191213607788,grad_norm: 0.9999990846768854, iteration: 57183
loss: 1.013764500617981,grad_norm: 0.8660470841812716, iteration: 57184
loss: 0.9411130547523499,grad_norm: 0.9999989371237575, iteration: 57185
loss: 0.9858144521713257,grad_norm: 0.9999991682116882, iteration: 57186
loss: 0.9751659631729126,grad_norm: 0.9116973585714052, iteration: 57187
loss: 1.0292481184005737,grad_norm: 0.9999993001889294, iteration: 57188
loss: 0.9756345748901367,grad_norm: 0.9999991117686183, iteration: 57189
loss: 1.016891598701477,grad_norm: 0.938636013344287, iteration: 57190
loss: 1.0078262090682983,grad_norm: 0.9253344449755685, iteration: 57191
loss: 0.9827950596809387,grad_norm: 0.9999990027148686, iteration: 57192
loss: 0.999878466129303,grad_norm: 0.9999991229621984, iteration: 57193
loss: 1.0105305910110474,grad_norm: 0.9999991047068528, iteration: 57194
loss: 0.9684416055679321,grad_norm: 0.9999991883817675, iteration: 57195
loss: 0.9779918789863586,grad_norm: 0.9999992521887326, iteration: 57196
loss: 0.9864118099212646,grad_norm: 0.9999990209039319, iteration: 57197
loss: 0.9755909442901611,grad_norm: 0.882545186128292, iteration: 57198
loss: 0.9904177784919739,grad_norm: 0.9999990799010807, iteration: 57199
loss: 1.0156383514404297,grad_norm: 0.9110483280545886, iteration: 57200
loss: 0.9980993866920471,grad_norm: 0.9442878067883425, iteration: 57201
loss: 1.067117691040039,grad_norm: 0.9999997657766557, iteration: 57202
loss: 1.0202147960662842,grad_norm: 0.9999990191591234, iteration: 57203
loss: 1.0230646133422852,grad_norm: 0.9519688738556673, iteration: 57204
loss: 1.077430009841919,grad_norm: 0.9999996254736994, iteration: 57205
loss: 0.989578902721405,grad_norm: 0.9812026735131714, iteration: 57206
loss: 0.9696174263954163,grad_norm: 0.964147867175539, iteration: 57207
loss: 0.9986307621002197,grad_norm: 0.84930919036879, iteration: 57208
loss: 0.9971376061439514,grad_norm: 0.9999990827075353, iteration: 57209
loss: 1.005226969718933,grad_norm: 0.999999652958021, iteration: 57210
loss: 1.0319138765335083,grad_norm: 0.9999990963374693, iteration: 57211
loss: 1.0325144529342651,grad_norm: 0.999999587630035, iteration: 57212
loss: 0.9878377318382263,grad_norm: 0.9209931595615328, iteration: 57213
loss: 0.9974493384361267,grad_norm: 0.9999991600506859, iteration: 57214
loss: 1.0090233087539673,grad_norm: 0.9999991490096374, iteration: 57215
loss: 1.0491362810134888,grad_norm: 0.9999995360775615, iteration: 57216
loss: 0.9833168983459473,grad_norm: 0.9998859037605616, iteration: 57217
loss: 1.038317322731018,grad_norm: 0.9999997402284353, iteration: 57218
loss: 0.96983402967453,grad_norm: 0.9999991712200827, iteration: 57219
loss: 0.9945600628852844,grad_norm: 0.954104480027971, iteration: 57220
loss: 1.1575711965560913,grad_norm: 0.9999997953769233, iteration: 57221
loss: 1.078770637512207,grad_norm: 0.9999990634905302, iteration: 57222
loss: 1.0554769039154053,grad_norm: 0.9999989722611433, iteration: 57223
loss: 0.9662484526634216,grad_norm: 0.9999991308584342, iteration: 57224
loss: 1.053520679473877,grad_norm: 0.9999994097902946, iteration: 57225
loss: 1.0226163864135742,grad_norm: 0.8984355160141201, iteration: 57226
loss: 0.983413815498352,grad_norm: 0.9999990176119105, iteration: 57227
loss: 1.0587846040725708,grad_norm: 0.9999998230907168, iteration: 57228
loss: 0.9889332056045532,grad_norm: 0.9999991495441559, iteration: 57229
loss: 1.03447425365448,grad_norm: 0.999999084826283, iteration: 57230
loss: 0.9893374443054199,grad_norm: 0.9999992286463638, iteration: 57231
loss: 1.1080834865570068,grad_norm: 0.999999551302366, iteration: 57232
loss: 1.0043829679489136,grad_norm: 0.9999994933702195, iteration: 57233
loss: 1.0329062938690186,grad_norm: 0.9999991163960846, iteration: 57234
loss: 1.0034011602401733,grad_norm: 0.9999993616077152, iteration: 57235
loss: 1.010115146636963,grad_norm: 0.8144474229627835, iteration: 57236
loss: 1.0044586658477783,grad_norm: 0.9904985382921806, iteration: 57237
loss: 1.0150859355926514,grad_norm: 0.9999990912479253, iteration: 57238
loss: 1.0491160154342651,grad_norm: 0.9999993114207015, iteration: 57239
loss: 0.9795929193496704,grad_norm: 0.9999989955004085, iteration: 57240
loss: 1.0338929891586304,grad_norm: 0.9999997039060496, iteration: 57241
loss: 0.9958696961402893,grad_norm: 0.9625047078497364, iteration: 57242
loss: 1.0455020666122437,grad_norm: 0.9999996985503734, iteration: 57243
loss: 1.0581966638565063,grad_norm: 0.9999999216432933, iteration: 57244
loss: 0.9905813932418823,grad_norm: 0.999999143951281, iteration: 57245
loss: 1.013672113418579,grad_norm: 0.9999991847947926, iteration: 57246
loss: 1.0165847539901733,grad_norm: 0.9999989952684026, iteration: 57247
loss: 0.9781692028045654,grad_norm: 0.9999989678886888, iteration: 57248
loss: 0.9650992155075073,grad_norm: 0.9999991081102815, iteration: 57249
loss: 1.0647211074829102,grad_norm: 0.9999996044108801, iteration: 57250
loss: 1.0080485343933105,grad_norm: 0.9694261982064244, iteration: 57251
loss: 0.9898287057876587,grad_norm: 0.9999992027921432, iteration: 57252
loss: 0.999992311000824,grad_norm: 0.9639451579675558, iteration: 57253
loss: 1.039221167564392,grad_norm: 0.9999990076968804, iteration: 57254
loss: 1.0053746700286865,grad_norm: 0.8558858110643892, iteration: 57255
loss: 1.0230507850646973,grad_norm: 0.9999990523653044, iteration: 57256
loss: 0.9879981875419617,grad_norm: 0.9843778116642175, iteration: 57257
loss: 1.0457185506820679,grad_norm: 0.9343254638461852, iteration: 57258
loss: 0.9991228580474854,grad_norm: 0.9999990784074391, iteration: 57259
loss: 1.019129991531372,grad_norm: 0.9994446472802878, iteration: 57260
loss: 1.0095072984695435,grad_norm: 0.9999992470099399, iteration: 57261
loss: 1.0025347471237183,grad_norm: 0.9295519998603174, iteration: 57262
loss: 0.9891977906227112,grad_norm: 0.9999991638381031, iteration: 57263
loss: 0.9793049097061157,grad_norm: 0.9999992701849243, iteration: 57264
loss: 1.0671827793121338,grad_norm: 0.9999992350395834, iteration: 57265
loss: 1.0010316371917725,grad_norm: 0.9285888043170882, iteration: 57266
loss: 1.0077769756317139,grad_norm: 0.9281262194927986, iteration: 57267
loss: 1.0119608640670776,grad_norm: 0.9999993159323775, iteration: 57268
loss: 0.9840378165245056,grad_norm: 0.9999991115720347, iteration: 57269
loss: 0.9841147661209106,grad_norm: 0.950483118379389, iteration: 57270
loss: 0.9990778565406799,grad_norm: 0.9999991896697075, iteration: 57271
loss: 1.014622688293457,grad_norm: 0.9999990725454908, iteration: 57272
loss: 1.0164090394973755,grad_norm: 0.9999992614883642, iteration: 57273
loss: 0.9860199689865112,grad_norm: 0.966126631647461, iteration: 57274
loss: 1.0217726230621338,grad_norm: 0.9999996046084382, iteration: 57275
loss: 1.0033270120620728,grad_norm: 0.9017031766033563, iteration: 57276
loss: 1.0088372230529785,grad_norm: 0.8284854905984508, iteration: 57277
loss: 0.9665376543998718,grad_norm: 0.9999993475754958, iteration: 57278
loss: 0.9886410236358643,grad_norm: 0.9039652440199977, iteration: 57279
loss: 1.009277582168579,grad_norm: 0.999999850521857, iteration: 57280
loss: 1.0309290885925293,grad_norm: 0.9635094986658104, iteration: 57281
loss: 1.012351632118225,grad_norm: 0.9999993661624801, iteration: 57282
loss: 1.0686676502227783,grad_norm: 0.9999993878408678, iteration: 57283
loss: 0.9716135859489441,grad_norm: 0.9999990125510813, iteration: 57284
loss: 0.9931972026824951,grad_norm: 0.9999991407696301, iteration: 57285
loss: 1.075133204460144,grad_norm: 0.9999997815514234, iteration: 57286
loss: 0.9814710021018982,grad_norm: 0.8825178980065905, iteration: 57287
loss: 1.1072802543640137,grad_norm: 0.999999120809716, iteration: 57288
loss: 0.9726797342300415,grad_norm: 0.9999991500841632, iteration: 57289
loss: 1.0057241916656494,grad_norm: 0.9999989949776051, iteration: 57290
loss: 1.002378225326538,grad_norm: 0.8936067942934001, iteration: 57291
loss: 1.0032984018325806,grad_norm: 0.999999081512369, iteration: 57292
loss: 1.0662648677825928,grad_norm: 0.9999998415865213, iteration: 57293
loss: 0.9937137365341187,grad_norm: 0.9999990387209382, iteration: 57294
loss: 1.0050323009490967,grad_norm: 0.9999994118319295, iteration: 57295
loss: 0.9698560833930969,grad_norm: 0.977431845163124, iteration: 57296
loss: 1.0448949337005615,grad_norm: 0.9999991243082086, iteration: 57297
loss: 0.9699575304985046,grad_norm: 0.9999991408885972, iteration: 57298
loss: 1.0050194263458252,grad_norm: 0.9147164015925542, iteration: 57299
loss: 1.0550707578659058,grad_norm: 0.9999996444996649, iteration: 57300
loss: 1.017384648323059,grad_norm: 0.9999991890322865, iteration: 57301
loss: 1.0071303844451904,grad_norm: 0.9999990955742276, iteration: 57302
loss: 1.0195422172546387,grad_norm: 0.9999989983135666, iteration: 57303
loss: 0.9955105781555176,grad_norm: 0.9655993770985826, iteration: 57304
loss: 0.9543975591659546,grad_norm: 0.9084124417753884, iteration: 57305
loss: 0.9935911297798157,grad_norm: 0.8616745790333394, iteration: 57306
loss: 1.0131986141204834,grad_norm: 0.8449867473347494, iteration: 57307
loss: 0.9826048016548157,grad_norm: 0.9826009167224763, iteration: 57308
loss: 0.9791805744171143,grad_norm: 0.9590116572166211, iteration: 57309
loss: 0.9750733971595764,grad_norm: 0.9999994311423365, iteration: 57310
loss: 1.023808479309082,grad_norm: 0.9999990355579533, iteration: 57311
loss: 1.0256457328796387,grad_norm: 0.9999990517246745, iteration: 57312
loss: 0.9757571220397949,grad_norm: 0.9999990359707678, iteration: 57313
loss: 1.034595012664795,grad_norm: 0.999999397486112, iteration: 57314
loss: 0.9878277778625488,grad_norm: 0.999999174898632, iteration: 57315
loss: 1.0106008052825928,grad_norm: 0.9999991461775092, iteration: 57316
loss: 1.0208772420883179,grad_norm: 0.9999992406678169, iteration: 57317
loss: 1.0021648406982422,grad_norm: 0.99999911191292, iteration: 57318
loss: 1.0148639678955078,grad_norm: 0.99999929225008, iteration: 57319
loss: 1.0102975368499756,grad_norm: 0.8037571890143245, iteration: 57320
loss: 0.9946263432502747,grad_norm: 0.9999992557852148, iteration: 57321
loss: 1.0175726413726807,grad_norm: 0.974365174017418, iteration: 57322
loss: 0.9947130084037781,grad_norm: 0.9999991497569077, iteration: 57323
loss: 1.0063178539276123,grad_norm: 0.9999991811649416, iteration: 57324
loss: 0.9983324408531189,grad_norm: 0.999999312794666, iteration: 57325
loss: 1.0019419193267822,grad_norm: 0.9999992712460655, iteration: 57326
loss: 1.0149039030075073,grad_norm: 0.9546376799780776, iteration: 57327
loss: 1.0161323547363281,grad_norm: 0.9999992228821382, iteration: 57328
loss: 0.9773122072219849,grad_norm: 0.9973157655037598, iteration: 57329
loss: 1.0006533861160278,grad_norm: 0.9315736153750257, iteration: 57330
loss: 1.0008195638656616,grad_norm: 0.8995459296795935, iteration: 57331
loss: 0.9810911417007446,grad_norm: 0.9999991880989357, iteration: 57332
loss: 0.9948378801345825,grad_norm: 0.9873613359377196, iteration: 57333
loss: 1.0008691549301147,grad_norm: 0.9999994510290956, iteration: 57334
loss: 0.9742415547370911,grad_norm: 0.9999991918383706, iteration: 57335
loss: 1.0001821517944336,grad_norm: 0.9485492770305528, iteration: 57336
loss: 0.9797374606132507,grad_norm: 0.9999990947042512, iteration: 57337
loss: 1.0292212963104248,grad_norm: 0.9999991345489534, iteration: 57338
loss: 0.9964779615402222,grad_norm: 0.857714914660562, iteration: 57339
loss: 1.000687599182129,grad_norm: 0.9645762444994102, iteration: 57340
loss: 0.9960162043571472,grad_norm: 0.9999990349302958, iteration: 57341
loss: 1.0051809549331665,grad_norm: 0.9069914299799096, iteration: 57342
loss: 1.0368343591690063,grad_norm: 0.9999993996234732, iteration: 57343
loss: 1.0474259853363037,grad_norm: 0.999999917736047, iteration: 57344
loss: 0.9952048659324646,grad_norm: 0.9999991322670673, iteration: 57345
loss: 0.9717519283294678,grad_norm: 0.9593055406354513, iteration: 57346
loss: 0.9900701642036438,grad_norm: 0.9538374823051667, iteration: 57347
loss: 1.0009276866912842,grad_norm: 0.9999992946524262, iteration: 57348
loss: 1.0422425270080566,grad_norm: 0.999999038830988, iteration: 57349
loss: 1.0152348279953003,grad_norm: 0.9863631170118201, iteration: 57350
loss: 1.0287827253341675,grad_norm: 0.999999099307626, iteration: 57351
loss: 0.9813835620880127,grad_norm: 0.9999992829167731, iteration: 57352
loss: 1.0810632705688477,grad_norm: 0.9999995539618366, iteration: 57353
loss: 1.047565221786499,grad_norm: 0.9999991783087032, iteration: 57354
loss: 1.0181158781051636,grad_norm: 0.9999991115498723, iteration: 57355
loss: 1.0123199224472046,grad_norm: 0.9999994413812073, iteration: 57356
loss: 1.0239337682724,grad_norm: 0.9105336589327858, iteration: 57357
loss: 1.003443717956543,grad_norm: 0.9800315167053919, iteration: 57358
loss: 0.9871646761894226,grad_norm: 0.9999992842636003, iteration: 57359
loss: 0.989890456199646,grad_norm: 0.9857091016372683, iteration: 57360
loss: 0.9864388108253479,grad_norm: 0.8940247896014283, iteration: 57361
loss: 0.9903596639633179,grad_norm: 0.999999169074879, iteration: 57362
loss: 0.9971214532852173,grad_norm: 0.9999989320106469, iteration: 57363
loss: 0.9851980209350586,grad_norm: 0.9999991538981726, iteration: 57364
loss: 0.9845004081726074,grad_norm: 0.9999992638917684, iteration: 57365
loss: 0.9814051985740662,grad_norm: 0.9834153874435705, iteration: 57366
loss: 1.0157808065414429,grad_norm: 0.9775253417690616, iteration: 57367
loss: 1.0461333990097046,grad_norm: 0.9999991079984379, iteration: 57368
loss: 0.9719502925872803,grad_norm: 0.9999992018129343, iteration: 57369
loss: 1.039883017539978,grad_norm: 0.9999994148251685, iteration: 57370
loss: 1.010716438293457,grad_norm: 0.9999991145607543, iteration: 57371
loss: 0.9749115109443665,grad_norm: 0.9999992453476472, iteration: 57372
loss: 1.1003588438034058,grad_norm: 0.9999990807153103, iteration: 57373
loss: 1.0246405601501465,grad_norm: 0.9999989944435944, iteration: 57374
loss: 0.983697772026062,grad_norm: 0.8057471396136014, iteration: 57375
loss: 1.0368024110794067,grad_norm: 0.9795387263829041, iteration: 57376
loss: 1.0127145051956177,grad_norm: 0.9999991776330039, iteration: 57377
loss: 1.011949062347412,grad_norm: 0.909016113182669, iteration: 57378
loss: 1.0545780658721924,grad_norm: 0.9999990895623883, iteration: 57379
loss: 0.9854802489280701,grad_norm: 0.9999992005612308, iteration: 57380
loss: 1.0034185647964478,grad_norm: 0.9348335732268891, iteration: 57381
loss: 0.9725030660629272,grad_norm: 0.9672870731005576, iteration: 57382
loss: 0.9642908573150635,grad_norm: 0.9999991358600908, iteration: 57383
loss: 1.0256991386413574,grad_norm: 0.9999991337150398, iteration: 57384
loss: 1.0087743997573853,grad_norm: 0.9145012936781959, iteration: 57385
loss: 0.9662683010101318,grad_norm: 0.9477560217505862, iteration: 57386
loss: 1.3670778274536133,grad_norm: 0.9999998880180188, iteration: 57387
loss: 1.0098963975906372,grad_norm: 0.9492430964985514, iteration: 57388
loss: 0.9937216639518738,grad_norm: 0.958626053210752, iteration: 57389
loss: 1.0086475610733032,grad_norm: 0.9999995355640523, iteration: 57390
loss: 1.013075828552246,grad_norm: 0.9999991284124555, iteration: 57391
loss: 1.044710636138916,grad_norm: 0.9999991673594638, iteration: 57392
loss: 0.9699497818946838,grad_norm: 0.9626768002206708, iteration: 57393
loss: 1.0159327983856201,grad_norm: 0.9641209083042237, iteration: 57394
loss: 1.0367130041122437,grad_norm: 0.8965995069625612, iteration: 57395
loss: 1.0113921165466309,grad_norm: 0.8490571077532546, iteration: 57396
loss: 0.98795086145401,grad_norm: 0.9999991394155694, iteration: 57397
loss: 1.03642737865448,grad_norm: 0.9999991204495872, iteration: 57398
loss: 1.0275053977966309,grad_norm: 0.9732340867074347, iteration: 57399
loss: 0.9971699118614197,grad_norm: 0.9999992940960036, iteration: 57400
loss: 1.016640543937683,grad_norm: 0.9999992346290008, iteration: 57401
loss: 0.984845757484436,grad_norm: 0.9999989521815345, iteration: 57402
loss: 1.055163025856018,grad_norm: 0.9999991385074206, iteration: 57403
loss: 1.0162888765335083,grad_norm: 0.9999992611923391, iteration: 57404
loss: 1.0113850831985474,grad_norm: 0.9999992627371405, iteration: 57405
loss: 1.040187954902649,grad_norm: 0.9999990989636212, iteration: 57406
loss: 0.9953967332839966,grad_norm: 0.999999015172967, iteration: 57407
loss: 0.9700542092323303,grad_norm: 0.9999989769077589, iteration: 57408
loss: 1.003739595413208,grad_norm: 0.9999990493510077, iteration: 57409
loss: 1.0078290700912476,grad_norm: 0.9999993383289304, iteration: 57410
loss: 0.976811945438385,grad_norm: 0.9238580923855924, iteration: 57411
loss: 0.9713902473449707,grad_norm: 0.9999992455567498, iteration: 57412
loss: 1.026079773902893,grad_norm: 0.9999996092204925, iteration: 57413
loss: 0.9784602522850037,grad_norm: 0.9999989643855778, iteration: 57414
loss: 1.037493109703064,grad_norm: 0.9999993320551995, iteration: 57415
loss: 1.0030089616775513,grad_norm: 0.9219748155158024, iteration: 57416
loss: 0.9903576374053955,grad_norm: 0.9691521604115447, iteration: 57417
loss: 1.0402082204818726,grad_norm: 0.9534239479828195, iteration: 57418
loss: 0.9935330748558044,grad_norm: 0.9999990628649229, iteration: 57419
loss: 0.9968860149383545,grad_norm: 0.9999991217612444, iteration: 57420
loss: 1.0126051902770996,grad_norm: 0.9999990850189322, iteration: 57421
loss: 1.0073394775390625,grad_norm: 0.9136806955734177, iteration: 57422
loss: 1.0166535377502441,grad_norm: 0.9999992640659252, iteration: 57423
loss: 1.0232725143432617,grad_norm: 0.9727369005557881, iteration: 57424
loss: 1.0391660928726196,grad_norm: 0.9999991422946405, iteration: 57425
loss: 0.9676389098167419,grad_norm: 0.9999990611205457, iteration: 57426
loss: 1.008815050125122,grad_norm: 0.9737907141938013, iteration: 57427
loss: 1.0062694549560547,grad_norm: 0.9999990735899711, iteration: 57428
loss: 0.9812713861465454,grad_norm: 0.9615510819156577, iteration: 57429
loss: 0.983397901058197,grad_norm: 0.8514381210901438, iteration: 57430
loss: 0.984756350517273,grad_norm: 0.9999992783330344, iteration: 57431
loss: 1.028883695602417,grad_norm: 0.9999991875065437, iteration: 57432
loss: 1.003942608833313,grad_norm: 0.9999989721080232, iteration: 57433
loss: 1.0862995386123657,grad_norm: 0.9999996650930884, iteration: 57434
loss: 1.0257436037063599,grad_norm: 0.9989360185711196, iteration: 57435
loss: 0.9805323481559753,grad_norm: 0.9999990280530635, iteration: 57436
loss: 1.0048725605010986,grad_norm: 0.9999990323062022, iteration: 57437
loss: 1.0180155038833618,grad_norm: 0.9999992386365587, iteration: 57438
loss: 0.9809873700141907,grad_norm: 0.9999992612324234, iteration: 57439
loss: 1.0369346141815186,grad_norm: 0.9999991993059575, iteration: 57440
loss: 0.9917207956314087,grad_norm: 0.99999907477284, iteration: 57441
loss: 1.0158048868179321,grad_norm: 0.9999990259256337, iteration: 57442
loss: 1.0304356813430786,grad_norm: 0.9999990817950063, iteration: 57443
loss: 1.0024418830871582,grad_norm: 0.9999991426837956, iteration: 57444
loss: 0.9770107865333557,grad_norm: 0.9166195641303774, iteration: 57445
loss: 1.0135178565979004,grad_norm: 0.9999990005693996, iteration: 57446
loss: 1.0098382234573364,grad_norm: 0.9999990748623128, iteration: 57447
loss: 1.0008347034454346,grad_norm: 0.9999990581987065, iteration: 57448
loss: 1.0181807279586792,grad_norm: 0.9999992153166439, iteration: 57449
loss: 0.9985492825508118,grad_norm: 0.8640796468700116, iteration: 57450
loss: 1.0103625059127808,grad_norm: 0.9704518600122854, iteration: 57451
loss: 1.0349223613739014,grad_norm: 0.972122449947922, iteration: 57452
loss: 0.9762418270111084,grad_norm: 0.995267680055242, iteration: 57453
loss: 1.0219956636428833,grad_norm: 0.9217812116048844, iteration: 57454
loss: 1.0023382902145386,grad_norm: 0.9999990365007507, iteration: 57455
loss: 1.0004080533981323,grad_norm: 0.9999990956377789, iteration: 57456
loss: 0.991311252117157,grad_norm: 0.9999991761954748, iteration: 57457
loss: 0.9911903738975525,grad_norm: 0.9878015338997469, iteration: 57458
loss: 1.0105996131896973,grad_norm: 0.9949058193831036, iteration: 57459
loss: 0.9461213946342468,grad_norm: 0.9999990423507109, iteration: 57460
loss: 1.0207628011703491,grad_norm: 0.9999991749950179, iteration: 57461
loss: 0.9722439646720886,grad_norm: 0.9270723013181472, iteration: 57462
loss: 0.9691579341888428,grad_norm: 0.9999991245456815, iteration: 57463
loss: 1.0245234966278076,grad_norm: 0.9999991374958256, iteration: 57464
loss: 1.019231915473938,grad_norm: 0.9999991238592557, iteration: 57465
loss: 0.9947522878646851,grad_norm: 0.9999991064634, iteration: 57466
loss: 1.0271432399749756,grad_norm: 0.9506608038763179, iteration: 57467
loss: 0.9900093674659729,grad_norm: 0.8728412843396619, iteration: 57468
loss: 0.9862387776374817,grad_norm: 0.942613962213876, iteration: 57469
loss: 1.0412946939468384,grad_norm: 0.9999989602877873, iteration: 57470
loss: 1.0008342266082764,grad_norm: 0.9225988044493009, iteration: 57471
loss: 1.0181434154510498,grad_norm: 0.9999990366989907, iteration: 57472
loss: 1.0255273580551147,grad_norm: 0.9999992022080022, iteration: 57473
loss: 1.0402659177780151,grad_norm: 0.9618155883723003, iteration: 57474
loss: 0.94487464427948,grad_norm: 0.9999990102064642, iteration: 57475
loss: 1.0185476541519165,grad_norm: 0.8847907161313993, iteration: 57476
loss: 1.0145219564437866,grad_norm: 0.9626368026413878, iteration: 57477
loss: 0.9429522752761841,grad_norm: 0.9999991529512982, iteration: 57478
loss: 0.9832401275634766,grad_norm: 0.9999992202970944, iteration: 57479
loss: 1.014487385749817,grad_norm: 0.9999991492902385, iteration: 57480
loss: 0.9846835136413574,grad_norm: 0.9999989764924265, iteration: 57481
loss: 0.9798070788383484,grad_norm: 0.9999992916197799, iteration: 57482
loss: 1.0294910669326782,grad_norm: 0.9999991713728883, iteration: 57483
loss: 1.0256983041763306,grad_norm: 0.9999990400231021, iteration: 57484
loss: 0.9876437187194824,grad_norm: 0.9999993021026679, iteration: 57485
loss: 0.9825727939605713,grad_norm: 0.9999992563977088, iteration: 57486
loss: 0.9726284146308899,grad_norm: 0.9999990320451713, iteration: 57487
loss: 1.0344960689544678,grad_norm: 0.9999990439981973, iteration: 57488
loss: 1.0171093940734863,grad_norm: 0.9999990590333528, iteration: 57489
loss: 0.9939771294593811,grad_norm: 0.9166251111434733, iteration: 57490
loss: 0.9892624020576477,grad_norm: 0.9999992964557687, iteration: 57491
loss: 1.0254883766174316,grad_norm: 0.997246175924099, iteration: 57492
loss: 1.0072758197784424,grad_norm: 0.8844864329382136, iteration: 57493
loss: 1.0006104707717896,grad_norm: 0.999999143016129, iteration: 57494
loss: 0.9954208135604858,grad_norm: 0.9999990485192345, iteration: 57495
loss: 1.0107053518295288,grad_norm: 0.9728015335765189, iteration: 57496
loss: 1.0231733322143555,grad_norm: 0.8444384435970511, iteration: 57497
loss: 1.0458935499191284,grad_norm: 0.9999993219880721, iteration: 57498
loss: 1.0560697317123413,grad_norm: 0.9999991501019693, iteration: 57499
loss: 0.987192690372467,grad_norm: 0.9080474784381262, iteration: 57500
loss: 1.0276895761489868,grad_norm: 0.9999990906138821, iteration: 57501
loss: 0.9872921705245972,grad_norm: 0.9999990890242788, iteration: 57502
loss: 0.9882319569587708,grad_norm: 0.9807793710158945, iteration: 57503
loss: 1.028412103652954,grad_norm: 0.9999990824286591, iteration: 57504
loss: 1.0154026746749878,grad_norm: 0.9999991414995165, iteration: 57505
loss: 1.0372428894042969,grad_norm: 0.9999990449162192, iteration: 57506
loss: 1.0061595439910889,grad_norm: 0.9999990986399423, iteration: 57507
loss: 0.9995607137680054,grad_norm: 0.9999991155447755, iteration: 57508
loss: 0.9928209781646729,grad_norm: 0.9999991693622728, iteration: 57509
loss: 0.9689200520515442,grad_norm: 0.9999991946138314, iteration: 57510
loss: 1.0092519521713257,grad_norm: 0.8605028064118085, iteration: 57511
loss: 0.9777191877365112,grad_norm: 0.970303904389382, iteration: 57512
loss: 0.9707216024398804,grad_norm: 0.9999991346426517, iteration: 57513
loss: 1.0111271142959595,grad_norm: 0.8901750438554665, iteration: 57514
loss: 0.9529289603233337,grad_norm: 0.9986527902062992, iteration: 57515
loss: 1.002905011177063,grad_norm: 0.9999991790253832, iteration: 57516
loss: 0.9657907485961914,grad_norm: 0.9999993938040809, iteration: 57517
loss: 0.9544553160667419,grad_norm: 0.9304947304907725, iteration: 57518
loss: 1.0240007638931274,grad_norm: 0.9315302820619893, iteration: 57519
loss: 0.993820071220398,grad_norm: 0.9999991991429666, iteration: 57520
loss: 0.9819982647895813,grad_norm: 0.9178127373928646, iteration: 57521
loss: 0.9900409579277039,grad_norm: 0.9999991623341752, iteration: 57522
loss: 1.0288641452789307,grad_norm: 0.9784813008865216, iteration: 57523
loss: 0.9829482436180115,grad_norm: 0.9877609086557946, iteration: 57524
loss: 0.986802339553833,grad_norm: 0.9999998090707077, iteration: 57525
loss: 1.005159616470337,grad_norm: 0.9999991810160027, iteration: 57526
loss: 1.04573392868042,grad_norm: 0.9999989067492366, iteration: 57527
loss: 1.012289047241211,grad_norm: 0.999999162524273, iteration: 57528
loss: 1.00596284866333,grad_norm: 0.999998959193701, iteration: 57529
loss: 1.0145102739334106,grad_norm: 0.9999990501998615, iteration: 57530
loss: 0.9906492233276367,grad_norm: 0.9999990855054611, iteration: 57531
loss: 1.015586018562317,grad_norm: 0.9999995592656249, iteration: 57532
loss: 0.9818631410598755,grad_norm: 0.9999991754793658, iteration: 57533
loss: 1.0211873054504395,grad_norm: 0.9999990487285202, iteration: 57534
loss: 1.035328984260559,grad_norm: 0.9999995727966218, iteration: 57535
loss: 0.9605160355567932,grad_norm: 0.9999992423030336, iteration: 57536
loss: 1.0351428985595703,grad_norm: 0.9569508257745795, iteration: 57537
loss: 1.0113954544067383,grad_norm: 0.9999995293638613, iteration: 57538
loss: 0.9818206429481506,grad_norm: 0.9999991303023618, iteration: 57539
loss: 0.9888100624084473,grad_norm: 0.9498617143971279, iteration: 57540
loss: 0.9734693765640259,grad_norm: 0.9999992605772824, iteration: 57541
loss: 1.0035533905029297,grad_norm: 0.9836521967790254, iteration: 57542
loss: 0.9701439738273621,grad_norm: 0.9999988683723043, iteration: 57543
loss: 1.0421615839004517,grad_norm: 0.9560048698357849, iteration: 57544
loss: 0.9965378046035767,grad_norm: 0.9522369449440328, iteration: 57545
loss: 1.0109018087387085,grad_norm: 0.9999990414937457, iteration: 57546
loss: 1.0192487239837646,grad_norm: 0.9999991743093553, iteration: 57547
loss: 1.0172325372695923,grad_norm: 0.9999991079447376, iteration: 57548
loss: 1.0067615509033203,grad_norm: 0.9999990311558611, iteration: 57549
loss: 1.0281351804733276,grad_norm: 0.9428780039899055, iteration: 57550
loss: 1.002705693244934,grad_norm: 0.9999993535777177, iteration: 57551
loss: 1.014000415802002,grad_norm: 0.9999992424337103, iteration: 57552
loss: 1.0099639892578125,grad_norm: 0.9727668594115229, iteration: 57553
loss: 0.9843412041664124,grad_norm: 0.9645673607897789, iteration: 57554
loss: 0.9862202405929565,grad_norm: 0.9907700810912239, iteration: 57555
loss: 0.982998251914978,grad_norm: 0.999999167155807, iteration: 57556
loss: 1.029129147529602,grad_norm: 0.8801570074812849, iteration: 57557
loss: 0.9670100212097168,grad_norm: 0.9957761307870336, iteration: 57558
loss: 0.9704697132110596,grad_norm: 0.9793143457574286, iteration: 57559
loss: 1.0084198713302612,grad_norm: 0.893243845501932, iteration: 57560
loss: 0.9960863590240479,grad_norm: 0.9999990956295345, iteration: 57561
loss: 1.0063371658325195,grad_norm: 0.999999192296124, iteration: 57562
loss: 0.9860652685165405,grad_norm: 0.9999991370101392, iteration: 57563
loss: 0.9960668683052063,grad_norm: 0.8804282783691325, iteration: 57564
loss: 1.030128836631775,grad_norm: 0.9999991065986993, iteration: 57565
loss: 1.0018491744995117,grad_norm: 0.9999992574162687, iteration: 57566
loss: 1.0062127113342285,grad_norm: 0.9956246837445303, iteration: 57567
loss: 1.017062783241272,grad_norm: 0.9189992044899403, iteration: 57568
loss: 1.0315552949905396,grad_norm: 0.9999989301068246, iteration: 57569
loss: 1.0388906002044678,grad_norm: 0.9999992386762176, iteration: 57570
loss: 0.979382336139679,grad_norm: 0.9999991551103731, iteration: 57571
loss: 0.9927991628646851,grad_norm: 0.9527685334823336, iteration: 57572
loss: 1.0107117891311646,grad_norm: 0.9633648957106131, iteration: 57573
loss: 0.9763063788414001,grad_norm: 0.9469705651441643, iteration: 57574
loss: 0.9854041337966919,grad_norm: 0.7361533549214433, iteration: 57575
loss: 1.0008617639541626,grad_norm: 0.9217991671870608, iteration: 57576
loss: 1.0287225246429443,grad_norm: 0.9999998845170732, iteration: 57577
loss: 1.02186918258667,grad_norm: 0.9999990787135706, iteration: 57578
loss: 0.9912189841270447,grad_norm: 0.7278485498649874, iteration: 57579
loss: 0.9694715142250061,grad_norm: 0.9999992270974147, iteration: 57580
loss: 1.0137643814086914,grad_norm: 0.9999990992554064, iteration: 57581
loss: 0.9875248074531555,grad_norm: 0.9999991710841644, iteration: 57582
loss: 1.0075238943099976,grad_norm: 0.9999995821058046, iteration: 57583
loss: 0.9968611598014832,grad_norm: 0.957943693788568, iteration: 57584
loss: 0.9810104370117188,grad_norm: 0.999999196624336, iteration: 57585
loss: 1.0144100189208984,grad_norm: 0.9999990020574923, iteration: 57586
loss: 0.9755833745002747,grad_norm: 0.9999991997279118, iteration: 57587
loss: 0.982997715473175,grad_norm: 0.9999989635687683, iteration: 57588
loss: 0.9804001450538635,grad_norm: 0.9530103593493972, iteration: 57589
loss: 1.009759783744812,grad_norm: 0.988768989435223, iteration: 57590
loss: 1.0009551048278809,grad_norm: 0.999999165240222, iteration: 57591
loss: 0.9689001441001892,grad_norm: 0.9834589958906824, iteration: 57592
loss: 0.9968489408493042,grad_norm: 0.9999991255063432, iteration: 57593
loss: 1.0462369918823242,grad_norm: 0.9999990223310504, iteration: 57594
loss: 1.0194017887115479,grad_norm: 0.9999991200877224, iteration: 57595
loss: 0.9683546423912048,grad_norm: 0.8524013865674134, iteration: 57596
loss: 0.9864729046821594,grad_norm: 0.947601796262995, iteration: 57597
loss: 1.0138500928878784,grad_norm: 0.999999128180332, iteration: 57598
loss: 1.0982917547225952,grad_norm: 0.9999996061159426, iteration: 57599
loss: 0.9617363214492798,grad_norm: 0.9999993025531408, iteration: 57600
loss: 0.9901860952377319,grad_norm: 0.9212666244441462, iteration: 57601
loss: 0.9746137857437134,grad_norm: 0.9875867005052145, iteration: 57602
loss: 0.9895867109298706,grad_norm: 0.9157176389411611, iteration: 57603
loss: 1.042649507522583,grad_norm: 0.9999997982170389, iteration: 57604
loss: 1.0176461935043335,grad_norm: 0.9999991183620954, iteration: 57605
loss: 1.028538703918457,grad_norm: 0.9999993120489006, iteration: 57606
loss: 1.0038145780563354,grad_norm: 0.9999990676656341, iteration: 57607
loss: 1.0168471336364746,grad_norm: 0.9999991693681652, iteration: 57608
loss: 0.9648162722587585,grad_norm: 0.9999990812163985, iteration: 57609
loss: 1.036059856414795,grad_norm: 0.9999993908256446, iteration: 57610
loss: 0.9971343874931335,grad_norm: 0.9999989917949478, iteration: 57611
loss: 0.9949304461479187,grad_norm: 0.9310392606356737, iteration: 57612
loss: 1.0529849529266357,grad_norm: 0.9999994750933447, iteration: 57613
loss: 1.0014208555221558,grad_norm: 0.880256982442376, iteration: 57614
loss: 0.9905033111572266,grad_norm: 0.9999991225039006, iteration: 57615
loss: 1.007849931716919,grad_norm: 0.8170555534043437, iteration: 57616
loss: 1.0160235166549683,grad_norm: 0.9357075761530323, iteration: 57617
loss: 1.0266122817993164,grad_norm: 0.9369151978976081, iteration: 57618
loss: 1.0327537059783936,grad_norm: 0.9999991867357272, iteration: 57619
loss: 0.9861767888069153,grad_norm: 0.9999989778494074, iteration: 57620
loss: 0.9729337096214294,grad_norm: 0.8874668003063181, iteration: 57621
loss: 1.017795205116272,grad_norm: 0.9999990482974295, iteration: 57622
loss: 0.9868161678314209,grad_norm: 0.9869185956136453, iteration: 57623
loss: 1.0119796991348267,grad_norm: 0.9973678134197823, iteration: 57624
loss: 0.9827749133110046,grad_norm: 0.999998988127579, iteration: 57625
loss: 0.9668175578117371,grad_norm: 0.9999992295541785, iteration: 57626
loss: 1.0301445722579956,grad_norm: 0.9349346018191422, iteration: 57627
loss: 0.9875050187110901,grad_norm: 0.9579327296881321, iteration: 57628
loss: 1.0152473449707031,grad_norm: 0.9920226458286252, iteration: 57629
loss: 1.0240979194641113,grad_norm: 0.9999997452893953, iteration: 57630
loss: 0.9993470311164856,grad_norm: 0.9707600758212318, iteration: 57631
loss: 1.0529282093048096,grad_norm: 0.9999992053094655, iteration: 57632
loss: 0.970353364944458,grad_norm: 0.9999990490186608, iteration: 57633
loss: 0.9987268447875977,grad_norm: 0.9999990113578815, iteration: 57634
loss: 0.9801679849624634,grad_norm: 0.9999991073730968, iteration: 57635
loss: 1.0218781232833862,grad_norm: 0.9999990190451394, iteration: 57636
loss: 0.9695733785629272,grad_norm: 0.9666483387696949, iteration: 57637
loss: 1.012481451034546,grad_norm: 0.9999990520078815, iteration: 57638
loss: 0.9806813597679138,grad_norm: 0.9999991634421351, iteration: 57639
loss: 1.017691731452942,grad_norm: 0.9999991381972826, iteration: 57640
loss: 1.0373343229293823,grad_norm: 0.900745280460436, iteration: 57641
loss: 1.0469118356704712,grad_norm: 0.9999989193013329, iteration: 57642
loss: 0.9939740896224976,grad_norm: 0.9999991598033006, iteration: 57643
loss: 0.9955065846443176,grad_norm: 0.9908493662452453, iteration: 57644
loss: 1.0218716859817505,grad_norm: 0.9999989806717912, iteration: 57645
loss: 1.0949790477752686,grad_norm: 0.9999996328217505, iteration: 57646
loss: 1.0029157400131226,grad_norm: 0.9650272396203116, iteration: 57647
loss: 1.00772225856781,grad_norm: 0.9999991033968832, iteration: 57648
loss: 0.9824828505516052,grad_norm: 0.8917756171331029, iteration: 57649
loss: 1.054007649421692,grad_norm: 0.9487733063117709, iteration: 57650
loss: 1.008227825164795,grad_norm: 0.9999997016813565, iteration: 57651
loss: 0.9889363050460815,grad_norm: 0.9999992411176887, iteration: 57652
loss: 0.9718091487884521,grad_norm: 0.9654581995106266, iteration: 57653
loss: 0.9890251159667969,grad_norm: 0.9999988991575035, iteration: 57654
loss: 1.034279227256775,grad_norm: 0.9660615127289557, iteration: 57655
loss: 1.021830439567566,grad_norm: 0.9999995431638921, iteration: 57656
loss: 1.0249305963516235,grad_norm: 0.9999991134479577, iteration: 57657
loss: 0.9909489154815674,grad_norm: 0.9999991634047206, iteration: 57658
loss: 1.012515902519226,grad_norm: 0.9947400315346684, iteration: 57659
loss: 1.0089545249938965,grad_norm: 0.9751714450243404, iteration: 57660
loss: 1.0139743089675903,grad_norm: 0.8425790604314016, iteration: 57661
loss: 1.0098179578781128,grad_norm: 0.9999992342990598, iteration: 57662
loss: 1.0157262086868286,grad_norm: 0.9999991965491759, iteration: 57663
loss: 1.0107184648513794,grad_norm: 0.9996973326839464, iteration: 57664
loss: 0.9959319233894348,grad_norm: 0.999999161694792, iteration: 57665
loss: 1.004132866859436,grad_norm: 0.9999991444345819, iteration: 57666
loss: 1.0133874416351318,grad_norm: 0.9999991935309335, iteration: 57667
loss: 1.036544919013977,grad_norm: 0.8914711612180141, iteration: 57668
loss: 0.9765318036079407,grad_norm: 0.89750297801086, iteration: 57669
loss: 1.0199627876281738,grad_norm: 0.9999991785029045, iteration: 57670
loss: 1.0094162225723267,grad_norm: 0.9963815994401459, iteration: 57671
loss: 0.998332679271698,grad_norm: 0.9999991785823338, iteration: 57672
loss: 1.0523849725723267,grad_norm: 0.9999992209736943, iteration: 57673
loss: 1.0094563961029053,grad_norm: 0.9246239056683287, iteration: 57674
loss: 1.0479363203048706,grad_norm: 0.9999993134900139, iteration: 57675
loss: 1.0751123428344727,grad_norm: 0.9999994082583311, iteration: 57676
loss: 1.0003427267074585,grad_norm: 0.9999990861446603, iteration: 57677
loss: 1.0233217477798462,grad_norm: 0.9999993059273267, iteration: 57678
loss: 1.0363283157348633,grad_norm: 0.928542267897231, iteration: 57679
loss: 0.9676819443702698,grad_norm: 0.9707127991894549, iteration: 57680
loss: 0.9952812194824219,grad_norm: 0.8880579083009823, iteration: 57681
loss: 0.9878376722335815,grad_norm: 0.9999990137906173, iteration: 57682
loss: 1.011913537979126,grad_norm: 0.9999991741391865, iteration: 57683
loss: 0.9803881049156189,grad_norm: 0.9999991094569705, iteration: 57684
loss: 1.0076924562454224,grad_norm: 0.9999991814566515, iteration: 57685
loss: 0.9981199502944946,grad_norm: 0.9999991955018238, iteration: 57686
loss: 0.9564032554626465,grad_norm: 0.999999182550949, iteration: 57687
loss: 1.0339930057525635,grad_norm: 0.9999991119041765, iteration: 57688
loss: 1.0438603162765503,grad_norm: 0.9999990754916358, iteration: 57689
loss: 0.9915872812271118,grad_norm: 0.9999998940763205, iteration: 57690
loss: 0.9971024394035339,grad_norm: 0.9999990655669468, iteration: 57691
loss: 1.0263484716415405,grad_norm: 0.9999991218159039, iteration: 57692
loss: 0.9991587400436401,grad_norm: 0.9999989783706037, iteration: 57693
loss: 0.9995999336242676,grad_norm: 0.9024977311825086, iteration: 57694
loss: 1.0367226600646973,grad_norm: 0.8763849313316722, iteration: 57695
loss: 0.9903602600097656,grad_norm: 0.9999993476456225, iteration: 57696
loss: 0.9909630417823792,grad_norm: 0.8628058168474715, iteration: 57697
loss: 0.9907825589179993,grad_norm: 0.9449390482050185, iteration: 57698
loss: 0.9583002924919128,grad_norm: 0.9999990865601412, iteration: 57699
loss: 0.9973852634429932,grad_norm: 0.931915482814808, iteration: 57700
loss: 1.0706219673156738,grad_norm: 0.9999995164094448, iteration: 57701
loss: 1.0231316089630127,grad_norm: 0.9769998902990266, iteration: 57702
loss: 1.0079841613769531,grad_norm: 0.9426149286685095, iteration: 57703
loss: 1.0202833414077759,grad_norm: 0.9999994335720597, iteration: 57704
loss: 1.0282371044158936,grad_norm: 0.9272358624466397, iteration: 57705
loss: 1.0113252401351929,grad_norm: 0.9999990248170428, iteration: 57706
loss: 0.981665313243866,grad_norm: 0.9157947654480306, iteration: 57707
loss: 1.1327464580535889,grad_norm: 0.9999992104405673, iteration: 57708
loss: 1.0041906833648682,grad_norm: 0.9562705267284065, iteration: 57709
loss: 1.0130926370620728,grad_norm: 0.999999251069542, iteration: 57710
loss: 1.0163755416870117,grad_norm: 0.9736556741991125, iteration: 57711
loss: 0.9914161562919617,grad_norm: 0.9999996699836746, iteration: 57712
loss: 0.9462791681289673,grad_norm: 0.9999991208725423, iteration: 57713
loss: 1.0012564659118652,grad_norm: 0.9999991169333062, iteration: 57714
loss: 0.9858043193817139,grad_norm: 0.9339253644314092, iteration: 57715
loss: 1.000903844833374,grad_norm: 0.9999989842910749, iteration: 57716
loss: 1.1955293416976929,grad_norm: 0.9999999570229988, iteration: 57717
loss: 0.9749947786331177,grad_norm: 0.871596326116596, iteration: 57718
loss: 1.0459569692611694,grad_norm: 0.9999992729242274, iteration: 57719
loss: 1.0079954862594604,grad_norm: 0.9929660025559253, iteration: 57720
loss: 1.0126731395721436,grad_norm: 0.9179198920314948, iteration: 57721
loss: 0.9991486072540283,grad_norm: 0.9999991797983018, iteration: 57722
loss: 0.9872688055038452,grad_norm: 0.9326614589075511, iteration: 57723
loss: 1.1112157106399536,grad_norm: 0.9999995755773162, iteration: 57724
loss: 0.9798586368560791,grad_norm: 0.9999989421058717, iteration: 57725
loss: 0.9780229330062866,grad_norm: 0.9636560575447682, iteration: 57726
loss: 1.001205563545227,grad_norm: 0.9999990401446298, iteration: 57727
loss: 1.0205304622650146,grad_norm: 0.8883808438771016, iteration: 57728
loss: 1.0202635526657104,grad_norm: 0.9999992603186281, iteration: 57729
loss: 0.9664480090141296,grad_norm: 0.9172032091481633, iteration: 57730
loss: 1.0203207731246948,grad_norm: 0.9999989898843042, iteration: 57731
loss: 1.0356473922729492,grad_norm: 0.976402733332349, iteration: 57732
loss: 1.0019086599349976,grad_norm: 0.9999991551467378, iteration: 57733
loss: 1.0053088665008545,grad_norm: 0.9999990818734187, iteration: 57734
loss: 1.0087231397628784,grad_norm: 0.9999991456884603, iteration: 57735
loss: 1.0332242250442505,grad_norm: 0.9999991564689106, iteration: 57736
loss: 1.0214675664901733,grad_norm: 0.8887947045658116, iteration: 57737
loss: 0.9842935800552368,grad_norm: 0.9999992777598041, iteration: 57738
loss: 0.9954812526702881,grad_norm: 0.9999990978430408, iteration: 57739
loss: 1.0507296323776245,grad_norm: 0.9999992248052954, iteration: 57740
loss: 0.9911535382270813,grad_norm: 0.9999990105111722, iteration: 57741
loss: 1.0167567729949951,grad_norm: 0.9737014138740098, iteration: 57742
loss: 0.955479085445404,grad_norm: 0.8963858094225389, iteration: 57743
loss: 0.9749882221221924,grad_norm: 0.999999249686354, iteration: 57744
loss: 1.0024503469467163,grad_norm: 0.999999131833278, iteration: 57745
loss: 0.98453688621521,grad_norm: 0.9999991845545412, iteration: 57746
loss: 1.016249656677246,grad_norm: 0.9999991171825705, iteration: 57747
loss: 1.019368052482605,grad_norm: 0.9128326347975472, iteration: 57748
loss: 1.0061607360839844,grad_norm: 0.9518626488592254, iteration: 57749
loss: 1.0378936529159546,grad_norm: 0.9165736603725821, iteration: 57750
loss: 1.0220439434051514,grad_norm: 0.9591547160751274, iteration: 57751
loss: 1.025032877922058,grad_norm: 0.9258819434994393, iteration: 57752
loss: 1.0204869508743286,grad_norm: 0.9999992191809337, iteration: 57753
loss: 1.0047301054000854,grad_norm: 0.9999990516778926, iteration: 57754
loss: 0.9964226484298706,grad_norm: 0.9999991431727122, iteration: 57755
loss: 0.9767760038375854,grad_norm: 0.9999992230765855, iteration: 57756
loss: 0.9831109642982483,grad_norm: 0.9999988978420686, iteration: 57757
loss: 1.0280665159225464,grad_norm: 0.99999908403437, iteration: 57758
loss: 1.000173807144165,grad_norm: 0.9951840971190289, iteration: 57759
loss: 1.0118399858474731,grad_norm: 0.999999137703164, iteration: 57760
loss: 0.9868031740188599,grad_norm: 0.9125408332243357, iteration: 57761
loss: 0.9946643114089966,grad_norm: 0.9511432231529442, iteration: 57762
loss: 0.9934585690498352,grad_norm: 0.999999133928456, iteration: 57763
loss: 1.0323994159698486,grad_norm: 0.9054235071062802, iteration: 57764
loss: 1.001438021659851,grad_norm: 0.9999991051989526, iteration: 57765
loss: 0.9903048276901245,grad_norm: 0.8899257015948407, iteration: 57766
loss: 0.9890285134315491,grad_norm: 0.9999997694384909, iteration: 57767
loss: 1.0275886058807373,grad_norm: 0.9999991076330426, iteration: 57768
loss: 1.032867670059204,grad_norm: 0.9999992907709264, iteration: 57769
loss: 0.9926043152809143,grad_norm: 0.8773273867293968, iteration: 57770
loss: 0.991026759147644,grad_norm: 0.9999990694926554, iteration: 57771
loss: 1.0058382749557495,grad_norm: 0.9608230033261759, iteration: 57772
loss: 0.975376307964325,grad_norm: 0.9999992060350517, iteration: 57773
loss: 0.9667413234710693,grad_norm: 0.9999991114818728, iteration: 57774
loss: 0.9869183897972107,grad_norm: 0.9844420688399868, iteration: 57775
loss: 1.0091482400894165,grad_norm: 0.9999991587916732, iteration: 57776
loss: 0.9806208610534668,grad_norm: 0.9999990874389905, iteration: 57777
loss: 1.0122020244598389,grad_norm: 0.9999992189102375, iteration: 57778
loss: 0.9990420937538147,grad_norm: 0.9247077953991478, iteration: 57779
loss: 1.0291672945022583,grad_norm: 0.9999990829643643, iteration: 57780
loss: 0.9922030568122864,grad_norm: 0.9999991167411307, iteration: 57781
loss: 1.0033257007598877,grad_norm: 0.9999991781606113, iteration: 57782
loss: 0.9889026880264282,grad_norm: 0.87562585569714, iteration: 57783
loss: 0.9918813109397888,grad_norm: 0.9734890874892607, iteration: 57784
loss: 0.9729489684104919,grad_norm: 0.9238227280155162, iteration: 57785
loss: 1.0409728288650513,grad_norm: 0.9999996706965623, iteration: 57786
loss: 0.9882587194442749,grad_norm: 0.9999992172983827, iteration: 57787
loss: 1.0410159826278687,grad_norm: 0.9999991610528052, iteration: 57788
loss: 1.0048383474349976,grad_norm: 0.9999991610529603, iteration: 57789
loss: 1.0197832584381104,grad_norm: 0.9999992537365314, iteration: 57790
loss: 1.0374587774276733,grad_norm: 0.9999996614943062, iteration: 57791
loss: 0.9923703074455261,grad_norm: 0.9999991363548156, iteration: 57792
loss: 1.0017993450164795,grad_norm: 0.8064720659459703, iteration: 57793
loss: 0.9829806685447693,grad_norm: 0.999999811753883, iteration: 57794
loss: 1.0129578113555908,grad_norm: 0.9999992702991221, iteration: 57795
loss: 1.024495005607605,grad_norm: 0.9999990895367905, iteration: 57796
loss: 0.9943976402282715,grad_norm: 0.9999991469702701, iteration: 57797
loss: 0.999980092048645,grad_norm: 0.999999208779736, iteration: 57798
loss: 1.025396466255188,grad_norm: 0.8706439430922314, iteration: 57799
loss: 1.0300151109695435,grad_norm: 0.9999990802432057, iteration: 57800
loss: 0.9804672598838806,grad_norm: 0.9705355664181511, iteration: 57801
loss: 1.0205097198486328,grad_norm: 0.999999063239795, iteration: 57802
loss: 1.0253260135650635,grad_norm: 0.9999990030711874, iteration: 57803
loss: 0.9700149297714233,grad_norm: 0.9972356537941094, iteration: 57804
loss: 1.0116459131240845,grad_norm: 0.9999989178176337, iteration: 57805
loss: 0.9507176876068115,grad_norm: 0.9943980551547249, iteration: 57806
loss: 1.0094166994094849,grad_norm: 0.9999990633213155, iteration: 57807
loss: 0.9888743758201599,grad_norm: 0.999999101554703, iteration: 57808
loss: 0.9993094801902771,grad_norm: 0.9999997093841093, iteration: 57809
loss: 1.0120998620986938,grad_norm: 0.9999991878751899, iteration: 57810
loss: 1.0022519826889038,grad_norm: 0.9999992208743126, iteration: 57811
loss: 0.9679040312767029,grad_norm: 0.8692083405348845, iteration: 57812
loss: 1.0098228454589844,grad_norm: 0.9452744351105724, iteration: 57813
loss: 0.9833325743675232,grad_norm: 0.8720423231939882, iteration: 57814
loss: 0.9770597219467163,grad_norm: 0.9642617422017229, iteration: 57815
loss: 0.9875408411026001,grad_norm: 0.9999990267639618, iteration: 57816
loss: 0.9962131381034851,grad_norm: 0.9787385280877348, iteration: 57817
loss: 1.0237700939178467,grad_norm: 0.9999992906406372, iteration: 57818
loss: 0.9906495213508606,grad_norm: 0.9773694468285152, iteration: 57819
loss: 1.0225677490234375,grad_norm: 0.8318488913393395, iteration: 57820
loss: 1.0109517574310303,grad_norm: 0.9999995427516428, iteration: 57821
loss: 1.015091896057129,grad_norm: 0.9999991413878672, iteration: 57822
loss: 0.96863853931427,grad_norm: 0.9999991434426917, iteration: 57823
loss: 1.012385368347168,grad_norm: 0.9999989945655013, iteration: 57824
loss: 1.0131314992904663,grad_norm: 0.9158153776415858, iteration: 57825
loss: 0.9929277896881104,grad_norm: 0.9999991285458435, iteration: 57826
loss: 0.9790471792221069,grad_norm: 0.9999990896914838, iteration: 57827
loss: 1.0127986669540405,grad_norm: 0.9999991839074605, iteration: 57828
loss: 1.0584001541137695,grad_norm: 0.999999095415361, iteration: 57829
loss: 1.012995719909668,grad_norm: 0.8984664851198149, iteration: 57830
loss: 0.990386962890625,grad_norm: 0.999999157820546, iteration: 57831
loss: 0.9778735041618347,grad_norm: 0.999998980972171, iteration: 57832
loss: 0.9647825360298157,grad_norm: 0.9999996094365023, iteration: 57833
loss: 0.974897027015686,grad_norm: 0.9885426737469304, iteration: 57834
loss: 1.0806899070739746,grad_norm: 0.9961602317050194, iteration: 57835
loss: 1.015478253364563,grad_norm: 0.9067971266326886, iteration: 57836
loss: 0.9890177845954895,grad_norm: 0.9817829401119113, iteration: 57837
loss: 1.0373528003692627,grad_norm: 0.9999991580663581, iteration: 57838
loss: 0.9984237551689148,grad_norm: 0.9540413569481898, iteration: 57839
loss: 0.9769970774650574,grad_norm: 0.9999991836034661, iteration: 57840
loss: 0.9950209259986877,grad_norm: 0.9999990778746038, iteration: 57841
loss: 0.9883821606636047,grad_norm: 0.9999990944958389, iteration: 57842
loss: 0.9821178317070007,grad_norm: 0.9999991978197753, iteration: 57843
loss: 0.9649152755737305,grad_norm: 0.9999994519632779, iteration: 57844
loss: 0.988450825214386,grad_norm: 0.9999993915533071, iteration: 57845
loss: 1.0297974348068237,grad_norm: 0.9619825851940875, iteration: 57846
loss: 0.9394551515579224,grad_norm: 0.8804875418955012, iteration: 57847
loss: 0.9914701581001282,grad_norm: 0.9999990845811896, iteration: 57848
loss: 1.0355125665664673,grad_norm: 0.999999347779631, iteration: 57849
loss: 1.0486363172531128,grad_norm: 0.9999993683594226, iteration: 57850
loss: 1.0228526592254639,grad_norm: 0.9999991913573989, iteration: 57851
loss: 0.9609882235527039,grad_norm: 0.9999991073181201, iteration: 57852
loss: 0.9710004329681396,grad_norm: 0.9999992042224704, iteration: 57853
loss: 1.0282129049301147,grad_norm: 0.9999990771173012, iteration: 57854
loss: 0.9860242009162903,grad_norm: 0.9999991578072469, iteration: 57855
loss: 1.0067558288574219,grad_norm: 0.9999992674415789, iteration: 57856
loss: 1.0076725482940674,grad_norm: 0.99999935071846, iteration: 57857
loss: 1.0145119428634644,grad_norm: 0.9056240452444033, iteration: 57858
loss: 0.9931702017784119,grad_norm: 0.9999990754079962, iteration: 57859
loss: 1.051821231842041,grad_norm: 0.9999992089837463, iteration: 57860
loss: 1.0059467554092407,grad_norm: 0.999999125538461, iteration: 57861
loss: 0.9745333790779114,grad_norm: 0.999999058717435, iteration: 57862
loss: 1.0113071203231812,grad_norm: 0.9999991800966189, iteration: 57863
loss: 1.014616847038269,grad_norm: 0.9999991792881174, iteration: 57864
loss: 0.9888429045677185,grad_norm: 0.9999992926881179, iteration: 57865
loss: 1.0553563833236694,grad_norm: 0.9999992592898014, iteration: 57866
loss: 1.1011006832122803,grad_norm: 0.9999995844019905, iteration: 57867
loss: 1.003713846206665,grad_norm: 0.9999995162830799, iteration: 57868
loss: 1.0916107892990112,grad_norm: 0.9999997838252684, iteration: 57869
loss: 1.0125292539596558,grad_norm: 0.9999990493372488, iteration: 57870
loss: 0.991383969783783,grad_norm: 0.9999995499489303, iteration: 57871
loss: 1.0087963342666626,grad_norm: 0.9581938044406247, iteration: 57872
loss: 0.9570282101631165,grad_norm: 0.9846181344983158, iteration: 57873
loss: 1.0060148239135742,grad_norm: 0.9999991551352623, iteration: 57874
loss: 0.9837940335273743,grad_norm: 0.9148889825179735, iteration: 57875
loss: 0.9997962713241577,grad_norm: 0.9999992177528435, iteration: 57876
loss: 0.9773059487342834,grad_norm: 0.9375160794441706, iteration: 57877
loss: 1.0177274942398071,grad_norm: 0.9999991550682128, iteration: 57878
loss: 0.9960170984268188,grad_norm: 0.9650796946696224, iteration: 57879
loss: 0.9934681057929993,grad_norm: 0.9332623939441875, iteration: 57880
loss: 1.0099605321884155,grad_norm: 0.9999988980048397, iteration: 57881
loss: 1.0048649311065674,grad_norm: 0.9999990627207057, iteration: 57882
loss: 1.0417609214782715,grad_norm: 0.9999994250841227, iteration: 57883
loss: 1.0226205587387085,grad_norm: 0.9999991215460836, iteration: 57884
loss: 0.9983744621276855,grad_norm: 0.9722202040902588, iteration: 57885
loss: 1.0327253341674805,grad_norm: 0.999999147243199, iteration: 57886
loss: 1.040049433708191,grad_norm: 0.9999992983116521, iteration: 57887
loss: 0.9760690331459045,grad_norm: 0.9809267870866273, iteration: 57888
loss: 1.0127854347229004,grad_norm: 0.9999990524643249, iteration: 57889
loss: 1.0115433931350708,grad_norm: 0.9969737262183127, iteration: 57890
loss: 0.975238025188446,grad_norm: 0.9999991089779849, iteration: 57891
loss: 1.0180269479751587,grad_norm: 0.9690552241248257, iteration: 57892
loss: 0.9989880919456482,grad_norm: 0.999999097902407, iteration: 57893
loss: 0.9920035004615784,grad_norm: 0.9041521249259593, iteration: 57894
loss: 1.0145176649093628,grad_norm: 0.9999991185145283, iteration: 57895
loss: 0.9811629056930542,grad_norm: 0.9999992482306431, iteration: 57896
loss: 1.0661884546279907,grad_norm: 0.9999998942520116, iteration: 57897
loss: 0.9750897884368896,grad_norm: 0.9999991305086631, iteration: 57898
loss: 1.0903184413909912,grad_norm: 0.9999991933539594, iteration: 57899
loss: 1.0372185707092285,grad_norm: 0.9999991038329963, iteration: 57900
loss: 0.9922335147857666,grad_norm: 0.9999992253622166, iteration: 57901
loss: 1.0043370723724365,grad_norm: 0.7458312637986133, iteration: 57902
loss: 1.0192756652832031,grad_norm: 0.9606895706487207, iteration: 57903
loss: 1.0057321786880493,grad_norm: 0.9999990098610776, iteration: 57904
loss: 0.982082188129425,grad_norm: 0.9436973530528183, iteration: 57905
loss: 1.0097590684890747,grad_norm: 0.9999991045180446, iteration: 57906
loss: 1.0418469905853271,grad_norm: 0.8740723087782608, iteration: 57907
loss: 1.0115985870361328,grad_norm: 0.9999990283065157, iteration: 57908
loss: 1.0349947214126587,grad_norm: 0.9999991550534252, iteration: 57909
loss: 1.0131293535232544,grad_norm: 0.977870179422932, iteration: 57910
loss: 0.9943415522575378,grad_norm: 0.9883432123541809, iteration: 57911
loss: 1.0351794958114624,grad_norm: 0.9778447465914452, iteration: 57912
loss: 1.0098655223846436,grad_norm: 0.8529331811512216, iteration: 57913
loss: 0.9723725318908691,grad_norm: 0.9500074785131228, iteration: 57914
loss: 0.9938721060752869,grad_norm: 0.999999150081462, iteration: 57915
loss: 1.0137608051300049,grad_norm: 0.944393274010341, iteration: 57916
loss: 0.9784052968025208,grad_norm: 0.9999990052616751, iteration: 57917
loss: 0.9881864786148071,grad_norm: 0.9999991513302983, iteration: 57918
loss: 1.0402175188064575,grad_norm: 0.9999991242850197, iteration: 57919
loss: 0.9908432364463806,grad_norm: 0.999999144478076, iteration: 57920
loss: 0.9938494563102722,grad_norm: 0.982445064289493, iteration: 57921
loss: 1.0172157287597656,grad_norm: 0.9999991053869942, iteration: 57922
loss: 1.0245909690856934,grad_norm: 0.9287319494436778, iteration: 57923
loss: 0.9675607681274414,grad_norm: 0.9999993264688679, iteration: 57924
loss: 0.9903126358985901,grad_norm: 0.9999990043600164, iteration: 57925
loss: 1.0284556150436401,grad_norm: 0.9999990339786864, iteration: 57926
loss: 1.0102367401123047,grad_norm: 0.9824698612198948, iteration: 57927
loss: 0.9968348741531372,grad_norm: 0.9028257243808407, iteration: 57928
loss: 1.0000202655792236,grad_norm: 0.9999990018940613, iteration: 57929
loss: 0.9836990237236023,grad_norm: 0.965016369896272, iteration: 57930
loss: 1.018615484237671,grad_norm: 0.999999194946876, iteration: 57931
loss: 1.0030790567398071,grad_norm: 0.8670363634399384, iteration: 57932
loss: 0.9843921065330505,grad_norm: 0.9922354629505916, iteration: 57933
loss: 1.0070472955703735,grad_norm: 0.9999991349388379, iteration: 57934
loss: 0.9700216054916382,grad_norm: 0.9999991442995917, iteration: 57935
loss: 0.9814448952674866,grad_norm: 0.7356646988819571, iteration: 57936
loss: 1.0322849750518799,grad_norm: 0.999999112966207, iteration: 57937
loss: 1.049463152885437,grad_norm: 0.9999997694984136, iteration: 57938
loss: 1.0250144004821777,grad_norm: 0.9999990538015261, iteration: 57939
loss: 0.9789633750915527,grad_norm: 0.9999991125113772, iteration: 57940
loss: 0.9660924673080444,grad_norm: 0.9719989606447588, iteration: 57941
loss: 1.0056567192077637,grad_norm: 0.8910118173893071, iteration: 57942
loss: 1.0310579538345337,grad_norm: 0.9999991872101812, iteration: 57943
loss: 0.9705358743667603,grad_norm: 0.9999989513748063, iteration: 57944
loss: 1.015273094177246,grad_norm: 0.9999989524115048, iteration: 57945
loss: 1.0065343379974365,grad_norm: 0.9999990477562449, iteration: 57946
loss: 0.9823575615882874,grad_norm: 0.9999989989926593, iteration: 57947
loss: 0.9882687926292419,grad_norm: 0.9999992279295353, iteration: 57948
loss: 0.9662032723426819,grad_norm: 0.9193622549153428, iteration: 57949
loss: 0.9885468482971191,grad_norm: 0.999999000848035, iteration: 57950
loss: 0.9864016771316528,grad_norm: 0.9999992329907696, iteration: 57951
loss: 1.0345585346221924,grad_norm: 0.8289571464467119, iteration: 57952
loss: 1.024428367614746,grad_norm: 0.9890388024348139, iteration: 57953
loss: 1.0226303339004517,grad_norm: 0.9999996441993764, iteration: 57954
loss: 0.9947227239608765,grad_norm: 0.9716971907428107, iteration: 57955
loss: 1.0978213548660278,grad_norm: 0.999999267706258, iteration: 57956
loss: 1.0362402200698853,grad_norm: 0.9999991341454724, iteration: 57957
loss: 1.0277068614959717,grad_norm: 0.9999998431789416, iteration: 57958
loss: 1.0025838613510132,grad_norm: 0.8674130212661505, iteration: 57959
loss: 1.0276433229446411,grad_norm: 0.9179531342423946, iteration: 57960
loss: 0.9910522699356079,grad_norm: 0.9999992880253099, iteration: 57961
loss: 1.0031920671463013,grad_norm: 0.9468777543994972, iteration: 57962
loss: 1.0006605386734009,grad_norm: 0.9999991439095854, iteration: 57963
loss: 1.0179957151412964,grad_norm: 0.9827890504036996, iteration: 57964
loss: 1.0235133171081543,grad_norm: 0.9999990538996274, iteration: 57965
loss: 1.0081838369369507,grad_norm: 0.869038194298751, iteration: 57966
loss: 0.9954958558082581,grad_norm: 0.9999989683570858, iteration: 57967
loss: 0.9804639220237732,grad_norm: 0.9999991579217803, iteration: 57968
loss: 0.9865229725837708,grad_norm: 0.9079558831032825, iteration: 57969
loss: 1.0313202142715454,grad_norm: 0.9999990534967783, iteration: 57970
loss: 1.023857593536377,grad_norm: 0.9999991103327387, iteration: 57971
loss: 0.9994702339172363,grad_norm: 0.9999991989772754, iteration: 57972
loss: 0.9955224394798279,grad_norm: 0.8237717118701248, iteration: 57973
loss: 1.0021864175796509,grad_norm: 0.8093793001807548, iteration: 57974
loss: 0.9383130073547363,grad_norm: 0.9999990926092319, iteration: 57975
loss: 1.0173826217651367,grad_norm: 0.9999992126914431, iteration: 57976
loss: 0.9933939576148987,grad_norm: 0.9808657403603682, iteration: 57977
loss: 1.002101182937622,grad_norm: 0.9999990627784964, iteration: 57978
loss: 1.0086466073989868,grad_norm: 0.999999241362604, iteration: 57979
loss: 0.9757224321365356,grad_norm: 0.999999097823309, iteration: 57980
loss: 0.9834172129631042,grad_norm: 0.9934903900436287, iteration: 57981
loss: 1.0618804693222046,grad_norm: 0.9999990741024768, iteration: 57982
loss: 0.9958438277244568,grad_norm: 0.9593567565287366, iteration: 57983
loss: 1.0084277391433716,grad_norm: 0.9999992726803032, iteration: 57984
loss: 1.0075922012329102,grad_norm: 0.9999991121820185, iteration: 57985
loss: 0.9882950186729431,grad_norm: 0.9999992124192904, iteration: 57986
loss: 1.049696683883667,grad_norm: 0.9999996331142305, iteration: 57987
loss: 0.9813462495803833,grad_norm: 0.9999991167401053, iteration: 57988
loss: 1.0100765228271484,grad_norm: 0.9999991072493616, iteration: 57989
loss: 0.9868112802505493,grad_norm: 0.9632241523461158, iteration: 57990
loss: 1.004797101020813,grad_norm: 0.999999267473157, iteration: 57991
loss: 1.0354701280593872,grad_norm: 0.9757530605119499, iteration: 57992
loss: 0.973739504814148,grad_norm: 0.9999990104347288, iteration: 57993
loss: 1.0081523656845093,grad_norm: 0.8546817572964184, iteration: 57994
loss: 0.9994279742240906,grad_norm: 0.9999991842273747, iteration: 57995
loss: 1.0406427383422852,grad_norm: 0.9999992858253344, iteration: 57996
loss: 0.9893088936805725,grad_norm: 0.9999991303366409, iteration: 57997
loss: 1.0214505195617676,grad_norm: 0.9999996089166383, iteration: 57998
loss: 1.0041887760162354,grad_norm: 0.8320357048815844, iteration: 57999
loss: 0.985612690448761,grad_norm: 0.999999063288411, iteration: 58000
loss: 1.0050896406173706,grad_norm: 0.987189094402859, iteration: 58001
loss: 0.9880834817886353,grad_norm: 0.9999992265889525, iteration: 58002
loss: 0.9884260892868042,grad_norm: 0.9999991419011363, iteration: 58003
loss: 1.0275609493255615,grad_norm: 0.9999992698299978, iteration: 58004
loss: 1.0006730556488037,grad_norm: 0.9990339294437403, iteration: 58005
loss: 0.9897032380104065,grad_norm: 0.9999993483774909, iteration: 58006
loss: 1.0687495470046997,grad_norm: 0.9999996792761113, iteration: 58007
loss: 1.0552061796188354,grad_norm: 0.9065258840742945, iteration: 58008
loss: 1.00301992893219,grad_norm: 0.9108078177392884, iteration: 58009
loss: 1.0291526317596436,grad_norm: 0.9999994019147588, iteration: 58010
loss: 1.0155034065246582,grad_norm: 0.9999991219988001, iteration: 58011
loss: 1.0035909414291382,grad_norm: 0.9999992349744873, iteration: 58012
loss: 0.9853420257568359,grad_norm: 0.9999991379069518, iteration: 58013
loss: 1.0040491819381714,grad_norm: 0.9999990074202633, iteration: 58014
loss: 1.042299509048462,grad_norm: 0.9999992839887164, iteration: 58015
loss: 1.0032540559768677,grad_norm: 0.9999995778783042, iteration: 58016
loss: 1.02651047706604,grad_norm: 0.9124934265267689, iteration: 58017
loss: 0.9902254939079285,grad_norm: 0.9880982168178517, iteration: 58018
loss: 0.9834646582603455,grad_norm: 0.9652809625095223, iteration: 58019
loss: 1.0205703973770142,grad_norm: 0.999999171723799, iteration: 58020
loss: 1.0373995304107666,grad_norm: 1.0000000205462636, iteration: 58021
loss: 0.9935632348060608,grad_norm: 0.9811786261546362, iteration: 58022
loss: 1.0451041460037231,grad_norm: 0.9679555394296387, iteration: 58023
loss: 1.0265309810638428,grad_norm: 0.8952495019440965, iteration: 58024
loss: 0.961746335029602,grad_norm: 0.9999991820084446, iteration: 58025
loss: 0.9601505994796753,grad_norm: 0.9999991242201686, iteration: 58026
loss: 0.9979300498962402,grad_norm: 0.9999990943441979, iteration: 58027
loss: 0.9700661301612854,grad_norm: 0.9999991181140225, iteration: 58028
loss: 1.0045418739318848,grad_norm: 0.9999993291938559, iteration: 58029
loss: 1.0035218000411987,grad_norm: 0.954817503788349, iteration: 58030
loss: 0.9835430383682251,grad_norm: 0.9999992339507291, iteration: 58031
loss: 0.9977760910987854,grad_norm: 0.9999992245943474, iteration: 58032
loss: 0.9982253313064575,grad_norm: 0.9499138439049453, iteration: 58033
loss: 0.9966893792152405,grad_norm: 0.9999991902127093, iteration: 58034
loss: 0.975727379322052,grad_norm: 0.9999990672013538, iteration: 58035
loss: 0.9934083819389343,grad_norm: 0.9896170735969351, iteration: 58036
loss: 0.9717407822608948,grad_norm: 0.9999990862167024, iteration: 58037
loss: 1.0944187641143799,grad_norm: 0.999999981381428, iteration: 58038
loss: 1.0974509716033936,grad_norm: 0.9999999245268707, iteration: 58039
loss: 0.9818102121353149,grad_norm: 0.8854761358019468, iteration: 58040
loss: 1.0213723182678223,grad_norm: 0.9999993345114456, iteration: 58041
loss: 1.0258244276046753,grad_norm: 0.9990366274700649, iteration: 58042
loss: 1.0060234069824219,grad_norm: 0.9999990501234577, iteration: 58043
loss: 1.0031940937042236,grad_norm: 0.999999135683034, iteration: 58044
loss: 1.0312225818634033,grad_norm: 0.999999215820515, iteration: 58045
loss: 0.9932585954666138,grad_norm: 0.9999991893347036, iteration: 58046
loss: 1.0308339595794678,grad_norm: 0.9999992922692272, iteration: 58047
loss: 1.0489610433578491,grad_norm: 0.9999991548588404, iteration: 58048
loss: 0.9918920397758484,grad_norm: 0.9999990787724059, iteration: 58049
loss: 1.0094366073608398,grad_norm: 0.9539280074223196, iteration: 58050
loss: 0.9720919132232666,grad_norm: 0.8918321724586029, iteration: 58051
loss: 0.9943060874938965,grad_norm: 0.9999992263476165, iteration: 58052
loss: 0.9678300619125366,grad_norm: 0.8044169159437974, iteration: 58053
loss: 1.017846941947937,grad_norm: 0.9999989915312844, iteration: 58054
loss: 0.9816879630088806,grad_norm: 0.8221456180298481, iteration: 58055
loss: 0.9855612516403198,grad_norm: 0.9999990423845839, iteration: 58056
loss: 1.0900335311889648,grad_norm: 0.9999999402764695, iteration: 58057
loss: 1.0379648208618164,grad_norm: 0.9967071984869754, iteration: 58058
loss: 1.0079299211502075,grad_norm: 0.9999991496900633, iteration: 58059
loss: 1.009775996208191,grad_norm: 0.999999049478694, iteration: 58060
loss: 0.9908214807510376,grad_norm: 0.9381511606673991, iteration: 58061
loss: 0.999337375164032,grad_norm: 0.8933529751919568, iteration: 58062
loss: 1.0053635835647583,grad_norm: 0.9999990900657858, iteration: 58063
loss: 1.014467716217041,grad_norm: 0.9999991351789579, iteration: 58064
loss: 0.9827293753623962,grad_norm: 0.9999992205949172, iteration: 58065
loss: 0.952694833278656,grad_norm: 0.9795076512737629, iteration: 58066
loss: 0.9797216057777405,grad_norm: 0.8954051080451323, iteration: 58067
loss: 1.0104612112045288,grad_norm: 0.9999992523643969, iteration: 58068
loss: 1.0082547664642334,grad_norm: 0.9999990807324785, iteration: 58069
loss: 0.9556031227111816,grad_norm: 0.8973692124484928, iteration: 58070
loss: 0.9890689849853516,grad_norm: 0.999999606511608, iteration: 58071
loss: 0.9984176754951477,grad_norm: 0.815153554676061, iteration: 58072
loss: 1.0166008472442627,grad_norm: 0.9999991171629039, iteration: 58073
loss: 1.0045469999313354,grad_norm: 0.9999991320316465, iteration: 58074
loss: 1.0087562799453735,grad_norm: 0.9999991720648216, iteration: 58075
loss: 1.016452670097351,grad_norm: 0.9999993796969162, iteration: 58076
loss: 1.0256415605545044,grad_norm: 0.9999997957509142, iteration: 58077
loss: 1.0137546062469482,grad_norm: 0.9999993304946717, iteration: 58078
loss: 0.966235339641571,grad_norm: 0.9999991052617089, iteration: 58079
loss: 0.9761990904808044,grad_norm: 0.9978806115747029, iteration: 58080
loss: 1.0073280334472656,grad_norm: 0.8848720704867474, iteration: 58081
loss: 1.002944827079773,grad_norm: 0.9999989579765536, iteration: 58082
loss: 0.9984907507896423,grad_norm: 0.9446652405837827, iteration: 58083
loss: 1.0120890140533447,grad_norm: 0.9999993030337712, iteration: 58084
loss: 1.0251808166503906,grad_norm: 0.9999997488361446, iteration: 58085
loss: 1.0471365451812744,grad_norm: 0.9999990954031722, iteration: 58086
loss: 0.9659528732299805,grad_norm: 0.9999991535912107, iteration: 58087
loss: 0.9809169173240662,grad_norm: 0.9999990321868857, iteration: 58088
loss: 1.014001727104187,grad_norm: 0.9999990913257695, iteration: 58089
loss: 0.9705271124839783,grad_norm: 0.9999992034257089, iteration: 58090
loss: 0.9909140467643738,grad_norm: 0.9999990765355745, iteration: 58091
loss: 1.0313900709152222,grad_norm: 0.999999170321117, iteration: 58092
loss: 0.971928060054779,grad_norm: 0.9999991845639226, iteration: 58093
loss: 0.9825432896614075,grad_norm: 0.940123639783234, iteration: 58094
loss: 1.0006999969482422,grad_norm: 0.9999990315015922, iteration: 58095
loss: 1.0415563583374023,grad_norm: 0.999999140179657, iteration: 58096
loss: 1.0720083713531494,grad_norm: 0.9999998115601654, iteration: 58097
loss: 0.9851622581481934,grad_norm: 0.8523291050851747, iteration: 58098
loss: 1.0086792707443237,grad_norm: 0.9999993159113608, iteration: 58099
loss: 0.9913670420646667,grad_norm: 0.9999991917256693, iteration: 58100
loss: 1.0095605850219727,grad_norm: 0.9999991787857142, iteration: 58101
loss: 1.040893793106079,grad_norm: 0.9999994332445366, iteration: 58102
loss: 0.9621519446372986,grad_norm: 0.9999990676047602, iteration: 58103
loss: 0.9738976359367371,grad_norm: 0.9999991222659362, iteration: 58104
loss: 0.9790682196617126,grad_norm: 0.9999990687566653, iteration: 58105
loss: 0.9978935122489929,grad_norm: 0.9290473250529439, iteration: 58106
loss: 0.9516928791999817,grad_norm: 0.8166171558383297, iteration: 58107
loss: 1.0102059841156006,grad_norm: 0.9999993186050482, iteration: 58108
loss: 0.9866175651550293,grad_norm: 0.9999991488843777, iteration: 58109
loss: 1.0003255605697632,grad_norm: 0.9205521491945261, iteration: 58110
loss: 1.0192681550979614,grad_norm: 0.9999998247443994, iteration: 58111
loss: 0.973169207572937,grad_norm: 0.883250917167479, iteration: 58112
loss: 0.9566370844841003,grad_norm: 0.9999991069533828, iteration: 58113
loss: 0.9785154461860657,grad_norm: 0.9999988998357002, iteration: 58114
loss: 1.0224202871322632,grad_norm: 0.9999991808641231, iteration: 58115
loss: 1.0907138586044312,grad_norm: 0.9999991498737737, iteration: 58116
loss: 0.956425666809082,grad_norm: 0.9999990403234917, iteration: 58117
loss: 0.9620454907417297,grad_norm: 0.9999992441858286, iteration: 58118
loss: 1.1169105768203735,grad_norm: 0.9999992195939129, iteration: 58119
loss: 1.0593888759613037,grad_norm: 0.9999993205799909, iteration: 58120
loss: 0.9821562767028809,grad_norm: 0.9231973681905946, iteration: 58121
loss: 1.01103937625885,grad_norm: 0.9999993460632053, iteration: 58122
loss: 0.9823198914527893,grad_norm: 0.9999990356574809, iteration: 58123
loss: 1.012317419052124,grad_norm: 0.9512525800645352, iteration: 58124
loss: 0.9644573926925659,grad_norm: 0.9999989543847241, iteration: 58125
loss: 1.0126020908355713,grad_norm: 0.99999929036658, iteration: 58126
loss: 0.9353060722351074,grad_norm: 0.9315477037575316, iteration: 58127
loss: 0.9569079875946045,grad_norm: 0.9465618339095986, iteration: 58128
loss: 1.0055428743362427,grad_norm: 0.952649777320422, iteration: 58129
loss: 1.0138798952102661,grad_norm: 0.9999992916809028, iteration: 58130
loss: 0.9729658961296082,grad_norm: 0.9999992693452253, iteration: 58131
loss: 0.9817700386047363,grad_norm: 0.9999991940380702, iteration: 58132
loss: 0.9968077540397644,grad_norm: 0.9999992301189029, iteration: 58133
loss: 0.9609209299087524,grad_norm: 0.9271450539905791, iteration: 58134
loss: 0.9850437045097351,grad_norm: 0.9999991908081974, iteration: 58135
loss: 0.9789068698883057,grad_norm: 0.9154613205598978, iteration: 58136
loss: 0.9936726093292236,grad_norm: 0.9999993117343163, iteration: 58137
loss: 1.0266962051391602,grad_norm: 0.9999990325497424, iteration: 58138
loss: 0.9915311932563782,grad_norm: 0.9999993973701093, iteration: 58139
loss: 1.030346393585205,grad_norm: 0.8588013916973884, iteration: 58140
loss: 1.0160157680511475,grad_norm: 0.9955956134800861, iteration: 58141
loss: 1.0024657249450684,grad_norm: 0.9626696127994602, iteration: 58142
loss: 1.0325775146484375,grad_norm: 0.9293282719964068, iteration: 58143
loss: 1.009294867515564,grad_norm: 0.9201472016949671, iteration: 58144
loss: 0.9805536866188049,grad_norm: 0.9999991315578836, iteration: 58145
loss: 1.0051637887954712,grad_norm: 0.9384634644983078, iteration: 58146
loss: 1.0260815620422363,grad_norm: 0.9836642534483535, iteration: 58147
loss: 0.9553789496421814,grad_norm: 0.9999991545130759, iteration: 58148
loss: 1.0618311166763306,grad_norm: 0.8389009477302062, iteration: 58149
loss: 1.015007495880127,grad_norm: 0.9211434805855874, iteration: 58150
loss: 1.0115859508514404,grad_norm: 0.9999991143738225, iteration: 58151
loss: 1.0054072141647339,grad_norm: 0.9999994101000825, iteration: 58152
loss: 1.0133166313171387,grad_norm: 0.9118735228426205, iteration: 58153
loss: 1.0297267436981201,grad_norm: 0.9999990598398928, iteration: 58154
loss: 0.9662321209907532,grad_norm: 0.9999990905286198, iteration: 58155
loss: 0.9745275974273682,grad_norm: 0.9999991396795782, iteration: 58156
loss: 0.9918510317802429,grad_norm: 0.9999990748776929, iteration: 58157
loss: 1.0287693738937378,grad_norm: 0.9073130598507778, iteration: 58158
loss: 1.0094914436340332,grad_norm: 0.9999992745595344, iteration: 58159
loss: 1.0058876276016235,grad_norm: 0.8852987968812459, iteration: 58160
loss: 0.9549756646156311,grad_norm: 0.9672448219328404, iteration: 58161
loss: 1.058121681213379,grad_norm: 0.9999992527954558, iteration: 58162
loss: 1.0080294609069824,grad_norm: 0.8084465509977291, iteration: 58163
loss: 0.9841596484184265,grad_norm: 0.8141374828073031, iteration: 58164
loss: 1.0121554136276245,grad_norm: 0.9630471949031065, iteration: 58165
loss: 1.0055159330368042,grad_norm: 0.9186706295341356, iteration: 58166
loss: 1.0204555988311768,grad_norm: 0.9999991879351925, iteration: 58167
loss: 0.9994592666625977,grad_norm: 0.9999994492013103, iteration: 58168
loss: 1.0006335973739624,grad_norm: 0.9999991672541358, iteration: 58169
loss: 0.9992484450340271,grad_norm: 0.999999094106579, iteration: 58170
loss: 0.9905813932418823,grad_norm: 0.9999991431131804, iteration: 58171
loss: 0.9933647513389587,grad_norm: 0.9983317745127819, iteration: 58172
loss: 0.9774659872055054,grad_norm: 0.9999990509574984, iteration: 58173
loss: 0.9694281220436096,grad_norm: 0.9839421256894466, iteration: 58174
loss: 0.9854525923728943,grad_norm: 0.9999990722890891, iteration: 58175
loss: 0.9665908217430115,grad_norm: 0.9999991092509191, iteration: 58176
loss: 0.9526477456092834,grad_norm: 0.908575418270588, iteration: 58177
loss: 0.9763275980949402,grad_norm: 0.9999990409775018, iteration: 58178
loss: 1.0183725357055664,grad_norm: 0.9999991393504953, iteration: 58179
loss: 0.9985607862472534,grad_norm: 0.8566781748664805, iteration: 58180
loss: 1.0173414945602417,grad_norm: 0.9999990557484972, iteration: 58181
loss: 0.996647834777832,grad_norm: 0.9999991680534169, iteration: 58182
loss: 1.0019934177398682,grad_norm: 0.9968969475871208, iteration: 58183
loss: 1.019332766532898,grad_norm: 0.9999992177821984, iteration: 58184
loss: 0.9634504318237305,grad_norm: 0.9206003809616314, iteration: 58185
loss: 1.0069584846496582,grad_norm: 0.9999992900795209, iteration: 58186
loss: 1.0159066915512085,grad_norm: 0.9999992177170339, iteration: 58187
loss: 1.03433096408844,grad_norm: 0.9999991303458318, iteration: 58188
loss: 0.9830459952354431,grad_norm: 0.9813119316073801, iteration: 58189
loss: 0.9946298599243164,grad_norm: 0.9999991888350996, iteration: 58190
loss: 1.0206409692764282,grad_norm: 0.9999991063306847, iteration: 58191
loss: 1.0057159662246704,grad_norm: 0.9156752460563377, iteration: 58192
loss: 0.9954825639724731,grad_norm: 0.9999991414276372, iteration: 58193
loss: 0.9510272145271301,grad_norm: 0.9361396728233268, iteration: 58194
loss: 1.0039422512054443,grad_norm: 0.9104094976404875, iteration: 58195
loss: 0.9853838682174683,grad_norm: 0.9999990673762034, iteration: 58196
loss: 0.9720883369445801,grad_norm: 0.9919157997434294, iteration: 58197
loss: 0.9535166621208191,grad_norm: 0.9999989615568387, iteration: 58198
loss: 0.9374377727508545,grad_norm: 0.9999989459157803, iteration: 58199
loss: 1.0412362813949585,grad_norm: 0.8686942889604705, iteration: 58200
loss: 0.9694006443023682,grad_norm: 0.9999992093046391, iteration: 58201
loss: 0.9857520461082458,grad_norm: 0.9476551030854127, iteration: 58202
loss: 0.9733697175979614,grad_norm: 0.9999990129943147, iteration: 58203
loss: 0.9904915690422058,grad_norm: 0.9999991881283308, iteration: 58204
loss: 0.9636017084121704,grad_norm: 0.8747924812437301, iteration: 58205
loss: 1.0355454683303833,grad_norm: 0.9999992867335405, iteration: 58206
loss: 1.0256073474884033,grad_norm: 0.8595832673682986, iteration: 58207
loss: 0.9873008131980896,grad_norm: 0.9999990559062807, iteration: 58208
loss: 1.0273910760879517,grad_norm: 0.9999991709466035, iteration: 58209
loss: 1.04054594039917,grad_norm: 0.9174430007557566, iteration: 58210
loss: 0.9896891117095947,grad_norm: 0.9999990863306621, iteration: 58211
loss: 1.0117249488830566,grad_norm: 0.9137870453488247, iteration: 58212
loss: 1.0125107765197754,grad_norm: 0.9999991718200828, iteration: 58213
loss: 0.9835610389709473,grad_norm: 0.9999991685821147, iteration: 58214
loss: 1.0051732063293457,grad_norm: 0.9999989802231843, iteration: 58215
loss: 0.9955620765686035,grad_norm: 0.9048563946652545, iteration: 58216
loss: 0.9804906845092773,grad_norm: 0.9449730870256273, iteration: 58217
loss: 1.0171126127243042,grad_norm: 0.9619156410872505, iteration: 58218
loss: 1.0402594804763794,grad_norm: 0.9999991563239882, iteration: 58219
loss: 1.0376923084259033,grad_norm: 0.9524469512118483, iteration: 58220
loss: 0.9903754591941833,grad_norm: 0.9999990711002203, iteration: 58221
loss: 1.0069327354431152,grad_norm: 0.9999999323135429, iteration: 58222
loss: 0.9979494214057922,grad_norm: 0.9999991070851402, iteration: 58223
loss: 1.1044185161590576,grad_norm: 0.9999994774954705, iteration: 58224
loss: 0.9620813131332397,grad_norm: 0.999999180758563, iteration: 58225
loss: 0.9975457191467285,grad_norm: 0.8522651589764408, iteration: 58226
loss: 1.0056071281433105,grad_norm: 0.9999990043207737, iteration: 58227
loss: 0.9498327970504761,grad_norm: 0.9540983042676817, iteration: 58228
loss: 1.0092852115631104,grad_norm: 0.9999990860416453, iteration: 58229
loss: 1.0082420110702515,grad_norm: 0.9999989427073097, iteration: 58230
loss: 1.0041818618774414,grad_norm: 0.9999991315572874, iteration: 58231
loss: 1.032168984413147,grad_norm: 0.999999303423378, iteration: 58232
loss: 0.9803147315979004,grad_norm: 0.9892501185166057, iteration: 58233
loss: 0.9788897037506104,grad_norm: 0.9599398441006455, iteration: 58234
loss: 1.0107816457748413,grad_norm: 0.9999991163341986, iteration: 58235
loss: 0.9554755091667175,grad_norm: 0.9553504054332547, iteration: 58236
loss: 1.035588026046753,grad_norm: 0.9999992087072593, iteration: 58237
loss: 1.0023009777069092,grad_norm: 0.9999992641849901, iteration: 58238
loss: 1.0320132970809937,grad_norm: 0.9999990261137772, iteration: 58239
loss: 0.969241201877594,grad_norm: 0.9999991864281265, iteration: 58240
loss: 0.9868139028549194,grad_norm: 0.9547486342567769, iteration: 58241
loss: 1.0052814483642578,grad_norm: 0.9833043127139034, iteration: 58242
loss: 1.013351321220398,grad_norm: 0.9778750477645697, iteration: 58243
loss: 1.0297576189041138,grad_norm: 0.9487968272845059, iteration: 58244
loss: 1.1178905963897705,grad_norm: 0.9999992050814802, iteration: 58245
loss: 1.0032715797424316,grad_norm: 0.9999990920917068, iteration: 58246
loss: 0.9865210056304932,grad_norm: 0.9367311754821922, iteration: 58247
loss: 1.0141948461532593,grad_norm: 0.9999990902339244, iteration: 58248
loss: 0.9888300895690918,grad_norm: 0.8452318346975675, iteration: 58249
loss: 0.9943937063217163,grad_norm: 0.9999990858839514, iteration: 58250
loss: 1.0140284299850464,grad_norm: 0.9999990580775736, iteration: 58251
loss: 1.0129731893539429,grad_norm: 0.785534938770697, iteration: 58252
loss: 1.0192497968673706,grad_norm: 0.9852096028555064, iteration: 58253
loss: 1.0050890445709229,grad_norm: 0.9999991289727389, iteration: 58254
loss: 0.9846668243408203,grad_norm: 0.9999990766218889, iteration: 58255
loss: 0.9601321816444397,grad_norm: 0.9574201608110127, iteration: 58256
loss: 1.0835036039352417,grad_norm: 0.9999998604270756, iteration: 58257
loss: 0.9795911908149719,grad_norm: 0.9999991133637397, iteration: 58258
loss: 0.9749290943145752,grad_norm: 0.9466046253757303, iteration: 58259
loss: 0.9926655888557434,grad_norm: 0.8891992500085127, iteration: 58260
loss: 0.9711300134658813,grad_norm: 0.8294143806182689, iteration: 58261
loss: 0.9955319166183472,grad_norm: 0.9914642157185739, iteration: 58262
loss: 1.0256291627883911,grad_norm: 0.9526817550859674, iteration: 58263
loss: 0.9949585795402527,grad_norm: 0.9999991172758735, iteration: 58264
loss: 1.0094847679138184,grad_norm: 0.9999993426339231, iteration: 58265
loss: 0.9786622524261475,grad_norm: 0.992139332547776, iteration: 58266
loss: 1.0190978050231934,grad_norm: 0.9999991386317993, iteration: 58267
loss: 1.0352914333343506,grad_norm: 0.999999124584818, iteration: 58268
loss: 0.9932066202163696,grad_norm: 0.9104030732662419, iteration: 58269
loss: 1.0148026943206787,grad_norm: 0.9999991347824148, iteration: 58270
loss: 0.9972362518310547,grad_norm: 0.9421644530269668, iteration: 58271
loss: 0.9942853450775146,grad_norm: 0.9154976325269895, iteration: 58272
loss: 1.019686222076416,grad_norm: 0.9999991771928645, iteration: 58273
loss: 0.9603791832923889,grad_norm: 0.8007599900795264, iteration: 58274
loss: 1.025455355644226,grad_norm: 0.9948032357670283, iteration: 58275
loss: 0.9689932465553284,grad_norm: 0.8753056409121582, iteration: 58276
loss: 1.0140032768249512,grad_norm: 0.9890424475789534, iteration: 58277
loss: 1.0319424867630005,grad_norm: 0.9999992166067968, iteration: 58278
loss: 1.0137546062469482,grad_norm: 0.9999990001537115, iteration: 58279
loss: 0.9986677765846252,grad_norm: 0.9773694339532611, iteration: 58280
loss: 0.9723289012908936,grad_norm: 0.9999990447520893, iteration: 58281
loss: 0.9690383076667786,grad_norm: 0.9999991803585835, iteration: 58282
loss: 0.993175208568573,grad_norm: 0.9999992289240518, iteration: 58283
loss: 1.015941858291626,grad_norm: 0.9999991438927373, iteration: 58284
loss: 1.0277364253997803,grad_norm: 0.9999989699667073, iteration: 58285
loss: 0.9816020131111145,grad_norm: 0.9541349881691656, iteration: 58286
loss: 1.0233036279678345,grad_norm: 0.9999990786544986, iteration: 58287
loss: 0.9889293313026428,grad_norm: 0.9580079223130782, iteration: 58288
loss: 1.0087047815322876,grad_norm: 0.8403492232969221, iteration: 58289
loss: 1.0094234943389893,grad_norm: 0.9999991197780783, iteration: 58290
loss: 1.0184203386306763,grad_norm: 0.9999991418868697, iteration: 58291
loss: 0.9986922740936279,grad_norm: 0.9860375251783048, iteration: 58292
loss: 0.9885589480400085,grad_norm: 0.9462618621137523, iteration: 58293
loss: 0.9891887307167053,grad_norm: 0.9999991566205156, iteration: 58294
loss: 0.9996487498283386,grad_norm: 0.9598534727975244, iteration: 58295
loss: 1.014393925666809,grad_norm: 0.9999990531118523, iteration: 58296
loss: 1.0119960308074951,grad_norm: 0.8002535747067896, iteration: 58297
loss: 0.998490035533905,grad_norm: 0.89858573103074, iteration: 58298
loss: 0.999673068523407,grad_norm: 0.9999991971741508, iteration: 58299
loss: 1.0077614784240723,grad_norm: 0.9849077083975883, iteration: 58300
loss: 0.964388370513916,grad_norm: 0.9999990639476865, iteration: 58301
loss: 0.9740188717842102,grad_norm: 0.9999990585236189, iteration: 58302
loss: 1.0062167644500732,grad_norm: 0.9979551852464676, iteration: 58303
loss: 1.0105949640274048,grad_norm: 0.9999990515742831, iteration: 58304
loss: 0.9889475107192993,grad_norm: 0.7823304446551872, iteration: 58305
loss: 0.9739512801170349,grad_norm: 0.8822457364218789, iteration: 58306
loss: 1.0954838991165161,grad_norm: 0.9999999150371056, iteration: 58307
loss: 1.012833833694458,grad_norm: 0.9999991777612547, iteration: 58308
loss: 1.0261986255645752,grad_norm: 0.9206495858211565, iteration: 58309
loss: 1.0111504793167114,grad_norm: 0.9999990778658092, iteration: 58310
loss: 1.0395218133926392,grad_norm: 0.9999991971154655, iteration: 58311
loss: 1.0184012651443481,grad_norm: 0.9999990883543957, iteration: 58312
loss: 1.0437595844268799,grad_norm: 0.907915560031355, iteration: 58313
loss: 0.9949068427085876,grad_norm: 0.9088957226056943, iteration: 58314
loss: 0.986230194568634,grad_norm: 0.9999991597961047, iteration: 58315
loss: 0.9973757266998291,grad_norm: 0.8997130953100272, iteration: 58316
loss: 0.9947269558906555,grad_norm: 0.9657384027894595, iteration: 58317
loss: 1.0043959617614746,grad_norm: 0.9999991058129107, iteration: 58318
loss: 0.9883274435997009,grad_norm: 0.9421127001849148, iteration: 58319
loss: 1.0138218402862549,grad_norm: 0.9999991464297548, iteration: 58320
loss: 1.0054196119308472,grad_norm: 0.8623334211052492, iteration: 58321
loss: 0.970392107963562,grad_norm: 0.9999989208327129, iteration: 58322
loss: 0.9969392418861389,grad_norm: 0.9999991576831269, iteration: 58323
loss: 0.989803671836853,grad_norm: 0.9999990760880945, iteration: 58324
loss: 1.0037736892700195,grad_norm: 0.9565758058894536, iteration: 58325
loss: 0.9957453608512878,grad_norm: 0.855681494031039, iteration: 58326
loss: 1.0136170387268066,grad_norm: 0.9108013551476493, iteration: 58327
loss: 0.9874407052993774,grad_norm: 0.7980168347618654, iteration: 58328
loss: 1.0176570415496826,grad_norm: 0.9999991971272921, iteration: 58329
loss: 1.0279735326766968,grad_norm: 0.9999991951722622, iteration: 58330
loss: 1.0456242561340332,grad_norm: 0.9484301248978178, iteration: 58331
loss: 1.0028278827667236,grad_norm: 0.8458887537651525, iteration: 58332
loss: 0.9825843572616577,grad_norm: 0.9830320614364402, iteration: 58333
loss: 0.9748890995979309,grad_norm: 0.9999990606209499, iteration: 58334
loss: 1.0198533535003662,grad_norm: 0.9999995076163675, iteration: 58335
loss: 0.9884205460548401,grad_norm: 0.9999990981136553, iteration: 58336
loss: 1.0307546854019165,grad_norm: 0.9999992145647844, iteration: 58337
loss: 0.9773859977722168,grad_norm: 0.9158921368649873, iteration: 58338
loss: 1.0030804872512817,grad_norm: 0.8877426370751781, iteration: 58339
loss: 0.9715331196784973,grad_norm: 0.9857111448740973, iteration: 58340
loss: 0.9871301651000977,grad_norm: 0.966601508944986, iteration: 58341
loss: 0.9681679010391235,grad_norm: 0.9999993118956808, iteration: 58342
loss: 1.0222203731536865,grad_norm: 0.9901037189448029, iteration: 58343
loss: 0.9758292436599731,grad_norm: 0.9999989442967189, iteration: 58344
loss: 0.9837289452552795,grad_norm: 0.9999989702811759, iteration: 58345
loss: 0.9879047870635986,grad_norm: 0.9999991300349687, iteration: 58346
loss: 0.9916642904281616,grad_norm: 0.9999990046023237, iteration: 58347
loss: 0.9905770421028137,grad_norm: 0.999999261305647, iteration: 58348
loss: 0.9984361529350281,grad_norm: 0.8889865997135578, iteration: 58349
loss: 0.9811283349990845,grad_norm: 0.9375864403235373, iteration: 58350
loss: 0.9618396759033203,grad_norm: 0.9999991062873307, iteration: 58351
loss: 0.9829810857772827,grad_norm: 0.9999991268363919, iteration: 58352
loss: 1.002530574798584,grad_norm: 0.9999991912920321, iteration: 58353
loss: 1.0015405416488647,grad_norm: 0.9307159333399717, iteration: 58354
loss: 0.9837233424186707,grad_norm: 0.9999990949519781, iteration: 58355
loss: 0.9702374935150146,grad_norm: 0.9367659089003876, iteration: 58356
loss: 1.039475679397583,grad_norm: 0.895137030632542, iteration: 58357
loss: 1.0033501386642456,grad_norm: 0.865529775749874, iteration: 58358
loss: 0.9989194869995117,grad_norm: 0.9999990191738466, iteration: 58359
loss: 0.9784325361251831,grad_norm: 0.9999990304180678, iteration: 58360
loss: 1.0160186290740967,grad_norm: 0.999999222779916, iteration: 58361
loss: 1.0112892389297485,grad_norm: 0.9999998018889219, iteration: 58362
loss: 1.0924993753433228,grad_norm: 0.9999991731137676, iteration: 58363
loss: 0.9961129426956177,grad_norm: 0.9999990599344354, iteration: 58364
loss: 0.9847603440284729,grad_norm: 0.8545704238286691, iteration: 58365
loss: 1.0194185972213745,grad_norm: 0.9337018542758558, iteration: 58366
loss: 1.0000903606414795,grad_norm: 0.9999990975165614, iteration: 58367
loss: 0.9900707602500916,grad_norm: 0.9999990264903608, iteration: 58368
loss: 0.9821678996086121,grad_norm: 0.9403617304209342, iteration: 58369
loss: 1.0007503032684326,grad_norm: 0.907940996789386, iteration: 58370
loss: 0.9702098965644836,grad_norm: 0.999998881666342, iteration: 58371
loss: 1.0193815231323242,grad_norm: 0.9999991807131284, iteration: 58372
loss: 0.9967941641807556,grad_norm: 0.9999996293668129, iteration: 58373
loss: 0.9991545677185059,grad_norm: 0.9999991135332321, iteration: 58374
loss: 0.9584113955497742,grad_norm: 0.9999991892433827, iteration: 58375
loss: 0.9945213198661804,grad_norm: 0.9999990834964252, iteration: 58376
loss: 1.005205750465393,grad_norm: 0.999999219502241, iteration: 58377
loss: 1.0017874240875244,grad_norm: 0.9999990293590905, iteration: 58378
loss: 0.9875401258468628,grad_norm: 0.9999992260354171, iteration: 58379
loss: 0.9698668718338013,grad_norm: 0.9354315222297384, iteration: 58380
loss: 0.9921316504478455,grad_norm: 0.9811804386766748, iteration: 58381
loss: 0.9700635075569153,grad_norm: 0.8342979568350798, iteration: 58382
loss: 1.009696364402771,grad_norm: 0.9999990602281202, iteration: 58383
loss: 1.022045612335205,grad_norm: 0.9782934214890179, iteration: 58384
loss: 0.9955769181251526,grad_norm: 0.9999990282075214, iteration: 58385
loss: 1.0072741508483887,grad_norm: 0.9999993083681404, iteration: 58386
loss: 1.0339373350143433,grad_norm: 0.9999992244560728, iteration: 58387
loss: 0.9935717582702637,grad_norm: 0.9999990347236151, iteration: 58388
loss: 0.9933468103408813,grad_norm: 0.999999080711271, iteration: 58389
loss: 1.0337281227111816,grad_norm: 0.8750279545381394, iteration: 58390
loss: 1.0392080545425415,grad_norm: 0.9194960513955402, iteration: 58391
loss: 1.0174875259399414,grad_norm: 0.9999991429753432, iteration: 58392
loss: 1.0136289596557617,grad_norm: 0.999999082700322, iteration: 58393
loss: 0.9984316825866699,grad_norm: 0.9848166971820822, iteration: 58394
loss: 0.9628587961196899,grad_norm: 0.9999990924187864, iteration: 58395
loss: 1.0207393169403076,grad_norm: 0.8702164602418075, iteration: 58396
loss: 1.0036519765853882,grad_norm: 0.9999992129679787, iteration: 58397
loss: 1.0169180631637573,grad_norm: 0.8346427180805482, iteration: 58398
loss: 0.9820672273635864,grad_norm: 0.9204614609101546, iteration: 58399
loss: 1.009397029876709,grad_norm: 0.9999990142998257, iteration: 58400
loss: 1.0299456119537354,grad_norm: 0.9571090172296189, iteration: 58401
loss: 1.1853686571121216,grad_norm: 0.999999811080456, iteration: 58402
loss: 0.999319851398468,grad_norm: 0.8766780698601928, iteration: 58403
loss: 0.9705221056938171,grad_norm: 0.9999990936761864, iteration: 58404
loss: 0.9872015118598938,grad_norm: 0.9999991447891728, iteration: 58405
loss: 1.0401438474655151,grad_norm: 0.9999991998771719, iteration: 58406
loss: 1.0064510107040405,grad_norm: 0.9923877236844884, iteration: 58407
loss: 0.9955542087554932,grad_norm: 0.8635138759403985, iteration: 58408
loss: 1.016682505607605,grad_norm: 0.9999997256833213, iteration: 58409
loss: 1.0331827402114868,grad_norm: 0.9999992189768335, iteration: 58410
loss: 0.974783718585968,grad_norm: 0.9999990332613388, iteration: 58411
loss: 1.009450912475586,grad_norm: 0.965244459763693, iteration: 58412
loss: 1.0198553800582886,grad_norm: 0.9999992466283499, iteration: 58413
loss: 1.0307741165161133,grad_norm: 0.99999913906634, iteration: 58414
loss: 1.0049102306365967,grad_norm: 0.9909190204080033, iteration: 58415
loss: 0.9801521301269531,grad_norm: 0.9689259210066841, iteration: 58416
loss: 0.9960281848907471,grad_norm: 0.9999991145908942, iteration: 58417
loss: 1.0267906188964844,grad_norm: 0.9999992649407318, iteration: 58418
loss: 0.9950330853462219,grad_norm: 0.9999990289593909, iteration: 58419
loss: 0.9703698754310608,grad_norm: 0.9973864490245241, iteration: 58420
loss: 1.021625280380249,grad_norm: 0.9999991752928586, iteration: 58421
loss: 1.0592268705368042,grad_norm: 0.9999992565003917, iteration: 58422
loss: 1.0057106018066406,grad_norm: 0.9999990684528893, iteration: 58423
loss: 0.9419264197349548,grad_norm: 0.9999990764913428, iteration: 58424
loss: 0.9897956252098083,grad_norm: 0.9267074431639968, iteration: 58425
loss: 1.0050103664398193,grad_norm: 0.9999994739246925, iteration: 58426
loss: 0.9929792881011963,grad_norm: 0.9999991391337906, iteration: 58427
loss: 0.9805072546005249,grad_norm: 0.9999990934457675, iteration: 58428
loss: 1.0095144510269165,grad_norm: 0.916884607352264, iteration: 58429
loss: 1.0124672651290894,grad_norm: 0.9999992232980766, iteration: 58430
loss: 0.983136773109436,grad_norm: 0.9999993314485641, iteration: 58431
loss: 1.0245198011398315,grad_norm: 0.9999989362710521, iteration: 58432
loss: 1.033502221107483,grad_norm: 0.999999949939859, iteration: 58433
loss: 1.0119105577468872,grad_norm: 0.9999990134189376, iteration: 58434
loss: 0.9979380369186401,grad_norm: 0.9999990050036406, iteration: 58435
loss: 1.0254193544387817,grad_norm: 0.9999991018161003, iteration: 58436
loss: 1.059393286705017,grad_norm: 0.9999994281649034, iteration: 58437
loss: 1.011088490486145,grad_norm: 0.9999991604162429, iteration: 58438
loss: 1.0573149919509888,grad_norm: 0.9999993913945688, iteration: 58439
loss: 1.0211820602416992,grad_norm: 0.9999999008689177, iteration: 58440
loss: 0.9998226165771484,grad_norm: 0.9999323670393269, iteration: 58441
loss: 0.9959301352500916,grad_norm: 0.9828419278375399, iteration: 58442
loss: 1.0276607275009155,grad_norm: 0.9999990483650898, iteration: 58443
loss: 0.9724153280258179,grad_norm: 0.999999113726315, iteration: 58444
loss: 0.9706292748451233,grad_norm: 0.9692200626717713, iteration: 58445
loss: 1.0056772232055664,grad_norm: 0.811342824563809, iteration: 58446
loss: 1.0019614696502686,grad_norm: 0.9999992075519493, iteration: 58447
loss: 0.9850649237632751,grad_norm: 0.9999990724778282, iteration: 58448
loss: 0.9938189387321472,grad_norm: 0.838926078609106, iteration: 58449
loss: 1.0169411897659302,grad_norm: 0.9999989973575184, iteration: 58450
loss: 1.010263204574585,grad_norm: 0.9999990667503407, iteration: 58451
loss: 1.0234626531600952,grad_norm: 0.9999990808446501, iteration: 58452
loss: 0.9730796217918396,grad_norm: 0.9999992902835988, iteration: 58453
loss: 1.0064094066619873,grad_norm: 0.9999991544507907, iteration: 58454
loss: 0.9792565703392029,grad_norm: 0.9930478552218683, iteration: 58455
loss: 0.9652853608131409,grad_norm: 0.999998993295549, iteration: 58456
loss: 1.028355360031128,grad_norm: 0.9999989973768243, iteration: 58457
loss: 1.005094051361084,grad_norm: 0.9008359788126291, iteration: 58458
loss: 0.9784585237503052,grad_norm: 0.9999992154760787, iteration: 58459
loss: 0.9619600176811218,grad_norm: 0.9676604202052764, iteration: 58460
loss: 0.9712801575660706,grad_norm: 0.8861521666603803, iteration: 58461
loss: 1.0000336170196533,grad_norm: 0.9999990258388777, iteration: 58462
loss: 0.9735573530197144,grad_norm: 0.9999989978405459, iteration: 58463
loss: 0.9854931235313416,grad_norm: 0.9999997072272566, iteration: 58464
loss: 1.0211702585220337,grad_norm: 0.9999991518017644, iteration: 58465
loss: 1.0170445442199707,grad_norm: 0.97316129472111, iteration: 58466
loss: 1.0346559286117554,grad_norm: 0.9999992765246978, iteration: 58467
loss: 0.9887031316757202,grad_norm: 0.9999992691938764, iteration: 58468
loss: 1.0057076215744019,grad_norm: 0.9349474349936865, iteration: 58469
loss: 0.991334080696106,grad_norm: 0.9999991405588665, iteration: 58470
loss: 0.9719924330711365,grad_norm: 0.963828593176859, iteration: 58471
loss: 0.9790497422218323,grad_norm: 0.999999150808124, iteration: 58472
loss: 1.0167080163955688,grad_norm: 0.9999992766080278, iteration: 58473
loss: 1.003306269645691,grad_norm: 0.8896614352593287, iteration: 58474
loss: 1.0015925168991089,grad_norm: 0.9788396485257085, iteration: 58475
loss: 1.0498403310775757,grad_norm: 0.999999353788877, iteration: 58476
loss: 1.0177448987960815,grad_norm: 0.9999990275971521, iteration: 58477
loss: 1.0263859033584595,grad_norm: 0.9999998099175226, iteration: 58478
loss: 1.0186470746994019,grad_norm: 0.9999991801515384, iteration: 58479
loss: 0.9944801926612854,grad_norm: 0.8721613943605971, iteration: 58480
loss: 0.9864793419837952,grad_norm: 0.9999990737745543, iteration: 58481
loss: 1.0843746662139893,grad_norm: 0.9999997536229552, iteration: 58482
loss: 1.0096746683120728,grad_norm: 0.8525385500726359, iteration: 58483
loss: 0.9936367869377136,grad_norm: 0.9999990645681551, iteration: 58484
loss: 1.00224769115448,grad_norm: 0.9565052527497042, iteration: 58485
loss: 0.9628686308860779,grad_norm: 0.9999990407582074, iteration: 58486
loss: 1.0045440196990967,grad_norm: 0.9999993738348039, iteration: 58487
loss: 1.0134724378585815,grad_norm: 0.9999996552448919, iteration: 58488
loss: 1.0595109462738037,grad_norm: 0.9999992144012299, iteration: 58489
loss: 0.9973162412643433,grad_norm: 0.9317072036381077, iteration: 58490
loss: 0.9989381432533264,grad_norm: 0.9999992345034554, iteration: 58491
loss: 1.0170342922210693,grad_norm: 0.9829726134411496, iteration: 58492
loss: 0.997799813747406,grad_norm: 0.999999190624155, iteration: 58493
loss: 1.0061086416244507,grad_norm: 0.9999992234622713, iteration: 58494
loss: 0.9495737552642822,grad_norm: 0.9999992265803009, iteration: 58495
loss: 1.0119303464889526,grad_norm: 0.9636062337487501, iteration: 58496
loss: 1.0139045715332031,grad_norm: 0.9673781646713694, iteration: 58497
loss: 0.9981334209442139,grad_norm: 0.9530670826086178, iteration: 58498
loss: 0.9798742532730103,grad_norm: 0.8610525638330824, iteration: 58499
loss: 0.9842860102653503,grad_norm: 0.9330775804705657, iteration: 58500
loss: 1.0638883113861084,grad_norm: 0.9999994554218614, iteration: 58501
loss: 0.9804367423057556,grad_norm: 0.9605091806720313, iteration: 58502
loss: 1.0238745212554932,grad_norm: 0.9999992750544594, iteration: 58503
loss: 0.9833030700683594,grad_norm: 0.9999992580045629, iteration: 58504
loss: 1.0066195726394653,grad_norm: 0.9999991083852073, iteration: 58505
loss: 1.0132689476013184,grad_norm: 0.9999990097270289, iteration: 58506
loss: 1.0014097690582275,grad_norm: 0.972130874378583, iteration: 58507
loss: 1.0134284496307373,grad_norm: 0.9999990560428094, iteration: 58508
loss: 0.9926901459693909,grad_norm: 0.9923025696971319, iteration: 58509
loss: 1.0273360013961792,grad_norm: 0.9999991072356668, iteration: 58510
loss: 0.9629772901535034,grad_norm: 0.9999992782296729, iteration: 58511
loss: 1.0371289253234863,grad_norm: 0.9999991326593927, iteration: 58512
loss: 0.9866837859153748,grad_norm: 0.9999989120508108, iteration: 58513
loss: 0.990837037563324,grad_norm: 0.9361437454068405, iteration: 58514
loss: 1.000791311264038,grad_norm: 0.9999996410768691, iteration: 58515
loss: 0.9793716669082642,grad_norm: 0.9999992230823455, iteration: 58516
loss: 0.9771610498428345,grad_norm: 0.9999990410964329, iteration: 58517
loss: 0.9754316210746765,grad_norm: 0.9999989598366389, iteration: 58518
loss: 1.0685631036758423,grad_norm: 0.9999999328761191, iteration: 58519
loss: 1.0397108793258667,grad_norm: 0.9999991616279192, iteration: 58520
loss: 0.9807226061820984,grad_norm: 0.9999992255324429, iteration: 58521
loss: 1.0160611867904663,grad_norm: 0.9999990595583252, iteration: 58522
loss: 0.9636988043785095,grad_norm: 0.9317794278564435, iteration: 58523
loss: 0.9908044338226318,grad_norm: 0.9999992448153896, iteration: 58524
loss: 0.9914840459823608,grad_norm: 0.9999991947726868, iteration: 58525
loss: 0.9895446300506592,grad_norm: 0.9999992840907171, iteration: 58526
loss: 0.9965410828590393,grad_norm: 0.9999992531277985, iteration: 58527
loss: 1.0036462545394897,grad_norm: 0.9999989993023658, iteration: 58528
loss: 1.0213725566864014,grad_norm: 0.999999084219664, iteration: 58529
loss: 1.0103318691253662,grad_norm: 0.942618583098263, iteration: 58530
loss: 0.9971001148223877,grad_norm: 0.9589797012792914, iteration: 58531
loss: 0.9757091999053955,grad_norm: 0.7806267064916229, iteration: 58532
loss: 0.9983842968940735,grad_norm: 0.9160908603269672, iteration: 58533
loss: 0.9985881447792053,grad_norm: 0.9999992756393835, iteration: 58534
loss: 0.9869913458824158,grad_norm: 0.8265057375097304, iteration: 58535
loss: 1.0608041286468506,grad_norm: 0.999999088279792, iteration: 58536
loss: 1.0260753631591797,grad_norm: 0.8974198845484983, iteration: 58537
loss: 0.9993996024131775,grad_norm: 0.9999992514842498, iteration: 58538
loss: 0.9892652034759521,grad_norm: 0.9999990454887906, iteration: 58539
loss: 1.0037875175476074,grad_norm: 0.9223450618735001, iteration: 58540
loss: 0.99350905418396,grad_norm: 0.8243793980181127, iteration: 58541
loss: 0.9941816329956055,grad_norm: 0.9999992103343743, iteration: 58542
loss: 1.286176323890686,grad_norm: 0.9999991775184962, iteration: 58543
loss: 1.051795244216919,grad_norm: 0.9999991173720479, iteration: 58544
loss: 1.005089521408081,grad_norm: 0.9999992586650993, iteration: 58545
loss: 0.9858696460723877,grad_norm: 0.9999991515238656, iteration: 58546
loss: 1.017977237701416,grad_norm: 0.9999990813575912, iteration: 58547
loss: 0.9944818615913391,grad_norm: 0.9999990636100513, iteration: 58548
loss: 0.9805135726928711,grad_norm: 0.9999990001941641, iteration: 58549
loss: 1.0274327993392944,grad_norm: 0.9720578980257324, iteration: 58550
loss: 0.9698099493980408,grad_norm: 0.9999991970134678, iteration: 58551
loss: 1.0392184257507324,grad_norm: 0.999999777761742, iteration: 58552
loss: 0.9991503953933716,grad_norm: 0.8922637813940408, iteration: 58553
loss: 1.000436782836914,grad_norm: 0.9860326758358845, iteration: 58554
loss: 0.9846962094306946,grad_norm: 0.9468103192996707, iteration: 58555
loss: 0.9928902387619019,grad_norm: 0.8929237026040427, iteration: 58556
loss: 0.9789606928825378,grad_norm: 0.9425614238107533, iteration: 58557
loss: 1.026134967803955,grad_norm: 0.9999992033019992, iteration: 58558
loss: 0.9691343307495117,grad_norm: 0.9810966462586765, iteration: 58559
loss: 0.9943538904190063,grad_norm: 0.9999991261062519, iteration: 58560
loss: 1.007045030593872,grad_norm: 0.999999204106652, iteration: 58561
loss: 1.0018737316131592,grad_norm: 0.9603338696976152, iteration: 58562
loss: 1.01615309715271,grad_norm: 0.9999991643448777, iteration: 58563
loss: 0.9941529035568237,grad_norm: 0.9999990403217864, iteration: 58564
loss: 1.0140419006347656,grad_norm: 0.9999991579214289, iteration: 58565
loss: 0.9938830733299255,grad_norm: 0.9364935853693948, iteration: 58566
loss: 1.017488718032837,grad_norm: 0.9999991715108643, iteration: 58567
loss: 0.9936081171035767,grad_norm: 0.9042393313045287, iteration: 58568
loss: 0.9826352000236511,grad_norm: 0.9999991188878462, iteration: 58569
loss: 1.0088683366775513,grad_norm: 0.9593743940909305, iteration: 58570
loss: 1.0504276752471924,grad_norm: 0.9764436713703866, iteration: 58571
loss: 0.9796086549758911,grad_norm: 0.8593422406443529, iteration: 58572
loss: 1.0085124969482422,grad_norm: 0.9955080951256438, iteration: 58573
loss: 0.9867939352989197,grad_norm: 0.9999991260398139, iteration: 58574
loss: 1.0004910230636597,grad_norm: 0.999999189248179, iteration: 58575
loss: 0.989065945148468,grad_norm: 0.9999995756176151, iteration: 58576
loss: 1.0337259769439697,grad_norm: 0.9999990768948457, iteration: 58577
loss: 1.0046337842941284,grad_norm: 0.9999991228022104, iteration: 58578
loss: 0.990782618522644,grad_norm: 0.9999991185430227, iteration: 58579
loss: 1.0073916912078857,grad_norm: 0.8909758164537736, iteration: 58580
loss: 1.0232497453689575,grad_norm: 0.9999992918617947, iteration: 58581
loss: 1.012605905532837,grad_norm: 0.9999990090791417, iteration: 58582
loss: 1.0055882930755615,grad_norm: 0.9999990970411924, iteration: 58583
loss: 0.9733978509902954,grad_norm: 0.9999990419547541, iteration: 58584
loss: 0.9777395129203796,grad_norm: 0.8510778613239235, iteration: 58585
loss: 1.0051071643829346,grad_norm: 0.9999992400687929, iteration: 58586
loss: 0.9693459868431091,grad_norm: 0.8471512511234534, iteration: 58587
loss: 0.9778034687042236,grad_norm: 0.9035095800502133, iteration: 58588
loss: 0.9895555377006531,grad_norm: 0.9999991022676061, iteration: 58589
loss: 0.9901415705680847,grad_norm: 0.9999992071852651, iteration: 58590
loss: 1.0197641849517822,grad_norm: 0.9780616679445013, iteration: 58591
loss: 0.9710841178894043,grad_norm: 0.9999991383115304, iteration: 58592
loss: 1.0311331748962402,grad_norm: 0.9999991452347262, iteration: 58593
loss: 1.0219924449920654,grad_norm: 0.8618628088746545, iteration: 58594
loss: 1.0172967910766602,grad_norm: 0.9999990768689098, iteration: 58595
loss: 0.964574933052063,grad_norm: 0.9942405420827372, iteration: 58596
loss: 0.9715999960899353,grad_norm: 0.9999992350438887, iteration: 58597
loss: 0.9779902696609497,grad_norm: 0.9999993708472041, iteration: 58598
loss: 0.993057906627655,grad_norm: 0.9767700808454176, iteration: 58599
loss: 1.0192923545837402,grad_norm: 0.8125565124515903, iteration: 58600
loss: 1.026189923286438,grad_norm: 0.9625273293191322, iteration: 58601
loss: 0.992476761341095,grad_norm: 0.9094714996486926, iteration: 58602
loss: 1.0090162754058838,grad_norm: 0.9999991252705375, iteration: 58603
loss: 1.025390625,grad_norm: 0.9999990179009282, iteration: 58604
loss: 1.019952416419983,grad_norm: 0.9284268183802352, iteration: 58605
loss: 1.024049997329712,grad_norm: 0.9999991912276478, iteration: 58606
loss: 1.0048378705978394,grad_norm: 0.999999186738303, iteration: 58607
loss: 0.9592340588569641,grad_norm: 0.9999996849466499, iteration: 58608
loss: 1.0090596675872803,grad_norm: 0.9999992443984006, iteration: 58609
loss: 0.9834473729133606,grad_norm: 0.9999992013085592, iteration: 58610
loss: 0.9961008429527283,grad_norm: 0.9999991348236493, iteration: 58611
loss: 0.9841901659965515,grad_norm: 0.9532714544247173, iteration: 58612
loss: 1.0206787586212158,grad_norm: 0.9999989898541339, iteration: 58613
loss: 0.9832697510719299,grad_norm: 0.8938672706770803, iteration: 58614
loss: 1.0040146112442017,grad_norm: 0.9999990817791249, iteration: 58615
loss: 1.0363129377365112,grad_norm: 0.9061740079118443, iteration: 58616
loss: 1.024722933769226,grad_norm: 0.9572037576397422, iteration: 58617
loss: 1.008115291595459,grad_norm: 0.9999991855477944, iteration: 58618
loss: 0.9997778534889221,grad_norm: 0.9999991693524948, iteration: 58619
loss: 0.9911555051803589,grad_norm: 0.9999990975960623, iteration: 58620
loss: 0.9945670366287231,grad_norm: 0.9999991000183873, iteration: 58621
loss: 1.0538619756698608,grad_norm: 0.9999991396033292, iteration: 58622
loss: 1.0117533206939697,grad_norm: 0.9999992692742559, iteration: 58623
loss: 0.9929654002189636,grad_norm: 0.9308582930449767, iteration: 58624
loss: 1.073960542678833,grad_norm: 0.999999910556836, iteration: 58625
loss: 0.978084921836853,grad_norm: 0.8652835601094645, iteration: 58626
loss: 1.0309308767318726,grad_norm: 0.9999994069318741, iteration: 58627
loss: 0.9994491338729858,grad_norm: 0.9999991549804499, iteration: 58628
loss: 0.9897289276123047,grad_norm: 0.9999995248721795, iteration: 58629
loss: 1.0112749338150024,grad_norm: 0.9999989709152849, iteration: 58630
loss: 1.016318440437317,grad_norm: 0.999999058942527, iteration: 58631
loss: 1.0381817817687988,grad_norm: 0.9999994967167399, iteration: 58632
loss: 0.9973774552345276,grad_norm: 0.9999988855633022, iteration: 58633
loss: 1.0243127346038818,grad_norm: 0.9999991941123781, iteration: 58634
loss: 0.9974914789199829,grad_norm: 0.9999989467310156, iteration: 58635
loss: 1.0696632862091064,grad_norm: 0.9999996473609329, iteration: 58636
loss: 1.0051020383834839,grad_norm: 0.9999992244914679, iteration: 58637
loss: 0.9948622584342957,grad_norm: 0.9799243158969843, iteration: 58638
loss: 1.0058538913726807,grad_norm: 0.9497323945203371, iteration: 58639
loss: 1.0238131284713745,grad_norm: 0.9301813918687951, iteration: 58640
loss: 1.0082802772521973,grad_norm: 0.9999990104332114, iteration: 58641
loss: 0.9906412959098816,grad_norm: 0.9599556548408701, iteration: 58642
loss: 0.973820686340332,grad_norm: 0.9999990705734205, iteration: 58643
loss: 0.9808329343795776,grad_norm: 0.9731443960981888, iteration: 58644
loss: 0.991862416267395,grad_norm: 0.9999991406622425, iteration: 58645
loss: 1.0079083442687988,grad_norm: 0.999999150126949, iteration: 58646
loss: 1.014198899269104,grad_norm: 0.9149285438179218, iteration: 58647
loss: 1.0048179626464844,grad_norm: 0.8023530319858045, iteration: 58648
loss: 1.0165905952453613,grad_norm: 0.9999990089458543, iteration: 58649
loss: 1.0098587274551392,grad_norm: 0.9999996992291194, iteration: 58650
loss: 1.0229135751724243,grad_norm: 0.9999991646867518, iteration: 58651
loss: 1.0114068984985352,grad_norm: 0.9999990894417721, iteration: 58652
loss: 1.040963053703308,grad_norm: 0.8589909165693057, iteration: 58653
loss: 1.013045310974121,grad_norm: 0.9999991990055954, iteration: 58654
loss: 1.0219509601593018,grad_norm: 0.9999992416157281, iteration: 58655
loss: 0.9981897473335266,grad_norm: 0.9999991444343366, iteration: 58656
loss: 1.0008752346038818,grad_norm: 0.9999989889224612, iteration: 58657
loss: 1.0197809934616089,grad_norm: 0.952336640815216, iteration: 58658
loss: 1.0048449039459229,grad_norm: 0.9685283558362501, iteration: 58659
loss: 0.9983605742454529,grad_norm: 0.9999990151090874, iteration: 58660
loss: 0.9885982871055603,grad_norm: 0.974412233565928, iteration: 58661
loss: 0.9663074016571045,grad_norm: 0.9330019964737251, iteration: 58662
loss: 1.0095486640930176,grad_norm: 0.9273629348685415, iteration: 58663
loss: 1.0088372230529785,grad_norm: 0.9999990842189702, iteration: 58664
loss: 1.006111979484558,grad_norm: 0.9999991001283075, iteration: 58665
loss: 1.0024718046188354,grad_norm: 0.9750576359007265, iteration: 58666
loss: 1.0313820838928223,grad_norm: 0.9999990671649064, iteration: 58667
loss: 1.029554843902588,grad_norm: 0.9604521847446634, iteration: 58668
loss: 1.0043717622756958,grad_norm: 0.9335729038176258, iteration: 58669
loss: 1.015429139137268,grad_norm: 0.9999992518242491, iteration: 58670
loss: 1.0149574279785156,grad_norm: 0.8747461353765463, iteration: 58671
loss: 1.0146205425262451,grad_norm: 0.9489374716075794, iteration: 58672
loss: 1.014597773551941,grad_norm: 0.9207019668639211, iteration: 58673
loss: 1.0010032653808594,grad_norm: 0.9999992547367271, iteration: 58674
loss: 0.9932956695556641,grad_norm: 0.8857246649655662, iteration: 58675
loss: 0.9994207620620728,grad_norm: 0.9999992070493332, iteration: 58676
loss: 0.9986745119094849,grad_norm: 0.9569396040881548, iteration: 58677
loss: 0.9923110008239746,grad_norm: 0.9999992156973629, iteration: 58678
loss: 0.9723140001296997,grad_norm: 0.999999110826605, iteration: 58679
loss: 1.0360255241394043,grad_norm: 0.9999996014401199, iteration: 58680
loss: 1.0090587139129639,grad_norm: 0.9999993402826972, iteration: 58681
loss: 0.9834312796592712,grad_norm: 0.9999993224576516, iteration: 58682
loss: 0.9885603189468384,grad_norm: 0.9999994457671653, iteration: 58683
loss: 0.9859697222709656,grad_norm: 0.7646092032703196, iteration: 58684
loss: 0.9991028904914856,grad_norm: 0.9999996412730493, iteration: 58685
loss: 0.9985545873641968,grad_norm: 0.8967554626115265, iteration: 58686
loss: 0.9587026834487915,grad_norm: 0.9999990844413039, iteration: 58687
loss: 0.9842178225517273,grad_norm: 0.9999990322140702, iteration: 58688
loss: 0.9975375533103943,grad_norm: 0.8249728940374772, iteration: 58689
loss: 0.9930835962295532,grad_norm: 0.9999992346370675, iteration: 58690
loss: 0.993455171585083,grad_norm: 0.9999990731396228, iteration: 58691
loss: 1.0262006521224976,grad_norm: 0.9999993174415115, iteration: 58692
loss: 1.0040862560272217,grad_norm: 0.9999990148959088, iteration: 58693
loss: 1.0004740953445435,grad_norm: 0.999999059962222, iteration: 58694
loss: 1.0318002700805664,grad_norm: 0.9999990279099736, iteration: 58695
loss: 1.0221413373947144,grad_norm: 0.9999992665442464, iteration: 58696
loss: 1.0018892288208008,grad_norm: 0.9999990190167622, iteration: 58697
loss: 1.0403186082839966,grad_norm: 0.934962869611155, iteration: 58698
loss: 1.0236064195632935,grad_norm: 0.9332066774842034, iteration: 58699
loss: 1.0223236083984375,grad_norm: 0.9576791478789691, iteration: 58700
loss: 1.020750880241394,grad_norm: 0.9999991873276773, iteration: 58701
loss: 0.9960801005363464,grad_norm: 0.9999990767254039, iteration: 58702
loss: 0.9753348231315613,grad_norm: 0.9445691373571915, iteration: 58703
loss: 1.0030426979064941,grad_norm: 0.9999991414312085, iteration: 58704
loss: 0.9681596159934998,grad_norm: 0.9325793045891515, iteration: 58705
loss: 0.9844646453857422,grad_norm: 0.9404661501705184, iteration: 58706
loss: 1.0088744163513184,grad_norm: 0.9999991276577207, iteration: 58707
loss: 0.9847579598426819,grad_norm: 0.9999991207129544, iteration: 58708
loss: 1.015272855758667,grad_norm: 0.9608928513348093, iteration: 58709
loss: 1.0278022289276123,grad_norm: 0.999999153202037, iteration: 58710
loss: 0.9958282709121704,grad_norm: 0.9999995992320682, iteration: 58711
loss: 0.9932225346565247,grad_norm: 0.9416685617899625, iteration: 58712
loss: 1.017043948173523,grad_norm: 0.912778221623157, iteration: 58713
loss: 0.9748449921607971,grad_norm: 0.9391732360536839, iteration: 58714
loss: 0.9855769276618958,grad_norm: 0.9999991381235921, iteration: 58715
loss: 1.0285104513168335,grad_norm: 0.9999990534314355, iteration: 58716
loss: 0.9909873604774475,grad_norm: 0.8767926509250195, iteration: 58717
loss: 0.9978141188621521,grad_norm: 0.8575714929065076, iteration: 58718
loss: 1.0121967792510986,grad_norm: 0.9819189208201046, iteration: 58719
loss: 1.0088287591934204,grad_norm: 0.9938667507334088, iteration: 58720
loss: 1.0011881589889526,grad_norm: 0.9999991891800928, iteration: 58721
loss: 1.0041855573654175,grad_norm: 0.9999994195057262, iteration: 58722
loss: 0.9860953688621521,grad_norm: 0.9999991792992382, iteration: 58723
loss: 1.0122534036636353,grad_norm: 0.9999989838856785, iteration: 58724
loss: 1.0150994062423706,grad_norm: 0.9251518227538984, iteration: 58725
loss: 0.963447093963623,grad_norm: 0.9999992745053229, iteration: 58726
loss: 1.0017327070236206,grad_norm: 0.9612673810137835, iteration: 58727
loss: 1.0390070676803589,grad_norm: 0.9999990959521541, iteration: 58728
loss: 0.9563726186752319,grad_norm: 0.9999990864631422, iteration: 58729
loss: 0.9840162396430969,grad_norm: 0.9999993569472524, iteration: 58730
loss: 0.9918568730354309,grad_norm: 0.9999990578385469, iteration: 58731
loss: 1.0041368007659912,grad_norm: 0.9704462332308063, iteration: 58732
loss: 1.0026568174362183,grad_norm: 0.9611596252565678, iteration: 58733
loss: 1.0094784498214722,grad_norm: 0.9999998923224014, iteration: 58734
loss: 1.0149660110473633,grad_norm: 0.9427514764546716, iteration: 58735
loss: 1.0132330656051636,grad_norm: 0.99999940450096, iteration: 58736
loss: 1.0293776988983154,grad_norm: 0.9999991121199401, iteration: 58737
loss: 0.9982587695121765,grad_norm: 0.9999990464804236, iteration: 58738
loss: 0.9458967447280884,grad_norm: 0.9668706501268084, iteration: 58739
loss: 0.9962392449378967,grad_norm: 0.9877311317965031, iteration: 58740
loss: 1.0140382051467896,grad_norm: 0.9234515464368068, iteration: 58741
loss: 0.9870694279670715,grad_norm: 0.9999989823719208, iteration: 58742
loss: 1.0264365673065186,grad_norm: 0.9999991817264802, iteration: 58743
loss: 0.9502615928649902,grad_norm: 0.9999989316440084, iteration: 58744
loss: 1.02614164352417,grad_norm: 0.9999989660026666, iteration: 58745
loss: 0.9909193515777588,grad_norm: 0.9999991511444616, iteration: 58746
loss: 1.0362801551818848,grad_norm: 0.9756119732808134, iteration: 58747
loss: 0.9923031330108643,grad_norm: 0.9277739386762303, iteration: 58748
loss: 0.9916346669197083,grad_norm: 0.9000609689025797, iteration: 58749
loss: 0.9874654412269592,grad_norm: 0.9999990911577419, iteration: 58750
loss: 0.9706273078918457,grad_norm: 0.9245412065010742, iteration: 58751
loss: 0.9833632111549377,grad_norm: 0.8830350476919165, iteration: 58752
loss: 0.9792268872261047,grad_norm: 0.9999990451333499, iteration: 58753
loss: 1.0398091077804565,grad_norm: 0.9798144901235645, iteration: 58754
loss: 1.006764531135559,grad_norm: 0.9999990902448109, iteration: 58755
loss: 0.9958570599555969,grad_norm: 0.9999991320054089, iteration: 58756
loss: 1.040058970451355,grad_norm: 0.9999990670988823, iteration: 58757
loss: 1.0215741395950317,grad_norm: 0.9999990836188772, iteration: 58758
loss: 1.0206819772720337,grad_norm: 0.9930352911743205, iteration: 58759
loss: 0.9878840446472168,grad_norm: 0.8352898825208538, iteration: 58760
loss: 0.9898250102996826,grad_norm: 0.9441819891504476, iteration: 58761
loss: 1.0004850625991821,grad_norm: 0.9999990964130407, iteration: 58762
loss: 1.0090945959091187,grad_norm: 0.9521379979916673, iteration: 58763
loss: 1.0136806964874268,grad_norm: 0.9999991635543694, iteration: 58764
loss: 0.9713412523269653,grad_norm: 0.999999211217094, iteration: 58765
loss: 0.99994957447052,grad_norm: 0.9999994922915104, iteration: 58766
loss: 0.9875840544700623,grad_norm: 0.9185641406771599, iteration: 58767
loss: 1.0161713361740112,grad_norm: 0.9999991922783078, iteration: 58768
loss: 0.9922935366630554,grad_norm: 0.9999991409507685, iteration: 58769
loss: 0.9762787818908691,grad_norm: 0.9144969276719292, iteration: 58770
loss: 0.9649643301963806,grad_norm: 0.9449407766642818, iteration: 58771
loss: 1.000430941581726,grad_norm: 0.9662947092389256, iteration: 58772
loss: 1.0501405000686646,grad_norm: 0.9999995150347059, iteration: 58773
loss: 0.9639636874198914,grad_norm: 0.9999991959099629, iteration: 58774
loss: 1.007483720779419,grad_norm: 0.9999991016216363, iteration: 58775
loss: 1.0219073295593262,grad_norm: 0.9999990740785715, iteration: 58776
loss: 1.0289744138717651,grad_norm: 0.9999995927488972, iteration: 58777
loss: 1.0329785346984863,grad_norm: 0.8862834969751237, iteration: 58778
loss: 0.9990901947021484,grad_norm: 0.9920664031996619, iteration: 58779
loss: 0.9720550775527954,grad_norm: 0.9999990644231642, iteration: 58780
loss: 1.0411978960037231,grad_norm: 0.9711686129172427, iteration: 58781
loss: 1.0113623142242432,grad_norm: 0.9322593601306106, iteration: 58782
loss: 0.9659830927848816,grad_norm: 0.9999993134760224, iteration: 58783
loss: 0.9547756314277649,grad_norm: 0.9880490084417454, iteration: 58784
loss: 0.9969662427902222,grad_norm: 0.999999049365518, iteration: 58785
loss: 1.0514318943023682,grad_norm: 0.9999990146520455, iteration: 58786
loss: 0.9707952737808228,grad_norm: 0.9999990903906386, iteration: 58787
loss: 0.9902171492576599,grad_norm: 0.9597023702910867, iteration: 58788
loss: 1.0886448621749878,grad_norm: 0.9999990882246979, iteration: 58789
loss: 0.9912533760070801,grad_norm: 0.8249512043163424, iteration: 58790
loss: 0.9653058648109436,grad_norm: 0.9999993879471925, iteration: 58791
loss: 0.958330512046814,grad_norm: 0.999999180530656, iteration: 58792
loss: 1.0062849521636963,grad_norm: 0.9999989872920811, iteration: 58793
loss: 1.0292003154754639,grad_norm: 0.9622579373373503, iteration: 58794
loss: 1.021599531173706,grad_norm: 0.999999154762536, iteration: 58795
loss: 0.9742546081542969,grad_norm: 0.9711667029609938, iteration: 58796
loss: 1.0073227882385254,grad_norm: 0.9838475680326298, iteration: 58797
loss: 0.9890444874763489,grad_norm: 0.9999991248560005, iteration: 58798
loss: 1.0326322317123413,grad_norm: 0.9999993086689662, iteration: 58799
loss: 0.9606183767318726,grad_norm: 0.999999146121992, iteration: 58800
loss: 1.0155417919158936,grad_norm: 0.9999991067969484, iteration: 58801
loss: 0.9605968594551086,grad_norm: 0.8961297763974989, iteration: 58802
loss: 0.9941568970680237,grad_norm: 0.9072089727684801, iteration: 58803
loss: 0.9877879023551941,grad_norm: 0.9999991712669672, iteration: 58804
loss: 1.0000696182250977,grad_norm: 0.8682300600470626, iteration: 58805
loss: 1.0188777446746826,grad_norm: 0.9176930424902254, iteration: 58806
loss: 0.9648606777191162,grad_norm: 0.9999991904990102, iteration: 58807
loss: 1.0398848056793213,grad_norm: 0.9999992567558907, iteration: 58808
loss: 1.0129084587097168,grad_norm: 0.9999992626655981, iteration: 58809
loss: 1.004766821861267,grad_norm: 0.8936581224200864, iteration: 58810
loss: 1.0227675437927246,grad_norm: 0.953963003430372, iteration: 58811
loss: 1.0039142370224,grad_norm: 0.999999097390969, iteration: 58812
loss: 0.9644510746002197,grad_norm: 0.8893772024705929, iteration: 58813
loss: 0.9580894112586975,grad_norm: 0.8129315661954598, iteration: 58814
loss: 1.0103298425674438,grad_norm: 0.9307227740182772, iteration: 58815
loss: 0.942039966583252,grad_norm: 0.9472400977840256, iteration: 58816
loss: 1.0125758647918701,grad_norm: 0.999999077324555, iteration: 58817
loss: 1.0250122547149658,grad_norm: 0.9999989543845695, iteration: 58818
loss: 1.0261377096176147,grad_norm: 0.9999991057245748, iteration: 58819
loss: 0.9813389778137207,grad_norm: 0.9334580821807106, iteration: 58820
loss: 1.0205678939819336,grad_norm: 0.9999991622219447, iteration: 58821
loss: 0.9677333831787109,grad_norm: 0.9055526721366726, iteration: 58822
loss: 1.0042359828948975,grad_norm: 0.9999991707266541, iteration: 58823
loss: 1.0290216207504272,grad_norm: 0.9061954496095537, iteration: 58824
loss: 0.981918215751648,grad_norm: 0.995437451108361, iteration: 58825
loss: 0.9786767363548279,grad_norm: 0.9999993346099082, iteration: 58826
loss: 0.9902074337005615,grad_norm: 0.845520866163694, iteration: 58827
loss: 1.0095850229263306,grad_norm: 0.9999991235907744, iteration: 58828
loss: 1.0124478340148926,grad_norm: 0.894145631667071, iteration: 58829
loss: 1.0315908193588257,grad_norm: 0.9699379964592406, iteration: 58830
loss: 1.0206977128982544,grad_norm: 0.9930331523007694, iteration: 58831
loss: 1.0065217018127441,grad_norm: 0.9999992231639956, iteration: 58832
loss: 1.0060337781906128,grad_norm: 0.9999992525308586, iteration: 58833
loss: 1.0070455074310303,grad_norm: 0.9999991698667074, iteration: 58834
loss: 1.0977309942245483,grad_norm: 1.000000018674721, iteration: 58835
loss: 1.0268875360488892,grad_norm: 0.9537190774512289, iteration: 58836
loss: 1.0048084259033203,grad_norm: 0.9999991156970386, iteration: 58837
loss: 1.0094380378723145,grad_norm: 0.9612927262020047, iteration: 58838
loss: 1.0310875177383423,grad_norm: 0.9999991794569737, iteration: 58839
loss: 1.0031733512878418,grad_norm: 0.9999996573811668, iteration: 58840
loss: 1.0170363187789917,grad_norm: 0.9187348969066274, iteration: 58841
loss: 0.9912161827087402,grad_norm: 0.9747152333974125, iteration: 58842
loss: 1.0037096738815308,grad_norm: 0.9333135452352307, iteration: 58843
loss: 1.0193711519241333,grad_norm: 0.9999992096605597, iteration: 58844
loss: 0.9877249002456665,grad_norm: 0.9999991017946915, iteration: 58845
loss: 1.0380825996398926,grad_norm: 0.9791639605830232, iteration: 58846
loss: 1.005550742149353,grad_norm: 0.9999990448788759, iteration: 58847
loss: 0.9922365546226501,grad_norm: 0.999999141158046, iteration: 58848
loss: 0.9606145024299622,grad_norm: 0.9999990660199394, iteration: 58849
loss: 0.9735209941864014,grad_norm: 0.9999992952759146, iteration: 58850
loss: 0.9853608012199402,grad_norm: 0.9667267401041386, iteration: 58851
loss: 1.0159674882888794,grad_norm: 0.9878960148762355, iteration: 58852
loss: 0.9976685643196106,grad_norm: 0.9061201348486534, iteration: 58853
loss: 0.9898161888122559,grad_norm: 0.9999990599090054, iteration: 58854
loss: 0.982770562171936,grad_norm: 0.9305491381580987, iteration: 58855
loss: 0.9746770262718201,grad_norm: 0.9733187301567037, iteration: 58856
loss: 0.9963673949241638,grad_norm: 0.9999991318613817, iteration: 58857
loss: 0.9704154133796692,grad_norm: 0.9759594879021786, iteration: 58858
loss: 0.9708913564682007,grad_norm: 0.9608434216295662, iteration: 58859
loss: 1.02959406375885,grad_norm: 0.9999990921088314, iteration: 58860
loss: 0.9833024144172668,grad_norm: 0.8978019928304205, iteration: 58861
loss: 1.030219316482544,grad_norm: 0.9999991418789308, iteration: 58862
loss: 0.9864597320556641,grad_norm: 0.9999991386320121, iteration: 58863
loss: 0.9927806854248047,grad_norm: 0.9999990520548802, iteration: 58864
loss: 0.9883336424827576,grad_norm: 0.9072899467823626, iteration: 58865
loss: 1.0005003213882446,grad_norm: 0.837627445155726, iteration: 58866
loss: 1.0308594703674316,grad_norm: 0.9675127708867947, iteration: 58867
loss: 0.9937183856964111,grad_norm: 0.9911118320391282, iteration: 58868
loss: 1.0326517820358276,grad_norm: 0.9052428059679215, iteration: 58869
loss: 1.000247597694397,grad_norm: 0.8565638674426875, iteration: 58870
loss: 1.058203101158142,grad_norm: 0.999999247857313, iteration: 58871
loss: 1.0191607475280762,grad_norm: 0.9999992365401152, iteration: 58872
loss: 0.9973405003547668,grad_norm: 0.9605376031696805, iteration: 58873
loss: 1.0005336999893188,grad_norm: 0.9592766852908414, iteration: 58874
loss: 1.0158964395523071,grad_norm: 0.9999992329313411, iteration: 58875
loss: 1.0181561708450317,grad_norm: 0.9999992678580036, iteration: 58876
loss: 1.0135090351104736,grad_norm: 0.9637281042636597, iteration: 58877
loss: 0.9906554818153381,grad_norm: 0.9999991662354426, iteration: 58878
loss: 1.0283982753753662,grad_norm: 0.999999290114727, iteration: 58879
loss: 1.0119975805282593,grad_norm: 0.9999991450912484, iteration: 58880
loss: 1.0158940553665161,grad_norm: 0.88001977047854, iteration: 58881
loss: 1.0010333061218262,grad_norm: 0.9999993595266697, iteration: 58882
loss: 0.9847282767295837,grad_norm: 0.9999994906269607, iteration: 58883
loss: 1.0177305936813354,grad_norm: 0.9402396904743232, iteration: 58884
loss: 1.0228725671768188,grad_norm: 0.9999991172078312, iteration: 58885
loss: 0.9871630072593689,grad_norm: 0.9999990023050321, iteration: 58886
loss: 1.0175762176513672,grad_norm: 0.9999991668946423, iteration: 58887
loss: 1.0476975440979004,grad_norm: 0.9999998769926679, iteration: 58888
loss: 1.0128268003463745,grad_norm: 0.9999990830768312, iteration: 58889
loss: 1.0183476209640503,grad_norm: 0.8381660629604121, iteration: 58890
loss: 0.9962527751922607,grad_norm: 0.9999991451850038, iteration: 58891
loss: 0.9995651245117188,grad_norm: 0.944457324894027, iteration: 58892
loss: 1.0016660690307617,grad_norm: 0.96665572352312, iteration: 58893
loss: 0.9807751178741455,grad_norm: 0.9999992014421029, iteration: 58894
loss: 0.984611988067627,grad_norm: 0.9999991131901799, iteration: 58895
loss: 0.9656546115875244,grad_norm: 0.9999990411155151, iteration: 58896
loss: 1.0175625085830688,grad_norm: 0.9999990018653692, iteration: 58897
loss: 0.9938055276870728,grad_norm: 0.9077346737617236, iteration: 58898
loss: 0.9904142618179321,grad_norm: 0.9324689555641256, iteration: 58899
loss: 0.9867585897445679,grad_norm: 0.9310646484332366, iteration: 58900
loss: 0.9839542508125305,grad_norm: 0.9999990760087688, iteration: 58901
loss: 0.9843980073928833,grad_norm: 0.999999334450079, iteration: 58902
loss: 0.9900354743003845,grad_norm: 0.9999991134253948, iteration: 58903
loss: 1.0407781600952148,grad_norm: 0.9999990968534238, iteration: 58904
loss: 1.0119578838348389,grad_norm: 0.9667222390225171, iteration: 58905
loss: 1.0236046314239502,grad_norm: 0.9627315689153926, iteration: 58906
loss: 1.016010046005249,grad_norm: 0.9999992272739393, iteration: 58907
loss: 1.0266810655593872,grad_norm: 0.9999990186364989, iteration: 58908
loss: 0.9931792616844177,grad_norm: 0.9999991512427803, iteration: 58909
loss: 0.9793018102645874,grad_norm: 0.9999991888560578, iteration: 58910
loss: 1.0134660005569458,grad_norm: 0.9999992446726392, iteration: 58911
loss: 1.0271114110946655,grad_norm: 0.9999991349350633, iteration: 58912
loss: 1.056783676147461,grad_norm: 0.9999990143340192, iteration: 58913
loss: 0.9921078681945801,grad_norm: 0.957242526195021, iteration: 58914
loss: 1.0275596380233765,grad_norm: 0.945890122218947, iteration: 58915
loss: 1.0218490362167358,grad_norm: 0.9086765524209045, iteration: 58916
loss: 0.971204400062561,grad_norm: 0.9832077373178173, iteration: 58917
loss: 1.0372586250305176,grad_norm: 0.9999991319130981, iteration: 58918
loss: 0.9800236821174622,grad_norm: 0.9149752876115655, iteration: 58919
loss: 0.998511791229248,grad_norm: 0.9999989706801508, iteration: 58920
loss: 1.0033589601516724,grad_norm: 0.9924027533351392, iteration: 58921
loss: 1.008766531944275,grad_norm: 0.8897895396861494, iteration: 58922
loss: 1.0021699666976929,grad_norm: 0.9999991854990026, iteration: 58923
loss: 0.9908289909362793,grad_norm: 0.9999993187097092, iteration: 58924
loss: 0.9820051789283752,grad_norm: 0.9870453069686589, iteration: 58925
loss: 0.9657062292098999,grad_norm: 0.999999085685278, iteration: 58926
loss: 1.020097017288208,grad_norm: 0.9999991102719509, iteration: 58927
loss: 1.0204941034317017,grad_norm: 0.9999992462283225, iteration: 58928
loss: 1.0046159029006958,grad_norm: 0.8806083021174345, iteration: 58929
loss: 1.0206077098846436,grad_norm: 0.9084858637369424, iteration: 58930
loss: 0.973901629447937,grad_norm: 0.9153542552442798, iteration: 58931
loss: 1.0084832906723022,grad_norm: 0.9999989673292647, iteration: 58932
loss: 0.9911632537841797,grad_norm: 0.8813083575603098, iteration: 58933
loss: 0.9628362655639648,grad_norm: 0.9402049269507955, iteration: 58934
loss: 1.0099374055862427,grad_norm: 0.9999992291095928, iteration: 58935
loss: 1.006851315498352,grad_norm: 0.9999992952632649, iteration: 58936
loss: 0.9855908751487732,grad_norm: 0.9999989965921967, iteration: 58937
loss: 0.9933478832244873,grad_norm: 0.9999991005975877, iteration: 58938
loss: 0.9988846182823181,grad_norm: 0.9999989799119577, iteration: 58939
loss: 1.025476336479187,grad_norm: 0.9999989814918382, iteration: 58940
loss: 0.996093213558197,grad_norm: 0.9939346855008612, iteration: 58941
loss: 1.0162278413772583,grad_norm: 0.99999920010767, iteration: 58942
loss: 0.9902341961860657,grad_norm: 0.9176202267656345, iteration: 58943
loss: 1.018113613128662,grad_norm: 0.9233134843937852, iteration: 58944
loss: 1.0213004350662231,grad_norm: 0.9999991342084145, iteration: 58945
loss: 1.034170150756836,grad_norm: 0.9999991552196229, iteration: 58946
loss: 0.9776208996772766,grad_norm: 0.9999990506410306, iteration: 58947
loss: 1.0066114664077759,grad_norm: 0.9999992797504375, iteration: 58948
loss: 0.9862390756607056,grad_norm: 0.8659294927573863, iteration: 58949
loss: 1.0033434629440308,grad_norm: 0.9999992370297437, iteration: 58950
loss: 0.9958930611610413,grad_norm: 0.9999990369078853, iteration: 58951
loss: 1.033671259880066,grad_norm: 0.9999990182977655, iteration: 58952
loss: 1.0139518976211548,grad_norm: 0.8591593803857703, iteration: 58953
loss: 1.019439697265625,grad_norm: 0.9999990572978137, iteration: 58954
loss: 1.0320266485214233,grad_norm: 0.9999990777281282, iteration: 58955
loss: 1.0327409505844116,grad_norm: 0.9999991159972363, iteration: 58956
loss: 1.030443549156189,grad_norm: 0.999999293311498, iteration: 58957
loss: 1.0207856893539429,grad_norm: 0.9999993139635409, iteration: 58958
loss: 0.967146098613739,grad_norm: 0.9999989846493261, iteration: 58959
loss: 1.0204110145568848,grad_norm: 0.9999992080285349, iteration: 58960
loss: 0.982282280921936,grad_norm: 0.9293116836918309, iteration: 58961
loss: 0.9716053009033203,grad_norm: 0.9999990806424666, iteration: 58962
loss: 1.0096415281295776,grad_norm: 0.9999990917469338, iteration: 58963
loss: 1.0046168565750122,grad_norm: 0.9358869958771615, iteration: 58964
loss: 1.007665753364563,grad_norm: 0.9737271825106616, iteration: 58965
loss: 1.03858482837677,grad_norm: 0.9999990854294527, iteration: 58966
loss: 1.0107266902923584,grad_norm: 0.8443341402935398, iteration: 58967
loss: 0.9854753017425537,grad_norm: 0.9999990655103483, iteration: 58968
loss: 1.0104237794876099,grad_norm: 0.9999990594730739, iteration: 58969
loss: 0.9879941344261169,grad_norm: 0.9999992731739878, iteration: 58970
loss: 0.9930127263069153,grad_norm: 0.8408065010315199, iteration: 58971
loss: 1.002323031425476,grad_norm: 0.9999991986108471, iteration: 58972
loss: 1.021406888961792,grad_norm: 0.9967060033853722, iteration: 58973
loss: 0.9992309808731079,grad_norm: 0.9999991426248418, iteration: 58974
loss: 1.0195326805114746,grad_norm: 0.9487317750523806, iteration: 58975
loss: 1.011222243309021,grad_norm: 0.813936987281315, iteration: 58976
loss: 1.023788332939148,grad_norm: 0.9731140933644904, iteration: 58977
loss: 0.9922177195549011,grad_norm: 0.8161906091576823, iteration: 58978
loss: 0.9795700311660767,grad_norm: 0.9810151200851942, iteration: 58979
loss: 0.967059314250946,grad_norm: 0.9999992661590167, iteration: 58980
loss: 0.9922243356704712,grad_norm: 0.9999993512909876, iteration: 58981
loss: 1.0113301277160645,grad_norm: 0.9999992359822624, iteration: 58982
loss: 0.9953080415725708,grad_norm: 0.9999991668624003, iteration: 58983
loss: 0.9652279019355774,grad_norm: 0.9999990700718209, iteration: 58984
loss: 1.0421472787857056,grad_norm: 0.999999277273795, iteration: 58985
loss: 0.9851504564285278,grad_norm: 0.9999989971404711, iteration: 58986
loss: 0.9883926510810852,grad_norm: 0.9999990265695426, iteration: 58987
loss: 1.0313555002212524,grad_norm: 0.8532106032280604, iteration: 58988
loss: 1.028950572013855,grad_norm: 0.9999991165676201, iteration: 58989
loss: 1.033246397972107,grad_norm: 0.8556854106336388, iteration: 58990
loss: 1.0252426862716675,grad_norm: 0.9386224602527612, iteration: 58991
loss: 0.9882085919380188,grad_norm: 0.9846783262228155, iteration: 58992
loss: 0.9830964803695679,grad_norm: 0.9999990395265042, iteration: 58993
loss: 0.9891960620880127,grad_norm: 0.9955699160653806, iteration: 58994
loss: 0.9797729253768921,grad_norm: 0.9999990636716131, iteration: 58995
loss: 0.9743855595588684,grad_norm: 0.9173019001134324, iteration: 58996
loss: 0.9871239066123962,grad_norm: 0.9999990828119386, iteration: 58997
loss: 0.9902135133743286,grad_norm: 0.8783591109807872, iteration: 58998
loss: 1.035880208015442,grad_norm: 0.9313196748713817, iteration: 58999
loss: 0.9437345266342163,grad_norm: 0.9999990689581569, iteration: 59000
loss: 1.0132142305374146,grad_norm: 0.9999990605179085, iteration: 59001
loss: 1.0319215059280396,grad_norm: 0.9999993119136262, iteration: 59002
loss: 1.1997811794281006,grad_norm: 0.9999995325808494, iteration: 59003
loss: 0.9888801574707031,grad_norm: 0.9999990088222106, iteration: 59004
loss: 1.028999924659729,grad_norm: 0.9999991430252932, iteration: 59005
loss: 1.008874773979187,grad_norm: 0.9708943421267998, iteration: 59006
loss: 0.9877520799636841,grad_norm: 0.9555537691355226, iteration: 59007
loss: 0.9786676168441772,grad_norm: 0.8575744164288557, iteration: 59008
loss: 1.100107192993164,grad_norm: 0.9999993940319699, iteration: 59009
loss: 1.034420371055603,grad_norm: 0.9999991932753428, iteration: 59010
loss: 0.989775538444519,grad_norm: 0.944326821634541, iteration: 59011
loss: 0.9856427907943726,grad_norm: 0.9999992421646464, iteration: 59012
loss: 0.9981386065483093,grad_norm: 0.9016264006805847, iteration: 59013
loss: 0.9933926463127136,grad_norm: 0.8702115021374375, iteration: 59014
loss: 1.0394209623336792,grad_norm: 0.9999991129386238, iteration: 59015
loss: 1.0004459619522095,grad_norm: 0.8967642987720255, iteration: 59016
loss: 0.9969677925109863,grad_norm: 0.9640187499808162, iteration: 59017
loss: 1.0111494064331055,grad_norm: 0.8592478895391564, iteration: 59018
loss: 0.9881948828697205,grad_norm: 0.8877607429339818, iteration: 59019
loss: 1.0161197185516357,grad_norm: 0.9999997754239924, iteration: 59020
loss: 0.949039101600647,grad_norm: 0.9942343263642726, iteration: 59021
loss: 1.00582754611969,grad_norm: 0.8629976914691487, iteration: 59022
loss: 0.9842142462730408,grad_norm: 0.9999990863722453, iteration: 59023
loss: 0.9851783514022827,grad_norm: 0.9469285346901731, iteration: 59024
loss: 1.0270036458969116,grad_norm: 0.9541006198336629, iteration: 59025
loss: 1.0531847476959229,grad_norm: 0.9844630910626386, iteration: 59026
loss: 0.9839562177658081,grad_norm: 0.892169988784933, iteration: 59027
loss: 0.9697129130363464,grad_norm: 0.9999992892247074, iteration: 59028
loss: 1.0094099044799805,grad_norm: 0.9999991315874067, iteration: 59029
loss: 1.0379754304885864,grad_norm: 0.9999990232553725, iteration: 59030
loss: 1.016663908958435,grad_norm: 0.9999991340841569, iteration: 59031
loss: 1.0051348209381104,grad_norm: 0.9999991761164561, iteration: 59032
loss: 1.025608777999878,grad_norm: 0.9999990623198791, iteration: 59033
loss: 0.9866757988929749,grad_norm: 0.9999991261285104, iteration: 59034
loss: 1.0069173574447632,grad_norm: 0.9999995320638727, iteration: 59035
loss: 0.9886691570281982,grad_norm: 0.9757764823546095, iteration: 59036
loss: 1.0010477304458618,grad_norm: 0.9999994015965469, iteration: 59037
loss: 0.9837207794189453,grad_norm: 0.9999992660848873, iteration: 59038
loss: 1.0016838312149048,grad_norm: 0.976133745074709, iteration: 59039
loss: 0.9856867790222168,grad_norm: 0.852145568956208, iteration: 59040
loss: 1.0560489892959595,grad_norm: 0.9336773834178801, iteration: 59041
loss: 0.9859114289283752,grad_norm: 0.956702246464562, iteration: 59042
loss: 1.015395998954773,grad_norm: 0.9999990914789026, iteration: 59043
loss: 0.986698567867279,grad_norm: 0.9999992797037277, iteration: 59044
loss: 1.0212857723236084,grad_norm: 0.9999990673859979, iteration: 59045
loss: 1.0262585878372192,grad_norm: 0.99999934233248, iteration: 59046
loss: 0.9690109491348267,grad_norm: 0.9999991187361491, iteration: 59047
loss: 1.0034782886505127,grad_norm: 0.9999990975732608, iteration: 59048
loss: 0.9898136854171753,grad_norm: 0.7626073084127432, iteration: 59049
loss: 1.0033787488937378,grad_norm: 0.8887342187606413, iteration: 59050
loss: 1.0024707317352295,grad_norm: 0.9999991827632551, iteration: 59051
loss: 1.0036358833312988,grad_norm: 0.99999906385206, iteration: 59052
loss: 1.0283174514770508,grad_norm: 0.9999990841111089, iteration: 59053
loss: 1.0760841369628906,grad_norm: 0.9999993099810779, iteration: 59054
loss: 1.009313941001892,grad_norm: 0.9999990924218833, iteration: 59055
loss: 1.0053634643554688,grad_norm: 0.9999991291667474, iteration: 59056
loss: 1.0097017288208008,grad_norm: 0.9729824947575929, iteration: 59057
loss: 1.0177916288375854,grad_norm: 0.9999990528433707, iteration: 59058
loss: 1.0025840997695923,grad_norm: 0.8106331852112725, iteration: 59059
loss: 1.043084979057312,grad_norm: 0.9999991649554443, iteration: 59060
loss: 1.009596586227417,grad_norm: 0.9520298167900465, iteration: 59061
loss: 1.0032721757888794,grad_norm: 0.9999991791551366, iteration: 59062
loss: 1.0242944955825806,grad_norm: 0.982347010658459, iteration: 59063
loss: 1.0041921138763428,grad_norm: 0.9999991239694174, iteration: 59064
loss: 1.0200331211090088,grad_norm: 0.9150127967456333, iteration: 59065
loss: 1.0262104272842407,grad_norm: 0.9999992427045452, iteration: 59066
loss: 1.0003048181533813,grad_norm: 0.8729627548996525, iteration: 59067
loss: 1.0216047763824463,grad_norm: 0.9999995492065918, iteration: 59068
loss: 1.0562670230865479,grad_norm: 0.9999992744930162, iteration: 59069
loss: 0.9677857160568237,grad_norm: 0.9999992075286864, iteration: 59070
loss: 1.0043448209762573,grad_norm: 0.9999992621048485, iteration: 59071
loss: 1.0145530700683594,grad_norm: 0.950410934758219, iteration: 59072
loss: 0.9747568368911743,grad_norm: 0.9999990547619838, iteration: 59073
loss: 1.0116379261016846,grad_norm: 0.9802571560738711, iteration: 59074
loss: 0.9848166108131409,grad_norm: 0.8711609505334624, iteration: 59075
loss: 1.0176907777786255,grad_norm: 0.9768970462886809, iteration: 59076
loss: 0.9887139201164246,grad_norm: 0.9874817935727594, iteration: 59077
loss: 0.969997227191925,grad_norm: 0.9325153298897574, iteration: 59078
loss: 0.9870548248291016,grad_norm: 0.9999990738661257, iteration: 59079
loss: 1.0076361894607544,grad_norm: 0.9999991791453169, iteration: 59080
loss: 1.024705410003662,grad_norm: 0.9999990989729355, iteration: 59081
loss: 0.9905636310577393,grad_norm: 0.9999990988806214, iteration: 59082
loss: 1.0286896228790283,grad_norm: 0.8343568128704263, iteration: 59083
loss: 1.0316628217697144,grad_norm: 0.9999990576143434, iteration: 59084
loss: 1.0490666627883911,grad_norm: 0.9999994315089397, iteration: 59085
loss: 0.9822585582733154,grad_norm: 0.9999992181279356, iteration: 59086
loss: 1.0160634517669678,grad_norm: 0.9345408803637048, iteration: 59087
loss: 0.9996345043182373,grad_norm: 0.9969295607441078, iteration: 59088
loss: 0.9694170951843262,grad_norm: 0.8991271427056742, iteration: 59089
loss: 0.975651204586029,grad_norm: 0.9999992685915741, iteration: 59090
loss: 0.9884778261184692,grad_norm: 0.9573386022369252, iteration: 59091
loss: 1.0090997219085693,grad_norm: 0.9999990380785088, iteration: 59092
loss: 0.9634575247764587,grad_norm: 0.8787387667330683, iteration: 59093
loss: 0.9783424139022827,grad_norm: 0.9999990909875021, iteration: 59094
loss: 0.9977702498435974,grad_norm: 0.8732491675121097, iteration: 59095
loss: 1.0358127355575562,grad_norm: 0.8803813909359279, iteration: 59096
loss: 0.9921384453773499,grad_norm: 0.9572628496445259, iteration: 59097
loss: 1.0318882465362549,grad_norm: 0.9999993619880481, iteration: 59098
loss: 1.0224708318710327,grad_norm: 0.9454761687232098, iteration: 59099
loss: 0.9890269041061401,grad_norm: 0.9999991160638653, iteration: 59100
loss: 0.9904552102088928,grad_norm: 0.9772457853415908, iteration: 59101
loss: 0.9997140765190125,grad_norm: 0.9999992350177307, iteration: 59102
loss: 0.9623451232910156,grad_norm: 0.9999991163866371, iteration: 59103
loss: 1.0138108730316162,grad_norm: 0.9999991548401099, iteration: 59104
loss: 1.0059038400650024,grad_norm: 0.9999990906235571, iteration: 59105
loss: 0.998432457447052,grad_norm: 0.999999073434703, iteration: 59106
loss: 0.9921520352363586,grad_norm: 0.9999991550794639, iteration: 59107
loss: 1.0192207098007202,grad_norm: 0.9999993639553515, iteration: 59108
loss: 0.9987688064575195,grad_norm: 0.9999989728982522, iteration: 59109
loss: 0.9620535969734192,grad_norm: 0.9999992682385401, iteration: 59110
loss: 1.0298012495040894,grad_norm: 0.9911837365958783, iteration: 59111
loss: 1.0400326251983643,grad_norm: 0.9999991437451475, iteration: 59112
loss: 0.9916591644287109,grad_norm: 0.9999991398760947, iteration: 59113
loss: 0.9861118793487549,grad_norm: 0.9862455068430394, iteration: 59114
loss: 1.0014293193817139,grad_norm: 0.9999992650764197, iteration: 59115
loss: 0.9785757660865784,grad_norm: 0.9999990040922383, iteration: 59116
loss: 1.0024901628494263,grad_norm: 0.9999990707309818, iteration: 59117
loss: 0.9869071245193481,grad_norm: 0.9999991687743243, iteration: 59118
loss: 1.1261298656463623,grad_norm: 0.9999991452056077, iteration: 59119
loss: 0.9957162141799927,grad_norm: 0.9999992146636424, iteration: 59120
loss: 0.9504482746124268,grad_norm: 0.9777469257288629, iteration: 59121
loss: 0.9917815327644348,grad_norm: 0.9737748459196207, iteration: 59122
loss: 1.0406852960586548,grad_norm: 0.9999991282250025, iteration: 59123
loss: 1.0163849592208862,grad_norm: 0.9999990897700092, iteration: 59124
loss: 0.967605710029602,grad_norm: 0.9999991792181478, iteration: 59125
loss: 1.002247929573059,grad_norm: 0.9999989744429669, iteration: 59126
loss: 0.9960026741027832,grad_norm: 0.8826718180356989, iteration: 59127
loss: 0.9991071224212646,grad_norm: 0.999999125054719, iteration: 59128
loss: 1.0632147789001465,grad_norm: 0.9999991359274129, iteration: 59129
loss: 0.9812197089195251,grad_norm: 0.897694245584921, iteration: 59130
loss: 0.9671537280082703,grad_norm: 0.9247344437036957, iteration: 59131
loss: 0.9782814979553223,grad_norm: 0.9999991512583662, iteration: 59132
loss: 0.9952392578125,grad_norm: 0.9999990560726719, iteration: 59133
loss: 1.015580177307129,grad_norm: 0.9037590668849464, iteration: 59134
loss: 1.0145400762557983,grad_norm: 0.9647829011881329, iteration: 59135
loss: 0.953694224357605,grad_norm: 0.9666023833992476, iteration: 59136
loss: 1.023500919342041,grad_norm: 0.9999990856653526, iteration: 59137
loss: 1.029618501663208,grad_norm: 0.9271502121491961, iteration: 59138
loss: 1.0064234733581543,grad_norm: 0.9999990772910509, iteration: 59139
loss: 1.004616618156433,grad_norm: 0.8626745401251271, iteration: 59140
loss: 1.0373468399047852,grad_norm: 0.9999991341404021, iteration: 59141
loss: 1.009521722793579,grad_norm: 0.9579633050110633, iteration: 59142
loss: 1.027012586593628,grad_norm: 0.9341440899513186, iteration: 59143
loss: 1.0125499963760376,grad_norm: 0.9999991329122052, iteration: 59144
loss: 0.9861820936203003,grad_norm: 0.9661157971807227, iteration: 59145
loss: 1.003813624382019,grad_norm: 0.9657756076788026, iteration: 59146
loss: 0.9912365078926086,grad_norm: 0.9985363447864876, iteration: 59147
loss: 0.990031361579895,grad_norm: 0.9999992092464149, iteration: 59148
loss: 0.9834597110748291,grad_norm: 0.9999991585057186, iteration: 59149
loss: 1.0027228593826294,grad_norm: 0.8485968959278016, iteration: 59150
loss: 0.9797179102897644,grad_norm: 0.9999990543209338, iteration: 59151
loss: 0.9965017437934875,grad_norm: 0.9039105727281528, iteration: 59152
loss: 0.9871361255645752,grad_norm: 0.9999989657012315, iteration: 59153
loss: 0.9720659255981445,grad_norm: 0.9478246304300222, iteration: 59154
loss: 1.0348118543624878,grad_norm: 0.9375328812208995, iteration: 59155
loss: 0.9933760762214661,grad_norm: 0.9999990611798013, iteration: 59156
loss: 0.9927283525466919,grad_norm: 0.9143040143398422, iteration: 59157
loss: 0.9943827986717224,grad_norm: 0.9437003737036745, iteration: 59158
loss: 1.0126694440841675,grad_norm: 0.9999990361970365, iteration: 59159
loss: 1.0067445039749146,grad_norm: 0.905303301219168, iteration: 59160
loss: 1.022452473640442,grad_norm: 0.9999991024771522, iteration: 59161
loss: 1.0185075998306274,grad_norm: 0.9760596440388953, iteration: 59162
loss: 0.9962888360023499,grad_norm: 0.9999990105710375, iteration: 59163
loss: 0.9980745315551758,grad_norm: 0.9999992218143553, iteration: 59164
loss: 1.017598032951355,grad_norm: 0.9999990619456337, iteration: 59165
loss: 1.0295047760009766,grad_norm: 0.956766715387431, iteration: 59166
loss: 0.9963277578353882,grad_norm: 0.8106372132171206, iteration: 59167
loss: 0.9847205281257629,grad_norm: 0.9999991218794355, iteration: 59168
loss: 1.003587007522583,grad_norm: 0.9600132361756163, iteration: 59169
loss: 1.0237839221954346,grad_norm: 0.9999991547775412, iteration: 59170
loss: 1.0298043489456177,grad_norm: 0.8163607153855764, iteration: 59171
loss: 0.9984487891197205,grad_norm: 0.999999151457811, iteration: 59172
loss: 1.173045039176941,grad_norm: 0.9999999381604295, iteration: 59173
loss: 0.9922460913658142,grad_norm: 0.999999191073359, iteration: 59174
loss: 1.0016664266586304,grad_norm: 0.8500602320534794, iteration: 59175
loss: 1.0040339231491089,grad_norm: 0.9999990520457406, iteration: 59176
loss: 0.9786657094955444,grad_norm: 0.8724819234171615, iteration: 59177
loss: 0.9959906339645386,grad_norm: 0.9999992546015806, iteration: 59178
loss: 0.9959714412689209,grad_norm: 0.9999992991343346, iteration: 59179
loss: 0.9887966513633728,grad_norm: 0.9999990841838624, iteration: 59180
loss: 1.0279804468154907,grad_norm: 0.9408276308079646, iteration: 59181
loss: 0.9959670305252075,grad_norm: 0.9376676899473368, iteration: 59182
loss: 1.0059407949447632,grad_norm: 0.9999990884192053, iteration: 59183
loss: 0.9727957248687744,grad_norm: 0.9442316446103345, iteration: 59184
loss: 1.00589120388031,grad_norm: 0.9666353386634724, iteration: 59185
loss: 0.9914517998695374,grad_norm: 0.9844804932990078, iteration: 59186
loss: 1.0133930444717407,grad_norm: 0.9999989800121691, iteration: 59187
loss: 0.9930351972579956,grad_norm: 0.9999990804246516, iteration: 59188
loss: 0.9929988384246826,grad_norm: 0.7854974428509577, iteration: 59189
loss: 1.0303682088851929,grad_norm: 0.9999990168731198, iteration: 59190
loss: 0.9601492285728455,grad_norm: 0.8820507301085404, iteration: 59191
loss: 1.0262024402618408,grad_norm: 0.8857047040690258, iteration: 59192
loss: 0.9762819409370422,grad_norm: 0.9256108048622799, iteration: 59193
loss: 0.9993359446525574,grad_norm: 0.9853702285075409, iteration: 59194
loss: 1.0127962827682495,grad_norm: 0.99999916325706, iteration: 59195
loss: 1.066799521446228,grad_norm: 0.9999995811706291, iteration: 59196
loss: 1.0174919366836548,grad_norm: 0.9470078333616063, iteration: 59197
loss: 1.0125399827957153,grad_norm: 0.9999991494004343, iteration: 59198
loss: 1.036535620689392,grad_norm: 0.999999271724308, iteration: 59199
loss: 1.0363858938217163,grad_norm: 0.7914854397005068, iteration: 59200
loss: 1.032320499420166,grad_norm: 0.999999165410833, iteration: 59201
loss: 1.0168360471725464,grad_norm: 0.8846295644948946, iteration: 59202
loss: 1.01200532913208,grad_norm: 0.9999991329066088, iteration: 59203
loss: 1.0288459062576294,grad_norm: 0.9999991397849342, iteration: 59204
loss: 1.00553560256958,grad_norm: 0.8107819772929358, iteration: 59205
loss: 0.9776515364646912,grad_norm: 0.9737290103214756, iteration: 59206
loss: 0.9923737645149231,grad_norm: 0.9872647759886625, iteration: 59207
loss: 0.9939849972724915,grad_norm: 0.9999991605222848, iteration: 59208
loss: 1.016262412071228,grad_norm: 0.8896021550004198, iteration: 59209
loss: 1.0126512050628662,grad_norm: 0.9999991317944622, iteration: 59210
loss: 0.9951585531234741,grad_norm: 0.9758156576638595, iteration: 59211
loss: 1.0146533250808716,grad_norm: 0.9999991106642487, iteration: 59212
loss: 1.014108657836914,grad_norm: 0.9352637065762721, iteration: 59213
loss: 0.9988579154014587,grad_norm: 0.8768442238967585, iteration: 59214
loss: 1.0260635614395142,grad_norm: 0.9999990488702842, iteration: 59215
loss: 0.9961336255073547,grad_norm: 0.922931134106792, iteration: 59216
loss: 1.0012547969818115,grad_norm: 0.8727107486144988, iteration: 59217
loss: 1.022120475769043,grad_norm: 0.8305106338836784, iteration: 59218
loss: 1.0252505540847778,grad_norm: 0.999999121534674, iteration: 59219
loss: 1.0049265623092651,grad_norm: 0.999999338878985, iteration: 59220
loss: 0.9731680750846863,grad_norm: 0.9590370470912246, iteration: 59221
loss: 1.0197358131408691,grad_norm: 0.9999992282221543, iteration: 59222
loss: 1.048887014389038,grad_norm: 0.9689289001994418, iteration: 59223
loss: 1.0153255462646484,grad_norm: 0.9609313949761622, iteration: 59224
loss: 0.9828279614448547,grad_norm: 0.9999991507428183, iteration: 59225
loss: 1.0040894746780396,grad_norm: 0.9833017906149689, iteration: 59226
loss: 0.9927880764007568,grad_norm: 0.9999990989223256, iteration: 59227
loss: 1.0155192613601685,grad_norm: 0.9610566437180744, iteration: 59228
loss: 0.9964972138404846,grad_norm: 0.8747411732277904, iteration: 59229
loss: 0.9781581163406372,grad_norm: 0.9370976326323931, iteration: 59230
loss: 1.0304983854293823,grad_norm: 0.9474578780743025, iteration: 59231
loss: 0.9717710614204407,grad_norm: 0.9999991517066408, iteration: 59232
loss: 1.0229099988937378,grad_norm: 0.9999990214595103, iteration: 59233
loss: 0.9876774549484253,grad_norm: 0.9999990913946134, iteration: 59234
loss: 0.9808104038238525,grad_norm: 0.8821477253223152, iteration: 59235
loss: 0.9773970246315002,grad_norm: 0.9999992322244817, iteration: 59236
loss: 1.003104567527771,grad_norm: 0.999999005659044, iteration: 59237
loss: 1.0180641412734985,grad_norm: 0.9999991825552296, iteration: 59238
loss: 0.9878712892532349,grad_norm: 0.9623364549796612, iteration: 59239
loss: 0.9948795437812805,grad_norm: 0.9999991872600696, iteration: 59240
loss: 0.9815312623977661,grad_norm: 0.9635570174152674, iteration: 59241
loss: 0.9840066432952881,grad_norm: 0.9999989448987957, iteration: 59242
loss: 1.009400725364685,grad_norm: 0.8715742894900718, iteration: 59243
loss: 0.9778164029121399,grad_norm: 0.9806948118496445, iteration: 59244
loss: 0.985103964805603,grad_norm: 0.9999991293368615, iteration: 59245
loss: 1.0153412818908691,grad_norm: 0.9970486247363887, iteration: 59246
loss: 1.0015809535980225,grad_norm: 0.99999924735121, iteration: 59247
loss: 1.0191360712051392,grad_norm: 0.9640784771124903, iteration: 59248
loss: 1.0078145265579224,grad_norm: 0.9999991669661137, iteration: 59249
loss: 1.0395114421844482,grad_norm: 0.9999990645007741, iteration: 59250
loss: 1.0151278972625732,grad_norm: 0.7996621589215063, iteration: 59251
loss: 1.0148513317108154,grad_norm: 0.9999991618221125, iteration: 59252
loss: 1.0231868028640747,grad_norm: 0.9146962023439968, iteration: 59253
loss: 1.0232770442962646,grad_norm: 0.8430997797486426, iteration: 59254
loss: 0.9610382914543152,grad_norm: 0.9999990039628136, iteration: 59255
loss: 0.984294056892395,grad_norm: 0.9999989590353661, iteration: 59256
loss: 0.995991051197052,grad_norm: 0.9999990830978208, iteration: 59257
loss: 1.0125291347503662,grad_norm: 0.999999083304957, iteration: 59258
loss: 0.9974852800369263,grad_norm: 0.9999991404632353, iteration: 59259
loss: 1.0035667419433594,grad_norm: 0.9999992435502478, iteration: 59260
loss: 1.0262882709503174,grad_norm: 0.999999682881598, iteration: 59261
loss: 1.0463695526123047,grad_norm: 0.9999991984131886, iteration: 59262
loss: 1.0168771743774414,grad_norm: 0.9999991034736695, iteration: 59263
loss: 1.0419961214065552,grad_norm: 0.923866331914025, iteration: 59264
loss: 1.0113078355789185,grad_norm: 0.9552157394546212, iteration: 59265
loss: 1.0219181776046753,grad_norm: 0.9999991152734196, iteration: 59266
loss: 1.0430629253387451,grad_norm: 0.9423418632634092, iteration: 59267
loss: 0.9980752468109131,grad_norm: 0.999999140634183, iteration: 59268
loss: 0.998200535774231,grad_norm: 0.9355404242148818, iteration: 59269
loss: 0.9814976453781128,grad_norm: 0.9999991088769721, iteration: 59270
loss: 0.9916277527809143,grad_norm: 0.9999990247964278, iteration: 59271
loss: 0.9667829871177673,grad_norm: 0.9999990435195047, iteration: 59272
loss: 0.9715372920036316,grad_norm: 0.9999992886016968, iteration: 59273
loss: 0.9739934206008911,grad_norm: 0.999999097548573, iteration: 59274
loss: 1.0113024711608887,grad_norm: 0.886935501869936, iteration: 59275
loss: 1.0148485898971558,grad_norm: 0.9899724939384105, iteration: 59276
loss: 1.0073012113571167,grad_norm: 0.9424054170765895, iteration: 59277
loss: 0.9816673398017883,grad_norm: 0.9999991305762276, iteration: 59278
loss: 1.0540915727615356,grad_norm: 0.999999383061829, iteration: 59279
loss: 1.0099729299545288,grad_norm: 0.9999992159435829, iteration: 59280
loss: 0.9951193332672119,grad_norm: 0.9999990938540386, iteration: 59281
loss: 1.0031206607818604,grad_norm: 0.9570475442234602, iteration: 59282
loss: 0.9721475839614868,grad_norm: 0.9999991137514649, iteration: 59283
loss: 1.0185514688491821,grad_norm: 0.9999993063933967, iteration: 59284
loss: 1.0006664991378784,grad_norm: 0.9477261248228915, iteration: 59285
loss: 1.031101942062378,grad_norm: 0.9999991266452856, iteration: 59286
loss: 0.9887687563896179,grad_norm: 0.9391609922789247, iteration: 59287
loss: 0.9886369705200195,grad_norm: 0.9517399265132078, iteration: 59288
loss: 0.9895457029342651,grad_norm: 0.737825696014174, iteration: 59289
loss: 1.001650094985962,grad_norm: 0.9599002882933814, iteration: 59290
loss: 0.976923942565918,grad_norm: 0.8834544051893453, iteration: 59291
loss: 1.007940411567688,grad_norm: 0.9913903800216946, iteration: 59292
loss: 1.0368520021438599,grad_norm: 0.9999997676679131, iteration: 59293
loss: 1.0450091361999512,grad_norm: 0.9999993278611564, iteration: 59294
loss: 1.0204498767852783,grad_norm: 0.9999991613699216, iteration: 59295
loss: 1.0233420133590698,grad_norm: 0.999999120845437, iteration: 59296
loss: 1.0298205614089966,grad_norm: 0.9999993289837892, iteration: 59297
loss: 0.9842463731765747,grad_norm: 0.9999992460625121, iteration: 59298
loss: 1.0311216115951538,grad_norm: 0.9999991913570366, iteration: 59299
loss: 0.9529098868370056,grad_norm: 0.9999990751175022, iteration: 59300
loss: 1.0437560081481934,grad_norm: 0.9999997003351496, iteration: 59301
loss: 1.0319112539291382,grad_norm: 0.9577904919296577, iteration: 59302
loss: 1.0368914604187012,grad_norm: 0.9999990461559142, iteration: 59303
loss: 1.047550082206726,grad_norm: 0.9999990976250005, iteration: 59304
loss: 1.002895474433899,grad_norm: 0.9999991830075635, iteration: 59305
loss: 1.0115753412246704,grad_norm: 0.9999991852145119, iteration: 59306
loss: 1.0256179571151733,grad_norm: 0.9999991445915472, iteration: 59307
loss: 1.003359317779541,grad_norm: 0.9403860464190805, iteration: 59308
loss: 0.9749503135681152,grad_norm: 0.9442341446160688, iteration: 59309
loss: 1.0026404857635498,grad_norm: 0.9999990888285673, iteration: 59310
loss: 0.975253164768219,grad_norm: 0.9999990963191138, iteration: 59311
loss: 0.9669206738471985,grad_norm: 0.999999027077921, iteration: 59312
loss: 1.0086203813552856,grad_norm: 0.999999032549673, iteration: 59313
loss: 1.1004011631011963,grad_norm: 0.9999997847118436, iteration: 59314
loss: 0.9811217784881592,grad_norm: 0.9999990473717933, iteration: 59315
loss: 1.0070781707763672,grad_norm: 0.9999990858220652, iteration: 59316
loss: 0.9652823805809021,grad_norm: 0.9999990349222238, iteration: 59317
loss: 1.0089573860168457,grad_norm: 0.9999993047530613, iteration: 59318
loss: 0.9851170778274536,grad_norm: 0.9999990682942194, iteration: 59319
loss: 1.0104061365127563,grad_norm: 0.9999992105184174, iteration: 59320
loss: 1.0195605754852295,grad_norm: 0.9999991754513332, iteration: 59321
loss: 1.0034652948379517,grad_norm: 0.9999992583916403, iteration: 59322
loss: 1.0438332557678223,grad_norm: 0.9999992008709291, iteration: 59323
loss: 0.9987006187438965,grad_norm: 0.9669418605758876, iteration: 59324
loss: 1.0441406965255737,grad_norm: 0.9999991841400975, iteration: 59325
loss: 0.9692178964614868,grad_norm: 0.9999992604924243, iteration: 59326
loss: 0.9942090511322021,grad_norm: 0.9340934456269209, iteration: 59327
loss: 1.0148807764053345,grad_norm: 0.9999989520255032, iteration: 59328
loss: 0.9666476249694824,grad_norm: 0.9029173461074966, iteration: 59329
loss: 1.0134453773498535,grad_norm: 0.9999990827787743, iteration: 59330
loss: 1.0155051946640015,grad_norm: 0.9999990702174311, iteration: 59331
loss: 1.0203074216842651,grad_norm: 0.9999991102015089, iteration: 59332
loss: 1.0323052406311035,grad_norm: 0.9999991151830614, iteration: 59333
loss: 0.9976691007614136,grad_norm: 0.9601904884347375, iteration: 59334
loss: 0.9936584234237671,grad_norm: 0.9927463650719087, iteration: 59335
loss: 1.0914244651794434,grad_norm: 0.9999995443851415, iteration: 59336
loss: 0.991634726524353,grad_norm: 0.9999992502675988, iteration: 59337
loss: 0.9977987408638,grad_norm: 0.9809985154473773, iteration: 59338
loss: 0.9655751585960388,grad_norm: 0.9273219966184895, iteration: 59339
loss: 0.9850224852561951,grad_norm: 0.9632756563407482, iteration: 59340
loss: 0.9816759824752808,grad_norm: 0.9159806377627101, iteration: 59341
loss: 0.950275182723999,grad_norm: 0.8822874149489419, iteration: 59342
loss: 0.9784267544746399,grad_norm: 0.9284217491414805, iteration: 59343
loss: 0.9752004146575928,grad_norm: 0.9999991513610429, iteration: 59344
loss: 0.964924156665802,grad_norm: 0.9999994875535029, iteration: 59345
loss: 1.0334270000457764,grad_norm: 0.9999992507739178, iteration: 59346
loss: 1.004183292388916,grad_norm: 0.999999074603312, iteration: 59347
loss: 1.0615519285202026,grad_norm: 0.9999994732132074, iteration: 59348
loss: 0.993136465549469,grad_norm: 0.9999991482037377, iteration: 59349
loss: 1.035720705986023,grad_norm: 0.9250051613393716, iteration: 59350
loss: 1.0234687328338623,grad_norm: 0.9844175238570165, iteration: 59351
loss: 1.0016224384307861,grad_norm: 0.9999992290573382, iteration: 59352
loss: 1.0308642387390137,grad_norm: 0.8335717611370679, iteration: 59353
loss: 1.0334773063659668,grad_norm: 0.9999990662169853, iteration: 59354
loss: 1.0106968879699707,grad_norm: 0.9999989800126927, iteration: 59355
loss: 0.9922699332237244,grad_norm: 0.9999990429510082, iteration: 59356
loss: 1.010763168334961,grad_norm: 0.8975470442073015, iteration: 59357
loss: 1.001144289970398,grad_norm: 0.9999990992684786, iteration: 59358
loss: 0.983163058757782,grad_norm: 0.9999990205419205, iteration: 59359
loss: 0.9995958209037781,grad_norm: 0.847573109546838, iteration: 59360
loss: 0.998930811882019,grad_norm: 0.9999992085911114, iteration: 59361
loss: 1.0201457738876343,grad_norm: 0.9999991438347897, iteration: 59362
loss: 1.0140777826309204,grad_norm: 0.9019217377597458, iteration: 59363
loss: 0.9790972471237183,grad_norm: 0.999999207063992, iteration: 59364
loss: 1.011572003364563,grad_norm: 0.8882580043737007, iteration: 59365
loss: 0.9942085146903992,grad_norm: 0.9999992389092894, iteration: 59366
loss: 0.9863556623458862,grad_norm: 0.9999991749951607, iteration: 59367
loss: 0.9969212412834167,grad_norm: 0.9048640177677955, iteration: 59368
loss: 1.0213030576705933,grad_norm: 0.9444029952359955, iteration: 59369
loss: 0.9971899390220642,grad_norm: 0.9999990630507966, iteration: 59370
loss: 1.0147086381912231,grad_norm: 0.99999915751056, iteration: 59371
loss: 0.9923689961433411,grad_norm: 0.9999992649032612, iteration: 59372
loss: 0.9918049573898315,grad_norm: 0.999999183809012, iteration: 59373
loss: 0.9725029468536377,grad_norm: 0.9999992158740532, iteration: 59374
loss: 0.9925937056541443,grad_norm: 0.9999990511936995, iteration: 59375
loss: 1.0241910219192505,grad_norm: 0.999999045520738, iteration: 59376
loss: 1.0160350799560547,grad_norm: 0.9999990413986036, iteration: 59377
loss: 1.0108813047409058,grad_norm: 0.902523284643582, iteration: 59378
loss: 1.005232334136963,grad_norm: 0.9999990141187495, iteration: 59379
loss: 0.9778990149497986,grad_norm: 0.9999991287562459, iteration: 59380
loss: 1.0109583139419556,grad_norm: 0.9460494430713108, iteration: 59381
loss: 0.979275643825531,grad_norm: 0.943571183903576, iteration: 59382
loss: 0.9859583377838135,grad_norm: 0.9999991383115571, iteration: 59383
loss: 0.9992100596427917,grad_norm: 0.9999992328888112, iteration: 59384
loss: 1.022801160812378,grad_norm: 0.9292416442502561, iteration: 59385
loss: 1.0119531154632568,grad_norm: 0.9712793168787204, iteration: 59386
loss: 1.0020865201950073,grad_norm: 0.987561858657091, iteration: 59387
loss: 0.9906601905822754,grad_norm: 0.8646200522800425, iteration: 59388
loss: 0.9963366985321045,grad_norm: 0.9481652247678781, iteration: 59389
loss: 1.0136003494262695,grad_norm: 0.9116947468745822, iteration: 59390
loss: 0.9620882868766785,grad_norm: 0.8985835061504326, iteration: 59391
loss: 0.9980308413505554,grad_norm: 0.9999990824371803, iteration: 59392
loss: 1.0114589929580688,grad_norm: 0.9999998081089384, iteration: 59393
loss: 1.0147629976272583,grad_norm: 0.9410079126803851, iteration: 59394
loss: 0.9984572529792786,grad_norm: 0.9999990502639761, iteration: 59395
loss: 0.9711259007453918,grad_norm: 0.9999989752524211, iteration: 59396
loss: 0.9726720452308655,grad_norm: 0.9999990843399684, iteration: 59397
loss: 0.9541565179824829,grad_norm: 0.7996649225311168, iteration: 59398
loss: 1.019141674041748,grad_norm: 0.9999991563916459, iteration: 59399
loss: 0.9833193421363831,grad_norm: 0.9999995907265445, iteration: 59400
loss: 1.0367757081985474,grad_norm: 0.9936470298379698, iteration: 59401
loss: 1.0196508169174194,grad_norm: 0.9132287128075651, iteration: 59402
loss: 1.0018837451934814,grad_norm: 0.9999990480376427, iteration: 59403
loss: 0.9752929210662842,grad_norm: 0.999999117867108, iteration: 59404
loss: 0.9753151535987854,grad_norm: 0.9999992364480631, iteration: 59405
loss: 1.0033270120620728,grad_norm: 0.9999991114289881, iteration: 59406
loss: 0.9990237355232239,grad_norm: 0.9999992559776391, iteration: 59407
loss: 1.0025010108947754,grad_norm: 0.8736612823374298, iteration: 59408
loss: 1.0120680332183838,grad_norm: 0.9999992672203606, iteration: 59409
loss: 1.0342439413070679,grad_norm: 0.9999991430955465, iteration: 59410
loss: 1.0147291421890259,grad_norm: 0.9820444980471068, iteration: 59411
loss: 0.9662649631500244,grad_norm: 0.9628222330785701, iteration: 59412
loss: 1.0267548561096191,grad_norm: 0.9999992084514538, iteration: 59413
loss: 1.0300719738006592,grad_norm: 0.9943439037193319, iteration: 59414
loss: 1.080409049987793,grad_norm: 0.9999992654077795, iteration: 59415
loss: 1.0082464218139648,grad_norm: 0.9999991401249426, iteration: 59416
loss: 0.9541137218475342,grad_norm: 0.9921961205357633, iteration: 59417
loss: 0.9968978762626648,grad_norm: 0.9999990696916768, iteration: 59418
loss: 1.0143972635269165,grad_norm: 0.9999991420581423, iteration: 59419
loss: 1.0150631666183472,grad_norm: 0.9999990593336385, iteration: 59420
loss: 0.9622188210487366,grad_norm: 0.9779690812997363, iteration: 59421
loss: 1.0110617876052856,grad_norm: 0.9792974618419575, iteration: 59422
loss: 0.9994071125984192,grad_norm: 0.9553014461508599, iteration: 59423
loss: 0.9973628520965576,grad_norm: 0.9999990987753153, iteration: 59424
loss: 1.0147459506988525,grad_norm: 0.977158199942546, iteration: 59425
loss: 0.989901065826416,grad_norm: 0.9999990965186668, iteration: 59426
loss: 0.9951698184013367,grad_norm: 0.9570053114365832, iteration: 59427
loss: 0.9549175500869751,grad_norm: 0.9999992316752423, iteration: 59428
loss: 1.0052520036697388,grad_norm: 0.999999250988431, iteration: 59429
loss: 0.993678629398346,grad_norm: 0.9606983913771342, iteration: 59430
loss: 0.9646538496017456,grad_norm: 0.9999990122818623, iteration: 59431
loss: 0.9907732009887695,grad_norm: 0.9331601770131086, iteration: 59432
loss: 0.9839945435523987,grad_norm: 0.9999992096142006, iteration: 59433
loss: 1.0332200527191162,grad_norm: 0.9999993192692866, iteration: 59434
loss: 0.9900888800621033,grad_norm: 0.9932227451676106, iteration: 59435
loss: 0.9865577220916748,grad_norm: 0.9721951750217026, iteration: 59436
loss: 0.9646975994110107,grad_norm: 0.9152260059591151, iteration: 59437
loss: 0.995293140411377,grad_norm: 0.9999990138668579, iteration: 59438
loss: 0.9646961092948914,grad_norm: 0.9999991406736413, iteration: 59439
loss: 0.9725460410118103,grad_norm: 0.9999991102354868, iteration: 59440
loss: 0.9994931817054749,grad_norm: 0.999999243865505, iteration: 59441
loss: 1.0265413522720337,grad_norm: 0.9570883220457151, iteration: 59442
loss: 0.99073326587677,grad_norm: 0.9999990526400301, iteration: 59443
loss: 0.9727821946144104,grad_norm: 0.9846904177292799, iteration: 59444
loss: 1.0324089527130127,grad_norm: 0.9999990102857992, iteration: 59445
loss: 1.0294132232666016,grad_norm: 0.9999991468166519, iteration: 59446
loss: 1.0560710430145264,grad_norm: 0.9906526779628761, iteration: 59447
loss: 0.9811614751815796,grad_norm: 0.9253167252097593, iteration: 59448
loss: 1.0282940864562988,grad_norm: 0.9999992645518961, iteration: 59449
loss: 1.0293004512786865,grad_norm: 0.9999992076143409, iteration: 59450
loss: 1.0093857049942017,grad_norm: 0.9999991259808054, iteration: 59451
loss: 1.0076513290405273,grad_norm: 0.929256816619426, iteration: 59452
loss: 1.047690749168396,grad_norm: 0.8993491547057678, iteration: 59453
loss: 1.0261971950531006,grad_norm: 0.9999992001721248, iteration: 59454
loss: 1.003071904182434,grad_norm: 0.9789218141791373, iteration: 59455
loss: 0.9950777292251587,grad_norm: 0.985680909465752, iteration: 59456
loss: 1.0284327268600464,grad_norm: 0.9999990929697952, iteration: 59457
loss: 0.9773276448249817,grad_norm: 0.9232310474282421, iteration: 59458
loss: 0.9985084533691406,grad_norm: 0.9999990043004169, iteration: 59459
loss: 0.9918782114982605,grad_norm: 0.9999990859393906, iteration: 59460
loss: 0.9784244298934937,grad_norm: 0.8990588026194788, iteration: 59461
loss: 0.9993315935134888,grad_norm: 0.988111337070325, iteration: 59462
loss: 1.0341545343399048,grad_norm: 0.9999990925631993, iteration: 59463
loss: 0.988749623298645,grad_norm: 0.9999990403396034, iteration: 59464
loss: 1.0297930240631104,grad_norm: 0.8776448025104328, iteration: 59465
loss: 0.9943417906761169,grad_norm: 0.96833943260017, iteration: 59466
loss: 0.9695600271224976,grad_norm: 0.9413501175774996, iteration: 59467
loss: 0.9993702173233032,grad_norm: 0.8912403074772053, iteration: 59468
loss: 0.9910158514976501,grad_norm: 0.9999991006376779, iteration: 59469
loss: 0.9629557132720947,grad_norm: 0.999999124103913, iteration: 59470
loss: 1.0105963945388794,grad_norm: 0.999999114508477, iteration: 59471
loss: 1.0153650045394897,grad_norm: 0.9656927267289032, iteration: 59472
loss: 0.9913996458053589,grad_norm: 0.9999991921271155, iteration: 59473
loss: 1.001664161682129,grad_norm: 0.9947927720817777, iteration: 59474
loss: 1.0258628129959106,grad_norm: 0.9999991004980542, iteration: 59475
loss: 1.0429909229278564,grad_norm: 0.99999998162757, iteration: 59476
loss: 0.9966027736663818,grad_norm: 0.9999991935881928, iteration: 59477
loss: 1.0049351453781128,grad_norm: 0.9999992129233025, iteration: 59478
loss: 0.9872250556945801,grad_norm: 0.8396309173862523, iteration: 59479
loss: 0.9916801452636719,grad_norm: 0.9999991297328805, iteration: 59480
loss: 0.9787478446960449,grad_norm: 0.943914328054077, iteration: 59481
loss: 0.9961133599281311,grad_norm: 0.906676334237697, iteration: 59482
loss: 1.0244117975234985,grad_norm: 0.9999991980716079, iteration: 59483
loss: 0.9973598718643188,grad_norm: 0.9999992126083602, iteration: 59484
loss: 0.9957072734832764,grad_norm: 0.9999991605566144, iteration: 59485
loss: 1.0282262563705444,grad_norm: 0.9847144651196089, iteration: 59486
loss: 1.0065059661865234,grad_norm: 0.9908915096847748, iteration: 59487
loss: 1.0279890298843384,grad_norm: 0.999999202425922, iteration: 59488
loss: 0.9975119829177856,grad_norm: 0.9999992977164125, iteration: 59489
loss: 0.9997159838676453,grad_norm: 0.999999193682886, iteration: 59490
loss: 1.0295014381408691,grad_norm: 0.9999990627384789, iteration: 59491
loss: 0.9849939942359924,grad_norm: 0.9999993451493793, iteration: 59492
loss: 1.0040669441223145,grad_norm: 0.951507942999213, iteration: 59493
loss: 0.95257169008255,grad_norm: 0.8731877067724745, iteration: 59494
loss: 1.014348030090332,grad_norm: 0.999999148765996, iteration: 59495
loss: 0.9487295746803284,grad_norm: 0.975549486326497, iteration: 59496
loss: 0.9948399066925049,grad_norm: 0.999999089918054, iteration: 59497
loss: 1.0595276355743408,grad_norm: 0.9999997485483184, iteration: 59498
loss: 0.9805849194526672,grad_norm: 0.9999991925300367, iteration: 59499
loss: 0.9857802987098694,grad_norm: 0.9999990506901196, iteration: 59500
loss: 1.0378469228744507,grad_norm: 0.9999991933315496, iteration: 59501
loss: 0.9779080748558044,grad_norm: 0.9999991090268354, iteration: 59502
loss: 0.9954586029052734,grad_norm: 0.9999991903455098, iteration: 59503
loss: 0.9777848720550537,grad_norm: 0.9666464998719936, iteration: 59504
loss: 0.9921428561210632,grad_norm: 0.9999991296178609, iteration: 59505
loss: 0.9785292744636536,grad_norm: 0.9999990399357865, iteration: 59506
loss: 0.9862805604934692,grad_norm: 0.9999992745022882, iteration: 59507
loss: 0.9872216582298279,grad_norm: 0.9999989959466091, iteration: 59508
loss: 1.0198028087615967,grad_norm: 0.8449594420520724, iteration: 59509
loss: 0.9728920459747314,grad_norm: 0.9574698435910378, iteration: 59510
loss: 1.0025811195373535,grad_norm: 0.9999991319132028, iteration: 59511
loss: 1.037420630455017,grad_norm: 0.9999997560569306, iteration: 59512
loss: 1.0011855363845825,grad_norm: 0.999999070023623, iteration: 59513
loss: 1.0156267881393433,grad_norm: 0.999999132195217, iteration: 59514
loss: 0.9766746163368225,grad_norm: 0.9999990630762027, iteration: 59515
loss: 0.9942103028297424,grad_norm: 0.9572451136272091, iteration: 59516
loss: 1.018609881401062,grad_norm: 0.9999992373604949, iteration: 59517
loss: 0.9896669983863831,grad_norm: 0.9999990389588107, iteration: 59518
loss: 1.0012894868850708,grad_norm: 0.9999992966883424, iteration: 59519
loss: 1.0148342847824097,grad_norm: 0.9999991959073622, iteration: 59520
loss: 1.010093092918396,grad_norm: 0.9811476770469344, iteration: 59521
loss: 0.9934908151626587,grad_norm: 0.8843462813311644, iteration: 59522
loss: 0.998327374458313,grad_norm: 0.9999991736625746, iteration: 59523
loss: 1.030272364616394,grad_norm: 0.9999991948761877, iteration: 59524
loss: 0.9779176115989685,grad_norm: 0.9999992196779277, iteration: 59525
loss: 0.9890076518058777,grad_norm: 0.9999991075282669, iteration: 59526
loss: 1.0056439638137817,grad_norm: 0.9999991777286895, iteration: 59527
loss: 1.0166363716125488,grad_norm: 0.9999990173774499, iteration: 59528
loss: 0.98674076795578,grad_norm: 0.9577480400116537, iteration: 59529
loss: 0.9986952543258667,grad_norm: 0.9999990223736971, iteration: 59530
loss: 0.9900696873664856,grad_norm: 0.9999989632080388, iteration: 59531
loss: 0.979756772518158,grad_norm: 0.999999201142846, iteration: 59532
loss: 0.9996758103370667,grad_norm: 0.9345352183076736, iteration: 59533
loss: 1.0228313207626343,grad_norm: 0.9999991631648508, iteration: 59534
loss: 0.990511953830719,grad_norm: 0.8587347024107111, iteration: 59535
loss: 0.9715997576713562,grad_norm: 0.9999992183867579, iteration: 59536
loss: 0.9982136487960815,grad_norm: 0.9175803617603596, iteration: 59537
loss: 0.9943681955337524,grad_norm: 0.9999993706582637, iteration: 59538
loss: 0.9929303526878357,grad_norm: 0.9999992611116518, iteration: 59539
loss: 0.9954358339309692,grad_norm: 0.9999990762834062, iteration: 59540
loss: 0.9916574954986572,grad_norm: 0.9999991477116097, iteration: 59541
loss: 1.0120304822921753,grad_norm: 0.9632341820786547, iteration: 59542
loss: 0.9645328521728516,grad_norm: 0.9999991588326335, iteration: 59543
loss: 1.0293140411376953,grad_norm: 0.9944466828535321, iteration: 59544
loss: 1.0039472579956055,grad_norm: 0.999999095682285, iteration: 59545
loss: 0.9309694170951843,grad_norm: 0.9999991209877865, iteration: 59546
loss: 1.0591447353363037,grad_norm: 0.9999992566218969, iteration: 59547
loss: 0.9965171217918396,grad_norm: 0.9149350336062104, iteration: 59548
loss: 1.0188019275665283,grad_norm: 0.9999993250974204, iteration: 59549
loss: 1.015769124031067,grad_norm: 0.9999991687232823, iteration: 59550
loss: 0.9902997016906738,grad_norm: 0.9509036668796438, iteration: 59551
loss: 0.9956715106964111,grad_norm: 0.9992156184749834, iteration: 59552
loss: 0.9976116418838501,grad_norm: 0.9428084953329143, iteration: 59553
loss: 1.0019232034683228,grad_norm: 0.9610451015538012, iteration: 59554
loss: 1.0052005052566528,grad_norm: 0.9835228175907915, iteration: 59555
loss: 1.0237514972686768,grad_norm: 0.9999990450738755, iteration: 59556
loss: 0.9800487160682678,grad_norm: 0.952050507841254, iteration: 59557
loss: 0.9444338083267212,grad_norm: 0.9999990820995369, iteration: 59558
loss: 1.0183210372924805,grad_norm: 0.9999990824254505, iteration: 59559
loss: 0.9980416297912598,grad_norm: 0.9999990470721383, iteration: 59560
loss: 1.0202131271362305,grad_norm: 0.9999992270077164, iteration: 59561
loss: 1.0087494850158691,grad_norm: 0.8913421683091313, iteration: 59562
loss: 0.9800286293029785,grad_norm: 0.8739178995785593, iteration: 59563
loss: 1.0574078559875488,grad_norm: 0.9999999029505747, iteration: 59564
loss: 1.0073093175888062,grad_norm: 0.9999991987558554, iteration: 59565
loss: 1.032462477684021,grad_norm: 0.9999991001575039, iteration: 59566
loss: 0.9999109506607056,grad_norm: 0.9999990698921183, iteration: 59567
loss: 1.0165095329284668,grad_norm: 0.9340144901931503, iteration: 59568
loss: 0.9854942560195923,grad_norm: 0.999999237882634, iteration: 59569
loss: 1.0086922645568848,grad_norm: 0.9999989353527383, iteration: 59570
loss: 1.0176262855529785,grad_norm: 0.9999989855480879, iteration: 59571
loss: 1.0037059783935547,grad_norm: 0.9213906433915272, iteration: 59572
loss: 0.9706462025642395,grad_norm: 0.9999991875879772, iteration: 59573
loss: 0.9781680703163147,grad_norm: 0.9999990461881529, iteration: 59574
loss: 1.0100033283233643,grad_norm: 0.9384851149499234, iteration: 59575
loss: 1.0112608671188354,grad_norm: 0.884000782009906, iteration: 59576
loss: 0.9739428758621216,grad_norm: 0.9999990517406349, iteration: 59577
loss: 1.0150270462036133,grad_norm: 0.9999991260182218, iteration: 59578
loss: 1.0007907152175903,grad_norm: 0.9999992398900194, iteration: 59579
loss: 1.003275752067566,grad_norm: 0.9800384973899924, iteration: 59580
loss: 0.9946376085281372,grad_norm: 0.9312079092559342, iteration: 59581
loss: 0.9842166900634766,grad_norm: 0.9999994542689061, iteration: 59582
loss: 0.994763195514679,grad_norm: 0.9999991623569467, iteration: 59583
loss: 1.0045164823532104,grad_norm: 0.9263435908404503, iteration: 59584
loss: 0.9930199980735779,grad_norm: 0.9999991644747356, iteration: 59585
loss: 1.0069005489349365,grad_norm: 0.9999991216970966, iteration: 59586
loss: 1.1763725280761719,grad_norm: 0.9999990987241979, iteration: 59587
loss: 1.0470938682556152,grad_norm: 0.9104091089656364, iteration: 59588
loss: 1.0289957523345947,grad_norm: 0.9999991988507858, iteration: 59589
loss: 1.014860987663269,grad_norm: 0.9999992838462689, iteration: 59590
loss: 0.9969335198402405,grad_norm: 0.9999995210041129, iteration: 59591
loss: 1.0066291093826294,grad_norm: 0.878214625634832, iteration: 59592
loss: 1.006255030632019,grad_norm: 0.9999993490914736, iteration: 59593
loss: 1.0273061990737915,grad_norm: 0.9999991290642493, iteration: 59594
loss: 0.9774495363235474,grad_norm: 0.9797584626198238, iteration: 59595
loss: 0.9988629817962646,grad_norm: 0.9999997018410365, iteration: 59596
loss: 0.9662600159645081,grad_norm: 0.9999990928815384, iteration: 59597
loss: 1.008239507675171,grad_norm: 0.7724345852266978, iteration: 59598
loss: 1.0005922317504883,grad_norm: 0.9999992160793542, iteration: 59599
loss: 1.0207597017288208,grad_norm: 0.8919834351912047, iteration: 59600
loss: 1.0383795499801636,grad_norm: 0.9999998668643467, iteration: 59601
loss: 1.0251003503799438,grad_norm: 0.9999989183573443, iteration: 59602
loss: 1.0151190757751465,grad_norm: 0.9999989433496645, iteration: 59603
loss: 1.0361111164093018,grad_norm: 0.9999992983910706, iteration: 59604
loss: 1.0049221515655518,grad_norm: 0.8078200668200183, iteration: 59605
loss: 1.1259557008743286,grad_norm: 0.9999991412450697, iteration: 59606
loss: 0.9960736632347107,grad_norm: 0.9927210164343342, iteration: 59607
loss: 0.9937857985496521,grad_norm: 0.826149500074203, iteration: 59608
loss: 0.9824690818786621,grad_norm: 0.9999989757197306, iteration: 59609
loss: 0.9955673217773438,grad_norm: 0.921951591392832, iteration: 59610
loss: 0.9930733442306519,grad_norm: 0.9999990749099037, iteration: 59611
loss: 1.0098786354064941,grad_norm: 0.9999990987471973, iteration: 59612
loss: 0.990991473197937,grad_norm: 0.9859134935863744, iteration: 59613
loss: 0.9722616076469421,grad_norm: 0.8956527693045762, iteration: 59614
loss: 1.0087718963623047,grad_norm: 0.9999990229791682, iteration: 59615
loss: 1.0018603801727295,grad_norm: 0.9999991302262157, iteration: 59616
loss: 0.9958352446556091,grad_norm: 0.9623369452946632, iteration: 59617
loss: 1.0113459825515747,grad_norm: 0.8655655116365922, iteration: 59618
loss: 1.040949821472168,grad_norm: 0.999999238427921, iteration: 59619
loss: 0.9861665964126587,grad_norm: 0.9747527418555765, iteration: 59620
loss: 0.9936177730560303,grad_norm: 0.9999990557130273, iteration: 59621
loss: 1.02633535861969,grad_norm: 0.9999991986523157, iteration: 59622
loss: 1.051067590713501,grad_norm: 0.9999991617470905, iteration: 59623
loss: 0.9437360167503357,grad_norm: 0.9999993164364617, iteration: 59624
loss: 1.0442700386047363,grad_norm: 0.9999999205197919, iteration: 59625
loss: 1.0283079147338867,grad_norm: 0.9999990404616484, iteration: 59626
loss: 1.0150357484817505,grad_norm: 0.9999991336653135, iteration: 59627
loss: 1.05670166015625,grad_norm: 0.9999991900095624, iteration: 59628
loss: 0.9983112215995789,grad_norm: 0.999999172846054, iteration: 59629
loss: 1.0341718196868896,grad_norm: 0.9999990781943483, iteration: 59630
loss: 1.0354835987091064,grad_norm: 0.9999992252502014, iteration: 59631
loss: 0.9862855076789856,grad_norm: 0.9999991036924687, iteration: 59632
loss: 0.9948833584785461,grad_norm: 0.999999165604092, iteration: 59633
loss: 1.0053220987319946,grad_norm: 0.9999991331718936, iteration: 59634
loss: 1.06387460231781,grad_norm: 0.9999992290085448, iteration: 59635
loss: 0.9892111420631409,grad_norm: 0.9806015729971126, iteration: 59636
loss: 1.006532073020935,grad_norm: 0.9820996134941046, iteration: 59637
loss: 0.9789800643920898,grad_norm: 0.9999991096222053, iteration: 59638
loss: 1.0425246953964233,grad_norm: 0.9999998561597819, iteration: 59639
loss: 1.0064215660095215,grad_norm: 0.8728821392374246, iteration: 59640
loss: 0.9966527223587036,grad_norm: 0.7927134530474274, iteration: 59641
loss: 1.0061229467391968,grad_norm: 0.9856264274346604, iteration: 59642
loss: 0.9707478880882263,grad_norm: 0.9999991172862474, iteration: 59643
loss: 1.0037490129470825,grad_norm: 0.9999990865218101, iteration: 59644
loss: 1.0823161602020264,grad_norm: 0.9999994430036109, iteration: 59645
loss: 1.0024230480194092,grad_norm: 0.9999990563109681, iteration: 59646
loss: 1.0843867063522339,grad_norm: 0.9999998101324277, iteration: 59647
loss: 1.0010102987289429,grad_norm: 0.95526803509804, iteration: 59648
loss: 0.9785712957382202,grad_norm: 0.9623514340373238, iteration: 59649
loss: 1.0019969940185547,grad_norm: 0.9999992561760017, iteration: 59650
loss: 1.0892101526260376,grad_norm: 1.000000077536173, iteration: 59651
loss: 0.9908255338668823,grad_norm: 0.9999991294669608, iteration: 59652
loss: 1.0192756652832031,grad_norm: 0.9999990290384169, iteration: 59653
loss: 1.0183534622192383,grad_norm: 0.9999990959886116, iteration: 59654
loss: 1.0168484449386597,grad_norm: 0.8542981762022384, iteration: 59655
loss: 1.0001815557479858,grad_norm: 0.9999990526774253, iteration: 59656
loss: 0.9750094413757324,grad_norm: 0.9999991066812283, iteration: 59657
loss: 0.9936357140541077,grad_norm: 0.9999992123781645, iteration: 59658
loss: 1.0068261623382568,grad_norm: 0.9999992855334571, iteration: 59659
loss: 1.0161691904067993,grad_norm: 0.9999990013569457, iteration: 59660
loss: 0.9947255253791809,grad_norm: 0.9999998748394938, iteration: 59661
loss: 0.9925292730331421,grad_norm: 0.9873695202361881, iteration: 59662
loss: 1.013196349143982,grad_norm: 0.9617118287502794, iteration: 59663
loss: 1.0159181356430054,grad_norm: 0.9999991750056878, iteration: 59664
loss: 1.068060040473938,grad_norm: 0.9999997213932166, iteration: 59665
loss: 1.0487736463546753,grad_norm: 0.9999993634598943, iteration: 59666
loss: 1.0185949802398682,grad_norm: 0.9999990890524728, iteration: 59667
loss: 0.9948708415031433,grad_norm: 0.9730493964424515, iteration: 59668
loss: 1.0070648193359375,grad_norm: 0.9829720652069226, iteration: 59669
loss: 0.9785205721855164,grad_norm: 0.8941413459914525, iteration: 59670
loss: 0.9843176603317261,grad_norm: 0.9999990840719465, iteration: 59671
loss: 0.9876401424407959,grad_norm: 0.9306083832265322, iteration: 59672
loss: 1.0191309452056885,grad_norm: 0.9347468634934474, iteration: 59673
loss: 0.9498681426048279,grad_norm: 0.9999989938681368, iteration: 59674
loss: 1.0181397199630737,grad_norm: 0.9999991460754141, iteration: 59675
loss: 0.9959205985069275,grad_norm: 0.8907439496618319, iteration: 59676
loss: 1.0381276607513428,grad_norm: 0.9999992494255107, iteration: 59677
loss: 1.0096157789230347,grad_norm: 0.9999990692262078, iteration: 59678
loss: 0.9656087756156921,grad_norm: 0.9999991586154948, iteration: 59679
loss: 1.000054955482483,grad_norm: 0.9999991539409571, iteration: 59680
loss: 0.9396342039108276,grad_norm: 0.8737026098190674, iteration: 59681
loss: 1.010638952255249,grad_norm: 0.9693032258861511, iteration: 59682
loss: 0.983307421207428,grad_norm: 0.9999991149282434, iteration: 59683
loss: 1.0040197372436523,grad_norm: 0.8427440987181137, iteration: 59684
loss: 1.0003089904785156,grad_norm: 0.999999163226497, iteration: 59685
loss: 0.9530347585678101,grad_norm: 0.9788097724495175, iteration: 59686
loss: 0.973134458065033,grad_norm: 0.9999990676554514, iteration: 59687
loss: 1.0060217380523682,grad_norm: 0.8058953777740503, iteration: 59688
loss: 0.962694525718689,grad_norm: 0.9999991202994848, iteration: 59689
loss: 0.9789835810661316,grad_norm: 0.9999991839284313, iteration: 59690
loss: 1.053706407546997,grad_norm: 0.9999999112495169, iteration: 59691
loss: 0.9826148152351379,grad_norm: 0.999999383311814, iteration: 59692
loss: 1.0180541276931763,grad_norm: 0.94195845274576, iteration: 59693
loss: 0.9849337339401245,grad_norm: 0.999999038465534, iteration: 59694
loss: 0.9921452403068542,grad_norm: 0.9999990938018521, iteration: 59695
loss: 0.9812132716178894,grad_norm: 0.9999991304775666, iteration: 59696
loss: 1.0027774572372437,grad_norm: 0.9828217874565915, iteration: 59697
loss: 1.0084866285324097,grad_norm: 0.9999990689259605, iteration: 59698
loss: 1.030672550201416,grad_norm: 0.9999991311741961, iteration: 59699
loss: 1.022603988647461,grad_norm: 0.9999991722490483, iteration: 59700
loss: 0.997735857963562,grad_norm: 0.9999992062403934, iteration: 59701
loss: 1.022217869758606,grad_norm: 0.9272170201003145, iteration: 59702
loss: 0.9666608572006226,grad_norm: 0.7612772777994032, iteration: 59703
loss: 1.0030337572097778,grad_norm: 0.9999991479734627, iteration: 59704
loss: 1.000656247138977,grad_norm: 0.9999990850555692, iteration: 59705
loss: 1.0400919914245605,grad_norm: 0.9755744576130464, iteration: 59706
loss: 1.008052945137024,grad_norm: 0.9192032545236803, iteration: 59707
loss: 0.9742038249969482,grad_norm: 0.8451596535625867, iteration: 59708
loss: 0.9970376491546631,grad_norm: 0.8572652591678553, iteration: 59709
loss: 1.0003927946090698,grad_norm: 0.8419228265499427, iteration: 59710
loss: 1.0728421211242676,grad_norm: 0.9999992406630869, iteration: 59711
loss: 1.015572190284729,grad_norm: 0.9818022443858523, iteration: 59712
loss: 1.0009490251541138,grad_norm: 0.9999991751100918, iteration: 59713
loss: 0.9752796292304993,grad_norm: 0.9999989359740005, iteration: 59714
loss: 1.029873251914978,grad_norm: 0.9999992708523742, iteration: 59715
loss: 1.0221891403198242,grad_norm: 0.9999997690495082, iteration: 59716
loss: 0.9989983439445496,grad_norm: 0.999999073177114, iteration: 59717
loss: 1.0141409635543823,grad_norm: 0.9999990749106135, iteration: 59718
loss: 0.9721521139144897,grad_norm: 0.9999991740785995, iteration: 59719
loss: 1.0176105499267578,grad_norm: 0.9999990847119111, iteration: 59720
loss: 0.9783635139465332,grad_norm: 0.8984542927956708, iteration: 59721
loss: 1.005744218826294,grad_norm: 0.9823629588486285, iteration: 59722
loss: 1.0169907808303833,grad_norm: 0.9999992767451203, iteration: 59723
loss: 1.0342144966125488,grad_norm: 0.8813270733328223, iteration: 59724
loss: 0.9819789528846741,grad_norm: 0.996602715871572, iteration: 59725
loss: 1.0015381574630737,grad_norm: 0.9999990383557451, iteration: 59726
loss: 1.0128862857818604,grad_norm: 0.9999992620063243, iteration: 59727
loss: 1.047646164894104,grad_norm: 0.9526027354665219, iteration: 59728
loss: 1.0071728229522705,grad_norm: 0.999999090273363, iteration: 59729
loss: 1.0012099742889404,grad_norm: 0.999999209365474, iteration: 59730
loss: 1.0097934007644653,grad_norm: 0.9999990572663501, iteration: 59731
loss: 1.0352939367294312,grad_norm: 0.9999998981630985, iteration: 59732
loss: 1.041969656944275,grad_norm: 0.9999990897212343, iteration: 59733
loss: 1.0695949792861938,grad_norm: 0.9999992017828917, iteration: 59734
loss: 0.9923390746116638,grad_norm: 0.9843011531223684, iteration: 59735
loss: 0.9647153615951538,grad_norm: 0.8675652600067032, iteration: 59736
loss: 1.044010043144226,grad_norm: 0.9989074984681494, iteration: 59737
loss: 1.0169907808303833,grad_norm: 0.9999997694122198, iteration: 59738
loss: 1.0118987560272217,grad_norm: 0.9999994545076527, iteration: 59739
loss: 1.0122994184494019,grad_norm: 0.9999990351880674, iteration: 59740
loss: 1.0016326904296875,grad_norm: 0.9002461932595299, iteration: 59741
loss: 1.016498327255249,grad_norm: 0.9999993833648259, iteration: 59742
loss: 0.9955927133560181,grad_norm: 0.9999990966191924, iteration: 59743
loss: 1.0184235572814941,grad_norm: 0.9999990640503541, iteration: 59744
loss: 0.9879344701766968,grad_norm: 0.9714206625940182, iteration: 59745
loss: 1.0197840929031372,grad_norm: 0.9999991893758657, iteration: 59746
loss: 1.0100617408752441,grad_norm: 0.9999991493367525, iteration: 59747
loss: 1.0065791606903076,grad_norm: 0.9999992933826354, iteration: 59748
loss: 0.9980794191360474,grad_norm: 0.8956620695509149, iteration: 59749
loss: 0.9904783368110657,grad_norm: 0.9725845062928813, iteration: 59750
loss: 1.0120329856872559,grad_norm: 0.986351579015256, iteration: 59751
loss: 0.9956562519073486,grad_norm: 0.9999998784290491, iteration: 59752
loss: 1.0343849658966064,grad_norm: 0.999999660444172, iteration: 59753
loss: 0.9201201796531677,grad_norm: 0.9487155979053786, iteration: 59754
loss: 1.0207602977752686,grad_norm: 0.9999995768560087, iteration: 59755
loss: 1.1022204160690308,grad_norm: 0.9999999021664067, iteration: 59756
loss: 0.9910036325454712,grad_norm: 0.974171126642155, iteration: 59757
loss: 1.0016363859176636,grad_norm: 0.9999991063315443, iteration: 59758
loss: 1.0181578397750854,grad_norm: 0.9999990762724777, iteration: 59759
loss: 0.9920469522476196,grad_norm: 0.9999990899535489, iteration: 59760
loss: 1.0055359601974487,grad_norm: 0.9897649527807258, iteration: 59761
loss: 0.9830986261367798,grad_norm: 0.9111386692571801, iteration: 59762
loss: 1.0481085777282715,grad_norm: 0.9566381472879335, iteration: 59763
loss: 1.012825608253479,grad_norm: 0.9999991359456215, iteration: 59764
loss: 1.0138071775436401,grad_norm: 0.9980479579809641, iteration: 59765
loss: 1.0232142210006714,grad_norm: 0.9999990924965358, iteration: 59766
loss: 0.9785722494125366,grad_norm: 0.9999991681202361, iteration: 59767
loss: 1.0078985691070557,grad_norm: 0.9015787066697785, iteration: 59768
loss: 0.9803497791290283,grad_norm: 0.9999991158325294, iteration: 59769
loss: 1.0091867446899414,grad_norm: 0.9999990370494881, iteration: 59770
loss: 1.0024712085723877,grad_norm: 0.9999992715279655, iteration: 59771
loss: 1.0128097534179688,grad_norm: 0.8737715456957501, iteration: 59772
loss: 1.0156069993972778,grad_norm: 0.9403588783093604, iteration: 59773
loss: 1.0032639503479004,grad_norm: 0.9831678282507715, iteration: 59774
loss: 0.9956635236740112,grad_norm: 0.99999910894862, iteration: 59775
loss: 0.9902411103248596,grad_norm: 0.9146469808908216, iteration: 59776
loss: 1.0178546905517578,grad_norm: 0.862256885486886, iteration: 59777
loss: 0.9988558292388916,grad_norm: 0.9151357901241441, iteration: 59778
loss: 1.0261259078979492,grad_norm: 0.9387974265359452, iteration: 59779
loss: 1.021438479423523,grad_norm: 0.9999990793382565, iteration: 59780
loss: 1.025111436843872,grad_norm: 0.8738454339399212, iteration: 59781
loss: 1.008670687675476,grad_norm: 0.9999990848570666, iteration: 59782
loss: 1.006142258644104,grad_norm: 0.9091798284459555, iteration: 59783
loss: 1.0332586765289307,grad_norm: 0.9999991800389137, iteration: 59784
loss: 1.027774453163147,grad_norm: 0.9145926054150678, iteration: 59785
loss: 1.0365396738052368,grad_norm: 0.9673114763890992, iteration: 59786
loss: 0.9820442795753479,grad_norm: 0.7940370611469523, iteration: 59787
loss: 1.0095783472061157,grad_norm: 0.9999992135376278, iteration: 59788
loss: 0.9847586750984192,grad_norm: 0.9999991271758342, iteration: 59789
loss: 0.9755423665046692,grad_norm: 0.876489830393636, iteration: 59790
loss: 0.9908924102783203,grad_norm: 0.9793073070561906, iteration: 59791
loss: 0.9891039729118347,grad_norm: 0.9999990088869186, iteration: 59792
loss: 1.0095415115356445,grad_norm: 0.9999993880279289, iteration: 59793
loss: 0.9911744594573975,grad_norm: 0.9999990503192117, iteration: 59794
loss: 1.0015238523483276,grad_norm: 0.8963529311039737, iteration: 59795
loss: 1.0119514465332031,grad_norm: 0.9999992732753313, iteration: 59796
loss: 0.9881659150123596,grad_norm: 0.9999994158425857, iteration: 59797
loss: 0.9915500283241272,grad_norm: 0.915956759419964, iteration: 59798
loss: 0.9974269866943359,grad_norm: 0.9999992464954658, iteration: 59799
loss: 0.9926463961601257,grad_norm: 0.9999991314473049, iteration: 59800
loss: 1.0169235467910767,grad_norm: 0.9999990848274898, iteration: 59801
loss: 1.0160622596740723,grad_norm: 0.9999990957986639, iteration: 59802
loss: 0.9640071988105774,grad_norm: 0.999999226127764, iteration: 59803
loss: 1.0204657316207886,grad_norm: 0.9999990993760022, iteration: 59804
loss: 1.009437084197998,grad_norm: 0.999999209255488, iteration: 59805
loss: 1.0066324472427368,grad_norm: 0.999999136815832, iteration: 59806
loss: 0.9923607707023621,grad_norm: 0.9999990823756043, iteration: 59807
loss: 0.966046154499054,grad_norm: 0.9999991782269828, iteration: 59808
loss: 0.9848911166191101,grad_norm: 0.9999989758065674, iteration: 59809
loss: 1.0135046243667603,grad_norm: 0.9326273307068015, iteration: 59810
loss: 1.0272760391235352,grad_norm: 0.9999991606310716, iteration: 59811
loss: 1.0032527446746826,grad_norm: 0.999999124448668, iteration: 59812
loss: 0.9723235368728638,grad_norm: 0.9999997583350125, iteration: 59813
loss: 1.0502768754959106,grad_norm: 0.9999990848716556, iteration: 59814
loss: 0.9595853090286255,grad_norm: 0.9999989928591504, iteration: 59815
loss: 1.0090663433074951,grad_norm: 0.9999990695414659, iteration: 59816
loss: 0.996530294418335,grad_norm: 0.8446895290812108, iteration: 59817
loss: 0.9687016606330872,grad_norm: 0.9258634658920585, iteration: 59818
loss: 1.0565376281738281,grad_norm: 0.9999992786182702, iteration: 59819
loss: 0.961658239364624,grad_norm: 0.9999989389057615, iteration: 59820
loss: 1.0323156118392944,grad_norm: 0.9999990403088643, iteration: 59821
loss: 1.0110126733779907,grad_norm: 0.9999991145982505, iteration: 59822
loss: 0.9982401132583618,grad_norm: 0.9999991150892367, iteration: 59823
loss: 1.0022504329681396,grad_norm: 0.9999992111366282, iteration: 59824
loss: 0.9671899676322937,grad_norm: 0.9999991723687828, iteration: 59825
loss: 0.9962242245674133,grad_norm: 0.9904089259223328, iteration: 59826
loss: 0.9802170395851135,grad_norm: 0.9894797475825756, iteration: 59827
loss: 0.9901745319366455,grad_norm: 0.9999992724896387, iteration: 59828
loss: 1.0265893936157227,grad_norm: 0.9999996414209097, iteration: 59829
loss: 1.0064958333969116,grad_norm: 0.999999179524045, iteration: 59830
loss: 0.983916163444519,grad_norm: 0.9572781416128098, iteration: 59831
loss: 1.0093629360198975,grad_norm: 0.840173931194916, iteration: 59832
loss: 1.0059840679168701,grad_norm: 0.9999996026049952, iteration: 59833
loss: 1.0158987045288086,grad_norm: 0.9999999256116446, iteration: 59834
loss: 1.0073548555374146,grad_norm: 0.9868291970109326, iteration: 59835
loss: 0.9771001935005188,grad_norm: 0.8399833040301042, iteration: 59836
loss: 1.0586873292922974,grad_norm: 0.999999045267141, iteration: 59837
loss: 0.9926697015762329,grad_norm: 0.9999991117776853, iteration: 59838
loss: 1.0089763402938843,grad_norm: 0.999999025117316, iteration: 59839
loss: 1.0851584672927856,grad_norm: 0.9999993682058778, iteration: 59840
loss: 1.0751830339431763,grad_norm: 0.9999993674737763, iteration: 59841
loss: 1.0047837495803833,grad_norm: 0.99999915357325, iteration: 59842
loss: 1.0307894945144653,grad_norm: 0.9999991016869565, iteration: 59843
loss: 0.9797366857528687,grad_norm: 0.9848345344141074, iteration: 59844
loss: 1.2087926864624023,grad_norm: 0.9999993025297306, iteration: 59845
loss: 1.0146946907043457,grad_norm: 0.9999990850789046, iteration: 59846
loss: 1.1253937482833862,grad_norm: 0.9999994188136657, iteration: 59847
loss: 1.004456639289856,grad_norm: 0.9999992082130099, iteration: 59848
loss: 1.0027186870574951,grad_norm: 0.8878933205825711, iteration: 59849
loss: 1.0166658163070679,grad_norm: 0.9522108353714945, iteration: 59850
loss: 1.0358985662460327,grad_norm: 0.9999992549040957, iteration: 59851
loss: 1.0116387605667114,grad_norm: 0.9999990245398664, iteration: 59852
loss: 0.9638680219650269,grad_norm: 0.9976451992681287, iteration: 59853
loss: 0.9741402268409729,grad_norm: 0.999999110722902, iteration: 59854
loss: 0.9952537417411804,grad_norm: 0.9762059613574035, iteration: 59855
loss: 1.08177649974823,grad_norm: 0.9999991959295669, iteration: 59856
loss: 1.0108118057250977,grad_norm: 0.9538922383686187, iteration: 59857
loss: 0.9911945462226868,grad_norm: 0.9999991080143777, iteration: 59858
loss: 1.0100243091583252,grad_norm: 0.9178906198753264, iteration: 59859
loss: 0.9892603754997253,grad_norm: 0.9999991549374622, iteration: 59860
loss: 1.0581456422805786,grad_norm: 0.999999234483843, iteration: 59861
loss: 0.9705642461776733,grad_norm: 0.9811383076151925, iteration: 59862
loss: 1.0285993814468384,grad_norm: 0.9999989904406693, iteration: 59863
loss: 0.9981138706207275,grad_norm: 0.9732123950013097, iteration: 59864
loss: 1.0087915658950806,grad_norm: 0.9999994335044, iteration: 59865
loss: 1.0586776733398438,grad_norm: 0.9999993980304948, iteration: 59866
loss: 0.9971862435340881,grad_norm: 0.9999994751502639, iteration: 59867
loss: 0.9827485680580139,grad_norm: 0.9131780610412381, iteration: 59868
loss: 1.004217505455017,grad_norm: 0.9999990505028327, iteration: 59869
loss: 1.0070147514343262,grad_norm: 0.9999990761389022, iteration: 59870
loss: 0.9820113778114319,grad_norm: 0.9953072935975026, iteration: 59871
loss: 1.01305091381073,grad_norm: 0.951689879327158, iteration: 59872
loss: 0.9766108989715576,grad_norm: 0.9085826695456367, iteration: 59873
loss: 1.0300294160842896,grad_norm: 0.9992811931532859, iteration: 59874
loss: 0.9906326532363892,grad_norm: 0.999999144873456, iteration: 59875
loss: 1.012688159942627,grad_norm: 0.9276637708093242, iteration: 59876
loss: 1.0500181913375854,grad_norm: 0.9999995023833974, iteration: 59877
loss: 1.002747654914856,grad_norm: 0.9169444902307962, iteration: 59878
loss: 1.009482741355896,grad_norm: 0.9999992453573973, iteration: 59879
loss: 1.0090606212615967,grad_norm: 0.9999989702988856, iteration: 59880
loss: 1.036082148551941,grad_norm: 0.999999022138901, iteration: 59881
loss: 1.02582585811615,grad_norm: 0.9999992167927134, iteration: 59882
loss: 0.9967949390411377,grad_norm: 0.999999092416598, iteration: 59883
loss: 0.980191707611084,grad_norm: 0.9069405091077982, iteration: 59884
loss: 0.998519778251648,grad_norm: 0.9339735597393218, iteration: 59885
loss: 1.0156432390213013,grad_norm: 0.999999083939824, iteration: 59886
loss: 0.9901419281959534,grad_norm: 0.9999994886665367, iteration: 59887
loss: 1.0177056789398193,grad_norm: 0.9999993022161863, iteration: 59888
loss: 0.9964775443077087,grad_norm: 0.9256336191876827, iteration: 59889
loss: 1.0187381505966187,grad_norm: 0.999999124224981, iteration: 59890
loss: 1.0472407341003418,grad_norm: 0.9999998231086126, iteration: 59891
loss: 0.9998841881752014,grad_norm: 0.8684708763341725, iteration: 59892
loss: 1.0083242654800415,grad_norm: 0.9999991354989567, iteration: 59893
loss: 0.9737357497215271,grad_norm: 0.9601994715225011, iteration: 59894
loss: 1.047502040863037,grad_norm: 0.9999992707829191, iteration: 59895
loss: 0.9890655875205994,grad_norm: 0.9999990421764354, iteration: 59896
loss: 0.9905550479888916,grad_norm: 0.9999998987417507, iteration: 59897
loss: 0.993794322013855,grad_norm: 0.8433070357411495, iteration: 59898
loss: 0.9862305521965027,grad_norm: 0.9148443629225805, iteration: 59899
loss: 0.994592547416687,grad_norm: 0.9999993356421779, iteration: 59900
loss: 0.9840571284294128,grad_norm: 0.9999995552539852, iteration: 59901
loss: 1.0151212215423584,grad_norm: 0.999999121074932, iteration: 59902
loss: 1.003371000289917,grad_norm: 0.9774041473188904, iteration: 59903
loss: 1.0718923807144165,grad_norm: 0.9999991099231599, iteration: 59904
loss: 1.0143791437149048,grad_norm: 0.9999992417107587, iteration: 59905
loss: 1.0619944334030151,grad_norm: 0.999999304063173, iteration: 59906
loss: 1.0047398805618286,grad_norm: 0.9999997212117843, iteration: 59907
loss: 0.9699892997741699,grad_norm: 0.9647235081035751, iteration: 59908
loss: 0.9618719220161438,grad_norm: 0.7819409088005845, iteration: 59909
loss: 0.9992205500602722,grad_norm: 0.9256068191209786, iteration: 59910
loss: 1.020328402519226,grad_norm: 0.9286510044957673, iteration: 59911
loss: 0.993496835231781,grad_norm: 0.9999990260197219, iteration: 59912
loss: 0.9633826613426208,grad_norm: 0.9999990763560734, iteration: 59913
loss: 0.9737746715545654,grad_norm: 0.8425475954037589, iteration: 59914
loss: 1.076426386833191,grad_norm: 0.9999996486431322, iteration: 59915
loss: 1.0527931451797485,grad_norm: 0.9999996540409402, iteration: 59916
loss: 0.9836636781692505,grad_norm: 0.9999990766275699, iteration: 59917
loss: 1.0689681768417358,grad_norm: 0.9999995148863551, iteration: 59918
loss: 1.0016411542892456,grad_norm: 0.9175711033346968, iteration: 59919
loss: 0.995305597782135,grad_norm: 0.9999991671277585, iteration: 59920
loss: 1.0039626359939575,grad_norm: 0.9067440276424019, iteration: 59921
loss: 1.0010157823562622,grad_norm: 0.9999992894380288, iteration: 59922
loss: 0.9813758134841919,grad_norm: 0.9999992211447847, iteration: 59923
loss: 0.992717444896698,grad_norm: 0.9999990599194422, iteration: 59924
loss: 0.9928049445152283,grad_norm: 0.9037744035680261, iteration: 59925
loss: 1.0034078359603882,grad_norm: 0.8986330506310625, iteration: 59926
loss: 1.0446503162384033,grad_norm: 0.8560811615735507, iteration: 59927
loss: 1.012297511100769,grad_norm: 0.9977222544449261, iteration: 59928
loss: 0.9954584240913391,grad_norm: 0.9186694034359121, iteration: 59929
loss: 0.9812833666801453,grad_norm: 0.9999992469364275, iteration: 59930
loss: 1.0208483934402466,grad_norm: 0.9999991715044829, iteration: 59931
loss: 1.0149147510528564,grad_norm: 0.9999991458459409, iteration: 59932
loss: 0.9901197552680969,grad_norm: 0.9999992091493073, iteration: 59933
loss: 0.9724003076553345,grad_norm: 0.9999993180256881, iteration: 59934
loss: 1.0064280033111572,grad_norm: 0.9639925113717754, iteration: 59935
loss: 1.0014973878860474,grad_norm: 0.9999992054277601, iteration: 59936
loss: 1.0117474794387817,grad_norm: 0.9999992471067942, iteration: 59937
loss: 1.0071474313735962,grad_norm: 0.9999997381404372, iteration: 59938
loss: 1.0268473625183105,grad_norm: 0.9999995825019444, iteration: 59939
loss: 1.0071525573730469,grad_norm: 0.9482094503331167, iteration: 59940
loss: 1.001409649848938,grad_norm: 0.999999272034001, iteration: 59941
loss: 0.9617102742195129,grad_norm: 0.8948434606263601, iteration: 59942
loss: 1.0123635530471802,grad_norm: 0.9999992684504386, iteration: 59943
loss: 1.0000437498092651,grad_norm: 0.9999991768399359, iteration: 59944
loss: 0.989030122756958,grad_norm: 0.9320667742551388, iteration: 59945
loss: 0.9811405539512634,grad_norm: 0.8220589712669921, iteration: 59946
loss: 0.9799199104309082,grad_norm: 0.9110195495659678, iteration: 59947
loss: 1.002384901046753,grad_norm: 0.9812386120555812, iteration: 59948
loss: 1.014947533607483,grad_norm: 0.9999993857266389, iteration: 59949
loss: 1.0062224864959717,grad_norm: 0.9999992166685349, iteration: 59950
loss: 0.9671804308891296,grad_norm: 0.9892385958148334, iteration: 59951
loss: 0.9832605719566345,grad_norm: 0.9225722205911879, iteration: 59952
loss: 0.9727512001991272,grad_norm: 0.999999209736284, iteration: 59953
loss: 1.010099172592163,grad_norm: 0.9999991473152973, iteration: 59954
loss: 0.996955931186676,grad_norm: 0.9999992448824566, iteration: 59955
loss: 1.0231393575668335,grad_norm: 0.9999991290731802, iteration: 59956
loss: 1.0281482934951782,grad_norm: 0.8645795716257503, iteration: 59957
loss: 1.0113757848739624,grad_norm: 0.9999992189693083, iteration: 59958
loss: 0.9590601325035095,grad_norm: 0.9930902207226949, iteration: 59959
loss: 1.021937608718872,grad_norm: 0.9864466990618589, iteration: 59960
loss: 0.995148777961731,grad_norm: 0.9626005176697807, iteration: 59961
loss: 0.9932477474212646,grad_norm: 0.9999991055505258, iteration: 59962
loss: 1.0123049020767212,grad_norm: 0.999999162703323, iteration: 59963
loss: 0.9985169172286987,grad_norm: 0.9999990413688612, iteration: 59964
loss: 0.9999727010726929,grad_norm: 0.9999994208320888, iteration: 59965
loss: 1.050022006034851,grad_norm: 0.999999074443168, iteration: 59966
loss: 1.038832187652588,grad_norm: 0.9999991515306023, iteration: 59967
loss: 0.9492635726928711,grad_norm: 0.9792466397631642, iteration: 59968
loss: 0.990814208984375,grad_norm: 0.9999992238905546, iteration: 59969
loss: 0.9895972609519958,grad_norm: 0.9999991458301032, iteration: 59970
loss: 0.9919461607933044,grad_norm: 0.9742516250162123, iteration: 59971
loss: 1.055580496788025,grad_norm: 0.9999993195077059, iteration: 59972
loss: 1.000497579574585,grad_norm: 0.9999994164993962, iteration: 59973
loss: 1.017792820930481,grad_norm: 0.9999998459764472, iteration: 59974
loss: 0.990431010723114,grad_norm: 0.9999991292441246, iteration: 59975
loss: 0.9610399603843689,grad_norm: 0.9999992085692396, iteration: 59976
loss: 1.044373631477356,grad_norm: 0.9999991051010421, iteration: 59977
loss: 0.9921103119850159,grad_norm: 0.9999991992229684, iteration: 59978
loss: 0.9826223254203796,grad_norm: 0.861171967734929, iteration: 59979
loss: 0.9768609404563904,grad_norm: 0.9999990373548148, iteration: 59980
loss: 1.02469801902771,grad_norm: 0.9999992840353604, iteration: 59981
loss: 1.0092074871063232,grad_norm: 0.98721421623118, iteration: 59982
loss: 0.9867452383041382,grad_norm: 0.9999991510328907, iteration: 59983
loss: 1.011873483657837,grad_norm: 0.9999994044175188, iteration: 59984
loss: 1.0113301277160645,grad_norm: 0.9309442918817771, iteration: 59985
loss: 0.9787785410881042,grad_norm: 0.8590949324217789, iteration: 59986
loss: 0.9671519994735718,grad_norm: 0.9999992339058553, iteration: 59987
loss: 0.9486219882965088,grad_norm: 0.999999064927805, iteration: 59988
loss: 1.0135396718978882,grad_norm: 0.9999992045427248, iteration: 59989
loss: 0.9846996665000916,grad_norm: 0.9994517077416822, iteration: 59990
loss: 1.0210915803909302,grad_norm: 0.8678788847325082, iteration: 59991
loss: 1.038336157798767,grad_norm: 0.978562275899046, iteration: 59992
loss: 1.0093146562576294,grad_norm: 0.9458525562060621, iteration: 59993
loss: 0.9806603789329529,grad_norm: 0.9194990603888603, iteration: 59994
loss: 0.9223906397819519,grad_norm: 0.9999990914975407, iteration: 59995
loss: 1.0084306001663208,grad_norm: 0.9517765434026823, iteration: 59996
loss: 1.0057291984558105,grad_norm: 0.8854860415379726, iteration: 59997
loss: 0.9769692420959473,grad_norm: 0.9389085391428875, iteration: 59998
loss: 0.9676855206489563,grad_norm: 0.9657851015192038, iteration: 59999
loss: 0.9688233137130737,grad_norm: 0.9999991559916108, iteration: 60000
Evaluating at step 60000
{'val': 0.9958051592111588, 'test': 2.5932928952165306}
loss: 1.0156588554382324,grad_norm: 0.9484999957488355, iteration: 60001
loss: 1.0446962118148804,grad_norm: 0.9656037179423113, iteration: 60002
loss: 1.0164012908935547,grad_norm: 0.9999991586220353, iteration: 60003
loss: 1.000286340713501,grad_norm: 0.9999990554626651, iteration: 60004
loss: 1.0068362951278687,grad_norm: 0.9999993141349807, iteration: 60005
loss: 1.0122175216674805,grad_norm: 0.9999990862796349, iteration: 60006
loss: 0.9871606230735779,grad_norm: 0.9382086132345749, iteration: 60007
loss: 1.0269041061401367,grad_norm: 0.9877694070435341, iteration: 60008
loss: 1.0423940420150757,grad_norm: 0.9999991934123468, iteration: 60009
loss: 1.0091941356658936,grad_norm: 0.8373301104200703, iteration: 60010
loss: 0.9818142056465149,grad_norm: 0.9999995225628548, iteration: 60011
loss: 1.0306286811828613,grad_norm: 0.999999312347636, iteration: 60012
loss: 1.0093032121658325,grad_norm: 0.9421298382004819, iteration: 60013
loss: 0.995037853717804,grad_norm: 0.9999991688623576, iteration: 60014
loss: 0.9956293702125549,grad_norm: 0.9999992171383186, iteration: 60015
loss: 1.012141466140747,grad_norm: 0.8052000351251909, iteration: 60016
loss: 0.9902135133743286,grad_norm: 0.9999990719435928, iteration: 60017
loss: 1.0012868642807007,grad_norm: 0.9999991099791, iteration: 60018
loss: 0.9867751002311707,grad_norm: 0.9704205421527499, iteration: 60019
loss: 0.9987553358078003,grad_norm: 0.9999990315820418, iteration: 60020
loss: 1.0354219675064087,grad_norm: 0.9999992020642073, iteration: 60021
loss: 0.9906883239746094,grad_norm: 0.9688006349085826, iteration: 60022
loss: 1.023553729057312,grad_norm: 0.974301594488309, iteration: 60023
loss: 0.9980916976928711,grad_norm: 0.9848083549565597, iteration: 60024
loss: 1.0401604175567627,grad_norm: 0.8684479543282431, iteration: 60025
loss: 0.995536744594574,grad_norm: 0.9999990501949152, iteration: 60026
loss: 1.0916997194290161,grad_norm: 0.9999997847321743, iteration: 60027
loss: 0.9580973386764526,grad_norm: 0.9999991965109621, iteration: 60028
loss: 0.9920569658279419,grad_norm: 0.9999991678459225, iteration: 60029
loss: 0.9789221286773682,grad_norm: 1.0000000249566654, iteration: 60030
loss: 1.0210884809494019,grad_norm: 0.9999991259898279, iteration: 60031
loss: 1.0332366228103638,grad_norm: 0.9999999273704487, iteration: 60032
loss: 0.9979676604270935,grad_norm: 0.9999991929836306, iteration: 60033
loss: 0.9813445806503296,grad_norm: 0.9650566297833711, iteration: 60034
loss: 1.0071632862091064,grad_norm: 0.9999992944867462, iteration: 60035
loss: 0.9768447875976562,grad_norm: 0.9226648383580467, iteration: 60036
loss: 1.0155839920043945,grad_norm: 0.9999992427087996, iteration: 60037
loss: 0.998387336730957,grad_norm: 0.9999990642542768, iteration: 60038
loss: 0.9953875541687012,grad_norm: 0.9999990051228218, iteration: 60039
loss: 0.9999014735221863,grad_norm: 0.9853639473581334, iteration: 60040
loss: 0.9823330640792847,grad_norm: 0.9999991852518361, iteration: 60041
loss: 1.0134955644607544,grad_norm: 0.9999991401846499, iteration: 60042
loss: 1.0226328372955322,grad_norm: 0.9972853970340612, iteration: 60043
loss: 0.9770051836967468,grad_norm: 0.9999992619141316, iteration: 60044
loss: 1.031630039215088,grad_norm: 0.9999999893344292, iteration: 60045
loss: 0.9910590648651123,grad_norm: 0.9999990127570445, iteration: 60046
loss: 1.0447351932525635,grad_norm: 0.9999990808378696, iteration: 60047
loss: 0.9868751764297485,grad_norm: 0.9708455979212386, iteration: 60048
loss: 1.0380109548568726,grad_norm: 0.9999995451573723, iteration: 60049
loss: 1.014853835105896,grad_norm: 0.9999991790918378, iteration: 60050
loss: 1.1928958892822266,grad_norm: 1.0000000107114067, iteration: 60051
loss: 0.9882902503013611,grad_norm: 0.9669911308906879, iteration: 60052
loss: 1.0021744966506958,grad_norm: 0.9999991662403062, iteration: 60053
loss: 1.0300514698028564,grad_norm: 0.99999908402229, iteration: 60054
loss: 1.0050500631332397,grad_norm: 0.7915808135134639, iteration: 60055
loss: 0.990675151348114,grad_norm: 0.9999991716944805, iteration: 60056
loss: 0.9764992594718933,grad_norm: 0.84990342575465, iteration: 60057
loss: 0.9468570947647095,grad_norm: 0.9919934126800417, iteration: 60058
loss: 1.0638848543167114,grad_norm: 0.9361191329983193, iteration: 60059
loss: 0.9408122897148132,grad_norm: 0.9999990441011305, iteration: 60060
loss: 0.9916340708732605,grad_norm: 0.999999202968813, iteration: 60061
loss: 0.9954876899719238,grad_norm: 0.8992114286389908, iteration: 60062
loss: 1.0131103992462158,grad_norm: 0.999999543825284, iteration: 60063
loss: 0.9795916676521301,grad_norm: 0.9999992288194339, iteration: 60064
loss: 1.0181835889816284,grad_norm: 0.9463681127681157, iteration: 60065
loss: 1.0001369714736938,grad_norm: 0.874719970758173, iteration: 60066
loss: 1.0105494260787964,grad_norm: 0.991044307483932, iteration: 60067
loss: 1.0047838687896729,grad_norm: 0.9999990740018591, iteration: 60068
loss: 1.015586495399475,grad_norm: 0.9999990353711353, iteration: 60069
loss: 1.0041382312774658,grad_norm: 0.9887673623559556, iteration: 60070
loss: 1.0237451791763306,grad_norm: 0.9999992643020518, iteration: 60071
loss: 0.971680760383606,grad_norm: 0.999999396450701, iteration: 60072
loss: 1.0221807956695557,grad_norm: 0.9740202390997901, iteration: 60073
loss: 1.0135219097137451,grad_norm: 0.9999990807581122, iteration: 60074
loss: 0.9620926976203918,grad_norm: 0.9999991528165794, iteration: 60075
loss: 1.046337366104126,grad_norm: 0.9999991815728941, iteration: 60076
loss: 1.0184800624847412,grad_norm: 0.9999992078793414, iteration: 60077
loss: 1.0210657119750977,grad_norm: 0.9999992634708695, iteration: 60078
loss: 1.0281907320022583,grad_norm: 0.8255793634241085, iteration: 60079
loss: 0.9885034561157227,grad_norm: 0.9232197689447331, iteration: 60080
loss: 1.0905944108963013,grad_norm: 0.9999999393906832, iteration: 60081
loss: 0.9849241375923157,grad_norm: 0.8479673694865306, iteration: 60082
loss: 1.0365124940872192,grad_norm: 0.9999993240312789, iteration: 60083
loss: 0.9771101474761963,grad_norm: 0.9840574612503462, iteration: 60084
loss: 0.9599847197532654,grad_norm: 0.9539115089600434, iteration: 60085
loss: 1.0045547485351562,grad_norm: 0.9905203361303767, iteration: 60086
loss: 0.9948528409004211,grad_norm: 0.9999992877464817, iteration: 60087
loss: 1.0056421756744385,grad_norm: 0.8905489259376593, iteration: 60088
loss: 1.0159176588058472,grad_norm: 0.9999989731722331, iteration: 60089
loss: 1.0087674856185913,grad_norm: 0.999999206002564, iteration: 60090
loss: 1.0193955898284912,grad_norm: 0.9999992106229078, iteration: 60091
loss: 0.9998629093170166,grad_norm: 0.9999990192054091, iteration: 60092
loss: 1.03780198097229,grad_norm: 0.9999996411850165, iteration: 60093
loss: 0.9851400256156921,grad_norm: 0.8352694716528386, iteration: 60094
loss: 0.9698018431663513,grad_norm: 0.9999990952634353, iteration: 60095
loss: 1.0528842210769653,grad_norm: 0.9929569411006233, iteration: 60096
loss: 1.0175299644470215,grad_norm: 0.9999991794130683, iteration: 60097
loss: 0.9956037402153015,grad_norm: 0.9999992610858626, iteration: 60098
loss: 0.9849714040756226,grad_norm: 0.9999991793716769, iteration: 60099
loss: 1.0080115795135498,grad_norm: 0.9999990462990632, iteration: 60100
loss: 1.007380723953247,grad_norm: 0.9999993978602401, iteration: 60101
loss: 0.9770064949989319,grad_norm: 0.9178872035689947, iteration: 60102
loss: 1.0101313591003418,grad_norm: 0.9812142152007766, iteration: 60103
loss: 1.0129249095916748,grad_norm: 0.9999991201105921, iteration: 60104
loss: 0.9696616530418396,grad_norm: 0.941547848035243, iteration: 60105
loss: 0.9757677316665649,grad_norm: 0.9999991004577705, iteration: 60106
loss: 1.041504979133606,grad_norm: 0.9999996865667337, iteration: 60107
loss: 1.057240605354309,grad_norm: 0.9999998531583376, iteration: 60108
loss: 1.1133216619491577,grad_norm: 0.9999992921275439, iteration: 60109
loss: 0.9895816445350647,grad_norm: 0.9591601688530409, iteration: 60110
loss: 0.9877098202705383,grad_norm: 0.9999992377906456, iteration: 60111
loss: 1.0632312297821045,grad_norm: 0.9999993608052159, iteration: 60112
loss: 1.0576940774917603,grad_norm: 0.9999990910257988, iteration: 60113
loss: 1.021206021308899,grad_norm: 0.9694241558556821, iteration: 60114
loss: 0.9664461016654968,grad_norm: 0.9999991889789085, iteration: 60115
loss: 1.028541922569275,grad_norm: 0.966832910373315, iteration: 60116
loss: 0.9900947213172913,grad_norm: 0.999999160535299, iteration: 60117
loss: 1.031314730644226,grad_norm: 0.924141395575763, iteration: 60118
loss: 1.0419100522994995,grad_norm: 0.9999989894603079, iteration: 60119
loss: 0.9702861905097961,grad_norm: 0.9999993047141736, iteration: 60120
loss: 1.0492483377456665,grad_norm: 0.999999242358113, iteration: 60121
loss: 0.9971870183944702,grad_norm: 0.9662454764042078, iteration: 60122
loss: 1.014722466468811,grad_norm: 0.9433223990208242, iteration: 60123
loss: 1.0137290954589844,grad_norm: 0.9999991996663886, iteration: 60124
loss: 0.9894669651985168,grad_norm: 0.9843492208915781, iteration: 60125
loss: 1.0287131071090698,grad_norm: 0.9999994041972698, iteration: 60126
loss: 0.9726399183273315,grad_norm: 0.999998983226488, iteration: 60127
loss: 0.980287492275238,grad_norm: 0.999999113175212, iteration: 60128
loss: 1.0511023998260498,grad_norm: 0.9999994883208307, iteration: 60129
loss: 1.0046933889389038,grad_norm: 0.8390076911503714, iteration: 60130
loss: 0.9905706644058228,grad_norm: 0.9389194023323357, iteration: 60131
loss: 0.977006196975708,grad_norm: 0.9840605677454582, iteration: 60132
loss: 1.0288490056991577,grad_norm: 0.999999106538239, iteration: 60133
loss: 0.969298243522644,grad_norm: 0.9999990905698388, iteration: 60134
loss: 1.0209659337997437,grad_norm: 0.999999071978082, iteration: 60135
loss: 1.005101203918457,grad_norm: 0.9999991230256154, iteration: 60136
loss: 0.9734446406364441,grad_norm: 0.9067000896317098, iteration: 60137
loss: 0.9916254281997681,grad_norm: 0.9999990700114566, iteration: 60138
loss: 0.9775559306144714,grad_norm: 0.9999996198696427, iteration: 60139
loss: 0.9994403123855591,grad_norm: 0.9999990953386634, iteration: 60140
loss: 0.9885754585266113,grad_norm: 0.998182332639279, iteration: 60141
loss: 1.028536319732666,grad_norm: 0.9999991349603803, iteration: 60142
loss: 0.9901220798492432,grad_norm: 0.9999992731280579, iteration: 60143
loss: 0.9861049652099609,grad_norm: 0.999998955957244, iteration: 60144
loss: 1.0169450044631958,grad_norm: 0.9069583753380686, iteration: 60145
loss: 1.0197855234146118,grad_norm: 0.9973710360926815, iteration: 60146
loss: 1.058952808380127,grad_norm: 0.9795104672121963, iteration: 60147
loss: 1.0245229005813599,grad_norm: 0.9964590524231081, iteration: 60148
loss: 1.0232791900634766,grad_norm: 0.9999990849975803, iteration: 60149
loss: 1.018697738647461,grad_norm: 0.9999990370464367, iteration: 60150
loss: 0.9852526783943176,grad_norm: 0.9999989107747945, iteration: 60151
loss: 1.0154775381088257,grad_norm: 0.9999990521468699, iteration: 60152
loss: 0.9877772331237793,grad_norm: 0.9999991529442501, iteration: 60153
loss: 0.9977481961250305,grad_norm: 0.9253090060519225, iteration: 60154
loss: 1.000908613204956,grad_norm: 0.9999991127320007, iteration: 60155
loss: 0.9939450621604919,grad_norm: 0.9999990842135416, iteration: 60156
loss: 0.9945762157440186,grad_norm: 0.9999992112805026, iteration: 60157
loss: 1.0021350383758545,grad_norm: 0.847740279286916, iteration: 60158
loss: 0.9738341569900513,grad_norm: 0.9999990294787011, iteration: 60159
loss: 1.0267069339752197,grad_norm: 0.9079806432219203, iteration: 60160
loss: 1.0218164920806885,grad_norm: 0.9999990754907514, iteration: 60161
loss: 1.0055859088897705,grad_norm: 0.9999994305138444, iteration: 60162
loss: 0.9878831505775452,grad_norm: 0.9999989712329226, iteration: 60163
loss: 0.9905814528465271,grad_norm: 0.9999991472932992, iteration: 60164
loss: 0.9901407957077026,grad_norm: 0.9306145157125697, iteration: 60165
loss: 0.9874937534332275,grad_norm: 0.999999457424974, iteration: 60166
loss: 1.0577994585037231,grad_norm: 0.9553626400166989, iteration: 60167
loss: 0.9942057728767395,grad_norm: 0.9999990861150325, iteration: 60168
loss: 1.031849980354309,grad_norm: 0.9999996995679136, iteration: 60169
loss: 0.9996135830879211,grad_norm: 0.9647794101436852, iteration: 60170
loss: 1.004661202430725,grad_norm: 0.9763670060657649, iteration: 60171
loss: 1.0028033256530762,grad_norm: 0.999999496594261, iteration: 60172
loss: 1.048325538635254,grad_norm: 0.9999996551221368, iteration: 60173
loss: 0.9897292852401733,grad_norm: 0.9937764322317731, iteration: 60174
loss: 1.0139429569244385,grad_norm: 0.9999996956389333, iteration: 60175
loss: 1.0246907472610474,grad_norm: 0.9999992167969807, iteration: 60176
loss: 1.0105229616165161,grad_norm: 0.9507290179787878, iteration: 60177
loss: 1.0104557275772095,grad_norm: 0.9999992360344874, iteration: 60178
loss: 1.0217890739440918,grad_norm: 0.9999990317179893, iteration: 60179
loss: 1.001779556274414,grad_norm: 0.9999991962370022, iteration: 60180
loss: 1.0119866132736206,grad_norm: 0.9999990744645227, iteration: 60181
loss: 1.0276460647583008,grad_norm: 0.9118921955624861, iteration: 60182
loss: 1.0076487064361572,grad_norm: 0.9999990887249803, iteration: 60183
loss: 1.049662470817566,grad_norm: 0.9837252881012459, iteration: 60184
loss: 1.0343784093856812,grad_norm: 0.9486735450875907, iteration: 60185
loss: 0.9793141484260559,grad_norm: 0.9999991985930546, iteration: 60186
loss: 1.0244797468185425,grad_norm: 0.9999997971505543, iteration: 60187
loss: 1.0025756359100342,grad_norm: 0.9999991177238886, iteration: 60188
loss: 0.9904999732971191,grad_norm: 0.9944975407306812, iteration: 60189
loss: 1.0027762651443481,grad_norm: 0.9166708497764694, iteration: 60190
loss: 0.9855455756187439,grad_norm: 0.9999990877874047, iteration: 60191
loss: 0.9953312873840332,grad_norm: 0.9237283715121903, iteration: 60192
loss: 1.0359907150268555,grad_norm: 0.9999991478183251, iteration: 60193
loss: 1.0214476585388184,grad_norm: 0.8851284629763267, iteration: 60194
loss: 0.9544036984443665,grad_norm: 0.9999989428660824, iteration: 60195
loss: 0.9919018745422363,grad_norm: 0.9033644240868256, iteration: 60196
loss: 1.0339902639389038,grad_norm: 0.9999992298759637, iteration: 60197
loss: 1.0313397645950317,grad_norm: 0.999999397102528, iteration: 60198
loss: 1.002923607826233,grad_norm: 0.9880011167219933, iteration: 60199
loss: 1.010536789894104,grad_norm: 0.9911398330488786, iteration: 60200
loss: 1.033951997756958,grad_norm: 0.999999161613358, iteration: 60201
loss: 1.026751160621643,grad_norm: 0.9999994125626566, iteration: 60202
loss: 1.0047500133514404,grad_norm: 0.9999992523700325, iteration: 60203
loss: 0.9946088790893555,grad_norm: 0.8778281682229965, iteration: 60204
loss: 0.9871840476989746,grad_norm: 0.975287706710732, iteration: 60205
loss: 1.0436854362487793,grad_norm: 0.9999993297564862, iteration: 60206
loss: 1.0263326168060303,grad_norm: 0.888651523331651, iteration: 60207
loss: 0.9905815720558167,grad_norm: 0.8617279800592018, iteration: 60208
loss: 1.0642294883728027,grad_norm: 0.9999992637180274, iteration: 60209
loss: 0.9666969776153564,grad_norm: 0.9999990026805242, iteration: 60210
loss: 0.9781638383865356,grad_norm: 0.9999990552177005, iteration: 60211
loss: 0.9869653582572937,grad_norm: 0.9256788008275048, iteration: 60212
loss: 1.0370402336120605,grad_norm: 0.8742503487537332, iteration: 60213
loss: 0.9754571318626404,grad_norm: 0.9999998855892241, iteration: 60214
loss: 0.9803150296211243,grad_norm: 0.8946968026423494, iteration: 60215
loss: 0.9917014837265015,grad_norm: 0.9439115105394691, iteration: 60216
loss: 0.9802862405776978,grad_norm: 0.9999993774531148, iteration: 60217
loss: 0.9719061851501465,grad_norm: 0.9999992248081537, iteration: 60218
loss: 1.0067256689071655,grad_norm: 0.9999991330983792, iteration: 60219
loss: 1.0120254755020142,grad_norm: 0.9999990405657149, iteration: 60220
loss: 1.0193504095077515,grad_norm: 0.9999990105126126, iteration: 60221
loss: 1.0289405584335327,grad_norm: 0.9999991917816702, iteration: 60222
loss: 1.0193150043487549,grad_norm: 0.9999989650397368, iteration: 60223
loss: 1.1727662086486816,grad_norm: 0.9999996577194139, iteration: 60224
loss: 1.0080902576446533,grad_norm: 0.9999991369268167, iteration: 60225
loss: 0.9818225502967834,grad_norm: 0.9999992180681493, iteration: 60226
loss: 1.0025583505630493,grad_norm: 0.9999991719227392, iteration: 60227
loss: 0.9965150356292725,grad_norm: 0.9919971724272297, iteration: 60228
loss: 0.9963690638542175,grad_norm: 0.9999992890824466, iteration: 60229
loss: 1.0063750743865967,grad_norm: 0.9999997298262757, iteration: 60230
loss: 0.9860411286354065,grad_norm: 0.999999092811018, iteration: 60231
loss: 0.9704334139823914,grad_norm: 0.9999992480591804, iteration: 60232
loss: 0.9742248058319092,grad_norm: 0.9999991406044605, iteration: 60233
loss: 1.0075072050094604,grad_norm: 0.9999992617421449, iteration: 60234
loss: 1.0159318447113037,grad_norm: 0.9999990601720596, iteration: 60235
loss: 0.9465574026107788,grad_norm: 0.9999990653915513, iteration: 60236
loss: 1.0063358545303345,grad_norm: 0.9999995370050367, iteration: 60237
loss: 0.9959274530410767,grad_norm: 0.9999992398276057, iteration: 60238
loss: 1.0045299530029297,grad_norm: 0.8990411298847512, iteration: 60239
loss: 0.9856650829315186,grad_norm: 0.9664680174712431, iteration: 60240
loss: 1.0120353698730469,grad_norm: 0.9999992560117964, iteration: 60241
loss: 1.0134503841400146,grad_norm: 0.8915616975652717, iteration: 60242
loss: 0.9883383512496948,grad_norm: 0.9999992147224356, iteration: 60243
loss: 0.9863162636756897,grad_norm: 0.8678808260399467, iteration: 60244
loss: 0.9812517166137695,grad_norm: 0.9644942337291584, iteration: 60245
loss: 1.0028111934661865,grad_norm: 0.999998979741329, iteration: 60246
loss: 0.9751288294792175,grad_norm: 0.9999990784497249, iteration: 60247
loss: 0.9889984726905823,grad_norm: 0.9746635022094925, iteration: 60248
loss: 1.01469087600708,grad_norm: 0.9999991352507641, iteration: 60249
loss: 1.016075611114502,grad_norm: 0.9999991264252844, iteration: 60250
loss: 1.0107523202896118,grad_norm: 0.911243382657924, iteration: 60251
loss: 1.1697927713394165,grad_norm: 0.9999996338384612, iteration: 60252
loss: 0.9902293682098389,grad_norm: 0.9999992088913976, iteration: 60253
loss: 1.025902509689331,grad_norm: 0.9999991755609577, iteration: 60254
loss: 1.041489601135254,grad_norm: 0.999999051298476, iteration: 60255
loss: 0.9836103320121765,grad_norm: 0.9999991136248808, iteration: 60256
loss: 0.9896828532218933,grad_norm: 0.9999989677481281, iteration: 60257
loss: 0.9916134476661682,grad_norm: 0.9999990784519972, iteration: 60258
loss: 1.0331920385360718,grad_norm: 0.999999080618937, iteration: 60259
loss: 1.0030778646469116,grad_norm: 0.9999993506290525, iteration: 60260
loss: 1.017625331878662,grad_norm: 0.9999990791302866, iteration: 60261
loss: 1.0214837789535522,grad_norm: 0.9898544956013708, iteration: 60262
loss: 0.9997764825820923,grad_norm: 0.9370602756493633, iteration: 60263
loss: 1.003440022468567,grad_norm: 0.9999989763777346, iteration: 60264
loss: 0.9841609001159668,grad_norm: 0.9999990219575029, iteration: 60265
loss: 0.9746939539909363,grad_norm: 0.8402556369582861, iteration: 60266
loss: 1.0067157745361328,grad_norm: 0.9999995067214399, iteration: 60267
loss: 0.9844226241111755,grad_norm: 0.9729252977928787, iteration: 60268
loss: 0.9995535612106323,grad_norm: 0.9167851741573988, iteration: 60269
loss: 0.9791269302368164,grad_norm: 0.9999991429189754, iteration: 60270
loss: 0.9583359360694885,grad_norm: 0.9523576166046543, iteration: 60271
loss: 0.9958332777023315,grad_norm: 0.938664398673555, iteration: 60272
loss: 1.0115790367126465,grad_norm: 0.9445621627410395, iteration: 60273
loss: 0.9612557888031006,grad_norm: 0.9999992581949728, iteration: 60274
loss: 0.9695097804069519,grad_norm: 0.9999990371453055, iteration: 60275
loss: 0.9907680749893188,grad_norm: 0.9362074249955475, iteration: 60276
loss: 1.0298279523849487,grad_norm: 0.9999991430553621, iteration: 60277
loss: 1.0064845085144043,grad_norm: 0.8711057603631032, iteration: 60278
loss: 0.9944130778312683,grad_norm: 0.9999992195313069, iteration: 60279
loss: 1.054983377456665,grad_norm: 0.9999990393882937, iteration: 60280
loss: 1.0192066431045532,grad_norm: 0.9999989793652039, iteration: 60281
loss: 0.9868996739387512,grad_norm: 0.999999161060855, iteration: 60282
loss: 1.0428098440170288,grad_norm: 0.942969226539347, iteration: 60283
loss: 1.0125058889389038,grad_norm: 0.9999992813217301, iteration: 60284
loss: 1.060477375984192,grad_norm: 0.9999991720747428, iteration: 60285
loss: 0.9842843413352966,grad_norm: 0.9123164754949454, iteration: 60286
loss: 0.9770597815513611,grad_norm: 0.9388940549348218, iteration: 60287
loss: 1.01265287399292,grad_norm: 0.9998211458963506, iteration: 60288
loss: 1.0231132507324219,grad_norm: 0.9018711615648031, iteration: 60289
loss: 0.9693918824195862,grad_norm: 0.903966376602875, iteration: 60290
loss: 1.0372998714447021,grad_norm: 0.9999995369249443, iteration: 60291
loss: 1.005646824836731,grad_norm: 0.9022590306060135, iteration: 60292
loss: 0.968437135219574,grad_norm: 0.9999991643021264, iteration: 60293
loss: 1.0324856042861938,grad_norm: 0.8681565703849854, iteration: 60294
loss: 1.0040322542190552,grad_norm: 0.9721791501972752, iteration: 60295
loss: 1.037261724472046,grad_norm: 0.9999991671212092, iteration: 60296
loss: 1.0040112733840942,grad_norm: 0.9833060584521678, iteration: 60297
loss: 0.9927952289581299,grad_norm: 0.9999991751652156, iteration: 60298
loss: 0.9791253805160522,grad_norm: 0.9999992103951978, iteration: 60299
loss: 1.033265233039856,grad_norm: 0.9999993302724834, iteration: 60300
loss: 1.0008102655410767,grad_norm: 0.9999992047027448, iteration: 60301
loss: 1.019197940826416,grad_norm: 0.8911468478694857, iteration: 60302
loss: 0.9796581864356995,grad_norm: 0.9999991739046881, iteration: 60303
loss: 1.004459023475647,grad_norm: 0.9999993230239514, iteration: 60304
loss: 0.9836402535438538,grad_norm: 0.9999993114726505, iteration: 60305
loss: 1.0224225521087646,grad_norm: 0.9999993064938317, iteration: 60306
loss: 1.0430132150650024,grad_norm: 0.9999991836514234, iteration: 60307
loss: 1.0243120193481445,grad_norm: 0.9811002080430063, iteration: 60308
loss: 0.9875789284706116,grad_norm: 0.9999991895156659, iteration: 60309
loss: 0.9997454881668091,grad_norm: 0.9999991883319244, iteration: 60310
loss: 1.0056742429733276,grad_norm: 0.9999988921326459, iteration: 60311
loss: 0.9928508400917053,grad_norm: 0.9693828261728639, iteration: 60312
loss: 0.9699621200561523,grad_norm: 0.9999991557997324, iteration: 60313
loss: 0.9972953200340271,grad_norm: 0.981941489553206, iteration: 60314
loss: 1.0019532442092896,grad_norm: 0.8947976932647723, iteration: 60315
loss: 1.0080372095108032,grad_norm: 0.9999990103336441, iteration: 60316
loss: 0.9752325415611267,grad_norm: 0.9999991445586699, iteration: 60317
loss: 1.0194110870361328,grad_norm: 0.9999991141627634, iteration: 60318
loss: 1.0187407732009888,grad_norm: 0.95221375351173, iteration: 60319
loss: 0.9827021956443787,grad_norm: 0.999999067973381, iteration: 60320
loss: 1.00546133518219,grad_norm: 0.9999992239073772, iteration: 60321
loss: 1.0268243551254272,grad_norm: 0.9302611622562863, iteration: 60322
loss: 0.986535370349884,grad_norm: 0.999999269302046, iteration: 60323
loss: 1.000134825706482,grad_norm: 0.9999991624696168, iteration: 60324
loss: 1.0141087770462036,grad_norm: 0.8877890785290118, iteration: 60325
loss: 0.9871599674224854,grad_norm: 0.9999992267269254, iteration: 60326
loss: 1.0110692977905273,grad_norm: 0.93384989727424, iteration: 60327
loss: 1.0003660917282104,grad_norm: 0.950192651860985, iteration: 60328
loss: 1.018022894859314,grad_norm: 0.8688666277855184, iteration: 60329
loss: 0.9971729516983032,grad_norm: 0.9662213259955803, iteration: 60330
loss: 1.0120882987976074,grad_norm: 0.9999990717139565, iteration: 60331
loss: 0.9995240569114685,grad_norm: 0.9999990892706253, iteration: 60332
loss: 1.012717843055725,grad_norm: 0.9050510936837126, iteration: 60333
loss: 1.0385409593582153,grad_norm: 0.9999995429492075, iteration: 60334
loss: 0.9937616586685181,grad_norm: 0.9999990269652731, iteration: 60335
loss: 1.0114861726760864,grad_norm: 0.9999991785007651, iteration: 60336
loss: 1.008376955986023,grad_norm: 0.89441219599516, iteration: 60337
loss: 0.9645437002182007,grad_norm: 0.9999992859495317, iteration: 60338
loss: 0.9822068810462952,grad_norm: 0.9939044621837015, iteration: 60339
loss: 1.021817684173584,grad_norm: 0.9999990401248511, iteration: 60340
loss: 1.0024800300598145,grad_norm: 0.9553717412496096, iteration: 60341
loss: 0.9911110401153564,grad_norm: 0.9999992713250743, iteration: 60342
loss: 1.011232852935791,grad_norm: 0.999999133225352, iteration: 60343
loss: 1.0072531700134277,grad_norm: 0.9999990869185432, iteration: 60344
loss: 0.9636460542678833,grad_norm: 0.9999992784282858, iteration: 60345
loss: 1.0373741388320923,grad_norm: 0.9999990932470082, iteration: 60346
loss: 1.0189388990402222,grad_norm: 0.999999701439104, iteration: 60347
loss: 1.0229665040969849,grad_norm: 0.9999992730723647, iteration: 60348
loss: 0.9870622754096985,grad_norm: 0.9999992702867168, iteration: 60349
loss: 0.9908194541931152,grad_norm: 0.9999990904715206, iteration: 60350
loss: 0.9979764819145203,grad_norm: 0.9999991670130821, iteration: 60351
loss: 1.0177406072616577,grad_norm: 0.9112894503917602, iteration: 60352
loss: 0.9947822093963623,grad_norm: 0.9142379826466066, iteration: 60353
loss: 1.040555715560913,grad_norm: 0.9303416579998084, iteration: 60354
loss: 0.995292067527771,grad_norm: 0.9785661608257736, iteration: 60355
loss: 0.9998431205749512,grad_norm: 0.99999900794023, iteration: 60356
loss: 1.0189257860183716,grad_norm: 0.9999994141036213, iteration: 60357
loss: 1.0242533683776855,grad_norm: 0.9999991184321192, iteration: 60358
loss: 0.9371099472045898,grad_norm: 0.99999907985552, iteration: 60359
loss: 1.0118789672851562,grad_norm: 0.9999990961618795, iteration: 60360
loss: 0.9851537346839905,grad_norm: 0.9919914541692045, iteration: 60361
loss: 1.0293666124343872,grad_norm: 0.9999991528235729, iteration: 60362
loss: 1.023474931716919,grad_norm: 0.995047899000758, iteration: 60363
loss: 1.0149160623550415,grad_norm: 0.9999991382361694, iteration: 60364
loss: 0.9991326332092285,grad_norm: 0.9992169910520471, iteration: 60365
loss: 0.990746796131134,grad_norm: 0.9999991362727781, iteration: 60366
loss: 0.9962031841278076,grad_norm: 0.9999992980047601, iteration: 60367
loss: 1.0162370204925537,grad_norm: 0.9999989938371078, iteration: 60368
loss: 1.0197420120239258,grad_norm: 0.933789472766805, iteration: 60369
loss: 0.9789392948150635,grad_norm: 0.9999990906099702, iteration: 60370
loss: 1.0010520219802856,grad_norm: 0.9999993397265925, iteration: 60371
loss: 1.0431028604507446,grad_norm: 0.9999989150188973, iteration: 60372
loss: 0.9929822683334351,grad_norm: 0.9999990873244889, iteration: 60373
loss: 1.0083423852920532,grad_norm: 0.999999053917654, iteration: 60374
loss: 0.998784601688385,grad_norm: 0.9999990667931073, iteration: 60375
loss: 0.9765684008598328,grad_norm: 0.9999990554000409, iteration: 60376
loss: 0.9682187438011169,grad_norm: 0.9999990843740094, iteration: 60377
loss: 1.0971579551696777,grad_norm: 0.9999996234721055, iteration: 60378
loss: 0.9888917803764343,grad_norm: 0.9807884148709312, iteration: 60379
loss: 1.0121760368347168,grad_norm: 0.9453157692830537, iteration: 60380
loss: 0.9578750133514404,grad_norm: 0.9510191163629801, iteration: 60381
loss: 0.9894846081733704,grad_norm: 0.9999991754600014, iteration: 60382
loss: 0.9892876148223877,grad_norm: 0.9445897608350746, iteration: 60383
loss: 1.052408218383789,grad_norm: 0.9459551130071607, iteration: 60384
loss: 0.9743353724479675,grad_norm: 0.9999992510194401, iteration: 60385
loss: 0.9575582146644592,grad_norm: 0.9999989664629183, iteration: 60386
loss: 1.0101673603057861,grad_norm: 0.999999057661635, iteration: 60387
loss: 0.9792658686637878,grad_norm: 0.9889129102591522, iteration: 60388
loss: 1.0015238523483276,grad_norm: 0.9666903214377474, iteration: 60389
loss: 1.008320927619934,grad_norm: 0.9201959609562119, iteration: 60390
loss: 1.0123552083969116,grad_norm: 0.9999993377223257, iteration: 60391
loss: 1.0273455381393433,grad_norm: 0.8962615743933952, iteration: 60392
loss: 0.9589980840682983,grad_norm: 0.9422242350349707, iteration: 60393
loss: 1.033005714416504,grad_norm: 0.9999990888862634, iteration: 60394
loss: 1.0018912553787231,grad_norm: 0.9999992487946685, iteration: 60395
loss: 1.0204991102218628,grad_norm: 0.9081501090644984, iteration: 60396
loss: 1.0329347848892212,grad_norm: 0.9999998818538482, iteration: 60397
loss: 1.0108544826507568,grad_norm: 0.9968099303497266, iteration: 60398
loss: 1.0472328662872314,grad_norm: 0.9999990064936134, iteration: 60399
loss: 0.9941033124923706,grad_norm: 0.999999224974811, iteration: 60400
loss: 1.005337119102478,grad_norm: 0.9999990061165331, iteration: 60401
loss: 0.985990583896637,grad_norm: 0.9999992402166137, iteration: 60402
loss: 1.01176917552948,grad_norm: 0.9549140422356218, iteration: 60403
loss: 1.0178025960922241,grad_norm: 0.999999493436183, iteration: 60404
loss: 1.005831003189087,grad_norm: 0.9999990147422738, iteration: 60405
loss: 1.015540599822998,grad_norm: 0.9999992669035928, iteration: 60406
loss: 0.9863734245300293,grad_norm: 0.9999992195130583, iteration: 60407
loss: 0.9926916360855103,grad_norm: 0.9999992647437759, iteration: 60408
loss: 1.025846004486084,grad_norm: 0.999999485579603, iteration: 60409
loss: 1.0036687850952148,grad_norm: 0.999999094343795, iteration: 60410
loss: 1.0062330961227417,grad_norm: 0.9999995814837496, iteration: 60411
loss: 1.0488805770874023,grad_norm: 0.9999998679020782, iteration: 60412
loss: 1.0094225406646729,grad_norm: 0.999999114370114, iteration: 60413
loss: 1.0348694324493408,grad_norm: 0.9969753432912616, iteration: 60414
loss: 1.0106213092803955,grad_norm: 0.9999991695615666, iteration: 60415
loss: 1.002265453338623,grad_norm: 0.999999244265621, iteration: 60416
loss: 1.05796480178833,grad_norm: 0.999999348224811, iteration: 60417
loss: 0.9670941233634949,grad_norm: 0.9999990674920248, iteration: 60418
loss: 0.987111508846283,grad_norm: 0.9678166982995813, iteration: 60419
loss: 1.0071557760238647,grad_norm: 0.8906491405404369, iteration: 60420
loss: 1.0209463834762573,grad_norm: 0.9411770778855049, iteration: 60421
loss: 1.0138078927993774,grad_norm: 0.9999990497419181, iteration: 60422
loss: 0.9848092198371887,grad_norm: 0.9999993940903823, iteration: 60423
loss: 0.9840463995933533,grad_norm: 0.8536290053830122, iteration: 60424
loss: 1.0080640316009521,grad_norm: 0.9529750491719543, iteration: 60425
loss: 1.0442042350769043,grad_norm: 0.93404608315203, iteration: 60426
loss: 0.9905842542648315,grad_norm: 0.9999990380591424, iteration: 60427
loss: 1.0217151641845703,grad_norm: 0.9999992786446618, iteration: 60428
loss: 1.0102301836013794,grad_norm: 0.985864615087536, iteration: 60429
loss: 1.0263142585754395,grad_norm: 0.9999994573266907, iteration: 60430
loss: 0.9920338988304138,grad_norm: 0.9999992146675202, iteration: 60431
loss: 0.9836452603340149,grad_norm: 0.8855726393924701, iteration: 60432
loss: 1.022059679031372,grad_norm: 0.9589920842232669, iteration: 60433
loss: 0.9932518601417542,grad_norm: 0.927473233371443, iteration: 60434
loss: 1.0190041065216064,grad_norm: 0.9403870965124856, iteration: 60435
loss: 1.0073202848434448,grad_norm: 0.8775411071218988, iteration: 60436
loss: 1.0046467781066895,grad_norm: 0.8689410118875107, iteration: 60437
loss: 0.9650676250457764,grad_norm: 0.999999059693087, iteration: 60438
loss: 1.0376452207565308,grad_norm: 0.999999233237204, iteration: 60439
loss: 0.9582608938217163,grad_norm: 0.9999989726752241, iteration: 60440
loss: 0.984325110912323,grad_norm: 0.9999991224105275, iteration: 60441
loss: 1.0021835565567017,grad_norm: 0.9999991361547129, iteration: 60442
loss: 0.9773286581039429,grad_norm: 0.9999992145885828, iteration: 60443
loss: 0.977344810962677,grad_norm: 0.9539986095510763, iteration: 60444
loss: 1.0172224044799805,grad_norm: 0.9999990912322876, iteration: 60445
loss: 1.0355454683303833,grad_norm: 0.9308504888011511, iteration: 60446
loss: 0.9562132358551025,grad_norm: 0.9999990677280466, iteration: 60447
loss: 0.9888591170310974,grad_norm: 0.9999997985744719, iteration: 60448
loss: 1.006008267402649,grad_norm: 0.9999992316902453, iteration: 60449
loss: 0.9854243993759155,grad_norm: 0.9999992150243296, iteration: 60450
loss: 0.9943628907203674,grad_norm: 0.9999991299887617, iteration: 60451
loss: 0.9979061484336853,grad_norm: 0.9538242996808399, iteration: 60452
loss: 0.9972442984580994,grad_norm: 0.9999991975858223, iteration: 60453
loss: 1.0031147003173828,grad_norm: 0.9999991206893442, iteration: 60454
loss: 0.9865999221801758,grad_norm: 0.9999990888671668, iteration: 60455
loss: 1.00090491771698,grad_norm: 0.9999990848617254, iteration: 60456
loss: 1.0076305866241455,grad_norm: 0.9999991704556619, iteration: 60457
loss: 0.9712860584259033,grad_norm: 0.999999211345026, iteration: 60458
loss: 1.0207269191741943,grad_norm: 0.9999991364937773, iteration: 60459
loss: 1.01163911819458,grad_norm: 0.9999991373699995, iteration: 60460
loss: 0.9923784732818604,grad_norm: 0.9999991078976007, iteration: 60461
loss: 1.004263162612915,grad_norm: 0.8881215311913931, iteration: 60462
loss: 1.0187203884124756,grad_norm: 0.9450267557078356, iteration: 60463
loss: 0.9825132489204407,grad_norm: 0.999999074851921, iteration: 60464
loss: 1.0017732381820679,grad_norm: 0.9999991064262185, iteration: 60465
loss: 1.0233734846115112,grad_norm: 0.9897865678577645, iteration: 60466
loss: 0.980277419090271,grad_norm: 0.9999990972233431, iteration: 60467
loss: 0.9765599966049194,grad_norm: 0.9999989720504188, iteration: 60468
loss: 1.0100919008255005,grad_norm: 0.9999991481993848, iteration: 60469
loss: 0.973371684551239,grad_norm: 0.9999990801081797, iteration: 60470
loss: 1.0333442687988281,grad_norm: 0.988900936192487, iteration: 60471
loss: 1.0558946132659912,grad_norm: 0.9999995599202148, iteration: 60472
loss: 1.0250215530395508,grad_norm: 0.9999992716608892, iteration: 60473
loss: 0.987957775592804,grad_norm: 0.971450636922443, iteration: 60474
loss: 1.0153980255126953,grad_norm: 0.9973788114499603, iteration: 60475
loss: 1.0136421918869019,grad_norm: 0.9999991108650007, iteration: 60476
loss: 1.0076502561569214,grad_norm: 0.9247482123094668, iteration: 60477
loss: 0.9910074472427368,grad_norm: 0.9999997466414156, iteration: 60478
loss: 0.9531186819076538,grad_norm: 0.9609682854830632, iteration: 60479
loss: 1.056280493736267,grad_norm: 0.9999991342961514, iteration: 60480
loss: 1.0248445272445679,grad_norm: 0.9999990317625903, iteration: 60481
loss: 1.0103546380996704,grad_norm: 0.999999140619401, iteration: 60482
loss: 0.9894616603851318,grad_norm: 0.9999989726344751, iteration: 60483
loss: 0.9995817542076111,grad_norm: 0.99999911617611, iteration: 60484
loss: 1.0387166738510132,grad_norm: 0.999999072687824, iteration: 60485
loss: 0.9873148798942566,grad_norm: 0.9077609586749323, iteration: 60486
loss: 1.0437383651733398,grad_norm: 0.9999991168344197, iteration: 60487
loss: 0.9571075439453125,grad_norm: 0.9999992504122175, iteration: 60488
loss: 1.031843662261963,grad_norm: 0.9185180988225357, iteration: 60489
loss: 0.9934244155883789,grad_norm: 0.9999991129567204, iteration: 60490
loss: 1.008082389831543,grad_norm: 0.9999991970732225, iteration: 60491
loss: 1.0248422622680664,grad_norm: 0.947655213023739, iteration: 60492
loss: 1.0068906545639038,grad_norm: 0.9554977785808867, iteration: 60493
loss: 1.0115556716918945,grad_norm: 0.9635796713794498, iteration: 60494
loss: 1.042395830154419,grad_norm: 0.9999991707101146, iteration: 60495
loss: 1.050999402999878,grad_norm: 0.9999998498153397, iteration: 60496
loss: 1.0055443048477173,grad_norm: 0.8847800606839156, iteration: 60497
loss: 1.0265623331069946,grad_norm: 0.999999045433475, iteration: 60498
loss: 0.9959784746170044,grad_norm: 0.9999991255316186, iteration: 60499
loss: 1.0178027153015137,grad_norm: 0.999999280554189, iteration: 60500
loss: 1.0396519899368286,grad_norm: 0.9999992017641761, iteration: 60501
loss: 0.9994752407073975,grad_norm: 0.9008326504313006, iteration: 60502
loss: 0.9972150921821594,grad_norm: 0.9999992346891506, iteration: 60503
loss: 1.0130770206451416,grad_norm: 0.9550081516870741, iteration: 60504
loss: 1.0120662450790405,grad_norm: 0.999999115448966, iteration: 60505
loss: 0.9710267782211304,grad_norm: 0.8684717050958848, iteration: 60506
loss: 0.9843074083328247,grad_norm: 0.9680495501095977, iteration: 60507
loss: 1.0034129619598389,grad_norm: 0.9999992137379924, iteration: 60508
loss: 1.0344947576522827,grad_norm: 0.9999993469507794, iteration: 60509
loss: 1.0387213230133057,grad_norm: 0.9999991536725539, iteration: 60510
loss: 1.0171719789505005,grad_norm: 0.9999989910728049, iteration: 60511
loss: 1.037082314491272,grad_norm: 0.9846070706867254, iteration: 60512
loss: 1.0922329425811768,grad_norm: 0.9999994825286345, iteration: 60513
loss: 1.0037753582000732,grad_norm: 0.9173269329940286, iteration: 60514
loss: 0.9982362389564514,grad_norm: 0.9999990793304814, iteration: 60515
loss: 1.0282150506973267,grad_norm: 0.840507570061378, iteration: 60516
loss: 1.0124784708023071,grad_norm: 0.9999992124914041, iteration: 60517
loss: 1.0495461225509644,grad_norm: 0.9999990162467454, iteration: 60518
loss: 1.0157201290130615,grad_norm: 0.8703249183032027, iteration: 60519
loss: 1.0144703388214111,grad_norm: 0.9538311072422949, iteration: 60520
loss: 1.0080375671386719,grad_norm: 0.9473748598423832, iteration: 60521
loss: 1.021966814994812,grad_norm: 0.9053944134662633, iteration: 60522
loss: 1.0235761404037476,grad_norm: 0.9408684043681048, iteration: 60523
loss: 1.0192854404449463,grad_norm: 0.7931212562164102, iteration: 60524
loss: 0.9854231476783752,grad_norm: 0.9224519216712115, iteration: 60525
loss: 0.9987177848815918,grad_norm: 0.9999992707652978, iteration: 60526
loss: 1.0392732620239258,grad_norm: 0.8940539876625407, iteration: 60527
loss: 0.9832206964492798,grad_norm: 0.9999991122703487, iteration: 60528
loss: 1.038373589515686,grad_norm: 0.9381966157337943, iteration: 60529
loss: 1.0202834606170654,grad_norm: 0.999999073776678, iteration: 60530
loss: 1.012912392616272,grad_norm: 0.858609726440505, iteration: 60531
loss: 0.995539665222168,grad_norm: 0.7941175022830719, iteration: 60532
loss: 1.0321141481399536,grad_norm: 0.9999991609176094, iteration: 60533
loss: 0.9723165035247803,grad_norm: 0.9999993207976794, iteration: 60534
loss: 1.0062698125839233,grad_norm: 0.9999991232741351, iteration: 60535
loss: 1.032226324081421,grad_norm: 0.9999993543871113, iteration: 60536
loss: 1.006851077079773,grad_norm: 0.9999989508591137, iteration: 60537
loss: 0.9833775162696838,grad_norm: 0.9529082751647332, iteration: 60538
loss: 1.0271209478378296,grad_norm: 0.9613712868829523, iteration: 60539
loss: 0.9940938353538513,grad_norm: 0.9295030386318686, iteration: 60540
loss: 0.9966707825660706,grad_norm: 0.9677308585071436, iteration: 60541
loss: 0.9925233721733093,grad_norm: 0.9999991594772261, iteration: 60542
loss: 1.0236777067184448,grad_norm: 0.9999991827963289, iteration: 60543
loss: 0.9998499155044556,grad_norm: 0.9999992317314094, iteration: 60544
loss: 0.9751926064491272,grad_norm: 0.9999990969957179, iteration: 60545
loss: 1.0103598833084106,grad_norm: 0.9999991863809309, iteration: 60546
loss: 1.000003695487976,grad_norm: 0.9999992291325623, iteration: 60547
loss: 0.970479428768158,grad_norm: 0.980404783838011, iteration: 60548
loss: 0.9824650287628174,grad_norm: 0.9999997974928101, iteration: 60549
loss: 1.0112178325653076,grad_norm: 0.8558719342945948, iteration: 60550
loss: 0.9568470120429993,grad_norm: 0.8953253871286647, iteration: 60551
loss: 1.0104385614395142,grad_norm: 0.9999991583603357, iteration: 60552
loss: 1.006949543952942,grad_norm: 0.8766533327682557, iteration: 60553
loss: 1.0460460186004639,grad_norm: 0.9129762054694875, iteration: 60554
loss: 0.9796454906463623,grad_norm: 0.9999990430689604, iteration: 60555
loss: 0.9840344190597534,grad_norm: 0.9999990148250034, iteration: 60556
loss: 0.9911100268363953,grad_norm: 0.9659661419591905, iteration: 60557
loss: 0.9566062092781067,grad_norm: 0.7785354985580565, iteration: 60558
loss: 1.009817123413086,grad_norm: 0.9999991489975322, iteration: 60559
loss: 1.0237786769866943,grad_norm: 0.999999002739643, iteration: 60560
loss: 1.005021095275879,grad_norm: 0.8562377208215002, iteration: 60561
loss: 1.003616213798523,grad_norm: 0.9895828441408145, iteration: 60562
loss: 1.0142850875854492,grad_norm: 0.9034547981940698, iteration: 60563
loss: 0.9491361975669861,grad_norm: 0.9750227779394623, iteration: 60564
loss: 1.0016505718231201,grad_norm: 0.9999990125902775, iteration: 60565
loss: 1.0328096151351929,grad_norm: 0.9999990596437129, iteration: 60566
loss: 0.9990466237068176,grad_norm: 0.9325664829328452, iteration: 60567
loss: 0.9973296523094177,grad_norm: 0.9999991509509096, iteration: 60568
loss: 0.9906899333000183,grad_norm: 0.9411048122155995, iteration: 60569
loss: 0.9832624793052673,grad_norm: 0.9999991308595427, iteration: 60570
loss: 1.025800108909607,grad_norm: 0.8930432203546533, iteration: 60571
loss: 1.0094382762908936,grad_norm: 0.9999991078144064, iteration: 60572
loss: 0.993172287940979,grad_norm: 0.8786734545612707, iteration: 60573
loss: 1.0302300453186035,grad_norm: 0.9999995951772276, iteration: 60574
loss: 1.0244147777557373,grad_norm: 0.9999989983762457, iteration: 60575
loss: 1.0286306142807007,grad_norm: 0.999999202566752, iteration: 60576
loss: 1.0341988801956177,grad_norm: 0.9999990488234887, iteration: 60577
loss: 0.9836606979370117,grad_norm: 0.8865445070721435, iteration: 60578
loss: 0.9794987440109253,grad_norm: 0.8168929838297221, iteration: 60579
loss: 1.0264298915863037,grad_norm: 0.9999990819190585, iteration: 60580
loss: 1.0150139331817627,grad_norm: 0.9973200345039434, iteration: 60581
loss: 0.9999159574508667,grad_norm: 0.9471984041519984, iteration: 60582
loss: 0.9962825775146484,grad_norm: 0.9999989601942914, iteration: 60583
loss: 1.0351269245147705,grad_norm: 0.999999004359847, iteration: 60584
loss: 1.0134748220443726,grad_norm: 0.9999993871916167, iteration: 60585
loss: 0.9799023270606995,grad_norm: 0.8433494526967609, iteration: 60586
loss: 0.9607893228530884,grad_norm: 0.9159574712449328, iteration: 60587
loss: 0.9467238783836365,grad_norm: 0.9999990379407234, iteration: 60588
loss: 0.9862924814224243,grad_norm: 0.999999072058605, iteration: 60589
loss: 1.0041871070861816,grad_norm: 0.9446880953340323, iteration: 60590
loss: 0.9674969911575317,grad_norm: 0.9835412604490159, iteration: 60591
loss: 0.9626210927963257,grad_norm: 0.9883952591850307, iteration: 60592
loss: 0.9910461902618408,grad_norm: 0.8953782504087535, iteration: 60593
loss: 1.0347464084625244,grad_norm: 0.9999994061315659, iteration: 60594
loss: 1.0168380737304688,grad_norm: 0.9999991239691725, iteration: 60595
loss: 1.0031328201293945,grad_norm: 0.9999990262698687, iteration: 60596
loss: 1.001793384552002,grad_norm: 0.9646976147953896, iteration: 60597
loss: 1.02128267288208,grad_norm: 0.9999992036130617, iteration: 60598
loss: 1.0671741962432861,grad_norm: 0.9999995326312063, iteration: 60599
loss: 0.9875181317329407,grad_norm: 0.9999995932552448, iteration: 60600
loss: 0.9923784136772156,grad_norm: 0.8911957750384056, iteration: 60601
loss: 1.0032316446304321,grad_norm: 0.9620804903705377, iteration: 60602
loss: 1.0878275632858276,grad_norm: 0.9999995673706169, iteration: 60603
loss: 1.0202479362487793,grad_norm: 0.9999991311363006, iteration: 60604
loss: 0.9980981945991516,grad_norm: 0.788783379908708, iteration: 60605
loss: 1.0104272365570068,grad_norm: 0.9035086360462536, iteration: 60606
loss: 0.9759355187416077,grad_norm: 0.9999990144692417, iteration: 60607
loss: 0.9738737940788269,grad_norm: 0.9999992703013283, iteration: 60608
loss: 1.0151690244674683,grad_norm: 0.9999999093167069, iteration: 60609
loss: 0.9759194850921631,grad_norm: 0.9801053292643507, iteration: 60610
loss: 0.9572280645370483,grad_norm: 0.9999989356593502, iteration: 60611
loss: 1.0052870512008667,grad_norm: 0.9864023222471995, iteration: 60612
loss: 1.0411128997802734,grad_norm: 0.9999996680253462, iteration: 60613
loss: 1.027104139328003,grad_norm: 0.9999992488913175, iteration: 60614
loss: 1.0131442546844482,grad_norm: 0.9682618162708124, iteration: 60615
loss: 0.9933797121047974,grad_norm: 0.9999989580856615, iteration: 60616
loss: 1.0104283094406128,grad_norm: 0.999999098175004, iteration: 60617
loss: 0.9833884835243225,grad_norm: 0.9750180099661667, iteration: 60618
loss: 0.954425036907196,grad_norm: 0.9999991984616962, iteration: 60619
loss: 1.046478509902954,grad_norm: 0.9933026540993921, iteration: 60620
loss: 1.003052830696106,grad_norm: 0.9564895557986124, iteration: 60621
loss: 1.0054103136062622,grad_norm: 0.9999991185142356, iteration: 60622
loss: 1.0032908916473389,grad_norm: 0.9776171715829187, iteration: 60623
loss: 0.9843143820762634,grad_norm: 0.9985577234254094, iteration: 60624
loss: 1.0203076601028442,grad_norm: 0.9999992226830833, iteration: 60625
loss: 1.027003526687622,grad_norm: 0.9999991389902338, iteration: 60626
loss: 0.9348909854888916,grad_norm: 0.9999990547347934, iteration: 60627
loss: 0.9616459608078003,grad_norm: 0.9692489011589182, iteration: 60628
loss: 0.9971194267272949,grad_norm: 0.9999993200526166, iteration: 60629
loss: 0.9795882105827332,grad_norm: 0.9999990039334932, iteration: 60630
loss: 1.0440629720687866,grad_norm: 0.999999272996515, iteration: 60631
loss: 1.061819076538086,grad_norm: 0.9999991643687568, iteration: 60632
loss: 1.0339202880859375,grad_norm: 0.9648065172259865, iteration: 60633
loss: 1.0273994207382202,grad_norm: 0.8791702381214762, iteration: 60634
loss: 0.9810660481452942,grad_norm: 0.9634582451222847, iteration: 60635
loss: 1.0168825387954712,grad_norm: 0.9999990371333755, iteration: 60636
loss: 1.0708519220352173,grad_norm: 0.9999993404906494, iteration: 60637
loss: 0.9443016052246094,grad_norm: 0.9224061088424669, iteration: 60638
loss: 1.043793797492981,grad_norm: 0.9999992342564726, iteration: 60639
loss: 0.9847486019134521,grad_norm: 0.9999992810671781, iteration: 60640
loss: 1.0214812755584717,grad_norm: 0.9999990665764933, iteration: 60641
loss: 1.0064435005187988,grad_norm: 0.9127329516562451, iteration: 60642
loss: 1.002087950706482,grad_norm: 0.9617442631799058, iteration: 60643
loss: 1.0180550813674927,grad_norm: 0.9715940196342707, iteration: 60644
loss: 1.058840274810791,grad_norm: 0.9999991214827908, iteration: 60645
loss: 1.0375527143478394,grad_norm: 0.9999991120248337, iteration: 60646
loss: 1.0035799741744995,grad_norm: 0.8317146034765839, iteration: 60647
loss: 0.9672958254814148,grad_norm: 0.8849160022732575, iteration: 60648
loss: 1.0230028629302979,grad_norm: 0.99999963428281, iteration: 60649
loss: 0.940895676612854,grad_norm: 0.9999990312292585, iteration: 60650
loss: 0.9992197751998901,grad_norm: 0.9999991930634957, iteration: 60651
loss: 0.9670396447181702,grad_norm: 0.9999989918105447, iteration: 60652
loss: 0.989516019821167,grad_norm: 0.9887802842740426, iteration: 60653
loss: 1.0035321712493896,grad_norm: 0.9999991699186532, iteration: 60654
loss: 0.9786946773529053,grad_norm: 0.9999990785266112, iteration: 60655
loss: 0.9925684928894043,grad_norm: 0.8809516740469354, iteration: 60656
loss: 1.014553189277649,grad_norm: 0.9999992853109918, iteration: 60657
loss: 0.9969068169593811,grad_norm: 0.9560085464344142, iteration: 60658
loss: 1.0185223817825317,grad_norm: 0.9999992934833138, iteration: 60659
loss: 1.0433932542800903,grad_norm: 0.9999992892080933, iteration: 60660
loss: 1.0162642002105713,grad_norm: 0.9999992095237166, iteration: 60661
loss: 1.0144729614257812,grad_norm: 0.9492655987672644, iteration: 60662
loss: 1.0103708505630493,grad_norm: 0.8523590264250895, iteration: 60663
loss: 0.9977278709411621,grad_norm: 0.9999991654756101, iteration: 60664
loss: 0.9989009499549866,grad_norm: 0.9999990389267942, iteration: 60665
loss: 0.9847633838653564,grad_norm: 0.8406288385197535, iteration: 60666
loss: 0.9664597511291504,grad_norm: 0.9273715312248946, iteration: 60667
loss: 0.9905720949172974,grad_norm: 0.9999990870549161, iteration: 60668
loss: 1.008252501487732,grad_norm: 0.9773665337401067, iteration: 60669
loss: 1.008253812789917,grad_norm: 0.9901397019176225, iteration: 60670
loss: 0.9841699004173279,grad_norm: 0.9999991202462366, iteration: 60671
loss: 1.0113139152526855,grad_norm: 0.9999992337223698, iteration: 60672
loss: 1.0390645265579224,grad_norm: 0.9948078782017271, iteration: 60673
loss: 1.048640489578247,grad_norm: 0.9999991838221509, iteration: 60674
loss: 0.9916136264801025,grad_norm: 0.9999990456983279, iteration: 60675
loss: 0.9887720346450806,grad_norm: 0.9999991024308051, iteration: 60676
loss: 0.9904825091362,grad_norm: 0.9824522269258062, iteration: 60677
loss: 1.005205512046814,grad_norm: 0.9999995612772337, iteration: 60678
loss: 1.0036706924438477,grad_norm: 0.8759426280991662, iteration: 60679
loss: 1.007996678352356,grad_norm: 0.9999991118805273, iteration: 60680
loss: 1.0307862758636475,grad_norm: 0.9999991432193354, iteration: 60681
loss: 1.029880404472351,grad_norm: 0.9999991661014207, iteration: 60682
loss: 1.013241171836853,grad_norm: 0.9999992076291366, iteration: 60683
loss: 1.0219100713729858,grad_norm: 0.9824421097019672, iteration: 60684
loss: 0.959610641002655,grad_norm: 0.9999993141958745, iteration: 60685
loss: 0.9976563453674316,grad_norm: 0.9557152381175883, iteration: 60686
loss: 1.0065019130706787,grad_norm: 0.9999990540633252, iteration: 60687
loss: 0.9703978300094604,grad_norm: 0.9999990406555925, iteration: 60688
loss: 1.0017786026000977,grad_norm: 0.9999992901470772, iteration: 60689
loss: 1.006242275238037,grad_norm: 0.9894663524182397, iteration: 60690
loss: 1.0330562591552734,grad_norm: 0.9999991514477562, iteration: 60691
loss: 0.9851943254470825,grad_norm: 0.9317720477874946, iteration: 60692
loss: 1.0182262659072876,grad_norm: 0.9999995483725673, iteration: 60693
loss: 0.9818257689476013,grad_norm: 0.9999990285804634, iteration: 60694
loss: 1.0032763481140137,grad_norm: 0.9999994044886015, iteration: 60695
loss: 0.9543471932411194,grad_norm: 0.9283636199827222, iteration: 60696
loss: 0.9807562828063965,grad_norm: 0.9999992800964023, iteration: 60697
loss: 1.003153920173645,grad_norm: 0.9677550417105975, iteration: 60698
loss: 1.0297645330429077,grad_norm: 0.9999992372480826, iteration: 60699
loss: 1.0020662546157837,grad_norm: 0.9999990483688587, iteration: 60700
loss: 0.9882749915122986,grad_norm: 0.999999185714245, iteration: 60701
loss: 0.9836799502372742,grad_norm: 0.9999992668760819, iteration: 60702
loss: 1.0374302864074707,grad_norm: 0.9999999735689978, iteration: 60703
loss: 1.0298824310302734,grad_norm: 0.9999992297389253, iteration: 60704
loss: 1.0249511003494263,grad_norm: 0.9999995417988434, iteration: 60705
loss: 1.0138131380081177,grad_norm: 0.8207412424195656, iteration: 60706
loss: 1.0266737937927246,grad_norm: 0.8264576568158392, iteration: 60707
loss: 1.0203759670257568,grad_norm: 0.9999990452239305, iteration: 60708
loss: 1.0348296165466309,grad_norm: 0.9999992483463818, iteration: 60709
loss: 0.9945662617683411,grad_norm: 0.9372841173717229, iteration: 60710
loss: 0.9920607805252075,grad_norm: 0.9999988800751737, iteration: 60711
loss: 1.0733020305633545,grad_norm: 0.8272749764364291, iteration: 60712
loss: 1.0481135845184326,grad_norm: 0.9999999522716203, iteration: 60713
loss: 1.0320647954940796,grad_norm: 0.9999998469855162, iteration: 60714
loss: 0.9934407472610474,grad_norm: 0.9999990378327698, iteration: 60715
loss: 1.0145859718322754,grad_norm: 0.9999991337960205, iteration: 60716
loss: 1.02646803855896,grad_norm: 0.9999999093022387, iteration: 60717
loss: 1.0169769525527954,grad_norm: 0.999999089708612, iteration: 60718
loss: 0.9665117859840393,grad_norm: 0.9999990325698163, iteration: 60719
loss: 0.9819138050079346,grad_norm: 0.9999996955296888, iteration: 60720
loss: 1.020381212234497,grad_norm: 0.9999991685999395, iteration: 60721
loss: 0.988104522228241,grad_norm: 0.9839792077709576, iteration: 60722
loss: 1.016515851020813,grad_norm: 0.9999992110501048, iteration: 60723
loss: 0.9968588948249817,grad_norm: 0.9999992001743132, iteration: 60724
loss: 1.0068212747573853,grad_norm: 0.9999992043638272, iteration: 60725
loss: 1.0287808179855347,grad_norm: 0.9999992193625807, iteration: 60726
loss: 1.0864137411117554,grad_norm: 0.9999995972306851, iteration: 60727
loss: 0.9951322078704834,grad_norm: 0.9999991118082924, iteration: 60728
loss: 0.9981475472450256,grad_norm: 0.9370421885489384, iteration: 60729
loss: 0.9964014291763306,grad_norm: 0.999999258868128, iteration: 60730
loss: 1.0078171491622925,grad_norm: 0.9999992196054502, iteration: 60731
loss: 1.015376091003418,grad_norm: 0.9999990454169406, iteration: 60732
loss: 0.9915115833282471,grad_norm: 0.9223183531064129, iteration: 60733
loss: 1.0020191669464111,grad_norm: 0.9999992908417912, iteration: 60734
loss: 1.0376687049865723,grad_norm: 0.9999993433725162, iteration: 60735
loss: 1.0143455266952515,grad_norm: 0.9999991326122251, iteration: 60736
loss: 1.0217846632003784,grad_norm: 0.9999991171955427, iteration: 60737
loss: 0.9861183166503906,grad_norm: 0.9999990681137162, iteration: 60738
loss: 0.9620696902275085,grad_norm: 0.999999133499919, iteration: 60739
loss: 0.9864721298217773,grad_norm: 0.9999992742860577, iteration: 60740
loss: 1.0277626514434814,grad_norm: 0.9935480682191011, iteration: 60741
loss: 1.0063973665237427,grad_norm: 0.9315372578752669, iteration: 60742
loss: 1.0050486326217651,grad_norm: 0.9999990934877103, iteration: 60743
loss: 1.0617295503616333,grad_norm: 0.9999994539931938, iteration: 60744
loss: 1.1365844011306763,grad_norm: 0.9999999249988962, iteration: 60745
loss: 1.0298384428024292,grad_norm: 0.9691651868184186, iteration: 60746
loss: 0.9992060661315918,grad_norm: 0.9999995553220664, iteration: 60747
loss: 1.0162341594696045,grad_norm: 0.9999991863449783, iteration: 60748
loss: 1.026711106300354,grad_norm: 0.9999989767203826, iteration: 60749
loss: 0.9784332513809204,grad_norm: 0.9999991879518931, iteration: 60750
loss: 1.0308119058609009,grad_norm: 0.9934951856680233, iteration: 60751
loss: 1.020466923713684,grad_norm: 0.9999995767907575, iteration: 60752
loss: 1.0216872692108154,grad_norm: 0.9691096594280176, iteration: 60753
loss: 0.9814897775650024,grad_norm: 0.9999991290775904, iteration: 60754
loss: 0.9995154142379761,grad_norm: 0.9999992007670768, iteration: 60755
loss: 0.9994913935661316,grad_norm: 0.9999993282388345, iteration: 60756
loss: 1.0128653049468994,grad_norm: 0.9999992912207258, iteration: 60757
loss: 1.0933088064193726,grad_norm: 0.9999995400045729, iteration: 60758
loss: 0.9886663556098938,grad_norm: 0.9999991760617869, iteration: 60759
loss: 0.9743737578392029,grad_norm: 0.9339559243961817, iteration: 60760
loss: 1.0405176877975464,grad_norm: 0.999999109341991, iteration: 60761
loss: 1.0018022060394287,grad_norm: 0.9999991718241487, iteration: 60762
loss: 1.0367189645767212,grad_norm: 0.9940071301832855, iteration: 60763
loss: 1.0203789472579956,grad_norm: 0.8491713045786152, iteration: 60764
loss: 0.9905868768692017,grad_norm: 0.9425353302173706, iteration: 60765
loss: 0.9955012202262878,grad_norm: 0.9999990276903251, iteration: 60766
loss: 0.9992469549179077,grad_norm: 0.9999990373101719, iteration: 60767
loss: 1.0584560632705688,grad_norm: 0.999999231993463, iteration: 60768
loss: 0.9703264832496643,grad_norm: 0.9546121770360247, iteration: 60769
loss: 1.0085883140563965,grad_norm: 0.9626801741698163, iteration: 60770
loss: 1.0195214748382568,grad_norm: 0.999999590828017, iteration: 60771
loss: 1.0184617042541504,grad_norm: 0.9404819828678321, iteration: 60772
loss: 1.0329279899597168,grad_norm: 0.999999326124591, iteration: 60773
loss: 1.0291144847869873,grad_norm: 0.99999948544407, iteration: 60774
loss: 1.0435868501663208,grad_norm: 0.9999995870056381, iteration: 60775
loss: 1.038283348083496,grad_norm: 0.9999995636971954, iteration: 60776
loss: 1.0215531587600708,grad_norm: 0.9954490892071147, iteration: 60777
loss: 1.053739309310913,grad_norm: 0.999999989777752, iteration: 60778
loss: 0.9957082271575928,grad_norm: 0.9999990964404468, iteration: 60779
loss: 0.9827515482902527,grad_norm: 0.9999990396395358, iteration: 60780
loss: 1.0273151397705078,grad_norm: 0.9999998846918615, iteration: 60781
loss: 1.0298887491226196,grad_norm: 0.9999992828816853, iteration: 60782
loss: 1.011628270149231,grad_norm: 0.9999989633228085, iteration: 60783
loss: 1.0552618503570557,grad_norm: 0.9999993705249851, iteration: 60784
loss: 1.0138962268829346,grad_norm: 0.9495035115541441, iteration: 60785
loss: 0.9824618101119995,grad_norm: 0.9999991076743596, iteration: 60786
loss: 0.9625234007835388,grad_norm: 0.9948467735978548, iteration: 60787
loss: 0.9915198087692261,grad_norm: 0.9999990918178319, iteration: 60788
loss: 0.9783393740653992,grad_norm: 0.9605041667030476, iteration: 60789
loss: 0.988086462020874,grad_norm: 0.8610499354419133, iteration: 60790
loss: 1.0259010791778564,grad_norm: 0.9999992615833845, iteration: 60791
loss: 1.0159223079681396,grad_norm: 0.9525028047655235, iteration: 60792
loss: 1.0502023696899414,grad_norm: 0.9999991251206166, iteration: 60793
loss: 0.9957615733146667,grad_norm: 0.9999990834525408, iteration: 60794
loss: 1.0027053356170654,grad_norm: 0.9999992332417362, iteration: 60795
loss: 0.9990597367286682,grad_norm: 0.9999990874823472, iteration: 60796
loss: 0.9958581328392029,grad_norm: 0.9999992787270585, iteration: 60797
loss: 0.9975464940071106,grad_norm: 0.9401592030402092, iteration: 60798
loss: 1.0140187740325928,grad_norm: 0.9999990382476673, iteration: 60799
loss: 1.016431212425232,grad_norm: 0.9999991270439256, iteration: 60800
loss: 1.017713189125061,grad_norm: 0.9898078294283178, iteration: 60801
loss: 1.0309247970581055,grad_norm: 0.9999991711716334, iteration: 60802
loss: 0.9638208150863647,grad_norm: 0.9999991265640719, iteration: 60803
loss: 1.012338638305664,grad_norm: 0.9860187952026541, iteration: 60804
loss: 0.9881511330604553,grad_norm: 0.9999991524942458, iteration: 60805
loss: 1.0363881587982178,grad_norm: 0.9999999798669886, iteration: 60806
loss: 1.0048823356628418,grad_norm: 0.9999990986758899, iteration: 60807
loss: 0.966134250164032,grad_norm: 0.9999990980057338, iteration: 60808
loss: 1.0203633308410645,grad_norm: 0.9999996690321198, iteration: 60809
loss: 1.0206178426742554,grad_norm: 0.9999991529110538, iteration: 60810
loss: 1.0226633548736572,grad_norm: 0.9634843005102386, iteration: 60811
loss: 0.9727612137794495,grad_norm: 0.9556396675821989, iteration: 60812
loss: 1.030318021774292,grad_norm: 0.9999991908000905, iteration: 60813
loss: 1.0143837928771973,grad_norm: 0.9999991996491003, iteration: 60814
loss: 1.0298043489456177,grad_norm: 0.9999991697581366, iteration: 60815
loss: 1.0024338960647583,grad_norm: 0.9999992242644317, iteration: 60816
loss: 0.9976258873939514,grad_norm: 0.9999990562391108, iteration: 60817
loss: 1.0052157640457153,grad_norm: 0.999999166180947, iteration: 60818
loss: 1.0158450603485107,grad_norm: 0.9999991948316965, iteration: 60819
loss: 0.9933754801750183,grad_norm: 0.9999989156539477, iteration: 60820
loss: 1.1071571111679077,grad_norm: 0.9999997774349485, iteration: 60821
loss: 0.9789680242538452,grad_norm: 0.8620148445302249, iteration: 60822
loss: 1.0179108381271362,grad_norm: 0.9999990516801025, iteration: 60823
loss: 1.016404151916504,grad_norm: 0.8944194090640134, iteration: 60824
loss: 1.00067138671875,grad_norm: 0.9517015779841056, iteration: 60825
loss: 0.9691014289855957,grad_norm: 0.9999992712036399, iteration: 60826
loss: 1.0172855854034424,grad_norm: 0.9999991768092589, iteration: 60827
loss: 0.9948802590370178,grad_norm: 0.9999993722678554, iteration: 60828
loss: 0.9953927993774414,grad_norm: 0.9999990515140982, iteration: 60829
loss: 1.0177032947540283,grad_norm: 0.9999992524172562, iteration: 60830
loss: 1.0192599296569824,grad_norm: 0.9999992662656606, iteration: 60831
loss: 1.0095903873443604,grad_norm: 0.999999216967895, iteration: 60832
loss: 0.9858262538909912,grad_norm: 0.9999990090961973, iteration: 60833
loss: 1.0139716863632202,grad_norm: 0.8081176368173567, iteration: 60834
loss: 0.9638152718544006,grad_norm: 0.9999990147317401, iteration: 60835
loss: 0.9921241998672485,grad_norm: 0.999999054531564, iteration: 60836
loss: 0.9945847988128662,grad_norm: 0.8847283171515415, iteration: 60837
loss: 0.9804048538208008,grad_norm: 0.996769396869804, iteration: 60838
loss: 1.0000637769699097,grad_norm: 0.9066729349167031, iteration: 60839
loss: 1.0005598068237305,grad_norm: 0.9999991360065161, iteration: 60840
loss: 1.0227711200714111,grad_norm: 0.9830187804048337, iteration: 60841
loss: 1.001848816871643,grad_norm: 0.9999991240933287, iteration: 60842
loss: 1.0493593215942383,grad_norm: 0.9999993592521048, iteration: 60843
loss: 0.9977061748504639,grad_norm: 0.9999992952970173, iteration: 60844
loss: 0.986318826675415,grad_norm: 0.9999993282981543, iteration: 60845
loss: 0.9833794236183167,grad_norm: 0.9999991572873596, iteration: 60846
loss: 0.9799418449401855,grad_norm: 0.9999992474438376, iteration: 60847
loss: 0.9820505976676941,grad_norm: 0.9999991325279237, iteration: 60848
loss: 0.9571230411529541,grad_norm: 0.9999990838106292, iteration: 60849
loss: 0.9983701705932617,grad_norm: 0.8160464551508628, iteration: 60850
loss: 1.0009732246398926,grad_norm: 0.9922819765681015, iteration: 60851
loss: 0.9943038821220398,grad_norm: 0.9856110580835042, iteration: 60852
loss: 1.0133715867996216,grad_norm: 0.999999883236293, iteration: 60853
loss: 0.974865198135376,grad_norm: 0.999999198943086, iteration: 60854
loss: 1.0093557834625244,grad_norm: 0.9693495438332606, iteration: 60855
loss: 1.0216467380523682,grad_norm: 0.999998969441966, iteration: 60856
loss: 1.0551618337631226,grad_norm: 0.9999992802432487, iteration: 60857
loss: 1.0020898580551147,grad_norm: 0.9999992789169864, iteration: 60858
loss: 1.017190933227539,grad_norm: 0.9999991005208317, iteration: 60859
loss: 1.0238038301467896,grad_norm: 0.9999991458048756, iteration: 60860
loss: 0.9846916794776917,grad_norm: 0.9999991544051609, iteration: 60861
loss: 1.0151734352111816,grad_norm: 0.988888138245527, iteration: 60862
loss: 0.9925142526626587,grad_norm: 0.9999992173434593, iteration: 60863
loss: 0.9944272637367249,grad_norm: 0.9999989992982611, iteration: 60864
loss: 0.9974555969238281,grad_norm: 0.9999991278267012, iteration: 60865
loss: 1.0542933940887451,grad_norm: 0.9999997318746423, iteration: 60866
loss: 0.9740442633628845,grad_norm: 0.9999991444441361, iteration: 60867
loss: 1.0028172731399536,grad_norm: 0.9999991796817249, iteration: 60868
loss: 0.9778921604156494,grad_norm: 0.9999992177563836, iteration: 60869
loss: 1.005963921546936,grad_norm: 0.9999991430814925, iteration: 60870
loss: 1.0022658109664917,grad_norm: 0.9999997670917257, iteration: 60871
loss: 1.0138474702835083,grad_norm: 0.8894097766552564, iteration: 60872
loss: 1.0039043426513672,grad_norm: 0.9667615476855592, iteration: 60873
loss: 0.9817892909049988,grad_norm: 0.9446986871783705, iteration: 60874
loss: 1.0295906066894531,grad_norm: 0.9999995767978418, iteration: 60875
loss: 1.0120142698287964,grad_norm: 0.959899925706466, iteration: 60876
loss: 1.022450566291809,grad_norm: 0.9999991310336203, iteration: 60877
loss: 0.9774561524391174,grad_norm: 0.9799409466194952, iteration: 60878
loss: 1.0189003944396973,grad_norm: 0.9547166736901125, iteration: 60879
loss: 0.9978106617927551,grad_norm: 0.9345744794022711, iteration: 60880
loss: 1.0090965032577515,grad_norm: 0.9999991785617672, iteration: 60881
loss: 0.9674784541130066,grad_norm: 0.9999992045099871, iteration: 60882
loss: 1.0228320360183716,grad_norm: 0.8042655044159649, iteration: 60883
loss: 0.9768497347831726,grad_norm: 0.9999993135106345, iteration: 60884
loss: 0.9914186000823975,grad_norm: 0.999999208992148, iteration: 60885
loss: 0.9937580823898315,grad_norm: 0.9999992375463372, iteration: 60886
loss: 1.0001294612884521,grad_norm: 0.7489492594344913, iteration: 60887
loss: 0.9859959483146667,grad_norm: 0.9999992564039083, iteration: 60888
loss: 0.9767321348190308,grad_norm: 0.9988626786425072, iteration: 60889
loss: 1.0006020069122314,grad_norm: 0.927902684395607, iteration: 60890
loss: 1.0228352546691895,grad_norm: 0.9999990317827743, iteration: 60891
loss: 1.0010097026824951,grad_norm: 0.9975049905868567, iteration: 60892
loss: 0.9948003888130188,grad_norm: 0.9597568054335704, iteration: 60893
loss: 0.9755973219871521,grad_norm: 0.9999990220735324, iteration: 60894
loss: 1.0036357641220093,grad_norm: 0.8672058485922673, iteration: 60895
loss: 1.006751298904419,grad_norm: 0.9987127724689491, iteration: 60896
loss: 0.9876731634140015,grad_norm: 0.8063327391378402, iteration: 60897
loss: 1.0251131057739258,grad_norm: 0.9999990061624783, iteration: 60898
loss: 1.006443977355957,grad_norm: 0.9819119603093179, iteration: 60899
loss: 0.9884659051895142,grad_norm: 0.9999990739764393, iteration: 60900
loss: 0.9936723709106445,grad_norm: 0.9999992946965806, iteration: 60901
loss: 1.0033167600631714,grad_norm: 0.9999991043305729, iteration: 60902
loss: 1.0001510381698608,grad_norm: 0.9741611080174397, iteration: 60903
loss: 1.0867424011230469,grad_norm: 0.9999991086266371, iteration: 60904
loss: 0.9573156237602234,grad_norm: 0.9977933661176313, iteration: 60905
loss: 1.0147534608840942,grad_norm: 0.9999991461402885, iteration: 60906
loss: 1.0196621417999268,grad_norm: 0.9999992685193085, iteration: 60907
loss: 0.9996770620346069,grad_norm: 0.9999990257279779, iteration: 60908
loss: 0.9792450070381165,grad_norm: 0.999999127091265, iteration: 60909
loss: 1.0180704593658447,grad_norm: 0.9290320756384379, iteration: 60910
loss: 0.990546464920044,grad_norm: 0.9703953993984844, iteration: 60911
loss: 0.9774414300918579,grad_norm: 0.9999991762242885, iteration: 60912
loss: 0.9968001842498779,grad_norm: 0.9999992115495304, iteration: 60913
loss: 1.0275638103485107,grad_norm: 0.9999990851502244, iteration: 60914
loss: 1.0064597129821777,grad_norm: 0.9999994477155586, iteration: 60915
loss: 1.052926778793335,grad_norm: 0.9999992137296305, iteration: 60916
loss: 0.9801920652389526,grad_norm: 0.9999992299078163, iteration: 60917
loss: 0.9947165250778198,grad_norm: 0.9999993189876508, iteration: 60918
loss: 1.0248452425003052,grad_norm: 0.9999998434423182, iteration: 60919
loss: 0.9723165035247803,grad_norm: 0.9999991307845553, iteration: 60920
loss: 1.0124088525772095,grad_norm: 0.9999993186377216, iteration: 60921
loss: 1.0052738189697266,grad_norm: 0.999999128890238, iteration: 60922
loss: 1.0144041776657104,grad_norm: 0.9999991511159292, iteration: 60923
loss: 1.0066003799438477,grad_norm: 0.999998846934889, iteration: 60924
loss: 1.0052143335342407,grad_norm: 0.8815238734370728, iteration: 60925
loss: 0.9766503572463989,grad_norm: 0.8694290510247008, iteration: 60926
loss: 1.0064266920089722,grad_norm: 0.9999991506739228, iteration: 60927
loss: 0.9424998164176941,grad_norm: 0.999999209063331, iteration: 60928
loss: 0.9707089066505432,grad_norm: 0.9525451179090112, iteration: 60929
loss: 0.9976497888565063,grad_norm: 0.9921789467535035, iteration: 60930
loss: 1.0040968656539917,grad_norm: 0.9999990096837881, iteration: 60931
loss: 1.0068004131317139,grad_norm: 0.99999909064081, iteration: 60932
loss: 1.0104913711547852,grad_norm: 0.9999991705036396, iteration: 60933
loss: 1.0278891324996948,grad_norm: 0.9151613068403379, iteration: 60934
loss: 1.0090010166168213,grad_norm: 0.82508660779798, iteration: 60935
loss: 1.0180541276931763,grad_norm: 0.9999997622537335, iteration: 60936
loss: 1.007642149925232,grad_norm: 0.999265062538019, iteration: 60937
loss: 0.9822964072227478,grad_norm: 0.9999990254704136, iteration: 60938
loss: 0.9705656170845032,grad_norm: 0.9999991886433958, iteration: 60939
loss: 0.9990665912628174,grad_norm: 0.9999990672947742, iteration: 60940
loss: 1.197189211845398,grad_norm: 0.999999898346874, iteration: 60941
loss: 1.0199815034866333,grad_norm: 0.9999990677735758, iteration: 60942
loss: 1.0444526672363281,grad_norm: 0.9999992464875871, iteration: 60943
loss: 0.966576099395752,grad_norm: 0.8963721822704824, iteration: 60944
loss: 0.9822300672531128,grad_norm: 0.999999176820412, iteration: 60945
loss: 0.9888906478881836,grad_norm: 0.9788686781383681, iteration: 60946
loss: 1.0154051780700684,grad_norm: 0.9999991796182544, iteration: 60947
loss: 0.9793948531150818,grad_norm: 0.8357431890780551, iteration: 60948
loss: 0.9731820821762085,grad_norm: 0.9999992606346737, iteration: 60949
loss: 1.0307226181030273,grad_norm: 0.980419933007571, iteration: 60950
loss: 0.9736635088920593,grad_norm: 0.9807200193502854, iteration: 60951
loss: 1.0415323972702026,grad_norm: 0.9999991413825111, iteration: 60952
loss: 0.979386031627655,grad_norm: 0.8927811080368736, iteration: 60953
loss: 0.942952573299408,grad_norm: 0.9038335350940111, iteration: 60954
loss: 0.9910532832145691,grad_norm: 0.9999990461185835, iteration: 60955
loss: 0.9708213210105896,grad_norm: 0.8641737449422069, iteration: 60956
loss: 1.0708354711532593,grad_norm: 0.9999996715377142, iteration: 60957
loss: 0.9918303489685059,grad_norm: 0.9999998668462087, iteration: 60958
loss: 0.9709567427635193,grad_norm: 0.9999992941224504, iteration: 60959
loss: 0.9826810956001282,grad_norm: 0.9999993348408638, iteration: 60960
loss: 0.9844401478767395,grad_norm: 0.99999910958791, iteration: 60961
loss: 1.0531867742538452,grad_norm: 0.9999989936089068, iteration: 60962
loss: 0.9716157913208008,grad_norm: 0.9508357539499729, iteration: 60963
loss: 1.0010231733322144,grad_norm: 0.828590144568567, iteration: 60964
loss: 0.9968703985214233,grad_norm: 0.9999992340467162, iteration: 60965
loss: 1.0158745050430298,grad_norm: 0.9999990697656989, iteration: 60966
loss: 1.0053235292434692,grad_norm: 0.9999989959467825, iteration: 60967
loss: 0.982971727848053,grad_norm: 0.9788311797853366, iteration: 60968
loss: 0.98486328125,grad_norm: 0.9940917265470979, iteration: 60969
loss: 1.045620322227478,grad_norm: 0.9999996569339102, iteration: 60970
loss: 0.9814136624336243,grad_norm: 0.9999995352700553, iteration: 60971
loss: 1.0155202150344849,grad_norm: 0.8167030364219754, iteration: 60972
loss: 0.9721612930297852,grad_norm: 0.8341714876539893, iteration: 60973
loss: 1.0096814632415771,grad_norm: 0.9248867687224546, iteration: 60974
loss: 1.0173051357269287,grad_norm: 0.9999989827764962, iteration: 60975
loss: 0.9578529000282288,grad_norm: 0.9999991089287242, iteration: 60976
loss: 1.0119584798812866,grad_norm: 0.9999991254006192, iteration: 60977
loss: 0.9950710535049438,grad_norm: 0.7820501403728116, iteration: 60978
loss: 0.9615669846534729,grad_norm: 0.9999990805600804, iteration: 60979
loss: 0.9688634276390076,grad_norm: 0.999999119793176, iteration: 60980
loss: 1.007646083831787,grad_norm: 0.9999990010021421, iteration: 60981
loss: 0.9493300318717957,grad_norm: 0.919197130223162, iteration: 60982
loss: 1.0077918767929077,grad_norm: 0.9999988879616354, iteration: 60983
loss: 0.9979762434959412,grad_norm: 0.9999991952295818, iteration: 60984
loss: 1.0140818357467651,grad_norm: 0.9494852903587288, iteration: 60985
loss: 0.9787116050720215,grad_norm: 0.9809576133277376, iteration: 60986
loss: 0.9953803420066833,grad_norm: 0.9999991424984657, iteration: 60987
loss: 1.0201081037521362,grad_norm: 0.9999991836539823, iteration: 60988
loss: 0.9714954495429993,grad_norm: 0.8476643696641344, iteration: 60989
loss: 1.0202209949493408,grad_norm: 0.9999991116393497, iteration: 60990
loss: 0.9955369234085083,grad_norm: 0.999999237836886, iteration: 60991
loss: 0.9635822176933289,grad_norm: 0.9939438091096333, iteration: 60992
loss: 0.989871621131897,grad_norm: 0.9999992653216235, iteration: 60993
loss: 0.9713268876075745,grad_norm: 0.999999121059453, iteration: 60994
loss: 0.985016405582428,grad_norm: 0.9124867739356064, iteration: 60995
loss: 1.0205166339874268,grad_norm: 0.9999990761728416, iteration: 60996
loss: 0.9770819544792175,grad_norm: 0.9398495494692586, iteration: 60997
loss: 1.0610040426254272,grad_norm: 0.9999991323986103, iteration: 60998
loss: 0.9841076135635376,grad_norm: 0.8914650293950177, iteration: 60999
loss: 0.985069215297699,grad_norm: 0.9999991836805268, iteration: 61000
loss: 1.0236886739730835,grad_norm: 0.9352890835321097, iteration: 61001
loss: 1.0108916759490967,grad_norm: 0.9958197843242539, iteration: 61002
loss: 0.9958211779594421,grad_norm: 0.9999992601108766, iteration: 61003
loss: 0.9637824296951294,grad_norm: 0.9600590452908934, iteration: 61004
loss: 1.0054837465286255,grad_norm: 0.9739845800609181, iteration: 61005
loss: 0.9717020988464355,grad_norm: 0.9131401953255799, iteration: 61006
loss: 1.008184552192688,grad_norm: 0.9606380046965756, iteration: 61007
loss: 0.9899098873138428,grad_norm: 0.9999990698793584, iteration: 61008
loss: 1.0500421524047852,grad_norm: 0.9999999765636411, iteration: 61009
loss: 1.0201516151428223,grad_norm: 0.9920238643826421, iteration: 61010
loss: 1.018912434577942,grad_norm: 0.9999994618183685, iteration: 61011
loss: 0.960703432559967,grad_norm: 0.9999990924053861, iteration: 61012
loss: 1.0064632892608643,grad_norm: 0.999999069534632, iteration: 61013
loss: 1.0655946731567383,grad_norm: 1.0000000465441095, iteration: 61014
loss: 0.9760854840278625,grad_norm: 0.9999990364007962, iteration: 61015
loss: 0.9590083360671997,grad_norm: 0.999999118082124, iteration: 61016
loss: 0.9747071862220764,grad_norm: 0.9752287013787124, iteration: 61017
loss: 1.0198560953140259,grad_norm: 0.999999027827888, iteration: 61018
loss: 0.9953303337097168,grad_norm: 0.999999015458763, iteration: 61019
loss: 1.0031299591064453,grad_norm: 0.9999990945039514, iteration: 61020
loss: 0.9697265625,grad_norm: 0.9999990186427135, iteration: 61021
loss: 1.0603677034378052,grad_norm: 0.9999991163391896, iteration: 61022
loss: 1.0253130197525024,grad_norm: 0.9780802473711248, iteration: 61023
loss: 1.0226603746414185,grad_norm: 0.9910853383299507, iteration: 61024
loss: 0.9743837118148804,grad_norm: 0.9614962674145914, iteration: 61025
loss: 0.9736497402191162,grad_norm: 0.9999990699078016, iteration: 61026
loss: 1.0308926105499268,grad_norm: 0.9999991559023665, iteration: 61027
loss: 0.9989894032478333,grad_norm: 0.999999126517662, iteration: 61028
loss: 1.026802659034729,grad_norm: 0.999999196252909, iteration: 61029
loss: 0.9966758489608765,grad_norm: 0.901068493089717, iteration: 61030
loss: 0.9868724942207336,grad_norm: 0.9316740700344236, iteration: 61031
loss: 0.9884046316146851,grad_norm: 0.9999994318684511, iteration: 61032
loss: 0.9804291725158691,grad_norm: 0.9904066807368617, iteration: 61033
loss: 0.9973921179771423,grad_norm: 0.9999993074934915, iteration: 61034
loss: 1.0078974962234497,grad_norm: 0.9999994066887827, iteration: 61035
loss: 0.9807041883468628,grad_norm: 0.999999080112405, iteration: 61036
loss: 0.9975245594978333,grad_norm: 0.9999992732465599, iteration: 61037
loss: 1.0466430187225342,grad_norm: 0.9999998325352452, iteration: 61038
loss: 0.9791761636734009,grad_norm: 0.9999990076701258, iteration: 61039
loss: 0.9865115880966187,grad_norm: 0.9975346887572379, iteration: 61040
loss: 0.989490807056427,grad_norm: 0.9999992438192371, iteration: 61041
loss: 0.9911302328109741,grad_norm: 0.9999989982019885, iteration: 61042
loss: 1.0542455911636353,grad_norm: 0.999999015231736, iteration: 61043
loss: 1.006542682647705,grad_norm: 0.8551379286251006, iteration: 61044
loss: 0.9856101870536804,grad_norm: 0.999999196657067, iteration: 61045
loss: 0.9932761788368225,grad_norm: 0.9999992600375311, iteration: 61046
loss: 0.995195209980011,grad_norm: 0.9999994680620142, iteration: 61047
loss: 0.9876531362533569,grad_norm: 0.9999989805328269, iteration: 61048
loss: 1.0109517574310303,grad_norm: 0.9664941385199409, iteration: 61049
loss: 1.078987956047058,grad_norm: 0.9999994897063692, iteration: 61050
loss: 1.0348232984542847,grad_norm: 0.9999998166226413, iteration: 61051
loss: 1.01991605758667,grad_norm: 0.9793196157451303, iteration: 61052
loss: 1.015877366065979,grad_norm: 0.9999991874953285, iteration: 61053
loss: 0.9815028309822083,grad_norm: 0.9999992489332419, iteration: 61054
loss: 1.0545098781585693,grad_norm: 0.9999999337700036, iteration: 61055
loss: 1.074263572692871,grad_norm: 0.9999993538623656, iteration: 61056
loss: 1.0204436779022217,grad_norm: 0.9999997179111818, iteration: 61057
loss: 1.0555226802825928,grad_norm: 0.9999993894592757, iteration: 61058
loss: 1.0486997365951538,grad_norm: 0.8956949630897065, iteration: 61059
loss: 1.0515964031219482,grad_norm: 0.9999999231255305, iteration: 61060
loss: 0.9875696301460266,grad_norm: 0.9294671060405537, iteration: 61061
loss: 1.0088589191436768,grad_norm: 0.9999989438500968, iteration: 61062
loss: 0.9938079118728638,grad_norm: 0.9999993848787243, iteration: 61063
loss: 0.9968827366828918,grad_norm: 0.9999989733287953, iteration: 61064
loss: 1.008614420890808,grad_norm: 0.9593752065363141, iteration: 61065
loss: 0.9894649386405945,grad_norm: 0.9655603405427435, iteration: 61066
loss: 0.9783974885940552,grad_norm: 0.8713464193170055, iteration: 61067
loss: 0.9905057549476624,grad_norm: 0.9296667561827767, iteration: 61068
loss: 1.0488344430923462,grad_norm: 0.99999973700639, iteration: 61069
loss: 1.0123792886734009,grad_norm: 0.9999992464858328, iteration: 61070
loss: 1.0128942728042603,grad_norm: 0.9999991645861576, iteration: 61071
loss: 1.0080959796905518,grad_norm: 0.9999990708153049, iteration: 61072
loss: 1.0313102006912231,grad_norm: 0.9999992240405141, iteration: 61073
loss: 1.0023257732391357,grad_norm: 0.9574655642709331, iteration: 61074
loss: 0.9847962260246277,grad_norm: 0.9999990996048452, iteration: 61075
loss: 1.0056260824203491,grad_norm: 0.9071467082637817, iteration: 61076
loss: 1.0093785524368286,grad_norm: 0.9999989615788591, iteration: 61077
loss: 1.0197423696517944,grad_norm: 0.999999287493423, iteration: 61078
loss: 1.0210895538330078,grad_norm: 0.9999990894447224, iteration: 61079
loss: 0.9881707429885864,grad_norm: 0.7886993312169812, iteration: 61080
loss: 0.9775027632713318,grad_norm: 0.9195892260222198, iteration: 61081
loss: 0.9832206964492798,grad_norm: 0.9999990799949758, iteration: 61082
loss: 0.9910997152328491,grad_norm: 0.867076816430734, iteration: 61083
loss: 0.9771475791931152,grad_norm: 0.9999990721237919, iteration: 61084
loss: 0.9852753281593323,grad_norm: 0.9487895469273321, iteration: 61085
loss: 1.005342960357666,grad_norm: 0.9741838990092405, iteration: 61086
loss: 1.012668251991272,grad_norm: 0.9999990271760292, iteration: 61087
loss: 0.9742759466171265,grad_norm: 0.9221489527551971, iteration: 61088
loss: 1.0031424760818481,grad_norm: 0.9416895309922643, iteration: 61089
loss: 1.0247424840927124,grad_norm: 0.9999995365146523, iteration: 61090
loss: 1.012770652770996,grad_norm: 0.9999991019767284, iteration: 61091
loss: 0.9908497333526611,grad_norm: 0.9999990481383425, iteration: 61092
loss: 1.0334070920944214,grad_norm: 0.9999990792500657, iteration: 61093
loss: 0.999777615070343,grad_norm: 0.9999995537304545, iteration: 61094
loss: 1.0187513828277588,grad_norm: 0.999999173365196, iteration: 61095
loss: 0.9674195647239685,grad_norm: 0.9999990138771883, iteration: 61096
loss: 1.0101755857467651,grad_norm: 0.9999991411765146, iteration: 61097
loss: 0.9941788911819458,grad_norm: 0.9999991552505211, iteration: 61098
loss: 0.9961531162261963,grad_norm: 0.8728223375070118, iteration: 61099
loss: 1.0155704021453857,grad_norm: 0.9081330928773075, iteration: 61100
loss: 0.9915803074836731,grad_norm: 0.9790053539354703, iteration: 61101
loss: 1.0050405263900757,grad_norm: 0.9999993427129832, iteration: 61102
loss: 0.9967320561408997,grad_norm: 0.9999993390608021, iteration: 61103
loss: 0.9827421307563782,grad_norm: 0.900010893201044, iteration: 61104
loss: 0.9957934617996216,grad_norm: 0.9397498240064521, iteration: 61105
loss: 0.9883009791374207,grad_norm: 0.9999993409381865, iteration: 61106
loss: 1.0018519163131714,grad_norm: 0.9950600887680927, iteration: 61107
loss: 1.0163167715072632,grad_norm: 0.9999990872762446, iteration: 61108
loss: 0.9940964579582214,grad_norm: 0.971139738716481, iteration: 61109
loss: 0.9996423125267029,grad_norm: 0.9999990949345671, iteration: 61110
loss: 0.9979166984558105,grad_norm: 0.9999991228709956, iteration: 61111
loss: 1.030422329902649,grad_norm: 0.9999991867804693, iteration: 61112
loss: 0.9868732690811157,grad_norm: 0.9999991785176832, iteration: 61113
loss: 0.9902893900871277,grad_norm: 0.858765447221389, iteration: 61114
loss: 1.004205346107483,grad_norm: 0.9999990094372891, iteration: 61115
loss: 1.0137964487075806,grad_norm: 0.9999989298519913, iteration: 61116
loss: 0.9840654134750366,grad_norm: 0.9999990917557638, iteration: 61117
loss: 1.001391887664795,grad_norm: 0.9999998590311574, iteration: 61118
loss: 1.0158262252807617,grad_norm: 0.9999991715180284, iteration: 61119
loss: 0.9776783585548401,grad_norm: 0.8502123603267904, iteration: 61120
loss: 0.9747939705848694,grad_norm: 0.9999992360415432, iteration: 61121
loss: 0.9785518646240234,grad_norm: 0.9999990145842258, iteration: 61122
loss: 0.973023533821106,grad_norm: 0.9964857167833568, iteration: 61123
loss: 1.0276414155960083,grad_norm: 0.9627636665270688, iteration: 61124
loss: 1.0201886892318726,grad_norm: 0.938815693761826, iteration: 61125
loss: 0.9712478518486023,grad_norm: 0.9648799336022011, iteration: 61126
loss: 1.0189725160598755,grad_norm: 0.983873231720486, iteration: 61127
loss: 1.0159868001937866,grad_norm: 0.9999993760200999, iteration: 61128
loss: 1.017476201057434,grad_norm: 0.9999990887562441, iteration: 61129
loss: 0.9636359214782715,grad_norm: 0.9861280589031527, iteration: 61130
loss: 1.0556981563568115,grad_norm: 0.99999925098869, iteration: 61131
loss: 1.00040864944458,grad_norm: 0.9999992667897999, iteration: 61132
loss: 0.9720426201820374,grad_norm: 0.9436423743015471, iteration: 61133
loss: 0.9997305870056152,grad_norm: 0.9999990782074842, iteration: 61134
loss: 1.028355360031128,grad_norm: 0.9999996706533152, iteration: 61135
loss: 1.022521734237671,grad_norm: 0.8904195700263918, iteration: 61136
loss: 0.9946707487106323,grad_norm: 0.9999990822082071, iteration: 61137
loss: 1.059929370880127,grad_norm: 0.9999996170371525, iteration: 61138
loss: 1.0360790491104126,grad_norm: 0.8739533666672012, iteration: 61139
loss: 1.0185925960540771,grad_norm: 0.9999990953471654, iteration: 61140
loss: 1.0295391082763672,grad_norm: 0.8999993997579326, iteration: 61141
loss: 1.028889536857605,grad_norm: 0.9827997337544161, iteration: 61142
loss: 1.0199270248413086,grad_norm: 0.9999990030474973, iteration: 61143
loss: 1.0034340620040894,grad_norm: 0.9999991897611707, iteration: 61144
loss: 0.9890522360801697,grad_norm: 0.8712190452653223, iteration: 61145
loss: 1.0193690061569214,grad_norm: 0.9751265699388604, iteration: 61146
loss: 0.9924066066741943,grad_norm: 0.9999991973545119, iteration: 61147
loss: 0.9855336546897888,grad_norm: 0.9999990727395265, iteration: 61148
loss: 1.0398365259170532,grad_norm: 0.9999999404608679, iteration: 61149
loss: 0.992638349533081,grad_norm: 0.9999991929331219, iteration: 61150
loss: 1.0327894687652588,grad_norm: 0.9999993869369503, iteration: 61151
loss: 1.026911735534668,grad_norm: 0.999999136266895, iteration: 61152
loss: 1.003069519996643,grad_norm: 0.9999991043180991, iteration: 61153
loss: 1.0015360116958618,grad_norm: 0.9999990540948918, iteration: 61154
loss: 1.00284743309021,grad_norm: 0.935213576919962, iteration: 61155
loss: 0.9964814782142639,grad_norm: 0.9768203131726092, iteration: 61156
loss: 1.0257169008255005,grad_norm: 0.9187823578576702, iteration: 61157
loss: 1.0246175527572632,grad_norm: 0.9999991560227135, iteration: 61158
loss: 0.9943585991859436,grad_norm: 0.9999990306138002, iteration: 61159
loss: 0.9986785650253296,grad_norm: 0.9669089506477742, iteration: 61160
loss: 0.975248396396637,grad_norm: 0.9999992606692096, iteration: 61161
loss: 0.9780606031417847,grad_norm: 0.8399002892872556, iteration: 61162
loss: 1.0031733512878418,grad_norm: 0.9815480468463996, iteration: 61163
loss: 1.0273866653442383,grad_norm: 0.9999990918606598, iteration: 61164
loss: 1.0126566886901855,grad_norm: 0.9268012103307516, iteration: 61165
loss: 1.009528636932373,grad_norm: 0.9610349742731839, iteration: 61166
loss: 1.0147076845169067,grad_norm: 0.9999994275351761, iteration: 61167
loss: 0.9976577162742615,grad_norm: 0.999999067788152, iteration: 61168
loss: 0.9592685699462891,grad_norm: 0.9021675494434361, iteration: 61169
loss: 1.0126748085021973,grad_norm: 0.9999989589164991, iteration: 61170
loss: 0.977888822555542,grad_norm: 0.9999992872959087, iteration: 61171
loss: 0.9887043237686157,grad_norm: 0.9569118589952881, iteration: 61172
loss: 1.0194892883300781,grad_norm: 0.999999156179242, iteration: 61173
loss: 1.0364307165145874,grad_norm: 0.9999995886132869, iteration: 61174
loss: 1.0114678144454956,grad_norm: 0.999999472368335, iteration: 61175
loss: 0.9904900193214417,grad_norm: 0.9221876682856435, iteration: 61176
loss: 1.006364345550537,grad_norm: 0.9244121202073317, iteration: 61177
loss: 0.9960499405860901,grad_norm: 0.9999992630268519, iteration: 61178
loss: 0.9990557432174683,grad_norm: 0.9296754324565899, iteration: 61179
loss: 1.0360915660858154,grad_norm: 0.9999990952079111, iteration: 61180
loss: 1.0235737562179565,grad_norm: 0.9999992435799043, iteration: 61181
loss: 1.0097312927246094,grad_norm: 0.9999992736169335, iteration: 61182
loss: 1.0384424924850464,grad_norm: 0.9999992926594736, iteration: 61183
loss: 1.0038087368011475,grad_norm: 0.9999991731681912, iteration: 61184
loss: 0.993765652179718,grad_norm: 0.985581829448247, iteration: 61185
loss: 1.0337766408920288,grad_norm: 0.9999991800021136, iteration: 61186
loss: 1.0039315223693848,grad_norm: 0.8015427033553889, iteration: 61187
loss: 1.0042630434036255,grad_norm: 0.9999990927930686, iteration: 61188
loss: 0.9934980273246765,grad_norm: 0.9312462430484827, iteration: 61189
loss: 0.9845180511474609,grad_norm: 0.9716436300132286, iteration: 61190
loss: 1.0026400089263916,grad_norm: 0.9064678541351702, iteration: 61191
loss: 0.9914049506187439,grad_norm: 0.9906490040332673, iteration: 61192
loss: 1.0268194675445557,grad_norm: 0.9999998779203447, iteration: 61193
loss: 0.9939769506454468,grad_norm: 0.8390500326349474, iteration: 61194
loss: 0.9743301272392273,grad_norm: 0.9999991648411923, iteration: 61195
loss: 1.0325924158096313,grad_norm: 0.9999992080814613, iteration: 61196
loss: 1.0026124715805054,grad_norm: 0.9999991148840519, iteration: 61197
loss: 0.9989483952522278,grad_norm: 0.9999996087905867, iteration: 61198
loss: 1.0001718997955322,grad_norm: 0.9999990654478348, iteration: 61199
loss: 1.0079056024551392,grad_norm: 0.9920487159836628, iteration: 61200
loss: 0.9925166368484497,grad_norm: 0.9999990091895228, iteration: 61201
loss: 1.011030912399292,grad_norm: 0.8547945237712484, iteration: 61202
loss: 1.0240124464035034,grad_norm: 0.9999991496730312, iteration: 61203
loss: 0.9901312589645386,grad_norm: 0.9999991427222464, iteration: 61204
loss: 1.041237235069275,grad_norm: 0.999999203396957, iteration: 61205
loss: 0.9721940755844116,grad_norm: 0.9999992699385151, iteration: 61206
loss: 0.9960863590240479,grad_norm: 0.960820417248525, iteration: 61207
loss: 1.0203778743743896,grad_norm: 0.9999992733433518, iteration: 61208
loss: 1.0116868019104004,grad_norm: 0.8582362781738647, iteration: 61209
loss: 0.9910857081413269,grad_norm: 0.9159470850638621, iteration: 61210
loss: 1.0093201398849487,grad_norm: 0.9999991714425389, iteration: 61211
loss: 1.0081337690353394,grad_norm: 0.9129143186290484, iteration: 61212
loss: 0.99566650390625,grad_norm: 0.9308581201938774, iteration: 61213
loss: 0.9658618569374084,grad_norm: 0.9282590688239024, iteration: 61214
loss: 1.0008587837219238,grad_norm: 0.9999991807107824, iteration: 61215
loss: 1.0106655359268188,grad_norm: 0.9999991449238484, iteration: 61216
loss: 0.9675366878509521,grad_norm: 0.9999992195476057, iteration: 61217
loss: 1.0079972743988037,grad_norm: 0.9999991534759177, iteration: 61218
loss: 1.0172064304351807,grad_norm: 0.9999991439936637, iteration: 61219
loss: 0.9778314828872681,grad_norm: 0.9408067324211268, iteration: 61220
loss: 0.99739009141922,grad_norm: 0.9195374863259008, iteration: 61221
loss: 1.044211745262146,grad_norm: 0.9390448495800269, iteration: 61222
loss: 0.9983869791030884,grad_norm: 0.924829203424663, iteration: 61223
loss: 0.9703921675682068,grad_norm: 0.9573524194785011, iteration: 61224
loss: 1.017137885093689,grad_norm: 0.9999990465203519, iteration: 61225
loss: 1.0067384243011475,grad_norm: 0.9999991726147995, iteration: 61226
loss: 1.0117825269699097,grad_norm: 0.9597099243324646, iteration: 61227
loss: 1.0000648498535156,grad_norm: 0.9800690478471364, iteration: 61228
loss: 1.0062106847763062,grad_norm: 0.9952721305076538, iteration: 61229
loss: 0.9856908321380615,grad_norm: 0.9999992685699393, iteration: 61230
loss: 0.9784106016159058,grad_norm: 0.9999990762984082, iteration: 61231
loss: 1.0078117847442627,grad_norm: 0.9999990934332611, iteration: 61232
loss: 1.0260087251663208,grad_norm: 0.9999991690346375, iteration: 61233
loss: 1.0391710996627808,grad_norm: 0.9999991298418338, iteration: 61234
loss: 0.9757209420204163,grad_norm: 0.9999992981466116, iteration: 61235
loss: 1.0145022869110107,grad_norm: 0.9999992056309441, iteration: 61236
loss: 0.9904000759124756,grad_norm: 0.9703773224436378, iteration: 61237
loss: 1.0441491603851318,grad_norm: 0.9952161205085365, iteration: 61238
loss: 0.9928252100944519,grad_norm: 0.9804184659916768, iteration: 61239
loss: 0.9818731546401978,grad_norm: 0.9596202589707141, iteration: 61240
loss: 0.9882220029830933,grad_norm: 0.9999990603550877, iteration: 61241
loss: 0.9996626377105713,grad_norm: 0.9999995037994379, iteration: 61242
loss: 1.0036885738372803,grad_norm: 0.898452165999597, iteration: 61243
loss: 0.9753652215003967,grad_norm: 0.9999990552833135, iteration: 61244
loss: 0.9693537354469299,grad_norm: 0.9999991563633333, iteration: 61245
loss: 1.0358270406723022,grad_norm: 0.9999990643352025, iteration: 61246
loss: 1.000663161277771,grad_norm: 0.9999999653385943, iteration: 61247
loss: 1.0313917398452759,grad_norm: 0.9999995847822918, iteration: 61248
loss: 1.0131468772888184,grad_norm: 0.9999990826288887, iteration: 61249
loss: 0.9827974438667297,grad_norm: 0.9362474040729473, iteration: 61250
loss: 1.0204086303710938,grad_norm: 0.9999990519206782, iteration: 61251
loss: 1.025646686553955,grad_norm: 0.999999067848783, iteration: 61252
loss: 1.0016435384750366,grad_norm: 0.9512349123320701, iteration: 61253
loss: 0.9893671274185181,grad_norm: 0.9999990869214136, iteration: 61254
loss: 1.0118930339813232,grad_norm: 0.9999990977794301, iteration: 61255
loss: 0.9666209816932678,grad_norm: 0.9253265244454609, iteration: 61256
loss: 0.9904729127883911,grad_norm: 0.9999993715359468, iteration: 61257
loss: 1.030648112297058,grad_norm: 0.9999991311703632, iteration: 61258
loss: 0.9681627750396729,grad_norm: 0.9920583393571913, iteration: 61259
loss: 0.9753699898719788,grad_norm: 0.9580498078452642, iteration: 61260
loss: 1.004367709159851,grad_norm: 0.9999991909018786, iteration: 61261
loss: 1.003892421722412,grad_norm: 0.8844084791737011, iteration: 61262
loss: 0.9878342151641846,grad_norm: 0.9999993406851544, iteration: 61263
loss: 1.0835100412368774,grad_norm: 0.9999998361028313, iteration: 61264
loss: 0.9835878014564514,grad_norm: 0.9999993873033698, iteration: 61265
loss: 1.0400798320770264,grad_norm: 0.9999994752354404, iteration: 61266
loss: 1.0056012868881226,grad_norm: 0.999999083500523, iteration: 61267
loss: 1.0217020511627197,grad_norm: 0.9842860262926567, iteration: 61268
loss: 1.0209002494812012,grad_norm: 0.9999991184777993, iteration: 61269
loss: 1.0259548425674438,grad_norm: 0.9774415138630893, iteration: 61270
loss: 0.9950496554374695,grad_norm: 0.9999992218761999, iteration: 61271
loss: 0.9830496907234192,grad_norm: 0.999999184621747, iteration: 61272
loss: 0.9876761436462402,grad_norm: 0.9520209727159286, iteration: 61273
loss: 1.007847547531128,grad_norm: 0.9521166438307622, iteration: 61274
loss: 0.9964370131492615,grad_norm: 0.9999991624770236, iteration: 61275
loss: 1.0205472707748413,grad_norm: 0.8371072060668331, iteration: 61276
loss: 1.0257493257522583,grad_norm: 0.993409315390838, iteration: 61277
loss: 1.0240037441253662,grad_norm: 0.9999999281166861, iteration: 61278
loss: 0.997946560382843,grad_norm: 0.9999991725299151, iteration: 61279
loss: 0.9994511008262634,grad_norm: 0.9797233022571936, iteration: 61280
loss: 1.0270966291427612,grad_norm: 0.9999991301555261, iteration: 61281
loss: 0.999973714351654,grad_norm: 0.9549960236040606, iteration: 61282
loss: 1.0165653228759766,grad_norm: 0.9896488915574586, iteration: 61283
loss: 0.9609578847885132,grad_norm: 0.9999991400504283, iteration: 61284
loss: 0.9853651523590088,grad_norm: 0.9999998806678798, iteration: 61285
loss: 1.0221108198165894,grad_norm: 0.9999990533250726, iteration: 61286
loss: 1.004438042640686,grad_norm: 0.8426569457505878, iteration: 61287
loss: 1.0207512378692627,grad_norm: 0.999999038676159, iteration: 61288
loss: 1.0233855247497559,grad_norm: 0.9967588237130918, iteration: 61289
loss: 1.0198493003845215,grad_norm: 0.9999992544437631, iteration: 61290
loss: 0.9987500309944153,grad_norm: 0.9999991358827972, iteration: 61291
loss: 1.0319474935531616,grad_norm: 0.9830483986442579, iteration: 61292
loss: 1.0157817602157593,grad_norm: 0.9999998583636396, iteration: 61293
loss: 1.0206811428070068,grad_norm: 0.9999996644645526, iteration: 61294
loss: 1.0171592235565186,grad_norm: 0.9999989612003336, iteration: 61295
loss: 1.0013941526412964,grad_norm: 0.9999991285661302, iteration: 61296
loss: 0.9864828586578369,grad_norm: 0.9999993851146859, iteration: 61297
loss: 1.0052050352096558,grad_norm: 0.9999988991731568, iteration: 61298
loss: 0.9999744892120361,grad_norm: 0.9569345090738184, iteration: 61299
loss: 0.9869861602783203,grad_norm: 0.9723737304427581, iteration: 61300
loss: 0.9862223863601685,grad_norm: 0.9613537090505674, iteration: 61301
loss: 0.9833254814147949,grad_norm: 0.9999991200967853, iteration: 61302
loss: 0.9676173329353333,grad_norm: 0.9999990466394222, iteration: 61303
loss: 0.995259702205658,grad_norm: 0.892144245267463, iteration: 61304
loss: 1.295751690864563,grad_norm: 0.9999997832269225, iteration: 61305
loss: 0.9862200021743774,grad_norm: 0.9701957046184719, iteration: 61306
loss: 1.005760908126831,grad_norm: 0.9999994263093968, iteration: 61307
loss: 0.9929397702217102,grad_norm: 0.9999990939747848, iteration: 61308
loss: 1.0440353155136108,grad_norm: 0.8228147026547192, iteration: 61309
loss: 0.9929158091545105,grad_norm: 0.9999992816499071, iteration: 61310
loss: 0.9927959442138672,grad_norm: 0.8279884215168892, iteration: 61311
loss: 0.9763520359992981,grad_norm: 0.9999991457623658, iteration: 61312
loss: 1.0567336082458496,grad_norm: 0.9999991337258076, iteration: 61313
loss: 0.9708929657936096,grad_norm: 0.8563730536768347, iteration: 61314
loss: 1.0091044902801514,grad_norm: 0.9999992299545614, iteration: 61315
loss: 1.0093637704849243,grad_norm: 0.9999992418293988, iteration: 61316
loss: 1.0036176443099976,grad_norm: 0.8516082775261038, iteration: 61317
loss: 1.0527609586715698,grad_norm: 0.9999994505277332, iteration: 61318
loss: 1.0247623920440674,grad_norm: 0.9999997962210949, iteration: 61319
loss: 0.9718939065933228,grad_norm: 0.9887580691563329, iteration: 61320
loss: 1.0278419256210327,grad_norm: 0.9999991577729836, iteration: 61321
loss: 0.9642273187637329,grad_norm: 0.9919883793910602, iteration: 61322
loss: 0.9855601787567139,grad_norm: 0.9999992627277299, iteration: 61323
loss: 1.0676664113998413,grad_norm: 0.999999109343081, iteration: 61324
loss: 1.0055365562438965,grad_norm: 0.8534173335960836, iteration: 61325
loss: 0.9875301718711853,grad_norm: 0.9999991455507484, iteration: 61326
loss: 1.0437695980072021,grad_norm: 0.9999990698266911, iteration: 61327
loss: 0.9927058219909668,grad_norm: 0.9999990707000098, iteration: 61328
loss: 0.9665943384170532,grad_norm: 0.8357912224565093, iteration: 61329
loss: 0.9878309369087219,grad_norm: 0.9999990966899724, iteration: 61330
loss: 0.9317761063575745,grad_norm: 0.9999990153412691, iteration: 61331
loss: 1.0016523599624634,grad_norm: 0.9999991131053793, iteration: 61332
loss: 1.0346955060958862,grad_norm: 0.9999992568039432, iteration: 61333
loss: 0.999553382396698,grad_norm: 0.8979657992161412, iteration: 61334
loss: 1.0477170944213867,grad_norm: 0.8815975578267661, iteration: 61335
loss: 0.9904583096504211,grad_norm: 0.9999992782893317, iteration: 61336
loss: 1.0222808122634888,grad_norm: 0.9999991077202967, iteration: 61337
loss: 1.0177407264709473,grad_norm: 0.9999992041028165, iteration: 61338
loss: 1.0190725326538086,grad_norm: 0.9999993085195483, iteration: 61339
loss: 1.0088006258010864,grad_norm: 0.934963254318223, iteration: 61340
loss: 0.9940662384033203,grad_norm: 0.9315790664361515, iteration: 61341
loss: 0.9260846972465515,grad_norm: 0.9999990973092966, iteration: 61342
loss: 1.0071032047271729,grad_norm: 0.9999990643500368, iteration: 61343
loss: 1.0140979290008545,grad_norm: 0.9999990701321083, iteration: 61344
loss: 0.971631646156311,grad_norm: 0.9999992233672818, iteration: 61345
loss: 0.9894335269927979,grad_norm: 0.9999992225488512, iteration: 61346
loss: 1.0085595846176147,grad_norm: 0.8954983857477454, iteration: 61347
loss: 0.9845049977302551,grad_norm: 0.9523041453434478, iteration: 61348
loss: 0.959981381893158,grad_norm: 0.9999989475934066, iteration: 61349
loss: 0.983858048915863,grad_norm: 0.9712763821464967, iteration: 61350
loss: 1.0095012187957764,grad_norm: 0.9999995980691974, iteration: 61351
loss: 0.9845510125160217,grad_norm: 0.9999991557051168, iteration: 61352
loss: 0.9984090328216553,grad_norm: 0.9999990510626345, iteration: 61353
loss: 0.9983296394348145,grad_norm: 0.9999991612340996, iteration: 61354
loss: 0.9580019116401672,grad_norm: 0.9999991555891892, iteration: 61355
loss: 0.9723153710365295,grad_norm: 0.9999992105855903, iteration: 61356
loss: 1.031178593635559,grad_norm: 0.9999989964608408, iteration: 61357
loss: 1.0048699378967285,grad_norm: 0.8993698791850581, iteration: 61358
loss: 0.9840502738952637,grad_norm: 0.9929666263398275, iteration: 61359
loss: 0.9459629654884338,grad_norm: 0.9999990332420028, iteration: 61360
loss: 1.004360556602478,grad_norm: 0.9362634361785847, iteration: 61361
loss: 0.9867761135101318,grad_norm: 0.9999991217288393, iteration: 61362
loss: 1.001731514930725,grad_norm: 0.979195182520827, iteration: 61363
loss: 0.9661332964897156,grad_norm: 0.9342688490102437, iteration: 61364
loss: 1.0197373628616333,grad_norm: 0.9014911719831921, iteration: 61365
loss: 1.0007187128067017,grad_norm: 0.8507836589278862, iteration: 61366
loss: 0.9920567274093628,grad_norm: 0.9999991291467836, iteration: 61367
loss: 1.0208790302276611,grad_norm: 0.9999991950697243, iteration: 61368
loss: 1.0093345642089844,grad_norm: 0.9999990548528069, iteration: 61369
loss: 0.9799106121063232,grad_norm: 0.9999992295812071, iteration: 61370
loss: 1.0105518102645874,grad_norm: 0.9999990579951543, iteration: 61371
loss: 1.0264242887496948,grad_norm: 0.8906447577678304, iteration: 61372
loss: 1.0641756057739258,grad_norm: 0.9593794681275938, iteration: 61373
loss: 0.9927266240119934,grad_norm: 0.9999990412553643, iteration: 61374
loss: 0.9816383719444275,grad_norm: 0.9909115757512955, iteration: 61375
loss: 0.9628652930259705,grad_norm: 0.8665087132265002, iteration: 61376
loss: 0.9938513040542603,grad_norm: 0.8670544304793738, iteration: 61377
loss: 0.9928566813468933,grad_norm: 0.9066845677536405, iteration: 61378
loss: 0.9705753922462463,grad_norm: 0.8840913863753312, iteration: 61379
loss: 0.9515014290809631,grad_norm: 0.9805702981118192, iteration: 61380
loss: 1.0377984046936035,grad_norm: 0.9999990943218777, iteration: 61381
loss: 0.9893593788146973,grad_norm: 0.9999991835025388, iteration: 61382
loss: 0.9646149277687073,grad_norm: 0.970785604261777, iteration: 61383
loss: 1.0438876152038574,grad_norm: 0.9999994585132747, iteration: 61384
loss: 0.9861516952514648,grad_norm: 0.948085959752253, iteration: 61385
loss: 1.0061177015304565,grad_norm: 0.9710951373124296, iteration: 61386
loss: 0.969306230545044,grad_norm: 0.9999992579096008, iteration: 61387
loss: 1.00723135471344,grad_norm: 0.9999990891204066, iteration: 61388
loss: 0.9870923757553101,grad_norm: 0.9946863307328088, iteration: 61389
loss: 1.0441304445266724,grad_norm: 0.903522339672505, iteration: 61390
loss: 0.98285311460495,grad_norm: 0.9600156938636089, iteration: 61391
loss: 1.0109941959381104,grad_norm: 0.9999992108132298, iteration: 61392
loss: 0.953497588634491,grad_norm: 0.9999989378323542, iteration: 61393
loss: 0.9651780724525452,grad_norm: 0.9999992520767869, iteration: 61394
loss: 1.0585318803787231,grad_norm: 0.9999997027498984, iteration: 61395
loss: 1.0357062816619873,grad_norm: 0.891296158070723, iteration: 61396
loss: 1.0099670886993408,grad_norm: 0.9560196628125748, iteration: 61397
loss: 0.9634193181991577,grad_norm: 0.9999990658816459, iteration: 61398
loss: 0.998943030834198,grad_norm: 0.9999990864238838, iteration: 61399
loss: 1.0197275876998901,grad_norm: 0.9332313325311127, iteration: 61400
loss: 1.0249940156936646,grad_norm: 0.9999992525740036, iteration: 61401
loss: 1.0073657035827637,grad_norm: 0.803299251089838, iteration: 61402
loss: 1.0122829675674438,grad_norm: 0.8961821406812356, iteration: 61403
loss: 0.9937669634819031,grad_norm: 0.9892145067074627, iteration: 61404
loss: 0.9919509291648865,grad_norm: 0.9999992130307045, iteration: 61405
loss: 0.99985671043396,grad_norm: 0.9581686888077448, iteration: 61406
loss: 0.9885119199752808,grad_norm: 0.984570184561273, iteration: 61407
loss: 0.9985777735710144,grad_norm: 0.9999995254835051, iteration: 61408
loss: 1.0440607070922852,grad_norm: 0.9999993042450424, iteration: 61409
loss: 1.0017740726470947,grad_norm: 0.9999991584777409, iteration: 61410
loss: 0.9975487589836121,grad_norm: 0.9999992768622362, iteration: 61411
loss: 1.0006139278411865,grad_norm: 0.9234229788367841, iteration: 61412
loss: 0.9979562163352966,grad_norm: 0.8549635325858278, iteration: 61413
loss: 0.9564593434333801,grad_norm: 0.9999992203368382, iteration: 61414
loss: 1.0059010982513428,grad_norm: 0.9999992058201869, iteration: 61415
loss: 1.0218604803085327,grad_norm: 0.9999992440597706, iteration: 61416
loss: 0.9909428954124451,grad_norm: 0.9999991318320695, iteration: 61417
loss: 0.9995205998420715,grad_norm: 0.9999991669170373, iteration: 61418
loss: 0.9991386532783508,grad_norm: 0.9999993213745957, iteration: 61419
loss: 0.9765085577964783,grad_norm: 0.9999990699408225, iteration: 61420
loss: 1.019107699394226,grad_norm: 0.9999991435348466, iteration: 61421
loss: 0.9976608157157898,grad_norm: 0.9625465894239695, iteration: 61422
loss: 0.9679914712905884,grad_norm: 0.993199196598996, iteration: 61423
loss: 0.9697147011756897,grad_norm: 0.9999990163744273, iteration: 61424
loss: 0.9675060510635376,grad_norm: 0.9764402059788099, iteration: 61425
loss: 1.0272260904312134,grad_norm: 0.9181769540687662, iteration: 61426
loss: 0.9657953381538391,grad_norm: 0.955413076142063, iteration: 61427
loss: 1.0294591188430786,grad_norm: 0.9999993582212708, iteration: 61428
loss: 1.0102863311767578,grad_norm: 0.8660640185632119, iteration: 61429
loss: 1.0128225088119507,grad_norm: 0.9940495860691382, iteration: 61430
loss: 1.023698329925537,grad_norm: 0.9999990118038382, iteration: 61431
loss: 0.9854020476341248,grad_norm: 0.9999991735322032, iteration: 61432
loss: 1.0237810611724854,grad_norm: 0.982605005713028, iteration: 61433
loss: 1.0156437158584595,grad_norm: 0.9999991181865177, iteration: 61434
loss: 1.0096720457077026,grad_norm: 0.9999996908451559, iteration: 61435
loss: 1.038577675819397,grad_norm: 0.9999992220625673, iteration: 61436
loss: 1.0740982294082642,grad_norm: 0.9999992844793085, iteration: 61437
loss: 1.0315771102905273,grad_norm: 0.9999993236869218, iteration: 61438
loss: 1.0030558109283447,grad_norm: 0.9889482377970746, iteration: 61439
loss: 0.9874326586723328,grad_norm: 0.9866256231121483, iteration: 61440
loss: 0.9939784407615662,grad_norm: 0.8928804560120144, iteration: 61441
loss: 0.9929646253585815,grad_norm: 0.8980874033394297, iteration: 61442
loss: 1.001940131187439,grad_norm: 0.9999992703292561, iteration: 61443
loss: 1.0197339057922363,grad_norm: 0.9294641492531452, iteration: 61444
loss: 1.0050052404403687,grad_norm: 0.9999990135566953, iteration: 61445
loss: 1.0065960884094238,grad_norm: 0.9999990449402257, iteration: 61446
loss: 1.0613197088241577,grad_norm: 0.9769648683933471, iteration: 61447
loss: 0.9911555051803589,grad_norm: 0.9904012950124417, iteration: 61448
loss: 0.9675689935684204,grad_norm: 0.9999990254585136, iteration: 61449
loss: 1.010575771331787,grad_norm: 0.9999992079158775, iteration: 61450
loss: 1.0540742874145508,grad_norm: 0.9999994208460341, iteration: 61451
loss: 1.0359563827514648,grad_norm: 0.9999990505673934, iteration: 61452
loss: 0.9624828100204468,grad_norm: 0.9999991064824372, iteration: 61453
loss: 1.0343602895736694,grad_norm: 0.9514734686188924, iteration: 61454
loss: 0.9746975898742676,grad_norm: 0.9999990442329472, iteration: 61455
loss: 1.0343724489212036,grad_norm: 0.9999990282731043, iteration: 61456
loss: 1.0192139148712158,grad_norm: 0.9999991539724461, iteration: 61457
loss: 1.0454297065734863,grad_norm: 0.9999993390796408, iteration: 61458
loss: 0.9964820742607117,grad_norm: 0.9999990914699463, iteration: 61459
loss: 0.9668043255805969,grad_norm: 0.8901533671004948, iteration: 61460
loss: 0.9895104169845581,grad_norm: 0.9896339694442643, iteration: 61461
loss: 1.0259578227996826,grad_norm: 0.9719685060302736, iteration: 61462
loss: 0.9827246069908142,grad_norm: 0.9999989626335045, iteration: 61463
loss: 1.002350926399231,grad_norm: 0.9614185941530363, iteration: 61464
loss: 0.9794328212738037,grad_norm: 0.9999997088472995, iteration: 61465
loss: 0.9936237335205078,grad_norm: 0.99999916158817, iteration: 61466
loss: 0.9864948987960815,grad_norm: 0.9266933863254796, iteration: 61467
loss: 0.975162148475647,grad_norm: 0.7820001768934982, iteration: 61468
loss: 0.9904398322105408,grad_norm: 0.9999992247806401, iteration: 61469
loss: 1.0132895708084106,grad_norm: 0.9807548656876474, iteration: 61470
loss: 0.9871788620948792,grad_norm: 0.9999990351266452, iteration: 61471
loss: 1.023841381072998,grad_norm: 0.9999992288937709, iteration: 61472
loss: 0.9906458258628845,grad_norm: 0.9608616550877535, iteration: 61473
loss: 0.9972957968711853,grad_norm: 0.9016619762826029, iteration: 61474
loss: 1.0114959478378296,grad_norm: 0.9821154134558104, iteration: 61475
loss: 0.9969564080238342,grad_norm: 0.9999989421948794, iteration: 61476
loss: 0.9923474192619324,grad_norm: 0.908374613774744, iteration: 61477
loss: 1.0131889581680298,grad_norm: 0.9934782739942809, iteration: 61478
loss: 0.9957713484764099,grad_norm: 0.9265571418665917, iteration: 61479
loss: 1.036191463470459,grad_norm: 0.8660402961568157, iteration: 61480
loss: 1.0069166421890259,grad_norm: 0.9999992203287769, iteration: 61481
loss: 0.9931182861328125,grad_norm: 0.9999991500808164, iteration: 61482
loss: 0.9950145483016968,grad_norm: 0.9999990332028942, iteration: 61483
loss: 1.0166397094726562,grad_norm: 0.9999991892191626, iteration: 61484
loss: 0.9585981369018555,grad_norm: 0.9971475054961229, iteration: 61485
loss: 1.0041851997375488,grad_norm: 0.9999991759547738, iteration: 61486
loss: 0.9934072494506836,grad_norm: 0.9999991255118386, iteration: 61487
loss: 0.9887944459915161,grad_norm: 0.9999992309224642, iteration: 61488
loss: 0.9560856819152832,grad_norm: 0.9837953594126566, iteration: 61489
loss: 1.0016913414001465,grad_norm: 0.9999990811029161, iteration: 61490
loss: 1.0419923067092896,grad_norm: 0.8865270729891778, iteration: 61491
loss: 0.9743577241897583,grad_norm: 0.9999991018687298, iteration: 61492
loss: 0.9826470613479614,grad_norm: 0.9999990204652939, iteration: 61493
loss: 1.0029546022415161,grad_norm: 0.999999108739549, iteration: 61494
loss: 1.011919379234314,grad_norm: 0.9785429728677743, iteration: 61495
loss: 1.020201563835144,grad_norm: 0.999999172166782, iteration: 61496
loss: 1.013348937034607,grad_norm: 0.8526864681487136, iteration: 61497
loss: 1.0210660696029663,grad_norm: 0.9999991710866913, iteration: 61498
loss: 1.0023671388626099,grad_norm: 0.9999991900879609, iteration: 61499
loss: 1.0175706148147583,grad_norm: 0.8748986151244043, iteration: 61500
loss: 1.0231293439865112,grad_norm: 0.9999991651504239, iteration: 61501
loss: 0.9960196614265442,grad_norm: 0.9999990461221128, iteration: 61502
loss: 0.9692667722702026,grad_norm: 0.9711919100721215, iteration: 61503
loss: 1.0706555843353271,grad_norm: 0.9999997954615387, iteration: 61504
loss: 1.0028624534606934,grad_norm: 0.9999990528175245, iteration: 61505
loss: 0.9856505990028381,grad_norm: 0.9999989484560595, iteration: 61506
loss: 0.9959437251091003,grad_norm: 0.8626974285932384, iteration: 61507
loss: 0.9858182072639465,grad_norm: 0.9999990596972637, iteration: 61508
loss: 0.9968888163566589,grad_norm: 0.9999990733196147, iteration: 61509
loss: 1.0198447704315186,grad_norm: 0.9999993965481833, iteration: 61510
loss: 0.9405257105827332,grad_norm: 0.9999990829949045, iteration: 61511
loss: 1.0212676525115967,grad_norm: 0.8963691352599492, iteration: 61512
loss: 0.9884682893753052,grad_norm: 0.9999990153853082, iteration: 61513
loss: 1.0411831140518188,grad_norm: 0.9667125133116474, iteration: 61514
loss: 1.015544056892395,grad_norm: 0.9134398164624109, iteration: 61515
loss: 0.9733014106750488,grad_norm: 0.9999992251097601, iteration: 61516
loss: 0.9710789918899536,grad_norm: 0.9683207494998282, iteration: 61517
loss: 1.011207938194275,grad_norm: 0.9999990741832537, iteration: 61518
loss: 1.0191521644592285,grad_norm: 0.8716803682086435, iteration: 61519
loss: 1.0198315382003784,grad_norm: 0.9683770266109472, iteration: 61520
loss: 1.0035182237625122,grad_norm: 0.9432941330303358, iteration: 61521
loss: 1.0430887937545776,grad_norm: 0.9234567137487198, iteration: 61522
loss: 1.0060596466064453,grad_norm: 0.9999991261588642, iteration: 61523
loss: 0.9869173765182495,grad_norm: 0.9999991658419317, iteration: 61524
loss: 1.0309128761291504,grad_norm: 0.9999342764287288, iteration: 61525
loss: 0.9676745533943176,grad_norm: 0.9232265287992972, iteration: 61526
loss: 1.0044190883636475,grad_norm: 0.9999990029151682, iteration: 61527
loss: 0.9791819453239441,grad_norm: 0.8871209079000955, iteration: 61528
loss: 0.9835604429244995,grad_norm: 0.9999990502842943, iteration: 61529
loss: 0.9871702194213867,grad_norm: 0.9999990334248122, iteration: 61530
loss: 0.9884012341499329,grad_norm: 0.9746260952496271, iteration: 61531
loss: 0.9795728325843811,grad_norm: 0.9827725320244571, iteration: 61532
loss: 0.9740692377090454,grad_norm: 0.9792711856050021, iteration: 61533
loss: 1.0269427299499512,grad_norm: 0.9999991328326143, iteration: 61534
loss: 1.0290167331695557,grad_norm: 0.9999990383243035, iteration: 61535
loss: 0.9982602000236511,grad_norm: 0.9977680218342965, iteration: 61536
loss: 0.9768214821815491,grad_norm: 0.9430664331576216, iteration: 61537
loss: 1.0006853342056274,grad_norm: 0.9999991413173132, iteration: 61538
loss: 1.0121338367462158,grad_norm: 0.8632915147000975, iteration: 61539
loss: 0.9937413334846497,grad_norm: 0.8978899190374418, iteration: 61540
loss: 0.9956474900245667,grad_norm: 0.9137710652053711, iteration: 61541
loss: 1.0077669620513916,grad_norm: 0.9246942498652205, iteration: 61542
loss: 0.98135906457901,grad_norm: 0.8626934179863941, iteration: 61543
loss: 1.0255483388900757,grad_norm: 0.999999571932195, iteration: 61544
loss: 0.9765975475311279,grad_norm: 0.8955392001295015, iteration: 61545
loss: 1.0005866289138794,grad_norm: 0.9999990235651852, iteration: 61546
loss: 0.9873794913291931,grad_norm: 0.8172224617077262, iteration: 61547
loss: 1.0324519872665405,grad_norm: 0.9999992139033098, iteration: 61548
loss: 1.028229832649231,grad_norm: 0.9999991378788601, iteration: 61549
loss: 1.0154038667678833,grad_norm: 0.9999989851204109, iteration: 61550
loss: 0.9814901947975159,grad_norm: 0.9622254781793452, iteration: 61551
loss: 0.9976855516433716,grad_norm: 0.9858374717654037, iteration: 61552
loss: 1.013407826423645,grad_norm: 0.9999992572149221, iteration: 61553
loss: 1.0303136110305786,grad_norm: 0.9999993969503796, iteration: 61554
loss: 0.9951027035713196,grad_norm: 0.9999992143330461, iteration: 61555
loss: 1.0099059343338013,grad_norm: 0.9999990955354114, iteration: 61556
loss: 0.9938527345657349,grad_norm: 0.978679571773357, iteration: 61557
loss: 0.9964874982833862,grad_norm: 0.9867552966057818, iteration: 61558
loss: 0.9948833584785461,grad_norm: 0.9999992269394128, iteration: 61559
loss: 0.9824925661087036,grad_norm: 0.9050685945002844, iteration: 61560
loss: 1.0094481706619263,grad_norm: 0.9987162141968958, iteration: 61561
loss: 0.9718310236930847,grad_norm: 0.9999990945857263, iteration: 61562
loss: 0.986545205116272,grad_norm: 0.9999990727031803, iteration: 61563
loss: 1.0039721727371216,grad_norm: 0.999999554048209, iteration: 61564
loss: 0.975385308265686,grad_norm: 0.9438702628883667, iteration: 61565
loss: 1.0126913785934448,grad_norm: 0.999999267462872, iteration: 61566
loss: 0.9947347640991211,grad_norm: 0.9999990155839448, iteration: 61567
loss: 0.9823799729347229,grad_norm: 0.9999990398304224, iteration: 61568
loss: 1.0050017833709717,grad_norm: 0.9606926227443355, iteration: 61569
loss: 1.0182957649230957,grad_norm: 0.9634379778244286, iteration: 61570
loss: 0.9982127547264099,grad_norm: 0.9999991077233877, iteration: 61571
loss: 0.9813741445541382,grad_norm: 0.9151190249111822, iteration: 61572
loss: 1.0253678560256958,grad_norm: 0.9999990721912863, iteration: 61573
loss: 0.9699255228042603,grad_norm: 0.99999886064815, iteration: 61574
loss: 0.9728440642356873,grad_norm: 0.999999118107915, iteration: 61575
loss: 1.0085837841033936,grad_norm: 0.9396340801252999, iteration: 61576
loss: 1.0376063585281372,grad_norm: 0.999999026913494, iteration: 61577
loss: 0.9605016112327576,grad_norm: 0.9940893810741515, iteration: 61578
loss: 0.984210193157196,grad_norm: 0.9999991608699533, iteration: 61579
loss: 0.9853858351707458,grad_norm: 0.8940568413823371, iteration: 61580
loss: 0.9919495582580566,grad_norm: 0.8282086866286409, iteration: 61581
loss: 1.04560387134552,grad_norm: 0.9999992524892918, iteration: 61582
loss: 1.0264761447906494,grad_norm: 0.9999992502856722, iteration: 61583
loss: 1.0110270977020264,grad_norm: 0.8841608434696412, iteration: 61584
loss: 0.9566203355789185,grad_norm: 0.9137705076040415, iteration: 61585
loss: 0.9798641204833984,grad_norm: 0.951786591505962, iteration: 61586
loss: 1.006993055343628,grad_norm: 0.9999990934206381, iteration: 61587
loss: 1.042319416999817,grad_norm: 0.9999991355531678, iteration: 61588
loss: 1.0304535627365112,grad_norm: 0.9693712428142389, iteration: 61589
loss: 1.0111407041549683,grad_norm: 0.9999989564788602, iteration: 61590
loss: 1.0358563661575317,grad_norm: 0.99263491323523, iteration: 61591
loss: 1.0160579681396484,grad_norm: 0.986174317760926, iteration: 61592
loss: 1.0037051439285278,grad_norm: 0.9999990681444234, iteration: 61593
loss: 0.9842478036880493,grad_norm: 0.9999990954604568, iteration: 61594
loss: 0.9796622395515442,grad_norm: 0.8995580597147881, iteration: 61595
loss: 1.0202052593231201,grad_norm: 0.9999991826638465, iteration: 61596
loss: 1.0024144649505615,grad_norm: 0.9986838091842525, iteration: 61597
loss: 0.9697480797767639,grad_norm: 0.9999990710327639, iteration: 61598
loss: 1.0145175457000732,grad_norm: 0.9999991577048238, iteration: 61599
loss: 0.9793568849563599,grad_norm: 0.9999991154086133, iteration: 61600
loss: 0.9642152190208435,grad_norm: 0.9999991596494535, iteration: 61601
loss: 1.0155178308486938,grad_norm: 0.9999991782221934, iteration: 61602
loss: 0.9904304146766663,grad_norm: 0.9999990860389412, iteration: 61603
loss: 1.028799295425415,grad_norm: 0.8987462803129697, iteration: 61604
loss: 1.0208333730697632,grad_norm: 0.9999995987088303, iteration: 61605
loss: 1.0196547508239746,grad_norm: 0.8689408343734083, iteration: 61606
loss: 0.9572529196739197,grad_norm: 0.9999991377525131, iteration: 61607
loss: 1.0108206272125244,grad_norm: 0.9999990865257772, iteration: 61608
loss: 1.0091726779937744,grad_norm: 0.9999990343713062, iteration: 61609
loss: 1.0289498567581177,grad_norm: 0.9999991165102065, iteration: 61610
loss: 1.0181668996810913,grad_norm: 0.9999990546065372, iteration: 61611
loss: 1.029675841331482,grad_norm: 0.9999991844745778, iteration: 61612
loss: 1.0164817571640015,grad_norm: 0.9644609873845513, iteration: 61613
loss: 0.9948301911354065,grad_norm: 0.9218315176064102, iteration: 61614
loss: 1.018563985824585,grad_norm: 0.9485456474146747, iteration: 61615
loss: 0.9663450717926025,grad_norm: 0.9999991909659348, iteration: 61616
loss: 1.026668906211853,grad_norm: 0.9999992019589317, iteration: 61617
loss: 1.0186177492141724,grad_norm: 0.9999992083443341, iteration: 61618
loss: 1.005002737045288,grad_norm: 0.99999901948612, iteration: 61619
loss: 1.0080991983413696,grad_norm: 0.8355812357241557, iteration: 61620
loss: 1.0442180633544922,grad_norm: 0.9999990702593051, iteration: 61621
loss: 1.0191785097122192,grad_norm: 0.9892943395411916, iteration: 61622
loss: 0.9420678615570068,grad_norm: 0.9274128038626429, iteration: 61623
loss: 1.0497251749038696,grad_norm: 0.9999991277342978, iteration: 61624
loss: 1.0069639682769775,grad_norm: 0.9999990475460243, iteration: 61625
loss: 0.9899153709411621,grad_norm: 0.9318515263749204, iteration: 61626
loss: 1.0193501710891724,grad_norm: 0.999999045671503, iteration: 61627
loss: 1.0024317502975464,grad_norm: 0.9999991295169767, iteration: 61628
loss: 0.9989790916442871,grad_norm: 0.9607503255669709, iteration: 61629
loss: 0.9782863259315491,grad_norm: 0.9999990660283504, iteration: 61630
loss: 1.0297982692718506,grad_norm: 0.9999990544097788, iteration: 61631
loss: 0.9765259623527527,grad_norm: 0.947326894962758, iteration: 61632
loss: 0.9936004281044006,grad_norm: 0.9579926719504924, iteration: 61633
loss: 0.9699108600616455,grad_norm: 0.9999989785702766, iteration: 61634
loss: 0.9785829782485962,grad_norm: 0.999999157374132, iteration: 61635
loss: 0.9935425519943237,grad_norm: 0.9999991378603168, iteration: 61636
loss: 0.9601731300354004,grad_norm: 0.907444635148838, iteration: 61637
loss: 0.9918193221092224,grad_norm: 0.9787064384669808, iteration: 61638
loss: 0.9493703246116638,grad_norm: 0.9999990344659345, iteration: 61639
loss: 0.9770513772964478,grad_norm: 0.9926818460567897, iteration: 61640
loss: 1.0085275173187256,grad_norm: 0.9999991249042068, iteration: 61641
loss: 1.0129308700561523,grad_norm: 0.999999145850438, iteration: 61642
loss: 1.0062665939331055,grad_norm: 0.9999992865715485, iteration: 61643
loss: 0.9913581013679504,grad_norm: 0.9999990905608659, iteration: 61644
loss: 1.0437591075897217,grad_norm: 0.8593751522204746, iteration: 61645
loss: 1.0017129182815552,grad_norm: 0.99999903971676, iteration: 61646
loss: 1.000568151473999,grad_norm: 0.9257665802281313, iteration: 61647
loss: 0.9868202805519104,grad_norm: 0.9999989811943388, iteration: 61648
loss: 0.9873525500297546,grad_norm: 0.999999172633877, iteration: 61649
loss: 0.9779718518257141,grad_norm: 0.8348846774926026, iteration: 61650
loss: 0.9989144802093506,grad_norm: 0.9476611899858907, iteration: 61651
loss: 0.9864346981048584,grad_norm: 0.8741403872202385, iteration: 61652
loss: 1.0880837440490723,grad_norm: 0.9999997909872741, iteration: 61653
loss: 0.9607107639312744,grad_norm: 0.9999990025929792, iteration: 61654
loss: 0.9710846543312073,grad_norm: 0.8809989340934751, iteration: 61655
loss: 1.031774640083313,grad_norm: 0.9999990677836083, iteration: 61656
loss: 0.9773697257041931,grad_norm: 0.8706975510587771, iteration: 61657
loss: 1.0192735195159912,grad_norm: 0.9918735238017913, iteration: 61658
loss: 1.0371294021606445,grad_norm: 0.9999992798332067, iteration: 61659
loss: 1.0229507684707642,grad_norm: 0.9263793999936868, iteration: 61660
loss: 0.9993181228637695,grad_norm: 0.9584604259520593, iteration: 61661
loss: 0.975051999092102,grad_norm: 0.9788353963495448, iteration: 61662
loss: 0.9968699812889099,grad_norm: 0.9141187111579628, iteration: 61663
loss: 1.0311534404754639,grad_norm: 0.887752536311787, iteration: 61664
loss: 0.996829092502594,grad_norm: 0.9879103660111154, iteration: 61665
loss: 0.9858736395835876,grad_norm: 0.9999990874050543, iteration: 61666
loss: 1.0060182809829712,grad_norm: 0.9191981031789027, iteration: 61667
loss: 0.97956782579422,grad_norm: 0.9999990169104485, iteration: 61668
loss: 0.9850850701332092,grad_norm: 0.8532866790930792, iteration: 61669
loss: 0.9752689599990845,grad_norm: 0.9999994028353539, iteration: 61670
loss: 1.0026133060455322,grad_norm: 0.9999993035820869, iteration: 61671
loss: 0.9936776161193848,grad_norm: 0.9999991023363187, iteration: 61672
loss: 1.0172237157821655,grad_norm: 0.8873615854907216, iteration: 61673
loss: 1.0410963296890259,grad_norm: 0.910619609580573, iteration: 61674
loss: 0.994242787361145,grad_norm: 0.9999990318542483, iteration: 61675
loss: 1.0147918462753296,grad_norm: 0.8712419061621114, iteration: 61676
loss: 1.024483561515808,grad_norm: 0.9999990236229589, iteration: 61677
loss: 1.0120748281478882,grad_norm: 0.9758580546981538, iteration: 61678
loss: 1.0107427835464478,grad_norm: 0.9999990782536431, iteration: 61679
loss: 1.0119774341583252,grad_norm: 0.9999991708278831, iteration: 61680
loss: 1.0968711376190186,grad_norm: 0.9999994578207453, iteration: 61681
loss: 1.021901249885559,grad_norm: 0.999999164537602, iteration: 61682
loss: 0.9724488854408264,grad_norm: 0.9519044426520613, iteration: 61683
loss: 0.9785886406898499,grad_norm: 0.9999990089302653, iteration: 61684
loss: 0.9954839944839478,grad_norm: 0.9999992140607118, iteration: 61685
loss: 0.9841774702072144,grad_norm: 0.9594450589683848, iteration: 61686
loss: 1.01140296459198,grad_norm: 0.892909005384405, iteration: 61687
loss: 0.9935141801834106,grad_norm: 0.9999990168945907, iteration: 61688
loss: 1.0295138359069824,grad_norm: 0.8797070646119366, iteration: 61689
loss: 0.9995636940002441,grad_norm: 0.9999991693935701, iteration: 61690
loss: 0.9764040112495422,grad_norm: 0.9989261233780025, iteration: 61691
loss: 0.9976491332054138,grad_norm: 0.9999989940117543, iteration: 61692
loss: 0.9970735907554626,grad_norm: 0.8528469663260468, iteration: 61693
loss: 1.0255199670791626,grad_norm: 0.9563815471785223, iteration: 61694
loss: 0.9931959509849548,grad_norm: 0.999999252383934, iteration: 61695
loss: 1.0095384120941162,grad_norm: 0.9999990574523067, iteration: 61696
loss: 1.0044150352478027,grad_norm: 0.9999990238226564, iteration: 61697
loss: 1.028519868850708,grad_norm: 0.9999990771048315, iteration: 61698
loss: 0.9837623238563538,grad_norm: 0.9669656575065003, iteration: 61699
loss: 1.0094380378723145,grad_norm: 0.9594977423677181, iteration: 61700
loss: 0.9950913190841675,grad_norm: 0.9584567988995617, iteration: 61701
loss: 1.0273083448410034,grad_norm: 0.9999991387940637, iteration: 61702
loss: 0.9894372820854187,grad_norm: 0.9999989730382124, iteration: 61703
loss: 0.984491229057312,grad_norm: 0.9999991596797287, iteration: 61704
loss: 1.0040496587753296,grad_norm: 0.9994567313513121, iteration: 61705
loss: 0.9965938329696655,grad_norm: 0.9999989992561392, iteration: 61706
loss: 1.0200920104980469,grad_norm: 0.8613002358662691, iteration: 61707
loss: 0.9797970652580261,grad_norm: 0.92353148161791, iteration: 61708
loss: 1.0193299055099487,grad_norm: 0.9999993710070683, iteration: 61709
loss: 1.0216619968414307,grad_norm: 0.9999991804622334, iteration: 61710
loss: 0.9656421542167664,grad_norm: 0.768193938416249, iteration: 61711
loss: 1.0056757926940918,grad_norm: 0.8020056091454343, iteration: 61712
loss: 1.0411447286605835,grad_norm: 0.8412130311041811, iteration: 61713
loss: 1.0083601474761963,grad_norm: 0.9999989269580397, iteration: 61714
loss: 0.9972535967826843,grad_norm: 0.9834975437633336, iteration: 61715
loss: 0.998077392578125,grad_norm: 0.9370064756141416, iteration: 61716
loss: 0.9793952703475952,grad_norm: 0.9999990864977985, iteration: 61717
loss: 1.0132615566253662,grad_norm: 0.9815747894924752, iteration: 61718
loss: 0.9933108687400818,grad_norm: 0.9999990448590445, iteration: 61719
loss: 1.0108997821807861,grad_norm: 0.9999992192888243, iteration: 61720
loss: 1.035660743713379,grad_norm: 0.9999989330390546, iteration: 61721
loss: 1.009506344795227,grad_norm: 0.9999990879035624, iteration: 61722
loss: 1.0059692859649658,grad_norm: 0.9082847709344675, iteration: 61723
loss: 1.0032750368118286,grad_norm: 0.9999991690494031, iteration: 61724
loss: 0.9955834150314331,grad_norm: 0.8647630509886424, iteration: 61725
loss: 0.989924430847168,grad_norm: 0.8624901695191055, iteration: 61726
loss: 1.0272636413574219,grad_norm: 0.9999990680112387, iteration: 61727
loss: 0.9845896363258362,grad_norm: 0.9999989457987745, iteration: 61728
loss: 0.9712292551994324,grad_norm: 0.9825351110260679, iteration: 61729
loss: 0.9779999852180481,grad_norm: 0.999999259633103, iteration: 61730
loss: 1.0177255868911743,grad_norm: 0.934356769428957, iteration: 61731
loss: 1.0114672183990479,grad_norm: 0.9999991853598368, iteration: 61732
loss: 1.0403002500534058,grad_norm: 0.9705292129113373, iteration: 61733
loss: 1.019911766052246,grad_norm: 0.9881719583641382, iteration: 61734
loss: 1.0480681657791138,grad_norm: 0.9328669136642235, iteration: 61735
loss: 0.9794683456420898,grad_norm: 0.9999992633140702, iteration: 61736
loss: 0.9882695078849792,grad_norm: 0.8925587633034314, iteration: 61737
loss: 1.0291894674301147,grad_norm: 0.9582128990056223, iteration: 61738
loss: 0.985220730304718,grad_norm: 0.8890705192605552, iteration: 61739
loss: 0.9977155327796936,grad_norm: 0.8606907463140064, iteration: 61740
loss: 1.0230447053909302,grad_norm: 0.9787662690003447, iteration: 61741
loss: 1.001404881477356,grad_norm: 0.9999990810714845, iteration: 61742
loss: 0.9713355302810669,grad_norm: 0.9758686543168569, iteration: 61743
loss: 1.0054258108139038,grad_norm: 0.9999991181113644, iteration: 61744
loss: 1.0274291038513184,grad_norm: 0.9999992175838924, iteration: 61745
loss: 0.9632160663604736,grad_norm: 0.9999991237105023, iteration: 61746
loss: 1.0135467052459717,grad_norm: 0.8672591250311734, iteration: 61747
loss: 1.0258811712265015,grad_norm: 0.9999991382796624, iteration: 61748
loss: 1.0010197162628174,grad_norm: 0.961716503756699, iteration: 61749
loss: 0.989050030708313,grad_norm: 0.9999990758407723, iteration: 61750
loss: 0.9867308735847473,grad_norm: 0.999998932450599, iteration: 61751
loss: 1.0073362588882446,grad_norm: 0.9999991273974439, iteration: 61752
loss: 1.0328940153121948,grad_norm: 0.9999991709472312, iteration: 61753
loss: 1.0178256034851074,grad_norm: 0.9999989700367957, iteration: 61754
loss: 0.9947189092636108,grad_norm: 0.9243819540047086, iteration: 61755
loss: 0.9957599639892578,grad_norm: 0.9079058174704252, iteration: 61756
loss: 1.0053356885910034,grad_norm: 0.9999990361849554, iteration: 61757
loss: 0.987607479095459,grad_norm: 0.9999991886328213, iteration: 61758
loss: 1.0285612344741821,grad_norm: 0.9589172463709065, iteration: 61759
loss: 1.0236340761184692,grad_norm: 0.9186926271779249, iteration: 61760
loss: 0.9946770668029785,grad_norm: 0.9999991983946596, iteration: 61761
loss: 1.0178484916687012,grad_norm: 0.9999991167656217, iteration: 61762
loss: 0.9706740379333496,grad_norm: 0.9999991998912544, iteration: 61763
loss: 1.0215818881988525,grad_norm: 0.9639020946761407, iteration: 61764
loss: 0.9813567399978638,grad_norm: 0.999999069974279, iteration: 61765
loss: 1.0470309257507324,grad_norm: 0.9974257038833024, iteration: 61766
loss: 0.97606360912323,grad_norm: 0.9999990564546861, iteration: 61767
loss: 1.0057988166809082,grad_norm: 0.9134863923418494, iteration: 61768
loss: 1.0081232786178589,grad_norm: 0.9999992076704843, iteration: 61769
loss: 1.0177010297775269,grad_norm: 0.9937076676219723, iteration: 61770
loss: 0.9830781817436218,grad_norm: 0.9999989155290013, iteration: 61771
loss: 0.9868619441986084,grad_norm: 0.9999990446464946, iteration: 61772
loss: 0.9920632243156433,grad_norm: 0.9996040355257265, iteration: 61773
loss: 0.9664116501808167,grad_norm: 0.9999991050204914, iteration: 61774
loss: 1.018295168876648,grad_norm: 0.8887932535911816, iteration: 61775
loss: 1.0079996585845947,grad_norm: 0.9999992011576637, iteration: 61776
loss: 0.9660829901695251,grad_norm: 0.987107151859738, iteration: 61777
loss: 1.0584192276000977,grad_norm: 0.9999996401915439, iteration: 61778
loss: 1.0025527477264404,grad_norm: 0.9999990812610755, iteration: 61779
loss: 1.039296269416809,grad_norm: 0.9845354913046508, iteration: 61780
loss: 1.0053858757019043,grad_norm: 0.9440221610447288, iteration: 61781
loss: 1.0416837930679321,grad_norm: 0.9999991209299145, iteration: 61782
loss: 0.9834786653518677,grad_norm: 0.9194872799997665, iteration: 61783
loss: 1.0053608417510986,grad_norm: 0.9999990240040192, iteration: 61784
loss: 1.0002243518829346,grad_norm: 0.9999992996167595, iteration: 61785
loss: 0.9517236351966858,grad_norm: 0.9999991566133016, iteration: 61786
loss: 1.020519495010376,grad_norm: 0.9999989667485263, iteration: 61787
loss: 0.9649337530136108,grad_norm: 0.9150851003124102, iteration: 61788
loss: 0.9722997546195984,grad_norm: 0.9925699288336298, iteration: 61789
loss: 1.0458271503448486,grad_norm: 0.9999990983385262, iteration: 61790
loss: 0.9762638211250305,grad_norm: 0.9999990923418284, iteration: 61791
loss: 1.011083722114563,grad_norm: 0.9999989478869779, iteration: 61792
loss: 0.9501497149467468,grad_norm: 0.9999990651967553, iteration: 61793
loss: 0.9947735071182251,grad_norm: 0.9999999412862784, iteration: 61794
loss: 0.9643451571464539,grad_norm: 0.9418180224752944, iteration: 61795
loss: 0.9469811320304871,grad_norm: 0.9999991772839, iteration: 61796
loss: 1.008049488067627,grad_norm: 0.8427525227328273, iteration: 61797
loss: 0.9848578572273254,grad_norm: 0.9994194738193665, iteration: 61798
loss: 0.967537522315979,grad_norm: 0.999999009258288, iteration: 61799
loss: 0.9698530435562134,grad_norm: 0.9999992462407432, iteration: 61800
loss: 1.0273361206054688,grad_norm: 0.9999994476573718, iteration: 61801
loss: 1.0362428426742554,grad_norm: 0.9999991923113751, iteration: 61802
loss: 0.9809197783470154,grad_norm: 0.940924574441996, iteration: 61803
loss: 0.9501569867134094,grad_norm: 0.8960963682848443, iteration: 61804
loss: 0.950140655040741,grad_norm: 0.9999992619951021, iteration: 61805
loss: 1.012832522392273,grad_norm: 0.9262860586537298, iteration: 61806
loss: 0.9822782874107361,grad_norm: 0.9999991033724478, iteration: 61807
loss: 1.0240767002105713,grad_norm: 0.9501410198379607, iteration: 61808
loss: 1.010789155960083,grad_norm: 0.9999990649287257, iteration: 61809
loss: 0.985749363899231,grad_norm: 0.9999991848741999, iteration: 61810
loss: 1.012316346168518,grad_norm: 0.9999992688783723, iteration: 61811
loss: 1.022230863571167,grad_norm: 0.9094841158934129, iteration: 61812
loss: 1.0018001794815063,grad_norm: 0.9545597925139533, iteration: 61813
loss: 1.0063639879226685,grad_norm: 0.9873941982264939, iteration: 61814
loss: 0.9920911192893982,grad_norm: 0.9435570289555448, iteration: 61815
loss: 0.9937329292297363,grad_norm: 0.9999990460782102, iteration: 61816
loss: 1.0112289190292358,grad_norm: 0.9999990733384724, iteration: 61817
loss: 0.9723274111747742,grad_norm: 0.9942675298109458, iteration: 61818
loss: 0.9991612434387207,grad_norm: 0.9999991295884716, iteration: 61819
loss: 1.0191586017608643,grad_norm: 0.9999993557678228, iteration: 61820
loss: 1.019003987312317,grad_norm: 0.9999990612928183, iteration: 61821
loss: 1.0005923509597778,grad_norm: 0.8208198758532822, iteration: 61822
loss: 0.9458231329917908,grad_norm: 0.9909084813173189, iteration: 61823
loss: 1.0181502103805542,grad_norm: 0.9623907538050762, iteration: 61824
loss: 1.0088908672332764,grad_norm: 0.9999990809829564, iteration: 61825
loss: 1.0425864458084106,grad_norm: 0.9999990721119865, iteration: 61826
loss: 0.9863110184669495,grad_norm: 0.9999990070061303, iteration: 61827
loss: 1.0024210214614868,grad_norm: 0.9999991505855581, iteration: 61828
loss: 1.0055869817733765,grad_norm: 0.9631975903966511, iteration: 61829
loss: 0.9710310101509094,grad_norm: 0.9064713347380958, iteration: 61830
loss: 0.9910895824432373,grad_norm: 0.9542416857881265, iteration: 61831
loss: 1.0223859548568726,grad_norm: 0.9849706392405685, iteration: 61832
loss: 1.0323611497879028,grad_norm: 0.9355610575314898, iteration: 61833
loss: 1.011755347251892,grad_norm: 0.9488981066764722, iteration: 61834
loss: 1.0145049095153809,grad_norm: 0.9514549284075169, iteration: 61835
loss: 0.9735416769981384,grad_norm: 0.9999997527451003, iteration: 61836
loss: 1.0057463645935059,grad_norm: 0.984178722007944, iteration: 61837
loss: 0.9785443544387817,grad_norm: 0.9999991548678654, iteration: 61838
loss: 1.0204917192459106,grad_norm: 0.9999991794389156, iteration: 61839
loss: 0.991157591342926,grad_norm: 0.9737994798621067, iteration: 61840
loss: 0.9997835755348206,grad_norm: 0.9883427119197539, iteration: 61841
loss: 1.013857126235962,grad_norm: 0.9999991840623234, iteration: 61842
loss: 1.0063291788101196,grad_norm: 0.9663289564130715, iteration: 61843
loss: 0.9958849549293518,grad_norm: 0.8313755364531478, iteration: 61844
loss: 0.9938739538192749,grad_norm: 0.9999992188189314, iteration: 61845
loss: 1.0233155488967896,grad_norm: 0.9541033889130772, iteration: 61846
loss: 0.9882791638374329,grad_norm: 0.8773309437068484, iteration: 61847
loss: 1.0096731185913086,grad_norm: 0.9999990723105115, iteration: 61848
loss: 0.9977511167526245,grad_norm: 0.9999991881706587, iteration: 61849
loss: 0.9625203013420105,grad_norm: 0.9934586376231181, iteration: 61850
loss: 1.0023456811904907,grad_norm: 0.999999263705686, iteration: 61851
loss: 1.0146576166152954,grad_norm: 0.999999181219149, iteration: 61852
loss: 1.0077311992645264,grad_norm: 0.8636064347995404, iteration: 61853
loss: 1.0183467864990234,grad_norm: 0.8894850942093966, iteration: 61854
loss: 1.030259132385254,grad_norm: 0.9999992950390723, iteration: 61855
loss: 1.038137674331665,grad_norm: 0.9999993857625517, iteration: 61856
loss: 0.9910739660263062,grad_norm: 0.9999991786863532, iteration: 61857
loss: 1.0382663011550903,grad_norm: 0.9999992672529753, iteration: 61858
loss: 1.0045826435089111,grad_norm: 0.8935602907118204, iteration: 61859
loss: 1.0087562799453735,grad_norm: 0.9999992041879877, iteration: 61860
loss: 1.0348732471466064,grad_norm: 0.999999056131816, iteration: 61861
loss: 0.9786913990974426,grad_norm: 0.95184353029115, iteration: 61862
loss: 1.0018421411514282,grad_norm: 0.9999991361182977, iteration: 61863
loss: 0.9810404181480408,grad_norm: 0.9094214590662507, iteration: 61864
loss: 0.9768106937408447,grad_norm: 0.9993070984066155, iteration: 61865
loss: 1.0182842016220093,grad_norm: 0.9598982618254314, iteration: 61866
loss: 0.9931634068489075,grad_norm: 0.9999989981071021, iteration: 61867
loss: 1.0569814443588257,grad_norm: 0.9999996293340582, iteration: 61868
loss: 0.9869368672370911,grad_norm: 0.9999991516336642, iteration: 61869
loss: 1.0340949296951294,grad_norm: 0.9999991378313287, iteration: 61870
loss: 1.008774757385254,grad_norm: 0.8946248284893654, iteration: 61871
loss: 0.9901778697967529,grad_norm: 0.9999992050827983, iteration: 61872
loss: 0.979831874370575,grad_norm: 0.7677694135779661, iteration: 61873
loss: 1.0347907543182373,grad_norm: 0.9795679171400695, iteration: 61874
loss: 0.9524746537208557,grad_norm: 0.9999991837549645, iteration: 61875
loss: 0.9979602098464966,grad_norm: 0.9999991187842724, iteration: 61876
loss: 1.016135573387146,grad_norm: 0.9999991832382622, iteration: 61877
loss: 0.982968270778656,grad_norm: 0.9658605804961903, iteration: 61878
loss: 1.0165979862213135,grad_norm: 0.9999991760854562, iteration: 61879
loss: 0.9616286754608154,grad_norm: 0.9588351858709522, iteration: 61880
loss: 1.0252153873443604,grad_norm: 0.9284793127933234, iteration: 61881
loss: 0.9857532978057861,grad_norm: 0.9133688995365283, iteration: 61882
loss: 0.9953861236572266,grad_norm: 0.9394231749054865, iteration: 61883
loss: 1.0037672519683838,grad_norm: 0.9999991240473937, iteration: 61884
loss: 0.9751757979393005,grad_norm: 0.9891566845991923, iteration: 61885
loss: 1.0185401439666748,grad_norm: 0.9999991776990657, iteration: 61886
loss: 0.9917742609977722,grad_norm: 0.9824107889853431, iteration: 61887
loss: 1.0172683000564575,grad_norm: 0.9999990927087842, iteration: 61888
loss: 0.9946210980415344,grad_norm: 0.9999992151846788, iteration: 61889
loss: 1.0344785451889038,grad_norm: 0.9999992409265464, iteration: 61890
loss: 0.9869017601013184,grad_norm: 0.9999990437234485, iteration: 61891
loss: 1.0085495710372925,grad_norm: 0.9776889053258021, iteration: 61892
loss: 0.9681165218353271,grad_norm: 0.8916148615979604, iteration: 61893
loss: 0.9859436750411987,grad_norm: 0.9999990665286974, iteration: 61894
loss: 0.98972088098526,grad_norm: 0.9999989418049036, iteration: 61895
loss: 1.0041106939315796,grad_norm: 0.9999992766308022, iteration: 61896
loss: 0.9720394015312195,grad_norm: 0.9471119664533416, iteration: 61897
loss: 1.0006263256072998,grad_norm: 0.9999990344447355, iteration: 61898
loss: 1.047864317893982,grad_norm: 0.9687322867283227, iteration: 61899
loss: 1.004146933555603,grad_norm: 0.9999991318826056, iteration: 61900
loss: 1.0190058946609497,grad_norm: 0.9999991372088806, iteration: 61901
loss: 0.968614399433136,grad_norm: 0.9725919637989279, iteration: 61902
loss: 1.0119504928588867,grad_norm: 0.9763607889597447, iteration: 61903
loss: 1.0067002773284912,grad_norm: 0.9999990672708199, iteration: 61904
loss: 1.0052603483200073,grad_norm: 0.9948039699426393, iteration: 61905
loss: 1.050540566444397,grad_norm: 0.9999991530691738, iteration: 61906
loss: 0.9780322313308716,grad_norm: 0.9999990798779045, iteration: 61907
loss: 0.9791676998138428,grad_norm: 0.9999990745642616, iteration: 61908
loss: 1.0207301378250122,grad_norm: 0.9999993935334364, iteration: 61909
loss: 1.0261800289154053,grad_norm: 0.9434268977200353, iteration: 61910
loss: 1.002897024154663,grad_norm: 0.8823117784986754, iteration: 61911
loss: 1.0205682516098022,grad_norm: 0.9999991521784342, iteration: 61912
loss: 1.015008807182312,grad_norm: 0.95909545991615, iteration: 61913
loss: 0.9707050323486328,grad_norm: 0.7623921404817425, iteration: 61914
loss: 1.0205750465393066,grad_norm: 0.9999990634580707, iteration: 61915
loss: 1.0224131345748901,grad_norm: 0.9999991987892659, iteration: 61916
loss: 0.9781175255775452,grad_norm: 0.9999991838343935, iteration: 61917
loss: 1.0080934762954712,grad_norm: 0.9408748029872254, iteration: 61918
loss: 1.057518720626831,grad_norm: 0.9999994760841485, iteration: 61919
loss: 1.038213849067688,grad_norm: 0.9999989411316562, iteration: 61920
loss: 1.0255935192108154,grad_norm: 0.9999992047135906, iteration: 61921
loss: 0.9746978282928467,grad_norm: 0.9245426572454923, iteration: 61922
loss: 1.0367730855941772,grad_norm: 0.9999992240079453, iteration: 61923
loss: 0.9789174795150757,grad_norm: 0.9999991629503477, iteration: 61924
loss: 1.0424363613128662,grad_norm: 0.9999990603269605, iteration: 61925
loss: 0.9896913170814514,grad_norm: 0.8761953746789024, iteration: 61926
loss: 1.0173008441925049,grad_norm: 0.9951552116221314, iteration: 61927
loss: 1.018747091293335,grad_norm: 0.9974996923377467, iteration: 61928
loss: 1.0023269653320312,grad_norm: 0.9999990605234247, iteration: 61929
loss: 0.9721416234970093,grad_norm: 0.9999990852244988, iteration: 61930
loss: 1.02830970287323,grad_norm: 0.9387043927502876, iteration: 61931
loss: 1.0141454935073853,grad_norm: 0.999999058523927, iteration: 61932
loss: 1.0068018436431885,grad_norm: 0.9731963313689291, iteration: 61933
loss: 0.9909668564796448,grad_norm: 0.9408623169637099, iteration: 61934
loss: 1.0028046369552612,grad_norm: 0.796254916373933, iteration: 61935
loss: 1.0081685781478882,grad_norm: 0.9999991855296289, iteration: 61936
loss: 1.0339972972869873,grad_norm: 0.9999990386670282, iteration: 61937
loss: 1.0037691593170166,grad_norm: 0.9084756748869388, iteration: 61938
loss: 1.020647644996643,grad_norm: 0.906397674872579, iteration: 61939
loss: 1.0248578786849976,grad_norm: 0.8953389447356366, iteration: 61940
loss: 1.0123966932296753,grad_norm: 0.9640108591218789, iteration: 61941
loss: 0.9890196919441223,grad_norm: 0.9302143497509169, iteration: 61942
loss: 1.044752836227417,grad_norm: 0.9999991849315747, iteration: 61943
loss: 0.9768334627151489,grad_norm: 0.9999989700654803, iteration: 61944
loss: 1.0038096904754639,grad_norm: 0.9999991221462611, iteration: 61945
loss: 0.979725182056427,grad_norm: 0.9890825485074979, iteration: 61946
loss: 1.0118545293807983,grad_norm: 0.9999990277892743, iteration: 61947
loss: 1.0110325813293457,grad_norm: 0.9999992602737677, iteration: 61948
loss: 1.0002894401550293,grad_norm: 0.9999992369694175, iteration: 61949
loss: 1.0053505897521973,grad_norm: 0.9241182058561388, iteration: 61950
loss: 0.9875921607017517,grad_norm: 0.9694710118815112, iteration: 61951
loss: 1.007914662361145,grad_norm: 0.9150060589966782, iteration: 61952
loss: 0.9960695505142212,grad_norm: 0.9999992313969428, iteration: 61953
loss: 0.9967998266220093,grad_norm: 0.9999991695265197, iteration: 61954
loss: 0.9869859218597412,grad_norm: 0.9999989861662926, iteration: 61955
loss: 0.9742609262466431,grad_norm: 0.9999990479836763, iteration: 61956
loss: 0.9936497211456299,grad_norm: 0.9999991345169892, iteration: 61957
loss: 1.0017952919006348,grad_norm: 0.9999991635878357, iteration: 61958
loss: 1.0164097547531128,grad_norm: 0.9999990582114172, iteration: 61959
loss: 0.9840556979179382,grad_norm: 0.9999991362518772, iteration: 61960
loss: 1.004168152809143,grad_norm: 0.8346525362086583, iteration: 61961
loss: 1.0057145357131958,grad_norm: 0.9999990333415291, iteration: 61962
loss: 1.002922773361206,grad_norm: 0.8720328926788797, iteration: 61963
loss: 0.995902955532074,grad_norm: 0.9999991568968841, iteration: 61964
loss: 1.0000133514404297,grad_norm: 0.9438439792484811, iteration: 61965
loss: 1.0005539655685425,grad_norm: 0.9844489453340229, iteration: 61966
loss: 1.051908016204834,grad_norm: 0.8653064142792993, iteration: 61967
loss: 1.017372727394104,grad_norm: 0.9999992331758415, iteration: 61968
loss: 0.9953578114509583,grad_norm: 0.9430033569237832, iteration: 61969
loss: 0.9941948652267456,grad_norm: 0.9999992593012188, iteration: 61970
loss: 0.980038583278656,grad_norm: 0.9879971664660665, iteration: 61971
loss: 1.0249077081680298,grad_norm: 0.9999990055808499, iteration: 61972
loss: 1.0143275260925293,grad_norm: 0.8662691499173908, iteration: 61973
loss: 0.9747406840324402,grad_norm: 0.9999990373594903, iteration: 61974
loss: 0.9914951324462891,grad_norm: 0.999999172669001, iteration: 61975
loss: 1.0262500047683716,grad_norm: 0.9999991707236568, iteration: 61976
loss: 1.0253530740737915,grad_norm: 0.9999992122380962, iteration: 61977
loss: 0.9959127306938171,grad_norm: 0.9933804679930838, iteration: 61978
loss: 0.9919936060905457,grad_norm: 0.9805707547152923, iteration: 61979
loss: 0.9893601536750793,grad_norm: 0.9121793477478829, iteration: 61980
loss: 1.0050842761993408,grad_norm: 0.934344818398226, iteration: 61981
loss: 0.9945216774940491,grad_norm: 0.9999992009028258, iteration: 61982
loss: 1.0409406423568726,grad_norm: 0.9999990837755848, iteration: 61983
loss: 1.0126922130584717,grad_norm: 0.7660837801050634, iteration: 61984
loss: 1.0163979530334473,grad_norm: 0.945935068483834, iteration: 61985
loss: 0.9655571579933167,grad_norm: 0.9790894910255706, iteration: 61986
loss: 0.9804077744483948,grad_norm: 0.9871473913121552, iteration: 61987
loss: 0.9926855564117432,grad_norm: 0.9999991338694725, iteration: 61988
loss: 0.9688718914985657,grad_norm: 0.9486225077526338, iteration: 61989
loss: 0.9786051511764526,grad_norm: 0.8942040974367176, iteration: 61990
loss: 0.9919143319129944,grad_norm: 0.9223961485051528, iteration: 61991
loss: 0.9989134073257446,grad_norm: 0.9999990836487612, iteration: 61992
loss: 1.011769413948059,grad_norm: 0.9273458315806282, iteration: 61993
loss: 0.976932942867279,grad_norm: 0.9999990363200353, iteration: 61994
loss: 1.0028125047683716,grad_norm: 0.9862343534992524, iteration: 61995
loss: 1.006016731262207,grad_norm: 0.9999991670780245, iteration: 61996
loss: 0.9933030009269714,grad_norm: 0.91865685082616, iteration: 61997
loss: 0.9961463212966919,grad_norm: 0.9999992660525594, iteration: 61998
loss: 0.9831095933914185,grad_norm: 0.9468225378993489, iteration: 61999
loss: 0.9743836522102356,grad_norm: 0.9999990372765492, iteration: 62000
loss: 1.0210134983062744,grad_norm: 0.9999991911747228, iteration: 62001
loss: 1.002794623374939,grad_norm: 0.9470200963071885, iteration: 62002
loss: 1.0054831504821777,grad_norm: 0.9604392034817323, iteration: 62003
loss: 1.00050687789917,grad_norm: 0.9691935579477607, iteration: 62004
loss: 0.9847826361656189,grad_norm: 0.9999990689372605, iteration: 62005
loss: 0.9952760934829712,grad_norm: 0.9999990233475312, iteration: 62006
loss: 0.9605858325958252,grad_norm: 0.9999991018135096, iteration: 62007
loss: 0.9865912199020386,grad_norm: 0.9999992821228247, iteration: 62008
loss: 1.0273470878601074,grad_norm: 0.9857971323756449, iteration: 62009
loss: 1.0090913772583008,grad_norm: 0.9961144427101672, iteration: 62010
loss: 0.9789830446243286,grad_norm: 0.9999990275689986, iteration: 62011
loss: 0.9904429912567139,grad_norm: 0.9252515468330507, iteration: 62012
loss: 0.9625866413116455,grad_norm: 0.9146495276440072, iteration: 62013
loss: 0.986365795135498,grad_norm: 0.9916222454384821, iteration: 62014
loss: 0.9778438806533813,grad_norm: 0.9999990381944457, iteration: 62015
loss: 1.0158774852752686,grad_norm: 0.9999991161376383, iteration: 62016
loss: 0.9962794780731201,grad_norm: 0.984230058334747, iteration: 62017
loss: 1.0117985010147095,grad_norm: 0.9318173071142706, iteration: 62018
loss: 1.0329097509384155,grad_norm: 0.9426198900941813, iteration: 62019
loss: 0.9731543660163879,grad_norm: 0.990247911940877, iteration: 62020
loss: 1.0115207433700562,grad_norm: 0.8627460937140711, iteration: 62021
loss: 0.9941823482513428,grad_norm: 0.9999991298022741, iteration: 62022
loss: 1.000594973564148,grad_norm: 0.8795996198005139, iteration: 62023
loss: 0.980094313621521,grad_norm: 0.9999991732799098, iteration: 62024
loss: 1.0614601373672485,grad_norm: 0.9999990199536309, iteration: 62025
loss: 1.0409510135650635,grad_norm: 0.9999991818084667, iteration: 62026
loss: 0.9889596700668335,grad_norm: 0.866691525714316, iteration: 62027
loss: 1.0212557315826416,grad_norm: 0.856814525120168, iteration: 62028
loss: 0.97733473777771,grad_norm: 0.9999991325952421, iteration: 62029
loss: 1.0067564249038696,grad_norm: 0.9999991474014619, iteration: 62030
loss: 0.9864824414253235,grad_norm: 0.9999991038181985, iteration: 62031
loss: 0.9915734529495239,grad_norm: 0.9999992219975957, iteration: 62032
loss: 0.9840123653411865,grad_norm: 0.9999989916464888, iteration: 62033
loss: 1.0314314365386963,grad_norm: 0.8820990018251362, iteration: 62034
loss: 1.0080265998840332,grad_norm: 0.8143965823313014, iteration: 62035
loss: 0.9985554814338684,grad_norm: 0.9999991994850503, iteration: 62036
loss: 1.0081361532211304,grad_norm: 0.9999990545207974, iteration: 62037
loss: 1.009623408317566,grad_norm: 0.999999383640877, iteration: 62038
loss: 1.0143951177597046,grad_norm: 0.9866884971583367, iteration: 62039
loss: 1.0133600234985352,grad_norm: 0.9999991183394015, iteration: 62040
loss: 0.9913839101791382,grad_norm: 0.9999991460353012, iteration: 62041
loss: 1.0065956115722656,grad_norm: 0.9999991896653379, iteration: 62042
loss: 0.9813237190246582,grad_norm: 0.9480628429737272, iteration: 62043
loss: 0.9957733750343323,grad_norm: 0.9464346592211023, iteration: 62044
loss: 0.9919090270996094,grad_norm: 0.9999990343433149, iteration: 62045
loss: 1.0030815601348877,grad_norm: 0.9999990116245357, iteration: 62046
loss: 1.0183402299880981,grad_norm: 0.9590603703476794, iteration: 62047
loss: 1.0124285221099854,grad_norm: 0.9550173434133774, iteration: 62048
loss: 0.9398816823959351,grad_norm: 0.9281565241393941, iteration: 62049
loss: 1.0021636486053467,grad_norm: 0.9999991577479821, iteration: 62050
loss: 1.0021066665649414,grad_norm: 0.9999990001758486, iteration: 62051
loss: 0.9896309971809387,grad_norm: 0.9359218450857634, iteration: 62052
loss: 0.9701659083366394,grad_norm: 0.9999991908256992, iteration: 62053
loss: 0.969761312007904,grad_norm: 0.9999990396842393, iteration: 62054
loss: 0.9903059601783752,grad_norm: 0.9999991754968618, iteration: 62055
loss: 0.994525134563446,grad_norm: 0.9999990385868373, iteration: 62056
loss: 1.0102338790893555,grad_norm: 0.9263782794403093, iteration: 62057
loss: 0.9438876509666443,grad_norm: 0.9093887095143839, iteration: 62058
loss: 1.0113413333892822,grad_norm: 0.9999993681211474, iteration: 62059
loss: 0.9823558330535889,grad_norm: 0.9350273682826681, iteration: 62060
loss: 0.9791024923324585,grad_norm: 0.8422170267608394, iteration: 62061
loss: 0.9685679078102112,grad_norm: 0.8492596379358295, iteration: 62062
loss: 1.009652853012085,grad_norm: 0.993927483648498, iteration: 62063
loss: 0.9596843719482422,grad_norm: 0.9999992779913977, iteration: 62064
loss: 1.055423617362976,grad_norm: 0.9970645187343594, iteration: 62065
loss: 0.9869347214698792,grad_norm: 0.9387154030476587, iteration: 62066
loss: 1.012070894241333,grad_norm: 0.9999991866632749, iteration: 62067
loss: 0.952469527721405,grad_norm: 0.9804844916174865, iteration: 62068
loss: 1.0240403413772583,grad_norm: 0.9121003354783623, iteration: 62069
loss: 1.0192115306854248,grad_norm: 0.999999168227537, iteration: 62070
loss: 1.0399212837219238,grad_norm: 0.9999991018905672, iteration: 62071
loss: 1.0238522291183472,grad_norm: 0.8918765725637056, iteration: 62072
loss: 0.9962430000305176,grad_norm: 0.999999149727216, iteration: 62073
loss: 1.0160411596298218,grad_norm: 0.9999990147238427, iteration: 62074
loss: 0.9968061447143555,grad_norm: 0.9999992230466305, iteration: 62075
loss: 1.074048638343811,grad_norm: 0.9999992459912825, iteration: 62076
loss: 0.9878614544868469,grad_norm: 0.9999996313720743, iteration: 62077
loss: 1.0254441499710083,grad_norm: 0.9999992210003215, iteration: 62078
loss: 0.9869926571846008,grad_norm: 0.9043894181821044, iteration: 62079
loss: 1.0072355270385742,grad_norm: 0.7394907269316408, iteration: 62080
loss: 1.0283344984054565,grad_norm: 0.9617173973362607, iteration: 62081
loss: 1.0154725313186646,grad_norm: 0.9659522903320661, iteration: 62082
loss: 0.9425196647644043,grad_norm: 0.999999158072148, iteration: 62083
loss: 1.047778606414795,grad_norm: 0.9999990736240709, iteration: 62084
loss: 1.0002354383468628,grad_norm: 0.9999990899825136, iteration: 62085
loss: 0.9751908779144287,grad_norm: 0.9999990112876745, iteration: 62086
loss: 0.9899992346763611,grad_norm: 0.9727064748509602, iteration: 62087
loss: 0.9908188581466675,grad_norm: 0.7693896154912324, iteration: 62088
loss: 1.0245651006698608,grad_norm: 0.8681022513156336, iteration: 62089
loss: 0.9978263974189758,grad_norm: 0.9924474187903182, iteration: 62090
loss: 1.0083966255187988,grad_norm: 0.9999991406676151, iteration: 62091
loss: 1.0049772262573242,grad_norm: 0.8921014550289613, iteration: 62092
loss: 0.9824483394622803,grad_norm: 0.9944696327308947, iteration: 62093
loss: 0.975929856300354,grad_norm: 0.9999991052052093, iteration: 62094
loss: 1.0047215223312378,grad_norm: 0.9999990143316853, iteration: 62095
loss: 0.9919048547744751,grad_norm: 0.8972940532084793, iteration: 62096
loss: 0.9959344863891602,grad_norm: 0.9999996065321682, iteration: 62097
loss: 0.9899415969848633,grad_norm: 0.9544909444605475, iteration: 62098
loss: 0.9916675090789795,grad_norm: 0.7894995230930214, iteration: 62099
loss: 1.0276062488555908,grad_norm: 0.9999989197747027, iteration: 62100
loss: 0.9676446914672852,grad_norm: 0.999999052144086, iteration: 62101
loss: 1.0049679279327393,grad_norm: 0.7850179214529796, iteration: 62102
loss: 1.0068037509918213,grad_norm: 0.9332984951309465, iteration: 62103
loss: 1.0217591524124146,grad_norm: 0.9999993763285604, iteration: 62104
loss: 0.9371259808540344,grad_norm: 0.9999990988907606, iteration: 62105
loss: 0.984790563583374,grad_norm: 0.9999990540723568, iteration: 62106
loss: 0.9582450985908508,grad_norm: 0.9964099281682405, iteration: 62107
loss: 0.9922625422477722,grad_norm: 0.9999989577653182, iteration: 62108
loss: 1.002478003501892,grad_norm: 0.8016577826238737, iteration: 62109
loss: 1.0323305130004883,grad_norm: 0.897246465343104, iteration: 62110
loss: 1.0141030550003052,grad_norm: 0.8719222720561685, iteration: 62111
loss: 1.041912317276001,grad_norm: 0.9999991268775175, iteration: 62112
loss: 0.9946337342262268,grad_norm: 0.9999991547308945, iteration: 62113
loss: 0.9872811436653137,grad_norm: 0.9117468499175204, iteration: 62114
loss: 0.9973810911178589,grad_norm: 0.9999991104838711, iteration: 62115
loss: 0.9964352250099182,grad_norm: 0.9641054979022563, iteration: 62116
loss: 0.9886835217475891,grad_norm: 0.8779255109242937, iteration: 62117
loss: 0.9948263764381409,grad_norm: 0.999999011241763, iteration: 62118
loss: 1.0201961994171143,grad_norm: 0.9999992667908008, iteration: 62119
loss: 1.0453745126724243,grad_norm: 0.9999990163436752, iteration: 62120
loss: 1.0005483627319336,grad_norm: 0.9999990531740871, iteration: 62121
loss: 1.002488136291504,grad_norm: 0.9999990915251653, iteration: 62122
loss: 0.9985796809196472,grad_norm: 0.9999991543661884, iteration: 62123
loss: 1.0030640363693237,grad_norm: 0.9322669512508565, iteration: 62124
loss: 0.9742395281791687,grad_norm: 0.9999991381391974, iteration: 62125
loss: 1.0136635303497314,grad_norm: 0.9220307958589492, iteration: 62126
loss: 0.9775063991546631,grad_norm: 0.908667452954541, iteration: 62127
loss: 0.9964010715484619,grad_norm: 0.9999990206855719, iteration: 62128
loss: 0.9857611060142517,grad_norm: 0.882310764774961, iteration: 62129
loss: 1.0239838361740112,grad_norm: 0.9779365878176935, iteration: 62130
loss: 0.9547067284584045,grad_norm: 0.9999991639582109, iteration: 62131
loss: 1.006174921989441,grad_norm: 0.8010501947538773, iteration: 62132
loss: 0.9854779839515686,grad_norm: 0.999999184565911, iteration: 62133
loss: 1.0210505723953247,grad_norm: 0.9999990639026924, iteration: 62134
loss: 1.01054048538208,grad_norm: 0.9228725220424947, iteration: 62135
loss: 0.9541648626327515,grad_norm: 0.9161921026697839, iteration: 62136
loss: 0.9715195298194885,grad_norm: 0.9999990702124457, iteration: 62137
loss: 1.0018811225891113,grad_norm: 0.9999991447937835, iteration: 62138
loss: 0.9803085923194885,grad_norm: 0.999999200535915, iteration: 62139
loss: 0.9774866700172424,grad_norm: 0.9999995988375958, iteration: 62140
loss: 1.0157325267791748,grad_norm: 0.9999991916004881, iteration: 62141
loss: 0.9971538782119751,grad_norm: 0.9999992119780552, iteration: 62142
loss: 1.0102570056915283,grad_norm: 0.9768867638441594, iteration: 62143
loss: 0.9973132610321045,grad_norm: 0.991619995504692, iteration: 62144
loss: 1.0326993465423584,grad_norm: 0.9999993131204372, iteration: 62145
loss: 1.0328829288482666,grad_norm: 0.8557944055223881, iteration: 62146
loss: 1.0048918724060059,grad_norm: 0.9999989909815765, iteration: 62147
loss: 0.9771814346313477,grad_norm: 0.9908025563182371, iteration: 62148
loss: 0.9886299967765808,grad_norm: 0.9999992840896911, iteration: 62149
loss: 0.9901346564292908,grad_norm: 0.9279654866981203, iteration: 62150
loss: 0.99479740858078,grad_norm: 0.9389876930571295, iteration: 62151
loss: 1.0297516584396362,grad_norm: 0.9999992540321384, iteration: 62152
loss: 1.0130536556243896,grad_norm: 0.9999990941136446, iteration: 62153
loss: 0.9800609946250916,grad_norm: 0.9999991329306946, iteration: 62154
loss: 0.9800160527229309,grad_norm: 0.878092158506515, iteration: 62155
loss: 0.9964469075202942,grad_norm: 0.8811022562202679, iteration: 62156
loss: 0.9825138449668884,grad_norm: 0.981430938860763, iteration: 62157
loss: 0.9937889575958252,grad_norm: 0.985054664707833, iteration: 62158
loss: 1.0337525606155396,grad_norm: 0.9999992532289873, iteration: 62159
loss: 1.0000884532928467,grad_norm: 0.9999992057838409, iteration: 62160
loss: 0.9995706081390381,grad_norm: 0.920879930012315, iteration: 62161
loss: 0.9966495633125305,grad_norm: 0.9999991109826508, iteration: 62162
loss: 1.032094120979309,grad_norm: 0.9999993067265696, iteration: 62163
loss: 1.0100362300872803,grad_norm: 0.9999991851101934, iteration: 62164
loss: 1.0442311763763428,grad_norm: 0.9277451129474467, iteration: 62165
loss: 1.0066041946411133,grad_norm: 0.9999990496460387, iteration: 62166
loss: 1.1232142448425293,grad_norm: 0.9999997714962358, iteration: 62167
loss: 0.9283764958381653,grad_norm: 0.9999991910297347, iteration: 62168
loss: 1.0016024112701416,grad_norm: 0.9671059152404845, iteration: 62169
loss: 1.030554175376892,grad_norm: 0.9490006395640723, iteration: 62170
loss: 0.9895951151847839,grad_norm: 0.9541406566150861, iteration: 62171
loss: 1.0301401615142822,grad_norm: 0.9999989921101918, iteration: 62172
loss: 1.0387088060379028,grad_norm: 0.9999991704124656, iteration: 62173
loss: 1.0005182027816772,grad_norm: 0.9999990916506256, iteration: 62174
loss: 1.036118507385254,grad_norm: 0.9460716217385744, iteration: 62175
loss: 1.015167236328125,grad_norm: 0.9999992475433845, iteration: 62176
loss: 0.9371770620346069,grad_norm: 0.9999990171303752, iteration: 62177
loss: 1.0398637056350708,grad_norm: 0.9999993952475185, iteration: 62178
loss: 1.0033231973648071,grad_norm: 0.9999990137301072, iteration: 62179
loss: 1.0032274723052979,grad_norm: 0.9999990682715267, iteration: 62180
loss: 0.9842069745063782,grad_norm: 0.8731031989646972, iteration: 62181
loss: 0.9946742653846741,grad_norm: 0.9307179127745338, iteration: 62182
loss: 1.0144258737564087,grad_norm: 0.8957128842660584, iteration: 62183
loss: 1.005242109298706,grad_norm: 0.9999991400076496, iteration: 62184
loss: 1.0289032459259033,grad_norm: 0.999999120868111, iteration: 62185
loss: 1.00530207157135,grad_norm: 0.9999990073764314, iteration: 62186
loss: 0.9961317181587219,grad_norm: 0.8044921364789931, iteration: 62187
loss: 1.0030755996704102,grad_norm: 0.9999992111283802, iteration: 62188
loss: 1.0056397914886475,grad_norm: 0.9999990378065694, iteration: 62189
loss: 1.0006885528564453,grad_norm: 0.8677371388474194, iteration: 62190
loss: 1.0270981788635254,grad_norm: 0.9999991798053043, iteration: 62191
loss: 0.9672878384590149,grad_norm: 0.9999990335287077, iteration: 62192
loss: 0.9827161431312561,grad_norm: 0.9999996352113424, iteration: 62193
loss: 1.0032140016555786,grad_norm: 0.9999990245596637, iteration: 62194
loss: 0.9639049768447876,grad_norm: 0.9484537368300147, iteration: 62195
loss: 1.0010385513305664,grad_norm: 0.9999990686176358, iteration: 62196
loss: 1.0229065418243408,grad_norm: 0.999999252416161, iteration: 62197
loss: 0.958736002445221,grad_norm: 0.9452788237424736, iteration: 62198
loss: 0.9736650586128235,grad_norm: 0.9999992344390548, iteration: 62199
loss: 0.9829726219177246,grad_norm: 0.999999061961075, iteration: 62200
loss: 1.0477232933044434,grad_norm: 0.999999076272239, iteration: 62201
loss: 0.9652020335197449,grad_norm: 0.9999991999615822, iteration: 62202
loss: 0.9657595753669739,grad_norm: 0.9334550500086408, iteration: 62203
loss: 1.0129889249801636,grad_norm: 0.9999991102396619, iteration: 62204
loss: 1.0199815034866333,grad_norm: 0.9999991471038648, iteration: 62205
loss: 0.9758469462394714,grad_norm: 0.917067604577579, iteration: 62206
loss: 1.0071759223937988,grad_norm: 0.9596006190064432, iteration: 62207
loss: 0.9629323482513428,grad_norm: 0.9999991176864713, iteration: 62208
loss: 0.9965415000915527,grad_norm: 0.949990294572125, iteration: 62209
loss: 1.0042392015457153,grad_norm: 0.9748322756455433, iteration: 62210
loss: 1.0184576511383057,grad_norm: 0.966353732685138, iteration: 62211
loss: 1.0066943168640137,grad_norm: 0.9999991554907085, iteration: 62212
loss: 1.040169596672058,grad_norm: 0.9999991212206244, iteration: 62213
loss: 1.0016494989395142,grad_norm: 0.9999990839448614, iteration: 62214
loss: 0.9842291474342346,grad_norm: 0.9999992198070643, iteration: 62215
loss: 1.0108532905578613,grad_norm: 0.8236470969223845, iteration: 62216
loss: 1.0165833234786987,grad_norm: 0.9999990405864574, iteration: 62217
loss: 1.0166480541229248,grad_norm: 0.9525453211417657, iteration: 62218
loss: 0.9928309321403503,grad_norm: 0.9635021558826162, iteration: 62219
loss: 0.9982039928436279,grad_norm: 0.9999991117035368, iteration: 62220
loss: 1.0069783926010132,grad_norm: 0.9999992158254049, iteration: 62221
loss: 1.0088356733322144,grad_norm: 0.9999989718897061, iteration: 62222
loss: 1.0051542520523071,grad_norm: 0.877798619910272, iteration: 62223
loss: 1.009750247001648,grad_norm: 0.9999990104587614, iteration: 62224
loss: 1.0045602321624756,grad_norm: 0.9999989292349047, iteration: 62225
loss: 1.0259727239608765,grad_norm: 0.99999931011563, iteration: 62226
loss: 1.1437487602233887,grad_norm: 0.9999999724632708, iteration: 62227
loss: 0.9885618686676025,grad_norm: 0.8857296618743062, iteration: 62228
loss: 1.0314041376113892,grad_norm: 0.9999998855490961, iteration: 62229
loss: 0.9756513237953186,grad_norm: 0.9999991994014008, iteration: 62230
loss: 0.982343316078186,grad_norm: 0.8309087856194604, iteration: 62231
loss: 0.9846015572547913,grad_norm: 0.9999992427465366, iteration: 62232
loss: 1.0008512735366821,grad_norm: 0.9999990972666124, iteration: 62233
loss: 0.9736117720603943,grad_norm: 0.9999990961803141, iteration: 62234
loss: 1.0183765888214111,grad_norm: 0.9999990518247985, iteration: 62235
loss: 0.9720601439476013,grad_norm: 0.8907475253317578, iteration: 62236
loss: 0.9785076379776001,grad_norm: 0.9489788193582773, iteration: 62237
loss: 1.0005054473876953,grad_norm: 0.9999991614414405, iteration: 62238
loss: 1.0236377716064453,grad_norm: 0.999999178313239, iteration: 62239
loss: 1.0279591083526611,grad_norm: 0.9999992271061916, iteration: 62240
loss: 1.026841402053833,grad_norm: 0.9999991293513242, iteration: 62241
loss: 1.0053225755691528,grad_norm: 0.8720084866398901, iteration: 62242
loss: 1.0183660984039307,grad_norm: 0.9999989865791591, iteration: 62243
loss: 0.9818306565284729,grad_norm: 0.9999990442815953, iteration: 62244
loss: 0.9776432514190674,grad_norm: 0.8542903176753922, iteration: 62245
loss: 0.9851749539375305,grad_norm: 0.9431021332397042, iteration: 62246
loss: 1.0049384832382202,grad_norm: 0.9999991891464846, iteration: 62247
loss: 0.9767569303512573,grad_norm: 0.9999991225892819, iteration: 62248
loss: 0.9974302053451538,grad_norm: 0.9439149336223435, iteration: 62249
loss: 1.0194543600082397,grad_norm: 0.999999208427212, iteration: 62250
loss: 0.9947512149810791,grad_norm: 0.9999991493282003, iteration: 62251
loss: 0.999498188495636,grad_norm: 0.8340024136863279, iteration: 62252
loss: 1.0086085796356201,grad_norm: 0.9737987773687902, iteration: 62253
loss: 1.0047818422317505,grad_norm: 0.9999990432937987, iteration: 62254
loss: 1.0189933776855469,grad_norm: 0.9469998035419266, iteration: 62255
loss: 0.9958603382110596,grad_norm: 0.8978424301870428, iteration: 62256
loss: 1.0353785753250122,grad_norm: 0.9999990085915286, iteration: 62257
loss: 1.0411128997802734,grad_norm: 0.9999999288119666, iteration: 62258
loss: 1.0168819427490234,grad_norm: 0.9999991657906462, iteration: 62259
loss: 0.9708356857299805,grad_norm: 0.8619745977209046, iteration: 62260
loss: 1.0012673139572144,grad_norm: 0.9999991756353785, iteration: 62261
loss: 0.9842464327812195,grad_norm: 0.9999990345760156, iteration: 62262
loss: 1.0230741500854492,grad_norm: 0.9999992890491424, iteration: 62263
loss: 0.9977098703384399,grad_norm: 0.8276899380777181, iteration: 62264
loss: 0.9700588583946228,grad_norm: 0.9876901680473017, iteration: 62265
loss: 0.9662659764289856,grad_norm: 0.9999992527104311, iteration: 62266
loss: 0.9555042386054993,grad_norm: 0.9999990662371208, iteration: 62267
loss: 1.0126689672470093,grad_norm: 0.9457220011034856, iteration: 62268
loss: 0.9979263544082642,grad_norm: 0.9999990984414708, iteration: 62269
loss: 1.0238010883331299,grad_norm: 0.9532184198542032, iteration: 62270
loss: 0.989419162273407,grad_norm: 0.9245716636919508, iteration: 62271
loss: 0.9890778660774231,grad_norm: 0.7285554245900854, iteration: 62272
loss: 0.9903321266174316,grad_norm: 0.9999991921623128, iteration: 62273
loss: 1.0108598470687866,grad_norm: 0.9999992186099309, iteration: 62274
loss: 1.0313007831573486,grad_norm: 0.9999993191255762, iteration: 62275
loss: 1.0252772569656372,grad_norm: 0.9620837304669014, iteration: 62276
loss: 1.0127894878387451,grad_norm: 0.9999991698503339, iteration: 62277
loss: 1.0294826030731201,grad_norm: 0.9999990569843572, iteration: 62278
loss: 1.0465002059936523,grad_norm: 0.9999991836066142, iteration: 62279
loss: 0.98658686876297,grad_norm: 0.9239711902703561, iteration: 62280
loss: 1.0030657052993774,grad_norm: 0.9061611017523772, iteration: 62281
loss: 1.0014370679855347,grad_norm: 0.9999990882842181, iteration: 62282
loss: 1.0177772045135498,grad_norm: 0.9999990654337113, iteration: 62283
loss: 1.032114028930664,grad_norm: 0.9999994536445819, iteration: 62284
loss: 0.9848740696907043,grad_norm: 0.8972536357099301, iteration: 62285
loss: 0.9840686321258545,grad_norm: 0.9999992022443246, iteration: 62286
loss: 1.0154860019683838,grad_norm: 0.9999992658258146, iteration: 62287
loss: 0.9733213186264038,grad_norm: 0.9999992214371687, iteration: 62288
loss: 1.0261192321777344,grad_norm: 0.9999992275601893, iteration: 62289
loss: 1.0197280645370483,grad_norm: 0.9151021873405346, iteration: 62290
loss: 1.007541298866272,grad_norm: 0.9437715925676485, iteration: 62291
loss: 1.0199534893035889,grad_norm: 0.999999133245155, iteration: 62292
loss: 1.0096302032470703,grad_norm: 0.8912473809694013, iteration: 62293
loss: 1.0009031295776367,grad_norm: 0.9999990969291624, iteration: 62294
loss: 1.0321197509765625,grad_norm: 0.9955182656292137, iteration: 62295
loss: 0.9912354946136475,grad_norm: 0.9999991214695957, iteration: 62296
loss: 0.9833143353462219,grad_norm: 0.9999990497244167, iteration: 62297
loss: 0.9593310356140137,grad_norm: 0.9999990599147537, iteration: 62298
loss: 1.0186471939086914,grad_norm: 0.9999993920083838, iteration: 62299
loss: 0.9782411456108093,grad_norm: 0.884307431431703, iteration: 62300
loss: 1.0092636346817017,grad_norm: 0.9960258639617585, iteration: 62301
loss: 0.9899217486381531,grad_norm: 0.8980187561168524, iteration: 62302
loss: 0.9730321764945984,grad_norm: 0.9999991690249945, iteration: 62303
loss: 1.0307713747024536,grad_norm: 0.9999990390184744, iteration: 62304
loss: 0.9512167572975159,grad_norm: 0.9592008045014642, iteration: 62305
loss: 1.0082213878631592,grad_norm: 0.9682868056168823, iteration: 62306
loss: 1.0117238759994507,grad_norm: 0.9999990124269765, iteration: 62307
loss: 1.0021766424179077,grad_norm: 0.889323212200229, iteration: 62308
loss: 0.9923200011253357,grad_norm: 0.9999991226874662, iteration: 62309
loss: 1.0079914331436157,grad_norm: 0.940755083864286, iteration: 62310
loss: 0.9924120306968689,grad_norm: 0.9744154437904777, iteration: 62311
loss: 0.9518429040908813,grad_norm: 0.9999990017086519, iteration: 62312
loss: 1.0213819742202759,grad_norm: 0.9999991769068646, iteration: 62313
loss: 1.007927417755127,grad_norm: 0.9999999422824852, iteration: 62314
loss: 0.9777998328208923,grad_norm: 0.9999991999968192, iteration: 62315
loss: 0.9892975091934204,grad_norm: 0.9122256115680376, iteration: 62316
loss: 0.9792525768280029,grad_norm: 0.9999991281593539, iteration: 62317
loss: 0.9574825167655945,grad_norm: 0.9857151774884207, iteration: 62318
loss: 0.9997496008872986,grad_norm: 0.874742416950171, iteration: 62319
loss: 1.0230348110198975,grad_norm: 0.9999992489896619, iteration: 62320
loss: 1.0080511569976807,grad_norm: 0.9397798819317895, iteration: 62321
loss: 0.9927347302436829,grad_norm: 0.9999991204922613, iteration: 62322
loss: 0.9804130792617798,grad_norm: 0.9999991066579983, iteration: 62323
loss: 1.00426185131073,grad_norm: 0.9999992242170629, iteration: 62324
loss: 1.03123140335083,grad_norm: 0.9999991739290774, iteration: 62325
loss: 0.9940361380577087,grad_norm: 0.999999294452257, iteration: 62326
loss: 0.9589556455612183,grad_norm: 0.9999990828034855, iteration: 62327
loss: 1.0043365955352783,grad_norm: 0.9999990736981981, iteration: 62328
loss: 0.9844343662261963,grad_norm: 0.9969678912437137, iteration: 62329
loss: 1.063713550567627,grad_norm: 0.9999989575932546, iteration: 62330
loss: 0.9617742300033569,grad_norm: 0.9572490320867638, iteration: 62331
loss: 1.002791404724121,grad_norm: 0.9999991380309896, iteration: 62332
loss: 1.0071282386779785,grad_norm: 0.8277584915070173, iteration: 62333
loss: 1.0291496515274048,grad_norm: 0.9999991412564425, iteration: 62334
loss: 1.0200878381729126,grad_norm: 0.9847403942368587, iteration: 62335
loss: 1.0063997507095337,grad_norm: 0.9618841768849812, iteration: 62336
loss: 1.019142746925354,grad_norm: 0.999999347779523, iteration: 62337
loss: 1.0162906646728516,grad_norm: 0.8270103853234456, iteration: 62338
loss: 1.0220943689346313,grad_norm: 0.9999990322409134, iteration: 62339
loss: 1.0069993734359741,grad_norm: 0.8697015107986377, iteration: 62340
loss: 0.9812384247779846,grad_norm: 0.9578246140020108, iteration: 62341
loss: 1.0062520503997803,grad_norm: 0.9999990165988232, iteration: 62342
loss: 0.9963177442550659,grad_norm: 0.9482020081394678, iteration: 62343
loss: 1.007013201713562,grad_norm: 0.9758044321010866, iteration: 62344
loss: 1.0198756456375122,grad_norm: 0.9999992482965303, iteration: 62345
loss: 1.0260740518569946,grad_norm: 0.9893857519730453, iteration: 62346
loss: 0.9971923232078552,grad_norm: 0.9999990443001535, iteration: 62347
loss: 0.9899029731750488,grad_norm: 0.9128373477028849, iteration: 62348
loss: 0.9743279814720154,grad_norm: 0.9247290229903139, iteration: 62349
loss: 0.9984008073806763,grad_norm: 0.9551414485125624, iteration: 62350
loss: 1.0006139278411865,grad_norm: 0.9886603945214326, iteration: 62351
loss: 1.010739803314209,grad_norm: 0.9743435895904802, iteration: 62352
loss: 1.0084037780761719,grad_norm: 0.9999991363933135, iteration: 62353
loss: 1.0465527772903442,grad_norm: 0.9999991465994829, iteration: 62354
loss: 1.0181535482406616,grad_norm: 0.9999992287466827, iteration: 62355
loss: 0.9570136070251465,grad_norm: 0.9287209347104322, iteration: 62356
loss: 1.0338259935379028,grad_norm: 0.9999990900633781, iteration: 62357
loss: 1.0006871223449707,grad_norm: 0.9019509478298148, iteration: 62358
loss: 1.0159292221069336,grad_norm: 0.9999989355298484, iteration: 62359
loss: 1.0183537006378174,grad_norm: 0.9150146128018981, iteration: 62360
loss: 0.998188853263855,grad_norm: 0.9687852313294459, iteration: 62361
loss: 1.0043748617172241,grad_norm: 0.9999990500020886, iteration: 62362
loss: 1.0122990608215332,grad_norm: 0.9999990711126937, iteration: 62363
loss: 1.050889015197754,grad_norm: 0.9999993427475069, iteration: 62364
loss: 1.022169589996338,grad_norm: 0.9999991047479112, iteration: 62365
loss: 0.9728149771690369,grad_norm: 0.9999990213875893, iteration: 62366
loss: 0.9836768507957458,grad_norm: 0.9999990731333787, iteration: 62367
loss: 0.9522448778152466,grad_norm: 0.9999991701752035, iteration: 62368
loss: 1.0072811841964722,grad_norm: 0.9999992794069654, iteration: 62369
loss: 1.0046558380126953,grad_norm: 0.9999990715081637, iteration: 62370
loss: 0.9780761003494263,grad_norm: 0.9999990149942112, iteration: 62371
loss: 1.0294536352157593,grad_norm: 0.9173247016890226, iteration: 62372
loss: 0.9953996539115906,grad_norm: 0.9999990520875339, iteration: 62373
loss: 0.9969112873077393,grad_norm: 0.8836501534965536, iteration: 62374
loss: 0.9793745279312134,grad_norm: 0.9999989943299117, iteration: 62375
loss: 0.9996756911277771,grad_norm: 0.9999991632176691, iteration: 62376
loss: 0.9996352195739746,grad_norm: 0.919333780796302, iteration: 62377
loss: 1.004725694656372,grad_norm: 0.9999989953336458, iteration: 62378
loss: 1.0251716375350952,grad_norm: 0.9999998958552934, iteration: 62379
loss: 1.0187028646469116,grad_norm: 0.8788429129080593, iteration: 62380
loss: 1.0193450450897217,grad_norm: 0.9999990703440491, iteration: 62381
loss: 0.9970520734786987,grad_norm: 0.9999991940313361, iteration: 62382
loss: 0.9918236136436462,grad_norm: 0.8876527574734531, iteration: 62383
loss: 0.9817627668380737,grad_norm: 0.9514464418804491, iteration: 62384
loss: 1.0035263299942017,grad_norm: 0.9999989958177365, iteration: 62385
loss: 1.031592845916748,grad_norm: 0.9999992339374775, iteration: 62386
loss: 1.0137193202972412,grad_norm: 0.9584946819068992, iteration: 62387
loss: 1.012993574142456,grad_norm: 0.9999990923477189, iteration: 62388
loss: 0.9915308356285095,grad_norm: 0.9999991139804143, iteration: 62389
loss: 1.0082828998565674,grad_norm: 0.9999991214970602, iteration: 62390
loss: 1.0023771524429321,grad_norm: 0.999999031099479, iteration: 62391
loss: 1.0235882997512817,grad_norm: 0.9958370610453774, iteration: 62392
loss: 0.9915499687194824,grad_norm: 0.9721445692551686, iteration: 62393
loss: 1.0533428192138672,grad_norm: 0.9999996322323802, iteration: 62394
loss: 0.9851685166358948,grad_norm: 0.9999991181320069, iteration: 62395
loss: 1.0711164474487305,grad_norm: 0.9999996688273218, iteration: 62396
loss: 1.022021770477295,grad_norm: 0.9999989760808394, iteration: 62397
loss: 0.9494613409042358,grad_norm: 0.9999996430065807, iteration: 62398
loss: 0.99101722240448,grad_norm: 0.9610118060629264, iteration: 62399
loss: 1.0370064973831177,grad_norm: 0.9999990241710613, iteration: 62400
loss: 1.0072563886642456,grad_norm: 0.9366247615803327, iteration: 62401
loss: 1.0225927829742432,grad_norm: 0.9082184938676673, iteration: 62402
loss: 0.9985870122909546,grad_norm: 0.9999988507860681, iteration: 62403
loss: 0.9963777661323547,grad_norm: 0.9683390438035171, iteration: 62404
loss: 1.023922324180603,grad_norm: 0.8931073005579252, iteration: 62405
loss: 1.0242490768432617,grad_norm: 0.9999991047009047, iteration: 62406
loss: 0.9731284976005554,grad_norm: 0.9645669135172998, iteration: 62407
loss: 0.9960185885429382,grad_norm: 0.904540903956862, iteration: 62408
loss: 0.9838297963142395,grad_norm: 0.8796379591432232, iteration: 62409
loss: 1.0074154138565063,grad_norm: 0.999999164869885, iteration: 62410
loss: 0.9738626480102539,grad_norm: 0.9999990635623853, iteration: 62411
loss: 1.015866994857788,grad_norm: 0.9659787930049953, iteration: 62412
loss: 1.0362403392791748,grad_norm: 0.8325237400974645, iteration: 62413
loss: 0.9908048510551453,grad_norm: 0.8050151325597928, iteration: 62414
loss: 1.0278812646865845,grad_norm: 0.9999989969090384, iteration: 62415
loss: 0.9850552678108215,grad_norm: 0.9999991609040078, iteration: 62416
loss: 1.036889672279358,grad_norm: 0.9999991427414169, iteration: 62417
loss: 1.0098925828933716,grad_norm: 0.9999994066174278, iteration: 62418
loss: 1.0041117668151855,grad_norm: 0.9410336195112782, iteration: 62419
loss: 0.9884097576141357,grad_norm: 0.999999187410784, iteration: 62420
loss: 1.0041072368621826,grad_norm: 0.9999991555197917, iteration: 62421
loss: 1.0140308141708374,grad_norm: 0.9999992086211676, iteration: 62422
loss: 1.0144484043121338,grad_norm: 0.9415803870624564, iteration: 62423
loss: 1.0149621963500977,grad_norm: 0.9398412613824927, iteration: 62424
loss: 1.0301820039749146,grad_norm: 0.999999214484313, iteration: 62425
loss: 1.0117579698562622,grad_norm: 0.8725864440822778, iteration: 62426
loss: 1.029733657836914,grad_norm: 0.8923090399961314, iteration: 62427
loss: 0.9850166440010071,grad_norm: 0.9999990230301448, iteration: 62428
loss: 1.011885166168213,grad_norm: 0.8808343774893093, iteration: 62429
loss: 1.038458228111267,grad_norm: 0.9999993017467874, iteration: 62430
loss: 0.9983409643173218,grad_norm: 0.9999993249563227, iteration: 62431
loss: 1.0129810571670532,grad_norm: 0.9999990875254618, iteration: 62432
loss: 0.9653849005699158,grad_norm: 0.9999991314871295, iteration: 62433
loss: 1.0038464069366455,grad_norm: 0.9890102621251629, iteration: 62434
loss: 1.0125699043273926,grad_norm: 0.999999083948338, iteration: 62435
loss: 0.9797348976135254,grad_norm: 0.9999990844244246, iteration: 62436
loss: 0.9960042834281921,grad_norm: 0.9753701461997963, iteration: 62437
loss: 1.0710617303848267,grad_norm: 0.999999726114968, iteration: 62438
loss: 0.9892534613609314,grad_norm: 0.9999994889570016, iteration: 62439
loss: 1.011445164680481,grad_norm: 0.9199807846455285, iteration: 62440
loss: 1.023380994796753,grad_norm: 0.9999997097730585, iteration: 62441
loss: 1.0430527925491333,grad_norm: 0.999999096775147, iteration: 62442
loss: 1.0375734567642212,grad_norm: 0.9999992645043528, iteration: 62443
loss: 0.9705682396888733,grad_norm: 0.9999991432202412, iteration: 62444
loss: 0.9925308227539062,grad_norm: 0.9999991142873854, iteration: 62445
loss: 0.9665923714637756,grad_norm: 0.9999990137017306, iteration: 62446
loss: 1.01907479763031,grad_norm: 0.9999990901644545, iteration: 62447
loss: 0.9953962564468384,grad_norm: 0.9936233361178708, iteration: 62448
loss: 1.0053048133850098,grad_norm: 0.9999992246956303, iteration: 62449
loss: 1.017911672592163,grad_norm: 0.9994260399383696, iteration: 62450
loss: 1.006689429283142,grad_norm: 0.8742867149022879, iteration: 62451
loss: 1.028189778327942,grad_norm: 0.9999993230334677, iteration: 62452
loss: 1.0061177015304565,grad_norm: 0.9999989980774164, iteration: 62453
loss: 0.9785813093185425,grad_norm: 0.9999991663893164, iteration: 62454
loss: 1.0044310092926025,grad_norm: 0.999998979896728, iteration: 62455
loss: 1.001278042793274,grad_norm: 0.944319014157651, iteration: 62456
loss: 0.9746324419975281,grad_norm: 0.9999992085639181, iteration: 62457
loss: 0.9883240461349487,grad_norm: 0.9999992008342816, iteration: 62458
loss: 1.0380663871765137,grad_norm: 0.9999991130672128, iteration: 62459
loss: 0.9716900587081909,grad_norm: 0.9999989780442325, iteration: 62460
loss: 1.0155035257339478,grad_norm: 0.9999992098108361, iteration: 62461
loss: 1.0955815315246582,grad_norm: 0.9999995472473161, iteration: 62462
loss: 1.0032209157943726,grad_norm: 0.9999990898211217, iteration: 62463
loss: 0.98838210105896,grad_norm: 0.9999989326396395, iteration: 62464
loss: 1.0095402002334595,grad_norm: 0.9415202247473742, iteration: 62465
loss: 1.0197306871414185,grad_norm: 0.8932030393010578, iteration: 62466
loss: 1.0182850360870361,grad_norm: 0.9999991096596025, iteration: 62467
loss: 0.9979342818260193,grad_norm: 0.999999103787516, iteration: 62468
loss: 1.025052785873413,grad_norm: 0.9999998418728419, iteration: 62469
loss: 0.9801661372184753,grad_norm: 0.9999991398775199, iteration: 62470
loss: 1.0042433738708496,grad_norm: 0.9999992180114405, iteration: 62471
loss: 1.0079703330993652,grad_norm: 0.8667195401880966, iteration: 62472
loss: 0.9963756203651428,grad_norm: 0.9999992148476304, iteration: 62473
loss: 1.0173416137695312,grad_norm: 0.8293962037118685, iteration: 62474
loss: 0.9978856444358826,grad_norm: 0.9999992222198484, iteration: 62475
loss: 1.0640043020248413,grad_norm: 0.9999993858970988, iteration: 62476
loss: 0.992949366569519,grad_norm: 0.9999991580074545, iteration: 62477
loss: 1.1096429824829102,grad_norm: 0.9999991306028212, iteration: 62478
loss: 0.9979484677314758,grad_norm: 0.8007308142246191, iteration: 62479
loss: 0.983989953994751,grad_norm: 0.9999990540217578, iteration: 62480
loss: 0.9742551445960999,grad_norm: 0.9720910297105079, iteration: 62481
loss: 0.987227737903595,grad_norm: 0.7823007058969074, iteration: 62482
loss: 0.9971553683280945,grad_norm: 0.9999990869501131, iteration: 62483
loss: 0.9531290531158447,grad_norm: 0.9999989712815998, iteration: 62484
loss: 0.9838699698448181,grad_norm: 0.9999990731128581, iteration: 62485
loss: 1.0347405672073364,grad_norm: 0.911267737284643, iteration: 62486
loss: 1.0220304727554321,grad_norm: 0.9999996340564885, iteration: 62487
loss: 0.9671167731285095,grad_norm: 0.9943229474253683, iteration: 62488
loss: 0.9917781352996826,grad_norm: 0.9999991268952108, iteration: 62489
loss: 0.9981110692024231,grad_norm: 0.9999997444401981, iteration: 62490
loss: 1.010988473892212,grad_norm: 0.9999992493113777, iteration: 62491
loss: 0.9919779300689697,grad_norm: 0.9999991400308996, iteration: 62492
loss: 0.9676798582077026,grad_norm: 0.9999991614717136, iteration: 62493
loss: 1.0138581991195679,grad_norm: 0.9662303048177739, iteration: 62494
loss: 0.9884513020515442,grad_norm: 0.8854822189333886, iteration: 62495
loss: 1.0123600959777832,grad_norm: 0.9999989548973875, iteration: 62496
loss: 1.0013233423233032,grad_norm: 0.9999992698509312, iteration: 62497
loss: 1.0365824699401855,grad_norm: 0.9210097665998949, iteration: 62498
loss: 1.0282737016677856,grad_norm: 0.9999991153468204, iteration: 62499
loss: 0.9938837289810181,grad_norm: 0.9999991279370514, iteration: 62500
loss: 1.0136934518814087,grad_norm: 0.9999993942325345, iteration: 62501
loss: 1.0303583145141602,grad_norm: 0.9999992169196963, iteration: 62502
loss: 0.9988506436347961,grad_norm: 0.9598861097362984, iteration: 62503
loss: 1.0348901748657227,grad_norm: 0.9999990495820689, iteration: 62504
loss: 1.0141711235046387,grad_norm: 0.8223795310518469, iteration: 62505
loss: 0.9791972041130066,grad_norm: 0.9999991762209929, iteration: 62506
loss: 0.9808997511863708,grad_norm: 0.9999991973288853, iteration: 62507
loss: 1.0052804946899414,grad_norm: 0.867982800021621, iteration: 62508
loss: 1.0076497793197632,grad_norm: 0.9314635084303564, iteration: 62509
loss: 0.9952147603034973,grad_norm: 0.8039745864006926, iteration: 62510
loss: 1.006293535232544,grad_norm: 0.8474974712967772, iteration: 62511
loss: 1.021454095840454,grad_norm: 0.9999991513213672, iteration: 62512
loss: 0.9768437743186951,grad_norm: 0.9999992268292623, iteration: 62513
loss: 0.9694578647613525,grad_norm: 0.9999992580142881, iteration: 62514
loss: 1.000619888305664,grad_norm: 0.9719828161060964, iteration: 62515
loss: 1.0041102170944214,grad_norm: 0.8766209728887043, iteration: 62516
loss: 1.0617780685424805,grad_norm: 0.9999990934485817, iteration: 62517
loss: 0.9546584486961365,grad_norm: 0.9999996372601611, iteration: 62518
loss: 1.0156742334365845,grad_norm: 0.9999990802234919, iteration: 62519
loss: 1.0032435655593872,grad_norm: 0.9841550815505306, iteration: 62520
loss: 1.0082701444625854,grad_norm: 0.9999992794043868, iteration: 62521
loss: 0.9923130869865417,grad_norm: 0.9836095941514482, iteration: 62522
loss: 1.0390326976776123,grad_norm: 0.9945148234397267, iteration: 62523
loss: 1.0144720077514648,grad_norm: 0.999999109446426, iteration: 62524
loss: 1.007108449935913,grad_norm: 0.9999991636017467, iteration: 62525
loss: 1.0039831399917603,grad_norm: 0.9417529857148463, iteration: 62526
loss: 1.0075514316558838,grad_norm: 0.9999991947152117, iteration: 62527
loss: 0.9864223003387451,grad_norm: 0.9999992158015074, iteration: 62528
loss: 1.032145619392395,grad_norm: 0.9999992070880604, iteration: 62529
loss: 0.9909924864768982,grad_norm: 0.999999042822201, iteration: 62530
loss: 0.9826042056083679,grad_norm: 0.9927609967095897, iteration: 62531
loss: 1.0122625827789307,grad_norm: 0.9999990728956656, iteration: 62532
loss: 1.0208806991577148,grad_norm: 0.9999990862853291, iteration: 62533
loss: 1.0059645175933838,grad_norm: 0.9999989262409985, iteration: 62534
loss: 0.9897058010101318,grad_norm: 0.9545669431699717, iteration: 62535
loss: 1.0067261457443237,grad_norm: 0.99999903049447, iteration: 62536
loss: 1.031288981437683,grad_norm: 0.9999991124137794, iteration: 62537
loss: 1.0309032201766968,grad_norm: 0.9999995962079633, iteration: 62538
loss: 0.9751944541931152,grad_norm: 0.9999993746978738, iteration: 62539
loss: 0.9766327142715454,grad_norm: 0.9999993399805535, iteration: 62540
loss: 0.9634483456611633,grad_norm: 0.9999990293611968, iteration: 62541
loss: 1.001349687576294,grad_norm: 0.9999992400211064, iteration: 62542
loss: 1.0189826488494873,grad_norm: 0.9999990411833245, iteration: 62543
loss: 1.0069184303283691,grad_norm: 0.9999989983352902, iteration: 62544
loss: 0.9980953931808472,grad_norm: 0.9999990863147852, iteration: 62545
loss: 0.9677689671516418,grad_norm: 0.9999991015989882, iteration: 62546
loss: 0.9935314655303955,grad_norm: 0.9161522157388097, iteration: 62547
loss: 1.0053035020828247,grad_norm: 0.9999989724635362, iteration: 62548
loss: 1.0455665588378906,grad_norm: 0.9651768974643681, iteration: 62549
loss: 1.1322851181030273,grad_norm: 0.9999990543044819, iteration: 62550
loss: 0.9929555058479309,grad_norm: 0.9999990292930216, iteration: 62551
loss: 1.0258450508117676,grad_norm: 0.9042047188056125, iteration: 62552
loss: 1.0048410892486572,grad_norm: 0.8494246904326436, iteration: 62553
loss: 1.0036430358886719,grad_norm: 0.9922658554278636, iteration: 62554
loss: 0.9991313219070435,grad_norm: 0.9999990338012308, iteration: 62555
loss: 1.035981297492981,grad_norm: 0.999221766727759, iteration: 62556
loss: 0.9989809989929199,grad_norm: 0.9999993585227323, iteration: 62557
loss: 1.02264404296875,grad_norm: 0.9999990511997153, iteration: 62558
loss: 1.0314947366714478,grad_norm: 0.9999990627944041, iteration: 62559
loss: 0.9998083114624023,grad_norm: 0.8344963457606395, iteration: 62560
loss: 1.013992428779602,grad_norm: 0.894188227530728, iteration: 62561
loss: 1.005751371383667,grad_norm: 0.9774607608544005, iteration: 62562
loss: 1.0247868299484253,grad_norm: 0.9999991095650317, iteration: 62563
loss: 1.0162475109100342,grad_norm: 0.9999991824767391, iteration: 62564
loss: 1.0150943994522095,grad_norm: 0.9999993491746473, iteration: 62565
loss: 0.9818613529205322,grad_norm: 0.9999990291967464, iteration: 62566
loss: 1.0208216905593872,grad_norm: 0.9999992752416539, iteration: 62567
loss: 1.0081571340560913,grad_norm: 0.9374020630459036, iteration: 62568
loss: 1.0094083547592163,grad_norm: 0.999999091537947, iteration: 62569
loss: 0.9889739155769348,grad_norm: 0.9999989556499391, iteration: 62570
loss: 1.0722222328186035,grad_norm: 0.9999991072081075, iteration: 62571
loss: 1.0000959634780884,grad_norm: 0.9999999273680962, iteration: 62572
loss: 1.0286821126937866,grad_norm: 0.9999998664175577, iteration: 62573
loss: 1.0304367542266846,grad_norm: 0.9686736667926452, iteration: 62574
loss: 0.9801442623138428,grad_norm: 0.9999992389683119, iteration: 62575
loss: 1.0077621936798096,grad_norm: 0.9702982352456087, iteration: 62576
loss: 0.9685115218162537,grad_norm: 0.9999992710180616, iteration: 62577
loss: 0.9922359585762024,grad_norm: 0.9999991194394132, iteration: 62578
loss: 1.0109610557556152,grad_norm: 0.9999991408221787, iteration: 62579
loss: 1.0192826986312866,grad_norm: 0.9999994035288909, iteration: 62580
loss: 1.0068916082382202,grad_norm: 0.9999995187618035, iteration: 62581
loss: 1.0023317337036133,grad_norm: 0.9999991934927149, iteration: 62582
loss: 0.975231945514679,grad_norm: 0.9999990020834495, iteration: 62583
loss: 0.9905100464820862,grad_norm: 0.9999990856421594, iteration: 62584
loss: 1.0223040580749512,grad_norm: 0.9999989970666299, iteration: 62585
loss: 1.019118309020996,grad_norm: 0.9999994452703981, iteration: 62586
loss: 1.0410802364349365,grad_norm: 0.9999992016918136, iteration: 62587
loss: 1.0199189186096191,grad_norm: 0.9999992619363374, iteration: 62588
loss: 0.9971730709075928,grad_norm: 0.9999992017917003, iteration: 62589
loss: 0.9875227808952332,grad_norm: 0.9875697172350558, iteration: 62590
loss: 1.0117452144622803,grad_norm: 0.9999992452804229, iteration: 62591
loss: 0.9894111156463623,grad_norm: 0.9999992823210732, iteration: 62592
loss: 1.0107656717300415,grad_norm: 0.913007481010101, iteration: 62593
loss: 1.007623553276062,grad_norm: 0.9999990959170775, iteration: 62594
loss: 1.0394541025161743,grad_norm: 0.9999993516421248, iteration: 62595
loss: 1.0119593143463135,grad_norm: 0.9999994109233683, iteration: 62596
loss: 1.0066430568695068,grad_norm: 0.9999995774810393, iteration: 62597
loss: 1.0616555213928223,grad_norm: 0.9999992669844403, iteration: 62598
loss: 0.9744120836257935,grad_norm: 0.9999993865709419, iteration: 62599
loss: 1.0086613893508911,grad_norm: 0.999998934571117, iteration: 62600
loss: 1.0425095558166504,grad_norm: 0.9999993482107887, iteration: 62601
loss: 1.032354712486267,grad_norm: 0.9282562413235342, iteration: 62602
loss: 1.0056357383728027,grad_norm: 0.9999992223187005, iteration: 62603
loss: 1.0027254819869995,grad_norm: 0.9884735288101703, iteration: 62604
loss: 0.981900155544281,grad_norm: 0.999999258348875, iteration: 62605
loss: 0.9695373773574829,grad_norm: 0.9423763834833536, iteration: 62606
loss: 1.0102019309997559,grad_norm: 0.9999992722754205, iteration: 62607
loss: 1.015068769454956,grad_norm: 0.9817347293008527, iteration: 62608
loss: 0.9935760498046875,grad_norm: 0.9999991092377489, iteration: 62609
loss: 1.0594942569732666,grad_norm: 0.9999991773960102, iteration: 62610
loss: 1.022500991821289,grad_norm: 0.8941616785262113, iteration: 62611
loss: 1.0582966804504395,grad_norm: 0.9999996087714917, iteration: 62612
loss: 1.0060447454452515,grad_norm: 0.9999991091732882, iteration: 62613
loss: 1.008121371269226,grad_norm: 0.9999990614870278, iteration: 62614
loss: 1.0003622770309448,grad_norm: 0.927725974735313, iteration: 62615
loss: 0.9913983941078186,grad_norm: 0.9999990271273121, iteration: 62616
loss: 1.0010024309158325,grad_norm: 0.8566003336659039, iteration: 62617
loss: 1.021946907043457,grad_norm: 0.9999991472388476, iteration: 62618
loss: 1.0096409320831299,grad_norm: 0.8303053150926072, iteration: 62619
loss: 0.9844846129417419,grad_norm: 0.9999993169244745, iteration: 62620
loss: 1.0005221366882324,grad_norm: 0.8469891590643728, iteration: 62621
loss: 0.99887615442276,grad_norm: 0.9999990950369557, iteration: 62622
loss: 1.0495116710662842,grad_norm: 0.999999524916602, iteration: 62623
loss: 1.0115998983383179,grad_norm: 0.9999991884480712, iteration: 62624
loss: 0.9629529118537903,grad_norm: 0.9999991221882071, iteration: 62625
loss: 1.0096526145935059,grad_norm: 0.9999998626853412, iteration: 62626
loss: 0.9971660375595093,grad_norm: 0.9204870960544397, iteration: 62627
loss: 0.9986916184425354,grad_norm: 0.8886437763156976, iteration: 62628
loss: 1.0142486095428467,grad_norm: 0.9999994657665627, iteration: 62629
loss: 1.018771767616272,grad_norm: 0.9970738346319032, iteration: 62630
loss: 0.9745329022407532,grad_norm: 0.9999992149641154, iteration: 62631
loss: 1.0205928087234497,grad_norm: 0.9999991341167801, iteration: 62632
loss: 0.9639495611190796,grad_norm: 0.9999991456350691, iteration: 62633
loss: 1.0160750150680542,grad_norm: 0.9999993373758839, iteration: 62634
loss: 1.0180504322052002,grad_norm: 0.9800371764568329, iteration: 62635
loss: 0.9696014523506165,grad_norm: 0.8516297744382463, iteration: 62636
loss: 0.9728293418884277,grad_norm: 0.9660056924660316, iteration: 62637
loss: 1.0283063650131226,grad_norm: 0.9713120784062469, iteration: 62638
loss: 0.9837167859077454,grad_norm: 0.9999989968876051, iteration: 62639
loss: 1.0458546876907349,grad_norm: 0.9999992347982533, iteration: 62640
loss: 1.027360200881958,grad_norm: 0.9999991500087152, iteration: 62641
loss: 0.9945140480995178,grad_norm: 0.8560561071069499, iteration: 62642
loss: 1.0448558330535889,grad_norm: 0.9355037817480494, iteration: 62643
loss: 0.9853736758232117,grad_norm: 0.9999990984670875, iteration: 62644
loss: 0.9816354513168335,grad_norm: 0.9999992065769582, iteration: 62645
loss: 1.004271388053894,grad_norm: 0.8739755114749582, iteration: 62646
loss: 1.030601143836975,grad_norm: 0.999999140203516, iteration: 62647
loss: 0.9922396540641785,grad_norm: 0.8944797817350459, iteration: 62648
loss: 0.9980116486549377,grad_norm: 0.9999991777876737, iteration: 62649
loss: 0.9947829842567444,grad_norm: 0.965887183265011, iteration: 62650
loss: 0.9888669848442078,grad_norm: 0.9999993288869943, iteration: 62651
loss: 1.0827888250350952,grad_norm: 0.9999997376782398, iteration: 62652
loss: 0.9918999671936035,grad_norm: 0.9653860734602846, iteration: 62653
loss: 1.0755314826965332,grad_norm: 0.999999364563758, iteration: 62654
loss: 0.976849377155304,grad_norm: 0.999999289018626, iteration: 62655
loss: 1.0115338563919067,grad_norm: 0.8517900549006956, iteration: 62656
loss: 1.032765507698059,grad_norm: 0.9999990284760771, iteration: 62657
loss: 1.0054478645324707,grad_norm: 0.999999162679543, iteration: 62658
loss: 0.9848334789276123,grad_norm: 0.9889841550077785, iteration: 62659
loss: 0.9890882968902588,grad_norm: 0.9999989196467457, iteration: 62660
loss: 0.9601702690124512,grad_norm: 0.8855715759755858, iteration: 62661
loss: 0.9972978830337524,grad_norm: 0.864707823873159, iteration: 62662
loss: 0.983044445514679,grad_norm: 0.999998997338116, iteration: 62663
loss: 1.043127417564392,grad_norm: 0.9999993236103577, iteration: 62664
loss: 0.9871312379837036,grad_norm: 0.9999991062678008, iteration: 62665
loss: 1.0126110315322876,grad_norm: 0.9999990909664453, iteration: 62666
loss: 0.9719321727752686,grad_norm: 0.8967917672112434, iteration: 62667
loss: 0.9990560412406921,grad_norm: 0.9999991146209612, iteration: 62668
loss: 1.0284122228622437,grad_norm: 0.9461996241665134, iteration: 62669
loss: 1.0023800134658813,grad_norm: 0.9999991295158984, iteration: 62670
loss: 0.9912978410720825,grad_norm: 0.9415813250545179, iteration: 62671
loss: 1.009708285331726,grad_norm: 0.9720879555112328, iteration: 62672
loss: 1.0580127239227295,grad_norm: 0.9999991993915488, iteration: 62673
loss: 1.0479888916015625,grad_norm: 0.9999992276223826, iteration: 62674
loss: 1.0017136335372925,grad_norm: 0.9999992688342191, iteration: 62675
loss: 0.9897200465202332,grad_norm: 0.9999994287222372, iteration: 62676
loss: 0.9929438233375549,grad_norm: 0.9999991067667904, iteration: 62677
loss: 0.9892256259918213,grad_norm: 0.9999992006577871, iteration: 62678
loss: 0.9696570038795471,grad_norm: 0.9999992536590382, iteration: 62679
loss: 1.0458564758300781,grad_norm: 0.9999990354598632, iteration: 62680
loss: 1.0127800703048706,grad_norm: 0.9999992143890029, iteration: 62681
loss: 0.9998030662536621,grad_norm: 0.9999990091169767, iteration: 62682
loss: 1.0028319358825684,grad_norm: 0.9999990748074733, iteration: 62683
loss: 1.0146515369415283,grad_norm: 0.9999993314459934, iteration: 62684
loss: 1.060037612915039,grad_norm: 0.9999998298950799, iteration: 62685
loss: 0.98271244764328,grad_norm: 0.9999992164851401, iteration: 62686
loss: 1.0119627714157104,grad_norm: 0.9999989105540628, iteration: 62687
loss: 0.981412410736084,grad_norm: 0.8951166941641111, iteration: 62688
loss: 1.0060765743255615,grad_norm: 0.9731809288283685, iteration: 62689
loss: 0.996726393699646,grad_norm: 0.9999993802647424, iteration: 62690
loss: 1.0072736740112305,grad_norm: 0.9999991121671421, iteration: 62691
loss: 0.9859302639961243,grad_norm: 0.9999990520511736, iteration: 62692
loss: 0.996489942073822,grad_norm: 0.9999993384526255, iteration: 62693
loss: 1.0235366821289062,grad_norm: 0.9999991687462476, iteration: 62694
loss: 1.031584620475769,grad_norm: 0.9999991217565563, iteration: 62695
loss: 1.0222769975662231,grad_norm: 0.9999991450348099, iteration: 62696
loss: 1.016534447669983,grad_norm: 0.9999996836449838, iteration: 62697
loss: 0.9982630014419556,grad_norm: 0.9999989437012134, iteration: 62698
loss: 0.9980742335319519,grad_norm: 0.9999991921421383, iteration: 62699
loss: 1.02776300907135,grad_norm: 0.9999991333249354, iteration: 62700
loss: 1.004092812538147,grad_norm: 0.9252183912717088, iteration: 62701
loss: 1.0007655620574951,grad_norm: 0.9999990391223048, iteration: 62702
loss: 0.9835277199745178,grad_norm: 0.9999993213926245, iteration: 62703
loss: 1.0178605318069458,grad_norm: 0.9999997668411527, iteration: 62704
loss: 0.9794796109199524,grad_norm: 0.8881778276033158, iteration: 62705
loss: 1.0378268957138062,grad_norm: 0.9999989454412004, iteration: 62706
loss: 0.9857645034790039,grad_norm: 0.999999100677781, iteration: 62707
loss: 1.0189151763916016,grad_norm: 0.8852162371043106, iteration: 62708
loss: 1.007508397102356,grad_norm: 0.999999259735491, iteration: 62709
loss: 0.9966139793395996,grad_norm: 0.9999991819900517, iteration: 62710
loss: 0.9737377166748047,grad_norm: 0.9999997639338182, iteration: 62711
loss: 0.9886449575424194,grad_norm: 0.9999992084627517, iteration: 62712
loss: 1.0025702714920044,grad_norm: 0.883265839687229, iteration: 62713
loss: 1.0313036441802979,grad_norm: 0.9999990774634071, iteration: 62714
loss: 1.0105714797973633,grad_norm: 0.9999991877475134, iteration: 62715
loss: 1.0117648839950562,grad_norm: 0.8701127268667362, iteration: 62716
loss: 0.9983225464820862,grad_norm: 0.8596121975706058, iteration: 62717
loss: 0.9565075635910034,grad_norm: 0.9365063100805918, iteration: 62718
loss: 0.9434431195259094,grad_norm: 0.9999990705385507, iteration: 62719
loss: 0.9913477301597595,grad_norm: 0.9232751565867025, iteration: 62720
loss: 0.9912978410720825,grad_norm: 0.9999991112002937, iteration: 62721
loss: 0.9954931735992432,grad_norm: 0.999999011366722, iteration: 62722
loss: 1.0440706014633179,grad_norm: 0.9999991367487449, iteration: 62723
loss: 0.9842779040336609,grad_norm: 0.8956355586666801, iteration: 62724
loss: 1.0253658294677734,grad_norm: 0.999999289380238, iteration: 62725
loss: 0.9913508892059326,grad_norm: 0.9867523926945728, iteration: 62726
loss: 0.9915181398391724,grad_norm: 0.9903738113502859, iteration: 62727
loss: 0.9929270148277283,grad_norm: 0.9999990528750616, iteration: 62728
loss: 1.0215061902999878,grad_norm: 0.9999992331357008, iteration: 62729
loss: 1.0228875875473022,grad_norm: 0.9999990777381074, iteration: 62730
loss: 1.005878210067749,grad_norm: 0.9999991277656526, iteration: 62731
loss: 1.0266051292419434,grad_norm: 0.973895238899662, iteration: 62732
loss: 1.0151807069778442,grad_norm: 0.9999991247292257, iteration: 62733
loss: 1.0036132335662842,grad_norm: 0.9720995497671581, iteration: 62734
loss: 0.9992042183876038,grad_norm: 0.9999991616917229, iteration: 62735
loss: 0.9918146729469299,grad_norm: 0.9999990557427594, iteration: 62736
loss: 1.0273634195327759,grad_norm: 0.9638791593650425, iteration: 62737
loss: 1.0152924060821533,grad_norm: 0.9999991171685687, iteration: 62738
loss: 1.0103857517242432,grad_norm: 0.9245014189259582, iteration: 62739
loss: 1.0612019300460815,grad_norm: 0.9999999091902494, iteration: 62740
loss: 1.0083168745040894,grad_norm: 0.9999991750650173, iteration: 62741
loss: 1.0322149991989136,grad_norm: 0.9999991912332444, iteration: 62742
loss: 0.9811788201332092,grad_norm: 0.9999996891065723, iteration: 62743
loss: 1.0207805633544922,grad_norm: 0.999999232020226, iteration: 62744
loss: 0.977946937084198,grad_norm: 0.9999991616666319, iteration: 62745
loss: 1.016486644744873,grad_norm: 0.999999133467464, iteration: 62746
loss: 1.0579674243927002,grad_norm: 0.9999991303980311, iteration: 62747
loss: 0.9605075120925903,grad_norm: 0.9999991747109208, iteration: 62748
loss: 0.9788870811462402,grad_norm: 0.9999990209308455, iteration: 62749
loss: 1.0064564943313599,grad_norm: 0.9999990806033091, iteration: 62750
loss: 0.9983761310577393,grad_norm: 0.9999991466260078, iteration: 62751
loss: 0.9990987181663513,grad_norm: 0.9999990454870233, iteration: 62752
loss: 1.0608993768692017,grad_norm: 0.9999993316516754, iteration: 62753
loss: 0.9907299280166626,grad_norm: 0.822142045501427, iteration: 62754
loss: 0.9766980409622192,grad_norm: 0.9999991776187842, iteration: 62755
loss: 1.0201382637023926,grad_norm: 0.9335445298992538, iteration: 62756
loss: 1.006429672241211,grad_norm: 0.9999992044529348, iteration: 62757
loss: 1.0028842687606812,grad_norm: 0.9021504670826909, iteration: 62758
loss: 1.017411231994629,grad_norm: 0.9999991186177695, iteration: 62759
loss: 0.9972555637359619,grad_norm: 0.9999989650477418, iteration: 62760
loss: 1.0111958980560303,grad_norm: 0.999999666953513, iteration: 62761
loss: 1.0101817846298218,grad_norm: 0.9999992103338866, iteration: 62762
loss: 1.0095205307006836,grad_norm: 0.9999992434188191, iteration: 62763
loss: 0.9816356301307678,grad_norm: 0.9999990592838635, iteration: 62764
loss: 1.0154224634170532,grad_norm: 0.9600209465192799, iteration: 62765
loss: 1.0000712871551514,grad_norm: 0.9999991462715199, iteration: 62766
loss: 0.9745212197303772,grad_norm: 0.9999990185095299, iteration: 62767
loss: 1.0079352855682373,grad_norm: 0.9999997578502802, iteration: 62768
loss: 0.9904850125312805,grad_norm: 0.9999990556017333, iteration: 62769
loss: 0.9835036993026733,grad_norm: 0.9315859758312257, iteration: 62770
loss: 0.992159903049469,grad_norm: 0.957508069843086, iteration: 62771
loss: 1.0347470045089722,grad_norm: 0.9999994607178987, iteration: 62772
loss: 1.0239129066467285,grad_norm: 0.9999991885715346, iteration: 62773
loss: 0.981285810470581,grad_norm: 0.878935489293712, iteration: 62774
loss: 1.0007867813110352,grad_norm: 0.9999990566726401, iteration: 62775
loss: 1.0495455265045166,grad_norm: 0.9999991181935467, iteration: 62776
loss: 1.0013701915740967,grad_norm: 0.9908270195521398, iteration: 62777
loss: 0.9851304292678833,grad_norm: 0.9999990293033387, iteration: 62778
loss: 1.0149493217468262,grad_norm: 0.9999994878772811, iteration: 62779
loss: 1.0034235715866089,grad_norm: 0.9999990566690642, iteration: 62780
loss: 0.9956526756286621,grad_norm: 0.9999990851002929, iteration: 62781
loss: 1.022429347038269,grad_norm: 0.999999151954067, iteration: 62782
loss: 0.9846020340919495,grad_norm: 0.9999990384301741, iteration: 62783
loss: 0.9809167981147766,grad_norm: 0.999999122293365, iteration: 62784
loss: 0.9808429479598999,grad_norm: 0.8922964348135886, iteration: 62785
loss: 0.9722353219985962,grad_norm: 0.9672000889982765, iteration: 62786
loss: 1.0253177881240845,grad_norm: 0.9999991988762333, iteration: 62787
loss: 0.9788363575935364,grad_norm: 0.8929144481974641, iteration: 62788
loss: 0.993845522403717,grad_norm: 0.9982049029721942, iteration: 62789
loss: 0.9921941161155701,grad_norm: 0.999999139473856, iteration: 62790
loss: 0.9895750284194946,grad_norm: 0.9716707853067518, iteration: 62791
loss: 0.9977647662162781,grad_norm: 0.9984535407930286, iteration: 62792
loss: 1.0598989725112915,grad_norm: 0.999999395338797, iteration: 62793
loss: 0.9943082928657532,grad_norm: 0.9027002092820171, iteration: 62794
loss: 1.0028297901153564,grad_norm: 0.9999989679456143, iteration: 62795
loss: 1.0221295356750488,grad_norm: 0.9999992333117954, iteration: 62796
loss: 1.0041428804397583,grad_norm: 0.9027740314668802, iteration: 62797
loss: 1.0081062316894531,grad_norm: 0.9210466711309793, iteration: 62798
loss: 1.0179638862609863,grad_norm: 0.9999990063978561, iteration: 62799
loss: 1.0528831481933594,grad_norm: 0.9999991166730889, iteration: 62800
loss: 1.0046380758285522,grad_norm: 0.9999997264169214, iteration: 62801
loss: 1.002416729927063,grad_norm: 0.9999989652290504, iteration: 62802
loss: 0.9977900385856628,grad_norm: 0.9299017843420315, iteration: 62803
loss: 0.9817678928375244,grad_norm: 0.9409304302412981, iteration: 62804
loss: 0.9533087015151978,grad_norm: 0.9999991295042238, iteration: 62805
loss: 1.011555790901184,grad_norm: 0.9999994980593118, iteration: 62806
loss: 1.0021909475326538,grad_norm: 0.9999989671973597, iteration: 62807
loss: 0.9678245782852173,grad_norm: 0.9999992118255662, iteration: 62808
loss: 0.9814279079437256,grad_norm: 0.934783346316247, iteration: 62809
loss: 0.9754716157913208,grad_norm: 0.9999992797350384, iteration: 62810
loss: 1.0162602663040161,grad_norm: 0.9838445339703318, iteration: 62811
loss: 0.9703535437583923,grad_norm: 0.9999992005046936, iteration: 62812
loss: 0.9933739304542542,grad_norm: 0.9999993371055399, iteration: 62813
loss: 0.9731789827346802,grad_norm: 0.9031819580660124, iteration: 62814
loss: 0.9801813364028931,grad_norm: 0.8798799204054848, iteration: 62815
loss: 1.00019371509552,grad_norm: 0.9999991809521102, iteration: 62816
loss: 1.0051413774490356,grad_norm: 0.9999991922695038, iteration: 62817
loss: 0.9819082617759705,grad_norm: 0.8309020485200874, iteration: 62818
loss: 0.9843024611473083,grad_norm: 0.9999991522953846, iteration: 62819
loss: 1.0123183727264404,grad_norm: 0.9999993917781596, iteration: 62820
loss: 0.9956602454185486,grad_norm: 0.9999990319357146, iteration: 62821
loss: 0.9819914102554321,grad_norm: 0.9782739047063963, iteration: 62822
loss: 0.9916625022888184,grad_norm: 0.9999991544451633, iteration: 62823
loss: 0.9726507067680359,grad_norm: 0.9224463667605568, iteration: 62824
loss: 0.9861323237419128,grad_norm: 0.8046134590786302, iteration: 62825
loss: 0.9701886773109436,grad_norm: 0.9998654040269248, iteration: 62826
loss: 0.9948461651802063,grad_norm: 0.9722720434458332, iteration: 62827
loss: 1.0907166004180908,grad_norm: 0.9999992455177634, iteration: 62828
loss: 1.091822624206543,grad_norm: 0.9999997123545733, iteration: 62829
loss: 1.0142168998718262,grad_norm: 0.746345036897725, iteration: 62830
loss: 1.0509299039840698,grad_norm: 0.999999304873678, iteration: 62831
loss: 1.0077348947525024,grad_norm: 0.9999991525759366, iteration: 62832
loss: 1.0509521961212158,grad_norm: 0.9663396102295608, iteration: 62833
loss: 1.031746745109558,grad_norm: 0.9999987653337667, iteration: 62834
loss: 1.0121082067489624,grad_norm: 0.8682882274297188, iteration: 62835
loss: 0.9629479050636292,grad_norm: 0.9973155789657479, iteration: 62836
loss: 0.9440218210220337,grad_norm: 0.9999992081494757, iteration: 62837
loss: 1.0039758682250977,grad_norm: 0.8053050650645567, iteration: 62838
loss: 1.0102282762527466,grad_norm: 0.9999990392643258, iteration: 62839
loss: 1.0430567264556885,grad_norm: 0.9999991951144537, iteration: 62840
loss: 0.9717026948928833,grad_norm: 0.9999990614080164, iteration: 62841
loss: 1.0088300704956055,grad_norm: 0.9999990657307218, iteration: 62842
loss: 1.0187668800354004,grad_norm: 0.9999991557919783, iteration: 62843
loss: 1.0243867635726929,grad_norm: 0.9999998131346827, iteration: 62844
loss: 1.0217863321304321,grad_norm: 0.9999990289011279, iteration: 62845
loss: 0.9922817349433899,grad_norm: 0.9999992441530481, iteration: 62846
loss: 1.028057336807251,grad_norm: 0.9527394990583777, iteration: 62847
loss: 1.004994511604309,grad_norm: 0.9999991019399008, iteration: 62848
loss: 0.9797854423522949,grad_norm: 0.9472859803013588, iteration: 62849
loss: 0.9801006317138672,grad_norm: 0.8429302700988229, iteration: 62850
loss: 0.9867555499076843,grad_norm: 0.9999992799024032, iteration: 62851
loss: 0.9856139421463013,grad_norm: 0.9632797403528255, iteration: 62852
loss: 1.0283772945404053,grad_norm: 0.9999996479674049, iteration: 62853
loss: 1.0101197957992554,grad_norm: 0.9999991765332481, iteration: 62854
loss: 1.0857267379760742,grad_norm: 0.9999992296814733, iteration: 62855
loss: 1.0217912197113037,grad_norm: 0.9336050204759359, iteration: 62856
loss: 1.0178370475769043,grad_norm: 0.9999990174034902, iteration: 62857
loss: 1.0068747997283936,grad_norm: 0.9999990337636996, iteration: 62858
loss: 0.9985854625701904,grad_norm: 0.9224980067279598, iteration: 62859
loss: 0.9795026779174805,grad_norm: 0.8966096777174927, iteration: 62860
loss: 0.9828755855560303,grad_norm: 0.9781074391612334, iteration: 62861
loss: 1.0013498067855835,grad_norm: 0.936906758301036, iteration: 62862
loss: 0.9840999841690063,grad_norm: 0.9999990825156331, iteration: 62863
loss: 0.9562548398971558,grad_norm: 0.8375288704708076, iteration: 62864
loss: 1.029916524887085,grad_norm: 0.8584607589757044, iteration: 62865
loss: 1.0227954387664795,grad_norm: 0.9999992629033562, iteration: 62866
loss: 1.047532320022583,grad_norm: 0.9999990307641456, iteration: 62867
loss: 1.0004445314407349,grad_norm: 0.8314074318129288, iteration: 62868
loss: 0.9958163499832153,grad_norm: 0.9999992427704213, iteration: 62869
loss: 0.978829026222229,grad_norm: 0.999999043377489, iteration: 62870
loss: 1.065111517906189,grad_norm: 0.9999999305409315, iteration: 62871
loss: 1.036490797996521,grad_norm: 0.9999992547087903, iteration: 62872
loss: 0.9709205031394958,grad_norm: 0.9999990776830541, iteration: 62873
loss: 1.0197399854660034,grad_norm: 0.9999994959244303, iteration: 62874
loss: 1.0072696208953857,grad_norm: 0.9999992226680717, iteration: 62875
loss: 0.9883216619491577,grad_norm: 0.9999992805449062, iteration: 62876
loss: 1.0090644359588623,grad_norm: 0.9999991281617053, iteration: 62877
loss: 0.9922777414321899,grad_norm: 0.9999697128186914, iteration: 62878
loss: 0.9978206157684326,grad_norm: 0.999999044184307, iteration: 62879
loss: 0.9774435758590698,grad_norm: 0.9999990577267917, iteration: 62880
loss: 0.9593842625617981,grad_norm: 0.9785469807171712, iteration: 62881
loss: 0.9940881729125977,grad_norm: 0.9626003515526264, iteration: 62882
loss: 0.9901984333992004,grad_norm: 0.8961534336017118, iteration: 62883
loss: 1.0113918781280518,grad_norm: 0.9999996039765245, iteration: 62884
loss: 0.9894939064979553,grad_norm: 0.999998936859638, iteration: 62885
loss: 1.0096391439437866,grad_norm: 0.9999990862865478, iteration: 62886
loss: 1.0009948015213013,grad_norm: 0.9658409935189886, iteration: 62887
loss: 1.0549778938293457,grad_norm: 0.9471209687177032, iteration: 62888
loss: 1.0293878316879272,grad_norm: 0.9999991055893571, iteration: 62889
loss: 0.9835246205329895,grad_norm: 0.9999990182846827, iteration: 62890
loss: 0.9876669645309448,grad_norm: 0.9999990766897899, iteration: 62891
loss: 0.9794560670852661,grad_norm: 0.9999991646875922, iteration: 62892
loss: 0.9860583543777466,grad_norm: 0.9532038905109484, iteration: 62893
loss: 0.971900463104248,grad_norm: 0.8787704101305533, iteration: 62894
loss: 0.9837399125099182,grad_norm: 0.9999992507713582, iteration: 62895
loss: 1.0161418914794922,grad_norm: 0.999999223560383, iteration: 62896
loss: 0.98753821849823,grad_norm: 0.9999990021867243, iteration: 62897
loss: 1.025542974472046,grad_norm: 0.9999992794782174, iteration: 62898
loss: 1.0003949403762817,grad_norm: 0.9999990421780046, iteration: 62899
loss: 1.049586534500122,grad_norm: 0.9999993356706165, iteration: 62900
loss: 0.9937334656715393,grad_norm: 0.89091666048204, iteration: 62901
loss: 1.0029492378234863,grad_norm: 0.9999990863991878, iteration: 62902
loss: 1.052649736404419,grad_norm: 0.9999994769874564, iteration: 62903
loss: 0.9778817892074585,grad_norm: 0.9749966527330335, iteration: 62904
loss: 0.9964205026626587,grad_norm: 0.9784865075136977, iteration: 62905
loss: 0.9529513120651245,grad_norm: 0.9999991537674225, iteration: 62906
loss: 1.0352025032043457,grad_norm: 0.9999991438162021, iteration: 62907
loss: 1.0253833532333374,grad_norm: 0.9868898841056873, iteration: 62908
loss: 0.9953479766845703,grad_norm: 0.9999993332316702, iteration: 62909
loss: 1.0055015087127686,grad_norm: 0.9999990503178003, iteration: 62910
loss: 1.0292279720306396,grad_norm: 0.9999992978361051, iteration: 62911
loss: 1.0017350912094116,grad_norm: 0.9999990688048633, iteration: 62912
loss: 1.0390172004699707,grad_norm: 0.9999992314486673, iteration: 62913
loss: 1.0115320682525635,grad_norm: 0.9661800740520623, iteration: 62914
loss: 1.0002490282058716,grad_norm: 0.999999092246647, iteration: 62915
loss: 0.9635125398635864,grad_norm: 0.9999993350956572, iteration: 62916
loss: 0.9956892132759094,grad_norm: 0.9999991793836274, iteration: 62917
loss: 1.0099430084228516,grad_norm: 0.9999997868259352, iteration: 62918
loss: 1.0134434700012207,grad_norm: 0.9725239257174114, iteration: 62919
loss: 1.0773468017578125,grad_norm: 0.9999998476135742, iteration: 62920
loss: 1.0008518695831299,grad_norm: 0.9001821323774644, iteration: 62921
loss: 0.9993389248847961,grad_norm: 0.999999169332835, iteration: 62922
loss: 0.9652952551841736,grad_norm: 0.9815740795331438, iteration: 62923
loss: 1.0133336782455444,grad_norm: 0.9999991039515761, iteration: 62924
loss: 1.0357897281646729,grad_norm: 0.9999991368202216, iteration: 62925
loss: 0.999262273311615,grad_norm: 0.9999992650653344, iteration: 62926
loss: 0.960582435131073,grad_norm: 0.9999989927012805, iteration: 62927
loss: 0.9851601123809814,grad_norm: 0.9999991074185214, iteration: 62928
loss: 0.9612820148468018,grad_norm: 0.9999990778306453, iteration: 62929
loss: 1.032957911491394,grad_norm: 0.999999266047094, iteration: 62930
loss: 1.001966118812561,grad_norm: 0.9506968307624266, iteration: 62931
loss: 0.9958804845809937,grad_norm: 0.9999992865383431, iteration: 62932
loss: 0.9990614652633667,grad_norm: 0.9999990742446613, iteration: 62933
loss: 0.9684440493583679,grad_norm: 0.998420957190447, iteration: 62934
loss: 1.0052045583724976,grad_norm: 0.9760651736776602, iteration: 62935
loss: 0.9835072159767151,grad_norm: 0.9562814494392857, iteration: 62936
loss: 1.0008769035339355,grad_norm: 0.9999989715380325, iteration: 62937
loss: 0.9958046078681946,grad_norm: 0.9259801118471521, iteration: 62938
loss: 0.9534643888473511,grad_norm: 0.999999033377672, iteration: 62939
loss: 1.0338330268859863,grad_norm: 0.999999286976974, iteration: 62940
loss: 0.977736234664917,grad_norm: 0.9999990892392984, iteration: 62941
loss: 0.9874805212020874,grad_norm: 0.9563171828187228, iteration: 62942
loss: 1.0032259225845337,grad_norm: 0.9999992089278074, iteration: 62943
loss: 1.0386631488800049,grad_norm: 0.9999990845986431, iteration: 62944
loss: 1.023392677307129,grad_norm: 0.8337198235597195, iteration: 62945
loss: 1.0067356824874878,grad_norm: 0.99999914004294, iteration: 62946
loss: 1.0074810981750488,grad_norm: 0.9999990504535256, iteration: 62947
loss: 0.9726447463035583,grad_norm: 0.9073622806995167, iteration: 62948
loss: 0.9998282790184021,grad_norm: 0.971356245057675, iteration: 62949
loss: 0.9816257953643799,grad_norm: 0.9266025162458592, iteration: 62950
loss: 1.0317317247390747,grad_norm: 0.999999299062024, iteration: 62951
loss: 1.0376888513565063,grad_norm: 0.9999992488943592, iteration: 62952
loss: 1.005307912826538,grad_norm: 0.9999992301541879, iteration: 62953
loss: 0.9891619086265564,grad_norm: 0.9999992242662546, iteration: 62954
loss: 1.0830196142196655,grad_norm: 0.9999998920354763, iteration: 62955
loss: 0.977647602558136,grad_norm: 0.9686356904295916, iteration: 62956
loss: 0.9767234921455383,grad_norm: 0.999998974903783, iteration: 62957
loss: 1.0333365201950073,grad_norm: 0.9999990487300382, iteration: 62958
loss: 0.998046338558197,grad_norm: 0.9999990680709522, iteration: 62959
loss: 0.9834162592887878,grad_norm: 0.999999342368817, iteration: 62960
loss: 0.9784566164016724,grad_norm: 0.9999991266573057, iteration: 62961
loss: 1.0184025764465332,grad_norm: 0.9999992657415772, iteration: 62962
loss: 0.9764770269393921,grad_norm: 0.9999991187565677, iteration: 62963
loss: 1.0252124071121216,grad_norm: 0.9999994820303189, iteration: 62964
loss: 1.0257022380828857,grad_norm: 0.9711854678988621, iteration: 62965
loss: 0.9658129215240479,grad_norm: 0.9999992096247726, iteration: 62966
loss: 1.0224741697311401,grad_norm: 0.9999990256397094, iteration: 62967
loss: 1.064492106437683,grad_norm: 0.999999416875393, iteration: 62968
loss: 1.0521893501281738,grad_norm: 0.9850890697605056, iteration: 62969
loss: 0.9584241509437561,grad_norm: 0.9644751717592178, iteration: 62970
loss: 0.9899405837059021,grad_norm: 0.9999991711565053, iteration: 62971
loss: 1.0304622650146484,grad_norm: 0.9999989814835369, iteration: 62972
loss: 1.0065240859985352,grad_norm: 0.9999991260593389, iteration: 62973
loss: 1.0241366624832153,grad_norm: 0.9999991714939712, iteration: 62974
loss: 1.0297634601593018,grad_norm: 0.9999993900899944, iteration: 62975
loss: 0.9917815923690796,grad_norm: 0.9615148099231303, iteration: 62976
loss: 0.996406614780426,grad_norm: 0.9596725173734011, iteration: 62977
loss: 0.9688868522644043,grad_norm: 0.9341037710026986, iteration: 62978
loss: 0.9590962529182434,grad_norm: 0.9720784391255295, iteration: 62979
loss: 0.9755979180335999,grad_norm: 0.9967948929817901, iteration: 62980
loss: 1.0151909589767456,grad_norm: 0.8713464135619645, iteration: 62981
loss: 1.0208449363708496,grad_norm: 0.9999990410699957, iteration: 62982
loss: 1.0549319982528687,grad_norm: 0.9999992026140778, iteration: 62983
loss: 1.0024535655975342,grad_norm: 0.9371859034942103, iteration: 62984
loss: 0.9789014458656311,grad_norm: 0.9999992232062552, iteration: 62985
loss: 1.0201903581619263,grad_norm: 0.9999993372535971, iteration: 62986
loss: 0.9955191016197205,grad_norm: 0.9334224611264579, iteration: 62987
loss: 1.0071130990982056,grad_norm: 0.9999992225194276, iteration: 62988
loss: 0.9773648381233215,grad_norm: 0.967593094515591, iteration: 62989
loss: 0.992197573184967,grad_norm: 0.9606122110711982, iteration: 62990
loss: 1.0049222707748413,grad_norm: 0.9625330250279742, iteration: 62991
loss: 0.9559643864631653,grad_norm: 0.8326096392445363, iteration: 62992
loss: 1.0226266384124756,grad_norm: 0.9999991325384592, iteration: 62993
loss: 1.012534260749817,grad_norm: 0.9629969424323829, iteration: 62994
loss: 1.027018666267395,grad_norm: 0.9731197890322589, iteration: 62995
loss: 1.0088142156600952,grad_norm: 0.9665186167872294, iteration: 62996
loss: 0.972750186920166,grad_norm: 0.9999992650282599, iteration: 62997
loss: 0.9781007766723633,grad_norm: 0.9999993093848569, iteration: 62998
loss: 1.0348278284072876,grad_norm: 0.9999991807342877, iteration: 62999
loss: 1.0197968482971191,grad_norm: 0.9001771738043277, iteration: 63000
loss: 1.004881739616394,grad_norm: 0.9999991656881679, iteration: 63001
loss: 0.9833860993385315,grad_norm: 0.9999990465421212, iteration: 63002
loss: 1.0368977785110474,grad_norm: 0.9999993761585055, iteration: 63003
loss: 0.9707139730453491,grad_norm: 0.9450546486942784, iteration: 63004
loss: 1.0189318656921387,grad_norm: 0.9999997582184778, iteration: 63005
loss: 0.9778761863708496,grad_norm: 0.9458477393013384, iteration: 63006
loss: 0.9810486435890198,grad_norm: 0.9931150441540869, iteration: 63007
loss: 0.9755691885948181,grad_norm: 0.9999990755601951, iteration: 63008
loss: 1.3455545902252197,grad_norm: 0.9999994162673421, iteration: 63009
loss: 0.9832016825675964,grad_norm: 0.9999997501084873, iteration: 63010
loss: 0.9741232991218567,grad_norm: 0.9740853123966134, iteration: 63011
loss: 0.9945013523101807,grad_norm: 0.9999991074651651, iteration: 63012
loss: 0.9976294636726379,grad_norm: 0.8705557755150287, iteration: 63013
loss: 1.0089668035507202,grad_norm: 0.9999994796054046, iteration: 63014
loss: 1.0285615921020508,grad_norm: 0.8418283758810025, iteration: 63015
loss: 1.0297939777374268,grad_norm: 0.9999992254455891, iteration: 63016
loss: 1.0205734968185425,grad_norm: 0.889455247218799, iteration: 63017
loss: 1.035230278968811,grad_norm: 0.9999991605974244, iteration: 63018
loss: 0.9673665761947632,grad_norm: 0.8863198621355779, iteration: 63019
loss: 1.108130693435669,grad_norm: 0.9999997723692227, iteration: 63020
loss: 0.9886019229888916,grad_norm: 0.922740725108829, iteration: 63021
loss: 0.9940097332000732,grad_norm: 0.9999990310595057, iteration: 63022
loss: 0.978385865688324,grad_norm: 0.9261046625687673, iteration: 63023
loss: 0.984122633934021,grad_norm: 0.9629577621589733, iteration: 63024
loss: 1.0110758543014526,grad_norm: 0.9999990435448487, iteration: 63025
loss: 1.0204023122787476,grad_norm: 0.9200796159836481, iteration: 63026
loss: 1.0043485164642334,grad_norm: 0.9999991426574684, iteration: 63027
loss: 0.9870613217353821,grad_norm: 0.9999995276427256, iteration: 63028
loss: 1.0844991207122803,grad_norm: 0.9999992635855717, iteration: 63029
loss: 0.984478771686554,grad_norm: 0.7991069232113074, iteration: 63030
loss: 1.0011510848999023,grad_norm: 0.8814566411956477, iteration: 63031
loss: 0.9833804368972778,grad_norm: 0.9999991835990434, iteration: 63032
loss: 1.006996989250183,grad_norm: 0.9999990568093328, iteration: 63033
loss: 0.9763748645782471,grad_norm: 0.9000773716627181, iteration: 63034
loss: 0.9783535003662109,grad_norm: 0.9999990856726045, iteration: 63035
loss: 0.9981048107147217,grad_norm: 0.9999991232422784, iteration: 63036
loss: 0.9893158078193665,grad_norm: 0.9999991158935851, iteration: 63037
loss: 1.0583089590072632,grad_norm: 0.999999774795692, iteration: 63038
loss: 0.9697095155715942,grad_norm: 0.9571885195709937, iteration: 63039
loss: 1.0003172159194946,grad_norm: 0.9999992489910371, iteration: 63040
loss: 0.9984790086746216,grad_norm: 0.9999991967249133, iteration: 63041
loss: 0.9841834306716919,grad_norm: 0.9999989937384594, iteration: 63042
loss: 0.9861453771591187,grad_norm: 0.9999989158284682, iteration: 63043
loss: 1.0049046277999878,grad_norm: 0.9999991909011139, iteration: 63044
loss: 1.0111855268478394,grad_norm: 0.9823376476519843, iteration: 63045
loss: 1.0078593492507935,grad_norm: 0.9999992602609026, iteration: 63046
loss: 0.996313214302063,grad_norm: 0.9522623647933797, iteration: 63047
loss: 1.010935664176941,grad_norm: 0.9999991200883799, iteration: 63048
loss: 1.0064358711242676,grad_norm: 0.999999014179786, iteration: 63049
loss: 0.9810952544212341,grad_norm: 0.9999991086765352, iteration: 63050
loss: 0.9597737193107605,grad_norm: 0.9999990739456676, iteration: 63051
loss: 0.9746518731117249,grad_norm: 0.9999990569773684, iteration: 63052
loss: 0.9731893539428711,grad_norm: 0.9999991391836055, iteration: 63053
loss: 1.0380167961120605,grad_norm: 0.8806109949240409, iteration: 63054
loss: 1.0078119039535522,grad_norm: 0.993630012684511, iteration: 63055
loss: 1.0272413492202759,grad_norm: 0.99999922923239, iteration: 63056
loss: 1.0598695278167725,grad_norm: 0.9999993008205935, iteration: 63057
loss: 1.0250669717788696,grad_norm: 0.9016734579964503, iteration: 63058
loss: 0.972653865814209,grad_norm: 0.9249771805568949, iteration: 63059
loss: 1.0008273124694824,grad_norm: 0.9865109146445573, iteration: 63060
loss: 0.989713728427887,grad_norm: 0.9717724326547985, iteration: 63061
loss: 1.0261603593826294,grad_norm: 0.999999033356669, iteration: 63062
loss: 1.005361795425415,grad_norm: 0.9999992932129467, iteration: 63063
loss: 1.046151041984558,grad_norm: 0.9948392765584588, iteration: 63064
loss: 0.9802817106246948,grad_norm: 0.9999990769625252, iteration: 63065
loss: 0.9917760491371155,grad_norm: 0.9999991550270289, iteration: 63066
loss: 1.023343563079834,grad_norm: 0.9999991823403322, iteration: 63067
loss: 0.9788787364959717,grad_norm: 0.9636608316177998, iteration: 63068
loss: 1.004296898841858,grad_norm: 0.928935578987724, iteration: 63069
loss: 1.0445184707641602,grad_norm: 0.9999991678346415, iteration: 63070
loss: 1.021690011024475,grad_norm: 0.9714753090015954, iteration: 63071
loss: 1.0045063495635986,grad_norm: 0.8394155899756769, iteration: 63072
loss: 1.0320274829864502,grad_norm: 0.9999992464253161, iteration: 63073
loss: 1.0556303262710571,grad_norm: 0.9910994234764836, iteration: 63074
loss: 1.0084505081176758,grad_norm: 0.999999091169159, iteration: 63075
loss: 1.007327914237976,grad_norm: 0.9266150656858679, iteration: 63076
loss: 0.9817176461219788,grad_norm: 0.9999990259934308, iteration: 63077
loss: 0.9774115085601807,grad_norm: 0.9999991582573973, iteration: 63078
loss: 1.0048648118972778,grad_norm: 0.9999991949245935, iteration: 63079
loss: 1.0005325078964233,grad_norm: 0.9776906236435744, iteration: 63080
loss: 1.0118001699447632,grad_norm: 0.8452806792202904, iteration: 63081
loss: 1.0132046937942505,grad_norm: 0.9999991583895874, iteration: 63082
loss: 0.9876453280448914,grad_norm: 0.9999990155054467, iteration: 63083
loss: 0.9766200184822083,grad_norm: 0.999999083361397, iteration: 63084
loss: 0.9935998916625977,grad_norm: 0.9847967688265536, iteration: 63085
loss: 0.9628163576126099,grad_norm: 0.9999990843358609, iteration: 63086
loss: 1.0425206422805786,grad_norm: 0.9999996448234051, iteration: 63087
loss: 0.9880216121673584,grad_norm: 0.9999991463499766, iteration: 63088
loss: 0.993312656879425,grad_norm: 0.9999991637006364, iteration: 63089
loss: 1.019354224205017,grad_norm: 0.9740588903779049, iteration: 63090
loss: 0.9819024205207825,grad_norm: 0.9999994983115343, iteration: 63091
loss: 1.0199421644210815,grad_norm: 0.9999992751883744, iteration: 63092
loss: 0.9865401983261108,grad_norm: 0.9999989785728172, iteration: 63093
loss: 1.0004748106002808,grad_norm: 0.9999989352669543, iteration: 63094
loss: 0.9969705939292908,grad_norm: 0.9787711753443434, iteration: 63095
loss: 1.0032787322998047,grad_norm: 0.964701728773969, iteration: 63096
loss: 1.008134126663208,grad_norm: 0.9465064722843028, iteration: 63097
loss: 1.0390560626983643,grad_norm: 0.9579176678152836, iteration: 63098
loss: 0.9767099022865295,grad_norm: 0.9999991304595968, iteration: 63099
loss: 1.0097668170928955,grad_norm: 0.9358156385352011, iteration: 63100
loss: 0.9836472272872925,grad_norm: 0.9999992605329672, iteration: 63101
loss: 1.0044840574264526,grad_norm: 0.9999990194838659, iteration: 63102
loss: 1.0035792589187622,grad_norm: 0.9550958880319833, iteration: 63103
loss: 0.9958031177520752,grad_norm: 0.9904535465884959, iteration: 63104
loss: 0.9376653432846069,grad_norm: 0.9999990638717496, iteration: 63105
loss: 0.9861602187156677,grad_norm: 0.9999990867926984, iteration: 63106
loss: 1.005811333656311,grad_norm: 0.9999989682704784, iteration: 63107
loss: 1.0483794212341309,grad_norm: 0.9999992038209603, iteration: 63108
loss: 1.0201287269592285,grad_norm: 0.999999237664119, iteration: 63109
loss: 0.973273754119873,grad_norm: 0.9258632016234956, iteration: 63110
loss: 1.0035690069198608,grad_norm: 0.999998986045374, iteration: 63111
loss: 0.9920588731765747,grad_norm: 0.9500494273539533, iteration: 63112
loss: 1.006950855255127,grad_norm: 0.999999161924891, iteration: 63113
loss: 1.0049787759780884,grad_norm: 0.8969580097515548, iteration: 63114
loss: 0.9778422117233276,grad_norm: 0.9202260140025647, iteration: 63115
loss: 1.011919379234314,grad_norm: 0.9999991844230982, iteration: 63116
loss: 1.0420962572097778,grad_norm: 0.9999998310974153, iteration: 63117
loss: 1.0246318578720093,grad_norm: 0.9999991127494733, iteration: 63118
loss: 1.011254906654358,grad_norm: 0.999999202798752, iteration: 63119
loss: 0.9920654296875,grad_norm: 0.9512459418536249, iteration: 63120
loss: 0.9745535850524902,grad_norm: 0.9999990015066486, iteration: 63121
loss: 0.9979707598686218,grad_norm: 0.9999997980850569, iteration: 63122
loss: 1.0290724039077759,grad_norm: 0.8541284912284615, iteration: 63123
loss: 0.980055570602417,grad_norm: 0.9999993760456128, iteration: 63124
loss: 0.9725344777107239,grad_norm: 0.999999062026489, iteration: 63125
loss: 0.9744670391082764,grad_norm: 0.9990254558923274, iteration: 63126
loss: 1.0377813577651978,grad_norm: 0.9999992490776134, iteration: 63127
loss: 0.9832904934883118,grad_norm: 0.9999991604059668, iteration: 63128
loss: 1.0417286157608032,grad_norm: 0.9999994351343486, iteration: 63129
loss: 1.042966365814209,grad_norm: 0.9999993226346575, iteration: 63130
loss: 1.0372309684753418,grad_norm: 0.9999992152368551, iteration: 63131
loss: 1.0232124328613281,grad_norm: 0.9825325265453908, iteration: 63132
loss: 0.9518774747848511,grad_norm: 0.8683031895723815, iteration: 63133
loss: 1.022939920425415,grad_norm: 0.9999991112835689, iteration: 63134
loss: 0.9850699305534363,grad_norm: 0.9999990050374005, iteration: 63135
loss: 0.9501769542694092,grad_norm: 0.9999991589571283, iteration: 63136
loss: 0.9928462505340576,grad_norm: 0.9749837816478243, iteration: 63137
loss: 0.9972503781318665,grad_norm: 0.9270991281332543, iteration: 63138
loss: 1.1385605335235596,grad_norm: 0.9999993061009254, iteration: 63139
loss: 1.0078126192092896,grad_norm: 0.9999990069663536, iteration: 63140
loss: 0.9914310574531555,grad_norm: 0.8881255570356016, iteration: 63141
loss: 0.9588446021080017,grad_norm: 0.9343785341493653, iteration: 63142
loss: 1.015230655670166,grad_norm: 0.999998986313479, iteration: 63143
loss: 0.9945003390312195,grad_norm: 0.9999991210312317, iteration: 63144
loss: 1.0124162435531616,grad_norm: 0.8477844250863877, iteration: 63145
loss: 0.9993222951889038,grad_norm: 0.9579050174371815, iteration: 63146
loss: 1.0028016567230225,grad_norm: 0.9614997500188371, iteration: 63147
loss: 1.0161439180374146,grad_norm: 0.9533766167508885, iteration: 63148
loss: 0.9761185646057129,grad_norm: 0.9999991891744268, iteration: 63149
loss: 1.0022872686386108,grad_norm: 0.999999807397316, iteration: 63150
loss: 1.0113685131072998,grad_norm: 0.9999993784622238, iteration: 63151
loss: 0.9762670397758484,grad_norm: 0.9999991198332844, iteration: 63152
loss: 0.9721111059188843,grad_norm: 0.9999991963631704, iteration: 63153
loss: 0.9797454476356506,grad_norm: 0.9999989764914563, iteration: 63154
loss: 1.0049620866775513,grad_norm: 0.9999990521686378, iteration: 63155
loss: 1.0206449031829834,grad_norm: 0.9999991813325149, iteration: 63156
loss: 1.0326199531555176,grad_norm: 0.999999054972699, iteration: 63157
loss: 1.0606858730316162,grad_norm: 0.999999565285753, iteration: 63158
loss: 0.9803236722946167,grad_norm: 0.9083577375819112, iteration: 63159
loss: 1.0197349786758423,grad_norm: 0.9999990283410354, iteration: 63160
loss: 0.9707895517349243,grad_norm: 0.9999990993359598, iteration: 63161
loss: 1.017042875289917,grad_norm: 0.9894234061208598, iteration: 63162
loss: 0.9966223835945129,grad_norm: 0.9999992307233847, iteration: 63163
loss: 1.0006541013717651,grad_norm: 0.999999128820289, iteration: 63164
loss: 1.0344791412353516,grad_norm: 0.9999991327932977, iteration: 63165
loss: 0.9815834760665894,grad_norm: 0.9437101399398438, iteration: 63166
loss: 1.0118293762207031,grad_norm: 0.9919143356192202, iteration: 63167
loss: 0.9626243710517883,grad_norm: 0.9999990938964305, iteration: 63168
loss: 1.0060869455337524,grad_norm: 0.9999991420461879, iteration: 63169
loss: 1.017164707183838,grad_norm: 0.939822749451977, iteration: 63170
loss: 1.0215506553649902,grad_norm: 0.9871703953284777, iteration: 63171
loss: 0.9886836409568787,grad_norm: 0.9729419299774673, iteration: 63172
loss: 0.9723743796348572,grad_norm: 0.9999991186104521, iteration: 63173
loss: 1.0002444982528687,grad_norm: 0.9999991287519217, iteration: 63174
loss: 1.0595431327819824,grad_norm: 0.9999991060430807, iteration: 63175
loss: 1.0740325450897217,grad_norm: 0.9999991545138533, iteration: 63176
loss: 1.0148581266403198,grad_norm: 0.909172953690058, iteration: 63177
loss: 1.009674072265625,grad_norm: 0.9999991593371277, iteration: 63178
loss: 1.0224486589431763,grad_norm: 0.9999992535115914, iteration: 63179
loss: 1.031736135482788,grad_norm: 0.9999991643053036, iteration: 63180
loss: 0.994073748588562,grad_norm: 0.9718595793376513, iteration: 63181
loss: 1.0020934343338013,grad_norm: 0.9999995548466246, iteration: 63182
loss: 1.0185670852661133,grad_norm: 0.8528253833818018, iteration: 63183
loss: 0.976047694683075,grad_norm: 0.8272725616313937, iteration: 63184
loss: 1.0244261026382446,grad_norm: 0.9999989948797302, iteration: 63185
loss: 1.0319100618362427,grad_norm: 0.999999058077026, iteration: 63186
loss: 0.9977546334266663,grad_norm: 0.9999990852648232, iteration: 63187
loss: 1.0065792798995972,grad_norm: 0.9999993399648938, iteration: 63188
loss: 1.03291916847229,grad_norm: 0.9999994759882054, iteration: 63189
loss: 0.9780164957046509,grad_norm: 0.9999991011481297, iteration: 63190
loss: 0.986079752445221,grad_norm: 0.9812415140261648, iteration: 63191
loss: 1.026867151260376,grad_norm: 0.9999990951066303, iteration: 63192
loss: 1.0220694541931152,grad_norm: 0.999999849023883, iteration: 63193
loss: 1.0027800798416138,grad_norm: 0.9999995358941387, iteration: 63194
loss: 0.996540904045105,grad_norm: 0.9999991969840762, iteration: 63195
loss: 1.010542631149292,grad_norm: 0.9999992804943106, iteration: 63196
loss: 1.0083593130111694,grad_norm: 0.9999991195897532, iteration: 63197
loss: 0.9807248115539551,grad_norm: 0.8009517872755977, iteration: 63198
loss: 1.0038217306137085,grad_norm: 0.9999992636590908, iteration: 63199
loss: 1.0356956720352173,grad_norm: 0.9999991562419277, iteration: 63200
loss: 0.9970071315765381,grad_norm: 0.9743345324084922, iteration: 63201
loss: 1.0010566711425781,grad_norm: 0.9483440052468799, iteration: 63202
loss: 0.9935808777809143,grad_norm: 0.999999131183585, iteration: 63203
loss: 1.0266873836517334,grad_norm: 0.9113648794937762, iteration: 63204
loss: 1.0412429571151733,grad_norm: 0.9214704214148997, iteration: 63205
loss: 0.9947271943092346,grad_norm: 0.9999990656911877, iteration: 63206
loss: 0.9934609532356262,grad_norm: 0.9999991250366188, iteration: 63207
loss: 0.9940899014472961,grad_norm: 0.9470175037088396, iteration: 63208
loss: 1.0011976957321167,grad_norm: 0.9999991763100007, iteration: 63209
loss: 1.016790509223938,grad_norm: 0.8775687974305093, iteration: 63210
loss: 1.0088140964508057,grad_norm: 0.9999991064726113, iteration: 63211
loss: 1.0143262147903442,grad_norm: 0.9999991580051264, iteration: 63212
loss: 0.9926536083221436,grad_norm: 0.9999991710617708, iteration: 63213
loss: 0.9938478469848633,grad_norm: 0.9999991518155399, iteration: 63214
loss: 1.0343475341796875,grad_norm: 0.9999990042231112, iteration: 63215
loss: 1.015164852142334,grad_norm: 0.9999995083262847, iteration: 63216
loss: 1.0067176818847656,grad_norm: 0.9999990969131547, iteration: 63217
loss: 0.9931290149688721,grad_norm: 0.8828607490872851, iteration: 63218
loss: 1.0421196222305298,grad_norm: 0.999998973723357, iteration: 63219
loss: 1.0181610584259033,grad_norm: 0.9999991775011979, iteration: 63220
loss: 1.0148658752441406,grad_norm: 0.9999989722872625, iteration: 63221
loss: 0.9912857413291931,grad_norm: 0.999999348383839, iteration: 63222
loss: 0.9974100589752197,grad_norm: 0.9865842823582589, iteration: 63223
loss: 1.010274887084961,grad_norm: 0.944104718337824, iteration: 63224
loss: 0.9888107180595398,grad_norm: 0.9999990776240134, iteration: 63225
loss: 1.016331672668457,grad_norm: 0.9999992958380564, iteration: 63226
loss: 0.9917957186698914,grad_norm: 0.9999993012912245, iteration: 63227
loss: 1.0082027912139893,grad_norm: 0.8753530636544102, iteration: 63228
loss: 1.02373468875885,grad_norm: 0.9999992316253575, iteration: 63229
loss: 0.9804444909095764,grad_norm: 0.9355401542654678, iteration: 63230
loss: 0.9983566403388977,grad_norm: 0.9592743765272442, iteration: 63231
loss: 1.0024652481079102,grad_norm: 0.9670121894046392, iteration: 63232
loss: 1.0307371616363525,grad_norm: 0.9662492655933164, iteration: 63233
loss: 0.9857708215713501,grad_norm: 0.9999991263025859, iteration: 63234
loss: 1.0055984258651733,grad_norm: 0.9999990567905168, iteration: 63235
loss: 0.9974502921104431,grad_norm: 0.9255354978735458, iteration: 63236
loss: 1.046494483947754,grad_norm: 0.9658660057465769, iteration: 63237
loss: 1.0008326768875122,grad_norm: 0.9999990843782232, iteration: 63238
loss: 1.0010443925857544,grad_norm: 0.9463375017706174, iteration: 63239
loss: 0.9782866835594177,grad_norm: 0.9921022778903357, iteration: 63240
loss: 0.9757254719734192,grad_norm: 0.9409707810738392, iteration: 63241
loss: 1.0131648778915405,grad_norm: 0.9999988595746568, iteration: 63242
loss: 0.9701122045516968,grad_norm: 0.9498860115522905, iteration: 63243
loss: 0.990614652633667,grad_norm: 0.9559276856010163, iteration: 63244
loss: 0.9966222643852234,grad_norm: 0.9109397920285006, iteration: 63245
loss: 0.9844171404838562,grad_norm: 0.9999990280085767, iteration: 63246
loss: 1.0122337341308594,grad_norm: 0.999999199475786, iteration: 63247
loss: 0.9933373332023621,grad_norm: 0.99999906337824, iteration: 63248
loss: 0.9679810404777527,grad_norm: 0.9237438750679781, iteration: 63249
loss: 1.0109009742736816,grad_norm: 0.8787561098929584, iteration: 63250
loss: 0.995364785194397,grad_norm: 0.999999122798909, iteration: 63251
loss: 1.035094976425171,grad_norm: 0.9999990734373496, iteration: 63252
loss: 1.031947135925293,grad_norm: 0.8835243285646387, iteration: 63253
loss: 1.0337623357772827,grad_norm: 0.9999991820681223, iteration: 63254
loss: 1.0182441473007202,grad_norm: 0.9999993799088926, iteration: 63255
loss: 1.0363608598709106,grad_norm: 0.9999989906295842, iteration: 63256
loss: 1.0109598636627197,grad_norm: 0.9999991788663459, iteration: 63257
loss: 1.0053590536117554,grad_norm: 0.9999989882232884, iteration: 63258
loss: 1.0484123229980469,grad_norm: 0.9027082927519141, iteration: 63259
loss: 0.975034236907959,grad_norm: 0.998875826205934, iteration: 63260
loss: 0.9829174280166626,grad_norm: 0.9657052952397003, iteration: 63261
loss: 0.9891189336776733,grad_norm: 0.9682526198541138, iteration: 63262
loss: 0.9972076416015625,grad_norm: 0.9999989566489763, iteration: 63263
loss: 0.9971544146537781,grad_norm: 0.9593036070896833, iteration: 63264
loss: 0.9864643812179565,grad_norm: 0.9999991497182725, iteration: 63265
loss: 1.0501312017440796,grad_norm: 0.9999990941520344, iteration: 63266
loss: 1.0032538175582886,grad_norm: 0.9945913280043327, iteration: 63267
loss: 0.9793987274169922,grad_norm: 0.9575427591449203, iteration: 63268
loss: 0.9903967380523682,grad_norm: 0.9566734363527081, iteration: 63269
loss: 1.016896367073059,grad_norm: 0.9999992301224707, iteration: 63270
loss: 1.014460802078247,grad_norm: 0.9605351078657248, iteration: 63271
loss: 0.9862117767333984,grad_norm: 0.9475140095102464, iteration: 63272
loss: 0.9833993911743164,grad_norm: 0.9999990989675336, iteration: 63273
loss: 1.0327129364013672,grad_norm: 0.9999991580488401, iteration: 63274
loss: 1.022219181060791,grad_norm: 0.9999990279116573, iteration: 63275
loss: 1.0062265396118164,grad_norm: 0.9999990583605537, iteration: 63276
loss: 1.0107008218765259,grad_norm: 0.9999991132797226, iteration: 63277
loss: 0.9956597089767456,grad_norm: 0.9999994955961378, iteration: 63278
loss: 0.9834079146385193,grad_norm: 0.9999991508490835, iteration: 63279
loss: 1.0005534887313843,grad_norm: 0.9564331374961883, iteration: 63280
loss: 1.0178951025009155,grad_norm: 0.8635549894909532, iteration: 63281
loss: 1.0028516054153442,grad_norm: 0.9999991722907593, iteration: 63282
loss: 1.0148723125457764,grad_norm: 0.9999990742823132, iteration: 63283
loss: 0.9856258630752563,grad_norm: 0.9999991654629498, iteration: 63284
loss: 0.9973717927932739,grad_norm: 0.9999989674252537, iteration: 63285
loss: 1.0136042833328247,grad_norm: 0.9049880249596783, iteration: 63286
loss: 1.0458709001541138,grad_norm: 0.9999991559031579, iteration: 63287
loss: 0.9973738193511963,grad_norm: 0.9572153586686261, iteration: 63288
loss: 0.9917805790901184,grad_norm: 0.9999990504387903, iteration: 63289
loss: 0.993829071521759,grad_norm: 0.9999991182478166, iteration: 63290
loss: 0.9523876309394836,grad_norm: 0.9210917658598198, iteration: 63291
loss: 0.9915295243263245,grad_norm: 0.9999991912382986, iteration: 63292
loss: 1.0159721374511719,grad_norm: 0.99999913802948, iteration: 63293
loss: 1.0064408779144287,grad_norm: 0.9480358191843685, iteration: 63294
loss: 1.0171810388565063,grad_norm: 0.9274854540857719, iteration: 63295
loss: 1.1005843877792358,grad_norm: 0.9999995299475793, iteration: 63296
loss: 1.025122046470642,grad_norm: 0.9999997904356029, iteration: 63297
loss: 0.973594069480896,grad_norm: 0.9999989633485681, iteration: 63298
loss: 1.018751621246338,grad_norm: 0.9999991198573257, iteration: 63299
loss: 1.0289291143417358,grad_norm: 0.9999990096902099, iteration: 63300
loss: 1.0168174505233765,grad_norm: 0.8933103729480452, iteration: 63301
loss: 0.969469428062439,grad_norm: 0.9231779247180711, iteration: 63302
loss: 0.9692635536193848,grad_norm: 0.9999991303526994, iteration: 63303
loss: 1.0165841579437256,grad_norm: 0.9994436154339115, iteration: 63304
loss: 0.9907101988792419,grad_norm: 0.9071270061855765, iteration: 63305
loss: 1.0502361059188843,grad_norm: 0.9999990795815707, iteration: 63306
loss: 1.0994601249694824,grad_norm: 0.9999995355033282, iteration: 63307
loss: 0.9989756345748901,grad_norm: 0.9999990737323253, iteration: 63308
loss: 1.0343000888824463,grad_norm: 0.9999997483147011, iteration: 63309
loss: 1.0281392335891724,grad_norm: 0.8783485217584975, iteration: 63310
loss: 1.0279699563980103,grad_norm: 0.9999991401084584, iteration: 63311
loss: 1.0332883596420288,grad_norm: 0.9912359866263799, iteration: 63312
loss: 1.0129364728927612,grad_norm: 0.9999998178252164, iteration: 63313
loss: 0.9871751666069031,grad_norm: 0.8735928911961329, iteration: 63314
loss: 0.9584754705429077,grad_norm: 0.9999992682150453, iteration: 63315
loss: 1.0233478546142578,grad_norm: 0.9999994888126579, iteration: 63316
loss: 1.0836492776870728,grad_norm: 0.9999992153642618, iteration: 63317
loss: 1.002577304840088,grad_norm: 0.9999991008250154, iteration: 63318
loss: 1.0076217651367188,grad_norm: 0.9999991313691464, iteration: 63319
loss: 1.0402547121047974,grad_norm: 0.9999996729213835, iteration: 63320
loss: 1.0120124816894531,grad_norm: 0.8976205549066824, iteration: 63321
loss: 1.0171962976455688,grad_norm: 0.999999602258991, iteration: 63322
loss: 1.0192898511886597,grad_norm: 0.8875012363095072, iteration: 63323
loss: 0.9926555752754211,grad_norm: 0.9999990916181216, iteration: 63324
loss: 0.9972935318946838,grad_norm: 0.9941228088391966, iteration: 63325
loss: 1.0717576742172241,grad_norm: 0.9999994484018836, iteration: 63326
loss: 0.9941897988319397,grad_norm: 0.9999990236842367, iteration: 63327
loss: 1.0046099424362183,grad_norm: 0.8329124602742458, iteration: 63328
loss: 0.9789418578147888,grad_norm: 0.9937364206653412, iteration: 63329
loss: 0.9844993948936462,grad_norm: 0.9189787640294328, iteration: 63330
loss: 1.024503231048584,grad_norm: 0.9999992420491756, iteration: 63331
loss: 1.0172231197357178,grad_norm: 0.9043503331713298, iteration: 63332
loss: 1.010941982269287,grad_norm: 0.9999991601567547, iteration: 63333
loss: 1.0105599164962769,grad_norm: 0.9999993736932163, iteration: 63334
loss: 1.0398454666137695,grad_norm: 0.999999084910254, iteration: 63335
loss: 1.0003674030303955,grad_norm: 0.9999992153514686, iteration: 63336
loss: 0.9957408905029297,grad_norm: 0.9999991093694622, iteration: 63337
loss: 0.9838638305664062,grad_norm: 0.9999989592243383, iteration: 63338
loss: 1.0177016258239746,grad_norm: 0.9999989554305005, iteration: 63339
loss: 0.9960710406303406,grad_norm: 0.9155021432596515, iteration: 63340
loss: 0.9926937222480774,grad_norm: 0.999999121981834, iteration: 63341
loss: 1.0262458324432373,grad_norm: 0.9999995123154412, iteration: 63342
loss: 1.0098323822021484,grad_norm: 0.836853418767494, iteration: 63343
loss: 1.0192523002624512,grad_norm: 0.9999992094141339, iteration: 63344
loss: 1.0151981115341187,grad_norm: 0.9999990171451523, iteration: 63345
loss: 0.9734375476837158,grad_norm: 0.9999989990627873, iteration: 63346
loss: 0.9893629550933838,grad_norm: 0.9999992209815766, iteration: 63347
loss: 0.999916136264801,grad_norm: 0.99999920610462, iteration: 63348
loss: 0.98562091588974,grad_norm: 0.9999990745907228, iteration: 63349
loss: 0.9718555212020874,grad_norm: 0.873523085923495, iteration: 63350
loss: 1.024787187576294,grad_norm: 0.9999991897534808, iteration: 63351
loss: 1.012801170349121,grad_norm: 0.9367917219633894, iteration: 63352
loss: 0.947361946105957,grad_norm: 0.9068166546762828, iteration: 63353
loss: 1.0171542167663574,grad_norm: 0.9999991506601651, iteration: 63354
loss: 0.9933155179023743,grad_norm: 0.9047838877237582, iteration: 63355
loss: 0.9904381036758423,grad_norm: 0.9502197557680359, iteration: 63356
loss: 0.9900010824203491,grad_norm: 0.9999994075620727, iteration: 63357
loss: 0.9839828014373779,grad_norm: 0.999999104249331, iteration: 63358
loss: 0.998059093952179,grad_norm: 0.9999993091476187, iteration: 63359
loss: 0.991673469543457,grad_norm: 0.9254434713278646, iteration: 63360
loss: 0.999381959438324,grad_norm: 0.9999990995213833, iteration: 63361
loss: 0.9866204261779785,grad_norm: 0.9999990699756803, iteration: 63362
loss: 0.9425683617591858,grad_norm: 0.9999991186140299, iteration: 63363
loss: 0.9939645528793335,grad_norm: 0.9203371040681548, iteration: 63364
loss: 0.982977032661438,grad_norm: 0.9900266542254083, iteration: 63365
loss: 0.9776535034179688,grad_norm: 0.9999990829974712, iteration: 63366
loss: 0.9789566397666931,grad_norm: 0.999999103916257, iteration: 63367
loss: 0.9747207164764404,grad_norm: 0.99999912177994, iteration: 63368
loss: 0.984947919845581,grad_norm: 0.8969562962785044, iteration: 63369
loss: 1.0629216432571411,grad_norm: 0.9999995831896158, iteration: 63370
loss: 0.9969244003295898,grad_norm: 0.9105594696099909, iteration: 63371
loss: 1.0278880596160889,grad_norm: 0.9999992281090714, iteration: 63372
loss: 1.0001007318496704,grad_norm: 0.9999992416693801, iteration: 63373
loss: 1.0285824537277222,grad_norm: 0.9040977357758858, iteration: 63374
loss: 1.0171949863433838,grad_norm: 0.99999903915546, iteration: 63375
loss: 1.0227558612823486,grad_norm: 0.9999990920594227, iteration: 63376
loss: 0.9973288774490356,grad_norm: 0.9999992495663553, iteration: 63377
loss: 0.9888611435890198,grad_norm: 0.9999990618250097, iteration: 63378
loss: 1.006168246269226,grad_norm: 0.9999991427994281, iteration: 63379
loss: 0.9925487041473389,grad_norm: 0.9999992486173775, iteration: 63380
loss: 0.958370566368103,grad_norm: 0.9182399872244738, iteration: 63381
loss: 1.011634349822998,grad_norm: 0.9999994315580004, iteration: 63382
loss: 0.9978331923484802,grad_norm: 0.8799921886673848, iteration: 63383
loss: 1.0136662721633911,grad_norm: 0.9999990885470463, iteration: 63384
loss: 1.0074621438980103,grad_norm: 0.9150402016049386, iteration: 63385
loss: 1.0141927003860474,grad_norm: 0.9999996329418253, iteration: 63386
loss: 1.0319651365280151,grad_norm: 0.9999991201734264, iteration: 63387
loss: 1.0245038270950317,grad_norm: 0.9999991779980293, iteration: 63388
loss: 1.0083508491516113,grad_norm: 0.8761917123167019, iteration: 63389
loss: 0.9886390566825867,grad_norm: 0.9821337853786871, iteration: 63390
loss: 1.0046669244766235,grad_norm: 0.9999990767310662, iteration: 63391
loss: 0.9890390634536743,grad_norm: 0.9999992085884877, iteration: 63392
loss: 1.0360944271087646,grad_norm: 0.9999992521047415, iteration: 63393
loss: 0.9925965070724487,grad_norm: 0.9999991214124507, iteration: 63394
loss: 1.0571651458740234,grad_norm: 0.9999994293450549, iteration: 63395
loss: 1.0759156942367554,grad_norm: 0.9999993381487694, iteration: 63396
loss: 1.0057986974716187,grad_norm: 0.9999991717893236, iteration: 63397
loss: 1.0116580724716187,grad_norm: 0.999999181794921, iteration: 63398
loss: 1.0034853219985962,grad_norm: 0.8575054659678039, iteration: 63399
loss: 0.9733874797821045,grad_norm: 0.9915424193453052, iteration: 63400
loss: 1.0061225891113281,grad_norm: 0.9530052245527757, iteration: 63401
loss: 1.0070302486419678,grad_norm: 0.960633899134318, iteration: 63402
loss: 0.9653658866882324,grad_norm: 0.9999988930219749, iteration: 63403
loss: 1.0064561367034912,grad_norm: 0.9999996353156078, iteration: 63404
loss: 0.9806917905807495,grad_norm: 0.9689464956272001, iteration: 63405
loss: 1.0521924495697021,grad_norm: 0.9658041825336061, iteration: 63406
loss: 0.9675165414810181,grad_norm: 0.999999035612947, iteration: 63407
loss: 1.0481736660003662,grad_norm: 0.9999991363672649, iteration: 63408
loss: 1.0145128965377808,grad_norm: 0.999999165459279, iteration: 63409
loss: 1.0629388093948364,grad_norm: 0.9999992361609449, iteration: 63410
loss: 1.0125874280929565,grad_norm: 0.9359855864366439, iteration: 63411
loss: 0.9890049695968628,grad_norm: 0.9999993149879476, iteration: 63412
loss: 0.9993385672569275,grad_norm: 0.9999990308158735, iteration: 63413
loss: 1.055019736289978,grad_norm: 0.9999996433253234, iteration: 63414
loss: 1.0127887725830078,grad_norm: 0.9495608658856867, iteration: 63415
loss: 1.0086556673049927,grad_norm: 0.999999018034914, iteration: 63416
loss: 0.9624333381652832,grad_norm: 0.9999995676587008, iteration: 63417
loss: 0.998469352722168,grad_norm: 0.8581007751958404, iteration: 63418
loss: 1.084202766418457,grad_norm: 0.9999994542893935, iteration: 63419
loss: 1.007858157157898,grad_norm: 0.9999991472408155, iteration: 63420
loss: 0.9956149458885193,grad_norm: 0.987358019918841, iteration: 63421
loss: 0.9776033163070679,grad_norm: 0.9999990258571033, iteration: 63422
loss: 1.0274467468261719,grad_norm: 0.9999990111651262, iteration: 63423
loss: 1.040806531906128,grad_norm: 0.999999068981813, iteration: 63424
loss: 1.0009881258010864,grad_norm: 0.9999991311725476, iteration: 63425
loss: 1.0298001766204834,grad_norm: 0.9680254815960342, iteration: 63426
loss: 0.9968368411064148,grad_norm: 0.9999990268703969, iteration: 63427
loss: 0.9698621034622192,grad_norm: 0.9446388529486875, iteration: 63428
loss: 1.02228581905365,grad_norm: 0.7946549332894546, iteration: 63429
loss: 0.9888030290603638,grad_norm: 0.9999996957978562, iteration: 63430
loss: 1.0428366661071777,grad_norm: 0.999999160486177, iteration: 63431
loss: 1.0193549394607544,grad_norm: 0.947523794185948, iteration: 63432
loss: 0.9772545099258423,grad_norm: 0.9632767993347565, iteration: 63433
loss: 1.000949501991272,grad_norm: 0.9844533797436803, iteration: 63434
loss: 1.0053030252456665,grad_norm: 0.9999992728181727, iteration: 63435
loss: 0.9832428097724915,grad_norm: 0.9999992054164074, iteration: 63436
loss: 1.014807939529419,grad_norm: 0.9999997932452227, iteration: 63437
loss: 0.9956570863723755,grad_norm: 0.9999991892802536, iteration: 63438
loss: 0.999302089214325,grad_norm: 0.9999990433809438, iteration: 63439
loss: 1.0116443634033203,grad_norm: 0.95192969180621, iteration: 63440
loss: 0.9665067791938782,grad_norm: 0.999999199846779, iteration: 63441
loss: 1.0296261310577393,grad_norm: 0.9198785551357088, iteration: 63442
loss: 0.9808340072631836,grad_norm: 0.7697938465003565, iteration: 63443
loss: 0.9999675154685974,grad_norm: 0.9999991145054918, iteration: 63444
loss: 0.9947542548179626,grad_norm: 0.9999993590488817, iteration: 63445
loss: 0.9824348092079163,grad_norm: 0.9113414886052559, iteration: 63446
loss: 0.9798694252967834,grad_norm: 0.9999991111659244, iteration: 63447
loss: 1.0103996992111206,grad_norm: 0.9999991170247027, iteration: 63448
loss: 0.9694787859916687,grad_norm: 0.9999991884611689, iteration: 63449
loss: 1.0034784078598022,grad_norm: 0.9520162554869586, iteration: 63450
loss: 0.9757601022720337,grad_norm: 0.9999990796633191, iteration: 63451
loss: 1.0895549058914185,grad_norm: 1.0000000111144105, iteration: 63452
loss: 1.007879614830017,grad_norm: 0.9999996995705128, iteration: 63453
loss: 1.0172781944274902,grad_norm: 0.9999990894744877, iteration: 63454
loss: 1.0151114463806152,grad_norm: 0.9196325140509312, iteration: 63455
loss: 1.0402547121047974,grad_norm: 0.9999991686214453, iteration: 63456
loss: 0.9833691120147705,grad_norm: 0.943453157058749, iteration: 63457
loss: 0.9737483263015747,grad_norm: 0.9999990690958436, iteration: 63458
loss: 1.0174015760421753,grad_norm: 0.9999990302333727, iteration: 63459
loss: 1.0011417865753174,grad_norm: 0.9999992052191707, iteration: 63460
loss: 1.0120564699172974,grad_norm: 0.9689128985912238, iteration: 63461
loss: 0.9897259473800659,grad_norm: 0.9067113511482484, iteration: 63462
loss: 1.0153515338897705,grad_norm: 0.9999991917677324, iteration: 63463
loss: 0.9609583616256714,grad_norm: 0.9610335767757364, iteration: 63464
loss: 0.994045615196228,grad_norm: 0.9999990597235443, iteration: 63465
loss: 0.9932565093040466,grad_norm: 0.9999992544208117, iteration: 63466
loss: 1.005445957183838,grad_norm: 0.8853045130488926, iteration: 63467
loss: 1.0436277389526367,grad_norm: 0.9968189613583611, iteration: 63468
loss: 0.996091365814209,grad_norm: 0.9999991413630537, iteration: 63469
loss: 1.009914755821228,grad_norm: 0.8844693502584646, iteration: 63470
loss: 1.0025858879089355,grad_norm: 0.9999990062386207, iteration: 63471
loss: 1.0188173055648804,grad_norm: 0.9999991043576001, iteration: 63472
loss: 0.9924676418304443,grad_norm: 0.9999992280695011, iteration: 63473
loss: 0.9949710965156555,grad_norm: 0.9999991718173644, iteration: 63474
loss: 0.9954447150230408,grad_norm: 0.9999991355604336, iteration: 63475
loss: 0.9861623644828796,grad_norm: 0.9560695733189174, iteration: 63476
loss: 1.0276747941970825,grad_norm: 0.9999990807731924, iteration: 63477
loss: 0.9985161423683167,grad_norm: 0.9330159424281491, iteration: 63478
loss: 0.9946136474609375,grad_norm: 0.8635789220510364, iteration: 63479
loss: 0.9880795478820801,grad_norm: 0.9575793747160896, iteration: 63480
loss: 0.9875773787498474,grad_norm: 0.999999458008067, iteration: 63481
loss: 1.0151318311691284,grad_norm: 0.9927021232745106, iteration: 63482
loss: 1.0045382976531982,grad_norm: 0.893290677946636, iteration: 63483
loss: 1.0313042402267456,grad_norm: 0.9900893257823432, iteration: 63484
loss: 0.9997022151947021,grad_norm: 0.9999990406016661, iteration: 63485
loss: 1.0032148361206055,grad_norm: 0.916707749134053, iteration: 63486
loss: 1.0139247179031372,grad_norm: 0.9999990486285701, iteration: 63487
loss: 1.002752423286438,grad_norm: 0.9999991685416161, iteration: 63488
loss: 0.9807261228561401,grad_norm: 0.9999991055648733, iteration: 63489
loss: 0.9746272563934326,grad_norm: 0.7848789510809988, iteration: 63490
loss: 1.0002412796020508,grad_norm: 0.9380167019329996, iteration: 63491
loss: 0.9595300555229187,grad_norm: 0.9999990843762239, iteration: 63492
loss: 1.0047146081924438,grad_norm: 0.9999993333382036, iteration: 63493
loss: 1.0340989828109741,grad_norm: 0.9999993836544081, iteration: 63494
loss: 1.0224223136901855,grad_norm: 0.9999991108609965, iteration: 63495
loss: 1.0040311813354492,grad_norm: 0.9999991674902108, iteration: 63496
loss: 0.996491014957428,grad_norm: 0.9999995079969464, iteration: 63497
loss: 1.0235456228256226,grad_norm: 0.9999993155128175, iteration: 63498
loss: 1.0301871299743652,grad_norm: 0.9999992204642713, iteration: 63499
loss: 0.9854801893234253,grad_norm: 0.9709193108066704, iteration: 63500
loss: 1.0949739217758179,grad_norm: 0.9999999414328684, iteration: 63501
loss: 0.9849903583526611,grad_norm: 0.9778390351971731, iteration: 63502
loss: 0.9851400852203369,grad_norm: 0.9999990714356536, iteration: 63503
loss: 1.022559642791748,grad_norm: 0.9999991704671074, iteration: 63504
loss: 1.0256856679916382,grad_norm: 0.9999993325952089, iteration: 63505
loss: 1.0344310998916626,grad_norm: 0.9999989845260192, iteration: 63506
loss: 0.971368134021759,grad_norm: 0.8752331831095643, iteration: 63507
loss: 1.026259422302246,grad_norm: 0.999999255693103, iteration: 63508
loss: 0.9740381836891174,grad_norm: 0.9999992884349426, iteration: 63509
loss: 1.018103003501892,grad_norm: 0.9771708860471963, iteration: 63510
loss: 1.0189558267593384,grad_norm: 0.9999991431442493, iteration: 63511
loss: 1.0076829195022583,grad_norm: 0.9999990569357522, iteration: 63512
loss: 1.0328319072723389,grad_norm: 0.9999993382274514, iteration: 63513
loss: 1.0062358379364014,grad_norm: 0.9999992305955805, iteration: 63514
loss: 0.9754188060760498,grad_norm: 0.9310434084818676, iteration: 63515
loss: 1.0270360708236694,grad_norm: 0.9999990903008444, iteration: 63516
loss: 0.972513735294342,grad_norm: 0.9583524585213882, iteration: 63517
loss: 0.9481585621833801,grad_norm: 0.999999205646027, iteration: 63518
loss: 1.0043737888336182,grad_norm: 0.999999180462755, iteration: 63519
loss: 1.0093306303024292,grad_norm: 0.8534730266806392, iteration: 63520
loss: 1.0340584516525269,grad_norm: 0.9999996994681276, iteration: 63521
loss: 1.0133439302444458,grad_norm: 0.9999993347465982, iteration: 63522
loss: 1.0091972351074219,grad_norm: 0.9999997806031066, iteration: 63523
loss: 0.9869737029075623,grad_norm: 0.9999995006080264, iteration: 63524
loss: 0.9549787640571594,grad_norm: 0.9999992563675576, iteration: 63525
loss: 0.9877352118492126,grad_norm: 0.9999991213039273, iteration: 63526
loss: 1.028419852256775,grad_norm: 0.9118839027605148, iteration: 63527
loss: 1.0212891101837158,grad_norm: 0.9999991861309316, iteration: 63528
loss: 1.0009286403656006,grad_norm: 0.9326986320298127, iteration: 63529
loss: 0.9698901772499084,grad_norm: 0.973116285762941, iteration: 63530
loss: 1.0290203094482422,grad_norm: 0.9999992292723617, iteration: 63531
loss: 0.9406391978263855,grad_norm: 0.9999992728565729, iteration: 63532
loss: 1.0492552518844604,grad_norm: 0.9999995218856682, iteration: 63533
loss: 1.0230650901794434,grad_norm: 0.9643835101100475, iteration: 63534
loss: 0.9871615171432495,grad_norm: 0.9999990749418551, iteration: 63535
loss: 1.0155028104782104,grad_norm: 0.9999991353876437, iteration: 63536
loss: 0.9951106905937195,grad_norm: 0.9999992318070733, iteration: 63537
loss: 0.9837807416915894,grad_norm: 0.8706295696161572, iteration: 63538
loss: 0.978842556476593,grad_norm: 0.9999992062896018, iteration: 63539
loss: 1.0205063819885254,grad_norm: 0.9999991386943152, iteration: 63540
loss: 1.0186430215835571,grad_norm: 0.8553321910509645, iteration: 63541
loss: 1.0411821603775024,grad_norm: 0.9999994915588033, iteration: 63542
loss: 1.0107927322387695,grad_norm: 0.8922460852830344, iteration: 63543
loss: 0.968226432800293,grad_norm: 0.9999991059998008, iteration: 63544
loss: 0.9915033578872681,grad_norm: 0.9858970626059089, iteration: 63545
loss: 1.0173248052597046,grad_norm: 0.8794866551908412, iteration: 63546
loss: 0.9865050911903381,grad_norm: 0.9999991287825513, iteration: 63547
loss: 0.9822289943695068,grad_norm: 0.9852643914040303, iteration: 63548
loss: 1.0029386281967163,grad_norm: 0.8634745347185319, iteration: 63549
loss: 1.0171983242034912,grad_norm: 0.9934148448294041, iteration: 63550
loss: 1.0036799907684326,grad_norm: 0.9999990850103502, iteration: 63551
loss: 1.037782073020935,grad_norm: 0.9999993499931336, iteration: 63552
loss: 1.0308163166046143,grad_norm: 0.9999991684709304, iteration: 63553
loss: 0.9886389374732971,grad_norm: 0.9999990278533748, iteration: 63554
loss: 0.9777493476867676,grad_norm: 0.9109186215462529, iteration: 63555
loss: 1.0422917604446411,grad_norm: 0.9999993864039349, iteration: 63556
loss: 1.037536859512329,grad_norm: 0.9999991906133743, iteration: 63557
loss: 0.9970299601554871,grad_norm: 0.9999991459539153, iteration: 63558
loss: 1.006307601928711,grad_norm: 0.9999991548519345, iteration: 63559
loss: 0.9809731841087341,grad_norm: 0.9999991211401161, iteration: 63560
loss: 1.0088527202606201,grad_norm: 0.9999990929803196, iteration: 63561
loss: 0.9596962332725525,grad_norm: 0.999999015506497, iteration: 63562
loss: 1.0063272714614868,grad_norm: 0.7743036311295297, iteration: 63563
loss: 1.0130236148834229,grad_norm: 0.9999991142596598, iteration: 63564
loss: 0.9929785132408142,grad_norm: 0.9999992921226841, iteration: 63565
loss: 1.011550784111023,grad_norm: 0.9999990913788974, iteration: 63566
loss: 1.038832426071167,grad_norm: 0.9999990316264173, iteration: 63567
loss: 1.0009111166000366,grad_norm: 0.960218020029275, iteration: 63568
loss: 0.9798109531402588,grad_norm: 0.9999990957781102, iteration: 63569
loss: 1.0093512535095215,grad_norm: 0.9999992647196709, iteration: 63570
loss: 0.9863203167915344,grad_norm: 0.9999992453640688, iteration: 63571
loss: 1.0034403800964355,grad_norm: 0.9999990268852182, iteration: 63572
loss: 0.9791699051856995,grad_norm: 0.9898093773432116, iteration: 63573
loss: 0.995903491973877,grad_norm: 0.9999990014292972, iteration: 63574
loss: 1.025261402130127,grad_norm: 0.9999991680683794, iteration: 63575
loss: 1.0095652341842651,grad_norm: 0.964311587792031, iteration: 63576
loss: 0.978233277797699,grad_norm: 0.9724438554243979, iteration: 63577
loss: 0.978480875492096,grad_norm: 0.9408713400094411, iteration: 63578
loss: 0.9938031435012817,grad_norm: 0.9999991492862235, iteration: 63579
loss: 1.0053462982177734,grad_norm: 0.9423355328975334, iteration: 63580
loss: 0.9472342729568481,grad_norm: 0.9799206351544999, iteration: 63581
loss: 1.0013864040374756,grad_norm: 0.9999991335992023, iteration: 63582
loss: 1.041324257850647,grad_norm: 0.9999991049940953, iteration: 63583
loss: 0.9820693731307983,grad_norm: 0.9999989371665919, iteration: 63584
loss: 1.0617116689682007,grad_norm: 0.999999707264847, iteration: 63585
loss: 1.0115413665771484,grad_norm: 0.9999992993946463, iteration: 63586
loss: 0.9953842759132385,grad_norm: 0.9583377756185031, iteration: 63587
loss: 0.9754437208175659,grad_norm: 0.9999990963194069, iteration: 63588
loss: 1.0131230354309082,grad_norm: 0.9999991220413824, iteration: 63589
loss: 0.9900815486907959,grad_norm: 0.999998979795309, iteration: 63590
loss: 1.0446228981018066,grad_norm: 0.9999991590436659, iteration: 63591
loss: 1.0110224485397339,grad_norm: 0.9999989456755707, iteration: 63592
loss: 1.0349059104919434,grad_norm: 0.9999990385769021, iteration: 63593
loss: 1.0218180418014526,grad_norm: 0.9999991797582195, iteration: 63594
loss: 0.9722985625267029,grad_norm: 0.9999991714097016, iteration: 63595
loss: 1.0148122310638428,grad_norm: 0.999999297392201, iteration: 63596
loss: 0.9952919483184814,grad_norm: 0.9011833040226863, iteration: 63597
loss: 1.0237504243850708,grad_norm: 0.9662447525127004, iteration: 63598
loss: 1.0173671245574951,grad_norm: 0.9999990939748571, iteration: 63599
loss: 1.023481011390686,grad_norm: 0.9999994577431368, iteration: 63600
loss: 0.9935356974601746,grad_norm: 0.9999991360333357, iteration: 63601
loss: 0.9734392166137695,grad_norm: 0.9945795730886161, iteration: 63602
loss: 1.0166025161743164,grad_norm: 0.835705553474283, iteration: 63603
loss: 1.0035008192062378,grad_norm: 0.9026690037022431, iteration: 63604
loss: 0.9921756982803345,grad_norm: 0.9999993807925863, iteration: 63605
loss: 0.9618743062019348,grad_norm: 0.9999991913118699, iteration: 63606
loss: 1.0286056995391846,grad_norm: 0.999999048390719, iteration: 63607
loss: 1.0141475200653076,grad_norm: 0.999999209009946, iteration: 63608
loss: 0.9743357300758362,grad_norm: 0.9999991112507589, iteration: 63609
loss: 1.034870982170105,grad_norm: 0.9999996236156741, iteration: 63610
loss: 0.9912967681884766,grad_norm: 0.9999991924517574, iteration: 63611
loss: 1.0169652700424194,grad_norm: 0.8904699349269544, iteration: 63612
loss: 1.015731692314148,grad_norm: 0.9999989568595186, iteration: 63613
loss: 0.9594776630401611,grad_norm: 0.9999991118362128, iteration: 63614
loss: 1.0274654626846313,grad_norm: 0.9213758092936031, iteration: 63615
loss: 0.982819139957428,grad_norm: 0.999999163415261, iteration: 63616
loss: 0.9954805374145508,grad_norm: 0.9999992589698674, iteration: 63617
loss: 1.0066715478897095,grad_norm: 0.9999991701112417, iteration: 63618
loss: 1.0066441297531128,grad_norm: 0.936207644811048, iteration: 63619
loss: 1.0263737440109253,grad_norm: 0.9913848399175005, iteration: 63620
loss: 0.9912241697311401,grad_norm: 0.9999991007803921, iteration: 63621
loss: 0.9891523122787476,grad_norm: 0.8448394553072978, iteration: 63622
loss: 1.0412756204605103,grad_norm: 0.9420618041695608, iteration: 63623
loss: 1.0025781393051147,grad_norm: 0.9999991658499485, iteration: 63624
loss: 0.963128924369812,grad_norm: 0.9999991303686812, iteration: 63625
loss: 0.9940131902694702,grad_norm: 0.999999046194372, iteration: 63626
loss: 0.9886287450790405,grad_norm: 0.9999989376257158, iteration: 63627
loss: 0.994752824306488,grad_norm: 0.9085604892313854, iteration: 63628
loss: 0.9868927001953125,grad_norm: 0.8415057978154106, iteration: 63629
loss: 0.9844422340393066,grad_norm: 0.9999989785929524, iteration: 63630
loss: 0.975238025188446,grad_norm: 0.9999991031861275, iteration: 63631
loss: 1.023829460144043,grad_norm: 0.8485121465045743, iteration: 63632
loss: 1.0095815658569336,grad_norm: 0.9338821988751141, iteration: 63633
loss: 1.0086338520050049,grad_norm: 0.9029964758862957, iteration: 63634
loss: 0.9923370480537415,grad_norm: 0.9190672291029236, iteration: 63635
loss: 0.9977975487709045,grad_norm: 0.9999992788206592, iteration: 63636
loss: 0.9885409474372864,grad_norm: 0.9372473172844186, iteration: 63637
loss: 1.0156381130218506,grad_norm: 0.8459695962596679, iteration: 63638
loss: 0.9906831383705139,grad_norm: 0.9999995707588825, iteration: 63639
loss: 1.0097194910049438,grad_norm: 0.9999992824297779, iteration: 63640
loss: 1.0036228895187378,grad_norm: 0.9751961372694785, iteration: 63641
loss: 1.028303861618042,grad_norm: 0.9682830731939647, iteration: 63642
loss: 1.0107605457305908,grad_norm: 0.9999991552770419, iteration: 63643
loss: 1.0415822267532349,grad_norm: 0.871357420048942, iteration: 63644
loss: 1.006262183189392,grad_norm: 0.9999990230813506, iteration: 63645
loss: 1.0094159841537476,grad_norm: 0.9667469632874034, iteration: 63646
loss: 1.0122594833374023,grad_norm: 0.9999990956774502, iteration: 63647
loss: 0.973746657371521,grad_norm: 0.9488856891880773, iteration: 63648
loss: 1.0123738050460815,grad_norm: 0.9999992202842577, iteration: 63649
loss: 0.9857099652290344,grad_norm: 0.9999990593524225, iteration: 63650
loss: 1.008718729019165,grad_norm: 0.975460766797345, iteration: 63651
loss: 0.9723765850067139,grad_norm: 0.9282761927482794, iteration: 63652
loss: 1.0061923265457153,grad_norm: 0.9999990676366826, iteration: 63653
loss: 1.0256919860839844,grad_norm: 0.9999991078460377, iteration: 63654
loss: 1.0010759830474854,grad_norm: 0.9999992907978108, iteration: 63655
loss: 1.0208035707473755,grad_norm: 0.926637682017435, iteration: 63656
loss: 0.9667285084724426,grad_norm: 0.9999990029605765, iteration: 63657
loss: 1.0401750802993774,grad_norm: 0.9999989675733727, iteration: 63658
loss: 1.0401180982589722,grad_norm: 0.9999997171370475, iteration: 63659
loss: 0.9765413999557495,grad_norm: 0.9133883841815154, iteration: 63660
loss: 0.9722654819488525,grad_norm: 0.9999991178591598, iteration: 63661
loss: 1.0246888399124146,grad_norm: 0.9095919520550407, iteration: 63662
loss: 0.9967952966690063,grad_norm: 0.9299978115508949, iteration: 63663
loss: 0.9573195576667786,grad_norm: 0.9999990310312427, iteration: 63664
loss: 1.00326406955719,grad_norm: 0.8770172595249375, iteration: 63665
loss: 0.998822808265686,grad_norm: 0.9023802395103767, iteration: 63666
loss: 0.9935457110404968,grad_norm: 0.8883171609943247, iteration: 63667
loss: 1.072013258934021,grad_norm: 0.9999993132516302, iteration: 63668
loss: 0.9813449382781982,grad_norm: 0.9999990684673776, iteration: 63669
loss: 1.0142070055007935,grad_norm: 0.9999995475060885, iteration: 63670
loss: 0.9826567769050598,grad_norm: 0.9999991269646353, iteration: 63671
loss: 1.005327820777893,grad_norm: 0.9999991062459432, iteration: 63672
loss: 1.0152846574783325,grad_norm: 0.8970882310022273, iteration: 63673
loss: 0.9725431799888611,grad_norm: 0.9406253332384128, iteration: 63674
loss: 1.016337275505066,grad_norm: 0.865563362348689, iteration: 63675
loss: 1.034528136253357,grad_norm: 0.9999992422999757, iteration: 63676
loss: 1.0612279176712036,grad_norm: 0.9999995584637065, iteration: 63677
loss: 1.0180423259735107,grad_norm: 0.9307371180695984, iteration: 63678
loss: 1.0424489974975586,grad_norm: 0.9999991462894294, iteration: 63679
loss: 1.0153456926345825,grad_norm: 0.9999991938650239, iteration: 63680
loss: 0.9864396452903748,grad_norm: 0.8963189043549563, iteration: 63681
loss: 0.9880875945091248,grad_norm: 0.9007125251451188, iteration: 63682
loss: 1.0210151672363281,grad_norm: 0.9999990752858214, iteration: 63683
loss: 0.9997957348823547,grad_norm: 0.9999989801841697, iteration: 63684
loss: 1.0195002555847168,grad_norm: 0.8913819059643076, iteration: 63685
loss: 1.0406067371368408,grad_norm: 0.904630468889381, iteration: 63686
loss: 1.0229467153549194,grad_norm: 0.9548830822137696, iteration: 63687
loss: 1.013364553451538,grad_norm: 0.94002408017437, iteration: 63688
loss: 0.9934446811676025,grad_norm: 0.9999992574908326, iteration: 63689
loss: 0.9921709895133972,grad_norm: 0.9999990540294538, iteration: 63690
loss: 1.0201342105865479,grad_norm: 0.9999989598784551, iteration: 63691
loss: 1.0051029920578003,grad_norm: 0.9999989691897145, iteration: 63692
loss: 0.9805286526679993,grad_norm: 0.9801492760459941, iteration: 63693
loss: 0.9694795608520508,grad_norm: 0.8859266426554321, iteration: 63694
loss: 0.9986559748649597,grad_norm: 0.9996675989032098, iteration: 63695
loss: 1.0131852626800537,grad_norm: 0.9999991092733893, iteration: 63696
loss: 0.9603390097618103,grad_norm: 0.9999989463399537, iteration: 63697
loss: 1.030289888381958,grad_norm: 0.9876953155193404, iteration: 63698
loss: 0.9915351867675781,grad_norm: 0.9999991195106391, iteration: 63699
loss: 1.006227731704712,grad_norm: 0.999999066189778, iteration: 63700
loss: 0.9834984540939331,grad_norm: 0.9343940800116628, iteration: 63701
loss: 1.011522889137268,grad_norm: 0.9314015803627604, iteration: 63702
loss: 1.0183199644088745,grad_norm: 0.9999990834427139, iteration: 63703
loss: 0.988629162311554,grad_norm: 0.9591361982106862, iteration: 63704
loss: 0.9855700731277466,grad_norm: 0.9999991985005029, iteration: 63705
loss: 1.0220810174942017,grad_norm: 0.9999991002761035, iteration: 63706
loss: 0.9966014623641968,grad_norm: 0.976183150278316, iteration: 63707
loss: 0.959134578704834,grad_norm: 0.9999992228952079, iteration: 63708
loss: 0.9937958717346191,grad_norm: 0.9999991382819444, iteration: 63709
loss: 0.9830923080444336,grad_norm: 0.9379840289978221, iteration: 63710
loss: 1.024526834487915,grad_norm: 0.8849714900470707, iteration: 63711
loss: 0.9752264022827148,grad_norm: 0.9999992617071858, iteration: 63712
loss: 0.9904667735099792,grad_norm: 0.9582173189466301, iteration: 63713
loss: 0.9714906215667725,grad_norm: 0.9999991102370464, iteration: 63714
loss: 1.0080294609069824,grad_norm: 0.932888731883406, iteration: 63715
loss: 1.027812123298645,grad_norm: 0.9421307151321988, iteration: 63716
loss: 0.9957204461097717,grad_norm: 0.9217309843919133, iteration: 63717
loss: 0.9980981945991516,grad_norm: 0.9700711494939881, iteration: 63718
loss: 0.9781040549278259,grad_norm: 0.9123378770590124, iteration: 63719
loss: 0.992040753364563,grad_norm: 0.7840860553060178, iteration: 63720
loss: 0.9778484106063843,grad_norm: 0.9201050480497949, iteration: 63721
loss: 1.0210436582565308,grad_norm: 0.8490321702063633, iteration: 63722
loss: 1.0202780961990356,grad_norm: 0.9999991318847806, iteration: 63723
loss: 0.9645853042602539,grad_norm: 0.8742734026942376, iteration: 63724
loss: 1.0126842260360718,grad_norm: 0.9705785300102368, iteration: 63725
loss: 0.9871670603752136,grad_norm: 0.9999990858319865, iteration: 63726
loss: 1.0095912218093872,grad_norm: 0.9812200861162514, iteration: 63727
loss: 0.991462230682373,grad_norm: 0.9251998958874871, iteration: 63728
loss: 1.0106096267700195,grad_norm: 0.9147559971667465, iteration: 63729
loss: 0.9859660863876343,grad_norm: 0.9999991389599824, iteration: 63730
loss: 0.9999854564666748,grad_norm: 0.9999992240721384, iteration: 63731
loss: 1.0226470232009888,grad_norm: 0.9999991779984637, iteration: 63732
loss: 0.9827856421470642,grad_norm: 0.9999992466307503, iteration: 63733
loss: 1.0281555652618408,grad_norm: 0.99999929936843, iteration: 63734
loss: 1.0023657083511353,grad_norm: 0.9762869292244617, iteration: 63735
loss: 1.0343307256698608,grad_norm: 0.999999046226854, iteration: 63736
loss: 1.0081446170806885,grad_norm: 0.9999990597668343, iteration: 63737
loss: 1.0139846801757812,grad_norm: 0.9999990609459505, iteration: 63738
loss: 1.0056817531585693,grad_norm: 0.9999992024761487, iteration: 63739
loss: 1.0047005414962769,grad_norm: 0.9999992314467417, iteration: 63740
loss: 0.9930688142776489,grad_norm: 0.9999990006381224, iteration: 63741
loss: 0.9801623821258545,grad_norm: 0.9378300627989182, iteration: 63742
loss: 1.0023558139801025,grad_norm: 0.9999991751472719, iteration: 63743
loss: 1.0267330408096313,grad_norm: 0.9683764667135611, iteration: 63744
loss: 1.0014392137527466,grad_norm: 0.9999990297811054, iteration: 63745
loss: 1.0071437358856201,grad_norm: 0.9999992687916743, iteration: 63746
loss: 0.9837127923965454,grad_norm: 0.9999990117109628, iteration: 63747
loss: 0.9472739100456238,grad_norm: 0.9456085126790983, iteration: 63748
loss: 0.9958174824714661,grad_norm: 0.9999989725943557, iteration: 63749
loss: 1.1423105001449585,grad_norm: 0.9999992743470965, iteration: 63750
loss: 0.9581632614135742,grad_norm: 0.9999991331511723, iteration: 63751
loss: 1.025708556175232,grad_norm: 0.9999990239208114, iteration: 63752
loss: 0.9942837357521057,grad_norm: 0.9807838412806021, iteration: 63753
loss: 1.0286743640899658,grad_norm: 0.9999988549887014, iteration: 63754
loss: 0.9575107097625732,grad_norm: 0.9149107868433706, iteration: 63755
loss: 1.0497825145721436,grad_norm: 0.9999990303931349, iteration: 63756
loss: 0.989197313785553,grad_norm: 0.9999992181603848, iteration: 63757
loss: 1.0155341625213623,grad_norm: 0.9999990625196727, iteration: 63758
loss: 1.0119078159332275,grad_norm: 0.834129300373154, iteration: 63759
loss: 0.9602404236793518,grad_norm: 0.8533783575364423, iteration: 63760
loss: 1.0066945552825928,grad_norm: 0.9999993644732135, iteration: 63761
loss: 1.0282394886016846,grad_norm: 0.9999991345786848, iteration: 63762
loss: 1.0090937614440918,grad_norm: 0.9999990458756223, iteration: 63763
loss: 1.0190966129302979,grad_norm: 0.9999992075117655, iteration: 63764
loss: 1.0086051225662231,grad_norm: 0.9999989626562569, iteration: 63765
loss: 1.0269274711608887,grad_norm: 0.9999993214485602, iteration: 63766
loss: 1.030834436416626,grad_norm: 0.9999992403328288, iteration: 63767
loss: 0.9904798865318298,grad_norm: 0.9608777861956767, iteration: 63768
loss: 0.9243005514144897,grad_norm: 0.999999132682734, iteration: 63769
loss: 1.0136855840682983,grad_norm: 0.9999991520507056, iteration: 63770
loss: 1.0067508220672607,grad_norm: 0.9999989670022407, iteration: 63771
loss: 1.0067355632781982,grad_norm: 0.9999990209035352, iteration: 63772
loss: 1.0191882848739624,grad_norm: 0.9999994960828494, iteration: 63773
loss: 1.0431275367736816,grad_norm: 0.9999999208827152, iteration: 63774
loss: 0.9873825907707214,grad_norm: 0.9291006657765243, iteration: 63775
loss: 1.003712773323059,grad_norm: 0.916537960012726, iteration: 63776
loss: 0.9841468930244446,grad_norm: 0.9785246235816935, iteration: 63777
loss: 0.9730739593505859,grad_norm: 0.9999990819009966, iteration: 63778
loss: 1.0156654119491577,grad_norm: 0.999999062509741, iteration: 63779
loss: 1.0231037139892578,grad_norm: 0.9698081938518781, iteration: 63780
loss: 0.9900588393211365,grad_norm: 0.9999991044202304, iteration: 63781
loss: 1.0111240148544312,grad_norm: 0.9999990420557185, iteration: 63782
loss: 0.979242205619812,grad_norm: 0.9999991629720549, iteration: 63783
loss: 1.012174367904663,grad_norm: 0.9237147526469188, iteration: 63784
loss: 0.989479660987854,grad_norm: 0.8716534195888389, iteration: 63785
loss: 0.9904236793518066,grad_norm: 0.8958695994098876, iteration: 63786
loss: 0.973929762840271,grad_norm: 0.9665680842251362, iteration: 63787
loss: 0.9957262277603149,grad_norm: 0.9999992047850816, iteration: 63788
loss: 0.9953984022140503,grad_norm: 0.9659512011015066, iteration: 63789
loss: 1.0371215343475342,grad_norm: 0.9999990692191026, iteration: 63790
loss: 0.9861097931861877,grad_norm: 0.9195568097744616, iteration: 63791
loss: 0.9894759654998779,grad_norm: 0.9999990601618376, iteration: 63792
loss: 1.0040113925933838,grad_norm: 0.9999990530507565, iteration: 63793
loss: 1.0490751266479492,grad_norm: 0.9999991990025563, iteration: 63794
loss: 1.027693510055542,grad_norm: 0.9931452803042864, iteration: 63795
loss: 1.021041750907898,grad_norm: 0.9159774972343819, iteration: 63796
loss: 1.1660360097885132,grad_norm: 0.9999994941162385, iteration: 63797
loss: 1.0183031558990479,grad_norm: 0.999999179564206, iteration: 63798
loss: 0.9957337379455566,grad_norm: 0.9999991257302531, iteration: 63799
loss: 0.9761260747909546,grad_norm: 0.9999991045893093, iteration: 63800
loss: 0.993569016456604,grad_norm: 0.9999989413287842, iteration: 63801
loss: 1.0145856142044067,grad_norm: 0.9171434842986811, iteration: 63802
loss: 1.0051418542861938,grad_norm: 0.9999990413600178, iteration: 63803
loss: 0.9894676804542542,grad_norm: 0.9999995750080962, iteration: 63804
loss: 1.020855188369751,grad_norm: 0.9999991846541476, iteration: 63805
loss: 1.003301739692688,grad_norm: 0.944983651310411, iteration: 63806
loss: 1.0034823417663574,grad_norm: 0.9999991869810093, iteration: 63807
loss: 1.0104446411132812,grad_norm: 0.999999153959487, iteration: 63808
loss: 1.0165786743164062,grad_norm: 0.9999991492002914, iteration: 63809
loss: 0.984044075012207,grad_norm: 0.9928640508071678, iteration: 63810
loss: 1.0095795392990112,grad_norm: 0.9999992856288931, iteration: 63811
loss: 1.0215044021606445,grad_norm: 0.999999003752537, iteration: 63812
loss: 0.9721611142158508,grad_norm: 0.9444808143930226, iteration: 63813
loss: 1.041535496711731,grad_norm: 0.9999993867982347, iteration: 63814
loss: 1.0097248554229736,grad_norm: 0.9093680521865344, iteration: 63815
loss: 0.9829287528991699,grad_norm: 0.9999990266733245, iteration: 63816
loss: 1.0105905532836914,grad_norm: 0.989212616538212, iteration: 63817
loss: 0.972236156463623,grad_norm: 0.9999990017403314, iteration: 63818
loss: 0.9956778883934021,grad_norm: 0.9637790742211798, iteration: 63819
loss: 0.9963879585266113,grad_norm: 0.9999990985439603, iteration: 63820
loss: 1.0345919132232666,grad_norm: 0.9050539336373423, iteration: 63821
loss: 1.0102424621582031,grad_norm: 0.9999991000877243, iteration: 63822
loss: 0.9996458292007446,grad_norm: 0.9999991032988697, iteration: 63823
loss: 0.9971616268157959,grad_norm: 0.9999991979838262, iteration: 63824
loss: 1.0064374208450317,grad_norm: 0.8737582511919791, iteration: 63825
loss: 0.9816092848777771,grad_norm: 0.9689743442835764, iteration: 63826
loss: 1.0179798603057861,grad_norm: 0.9999992140800464, iteration: 63827
loss: 1.0164916515350342,grad_norm: 0.9141805951206576, iteration: 63828
loss: 1.0031543970108032,grad_norm: 0.9999989456848212, iteration: 63829
loss: 1.0312308073043823,grad_norm: 0.9157961616959664, iteration: 63830
loss: 0.9996506571769714,grad_norm: 0.9999992601632862, iteration: 63831
loss: 1.0031373500823975,grad_norm: 0.9999991302690713, iteration: 63832
loss: 0.9711499214172363,grad_norm: 0.9621048546509168, iteration: 63833
loss: 0.9779964089393616,grad_norm: 0.8722136488838682, iteration: 63834
loss: 0.9979283809661865,grad_norm: 0.9999992386098443, iteration: 63835
loss: 1.023700475692749,grad_norm: 0.9999991139895481, iteration: 63836
loss: 1.0098885297775269,grad_norm: 0.9999992067791351, iteration: 63837
loss: 1.0127261877059937,grad_norm: 0.9514364899266007, iteration: 63838
loss: 1.031116008758545,grad_norm: 0.9999994165398038, iteration: 63839
loss: 1.030474305152893,grad_norm: 0.84421482856006, iteration: 63840
loss: 1.0282864570617676,grad_norm: 0.9999991288630128, iteration: 63841
loss: 0.9795871376991272,grad_norm: 0.8567372938915278, iteration: 63842
loss: 0.9853985905647278,grad_norm: 0.9268466574803429, iteration: 63843
loss: 0.98654705286026,grad_norm: 0.999999125505894, iteration: 63844
loss: 1.035157322883606,grad_norm: 0.9596957610019553, iteration: 63845
loss: 1.0148578882217407,grad_norm: 0.9999989860671502, iteration: 63846
loss: 1.0044057369232178,grad_norm: 0.9290604533992215, iteration: 63847
loss: 1.0008020401000977,grad_norm: 0.9999992277809917, iteration: 63848
loss: 1.0233756303787231,grad_norm: 0.9999992966962601, iteration: 63849
loss: 0.9970529079437256,grad_norm: 0.9606494140442311, iteration: 63850
loss: 0.9754045605659485,grad_norm: 0.9999990675489143, iteration: 63851
loss: 1.056276798248291,grad_norm: 0.9999990552073907, iteration: 63852
loss: 1.0067543983459473,grad_norm: 0.999999359272458, iteration: 63853
loss: 0.9937083125114441,grad_norm: 0.9108198746642635, iteration: 63854
loss: 1.0081827640533447,grad_norm: 0.999999157417605, iteration: 63855
loss: 0.9747396111488342,grad_norm: 0.9636628944344706, iteration: 63856
loss: 0.9344965219497681,grad_norm: 0.9999990946465487, iteration: 63857
loss: 1.0159692764282227,grad_norm: 0.9999989553453238, iteration: 63858
loss: 1.0055025815963745,grad_norm: 0.9999991508330236, iteration: 63859
loss: 1.0196119546890259,grad_norm: 0.9999992175768885, iteration: 63860
loss: 1.0037504434585571,grad_norm: 0.9999990975376439, iteration: 63861
loss: 0.9689597487449646,grad_norm: 0.9999990523441842, iteration: 63862
loss: 1.0190529823303223,grad_norm: 0.8821184522926659, iteration: 63863
loss: 0.9813827872276306,grad_norm: 0.9304927379317732, iteration: 63864
loss: 1.0409018993377686,grad_norm: 0.9999991295063472, iteration: 63865
loss: 0.9884043335914612,grad_norm: 0.9724676055806507, iteration: 63866
loss: 0.9929569363594055,grad_norm: 0.9999991076838719, iteration: 63867
loss: 1.0154505968093872,grad_norm: 0.9892372186939072, iteration: 63868
loss: 0.9876643419265747,grad_norm: 0.977175454149652, iteration: 63869
loss: 0.9704445004463196,grad_norm: 0.9906011534203483, iteration: 63870
loss: 1.0030251741409302,grad_norm: 0.9999989884034057, iteration: 63871
loss: 0.9768657684326172,grad_norm: 0.9668173960159665, iteration: 63872
loss: 0.9737539291381836,grad_norm: 0.9999992138924282, iteration: 63873
loss: 1.013877034187317,grad_norm: 0.9999992019950649, iteration: 63874
loss: 1.010025978088379,grad_norm: 0.9999991190452839, iteration: 63875
loss: 0.989945113658905,grad_norm: 0.9999992516649445, iteration: 63876
loss: 1.011223316192627,grad_norm: 0.9701404102607728, iteration: 63877
loss: 1.0389858484268188,grad_norm: 0.9999996256729655, iteration: 63878
loss: 1.0454628467559814,grad_norm: 0.9999990947383074, iteration: 63879
loss: 1.0090042352676392,grad_norm: 0.7676423210678018, iteration: 63880
loss: 1.024772047996521,grad_norm: 0.9999990821189173, iteration: 63881
loss: 0.9842590689659119,grad_norm: 0.9135346341865623, iteration: 63882
loss: 0.9761093854904175,grad_norm: 0.9999990928828159, iteration: 63883
loss: 1.0191400051116943,grad_norm: 0.999999147072319, iteration: 63884
loss: 1.0100834369659424,grad_norm: 0.9999992000125303, iteration: 63885
loss: 0.953728973865509,grad_norm: 0.9999991674015539, iteration: 63886
loss: 1.0392796993255615,grad_norm: 0.8821369876117038, iteration: 63887
loss: 0.9922958612442017,grad_norm: 0.9999990516929892, iteration: 63888
loss: 0.9800261855125427,grad_norm: 0.9999989855656296, iteration: 63889
loss: 1.0246503353118896,grad_norm: 0.9999990891542834, iteration: 63890
loss: 0.9997148513793945,grad_norm: 0.8734203760618143, iteration: 63891
loss: 1.0244487524032593,grad_norm: 0.9999992170798393, iteration: 63892
loss: 0.9684935212135315,grad_norm: 0.9999990276939368, iteration: 63893
loss: 0.9384741187095642,grad_norm: 0.9603642520198691, iteration: 63894
loss: 0.9776374101638794,grad_norm: 0.9999991539033656, iteration: 63895
loss: 0.9979695081710815,grad_norm: 0.9455998870985616, iteration: 63896
loss: 0.9649335145950317,grad_norm: 0.9866303375785875, iteration: 63897
loss: 0.9901666045188904,grad_norm: 0.999999142489079, iteration: 63898
loss: 0.9942830204963684,grad_norm: 0.9999990137998112, iteration: 63899
loss: 0.9923071265220642,grad_norm: 0.797149883562484, iteration: 63900
loss: 0.9955264925956726,grad_norm: 0.9999992460516173, iteration: 63901
loss: 0.9613796472549438,grad_norm: 0.999999071323429, iteration: 63902
loss: 0.983170747756958,grad_norm: 0.9999991191893022, iteration: 63903
loss: 1.0401002168655396,grad_norm: 0.9999991383575223, iteration: 63904
loss: 1.02981436252594,grad_norm: 0.9999990738950811, iteration: 63905
loss: 0.9871100783348083,grad_norm: 0.999999023670704, iteration: 63906
loss: 1.0099706649780273,grad_norm: 0.9999989183206958, iteration: 63907
loss: 0.997749388217926,grad_norm: 0.9842185559764305, iteration: 63908
loss: 0.9960028529167175,grad_norm: 0.9760440750861078, iteration: 63909
loss: 1.021144986152649,grad_norm: 0.9999992877858918, iteration: 63910
loss: 1.041404128074646,grad_norm: 0.9999991715779828, iteration: 63911
loss: 0.9898592233657837,grad_norm: 0.9999991426760512, iteration: 63912
loss: 1.0419964790344238,grad_norm: 0.9999989633688301, iteration: 63913
loss: 0.9669771194458008,grad_norm: 0.926427022389598, iteration: 63914
loss: 0.9415105581283569,grad_norm: 0.9999990879924743, iteration: 63915
loss: 1.0108312368392944,grad_norm: 0.9618641560702074, iteration: 63916
loss: 1.029518485069275,grad_norm: 0.999999181730331, iteration: 63917
loss: 0.9837784171104431,grad_norm: 0.9943511304876305, iteration: 63918
loss: 0.9866334795951843,grad_norm: 0.9859283574326486, iteration: 63919
loss: 0.9875578880310059,grad_norm: 0.9999993306456348, iteration: 63920
loss: 1.0036208629608154,grad_norm: 0.9682099179470598, iteration: 63921
loss: 0.9739421010017395,grad_norm: 0.9999991813228991, iteration: 63922
loss: 0.9518228769302368,grad_norm: 0.9999991402281637, iteration: 63923
loss: 0.9775387048721313,grad_norm: 0.9999992307870973, iteration: 63924
loss: 0.9844498038291931,grad_norm: 0.9703601410470448, iteration: 63925
loss: 0.9967368245124817,grad_norm: 0.969698727237214, iteration: 63926
loss: 0.9457494020462036,grad_norm: 0.9412748662182301, iteration: 63927
loss: 1.03614342212677,grad_norm: 0.999999187717051, iteration: 63928
loss: 0.9818061590194702,grad_norm: 0.9999993374587558, iteration: 63929
loss: 0.9910649061203003,grad_norm: 0.8807094296549551, iteration: 63930
loss: 0.9956132769584656,grad_norm: 0.8516584115249783, iteration: 63931
loss: 0.9517744183540344,grad_norm: 0.9999991417903991, iteration: 63932
loss: 1.0533363819122314,grad_norm: 0.9999991872419302, iteration: 63933
loss: 1.0253565311431885,grad_norm: 0.969509878852708, iteration: 63934
loss: 0.9485365152359009,grad_norm: 0.9748306890826972, iteration: 63935
loss: 1.0402839183807373,grad_norm: 0.9999991248950642, iteration: 63936
loss: 1.0585747957229614,grad_norm: 0.9999990609881863, iteration: 63937
loss: 0.9956464171409607,grad_norm: 0.9999993377780491, iteration: 63938
loss: 1.031827688217163,grad_norm: 0.9741591709486334, iteration: 63939
loss: 0.9952324628829956,grad_norm: 0.899829652304902, iteration: 63940
loss: 0.9957156181335449,grad_norm: 0.9999990786491928, iteration: 63941
loss: 1.0119189023971558,grad_norm: 0.8815067616687209, iteration: 63942
loss: 0.9922213554382324,grad_norm: 0.8991019089516216, iteration: 63943
loss: 1.040972352027893,grad_norm: 0.880910287602541, iteration: 63944
loss: 1.0340224504470825,grad_norm: 0.9999992224773392, iteration: 63945
loss: 1.0331202745437622,grad_norm: 0.9999997289003437, iteration: 63946
loss: 1.0067812204360962,grad_norm: 0.9511551428078664, iteration: 63947
loss: 0.998318612575531,grad_norm: 0.9999991204538032, iteration: 63948
loss: 0.9923034310340881,grad_norm: 0.9999990531689097, iteration: 63949
loss: 1.0020380020141602,grad_norm: 0.9534525969269322, iteration: 63950
loss: 1.0148513317108154,grad_norm: 0.9999991801119753, iteration: 63951
loss: 0.997391939163208,grad_norm: 0.9999990660269107, iteration: 63952
loss: 1.0229291915893555,grad_norm: 0.9999999631866152, iteration: 63953
loss: 0.9739550352096558,grad_norm: 0.7930740416865849, iteration: 63954
loss: 0.9831447005271912,grad_norm: 0.9658197807008161, iteration: 63955
loss: 1.0073192119598389,grad_norm: 0.9510865204386391, iteration: 63956
loss: 0.9782015681266785,grad_norm: 0.9209621958260594, iteration: 63957
loss: 0.9887068271636963,grad_norm: 0.9547025796528895, iteration: 63958
loss: 0.9970023036003113,grad_norm: 0.9999990476358028, iteration: 63959
loss: 0.9809812903404236,grad_norm: 0.9369645340195506, iteration: 63960
loss: 1.0225900411605835,grad_norm: 0.9999992230001986, iteration: 63961
loss: 1.026990294456482,grad_norm: 0.9999990213717308, iteration: 63962
loss: 1.010528802871704,grad_norm: 0.9999992132736906, iteration: 63963
loss: 0.9747214913368225,grad_norm: 0.9999990388775385, iteration: 63964
loss: 0.9927504658699036,grad_norm: 0.9870462373052067, iteration: 63965
loss: 1.0183740854263306,grad_norm: 0.9664278601120745, iteration: 63966
loss: 1.0024633407592773,grad_norm: 0.999999345291398, iteration: 63967
loss: 0.999721109867096,grad_norm: 0.9999992431126331, iteration: 63968
loss: 0.9875606894493103,grad_norm: 0.9999991664111918, iteration: 63969
loss: 0.9839430451393127,grad_norm: 0.9999992156545112, iteration: 63970
loss: 0.9955238699913025,grad_norm: 0.9999988811290664, iteration: 63971
loss: 1.0209205150604248,grad_norm: 0.9328759966547966, iteration: 63972
loss: 1.019128441810608,grad_norm: 0.9999990223126541, iteration: 63973
loss: 0.9802488088607788,grad_norm: 0.9886931483297468, iteration: 63974
loss: 0.9811208844184875,grad_norm: 0.8682751918045137, iteration: 63975
loss: 0.9851381778717041,grad_norm: 0.9985317728950093, iteration: 63976
loss: 1.017816424369812,grad_norm: 0.9999989306865517, iteration: 63977
loss: 0.9892143607139587,grad_norm: 0.9999994915191918, iteration: 63978
loss: 1.022680640220642,grad_norm: 0.9999991517268924, iteration: 63979
loss: 0.9752717018127441,grad_norm: 0.9999991332086143, iteration: 63980
loss: 1.0232094526290894,grad_norm: 0.9999992209163415, iteration: 63981
loss: 1.0028969049453735,grad_norm: 0.939912214277971, iteration: 63982
loss: 1.0158350467681885,grad_norm: 0.8423729681179254, iteration: 63983
loss: 1.0267647504806519,grad_norm: 0.9999991498586381, iteration: 63984
loss: 0.9950233101844788,grad_norm: 0.9999991410544177, iteration: 63985
loss: 0.9867227077484131,grad_norm: 0.9999992901461623, iteration: 63986
loss: 0.9950425624847412,grad_norm: 0.9999991517479815, iteration: 63987
loss: 1.0130503177642822,grad_norm: 0.8795100756137693, iteration: 63988
loss: 1.0201125144958496,grad_norm: 0.999999286870925, iteration: 63989
loss: 0.9818323850631714,grad_norm: 0.8660048015729457, iteration: 63990
loss: 0.9960286021232605,grad_norm: 0.9140587359210489, iteration: 63991
loss: 1.028200626373291,grad_norm: 0.9999990443753602, iteration: 63992
loss: 1.0180761814117432,grad_norm: 0.9999990685712883, iteration: 63993
loss: 0.9902483224868774,grad_norm: 0.9999990394674194, iteration: 63994
loss: 1.0298117399215698,grad_norm: 0.9999992343459557, iteration: 63995
loss: 1.0152920484542847,grad_norm: 0.9999990935105789, iteration: 63996
loss: 1.029901385307312,grad_norm: 0.9999991861595462, iteration: 63997
loss: 0.9728286266326904,grad_norm: 0.9999991845331245, iteration: 63998
loss: 1.0160057544708252,grad_norm: 0.9999993603540333, iteration: 63999
loss: 1.0045381784439087,grad_norm: 0.9248496499080601, iteration: 64000
loss: 0.9845032095909119,grad_norm: 0.9999992520015119, iteration: 64001
loss: 1.0007749795913696,grad_norm: 0.9974236795940578, iteration: 64002
loss: 1.0120956897735596,grad_norm: 0.9369314266213864, iteration: 64003
loss: 1.0011403560638428,grad_norm: 0.9999991332196454, iteration: 64004
loss: 1.0172985792160034,grad_norm: 0.9137345187169571, iteration: 64005
loss: 1.0229071378707886,grad_norm: 0.9999990783827067, iteration: 64006
loss: 0.9979926943778992,grad_norm: 0.9999990896449878, iteration: 64007
loss: 1.0041017532348633,grad_norm: 0.9999991128337665, iteration: 64008
loss: 1.0017691850662231,grad_norm: 0.9999991153792105, iteration: 64009
loss: 0.9945703744888306,grad_norm: 0.9999990447586125, iteration: 64010
loss: 1.0080516338348389,grad_norm: 0.9999991284221953, iteration: 64011
loss: 1.0250951051712036,grad_norm: 0.9097810857284842, iteration: 64012
loss: 0.9983774423599243,grad_norm: 0.9999991242845127, iteration: 64013
loss: 0.9786977171897888,grad_norm: 0.8656572210482573, iteration: 64014
loss: 1.0148011445999146,grad_norm: 0.9722973952785134, iteration: 64015
loss: 1.0101367235183716,grad_norm: 0.9999991404684933, iteration: 64016
loss: 1.0010346174240112,grad_norm: 0.9325972806072689, iteration: 64017
loss: 0.9984565377235413,grad_norm: 0.9077613055822282, iteration: 64018
loss: 0.9742341637611389,grad_norm: 0.8726573966027714, iteration: 64019
loss: 1.0551233291625977,grad_norm: 0.9999991705562934, iteration: 64020
loss: 1.004263162612915,grad_norm: 0.9220878785978401, iteration: 64021
loss: 1.0000584125518799,grad_norm: 0.9999990794186118, iteration: 64022
loss: 1.005611538887024,grad_norm: 0.9999992635885927, iteration: 64023
loss: 1.0322794914245605,grad_norm: 0.9668859717692098, iteration: 64024
loss: 1.0509967803955078,grad_norm: 0.9999991576400951, iteration: 64025
loss: 0.9960324764251709,grad_norm: 0.9373993303446955, iteration: 64026
loss: 0.9526235461235046,grad_norm: 0.9999992340889552, iteration: 64027
loss: 0.991215169429779,grad_norm: 0.9999989979080868, iteration: 64028
loss: 0.958072304725647,grad_norm: 0.9509467175678055, iteration: 64029
loss: 0.9671978950500488,grad_norm: 0.8943358639846113, iteration: 64030
loss: 1.0416704416275024,grad_norm: 0.9763416059222911, iteration: 64031
loss: 0.9970213174819946,grad_norm: 0.9633458388265864, iteration: 64032
loss: 0.989497184753418,grad_norm: 0.9999990950976012, iteration: 64033
loss: 0.9748349785804749,grad_norm: 0.9782439893728142, iteration: 64034
loss: 1.0361980199813843,grad_norm: 0.9999989715640119, iteration: 64035
loss: 1.0108551979064941,grad_norm: 0.8974068562008067, iteration: 64036
loss: 1.0125914812088013,grad_norm: 0.9999991807062907, iteration: 64037
loss: 1.0048456192016602,grad_norm: 0.9999991457751642, iteration: 64038
loss: 1.00699782371521,grad_norm: 0.8834381970924032, iteration: 64039
loss: 1.0228016376495361,grad_norm: 0.9999990642962061, iteration: 64040
loss: 0.9803619384765625,grad_norm: 0.9999991320763493, iteration: 64041
loss: 1.0217713117599487,grad_norm: 0.9999993034670588, iteration: 64042
loss: 1.0284857749938965,grad_norm: 0.9999992595191732, iteration: 64043
loss: 1.021622896194458,grad_norm: 0.9999995186113592, iteration: 64044
loss: 1.0062918663024902,grad_norm: 0.9999997684764801, iteration: 64045
loss: 1.011479139328003,grad_norm: 0.9555095725864702, iteration: 64046
loss: 1.05939781665802,grad_norm: 0.9999993086376149, iteration: 64047
loss: 0.9444810748100281,grad_norm: 0.9822969877523345, iteration: 64048
loss: 1.0096033811569214,grad_norm: 0.9871244194722484, iteration: 64049
loss: 1.0097625255584717,grad_norm: 0.9453300121909102, iteration: 64050
loss: 0.9873834252357483,grad_norm: 0.9999990045138438, iteration: 64051
loss: 1.002152681350708,grad_norm: 0.980780192064643, iteration: 64052
loss: 1.0435779094696045,grad_norm: 0.9999991692080354, iteration: 64053
loss: 1.0177264213562012,grad_norm: 0.9999992012657339, iteration: 64054
loss: 0.9903493523597717,grad_norm: 0.9999991413049959, iteration: 64055
loss: 1.0104246139526367,grad_norm: 0.9375201127138358, iteration: 64056
loss: 0.9835676550865173,grad_norm: 0.9999992463455092, iteration: 64057
loss: 0.9803457856178284,grad_norm: 0.9999990627249687, iteration: 64058
loss: 0.9663917422294617,grad_norm: 0.9734307574089137, iteration: 64059
loss: 1.0507197380065918,grad_norm: 0.9999997763265314, iteration: 64060
loss: 1.0187277793884277,grad_norm: 0.9742495334601344, iteration: 64061
loss: 0.9650245308876038,grad_norm: 0.917246676215773, iteration: 64062
loss: 0.9971006512641907,grad_norm: 0.9999991080059168, iteration: 64063
loss: 1.0158926248550415,grad_norm: 0.9999991045028568, iteration: 64064
loss: 0.9949536323547363,grad_norm: 0.8854541857154515, iteration: 64065
loss: 0.9894780516624451,grad_norm: 0.9999990556016642, iteration: 64066
loss: 1.0049644708633423,grad_norm: 0.9463276655801159, iteration: 64067
loss: 1.0253206491470337,grad_norm: 0.9999996600275043, iteration: 64068
loss: 1.0142170190811157,grad_norm: 0.9999994111807511, iteration: 64069
loss: 1.0177522897720337,grad_norm: 0.9055855583669874, iteration: 64070
loss: 0.9841986298561096,grad_norm: 0.9999998140963642, iteration: 64071
loss: 1.0026730298995972,grad_norm: 0.9999991085138501, iteration: 64072
loss: 1.016837477684021,grad_norm: 0.9196606119722697, iteration: 64073
loss: 1.0128587484359741,grad_norm: 0.9066174630467521, iteration: 64074
loss: 1.0221918821334839,grad_norm: 0.999999163873945, iteration: 64075
loss: 1.0474655628204346,grad_norm: 0.9999997889431236, iteration: 64076
loss: 1.0179741382598877,grad_norm: 0.9999991288373614, iteration: 64077
loss: 1.0063397884368896,grad_norm: 0.9545855693908137, iteration: 64078
loss: 1.0058519840240479,grad_norm: 0.9999990659285934, iteration: 64079
loss: 1.0173587799072266,grad_norm: 0.9525531580065295, iteration: 64080
loss: 0.9816072583198547,grad_norm: 0.9554799859770057, iteration: 64081
loss: 0.9656622409820557,grad_norm: 0.9999990938882846, iteration: 64082
loss: 0.9921430945396423,grad_norm: 0.9999990600550627, iteration: 64083
loss: 0.9716488122940063,grad_norm: 0.9999991889978409, iteration: 64084
loss: 0.9890914559364319,grad_norm: 0.9484934950525381, iteration: 64085
loss: 1.0050718784332275,grad_norm: 0.9999990213206358, iteration: 64086
loss: 1.021970510482788,grad_norm: 0.9999995770239551, iteration: 64087
loss: 0.9804782867431641,grad_norm: 0.9999991463501045, iteration: 64088
loss: 1.0048255920410156,grad_norm: 0.9402585460039296, iteration: 64089
loss: 0.9896116852760315,grad_norm: 0.9999991870116625, iteration: 64090
loss: 1.0211656093597412,grad_norm: 0.9675436338659732, iteration: 64091
loss: 0.976897656917572,grad_norm: 0.9999991104528604, iteration: 64092
loss: 1.0029653310775757,grad_norm: 0.9999992727198327, iteration: 64093
loss: 0.9832339286804199,grad_norm: 0.9999990847640007, iteration: 64094
loss: 0.9973552227020264,grad_norm: 0.9714567409875151, iteration: 64095
loss: 1.0274087190628052,grad_norm: 0.999999240159263, iteration: 64096
loss: 0.9815729856491089,grad_norm: 0.8759968243214009, iteration: 64097
loss: 0.9991623163223267,grad_norm: 0.8759471935372901, iteration: 64098
loss: 0.9689843654632568,grad_norm: 0.9999990557341717, iteration: 64099
loss: 0.9916157722473145,grad_norm: 0.9143692118083475, iteration: 64100
loss: 1.0401874780654907,grad_norm: 0.9999992742460108, iteration: 64101
loss: 0.9836554527282715,grad_norm: 0.9180770865041056, iteration: 64102
loss: 0.9630562663078308,grad_norm: 0.8322253688157822, iteration: 64103
loss: 0.9866542220115662,grad_norm: 0.7511489887507735, iteration: 64104
loss: 0.977470338344574,grad_norm: 0.9999990627017069, iteration: 64105
loss: 1.0304876565933228,grad_norm: 0.9999996891901042, iteration: 64106
loss: 0.9961556792259216,grad_norm: 0.9216139849322008, iteration: 64107
loss: 0.984643816947937,grad_norm: 0.9999990248206513, iteration: 64108
loss: 0.9999105930328369,grad_norm: 0.999999306721466, iteration: 64109
loss: 0.9802347421646118,grad_norm: 0.9999990932110502, iteration: 64110
loss: 1.0007706880569458,grad_norm: 0.9698850442885592, iteration: 64111
loss: 1.0241862535476685,grad_norm: 0.9999991392829727, iteration: 64112
loss: 1.0441410541534424,grad_norm: 0.999999252982822, iteration: 64113
loss: 1.0566867589950562,grad_norm: 0.9999992759322642, iteration: 64114
loss: 0.9916104674339294,grad_norm: 0.9685036351910624, iteration: 64115
loss: 0.9950165748596191,grad_norm: 0.8169363989514719, iteration: 64116
loss: 0.9648927450180054,grad_norm: 0.9204706662056231, iteration: 64117
loss: 1.0103439092636108,grad_norm: 0.9862640442397377, iteration: 64118
loss: 1.0047416687011719,grad_norm: 0.9999993754592108, iteration: 64119
loss: 1.0057727098464966,grad_norm: 0.9999992704721391, iteration: 64120
loss: 1.011173963546753,grad_norm: 0.9999991479224407, iteration: 64121
loss: 0.9748320579528809,grad_norm: 0.9177722632074748, iteration: 64122
loss: 1.011897325515747,grad_norm: 0.9996567314024921, iteration: 64123
loss: 1.0263724327087402,grad_norm: 0.9920126059818792, iteration: 64124
loss: 0.9826397895812988,grad_norm: 0.9999990610772146, iteration: 64125
loss: 1.0243664979934692,grad_norm: 0.9999991150452312, iteration: 64126
loss: 0.9946444630622864,grad_norm: 0.9999990708406843, iteration: 64127
loss: 1.0129477977752686,grad_norm: 0.9999990427884898, iteration: 64128
loss: 1.0014410018920898,grad_norm: 0.9999992123795097, iteration: 64129
loss: 0.9923592209815979,grad_norm: 0.9133986225665416, iteration: 64130
loss: 0.9877839088439941,grad_norm: 0.9999991314296283, iteration: 64131
loss: 1.0032756328582764,grad_norm: 0.999999112195148, iteration: 64132
loss: 1.0117944478988647,grad_norm: 0.9999992427671885, iteration: 64133
loss: 1.0561999082565308,grad_norm: 0.9632532196613842, iteration: 64134
loss: 0.9389407634735107,grad_norm: 0.9999991018655473, iteration: 64135
loss: 1.015869140625,grad_norm: 0.9999992217477178, iteration: 64136
loss: 1.0068461894989014,grad_norm: 0.9999989678667968, iteration: 64137
loss: 0.9959365725517273,grad_norm: 0.9999992677123485, iteration: 64138
loss: 0.9647455811500549,grad_norm: 0.9999992612827183, iteration: 64139
loss: 1.0021840333938599,grad_norm: 0.999999099321486, iteration: 64140
loss: 1.029344916343689,grad_norm: 0.9999989984334683, iteration: 64141
loss: 1.012352705001831,grad_norm: 0.9999991779808434, iteration: 64142
loss: 1.0540273189544678,grad_norm: 0.9999993802641549, iteration: 64143
loss: 0.9817745685577393,grad_norm: 0.9883181165176839, iteration: 64144
loss: 0.9972280263900757,grad_norm: 0.8798740831398065, iteration: 64145
loss: 1.0058395862579346,grad_norm: 0.9999990527288578, iteration: 64146
loss: 1.0209128856658936,grad_norm: 0.9999990703460326, iteration: 64147
loss: 0.9973545074462891,grad_norm: 0.9999990789567994, iteration: 64148
loss: 0.9722334146499634,grad_norm: 0.9999991351138103, iteration: 64149
loss: 1.0060982704162598,grad_norm: 0.9999991891296963, iteration: 64150
loss: 1.0067908763885498,grad_norm: 0.8886675013739078, iteration: 64151
loss: 1.0137417316436768,grad_norm: 0.9999990171941249, iteration: 64152
loss: 1.0067880153656006,grad_norm: 0.8574978148274561, iteration: 64153
loss: 1.000862717628479,grad_norm: 0.9077100378355988, iteration: 64154
loss: 1.0134835243225098,grad_norm: 0.9999991760193403, iteration: 64155
loss: 0.9835433959960938,grad_norm: 0.9999992865003515, iteration: 64156
loss: 0.988776683807373,grad_norm: 0.9999992796481136, iteration: 64157
loss: 0.9844170212745667,grad_norm: 0.9999994031844466, iteration: 64158
loss: 0.9994128346443176,grad_norm: 0.9999990924183481, iteration: 64159
loss: 1.0009558200836182,grad_norm: 0.9999991046646917, iteration: 64160
loss: 0.9884517192840576,grad_norm: 0.9999990443969318, iteration: 64161
loss: 1.0012236833572388,grad_norm: 0.945491160669657, iteration: 64162
loss: 1.0177693367004395,grad_norm: 0.9999992257428679, iteration: 64163
loss: 1.000976324081421,grad_norm: 0.9999991519012402, iteration: 64164
loss: 1.0213285684585571,grad_norm: 0.9400313661612404, iteration: 64165
loss: 0.9901142716407776,grad_norm: 0.9999992322798537, iteration: 64166
loss: 0.9934621453285217,grad_norm: 0.9999992066374706, iteration: 64167
loss: 0.9998612999916077,grad_norm: 0.99999934653092, iteration: 64168
loss: 1.0161974430084229,grad_norm: 0.999999090084861, iteration: 64169
loss: 0.9963026642799377,grad_norm: 0.9999992553147035, iteration: 64170
loss: 1.0029869079589844,grad_norm: 0.99999906305438, iteration: 64171
loss: 1.1706438064575195,grad_norm: 0.9999995013844103, iteration: 64172
loss: 0.9637059569358826,grad_norm: 0.9999991054323837, iteration: 64173
loss: 1.0448167324066162,grad_norm: 0.9999991142043692, iteration: 64174
loss: 1.0002113580703735,grad_norm: 0.9672745775444288, iteration: 64175
loss: 1.0289257764816284,grad_norm: 0.9999997064229145, iteration: 64176
loss: 1.0048407316207886,grad_norm: 0.9999992502143851, iteration: 64177
loss: 0.9971270561218262,grad_norm: 0.9664222129873526, iteration: 64178
loss: 1.0773135423660278,grad_norm: 0.9999991778221321, iteration: 64179
loss: 1.0181164741516113,grad_norm: 0.9999991771126755, iteration: 64180
loss: 1.0540300607681274,grad_norm: 0.8953979664769072, iteration: 64181
loss: 0.9988284707069397,grad_norm: 0.9999990791793878, iteration: 64182
loss: 0.9848355054855347,grad_norm: 0.9924136970067713, iteration: 64183
loss: 1.0153995752334595,grad_norm: 0.8750383438064836, iteration: 64184
loss: 1.0314542055130005,grad_norm: 0.999999095814705, iteration: 64185
loss: 1.015372633934021,grad_norm: 0.9999992719513152, iteration: 64186
loss: 0.9851090908050537,grad_norm: 0.9999992868160201, iteration: 64187
loss: 0.9899125695228577,grad_norm: 0.9999990268738399, iteration: 64188
loss: 0.9698325395584106,grad_norm: 0.9999990918863936, iteration: 64189
loss: 1.010088324546814,grad_norm: 0.9999992377535751, iteration: 64190
loss: 1.031944751739502,grad_norm: 0.9033981301511399, iteration: 64191
loss: 0.9666393995285034,grad_norm: 0.9063185737531264, iteration: 64192
loss: 1.0484561920166016,grad_norm: 0.9999991598312835, iteration: 64193
loss: 1.0241254568099976,grad_norm: 0.9999991113801056, iteration: 64194
loss: 1.0044472217559814,grad_norm: 0.9999991846212228, iteration: 64195
loss: 0.969413697719574,grad_norm: 0.9704533423163807, iteration: 64196
loss: 1.0393776893615723,grad_norm: 0.9999998139659133, iteration: 64197
loss: 1.0032278299331665,grad_norm: 0.8772880043583429, iteration: 64198
loss: 0.9989778399467468,grad_norm: 0.9732956189698336, iteration: 64199
loss: 0.9912187457084656,grad_norm: 0.9999990752099167, iteration: 64200
loss: 0.9956941604614258,grad_norm: 0.9999990173561193, iteration: 64201
loss: 0.9921532869338989,grad_norm: 0.9314363958867651, iteration: 64202
loss: 0.9567020535469055,grad_norm: 0.9999990210375836, iteration: 64203
loss: 0.9681377410888672,grad_norm: 0.9999990980550938, iteration: 64204
loss: 1.030869722366333,grad_norm: 0.9999993906808032, iteration: 64205
loss: 1.009476900100708,grad_norm: 0.9413348719055048, iteration: 64206
loss: 1.018541932106018,grad_norm: 0.8229994153598128, iteration: 64207
loss: 0.986873984336853,grad_norm: 0.9999990601948716, iteration: 64208
loss: 1.025398850440979,grad_norm: 0.9999992830492033, iteration: 64209
loss: 1.0309499502182007,grad_norm: 0.9602408291993177, iteration: 64210
loss: 0.9965164661407471,grad_norm: 0.9295207980459649, iteration: 64211
loss: 0.9886072874069214,grad_norm: 0.8285734427139095, iteration: 64212
loss: 0.9904706478118896,grad_norm: 0.9999990112901431, iteration: 64213
loss: 1.0137044191360474,grad_norm: 0.9999992212499075, iteration: 64214
loss: 1.0033501386642456,grad_norm: 0.9999995249290063, iteration: 64215
loss: 1.0178070068359375,grad_norm: 0.9999992671875562, iteration: 64216
loss: 1.0223426818847656,grad_norm: 0.9999991511001594, iteration: 64217
loss: 1.0127793550491333,grad_norm: 0.9999990204992124, iteration: 64218
loss: 1.0327279567718506,grad_norm: 0.999999010021191, iteration: 64219
loss: 1.0172792673110962,grad_norm: 0.9293732931536071, iteration: 64220
loss: 1.0391608476638794,grad_norm: 0.9999990664544455, iteration: 64221
loss: 0.9966591596603394,grad_norm: 0.9987106232626375, iteration: 64222
loss: 0.9807915091514587,grad_norm: 0.9999991483831853, iteration: 64223
loss: 1.003208041191101,grad_norm: 0.9549008321774118, iteration: 64224
loss: 0.9740460515022278,grad_norm: 0.9160117771554861, iteration: 64225
loss: 0.9958928227424622,grad_norm: 0.9040803574461872, iteration: 64226
loss: 0.982116162776947,grad_norm: 0.9783039190933871, iteration: 64227
loss: 0.9736234545707703,grad_norm: 0.9572344338306146, iteration: 64228
loss: 1.0023713111877441,grad_norm: 0.9999993172768018, iteration: 64229
loss: 1.0148810148239136,grad_norm: 0.8971026895094738, iteration: 64230
loss: 1.0068888664245605,grad_norm: 0.7968000985836613, iteration: 64231
loss: 0.9997779726982117,grad_norm: 0.999999136500654, iteration: 64232
loss: 1.0222843885421753,grad_norm: 0.9999991467765165, iteration: 64233
loss: 0.9973772168159485,grad_norm: 0.9577383949660245, iteration: 64234
loss: 1.03138267993927,grad_norm: 0.9999991366964773, iteration: 64235
loss: 0.9941129684448242,grad_norm: 0.9793180291762114, iteration: 64236
loss: 1.0818537473678589,grad_norm: 0.9999991485480825, iteration: 64237
loss: 1.0225074291229248,grad_norm: 0.9999992899797183, iteration: 64238
loss: 0.9933882355690002,grad_norm: 0.9103963822193031, iteration: 64239
loss: 1.0809309482574463,grad_norm: 0.9999992230911735, iteration: 64240
loss: 0.9694553017616272,grad_norm: 0.9013467793263626, iteration: 64241
loss: 0.9816670417785645,grad_norm: 0.999999202688795, iteration: 64242
loss: 1.1435412168502808,grad_norm: 0.9999995217603767, iteration: 64243
loss: 0.9908522963523865,grad_norm: 0.999999145046732, iteration: 64244
loss: 1.0054583549499512,grad_norm: 0.9999992012483699, iteration: 64245
loss: 0.9873414635658264,grad_norm: 0.999999070958372, iteration: 64246
loss: 1.0276293754577637,grad_norm: 0.9999997128313247, iteration: 64247
loss: 0.9952869415283203,grad_norm: 0.9999992507174461, iteration: 64248
loss: 0.9953141808509827,grad_norm: 0.9148613930516672, iteration: 64249
loss: 0.9670741558074951,grad_norm: 0.8584233189339562, iteration: 64250
loss: 1.0146682262420654,grad_norm: 0.9999991876610169, iteration: 64251
loss: 1.04060959815979,grad_norm: 0.9832270786554637, iteration: 64252
loss: 1.007712960243225,grad_norm: 0.8997963824451569, iteration: 64253
loss: 1.0210238695144653,grad_norm: 0.9585898350932648, iteration: 64254
loss: 1.0289417505264282,grad_norm: 0.9815415153664496, iteration: 64255
loss: 1.074267029762268,grad_norm: 0.9999991045146321, iteration: 64256
loss: 1.0697635412216187,grad_norm: 0.999999498833683, iteration: 64257
loss: 1.0260075330734253,grad_norm: 0.9999990333817704, iteration: 64258
loss: 0.9886395931243896,grad_norm: 0.9999990724607676, iteration: 64259
loss: 0.9866637587547302,grad_norm: 0.9999990836353226, iteration: 64260
loss: 0.9873601794242859,grad_norm: 0.9999992139198017, iteration: 64261
loss: 1.017224669456482,grad_norm: 0.8698372127287114, iteration: 64262
loss: 0.975109338760376,grad_norm: 0.999999137233275, iteration: 64263
loss: 1.0211716890335083,grad_norm: 0.9999991484864038, iteration: 64264
loss: 0.9737850427627563,grad_norm: 0.9999990992236347, iteration: 64265
loss: 1.015317440032959,grad_norm: 0.8731546199826609, iteration: 64266
loss: 1.005463719367981,grad_norm: 0.9999992747835843, iteration: 64267
loss: 0.9833989143371582,grad_norm: 0.9510281793609633, iteration: 64268
loss: 1.0223859548568726,grad_norm: 0.9999990485640555, iteration: 64269
loss: 0.9911369681358337,grad_norm: 0.9999992115200599, iteration: 64270
loss: 0.9627130627632141,grad_norm: 0.9598360304809002, iteration: 64271
loss: 1.0121554136276245,grad_norm: 0.8940281384721485, iteration: 64272
loss: 0.9805482029914856,grad_norm: 0.9861789900072963, iteration: 64273
loss: 0.9787237644195557,grad_norm: 0.9999991758121999, iteration: 64274
loss: 0.9869943261146545,grad_norm: 0.8965615491026815, iteration: 64275
loss: 1.0037274360656738,grad_norm: 0.9999990079859944, iteration: 64276
loss: 0.9820114970207214,grad_norm: 0.9999990937427863, iteration: 64277
loss: 0.9887165427207947,grad_norm: 0.9999990827734901, iteration: 64278
loss: 0.9728131294250488,grad_norm: 0.9999991813472168, iteration: 64279
loss: 1.034339427947998,grad_norm: 0.9999990323924483, iteration: 64280
loss: 1.0046454668045044,grad_norm: 0.9999991309615189, iteration: 64281
loss: 0.9611544013023376,grad_norm: 0.9999991471585831, iteration: 64282
loss: 0.9844197034835815,grad_norm: 0.9702405369036112, iteration: 64283
loss: 0.978143036365509,grad_norm: 0.9999993306217546, iteration: 64284
loss: 1.086351990699768,grad_norm: 0.9999992561398173, iteration: 64285
loss: 1.0075387954711914,grad_norm: 0.9794005855758471, iteration: 64286
loss: 0.9950133562088013,grad_norm: 0.999999196301393, iteration: 64287
loss: 1.0161614418029785,grad_norm: 0.9999997704263559, iteration: 64288
loss: 0.9753361940383911,grad_norm: 0.9561348396896221, iteration: 64289
loss: 0.9650298357009888,grad_norm: 0.9999990780615077, iteration: 64290
loss: 1.0087459087371826,grad_norm: 0.9999989877896582, iteration: 64291
loss: 0.9987073540687561,grad_norm: 0.9999992241047213, iteration: 64292
loss: 1.0023105144500732,grad_norm: 0.9572029923300911, iteration: 64293
loss: 0.9925966262817383,grad_norm: 0.9999990849056632, iteration: 64294
loss: 1.0114409923553467,grad_norm: 0.9999992828791501, iteration: 64295
loss: 0.9412158131599426,grad_norm: 0.9999991242727375, iteration: 64296
loss: 1.0283987522125244,grad_norm: 0.9821530533442263, iteration: 64297
loss: 1.0180569887161255,grad_norm: 0.9999991097373243, iteration: 64298
loss: 0.9740467667579651,grad_norm: 0.9018162734696222, iteration: 64299
loss: 1.0016676187515259,grad_norm: 0.8525866617564207, iteration: 64300
loss: 0.9943597912788391,grad_norm: 0.9393486733926079, iteration: 64301
loss: 0.9837738275527954,grad_norm: 0.9999994709081226, iteration: 64302
loss: 0.924304723739624,grad_norm: 0.9617278744920819, iteration: 64303
loss: 0.9997929930686951,grad_norm: 0.9518981645369022, iteration: 64304
loss: 1.020480751991272,grad_norm: 0.9999989593839188, iteration: 64305
loss: 1.0191363096237183,grad_norm: 0.9999991466720013, iteration: 64306
loss: 0.9851452112197876,grad_norm: 0.9999991108525713, iteration: 64307
loss: 1.0211741924285889,grad_norm: 0.9831476233418905, iteration: 64308
loss: 1.0187020301818848,grad_norm: 0.9999994270145731, iteration: 64309
loss: 1.0114190578460693,grad_norm: 0.9999995685970408, iteration: 64310
loss: 1.019646167755127,grad_norm: 0.9999992476386492, iteration: 64311
loss: 0.9865798950195312,grad_norm: 0.9643346541359664, iteration: 64312
loss: 0.9609741568565369,grad_norm: 0.9999990729205249, iteration: 64313
loss: 0.9819101691246033,grad_norm: 0.9999991397295382, iteration: 64314
loss: 1.0129202604293823,grad_norm: 0.999999230742871, iteration: 64315
loss: 1.0100374221801758,grad_norm: 0.801227794092936, iteration: 64316
loss: 1.0176825523376465,grad_norm: 0.9386310684916304, iteration: 64317
loss: 1.035979151725769,grad_norm: 0.9999991212730617, iteration: 64318
loss: 1.0171679258346558,grad_norm: 0.979425625789101, iteration: 64319
loss: 1.0297894477844238,grad_norm: 0.8602685570656504, iteration: 64320
loss: 1.0184272527694702,grad_norm: 0.9907930677760757, iteration: 64321
loss: 1.045467495918274,grad_norm: 0.9910596703885813, iteration: 64322
loss: 1.0151679515838623,grad_norm: 0.999999190851548, iteration: 64323
loss: 1.0229766368865967,grad_norm: 0.8968534685716452, iteration: 64324
loss: 1.0204565525054932,grad_norm: 0.9517014801644057, iteration: 64325
loss: 1.0272740125656128,grad_norm: 0.9999993615976909, iteration: 64326
loss: 1.014307975769043,grad_norm: 0.9999991103865814, iteration: 64327
loss: 0.967318594455719,grad_norm: 0.9999992174776899, iteration: 64328
loss: 1.0137299299240112,grad_norm: 0.9999990774197467, iteration: 64329
loss: 0.9759133458137512,grad_norm: 0.9999990702778319, iteration: 64330
loss: 1.0087636709213257,grad_norm: 0.9275168962659626, iteration: 64331
loss: 0.996246337890625,grad_norm: 0.9999991802927947, iteration: 64332
loss: 1.0028420686721802,grad_norm: 0.9999990952071003, iteration: 64333
loss: 0.9905902743339539,grad_norm: 0.9901499822727677, iteration: 64334
loss: 1.0079236030578613,grad_norm: 0.9999990755724779, iteration: 64335
loss: 1.0053362846374512,grad_norm: 0.9999994256925722, iteration: 64336
loss: 0.9795563817024231,grad_norm: 0.9999998447933204, iteration: 64337
loss: 0.9839032292366028,grad_norm: 0.9999991347298942, iteration: 64338
loss: 1.0053503513336182,grad_norm: 0.9999991210437637, iteration: 64339
loss: 1.0150505304336548,grad_norm: 0.9999991869610171, iteration: 64340
loss: 1.0108133554458618,grad_norm: 0.999999009823393, iteration: 64341
loss: 1.0149792432785034,grad_norm: 0.9999990430091924, iteration: 64342
loss: 0.9931135177612305,grad_norm: 0.9722241675439325, iteration: 64343
loss: 1.0221779346466064,grad_norm: 0.9184606601392261, iteration: 64344
loss: 0.9995405077934265,grad_norm: 0.7921973715400482, iteration: 64345
loss: 0.9902732968330383,grad_norm: 0.9999992424845537, iteration: 64346
loss: 0.9994919300079346,grad_norm: 0.857629546469721, iteration: 64347
loss: 1.035565733909607,grad_norm: 0.9999990282154513, iteration: 64348
loss: 1.0010184049606323,grad_norm: 0.9999990600184565, iteration: 64349
loss: 0.951749324798584,grad_norm: 0.9999997216337998, iteration: 64350
loss: 0.9866906404495239,grad_norm: 0.9271753223194065, iteration: 64351
loss: 1.0143756866455078,grad_norm: 0.9999990565803846, iteration: 64352
loss: 0.9621772170066833,grad_norm: 0.9938812552829913, iteration: 64353
loss: 1.0010682344436646,grad_norm: 0.9459346941184567, iteration: 64354
loss: 1.0153504610061646,grad_norm: 0.9125488400355541, iteration: 64355
loss: 0.9938612580299377,grad_norm: 0.9999991221136942, iteration: 64356
loss: 1.0072412490844727,grad_norm: 0.9999991157310727, iteration: 64357
loss: 1.0078895092010498,grad_norm: 0.9198481144597224, iteration: 64358
loss: 1.0076797008514404,grad_norm: 0.9999990820572101, iteration: 64359
loss: 0.991870641708374,grad_norm: 0.9999990975017122, iteration: 64360
loss: 1.030767798423767,grad_norm: 0.9999992415936887, iteration: 64361
loss: 1.0086321830749512,grad_norm: 0.9522026615738133, iteration: 64362
loss: 1.0036011934280396,grad_norm: 0.8528234366534274, iteration: 64363
loss: 0.9783207178115845,grad_norm: 0.9339821825305978, iteration: 64364
loss: 1.0257304906845093,grad_norm: 0.9999991815555129, iteration: 64365
loss: 0.9875230193138123,grad_norm: 0.854631740596344, iteration: 64366
loss: 0.996888279914856,grad_norm: 0.9970618164294958, iteration: 64367
loss: 1.011871576309204,grad_norm: 0.9999993145028584, iteration: 64368
loss: 0.9672580361366272,grad_norm: 0.9999990320479099, iteration: 64369
loss: 0.9747427701950073,grad_norm: 0.999999373723801, iteration: 64370
loss: 0.9652101397514343,grad_norm: 0.9709245175458355, iteration: 64371
loss: 0.9953703880310059,grad_norm: 0.9055057612195138, iteration: 64372
loss: 0.9911841154098511,grad_norm: 0.9623099318411299, iteration: 64373
loss: 1.000066876411438,grad_norm: 0.9999992464110975, iteration: 64374
loss: 1.0389667749404907,grad_norm: 0.9999990544427554, iteration: 64375
loss: 1.014453649520874,grad_norm: 0.9999992807438207, iteration: 64376
loss: 1.0058633089065552,grad_norm: 0.9999991899865165, iteration: 64377
loss: 1.0273876190185547,grad_norm: 0.9999990309629044, iteration: 64378
loss: 0.998806893825531,grad_norm: 0.9999989598031649, iteration: 64379
loss: 1.0083905458450317,grad_norm: 0.9999992505851331, iteration: 64380
loss: 1.044866919517517,grad_norm: 0.9999994295995376, iteration: 64381
loss: 1.0336443185806274,grad_norm: 0.9999992069064279, iteration: 64382
loss: 0.9651105403900146,grad_norm: 0.9999990451620472, iteration: 64383
loss: 0.9915245175361633,grad_norm: 0.9766396691004612, iteration: 64384
loss: 0.9939590096473694,grad_norm: 0.9999990290991211, iteration: 64385
loss: 1.0247688293457031,grad_norm: 0.9999990971248165, iteration: 64386
loss: 1.002277135848999,grad_norm: 0.9999989800324851, iteration: 64387
loss: 0.9732174277305603,grad_norm: 0.999999122959886, iteration: 64388
loss: 0.9925243854522705,grad_norm: 0.9999990506794612, iteration: 64389
loss: 0.9827535152435303,grad_norm: 0.9999992075600166, iteration: 64390
loss: 1.1978013515472412,grad_norm: 0.999999850400246, iteration: 64391
loss: 1.0259929895401,grad_norm: 0.9617074573899447, iteration: 64392
loss: 0.9897430539131165,grad_norm: 0.9999989852455929, iteration: 64393
loss: 0.9968923330307007,grad_norm: 0.9999990950356078, iteration: 64394
loss: 0.9718295335769653,grad_norm: 0.9999993015841662, iteration: 64395
loss: 1.0087990760803223,grad_norm: 0.9643855593732376, iteration: 64396
loss: 1.030988335609436,grad_norm: 0.999999115072108, iteration: 64397
loss: 0.9879369139671326,grad_norm: 0.9999991116958934, iteration: 64398
loss: 1.0143531560897827,grad_norm: 0.999999192121434, iteration: 64399
loss: 0.9975857734680176,grad_norm: 0.9999990430603196, iteration: 64400
loss: 0.9644123911857605,grad_norm: 0.944336334181168, iteration: 64401
loss: 1.0143309831619263,grad_norm: 0.9999993062905215, iteration: 64402
loss: 1.013240933418274,grad_norm: 0.898710538302097, iteration: 64403
loss: 0.9987872838973999,grad_norm: 0.9999991981466007, iteration: 64404
loss: 1.008102297782898,grad_norm: 0.9874238581358179, iteration: 64405
loss: 1.0150071382522583,grad_norm: 0.9999990516240604, iteration: 64406
loss: 1.178642749786377,grad_norm: 0.9999994669922432, iteration: 64407
loss: 0.987903892993927,grad_norm: 0.999999088190431, iteration: 64408
loss: 0.9871445298194885,grad_norm: 0.9184004202888012, iteration: 64409
loss: 1.0207440853118896,grad_norm: 0.9999991790313364, iteration: 64410
loss: 0.9920068383216858,grad_norm: 0.8489872269100267, iteration: 64411
loss: 1.0172171592712402,grad_norm: 0.9999991690676914, iteration: 64412
loss: 1.0179665088653564,grad_norm: 0.999999211550325, iteration: 64413
loss: 0.9945473074913025,grad_norm: 0.9160644666928375, iteration: 64414
loss: 1.000044822692871,grad_norm: 0.9999992442251682, iteration: 64415
loss: 1.0256373882293701,grad_norm: 0.9999993133027891, iteration: 64416
loss: 1.0078535079956055,grad_norm: 0.9729714214859211, iteration: 64417
loss: 1.002572774887085,grad_norm: 0.9999989773871453, iteration: 64418
loss: 0.9926806688308716,grad_norm: 0.9999991221866638, iteration: 64419
loss: 1.0155311822891235,grad_norm: 0.9999991729908436, iteration: 64420
loss: 1.0248852968215942,grad_norm: 0.9999993270314632, iteration: 64421
loss: 1.0084619522094727,grad_norm: 0.9999992153722849, iteration: 64422
loss: 0.9999516010284424,grad_norm: 0.9999991197978442, iteration: 64423
loss: 0.9852601289749146,grad_norm: 0.9999991124672932, iteration: 64424
loss: 1.058783769607544,grad_norm: 0.9999989343872946, iteration: 64425
loss: 0.9591836333274841,grad_norm: 0.9999992054445352, iteration: 64426
loss: 1.0080053806304932,grad_norm: 0.999999148628759, iteration: 64427
loss: 0.9927717447280884,grad_norm: 0.998476557884963, iteration: 64428
loss: 1.004428744316101,grad_norm: 0.9019724628230978, iteration: 64429
loss: 0.9850715398788452,grad_norm: 0.9999990881822937, iteration: 64430
loss: 0.9833593368530273,grad_norm: 0.9999989378815545, iteration: 64431
loss: 1.005403757095337,grad_norm: 0.999999150553596, iteration: 64432
loss: 0.9779963493347168,grad_norm: 0.9999991535083357, iteration: 64433
loss: 0.999855637550354,grad_norm: 0.833291190778493, iteration: 64434
loss: 1.0096274614334106,grad_norm: 0.9855262837600698, iteration: 64435
loss: 1.025598406791687,grad_norm: 0.9661599101676676, iteration: 64436
loss: 0.9966168999671936,grad_norm: 0.9999992663095603, iteration: 64437
loss: 0.9836134910583496,grad_norm: 0.9999990744225661, iteration: 64438
loss: 1.0068941116333008,grad_norm: 0.9090841431120654, iteration: 64439
loss: 0.9671404361724854,grad_norm: 0.9999992136752703, iteration: 64440
loss: 1.0003982782363892,grad_norm: 0.92766691907218, iteration: 64441
loss: 1.0509390830993652,grad_norm: 0.9999994499194198, iteration: 64442
loss: 1.029727816581726,grad_norm: 0.9999993152389635, iteration: 64443
loss: 0.9959475994110107,grad_norm: 0.9081021650621247, iteration: 64444
loss: 1.0076816082000732,grad_norm: 0.9999991491814431, iteration: 64445
loss: 1.0501054525375366,grad_norm: 0.9999991382021873, iteration: 64446
loss: 0.9909545183181763,grad_norm: 0.9999991942607763, iteration: 64447
loss: 0.9613445401191711,grad_norm: 0.9999990473495305, iteration: 64448
loss: 1.0333309173583984,grad_norm: 0.9999991209526147, iteration: 64449
loss: 0.9921072721481323,grad_norm: 0.9689500807280599, iteration: 64450
loss: 0.9401035904884338,grad_norm: 0.9999992347997377, iteration: 64451
loss: 0.9813932776451111,grad_norm: 0.9999992157735428, iteration: 64452
loss: 1.0306637287139893,grad_norm: 0.9999991061834301, iteration: 64453
loss: 0.9830420017242432,grad_norm: 0.999999356304327, iteration: 64454
loss: 0.9564327597618103,grad_norm: 0.9999991766858256, iteration: 64455
loss: 1.0185954570770264,grad_norm: 0.9999991786039415, iteration: 64456
loss: 0.9843906164169312,grad_norm: 0.9999990581667898, iteration: 64457
loss: 0.994117259979248,grad_norm: 0.9999991455233254, iteration: 64458
loss: 0.9917730093002319,grad_norm: 0.9999991553941977, iteration: 64459
loss: 0.9937241673469543,grad_norm: 0.999999284686116, iteration: 64460
loss: 1.028290867805481,grad_norm: 0.9184451880117137, iteration: 64461
loss: 0.9936853051185608,grad_norm: 0.9999992003984889, iteration: 64462
loss: 0.9875105023384094,grad_norm: 0.9999990872480518, iteration: 64463
loss: 1.025209903717041,grad_norm: 0.912199154559299, iteration: 64464
loss: 1.0388157367706299,grad_norm: 0.9999991101011547, iteration: 64465
loss: 1.0206378698349,grad_norm: 0.9999991095290147, iteration: 64466
loss: 0.9664608240127563,grad_norm: 0.9999992476361903, iteration: 64467
loss: 0.9773667454719543,grad_norm: 0.9999994478667273, iteration: 64468
loss: 0.9698917269706726,grad_norm: 0.9999990328671777, iteration: 64469
loss: 1.0372600555419922,grad_norm: 0.9218624822315524, iteration: 64470
loss: 1.0707573890686035,grad_norm: 0.9999992937024894, iteration: 64471
loss: 1.0147950649261475,grad_norm: 0.9999991697139315, iteration: 64472
loss: 1.0184568166732788,grad_norm: 0.8654950533962383, iteration: 64473
loss: 0.9748974442481995,grad_norm: 0.9999989974873497, iteration: 64474
loss: 1.0071139335632324,grad_norm: 0.9999991411261838, iteration: 64475
loss: 0.9895222187042236,grad_norm: 0.9999990989320147, iteration: 64476
loss: 0.9990671873092651,grad_norm: 0.9999990574877876, iteration: 64477
loss: 0.9929283261299133,grad_norm: 0.9216136277215827, iteration: 64478
loss: 1.0092134475708008,grad_norm: 0.9263560617671633, iteration: 64479
loss: 0.9806520342826843,grad_norm: 0.999999260973415, iteration: 64480
loss: 0.99016934633255,grad_norm: 0.9999991090467598, iteration: 64481
loss: 1.0171951055526733,grad_norm: 0.999999319466058, iteration: 64482
loss: 1.0599250793457031,grad_norm: 0.9999990833651649, iteration: 64483
loss: 1.0005147457122803,grad_norm: 0.9912989441202723, iteration: 64484
loss: 0.9855791926383972,grad_norm: 0.9999989668935223, iteration: 64485
loss: 0.9929803013801575,grad_norm: 0.9583438566759673, iteration: 64486
loss: 1.003553032875061,grad_norm: 0.9999992291035079, iteration: 64487
loss: 1.0097558498382568,grad_norm: 0.9999998108487625, iteration: 64488
loss: 1.002379298210144,grad_norm: 0.9287448123174205, iteration: 64489
loss: 0.9347096085548401,grad_norm: 0.9999991500201487, iteration: 64490
loss: 0.9939937591552734,grad_norm: 0.9999993155933369, iteration: 64491
loss: 1.0413366556167603,grad_norm: 0.9999998516968149, iteration: 64492
loss: 0.9809505939483643,grad_norm: 0.9999992308069325, iteration: 64493
loss: 1.0046404600143433,grad_norm: 0.9999990989606418, iteration: 64494
loss: 1.0196549892425537,grad_norm: 0.9999991785586752, iteration: 64495
loss: 1.0074381828308105,grad_norm: 0.9999992823309771, iteration: 64496
loss: 0.9749271273612976,grad_norm: 0.7799270377351725, iteration: 64497
loss: 0.995568037033081,grad_norm: 0.9901872171543942, iteration: 64498
loss: 0.9857962727546692,grad_norm: 0.8287161515082592, iteration: 64499
loss: 1.0195940732955933,grad_norm: 0.9868308731254511, iteration: 64500
loss: 0.9953476190567017,grad_norm: 0.9999990797748504, iteration: 64501
loss: 1.0172593593597412,grad_norm: 0.9999990699647254, iteration: 64502
loss: 1.0536328554153442,grad_norm: 0.9999995461122252, iteration: 64503
loss: 1.0169126987457275,grad_norm: 0.9999990559218986, iteration: 64504
loss: 0.9833716154098511,grad_norm: 0.999999091645559, iteration: 64505
loss: 0.9945297837257385,grad_norm: 0.9699844593636648, iteration: 64506
loss: 0.9744888544082642,grad_norm: 0.9999991055965487, iteration: 64507
loss: 0.9867073893547058,grad_norm: 0.9854146851204215, iteration: 64508
loss: 1.0026332139968872,grad_norm: 0.9999991735356336, iteration: 64509
loss: 0.9852539300918579,grad_norm: 0.940095664824652, iteration: 64510
loss: 0.9715356826782227,grad_norm: 0.9999991916234555, iteration: 64511
loss: 1.0871502161026,grad_norm: 0.9999989559701474, iteration: 64512
loss: 1.0071096420288086,grad_norm: 0.9873966522292809, iteration: 64513
loss: 0.9880042672157288,grad_norm: 0.9999992299459607, iteration: 64514
loss: 0.979787290096283,grad_norm: 0.9999989372121305, iteration: 64515
loss: 0.9775286912918091,grad_norm: 0.9671483501317265, iteration: 64516
loss: 0.9785791039466858,grad_norm: 0.953546777311338, iteration: 64517
loss: 1.0263034105300903,grad_norm: 0.9999995760524099, iteration: 64518
loss: 1.1377440690994263,grad_norm: 0.9999998690913864, iteration: 64519
loss: 1.0088155269622803,grad_norm: 0.9999990994756438, iteration: 64520
loss: 1.011926293373108,grad_norm: 0.9999994442656275, iteration: 64521
loss: 0.9864872097969055,grad_norm: 0.9675930478447643, iteration: 64522
loss: 0.9263319969177246,grad_norm: 0.9635069099261159, iteration: 64523
loss: 1.0004680156707764,grad_norm: 0.9136678965577953, iteration: 64524
loss: 0.9946618676185608,grad_norm: 0.9727844101127044, iteration: 64525
loss: 0.9971747398376465,grad_norm: 0.9999991821200912, iteration: 64526
loss: 1.0393749475479126,grad_norm: 0.9999994062834433, iteration: 64527
loss: 0.9904925227165222,grad_norm: 0.9999990356510456, iteration: 64528
loss: 1.000770092010498,grad_norm: 0.9999990706067082, iteration: 64529
loss: 0.9847530722618103,grad_norm: 0.9089319913888374, iteration: 64530
loss: 1.003393530845642,grad_norm: 0.9999991408133568, iteration: 64531
loss: 0.945889949798584,grad_norm: 0.8850159567501634, iteration: 64532
loss: 0.9879900217056274,grad_norm: 0.9793960997478794, iteration: 64533
loss: 0.9757248163223267,grad_norm: 0.9267983251255136, iteration: 64534
loss: 1.0087765455245972,grad_norm: 0.9708639370708543, iteration: 64535
loss: 1.0050420761108398,grad_norm: 0.9999999248579305, iteration: 64536
loss: 0.9831547737121582,grad_norm: 0.9756032645118657, iteration: 64537
loss: 0.991395890712738,grad_norm: 0.924838274418761, iteration: 64538
loss: 1.0156325101852417,grad_norm: 0.9999993610092054, iteration: 64539
loss: 1.037643313407898,grad_norm: 0.9999989053583751, iteration: 64540
loss: 0.9554529786109924,grad_norm: 0.9999991579579064, iteration: 64541
loss: 1.0070738792419434,grad_norm: 0.9999989136403219, iteration: 64542
loss: 0.9772008657455444,grad_norm: 0.9999990630258413, iteration: 64543
loss: 0.9707298874855042,grad_norm: 0.8846675696329998, iteration: 64544
loss: 0.9841083288192749,grad_norm: 0.999999440717376, iteration: 64545
loss: 1.0114368200302124,grad_norm: 0.9999992309869898, iteration: 64546
loss: 0.9817386269569397,grad_norm: 0.9999990987323861, iteration: 64547
loss: 1.0444962978363037,grad_norm: 0.9999997850347578, iteration: 64548
loss: 0.9936604499816895,grad_norm: 0.9973131104593801, iteration: 64549
loss: 0.9637343883514404,grad_norm: 0.9999989929338197, iteration: 64550
loss: 1.0057320594787598,grad_norm: 0.9999991834438975, iteration: 64551
loss: 0.995784342288971,grad_norm: 0.905414396234001, iteration: 64552
loss: 0.9814571142196655,grad_norm: 0.9999990224167212, iteration: 64553
loss: 1.0001987218856812,grad_norm: 0.9999992021298798, iteration: 64554
loss: 0.9523371458053589,grad_norm: 0.9999990925086812, iteration: 64555
loss: 0.975550651550293,grad_norm: 0.9999991023257483, iteration: 64556
loss: 1.0276992321014404,grad_norm: 0.9999992954955575, iteration: 64557
loss: 1.0034247636795044,grad_norm: 0.9999994176453869, iteration: 64558
loss: 0.9992649555206299,grad_norm: 0.9999993765311723, iteration: 64559
loss: 1.0019233226776123,grad_norm: 0.9999990940715086, iteration: 64560
loss: 1.0044318437576294,grad_norm: 0.9999992872731216, iteration: 64561
loss: 1.0157569646835327,grad_norm: 0.9999995907107505, iteration: 64562
loss: 1.0273467302322388,grad_norm: 0.999999194541744, iteration: 64563
loss: 0.9827989339828491,grad_norm: 0.9999992872099566, iteration: 64564
loss: 0.9863027334213257,grad_norm: 0.7727786344739405, iteration: 64565
loss: 0.9765169620513916,grad_norm: 0.8533110856376922, iteration: 64566
loss: 0.9987898468971252,grad_norm: 0.9985793005494419, iteration: 64567
loss: 1.0528924465179443,grad_norm: 0.9999991914934406, iteration: 64568
loss: 0.9788405299186707,grad_norm: 0.9860857423953183, iteration: 64569
loss: 0.9724314212799072,grad_norm: 0.8583098746798757, iteration: 64570
loss: 1.0170234441757202,grad_norm: 0.9999991206451201, iteration: 64571
loss: 0.9552310109138489,grad_norm: 0.8274114259293357, iteration: 64572
loss: 1.0155569314956665,grad_norm: 0.9999991297112235, iteration: 64573
loss: 1.0120081901550293,grad_norm: 0.9999991907313684, iteration: 64574
loss: 0.9852726459503174,grad_norm: 0.8288739229344046, iteration: 64575
loss: 1.0530400276184082,grad_norm: 0.9999994265016668, iteration: 64576
loss: 0.9931318163871765,grad_norm: 0.999999220664625, iteration: 64577
loss: 1.0302836894989014,grad_norm: 0.9999992139591847, iteration: 64578
loss: 1.0006482601165771,grad_norm: 0.9999990875641396, iteration: 64579
loss: 1.0254243612289429,grad_norm: 0.9613056960527353, iteration: 64580
loss: 0.99432772397995,grad_norm: 0.9999991924389001, iteration: 64581
loss: 0.9884313941001892,grad_norm: 0.9999991107669316, iteration: 64582
loss: 0.9990727305412292,grad_norm: 0.9999993907767399, iteration: 64583
loss: 1.0119967460632324,grad_norm: 0.9999993321114591, iteration: 64584
loss: 0.980568528175354,grad_norm: 0.8814059066799004, iteration: 64585
loss: 0.9936974048614502,grad_norm: 0.9156987343742663, iteration: 64586
loss: 0.9904717803001404,grad_norm: 0.989931086568045, iteration: 64587
loss: 0.9929131865501404,grad_norm: 0.9576113381972599, iteration: 64588
loss: 0.9809510111808777,grad_norm: 0.9999990798147712, iteration: 64589
loss: 0.9757201075553894,grad_norm: 0.9999991577342068, iteration: 64590
loss: 1.005265712738037,grad_norm: 0.9999992572203609, iteration: 64591
loss: 0.9238625168800354,grad_norm: 0.9999991606242749, iteration: 64592
loss: 0.9894793629646301,grad_norm: 0.9981287810997788, iteration: 64593
loss: 1.0402933359146118,grad_norm: 0.9999991664098249, iteration: 64594
loss: 0.9900922179222107,grad_norm: 0.9999991023748618, iteration: 64595
loss: 1.0009268522262573,grad_norm: 0.8960928252385074, iteration: 64596
loss: 0.9886948466300964,grad_norm: 0.9999989980118767, iteration: 64597
loss: 0.9954685568809509,grad_norm: 0.9779718767513752, iteration: 64598
loss: 1.0023527145385742,grad_norm: 0.9490588943329987, iteration: 64599
loss: 0.9963423609733582,grad_norm: 0.9999991322516493, iteration: 64600
loss: 0.9815090894699097,grad_norm: 0.9999989988026875, iteration: 64601
loss: 1.058181643486023,grad_norm: 0.9999991628246758, iteration: 64602
loss: 1.0074536800384521,grad_norm: 0.9999990465799348, iteration: 64603
loss: 1.0310851335525513,grad_norm: 0.9999993011683127, iteration: 64604
loss: 0.9964383840560913,grad_norm: 0.9999991731504168, iteration: 64605
loss: 1.0813615322113037,grad_norm: 0.9999992113560869, iteration: 64606
loss: 0.9826573729515076,grad_norm: 0.9062055775203853, iteration: 64607
loss: 0.9902686476707458,grad_norm: 0.9999990321043164, iteration: 64608
loss: 1.0192980766296387,grad_norm: 0.9999989899996286, iteration: 64609
loss: 0.9895067811012268,grad_norm: 0.9999992957012511, iteration: 64610
loss: 1.019594669342041,grad_norm: 0.9999992626572646, iteration: 64611
loss: 0.9787041544914246,grad_norm: 0.9999990524405808, iteration: 64612
loss: 0.9876726269721985,grad_norm: 0.9999991946160711, iteration: 64613
loss: 1.0063562393188477,grad_norm: 0.9710979667215268, iteration: 64614
loss: 0.9681186079978943,grad_norm: 0.9999991235892283, iteration: 64615
loss: 1.0275300741195679,grad_norm: 0.847319443592153, iteration: 64616
loss: 0.9295254945755005,grad_norm: 0.9826884364549405, iteration: 64617
loss: 1.0341606140136719,grad_norm: 0.9999990375544624, iteration: 64618
loss: 1.0313515663146973,grad_norm: 0.8935210868854269, iteration: 64619
loss: 1.0064667463302612,grad_norm: 0.9844005533239096, iteration: 64620
loss: 0.9974716901779175,grad_norm: 0.9999991137386652, iteration: 64621
loss: 1.0078729391098022,grad_norm: 0.9999990379113326, iteration: 64622
loss: 1.014607310295105,grad_norm: 0.9999991706400708, iteration: 64623
loss: 1.0157079696655273,grad_norm: 0.8975225524610837, iteration: 64624
loss: 1.0230355262756348,grad_norm: 0.9999991310614574, iteration: 64625
loss: 1.0218979120254517,grad_norm: 0.99999894366838, iteration: 64626
loss: 0.9951363205909729,grad_norm: 0.9999991466106589, iteration: 64627
loss: 1.0183035135269165,grad_norm: 0.999999247502626, iteration: 64628
loss: 0.9942660927772522,grad_norm: 0.9999990929779213, iteration: 64629
loss: 1.017430067062378,grad_norm: 0.9999992219711452, iteration: 64630
loss: 1.0171959400177002,grad_norm: 0.9999991282436209, iteration: 64631
loss: 1.0202327966690063,grad_norm: 0.9721787018784956, iteration: 64632
loss: 1.0307228565216064,grad_norm: 0.8515057229634733, iteration: 64633
loss: 0.9745409488677979,grad_norm: 0.9999990044887926, iteration: 64634
loss: 0.954561173915863,grad_norm: 0.9999990374031406, iteration: 64635
loss: 0.9857752323150635,grad_norm: 0.9999992159453923, iteration: 64636
loss: 0.9750885963439941,grad_norm: 0.9999990045108944, iteration: 64637
loss: 0.9760504364967346,grad_norm: 0.9208913565925942, iteration: 64638
loss: 1.00261652469635,grad_norm: 0.9253207543297011, iteration: 64639
loss: 1.014203667640686,grad_norm: 0.9389236119534037, iteration: 64640
loss: 0.9800319671630859,grad_norm: 0.9999991187148352, iteration: 64641
loss: 0.9752881526947021,grad_norm: 0.9999990214932656, iteration: 64642
loss: 0.9940531253814697,grad_norm: 0.9999991776597734, iteration: 64643
loss: 1.021574854850769,grad_norm: 0.9999991270280243, iteration: 64644
loss: 1.0298038721084595,grad_norm: 0.9999990917062187, iteration: 64645
loss: 0.9716796875,grad_norm: 0.9999991805229809, iteration: 64646
loss: 1.0100631713867188,grad_norm: 0.9999990638035872, iteration: 64647
loss: 0.9917137622833252,grad_norm: 0.9999991200251135, iteration: 64648
loss: 1.0155426263809204,grad_norm: 0.8914697823151413, iteration: 64649
loss: 0.9914466142654419,grad_norm: 0.9611694301295133, iteration: 64650
loss: 0.9918655753135681,grad_norm: 0.9999989205443354, iteration: 64651
loss: 1.0181199312210083,grad_norm: 0.9999990916396364, iteration: 64652
loss: 1.0327903032302856,grad_norm: 0.8703391980891321, iteration: 64653
loss: 0.9964417219161987,grad_norm: 0.9999989362788548, iteration: 64654
loss: 0.9843400120735168,grad_norm: 0.9999990874216834, iteration: 64655
loss: 1.0223369598388672,grad_norm: 0.9224952275039482, iteration: 64656
loss: 1.0281968116760254,grad_norm: 0.9999993046240744, iteration: 64657
loss: 1.015280842781067,grad_norm: 0.9999990427898627, iteration: 64658
loss: 1.0114288330078125,grad_norm: 0.9999990462893564, iteration: 64659
loss: 0.9926683902740479,grad_norm: 0.9428568968482524, iteration: 64660
loss: 0.9919226169586182,grad_norm: 0.9999992588963289, iteration: 64661
loss: 0.9751538634300232,grad_norm: 0.9695098925945581, iteration: 64662
loss: 0.9929619431495667,grad_norm: 0.9430502357497444, iteration: 64663
loss: 1.0180294513702393,grad_norm: 0.9999995972140181, iteration: 64664
loss: 1.0240718126296997,grad_norm: 0.9999990817708745, iteration: 64665
loss: 1.0263738632202148,grad_norm: 0.999999262810874, iteration: 64666
loss: 0.9959027767181396,grad_norm: 0.8735576668154303, iteration: 64667
loss: 1.022402048110962,grad_norm: 0.9999993641637455, iteration: 64668
loss: 0.980774998664856,grad_norm: 0.9999992846247422, iteration: 64669
loss: 1.0014101266860962,grad_norm: 0.9699485848600271, iteration: 64670
loss: 0.9837602376937866,grad_norm: 0.999999042152059, iteration: 64671
loss: 1.0288244485855103,grad_norm: 0.999999696603506, iteration: 64672
loss: 0.9951263666152954,grad_norm: 0.9999991466333553, iteration: 64673
loss: 0.9901121258735657,grad_norm: 0.9996876883394205, iteration: 64674
loss: 0.9819470643997192,grad_norm: 0.9483613179491892, iteration: 64675
loss: 0.9975343346595764,grad_norm: 0.9999989532619457, iteration: 64676
loss: 1.033105492591858,grad_norm: 0.999999452821404, iteration: 64677
loss: 1.0126062631607056,grad_norm: 0.9999990327425242, iteration: 64678
loss: 1.0002378225326538,grad_norm: 0.9999990810888981, iteration: 64679
loss: 0.9806336164474487,grad_norm: 0.9987817896787059, iteration: 64680
loss: 0.9978790879249573,grad_norm: 0.8850092315245177, iteration: 64681
loss: 1.0126253366470337,grad_norm: 0.9999992107050636, iteration: 64682
loss: 0.9975311160087585,grad_norm: 0.8511114063741739, iteration: 64683
loss: 1.0143381357192993,grad_norm: 0.9999993868961184, iteration: 64684
loss: 1.0068343877792358,grad_norm: 0.999999826530859, iteration: 64685
loss: 1.0052244663238525,grad_norm: 0.99999897315265, iteration: 64686
loss: 0.9657217264175415,grad_norm: 0.9670322522963893, iteration: 64687
loss: 1.0212507247924805,grad_norm: 0.9688452181168827, iteration: 64688
loss: 0.99833083152771,grad_norm: 0.9175514036454097, iteration: 64689
loss: 0.9954959154129028,grad_norm: 0.999999201736673, iteration: 64690
loss: 1.0109251737594604,grad_norm: 0.999999237425367, iteration: 64691
loss: 0.9788947105407715,grad_norm: 0.9789498516186304, iteration: 64692
loss: 1.021502137184143,grad_norm: 0.8240128655147093, iteration: 64693
loss: 1.009891152381897,grad_norm: 0.9999998828777397, iteration: 64694
loss: 0.9986788630485535,grad_norm: 0.9999992515555755, iteration: 64695
loss: 1.0101414918899536,grad_norm: 0.9385979327132611, iteration: 64696
loss: 1.0270209312438965,grad_norm: 0.9929097344059883, iteration: 64697
loss: 1.0426634550094604,grad_norm: 0.9584201812527524, iteration: 64698
loss: 0.9906731247901917,grad_norm: 0.9447365196073925, iteration: 64699
loss: 0.9956684112548828,grad_norm: 0.9391378977624975, iteration: 64700
loss: 1.0175316333770752,grad_norm: 0.9999991503324136, iteration: 64701
loss: 1.0173944234848022,grad_norm: 0.8440773826919727, iteration: 64702
loss: 0.9932212829589844,grad_norm: 0.999999006999959, iteration: 64703
loss: 1.0107102394104004,grad_norm: 0.9999992315405487, iteration: 64704
loss: 0.9574975371360779,grad_norm: 0.999998970279572, iteration: 64705
loss: 0.9722091555595398,grad_norm: 0.9430015167484205, iteration: 64706
loss: 0.978661060333252,grad_norm: 0.939979441327388, iteration: 64707
loss: 0.9920260310173035,grad_norm: 0.986529364167011, iteration: 64708
loss: 0.983061671257019,grad_norm: 0.9819704532192535, iteration: 64709
loss: 0.9882714152336121,grad_norm: 0.9999997833044901, iteration: 64710
loss: 0.9938817024230957,grad_norm: 0.9928609351390091, iteration: 64711
loss: 0.9630664587020874,grad_norm: 0.9588284632816426, iteration: 64712
loss: 0.9915323853492737,grad_norm: 0.999999167748728, iteration: 64713
loss: 1.0223840475082397,grad_norm: 0.9999994605921501, iteration: 64714
loss: 1.0390303134918213,grad_norm: 0.9999999182961405, iteration: 64715
loss: 0.9777030944824219,grad_norm: 0.9999992005185988, iteration: 64716
loss: 0.9716688394546509,grad_norm: 0.9999998761017324, iteration: 64717
loss: 0.9955264925956726,grad_norm: 0.9999992114429144, iteration: 64718
loss: 1.0144646167755127,grad_norm: 0.9999991322730272, iteration: 64719
loss: 1.00764000415802,grad_norm: 0.9820950729832201, iteration: 64720
loss: 1.0172197818756104,grad_norm: 0.9165634791079593, iteration: 64721
loss: 1.0293641090393066,grad_norm: 0.9999989799512107, iteration: 64722
loss: 0.9916749000549316,grad_norm: 0.9999993662857078, iteration: 64723
loss: 0.981590986251831,grad_norm: 0.9999992218240588, iteration: 64724
loss: 1.0129203796386719,grad_norm: 0.8879992062382301, iteration: 64725
loss: 1.0253268480300903,grad_norm: 0.9999992135480033, iteration: 64726
loss: 0.9943457841873169,grad_norm: 0.9999991777116937, iteration: 64727
loss: 1.004929542541504,grad_norm: 0.9999996655983655, iteration: 64728
loss: 0.9975730776786804,grad_norm: 0.8978008137937707, iteration: 64729
loss: 1.0140818357467651,grad_norm: 0.999999054059709, iteration: 64730
loss: 0.9632768630981445,grad_norm: 0.9620077489712006, iteration: 64731
loss: 1.060658574104309,grad_norm: 0.9767397656611762, iteration: 64732
loss: 1.0203874111175537,grad_norm: 0.999999245230156, iteration: 64733
loss: 0.970220685005188,grad_norm: 0.999998990998607, iteration: 64734
loss: 0.9947481751441956,grad_norm: 0.9999991316377678, iteration: 64735
loss: 1.0007169246673584,grad_norm: 0.9999990190836099, iteration: 64736
loss: 0.9918148517608643,grad_norm: 0.9999990838362249, iteration: 64737
loss: 1.00497305393219,grad_norm: 0.9999990250535669, iteration: 64738
loss: 1.0155476331710815,grad_norm: 0.9999990539350123, iteration: 64739
loss: 0.9894989728927612,grad_norm: 0.9999990765166408, iteration: 64740
loss: 0.9844050407409668,grad_norm: 0.9999991683694047, iteration: 64741
loss: 0.9748330116271973,grad_norm: 0.9999992765454607, iteration: 64742
loss: 1.0801444053649902,grad_norm: 0.9999994778067246, iteration: 64743
loss: 0.9959492087364197,grad_norm: 0.9999991878622697, iteration: 64744
loss: 1.0030362606048584,grad_norm: 0.8494473251316608, iteration: 64745
loss: 1.2201390266418457,grad_norm: 0.9999997839543787, iteration: 64746
loss: 1.0063823461532593,grad_norm: 0.999999101696453, iteration: 64747
loss: 1.0027157068252563,grad_norm: 0.9999990916433451, iteration: 64748
loss: 1.0499958992004395,grad_norm: 0.9738592919616321, iteration: 64749
loss: 0.9595118761062622,grad_norm: 0.9999990664749496, iteration: 64750
loss: 0.948576033115387,grad_norm: 0.9842048913422627, iteration: 64751
loss: 0.9802048206329346,grad_norm: 0.9999992770240884, iteration: 64752
loss: 0.9441984295845032,grad_norm: 0.999999035805948, iteration: 64753
loss: 1.0462946891784668,grad_norm: 0.9692738434020793, iteration: 64754
loss: 0.9961107969284058,grad_norm: 0.9187680395583134, iteration: 64755
loss: 1.0396891832351685,grad_norm: 0.9796886620023151, iteration: 64756
loss: 1.0239990949630737,grad_norm: 0.9231142592400753, iteration: 64757
loss: 0.9665102362632751,grad_norm: 0.978988355436059, iteration: 64758
loss: 1.003363013267517,grad_norm: 0.9216942178558429, iteration: 64759
loss: 1.0604445934295654,grad_norm: 0.9999993007699448, iteration: 64760
loss: 1.014649510383606,grad_norm: 0.9999990862333491, iteration: 64761
loss: 0.9797921180725098,grad_norm: 0.9121871233673019, iteration: 64762
loss: 1.0095276832580566,grad_norm: 0.9999992423546022, iteration: 64763
loss: 1.0152031183242798,grad_norm: 0.9822911530172964, iteration: 64764
loss: 1.0210717916488647,grad_norm: 0.9766403510179674, iteration: 64765
loss: 0.9974507093429565,grad_norm: 0.999998918543885, iteration: 64766
loss: 1.0350812673568726,grad_norm: 0.9999995584965029, iteration: 64767
loss: 1.014086127281189,grad_norm: 0.9999991187032041, iteration: 64768
loss: 1.011207938194275,grad_norm: 0.9565318496930308, iteration: 64769
loss: 0.9869670867919922,grad_norm: 0.9999994384610772, iteration: 64770
loss: 0.9870518445968628,grad_norm: 0.9999992345962608, iteration: 64771
loss: 1.0076085329055786,grad_norm: 0.9631785130231094, iteration: 64772
loss: 0.9921416640281677,grad_norm: 0.9999992157040904, iteration: 64773
loss: 0.9843822121620178,grad_norm: 0.9999997205344029, iteration: 64774
loss: 1.0152455568313599,grad_norm: 0.999999244941981, iteration: 64775
loss: 1.0306576490402222,grad_norm: 0.9999993868259122, iteration: 64776
loss: 0.9524876475334167,grad_norm: 0.9646156376085571, iteration: 64777
loss: 1.0021812915802002,grad_norm: 0.9999990473351809, iteration: 64778
loss: 0.9772400259971619,grad_norm: 0.985651466376998, iteration: 64779
loss: 1.062364101409912,grad_norm: 0.8241580288460354, iteration: 64780
loss: 1.0334837436676025,grad_norm: 0.9999990394379182, iteration: 64781
loss: 1.0036712884902954,grad_norm: 0.9999994522003672, iteration: 64782
loss: 0.9767506122589111,grad_norm: 0.9083247375048095, iteration: 64783
loss: 1.0254666805267334,grad_norm: 0.9999998457246068, iteration: 64784
loss: 1.0359617471694946,grad_norm: 0.970808534939847, iteration: 64785
loss: 1.0200155973434448,grad_norm: 0.9999990893143693, iteration: 64786
loss: 1.0264374017715454,grad_norm: 0.9999992196042139, iteration: 64787
loss: 0.992213785648346,grad_norm: 0.7958925738786347, iteration: 64788
loss: 1.036961555480957,grad_norm: 0.9999996701112974, iteration: 64789
loss: 0.9845079779624939,grad_norm: 0.924774926107081, iteration: 64790
loss: 0.9612791538238525,grad_norm: 0.9999989778349415, iteration: 64791
loss: 1.0504661798477173,grad_norm: 0.9999993102769178, iteration: 64792
loss: 1.0445411205291748,grad_norm: 0.9999998267145592, iteration: 64793
loss: 0.9874630570411682,grad_norm: 0.8616946374768403, iteration: 64794
loss: 0.9886371493339539,grad_norm: 0.9999991513571538, iteration: 64795
loss: 1.0284039974212646,grad_norm: 0.9035237484116029, iteration: 64796
loss: 1.014025092124939,grad_norm: 0.9999995162523793, iteration: 64797
loss: 1.013073444366455,grad_norm: 0.9999992257646767, iteration: 64798
loss: 0.9752663373947144,grad_norm: 0.9325250342817851, iteration: 64799
loss: 1.0305510759353638,grad_norm: 0.9999989721687045, iteration: 64800
loss: 1.0029752254486084,grad_norm: 0.9615746230841026, iteration: 64801
loss: 1.0050100088119507,grad_norm: 0.9990381512979477, iteration: 64802
loss: 0.9937619566917419,grad_norm: 0.9348049828300328, iteration: 64803
loss: 1.0024464130401611,grad_norm: 0.999999390303433, iteration: 64804
loss: 1.000671148300171,grad_norm: 0.9999991745685354, iteration: 64805
loss: 1.011443018913269,grad_norm: 0.9949178773123939, iteration: 64806
loss: 0.9958844184875488,grad_norm: 0.874308481581914, iteration: 64807
loss: 1.0120431184768677,grad_norm: 0.9999991505583744, iteration: 64808
loss: 1.01730477809906,grad_norm: 0.9999992396028812, iteration: 64809
loss: 0.9854815006256104,grad_norm: 0.9999991776704342, iteration: 64810
loss: 1.0663425922393799,grad_norm: 0.9999996531927694, iteration: 64811
loss: 1.0283501148223877,grad_norm: 0.9999990528360831, iteration: 64812
loss: 1.05368971824646,grad_norm: 0.9999998512707543, iteration: 64813
loss: 0.991685688495636,grad_norm: 0.9999992047544071, iteration: 64814
loss: 0.962601363658905,grad_norm: 0.9999992133981843, iteration: 64815
loss: 0.9778998494148254,grad_norm: 0.9317790625398009, iteration: 64816
loss: 0.9810976386070251,grad_norm: 0.898310180748435, iteration: 64817
loss: 1.0397690534591675,grad_norm: 0.9999992743623172, iteration: 64818
loss: 0.9924235343933105,grad_norm: 0.9999990463796606, iteration: 64819
loss: 1.0593268871307373,grad_norm: 0.9999991634728016, iteration: 64820
loss: 0.9916954636573792,grad_norm: 0.897988625906257, iteration: 64821
loss: 0.9766510128974915,grad_norm: 0.9678515337163068, iteration: 64822
loss: 1.0222594738006592,grad_norm: 0.9824477902767775, iteration: 64823
loss: 0.9584808349609375,grad_norm: 0.9999991840294932, iteration: 64824
loss: 0.9895601272583008,grad_norm: 0.9999991587079664, iteration: 64825
loss: 0.9901925325393677,grad_norm: 0.9951494756679722, iteration: 64826
loss: 1.0065289735794067,grad_norm: 0.9999990357363157, iteration: 64827
loss: 0.9632560610771179,grad_norm: 0.9218684780109576, iteration: 64828
loss: 0.9723145961761475,grad_norm: 0.9999992636107842, iteration: 64829
loss: 1.0040702819824219,grad_norm: 0.9999992265884845, iteration: 64830
loss: 1.0187122821807861,grad_norm: 0.9825143736611955, iteration: 64831
loss: 1.077742576599121,grad_norm: 0.9322815773768868, iteration: 64832
loss: 0.9802040457725525,grad_norm: 0.96755584313989, iteration: 64833
loss: 0.9998559355735779,grad_norm: 0.9999991492277455, iteration: 64834
loss: 1.0120893716812134,grad_norm: 0.9999996190684153, iteration: 64835
loss: 0.9988466501235962,grad_norm: 0.9772500460435992, iteration: 64836
loss: 1.0419998168945312,grad_norm: 0.9976653837136211, iteration: 64837
loss: 1.0263837575912476,grad_norm: 0.9999990952289131, iteration: 64838
loss: 1.0026218891143799,grad_norm: 0.999999268707661, iteration: 64839
loss: 0.9858152866363525,grad_norm: 0.9999991806561426, iteration: 64840
loss: 1.0007898807525635,grad_norm: 0.9999993887749533, iteration: 64841
loss: 0.9882994890213013,grad_norm: 0.9999990481152211, iteration: 64842
loss: 0.9695191979408264,grad_norm: 0.8319498327514806, iteration: 64843
loss: 0.9885640144348145,grad_norm: 0.9999991458808677, iteration: 64844
loss: 0.9597771167755127,grad_norm: 0.99999907574523, iteration: 64845
loss: 0.9940122961997986,grad_norm: 0.9999991388395618, iteration: 64846
loss: 1.0093841552734375,grad_norm: 0.9999990057003456, iteration: 64847
loss: 1.0082701444625854,grad_norm: 0.999999103428456, iteration: 64848
loss: 1.021262764930725,grad_norm: 0.9999992888086506, iteration: 64849
loss: 1.0114339590072632,grad_norm: 0.9999996920775782, iteration: 64850
loss: 1.0138609409332275,grad_norm: 0.9999990921034717, iteration: 64851
loss: 0.9844145774841309,grad_norm: 0.9707447892817078, iteration: 64852
loss: 1.0289850234985352,grad_norm: 0.9999991572409965, iteration: 64853
loss: 0.9947788715362549,grad_norm: 0.9999992377781561, iteration: 64854
loss: 1.0462117195129395,grad_norm: 0.9999991090235079, iteration: 64855
loss: 1.0231539011001587,grad_norm: 0.9999991997262657, iteration: 64856
loss: 1.0200953483581543,grad_norm: 0.8505803505902416, iteration: 64857
loss: 0.9799433946609497,grad_norm: 0.8952449988329098, iteration: 64858
loss: 0.982938289642334,grad_norm: 0.9999990636012009, iteration: 64859
loss: 0.9939670562744141,grad_norm: 0.8830486716277833, iteration: 64860
loss: 1.0114014148712158,grad_norm: 0.9999990730160236, iteration: 64861
loss: 1.0147736072540283,grad_norm: 0.9999993546366683, iteration: 64862
loss: 0.9854999780654907,grad_norm: 0.9999991628810034, iteration: 64863
loss: 0.992865800857544,grad_norm: 0.9999991159121605, iteration: 64864
loss: 1.0007877349853516,grad_norm: 0.8845070695685877, iteration: 64865
loss: 0.9912371039390564,grad_norm: 0.9999994576134619, iteration: 64866
loss: 1.0343202352523804,grad_norm: 0.9999998642078383, iteration: 64867
loss: 0.9889686703681946,grad_norm: 0.999999143695738, iteration: 64868
loss: 0.973961591720581,grad_norm: 0.9999991327372646, iteration: 64869
loss: 1.0729111433029175,grad_norm: 0.9999997044312294, iteration: 64870
loss: 1.0099354982376099,grad_norm: 0.9999992975312875, iteration: 64871
loss: 0.9916892051696777,grad_norm: 0.9643545818043827, iteration: 64872
loss: 0.9666446447372437,grad_norm: 0.8490454061688248, iteration: 64873
loss: 1.0151724815368652,grad_norm: 0.8925685809956142, iteration: 64874
loss: 1.018847942352295,grad_norm: 0.9999990872026847, iteration: 64875
loss: 1.0053523778915405,grad_norm: 0.9999990977076858, iteration: 64876
loss: 1.039896845817566,grad_norm: 0.99999918116997, iteration: 64877
loss: 1.0063658952713013,grad_norm: 0.9999991449258058, iteration: 64878
loss: 1.0228428840637207,grad_norm: 0.9145828040914534, iteration: 64879
loss: 1.037471055984497,grad_norm: 0.9775623688823403, iteration: 64880
loss: 1.0050408840179443,grad_norm: 0.9999991669088262, iteration: 64881
loss: 1.0060641765594482,grad_norm: 0.9999997723655006, iteration: 64882
loss: 1.0104014873504639,grad_norm: 0.9999991874096007, iteration: 64883
loss: 0.9949139952659607,grad_norm: 0.9999990041966252, iteration: 64884
loss: 1.0124043226242065,grad_norm: 0.9999990947916113, iteration: 64885
loss: 0.9956931471824646,grad_norm: 0.9999990533170043, iteration: 64886
loss: 1.009769320487976,grad_norm: 0.9670562678906016, iteration: 64887
loss: 1.0203863382339478,grad_norm: 0.9999993502404061, iteration: 64888
loss: 1.0118483304977417,grad_norm: 0.9363422174050198, iteration: 64889
loss: 0.9788855910301208,grad_norm: 0.9705593695011284, iteration: 64890
loss: 0.9951709508895874,grad_norm: 0.9830651289051073, iteration: 64891
loss: 1.0188024044036865,grad_norm: 0.9999990344012789, iteration: 64892
loss: 1.0143258571624756,grad_norm: 0.8911240329646751, iteration: 64893
loss: 1.0061579942703247,grad_norm: 0.999999193617815, iteration: 64894
loss: 0.9983643293380737,grad_norm: 0.946983430222191, iteration: 64895
loss: 1.0102871656417847,grad_norm: 0.9999995189920495, iteration: 64896
loss: 1.0189419984817505,grad_norm: 0.8586522465225465, iteration: 64897
loss: 0.9914426803588867,grad_norm: 0.9999991657104719, iteration: 64898
loss: 1.068406105041504,grad_norm: 0.9999997639512512, iteration: 64899
loss: 0.9872211217880249,grad_norm: 0.9999989950081385, iteration: 64900
loss: 0.9698001146316528,grad_norm: 0.9999990766870197, iteration: 64901
loss: 1.0151211023330688,grad_norm: 0.8422018155566662, iteration: 64902
loss: 1.016517996788025,grad_norm: 0.999999233606606, iteration: 64903
loss: 0.9948967695236206,grad_norm: 0.9999992115427796, iteration: 64904
loss: 1.0150573253631592,grad_norm: 0.9405506779279786, iteration: 64905
loss: 1.0843466520309448,grad_norm: 0.9999997568247186, iteration: 64906
loss: 1.033866286277771,grad_norm: 0.8881014148003401, iteration: 64907
loss: 1.0356335639953613,grad_norm: 0.9999992366466732, iteration: 64908
loss: 1.0256825685501099,grad_norm: 0.9999991449795811, iteration: 64909
loss: 1.0190750360488892,grad_norm: 0.9999991969039335, iteration: 64910
loss: 0.9968019723892212,grad_norm: 0.9999995440952054, iteration: 64911
loss: 1.0157766342163086,grad_norm: 0.9999992495175626, iteration: 64912
loss: 1.0229467153549194,grad_norm: 0.9999991894945823, iteration: 64913
loss: 0.9803503751754761,grad_norm: 0.999999132159384, iteration: 64914
loss: 1.0282574892044067,grad_norm: 0.8863944011132224, iteration: 64915
loss: 1.0263826847076416,grad_norm: 0.9999992863298796, iteration: 64916
loss: 0.9757055044174194,grad_norm: 0.8716455706162561, iteration: 64917
loss: 1.0194612741470337,grad_norm: 0.9999992311715749, iteration: 64918
loss: 1.074307918548584,grad_norm: 0.9999994632036547, iteration: 64919
loss: 0.9967115521430969,grad_norm: 0.9999992772329526, iteration: 64920
loss: 1.0296727418899536,grad_norm: 0.9999996872530907, iteration: 64921
loss: 1.0378280878067017,grad_norm: 0.978858327309718, iteration: 64922
loss: 1.0156445503234863,grad_norm: 0.9999995663150664, iteration: 64923
loss: 1.0313076972961426,grad_norm: 0.9999993192464823, iteration: 64924
loss: 1.0008955001831055,grad_norm: 0.9999991073587348, iteration: 64925
loss: 1.0206037759780884,grad_norm: 1.0000000138177494, iteration: 64926
loss: 1.1122742891311646,grad_norm: 0.9999997720561216, iteration: 64927
loss: 0.9923774003982544,grad_norm: 0.9999992847563502, iteration: 64928
loss: 0.991416871547699,grad_norm: 0.8938575981897722, iteration: 64929
loss: 1.0104405879974365,grad_norm: 0.9508993054206962, iteration: 64930
loss: 1.0017977952957153,grad_norm: 0.9999991122974913, iteration: 64931
loss: 0.9710962772369385,grad_norm: 0.9999993463095725, iteration: 64932
loss: 1.0685112476348877,grad_norm: 0.9999991934904802, iteration: 64933
loss: 1.0135364532470703,grad_norm: 0.999999084105611, iteration: 64934
loss: 1.005071759223938,grad_norm: 0.999999240671923, iteration: 64935
loss: 1.1788334846496582,grad_norm: 0.9999997753410272, iteration: 64936
loss: 1.0025819540023804,grad_norm: 0.9999991833959367, iteration: 64937
loss: 1.0655114650726318,grad_norm: 0.9999995558207931, iteration: 64938
loss: 1.071230173110962,grad_norm: 0.9999996095510816, iteration: 64939
loss: 0.9964458346366882,grad_norm: 0.9999989178150711, iteration: 64940
loss: 1.0648289918899536,grad_norm: 0.9999998254747191, iteration: 64941
loss: 1.013505220413208,grad_norm: 0.9999992903571691, iteration: 64942
loss: 0.9990481734275818,grad_norm: 0.9999992318787706, iteration: 64943
loss: 1.0166747570037842,grad_norm: 0.9641573134953144, iteration: 64944
loss: 1.0522856712341309,grad_norm: 0.999999226748562, iteration: 64945
loss: 1.017040491104126,grad_norm: 0.8704120311758576, iteration: 64946
loss: 1.01108717918396,grad_norm: 0.9999990751555546, iteration: 64947
loss: 0.9737221002578735,grad_norm: 0.9999990508317682, iteration: 64948
loss: 0.9631718397140503,grad_norm: 0.9720614045510194, iteration: 64949
loss: 1.0301134586334229,grad_norm: 0.9065608945514254, iteration: 64950
loss: 0.9886599779129028,grad_norm: 0.8545475633296981, iteration: 64951
loss: 1.023819088935852,grad_norm: 0.8505379321937607, iteration: 64952
loss: 0.9477514028549194,grad_norm: 0.9999990786811584, iteration: 64953
loss: 1.0051910877227783,grad_norm: 0.9999992356534884, iteration: 64954
loss: 1.006337285041809,grad_norm: 0.9999997186144929, iteration: 64955
loss: 1.0023317337036133,grad_norm: 0.9999991514477469, iteration: 64956
loss: 1.0286387205123901,grad_norm: 0.9999992653686536, iteration: 64957
loss: 1.0131512880325317,grad_norm: 0.9999991532359552, iteration: 64958
loss: 1.013679027557373,grad_norm: 0.9999992761268874, iteration: 64959
loss: 0.9772471785545349,grad_norm: 0.9999991752582964, iteration: 64960
loss: 0.9939278960227966,grad_norm: 0.9852859260701042, iteration: 64961
loss: 1.0161869525909424,grad_norm: 0.9791048924468974, iteration: 64962
loss: 1.0229302644729614,grad_norm: 0.9999991193637767, iteration: 64963
loss: 0.9911564588546753,grad_norm: 0.99999953959172, iteration: 64964
loss: 1.0241698026657104,grad_norm: 0.9999990957280153, iteration: 64965
loss: 0.9920939803123474,grad_norm: 0.9432482715013353, iteration: 64966
loss: 0.9990192651748657,grad_norm: 0.999999128417189, iteration: 64967
loss: 0.9929892420768738,grad_norm: 0.9517937043542863, iteration: 64968
loss: 1.009111762046814,grad_norm: 0.9999992646462459, iteration: 64969
loss: 1.0281611680984497,grad_norm: 0.9999991507803174, iteration: 64970
loss: 1.0185943841934204,grad_norm: 0.9999991558725233, iteration: 64971
loss: 1.0522547960281372,grad_norm: 0.9999999060546586, iteration: 64972
loss: 1.0083496570587158,grad_norm: 0.9539465417878574, iteration: 64973
loss: 0.9589101672172546,grad_norm: 0.9999993700414874, iteration: 64974
loss: 0.9883179664611816,grad_norm: 0.9094400727467715, iteration: 64975
loss: 0.9650851488113403,grad_norm: 0.9999991549871383, iteration: 64976
loss: 0.9753157496452332,grad_norm: 0.9999989855791158, iteration: 64977
loss: 0.9807877540588379,grad_norm: 0.9999992668814116, iteration: 64978
loss: 0.982329249382019,grad_norm: 0.9581174489914558, iteration: 64979
loss: 0.9800381064414978,grad_norm: 0.9221437173411561, iteration: 64980
loss: 1.0156548023223877,grad_norm: 0.9999990477822984, iteration: 64981
loss: 0.9572287797927856,grad_norm: 0.881155414784883, iteration: 64982
loss: 1.020960807800293,grad_norm: 0.9999990978606084, iteration: 64983
loss: 1.0092653036117554,grad_norm: 0.9285138952118953, iteration: 64984
loss: 0.9783809185028076,grad_norm: 0.9669403880165245, iteration: 64985
loss: 1.0097990036010742,grad_norm: 0.9999991268649671, iteration: 64986
loss: 0.9995019435882568,grad_norm: 0.9999991287377948, iteration: 64987
loss: 1.004973292350769,grad_norm: 0.9999991281167575, iteration: 64988
loss: 0.9555099010467529,grad_norm: 0.9999991809820269, iteration: 64989
loss: 1.0101022720336914,grad_norm: 0.999999147937477, iteration: 64990
loss: 1.0017942190170288,grad_norm: 0.9999992661229713, iteration: 64991
loss: 0.9811328649520874,grad_norm: 0.9413980956520419, iteration: 64992
loss: 0.9916479587554932,grad_norm: 0.8615712719983466, iteration: 64993
loss: 0.994830310344696,grad_norm: 0.965177455887637, iteration: 64994
loss: 0.9922785758972168,grad_norm: 0.9215113968232612, iteration: 64995
loss: 1.039440631866455,grad_norm: 0.9937898888317946, iteration: 64996
loss: 1.0034481287002563,grad_norm: 0.8857896234951466, iteration: 64997
loss: 1.0180108547210693,grad_norm: 0.9269752033045869, iteration: 64998
loss: 1.04899001121521,grad_norm: 0.9999990116370537, iteration: 64999
loss: 1.0180350542068481,grad_norm: 0.9999992342010036, iteration: 65000
loss: 0.965623140335083,grad_norm: 0.9999991267477956, iteration: 65001
loss: 1.0089061260223389,grad_norm: 0.9999994268457624, iteration: 65002
loss: 1.0178864002227783,grad_norm: 0.9999989758272436, iteration: 65003
loss: 0.989972710609436,grad_norm: 0.8181152540108674, iteration: 65004
loss: 1.000258445739746,grad_norm: 0.9999993124173341, iteration: 65005
loss: 0.9703505635261536,grad_norm: 0.9025261350911504, iteration: 65006
loss: 1.0497609376907349,grad_norm: 0.9999994014292384, iteration: 65007
loss: 1.006944179534912,grad_norm: 0.8990749590756247, iteration: 65008
loss: 1.030710220336914,grad_norm: 0.9999992554889129, iteration: 65009
loss: 0.9767970442771912,grad_norm: 0.999999080122378, iteration: 65010
loss: 1.0312427282333374,grad_norm: 0.9999994239118882, iteration: 65011
loss: 1.002332091331482,grad_norm: 0.9999991519946823, iteration: 65012
loss: 0.9890478849411011,grad_norm: 0.9103356665851211, iteration: 65013
loss: 1.0361244678497314,grad_norm: 0.9999991702433108, iteration: 65014
loss: 0.9880070090293884,grad_norm: 0.8569854695508942, iteration: 65015
loss: 1.0031803846359253,grad_norm: 0.9999993665702632, iteration: 65016
loss: 0.9835137128829956,grad_norm: 0.9108860007497259, iteration: 65017
loss: 1.0397889614105225,grad_norm: 0.9534098007126028, iteration: 65018
loss: 0.9846931099891663,grad_norm: 0.9999999057726818, iteration: 65019
loss: 0.9917401075363159,grad_norm: 0.999999103451459, iteration: 65020
loss: 0.9619926810264587,grad_norm: 0.9999991729259428, iteration: 65021
loss: 0.9952379465103149,grad_norm: 0.9999993067887122, iteration: 65022
loss: 1.010671854019165,grad_norm: 0.9999990946817781, iteration: 65023
loss: 1.0072991847991943,grad_norm: 0.9999991781519064, iteration: 65024
loss: 0.9925395250320435,grad_norm: 0.9999991139771922, iteration: 65025
loss: 0.9713096618652344,grad_norm: 0.9999990313852316, iteration: 65026
loss: 0.9962934851646423,grad_norm: 0.9999991190899393, iteration: 65027
loss: 1.0006731748580933,grad_norm: 0.9761078144210343, iteration: 65028
loss: 1.005375862121582,grad_norm: 0.8791686368910919, iteration: 65029
loss: 0.9929338693618774,grad_norm: 0.9577156418391225, iteration: 65030
loss: 1.0245553255081177,grad_norm: 0.933344430556841, iteration: 65031
loss: 0.996647298336029,grad_norm: 0.9986521155642601, iteration: 65032
loss: 0.9573286175727844,grad_norm: 0.9999990476475851, iteration: 65033
loss: 0.9826624989509583,grad_norm: 0.9999991726386798, iteration: 65034
loss: 0.9882393479347229,grad_norm: 0.9999994944829789, iteration: 65035
loss: 1.000954270362854,grad_norm: 0.9999991598662623, iteration: 65036
loss: 0.9887081384658813,grad_norm: 0.9999992581890969, iteration: 65037
loss: 0.9700538516044617,grad_norm: 0.9084713956896994, iteration: 65038
loss: 1.0004212856292725,grad_norm: 0.9729210946863474, iteration: 65039
loss: 1.0286004543304443,grad_norm: 0.999999230129683, iteration: 65040
loss: 0.9907003045082092,grad_norm: 0.9670066014573827, iteration: 65041
loss: 1.0099668502807617,grad_norm: 0.9999989912551384, iteration: 65042
loss: 0.9880391955375671,grad_norm: 0.8864635700988946, iteration: 65043
loss: 1.0012136697769165,grad_norm: 0.9999991203597123, iteration: 65044
loss: 1.0194443464279175,grad_norm: 0.9999992161831126, iteration: 65045
loss: 1.0026516914367676,grad_norm: 0.9915893387922444, iteration: 65046
loss: 1.0246434211730957,grad_norm: 0.9999991309099485, iteration: 65047
loss: 0.9973329305648804,grad_norm: 0.9999992743291565, iteration: 65048
loss: 1.0114895105361938,grad_norm: 0.989274740774035, iteration: 65049
loss: 0.9870856404304504,grad_norm: 0.9586692829564614, iteration: 65050
loss: 0.9610410928726196,grad_norm: 0.8724071287645269, iteration: 65051
loss: 0.9953495264053345,grad_norm: 0.9999992969093664, iteration: 65052
loss: 0.995895504951477,grad_norm: 0.9999990440133749, iteration: 65053
loss: 1.013353705406189,grad_norm: 0.999999420613614, iteration: 65054
loss: 1.0136749744415283,grad_norm: 0.9999991348100444, iteration: 65055
loss: 1.0230785608291626,grad_norm: 0.9999990734781897, iteration: 65056
loss: 1.0066218376159668,grad_norm: 0.9595632437114692, iteration: 65057
loss: 0.9919945597648621,grad_norm: 0.9999988684429089, iteration: 65058
loss: 0.998928964138031,grad_norm: 0.9394827611686364, iteration: 65059
loss: 0.9454697370529175,grad_norm: 0.9999991810328577, iteration: 65060
loss: 0.9643213152885437,grad_norm: 0.9999991635027136, iteration: 65061
loss: 1.0475317239761353,grad_norm: 0.9999991212625049, iteration: 65062
loss: 0.9872514605522156,grad_norm: 0.9295734466575182, iteration: 65063
loss: 1.0247734785079956,grad_norm: 0.9990006418717325, iteration: 65064
loss: 1.024012804031372,grad_norm: 0.9999994181775553, iteration: 65065
loss: 0.9830180406570435,grad_norm: 0.945768987801666, iteration: 65066
loss: 1.0110012292861938,grad_norm: 0.999999185902294, iteration: 65067
loss: 0.9934898614883423,grad_norm: 0.9588700434806428, iteration: 65068
loss: 1.0138754844665527,grad_norm: 0.9999993037750019, iteration: 65069
loss: 1.0030021667480469,grad_norm: 0.9331876326675689, iteration: 65070
loss: 1.0193970203399658,grad_norm: 0.9999990442735811, iteration: 65071
loss: 0.9885304570198059,grad_norm: 0.9565985896128926, iteration: 65072
loss: 0.963958203792572,grad_norm: 0.987323828151433, iteration: 65073
loss: 0.9860771894454956,grad_norm: 0.9562379783256457, iteration: 65074
loss: 1.023395299911499,grad_norm: 0.9999998686331005, iteration: 65075
loss: 1.013074517250061,grad_norm: 0.9999992192304504, iteration: 65076
loss: 1.037285327911377,grad_norm: 0.9999991316330485, iteration: 65077
loss: 1.0425528287887573,grad_norm: 0.9992717174405784, iteration: 65078
loss: 0.968876838684082,grad_norm: 0.9999992540093714, iteration: 65079
loss: 0.9825027585029602,grad_norm: 0.9999991578985892, iteration: 65080
loss: 0.9843549132347107,grad_norm: 0.9999989142601872, iteration: 65081
loss: 1.0235508680343628,grad_norm: 0.9999990793048144, iteration: 65082
loss: 1.012578010559082,grad_norm: 0.8586505512143046, iteration: 65083
loss: 1.014594316482544,grad_norm: 0.9999991695335714, iteration: 65084
loss: 0.9876490831375122,grad_norm: 0.9999992155226638, iteration: 65085
loss: 0.9795162677764893,grad_norm: 0.9776324369758779, iteration: 65086
loss: 0.9753596186637878,grad_norm: 0.9999990575427228, iteration: 65087
loss: 1.0971238613128662,grad_norm: 0.9999999560875604, iteration: 65088
loss: 1.0196884870529175,grad_norm: 0.9999991175983816, iteration: 65089
loss: 1.0030254125595093,grad_norm: 0.9445673346971063, iteration: 65090
loss: 0.9852617383003235,grad_norm: 0.9922337382973356, iteration: 65091
loss: 1.0419750213623047,grad_norm: 0.9999991788068254, iteration: 65092
loss: 1.0272395610809326,grad_norm: 0.9999996004977346, iteration: 65093
loss: 1.0164074897766113,grad_norm: 0.9999989967393051, iteration: 65094
loss: 0.9958071112632751,grad_norm: 0.999999351889673, iteration: 65095
loss: 1.0158934593200684,grad_norm: 0.8526900568743984, iteration: 65096
loss: 1.0306891202926636,grad_norm: 0.999999161186763, iteration: 65097
loss: 0.9700075387954712,grad_norm: 0.9871573246241089, iteration: 65098
loss: 0.9898350834846497,grad_norm: 0.9999991651986441, iteration: 65099
loss: 1.0090241432189941,grad_norm: 0.9999991373090673, iteration: 65100
loss: 0.9742071032524109,grad_norm: 0.9693319200997408, iteration: 65101
loss: 0.9779378175735474,grad_norm: 0.9999992695854111, iteration: 65102
loss: 1.026977777481079,grad_norm: 0.9999991629400864, iteration: 65103
loss: 1.0498546361923218,grad_norm: 0.999999445834672, iteration: 65104
loss: 1.0045068264007568,grad_norm: 0.9999992111672203, iteration: 65105
loss: 1.0160515308380127,grad_norm: 0.7756618489138959, iteration: 65106
loss: 0.9826924204826355,grad_norm: 0.9999992055022799, iteration: 65107
loss: 0.9927038550376892,grad_norm: 0.9387146246540476, iteration: 65108
loss: 1.0286893844604492,grad_norm: 0.9999990528536686, iteration: 65109
loss: 0.984656572341919,grad_norm: 0.9999992595750332, iteration: 65110
loss: 1.003093957901001,grad_norm: 0.9593952345521418, iteration: 65111
loss: 0.9810326099395752,grad_norm: 0.9980937922411003, iteration: 65112
loss: 0.9870218634605408,grad_norm: 0.9999990860495663, iteration: 65113
loss: 0.9998319149017334,grad_norm: 0.9493725732437268, iteration: 65114
loss: 1.0178594589233398,grad_norm: 0.9515855511459689, iteration: 65115
loss: 0.9808349609375,grad_norm: 0.9999999014943455, iteration: 65116
loss: 0.995327353477478,grad_norm: 0.9999990324929664, iteration: 65117
loss: 1.0108171701431274,grad_norm: 0.9999991895200588, iteration: 65118
loss: 1.013346791267395,grad_norm: 0.9815385097944448, iteration: 65119
loss: 0.9847699403762817,grad_norm: 0.9751510845009987, iteration: 65120
loss: 0.961941123008728,grad_norm: 0.9999991070907402, iteration: 65121
loss: 0.9983576536178589,grad_norm: 0.9959245818038168, iteration: 65122
loss: 1.0334230661392212,grad_norm: 0.9999990878760803, iteration: 65123
loss: 1.0226881504058838,grad_norm: 0.9268734349781504, iteration: 65124
loss: 1.0189564228057861,grad_norm: 0.9999990555654875, iteration: 65125
loss: 1.0137747526168823,grad_norm: 0.9808368900008274, iteration: 65126
loss: 1.0126464366912842,grad_norm: 0.9999991884348618, iteration: 65127
loss: 1.0293834209442139,grad_norm: 0.9097992950676335, iteration: 65128
loss: 1.0103884935379028,grad_norm: 0.9944433546159792, iteration: 65129
loss: 1.0018185377120972,grad_norm: 0.9999991337160989, iteration: 65130
loss: 1.013304591178894,grad_norm: 0.952395560488857, iteration: 65131
loss: 0.9992995858192444,grad_norm: 0.9999991206673912, iteration: 65132
loss: 1.0041284561157227,grad_norm: 0.8810248480121736, iteration: 65133
loss: 1.0054105520248413,grad_norm: 0.9849455736091257, iteration: 65134
loss: 0.9739068746566772,grad_norm: 0.9999991008722738, iteration: 65135
loss: 1.0088310241699219,grad_norm: 0.9741499935576845, iteration: 65136
loss: 1.0052188634872437,grad_norm: 0.9999993206610011, iteration: 65137
loss: 1.0103861093521118,grad_norm: 0.9999992410441506, iteration: 65138
loss: 1.0155234336853027,grad_norm: 0.9999992754750214, iteration: 65139
loss: 0.9704703688621521,grad_norm: 0.9999992453443005, iteration: 65140
loss: 0.9971010088920593,grad_norm: 0.8920039149568594, iteration: 65141
loss: 1.0079106092453003,grad_norm: 0.9642552800763199, iteration: 65142
loss: 0.9939665198326111,grad_norm: 0.9999991812425794, iteration: 65143
loss: 1.0322964191436768,grad_norm: 0.9999990476564764, iteration: 65144
loss: 1.013667345046997,grad_norm: 0.9999992911883305, iteration: 65145
loss: 0.9618515968322754,grad_norm: 0.9999990500639409, iteration: 65146
loss: 0.9766913056373596,grad_norm: 0.9999991508907909, iteration: 65147
loss: 1.040686845779419,grad_norm: 0.9999990449670876, iteration: 65148
loss: 1.0111390352249146,grad_norm: 0.9999992122187301, iteration: 65149
loss: 0.9927565455436707,grad_norm: 0.9999992250315098, iteration: 65150
loss: 1.0979502201080322,grad_norm: 0.9999991106881359, iteration: 65151
loss: 1.004235029220581,grad_norm: 0.9999991308822511, iteration: 65152
loss: 1.0290120840072632,grad_norm: 0.9999994739012776, iteration: 65153
loss: 1.02409827709198,grad_norm: 0.9999991166385325, iteration: 65154
loss: 1.0058060884475708,grad_norm: 0.9149914999216107, iteration: 65155
loss: 0.9581077098846436,grad_norm: 0.8550733126860716, iteration: 65156
loss: 1.0287833213806152,grad_norm: 0.9999991879592494, iteration: 65157
loss: 0.9636845588684082,grad_norm: 0.9999991440164056, iteration: 65158
loss: 1.004812240600586,grad_norm: 0.9999989441175735, iteration: 65159
loss: 0.9849777221679688,grad_norm: 0.9999991107623292, iteration: 65160
loss: 1.0021027326583862,grad_norm: 0.9494008389767935, iteration: 65161
loss: 1.0141733884811401,grad_norm: 0.9999990576609706, iteration: 65162
loss: 0.9923937916755676,grad_norm: 0.9999992593376411, iteration: 65163
loss: 1.0589313507080078,grad_norm: 0.999999243102727, iteration: 65164
loss: 1.030120611190796,grad_norm: 0.9999992839827059, iteration: 65165
loss: 0.9765952229499817,grad_norm: 0.9999992265869893, iteration: 65166
loss: 1.0165430307388306,grad_norm: 0.9999992515028671, iteration: 65167
loss: 1.0219794511795044,grad_norm: 0.9999990276031638, iteration: 65168
loss: 1.0052865743637085,grad_norm: 0.9999989845379817, iteration: 65169
loss: 1.0020946264266968,grad_norm: 0.9999990874346412, iteration: 65170
loss: 0.9917194247245789,grad_norm: 0.9586020530550737, iteration: 65171
loss: 1.0038031339645386,grad_norm: 0.9909504756197633, iteration: 65172
loss: 1.0117309093475342,grad_norm: 0.8734833855568004, iteration: 65173
loss: 1.0038032531738281,grad_norm: 0.999999111673726, iteration: 65174
loss: 1.0501781702041626,grad_norm: 0.9999990749587175, iteration: 65175
loss: 0.9795460104942322,grad_norm: 0.9999991483583631, iteration: 65176
loss: 0.9911015629768372,grad_norm: 0.9999990767883926, iteration: 65177
loss: 1.0186827182769775,grad_norm: 0.9690815002692879, iteration: 65178
loss: 1.027694821357727,grad_norm: 0.999999000898755, iteration: 65179
loss: 1.0103367567062378,grad_norm: 0.8832181747497908, iteration: 65180
loss: 1.0153597593307495,grad_norm: 0.999999338030907, iteration: 65181
loss: 1.0076591968536377,grad_norm: 0.9999992120717497, iteration: 65182
loss: 1.0100733041763306,grad_norm: 0.9999991378180053, iteration: 65183
loss: 0.9897343516349792,grad_norm: 0.999999174865528, iteration: 65184
loss: 0.9984056353569031,grad_norm: 0.823162462007983, iteration: 65185
loss: 0.9737392663955688,grad_norm: 0.9999991112342114, iteration: 65186
loss: 0.9682945609092712,grad_norm: 0.9446588941550813, iteration: 65187
loss: 1.0213254690170288,grad_norm: 0.9999990531600579, iteration: 65188
loss: 0.960024356842041,grad_norm: 0.9573278740873622, iteration: 65189
loss: 0.9854706525802612,grad_norm: 0.9999990167095939, iteration: 65190
loss: 0.97953200340271,grad_norm: 0.9085809409384786, iteration: 65191
loss: 1.0349159240722656,grad_norm: 0.9999993729787308, iteration: 65192
loss: 0.9948046207427979,grad_norm: 0.9999992691307009, iteration: 65193
loss: 0.9985623955726624,grad_norm: 0.9610455644655678, iteration: 65194
loss: 1.0381399393081665,grad_norm: 0.9999989812029109, iteration: 65195
loss: 0.9681493639945984,grad_norm: 0.9770859234848267, iteration: 65196
loss: 0.9704234600067139,grad_norm: 0.9575270185408532, iteration: 65197
loss: 0.9959569573402405,grad_norm: 0.9999992850865175, iteration: 65198
loss: 0.9883009791374207,grad_norm: 0.9999991591988872, iteration: 65199
loss: 1.011833667755127,grad_norm: 0.9999990900784025, iteration: 65200
loss: 0.9941935539245605,grad_norm: 0.9999990219025041, iteration: 65201
loss: 1.0240670442581177,grad_norm: 0.9999991118615428, iteration: 65202
loss: 1.0109177827835083,grad_norm: 0.9780695722927832, iteration: 65203
loss: 1.0266021490097046,grad_norm: 0.938980017434462, iteration: 65204
loss: 0.9638091921806335,grad_norm: 0.9999989904619593, iteration: 65205
loss: 0.9747816324234009,grad_norm: 0.9279767929772135, iteration: 65206
loss: 0.9759814739227295,grad_norm: 0.8920771723542026, iteration: 65207
loss: 1.000427007675171,grad_norm: 0.8544486604361363, iteration: 65208
loss: 1.0266139507293701,grad_norm: 0.9683985718103618, iteration: 65209
loss: 0.9749534130096436,grad_norm: 0.9999991371065591, iteration: 65210
loss: 1.0109992027282715,grad_norm: 0.9415800405992955, iteration: 65211
loss: 1.0110580921173096,grad_norm: 0.9999993668079615, iteration: 65212
loss: 1.006778597831726,grad_norm: 0.9999990095295125, iteration: 65213
loss: 1.0059524774551392,grad_norm: 0.9999993127997482, iteration: 65214
loss: 1.0397592782974243,grad_norm: 0.9415410367381049, iteration: 65215
loss: 1.0261279344558716,grad_norm: 0.9999990554760254, iteration: 65216
loss: 1.013761043548584,grad_norm: 0.9149376268061538, iteration: 65217
loss: 0.998333752155304,grad_norm: 0.9999992397432356, iteration: 65218
loss: 0.9888504147529602,grad_norm: 0.95999442775435, iteration: 65219
loss: 0.9711329340934753,grad_norm: 0.9397985545433993, iteration: 65220
loss: 1.001272439956665,grad_norm: 0.9999990495384958, iteration: 65221
loss: 0.9894894361495972,grad_norm: 0.992779686990685, iteration: 65222
loss: 0.9939668774604797,grad_norm: 0.9999991744095786, iteration: 65223
loss: 0.998151957988739,grad_norm: 0.9999989325846693, iteration: 65224
loss: 0.9653687477111816,grad_norm: 0.9999989730150819, iteration: 65225
loss: 1.0111207962036133,grad_norm: 0.9999992547263234, iteration: 65226
loss: 1.0153837203979492,grad_norm: 0.9999992278058806, iteration: 65227
loss: 1.00972318649292,grad_norm: 0.9999989712429868, iteration: 65228
loss: 1.009901523590088,grad_norm: 0.9999990272086152, iteration: 65229
loss: 0.9884471893310547,grad_norm: 0.9269556342492424, iteration: 65230
loss: 0.9866117238998413,grad_norm: 0.9999996127074958, iteration: 65231
loss: 1.0116935968399048,grad_norm: 0.9448396303541449, iteration: 65232
loss: 0.9833781719207764,grad_norm: 0.9140006932784436, iteration: 65233
loss: 0.990094006061554,grad_norm: 0.9999990960157303, iteration: 65234
loss: 1.0227112770080566,grad_norm: 0.9999992944996567, iteration: 65235
loss: 0.9894856214523315,grad_norm: 0.9128284476363451, iteration: 65236
loss: 1.0174859762191772,grad_norm: 0.9999992851932347, iteration: 65237
loss: 1.0428649187088013,grad_norm: 0.999999051018633, iteration: 65238
loss: 0.9980732202529907,grad_norm: 0.9999992607584064, iteration: 65239
loss: 0.9791743159294128,grad_norm: 0.9999990556701899, iteration: 65240
loss: 0.953335165977478,grad_norm: 0.9999992918278885, iteration: 65241
loss: 1.00322425365448,grad_norm: 0.9999990427094367, iteration: 65242
loss: 1.007344365119934,grad_norm: 0.999999177319172, iteration: 65243
loss: 0.9571167230606079,grad_norm: 0.9999990587157449, iteration: 65244
loss: 1.0260928869247437,grad_norm: 0.9030994435919261, iteration: 65245
loss: 0.98627769947052,grad_norm: 0.9999991520475432, iteration: 65246
loss: 0.9885542988777161,grad_norm: 0.9999990847866085, iteration: 65247
loss: 1.0225821733474731,grad_norm: 0.9999991519538962, iteration: 65248
loss: 0.9755754470825195,grad_norm: 0.9488401688395199, iteration: 65249
loss: 0.992380678653717,grad_norm: 0.9999991137412735, iteration: 65250
loss: 0.9655133485794067,grad_norm: 0.9999991019980828, iteration: 65251
loss: 1.0209465026855469,grad_norm: 0.9999990873452833, iteration: 65252
loss: 1.0164272785186768,grad_norm: 0.9726898870805827, iteration: 65253
loss: 1.0024014711380005,grad_norm: 0.9999990986650608, iteration: 65254
loss: 1.0597589015960693,grad_norm: 0.999999143553942, iteration: 65255
loss: 0.9997190833091736,grad_norm: 0.9999991290153845, iteration: 65256
loss: 1.0221461057662964,grad_norm: 0.9999990848565644, iteration: 65257
loss: 1.0275166034698486,grad_norm: 0.989009893981308, iteration: 65258
loss: 0.9844103455543518,grad_norm: 0.9999991707644407, iteration: 65259
loss: 0.9864633679389954,grad_norm: 0.9999991777117332, iteration: 65260
loss: 1.0022517442703247,grad_norm: 0.9631436887263658, iteration: 65261
loss: 0.9673740267753601,grad_norm: 0.9999990257535392, iteration: 65262
loss: 0.9853743314743042,grad_norm: 0.9999992022491531, iteration: 65263
loss: 1.0072550773620605,grad_norm: 0.9999991660618571, iteration: 65264
loss: 1.0044299364089966,grad_norm: 0.9999992657496389, iteration: 65265
loss: 1.0233796834945679,grad_norm: 0.9999992593620181, iteration: 65266
loss: 0.980518102645874,grad_norm: 0.8635484892864151, iteration: 65267
loss: 0.948564887046814,grad_norm: 0.8660934440075978, iteration: 65268
loss: 0.9779958128929138,grad_norm: 0.9999991245939853, iteration: 65269
loss: 1.03045654296875,grad_norm: 0.9999993644857388, iteration: 65270
loss: 0.9908981919288635,grad_norm: 0.9614345903909732, iteration: 65271
loss: 0.9941005706787109,grad_norm: 0.8850826393552776, iteration: 65272
loss: 1.0154476165771484,grad_norm: 0.9999990343706167, iteration: 65273
loss: 1.0139755010604858,grad_norm: 0.9999998199898041, iteration: 65274
loss: 0.9964654445648193,grad_norm: 0.9322453265225756, iteration: 65275
loss: 1.0040253400802612,grad_norm: 0.9999992186232839, iteration: 65276
loss: 1.0518076419830322,grad_norm: 0.9999991911903754, iteration: 65277
loss: 1.007698655128479,grad_norm: 0.9110133707938056, iteration: 65278
loss: 0.9801661372184753,grad_norm: 0.9638466778447093, iteration: 65279
loss: 1.0590282678604126,grad_norm: 0.9753421166586463, iteration: 65280
loss: 0.9942466616630554,grad_norm: 0.9999991867939412, iteration: 65281
loss: 0.9700506925582886,grad_norm: 0.9674579944702968, iteration: 65282
loss: 0.9702625870704651,grad_norm: 0.9673288824071425, iteration: 65283
loss: 0.991687536239624,grad_norm: 0.9366642919551817, iteration: 65284
loss: 0.988849937915802,grad_norm: 0.97108867131067, iteration: 65285
loss: 0.9968992471694946,grad_norm: 0.9999991478523369, iteration: 65286
loss: 1.0296356678009033,grad_norm: 0.9999991873920624, iteration: 65287
loss: 0.9559245705604553,grad_norm: 0.9806628776556563, iteration: 65288
loss: 0.9856655597686768,grad_norm: 0.989201476227702, iteration: 65289
loss: 0.9799571633338928,grad_norm: 0.943118862692911, iteration: 65290
loss: 0.9683151245117188,grad_norm: 0.9999992327254996, iteration: 65291
loss: 1.0181435346603394,grad_norm: 0.9999991084949303, iteration: 65292
loss: 1.0118954181671143,grad_norm: 0.9999991564781878, iteration: 65293
loss: 0.9644208550453186,grad_norm: 0.9999991063011097, iteration: 65294
loss: 0.9902949929237366,grad_norm: 0.9548960338843933, iteration: 65295
loss: 1.017299771308899,grad_norm: 0.9999992563066552, iteration: 65296
loss: 1.0133111476898193,grad_norm: 0.9999991571225072, iteration: 65297
loss: 0.9854043126106262,grad_norm: 0.9999991314320451, iteration: 65298
loss: 0.9609469175338745,grad_norm: 0.9999991932037495, iteration: 65299
loss: 1.0123134851455688,grad_norm: 0.9462625432439831, iteration: 65300
loss: 1.0115560293197632,grad_norm: 0.913561698196917, iteration: 65301
loss: 1.0220627784729004,grad_norm: 0.8603804484879786, iteration: 65302
loss: 0.9559627771377563,grad_norm: 0.9999992097407449, iteration: 65303
loss: 0.9892081618309021,grad_norm: 0.99999923207421, iteration: 65304
loss: 1.0100775957107544,grad_norm: 0.8880627724999061, iteration: 65305
loss: 1.0110766887664795,grad_norm: 0.9511981191444432, iteration: 65306
loss: 0.9914005398750305,grad_norm: 0.9999990413448451, iteration: 65307
loss: 1.028319239616394,grad_norm: 0.9999991550786628, iteration: 65308
loss: 1.017249345779419,grad_norm: 0.999999258646066, iteration: 65309
loss: 0.9936953186988831,grad_norm: 0.9999993039746793, iteration: 65310
loss: 0.9926159381866455,grad_norm: 0.9999990771480493, iteration: 65311
loss: 1.0063893795013428,grad_norm: 0.9270067412482264, iteration: 65312
loss: 1.0169494152069092,grad_norm: 0.9999990744512484, iteration: 65313
loss: 0.9936845898628235,grad_norm: 0.9999991741555969, iteration: 65314
loss: 1.0140455961227417,grad_norm: 0.876552095068258, iteration: 65315
loss: 1.002825379371643,grad_norm: 0.9556404250868485, iteration: 65316
loss: 1.001534342765808,grad_norm: 0.9999991213528007, iteration: 65317
loss: 0.9680790901184082,grad_norm: 0.9999993057135872, iteration: 65318
loss: 0.9935910105705261,grad_norm: 0.9999992219877132, iteration: 65319
loss: 0.9779580235481262,grad_norm: 0.9999990523233238, iteration: 65320
loss: 0.9919684529304504,grad_norm: 0.9999989558192551, iteration: 65321
loss: 0.9784058928489685,grad_norm: 0.9999991510003211, iteration: 65322
loss: 1.0346978902816772,grad_norm: 0.999999099980795, iteration: 65323
loss: 0.9741084575653076,grad_norm: 0.9461416137652656, iteration: 65324
loss: 1.0147340297698975,grad_norm: 0.9999989804680655, iteration: 65325
loss: 0.9642457365989685,grad_norm: 0.999999087946128, iteration: 65326
loss: 1.0103235244750977,grad_norm: 0.8665270660240079, iteration: 65327
loss: 0.9843467473983765,grad_norm: 0.9230099103004942, iteration: 65328
loss: 1.0257539749145508,grad_norm: 0.9517835531533428, iteration: 65329
loss: 0.9843246340751648,grad_norm: 0.9470971968495623, iteration: 65330
loss: 1.0251126289367676,grad_norm: 0.9999990559734236, iteration: 65331
loss: 1.0229405164718628,grad_norm: 0.9794400886986978, iteration: 65332
loss: 0.99345862865448,grad_norm: 0.9999992875325785, iteration: 65333
loss: 0.98233562707901,grad_norm: 0.999999089137152, iteration: 65334
loss: 0.9827975630760193,grad_norm: 0.970566857451109, iteration: 65335
loss: 0.9900714159011841,grad_norm: 0.9038660463781734, iteration: 65336
loss: 1.0095189809799194,grad_norm: 0.9999991200569727, iteration: 65337
loss: 0.9817953109741211,grad_norm: 0.9469168236993192, iteration: 65338
loss: 0.978234052658081,grad_norm: 0.983354719496585, iteration: 65339
loss: 1.0085846185684204,grad_norm: 0.9999993372601876, iteration: 65340
loss: 1.0203720331192017,grad_norm: 0.9594266036472215, iteration: 65341
loss: 1.0141814947128296,grad_norm: 0.9999990635626282, iteration: 65342
loss: 0.9945650100708008,grad_norm: 0.9999991857139209, iteration: 65343
loss: 1.0144309997558594,grad_norm: 0.9799931079777056, iteration: 65344
loss: 1.0264968872070312,grad_norm: 0.9999992341005912, iteration: 65345
loss: 1.0294733047485352,grad_norm: 0.999999051590404, iteration: 65346
loss: 0.9630085229873657,grad_norm: 0.8576381435868832, iteration: 65347
loss: 0.9926512837409973,grad_norm: 0.9732550282546962, iteration: 65348
loss: 0.9784727692604065,grad_norm: 0.9999991443270859, iteration: 65349
loss: 1.0079975128173828,grad_norm: 0.9999996339624524, iteration: 65350
loss: 1.0030632019042969,grad_norm: 0.9999991050331404, iteration: 65351
loss: 1.00944185256958,grad_norm: 0.92105790338273, iteration: 65352
loss: 1.048700213432312,grad_norm: 0.9999992826907954, iteration: 65353
loss: 0.9908191561698914,grad_norm: 0.9999991876722935, iteration: 65354
loss: 0.9879826307296753,grad_norm: 0.9999989903480453, iteration: 65355
loss: 1.0011773109436035,grad_norm: 0.9999991638570744, iteration: 65356
loss: 0.9754404425621033,grad_norm: 0.9999991311798562, iteration: 65357
loss: 0.9807437658309937,grad_norm: 0.9999991436497417, iteration: 65358
loss: 1.0133360624313354,grad_norm: 0.9999992162903576, iteration: 65359
loss: 1.0191997289657593,grad_norm: 0.8239161080789362, iteration: 65360
loss: 1.0125378370285034,grad_norm: 0.9503225282433935, iteration: 65361
loss: 1.0238356590270996,grad_norm: 0.8450844639774372, iteration: 65362
loss: 1.0037403106689453,grad_norm: 0.9999992288643194, iteration: 65363
loss: 1.0096992254257202,grad_norm: 0.9115030338668252, iteration: 65364
loss: 1.0023889541625977,grad_norm: 0.9289696256549387, iteration: 65365
loss: 1.1128238439559937,grad_norm: 0.9999999158303681, iteration: 65366
loss: 1.0630042552947998,grad_norm: 0.9999999221596532, iteration: 65367
loss: 1.0046557188034058,grad_norm: 0.9999992832294547, iteration: 65368
loss: 0.9803829789161682,grad_norm: 0.9999990248185264, iteration: 65369
loss: 1.015263319015503,grad_norm: 0.9615048863025498, iteration: 65370
loss: 0.9747359752655029,grad_norm: 0.9785139149199754, iteration: 65371
loss: 1.0034070014953613,grad_norm: 0.9999992434103443, iteration: 65372
loss: 1.0068243741989136,grad_norm: 0.9999990443409655, iteration: 65373
loss: 0.9972199201583862,grad_norm: 0.9999992313851322, iteration: 65374
loss: 0.9858607053756714,grad_norm: 0.9150661258387913, iteration: 65375
loss: 0.9947437047958374,grad_norm: 0.8270217112407471, iteration: 65376
loss: 1.0563433170318604,grad_norm: 0.9999990800294053, iteration: 65377
loss: 1.0049705505371094,grad_norm: 0.9999990850826674, iteration: 65378
loss: 1.049353837966919,grad_norm: 0.999999777807203, iteration: 65379
loss: 0.974756121635437,grad_norm: 0.9999992001251495, iteration: 65380
loss: 1.043138027191162,grad_norm: 0.9769707576148956, iteration: 65381
loss: 0.9827768802642822,grad_norm: 0.9999989149996829, iteration: 65382
loss: 1.0189228057861328,grad_norm: 0.9999992642335778, iteration: 65383
loss: 0.9878205060958862,grad_norm: 0.9999992396290578, iteration: 65384
loss: 0.9848597049713135,grad_norm: 0.9999996296674152, iteration: 65385
loss: 0.9972541332244873,grad_norm: 0.9999990708536555, iteration: 65386
loss: 1.01582932472229,grad_norm: 0.9999992779783236, iteration: 65387
loss: 0.9901643395423889,grad_norm: 0.968828979278952, iteration: 65388
loss: 1.0215208530426025,grad_norm: 0.9999991881310721, iteration: 65389
loss: 1.0346604585647583,grad_norm: 0.9999990168466671, iteration: 65390
loss: 0.9892821907997131,grad_norm: 0.9999992824433914, iteration: 65391
loss: 1.0199517011642456,grad_norm: 0.9999990860655537, iteration: 65392
loss: 0.9545668363571167,grad_norm: 0.9999991704626375, iteration: 65393
loss: 1.0316437482833862,grad_norm: 0.9794659014782219, iteration: 65394
loss: 0.9856050610542297,grad_norm: 0.9851629669877374, iteration: 65395
loss: 1.041272521018982,grad_norm: 0.9999990771962007, iteration: 65396
loss: 1.0264614820480347,grad_norm: 0.9724271732614633, iteration: 65397
loss: 0.9797719717025757,grad_norm: 0.9999989729852158, iteration: 65398
loss: 0.9909586310386658,grad_norm: 0.958797384365563, iteration: 65399
loss: 0.969855546951294,grad_norm: 0.9999991535025142, iteration: 65400
loss: 0.9947050213813782,grad_norm: 0.9426347900268676, iteration: 65401
loss: 0.9842535853385925,grad_norm: 0.929833264312341, iteration: 65402
loss: 1.0205448865890503,grad_norm: 0.9999990210610078, iteration: 65403
loss: 0.9996793866157532,grad_norm: 0.859568216239995, iteration: 65404
loss: 1.0095356702804565,grad_norm: 0.971774752708219, iteration: 65405
loss: 0.9902358055114746,grad_norm: 0.9999992136403771, iteration: 65406
loss: 0.9975153207778931,grad_norm: 0.9999990748977408, iteration: 65407
loss: 0.9633020162582397,grad_norm: 0.9999991316387841, iteration: 65408
loss: 1.0047231912612915,grad_norm: 0.8723106461889988, iteration: 65409
loss: 1.0645607709884644,grad_norm: 0.9999992254331195, iteration: 65410
loss: 0.9645854830741882,grad_norm: 0.9586849273484475, iteration: 65411
loss: 1.0186958312988281,grad_norm: 0.9672536210778905, iteration: 65412
loss: 0.977563738822937,grad_norm: 0.9277673860742061, iteration: 65413
loss: 1.0099117755889893,grad_norm: 0.9532024954128909, iteration: 65414
loss: 0.9986162781715393,grad_norm: 0.999999029945639, iteration: 65415
loss: 1.0089439153671265,grad_norm: 0.9999992991279201, iteration: 65416
loss: 0.9837011694908142,grad_norm: 0.9999992179669597, iteration: 65417
loss: 1.0069273710250854,grad_norm: 0.9696411728021864, iteration: 65418
loss: 0.9839755892753601,grad_norm: 0.9999990669350668, iteration: 65419
loss: 1.0355093479156494,grad_norm: 0.9999992995241864, iteration: 65420
loss: 1.0236260890960693,grad_norm: 0.9999994385585187, iteration: 65421
loss: 0.9591168165206909,grad_norm: 0.922880563818171, iteration: 65422
loss: 1.0033729076385498,grad_norm: 0.9999993035157164, iteration: 65423
loss: 1.002644419670105,grad_norm: 0.9999990468399175, iteration: 65424
loss: 0.980163037776947,grad_norm: 0.9999990345844075, iteration: 65425
loss: 1.0242043733596802,grad_norm: 0.9999992159328569, iteration: 65426
loss: 1.032029390335083,grad_norm: 0.912518231749674, iteration: 65427
loss: 0.978701114654541,grad_norm: 0.9999991320422194, iteration: 65428
loss: 1.0077624320983887,grad_norm: 0.9999990782458743, iteration: 65429
loss: 0.9657942652702332,grad_norm: 0.9999991961607732, iteration: 65430
loss: 1.0213885307312012,grad_norm: 0.9757659151612489, iteration: 65431
loss: 0.9831894040107727,grad_norm: 0.9999992385207173, iteration: 65432
loss: 0.9815904498100281,grad_norm: 0.9999991285583797, iteration: 65433
loss: 1.005494475364685,grad_norm: 0.9611463289964517, iteration: 65434
loss: 0.9980056285858154,grad_norm: 0.9999991071627261, iteration: 65435
loss: 0.9511433839797974,grad_norm: 0.9899788905715751, iteration: 65436
loss: 1.0176671743392944,grad_norm: 0.9999990637451327, iteration: 65437
loss: 0.9944552779197693,grad_norm: 0.9999991903896583, iteration: 65438
loss: 0.9988759756088257,grad_norm: 0.9777364659185754, iteration: 65439
loss: 0.9879313707351685,grad_norm: 0.9097828623828412, iteration: 65440
loss: 1.0036975145339966,grad_norm: 0.9999992143079953, iteration: 65441
loss: 1.004215121269226,grad_norm: 0.9358546454722461, iteration: 65442
loss: 0.9731404185295105,grad_norm: 0.9954544833078481, iteration: 65443
loss: 0.9971866011619568,grad_norm: 0.9871598653104535, iteration: 65444
loss: 0.9857485890388489,grad_norm: 0.9108599862047527, iteration: 65445
loss: 1.0271353721618652,grad_norm: 0.9999989739302525, iteration: 65446
loss: 0.9951531887054443,grad_norm: 0.999999024619049, iteration: 65447
loss: 0.9719584584236145,grad_norm: 0.9109992222294291, iteration: 65448
loss: 1.0090107917785645,grad_norm: 0.9495419339095222, iteration: 65449
loss: 1.0072083473205566,grad_norm: 0.9999990764776354, iteration: 65450
loss: 1.0113353729248047,grad_norm: 0.9999991477726969, iteration: 65451
loss: 0.9985775947570801,grad_norm: 0.9999992437903849, iteration: 65452
loss: 1.0149154663085938,grad_norm: 0.9999991701902499, iteration: 65453
loss: 1.027756929397583,grad_norm: 0.9999991292197096, iteration: 65454
loss: 1.0141412019729614,grad_norm: 0.9999995441369804, iteration: 65455
loss: 1.0034083127975464,grad_norm: 0.9999991289473236, iteration: 65456
loss: 0.9894063472747803,grad_norm: 0.9208417986395626, iteration: 65457
loss: 1.011885643005371,grad_norm: 0.8722051338017527, iteration: 65458
loss: 0.9893271923065186,grad_norm: 0.9821682119533162, iteration: 65459
loss: 1.0075733661651611,grad_norm: 0.999999295228646, iteration: 65460
loss: 1.006506323814392,grad_norm: 0.999999214872574, iteration: 65461
loss: 1.035373568534851,grad_norm: 0.9999990542797174, iteration: 65462
loss: 0.9419946074485779,grad_norm: 0.9999992484530345, iteration: 65463
loss: 0.987457275390625,grad_norm: 0.9999991729161932, iteration: 65464
loss: 0.9979722499847412,grad_norm: 0.9999991494048189, iteration: 65465
loss: 0.9871986508369446,grad_norm: 0.9846637188480187, iteration: 65466
loss: 1.003017783164978,grad_norm: 0.9999990680475183, iteration: 65467
loss: 0.9982007145881653,grad_norm: 0.9999989911118677, iteration: 65468
loss: 1.0055460929870605,grad_norm: 0.9234735872383164, iteration: 65469
loss: 0.9448434114456177,grad_norm: 0.9999991875960077, iteration: 65470
loss: 0.9650764465332031,grad_norm: 0.9903455560044386, iteration: 65471
loss: 0.9765765070915222,grad_norm: 0.9999991474035053, iteration: 65472
loss: 1.0085480213165283,grad_norm: 0.944779198442577, iteration: 65473
loss: 1.0083359479904175,grad_norm: 0.8757406307014103, iteration: 65474
loss: 1.0057092905044556,grad_norm: 0.8471543606562301, iteration: 65475
loss: 1.0642403364181519,grad_norm: 0.999999229833, iteration: 65476
loss: 1.009596347808838,grad_norm: 0.9171743770232381, iteration: 65477
loss: 1.006639838218689,grad_norm: 0.9999989569760013, iteration: 65478
loss: 1.0135376453399658,grad_norm: 0.9999991990657205, iteration: 65479
loss: 1.0031249523162842,grad_norm: 0.9999992152221674, iteration: 65480
loss: 1.0138249397277832,grad_norm: 0.9999991872515506, iteration: 65481
loss: 0.9922389388084412,grad_norm: 0.9999992855270453, iteration: 65482
loss: 1.027264952659607,grad_norm: 0.9999990967975333, iteration: 65483
loss: 1.000360369682312,grad_norm: 0.9501218203712739, iteration: 65484
loss: 1.0424652099609375,grad_norm: 0.999999225337765, iteration: 65485
loss: 0.9862533211708069,grad_norm: 0.9999991956761592, iteration: 65486
loss: 0.953418493270874,grad_norm: 0.999999060445928, iteration: 65487
loss: 0.9742071032524109,grad_norm: 0.9999991570256507, iteration: 65488
loss: 1.0647904872894287,grad_norm: 0.9999997348778978, iteration: 65489
loss: 1.0354771614074707,grad_norm: 0.999999163923013, iteration: 65490
loss: 0.9687826633453369,grad_norm: 0.9999990576842948, iteration: 65491
loss: 1.0162067413330078,grad_norm: 0.9901200244346107, iteration: 65492
loss: 0.9764328002929688,grad_norm: 0.8996745826766663, iteration: 65493
loss: 1.0408517122268677,grad_norm: 0.9999991919816777, iteration: 65494
loss: 0.9737082123756409,grad_norm: 0.9093332898203828, iteration: 65495
loss: 0.9871715307235718,grad_norm: 0.9999990642298991, iteration: 65496
loss: 0.9625729322433472,grad_norm: 0.9999989719115961, iteration: 65497
loss: 0.9847915768623352,grad_norm: 0.999453307528969, iteration: 65498
loss: 0.9980507493019104,grad_norm: 0.9999991173734297, iteration: 65499
loss: 0.9932398200035095,grad_norm: 0.9130872218940532, iteration: 65500
loss: 1.0196011066436768,grad_norm: 0.9354483856433599, iteration: 65501
loss: 0.9974879026412964,grad_norm: 0.9028877554000683, iteration: 65502
loss: 0.98236083984375,grad_norm: 0.9999990078895421, iteration: 65503
loss: 0.9695897698402405,grad_norm: 0.9999990508390073, iteration: 65504
loss: 1.033734679222107,grad_norm: 0.9999989674185166, iteration: 65505
loss: 1.0295395851135254,grad_norm: 0.9999992061985609, iteration: 65506
loss: 1.0103718042373657,grad_norm: 0.9999990469722138, iteration: 65507
loss: 1.0083571672439575,grad_norm: 0.9999991654229657, iteration: 65508
loss: 1.0707728862762451,grad_norm: 0.950381540289785, iteration: 65509
loss: 0.9727188944816589,grad_norm: 0.924208784960702, iteration: 65510
loss: 1.0451760292053223,grad_norm: 0.9999989691059252, iteration: 65511
loss: 0.9865245223045349,grad_norm: 0.82799872728806, iteration: 65512
loss: 0.9557516574859619,grad_norm: 0.9999989921883048, iteration: 65513
loss: 1.0199421644210815,grad_norm: 0.9148494856926691, iteration: 65514
loss: 0.9847614169120789,grad_norm: 0.9999990952254636, iteration: 65515
loss: 0.999176561832428,grad_norm: 0.9717475235746087, iteration: 65516
loss: 0.9735963940620422,grad_norm: 0.9999992825277593, iteration: 65517
loss: 1.0342493057250977,grad_norm: 0.9999991626436404, iteration: 65518
loss: 1.033750295639038,grad_norm: 0.9999990509606044, iteration: 65519
loss: 0.9748422503471375,grad_norm: 0.8974063045204906, iteration: 65520
loss: 0.996393084526062,grad_norm: 0.9999990869873564, iteration: 65521
loss: 1.004749059677124,grad_norm: 0.9085757502249542, iteration: 65522
loss: 1.0083459615707397,grad_norm: 0.9010626796116453, iteration: 65523
loss: 0.9898899793624878,grad_norm: 0.9999992206425479, iteration: 65524
loss: 1.0047171115875244,grad_norm: 0.9879275633971041, iteration: 65525
loss: 1.0195525884628296,grad_norm: 0.9121602466173271, iteration: 65526
loss: 1.022728443145752,grad_norm: 0.9999991047154843, iteration: 65527
loss: 1.0043548345565796,grad_norm: 0.7763354084013692, iteration: 65528
loss: 1.0392518043518066,grad_norm: 0.9999995976317928, iteration: 65529
loss: 1.015270471572876,grad_norm: 0.9442532424064951, iteration: 65530
loss: 0.981390118598938,grad_norm: 0.9999993898626368, iteration: 65531
loss: 1.0102810859680176,grad_norm: 0.8396852223415853, iteration: 65532
loss: 1.0124127864837646,grad_norm: 0.9423854230053722, iteration: 65533
loss: 0.9748508334159851,grad_norm: 0.9999991506845906, iteration: 65534
loss: 1.0190788507461548,grad_norm: 0.9999991380773419, iteration: 65535
loss: 1.0297092199325562,grad_norm: 0.9999990884062513, iteration: 65536
loss: 0.9714001417160034,grad_norm: 0.9576662149455676, iteration: 65537
loss: 1.003045678138733,grad_norm: 0.9886879714938291, iteration: 65538
loss: 1.0089799165725708,grad_norm: 0.9999992972753511, iteration: 65539
loss: 1.0157029628753662,grad_norm: 0.9999991271123855, iteration: 65540
loss: 1.0200343132019043,grad_norm: 0.999999098394186, iteration: 65541
loss: 1.010638952255249,grad_norm: 0.9343494886862683, iteration: 65542
loss: 1.0028892755508423,grad_norm: 0.9470527351746277, iteration: 65543
loss: 0.9915744066238403,grad_norm: 0.9428134345761039, iteration: 65544
loss: 0.963158369064331,grad_norm: 0.897666494570456, iteration: 65545
loss: 0.9685418605804443,grad_norm: 0.960094817608795, iteration: 65546
loss: 0.9855930805206299,grad_norm: 0.999999569055771, iteration: 65547
loss: 1.0201990604400635,grad_norm: 0.9999992126739212, iteration: 65548
loss: 1.0028005838394165,grad_norm: 0.9999992014645736, iteration: 65549
loss: 1.0411919355392456,grad_norm: 0.9228451459308522, iteration: 65550
loss: 0.9904157519340515,grad_norm: 0.9999990971800006, iteration: 65551
loss: 1.0211659669876099,grad_norm: 0.9999993017615539, iteration: 65552
loss: 1.0077449083328247,grad_norm: 0.9999992072801482, iteration: 65553
loss: 1.0009167194366455,grad_norm: 0.94939978824604, iteration: 65554
loss: 1.0108615159988403,grad_norm: 0.9999992460512671, iteration: 65555
loss: 0.9885909557342529,grad_norm: 0.9999994031738509, iteration: 65556
loss: 1.018902063369751,grad_norm: 0.9999991161908235, iteration: 65557
loss: 1.0238368511199951,grad_norm: 0.9735741349263867, iteration: 65558
loss: 0.9789467453956604,grad_norm: 0.9821618697785631, iteration: 65559
loss: 1.0187183618545532,grad_norm: 0.9999992610298712, iteration: 65560
loss: 1.0097224712371826,grad_norm: 0.9999992474427051, iteration: 65561
loss: 0.9560955166816711,grad_norm: 0.8234203326982588, iteration: 65562
loss: 1.0155565738677979,grad_norm: 0.9999990910885619, iteration: 65563
loss: 1.0017402172088623,grad_norm: 0.9999990897990038, iteration: 65564
loss: 0.9950526356697083,grad_norm: 0.9682518416285414, iteration: 65565
loss: 0.9961812496185303,grad_norm: 0.9999988777053641, iteration: 65566
loss: 0.9452871084213257,grad_norm: 0.999999039284329, iteration: 65567
loss: 0.9961316585540771,grad_norm: 0.9999991016609625, iteration: 65568
loss: 1.0004674196243286,grad_norm: 0.9999990281795463, iteration: 65569
loss: 1.092760443687439,grad_norm: 0.9999989899467103, iteration: 65570
loss: 1.0014393329620361,grad_norm: 0.9097879230330095, iteration: 65571
loss: 0.99355149269104,grad_norm: 0.9999991302506517, iteration: 65572
loss: 1.0011837482452393,grad_norm: 0.9145530354459266, iteration: 65573
loss: 0.9908657073974609,grad_norm: 0.9273846888499596, iteration: 65574
loss: 0.9939286708831787,grad_norm: 0.9999990796000289, iteration: 65575
loss: 1.0344507694244385,grad_norm: 0.9999991552363653, iteration: 65576
loss: 1.008123517036438,grad_norm: 0.999999002309869, iteration: 65577
loss: 0.9830543398857117,grad_norm: 0.999999255107633, iteration: 65578
loss: 0.969115138053894,grad_norm: 0.9999991354153971, iteration: 65579
loss: 0.9982377886772156,grad_norm: 0.9639738577364934, iteration: 65580
loss: 1.0236321687698364,grad_norm: 0.9999991186722095, iteration: 65581
loss: 0.9704970717430115,grad_norm: 0.8875492516847344, iteration: 65582
loss: 1.0017681121826172,grad_norm: 0.9999990185020615, iteration: 65583
loss: 0.9935269355773926,grad_norm: 0.9642726609758094, iteration: 65584
loss: 0.9979698061943054,grad_norm: 0.9724902600443812, iteration: 65585
loss: 1.006928563117981,grad_norm: 0.9999990934626766, iteration: 65586
loss: 0.9727956652641296,grad_norm: 0.8926839150489169, iteration: 65587
loss: 1.0189660787582397,grad_norm: 0.9106600399405631, iteration: 65588
loss: 1.0204768180847168,grad_norm: 0.9999990067864765, iteration: 65589
loss: 1.0171563625335693,grad_norm: 0.9999991057354723, iteration: 65590
loss: 0.9955810904502869,grad_norm: 0.9999989373271679, iteration: 65591
loss: 0.9690276384353638,grad_norm: 0.99999918070826, iteration: 65592
loss: 0.9956727623939514,grad_norm: 0.9999991255048222, iteration: 65593
loss: 1.004494547843933,grad_norm: 0.9000800608469243, iteration: 65594
loss: 0.990760087966919,grad_norm: 0.8975480772637278, iteration: 65595
loss: 0.9959500432014465,grad_norm: 0.9999990312082604, iteration: 65596
loss: 1.039122462272644,grad_norm: 0.9999991944144649, iteration: 65597
loss: 0.993156373500824,grad_norm: 0.9999989770301454, iteration: 65598
loss: 1.0070356130599976,grad_norm: 0.9749247110173737, iteration: 65599
loss: 0.9641240239143372,grad_norm: 0.9999995919065973, iteration: 65600
loss: 0.9958574771881104,grad_norm: 0.9865460310253341, iteration: 65601
loss: 1.018326759338379,grad_norm: 0.9999990603601372, iteration: 65602
loss: 1.0024558305740356,grad_norm: 0.9999990850025068, iteration: 65603
loss: 1.0048985481262207,grad_norm: 0.8066492066715176, iteration: 65604
loss: 1.0036767721176147,grad_norm: 0.8550410466270313, iteration: 65605
loss: 1.0206727981567383,grad_norm: 0.9333146882727262, iteration: 65606
loss: 0.9961985945701599,grad_norm: 0.8178746792724022, iteration: 65607
loss: 1.0414152145385742,grad_norm: 0.9999990934759589, iteration: 65608
loss: 1.0141810178756714,grad_norm: 0.9999990729255329, iteration: 65609
loss: 0.9815382957458496,grad_norm: 0.8838505501377406, iteration: 65610
loss: 0.998446524143219,grad_norm: 0.9999991186485446, iteration: 65611
loss: 1.0070836544036865,grad_norm: 0.9999990756848968, iteration: 65612
loss: 0.9972335696220398,grad_norm: 0.999488105475179, iteration: 65613
loss: 0.9877472519874573,grad_norm: 0.8937612934787191, iteration: 65614
loss: 0.9955652356147766,grad_norm: 0.9999993693423201, iteration: 65615
loss: 0.9706861972808838,grad_norm: 0.9999991704315127, iteration: 65616
loss: 1.007926106452942,grad_norm: 0.9999990406365572, iteration: 65617
loss: 1.0041513442993164,grad_norm: 0.9999990560195201, iteration: 65618
loss: 0.9926112294197083,grad_norm: 0.9999992042705917, iteration: 65619
loss: 0.9753714799880981,grad_norm: 0.9999992285011573, iteration: 65620
loss: 1.0072338581085205,grad_norm: 0.9878213926516057, iteration: 65621
loss: 0.9811424016952515,grad_norm: 0.9999989775280691, iteration: 65622
loss: 1.0023890733718872,grad_norm: 0.999849592546399, iteration: 65623
loss: 1.0032540559768677,grad_norm: 0.9999991393474373, iteration: 65624
loss: 1.0245651006698608,grad_norm: 0.9999991997676408, iteration: 65625
loss: 1.0196707248687744,grad_norm: 0.9999989481652488, iteration: 65626
loss: 0.9834317564964294,grad_norm: 0.9999990293555594, iteration: 65627
loss: 0.9793075323104858,grad_norm: 0.9999990629125043, iteration: 65628
loss: 0.9872151017189026,grad_norm: 0.9999993731065737, iteration: 65629
loss: 0.9799681901931763,grad_norm: 0.9999990847307566, iteration: 65630
loss: 0.9865435361862183,grad_norm: 0.9741128922627872, iteration: 65631
loss: 0.9766516089439392,grad_norm: 0.9269953264736227, iteration: 65632
loss: 0.9842045903205872,grad_norm: 0.9074880634770315, iteration: 65633
loss: 0.9790017604827881,grad_norm: 0.9389161779876406, iteration: 65634
loss: 1.0068955421447754,grad_norm: 0.9999990021695353, iteration: 65635
loss: 1.023885726928711,grad_norm: 0.9793520950277776, iteration: 65636
loss: 0.9916055798530579,grad_norm: 0.9632070694078432, iteration: 65637
loss: 0.9916531443595886,grad_norm: 0.99999909994281, iteration: 65638
loss: 0.9740257859230042,grad_norm: 0.9763158198942411, iteration: 65639
loss: 0.9682148098945618,grad_norm: 0.9999991398428993, iteration: 65640
loss: 1.025633692741394,grad_norm: 0.9999992549867593, iteration: 65641
loss: 1.0629550218582153,grad_norm: 0.999998858832589, iteration: 65642
loss: 1.0207117795944214,grad_norm: 0.999999536774809, iteration: 65643
loss: 0.9937126040458679,grad_norm: 0.8586718410748987, iteration: 65644
loss: 1.0455098152160645,grad_norm: 0.9999991566828317, iteration: 65645
loss: 1.0267037153244019,grad_norm: 0.9999995392279641, iteration: 65646
loss: 0.9639265537261963,grad_norm: 0.9999989985318344, iteration: 65647
loss: 0.9954095482826233,grad_norm: 0.9383487376391529, iteration: 65648
loss: 0.9614679217338562,grad_norm: 0.9999992800004859, iteration: 65649
loss: 0.9981705546379089,grad_norm: 0.9999992018133873, iteration: 65650
loss: 0.9707401394844055,grad_norm: 0.9329833796582948, iteration: 65651
loss: 1.0099493265151978,grad_norm: 0.9999992070610516, iteration: 65652
loss: 0.9873208999633789,grad_norm: 0.9237274987915302, iteration: 65653
loss: 0.9927704334259033,grad_norm: 0.9999992252830439, iteration: 65654
loss: 1.0201389789581299,grad_norm: 0.9692600833827687, iteration: 65655
loss: 1.0065398216247559,grad_norm: 0.9999992095689008, iteration: 65656
loss: 1.0166505575180054,grad_norm: 0.9999990688583033, iteration: 65657
loss: 1.00425124168396,grad_norm: 0.90542489417276, iteration: 65658
loss: 1.0231865644454956,grad_norm: 0.9999989040673536, iteration: 65659
loss: 1.0110424757003784,grad_norm: 0.999999171543934, iteration: 65660
loss: 0.9819779992103577,grad_norm: 0.8988061157864473, iteration: 65661
loss: 1.0372684001922607,grad_norm: 0.9088614205338226, iteration: 65662
loss: 1.035913348197937,grad_norm: 0.9999991091926845, iteration: 65663
loss: 1.0160707235336304,grad_norm: 0.9568660242712049, iteration: 65664
loss: 1.006636142730713,grad_norm: 0.9043656863691532, iteration: 65665
loss: 1.0107712745666504,grad_norm: 0.9999990771928735, iteration: 65666
loss: 1.0005372762680054,grad_norm: 0.9999996600483876, iteration: 65667
loss: 1.006505012512207,grad_norm: 0.917157449929012, iteration: 65668
loss: 1.031335711479187,grad_norm: 0.9999992429998321, iteration: 65669
loss: 0.9721363186836243,grad_norm: 0.9999991724001661, iteration: 65670
loss: 1.0151829719543457,grad_norm: 0.9119662236285361, iteration: 65671
loss: 0.9910849332809448,grad_norm: 0.9999991125621549, iteration: 65672
loss: 1.017161250114441,grad_norm: 0.9999990683732605, iteration: 65673
loss: 0.9975802302360535,grad_norm: 0.9999991134870407, iteration: 65674
loss: 0.9903205633163452,grad_norm: 0.9999989657897197, iteration: 65675
loss: 0.9877498745918274,grad_norm: 0.951228759914971, iteration: 65676
loss: 0.9893189072608948,grad_norm: 0.9999992907914844, iteration: 65677
loss: 1.0432610511779785,grad_norm: 0.9999995388539482, iteration: 65678
loss: 0.9890569448471069,grad_norm: 0.9522715393995352, iteration: 65679
loss: 0.9785361289978027,grad_norm: 0.9618289219622825, iteration: 65680
loss: 0.9901293516159058,grad_norm: 0.9999990421830778, iteration: 65681
loss: 1.0263479948043823,grad_norm: 0.9999991045842529, iteration: 65682
loss: 1.0005900859832764,grad_norm: 0.9999991541639117, iteration: 65683
loss: 0.9896341562271118,grad_norm: 0.9999991352010031, iteration: 65684
loss: 0.9786706566810608,grad_norm: 0.9999990616548431, iteration: 65685
loss: 0.9846439361572266,grad_norm: 0.9999991751449573, iteration: 65686
loss: 1.0284303426742554,grad_norm: 0.9999992528378558, iteration: 65687
loss: 0.9940742254257202,grad_norm: 0.9697522555758691, iteration: 65688
loss: 1.0282001495361328,grad_norm: 0.9999990894780307, iteration: 65689
loss: 0.9606897234916687,grad_norm: 0.9999990722255194, iteration: 65690
loss: 1.0289582014083862,grad_norm: 0.9999992024770573, iteration: 65691
loss: 1.0274242162704468,grad_norm: 0.9037170822076107, iteration: 65692
loss: 0.9769222140312195,grad_norm: 0.9022280339555085, iteration: 65693
loss: 1.0600171089172363,grad_norm: 0.9999992956689063, iteration: 65694
loss: 0.9802560210227966,grad_norm: 0.9253722019271562, iteration: 65695
loss: 1.0210272073745728,grad_norm: 0.9245037588397896, iteration: 65696
loss: 0.9659225344657898,grad_norm: 0.9999991091242693, iteration: 65697
loss: 1.0352535247802734,grad_norm: 0.999999154187397, iteration: 65698
loss: 0.979184627532959,grad_norm: 0.9999990459914179, iteration: 65699
loss: 0.9405638575553894,grad_norm: 0.9634525366862561, iteration: 65700
loss: 1.0231826305389404,grad_norm: 0.9781025088320083, iteration: 65701
loss: 0.992088258266449,grad_norm: 0.8756743502481469, iteration: 65702
loss: 1.0227513313293457,grad_norm: 0.8793937124979417, iteration: 65703
loss: 0.9959874749183655,grad_norm: 0.999999180560314, iteration: 65704
loss: 1.006646990776062,grad_norm: 0.9676001962972905, iteration: 65705
loss: 0.9929161667823792,grad_norm: 0.870884242243665, iteration: 65706
loss: 1.0002964735031128,grad_norm: 0.999999177309451, iteration: 65707
loss: 1.0038843154907227,grad_norm: 0.999998835562578, iteration: 65708
loss: 0.998165488243103,grad_norm: 0.9619706978974081, iteration: 65709
loss: 1.017768144607544,grad_norm: 0.9602246524061753, iteration: 65710
loss: 0.9912673234939575,grad_norm: 0.9999991407486449, iteration: 65711
loss: 1.0566781759262085,grad_norm: 0.999999600042507, iteration: 65712
loss: 1.0228861570358276,grad_norm: 0.9999989851582872, iteration: 65713
loss: 0.9913784861564636,grad_norm: 0.9036201036643953, iteration: 65714
loss: 0.9835231304168701,grad_norm: 0.8787005224132995, iteration: 65715
loss: 0.9957787990570068,grad_norm: 0.9645299307960842, iteration: 65716
loss: 0.9991782307624817,grad_norm: 0.9999992094153884, iteration: 65717
loss: 0.994368314743042,grad_norm: 0.9820801127835526, iteration: 65718
loss: 0.9916657209396362,grad_norm: 0.931454356571865, iteration: 65719
loss: 1.0088279247283936,grad_norm: 0.986062300529863, iteration: 65720
loss: 1.0110523700714111,grad_norm: 0.999998986412265, iteration: 65721
loss: 1.0112788677215576,grad_norm: 0.9999990616668385, iteration: 65722
loss: 0.9675258994102478,grad_norm: 0.999999169933803, iteration: 65723
loss: 1.009016513824463,grad_norm: 0.9249953910562196, iteration: 65724
loss: 1.0156688690185547,grad_norm: 0.9999993065646828, iteration: 65725
loss: 1.0010944604873657,grad_norm: 0.9999990572638804, iteration: 65726
loss: 0.9932685494422913,grad_norm: 0.9999990809451488, iteration: 65727
loss: 1.0111538171768188,grad_norm: 0.9999992863331785, iteration: 65728
loss: 1.0024464130401611,grad_norm: 0.9861248922861386, iteration: 65729
loss: 1.0113871097564697,grad_norm: 0.9999991457750508, iteration: 65730
loss: 0.9849321246147156,grad_norm: 0.9999990610282772, iteration: 65731
loss: 0.982687771320343,grad_norm: 0.9999992414783162, iteration: 65732
loss: 1.0221582651138306,grad_norm: 0.9999989999478659, iteration: 65733
loss: 1.028092384338379,grad_norm: 0.9999992115827696, iteration: 65734
loss: 0.9824256300926208,grad_norm: 0.9325481911418158, iteration: 65735
loss: 1.0329041481018066,grad_norm: 0.9999996197988539, iteration: 65736
loss: 0.9898326992988586,grad_norm: 0.9999992175473627, iteration: 65737
loss: 0.9991585612297058,grad_norm: 0.9474148457531905, iteration: 65738
loss: 1.0054547786712646,grad_norm: 0.9999991608578953, iteration: 65739
loss: 0.985798180103302,grad_norm: 0.9999992478778865, iteration: 65740
loss: 0.9772769212722778,grad_norm: 0.9999992165428121, iteration: 65741
loss: 0.9628978967666626,grad_norm: 0.9725351244235865, iteration: 65742
loss: 0.9939353466033936,grad_norm: 0.9583238974354428, iteration: 65743
loss: 0.9950771331787109,grad_norm: 0.9999989411083946, iteration: 65744
loss: 0.9894080758094788,grad_norm: 0.9917968503622191, iteration: 65745
loss: 0.9625145792961121,grad_norm: 0.9999992355321635, iteration: 65746
loss: 1.0044151544570923,grad_norm: 0.9428581024280387, iteration: 65747
loss: 0.9968639016151428,grad_norm: 0.9999994506003258, iteration: 65748
loss: 0.9927981495857239,grad_norm: 0.9999992550325831, iteration: 65749
loss: 0.9699427485466003,grad_norm: 0.9777873769275326, iteration: 65750
loss: 0.9931065440177917,grad_norm: 0.9999993050547851, iteration: 65751
loss: 0.97953861951828,grad_norm: 0.9595970308240119, iteration: 65752
loss: 0.9906418323516846,grad_norm: 0.9293630237442279, iteration: 65753
loss: 0.9663847088813782,grad_norm: 0.9999991438434469, iteration: 65754
loss: 1.0329324007034302,grad_norm: 0.9999989905590313, iteration: 65755
loss: 1.0262058973312378,grad_norm: 0.9999991286120925, iteration: 65756
loss: 0.9896956086158752,grad_norm: 0.8986721830558072, iteration: 65757
loss: 1.0247883796691895,grad_norm: 0.9612287822700477, iteration: 65758
loss: 0.9969738721847534,grad_norm: 0.9065234487601224, iteration: 65759
loss: 1.0307608842849731,grad_norm: 0.9999990633208855, iteration: 65760
loss: 0.9746590256690979,grad_norm: 0.99999922200331, iteration: 65761
loss: 0.9655871987342834,grad_norm: 0.9630233044696556, iteration: 65762
loss: 0.9799497127532959,grad_norm: 0.999999383653763, iteration: 65763
loss: 0.9596496820449829,grad_norm: 0.9999992310693767, iteration: 65764
loss: 1.0075503587722778,grad_norm: 0.9797209745085251, iteration: 65765
loss: 0.996826171875,grad_norm: 0.857539324708899, iteration: 65766
loss: 0.9959104657173157,grad_norm: 0.9771210636781178, iteration: 65767
loss: 1.013547420501709,grad_norm: 0.9999992169349085, iteration: 65768
loss: 1.0199888944625854,grad_norm: 0.9999991245094406, iteration: 65769
loss: 0.9835118651390076,grad_norm: 0.9999990790711221, iteration: 65770
loss: 1.0448472499847412,grad_norm: 0.9383348696912079, iteration: 65771
loss: 0.9823907613754272,grad_norm: 0.999999376407224, iteration: 65772
loss: 1.01162850856781,grad_norm: 0.9804918640590489, iteration: 65773
loss: 0.999387264251709,grad_norm: 0.9487400807705805, iteration: 65774
loss: 1.031218409538269,grad_norm: 0.9999992931039023, iteration: 65775
loss: 1.0362920761108398,grad_norm: 0.9166056273077517, iteration: 65776
loss: 1.022223949432373,grad_norm: 0.9999990090942661, iteration: 65777
loss: 0.9637320041656494,grad_norm: 0.9999989603882699, iteration: 65778
loss: 1.0064072608947754,grad_norm: 0.9874097643481671, iteration: 65779
loss: 0.9960909485816956,grad_norm: 0.9999996805704867, iteration: 65780
loss: 1.0409181118011475,grad_norm: 0.8694508435962812, iteration: 65781
loss: 1.0301603078842163,grad_norm: 0.9999993596279354, iteration: 65782
loss: 1.057121992111206,grad_norm: 0.9999992891134971, iteration: 65783
loss: 0.9786207675933838,grad_norm: 0.9999995856281814, iteration: 65784
loss: 0.9668708443641663,grad_norm: 0.9346948039999423, iteration: 65785
loss: 0.9794638156890869,grad_norm: 0.9213486486042024, iteration: 65786
loss: 1.0165690183639526,grad_norm: 0.9778054401183481, iteration: 65787
loss: 0.9975433349609375,grad_norm: 0.9999991944572171, iteration: 65788
loss: 0.983727753162384,grad_norm: 0.9999991304368104, iteration: 65789
loss: 1.0005661249160767,grad_norm: 0.999998920694879, iteration: 65790
loss: 0.9636237621307373,grad_norm: 0.9999992294912564, iteration: 65791
loss: 0.9978057146072388,grad_norm: 0.9598325506704009, iteration: 65792
loss: 0.9749172329902649,grad_norm: 0.999999011046584, iteration: 65793
loss: 0.9978334307670593,grad_norm: 0.9999992471473067, iteration: 65794
loss: 1.0016965866088867,grad_norm: 0.8655085876401547, iteration: 65795
loss: 0.997649610042572,grad_norm: 0.9853257175241563, iteration: 65796
loss: 1.0199981927871704,grad_norm: 0.9999990088441358, iteration: 65797
loss: 0.9798398017883301,grad_norm: 0.9999990712292103, iteration: 65798
loss: 0.9783429503440857,grad_norm: 0.9999992566102626, iteration: 65799
loss: 1.0367571115493774,grad_norm: 0.9999992203195213, iteration: 65800
loss: 0.9936782121658325,grad_norm: 0.9998019741816385, iteration: 65801
loss: 1.0021036863327026,grad_norm: 0.9999990036523548, iteration: 65802
loss: 1.0398154258728027,grad_norm: 0.9999991856357437, iteration: 65803
loss: 0.9659351706504822,grad_norm: 0.9999991120126122, iteration: 65804
loss: 1.0997868776321411,grad_norm: 0.999999398745451, iteration: 65805
loss: 1.0027196407318115,grad_norm: 0.9999991633171247, iteration: 65806
loss: 1.0195039510726929,grad_norm: 0.858664914231113, iteration: 65807
loss: 0.9905510544776917,grad_norm: 0.9162571943963955, iteration: 65808
loss: 1.0021693706512451,grad_norm: 0.8430326524111803, iteration: 65809
loss: 0.9818710088729858,grad_norm: 0.9999992097776247, iteration: 65810
loss: 0.9791738390922546,grad_norm: 0.9999992518206576, iteration: 65811
loss: 0.9910711646080017,grad_norm: 0.9999990789990137, iteration: 65812
loss: 0.9719682931900024,grad_norm: 0.9999991068449967, iteration: 65813
loss: 1.00820791721344,grad_norm: 0.912692522111305, iteration: 65814
loss: 0.9671449661254883,grad_norm: 0.9999990799687808, iteration: 65815
loss: 1.0100098848342896,grad_norm: 0.9999993806903823, iteration: 65816
loss: 1.0054116249084473,grad_norm: 0.9999990859095953, iteration: 65817
loss: 0.982343852519989,grad_norm: 0.9999991386588136, iteration: 65818
loss: 0.9850325584411621,grad_norm: 0.9999990119880086, iteration: 65819
loss: 0.9927367568016052,grad_norm: 0.9999996941686395, iteration: 65820
loss: 0.9877378940582275,grad_norm: 0.926686149330995, iteration: 65821
loss: 0.9669018387794495,grad_norm: 0.9993826403949584, iteration: 65822
loss: 0.9675681591033936,grad_norm: 0.9999993729759324, iteration: 65823
loss: 0.9619379639625549,grad_norm: 0.9999990520938002, iteration: 65824
loss: 0.9963204860687256,grad_norm: 0.9628483642739625, iteration: 65825
loss: 1.0008052587509155,grad_norm: 0.9816130219845212, iteration: 65826
loss: 0.9861283302307129,grad_norm: 0.9999991905247587, iteration: 65827
loss: 1.0188264846801758,grad_norm: 0.9999992524799609, iteration: 65828
loss: 1.003600001335144,grad_norm: 0.8615338814136811, iteration: 65829
loss: 0.9881616234779358,grad_norm: 0.9999993065242894, iteration: 65830
loss: 0.9898983240127563,grad_norm: 0.9999992449538734, iteration: 65831
loss: 0.9734883904457092,grad_norm: 0.9999993157252619, iteration: 65832
loss: 0.9811971187591553,grad_norm: 0.9822033659240466, iteration: 65833
loss: 0.9866811633110046,grad_norm: 0.8484685879525699, iteration: 65834
loss: 0.9934887886047363,grad_norm: 0.9999990960507265, iteration: 65835
loss: 1.005961537361145,grad_norm: 0.9999996445192991, iteration: 65836
loss: 1.0304347276687622,grad_norm: 0.9999995017601474, iteration: 65837
loss: 1.0182236433029175,grad_norm: 0.9999990207831626, iteration: 65838
loss: 0.9999077320098877,grad_norm: 0.9999990060982165, iteration: 65839
loss: 1.0401530265808105,grad_norm: 0.9999991255322741, iteration: 65840
loss: 0.9888154864311218,grad_norm: 0.999999041771752, iteration: 65841
loss: 1.0302194356918335,grad_norm: 0.9999990742690195, iteration: 65842
loss: 1.0159618854522705,grad_norm: 0.9999991097721227, iteration: 65843
loss: 0.9898231625556946,grad_norm: 0.9999990091945649, iteration: 65844
loss: 1.009060025215149,grad_norm: 0.9999991537255798, iteration: 65845
loss: 1.005509853363037,grad_norm: 0.9771171243286144, iteration: 65846
loss: 1.0558661222457886,grad_norm: 0.9999991047978236, iteration: 65847
loss: 1.0855529308319092,grad_norm: 0.9999997385596218, iteration: 65848
loss: 1.0194123983383179,grad_norm: 0.9999992632323293, iteration: 65849
loss: 1.0381696224212646,grad_norm: 0.9999992468177507, iteration: 65850
loss: 1.0174466371536255,grad_norm: 0.9999995787150847, iteration: 65851
loss: 1.064464807510376,grad_norm: 0.999999804539268, iteration: 65852
loss: 0.9976814985275269,grad_norm: 0.9849317495347609, iteration: 65853
loss: 1.013261079788208,grad_norm: 0.999999154450529, iteration: 65854
loss: 0.9790988564491272,grad_norm: 0.8823641827062666, iteration: 65855
loss: 0.980095624923706,grad_norm: 0.9999989793283759, iteration: 65856
loss: 1.0271698236465454,grad_norm: 0.9240186663434723, iteration: 65857
loss: 0.9867750406265259,grad_norm: 0.9055206507213251, iteration: 65858
loss: 1.0218970775604248,grad_norm: 0.9890576369175148, iteration: 65859
loss: 0.9748485088348389,grad_norm: 0.9745209338943653, iteration: 65860
loss: 1.0178718566894531,grad_norm: 0.9999992325884904, iteration: 65861
loss: 1.0085861682891846,grad_norm: 0.9999991430620764, iteration: 65862
loss: 1.0601801872253418,grad_norm: 0.9999993296858407, iteration: 65863
loss: 0.9866035580635071,grad_norm: 0.9999995373999984, iteration: 65864
loss: 1.0447150468826294,grad_norm: 0.9999995453904711, iteration: 65865
loss: 1.0010287761688232,grad_norm: 0.9999991050250272, iteration: 65866
loss: 1.0156389474868774,grad_norm: 0.9999990918636666, iteration: 65867
loss: 1.0444107055664062,grad_norm: 0.9999992789951779, iteration: 65868
loss: 1.0172847509384155,grad_norm: 0.9999991359007866, iteration: 65869
loss: 1.0007410049438477,grad_norm: 0.9999989709105727, iteration: 65870
loss: 0.9658427238464355,grad_norm: 0.9999993387442107, iteration: 65871
loss: 0.9821661114692688,grad_norm: 0.8903747809439844, iteration: 65872
loss: 0.9850035905838013,grad_norm: 0.9999992081108867, iteration: 65873
loss: 0.9870781302452087,grad_norm: 0.9512871806556938, iteration: 65874
loss: 1.007055640220642,grad_norm: 0.9317266845785676, iteration: 65875
loss: 0.9950582385063171,grad_norm: 0.9999991117702796, iteration: 65876
loss: 1.0048977136611938,grad_norm: 0.9182820939530404, iteration: 65877
loss: 1.041479468345642,grad_norm: 0.9999992658295667, iteration: 65878
loss: 1.0131057500839233,grad_norm: 0.9517234987371606, iteration: 65879
loss: 1.0535290241241455,grad_norm: 0.9999990822075119, iteration: 65880
loss: 1.0392943620681763,grad_norm: 0.9999997682053295, iteration: 65881
loss: 1.0106964111328125,grad_norm: 0.9999991098443745, iteration: 65882
loss: 1.0563091039657593,grad_norm: 0.9999993434137292, iteration: 65883
loss: 1.0383714437484741,grad_norm: 0.9076586112973416, iteration: 65884
loss: 1.0309351682662964,grad_norm: 0.8118697962873584, iteration: 65885
loss: 0.9739263653755188,grad_norm: 0.9796554441715222, iteration: 65886
loss: 0.9636102318763733,grad_norm: 0.8975816453527576, iteration: 65887
loss: 0.9994283318519592,grad_norm: 0.9999991604947353, iteration: 65888
loss: 0.994920015335083,grad_norm: 0.9999990844036425, iteration: 65889
loss: 0.9839993119239807,grad_norm: 0.9999990230187256, iteration: 65890
loss: 1.0227776765823364,grad_norm: 0.9999991928788, iteration: 65891
loss: 1.0118632316589355,grad_norm: 0.9999992678791111, iteration: 65892
loss: 1.0216096639633179,grad_norm: 0.9813010094817135, iteration: 65893
loss: 1.064185380935669,grad_norm: 0.9999991559908642, iteration: 65894
loss: 1.0133881568908691,grad_norm: 0.8535902467920573, iteration: 65895
loss: 1.0263333320617676,grad_norm: 0.9999990889852753, iteration: 65896
loss: 1.0387448072433472,grad_norm: 0.9999993907435628, iteration: 65897
loss: 1.036904215812683,grad_norm: 0.9293135808296359, iteration: 65898
loss: 1.00423264503479,grad_norm: 0.9999990275610575, iteration: 65899
loss: 0.9771546125411987,grad_norm: 0.9506083617899268, iteration: 65900
loss: 0.9944913983345032,grad_norm: 0.9999990652649479, iteration: 65901
loss: 0.9921345710754395,grad_norm: 0.9474626457856817, iteration: 65902
loss: 1.010692834854126,grad_norm: 0.9999991294905419, iteration: 65903
loss: 1.0152324438095093,grad_norm: 0.9629801696941225, iteration: 65904
loss: 1.0122473239898682,grad_norm: 0.9999990769783051, iteration: 65905
loss: 1.0132410526275635,grad_norm: 0.99999906021129, iteration: 65906
loss: 0.9919795989990234,grad_norm: 0.999999047509078, iteration: 65907
loss: 0.9545736312866211,grad_norm: 0.9747349546576534, iteration: 65908
loss: 1.021483063697815,grad_norm: 0.9999991234169855, iteration: 65909
loss: 0.9668301939964294,grad_norm: 0.9175791934532121, iteration: 65910
loss: 0.9785135388374329,grad_norm: 0.9999992093398974, iteration: 65911
loss: 1.0006017684936523,grad_norm: 0.9999994460124892, iteration: 65912
loss: 0.996609091758728,grad_norm: 0.9587032137820016, iteration: 65913
loss: 0.984673023223877,grad_norm: 0.9983489110265329, iteration: 65914
loss: 1.0115137100219727,grad_norm: 0.9999991446849918, iteration: 65915
loss: 1.0405720472335815,grad_norm: 0.9946354219676632, iteration: 65916
loss: 1.0030078887939453,grad_norm: 0.9999990020832157, iteration: 65917
loss: 0.9888818264007568,grad_norm: 0.9999991409136116, iteration: 65918
loss: 1.0085749626159668,grad_norm: 0.9999996099921722, iteration: 65919
loss: 0.972562313079834,grad_norm: 0.8635970059417729, iteration: 65920
loss: 1.0128248929977417,grad_norm: 0.9999991371162215, iteration: 65921
loss: 0.9528933167457581,grad_norm: 0.9999989718354642, iteration: 65922
loss: 1.0217527151107788,grad_norm: 0.9999992834067504, iteration: 65923
loss: 1.0210471153259277,grad_norm: 0.9999990941237863, iteration: 65924
loss: 1.065127968788147,grad_norm: 0.9999991740159995, iteration: 65925
loss: 1.0181711912155151,grad_norm: 0.999999160561823, iteration: 65926
loss: 0.9941172003746033,grad_norm: 0.9589025978071207, iteration: 65927
loss: 1.0312869548797607,grad_norm: 0.999999136036014, iteration: 65928
loss: 1.029597520828247,grad_norm: 0.9047903852700742, iteration: 65929
loss: 0.9823577404022217,grad_norm: 0.9999991140666815, iteration: 65930
loss: 0.961033284664154,grad_norm: 0.9999990235610033, iteration: 65931
loss: 1.0192255973815918,grad_norm: 0.9741616901388189, iteration: 65932
loss: 1.0119160413742065,grad_norm: 0.9983066120721292, iteration: 65933
loss: 1.032490849494934,grad_norm: 0.9999992041344484, iteration: 65934
loss: 0.960870087146759,grad_norm: 0.9841841047500465, iteration: 65935
loss: 1.0160421133041382,grad_norm: 0.9999990932874019, iteration: 65936
loss: 0.9562870264053345,grad_norm: 0.987630742113752, iteration: 65937
loss: 1.0583562850952148,grad_norm: 0.9999994929167263, iteration: 65938
loss: 0.9962493181228638,grad_norm: 0.9999992208391045, iteration: 65939
loss: 1.015760064125061,grad_norm: 0.9999992889406869, iteration: 65940
loss: 1.0000214576721191,grad_norm: 0.9999992212516204, iteration: 65941
loss: 0.999890148639679,grad_norm: 0.99999928121911, iteration: 65942
loss: 1.0123867988586426,grad_norm: 0.9500089125349379, iteration: 65943
loss: 1.022076964378357,grad_norm: 0.9843628267714256, iteration: 65944
loss: 0.9938595294952393,grad_norm: 0.9264621943906212, iteration: 65945
loss: 0.9943493008613586,grad_norm: 0.9999991858774496, iteration: 65946
loss: 0.9928281903266907,grad_norm: 0.9999992968235765, iteration: 65947
loss: 0.985014796257019,grad_norm: 0.9999992414498876, iteration: 65948
loss: 0.9795497059822083,grad_norm: 0.969663396120675, iteration: 65949
loss: 1.0011407136917114,grad_norm: 0.9999991184976792, iteration: 65950
loss: 0.9869290590286255,grad_norm: 0.999999269582787, iteration: 65951
loss: 1.0165140628814697,grad_norm: 0.9246371606873778, iteration: 65952
loss: 1.0278170108795166,grad_norm: 0.9912859685801598, iteration: 65953
loss: 0.9901203513145447,grad_norm: 0.9999992258399195, iteration: 65954
loss: 0.9957844614982605,grad_norm: 0.9239459929507311, iteration: 65955
loss: 1.0057015419006348,grad_norm: 0.9816063413819688, iteration: 65956
loss: 0.9956080317497253,grad_norm: 0.9999990896605407, iteration: 65957
loss: 0.9753469228744507,grad_norm: 0.902831326681203, iteration: 65958
loss: 0.9870291948318481,grad_norm: 0.9011738087163168, iteration: 65959
loss: 1.0343191623687744,grad_norm: 0.9454374648864325, iteration: 65960
loss: 0.9969802498817444,grad_norm: 0.9999992047327275, iteration: 65961
loss: 0.9896920323371887,grad_norm: 0.9999990237204651, iteration: 65962
loss: 1.033372163772583,grad_norm: 0.9999993817655718, iteration: 65963
loss: 0.9973042011260986,grad_norm: 0.9999991503609625, iteration: 65964
loss: 1.0030220746994019,grad_norm: 0.9149402176242559, iteration: 65965
loss: 0.9688202142715454,grad_norm: 0.9999990551152166, iteration: 65966
loss: 1.015382170677185,grad_norm: 0.999999013632621, iteration: 65967
loss: 0.9895265698432922,grad_norm: 0.9999991632019353, iteration: 65968
loss: 0.9792740345001221,grad_norm: 0.9999991055708131, iteration: 65969
loss: 1.0321158170700073,grad_norm: 0.9999991033131308, iteration: 65970
loss: 0.999946653842926,grad_norm: 0.9609146356994692, iteration: 65971
loss: 1.0223811864852905,grad_norm: 0.9999990351985889, iteration: 65972
loss: 0.9794637560844421,grad_norm: 0.9825356882719909, iteration: 65973
loss: 1.010908842086792,grad_norm: 0.9999991403250197, iteration: 65974
loss: 0.9822829365730286,grad_norm: 0.9999991249375746, iteration: 65975
loss: 0.9975754618644714,grad_norm: 0.9999991264624649, iteration: 65976
loss: 1.014456868171692,grad_norm: 0.9999991296458051, iteration: 65977
loss: 1.0111758708953857,grad_norm: 0.9999997715077675, iteration: 65978
loss: 1.0066603422164917,grad_norm: 0.999999081215583, iteration: 65979
loss: 1.0080547332763672,grad_norm: 0.9458836019496665, iteration: 65980
loss: 1.0092954635620117,grad_norm: 0.8334611026795258, iteration: 65981
loss: 1.0293333530426025,grad_norm: 0.937556033112058, iteration: 65982
loss: 1.0044175386428833,grad_norm: 0.9999990230651786, iteration: 65983
loss: 1.0206130743026733,grad_norm: 0.9999992118697858, iteration: 65984
loss: 1.0129843950271606,grad_norm: 0.9999990513554391, iteration: 65985
loss: 1.000809669494629,grad_norm: 0.9476264175612451, iteration: 65986
loss: 1.0027579069137573,grad_norm: 0.9999992247723211, iteration: 65987
loss: 1.0159145593643188,grad_norm: 0.9999995836796829, iteration: 65988
loss: 0.9746312499046326,grad_norm: 0.9629991679624166, iteration: 65989
loss: 0.9967945218086243,grad_norm: 0.9108244261173823, iteration: 65990
loss: 0.9695580005645752,grad_norm: 0.9864699432083072, iteration: 65991
loss: 0.9518105387687683,grad_norm: 0.9586287258471179, iteration: 65992
loss: 1.0120201110839844,grad_norm: 0.9999999773682797, iteration: 65993
loss: 1.0105252265930176,grad_norm: 0.9703204455459494, iteration: 65994
loss: 1.0157872438430786,grad_norm: 0.9999991930869256, iteration: 65995
loss: 1.0405293703079224,grad_norm: 0.9999991601119967, iteration: 65996
loss: 0.9729995727539062,grad_norm: 0.9640139211198235, iteration: 65997
loss: 1.0086488723754883,grad_norm: 0.9999991403588085, iteration: 65998
loss: 0.9709041118621826,grad_norm: 0.9999991370872562, iteration: 65999
loss: 1.0000280141830444,grad_norm: 0.9999989980754727, iteration: 66000
loss: 0.9578405022621155,grad_norm: 0.999999227744399, iteration: 66001
loss: 1.0132030248641968,grad_norm: 0.9547845740139912, iteration: 66002
loss: 1.0018550157546997,grad_norm: 0.9999991298204334, iteration: 66003
loss: 1.0071645975112915,grad_norm: 0.9999989422528582, iteration: 66004
loss: 0.9955298900604248,grad_norm: 0.9999990605664663, iteration: 66005
loss: 1.1002378463745117,grad_norm: 0.9999999736371896, iteration: 66006
loss: 1.0130977630615234,grad_norm: 0.9999992534393968, iteration: 66007
loss: 1.0677082538604736,grad_norm: 0.9999996884498582, iteration: 66008
loss: 0.9923432469367981,grad_norm: 0.999999523692037, iteration: 66009
loss: 1.0111885070800781,grad_norm: 0.9999999405847548, iteration: 66010
loss: 0.9983901977539062,grad_norm: 0.9999991513384442, iteration: 66011
loss: 1.0247795581817627,grad_norm: 0.999999056529559, iteration: 66012
loss: 1.0149182081222534,grad_norm: 0.9999992082624661, iteration: 66013
loss: 1.0040448904037476,grad_norm: 0.9999991959702629, iteration: 66014
loss: 0.9923482537269592,grad_norm: 0.999999187604456, iteration: 66015
loss: 1.0360809564590454,grad_norm: 0.9999990612077567, iteration: 66016
loss: 1.008023738861084,grad_norm: 0.9999991847015512, iteration: 66017
loss: 1.0177634954452515,grad_norm: 0.9999991739324979, iteration: 66018
loss: 0.9771788120269775,grad_norm: 0.9999992709428369, iteration: 66019
loss: 1.0084322690963745,grad_norm: 0.8718178522360728, iteration: 66020
loss: 1.010965347290039,grad_norm: 0.9999991349704596, iteration: 66021
loss: 1.0161411762237549,grad_norm: 0.8741126526312631, iteration: 66022
loss: 1.0046168565750122,grad_norm: 0.9999992927306058, iteration: 66023
loss: 1.0019768476486206,grad_norm: 0.9999991336214417, iteration: 66024
loss: 1.0289965867996216,grad_norm: 0.9999998568534613, iteration: 66025
loss: 1.0319981575012207,grad_norm: 0.9999991325590645, iteration: 66026
loss: 1.0195032358169556,grad_norm: 0.9605844200606476, iteration: 66027
loss: 1.004799723625183,grad_norm: 0.9704348075005762, iteration: 66028
loss: 1.0299934148788452,grad_norm: 0.9999991796547667, iteration: 66029
loss: 0.9897716641426086,grad_norm: 0.9999993316449187, iteration: 66030
loss: 1.0007892847061157,grad_norm: 0.9999991120950744, iteration: 66031
loss: 1.0131161212921143,grad_norm: 0.9999990805317975, iteration: 66032
loss: 0.9939427375793457,grad_norm: 0.9999991378203049, iteration: 66033
loss: 1.032932996749878,grad_norm: 0.999999537684052, iteration: 66034
loss: 0.9832763075828552,grad_norm: 0.9999990256959584, iteration: 66035
loss: 0.9907599091529846,grad_norm: 0.9999994432526119, iteration: 66036
loss: 1.0106635093688965,grad_norm: 0.9144536716814471, iteration: 66037
loss: 1.0643413066864014,grad_norm: 0.8670660864529478, iteration: 66038
loss: 1.0459327697753906,grad_norm: 0.9999994942717091, iteration: 66039
loss: 0.9749447703361511,grad_norm: 0.9999995013954885, iteration: 66040
loss: 0.9883897304534912,grad_norm: 0.9999990783684028, iteration: 66041
loss: 0.997273325920105,grad_norm: 0.9999990159173332, iteration: 66042
loss: 1.0537570714950562,grad_norm: 0.9999994504133126, iteration: 66043
loss: 1.0210827589035034,grad_norm: 0.9999994304106096, iteration: 66044
loss: 1.0037732124328613,grad_norm: 0.9516925800886901, iteration: 66045
loss: 1.001577615737915,grad_norm: 0.9999993258461047, iteration: 66046
loss: 1.0033661127090454,grad_norm: 0.9999992002746786, iteration: 66047
loss: 1.0151528120040894,grad_norm: 0.9999990325060674, iteration: 66048
loss: 0.9724138379096985,grad_norm: 0.8473795563012111, iteration: 66049
loss: 1.0239602327346802,grad_norm: 0.9999990107905911, iteration: 66050
loss: 0.9857531785964966,grad_norm: 0.9999990901827019, iteration: 66051
loss: 1.0131534337997437,grad_norm: 0.998388528524928, iteration: 66052
loss: 0.9617449045181274,grad_norm: 0.898318338579621, iteration: 66053
loss: 0.965848445892334,grad_norm: 0.999999246237258, iteration: 66054
loss: 1.0384161472320557,grad_norm: 0.9999991706985852, iteration: 66055
loss: 0.9812064170837402,grad_norm: 0.9999990937798743, iteration: 66056
loss: 1.002541184425354,grad_norm: 0.9999990115119224, iteration: 66057
loss: 1.1576473712921143,grad_norm: 0.999999396661807, iteration: 66058
loss: 1.019756555557251,grad_norm: 0.9578092121078773, iteration: 66059
loss: 1.0063738822937012,grad_norm: 0.9999992810681869, iteration: 66060
loss: 0.9878978729248047,grad_norm: 0.9999992042618815, iteration: 66061
loss: 0.9797171354293823,grad_norm: 0.9999991444574257, iteration: 66062
loss: 1.0018885135650635,grad_norm: 0.8557179918976106, iteration: 66063
loss: 0.9960623383522034,grad_norm: 0.9999990156526458, iteration: 66064
loss: 1.0109641551971436,grad_norm: 0.9999991434404095, iteration: 66065
loss: 0.9863422513008118,grad_norm: 0.881832032065141, iteration: 66066
loss: 1.020185112953186,grad_norm: 0.9118271162484972, iteration: 66067
loss: 0.9575440883636475,grad_norm: 0.9780836537258871, iteration: 66068
loss: 1.013662576675415,grad_norm: 0.9999993297858069, iteration: 66069
loss: 0.9877366423606873,grad_norm: 0.9987114677675416, iteration: 66070
loss: 1.0200762748718262,grad_norm: 0.9999994974713113, iteration: 66071
loss: 1.0241224765777588,grad_norm: 0.9999991349218335, iteration: 66072
loss: 1.001700758934021,grad_norm: 0.9999990376015552, iteration: 66073
loss: 0.9897858500480652,grad_norm: 0.9605877336386184, iteration: 66074
loss: 1.0012820959091187,grad_norm: 0.9999991727983475, iteration: 66075
loss: 0.9738315343856812,grad_norm: 0.9999990389867123, iteration: 66076
loss: 1.0216481685638428,grad_norm: 0.8922739236907085, iteration: 66077
loss: 1.0124589204788208,grad_norm: 0.9229344260690511, iteration: 66078
loss: 1.0419596433639526,grad_norm: 0.9999992682413026, iteration: 66079
loss: 0.9777858257293701,grad_norm: 0.999999044442947, iteration: 66080
loss: 1.033211350440979,grad_norm: 0.999999025414739, iteration: 66081
loss: 1.0123814344406128,grad_norm: 0.9073644726338103, iteration: 66082
loss: 0.9900602102279663,grad_norm: 0.9999991321613695, iteration: 66083
loss: 0.999656081199646,grad_norm: 0.9999998702346925, iteration: 66084
loss: 1.003618597984314,grad_norm: 0.9999992018582976, iteration: 66085
loss: 1.0088533163070679,grad_norm: 0.9999995544055696, iteration: 66086
loss: 1.028102993965149,grad_norm: 0.999999557625599, iteration: 66087
loss: 1.0724226236343384,grad_norm: 0.999999799094302, iteration: 66088
loss: 1.0176007747650146,grad_norm: 0.9999991516005236, iteration: 66089
loss: 0.9996967315673828,grad_norm: 0.9999992811067343, iteration: 66090
loss: 0.9895423650741577,grad_norm: 0.9999990119056738, iteration: 66091
loss: 1.0054844617843628,grad_norm: 0.9806330874230067, iteration: 66092
loss: 0.9985513091087341,grad_norm: 0.9999992008061043, iteration: 66093
loss: 0.9919442534446716,grad_norm: 0.9393842372170772, iteration: 66094
loss: 1.0199990272521973,grad_norm: 0.8993946113698233, iteration: 66095
loss: 1.1425442695617676,grad_norm: 0.9999999129060794, iteration: 66096
loss: 0.9995376467704773,grad_norm: 0.9999989803870699, iteration: 66097
loss: 1.0296509265899658,grad_norm: 0.9999992087966273, iteration: 66098
loss: 0.9775433540344238,grad_norm: 0.9999990391507074, iteration: 66099
loss: 1.0480259656906128,grad_norm: 0.9405120181905336, iteration: 66100
loss: 0.9943956136703491,grad_norm: 0.9956563463344967, iteration: 66101
loss: 0.9934878349304199,grad_norm: 0.9999990287458348, iteration: 66102
loss: 0.9852518439292908,grad_norm: 0.9999994028882956, iteration: 66103
loss: 1.0193824768066406,grad_norm: 0.9999990716046824, iteration: 66104
loss: 1.0104870796203613,grad_norm: 0.9999991378096598, iteration: 66105
loss: 0.962514340877533,grad_norm: 0.9999991485980488, iteration: 66106
loss: 0.9924178123474121,grad_norm: 0.9999992098648859, iteration: 66107
loss: 1.0219281911849976,grad_norm: 0.962551541122017, iteration: 66108
loss: 1.0187560319900513,grad_norm: 0.9999991200806249, iteration: 66109
loss: 1.013038992881775,grad_norm: 0.9886473524243045, iteration: 66110
loss: 1.0015945434570312,grad_norm: 0.9999992345616285, iteration: 66111
loss: 0.9962010979652405,grad_norm: 0.9999990939694305, iteration: 66112
loss: 1.0162789821624756,grad_norm: 0.9999990880835803, iteration: 66113
loss: 1.0048952102661133,grad_norm: 0.9999989562225045, iteration: 66114
loss: 1.0005648136138916,grad_norm: 0.999999913340711, iteration: 66115
loss: 1.0027379989624023,grad_norm: 0.9999992476190486, iteration: 66116
loss: 1.0515167713165283,grad_norm: 0.9999997786080771, iteration: 66117
loss: 1.0921615362167358,grad_norm: 0.9999993974910393, iteration: 66118
loss: 1.1664948463439941,grad_norm: 0.9999994434258854, iteration: 66119
loss: 1.0011109113693237,grad_norm: 0.999999615025027, iteration: 66120
loss: 0.9817008376121521,grad_norm: 0.9999990792139662, iteration: 66121
loss: 1.0235549211502075,grad_norm: 0.9999990401949476, iteration: 66122
loss: 0.9908079504966736,grad_norm: 0.8402593331802315, iteration: 66123
loss: 1.0174728631973267,grad_norm: 0.9999991501405604, iteration: 66124
loss: 1.009825587272644,grad_norm: 0.9999996893586787, iteration: 66125
loss: 1.0542349815368652,grad_norm: 0.9847990238319172, iteration: 66126
loss: 0.982058048248291,grad_norm: 0.9999991673583528, iteration: 66127
loss: 0.9944005012512207,grad_norm: 0.8636091830183825, iteration: 66128
loss: 0.984408438205719,grad_norm: 0.9999990210079964, iteration: 66129
loss: 1.0178672075271606,grad_norm: 0.9999989696092617, iteration: 66130
loss: 1.0223402976989746,grad_norm: 0.9999998708829408, iteration: 66131
loss: 1.0173145532608032,grad_norm: 0.9999996195030697, iteration: 66132
loss: 1.0313518047332764,grad_norm: 0.9999992813119405, iteration: 66133
loss: 1.0163750648498535,grad_norm: 0.9999990619205475, iteration: 66134
loss: 0.9491841197013855,grad_norm: 0.999999178966383, iteration: 66135
loss: 1.005828619003296,grad_norm: 0.996642114827293, iteration: 66136
loss: 1.0142048597335815,grad_norm: 0.9999990796144914, iteration: 66137
loss: 0.9912790060043335,grad_norm: 0.9999991558023767, iteration: 66138
loss: 1.0046199560165405,grad_norm: 0.9573132236019463, iteration: 66139
loss: 0.9754322171211243,grad_norm: 0.999998973186763, iteration: 66140
loss: 0.9874100685119629,grad_norm: 0.9999990650220809, iteration: 66141
loss: 0.9920470118522644,grad_norm: 0.9002502494503866, iteration: 66142
loss: 1.0233888626098633,grad_norm: 0.9999994395763423, iteration: 66143
loss: 1.0218229293823242,grad_norm: 0.9999995285287263, iteration: 66144
loss: 1.4351445436477661,grad_norm: 0.9999999212464888, iteration: 66145
loss: 0.9837686419487,grad_norm: 0.9999992524430387, iteration: 66146
loss: 0.9839966893196106,grad_norm: 0.9999989981944762, iteration: 66147
loss: 1.1663780212402344,grad_norm: 0.99999992513977, iteration: 66148
loss: 1.0199941396713257,grad_norm: 0.9999992460658439, iteration: 66149
loss: 0.9851064085960388,grad_norm: 0.9999990774733672, iteration: 66150
loss: 1.0627408027648926,grad_norm: 0.9999995272473641, iteration: 66151
loss: 1.0214163064956665,grad_norm: 0.8677735149728488, iteration: 66152
loss: 1.0642004013061523,grad_norm: 0.9999998054295863, iteration: 66153
loss: 1.0906617641448975,grad_norm: 0.9999995026082921, iteration: 66154
loss: 0.9676874876022339,grad_norm: 0.9999994163138646, iteration: 66155
loss: 1.0249946117401123,grad_norm: 0.9999995432034289, iteration: 66156
loss: 0.9850354790687561,grad_norm: 0.9999990047337891, iteration: 66157
loss: 0.9948611855506897,grad_norm: 0.9999991478717499, iteration: 66158
loss: 1.0089055299758911,grad_norm: 0.9999989359510015, iteration: 66159
loss: 1.0385655164718628,grad_norm: 0.9999991285734742, iteration: 66160
loss: 1.0120093822479248,grad_norm: 0.9999996368026313, iteration: 66161
loss: 0.9934163689613342,grad_norm: 0.9999992481329535, iteration: 66162
loss: 1.0119653940200806,grad_norm: 0.9999993180901486, iteration: 66163
loss: 0.9859498143196106,grad_norm: 0.9999991867477561, iteration: 66164
loss: 1.0070260763168335,grad_norm: 0.9999991676184737, iteration: 66165
loss: 1.0030750036239624,grad_norm: 0.8439942620338691, iteration: 66166
loss: 1.0105193853378296,grad_norm: 0.9999993278072237, iteration: 66167
loss: 1.0082757472991943,grad_norm: 0.9999992390656256, iteration: 66168
loss: 1.0237361192703247,grad_norm: 0.9999993277090541, iteration: 66169
loss: 0.9881870746612549,grad_norm: 0.9844358095633012, iteration: 66170
loss: 1.027596116065979,grad_norm: 0.9052260540448666, iteration: 66171
loss: 1.0100380182266235,grad_norm: 0.9999995627828638, iteration: 66172
loss: 0.9927249550819397,grad_norm: 0.9999993918324517, iteration: 66173
loss: 1.0375875234603882,grad_norm: 0.9999991771068, iteration: 66174
loss: 1.0474424362182617,grad_norm: 0.9999996170594752, iteration: 66175
loss: 0.9942595958709717,grad_norm: 0.9999991152615584, iteration: 66176
loss: 1.0086023807525635,grad_norm: 0.9999992211049449, iteration: 66177
loss: 1.0030858516693115,grad_norm: 0.9999991749519777, iteration: 66178
loss: 0.9833551645278931,grad_norm: 0.9999989635961007, iteration: 66179
loss: 0.9965192079544067,grad_norm: 0.9402722890547347, iteration: 66180
loss: 1.004128098487854,grad_norm: 0.9999994597344384, iteration: 66181
loss: 1.016755223274231,grad_norm: 0.9999991161388317, iteration: 66182
loss: 0.9821422696113586,grad_norm: 0.9999989899367902, iteration: 66183
loss: 1.04068124294281,grad_norm: 0.9999993295307399, iteration: 66184
loss: 1.0149177312850952,grad_norm: 0.9999996757496726, iteration: 66185
loss: 1.003356695175171,grad_norm: 0.9286254774487634, iteration: 66186
loss: 0.9904431700706482,grad_norm: 0.9999991492215731, iteration: 66187
loss: 0.9834929704666138,grad_norm: 0.9999991184375013, iteration: 66188
loss: 1.0168906450271606,grad_norm: 0.9999990944917924, iteration: 66189
loss: 1.0079927444458008,grad_norm: 0.9999991176535625, iteration: 66190
loss: 1.0627388954162598,grad_norm: 0.9999991804066586, iteration: 66191
loss: 0.9922176599502563,grad_norm: 0.9999991171919796, iteration: 66192
loss: 1.0089740753173828,grad_norm: 0.9999991752829868, iteration: 66193
loss: 1.02290940284729,grad_norm: 0.9999992470754546, iteration: 66194
loss: 0.9943156838417053,grad_norm: 0.9999996082477987, iteration: 66195
loss: 1.0148805379867554,grad_norm: 0.999999223823559, iteration: 66196
loss: 0.9927653670310974,grad_norm: 0.9452440179412555, iteration: 66197
loss: 1.012195348739624,grad_norm: 0.999999445415485, iteration: 66198
loss: 0.9989590048789978,grad_norm: 0.9999989894005712, iteration: 66199
loss: 0.9878818988800049,grad_norm: 0.9999994714679972, iteration: 66200
loss: 0.954504668712616,grad_norm: 0.9999990954016185, iteration: 66201
loss: 0.9940522909164429,grad_norm: 0.9999992219196329, iteration: 66202
loss: 1.0130800008773804,grad_norm: 0.9999991947944402, iteration: 66203
loss: 1.0861701965332031,grad_norm: 0.9999993393662865, iteration: 66204
loss: 1.0484594106674194,grad_norm: 0.9999990627935063, iteration: 66205
loss: 0.9883531928062439,grad_norm: 0.9927230981107541, iteration: 66206
loss: 1.015196442604065,grad_norm: 0.9999990474571544, iteration: 66207
loss: 1.0005624294281006,grad_norm: 0.9999991602278328, iteration: 66208
loss: 0.9706714153289795,grad_norm: 0.9999994801307154, iteration: 66209
loss: 1.036439299583435,grad_norm: 0.999999310269519, iteration: 66210
loss: 0.9856950640678406,grad_norm: 0.99999931625117, iteration: 66211
loss: 0.9903277158737183,grad_norm: 0.9999990942519443, iteration: 66212
loss: 1.0087718963623047,grad_norm: 0.9999989764573325, iteration: 66213
loss: 0.9990620017051697,grad_norm: 0.9999989940572271, iteration: 66214
loss: 1.0059598684310913,grad_norm: 0.8742773155035785, iteration: 66215
loss: 0.9670527577400208,grad_norm: 0.9999991219969114, iteration: 66216
loss: 1.0165454149246216,grad_norm: 0.952934641606494, iteration: 66217
loss: 0.9963191747665405,grad_norm: 0.9999992479454538, iteration: 66218
loss: 0.9710605144500732,grad_norm: 0.9999992337270158, iteration: 66219
loss: 1.0337733030319214,grad_norm: 0.9999994533610218, iteration: 66220
loss: 1.0554590225219727,grad_norm: 0.9999995219880463, iteration: 66221
loss: 1.013052225112915,grad_norm: 0.9999992221476109, iteration: 66222
loss: 0.9592882990837097,grad_norm: 0.9968699998243138, iteration: 66223
loss: 1.1089284420013428,grad_norm: 0.9999990481518924, iteration: 66224
loss: 1.0135287046432495,grad_norm: 0.9999991959476041, iteration: 66225
loss: 1.0154367685317993,grad_norm: 0.9999997515744998, iteration: 66226
loss: 0.9817779064178467,grad_norm: 0.9999990769670625, iteration: 66227
loss: 1.0002379417419434,grad_norm: 0.8298167524344708, iteration: 66228
loss: 1.0155091285705566,grad_norm: 0.9999991563464822, iteration: 66229
loss: 0.9884302616119385,grad_norm: 0.9999990929381771, iteration: 66230
loss: 1.0072596073150635,grad_norm: 0.9999991886226546, iteration: 66231
loss: 1.0471628904342651,grad_norm: 0.9999992107005388, iteration: 66232
loss: 0.9789478778839111,grad_norm: 0.9643938863556194, iteration: 66233
loss: 1.0579670667648315,grad_norm: 0.9999998392409165, iteration: 66234
loss: 1.0004968643188477,grad_norm: 0.9999991772022988, iteration: 66235
loss: 0.9832478165626526,grad_norm: 0.9999992452678645, iteration: 66236
loss: 0.999656081199646,grad_norm: 0.8978006738235905, iteration: 66237
loss: 1.0247503519058228,grad_norm: 0.9999998069697534, iteration: 66238
loss: 0.9715390205383301,grad_norm: 0.9999991525401338, iteration: 66239
loss: 0.9674472808837891,grad_norm: 0.8235738885210487, iteration: 66240
loss: 1.0330510139465332,grad_norm: 0.9999994454284706, iteration: 66241
loss: 0.9840822815895081,grad_norm: 0.9999991518249278, iteration: 66242
loss: 0.9829126000404358,grad_norm: 0.9333903235521612, iteration: 66243
loss: 1.005774974822998,grad_norm: 0.9172762239179191, iteration: 66244
loss: 1.015407681465149,grad_norm: 0.9950544427386305, iteration: 66245
loss: 0.9913948178291321,grad_norm: 0.900389518133143, iteration: 66246
loss: 1.040540337562561,grad_norm: 0.9999994276470475, iteration: 66247
loss: 0.9753779768943787,grad_norm: 0.9999990201176121, iteration: 66248
loss: 0.9917137026786804,grad_norm: 0.9333796966247773, iteration: 66249
loss: 0.9741811752319336,grad_norm: 0.9771326546301814, iteration: 66250
loss: 1.012787103652954,grad_norm: 0.9999990293975031, iteration: 66251
loss: 0.9820803999900818,grad_norm: 0.9999992365727726, iteration: 66252
loss: 0.9687332510948181,grad_norm: 0.9999990717032223, iteration: 66253
loss: 0.9901112914085388,grad_norm: 0.8787196443103338, iteration: 66254
loss: 1.0040448904037476,grad_norm: 0.9802152381893786, iteration: 66255
loss: 0.9569088220596313,grad_norm: 0.9999990260229711, iteration: 66256
loss: 1.002993106842041,grad_norm: 0.9999992665125361, iteration: 66257
loss: 0.9966283440589905,grad_norm: 0.9999991186252029, iteration: 66258
loss: 1.009922981262207,grad_norm: 0.9999991052914396, iteration: 66259
loss: 1.0005806684494019,grad_norm: 0.818038221392126, iteration: 66260
loss: 1.0046312808990479,grad_norm: 0.9999994834765765, iteration: 66261
loss: 0.962191104888916,grad_norm: 0.999998961702224, iteration: 66262
loss: 1.02724289894104,grad_norm: 0.9999992240237653, iteration: 66263
loss: 1.0381195545196533,grad_norm: 0.999999171659785, iteration: 66264
loss: 1.0077875852584839,grad_norm: 0.9999990778401678, iteration: 66265
loss: 0.9924845099449158,grad_norm: 0.966489360515658, iteration: 66266
loss: 1.021662950515747,grad_norm: 0.9738366275475268, iteration: 66267
loss: 1.0118365287780762,grad_norm: 0.9999992577214571, iteration: 66268
loss: 0.9762424826622009,grad_norm: 0.9999992548631796, iteration: 66269
loss: 0.9952954053878784,grad_norm: 0.999999124024296, iteration: 66270
loss: 1.004321575164795,grad_norm: 0.999999096099601, iteration: 66271
loss: 1.001427412033081,grad_norm: 0.9999991567244411, iteration: 66272
loss: 0.9886478185653687,grad_norm: 0.9999991747132451, iteration: 66273
loss: 0.9891477227210999,grad_norm: 0.9999991161237604, iteration: 66274
loss: 1.0332868099212646,grad_norm: 0.9999990954611383, iteration: 66275
loss: 0.9838724136352539,grad_norm: 0.9123730991717696, iteration: 66276
loss: 0.981724739074707,grad_norm: 0.9999992520128327, iteration: 66277
loss: 1.0019896030426025,grad_norm: 0.9999990129745141, iteration: 66278
loss: 0.9827011823654175,grad_norm: 0.9999991994248684, iteration: 66279
loss: 0.9938057661056519,grad_norm: 0.9999992625107559, iteration: 66280
loss: 1.0342365503311157,grad_norm: 0.9999994669518458, iteration: 66281
loss: 1.0376887321472168,grad_norm: 0.9999991399136441, iteration: 66282
loss: 1.0025370121002197,grad_norm: 0.9668215492572141, iteration: 66283
loss: 0.9778900146484375,grad_norm: 0.9999992474205809, iteration: 66284
loss: 1.0016242265701294,grad_norm: 0.9999991386640693, iteration: 66285
loss: 1.0141316652297974,grad_norm: 0.9999991749623194, iteration: 66286
loss: 1.0006016492843628,grad_norm: 0.8867867234689005, iteration: 66287
loss: 1.0045520067214966,grad_norm: 0.9999990595735168, iteration: 66288
loss: 1.0167466402053833,grad_norm: 0.9999992374485955, iteration: 66289
loss: 1.0023969411849976,grad_norm: 0.9999990401066094, iteration: 66290
loss: 0.991856038570404,grad_norm: 0.9999993005184902, iteration: 66291
loss: 0.9905616641044617,grad_norm: 0.9999991940788173, iteration: 66292
loss: 1.0225555896759033,grad_norm: 0.9722695100339435, iteration: 66293
loss: 0.9654106497764587,grad_norm: 0.9033977820090433, iteration: 66294
loss: 0.9840531349182129,grad_norm: 0.9999990533656806, iteration: 66295
loss: 0.9965028762817383,grad_norm: 0.9999992373614307, iteration: 66296
loss: 0.9944546818733215,grad_norm: 0.9999990826162908, iteration: 66297
loss: 0.9853654503822327,grad_norm: 0.9999991411981416, iteration: 66298
loss: 1.0194847583770752,grad_norm: 0.9640452340452231, iteration: 66299
loss: 0.9852795600891113,grad_norm: 0.9999993050638922, iteration: 66300
loss: 1.0013759136199951,grad_norm: 0.9999990249979434, iteration: 66301
loss: 1.052405834197998,grad_norm: 0.9999990099752787, iteration: 66302
loss: 1.018104076385498,grad_norm: 0.9848483996964504, iteration: 66303
loss: 0.9725139141082764,grad_norm: 0.9999992855828748, iteration: 66304
loss: 1.0225058794021606,grad_norm: 0.9999992094887653, iteration: 66305
loss: 0.9645195603370667,grad_norm: 0.9825781535237131, iteration: 66306
loss: 0.9383568167686462,grad_norm: 0.9999992359604503, iteration: 66307
loss: 0.997460126876831,grad_norm: 0.9999990515051664, iteration: 66308
loss: 1.0079140663146973,grad_norm: 0.9999990548632582, iteration: 66309
loss: 0.9875975847244263,grad_norm: 0.9999992781664747, iteration: 66310
loss: 0.9965451955795288,grad_norm: 0.9999991242610977, iteration: 66311
loss: 0.9848641157150269,grad_norm: 0.7888128532609303, iteration: 66312
loss: 0.9890825748443604,grad_norm: 0.9999991031128626, iteration: 66313
loss: 1.0022920370101929,grad_norm: 0.926966310577892, iteration: 66314
loss: 0.9808480143547058,grad_norm: 0.9999990768397392, iteration: 66315
loss: 0.9762760996818542,grad_norm: 0.9096120153153396, iteration: 66316
loss: 0.9867449998855591,grad_norm: 0.9601661473040703, iteration: 66317
loss: 1.037824273109436,grad_norm: 0.9999991460374053, iteration: 66318
loss: 1.0115933418273926,grad_norm: 0.9999990809393831, iteration: 66319
loss: 0.9900888204574585,grad_norm: 0.9999991459450068, iteration: 66320
loss: 0.9471640586853027,grad_norm: 0.9999990857553146, iteration: 66321
loss: 1.0219130516052246,grad_norm: 0.9999993037278613, iteration: 66322
loss: 1.0126619338989258,grad_norm: 0.9999992443492762, iteration: 66323
loss: 0.9818323254585266,grad_norm: 0.9211911936394596, iteration: 66324
loss: 1.0318559408187866,grad_norm: 0.9219263851188247, iteration: 66325
loss: 0.9984697103500366,grad_norm: 0.9865487016684125, iteration: 66326
loss: 1.0428706407546997,grad_norm: 0.9999992017859566, iteration: 66327
loss: 0.9773674607276917,grad_norm: 0.9999991776928608, iteration: 66328
loss: 1.0285571813583374,grad_norm: 0.9999991302530248, iteration: 66329
loss: 0.9752341508865356,grad_norm: 0.9189081408379668, iteration: 66330
loss: 0.9483656883239746,grad_norm: 0.8800721755168459, iteration: 66331
loss: 1.0511835813522339,grad_norm: 0.9336651280551234, iteration: 66332
loss: 0.9875814318656921,grad_norm: 0.9999989312072046, iteration: 66333
loss: 1.0141539573669434,grad_norm: 0.9999998206599743, iteration: 66334
loss: 1.0267657041549683,grad_norm: 0.9999990612379033, iteration: 66335
loss: 1.030898928642273,grad_norm: 0.9999989192804021, iteration: 66336
loss: 1.0051850080490112,grad_norm: 0.9999992321516292, iteration: 66337
loss: 1.0143367052078247,grad_norm: 0.9999991351578333, iteration: 66338
loss: 0.9670768976211548,grad_norm: 0.9999991622019934, iteration: 66339
loss: 0.9934829473495483,grad_norm: 0.9999989946712222, iteration: 66340
loss: 1.0000380277633667,grad_norm: 0.9999991573843702, iteration: 66341
loss: 0.9801952838897705,grad_norm: 0.9999991202162156, iteration: 66342
loss: 1.079210638999939,grad_norm: 0.9999993070794708, iteration: 66343
loss: 1.0229740142822266,grad_norm: 0.9999990355030488, iteration: 66344
loss: 1.0258883237838745,grad_norm: 0.9999990872507866, iteration: 66345
loss: 1.0111554861068726,grad_norm: 0.8244509793703859, iteration: 66346
loss: 1.0037599802017212,grad_norm: 0.9052362282081555, iteration: 66347
loss: 1.0075029134750366,grad_norm: 0.9999993278590653, iteration: 66348
loss: 1.0262136459350586,grad_norm: 0.9431447138376969, iteration: 66349
loss: 0.9751735925674438,grad_norm: 0.9999990777547603, iteration: 66350
loss: 0.9886001944541931,grad_norm: 0.9999991776294507, iteration: 66351
loss: 0.9835342168807983,grad_norm: 0.9047693106858874, iteration: 66352
loss: 1.0140739679336548,grad_norm: 0.9999992407559265, iteration: 66353
loss: 1.0488735437393188,grad_norm: 0.9999990877710464, iteration: 66354
loss: 0.9275203347206116,grad_norm: 0.9999992244998244, iteration: 66355
loss: 1.0050891637802124,grad_norm: 0.9999992144819639, iteration: 66356
loss: 0.9804563522338867,grad_norm: 0.9700676347528802, iteration: 66357
loss: 0.9958083033561707,grad_norm: 0.7982385889747943, iteration: 66358
loss: 0.9920294284820557,grad_norm: 0.9999989058491953, iteration: 66359
loss: 1.000338077545166,grad_norm: 0.9999989634577832, iteration: 66360
loss: 0.9926878213882446,grad_norm: 0.9639138071248381, iteration: 66361
loss: 0.9629771113395691,grad_norm: 0.9999991154820026, iteration: 66362
loss: 1.0072883367538452,grad_norm: 0.9919593799192908, iteration: 66363
loss: 1.0247704982757568,grad_norm: 0.9999990817943456, iteration: 66364
loss: 0.9740813970565796,grad_norm: 0.9999990578453242, iteration: 66365
loss: 0.9706308245658875,grad_norm: 0.9451827958955997, iteration: 66366
loss: 0.9994438290596008,grad_norm: 0.8883012780899531, iteration: 66367
loss: 1.0020562410354614,grad_norm: 0.9059056738434372, iteration: 66368
loss: 0.9859768748283386,grad_norm: 0.9946821845995389, iteration: 66369
loss: 1.0029596090316772,grad_norm: 0.9999991195345339, iteration: 66370
loss: 0.9887938499450684,grad_norm: 0.9553527638286877, iteration: 66371
loss: 1.010233998298645,grad_norm: 0.9951001118862026, iteration: 66372
loss: 1.001573085784912,grad_norm: 0.9999991152277642, iteration: 66373
loss: 1.0052149295806885,grad_norm: 0.9999990338671159, iteration: 66374
loss: 1.0065970420837402,grad_norm: 0.8237992336662967, iteration: 66375
loss: 0.9918183088302612,grad_norm: 0.9880951340576346, iteration: 66376
loss: 0.9891563057899475,grad_norm: 0.9999991451629933, iteration: 66377
loss: 0.9869318604469299,grad_norm: 0.8523752784990674, iteration: 66378
loss: 1.0074959993362427,grad_norm: 0.9730811563480507, iteration: 66379
loss: 0.9686738848686218,grad_norm: 0.9999991160101516, iteration: 66380
loss: 1.0201478004455566,grad_norm: 0.9999991601110102, iteration: 66381
loss: 0.989801824092865,grad_norm: 0.8161419801998903, iteration: 66382
loss: 1.0320924520492554,grad_norm: 0.9571265419339577, iteration: 66383
loss: 0.988168478012085,grad_norm: 0.9824785649340003, iteration: 66384
loss: 0.9794260263442993,grad_norm: 0.9552662254485652, iteration: 66385
loss: 1.0077288150787354,grad_norm: 0.9999993437290169, iteration: 66386
loss: 1.0070877075195312,grad_norm: 0.9999994320152787, iteration: 66387
loss: 0.9987396001815796,grad_norm: 0.9999991443173978, iteration: 66388
loss: 0.9613434076309204,grad_norm: 0.9999991444564673, iteration: 66389
loss: 1.044524073600769,grad_norm: 0.9999990462416257, iteration: 66390
loss: 1.0033793449401855,grad_norm: 0.9999990896077136, iteration: 66391
loss: 1.0065526962280273,grad_norm: 0.9999992925527685, iteration: 66392
loss: 0.9615534543991089,grad_norm: 0.9999991677697125, iteration: 66393
loss: 1.0265724658966064,grad_norm: 0.9644736201808332, iteration: 66394
loss: 0.9988341927528381,grad_norm: 0.999999187790091, iteration: 66395
loss: 0.9913967251777649,grad_norm: 0.9999991351750211, iteration: 66396
loss: 1.0010355710983276,grad_norm: 0.999999186429009, iteration: 66397
loss: 1.0749069452285767,grad_norm: 0.999999316247528, iteration: 66398
loss: 0.9738951325416565,grad_norm: 0.9999992287164914, iteration: 66399
loss: 1.0272173881530762,grad_norm: 0.9999992737673763, iteration: 66400
loss: 1.0325125455856323,grad_norm: 0.9999991696133466, iteration: 66401
loss: 0.9963226914405823,grad_norm: 0.9536325292418202, iteration: 66402
loss: 1.0185518264770508,grad_norm: 0.9999990766959492, iteration: 66403
loss: 1.0049370527267456,grad_norm: 0.9999989470629733, iteration: 66404
loss: 1.0304566621780396,grad_norm: 0.9999992208027624, iteration: 66405
loss: 0.9666205644607544,grad_norm: 0.9429978196016803, iteration: 66406
loss: 1.0125476121902466,grad_norm: 0.9999991753422052, iteration: 66407
loss: 1.0490361452102661,grad_norm: 0.9999990850063878, iteration: 66408
loss: 0.9933732151985168,grad_norm: 0.9999992050479487, iteration: 66409
loss: 0.9859606027603149,grad_norm: 0.9409543126121227, iteration: 66410
loss: 1.011609673500061,grad_norm: 0.9999989341975367, iteration: 66411
loss: 0.9717280864715576,grad_norm: 0.9999991410900985, iteration: 66412
loss: 1.0115509033203125,grad_norm: 0.9999991291500318, iteration: 66413
loss: 0.9959856271743774,grad_norm: 0.9999990456475262, iteration: 66414
loss: 1.012933611869812,grad_norm: 0.855559468241173, iteration: 66415
loss: 1.0123144388198853,grad_norm: 0.850834482880813, iteration: 66416
loss: 0.9862200617790222,grad_norm: 0.9700438017257345, iteration: 66417
loss: 0.9860792756080627,grad_norm: 0.9771173282235064, iteration: 66418
loss: 0.9911248087882996,grad_norm: 0.9999989931025128, iteration: 66419
loss: 0.9948554635047913,grad_norm: 0.9999991448877927, iteration: 66420
loss: 0.9893985986709595,grad_norm: 0.9999990870492717, iteration: 66421
loss: 0.9987539052963257,grad_norm: 0.999999067710216, iteration: 66422
loss: 1.0087640285491943,grad_norm: 0.9999990059955933, iteration: 66423
loss: 0.978585958480835,grad_norm: 0.999998961549527, iteration: 66424
loss: 1.0390323400497437,grad_norm: 0.9999994172916159, iteration: 66425
loss: 1.0194129943847656,grad_norm: 0.8929949718083616, iteration: 66426
loss: 0.9771602153778076,grad_norm: 0.9999989552089714, iteration: 66427
loss: 1.0039002895355225,grad_norm: 0.9999990592085296, iteration: 66428
loss: 1.0230029821395874,grad_norm: 0.9999991563458387, iteration: 66429
loss: 1.0013647079467773,grad_norm: 0.9493017541416253, iteration: 66430
loss: 0.9768752455711365,grad_norm: 0.9125083452898632, iteration: 66431
loss: 0.9892612099647522,grad_norm: 0.9212683950563023, iteration: 66432
loss: 0.9995850920677185,grad_norm: 0.9999992509180338, iteration: 66433
loss: 0.9757171869277954,grad_norm: 0.97438349978771, iteration: 66434
loss: 1.0319322347640991,grad_norm: 0.8955155862748745, iteration: 66435
loss: 0.9810221791267395,grad_norm: 0.8837731387207094, iteration: 66436
loss: 0.9934031963348389,grad_norm: 0.9762910871607935, iteration: 66437
loss: 0.9797994494438171,grad_norm: 0.9999990018007338, iteration: 66438
loss: 0.988117516040802,grad_norm: 0.9999992541009749, iteration: 66439
loss: 1.0118563175201416,grad_norm: 0.9999990907435686, iteration: 66440
loss: 0.9934495091438293,grad_norm: 0.9999991418112609, iteration: 66441
loss: 1.0041710138320923,grad_norm: 0.9297290369109171, iteration: 66442
loss: 0.9984774589538574,grad_norm: 0.9611101992871315, iteration: 66443
loss: 0.9951938390731812,grad_norm: 0.9999991642504595, iteration: 66444
loss: 1.0256614685058594,grad_norm: 0.968098055605756, iteration: 66445
loss: 1.0472520589828491,grad_norm: 0.9810138584942182, iteration: 66446
loss: 0.9897398948669434,grad_norm: 0.9999992032183814, iteration: 66447
loss: 1.0017598867416382,grad_norm: 0.9999990516295106, iteration: 66448
loss: 0.9595197439193726,grad_norm: 0.9193788130326669, iteration: 66449
loss: 1.005406379699707,grad_norm: 0.9898496278450549, iteration: 66450
loss: 1.0033565759658813,grad_norm: 0.9999990266295093, iteration: 66451
loss: 0.9960107207298279,grad_norm: 0.9999990975230723, iteration: 66452
loss: 1.0163323879241943,grad_norm: 0.8963138977384447, iteration: 66453
loss: 1.0465667247772217,grad_norm: 0.9999991017778622, iteration: 66454
loss: 0.9918350577354431,grad_norm: 0.935970707456786, iteration: 66455
loss: 0.990344762802124,grad_norm: 0.9999992769074888, iteration: 66456
loss: 1.00754976272583,grad_norm: 0.999999133323103, iteration: 66457
loss: 0.96092689037323,grad_norm: 0.9999992518951348, iteration: 66458
loss: 0.987652599811554,grad_norm: 0.9999990743046197, iteration: 66459
loss: 1.0406337976455688,grad_norm: 0.9999992096071, iteration: 66460
loss: 1.007535696029663,grad_norm: 0.9999990524702438, iteration: 66461
loss: 1.0056875944137573,grad_norm: 0.9999995889450908, iteration: 66462
loss: 0.9967723488807678,grad_norm: 0.9999990345457357, iteration: 66463
loss: 0.967949628829956,grad_norm: 0.9557214494129052, iteration: 66464
loss: 0.9960521459579468,grad_norm: 0.9999990958512198, iteration: 66465
loss: 1.0110019445419312,grad_norm: 0.9999991366993883, iteration: 66466
loss: 0.9985212087631226,grad_norm: 0.8946586955517983, iteration: 66467
loss: 1.0042402744293213,grad_norm: 0.9615711379287639, iteration: 66468
loss: 1.046904444694519,grad_norm: 0.9999996846151162, iteration: 66469
loss: 1.0151190757751465,grad_norm: 0.9999991615600734, iteration: 66470
loss: 0.9913315773010254,grad_norm: 0.9999991446119912, iteration: 66471
loss: 1.000199556350708,grad_norm: 0.9999989898802466, iteration: 66472
loss: 1.026597023010254,grad_norm: 0.9999989339911672, iteration: 66473
loss: 0.9990020990371704,grad_norm: 0.9999991013701384, iteration: 66474
loss: 1.0747153759002686,grad_norm: 0.9999997120254375, iteration: 66475
loss: 0.9989322423934937,grad_norm: 0.9999995292586524, iteration: 66476
loss: 1.0179426670074463,grad_norm: 0.9999995692871088, iteration: 66477
loss: 1.0019986629486084,grad_norm: 1.0000000664375226, iteration: 66478
loss: 0.9638950228691101,grad_norm: 0.9674906612281304, iteration: 66479
loss: 0.9932863116264343,grad_norm: 0.9999991076626739, iteration: 66480
loss: 1.01651930809021,grad_norm: 0.9999991125882105, iteration: 66481
loss: 0.9709494113922119,grad_norm: 0.9999990649919539, iteration: 66482
loss: 1.0361300706863403,grad_norm: 0.9999992531944422, iteration: 66483
loss: 0.9976087808609009,grad_norm: 0.8046669651864672, iteration: 66484
loss: 0.9894165992736816,grad_norm: 0.9999989957557719, iteration: 66485
loss: 0.9849807024002075,grad_norm: 0.9999992060456987, iteration: 66486
loss: 0.9963923096656799,grad_norm: 0.9226117563190004, iteration: 66487
loss: 0.9921910166740417,grad_norm: 0.8662283974519699, iteration: 66488
loss: 0.9701520204544067,grad_norm: 0.9027934796975372, iteration: 66489
loss: 1.0026637315750122,grad_norm: 0.8866434615348864, iteration: 66490
loss: 1.0195600986480713,grad_norm: 0.9381098615638718, iteration: 66491
loss: 1.0374197959899902,grad_norm: 0.9999990453067632, iteration: 66492
loss: 1.026087760925293,grad_norm: 0.9345691611550521, iteration: 66493
loss: 1.0449665784835815,grad_norm: 0.9999992104731621, iteration: 66494
loss: 1.0109683275222778,grad_norm: 0.9999991716937169, iteration: 66495
loss: 1.0199329853057861,grad_norm: 0.9185552485212378, iteration: 66496
loss: 0.9974361658096313,grad_norm: 0.9999991988164515, iteration: 66497
loss: 1.0153632164001465,grad_norm: 0.9496934431681129, iteration: 66498
loss: 1.0285000801086426,grad_norm: 0.9999989726932672, iteration: 66499
loss: 0.9646861553192139,grad_norm: 0.9999990598967675, iteration: 66500
loss: 1.034510850906372,grad_norm: 0.9999991890146317, iteration: 66501
loss: 1.0424548387527466,grad_norm: 0.9496994827933402, iteration: 66502
loss: 1.0029419660568237,grad_norm: 0.999999131124714, iteration: 66503
loss: 1.0075855255126953,grad_norm: 0.999999086771759, iteration: 66504
loss: 0.965196430683136,grad_norm: 0.9999991364728096, iteration: 66505
loss: 0.9920651912689209,grad_norm: 0.9999993353091724, iteration: 66506
loss: 0.967460036277771,grad_norm: 0.9999992102778977, iteration: 66507
loss: 0.9997906684875488,grad_norm: 0.9015413927679395, iteration: 66508
loss: 0.9803988933563232,grad_norm: 0.999999237937734, iteration: 66509
loss: 1.011507272720337,grad_norm: 0.9999990480334469, iteration: 66510
loss: 1.015256643295288,grad_norm: 0.9157199182752557, iteration: 66511
loss: 0.9968224167823792,grad_norm: 0.9999991964617732, iteration: 66512
loss: 0.9807520508766174,grad_norm: 0.9999991276201495, iteration: 66513
loss: 1.0042065382003784,grad_norm: 0.9999990901500571, iteration: 66514
loss: 1.0194917917251587,grad_norm: 0.9999990138468394, iteration: 66515
loss: 0.9861350655555725,grad_norm: 0.9999990787507554, iteration: 66516
loss: 1.0102499723434448,grad_norm: 0.9999992827286769, iteration: 66517
loss: 1.0269670486450195,grad_norm: 0.9999990009158161, iteration: 66518
loss: 0.9903295040130615,grad_norm: 0.9999990880330392, iteration: 66519
loss: 0.9983330965042114,grad_norm: 0.7985325650202801, iteration: 66520
loss: 0.995660126209259,grad_norm: 0.8953451525664273, iteration: 66521
loss: 1.0110408067703247,grad_norm: 0.9999991113734378, iteration: 66522
loss: 1.0063718557357788,grad_norm: 0.987743562882803, iteration: 66523
loss: 1.0240851640701294,grad_norm: 0.9999992564768219, iteration: 66524
loss: 1.038179636001587,grad_norm: 0.9999994668760928, iteration: 66525
loss: 0.9716464281082153,grad_norm: 0.8270851657839627, iteration: 66526
loss: 1.0042057037353516,grad_norm: 0.999999167617696, iteration: 66527
loss: 0.9854246377944946,grad_norm: 0.999999179784471, iteration: 66528
loss: 0.9728987216949463,grad_norm: 0.9999991535317253, iteration: 66529
loss: 0.9788872599601746,grad_norm: 0.999999033768658, iteration: 66530
loss: 1.0177278518676758,grad_norm: 0.9023388158993283, iteration: 66531
loss: 0.990451991558075,grad_norm: 0.9299759016036491, iteration: 66532
loss: 0.9868736863136292,grad_norm: 0.992728220656335, iteration: 66533
loss: 0.9847985506057739,grad_norm: 0.9595595251274353, iteration: 66534
loss: 1.062002420425415,grad_norm: 0.9999992291511689, iteration: 66535
loss: 0.9618569612503052,grad_norm: 0.8742989032634957, iteration: 66536
loss: 1.0114010572433472,grad_norm: 0.9391918709880936, iteration: 66537
loss: 0.9993245005607605,grad_norm: 0.9999990630428323, iteration: 66538
loss: 0.9636476039886475,grad_norm: 0.8910598398483449, iteration: 66539
loss: 1.0167999267578125,grad_norm: 0.9999992226255079, iteration: 66540
loss: 0.9967230558395386,grad_norm: 0.8482238108305221, iteration: 66541
loss: 1.0141907930374146,grad_norm: 0.999999067353117, iteration: 66542
loss: 1.0306636095046997,grad_norm: 0.9631519638508604, iteration: 66543
loss: 0.9677608609199524,grad_norm: 0.9979105065177469, iteration: 66544
loss: 0.9987242817878723,grad_norm: 0.9999991099543154, iteration: 66545
loss: 1.0239819288253784,grad_norm: 0.9999991442668824, iteration: 66546
loss: 1.0238350629806519,grad_norm: 0.9999992010943688, iteration: 66547
loss: 0.9999749660491943,grad_norm: 0.9238618952120173, iteration: 66548
loss: 1.1496341228485107,grad_norm: 0.999999114856446, iteration: 66549
loss: 1.0096062421798706,grad_norm: 0.9999991728789809, iteration: 66550
loss: 1.0189013481140137,grad_norm: 0.9999994276890836, iteration: 66551
loss: 1.0298161506652832,grad_norm: 0.9999990954017859, iteration: 66552
loss: 1.0493574142456055,grad_norm: 0.9999993654991858, iteration: 66553
loss: 0.9993831515312195,grad_norm: 0.9794798962241105, iteration: 66554
loss: 1.0170997381210327,grad_norm: 0.8869138827563963, iteration: 66555
loss: 1.0466641187667847,grad_norm: 0.9999988991676678, iteration: 66556
loss: 0.9905120134353638,grad_norm: 0.9999991462394489, iteration: 66557
loss: 1.015761375427246,grad_norm: 0.9430390102981384, iteration: 66558
loss: 1.0305875539779663,grad_norm: 0.9668295592743253, iteration: 66559
loss: 1.0016610622406006,grad_norm: 0.9999991542141345, iteration: 66560
loss: 0.9838985204696655,grad_norm: 0.9999990467577535, iteration: 66561
loss: 1.0299180746078491,grad_norm: 0.9999991379331361, iteration: 66562
loss: 1.0226637125015259,grad_norm: 0.9360204351554068, iteration: 66563
loss: 0.9627870917320251,grad_norm: 0.9999991342254393, iteration: 66564
loss: 1.0306543111801147,grad_norm: 0.9999991953855688, iteration: 66565
loss: 1.0115350484848022,grad_norm: 0.9999992889736963, iteration: 66566
loss: 1.025684118270874,grad_norm: 0.999999210111445, iteration: 66567
loss: 1.0153672695159912,grad_norm: 0.9999991908638808, iteration: 66568
loss: 1.0219298601150513,grad_norm: 0.9999991865331526, iteration: 66569
loss: 0.9995754361152649,grad_norm: 0.9999993422444945, iteration: 66570
loss: 1.0190919637680054,grad_norm: 0.9484317859353533, iteration: 66571
loss: 0.9732497930526733,grad_norm: 0.9199840694117211, iteration: 66572
loss: 0.9894839525222778,grad_norm: 0.999999258605674, iteration: 66573
loss: 1.0227949619293213,grad_norm: 0.999999098086229, iteration: 66574
loss: 0.9745064377784729,grad_norm: 0.9651804573902458, iteration: 66575
loss: 1.0395586490631104,grad_norm: 0.9999991528590956, iteration: 66576
loss: 0.9871389269828796,grad_norm: 0.9999991483584759, iteration: 66577
loss: 0.9932501912117004,grad_norm: 0.9543154446396651, iteration: 66578
loss: 0.9989079236984253,grad_norm: 0.9999993161863782, iteration: 66579
loss: 1.0237699747085571,grad_norm: 0.9999991195033714, iteration: 66580
loss: 0.9662750959396362,grad_norm: 0.9362249745873402, iteration: 66581
loss: 1.0286741256713867,grad_norm: 0.9192683999718455, iteration: 66582
loss: 0.9996817111968994,grad_norm: 0.999999439991303, iteration: 66583
loss: 0.9886888265609741,grad_norm: 0.9114959744292789, iteration: 66584
loss: 1.020364761352539,grad_norm: 0.9999994576053315, iteration: 66585
loss: 0.9958855509757996,grad_norm: 0.999999206450498, iteration: 66586
loss: 0.9704083204269409,grad_norm: 0.9999992049164808, iteration: 66587
loss: 1.038378357887268,grad_norm: 0.9999991455679693, iteration: 66588
loss: 1.0143159627914429,grad_norm: 0.9999991609849384, iteration: 66589
loss: 0.9921433329582214,grad_norm: 0.999999123217749, iteration: 66590
loss: 0.978238582611084,grad_norm: 0.9999991269594123, iteration: 66591
loss: 1.0209397077560425,grad_norm: 0.9708635684469468, iteration: 66592
loss: 1.007354974746704,grad_norm: 0.9472420791111855, iteration: 66593
loss: 1.0127818584442139,grad_norm: 0.9999990256346555, iteration: 66594
loss: 0.9749355912208557,grad_norm: 0.93616075566099, iteration: 66595
loss: 0.9997581839561462,grad_norm: 0.9687020797031268, iteration: 66596
loss: 0.9649200439453125,grad_norm: 0.9999991323553657, iteration: 66597
loss: 0.9954781532287598,grad_norm: 0.9999990802116185, iteration: 66598
loss: 1.0030609369277954,grad_norm: 0.9413134191238945, iteration: 66599
loss: 1.0012660026550293,grad_norm: 0.8772257266098417, iteration: 66600
loss: 1.0091944932937622,grad_norm: 0.8410466184605715, iteration: 66601
loss: 1.0269465446472168,grad_norm: 0.9534316132095105, iteration: 66602
loss: 0.9552605152130127,grad_norm: 0.9222705704309372, iteration: 66603
loss: 1.0028775930404663,grad_norm: 0.9999991594339567, iteration: 66604
loss: 0.981438159942627,grad_norm: 0.8943243367948209, iteration: 66605
loss: 1.0033572912216187,grad_norm: 0.874403614676824, iteration: 66606
loss: 0.9845060110092163,grad_norm: 0.8649050292687184, iteration: 66607
loss: 1.0292617082595825,grad_norm: 0.9068125439611292, iteration: 66608
loss: 0.9907528758049011,grad_norm: 0.9141613625441372, iteration: 66609
loss: 1.0045526027679443,grad_norm: 0.9499373637129457, iteration: 66610
loss: 1.0416405200958252,grad_norm: 0.9999991376741012, iteration: 66611
loss: 0.9799116849899292,grad_norm: 0.9999992108460475, iteration: 66612
loss: 0.992846667766571,grad_norm: 0.9828723339797563, iteration: 66613
loss: 0.9944931268692017,grad_norm: 0.9999990854097025, iteration: 66614
loss: 0.9737571477890015,grad_norm: 0.9999990803825759, iteration: 66615
loss: 0.9933770298957825,grad_norm: 0.9999992242009964, iteration: 66616
loss: 1.042540431022644,grad_norm: 0.9999991746316805, iteration: 66617
loss: 0.9853538870811462,grad_norm: 0.9999990339989451, iteration: 66618
loss: 0.9620532989501953,grad_norm: 0.9999991051838099, iteration: 66619
loss: 1.0172775983810425,grad_norm: 0.9794681864492092, iteration: 66620
loss: 0.988226592540741,grad_norm: 0.9957165807582234, iteration: 66621
loss: 0.9656582474708557,grad_norm: 0.9999992303796646, iteration: 66622
loss: 0.9989209175109863,grad_norm: 0.9999990375841695, iteration: 66623
loss: 0.9884229898452759,grad_norm: 0.9999992560783232, iteration: 66624
loss: 1.0091668367385864,grad_norm: 0.9999991356821563, iteration: 66625
loss: 1.0104737281799316,grad_norm: 0.999999094066659, iteration: 66626
loss: 1.008060097694397,grad_norm: 0.9999990971829046, iteration: 66627
loss: 0.9882566928863525,grad_norm: 0.9999991512205767, iteration: 66628
loss: 0.9507017731666565,grad_norm: 0.9627987894644676, iteration: 66629
loss: 1.014389991760254,grad_norm: 0.9571175278106963, iteration: 66630
loss: 1.0175867080688477,grad_norm: 0.999999286338591, iteration: 66631
loss: 0.9800958633422852,grad_norm: 0.9999989724187565, iteration: 66632
loss: 1.0482252836227417,grad_norm: 0.9999992452337195, iteration: 66633
loss: 1.0054317712783813,grad_norm: 0.9999990401658584, iteration: 66634
loss: 0.9856102466583252,grad_norm: 0.9999991092909327, iteration: 66635
loss: 1.0258623361587524,grad_norm: 0.9862040273471845, iteration: 66636
loss: 1.0081037282943726,grad_norm: 0.9999994421302727, iteration: 66637
loss: 0.9562678337097168,grad_norm: 0.9999990315976335, iteration: 66638
loss: 0.9626488089561462,grad_norm: 0.9999991500033031, iteration: 66639
loss: 0.9964116811752319,grad_norm: 0.9897423093878968, iteration: 66640
loss: 0.9987797737121582,grad_norm: 0.9999990863733172, iteration: 66641
loss: 0.9735733866691589,grad_norm: 0.9990266496324884, iteration: 66642
loss: 1.0069869756698608,grad_norm: 0.9765193183177695, iteration: 66643
loss: 0.9724470376968384,grad_norm: 0.8491163063908845, iteration: 66644
loss: 1.0064102411270142,grad_norm: 0.9999990149762785, iteration: 66645
loss: 0.9625757932662964,grad_norm: 0.9999988811786295, iteration: 66646
loss: 0.9745410084724426,grad_norm: 0.9999990277399547, iteration: 66647
loss: 0.9927446842193604,grad_norm: 0.9303018010726606, iteration: 66648
loss: 1.0148134231567383,grad_norm: 0.9999990444109168, iteration: 66649
loss: 1.0306370258331299,grad_norm: 0.9999996620588826, iteration: 66650
loss: 0.9678842425346375,grad_norm: 0.9999992127916038, iteration: 66651
loss: 1.0035183429718018,grad_norm: 0.999999136241666, iteration: 66652
loss: 0.9834138751029968,grad_norm: 0.9999991825094464, iteration: 66653
loss: 0.9715782403945923,grad_norm: 0.9999991842313326, iteration: 66654
loss: 1.0375645160675049,grad_norm: 0.9901311962285119, iteration: 66655
loss: 1.013573408126831,grad_norm: 0.9999990596839914, iteration: 66656
loss: 1.0040762424468994,grad_norm: 0.8315386119465124, iteration: 66657
loss: 0.9977671504020691,grad_norm: 0.899633806980581, iteration: 66658
loss: 1.0045325756072998,grad_norm: 0.999999201145305, iteration: 66659
loss: 1.034751296043396,grad_norm: 0.9999995336759274, iteration: 66660
loss: 0.9880952835083008,grad_norm: 0.9894487995304927, iteration: 66661
loss: 1.0101737976074219,grad_norm: 0.9999990072196885, iteration: 66662
loss: 0.9805149435997009,grad_norm: 0.9999991102147013, iteration: 66663
loss: 0.9659886360168457,grad_norm: 0.9999990585736781, iteration: 66664
loss: 1.0086333751678467,grad_norm: 0.9999992849778234, iteration: 66665
loss: 0.9903177618980408,grad_norm: 0.9999991944977137, iteration: 66666
loss: 1.0073063373565674,grad_norm: 0.9999991135142043, iteration: 66667
loss: 1.027551293373108,grad_norm: 0.9999990759528824, iteration: 66668
loss: 0.9582483768463135,grad_norm: 0.9999992244549107, iteration: 66669
loss: 0.9902574419975281,grad_norm: 0.9464596769978496, iteration: 66670
loss: 1.0001728534698486,grad_norm: 0.9999990928277602, iteration: 66671
loss: 1.0023523569107056,grad_norm: 0.9946204199453468, iteration: 66672
loss: 1.0193839073181152,grad_norm: 0.9399989750113831, iteration: 66673
loss: 1.0009859800338745,grad_norm: 0.8889282729019884, iteration: 66674
loss: 0.9884201884269714,grad_norm: 0.951274827125987, iteration: 66675
loss: 0.9991137385368347,grad_norm: 0.9999992816637291, iteration: 66676
loss: 1.0024378299713135,grad_norm: 0.9999990382480871, iteration: 66677
loss: 1.0048255920410156,grad_norm: 0.9999992894152556, iteration: 66678
loss: 1.0143070220947266,grad_norm: 0.9999991474674359, iteration: 66679
loss: 0.9739433526992798,grad_norm: 0.9775029724127973, iteration: 66680
loss: 0.9903113842010498,grad_norm: 0.9677625598275588, iteration: 66681
loss: 1.024675965309143,grad_norm: 0.9999992040321166, iteration: 66682
loss: 1.0252269506454468,grad_norm: 0.9196366185162456, iteration: 66683
loss: 0.9666041135787964,grad_norm: 0.9999990252711863, iteration: 66684
loss: 1.0103260278701782,grad_norm: 0.9999992259476571, iteration: 66685
loss: 0.9819179773330688,grad_norm: 0.9999991691718534, iteration: 66686
loss: 1.020545244216919,grad_norm: 0.9552784234476377, iteration: 66687
loss: 0.9829330444335938,grad_norm: 0.9936930659709298, iteration: 66688
loss: 1.0549603700637817,grad_norm: 0.999999150196827, iteration: 66689
loss: 0.9857132434844971,grad_norm: 0.999999183533285, iteration: 66690
loss: 0.9696181416511536,grad_norm: 0.999998961987314, iteration: 66691
loss: 0.9943878650665283,grad_norm: 0.9999993226619903, iteration: 66692
loss: 1.0159578323364258,grad_norm: 0.8809794993438489, iteration: 66693
loss: 1.0011638402938843,grad_norm: 0.9999991364471472, iteration: 66694
loss: 0.9881075620651245,grad_norm: 0.9999991923990802, iteration: 66695
loss: 0.9756363034248352,grad_norm: 0.819015644055542, iteration: 66696
loss: 0.9890429377555847,grad_norm: 0.9285206073635061, iteration: 66697
loss: 1.0119447708129883,grad_norm: 0.8830515224071339, iteration: 66698
loss: 0.9713689088821411,grad_norm: 0.9999991007335496, iteration: 66699
loss: 1.0312221050262451,grad_norm: 0.9546156456131549, iteration: 66700
loss: 0.9551117420196533,grad_norm: 0.9999991328780644, iteration: 66701
loss: 1.011115312576294,grad_norm: 0.9454819921158979, iteration: 66702
loss: 1.0435439348220825,grad_norm: 0.9958018987681949, iteration: 66703
loss: 0.990479052066803,grad_norm: 0.9999989134401442, iteration: 66704
loss: 0.9777783751487732,grad_norm: 0.992895396872806, iteration: 66705
loss: 0.9564476609230042,grad_norm: 0.8081506793482133, iteration: 66706
loss: 1.0186398029327393,grad_norm: 0.999999300180866, iteration: 66707
loss: 1.003026008605957,grad_norm: 0.9999991363212003, iteration: 66708
loss: 0.9764229655265808,grad_norm: 0.9095060463752785, iteration: 66709
loss: 0.9743109941482544,grad_norm: 0.9999992353878973, iteration: 66710
loss: 1.0143790245056152,grad_norm: 0.9820097900847301, iteration: 66711
loss: 1.024556040763855,grad_norm: 0.999999164194012, iteration: 66712
loss: 1.036546230316162,grad_norm: 0.9999990821035667, iteration: 66713
loss: 1.0378015041351318,grad_norm: 0.9584418076759664, iteration: 66714
loss: 1.0056989192962646,grad_norm: 0.9999991762930172, iteration: 66715
loss: 0.9951862692832947,grad_norm: 0.9170771023369713, iteration: 66716
loss: 1.028208613395691,grad_norm: 0.8350726985833538, iteration: 66717
loss: 0.9933647513389587,grad_norm: 0.9804139373496916, iteration: 66718
loss: 1.0064529180526733,grad_norm: 0.9285938056210136, iteration: 66719
loss: 0.9618346691131592,grad_norm: 0.7635145742553657, iteration: 66720
loss: 1.0465996265411377,grad_norm: 0.9999995938380991, iteration: 66721
loss: 0.9803141951560974,grad_norm: 0.9680800586983456, iteration: 66722
loss: 1.0110915899276733,grad_norm: 0.9999991193789552, iteration: 66723
loss: 1.0019924640655518,grad_norm: 0.999999101473583, iteration: 66724
loss: 1.0283225774765015,grad_norm: 0.9999990419816566, iteration: 66725
loss: 1.0516557693481445,grad_norm: 0.9999991697231614, iteration: 66726
loss: 1.012139916419983,grad_norm: 0.9999991027038739, iteration: 66727
loss: 0.986808717250824,grad_norm: 0.9999989633853942, iteration: 66728
loss: 1.004205346107483,grad_norm: 0.999999524610874, iteration: 66729
loss: 1.0421619415283203,grad_norm: 0.9999993094692178, iteration: 66730
loss: 0.9724829792976379,grad_norm: 0.9999992598797256, iteration: 66731
loss: 0.992861270904541,grad_norm: 0.9608534952579886, iteration: 66732
loss: 1.0105503797531128,grad_norm: 0.9586300880472852, iteration: 66733
loss: 0.9697935581207275,grad_norm: 0.875006277242242, iteration: 66734
loss: 1.028186559677124,grad_norm: 0.9138441800152693, iteration: 66735
loss: 0.9598417282104492,grad_norm: 0.9253632176967849, iteration: 66736
loss: 1.0235624313354492,grad_norm: 0.9999992684458592, iteration: 66737
loss: 0.9806448817253113,grad_norm: 0.99999918865731, iteration: 66738
loss: 1.0194753408432007,grad_norm: 0.999999003612555, iteration: 66739
loss: 1.0021880865097046,grad_norm: 0.9999989270055978, iteration: 66740
loss: 0.9835813641548157,grad_norm: 0.9999992419510861, iteration: 66741
loss: 0.9923065900802612,grad_norm: 0.9742707943308448, iteration: 66742
loss: 0.9985597133636475,grad_norm: 0.9999991374473176, iteration: 66743
loss: 1.008597493171692,grad_norm: 0.9999992321775736, iteration: 66744
loss: 1.006420373916626,grad_norm: 0.9999992214341743, iteration: 66745
loss: 1.0385228395462036,grad_norm: 0.9191394186923184, iteration: 66746
loss: 0.9986453056335449,grad_norm: 0.9999989236300241, iteration: 66747
loss: 1.0104162693023682,grad_norm: 0.9489995401961003, iteration: 66748
loss: 0.9920052289962769,grad_norm: 0.9999991509296045, iteration: 66749
loss: 0.9790193438529968,grad_norm: 0.9999990304955307, iteration: 66750
loss: 0.9673845767974854,grad_norm: 0.9999993092908568, iteration: 66751
loss: 1.0093412399291992,grad_norm: 0.9999989396736052, iteration: 66752
loss: 1.0175559520721436,grad_norm: 0.999999101145409, iteration: 66753
loss: 0.9890097975730896,grad_norm: 0.9948922302294813, iteration: 66754
loss: 1.007551908493042,grad_norm: 0.8770505859626795, iteration: 66755
loss: 0.9660481810569763,grad_norm: 0.9999990538985266, iteration: 66756
loss: 0.9926384091377258,grad_norm: 0.9999990140661933, iteration: 66757
loss: 0.985029935836792,grad_norm: 0.9265716027119587, iteration: 66758
loss: 0.9669092297554016,grad_norm: 0.8858803151101677, iteration: 66759
loss: 1.0160977840423584,grad_norm: 0.9118859539203984, iteration: 66760
loss: 1.022499918937683,grad_norm: 0.9999990228351388, iteration: 66761
loss: 1.037108063697815,grad_norm: 0.9999990794742015, iteration: 66762
loss: 1.0285178422927856,grad_norm: 0.9999991738164864, iteration: 66763
loss: 0.9860827326774597,grad_norm: 0.9999990200368802, iteration: 66764
loss: 1.0037723779678345,grad_norm: 0.9999995237097511, iteration: 66765
loss: 0.9605408310890198,grad_norm: 0.9091568929217224, iteration: 66766
loss: 1.0334728956222534,grad_norm: 0.9999990580126602, iteration: 66767
loss: 1.0428112745285034,grad_norm: 0.9999991557770462, iteration: 66768
loss: 0.9780701994895935,grad_norm: 0.9466624792203499, iteration: 66769
loss: 1.0215603113174438,grad_norm: 0.9996151359657841, iteration: 66770
loss: 0.996752142906189,grad_norm: 0.999999023706975, iteration: 66771
loss: 1.0237938165664673,grad_norm: 0.9999992177302636, iteration: 66772
loss: 1.0117225646972656,grad_norm: 0.9847846010837736, iteration: 66773
loss: 0.9712882041931152,grad_norm: 0.9362859612179427, iteration: 66774
loss: 0.9729770421981812,grad_norm: 0.999999184468536, iteration: 66775
loss: 1.01312255859375,grad_norm: 0.9999990852707658, iteration: 66776
loss: 0.985496461391449,grad_norm: 0.9885697563441727, iteration: 66777
loss: 0.9749253988265991,grad_norm: 0.9999990032625126, iteration: 66778
loss: 0.9986851811408997,grad_norm: 0.9999992319532516, iteration: 66779
loss: 0.9814967513084412,grad_norm: 0.9640734542398498, iteration: 66780
loss: 1.010970115661621,grad_norm: 0.9999992062591907, iteration: 66781
loss: 0.9671198129653931,grad_norm: 0.9999991783254734, iteration: 66782
loss: 0.9970934987068176,grad_norm: 0.8805162446980235, iteration: 66783
loss: 0.9562220573425293,grad_norm: 0.9450600555774221, iteration: 66784
loss: 1.0280930995941162,grad_norm: 0.9736072949042418, iteration: 66785
loss: 1.0054844617843628,grad_norm: 0.9386026745251025, iteration: 66786
loss: 0.9999194145202637,grad_norm: 0.9964886076935606, iteration: 66787
loss: 0.9977888464927673,grad_norm: 0.9999998289478644, iteration: 66788
loss: 1.017389178276062,grad_norm: 0.9999988792272347, iteration: 66789
loss: 1.000126600265503,grad_norm: 0.9999991390865179, iteration: 66790
loss: 1.0147579908370972,grad_norm: 0.9999990468201994, iteration: 66791
loss: 0.997768759727478,grad_norm: 0.9999992736759001, iteration: 66792
loss: 0.9861440658569336,grad_norm: 0.932133727084108, iteration: 66793
loss: 1.004974365234375,grad_norm: 0.9008159683611919, iteration: 66794
loss: 0.9781410694122314,grad_norm: 0.9999992046452004, iteration: 66795
loss: 0.974513053894043,grad_norm: 0.9255165069922336, iteration: 66796
loss: 0.9937332272529602,grad_norm: 0.9886640731498798, iteration: 66797
loss: 0.985755443572998,grad_norm: 0.9999991849266856, iteration: 66798
loss: 0.9936496615409851,grad_norm: 0.9999990656016192, iteration: 66799
loss: 1.0056781768798828,grad_norm: 0.940493443994979, iteration: 66800
loss: 0.9938276410102844,grad_norm: 0.9230547936357393, iteration: 66801
loss: 0.99012291431427,grad_norm: 0.9999992458761622, iteration: 66802
loss: 0.9885236024856567,grad_norm: 0.9766611986097813, iteration: 66803
loss: 0.9866266846656799,grad_norm: 0.9999991032910873, iteration: 66804
loss: 0.9888306260108948,grad_norm: 0.9774167537299971, iteration: 66805
loss: 1.0082213878631592,grad_norm: 0.9111822019611697, iteration: 66806
loss: 0.9755218029022217,grad_norm: 0.9489365418098361, iteration: 66807
loss: 0.9708527326583862,grad_norm: 0.9999991915981208, iteration: 66808
loss: 0.9846932291984558,grad_norm: 0.9999991563765008, iteration: 66809
loss: 1.0161702632904053,grad_norm: 0.999999141762883, iteration: 66810
loss: 1.0114222764968872,grad_norm: 0.999999167124842, iteration: 66811
loss: 0.9875391721725464,grad_norm: 0.9999990985271366, iteration: 66812
loss: 1.0119410753250122,grad_norm: 0.9999997630968338, iteration: 66813
loss: 1.0586786270141602,grad_norm: 0.9999999552981749, iteration: 66814
loss: 1.0098991394042969,grad_norm: 0.9999992943991788, iteration: 66815
loss: 1.0302820205688477,grad_norm: 0.8346711762599032, iteration: 66816
loss: 1.0227047204971313,grad_norm: 0.9999990768800165, iteration: 66817
loss: 0.9771116971969604,grad_norm: 0.9999992021572769, iteration: 66818
loss: 0.9845278859138489,grad_norm: 0.963603637147061, iteration: 66819
loss: 0.99375981092453,grad_norm: 0.9654786445249732, iteration: 66820
loss: 0.9987345933914185,grad_norm: 0.9999992585452223, iteration: 66821
loss: 1.0067732334136963,grad_norm: 0.8555077436201768, iteration: 66822
loss: 1.0258718729019165,grad_norm: 0.9184510624987303, iteration: 66823
loss: 0.9803205728530884,grad_norm: 0.9464118992201794, iteration: 66824
loss: 0.9907734990119934,grad_norm: 0.9554732969645159, iteration: 66825
loss: 1.015505075454712,grad_norm: 0.9275320137632883, iteration: 66826
loss: 1.0064646005630493,grad_norm: 0.9999991907539796, iteration: 66827
loss: 0.9751113653182983,grad_norm: 0.937478642455846, iteration: 66828
loss: 0.9713001847267151,grad_norm: 0.9614773970466552, iteration: 66829
loss: 1.002630591392517,grad_norm: 0.9999990992964793, iteration: 66830
loss: 1.0473915338516235,grad_norm: 0.9876377803645601, iteration: 66831
loss: 0.9715781807899475,grad_norm: 0.8183705078628237, iteration: 66832
loss: 1.0504919290542603,grad_norm: 0.9999999967861432, iteration: 66833
loss: 1.0181318521499634,grad_norm: 0.9999990787118708, iteration: 66834
loss: 0.9942002296447754,grad_norm: 0.9507459846134254, iteration: 66835
loss: 0.9920583367347717,grad_norm: 0.9666354344010255, iteration: 66836
loss: 1.0163044929504395,grad_norm: 0.8466208926848438, iteration: 66837
loss: 0.981762170791626,grad_norm: 0.9981377879170533, iteration: 66838
loss: 1.0039548873901367,grad_norm: 0.9822695318209328, iteration: 66839
loss: 1.0241186618804932,grad_norm: 0.9999991336903136, iteration: 66840
loss: 1.0289630889892578,grad_norm: 0.9999991946983674, iteration: 66841
loss: 1.0162385702133179,grad_norm: 0.9864675151899905, iteration: 66842
loss: 1.0145343542099,grad_norm: 0.9939024370129711, iteration: 66843
loss: 1.0153125524520874,grad_norm: 0.9999990525332804, iteration: 66844
loss: 0.9969095587730408,grad_norm: 0.999999211571649, iteration: 66845
loss: 1.0382061004638672,grad_norm: 0.999999302396947, iteration: 66846
loss: 0.9864377379417419,grad_norm: 0.9618030666088774, iteration: 66847
loss: 1.024696946144104,grad_norm: 0.9999990606630818, iteration: 66848
loss: 0.9715241193771362,grad_norm: 0.9999990916654853, iteration: 66849
loss: 0.9948315024375916,grad_norm: 0.9999993622858732, iteration: 66850
loss: 0.9941453337669373,grad_norm: 0.9999991014242674, iteration: 66851
loss: 1.0378201007843018,grad_norm: 0.9446076429778677, iteration: 66852
loss: 0.9938868880271912,grad_norm: 0.784419341122363, iteration: 66853
loss: 0.9909645318984985,grad_norm: 0.9999991810586851, iteration: 66854
loss: 1.012986660003662,grad_norm: 0.9822597225321816, iteration: 66855
loss: 0.9904980659484863,grad_norm: 0.9835956637617005, iteration: 66856
loss: 1.025500774383545,grad_norm: 0.9999996923369074, iteration: 66857
loss: 1.0043129920959473,grad_norm: 0.9991321361670252, iteration: 66858
loss: 1.0041545629501343,grad_norm: 0.9999991409893964, iteration: 66859
loss: 1.0111773014068604,grad_norm: 0.9999990196639753, iteration: 66860
loss: 1.0070929527282715,grad_norm: 0.9999992759846034, iteration: 66861
loss: 0.9964035153388977,grad_norm: 0.9999989978064148, iteration: 66862
loss: 1.0296964645385742,grad_norm: 0.9999991484853535, iteration: 66863
loss: 0.9611371159553528,grad_norm: 0.9658791653962296, iteration: 66864
loss: 0.9939670562744141,grad_norm: 0.9794229299252024, iteration: 66865
loss: 1.0226950645446777,grad_norm: 0.9999996567782886, iteration: 66866
loss: 1.0107039213180542,grad_norm: 0.9582775091750597, iteration: 66867
loss: 0.9930102825164795,grad_norm: 0.9999990880480382, iteration: 66868
loss: 0.9973445534706116,grad_norm: 0.9999990511849841, iteration: 66869
loss: 1.0173591375350952,grad_norm: 0.9999992531947508, iteration: 66870
loss: 0.9697891473770142,grad_norm: 0.8859380796027819, iteration: 66871
loss: 1.0178247690200806,grad_norm: 0.9581414555640649, iteration: 66872
loss: 0.9829043745994568,grad_norm: 0.9999991457689081, iteration: 66873
loss: 1.0045605897903442,grad_norm: 0.9999992327525687, iteration: 66874
loss: 1.0038092136383057,grad_norm: 0.9999990900378981, iteration: 66875
loss: 0.9739591479301453,grad_norm: 0.9855478768856266, iteration: 66876
loss: 1.0212732553482056,grad_norm: 0.9999991227262243, iteration: 66877
loss: 0.9868654012680054,grad_norm: 0.9999991326290643, iteration: 66878
loss: 1.0254050493240356,grad_norm: 0.9881007337939088, iteration: 66879
loss: 0.9974367022514343,grad_norm: 0.9148386644278378, iteration: 66880
loss: 1.0033825635910034,grad_norm: 0.9322852397930587, iteration: 66881
loss: 1.0436493158340454,grad_norm: 0.9999991597040958, iteration: 66882
loss: 1.0277856588363647,grad_norm: 0.9369058735845806, iteration: 66883
loss: 0.9764907360076904,grad_norm: 0.8608406134607497, iteration: 66884
loss: 0.9826794266700745,grad_norm: 0.9534816498162602, iteration: 66885
loss: 0.993668794631958,grad_norm: 0.9999996256702588, iteration: 66886
loss: 1.0117720365524292,grad_norm: 0.9999991713988693, iteration: 66887
loss: 0.9931240081787109,grad_norm: 0.9999991647243406, iteration: 66888
loss: 0.9701199531555176,grad_norm: 0.9737852250646396, iteration: 66889
loss: 1.1169816255569458,grad_norm: 0.9999993142755426, iteration: 66890
loss: 1.0070412158966064,grad_norm: 0.9192452410667585, iteration: 66891
loss: 1.012711524963379,grad_norm: 0.9977142538624127, iteration: 66892
loss: 0.9785863161087036,grad_norm: 0.9564788117977316, iteration: 66893
loss: 1.0134121179580688,grad_norm: 0.954618203202631, iteration: 66894
loss: 0.9955904483795166,grad_norm: 0.9610220885070949, iteration: 66895
loss: 1.005071759223938,grad_norm: 0.9285706742306498, iteration: 66896
loss: 0.9922778606414795,grad_norm: 0.9371251793678707, iteration: 66897
loss: 1.0177757740020752,grad_norm: 0.999999178289732, iteration: 66898
loss: 0.9783310890197754,grad_norm: 0.9999989732728948, iteration: 66899
loss: 0.9920395016670227,grad_norm: 0.9999991351167717, iteration: 66900
loss: 0.996531069278717,grad_norm: 0.928736582740353, iteration: 66901
loss: 0.9720215201377869,grad_norm: 0.964584341085955, iteration: 66902
loss: 1.0213834047317505,grad_norm: 0.9999991689849544, iteration: 66903
loss: 1.0173044204711914,grad_norm: 0.9999992312209862, iteration: 66904
loss: 1.026131272315979,grad_norm: 0.9999991289033485, iteration: 66905
loss: 1.0308034420013428,grad_norm: 0.9999992291057261, iteration: 66906
loss: 0.9860828518867493,grad_norm: 0.9999991122628342, iteration: 66907
loss: 1.0155683755874634,grad_norm: 0.98860705294691, iteration: 66908
loss: 0.9768738150596619,grad_norm: 0.9909451037447833, iteration: 66909
loss: 0.9653473496437073,grad_norm: 0.8961023624728609, iteration: 66910
loss: 0.9959499835968018,grad_norm: 0.9999991678625539, iteration: 66911
loss: 1.0387718677520752,grad_norm: 0.9999992507013236, iteration: 66912
loss: 1.0225658416748047,grad_norm: 0.84667781006483, iteration: 66913
loss: 0.9911190271377563,grad_norm: 0.99999922042191, iteration: 66914
loss: 1.004617691040039,grad_norm: 0.9999992625445409, iteration: 66915
loss: 0.9786365032196045,grad_norm: 0.9999991467277634, iteration: 66916
loss: 1.006536841392517,grad_norm: 0.9999990476249377, iteration: 66917
loss: 1.0446605682373047,grad_norm: 0.9999988997083396, iteration: 66918
loss: 0.9608742594718933,grad_norm: 0.9855857951886488, iteration: 66919
loss: 1.020735740661621,grad_norm: 0.9496007885472948, iteration: 66920
loss: 1.0210835933685303,grad_norm: 0.9999991366268168, iteration: 66921
loss: 0.9683346152305603,grad_norm: 0.9007275292057254, iteration: 66922
loss: 0.9911041855812073,grad_norm: 0.9653430223364364, iteration: 66923
loss: 1.0469958782196045,grad_norm: 0.9999992146733452, iteration: 66924
loss: 1.0282032489776611,grad_norm: 0.9999990142666604, iteration: 66925
loss: 1.006640076637268,grad_norm: 0.9999991346049528, iteration: 66926
loss: 0.9618632793426514,grad_norm: 0.9999990549151755, iteration: 66927
loss: 0.9769355058670044,grad_norm: 0.9999991840778915, iteration: 66928
loss: 1.0346126556396484,grad_norm: 0.9999990689014621, iteration: 66929
loss: 1.0325027704238892,grad_norm: 0.9999992300198478, iteration: 66930
loss: 1.015486478805542,grad_norm: 0.9999991658519942, iteration: 66931
loss: 1.0615109205245972,grad_norm: 0.9999997952421156, iteration: 66932
loss: 1.032664179801941,grad_norm: 0.9999992315375985, iteration: 66933
loss: 1.0165696144104004,grad_norm: 0.9999990972989365, iteration: 66934
loss: 1.0316286087036133,grad_norm: 0.9141212139130518, iteration: 66935
loss: 1.027647852897644,grad_norm: 0.9999994952160755, iteration: 66936
loss: 0.9908298850059509,grad_norm: 0.999999212204894, iteration: 66937
loss: 1.0064228773117065,grad_norm: 0.9999991349219893, iteration: 66938
loss: 1.0318280458450317,grad_norm: 0.9999991559800954, iteration: 66939
loss: 0.9629183411598206,grad_norm: 0.9885490548083561, iteration: 66940
loss: 1.04244065284729,grad_norm: 0.9347954909374118, iteration: 66941
loss: 1.0160632133483887,grad_norm: 0.9999990879364855, iteration: 66942
loss: 0.9620835185050964,grad_norm: 0.9253139483895925, iteration: 66943
loss: 0.9475353956222534,grad_norm: 0.8342641424391901, iteration: 66944
loss: 0.9995111227035522,grad_norm: 0.9999992566948752, iteration: 66945
loss: 1.047179937362671,grad_norm: 0.9715093749409531, iteration: 66946
loss: 1.0858347415924072,grad_norm: 0.9999998035351583, iteration: 66947
loss: 0.9826521277427673,grad_norm: 0.9999993072141652, iteration: 66948
loss: 1.0000102519989014,grad_norm: 0.992478122946551, iteration: 66949
loss: 1.0384728908538818,grad_norm: 0.9999991446833718, iteration: 66950
loss: 1.0308071374893188,grad_norm: 0.9235953865113503, iteration: 66951
loss: 1.0012928247451782,grad_norm: 0.9999993826964334, iteration: 66952
loss: 0.9996173977851868,grad_norm: 0.9202016458981671, iteration: 66953
loss: 0.9778288006782532,grad_norm: 0.9999995316296716, iteration: 66954
loss: 1.1795109510421753,grad_norm: 0.9999994422603209, iteration: 66955
loss: 1.018311858177185,grad_norm: 0.9999990468028799, iteration: 66956
loss: 0.9677109122276306,grad_norm: 0.9358000901189031, iteration: 66957
loss: 1.0022724866867065,grad_norm: 0.913351362849851, iteration: 66958
loss: 1.003316879272461,grad_norm: 0.9999991875517917, iteration: 66959
loss: 0.9687038064002991,grad_norm: 0.9999990732041956, iteration: 66960
loss: 1.0268783569335938,grad_norm: 0.9941430208755833, iteration: 66961
loss: 1.0323365926742554,grad_norm: 0.9999991618930782, iteration: 66962
loss: 0.9853255152702332,grad_norm: 0.9613203918379062, iteration: 66963
loss: 1.0230745077133179,grad_norm: 0.9999990166478974, iteration: 66964
loss: 0.9946025013923645,grad_norm: 0.9999989743744753, iteration: 66965
loss: 1.0209259986877441,grad_norm: 0.9999989719684823, iteration: 66966
loss: 1.0254474878311157,grad_norm: 0.9999992039312698, iteration: 66967
loss: 1.0139245986938477,grad_norm: 0.9999991571496859, iteration: 66968
loss: 1.0466935634613037,grad_norm: 0.9885398698096662, iteration: 66969
loss: 1.000714659690857,grad_norm: 0.9999992506138432, iteration: 66970
loss: 0.9829627275466919,grad_norm: 0.9999991171860766, iteration: 66971
loss: 0.9878690242767334,grad_norm: 0.8071258676950813, iteration: 66972
loss: 1.0016988515853882,grad_norm: 0.9999991352003249, iteration: 66973
loss: 0.9768890142440796,grad_norm: 0.9999990742847364, iteration: 66974
loss: 0.9761208295822144,grad_norm: 0.8792358582110736, iteration: 66975
loss: 0.9655149579048157,grad_norm: 0.848453295900994, iteration: 66976
loss: 0.9795637130737305,grad_norm: 0.9999992015740982, iteration: 66977
loss: 1.0207213163375854,grad_norm: 0.8600119425173188, iteration: 66978
loss: 1.024653673171997,grad_norm: 0.994583121856557, iteration: 66979
loss: 1.003702998161316,grad_norm: 0.9999990842183432, iteration: 66980
loss: 1.0128037929534912,grad_norm: 0.9999993470022598, iteration: 66981
loss: 1.0123392343521118,grad_norm: 0.999998912312422, iteration: 66982
loss: 1.0095939636230469,grad_norm: 0.9185550809650578, iteration: 66983
loss: 0.9945217370986938,grad_norm: 0.999999063146948, iteration: 66984
loss: 0.9968577027320862,grad_norm: 0.999999179185515, iteration: 66985
loss: 1.048079252243042,grad_norm: 0.9999992986655307, iteration: 66986
loss: 1.0382291078567505,grad_norm: 0.9999992823825702, iteration: 66987
loss: 1.031519889831543,grad_norm: 0.9999990347927199, iteration: 66988
loss: 0.974143922328949,grad_norm: 0.9999992665914957, iteration: 66989
loss: 1.034164309501648,grad_norm: 0.9999991509297428, iteration: 66990
loss: 1.0029114484786987,grad_norm: 0.9746820849817609, iteration: 66991
loss: 1.0066795349121094,grad_norm: 0.9999991651245919, iteration: 66992
loss: 0.9998252391815186,grad_norm: 0.8746233446137907, iteration: 66993
loss: 1.029154658317566,grad_norm: 0.9999993144297737, iteration: 66994
loss: 1.0176808834075928,grad_norm: 0.9552515900172903, iteration: 66995
loss: 1.014538288116455,grad_norm: 0.9788495751725687, iteration: 66996
loss: 0.9992735981941223,grad_norm: 0.9301779594949847, iteration: 66997
loss: 0.9794301986694336,grad_norm: 0.9999991625763629, iteration: 66998
loss: 0.9996171593666077,grad_norm: 0.9999992831428491, iteration: 66999
loss: 0.9823132157325745,grad_norm: 0.9999991255791711, iteration: 67000
loss: 1.0075255632400513,grad_norm: 0.9498372098682004, iteration: 67001
loss: 1.0391114950180054,grad_norm: 0.9999993261791191, iteration: 67002
loss: 0.9766549468040466,grad_norm: 0.9791699206709994, iteration: 67003
loss: 1.003989577293396,grad_norm: 0.9999992246059047, iteration: 67004
loss: 1.012058973312378,grad_norm: 0.9999989969313375, iteration: 67005
loss: 0.9913526773452759,grad_norm: 0.9999991407456986, iteration: 67006
loss: 1.095210313796997,grad_norm: 0.9999995100544575, iteration: 67007
loss: 1.039689540863037,grad_norm: 0.9999992117272446, iteration: 67008
loss: 1.0208367109298706,grad_norm: 0.9999991536813579, iteration: 67009
loss: 1.0029277801513672,grad_norm: 0.8503632722655857, iteration: 67010
loss: 1.002120852470398,grad_norm: 0.8816248926870203, iteration: 67011
loss: 0.9980175495147705,grad_norm: 0.9749546148182083, iteration: 67012
loss: 1.024901032447815,grad_norm: 0.9762406224234315, iteration: 67013
loss: 1.0219531059265137,grad_norm: 0.9812168020262911, iteration: 67014
loss: 1.0012664794921875,grad_norm: 0.999999022651346, iteration: 67015
loss: 1.010859727859497,grad_norm: 0.9896264034733385, iteration: 67016
loss: 0.9931405782699585,grad_norm: 0.9999990704128553, iteration: 67017
loss: 1.0026031732559204,grad_norm: 0.9650889296425271, iteration: 67018
loss: 0.9890656471252441,grad_norm: 0.9270027681216176, iteration: 67019
loss: 1.032252550125122,grad_norm: 0.9999992967302481, iteration: 67020
loss: 1.0291436910629272,grad_norm: 0.9282448239370666, iteration: 67021
loss: 1.006374716758728,grad_norm: 0.9999992590209942, iteration: 67022
loss: 1.0173667669296265,grad_norm: 0.9999990366721659, iteration: 67023
loss: 1.0469324588775635,grad_norm: 0.9999993045196549, iteration: 67024
loss: 1.0037014484405518,grad_norm: 0.9627406521033425, iteration: 67025
loss: 1.0095652341842651,grad_norm: 0.9066474923953168, iteration: 67026
loss: 0.9482909440994263,grad_norm: 0.9999990211454902, iteration: 67027
loss: 0.9823853373527527,grad_norm: 0.9999990714680397, iteration: 67028
loss: 1.0107840299606323,grad_norm: 0.9999993079684165, iteration: 67029
loss: 0.9924764037132263,grad_norm: 0.895108905810815, iteration: 67030
loss: 0.9954613447189331,grad_norm: 0.9999990992227362, iteration: 67031
loss: 1.008375644683838,grad_norm: 0.9999989998702209, iteration: 67032
loss: 1.0107520818710327,grad_norm: 0.9606640307563818, iteration: 67033
loss: 0.991134762763977,grad_norm: 0.8622151122916833, iteration: 67034
loss: 0.9612804055213928,grad_norm: 0.9037887748900185, iteration: 67035
loss: 0.9507784247398376,grad_norm: 0.9999992292577327, iteration: 67036
loss: 0.9915094971656799,grad_norm: 0.9999990859368174, iteration: 67037
loss: 1.02432382106781,grad_norm: 0.999999217295893, iteration: 67038
loss: 0.9840094447135925,grad_norm: 0.9357170302945589, iteration: 67039
loss: 0.9990841746330261,grad_norm: 0.9999991550256714, iteration: 67040
loss: 1.055291771888733,grad_norm: 0.9999994943855346, iteration: 67041
loss: 0.9970153570175171,grad_norm: 0.9999991960019434, iteration: 67042
loss: 1.037703275680542,grad_norm: 0.9999992650295931, iteration: 67043
loss: 0.9444780349731445,grad_norm: 0.9999991583088673, iteration: 67044
loss: 0.9925544857978821,grad_norm: 0.9648973811223884, iteration: 67045
loss: 1.0730425119400024,grad_norm: 0.9999991252924532, iteration: 67046
loss: 0.9699468612670898,grad_norm: 0.895160589313379, iteration: 67047
loss: 1.0258350372314453,grad_norm: 0.8402953370578304, iteration: 67048
loss: 0.9613369107246399,grad_norm: 0.9999991323088208, iteration: 67049
loss: 0.9997379779815674,grad_norm: 0.9999991334662558, iteration: 67050
loss: 1.003657579421997,grad_norm: 0.999999519398751, iteration: 67051
loss: 1.0195749998092651,grad_norm: 0.9906878541052855, iteration: 67052
loss: 1.0607457160949707,grad_norm: 0.999999785699219, iteration: 67053
loss: 0.9975342750549316,grad_norm: 0.9521716313262789, iteration: 67054
loss: 0.9605624675750732,grad_norm: 0.9999990812977364, iteration: 67055
loss: 0.9760876297950745,grad_norm: 0.986231520767635, iteration: 67056
loss: 1.0072444677352905,grad_norm: 0.8206780102347523, iteration: 67057
loss: 0.9953372478485107,grad_norm: 0.9243830225042563, iteration: 67058
loss: 1.0239986181259155,grad_norm: 0.9999990484754613, iteration: 67059
loss: 1.0458060503005981,grad_norm: 0.9999990545402051, iteration: 67060
loss: 0.9730777144432068,grad_norm: 0.977241510716852, iteration: 67061
loss: 1.0072730779647827,grad_norm: 0.9999992574653527, iteration: 67062
loss: 1.009803295135498,grad_norm: 0.91874394400377, iteration: 67063
loss: 0.9835203289985657,grad_norm: 0.9949483719218696, iteration: 67064
loss: 0.9796432256698608,grad_norm: 0.9382073876655309, iteration: 67065
loss: 0.9592322707176208,grad_norm: 0.9999991270585719, iteration: 67066
loss: 0.9990420937538147,grad_norm: 0.9999990153340061, iteration: 67067
loss: 1.005751371383667,grad_norm: 0.9713460728883901, iteration: 67068
loss: 1.0393098592758179,grad_norm: 0.9999993402600866, iteration: 67069
loss: 0.9781983494758606,grad_norm: 0.9685372605694158, iteration: 67070
loss: 1.000749111175537,grad_norm: 0.9999992140118913, iteration: 67071
loss: 1.0221956968307495,grad_norm: 0.9999990278891262, iteration: 67072
loss: 1.0194542407989502,grad_norm: 0.8474887984319579, iteration: 67073
loss: 1.0390411615371704,grad_norm: 0.9999992523826974, iteration: 67074
loss: 1.011353611946106,grad_norm: 0.999999122811945, iteration: 67075
loss: 1.0554161071777344,grad_norm: 0.9999990891575731, iteration: 67076
loss: 0.9932506680488586,grad_norm: 0.9999989681817808, iteration: 67077
loss: 1.002386212348938,grad_norm: 0.9980948105721129, iteration: 67078
loss: 0.9969184398651123,grad_norm: 0.9999993760366217, iteration: 67079
loss: 1.0144827365875244,grad_norm: 0.9999991524758616, iteration: 67080
loss: 0.9988827109336853,grad_norm: 0.9785151847492269, iteration: 67081
loss: 0.9796550869941711,grad_norm: 0.9999991024209314, iteration: 67082
loss: 1.000235915184021,grad_norm: 0.9775248651412527, iteration: 67083
loss: 1.0211924314498901,grad_norm: 0.9999991931529684, iteration: 67084
loss: 1.0080927610397339,grad_norm: 0.9999991934087757, iteration: 67085
loss: 0.9799709320068359,grad_norm: 0.9999990630058944, iteration: 67086
loss: 1.0046817064285278,grad_norm: 0.9999990596215271, iteration: 67087
loss: 0.9946807622909546,grad_norm: 0.9483957249062016, iteration: 67088
loss: 1.0201294422149658,grad_norm: 0.999999045827922, iteration: 67089
loss: 1.0714308023452759,grad_norm: 0.999999818424496, iteration: 67090
loss: 0.9467167258262634,grad_norm: 0.9999991392281263, iteration: 67091
loss: 0.998615562915802,grad_norm: 0.999999179395237, iteration: 67092
loss: 1.0067979097366333,grad_norm: 0.9999991825311041, iteration: 67093
loss: 0.9706896543502808,grad_norm: 0.8941550706122309, iteration: 67094
loss: 0.997053861618042,grad_norm: 0.9999991548132835, iteration: 67095
loss: 0.9682115912437439,grad_norm: 0.9999991723543162, iteration: 67096
loss: 1.026187777519226,grad_norm: 0.9904577850163924, iteration: 67097
loss: 0.9821619987487793,grad_norm: 0.999999027314643, iteration: 67098
loss: 1.0043479204177856,grad_norm: 0.915209887659128, iteration: 67099
loss: 0.9889050126075745,grad_norm: 0.999999302730738, iteration: 67100
loss: 0.9987506866455078,grad_norm: 0.9170782341088027, iteration: 67101
loss: 0.9990030527114868,grad_norm: 0.9999991347289255, iteration: 67102
loss: 0.9927520751953125,grad_norm: 0.8931119749560209, iteration: 67103
loss: 0.9959969520568848,grad_norm: 0.9999991140500324, iteration: 67104
loss: 1.0280189514160156,grad_norm: 0.9999992143806808, iteration: 67105
loss: 1.038318395614624,grad_norm: 0.9999990884850071, iteration: 67106
loss: 0.9957752227783203,grad_norm: 0.9999989968652683, iteration: 67107
loss: 0.9489295482635498,grad_norm: 0.9999992890993582, iteration: 67108
loss: 0.9892701506614685,grad_norm: 0.9546959683512599, iteration: 67109
loss: 1.0468952655792236,grad_norm: 0.9519093601349717, iteration: 67110
loss: 0.9921849966049194,grad_norm: 0.9780358107953491, iteration: 67111
loss: 1.0177918672561646,grad_norm: 0.9999993273672918, iteration: 67112
loss: 1.0046714544296265,grad_norm: 0.9999991231424868, iteration: 67113
loss: 0.9991922378540039,grad_norm: 0.999999132345299, iteration: 67114
loss: 0.9833254814147949,grad_norm: 0.9999990838695666, iteration: 67115
loss: 1.0115470886230469,grad_norm: 0.9999989896439923, iteration: 67116
loss: 1.000489354133606,grad_norm: 0.9999992020089751, iteration: 67117
loss: 0.9924362301826477,grad_norm: 0.9999989530351698, iteration: 67118
loss: 1.003792643547058,grad_norm: 0.9999989268552351, iteration: 67119
loss: 1.0132898092269897,grad_norm: 0.9999989202821149, iteration: 67120
loss: 1.045876145362854,grad_norm: 0.9999995458356786, iteration: 67121
loss: 1.0091640949249268,grad_norm: 0.9999990880631813, iteration: 67122
loss: 0.9978113174438477,grad_norm: 0.9999990637655561, iteration: 67123
loss: 0.9792795777320862,grad_norm: 0.9999989675810985, iteration: 67124
loss: 1.028637170791626,grad_norm: 0.9610552241106793, iteration: 67125
loss: 0.9692839980125427,grad_norm: 0.9509412838782897, iteration: 67126
loss: 0.988427996635437,grad_norm: 0.989457252956394, iteration: 67127
loss: 0.9660235047340393,grad_norm: 0.8909738165389584, iteration: 67128
loss: 0.9501489400863647,grad_norm: 0.9999992260888658, iteration: 67129
loss: 1.0264862775802612,grad_norm: 0.9999991768017915, iteration: 67130
loss: 0.9790871739387512,grad_norm: 0.9461051762517272, iteration: 67131
loss: 0.9864212870597839,grad_norm: 0.9878454062318199, iteration: 67132
loss: 0.9847483038902283,grad_norm: 0.9161690503348416, iteration: 67133
loss: 0.9819902777671814,grad_norm: 0.9999991086643947, iteration: 67134
loss: 1.0009359121322632,grad_norm: 0.9999991369581517, iteration: 67135
loss: 1.0237337350845337,grad_norm: 0.9999991881096183, iteration: 67136
loss: 0.9939119219779968,grad_norm: 0.9999991578856069, iteration: 67137
loss: 0.9892813563346863,grad_norm: 0.9999991576291902, iteration: 67138
loss: 1.0192209482192993,grad_norm: 0.9999990273902276, iteration: 67139
loss: 0.9757717847824097,grad_norm: 0.9849717391264787, iteration: 67140
loss: 0.9900382161140442,grad_norm: 0.9999990323784119, iteration: 67141
loss: 1.0140913724899292,grad_norm: 0.9999991747790892, iteration: 67142
loss: 1.0118188858032227,grad_norm: 0.9999991051490094, iteration: 67143
loss: 0.9696324467658997,grad_norm: 0.9941806605199426, iteration: 67144
loss: 1.0318043231964111,grad_norm: 0.999999012087814, iteration: 67145
loss: 1.0112574100494385,grad_norm: 0.99999922207934, iteration: 67146
loss: 1.0165811777114868,grad_norm: 0.9999993595628306, iteration: 67147
loss: 0.996145486831665,grad_norm: 0.9577953764304683, iteration: 67148
loss: 0.9641342163085938,grad_norm: 0.9082505093564167, iteration: 67149
loss: 0.9581724405288696,grad_norm: 0.9999992850480645, iteration: 67150
loss: 0.9687879681587219,grad_norm: 0.999999222842082, iteration: 67151
loss: 1.0318912267684937,grad_norm: 0.9999989103185035, iteration: 67152
loss: 1.0166441202163696,grad_norm: 0.999998991258227, iteration: 67153
loss: 1.0312178134918213,grad_norm: 0.9941349130380116, iteration: 67154
loss: 1.0077472925186157,grad_norm: 0.969620782340458, iteration: 67155
loss: 0.9702264666557312,grad_norm: 0.9303353426508101, iteration: 67156
loss: 1.0095765590667725,grad_norm: 0.999999108019135, iteration: 67157
loss: 1.0013532638549805,grad_norm: 0.9616999413979039, iteration: 67158
loss: 0.9646072387695312,grad_norm: 0.9999993476103303, iteration: 67159
loss: 0.9861564040184021,grad_norm: 0.9999989583518432, iteration: 67160
loss: 1.0684075355529785,grad_norm: 0.9999990587078057, iteration: 67161
loss: 0.9777531027793884,grad_norm: 0.9226209015642429, iteration: 67162
loss: 1.0476830005645752,grad_norm: 0.9999994698819586, iteration: 67163
loss: 1.0011874437332153,grad_norm: 0.9999991359085446, iteration: 67164
loss: 1.001531958580017,grad_norm: 0.9565385119404027, iteration: 67165
loss: 0.9345846176147461,grad_norm: 0.9380013650036985, iteration: 67166
loss: 1.0029137134552002,grad_norm: 0.8936752665840678, iteration: 67167
loss: 1.0111720561981201,grad_norm: 0.9999996483696428, iteration: 67168
loss: 0.991233229637146,grad_norm: 0.9999988654887274, iteration: 67169
loss: 0.9559207558631897,grad_norm: 0.9999991646743471, iteration: 67170
loss: 0.9954507350921631,grad_norm: 0.8807996736984073, iteration: 67171
loss: 0.9847990274429321,grad_norm: 0.958982311327969, iteration: 67172
loss: 1.023476004600525,grad_norm: 0.9999991959652055, iteration: 67173
loss: 0.9965387582778931,grad_norm: 0.9962328414851968, iteration: 67174
loss: 1.0072216987609863,grad_norm: 0.9999990148862884, iteration: 67175
loss: 0.952263355255127,grad_norm: 0.9329801939891682, iteration: 67176
loss: 0.9708966016769409,grad_norm: 0.9999990201482828, iteration: 67177
loss: 1.0053050518035889,grad_norm: 0.9695027966340675, iteration: 67178
loss: 1.0018001794815063,grad_norm: 0.9430981285824919, iteration: 67179
loss: 0.992081880569458,grad_norm: 0.9879205542499295, iteration: 67180
loss: 0.9646901488304138,grad_norm: 0.9786759996514546, iteration: 67181
loss: 1.00870943069458,grad_norm: 0.9999991320028128, iteration: 67182
loss: 1.0806341171264648,grad_norm: 0.9999998617519552, iteration: 67183
loss: 1.0182960033416748,grad_norm: 0.9091686631349708, iteration: 67184
loss: 0.9908384680747986,grad_norm: 0.9812074922613723, iteration: 67185
loss: 1.0028339624404907,grad_norm: 0.9999994006601245, iteration: 67186
loss: 0.9865462183952332,grad_norm: 0.999999240805061, iteration: 67187
loss: 1.0279734134674072,grad_norm: 0.9999990944990559, iteration: 67188
loss: 0.9945409297943115,grad_norm: 0.9999991776491418, iteration: 67189
loss: 0.987469494342804,grad_norm: 0.8540523127447972, iteration: 67190
loss: 0.9975055456161499,grad_norm: 0.9999990546587686, iteration: 67191
loss: 1.015634298324585,grad_norm: 0.9999992336645174, iteration: 67192
loss: 1.0278668403625488,grad_norm: 0.9999990408626529, iteration: 67193
loss: 1.0059013366699219,grad_norm: 0.9999991328862973, iteration: 67194
loss: 0.9784523248672485,grad_norm: 0.9999991319547109, iteration: 67195
loss: 1.031366229057312,grad_norm: 0.9999991130569336, iteration: 67196
loss: 0.9639740586280823,grad_norm: 0.9999989444575141, iteration: 67197
loss: 1.0328004360198975,grad_norm: 0.9999993595143734, iteration: 67198
loss: 1.0355814695358276,grad_norm: 0.9999994865800388, iteration: 67199
loss: 1.0032432079315186,grad_norm: 0.9999991574855015, iteration: 67200
loss: 1.007150650024414,grad_norm: 0.9999993021588633, iteration: 67201
loss: 1.0156033039093018,grad_norm: 0.9999993073854161, iteration: 67202
loss: 1.0118211507797241,grad_norm: 0.999999144915254, iteration: 67203
loss: 0.9739056825637817,grad_norm: 0.9767623767701681, iteration: 67204
loss: 0.9622790217399597,grad_norm: 0.9999992840384944, iteration: 67205
loss: 1.014482021331787,grad_norm: 0.9999990061324544, iteration: 67206
loss: 0.9787335395812988,grad_norm: 0.999999185330836, iteration: 67207
loss: 1.007839560508728,grad_norm: 0.9999995262515304, iteration: 67208
loss: 0.9554136395454407,grad_norm: 0.9999991846696445, iteration: 67209
loss: 0.9446929693222046,grad_norm: 0.9999994212250478, iteration: 67210
loss: 1.0056248903274536,grad_norm: 0.9999991335412708, iteration: 67211
loss: 1.0071624517440796,grad_norm: 0.969431865898278, iteration: 67212
loss: 1.0565232038497925,grad_norm: 0.9999996732781545, iteration: 67213
loss: 0.9849575161933899,grad_norm: 0.9999992395157121, iteration: 67214
loss: 0.9730553030967712,grad_norm: 0.9999992261825352, iteration: 67215
loss: 1.0027860403060913,grad_norm: 0.905815777549734, iteration: 67216
loss: 1.1171202659606934,grad_norm: 0.9999992751911104, iteration: 67217
loss: 1.0313469171524048,grad_norm: 0.9158084372083718, iteration: 67218
loss: 1.0420037508010864,grad_norm: 0.9216544825478933, iteration: 67219
loss: 0.974257230758667,grad_norm: 0.9999991007809209, iteration: 67220
loss: 1.48878014087677,grad_norm: 0.9999995533318481, iteration: 67221
loss: 1.0175857543945312,grad_norm: 0.9671724210610617, iteration: 67222
loss: 0.9911075234413147,grad_norm: 0.9999990376700666, iteration: 67223
loss: 0.9848496913909912,grad_norm: 0.9686135158532523, iteration: 67224
loss: 1.2931320667266846,grad_norm: 0.9999998132475683, iteration: 67225
loss: 0.9874874353408813,grad_norm: 0.99999899838573, iteration: 67226
loss: 1.0208834409713745,grad_norm: 0.9999993709242332, iteration: 67227
loss: 0.975527286529541,grad_norm: 0.9877436989108537, iteration: 67228
loss: 1.0451799631118774,grad_norm: 0.9999990753133859, iteration: 67229
loss: 1.006713628768921,grad_norm: 0.9999991047728991, iteration: 67230
loss: 0.9934403300285339,grad_norm: 0.9999992241935441, iteration: 67231
loss: 1.0431346893310547,grad_norm: 0.9999989583619744, iteration: 67232
loss: 1.036907434463501,grad_norm: 0.9999990117967762, iteration: 67233
loss: 0.9723868370056152,grad_norm: 0.9999992134866693, iteration: 67234
loss: 0.9828659296035767,grad_norm: 0.9999990674172373, iteration: 67235
loss: 1.0330713987350464,grad_norm: 0.9999990921494121, iteration: 67236
loss: 1.025838017463684,grad_norm: 0.8774641827685515, iteration: 67237
loss: 0.9612503051757812,grad_norm: 0.9999992800010981, iteration: 67238
loss: 1.036311149597168,grad_norm: 0.9846062367160598, iteration: 67239
loss: 1.0076580047607422,grad_norm: 0.99999901619358, iteration: 67240
loss: 0.9943671226501465,grad_norm: 0.9999990307069493, iteration: 67241
loss: 0.9897775053977966,grad_norm: 0.9999992529670086, iteration: 67242
loss: 0.9902405738830566,grad_norm: 0.9999991049355691, iteration: 67243
loss: 1.018540620803833,grad_norm: 0.9999993201437952, iteration: 67244
loss: 1.015420913696289,grad_norm: 0.9999990556668191, iteration: 67245
loss: 0.9959582686424255,grad_norm: 0.8259399141199958, iteration: 67246
loss: 0.9892908930778503,grad_norm: 0.999999048154863, iteration: 67247
loss: 0.9944618344306946,grad_norm: 0.9999990266730557, iteration: 67248
loss: 1.014283537864685,grad_norm: 0.9999991055473239, iteration: 67249
loss: 1.0714243650436401,grad_norm: 0.999999659114799, iteration: 67250
loss: 1.057040810585022,grad_norm: 0.9999995429119816, iteration: 67251
loss: 0.9997373223304749,grad_norm: 0.9999990884245842, iteration: 67252
loss: 0.999146044254303,grad_norm: 0.8627670075203185, iteration: 67253
loss: 0.9888424277305603,grad_norm: 0.9829597280409627, iteration: 67254
loss: 1.0243891477584839,grad_norm: 0.999999198317903, iteration: 67255
loss: 1.0365240573883057,grad_norm: 0.999999148125477, iteration: 67256
loss: 1.0000935792922974,grad_norm: 0.9999991769799699, iteration: 67257
loss: 1.0215544700622559,grad_norm: 0.9999992135764693, iteration: 67258
loss: 1.0344505310058594,grad_norm: 0.9804297982080766, iteration: 67259
loss: 0.9827108383178711,grad_norm: 0.9999991677107598, iteration: 67260
loss: 1.0082776546478271,grad_norm: 0.9999990178045317, iteration: 67261
loss: 1.096388339996338,grad_norm: 0.9999998425058576, iteration: 67262
loss: 0.9752070903778076,grad_norm: 0.9999990868971965, iteration: 67263
loss: 0.9878188371658325,grad_norm: 0.9683597308547217, iteration: 67264
loss: 1.0085155963897705,grad_norm: 0.9999992015769462, iteration: 67265
loss: 1.042572021484375,grad_norm: 0.9999991981774211, iteration: 67266
loss: 1.0250952243804932,grad_norm: 0.9999995080295112, iteration: 67267
loss: 0.9979779720306396,grad_norm: 0.9375773613859067, iteration: 67268
loss: 0.9754302501678467,grad_norm: 0.9999990593285573, iteration: 67269
loss: 1.0199872255325317,grad_norm: 0.8969071538950847, iteration: 67270
loss: 1.0286463499069214,grad_norm: 0.9999991427093794, iteration: 67271
loss: 0.9894149899482727,grad_norm: 0.9999997452116605, iteration: 67272
loss: 1.0276356935501099,grad_norm: 0.999999157283681, iteration: 67273
loss: 1.027176856994629,grad_norm: 0.9999993778464276, iteration: 67274
loss: 1.0306310653686523,grad_norm: 0.9031932364645349, iteration: 67275
loss: 1.005338191986084,grad_norm: 0.9999991735868625, iteration: 67276
loss: 0.997884213924408,grad_norm: 0.9734247123176922, iteration: 67277
loss: 1.0015277862548828,grad_norm: 0.9999991531234537, iteration: 67278
loss: 1.0056668519973755,grad_norm: 0.9776934703297618, iteration: 67279
loss: 0.9975382089614868,grad_norm: 0.9999989454326317, iteration: 67280
loss: 1.00765061378479,grad_norm: 0.9999995746629599, iteration: 67281
loss: 0.9792543649673462,grad_norm: 0.9999991476039835, iteration: 67282
loss: 0.9901381731033325,grad_norm: 0.9999991724920142, iteration: 67283
loss: 0.9870892763137817,grad_norm: 0.999999038514498, iteration: 67284
loss: 0.9973313212394714,grad_norm: 0.956927859194656, iteration: 67285
loss: 1.0190736055374146,grad_norm: 0.9999992759632369, iteration: 67286
loss: 1.042297601699829,grad_norm: 0.9999993207848291, iteration: 67287
loss: 0.98152756690979,grad_norm: 0.9999991579211007, iteration: 67288
loss: 0.9741898775100708,grad_norm: 0.9999993137339498, iteration: 67289
loss: 1.0219449996948242,grad_norm: 0.9999991860777752, iteration: 67290
loss: 1.0436729192733765,grad_norm: 0.9999992235844769, iteration: 67291
loss: 1.0051547288894653,grad_norm: 0.9999991959749958, iteration: 67292
loss: 1.025026798248291,grad_norm: 0.9999991403093198, iteration: 67293
loss: 0.9863944053649902,grad_norm: 0.9717822795969686, iteration: 67294
loss: 1.005879282951355,grad_norm: 0.9999991186637813, iteration: 67295
loss: 0.9959943294525146,grad_norm: 0.8697193478231021, iteration: 67296
loss: 0.994499683380127,grad_norm: 0.9999991933755967, iteration: 67297
loss: 1.0297298431396484,grad_norm: 0.9999992826713608, iteration: 67298
loss: 1.0510512590408325,grad_norm: 0.9999991283130459, iteration: 67299
loss: 0.9910882711410522,grad_norm: 0.9264403336123157, iteration: 67300
loss: 1.0190839767456055,grad_norm: 0.9999998266674627, iteration: 67301
loss: 0.983457088470459,grad_norm: 0.9999991776937238, iteration: 67302
loss: 0.9883409142494202,grad_norm: 0.9638169729897474, iteration: 67303
loss: 0.980080246925354,grad_norm: 0.999999278038519, iteration: 67304
loss: 0.9807852506637573,grad_norm: 0.9999990025951269, iteration: 67305
loss: 0.9996786713600159,grad_norm: 0.9999992164925647, iteration: 67306
loss: 0.9997000098228455,grad_norm: 0.9365225670187797, iteration: 67307
loss: 0.9903172254562378,grad_norm: 0.9999991254504134, iteration: 67308
loss: 1.001414179801941,grad_norm: 0.9999990832356287, iteration: 67309
loss: 1.019466519355774,grad_norm: 0.9246491745945251, iteration: 67310
loss: 0.9664227962493896,grad_norm: 0.9999991350725341, iteration: 67311
loss: 0.9780383110046387,grad_norm: 0.9618483592258761, iteration: 67312
loss: 1.0367801189422607,grad_norm: 0.8606189671499229, iteration: 67313
loss: 0.9834306836128235,grad_norm: 0.8413738243161639, iteration: 67314
loss: 1.0050368309020996,grad_norm: 0.9627013965778399, iteration: 67315
loss: 0.9797403812408447,grad_norm: 0.9999993822280284, iteration: 67316
loss: 0.9481509923934937,grad_norm: 0.9312883431594223, iteration: 67317
loss: 0.9920125007629395,grad_norm: 0.9711666124997639, iteration: 67318
loss: 1.0060254335403442,grad_norm: 0.9999991298256551, iteration: 67319
loss: 1.0154603719711304,grad_norm: 0.999999128148249, iteration: 67320
loss: 1.0264116525650024,grad_norm: 0.9999989130227528, iteration: 67321
loss: 1.006247878074646,grad_norm: 0.8397036381200519, iteration: 67322
loss: 1.0038695335388184,grad_norm: 0.8792598563024319, iteration: 67323
loss: 0.9782075881958008,grad_norm: 0.9999994631629455, iteration: 67324
loss: 0.9722320437431335,grad_norm: 0.9418173750336881, iteration: 67325
loss: 1.01386296749115,grad_norm: 0.9999990538213429, iteration: 67326
loss: 0.99828040599823,grad_norm: 0.9521001972164793, iteration: 67327
loss: 1.0065650939941406,grad_norm: 0.9999991259729616, iteration: 67328
loss: 1.035568356513977,grad_norm: 0.9999992310819286, iteration: 67329
loss: 0.9904624223709106,grad_norm: 0.9999993062028449, iteration: 67330
loss: 1.0244148969650269,grad_norm: 0.9999990228999709, iteration: 67331
loss: 0.9865092635154724,grad_norm: 0.9691342861939126, iteration: 67332
loss: 1.0069297552108765,grad_norm: 0.9999990526594478, iteration: 67333
loss: 1.0052086114883423,grad_norm: 0.9694299901347323, iteration: 67334
loss: 0.9750574231147766,grad_norm: 0.9999992632937971, iteration: 67335
loss: 0.9796852469444275,grad_norm: 0.9999991846053166, iteration: 67336
loss: 0.9936365485191345,grad_norm: 0.9512621752827135, iteration: 67337
loss: 1.00589919090271,grad_norm: 0.9954162879577506, iteration: 67338
loss: 1.0016937255859375,grad_norm: 0.973682982567537, iteration: 67339
loss: 1.0076547861099243,grad_norm: 0.9197269281849324, iteration: 67340
loss: 1.0160496234893799,grad_norm: 0.9999991776115543, iteration: 67341
loss: 0.9959531426429749,grad_norm: 0.8543477468122093, iteration: 67342
loss: 1.0294499397277832,grad_norm: 0.9941668002908653, iteration: 67343
loss: 0.9876971244812012,grad_norm: 0.9999991731692744, iteration: 67344
loss: 0.975190281867981,grad_norm: 0.9999990606306304, iteration: 67345
loss: 1.028992772102356,grad_norm: 0.9999992259420244, iteration: 67346
loss: 1.0224195718765259,grad_norm: 0.9999990199110701, iteration: 67347
loss: 1.0277587175369263,grad_norm: 0.8923270727284653, iteration: 67348
loss: 0.9777852892875671,grad_norm: 0.9999991106943802, iteration: 67349
loss: 1.0101908445358276,grad_norm: 0.9911282804031284, iteration: 67350
loss: 0.990447461605072,grad_norm: 0.8929944307572272, iteration: 67351
loss: 0.9889780282974243,grad_norm: 0.9999991115194966, iteration: 67352
loss: 0.9965502023696899,grad_norm: 0.9999990768422824, iteration: 67353
loss: 1.0128883123397827,grad_norm: 0.9999990111018395, iteration: 67354
loss: 0.9637899994850159,grad_norm: 0.8455098547971569, iteration: 67355
loss: 1.0539418458938599,grad_norm: 0.9679947102074659, iteration: 67356
loss: 1.0401124954223633,grad_norm: 0.9999992958922529, iteration: 67357
loss: 0.9913135170936584,grad_norm: 0.9999989862819405, iteration: 67358
loss: 1.0491427183151245,grad_norm: 0.9999997240045769, iteration: 67359
loss: 0.9969292283058167,grad_norm: 0.9915143275791453, iteration: 67360
loss: 1.0132906436920166,grad_norm: 0.9999990664520361, iteration: 67361
loss: 0.975480854511261,grad_norm: 0.9999991445869346, iteration: 67362
loss: 1.0219498872756958,grad_norm: 0.9189956398170772, iteration: 67363
loss: 1.0082802772521973,grad_norm: 0.8696522382218136, iteration: 67364
loss: 1.024951696395874,grad_norm: 0.9999990250978145, iteration: 67365
loss: 0.9918336868286133,grad_norm: 0.9999990530241983, iteration: 67366
loss: 1.0014959573745728,grad_norm: 0.8553329309464748, iteration: 67367
loss: 0.9962577819824219,grad_norm: 0.9999995791210001, iteration: 67368
loss: 0.9833990335464478,grad_norm: 0.874015828277951, iteration: 67369
loss: 0.9914939999580383,grad_norm: 0.9999992800165306, iteration: 67370
loss: 0.9944487810134888,grad_norm: 0.9718830078357826, iteration: 67371
loss: 1.0199528932571411,grad_norm: 0.9999989683928531, iteration: 67372
loss: 0.9857181310653687,grad_norm: 0.9749787118872574, iteration: 67373
loss: 0.9913164377212524,grad_norm: 0.9999991238631635, iteration: 67374
loss: 1.0294506549835205,grad_norm: 0.9999990257822826, iteration: 67375
loss: 1.0063804388046265,grad_norm: 0.9150752676097019, iteration: 67376
loss: 0.9850912094116211,grad_norm: 0.9999991217474133, iteration: 67377
loss: 0.9850916862487793,grad_norm: 0.952502821121223, iteration: 67378
loss: 0.9636089205741882,grad_norm: 0.9999990929579, iteration: 67379
loss: 1.0154439210891724,grad_norm: 0.9778651738923455, iteration: 67380
loss: 0.9900071024894714,grad_norm: 0.9999991517217789, iteration: 67381
loss: 0.9877778887748718,grad_norm: 0.9999991099624382, iteration: 67382
loss: 1.0460405349731445,grad_norm: 0.9999990551340336, iteration: 67383
loss: 0.9917330145835876,grad_norm: 0.9999990574214264, iteration: 67384
loss: 0.978440523147583,grad_norm: 0.9999990700522058, iteration: 67385
loss: 1.0077499151229858,grad_norm: 0.9686535510661157, iteration: 67386
loss: 0.9764467477798462,grad_norm: 0.999999116603302, iteration: 67387
loss: 0.9898273348808289,grad_norm: 0.8718082469549987, iteration: 67388
loss: 0.9990205764770508,grad_norm: 0.9999991048301929, iteration: 67389
loss: 0.988839864730835,grad_norm: 0.9999990668501086, iteration: 67390
loss: 1.0054035186767578,grad_norm: 0.9999991564489367, iteration: 67391
loss: 0.995917797088623,grad_norm: 0.8997137515868627, iteration: 67392
loss: 0.9948988556861877,grad_norm: 0.999999068463394, iteration: 67393
loss: 1.0141432285308838,grad_norm: 0.8990748700296303, iteration: 67394
loss: 0.9768552184104919,grad_norm: 0.8538974635734451, iteration: 67395
loss: 0.9976726770401001,grad_norm: 0.9218632467787299, iteration: 67396
loss: 1.0125014781951904,grad_norm: 0.9111291749631996, iteration: 67397
loss: 0.9702019095420837,grad_norm: 0.9491766255551919, iteration: 67398
loss: 0.9850643873214722,grad_norm: 0.9259075711886391, iteration: 67399
loss: 1.0246362686157227,grad_norm: 0.978204386523222, iteration: 67400
loss: 1.0183626413345337,grad_norm: 0.9999991832804576, iteration: 67401
loss: 0.9862485527992249,grad_norm: 0.8399479792691084, iteration: 67402
loss: 1.0195692777633667,grad_norm: 0.9999990577149549, iteration: 67403
loss: 1.0533547401428223,grad_norm: 0.998645974500538, iteration: 67404
loss: 0.9812440872192383,grad_norm: 0.9999991295119401, iteration: 67405
loss: 0.9665437340736389,grad_norm: 0.9202235089042512, iteration: 67406
loss: 1.0272908210754395,grad_norm: 0.9562756759914457, iteration: 67407
loss: 1.0280272960662842,grad_norm: 0.9999991905430315, iteration: 67408
loss: 1.0217444896697998,grad_norm: 0.999999162485156, iteration: 67409
loss: 0.9764633774757385,grad_norm: 0.9963774400355837, iteration: 67410
loss: 0.9851509928703308,grad_norm: 0.999999167626536, iteration: 67411
loss: 0.9874033331871033,grad_norm: 0.999999240031336, iteration: 67412
loss: 0.9984488487243652,grad_norm: 0.9999990529968092, iteration: 67413
loss: 0.9394076466560364,grad_norm: 0.9506907610301422, iteration: 67414
loss: 1.0009260177612305,grad_norm: 0.9999991019773163, iteration: 67415
loss: 1.0252175331115723,grad_norm: 0.9999999570896436, iteration: 67416
loss: 0.9718388319015503,grad_norm: 0.9999993231952434, iteration: 67417
loss: 1.019335389137268,grad_norm: 0.9999993444249243, iteration: 67418
loss: 1.0086370706558228,grad_norm: 0.9999993680428797, iteration: 67419
loss: 1.0396645069122314,grad_norm: 0.9999995052349272, iteration: 67420
loss: 0.9967461228370667,grad_norm: 0.9093734614948696, iteration: 67421
loss: 0.9799985885620117,grad_norm: 0.8248371352247768, iteration: 67422
loss: 0.9950211048126221,grad_norm: 0.9999990824698366, iteration: 67423
loss: 1.0036958456039429,grad_norm: 0.9999991167474649, iteration: 67424
loss: 1.0073988437652588,grad_norm: 0.9999991339414808, iteration: 67425
loss: 1.014958381652832,grad_norm: 0.9999991409375676, iteration: 67426
loss: 0.9940232038497925,grad_norm: 0.9999991715385627, iteration: 67427
loss: 1.0837892293930054,grad_norm: 0.9999997918166187, iteration: 67428
loss: 0.9816953539848328,grad_norm: 0.9999989926411086, iteration: 67429
loss: 0.9754700064659119,grad_norm: 0.9165421817467402, iteration: 67430
loss: 0.9546657800674438,grad_norm: 0.9999991258720504, iteration: 67431
loss: 1.0182112455368042,grad_norm: 0.9229804460000712, iteration: 67432
loss: 1.0467067956924438,grad_norm: 0.9999993217674229, iteration: 67433
loss: 0.9787173271179199,grad_norm: 0.9999990965538949, iteration: 67434
loss: 1.0279022455215454,grad_norm: 0.9999991022105681, iteration: 67435
loss: 0.9816852807998657,grad_norm: 0.9999991291604177, iteration: 67436
loss: 1.0129510164260864,grad_norm: 0.9999992203014074, iteration: 67437
loss: 1.0053973197937012,grad_norm: 0.999999148579075, iteration: 67438
loss: 0.9821475148200989,grad_norm: 0.98811287580089, iteration: 67439
loss: 0.9840912818908691,grad_norm: 0.9789196444047132, iteration: 67440
loss: 0.9728410840034485,grad_norm: 0.8985838910199758, iteration: 67441
loss: 0.9587363004684448,grad_norm: 0.9581205283686516, iteration: 67442
loss: 0.9858788847923279,grad_norm: 0.9999990297222872, iteration: 67443
loss: 0.9819865822792053,grad_norm: 0.9999993316982285, iteration: 67444
loss: 1.0143282413482666,grad_norm: 0.9999989562553474, iteration: 67445
loss: 1.0059670209884644,grad_norm: 0.9904965322857203, iteration: 67446
loss: 1.0081435441970825,grad_norm: 0.9354357500838223, iteration: 67447
loss: 1.0019632577896118,grad_norm: 0.9999991546305542, iteration: 67448
loss: 1.0111758708953857,grad_norm: 0.8798796151613506, iteration: 67449
loss: 1.0357545614242554,grad_norm: 0.9999999208245557, iteration: 67450
loss: 0.991030752658844,grad_norm: 0.9999989979957077, iteration: 67451
loss: 1.0081872940063477,grad_norm: 0.9999991736089953, iteration: 67452
loss: 1.0005766153335571,grad_norm: 0.9213079267215333, iteration: 67453
loss: 0.9592251181602478,grad_norm: 0.9789539902454768, iteration: 67454
loss: 1.002577304840088,grad_norm: 0.9378854274022719, iteration: 67455
loss: 1.0212740898132324,grad_norm: 0.9999990953588703, iteration: 67456
loss: 0.9547474980354309,grad_norm: 0.9999990325524933, iteration: 67457
loss: 0.9984870553016663,grad_norm: 0.909345092265391, iteration: 67458
loss: 1.0223184823989868,grad_norm: 0.9999992421958083, iteration: 67459
loss: 1.0586131811141968,grad_norm: 0.9999990991974732, iteration: 67460
loss: 0.9470300674438477,grad_norm: 0.9999992795519916, iteration: 67461
loss: 1.003618597984314,grad_norm: 0.9999992657454373, iteration: 67462
loss: 1.0451834201812744,grad_norm: 0.9999993656622611, iteration: 67463
loss: 1.0628904104232788,grad_norm: 0.9999994235016472, iteration: 67464
loss: 1.0194495916366577,grad_norm: 0.9730283170041393, iteration: 67465
loss: 0.9930561184883118,grad_norm: 0.9999990412763269, iteration: 67466
loss: 0.979923665523529,grad_norm: 0.9876955338852413, iteration: 67467
loss: 0.9907973408699036,grad_norm: 0.9999990463922318, iteration: 67468
loss: 0.969115674495697,grad_norm: 0.9734973556685355, iteration: 67469
loss: 0.9765194058418274,grad_norm: 0.9182880709360944, iteration: 67470
loss: 1.00953209400177,grad_norm: 0.9500364258116719, iteration: 67471
loss: 0.9637589454650879,grad_norm: 0.999999041999556, iteration: 67472
loss: 1.0158414840698242,grad_norm: 0.9703528164144852, iteration: 67473
loss: 1.0044538974761963,grad_norm: 0.999999292126082, iteration: 67474
loss: 1.0072245597839355,grad_norm: 0.9711721856577178, iteration: 67475
loss: 0.9946446418762207,grad_norm: 0.9999990583727615, iteration: 67476
loss: 1.0224096775054932,grad_norm: 0.9999994639887545, iteration: 67477
loss: 1.018119215965271,grad_norm: 0.999999193448895, iteration: 67478
loss: 0.9985896348953247,grad_norm: 0.9742566898767675, iteration: 67479
loss: 0.9935540556907654,grad_norm: 0.999999142608011, iteration: 67480
loss: 1.0113558769226074,grad_norm: 0.9603731752829078, iteration: 67481
loss: 0.9997705817222595,grad_norm: 0.9284259384162389, iteration: 67482
loss: 1.0215041637420654,grad_norm: 0.9949944347621908, iteration: 67483
loss: 1.0228431224822998,grad_norm: 0.8949742923216921, iteration: 67484
loss: 1.0239536762237549,grad_norm: 0.9999995086918959, iteration: 67485
loss: 1.008702278137207,grad_norm: 0.9999992046149367, iteration: 67486
loss: 0.9884591102600098,grad_norm: 0.9999990956853303, iteration: 67487
loss: 1.042075514793396,grad_norm: 0.9134452424792774, iteration: 67488
loss: 1.003584861755371,grad_norm: 0.8666957196622236, iteration: 67489
loss: 0.9886825680732727,grad_norm: 0.999998989015456, iteration: 67490
loss: 1.0109639167785645,grad_norm: 0.8471591807653265, iteration: 67491
loss: 1.0172195434570312,grad_norm: 0.9999990671300215, iteration: 67492
loss: 1.0100228786468506,grad_norm: 0.9999991286106154, iteration: 67493
loss: 0.991493284702301,grad_norm: 0.9999990766602673, iteration: 67494
loss: 0.9993481636047363,grad_norm: 0.9999990712540998, iteration: 67495
loss: 1.0049034357070923,grad_norm: 0.9999990291122858, iteration: 67496
loss: 1.0604771375656128,grad_norm: 0.9377403241340249, iteration: 67497
loss: 1.0035420656204224,grad_norm: 0.9999990417627085, iteration: 67498
loss: 1.0358893871307373,grad_norm: 0.9999992619709706, iteration: 67499
loss: 1.018298864364624,grad_norm: 0.9180848740930649, iteration: 67500
loss: 0.9975041151046753,grad_norm: 0.9999990834715587, iteration: 67501
loss: 0.9576424360275269,grad_norm: 0.9999991043515826, iteration: 67502
loss: 0.9965968728065491,grad_norm: 0.9999990421545687, iteration: 67503
loss: 1.015012502670288,grad_norm: 0.9999990749272671, iteration: 67504
loss: 1.0125519037246704,grad_norm: 0.9999991335728374, iteration: 67505
loss: 0.9932657480239868,grad_norm: 0.9999998787387353, iteration: 67506
loss: 1.0289781093597412,grad_norm: 0.9341886567867366, iteration: 67507
loss: 0.9969046711921692,grad_norm: 0.9999992283113407, iteration: 67508
loss: 0.9606121182441711,grad_norm: 0.9999991854785608, iteration: 67509
loss: 0.9729292988777161,grad_norm: 0.965571018520371, iteration: 67510
loss: 1.011986494064331,grad_norm: 0.9174911720016377, iteration: 67511
loss: 0.9680643677711487,grad_norm: 0.9815484036892674, iteration: 67512
loss: 0.9905202388763428,grad_norm: 0.9457669372138391, iteration: 67513
loss: 0.9805594682693481,grad_norm: 0.9999992281662314, iteration: 67514
loss: 1.0136617422103882,grad_norm: 0.9999990616795819, iteration: 67515
loss: 0.9958120584487915,grad_norm: 0.9999992871070722, iteration: 67516
loss: 1.013940691947937,grad_norm: 0.9496455114445976, iteration: 67517
loss: 1.046518087387085,grad_norm: 0.9889177319652377, iteration: 67518
loss: 0.9878838062286377,grad_norm: 0.9996801122702235, iteration: 67519
loss: 1.0119853019714355,grad_norm: 0.999999028464565, iteration: 67520
loss: 1.0155525207519531,grad_norm: 0.9577014815603478, iteration: 67521
loss: 1.0560798645019531,grad_norm: 0.9999989293169725, iteration: 67522
loss: 1.0039942264556885,grad_norm: 0.9999990488858536, iteration: 67523
loss: 0.9939014315605164,grad_norm: 0.9575168689799832, iteration: 67524
loss: 0.9723759889602661,grad_norm: 0.9999992344005357, iteration: 67525
loss: 0.9928812980651855,grad_norm: 0.9999990331038171, iteration: 67526
loss: 1.0084269046783447,grad_norm: 0.9129168913953576, iteration: 67527
loss: 1.1036818027496338,grad_norm: 0.9999992696607404, iteration: 67528
loss: 1.0126869678497314,grad_norm: 0.9642596871802581, iteration: 67529
loss: 0.9943789839744568,grad_norm: 0.9656602968267147, iteration: 67530
loss: 1.0149028301239014,grad_norm: 0.9911639041959479, iteration: 67531
loss: 1.0025795698165894,grad_norm: 0.9454031121550451, iteration: 67532
loss: 1.0184916257858276,grad_norm: 0.923683745004454, iteration: 67533
loss: 1.0090268850326538,grad_norm: 0.8347203954833666, iteration: 67534
loss: 0.9833669662475586,grad_norm: 0.9412981000776592, iteration: 67535
loss: 1.0133000612258911,grad_norm: 0.8707491503689281, iteration: 67536
loss: 0.996258020401001,grad_norm: 0.8650676873090087, iteration: 67537
loss: 0.9767534136772156,grad_norm: 0.9771247588664259, iteration: 67538
loss: 0.9917111396789551,grad_norm: 0.9999990690417002, iteration: 67539
loss: 1.00625479221344,grad_norm: 0.9999990274842673, iteration: 67540
loss: 0.9874531626701355,grad_norm: 0.9999992178106535, iteration: 67541
loss: 0.9897791147232056,grad_norm: 0.7423186521289955, iteration: 67542
loss: 1.0053379535675049,grad_norm: 0.9999992341604271, iteration: 67543
loss: 0.9980102181434631,grad_norm: 0.9989614603202865, iteration: 67544
loss: 1.0291668176651,grad_norm: 0.9675051646273684, iteration: 67545
loss: 0.9882878065109253,grad_norm: 0.9999991577576445, iteration: 67546
loss: 0.9568824768066406,grad_norm: 0.9691536366092487, iteration: 67547
loss: 1.0471410751342773,grad_norm: 0.9999992708801984, iteration: 67548
loss: 0.9878344535827637,grad_norm: 0.9999990489872749, iteration: 67549
loss: 1.01636803150177,grad_norm: 0.9266942891599087, iteration: 67550
loss: 1.0027782917022705,grad_norm: 0.9999992724767054, iteration: 67551
loss: 0.9943153858184814,grad_norm: 0.9999990934812253, iteration: 67552
loss: 0.99724280834198,grad_norm: 0.9250753901516542, iteration: 67553
loss: 1.0105098485946655,grad_norm: 0.8696821928505355, iteration: 67554
loss: 1.0044203996658325,grad_norm: 0.9631188503495856, iteration: 67555
loss: 1.0167025327682495,grad_norm: 0.999999154416208, iteration: 67556
loss: 0.9820960760116577,grad_norm: 0.8624550038963918, iteration: 67557
loss: 0.9825493097305298,grad_norm: 0.9999991039497976, iteration: 67558
loss: 1.0215892791748047,grad_norm: 0.9999991358882663, iteration: 67559
loss: 0.9885003566741943,grad_norm: 0.9999993944960047, iteration: 67560
loss: 0.9812372922897339,grad_norm: 0.8843578733967594, iteration: 67561
loss: 0.9838329553604126,grad_norm: 0.9936718029657733, iteration: 67562
loss: 1.025386929512024,grad_norm: 0.9999991125914229, iteration: 67563
loss: 1.0006663799285889,grad_norm: 0.9999993164715666, iteration: 67564
loss: 1.0241321325302124,grad_norm: 0.9999990971561632, iteration: 67565
loss: 1.0178064107894897,grad_norm: 0.9999991612208082, iteration: 67566
loss: 1.0034122467041016,grad_norm: 0.8919100728653335, iteration: 67567
loss: 1.0188331604003906,grad_norm: 0.9999991985333819, iteration: 67568
loss: 0.9695324897766113,grad_norm: 0.9379993434055804, iteration: 67569
loss: 1.0135966539382935,grad_norm: 0.9918082360408994, iteration: 67570
loss: 1.0566564798355103,grad_norm: 0.9999996558260072, iteration: 67571
loss: 0.9928551316261292,grad_norm: 0.9238763622900155, iteration: 67572
loss: 1.0205353498458862,grad_norm: 0.9706377580640754, iteration: 67573
loss: 0.9591284990310669,grad_norm: 0.9999989441099962, iteration: 67574
loss: 0.9976488947868347,grad_norm: 0.7581517882784915, iteration: 67575
loss: 1.005913496017456,grad_norm: 0.9143351250237374, iteration: 67576
loss: 1.0067552328109741,grad_norm: 0.9999991417754479, iteration: 67577
loss: 1.0376033782958984,grad_norm: 0.9936798231175021, iteration: 67578
loss: 1.0224164724349976,grad_norm: 0.9999990085555637, iteration: 67579
loss: 0.9832104444503784,grad_norm: 0.8968991371294188, iteration: 67580
loss: 0.9881092309951782,grad_norm: 0.9999990761253666, iteration: 67581
loss: 1.0027821063995361,grad_norm: 0.9387246735905446, iteration: 67582
loss: 0.991256833076477,grad_norm: 0.9708552874322496, iteration: 67583
loss: 1.0160244703292847,grad_norm: 0.999999101396065, iteration: 67584
loss: 1.0391634702682495,grad_norm: 0.9999992060662908, iteration: 67585
loss: 1.0245388746261597,grad_norm: 0.8812485220099252, iteration: 67586
loss: 0.9951115250587463,grad_norm: 0.8481782569147783, iteration: 67587
loss: 1.0705881118774414,grad_norm: 0.9999990753216953, iteration: 67588
loss: 1.0217169523239136,grad_norm: 0.9999992201348822, iteration: 67589
loss: 0.9913005232810974,grad_norm: 0.9835934053865262, iteration: 67590
loss: 1.0122042894363403,grad_norm: 0.9011687918453182, iteration: 67591
loss: 0.9703800678253174,grad_norm: 0.9999990712522978, iteration: 67592
loss: 1.020314335823059,grad_norm: 0.9999991412545971, iteration: 67593
loss: 0.9941177368164062,grad_norm: 0.9999989907137556, iteration: 67594
loss: 0.9669874310493469,grad_norm: 0.9999991461949244, iteration: 67595
loss: 1.0070253610610962,grad_norm: 0.9999998847072128, iteration: 67596
loss: 0.9884026050567627,grad_norm: 0.9487066949213029, iteration: 67597
loss: 1.0305157899856567,grad_norm: 0.9999994333880664, iteration: 67598
loss: 1.0325554609298706,grad_norm: 0.9999991704762823, iteration: 67599
loss: 0.9601433873176575,grad_norm: 0.9999991720626784, iteration: 67600
loss: 0.9969832301139832,grad_norm: 0.9999991779707937, iteration: 67601
loss: 1.0231107473373413,grad_norm: 0.9999993892764991, iteration: 67602
loss: 1.0073761940002441,grad_norm: 0.9999990769329281, iteration: 67603
loss: 1.0257318019866943,grad_norm: 0.9999990549670746, iteration: 67604
loss: 0.9616833329200745,grad_norm: 0.9999992643217203, iteration: 67605
loss: 1.0130614042282104,grad_norm: 0.9999993353682097, iteration: 67606
loss: 0.9960731267929077,grad_norm: 0.999999385681108, iteration: 67607
loss: 1.0032458305358887,grad_norm: 0.9999990010020766, iteration: 67608
loss: 0.9515096545219421,grad_norm: 0.8535704725819394, iteration: 67609
loss: 0.9913631081581116,grad_norm: 0.9999990179923868, iteration: 67610
loss: 1.0031245946884155,grad_norm: 0.9738366243852637, iteration: 67611
loss: 1.0161339044570923,grad_norm: 0.9999991144243705, iteration: 67612
loss: 0.9920938611030579,grad_norm: 0.9521567877412089, iteration: 67613
loss: 0.991349995136261,grad_norm: 0.9999990099369225, iteration: 67614
loss: 0.9721671342849731,grad_norm: 0.924006106824592, iteration: 67615
loss: 0.9978272914886475,grad_norm: 0.9243632601894554, iteration: 67616
loss: 1.012535810470581,grad_norm: 0.9999989781692024, iteration: 67617
loss: 1.0072906017303467,grad_norm: 0.9999991960480642, iteration: 67618
loss: 0.9959768652915955,grad_norm: 0.8368585585578133, iteration: 67619
loss: 0.9878680109977722,grad_norm: 0.9341901959570158, iteration: 67620
loss: 1.0032068490982056,grad_norm: 0.9292421771447458, iteration: 67621
loss: 0.9891884922981262,grad_norm: 0.9798335198793499, iteration: 67622
loss: 0.9849606156349182,grad_norm: 0.9999991571621247, iteration: 67623
loss: 0.9652194976806641,grad_norm: 0.9999990896791626, iteration: 67624
loss: 0.9760503172874451,grad_norm: 0.9999991491958042, iteration: 67625
loss: 0.9978790283203125,grad_norm: 0.9999991701506834, iteration: 67626
loss: 1.0099377632141113,grad_norm: 0.9999990722850957, iteration: 67627
loss: 1.0099037885665894,grad_norm: 0.9999990212379345, iteration: 67628
loss: 1.0090675354003906,grad_norm: 0.9285239144069123, iteration: 67629
loss: 0.9827467203140259,grad_norm: 0.9999989625311358, iteration: 67630
loss: 0.9955350756645203,grad_norm: 0.9152812999618808, iteration: 67631
loss: 0.9869315028190613,grad_norm: 0.9612111468094474, iteration: 67632
loss: 1.0556503534317017,grad_norm: 0.9880860018491869, iteration: 67633
loss: 1.0026541948318481,grad_norm: 0.9999993022673157, iteration: 67634
loss: 1.0368460416793823,grad_norm: 0.9999990502594867, iteration: 67635
loss: 1.0521739721298218,grad_norm: 0.9999992125522758, iteration: 67636
loss: 1.007645845413208,grad_norm: 0.8439154482957167, iteration: 67637
loss: 0.93595951795578,grad_norm: 0.9999993760747615, iteration: 67638
loss: 0.9956026673316956,grad_norm: 0.9022249269400126, iteration: 67639
loss: 0.9911369681358337,grad_norm: 0.9999991369458208, iteration: 67640
loss: 1.062901258468628,grad_norm: 0.9999991074826372, iteration: 67641
loss: 1.0122041702270508,grad_norm: 0.916924475561594, iteration: 67642
loss: 1.0287660360336304,grad_norm: 0.9999990070967065, iteration: 67643
loss: 0.969001293182373,grad_norm: 0.9999991129495427, iteration: 67644
loss: 0.9967662692070007,grad_norm: 0.9999993199069648, iteration: 67645
loss: 1.034563422203064,grad_norm: 0.971650992472758, iteration: 67646
loss: 0.9909655451774597,grad_norm: 0.9999991441713053, iteration: 67647
loss: 1.02652108669281,grad_norm: 0.875367503761812, iteration: 67648
loss: 1.0181671380996704,grad_norm: 0.9999991965546308, iteration: 67649
loss: 1.0027272701263428,grad_norm: 0.9619154337174379, iteration: 67650
loss: 1.0085476636886597,grad_norm: 0.9999991844651356, iteration: 67651
loss: 0.9980796575546265,grad_norm: 0.9999990585160032, iteration: 67652
loss: 1.0097317695617676,grad_norm: 0.9999992816432215, iteration: 67653
loss: 0.9840878248214722,grad_norm: 0.9999989330482207, iteration: 67654
loss: 0.9855768084526062,grad_norm: 0.9095859281005163, iteration: 67655
loss: 1.0368390083312988,grad_norm: 0.9999992145651246, iteration: 67656
loss: 0.9600722193717957,grad_norm: 0.9169256215853622, iteration: 67657
loss: 1.0002440214157104,grad_norm: 0.9999991207912575, iteration: 67658
loss: 1.010573387145996,grad_norm: 0.9999991071421751, iteration: 67659
loss: 1.004805564880371,grad_norm: 0.9999990787982944, iteration: 67660
loss: 1.0284104347229004,grad_norm: 0.8821532594877656, iteration: 67661
loss: 1.008772850036621,grad_norm: 0.9999993321985284, iteration: 67662
loss: 0.9744406342506409,grad_norm: 0.9887571239348715, iteration: 67663
loss: 1.0113507509231567,grad_norm: 0.99999929383932, iteration: 67664
loss: 1.0111421346664429,grad_norm: 0.9999992416141635, iteration: 67665
loss: 0.9953233599662781,grad_norm: 0.9999990520516147, iteration: 67666
loss: 1.0022989511489868,grad_norm: 0.9999990386704656, iteration: 67667
loss: 0.9938459396362305,grad_norm: 0.9999991285816214, iteration: 67668
loss: 1.019942045211792,grad_norm: 0.9999990998259471, iteration: 67669
loss: 1.0079184770584106,grad_norm: 0.9999991556616487, iteration: 67670
loss: 1.0144895315170288,grad_norm: 0.9860173409413998, iteration: 67671
loss: 0.9944909811019897,grad_norm: 0.9746377114650525, iteration: 67672
loss: 1.0227526426315308,grad_norm: 0.9853263407539982, iteration: 67673
loss: 1.0085488557815552,grad_norm: 0.9999991924210083, iteration: 67674
loss: 0.9767885208129883,grad_norm: 0.9678133683533628, iteration: 67675
loss: 0.9934161901473999,grad_norm: 0.9999992680392721, iteration: 67676
loss: 1.0112254619598389,grad_norm: 0.9710323792517355, iteration: 67677
loss: 1.0126913785934448,grad_norm: 0.9999990895107888, iteration: 67678
loss: 0.9802576899528503,grad_norm: 0.9999991247521131, iteration: 67679
loss: 1.0128618478775024,grad_norm: 0.9999992345891121, iteration: 67680
loss: 0.9807225465774536,grad_norm: 0.9999991072655197, iteration: 67681
loss: 1.0094940662384033,grad_norm: 0.9999989913438232, iteration: 67682
loss: 1.0030256509780884,grad_norm: 0.9587685585671017, iteration: 67683
loss: 1.0071254968643188,grad_norm: 0.9999991491444814, iteration: 67684
loss: 0.9878409504890442,grad_norm: 0.9999992054794004, iteration: 67685
loss: 1.006905198097229,grad_norm: 0.9999990429148532, iteration: 67686
loss: 1.0107017755508423,grad_norm: 0.9999990815472746, iteration: 67687
loss: 1.0087512731552124,grad_norm: 0.913623094464394, iteration: 67688
loss: 0.9960051774978638,grad_norm: 0.7853505531934132, iteration: 67689
loss: 0.9823487997055054,grad_norm: 0.9769252451327367, iteration: 67690
loss: 0.9597625732421875,grad_norm: 0.9902498210417643, iteration: 67691
loss: 0.9968169927597046,grad_norm: 0.9577416935648083, iteration: 67692
loss: 0.9636878371238708,grad_norm: 0.8697495570586944, iteration: 67693
loss: 1.0055112838745117,grad_norm: 0.9999992014013455, iteration: 67694
loss: 0.9817414283752441,grad_norm: 0.9999994762593313, iteration: 67695
loss: 1.044033408164978,grad_norm: 0.9999992156070315, iteration: 67696
loss: 1.0033177137374878,grad_norm: 0.9999991071839229, iteration: 67697
loss: 0.9819939136505127,grad_norm: 0.9049826872826178, iteration: 67698
loss: 1.0110316276550293,grad_norm: 0.999998974008698, iteration: 67699
loss: 0.9872182011604309,grad_norm: 0.9667551994193715, iteration: 67700
loss: 0.9995908141136169,grad_norm: 0.9999991294499527, iteration: 67701
loss: 0.9780533909797668,grad_norm: 0.9667651669512982, iteration: 67702
loss: 0.9956720471382141,grad_norm: 0.9999991618674353, iteration: 67703
loss: 0.9795989990234375,grad_norm: 0.999999173559211, iteration: 67704
loss: 1.000900149345398,grad_norm: 0.9999991845419532, iteration: 67705
loss: 1.0097250938415527,grad_norm: 0.9586540881651565, iteration: 67706
loss: 1.0200837850570679,grad_norm: 0.894403104354725, iteration: 67707
loss: 1.0414155721664429,grad_norm: 0.9999992582582637, iteration: 67708
loss: 0.9815718531608582,grad_norm: 0.8841969587291807, iteration: 67709
loss: 0.9827108383178711,grad_norm: 0.9662272458400256, iteration: 67710
loss: 0.9986974000930786,grad_norm: 0.9999991092656295, iteration: 67711
loss: 1.0203567743301392,grad_norm: 0.9999989650186772, iteration: 67712
loss: 0.9773332476615906,grad_norm: 0.9999993091648549, iteration: 67713
loss: 0.9961257576942444,grad_norm: 0.9989893342128744, iteration: 67714
loss: 0.961957573890686,grad_norm: 0.9999991096024587, iteration: 67715
loss: 0.9894779324531555,grad_norm: 0.8360013179115051, iteration: 67716
loss: 1.0170156955718994,grad_norm: 0.9396156884969732, iteration: 67717
loss: 1.0019885301589966,grad_norm: 0.999999080564359, iteration: 67718
loss: 1.0100669860839844,grad_norm: 0.8631158691358598, iteration: 67719
loss: 1.0495517253875732,grad_norm: 0.9999990701843019, iteration: 67720
loss: 0.9975133538246155,grad_norm: 0.9999992020048023, iteration: 67721
loss: 1.008010983467102,grad_norm: 0.9999991943320962, iteration: 67722
loss: 1.009148120880127,grad_norm: 0.9559794287884419, iteration: 67723
loss: 1.012010931968689,grad_norm: 0.9999992892165104, iteration: 67724
loss: 1.009305477142334,grad_norm: 0.9999991265981159, iteration: 67725
loss: 1.014879584312439,grad_norm: 0.9999991353350591, iteration: 67726
loss: 1.0216271877288818,grad_norm: 0.9969562975391917, iteration: 67727
loss: 0.9851171374320984,grad_norm: 0.9999992200154518, iteration: 67728
loss: 1.02768075466156,grad_norm: 0.9999997270091863, iteration: 67729
loss: 0.9975950717926025,grad_norm: 0.9561325949339391, iteration: 67730
loss: 0.9947192072868347,grad_norm: 0.9999992071571955, iteration: 67731
loss: 1.0195480585098267,grad_norm: 0.9999990036488817, iteration: 67732
loss: 0.988814115524292,grad_norm: 0.9999990020406591, iteration: 67733
loss: 1.0026801824569702,grad_norm: 0.9836872936591531, iteration: 67734
loss: 0.9937031865119934,grad_norm: 0.9516639574091137, iteration: 67735
loss: 1.0216903686523438,grad_norm: 0.9999990317609935, iteration: 67736
loss: 1.0313751697540283,grad_norm: 0.8242235660964528, iteration: 67737
loss: 0.9815059304237366,grad_norm: 0.9999991815149634, iteration: 67738
loss: 0.9681429266929626,grad_norm: 0.9686702319823893, iteration: 67739
loss: 1.0434911251068115,grad_norm: 0.9999990247714482, iteration: 67740
loss: 0.9936297535896301,grad_norm: 0.9999990189383575, iteration: 67741
loss: 0.9895126819610596,grad_norm: 0.9999992258148175, iteration: 67742
loss: 0.969845712184906,grad_norm: 0.9999991613821329, iteration: 67743
loss: 0.9970730543136597,grad_norm: 0.9999992184284929, iteration: 67744
loss: 1.0030053853988647,grad_norm: 0.999999103900197, iteration: 67745
loss: 0.9826920628547668,grad_norm: 0.8956741973092462, iteration: 67746
loss: 1.0258996486663818,grad_norm: 0.917566306130022, iteration: 67747
loss: 0.9634342789649963,grad_norm: 0.9999992040199666, iteration: 67748
loss: 0.970209538936615,grad_norm: 0.9999990913069697, iteration: 67749
loss: 1.0000971555709839,grad_norm: 0.935416270655058, iteration: 67750
loss: 1.0101025104522705,grad_norm: 0.9999989334477746, iteration: 67751
loss: 1.0236668586730957,grad_norm: 0.99999916921202, iteration: 67752
loss: 0.9590222239494324,grad_norm: 0.9610156612904878, iteration: 67753
loss: 0.99810791015625,grad_norm: 0.9999991277962101, iteration: 67754
loss: 1.0047812461853027,grad_norm: 0.9624620097947106, iteration: 67755
loss: 0.9647021889686584,grad_norm: 0.9999990220005849, iteration: 67756
loss: 1.001800537109375,grad_norm: 0.9119228329745107, iteration: 67757
loss: 0.9750518202781677,grad_norm: 0.9868184823519193, iteration: 67758
loss: 0.9887382388114929,grad_norm: 0.9737600978619344, iteration: 67759
loss: 1.017532467842102,grad_norm: 0.9100442379477918, iteration: 67760
loss: 1.0367844104766846,grad_norm: 0.9999992249667213, iteration: 67761
loss: 1.0189683437347412,grad_norm: 0.9999990658838964, iteration: 67762
loss: 1.0248149633407593,grad_norm: 0.9999990635925444, iteration: 67763
loss: 0.965740442276001,grad_norm: 0.9364169759632417, iteration: 67764
loss: 0.9900509715080261,grad_norm: 0.8955953562274861, iteration: 67765
loss: 0.9977948665618896,grad_norm: 0.8924970465556071, iteration: 67766
loss: 0.9936341643333435,grad_norm: 0.9999990025397661, iteration: 67767
loss: 1.0461143255233765,grad_norm: 0.9999998747375983, iteration: 67768
loss: 0.9955697655677795,grad_norm: 0.9595385629542144, iteration: 67769
loss: 1.0109951496124268,grad_norm: 0.99262411326744, iteration: 67770
loss: 0.9926117062568665,grad_norm: 0.9615620465603791, iteration: 67771
loss: 1.0148043632507324,grad_norm: 0.9999991153844676, iteration: 67772
loss: 0.9676923155784607,grad_norm: 0.9993738565107373, iteration: 67773
loss: 0.9627925157546997,grad_norm: 0.9530258872175069, iteration: 67774
loss: 0.9882690906524658,grad_norm: 0.9557057176868726, iteration: 67775
loss: 0.9424377083778381,grad_norm: 0.9999990678834432, iteration: 67776
loss: 1.004245400428772,grad_norm: 0.9999992196382869, iteration: 67777
loss: 1.0291162729263306,grad_norm: 0.956548157387866, iteration: 67778
loss: 1.0237456560134888,grad_norm: 0.9999990331711399, iteration: 67779
loss: 1.0146393775939941,grad_norm: 0.973596005542729, iteration: 67780
loss: 0.9521874189376831,grad_norm: 0.9999991070327796, iteration: 67781
loss: 0.9902822375297546,grad_norm: 0.9657769331393683, iteration: 67782
loss: 1.0072441101074219,grad_norm: 0.9623087177779325, iteration: 67783
loss: 0.9505484104156494,grad_norm: 0.9999990622985543, iteration: 67784
loss: 0.9974509477615356,grad_norm: 0.8340076544932549, iteration: 67785
loss: 0.9846713542938232,grad_norm: 0.9807729453308509, iteration: 67786
loss: 1.0496032238006592,grad_norm: 0.9999991805793497, iteration: 67787
loss: 0.999218225479126,grad_norm: 0.9999991790824895, iteration: 67788
loss: 1.008037805557251,grad_norm: 0.8553492168190215, iteration: 67789
loss: 0.9453680515289307,grad_norm: 0.9278768600394119, iteration: 67790
loss: 0.9711264371871948,grad_norm: 0.970884841265135, iteration: 67791
loss: 1.023596167564392,grad_norm: 0.9422539330566593, iteration: 67792
loss: 1.0141665935516357,grad_norm: 0.9999992148061106, iteration: 67793
loss: 0.9987220168113708,grad_norm: 0.999999040625757, iteration: 67794
loss: 1.0113662481307983,grad_norm: 0.9851729081041568, iteration: 67795
loss: 0.9818164110183716,grad_norm: 0.9999991977543305, iteration: 67796
loss: 0.9971123337745667,grad_norm: 0.9999990808539998, iteration: 67797
loss: 1.0003392696380615,grad_norm: 0.9555982129900383, iteration: 67798
loss: 1.0171841382980347,grad_norm: 0.9999992243135414, iteration: 67799
loss: 1.0127168893814087,grad_norm: 0.999999285400854, iteration: 67800
loss: 1.0054866075515747,grad_norm: 0.9999991138964323, iteration: 67801
loss: 0.9935049414634705,grad_norm: 0.9999992372194932, iteration: 67802
loss: 1.0157707929611206,grad_norm: 0.999998996947969, iteration: 67803
loss: 0.9745023846626282,grad_norm: 0.8613225338958085, iteration: 67804
loss: 0.9899629354476929,grad_norm: 0.9999991061826158, iteration: 67805
loss: 0.9589629769325256,grad_norm: 0.9999990663286528, iteration: 67806
loss: 1.032447338104248,grad_norm: 0.9999989694161842, iteration: 67807
loss: 1.0071938037872314,grad_norm: 0.9999991281162675, iteration: 67808
loss: 1.0095082521438599,grad_norm: 0.9746166658571015, iteration: 67809
loss: 1.0164821147918701,grad_norm: 0.9387286567178003, iteration: 67810
loss: 1.026604413986206,grad_norm: 0.9999991809586644, iteration: 67811
loss: 1.0175410509109497,grad_norm: 0.9342170785577997, iteration: 67812
loss: 1.0284136533737183,grad_norm: 0.999999217080172, iteration: 67813
loss: 0.9802342653274536,grad_norm: 0.9999992628669104, iteration: 67814
loss: 0.9922264218330383,grad_norm: 0.9594482038430883, iteration: 67815
loss: 0.9933925271034241,grad_norm: 0.9999991116249983, iteration: 67816
loss: 0.9686010479927063,grad_norm: 0.9494567404821552, iteration: 67817
loss: 1.0117226839065552,grad_norm: 0.9682478519150535, iteration: 67818
loss: 1.0166362524032593,grad_norm: 0.9999991726358345, iteration: 67819
loss: 0.9989399313926697,grad_norm: 0.9999991049681709, iteration: 67820
loss: 1.014164924621582,grad_norm: 0.9999992062043841, iteration: 67821
loss: 0.9791823029518127,grad_norm: 0.9999991129401, iteration: 67822
loss: 0.9909500479698181,grad_norm: 0.9999990434378233, iteration: 67823
loss: 1.0179786682128906,grad_norm: 0.999999566208633, iteration: 67824
loss: 0.98590087890625,grad_norm: 0.9999991855819208, iteration: 67825
loss: 0.9619455933570862,grad_norm: 0.9999991166644282, iteration: 67826
loss: 1.0062748193740845,grad_norm: 0.9371826691122126, iteration: 67827
loss: 0.9941513538360596,grad_norm: 0.999999159685647, iteration: 67828
loss: 0.9884040355682373,grad_norm: 0.9999991065279091, iteration: 67829
loss: 1.0391258001327515,grad_norm: 0.9999992095491663, iteration: 67830
loss: 0.961890459060669,grad_norm: 0.9999990304925911, iteration: 67831
loss: 0.99881911277771,grad_norm: 0.9999991120688403, iteration: 67832
loss: 0.9964199066162109,grad_norm: 0.9999990425882098, iteration: 67833
loss: 1.0128618478775024,grad_norm: 0.9999990283241349, iteration: 67834
loss: 1.0181993246078491,grad_norm: 0.9999991744980596, iteration: 67835
loss: 0.9498697519302368,grad_norm: 0.9999993382028711, iteration: 67836
loss: 1.0241881608963013,grad_norm: 0.9999992878513914, iteration: 67837
loss: 0.9894590973854065,grad_norm: 0.8698573063370753, iteration: 67838
loss: 0.9874511957168579,grad_norm: 0.9509878714781305, iteration: 67839
loss: 1.0146992206573486,grad_norm: 0.9999991887890721, iteration: 67840
loss: 1.0097179412841797,grad_norm: 0.9007421861159927, iteration: 67841
loss: 1.0274834632873535,grad_norm: 0.9797339424498456, iteration: 67842
loss: 1.0229101181030273,grad_norm: 0.900787511966122, iteration: 67843
loss: 0.9685454368591309,grad_norm: 0.9218902821029523, iteration: 67844
loss: 0.9934773445129395,grad_norm: 0.9999992857726884, iteration: 67845
loss: 1.0085318088531494,grad_norm: 0.9999990263875449, iteration: 67846
loss: 1.0204238891601562,grad_norm: 0.9838322588473821, iteration: 67847
loss: 0.9918726086616516,grad_norm: 0.9999992917061749, iteration: 67848
loss: 0.997060239315033,grad_norm: 0.9999990001557036, iteration: 67849
loss: 0.987445056438446,grad_norm: 0.9999991506661067, iteration: 67850
loss: 0.9642730951309204,grad_norm: 0.9333308479969764, iteration: 67851
loss: 1.0137691497802734,grad_norm: 0.9312373588195737, iteration: 67852
loss: 0.9799652695655823,grad_norm: 0.9999991224966165, iteration: 67853
loss: 0.9898763298988342,grad_norm: 0.9999992078778773, iteration: 67854
loss: 1.033349871635437,grad_norm: 0.9999992184014282, iteration: 67855
loss: 0.9790006875991821,grad_norm: 0.9999990866273281, iteration: 67856
loss: 0.9892844557762146,grad_norm: 0.999999155359801, iteration: 67857
loss: 0.9709233045578003,grad_norm: 0.9999992945314187, iteration: 67858
loss: 0.9634180068969727,grad_norm: 0.8422869533688184, iteration: 67859
loss: 1.0112899541854858,grad_norm: 0.9999991829663905, iteration: 67860
loss: 1.0189616680145264,grad_norm: 0.891855374925544, iteration: 67861
loss: 1.020085334777832,grad_norm: 0.9999991948498808, iteration: 67862
loss: 1.0145093202590942,grad_norm: 0.9999991312697072, iteration: 67863
loss: 1.048672080039978,grad_norm: 0.9658845193990592, iteration: 67864
loss: 0.9962366819381714,grad_norm: 0.9999990198912095, iteration: 67865
loss: 1.0089399814605713,grad_norm: 0.8913668586746801, iteration: 67866
loss: 0.9980205297470093,grad_norm: 0.999999140799561, iteration: 67867
loss: 1.0187488794326782,grad_norm: 0.9600783257366119, iteration: 67868
loss: 1.0062600374221802,grad_norm: 0.9999992076249175, iteration: 67869
loss: 1.0369677543640137,grad_norm: 0.9999991699422615, iteration: 67870
loss: 1.000287652015686,grad_norm: 0.9999993168623565, iteration: 67871
loss: 1.0081764459609985,grad_norm: 0.9917965580763539, iteration: 67872
loss: 0.9820858836174011,grad_norm: 0.9640005694021, iteration: 67873
loss: 0.9524059295654297,grad_norm: 0.9999990864864955, iteration: 67874
loss: 0.9938812851905823,grad_norm: 0.9999990882756147, iteration: 67875
loss: 0.9639604091644287,grad_norm: 0.9999991956476658, iteration: 67876
loss: 0.9968042373657227,grad_norm: 0.9352529085176731, iteration: 67877
loss: 1.0457940101623535,grad_norm: 0.9899518999869783, iteration: 67878
loss: 0.9632556438446045,grad_norm: 0.9836437543743288, iteration: 67879
loss: 1.0026357173919678,grad_norm: 0.9999992062353417, iteration: 67880
loss: 0.9888041615486145,grad_norm: 0.9999991504024437, iteration: 67881
loss: 0.9583364725112915,grad_norm: 0.8918836808459724, iteration: 67882
loss: 0.964384913444519,grad_norm: 0.9999989023902339, iteration: 67883
loss: 0.984434962272644,grad_norm: 0.9772730017122201, iteration: 67884
loss: 1.0220911502838135,grad_norm: 0.9999995530631722, iteration: 67885
loss: 1.026647686958313,grad_norm: 0.9884961818423389, iteration: 67886
loss: 0.9764444231987,grad_norm: 0.9999992424934787, iteration: 67887
loss: 0.9687709212303162,grad_norm: 0.9999991114528898, iteration: 67888
loss: 0.9883714914321899,grad_norm: 0.8735226473354367, iteration: 67889
loss: 1.0519356727600098,grad_norm: 0.9999991592672203, iteration: 67890
loss: 0.9829942584037781,grad_norm: 0.9999992067584056, iteration: 67891
loss: 0.9312860369682312,grad_norm: 0.9999990532154104, iteration: 67892
loss: 0.9961487054824829,grad_norm: 0.9999990097132441, iteration: 67893
loss: 1.0110398530960083,grad_norm: 0.9882036876760344, iteration: 67894
loss: 0.9699932336807251,grad_norm: 0.9456358333992126, iteration: 67895
loss: 0.9973562955856323,grad_norm: 0.9999991639023804, iteration: 67896
loss: 0.9762584567070007,grad_norm: 0.9844644386958503, iteration: 67897
loss: 1.0136806964874268,grad_norm: 0.9371133452691356, iteration: 67898
loss: 0.9854903817176819,grad_norm: 0.9999991123535047, iteration: 67899
loss: 1.0173522233963013,grad_norm: 0.9999993051407722, iteration: 67900
loss: 0.9761479496955872,grad_norm: 0.9588302870856484, iteration: 67901
loss: 1.024497389793396,grad_norm: 0.9999992741671924, iteration: 67902
loss: 0.975508451461792,grad_norm: 0.9999991411540337, iteration: 67903
loss: 1.0000253915786743,grad_norm: 0.9915747997056509, iteration: 67904
loss: 0.9973404407501221,grad_norm: 0.9999992211993819, iteration: 67905
loss: 1.0301915407180786,grad_norm: 0.9999992043076067, iteration: 67906
loss: 1.0361733436584473,grad_norm: 0.9581295207557903, iteration: 67907
loss: 0.9875912070274353,grad_norm: 0.9999993328077817, iteration: 67908
loss: 0.9715573191642761,grad_norm: 0.9497728420526856, iteration: 67909
loss: 0.9582842588424683,grad_norm: 0.9999991825374286, iteration: 67910
loss: 1.0428009033203125,grad_norm: 0.9999990273012563, iteration: 67911
loss: 0.9888560771942139,grad_norm: 0.9839180483680308, iteration: 67912
loss: 1.0076502561569214,grad_norm: 0.999999148401781, iteration: 67913
loss: 1.0289908647537231,grad_norm: 0.9660856558687781, iteration: 67914
loss: 0.9883726835250854,grad_norm: 0.9999992582508672, iteration: 67915
loss: 0.993432879447937,grad_norm: 0.9999989861221339, iteration: 67916
loss: 1.00624680519104,grad_norm: 0.9805379546827327, iteration: 67917
loss: 1.0513386726379395,grad_norm: 0.9999998443104803, iteration: 67918
loss: 0.9984904527664185,grad_norm: 0.9999992347005527, iteration: 67919
loss: 1.0210697650909424,grad_norm: 0.9999991414172236, iteration: 67920
loss: 0.9972048997879028,grad_norm: 0.9999990399420698, iteration: 67921
loss: 0.986222505569458,grad_norm: 0.9999990603391778, iteration: 67922
loss: 0.9594100713729858,grad_norm: 0.9662532727299683, iteration: 67923
loss: 1.015479564666748,grad_norm: 0.999999175424707, iteration: 67924
loss: 0.9978951811790466,grad_norm: 0.9999990811777603, iteration: 67925
loss: 1.0134079456329346,grad_norm: 0.9999997472491448, iteration: 67926
loss: 0.9999162554740906,grad_norm: 0.9999991078659416, iteration: 67927
loss: 1.002139687538147,grad_norm: 0.9569398579076344, iteration: 67928
loss: 1.0128895044326782,grad_norm: 0.9999992798998727, iteration: 67929
loss: 0.9821161031723022,grad_norm: 0.999999273528535, iteration: 67930
loss: 1.024705410003662,grad_norm: 0.9225019378092117, iteration: 67931
loss: 1.0099101066589355,grad_norm: 0.9597679384970784, iteration: 67932
loss: 1.0271645784378052,grad_norm: 0.9999990790325358, iteration: 67933
loss: 1.0006284713745117,grad_norm: 0.9587611598390352, iteration: 67934
loss: 1.0361433029174805,grad_norm: 0.9986240443730017, iteration: 67935
loss: 1.0235942602157593,grad_norm: 0.9999995098065609, iteration: 67936
loss: 0.987743079662323,grad_norm: 0.9999991528003197, iteration: 67937
loss: 1.0146559476852417,grad_norm: 0.999999333726755, iteration: 67938
loss: 1.036855936050415,grad_norm: 0.9999991625787534, iteration: 67939
loss: 0.9979777932167053,grad_norm: 0.9999992170651563, iteration: 67940
loss: 1.0157755613327026,grad_norm: 0.9999990276160835, iteration: 67941
loss: 1.0187430381774902,grad_norm: 0.9999990896257067, iteration: 67942
loss: 1.0130646228790283,grad_norm: 0.9613763862821099, iteration: 67943
loss: 1.0191073417663574,grad_norm: 0.9300257101210241, iteration: 67944
loss: 1.0020699501037598,grad_norm: 0.9949249669422776, iteration: 67945
loss: 0.9977086782455444,grad_norm: 0.9783728308111448, iteration: 67946
loss: 1.0114392042160034,grad_norm: 0.999999100195283, iteration: 67947
loss: 1.0118887424468994,grad_norm: 0.9662687438302714, iteration: 67948
loss: 0.9970058798789978,grad_norm: 0.9999989503983691, iteration: 67949
loss: 0.9860631823539734,grad_norm: 0.9999997492860601, iteration: 67950
loss: 1.0679835081100464,grad_norm: 0.999999356559329, iteration: 67951
loss: 1.0085618495941162,grad_norm: 0.9999991200650918, iteration: 67952
loss: 1.0227487087249756,grad_norm: 0.9999991465640519, iteration: 67953
loss: 0.989385724067688,grad_norm: 0.8971979098736119, iteration: 67954
loss: 1.0173567533493042,grad_norm: 0.9999992839295115, iteration: 67955
loss: 1.0641697645187378,grad_norm: 0.9999994667044636, iteration: 67956
loss: 0.9663246870040894,grad_norm: 0.9798463581639275, iteration: 67957
loss: 0.9872267842292786,grad_norm: 0.9902632957090494, iteration: 67958
loss: 0.9710068702697754,grad_norm: 0.9999990184668095, iteration: 67959
loss: 0.9836625456809998,grad_norm: 0.9999991403802213, iteration: 67960
loss: 1.009856939315796,grad_norm: 0.9999997127529786, iteration: 67961
loss: 0.9909759163856506,grad_norm: 0.845476072778722, iteration: 67962
loss: 1.0160425901412964,grad_norm: 0.999999017860996, iteration: 67963
loss: 0.9745885133743286,grad_norm: 0.8602703622771043, iteration: 67964
loss: 1.0056426525115967,grad_norm: 0.9999991195639297, iteration: 67965
loss: 1.0098804235458374,grad_norm: 0.8778625199338901, iteration: 67966
loss: 1.020496129989624,grad_norm: 0.864945787166558, iteration: 67967
loss: 0.9931455850601196,grad_norm: 0.9999990918762729, iteration: 67968
loss: 1.0014368295669556,grad_norm: 0.9412554310804008, iteration: 67969
loss: 0.9823652505874634,grad_norm: 0.9999990914653945, iteration: 67970
loss: 0.9945696592330933,grad_norm: 0.9999991669499693, iteration: 67971
loss: 1.0118138790130615,grad_norm: 0.9491700147401905, iteration: 67972
loss: 0.9843193292617798,grad_norm: 0.8412277250653112, iteration: 67973
loss: 1.0051902532577515,grad_norm: 0.9955066078120627, iteration: 67974
loss: 0.9828366041183472,grad_norm: 0.9430816303562852, iteration: 67975
loss: 1.010988473892212,grad_norm: 0.8799243540379127, iteration: 67976
loss: 1.0200700759887695,grad_norm: 0.9999991314108101, iteration: 67977
loss: 1.0015859603881836,grad_norm: 0.9999992937708603, iteration: 67978
loss: 1.0198290348052979,grad_norm: 0.9863131092012996, iteration: 67979
loss: 0.9726843237876892,grad_norm: 0.9999990344859621, iteration: 67980
loss: 0.9974708557128906,grad_norm: 0.9999991322698852, iteration: 67981
loss: 0.9634011387825012,grad_norm: 0.9999993316805168, iteration: 67982
loss: 1.0058668851852417,grad_norm: 0.9999992283776796, iteration: 67983
loss: 1.0198537111282349,grad_norm: 0.9999990296931138, iteration: 67984
loss: 1.001209020614624,grad_norm: 0.8873178535326626, iteration: 67985
loss: 1.0530123710632324,grad_norm: 0.9999995127901325, iteration: 67986
loss: 1.0601590871810913,grad_norm: 0.9999991512439124, iteration: 67987
loss: 0.9773154854774475,grad_norm: 0.9728741093455738, iteration: 67988
loss: 1.0474843978881836,grad_norm: 0.9999991107748731, iteration: 67989
loss: 1.022792100906372,grad_norm: 0.9999990927454959, iteration: 67990
loss: 0.998796820640564,grad_norm: 0.9999991311385993, iteration: 67991
loss: 1.0011706352233887,grad_norm: 0.9999990650332851, iteration: 67992
loss: 0.9978645443916321,grad_norm: 0.9168560882235027, iteration: 67993
loss: 1.0352545976638794,grad_norm: 0.9999990154408261, iteration: 67994
loss: 1.0042738914489746,grad_norm: 0.9999992108699416, iteration: 67995
loss: 1.0513776540756226,grad_norm: 1.000000026692856, iteration: 67996
loss: 1.000301480293274,grad_norm: 0.9690320795089096, iteration: 67997
loss: 1.0045208930969238,grad_norm: 0.9999992842257376, iteration: 67998
loss: 0.97585529088974,grad_norm: 0.9421111921848372, iteration: 67999
loss: 1.045592188835144,grad_norm: 0.9111963850732735, iteration: 68000
loss: 1.0153175592422485,grad_norm: 0.9999991442059945, iteration: 68001
loss: 1.11464262008667,grad_norm: 0.9999995045575661, iteration: 68002
loss: 0.9656168222427368,grad_norm: 0.9999992781178779, iteration: 68003
loss: 1.0419578552246094,grad_norm: 0.9550896510811655, iteration: 68004
loss: 1.0171406269073486,grad_norm: 0.9999991295409895, iteration: 68005
loss: 1.0339893102645874,grad_norm: 0.9999990078648768, iteration: 68006
loss: 0.9812705516815186,grad_norm: 0.9999992835642916, iteration: 68007
loss: 1.0026227235794067,grad_norm: 0.9999990824937685, iteration: 68008
loss: 0.9736570715904236,grad_norm: 0.8663824919453537, iteration: 68009
loss: 0.9957190752029419,grad_norm: 0.9777896645126066, iteration: 68010
loss: 1.029770016670227,grad_norm: 0.9999991853156738, iteration: 68011
loss: 1.000231146812439,grad_norm: 0.9999990308387746, iteration: 68012
loss: 1.0202908515930176,grad_norm: 0.9999990851490654, iteration: 68013
loss: 0.9890232086181641,grad_norm: 0.8166453549393222, iteration: 68014
loss: 1.0003225803375244,grad_norm: 0.999999119746196, iteration: 68015
loss: 1.0061029195785522,grad_norm: 0.9999991812462394, iteration: 68016
loss: 1.0214797258377075,grad_norm: 0.9740750819422498, iteration: 68017
loss: 0.9940261244773865,grad_norm: 0.9634460877155934, iteration: 68018
loss: 0.9846097826957703,grad_norm: 0.9999992926961452, iteration: 68019
loss: 0.9572780728340149,grad_norm: 0.9999991984875823, iteration: 68020
loss: 1.035286784172058,grad_norm: 0.9999993749686351, iteration: 68021
loss: 0.9999246597290039,grad_norm: 0.9999988745029185, iteration: 68022
loss: 1.0347667932510376,grad_norm: 0.8974053178763775, iteration: 68023
loss: 1.0028538703918457,grad_norm: 0.9999991911095765, iteration: 68024
loss: 1.0020201206207275,grad_norm: 0.9228128743641605, iteration: 68025
loss: 1.0741702318191528,grad_norm: 0.999999077170094, iteration: 68026
loss: 0.9821734428405762,grad_norm: 0.999999307313657, iteration: 68027
loss: 1.0116889476776123,grad_norm: 0.9999991387511724, iteration: 68028
loss: 0.9617798924446106,grad_norm: 0.9999990163149263, iteration: 68029
loss: 1.0040463209152222,grad_norm: 0.9999991929096848, iteration: 68030
loss: 1.0230218172073364,grad_norm: 0.9999989292208962, iteration: 68031
loss: 1.0119222402572632,grad_norm: 0.999999127530122, iteration: 68032
loss: 1.002113699913025,grad_norm: 0.9999991567157479, iteration: 68033
loss: 1.0224874019622803,grad_norm: 0.9525578919162425, iteration: 68034
loss: 1.0045214891433716,grad_norm: 0.9999990185840858, iteration: 68035
loss: 0.967872142791748,grad_norm: 0.9999990943081151, iteration: 68036
loss: 0.9908449649810791,grad_norm: 0.9999990350854954, iteration: 68037
loss: 0.9823489189147949,grad_norm: 0.9999992803627824, iteration: 68038
loss: 1.0314806699752808,grad_norm: 0.9999995331048758, iteration: 68039
loss: 1.0091607570648193,grad_norm: 0.9999991390641505, iteration: 68040
loss: 0.9757638573646545,grad_norm: 0.9999989833344604, iteration: 68041
loss: 1.0227158069610596,grad_norm: 0.999999227499456, iteration: 68042
loss: 1.001786708831787,grad_norm: 0.9907719338010803, iteration: 68043
loss: 1.0061615705490112,grad_norm: 0.8881188446633894, iteration: 68044
loss: 0.9756391644477844,grad_norm: 0.9490937746218284, iteration: 68045
loss: 1.0072308778762817,grad_norm: 0.9999990534897244, iteration: 68046
loss: 1.030013918876648,grad_norm: 0.9999994772885312, iteration: 68047
loss: 1.0094760656356812,grad_norm: 0.9999991627328173, iteration: 68048
loss: 0.9943692088127136,grad_norm: 0.9872387376108638, iteration: 68049
loss: 1.0354431867599487,grad_norm: 0.9556070422204701, iteration: 68050
loss: 1.0032905340194702,grad_norm: 0.9999990862026867, iteration: 68051
loss: 1.015162706375122,grad_norm: 0.9999990940629099, iteration: 68052
loss: 1.046987771987915,grad_norm: 0.9121318415818206, iteration: 68053
loss: 1.0421384572982788,grad_norm: 0.9999994740736564, iteration: 68054
loss: 1.0050967931747437,grad_norm: 0.9999992267611233, iteration: 68055
loss: 0.9746086001396179,grad_norm: 0.999999098385022, iteration: 68056
loss: 0.9847573041915894,grad_norm: 0.9333939376261235, iteration: 68057
loss: 0.9961113929748535,grad_norm: 0.9999991987405334, iteration: 68058
loss: 0.9863041043281555,grad_norm: 0.9766581457866809, iteration: 68059
loss: 0.9739277362823486,grad_norm: 0.9999991096634361, iteration: 68060
loss: 0.9739534258842468,grad_norm: 0.8778302961576576, iteration: 68061
loss: 0.9664126038551331,grad_norm: 0.999999053736433, iteration: 68062
loss: 0.9874061346054077,grad_norm: 0.929544264409442, iteration: 68063
loss: 0.9862454533576965,grad_norm: 0.932685643923835, iteration: 68064
loss: 1.0409955978393555,grad_norm: 0.9432338192093346, iteration: 68065
loss: 0.9777826070785522,grad_norm: 0.8844250392249617, iteration: 68066
loss: 1.0140382051467896,grad_norm: 0.999999173851463, iteration: 68067
loss: 0.971329391002655,grad_norm: 0.9999990371861244, iteration: 68068
loss: 0.9988160133361816,grad_norm: 0.9999990944173385, iteration: 68069
loss: 1.0168532133102417,grad_norm: 0.9237489801707858, iteration: 68070
loss: 1.0214002132415771,grad_norm: 0.999999980138963, iteration: 68071
loss: 0.9945071935653687,grad_norm: 0.9999992058484455, iteration: 68072
loss: 0.9989511966705322,grad_norm: 0.8535774047107377, iteration: 68073
loss: 0.990476667881012,grad_norm: 0.9999991724594334, iteration: 68074
loss: 1.0300049781799316,grad_norm: 0.9999990771073568, iteration: 68075
loss: 1.001725673675537,grad_norm: 0.9999991890744457, iteration: 68076
loss: 1.0298992395401,grad_norm: 0.9999990602512988, iteration: 68077
loss: 1.0328762531280518,grad_norm: 0.9367684489210363, iteration: 68078
loss: 1.0222938060760498,grad_norm: 0.9999991548415721, iteration: 68079
loss: 0.9892208576202393,grad_norm: 0.8765275443475457, iteration: 68080
loss: 0.9429457783699036,grad_norm: 0.9999993130162561, iteration: 68081
loss: 1.008960485458374,grad_norm: 0.8444240995547412, iteration: 68082
loss: 0.9915058612823486,grad_norm: 0.9999992089361542, iteration: 68083
loss: 1.0312411785125732,grad_norm: 0.958633589451639, iteration: 68084
loss: 0.9749917984008789,grad_norm: 0.9999992404126257, iteration: 68085
loss: 0.9583348035812378,grad_norm: 0.9711572808287413, iteration: 68086
loss: 1.0143729448318481,grad_norm: 0.9999989998350819, iteration: 68087
loss: 1.0003937482833862,grad_norm: 0.9999992106113876, iteration: 68088
loss: 0.9808509945869446,grad_norm: 0.9999990378933993, iteration: 68089
loss: 0.9957370758056641,grad_norm: 0.9387557950018676, iteration: 68090
loss: 1.0243977308273315,grad_norm: 0.9820476404580712, iteration: 68091
loss: 0.9491710066795349,grad_norm: 0.9999990152757959, iteration: 68092
loss: 1.0262742042541504,grad_norm: 0.92063006700098, iteration: 68093
loss: 0.9970096349716187,grad_norm: 0.9999990790924976, iteration: 68094
loss: 0.998787522315979,grad_norm: 0.9591651709446296, iteration: 68095
loss: 1.013750433921814,grad_norm: 0.9999991349167634, iteration: 68096
loss: 0.9924827218055725,grad_norm: 0.9999991309144565, iteration: 68097
loss: 1.0092486143112183,grad_norm: 0.9999992265933385, iteration: 68098
loss: 0.9565063714981079,grad_norm: 0.9999991777167899, iteration: 68099
loss: 1.0060898065567017,grad_norm: 0.9867021462614437, iteration: 68100
loss: 1.0173107385635376,grad_norm: 0.9999991477865273, iteration: 68101
loss: 0.9900075793266296,grad_norm: 0.9999990799818701, iteration: 68102
loss: 1.0691460371017456,grad_norm: 0.9999991626899912, iteration: 68103
loss: 0.9902937412261963,grad_norm: 0.9999994483987139, iteration: 68104
loss: 1.0180145502090454,grad_norm: 0.9999990821505318, iteration: 68105
loss: 1.028235912322998,grad_norm: 0.9999992398094548, iteration: 68106
loss: 0.9895853400230408,grad_norm: 0.9999991646345388, iteration: 68107
loss: 1.018566370010376,grad_norm: 0.9999988388003016, iteration: 68108
loss: 1.0224792957305908,grad_norm: 0.999999218388623, iteration: 68109
loss: 1.0240700244903564,grad_norm: 0.8372230642915934, iteration: 68110
loss: 1.0239663124084473,grad_norm: 0.9999990117625046, iteration: 68111
loss: 0.9947622418403625,grad_norm: 0.9742132529959182, iteration: 68112
loss: 0.9980614185333252,grad_norm: 0.9999995050340377, iteration: 68113
loss: 0.9628782868385315,grad_norm: 0.9928880157467797, iteration: 68114
loss: 1.0037322044372559,grad_norm: 0.9655697047392904, iteration: 68115
loss: 1.0041106939315796,grad_norm: 0.9827143030309963, iteration: 68116
loss: 1.036312222480774,grad_norm: 0.9999991690573921, iteration: 68117
loss: 1.0007836818695068,grad_norm: 0.9999990872452271, iteration: 68118
loss: 0.9812421798706055,grad_norm: 0.9999991962628607, iteration: 68119
loss: 0.9997749924659729,grad_norm: 0.9999992353528038, iteration: 68120
loss: 0.9861604571342468,grad_norm: 0.9411511727726216, iteration: 68121
loss: 0.9893136024475098,grad_norm: 0.9999990561367545, iteration: 68122
loss: 1.021164894104004,grad_norm: 0.9999990491699133, iteration: 68123
loss: 0.9961277842521667,grad_norm: 0.9919385380477928, iteration: 68124
loss: 1.0042821168899536,grad_norm: 0.9799286614879973, iteration: 68125
loss: 1.0437631607055664,grad_norm: 0.9999992262256103, iteration: 68126
loss: 0.9906823039054871,grad_norm: 0.9999991207694462, iteration: 68127
loss: 0.9769558310508728,grad_norm: 0.9999990175495913, iteration: 68128
loss: 0.9851939082145691,grad_norm: 0.9999991997785677, iteration: 68129
loss: 0.9951099157333374,grad_norm: 0.9999990151600705, iteration: 68130
loss: 0.9808381795883179,grad_norm: 0.9999990666217939, iteration: 68131
loss: 0.9887872338294983,grad_norm: 0.8770780827377791, iteration: 68132
loss: 1.0188863277435303,grad_norm: 0.993996978629222, iteration: 68133
loss: 1.0232489109039307,grad_norm: 0.9999997361085995, iteration: 68134
loss: 0.959441065788269,grad_norm: 0.9759783444857482, iteration: 68135
loss: 1.0514686107635498,grad_norm: 0.8629406260864042, iteration: 68136
loss: 1.0064387321472168,grad_norm: 0.8582989573292544, iteration: 68137
loss: 0.972039520740509,grad_norm: 0.9999990472689263, iteration: 68138
loss: 1.010879397392273,grad_norm: 0.8366622929988385, iteration: 68139
loss: 0.9865785241127014,grad_norm: 0.9999990790469874, iteration: 68140
loss: 0.9737526774406433,grad_norm: 0.9999991443092917, iteration: 68141
loss: 0.9991407990455627,grad_norm: 0.9775612976740992, iteration: 68142
loss: 1.0054726600646973,grad_norm: 0.9673614708143994, iteration: 68143
loss: 0.9945500493049622,grad_norm: 0.8235163275619795, iteration: 68144
loss: 1.024812936782837,grad_norm: 0.9999992252890141, iteration: 68145
loss: 1.0021032094955444,grad_norm: 0.99999925087247, iteration: 68146
loss: 1.013828992843628,grad_norm: 0.9999991267784223, iteration: 68147
loss: 1.0411049127578735,grad_norm: 0.9593962286309927, iteration: 68148
loss: 0.9799441695213318,grad_norm: 0.9490001989379196, iteration: 68149
loss: 1.0040550231933594,grad_norm: 0.9999992289034658, iteration: 68150
loss: 1.0138188600540161,grad_norm: 0.9999991624208818, iteration: 68151
loss: 1.0548954010009766,grad_norm: 0.9999993277483046, iteration: 68152
loss: 0.959567129611969,grad_norm: 0.9999993844933274, iteration: 68153
loss: 0.9847744703292847,grad_norm: 0.9999990413219771, iteration: 68154
loss: 1.0542491674423218,grad_norm: 0.9999992245169591, iteration: 68155
loss: 1.0321390628814697,grad_norm: 0.8436857418361629, iteration: 68156
loss: 0.9586833715438843,grad_norm: 0.9999990935763756, iteration: 68157
loss: 0.9953944087028503,grad_norm: 0.9999989777197267, iteration: 68158
loss: 1.0231295824050903,grad_norm: 0.9999990682462276, iteration: 68159
loss: 0.9889482855796814,grad_norm: 0.9999990343169218, iteration: 68160
loss: 0.9800499081611633,grad_norm: 0.9999990263438027, iteration: 68161
loss: 0.9851274490356445,grad_norm: 0.9999990188951657, iteration: 68162
loss: 1.017754316329956,grad_norm: 0.9974548337776418, iteration: 68163
loss: 1.0198651552200317,grad_norm: 0.9999993370503151, iteration: 68164
loss: 1.0349820852279663,grad_norm: 0.978905766873422, iteration: 68165
loss: 0.9912642240524292,grad_norm: 0.986530634700071, iteration: 68166
loss: 1.0071467161178589,grad_norm: 0.9999993167335659, iteration: 68167
loss: 0.9783873558044434,grad_norm: 0.8914528766554701, iteration: 68168
loss: 0.9687581658363342,grad_norm: 0.9999990508606873, iteration: 68169
loss: 1.024095058441162,grad_norm: 0.9589864006537837, iteration: 68170
loss: 0.9831283688545227,grad_norm: 0.9780561285416877, iteration: 68171
loss: 0.9714698791503906,grad_norm: 0.9999989852082882, iteration: 68172
loss: 0.9852415919303894,grad_norm: 0.9538230897426325, iteration: 68173
loss: 0.9954819679260254,grad_norm: 0.9999991214295704, iteration: 68174
loss: 1.0251071453094482,grad_norm: 0.9999991724131286, iteration: 68175
loss: 0.9891015291213989,grad_norm: 0.9636052100822843, iteration: 68176
loss: 0.9663392901420593,grad_norm: 0.9731749406980376, iteration: 68177
loss: 1.0295796394348145,grad_norm: 0.9371558290711283, iteration: 68178
loss: 1.0082831382751465,grad_norm: 0.9999991787183872, iteration: 68179
loss: 0.9872502684593201,grad_norm: 0.9262477356891797, iteration: 68180
loss: 1.000040054321289,grad_norm: 0.9999992680137915, iteration: 68181
loss: 0.985375702381134,grad_norm: 0.9999991791946579, iteration: 68182
loss: 1.0023339986801147,grad_norm: 0.9999989244483058, iteration: 68183
loss: 1.051601529121399,grad_norm: 0.9999993347491392, iteration: 68184
loss: 0.9974787831306458,grad_norm: 0.9234108042193384, iteration: 68185
loss: 0.9450766444206238,grad_norm: 0.9999990471335819, iteration: 68186
loss: 1.0029789209365845,grad_norm: 0.9999991104616032, iteration: 68187
loss: 1.0411503314971924,grad_norm: 0.9999992204885348, iteration: 68188
loss: 0.987771213054657,grad_norm: 0.9401674298960246, iteration: 68189
loss: 1.016467571258545,grad_norm: 0.9999992950210593, iteration: 68190
loss: 1.010541319847107,grad_norm: 0.9999989563840712, iteration: 68191
loss: 0.9716066718101501,grad_norm: 0.9999991430017303, iteration: 68192
loss: 1.0026766061782837,grad_norm: 0.999999054325045, iteration: 68193
loss: 1.036117672920227,grad_norm: 0.9702487083790734, iteration: 68194
loss: 1.014331579208374,grad_norm: 0.9107553523058718, iteration: 68195
loss: 0.991538405418396,grad_norm: 0.9059704180749213, iteration: 68196
loss: 0.9929350018501282,grad_norm: 0.9999991268280869, iteration: 68197
loss: 1.0608724355697632,grad_norm: 0.9819086029117462, iteration: 68198
loss: 1.0069613456726074,grad_norm: 0.9999990375212943, iteration: 68199
loss: 0.9769006967544556,grad_norm: 0.9195255283785113, iteration: 68200
loss: 1.0117846727371216,grad_norm: 0.9999991021970874, iteration: 68201
loss: 0.9869930744171143,grad_norm: 0.9920591863954362, iteration: 68202
loss: 1.0004127025604248,grad_norm: 0.9999990857222312, iteration: 68203
loss: 1.0174416303634644,grad_norm: 0.9999991734351678, iteration: 68204
loss: 1.0170965194702148,grad_norm: 0.8279269471208892, iteration: 68205
loss: 1.028449535369873,grad_norm: 0.9999994410987391, iteration: 68206
loss: 1.0190905332565308,grad_norm: 0.8632610831705333, iteration: 68207
loss: 1.0101910829544067,grad_norm: 0.9628099529734853, iteration: 68208
loss: 1.0048984289169312,grad_norm: 0.9692780372709477, iteration: 68209
loss: 0.9905538558959961,grad_norm: 0.9999989761024406, iteration: 68210
loss: 0.9729993939399719,grad_norm: 0.9999990374091112, iteration: 68211
loss: 1.013777256011963,grad_norm: 0.9999991670090128, iteration: 68212
loss: 0.9993181824684143,grad_norm: 0.9999992627203618, iteration: 68213
loss: 0.9817191362380981,grad_norm: 0.9672826897686617, iteration: 68214
loss: 1.0151000022888184,grad_norm: 0.9999989893243577, iteration: 68215
loss: 0.9896750450134277,grad_norm: 0.9999992160809092, iteration: 68216
loss: 0.9714779257774353,grad_norm: 0.9999991885236343, iteration: 68217
loss: 0.9573875069618225,grad_norm: 0.9999990673303254, iteration: 68218
loss: 0.9879785776138306,grad_norm: 0.8716551536149868, iteration: 68219
loss: 0.9747343063354492,grad_norm: 0.8690495954332792, iteration: 68220
loss: 0.9931167364120483,grad_norm: 0.9999991900560243, iteration: 68221
loss: 1.012026309967041,grad_norm: 0.9999990735566955, iteration: 68222
loss: 0.9970912933349609,grad_norm: 0.8207673828776442, iteration: 68223
loss: 1.0508811473846436,grad_norm: 0.9999993219608028, iteration: 68224
loss: 1.0147042274475098,grad_norm: 0.9999991448709495, iteration: 68225
loss: 0.9952133893966675,grad_norm: 0.999999183415575, iteration: 68226
loss: 0.9964103102684021,grad_norm: 0.9082643231494087, iteration: 68227
loss: 1.015741229057312,grad_norm: 0.9999990343717137, iteration: 68228
loss: 0.9677537679672241,grad_norm: 0.9999991199189039, iteration: 68229
loss: 1.0069372653961182,grad_norm: 0.8850400012094409, iteration: 68230
loss: 1.00770902633667,grad_norm: 0.9999992208085182, iteration: 68231
loss: 1.0046892166137695,grad_norm: 0.9999990245310388, iteration: 68232
loss: 1.0133086442947388,grad_norm: 0.9078825305350127, iteration: 68233
loss: 1.038198471069336,grad_norm: 0.9999990378353676, iteration: 68234
loss: 0.989589512348175,grad_norm: 0.9999990919387286, iteration: 68235
loss: 1.0122995376586914,grad_norm: 0.9999990861161067, iteration: 68236
loss: 0.9740642309188843,grad_norm: 0.9999993189284644, iteration: 68237
loss: 1.0321699380874634,grad_norm: 0.9999993320826435, iteration: 68238
loss: 0.9970901608467102,grad_norm: 0.9999991112720701, iteration: 68239
loss: 1.009315848350525,grad_norm: 0.9999991261335615, iteration: 68240
loss: 1.013382911682129,grad_norm: 0.9999991764720827, iteration: 68241
loss: 1.03135085105896,grad_norm: 0.9999990973308975, iteration: 68242
loss: 1.0010619163513184,grad_norm: 0.8356984689377976, iteration: 68243
loss: 0.98948073387146,grad_norm: 0.9637374988158895, iteration: 68244
loss: 0.9812301993370056,grad_norm: 0.970097979086461, iteration: 68245
loss: 1.0141396522521973,grad_norm: 0.9137134913991533, iteration: 68246
loss: 0.9852767586708069,grad_norm: 0.999999167875998, iteration: 68247
loss: 0.975314199924469,grad_norm: 0.9999990228220129, iteration: 68248
loss: 0.9822365641593933,grad_norm: 0.9999992309030332, iteration: 68249
loss: 1.0357861518859863,grad_norm: 0.999999167739571, iteration: 68250
loss: 0.9876739382743835,grad_norm: 0.999999095729859, iteration: 68251
loss: 1.0380927324295044,grad_norm: 0.9949109709489726, iteration: 68252
loss: 0.9804216027259827,grad_norm: 0.9999991014328922, iteration: 68253
loss: 0.9902374148368835,grad_norm: 0.9999990819570744, iteration: 68254
loss: 1.002587080001831,grad_norm: 0.9999989372151867, iteration: 68255
loss: 1.0202481746673584,grad_norm: 0.9633046659025095, iteration: 68256
loss: 0.9904760718345642,grad_norm: 0.9501727104159516, iteration: 68257
loss: 1.0081859827041626,grad_norm: 0.8868563221799725, iteration: 68258
loss: 0.9973390102386475,grad_norm: 0.9999990202502632, iteration: 68259
loss: 1.0140571594238281,grad_norm: 0.9999991470605081, iteration: 68260
loss: 0.9736840128898621,grad_norm: 0.9999991474884761, iteration: 68261
loss: 0.9950454831123352,grad_norm: 0.9999992698320627, iteration: 68262
loss: 1.051682949066162,grad_norm: 0.9999992260596968, iteration: 68263
loss: 0.9762309193611145,grad_norm: 0.9732138990925217, iteration: 68264
loss: 1.005387306213379,grad_norm: 0.9999991554302262, iteration: 68265
loss: 0.9973096251487732,grad_norm: 0.933402428271079, iteration: 68266
loss: 1.010676383972168,grad_norm: 0.9921898394984261, iteration: 68267
loss: 0.9705653190612793,grad_norm: 0.9347870682537048, iteration: 68268
loss: 0.9584368467330933,grad_norm: 0.9999989984383503, iteration: 68269
loss: 1.0092800855636597,grad_norm: 0.9999993485402194, iteration: 68270
loss: 0.9756025671958923,grad_norm: 0.9999990533945324, iteration: 68271
loss: 1.0418925285339355,grad_norm: 0.9999991190520117, iteration: 68272
loss: 0.9875174164772034,grad_norm: 0.9755461994066061, iteration: 68273
loss: 0.9930139780044556,grad_norm: 0.9999991498399058, iteration: 68274
loss: 1.0165740251541138,grad_norm: 0.9033344115208258, iteration: 68275
loss: 1.0038726329803467,grad_norm: 0.9999992764717531, iteration: 68276
loss: 0.9871809482574463,grad_norm: 0.9824880107186084, iteration: 68277
loss: 0.9986869692802429,grad_norm: 0.9386051215892752, iteration: 68278
loss: 1.0871598720550537,grad_norm: 0.9999991590322543, iteration: 68279
loss: 0.9830938577651978,grad_norm: 0.999999134503208, iteration: 68280
loss: 1.0114331245422363,grad_norm: 0.9999991915499034, iteration: 68281
loss: 1.0181905031204224,grad_norm: 0.9999990228622756, iteration: 68282
loss: 1.0087755918502808,grad_norm: 0.9586228169290123, iteration: 68283
loss: 1.005060076713562,grad_norm: 0.9999992113386871, iteration: 68284
loss: 0.9910222887992859,grad_norm: 0.9659118262536435, iteration: 68285
loss: 1.018695592880249,grad_norm: 0.9286903192732446, iteration: 68286
loss: 0.994873583316803,grad_norm: 0.9084468466642937, iteration: 68287
loss: 1.014181137084961,grad_norm: 0.999999248435259, iteration: 68288
loss: 0.983460009098053,grad_norm: 0.9999992516500725, iteration: 68289
loss: 0.9959591031074524,grad_norm: 0.9999992704500139, iteration: 68290
loss: 1.0098859071731567,grad_norm: 0.9999989526119585, iteration: 68291
loss: 1.0820777416229248,grad_norm: 0.9999999216039779, iteration: 68292
loss: 1.032947063446045,grad_norm: 0.9195719996142149, iteration: 68293
loss: 0.987034022808075,grad_norm: 0.9999991220459984, iteration: 68294
loss: 1.0121943950653076,grad_norm: 0.9168607602838648, iteration: 68295
loss: 1.02360999584198,grad_norm: 0.9423963042976112, iteration: 68296
loss: 1.0393507480621338,grad_norm: 0.8963003573400635, iteration: 68297
loss: 0.9785758852958679,grad_norm: 0.9999990653429883, iteration: 68298
loss: 1.0085880756378174,grad_norm: 0.9999990969895228, iteration: 68299
loss: 0.9804384112358093,grad_norm: 0.9787514136841736, iteration: 68300
loss: 0.9476026892662048,grad_norm: 0.9468276056252983, iteration: 68301
loss: 1.0192331075668335,grad_norm: 0.9804235168985749, iteration: 68302
loss: 1.0255045890808105,grad_norm: 0.9999991390435826, iteration: 68303
loss: 1.024145483970642,grad_norm: 0.9999991996253138, iteration: 68304
loss: 0.9663798809051514,grad_norm: 0.9999992888182423, iteration: 68305
loss: 0.9928027391433716,grad_norm: 0.9896429793261926, iteration: 68306
loss: 1.013845682144165,grad_norm: 0.8245154426633375, iteration: 68307
loss: 1.0014771223068237,grad_norm: 0.9999991680374359, iteration: 68308
loss: 0.9922563433647156,grad_norm: 0.9999991653989558, iteration: 68309
loss: 0.9881715774536133,grad_norm: 0.9999992540402047, iteration: 68310
loss: 0.9995948672294617,grad_norm: 0.9999991420640714, iteration: 68311
loss: 1.007756233215332,grad_norm: 0.9999991168103806, iteration: 68312
loss: 0.9833465218544006,grad_norm: 0.9999991591674156, iteration: 68313
loss: 1.0005985498428345,grad_norm: 0.9999991839346879, iteration: 68314
loss: 0.9954870343208313,grad_norm: 0.9999991839532576, iteration: 68315
loss: 0.9981647729873657,grad_norm: 0.9374940144334631, iteration: 68316
loss: 1.007917881011963,grad_norm: 0.999999241518862, iteration: 68317
loss: 1.0065033435821533,grad_norm: 0.9116072872250235, iteration: 68318
loss: 1.0198458433151245,grad_norm: 0.9999991303596124, iteration: 68319
loss: 0.9858798980712891,grad_norm: 0.9976005079532818, iteration: 68320
loss: 0.9961557984352112,grad_norm: 0.9999990971124555, iteration: 68321
loss: 1.0011301040649414,grad_norm: 0.8949761527531288, iteration: 68322
loss: 0.9745385646820068,grad_norm: 0.9999990411540023, iteration: 68323
loss: 1.0219430923461914,grad_norm: 0.81781588967342, iteration: 68324
loss: 0.9724013209342957,grad_norm: 0.9626505971190344, iteration: 68325
loss: 0.9958890676498413,grad_norm: 0.9038816845285008, iteration: 68326
loss: 0.9794389605522156,grad_norm: 0.9999992052137299, iteration: 68327
loss: 0.9944989085197449,grad_norm: 0.9999993212092685, iteration: 68328
loss: 0.9907061457633972,grad_norm: 0.999999052883228, iteration: 68329
loss: 0.9531700611114502,grad_norm: 0.999998909925364, iteration: 68330
loss: 1.0307807922363281,grad_norm: 0.9999999014451992, iteration: 68331
loss: 1.0107401609420776,grad_norm: 0.9999990813186626, iteration: 68332
loss: 1.0198349952697754,grad_norm: 0.9999992174147314, iteration: 68333
loss: 0.9632465243339539,grad_norm: 0.9421600234802004, iteration: 68334
loss: 1.0012930631637573,grad_norm: 0.9999991800675234, iteration: 68335
loss: 1.0362926721572876,grad_norm: 0.9999990792328439, iteration: 68336
loss: 1.0723063945770264,grad_norm: 0.9999992288636488, iteration: 68337
loss: 0.9791171550750732,grad_norm: 0.9999990187446638, iteration: 68338
loss: 0.9908514022827148,grad_norm: 0.9999991803645407, iteration: 68339
loss: 1.0023860931396484,grad_norm: 0.9999991820218181, iteration: 68340
loss: 0.9940414428710938,grad_norm: 0.9444824805485929, iteration: 68341
loss: 0.970909059047699,grad_norm: 0.9999992483985082, iteration: 68342
loss: 0.9838768243789673,grad_norm: 0.9999990885738806, iteration: 68343
loss: 1.0322227478027344,grad_norm: 0.999999373999799, iteration: 68344
loss: 0.995011031627655,grad_norm: 0.9667400500457427, iteration: 68345
loss: 0.9973903894424438,grad_norm: 0.9999992116519029, iteration: 68346
loss: 1.0355446338653564,grad_norm: 0.9999992599906657, iteration: 68347
loss: 0.9780202507972717,grad_norm: 0.999999094496312, iteration: 68348
loss: 0.9604302048683167,grad_norm: 0.9999990929051863, iteration: 68349
loss: 1.0041218996047974,grad_norm: 0.8547990825645186, iteration: 68350
loss: 0.9927733540534973,grad_norm: 0.9042460393940972, iteration: 68351
loss: 1.0173574686050415,grad_norm: 0.9587893270243084, iteration: 68352
loss: 1.045069932937622,grad_norm: 0.9999991959218173, iteration: 68353
loss: 0.973992645740509,grad_norm: 0.8400177146875337, iteration: 68354
loss: 0.950427234172821,grad_norm: 0.9999991709341175, iteration: 68355
loss: 1.046426773071289,grad_norm: 0.9999991384759652, iteration: 68356
loss: 1.0254217386245728,grad_norm: 0.9999992337684291, iteration: 68357
loss: 0.9874693155288696,grad_norm: 0.9966667712167869, iteration: 68358
loss: 1.0132150650024414,grad_norm: 0.9999992011256083, iteration: 68359
loss: 1.0165534019470215,grad_norm: 0.9999989868435322, iteration: 68360
loss: 1.0027170181274414,grad_norm: 0.9999992398491179, iteration: 68361
loss: 1.0003595352172852,grad_norm: 0.976255203225621, iteration: 68362
loss: 1.0079634189605713,grad_norm: 0.9999998607265049, iteration: 68363
loss: 1.046972393989563,grad_norm: 0.8222753811989834, iteration: 68364
loss: 0.9644976258277893,grad_norm: 0.9845183100861771, iteration: 68365
loss: 1.0268632173538208,grad_norm: 0.9999990790044891, iteration: 68366
loss: 1.129940152168274,grad_norm: 0.9999996908023193, iteration: 68367
loss: 1.0211275815963745,grad_norm: 0.9999991568394506, iteration: 68368
loss: 1.010351300239563,grad_norm: 0.9873686524737588, iteration: 68369
loss: 1.0130443572998047,grad_norm: 0.8953733402758204, iteration: 68370
loss: 1.0056592226028442,grad_norm: 0.9999990491869473, iteration: 68371
loss: 0.9770318269729614,grad_norm: 0.9339682373842192, iteration: 68372
loss: 1.002427577972412,grad_norm: 0.9417272995243837, iteration: 68373
loss: 1.029058814048767,grad_norm: 0.9999990068515336, iteration: 68374
loss: 1.00324547290802,grad_norm: 0.9999993529588819, iteration: 68375
loss: 1.0170103311538696,grad_norm: 0.926859856072753, iteration: 68376
loss: 1.02707839012146,grad_norm: 0.9999990360314532, iteration: 68377
loss: 1.0116853713989258,grad_norm: 0.9999991438575678, iteration: 68378
loss: 1.001369833946228,grad_norm: 0.9236958642262606, iteration: 68379
loss: 0.9978148341178894,grad_norm: 0.9999991536484945, iteration: 68380
loss: 1.0126713514328003,grad_norm: 0.9531872000766196, iteration: 68381
loss: 1.0186842679977417,grad_norm: 0.9999992855067197, iteration: 68382
loss: 1.0397497415542603,grad_norm: 0.999999050721948, iteration: 68383
loss: 1.0098292827606201,grad_norm: 0.9999991488969643, iteration: 68384
loss: 0.9905091524124146,grad_norm: 0.9999991617318513, iteration: 68385
loss: 0.9811184406280518,grad_norm: 0.9789887381570623, iteration: 68386
loss: 1.0023237466812134,grad_norm: 0.8787178810889852, iteration: 68387
loss: 0.9772465229034424,grad_norm: 0.9999990453392242, iteration: 68388
loss: 1.0375922918319702,grad_norm: 0.999999139102558, iteration: 68389
loss: 1.0595757961273193,grad_norm: 0.9999991291505961, iteration: 68390
loss: 1.0283533334732056,grad_norm: 0.9999993487203109, iteration: 68391
loss: 1.036145567893982,grad_norm: 0.9999990796039485, iteration: 68392
loss: 0.991962730884552,grad_norm: 0.9816137370450162, iteration: 68393
loss: 0.9723150730133057,grad_norm: 0.9999991748758581, iteration: 68394
loss: 1.0089788436889648,grad_norm: 0.9999992723196509, iteration: 68395
loss: 1.014870524406433,grad_norm: 0.9908070244516132, iteration: 68396
loss: 0.9757319092750549,grad_norm: 0.9999991082755281, iteration: 68397
loss: 1.0243035554885864,grad_norm: 0.999998974168265, iteration: 68398
loss: 0.9818266034126282,grad_norm: 0.999999321493082, iteration: 68399
loss: 1.0377845764160156,grad_norm: 0.9999992081431778, iteration: 68400
loss: 1.0162267684936523,grad_norm: 0.9677905025388799, iteration: 68401
loss: 1.0114985704421997,grad_norm: 0.8383450284525568, iteration: 68402
loss: 1.0139844417572021,grad_norm: 0.9999991052349267, iteration: 68403
loss: 1.061050534248352,grad_norm: 0.9999991475938544, iteration: 68404
loss: 1.0365062952041626,grad_norm: 0.999999177090493, iteration: 68405
loss: 1.0213837623596191,grad_norm: 0.9999990501807545, iteration: 68406
loss: 1.0100497007369995,grad_norm: 0.901573135132543, iteration: 68407
loss: 1.0368098020553589,grad_norm: 0.9999991706734455, iteration: 68408
loss: 1.0077199935913086,grad_norm: 0.8993495962697169, iteration: 68409
loss: 0.974888265132904,grad_norm: 0.9999991210126277, iteration: 68410
loss: 0.9989081025123596,grad_norm: 0.9842530939690696, iteration: 68411
loss: 0.9726873636245728,grad_norm: 0.9003340752559003, iteration: 68412
loss: 0.984080970287323,grad_norm: 0.9999992304290257, iteration: 68413
loss: 1.0122997760772705,grad_norm: 0.9976854392175434, iteration: 68414
loss: 0.9924137592315674,grad_norm: 0.9999990573249138, iteration: 68415
loss: 1.010684847831726,grad_norm: 0.9999990487098608, iteration: 68416
loss: 0.9953375458717346,grad_norm: 0.9999990156962513, iteration: 68417
loss: 1.0475419759750366,grad_norm: 0.8933454320629234, iteration: 68418
loss: 1.0333271026611328,grad_norm: 0.9999994490251469, iteration: 68419
loss: 1.03827965259552,grad_norm: 0.9999999038259322, iteration: 68420
loss: 0.9699841737747192,grad_norm: 0.9999992323097355, iteration: 68421
loss: 1.019671082496643,grad_norm: 0.8551949733211276, iteration: 68422
loss: 1.006544589996338,grad_norm: 0.9857383963870661, iteration: 68423
loss: 0.9848901629447937,grad_norm: 0.9999991720454291, iteration: 68424
loss: 1.0100739002227783,grad_norm: 0.9999990872440998, iteration: 68425
loss: 0.9950774908065796,grad_norm: 0.9999991301622609, iteration: 68426
loss: 0.9720268249511719,grad_norm: 0.9829562569128888, iteration: 68427
loss: 1.0305209159851074,grad_norm: 0.9999991357088626, iteration: 68428
loss: 1.0207335948944092,grad_norm: 0.9999991675222654, iteration: 68429
loss: 1.0456947088241577,grad_norm: 0.9751686037909385, iteration: 68430
loss: 1.000791311264038,grad_norm: 0.9538099511389451, iteration: 68431
loss: 0.9848508238792419,grad_norm: 0.9999990321430479, iteration: 68432
loss: 1.0249390602111816,grad_norm: 0.9999992967846493, iteration: 68433
loss: 1.014917016029358,grad_norm: 0.9733028468026447, iteration: 68434
loss: 0.9869391918182373,grad_norm: 0.9827980542338746, iteration: 68435
loss: 0.9623687863349915,grad_norm: 0.9999991393704378, iteration: 68436
loss: 0.9627094864845276,grad_norm: 0.9999990825648929, iteration: 68437
loss: 0.9944971799850464,grad_norm: 0.9999991311322551, iteration: 68438
loss: 1.0289593935012817,grad_norm: 0.8557780891942866, iteration: 68439
loss: 1.034054160118103,grad_norm: 0.9895321806665155, iteration: 68440
loss: 1.003125786781311,grad_norm: 0.9807345460475468, iteration: 68441
loss: 1.0287325382232666,grad_norm: 0.9999995775529097, iteration: 68442
loss: 0.9652811884880066,grad_norm: 0.9999989473342957, iteration: 68443
loss: 1.0590070486068726,grad_norm: 0.9999997673583009, iteration: 68444
loss: 1.0390125513076782,grad_norm: 0.9999988666243075, iteration: 68445
loss: 1.0209460258483887,grad_norm: 0.9343951325836658, iteration: 68446
loss: 0.9994884133338928,grad_norm: 0.8573533585989984, iteration: 68447
loss: 1.027948260307312,grad_norm: 0.999999075678838, iteration: 68448
loss: 0.9755635857582092,grad_norm: 0.9269611735844157, iteration: 68449
loss: 0.9897500872612,grad_norm: 0.9999991529499325, iteration: 68450
loss: 1.0181666612625122,grad_norm: 0.9832506964278769, iteration: 68451
loss: 0.9904480576515198,grad_norm: 0.9999990285815153, iteration: 68452
loss: 1.004594087600708,grad_norm: 0.9999991238810926, iteration: 68453
loss: 0.9624842405319214,grad_norm: 0.9999992248159212, iteration: 68454
loss: 1.0119069814682007,grad_norm: 0.999999042693199, iteration: 68455
loss: 1.039233684539795,grad_norm: 0.9999991315170708, iteration: 68456
loss: 1.0183119773864746,grad_norm: 0.9999990565395773, iteration: 68457
loss: 1.0039424896240234,grad_norm: 0.9999992178236786, iteration: 68458
loss: 0.9869504570960999,grad_norm: 0.932138385355472, iteration: 68459
loss: 1.0008116960525513,grad_norm: 0.994213009203915, iteration: 68460
loss: 1.0046778917312622,grad_norm: 0.9121649418910492, iteration: 68461
loss: 1.0634353160858154,grad_norm: 0.9999994701905567, iteration: 68462
loss: 0.9922522902488708,grad_norm: 0.8751409815804876, iteration: 68463
loss: 1.023299217224121,grad_norm: 0.91324288004551, iteration: 68464
loss: 0.9908650517463684,grad_norm: 0.999999044573648, iteration: 68465
loss: 1.0301779508590698,grad_norm: 0.9999989750823941, iteration: 68466
loss: 1.0040338039398193,grad_norm: 0.9999992248506234, iteration: 68467
loss: 0.963797926902771,grad_norm: 0.9989359738619659, iteration: 68468
loss: 1.0694140195846558,grad_norm: 0.9999993967708252, iteration: 68469
loss: 1.024383783340454,grad_norm: 0.9999991100704442, iteration: 68470
loss: 1.2118221521377563,grad_norm: 0.9999995159398164, iteration: 68471
loss: 0.9656718373298645,grad_norm: 0.9999994035978126, iteration: 68472
loss: 1.0053925514221191,grad_norm: 0.999998908813905, iteration: 68473
loss: 0.9893162846565247,grad_norm: 0.9999991126545026, iteration: 68474
loss: 1.026308298110962,grad_norm: 0.999999191261332, iteration: 68475
loss: 1.0311954021453857,grad_norm: 0.9999989510421761, iteration: 68476
loss: 1.0128352642059326,grad_norm: 0.9060894006333515, iteration: 68477
loss: 0.9683246612548828,grad_norm: 0.9667779296518144, iteration: 68478
loss: 0.9889005422592163,grad_norm: 0.9753506831570206, iteration: 68479
loss: 1.0008164644241333,grad_norm: 0.8733696351438862, iteration: 68480
loss: 1.004131555557251,grad_norm: 0.9999990069707074, iteration: 68481
loss: 1.008103847503662,grad_norm: 0.9048760554018219, iteration: 68482
loss: 1.0072717666625977,grad_norm: 0.849843968174818, iteration: 68483
loss: 1.0086891651153564,grad_norm: 0.999999135643602, iteration: 68484
loss: 1.0023260116577148,grad_norm: 0.9055189375169033, iteration: 68485
loss: 0.9938477873802185,grad_norm: 0.9999992130494507, iteration: 68486
loss: 1.047353982925415,grad_norm: 0.9999998053088749, iteration: 68487
loss: 0.9828540682792664,grad_norm: 0.8494139488923592, iteration: 68488
loss: 1.1703044176101685,grad_norm: 1.0000000181554676, iteration: 68489
loss: 1.041788935661316,grad_norm: 0.9999995251563573, iteration: 68490
loss: 0.9476799964904785,grad_norm: 0.9022024826997403, iteration: 68491
loss: 0.9973351955413818,grad_norm: 0.999999070293801, iteration: 68492
loss: 1.0121172666549683,grad_norm: 0.9750519307517939, iteration: 68493
loss: 1.0082758665084839,grad_norm: 0.9999991449115955, iteration: 68494
loss: 0.9991816878318787,grad_norm: 0.999999076979866, iteration: 68495
loss: 1.0313352346420288,grad_norm: 0.9091021360886947, iteration: 68496
loss: 0.9906242489814758,grad_norm: 0.9999990181765007, iteration: 68497
loss: 1.0065083503723145,grad_norm: 0.9999991439289844, iteration: 68498
loss: 0.997996985912323,grad_norm: 0.9999991097910381, iteration: 68499
loss: 1.0302215814590454,grad_norm: 0.9550442398376177, iteration: 68500
loss: 0.9971437454223633,grad_norm: 0.9014193344515079, iteration: 68501
loss: 1.016379475593567,grad_norm: 0.9999992380159684, iteration: 68502
loss: 1.0150866508483887,grad_norm: 0.9089915499702057, iteration: 68503
loss: 1.0091462135314941,grad_norm: 0.918342178210137, iteration: 68504
loss: 1.0315202474594116,grad_norm: 0.9735592303915316, iteration: 68505
loss: 1.041916847229004,grad_norm: 0.999999004432514, iteration: 68506
loss: 1.0427865982055664,grad_norm: 0.9999991602859817, iteration: 68507
loss: 0.9980636239051819,grad_norm: 0.9999992131660483, iteration: 68508
loss: 1.0595457553863525,grad_norm: 0.9999996521562381, iteration: 68509
loss: 0.9827183485031128,grad_norm: 0.8892753303861682, iteration: 68510
loss: 1.0045984983444214,grad_norm: 0.979689681976977, iteration: 68511
loss: 0.9877709746360779,grad_norm: 0.9876850734977821, iteration: 68512
loss: 0.9924800395965576,grad_norm: 0.9999991771191851, iteration: 68513
loss: 0.9993283748626709,grad_norm: 0.9999991707895395, iteration: 68514
loss: 1.0171408653259277,grad_norm: 0.9875808198586117, iteration: 68515
loss: 0.9860438704490662,grad_norm: 0.9999992509291364, iteration: 68516
loss: 0.9944884181022644,grad_norm: 0.9999991006908513, iteration: 68517
loss: 0.9865089654922485,grad_norm: 0.9918169894573016, iteration: 68518
loss: 1.028450846672058,grad_norm: 0.999999233656384, iteration: 68519
loss: 0.9958240389823914,grad_norm: 0.8798648748313629, iteration: 68520
loss: 0.9960130453109741,grad_norm: 0.9999989694746985, iteration: 68521
loss: 1.0093512535095215,grad_norm: 0.9999990426284344, iteration: 68522
loss: 0.9952763915061951,grad_norm: 0.9847515833481773, iteration: 68523
loss: 1.007730484008789,grad_norm: 0.999999266348119, iteration: 68524
loss: 0.9787214994430542,grad_norm: 0.9999991091129332, iteration: 68525
loss: 0.9819006323814392,grad_norm: 0.8762220358947139, iteration: 68526
loss: 1.0237230062484741,grad_norm: 0.9999992199284117, iteration: 68527
loss: 0.9715909957885742,grad_norm: 0.9999991679181667, iteration: 68528
loss: 0.9981087446212769,grad_norm: 0.9689891944204595, iteration: 68529
loss: 0.9422752261161804,grad_norm: 0.9999991615142161, iteration: 68530
loss: 0.9976605772972107,grad_norm: 0.8662105481078918, iteration: 68531
loss: 1.0047881603240967,grad_norm: 0.9999990603629587, iteration: 68532
loss: 0.964369535446167,grad_norm: 0.9999992756474186, iteration: 68533
loss: 1.0348634719848633,grad_norm: 0.9999990002962287, iteration: 68534
loss: 0.9907504916191101,grad_norm: 0.999999158354702, iteration: 68535
loss: 0.9791111350059509,grad_norm: 0.9999990041724369, iteration: 68536
loss: 0.9837051033973694,grad_norm: 0.9805773015306452, iteration: 68537
loss: 1.0121498107910156,grad_norm: 0.9999993871132453, iteration: 68538
loss: 0.9674205183982849,grad_norm: 0.999999082842691, iteration: 68539
loss: 0.9927747249603271,grad_norm: 0.9399504507773957, iteration: 68540
loss: 1.0199073553085327,grad_norm: 0.999999307073637, iteration: 68541
loss: 1.0805457830429077,grad_norm: 0.9999994284614689, iteration: 68542
loss: 1.0255964994430542,grad_norm: 0.9433671241175858, iteration: 68543
loss: 0.9564645886421204,grad_norm: 0.9999991032179211, iteration: 68544
loss: 0.9711456298828125,grad_norm: 0.9999990502799175, iteration: 68545
loss: 0.9957905411720276,grad_norm: 0.9910448603172072, iteration: 68546
loss: 0.9884200096130371,grad_norm: 0.999999155986561, iteration: 68547
loss: 1.0295771360397339,grad_norm: 0.9712662039687827, iteration: 68548
loss: 0.9892526865005493,grad_norm: 0.9205189940349384, iteration: 68549
loss: 0.9766138195991516,grad_norm: 0.9779355440961236, iteration: 68550
loss: 1.0385915040969849,grad_norm: 0.9999993573716996, iteration: 68551
loss: 1.0033379793167114,grad_norm: 0.9999991531984609, iteration: 68552
loss: 1.0027477741241455,grad_norm: 0.9999992382158385, iteration: 68553
loss: 1.017364501953125,grad_norm: 0.9999990885855056, iteration: 68554
loss: 1.0844478607177734,grad_norm: 0.9640160900074006, iteration: 68555
loss: 1.0501304864883423,grad_norm: 0.9999991453263489, iteration: 68556
loss: 0.9724327921867371,grad_norm: 0.9999991567269313, iteration: 68557
loss: 1.0280715227127075,grad_norm: 0.999999184112704, iteration: 68558
loss: 1.0095956325531006,grad_norm: 0.9999992265572417, iteration: 68559
loss: 1.0296498537063599,grad_norm: 0.9999989793583842, iteration: 68560
loss: 0.9476081728935242,grad_norm: 0.9999990922335522, iteration: 68561
loss: 1.0183247327804565,grad_norm: 0.9999991370413986, iteration: 68562
loss: 1.0325998067855835,grad_norm: 0.9468703216372387, iteration: 68563
loss: 1.0330736637115479,grad_norm: 0.9999990422362685, iteration: 68564
loss: 1.0357005596160889,grad_norm: 0.9999992095124387, iteration: 68565
loss: 0.9801914095878601,grad_norm: 0.8734376635561959, iteration: 68566
loss: 1.029470443725586,grad_norm: 0.999999576262623, iteration: 68567
loss: 0.9308995604515076,grad_norm: 0.9999991024566142, iteration: 68568
loss: 1.0019129514694214,grad_norm: 0.9999991608102587, iteration: 68569
loss: 0.9643897414207458,grad_norm: 0.9999990188433165, iteration: 68570
loss: 0.9825450778007507,grad_norm: 0.9999989810535167, iteration: 68571
loss: 1.0287079811096191,grad_norm: 0.8858528630542924, iteration: 68572
loss: 1.0200203657150269,grad_norm: 0.9957194906910719, iteration: 68573
loss: 1.033376693725586,grad_norm: 0.9999991778851709, iteration: 68574
loss: 1.0099327564239502,grad_norm: 0.9999990692410239, iteration: 68575
loss: 1.2795805931091309,grad_norm: 0.9999998859095208, iteration: 68576
loss: 0.9798452258110046,grad_norm: 0.9981603116744903, iteration: 68577
loss: 1.0285621881484985,grad_norm: 0.9999991412398803, iteration: 68578
loss: 1.0191497802734375,grad_norm: 0.9999991257650388, iteration: 68579
loss: 1.0171916484832764,grad_norm: 0.9999992345658925, iteration: 68580
loss: 0.9867258667945862,grad_norm: 0.9999989192338415, iteration: 68581
loss: 1.0028741359710693,grad_norm: 0.8851644669713513, iteration: 68582
loss: 1.0256109237670898,grad_norm: 0.9726252470580488, iteration: 68583
loss: 0.9935809373855591,grad_norm: 0.999999110603192, iteration: 68584
loss: 1.0169050693511963,grad_norm: 0.9999990449090649, iteration: 68585
loss: 1.0233793258666992,grad_norm: 0.974115581302361, iteration: 68586
loss: 1.0112833976745605,grad_norm: 0.9999991845594423, iteration: 68587
loss: 0.9800878167152405,grad_norm: 0.9325761303690021, iteration: 68588
loss: 0.9736269116401672,grad_norm: 0.9519880098738808, iteration: 68589
loss: 1.045590877532959,grad_norm: 0.927172257168488, iteration: 68590
loss: 0.977749764919281,grad_norm: 0.9635228664989324, iteration: 68591
loss: 0.9808534383773804,grad_norm: 0.9727298527647371, iteration: 68592
loss: 0.9896965026855469,grad_norm: 0.999999011756641, iteration: 68593
loss: 1.0176571607589722,grad_norm: 0.9160381394670962, iteration: 68594
loss: 1.0184767246246338,grad_norm: 0.9999992106510001, iteration: 68595
loss: 1.0050731897354126,grad_norm: 0.999998979133319, iteration: 68596
loss: 1.035483956336975,grad_norm: 0.9999990722226003, iteration: 68597
loss: 1.0008260011672974,grad_norm: 0.9999990521758598, iteration: 68598
loss: 1.0043518543243408,grad_norm: 0.999999127691038, iteration: 68599
loss: 0.9701393842697144,grad_norm: 0.8325132603800044, iteration: 68600
loss: 1.0157575607299805,grad_norm: 0.9424442655216678, iteration: 68601
loss: 0.9618614912033081,grad_norm: 0.9999991390251811, iteration: 68602
loss: 0.9874561429023743,grad_norm: 0.9999992160798032, iteration: 68603
loss: 1.0949147939682007,grad_norm: 0.9999995801026121, iteration: 68604
loss: 0.9931830763816833,grad_norm: 0.9999991683698013, iteration: 68605
loss: 0.9867008328437805,grad_norm: 0.9999992673913111, iteration: 68606
loss: 0.9669994115829468,grad_norm: 0.9999992778347921, iteration: 68607
loss: 1.039896011352539,grad_norm: 0.9999989755002604, iteration: 68608
loss: 0.980949342250824,grad_norm: 0.9999990270605843, iteration: 68609
loss: 0.9896495342254639,grad_norm: 0.9870178944466159, iteration: 68610
loss: 0.9982035160064697,grad_norm: 0.9999990944263838, iteration: 68611
loss: 0.9968166947364807,grad_norm: 0.816592063355629, iteration: 68612
loss: 0.9981973767280579,grad_norm: 0.9999991622960922, iteration: 68613
loss: 1.0142357349395752,grad_norm: 0.9999990448947417, iteration: 68614
loss: 0.9931493997573853,grad_norm: 0.9999992010724881, iteration: 68615
loss: 1.044991135597229,grad_norm: 0.99999909950397, iteration: 68616
loss: 0.9962515234947205,grad_norm: 0.999999202358144, iteration: 68617
loss: 1.0340946912765503,grad_norm: 0.9999993166750819, iteration: 68618
loss: 0.994246244430542,grad_norm: 0.9878685717391185, iteration: 68619
loss: 0.9859776496887207,grad_norm: 0.9999991114637501, iteration: 68620
loss: 1.0098967552185059,grad_norm: 0.9331018254603061, iteration: 68621
loss: 1.0300976037979126,grad_norm: 0.9999997149514831, iteration: 68622
loss: 0.9959248304367065,grad_norm: 0.9999990306184993, iteration: 68623
loss: 1.008000373840332,grad_norm: 0.9072398090290958, iteration: 68624
loss: 0.9965817332267761,grad_norm: 0.9999994314630813, iteration: 68625
loss: 1.016196608543396,grad_norm: 0.9678959752334031, iteration: 68626
loss: 0.9724013209342957,grad_norm: 0.9999991809605479, iteration: 68627
loss: 1.0007407665252686,grad_norm: 0.9999990805031933, iteration: 68628
loss: 1.0033141374588013,grad_norm: 0.9735809169341392, iteration: 68629
loss: 0.9806620478630066,grad_norm: 0.9999991619192212, iteration: 68630
loss: 0.9952840805053711,grad_norm: 0.9759396861860447, iteration: 68631
loss: 1.0067886114120483,grad_norm: 0.9999990082512985, iteration: 68632
loss: 1.0045764446258545,grad_norm: 0.9999991384137669, iteration: 68633
loss: 0.9624375700950623,grad_norm: 0.9999988966828716, iteration: 68634
loss: 1.0240942239761353,grad_norm: 0.9999991617681627, iteration: 68635
loss: 0.9634650349617004,grad_norm: 0.9999991330225814, iteration: 68636
loss: 1.0068840980529785,grad_norm: 0.940727062530636, iteration: 68637
loss: 0.9958276748657227,grad_norm: 0.9999989471740275, iteration: 68638
loss: 1.0266083478927612,grad_norm: 0.9999997922153757, iteration: 68639
loss: 0.9772467613220215,grad_norm: 0.9961185216532198, iteration: 68640
loss: 1.0104105472564697,grad_norm: 0.9999989559900472, iteration: 68641
loss: 0.979280948638916,grad_norm: 0.8510667436144468, iteration: 68642
loss: 1.0121128559112549,grad_norm: 0.9573206120164621, iteration: 68643
loss: 0.9571366310119629,grad_norm: 0.9470922241082128, iteration: 68644
loss: 0.9990390539169312,grad_norm: 0.9189012276948083, iteration: 68645
loss: 1.1720497608184814,grad_norm: 0.9999996650103383, iteration: 68646
loss: 1.0087602138519287,grad_norm: 0.9999990282637928, iteration: 68647
loss: 1.044197678565979,grad_norm: 0.999999616266042, iteration: 68648
loss: 1.0337040424346924,grad_norm: 0.9999993984766303, iteration: 68649
loss: 0.9948490262031555,grad_norm: 0.9999991825930908, iteration: 68650
loss: 1.0517929792404175,grad_norm: 0.9999996047379907, iteration: 68651
loss: 1.040741205215454,grad_norm: 0.9999997503848005, iteration: 68652
loss: 1.0310239791870117,grad_norm: 0.9999990567784783, iteration: 68653
loss: 1.0298527479171753,grad_norm: 0.999999188706417, iteration: 68654
loss: 0.979351282119751,grad_norm: 0.9544266654739336, iteration: 68655
loss: 1.0534965991973877,grad_norm: 0.9999995256787638, iteration: 68656
loss: 1.0259405374526978,grad_norm: 0.9999991488864057, iteration: 68657
loss: 0.9970985054969788,grad_norm: 0.9075766157299521, iteration: 68658
loss: 1.0076000690460205,grad_norm: 0.7605730623606702, iteration: 68659
loss: 0.985436737537384,grad_norm: 0.9999991289736629, iteration: 68660
loss: 0.9741926193237305,grad_norm: 0.9999991779030166, iteration: 68661
loss: 1.0274239778518677,grad_norm: 0.9999991089763555, iteration: 68662
loss: 0.9648124575614929,grad_norm: 0.9965430890904181, iteration: 68663
loss: 0.9952309727668762,grad_norm: 0.9999989915369415, iteration: 68664
loss: 0.9981308579444885,grad_norm: 0.8518264127974513, iteration: 68665
loss: 1.046229362487793,grad_norm: 0.9695661937970478, iteration: 68666
loss: 0.985185980796814,grad_norm: 0.9432894425423172, iteration: 68667
loss: 1.0300657749176025,grad_norm: 0.999999245432971, iteration: 68668
loss: 1.001355528831482,grad_norm: 0.9999991862400367, iteration: 68669
loss: 1.0086023807525635,grad_norm: 0.9999990934668458, iteration: 68670
loss: 1.010088324546814,grad_norm: 0.9999992764204617, iteration: 68671
loss: 0.9871174693107605,grad_norm: 0.9993578756665866, iteration: 68672
loss: 1.0034005641937256,grad_norm: 0.9999989970038836, iteration: 68673
loss: 1.0148329734802246,grad_norm: 0.999999089293327, iteration: 68674
loss: 1.0303946733474731,grad_norm: 0.9999996932351329, iteration: 68675
loss: 1.0036842823028564,grad_norm: 0.9999991270216481, iteration: 68676
loss: 1.018505573272705,grad_norm: 0.9999992456028398, iteration: 68677
loss: 0.9656046628952026,grad_norm: 0.9999993039492441, iteration: 68678
loss: 1.0175195932388306,grad_norm: 0.999999109645702, iteration: 68679
loss: 0.9814872145652771,grad_norm: 0.9625667212166823, iteration: 68680
loss: 1.0025436878204346,grad_norm: 0.9999991154670318, iteration: 68681
loss: 1.0182846784591675,grad_norm: 0.9999990715892905, iteration: 68682
loss: 0.9842752814292908,grad_norm: 0.8680593674564506, iteration: 68683
loss: 0.9675456881523132,grad_norm: 0.9416645632261958, iteration: 68684
loss: 1.0291498899459839,grad_norm: 0.9833019471713492, iteration: 68685
loss: 1.0071274042129517,grad_norm: 0.9999996070699119, iteration: 68686
loss: 0.9676825404167175,grad_norm: 0.94531392230102, iteration: 68687
loss: 0.9985272884368896,grad_norm: 0.9896186269441019, iteration: 68688
loss: 0.9751372337341309,grad_norm: 0.9976271568364652, iteration: 68689
loss: 1.0115444660186768,grad_norm: 0.9999991077104965, iteration: 68690
loss: 1.0153024196624756,grad_norm: 0.9999991048853083, iteration: 68691
loss: 0.9931130409240723,grad_norm: 0.9999991095402281, iteration: 68692
loss: 1.0046477317810059,grad_norm: 0.9900221296699983, iteration: 68693
loss: 0.9714287519454956,grad_norm: 0.9667446094793639, iteration: 68694
loss: 1.0261108875274658,grad_norm: 0.8746024556272125, iteration: 68695
loss: 1.0004056692123413,grad_norm: 0.9999989236433015, iteration: 68696
loss: 0.9926286339759827,grad_norm: 0.9405637358020782, iteration: 68697
loss: 0.97892826795578,grad_norm: 0.9999991846596428, iteration: 68698
loss: 0.977920413017273,grad_norm: 0.999999054839598, iteration: 68699
loss: 0.9895752668380737,grad_norm: 0.9787456010001602, iteration: 68700
loss: 0.9671545028686523,grad_norm: 0.9999990018569828, iteration: 68701
loss: 1.0220335721969604,grad_norm: 0.9999991982108583, iteration: 68702
loss: 1.0213940143585205,grad_norm: 0.8653527869888682, iteration: 68703
loss: 0.972141444683075,grad_norm: 0.999998995185321, iteration: 68704
loss: 1.0952414274215698,grad_norm: 0.999999923381317, iteration: 68705
loss: 1.0054030418395996,grad_norm: 0.963740623892942, iteration: 68706
loss: 0.9882481694221497,grad_norm: 0.9855037706054497, iteration: 68707
loss: 1.0000488758087158,grad_norm: 0.9999991874402387, iteration: 68708
loss: 0.9895531535148621,grad_norm: 0.9377073661431052, iteration: 68709
loss: 0.970831573009491,grad_norm: 0.9999991264595589, iteration: 68710
loss: 0.9987740516662598,grad_norm: 0.8311258448183402, iteration: 68711
loss: 1.0063502788543701,grad_norm: 0.9999991272130995, iteration: 68712
loss: 1.0140042304992676,grad_norm: 0.9636599784790933, iteration: 68713
loss: 0.9945096373558044,grad_norm: 0.8883974010458621, iteration: 68714
loss: 0.9955012798309326,grad_norm: 0.9999991456047209, iteration: 68715
loss: 0.9705276489257812,grad_norm: 0.8400945453797902, iteration: 68716
loss: 0.9820972084999084,grad_norm: 0.9999991755000647, iteration: 68717
loss: 1.024030327796936,grad_norm: 0.9999991790341332, iteration: 68718
loss: 0.9804067015647888,grad_norm: 0.9999993268377749, iteration: 68719
loss: 1.0259922742843628,grad_norm: 0.9999990114597985, iteration: 68720
loss: 1.0249736309051514,grad_norm: 0.936335916236741, iteration: 68721
loss: 0.9679536819458008,grad_norm: 0.8258046781941406, iteration: 68722
loss: 0.996540904045105,grad_norm: 0.9999989819512388, iteration: 68723
loss: 1.002525806427002,grad_norm: 0.9999992365239672, iteration: 68724
loss: 1.0023037195205688,grad_norm: 0.9722060264572518, iteration: 68725
loss: 0.9954260587692261,grad_norm: 0.9999990253513045, iteration: 68726
loss: 0.9785369634628296,grad_norm: 0.8942152221075197, iteration: 68727
loss: 0.9965254664421082,grad_norm: 0.8949314707329321, iteration: 68728
loss: 1.0228137969970703,grad_norm: 0.9606128590754069, iteration: 68729
loss: 1.0329605340957642,grad_norm: 0.9959868548537222, iteration: 68730
loss: 0.9756507873535156,grad_norm: 0.9999991662032438, iteration: 68731
loss: 1.01588773727417,grad_norm: 0.9999990587157886, iteration: 68732
loss: 1.0403177738189697,grad_norm: 0.8450233095339728, iteration: 68733
loss: 1.0262128114700317,grad_norm: 0.895748088920443, iteration: 68734
loss: 0.9871140122413635,grad_norm: 0.951528965423473, iteration: 68735
loss: 1.0084868669509888,grad_norm: 0.9999993016989349, iteration: 68736
loss: 0.9816945791244507,grad_norm: 0.9999992523932613, iteration: 68737
loss: 0.9906242489814758,grad_norm: 0.9237535709843491, iteration: 68738
loss: 0.9794538021087646,grad_norm: 0.865159787834827, iteration: 68739
loss: 0.9976341128349304,grad_norm: 0.999999150723877, iteration: 68740
loss: 0.9888337254524231,grad_norm: 0.999999252466198, iteration: 68741
loss: 0.9795852899551392,grad_norm: 0.9999992173542576, iteration: 68742
loss: 1.0036684274673462,grad_norm: 0.9999990430192727, iteration: 68743
loss: 0.9847185015678406,grad_norm: 0.999998986303772, iteration: 68744
loss: 0.9919793605804443,grad_norm: 0.999999206941754, iteration: 68745
loss: 1.015852451324463,grad_norm: 0.9999992722715464, iteration: 68746
loss: 1.0054150819778442,grad_norm: 0.9754060433748541, iteration: 68747
loss: 0.9564814567565918,grad_norm: 0.9462097113375457, iteration: 68748
loss: 0.9631645679473877,grad_norm: 0.905930524911386, iteration: 68749
loss: 1.0206239223480225,grad_norm: 0.9715730277981004, iteration: 68750
loss: 1.0114436149597168,grad_norm: 0.9951846917557157, iteration: 68751
loss: 1.025828242301941,grad_norm: 0.9378342701106797, iteration: 68752
loss: 1.0337973833084106,grad_norm: 0.9999991287792169, iteration: 68753
loss: 1.0080866813659668,grad_norm: 0.9999991630756022, iteration: 68754
loss: 0.998305082321167,grad_norm: 0.9909116935605519, iteration: 68755
loss: 1.0434489250183105,grad_norm: 0.9999991897728597, iteration: 68756
loss: 1.022024393081665,grad_norm: 0.9999990789740134, iteration: 68757
loss: 1.0182989835739136,grad_norm: 0.9283746440892573, iteration: 68758
loss: 1.0019322633743286,grad_norm: 0.8149565022876221, iteration: 68759
loss: 1.037158489227295,grad_norm: 0.9999992278981368, iteration: 68760
loss: 0.9641961455345154,grad_norm: 0.9905040111654059, iteration: 68761
loss: 0.9775887131690979,grad_norm: 0.9827795101823491, iteration: 68762
loss: 1.083013892173767,grad_norm: 0.9999992895475034, iteration: 68763
loss: 1.0262917280197144,grad_norm: 0.9999993511304605, iteration: 68764
loss: 1.003912329673767,grad_norm: 0.9999990166130828, iteration: 68765
loss: 0.9944344162940979,grad_norm: 0.9999990619082855, iteration: 68766
loss: 1.019997000694275,grad_norm: 0.9999990844983759, iteration: 68767
loss: 1.0209053754806519,grad_norm: 0.999664599247878, iteration: 68768
loss: 0.9996804594993591,grad_norm: 0.999999723228461, iteration: 68769
loss: 0.9901235103607178,grad_norm: 0.9999991162717229, iteration: 68770
loss: 0.9861425757408142,grad_norm: 0.9999989579364914, iteration: 68771
loss: 1.013429045677185,grad_norm: 0.9999992626110565, iteration: 68772
loss: 1.0046871900558472,grad_norm: 0.9999991555412315, iteration: 68773
loss: 0.95856773853302,grad_norm: 0.9650178642828585, iteration: 68774
loss: 0.9887728691101074,grad_norm: 0.9999992664851084, iteration: 68775
loss: 1.0211375951766968,grad_norm: 0.999999145483536, iteration: 68776
loss: 1.0089161396026611,grad_norm: 0.8553231224283508, iteration: 68777
loss: 1.0184303522109985,grad_norm: 0.9999993631308152, iteration: 68778
loss: 0.9907521605491638,grad_norm: 0.892595118474419, iteration: 68779
loss: 1.019449234008789,grad_norm: 0.9999991576869645, iteration: 68780
loss: 1.008898138999939,grad_norm: 0.999999097591583, iteration: 68781
loss: 0.9886127710342407,grad_norm: 0.9892795320035398, iteration: 68782
loss: 1.0321931838989258,grad_norm: 0.9155509608341208, iteration: 68783
loss: 1.1101101636886597,grad_norm: 0.999999234620379, iteration: 68784
loss: 0.9942649006843567,grad_norm: 0.9534084383245005, iteration: 68785
loss: 0.9930711984634399,grad_norm: 0.9614124338393686, iteration: 68786
loss: 1.0239567756652832,grad_norm: 0.999999283681273, iteration: 68787
loss: 1.0336567163467407,grad_norm: 0.9999994471712151, iteration: 68788
loss: 0.9963811039924622,grad_norm: 0.9999990604728393, iteration: 68789
loss: 1.0208778381347656,grad_norm: 0.8525014601469915, iteration: 68790
loss: 1.0424803495407104,grad_norm: 0.9999993451754048, iteration: 68791
loss: 1.0102345943450928,grad_norm: 0.9351041852582664, iteration: 68792
loss: 0.9803005456924438,grad_norm: 0.9999991365264903, iteration: 68793
loss: 1.0333985090255737,grad_norm: 0.9999990670940458, iteration: 68794
loss: 1.0172690153121948,grad_norm: 0.9999991031809089, iteration: 68795
loss: 0.9777998328208923,grad_norm: 0.9294645829419185, iteration: 68796
loss: 1.0147260427474976,grad_norm: 0.9999992062995554, iteration: 68797
loss: 1.024850845336914,grad_norm: 0.9999991483415194, iteration: 68798
loss: 0.9920954704284668,grad_norm: 0.9069548878510341, iteration: 68799
loss: 1.0018254518508911,grad_norm: 0.9999989747428731, iteration: 68800
loss: 0.9858973026275635,grad_norm: 0.9999989919402491, iteration: 68801
loss: 0.9685062170028687,grad_norm: 0.964672698601627, iteration: 68802
loss: 0.9806404709815979,grad_norm: 0.9151994064646389, iteration: 68803
loss: 0.9754714965820312,grad_norm: 0.871073827706792, iteration: 68804
loss: 1.0056228637695312,grad_norm: 0.9999993670200646, iteration: 68805
loss: 0.9910432696342468,grad_norm: 0.9999991151776589, iteration: 68806
loss: 1.0561535358428955,grad_norm: 0.9999994699363169, iteration: 68807
loss: 1.0187160968780518,grad_norm: 0.9999990940602151, iteration: 68808
loss: 0.9792140126228333,grad_norm: 0.9999990994627626, iteration: 68809
loss: 1.0130362510681152,grad_norm: 0.9999992260919283, iteration: 68810
loss: 0.9768228530883789,grad_norm: 0.9999990260861114, iteration: 68811
loss: 1.024977684020996,grad_norm: 0.9999991996981779, iteration: 68812
loss: 1.0315669775009155,grad_norm: 0.9999989807658326, iteration: 68813
loss: 0.981541097164154,grad_norm: 0.999999068455647, iteration: 68814
loss: 1.0155558586120605,grad_norm: 0.9999990120730697, iteration: 68815
loss: 1.0327214002609253,grad_norm: 0.9067751636719747, iteration: 68816
loss: 1.0237516164779663,grad_norm: 0.9999990933776524, iteration: 68817
loss: 1.0624995231628418,grad_norm: 0.9999991969949559, iteration: 68818
loss: 1.0490634441375732,grad_norm: 0.9999992212471681, iteration: 68819
loss: 0.9736009240150452,grad_norm: 0.9387278405360885, iteration: 68820
loss: 1.1432164907455444,grad_norm: 0.9999996331289108, iteration: 68821
loss: 1.0372412204742432,grad_norm: 0.9999994263454742, iteration: 68822
loss: 1.0386961698532104,grad_norm: 0.8752623699565558, iteration: 68823
loss: 0.9963080286979675,grad_norm: 0.9999990702845315, iteration: 68824
loss: 1.0230212211608887,grad_norm: 0.9480112900972565, iteration: 68825
loss: 1.0039904117584229,grad_norm: 0.9999989726797509, iteration: 68826
loss: 1.0214941501617432,grad_norm: 0.9999991282633713, iteration: 68827
loss: 0.9937715530395508,grad_norm: 0.9999992222996017, iteration: 68828
loss: 0.9904830455780029,grad_norm: 0.8507856055214259, iteration: 68829
loss: 1.0155267715454102,grad_norm: 0.9999991787975597, iteration: 68830
loss: 1.0170869827270508,grad_norm: 0.9999991049276346, iteration: 68831
loss: 0.9956744909286499,grad_norm: 0.9999991404538052, iteration: 68832
loss: 0.9883874654769897,grad_norm: 0.9999990688075078, iteration: 68833
loss: 1.029012680053711,grad_norm: 0.9999993001067493, iteration: 68834
loss: 0.9980512857437134,grad_norm: 0.9999989649536112, iteration: 68835
loss: 0.9998300671577454,grad_norm: 0.9552311926511294, iteration: 68836
loss: 0.9846482872962952,grad_norm: 0.9999990302910742, iteration: 68837
loss: 0.9901602864265442,grad_norm: 0.9999990790161002, iteration: 68838
loss: 1.0097026824951172,grad_norm: 0.9576103627372441, iteration: 68839
loss: 1.0272533893585205,grad_norm: 0.9442809073801429, iteration: 68840
loss: 1.0099437236785889,grad_norm: 0.9999992892555569, iteration: 68841
loss: 1.0148322582244873,grad_norm: 0.9615026220321119, iteration: 68842
loss: 0.9983271956443787,grad_norm: 0.9999991941504398, iteration: 68843
loss: 0.99892657995224,grad_norm: 0.9711361399571347, iteration: 68844
loss: 0.9872797131538391,grad_norm: 0.8885100273351619, iteration: 68845
loss: 0.9954802393913269,grad_norm: 0.9999989631368911, iteration: 68846
loss: 1.011937141418457,grad_norm: 0.9999996212473137, iteration: 68847
loss: 1.028519630432129,grad_norm: 0.9362156522313875, iteration: 68848
loss: 0.9943166375160217,grad_norm: 0.7663990996820976, iteration: 68849
loss: 0.9958505034446716,grad_norm: 0.9999993891163055, iteration: 68850
loss: 1.0196058750152588,grad_norm: 0.9999992594002415, iteration: 68851
loss: 1.0796566009521484,grad_norm: 0.9999995276282556, iteration: 68852
loss: 1.0119273662567139,grad_norm: 0.9968167437115801, iteration: 68853
loss: 1.0049545764923096,grad_norm: 0.9999991581216505, iteration: 68854
loss: 0.9881280064582825,grad_norm: 0.9999990800561266, iteration: 68855
loss: 0.9970285296440125,grad_norm: 0.9999989566270672, iteration: 68856
loss: 1.002698540687561,grad_norm: 0.9999990437634881, iteration: 68857
loss: 1.0254650115966797,grad_norm: 0.9999992327043515, iteration: 68858
loss: 1.0237762928009033,grad_norm: 0.9999992020731746, iteration: 68859
loss: 1.0111340284347534,grad_norm: 0.9999990221217089, iteration: 68860
loss: 1.0339045524597168,grad_norm: 0.9999992180557317, iteration: 68861
loss: 0.9984434247016907,grad_norm: 0.9883326396457567, iteration: 68862
loss: 0.9689366221427917,grad_norm: 0.999999069749054, iteration: 68863
loss: 1.0007156133651733,grad_norm: 0.9999990862705068, iteration: 68864
loss: 1.062682032585144,grad_norm: 0.9999996884044701, iteration: 68865
loss: 0.978350043296814,grad_norm: 0.9999990648006086, iteration: 68866
loss: 1.018509030342102,grad_norm: 0.9999999665624107, iteration: 68867
loss: 1.0194214582443237,grad_norm: 0.9999990246833371, iteration: 68868
loss: 0.9780753254890442,grad_norm: 0.9682724595198888, iteration: 68869
loss: 1.0116029977798462,grad_norm: 0.9985860713128034, iteration: 68870
loss: 0.9838306903839111,grad_norm: 0.9871803502744929, iteration: 68871
loss: 0.9902740120887756,grad_norm: 0.9999990716940714, iteration: 68872
loss: 0.9682426452636719,grad_norm: 0.9486977066152245, iteration: 68873
loss: 1.0060009956359863,grad_norm: 0.9855240992396244, iteration: 68874
loss: 0.9877795577049255,grad_norm: 0.9999992148014841, iteration: 68875
loss: 1.0019723176956177,grad_norm: 0.9999991567443384, iteration: 68876
loss: 0.9887915849685669,grad_norm: 0.9999990685062936, iteration: 68877
loss: 0.9873471856117249,grad_norm: 0.979003236130347, iteration: 68878
loss: 1.0219346284866333,grad_norm: 0.9999992599740439, iteration: 68879
loss: 0.9696817398071289,grad_norm: 0.9999991747909134, iteration: 68880
loss: 0.9985730051994324,grad_norm: 0.9999989622074469, iteration: 68881
loss: 1.0203558206558228,grad_norm: 0.9999991635690242, iteration: 68882
loss: 1.047439694404602,grad_norm: 0.9435995545320427, iteration: 68883
loss: 0.9927862286567688,grad_norm: 0.9999990953369194, iteration: 68884
loss: 1.0217673778533936,grad_norm: 0.9999995287838396, iteration: 68885
loss: 1.0417481660842896,grad_norm: 0.9999992058832451, iteration: 68886
loss: 1.001241683959961,grad_norm: 0.9999988434563181, iteration: 68887
loss: 1.0128835439682007,grad_norm: 0.9222753018726272, iteration: 68888
loss: 1.0095902681350708,grad_norm: 0.9999990611260341, iteration: 68889
loss: 1.004076361656189,grad_norm: 0.9999991493644561, iteration: 68890
loss: 0.9706676602363586,grad_norm: 0.8661189291552517, iteration: 68891
loss: 0.9829644560813904,grad_norm: 0.9513886028836157, iteration: 68892
loss: 0.9703149199485779,grad_norm: 0.8763809201193756, iteration: 68893
loss: 0.9966303706169128,grad_norm: 0.8505177418381791, iteration: 68894
loss: 0.9733335375785828,grad_norm: 0.9999989423193407, iteration: 68895
loss: 0.9990461468696594,grad_norm: 0.9518270983546364, iteration: 68896
loss: 0.9997134804725647,grad_norm: 0.999999011470142, iteration: 68897
loss: 1.0110465288162231,grad_norm: 0.9999993203441525, iteration: 68898
loss: 0.9817543029785156,grad_norm: 0.9306539821609733, iteration: 68899
loss: 1.0667845010757446,grad_norm: 0.999999643138332, iteration: 68900
loss: 0.9938018918037415,grad_norm: 0.9999991502860641, iteration: 68901
loss: 0.9857370853424072,grad_norm: 0.9999993278291855, iteration: 68902
loss: 0.9674109816551208,grad_norm: 0.9999989588850136, iteration: 68903
loss: 1.0087181329727173,grad_norm: 0.988506646101872, iteration: 68904
loss: 0.961085855960846,grad_norm: 0.9999992221995381, iteration: 68905
loss: 1.0147316455841064,grad_norm: 0.9999989209584819, iteration: 68906
loss: 1.0147747993469238,grad_norm: 0.9999993015164154, iteration: 68907
loss: 1.0127222537994385,grad_norm: 0.9301187047502162, iteration: 68908
loss: 0.9744309186935425,grad_norm: 0.9911271254939218, iteration: 68909
loss: 1.010282039642334,grad_norm: 0.9989624762803341, iteration: 68910
loss: 1.0279009342193604,grad_norm: 0.8734217549646525, iteration: 68911
loss: 1.022507905960083,grad_norm: 0.9999991730539105, iteration: 68912
loss: 0.9703139662742615,grad_norm: 0.9999991570514205, iteration: 68913
loss: 0.9894033074378967,grad_norm: 0.9999995601791997, iteration: 68914
loss: 1.0482797622680664,grad_norm: 0.9999995938571549, iteration: 68915
loss: 0.9700193405151367,grad_norm: 0.9999990407049442, iteration: 68916
loss: 1.0724056959152222,grad_norm: 0.9999990552565123, iteration: 68917
loss: 1.0213841199874878,grad_norm: 0.9999999233626309, iteration: 68918
loss: 1.0015932321548462,grad_norm: 0.9999995371782764, iteration: 68919
loss: 1.042166829109192,grad_norm: 0.9999988888439265, iteration: 68920
loss: 0.9929602146148682,grad_norm: 0.999999028734118, iteration: 68921
loss: 1.0172537565231323,grad_norm: 0.9999990660064224, iteration: 68922
loss: 1.0005836486816406,grad_norm: 0.9232595008748025, iteration: 68923
loss: 0.9786259531974792,grad_norm: 0.9170992050228155, iteration: 68924
loss: 1.0150834321975708,grad_norm: 0.9999990609555087, iteration: 68925
loss: 1.0167936086654663,grad_norm: 0.8632544684539669, iteration: 68926
loss: 0.9999771118164062,grad_norm: 0.9999990914970905, iteration: 68927
loss: 0.9778382182121277,grad_norm: 0.978098734254246, iteration: 68928
loss: 1.0048203468322754,grad_norm: 0.8942029994381563, iteration: 68929
loss: 1.0188883543014526,grad_norm: 0.9999993164274621, iteration: 68930
loss: 0.9855462312698364,grad_norm: 0.999999243063354, iteration: 68931
loss: 1.0070246458053589,grad_norm: 0.9999991151090577, iteration: 68932
loss: 1.0424247980117798,grad_norm: 0.9999995219225714, iteration: 68933
loss: 1.0431913137435913,grad_norm: 0.9103617314025247, iteration: 68934
loss: 0.9646996855735779,grad_norm: 0.9999992503943261, iteration: 68935
loss: 0.9996160864830017,grad_norm: 0.9442926671853377, iteration: 68936
loss: 1.0009493827819824,grad_norm: 0.999999152261823, iteration: 68937
loss: 1.019258737564087,grad_norm: 0.9698181804260193, iteration: 68938
loss: 1.0104546546936035,grad_norm: 0.934834874406669, iteration: 68939
loss: 0.9887844920158386,grad_norm: 0.9999991701448747, iteration: 68940
loss: 1.0014514923095703,grad_norm: 0.9999992581170944, iteration: 68941
loss: 1.0314756631851196,grad_norm: 0.9338689922814405, iteration: 68942
loss: 1.027100682258606,grad_norm: 0.9999999664821045, iteration: 68943
loss: 0.9725480079650879,grad_norm: 0.853763016679835, iteration: 68944
loss: 0.9935097098350525,grad_norm: 0.9999990965593943, iteration: 68945
loss: 1.008887767791748,grad_norm: 0.9506985107790291, iteration: 68946
loss: 1.025833010673523,grad_norm: 0.9999991610745975, iteration: 68947
loss: 1.0068327188491821,grad_norm: 0.9999991970992804, iteration: 68948
loss: 1.015191674232483,grad_norm: 0.8903067402972447, iteration: 68949
loss: 0.9763941168785095,grad_norm: 0.999999140894351, iteration: 68950
loss: 1.0175718069076538,grad_norm: 0.9244280568737618, iteration: 68951
loss: 0.9982491731643677,grad_norm: 0.9123907996037522, iteration: 68952
loss: 0.9830037951469421,grad_norm: 0.9999996441360257, iteration: 68953
loss: 1.0925359725952148,grad_norm: 0.9999995977373827, iteration: 68954
loss: 0.9871606826782227,grad_norm: 0.9999991201127744, iteration: 68955
loss: 0.9634243845939636,grad_norm: 0.9882747536065363, iteration: 68956
loss: 1.0079623460769653,grad_norm: 0.9990787018814091, iteration: 68957
loss: 0.9845579266548157,grad_norm: 0.8709208195202094, iteration: 68958
loss: 0.9926440119743347,grad_norm: 0.9565976845237267, iteration: 68959
loss: 0.9887318015098572,grad_norm: 0.9999990004889471, iteration: 68960
loss: 0.9941579103469849,grad_norm: 0.9999992277672054, iteration: 68961
loss: 0.997310996055603,grad_norm: 0.9855406388389796, iteration: 68962
loss: 1.0130484104156494,grad_norm: 0.9999993765189428, iteration: 68963
loss: 1.0560534000396729,grad_norm: 0.9999996759889437, iteration: 68964
loss: 0.9749612212181091,grad_norm: 0.9999991447594896, iteration: 68965
loss: 0.9829198122024536,grad_norm: 0.9999991831883287, iteration: 68966
loss: 0.9795778393745422,grad_norm: 0.9999991419380388, iteration: 68967
loss: 1.0435014963150024,grad_norm: 0.9999993771297645, iteration: 68968
loss: 0.9663392305374146,grad_norm: 0.9999989875130271, iteration: 68969
loss: 1.0473756790161133,grad_norm: 0.9999995210388332, iteration: 68970
loss: 1.0096579790115356,grad_norm: 0.9999990829388964, iteration: 68971
loss: 0.9828881025314331,grad_norm: 0.8730735607425454, iteration: 68972
loss: 1.0522737503051758,grad_norm: 0.9999992887252311, iteration: 68973
loss: 0.9949735999107361,grad_norm: 0.9999991631155186, iteration: 68974
loss: 0.9993734955787659,grad_norm: 0.9999992837203645, iteration: 68975
loss: 1.017622709274292,grad_norm: 0.9999991193453951, iteration: 68976
loss: 1.001017451286316,grad_norm: 0.8753461366091355, iteration: 68977
loss: 1.0322240591049194,grad_norm: 0.9999992321760801, iteration: 68978
loss: 1.0139451026916504,grad_norm: 0.9999992083399042, iteration: 68979
loss: 1.0015597343444824,grad_norm: 0.9999992405874227, iteration: 68980
loss: 0.9751573801040649,grad_norm: 0.9315672816490004, iteration: 68981
loss: 1.0193381309509277,grad_norm: 0.9999991469759009, iteration: 68982
loss: 1.005266547203064,grad_norm: 0.9999991874453213, iteration: 68983
loss: 1.0102224349975586,grad_norm: 0.9999990800487049, iteration: 68984
loss: 1.0097898244857788,grad_norm: 0.9444693974388324, iteration: 68985
loss: 1.0147372484207153,grad_norm: 0.9999990724336018, iteration: 68986
loss: 1.012998342514038,grad_norm: 0.9999990464293528, iteration: 68987
loss: 1.007292628288269,grad_norm: 0.9999991264656972, iteration: 68988
loss: 1.041438341140747,grad_norm: 0.9999992513218725, iteration: 68989
loss: 1.0446728467941284,grad_norm: 0.9999991593169968, iteration: 68990
loss: 0.9998139142990112,grad_norm: 0.999999059595221, iteration: 68991
loss: 0.9953826665878296,grad_norm: 0.9999993003159017, iteration: 68992
loss: 0.9924998879432678,grad_norm: 0.9999991730109584, iteration: 68993
loss: 0.9887872934341431,grad_norm: 0.9940915133820118, iteration: 68994
loss: 1.0344059467315674,grad_norm: 0.9999989824886909, iteration: 68995
loss: 0.9936243295669556,grad_norm: 0.9077876367517966, iteration: 68996
loss: 1.0263895988464355,grad_norm: 0.9999992698357736, iteration: 68997
loss: 1.002046823501587,grad_norm: 0.9999992114493217, iteration: 68998
loss: 0.9913932681083679,grad_norm: 0.9803757548851237, iteration: 68999
loss: 1.0159573554992676,grad_norm: 0.9999991387687308, iteration: 69000
loss: 1.0082576274871826,grad_norm: 0.9999990363725998, iteration: 69001
loss: 0.9557940363883972,grad_norm: 0.9999993062138994, iteration: 69002
loss: 1.0062520503997803,grad_norm: 0.9999990623760008, iteration: 69003
loss: 0.9677879810333252,grad_norm: 0.9999993447553045, iteration: 69004
loss: 1.0260218381881714,grad_norm: 0.999999193603484, iteration: 69005
loss: 0.9882208108901978,grad_norm: 0.9999990095266821, iteration: 69006
loss: 0.9755730628967285,grad_norm: 0.9999991403802223, iteration: 69007
loss: 0.966770350933075,grad_norm: 0.9999990776241348, iteration: 69008
loss: 1.0149580240249634,grad_norm: 0.8603836785762159, iteration: 69009
loss: 1.0140708684921265,grad_norm: 0.999999225754561, iteration: 69010
loss: 1.0269287824630737,grad_norm: 0.9999991359618368, iteration: 69011
loss: 0.993021547794342,grad_norm: 0.9907162918508629, iteration: 69012
loss: 1.0337388515472412,grad_norm: 0.9999992011373041, iteration: 69013
loss: 1.0087791681289673,grad_norm: 0.9999991636135931, iteration: 69014
loss: 1.0293418169021606,grad_norm: 0.9999992517623625, iteration: 69015
loss: 0.9899710416793823,grad_norm: 0.9446979253158593, iteration: 69016
loss: 0.9947499632835388,grad_norm: 0.9999990627379824, iteration: 69017
loss: 0.963262140750885,grad_norm: 0.9999991979536541, iteration: 69018
loss: 0.9829437136650085,grad_norm: 0.9951016852409786, iteration: 69019
loss: 0.9991956949234009,grad_norm: 0.9828405411152729, iteration: 69020
loss: 1.0063560009002686,grad_norm: 0.999999026201106, iteration: 69021
loss: 0.9903636574745178,grad_norm: 0.999998948602631, iteration: 69022
loss: 1.0238198041915894,grad_norm: 0.9999991792330912, iteration: 69023
loss: 0.9926773309707642,grad_norm: 0.8959615068858431, iteration: 69024
loss: 1.0147029161453247,grad_norm: 0.9131073838787842, iteration: 69025
loss: 1.0355976819992065,grad_norm: 0.9999991629072595, iteration: 69026
loss: 1.0634053945541382,grad_norm: 0.999999318813024, iteration: 69027
loss: 1.006054162979126,grad_norm: 0.9292822375809937, iteration: 69028
loss: 1.0488557815551758,grad_norm: 0.9999991661482759, iteration: 69029
loss: 1.0066983699798584,grad_norm: 0.9999992160779162, iteration: 69030
loss: 1.0201045274734497,grad_norm: 0.8624133462130629, iteration: 69031
loss: 1.0117231607437134,grad_norm: 0.9999990966881743, iteration: 69032
loss: 0.9923096299171448,grad_norm: 0.9999991213492478, iteration: 69033
loss: 1.0237950086593628,grad_norm: 0.9999990767528995, iteration: 69034
loss: 0.9678695201873779,grad_norm: 0.9999991048810095, iteration: 69035
loss: 1.0029271841049194,grad_norm: 0.9999992777745501, iteration: 69036
loss: 1.0063835382461548,grad_norm: 0.999999102544939, iteration: 69037
loss: 0.9899848103523254,grad_norm: 0.9999992985151364, iteration: 69038
loss: 1.0278271436691284,grad_norm: 0.9999996362336798, iteration: 69039
loss: 0.9934024214744568,grad_norm: 0.9999998371434771, iteration: 69040
loss: 1.0064741373062134,grad_norm: 0.9999990136596401, iteration: 69041
loss: 0.9673956632614136,grad_norm: 0.984241861697849, iteration: 69042
loss: 1.0134336948394775,grad_norm: 0.9803155939272326, iteration: 69043
loss: 0.9934331774711609,grad_norm: 0.9999991519801165, iteration: 69044
loss: 0.9899786114692688,grad_norm: 0.9469461653135361, iteration: 69045
loss: 0.9718334674835205,grad_norm: 0.9999990953621641, iteration: 69046
loss: 1.031969666481018,grad_norm: 0.9151020338460853, iteration: 69047
loss: 0.9862937331199646,grad_norm: 0.9999992046194618, iteration: 69048
loss: 1.0501705408096313,grad_norm: 0.9999992390703267, iteration: 69049
loss: 1.0057071447372437,grad_norm: 0.9975869601688804, iteration: 69050
loss: 1.0010020732879639,grad_norm: 0.8957835996384023, iteration: 69051
loss: 1.0225512981414795,grad_norm: 0.9999992934232045, iteration: 69052
loss: 1.013317584991455,grad_norm: 0.9999989997656499, iteration: 69053
loss: 1.0431931018829346,grad_norm: 0.9999994634761104, iteration: 69054
loss: 1.0272456407546997,grad_norm: 0.9999992530929923, iteration: 69055
loss: 0.9707673192024231,grad_norm: 0.9999993020693133, iteration: 69056
loss: 0.9942236542701721,grad_norm: 0.9999991172136035, iteration: 69057
loss: 0.9890080094337463,grad_norm: 0.9999992004512899, iteration: 69058
loss: 1.016348123550415,grad_norm: 0.9609568758057729, iteration: 69059
loss: 1.01582932472229,grad_norm: 0.9984844774968699, iteration: 69060
loss: 1.012476921081543,grad_norm: 0.9999992581357051, iteration: 69061
loss: 1.0439447164535522,grad_norm: 0.9999992621410585, iteration: 69062
loss: 1.013726830482483,grad_norm: 0.9349975546411934, iteration: 69063
loss: 0.9895354509353638,grad_norm: 0.9999991998825263, iteration: 69064
loss: 0.9768508672714233,grad_norm: 0.8750559785672168, iteration: 69065
loss: 1.006832480430603,grad_norm: 0.9999992622796308, iteration: 69066
loss: 1.0139597654342651,grad_norm: 0.9999990238167411, iteration: 69067
loss: 1.023790955543518,grad_norm: 0.9999992067144883, iteration: 69068
loss: 1.0213209390640259,grad_norm: 0.9999992669732812, iteration: 69069
loss: 0.9920452237129211,grad_norm: 0.9999992071681304, iteration: 69070
loss: 1.0151677131652832,grad_norm: 0.9999991182927479, iteration: 69071
loss: 0.9799190759658813,grad_norm: 0.9999991885330555, iteration: 69072
loss: 1.0122897624969482,grad_norm: 0.9700308927362407, iteration: 69073
loss: 0.9886396527290344,grad_norm: 0.885130873411043, iteration: 69074
loss: 1.0097577571868896,grad_norm: 0.9999991166208314, iteration: 69075
loss: 1.0233142375946045,grad_norm: 0.9999992119618006, iteration: 69076
loss: 1.0040785074234009,grad_norm: 0.9999990474981171, iteration: 69077
loss: 1.0169004201889038,grad_norm: 0.9999991292689587, iteration: 69078
loss: 1.01025390625,grad_norm: 0.9999994134061194, iteration: 69079
loss: 1.0130199193954468,grad_norm: 0.960174775581293, iteration: 69080
loss: 1.0237321853637695,grad_norm: 0.9999996530335671, iteration: 69081
loss: 0.9665812849998474,grad_norm: 0.9999990353623932, iteration: 69082
loss: 1.0051195621490479,grad_norm: 0.9999992786766813, iteration: 69083
loss: 0.9943687319755554,grad_norm: 0.9999991574092755, iteration: 69084
loss: 0.9902198910713196,grad_norm: 0.875732063670779, iteration: 69085
loss: 0.9951861500740051,grad_norm: 0.9999992330807792, iteration: 69086
loss: 0.9914923906326294,grad_norm: 0.9789251327079358, iteration: 69087
loss: 0.9686159491539001,grad_norm: 0.9999991279743083, iteration: 69088
loss: 1.0093169212341309,grad_norm: 0.9999991879537321, iteration: 69089
loss: 1.019784927368164,grad_norm: 0.9127251361042233, iteration: 69090
loss: 1.0060311555862427,grad_norm: 0.9999991027624622, iteration: 69091
loss: 1.0154107809066772,grad_norm: 0.9999991737959335, iteration: 69092
loss: 0.9576100707054138,grad_norm: 0.9999991182845044, iteration: 69093
loss: 1.0204639434814453,grad_norm: 0.9346259004436985, iteration: 69094
loss: 1.0260266065597534,grad_norm: 0.9999990978388872, iteration: 69095
loss: 1.0686314105987549,grad_norm: 0.9999997083783351, iteration: 69096
loss: 0.997224748134613,grad_norm: 0.9854077184822047, iteration: 69097
loss: 0.9998943209648132,grad_norm: 0.9999997170298333, iteration: 69098
loss: 0.9698248505592346,grad_norm: 0.999999041433381, iteration: 69099
loss: 0.965179979801178,grad_norm: 0.9999993838783743, iteration: 69100
loss: 0.9757159948348999,grad_norm: 0.9999990371771786, iteration: 69101
loss: 0.9611983895301819,grad_norm: 0.8663632004735263, iteration: 69102
loss: 1.0185418128967285,grad_norm: 0.9674330355169607, iteration: 69103
loss: 1.0343903303146362,grad_norm: 0.9999993244049893, iteration: 69104
loss: 0.9915155172348022,grad_norm: 0.9999993793014341, iteration: 69105
loss: 1.0177003145217896,grad_norm: 0.9999992700429308, iteration: 69106
loss: 1.02681303024292,grad_norm: 0.9999992469254373, iteration: 69107
loss: 1.0209224224090576,grad_norm: 0.9999994387880183, iteration: 69108
loss: 1.022109866142273,grad_norm: 0.9999990647215926, iteration: 69109
loss: 1.0557942390441895,grad_norm: 0.9999993531040562, iteration: 69110
loss: 0.9829831719398499,grad_norm: 0.999999054506117, iteration: 69111
loss: 0.970464825630188,grad_norm: 0.9999992203503087, iteration: 69112
loss: 0.974553644657135,grad_norm: 0.9999990964187827, iteration: 69113
loss: 1.0416505336761475,grad_norm: 0.9999991631001958, iteration: 69114
loss: 1.0138510465621948,grad_norm: 0.9200589065533434, iteration: 69115
loss: 1.016150951385498,grad_norm: 0.9999991413936558, iteration: 69116
loss: 1.0629899501800537,grad_norm: 0.9999996283282102, iteration: 69117
loss: 1.000958800315857,grad_norm: 0.9999991870572014, iteration: 69118
loss: 0.9676113128662109,grad_norm: 0.8904389231685124, iteration: 69119
loss: 0.989191472530365,grad_norm: 0.9445486357591532, iteration: 69120
loss: 0.9786291718482971,grad_norm: 0.8902717642916347, iteration: 69121
loss: 1.0304903984069824,grad_norm: 0.99999917999472, iteration: 69122
loss: 1.0434496402740479,grad_norm: 0.9999992612944768, iteration: 69123
loss: 1.0105606317520142,grad_norm: 0.9999992403681512, iteration: 69124
loss: 1.0068076848983765,grad_norm: 0.9999995584234727, iteration: 69125
loss: 0.9722753763198853,grad_norm: 0.9999990543513959, iteration: 69126
loss: 0.9825431704521179,grad_norm: 0.8872261420216977, iteration: 69127
loss: 1.0022715330123901,grad_norm: 0.960870939724524, iteration: 69128
loss: 1.007554292678833,grad_norm: 0.999999244924555, iteration: 69129
loss: 1.0187073945999146,grad_norm: 0.9999992015443303, iteration: 69130
loss: 1.0210806131362915,grad_norm: 0.9999989924500163, iteration: 69131
loss: 1.0036420822143555,grad_norm: 0.9463931371464452, iteration: 69132
loss: 1.0022830963134766,grad_norm: 0.8255956494902428, iteration: 69133
loss: 0.992530107498169,grad_norm: 0.9999990897783035, iteration: 69134
loss: 1.0303196907043457,grad_norm: 0.9999990537475975, iteration: 69135
loss: 0.9724122881889343,grad_norm: 0.9999989654450377, iteration: 69136
loss: 1.0397355556488037,grad_norm: 0.9999994078297625, iteration: 69137
loss: 1.00395929813385,grad_norm: 0.9999991094154596, iteration: 69138
loss: 0.9673240184783936,grad_norm: 0.9999990706323161, iteration: 69139
loss: 1.0048613548278809,grad_norm: 0.9999993081272888, iteration: 69140
loss: 1.0121008157730103,grad_norm: 0.9999994894661812, iteration: 69141
loss: 0.9960857629776001,grad_norm: 0.9999989927305252, iteration: 69142
loss: 0.9952875971794128,grad_norm: 0.9999990842634021, iteration: 69143
loss: 1.0049299001693726,grad_norm: 0.9999993278715904, iteration: 69144
loss: 1.0209357738494873,grad_norm: 0.9876933823377849, iteration: 69145
loss: 1.0173221826553345,grad_norm: 0.9999990384554966, iteration: 69146
loss: 0.9834734797477722,grad_norm: 0.9999990920942211, iteration: 69147
loss: 1.0073562860488892,grad_norm: 0.9912216316897409, iteration: 69148
loss: 0.9885831475257874,grad_norm: 0.9999991913141695, iteration: 69149
loss: 0.9982797503471375,grad_norm: 0.8954367301965531, iteration: 69150
loss: 1.0251250267028809,grad_norm: 0.9999991598103227, iteration: 69151
loss: 1.0041946172714233,grad_norm: 0.8109825557411852, iteration: 69152
loss: 1.011816143989563,grad_norm: 0.9999991399508529, iteration: 69153
loss: 1.0071717500686646,grad_norm: 0.907318928308131, iteration: 69154
loss: 1.0036683082580566,grad_norm: 0.9370108864475405, iteration: 69155
loss: 1.0010510683059692,grad_norm: 0.9999990004721255, iteration: 69156
loss: 0.9901408553123474,grad_norm: 0.9999991935881523, iteration: 69157
loss: 1.1389646530151367,grad_norm: 0.999999375211533, iteration: 69158
loss: 1.0583988428115845,grad_norm: 0.9999997067874713, iteration: 69159
loss: 1.0093448162078857,grad_norm: 0.9999991470757086, iteration: 69160
loss: 0.9931966066360474,grad_norm: 0.9458650150557576, iteration: 69161
loss: 0.9669532179832458,grad_norm: 0.9999991368596475, iteration: 69162
loss: 1.014714241027832,grad_norm: 0.9999992496563278, iteration: 69163
loss: 1.023953914642334,grad_norm: 0.9107557025756083, iteration: 69164
loss: 0.9572116732597351,grad_norm: 0.9878572176492623, iteration: 69165
loss: 1.0211888551712036,grad_norm: 0.9999989683334658, iteration: 69166
loss: 0.9979145526885986,grad_norm: 0.9287403232835272, iteration: 69167
loss: 0.991878867149353,grad_norm: 0.9999989899281337, iteration: 69168
loss: 0.9905319213867188,grad_norm: 0.9999992392257117, iteration: 69169
loss: 1.030590295791626,grad_norm: 0.9999999321577178, iteration: 69170
loss: 0.9894746541976929,grad_norm: 0.9999991292567024, iteration: 69171
loss: 1.0474998950958252,grad_norm: 0.9999999081833797, iteration: 69172
loss: 0.9629745483398438,grad_norm: 0.8640200980775679, iteration: 69173
loss: 1.0032575130462646,grad_norm: 0.9999990569587082, iteration: 69174
loss: 0.9867181181907654,grad_norm: 0.9394276744647574, iteration: 69175
loss: 0.9893151521682739,grad_norm: 0.8661915674638014, iteration: 69176
loss: 1.019654631614685,grad_norm: 0.9999992101367084, iteration: 69177
loss: 1.002428650856018,grad_norm: 0.9999990158996391, iteration: 69178
loss: 0.9624741077423096,grad_norm: 0.9626477831277123, iteration: 69179
loss: 1.0144226551055908,grad_norm: 0.8679115683292608, iteration: 69180
loss: 0.9840956330299377,grad_norm: 0.9058871811774756, iteration: 69181
loss: 0.9562411308288574,grad_norm: 0.9999989561418531, iteration: 69182
loss: 1.0388416051864624,grad_norm: 0.9999995573991874, iteration: 69183
loss: 1.0106384754180908,grad_norm: 0.9453231468239076, iteration: 69184
loss: 1.0481857061386108,grad_norm: 0.9260204690729825, iteration: 69185
loss: 0.977895975112915,grad_norm: 0.9878606126302865, iteration: 69186
loss: 0.982395350933075,grad_norm: 0.9685268766484763, iteration: 69187
loss: 1.0159475803375244,grad_norm: 0.9999990190966389, iteration: 69188
loss: 0.9986465573310852,grad_norm: 0.9429909249591658, iteration: 69189
loss: 1.0494146347045898,grad_norm: 0.9999991137643004, iteration: 69190
loss: 1.0065929889678955,grad_norm: 0.9517292649219501, iteration: 69191
loss: 1.0447813272476196,grad_norm: 0.9999990701574293, iteration: 69192
loss: 1.0451387166976929,grad_norm: 0.9999990857677907, iteration: 69193
loss: 0.9683834314346313,grad_norm: 0.8762871812211491, iteration: 69194
loss: 0.9673954844474792,grad_norm: 0.9983941761732017, iteration: 69195
loss: 1.0367225408554077,grad_norm: 0.9999990916139828, iteration: 69196
loss: 0.9531122446060181,grad_norm: 0.9999990553846672, iteration: 69197
loss: 0.9996609687805176,grad_norm: 0.9115886647954684, iteration: 69198
loss: 0.9900282025337219,grad_norm: 0.9999991697189178, iteration: 69199
loss: 1.012045979499817,grad_norm: 0.9999991725437101, iteration: 69200
loss: 1.021904468536377,grad_norm: 0.8648260336386625, iteration: 69201
loss: 1.012199878692627,grad_norm: 0.9999992832612269, iteration: 69202
loss: 0.9835836291313171,grad_norm: 0.9999991510239556, iteration: 69203
loss: 1.0221498012542725,grad_norm: 0.9999990974184368, iteration: 69204
loss: 1.0859622955322266,grad_norm: 0.9999990270446392, iteration: 69205
loss: 0.9859652519226074,grad_norm: 0.931253823307122, iteration: 69206
loss: 0.9846698641777039,grad_norm: 0.9999991277996698, iteration: 69207
loss: 1.0063215494155884,grad_norm: 0.9999990095623995, iteration: 69208
loss: 1.0092577934265137,grad_norm: 0.9999991627542321, iteration: 69209
loss: 0.9880357980728149,grad_norm: 0.9999991473516009, iteration: 69210
loss: 1.019873023033142,grad_norm: 0.9999991912152553, iteration: 69211
loss: 1.047892451286316,grad_norm: 0.9999991133055554, iteration: 69212
loss: 1.0216736793518066,grad_norm: 0.9999990586783516, iteration: 69213
loss: 1.1135512590408325,grad_norm: 0.999999017230933, iteration: 69214
loss: 1.003085970878601,grad_norm: 0.9999990637019385, iteration: 69215
loss: 1.0232361555099487,grad_norm: 0.9999990684466173, iteration: 69216
loss: 1.003660798072815,grad_norm: 0.9714546372591293, iteration: 69217
loss: 0.9870954751968384,grad_norm: 0.9307482742921047, iteration: 69218
loss: 1.001523733139038,grad_norm: 0.9999990864124023, iteration: 69219
loss: 0.9812024235725403,grad_norm: 0.9999991149433466, iteration: 69220
loss: 0.997963547706604,grad_norm: 0.9999992038721286, iteration: 69221
loss: 0.9988634586334229,grad_norm: 0.9999991814346219, iteration: 69222
loss: 1.0070469379425049,grad_norm: 0.9549891939967361, iteration: 69223
loss: 0.9762343764305115,grad_norm: 0.999998996763964, iteration: 69224
loss: 0.9779932498931885,grad_norm: 0.8814523374379621, iteration: 69225
loss: 0.9905390739440918,grad_norm: 0.9999996408100266, iteration: 69226
loss: 1.0072021484375,grad_norm: 0.9651104111767335, iteration: 69227
loss: 1.02869713306427,grad_norm: 0.9999991301623806, iteration: 69228
loss: 1.0289446115493774,grad_norm: 0.9999990108964336, iteration: 69229
loss: 0.9855343103408813,grad_norm: 0.9999991307505429, iteration: 69230
loss: 1.0294495820999146,grad_norm: 0.9999994355529415, iteration: 69231
loss: 0.9770640730857849,grad_norm: 0.8661459107444032, iteration: 69232
loss: 0.9768233299255371,grad_norm: 0.9999991484469081, iteration: 69233
loss: 1.0053715705871582,grad_norm: 0.9485171895546844, iteration: 69234
loss: 1.0395359992980957,grad_norm: 0.9645695612558222, iteration: 69235
loss: 0.9512472152709961,grad_norm: 0.9999991257540691, iteration: 69236
loss: 1.012505054473877,grad_norm: 0.9999989728962897, iteration: 69237
loss: 1.0071202516555786,grad_norm: 0.9999989682529346, iteration: 69238
loss: 0.9872003793716431,grad_norm: 0.9857895140452786, iteration: 69239
loss: 1.0045379400253296,grad_norm: 0.9282558713741604, iteration: 69240
loss: 0.9755753874778748,grad_norm: 0.9167505914505739, iteration: 69241
loss: 1.0109891891479492,grad_norm: 0.9999991092077479, iteration: 69242
loss: 1.0035289525985718,grad_norm: 0.9999990616633982, iteration: 69243
loss: 1.0008704662322998,grad_norm: 0.9999991504738794, iteration: 69244
loss: 1.0123372077941895,grad_norm: 0.9999993311636147, iteration: 69245
loss: 1.0072576999664307,grad_norm: 0.9999994640390457, iteration: 69246
loss: 0.9847263097763062,grad_norm: 0.9999990795818773, iteration: 69247
loss: 0.991571843624115,grad_norm: 0.9999992308799381, iteration: 69248
loss: 0.9770148992538452,grad_norm: 0.9999992158962748, iteration: 69249
loss: 1.0114305019378662,grad_norm: 0.9639957628189123, iteration: 69250
loss: 0.992180347442627,grad_norm: 0.9999993622467879, iteration: 69251
loss: 0.9897011518478394,grad_norm: 0.9999990835161158, iteration: 69252
loss: 0.9921490550041199,grad_norm: 0.8440848320697777, iteration: 69253
loss: 0.9329321980476379,grad_norm: 0.9008342427003387, iteration: 69254
loss: 0.9672567844390869,grad_norm: 0.9999993135920496, iteration: 69255
loss: 1.0318435430526733,grad_norm: 0.9999992531839061, iteration: 69256
loss: 1.0014151334762573,grad_norm: 0.9266232132396618, iteration: 69257
loss: 1.0093969106674194,grad_norm: 0.8984900461066584, iteration: 69258
loss: 1.0015238523483276,grad_norm: 0.9749521878370699, iteration: 69259
loss: 0.9991751909255981,grad_norm: 0.8316004831341104, iteration: 69260
loss: 1.0365338325500488,grad_norm: 0.9999993593525363, iteration: 69261
loss: 0.9879571795463562,grad_norm: 0.9999992327770639, iteration: 69262
loss: 0.9852243661880493,grad_norm: 0.9999991913481834, iteration: 69263
loss: 1.0025883913040161,grad_norm: 0.9330100259901195, iteration: 69264
loss: 0.97568279504776,grad_norm: 0.8776768030599088, iteration: 69265
loss: 0.9581990242004395,grad_norm: 0.9999994352493924, iteration: 69266
loss: 0.9839808940887451,grad_norm: 0.9999992582265417, iteration: 69267
loss: 1.0131278038024902,grad_norm: 0.9999991500561835, iteration: 69268
loss: 1.0031453371047974,grad_norm: 0.9999989930020161, iteration: 69269
loss: 1.0028576850891113,grad_norm: 0.9777578491104605, iteration: 69270
loss: 1.0215636491775513,grad_norm: 0.9999992231284346, iteration: 69271
loss: 1.019508719444275,grad_norm: 0.9999990104402511, iteration: 69272
loss: 0.9828718304634094,grad_norm: 0.9999989833242925, iteration: 69273
loss: 0.9845325350761414,grad_norm: 0.8880307289883823, iteration: 69274
loss: 1.0203256607055664,grad_norm: 0.8695810442826987, iteration: 69275
loss: 1.0161869525909424,grad_norm: 0.9999992346696825, iteration: 69276
loss: 1.0301401615142822,grad_norm: 0.9999992896296341, iteration: 69277
loss: 1.0444843769073486,grad_norm: 0.9999991918968062, iteration: 69278
loss: 0.9588724374771118,grad_norm: 0.9999991172907572, iteration: 69279
loss: 1.0264390707015991,grad_norm: 0.999999877935308, iteration: 69280
loss: 1.0093913078308105,grad_norm: 0.934182298643522, iteration: 69281
loss: 1.0269850492477417,grad_norm: 0.8208395030296608, iteration: 69282
loss: 1.049647331237793,grad_norm: 0.9999996506651658, iteration: 69283
loss: 1.0021811723709106,grad_norm: 0.999999477090845, iteration: 69284
loss: 0.9982990026473999,grad_norm: 0.9999991436567944, iteration: 69285
loss: 0.9877516031265259,grad_norm: 0.9851423419230062, iteration: 69286
loss: 0.9826855659484863,grad_norm: 0.9999991820172532, iteration: 69287
loss: 1.0512642860412598,grad_norm: 0.9999991304928545, iteration: 69288
loss: 0.9774017333984375,grad_norm: 0.9999994343797516, iteration: 69289
loss: 1.0189378261566162,grad_norm: 0.9999990755865067, iteration: 69290
loss: 0.9899111986160278,grad_norm: 0.9999990746217005, iteration: 69291
loss: 1.0119942426681519,grad_norm: 0.9999992065874201, iteration: 69292
loss: 1.0299416780471802,grad_norm: 0.9999992321356914, iteration: 69293
loss: 1.004599690437317,grad_norm: 0.9999989251835925, iteration: 69294
loss: 1.0033204555511475,grad_norm: 0.9999991994503886, iteration: 69295
loss: 1.0238022804260254,grad_norm: 0.9999992925146561, iteration: 69296
loss: 0.9898422956466675,grad_norm: 0.9999992995147179, iteration: 69297
loss: 1.0497969388961792,grad_norm: 0.9999993562498601, iteration: 69298
loss: 1.0015360116958618,grad_norm: 0.9999990806348672, iteration: 69299
loss: 1.019233226776123,grad_norm: 0.9999990924180032, iteration: 69300
loss: 1.0112793445587158,grad_norm: 0.9376947790123326, iteration: 69301
loss: 1.0115376710891724,grad_norm: 0.9999998392278828, iteration: 69302
loss: 1.014423131942749,grad_norm: 0.9900514644236627, iteration: 69303
loss: 0.9687049388885498,grad_norm: 0.9999991324303837, iteration: 69304
loss: 1.0970934629440308,grad_norm: 0.9999997079263852, iteration: 69305
loss: 0.9977781176567078,grad_norm: 0.9731195040419264, iteration: 69306
loss: 1.0364480018615723,grad_norm: 0.9999992962312204, iteration: 69307
loss: 1.0007554292678833,grad_norm: 0.9999990286276577, iteration: 69308
loss: 1.034873604774475,grad_norm: 0.9999991431762504, iteration: 69309
loss: 1.028615117073059,grad_norm: 0.9999998862524342, iteration: 69310
loss: 0.9856855273246765,grad_norm: 0.9999990047519538, iteration: 69311
loss: 1.0425655841827393,grad_norm: 0.999999019974674, iteration: 69312
loss: 0.98699551820755,grad_norm: 0.9999990185367635, iteration: 69313
loss: 0.9711189866065979,grad_norm: 0.9999992733951069, iteration: 69314
loss: 1.017960548400879,grad_norm: 0.9999989556524641, iteration: 69315
loss: 1.024194359779358,grad_norm: 0.9999990230036993, iteration: 69316
loss: 0.9707284569740295,grad_norm: 0.9999991829836928, iteration: 69317
loss: 1.032907485961914,grad_norm: 0.9073234335163314, iteration: 69318
loss: 0.9916921854019165,grad_norm: 0.8828197957027007, iteration: 69319
loss: 1.0045400857925415,grad_norm: 0.9999992637492308, iteration: 69320
loss: 0.9808374047279358,grad_norm: 0.9999991988939103, iteration: 69321
loss: 1.022199273109436,grad_norm: 0.9374712615908939, iteration: 69322
loss: 1.0321264266967773,grad_norm: 0.8510381931576051, iteration: 69323
loss: 0.9943397641181946,grad_norm: 0.9999991666235115, iteration: 69324
loss: 1.0258448123931885,grad_norm: 0.9496377769361856, iteration: 69325
loss: 1.0269818305969238,grad_norm: 0.9999990103929927, iteration: 69326
loss: 1.002788782119751,grad_norm: 0.9999991342466606, iteration: 69327
loss: 0.9909520149230957,grad_norm: 0.9999988601900024, iteration: 69328
loss: 1.0212666988372803,grad_norm: 0.9999989695002651, iteration: 69329
loss: 1.0215083360671997,grad_norm: 0.999999294886544, iteration: 69330
loss: 0.9871165156364441,grad_norm: 0.9999991017414249, iteration: 69331
loss: 1.0029419660568237,grad_norm: 0.9470240467324307, iteration: 69332
loss: 1.0133610963821411,grad_norm: 0.9999991609683072, iteration: 69333
loss: 1.0334019660949707,grad_norm: 0.9999992843849462, iteration: 69334
loss: 0.9987071752548218,grad_norm: 0.893832325134328, iteration: 69335
loss: 1.0025633573532104,grad_norm: 0.9999990951523848, iteration: 69336
loss: 0.97568279504776,grad_norm: 0.9767747989690946, iteration: 69337
loss: 1.0127613544464111,grad_norm: 0.9978174562973285, iteration: 69338
loss: 0.9925545454025269,grad_norm: 0.9842791333887123, iteration: 69339
loss: 1.0068304538726807,grad_norm: 0.9999992562742157, iteration: 69340
loss: 1.0068215131759644,grad_norm: 0.9866169721586276, iteration: 69341
loss: 1.0367248058319092,grad_norm: 0.9999994161437634, iteration: 69342
loss: 0.9913265109062195,grad_norm: 0.9961458507221366, iteration: 69343
loss: 1.0084561109542847,grad_norm: 0.9999996926296908, iteration: 69344
loss: 0.9991670846939087,grad_norm: 0.9535444375626417, iteration: 69345
loss: 0.99295973777771,grad_norm: 0.9999996796967325, iteration: 69346
loss: 0.9927878379821777,grad_norm: 0.9999990457074193, iteration: 69347
loss: 0.991517961025238,grad_norm: 0.9999990897188951, iteration: 69348
loss: 0.9979947209358215,grad_norm: 0.9999991322784122, iteration: 69349
loss: 0.986493706703186,grad_norm: 0.9999992382798504, iteration: 69350
loss: 0.9692773222923279,grad_norm: 0.9999991956475603, iteration: 69351
loss: 0.9947338104248047,grad_norm: 0.9999990234861367, iteration: 69352
loss: 0.9825533628463745,grad_norm: 0.9999990672151584, iteration: 69353
loss: 0.9882935881614685,grad_norm: 0.9999989866545551, iteration: 69354
loss: 0.9813129901885986,grad_norm: 0.9999990329755178, iteration: 69355
loss: 1.0176901817321777,grad_norm: 0.9999992133588909, iteration: 69356
loss: 0.9953896403312683,grad_norm: 0.9755925407347473, iteration: 69357
loss: 1.005347728729248,grad_norm: 0.9999990601199129, iteration: 69358
loss: 1.002124309539795,grad_norm: 0.9009002755515139, iteration: 69359
loss: 0.9977223873138428,grad_norm: 0.8821950605094909, iteration: 69360
loss: 0.995603084564209,grad_norm: 0.9623580736719554, iteration: 69361
loss: 1.0251619815826416,grad_norm: 0.9999991015705776, iteration: 69362
loss: 0.9905979633331299,grad_norm: 0.9109709670165093, iteration: 69363
loss: 0.9954858422279358,grad_norm: 0.9999992806326823, iteration: 69364
loss: 1.0212292671203613,grad_norm: 0.9999989704350238, iteration: 69365
loss: 1.023290753364563,grad_norm: 0.9899762649901358, iteration: 69366
loss: 1.042433261871338,grad_norm: 0.999999818709839, iteration: 69367
loss: 1.0274765491485596,grad_norm: 0.9840890870012942, iteration: 69368
loss: 1.0426459312438965,grad_norm: 0.9999991516905967, iteration: 69369
loss: 1.016699194908142,grad_norm: 0.8989591706975567, iteration: 69370
loss: 0.9997369050979614,grad_norm: 0.9999990604087919, iteration: 69371
loss: 0.9687294960021973,grad_norm: 0.9270976142284434, iteration: 69372
loss: 1.0048270225524902,grad_norm: 0.999999157541062, iteration: 69373
loss: 0.9827945828437805,grad_norm: 0.9503868030086567, iteration: 69374
loss: 1.0115984678268433,grad_norm: 0.9500151177756966, iteration: 69375
loss: 1.0031756162643433,grad_norm: 0.9999992903450895, iteration: 69376
loss: 0.9765607118606567,grad_norm: 0.9999990920104671, iteration: 69377
loss: 0.9899062514305115,grad_norm: 0.9198724912189905, iteration: 69378
loss: 1.0378938913345337,grad_norm: 0.9368597998618573, iteration: 69379
loss: 1.0778201818466187,grad_norm: 0.9999993662987388, iteration: 69380
loss: 1.0184941291809082,grad_norm: 0.9999989472159777, iteration: 69381
loss: 0.9984413981437683,grad_norm: 0.8444920010792228, iteration: 69382
loss: 1.0487245321273804,grad_norm: 0.9999996600046727, iteration: 69383
loss: 1.001604676246643,grad_norm: 0.9999990235922235, iteration: 69384
loss: 1.0216357707977295,grad_norm: 0.9999992694819314, iteration: 69385
loss: 0.9897831082344055,grad_norm: 0.8895801813115334, iteration: 69386
loss: 1.0163935422897339,grad_norm: 0.99999903765567, iteration: 69387
loss: 0.9875391125679016,grad_norm: 0.9606473468421671, iteration: 69388
loss: 1.0336354970932007,grad_norm: 0.9421873404367794, iteration: 69389
loss: 0.978249728679657,grad_norm: 0.9999991087754403, iteration: 69390
loss: 0.9970210790634155,grad_norm: 0.9999992203616145, iteration: 69391
loss: 0.9810233116149902,grad_norm: 0.9999993535335548, iteration: 69392
loss: 0.9975335597991943,grad_norm: 0.999999152460487, iteration: 69393
loss: 0.9854163527488708,grad_norm: 0.9999991129115571, iteration: 69394
loss: 1.004634976387024,grad_norm: 0.9999993394621269, iteration: 69395
loss: 0.9891429543495178,grad_norm: 0.999999230916886, iteration: 69396
loss: 0.9839856028556824,grad_norm: 0.9916544899616206, iteration: 69397
loss: 1.0246433019638062,grad_norm: 0.9429935990051348, iteration: 69398
loss: 1.020402431488037,grad_norm: 0.9999990158180233, iteration: 69399
loss: 1.020462155342102,grad_norm: 0.890249470947572, iteration: 69400
loss: 0.9994537234306335,grad_norm: 0.9999992539055265, iteration: 69401
loss: 1.0262221097946167,grad_norm: 0.9999998350414213, iteration: 69402
loss: 0.9970239400863647,grad_norm: 0.9999997882367537, iteration: 69403
loss: 1.0844707489013672,grad_norm: 0.9999996482121734, iteration: 69404
loss: 0.9697777032852173,grad_norm: 0.9368327956397832, iteration: 69405
loss: 1.0221905708312988,grad_norm: 0.9999991780983472, iteration: 69406
loss: 0.988650918006897,grad_norm: 0.9203320044659935, iteration: 69407
loss: 0.9524808526039124,grad_norm: 0.9999992375710735, iteration: 69408
loss: 0.9839125871658325,grad_norm: 0.9999989539201335, iteration: 69409
loss: 1.0028399229049683,grad_norm: 0.9999991940002191, iteration: 69410
loss: 0.9669091701507568,grad_norm: 0.8990894204687906, iteration: 69411
loss: 1.0085326433181763,grad_norm: 0.9884071991142297, iteration: 69412
loss: 1.023581624031067,grad_norm: 0.9290291816575991, iteration: 69413
loss: 0.9959481358528137,grad_norm: 0.9999991731556221, iteration: 69414
loss: 1.0029239654541016,grad_norm: 0.9999992508499296, iteration: 69415
loss: 1.0270494222640991,grad_norm: 0.9999991262759799, iteration: 69416
loss: 0.9974663853645325,grad_norm: 0.9820324470480171, iteration: 69417
loss: 1.0175637006759644,grad_norm: 0.9781694170251894, iteration: 69418
loss: 0.9974638223648071,grad_norm: 0.8595742216405309, iteration: 69419
loss: 1.0099945068359375,grad_norm: 0.9999993080416563, iteration: 69420
loss: 0.9977012872695923,grad_norm: 0.8636132259125265, iteration: 69421
loss: 1.0151036977767944,grad_norm: 0.9999996443318117, iteration: 69422
loss: 0.971192479133606,grad_norm: 0.999999085287172, iteration: 69423
loss: 0.9750275015830994,grad_norm: 0.9999991903952655, iteration: 69424
loss: 1.0226237773895264,grad_norm: 0.9999990364480786, iteration: 69425
loss: 1.0024715662002563,grad_norm: 0.9999995187001405, iteration: 69426
loss: 1.0010085105895996,grad_norm: 0.9999990279479458, iteration: 69427
loss: 0.9853912591934204,grad_norm: 0.836839599445247, iteration: 69428
loss: 0.9804598093032837,grad_norm: 0.999999191189237, iteration: 69429
loss: 0.9976155161857605,grad_norm: 0.9695157342039159, iteration: 69430
loss: 1.0509157180786133,grad_norm: 0.9999993466693223, iteration: 69431
loss: 0.9841405153274536,grad_norm: 0.999999023297232, iteration: 69432
loss: 1.0194554328918457,grad_norm: 0.8403736298657123, iteration: 69433
loss: 1.0270886421203613,grad_norm: 0.9995287812211844, iteration: 69434
loss: 1.024803638458252,grad_norm: 0.96243562210011, iteration: 69435
loss: 1.034572720527649,grad_norm: 0.9999991207572224, iteration: 69436
loss: 0.9754476547241211,grad_norm: 0.9999991053560923, iteration: 69437
loss: 0.989678680896759,grad_norm: 0.9999991155660347, iteration: 69438
loss: 1.0268839597702026,grad_norm: 0.9344951014895637, iteration: 69439
loss: 1.0067131519317627,grad_norm: 0.999999147658507, iteration: 69440
loss: 0.9909064769744873,grad_norm: 0.9040462754286946, iteration: 69441
loss: 1.0318076610565186,grad_norm: 0.9328569587848007, iteration: 69442
loss: 0.9789173007011414,grad_norm: 0.9999991843468784, iteration: 69443
loss: 1.0073009729385376,grad_norm: 0.9999990860206537, iteration: 69444
loss: 1.0343095064163208,grad_norm: 0.9999994385134141, iteration: 69445
loss: 1.0312330722808838,grad_norm: 0.9999991239094417, iteration: 69446
loss: 0.9748963117599487,grad_norm: 0.9999996003214611, iteration: 69447
loss: 1.0605151653289795,grad_norm: 0.9456598120690899, iteration: 69448
loss: 1.0099351406097412,grad_norm: 0.9999992377706552, iteration: 69449
loss: 0.9763078093528748,grad_norm: 0.9999992673945935, iteration: 69450
loss: 0.9943891167640686,grad_norm: 0.8692233123240933, iteration: 69451
loss: 0.9698199033737183,grad_norm: 0.956398705925494, iteration: 69452
loss: 0.9980202913284302,grad_norm: 0.9999991656226115, iteration: 69453
loss: 1.01695716381073,grad_norm: 0.9999991782317483, iteration: 69454
loss: 0.9961615204811096,grad_norm: 0.9999994529854044, iteration: 69455
loss: 0.9769346714019775,grad_norm: 0.9872369585793627, iteration: 69456
loss: 0.9946454763412476,grad_norm: 0.9999989971386677, iteration: 69457
loss: 1.0171748399734497,grad_norm: 0.9247056797398888, iteration: 69458
loss: 0.973773181438446,grad_norm: 0.9999990278461729, iteration: 69459
loss: 0.9994162321090698,grad_norm: 0.9999993058827944, iteration: 69460
loss: 0.9853702187538147,grad_norm: 0.9999994189415439, iteration: 69461
loss: 1.0028668642044067,grad_norm: 0.8516968181851867, iteration: 69462
loss: 0.9911237955093384,grad_norm: 0.999999023973556, iteration: 69463
loss: 1.0229289531707764,grad_norm: 0.9999991595586823, iteration: 69464
loss: 1.0488570928573608,grad_norm: 0.9970345019410503, iteration: 69465
loss: 1.034326434135437,grad_norm: 0.9683474447031475, iteration: 69466
loss: 1.0037710666656494,grad_norm: 0.8459959547443716, iteration: 69467
loss: 1.0318379402160645,grad_norm: 0.9999991975873557, iteration: 69468
loss: 0.9623578786849976,grad_norm: 0.9297038011724278, iteration: 69469
loss: 0.9883995652198792,grad_norm: 0.9751993746208183, iteration: 69470
loss: 1.017310619354248,grad_norm: 0.9999991985690843, iteration: 69471
loss: 1.0302618741989136,grad_norm: 0.9999992370073422, iteration: 69472
loss: 1.0082428455352783,grad_norm: 0.8954449367405413, iteration: 69473
loss: 0.99164217710495,grad_norm: 0.8765191046446936, iteration: 69474
loss: 0.9818972945213318,grad_norm: 0.9618708500292859, iteration: 69475
loss: 0.9738187193870544,grad_norm: 0.999999163178643, iteration: 69476
loss: 0.9825826287269592,grad_norm: 0.8298614847844199, iteration: 69477
loss: 1.0055248737335205,grad_norm: 0.9999990966591479, iteration: 69478
loss: 1.019845962524414,grad_norm: 0.9710113987994011, iteration: 69479
loss: 1.0048953294754028,grad_norm: 0.9999991809863614, iteration: 69480
loss: 1.0086437463760376,grad_norm: 0.9999990553551714, iteration: 69481
loss: 1.0178059339523315,grad_norm: 0.9999993653590894, iteration: 69482
loss: 1.0192463397979736,grad_norm: 0.9277407696250527, iteration: 69483
loss: 1.0197933912277222,grad_norm: 0.9999991189734572, iteration: 69484
loss: 0.9892467856407166,grad_norm: 0.9999995460478208, iteration: 69485
loss: 1.073205590248108,grad_norm: 0.9999998638402666, iteration: 69486
loss: 1.0013469457626343,grad_norm: 0.9999990699434524, iteration: 69487
loss: 1.0249216556549072,grad_norm: 0.9999994533756853, iteration: 69488
loss: 0.9731278419494629,grad_norm: 0.9999990806037815, iteration: 69489
loss: 1.0114046335220337,grad_norm: 0.8771157444890999, iteration: 69490
loss: 0.9985591769218445,grad_norm: 0.9426409699157439, iteration: 69491
loss: 1.0390803813934326,grad_norm: 0.999999199581284, iteration: 69492
loss: 0.9808618426322937,grad_norm: 0.9999990812865525, iteration: 69493
loss: 1.064394235610962,grad_norm: 0.9999997515856667, iteration: 69494
loss: 1.0057921409606934,grad_norm: 0.8981123217458999, iteration: 69495
loss: 1.019360899925232,grad_norm: 0.9999990990292245, iteration: 69496
loss: 0.9873786568641663,grad_norm: 0.9496650625205094, iteration: 69497
loss: 1.0426433086395264,grad_norm: 0.9424253135446793, iteration: 69498
loss: 0.9772478342056274,grad_norm: 0.9999991968064263, iteration: 69499
loss: 1.020397663116455,grad_norm: 0.8977076567198874, iteration: 69500
loss: 1.014493703842163,grad_norm: 0.9819349268587065, iteration: 69501
loss: 0.9768164157867432,grad_norm: 0.9999989732386866, iteration: 69502
loss: 0.9663702249526978,grad_norm: 0.8938536334440347, iteration: 69503
loss: 0.9781119227409363,grad_norm: 0.9559374521507057, iteration: 69504
loss: 0.9864262342453003,grad_norm: 0.9818997731559634, iteration: 69505
loss: 0.9815320372581482,grad_norm: 0.8717354648751069, iteration: 69506
loss: 0.973493218421936,grad_norm: 0.9999992153764214, iteration: 69507
loss: 0.9946156144142151,grad_norm: 0.9999991403414442, iteration: 69508
loss: 1.0273733139038086,grad_norm: 0.9999992506143771, iteration: 69509
loss: 1.0089377164840698,grad_norm: 0.9999990053149275, iteration: 69510
loss: 1.018707036972046,grad_norm: 0.9999991536531174, iteration: 69511
loss: 1.0020309686660767,grad_norm: 0.923507456565008, iteration: 69512
loss: 0.9785977005958557,grad_norm: 0.9999989182824487, iteration: 69513
loss: 1.023126244544983,grad_norm: 0.9999991532591063, iteration: 69514
loss: 1.0013065338134766,grad_norm: 0.9588778722563988, iteration: 69515
loss: 1.0034960508346558,grad_norm: 0.9999991247760861, iteration: 69516
loss: 0.9965744018554688,grad_norm: 0.9999989825336373, iteration: 69517
loss: 1.0006335973739624,grad_norm: 0.9999990261183926, iteration: 69518
loss: 0.9804211258888245,grad_norm: 0.999999197113026, iteration: 69519
loss: 0.9877591729164124,grad_norm: 0.9999990814901055, iteration: 69520
loss: 0.9865652322769165,grad_norm: 0.9999995524939266, iteration: 69521
loss: 1.0462331771850586,grad_norm: 0.999999281637663, iteration: 69522
loss: 1.014799952507019,grad_norm: 0.9999992478774357, iteration: 69523
loss: 1.0862884521484375,grad_norm: 0.9999998696713933, iteration: 69524
loss: 1.0064761638641357,grad_norm: 0.9958042189091703, iteration: 69525
loss: 0.9563547968864441,grad_norm: 0.9999992178518738, iteration: 69526
loss: 1.000112771987915,grad_norm: 0.999999235924279, iteration: 69527
loss: 1.0764466524124146,grad_norm: 0.999999696924831, iteration: 69528
loss: 1.0274922847747803,grad_norm: 0.9999992843272213, iteration: 69529
loss: 0.9941566586494446,grad_norm: 0.9479740789683384, iteration: 69530
loss: 1.020045280456543,grad_norm: 0.9527932860662894, iteration: 69531
loss: 1.018530011177063,grad_norm: 0.9999990882667197, iteration: 69532
loss: 1.0084223747253418,grad_norm: 0.9999990151045199, iteration: 69533
loss: 0.9931852221488953,grad_norm: 0.8991739660359371, iteration: 69534
loss: 0.9836159944534302,grad_norm: 0.9999991688854419, iteration: 69535
loss: 0.9920482635498047,grad_norm: 0.999999286552339, iteration: 69536
loss: 0.9898304343223572,grad_norm: 0.9999990989781005, iteration: 69537
loss: 1.029438853263855,grad_norm: 0.9999994102984776, iteration: 69538
loss: 0.9837483763694763,grad_norm: 0.9504160296234975, iteration: 69539
loss: 0.9525969624519348,grad_norm: 0.999999108024741, iteration: 69540
loss: 0.9812650084495544,grad_norm: 0.995733242960469, iteration: 69541
loss: 1.0008676052093506,grad_norm: 0.9709525325727257, iteration: 69542
loss: 0.9784692525863647,grad_norm: 0.8730440292277004, iteration: 69543
loss: 1.0026451349258423,grad_norm: 0.9999991729454294, iteration: 69544
loss: 0.9593450427055359,grad_norm: 0.9999991125166833, iteration: 69545
loss: 1.000309705734253,grad_norm: 0.9568189868281171, iteration: 69546
loss: 1.003529667854309,grad_norm: 0.8722991487064532, iteration: 69547
loss: 0.977013111114502,grad_norm: 0.9999990852608808, iteration: 69548
loss: 0.9936342239379883,grad_norm: 0.9999990648408936, iteration: 69549
loss: 0.993263840675354,grad_norm: 0.9999990934483621, iteration: 69550
loss: 1.0002596378326416,grad_norm: 0.9999991204270975, iteration: 69551
loss: 0.9797793030738831,grad_norm: 0.9999992308301962, iteration: 69552
loss: 0.9852529168128967,grad_norm: 0.8527537633591662, iteration: 69553
loss: 1.0000187158584595,grad_norm: 0.9999989882512377, iteration: 69554
loss: 0.9939838647842407,grad_norm: 0.9999990833522003, iteration: 69555
loss: 1.0184719562530518,grad_norm: 0.9999991308698583, iteration: 69556
loss: 0.981799304485321,grad_norm: 0.9913807674668186, iteration: 69557
loss: 1.038580298423767,grad_norm: 0.9484237879228115, iteration: 69558
loss: 0.9974432587623596,grad_norm: 0.917293629464285, iteration: 69559
loss: 0.9794531464576721,grad_norm: 0.9999990548510793, iteration: 69560
loss: 0.9716907739639282,grad_norm: 0.9295861408444204, iteration: 69561
loss: 1.029032826423645,grad_norm: 0.9999989774400925, iteration: 69562
loss: 1.0008465051651,grad_norm: 0.9999991507780632, iteration: 69563
loss: 1.0002936124801636,grad_norm: 0.9999991035972868, iteration: 69564
loss: 1.0207346677780151,grad_norm: 0.9999990908895674, iteration: 69565
loss: 0.9942576289176941,grad_norm: 0.9999992176628841, iteration: 69566
loss: 0.9967847466468811,grad_norm: 0.8377559993445937, iteration: 69567
loss: 0.9534006714820862,grad_norm: 0.9999992163198469, iteration: 69568
loss: 1.0058026313781738,grad_norm: 0.9527652680815596, iteration: 69569
loss: 0.9873472452163696,grad_norm: 0.9471225449298102, iteration: 69570
loss: 0.9546524286270142,grad_norm: 0.9999993930420897, iteration: 69571
loss: 0.9825987815856934,grad_norm: 0.9815759383954173, iteration: 69572
loss: 1.0226569175720215,grad_norm: 0.9999989790038118, iteration: 69573
loss: 0.9966838359832764,grad_norm: 0.999999137133191, iteration: 69574
loss: 1.0107873678207397,grad_norm: 0.9999997551502504, iteration: 69575
loss: 0.9893116354942322,grad_norm: 0.9174231981792393, iteration: 69576
loss: 0.9623650908470154,grad_norm: 0.9999990851361504, iteration: 69577
loss: 1.0583317279815674,grad_norm: 0.9999993661448178, iteration: 69578
loss: 0.9726051688194275,grad_norm: 0.9959976402559821, iteration: 69579
loss: 1.0228177309036255,grad_norm: 0.9814242908302575, iteration: 69580
loss: 1.0064890384674072,grad_norm: 0.9999991156136222, iteration: 69581
loss: 1.0097979307174683,grad_norm: 0.9904340545808084, iteration: 69582
loss: 1.0078833103179932,grad_norm: 0.9999991661423814, iteration: 69583
loss: 1.001255750656128,grad_norm: 0.9202391089329723, iteration: 69584
loss: 1.0311344861984253,grad_norm: 0.9343906643142311, iteration: 69585
loss: 0.9661570191383362,grad_norm: 0.975960557252155, iteration: 69586
loss: 1.0366358757019043,grad_norm: 0.9999992126160325, iteration: 69587
loss: 0.986576497554779,grad_norm: 0.9790784897979348, iteration: 69588
loss: 0.9980556964874268,grad_norm: 0.9999991542055265, iteration: 69589
loss: 0.9823774099349976,grad_norm: 0.9999990132599236, iteration: 69590
loss: 1.0122066736221313,grad_norm: 0.9423999043888759, iteration: 69591
loss: 1.0120933055877686,grad_norm: 0.9999991861741558, iteration: 69592
loss: 1.0251457691192627,grad_norm: 0.9915090600602827, iteration: 69593
loss: 0.9777944087982178,grad_norm: 0.946777226279673, iteration: 69594
loss: 1.0211659669876099,grad_norm: 0.9999989597149769, iteration: 69595
loss: 1.0076041221618652,grad_norm: 0.9936813161370658, iteration: 69596
loss: 1.0105575323104858,grad_norm: 0.9999991879686345, iteration: 69597
loss: 1.0060514211654663,grad_norm: 0.9999990750684822, iteration: 69598
loss: 1.0176914930343628,grad_norm: 0.9999993735062983, iteration: 69599
loss: 0.9976480603218079,grad_norm: 0.9999990553291498, iteration: 69600
loss: 0.9961854219436646,grad_norm: 0.8952220895948207, iteration: 69601
loss: 1.0066008567810059,grad_norm: 0.913523342007225, iteration: 69602
loss: 1.0002764463424683,grad_norm: 0.9999990449730816, iteration: 69603
loss: 0.9960507750511169,grad_norm: 0.999999119490736, iteration: 69604
loss: 1.0207902193069458,grad_norm: 0.9887593959940524, iteration: 69605
loss: 0.992285966873169,grad_norm: 0.9999991267144959, iteration: 69606
loss: 0.9942034482955933,grad_norm: 0.8979537991513966, iteration: 69607
loss: 0.9700949788093567,grad_norm: 0.999999129586415, iteration: 69608
loss: 1.0108102560043335,grad_norm: 0.9999991592803575, iteration: 69609
loss: 0.9747180938720703,grad_norm: 0.9999991986765159, iteration: 69610
loss: 0.9827870726585388,grad_norm: 0.9999991526634381, iteration: 69611
loss: 1.0161622762680054,grad_norm: 0.9999995817302259, iteration: 69612
loss: 1.0247728824615479,grad_norm: 0.9999992441189094, iteration: 69613
loss: 0.9790820479393005,grad_norm: 0.8337221245242527, iteration: 69614
loss: 1.0042532682418823,grad_norm: 0.9707029942495623, iteration: 69615
loss: 0.966762363910675,grad_norm: 0.9999990397223711, iteration: 69616
loss: 1.0027506351470947,grad_norm: 0.9517931695952008, iteration: 69617
loss: 0.9839566349983215,grad_norm: 0.9999991238599194, iteration: 69618
loss: 0.9836881160736084,grad_norm: 0.9999990960849355, iteration: 69619
loss: 0.9743286371231079,grad_norm: 0.9296678183935205, iteration: 69620
loss: 0.9875932335853577,grad_norm: 0.9566877274061454, iteration: 69621
loss: 1.015377402305603,grad_norm: 0.9999991869170635, iteration: 69622
loss: 0.9766722917556763,grad_norm: 0.8486229049692828, iteration: 69623
loss: 1.043278455734253,grad_norm: 0.999999109604685, iteration: 69624
loss: 0.9968738555908203,grad_norm: 0.9999990504187786, iteration: 69625
loss: 1.013439655303955,grad_norm: 0.9588493423504096, iteration: 69626
loss: 1.0050016641616821,grad_norm: 0.976108204622247, iteration: 69627
loss: 1.003801703453064,grad_norm: 0.9999993294312203, iteration: 69628
loss: 1.0068738460540771,grad_norm: 0.9377755224923552, iteration: 69629
loss: 1.0233466625213623,grad_norm: 0.9999990452070364, iteration: 69630
loss: 0.9741883873939514,grad_norm: 0.9255461710424714, iteration: 69631
loss: 0.9852657318115234,grad_norm: 0.9999991624771026, iteration: 69632
loss: 1.0568854808807373,grad_norm: 0.999999216336665, iteration: 69633
loss: 0.9719191193580627,grad_norm: 0.9999992527807351, iteration: 69634
loss: 1.0369775295257568,grad_norm: 0.9999996981534318, iteration: 69635
loss: 1.0098495483398438,grad_norm: 0.9600580246964309, iteration: 69636
loss: 0.9748132824897766,grad_norm: 0.9999992997545356, iteration: 69637
loss: 1.0290948152542114,grad_norm: 0.9999991601026007, iteration: 69638
loss: 0.9750020503997803,grad_norm: 0.9999992839351683, iteration: 69639
loss: 1.0300062894821167,grad_norm: 0.999999257636728, iteration: 69640
loss: 1.022897481918335,grad_norm: 0.9999991412265733, iteration: 69641
loss: 0.9866001605987549,grad_norm: 0.9999990287159516, iteration: 69642
loss: 1.0247437953948975,grad_norm: 0.9999991865416276, iteration: 69643
loss: 1.021946668624878,grad_norm: 0.9999992151581971, iteration: 69644
loss: 1.0227270126342773,grad_norm: 0.9999999341240893, iteration: 69645
loss: 1.0215656757354736,grad_norm: 0.9999993200395428, iteration: 69646
loss: 1.0278103351593018,grad_norm: 0.9999990613569988, iteration: 69647
loss: 1.020449161529541,grad_norm: 0.9272573733948534, iteration: 69648
loss: 1.0342111587524414,grad_norm: 0.9999993740955895, iteration: 69649
loss: 1.0091743469238281,grad_norm: 0.9999990408925337, iteration: 69650
loss: 0.966659665107727,grad_norm: 0.9203401533088108, iteration: 69651
loss: 0.9990379214286804,grad_norm: 0.9999990912486251, iteration: 69652
loss: 1.0231411457061768,grad_norm: 0.9638903881032267, iteration: 69653
loss: 1.0047860145568848,grad_norm: 0.9999989755117099, iteration: 69654
loss: 0.9803279638290405,grad_norm: 0.9632503348194622, iteration: 69655
loss: 0.9786341190338135,grad_norm: 0.9760170713516805, iteration: 69656
loss: 1.0594450235366821,grad_norm: 0.9999993702318726, iteration: 69657
loss: 0.9677728414535522,grad_norm: 0.9894496402741393, iteration: 69658
loss: 0.9820140600204468,grad_norm: 0.9999989579240595, iteration: 69659
loss: 0.9788886308670044,grad_norm: 0.9999989922093221, iteration: 69660
loss: 0.978973388671875,grad_norm: 0.999999293861646, iteration: 69661
loss: 1.029434084892273,grad_norm: 0.9999995298725252, iteration: 69662
loss: 0.9954928159713745,grad_norm: 0.9136635964930703, iteration: 69663
loss: 1.0004498958587646,grad_norm: 0.9149377085189487, iteration: 69664
loss: 0.9858862161636353,grad_norm: 0.9459210566433253, iteration: 69665
loss: 0.9943675994873047,grad_norm: 0.9999991464876008, iteration: 69666
loss: 0.9652571082115173,grad_norm: 0.9999991826625806, iteration: 69667
loss: 0.9544052481651306,grad_norm: 0.9999990701034615, iteration: 69668
loss: 0.9371064305305481,grad_norm: 0.8523515831752848, iteration: 69669
loss: 1.0414360761642456,grad_norm: 0.9999991458685532, iteration: 69670
loss: 1.0186465978622437,grad_norm: 0.9999991989626605, iteration: 69671
loss: 1.045958161354065,grad_norm: 0.9999995089064136, iteration: 69672
loss: 0.9546414017677307,grad_norm: 0.967012781460765, iteration: 69673
loss: 0.9669479131698608,grad_norm: 0.999999076226851, iteration: 69674
loss: 1.0190271139144897,grad_norm: 0.8836467811166087, iteration: 69675
loss: 0.9992256760597229,grad_norm: 0.9999990705360935, iteration: 69676
loss: 0.9901772737503052,grad_norm: 0.9999990401209954, iteration: 69677
loss: 0.9735746383666992,grad_norm: 0.9999991293704655, iteration: 69678
loss: 0.9622395038604736,grad_norm: 0.9999991009784579, iteration: 69679
loss: 1.0049573183059692,grad_norm: 0.9523990159880497, iteration: 69680
loss: 1.000389575958252,grad_norm: 0.9999990013864558, iteration: 69681
loss: 0.9791831970214844,grad_norm: 0.9328937081194303, iteration: 69682
loss: 0.9745588898658752,grad_norm: 0.9999990479988381, iteration: 69683
loss: 0.9882887005805969,grad_norm: 0.9999991590566234, iteration: 69684
loss: 0.9999402761459351,grad_norm: 0.988108158016538, iteration: 69685
loss: 1.0333125591278076,grad_norm: 0.9999992815378982, iteration: 69686
loss: 1.028348445892334,grad_norm: 0.9999992027579785, iteration: 69687
loss: 1.0077030658721924,grad_norm: 0.9999992166229527, iteration: 69688
loss: 1.035463809967041,grad_norm: 0.9999989347470222, iteration: 69689
loss: 1.0165938138961792,grad_norm: 0.9517303934366244, iteration: 69690
loss: 1.0173416137695312,grad_norm: 0.9999990167642426, iteration: 69691
loss: 1.0045154094696045,grad_norm: 0.9522936155116272, iteration: 69692
loss: 1.0055298805236816,grad_norm: 0.9999993965829714, iteration: 69693
loss: 1.0560249090194702,grad_norm: 0.9999998129841333, iteration: 69694
loss: 0.9907893538475037,grad_norm: 0.9999990496701696, iteration: 69695
loss: 1.045157790184021,grad_norm: 0.9999990918264208, iteration: 69696
loss: 0.9932895302772522,grad_norm: 0.9999992342645899, iteration: 69697
loss: 1.0105180740356445,grad_norm: 0.9999990567535195, iteration: 69698
loss: 1.0069656372070312,grad_norm: 0.9999990097798623, iteration: 69699
loss: 0.9964101314544678,grad_norm: 0.9999990516553245, iteration: 69700
loss: 0.9825436472892761,grad_norm: 0.909889234496266, iteration: 69701
loss: 1.0514473915100098,grad_norm: 0.9999997546267471, iteration: 69702
loss: 0.9933061599731445,grad_norm: 0.9999991698276168, iteration: 69703
loss: 1.0027546882629395,grad_norm: 0.9999991596679799, iteration: 69704
loss: 0.9900385141372681,grad_norm: 0.9999994558278237, iteration: 69705
loss: 0.9527269005775452,grad_norm: 0.9999990446808795, iteration: 69706
loss: 0.9970400929450989,grad_norm: 0.9999993222392646, iteration: 69707
loss: 0.978981077671051,grad_norm: 0.9148582034819862, iteration: 69708
loss: 0.9829233288764954,grad_norm: 0.8950188969607611, iteration: 69709
loss: 1.0100899934768677,grad_norm: 0.8707570586747039, iteration: 69710
loss: 0.9880861043930054,grad_norm: 0.9999991929253338, iteration: 69711
loss: 0.9581882357597351,grad_norm: 0.9999991033260296, iteration: 69712
loss: 0.9603388905525208,grad_norm: 0.9999991630613297, iteration: 69713
loss: 1.0109363794326782,grad_norm: 0.999876518411678, iteration: 69714
loss: 1.0070295333862305,grad_norm: 0.9999998721573087, iteration: 69715
loss: 1.0121556520462036,grad_norm: 0.8945191309600585, iteration: 69716
loss: 0.9859165549278259,grad_norm: 0.9999990576207821, iteration: 69717
loss: 1.0023335218429565,grad_norm: 0.999999057010471, iteration: 69718
loss: 0.9910756945610046,grad_norm: 0.9999992715207328, iteration: 69719
loss: 0.987562894821167,grad_norm: 0.9360143463642687, iteration: 69720
loss: 0.9985248446464539,grad_norm: 0.8101680246244307, iteration: 69721
loss: 0.98817378282547,grad_norm: 0.9999991989667023, iteration: 69722
loss: 0.9690601229667664,grad_norm: 0.9595269911104529, iteration: 69723
loss: 0.9769435524940491,grad_norm: 0.9999990726635276, iteration: 69724
loss: 1.012706995010376,grad_norm: 0.7772438965422274, iteration: 69725
loss: 1.0049643516540527,grad_norm: 0.77696975660824, iteration: 69726
loss: 1.107620358467102,grad_norm: 0.9999995500951645, iteration: 69727
loss: 0.9976992607116699,grad_norm: 0.9999991990351512, iteration: 69728
loss: 0.9726322889328003,grad_norm: 0.9342799111816393, iteration: 69729
loss: 0.9958335757255554,grad_norm: 0.969553074236155, iteration: 69730
loss: 0.988946795463562,grad_norm: 0.9999991793962951, iteration: 69731
loss: 0.9893289804458618,grad_norm: 0.9936725489053686, iteration: 69732
loss: 0.9837133884429932,grad_norm: 0.9233636421778152, iteration: 69733
loss: 1.027358055114746,grad_norm: 0.9160544767142422, iteration: 69734
loss: 0.9543383717536926,grad_norm: 0.9999991260495058, iteration: 69735
loss: 1.0073034763336182,grad_norm: 0.9999991261754231, iteration: 69736
loss: 0.9877046346664429,grad_norm: 0.9813705470912485, iteration: 69737
loss: 1.059414029121399,grad_norm: 0.9999991444758279, iteration: 69738
loss: 1.0038758516311646,grad_norm: 0.9999989235749628, iteration: 69739
loss: 0.979175865650177,grad_norm: 0.8736758214871762, iteration: 69740
loss: 1.0223015546798706,grad_norm: 0.978179660338481, iteration: 69741
loss: 0.9950008392333984,grad_norm: 0.9849536488202937, iteration: 69742
loss: 0.9871144890785217,grad_norm: 0.9999991237820026, iteration: 69743
loss: 0.97273188829422,grad_norm: 0.9999990546541327, iteration: 69744
loss: 0.9845388531684875,grad_norm: 0.9999991154767174, iteration: 69745
loss: 0.9808918833732605,grad_norm: 0.9982975507817057, iteration: 69746
loss: 0.9849849939346313,grad_norm: 0.9442368654489985, iteration: 69747
loss: 0.9953746795654297,grad_norm: 0.9898433822704323, iteration: 69748
loss: 1.0612105131149292,grad_norm: 0.9999995304522591, iteration: 69749
loss: 0.974329948425293,grad_norm: 0.999999128315287, iteration: 69750
loss: 0.9864165186882019,grad_norm: 0.985321573144964, iteration: 69751
loss: 1.0392329692840576,grad_norm: 0.9999990958859677, iteration: 69752
loss: 1.0190807580947876,grad_norm: 0.9163240299241959, iteration: 69753
loss: 0.9847365617752075,grad_norm: 0.927274966767541, iteration: 69754
loss: 0.9548940062522888,grad_norm: 0.9999991985443861, iteration: 69755
loss: 1.0003987550735474,grad_norm: 0.9999992952983565, iteration: 69756
loss: 0.9886477589607239,grad_norm: 0.9596224018533672, iteration: 69757
loss: 1.0268559455871582,grad_norm: 0.8614966765348481, iteration: 69758
loss: 0.9995465278625488,grad_norm: 0.9999990768152242, iteration: 69759
loss: 0.9856876730918884,grad_norm: 0.999999260630603, iteration: 69760
loss: 1.0015618801116943,grad_norm: 0.999998972823657, iteration: 69761
loss: 1.02505362033844,grad_norm: 0.8221440253780711, iteration: 69762
loss: 1.0254935026168823,grad_norm: 0.876304494566905, iteration: 69763
loss: 0.9954586029052734,grad_norm: 0.9999990991737332, iteration: 69764
loss: 1.0226218700408936,grad_norm: 0.9999989861175677, iteration: 69765
loss: 0.9494167566299438,grad_norm: 0.949359403424749, iteration: 69766
loss: 0.9935396313667297,grad_norm: 0.9999990766765564, iteration: 69767
loss: 0.9888932108879089,grad_norm: 0.9999990584919347, iteration: 69768
loss: 1.0293869972229004,grad_norm: 0.99999912467998, iteration: 69769
loss: 0.9847696423530579,grad_norm: 0.9999991366401929, iteration: 69770
loss: 1.0357308387756348,grad_norm: 0.9999992042246156, iteration: 69771
loss: 0.963543713092804,grad_norm: 0.8299350995870178, iteration: 69772
loss: 1.0067718029022217,grad_norm: 0.8990452360374442, iteration: 69773
loss: 0.9457496404647827,grad_norm: 0.9999991396058742, iteration: 69774
loss: 1.00978422164917,grad_norm: 0.9999990899152136, iteration: 69775
loss: 1.0401712656021118,grad_norm: 0.9792412771776507, iteration: 69776
loss: 0.9552621841430664,grad_norm: 0.9999991681468084, iteration: 69777
loss: 1.0340044498443604,grad_norm: 0.965499326828962, iteration: 69778
loss: 1.0163826942443848,grad_norm: 0.8780711024185947, iteration: 69779
loss: 0.9897298216819763,grad_norm: 0.9999991462032021, iteration: 69780
loss: 1.022589087486267,grad_norm: 0.9999989956293173, iteration: 69781
loss: 1.0264288187026978,grad_norm: 0.9999990800378579, iteration: 69782
loss: 1.0360835790634155,grad_norm: 0.9663139169443526, iteration: 69783
loss: 1.0188732147216797,grad_norm: 0.9999994344729519, iteration: 69784
loss: 0.9740576148033142,grad_norm: 0.9688541900956442, iteration: 69785
loss: 1.004581093788147,grad_norm: 0.9435454341722577, iteration: 69786
loss: 0.9771563410758972,grad_norm: 0.8971658403679531, iteration: 69787
loss: 1.0179693698883057,grad_norm: 0.9999991544920739, iteration: 69788
loss: 0.9986907839775085,grad_norm: 0.9999991193010832, iteration: 69789
loss: 0.985789954662323,grad_norm: 0.9999990736328307, iteration: 69790
loss: 1.046023964881897,grad_norm: 0.9999997944145128, iteration: 69791
loss: 1.0182856321334839,grad_norm: 0.9231147021880409, iteration: 69792
loss: 1.0236386060714722,grad_norm: 0.9684859441143384, iteration: 69793
loss: 1.0416988134384155,grad_norm: 0.9999993333767121, iteration: 69794
loss: 1.031859040260315,grad_norm: 0.9999991615302664, iteration: 69795
loss: 0.9780729413032532,grad_norm: 0.9999991549636452, iteration: 69796
loss: 1.0019927024841309,grad_norm: 0.9999994045153934, iteration: 69797
loss: 1.006233811378479,grad_norm: 0.9495375348080731, iteration: 69798
loss: 1.004625678062439,grad_norm: 0.9999991938584575, iteration: 69799
loss: 1.0093096494674683,grad_norm: 0.8762780170251574, iteration: 69800
loss: 1.034555196762085,grad_norm: 0.9999991235628674, iteration: 69801
loss: 0.9700164794921875,grad_norm: 0.9096472026834055, iteration: 69802
loss: 0.9810391664505005,grad_norm: 0.9482717864413658, iteration: 69803
loss: 0.9933275580406189,grad_norm: 0.9199009539455352, iteration: 69804
loss: 0.9822535514831543,grad_norm: 0.9999992230572483, iteration: 69805
loss: 1.0009088516235352,grad_norm: 0.9999989460524801, iteration: 69806
loss: 1.05270516872406,grad_norm: 0.9999990970073643, iteration: 69807
loss: 1.00690758228302,grad_norm: 0.9999990724281711, iteration: 69808
loss: 1.013960599899292,grad_norm: 0.9810262053831732, iteration: 69809
loss: 0.9898710250854492,grad_norm: 0.9999991179581093, iteration: 69810
loss: 1.0386332273483276,grad_norm: 0.9999990788278565, iteration: 69811
loss: 0.9943776726722717,grad_norm: 0.8652583600339976, iteration: 69812
loss: 0.9793514013290405,grad_norm: 0.9237457145145652, iteration: 69813
loss: 0.9881391525268555,grad_norm: 0.9161007521848216, iteration: 69814
loss: 0.987994909286499,grad_norm: 0.9999992982800741, iteration: 69815
loss: 0.9780572652816772,grad_norm: 0.9999991146407262, iteration: 69816
loss: 1.0295953750610352,grad_norm: 0.9999992797541499, iteration: 69817
loss: 1.0004079341888428,grad_norm: 0.999999114127518, iteration: 69818
loss: 1.0175226926803589,grad_norm: 0.9999992386411087, iteration: 69819
loss: 1.0013352632522583,grad_norm: 0.9999990088697684, iteration: 69820
loss: 0.9961440563201904,grad_norm: 0.999999287390512, iteration: 69821
loss: 1.0251469612121582,grad_norm: 0.8793634381115639, iteration: 69822
loss: 0.9789828658103943,grad_norm: 0.908421556893219, iteration: 69823
loss: 0.9850336313247681,grad_norm: 0.9455833479074546, iteration: 69824
loss: 0.9667222499847412,grad_norm: 0.999999132528523, iteration: 69825
loss: 0.9846706390380859,grad_norm: 0.999999329096674, iteration: 69826
loss: 1.0675185918807983,grad_norm: 0.9999992218651442, iteration: 69827
loss: 0.9910302758216858,grad_norm: 0.9722839529721494, iteration: 69828
loss: 1.0071985721588135,grad_norm: 0.9999990358254834, iteration: 69829
loss: 0.9710246324539185,grad_norm: 0.8813609518789103, iteration: 69830
loss: 1.0059252977371216,grad_norm: 0.9907366690705841, iteration: 69831
loss: 1.0033153295516968,grad_norm: 0.8912357707451843, iteration: 69832
loss: 1.0093668699264526,grad_norm: 0.958426623669948, iteration: 69833
loss: 0.9841361045837402,grad_norm: 0.9031427917291344, iteration: 69834
loss: 1.0090460777282715,grad_norm: 0.955018806344926, iteration: 69835
loss: 0.9983104467391968,grad_norm: 0.9592465319375153, iteration: 69836
loss: 0.9864537715911865,grad_norm: 0.8113186951143271, iteration: 69837
loss: 1.014241337776184,grad_norm: 0.9999991935384753, iteration: 69838
loss: 1.0180732011795044,grad_norm: 0.9675450839945842, iteration: 69839
loss: 0.9999663233757019,grad_norm: 0.9213066613406551, iteration: 69840
loss: 1.0163933038711548,grad_norm: 0.8898846564756896, iteration: 69841
loss: 0.9864326119422913,grad_norm: 0.9190928583510237, iteration: 69842
loss: 0.9964526891708374,grad_norm: 0.9999989009108254, iteration: 69843
loss: 0.9889041185379028,grad_norm: 0.9474177934644799, iteration: 69844
loss: 1.0088216066360474,grad_norm: 0.9999994195269187, iteration: 69845
loss: 1.0040522813796997,grad_norm: 0.999999174263551, iteration: 69846
loss: 0.9996882677078247,grad_norm: 0.9999991839382929, iteration: 69847
loss: 1.0006499290466309,grad_norm: 0.9999991489251129, iteration: 69848
loss: 1.0152868032455444,grad_norm: 0.8240647666809606, iteration: 69849
loss: 0.9762493371963501,grad_norm: 0.9999989726655186, iteration: 69850
loss: 0.9885903000831604,grad_norm: 0.8660134222165231, iteration: 69851
loss: 1.0362941026687622,grad_norm: 0.9999990659317455, iteration: 69852
loss: 0.9830672144889832,grad_norm: 0.9999989979182795, iteration: 69853
loss: 0.9845525026321411,grad_norm: 0.9700539314815465, iteration: 69854
loss: 1.0362889766693115,grad_norm: 0.9232550766920875, iteration: 69855
loss: 1.0042970180511475,grad_norm: 0.9978907873294817, iteration: 69856
loss: 0.9971272945404053,grad_norm: 0.9999989992313089, iteration: 69857
loss: 1.014483094215393,grad_norm: 0.9999992932843832, iteration: 69858
loss: 0.9978642463684082,grad_norm: 0.9768075948297839, iteration: 69859
loss: 0.9948028326034546,grad_norm: 0.9999990683878675, iteration: 69860
loss: 0.9692965149879456,grad_norm: 0.9479384067810459, iteration: 69861
loss: 1.039467215538025,grad_norm: 0.9999996154005991, iteration: 69862
loss: 0.9593725800514221,grad_norm: 0.9999992477690431, iteration: 69863
loss: 0.9397170543670654,grad_norm: 0.9999993131183802, iteration: 69864
loss: 1.0147172212600708,grad_norm: 0.9604900428490593, iteration: 69865
loss: 1.011142373085022,grad_norm: 0.8845645172969452, iteration: 69866
loss: 1.0142619609832764,grad_norm: 0.9999990690047394, iteration: 69867
loss: 0.9931882619857788,grad_norm: 0.9524165011582395, iteration: 69868
loss: 0.9942070841789246,grad_norm: 0.9999990526683156, iteration: 69869
loss: 1.0374997854232788,grad_norm: 0.9958397688948428, iteration: 69870
loss: 0.9783202409744263,grad_norm: 0.9931756221770567, iteration: 69871
loss: 0.9897116422653198,grad_norm: 0.9999993165275083, iteration: 69872
loss: 1.0325912237167358,grad_norm: 0.9840346621258407, iteration: 69873
loss: 0.9736499190330505,grad_norm: 0.9999990468766099, iteration: 69874
loss: 1.0059114694595337,grad_norm: 0.9999990520842159, iteration: 69875
loss: 0.9769688248634338,grad_norm: 0.9999991474870359, iteration: 69876
loss: 0.9989171028137207,grad_norm: 0.9999991883488306, iteration: 69877
loss: 1.029435634613037,grad_norm: 0.9976209475228648, iteration: 69878
loss: 1.0281662940979004,grad_norm: 0.999999793057802, iteration: 69879
loss: 1.0082424879074097,grad_norm: 0.9999994790819178, iteration: 69880
loss: 1.0154547691345215,grad_norm: 0.9999991424186037, iteration: 69881
loss: 1.0245451927185059,grad_norm: 0.9812914909467491, iteration: 69882
loss: 1.0504930019378662,grad_norm: 0.9999991665422454, iteration: 69883
loss: 0.998835563659668,grad_norm: 0.999999103599639, iteration: 69884
loss: 0.9857028126716614,grad_norm: 0.9999990759801918, iteration: 69885
loss: 1.0274925231933594,grad_norm: 0.9982890289401238, iteration: 69886
loss: 0.9848337173461914,grad_norm: 0.8916230732555156, iteration: 69887
loss: 1.0108801126480103,grad_norm: 0.9999991711642877, iteration: 69888
loss: 0.9925679564476013,grad_norm: 0.9999992194755647, iteration: 69889
loss: 0.9695580005645752,grad_norm: 0.9111919496242613, iteration: 69890
loss: 0.9993324875831604,grad_norm: 0.9553763895675568, iteration: 69891
loss: 1.0268710851669312,grad_norm: 0.9999991230987484, iteration: 69892
loss: 1.0013219118118286,grad_norm: 0.8404702560821974, iteration: 69893
loss: 1.0103435516357422,grad_norm: 0.999999140655758, iteration: 69894
loss: 1.0179152488708496,grad_norm: 0.9391815153501367, iteration: 69895
loss: 1.0077210664749146,grad_norm: 0.9616155640695325, iteration: 69896
loss: 0.9819715023040771,grad_norm: 0.9999991783301196, iteration: 69897
loss: 1.028298258781433,grad_norm: 0.9999991981772891, iteration: 69898
loss: 0.9861661195755005,grad_norm: 0.8250069480250986, iteration: 69899
loss: 1.0242713689804077,grad_norm: 0.9999991442414676, iteration: 69900
loss: 1.017419457435608,grad_norm: 0.9999989345700162, iteration: 69901
loss: 1.0143271684646606,grad_norm: 0.9999992629814367, iteration: 69902
loss: 1.0115976333618164,grad_norm: 0.997504793734297, iteration: 69903
loss: 1.0225759744644165,grad_norm: 0.9999991917249114, iteration: 69904
loss: 0.9822338223457336,grad_norm: 0.9999991936777656, iteration: 69905
loss: 1.000385046005249,grad_norm: 0.9679950216024036, iteration: 69906
loss: 0.9456404447555542,grad_norm: 0.9774275556829152, iteration: 69907
loss: 1.0445592403411865,grad_norm: 0.9999998760673231, iteration: 69908
loss: 1.0274789333343506,grad_norm: 0.9999991005443212, iteration: 69909
loss: 1.0711495876312256,grad_norm: 0.987067544027372, iteration: 69910
loss: 0.959852397441864,grad_norm: 0.9567877312993899, iteration: 69911
loss: 1.0009129047393799,grad_norm: 0.9999989652094354, iteration: 69912
loss: 0.9660352468490601,grad_norm: 0.8456975901998742, iteration: 69913
loss: 0.9577804207801819,grad_norm: 0.9833497777410006, iteration: 69914
loss: 0.9954631924629211,grad_norm: 0.9555310628596356, iteration: 69915
loss: 0.9946223497390747,grad_norm: 0.9999991717593089, iteration: 69916
loss: 1.0108891725540161,grad_norm: 0.9667623440420898, iteration: 69917
loss: 0.9704636931419373,grad_norm: 0.9999997773198638, iteration: 69918
loss: 0.9548221230506897,grad_norm: 0.9502723540381265, iteration: 69919
loss: 1.0243945121765137,grad_norm: 0.8893558074515845, iteration: 69920
loss: 1.0058516263961792,grad_norm: 0.9260082573595362, iteration: 69921
loss: 0.9789842367172241,grad_norm: 0.9999989377685219, iteration: 69922
loss: 0.9937443733215332,grad_norm: 0.9999992955559668, iteration: 69923
loss: 0.9629389047622681,grad_norm: 0.9999992072703946, iteration: 69924
loss: 1.0280441045761108,grad_norm: 0.9909001411275443, iteration: 69925
loss: 0.9896771907806396,grad_norm: 0.9587817871294978, iteration: 69926
loss: 1.0187314748764038,grad_norm: 0.9958139789149613, iteration: 69927
loss: 1.026293158531189,grad_norm: 0.7872662216238269, iteration: 69928
loss: 0.9791866540908813,grad_norm: 0.8998027613823187, iteration: 69929
loss: 0.9808026552200317,grad_norm: 0.9326280549601905, iteration: 69930
loss: 1.045655369758606,grad_norm: 0.9999991508572049, iteration: 69931
loss: 0.9668118953704834,grad_norm: 0.9892361368685094, iteration: 69932
loss: 1.0427017211914062,grad_norm: 0.9999991367214708, iteration: 69933
loss: 0.9959493279457092,grad_norm: 0.9999992314244669, iteration: 69934
loss: 1.0120558738708496,grad_norm: 0.9999991581632487, iteration: 69935
loss: 1.0191125869750977,grad_norm: 0.8570286596444673, iteration: 69936
loss: 1.021630883216858,grad_norm: 0.9999991761695562, iteration: 69937
loss: 0.9824158549308777,grad_norm: 0.9999992449300696, iteration: 69938
loss: 1.0124256610870361,grad_norm: 0.9999990575004005, iteration: 69939
loss: 0.9989449381828308,grad_norm: 0.9999991265321453, iteration: 69940
loss: 1.0030144453048706,grad_norm: 0.9999992752485848, iteration: 69941
loss: 1.030694842338562,grad_norm: 0.9999994389983575, iteration: 69942
loss: 0.9973615407943726,grad_norm: 0.865734292920491, iteration: 69943
loss: 0.991844892501831,grad_norm: 0.9999993025102115, iteration: 69944
loss: 0.9571107625961304,grad_norm: 0.999999229148119, iteration: 69945
loss: 0.9993389248847961,grad_norm: 0.9919084591738075, iteration: 69946
loss: 0.9792413115501404,grad_norm: 0.999999155117253, iteration: 69947
loss: 0.9705440998077393,grad_norm: 0.9766885143134338, iteration: 69948
loss: 1.0155147314071655,grad_norm: 0.999999139712358, iteration: 69949
loss: 1.0155718326568604,grad_norm: 0.9999993076315614, iteration: 69950
loss: 1.0148017406463623,grad_norm: 0.9999991253778024, iteration: 69951
loss: 1.0356851816177368,grad_norm: 0.9568635784764793, iteration: 69952
loss: 1.0194445848464966,grad_norm: 0.9999992634216371, iteration: 69953
loss: 0.9685986042022705,grad_norm: 0.8713505076927507, iteration: 69954
loss: 1.0055893659591675,grad_norm: 0.999999088956371, iteration: 69955
loss: 1.0309381484985352,grad_norm: 0.9442767428029362, iteration: 69956
loss: 1.0274888277053833,grad_norm: 0.9999993184885426, iteration: 69957
loss: 0.9609275460243225,grad_norm: 0.9999990623111391, iteration: 69958
loss: 0.9878940582275391,grad_norm: 0.9999991243701783, iteration: 69959
loss: 0.9720166325569153,grad_norm: 0.9999991952529534, iteration: 69960
loss: 0.9832616448402405,grad_norm: 0.9906604289760054, iteration: 69961
loss: 0.9930042624473572,grad_norm: 0.9999990276160631, iteration: 69962
loss: 0.9975682497024536,grad_norm: 0.9294269654352751, iteration: 69963
loss: 1.040469765663147,grad_norm: 0.9999991299979412, iteration: 69964
loss: 1.0175918340682983,grad_norm: 0.8761302742019583, iteration: 69965
loss: 1.0110305547714233,grad_norm: 0.9999996495019994, iteration: 69966
loss: 1.0175780057907104,grad_norm: 0.9999991094422108, iteration: 69967
loss: 0.9945294857025146,grad_norm: 0.999999153123337, iteration: 69968
loss: 1.0042283535003662,grad_norm: 0.8831423528377699, iteration: 69969
loss: 0.985005795955658,grad_norm: 0.999999126069871, iteration: 69970
loss: 0.9798269271850586,grad_norm: 0.999999195778133, iteration: 69971
loss: 1.004037618637085,grad_norm: 0.9999991162886523, iteration: 69972
loss: 0.9431102275848389,grad_norm: 0.994020518703482, iteration: 69973
loss: 1.0158277750015259,grad_norm: 0.972007868210801, iteration: 69974
loss: 0.9882058501243591,grad_norm: 0.9999989665379395, iteration: 69975
loss: 0.9630574584007263,grad_norm: 0.9999989684669395, iteration: 69976
loss: 1.0208284854888916,grad_norm: 0.9999992209668248, iteration: 69977
loss: 1.01268470287323,grad_norm: 0.9999991538659094, iteration: 69978
loss: 0.970563530921936,grad_norm: 0.9999991992053104, iteration: 69979
loss: 0.9988682866096497,grad_norm: 0.9531256460455133, iteration: 69980
loss: 1.0011316537857056,grad_norm: 0.9999991873040218, iteration: 69981
loss: 0.9639372825622559,grad_norm: 0.9999990758642591, iteration: 69982
loss: 0.9955631494522095,grad_norm: 0.9999992706151113, iteration: 69983
loss: 1.009100317955017,grad_norm: 0.999999122107357, iteration: 69984
loss: 1.0340309143066406,grad_norm: 0.993016845538044, iteration: 69985
loss: 0.9870293140411377,grad_norm: 0.9999993278364983, iteration: 69986
loss: 0.9975200891494751,grad_norm: 0.9301299608383379, iteration: 69987
loss: 0.9781444668769836,grad_norm: 0.9999990690557825, iteration: 69988
loss: 1.0006417036056519,grad_norm: 0.999999201637543, iteration: 69989
loss: 0.9742060303688049,grad_norm: 0.9999990505237895, iteration: 69990
loss: 1.004884123802185,grad_norm: 0.8827921366064265, iteration: 69991
loss: 1.0064103603363037,grad_norm: 0.9437427695403633, iteration: 69992
loss: 1.030637502670288,grad_norm: 0.9159965054654213, iteration: 69993
loss: 0.9455257654190063,grad_norm: 0.9999990931629741, iteration: 69994
loss: 0.9808748960494995,grad_norm: 0.9833178614982652, iteration: 69995
loss: 1.0218236446380615,grad_norm: 0.9999991594724328, iteration: 69996
loss: 0.9504004716873169,grad_norm: 0.9999993380183689, iteration: 69997
loss: 1.008181095123291,grad_norm: 0.8923673942602176, iteration: 69998
loss: 0.9889421463012695,grad_norm: 0.9999993205929976, iteration: 69999
loss: 1.0031569004058838,grad_norm: 0.9694512025486407, iteration: 70000
Evaluating at step 70000
{'val': 0.9966400116682053, 'test': 2.4791464706579336}
loss: 0.9779475331306458,grad_norm: 0.9600286345003601, iteration: 70001
loss: 1.0050888061523438,grad_norm: 0.9999993706283422, iteration: 70002
loss: 1.0451767444610596,grad_norm: 0.9999989795528895, iteration: 70003
loss: 0.9957019686698914,grad_norm: 0.9553790946836854, iteration: 70004
loss: 1.0476199388504028,grad_norm: 0.9999997070717808, iteration: 70005
loss: 1.073730707168579,grad_norm: 0.9999992814519965, iteration: 70006
loss: 0.9739956259727478,grad_norm: 0.9999992304975212, iteration: 70007
loss: 0.9832268953323364,grad_norm: 0.9999992022379414, iteration: 70008
loss: 1.020134449005127,grad_norm: 0.9999991348341258, iteration: 70009
loss: 1.0173956155776978,grad_norm: 0.9999990973517282, iteration: 70010
loss: 0.9769660234451294,grad_norm: 0.965214164658066, iteration: 70011
loss: 0.9771419763565063,grad_norm: 0.93108108550094, iteration: 70012
loss: 1.0185133218765259,grad_norm: 0.9620428055999674, iteration: 70013
loss: 0.9998118281364441,grad_norm: 0.9999990755021456, iteration: 70014
loss: 0.9687228202819824,grad_norm: 0.9602522271582744, iteration: 70015
loss: 1.0097962617874146,grad_norm: 0.9999990024488603, iteration: 70016
loss: 0.9885702729225159,grad_norm: 0.9999990423961083, iteration: 70017
loss: 1.0069330930709839,grad_norm: 0.9999990672265603, iteration: 70018
loss: 0.9984811544418335,grad_norm: 0.9999991048810628, iteration: 70019
loss: 0.9872527122497559,grad_norm: 0.9394553558660391, iteration: 70020
loss: 0.9989796876907349,grad_norm: 0.8359567315013522, iteration: 70021
loss: 0.9766197204589844,grad_norm: 0.8139854228131596, iteration: 70022
loss: 0.9596176147460938,grad_norm: 0.9999993569096134, iteration: 70023
loss: 0.9986681342124939,grad_norm: 0.8821493583632247, iteration: 70024
loss: 1.0174356698989868,grad_norm: 0.9999997399981935, iteration: 70025
loss: 0.9964519739151001,grad_norm: 0.9550742453503351, iteration: 70026
loss: 1.0094084739685059,grad_norm: 0.9999989215598298, iteration: 70027
loss: 1.02459716796875,grad_norm: 0.9999990495150771, iteration: 70028
loss: 0.9434975385665894,grad_norm: 0.9669663961681345, iteration: 70029
loss: 0.9811801314353943,grad_norm: 0.9868557832845286, iteration: 70030
loss: 1.0647213459014893,grad_norm: 0.9999991066572168, iteration: 70031
loss: 1.0278362035751343,grad_norm: 0.9999992928825252, iteration: 70032
loss: 0.9905482530593872,grad_norm: 0.9999991567001337, iteration: 70033
loss: 0.9844474196434021,grad_norm: 0.9417409232668189, iteration: 70034
loss: 1.0250375270843506,grad_norm: 0.9999994533340318, iteration: 70035
loss: 0.992462694644928,grad_norm: 0.9999990005302237, iteration: 70036
loss: 0.9974373579025269,grad_norm: 0.9999992832094127, iteration: 70037
loss: 1.0084911584854126,grad_norm: 0.898286128223035, iteration: 70038
loss: 1.017526626586914,grad_norm: 0.9088968897142936, iteration: 70039
loss: 1.0026941299438477,grad_norm: 0.9999993995162334, iteration: 70040
loss: 1.0252338647842407,grad_norm: 0.9999996722252242, iteration: 70041
loss: 0.9897207021713257,grad_norm: 0.9269014865258194, iteration: 70042
loss: 1.0389858484268188,grad_norm: 0.9807021925610789, iteration: 70043
loss: 1.0384107828140259,grad_norm: 0.99999982518237, iteration: 70044
loss: 1.0164037942886353,grad_norm: 0.9999993648896107, iteration: 70045
loss: 0.9923216700553894,grad_norm: 0.9999989930336654, iteration: 70046
loss: 1.0106256008148193,grad_norm: 0.8398824783132134, iteration: 70047
loss: 0.9777035117149353,grad_norm: 0.9999992201111667, iteration: 70048
loss: 1.0040063858032227,grad_norm: 0.8642301923082162, iteration: 70049
loss: 0.9658519625663757,grad_norm: 0.9999990507992439, iteration: 70050
loss: 0.9925453662872314,grad_norm: 0.99999895962346, iteration: 70051
loss: 1.0408183336257935,grad_norm: 0.9487506084157862, iteration: 70052
loss: 1.009422779083252,grad_norm: 0.9999990841085297, iteration: 70053
loss: 0.9723566770553589,grad_norm: 0.9999992624625943, iteration: 70054
loss: 0.9782522320747375,grad_norm: 0.9999991309493996, iteration: 70055
loss: 1.0040431022644043,grad_norm: 0.9902332891657822, iteration: 70056
loss: 0.9852754473686218,grad_norm: 0.9999990439140283, iteration: 70057
loss: 1.0011181831359863,grad_norm: 0.8648254571870403, iteration: 70058
loss: 1.0132149457931519,grad_norm: 0.9999991896890819, iteration: 70059
loss: 0.9783617258071899,grad_norm: 0.9999991761005677, iteration: 70060
loss: 1.0575785636901855,grad_norm: 0.9999991470353025, iteration: 70061
loss: 1.0142626762390137,grad_norm: 0.9999990026374646, iteration: 70062
loss: 1.0076498985290527,grad_norm: 0.9169423916109041, iteration: 70063
loss: 1.0353190898895264,grad_norm: 0.9999991677731366, iteration: 70064
loss: 1.0436947345733643,grad_norm: 0.9999995734716692, iteration: 70065
loss: 1.0070520639419556,grad_norm: 0.9999998889198995, iteration: 70066
loss: 0.9817141890525818,grad_norm: 0.9999991165251763, iteration: 70067
loss: 0.9779062867164612,grad_norm: 0.8566471166139171, iteration: 70068
loss: 0.9906690716743469,grad_norm: 0.9410644086165463, iteration: 70069
loss: 0.9783421754837036,grad_norm: 0.9999991132910478, iteration: 70070
loss: 0.9787421822547913,grad_norm: 0.9989266269773486, iteration: 70071
loss: 1.0187844038009644,grad_norm: 0.970827615080209, iteration: 70072
loss: 0.9965378642082214,grad_norm: 0.8843270192858546, iteration: 70073
loss: 1.0102829933166504,grad_norm: 0.9814232132553705, iteration: 70074
loss: 1.025517463684082,grad_norm: 0.9999991089215708, iteration: 70075
loss: 1.0066139698028564,grad_norm: 0.9999990608228254, iteration: 70076
loss: 1.0541969537734985,grad_norm: 0.9751096062145694, iteration: 70077
loss: 0.9834902882575989,grad_norm: 0.9999989957112381, iteration: 70078
loss: 1.0001312494277954,grad_norm: 0.9177289328292467, iteration: 70079
loss: 1.0118516683578491,grad_norm: 0.9999995102957195, iteration: 70080
loss: 1.003021240234375,grad_norm: 0.9999991313926969, iteration: 70081
loss: 1.0166956186294556,grad_norm: 0.9999991372910546, iteration: 70082
loss: 1.048372507095337,grad_norm: 0.9999990848420155, iteration: 70083
loss: 0.9586329460144043,grad_norm: 0.9641812971390141, iteration: 70084
loss: 0.9967931509017944,grad_norm: 0.9999991458792905, iteration: 70085
loss: 0.9764590263366699,grad_norm: 0.9999992346902445, iteration: 70086
loss: 1.023645043373108,grad_norm: 0.9999994732733288, iteration: 70087
loss: 1.0149836540222168,grad_norm: 0.9999991917813325, iteration: 70088
loss: 1.0052552223205566,grad_norm: 0.8893563631564412, iteration: 70089
loss: 0.9885848164558411,grad_norm: 0.9310556754015891, iteration: 70090
loss: 1.014283537864685,grad_norm: 0.9748408121289636, iteration: 70091
loss: 1.0324140787124634,grad_norm: 0.999999277852651, iteration: 70092
loss: 1.0261772871017456,grad_norm: 0.817206254665703, iteration: 70093
loss: 1.0209068059921265,grad_norm: 0.9999991473391886, iteration: 70094
loss: 1.004082202911377,grad_norm: 0.9999991467907788, iteration: 70095
loss: 0.9804889559745789,grad_norm: 0.8538197035520079, iteration: 70096
loss: 1.0074446201324463,grad_norm: 0.9680406260015508, iteration: 70097
loss: 0.9757810831069946,grad_norm: 0.9892436254331529, iteration: 70098
loss: 0.9961990714073181,grad_norm: 0.9955739709378233, iteration: 70099
loss: 0.9864248037338257,grad_norm: 0.984326679402173, iteration: 70100
loss: 1.039798617362976,grad_norm: 0.9999989821548424, iteration: 70101
loss: 1.0372201204299927,grad_norm: 0.9999990864721156, iteration: 70102
loss: 0.9743728041648865,grad_norm: 0.9999990087122493, iteration: 70103
loss: 0.9905776977539062,grad_norm: 0.9999989986762297, iteration: 70104
loss: 1.0305012464523315,grad_norm: 0.9999997822648878, iteration: 70105
loss: 0.9981552362442017,grad_norm: 0.9135106773905131, iteration: 70106
loss: 0.9906839728355408,grad_norm: 0.9999989959697018, iteration: 70107
loss: 0.9852570295333862,grad_norm: 0.9840938750132916, iteration: 70108
loss: 1.0059481859207153,grad_norm: 0.9999990183409168, iteration: 70109
loss: 1.009377360343933,grad_norm: 0.999999177822158, iteration: 70110
loss: 1.0507657527923584,grad_norm: 0.9999990380227888, iteration: 70111
loss: 1.0266064405441284,grad_norm: 0.9572264185837112, iteration: 70112
loss: 0.9883278608322144,grad_norm: 0.8831471348883606, iteration: 70113
loss: 1.0223890542984009,grad_norm: 0.9999991534988533, iteration: 70114
loss: 1.0291086435317993,grad_norm: 0.9999990695851527, iteration: 70115
loss: 1.0120857954025269,grad_norm: 0.9652426263830433, iteration: 70116
loss: 1.0454187393188477,grad_norm: 0.999999122689053, iteration: 70117
loss: 1.0011875629425049,grad_norm: 0.9999989890370888, iteration: 70118
loss: 1.0152270793914795,grad_norm: 0.9999990911189969, iteration: 70119
loss: 0.9932472705841064,grad_norm: 0.9373246706711447, iteration: 70120
loss: 1.0640746355056763,grad_norm: 0.9999992104810613, iteration: 70121
loss: 0.9745784997940063,grad_norm: 0.9253816410503494, iteration: 70122
loss: 1.0143404006958008,grad_norm: 0.9999990515937053, iteration: 70123
loss: 1.0072965621948242,grad_norm: 0.8587120535935031, iteration: 70124
loss: 1.0321327447891235,grad_norm: 0.9531169347717039, iteration: 70125
loss: 0.9879087209701538,grad_norm: 0.8617298205649144, iteration: 70126
loss: 1.0020450353622437,grad_norm: 0.9999990247978143, iteration: 70127
loss: 1.0293980836868286,grad_norm: 0.9999990823153962, iteration: 70128
loss: 0.9931051135063171,grad_norm: 0.9999991572176004, iteration: 70129
loss: 0.989531934261322,grad_norm: 0.9999993016735207, iteration: 70130
loss: 0.9842055439949036,grad_norm: 0.9999991679974398, iteration: 70131
loss: 0.9914047122001648,grad_norm: 0.9999991144270843, iteration: 70132
loss: 1.0247700214385986,grad_norm: 0.999999128214218, iteration: 70133
loss: 1.0540722608566284,grad_norm: 0.9999992301936335, iteration: 70134
loss: 1.0029716491699219,grad_norm: 0.947484445715608, iteration: 70135
loss: 0.9940450191497803,grad_norm: 0.9999992259911912, iteration: 70136
loss: 1.0093716382980347,grad_norm: 0.9999989494558097, iteration: 70137
loss: 1.013167142868042,grad_norm: 0.9999991154143812, iteration: 70138
loss: 1.0166634321212769,grad_norm: 0.999998965934456, iteration: 70139
loss: 1.006645917892456,grad_norm: 0.9999993817384029, iteration: 70140
loss: 0.9913604259490967,grad_norm: 0.9999992281134322, iteration: 70141
loss: 0.9893073439598083,grad_norm: 0.9999990659502225, iteration: 70142
loss: 1.0289491415023804,grad_norm: 0.999999019624758, iteration: 70143
loss: 1.0014653205871582,grad_norm: 0.9999992244983263, iteration: 70144
loss: 1.0325345993041992,grad_norm: 0.9999991855620295, iteration: 70145
loss: 0.9904584884643555,grad_norm: 0.9999991647435624, iteration: 70146
loss: 0.9556978940963745,grad_norm: 0.9999991419593733, iteration: 70147
loss: 1.0094685554504395,grad_norm: 0.9999989906331778, iteration: 70148
loss: 1.0071463584899902,grad_norm: 0.9999992204504056, iteration: 70149
loss: 0.9824787378311157,grad_norm: 0.8329824498906404, iteration: 70150
loss: 0.9890425205230713,grad_norm: 0.9999991320921083, iteration: 70151
loss: 1.0642836093902588,grad_norm: 0.999999172429938, iteration: 70152
loss: 0.9790084958076477,grad_norm: 0.9999990242074488, iteration: 70153
loss: 0.9990374445915222,grad_norm: 0.9445657510572673, iteration: 70154
loss: 0.9952409863471985,grad_norm: 0.9762734793023806, iteration: 70155
loss: 1.031167984008789,grad_norm: 0.9406851200841486, iteration: 70156
loss: 0.9622353315353394,grad_norm: 0.9999992352900866, iteration: 70157
loss: 1.0387721061706543,grad_norm: 0.9999997514407343, iteration: 70158
loss: 1.0064197778701782,grad_norm: 0.9999990622330487, iteration: 70159
loss: 1.0094044208526611,grad_norm: 0.9999991101954208, iteration: 70160
loss: 1.0202934741973877,grad_norm: 0.9999992101289982, iteration: 70161
loss: 0.9606835246086121,grad_norm: 0.9999991481247942, iteration: 70162
loss: 1.0128830671310425,grad_norm: 0.9387860719875197, iteration: 70163
loss: 1.0035955905914307,grad_norm: 0.9787166218835885, iteration: 70164
loss: 0.9620122909545898,grad_norm: 0.9981904412732197, iteration: 70165
loss: 0.9786792993545532,grad_norm: 0.9977369050190006, iteration: 70166
loss: 1.029137372970581,grad_norm: 0.9995087099533617, iteration: 70167
loss: 0.9757154583930969,grad_norm: 0.99999914872994, iteration: 70168
loss: 1.0203773975372314,grad_norm: 0.9999991610446255, iteration: 70169
loss: 0.9687864184379578,grad_norm: 0.999999208151778, iteration: 70170
loss: 0.9712966680526733,grad_norm: 0.9999991201701156, iteration: 70171
loss: 0.9657298922538757,grad_norm: 0.9999993250542378, iteration: 70172
loss: 0.997782826423645,grad_norm: 0.9999991918924397, iteration: 70173
loss: 0.9727523326873779,grad_norm: 0.999999211218065, iteration: 70174
loss: 0.9903464317321777,grad_norm: 0.9999991319867415, iteration: 70175
loss: 0.9618358612060547,grad_norm: 0.8556076807805106, iteration: 70176
loss: 0.9787286520004272,grad_norm: 0.9999992312782787, iteration: 70177
loss: 0.9776321649551392,grad_norm: 0.8406006089153957, iteration: 70178
loss: 1.0014476776123047,grad_norm: 0.9999990510339298, iteration: 70179
loss: 0.989957869052887,grad_norm: 0.8538929477845476, iteration: 70180
loss: 1.0032076835632324,grad_norm: 0.9999991254734302, iteration: 70181
loss: 1.0228897333145142,grad_norm: 0.9999991928071131, iteration: 70182
loss: 0.9903481602668762,grad_norm: 0.9999992115408536, iteration: 70183
loss: 1.0025606155395508,grad_norm: 0.9999990831286095, iteration: 70184
loss: 0.9318338632583618,grad_norm: 0.9999990859898716, iteration: 70185
loss: 1.0149279832839966,grad_norm: 0.9999990451612238, iteration: 70186
loss: 0.9756681323051453,grad_norm: 0.999999084516658, iteration: 70187
loss: 0.9989098906517029,grad_norm: 0.9891525318655395, iteration: 70188
loss: 0.989582359790802,grad_norm: 0.9999991403482414, iteration: 70189
loss: 1.0314719676971436,grad_norm: 0.9300123450582719, iteration: 70190
loss: 0.9431302547454834,grad_norm: 0.9999989931296004, iteration: 70191
loss: 1.0106816291809082,grad_norm: 0.9297608637592097, iteration: 70192
loss: 0.9806647300720215,grad_norm: 0.8576585637032554, iteration: 70193
loss: 0.9689258933067322,grad_norm: 0.9999991702968466, iteration: 70194
loss: 1.002619743347168,grad_norm: 0.9999990553680576, iteration: 70195
loss: 1.0161422491073608,grad_norm: 0.9999991030597438, iteration: 70196
loss: 1.0127763748168945,grad_norm: 0.8387300767793618, iteration: 70197
loss: 0.965343177318573,grad_norm: 0.9804725141231921, iteration: 70198
loss: 1.001839518547058,grad_norm: 0.942005999632319, iteration: 70199
loss: 1.0583183765411377,grad_norm: 0.9935874977728046, iteration: 70200
loss: 0.9791064858436584,grad_norm: 0.9846747767770897, iteration: 70201
loss: 1.0083024501800537,grad_norm: 0.9999992037169103, iteration: 70202
loss: 0.9999591112136841,grad_norm: 0.9885617934325354, iteration: 70203
loss: 0.9995670914649963,grad_norm: 0.9999990698204714, iteration: 70204
loss: 0.9681822657585144,grad_norm: 0.9999991265323966, iteration: 70205
loss: 0.9822523593902588,grad_norm: 0.9375662196732706, iteration: 70206
loss: 0.9589442014694214,grad_norm: 0.9932080287964744, iteration: 70207
loss: 1.0200562477111816,grad_norm: 0.9999990819081057, iteration: 70208
loss: 1.0199862718582153,grad_norm: 0.9999989130989653, iteration: 70209
loss: 0.9921634197235107,grad_norm: 0.9999990640201997, iteration: 70210
loss: 1.0212143659591675,grad_norm: 0.9999991868687157, iteration: 70211
loss: 1.0046111345291138,grad_norm: 0.8313154899750083, iteration: 70212
loss: 1.0213050842285156,grad_norm: 0.999999007325787, iteration: 70213
loss: 0.9764868021011353,grad_norm: 0.999999156232248, iteration: 70214
loss: 0.9752517342567444,grad_norm: 0.9999991643963851, iteration: 70215
loss: 1.0093637704849243,grad_norm: 0.94770192110539, iteration: 70216
loss: 0.9735090732574463,grad_norm: 0.9999990416410223, iteration: 70217
loss: 1.0228887796401978,grad_norm: 0.9999990178982996, iteration: 70218
loss: 0.9926284551620483,grad_norm: 0.9999990420426179, iteration: 70219
loss: 1.001396656036377,grad_norm: 0.8842515048515833, iteration: 70220
loss: 1.026976466178894,grad_norm: 0.9209753340961773, iteration: 70221
loss: 1.0465081930160522,grad_norm: 0.9768693710383709, iteration: 70222
loss: 1.0219242572784424,grad_norm: 0.9848984884167714, iteration: 70223
loss: 1.0006314516067505,grad_norm: 0.998854242631575, iteration: 70224
loss: 1.0457453727722168,grad_norm: 0.9999992215520095, iteration: 70225
loss: 0.9842954874038696,grad_norm: 0.999999104383851, iteration: 70226
loss: 1.025132179260254,grad_norm: 0.955184927675216, iteration: 70227
loss: 0.9801462888717651,grad_norm: 0.999999079382163, iteration: 70228
loss: 0.9897706508636475,grad_norm: 0.8019160952594355, iteration: 70229
loss: 1.016243815422058,grad_norm: 0.9028001192863885, iteration: 70230
loss: 1.0388504266738892,grad_norm: 0.9999990043067618, iteration: 70231
loss: 0.9898248314857483,grad_norm: 0.9999996592159073, iteration: 70232
loss: 1.009150743484497,grad_norm: 0.9514228888527801, iteration: 70233
loss: 1.0015126466751099,grad_norm: 0.9999993410673401, iteration: 70234
loss: 1.0188064575195312,grad_norm: 0.9317568608732957, iteration: 70235
loss: 0.9988417029380798,grad_norm: 0.8989755578135936, iteration: 70236
loss: 0.9806212782859802,grad_norm: 0.9999992768590981, iteration: 70237
loss: 0.9594465494155884,grad_norm: 0.9999990182229312, iteration: 70238
loss: 1.0069271326065063,grad_norm: 0.999999113525203, iteration: 70239
loss: 0.9726120233535767,grad_norm: 0.9999991438050362, iteration: 70240
loss: 0.9766852855682373,grad_norm: 0.9999991888883357, iteration: 70241
loss: 1.0075799226760864,grad_norm: 0.9999991135048831, iteration: 70242
loss: 1.010839819908142,grad_norm: 0.9999990311433445, iteration: 70243
loss: 1.0140300989151,grad_norm: 0.9999991675066436, iteration: 70244
loss: 1.0010097026824951,grad_norm: 0.9999990443343467, iteration: 70245
loss: 1.0118293762207031,grad_norm: 0.9372197988888742, iteration: 70246
loss: 1.001509428024292,grad_norm: 0.9986024642513072, iteration: 70247
loss: 0.9871282577514648,grad_norm: 0.9999990583764474, iteration: 70248
loss: 1.0208284854888916,grad_norm: 0.9329040024762897, iteration: 70249
loss: 1.0092792510986328,grad_norm: 0.9999991920784188, iteration: 70250
loss: 1.0064116716384888,grad_norm: 0.9999993483242762, iteration: 70251
loss: 1.005859613418579,grad_norm: 0.9999991688339457, iteration: 70252
loss: 0.9856218695640564,grad_norm: 0.9999991238134035, iteration: 70253
loss: 0.9676905870437622,grad_norm: 0.9253363505846111, iteration: 70254
loss: 0.9829994440078735,grad_norm: 0.8692885451594452, iteration: 70255
loss: 0.9942450523376465,grad_norm: 0.9153187175310677, iteration: 70256
loss: 0.9876811504364014,grad_norm: 0.9999991697303752, iteration: 70257
loss: 0.9926450848579407,grad_norm: 0.9270478090070797, iteration: 70258
loss: 0.9882999062538147,grad_norm: 0.999999048368454, iteration: 70259
loss: 1.0130221843719482,grad_norm: 0.9999990802877513, iteration: 70260
loss: 1.021165370941162,grad_norm: 0.9920835274176918, iteration: 70261
loss: 1.0475002527236938,grad_norm: 0.9999990870390749, iteration: 70262
loss: 0.9913543462753296,grad_norm: 0.9999991047537746, iteration: 70263
loss: 1.0115718841552734,grad_norm: 0.9999990643417327, iteration: 70264
loss: 1.0441700220108032,grad_norm: 0.9329129784091998, iteration: 70265
loss: 1.009431004524231,grad_norm: 0.9999993878121997, iteration: 70266
loss: 0.992707371711731,grad_norm: 0.9999991221508486, iteration: 70267
loss: 0.9724639654159546,grad_norm: 0.8791194902194206, iteration: 70268
loss: 0.9842890501022339,grad_norm: 0.8890061919546961, iteration: 70269
loss: 0.9949133396148682,grad_norm: 0.8299723232061675, iteration: 70270
loss: 0.983959972858429,grad_norm: 0.8595268422053922, iteration: 70271
loss: 1.0384047031402588,grad_norm: 0.9999994834437745, iteration: 70272
loss: 0.9799447655677795,grad_norm: 0.9999993814871053, iteration: 70273
loss: 0.9778901934623718,grad_norm: 0.9999991048969334, iteration: 70274
loss: 0.9817764163017273,grad_norm: 0.999999133440175, iteration: 70275
loss: 0.9800958633422852,grad_norm: 0.9999991018806821, iteration: 70276
loss: 1.02304208278656,grad_norm: 0.9999991497344374, iteration: 70277
loss: 0.9910531640052795,grad_norm: 0.9846093967359366, iteration: 70278
loss: 0.986809253692627,grad_norm: 0.9680589056119536, iteration: 70279
loss: 1.0252195596694946,grad_norm: 0.9379897701826075, iteration: 70280
loss: 0.9902059435844421,grad_norm: 0.9999991080586307, iteration: 70281
loss: 0.98664790391922,grad_norm: 0.9999991524654126, iteration: 70282
loss: 0.9796468615531921,grad_norm: 0.8714898252971665, iteration: 70283
loss: 1.0382457971572876,grad_norm: 0.9999991310506744, iteration: 70284
loss: 0.977427065372467,grad_norm: 0.9479578050607456, iteration: 70285
loss: 1.0088609457015991,grad_norm: 0.9007887714488696, iteration: 70286
loss: 0.996882975101471,grad_norm: 0.8825900796522014, iteration: 70287
loss: 1.0044499635696411,grad_norm: 0.9999991316521986, iteration: 70288
loss: 0.9957402348518372,grad_norm: 0.9188068386646007, iteration: 70289
loss: 0.9988110661506653,grad_norm: 0.7352694759398458, iteration: 70290
loss: 1.0191497802734375,grad_norm: 0.9999990225862138, iteration: 70291
loss: 1.0002527236938477,grad_norm: 0.9999992002309385, iteration: 70292
loss: 1.0187363624572754,grad_norm: 0.9984548913249965, iteration: 70293
loss: 0.9703457951545715,grad_norm: 0.9999990546773811, iteration: 70294
loss: 0.9936830997467041,grad_norm: 0.9999990597988002, iteration: 70295
loss: 0.964396595954895,grad_norm: 0.9999991476937558, iteration: 70296
loss: 0.9691107273101807,grad_norm: 0.9999991957215207, iteration: 70297
loss: 0.9922096729278564,grad_norm: 0.9748352444694387, iteration: 70298
loss: 1.009030818939209,grad_norm: 0.9968674646748642, iteration: 70299
loss: 1.0085865259170532,grad_norm: 0.873752621212025, iteration: 70300
loss: 1.0267131328582764,grad_norm: 0.9999990841438114, iteration: 70301
loss: 1.0419566631317139,grad_norm: 0.9999990157081854, iteration: 70302
loss: 1.0031044483184814,grad_norm: 0.9803729816055554, iteration: 70303
loss: 0.97533118724823,grad_norm: 0.9999991068509761, iteration: 70304
loss: 1.00751531124115,grad_norm: 0.9999990657131367, iteration: 70305
loss: 0.9677000641822815,grad_norm: 0.9173391115423056, iteration: 70306
loss: 0.9971828460693359,grad_norm: 0.999999129583764, iteration: 70307
loss: 0.9965568780899048,grad_norm: 0.9999992070901624, iteration: 70308
loss: 0.9988311529159546,grad_norm: 0.9609682567894594, iteration: 70309
loss: 1.0272105932235718,grad_norm: 0.9727460375442777, iteration: 70310
loss: 0.9933996200561523,grad_norm: 0.9999990064857903, iteration: 70311
loss: 1.0015945434570312,grad_norm: 0.999999022388334, iteration: 70312
loss: 1.0035688877105713,grad_norm: 0.9999990918623117, iteration: 70313
loss: 0.9769831299781799,grad_norm: 0.9999991660545722, iteration: 70314
loss: 0.9931638240814209,grad_norm: 0.9154606110151959, iteration: 70315
loss: 1.0329455137252808,grad_norm: 0.9999997360054088, iteration: 70316
loss: 1.0214612483978271,grad_norm: 0.9999992741730342, iteration: 70317
loss: 1.02385413646698,grad_norm: 0.9999993439872359, iteration: 70318
loss: 0.9899413585662842,grad_norm: 0.9999991065628667, iteration: 70319
loss: 1.0039863586425781,grad_norm: 0.9999991312351684, iteration: 70320
loss: 0.9670383334159851,grad_norm: 0.9999991880265121, iteration: 70321
loss: 1.014196753501892,grad_norm: 0.9999989952777419, iteration: 70322
loss: 1.0220084190368652,grad_norm: 0.9999991486625345, iteration: 70323
loss: 1.0055758953094482,grad_norm: 0.9999993797281633, iteration: 70324
loss: 1.0064607858657837,grad_norm: 0.918919130688711, iteration: 70325
loss: 1.023632526397705,grad_norm: 0.9999991880067329, iteration: 70326
loss: 0.9982402920722961,grad_norm: 0.9999991701703236, iteration: 70327
loss: 1.0001193284988403,grad_norm: 0.9999990844010287, iteration: 70328
loss: 0.973201334476471,grad_norm: 0.8704427565270447, iteration: 70329
loss: 1.0250862836837769,grad_norm: 0.918586697242658, iteration: 70330
loss: 0.950559139251709,grad_norm: 0.9352909865534654, iteration: 70331
loss: 1.0235315561294556,grad_norm: 0.9901364077104892, iteration: 70332
loss: 1.0014199018478394,grad_norm: 0.9999990620172358, iteration: 70333
loss: 1.0174638032913208,grad_norm: 0.9117620673986588, iteration: 70334
loss: 0.9455922245979309,grad_norm: 0.9999990696113283, iteration: 70335
loss: 0.9760954976081848,grad_norm: 0.9999992468183542, iteration: 70336
loss: 1.0038615465164185,grad_norm: 0.9999991991827666, iteration: 70337
loss: 1.014542818069458,grad_norm: 0.9638192030955317, iteration: 70338
loss: 1.0106514692306519,grad_norm: 0.9999991752515401, iteration: 70339
loss: 0.9881170988082886,grad_norm: 0.999999029597283, iteration: 70340
loss: 0.9496541023254395,grad_norm: 0.9657376024583937, iteration: 70341
loss: 0.9597628116607666,grad_norm: 0.9999990368791549, iteration: 70342
loss: 1.0210213661193848,grad_norm: 0.929401279226644, iteration: 70343
loss: 0.9929118752479553,grad_norm: 0.999999270147594, iteration: 70344
loss: 1.020340919494629,grad_norm: 0.9999992311144676, iteration: 70345
loss: 1.0018757581710815,grad_norm: 0.8626307033415415, iteration: 70346
loss: 1.0130324363708496,grad_norm: 0.9999991200591191, iteration: 70347
loss: 1.0155569314956665,grad_norm: 0.9999992750809681, iteration: 70348
loss: 0.9917482137680054,grad_norm: 0.9999992610557483, iteration: 70349
loss: 0.9931690096855164,grad_norm: 0.9999991238400986, iteration: 70350
loss: 0.9924392700195312,grad_norm: 0.8438334730847207, iteration: 70351
loss: 0.9883739352226257,grad_norm: 0.9999992110427544, iteration: 70352
loss: 1.0028929710388184,grad_norm: 0.9999990517018942, iteration: 70353
loss: 1.0103859901428223,grad_norm: 0.9602151586158482, iteration: 70354
loss: 1.054563045501709,grad_norm: 0.9999991505074198, iteration: 70355
loss: 1.0217525959014893,grad_norm: 0.9999992024967642, iteration: 70356
loss: 0.9693984389305115,grad_norm: 0.9400486685488835, iteration: 70357
loss: 1.0197055339813232,grad_norm: 0.9999992020146782, iteration: 70358
loss: 0.9962959885597229,grad_norm: 0.9999990427186454, iteration: 70359
loss: 0.9722496867179871,grad_norm: 0.9999990927463417, iteration: 70360
loss: 1.024230718612671,grad_norm: 0.9999991969867137, iteration: 70361
loss: 1.0100162029266357,grad_norm: 0.9999990799563163, iteration: 70362
loss: 1.0272252559661865,grad_norm: 0.9999992700568485, iteration: 70363
loss: 1.0065406560897827,grad_norm: 0.8242410367490653, iteration: 70364
loss: 0.9899479746818542,grad_norm: 0.9999990892354682, iteration: 70365
loss: 0.9760046601295471,grad_norm: 0.9999989947558061, iteration: 70366
loss: 0.9653115272521973,grad_norm: 0.9999993105424778, iteration: 70367
loss: 1.0082290172576904,grad_norm: 0.9999996506193934, iteration: 70368
loss: 0.9873113632202148,grad_norm: 0.9999990827440141, iteration: 70369
loss: 1.0078203678131104,grad_norm: 0.9789219456843732, iteration: 70370
loss: 0.9987921118736267,grad_norm: 0.9751412242419424, iteration: 70371
loss: 0.9458755254745483,grad_norm: 0.9999992299539542, iteration: 70372
loss: 0.9867583513259888,grad_norm: 0.9999991909344805, iteration: 70373
loss: 0.9702346920967102,grad_norm: 0.999999069509072, iteration: 70374
loss: 0.980121910572052,grad_norm: 0.9999991177547605, iteration: 70375
loss: 1.038873553276062,grad_norm: 0.9999992651655268, iteration: 70376
loss: 0.9834259748458862,grad_norm: 0.9118638949113558, iteration: 70377
loss: 1.0027841329574585,grad_norm: 0.9999990470576882, iteration: 70378
loss: 0.9538323879241943,grad_norm: 0.9999991928996574, iteration: 70379
loss: 0.978478729724884,grad_norm: 0.9999989671522314, iteration: 70380
loss: 1.0247633457183838,grad_norm: 0.9999993399747895, iteration: 70381
loss: 0.9579241275787354,grad_norm: 0.95998514473842, iteration: 70382
loss: 1.0138744115829468,grad_norm: 0.9999991890268176, iteration: 70383
loss: 0.9829473495483398,grad_norm: 0.9759937948234045, iteration: 70384
loss: 0.9778386950492859,grad_norm: 0.8929357903827001, iteration: 70385
loss: 0.9877903461456299,grad_norm: 0.9179956827425655, iteration: 70386
loss: 1.0154573917388916,grad_norm: 0.9581811849360993, iteration: 70387
loss: 1.0231269598007202,grad_norm: 0.9999991892002931, iteration: 70388
loss: 1.0480635166168213,grad_norm: 0.9999992725951067, iteration: 70389
loss: 0.992435097694397,grad_norm: 0.9999991897332623, iteration: 70390
loss: 0.94770348072052,grad_norm: 0.999999102899718, iteration: 70391
loss: 1.005429983139038,grad_norm: 0.8884711719506191, iteration: 70392
loss: 1.0215282440185547,grad_norm: 0.8929302823230182, iteration: 70393
loss: 1.0118852853775024,grad_norm: 0.9827600140810319, iteration: 70394
loss: 0.9812878966331482,grad_norm: 0.9758513848793442, iteration: 70395
loss: 0.928464949131012,grad_norm: 0.9999990787958755, iteration: 70396
loss: 0.9534621238708496,grad_norm: 0.9999989788928241, iteration: 70397
loss: 0.9726321697235107,grad_norm: 0.9999992428105986, iteration: 70398
loss: 0.9759289622306824,grad_norm: 0.9541609610223535, iteration: 70399
loss: 0.985145092010498,grad_norm: 0.9999990457010842, iteration: 70400
loss: 1.0243732929229736,grad_norm: 0.999999318381479, iteration: 70401
loss: 0.9918702840805054,grad_norm: 0.9999991670270851, iteration: 70402
loss: 0.9970315098762512,grad_norm: 0.9999991133223453, iteration: 70403
loss: 0.9873866438865662,grad_norm: 0.9999991695557393, iteration: 70404
loss: 0.989824652671814,grad_norm: 0.9999989506012513, iteration: 70405
loss: 0.9897334575653076,grad_norm: 0.9999990372763196, iteration: 70406
loss: 0.9750638604164124,grad_norm: 0.9999990950124894, iteration: 70407
loss: 0.9886268377304077,grad_norm: 0.9999992015207799, iteration: 70408
loss: 1.0357749462127686,grad_norm: 0.9999990108906625, iteration: 70409
loss: 0.9941197633743286,grad_norm: 0.9026229571306684, iteration: 70410
loss: 1.0146409273147583,grad_norm: 0.9436182616483328, iteration: 70411
loss: 1.030401587486267,grad_norm: 0.9120254196886575, iteration: 70412
loss: 0.9532544016838074,grad_norm: 0.9999992342039643, iteration: 70413
loss: 1.0104933977127075,grad_norm: 0.9775391544399635, iteration: 70414
loss: 1.0328264236450195,grad_norm: 0.9807255426214798, iteration: 70415
loss: 1.0153104066848755,grad_norm: 0.9999990887715057, iteration: 70416
loss: 0.995669424533844,grad_norm: 0.9999992740820849, iteration: 70417
loss: 1.0074018239974976,grad_norm: 0.9999991143824547, iteration: 70418
loss: 1.0107115507125854,grad_norm: 0.931595551346634, iteration: 70419
loss: 1.0155140161514282,grad_norm: 0.888166924655426, iteration: 70420
loss: 0.9817855358123779,grad_norm: 0.8762434367411519, iteration: 70421
loss: 0.9770763516426086,grad_norm: 0.9139739099799203, iteration: 70422
loss: 0.9934647679328918,grad_norm: 0.9301068701029046, iteration: 70423
loss: 0.9842886328697205,grad_norm: 0.9999989892083933, iteration: 70424
loss: 1.0001871585845947,grad_norm: 0.9999991681521547, iteration: 70425
loss: 0.9809696078300476,grad_norm: 0.9999991661520378, iteration: 70426
loss: 0.9677310585975647,grad_norm: 0.9999991748486119, iteration: 70427
loss: 0.9569350481033325,grad_norm: 0.999999221683295, iteration: 70428
loss: 0.9974909424781799,grad_norm: 0.9553628284914193, iteration: 70429
loss: 0.9543322324752808,grad_norm: 0.9532520132558315, iteration: 70430
loss: 1.0299863815307617,grad_norm: 0.9999990950190091, iteration: 70431
loss: 1.0251295566558838,grad_norm: 0.9999993551271177, iteration: 70432
loss: 0.983458936214447,grad_norm: 0.9937217578814264, iteration: 70433
loss: 1.0123370885849,grad_norm: 0.8574051259201892, iteration: 70434
loss: 1.0187791585922241,grad_norm: 0.9999992306217224, iteration: 70435
loss: 1.0119953155517578,grad_norm: 0.999999146763543, iteration: 70436
loss: 0.9819948077201843,grad_norm: 0.9557427546174465, iteration: 70437
loss: 0.979511559009552,grad_norm: 0.9999991207145065, iteration: 70438
loss: 1.0202771425247192,grad_norm: 0.9394395290389367, iteration: 70439
loss: 1.0097497701644897,grad_norm: 0.8746784471211464, iteration: 70440
loss: 1.010156273841858,grad_norm: 0.9741566515801023, iteration: 70441
loss: 0.9758337140083313,grad_norm: 0.9999992326701191, iteration: 70442
loss: 1.0182427167892456,grad_norm: 0.9397926640052031, iteration: 70443
loss: 0.9928262233734131,grad_norm: 0.9999991794219432, iteration: 70444
loss: 1.0227807760238647,grad_norm: 0.9999990164198854, iteration: 70445
loss: 1.0290184020996094,grad_norm: 0.9999992703882522, iteration: 70446
loss: 0.9729755520820618,grad_norm: 0.9623659395596926, iteration: 70447
loss: 0.9721934199333191,grad_norm: 0.8333496001122737, iteration: 70448
loss: 1.0001291036605835,grad_norm: 0.9311296263390887, iteration: 70449
loss: 0.9783079028129578,grad_norm: 0.914563589933182, iteration: 70450
loss: 0.9896659255027771,grad_norm: 0.9999990720831745, iteration: 70451
loss: 0.9592082500457764,grad_norm: 0.9999991490498584, iteration: 70452
loss: 1.0333045721054077,grad_norm: 0.999999101153171, iteration: 70453
loss: 1.0051342248916626,grad_norm: 0.999999285506035, iteration: 70454
loss: 1.029024362564087,grad_norm: 0.9999991853546923, iteration: 70455
loss: 1.004612684249878,grad_norm: 0.9835257430172628, iteration: 70456
loss: 0.9887953996658325,grad_norm: 0.9999991831985453, iteration: 70457
loss: 1.0360491275787354,grad_norm: 0.9999993530347631, iteration: 70458
loss: 0.9828750491142273,grad_norm: 0.9999991488540397, iteration: 70459
loss: 0.9818966388702393,grad_norm: 0.9999991745320667, iteration: 70460
loss: 0.9870489239692688,grad_norm: 0.9999991584886659, iteration: 70461
loss: 0.992011547088623,grad_norm: 0.9999991437432856, iteration: 70462
loss: 0.9976083040237427,grad_norm: 0.9999991560399701, iteration: 70463
loss: 1.0063235759735107,grad_norm: 0.9999992583517503, iteration: 70464
loss: 1.010790228843689,grad_norm: 0.9350709156435686, iteration: 70465
loss: 1.023952603340149,grad_norm: 0.9999991180767998, iteration: 70466
loss: 1.0142155885696411,grad_norm: 0.9295335730966907, iteration: 70467
loss: 0.9939378499984741,grad_norm: 0.9999990694760369, iteration: 70468
loss: 1.014194130897522,grad_norm: 0.9904575766545354, iteration: 70469
loss: 0.9884505271911621,grad_norm: 0.99999911042579, iteration: 70470
loss: 0.9915555715560913,grad_norm: 0.9659748334227218, iteration: 70471
loss: 0.9836877584457397,grad_norm: 0.9791156378894489, iteration: 70472
loss: 1.0158188343048096,grad_norm: 0.9999988552760275, iteration: 70473
loss: 0.9911301136016846,grad_norm: 0.9999990555944495, iteration: 70474
loss: 0.9973052144050598,grad_norm: 0.95834229853233, iteration: 70475
loss: 1.00113844871521,grad_norm: 0.9999990863394554, iteration: 70476
loss: 0.964493989944458,grad_norm: 0.9999991575212538, iteration: 70477
loss: 1.0464363098144531,grad_norm: 0.9999991386917657, iteration: 70478
loss: 1.0336865186691284,grad_norm: 0.999999175744833, iteration: 70479
loss: 1.0225796699523926,grad_norm: 0.9999990008132992, iteration: 70480
loss: 1.0013611316680908,grad_norm: 0.9663278273963223, iteration: 70481
loss: 1.0440342426300049,grad_norm: 0.8835498427267872, iteration: 70482
loss: 0.9895214438438416,grad_norm: 0.9409532725393491, iteration: 70483
loss: 0.9941555261611938,grad_norm: 0.999999255743733, iteration: 70484
loss: 0.9972913861274719,grad_norm: 0.8027822887148233, iteration: 70485
loss: 0.9477359056472778,grad_norm: 0.9152248082262283, iteration: 70486
loss: 0.9790493249893188,grad_norm: 0.8699381552897131, iteration: 70487
loss: 0.9668160676956177,grad_norm: 0.9999989640416241, iteration: 70488
loss: 1.0113236904144287,grad_norm: 0.9999990174999648, iteration: 70489
loss: 0.9979607462882996,grad_norm: 0.9203818021691842, iteration: 70490
loss: 0.9684159755706787,grad_norm: 0.9589592095355454, iteration: 70491
loss: 0.9498144388198853,grad_norm: 0.999999150332568, iteration: 70492
loss: 1.0311070680618286,grad_norm: 0.9016235154691897, iteration: 70493
loss: 1.0116045475006104,grad_norm: 0.9999991088855119, iteration: 70494
loss: 0.9836018085479736,grad_norm: 0.9999993145286468, iteration: 70495
loss: 1.012565016746521,grad_norm: 0.9999991602850851, iteration: 70496
loss: 1.0643266439437866,grad_norm: 0.9999996457752197, iteration: 70497
loss: 1.0328162908554077,grad_norm: 0.9999991884128615, iteration: 70498
loss: 0.9843561053276062,grad_norm: 0.9999990956954381, iteration: 70499
loss: 1.0309475660324097,grad_norm: 0.9999990716110558, iteration: 70500
loss: 0.9919894337654114,grad_norm: 0.9999991043011632, iteration: 70501
loss: 0.9898373484611511,grad_norm: 0.9414811521341379, iteration: 70502
loss: 1.0132019519805908,grad_norm: 0.999999268951286, iteration: 70503
loss: 1.0144789218902588,grad_norm: 0.8595221418755788, iteration: 70504
loss: 1.019561529159546,grad_norm: 0.9999990354829525, iteration: 70505
loss: 1.0121487379074097,grad_norm: 0.9973499457599383, iteration: 70506
loss: 0.9792718887329102,grad_norm: 0.9999990565499192, iteration: 70507
loss: 1.0030254125595093,grad_norm: 0.9999992782066027, iteration: 70508
loss: 1.0012500286102295,grad_norm: 0.9037009895711171, iteration: 70509
loss: 1.0208014249801636,grad_norm: 0.9999990536166632, iteration: 70510
loss: 1.030266284942627,grad_norm: 0.8967977051858997, iteration: 70511
loss: 1.008750557899475,grad_norm: 0.9999990908846298, iteration: 70512
loss: 0.987511932849884,grad_norm: 0.9999990950834844, iteration: 70513
loss: 1.0053926706314087,grad_norm: 0.9999991261439568, iteration: 70514
loss: 1.0178829431533813,grad_norm: 0.999999022888044, iteration: 70515
loss: 0.9976802468299866,grad_norm: 0.8465671302308863, iteration: 70516
loss: 0.9803844094276428,grad_norm: 0.9553954884426473, iteration: 70517
loss: 1.0190069675445557,grad_norm: 0.9999993038812756, iteration: 70518
loss: 0.9780392646789551,grad_norm: 0.8129512993432563, iteration: 70519
loss: 0.9850744605064392,grad_norm: 0.9999992375308623, iteration: 70520
loss: 1.006803035736084,grad_norm: 0.9731988031406033, iteration: 70521
loss: 0.979611873626709,grad_norm: 0.9584388910346563, iteration: 70522
loss: 0.9989184141159058,grad_norm: 0.9999991887538864, iteration: 70523
loss: 0.9908149838447571,grad_norm: 0.9999991826926719, iteration: 70524
loss: 1.0322496891021729,grad_norm: 0.9999990262586876, iteration: 70525
loss: 1.0478192567825317,grad_norm: 0.9999990706078749, iteration: 70526
loss: 0.9687692523002625,grad_norm: 0.9283997720491505, iteration: 70527
loss: 1.0217205286026,grad_norm: 0.9999991900007332, iteration: 70528
loss: 0.9905081987380981,grad_norm: 0.9999991249043796, iteration: 70529
loss: 0.9777881503105164,grad_norm: 0.9975005143173987, iteration: 70530
loss: 0.9785738587379456,grad_norm: 0.9451589905350659, iteration: 70531
loss: 0.9901158213615417,grad_norm: 0.9595802186803729, iteration: 70532
loss: 1.004239797592163,grad_norm: 0.999999138073753, iteration: 70533
loss: 0.966373860836029,grad_norm: 0.9007068684250732, iteration: 70534
loss: 0.9831802845001221,grad_norm: 0.999999159405621, iteration: 70535
loss: 1.027023196220398,grad_norm: 0.9999991898865426, iteration: 70536
loss: 1.051071047782898,grad_norm: 0.9999991208571015, iteration: 70537
loss: 0.9869344830513,grad_norm: 0.9999994261545335, iteration: 70538
loss: 1.0037788152694702,grad_norm: 0.9321984357463572, iteration: 70539
loss: 1.0327365398406982,grad_norm: 0.9999990741390099, iteration: 70540
loss: 0.9983903765678406,grad_norm: 0.8911229523676234, iteration: 70541
loss: 1.0045239925384521,grad_norm: 0.9999992156023896, iteration: 70542
loss: 1.0384482145309448,grad_norm: 0.8988437282515057, iteration: 70543
loss: 0.9442527294158936,grad_norm: 0.9999991373139623, iteration: 70544
loss: 0.9745545387268066,grad_norm: 0.8252374096414296, iteration: 70545
loss: 0.9750842452049255,grad_norm: 0.9081104484256635, iteration: 70546
loss: 1.003151774406433,grad_norm: 0.9322862131934114, iteration: 70547
loss: 0.9785035252571106,grad_norm: 0.9999991510173595, iteration: 70548
loss: 1.0266566276550293,grad_norm: 0.9999990501441479, iteration: 70549
loss: 0.9926720857620239,grad_norm: 0.9999991454029651, iteration: 70550
loss: 0.9822432994842529,grad_norm: 0.9595783834597307, iteration: 70551
loss: 0.9852946996688843,grad_norm: 0.9999991656190467, iteration: 70552
loss: 0.9597257375717163,grad_norm: 0.9999991299410614, iteration: 70553
loss: 1.0083227157592773,grad_norm: 0.9422626536856517, iteration: 70554
loss: 1.021721601486206,grad_norm: 0.9999997980707468, iteration: 70555
loss: 1.003609538078308,grad_norm: 0.999847156606527, iteration: 70556
loss: 1.0134872198104858,grad_norm: 0.9999991754872686, iteration: 70557
loss: 0.9826618432998657,grad_norm: 0.9999991464993832, iteration: 70558
loss: 1.021125316619873,grad_norm: 0.9999995213805143, iteration: 70559
loss: 1.0281809568405151,grad_norm: 0.9999993277079596, iteration: 70560
loss: 0.9956846833229065,grad_norm: 0.9999990619086224, iteration: 70561
loss: 1.0356329679489136,grad_norm: 0.9570977898485308, iteration: 70562
loss: 0.9856361150741577,grad_norm: 0.9999991217803975, iteration: 70563
loss: 0.9931469559669495,grad_norm: 0.9999991700286618, iteration: 70564
loss: 1.0041980743408203,grad_norm: 0.9999990063447686, iteration: 70565
loss: 0.9937227368354797,grad_norm: 0.9999989612852067, iteration: 70566
loss: 1.0177052021026611,grad_norm: 0.9999992254448109, iteration: 70567
loss: 1.005804181098938,grad_norm: 0.9999993792537866, iteration: 70568
loss: 1.0332578420639038,grad_norm: 0.9671808694524207, iteration: 70569
loss: 0.9933393597602844,grad_norm: 0.9905979454834044, iteration: 70570
loss: 1.0221118927001953,grad_norm: 0.959997037751598, iteration: 70571
loss: 1.0236239433288574,grad_norm: 0.9999991583572554, iteration: 70572
loss: 0.9847105145454407,grad_norm: 0.9999990779812049, iteration: 70573
loss: 0.9921311736106873,grad_norm: 0.9439237309669032, iteration: 70574
loss: 1.0283418893814087,grad_norm: 0.9999991493886924, iteration: 70575
loss: 0.9960703253746033,grad_norm: 0.9999992322515598, iteration: 70576
loss: 1.026501178741455,grad_norm: 0.9999991813979883, iteration: 70577
loss: 0.9830603003501892,grad_norm: 0.9999991109841038, iteration: 70578
loss: 0.9871675372123718,grad_norm: 0.8528147077044355, iteration: 70579
loss: 1.0226964950561523,grad_norm: 0.8807093783719435, iteration: 70580
loss: 0.989265501499176,grad_norm: 0.8809591690299058, iteration: 70581
loss: 1.0197919607162476,grad_norm: 0.9999991537577491, iteration: 70582
loss: 1.0315847396850586,grad_norm: 0.9840422805967042, iteration: 70583
loss: 0.9996204972267151,grad_norm: 0.9999991819164136, iteration: 70584
loss: 1.0256571769714355,grad_norm: 0.9999990871911008, iteration: 70585
loss: 1.0253932476043701,grad_norm: 0.9999997682071791, iteration: 70586
loss: 0.9728880524635315,grad_norm: 0.9999991752292936, iteration: 70587
loss: 1.0552922487258911,grad_norm: 0.9752617888760773, iteration: 70588
loss: 1.0263645648956299,grad_norm: 0.9999991920272243, iteration: 70589
loss: 1.00127375125885,grad_norm: 0.8673272096646978, iteration: 70590
loss: 1.0638346672058105,grad_norm: 0.9999996038964843, iteration: 70591
loss: 0.9843760132789612,grad_norm: 0.999999151993633, iteration: 70592
loss: 0.996519148349762,grad_norm: 0.9999990962860353, iteration: 70593
loss: 1.0262197256088257,grad_norm: 0.99999915090482, iteration: 70594
loss: 1.0360180139541626,grad_norm: 0.91988260143996, iteration: 70595
loss: 1.0131298303604126,grad_norm: 0.9999991323534355, iteration: 70596
loss: 0.9826560020446777,grad_norm: 0.9999990816902796, iteration: 70597
loss: 0.993049144744873,grad_norm: 0.9999992148367511, iteration: 70598
loss: 1.0121945142745972,grad_norm: 0.999999195326111, iteration: 70599
loss: 1.0175243616104126,grad_norm: 0.9999991075938417, iteration: 70600
loss: 1.0166497230529785,grad_norm: 0.9167352077322218, iteration: 70601
loss: 1.0194326639175415,grad_norm: 0.9859578047497285, iteration: 70602
loss: 1.0327558517456055,grad_norm: 0.99999895078088, iteration: 70603
loss: 1.0181429386138916,grad_norm: 0.9999991137616471, iteration: 70604
loss: 1.0122833251953125,grad_norm: 0.8721820039254943, iteration: 70605
loss: 1.0139803886413574,grad_norm: 0.9653599837484516, iteration: 70606
loss: 0.9692326188087463,grad_norm: 0.9999990845951253, iteration: 70607
loss: 0.998952329158783,grad_norm: 0.9999991596424554, iteration: 70608
loss: 0.9774696826934814,grad_norm: 0.9999990899611757, iteration: 70609
loss: 1.01579749584198,grad_norm: 0.9999990302690659, iteration: 70610
loss: 1.0131624937057495,grad_norm: 0.9999994870266837, iteration: 70611
loss: 0.9901493787765503,grad_norm: 0.9774157816097796, iteration: 70612
loss: 1.0054596662521362,grad_norm: 0.9999990990757396, iteration: 70613
loss: 1.0074036121368408,grad_norm: 0.9999996244979753, iteration: 70614
loss: 1.0108463764190674,grad_norm: 0.9999991125659639, iteration: 70615
loss: 1.0113073587417603,grad_norm: 0.9999991597811032, iteration: 70616
loss: 0.9918363690376282,grad_norm: 0.8324130519833595, iteration: 70617
loss: 0.9925588369369507,grad_norm: 0.999999138328038, iteration: 70618
loss: 0.9882640242576599,grad_norm: 0.999999105211717, iteration: 70619
loss: 1.02025306224823,grad_norm: 0.9999992116517039, iteration: 70620
loss: 0.9752485752105713,grad_norm: 0.9999992437190947, iteration: 70621
loss: 1.0217937231063843,grad_norm: 0.9667107471297487, iteration: 70622
loss: 0.9848200082778931,grad_norm: 0.9146340765982497, iteration: 70623
loss: 0.9960280060768127,grad_norm: 0.9999990943485662, iteration: 70624
loss: 1.0076974630355835,grad_norm: 0.9999991021699162, iteration: 70625
loss: 1.1654314994812012,grad_norm: 0.9999995036987166, iteration: 70626
loss: 1.01826012134552,grad_norm: 0.9999991706340605, iteration: 70627
loss: 0.9716383218765259,grad_norm: 0.9999990181924563, iteration: 70628
loss: 1.000013828277588,grad_norm: 0.959677931502583, iteration: 70629
loss: 1.0153326988220215,grad_norm: 0.9999991084027829, iteration: 70630
loss: 1.0014032125473022,grad_norm: 0.9999990175327155, iteration: 70631
loss: 1.0083138942718506,grad_norm: 0.9999991369979552, iteration: 70632
loss: 1.0134700536727905,grad_norm: 0.9695563104010421, iteration: 70633
loss: 0.9697702527046204,grad_norm: 0.9999990337663711, iteration: 70634
loss: 1.0307037830352783,grad_norm: 0.9999998629139798, iteration: 70635
loss: 1.0058293342590332,grad_norm: 0.910890365906941, iteration: 70636
loss: 1.0005500316619873,grad_norm: 0.922882893135948, iteration: 70637
loss: 0.9870821237564087,grad_norm: 0.9983806871184808, iteration: 70638
loss: 1.0056755542755127,grad_norm: 0.9999990566902077, iteration: 70639
loss: 0.9639136791229248,grad_norm: 0.8907501886197196, iteration: 70640
loss: 0.9975091814994812,grad_norm: 0.9999989971972042, iteration: 70641
loss: 1.0275334119796753,grad_norm: 0.8408317083407275, iteration: 70642
loss: 0.9775623679161072,grad_norm: 0.9124274775881577, iteration: 70643
loss: 1.0184030532836914,grad_norm: 0.9999990643307923, iteration: 70644
loss: 0.9271613955497742,grad_norm: 0.9479202069243106, iteration: 70645
loss: 0.9967362880706787,grad_norm: 0.9999990408171491, iteration: 70646
loss: 1.0428239107131958,grad_norm: 0.9999996564480796, iteration: 70647
loss: 0.9875932931900024,grad_norm: 0.8457954775672369, iteration: 70648
loss: 1.0225454568862915,grad_norm: 0.9999990846858506, iteration: 70649
loss: 1.0368496179580688,grad_norm: 0.9999993959508185, iteration: 70650
loss: 0.9849358797073364,grad_norm: 0.9999991946417232, iteration: 70651
loss: 0.9931754469871521,grad_norm: 0.9216102706723478, iteration: 70652
loss: 1.0145124197006226,grad_norm: 0.999999055096176, iteration: 70653
loss: 0.9843384027481079,grad_norm: 0.8642365694157246, iteration: 70654
loss: 1.000869870185852,grad_norm: 0.999998926250593, iteration: 70655
loss: 1.0141983032226562,grad_norm: 0.9999992194535966, iteration: 70656
loss: 1.0309066772460938,grad_norm: 0.9999989538280235, iteration: 70657
loss: 0.9776776432991028,grad_norm: 0.9999989971165621, iteration: 70658
loss: 1.0184324979782104,grad_norm: 0.9999992708408971, iteration: 70659
loss: 0.9970148801803589,grad_norm: 0.9999990433503639, iteration: 70660
loss: 0.9608527421951294,grad_norm: 0.930490258165819, iteration: 70661
loss: 1.0236543416976929,grad_norm: 0.999999209804361, iteration: 70662
loss: 0.9892846941947937,grad_norm: 0.8993289602684834, iteration: 70663
loss: 0.9960185289382935,grad_norm: 0.9855224179059311, iteration: 70664
loss: 1.0306179523468018,grad_norm: 0.9940181033572998, iteration: 70665
loss: 0.999103307723999,grad_norm: 0.9999991569497195, iteration: 70666
loss: 1.0105574131011963,grad_norm: 0.9999991439152416, iteration: 70667
loss: 1.0171412229537964,grad_norm: 0.9999992172446565, iteration: 70668
loss: 1.0241774320602417,grad_norm: 0.9999991420878394, iteration: 70669
loss: 1.0399959087371826,grad_norm: 0.9999992514934108, iteration: 70670
loss: 0.9747770428657532,grad_norm: 0.9442676598673162, iteration: 70671
loss: 0.9822589159011841,grad_norm: 0.8793871122989639, iteration: 70672
loss: 0.992619514465332,grad_norm: 0.9999991743765916, iteration: 70673
loss: 1.0032405853271484,grad_norm: 0.9720567301263956, iteration: 70674
loss: 1.0214602947235107,grad_norm: 0.9999991328454467, iteration: 70675
loss: 0.9920452833175659,grad_norm: 0.9999991203255925, iteration: 70676
loss: 0.98771733045578,grad_norm: 0.9999990288028898, iteration: 70677
loss: 0.9961419701576233,grad_norm: 0.983628607362938, iteration: 70678
loss: 1.0225924253463745,grad_norm: 0.9812453587374331, iteration: 70679
loss: 1.0143144130706787,grad_norm: 0.9830858918143685, iteration: 70680
loss: 0.9726573824882507,grad_norm: 0.9449932641772045, iteration: 70681
loss: 0.9811262488365173,grad_norm: 0.9999991164095283, iteration: 70682
loss: 1.0032691955566406,grad_norm: 0.9999993529208313, iteration: 70683
loss: 1.0091495513916016,grad_norm: 0.9999991283784545, iteration: 70684
loss: 1.0131529569625854,grad_norm: 0.9999991624250623, iteration: 70685
loss: 0.9579544067382812,grad_norm: 0.9758923054070032, iteration: 70686
loss: 1.0340362787246704,grad_norm: 0.9999994369197515, iteration: 70687
loss: 1.014275312423706,grad_norm: 0.9414068983091903, iteration: 70688
loss: 1.021817922592163,grad_norm: 0.9999990640530353, iteration: 70689
loss: 0.9595451951026917,grad_norm: 0.9343461475295193, iteration: 70690
loss: 1.0147483348846436,grad_norm: 0.9999991305082956, iteration: 70691
loss: 1.0213994979858398,grad_norm: 0.9999991469972236, iteration: 70692
loss: 1.0150198936462402,grad_norm: 0.9768196852590585, iteration: 70693
loss: 1.0002275705337524,grad_norm: 0.9862268296113146, iteration: 70694
loss: 1.0163978338241577,grad_norm: 0.9999990494575236, iteration: 70695
loss: 1.0493968725204468,grad_norm: 0.9999993349262309, iteration: 70696
loss: 1.05135977268219,grad_norm: 0.9999994379127944, iteration: 70697
loss: 1.0046621561050415,grad_norm: 0.9999992245870332, iteration: 70698
loss: 1.0876357555389404,grad_norm: 0.9999995133207358, iteration: 70699
loss: 0.9942522644996643,grad_norm: 0.9999990553720217, iteration: 70700
loss: 0.9977654814720154,grad_norm: 0.9433596780223137, iteration: 70701
loss: 1.0671054124832153,grad_norm: 0.8966426583452911, iteration: 70702
loss: 1.0057673454284668,grad_norm: 0.9999989507713091, iteration: 70703
loss: 0.9717909097671509,grad_norm: 0.9999990605494965, iteration: 70704
loss: 1.0627087354660034,grad_norm: 0.9999991720935069, iteration: 70705
loss: 1.0348238945007324,grad_norm: 0.9999989916733862, iteration: 70706
loss: 1.0027687549591064,grad_norm: 0.9999991541577853, iteration: 70707
loss: 1.0064066648483276,grad_norm: 0.9472777571312819, iteration: 70708
loss: 1.0224074125289917,grad_norm: 0.9999990630512484, iteration: 70709
loss: 0.9895208477973938,grad_norm: 0.917831630337417, iteration: 70710
loss: 1.018289566040039,grad_norm: 0.9711222716388993, iteration: 70711
loss: 0.9847583770751953,grad_norm: 0.9891610425833437, iteration: 70712
loss: 0.9794664978981018,grad_norm: 0.9662825430042711, iteration: 70713
loss: 1.0102534294128418,grad_norm: 0.9999992277286316, iteration: 70714
loss: 0.9955441355705261,grad_norm: 0.9999990502344637, iteration: 70715
loss: 0.9983866214752197,grad_norm: 0.9999988958658489, iteration: 70716
loss: 1.0178464651107788,grad_norm: 0.9313555715809592, iteration: 70717
loss: 0.9831975102424622,grad_norm: 0.999999287094076, iteration: 70718
loss: 1.0124461650848389,grad_norm: 0.9999992101307034, iteration: 70719
loss: 0.9952002763748169,grad_norm: 0.9944556797300002, iteration: 70720
loss: 1.011479139328003,grad_norm: 0.9999991496623465, iteration: 70721
loss: 1.0258774757385254,grad_norm: 0.9999994901764166, iteration: 70722
loss: 0.9877787232398987,grad_norm: 0.854895781784888, iteration: 70723
loss: 1.041703462600708,grad_norm: 0.9775485782542105, iteration: 70724
loss: 1.0411075353622437,grad_norm: 0.878066745511544, iteration: 70725
loss: 1.0387762784957886,grad_norm: 0.9999990796076569, iteration: 70726
loss: 1.0053189992904663,grad_norm: 0.9235411856160913, iteration: 70727
loss: 1.0107327699661255,grad_norm: 0.9881900508310498, iteration: 70728
loss: 1.039957880973816,grad_norm: 0.9964167897345122, iteration: 70729
loss: 0.9880220890045166,grad_norm: 0.9576288863433825, iteration: 70730
loss: 0.9811351299285889,grad_norm: 0.9999991053697834, iteration: 70731
loss: 1.0004841089248657,grad_norm: 0.9368693829979923, iteration: 70732
loss: 1.0119404792785645,grad_norm: 0.9999994379959507, iteration: 70733
loss: 0.9920386672019958,grad_norm: 0.9999989911702696, iteration: 70734
loss: 1.0104718208312988,grad_norm: 0.8969270669569568, iteration: 70735
loss: 1.002913475036621,grad_norm: 0.8930370847735005, iteration: 70736
loss: 1.0113708972930908,grad_norm: 0.999999965434491, iteration: 70737
loss: 1.011989951133728,grad_norm: 0.9999992246746858, iteration: 70738
loss: 0.9889662265777588,grad_norm: 0.9999991564273555, iteration: 70739
loss: 0.9685073494911194,grad_norm: 0.9999991391401092, iteration: 70740
loss: 0.988987147808075,grad_norm: 0.9999991027002137, iteration: 70741
loss: 0.9850792288780212,grad_norm: 0.8976334178121566, iteration: 70742
loss: 1.0102161169052124,grad_norm: 0.9999990107848492, iteration: 70743
loss: 1.010277509689331,grad_norm: 0.9999989475120774, iteration: 70744
loss: 1.0122346878051758,grad_norm: 0.9999991084577403, iteration: 70745
loss: 1.036374807357788,grad_norm: 0.999999258804357, iteration: 70746
loss: 0.9887000322341919,grad_norm: 0.9347288772217248, iteration: 70747
loss: 0.9818711280822754,grad_norm: 0.9999991360367619, iteration: 70748
loss: 0.9816230535507202,grad_norm: 0.9323218612557564, iteration: 70749
loss: 1.0194846391677856,grad_norm: 0.9999992060528229, iteration: 70750
loss: 0.9936640858650208,grad_norm: 0.9999990763756655, iteration: 70751
loss: 1.0078539848327637,grad_norm: 0.8903917255216421, iteration: 70752
loss: 1.026453971862793,grad_norm: 0.9999991271503337, iteration: 70753
loss: 1.0087705850601196,grad_norm: 0.9999990429314054, iteration: 70754
loss: 1.0291485786437988,grad_norm: 0.9999993865826621, iteration: 70755
loss: 0.9742280840873718,grad_norm: 0.8510831138375102, iteration: 70756
loss: 0.9809874892234802,grad_norm: 0.9999991613601942, iteration: 70757
loss: 0.9731320738792419,grad_norm: 0.9785918420647567, iteration: 70758
loss: 1.0221011638641357,grad_norm: 0.9352507979202941, iteration: 70759
loss: 1.013288974761963,grad_norm: 0.9999990928810552, iteration: 70760
loss: 0.9907469153404236,grad_norm: 0.9999991363159502, iteration: 70761
loss: 0.9910281300544739,grad_norm: 0.9999989630736887, iteration: 70762
loss: 1.0028777122497559,grad_norm: 0.9999991967815756, iteration: 70763
loss: 1.0041632652282715,grad_norm: 0.926895351905796, iteration: 70764
loss: 0.9956369996070862,grad_norm: 0.9918474590888533, iteration: 70765
loss: 1.0099256038665771,grad_norm: 0.9999990401494065, iteration: 70766
loss: 1.0046240091323853,grad_norm: 0.9979651084865553, iteration: 70767
loss: 0.959796130657196,grad_norm: 0.9999991189557746, iteration: 70768
loss: 1.0045795440673828,grad_norm: 0.8669488723647903, iteration: 70769
loss: 1.0230681896209717,grad_norm: 0.9880952257543811, iteration: 70770
loss: 1.0068470239639282,grad_norm: 0.999999156409433, iteration: 70771
loss: 1.0306220054626465,grad_norm: 0.9999992304011467, iteration: 70772
loss: 0.9578070044517517,grad_norm: 0.9999990936345303, iteration: 70773
loss: 1.0583776235580444,grad_norm: 0.9999994799805909, iteration: 70774
loss: 0.9677279591560364,grad_norm: 0.9843766419784138, iteration: 70775
loss: 0.9817201495170593,grad_norm: 0.9999991475009544, iteration: 70776
loss: 1.0384511947631836,grad_norm: 0.9999990998970523, iteration: 70777
loss: 1.0191409587860107,grad_norm: 0.9545314025101791, iteration: 70778
loss: 0.9802907705307007,grad_norm: 0.8595566924742886, iteration: 70779
loss: 0.9707246422767639,grad_norm: 0.9689468645723371, iteration: 70780
loss: 1.000227928161621,grad_norm: 0.9864101367577902, iteration: 70781
loss: 0.9709073901176453,grad_norm: 0.8536516325329429, iteration: 70782
loss: 0.961648166179657,grad_norm: 0.9093276607835202, iteration: 70783
loss: 1.0454586744308472,grad_norm: 0.9999991097064203, iteration: 70784
loss: 0.9577563405036926,grad_norm: 0.9604588554324279, iteration: 70785
loss: 0.9991680383682251,grad_norm: 0.9999993635604395, iteration: 70786
loss: 0.9705766439437866,grad_norm: 0.9999992512922513, iteration: 70787
loss: 1.0017521381378174,grad_norm: 0.9999990380828432, iteration: 70788
loss: 0.99095618724823,grad_norm: 0.9999989681095461, iteration: 70789
loss: 1.008194923400879,grad_norm: 0.9999989784083197, iteration: 70790
loss: 0.9978153705596924,grad_norm: 0.9530071928197416, iteration: 70791
loss: 1.0049668550491333,grad_norm: 0.9999991288319484, iteration: 70792
loss: 1.0307068824768066,grad_norm: 0.999999128689407, iteration: 70793
loss: 0.9850090146064758,grad_norm: 0.9999993326131946, iteration: 70794
loss: 0.9960154891014099,grad_norm: 0.9999989390983943, iteration: 70795
loss: 0.9952541589736938,grad_norm: 0.9999992244941278, iteration: 70796
loss: 1.0217397212982178,grad_norm: 0.9999993191422826, iteration: 70797
loss: 0.9835013747215271,grad_norm: 0.9999991720452504, iteration: 70798
loss: 1.0240581035614014,grad_norm: 0.9999992624838416, iteration: 70799
loss: 1.0111178159713745,grad_norm: 0.9999991800096789, iteration: 70800
loss: 1.0172454118728638,grad_norm: 0.9849236893269426, iteration: 70801
loss: 1.0082651376724243,grad_norm: 0.9999990756527485, iteration: 70802
loss: 1.0105727910995483,grad_norm: 0.9999991061056006, iteration: 70803
loss: 1.023518443107605,grad_norm: 0.9999992339142362, iteration: 70804
loss: 1.0079644918441772,grad_norm: 0.9999992300231361, iteration: 70805
loss: 1.0252403020858765,grad_norm: 0.9999991828072285, iteration: 70806
loss: 0.991148054599762,grad_norm: 0.999999258392038, iteration: 70807
loss: 1.0523765087127686,grad_norm: 0.9999993470649201, iteration: 70808
loss: 1.0098472833633423,grad_norm: 0.9999994114456501, iteration: 70809
loss: 1.0094243288040161,grad_norm: 0.9016272009408053, iteration: 70810
loss: 0.9508357644081116,grad_norm: 0.9323859405844176, iteration: 70811
loss: 0.9914199113845825,grad_norm: 0.9999991652549989, iteration: 70812
loss: 1.0059101581573486,grad_norm: 0.999999248824757, iteration: 70813
loss: 0.9919158220291138,grad_norm: 0.9999991879327326, iteration: 70814
loss: 1.0177994966506958,grad_norm: 0.9999991165326729, iteration: 70815
loss: 0.971290111541748,grad_norm: 0.9576024549847582, iteration: 70816
loss: 1.0328611135482788,grad_norm: 0.9999990869830166, iteration: 70817
loss: 0.9501065611839294,grad_norm: 0.9999989896600693, iteration: 70818
loss: 1.0135942697525024,grad_norm: 0.999999302469049, iteration: 70819
loss: 0.9902772307395935,grad_norm: 0.9999991225842941, iteration: 70820
loss: 0.9922411441802979,grad_norm: 0.9999991428253129, iteration: 70821
loss: 0.9945639371871948,grad_norm: 0.9601742397404104, iteration: 70822
loss: 0.9870615005493164,grad_norm: 0.8755563610864733, iteration: 70823
loss: 0.9950796961784363,grad_norm: 0.9999990764465292, iteration: 70824
loss: 1.0081329345703125,grad_norm: 0.9920433137021686, iteration: 70825
loss: 1.0209052562713623,grad_norm: 0.9999992814180216, iteration: 70826
loss: 0.9829750061035156,grad_norm: 0.9999992368332862, iteration: 70827
loss: 0.9874418377876282,grad_norm: 0.9742614824826088, iteration: 70828
loss: 0.9895995855331421,grad_norm: 0.9484098392922518, iteration: 70829
loss: 0.9864720106124878,grad_norm: 0.9999989929104869, iteration: 70830
loss: 1.0056484937667847,grad_norm: 0.9297330465682203, iteration: 70831
loss: 0.9654855728149414,grad_norm: 0.9725702383005641, iteration: 70832
loss: 1.0185350179672241,grad_norm: 0.9443948024368646, iteration: 70833
loss: 0.9964162707328796,grad_norm: 0.9999991134097792, iteration: 70834
loss: 0.9703153967857361,grad_norm: 0.9999991801911773, iteration: 70835
loss: 0.9682197570800781,grad_norm: 0.9646538380769498, iteration: 70836
loss: 1.0271449089050293,grad_norm: 0.9999993341289679, iteration: 70837
loss: 1.0258712768554688,grad_norm: 0.8780103701715847, iteration: 70838
loss: 0.9702066779136658,grad_norm: 0.9999990087108048, iteration: 70839
loss: 1.0056827068328857,grad_norm: 0.9999989709483966, iteration: 70840
loss: 0.9870812892913818,grad_norm: 0.9999991063888907, iteration: 70841
loss: 1.0527989864349365,grad_norm: 0.9999991984601984, iteration: 70842
loss: 0.9974797368049622,grad_norm: 0.8164988429232389, iteration: 70843
loss: 1.0330424308776855,grad_norm: 0.8378381701163115, iteration: 70844
loss: 1.025808572769165,grad_norm: 0.8634684756344821, iteration: 70845
loss: 1.0268186330795288,grad_norm: 0.9999990888721766, iteration: 70846
loss: 0.9959328770637512,grad_norm: 0.9670162939449232, iteration: 70847
loss: 0.9717385768890381,grad_norm: 0.8753810909083032, iteration: 70848
loss: 1.0036377906799316,grad_norm: 0.999999244462122, iteration: 70849
loss: 1.0137569904327393,grad_norm: 0.999999151816876, iteration: 70850
loss: 0.9813948273658752,grad_norm: 0.9395195863800195, iteration: 70851
loss: 0.9999011158943176,grad_norm: 0.9999991429824769, iteration: 70852
loss: 1.020350694656372,grad_norm: 0.9999990930780417, iteration: 70853
loss: 0.9876937866210938,grad_norm: 0.999999299414141, iteration: 70854
loss: 0.9870341420173645,grad_norm: 0.9999991785298984, iteration: 70855
loss: 1.0246707201004028,grad_norm: 0.9999990747758672, iteration: 70856
loss: 1.0015884637832642,grad_norm: 0.9896410228060982, iteration: 70857
loss: 1.0097393989562988,grad_norm: 0.8777287609247836, iteration: 70858
loss: 1.0014159679412842,grad_norm: 0.938870344993439, iteration: 70859
loss: 0.9813737273216248,grad_norm: 0.9999991604553734, iteration: 70860
loss: 0.9990871548652649,grad_norm: 0.9177773182193519, iteration: 70861
loss: 0.9858303666114807,grad_norm: 0.9999990531479274, iteration: 70862
loss: 0.9919719696044922,grad_norm: 0.999999254362495, iteration: 70863
loss: 1.0072325468063354,grad_norm: 0.9999990518412383, iteration: 70864
loss: 0.9931010603904724,grad_norm: 0.8937937759991157, iteration: 70865
loss: 0.9666751623153687,grad_norm: 0.8658127774370696, iteration: 70866
loss: 0.9684450030326843,grad_norm: 0.9999990725937884, iteration: 70867
loss: 0.9818494319915771,grad_norm: 0.9297762647920325, iteration: 70868
loss: 0.956988513469696,grad_norm: 0.996543794476228, iteration: 70869
loss: 0.9791402220726013,grad_norm: 0.9999989384328729, iteration: 70870
loss: 0.9850343465805054,grad_norm: 0.8262668947215349, iteration: 70871
loss: 0.9985107779502869,grad_norm: 0.9340728931246491, iteration: 70872
loss: 0.9755159020423889,grad_norm: 0.9135305726368665, iteration: 70873
loss: 0.9917179942131042,grad_norm: 0.9999990562996535, iteration: 70874
loss: 0.9784922003746033,grad_norm: 0.8920443487947954, iteration: 70875
loss: 0.9747385382652283,grad_norm: 0.8572048122471124, iteration: 70876
loss: 0.9770768880844116,grad_norm: 0.8408198441211262, iteration: 70877
loss: 0.9779168367385864,grad_norm: 0.9540089057934564, iteration: 70878
loss: 0.9830572605133057,grad_norm: 0.960476104320707, iteration: 70879
loss: 1.0079972743988037,grad_norm: 0.9649869956863454, iteration: 70880
loss: 1.0004940032958984,grad_norm: 0.9989070082302427, iteration: 70881
loss: 1.0122708082199097,grad_norm: 0.9733541912256963, iteration: 70882
loss: 0.9930368661880493,grad_norm: 0.9999993196378164, iteration: 70883
loss: 1.0064748525619507,grad_norm: 0.9290709700712046, iteration: 70884
loss: 0.9883672595024109,grad_norm: 0.9999990539984984, iteration: 70885
loss: 1.0219007730484009,grad_norm: 0.9296723381843258, iteration: 70886
loss: 0.9582334160804749,grad_norm: 0.9999990786208887, iteration: 70887
loss: 1.0485198497772217,grad_norm: 0.9999991538356515, iteration: 70888
loss: 1.0016522407531738,grad_norm: 0.999999053464263, iteration: 70889
loss: 0.9923267960548401,grad_norm: 0.9325211088503705, iteration: 70890
loss: 1.0030219554901123,grad_norm: 0.8947535341182294, iteration: 70891
loss: 0.9670280814170837,grad_norm: 0.9999990805346171, iteration: 70892
loss: 0.9958943724632263,grad_norm: 0.9605607419759364, iteration: 70893
loss: 0.9989855885505676,grad_norm: 0.9062334271412399, iteration: 70894
loss: 1.0033397674560547,grad_norm: 0.999999351901542, iteration: 70895
loss: 1.0015439987182617,grad_norm: 0.999999057210388, iteration: 70896
loss: 0.9882282018661499,grad_norm: 0.9999992384631564, iteration: 70897
loss: 0.9591505527496338,grad_norm: 0.9999990569984784, iteration: 70898
loss: 1.0208338499069214,grad_norm: 0.9144776297278664, iteration: 70899
loss: 1.0140529870986938,grad_norm: 0.9999990921252389, iteration: 70900
loss: 1.0250070095062256,grad_norm: 0.9735001373910164, iteration: 70901
loss: 0.9475319981575012,grad_norm: 0.9999991163330402, iteration: 70902
loss: 1.0129597187042236,grad_norm: 0.9713702810411035, iteration: 70903
loss: 1.0099283456802368,grad_norm: 0.9999991573674486, iteration: 70904
loss: 0.9680769443511963,grad_norm: 0.9999992198541201, iteration: 70905
loss: 0.9933944344520569,grad_norm: 0.8754378187319682, iteration: 70906
loss: 1.0369837284088135,grad_norm: 0.9999990860215834, iteration: 70907
loss: 1.016633152961731,grad_norm: 0.9999991861028784, iteration: 70908
loss: 1.0125269889831543,grad_norm: 0.9824872191091322, iteration: 70909
loss: 1.0123889446258545,grad_norm: 0.9999990925605802, iteration: 70910
loss: 1.0250654220581055,grad_norm: 0.9999992011737842, iteration: 70911
loss: 0.9451998472213745,grad_norm: 0.9999991270972843, iteration: 70912
loss: 0.9745209813117981,grad_norm: 0.9493617146527701, iteration: 70913
loss: 0.984963595867157,grad_norm: 0.9999989602394331, iteration: 70914
loss: 0.9798030853271484,grad_norm: 0.9999991648402516, iteration: 70915
loss: 1.0019105672836304,grad_norm: 0.9999991065207262, iteration: 70916
loss: 1.0123679637908936,grad_norm: 0.9449835371475649, iteration: 70917
loss: 1.0071558952331543,grad_norm: 0.9999990351728388, iteration: 70918
loss: 1.0271681547164917,grad_norm: 0.9999994902272417, iteration: 70919
loss: 0.9619582891464233,grad_norm: 0.9078937722025853, iteration: 70920
loss: 0.9982172250747681,grad_norm: 0.9999991367876747, iteration: 70921
loss: 0.9756639003753662,grad_norm: 0.9418852775882646, iteration: 70922
loss: 0.9960280656814575,grad_norm: 0.9999990527604836, iteration: 70923
loss: 1.011361002922058,grad_norm: 0.9999991598348025, iteration: 70924
loss: 1.006568431854248,grad_norm: 0.9999990646673038, iteration: 70925
loss: 1.0277671813964844,grad_norm: 0.9999993262449421, iteration: 70926
loss: 0.9920493960380554,grad_norm: 0.9999993017504767, iteration: 70927
loss: 0.976161539554596,grad_norm: 0.9132910917166307, iteration: 70928
loss: 1.0060598850250244,grad_norm: 0.9999992352441069, iteration: 70929
loss: 1.0050010681152344,grad_norm: 0.9999991391714156, iteration: 70930
loss: 1.0379133224487305,grad_norm: 0.9999999786658809, iteration: 70931
loss: 0.9857154488563538,grad_norm: 0.8958284590944853, iteration: 70932
loss: 0.9835609197616577,grad_norm: 0.9999990342514075, iteration: 70933
loss: 1.0274931192398071,grad_norm: 0.9999994055781671, iteration: 70934
loss: 0.9925224184989929,grad_norm: 0.9999991114437558, iteration: 70935
loss: 1.0089633464813232,grad_norm: 0.7514644874829277, iteration: 70936
loss: 1.0110613107681274,grad_norm: 0.9999990536308103, iteration: 70937
loss: 1.0166290998458862,grad_norm: 0.999999149717134, iteration: 70938
loss: 1.0018693208694458,grad_norm: 0.9999991490746554, iteration: 70939
loss: 0.9880344271659851,grad_norm: 0.9999990382131411, iteration: 70940
loss: 1.005147933959961,grad_norm: 0.9999992928818234, iteration: 70941
loss: 1.0136743783950806,grad_norm: 0.9999992903289993, iteration: 70942
loss: 1.0309795141220093,grad_norm: 0.9999990762791168, iteration: 70943
loss: 1.0399945974349976,grad_norm: 1.00000001671812, iteration: 70944
loss: 1.01888108253479,grad_norm: 0.9999991148201032, iteration: 70945
loss: 0.9844028353691101,grad_norm: 0.9333194189545744, iteration: 70946
loss: 0.9894158244132996,grad_norm: 0.9999990271772626, iteration: 70947
loss: 1.0037871599197388,grad_norm: 0.919926829108816, iteration: 70948
loss: 1.002584457397461,grad_norm: 0.9999990430872978, iteration: 70949
loss: 1.0128334760665894,grad_norm: 0.9729367836179292, iteration: 70950
loss: 1.0214967727661133,grad_norm: 0.8634541443164785, iteration: 70951
loss: 1.0098563432693481,grad_norm: 0.846953286073383, iteration: 70952
loss: 1.007630705833435,grad_norm: 0.9999991795255543, iteration: 70953
loss: 1.003890037536621,grad_norm: 0.9999990590577512, iteration: 70954
loss: 1.0342577695846558,grad_norm: 0.9999998840808724, iteration: 70955
loss: 1.0205153226852417,grad_norm: 0.9999992703864846, iteration: 70956
loss: 0.9865934252738953,grad_norm: 0.9999992596037108, iteration: 70957
loss: 1.0220955610275269,grad_norm: 0.9999991628644727, iteration: 70958
loss: 0.9783678650856018,grad_norm: 0.9999991283904721, iteration: 70959
loss: 1.026289939880371,grad_norm: 0.9927815270078626, iteration: 70960
loss: 0.9987534284591675,grad_norm: 0.9014233077749159, iteration: 70961
loss: 0.9811238646507263,grad_norm: 0.8273002568155884, iteration: 70962
loss: 0.9599729180335999,grad_norm: 0.9687876006304348, iteration: 70963
loss: 0.988393247127533,grad_norm: 0.9999991309597479, iteration: 70964
loss: 0.9990294575691223,grad_norm: 0.9829589725985461, iteration: 70965
loss: 0.9844595789909363,grad_norm: 0.9999993065340872, iteration: 70966
loss: 0.9904903173446655,grad_norm: 0.9999993831983487, iteration: 70967
loss: 1.0041496753692627,grad_norm: 0.9999991687084336, iteration: 70968
loss: 0.9690180420875549,grad_norm: 0.9999991738776822, iteration: 70969
loss: 1.03107488155365,grad_norm: 0.9999991854706886, iteration: 70970
loss: 1.001234531402588,grad_norm: 0.9999992242427106, iteration: 70971
loss: 0.9838428497314453,grad_norm: 0.9527221013796945, iteration: 70972
loss: 1.0153768062591553,grad_norm: 0.9999990752278023, iteration: 70973
loss: 1.0086240768432617,grad_norm: 0.9999991166931763, iteration: 70974
loss: 1.0440547466278076,grad_norm: 0.9999992657063097, iteration: 70975
loss: 1.0223093032836914,grad_norm: 0.970408199018418, iteration: 70976
loss: 0.9876700043678284,grad_norm: 0.8458578178705839, iteration: 70977
loss: 0.9916279315948486,grad_norm: 0.9999989591946384, iteration: 70978
loss: 1.0081037282943726,grad_norm: 0.9706825185125749, iteration: 70979
loss: 0.9951788187026978,grad_norm: 0.9999992073103613, iteration: 70980
loss: 1.0212867259979248,grad_norm: 0.999999317294921, iteration: 70981
loss: 0.9777845740318298,grad_norm: 0.8312637326835811, iteration: 70982
loss: 1.0046799182891846,grad_norm: 0.9796469758163003, iteration: 70983
loss: 0.9711402654647827,grad_norm: 0.9902521517374231, iteration: 70984
loss: 1.0002930164337158,grad_norm: 0.9999991574125892, iteration: 70985
loss: 0.9932131171226501,grad_norm: 0.966728185553881, iteration: 70986
loss: 0.9681589603424072,grad_norm: 0.9999989608491212, iteration: 70987
loss: 1.007594108581543,grad_norm: 0.9999990723354175, iteration: 70988
loss: 1.0199668407440186,grad_norm: 0.9999990153219436, iteration: 70989
loss: 0.9792519211769104,grad_norm: 0.9999990816067179, iteration: 70990
loss: 0.9819998145103455,grad_norm: 0.9999990764034644, iteration: 70991
loss: 1.0266363620758057,grad_norm: 0.9999990614752771, iteration: 70992
loss: 0.9716557264328003,grad_norm: 0.9390230397169005, iteration: 70993
loss: 1.0166012048721313,grad_norm: 0.9984922052489916, iteration: 70994
loss: 1.0213875770568848,grad_norm: 0.9664508143510881, iteration: 70995
loss: 1.0225625038146973,grad_norm: 0.9999992412055957, iteration: 70996
loss: 1.0193508863449097,grad_norm: 0.9999991322316771, iteration: 70997
loss: 1.0062696933746338,grad_norm: 0.999999234652141, iteration: 70998
loss: 0.9795827269554138,grad_norm: 0.9999992431830265, iteration: 70999
loss: 0.9783805012702942,grad_norm: 0.8908672322546769, iteration: 71000
loss: 0.9839641451835632,grad_norm: 0.9999992679289385, iteration: 71001
loss: 1.0289530754089355,grad_norm: 0.9499132863512565, iteration: 71002
loss: 0.99161696434021,grad_norm: 0.9999990791972937, iteration: 71003
loss: 1.0169929265975952,grad_norm: 0.8773325463814124, iteration: 71004
loss: 1.012094259262085,grad_norm: 0.9851963867685808, iteration: 71005
loss: 0.9932717680931091,grad_norm: 0.99999917898445, iteration: 71006
loss: 1.034132957458496,grad_norm: 0.9999990103928138, iteration: 71007
loss: 1.0451477766036987,grad_norm: 0.9999992434784468, iteration: 71008
loss: 1.0344916582107544,grad_norm: 0.9841360918685291, iteration: 71009
loss: 1.0401824712753296,grad_norm: 0.9370450866026288, iteration: 71010
loss: 0.9779077768325806,grad_norm: 0.999999122201115, iteration: 71011
loss: 1.0027521848678589,grad_norm: 0.9999990166951045, iteration: 71012
loss: 1.0364575386047363,grad_norm: 0.9099777885555569, iteration: 71013
loss: 1.001889944076538,grad_norm: 0.9999989504609483, iteration: 71014
loss: 1.0344281196594238,grad_norm: 0.9999993164562766, iteration: 71015
loss: 0.9884089231491089,grad_norm: 0.9674866587967936, iteration: 71016
loss: 1.0091356039047241,grad_norm: 0.9855734801335846, iteration: 71017
loss: 1.0082868337631226,grad_norm: 0.9433875022735498, iteration: 71018
loss: 1.0691372156143188,grad_norm: 0.9755871669840316, iteration: 71019
loss: 1.017144799232483,grad_norm: 0.9999991763819575, iteration: 71020
loss: 0.9998164176940918,grad_norm: 0.9560984038750376, iteration: 71021
loss: 1.001071572303772,grad_norm: 0.9999992171600799, iteration: 71022
loss: 1.0162625312805176,grad_norm: 0.9999991412387425, iteration: 71023
loss: 1.0114914178848267,grad_norm: 0.9999989722994483, iteration: 71024
loss: 0.9835394024848938,grad_norm: 0.8458173983641091, iteration: 71025
loss: 0.991711437702179,grad_norm: 0.9509468766143202, iteration: 71026
loss: 1.0108023881912231,grad_norm: 0.9999990770948616, iteration: 71027
loss: 0.9946292042732239,grad_norm: 0.9966104263365265, iteration: 71028
loss: 0.9913539290428162,grad_norm: 0.9763828498914945, iteration: 71029
loss: 1.0185238122940063,grad_norm: 0.9999992555641668, iteration: 71030
loss: 0.9713389277458191,grad_norm: 0.9999990903511893, iteration: 71031
loss: 1.0162838697433472,grad_norm: 0.9999990964087245, iteration: 71032
loss: 0.9962266683578491,grad_norm: 0.9999992246222635, iteration: 71033
loss: 1.0191943645477295,grad_norm: 0.9419598817718491, iteration: 71034
loss: 0.9768380522727966,grad_norm: 0.9999992395149837, iteration: 71035
loss: 0.9571448564529419,grad_norm: 0.999999048610843, iteration: 71036
loss: 1.056268334388733,grad_norm: 0.9999992196860756, iteration: 71037
loss: 0.9815459251403809,grad_norm: 0.8471374472716756, iteration: 71038
loss: 1.0126491785049438,grad_norm: 0.8739246859870538, iteration: 71039
loss: 1.0167583227157593,grad_norm: 0.9999989820260523, iteration: 71040
loss: 0.9836904406547546,grad_norm: 0.9785933330092146, iteration: 71041
loss: 0.9848812818527222,grad_norm: 0.9634783989160173, iteration: 71042
loss: 1.0178964138031006,grad_norm: 0.9294777888077607, iteration: 71043
loss: 1.0301262140274048,grad_norm: 0.9999992419547516, iteration: 71044
loss: 0.9885300993919373,grad_norm: 0.9999990271468954, iteration: 71045
loss: 0.9899833798408508,grad_norm: 0.9830307937500449, iteration: 71046
loss: 1.0235341787338257,grad_norm: 0.999999313633091, iteration: 71047
loss: 1.025069236755371,grad_norm: 0.9999991656605756, iteration: 71048
loss: 0.979011595249176,grad_norm: 0.9233750185091245, iteration: 71049
loss: 1.0567715167999268,grad_norm: 0.9999990934678321, iteration: 71050
loss: 1.0248167514801025,grad_norm: 0.9982725954018256, iteration: 71051
loss: 0.960511326789856,grad_norm: 0.9822745812380054, iteration: 71052
loss: 1.0191445350646973,grad_norm: 0.9999990134647969, iteration: 71053
loss: 1.0042078495025635,grad_norm: 0.8627483837760671, iteration: 71054
loss: 1.0207935571670532,grad_norm: 0.9592318284784761, iteration: 71055
loss: 1.003096580505371,grad_norm: 0.9238516420489841, iteration: 71056
loss: 0.9817644953727722,grad_norm: 0.9999990960293141, iteration: 71057
loss: 0.9894464612007141,grad_norm: 0.9999991631114841, iteration: 71058
loss: 1.0239639282226562,grad_norm: 0.9999989488606409, iteration: 71059
loss: 0.9982243180274963,grad_norm: 0.9125360029663266, iteration: 71060
loss: 1.0146855115890503,grad_norm: 0.9999991693434579, iteration: 71061
loss: 1.005264163017273,grad_norm: 0.9129563048158506, iteration: 71062
loss: 0.9767082929611206,grad_norm: 0.999999031126571, iteration: 71063
loss: 1.0146563053131104,grad_norm: 0.9999990858981079, iteration: 71064
loss: 0.998650848865509,grad_norm: 0.9999990521620699, iteration: 71065
loss: 1.0089243650436401,grad_norm: 0.9006208100238642, iteration: 71066
loss: 1.0042574405670166,grad_norm: 0.9606863990582712, iteration: 71067
loss: 0.9793605208396912,grad_norm: 0.9873289893552087, iteration: 71068
loss: 1.0194233655929565,grad_norm: 0.9006217426190756, iteration: 71069
loss: 1.038785696029663,grad_norm: 0.9999989470248177, iteration: 71070
loss: 1.0374447107315063,grad_norm: 0.9999991400431554, iteration: 71071
loss: 0.9884845614433289,grad_norm: 0.999999166317279, iteration: 71072
loss: 0.9943025708198547,grad_norm: 0.9302950884425457, iteration: 71073
loss: 1.0078002214431763,grad_norm: 0.9746601040747664, iteration: 71074
loss: 1.0026658773422241,grad_norm: 0.9999991161682965, iteration: 71075
loss: 0.9593936204910278,grad_norm: 0.9999990738616311, iteration: 71076
loss: 1.001481056213379,grad_norm: 0.9999989927705302, iteration: 71077
loss: 1.0178064107894897,grad_norm: 0.9999990143419676, iteration: 71078
loss: 1.020446538925171,grad_norm: 0.9999990812351789, iteration: 71079
loss: 1.0782939195632935,grad_norm: 0.9999992040495382, iteration: 71080
loss: 0.9777093529701233,grad_norm: 0.8846102026586888, iteration: 71081
loss: 1.005070447921753,grad_norm: 0.9999993950710079, iteration: 71082
loss: 1.0139048099517822,grad_norm: 0.9344962593559379, iteration: 71083
loss: 1.0148955583572388,grad_norm: 0.9999992034036149, iteration: 71084
loss: 1.0327469110488892,grad_norm: 0.9532730338048053, iteration: 71085
loss: 0.9820007085800171,grad_norm: 0.9642548354997301, iteration: 71086
loss: 1.0162639617919922,grad_norm: 0.9999991231752795, iteration: 71087
loss: 0.9982432126998901,grad_norm: 0.953511168458678, iteration: 71088
loss: 1.0117278099060059,grad_norm: 0.9451645855691581, iteration: 71089
loss: 0.9988917112350464,grad_norm: 0.999999048596636, iteration: 71090
loss: 1.0007009506225586,grad_norm: 0.9116191788069985, iteration: 71091
loss: 1.0080137252807617,grad_norm: 0.842143015285086, iteration: 71092
loss: 1.0009641647338867,grad_norm: 0.9377074020966653, iteration: 71093
loss: 0.9776141047477722,grad_norm: 0.9999991364661129, iteration: 71094
loss: 0.9719673991203308,grad_norm: 0.9999990069361783, iteration: 71095
loss: 1.0154457092285156,grad_norm: 0.9225478826549701, iteration: 71096
loss: 1.0018730163574219,grad_norm: 0.999999004915757, iteration: 71097
loss: 0.9785343408584595,grad_norm: 0.9999989763525511, iteration: 71098
loss: 1.0167630910873413,grad_norm: 0.9999991305243995, iteration: 71099
loss: 0.9851831793785095,grad_norm: 0.9104171866509767, iteration: 71100
loss: 1.0406057834625244,grad_norm: 0.9888068575874474, iteration: 71101
loss: 0.9950057864189148,grad_norm: 0.998200226294914, iteration: 71102
loss: 0.9950501322746277,grad_norm: 0.9535026192701731, iteration: 71103
loss: 1.0080859661102295,grad_norm: 0.999999140833124, iteration: 71104
loss: 0.9978132843971252,grad_norm: 0.878403254495651, iteration: 71105
loss: 0.9862263798713684,grad_norm: 0.9999990135699609, iteration: 71106
loss: 0.9962207674980164,grad_norm: 0.9324760109098738, iteration: 71107
loss: 1.010091781616211,grad_norm: 0.9999992282778429, iteration: 71108
loss: 0.9872530102729797,grad_norm: 0.885340160384378, iteration: 71109
loss: 1.0048848390579224,grad_norm: 0.8579776374878219, iteration: 71110
loss: 0.9906441569328308,grad_norm: 0.9999992073959111, iteration: 71111
loss: 0.995826244354248,grad_norm: 0.9571579751686387, iteration: 71112
loss: 0.9742714166641235,grad_norm: 0.9702944531737611, iteration: 71113
loss: 0.9992420673370361,grad_norm: 0.9999994239392294, iteration: 71114
loss: 1.0395787954330444,grad_norm: 0.8819688336935365, iteration: 71115
loss: 0.9905378222465515,grad_norm: 0.9054346419906305, iteration: 71116
loss: 1.0005218982696533,grad_norm: 0.9916783606239628, iteration: 71117
loss: 1.0086116790771484,grad_norm: 0.9999991482161459, iteration: 71118
loss: 1.0154390335083008,grad_norm: 0.999998961098508, iteration: 71119
loss: 0.9807990193367004,grad_norm: 0.9688436852836578, iteration: 71120
loss: 1.016095519065857,grad_norm: 0.9999998596673054, iteration: 71121
loss: 1.0047531127929688,grad_norm: 0.9999991515226263, iteration: 71122
loss: 0.985782265663147,grad_norm: 0.9827681217384424, iteration: 71123
loss: 1.0145875215530396,grad_norm: 0.9999991656536864, iteration: 71124
loss: 1.0175085067749023,grad_norm: 0.9999989263341648, iteration: 71125
loss: 1.0391238927841187,grad_norm: 0.9999999414264148, iteration: 71126
loss: 0.9835020899772644,grad_norm: 0.9583345090621177, iteration: 71127
loss: 0.9353041648864746,grad_norm: 0.84908589220137, iteration: 71128
loss: 0.9974520802497864,grad_norm: 0.9999991214734211, iteration: 71129
loss: 0.972711443901062,grad_norm: 0.9999990300525619, iteration: 71130
loss: 0.9806902408599854,grad_norm: 0.9999997896912097, iteration: 71131
loss: 1.0025700330734253,grad_norm: 0.999999211304577, iteration: 71132
loss: 1.0032187700271606,grad_norm: 0.9999991663999725, iteration: 71133
loss: 1.1878153085708618,grad_norm: 0.9999996983314019, iteration: 71134
loss: 0.9988697171211243,grad_norm: 0.9999991486153866, iteration: 71135
loss: 0.9968173503875732,grad_norm: 0.985483784957191, iteration: 71136
loss: 0.9803600907325745,grad_norm: 0.9470992928726152, iteration: 71137
loss: 0.9765156507492065,grad_norm: 0.9257152521333732, iteration: 71138
loss: 0.9709094762802124,grad_norm: 0.999999177493114, iteration: 71139
loss: 1.0291352272033691,grad_norm: 0.9999990217854037, iteration: 71140
loss: 0.9877868294715881,grad_norm: 0.9015084023852307, iteration: 71141
loss: 1.0328164100646973,grad_norm: 0.992850604916245, iteration: 71142
loss: 0.9734395146369934,grad_norm: 0.9999991031799641, iteration: 71143
loss: 0.9788862466812134,grad_norm: 0.9999992914264475, iteration: 71144
loss: 0.9531627893447876,grad_norm: 0.9878105391015061, iteration: 71145
loss: 0.9678393602371216,grad_norm: 0.9999991058441371, iteration: 71146
loss: 1.013918161392212,grad_norm: 0.99999898075775, iteration: 71147
loss: 0.9807494282722473,grad_norm: 0.8519256610179066, iteration: 71148
loss: 0.9961442351341248,grad_norm: 0.9906078974341676, iteration: 71149
loss: 1.0099928379058838,grad_norm: 0.9999990351049735, iteration: 71150
loss: 1.0156581401824951,grad_norm: 0.9999992578020575, iteration: 71151
loss: 0.9989586472511292,grad_norm: 0.9999991006323548, iteration: 71152
loss: 1.0144504308700562,grad_norm: 0.9999989802923003, iteration: 71153
loss: 0.9949613213539124,grad_norm: 0.9948912657674623, iteration: 71154
loss: 0.9944603443145752,grad_norm: 0.9999991995683292, iteration: 71155
loss: 0.9437102675437927,grad_norm: 0.9167375868417369, iteration: 71156
loss: 1.0519341230392456,grad_norm: 0.9999991963311565, iteration: 71157
loss: 0.964961588382721,grad_norm: 0.8478600446590534, iteration: 71158
loss: 1.0461900234222412,grad_norm: 0.9464954281549423, iteration: 71159
loss: 1.0236622095108032,grad_norm: 0.9999990880849918, iteration: 71160
loss: 1.0415505170822144,grad_norm: 0.9478517844760115, iteration: 71161
loss: 1.0101432800292969,grad_norm: 0.9999992809376408, iteration: 71162
loss: 1.0044604539871216,grad_norm: 0.9999992194344131, iteration: 71163
loss: 0.9856394529342651,grad_norm: 0.9999992880791001, iteration: 71164
loss: 1.0460262298583984,grad_norm: 0.9999993345552963, iteration: 71165
loss: 0.9960300922393799,grad_norm: 0.8903537094718663, iteration: 71166
loss: 1.052859902381897,grad_norm: 0.8653839598198098, iteration: 71167
loss: 0.9544540643692017,grad_norm: 0.9999991140540537, iteration: 71168
loss: 0.9878485798835754,grad_norm: 0.9815550582585939, iteration: 71169
loss: 0.9935051798820496,grad_norm: 0.9686090709998422, iteration: 71170
loss: 1.007556676864624,grad_norm: 0.9184476521269573, iteration: 71171
loss: 1.0150469541549683,grad_norm: 0.9999990101093178, iteration: 71172
loss: 0.964806079864502,grad_norm: 0.9999992961777053, iteration: 71173
loss: 1.0003963708877563,grad_norm: 0.9999992095257451, iteration: 71174
loss: 0.9997620582580566,grad_norm: 0.9565256631832012, iteration: 71175
loss: 1.0213042497634888,grad_norm: 0.9999990261824576, iteration: 71176
loss: 1.0013830661773682,grad_norm: 0.9999993956235559, iteration: 71177
loss: 1.0644067525863647,grad_norm: 0.9999994543210678, iteration: 71178
loss: 1.0352261066436768,grad_norm: 0.999999588911986, iteration: 71179
loss: 1.0575411319732666,grad_norm: 0.9999995789486085, iteration: 71180
loss: 1.032616376876831,grad_norm: 0.9999991234354573, iteration: 71181
loss: 1.0128990411758423,grad_norm: 0.9999991181915857, iteration: 71182
loss: 0.9899182915687561,grad_norm: 0.9999991448916883, iteration: 71183
loss: 1.018705129623413,grad_norm: 0.9999992776763837, iteration: 71184
loss: 0.9968634247779846,grad_norm: 0.9999989906348458, iteration: 71185
loss: 1.0001477003097534,grad_norm: 0.99999900990122, iteration: 71186
loss: 0.9941909909248352,grad_norm: 0.8769526815755971, iteration: 71187
loss: 1.043160319328308,grad_norm: 0.9999991737706252, iteration: 71188
loss: 1.0021833181381226,grad_norm: 0.9999992347974556, iteration: 71189
loss: 0.9755293130874634,grad_norm: 0.9999991199211911, iteration: 71190
loss: 1.0193432569503784,grad_norm: 0.9999991623647099, iteration: 71191
loss: 0.995745062828064,grad_norm: 0.9778354201671235, iteration: 71192
loss: 1.0212876796722412,grad_norm: 0.9999996464614834, iteration: 71193
loss: 1.022325038909912,grad_norm: 0.9999991881277763, iteration: 71194
loss: 0.9906843304634094,grad_norm: 0.9999999238452304, iteration: 71195
loss: 0.9774202108383179,grad_norm: 0.9465463742614417, iteration: 71196
loss: 1.0188590288162231,grad_norm: 0.9999991673478038, iteration: 71197
loss: 1.029676079750061,grad_norm: 0.9999990666586006, iteration: 71198
loss: 0.981689989566803,grad_norm: 0.999999015365071, iteration: 71199
loss: 1.0108243227005005,grad_norm: 0.9999992293397083, iteration: 71200
loss: 1.0144456624984741,grad_norm: 0.9999991679034264, iteration: 71201
loss: 0.9937499165534973,grad_norm: 0.9999990091180505, iteration: 71202
loss: 1.001746416091919,grad_norm: 0.9999992578541095, iteration: 71203
loss: 0.9960630536079407,grad_norm: 0.8960609982681538, iteration: 71204
loss: 1.0043975114822388,grad_norm: 0.9999992235251601, iteration: 71205
loss: 0.9995484352111816,grad_norm: 0.9534524287751961, iteration: 71206
loss: 0.9880074858665466,grad_norm: 0.9999991675304047, iteration: 71207
loss: 0.9781343936920166,grad_norm: 0.9560420269390152, iteration: 71208
loss: 1.045678734779358,grad_norm: 0.9999998127294033, iteration: 71209
loss: 1.0190491676330566,grad_norm: 0.8936084974060192, iteration: 71210
loss: 1.0026183128356934,grad_norm: 0.9999997642579602, iteration: 71211
loss: 1.0119640827178955,grad_norm: 0.999999088956684, iteration: 71212
loss: 0.9832338690757751,grad_norm: 0.999999106638113, iteration: 71213
loss: 1.0353741645812988,grad_norm: 0.9999991024274459, iteration: 71214
loss: 0.9897306561470032,grad_norm: 0.9999990000928722, iteration: 71215
loss: 0.9879898428916931,grad_norm: 0.8528930124892846, iteration: 71216
loss: 0.9370877742767334,grad_norm: 0.9999991003171851, iteration: 71217
loss: 0.9806122183799744,grad_norm: 0.9761494693470908, iteration: 71218
loss: 1.0321860313415527,grad_norm: 0.9976917984627285, iteration: 71219
loss: 0.976003110408783,grad_norm: 0.9999991745832794, iteration: 71220
loss: 0.9917056560516357,grad_norm: 0.8007058284938158, iteration: 71221
loss: 0.9726888537406921,grad_norm: 0.9582945158214657, iteration: 71222
loss: 1.0056757926940918,grad_norm: 0.9264672686965624, iteration: 71223
loss: 0.9997398257255554,grad_norm: 0.9999991696808661, iteration: 71224
loss: 1.0338730812072754,grad_norm: 0.8855291837951538, iteration: 71225
loss: 0.9712010622024536,grad_norm: 0.9677197611885371, iteration: 71226
loss: 1.028145432472229,grad_norm: 0.9999993635947565, iteration: 71227
loss: 1.0018393993377686,grad_norm: 0.999999082490326, iteration: 71228
loss: 1.004359483718872,grad_norm: 0.9553260602468242, iteration: 71229
loss: 1.0134299993515015,grad_norm: 0.9999991611836827, iteration: 71230
loss: 1.0100473165512085,grad_norm: 0.9999996618448898, iteration: 71231
loss: 0.9865250587463379,grad_norm: 0.9999990906519829, iteration: 71232
loss: 0.9739313125610352,grad_norm: 0.999998862868552, iteration: 71233
loss: 1.0102659463882446,grad_norm: 0.9999992382241453, iteration: 71234
loss: 1.0219557285308838,grad_norm: 0.999999188628154, iteration: 71235
loss: 0.9883166551589966,grad_norm: 0.9999990911471466, iteration: 71236
loss: 0.998378574848175,grad_norm: 0.9999991953787171, iteration: 71237
loss: 1.0040756464004517,grad_norm: 0.8526451162121336, iteration: 71238
loss: 1.0226951837539673,grad_norm: 0.9757418582048271, iteration: 71239
loss: 1.0407463312149048,grad_norm: 0.9999991968580617, iteration: 71240
loss: 1.0210683345794678,grad_norm: 0.9999992115485034, iteration: 71241
loss: 1.0051714181900024,grad_norm: 0.9999994780540212, iteration: 71242
loss: 1.006160020828247,grad_norm: 0.9999991000434066, iteration: 71243
loss: 1.0578346252441406,grad_norm: 0.9999995143589188, iteration: 71244
loss: 1.018984317779541,grad_norm: 0.999999123264266, iteration: 71245
loss: 1.0283986330032349,grad_norm: 0.9999991741272235, iteration: 71246
loss: 0.9887674450874329,grad_norm: 0.9133136026957656, iteration: 71247
loss: 1.0205974578857422,grad_norm: 0.9999997566317557, iteration: 71248
loss: 1.019037127494812,grad_norm: 0.9999990733837295, iteration: 71249
loss: 1.0439726114273071,grad_norm: 0.9999993015187103, iteration: 71250
loss: 0.9966893196105957,grad_norm: 0.9999991015978498, iteration: 71251
loss: 1.0111382007598877,grad_norm: 0.9999991179470287, iteration: 71252
loss: 0.9862932562828064,grad_norm: 0.9999991916535146, iteration: 71253
loss: 1.0256761312484741,grad_norm: 0.9999992773094876, iteration: 71254
loss: 1.016812801361084,grad_norm: 0.9999989575224294, iteration: 71255
loss: 1.0165804624557495,grad_norm: 0.9999990746969094, iteration: 71256
loss: 1.0549840927124023,grad_norm: 0.9999992771163546, iteration: 71257
loss: 1.020166277885437,grad_norm: 0.9999989761063156, iteration: 71258
loss: 0.9925103187561035,grad_norm: 0.9999992305430245, iteration: 71259
loss: 1.011208176612854,grad_norm: 0.9678663268022758, iteration: 71260
loss: 1.034462332725525,grad_norm: 0.999999633181918, iteration: 71261
loss: 0.9900156855583191,grad_norm: 0.9853567139696696, iteration: 71262
loss: 0.9792968034744263,grad_norm: 0.9734479334056639, iteration: 71263
loss: 0.9987509846687317,grad_norm: 0.9999990640838177, iteration: 71264
loss: 0.9700854420661926,grad_norm: 0.9916892313491008, iteration: 71265
loss: 0.9787958860397339,grad_norm: 0.9999991952881556, iteration: 71266
loss: 0.9854764342308044,grad_norm: 0.9768552889132004, iteration: 71267
loss: 1.0013614892959595,grad_norm: 0.9749746291835062, iteration: 71268
loss: 1.0182180404663086,grad_norm: 0.9999990555290472, iteration: 71269
loss: 1.0231270790100098,grad_norm: 0.8137165417258889, iteration: 71270
loss: 0.9656011462211609,grad_norm: 0.9813065863379101, iteration: 71271
loss: 1.012021780014038,grad_norm: 0.9999994613869447, iteration: 71272
loss: 0.9840902090072632,grad_norm: 0.9999995410630841, iteration: 71273
loss: 0.9944309592247009,grad_norm: 0.9999991769941525, iteration: 71274
loss: 0.9905697703361511,grad_norm: 0.9999992659374902, iteration: 71275
loss: 1.0050104856491089,grad_norm: 0.999999345441251, iteration: 71276
loss: 0.9987735748291016,grad_norm: 0.9854750328413732, iteration: 71277
loss: 0.9949854016304016,grad_norm: 0.9999991252073256, iteration: 71278
loss: 0.9985531568527222,grad_norm: 0.9889320913745095, iteration: 71279
loss: 1.0036343336105347,grad_norm: 0.9380672461390662, iteration: 71280
loss: 1.068459153175354,grad_norm: 0.9999992740562333, iteration: 71281
loss: 1.0060081481933594,grad_norm: 0.9143884272489883, iteration: 71282
loss: 1.0338867902755737,grad_norm: 0.9999991864348807, iteration: 71283
loss: 1.01123046875,grad_norm: 0.9981552644493692, iteration: 71284
loss: 1.0252325534820557,grad_norm: 0.9999992490241756, iteration: 71285
loss: 1.0206129550933838,grad_norm: 0.9999990080084694, iteration: 71286
loss: 1.0336644649505615,grad_norm: 0.9999992437420777, iteration: 71287
loss: 1.0022047758102417,grad_norm: 0.9897496532344702, iteration: 71288
loss: 1.017284631729126,grad_norm: 0.999999187563694, iteration: 71289
loss: 1.0196701288223267,grad_norm: 0.9999990890088692, iteration: 71290
loss: 0.9928349852561951,grad_norm: 0.9679549606102827, iteration: 71291
loss: 1.0412516593933105,grad_norm: 0.9999992189064291, iteration: 71292
loss: 0.9800964593887329,grad_norm: 0.9999991895492795, iteration: 71293
loss: 0.9995182752609253,grad_norm: 0.9999990587390926, iteration: 71294
loss: 1.0267714262008667,grad_norm: 0.999999442452248, iteration: 71295
loss: 0.978642463684082,grad_norm: 0.9999990392644862, iteration: 71296
loss: 0.9899687170982361,grad_norm: 0.9999991905595301, iteration: 71297
loss: 1.012028694152832,grad_norm: 0.9919284703882675, iteration: 71298
loss: 0.9702157378196716,grad_norm: 0.9999998643848498, iteration: 71299
loss: 1.0034053325653076,grad_norm: 0.9999998924710718, iteration: 71300
loss: 1.0098706483840942,grad_norm: 0.9999991410173229, iteration: 71301
loss: 1.0113195180892944,grad_norm: 0.9999992954623903, iteration: 71302
loss: 1.0144809484481812,grad_norm: 0.9218613646199856, iteration: 71303
loss: 1.0333553552627563,grad_norm: 0.9999992510740524, iteration: 71304
loss: 1.0302150249481201,grad_norm: 0.9322223637945367, iteration: 71305
loss: 1.0190215110778809,grad_norm: 0.8305801667805723, iteration: 71306
loss: 0.9993056654930115,grad_norm: 0.9186719282930466, iteration: 71307
loss: 0.9694194793701172,grad_norm: 0.9785608780371593, iteration: 71308
loss: 1.0190666913986206,grad_norm: 0.9999990619070653, iteration: 71309
loss: 0.9882520437240601,grad_norm: 0.9219166353152901, iteration: 71310
loss: 1.0055185556411743,grad_norm: 0.9999992770309463, iteration: 71311
loss: 0.9917782545089722,grad_norm: 0.9999991494883862, iteration: 71312
loss: 1.006184697151184,grad_norm: 0.9999992505722914, iteration: 71313
loss: 0.9910047650337219,grad_norm: 0.8903020325516574, iteration: 71314
loss: 1.0308165550231934,grad_norm: 0.9122797514777907, iteration: 71315
loss: 0.9942393898963928,grad_norm: 0.999999065005102, iteration: 71316
loss: 1.0173273086547852,grad_norm: 0.9486286731395799, iteration: 71317
loss: 1.0059078931808472,grad_norm: 0.8238425689464128, iteration: 71318
loss: 1.0381463766098022,grad_norm: 0.999999008991518, iteration: 71319
loss: 1.036131739616394,grad_norm: 0.999999718075946, iteration: 71320
loss: 0.9937052130699158,grad_norm: 0.9984249667061607, iteration: 71321
loss: 0.9973564147949219,grad_norm: 0.9496105310529174, iteration: 71322
loss: 0.9920969009399414,grad_norm: 0.8548971082387583, iteration: 71323
loss: 0.9871786236763,grad_norm: 0.9999992669326891, iteration: 71324
loss: 1.0077807903289795,grad_norm: 0.9576577556804573, iteration: 71325
loss: 1.0246762037277222,grad_norm: 0.993070262716293, iteration: 71326
loss: 1.0291303396224976,grad_norm: 0.968475910691907, iteration: 71327
loss: 0.9693660140037537,grad_norm: 0.914958490872851, iteration: 71328
loss: 0.9840928912162781,grad_norm: 0.8723179097313254, iteration: 71329
loss: 0.9307496547698975,grad_norm: 0.9999990504747513, iteration: 71330
loss: 0.9845768213272095,grad_norm: 0.9461716611247056, iteration: 71331
loss: 0.9971839785575867,grad_norm: 0.9832948634978673, iteration: 71332
loss: 0.9718917012214661,grad_norm: 0.999999204727782, iteration: 71333
loss: 0.9644395112991333,grad_norm: 0.9999990714637292, iteration: 71334
loss: 1.0001330375671387,grad_norm: 0.9999991639635811, iteration: 71335
loss: 1.0081028938293457,grad_norm: 0.9999989525215158, iteration: 71336
loss: 0.9691627025604248,grad_norm: 0.9818032039182929, iteration: 71337
loss: 1.0102535486221313,grad_norm: 0.9868583396218857, iteration: 71338
loss: 1.005484700202942,grad_norm: 0.9999989570961222, iteration: 71339
loss: 1.0250768661499023,grad_norm: 0.84265699751036, iteration: 71340
loss: 1.014971375465393,grad_norm: 0.9999991252953842, iteration: 71341
loss: 0.9967238306999207,grad_norm: 0.96827166677302, iteration: 71342
loss: 0.9766144156455994,grad_norm: 0.9737206308025858, iteration: 71343
loss: 1.012299656867981,grad_norm: 0.9732288042434595, iteration: 71344
loss: 0.9840261340141296,grad_norm: 0.9675654973683449, iteration: 71345
loss: 1.0275144577026367,grad_norm: 0.9879840135542287, iteration: 71346
loss: 1.0298609733581543,grad_norm: 0.9999994313223969, iteration: 71347
loss: 0.9926422834396362,grad_norm: 0.9724215009847208, iteration: 71348
loss: 0.9955596923828125,grad_norm: 0.9463726268051724, iteration: 71349
loss: 1.0171294212341309,grad_norm: 0.9522057098468786, iteration: 71350
loss: 0.9920918345451355,grad_norm: 0.8685597230745998, iteration: 71351
loss: 1.0347503423690796,grad_norm: 0.9999990040138804, iteration: 71352
loss: 0.9853442907333374,grad_norm: 0.9902321734795488, iteration: 71353
loss: 1.0230892896652222,grad_norm: 0.9999991168963792, iteration: 71354
loss: 0.975380003452301,grad_norm: 0.9999991030526666, iteration: 71355
loss: 1.0050197839736938,grad_norm: 0.9999991299440747, iteration: 71356
loss: 1.0116006135940552,grad_norm: 0.9999990433867285, iteration: 71357
loss: 0.9665178656578064,grad_norm: 0.9999989720740342, iteration: 71358
loss: 0.982722818851471,grad_norm: 0.8341738797943042, iteration: 71359
loss: 0.9626450538635254,grad_norm: 0.9396539255478883, iteration: 71360
loss: 0.9647188782691956,grad_norm: 0.9999991532990248, iteration: 71361
loss: 0.9826987981796265,grad_norm: 0.8473390895955456, iteration: 71362
loss: 1.012712001800537,grad_norm: 0.9999990811873759, iteration: 71363
loss: 1.0351039171218872,grad_norm: 0.9999991807658228, iteration: 71364
loss: 1.0237178802490234,grad_norm: 0.9609977932781824, iteration: 71365
loss: 1.0164368152618408,grad_norm: 0.9999992134509521, iteration: 71366
loss: 0.9676824808120728,grad_norm: 0.9999990308740481, iteration: 71367
loss: 0.9836757183074951,grad_norm: 0.9999991780657516, iteration: 71368
loss: 1.0530784130096436,grad_norm: 0.9687805585807729, iteration: 71369
loss: 1.0087101459503174,grad_norm: 0.9999991625141836, iteration: 71370
loss: 1.0002020597457886,grad_norm: 0.9999991465030892, iteration: 71371
loss: 0.966013491153717,grad_norm: 0.9999991656601054, iteration: 71372
loss: 0.9920350313186646,grad_norm: 0.9999992905380772, iteration: 71373
loss: 0.9946327805519104,grad_norm: 0.9999988538864056, iteration: 71374
loss: 0.9841476082801819,grad_norm: 0.9999992780632394, iteration: 71375
loss: 1.0312848091125488,grad_norm: 0.9999992137409379, iteration: 71376
loss: 1.0102561712265015,grad_norm: 0.9671795971012196, iteration: 71377
loss: 0.9718458652496338,grad_norm: 0.9999990334177381, iteration: 71378
loss: 0.9523544311523438,grad_norm: 0.9999991605717642, iteration: 71379
loss: 0.9555802345275879,grad_norm: 0.9999991225597135, iteration: 71380
loss: 0.9775376915931702,grad_norm: 0.9999991576884344, iteration: 71381
loss: 0.9706197381019592,grad_norm: 0.9999989629726501, iteration: 71382
loss: 1.0159319639205933,grad_norm: 0.7725770136310155, iteration: 71383
loss: 1.0081075429916382,grad_norm: 0.9810671642373, iteration: 71384
loss: 0.9744012355804443,grad_norm: 0.99964092646471, iteration: 71385
loss: 1.0089178085327148,grad_norm: 0.999999142161538, iteration: 71386
loss: 0.9752299785614014,grad_norm: 0.889289467148395, iteration: 71387
loss: 1.0348774194717407,grad_norm: 0.9999990599770545, iteration: 71388
loss: 1.007453441619873,grad_norm: 0.9999991912529335, iteration: 71389
loss: 0.9928762316703796,grad_norm: 0.9999990453795673, iteration: 71390
loss: 1.0243089199066162,grad_norm: 0.9999993387848644, iteration: 71391
loss: 0.9693910479545593,grad_norm: 0.999999080252322, iteration: 71392
loss: 0.9715109467506409,grad_norm: 0.9999992699552807, iteration: 71393
loss: 1.0132192373275757,grad_norm: 0.9800638890243929, iteration: 71394
loss: 1.0329551696777344,grad_norm: 0.9999990120829736, iteration: 71395
loss: 0.9883263111114502,grad_norm: 0.9999990320806283, iteration: 71396
loss: 1.0065224170684814,grad_norm: 0.9999990209849976, iteration: 71397
loss: 1.0158331394195557,grad_norm: 0.96495700638752, iteration: 71398
loss: 1.0135139226913452,grad_norm: 0.9999991019172885, iteration: 71399
loss: 0.9639198780059814,grad_norm: 0.9999991987580789, iteration: 71400
loss: 0.9679867625236511,grad_norm: 0.9999989564690019, iteration: 71401
loss: 0.999602198600769,grad_norm: 0.9999991383891615, iteration: 71402
loss: 1.0029408931732178,grad_norm: 0.9281074922867749, iteration: 71403
loss: 1.0255911350250244,grad_norm: 0.982848026561765, iteration: 71404
loss: 1.0544517040252686,grad_norm: 0.9656259580458625, iteration: 71405
loss: 1.008255958557129,grad_norm: 0.9846409071854894, iteration: 71406
loss: 0.9966322183609009,grad_norm: 0.9955125434617064, iteration: 71407
loss: 1.0027720928192139,grad_norm: 0.9134178046379825, iteration: 71408
loss: 1.0306874513626099,grad_norm: 0.923214612962369, iteration: 71409
loss: 0.9722118973731995,grad_norm: 0.9999991497525794, iteration: 71410
loss: 0.9927151799201965,grad_norm: 0.9999989997374638, iteration: 71411
loss: 0.9734365940093994,grad_norm: 0.9999990383317731, iteration: 71412
loss: 0.9982616305351257,grad_norm: 0.9999991036783856, iteration: 71413
loss: 1.043961524963379,grad_norm: 0.9999989624333053, iteration: 71414
loss: 0.9813991189002991,grad_norm: 0.9999991539464178, iteration: 71415
loss: 0.9763861894607544,grad_norm: 0.9999990779377524, iteration: 71416
loss: 1.0014430284500122,grad_norm: 0.999999115984601, iteration: 71417
loss: 0.9810920357704163,grad_norm: 0.9999991581881991, iteration: 71418
loss: 1.0113282203674316,grad_norm: 0.9877790806875201, iteration: 71419
loss: 1.029911756515503,grad_norm: 0.9970234676080598, iteration: 71420
loss: 1.0323920249938965,grad_norm: 0.9202922708306209, iteration: 71421
loss: 0.949367880821228,grad_norm: 0.9679961818225863, iteration: 71422
loss: 1.038838505744934,grad_norm: 0.9999992328509192, iteration: 71423
loss: 0.9989079236984253,grad_norm: 0.9999991932281165, iteration: 71424
loss: 1.0008690357208252,grad_norm: 0.8074561762833685, iteration: 71425
loss: 1.0165773630142212,grad_norm: 0.9488300778828473, iteration: 71426
loss: 0.9922787547111511,grad_norm: 0.999999116731992, iteration: 71427
loss: 0.9969566464424133,grad_norm: 0.8772135784470273, iteration: 71428
loss: 0.9488785862922668,grad_norm: 0.9999990868299009, iteration: 71429
loss: 1.001842737197876,grad_norm: 0.9999991487472139, iteration: 71430
loss: 0.9833832383155823,grad_norm: 0.9866145929833992, iteration: 71431
loss: 0.9963600039482117,grad_norm: 0.999999355058322, iteration: 71432
loss: 1.0075258016586304,grad_norm: 0.999999035723598, iteration: 71433
loss: 0.9665459990501404,grad_norm: 0.9162304797892563, iteration: 71434
loss: 0.9975413084030151,grad_norm: 0.8650181130028746, iteration: 71435
loss: 0.9802942872047424,grad_norm: 0.8572265988055807, iteration: 71436
loss: 1.0201269388198853,grad_norm: 0.9999990226014167, iteration: 71437
loss: 0.9762596487998962,grad_norm: 0.9391868465492997, iteration: 71438
loss: 0.9842900633811951,grad_norm: 0.9100103105653021, iteration: 71439
loss: 0.995746374130249,grad_norm: 0.9999991810259521, iteration: 71440
loss: 1.0035039186477661,grad_norm: 0.9999990365477051, iteration: 71441
loss: 1.003351092338562,grad_norm: 0.9999991396143691, iteration: 71442
loss: 1.0160510540008545,grad_norm: 0.9999991229785511, iteration: 71443
loss: 1.008170485496521,grad_norm: 0.9999991146395176, iteration: 71444
loss: 1.0019707679748535,grad_norm: 0.9954527049839054, iteration: 71445
loss: 1.0102096796035767,grad_norm: 0.9999991780523603, iteration: 71446
loss: 1.017526388168335,grad_norm: 0.9999990402498578, iteration: 71447
loss: 0.9735161662101746,grad_norm: 0.9798851633679321, iteration: 71448
loss: 1.0254826545715332,grad_norm: 0.8219585995105028, iteration: 71449
loss: 1.0040762424468994,grad_norm: 0.9870200134513598, iteration: 71450
loss: 1.0595744848251343,grad_norm: 0.9999993599986889, iteration: 71451
loss: 1.0530116558074951,grad_norm: 0.9999991311452274, iteration: 71452
loss: 1.0339428186416626,grad_norm: 0.9999991688621394, iteration: 71453
loss: 0.9788074493408203,grad_norm: 0.9730364774435613, iteration: 71454
loss: 1.0353914499282837,grad_norm: 0.9796126812833972, iteration: 71455
loss: 1.015975832939148,grad_norm: 0.9999992606929278, iteration: 71456
loss: 0.9905781149864197,grad_norm: 0.9999992568250646, iteration: 71457
loss: 0.9981456995010376,grad_norm: 0.9999990008450199, iteration: 71458
loss: 0.994890570640564,grad_norm: 0.9999991231847075, iteration: 71459
loss: 0.9858109951019287,grad_norm: 0.8239871695703069, iteration: 71460
loss: 0.9907069206237793,grad_norm: 0.9999992875493374, iteration: 71461
loss: 0.9653472304344177,grad_norm: 0.9301987400836761, iteration: 71462
loss: 0.9581094980239868,grad_norm: 0.9360105254916409, iteration: 71463
loss: 1.0129170417785645,grad_norm: 0.9135508174752888, iteration: 71464
loss: 1.002351999282837,grad_norm: 0.9999991296130031, iteration: 71465
loss: 1.0057066679000854,grad_norm: 0.999999094671353, iteration: 71466
loss: 0.9858942627906799,grad_norm: 0.9757271918965977, iteration: 71467
loss: 1.031095266342163,grad_norm: 0.9454209703502635, iteration: 71468
loss: 0.9760836958885193,grad_norm: 0.9999990496869415, iteration: 71469
loss: 0.9941713809967041,grad_norm: 0.9993624632879594, iteration: 71470
loss: 1.0315937995910645,grad_norm: 0.9687654763261249, iteration: 71471
loss: 1.0168088674545288,grad_norm: 0.9456367664091444, iteration: 71472
loss: 0.9952041506767273,grad_norm: 0.9999991399001972, iteration: 71473
loss: 0.9854834079742432,grad_norm: 0.9999990263195231, iteration: 71474
loss: 1.0085211992263794,grad_norm: 0.9999990219359374, iteration: 71475
loss: 0.9935275316238403,grad_norm: 0.9275855118095636, iteration: 71476
loss: 0.993550717830658,grad_norm: 0.9416880733254489, iteration: 71477
loss: 0.9699191451072693,grad_norm: 0.9999990229264496, iteration: 71478
loss: 1.005399465560913,grad_norm: 0.9999990832521097, iteration: 71479
loss: 0.9798949360847473,grad_norm: 0.9999992434471029, iteration: 71480
loss: 0.9986221194267273,grad_norm: 0.9568469954671802, iteration: 71481
loss: 0.9884492754936218,grad_norm: 0.9999990367607678, iteration: 71482
loss: 1.0236735343933105,grad_norm: 0.9999990537314257, iteration: 71483
loss: 1.010024905204773,grad_norm: 0.9999992353174613, iteration: 71484
loss: 1.0025044679641724,grad_norm: 0.9999991074770747, iteration: 71485
loss: 1.0414329767227173,grad_norm: 0.9366314700285693, iteration: 71486
loss: 1.009795069694519,grad_norm: 0.9999992212394487, iteration: 71487
loss: 1.008339762687683,grad_norm: 0.9391633569571431, iteration: 71488
loss: 0.9955406785011292,grad_norm: 0.9999989952977383, iteration: 71489
loss: 0.9933160543441772,grad_norm: 0.9999992598077421, iteration: 71490
loss: 0.9911952614784241,grad_norm: 0.9560595633461678, iteration: 71491
loss: 0.990754246711731,grad_norm: 0.8859030021323709, iteration: 71492
loss: 1.0262678861618042,grad_norm: 0.9093734262552297, iteration: 71493
loss: 0.9997462630271912,grad_norm: 0.9427898219956883, iteration: 71494
loss: 1.0151517391204834,grad_norm: 0.8447768917986214, iteration: 71495
loss: 0.9969261884689331,grad_norm: 0.9999993005066765, iteration: 71496
loss: 1.0020122528076172,grad_norm: 0.9999991678451017, iteration: 71497
loss: 1.0108182430267334,grad_norm: 0.9934670941750148, iteration: 71498
loss: 1.0446875095367432,grad_norm: 0.9999991303309649, iteration: 71499
loss: 1.0180484056472778,grad_norm: 0.9999991114250089, iteration: 71500
loss: 0.9940783977508545,grad_norm: 0.9999990227179393, iteration: 71501
loss: 1.0159211158752441,grad_norm: 0.9999992953224828, iteration: 71502
loss: 0.9883427023887634,grad_norm: 0.9999991369107772, iteration: 71503
loss: 0.9999602437019348,grad_norm: 0.8903910544532098, iteration: 71504
loss: 1.005640983581543,grad_norm: 0.999999172872766, iteration: 71505
loss: 1.0234571695327759,grad_norm: 0.9999988878752272, iteration: 71506
loss: 0.9999580383300781,grad_norm: 0.9999990461827991, iteration: 71507
loss: 1.0169305801391602,grad_norm: 0.959714232510876, iteration: 71508
loss: 1.007411003112793,grad_norm: 0.9999991988227978, iteration: 71509
loss: 1.0202957391738892,grad_norm: 0.999999081943018, iteration: 71510
loss: 1.0078364610671997,grad_norm: 0.9647456059586986, iteration: 71511
loss: 1.0320149660110474,grad_norm: 0.9999997085447837, iteration: 71512
loss: 1.019047498703003,grad_norm: 0.9999992086352991, iteration: 71513
loss: 0.9784451723098755,grad_norm: 0.7988268441565757, iteration: 71514
loss: 1.0016807317733765,grad_norm: 0.9999990945215446, iteration: 71515
loss: 1.0126985311508179,grad_norm: 0.9999991044893468, iteration: 71516
loss: 0.9789685010910034,grad_norm: 0.9524157420191042, iteration: 71517
loss: 1.0339895486831665,grad_norm: 0.9999993207665828, iteration: 71518
loss: 0.9838023781776428,grad_norm: 0.9999991466847503, iteration: 71519
loss: 0.9910246729850769,grad_norm: 0.9999991873427395, iteration: 71520
loss: 0.9870572090148926,grad_norm: 0.9999991163068949, iteration: 71521
loss: 0.9961772561073303,grad_norm: 0.9999991579001339, iteration: 71522
loss: 1.015653133392334,grad_norm: 0.9423284930159508, iteration: 71523
loss: 1.023215889930725,grad_norm: 0.9999992431109906, iteration: 71524
loss: 1.025945782661438,grad_norm: 0.9566671615771385, iteration: 71525
loss: 1.0177268981933594,grad_norm: 0.9999991567731809, iteration: 71526
loss: 1.0048292875289917,grad_norm: 0.9119458052521481, iteration: 71527
loss: 0.9798099994659424,grad_norm: 0.9999991208848167, iteration: 71528
loss: 1.0095783472061157,grad_norm: 0.9336304677148085, iteration: 71529
loss: 0.9887921214103699,grad_norm: 0.860659008071515, iteration: 71530
loss: 1.0245414972305298,grad_norm: 0.9999991862446158, iteration: 71531
loss: 0.9902053475379944,grad_norm: 0.9999992149496209, iteration: 71532
loss: 1.00245201587677,grad_norm: 0.99999914533887, iteration: 71533
loss: 0.9949959516525269,grad_norm: 0.9313346584163903, iteration: 71534
loss: 0.9991133809089661,grad_norm: 0.9999993119502643, iteration: 71535
loss: 1.0211317539215088,grad_norm: 0.9999990695657872, iteration: 71536
loss: 1.0030337572097778,grad_norm: 0.999999311152208, iteration: 71537
loss: 0.9354371428489685,grad_norm: 0.9576492257418044, iteration: 71538
loss: 0.9926891326904297,grad_norm: 0.9999990643922951, iteration: 71539
loss: 1.002484679222107,grad_norm: 0.999999055975792, iteration: 71540
loss: 1.0214612483978271,grad_norm: 0.9483081560032238, iteration: 71541
loss: 0.9663863182067871,grad_norm: 0.9725674800701892, iteration: 71542
loss: 0.9944771528244019,grad_norm: 0.9999989467470465, iteration: 71543
loss: 1.0581707954406738,grad_norm: 0.9999994259312723, iteration: 71544
loss: 0.970317542552948,grad_norm: 0.9999991081346519, iteration: 71545
loss: 0.9692381024360657,grad_norm: 0.9508588808578918, iteration: 71546
loss: 1.0336782932281494,grad_norm: 0.988466768070863, iteration: 71547
loss: 0.9818345308303833,grad_norm: 0.9999992856563972, iteration: 71548
loss: 0.965709388256073,grad_norm: 0.9999991286910687, iteration: 71549
loss: 0.9967520833015442,grad_norm: 0.9999992344439905, iteration: 71550
loss: 1.0070723295211792,grad_norm: 0.9999991758208063, iteration: 71551
loss: 1.0374574661254883,grad_norm: 0.9456686852723795, iteration: 71552
loss: 0.9994556307792664,grad_norm: 0.9999990431492459, iteration: 71553
loss: 0.9695962071418762,grad_norm: 0.9826343234512533, iteration: 71554
loss: 1.0300570726394653,grad_norm: 0.8843469411051282, iteration: 71555
loss: 0.9856237769126892,grad_norm: 0.9999989489246962, iteration: 71556
loss: 0.9885654449462891,grad_norm: 0.9999993085328803, iteration: 71557
loss: 0.9848716855049133,grad_norm: 0.9999990870247993, iteration: 71558
loss: 0.9734988808631897,grad_norm: 0.9682498327708603, iteration: 71559
loss: 0.9492879509925842,grad_norm: 0.9844002974329479, iteration: 71560
loss: 1.005682349205017,grad_norm: 0.9201549775659246, iteration: 71561
loss: 1.00746488571167,grad_norm: 0.9999990899603911, iteration: 71562
loss: 1.036434292793274,grad_norm: 0.9999990381049332, iteration: 71563
loss: 0.9630488157272339,grad_norm: 0.9705161866495841, iteration: 71564
loss: 1.024411678314209,grad_norm: 0.9999991343637832, iteration: 71565
loss: 0.9882385730743408,grad_norm: 0.8837406589731658, iteration: 71566
loss: 1.0007827281951904,grad_norm: 0.9999995887048828, iteration: 71567
loss: 1.0016121864318848,grad_norm: 0.9999990520392072, iteration: 71568
loss: 0.999739944934845,grad_norm: 0.9876395374553043, iteration: 71569
loss: 0.9796638488769531,grad_norm: 0.9999990504278177, iteration: 71570
loss: 0.9709349274635315,grad_norm: 0.9278971055743049, iteration: 71571
loss: 1.12286376953125,grad_norm: 0.9999991890961437, iteration: 71572
loss: 1.0243991613388062,grad_norm: 0.999999136560261, iteration: 71573
loss: 1.0312169790267944,grad_norm: 0.9592347127466923, iteration: 71574
loss: 1.0046247243881226,grad_norm: 0.8366955365638753, iteration: 71575
loss: 1.005253553390503,grad_norm: 0.9574681223344246, iteration: 71576
loss: 0.9901525378227234,grad_norm: 0.9028313486002933, iteration: 71577
loss: 0.9588028192520142,grad_norm: 0.9160150740996174, iteration: 71578
loss: 1.0039260387420654,grad_norm: 0.9999991623169842, iteration: 71579
loss: 0.9845132231712341,grad_norm: 0.9681823877391651, iteration: 71580
loss: 1.0160422325134277,grad_norm: 0.9657496724689794, iteration: 71581
loss: 1.0057320594787598,grad_norm: 0.9999991344231443, iteration: 71582
loss: 1.0297284126281738,grad_norm: 0.9163007206103422, iteration: 71583
loss: 0.9606236815452576,grad_norm: 0.9435394360233271, iteration: 71584
loss: 1.0000041723251343,grad_norm: 0.9999990609288799, iteration: 71585
loss: 0.9744316339492798,grad_norm: 0.9999992095073125, iteration: 71586
loss: 0.9639726877212524,grad_norm: 0.9256101761982124, iteration: 71587
loss: 1.0660275220870972,grad_norm: 0.9999990984229483, iteration: 71588
loss: 0.9923129081726074,grad_norm: 0.999999376552559, iteration: 71589
loss: 1.0284976959228516,grad_norm: 0.9999991068663125, iteration: 71590
loss: 0.9596067070960999,grad_norm: 0.9999992703573931, iteration: 71591
loss: 1.0299134254455566,grad_norm: 0.9999990931436324, iteration: 71592
loss: 1.0138773918151855,grad_norm: 0.9999990341619568, iteration: 71593
loss: 0.9780323505401611,grad_norm: 0.9999990575735096, iteration: 71594
loss: 0.9676707983016968,grad_norm: 0.9999991391399893, iteration: 71595
loss: 0.9636169075965881,grad_norm: 0.8982323726228381, iteration: 71596
loss: 0.9841702580451965,grad_norm: 0.9837627413110442, iteration: 71597
loss: 1.0104305744171143,grad_norm: 0.966816130135789, iteration: 71598
loss: 0.9861418008804321,grad_norm: 0.8627567532069249, iteration: 71599
loss: 1.0210793018341064,grad_norm: 0.9999991334260361, iteration: 71600
loss: 1.030180811882019,grad_norm: 0.9999990557984998, iteration: 71601
loss: 0.9799919724464417,grad_norm: 0.9999990908551566, iteration: 71602
loss: 0.9590039253234863,grad_norm: 0.9704223556732061, iteration: 71603
loss: 0.9843664765357971,grad_norm: 0.9999989560840611, iteration: 71604
loss: 0.9973626732826233,grad_norm: 0.896833898434163, iteration: 71605
loss: 0.9327512383460999,grad_norm: 0.9469364297535088, iteration: 71606
loss: 0.9470387101173401,grad_norm: 0.9827823136259694, iteration: 71607
loss: 0.9940013289451599,grad_norm: 0.8507176976371799, iteration: 71608
loss: 1.0117557048797607,grad_norm: 0.9999991239916618, iteration: 71609
loss: 1.021580696105957,grad_norm: 0.9999990483878749, iteration: 71610
loss: 1.0358893871307373,grad_norm: 0.9999990867100211, iteration: 71611
loss: 0.9847063422203064,grad_norm: 0.893088797592491, iteration: 71612
loss: 1.004114031791687,grad_norm: 0.9999991994488391, iteration: 71613
loss: 0.9987052083015442,grad_norm: 0.9999991160420639, iteration: 71614
loss: 0.9604635238647461,grad_norm: 0.9999990668046367, iteration: 71615
loss: 0.9980446100234985,grad_norm: 0.9999992193709752, iteration: 71616
loss: 0.9882751703262329,grad_norm: 0.999999034228942, iteration: 71617
loss: 1.0114936828613281,grad_norm: 0.880651778259916, iteration: 71618
loss: 0.936800479888916,grad_norm: 0.9999992218464588, iteration: 71619
loss: 1.0671467781066895,grad_norm: 0.9999991285867701, iteration: 71620
loss: 0.9634827375411987,grad_norm: 0.9999990922629332, iteration: 71621
loss: 1.0037953853607178,grad_norm: 0.9999991003687977, iteration: 71622
loss: 0.9994866847991943,grad_norm: 0.9999991585899939, iteration: 71623
loss: 1.0113551616668701,grad_norm: 0.9999990975222042, iteration: 71624
loss: 1.006205439567566,grad_norm: 0.9999991819964041, iteration: 71625
loss: 1.0335758924484253,grad_norm: 0.9999991694435272, iteration: 71626
loss: 0.9896973371505737,grad_norm: 0.9999990644176772, iteration: 71627
loss: 0.9904660582542419,grad_norm: 0.9189021180198023, iteration: 71628
loss: 0.9812198877334595,grad_norm: 0.9999991311808688, iteration: 71629
loss: 1.0035322904586792,grad_norm: 0.9999992015772584, iteration: 71630
loss: 0.9774001836776733,grad_norm: 0.9447987460511377, iteration: 71631
loss: 0.9587292671203613,grad_norm: 0.9999990288273182, iteration: 71632
loss: 1.0260088443756104,grad_norm: 0.9363101087375604, iteration: 71633
loss: 0.9969142079353333,grad_norm: 0.9608700234045406, iteration: 71634
loss: 0.9987372756004333,grad_norm: 0.9301989784434581, iteration: 71635
loss: 1.0246350765228271,grad_norm: 0.9999989912508919, iteration: 71636
loss: 0.9719474911689758,grad_norm: 0.9999990512847855, iteration: 71637
loss: 1.003860354423523,grad_norm: 0.9999991403416186, iteration: 71638
loss: 0.9848785400390625,grad_norm: 0.9999991159075611, iteration: 71639
loss: 1.0158047676086426,grad_norm: 0.99999907603281, iteration: 71640
loss: 1.0048408508300781,grad_norm: 0.9999990993772887, iteration: 71641
loss: 1.0370337963104248,grad_norm: 0.9999994114140156, iteration: 71642
loss: 0.9963186979293823,grad_norm: 0.998729359459354, iteration: 71643
loss: 0.969487726688385,grad_norm: 0.9999991311793398, iteration: 71644
loss: 0.974870502948761,grad_norm: 0.9999992116457603, iteration: 71645
loss: 0.9767856597900391,grad_norm: 0.9999991378365225, iteration: 71646
loss: 0.9988332986831665,grad_norm: 0.8270245420738014, iteration: 71647
loss: 0.9812955856323242,grad_norm: 0.9999992157297436, iteration: 71648
loss: 0.9809898734092712,grad_norm: 0.999998991772637, iteration: 71649
loss: 0.9975862503051758,grad_norm: 0.9999991466119758, iteration: 71650
loss: 1.0346293449401855,grad_norm: 0.9999990860735003, iteration: 71651
loss: 0.9730486869812012,grad_norm: 0.9999991302325602, iteration: 71652
loss: 1.0050934553146362,grad_norm: 0.9246661792581238, iteration: 71653
loss: 0.9992733001708984,grad_norm: 0.9411059435165299, iteration: 71654
loss: 1.050438642501831,grad_norm: 0.9999991026655085, iteration: 71655
loss: 0.987963080406189,grad_norm: 0.9210119797107091, iteration: 71656
loss: 1.0193486213684082,grad_norm: 0.9999990932652356, iteration: 71657
loss: 1.0285847187042236,grad_norm: 0.9999990391837684, iteration: 71658
loss: 0.9985530376434326,grad_norm: 0.99999915534477, iteration: 71659
loss: 1.0317853689193726,grad_norm: 0.9999991483644666, iteration: 71660
loss: 1.0083844661712646,grad_norm: 0.9936167046976268, iteration: 71661
loss: 0.9862963557243347,grad_norm: 0.9277421575699153, iteration: 71662
loss: 0.9905168414115906,grad_norm: 0.9999989453950351, iteration: 71663
loss: 0.9410393834114075,grad_norm: 0.9826032912069531, iteration: 71664
loss: 1.0124454498291016,grad_norm: 0.999999708669694, iteration: 71665
loss: 0.9591877460479736,grad_norm: 0.999999285685153, iteration: 71666
loss: 1.0025173425674438,grad_norm: 0.99999917849469, iteration: 71667
loss: 1.0371067523956299,grad_norm: 0.9999999313360376, iteration: 71668
loss: 1.0021384954452515,grad_norm: 0.8933899048711471, iteration: 71669
loss: 0.9827239513397217,grad_norm: 0.9999991203943718, iteration: 71670
loss: 1.001079797744751,grad_norm: 0.9999991780376261, iteration: 71671
loss: 1.0296341180801392,grad_norm: 0.9006279651567416, iteration: 71672
loss: 0.9867355227470398,grad_norm: 0.9579989448878939, iteration: 71673
loss: 0.9536013007164001,grad_norm: 0.9472359694695852, iteration: 71674
loss: 0.9926903247833252,grad_norm: 0.9999990826797335, iteration: 71675
loss: 1.0452224016189575,grad_norm: 0.9618172786959828, iteration: 71676
loss: 0.9667672514915466,grad_norm: 0.9999990987307529, iteration: 71677
loss: 1.0112708806991577,grad_norm: 0.8940316869302743, iteration: 71678
loss: 1.0304720401763916,grad_norm: 0.8187401407830628, iteration: 71679
loss: 0.9946314692497253,grad_norm: 0.9999992939191679, iteration: 71680
loss: 0.9935994148254395,grad_norm: 0.9117514526773307, iteration: 71681
loss: 0.9829546213150024,grad_norm: 0.9999992225637465, iteration: 71682
loss: 1.0418239831924438,grad_norm: 0.9447533071477019, iteration: 71683
loss: 1.0217124223709106,grad_norm: 0.9999991959381533, iteration: 71684
loss: 0.9590185284614563,grad_norm: 0.9999993549670264, iteration: 71685
loss: 1.0053884983062744,grad_norm: 0.7524956135669574, iteration: 71686
loss: 0.9976203441619873,grad_norm: 0.999999218863399, iteration: 71687
loss: 0.9934857487678528,grad_norm: 0.9999996423423266, iteration: 71688
loss: 0.9793127775192261,grad_norm: 0.926700855343698, iteration: 71689
loss: 0.9780381321907043,grad_norm: 0.994872847785579, iteration: 71690
loss: 0.9619871377944946,grad_norm: 0.9999990508314879, iteration: 71691
loss: 1.011121392250061,grad_norm: 0.9999991361834993, iteration: 71692
loss: 0.9865883588790894,grad_norm: 0.9999992003800459, iteration: 71693
loss: 0.9996611475944519,grad_norm: 0.9059086516778149, iteration: 71694
loss: 1.0047626495361328,grad_norm: 0.9999992912268947, iteration: 71695
loss: 1.0339587926864624,grad_norm: 0.999999177567403, iteration: 71696
loss: 0.9818283319473267,grad_norm: 0.9999990651389078, iteration: 71697
loss: 0.993552565574646,grad_norm: 0.9785277688805879, iteration: 71698
loss: 0.9698500633239746,grad_norm: 0.9616467682220338, iteration: 71699
loss: 1.0089083909988403,grad_norm: 0.9999990947682608, iteration: 71700
loss: 1.0014171600341797,grad_norm: 0.999999078327011, iteration: 71701
loss: 1.0357879400253296,grad_norm: 0.9999992527771672, iteration: 71702
loss: 0.9878776669502258,grad_norm: 0.9777997971654206, iteration: 71703
loss: 0.9979978799819946,grad_norm: 0.9675612725338825, iteration: 71704
loss: 1.0055373907089233,grad_norm: 0.9999990019130108, iteration: 71705
loss: 1.0118895769119263,grad_norm: 0.9999991447260844, iteration: 71706
loss: 1.0033926963806152,grad_norm: 0.9999991120826784, iteration: 71707
loss: 1.012622356414795,grad_norm: 0.9999995610772737, iteration: 71708
loss: 1.0037518739700317,grad_norm: 0.9999990767947946, iteration: 71709
loss: 0.9958148002624512,grad_norm: 0.9169998104417643, iteration: 71710
loss: 1.0015547275543213,grad_norm: 0.999999064115744, iteration: 71711
loss: 1.0165332555770874,grad_norm: 0.9999990298903896, iteration: 71712
loss: 0.9946591854095459,grad_norm: 0.9571247107683386, iteration: 71713
loss: 1.0640082359313965,grad_norm: 0.9999993792938118, iteration: 71714
loss: 1.0149706602096558,grad_norm: 0.8357580714714664, iteration: 71715
loss: 1.0314571857452393,grad_norm: 0.9216756920436132, iteration: 71716
loss: 1.003700613975525,grad_norm: 0.9999995035683967, iteration: 71717
loss: 1.0142666101455688,grad_norm: 0.9999989387257212, iteration: 71718
loss: 0.9885183572769165,grad_norm: 0.9999991984810241, iteration: 71719
loss: 0.977901041507721,grad_norm: 0.999999220344345, iteration: 71720
loss: 1.0184792280197144,grad_norm: 0.9748504037272512, iteration: 71721
loss: 1.0162391662597656,grad_norm: 0.9546500414826438, iteration: 71722
loss: 1.003564476966858,grad_norm: 0.9999991679218977, iteration: 71723
loss: 0.9931841492652893,grad_norm: 0.9999991340556936, iteration: 71724
loss: 1.0545750856399536,grad_norm: 0.999999768335852, iteration: 71725
loss: 0.9804627299308777,grad_norm: 0.9441548109692459, iteration: 71726
loss: 0.989017903804779,grad_norm: 0.9999991738511607, iteration: 71727
loss: 0.9825114011764526,grad_norm: 0.9987486176356443, iteration: 71728
loss: 1.0167019367218018,grad_norm: 0.9999991446618597, iteration: 71729
loss: 1.0348531007766724,grad_norm: 0.9999988708763581, iteration: 71730
loss: 1.011770486831665,grad_norm: 0.9999991147830598, iteration: 71731
loss: 1.019428014755249,grad_norm: 0.9999992124030892, iteration: 71732
loss: 1.020174503326416,grad_norm: 0.99999915629896, iteration: 71733
loss: 0.9866330623626709,grad_norm: 0.9385558400907118, iteration: 71734
loss: 1.0201812982559204,grad_norm: 0.9999990078274035, iteration: 71735
loss: 0.9951397776603699,grad_norm: 0.9999990312382085, iteration: 71736
loss: 1.0095404386520386,grad_norm: 0.9999991485202253, iteration: 71737
loss: 0.9577464461326599,grad_norm: 0.9999992303136473, iteration: 71738
loss: 1.040623426437378,grad_norm: 0.99999907486593, iteration: 71739
loss: 1.0299347639083862,grad_norm: 0.9999995119713792, iteration: 71740
loss: 0.9932127594947815,grad_norm: 0.9999991685031239, iteration: 71741
loss: 1.0235238075256348,grad_norm: 0.9999991153657624, iteration: 71742
loss: 0.9764145016670227,grad_norm: 0.9999990396122166, iteration: 71743
loss: 0.9912554025650024,grad_norm: 0.9999990852017467, iteration: 71744
loss: 0.9857088327407837,grad_norm: 0.8618750734514249, iteration: 71745
loss: 0.9720045924186707,grad_norm: 0.9999991521443042, iteration: 71746
loss: 1.0013906955718994,grad_norm: 0.9999989990612246, iteration: 71747
loss: 0.958216667175293,grad_norm: 0.9078241305786349, iteration: 71748
loss: 0.9682856202125549,grad_norm: 0.950543303713667, iteration: 71749
loss: 0.9952690005302429,grad_norm: 0.9999991055424895, iteration: 71750
loss: 0.9882763624191284,grad_norm: 0.9999991749527682, iteration: 71751
loss: 1.013556957244873,grad_norm: 0.9999991634778498, iteration: 71752
loss: 1.024415373802185,grad_norm: 0.9999990859139616, iteration: 71753
loss: 1.0156285762786865,grad_norm: 0.8883940049215138, iteration: 71754
loss: 1.0179318189620972,grad_norm: 0.9999990851185668, iteration: 71755
loss: 1.0353076457977295,grad_norm: 0.9999993027281173, iteration: 71756
loss: 0.9733810424804688,grad_norm: 0.9551800265782477, iteration: 71757
loss: 1.0074414014816284,grad_norm: 0.9999992498725055, iteration: 71758
loss: 1.0103981494903564,grad_norm: 0.9999990029712195, iteration: 71759
loss: 0.9926890730857849,grad_norm: 0.9999993426288746, iteration: 71760
loss: 0.9817632436752319,grad_norm: 0.921564008224843, iteration: 71761
loss: 0.9978393316268921,grad_norm: 0.9999992347372743, iteration: 71762
loss: 0.9860904812812805,grad_norm: 0.8841954579449671, iteration: 71763
loss: 1.0036507844924927,grad_norm: 0.9999991587398682, iteration: 71764
loss: 0.9846579432487488,grad_norm: 0.9999992225794458, iteration: 71765
loss: 1.0213372707366943,grad_norm: 0.9999991145720172, iteration: 71766
loss: 1.0345062017440796,grad_norm: 0.9999990588008135, iteration: 71767
loss: 1.0069632530212402,grad_norm: 0.9999991075838877, iteration: 71768
loss: 1.004198431968689,grad_norm: 0.9999991261117488, iteration: 71769
loss: 0.9705773591995239,grad_norm: 0.9999991947848139, iteration: 71770
loss: 0.9848276376724243,grad_norm: 0.9925662101512258, iteration: 71771
loss: 1.0183818340301514,grad_norm: 0.999999020145757, iteration: 71772
loss: 1.0444427728652954,grad_norm: 0.9999995136770571, iteration: 71773
loss: 0.9626182317733765,grad_norm: 0.9999991784488571, iteration: 71774
loss: 1.03328275680542,grad_norm: 0.8494450857725401, iteration: 71775
loss: 0.9538149237632751,grad_norm: 0.9999991870729692, iteration: 71776
loss: 1.0018854141235352,grad_norm: 0.9999991924564918, iteration: 71777
loss: 1.0061161518096924,grad_norm: 0.999999082570063, iteration: 71778
loss: 0.9964963793754578,grad_norm: 0.9407775585609773, iteration: 71779
loss: 1.0034277439117432,grad_norm: 0.9999991203323182, iteration: 71780
loss: 1.0438088178634644,grad_norm: 0.9999995265536157, iteration: 71781
loss: 1.0220383405685425,grad_norm: 0.8903906063585161, iteration: 71782
loss: 1.0236603021621704,grad_norm: 0.9705224988482831, iteration: 71783
loss: 1.0066940784454346,grad_norm: 0.8837048746940043, iteration: 71784
loss: 0.9843307137489319,grad_norm: 0.9999992268781156, iteration: 71785
loss: 1.0175034999847412,grad_norm: 0.9999991688981426, iteration: 71786
loss: 0.9947630167007446,grad_norm: 0.8559026907404947, iteration: 71787
loss: 1.0027477741241455,grad_norm: 0.9999993826304139, iteration: 71788
loss: 0.9736008644104004,grad_norm: 0.9999990064889781, iteration: 71789
loss: 0.9664103388786316,grad_norm: 0.9999991490263852, iteration: 71790
loss: 1.0391459465026855,grad_norm: 0.9090636305608569, iteration: 71791
loss: 1.0000083446502686,grad_norm: 0.9505521875019781, iteration: 71792
loss: 0.9933340549468994,grad_norm: 0.9999990260541254, iteration: 71793
loss: 0.9881649613380432,grad_norm: 0.9999990773707429, iteration: 71794
loss: 0.9687738418579102,grad_norm: 0.999999221204956, iteration: 71795
loss: 1.0034807920455933,grad_norm: 0.9999995731498212, iteration: 71796
loss: 0.9731763601303101,grad_norm: 0.8575578896480345, iteration: 71797
loss: 0.9850392937660217,grad_norm: 0.8891848654489813, iteration: 71798
loss: 1.012282133102417,grad_norm: 0.9999990443059272, iteration: 71799
loss: 1.0046648979187012,grad_norm: 0.9999992292342875, iteration: 71800
loss: 0.9835794568061829,grad_norm: 0.9999991842081228, iteration: 71801
loss: 1.0186671018600464,grad_norm: 0.9999992291217931, iteration: 71802
loss: 1.0174200534820557,grad_norm: 0.8878916171980812, iteration: 71803
loss: 1.0235649347305298,grad_norm: 0.9350558620856233, iteration: 71804
loss: 0.9857962727546692,grad_norm: 0.9999990790803042, iteration: 71805
loss: 1.0232396125793457,grad_norm: 0.9999995088608602, iteration: 71806
loss: 1.0190731287002563,grad_norm: 0.9999992144863216, iteration: 71807
loss: 0.994974672794342,grad_norm: 0.9955162297979536, iteration: 71808
loss: 0.9836504459381104,grad_norm: 0.8026431243870584, iteration: 71809
loss: 1.0350440740585327,grad_norm: 0.9999990749345975, iteration: 71810
loss: 1.0150171518325806,grad_norm: 0.999999020993548, iteration: 71811
loss: 1.0258607864379883,grad_norm: 0.9999995792475895, iteration: 71812
loss: 1.0284161567687988,grad_norm: 0.999999144261601, iteration: 71813
loss: 1.013411283493042,grad_norm: 0.9232155254256641, iteration: 71814
loss: 1.0237029790878296,grad_norm: 0.8657881691178283, iteration: 71815
loss: 0.9978007078170776,grad_norm: 0.9999991566545601, iteration: 71816
loss: 1.0075781345367432,grad_norm: 0.9999991792753654, iteration: 71817
loss: 1.0314276218414307,grad_norm: 0.9859754322932303, iteration: 71818
loss: 1.044001817703247,grad_norm: 0.9999991938863622, iteration: 71819
loss: 1.058817744255066,grad_norm: 0.9999993154203355, iteration: 71820
loss: 1.0012085437774658,grad_norm: 0.9999998208166389, iteration: 71821
loss: 1.0290356874465942,grad_norm: 0.999999091064341, iteration: 71822
loss: 1.0106090307235718,grad_norm: 0.9951662974855657, iteration: 71823
loss: 1.0112030506134033,grad_norm: 0.9999988762205223, iteration: 71824
loss: 1.030613660812378,grad_norm: 0.9999992825632337, iteration: 71825
loss: 0.9789406657218933,grad_norm: 0.999999147367345, iteration: 71826
loss: 0.951988935470581,grad_norm: 0.9999992910890334, iteration: 71827
loss: 1.0059548616409302,grad_norm: 0.9999990728857299, iteration: 71828
loss: 0.9647101759910583,grad_norm: 0.9999992192834493, iteration: 71829
loss: 1.0142508745193481,grad_norm: 0.9999991529777852, iteration: 71830
loss: 1.0194569826126099,grad_norm: 0.8514722399832024, iteration: 71831
loss: 0.9968134760856628,grad_norm: 0.8744411699109815, iteration: 71832
loss: 1.0092062950134277,grad_norm: 0.9943476418805691, iteration: 71833
loss: 0.9829094409942627,grad_norm: 0.9999990409762164, iteration: 71834
loss: 1.0156803131103516,grad_norm: 0.8219979727108138, iteration: 71835
loss: 0.9877864718437195,grad_norm: 0.8724586693600451, iteration: 71836
loss: 1.010604977607727,grad_norm: 0.932051324081165, iteration: 71837
loss: 1.0117559432983398,grad_norm: 0.9999991283728905, iteration: 71838
loss: 1.0038388967514038,grad_norm: 0.9999990644756941, iteration: 71839
loss: 0.9483177065849304,grad_norm: 0.9999990829403665, iteration: 71840
loss: 0.983467698097229,grad_norm: 0.9169145823073869, iteration: 71841
loss: 1.013253927230835,grad_norm: 0.9603175986484908, iteration: 71842
loss: 1.0124047994613647,grad_norm: 0.9454738906594632, iteration: 71843
loss: 1.032988429069519,grad_norm: 0.8878603268269144, iteration: 71844
loss: 1.0008875131607056,grad_norm: 0.999999135155359, iteration: 71845
loss: 1.0219511985778809,grad_norm: 0.9999997990557726, iteration: 71846
loss: 0.9914821982383728,grad_norm: 0.9999991833531702, iteration: 71847
loss: 1.050179362297058,grad_norm: 0.9999991361912953, iteration: 71848
loss: 1.0253506898880005,grad_norm: 0.9999991408744923, iteration: 71849
loss: 1.0186408758163452,grad_norm: 0.8999563246112496, iteration: 71850
loss: 1.0035364627838135,grad_norm: 0.9999989775005921, iteration: 71851
loss: 1.0284196138381958,grad_norm: 0.8792938263704484, iteration: 71852
loss: 0.9697376489639282,grad_norm: 0.9008205241155933, iteration: 71853
loss: 0.986181378364563,grad_norm: 0.9379804387187707, iteration: 71854
loss: 0.9285810589790344,grad_norm: 0.9999989995515143, iteration: 71855
loss: 1.0204216241836548,grad_norm: 0.9999990278413028, iteration: 71856
loss: 1.0313316583633423,grad_norm: 0.9999991339192303, iteration: 71857
loss: 1.0219403505325317,grad_norm: 0.9999992134370925, iteration: 71858
loss: 0.9851561188697815,grad_norm: 0.8767875527074873, iteration: 71859
loss: 0.9826688170433044,grad_norm: 0.9978468796447427, iteration: 71860
loss: 1.0377196073532104,grad_norm: 0.946975891625401, iteration: 71861
loss: 1.0267432928085327,grad_norm: 0.9999991747679399, iteration: 71862
loss: 1.0185394287109375,grad_norm: 0.9498045287194429, iteration: 71863
loss: 0.9839887022972107,grad_norm: 0.9956078350160439, iteration: 71864
loss: 1.0012177228927612,grad_norm: 0.8003369654303032, iteration: 71865
loss: 0.9787114262580872,grad_norm: 0.9999991126259362, iteration: 71866
loss: 1.0353223085403442,grad_norm: 0.9999996779565777, iteration: 71867
loss: 1.0214943885803223,grad_norm: 0.8955962180483027, iteration: 71868
loss: 0.999434769153595,grad_norm: 0.9999993025827687, iteration: 71869
loss: 1.0454461574554443,grad_norm: 0.9577732121722655, iteration: 71870
loss: 0.9947694540023804,grad_norm: 0.9999991916835423, iteration: 71871
loss: 1.011831283569336,grad_norm: 0.8613548887465721, iteration: 71872
loss: 1.0249406099319458,grad_norm: 0.9194506438918355, iteration: 71873
loss: 0.991310715675354,grad_norm: 0.999999102212911, iteration: 71874
loss: 1.0307053327560425,grad_norm: 0.9975797570227658, iteration: 71875
loss: 0.992563784122467,grad_norm: 0.9999991298831786, iteration: 71876
loss: 0.9929571747779846,grad_norm: 0.9999991476058165, iteration: 71877
loss: 1.0036721229553223,grad_norm: 0.960141890588543, iteration: 71878
loss: 0.9923279285430908,grad_norm: 0.9999992031492518, iteration: 71879
loss: 1.008589744567871,grad_norm: 0.9522969085138242, iteration: 71880
loss: 1.0237174034118652,grad_norm: 0.9999992221010906, iteration: 71881
loss: 1.0186054706573486,grad_norm: 0.9061839004971838, iteration: 71882
loss: 0.9994699358940125,grad_norm: 0.9999990465527673, iteration: 71883
loss: 0.9887836575508118,grad_norm: 0.8533693749612811, iteration: 71884
loss: 0.9907426834106445,grad_norm: 0.990468136315029, iteration: 71885
loss: 1.0002973079681396,grad_norm: 0.9999992366084874, iteration: 71886
loss: 0.9947237372398376,grad_norm: 0.9999990658486666, iteration: 71887
loss: 0.9754550457000732,grad_norm: 0.9999991302666553, iteration: 71888
loss: 1.0012729167938232,grad_norm: 0.9999991904556628, iteration: 71889
loss: 0.9986818432807922,grad_norm: 0.9999991588769002, iteration: 71890
loss: 1.0196731090545654,grad_norm: 0.9999990766565633, iteration: 71891
loss: 0.9822466373443604,grad_norm: 0.8848386081731414, iteration: 71892
loss: 0.9816643595695496,grad_norm: 0.9259128894256374, iteration: 71893
loss: 0.9768831729888916,grad_norm: 0.9643658788973095, iteration: 71894
loss: 1.0779064893722534,grad_norm: 0.999999696644462, iteration: 71895
loss: 0.9702351689338684,grad_norm: 0.8241766511036838, iteration: 71896
loss: 1.0308737754821777,grad_norm: 0.973220583220392, iteration: 71897
loss: 0.9854380488395691,grad_norm: 0.8409489232517188, iteration: 71898
loss: 0.9984148144721985,grad_norm: 0.9999991873479634, iteration: 71899
loss: 0.9702816009521484,grad_norm: 0.9772439525705257, iteration: 71900
loss: 0.9757755398750305,grad_norm: 0.970440241529087, iteration: 71901
loss: 0.9913840293884277,grad_norm: 0.9999992143710121, iteration: 71902
loss: 1.022226333618164,grad_norm: 0.999999513680727, iteration: 71903
loss: 0.9602958559989929,grad_norm: 0.9710029357388172, iteration: 71904
loss: 0.9964375495910645,grad_norm: 0.9999991937161662, iteration: 71905
loss: 1.0020301342010498,grad_norm: 0.9999992688908655, iteration: 71906
loss: 0.9826119542121887,grad_norm: 0.9999991668454604, iteration: 71907
loss: 1.0143163204193115,grad_norm: 0.9999991075719297, iteration: 71908
loss: 0.9784468412399292,grad_norm: 0.9999990802558815, iteration: 71909
loss: 1.003229022026062,grad_norm: 0.9542441066144692, iteration: 71910
loss: 1.003930687904358,grad_norm: 0.9999990322507577, iteration: 71911
loss: 1.0219255685806274,grad_norm: 0.9822755065981006, iteration: 71912
loss: 1.0268492698669434,grad_norm: 0.999999191779172, iteration: 71913
loss: 0.9999361038208008,grad_norm: 0.966288491816361, iteration: 71914
loss: 1.0036979913711548,grad_norm: 0.9554999656164253, iteration: 71915
loss: 0.9970430731773376,grad_norm: 0.9383546039552141, iteration: 71916
loss: 1.0370371341705322,grad_norm: 0.9946512736479011, iteration: 71917
loss: 0.9648643136024475,grad_norm: 0.9754421290428663, iteration: 71918
loss: 0.9919479489326477,grad_norm: 0.9999991623474183, iteration: 71919
loss: 1.0185328722000122,grad_norm: 0.9999991344485772, iteration: 71920
loss: 1.0033057928085327,grad_norm: 0.9999990997210632, iteration: 71921
loss: 0.9711984992027283,grad_norm: 0.9999991237653016, iteration: 71922
loss: 0.968886137008667,grad_norm: 0.9999992003096467, iteration: 71923
loss: 0.9882110357284546,grad_norm: 0.9062207560786861, iteration: 71924
loss: 1.037960171699524,grad_norm: 0.999999656827908, iteration: 71925
loss: 0.9890090227127075,grad_norm: 0.9999992219116329, iteration: 71926
loss: 0.9932963252067566,grad_norm: 0.9999991269396452, iteration: 71927
loss: 1.013427972793579,grad_norm: 0.9231365181145779, iteration: 71928
loss: 1.014002799987793,grad_norm: 0.9999990833638615, iteration: 71929
loss: 0.9582942128181458,grad_norm: 0.8837756713033944, iteration: 71930
loss: 1.0317870378494263,grad_norm: 0.9999992356603405, iteration: 71931
loss: 0.9807965755462646,grad_norm: 0.9999990414905525, iteration: 71932
loss: 0.946297287940979,grad_norm: 0.999999188612589, iteration: 71933
loss: 0.9638311862945557,grad_norm: 0.9999990169136604, iteration: 71934
loss: 0.9619609713554382,grad_norm: 0.9999992000621987, iteration: 71935
loss: 1.009169101715088,grad_norm: 0.990892778298415, iteration: 71936
loss: 0.9701576828956604,grad_norm: 0.9999990983265884, iteration: 71937
loss: 0.9994964599609375,grad_norm: 0.9598750020244429, iteration: 71938
loss: 0.9765319228172302,grad_norm: 0.9999992042020004, iteration: 71939
loss: 1.0126415491104126,grad_norm: 0.9999991861205286, iteration: 71940
loss: 1.0129464864730835,grad_norm: 0.9999996567942977, iteration: 71941
loss: 0.9919473528862,grad_norm: 0.9048837639237423, iteration: 71942
loss: 0.981742262840271,grad_norm: 0.9999991113093663, iteration: 71943
loss: 1.0244032144546509,grad_norm: 0.8885717547337936, iteration: 71944
loss: 0.9744795560836792,grad_norm: 0.9999991132538312, iteration: 71945
loss: 0.994768500328064,grad_norm: 0.9999991505404596, iteration: 71946
loss: 1.0043962001800537,grad_norm: 0.9999996655362942, iteration: 71947
loss: 1.017612338066101,grad_norm: 0.9999991016120362, iteration: 71948
loss: 1.0115398168563843,grad_norm: 0.9999990082758077, iteration: 71949
loss: 1.0180453062057495,grad_norm: 0.9999993696883339, iteration: 71950
loss: 1.030333399772644,grad_norm: 0.999999162414583, iteration: 71951
loss: 0.9831334948539734,grad_norm: 0.8832513448691987, iteration: 71952
loss: 1.0450234413146973,grad_norm: 0.9999994147376358, iteration: 71953
loss: 0.9637429714202881,grad_norm: 0.9999990471380963, iteration: 71954
loss: 1.0133898258209229,grad_norm: 0.999999041263527, iteration: 71955
loss: 1.01616370677948,grad_norm: 0.999999020749859, iteration: 71956
loss: 1.0192527770996094,grad_norm: 0.9999992145162121, iteration: 71957
loss: 0.9850075840950012,grad_norm: 0.999999201322166, iteration: 71958
loss: 0.9768218994140625,grad_norm: 0.9295314731488125, iteration: 71959
loss: 1.0004068613052368,grad_norm: 0.9999991650026536, iteration: 71960
loss: 0.9901794195175171,grad_norm: 0.9999992523372332, iteration: 71961
loss: 0.9935173988342285,grad_norm: 0.9969193724563343, iteration: 71962
loss: 0.9803009629249573,grad_norm: 0.9999990261790723, iteration: 71963
loss: 0.9824123978614807,grad_norm: 0.9999993160824453, iteration: 71964
loss: 1.0199857950210571,grad_norm: 0.9999990678925516, iteration: 71965
loss: 1.0132602453231812,grad_norm: 0.9999990070396456, iteration: 71966
loss: 1.0133289098739624,grad_norm: 0.9999990534421752, iteration: 71967
loss: 0.9749547839164734,grad_norm: 0.9510483656161857, iteration: 71968
loss: 0.97960364818573,grad_norm: 0.9880924998663636, iteration: 71969
loss: 1.0262333154678345,grad_norm: 0.9999993193959865, iteration: 71970
loss: 0.9617330431938171,grad_norm: 0.9999991832068295, iteration: 71971
loss: 0.9795919060707092,grad_norm: 0.9999990513816396, iteration: 71972
loss: 0.9931261539459229,grad_norm: 0.9999991117046894, iteration: 71973
loss: 1.0066215991973877,grad_norm: 0.9999991134876537, iteration: 71974
loss: 0.9933530688285828,grad_norm: 0.9999991624366884, iteration: 71975
loss: 0.9962401390075684,grad_norm: 0.92145927825307, iteration: 71976
loss: 1.0150312185287476,grad_norm: 0.8423856860017767, iteration: 71977
loss: 0.9600812196731567,grad_norm: 0.9999990948876432, iteration: 71978
loss: 1.0205940008163452,grad_norm: 0.9999991292241407, iteration: 71979
loss: 1.029514193534851,grad_norm: 0.9999988300622814, iteration: 71980
loss: 1.0312541723251343,grad_norm: 0.9391880761854888, iteration: 71981
loss: 1.0440998077392578,grad_norm: 0.9999994666565356, iteration: 71982
loss: 1.0054274797439575,grad_norm: 0.999998976428173, iteration: 71983
loss: 0.9782186150550842,grad_norm: 0.9132061497149061, iteration: 71984
loss: 0.9984365105628967,grad_norm: 0.8116321129348342, iteration: 71985
loss: 1.0094283819198608,grad_norm: 0.9999991529439365, iteration: 71986
loss: 0.9951897859573364,grad_norm: 0.9641618794120939, iteration: 71987
loss: 1.02110755443573,grad_norm: 0.9999994845314929, iteration: 71988
loss: 1.0263209342956543,grad_norm: 0.9999990557862974, iteration: 71989
loss: 1.0162473917007446,grad_norm: 0.9211549343074271, iteration: 71990
loss: 1.0080097913742065,grad_norm: 0.9999993340175918, iteration: 71991
loss: 1.0104035139083862,grad_norm: 0.984923183913583, iteration: 71992
loss: 0.9993253946304321,grad_norm: 0.9999992201524182, iteration: 71993
loss: 1.0207908153533936,grad_norm: 0.9430035813745101, iteration: 71994
loss: 1.0065654516220093,grad_norm: 0.9999991386041083, iteration: 71995
loss: 1.042237401008606,grad_norm: 0.9999993218538856, iteration: 71996
loss: 0.9960179328918457,grad_norm: 0.999999117334906, iteration: 71997
loss: 1.0229800939559937,grad_norm: 0.9999994434979801, iteration: 71998
loss: 1.0156502723693848,grad_norm: 0.9999992026048403, iteration: 71999
loss: 1.0057592391967773,grad_norm: 0.9999992226732193, iteration: 72000
loss: 0.969607949256897,grad_norm: 0.8933864100070271, iteration: 72001
loss: 1.0146186351776123,grad_norm: 0.9128665699168836, iteration: 72002
loss: 1.0104948282241821,grad_norm: 0.9999991472435396, iteration: 72003
loss: 1.0161657333374023,grad_norm: 0.9822649410399463, iteration: 72004
loss: 1.042654037475586,grad_norm: 0.9675357821691365, iteration: 72005
loss: 0.9911509156227112,grad_norm: 0.9999990620740701, iteration: 72006
loss: 1.0014184713363647,grad_norm: 0.9445252933183396, iteration: 72007
loss: 1.0156736373901367,grad_norm: 0.9999991751091569, iteration: 72008
loss: 1.0178741216659546,grad_norm: 0.9999994831110165, iteration: 72009
loss: 0.9742349982261658,grad_norm: 0.9924287297217617, iteration: 72010
loss: 1.0342212915420532,grad_norm: 0.9999990166999005, iteration: 72011
loss: 0.9940221905708313,grad_norm: 0.9999991715394367, iteration: 72012
loss: 0.9822873473167419,grad_norm: 0.9999994682321492, iteration: 72013
loss: 0.9854559302330017,grad_norm: 0.9995060382076905, iteration: 72014
loss: 1.0116515159606934,grad_norm: 0.9134895166535731, iteration: 72015
loss: 0.9860926866531372,grad_norm: 0.9999990235793592, iteration: 72016
loss: 1.009324312210083,grad_norm: 0.9550659498721348, iteration: 72017
loss: 0.9941365122795105,grad_norm: 0.99999910595224, iteration: 72018
loss: 0.9846479892730713,grad_norm: 0.9999991791753262, iteration: 72019
loss: 1.0019924640655518,grad_norm: 0.9999992866550452, iteration: 72020
loss: 0.9916077256202698,grad_norm: 0.9157783411332905, iteration: 72021
loss: 1.0168813467025757,grad_norm: 0.9999992465359027, iteration: 72022
loss: 1.0194660425186157,grad_norm: 0.9069511717804555, iteration: 72023
loss: 0.9832300543785095,grad_norm: 0.8599524378610874, iteration: 72024
loss: 0.980590283870697,grad_norm: 0.9999990716622158, iteration: 72025
loss: 0.968572199344635,grad_norm: 0.9999992379294802, iteration: 72026
loss: 0.9775539040565491,grad_norm: 0.9763887076608071, iteration: 72027
loss: 1.026314377784729,grad_norm: 0.9999992560158698, iteration: 72028
loss: 1.0054839849472046,grad_norm: 0.9263771579198204, iteration: 72029
loss: 0.9766544699668884,grad_norm: 0.9999998954805849, iteration: 72030
loss: 1.0146139860153198,grad_norm: 0.8831922477283084, iteration: 72031
loss: 1.0381782054901123,grad_norm: 0.9681751321504601, iteration: 72032
loss: 1.1251753568649292,grad_norm: 0.9999996067321134, iteration: 72033
loss: 0.9889094829559326,grad_norm: 0.8827828374909316, iteration: 72034
loss: 1.0096189975738525,grad_norm: 0.9999993240205082, iteration: 72035
loss: 1.035464882850647,grad_norm: 0.9999999259985459, iteration: 72036
loss: 1.0000470876693726,grad_norm: 0.9045690667915219, iteration: 72037
loss: 0.9923731088638306,grad_norm: 0.9999993444922576, iteration: 72038
loss: 0.9909401535987854,grad_norm: 0.9999991360845787, iteration: 72039
loss: 1.0355948209762573,grad_norm: 0.9999990707095787, iteration: 72040
loss: 0.995944619178772,grad_norm: 0.9999989478474858, iteration: 72041
loss: 1.0331116914749146,grad_norm: 0.999999089612445, iteration: 72042
loss: 1.0040419101715088,grad_norm: 0.9999990232615263, iteration: 72043
loss: 1.0040690898895264,grad_norm: 0.9999992795734165, iteration: 72044
loss: 0.9773973822593689,grad_norm: 0.9999991040413874, iteration: 72045
loss: 0.9748737812042236,grad_norm: 0.9932484834473695, iteration: 72046
loss: 1.0306720733642578,grad_norm: 0.9999991447912567, iteration: 72047
loss: 1.0021142959594727,grad_norm: 0.9277223492116298, iteration: 72048
loss: 0.9860602617263794,grad_norm: 0.9666677570089447, iteration: 72049
loss: 0.9690210819244385,grad_norm: 0.9999990519641374, iteration: 72050
loss: 0.9728533625602722,grad_norm: 0.9230008462770994, iteration: 72051
loss: 1.0033425092697144,grad_norm: 0.9885095092161105, iteration: 72052
loss: 1.0376567840576172,grad_norm: 0.9999989617803484, iteration: 72053
loss: 1.044705867767334,grad_norm: 0.9999993205040485, iteration: 72054
loss: 0.9764151573181152,grad_norm: 0.9613539610278162, iteration: 72055
loss: 0.9986851215362549,grad_norm: 0.9999989653960307, iteration: 72056
loss: 0.9939506649971008,grad_norm: 0.9986670156405915, iteration: 72057
loss: 1.0149121284484863,grad_norm: 0.9999993197751488, iteration: 72058
loss: 1.1648833751678467,grad_norm: 0.9999995637707263, iteration: 72059
loss: 0.9776054620742798,grad_norm: 0.9983035251542312, iteration: 72060
loss: 1.0396134853363037,grad_norm: 0.9999998903059956, iteration: 72061
loss: 0.9790090322494507,grad_norm: 0.9204560074790357, iteration: 72062
loss: 1.0490628480911255,grad_norm: 0.9977755347767913, iteration: 72063
loss: 0.988834023475647,grad_norm: 0.9999992292793384, iteration: 72064
loss: 1.0402215719223022,grad_norm: 0.9999991616792514, iteration: 72065
loss: 1.0102436542510986,grad_norm: 0.9999991114505984, iteration: 72066
loss: 1.041120171546936,grad_norm: 0.9999991738572029, iteration: 72067
loss: 0.9679620265960693,grad_norm: 0.981397725734394, iteration: 72068
loss: 1.0032073259353638,grad_norm: 0.9999991123512857, iteration: 72069
loss: 1.0328021049499512,grad_norm: 0.9999992545445231, iteration: 72070
loss: 0.9848315119743347,grad_norm: 0.9999991230593939, iteration: 72071
loss: 1.0249049663543701,grad_norm: 0.9999991499427424, iteration: 72072
loss: 0.9988759160041809,grad_norm: 0.9999991532390202, iteration: 72073
loss: 1.0019564628601074,grad_norm: 0.9999992629603567, iteration: 72074
loss: 1.0159662961959839,grad_norm: 0.9999989957341651, iteration: 72075
loss: 1.0675878524780273,grad_norm: 0.9999991955482039, iteration: 72076
loss: 1.0041338205337524,grad_norm: 0.9999990703851837, iteration: 72077
loss: 0.9956943392753601,grad_norm: 0.9999992490020587, iteration: 72078
loss: 0.9630060791969299,grad_norm: 0.982498743012749, iteration: 72079
loss: 1.014100432395935,grad_norm: 0.999999302507132, iteration: 72080
loss: 0.9940382838249207,grad_norm: 0.9535083652029749, iteration: 72081
loss: 0.9796690940856934,grad_norm: 0.9999991935545984, iteration: 72082
loss: 0.9502959251403809,grad_norm: 0.9999990985982009, iteration: 72083
loss: 1.0034923553466797,grad_norm: 0.9999991455397708, iteration: 72084
loss: 1.003442645072937,grad_norm: 0.9999990772489723, iteration: 72085
loss: 0.99244225025177,grad_norm: 0.999999004870986, iteration: 72086
loss: 0.9843348860740662,grad_norm: 0.9999991353724031, iteration: 72087
loss: 0.9968554377555847,grad_norm: 0.9481873014331137, iteration: 72088
loss: 1.0022646188735962,grad_norm: 0.9999991612986551, iteration: 72089
loss: 1.0187426805496216,grad_norm: 0.9999990770801287, iteration: 72090
loss: 1.0133105516433716,grad_norm: 0.9671611418574382, iteration: 72091
loss: 1.0264002084732056,grad_norm: 0.9999991645101148, iteration: 72092
loss: 1.0283451080322266,grad_norm: 0.9999992323648714, iteration: 72093
loss: 1.0274684429168701,grad_norm: 0.9999992379400969, iteration: 72094
loss: 1.0178993940353394,grad_norm: 0.9999991232725046, iteration: 72095
loss: 0.9927393198013306,grad_norm: 0.8899591095382205, iteration: 72096
loss: 1.0218087434768677,grad_norm: 0.9999991163962122, iteration: 72097
loss: 1.036211371421814,grad_norm: 0.8538168367093675, iteration: 72098
loss: 1.021971583366394,grad_norm: 0.9632149322195539, iteration: 72099
loss: 1.0273295640945435,grad_norm: 0.9999992778490645, iteration: 72100
loss: 1.0144239664077759,grad_norm: 0.999999173112502, iteration: 72101
loss: 0.9925318956375122,grad_norm: 0.9999991309736156, iteration: 72102
loss: 1.0318922996520996,grad_norm: 0.9999991586997998, iteration: 72103
loss: 0.9797042012214661,grad_norm: 0.9999991885294857, iteration: 72104
loss: 1.0155280828475952,grad_norm: 0.9999999848682778, iteration: 72105
loss: 0.9904354810714722,grad_norm: 0.9999990546002927, iteration: 72106
loss: 1.0260897874832153,grad_norm: 0.9999990746970941, iteration: 72107
loss: 1.0080690383911133,grad_norm: 0.9999991278791946, iteration: 72108
loss: 1.0043174028396606,grad_norm: 0.9954079088004375, iteration: 72109
loss: 1.026349663734436,grad_norm: 0.999999121264363, iteration: 72110
loss: 1.0937488079071045,grad_norm: 0.9999991984227157, iteration: 72111
loss: 1.0277243852615356,grad_norm: 0.9999993849727843, iteration: 72112
loss: 0.9772979617118835,grad_norm: 0.8420571085653553, iteration: 72113
loss: 0.971916913986206,grad_norm: 0.9999991357995739, iteration: 72114
loss: 0.9642130732536316,grad_norm: 0.8907627934318727, iteration: 72115
loss: 1.0342859029769897,grad_norm: 0.9999991184269443, iteration: 72116
loss: 0.9976334571838379,grad_norm: 0.9999991811311888, iteration: 72117
loss: 0.9653156399726868,grad_norm: 0.9595399050144692, iteration: 72118
loss: 1.0035696029663086,grad_norm: 0.8974743974836813, iteration: 72119
loss: 1.0342774391174316,grad_norm: 0.87493392045132, iteration: 72120
loss: 0.985770046710968,grad_norm: 0.9999991457991348, iteration: 72121
loss: 0.9971759915351868,grad_norm: 0.9999990546117674, iteration: 72122
loss: 1.001756191253662,grad_norm: 0.9838436007095434, iteration: 72123
loss: 1.069603681564331,grad_norm: 0.9999991737911978, iteration: 72124
loss: 0.9678269028663635,grad_norm: 0.9999991480220461, iteration: 72125
loss: 0.9880896210670471,grad_norm: 0.9999990224638138, iteration: 72126
loss: 0.9941081404685974,grad_norm: 0.9999992104496761, iteration: 72127
loss: 1.0444566011428833,grad_norm: 0.9121328850210079, iteration: 72128
loss: 1.0615099668502808,grad_norm: 0.9999996491147315, iteration: 72129
loss: 1.0239286422729492,grad_norm: 0.9611151916497731, iteration: 72130
loss: 1.0333821773529053,grad_norm: 0.9999994239019789, iteration: 72131
loss: 1.0042823553085327,grad_norm: 0.9999992018959226, iteration: 72132
loss: 1.0201364755630493,grad_norm: 0.9071283001031768, iteration: 72133
loss: 1.028122067451477,grad_norm: 0.9999990417892085, iteration: 72134
loss: 0.9864133596420288,grad_norm: 0.9999995977115412, iteration: 72135
loss: 1.010530710220337,grad_norm: 0.9999989723433301, iteration: 72136
loss: 0.9642162919044495,grad_norm: 0.9918847556386552, iteration: 72137
loss: 1.0309492349624634,grad_norm: 0.9999994680177392, iteration: 72138
loss: 1.067123532295227,grad_norm: 0.9999992661746726, iteration: 72139
loss: 1.0263227224349976,grad_norm: 0.9999989485553159, iteration: 72140
loss: 1.0643080472946167,grad_norm: 0.9999995298964961, iteration: 72141
loss: 1.0250656604766846,grad_norm: 0.9370746117577273, iteration: 72142
loss: 0.9935084581375122,grad_norm: 0.931688175937012, iteration: 72143
loss: 1.0070027112960815,grad_norm: 0.9999993346324154, iteration: 72144
loss: 0.9883210062980652,grad_norm: 0.9999995576106704, iteration: 72145
loss: 1.021217942237854,grad_norm: 0.9999991165110803, iteration: 72146
loss: 0.9661024212837219,grad_norm: 0.9312954671532685, iteration: 72147
loss: 1.0103669166564941,grad_norm: 0.999999136711081, iteration: 72148
loss: 0.9789352416992188,grad_norm: 0.9999990401267038, iteration: 72149
loss: 0.9648340344429016,grad_norm: 0.999999030217815, iteration: 72150
loss: 0.9879304766654968,grad_norm: 0.9999991608649005, iteration: 72151
loss: 0.9961996674537659,grad_norm: 0.8411827047622995, iteration: 72152
loss: 0.9983446002006531,grad_norm: 0.9999991649082484, iteration: 72153
loss: 1.0001791715621948,grad_norm: 0.917696133804759, iteration: 72154
loss: 1.0087518692016602,grad_norm: 0.983838441765007, iteration: 72155
loss: 1.0174466371536255,grad_norm: 0.9999991188609356, iteration: 72156
loss: 0.9731491208076477,grad_norm: 0.9999991963187548, iteration: 72157
loss: 0.966238796710968,grad_norm: 0.9999992371026823, iteration: 72158
loss: 0.9930801391601562,grad_norm: 0.9999993380185203, iteration: 72159
loss: 1.0257964134216309,grad_norm: 0.8590754320379418, iteration: 72160
loss: 1.0094141960144043,grad_norm: 0.9999990292437209, iteration: 72161
loss: 1.0029505491256714,grad_norm: 0.9999992757033711, iteration: 72162
loss: 1.0549052953720093,grad_norm: 0.9999992095978313, iteration: 72163
loss: 0.9945036768913269,grad_norm: 0.9999992508796646, iteration: 72164
loss: 0.95892333984375,grad_norm: 0.9999992371344719, iteration: 72165
loss: 1.0431519746780396,grad_norm: 0.9999994359128226, iteration: 72166
loss: 0.9886805415153503,grad_norm: 0.9999990806369545, iteration: 72167
loss: 1.0396409034729004,grad_norm: 0.9999991614364142, iteration: 72168
loss: 0.9870094060897827,grad_norm: 0.904815712716991, iteration: 72169
loss: 1.0042600631713867,grad_norm: 0.9367485551745325, iteration: 72170
loss: 1.0377906560897827,grad_norm: 0.9999992695250663, iteration: 72171
loss: 0.9639264345169067,grad_norm: 0.9124669967831365, iteration: 72172
loss: 1.0257456302642822,grad_norm: 0.9999991275660819, iteration: 72173
loss: 0.991939902305603,grad_norm: 0.999998912208003, iteration: 72174
loss: 1.0105664730072021,grad_norm: 0.9999990787773845, iteration: 72175
loss: 0.9965106844902039,grad_norm: 0.9999991034798525, iteration: 72176
loss: 1.1746238470077515,grad_norm: 0.9999995351416658, iteration: 72177
loss: 0.9943099021911621,grad_norm: 0.9770452018651901, iteration: 72178
loss: 0.9983266592025757,grad_norm: 0.9999992408900139, iteration: 72179
loss: 1.0519603490829468,grad_norm: 0.9999991960082093, iteration: 72180
loss: 0.9953885674476624,grad_norm: 0.9999991676491822, iteration: 72181
loss: 0.9828188419342041,grad_norm: 0.9999991656798837, iteration: 72182
loss: 1.0313208103179932,grad_norm: 0.9999997755081698, iteration: 72183
loss: 0.985322117805481,grad_norm: 0.9999992297537594, iteration: 72184
loss: 0.9674635529518127,grad_norm: 0.9999991398338633, iteration: 72185
loss: 0.9533264636993408,grad_norm: 0.9521230240543329, iteration: 72186
loss: 1.0102444887161255,grad_norm: 0.9999992925409064, iteration: 72187
loss: 0.9738936424255371,grad_norm: 0.999999120458913, iteration: 72188
loss: 1.0152708292007446,grad_norm: 0.9999991266828399, iteration: 72189
loss: 0.9834657311439514,grad_norm: 0.9999992135892084, iteration: 72190
loss: 1.0194453001022339,grad_norm: 0.9999991075047066, iteration: 72191
loss: 1.0127291679382324,grad_norm: 0.9999992504381484, iteration: 72192
loss: 1.0378683805465698,grad_norm: 0.9999991564347759, iteration: 72193
loss: 1.0069490671157837,grad_norm: 0.9999989800800763, iteration: 72194
loss: 0.9770023822784424,grad_norm: 0.999999191196862, iteration: 72195
loss: 0.96351557970047,grad_norm: 0.9157363898078508, iteration: 72196
loss: 0.9946354627609253,grad_norm: 0.9999993915253946, iteration: 72197
loss: 0.986551821231842,grad_norm: 0.9848197625159854, iteration: 72198
loss: 1.0092693567276,grad_norm: 0.9999988941281246, iteration: 72199
loss: 0.9685572385787964,grad_norm: 0.9293536213762823, iteration: 72200
loss: 0.9439970254898071,grad_norm: 0.9759066087686408, iteration: 72201
loss: 1.029439091682434,grad_norm: 0.9999992453014467, iteration: 72202
loss: 1.015887975692749,grad_norm: 0.9999993353473724, iteration: 72203
loss: 0.9888376593589783,grad_norm: 0.9150839044790984, iteration: 72204
loss: 1.011778712272644,grad_norm: 0.9999991711767167, iteration: 72205
loss: 1.0299464464187622,grad_norm: 0.9315871862562505, iteration: 72206
loss: 0.994830846786499,grad_norm: 0.9999991622793353, iteration: 72207
loss: 1.0236014127731323,grad_norm: 0.9999991136000976, iteration: 72208
loss: 0.956348717212677,grad_norm: 0.9578938156239685, iteration: 72209
loss: 1.0380005836486816,grad_norm: 0.9963672935748341, iteration: 72210
loss: 1.0467231273651123,grad_norm: 0.9999993157927569, iteration: 72211
loss: 1.0237575769424438,grad_norm: 0.908775879678651, iteration: 72212
loss: 0.9870932102203369,grad_norm: 0.9999991582028951, iteration: 72213
loss: 1.0264075994491577,grad_norm: 0.9999992536109036, iteration: 72214
loss: 1.018157720565796,grad_norm: 0.9778428417371038, iteration: 72215
loss: 0.9949921369552612,grad_norm: 0.8290360061616783, iteration: 72216
loss: 0.9865880012512207,grad_norm: 0.9999990108929596, iteration: 72217
loss: 1.0075327157974243,grad_norm: 0.9999992423355217, iteration: 72218
loss: 1.0622214078903198,grad_norm: 0.9999993815007944, iteration: 72219
loss: 1.0205575227737427,grad_norm: 0.9999991655105366, iteration: 72220
loss: 0.9688423275947571,grad_norm: 0.999999162507956, iteration: 72221
loss: 1.012932538986206,grad_norm: 0.9999992411250559, iteration: 72222
loss: 0.9945774674415588,grad_norm: 0.9999992993965947, iteration: 72223
loss: 0.9516912698745728,grad_norm: 0.9999991861796049, iteration: 72224
loss: 1.0271334648132324,grad_norm: 0.9999991250774874, iteration: 72225
loss: 1.020102620124817,grad_norm: 0.9939715674926247, iteration: 72226
loss: 1.010779857635498,grad_norm: 0.9999991069344389, iteration: 72227
loss: 1.008446455001831,grad_norm: 0.9451964300967165, iteration: 72228
loss: 1.0372157096862793,grad_norm: 0.9634718492685654, iteration: 72229
loss: 1.0266140699386597,grad_norm: 0.999999310254388, iteration: 72230
loss: 0.9799959063529968,grad_norm: 0.9157004476866514, iteration: 72231
loss: 0.9834613800048828,grad_norm: 0.9999993145855643, iteration: 72232
loss: 0.9850029945373535,grad_norm: 0.9885181515939763, iteration: 72233
loss: 1.0089247226715088,grad_norm: 0.8954939276402003, iteration: 72234
loss: 1.0039210319519043,grad_norm: 0.9999989304219732, iteration: 72235
loss: 0.9901435375213623,grad_norm: 0.9999993169335849, iteration: 72236
loss: 0.9727367162704468,grad_norm: 0.9999992411183378, iteration: 72237
loss: 1.0326324701309204,grad_norm: 0.999999707762818, iteration: 72238
loss: 1.0665364265441895,grad_norm: 0.9999999636644249, iteration: 72239
loss: 1.0210456848144531,grad_norm: 0.9999991684513884, iteration: 72240
loss: 1.0045652389526367,grad_norm: 0.8617731559356154, iteration: 72241
loss: 1.0146714448928833,grad_norm: 0.9999990460711695, iteration: 72242
loss: 1.0193287134170532,grad_norm: 0.9999992087758401, iteration: 72243
loss: 1.0156015157699585,grad_norm: 0.9755331075740991, iteration: 72244
loss: 0.9982438087463379,grad_norm: 0.9562080564887141, iteration: 72245
loss: 1.034674048423767,grad_norm: 0.9450859858251188, iteration: 72246
loss: 1.0229249000549316,grad_norm: 0.899932620062934, iteration: 72247
loss: 0.9695444107055664,grad_norm: 0.9999992108958775, iteration: 72248
loss: 0.9708335399627686,grad_norm: 0.7715063243300188, iteration: 72249
loss: 0.9824456572532654,grad_norm: 0.9999989665337656, iteration: 72250
loss: 1.004525065422058,grad_norm: 0.9940469181687337, iteration: 72251
loss: 1.0265039205551147,grad_norm: 0.9999994530964802, iteration: 72252
loss: 0.9996507167816162,grad_norm: 0.9999990514371799, iteration: 72253
loss: 0.9852179288864136,grad_norm: 0.9999993197241616, iteration: 72254
loss: 0.9721410274505615,grad_norm: 0.9467063045323004, iteration: 72255
loss: 1.031902551651001,grad_norm: 0.9999992715559084, iteration: 72256
loss: 0.9852460026741028,grad_norm: 0.9861935607585237, iteration: 72257
loss: 1.0224040746688843,grad_norm: 0.999999192038226, iteration: 72258
loss: 1.076557993888855,grad_norm: 0.9999991544798339, iteration: 72259
loss: 1.0028971433639526,grad_norm: 0.9999990578061784, iteration: 72260
loss: 0.9860998392105103,grad_norm: 0.9594717495778102, iteration: 72261
loss: 1.0149948596954346,grad_norm: 0.9999991500664323, iteration: 72262
loss: 1.08357572555542,grad_norm: 0.999999023103006, iteration: 72263
loss: 1.0172417163848877,grad_norm: 0.9999992380447381, iteration: 72264
loss: 1.1408717632293701,grad_norm: 0.9999997748366662, iteration: 72265
loss: 1.2620532512664795,grad_norm: 0.9999998004566991, iteration: 72266
loss: 1.0058696269989014,grad_norm: 0.9999991805424232, iteration: 72267
loss: 1.2171669006347656,grad_norm: 0.9999991927763087, iteration: 72268
loss: 1.7187429666519165,grad_norm: 0.9999997223956847, iteration: 72269
loss: 1.2644683122634888,grad_norm: 0.9999994105499396, iteration: 72270
loss: 0.970608651638031,grad_norm: 0.9999991946528007, iteration: 72271
loss: 1.100841999053955,grad_norm: 0.9999997864817533, iteration: 72272
loss: 1.1178638935089111,grad_norm: 0.9999994875294086, iteration: 72273
loss: 1.149114727973938,grad_norm: 0.9999993897001179, iteration: 72274
loss: 1.1916292905807495,grad_norm: 0.9999998102990321, iteration: 72275
loss: 1.2488369941711426,grad_norm: 0.9999995069962158, iteration: 72276
loss: 1.2484372854232788,grad_norm: 0.9999997065322151, iteration: 72277
loss: 1.041892647743225,grad_norm: 0.9999995992451294, iteration: 72278
loss: 1.0974655151367188,grad_norm: 0.9999997408370277, iteration: 72279
loss: 1.203052282333374,grad_norm: 0.9999995659082473, iteration: 72280
loss: 1.144850492477417,grad_norm: 0.9999995200186282, iteration: 72281
loss: 1.4995551109313965,grad_norm: 0.9999998686672934, iteration: 72282
loss: 1.1134878396987915,grad_norm: 0.9999994567042276, iteration: 72283
loss: 0.9954383373260498,grad_norm: 0.9999990078679923, iteration: 72284
loss: 1.0570626258850098,grad_norm: 0.9999995516801747, iteration: 72285
loss: 1.0668479204177856,grad_norm: 0.999999713670584, iteration: 72286
loss: 1.0052282810211182,grad_norm: 0.9494034538611608, iteration: 72287
loss: 1.073595404624939,grad_norm: 0.9999990664067901, iteration: 72288
loss: 1.0066311359405518,grad_norm: 0.9272554575423084, iteration: 72289
loss: 1.004837989807129,grad_norm: 0.9999997811166185, iteration: 72290
loss: 1.1208994388580322,grad_norm: 0.9999992274318928, iteration: 72291
loss: 1.1671257019042969,grad_norm: 0.9999993607808432, iteration: 72292
loss: 1.0325018167495728,grad_norm: 0.9999993022312759, iteration: 72293
loss: 1.0013536214828491,grad_norm: 0.9999991482338102, iteration: 72294
loss: 1.1170603036880493,grad_norm: 0.999999792921249, iteration: 72295
loss: 1.0257623195648193,grad_norm: 0.9999995957156382, iteration: 72296
loss: 1.0882114171981812,grad_norm: 0.9999997582450042, iteration: 72297
loss: 1.047245979309082,grad_norm: 0.9999996187959891, iteration: 72298
loss: 1.0548018217086792,grad_norm: 0.9999997435175654, iteration: 72299
loss: 1.0176945924758911,grad_norm: 0.999999464014051, iteration: 72300
loss: 1.0254075527191162,grad_norm: 0.999999698730511, iteration: 72301
loss: 0.960205078125,grad_norm: 0.8782291064421579, iteration: 72302
loss: 1.1095800399780273,grad_norm: 0.9999998501927607, iteration: 72303
loss: 1.1016731262207031,grad_norm: 0.9999999058251844, iteration: 72304
loss: 1.128746509552002,grad_norm: 0.9999997806412154, iteration: 72305
loss: 1.002550482749939,grad_norm: 0.9999989862637104, iteration: 72306
loss: 1.1012951135635376,grad_norm: 0.9999998948900793, iteration: 72307
loss: 1.049978494644165,grad_norm: 0.9999995980046708, iteration: 72308
loss: 1.1335463523864746,grad_norm: 0.9999992900094493, iteration: 72309
loss: 1.0044538974761963,grad_norm: 0.9999994091027288, iteration: 72310
loss: 1.0076981782913208,grad_norm: 0.9999992379178899, iteration: 72311
loss: 1.241800308227539,grad_norm: 0.9999993791578246, iteration: 72312
loss: 1.0081990957260132,grad_norm: 0.9999990784963, iteration: 72313
loss: 1.1209232807159424,grad_norm: 0.9999991456107645, iteration: 72314
loss: 1.0364913940429688,grad_norm: 0.9999988892328763, iteration: 72315
loss: 0.9986683130264282,grad_norm: 0.9999991899050625, iteration: 72316
loss: 0.9910754561424255,grad_norm: 0.9999991002075251, iteration: 72317
loss: 1.0309408903121948,grad_norm: 0.9999992761003453, iteration: 72318
loss: 1.0380637645721436,grad_norm: 0.9999990924773072, iteration: 72319
loss: 0.962486743927002,grad_norm: 0.9013192085386391, iteration: 72320
loss: 1.0622141361236572,grad_norm: 0.9999999357804322, iteration: 72321
loss: 0.9766480326652527,grad_norm: 0.9999990812786355, iteration: 72322
loss: 1.0070946216583252,grad_norm: 0.8374613624003717, iteration: 72323
loss: 1.013175129890442,grad_norm: 0.9680220151030057, iteration: 72324
loss: 1.0274578332901,grad_norm: 0.999999379521626, iteration: 72325
loss: 1.0102571249008179,grad_norm: 0.9999994069857817, iteration: 72326
loss: 0.999855637550354,grad_norm: 0.9999990940857401, iteration: 72327
loss: 1.175891399383545,grad_norm: 0.9999998787682558, iteration: 72328
loss: 0.9987224340438843,grad_norm: 0.9999990976912226, iteration: 72329
loss: 1.1490504741668701,grad_norm: 1.0000000288046687, iteration: 72330
loss: 1.096757411956787,grad_norm: 0.9999997012377648, iteration: 72331
loss: 1.0485000610351562,grad_norm: 0.999999950654728, iteration: 72332
loss: 0.999903678894043,grad_norm: 0.9739127964364487, iteration: 72333
loss: 1.0522955656051636,grad_norm: 0.999999467902702, iteration: 72334
loss: 1.0273361206054688,grad_norm: 0.9999990428760149, iteration: 72335
loss: 0.9963067770004272,grad_norm: 0.9589417750405768, iteration: 72336
loss: 1.0760730504989624,grad_norm: 0.999999469590999, iteration: 72337
loss: 1.3167126178741455,grad_norm: 0.9999993700617176, iteration: 72338
loss: 0.9933863282203674,grad_norm: 0.9999991221962675, iteration: 72339
loss: 1.1248667240142822,grad_norm: 0.9999996781495823, iteration: 72340
loss: 1.0419039726257324,grad_norm: 0.999999286279376, iteration: 72341
loss: 1.066285490989685,grad_norm: 0.9999991656504413, iteration: 72342
loss: 1.2237709760665894,grad_norm: 0.9999994148800562, iteration: 72343
loss: 1.0309432744979858,grad_norm: 0.9999998399395041, iteration: 72344
loss: 1.1079411506652832,grad_norm: 0.9999994217116057, iteration: 72345
loss: 1.081862449645996,grad_norm: 0.9999995639116104, iteration: 72346
loss: 1.1371498107910156,grad_norm: 0.9999997540188652, iteration: 72347
loss: 1.032321810722351,grad_norm: 0.9999993038630829, iteration: 72348
loss: 1.0778974294662476,grad_norm: 0.9999992248992594, iteration: 72349
loss: 1.0979596376419067,grad_norm: 0.9999993767179424, iteration: 72350
loss: 1.011447548866272,grad_norm: 0.9999992164744083, iteration: 72351
loss: 0.9890064597129822,grad_norm: 0.9890253363692624, iteration: 72352
loss: 1.1180886030197144,grad_norm: 0.9999995152584599, iteration: 72353
loss: 1.026785135269165,grad_norm: 0.99999908048302, iteration: 72354
loss: 0.9920651316642761,grad_norm: 0.9999991358824918, iteration: 72355
loss: 1.0039012432098389,grad_norm: 0.9999991297124649, iteration: 72356
loss: 1.0728074312210083,grad_norm: 0.9999997337245787, iteration: 72357
loss: 1.0223952531814575,grad_norm: 0.9999992675016484, iteration: 72358
loss: 1.1331692934036255,grad_norm: 0.9999997719639304, iteration: 72359
loss: 1.0553526878356934,grad_norm: 0.8735247451589865, iteration: 72360
loss: 0.997868537902832,grad_norm: 0.9999990284688278, iteration: 72361
loss: 1.0425093173980713,grad_norm: 0.9999991547644571, iteration: 72362
loss: 1.0166294574737549,grad_norm: 0.9999996369312828, iteration: 72363
loss: 1.187865972518921,grad_norm: 0.999999933175981, iteration: 72364
loss: 0.973196268081665,grad_norm: 0.9946900935780326, iteration: 72365
loss: 1.0450092554092407,grad_norm: 0.9999994818069718, iteration: 72366
loss: 1.1691299676895142,grad_norm: 0.9999994209892742, iteration: 72367
loss: 1.0123274326324463,grad_norm: 0.9999993384934459, iteration: 72368
loss: 1.1150555610656738,grad_norm: 0.999999301122224, iteration: 72369
loss: 1.0139658451080322,grad_norm: 0.9867222208810827, iteration: 72370
loss: 1.1094989776611328,grad_norm: 0.9999995716958363, iteration: 72371
loss: 0.9932788014411926,grad_norm: 0.9999991882281961, iteration: 72372
loss: 0.9546507000923157,grad_norm: 0.9999990611385026, iteration: 72373
loss: 1.0598533153533936,grad_norm: 0.999999511795663, iteration: 72374
loss: 1.0170797109603882,grad_norm: 0.8580080207455772, iteration: 72375
loss: 1.0115092992782593,grad_norm: 0.999999328736334, iteration: 72376
loss: 1.027084231376648,grad_norm: 0.9878132536822843, iteration: 72377
loss: 1.02239191532135,grad_norm: 0.9815519441538063, iteration: 72378
loss: 1.0140687227249146,grad_norm: 0.999999265335317, iteration: 72379
loss: 0.9711418151855469,grad_norm: 0.9999991153369339, iteration: 72380
loss: 1.070516586303711,grad_norm: 0.9999995119161387, iteration: 72381
loss: 1.030962347984314,grad_norm: 0.9999994401741779, iteration: 72382
loss: 1.0002762079238892,grad_norm: 0.9999991008437055, iteration: 72383
loss: 1.2086325883865356,grad_norm: 0.9999997866046764, iteration: 72384
loss: 0.9936746954917908,grad_norm: 0.9999996465322376, iteration: 72385
loss: 0.9710860848426819,grad_norm: 0.9785094773036075, iteration: 72386
loss: 0.9823762774467468,grad_norm: 0.9999992717162033, iteration: 72387
loss: 0.9784572720527649,grad_norm: 0.9642871251318805, iteration: 72388
loss: 1.0157241821289062,grad_norm: 0.9999990504444717, iteration: 72389
loss: 1.0339828729629517,grad_norm: 0.999999192922801, iteration: 72390
loss: 1.032831072807312,grad_norm: 0.9999990721613925, iteration: 72391
loss: 1.0201447010040283,grad_norm: 0.9999995988119232, iteration: 72392
loss: 1.002508521080017,grad_norm: 0.9999991174554631, iteration: 72393
loss: 0.9817522764205933,grad_norm: 0.999999158103659, iteration: 72394
loss: 1.0118515491485596,grad_norm: 0.9999992089025359, iteration: 72395
loss: 0.9863888025283813,grad_norm: 0.9999991192849009, iteration: 72396
loss: 1.0281858444213867,grad_norm: 0.9999995635778035, iteration: 72397
loss: 1.0076168775558472,grad_norm: 0.9999992839122841, iteration: 72398
loss: 1.0231174230575562,grad_norm: 0.9770471718881792, iteration: 72399
loss: 0.9750733971595764,grad_norm: 0.9999991727047668, iteration: 72400
loss: 1.0334062576293945,grad_norm: 0.9999990509649844, iteration: 72401
loss: 1.113534927368164,grad_norm: 0.999999507862239, iteration: 72402
loss: 1.0615370273590088,grad_norm: 0.9999994740419773, iteration: 72403
loss: 1.0112299919128418,grad_norm: 0.9999990904816375, iteration: 72404
loss: 1.0114206075668335,grad_norm: 0.9999990550494001, iteration: 72405
loss: 0.9775204062461853,grad_norm: 0.9999991055765425, iteration: 72406
loss: 1.0257829427719116,grad_norm: 0.9999996822477952, iteration: 72407
loss: 1.038742184638977,grad_norm: 0.9999997604008101, iteration: 72408
loss: 0.9678773283958435,grad_norm: 0.9999991311337704, iteration: 72409
loss: 1.0049493312835693,grad_norm: 0.9999989924377521, iteration: 72410
loss: 0.985775887966156,grad_norm: 0.9999990793365723, iteration: 72411
loss: 0.9801416993141174,grad_norm: 0.9999992431870662, iteration: 72412
loss: 1.01516854763031,grad_norm: 0.9999991587796473, iteration: 72413
loss: 1.0117074251174927,grad_norm: 0.8923467328395557, iteration: 72414
loss: 0.9747729301452637,grad_norm: 0.9999991108822148, iteration: 72415
loss: 1.0194602012634277,grad_norm: 0.9999992652183805, iteration: 72416
loss: 1.0571963787078857,grad_norm: 0.9999998038250996, iteration: 72417
loss: 1.0234943628311157,grad_norm: 0.9640468907060092, iteration: 72418
loss: 1.0010830163955688,grad_norm: 0.9351073613627897, iteration: 72419
loss: 0.9962784647941589,grad_norm: 0.9999996687041489, iteration: 72420
loss: 1.0651499032974243,grad_norm: 0.9999997143777979, iteration: 72421
loss: 1.0353375673294067,grad_norm: 0.999999165640539, iteration: 72422
loss: 0.9699406623840332,grad_norm: 0.9999989841760131, iteration: 72423
loss: 0.9988052248954773,grad_norm: 0.9999990292975496, iteration: 72424
loss: 1.0071994066238403,grad_norm: 0.9587506475124277, iteration: 72425
loss: 0.9754692912101746,grad_norm: 0.9206181990982868, iteration: 72426
loss: 1.0258511304855347,grad_norm: 0.9999990287330083, iteration: 72427
loss: 1.0522067546844482,grad_norm: 0.9999995602201307, iteration: 72428
loss: 1.0880022048950195,grad_norm: 0.9999991837646385, iteration: 72429
loss: 1.073204517364502,grad_norm: 0.999999151092053, iteration: 72430
loss: 0.9877021312713623,grad_norm: 0.9999990337432687, iteration: 72431
loss: 1.0157833099365234,grad_norm: 0.999999175686244, iteration: 72432
loss: 1.000498652458191,grad_norm: 0.9999993399939966, iteration: 72433
loss: 0.974113941192627,grad_norm: 0.9598291202410341, iteration: 72434
loss: 1.1469813585281372,grad_norm: 0.9999998485731489, iteration: 72435
loss: 1.0255323648452759,grad_norm: 0.9999993135118328, iteration: 72436
loss: 0.9883450865745544,grad_norm: 0.9999991405656551, iteration: 72437
loss: 1.031628131866455,grad_norm: 0.9999990449981372, iteration: 72438
loss: 1.024984359741211,grad_norm: 0.9999991888565553, iteration: 72439
loss: 0.9607470631599426,grad_norm: 0.999999309292473, iteration: 72440
loss: 0.9944323301315308,grad_norm: 0.9999990229543932, iteration: 72441
loss: 1.0661088228225708,grad_norm: 0.9999994273830289, iteration: 72442
loss: 1.0272012948989868,grad_norm: 0.9999994004610984, iteration: 72443
loss: 0.9885775446891785,grad_norm: 0.9999994034319607, iteration: 72444
loss: 1.0085899829864502,grad_norm: 0.9999993376270807, iteration: 72445
loss: 0.9738860726356506,grad_norm: 0.9999991682486105, iteration: 72446
loss: 1.0069043636322021,grad_norm: 0.999999655226157, iteration: 72447
loss: 1.004844307899475,grad_norm: 0.9999990977124584, iteration: 72448
loss: 1.044769525527954,grad_norm: 0.9261401185868336, iteration: 72449
loss: 1.038509726524353,grad_norm: 0.9999992461126594, iteration: 72450
loss: 0.9890983700752258,grad_norm: 0.9601058360031819, iteration: 72451
loss: 0.9889671802520752,grad_norm: 0.9999994836207748, iteration: 72452
loss: 1.0284359455108643,grad_norm: 0.9999992636128627, iteration: 72453
loss: 1.0053224563598633,grad_norm: 0.9999996056426704, iteration: 72454
loss: 1.0047738552093506,grad_norm: 0.999999127189643, iteration: 72455
loss: 1.0306116342544556,grad_norm: 0.9999990920175054, iteration: 72456
loss: 1.0117160081863403,grad_norm: 0.9999993748095408, iteration: 72457
loss: 1.039419174194336,grad_norm: 0.9999994619042715, iteration: 72458
loss: 1.013078212738037,grad_norm: 0.9999991372300381, iteration: 72459
loss: 0.994168758392334,grad_norm: 0.9999991619973668, iteration: 72460
loss: 1.0377501249313354,grad_norm: 0.9999990896631524, iteration: 72461
loss: 1.009737491607666,grad_norm: 0.9999989625575915, iteration: 72462
loss: 1.0027060508728027,grad_norm: 0.9999992234803433, iteration: 72463
loss: 0.9804656505584717,grad_norm: 0.8662943032351932, iteration: 72464
loss: 1.0141459703445435,grad_norm: 0.99999906467849, iteration: 72465
loss: 1.0269742012023926,grad_norm: 0.9999994321636351, iteration: 72466
loss: 1.0092957019805908,grad_norm: 0.9189679300580252, iteration: 72467
loss: 1.0145702362060547,grad_norm: 0.999999088942551, iteration: 72468
loss: 0.9964994788169861,grad_norm: 0.999999265121661, iteration: 72469
loss: 0.9933170676231384,grad_norm: 0.9552263929335758, iteration: 72470
loss: 1.0157790184020996,grad_norm: 0.9805058357991802, iteration: 72471
loss: 1.0114182233810425,grad_norm: 0.9999995580401081, iteration: 72472
loss: 1.0890005826950073,grad_norm: 0.9999991547158645, iteration: 72473
loss: 1.0271997451782227,grad_norm: 0.9999992101587767, iteration: 72474
loss: 0.9984905123710632,grad_norm: 0.9999990238747161, iteration: 72475
loss: 1.0116620063781738,grad_norm: 0.8711185545844391, iteration: 72476
loss: 1.003753900527954,grad_norm: 0.9980726186984842, iteration: 72477
loss: 1.0184658765792847,grad_norm: 0.9999994694823829, iteration: 72478
loss: 1.005366325378418,grad_norm: 0.9264534348448554, iteration: 72479
loss: 0.9800527691841125,grad_norm: 0.940073121981102, iteration: 72480
loss: 0.9939154982566833,grad_norm: 0.9999992309084204, iteration: 72481
loss: 0.9685344696044922,grad_norm: 0.9780999128059824, iteration: 72482
loss: 1.0182390213012695,grad_norm: 0.9999996161652891, iteration: 72483
loss: 1.043971061706543,grad_norm: 0.9999990135415877, iteration: 72484
loss: 0.9949653744697571,grad_norm: 0.9999991610200105, iteration: 72485
loss: 1.007472038269043,grad_norm: 0.9999990021366906, iteration: 72486
loss: 1.040842890739441,grad_norm: 0.9999992385100978, iteration: 72487
loss: 1.036389946937561,grad_norm: 0.9999990442578102, iteration: 72488
loss: 0.9571300745010376,grad_norm: 0.9999992176989686, iteration: 72489
loss: 0.9918885827064514,grad_norm: 0.999999218106342, iteration: 72490
loss: 1.0112919807434082,grad_norm: 0.9764208195519399, iteration: 72491
loss: 0.9783670902252197,grad_norm: 0.9999992602315735, iteration: 72492
loss: 1.0191656351089478,grad_norm: 0.9999999673720836, iteration: 72493
loss: 0.9654538631439209,grad_norm: 0.9999995477444842, iteration: 72494
loss: 1.0448405742645264,grad_norm: 0.9999991823281995, iteration: 72495
loss: 0.9899704456329346,grad_norm: 0.9840912089803988, iteration: 72496
loss: 1.022521734237671,grad_norm: 0.9999990950501746, iteration: 72497
loss: 1.0310239791870117,grad_norm: 0.9999989878023707, iteration: 72498
loss: 1.0208585262298584,grad_norm: 0.9999992737371042, iteration: 72499
loss: 0.9647353887557983,grad_norm: 0.9199203606816513, iteration: 72500
loss: 1.0203826427459717,grad_norm: 0.9999993205605876, iteration: 72501
loss: 1.0383034944534302,grad_norm: 0.9938214496499282, iteration: 72502
loss: 1.0347634553909302,grad_norm: 0.9999997040527866, iteration: 72503
loss: 1.008211612701416,grad_norm: 0.9999992021668767, iteration: 72504
loss: 1.020072340965271,grad_norm: 0.9999990953777806, iteration: 72505
loss: 1.032071828842163,grad_norm: 0.9999991147401741, iteration: 72506
loss: 1.0169004201889038,grad_norm: 0.9642563378141479, iteration: 72507
loss: 0.9898895025253296,grad_norm: 0.9999991735487138, iteration: 72508
loss: 1.0423085689544678,grad_norm: 0.9999990704285785, iteration: 72509
loss: 1.0271944999694824,grad_norm: 0.9999994062875659, iteration: 72510
loss: 1.0113911628723145,grad_norm: 0.999998986001917, iteration: 72511
loss: 1.0353890657424927,grad_norm: 0.9999991284707022, iteration: 72512
loss: 1.0086714029312134,grad_norm: 0.9999992337261572, iteration: 72513
loss: 0.9747077226638794,grad_norm: 0.9999991904012153, iteration: 72514
loss: 1.0485814809799194,grad_norm: 0.9999994143395908, iteration: 72515
loss: 1.072476863861084,grad_norm: 0.9999990698650989, iteration: 72516
loss: 1.0939933061599731,grad_norm: 0.9999998259514419, iteration: 72517
loss: 0.9916532039642334,grad_norm: 0.9999992059444459, iteration: 72518
loss: 0.9791389107704163,grad_norm: 0.9999991559164274, iteration: 72519
loss: 1.0438017845153809,grad_norm: 0.991623539725449, iteration: 72520
loss: 1.058868169784546,grad_norm: 0.9999998062743857, iteration: 72521
loss: 1.0099986791610718,grad_norm: 0.9999991295257032, iteration: 72522
loss: 1.0595077276229858,grad_norm: 0.9999989342186678, iteration: 72523
loss: 1.0012784004211426,grad_norm: 0.9619176481746021, iteration: 72524
loss: 0.9607532620429993,grad_norm: 0.999999069045885, iteration: 72525
loss: 1.0060592889785767,grad_norm: 0.9999991219844598, iteration: 72526
loss: 1.0020407438278198,grad_norm: 0.9999990929713574, iteration: 72527
loss: 0.9976637959480286,grad_norm: 0.9999990245684055, iteration: 72528
loss: 0.9685128927230835,grad_norm: 0.9193186771235057, iteration: 72529
loss: 0.9871799945831299,grad_norm: 0.9999990284177077, iteration: 72530
loss: 0.9917212128639221,grad_norm: 0.9999991599983037, iteration: 72531
loss: 0.9956064820289612,grad_norm: 0.9999992941826835, iteration: 72532
loss: 1.006054162979126,grad_norm: 0.9999993210709599, iteration: 72533
loss: 1.0119606256484985,grad_norm: 0.9757329104537698, iteration: 72534
loss: 0.9721726179122925,grad_norm: 0.9999989753868948, iteration: 72535
loss: 1.0228188037872314,grad_norm: 0.9999991705497251, iteration: 72536
loss: 0.9967607259750366,grad_norm: 0.9999991233219109, iteration: 72537
loss: 1.0043283700942993,grad_norm: 0.8651440157093685, iteration: 72538
loss: 0.991046130657196,grad_norm: 0.9999990429118838, iteration: 72539
loss: 1.0074059963226318,grad_norm: 0.9999990336569823, iteration: 72540
loss: 0.967549741268158,grad_norm: 0.9999990172387174, iteration: 72541
loss: 0.9972679018974304,grad_norm: 0.9999992653625286, iteration: 72542
loss: 0.9883846044540405,grad_norm: 0.9165068961760825, iteration: 72543
loss: 1.0009344816207886,grad_norm: 0.9999990860485267, iteration: 72544
loss: 1.0023682117462158,grad_norm: 0.9485095465144266, iteration: 72545
loss: 0.9618533849716187,grad_norm: 0.976363635343185, iteration: 72546
loss: 0.9950627684593201,grad_norm: 0.9494273105246487, iteration: 72547
loss: 1.0172892808914185,grad_norm: 0.9999991452249739, iteration: 72548
loss: 0.9891268610954285,grad_norm: 0.9999989685858877, iteration: 72549
loss: 0.9745314121246338,grad_norm: 0.8894263580763596, iteration: 72550
loss: 0.9662824869155884,grad_norm: 0.9999991459068563, iteration: 72551
loss: 0.9497014284133911,grad_norm: 0.9999990331308063, iteration: 72552
loss: 1.181868553161621,grad_norm: 0.9999997618676545, iteration: 72553
loss: 1.0260858535766602,grad_norm: 0.9575480224617153, iteration: 72554
loss: 1.0188905000686646,grad_norm: 0.9999991704687073, iteration: 72555
loss: 1.0066139698028564,grad_norm: 0.9999992669627739, iteration: 72556
loss: 1.0081571340560913,grad_norm: 0.9414535202639502, iteration: 72557
loss: 0.9955226182937622,grad_norm: 0.999999716110853, iteration: 72558
loss: 0.984173595905304,grad_norm: 0.9200342771207162, iteration: 72559
loss: 1.0094090700149536,grad_norm: 0.9332496253350034, iteration: 72560
loss: 1.0097146034240723,grad_norm: 0.9999992099371658, iteration: 72561
loss: 0.9603639245033264,grad_norm: 0.8568009896626533, iteration: 72562
loss: 0.9640336036682129,grad_norm: 0.9999991747278438, iteration: 72563
loss: 0.9867042899131775,grad_norm: 0.9396584290895359, iteration: 72564
loss: 1.0384113788604736,grad_norm: 0.999999153870842, iteration: 72565
loss: 1.0037610530853271,grad_norm: 0.9999997873268335, iteration: 72566
loss: 1.0151879787445068,grad_norm: 0.9999991798858503, iteration: 72567
loss: 0.986830472946167,grad_norm: 0.9999990650718437, iteration: 72568
loss: 0.9959788918495178,grad_norm: 0.9999993994511184, iteration: 72569
loss: 0.9965032339096069,grad_norm: 0.953125099636415, iteration: 72570
loss: 0.9700822830200195,grad_norm: 0.9999991105286709, iteration: 72571
loss: 1.023153305053711,grad_norm: 0.999999744607232, iteration: 72572
loss: 1.0196802616119385,grad_norm: 0.9999991318332918, iteration: 72573
loss: 0.982468843460083,grad_norm: 0.93328610431775, iteration: 72574
loss: 1.0024621486663818,grad_norm: 0.9999991199254145, iteration: 72575
loss: 1.0377278327941895,grad_norm: 0.974380697447886, iteration: 72576
loss: 0.9986951351165771,grad_norm: 0.9999991057382905, iteration: 72577
loss: 1.0331963300704956,grad_norm: 0.9999991679981103, iteration: 72578
loss: 1.0014011859893799,grad_norm: 0.9999991977249272, iteration: 72579
loss: 1.0242280960083008,grad_norm: 0.9999990726870112, iteration: 72580
loss: 1.0231187343597412,grad_norm: 0.9999992097718647, iteration: 72581
loss: 0.9670867323875427,grad_norm: 0.9999993151954866, iteration: 72582
loss: 1.0130597352981567,grad_norm: 0.930218410086814, iteration: 72583
loss: 1.028659701347351,grad_norm: 0.9999993586013077, iteration: 72584
loss: 1.0029293298721313,grad_norm: 0.9999993187480108, iteration: 72585
loss: 1.0277822017669678,grad_norm: 0.9999996059097778, iteration: 72586
loss: 0.9874597191810608,grad_norm: 0.9999990165461413, iteration: 72587
loss: 0.9912096858024597,grad_norm: 0.9999991273659178, iteration: 72588
loss: 0.9843900799751282,grad_norm: 0.9895659799723046, iteration: 72589
loss: 1.019594430923462,grad_norm: 0.9999991339104862, iteration: 72590
loss: 1.0274937152862549,grad_norm: 0.9999991290434048, iteration: 72591
loss: 1.0021202564239502,grad_norm: 0.9999992681420964, iteration: 72592
loss: 0.9943896532058716,grad_norm: 0.9999990904882046, iteration: 72593
loss: 1.041000247001648,grad_norm: 0.9999992091650094, iteration: 72594
loss: 1.0194658041000366,grad_norm: 0.9999989778248491, iteration: 72595
loss: 0.9989675879478455,grad_norm: 0.9598917698643917, iteration: 72596
loss: 1.0270754098892212,grad_norm: 0.9342635466194197, iteration: 72597
loss: 1.0828900337219238,grad_norm: 0.9999993127022202, iteration: 72598
loss: 1.0022733211517334,grad_norm: 0.973193607587814, iteration: 72599
loss: 1.009238362312317,grad_norm: 0.9999990242384869, iteration: 72600
loss: 1.025675892829895,grad_norm: 0.9855048819203734, iteration: 72601
loss: 0.9910950660705566,grad_norm: 0.9999995899258817, iteration: 72602
loss: 1.114538550376892,grad_norm: 0.9999996289967242, iteration: 72603
loss: 0.9885722994804382,grad_norm: 0.9826116652218528, iteration: 72604
loss: 0.9695030450820923,grad_norm: 0.999999776260079, iteration: 72605
loss: 0.9922087788581848,grad_norm: 0.9999991646312647, iteration: 72606
loss: 1.0078635215759277,grad_norm: 0.9072603060784082, iteration: 72607
loss: 1.0019067525863647,grad_norm: 0.9999993595046901, iteration: 72608
loss: 1.0585054159164429,grad_norm: 0.9999996369673282, iteration: 72609
loss: 0.9645911455154419,grad_norm: 0.9999992067623213, iteration: 72610
loss: 1.0165259838104248,grad_norm: 0.9999989639674345, iteration: 72611
loss: 1.0100346803665161,grad_norm: 0.9999993697788767, iteration: 72612
loss: 1.0372848510742188,grad_norm: 0.9999998258049192, iteration: 72613
loss: 1.0631145238876343,grad_norm: 0.999999155831524, iteration: 72614
loss: 0.9922277927398682,grad_norm: 0.8372396235475442, iteration: 72615
loss: 0.9875198006629944,grad_norm: 0.9857047023447639, iteration: 72616
loss: 1.0186153650283813,grad_norm: 0.9999993034198262, iteration: 72617
loss: 0.9936349391937256,grad_norm: 0.9709624315285325, iteration: 72618
loss: 0.9714255928993225,grad_norm: 0.9310412277617836, iteration: 72619
loss: 1.0035637617111206,grad_norm: 0.9999990332507469, iteration: 72620
loss: 1.0240283012390137,grad_norm: 0.9368848937454809, iteration: 72621
loss: 0.9802202582359314,grad_norm: 0.9999994293817568, iteration: 72622
loss: 1.0118849277496338,grad_norm: 0.8436427638909995, iteration: 72623
loss: 0.9693322777748108,grad_norm: 0.9999990884084574, iteration: 72624
loss: 0.988203763961792,grad_norm: 0.9999990724458625, iteration: 72625
loss: 1.0053236484527588,grad_norm: 0.9999990841993831, iteration: 72626
loss: 0.9884997010231018,grad_norm: 0.9852201196625049, iteration: 72627
loss: 1.029444694519043,grad_norm: 0.9999990592810967, iteration: 72628
loss: 1.0034133195877075,grad_norm: 0.999999216657714, iteration: 72629
loss: 1.0209859609603882,grad_norm: 0.9999992819178141, iteration: 72630
loss: 1.0247962474822998,grad_norm: 0.9999992236216866, iteration: 72631
loss: 0.9739580154418945,grad_norm: 0.8979533127570447, iteration: 72632
loss: 0.9675916433334351,grad_norm: 0.967551950969569, iteration: 72633
loss: 1.0059760808944702,grad_norm: 0.9999992763500005, iteration: 72634
loss: 1.0098496675491333,grad_norm: 0.9999990861357545, iteration: 72635
loss: 1.02958345413208,grad_norm: 0.9973539660292988, iteration: 72636
loss: 1.0874409675598145,grad_norm: 0.9999991878685769, iteration: 72637
loss: 1.0049395561218262,grad_norm: 0.9999997960319261, iteration: 72638
loss: 0.9837989807128906,grad_norm: 0.9999992390647908, iteration: 72639
loss: 1.0458916425704956,grad_norm: 0.9999998327121823, iteration: 72640
loss: 1.008264422416687,grad_norm: 0.9999989806681684, iteration: 72641
loss: 1.017646312713623,grad_norm: 0.8987575727963639, iteration: 72642
loss: 1.02083420753479,grad_norm: 0.9999990841505869, iteration: 72643
loss: 1.021736741065979,grad_norm: 0.999640935536319, iteration: 72644
loss: 1.0355298519134521,grad_norm: 0.8749327927124366, iteration: 72645
loss: 0.9922860264778137,grad_norm: 0.9840817919758333, iteration: 72646
loss: 0.9691369533538818,grad_norm: 0.9999991725504763, iteration: 72647
loss: 0.9842194318771362,grad_norm: 0.9640621812845349, iteration: 72648
loss: 0.9943457245826721,grad_norm: 0.9385014485736474, iteration: 72649
loss: 1.0042030811309814,grad_norm: 0.9999990753245392, iteration: 72650
loss: 1.003643274307251,grad_norm: 0.9999991061079483, iteration: 72651
loss: 0.9815974235534668,grad_norm: 0.9999992671149356, iteration: 72652
loss: 1.0283868312835693,grad_norm: 0.9999991857208499, iteration: 72653
loss: 1.0050047636032104,grad_norm: 0.9999991984702346, iteration: 72654
loss: 0.9984634518623352,grad_norm: 0.9999991709235981, iteration: 72655
loss: 0.9912052154541016,grad_norm: 0.9352743699589083, iteration: 72656
loss: 1.0352281332015991,grad_norm: 0.9939124423106894, iteration: 72657
loss: 1.007776141166687,grad_norm: 0.9746220535649229, iteration: 72658
loss: 0.9703846573829651,grad_norm: 0.9749734124977372, iteration: 72659
loss: 0.9932703971862793,grad_norm: 0.9444783791353726, iteration: 72660
loss: 0.989689290523529,grad_norm: 0.9999992147680911, iteration: 72661
loss: 1.0140670537948608,grad_norm: 0.9924202293384579, iteration: 72662
loss: 0.9651205539703369,grad_norm: 0.9999991555728409, iteration: 72663
loss: 0.9731370210647583,grad_norm: 0.9999989580050276, iteration: 72664
loss: 1.0596342086791992,grad_norm: 0.9999994905381561, iteration: 72665
loss: 1.040959358215332,grad_norm: 0.999999527858046, iteration: 72666
loss: 1.0792427062988281,grad_norm: 0.9999992470657793, iteration: 72667
loss: 0.9781088829040527,grad_norm: 0.9999990784699152, iteration: 72668
loss: 1.0116801261901855,grad_norm: 0.9999992182585654, iteration: 72669
loss: 1.0029089450836182,grad_norm: 0.9969587846436176, iteration: 72670
loss: 1.0192676782608032,grad_norm: 0.9433059463980953, iteration: 72671
loss: 1.0001763105392456,grad_norm: 0.9719561794192524, iteration: 72672
loss: 1.0103340148925781,grad_norm: 0.9999991325323307, iteration: 72673
loss: 1.0218859910964966,grad_norm: 0.9999991328451344, iteration: 72674
loss: 1.0183824300765991,grad_norm: 0.9999988206299946, iteration: 72675
loss: 1.0027217864990234,grad_norm: 0.9999993714055899, iteration: 72676
loss: 1.0280667543411255,grad_norm: 0.9999993721716226, iteration: 72677
loss: 1.0003747940063477,grad_norm: 0.9999992016058536, iteration: 72678
loss: 0.9602148532867432,grad_norm: 0.9999992688264847, iteration: 72679
loss: 1.0244508981704712,grad_norm: 0.9999989830880466, iteration: 72680
loss: 0.99338698387146,grad_norm: 0.9999990307888476, iteration: 72681
loss: 1.0171635150909424,grad_norm: 0.999999138445409, iteration: 72682
loss: 0.9980996251106262,grad_norm: 0.9999992313486795, iteration: 72683
loss: 0.9977269172668457,grad_norm: 0.9999991713921146, iteration: 72684
loss: 0.942108154296875,grad_norm: 0.9999991594160093, iteration: 72685
loss: 1.0416492223739624,grad_norm: 0.9999991340215244, iteration: 72686
loss: 0.9925774931907654,grad_norm: 0.9999993062829561, iteration: 72687
loss: 1.004151463508606,grad_norm: 0.9999991444588647, iteration: 72688
loss: 0.9530876278877258,grad_norm: 0.9999989605090225, iteration: 72689
loss: 1.0204322338104248,grad_norm: 0.9200241013793481, iteration: 72690
loss: 1.0125412940979004,grad_norm: 0.9999990620000263, iteration: 72691
loss: 0.9759259223937988,grad_norm: 0.9999991796633534, iteration: 72692
loss: 1.036419153213501,grad_norm: 0.9999993324171038, iteration: 72693
loss: 0.9572272300720215,grad_norm: 0.9619549082419602, iteration: 72694
loss: 1.0140119791030884,grad_norm: 0.9999991738720374, iteration: 72695
loss: 0.9807714223861694,grad_norm: 0.9709963988759037, iteration: 72696
loss: 0.9663608074188232,grad_norm: 0.9345417949845425, iteration: 72697
loss: 1.0328006744384766,grad_norm: 0.9785903587961662, iteration: 72698
loss: 0.9953672885894775,grad_norm: 0.9999992434860892, iteration: 72699
loss: 1.0261082649230957,grad_norm: 0.9999991745505591, iteration: 72700
loss: 1.0084216594696045,grad_norm: 0.999999092408691, iteration: 72701
loss: 0.9581975340843201,grad_norm: 0.9370664079251104, iteration: 72702
loss: 0.9650725722312927,grad_norm: 0.999999141688569, iteration: 72703
loss: 0.9920898675918579,grad_norm: 0.9999990923950628, iteration: 72704
loss: 1.0054610967636108,grad_norm: 0.9999990780962219, iteration: 72705
loss: 0.9952482581138611,grad_norm: 0.9896590752887282, iteration: 72706
loss: 1.019425630569458,grad_norm: 0.999999367961316, iteration: 72707
loss: 1.0032929182052612,grad_norm: 0.9999990591071456, iteration: 72708
loss: 0.9999069571495056,grad_norm: 0.9999992096330766, iteration: 72709
loss: 1.0049535036087036,grad_norm: 0.9999989366671962, iteration: 72710
loss: 1.0118504762649536,grad_norm: 0.9999991205972737, iteration: 72711
loss: 0.997520387172699,grad_norm: 0.8565179032852316, iteration: 72712
loss: 1.015061616897583,grad_norm: 0.9999992665482312, iteration: 72713
loss: 1.0214290618896484,grad_norm: 0.9258674018508548, iteration: 72714
loss: 0.9923481345176697,grad_norm: 0.8915065841890338, iteration: 72715
loss: 1.0064791440963745,grad_norm: 0.9999992151099847, iteration: 72716
loss: 0.985757052898407,grad_norm: 0.9999992113746643, iteration: 72717
loss: 0.9916180372238159,grad_norm: 0.9890849041744522, iteration: 72718
loss: 0.9798190593719482,grad_norm: 0.9999990955368476, iteration: 72719
loss: 0.9999310970306396,grad_norm: 0.999999222270556, iteration: 72720
loss: 0.9897837042808533,grad_norm: 0.9152889564683484, iteration: 72721
loss: 1.0402352809906006,grad_norm: 0.8327641228327293, iteration: 72722
loss: 1.0424261093139648,grad_norm: 0.9999992721599095, iteration: 72723
loss: 0.9891631603240967,grad_norm: 0.9999991628564114, iteration: 72724
loss: 1.0064698457717896,grad_norm: 0.9259108931785841, iteration: 72725
loss: 0.9514191746711731,grad_norm: 0.9894691028259485, iteration: 72726
loss: 1.0140894651412964,grad_norm: 0.9999991548640385, iteration: 72727
loss: 0.9776791334152222,grad_norm: 0.9999989401265382, iteration: 72728
loss: 0.9827612042427063,grad_norm: 0.999283833917758, iteration: 72729
loss: 1.0018028020858765,grad_norm: 0.9999989862559323, iteration: 72730
loss: 0.937813401222229,grad_norm: 0.9984523656509428, iteration: 72731
loss: 1.0046766996383667,grad_norm: 0.9504872586878073, iteration: 72732
loss: 0.965299665927887,grad_norm: 0.9999991093806375, iteration: 72733
loss: 1.0230423212051392,grad_norm: 0.9999992302386945, iteration: 72734
loss: 0.9716365933418274,grad_norm: 0.9999992540781659, iteration: 72735
loss: 1.0000128746032715,grad_norm: 0.999999111425279, iteration: 72736
loss: 0.9945125579833984,grad_norm: 0.7922369526975936, iteration: 72737
loss: 0.9755545854568481,grad_norm: 0.9999991975933185, iteration: 72738
loss: 1.011427879333496,grad_norm: 0.9999994696763668, iteration: 72739
loss: 0.9868959188461304,grad_norm: 0.9449935419639234, iteration: 72740
loss: 1.0274975299835205,grad_norm: 0.9999989579843697, iteration: 72741
loss: 0.979221761226654,grad_norm: 0.9088844516230796, iteration: 72742
loss: 1.0013123750686646,grad_norm: 0.9999992041278521, iteration: 72743
loss: 1.0146254301071167,grad_norm: 0.9306401680332418, iteration: 72744
loss: 1.0131675004959106,grad_norm: 0.9999988564049132, iteration: 72745
loss: 1.0203005075454712,grad_norm: 0.9585673711961645, iteration: 72746
loss: 1.0179755687713623,grad_norm: 0.963890589522939, iteration: 72747
loss: 1.0210211277008057,grad_norm: 0.9999991297432563, iteration: 72748
loss: 1.0030896663665771,grad_norm: 0.9999997558669592, iteration: 72749
loss: 0.9793080687522888,grad_norm: 0.9999991939971608, iteration: 72750
loss: 1.046051025390625,grad_norm: 0.982427834440367, iteration: 72751
loss: 0.9972460269927979,grad_norm: 0.9999990595330877, iteration: 72752
loss: 1.0476387739181519,grad_norm: 0.9999990659173171, iteration: 72753
loss: 0.9720027446746826,grad_norm: 0.9999990853070174, iteration: 72754
loss: 0.9781554937362671,grad_norm: 0.9999994106383335, iteration: 72755
loss: 0.9942536354064941,grad_norm: 0.998634480482537, iteration: 72756
loss: 0.9977876543998718,grad_norm: 0.9999989446702188, iteration: 72757
loss: 1.001351237297058,grad_norm: 0.9999991116940963, iteration: 72758
loss: 0.9909383058547974,grad_norm: 0.9999991983965074, iteration: 72759
loss: 0.9750125408172607,grad_norm: 0.9653159075093307, iteration: 72760
loss: 0.9704639911651611,grad_norm: 0.9999990221153718, iteration: 72761
loss: 0.9874688386917114,grad_norm: 0.9204231811578002, iteration: 72762
loss: 0.9968306422233582,grad_norm: 0.9999991392556908, iteration: 72763
loss: 0.974502682685852,grad_norm: 0.9999991406270423, iteration: 72764
loss: 1.0248932838439941,grad_norm: 0.9999993054783226, iteration: 72765
loss: 1.0189298391342163,grad_norm: 0.9999990247548698, iteration: 72766
loss: 1.0178653001785278,grad_norm: 0.9930281014993331, iteration: 72767
loss: 1.0205798149108887,grad_norm: 0.9999991354856446, iteration: 72768
loss: 1.0031449794769287,grad_norm: 0.9999991764373476, iteration: 72769
loss: 1.069027304649353,grad_norm: 0.9999993704837483, iteration: 72770
loss: 1.0245940685272217,grad_norm: 0.9916012875818822, iteration: 72771
loss: 1.0045441389083862,grad_norm: 0.9999992509547422, iteration: 72772
loss: 1.0162171125411987,grad_norm: 0.9999989937133299, iteration: 72773
loss: 0.9905576705932617,grad_norm: 0.9999997493060859, iteration: 72774
loss: 0.9552109241485596,grad_norm: 0.8327906399395762, iteration: 72775
loss: 1.0240240097045898,grad_norm: 0.9999991256104105, iteration: 72776
loss: 1.0170493125915527,grad_norm: 0.8839582046378524, iteration: 72777
loss: 0.9725164175033569,grad_norm: 0.906163262119016, iteration: 72778
loss: 0.9783914089202881,grad_norm: 0.9032529125621492, iteration: 72779
loss: 1.079959750175476,grad_norm: 0.9999996512774393, iteration: 72780
loss: 0.9628545641899109,grad_norm: 0.9999991186966631, iteration: 72781
loss: 0.9931942224502563,grad_norm: 0.939214433172659, iteration: 72782
loss: 0.9977952837944031,grad_norm: 0.977550592936792, iteration: 72783
loss: 0.9810467958450317,grad_norm: 0.9537684484810213, iteration: 72784
loss: 0.9626392722129822,grad_norm: 0.9999989895145686, iteration: 72785
loss: 0.9420996904373169,grad_norm: 0.9999991213946017, iteration: 72786
loss: 0.9699329137802124,grad_norm: 0.9999990976379669, iteration: 72787
loss: 1.0059123039245605,grad_norm: 0.9878401052296667, iteration: 72788
loss: 0.9848240613937378,grad_norm: 0.9056060234232574, iteration: 72789
loss: 1.0205248594284058,grad_norm: 0.9999992184934787, iteration: 72790
loss: 1.0168061256408691,grad_norm: 0.9910170803590924, iteration: 72791
loss: 1.0093351602554321,grad_norm: 0.9999990271683987, iteration: 72792
loss: 0.9964836239814758,grad_norm: 0.9454225960213102, iteration: 72793
loss: 1.1971074342727661,grad_norm: 0.9999998888615037, iteration: 72794
loss: 0.969473659992218,grad_norm: 0.9999992184395579, iteration: 72795
loss: 1.035996913909912,grad_norm: 0.9814832906409803, iteration: 72796
loss: 1.0012768507003784,grad_norm: 0.9999989769877494, iteration: 72797
loss: 0.987277626991272,grad_norm: 0.8828398168966404, iteration: 72798
loss: 1.008158564567566,grad_norm: 0.9999992556724575, iteration: 72799
loss: 1.0041308403015137,grad_norm: 0.9999991414852084, iteration: 72800
loss: 1.016508936882019,grad_norm: 0.9465597859231528, iteration: 72801
loss: 0.9940735101699829,grad_norm: 0.9493789017092038, iteration: 72802
loss: 0.9949941039085388,grad_norm: 0.9907271455633044, iteration: 72803
loss: 1.0055981874465942,grad_norm: 0.8839099966919531, iteration: 72804
loss: 1.0302271842956543,grad_norm: 0.9772918236583347, iteration: 72805
loss: 0.9969707131385803,grad_norm: 0.9999991048711786, iteration: 72806
loss: 0.9747968912124634,grad_norm: 0.993656906597153, iteration: 72807
loss: 0.984710156917572,grad_norm: 0.9999989749278619, iteration: 72808
loss: 0.9742692708969116,grad_norm: 0.9701316299589667, iteration: 72809
loss: 1.019252896308899,grad_norm: 0.9999989986150539, iteration: 72810
loss: 1.0108343362808228,grad_norm: 0.9999990686265513, iteration: 72811
loss: 0.9892557859420776,grad_norm: 0.9145731310686951, iteration: 72812
loss: 0.9888657331466675,grad_norm: 0.9896075820749984, iteration: 72813
loss: 1.0169737339019775,grad_norm: 0.9999991728672669, iteration: 72814
loss: 0.9469426870346069,grad_norm: 0.9365191149798067, iteration: 72815
loss: 1.000503420829773,grad_norm: 0.999999122155309, iteration: 72816
loss: 1.0094530582427979,grad_norm: 0.9999989469342778, iteration: 72817
loss: 1.0082660913467407,grad_norm: 0.9999992215919999, iteration: 72818
loss: 0.9838466048240662,grad_norm: 0.9999990957262992, iteration: 72819
loss: 0.9872869849205017,grad_norm: 0.9031708647104475, iteration: 72820
loss: 0.9851222038269043,grad_norm: 0.9038714042452605, iteration: 72821
loss: 0.961409330368042,grad_norm: 0.9108260058102035, iteration: 72822
loss: 1.0177158117294312,grad_norm: 0.9280161684657502, iteration: 72823
loss: 1.0163583755493164,grad_norm: 0.999999197097391, iteration: 72824
loss: 0.9994983077049255,grad_norm: 0.9999993138752976, iteration: 72825
loss: 0.9971791505813599,grad_norm: 0.9273418197377647, iteration: 72826
loss: 0.9993312954902649,grad_norm: 0.9841908382686193, iteration: 72827
loss: 1.0166244506835938,grad_norm: 0.999999162261601, iteration: 72828
loss: 1.0081795454025269,grad_norm: 0.999283580138935, iteration: 72829
loss: 1.0166289806365967,grad_norm: 0.9999990991813253, iteration: 72830
loss: 1.0065041780471802,grad_norm: 0.9999991841503355, iteration: 72831
loss: 0.991830050945282,grad_norm: 0.9949550254183522, iteration: 72832
loss: 0.9689863324165344,grad_norm: 0.9999995794134766, iteration: 72833
loss: 1.0213724374771118,grad_norm: 0.999999308849413, iteration: 72834
loss: 1.0134687423706055,grad_norm: 0.9999992977731106, iteration: 72835
loss: 0.982624351978302,grad_norm: 0.9999991629225246, iteration: 72836
loss: 1.010968565940857,grad_norm: 0.8746412047535422, iteration: 72837
loss: 0.9781663417816162,grad_norm: 0.9999991840594514, iteration: 72838
loss: 0.9921852946281433,grad_norm: 0.936019546316309, iteration: 72839
loss: 0.9971184134483337,grad_norm: 0.9161385638857225, iteration: 72840
loss: 1.0208090543746948,grad_norm: 0.9999991281848172, iteration: 72841
loss: 0.9906712174415588,grad_norm: 0.9999992655129305, iteration: 72842
loss: 1.0305067300796509,grad_norm: 0.9999991077626694, iteration: 72843
loss: 1.004661202430725,grad_norm: 0.9999990053873997, iteration: 72844
loss: 1.0116270780563354,grad_norm: 0.9999992226517775, iteration: 72845
loss: 0.9883497357368469,grad_norm: 0.9999991040769659, iteration: 72846
loss: 0.9986343383789062,grad_norm: 0.9519187016688399, iteration: 72847
loss: 0.9832307696342468,grad_norm: 0.9999991278418269, iteration: 72848
loss: 0.9984979033470154,grad_norm: 0.9404719606329938, iteration: 72849
loss: 1.023577332496643,grad_norm: 0.999999280912489, iteration: 72850
loss: 0.9770015478134155,grad_norm: 0.9603928011930694, iteration: 72851
loss: 0.9840118288993835,grad_norm: 0.9999992702151997, iteration: 72852
loss: 1.0051642656326294,grad_norm: 0.9492007290716278, iteration: 72853
loss: 0.993234395980835,grad_norm: 0.9686636895982478, iteration: 72854
loss: 0.9847637414932251,grad_norm: 0.9999991194869275, iteration: 72855
loss: 0.9882457256317139,grad_norm: 0.9837383295190699, iteration: 72856
loss: 1.0253454446792603,grad_norm: 0.9999992640926593, iteration: 72857
loss: 1.0077483654022217,grad_norm: 0.999999288966279, iteration: 72858
loss: 0.9945229887962341,grad_norm: 0.9714336856627587, iteration: 72859
loss: 0.9632046818733215,grad_norm: 0.9554004318750801, iteration: 72860
loss: 1.0119792222976685,grad_norm: 0.999998956027013, iteration: 72861
loss: 0.9494218826293945,grad_norm: 0.9999991314850195, iteration: 72862
loss: 1.028542399406433,grad_norm: 0.926968678075967, iteration: 72863
loss: 1.0210273265838623,grad_norm: 0.9999991920775921, iteration: 72864
loss: 1.01901376247406,grad_norm: 0.9999990966568295, iteration: 72865
loss: 0.9944669008255005,grad_norm: 0.8653125599779914, iteration: 72866
loss: 0.985974907875061,grad_norm: 0.9999990147139025, iteration: 72867
loss: 0.984815776348114,grad_norm: 0.999999067497015, iteration: 72868
loss: 1.0166794061660767,grad_norm: 0.9999992689559001, iteration: 72869
loss: 1.0004005432128906,grad_norm: 0.9999989432668465, iteration: 72870
loss: 0.9767141938209534,grad_norm: 0.9574259569683349, iteration: 72871
loss: 1.0712906122207642,grad_norm: 0.999999484465016, iteration: 72872
loss: 1.0770716667175293,grad_norm: 0.9999997766924511, iteration: 72873
loss: 1.0027081966400146,grad_norm: 0.9999991100746422, iteration: 72874
loss: 0.9928215146064758,grad_norm: 0.9999990495801384, iteration: 72875
loss: 1.0257091522216797,grad_norm: 0.9999990314073145, iteration: 72876
loss: 1.040964126586914,grad_norm: 0.9999991750829407, iteration: 72877
loss: 0.9804269075393677,grad_norm: 0.9555796376552332, iteration: 72878
loss: 0.9560457468032837,grad_norm: 0.9999991361557067, iteration: 72879
loss: 0.9842028021812439,grad_norm: 0.9999990926757907, iteration: 72880
loss: 1.001928448677063,grad_norm: 0.9999990804979885, iteration: 72881
loss: 0.9921435117721558,grad_norm: 0.9999990703865888, iteration: 72882
loss: 1.041675329208374,grad_norm: 0.9999994791846076, iteration: 72883
loss: 0.9917199611663818,grad_norm: 0.9999990153031562, iteration: 72884
loss: 0.9275460243225098,grad_norm: 0.9728623222958018, iteration: 72885
loss: 0.9957529306411743,grad_norm: 0.9918313173235698, iteration: 72886
loss: 1.0260059833526611,grad_norm: 0.9406722410170018, iteration: 72887
loss: 0.9813913106918335,grad_norm: 0.9999990832777604, iteration: 72888
loss: 0.9773904085159302,grad_norm: 0.9999991702667317, iteration: 72889
loss: 0.9943236708641052,grad_norm: 0.9999990425186934, iteration: 72890
loss: 0.9780887961387634,grad_norm: 0.9999990422019975, iteration: 72891
loss: 1.0193783044815063,grad_norm: 0.9590180149875774, iteration: 72892
loss: 0.9599290490150452,grad_norm: 0.9999993143129796, iteration: 72893
loss: 1.0127640962600708,grad_norm: 0.9877681843180427, iteration: 72894
loss: 1.0039348602294922,grad_norm: 0.9999992415103958, iteration: 72895
loss: 1.0151666402816772,grad_norm: 0.9670865865628668, iteration: 72896
loss: 0.985736072063446,grad_norm: 0.9999990045827853, iteration: 72897
loss: 1.0407158136367798,grad_norm: 0.9999998813280295, iteration: 72898
loss: 0.9786935448646545,grad_norm: 0.8196450753820863, iteration: 72899
loss: 1.0094343423843384,grad_norm: 0.9999991343561734, iteration: 72900
loss: 0.9841004014015198,grad_norm: 0.9279870603932188, iteration: 72901
loss: 1.009996771812439,grad_norm: 0.9999990727665842, iteration: 72902
loss: 1.007058024406433,grad_norm: 0.9264826080655987, iteration: 72903
loss: 1.024059772491455,grad_norm: 0.9774987699422812, iteration: 72904
loss: 0.9353904128074646,grad_norm: 0.8794387855204469, iteration: 72905
loss: 1.0030732154846191,grad_norm: 0.9533242419197606, iteration: 72906
loss: 1.0022052526474,grad_norm: 0.9999991033434783, iteration: 72907
loss: 1.0197855234146118,grad_norm: 0.9946781446238873, iteration: 72908
loss: 0.9994416236877441,grad_norm: 0.9999990174968684, iteration: 72909
loss: 1.017933964729309,grad_norm: 0.9965547752557947, iteration: 72910
loss: 0.9727997779846191,grad_norm: 0.9999990471837266, iteration: 72911
loss: 1.0121142864227295,grad_norm: 0.9999991809633937, iteration: 72912
loss: 1.0101968050003052,grad_norm: 0.8357451063609146, iteration: 72913
loss: 1.0229029655456543,grad_norm: 0.9999992445674444, iteration: 72914
loss: 1.0113400220870972,grad_norm: 0.9330182156302609, iteration: 72915
loss: 1.025647521018982,grad_norm: 0.9999991079203706, iteration: 72916
loss: 1.0019540786743164,grad_norm: 0.9999990723361726, iteration: 72917
loss: 0.9864789247512817,grad_norm: 0.9999990687646969, iteration: 72918
loss: 0.9915844202041626,grad_norm: 0.9999991591304145, iteration: 72919
loss: 0.9694336652755737,grad_norm: 0.9999992473630946, iteration: 72920
loss: 1.0223816633224487,grad_norm: 0.9999991749718159, iteration: 72921
loss: 0.9870187044143677,grad_norm: 0.9999991675843071, iteration: 72922
loss: 0.9670622944831848,grad_norm: 0.9999990827497872, iteration: 72923
loss: 1.0036510229110718,grad_norm: 0.9600590831574319, iteration: 72924
loss: 1.0104053020477295,grad_norm: 0.9999992284578443, iteration: 72925
loss: 1.040822982788086,grad_norm: 0.9999997219222941, iteration: 72926
loss: 0.9927613735198975,grad_norm: 0.9999990467037743, iteration: 72927
loss: 1.000731348991394,grad_norm: 0.9999991713901246, iteration: 72928
loss: 0.9745243191719055,grad_norm: 0.8688280112124177, iteration: 72929
loss: 0.9879692792892456,grad_norm: 0.9999989968031253, iteration: 72930
loss: 0.9826918840408325,grad_norm: 0.9999990617652411, iteration: 72931
loss: 1.0017101764678955,grad_norm: 0.9999990295944483, iteration: 72932
loss: 1.009516954421997,grad_norm: 0.999999089396657, iteration: 72933
loss: 0.9919667840003967,grad_norm: 0.9728994874402813, iteration: 72934
loss: 0.9556466341018677,grad_norm: 0.9999990939373328, iteration: 72935
loss: 1.0215916633605957,grad_norm: 0.8808014625562931, iteration: 72936
loss: 1.014350414276123,grad_norm: 0.8528953396147855, iteration: 72937
loss: 0.9998376369476318,grad_norm: 0.8794401791866886, iteration: 72938
loss: 1.0193427801132202,grad_norm: 0.9999991521579534, iteration: 72939
loss: 0.9716477394104004,grad_norm: 0.9999992383768215, iteration: 72940
loss: 1.035037875175476,grad_norm: 0.9741472890684654, iteration: 72941
loss: 0.9884957075119019,grad_norm: 0.997543386948642, iteration: 72942
loss: 0.9635495543479919,grad_norm: 0.999999103346274, iteration: 72943
loss: 1.0040558576583862,grad_norm: 0.9999990139657436, iteration: 72944
loss: 0.9934984445571899,grad_norm: 0.8520736353027084, iteration: 72945
loss: 1.0304131507873535,grad_norm: 0.9999991101952512, iteration: 72946
loss: 1.0047155618667603,grad_norm: 0.9775276729171688, iteration: 72947
loss: 0.9875296950340271,grad_norm: 0.9999990674709307, iteration: 72948
loss: 0.9905984401702881,grad_norm: 0.9515063088818149, iteration: 72949
loss: 1.0437761545181274,grad_norm: 0.9999994334155683, iteration: 72950
loss: 1.0088015794754028,grad_norm: 0.9999991910424972, iteration: 72951
loss: 0.9899432063102722,grad_norm: 0.999999070865383, iteration: 72952
loss: 1.005746603012085,grad_norm: 0.9999991367381426, iteration: 72953
loss: 1.0166244506835938,grad_norm: 0.9999991227321245, iteration: 72954
loss: 1.018701434135437,grad_norm: 0.9999990487245146, iteration: 72955
loss: 0.9970744848251343,grad_norm: 0.9999991708488604, iteration: 72956
loss: 0.9804807901382446,grad_norm: 0.9999990000069334, iteration: 72957
loss: 1.0487720966339111,grad_norm: 0.9999990253649457, iteration: 72958
loss: 1.009122610092163,grad_norm: 0.9999991693838022, iteration: 72959
loss: 0.989459753036499,grad_norm: 0.7631751437362152, iteration: 72960
loss: 1.015636920928955,grad_norm: 0.9999991610817361, iteration: 72961
loss: 1.0321654081344604,grad_norm: 0.999999046235961, iteration: 72962
loss: 0.9978235960006714,grad_norm: 0.8987095586557468, iteration: 72963
loss: 0.984731137752533,grad_norm: 0.9669858658824588, iteration: 72964
loss: 1.018173098564148,grad_norm: 0.942320930494246, iteration: 72965
loss: 1.005008339881897,grad_norm: 0.9655406652652624, iteration: 72966
loss: 1.008294701576233,grad_norm: 0.9999990902072199, iteration: 72967
loss: 1.0290734767913818,grad_norm: 0.999999118588948, iteration: 72968
loss: 1.0269370079040527,grad_norm: 0.999999121620038, iteration: 72969
loss: 1.0277657508850098,grad_norm: 0.9999994784390679, iteration: 72970
loss: 1.0074561834335327,grad_norm: 0.9999991499582147, iteration: 72971
loss: 1.0283368825912476,grad_norm: 0.9999991233308952, iteration: 72972
loss: 1.0069496631622314,grad_norm: 0.9999989856192669, iteration: 72973
loss: 1.0038447380065918,grad_norm: 0.8980590857071923, iteration: 72974
loss: 0.9919301271438599,grad_norm: 0.9999991603161873, iteration: 72975
loss: 1.0143662691116333,grad_norm: 0.770300693786844, iteration: 72976
loss: 1.0242908000946045,grad_norm: 0.9999997984643255, iteration: 72977
loss: 1.0182483196258545,grad_norm: 0.9770431695604698, iteration: 72978
loss: 1.0149085521697998,grad_norm: 0.9181012984993139, iteration: 72979
loss: 1.016363263130188,grad_norm: 0.9999990421204695, iteration: 72980
loss: 1.0119974613189697,grad_norm: 0.9999993398945214, iteration: 72981
loss: 0.9795433878898621,grad_norm: 0.9999993193491377, iteration: 72982
loss: 0.9852633476257324,grad_norm: 0.9999990633391919, iteration: 72983
loss: 1.01003098487854,grad_norm: 0.9113209056662644, iteration: 72984
loss: 0.9844338297843933,grad_norm: 0.9999990116313454, iteration: 72985
loss: 1.015588402748108,grad_norm: 0.8792021657324853, iteration: 72986
loss: 1.0091090202331543,grad_norm: 0.9653516468306614, iteration: 72987
loss: 0.9910787343978882,grad_norm: 0.9999990849246054, iteration: 72988
loss: 0.9938954710960388,grad_norm: 0.9999993870832532, iteration: 72989
loss: 0.9988437294960022,grad_norm: 0.9440710966636574, iteration: 72990
loss: 1.004178524017334,grad_norm: 0.9999990332412829, iteration: 72991
loss: 1.006077766418457,grad_norm: 0.9999991795649406, iteration: 72992
loss: 1.6397051811218262,grad_norm: 0.9999997746704434, iteration: 72993
loss: 0.9959924221038818,grad_norm: 0.9999990941703231, iteration: 72994
loss: 1.0042954683303833,grad_norm: 0.9999989220245937, iteration: 72995
loss: 0.9762572050094604,grad_norm: 0.9579420763471184, iteration: 72996
loss: 1.075218677520752,grad_norm: 0.9999993610461336, iteration: 72997
loss: 1.0158953666687012,grad_norm: 0.9999995501776062, iteration: 72998
loss: 0.9875004291534424,grad_norm: 0.9999993061331087, iteration: 72999
loss: 0.9935466051101685,grad_norm: 0.8793469463361753, iteration: 73000
loss: 1.0050437450408936,grad_norm: 0.9134643090600537, iteration: 73001
loss: 1.0119404792785645,grad_norm: 0.9999991688782838, iteration: 73002
loss: 0.9875028729438782,grad_norm: 0.9999990951595494, iteration: 73003
loss: 1.0054048299789429,grad_norm: 0.9133148084657993, iteration: 73004
loss: 0.9770214557647705,grad_norm: 0.9260898233639749, iteration: 73005
loss: 0.9990791082382202,grad_norm: 0.9999998725318022, iteration: 73006
loss: 1.0824167728424072,grad_norm: 0.9999994216484233, iteration: 73007
loss: 1.0057729482650757,grad_norm: 0.8971589591355528, iteration: 73008
loss: 0.990302562713623,grad_norm: 0.9999991864604741, iteration: 73009
loss: 1.0314602851867676,grad_norm: 0.8477229550657901, iteration: 73010
loss: 1.007896900177002,grad_norm: 0.9999992046428386, iteration: 73011
loss: 1.0132853984832764,grad_norm: 0.9999995211543331, iteration: 73012
loss: 1.0216639041900635,grad_norm: 0.9049115001861328, iteration: 73013
loss: 0.9685357213020325,grad_norm: 0.9995942867920105, iteration: 73014
loss: 1.0205168724060059,grad_norm: 0.9999991840408872, iteration: 73015
loss: 1.004077434539795,grad_norm: 0.999999219147635, iteration: 73016
loss: 1.0174930095672607,grad_norm: 0.9999989676709338, iteration: 73017
loss: 0.9514360427856445,grad_norm: 0.965780812414346, iteration: 73018
loss: 0.9999280571937561,grad_norm: 0.9999990678309859, iteration: 73019
loss: 1.017371654510498,grad_norm: 0.9999989149068921, iteration: 73020
loss: 0.981696367263794,grad_norm: 0.9999990291062013, iteration: 73021
loss: 0.9728409051895142,grad_norm: 0.8702719202610232, iteration: 73022
loss: 1.006818175315857,grad_norm: 0.9364711959335824, iteration: 73023
loss: 0.9967495799064636,grad_norm: 0.9999989149496867, iteration: 73024
loss: 1.0081150531768799,grad_norm: 0.9999990826349445, iteration: 73025
loss: 1.071571946144104,grad_norm: 0.9999992235155695, iteration: 73026
loss: 1.0177876949310303,grad_norm: 0.999999173888131, iteration: 73027
loss: 1.0015569925308228,grad_norm: 0.9999992191428995, iteration: 73028
loss: 0.9896395802497864,grad_norm: 0.9999990254020854, iteration: 73029
loss: 1.0043936967849731,grad_norm: 0.9999992007381858, iteration: 73030
loss: 0.9738947749137878,grad_norm: 0.9347651638017469, iteration: 73031
loss: 0.99233478307724,grad_norm: 0.9999990971745286, iteration: 73032
loss: 1.0340354442596436,grad_norm: 0.9999997716830505, iteration: 73033
loss: 0.9825444221496582,grad_norm: 0.9999991782624621, iteration: 73034
loss: 1.004342794418335,grad_norm: 0.8664235699310869, iteration: 73035
loss: 1.032021164894104,grad_norm: 0.9999991335020603, iteration: 73036
loss: 1.0283108949661255,grad_norm: 0.9679817940526677, iteration: 73037
loss: 1.0026181936264038,grad_norm: 0.9970377858040352, iteration: 73038
loss: 1.0199834108352661,grad_norm: 0.9999992726807239, iteration: 73039
loss: 1.013453722000122,grad_norm: 0.9999991201372955, iteration: 73040
loss: 1.008838415145874,grad_norm: 0.9999991178857136, iteration: 73041
loss: 1.0224286317825317,grad_norm: 0.9999990506461106, iteration: 73042
loss: 0.9871640205383301,grad_norm: 0.9999989415241939, iteration: 73043
loss: 1.0247024297714233,grad_norm: 0.9293549767069398, iteration: 73044
loss: 1.0051733255386353,grad_norm: 0.9999991768088816, iteration: 73045
loss: 1.0106511116027832,grad_norm: 0.9999991133754738, iteration: 73046
loss: 1.006438970565796,grad_norm: 0.9999992375470667, iteration: 73047
loss: 1.0387901067733765,grad_norm: 0.9999991665684068, iteration: 73048
loss: 1.023016333580017,grad_norm: 0.9968398858298443, iteration: 73049
loss: 1.05097234249115,grad_norm: 0.9999991087216973, iteration: 73050
loss: 1.0395880937576294,grad_norm: 0.9999991752785174, iteration: 73051
loss: 0.9997013807296753,grad_norm: 0.9999991550258044, iteration: 73052
loss: 1.0123631954193115,grad_norm: 0.8986149350666208, iteration: 73053
loss: 1.0066545009613037,grad_norm: 0.9999992026991668, iteration: 73054
loss: 1.018060564994812,grad_norm: 0.8863856219952253, iteration: 73055
loss: 0.987067699432373,grad_norm: 0.9999991182190473, iteration: 73056
loss: 0.9832123517990112,grad_norm: 0.9999991413235225, iteration: 73057
loss: 1.0190365314483643,grad_norm: 0.9999992087826816, iteration: 73058
loss: 1.0219067335128784,grad_norm: 0.9999990688357014, iteration: 73059
loss: 1.0262504816055298,grad_norm: 0.9528330867355945, iteration: 73060
loss: 0.976599395275116,grad_norm: 0.9999989297707439, iteration: 73061
loss: 0.9783660769462585,grad_norm: 0.9957098059358336, iteration: 73062
loss: 1.0392194986343384,grad_norm: 0.999999288840823, iteration: 73063
loss: 1.013321042060852,grad_norm: 0.8530375836687882, iteration: 73064
loss: 0.9938644170761108,grad_norm: 0.9999991665988812, iteration: 73065
loss: 0.9992039203643799,grad_norm: 0.9332422112910281, iteration: 73066
loss: 1.053654670715332,grad_norm: 0.9999992963166655, iteration: 73067
loss: 1.0058777332305908,grad_norm: 0.9999990818615361, iteration: 73068
loss: 0.9926547408103943,grad_norm: 0.9606761486870857, iteration: 73069
loss: 0.9892004132270813,grad_norm: 0.9999992928104836, iteration: 73070
loss: 0.9933477640151978,grad_norm: 0.9999991085643222, iteration: 73071
loss: 1.0213305950164795,grad_norm: 0.9999991936961432, iteration: 73072
loss: 1.004734992980957,grad_norm: 0.9392026766998276, iteration: 73073
loss: 1.0000497102737427,grad_norm: 0.9999992897732124, iteration: 73074
loss: 1.0095206499099731,grad_norm: 0.9999992482638677, iteration: 73075
loss: 1.020443320274353,grad_norm: 0.9999992464839242, iteration: 73076
loss: 0.9924634099006653,grad_norm: 0.8444077956591081, iteration: 73077
loss: 1.001854419708252,grad_norm: 0.999999027906366, iteration: 73078
loss: 0.9947532415390015,grad_norm: 0.9424485625132067, iteration: 73079
loss: 1.0086804628372192,grad_norm: 0.9999997733092311, iteration: 73080
loss: 0.9891096353530884,grad_norm: 0.9999989509492356, iteration: 73081
loss: 0.9695861339569092,grad_norm: 0.9937722964631421, iteration: 73082
loss: 0.9779691696166992,grad_norm: 0.9999991535929865, iteration: 73083
loss: 0.9892587661743164,grad_norm: 0.9999992282638346, iteration: 73084
loss: 1.0208674669265747,grad_norm: 0.9999992148367115, iteration: 73085
loss: 1.0107523202896118,grad_norm: 0.9999990294737237, iteration: 73086
loss: 1.0048224925994873,grad_norm: 0.9880162559412712, iteration: 73087
loss: 0.9849324226379395,grad_norm: 0.8840580622980467, iteration: 73088
loss: 1.042936086654663,grad_norm: 0.9999989871924058, iteration: 73089
loss: 1.0165188312530518,grad_norm: 0.9999991482838515, iteration: 73090
loss: 0.9772111177444458,grad_norm: 0.9426145036978367, iteration: 73091
loss: 0.9687168598175049,grad_norm: 0.999999209807411, iteration: 73092
loss: 1.0001111030578613,grad_norm: 0.9227921674698724, iteration: 73093
loss: 1.0075371265411377,grad_norm: 0.9999992057493964, iteration: 73094
loss: 0.9810597896575928,grad_norm: 0.999998987044383, iteration: 73095
loss: 1.0365196466445923,grad_norm: 0.9266663619846769, iteration: 73096
loss: 0.990226149559021,grad_norm: 0.9999989920491589, iteration: 73097
loss: 1.0148584842681885,grad_norm: 0.9620183003081305, iteration: 73098
loss: 1.006059169769287,grad_norm: 0.9999993718319548, iteration: 73099
loss: 1.0213066339492798,grad_norm: 0.9999995226751357, iteration: 73100
loss: 0.9928382039070129,grad_norm: 0.939502665350028, iteration: 73101
loss: 0.9840987920761108,grad_norm: 0.9999991041613162, iteration: 73102
loss: 1.0083847045898438,grad_norm: 0.9999991589558287, iteration: 73103
loss: 1.0085288286209106,grad_norm: 0.9999989635040561, iteration: 73104
loss: 1.0024352073669434,grad_norm: 0.9867622757165809, iteration: 73105
loss: 0.998018741607666,grad_norm: 0.9448105768638226, iteration: 73106
loss: 0.9815958142280579,grad_norm: 0.9999992382849996, iteration: 73107
loss: 0.9723824262619019,grad_norm: 0.9999991112298632, iteration: 73108
loss: 1.035737156867981,grad_norm: 0.9999990143207815, iteration: 73109
loss: 1.0081393718719482,grad_norm: 0.9999996861826532, iteration: 73110
loss: 1.0455126762390137,grad_norm: 0.9999997898459324, iteration: 73111
loss: 1.0300453901290894,grad_norm: 0.9300518933653731, iteration: 73112
loss: 1.0687544345855713,grad_norm: 0.9114148092669778, iteration: 73113
loss: 1.0089958906173706,grad_norm: 0.9999990496191828, iteration: 73114
loss: 0.9948218464851379,grad_norm: 0.9903974031747242, iteration: 73115
loss: 0.9922367334365845,grad_norm: 0.9999990415165855, iteration: 73116
loss: 1.0048373937606812,grad_norm: 0.9799105185753284, iteration: 73117
loss: 1.0872174501419067,grad_norm: 0.999999832505984, iteration: 73118
loss: 0.9949873685836792,grad_norm: 0.9999991541683059, iteration: 73119
loss: 1.0043847560882568,grad_norm: 0.888280339131046, iteration: 73120
loss: 1.0205546617507935,grad_norm: 0.9999990046696589, iteration: 73121
loss: 0.968802809715271,grad_norm: 0.9999991210109154, iteration: 73122
loss: 0.9887993931770325,grad_norm: 0.9999991646135648, iteration: 73123
loss: 0.9689568877220154,grad_norm: 0.9999991840424715, iteration: 73124
loss: 0.9874745607376099,grad_norm: 0.9999990482826974, iteration: 73125
loss: 0.9988450407981873,grad_norm: 0.9441969118400135, iteration: 73126
loss: 0.9986308813095093,grad_norm: 0.9999990216254614, iteration: 73127
loss: 0.9842521548271179,grad_norm: 0.9999992190754354, iteration: 73128
loss: 0.9848006367683411,grad_norm: 0.9818556397658624, iteration: 73129
loss: 0.9726987481117249,grad_norm: 0.9682391093842291, iteration: 73130
loss: 0.9940203428268433,grad_norm: 0.9067702395331688, iteration: 73131
loss: 1.0134119987487793,grad_norm: 0.929659460624696, iteration: 73132
loss: 1.007211446762085,grad_norm: 0.999999209614194, iteration: 73133
loss: 1.0584696531295776,grad_norm: 0.9398486357719938, iteration: 73134
loss: 1.0061264038085938,grad_norm: 0.9999991133856114, iteration: 73135
loss: 0.9674191474914551,grad_norm: 0.9999991222898583, iteration: 73136
loss: 1.023627519607544,grad_norm: 0.9999991138300127, iteration: 73137
loss: 0.9756518602371216,grad_norm: 0.9999992222200346, iteration: 73138
loss: 0.9835265278816223,grad_norm: 0.9999989807518914, iteration: 73139
loss: 0.970379650592804,grad_norm: 0.9999991360788338, iteration: 73140
loss: 1.018130898475647,grad_norm: 0.9999990497508627, iteration: 73141
loss: 1.0298823118209839,grad_norm: 0.9999990939543559, iteration: 73142
loss: 0.9987019896507263,grad_norm: 0.9999991750968459, iteration: 73143
loss: 0.942882776260376,grad_norm: 0.9999990181608053, iteration: 73144
loss: 0.981208860874176,grad_norm: 0.999999045397389, iteration: 73145
loss: 1.026270866394043,grad_norm: 0.9999992887318254, iteration: 73146
loss: 1.0393136739730835,grad_norm: 0.9815566036680363, iteration: 73147
loss: 0.964503824710846,grad_norm: 0.9999992451933336, iteration: 73148
loss: 1.0410608053207397,grad_norm: 0.7284221032109051, iteration: 73149
loss: 1.024997353553772,grad_norm: 0.999999181727592, iteration: 73150
loss: 1.0053324699401855,grad_norm: 0.9999994162213103, iteration: 73151
loss: 1.02047598361969,grad_norm: 0.9412639138870805, iteration: 73152
loss: 1.0239660739898682,grad_norm: 0.9999993522343442, iteration: 73153
loss: 1.0102394819259644,grad_norm: 0.99999920043387, iteration: 73154
loss: 0.9999139308929443,grad_norm: 0.9419889001516366, iteration: 73155
loss: 0.9888250827789307,grad_norm: 0.9939237632291686, iteration: 73156
loss: 0.9702118039131165,grad_norm: 0.9409307511185077, iteration: 73157
loss: 0.9958299398422241,grad_norm: 0.9850236403702417, iteration: 73158
loss: 1.1048808097839355,grad_norm: 0.9999995217139738, iteration: 73159
loss: 1.0824416875839233,grad_norm: 0.9999994990556081, iteration: 73160
loss: 1.0569454431533813,grad_norm: 0.9999993843130692, iteration: 73161
loss: 0.9967325925827026,grad_norm: 0.9313084037166159, iteration: 73162
loss: 1.0063024759292603,grad_norm: 0.9999990099495764, iteration: 73163
loss: 1.0048927068710327,grad_norm: 0.9999991359920118, iteration: 73164
loss: 1.0035979747772217,grad_norm: 0.9564545537922123, iteration: 73165
loss: 0.9986982941627502,grad_norm: 0.9999990033583218, iteration: 73166
loss: 0.9956746697425842,grad_norm: 0.9999991216071357, iteration: 73167
loss: 0.9998464584350586,grad_norm: 0.9999993495292357, iteration: 73168
loss: 1.0193166732788086,grad_norm: 0.9999992625724697, iteration: 73169
loss: 1.0243386030197144,grad_norm: 0.9861563161000197, iteration: 73170
loss: 1.0146621465682983,grad_norm: 0.9999991282352373, iteration: 73171
loss: 1.0019339323043823,grad_norm: 0.9562849624180354, iteration: 73172
loss: 0.9757698774337769,grad_norm: 0.9999991726434885, iteration: 73173
loss: 1.0109237432479858,grad_norm: 0.9999992324344514, iteration: 73174
loss: 1.0287879705429077,grad_norm: 0.9999993494280841, iteration: 73175
loss: 0.9855857491493225,grad_norm: 0.9742027765289137, iteration: 73176
loss: 1.0124562978744507,grad_norm: 0.9999991809849289, iteration: 73177
loss: 0.9919227361679077,grad_norm: 0.9999991878694203, iteration: 73178
loss: 1.0312061309814453,grad_norm: 0.9999988996529277, iteration: 73179
loss: 1.0137063264846802,grad_norm: 0.9999990816829445, iteration: 73180
loss: 0.9586340188980103,grad_norm: 0.905531033032241, iteration: 73181
loss: 1.027455449104309,grad_norm: 0.9999990683441977, iteration: 73182
loss: 1.0080543756484985,grad_norm: 0.9296233463112187, iteration: 73183
loss: 1.019208312034607,grad_norm: 0.9999991429802196, iteration: 73184
loss: 1.0430629253387451,grad_norm: 0.9999991530907103, iteration: 73185
loss: 0.9979794025421143,grad_norm: 0.9445541697551426, iteration: 73186
loss: 0.9880967736244202,grad_norm: 0.9492829778960539, iteration: 73187
loss: 1.0817981958389282,grad_norm: 0.999999030715164, iteration: 73188
loss: 1.0350528955459595,grad_norm: 0.9999992994172978, iteration: 73189
loss: 1.0347150564193726,grad_norm: 0.9999992161818212, iteration: 73190
loss: 1.0224435329437256,grad_norm: 0.9999992374368342, iteration: 73191
loss: 0.9940309524536133,grad_norm: 0.9999991800179722, iteration: 73192
loss: 0.9852553009986877,grad_norm: 0.9999991197757334, iteration: 73193
loss: 0.969569742679596,grad_norm: 0.9999991376307069, iteration: 73194
loss: 1.0254367589950562,grad_norm: 0.9999992138861782, iteration: 73195
loss: 1.00938081741333,grad_norm: 0.896188745183556, iteration: 73196
loss: 1.016266107559204,grad_norm: 0.9999991205149383, iteration: 73197
loss: 1.0041171312332153,grad_norm: 0.999999761059664, iteration: 73198
loss: 1.0330716371536255,grad_norm: 0.9999992408959295, iteration: 73199
loss: 0.9899608492851257,grad_norm: 0.9999992373325823, iteration: 73200
loss: 1.0058544874191284,grad_norm: 0.9999990129730725, iteration: 73201
loss: 1.0174765586853027,grad_norm: 0.9681602405590939, iteration: 73202
loss: 0.9834678769111633,grad_norm: 0.9067239809171846, iteration: 73203
loss: 0.9491547346115112,grad_norm: 0.9821669098216957, iteration: 73204
loss: 0.9872496128082275,grad_norm: 0.9999991646276284, iteration: 73205
loss: 1.0116256475448608,grad_norm: 0.9999989706146758, iteration: 73206
loss: 1.0035080909729004,grad_norm: 0.9107557297401072, iteration: 73207
loss: 1.0098503828048706,grad_norm: 0.9984703440658639, iteration: 73208
loss: 0.9759371280670166,grad_norm: 0.9710892610968318, iteration: 73209
loss: 0.9954327940940857,grad_norm: 0.9999991128589176, iteration: 73210
loss: 0.9781816005706787,grad_norm: 0.9999990338442347, iteration: 73211
loss: 1.0057458877563477,grad_norm: 0.9999990105551013, iteration: 73212
loss: 1.0277475118637085,grad_norm: 0.999999234351415, iteration: 73213
loss: 0.9941965341567993,grad_norm: 0.8625144864429536, iteration: 73214
loss: 0.997734546661377,grad_norm: 0.9999991721255541, iteration: 73215
loss: 1.004035234451294,grad_norm: 0.9999990979051357, iteration: 73216
loss: 1.0061907768249512,grad_norm: 0.9941266345614945, iteration: 73217
loss: 1.0122356414794922,grad_norm: 0.9999990169926772, iteration: 73218
loss: 1.0049506425857544,grad_norm: 0.9999991528752006, iteration: 73219
loss: 0.9910739064216614,grad_norm: 0.9851848673814991, iteration: 73220
loss: 0.9959186911582947,grad_norm: 0.8326129569676173, iteration: 73221
loss: 1.049538493156433,grad_norm: 0.9999997516607473, iteration: 73222
loss: 1.0166728496551514,grad_norm: 0.926732072227117, iteration: 73223
loss: 0.9864120483398438,grad_norm: 0.8403796973249851, iteration: 73224
loss: 1.0087320804595947,grad_norm: 0.9999990245342115, iteration: 73225
loss: 0.9875823855400085,grad_norm: 0.9566793770743736, iteration: 73226
loss: 1.004311203956604,grad_norm: 0.8879288819968314, iteration: 73227
loss: 1.0021069049835205,grad_norm: 0.999999242341542, iteration: 73228
loss: 1.0077829360961914,grad_norm: 0.9270525391929283, iteration: 73229
loss: 0.983364462852478,grad_norm: 0.9271325827854412, iteration: 73230
loss: 0.9606167078018188,grad_norm: 0.9999990840485294, iteration: 73231
loss: 0.9849706292152405,grad_norm: 0.9999993405429063, iteration: 73232
loss: 0.9882387518882751,grad_norm: 0.9031228356970652, iteration: 73233
loss: 1.0222721099853516,grad_norm: 0.9999991877565596, iteration: 73234
loss: 1.0086034536361694,grad_norm: 0.8663633933093892, iteration: 73235
loss: 1.0146126747131348,grad_norm: 0.9825793039142898, iteration: 73236
loss: 0.9524117112159729,grad_norm: 0.9546624327060894, iteration: 73237
loss: 1.023640751838684,grad_norm: 0.9999989931487369, iteration: 73238
loss: 1.0146982669830322,grad_norm: 0.932585582056744, iteration: 73239
loss: 1.0226612091064453,grad_norm: 0.9999992673727562, iteration: 73240
loss: 1.0036282539367676,grad_norm: 0.9999992229994225, iteration: 73241
loss: 0.9899627566337585,grad_norm: 0.9999991662434928, iteration: 73242
loss: 0.9665523767471313,grad_norm: 0.9999990921227611, iteration: 73243
loss: 0.9849615097045898,grad_norm: 0.9999992534135747, iteration: 73244
loss: 0.9871793985366821,grad_norm: 0.9999996451344599, iteration: 73245
loss: 1.0025153160095215,grad_norm: 0.9999991545613195, iteration: 73246
loss: 1.0276886224746704,grad_norm: 0.9491670658000265, iteration: 73247
loss: 1.0253092050552368,grad_norm: 0.8547670323635009, iteration: 73248
loss: 1.024857997894287,grad_norm: 0.9999990744331467, iteration: 73249
loss: 1.0015606880187988,grad_norm: 0.9999991309725819, iteration: 73250
loss: 0.9568058848381042,grad_norm: 0.9999989255173614, iteration: 73251
loss: 1.050278663635254,grad_norm: 0.999999128601077, iteration: 73252
loss: 0.955437421798706,grad_norm: 0.9999991971711912, iteration: 73253
loss: 1.0062525272369385,grad_norm: 0.9999995307996212, iteration: 73254
loss: 1.0252628326416016,grad_norm: 0.9999992049517635, iteration: 73255
loss: 1.0088828802108765,grad_norm: 0.9999991584192781, iteration: 73256
loss: 1.011818766593933,grad_norm: 0.9999990284003167, iteration: 73257
loss: 0.9788848757743835,grad_norm: 0.9999994333563736, iteration: 73258
loss: 1.0224058628082275,grad_norm: 0.9999997953758212, iteration: 73259
loss: 0.9721554517745972,grad_norm: 0.9999990931494029, iteration: 73260
loss: 0.9842342138290405,grad_norm: 0.9999998542405611, iteration: 73261
loss: 1.0140427350997925,grad_norm: 0.9999991387921835, iteration: 73262
loss: 1.0141818523406982,grad_norm: 0.9621911332110399, iteration: 73263
loss: 0.9984862804412842,grad_norm: 0.8656530101548919, iteration: 73264
loss: 1.0280356407165527,grad_norm: 0.9999998083027177, iteration: 73265
loss: 1.004758358001709,grad_norm: 0.9087514319108814, iteration: 73266
loss: 0.9867244958877563,grad_norm: 0.9999990793969337, iteration: 73267
loss: 0.9692262411117554,grad_norm: 0.9999991256964292, iteration: 73268
loss: 1.0201841592788696,grad_norm: 0.9999993058833361, iteration: 73269
loss: 1.0327277183532715,grad_norm: 0.9999991015556038, iteration: 73270
loss: 1.0282886028289795,grad_norm: 0.9999991387780001, iteration: 73271
loss: 0.9807388186454773,grad_norm: 0.8309626088828008, iteration: 73272
loss: 0.9633115530014038,grad_norm: 0.9913389493586663, iteration: 73273
loss: 1.0191493034362793,grad_norm: 0.9989504474187054, iteration: 73274
loss: 1.00841224193573,grad_norm: 0.9999992058126443, iteration: 73275
loss: 1.0085978507995605,grad_norm: 0.8997985034064692, iteration: 73276
loss: 1.0211540460586548,grad_norm: 0.9999990851673327, iteration: 73277
loss: 1.020437479019165,grad_norm: 0.9640880081916928, iteration: 73278
loss: 1.0250592231750488,grad_norm: 0.9999993561526725, iteration: 73279
loss: 1.0259443521499634,grad_norm: 0.9999994505639171, iteration: 73280
loss: 0.9777525067329407,grad_norm: 0.9419792885934336, iteration: 73281
loss: 1.026166319847107,grad_norm: 0.9999992279111324, iteration: 73282
loss: 0.9966000914573669,grad_norm: 0.9999991577092359, iteration: 73283
loss: 1.0001804828643799,grad_norm: 0.9999991318928019, iteration: 73284
loss: 0.9799014329910278,grad_norm: 0.9999992234509145, iteration: 73285
loss: 0.9745905995368958,grad_norm: 0.9999990706094607, iteration: 73286
loss: 1.0047913789749146,grad_norm: 0.9999992595513761, iteration: 73287
loss: 0.9881709218025208,grad_norm: 0.9999995540829274, iteration: 73288
loss: 1.022194743156433,grad_norm: 0.9736277013540098, iteration: 73289
loss: 0.9959861636161804,grad_norm: 0.9999989627320006, iteration: 73290
loss: 1.0001046657562256,grad_norm: 0.9999990904351249, iteration: 73291
loss: 1.0426785945892334,grad_norm: 0.9797792551333911, iteration: 73292
loss: 1.010611653327942,grad_norm: 0.9999991612712427, iteration: 73293
loss: 1.0672314167022705,grad_norm: 0.9999995048799412, iteration: 73294
loss: 1.0178983211517334,grad_norm: 0.9999991809373701, iteration: 73295
loss: 1.021066427230835,grad_norm: 0.9999993802157587, iteration: 73296
loss: 0.9602195620536804,grad_norm: 0.9999990927597925, iteration: 73297
loss: 1.0271750688552856,grad_norm: 0.8895168111381747, iteration: 73298
loss: 1.028476595878601,grad_norm: 0.9999990778763803, iteration: 73299
loss: 1.000488042831421,grad_norm: 0.957193420292884, iteration: 73300
loss: 0.9899522662162781,grad_norm: 0.9220420571165326, iteration: 73301
loss: 1.0207732915878296,grad_norm: 0.9999992575134081, iteration: 73302
loss: 1.0114349126815796,grad_norm: 0.999999027962382, iteration: 73303
loss: 1.0017708539962769,grad_norm: 0.9999991418245925, iteration: 73304
loss: 1.0087372064590454,grad_norm: 0.9999991158996063, iteration: 73305
loss: 0.9836136102676392,grad_norm: 0.9999991858410305, iteration: 73306
loss: 1.0013316869735718,grad_norm: 0.969538526782067, iteration: 73307
loss: 1.0207387208938599,grad_norm: 0.9999998299012666, iteration: 73308
loss: 1.0344316959381104,grad_norm: 0.999999087834072, iteration: 73309
loss: 0.9970124363899231,grad_norm: 0.930573604820154, iteration: 73310
loss: 0.9868389964103699,grad_norm: 0.8940085667405006, iteration: 73311
loss: 1.0190788507461548,grad_norm: 0.99999910006716, iteration: 73312
loss: 1.0308544635772705,grad_norm: 0.9999993311341112, iteration: 73313
loss: 0.9680601358413696,grad_norm: 0.9999991096890056, iteration: 73314
loss: 1.0189056396484375,grad_norm: 0.9999990555580048, iteration: 73315
loss: 0.9806526899337769,grad_norm: 0.9999990610744074, iteration: 73316
loss: 1.014103889465332,grad_norm: 0.8568995061850094, iteration: 73317
loss: 0.9864284992218018,grad_norm: 0.9999990578491982, iteration: 73318
loss: 0.9914818406105042,grad_norm: 0.9999992229564459, iteration: 73319
loss: 0.9917217493057251,grad_norm: 0.9745031537773896, iteration: 73320
loss: 1.0011115074157715,grad_norm: 0.9999991138054293, iteration: 73321
loss: 0.9962402582168579,grad_norm: 0.999999200645234, iteration: 73322
loss: 1.017095923423767,grad_norm: 0.8572506249574487, iteration: 73323
loss: 0.9929790496826172,grad_norm: 0.9046654863772586, iteration: 73324
loss: 1.0301482677459717,grad_norm: 0.9999992072515979, iteration: 73325
loss: 0.9950066208839417,grad_norm: 0.9312597737070654, iteration: 73326
loss: 1.0273069143295288,grad_norm: 0.9392083357106061, iteration: 73327
loss: 1.0115629434585571,grad_norm: 0.9616260461489904, iteration: 73328
loss: 1.0062350034713745,grad_norm: 0.9999990695658522, iteration: 73329
loss: 1.0784450769424438,grad_norm: 0.9999997382841921, iteration: 73330
loss: 1.0063217878341675,grad_norm: 0.9999991416075931, iteration: 73331
loss: 1.020405650138855,grad_norm: 0.9999990358150584, iteration: 73332
loss: 0.986010730266571,grad_norm: 0.9438254077717718, iteration: 73333
loss: 1.0614228248596191,grad_norm: 0.999999866621245, iteration: 73334
loss: 1.0001853704452515,grad_norm: 0.9999992076687554, iteration: 73335
loss: 1.028532862663269,grad_norm: 0.999999143508006, iteration: 73336
loss: 1.001546025276184,grad_norm: 0.8682716583835617, iteration: 73337
loss: 1.0410209894180298,grad_norm: 0.9999997299003532, iteration: 73338
loss: 0.9770317673683167,grad_norm: 0.9173399852575708, iteration: 73339
loss: 0.9913320541381836,grad_norm: 0.9999990565731389, iteration: 73340
loss: 0.9683184623718262,grad_norm: 0.99999894083944, iteration: 73341
loss: 0.9957650303840637,grad_norm: 0.9999990951039938, iteration: 73342
loss: 1.044183611869812,grad_norm: 0.9999991034116832, iteration: 73343
loss: 1.0505651235580444,grad_norm: 0.9999991720229274, iteration: 73344
loss: 0.9871772527694702,grad_norm: 0.9999992000650343, iteration: 73345
loss: 0.9892837405204773,grad_norm: 0.9999991528836217, iteration: 73346
loss: 1.0192351341247559,grad_norm: 0.9391948867534273, iteration: 73347
loss: 1.0089120864868164,grad_norm: 0.9999993273296505, iteration: 73348
loss: 0.9865323901176453,grad_norm: 0.9999999175604875, iteration: 73349
loss: 1.017769694328308,grad_norm: 0.9762739375643941, iteration: 73350
loss: 1.016758680343628,grad_norm: 0.9999990875751401, iteration: 73351
loss: 0.9662020802497864,grad_norm: 0.9372837911655438, iteration: 73352
loss: 1.0278502702713013,grad_norm: 0.8162703311232961, iteration: 73353
loss: 1.0725321769714355,grad_norm: 0.9999998810846807, iteration: 73354
loss: 0.9899278879165649,grad_norm: 0.9999990645124026, iteration: 73355
loss: 0.9947373270988464,grad_norm: 0.9999991327858643, iteration: 73356
loss: 0.9775131344795227,grad_norm: 0.9382950969321499, iteration: 73357
loss: 0.9823887944221497,grad_norm: 0.999999168852616, iteration: 73358
loss: 1.0330512523651123,grad_norm: 0.9999990874397379, iteration: 73359
loss: 1.006974220275879,grad_norm: 0.9999990509908857, iteration: 73360
loss: 1.012944221496582,grad_norm: 0.9999994944102089, iteration: 73361
loss: 0.9748044610023499,grad_norm: 0.9999991015822717, iteration: 73362
loss: 0.9725695848464966,grad_norm: 0.9957071814127001, iteration: 73363
loss: 1.0129703283309937,grad_norm: 0.9999990755697203, iteration: 73364
loss: 0.9879676699638367,grad_norm: 0.9999989730966952, iteration: 73365
loss: 0.9896330833435059,grad_norm: 0.9011440570436405, iteration: 73366
loss: 0.9677352905273438,grad_norm: 0.9999991548013739, iteration: 73367
loss: 0.9918212294578552,grad_norm: 0.912011863153978, iteration: 73368
loss: 0.9930184483528137,grad_norm: 0.9999990368859216, iteration: 73369
loss: 1.0129581689834595,grad_norm: 0.999999192734288, iteration: 73370
loss: 1.0000404119491577,grad_norm: 0.9999991880914849, iteration: 73371
loss: 1.0321643352508545,grad_norm: 0.9999991186216813, iteration: 73372
loss: 1.0155682563781738,grad_norm: 0.8610419849333995, iteration: 73373
loss: 0.963513970375061,grad_norm: 0.9943719941957125, iteration: 73374
loss: 0.98200923204422,grad_norm: 0.9999990143374258, iteration: 73375
loss: 0.9817041158676147,grad_norm: 0.9348967915837298, iteration: 73376
loss: 1.0159192085266113,grad_norm: 0.9999992274772108, iteration: 73377
loss: 1.0606125593185425,grad_norm: 0.9999992435057805, iteration: 73378
loss: 0.9754454493522644,grad_norm: 0.9650659753421268, iteration: 73379
loss: 1.0132725238800049,grad_norm: 0.9544500555299692, iteration: 73380
loss: 1.017241358757019,grad_norm: 0.9999992672305578, iteration: 73381
loss: 1.0162686109542847,grad_norm: 0.9999990877971827, iteration: 73382
loss: 1.000717043876648,grad_norm: 0.999999088060467, iteration: 73383
loss: 0.9856512546539307,grad_norm: 0.9999991045516519, iteration: 73384
loss: 1.0046768188476562,grad_norm: 0.9999997440753801, iteration: 73385
loss: 0.9854898452758789,grad_norm: 0.9039326966534332, iteration: 73386
loss: 0.9925525188446045,grad_norm: 0.9999990671622536, iteration: 73387
loss: 1.0122740268707275,grad_norm: 0.9999995628444517, iteration: 73388
loss: 1.0061935186386108,grad_norm: 0.9999991308579707, iteration: 73389
loss: 1.02968430519104,grad_norm: 0.8746266155572255, iteration: 73390
loss: 0.9896298050880432,grad_norm: 0.9999989992997149, iteration: 73391
loss: 1.023474097251892,grad_norm: 0.9999994358233679, iteration: 73392
loss: 1.0102967023849487,grad_norm: 0.9999992083580577, iteration: 73393
loss: 0.985109269618988,grad_norm: 0.9692287609452416, iteration: 73394
loss: 0.9572086334228516,grad_norm: 0.9952983983853724, iteration: 73395
loss: 1.0108500719070435,grad_norm: 0.9999990301710667, iteration: 73396
loss: 1.0016487836837769,grad_norm: 0.8897629374493843, iteration: 73397
loss: 0.9748640060424805,grad_norm: 0.9999994595451559, iteration: 73398
loss: 1.0046769380569458,grad_norm: 0.9999991144418277, iteration: 73399
loss: 0.9611358642578125,grad_norm: 0.9999989992807453, iteration: 73400
loss: 0.9963139891624451,grad_norm: 0.9853077206723165, iteration: 73401
loss: 1.0079354047775269,grad_norm: 0.9999990542752335, iteration: 73402
loss: 1.0400714874267578,grad_norm: 0.9594466720078327, iteration: 73403
loss: 0.9787185788154602,grad_norm: 0.9999992138913493, iteration: 73404
loss: 1.0241137742996216,grad_norm: 0.9999990091198828, iteration: 73405
loss: 0.9943456053733826,grad_norm: 0.9999990583916468, iteration: 73406
loss: 1.0001038312911987,grad_norm: 0.9944553480123137, iteration: 73407
loss: 0.9849761724472046,grad_norm: 0.9999990951894671, iteration: 73408
loss: 0.9771331548690796,grad_norm: 0.999999140203969, iteration: 73409
loss: 1.0305880308151245,grad_norm: 0.9511383458989928, iteration: 73410
loss: 0.9730625748634338,grad_norm: 0.9757229207668339, iteration: 73411
loss: 1.0012173652648926,grad_norm: 0.9999991544814913, iteration: 73412
loss: 1.001350998878479,grad_norm: 0.9999995661980597, iteration: 73413
loss: 0.9671801328659058,grad_norm: 0.999999142085254, iteration: 73414
loss: 1.0123040676116943,grad_norm: 0.9999991674509507, iteration: 73415
loss: 0.9910925030708313,grad_norm: 0.9999992097246557, iteration: 73416
loss: 1.030560851097107,grad_norm: 0.9999990749964573, iteration: 73417
loss: 0.9991872310638428,grad_norm: 0.9999994115500855, iteration: 73418
loss: 0.9578573107719421,grad_norm: 0.8761671268081717, iteration: 73419
loss: 0.9660714268684387,grad_norm: 0.889522629909899, iteration: 73420
loss: 1.0008838176727295,grad_norm: 0.897785926940092, iteration: 73421
loss: 1.0617762804031372,grad_norm: 0.9999992131996515, iteration: 73422
loss: 1.0049008131027222,grad_norm: 0.9999991797077716, iteration: 73423
loss: 0.9799240231513977,grad_norm: 0.8561445225924987, iteration: 73424
loss: 1.0151458978652954,grad_norm: 0.9674926944628077, iteration: 73425
loss: 1.0274033546447754,grad_norm: 0.9999991178878913, iteration: 73426
loss: 1.0387194156646729,grad_norm: 0.9999992216441108, iteration: 73427
loss: 1.0142709016799927,grad_norm: 0.9999990851409148, iteration: 73428
loss: 1.0036063194274902,grad_norm: 0.9999990081468179, iteration: 73429
loss: 0.9991493821144104,grad_norm: 0.9693399053214623, iteration: 73430
loss: 1.0129472017288208,grad_norm: 0.9999994002822332, iteration: 73431
loss: 1.0327891111373901,grad_norm: 0.989051762768927, iteration: 73432
loss: 0.9661316275596619,grad_norm: 0.9999991739343842, iteration: 73433
loss: 1.0167664289474487,grad_norm: 0.9547883452149318, iteration: 73434
loss: 0.9709949493408203,grad_norm: 0.9907828098121475, iteration: 73435
loss: 1.0283997058868408,grad_norm: 0.9999990147385059, iteration: 73436
loss: 1.0430614948272705,grad_norm: 0.9999991528163951, iteration: 73437
loss: 0.9818161129951477,grad_norm: 0.9999991625949145, iteration: 73438
loss: 1.015552282333374,grad_norm: 0.9736703395305396, iteration: 73439
loss: 0.9979336857795715,grad_norm: 0.9999991701451172, iteration: 73440
loss: 0.9877888560295105,grad_norm: 0.9999991049445021, iteration: 73441
loss: 1.0358902215957642,grad_norm: 0.9999991893999954, iteration: 73442
loss: 1.026875376701355,grad_norm: 0.938468273340039, iteration: 73443
loss: 0.9940131306648254,grad_norm: 0.9999991771115849, iteration: 73444
loss: 1.0069693326950073,grad_norm: 0.9999989634247916, iteration: 73445
loss: 1.0100343227386475,grad_norm: 0.9999991250423805, iteration: 73446
loss: 0.990977942943573,grad_norm: 0.9982270144305373, iteration: 73447
loss: 0.9891706705093384,grad_norm: 0.9949743663952001, iteration: 73448
loss: 0.9949111938476562,grad_norm: 0.9999991145615351, iteration: 73449
loss: 1.0243592262268066,grad_norm: 0.9999989739243066, iteration: 73450
loss: 0.9734697937965393,grad_norm: 0.9999991205920896, iteration: 73451
loss: 0.9871352314949036,grad_norm: 0.9999992320312212, iteration: 73452
loss: 0.987895667552948,grad_norm: 0.9999991879523525, iteration: 73453
loss: 1.0079057216644287,grad_norm: 0.9999989832383454, iteration: 73454
loss: 0.9924598932266235,grad_norm: 0.9999990782959712, iteration: 73455
loss: 1.0115855932235718,grad_norm: 0.9999989116809748, iteration: 73456
loss: 0.9973000884056091,grad_norm: 0.9931705457689839, iteration: 73457
loss: 0.9785917401313782,grad_norm: 0.9999990884237168, iteration: 73458
loss: 0.9969772100448608,grad_norm: 0.9999991021140319, iteration: 73459
loss: 0.9918487071990967,grad_norm: 0.9999990396835279, iteration: 73460
loss: 1.0204968452453613,grad_norm: 0.9999992168237071, iteration: 73461
loss: 1.0306799411773682,grad_norm: 0.9999991980919591, iteration: 73462
loss: 1.0014772415161133,grad_norm: 0.999999308424513, iteration: 73463
loss: 1.008963942527771,grad_norm: 0.9999992289633989, iteration: 73464
loss: 1.005534052848816,grad_norm: 0.999999271717272, iteration: 73465
loss: 0.9980049729347229,grad_norm: 0.9999992930732656, iteration: 73466
loss: 1.034726858139038,grad_norm: 0.9999991222382403, iteration: 73467
loss: 0.9721711277961731,grad_norm: 0.9999991483089633, iteration: 73468
loss: 1.0208479166030884,grad_norm: 0.9776440676357611, iteration: 73469
loss: 1.0062979459762573,grad_norm: 0.9999990103325828, iteration: 73470
loss: 0.9962960481643677,grad_norm: 0.9999991432887253, iteration: 73471
loss: 0.9975885152816772,grad_norm: 0.8579907570709951, iteration: 73472
loss: 1.042714238166809,grad_norm: 0.9999992959258105, iteration: 73473
loss: 1.0051888227462769,grad_norm: 0.9735366080522462, iteration: 73474
loss: 0.9966375231742859,grad_norm: 0.9340391041571768, iteration: 73475
loss: 1.016898274421692,grad_norm: 0.9999996320575606, iteration: 73476
loss: 1.0039103031158447,grad_norm: 0.9999992133605164, iteration: 73477
loss: 1.007465124130249,grad_norm: 0.990066331610983, iteration: 73478
loss: 0.9974548816680908,grad_norm: 0.9590246696380386, iteration: 73479
loss: 0.9972992539405823,grad_norm: 0.9999992777714682, iteration: 73480
loss: 1.0084210634231567,grad_norm: 0.9677511698330993, iteration: 73481
loss: 1.0074377059936523,grad_norm: 0.957461712238249, iteration: 73482
loss: 0.9711185097694397,grad_norm: 0.9244579327200236, iteration: 73483
loss: 0.975897490978241,grad_norm: 0.9982610371418726, iteration: 73484
loss: 1.0019358396530151,grad_norm: 0.9846309409347193, iteration: 73485
loss: 1.017918586730957,grad_norm: 0.9999989596003355, iteration: 73486
loss: 0.9863649010658264,grad_norm: 0.9999992343829248, iteration: 73487
loss: 0.9885138273239136,grad_norm: 0.9999991251650899, iteration: 73488
loss: 1.0288482904434204,grad_norm: 0.8593221625612976, iteration: 73489
loss: 1.0503134727478027,grad_norm: 0.9999992111119085, iteration: 73490
loss: 0.9850663542747498,grad_norm: 0.9999993613150651, iteration: 73491
loss: 0.9893419146537781,grad_norm: 0.9999991539491776, iteration: 73492
loss: 1.0075100660324097,grad_norm: 0.840514714674878, iteration: 73493
loss: 1.0184928178787231,grad_norm: 0.928724203623061, iteration: 73494
loss: 0.9702925682067871,grad_norm: 0.9999990202918038, iteration: 73495
loss: 0.9910227060317993,grad_norm: 0.9999991206688013, iteration: 73496
loss: 0.9968747496604919,grad_norm: 0.99999907636114, iteration: 73497
loss: 0.9822511672973633,grad_norm: 0.9999990146133038, iteration: 73498
loss: 1.021634578704834,grad_norm: 0.9999990739444438, iteration: 73499
loss: 0.9973383545875549,grad_norm: 0.9999992847998392, iteration: 73500
loss: 1.0009568929672241,grad_norm: 0.999999089939335, iteration: 73501
loss: 1.0099968910217285,grad_norm: 0.9999990485819393, iteration: 73502
loss: 0.9789419174194336,grad_norm: 0.9999990456098272, iteration: 73503
loss: 0.9666664600372314,grad_norm: 0.9119023546308525, iteration: 73504
loss: 1.009279489517212,grad_norm: 0.9999992237469387, iteration: 73505
loss: 0.9979371428489685,grad_norm: 0.9999991662852892, iteration: 73506
loss: 0.9724050164222717,grad_norm: 0.9999993161576165, iteration: 73507
loss: 0.9930446147918701,grad_norm: 0.9999992625739256, iteration: 73508
loss: 1.0028562545776367,grad_norm: 0.9999991146099662, iteration: 73509
loss: 1.0121341943740845,grad_norm: 0.9999992067240303, iteration: 73510
loss: 0.9754523634910583,grad_norm: 0.9999991618143647, iteration: 73511
loss: 1.0310426950454712,grad_norm: 0.899223008057086, iteration: 73512
loss: 0.9806861281394958,grad_norm: 0.9999992733409594, iteration: 73513
loss: 1.0134153366088867,grad_norm: 0.9999990826477927, iteration: 73514
loss: 0.9922663569450378,grad_norm: 0.9999992509045088, iteration: 73515
loss: 0.9634144902229309,grad_norm: 0.9674471914255703, iteration: 73516
loss: 1.0071290731430054,grad_norm: 0.9999989699950405, iteration: 73517
loss: 1.0084973573684692,grad_norm: 0.9999989830366628, iteration: 73518
loss: 1.0089435577392578,grad_norm: 0.9589009498490298, iteration: 73519
loss: 0.9840730428695679,grad_norm: 0.9999992827001115, iteration: 73520
loss: 1.0055686235427856,grad_norm: 0.9999991549554946, iteration: 73521
loss: 0.9843869209289551,grad_norm: 0.9999992633019801, iteration: 73522
loss: 1.0040953159332275,grad_norm: 0.9999991954140787, iteration: 73523
loss: 0.9790495038032532,grad_norm: 0.9999990218357815, iteration: 73524
loss: 0.9877298474311829,grad_norm: 0.9999990618364695, iteration: 73525
loss: 1.0107357501983643,grad_norm: 0.999999625777122, iteration: 73526
loss: 1.0313462018966675,grad_norm: 0.9999991968057449, iteration: 73527
loss: 1.0343700647354126,grad_norm: 0.9789865122863025, iteration: 73528
loss: 1.0134893655776978,grad_norm: 0.9999991583385969, iteration: 73529
loss: 1.0219759941101074,grad_norm: 0.9999990701678405, iteration: 73530
loss: 0.9922493100166321,grad_norm: 0.9676431143645502, iteration: 73531
loss: 0.9824292659759521,grad_norm: 0.9576649580256053, iteration: 73532
loss: 0.9693816900253296,grad_norm: 0.9999990994194303, iteration: 73533
loss: 0.9671589732170105,grad_norm: 0.9999989280757635, iteration: 73534
loss: 1.006255030632019,grad_norm: 0.9160514115596414, iteration: 73535
loss: 1.0234020948410034,grad_norm: 0.886621349689483, iteration: 73536
loss: 0.9597559571266174,grad_norm: 0.9802149387098027, iteration: 73537
loss: 1.0250377655029297,grad_norm: 0.9999991621158315, iteration: 73538
loss: 1.0280264616012573,grad_norm: 0.9999991184363408, iteration: 73539
loss: 0.9581195116043091,grad_norm: 0.9999991001088306, iteration: 73540
loss: 0.9906470775604248,grad_norm: 0.9581789795609282, iteration: 73541
loss: 0.9923388361930847,grad_norm: 0.9999992220846458, iteration: 73542
loss: 0.9830116629600525,grad_norm: 0.9999988839822553, iteration: 73543
loss: 1.0115604400634766,grad_norm: 0.9446706767231662, iteration: 73544
loss: 1.0256189107894897,grad_norm: 0.9999996480016552, iteration: 73545
loss: 1.0282213687896729,grad_norm: 0.9999991921299508, iteration: 73546
loss: 1.0339336395263672,grad_norm: 0.9999992445247725, iteration: 73547
loss: 0.9917113780975342,grad_norm: 0.999999235027743, iteration: 73548
loss: 1.025855302810669,grad_norm: 0.9999993412086795, iteration: 73549
loss: 1.0134860277175903,grad_norm: 0.9999989626346305, iteration: 73550
loss: 1.017679214477539,grad_norm: 0.885257367808677, iteration: 73551
loss: 1.019800066947937,grad_norm: 0.9999991749399127, iteration: 73552
loss: 0.9933867454528809,grad_norm: 0.7869688936182092, iteration: 73553
loss: 1.0064009428024292,grad_norm: 0.9884507505261477, iteration: 73554
loss: 1.012271523475647,grad_norm: 0.8512012809636299, iteration: 73555
loss: 0.9769410490989685,grad_norm: 0.9999991822716616, iteration: 73556
loss: 1.0105102062225342,grad_norm: 0.9999992510526929, iteration: 73557
loss: 1.002535104751587,grad_norm: 0.9114302228983882, iteration: 73558
loss: 1.0185860395431519,grad_norm: 0.9999993569356053, iteration: 73559
loss: 0.9643649458885193,grad_norm: 0.9999990611303262, iteration: 73560
loss: 1.009335994720459,grad_norm: 0.9999994334919174, iteration: 73561
loss: 1.0072627067565918,grad_norm: 0.9999990968158581, iteration: 73562
loss: 1.0040117502212524,grad_norm: 0.9999989647460958, iteration: 73563
loss: 0.9763819575309753,grad_norm: 0.9218884611595246, iteration: 73564
loss: 1.00080406665802,grad_norm: 0.9999991112083162, iteration: 73565
loss: 1.0047870874404907,grad_norm: 0.999999232976834, iteration: 73566
loss: 1.0255106687545776,grad_norm: 0.9999990042590131, iteration: 73567
loss: 0.9783661365509033,grad_norm: 0.913078113986548, iteration: 73568
loss: 1.0300999879837036,grad_norm: 0.9999998695876678, iteration: 73569
loss: 1.0025802850723267,grad_norm: 0.9999991295740915, iteration: 73570
loss: 1.0054842233657837,grad_norm: 0.9999992925133777, iteration: 73571
loss: 0.9857164621353149,grad_norm: 0.8482668195865347, iteration: 73572
loss: 1.0160396099090576,grad_norm: 0.9547497863861114, iteration: 73573
loss: 0.9795946478843689,grad_norm: 0.9999991173091435, iteration: 73574
loss: 1.0087987184524536,grad_norm: 0.9999990642649623, iteration: 73575
loss: 0.9623298048973083,grad_norm: 0.9999994173844918, iteration: 73576
loss: 0.9659761786460876,grad_norm: 0.9999991904393621, iteration: 73577
loss: 1.0014193058013916,grad_norm: 0.9999991405778121, iteration: 73578
loss: 1.000898838043213,grad_norm: 0.961220057344058, iteration: 73579
loss: 0.9862171411514282,grad_norm: 0.8626466682187686, iteration: 73580
loss: 1.009737253189087,grad_norm: 0.8621344986880018, iteration: 73581
loss: 1.0135438442230225,grad_norm: 0.9354352790300838, iteration: 73582
loss: 1.0391753911972046,grad_norm: 0.9999991323938603, iteration: 73583
loss: 0.9886069893836975,grad_norm: 0.9999990521868998, iteration: 73584
loss: 0.9871666431427002,grad_norm: 0.999999083651988, iteration: 73585
loss: 0.9996106028556824,grad_norm: 0.9999992679029986, iteration: 73586
loss: 0.9624276161193848,grad_norm: 0.9999992076060783, iteration: 73587
loss: 0.9804277420043945,grad_norm: 0.9999990690504872, iteration: 73588
loss: 0.988005518913269,grad_norm: 0.9999992902562137, iteration: 73589
loss: 0.9830119013786316,grad_norm: 0.9999991603004201, iteration: 73590
loss: 1.0134636163711548,grad_norm: 0.9999990985994356, iteration: 73591
loss: 0.997877836227417,grad_norm: 0.9999990378183505, iteration: 73592
loss: 0.9916555285453796,grad_norm: 0.9191893944577177, iteration: 73593
loss: 1.0325727462768555,grad_norm: 0.8922647060465291, iteration: 73594
loss: 1.0047403573989868,grad_norm: 0.9999991640913641, iteration: 73595
loss: 1.0189323425292969,grad_norm: 0.983511606534199, iteration: 73596
loss: 1.0042595863342285,grad_norm: 0.9999991400914346, iteration: 73597
loss: 0.9603654742240906,grad_norm: 0.9999990375314706, iteration: 73598
loss: 1.020156741142273,grad_norm: 0.99999937098259, iteration: 73599
loss: 1.0617446899414062,grad_norm: 0.9999999734550212, iteration: 73600
loss: 0.9744531512260437,grad_norm: 0.9657161199283649, iteration: 73601
loss: 0.987470805644989,grad_norm: 0.9080406540098945, iteration: 73602
loss: 0.9719445109367371,grad_norm: 0.9828132681983829, iteration: 73603
loss: 1.008309245109558,grad_norm: 0.913335224218036, iteration: 73604
loss: 1.014373779296875,grad_norm: 0.8547998904282975, iteration: 73605
loss: 0.9944666624069214,grad_norm: 0.999999006072051, iteration: 73606
loss: 0.9725887179374695,grad_norm: 0.9110847366413137, iteration: 73607
loss: 1.013439416885376,grad_norm: 0.9999991474714747, iteration: 73608
loss: 1.0002939701080322,grad_norm: 0.9999989811537286, iteration: 73609
loss: 1.0132529735565186,grad_norm: 0.9999990518949408, iteration: 73610
loss: 1.0216636657714844,grad_norm: 0.947240915135712, iteration: 73611
loss: 1.0187687873840332,grad_norm: 0.9999991411874244, iteration: 73612
loss: 0.9864935874938965,grad_norm: 0.9999991756225399, iteration: 73613
loss: 1.00514817237854,grad_norm: 0.9451604105700423, iteration: 73614
loss: 0.9982419610023499,grad_norm: 0.999999097032627, iteration: 73615
loss: 0.9904245138168335,grad_norm: 0.9999990382028976, iteration: 73616
loss: 1.004880428314209,grad_norm: 0.9999990154987154, iteration: 73617
loss: 1.0169645547866821,grad_norm: 0.9999991180457519, iteration: 73618
loss: 1.021727442741394,grad_norm: 0.9999992148311223, iteration: 73619
loss: 0.9953569769859314,grad_norm: 0.999999033724323, iteration: 73620
loss: 1.0245370864868164,grad_norm: 0.9752537110231444, iteration: 73621
loss: 0.9927904009819031,grad_norm: 0.9753922563665947, iteration: 73622
loss: 0.9638285636901855,grad_norm: 0.9999990885406232, iteration: 73623
loss: 1.0073705911636353,grad_norm: 0.99999934982214, iteration: 73624
loss: 0.9755085706710815,grad_norm: 0.8776173678004506, iteration: 73625
loss: 1.0126138925552368,grad_norm: 0.9637916169434287, iteration: 73626
loss: 1.000701904296875,grad_norm: 0.999999119388984, iteration: 73627
loss: 0.989070475101471,grad_norm: 0.9999989554782631, iteration: 73628
loss: 1.0369552373886108,grad_norm: 0.9999992082307593, iteration: 73629
loss: 0.9820464253425598,grad_norm: 0.9635722431835394, iteration: 73630
loss: 1.0069663524627686,grad_norm: 0.9999994404125918, iteration: 73631
loss: 1.0302469730377197,grad_norm: 0.9999992000462251, iteration: 73632
loss: 1.02082097530365,grad_norm: 0.9999992217059082, iteration: 73633
loss: 1.0033646821975708,grad_norm: 0.9397574508824904, iteration: 73634
loss: 1.0084446668624878,grad_norm: 0.9999991120155038, iteration: 73635
loss: 1.0008983612060547,grad_norm: 0.9999990213431176, iteration: 73636
loss: 0.9666523337364197,grad_norm: 0.8633882584927914, iteration: 73637
loss: 0.9730132222175598,grad_norm: 0.9999994223995682, iteration: 73638
loss: 0.991989016532898,grad_norm: 0.9999992337627565, iteration: 73639
loss: 1.0170814990997314,grad_norm: 0.999999204679233, iteration: 73640
loss: 1.0346218347549438,grad_norm: 0.9999992011293404, iteration: 73641
loss: 0.976309061050415,grad_norm: 0.9041691216702518, iteration: 73642
loss: 1.0019770860671997,grad_norm: 0.9022079108182317, iteration: 73643
loss: 0.9968070983886719,grad_norm: 0.999999307387514, iteration: 73644
loss: 0.9933939576148987,grad_norm: 0.9999990590577971, iteration: 73645
loss: 1.0178824663162231,grad_norm: 0.9999990440997456, iteration: 73646
loss: 0.9945734739303589,grad_norm: 0.8483703596054676, iteration: 73647
loss: 1.006483554840088,grad_norm: 0.9999989459179537, iteration: 73648
loss: 0.9913433790206909,grad_norm: 0.8768013925276068, iteration: 73649
loss: 1.0177983045578003,grad_norm: 0.7848254918750389, iteration: 73650
loss: 0.9868178963661194,grad_norm: 0.9999991039400735, iteration: 73651
loss: 0.9941625595092773,grad_norm: 0.9999993472109786, iteration: 73652
loss: 1.001462459564209,grad_norm: 0.999998988827258, iteration: 73653
loss: 0.9878612160682678,grad_norm: 0.9711866315594023, iteration: 73654
loss: 1.0003410577774048,grad_norm: 0.9999990478015411, iteration: 73655
loss: 1.010632038116455,grad_norm: 0.9999990692011932, iteration: 73656
loss: 1.0148080587387085,grad_norm: 0.9999992351599772, iteration: 73657
loss: 0.9911643266677856,grad_norm: 0.9999990915112734, iteration: 73658
loss: 1.0105667114257812,grad_norm: 0.9999989989459175, iteration: 73659
loss: 1.0502216815948486,grad_norm: 0.999999008856331, iteration: 73660
loss: 0.992364764213562,grad_norm: 0.9543269512587417, iteration: 73661
loss: 0.961946964263916,grad_norm: 0.9999992351190677, iteration: 73662
loss: 0.9803353548049927,grad_norm: 0.9999989930062425, iteration: 73663
loss: 1.0063549280166626,grad_norm: 0.801793748608983, iteration: 73664
loss: 0.9680954813957214,grad_norm: 0.9999992763323791, iteration: 73665
loss: 1.0239793062210083,grad_norm: 0.9999990765527811, iteration: 73666
loss: 0.964470386505127,grad_norm: 0.9999991182388025, iteration: 73667
loss: 0.9815512895584106,grad_norm: 0.9999990593441582, iteration: 73668
loss: 1.0237557888031006,grad_norm: 0.9999995224198519, iteration: 73669
loss: 0.9545318484306335,grad_norm: 0.9999991280211039, iteration: 73670
loss: 1.0037742853164673,grad_norm: 0.9910483755303803, iteration: 73671
loss: 1.0240477323532104,grad_norm: 0.8753863553184538, iteration: 73672
loss: 0.9849618673324585,grad_norm: 0.9999992056787077, iteration: 73673
loss: 0.9830620288848877,grad_norm: 0.9606740774778929, iteration: 73674
loss: 1.0337098836898804,grad_norm: 0.9999990887774479, iteration: 73675
loss: 1.006446123123169,grad_norm: 0.9999990115539917, iteration: 73676
loss: 1.0109286308288574,grad_norm: 0.9999992504961703, iteration: 73677
loss: 0.9913720488548279,grad_norm: 0.9999991302373977, iteration: 73678
loss: 0.9887492656707764,grad_norm: 0.9999991523837791, iteration: 73679
loss: 0.9891043901443481,grad_norm: 0.920019860459888, iteration: 73680
loss: 1.035760760307312,grad_norm: 0.9999990989860269, iteration: 73681
loss: 1.0028245449066162,grad_norm: 0.9865967453018466, iteration: 73682
loss: 0.9971091747283936,grad_norm: 0.9999990775689943, iteration: 73683
loss: 0.9748720526695251,grad_norm: 0.9502006123718346, iteration: 73684
loss: 0.9822807908058167,grad_norm: 0.999998988036842, iteration: 73685
loss: 1.0365757942199707,grad_norm: 0.999999306932776, iteration: 73686
loss: 1.0074008703231812,grad_norm: 0.9999991430096837, iteration: 73687
loss: 1.0328646898269653,grad_norm: 0.9939465576562194, iteration: 73688
loss: 1.0163230895996094,grad_norm: 0.903150166972237, iteration: 73689
loss: 0.9881136417388916,grad_norm: 0.9423152876000377, iteration: 73690
loss: 1.0212737321853638,grad_norm: 0.9999990368839019, iteration: 73691
loss: 1.0703507661819458,grad_norm: 0.9999996805724738, iteration: 73692
loss: 0.9999045133590698,grad_norm: 0.9999991832343994, iteration: 73693
loss: 0.9764052629470825,grad_norm: 0.9999991479765867, iteration: 73694
loss: 0.962913453578949,grad_norm: 0.9455714099085141, iteration: 73695
loss: 0.9900672435760498,grad_norm: 0.9508739623577417, iteration: 73696
loss: 1.0469260215759277,grad_norm: 0.9999996328006056, iteration: 73697
loss: 0.9877898097038269,grad_norm: 0.9999991958096554, iteration: 73698
loss: 1.0449395179748535,grad_norm: 0.9999996445315642, iteration: 73699
loss: 0.9816213846206665,grad_norm: 0.9920590804535352, iteration: 73700
loss: 0.9984149932861328,grad_norm: 0.999999022737056, iteration: 73701
loss: 1.0061837434768677,grad_norm: 0.9999989331759328, iteration: 73702
loss: 1.0056145191192627,grad_norm: 0.9999991716496005, iteration: 73703
loss: 1.0282087326049805,grad_norm: 0.9212584336648012, iteration: 73704
loss: 1.0205191373825073,grad_norm: 0.9078710961783202, iteration: 73705
loss: 1.049397587776184,grad_norm: 0.999999085837207, iteration: 73706
loss: 0.9999327659606934,grad_norm: 0.9036452429461529, iteration: 73707
loss: 0.9628012776374817,grad_norm: 0.9480510613228094, iteration: 73708
loss: 1.1333996057510376,grad_norm: 0.9999999795680452, iteration: 73709
loss: 0.9638659358024597,grad_norm: 0.9746160568256187, iteration: 73710
loss: 0.9593647122383118,grad_norm: 0.9999992186882637, iteration: 73711
loss: 1.0323748588562012,grad_norm: 0.9999991157024152, iteration: 73712
loss: 1.0173982381820679,grad_norm: 0.8060500589378459, iteration: 73713
loss: 1.0257108211517334,grad_norm: 0.9999991514075831, iteration: 73714
loss: 1.02564537525177,grad_norm: 0.9999991098044716, iteration: 73715
loss: 0.9739270210266113,grad_norm: 0.9999990911896324, iteration: 73716
loss: 1.038537621498108,grad_norm: 0.9999990939304649, iteration: 73717
loss: 1.0284212827682495,grad_norm: 0.9999998039284735, iteration: 73718
loss: 0.9710251092910767,grad_norm: 0.9999994939788849, iteration: 73719
loss: 1.010857343673706,grad_norm: 0.938924919369028, iteration: 73720
loss: 0.9651772379875183,grad_norm: 0.9999990337208167, iteration: 73721
loss: 1.0198380947113037,grad_norm: 0.9999992752129019, iteration: 73722
loss: 1.051710844039917,grad_norm: 0.9999991728665352, iteration: 73723
loss: 0.9869666695594788,grad_norm: 0.9999991033127048, iteration: 73724
loss: 1.0467249155044556,grad_norm: 0.9999995468736081, iteration: 73725
loss: 1.0181694030761719,grad_norm: 0.9999990683861993, iteration: 73726
loss: 1.009408712387085,grad_norm: 0.9490928255531809, iteration: 73727
loss: 0.9944010972976685,grad_norm: 0.8746766440961975, iteration: 73728
loss: 1.0182026624679565,grad_norm: 0.999999505102361, iteration: 73729
loss: 0.9955660700798035,grad_norm: 0.9999989365792951, iteration: 73730
loss: 0.9640163779258728,grad_norm: 0.9999992846293981, iteration: 73731
loss: 1.067859411239624,grad_norm: 0.9999993724505417, iteration: 73732
loss: 0.9890238642692566,grad_norm: 0.9999990994893851, iteration: 73733
loss: 1.0120923519134521,grad_norm: 0.9999991875077207, iteration: 73734
loss: 0.9812661409378052,grad_norm: 0.999999242982214, iteration: 73735
loss: 0.947834312915802,grad_norm: 0.9999991360922222, iteration: 73736
loss: 1.0185952186584473,grad_norm: 0.963623130114106, iteration: 73737
loss: 1.0028492212295532,grad_norm: 0.9999993295649193, iteration: 73738
loss: 1.056589961051941,grad_norm: 0.999999919597838, iteration: 73739
loss: 0.9685518145561218,grad_norm: 0.999999143404504, iteration: 73740
loss: 1.0005297660827637,grad_norm: 0.9749703409778171, iteration: 73741
loss: 0.9989442825317383,grad_norm: 0.9999991037121346, iteration: 73742
loss: 1.0140163898468018,grad_norm: 0.9976311189860516, iteration: 73743
loss: 1.0361814498901367,grad_norm: 0.9255906017423651, iteration: 73744
loss: 0.9763532876968384,grad_norm: 0.9999991506538698, iteration: 73745
loss: 1.0026460886001587,grad_norm: 0.9999990820821252, iteration: 73746
loss: 0.9875423908233643,grad_norm: 0.9999992004810413, iteration: 73747
loss: 1.0428695678710938,grad_norm: 0.9999995154733551, iteration: 73748
loss: 0.9599717259407043,grad_norm: 0.9999989721977116, iteration: 73749
loss: 0.9808915257453918,grad_norm: 0.9055188364874622, iteration: 73750
loss: 1.002238154411316,grad_norm: 0.9232200051441506, iteration: 73751
loss: 0.9785637855529785,grad_norm: 0.999999285465832, iteration: 73752
loss: 0.9589934349060059,grad_norm: 0.8673475954739824, iteration: 73753
loss: 0.98688143491745,grad_norm: 0.9999992217714482, iteration: 73754
loss: 1.0221827030181885,grad_norm: 0.9999992189009554, iteration: 73755
loss: 0.9727592468261719,grad_norm: 0.9999992114279019, iteration: 73756
loss: 0.9836198687553406,grad_norm: 0.9999991339284471, iteration: 73757
loss: 0.9626721739768982,grad_norm: 0.9999991946328373, iteration: 73758
loss: 1.0142377614974976,grad_norm: 0.9636085990199549, iteration: 73759
loss: 1.0172466039657593,grad_norm: 0.9768342017604968, iteration: 73760
loss: 0.9970515370368958,grad_norm: 0.9999990456194124, iteration: 73761
loss: 0.9581794738769531,grad_norm: 0.9999992804695572, iteration: 73762
loss: 1.004223108291626,grad_norm: 0.8546230469543592, iteration: 73763
loss: 0.9944785833358765,grad_norm: 0.935824306840413, iteration: 73764
loss: 0.986357569694519,grad_norm: 0.9889009190936396, iteration: 73765
loss: 1.0611350536346436,grad_norm: 0.9999990931860318, iteration: 73766
loss: 1.1182153224945068,grad_norm: 0.9999994821452558, iteration: 73767
loss: 0.9942508935928345,grad_norm: 0.9907003980369099, iteration: 73768
loss: 0.9921340346336365,grad_norm: 0.9999992563321126, iteration: 73769
loss: 0.985656201839447,grad_norm: 0.9999990439618278, iteration: 73770
loss: 1.0402445793151855,grad_norm: 0.9999992561928795, iteration: 73771
loss: 1.0316953659057617,grad_norm: 0.9940794208742624, iteration: 73772
loss: 1.057965874671936,grad_norm: 0.9982259105960316, iteration: 73773
loss: 1.0204083919525146,grad_norm: 0.9999991722147861, iteration: 73774
loss: 1.0185551643371582,grad_norm: 0.9306911491339943, iteration: 73775
loss: 0.972490668296814,grad_norm: 0.9944363094413698, iteration: 73776
loss: 1.0553523302078247,grad_norm: 0.9236837731076233, iteration: 73777
loss: 1.0049539804458618,grad_norm: 0.9999991232736462, iteration: 73778
loss: 0.9710902571678162,grad_norm: 0.9999991750900467, iteration: 73779
loss: 0.997418999671936,grad_norm: 0.999999168788689, iteration: 73780
loss: 0.9944587349891663,grad_norm: 0.9999991357659253, iteration: 73781
loss: 1.0050733089447021,grad_norm: 0.9999992526697628, iteration: 73782
loss: 0.9894196391105652,grad_norm: 0.9570057687343231, iteration: 73783
loss: 0.9711400866508484,grad_norm: 0.9153100052229929, iteration: 73784
loss: 1.0413532257080078,grad_norm: 0.9999993140887382, iteration: 73785
loss: 1.0477691888809204,grad_norm: 0.9999992262008055, iteration: 73786
loss: 1.0224347114562988,grad_norm: 0.902520757529915, iteration: 73787
loss: 1.0197656154632568,grad_norm: 0.9999990436789321, iteration: 73788
loss: 1.0102242231369019,grad_norm: 0.9134885864654485, iteration: 73789
loss: 0.9744734168052673,grad_norm: 0.8762727782052104, iteration: 73790
loss: 1.012118935585022,grad_norm: 0.9999999610949585, iteration: 73791
loss: 0.9991129636764526,grad_norm: 0.869594506247261, iteration: 73792
loss: 1.0101268291473389,grad_norm: 0.9028463913906479, iteration: 73793
loss: 1.0050472021102905,grad_norm: 0.9032963188221466, iteration: 73794
loss: 1.0233817100524902,grad_norm: 0.7984655441385777, iteration: 73795
loss: 0.9932799935340881,grad_norm: 0.9338355259553954, iteration: 73796
loss: 1.0220614671707153,grad_norm: 0.9999991562587291, iteration: 73797
loss: 1.029552698135376,grad_norm: 0.9999991990355455, iteration: 73798
loss: 1.000402808189392,grad_norm: 0.9539622710920462, iteration: 73799
loss: 1.0295125246047974,grad_norm: 0.9914484769970954, iteration: 73800
loss: 0.9417880773544312,grad_norm: 0.9999992387632793, iteration: 73801
loss: 0.9960764646530151,grad_norm: 0.9999990050627698, iteration: 73802
loss: 1.0298676490783691,grad_norm: 0.9999992163614586, iteration: 73803
loss: 1.0206505060195923,grad_norm: 0.9999989986878861, iteration: 73804
loss: 0.9909699559211731,grad_norm: 0.906717832291926, iteration: 73805
loss: 0.9769216179847717,grad_norm: 0.9999990558575711, iteration: 73806
loss: 1.0100047588348389,grad_norm: 0.9999991580873604, iteration: 73807
loss: 0.9683941602706909,grad_norm: 0.9999992066812267, iteration: 73808
loss: 1.1502203941345215,grad_norm: 0.999999791761461, iteration: 73809
loss: 1.0580202341079712,grad_norm: 0.9999999399826205, iteration: 73810
loss: 1.0309985876083374,grad_norm: 0.9999990341274574, iteration: 73811
loss: 0.9968583583831787,grad_norm: 0.9059128037875154, iteration: 73812
loss: 1.0044320821762085,grad_norm: 0.9118853211924453, iteration: 73813
loss: 0.9583250284194946,grad_norm: 0.9999991081861692, iteration: 73814
loss: 0.9913736581802368,grad_norm: 0.9941636444137844, iteration: 73815
loss: 0.9962314963340759,grad_norm: 0.9999992219493736, iteration: 73816
loss: 1.0009026527404785,grad_norm: 0.9228038269936883, iteration: 73817
loss: 0.9843440651893616,grad_norm: 0.999999124144904, iteration: 73818
loss: 1.0009649991989136,grad_norm: 0.9999991761908951, iteration: 73819
loss: 0.9904079437255859,grad_norm: 0.9999991571698925, iteration: 73820
loss: 1.0110433101654053,grad_norm: 0.9999991678269345, iteration: 73821
loss: 0.9779903888702393,grad_norm: 0.951087770423757, iteration: 73822
loss: 1.0058249235153198,grad_norm: 0.9999992061286118, iteration: 73823
loss: 0.9752803444862366,grad_norm: 0.8195190317149497, iteration: 73824
loss: 1.0105388164520264,grad_norm: 0.9999991882253194, iteration: 73825
loss: 0.9822354912757874,grad_norm: 0.9999992056117273, iteration: 73826
loss: 1.072723627090454,grad_norm: 0.9999994120746684, iteration: 73827
loss: 0.9969395995140076,grad_norm: 0.9999992308825695, iteration: 73828
loss: 0.9881005883216858,grad_norm: 0.9220181318284599, iteration: 73829
loss: 1.020622968673706,grad_norm: 0.9999990352244966, iteration: 73830
loss: 0.9828280210494995,grad_norm: 0.9693137943273198, iteration: 73831
loss: 1.0138283967971802,grad_norm: 0.9999992148024078, iteration: 73832
loss: 0.9840223789215088,grad_norm: 0.9714046874726042, iteration: 73833
loss: 0.9935551881790161,grad_norm: 0.999999421562743, iteration: 73834
loss: 1.018725037574768,grad_norm: 0.9999990808518293, iteration: 73835
loss: 0.9771975874900818,grad_norm: 0.9999991481102327, iteration: 73836
loss: 1.0428674221038818,grad_norm: 0.9999989200869335, iteration: 73837
loss: 1.0127403736114502,grad_norm: 0.9931956431888169, iteration: 73838
loss: 0.9730989336967468,grad_norm: 0.999999100625624, iteration: 73839
loss: 1.0329509973526,grad_norm: 0.9999991415237203, iteration: 73840
loss: 0.9893491864204407,grad_norm: 0.9999991790246323, iteration: 73841
loss: 1.0016911029815674,grad_norm: 0.9487005278023685, iteration: 73842
loss: 0.9809269309043884,grad_norm: 0.9999991457917636, iteration: 73843
loss: 0.9848193526268005,grad_norm: 0.9436103044865461, iteration: 73844
loss: 1.105728030204773,grad_norm: 0.9999992547599823, iteration: 73845
loss: 1.0098042488098145,grad_norm: 0.9999991742566179, iteration: 73846
loss: 0.969389796257019,grad_norm: 0.9393441653659881, iteration: 73847
loss: 0.9841102957725525,grad_norm: 0.9999989171389969, iteration: 73848
loss: 0.9193588495254517,grad_norm: 0.999999122650541, iteration: 73849
loss: 0.9905845522880554,grad_norm: 0.9247915474870221, iteration: 73850
loss: 0.9881463646888733,grad_norm: 0.9999991891862785, iteration: 73851
loss: 1.0229076147079468,grad_norm: 0.9999991102387616, iteration: 73852
loss: 1.06110417842865,grad_norm: 0.9999997942199151, iteration: 73853
loss: 0.9770975112915039,grad_norm: 0.9999991474314621, iteration: 73854
loss: 1.0012664794921875,grad_norm: 0.9638068631548986, iteration: 73855
loss: 0.9964193105697632,grad_norm: 0.9999990625587036, iteration: 73856
loss: 1.032116174697876,grad_norm: 0.9089195892435666, iteration: 73857
loss: 1.0195273160934448,grad_norm: 0.9999990966484643, iteration: 73858
loss: 1.0025254487991333,grad_norm: 0.9885768468944544, iteration: 73859
loss: 1.0214365720748901,grad_norm: 0.9999992453086165, iteration: 73860
loss: 0.9937939047813416,grad_norm: 0.9999998207576427, iteration: 73861
loss: 0.9599945545196533,grad_norm: 0.9999990277673908, iteration: 73862
loss: 1.0180189609527588,grad_norm: 0.9999990147661204, iteration: 73863
loss: 1.032148003578186,grad_norm: 0.9314417092812574, iteration: 73864
loss: 1.0211466550827026,grad_norm: 0.999999299906231, iteration: 73865
loss: 0.9698717594146729,grad_norm: 0.8825838261135438, iteration: 73866
loss: 0.9768419861793518,grad_norm: 0.938702241387916, iteration: 73867
loss: 0.9950008988380432,grad_norm: 0.9999991597373202, iteration: 73868
loss: 1.0765178203582764,grad_norm: 0.9999996063981179, iteration: 73869
loss: 1.0198439359664917,grad_norm: 0.9999991174249121, iteration: 73870
loss: 1.005449891090393,grad_norm: 0.9999992744884479, iteration: 73871
loss: 1.0115091800689697,grad_norm: 0.8418812217324102, iteration: 73872
loss: 1.0311989784240723,grad_norm: 0.9669004987922465, iteration: 73873
loss: 0.9909898638725281,grad_norm: 0.970497613949402, iteration: 73874
loss: 0.977627694606781,grad_norm: 0.9478195935219883, iteration: 73875
loss: 1.0059001445770264,grad_norm: 0.9353511626609342, iteration: 73876
loss: 0.9969926476478577,grad_norm: 0.9999990772035292, iteration: 73877
loss: 0.9845020174980164,grad_norm: 0.9688581270074855, iteration: 73878
loss: 1.0139317512512207,grad_norm: 0.9036743485362264, iteration: 73879
loss: 1.016586422920227,grad_norm: 0.9999991401744798, iteration: 73880
loss: 1.0193489789962769,grad_norm: 0.9999991535965568, iteration: 73881
loss: 0.9755098223686218,grad_norm: 0.999999142054817, iteration: 73882
loss: 1.0140451192855835,grad_norm: 0.9999993282802944, iteration: 73883
loss: 0.9734404683113098,grad_norm: 0.999999218303729, iteration: 73884
loss: 0.9883984327316284,grad_norm: 0.9999991253910006, iteration: 73885
loss: 1.0116791725158691,grad_norm: 0.9999991989876701, iteration: 73886
loss: 1.0033607482910156,grad_norm: 0.9644934326666366, iteration: 73887
loss: 1.0007717609405518,grad_norm: 0.9999990490555002, iteration: 73888
loss: 1.0123186111450195,grad_norm: 0.9999994386578147, iteration: 73889
loss: 1.0020657777786255,grad_norm: 0.9999990509374601, iteration: 73890
loss: 1.0374116897583008,grad_norm: 0.9999992068948471, iteration: 73891
loss: 1.026922345161438,grad_norm: 0.9999995125231057, iteration: 73892
loss: 0.9717767834663391,grad_norm: 0.8234771823976447, iteration: 73893
loss: 1.012860655784607,grad_norm: 0.952636030025784, iteration: 73894
loss: 1.0060741901397705,grad_norm: 0.9999990414358527, iteration: 73895
loss: 1.0415613651275635,grad_norm: 0.9999995814741051, iteration: 73896
loss: 0.9844306707382202,grad_norm: 0.9999992676430883, iteration: 73897
loss: 0.9588717222213745,grad_norm: 0.9195078845336234, iteration: 73898
loss: 0.9971829652786255,grad_norm: 0.9999990264157504, iteration: 73899
loss: 1.00146484375,grad_norm: 0.9999989101513941, iteration: 73900
loss: 0.9708120822906494,grad_norm: 0.9999989832184374, iteration: 73901
loss: 1.0228325128555298,grad_norm: 0.9127874972341706, iteration: 73902
loss: 1.0123895406723022,grad_norm: 0.9999991779656376, iteration: 73903
loss: 0.9887244701385498,grad_norm: 0.9999990065635334, iteration: 73904
loss: 0.9951805472373962,grad_norm: 0.9999992890539298, iteration: 73905
loss: 0.9874278903007507,grad_norm: 0.9999989164764206, iteration: 73906
loss: 1.002479076385498,grad_norm: 0.8532816986436907, iteration: 73907
loss: 0.981140673160553,grad_norm: 0.9680052124752979, iteration: 73908
loss: 1.0204609632492065,grad_norm: 0.9999991385972471, iteration: 73909
loss: 1.0499109029769897,grad_norm: 0.9790351326986184, iteration: 73910
loss: 1.007224202156067,grad_norm: 0.9747488731861561, iteration: 73911
loss: 0.9973813891410828,grad_norm: 0.8863630953970026, iteration: 73912
loss: 1.0045160055160522,grad_norm: 0.8463953255931695, iteration: 73913
loss: 1.0196048021316528,grad_norm: 0.9999991698401317, iteration: 73914
loss: 0.9940853118896484,grad_norm: 0.9803889495498609, iteration: 73915
loss: 1.013899803161621,grad_norm: 0.9999992445561872, iteration: 73916
loss: 1.0974798202514648,grad_norm: 0.9999994226233647, iteration: 73917
loss: 1.0101187229156494,grad_norm: 0.9772153057061942, iteration: 73918
loss: 0.9850084781646729,grad_norm: 0.9844294880434721, iteration: 73919
loss: 1.0070346593856812,grad_norm: 0.9999991463926509, iteration: 73920
loss: 1.0155484676361084,grad_norm: 0.9772090316902712, iteration: 73921
loss: 0.9645192623138428,grad_norm: 0.9999992845857496, iteration: 73922
loss: 1.0054749250411987,grad_norm: 0.9836322907463625, iteration: 73923
loss: 0.989770770072937,grad_norm: 0.9999990731156054, iteration: 73924
loss: 1.0415239334106445,grad_norm: 0.9999994416835939, iteration: 73925
loss: 1.0080286264419556,grad_norm: 0.9999990784996449, iteration: 73926
loss: 1.0357927083969116,grad_norm: 0.9999991935334877, iteration: 73927
loss: 1.0231479406356812,grad_norm: 0.9999990439249414, iteration: 73928
loss: 1.0019607543945312,grad_norm: 0.9999990854214992, iteration: 73929
loss: 1.0072569847106934,grad_norm: 0.9999991093157653, iteration: 73930
loss: 0.9953571557998657,grad_norm: 0.9999990874543667, iteration: 73931
loss: 0.9828776121139526,grad_norm: 0.9999990300726407, iteration: 73932
loss: 1.0306557416915894,grad_norm: 0.9999995151080632, iteration: 73933
loss: 1.0176922082901,grad_norm: 0.9999991047424476, iteration: 73934
loss: 0.974955677986145,grad_norm: 0.999999265101212, iteration: 73935
loss: 1.0141321420669556,grad_norm: 0.999999098302304, iteration: 73936
loss: 1.0199244022369385,grad_norm: 0.9999992539010644, iteration: 73937
loss: 1.0699208974838257,grad_norm: 0.9999994647305944, iteration: 73938
loss: 0.9934046268463135,grad_norm: 0.9999990916622918, iteration: 73939
loss: 0.9413465261459351,grad_norm: 0.9999990390082595, iteration: 73940
loss: 0.9710763692855835,grad_norm: 0.9999993195501249, iteration: 73941
loss: 1.0097962617874146,grad_norm: 0.9999991499689895, iteration: 73942
loss: 1.0423173904418945,grad_norm: 0.9999991893234649, iteration: 73943
loss: 1.002419114112854,grad_norm: 0.9999990597571825, iteration: 73944
loss: 1.0053534507751465,grad_norm: 0.8706948354053919, iteration: 73945
loss: 1.0111140012741089,grad_norm: 0.9096500215567421, iteration: 73946
loss: 1.0011775493621826,grad_norm: 0.9811279428009574, iteration: 73947
loss: 0.9741522669792175,grad_norm: 0.9794229705180924, iteration: 73948
loss: 0.9767844080924988,grad_norm: 0.9999993166846689, iteration: 73949
loss: 0.984334409236908,grad_norm: 0.9999990929733565, iteration: 73950
loss: 1.0071156024932861,grad_norm: 0.9999990599207329, iteration: 73951
loss: 1.0798674821853638,grad_norm: 0.9999992384378924, iteration: 73952
loss: 0.9548874497413635,grad_norm: 0.9999991422349074, iteration: 73953
loss: 1.0368132591247559,grad_norm: 0.999999368941186, iteration: 73954
loss: 0.9736394882202148,grad_norm: 0.9999989222813999, iteration: 73955
loss: 1.0184416770935059,grad_norm: 0.9598868401479876, iteration: 73956
loss: 1.0091793537139893,grad_norm: 0.9794895122047641, iteration: 73957
loss: 0.9757221937179565,grad_norm: 0.9999991950926473, iteration: 73958
loss: 1.0105607509613037,grad_norm: 0.9999996090128838, iteration: 73959
loss: 1.0044026374816895,grad_norm: 0.9999990767492575, iteration: 73960
loss: 0.9745509624481201,grad_norm: 0.9999989412070266, iteration: 73961
loss: 0.9986220598220825,grad_norm: 0.9999991622463715, iteration: 73962
loss: 1.0310944318771362,grad_norm: 0.9975074063947114, iteration: 73963
loss: 1.0866215229034424,grad_norm: 0.99999984282644, iteration: 73964
loss: 0.9734470844268799,grad_norm: 0.99999913617448, iteration: 73965
loss: 0.974970281124115,grad_norm: 0.9999989394159077, iteration: 73966
loss: 1.0102207660675049,grad_norm: 0.9251589191369678, iteration: 73967
loss: 1.0164527893066406,grad_norm: 0.9999992498260633, iteration: 73968
loss: 0.9988922476768494,grad_norm: 0.9999990659237293, iteration: 73969
loss: 1.0186289548873901,grad_norm: 0.9999990638188593, iteration: 73970
loss: 0.9737533330917358,grad_norm: 0.9999993709353416, iteration: 73971
loss: 0.9869996905326843,grad_norm: 0.9999991877468546, iteration: 73972
loss: 0.9940471053123474,grad_norm: 0.9534234765378757, iteration: 73973
loss: 1.0134189128875732,grad_norm: 0.9999990639472051, iteration: 73974
loss: 0.9722153544425964,grad_norm: 0.9999989794573312, iteration: 73975
loss: 1.0183945894241333,grad_norm: 0.9999991311277108, iteration: 73976
loss: 1.0286011695861816,grad_norm: 0.9999991009619618, iteration: 73977
loss: 1.0051301717758179,grad_norm: 0.9818162154109574, iteration: 73978
loss: 0.9956019520759583,grad_norm: 0.9644888222720872, iteration: 73979
loss: 0.9602937698364258,grad_norm: 0.8789149463776939, iteration: 73980
loss: 1.0419161319732666,grad_norm: 0.9999991752259558, iteration: 73981
loss: 1.0203224420547485,grad_norm: 0.9999993934130359, iteration: 73982
loss: 1.0631041526794434,grad_norm: 0.9724224773935995, iteration: 73983
loss: 0.9800402522087097,grad_norm: 0.9999993008532923, iteration: 73984
loss: 0.9986494183540344,grad_norm: 0.8988027685474355, iteration: 73985
loss: 1.0297569036483765,grad_norm: 0.999999038816057, iteration: 73986
loss: 1.0086696147918701,grad_norm: 0.9619871529420918, iteration: 73987
loss: 1.0278801918029785,grad_norm: 0.9999994541025474, iteration: 73988
loss: 0.9365897178649902,grad_norm: 0.9999989339584324, iteration: 73989
loss: 1.0032905340194702,grad_norm: 0.8682337434512468, iteration: 73990
loss: 0.9942938685417175,grad_norm: 0.9999990402658493, iteration: 73991
loss: 1.0013799667358398,grad_norm: 0.999999042909946, iteration: 73992
loss: 0.9831627011299133,grad_norm: 0.9654343202600217, iteration: 73993
loss: 0.9918562769889832,grad_norm: 0.9889018949517464, iteration: 73994
loss: 1.0125523805618286,grad_norm: 0.9999992652299909, iteration: 73995
loss: 1.0003751516342163,grad_norm: 0.9999992999709064, iteration: 73996
loss: 0.9965620636940002,grad_norm: 0.9999992184707517, iteration: 73997
loss: 1.0095609426498413,grad_norm: 0.932925475742205, iteration: 73998
loss: 1.01885187625885,grad_norm: 0.9999993458626827, iteration: 73999
loss: 0.9868596792221069,grad_norm: 0.9999996575876772, iteration: 74000
loss: 1.0221080780029297,grad_norm: 0.9301374349012183, iteration: 74001
loss: 0.9769713282585144,grad_norm: 0.898473903843262, iteration: 74002
loss: 1.0048623085021973,grad_norm: 0.972541245283431, iteration: 74003
loss: 1.003342628479004,grad_norm: 0.9999990293183013, iteration: 74004
loss: 1.004601240158081,grad_norm: 0.9791032445886368, iteration: 74005
loss: 1.0069762468338013,grad_norm: 0.978584653381744, iteration: 74006
loss: 0.9930399060249329,grad_norm: 0.968032808216601, iteration: 74007
loss: 0.9575117826461792,grad_norm: 0.9999991312114862, iteration: 74008
loss: 0.9958633184432983,grad_norm: 0.9999990148328647, iteration: 74009
loss: 1.0036641359329224,grad_norm: 0.9999989799325788, iteration: 74010
loss: 1.0225396156311035,grad_norm: 0.9999989900141997, iteration: 74011
loss: 1.049546718597412,grad_norm: 0.9999991655002501, iteration: 74012
loss: 0.9849611520767212,grad_norm: 0.9589635249946582, iteration: 74013
loss: 0.9768201112747192,grad_norm: 0.8446373636739357, iteration: 74014
loss: 0.9668969511985779,grad_norm: 0.999999464682356, iteration: 74015
loss: 1.000884771347046,grad_norm: 0.9999992931425208, iteration: 74016
loss: 1.0006190538406372,grad_norm: 0.9067686532732567, iteration: 74017
loss: 1.0150200128555298,grad_norm: 0.999999341890037, iteration: 74018
loss: 1.0523850917816162,grad_norm: 0.9999997832500699, iteration: 74019
loss: 1.036092758178711,grad_norm: 0.9999990963979754, iteration: 74020
loss: 1.0114892721176147,grad_norm: 0.9999991106326007, iteration: 74021
loss: 1.030906081199646,grad_norm: 0.9941171227047368, iteration: 74022
loss: 0.980957567691803,grad_norm: 0.9999991403925592, iteration: 74023
loss: 1.0129443407058716,grad_norm: 0.9999991379696911, iteration: 74024
loss: 1.098734974861145,grad_norm: 0.9999995275057166, iteration: 74025
loss: 0.969546914100647,grad_norm: 0.9103947719914339, iteration: 74026
loss: 1.017883539199829,grad_norm: 0.9999996691185712, iteration: 74027
loss: 1.025572419166565,grad_norm: 0.9999991574893686, iteration: 74028
loss: 1.0113800764083862,grad_norm: 0.9999994051091409, iteration: 74029
loss: 0.9998940229415894,grad_norm: 0.9812295018007892, iteration: 74030
loss: 1.0436205863952637,grad_norm: 0.9999991029062358, iteration: 74031
loss: 0.9998740553855896,grad_norm: 0.9950238076663188, iteration: 74032
loss: 1.0244593620300293,grad_norm: 0.9999991424460599, iteration: 74033
loss: 1.0171922445297241,grad_norm: 0.9999990925543238, iteration: 74034
loss: 0.994927167892456,grad_norm: 0.9165704847028808, iteration: 74035
loss: 0.9799110293388367,grad_norm: 0.9999992634811661, iteration: 74036
loss: 1.005276083946228,grad_norm: 0.9999991409659891, iteration: 74037
loss: 1.0100104808807373,grad_norm: 0.9999992899794298, iteration: 74038
loss: 1.0075844526290894,grad_norm: 0.9999992490249439, iteration: 74039
loss: 0.9846988320350647,grad_norm: 0.9958015038505967, iteration: 74040
loss: 1.0247801542282104,grad_norm: 0.9999991361735641, iteration: 74041
loss: 1.0035955905914307,grad_norm: 0.9999991221288079, iteration: 74042
loss: 0.970928966999054,grad_norm: 0.999999188066537, iteration: 74043
loss: 0.9593991041183472,grad_norm: 0.9999990153224562, iteration: 74044
loss: 1.0175955295562744,grad_norm: 0.9334815854135426, iteration: 74045
loss: 1.0366387367248535,grad_norm: 0.9164611106090331, iteration: 74046
loss: 0.9903010725975037,grad_norm: 0.9999990430445287, iteration: 74047
loss: 0.9838210940361023,grad_norm: 0.9999989982597143, iteration: 74048
loss: 1.0216935873031616,grad_norm: 0.9999991094552363, iteration: 74049
loss: 0.9830384254455566,grad_norm: 0.9999991480251216, iteration: 74050
loss: 0.9775831699371338,grad_norm: 0.9999989691438882, iteration: 74051
loss: 0.9997833371162415,grad_norm: 0.9999991907256051, iteration: 74052
loss: 0.9756673574447632,grad_norm: 0.9999991448868134, iteration: 74053
loss: 0.9935893416404724,grad_norm: 0.9999990262405714, iteration: 74054
loss: 1.0245589017868042,grad_norm: 0.9553502813409129, iteration: 74055
loss: 0.9939999580383301,grad_norm: 0.9999991270616876, iteration: 74056
loss: 0.9923279285430908,grad_norm: 0.8891446246760439, iteration: 74057
loss: 1.023236870765686,grad_norm: 0.9842216721353548, iteration: 74058
loss: 1.0432599782943726,grad_norm: 0.9999992354502312, iteration: 74059
loss: 0.9922683835029602,grad_norm: 0.9999991274206133, iteration: 74060
loss: 0.9749754071235657,grad_norm: 0.999998977242364, iteration: 74061
loss: 0.968726396560669,grad_norm: 0.9999989956632978, iteration: 74062
loss: 0.9765346050262451,grad_norm: 0.9999992865179322, iteration: 74063
loss: 1.0060442686080933,grad_norm: 0.9709101777254808, iteration: 74064
loss: 0.9830636382102966,grad_norm: 0.9999990821061829, iteration: 74065
loss: 0.9850298762321472,grad_norm: 0.9476320246137221, iteration: 74066
loss: 1.0143873691558838,grad_norm: 0.8144085424688977, iteration: 74067
loss: 0.9669849872589111,grad_norm: 0.8811270353603504, iteration: 74068
loss: 0.954893946647644,grad_norm: 0.9999991107225346, iteration: 74069
loss: 1.0271364450454712,grad_norm: 0.9999990830148353, iteration: 74070
loss: 1.133281946182251,grad_norm: 0.9999998512730822, iteration: 74071
loss: 0.9782918095588684,grad_norm: 0.9999992526971668, iteration: 74072
loss: 0.9895462393760681,grad_norm: 0.8878126439206984, iteration: 74073
loss: 0.982231616973877,grad_norm: 0.9999991662051633, iteration: 74074
loss: 1.0195307731628418,grad_norm: 0.8960192027379047, iteration: 74075
loss: 1.0060927867889404,grad_norm: 0.9999990711251164, iteration: 74076
loss: 0.9713083505630493,grad_norm: 0.9999992392116205, iteration: 74077
loss: 1.0013344287872314,grad_norm: 0.9999991674149773, iteration: 74078
loss: 1.0256599187850952,grad_norm: 0.9999997467833989, iteration: 74079
loss: 1.0821894407272339,grad_norm: 0.9999995405700809, iteration: 74080
loss: 1.0317450761795044,grad_norm: 0.8276216239532308, iteration: 74081
loss: 0.9624126553535461,grad_norm: 0.9999992244288155, iteration: 74082
loss: 1.0598764419555664,grad_norm: 0.9999992957993017, iteration: 74083
loss: 0.9846435785293579,grad_norm: 0.8864916646406733, iteration: 74084
loss: 1.00315523147583,grad_norm: 0.9480920277459604, iteration: 74085
loss: 0.9690317511558533,grad_norm: 0.9937832084889726, iteration: 74086
loss: 1.028146505355835,grad_norm: 0.9999990725121958, iteration: 74087
loss: 1.0504391193389893,grad_norm: 0.9999991299059914, iteration: 74088
loss: 0.9775939583778381,grad_norm: 0.9999995050837561, iteration: 74089
loss: 1.002808928489685,grad_norm: 0.9494537448743318, iteration: 74090
loss: 1.0265806913375854,grad_norm: 0.9999997780412201, iteration: 74091
loss: 0.979114294052124,grad_norm: 0.9999991824148421, iteration: 74092
loss: 1.0305657386779785,grad_norm: 0.9999992596592444, iteration: 74093
loss: 0.9970600008964539,grad_norm: 0.996654255223881, iteration: 74094
loss: 0.9937915802001953,grad_norm: 0.9421748601065909, iteration: 74095
loss: 1.0045229196548462,grad_norm: 0.999999072482335, iteration: 74096
loss: 1.0080403089523315,grad_norm: 0.871540430233744, iteration: 74097
loss: 1.015540361404419,grad_norm: 0.9999991193126284, iteration: 74098
loss: 0.9784444570541382,grad_norm: 0.9999990682045868, iteration: 74099
loss: 0.99982750415802,grad_norm: 0.9999991420680476, iteration: 74100
loss: 0.9836729168891907,grad_norm: 0.9765185234890723, iteration: 74101
loss: 0.9929208159446716,grad_norm: 0.999999119915463, iteration: 74102
loss: 1.0147830247879028,grad_norm: 0.9999991437856325, iteration: 74103
loss: 0.9768599271774292,grad_norm: 0.9999990688828141, iteration: 74104
loss: 0.9976891279220581,grad_norm: 0.9999990544890776, iteration: 74105
loss: 1.0173708200454712,grad_norm: 0.9999991046152777, iteration: 74106
loss: 0.9965332746505737,grad_norm: 0.9754169678994405, iteration: 74107
loss: 1.001711130142212,grad_norm: 0.9999989524704584, iteration: 74108
loss: 1.0569946765899658,grad_norm: 0.9999993188190918, iteration: 74109
loss: 1.0653742551803589,grad_norm: 0.9999991792381827, iteration: 74110
loss: 1.0084073543548584,grad_norm: 0.9999992201450273, iteration: 74111
loss: 1.008806586265564,grad_norm: 0.9776103929618131, iteration: 74112
loss: 0.9863503575325012,grad_norm: 0.9999990587262363, iteration: 74113
loss: 0.9945700764656067,grad_norm: 0.9562356925188169, iteration: 74114
loss: 0.982927680015564,grad_norm: 0.8377873297640033, iteration: 74115
loss: 1.0647997856140137,grad_norm: 0.9999993446596036, iteration: 74116
loss: 0.988677442073822,grad_norm: 0.8880758463329147, iteration: 74117
loss: 1.0006405115127563,grad_norm: 0.8555260013289064, iteration: 74118
loss: 0.9831846356391907,grad_norm: 0.9999991828624153, iteration: 74119
loss: 0.9735907316207886,grad_norm: 0.9999995503051166, iteration: 74120
loss: 0.9992817640304565,grad_norm: 0.9999991004587047, iteration: 74121
loss: 1.0396599769592285,grad_norm: 0.999999234811776, iteration: 74122
loss: 1.024132490158081,grad_norm: 0.9999993819021544, iteration: 74123
loss: 0.9717918634414673,grad_norm: 0.888812771204698, iteration: 74124
loss: 0.9909316897392273,grad_norm: 0.9809480991655053, iteration: 74125
loss: 0.967654824256897,grad_norm: 0.9627277199113626, iteration: 74126
loss: 1.0188673734664917,grad_norm: 0.9501232645879203, iteration: 74127
loss: 1.054097056388855,grad_norm: 0.9999992839211533, iteration: 74128
loss: 1.0036818981170654,grad_norm: 0.99999931888454, iteration: 74129
loss: 0.9463673233985901,grad_norm: 0.9999990559939057, iteration: 74130
loss: 0.9745475053787231,grad_norm: 0.9209668342833927, iteration: 74131
loss: 0.9844973683357239,grad_norm: 0.8825485968238531, iteration: 74132
loss: 1.0122995376586914,grad_norm: 0.9999991451685164, iteration: 74133
loss: 1.0185072422027588,grad_norm: 0.9999993056847407, iteration: 74134
loss: 1.032872200012207,grad_norm: 0.9999994219491822, iteration: 74135
loss: 1.0028122663497925,grad_norm: 0.9544115681372993, iteration: 74136
loss: 0.9902084469795227,grad_norm: 0.967998941855035, iteration: 74137
loss: 0.9859816431999207,grad_norm: 0.9999990796243211, iteration: 74138
loss: 0.985083818435669,grad_norm: 0.9999991458566067, iteration: 74139
loss: 0.9882264733314514,grad_norm: 0.9856603729923725, iteration: 74140
loss: 0.9959210157394409,grad_norm: 0.9999990608044979, iteration: 74141
loss: 1.0057629346847534,grad_norm: 0.9075058260619582, iteration: 74142
loss: 1.013713002204895,grad_norm: 0.9999995786407614, iteration: 74143
loss: 0.9642214775085449,grad_norm: 0.999999110874014, iteration: 74144
loss: 1.0000462532043457,grad_norm: 0.9999992339347443, iteration: 74145
loss: 0.990080714225769,grad_norm: 0.9999992315743084, iteration: 74146
loss: 0.9907311797142029,grad_norm: 0.8485132395104367, iteration: 74147
loss: 0.9821431636810303,grad_norm: 0.999999161297387, iteration: 74148
loss: 1.0359408855438232,grad_norm: 0.9999992434189084, iteration: 74149
loss: 0.992159903049469,grad_norm: 0.999999097376408, iteration: 74150
loss: 0.9886409044265747,grad_norm: 0.9999993366204074, iteration: 74151
loss: 1.0100417137145996,grad_norm: 0.9183633356437967, iteration: 74152
loss: 1.0297455787658691,grad_norm: 0.9573571467207926, iteration: 74153
loss: 1.0031405687332153,grad_norm: 0.8991499825771345, iteration: 74154
loss: 1.038513422012329,grad_norm: 0.9490963853029438, iteration: 74155
loss: 0.9978446364402771,grad_norm: 0.9943136550884373, iteration: 74156
loss: 1.0020536184310913,grad_norm: 0.9999990484779846, iteration: 74157
loss: 1.0137760639190674,grad_norm: 0.9999992010987077, iteration: 74158
loss: 1.0134029388427734,grad_norm: 0.9447778908167249, iteration: 74159
loss: 0.9999544024467468,grad_norm: 0.9999992589340982, iteration: 74160
loss: 0.9898681640625,grad_norm: 0.9170857148842919, iteration: 74161
loss: 1.0421229600906372,grad_norm: 0.9999994796579182, iteration: 74162
loss: 1.0021095275878906,grad_norm: 0.8954498823033176, iteration: 74163
loss: 1.012491226196289,grad_norm: 0.956033022743262, iteration: 74164
loss: 0.9971550107002258,grad_norm: 0.944146825460517, iteration: 74165
loss: 1.0007344484329224,grad_norm: 0.9999992232057552, iteration: 74166
loss: 0.9993959069252014,grad_norm: 0.9781743441086307, iteration: 74167
loss: 1.0067275762557983,grad_norm: 0.9999991417011542, iteration: 74168
loss: 0.9904964566230774,grad_norm: 0.8858220078567847, iteration: 74169
loss: 1.0206725597381592,grad_norm: 0.9999990376378554, iteration: 74170
loss: 0.9831011295318604,grad_norm: 0.8780543207305217, iteration: 74171
loss: 1.0128633975982666,grad_norm: 0.9999990615206265, iteration: 74172
loss: 0.9974479675292969,grad_norm: 0.8757406037811652, iteration: 74173
loss: 0.9724920988082886,grad_norm: 0.9999991302277769, iteration: 74174
loss: 0.9695215821266174,grad_norm: 0.9999991446392503, iteration: 74175
loss: 0.9584625363349915,grad_norm: 0.9236968951141046, iteration: 74176
loss: 0.9703062772750854,grad_norm: 0.9999990434050401, iteration: 74177
loss: 1.0106974840164185,grad_norm: 0.9999991041394973, iteration: 74178
loss: 0.9944551587104797,grad_norm: 0.9999990870491384, iteration: 74179
loss: 1.020772099494934,grad_norm: 0.8525984266408738, iteration: 74180
loss: 1.0088200569152832,grad_norm: 0.8642911594543096, iteration: 74181
loss: 0.9884631633758545,grad_norm: 0.9546277961043568, iteration: 74182
loss: 1.0159382820129395,grad_norm: 0.9999990047639955, iteration: 74183
loss: 0.9669532179832458,grad_norm: 0.9999990700274948, iteration: 74184
loss: 0.9900099635124207,grad_norm: 0.999999834332135, iteration: 74185
loss: 1.002475619316101,grad_norm: 0.9468110097698269, iteration: 74186
loss: 0.9872311949729919,grad_norm: 0.9999990337509771, iteration: 74187
loss: 0.9638660550117493,grad_norm: 0.8424981153938219, iteration: 74188
loss: 0.9807263612747192,grad_norm: 0.9999991587699687, iteration: 74189
loss: 1.0028588771820068,grad_norm: 0.9999990816477936, iteration: 74190
loss: 1.0484533309936523,grad_norm: 0.9999991687897616, iteration: 74191
loss: 0.9897858500480652,grad_norm: 0.9999991232442295, iteration: 74192
loss: 0.9903114438056946,grad_norm: 0.9999992623862559, iteration: 74193
loss: 0.9782751202583313,grad_norm: 0.9999991227892516, iteration: 74194
loss: 1.0056328773498535,grad_norm: 0.9999992254840181, iteration: 74195
loss: 1.028581142425537,grad_norm: 0.9999990179489051, iteration: 74196
loss: 1.0422369241714478,grad_norm: 0.9999992578083174, iteration: 74197
loss: 0.9966365098953247,grad_norm: 0.9999990238979727, iteration: 74198
loss: 0.9774547815322876,grad_norm: 0.8307501877116515, iteration: 74199
loss: 1.0683174133300781,grad_norm: 0.9999993649842126, iteration: 74200
loss: 1.0067529678344727,grad_norm: 0.9999995548461625, iteration: 74201
loss: 1.0350341796875,grad_norm: 0.9951592457771223, iteration: 74202
loss: 0.9928268194198608,grad_norm: 0.9999990608305523, iteration: 74203
loss: 1.0266038179397583,grad_norm: 0.9999990766954872, iteration: 74204
loss: 1.0024523735046387,grad_norm: 0.9489890872423715, iteration: 74205
loss: 1.0208935737609863,grad_norm: 0.9999993243136783, iteration: 74206
loss: 0.9916408658027649,grad_norm: 0.9267904841138978, iteration: 74207
loss: 0.9732958078384399,grad_norm: 0.9536006259437755, iteration: 74208
loss: 0.9928219318389893,grad_norm: 0.9685403727905504, iteration: 74209
loss: 0.9896062612533569,grad_norm: 0.9274252715984322, iteration: 74210
loss: 1.0078929662704468,grad_norm: 0.999999138274411, iteration: 74211
loss: 0.9732019305229187,grad_norm: 0.9999991061745248, iteration: 74212
loss: 1.01798677444458,grad_norm: 0.9999990805462712, iteration: 74213
loss: 1.0212498903274536,grad_norm: 0.9999991726419769, iteration: 74214
loss: 1.000624656677246,grad_norm: 0.9999992066935846, iteration: 74215
loss: 0.987384021282196,grad_norm: 0.9999991734950637, iteration: 74216
loss: 1.0137996673583984,grad_norm: 0.9999991243328022, iteration: 74217
loss: 1.023680567741394,grad_norm: 0.999999819129238, iteration: 74218
loss: 0.9766324758529663,grad_norm: 0.9999991684295045, iteration: 74219
loss: 0.9805678725242615,grad_norm: 0.9931416957408574, iteration: 74220
loss: 0.9911659359931946,grad_norm: 0.9999989247244191, iteration: 74221
loss: 0.9919201731681824,grad_norm: 0.9770644960796407, iteration: 74222
loss: 1.0148100852966309,grad_norm: 0.995485898251459, iteration: 74223
loss: 0.9917184710502625,grad_norm: 0.9999990976646959, iteration: 74224
loss: 1.0495082139968872,grad_norm: 0.9999995734164695, iteration: 74225
loss: 1.0213700532913208,grad_norm: 0.9090230571862469, iteration: 74226
loss: 0.9945452809333801,grad_norm: 0.9999990578980833, iteration: 74227
loss: 1.0045086145401,grad_norm: 0.9999990168670123, iteration: 74228
loss: 1.024336814880371,grad_norm: 0.9999990092794373, iteration: 74229
loss: 0.9856431484222412,grad_norm: 0.9999991732314851, iteration: 74230
loss: 1.0079772472381592,grad_norm: 0.9999991485740404, iteration: 74231
loss: 0.9819721579551697,grad_norm: 0.9999990959736944, iteration: 74232
loss: 0.996511697769165,grad_norm: 0.999999612074472, iteration: 74233
loss: 0.9859330654144287,grad_norm: 0.999843703345579, iteration: 74234
loss: 0.979201078414917,grad_norm: 0.9999990915557686, iteration: 74235
loss: 1.022782802581787,grad_norm: 0.9999990338359311, iteration: 74236
loss: 1.0108647346496582,grad_norm: 0.9999991595279798, iteration: 74237
loss: 0.9925511479377747,grad_norm: 0.9999992323424902, iteration: 74238
loss: 0.983131468296051,grad_norm: 0.999999248585185, iteration: 74239
loss: 0.9632430076599121,grad_norm: 0.9999990105518336, iteration: 74240
loss: 1.023924708366394,grad_norm: 0.993037441668267, iteration: 74241
loss: 1.064740538597107,grad_norm: 0.9728458652691491, iteration: 74242
loss: 1.0342772006988525,grad_norm: 0.92438699158982, iteration: 74243
loss: 1.0281856060028076,grad_norm: 0.9999989895544137, iteration: 74244
loss: 1.0313146114349365,grad_norm: 0.9999993310974672, iteration: 74245
loss: 1.0178345441818237,grad_norm: 0.9999990842027474, iteration: 74246
loss: 0.9931608438491821,grad_norm: 0.999999072771941, iteration: 74247
loss: 1.0541785955429077,grad_norm: 0.9999992002725142, iteration: 74248
loss: 0.9958021640777588,grad_norm: 0.9999990733621928, iteration: 74249
loss: 0.9952598810195923,grad_norm: 0.8857077901311847, iteration: 74250
loss: 1.0123984813690186,grad_norm: 0.9553980493603372, iteration: 74251
loss: 1.03530752658844,grad_norm: 0.9999992822064393, iteration: 74252
loss: 0.9891684651374817,grad_norm: 0.8879303645242194, iteration: 74253
loss: 1.1104159355163574,grad_norm: 0.9999993483527281, iteration: 74254
loss: 1.0156669616699219,grad_norm: 0.9999992537345078, iteration: 74255
loss: 0.9797680974006653,grad_norm: 0.9963403969321487, iteration: 74256
loss: 0.9986098408699036,grad_norm: 0.9671229717710799, iteration: 74257
loss: 1.0166782140731812,grad_norm: 0.9999992151904905, iteration: 74258
loss: 1.0124984979629517,grad_norm: 0.7991329420423683, iteration: 74259
loss: 1.0082157850265503,grad_norm: 0.9999989751205386, iteration: 74260
loss: 0.9883625507354736,grad_norm: 0.9999991037471048, iteration: 74261
loss: 1.000237226486206,grad_norm: 0.9999992268619339, iteration: 74262
loss: 1.0170400142669678,grad_norm: 0.9999997788708418, iteration: 74263
loss: 1.016990303993225,grad_norm: 0.9941490338305625, iteration: 74264
loss: 1.0446364879608154,grad_norm: 0.9999990690847836, iteration: 74265
loss: 1.0227699279785156,grad_norm: 0.9441309643451006, iteration: 74266
loss: 0.9971450567245483,grad_norm: 0.8894537477074836, iteration: 74267
loss: 1.0181682109832764,grad_norm: 0.9999990993762701, iteration: 74268
loss: 0.970528781414032,grad_norm: 0.9999991854002171, iteration: 74269
loss: 0.9672901630401611,grad_norm: 0.9334220225167352, iteration: 74270
loss: 1.0279803276062012,grad_norm: 0.9071284755340276, iteration: 74271
loss: 0.9724801778793335,grad_norm: 0.9999991718927085, iteration: 74272
loss: 0.972040057182312,grad_norm: 0.9999990760132317, iteration: 74273
loss: 0.9800978899002075,grad_norm: 0.9999991648367779, iteration: 74274
loss: 1.0007309913635254,grad_norm: 0.9894149879725308, iteration: 74275
loss: 1.0167739391326904,grad_norm: 0.9367953685771617, iteration: 74276
loss: 1.0262938737869263,grad_norm: 0.9999991624121519, iteration: 74277
loss: 1.0001007318496704,grad_norm: 0.9884963670081954, iteration: 74278
loss: 1.0276201963424683,grad_norm: 0.9999993181893054, iteration: 74279
loss: 1.0357012748718262,grad_norm: 0.99999904512237, iteration: 74280
loss: 1.0052931308746338,grad_norm: 0.9803742070894071, iteration: 74281
loss: 1.0188956260681152,grad_norm: 0.9929064908744987, iteration: 74282
loss: 0.9822352528572083,grad_norm: 0.947228826718336, iteration: 74283
loss: 0.979900062084198,grad_norm: 0.9012568720171102, iteration: 74284
loss: 1.028970718383789,grad_norm: 0.8492760133901623, iteration: 74285
loss: 0.9946349263191223,grad_norm: 0.9999991668126145, iteration: 74286
loss: 0.9809241890907288,grad_norm: 0.998224034611896, iteration: 74287
loss: 1.0282341241836548,grad_norm: 0.9999990001278981, iteration: 74288
loss: 0.9929853081703186,grad_norm: 0.9999990211350227, iteration: 74289
loss: 1.0080039501190186,grad_norm: 0.999999191838534, iteration: 74290
loss: 0.9864635467529297,grad_norm: 0.9999991578231013, iteration: 74291
loss: 1.0165942907333374,grad_norm: 0.9978163995581203, iteration: 74292
loss: 0.9774987697601318,grad_norm: 0.999999141663562, iteration: 74293
loss: 0.9943469762802124,grad_norm: 0.8732608029040143, iteration: 74294
loss: 1.005749225616455,grad_norm: 0.999999100610058, iteration: 74295
loss: 0.9896171689033508,grad_norm: 0.9999992517837977, iteration: 74296
loss: 0.9508624076843262,grad_norm: 0.9101343709494807, iteration: 74297
loss: 1.0152888298034668,grad_norm: 0.9999992156333098, iteration: 74298
loss: 1.0059504508972168,grad_norm: 0.9803208333770539, iteration: 74299
loss: 1.0036762952804565,grad_norm: 0.9999991868953555, iteration: 74300
loss: 0.9843811392784119,grad_norm: 0.9723401534749375, iteration: 74301
loss: 0.9997445940971375,grad_norm: 0.9630378729544709, iteration: 74302
loss: 1.007200002670288,grad_norm: 0.9741991714884537, iteration: 74303
loss: 0.9704408645629883,grad_norm: 0.9999992168462588, iteration: 74304
loss: 1.0238181352615356,grad_norm: 0.9405329827940518, iteration: 74305
loss: 0.9936450123786926,grad_norm: 0.9999992293950277, iteration: 74306
loss: 1.03805410861969,grad_norm: 0.962219960459247, iteration: 74307
loss: 1.0097421407699585,grad_norm: 0.7987601609876084, iteration: 74308
loss: 1.0197430849075317,grad_norm: 0.8410963311304103, iteration: 74309
loss: 0.9934220314025879,grad_norm: 0.9566077315278242, iteration: 74310
loss: 1.0280799865722656,grad_norm: 0.942503343450401, iteration: 74311
loss: 0.9993880391120911,grad_norm: 0.9945589524627755, iteration: 74312
loss: 1.0011159181594849,grad_norm: 0.9999990532810861, iteration: 74313
loss: 0.9910585284233093,grad_norm: 0.9487622175253643, iteration: 74314
loss: 1.0083345174789429,grad_norm: 0.8876338655353038, iteration: 74315
loss: 1.0065194368362427,grad_norm: 0.9999992115022885, iteration: 74316
loss: 1.002097487449646,grad_norm: 0.992443695360031, iteration: 74317
loss: 1.0221444368362427,grad_norm: 0.8699700237160192, iteration: 74318
loss: 1.028187870979309,grad_norm: 0.8468183232459038, iteration: 74319
loss: 0.9996955990791321,grad_norm: 0.9773044752008888, iteration: 74320
loss: 1.0029875040054321,grad_norm: 0.9999990886201423, iteration: 74321
loss: 1.1007498502731323,grad_norm: 0.9999993409561447, iteration: 74322
loss: 1.00557541847229,grad_norm: 0.9326671026881546, iteration: 74323
loss: 0.9585615992546082,grad_norm: 0.9916076424837907, iteration: 74324
loss: 0.9903212189674377,grad_norm: 0.9450109426030556, iteration: 74325
loss: 1.0078623294830322,grad_norm: 0.9999991887637432, iteration: 74326
loss: 0.994040846824646,grad_norm: 0.9999991570825149, iteration: 74327
loss: 1.0091553926467896,grad_norm: 0.9999992054052808, iteration: 74328
loss: 1.0197930335998535,grad_norm: 0.9999989978405208, iteration: 74329
loss: 1.0017883777618408,grad_norm: 0.8584534202000161, iteration: 74330
loss: 1.0207847356796265,grad_norm: 0.8314382729660142, iteration: 74331
loss: 0.9942112565040588,grad_norm: 0.9697492657347135, iteration: 74332
loss: 0.9746072292327881,grad_norm: 0.9999989982938382, iteration: 74333
loss: 0.9855172634124756,grad_norm: 0.9999993086507356, iteration: 74334
loss: 0.9761325120925903,grad_norm: 0.9591037888226611, iteration: 74335
loss: 1.0088045597076416,grad_norm: 0.9250866446874704, iteration: 74336
loss: 0.9925912022590637,grad_norm: 0.9129344552959632, iteration: 74337
loss: 0.9786008596420288,grad_norm: 0.986863648054556, iteration: 74338
loss: 0.9792826771736145,grad_norm: 0.9999992007759141, iteration: 74339
loss: 1.0052815675735474,grad_norm: 0.9999994795885337, iteration: 74340
loss: 1.016242504119873,grad_norm: 0.928190422474716, iteration: 74341
loss: 0.9853785037994385,grad_norm: 0.9999991025027661, iteration: 74342
loss: 1.000288724899292,grad_norm: 0.9034702024635697, iteration: 74343
loss: 1.017632007598877,grad_norm: 0.9999991006532136, iteration: 74344
loss: 0.9802150130271912,grad_norm: 0.8698766194442852, iteration: 74345
loss: 1.0027953386306763,grad_norm: 0.9643989596716717, iteration: 74346
loss: 0.9738004207611084,grad_norm: 0.9999993003849055, iteration: 74347
loss: 0.9712933301925659,grad_norm: 0.9787471270064811, iteration: 74348
loss: 1.068293571472168,grad_norm: 0.9999992886309967, iteration: 74349
loss: 0.9962326884269714,grad_norm: 0.990123348934996, iteration: 74350
loss: 0.9821376800537109,grad_norm: 0.9999995724965264, iteration: 74351
loss: 0.99509197473526,grad_norm: 0.9999992255405444, iteration: 74352
loss: 1.006369709968567,grad_norm: 0.9999990258256904, iteration: 74353
loss: 0.9929599165916443,grad_norm: 0.9999992655312127, iteration: 74354
loss: 0.9890726208686829,grad_norm: 0.9999990041822521, iteration: 74355
loss: 1.0049805641174316,grad_norm: 0.798777864544997, iteration: 74356
loss: 0.9755536317825317,grad_norm: 0.9999990889678974, iteration: 74357
loss: 1.0310490131378174,grad_norm: 0.9999989605738544, iteration: 74358
loss: 1.0000940561294556,grad_norm: 0.8850408788198892, iteration: 74359
loss: 0.9847561120986938,grad_norm: 0.9999989831646706, iteration: 74360
loss: 1.0106654167175293,grad_norm: 0.9057415928450364, iteration: 74361
loss: 1.0150192975997925,grad_norm: 0.9999991860838714, iteration: 74362
loss: 1.010188102722168,grad_norm: 0.9999991170495452, iteration: 74363
loss: 0.9767247438430786,grad_norm: 0.9999993809741685, iteration: 74364
loss: 0.9763960838317871,grad_norm: 0.9923585498574246, iteration: 74365
loss: 1.01073157787323,grad_norm: 0.999999119298084, iteration: 74366
loss: 1.0387409925460815,grad_norm: 0.999999288930529, iteration: 74367
loss: 0.9971566200256348,grad_norm: 0.9968257724630643, iteration: 74368
loss: 0.9823564887046814,grad_norm: 0.9925076244155886, iteration: 74369
loss: 0.9827840328216553,grad_norm: 0.9999990893275857, iteration: 74370
loss: 0.9732633829116821,grad_norm: 0.9999989720758857, iteration: 74371
loss: 0.9593706130981445,grad_norm: 0.9040348278318696, iteration: 74372
loss: 0.9638462662696838,grad_norm: 0.9999988874424635, iteration: 74373
loss: 1.0074832439422607,grad_norm: 0.9999991002662737, iteration: 74374
loss: 1.0112508535385132,grad_norm: 0.9999991914728266, iteration: 74375
loss: 0.9536029100418091,grad_norm: 0.9999991034962041, iteration: 74376
loss: 1.0050214529037476,grad_norm: 0.9060617346900827, iteration: 74377
loss: 1.021666169166565,grad_norm: 0.999999097018964, iteration: 74378
loss: 0.9785354733467102,grad_norm: 0.9999991949164806, iteration: 74379
loss: 0.9544880986213684,grad_norm: 0.9999991655818294, iteration: 74380
loss: 0.9838432669639587,grad_norm: 0.9999991249213699, iteration: 74381
loss: 0.9938652515411377,grad_norm: 0.9999991796297599, iteration: 74382
loss: 0.9969102740287781,grad_norm: 0.9999991313780623, iteration: 74383
loss: 1.0059252977371216,grad_norm: 0.8618143031385637, iteration: 74384
loss: 1.005152702331543,grad_norm: 0.9999992135813662, iteration: 74385
loss: 0.9769654870033264,grad_norm: 0.9999990906313063, iteration: 74386
loss: 1.0165482759475708,grad_norm: 0.8293005815639327, iteration: 74387
loss: 0.9905086755752563,grad_norm: 0.9999991431884669, iteration: 74388
loss: 1.005435824394226,grad_norm: 0.9907755013973513, iteration: 74389
loss: 0.9820276498794556,grad_norm: 0.999999119735705, iteration: 74390
loss: 1.0024570226669312,grad_norm: 0.9806537872270508, iteration: 74391
loss: 1.0058910846710205,grad_norm: 0.8341235771471196, iteration: 74392
loss: 0.9798546433448792,grad_norm: 0.999999177416385, iteration: 74393
loss: 0.9908623099327087,grad_norm: 0.8926975972636001, iteration: 74394
loss: 1.023587942123413,grad_norm: 0.9999993152871365, iteration: 74395
loss: 1.0051920413970947,grad_norm: 0.9999989917481474, iteration: 74396
loss: 1.0058919191360474,grad_norm: 0.9999991925792306, iteration: 74397
loss: 1.0033512115478516,grad_norm: 0.9999991669597788, iteration: 74398
loss: 0.9973235130310059,grad_norm: 0.9790880190534903, iteration: 74399
loss: 0.9623141288757324,grad_norm: 0.999999160399148, iteration: 74400
loss: 0.986039400100708,grad_norm: 0.9999991695347855, iteration: 74401
loss: 1.0011399984359741,grad_norm: 0.9999991614669983, iteration: 74402
loss: 0.9978980422019958,grad_norm: 0.9868729043052312, iteration: 74403
loss: 1.0372215509414673,grad_norm: 0.9913176249777278, iteration: 74404
loss: 0.9699292778968811,grad_norm: 0.9999991908158911, iteration: 74405
loss: 0.9893431067466736,grad_norm: 0.9999990944733578, iteration: 74406
loss: 0.9848040342330933,grad_norm: 0.99999910379693, iteration: 74407
loss: 1.0351234674453735,grad_norm: 0.9999992095313341, iteration: 74408
loss: 1.0227653980255127,grad_norm: 0.8970354935155531, iteration: 74409
loss: 0.9844372868537903,grad_norm: 0.8742559807312493, iteration: 74410
loss: 0.9770880341529846,grad_norm: 0.9242528482427659, iteration: 74411
loss: 0.9862870573997498,grad_norm: 0.9999990715810567, iteration: 74412
loss: 1.0246039628982544,grad_norm: 0.9999990306947867, iteration: 74413
loss: 1.017745852470398,grad_norm: 0.9999991448299178, iteration: 74414
loss: 1.006700038909912,grad_norm: 0.9762274422928877, iteration: 74415
loss: 0.9813971519470215,grad_norm: 0.963801822941173, iteration: 74416
loss: 0.9991282224655151,grad_norm: 0.8989408592491169, iteration: 74417
loss: 1.0122822523117065,grad_norm: 0.8786202258368, iteration: 74418
loss: 0.9929404854774475,grad_norm: 0.9772052220593694, iteration: 74419
loss: 0.9783278107643127,grad_norm: 0.8442824864482817, iteration: 74420
loss: 0.9600746631622314,grad_norm: 0.9849172850328474, iteration: 74421
loss: 1.0177112817764282,grad_norm: 0.9999991570792066, iteration: 74422
loss: 1.0079851150512695,grad_norm: 0.9999990825685298, iteration: 74423
loss: 0.9984658360481262,grad_norm: 0.9999992451598537, iteration: 74424
loss: 1.0086833238601685,grad_norm: 0.9999990825637209, iteration: 74425
loss: 1.0169172286987305,grad_norm: 0.9999991024215434, iteration: 74426
loss: 0.9866748452186584,grad_norm: 0.9319283100756354, iteration: 74427
loss: 1.006121277809143,grad_norm: 0.8521793389498616, iteration: 74428
loss: 1.0189844369888306,grad_norm: 0.8952574343624911, iteration: 74429
loss: 0.9975289702415466,grad_norm: 0.9999992189847129, iteration: 74430
loss: 1.0255205631256104,grad_norm: 0.9613839405229286, iteration: 74431
loss: 0.9929253458976746,grad_norm: 0.9999991546693531, iteration: 74432
loss: 0.9884694218635559,grad_norm: 0.83225006154385, iteration: 74433
loss: 1.020797848701477,grad_norm: 0.9999991663496473, iteration: 74434
loss: 1.0405824184417725,grad_norm: 0.999999219877235, iteration: 74435
loss: 0.9927911162376404,grad_norm: 0.999998965615822, iteration: 74436
loss: 1.0089526176452637,grad_norm: 0.9999989889648081, iteration: 74437
loss: 0.9806885123252869,grad_norm: 0.9087064775462224, iteration: 74438
loss: 0.9741663336753845,grad_norm: 0.9999991863697643, iteration: 74439
loss: 0.9938151240348816,grad_norm: 0.9999990001044249, iteration: 74440
loss: 1.0660760402679443,grad_norm: 0.9999995202992599, iteration: 74441
loss: 1.0033012628555298,grad_norm: 0.9999992293698048, iteration: 74442
loss: 0.9909825921058655,grad_norm: 0.9999991710420919, iteration: 74443
loss: 0.9373438954353333,grad_norm: 0.9999990983134216, iteration: 74444
loss: 0.9421637654304504,grad_norm: 0.9999990947562423, iteration: 74445
loss: 0.9812623858451843,grad_norm: 0.999999269951757, iteration: 74446
loss: 0.969236433506012,grad_norm: 0.9704133926230447, iteration: 74447
loss: 0.9783125519752502,grad_norm: 0.9999998394525317, iteration: 74448
loss: 1.155393123626709,grad_norm: 0.9999995975981297, iteration: 74449
loss: 1.1064002513885498,grad_norm: 0.9999997884284049, iteration: 74450
loss: 0.9436057209968567,grad_norm: 0.9999991535507694, iteration: 74451
loss: 0.982201874256134,grad_norm: 0.9999990448277212, iteration: 74452
loss: 1.0122766494750977,grad_norm: 0.9999991569160007, iteration: 74453
loss: 1.04507315158844,grad_norm: 0.9013472721747995, iteration: 74454
loss: 0.9790868163108826,grad_norm: 0.9277549778500561, iteration: 74455
loss: 1.0058495998382568,grad_norm: 0.9914474716911247, iteration: 74456
loss: 1.0120353698730469,grad_norm: 0.9530682526766857, iteration: 74457
loss: 1.015654444694519,grad_norm: 0.999999240098699, iteration: 74458
loss: 0.9935029745101929,grad_norm: 0.9999989663549945, iteration: 74459
loss: 1.0207810401916504,grad_norm: 0.9999990916049997, iteration: 74460
loss: 1.0009657144546509,grad_norm: 0.957389311047147, iteration: 74461
loss: 1.0014866590499878,grad_norm: 0.9999990732172386, iteration: 74462
loss: 1.0371980667114258,grad_norm: 0.9999998792037318, iteration: 74463
loss: 0.9796332716941833,grad_norm: 0.8740051439393154, iteration: 74464
loss: 1.0181964635849,grad_norm: 0.8105057146895353, iteration: 74465
loss: 0.9765655398368835,grad_norm: 0.999999199836535, iteration: 74466
loss: 0.9945029616355896,grad_norm: 0.9999990228550237, iteration: 74467
loss: 1.0179182291030884,grad_norm: 0.9999990837773358, iteration: 74468
loss: 1.0225086212158203,grad_norm: 0.9999991378972172, iteration: 74469
loss: 1.0199390649795532,grad_norm: 0.9233534002863714, iteration: 74470
loss: 0.9743301868438721,grad_norm: 0.9999989675793458, iteration: 74471
loss: 0.9817237854003906,grad_norm: 0.9999990614077405, iteration: 74472
loss: 1.0124695301055908,grad_norm: 0.8890331522418916, iteration: 74473
loss: 1.0709526538848877,grad_norm: 0.9999991798916817, iteration: 74474
loss: 0.9995450377464294,grad_norm: 0.9999990421791054, iteration: 74475
loss: 1.021385669708252,grad_norm: 0.999999252549585, iteration: 74476
loss: 0.9909979701042175,grad_norm: 0.9482853520690143, iteration: 74477
loss: 0.9742965698242188,grad_norm: 0.9571510319378402, iteration: 74478
loss: 0.9990705251693726,grad_norm: 0.8833065720806055, iteration: 74479
loss: 0.9678602814674377,grad_norm: 0.9999992093209896, iteration: 74480
loss: 0.9562864303588867,grad_norm: 0.9999993602736892, iteration: 74481
loss: 1.019860863685608,grad_norm: 0.9999990975087049, iteration: 74482
loss: 1.0295153856277466,grad_norm: 0.999999171253888, iteration: 74483
loss: 1.0224498510360718,grad_norm: 0.9999993843926238, iteration: 74484
loss: 1.0086361169815063,grad_norm: 0.9999990801907539, iteration: 74485
loss: 1.0195130109786987,grad_norm: 0.9999992000154617, iteration: 74486
loss: 1.0035264492034912,grad_norm: 0.9999990842753013, iteration: 74487
loss: 1.0263327360153198,grad_norm: 0.9999991737030595, iteration: 74488
loss: 1.0630602836608887,grad_norm: 0.99999979112482, iteration: 74489
loss: 0.9872609972953796,grad_norm: 0.9393990327696411, iteration: 74490
loss: 1.0314123630523682,grad_norm: 0.9999993482963502, iteration: 74491
loss: 1.0042662620544434,grad_norm: 0.9999992209201249, iteration: 74492
loss: 1.0179224014282227,grad_norm: 0.9934342160161579, iteration: 74493
loss: 0.9949972033500671,grad_norm: 0.8406103251315763, iteration: 74494
loss: 0.9832569360733032,grad_norm: 0.9999992172988932, iteration: 74495
loss: 1.017232894897461,grad_norm: 0.9999989755704876, iteration: 74496
loss: 0.9831734299659729,grad_norm: 0.9279446349136495, iteration: 74497
loss: 1.0074279308319092,grad_norm: 0.9999991857581689, iteration: 74498
loss: 0.9705924987792969,grad_norm: 0.9999991187632151, iteration: 74499
loss: 0.983441174030304,grad_norm: 0.9999993098478308, iteration: 74500
loss: 1.0161364078521729,grad_norm: 0.9610304674754835, iteration: 74501
loss: 1.029189944267273,grad_norm: 0.9999993313647754, iteration: 74502
loss: 0.9801652431488037,grad_norm: 0.9999991723294126, iteration: 74503
loss: 0.9687108993530273,grad_norm: 0.9999990104745407, iteration: 74504
loss: 1.0114259719848633,grad_norm: 0.9999989895073159, iteration: 74505
loss: 1.0286900997161865,grad_norm: 0.9999989445316996, iteration: 74506
loss: 0.9920174479484558,grad_norm: 0.9977413323847542, iteration: 74507
loss: 0.9768911600112915,grad_norm: 0.9604577826159504, iteration: 74508
loss: 1.035793662071228,grad_norm: 0.9916512690854923, iteration: 74509
loss: 0.9955953359603882,grad_norm: 0.9999990264470044, iteration: 74510
loss: 1.0387015342712402,grad_norm: 0.812485799174084, iteration: 74511
loss: 0.9944917559623718,grad_norm: 0.9999990334114947, iteration: 74512
loss: 1.0252255201339722,grad_norm: 0.9999990638950483, iteration: 74513
loss: 0.965461790561676,grad_norm: 0.9999992067787565, iteration: 74514
loss: 0.9835256338119507,grad_norm: 0.992509384132874, iteration: 74515
loss: 0.9957284331321716,grad_norm: 0.9999991688864053, iteration: 74516
loss: 0.9626724123954773,grad_norm: 0.9760137650692209, iteration: 74517
loss: 0.9891329407691956,grad_norm: 0.9999991000446236, iteration: 74518
loss: 0.9659222364425659,grad_norm: 0.9999990102919686, iteration: 74519
loss: 1.0335912704467773,grad_norm: 0.9999990036085615, iteration: 74520
loss: 0.988787055015564,grad_norm: 0.9312867291557373, iteration: 74521
loss: 0.9945270419120789,grad_norm: 0.9999992247423639, iteration: 74522
loss: 1.0333476066589355,grad_norm: 0.9999992000022198, iteration: 74523
loss: 1.0261386632919312,grad_norm: 0.9840440755657229, iteration: 74524
loss: 1.00368332862854,grad_norm: 0.9999991941045041, iteration: 74525
loss: 0.9859362244606018,grad_norm: 0.9999991317521487, iteration: 74526
loss: 0.9916507601737976,grad_norm: 0.9999993537002294, iteration: 74527
loss: 1.0024477243423462,grad_norm: 0.9999991128098835, iteration: 74528
loss: 0.9899733662605286,grad_norm: 0.9999990391465883, iteration: 74529
loss: 1.0179123878479004,grad_norm: 0.8991994473567317, iteration: 74530
loss: 1.0183675289154053,grad_norm: 0.9026691880572406, iteration: 74531
loss: 1.014794945716858,grad_norm: 0.9999990252267636, iteration: 74532
loss: 0.9918704032897949,grad_norm: 0.9127455012667934, iteration: 74533
loss: 0.9931312203407288,grad_norm: 0.8452343446473803, iteration: 74534
loss: 0.9826146960258484,grad_norm: 0.999999063868335, iteration: 74535
loss: 1.0124586820602417,grad_norm: 0.9999990675477614, iteration: 74536
loss: 0.9782543182373047,grad_norm: 0.8617314441182686, iteration: 74537
loss: 1.0313148498535156,grad_norm: 0.9999990793327654, iteration: 74538
loss: 0.9786968231201172,grad_norm: 0.9842293417490532, iteration: 74539
loss: 0.9783967733383179,grad_norm: 0.9999990345540389, iteration: 74540
loss: 1.061257243156433,grad_norm: 0.9174753771004931, iteration: 74541
loss: 1.01002037525177,grad_norm: 0.9999991315836294, iteration: 74542
loss: 1.0126080513000488,grad_norm: 0.9999991287024155, iteration: 74543
loss: 1.0190021991729736,grad_norm: 0.8000746123429437, iteration: 74544
loss: 1.001349925994873,grad_norm: 0.978940311916451, iteration: 74545
loss: 0.9871590733528137,grad_norm: 0.9999990734486146, iteration: 74546
loss: 0.982969343662262,grad_norm: 0.9999990417752591, iteration: 74547
loss: 0.9855634570121765,grad_norm: 0.9999991558302277, iteration: 74548
loss: 0.9972370266914368,grad_norm: 0.8796142707945683, iteration: 74549
loss: 1.0028454065322876,grad_norm: 0.9751042266758655, iteration: 74550
loss: 0.975312352180481,grad_norm: 0.9944580346333307, iteration: 74551
loss: 1.04380464553833,grad_norm: 0.9999992462733645, iteration: 74552
loss: 1.0014212131500244,grad_norm: 0.9999990913545422, iteration: 74553
loss: 0.9684904217720032,grad_norm: 0.9998479868316714, iteration: 74554
loss: 1.0072263479232788,grad_norm: 0.9999989771703162, iteration: 74555
loss: 0.9653027653694153,grad_norm: 0.9999997036008117, iteration: 74556
loss: 1.0653434991836548,grad_norm: 0.999999233359173, iteration: 74557
loss: 0.9803500771522522,grad_norm: 0.8674709974371767, iteration: 74558
loss: 1.0072076320648193,grad_norm: 0.9999990970904349, iteration: 74559
loss: 0.9915026426315308,grad_norm: 0.8819921856872445, iteration: 74560
loss: 1.0125067234039307,grad_norm: 0.9999990446833875, iteration: 74561
loss: 1.0286496877670288,grad_norm: 0.9999996242310139, iteration: 74562
loss: 1.0190622806549072,grad_norm: 0.9531188672118529, iteration: 74563
loss: 0.9860212206840515,grad_norm: 0.999999129236511, iteration: 74564
loss: 0.9936770796775818,grad_norm: 0.9082662074573119, iteration: 74565
loss: 1.015688180923462,grad_norm: 0.8219125342677563, iteration: 74566
loss: 0.9967140555381775,grad_norm: 0.9999990727687238, iteration: 74567
loss: 0.9822390079498291,grad_norm: 0.9999992136835589, iteration: 74568
loss: 1.0299475193023682,grad_norm: 0.9999994034795439, iteration: 74569
loss: 1.0249890089035034,grad_norm: 0.9999991428903515, iteration: 74570
loss: 0.9894187450408936,grad_norm: 0.9999990410453539, iteration: 74571
loss: 1.0183788537979126,grad_norm: 0.9001513504109516, iteration: 74572
loss: 1.0220881700515747,grad_norm: 0.9999991004138273, iteration: 74573
loss: 1.0285024642944336,grad_norm: 0.9999990392751664, iteration: 74574
loss: 1.0052083730697632,grad_norm: 0.9283861173058799, iteration: 74575
loss: 1.0113873481750488,grad_norm: 0.9999989712794014, iteration: 74576
loss: 0.996989905834198,grad_norm: 0.9129987490542429, iteration: 74577
loss: 0.9901103377342224,grad_norm: 0.9999991991648531, iteration: 74578
loss: 0.9566065669059753,grad_norm: 0.9818778570168684, iteration: 74579
loss: 1.0170551538467407,grad_norm: 0.9999995461878963, iteration: 74580
loss: 1.0246661901474,grad_norm: 0.9659646324083209, iteration: 74581
loss: 0.9989269375801086,grad_norm: 0.9999992383754852, iteration: 74582
loss: 0.987689733505249,grad_norm: 0.9999992771980535, iteration: 74583
loss: 1.0023560523986816,grad_norm: 0.9225790730110401, iteration: 74584
loss: 0.9941900372505188,grad_norm: 0.9999993156922211, iteration: 74585
loss: 0.9757498502731323,grad_norm: 0.9999989092830126, iteration: 74586
loss: 0.986003577709198,grad_norm: 0.9999989994392361, iteration: 74587
loss: 0.9987685084342957,grad_norm: 0.9999990430576661, iteration: 74588
loss: 1.0235618352890015,grad_norm: 0.9364737120978491, iteration: 74589
loss: 1.0187063217163086,grad_norm: 0.9999989826855736, iteration: 74590
loss: 0.9809504151344299,grad_norm: 0.9999990100445378, iteration: 74591
loss: 1.0027762651443481,grad_norm: 0.9170252942991983, iteration: 74592
loss: 1.0152640342712402,grad_norm: 0.9510018534118827, iteration: 74593
loss: 0.9991123676300049,grad_norm: 0.9626412769509142, iteration: 74594
loss: 1.048389196395874,grad_norm: 0.9999993177469382, iteration: 74595
loss: 1.015764594078064,grad_norm: 0.9999990069011696, iteration: 74596
loss: 0.9940600395202637,grad_norm: 0.9999991612830231, iteration: 74597
loss: 1.002493143081665,grad_norm: 0.9242320280717747, iteration: 74598
loss: 0.9847913384437561,grad_norm: 0.9999992721388955, iteration: 74599
loss: 1.0172631740570068,grad_norm: 0.9831608090618029, iteration: 74600
loss: 1.0186450481414795,grad_norm: 0.999999154785472, iteration: 74601
loss: 0.9879594445228577,grad_norm: 0.9999990097159713, iteration: 74602
loss: 0.9796637296676636,grad_norm: 0.9237853851177182, iteration: 74603
loss: 1.017006516456604,grad_norm: 0.9053272555932348, iteration: 74604
loss: 1.0015050172805786,grad_norm: 0.8726293774471361, iteration: 74605
loss: 1.0122531652450562,grad_norm: 0.9999989705695768, iteration: 74606
loss: 1.0643293857574463,grad_norm: 0.999999576614241, iteration: 74607
loss: 0.999848484992981,grad_norm: 0.999999044393518, iteration: 74608
loss: 0.9876716136932373,grad_norm: 0.9999991970873401, iteration: 74609
loss: 1.0075534582138062,grad_norm: 0.9999990494176871, iteration: 74610
loss: 1.0110371112823486,grad_norm: 0.9999990958809516, iteration: 74611
loss: 1.01935875415802,grad_norm: 0.8780424702752352, iteration: 74612
loss: 0.9953911304473877,grad_norm: 0.999998980188311, iteration: 74613
loss: 0.9951584339141846,grad_norm: 0.9999990986397729, iteration: 74614
loss: 1.0127911567687988,grad_norm: 0.9999992395481522, iteration: 74615
loss: 1.011073350906372,grad_norm: 0.9999990776977117, iteration: 74616
loss: 1.017891764640808,grad_norm: 0.9410526978808992, iteration: 74617
loss: 1.009827733039856,grad_norm: 0.9827885837980086, iteration: 74618
loss: 1.0047873258590698,grad_norm: 0.9887516818555376, iteration: 74619
loss: 0.9767987728118896,grad_norm: 0.9690039783945498, iteration: 74620
loss: 1.0101943016052246,grad_norm: 0.9999992204114929, iteration: 74621
loss: 0.9874563813209534,grad_norm: 0.9488412292617534, iteration: 74622
loss: 0.9909520149230957,grad_norm: 0.9752871361910909, iteration: 74623
loss: 1.0289360284805298,grad_norm: 0.9851450770324548, iteration: 74624
loss: 0.9425308704376221,grad_norm: 0.9694359698708828, iteration: 74625
loss: 0.9990764856338501,grad_norm: 0.9999989078002213, iteration: 74626
loss: 0.9970754384994507,grad_norm: 0.9999992523441626, iteration: 74627
loss: 1.0127006769180298,grad_norm: 0.9347467881406287, iteration: 74628
loss: 1.0077570676803589,grad_norm: 0.9999990633992094, iteration: 74629
loss: 1.0130012035369873,grad_norm: 0.9999991792619651, iteration: 74630
loss: 1.0032086372375488,grad_norm: 0.9999991586826347, iteration: 74631
loss: 0.9965077638626099,grad_norm: 0.9999990946510324, iteration: 74632
loss: 0.9586190581321716,grad_norm: 0.9999990746623894, iteration: 74633
loss: 1.0821294784545898,grad_norm: 0.9999991827732277, iteration: 74634
loss: 1.0117475986480713,grad_norm: 0.9933892366853898, iteration: 74635
loss: 0.9824004173278809,grad_norm: 0.999999250047158, iteration: 74636
loss: 0.9624842405319214,grad_norm: 0.9999991161301887, iteration: 74637
loss: 0.9792594909667969,grad_norm: 0.9999989723582706, iteration: 74638
loss: 0.9639642238616943,grad_norm: 0.9999991180851041, iteration: 74639
loss: 1.018998384475708,grad_norm: 0.8209659785322629, iteration: 74640
loss: 1.0634040832519531,grad_norm: 0.9999992584946822, iteration: 74641
loss: 1.0093411207199097,grad_norm: 0.9999991479043662, iteration: 74642
loss: 0.9914124011993408,grad_norm: 0.9679514103007162, iteration: 74643
loss: 1.0392447710037231,grad_norm: 0.9420166196249309, iteration: 74644
loss: 1.0207020044326782,grad_norm: 0.9999991919199671, iteration: 74645
loss: 0.9802411794662476,grad_norm: 0.9230494636135339, iteration: 74646
loss: 1.0127453804016113,grad_norm: 0.9999989309426908, iteration: 74647
loss: 1.0301512479782104,grad_norm: 0.8971295172455374, iteration: 74648
loss: 1.025800108909607,grad_norm: 0.9999990292802556, iteration: 74649
loss: 0.9991884231567383,grad_norm: 0.8242366214949302, iteration: 74650
loss: 0.9713900685310364,grad_norm: 0.9967750903550358, iteration: 74651
loss: 0.9821035265922546,grad_norm: 0.9755185546882024, iteration: 74652
loss: 0.9989284873008728,grad_norm: 0.9999991397925182, iteration: 74653
loss: 1.028701663017273,grad_norm: 0.9999991251898055, iteration: 74654
loss: 1.0476917028427124,grad_norm: 0.9999992688044748, iteration: 74655
loss: 0.9980365037918091,grad_norm: 0.9999990412310352, iteration: 74656
loss: 1.0080612897872925,grad_norm: 0.8948129404321621, iteration: 74657
loss: 1.0178083181381226,grad_norm: 0.9546076518288272, iteration: 74658
loss: 0.9905606508255005,grad_norm: 0.9451096124970995, iteration: 74659
loss: 1.0402263402938843,grad_norm: 0.9999992501019846, iteration: 74660
loss: 1.0061938762664795,grad_norm: 0.9999991440321738, iteration: 74661
loss: 0.9911853671073914,grad_norm: 0.9999992613871969, iteration: 74662
loss: 1.0071876049041748,grad_norm: 0.9999991097895679, iteration: 74663
loss: 1.0365238189697266,grad_norm: 0.9999992716936069, iteration: 74664
loss: 1.0255391597747803,grad_norm: 0.9999990609339896, iteration: 74665
loss: 0.9908209443092346,grad_norm: 0.9999991873941342, iteration: 74666
loss: 0.992001473903656,grad_norm: 0.999999144972251, iteration: 74667
loss: 0.9641686081886292,grad_norm: 0.8763681284913343, iteration: 74668
loss: 0.9658499956130981,grad_norm: 0.9335007643898261, iteration: 74669
loss: 1.0398114919662476,grad_norm: 0.9999992738732333, iteration: 74670
loss: 1.005332112312317,grad_norm: 0.9999990931503489, iteration: 74671
loss: 0.9804811477661133,grad_norm: 0.8219657329641559, iteration: 74672
loss: 0.9719630479812622,grad_norm: 0.9240203780356824, iteration: 74673
loss: 1.0116199254989624,grad_norm: 0.9999992322786333, iteration: 74674
loss: 1.0331529378890991,grad_norm: 0.9670448790672704, iteration: 74675
loss: 1.0013442039489746,grad_norm: 0.9993409558765844, iteration: 74676
loss: 0.9607291221618652,grad_norm: 0.9992708617810779, iteration: 74677
loss: 0.9894163012504578,grad_norm: 0.9999991266765405, iteration: 74678
loss: 0.9913745522499084,grad_norm: 0.9664401859409515, iteration: 74679
loss: 0.9718527793884277,grad_norm: 0.973509713951182, iteration: 74680
loss: 0.9822843074798584,grad_norm: 0.9883838615683183, iteration: 74681
loss: 0.9852896928787231,grad_norm: 0.9312518099283409, iteration: 74682
loss: 1.0400049686431885,grad_norm: 0.9297415086223091, iteration: 74683
loss: 1.0178837776184082,grad_norm: 0.9170488379066042, iteration: 74684
loss: 1.0240448713302612,grad_norm: 0.9034742928786171, iteration: 74685
loss: 1.0030015707015991,grad_norm: 0.9999992281846516, iteration: 74686
loss: 0.9969618916511536,grad_norm: 0.999999131424263, iteration: 74687
loss: 0.9957851767539978,grad_norm: 0.999998920838241, iteration: 74688
loss: 0.9977440237998962,grad_norm: 0.9999992249707402, iteration: 74689
loss: 0.9991467595100403,grad_norm: 0.9999990062007618, iteration: 74690
loss: 1.0067355632781982,grad_norm: 0.9999990507601385, iteration: 74691
loss: 0.9952107667922974,grad_norm: 0.9999989658789814, iteration: 74692
loss: 0.998121976852417,grad_norm: 0.9999990436905476, iteration: 74693
loss: 1.0352097749710083,grad_norm: 0.99999912648567, iteration: 74694
loss: 0.9562856554985046,grad_norm: 0.999999151876869, iteration: 74695
loss: 1.0021332502365112,grad_norm: 0.9999993308878187, iteration: 74696
loss: 0.9586681723594666,grad_norm: 0.9999990415674063, iteration: 74697
loss: 0.9952632188796997,grad_norm: 0.9999989887153942, iteration: 74698
loss: 0.9979044198989868,grad_norm: 0.9999991263464872, iteration: 74699
loss: 0.9969847798347473,grad_norm: 0.9999991031312996, iteration: 74700
loss: 1.016766905784607,grad_norm: 0.999999302868415, iteration: 74701
loss: 0.9910523295402527,grad_norm: 0.9999991340774221, iteration: 74702
loss: 1.0359711647033691,grad_norm: 0.9770725528841229, iteration: 74703
loss: 0.9235168695449829,grad_norm: 0.973556946616579, iteration: 74704
loss: 1.0170625448226929,grad_norm: 0.9999991055660967, iteration: 74705
loss: 1.0099940299987793,grad_norm: 0.9999992733676412, iteration: 74706
loss: 1.0198063850402832,grad_norm: 0.9999990767867889, iteration: 74707
loss: 1.0067293643951416,grad_norm: 0.9999990719923694, iteration: 74708
loss: 1.0159904956817627,grad_norm: 0.9999990520913035, iteration: 74709
loss: 1.026862382888794,grad_norm: 0.9999991835369652, iteration: 74710
loss: 1.0006840229034424,grad_norm: 0.9999992050777939, iteration: 74711
loss: 0.9955635070800781,grad_norm: 0.9999991503311872, iteration: 74712
loss: 1.0174133777618408,grad_norm: 0.8810560561813936, iteration: 74713
loss: 0.9897003769874573,grad_norm: 0.8479083379449305, iteration: 74714
loss: 1.0014115571975708,grad_norm: 0.9225980022973658, iteration: 74715
loss: 0.9916516542434692,grad_norm: 0.9999991365811038, iteration: 74716
loss: 1.0245840549468994,grad_norm: 0.9999989822836602, iteration: 74717
loss: 1.0341743230819702,grad_norm: 0.9999989843099802, iteration: 74718
loss: 0.980326235294342,grad_norm: 0.9999991018995934, iteration: 74719
loss: 1.0340262651443481,grad_norm: 0.9450550526779863, iteration: 74720
loss: 1.0092891454696655,grad_norm: 0.9804895742281977, iteration: 74721
loss: 0.9958930015563965,grad_norm: 0.9434932272369833, iteration: 74722
loss: 0.9879944920539856,grad_norm: 0.9999991134140341, iteration: 74723
loss: 0.9897339344024658,grad_norm: 0.999999068544458, iteration: 74724
loss: 1.0291461944580078,grad_norm: 0.9999989934380319, iteration: 74725
loss: 1.017647385597229,grad_norm: 0.9791272101219978, iteration: 74726
loss: 0.9990121722221375,grad_norm: 0.9675766623763995, iteration: 74727
loss: 0.9788429141044617,grad_norm: 0.9353086128769714, iteration: 74728
loss: 1.0262420177459717,grad_norm: 0.9999993088078298, iteration: 74729
loss: 1.0098639726638794,grad_norm: 0.9999990811487557, iteration: 74730
loss: 1.0145206451416016,grad_norm: 0.9999990694630853, iteration: 74731
loss: 1.0497909784317017,grad_norm: 0.9999991951404046, iteration: 74732
loss: 1.012320876121521,grad_norm: 0.9999991402162767, iteration: 74733
loss: 1.0075597763061523,grad_norm: 0.9999991346720354, iteration: 74734
loss: 1.0373015403747559,grad_norm: 0.9999990536610649, iteration: 74735
loss: 0.9882221817970276,grad_norm: 0.9999990513030601, iteration: 74736
loss: 1.0230560302734375,grad_norm: 0.9781853768595495, iteration: 74737
loss: 0.9995858669281006,grad_norm: 0.9999991011681529, iteration: 74738
loss: 1.0141596794128418,grad_norm: 0.9324928143801543, iteration: 74739
loss: 0.985607385635376,grad_norm: 0.9999991670329044, iteration: 74740
loss: 0.9948846697807312,grad_norm: 0.9999990158293084, iteration: 74741
loss: 1.001701831817627,grad_norm: 0.8714773161064535, iteration: 74742
loss: 0.9545531868934631,grad_norm: 0.9626093322028575, iteration: 74743
loss: 0.9727233648300171,grad_norm: 0.994614214257795, iteration: 74744
loss: 0.9766153693199158,grad_norm: 0.9999989956867453, iteration: 74745
loss: 1.0533066987991333,grad_norm: 0.9999992105931295, iteration: 74746
loss: 0.9905977249145508,grad_norm: 0.9425438342356379, iteration: 74747
loss: 1.0349762439727783,grad_norm: 0.9999990675096241, iteration: 74748
loss: 1.0124092102050781,grad_norm: 0.9706000030882916, iteration: 74749
loss: 1.0189896821975708,grad_norm: 0.9654693934305798, iteration: 74750
loss: 1.0783894062042236,grad_norm: 0.9999998861136808, iteration: 74751
loss: 0.9846746921539307,grad_norm: 0.99999914403943, iteration: 74752
loss: 1.1079490184783936,grad_norm: 0.9999991651825253, iteration: 74753
loss: 1.001961350440979,grad_norm: 0.9999991272324978, iteration: 74754
loss: 1.015799641609192,grad_norm: 0.9999991274723536, iteration: 74755
loss: 0.9883803725242615,grad_norm: 0.9999992283386441, iteration: 74756
loss: 1.0021624565124512,grad_norm: 0.9502876138336137, iteration: 74757
loss: 1.0072813034057617,grad_norm: 0.9999992304325056, iteration: 74758
loss: 1.01736581325531,grad_norm: 0.9999992425759252, iteration: 74759
loss: 1.0202569961547852,grad_norm: 0.9824381609229749, iteration: 74760
loss: 0.9949070811271667,grad_norm: 0.9999991889591924, iteration: 74761
loss: 1.0098530054092407,grad_norm: 0.9999992314868928, iteration: 74762
loss: 1.0041861534118652,grad_norm: 0.9999996180011701, iteration: 74763
loss: 0.968477189540863,grad_norm: 0.999999349547805, iteration: 74764
loss: 1.017606258392334,grad_norm: 0.9999991601730511, iteration: 74765
loss: 1.0489555597305298,grad_norm: 0.9367580297631177, iteration: 74766
loss: 1.008338451385498,grad_norm: 0.9999991693849103, iteration: 74767
loss: 1.006184697151184,grad_norm: 0.9999989564601078, iteration: 74768
loss: 0.9748839139938354,grad_norm: 0.9999992049441346, iteration: 74769
loss: 1.04265558719635,grad_norm: 0.9999989898765502, iteration: 74770
loss: 1.00740385055542,grad_norm: 0.9131750733858092, iteration: 74771
loss: 0.9969605803489685,grad_norm: 0.9999990642362888, iteration: 74772
loss: 0.9780149459838867,grad_norm: 0.9864834932738099, iteration: 74773
loss: 1.0063503980636597,grad_norm: 0.836317369524056, iteration: 74774
loss: 0.9590140581130981,grad_norm: 0.9999991711249611, iteration: 74775
loss: 1.003250241279602,grad_norm: 0.9483336895848208, iteration: 74776
loss: 0.9911227822303772,grad_norm: 0.9999990939166438, iteration: 74777
loss: 1.0295383930206299,grad_norm: 0.9999991558155025, iteration: 74778
loss: 0.9712083339691162,grad_norm: 0.9999992735581564, iteration: 74779
loss: 1.0128357410430908,grad_norm: 0.9839616102581447, iteration: 74780
loss: 0.9818838238716125,grad_norm: 0.9999990716491984, iteration: 74781
loss: 0.9722607731819153,grad_norm: 0.9999989732238643, iteration: 74782
loss: 1.0290167331695557,grad_norm: 0.9999992867480948, iteration: 74783
loss: 0.9912481307983398,grad_norm: 0.9999991706370522, iteration: 74784
loss: 0.9899513721466064,grad_norm: 0.9879422517832402, iteration: 74785
loss: 1.034419059753418,grad_norm: 0.9999992243666139, iteration: 74786
loss: 0.9818180203437805,grad_norm: 0.9999992965793194, iteration: 74787
loss: 0.9952265024185181,grad_norm: 0.9999989989931958, iteration: 74788
loss: 1.0044397115707397,grad_norm: 0.9999992761519721, iteration: 74789
loss: 1.036561131477356,grad_norm: 0.9999993062863184, iteration: 74790
loss: 0.9797630310058594,grad_norm: 0.9329502872960745, iteration: 74791
loss: 0.9744664430618286,grad_norm: 0.9999991820904063, iteration: 74792
loss: 0.9894337058067322,grad_norm: 0.9999990745488081, iteration: 74793
loss: 0.981049120426178,grad_norm: 0.9999990900119902, iteration: 74794
loss: 0.992424726486206,grad_norm: 0.9999992109478546, iteration: 74795
loss: 0.9990553855895996,grad_norm: 0.9999991976729974, iteration: 74796
loss: 0.98987877368927,grad_norm: 0.9999989742097674, iteration: 74797
loss: 0.9784262776374817,grad_norm: 0.9999990909785708, iteration: 74798
loss: 1.0149569511413574,grad_norm: 0.9999990753890297, iteration: 74799
loss: 0.9685928225517273,grad_norm: 0.9867537469084364, iteration: 74800
loss: 0.96851646900177,grad_norm: 0.9999990462877365, iteration: 74801
loss: 1.001656413078308,grad_norm: 0.9999990722568782, iteration: 74802
loss: 1.0055221319198608,grad_norm: 0.9447988000821893, iteration: 74803
loss: 0.9714226126670837,grad_norm: 0.9999989841388256, iteration: 74804
loss: 0.997075080871582,grad_norm: 0.9999989813058593, iteration: 74805
loss: 0.9750111699104309,grad_norm: 0.9999991469087811, iteration: 74806
loss: 0.9971643686294556,grad_norm: 0.8454312419127219, iteration: 74807
loss: 0.9948318600654602,grad_norm: 0.8956145595241518, iteration: 74808
loss: 0.9980165362358093,grad_norm: 0.9999990859759356, iteration: 74809
loss: 1.0217381715774536,grad_norm: 0.8839252264269272, iteration: 74810
loss: 0.9751912951469421,grad_norm: 0.971323949923142, iteration: 74811
loss: 0.9750490784645081,grad_norm: 0.9789524865085403, iteration: 74812
loss: 1.0178269147872925,grad_norm: 0.9999991432347862, iteration: 74813
loss: 1.0935859680175781,grad_norm: 0.9999995551739832, iteration: 74814
loss: 1.0191653966903687,grad_norm: 0.99999924519091, iteration: 74815
loss: 0.987637460231781,grad_norm: 0.9746842105381455, iteration: 74816
loss: 1.0357582569122314,grad_norm: 0.9999990476172953, iteration: 74817
loss: 0.9896957278251648,grad_norm: 0.886239419932073, iteration: 74818
loss: 0.9942097067832947,grad_norm: 0.9999991087196978, iteration: 74819
loss: 0.9790019392967224,grad_norm: 0.9806031157148529, iteration: 74820
loss: 0.9837029576301575,grad_norm: 0.9999993352125252, iteration: 74821
loss: 0.9864237904548645,grad_norm: 0.9532167464677539, iteration: 74822
loss: 1.0217688083648682,grad_norm: 0.8435880030730013, iteration: 74823
loss: 1.0246983766555786,grad_norm: 0.9999991468879887, iteration: 74824
loss: 1.0259236097335815,grad_norm: 0.9999990580172295, iteration: 74825
loss: 0.9918839335441589,grad_norm: 0.9999992065171426, iteration: 74826
loss: 1.0142608880996704,grad_norm: 0.9999991333354659, iteration: 74827
loss: 0.9788791537284851,grad_norm: 0.9999991552755396, iteration: 74828
loss: 1.029831886291504,grad_norm: 0.999999284228043, iteration: 74829
loss: 0.9799542427062988,grad_norm: 0.9999992317197514, iteration: 74830
loss: 0.9810920357704163,grad_norm: 0.9999991255751964, iteration: 74831
loss: 1.0066173076629639,grad_norm: 0.9999991103801266, iteration: 74832
loss: 1.0062659978866577,grad_norm: 0.9999991657278382, iteration: 74833
loss: 1.0511189699172974,grad_norm: 0.9506129328271735, iteration: 74834
loss: 1.0122172832489014,grad_norm: 0.9999991428428621, iteration: 74835
loss: 1.0118343830108643,grad_norm: 0.999999292473012, iteration: 74836
loss: 1.0164884328842163,grad_norm: 0.8885184227094828, iteration: 74837
loss: 0.9851629734039307,grad_norm: 0.9999991898271867, iteration: 74838
loss: 1.0709251165390015,grad_norm: 0.9999995151167769, iteration: 74839
loss: 0.9990161061286926,grad_norm: 0.9380073267965852, iteration: 74840
loss: 1.0314500331878662,grad_norm: 0.9999991172544447, iteration: 74841
loss: 1.004470705986023,grad_norm: 0.9999991898555577, iteration: 74842
loss: 0.9629155397415161,grad_norm: 0.9999992448103637, iteration: 74843
loss: 1.0228222608566284,grad_norm: 0.9420436469929121, iteration: 74844
loss: 1.0115382671356201,grad_norm: 0.9999992060034294, iteration: 74845
loss: 1.016594409942627,grad_norm: 0.9999989739922351, iteration: 74846
loss: 1.0412654876708984,grad_norm: 0.9999991834510518, iteration: 74847
loss: 1.0089964866638184,grad_norm: 0.999999291733363, iteration: 74848
loss: 0.9799366593360901,grad_norm: 0.9999991402755379, iteration: 74849
loss: 0.9985529780387878,grad_norm: 0.9999990645901418, iteration: 74850
loss: 1.0233064889907837,grad_norm: 0.9999993724239138, iteration: 74851
loss: 0.9906173348426819,grad_norm: 0.9999991185055248, iteration: 74852
loss: 1.021437644958496,grad_norm: 0.9999992841300832, iteration: 74853
loss: 0.9936479926109314,grad_norm: 0.9999991322494193, iteration: 74854
loss: 1.0275338888168335,grad_norm: 0.9999991992130817, iteration: 74855
loss: 1.0191221237182617,grad_norm: 0.9604614307157281, iteration: 74856
loss: 0.9992730617523193,grad_norm: 0.9904267599394707, iteration: 74857
loss: 1.0101178884506226,grad_norm: 0.999999140397334, iteration: 74858
loss: 1.0253400802612305,grad_norm: 0.9999994081732212, iteration: 74859
loss: 1.0446621179580688,grad_norm: 0.999999011035514, iteration: 74860
loss: 1.004083514213562,grad_norm: 0.9621383168221178, iteration: 74861
loss: 1.007143259048462,grad_norm: 0.9999991090697963, iteration: 74862
loss: 1.0194369554519653,grad_norm: 0.9999995808174651, iteration: 74863
loss: 1.0018119812011719,grad_norm: 0.9524150365548834, iteration: 74864
loss: 0.9719204902648926,grad_norm: 0.9413070539758729, iteration: 74865
loss: 0.9442125558853149,grad_norm: 0.9999992536471345, iteration: 74866
loss: 1.0529415607452393,grad_norm: 0.9999992359897324, iteration: 74867
loss: 0.9812997579574585,grad_norm: 0.8262719091354307, iteration: 74868
loss: 1.020385503768921,grad_norm: 0.9999995076053063, iteration: 74869
loss: 1.019034504890442,grad_norm: 0.9956161040252031, iteration: 74870
loss: 1.0302995443344116,grad_norm: 0.9999993312447752, iteration: 74871
loss: 0.9838109612464905,grad_norm: 0.9999993725728555, iteration: 74872
loss: 0.9843667149543762,grad_norm: 0.9804912546209531, iteration: 74873
loss: 0.9980124235153198,grad_norm: 0.999999178009962, iteration: 74874
loss: 0.9523316621780396,grad_norm: 0.9999991568789114, iteration: 74875
loss: 0.9970331192016602,grad_norm: 0.9978299652030038, iteration: 74876
loss: 1.0333858728408813,grad_norm: 0.9999989968129948, iteration: 74877
loss: 1.013096809387207,grad_norm: 0.9999991196765962, iteration: 74878
loss: 0.974373459815979,grad_norm: 0.9999993102832934, iteration: 74879
loss: 0.9867307543754578,grad_norm: 0.9114157944543533, iteration: 74880
loss: 0.9732505679130554,grad_norm: 0.9949675003267867, iteration: 74881
loss: 1.0089560747146606,grad_norm: 0.9999989410388032, iteration: 74882
loss: 1.046350359916687,grad_norm: 0.9999992437375638, iteration: 74883
loss: 1.0331214666366577,grad_norm: 0.9999991621445758, iteration: 74884
loss: 1.0314284563064575,grad_norm: 0.999999133036686, iteration: 74885
loss: 1.039523959159851,grad_norm: 0.9999993127804652, iteration: 74886
loss: 1.0142333507537842,grad_norm: 0.9999997099865373, iteration: 74887
loss: 1.073272466659546,grad_norm: 0.9999994129441128, iteration: 74888
loss: 1.0170347690582275,grad_norm: 0.9999993411931725, iteration: 74889
loss: 0.9950680136680603,grad_norm: 0.9656249825296651, iteration: 74890
loss: 1.0117535591125488,grad_norm: 0.9999991110351079, iteration: 74891
loss: 1.0355288982391357,grad_norm: 0.9999992457260165, iteration: 74892
loss: 1.0053352117538452,grad_norm: 0.9999990728822447, iteration: 74893
loss: 1.0118032693862915,grad_norm: 0.999999101865464, iteration: 74894
loss: 0.9855053424835205,grad_norm: 0.9869471036897761, iteration: 74895
loss: 1.0081965923309326,grad_norm: 0.970601378532179, iteration: 74896
loss: 0.9853308796882629,grad_norm: 0.9999991013654178, iteration: 74897
loss: 1.034401297569275,grad_norm: 0.999999086124624, iteration: 74898
loss: 1.0042548179626465,grad_norm: 0.9326962510783467, iteration: 74899
loss: 1.008342981338501,grad_norm: 0.9390434038955731, iteration: 74900
loss: 1.0523929595947266,grad_norm: 0.9999997068506142, iteration: 74901
loss: 1.0068947076797485,grad_norm: 0.9999994080826234, iteration: 74902
loss: 1.0070725679397583,grad_norm: 0.9999989488799639, iteration: 74903
loss: 0.9964990019798279,grad_norm: 0.9999990640926152, iteration: 74904
loss: 0.9707239270210266,grad_norm: 0.7869331718583117, iteration: 74905
loss: 0.9897445440292358,grad_norm: 0.999999107229308, iteration: 74906
loss: 1.016340970993042,grad_norm: 0.9999991754535559, iteration: 74907
loss: 0.9759265780448914,grad_norm: 0.9999992375965912, iteration: 74908
loss: 0.9988396763801575,grad_norm: 0.9999989839665567, iteration: 74909
loss: 0.981707751750946,grad_norm: 0.9999990877106799, iteration: 74910
loss: 0.9985304474830627,grad_norm: 0.9999990699357257, iteration: 74911
loss: 1.0115635395050049,grad_norm: 0.9654319921370487, iteration: 74912
loss: 1.0063458681106567,grad_norm: 0.9999990217237307, iteration: 74913
loss: 0.9882138967514038,grad_norm: 0.979543733727498, iteration: 74914
loss: 1.0146780014038086,grad_norm: 0.9999992810909857, iteration: 74915
loss: 0.9739915728569031,grad_norm: 0.8414266033080291, iteration: 74916
loss: 1.0118658542633057,grad_norm: 0.9999991763626936, iteration: 74917
loss: 1.0040345191955566,grad_norm: 0.9809214650036817, iteration: 74918
loss: 0.9569588303565979,grad_norm: 0.9999991845335264, iteration: 74919
loss: 0.9731089472770691,grad_norm: 0.9999992156300982, iteration: 74920
loss: 0.9678791165351868,grad_norm: 0.9912954238096614, iteration: 74921
loss: 0.9832103252410889,grad_norm: 0.9495501997051488, iteration: 74922
loss: 1.0190171003341675,grad_norm: 0.9999991165795321, iteration: 74923
loss: 1.019452452659607,grad_norm: 0.9516383944264739, iteration: 74924
loss: 0.981452465057373,grad_norm: 0.984516839184624, iteration: 74925
loss: 1.0030850172042847,grad_norm: 0.999999155379475, iteration: 74926
loss: 1.0233415365219116,grad_norm: 0.99999916088892, iteration: 74927
loss: 1.153403401374817,grad_norm: 0.9999997652035153, iteration: 74928
loss: 0.9931023716926575,grad_norm: 0.9999991648445944, iteration: 74929
loss: 1.0155329704284668,grad_norm: 0.901206933699343, iteration: 74930
loss: 1.0018435716629028,grad_norm: 0.999999139064849, iteration: 74931
loss: 0.9658531546592712,grad_norm: 0.9999991940018756, iteration: 74932
loss: 0.9688759446144104,grad_norm: 0.9999992154435159, iteration: 74933
loss: 1.0021462440490723,grad_norm: 0.9710153446343923, iteration: 74934
loss: 1.0156948566436768,grad_norm: 0.9999993310152491, iteration: 74935
loss: 1.0132702589035034,grad_norm: 0.9999991051507626, iteration: 74936
loss: 1.0572654008865356,grad_norm: 0.9999992685196383, iteration: 74937
loss: 0.9904876351356506,grad_norm: 0.8706188282638306, iteration: 74938
loss: 1.0130864381790161,grad_norm: 0.9999990461840563, iteration: 74939
loss: 0.9837743639945984,grad_norm: 0.9999991994775442, iteration: 74940
loss: 1.0096540451049805,grad_norm: 0.9999991313480054, iteration: 74941
loss: 0.9924154877662659,grad_norm: 0.9999991290906338, iteration: 74942
loss: 1.015662431716919,grad_norm: 0.9999990278301641, iteration: 74943
loss: 0.9506357312202454,grad_norm: 0.9184149156631767, iteration: 74944
loss: 1.0105259418487549,grad_norm: 0.9999995291234642, iteration: 74945
loss: 0.97830730676651,grad_norm: 0.9973420387624669, iteration: 74946
loss: 0.9870544672012329,grad_norm: 0.9460322231171494, iteration: 74947
loss: 0.9967637062072754,grad_norm: 0.999999200955732, iteration: 74948
loss: 0.9336971640586853,grad_norm: 0.9999989656299542, iteration: 74949
loss: 0.9914053082466125,grad_norm: 0.9999990696341525, iteration: 74950
loss: 1.0021225214004517,grad_norm: 0.9999991276615546, iteration: 74951
loss: 0.9768964052200317,grad_norm: 0.9999991440832268, iteration: 74952
loss: 0.9670253992080688,grad_norm: 0.9999990396565444, iteration: 74953
loss: 1.0106384754180908,grad_norm: 0.9999997879320852, iteration: 74954
loss: 0.984785795211792,grad_norm: 0.9999996224046317, iteration: 74955
loss: 1.044640064239502,grad_norm: 0.9696717573065881, iteration: 74956
loss: 1.011759638786316,grad_norm: 0.9999992145934111, iteration: 74957
loss: 1.0039489269256592,grad_norm: 0.9999990594717769, iteration: 74958
loss: 0.9892842769622803,grad_norm: 0.9999989840631842, iteration: 74959
loss: 0.9904536008834839,grad_norm: 0.9999990956947694, iteration: 74960
loss: 0.9784870743751526,grad_norm: 0.999999110070899, iteration: 74961
loss: 1.026593565940857,grad_norm: 0.9999989764368732, iteration: 74962
loss: 1.016547441482544,grad_norm: 0.8750587846410198, iteration: 74963
loss: 1.0050699710845947,grad_norm: 0.9999995513346677, iteration: 74964
loss: 0.9622412919998169,grad_norm: 0.9999991048823635, iteration: 74965
loss: 1.0069493055343628,grad_norm: 0.8886599987531181, iteration: 74966
loss: 0.9587582349777222,grad_norm: 0.9999993139946851, iteration: 74967
loss: 1.013936996459961,grad_norm: 0.9656335654885471, iteration: 74968
loss: 0.9810640215873718,grad_norm: 0.8188139323429013, iteration: 74969
loss: 0.9884412884712219,grad_norm: 0.9999992058987395, iteration: 74970
loss: 0.9925380945205688,grad_norm: 0.9999992004448487, iteration: 74971
loss: 1.0316531658172607,grad_norm: 0.9999991554097338, iteration: 74972
loss: 1.0339125394821167,grad_norm: 0.999999058674835, iteration: 74973
loss: 1.0327037572860718,grad_norm: 0.9999990297839318, iteration: 74974
loss: 1.0372838973999023,grad_norm: 0.9999991923356175, iteration: 74975
loss: 0.9764381647109985,grad_norm: 0.9188982940697308, iteration: 74976
loss: 1.0442216396331787,grad_norm: 0.9999991576859791, iteration: 74977
loss: 0.9640702605247498,grad_norm: 0.9999990791343226, iteration: 74978
loss: 1.0026586055755615,grad_norm: 0.9669415406813976, iteration: 74979
loss: 1.0248490571975708,grad_norm: 0.8791274274145737, iteration: 74980
loss: 0.9972926378250122,grad_norm: 0.9482501647704679, iteration: 74981
loss: 0.9992506504058838,grad_norm: 0.8396914768508432, iteration: 74982
loss: 0.9936919808387756,grad_norm: 0.9999992131508431, iteration: 74983
loss: 0.9521436095237732,grad_norm: 0.9999992057463419, iteration: 74984
loss: 1.0071232318878174,grad_norm: 0.9999990478591444, iteration: 74985
loss: 0.9859550595283508,grad_norm: 0.9676887103546188, iteration: 74986
loss: 0.9962173104286194,grad_norm: 0.9999990530683086, iteration: 74987
loss: 1.0064667463302612,grad_norm: 0.9999991307373407, iteration: 74988
loss: 1.0166261196136475,grad_norm: 0.999999166419155, iteration: 74989
loss: 1.0218185186386108,grad_norm: 0.9999990666800306, iteration: 74990
loss: 0.9621782898902893,grad_norm: 0.9999990933712694, iteration: 74991
loss: 0.9961085319519043,grad_norm: 0.9999990635094232, iteration: 74992
loss: 0.964714765548706,grad_norm: 0.9616178414463932, iteration: 74993
loss: 0.9998723864555359,grad_norm: 0.9999992746854451, iteration: 74994
loss: 1.008664846420288,grad_norm: 0.9999992725490272, iteration: 74995
loss: 1.014521598815918,grad_norm: 0.9999991496235601, iteration: 74996
loss: 1.0350592136383057,grad_norm: 0.968185327489587, iteration: 74997
loss: 1.000092625617981,grad_norm: 0.9282726612011195, iteration: 74998
loss: 1.0186097621917725,grad_norm: 0.9999990829608111, iteration: 74999
loss: 1.0176767110824585,grad_norm: 0.9999990489731581, iteration: 75000
loss: 1.0035631656646729,grad_norm: 0.9728396693308097, iteration: 75001
loss: 1.0278255939483643,grad_norm: 0.999999695947016, iteration: 75002
loss: 0.9929823875427246,grad_norm: 0.99999904178574, iteration: 75003
loss: 1.0143134593963623,grad_norm: 0.9006230901556223, iteration: 75004
loss: 0.9988786578178406,grad_norm: 0.9999989507281404, iteration: 75005
loss: 0.9864137768745422,grad_norm: 0.9999991115379101, iteration: 75006
loss: 1.0046472549438477,grad_norm: 0.9999990700490032, iteration: 75007
loss: 0.9981892108917236,grad_norm: 0.999999075598209, iteration: 75008
loss: 1.0162618160247803,grad_norm: 0.9999990229691744, iteration: 75009
loss: 1.0402814149856567,grad_norm: 0.9999991731138836, iteration: 75010
loss: 0.9814957976341248,grad_norm: 0.8996072508845614, iteration: 75011
loss: 1.00620698928833,grad_norm: 0.954709618020455, iteration: 75012
loss: 0.9968689680099487,grad_norm: 0.8699866053678664, iteration: 75013
loss: 0.9888006448745728,grad_norm: 0.9986153054388455, iteration: 75014
loss: 0.9946933388710022,grad_norm: 0.9999995780795055, iteration: 75015
loss: 1.0123987197875977,grad_norm: 0.9245309796346495, iteration: 75016
loss: 0.9860435724258423,grad_norm: 0.9554992509078986, iteration: 75017
loss: 0.9946975111961365,grad_norm: 0.9999990415746135, iteration: 75018
loss: 1.041100263595581,grad_norm: 0.9999992287446444, iteration: 75019
loss: 0.9893974661827087,grad_norm: 0.796556493517963, iteration: 75020
loss: 1.1124366521835327,grad_norm: 0.9999998109088168, iteration: 75021
loss: 0.9898020625114441,grad_norm: 0.9999991442974365, iteration: 75022
loss: 1.000406265258789,grad_norm: 0.9547492150852678, iteration: 75023
loss: 0.9380309581756592,grad_norm: 0.9999992591167496, iteration: 75024
loss: 0.9686424136161804,grad_norm: 0.9999989791328112, iteration: 75025
loss: 1.0199614763259888,grad_norm: 0.9652329808430337, iteration: 75026
loss: 0.9871052503585815,grad_norm: 0.9592044625117607, iteration: 75027
loss: 1.031398892402649,grad_norm: 0.999999077716588, iteration: 75028
loss: 0.9823123216629028,grad_norm: 0.8877976593371745, iteration: 75029
loss: 1.0249061584472656,grad_norm: 0.9393104146472037, iteration: 75030
loss: 0.9999480247497559,grad_norm: 0.7851230993282633, iteration: 75031
loss: 1.0228632688522339,grad_norm: 0.999999096341004, iteration: 75032
loss: 0.9935588836669922,grad_norm: 0.9935539877260892, iteration: 75033
loss: 0.9756724834442139,grad_norm: 0.9999993215858854, iteration: 75034
loss: 0.9591380953788757,grad_norm: 0.9999990757244058, iteration: 75035
loss: 1.0133355855941772,grad_norm: 0.9999991142322857, iteration: 75036
loss: 0.9844430685043335,grad_norm: 0.9999992569192301, iteration: 75037
loss: 0.979448676109314,grad_norm: 0.9999991345670071, iteration: 75038
loss: 1.0325171947479248,grad_norm: 0.9999989113861193, iteration: 75039
loss: 0.9667444229125977,grad_norm: 0.8682505732542427, iteration: 75040
loss: 0.9928699135780334,grad_norm: 0.9999991168651435, iteration: 75041
loss: 1.0132173299789429,grad_norm: 0.9999992747949493, iteration: 75042
loss: 0.9786030054092407,grad_norm: 0.9999991108251799, iteration: 75043
loss: 1.0207834243774414,grad_norm: 0.9999992206933477, iteration: 75044
loss: 1.0152767896652222,grad_norm: 0.8461830384280712, iteration: 75045
loss: 0.9803248047828674,grad_norm: 0.9999990677756363, iteration: 75046
loss: 1.0011190176010132,grad_norm: 0.9999993188498311, iteration: 75047
loss: 1.033988356590271,grad_norm: 0.9999996729296201, iteration: 75048
loss: 1.0012236833572388,grad_norm: 0.9999992563669949, iteration: 75049
loss: 1.0082329511642456,grad_norm: 0.9999991833996328, iteration: 75050
loss: 0.9690636992454529,grad_norm: 0.9999990265598339, iteration: 75051
loss: 0.9798963665962219,grad_norm: 0.9999989791532122, iteration: 75052
loss: 1.015393853187561,grad_norm: 0.9999991441406021, iteration: 75053
loss: 0.9564412832260132,grad_norm: 0.9999991416270909, iteration: 75054
loss: 1.0056747198104858,grad_norm: 0.9999998511803898, iteration: 75055
loss: 0.9542000889778137,grad_norm: 0.9999998291325037, iteration: 75056
loss: 0.9987601637840271,grad_norm: 0.9573621151123602, iteration: 75057
loss: 0.9724316000938416,grad_norm: 0.9171137774572742, iteration: 75058
loss: 1.0085057020187378,grad_norm: 0.9651100664049582, iteration: 75059
loss: 0.9859110713005066,grad_norm: 0.9824028658728954, iteration: 75060
loss: 1.0009177923202515,grad_norm: 0.8764867856569444, iteration: 75061
loss: 0.9802026152610779,grad_norm: 0.9999991683317798, iteration: 75062
loss: 0.9883636832237244,grad_norm: 0.9999990680964762, iteration: 75063
loss: 1.0069942474365234,grad_norm: 0.999999266075175, iteration: 75064
loss: 0.9784603714942932,grad_norm: 0.896661220805193, iteration: 75065
loss: 0.9830085039138794,grad_norm: 0.9999991802190918, iteration: 75066
loss: 0.9846834540367126,grad_norm: 0.9999991247034553, iteration: 75067
loss: 1.0415908098220825,grad_norm: 0.9999990144547618, iteration: 75068
loss: 0.9922031164169312,grad_norm: 0.8884341056686421, iteration: 75069
loss: 0.9928596019744873,grad_norm: 0.9999995924654284, iteration: 75070
loss: 1.0182889699935913,grad_norm: 0.9999990882816632, iteration: 75071
loss: 0.9920850992202759,grad_norm: 0.9479746175998883, iteration: 75072
loss: 0.9699217081069946,grad_norm: 0.9923006775686914, iteration: 75073
loss: 0.989251434803009,grad_norm: 0.9999990120838689, iteration: 75074
loss: 0.9868658185005188,grad_norm: 0.9999993101394633, iteration: 75075
loss: 1.0196231603622437,grad_norm: 0.9331173512946701, iteration: 75076
loss: 0.9932456612586975,grad_norm: 0.9738184319811292, iteration: 75077
loss: 1.0083657503128052,grad_norm: 0.9999996092193313, iteration: 75078
loss: 1.0125983953475952,grad_norm: 0.8992873977162597, iteration: 75079
loss: 0.9858492612838745,grad_norm: 0.9999992233526168, iteration: 75080
loss: 1.0198957920074463,grad_norm: 0.999999245121402, iteration: 75081
loss: 1.0393096208572388,grad_norm: 0.9999991931306639, iteration: 75082
loss: 1.0242290496826172,grad_norm: 0.9999991695365515, iteration: 75083
loss: 0.9737662672996521,grad_norm: 0.9999991987676635, iteration: 75084
loss: 1.0088403224945068,grad_norm: 0.9999992633856645, iteration: 75085
loss: 1.047471523284912,grad_norm: 0.9999995769310539, iteration: 75086
loss: 0.9938327670097351,grad_norm: 0.9999991362245902, iteration: 75087
loss: 0.9892706274986267,grad_norm: 0.9259144637001924, iteration: 75088
loss: 0.9927175641059875,grad_norm: 0.9999991939743093, iteration: 75089
loss: 1.040050745010376,grad_norm: 0.9999992303319608, iteration: 75090
loss: 0.981396496295929,grad_norm: 0.9999992371553811, iteration: 75091
loss: 0.9817768335342407,grad_norm: 0.9999990690912679, iteration: 75092
loss: 0.977993905544281,grad_norm: 0.9999992365736808, iteration: 75093
loss: 0.9769436120986938,grad_norm: 0.999999223198552, iteration: 75094
loss: 0.9962491989135742,grad_norm: 0.9999990672353184, iteration: 75095
loss: 1.041916847229004,grad_norm: 0.9999992611511956, iteration: 75096
loss: 0.9728379249572754,grad_norm: 0.8804390708345903, iteration: 75097
loss: 0.9816446304321289,grad_norm: 0.9999989629929801, iteration: 75098
loss: 1.017838478088379,grad_norm: 0.9579925461045762, iteration: 75099
loss: 1.0141630172729492,grad_norm: 0.9999993382276763, iteration: 75100
loss: 1.0074454545974731,grad_norm: 0.9392755692967742, iteration: 75101
loss: 1.0095185041427612,grad_norm: 0.9999990104519147, iteration: 75102
loss: 1.008745789527893,grad_norm: 0.9999991518451061, iteration: 75103
loss: 1.0202594995498657,grad_norm: 0.9999992295378207, iteration: 75104
loss: 1.0267080068588257,grad_norm: 0.9999990222109897, iteration: 75105
loss: 0.9899383783340454,grad_norm: 0.9882589020151271, iteration: 75106
loss: 0.994970440864563,grad_norm: 0.9999991788643435, iteration: 75107
loss: 0.9691567420959473,grad_norm: 0.9999989995475081, iteration: 75108
loss: 1.0178277492523193,grad_norm: 0.890983762654197, iteration: 75109
loss: 0.9888115525245667,grad_norm: 0.9999992378524402, iteration: 75110
loss: 1.0025179386138916,grad_norm: 0.9999992847912985, iteration: 75111
loss: 1.0441725254058838,grad_norm: 0.999999088561138, iteration: 75112
loss: 1.039682388305664,grad_norm: 0.9999992586514282, iteration: 75113
loss: 0.9841075539588928,grad_norm: 0.9999990954187402, iteration: 75114
loss: 1.0022331476211548,grad_norm: 0.999999184121542, iteration: 75115
loss: 0.9840500354766846,grad_norm: 0.9999991050079113, iteration: 75116
loss: 0.9833546280860901,grad_norm: 0.9999990795649609, iteration: 75117
loss: 1.0337574481964111,grad_norm: 0.9999990800725091, iteration: 75118
loss: 0.9710697531700134,grad_norm: 0.999999088114969, iteration: 75119
loss: 1.0270743370056152,grad_norm: 0.9999998194616733, iteration: 75120
loss: 1.011296033859253,grad_norm: 0.9848440495047764, iteration: 75121
loss: 1.0036684274673462,grad_norm: 0.9999989749023451, iteration: 75122
loss: 1.0294185876846313,grad_norm: 0.9999993220421844, iteration: 75123
loss: 0.9560808539390564,grad_norm: 0.9999991182741234, iteration: 75124
loss: 1.0386905670166016,grad_norm: 0.9999993240449453, iteration: 75125
loss: 0.96825110912323,grad_norm: 0.9999992346997041, iteration: 75126
loss: 1.027183175086975,grad_norm: 0.9999991353955248, iteration: 75127
loss: 1.024206519126892,grad_norm: 0.9999990478870512, iteration: 75128
loss: 1.0143190622329712,grad_norm: 0.9999990104772403, iteration: 75129
loss: 1.016675353050232,grad_norm: 0.8818196171873298, iteration: 75130
loss: 0.9669488668441772,grad_norm: 0.9999991136466951, iteration: 75131
loss: 0.953655481338501,grad_norm: 0.9999990401034743, iteration: 75132
loss: 1.0126709938049316,grad_norm: 0.9999991920111877, iteration: 75133
loss: 0.9988157749176025,grad_norm: 0.9999992601088299, iteration: 75134
loss: 0.9767319560050964,grad_norm: 0.9999991378521219, iteration: 75135
loss: 1.007285714149475,grad_norm: 0.9999990828245431, iteration: 75136
loss: 1.0058467388153076,grad_norm: 0.8440029171822248, iteration: 75137
loss: 0.970482349395752,grad_norm: 0.956901362493688, iteration: 75138
loss: 1.0342267751693726,grad_norm: 0.9999990694305161, iteration: 75139
loss: 1.0145236253738403,grad_norm: 0.9999991373504015, iteration: 75140
loss: 0.9689092636108398,grad_norm: 0.9184711330394661, iteration: 75141
loss: 0.9882407784461975,grad_norm: 0.9999990950478754, iteration: 75142
loss: 0.9944241642951965,grad_norm: 0.9999990550761227, iteration: 75143
loss: 0.9639868140220642,grad_norm: 0.9796759385746477, iteration: 75144
loss: 1.0294859409332275,grad_norm: 0.9076917444153642, iteration: 75145
loss: 0.9803235530853271,grad_norm: 0.9196871789069906, iteration: 75146
loss: 0.9540204405784607,grad_norm: 0.967850510604746, iteration: 75147
loss: 0.9903602004051208,grad_norm: 0.8903339800106943, iteration: 75148
loss: 0.9887163639068604,grad_norm: 0.8329612948568267, iteration: 75149
loss: 1.004842758178711,grad_norm: 0.9999992613006526, iteration: 75150
loss: 0.99541175365448,grad_norm: 0.9999993014326534, iteration: 75151
loss: 0.998443603515625,grad_norm: 0.9999992237946226, iteration: 75152
loss: 1.0179531574249268,grad_norm: 0.9999993165686231, iteration: 75153
loss: 0.9987199902534485,grad_norm: 0.9571455468974991, iteration: 75154
loss: 1.009515404701233,grad_norm: 0.8457020721799114, iteration: 75155
loss: 0.9952772259712219,grad_norm: 0.9999992264361048, iteration: 75156
loss: 1.0258251428604126,grad_norm: 0.999999057581707, iteration: 75157
loss: 1.0093249082565308,grad_norm: 0.9999990706970475, iteration: 75158
loss: 1.021517276763916,grad_norm: 0.9463175264123717, iteration: 75159
loss: 1.0148773193359375,grad_norm: 0.9999994503079932, iteration: 75160
loss: 0.9807016253471375,grad_norm: 0.9999991530173973, iteration: 75161
loss: 0.9971169829368591,grad_norm: 0.9999991617023797, iteration: 75162
loss: 1.008124828338623,grad_norm: 0.9999990279124285, iteration: 75163
loss: 1.006005883216858,grad_norm: 0.9999991850833797, iteration: 75164
loss: 0.9704881906509399,grad_norm: 0.9999990354544303, iteration: 75165
loss: 0.9468944072723389,grad_norm: 0.9133328258639335, iteration: 75166
loss: 1.0015151500701904,grad_norm: 0.9999991372375504, iteration: 75167
loss: 0.9623432159423828,grad_norm: 0.9999990659566041, iteration: 75168
loss: 1.021226167678833,grad_norm: 0.9999998688688002, iteration: 75169
loss: 1.0084224939346313,grad_norm: 0.9882681940898925, iteration: 75170
loss: 0.9720378518104553,grad_norm: 0.9999990677634719, iteration: 75171
loss: 0.9662914872169495,grad_norm: 0.9999991255772449, iteration: 75172
loss: 1.0492759943008423,grad_norm: 0.9999996200856638, iteration: 75173
loss: 0.966335654258728,grad_norm: 0.9999990048202708, iteration: 75174
loss: 1.0413107872009277,grad_norm: 0.9999989823346549, iteration: 75175
loss: 1.059743881225586,grad_norm: 0.999999948589221, iteration: 75176
loss: 0.991173505783081,grad_norm: 0.9631479080259794, iteration: 75177
loss: 1.015385389328003,grad_norm: 0.9999990996092403, iteration: 75178
loss: 1.0042248964309692,grad_norm: 0.9999992159390392, iteration: 75179
loss: 1.0071258544921875,grad_norm: 0.9713788426317137, iteration: 75180
loss: 1.018328309059143,grad_norm: 0.9999990559844135, iteration: 75181
loss: 1.0252989530563354,grad_norm: 0.9999991817274159, iteration: 75182
loss: 1.017033576965332,grad_norm: 0.999999124357288, iteration: 75183
loss: 1.0267447233200073,grad_norm: 0.9999990556611966, iteration: 75184
loss: 1.0009814500808716,grad_norm: 0.9999991441168183, iteration: 75185
loss: 1.0626355409622192,grad_norm: 0.9999993304545319, iteration: 75186
loss: 1.0287847518920898,grad_norm: 0.8603107081847875, iteration: 75187
loss: 0.9978536367416382,grad_norm: 0.9999991182833174, iteration: 75188
loss: 1.0008938312530518,grad_norm: 0.9999991282510756, iteration: 75189
loss: 0.9955555200576782,grad_norm: 0.9999989986584249, iteration: 75190
loss: 1.0088729858398438,grad_norm: 0.7884488948869868, iteration: 75191
loss: 0.9690756797790527,grad_norm: 0.9999990793239426, iteration: 75192
loss: 0.9969872832298279,grad_norm: 0.9999991641206977, iteration: 75193
loss: 0.998620867729187,grad_norm: 0.9999990010746833, iteration: 75194
loss: 0.9829841256141663,grad_norm: 0.8378631825007057, iteration: 75195
loss: 1.0033906698226929,grad_norm: 0.9999991942723135, iteration: 75196
loss: 0.9881777167320251,grad_norm: 0.9999990558095374, iteration: 75197
loss: 0.9960231184959412,grad_norm: 0.8071449536914553, iteration: 75198
loss: 1.0066235065460205,grad_norm: 0.8800651088339376, iteration: 75199
loss: 1.0383886098861694,grad_norm: 0.9999995266024904, iteration: 75200
loss: 1.0262776613235474,grad_norm: 0.9999990762523863, iteration: 75201
loss: 1.0102357864379883,grad_norm: 0.9286667614069809, iteration: 75202
loss: 0.9946112036705017,grad_norm: 0.9999991710563423, iteration: 75203
loss: 1.0432831048965454,grad_norm: 0.9999991642157139, iteration: 75204
loss: 1.02277410030365,grad_norm: 0.9945500886339028, iteration: 75205
loss: 1.0155688524246216,grad_norm: 0.9180790039399963, iteration: 75206
loss: 0.9419266581535339,grad_norm: 0.9264451435799966, iteration: 75207
loss: 0.9908109903335571,grad_norm: 0.999999017683968, iteration: 75208
loss: 0.9977306127548218,grad_norm: 0.8031649026447114, iteration: 75209
loss: 1.0021073818206787,grad_norm: 0.9999992585970411, iteration: 75210
loss: 0.9886314272880554,grad_norm: 0.9057810757042746, iteration: 75211
loss: 1.0258145332336426,grad_norm: 0.8713570587302814, iteration: 75212
loss: 0.9994838833808899,grad_norm: 0.9999994092257511, iteration: 75213
loss: 1.0433183908462524,grad_norm: 0.9999992987776758, iteration: 75214
loss: 1.0024622678756714,grad_norm: 0.9612511199846206, iteration: 75215
loss: 0.9895107746124268,grad_norm: 0.9999996697992145, iteration: 75216
loss: 0.9909712076187134,grad_norm: 0.9999991610306405, iteration: 75217
loss: 0.9776573181152344,grad_norm: 0.9999990464556534, iteration: 75218
loss: 1.003389596939087,grad_norm: 0.9999991002671625, iteration: 75219
loss: 0.9843612313270569,grad_norm: 0.9999992834640115, iteration: 75220
loss: 0.9727050065994263,grad_norm: 0.8737320535770964, iteration: 75221
loss: 1.04047429561615,grad_norm: 0.9857886418277023, iteration: 75222
loss: 0.9995092749595642,grad_norm: 0.9999991186711589, iteration: 75223
loss: 1.0185991525650024,grad_norm: 0.9999993156526855, iteration: 75224
loss: 1.0187314748764038,grad_norm: 0.911087779207273, iteration: 75225
loss: 1.0024380683898926,grad_norm: 0.9109365965928907, iteration: 75226
loss: 0.9727087616920471,grad_norm: 0.9999992019458254, iteration: 75227
loss: 1.0128508806228638,grad_norm: 0.9102041361354596, iteration: 75228
loss: 0.984487771987915,grad_norm: 0.9999991545176232, iteration: 75229
loss: 1.008991003036499,grad_norm: 0.9999991056419615, iteration: 75230
loss: 1.0049649477005005,grad_norm: 0.9999992398583285, iteration: 75231
loss: 0.9987640380859375,grad_norm: 0.9821390147795459, iteration: 75232
loss: 0.9955455660820007,grad_norm: 0.999999192011452, iteration: 75233
loss: 0.9817673563957214,grad_norm: 0.9999991581420321, iteration: 75234
loss: 0.9697125554084778,grad_norm: 0.8936437650758922, iteration: 75235
loss: 0.9968544840812683,grad_norm: 0.9999992361736794, iteration: 75236
loss: 0.9973936080932617,grad_norm: 0.9999991480207088, iteration: 75237
loss: 0.9997255802154541,grad_norm: 0.9999990539062465, iteration: 75238
loss: 0.9984872341156006,grad_norm: 0.9959956466971406, iteration: 75239
loss: 0.9762517809867859,grad_norm: 0.9999993450204351, iteration: 75240
loss: 1.0133627653121948,grad_norm: 0.9885737336073745, iteration: 75241
loss: 1.0520809888839722,grad_norm: 0.9307300324490727, iteration: 75242
loss: 0.9881001710891724,grad_norm: 0.9999991254552633, iteration: 75243
loss: 1.0056480169296265,grad_norm: 0.9999991021453003, iteration: 75244
loss: 0.983116865158081,grad_norm: 0.8720323647276406, iteration: 75245
loss: 0.9659696817398071,grad_norm: 0.8754502246977627, iteration: 75246
loss: 0.9989117980003357,grad_norm: 0.9999992146121117, iteration: 75247
loss: 0.9614359140396118,grad_norm: 0.9999990435019072, iteration: 75248
loss: 0.9971901774406433,grad_norm: 0.9999991820722827, iteration: 75249
loss: 1.0461405515670776,grad_norm: 0.9322527480952404, iteration: 75250
loss: 0.9763748049736023,grad_norm: 0.9999994710634141, iteration: 75251
loss: 0.9905783534049988,grad_norm: 0.9999990026397992, iteration: 75252
loss: 0.9566603302955627,grad_norm: 0.9999992602730795, iteration: 75253
loss: 1.0066004991531372,grad_norm: 0.999999244061163, iteration: 75254
loss: 1.029647946357727,grad_norm: 0.9961693432902864, iteration: 75255
loss: 1.024766445159912,grad_norm: 0.9876592320878181, iteration: 75256
loss: 0.9830174446105957,grad_norm: 0.9999991553995033, iteration: 75257
loss: 1.0164653062820435,grad_norm: 0.9999991893211387, iteration: 75258
loss: 1.0398699045181274,grad_norm: 0.9999992975939693, iteration: 75259
loss: 0.9743891954421997,grad_norm: 0.9999992709526796, iteration: 75260
loss: 1.0336638689041138,grad_norm: 0.999999148385035, iteration: 75261
loss: 0.9808331727981567,grad_norm: 0.9999991092551813, iteration: 75262
loss: 1.0014539957046509,grad_norm: 0.9999993341070331, iteration: 75263
loss: 0.9875549674034119,grad_norm: 0.9999992746132865, iteration: 75264
loss: 1.030814290046692,grad_norm: 0.9999992046190519, iteration: 75265
loss: 1.0057494640350342,grad_norm: 0.9999990647398508, iteration: 75266
loss: 0.968384861946106,grad_norm: 0.9999992555454185, iteration: 75267
loss: 0.98214191198349,grad_norm: 0.9999989882352277, iteration: 75268
loss: 1.0136550664901733,grad_norm: 0.9534198753117897, iteration: 75269
loss: 0.98859041929245,grad_norm: 0.9690053493968195, iteration: 75270
loss: 0.9629960656166077,grad_norm: 0.999999399479589, iteration: 75271
loss: 0.9552450776100159,grad_norm: 0.9999992204704621, iteration: 75272
loss: 1.026020884513855,grad_norm: 0.8425745240028465, iteration: 75273
loss: 0.9824422597885132,grad_norm: 0.945799162758368, iteration: 75274
loss: 0.9838203191757202,grad_norm: 0.9999992209574592, iteration: 75275
loss: 1.0028842687606812,grad_norm: 0.9913163075985443, iteration: 75276
loss: 0.9781901240348816,grad_norm: 0.9999990937443926, iteration: 75277
loss: 1.0133755207061768,grad_norm: 0.9999990308484831, iteration: 75278
loss: 0.98788982629776,grad_norm: 0.9999992545691931, iteration: 75279
loss: 0.9950063824653625,grad_norm: 0.792574453683923, iteration: 75280
loss: 1.0122798681259155,grad_norm: 0.9999991724551165, iteration: 75281
loss: 1.0047717094421387,grad_norm: 0.9999991382230783, iteration: 75282
loss: 0.9843769669532776,grad_norm: 0.875579813006869, iteration: 75283
loss: 1.0071622133255005,grad_norm: 0.9978724965969975, iteration: 75284
loss: 1.001996636390686,grad_norm: 0.8430802616751222, iteration: 75285
loss: 1.0112521648406982,grad_norm: 0.8854731269391831, iteration: 75286
loss: 0.9898676872253418,grad_norm: 0.9999991205214975, iteration: 75287
loss: 1.0428311824798584,grad_norm: 0.9999993269192473, iteration: 75288
loss: 0.9791699647903442,grad_norm: 0.9999991780790827, iteration: 75289
loss: 1.1403818130493164,grad_norm: 0.9999992910680047, iteration: 75290
loss: 0.9943972826004028,grad_norm: 0.9999992308863404, iteration: 75291
loss: 1.0231584310531616,grad_norm: 0.9999997564443027, iteration: 75292
loss: 1.0242228507995605,grad_norm: 0.9961845415739092, iteration: 75293
loss: 1.003612756729126,grad_norm: 0.9999993238193178, iteration: 75294
loss: 1.023330807685852,grad_norm: 0.9999991808001621, iteration: 75295
loss: 1.0031569004058838,grad_norm: 0.9999990887928363, iteration: 75296
loss: 1.0125266313552856,grad_norm: 0.9999991289336385, iteration: 75297
loss: 1.0042389631271362,grad_norm: 0.8551713554584242, iteration: 75298
loss: 0.9868866801261902,grad_norm: 0.9999992491130623, iteration: 75299
loss: 1.0083009004592896,grad_norm: 0.9999990885856003, iteration: 75300
loss: 0.9785944223403931,grad_norm: 0.9999991111206222, iteration: 75301
loss: 0.9882400035858154,grad_norm: 0.9999992004668989, iteration: 75302
loss: 0.985757052898407,grad_norm: 0.8392560499873609, iteration: 75303
loss: 0.9864040613174438,grad_norm: 0.9999990765742295, iteration: 75304
loss: 1.0361264944076538,grad_norm: 0.9999990315482687, iteration: 75305
loss: 1.0639927387237549,grad_norm: 0.9452620035432907, iteration: 75306
loss: 1.0072396993637085,grad_norm: 0.9999991036590744, iteration: 75307
loss: 0.9879485964775085,grad_norm: 0.8941535695429087, iteration: 75308
loss: 1.0154989957809448,grad_norm: 0.9816264558836015, iteration: 75309
loss: 0.9869788289070129,grad_norm: 0.9999990089246689, iteration: 75310
loss: 1.0188179016113281,grad_norm: 0.9999991338517645, iteration: 75311
loss: 0.9615281820297241,grad_norm: 0.9999992172004322, iteration: 75312
loss: 1.0007179975509644,grad_norm: 0.9999990071534447, iteration: 75313
loss: 0.9671842455863953,grad_norm: 0.8856839255390118, iteration: 75314
loss: 0.9800629615783691,grad_norm: 0.921067572843322, iteration: 75315
loss: 0.9964081048965454,grad_norm: 0.968249857009226, iteration: 75316
loss: 1.0304487943649292,grad_norm: 0.9999995266509283, iteration: 75317
loss: 0.9511417150497437,grad_norm: 0.9734347453759239, iteration: 75318
loss: 0.9899852871894836,grad_norm: 0.9999992369152868, iteration: 75319
loss: 0.9716675877571106,grad_norm: 0.9999992081530161, iteration: 75320
loss: 0.9707372784614563,grad_norm: 0.999999194269874, iteration: 75321
loss: 0.992364764213562,grad_norm: 0.9999991080686804, iteration: 75322
loss: 1.0402154922485352,grad_norm: 0.9999991383872264, iteration: 75323
loss: 0.9800818562507629,grad_norm: 0.9999989948002413, iteration: 75324
loss: 0.9977836012840271,grad_norm: 0.9999990964944091, iteration: 75325
loss: 1.0152939558029175,grad_norm: 0.7777676420264027, iteration: 75326
loss: 0.9948957562446594,grad_norm: 0.9999996833178517, iteration: 75327
loss: 0.996978223323822,grad_norm: 0.852924380046802, iteration: 75328
loss: 0.9905421137809753,grad_norm: 0.9999992306941969, iteration: 75329
loss: 0.974500298500061,grad_norm: 0.9544720768570965, iteration: 75330
loss: 1.0091239213943481,grad_norm: 0.9999990515461046, iteration: 75331
loss: 1.1165903806686401,grad_norm: 0.9999999373869983, iteration: 75332
loss: 1.0113353729248047,grad_norm: 0.9152000748094357, iteration: 75333
loss: 0.9989036321640015,grad_norm: 0.9757762631728384, iteration: 75334
loss: 0.9949272871017456,grad_norm: 0.9999991279583929, iteration: 75335
loss: 0.9800449013710022,grad_norm: 0.8802212012145775, iteration: 75336
loss: 1.0189636945724487,grad_norm: 0.9999991738743893, iteration: 75337
loss: 0.9808733463287354,grad_norm: 0.9999990512565932, iteration: 75338
loss: 0.9979525804519653,grad_norm: 0.9999990017261462, iteration: 75339
loss: 1.035163402557373,grad_norm: 0.9999994658072109, iteration: 75340
loss: 1.0164189338684082,grad_norm: 0.9999995089740361, iteration: 75341
loss: 1.0056161880493164,grad_norm: 0.9999988961073245, iteration: 75342
loss: 1.046722173690796,grad_norm: 0.909474303716518, iteration: 75343
loss: 1.0188676118850708,grad_norm: 0.999999088409227, iteration: 75344
loss: 0.9887511134147644,grad_norm: 0.9844760014167246, iteration: 75345
loss: 1.0202670097351074,grad_norm: 0.9999988296329144, iteration: 75346
loss: 1.0140728950500488,grad_norm: 0.8551737881756236, iteration: 75347
loss: 0.9956842064857483,grad_norm: 0.9625314529803894, iteration: 75348
loss: 0.9972598552703857,grad_norm: 0.9999991752260989, iteration: 75349
loss: 0.9907546043395996,grad_norm: 0.9999991269039004, iteration: 75350
loss: 0.9934079051017761,grad_norm: 0.9915017311483187, iteration: 75351
loss: 1.0086538791656494,grad_norm: 0.999999151400456, iteration: 75352
loss: 0.9909700155258179,grad_norm: 0.9999992217870498, iteration: 75353
loss: 1.0093698501586914,grad_norm: 0.9999992786451326, iteration: 75354
loss: 0.98654705286026,grad_norm: 0.9999991162374395, iteration: 75355
loss: 0.9821105599403381,grad_norm: 0.9999990056521326, iteration: 75356
loss: 1.005903959274292,grad_norm: 0.9999991833006728, iteration: 75357
loss: 0.9939448833465576,grad_norm: 0.999999265679025, iteration: 75358
loss: 1.0060285329818726,grad_norm: 0.9634694730078521, iteration: 75359
loss: 0.9699861407279968,grad_norm: 0.9999989639306959, iteration: 75360
loss: 0.9979252815246582,grad_norm: 0.8881022729465999, iteration: 75361
loss: 0.9896783232688904,grad_norm: 0.9999990131102252, iteration: 75362
loss: 1.0326169729232788,grad_norm: 0.9999990556494076, iteration: 75363
loss: 1.0375518798828125,grad_norm: 1.000000033453348, iteration: 75364
loss: 1.0094709396362305,grad_norm: 0.9999990039188943, iteration: 75365
loss: 0.9896082878112793,grad_norm: 0.999999215788757, iteration: 75366
loss: 0.9748048782348633,grad_norm: 0.9822767917934914, iteration: 75367
loss: 1.0021612644195557,grad_norm: 0.9944551145406955, iteration: 75368
loss: 0.9900999665260315,grad_norm: 0.9999991444451594, iteration: 75369
loss: 0.9914698600769043,grad_norm: 0.9481866383227588, iteration: 75370
loss: 0.977993369102478,grad_norm: 0.9999991004066134, iteration: 75371
loss: 1.0058677196502686,grad_norm: 0.9472445532754292, iteration: 75372
loss: 0.9993906617164612,grad_norm: 0.87353782504378, iteration: 75373
loss: 0.9830142259597778,grad_norm: 0.948660891811848, iteration: 75374
loss: 0.9987737536430359,grad_norm: 0.9999992523809931, iteration: 75375
loss: 1.0423897504806519,grad_norm: 0.9999994555480036, iteration: 75376
loss: 0.9698899388313293,grad_norm: 0.9999991149873968, iteration: 75377
loss: 1.036731481552124,grad_norm: 0.9778896516982228, iteration: 75378
loss: 0.9925488829612732,grad_norm: 0.9127701433278039, iteration: 75379
loss: 1.045660138130188,grad_norm: 0.9745436630785074, iteration: 75380
loss: 0.9960089325904846,grad_norm: 0.9999991436275576, iteration: 75381
loss: 1.0038697719573975,grad_norm: 0.9999990572377602, iteration: 75382
loss: 1.0641199350357056,grad_norm: 0.99999959282859, iteration: 75383
loss: 0.9475994110107422,grad_norm: 0.999999018986681, iteration: 75384
loss: 1.0245078802108765,grad_norm: 0.9664695207431683, iteration: 75385
loss: 0.9995397925376892,grad_norm: 0.9999992425348715, iteration: 75386
loss: 1.003004550933838,grad_norm: 0.999999215511516, iteration: 75387
loss: 1.0117731094360352,grad_norm: 0.999999112251093, iteration: 75388
loss: 0.9812460541725159,grad_norm: 0.9999991492716289, iteration: 75389
loss: 1.0089646577835083,grad_norm: 0.9999989151787104, iteration: 75390
loss: 1.0002813339233398,grad_norm: 0.9999991776837448, iteration: 75391
loss: 1.0135189294815063,grad_norm: 0.9999997000113566, iteration: 75392
loss: 0.9853007197380066,grad_norm: 0.9999991512916399, iteration: 75393
loss: 0.9602664113044739,grad_norm: 0.9999991078008539, iteration: 75394
loss: 0.9740409851074219,grad_norm: 0.9999990439634895, iteration: 75395
loss: 1.003241777420044,grad_norm: 0.9576505428499089, iteration: 75396
loss: 1.015817403793335,grad_norm: 0.9999992119655611, iteration: 75397
loss: 0.9735805988311768,grad_norm: 0.9999992425450347, iteration: 75398
loss: 1.0112926959991455,grad_norm: 0.9999990747157577, iteration: 75399
loss: 1.018048882484436,grad_norm: 0.9999992634560175, iteration: 75400
loss: 0.9751342535018921,grad_norm: 0.999999116433476, iteration: 75401
loss: 0.9900215268135071,grad_norm: 0.9999999798656065, iteration: 75402
loss: 0.9932781457901001,grad_norm: 0.9999991350629412, iteration: 75403
loss: 1.02205228805542,grad_norm: 0.9999991804679134, iteration: 75404
loss: 0.9902790784835815,grad_norm: 0.9713222308018107, iteration: 75405
loss: 1.0233895778656006,grad_norm: 0.9999993966408122, iteration: 75406
loss: 1.0121164321899414,grad_norm: 0.8741873354216533, iteration: 75407
loss: 0.9828454256057739,grad_norm: 0.9999990576498241, iteration: 75408
loss: 1.0070013999938965,grad_norm: 0.999999097515999, iteration: 75409
loss: 0.9885541200637817,grad_norm: 0.9014565855495527, iteration: 75410
loss: 0.9925720691680908,grad_norm: 0.955808273642099, iteration: 75411
loss: 0.9954267144203186,grad_norm: 0.9350913431195639, iteration: 75412
loss: 0.9985619187355042,grad_norm: 0.999999316766834, iteration: 75413
loss: 0.9710519909858704,grad_norm: 0.9999991101911629, iteration: 75414
loss: 1.0022900104522705,grad_norm: 0.9228472805270604, iteration: 75415
loss: 1.0262669324874878,grad_norm: 0.9999992762865294, iteration: 75416
loss: 1.0168105363845825,grad_norm: 0.9999991456728499, iteration: 75417
loss: 1.0203684568405151,grad_norm: 0.9999992400597203, iteration: 75418
loss: 1.024214744567871,grad_norm: 0.9435521753443183, iteration: 75419
loss: 1.0084304809570312,grad_norm: 0.9999991199233775, iteration: 75420
loss: 0.9916982650756836,grad_norm: 0.9079008301932713, iteration: 75421
loss: 0.9790907502174377,grad_norm: 0.8936007085861596, iteration: 75422
loss: 1.0205769538879395,grad_norm: 0.9860971065404092, iteration: 75423
loss: 1.023276925086975,grad_norm: 0.9999989975047288, iteration: 75424
loss: 0.9818899631500244,grad_norm: 0.9999992262628098, iteration: 75425
loss: 0.9838141202926636,grad_norm: 0.9999990653044529, iteration: 75426
loss: 1.0047767162322998,grad_norm: 0.9129898643321221, iteration: 75427
loss: 0.9821655750274658,grad_norm: 0.8405262025224911, iteration: 75428
loss: 1.0103635787963867,grad_norm: 0.9999992283341507, iteration: 75429
loss: 0.9985064268112183,grad_norm: 0.9860998187013933, iteration: 75430
loss: 0.9816297888755798,grad_norm: 0.9999994350760214, iteration: 75431
loss: 1.0078195333480835,grad_norm: 0.9999990782365301, iteration: 75432
loss: 1.0275923013687134,grad_norm: 0.9999990236858821, iteration: 75433
loss: 0.9977372288703918,grad_norm: 0.9269176841719752, iteration: 75434
loss: 1.0332781076431274,grad_norm: 0.9897108451848186, iteration: 75435
loss: 0.9738168120384216,grad_norm: 0.999999030071361, iteration: 75436
loss: 0.9979934096336365,grad_norm: 0.7833881374490714, iteration: 75437
loss: 1.011721134185791,grad_norm: 0.9999991546060603, iteration: 75438
loss: 1.0182822942733765,grad_norm: 0.9999991500386878, iteration: 75439
loss: 1.007478952407837,grad_norm: 0.9999990569887414, iteration: 75440
loss: 1.011983871459961,grad_norm: 0.8595013045809518, iteration: 75441
loss: 1.0125315189361572,grad_norm: 0.9999991996585935, iteration: 75442
loss: 1.0331335067749023,grad_norm: 0.9999992663776417, iteration: 75443
loss: 1.0034059286117554,grad_norm: 0.9999990594491093, iteration: 75444
loss: 0.9729427695274353,grad_norm: 0.9999992305939839, iteration: 75445
loss: 0.9966254830360413,grad_norm: 0.9999994377212397, iteration: 75446
loss: 0.9928044080734253,grad_norm: 0.9896470502919384, iteration: 75447
loss: 1.025528073310852,grad_norm: 0.9037110012476423, iteration: 75448
loss: 1.0243823528289795,grad_norm: 0.9999993104587973, iteration: 75449
loss: 0.9630446434020996,grad_norm: 0.9999991033613994, iteration: 75450
loss: 0.9918311238288879,grad_norm: 0.9822626210854796, iteration: 75451
loss: 1.0061215162277222,grad_norm: 0.9999994443799659, iteration: 75452
loss: 0.9877562522888184,grad_norm: 0.9999991333609823, iteration: 75453
loss: 0.9845220446586609,grad_norm: 0.9999989685267938, iteration: 75454
loss: 0.9962874054908752,grad_norm: 0.9999990827225069, iteration: 75455
loss: 1.0178101062774658,grad_norm: 0.9999992294096277, iteration: 75456
loss: 0.9548521041870117,grad_norm: 0.9999991698874237, iteration: 75457
loss: 0.9900036454200745,grad_norm: 0.9999991487162845, iteration: 75458
loss: 0.9936584830284119,grad_norm: 0.9049438140445707, iteration: 75459
loss: 1.0300172567367554,grad_norm: 0.9999990730501703, iteration: 75460
loss: 0.9775382280349731,grad_norm: 0.9999989707479512, iteration: 75461
loss: 0.9819091558456421,grad_norm: 0.9999991305147266, iteration: 75462
loss: 0.9639841318130493,grad_norm: 0.9906104716684592, iteration: 75463
loss: 1.0520707368850708,grad_norm: 0.9999995676413389, iteration: 75464
loss: 1.0054606199264526,grad_norm: 0.9999992317186756, iteration: 75465
loss: 1.0189204216003418,grad_norm: 0.9658373453542594, iteration: 75466
loss: 0.9796162843704224,grad_norm: 0.9999989900394134, iteration: 75467
loss: 0.9749963879585266,grad_norm: 0.9999991120907237, iteration: 75468
loss: 0.9766597747802734,grad_norm: 0.9999992903534483, iteration: 75469
loss: 0.99612957239151,grad_norm: 0.9999989841954207, iteration: 75470
loss: 0.9763193726539612,grad_norm: 0.8815767270726517, iteration: 75471
loss: 1.0317504405975342,grad_norm: 0.9873957041654955, iteration: 75472
loss: 1.0401394367218018,grad_norm: 0.9999990394573273, iteration: 75473
loss: 1.0077935457229614,grad_norm: 0.9675674879558104, iteration: 75474
loss: 1.0212019681930542,grad_norm: 0.9999990941364767, iteration: 75475
loss: 1.0038373470306396,grad_norm: 0.9380994006298026, iteration: 75476
loss: 1.0143851041793823,grad_norm: 0.9999992448985001, iteration: 75477
loss: 1.0014866590499878,grad_norm: 0.999999036761901, iteration: 75478
loss: 0.9738579392433167,grad_norm: 0.918719693827494, iteration: 75479
loss: 1.0107015371322632,grad_norm: 0.9999990732026486, iteration: 75480
loss: 0.9811792373657227,grad_norm: 0.8213015242648294, iteration: 75481
loss: 1.02829110622406,grad_norm: 0.9999990615948222, iteration: 75482
loss: 0.9820880889892578,grad_norm: 0.9067731801350067, iteration: 75483
loss: 1.0176883935928345,grad_norm: 0.9999989862210955, iteration: 75484
loss: 0.9875400066375732,grad_norm: 0.999999228617779, iteration: 75485
loss: 0.9665330052375793,grad_norm: 0.9368242529176839, iteration: 75486
loss: 0.9719052314758301,grad_norm: 0.9999990786315576, iteration: 75487
loss: 1.024955153465271,grad_norm: 0.9999991226109671, iteration: 75488
loss: 0.9973036646842957,grad_norm: 0.9999989698558995, iteration: 75489
loss: 0.9639995098114014,grad_norm: 0.9999989991298931, iteration: 75490
loss: 1.0059300661087036,grad_norm: 0.9999990513304048, iteration: 75491
loss: 0.9961636066436768,grad_norm: 0.9999990748391778, iteration: 75492
loss: 1.0087944269180298,grad_norm: 0.999999137238951, iteration: 75493
loss: 0.9812516570091248,grad_norm: 0.9999992302851232, iteration: 75494
loss: 1.0026928186416626,grad_norm: 0.9871276028790307, iteration: 75495
loss: 0.9918472170829773,grad_norm: 0.9999990402398279, iteration: 75496
loss: 1.0742446184158325,grad_norm: 0.9999995170806574, iteration: 75497
loss: 1.0078916549682617,grad_norm: 0.9999991443208928, iteration: 75498
loss: 1.0219544172286987,grad_norm: 0.999999155321279, iteration: 75499
loss: 1.0187410116195679,grad_norm: 0.966920491336934, iteration: 75500
loss: 1.0099232196807861,grad_norm: 0.9651110547679087, iteration: 75501
loss: 0.9608122706413269,grad_norm: 0.999998967531886, iteration: 75502
loss: 1.0230934619903564,grad_norm: 0.9776226567140794, iteration: 75503
loss: 0.9877747893333435,grad_norm: 0.9999992815650551, iteration: 75504
loss: 1.000273585319519,grad_norm: 0.9999991906055536, iteration: 75505
loss: 1.0187666416168213,grad_norm: 0.9999991239525643, iteration: 75506
loss: 1.0202053785324097,grad_norm: 0.9999991165588494, iteration: 75507
loss: 0.9416797161102295,grad_norm: 0.9999989665566432, iteration: 75508
loss: 1.0145630836486816,grad_norm: 0.9074054364338758, iteration: 75509
loss: 0.9683675169944763,grad_norm: 0.9569788431503433, iteration: 75510
loss: 1.0090065002441406,grad_norm: 0.993864472895508, iteration: 75511
loss: 0.9689499139785767,grad_norm: 0.9707423711947354, iteration: 75512
loss: 1.0174247026443481,grad_norm: 0.9394712314688397, iteration: 75513
loss: 0.9890323877334595,grad_norm: 0.9439145216619229, iteration: 75514
loss: 1.0316299200057983,grad_norm: 0.9999989996565972, iteration: 75515
loss: 1.0264427661895752,grad_norm: 0.9999993456648585, iteration: 75516
loss: 0.9713528752326965,grad_norm: 0.9531633681779417, iteration: 75517
loss: 0.9914127588272095,grad_norm: 0.9999991674488837, iteration: 75518
loss: 1.0063297748565674,grad_norm: 0.9431581050643433, iteration: 75519
loss: 1.0014156103134155,grad_norm: 0.9999990528327641, iteration: 75520
loss: 0.9758968353271484,grad_norm: 0.9999990306122778, iteration: 75521
loss: 0.9767694473266602,grad_norm: 0.9102176962324424, iteration: 75522
loss: 0.9857154488563538,grad_norm: 0.9999991474465225, iteration: 75523
loss: 1.0119810104370117,grad_norm: 0.9999989946315916, iteration: 75524
loss: 0.9881532788276672,grad_norm: 0.9999992174487735, iteration: 75525
loss: 1.0114845037460327,grad_norm: 0.9243897947870522, iteration: 75526
loss: 0.9904046654701233,grad_norm: 0.968156419080778, iteration: 75527
loss: 1.0146267414093018,grad_norm: 0.9907634372876263, iteration: 75528
loss: 0.9472644329071045,grad_norm: 0.9957746428776002, iteration: 75529
loss: 1.015387773513794,grad_norm: 0.9999993133666567, iteration: 75530
loss: 0.9871559739112854,grad_norm: 0.9999992599834875, iteration: 75531
loss: 0.9714260697364807,grad_norm: 0.9999991720814454, iteration: 75532
loss: 1.0265095233917236,grad_norm: 0.9946378726474889, iteration: 75533
loss: 0.9705612659454346,grad_norm: 0.9239391077812764, iteration: 75534
loss: 1.0284534692764282,grad_norm: 0.8323402041471208, iteration: 75535
loss: 1.0133543014526367,grad_norm: 0.9999992508209246, iteration: 75536
loss: 1.017250657081604,grad_norm: 0.9999992566291717, iteration: 75537
loss: 1.02442467212677,grad_norm: 0.9999997546367934, iteration: 75538
loss: 0.9830494523048401,grad_norm: 0.9999989683793887, iteration: 75539
loss: 1.0270296335220337,grad_norm: 0.9999990797670091, iteration: 75540
loss: 1.0151439905166626,grad_norm: 0.9999990812600755, iteration: 75541
loss: 0.9440594911575317,grad_norm: 0.9632416010303484, iteration: 75542
loss: 0.9962629079818726,grad_norm: 0.9616423469932092, iteration: 75543
loss: 1.0041435956954956,grad_norm: 0.9795412262689132, iteration: 75544
loss: 0.9973602294921875,grad_norm: 0.9258790893836939, iteration: 75545
loss: 1.003883719444275,grad_norm: 0.9906289581525176, iteration: 75546
loss: 0.9868614077568054,grad_norm: 0.9999991220231568, iteration: 75547
loss: 1.0062488317489624,grad_norm: 0.7805643710253646, iteration: 75548
loss: 0.9669367074966431,grad_norm: 0.9999993234892444, iteration: 75549
loss: 0.9985816478729248,grad_norm: 0.9999991654454735, iteration: 75550
loss: 0.9551160335540771,grad_norm: 0.9999991658733577, iteration: 75551
loss: 1.0301318168640137,grad_norm: 0.99999918882278, iteration: 75552
loss: 1.0089013576507568,grad_norm: 0.9999991683374212, iteration: 75553
loss: 1.028050422668457,grad_norm: 0.999999058638474, iteration: 75554
loss: 0.9430714845657349,grad_norm: 0.9999990255442058, iteration: 75555
loss: 1.023156762123108,grad_norm: 0.9999991512057853, iteration: 75556
loss: 0.9777567982673645,grad_norm: 0.9999989869685446, iteration: 75557
loss: 1.0140618085861206,grad_norm: 0.9999991332199172, iteration: 75558
loss: 0.9924073219299316,grad_norm: 0.9276258980250817, iteration: 75559
loss: 0.9988108277320862,grad_norm: 0.9999997617279207, iteration: 75560
loss: 1.0108565092086792,grad_norm: 0.9999992224405401, iteration: 75561
loss: 1.0064704418182373,grad_norm: 0.9407397835361033, iteration: 75562
loss: 1.0367772579193115,grad_norm: 0.9868654591220983, iteration: 75563
loss: 1.0111722946166992,grad_norm: 0.9250929342038183, iteration: 75564
loss: 1.0091265439987183,grad_norm: 0.9999993360555953, iteration: 75565
loss: 1.0229562520980835,grad_norm: 0.9744317269459946, iteration: 75566
loss: 0.9938436150550842,grad_norm: 0.999999005098769, iteration: 75567
loss: 0.985753059387207,grad_norm: 0.9999990457365108, iteration: 75568
loss: 0.9567427039146423,grad_norm: 0.9999990581603007, iteration: 75569
loss: 0.9746294617652893,grad_norm: 0.9101054963368455, iteration: 75570
loss: 1.0041074752807617,grad_norm: 0.9672790007810316, iteration: 75571
loss: 1.0165629386901855,grad_norm: 0.8787339536111932, iteration: 75572
loss: 1.0102081298828125,grad_norm: 0.9999991748631556, iteration: 75573
loss: 0.989051878452301,grad_norm: 0.9999991205363861, iteration: 75574
loss: 1.0329148769378662,grad_norm: 0.9999993086276737, iteration: 75575
loss: 0.9999880790710449,grad_norm: 0.999999136453177, iteration: 75576
loss: 0.9875007271766663,grad_norm: 0.9377969114669215, iteration: 75577
loss: 1.0233583450317383,grad_norm: 0.9999991155940214, iteration: 75578
loss: 1.017249584197998,grad_norm: 0.9999993220335225, iteration: 75579
loss: 0.976656973361969,grad_norm: 0.9999991459340674, iteration: 75580
loss: 0.976414680480957,grad_norm: 0.9999989405059513, iteration: 75581
loss: 1.0026549100875854,grad_norm: 0.9998954881103216, iteration: 75582
loss: 0.9966408610343933,grad_norm: 0.9430732218568976, iteration: 75583
loss: 0.9984685182571411,grad_norm: 0.932558321307813, iteration: 75584
loss: 0.9940107464790344,grad_norm: 0.9999994456217732, iteration: 75585
loss: 1.0073230266571045,grad_norm: 0.9999990145909057, iteration: 75586
loss: 0.9881066679954529,grad_norm: 0.999999123765471, iteration: 75587
loss: 1.032497763633728,grad_norm: 0.9999990071324176, iteration: 75588
loss: 1.003452181816101,grad_norm: 0.9999993609050438, iteration: 75589
loss: 0.9894634485244751,grad_norm: 0.9085655225339363, iteration: 75590
loss: 1.0508769750595093,grad_norm: 0.9999990564960838, iteration: 75591
loss: 0.9994611740112305,grad_norm: 0.8447191616366078, iteration: 75592
loss: 0.9995219707489014,grad_norm: 0.9999990346600037, iteration: 75593
loss: 0.9943815469741821,grad_norm: 0.999999201634365, iteration: 75594
loss: 0.9671416878700256,grad_norm: 0.9999991087340757, iteration: 75595
loss: 1.0105090141296387,grad_norm: 0.9663273253287469, iteration: 75596
loss: 0.9929425120353699,grad_norm: 0.9999990809306655, iteration: 75597
loss: 1.0113708972930908,grad_norm: 0.9999991998023131, iteration: 75598
loss: 1.003887414932251,grad_norm: 0.9326369838482237, iteration: 75599
loss: 1.0045603513717651,grad_norm: 0.9999997984186959, iteration: 75600
loss: 0.9953104853630066,grad_norm: 0.9999991092543061, iteration: 75601
loss: 1.0426442623138428,grad_norm: 0.9999992634921324, iteration: 75602
loss: 0.9955313801765442,grad_norm: 0.9416068142462941, iteration: 75603
loss: 0.9949212670326233,grad_norm: 0.9999992574418242, iteration: 75604
loss: 0.9462904334068298,grad_norm: 0.9999991238031094, iteration: 75605
loss: 1.0288581848144531,grad_norm: 0.9999996087488402, iteration: 75606
loss: 1.0250173807144165,grad_norm: 0.9731542242485791, iteration: 75607
loss: 1.0016965866088867,grad_norm: 0.9999998155962359, iteration: 75608
loss: 0.9541105628013611,grad_norm: 0.9999991637898097, iteration: 75609
loss: 1.033823013305664,grad_norm: 0.9999989572201156, iteration: 75610
loss: 0.9574165940284729,grad_norm: 0.8572273676645342, iteration: 75611
loss: 0.9777665138244629,grad_norm: 0.9999991469832068, iteration: 75612
loss: 1.0015852451324463,grad_norm: 0.9999991253928213, iteration: 75613
loss: 0.9989285469055176,grad_norm: 0.9972501893635191, iteration: 75614
loss: 0.9868110418319702,grad_norm: 0.9999991945171957, iteration: 75615
loss: 1.0340120792388916,grad_norm: 0.9999999714162441, iteration: 75616
loss: 0.9801766276359558,grad_norm: 0.9999990955019139, iteration: 75617
loss: 1.012163758277893,grad_norm: 0.8957532235164618, iteration: 75618
loss: 1.0034393072128296,grad_norm: 0.9999991071379724, iteration: 75619
loss: 0.9745926260948181,grad_norm: 0.9999989935668975, iteration: 75620
loss: 1.0038849115371704,grad_norm: 0.9999991816596101, iteration: 75621
loss: 0.9994557499885559,grad_norm: 0.999999243283586, iteration: 75622
loss: 1.0279496908187866,grad_norm: 0.9999993484794032, iteration: 75623
loss: 0.9737038612365723,grad_norm: 0.8940554600651887, iteration: 75624
loss: 0.991144061088562,grad_norm: 0.9999991772030604, iteration: 75625
loss: 1.0110929012298584,grad_norm: 0.9999991997453952, iteration: 75626
loss: 0.9756192564964294,grad_norm: 0.9999991151729469, iteration: 75627
loss: 1.0288655757904053,grad_norm: 0.9999990651265607, iteration: 75628
loss: 1.0173976421356201,grad_norm: 0.9896941316849046, iteration: 75629
loss: 0.9869195222854614,grad_norm: 0.9494594677515786, iteration: 75630
loss: 1.0070645809173584,grad_norm: 0.9773856585580102, iteration: 75631
loss: 1.0062646865844727,grad_norm: 0.9999992089127485, iteration: 75632
loss: 1.0354351997375488,grad_norm: 0.9999997727617134, iteration: 75633
loss: 1.0230389833450317,grad_norm: 0.9999996592516037, iteration: 75634
loss: 1.0149810314178467,grad_norm: 0.9999994934733212, iteration: 75635
loss: 1.015546441078186,grad_norm: 0.981208031416559, iteration: 75636
loss: 1.0015422105789185,grad_norm: 0.9999993563585314, iteration: 75637
loss: 1.010990023612976,grad_norm: 0.9548860254055725, iteration: 75638
loss: 1.03304922580719,grad_norm: 0.8882773237504994, iteration: 75639
loss: 1.0380693674087524,grad_norm: 0.9612613645510514, iteration: 75640
loss: 0.9957306385040283,grad_norm: 0.9752816672426899, iteration: 75641
loss: 1.0003852844238281,grad_norm: 0.9999992553904226, iteration: 75642
loss: 1.0170559883117676,grad_norm: 0.9999991320882133, iteration: 75643
loss: 1.0420111417770386,grad_norm: 0.9025523022432017, iteration: 75644
loss: 1.0056718587875366,grad_norm: 0.9999991060305778, iteration: 75645
loss: 0.9879953861236572,grad_norm: 0.999999214239468, iteration: 75646
loss: 0.9964077472686768,grad_norm: 0.9599641177819737, iteration: 75647
loss: 1.0287646055221558,grad_norm: 0.9999991728913422, iteration: 75648
loss: 1.1404868364334106,grad_norm: 0.9999999643101414, iteration: 75649
loss: 1.0416916608810425,grad_norm: 0.9999992034982853, iteration: 75650
loss: 1.0231316089630127,grad_norm: 0.9999991460058282, iteration: 75651
loss: 0.9886866211891174,grad_norm: 0.9999990895123116, iteration: 75652
loss: 0.9987190365791321,grad_norm: 0.9999998536320003, iteration: 75653
loss: 1.0327696800231934,grad_norm: 0.9999992454858446, iteration: 75654
loss: 1.0040727853775024,grad_norm: 0.9999990416324332, iteration: 75655
loss: 1.0099807977676392,grad_norm: 0.9999991605635437, iteration: 75656
loss: 1.0239949226379395,grad_norm: 0.9999991607282345, iteration: 75657
loss: 0.9786327481269836,grad_norm: 0.9999990471416348, iteration: 75658
loss: 0.9521312117576599,grad_norm: 0.9999992296660905, iteration: 75659
loss: 0.9793300628662109,grad_norm: 0.9868776987762236, iteration: 75660
loss: 0.9964994788169861,grad_norm: 0.8963572602856931, iteration: 75661
loss: 0.9998989701271057,grad_norm: 0.9999990635151585, iteration: 75662
loss: 1.0247234106063843,grad_norm: 0.9615384442055657, iteration: 75663
loss: 0.9886966347694397,grad_norm: 0.99999909518997, iteration: 75664
loss: 1.0327723026275635,grad_norm: 0.9147273674595228, iteration: 75665
loss: 1.1073170900344849,grad_norm: 0.999999303113801, iteration: 75666
loss: 1.0084608793258667,grad_norm: 0.917852304247351, iteration: 75667
loss: 1.0091559886932373,grad_norm: 0.999999272802903, iteration: 75668
loss: 1.0137922763824463,grad_norm: 0.999999095797332, iteration: 75669
loss: 1.0237973928451538,grad_norm: 0.9999991349205116, iteration: 75670
loss: 1.0078133344650269,grad_norm: 0.9543435823137272, iteration: 75671
loss: 1.025371789932251,grad_norm: 0.9999990696457155, iteration: 75672
loss: 0.9992260336875916,grad_norm: 0.9999991199333741, iteration: 75673
loss: 1.0181010961532593,grad_norm: 0.9501199861343795, iteration: 75674
loss: 1.006515383720398,grad_norm: 0.9999992152584094, iteration: 75675
loss: 1.0181951522827148,grad_norm: 0.9999991892899401, iteration: 75676
loss: 0.9752932786941528,grad_norm: 0.971100894203985, iteration: 75677
loss: 0.9775918126106262,grad_norm: 0.999999185636463, iteration: 75678
loss: 1.0348496437072754,grad_norm: 0.9999990614704398, iteration: 75679
loss: 0.9966405630111694,grad_norm: 0.9999989838636063, iteration: 75680
loss: 1.009819746017456,grad_norm: 0.93472703728718, iteration: 75681
loss: 0.9892565608024597,grad_norm: 0.9999989933187979, iteration: 75682
loss: 1.0016998052597046,grad_norm: 0.9999990134487499, iteration: 75683
loss: 0.9868192672729492,grad_norm: 0.9999990697426956, iteration: 75684
loss: 0.962369441986084,grad_norm: 0.9999989136155476, iteration: 75685
loss: 1.00281822681427,grad_norm: 0.9999991806640303, iteration: 75686
loss: 1.0202338695526123,grad_norm: 0.9999989285365467, iteration: 75687
loss: 1.0076597929000854,grad_norm: 0.9999991078996179, iteration: 75688
loss: 1.0099949836730957,grad_norm: 0.9999991736633572, iteration: 75689
loss: 0.9899260401725769,grad_norm: 0.9999990253581438, iteration: 75690
loss: 1.0055978298187256,grad_norm: 0.9999991734209662, iteration: 75691
loss: 1.0123534202575684,grad_norm: 0.999999202635512, iteration: 75692
loss: 1.007444143295288,grad_norm: 0.9999994112443661, iteration: 75693
loss: 1.035951852798462,grad_norm: 0.9410794959342111, iteration: 75694
loss: 0.990374743938446,grad_norm: 0.9999993005446495, iteration: 75695
loss: 0.9866536855697632,grad_norm: 0.9999991252880492, iteration: 75696
loss: 0.9947054386138916,grad_norm: 0.9999991210144588, iteration: 75697
loss: 0.9889700412750244,grad_norm: 0.9999991885412648, iteration: 75698
loss: 0.9683805704116821,grad_norm: 0.9999992012223377, iteration: 75699
loss: 1.0156333446502686,grad_norm: 0.9999990701406397, iteration: 75700
loss: 1.0181607007980347,grad_norm: 0.999999471592544, iteration: 75701
loss: 1.001176118850708,grad_norm: 0.8762784408101228, iteration: 75702
loss: 0.9659526348114014,grad_norm: 0.9999988801503444, iteration: 75703
loss: 0.9668316841125488,grad_norm: 0.946156138380458, iteration: 75704
loss: 0.9902694225311279,grad_norm: 0.9999990668052291, iteration: 75705
loss: 1.0012253522872925,grad_norm: 0.9999989864461425, iteration: 75706
loss: 1.026084303855896,grad_norm: 0.9999993523525456, iteration: 75707
loss: 1.0047920942306519,grad_norm: 0.9999991286302138, iteration: 75708
loss: 1.01369309425354,grad_norm: 0.9999991164215543, iteration: 75709
loss: 0.9967967867851257,grad_norm: 0.9999990089218036, iteration: 75710
loss: 0.9505863785743713,grad_norm: 0.9999990384603891, iteration: 75711
loss: 1.003100872039795,grad_norm: 0.9999992314364946, iteration: 75712
loss: 0.9917036890983582,grad_norm: 0.999999318008986, iteration: 75713
loss: 0.9741733074188232,grad_norm: 0.9999991021853741, iteration: 75714
loss: 0.9579553008079529,grad_norm: 0.9978154964361833, iteration: 75715
loss: 0.9824197292327881,grad_norm: 0.9846139810447895, iteration: 75716
loss: 0.9679237604141235,grad_norm: 0.9999990779167471, iteration: 75717
loss: 0.9984801411628723,grad_norm: 0.9999990996647428, iteration: 75718
loss: 1.055668592453003,grad_norm: 0.9999999633472565, iteration: 75719
loss: 1.0191340446472168,grad_norm: 0.9614037056023542, iteration: 75720
loss: 0.9907077550888062,grad_norm: 0.9999999706018894, iteration: 75721
loss: 1.039061427116394,grad_norm: 0.9999992293252931, iteration: 75722
loss: 0.9871724843978882,grad_norm: 0.9999991917331168, iteration: 75723
loss: 1.0589637756347656,grad_norm: 0.999999745406166, iteration: 75724
loss: 1.0110656023025513,grad_norm: 0.9999993032117563, iteration: 75725
loss: 1.0040405988693237,grad_norm: 0.9999990064849603, iteration: 75726
loss: 1.0407713651657104,grad_norm: 0.8979003381283477, iteration: 75727
loss: 1.0115734338760376,grad_norm: 0.9999991351533735, iteration: 75728
loss: 0.996820867061615,grad_norm: 0.9548241986947045, iteration: 75729
loss: 0.9913633465766907,grad_norm: 0.8875266826755484, iteration: 75730
loss: 1.0060930252075195,grad_norm: 0.9315630409916317, iteration: 75731
loss: 1.006538987159729,grad_norm: 0.9999991475369817, iteration: 75732
loss: 1.146227240562439,grad_norm: 0.9999995406270189, iteration: 75733
loss: 0.9791390299797058,grad_norm: 0.9999990659470235, iteration: 75734
loss: 1.0411039590835571,grad_norm: 0.9999991159857072, iteration: 75735
loss: 1.0327092409133911,grad_norm: 0.999999187898452, iteration: 75736
loss: 1.0127382278442383,grad_norm: 0.9284140044584825, iteration: 75737
loss: 1.0064644813537598,grad_norm: 0.9619844443556266, iteration: 75738
loss: 1.0004949569702148,grad_norm: 0.9999991556992999, iteration: 75739
loss: 1.0010124444961548,grad_norm: 0.9999991009835082, iteration: 75740
loss: 1.0079309940338135,grad_norm: 0.9993904117306691, iteration: 75741
loss: 1.0028178691864014,grad_norm: 0.9999991452637405, iteration: 75742
loss: 1.040826678276062,grad_norm: 0.9999992742512213, iteration: 75743
loss: 0.9848049879074097,grad_norm: 0.999999112639904, iteration: 75744
loss: 0.9995586276054382,grad_norm: 0.9999991399245923, iteration: 75745
loss: 1.0383341312408447,grad_norm: 0.9999991545702778, iteration: 75746
loss: 1.0239691734313965,grad_norm: 0.9999990085314849, iteration: 75747
loss: 0.9872756004333496,grad_norm: 0.9275188382298853, iteration: 75748
loss: 0.9986596703529358,grad_norm: 0.9999989597045145, iteration: 75749
loss: 0.983766496181488,grad_norm: 0.8518152540474725, iteration: 75750
loss: 1.020303726196289,grad_norm: 0.9999992310516896, iteration: 75751
loss: 1.0497007369995117,grad_norm: 0.9999990353438445, iteration: 75752
loss: 0.9910743236541748,grad_norm: 0.999999102548529, iteration: 75753
loss: 1.0196037292480469,grad_norm: 0.9999991840421735, iteration: 75754
loss: 0.988408625125885,grad_norm: 0.9999991738943003, iteration: 75755
loss: 0.9819188117980957,grad_norm: 0.9780044501177282, iteration: 75756
loss: 0.9951513409614563,grad_norm: 0.9556662656977688, iteration: 75757
loss: 1.0070106983184814,grad_norm: 0.9999993133614002, iteration: 75758
loss: 1.0352293252944946,grad_norm: 0.9999997305650468, iteration: 75759
loss: 0.9980747699737549,grad_norm: 0.9903207652711917, iteration: 75760
loss: 1.0246877670288086,grad_norm: 0.9999990201692515, iteration: 75761
loss: 1.010346531867981,grad_norm: 0.9366272680234132, iteration: 75762
loss: 1.002577543258667,grad_norm: 0.9999990820578432, iteration: 75763
loss: 0.9841607213020325,grad_norm: 0.9482973422926807, iteration: 75764
loss: 0.9882896542549133,grad_norm: 0.8898539905220006, iteration: 75765
loss: 0.9794179797172546,grad_norm: 0.9999992722720142, iteration: 75766
loss: 1.0164507627487183,grad_norm: 0.951272147436646, iteration: 75767
loss: 1.0260967016220093,grad_norm: 0.9715302068476915, iteration: 75768
loss: 1.0303871631622314,grad_norm: 0.9957379785859474, iteration: 75769
loss: 0.984167218208313,grad_norm: 0.9681940846642599, iteration: 75770
loss: 1.024702548980713,grad_norm: 0.8551252398851601, iteration: 75771
loss: 1.009609580039978,grad_norm: 0.9999990525677264, iteration: 75772
loss: 1.0228581428527832,grad_norm: 0.9999990185704093, iteration: 75773
loss: 1.0631232261657715,grad_norm: 0.9999994768352312, iteration: 75774
loss: 1.0087136030197144,grad_norm: 0.9999990400930888, iteration: 75775
loss: 0.988589882850647,grad_norm: 0.9999991101277063, iteration: 75776
loss: 0.9931989312171936,grad_norm: 0.9683010453091314, iteration: 75777
loss: 0.9826658964157104,grad_norm: 0.9589767682690898, iteration: 75778
loss: 0.9910051822662354,grad_norm: 0.8646978097188733, iteration: 75779
loss: 1.025518774986267,grad_norm: 0.9057107300785217, iteration: 75780
loss: 1.0329216718673706,grad_norm: 0.9999991765350447, iteration: 75781
loss: 0.9790241718292236,grad_norm: 0.9670843195250728, iteration: 75782
loss: 0.9977543950080872,grad_norm: 0.9541830913019091, iteration: 75783
loss: 1.0306148529052734,grad_norm: 0.9342582293739817, iteration: 75784
loss: 1.0124003887176514,grad_norm: 0.9999993010401834, iteration: 75785
loss: 1.0304938554763794,grad_norm: 0.9999991791707675, iteration: 75786
loss: 1.0284624099731445,grad_norm: 0.8874498733451832, iteration: 75787
loss: 1.0087648630142212,grad_norm: 0.999999037003431, iteration: 75788
loss: 1.001572608947754,grad_norm: 0.9999991898687277, iteration: 75789
loss: 0.9848968982696533,grad_norm: 0.9215498629132287, iteration: 75790
loss: 1.002623438835144,grad_norm: 0.9999990462543843, iteration: 75791
loss: 0.9935215711593628,grad_norm: 0.9999990336912257, iteration: 75792
loss: 1.008438229560852,grad_norm: 0.9999993539831415, iteration: 75793
loss: 0.9781646728515625,grad_norm: 0.9999991633604892, iteration: 75794
loss: 0.9949671030044556,grad_norm: 0.950844083663558, iteration: 75795
loss: 1.0139683485031128,grad_norm: 0.9409605557052286, iteration: 75796
loss: 0.985362708568573,grad_norm: 0.9999991214671379, iteration: 75797
loss: 1.0026764869689941,grad_norm: 0.9322101979739938, iteration: 75798
loss: 0.989320695400238,grad_norm: 0.9999991272303025, iteration: 75799
loss: 0.9920788407325745,grad_norm: 0.9999990262831205, iteration: 75800
loss: 0.9935353398323059,grad_norm: 0.9856161558677649, iteration: 75801
loss: 0.9812370538711548,grad_norm: 0.9646399492628396, iteration: 75802
loss: 1.013300895690918,grad_norm: 0.8964084233060519, iteration: 75803
loss: 0.9774051904678345,grad_norm: 0.9999992006343053, iteration: 75804
loss: 0.9913244247436523,grad_norm: 0.9999991159458843, iteration: 75805
loss: 0.9943820238113403,grad_norm: 0.8297784287746857, iteration: 75806
loss: 0.9762807488441467,grad_norm: 0.9999991389307044, iteration: 75807
loss: 0.9877958297729492,grad_norm: 0.9999991383629443, iteration: 75808
loss: 0.9864342212677002,grad_norm: 0.9999991312079722, iteration: 75809
loss: 0.9828696846961975,grad_norm: 0.9999991061518637, iteration: 75810
loss: 0.9966968297958374,grad_norm: 0.9999991721266659, iteration: 75811
loss: 0.9747054576873779,grad_norm: 0.99999917101401, iteration: 75812
loss: 1.0375629663467407,grad_norm: 0.9999991345230781, iteration: 75813
loss: 0.9985604286193848,grad_norm: 0.9557592535088153, iteration: 75814
loss: 0.9991387724876404,grad_norm: 0.9999992346865364, iteration: 75815
loss: 1.016178011894226,grad_norm: 0.9999991366924067, iteration: 75816
loss: 1.0038825273513794,grad_norm: 0.9999990692820442, iteration: 75817
loss: 1.0164326429367065,grad_norm: 0.9999990999444525, iteration: 75818
loss: 1.010284423828125,grad_norm: 0.9999991265828441, iteration: 75819
loss: 1.0388028621673584,grad_norm: 0.9999993273861144, iteration: 75820
loss: 0.9791519045829773,grad_norm: 0.9999990259770855, iteration: 75821
loss: 0.9863549470901489,grad_norm: 0.9009510445874704, iteration: 75822
loss: 1.0220893621444702,grad_norm: 0.9999990381043994, iteration: 75823
loss: 0.9361695051193237,grad_norm: 0.9999991289968727, iteration: 75824
loss: 1.0216064453125,grad_norm: 0.9999993030137379, iteration: 75825
loss: 1.0224804878234863,grad_norm: 0.9999990295292404, iteration: 75826
loss: 0.9846871495246887,grad_norm: 0.9999993296398119, iteration: 75827
loss: 0.9686832427978516,grad_norm: 0.8939722911885, iteration: 75828
loss: 1.0118098258972168,grad_norm: 0.9999993272755058, iteration: 75829
loss: 1.004990816116333,grad_norm: 0.9641643891305928, iteration: 75830
loss: 1.0026330947875977,grad_norm: 0.9999989393652476, iteration: 75831
loss: 1.0308159589767456,grad_norm: 0.999999252617689, iteration: 75832
loss: 1.0031824111938477,grad_norm: 0.9999991797782716, iteration: 75833
loss: 0.960599422454834,grad_norm: 0.9999991559452983, iteration: 75834
loss: 0.9837071895599365,grad_norm: 0.9999989770984649, iteration: 75835
loss: 0.9929535984992981,grad_norm: 0.9999990839622703, iteration: 75836
loss: 1.0314311981201172,grad_norm: 0.9999991604095239, iteration: 75837
loss: 1.001495122909546,grad_norm: 0.99999942478128, iteration: 75838
loss: 1.0175790786743164,grad_norm: 0.999999153704543, iteration: 75839
loss: 0.9750153422355652,grad_norm: 0.9999992801938808, iteration: 75840
loss: 1.0061348676681519,grad_norm: 0.9999991632672726, iteration: 75841
loss: 0.9829578399658203,grad_norm: 0.9999990319696941, iteration: 75842
loss: 1.0439119338989258,grad_norm: 0.9999992544626148, iteration: 75843
loss: 0.9821087121963501,grad_norm: 0.9999990358082619, iteration: 75844
loss: 1.0133756399154663,grad_norm: 0.8999872878355121, iteration: 75845
loss: 0.9821422100067139,grad_norm: 0.9999989647770181, iteration: 75846
loss: 0.9622583389282227,grad_norm: 0.9881099827643112, iteration: 75847
loss: 0.9944965839385986,grad_norm: 0.987370698264255, iteration: 75848
loss: 0.9778091311454773,grad_norm: 0.9888268161579928, iteration: 75849
loss: 0.9469252824783325,grad_norm: 0.9999991466246032, iteration: 75850
loss: 1.0354719161987305,grad_norm: 0.9999993091824313, iteration: 75851
loss: 0.986325204372406,grad_norm: 0.9999990271481229, iteration: 75852
loss: 0.9490472674369812,grad_norm: 0.9098099129582987, iteration: 75853
loss: 0.9661325812339783,grad_norm: 0.9999993716066915, iteration: 75854
loss: 1.0215415954589844,grad_norm: 0.9999991910502332, iteration: 75855
loss: 0.997556209564209,grad_norm: 0.9999990626806201, iteration: 75856
loss: 1.0046495199203491,grad_norm: 0.9999991572217108, iteration: 75857
loss: 1.0303926467895508,grad_norm: 0.9999991068624647, iteration: 75858
loss: 0.999062716960907,grad_norm: 0.9999992329706522, iteration: 75859
loss: 0.9867475628852844,grad_norm: 0.999999214362936, iteration: 75860
loss: 1.0151349306106567,grad_norm: 0.9999995484908634, iteration: 75861
loss: 1.127529501914978,grad_norm: 0.9208515808565194, iteration: 75862
loss: 0.9786645174026489,grad_norm: 0.8853801562254154, iteration: 75863
loss: 0.9902758598327637,grad_norm: 0.9999992635814154, iteration: 75864
loss: 0.993150532245636,grad_norm: 0.990857506532482, iteration: 75865
loss: 1.0255368947982788,grad_norm: 0.9677837104148872, iteration: 75866
loss: 1.006423830986023,grad_norm: 0.9229784032038881, iteration: 75867
loss: 1.0045976638793945,grad_norm: 0.9999992482135126, iteration: 75868
loss: 1.0068039894104004,grad_norm: 0.900996823326582, iteration: 75869
loss: 0.9652748703956604,grad_norm: 0.9319733454300742, iteration: 75870
loss: 1.0140979290008545,grad_norm: 0.9999991595051132, iteration: 75871
loss: 1.0293396711349487,grad_norm: 0.999998892038023, iteration: 75872
loss: 1.0331093072891235,grad_norm: 0.9999991534727105, iteration: 75873
loss: 0.996426522731781,grad_norm: 0.8511004447091978, iteration: 75874
loss: 0.9708197712898254,grad_norm: 0.9999991785895629, iteration: 75875
loss: 1.0073562860488892,grad_norm: 0.9999990676555803, iteration: 75876
loss: 0.9957175850868225,grad_norm: 0.9999994588016484, iteration: 75877
loss: 1.0180158615112305,grad_norm: 0.9999990934101481, iteration: 75878
loss: 1.0128782987594604,grad_norm: 0.9999991995333932, iteration: 75879
loss: 1.0420416593551636,grad_norm: 0.9999995838747091, iteration: 75880
loss: 1.000482439994812,grad_norm: 0.9944375076241079, iteration: 75881
loss: 0.9882746934890747,grad_norm: 0.9999991794199447, iteration: 75882
loss: 1.0090527534484863,grad_norm: 0.9138922300620727, iteration: 75883
loss: 0.9947851300239563,grad_norm: 0.8434890869731806, iteration: 75884
loss: 0.9883825182914734,grad_norm: 0.9817591419300261, iteration: 75885
loss: 0.9663509726524353,grad_norm: 0.9999991776207228, iteration: 75886
loss: 1.0333791971206665,grad_norm: 0.9999992740046869, iteration: 75887
loss: 1.0360586643218994,grad_norm: 0.9999992196865101, iteration: 75888
loss: 0.9870164394378662,grad_norm: 0.999999129405593, iteration: 75889
loss: 1.0080054998397827,grad_norm: 0.9999990997570181, iteration: 75890
loss: 0.992935836315155,grad_norm: 0.9999991899207858, iteration: 75891
loss: 0.997958242893219,grad_norm: 0.9999994261792161, iteration: 75892
loss: 1.0068954229354858,grad_norm: 0.9680284363216962, iteration: 75893
loss: 0.9894710779190063,grad_norm: 0.9999991639814838, iteration: 75894
loss: 1.03420889377594,grad_norm: 0.9999998191069346, iteration: 75895
loss: 1.0217101573944092,grad_norm: 0.9999994656391084, iteration: 75896
loss: 0.947150468826294,grad_norm: 0.8846129939702839, iteration: 75897
loss: 1.0410923957824707,grad_norm: 0.9999994606797024, iteration: 75898
loss: 0.9795013666152954,grad_norm: 0.9999990565453768, iteration: 75899
loss: 1.0173448324203491,grad_norm: 0.9999991968032571, iteration: 75900
loss: 1.0138587951660156,grad_norm: 0.999999047136119, iteration: 75901
loss: 1.0069832801818848,grad_norm: 0.9999990612158544, iteration: 75902
loss: 0.9706716537475586,grad_norm: 0.9999993204857375, iteration: 75903
loss: 0.995680034160614,grad_norm: 0.999999182372354, iteration: 75904
loss: 1.0041581392288208,grad_norm: 0.9999992285489375, iteration: 75905
loss: 1.0017032623291016,grad_norm: 0.9999991457886263, iteration: 75906
loss: 0.9802165627479553,grad_norm: 0.9999989994726758, iteration: 75907
loss: 1.0002514123916626,grad_norm: 0.9999991484384099, iteration: 75908
loss: 1.0157182216644287,grad_norm: 0.9999991821368113, iteration: 75909
loss: 0.9957184195518494,grad_norm: 0.9999990290167196, iteration: 75910
loss: 0.9588661789894104,grad_norm: 0.9324294942035661, iteration: 75911
loss: 1.0275955200195312,grad_norm: 0.999999157528836, iteration: 75912
loss: 0.9963591694831848,grad_norm: 0.9999991056914495, iteration: 75913
loss: 0.988532304763794,grad_norm: 0.9999990674785818, iteration: 75914
loss: 0.9856455326080322,grad_norm: 0.9812821965686924, iteration: 75915
loss: 0.9589130878448486,grad_norm: 0.9999989698104472, iteration: 75916
loss: 1.035946011543274,grad_norm: 0.9999994236856173, iteration: 75917
loss: 1.0068156719207764,grad_norm: 0.911400467136078, iteration: 75918
loss: 0.9949816465377808,grad_norm: 0.9999989852535954, iteration: 75919
loss: 1.0158089399337769,grad_norm: 0.9999990036552191, iteration: 75920
loss: 1.026465654373169,grad_norm: 0.9999992852065317, iteration: 75921
loss: 1.0111970901489258,grad_norm: 0.9901648569136601, iteration: 75922
loss: 1.0326803922653198,grad_norm: 0.9999991136175047, iteration: 75923
loss: 1.0241632461547852,grad_norm: 0.999998982641425, iteration: 75924
loss: 0.994636058807373,grad_norm: 0.999999161220544, iteration: 75925
loss: 1.0263742208480835,grad_norm: 0.999999246596546, iteration: 75926
loss: 1.0158026218414307,grad_norm: 0.9872564587373073, iteration: 75927
loss: 1.0193151235580444,grad_norm: 0.9590559615433988, iteration: 75928
loss: 1.0083410739898682,grad_norm: 0.9999991682437774, iteration: 75929
loss: 1.0144168138504028,grad_norm: 0.9999990519945507, iteration: 75930
loss: 0.9784587621688843,grad_norm: 0.8158044758002139, iteration: 75931
loss: 0.9831165075302124,grad_norm: 0.96013917424291, iteration: 75932
loss: 1.0042803287506104,grad_norm: 0.9999994443203291, iteration: 75933
loss: 0.9701095819473267,grad_norm: 0.9999990903516879, iteration: 75934
loss: 1.0291857719421387,grad_norm: 0.9999996221431077, iteration: 75935
loss: 0.97965407371521,grad_norm: 0.9999991357877256, iteration: 75936
loss: 1.0006059408187866,grad_norm: 0.9999991673760936, iteration: 75937
loss: 1.0068272352218628,grad_norm: 0.9999990697878105, iteration: 75938
loss: 0.9793837070465088,grad_norm: 0.9063279091836435, iteration: 75939
loss: 1.049406886100769,grad_norm: 0.955739265041919, iteration: 75940
loss: 1.0365222692489624,grad_norm: 0.9999991844522362, iteration: 75941
loss: 0.9622170329093933,grad_norm: 0.9999993041931313, iteration: 75942
loss: 1.0252572298049927,grad_norm: 0.8892443782990226, iteration: 75943
loss: 1.0028586387634277,grad_norm: 0.9999991438505293, iteration: 75944
loss: 1.0428030490875244,grad_norm: 0.9999991026406897, iteration: 75945
loss: 1.0387048721313477,grad_norm: 0.9999994542113463, iteration: 75946
loss: 0.9551519155502319,grad_norm: 0.9175724173711258, iteration: 75947
loss: 0.9993261694908142,grad_norm: 0.8437968275408009, iteration: 75948
loss: 1.0358954668045044,grad_norm: 0.9139870869740473, iteration: 75949
loss: 0.9856241941452026,grad_norm: 0.8635066173812096, iteration: 75950
loss: 1.0147682428359985,grad_norm: 0.9318750661390683, iteration: 75951
loss: 1.0033739805221558,grad_norm: 0.9999992532035903, iteration: 75952
loss: 0.9856575727462769,grad_norm: 0.9999992336383254, iteration: 75953
loss: 0.9730030298233032,grad_norm: 0.9999992128620088, iteration: 75954
loss: 1.0331603288650513,grad_norm: 0.9999991319189967, iteration: 75955
loss: 0.9944627285003662,grad_norm: 0.8987106825710085, iteration: 75956
loss: 1.0329480171203613,grad_norm: 0.9999991776595943, iteration: 75957
loss: 0.9707644581794739,grad_norm: 0.9673551539487473, iteration: 75958
loss: 0.9756754040718079,grad_norm: 0.9698934349537174, iteration: 75959
loss: 1.0331758260726929,grad_norm: 0.9445837927634204, iteration: 75960
loss: 1.0319596529006958,grad_norm: 0.9999989314143292, iteration: 75961
loss: 1.0090866088867188,grad_norm: 0.9999989614242211, iteration: 75962
loss: 1.0464707612991333,grad_norm: 0.9999992127534983, iteration: 75963
loss: 0.9982337951660156,grad_norm: 0.9999994530526755, iteration: 75964
loss: 1.0020962953567505,grad_norm: 0.9999990658572776, iteration: 75965
loss: 1.0019936561584473,grad_norm: 0.9999990094778858, iteration: 75966
loss: 1.0129183530807495,grad_norm: 0.8596045641255049, iteration: 75967
loss: 1.021159052848816,grad_norm: 0.9999992750202599, iteration: 75968
loss: 0.9437543153762817,grad_norm: 0.9999994081285858, iteration: 75969
loss: 1.006941556930542,grad_norm: 0.9591179154636411, iteration: 75970
loss: 1.0256236791610718,grad_norm: 0.9999990745655819, iteration: 75971
loss: 1.0339441299438477,grad_norm: 0.9999996495665995, iteration: 75972
loss: 1.0232526063919067,grad_norm: 0.9999990927229153, iteration: 75973
loss: 0.9776470065116882,grad_norm: 0.999999279683795, iteration: 75974
loss: 1.0128940343856812,grad_norm: 0.9999989842741269, iteration: 75975
loss: 0.9832336902618408,grad_norm: 0.9381583485018137, iteration: 75976
loss: 1.0066866874694824,grad_norm: 0.99999913530281, iteration: 75977
loss: 0.9827547073364258,grad_norm: 0.999999012062221, iteration: 75978
loss: 1.0069304704666138,grad_norm: 0.9999992340254594, iteration: 75979
loss: 1.0207650661468506,grad_norm: 0.9999991879854918, iteration: 75980
loss: 0.9749190807342529,grad_norm: 0.9925107703787631, iteration: 75981
loss: 1.0014702081680298,grad_norm: 0.999999115262641, iteration: 75982
loss: 0.9935246706008911,grad_norm: 0.9999990516571293, iteration: 75983
loss: 1.0015429258346558,grad_norm: 0.9999989286293289, iteration: 75984
loss: 1.0173094272613525,grad_norm: 0.9999991994525363, iteration: 75985
loss: 1.0050004720687866,grad_norm: 0.9728193722541669, iteration: 75986
loss: 0.9882537126541138,grad_norm: 0.999999093789068, iteration: 75987
loss: 0.9632822275161743,grad_norm: 0.956383936503129, iteration: 75988
loss: 1.017155408859253,grad_norm: 0.9999990625103035, iteration: 75989
loss: 1.027248501777649,grad_norm: 0.999999068339979, iteration: 75990
loss: 1.018880009651184,grad_norm: 0.9999993110665509, iteration: 75991
loss: 1.0003671646118164,grad_norm: 0.9999994086702271, iteration: 75992
loss: 0.9955490827560425,grad_norm: 0.9999991338584778, iteration: 75993
loss: 0.9888888001441956,grad_norm: 0.9999990511422463, iteration: 75994
loss: 1.001941442489624,grad_norm: 0.9999990973338125, iteration: 75995
loss: 1.012083888053894,grad_norm: 0.8539594086465776, iteration: 75996
loss: 1.0489776134490967,grad_norm: 0.9999989471076098, iteration: 75997
loss: 1.0090768337249756,grad_norm: 0.8395971455299528, iteration: 75998
loss: 1.0194529294967651,grad_norm: 0.9999989075422175, iteration: 75999
loss: 0.9901913404464722,grad_norm: 0.999999094513117, iteration: 76000
loss: 1.0217169523239136,grad_norm: 0.99999906178995, iteration: 76001
loss: 1.1086297035217285,grad_norm: 0.9999996696073074, iteration: 76002
loss: 0.997898280620575,grad_norm: 0.9999992559101227, iteration: 76003
loss: 1.0912834405899048,grad_norm: 0.9999993750305232, iteration: 76004
loss: 1.0198886394500732,grad_norm: 0.9999991129970497, iteration: 76005
loss: 0.9867532849311829,grad_norm: 0.9999990893495978, iteration: 76006
loss: 0.9592081904411316,grad_norm: 0.9369577301687716, iteration: 76007
loss: 0.964798092842102,grad_norm: 0.9999993183704565, iteration: 76008
loss: 1.0084095001220703,grad_norm: 0.9872209463354612, iteration: 76009
loss: 0.9849967360496521,grad_norm: 0.9999991454942364, iteration: 76010
loss: 0.9913159608840942,grad_norm: 0.9999991922848875, iteration: 76011
loss: 0.9782794713973999,grad_norm: 0.9999989714145784, iteration: 76012
loss: 1.044960856437683,grad_norm: 0.9999995850899875, iteration: 76013
loss: 1.0488649606704712,grad_norm: 0.9999993196125251, iteration: 76014
loss: 0.9946673512458801,grad_norm: 0.9004414839483893, iteration: 76015
loss: 1.0135703086853027,grad_norm: 0.988349357158824, iteration: 76016
loss: 1.0074535608291626,grad_norm: 0.9999993929361547, iteration: 76017
loss: 0.980738639831543,grad_norm: 0.9200907771977465, iteration: 76018
loss: 0.9741041660308838,grad_norm: 0.9999991415709736, iteration: 76019
loss: 0.9796040654182434,grad_norm: 0.9179582217576278, iteration: 76020
loss: 1.0352977514266968,grad_norm: 0.9999991295629377, iteration: 76021
loss: 0.9972880482673645,grad_norm: 0.9868969099769175, iteration: 76022
loss: 1.0203869342803955,grad_norm: 0.9999991499359984, iteration: 76023
loss: 0.9757334589958191,grad_norm: 0.9999991309661865, iteration: 76024
loss: 1.052092432975769,grad_norm: 0.9999992512169044, iteration: 76025
loss: 0.9977598786354065,grad_norm: 0.8686897516164473, iteration: 76026
loss: 0.9521785974502563,grad_norm: 0.9598210029626763, iteration: 76027
loss: 1.0394634008407593,grad_norm: 0.9999992566716482, iteration: 76028
loss: 0.9899758100509644,grad_norm: 0.9999991551430824, iteration: 76029
loss: 1.0214089155197144,grad_norm: 0.9335532863360763, iteration: 76030
loss: 1.0066472291946411,grad_norm: 0.9999994924997808, iteration: 76031
loss: 1.0305553674697876,grad_norm: 0.9999998706323023, iteration: 76032
loss: 1.010736346244812,grad_norm: 0.999999065195305, iteration: 76033
loss: 0.9995818138122559,grad_norm: 0.999999464989128, iteration: 76034
loss: 1.0195915699005127,grad_norm: 0.9737237950624977, iteration: 76035
loss: 1.0814825296401978,grad_norm: 0.9999992563338181, iteration: 76036
loss: 0.995058000087738,grad_norm: 0.8002562844218682, iteration: 76037
loss: 0.9733948707580566,grad_norm: 0.9806644888376045, iteration: 76038
loss: 0.992332398891449,grad_norm: 0.9999990618015645, iteration: 76039
loss: 1.0404598712921143,grad_norm: 0.9999998032999832, iteration: 76040
loss: 1.0262243747711182,grad_norm: 0.9881032523092393, iteration: 76041
loss: 0.9987550973892212,grad_norm: 0.9999990494750979, iteration: 76042
loss: 1.0163884162902832,grad_norm: 0.9999993872788263, iteration: 76043
loss: 0.9543250799179077,grad_norm: 0.9687615130394008, iteration: 76044
loss: 1.0417572259902954,grad_norm: 0.9999992339658278, iteration: 76045
loss: 0.9796335101127625,grad_norm: 0.9999989471406112, iteration: 76046
loss: 1.0060960054397583,grad_norm: 0.9999991003509211, iteration: 76047
loss: 1.0441776514053345,grad_norm: 0.9999991275549879, iteration: 76048
loss: 0.9988813996315002,grad_norm: 0.8617337966566375, iteration: 76049
loss: 0.9752236008644104,grad_norm: 0.9999989961771251, iteration: 76050
loss: 1.0209285020828247,grad_norm: 0.9999991021182013, iteration: 76051
loss: 1.0082935094833374,grad_norm: 0.9999992114372632, iteration: 76052
loss: 0.986602246761322,grad_norm: 0.9999992191380688, iteration: 76053
loss: 0.9787804484367371,grad_norm: 0.9791547372524665, iteration: 76054
loss: 1.007459044456482,grad_norm: 0.9317314277724423, iteration: 76055
loss: 1.0302281379699707,grad_norm: 0.9999992654885891, iteration: 76056
loss: 1.0305547714233398,grad_norm: 0.9999991654044746, iteration: 76057
loss: 0.982639491558075,grad_norm: 0.9673064969052534, iteration: 76058
loss: 0.9769687056541443,grad_norm: 0.9685330326332303, iteration: 76059
loss: 1.0224647521972656,grad_norm: 0.999999116509252, iteration: 76060
loss: 1.0119580030441284,grad_norm: 0.9999994722968942, iteration: 76061
loss: 0.9601035714149475,grad_norm: 0.999999201780081, iteration: 76062
loss: 0.9919371604919434,grad_norm: 0.999999125248657, iteration: 76063
loss: 0.9845794439315796,grad_norm: 0.9999991788509835, iteration: 76064
loss: 1.0142072439193726,grad_norm: 0.98104939631318, iteration: 76065
loss: 1.0659576654434204,grad_norm: 0.9261967530975972, iteration: 76066
loss: 1.0057240724563599,grad_norm: 0.9999991078803455, iteration: 76067
loss: 0.9898812174797058,grad_norm: 0.9999997873375279, iteration: 76068
loss: 0.9430772662162781,grad_norm: 0.9999992632998813, iteration: 76069
loss: 1.0121761560440063,grad_norm: 0.9999990146925473, iteration: 76070
loss: 0.9838943481445312,grad_norm: 0.9999992106312099, iteration: 76071
loss: 1.060984492301941,grad_norm: 0.9999996150119336, iteration: 76072
loss: 1.0059232711791992,grad_norm: 0.9999991996168374, iteration: 76073
loss: 0.9856978058815002,grad_norm: 0.9685326157149377, iteration: 76074
loss: 0.9878684878349304,grad_norm: 0.9100479073959834, iteration: 76075
loss: 1.0121283531188965,grad_norm: 0.9999992730476401, iteration: 76076
loss: 1.0256602764129639,grad_norm: 0.9999993093620464, iteration: 76077
loss: 1.0117216110229492,grad_norm: 0.9204594838816161, iteration: 76078
loss: 0.989698052406311,grad_norm: 0.9475635824923624, iteration: 76079
loss: 1.0193650722503662,grad_norm: 0.9999994246789538, iteration: 76080
loss: 0.9648473262786865,grad_norm: 0.9999992324482272, iteration: 76081
loss: 1.000970482826233,grad_norm: 0.9999992663610621, iteration: 76082
loss: 0.9831814169883728,grad_norm: 0.9999997357499248, iteration: 76083
loss: 1.010326623916626,grad_norm: 0.9999990365708148, iteration: 76084
loss: 0.9739084243774414,grad_norm: 0.9793994127938455, iteration: 76085
loss: 0.9861675500869751,grad_norm: 0.9999990512010611, iteration: 76086
loss: 0.9924520254135132,grad_norm: 0.9999990460103747, iteration: 76087
loss: 1.0309542417526245,grad_norm: 0.9999991356162397, iteration: 76088
loss: 1.0330687761306763,grad_norm: 0.9999992451013041, iteration: 76089
loss: 1.026364803314209,grad_norm: 0.9999991522865715, iteration: 76090
loss: 0.9895212054252625,grad_norm: 0.9656605691329626, iteration: 76091
loss: 1.0005855560302734,grad_norm: 0.9999993325437224, iteration: 76092
loss: 0.9988247752189636,grad_norm: 0.999999384523992, iteration: 76093
loss: 1.0058648586273193,grad_norm: 0.9999991573951654, iteration: 76094
loss: 1.0129368305206299,grad_norm: 0.9999994394336585, iteration: 76095
loss: 0.9633790850639343,grad_norm: 0.9999990836931666, iteration: 76096
loss: 1.0041937828063965,grad_norm: 0.8741567502835229, iteration: 76097
loss: 1.0243282318115234,grad_norm: 0.9999991071520795, iteration: 76098
loss: 0.9718865156173706,grad_norm: 0.999999176034206, iteration: 76099
loss: 1.0012378692626953,grad_norm: 0.9999990616614558, iteration: 76100
loss: 0.9750279784202576,grad_norm: 0.9084601533773572, iteration: 76101
loss: 1.0164326429367065,grad_norm: 0.98163864453209, iteration: 76102
loss: 0.9781680107116699,grad_norm: 0.999999104321113, iteration: 76103
loss: 1.0116052627563477,grad_norm: 0.8636986720711343, iteration: 76104
loss: 0.9829192161560059,grad_norm: 0.9999997778386565, iteration: 76105
loss: 1.0250085592269897,grad_norm: 0.9999989770521648, iteration: 76106
loss: 0.9592753648757935,grad_norm: 0.8621793770533965, iteration: 76107
loss: 0.9984715580940247,grad_norm: 0.9999991194448241, iteration: 76108
loss: 0.9632233381271362,grad_norm: 0.9999989466125815, iteration: 76109
loss: 1.0219320058822632,grad_norm: 0.9999991302193818, iteration: 76110
loss: 1.0065804719924927,grad_norm: 0.9959812476650077, iteration: 76111
loss: 1.027166485786438,grad_norm: 0.9999992153420781, iteration: 76112
loss: 1.0201197862625122,grad_norm: 0.999999066249184, iteration: 76113
loss: 1.0642222166061401,grad_norm: 0.9999992499357312, iteration: 76114
loss: 0.9615947604179382,grad_norm: 0.9568490541571162, iteration: 76115
loss: 0.9485047459602356,grad_norm: 0.8791068052474825, iteration: 76116
loss: 1.0000966787338257,grad_norm: 0.9977947767476996, iteration: 76117
loss: 1.0172309875488281,grad_norm: 0.9081182042830803, iteration: 76118
loss: 1.0125077962875366,grad_norm: 0.9999990656911245, iteration: 76119
loss: 0.9990755319595337,grad_norm: 0.9999992505596464, iteration: 76120
loss: 0.9938442707061768,grad_norm: 0.9999989858148288, iteration: 76121
loss: 0.9906710982322693,grad_norm: 0.9150457591168114, iteration: 76122
loss: 1.0110338926315308,grad_norm: 0.9999990959446031, iteration: 76123
loss: 1.0248438119888306,grad_norm: 0.9810651320099405, iteration: 76124
loss: 1.0119893550872803,grad_norm: 0.9968189766225355, iteration: 76125
loss: 1.0008596181869507,grad_norm: 0.9999992978076168, iteration: 76126
loss: 0.9784823656082153,grad_norm: 0.9999991520335328, iteration: 76127
loss: 1.0388091802597046,grad_norm: 0.9999991585106645, iteration: 76128
loss: 0.9935945868492126,grad_norm: 0.9999994584054224, iteration: 76129
loss: 1.0129203796386719,grad_norm: 0.8874764282814646, iteration: 76130
loss: 0.9979537129402161,grad_norm: 0.9999991455795108, iteration: 76131
loss: 0.9954985976219177,grad_norm: 0.8643735535893537, iteration: 76132
loss: 0.9559811949729919,grad_norm: 0.95290968700922, iteration: 76133
loss: 1.001610517501831,grad_norm: 0.9820745381176321, iteration: 76134
loss: 0.9794570803642273,grad_norm: 0.9999990208127721, iteration: 76135
loss: 0.9979683756828308,grad_norm: 0.9999991618127637, iteration: 76136
loss: 0.9963120818138123,grad_norm: 0.999999198950823, iteration: 76137
loss: 1.0413694381713867,grad_norm: 0.9999992480588628, iteration: 76138
loss: 1.0151262283325195,grad_norm: 0.9999992527861732, iteration: 76139
loss: 1.120465636253357,grad_norm: 0.9999997985222294, iteration: 76140
loss: 1.0072118043899536,grad_norm: 0.8071671813917825, iteration: 76141
loss: 0.9825876355171204,grad_norm: 0.9740764556751599, iteration: 76142
loss: 1.017879843711853,grad_norm: 0.9999992445573263, iteration: 76143
loss: 1.0666850805282593,grad_norm: 0.9999995988812724, iteration: 76144
loss: 0.9980742931365967,grad_norm: 0.999999034552893, iteration: 76145
loss: 1.020219326019287,grad_norm: 0.9999991969667823, iteration: 76146
loss: 0.9691603779792786,grad_norm: 0.999999131706936, iteration: 76147
loss: 1.01241934299469,grad_norm: 0.9999991578773778, iteration: 76148
loss: 0.9716184735298157,grad_norm: 0.9930876590342748, iteration: 76149
loss: 0.9894036054611206,grad_norm: 0.9829080996563615, iteration: 76150
loss: 1.0104526281356812,grad_norm: 0.8805788576818216, iteration: 76151
loss: 1.0188119411468506,grad_norm: 0.9999991652697708, iteration: 76152
loss: 0.9998874664306641,grad_norm: 0.9824229432766576, iteration: 76153
loss: 0.9968956708908081,grad_norm: 0.9999992642454946, iteration: 76154
loss: 0.9771100282669067,grad_norm: 0.9999990507686521, iteration: 76155
loss: 1.0051394701004028,grad_norm: 0.9685356529785221, iteration: 76156
loss: 0.9634173512458801,grad_norm: 0.9999994206160214, iteration: 76157
loss: 1.038820505142212,grad_norm: 0.9999991936728857, iteration: 76158
loss: 0.9685645699501038,grad_norm: 0.9999990855093442, iteration: 76159
loss: 0.9863241314888,grad_norm: 0.9999991321935827, iteration: 76160
loss: 1.0140676498413086,grad_norm: 0.9999989309120051, iteration: 76161
loss: 1.0181169509887695,grad_norm: 0.9999991044953119, iteration: 76162
loss: 1.0004949569702148,grad_norm: 0.9999992573352018, iteration: 76163
loss: 1.087162971496582,grad_norm: 0.9999992734873007, iteration: 76164
loss: 0.9974974989891052,grad_norm: 0.9454288139887248, iteration: 76165
loss: 0.9884606003761292,grad_norm: 0.9999993261627481, iteration: 76166
loss: 0.9887754917144775,grad_norm: 0.9999991375450442, iteration: 76167
loss: 0.999819815158844,grad_norm: 0.9999991978482408, iteration: 76168
loss: 0.9810205101966858,grad_norm: 0.999999057274651, iteration: 76169
loss: 0.995954155921936,grad_norm: 0.9999992447903533, iteration: 76170
loss: 1.0116491317749023,grad_norm: 0.999999160975408, iteration: 76171
loss: 0.9455001354217529,grad_norm: 0.9999991375731918, iteration: 76172
loss: 0.9802093505859375,grad_norm: 0.9999991345175815, iteration: 76173
loss: 0.9844088554382324,grad_norm: 0.9159321954506301, iteration: 76174
loss: 0.97904372215271,grad_norm: 0.9886233482256079, iteration: 76175
loss: 1.00033700466156,grad_norm: 0.9999990498853155, iteration: 76176
loss: 1.009054183959961,grad_norm: 0.9999989809311148, iteration: 76177
loss: 0.9659152626991272,grad_norm: 0.9999990141353539, iteration: 76178
loss: 0.9852440357208252,grad_norm: 0.9999990245189282, iteration: 76179
loss: 0.9759354591369629,grad_norm: 0.9999992706240476, iteration: 76180
loss: 1.0420877933502197,grad_norm: 0.9999992101123624, iteration: 76181
loss: 1.0326180458068848,grad_norm: 0.9999991849595595, iteration: 76182
loss: 0.98210209608078,grad_norm: 0.999999134400095, iteration: 76183
loss: 0.9823182225227356,grad_norm: 0.9999991786796215, iteration: 76184
loss: 1.0209424495697021,grad_norm: 0.95226249144742, iteration: 76185
loss: 0.9978644251823425,grad_norm: 0.9999993549747132, iteration: 76186
loss: 1.0095053911209106,grad_norm: 0.9230170449394968, iteration: 76187
loss: 0.9817178249359131,grad_norm: 0.9999991650886703, iteration: 76188
loss: 0.9868813753128052,grad_norm: 0.999999322308857, iteration: 76189
loss: 1.0582947731018066,grad_norm: 0.9999990726776584, iteration: 76190
loss: 0.9888707995414734,grad_norm: 0.999999336617028, iteration: 76191
loss: 0.9734979867935181,grad_norm: 0.8924364070648776, iteration: 76192
loss: 0.987421989440918,grad_norm: 0.9999991320685516, iteration: 76193
loss: 0.991527795791626,grad_norm: 0.9906110366657621, iteration: 76194
loss: 0.9932572841644287,grad_norm: 0.9999991241188644, iteration: 76195
loss: 0.9740851521492004,grad_norm: 0.9999991633548423, iteration: 76196
loss: 0.9914478659629822,grad_norm: 0.9999998199232322, iteration: 76197
loss: 0.9738389849662781,grad_norm: 0.873457308443841, iteration: 76198
loss: 1.0262781381607056,grad_norm: 0.9999992221022941, iteration: 76199
loss: 1.0158058404922485,grad_norm: 0.9037504380362752, iteration: 76200
loss: 1.011433482170105,grad_norm: 0.934015838576326, iteration: 76201
loss: 1.0114835500717163,grad_norm: 0.9999989420410216, iteration: 76202
loss: 1.0065839290618896,grad_norm: 0.9999993275169299, iteration: 76203
loss: 0.9964427351951599,grad_norm: 0.9999990110402893, iteration: 76204
loss: 1.0214790105819702,grad_norm: 0.8304091373008388, iteration: 76205
loss: 1.0024528503417969,grad_norm: 0.9278254927476013, iteration: 76206
loss: 1.0121150016784668,grad_norm: 0.9490023965449225, iteration: 76207
loss: 0.9831321239471436,grad_norm: 0.9856797696444616, iteration: 76208
loss: 1.0028420686721802,grad_norm: 0.999999169968402, iteration: 76209
loss: 1.0183906555175781,grad_norm: 0.9999990814378465, iteration: 76210
loss: 0.9980283975601196,grad_norm: 0.9980227270437144, iteration: 76211
loss: 0.9834585189819336,grad_norm: 0.9793277288407268, iteration: 76212
loss: 1.0015785694122314,grad_norm: 0.9999989196559094, iteration: 76213
loss: 1.004423975944519,grad_norm: 0.9249907802107136, iteration: 76214
loss: 1.0384128093719482,grad_norm: 0.9999993002524332, iteration: 76215
loss: 1.0124565362930298,grad_norm: 0.9999991147565068, iteration: 76216
loss: 0.968333899974823,grad_norm: 0.9999991419338397, iteration: 76217
loss: 1.0332250595092773,grad_norm: 0.9999995163814477, iteration: 76218
loss: 1.0038529634475708,grad_norm: 0.9999991538471323, iteration: 76219
loss: 0.9864644408226013,grad_norm: 0.9923308405668824, iteration: 76220
loss: 0.9928000569343567,grad_norm: 0.9999991400206417, iteration: 76221
loss: 0.9968810677528381,grad_norm: 0.9999990551538595, iteration: 76222
loss: 1.028113603591919,grad_norm: 0.8665052490142235, iteration: 76223
loss: 1.1587613821029663,grad_norm: 0.9999992599173247, iteration: 76224
loss: 0.9850303530693054,grad_norm: 0.9999989419154857, iteration: 76225
loss: 1.0989125967025757,grad_norm: 0.9999996653784003, iteration: 76226
loss: 1.1126604080200195,grad_norm: 0.9999995734901893, iteration: 76227
loss: 1.0422664880752563,grad_norm: 0.9999995430429608, iteration: 76228
loss: 0.9473893046379089,grad_norm: 0.999999177131475, iteration: 76229
loss: 1.0018616914749146,grad_norm: 0.9999991180350382, iteration: 76230
loss: 1.0139425992965698,grad_norm: 0.9999992217552969, iteration: 76231
loss: 1.03467857837677,grad_norm: 0.999999060842818, iteration: 76232
loss: 0.9992669224739075,grad_norm: 0.9999990815171644, iteration: 76233
loss: 0.9911985397338867,grad_norm: 0.9127323732920322, iteration: 76234
loss: 0.9919245839118958,grad_norm: 0.9999992330949287, iteration: 76235
loss: 1.0156455039978027,grad_norm: 0.9999990744930105, iteration: 76236
loss: 1.0131950378417969,grad_norm: 0.9999991703868288, iteration: 76237
loss: 1.0053147077560425,grad_norm: 0.9999989780825901, iteration: 76238
loss: 1.0342938899993896,grad_norm: 0.9999991187848423, iteration: 76239
loss: 1.031343936920166,grad_norm: 0.9999990430167361, iteration: 76240
loss: 1.002114176750183,grad_norm: 0.8730552160728953, iteration: 76241
loss: 0.9887539744377136,grad_norm: 0.9668105751097118, iteration: 76242
loss: 1.0283255577087402,grad_norm: 0.9999991338433903, iteration: 76243
loss: 1.0024923086166382,grad_norm: 0.9999992454787044, iteration: 76244
loss: 1.038738489151001,grad_norm: 0.9999991354989115, iteration: 76245
loss: 0.9740106463432312,grad_norm: 0.9463154027398868, iteration: 76246
loss: 0.9818560481071472,grad_norm: 0.9999990747962103, iteration: 76247
loss: 1.000885009765625,grad_norm: 0.9999989867930955, iteration: 76248
loss: 0.9982832074165344,grad_norm: 0.9999990505644516, iteration: 76249
loss: 1.0375194549560547,grad_norm: 0.9382711080673123, iteration: 76250
loss: 0.9977723956108093,grad_norm: 0.9999991647643233, iteration: 76251
loss: 0.9666215777397156,grad_norm: 0.9461268531523588, iteration: 76252
loss: 1.0074540376663208,grad_norm: 0.999999903867944, iteration: 76253
loss: 1.0225145816802979,grad_norm: 0.9999989560434961, iteration: 76254
loss: 0.9724081158638,grad_norm: 0.9999992869005877, iteration: 76255
loss: 1.0124244689941406,grad_norm: 0.9999991281608512, iteration: 76256
loss: 1.0219202041625977,grad_norm: 0.9999991155562307, iteration: 76257
loss: 0.9560593366622925,grad_norm: 0.9696258573713699, iteration: 76258
loss: 1.0220627784729004,grad_norm: 0.9999990223025107, iteration: 76259
loss: 1.004410743713379,grad_norm: 0.9999991607690106, iteration: 76260
loss: 1.0101321935653687,grad_norm: 0.9999991917061416, iteration: 76261
loss: 1.0096102952957153,grad_norm: 0.9999988971869865, iteration: 76262
loss: 1.0373655557632446,grad_norm: 0.8769844980774739, iteration: 76263
loss: 0.9792034029960632,grad_norm: 0.9999994739060167, iteration: 76264
loss: 0.9868554472923279,grad_norm: 0.9999990056290585, iteration: 76265
loss: 0.9738761186599731,grad_norm: 0.9999991312787526, iteration: 76266
loss: 0.9996399283409119,grad_norm: 0.9999999411127871, iteration: 76267
loss: 1.022038459777832,grad_norm: 0.9949732590907875, iteration: 76268
loss: 0.9708116054534912,grad_norm: 0.9999992012838805, iteration: 76269
loss: 1.005622386932373,grad_norm: 0.972493009329202, iteration: 76270
loss: 1.0284010171890259,grad_norm: 0.9613403952083557, iteration: 76271
loss: 1.0121111869812012,grad_norm: 0.9999992211832133, iteration: 76272
loss: 1.006801724433899,grad_norm: 0.9999992836504967, iteration: 76273
loss: 0.9745471477508545,grad_norm: 0.9999990342583811, iteration: 76274
loss: 0.966151773929596,grad_norm: 0.9999991154033503, iteration: 76275
loss: 0.9800202250480652,grad_norm: 0.9377816865310026, iteration: 76276
loss: 1.0204272270202637,grad_norm: 0.9999995875269252, iteration: 76277
loss: 0.997713565826416,grad_norm: 0.9999991867561228, iteration: 76278
loss: 1.0383408069610596,grad_norm: 0.9999999190406723, iteration: 76279
loss: 1.0203287601470947,grad_norm: 0.9999991398470647, iteration: 76280
loss: 1.0039254426956177,grad_norm: 0.947376771226782, iteration: 76281
loss: 0.9921708106994629,grad_norm: 0.9788789456489017, iteration: 76282
loss: 1.0110538005828857,grad_norm: 0.9999992753224984, iteration: 76283
loss: 1.0176430940628052,grad_norm: 0.9999991637052831, iteration: 76284
loss: 0.9788049459457397,grad_norm: 0.954630803436073, iteration: 76285
loss: 1.026816964149475,grad_norm: 0.9999995269581459, iteration: 76286
loss: 0.99888014793396,grad_norm: 0.999999211722087, iteration: 76287
loss: 0.9817906022071838,grad_norm: 0.9821646797797704, iteration: 76288
loss: 0.9892399907112122,grad_norm: 0.9202892034655853, iteration: 76289
loss: 1.0233951807022095,grad_norm: 0.9999990761015063, iteration: 76290
loss: 1.0550854206085205,grad_norm: 0.9964283198337508, iteration: 76291
loss: 0.982414722442627,grad_norm: 0.9999992907454021, iteration: 76292
loss: 1.0194448232650757,grad_norm: 0.8187536258844864, iteration: 76293
loss: 1.0057411193847656,grad_norm: 0.9999990741428562, iteration: 76294
loss: 0.9819023013114929,grad_norm: 0.9999990268683537, iteration: 76295
loss: 0.9877274632453918,grad_norm: 0.999999120335784, iteration: 76296
loss: 1.0159764289855957,grad_norm: 0.9999991996512503, iteration: 76297
loss: 1.0330352783203125,grad_norm: 0.9999991910213785, iteration: 76298
loss: 1.0775080919265747,grad_norm: 0.9999996102832247, iteration: 76299
loss: 1.0027379989624023,grad_norm: 0.9443003351123016, iteration: 76300
loss: 1.0682601928710938,grad_norm: 0.9999991804472814, iteration: 76301
loss: 1.0039509534835815,grad_norm: 0.9275558897917882, iteration: 76302
loss: 0.9972900748252869,grad_norm: 0.9999991691834442, iteration: 76303
loss: 1.0113829374313354,grad_norm: 0.9999990012737474, iteration: 76304
loss: 1.0017644166946411,grad_norm: 0.9008608500486355, iteration: 76305
loss: 1.0172462463378906,grad_norm: 0.9999994094972725, iteration: 76306
loss: 1.013620138168335,grad_norm: 0.9990942662920779, iteration: 76307
loss: 0.9579970836639404,grad_norm: 0.9999992873191085, iteration: 76308
loss: 0.9656172394752502,grad_norm: 0.9999993040091965, iteration: 76309
loss: 1.0028895139694214,grad_norm: 0.8605495934674675, iteration: 76310
loss: 0.9569758176803589,grad_norm: 0.9999991140576627, iteration: 76311
loss: 1.0142439603805542,grad_norm: 0.946675643587133, iteration: 76312
loss: 0.9914374947547913,grad_norm: 0.9999990849892868, iteration: 76313
loss: 1.0822944641113281,grad_norm: 0.9999991699573054, iteration: 76314
loss: 1.027655839920044,grad_norm: 0.9999991581328471, iteration: 76315
loss: 0.9634140729904175,grad_norm: 0.9999990841462771, iteration: 76316
loss: 0.9954661130905151,grad_norm: 0.8212063527772485, iteration: 76317
loss: 0.9878545999526978,grad_norm: 0.999999999525163, iteration: 76318
loss: 0.9539045691490173,grad_norm: 0.9999991480492354, iteration: 76319
loss: 1.0030583143234253,grad_norm: 0.999999119395708, iteration: 76320
loss: 1.0268290042877197,grad_norm: 0.9913190400527515, iteration: 76321
loss: 0.9742178916931152,grad_norm: 0.9905336289933758, iteration: 76322
loss: 1.0120733976364136,grad_norm: 0.9904920155091749, iteration: 76323
loss: 1.0316678285598755,grad_norm: 0.999999093037442, iteration: 76324
loss: 0.9915264248847961,grad_norm: 0.963057509003689, iteration: 76325
loss: 0.9975296258926392,grad_norm: 0.99999908347006, iteration: 76326
loss: 0.992601215839386,grad_norm: 0.9999990257440247, iteration: 76327
loss: 0.9878919124603271,grad_norm: 0.9999991292566924, iteration: 76328
loss: 0.9874944090843201,grad_norm: 0.9576815836911218, iteration: 76329
loss: 1.028882622718811,grad_norm: 0.9999990608922623, iteration: 76330
loss: 1.0393394231796265,grad_norm: 0.903095279820008, iteration: 76331
loss: 1.0253888368606567,grad_norm: 0.999999605212833, iteration: 76332
loss: 1.0062631368637085,grad_norm: 0.999999159539786, iteration: 76333
loss: 1.004597544670105,grad_norm: 0.9666883947757698, iteration: 76334
loss: 1.0186548233032227,grad_norm: 0.9714108005050635, iteration: 76335
loss: 0.9633845686912537,grad_norm: 0.9999991164878897, iteration: 76336
loss: 1.047383189201355,grad_norm: 0.9999996416902402, iteration: 76337
loss: 1.0102726221084595,grad_norm: 0.9999995316131796, iteration: 76338
loss: 0.9967564344406128,grad_norm: 0.9675505784971157, iteration: 76339
loss: 0.9816509485244751,grad_norm: 0.9897574994276662, iteration: 76340
loss: 1.0066982507705688,grad_norm: 0.9999991596971572, iteration: 76341
loss: 0.9795939922332764,grad_norm: 0.9999989670221384, iteration: 76342
loss: 1.0031468868255615,grad_norm: 0.9528202710212655, iteration: 76343
loss: 0.9981778264045715,grad_norm: 0.9999991947087605, iteration: 76344
loss: 0.9970071911811829,grad_norm: 0.9999992063925482, iteration: 76345
loss: 1.0192630290985107,grad_norm: 0.999999919867706, iteration: 76346
loss: 0.9735064506530762,grad_norm: 0.9999990553130621, iteration: 76347
loss: 1.0179110765457153,grad_norm: 0.9999996696279169, iteration: 76348
loss: 1.0151258707046509,grad_norm: 0.9999991139627891, iteration: 76349
loss: 0.9694543480873108,grad_norm: 0.9999990070047866, iteration: 76350
loss: 0.9844459295272827,grad_norm: 0.9999991064240015, iteration: 76351
loss: 0.9629778265953064,grad_norm: 0.9999990963762765, iteration: 76352
loss: 0.9929291605949402,grad_norm: 0.9771733880300905, iteration: 76353
loss: 0.9913693070411682,grad_norm: 0.9999990865551309, iteration: 76354
loss: 0.9825097918510437,grad_norm: 0.9999991256652574, iteration: 76355
loss: 1.042142629623413,grad_norm: 0.9999991358657777, iteration: 76356
loss: 0.9750412106513977,grad_norm: 0.9999991079368881, iteration: 76357
loss: 0.9912536144256592,grad_norm: 0.8798636060082241, iteration: 76358
loss: 0.986308753490448,grad_norm: 0.9760660236147901, iteration: 76359
loss: 1.0022391080856323,grad_norm: 0.9999991285306358, iteration: 76360
loss: 1.0177993774414062,grad_norm: 0.999999111857714, iteration: 76361
loss: 0.9932153820991516,grad_norm: 0.9999992273057475, iteration: 76362
loss: 0.9735191464424133,grad_norm: 0.9999991519026462, iteration: 76363
loss: 0.9979487061500549,grad_norm: 0.9999989219381064, iteration: 76364
loss: 1.007608413696289,grad_norm: 0.999999277815472, iteration: 76365
loss: 1.032513976097107,grad_norm: 0.9999991631623929, iteration: 76366
loss: 0.997963547706604,grad_norm: 0.9999991427453181, iteration: 76367
loss: 1.042807698249817,grad_norm: 0.9999996095423563, iteration: 76368
loss: 1.012650728225708,grad_norm: 0.9999990743442988, iteration: 76369
loss: 0.9397746324539185,grad_norm: 0.9999991492008097, iteration: 76370
loss: 1.009071946144104,grad_norm: 0.9999998656486849, iteration: 76371
loss: 0.9602228999137878,grad_norm: 0.9999992462433507, iteration: 76372
loss: 1.0097705125808716,grad_norm: 0.9339794877240737, iteration: 76373
loss: 1.011472463607788,grad_norm: 0.9999992343196464, iteration: 76374
loss: 1.0291626453399658,grad_norm: 0.9781174321296925, iteration: 76375
loss: 1.0158106088638306,grad_norm: 0.9402282539473202, iteration: 76376
loss: 1.0153653621673584,grad_norm: 0.8966967620233176, iteration: 76377
loss: 0.9700992703437805,grad_norm: 0.9999993146111005, iteration: 76378
loss: 0.9935718774795532,grad_norm: 0.9999992379080777, iteration: 76379
loss: 1.019300937652588,grad_norm: 0.999999174237835, iteration: 76380
loss: 0.9704448580741882,grad_norm: 0.9999990959006123, iteration: 76381
loss: 1.0276119709014893,grad_norm: 0.9856130830247377, iteration: 76382
loss: 0.9667212963104248,grad_norm: 0.8912180759601774, iteration: 76383
loss: 0.992855966091156,grad_norm: 0.9999991417247676, iteration: 76384
loss: 0.9969766139984131,grad_norm: 0.9901711366544443, iteration: 76385
loss: 1.0164823532104492,grad_norm: 0.9999993098842448, iteration: 76386
loss: 1.0006805658340454,grad_norm: 0.9999991531675165, iteration: 76387
loss: 0.9874842762947083,grad_norm: 0.9999992649133916, iteration: 76388
loss: 0.9708338975906372,grad_norm: 0.9999990518383023, iteration: 76389
loss: 1.005123257637024,grad_norm: 0.9447166402476074, iteration: 76390
loss: 1.0072256326675415,grad_norm: 0.9613386314571134, iteration: 76391
loss: 0.996882438659668,grad_norm: 0.9581415278652647, iteration: 76392
loss: 1.024919033050537,grad_norm: 0.9999992632794481, iteration: 76393
loss: 0.9998292326927185,grad_norm: 0.9999991648185766, iteration: 76394
loss: 1.0151299238204956,grad_norm: 0.9999991509071202, iteration: 76395
loss: 0.9840009212493896,grad_norm: 0.9999991028739219, iteration: 76396
loss: 0.9860672354698181,grad_norm: 0.9999990686163851, iteration: 76397
loss: 1.0108423233032227,grad_norm: 0.999999314881951, iteration: 76398
loss: 0.9946737885475159,grad_norm: 0.9991963796301078, iteration: 76399
loss: 1.012733817100525,grad_norm: 0.9752944486973911, iteration: 76400
loss: 1.0308834314346313,grad_norm: 0.9999992334532627, iteration: 76401
loss: 1.0297335386276245,grad_norm: 0.9999990467015486, iteration: 76402
loss: 0.9978731870651245,grad_norm: 0.9999990197835459, iteration: 76403
loss: 1.0336922407150269,grad_norm: 0.9999998106268712, iteration: 76404
loss: 0.9756068587303162,grad_norm: 0.9999990165676762, iteration: 76405
loss: 1.0029380321502686,grad_norm: 0.9946270493867942, iteration: 76406
loss: 0.9934336543083191,grad_norm: 0.9999990723037382, iteration: 76407
loss: 0.9775280356407166,grad_norm: 0.959934226871182, iteration: 76408
loss: 0.9781021475791931,grad_norm: 0.9999993448777126, iteration: 76409
loss: 1.0358535051345825,grad_norm: 0.9658010624843213, iteration: 76410
loss: 1.0075695514678955,grad_norm: 0.9999990557509437, iteration: 76411
loss: 0.9681406021118164,grad_norm: 0.9999989433764294, iteration: 76412
loss: 0.9853817224502563,grad_norm: 0.9999990114966236, iteration: 76413
loss: 1.0074540376663208,grad_norm: 0.9912677429670862, iteration: 76414
loss: 0.9951441287994385,grad_norm: 0.999999038320199, iteration: 76415
loss: 1.0282281637191772,grad_norm: 0.9565764289718297, iteration: 76416
loss: 0.9999306201934814,grad_norm: 0.9999990714120207, iteration: 76417
loss: 1.0338504314422607,grad_norm: 0.9999996933337437, iteration: 76418
loss: 0.9722484350204468,grad_norm: 0.9999990791360577, iteration: 76419
loss: 1.031282901763916,grad_norm: 0.9999988525807945, iteration: 76420
loss: 1.0440078973770142,grad_norm: 0.9999991006412252, iteration: 76421
loss: 0.9755756855010986,grad_norm: 0.8010039903133173, iteration: 76422
loss: 0.9949444532394409,grad_norm: 0.9999991831973787, iteration: 76423
loss: 0.9812750816345215,grad_norm: 0.9373180063923364, iteration: 76424
loss: 1.0090506076812744,grad_norm: 0.9999992249033245, iteration: 76425
loss: 1.015164852142334,grad_norm: 0.921856614001568, iteration: 76426
loss: 1.0095595121383667,grad_norm: 0.9999991833527077, iteration: 76427
loss: 1.017454981803894,grad_norm: 0.9999991442244498, iteration: 76428
loss: 1.0079808235168457,grad_norm: 0.9999993669902268, iteration: 76429
loss: 0.9992053508758545,grad_norm: 0.999998998212932, iteration: 76430
loss: 1.005581259727478,grad_norm: 0.9999991857280498, iteration: 76431
loss: 0.9935848712921143,grad_norm: 0.8680895826837807, iteration: 76432
loss: 0.9752169847488403,grad_norm: 0.9587557633512894, iteration: 76433
loss: 0.9786129593849182,grad_norm: 0.9079448206032803, iteration: 76434
loss: 0.9946590662002563,grad_norm: 0.9999996118339491, iteration: 76435
loss: 0.9913483262062073,grad_norm: 0.9999991319058285, iteration: 76436
loss: 0.9867973923683167,grad_norm: 0.9999991264750959, iteration: 76437
loss: 1.006726622581482,grad_norm: 0.9999992593158632, iteration: 76438
loss: 1.0049322843551636,grad_norm: 0.8221775138016973, iteration: 76439
loss: 0.9656277298927307,grad_norm: 0.999999062426909, iteration: 76440
loss: 0.9767871499061584,grad_norm: 0.9999989745673454, iteration: 76441
loss: 1.0059394836425781,grad_norm: 0.972532178119329, iteration: 76442
loss: 1.0007946491241455,grad_norm: 0.9608310625535326, iteration: 76443
loss: 0.9895354509353638,grad_norm: 0.9129303192936252, iteration: 76444
loss: 0.988608717918396,grad_norm: 0.9999991182233199, iteration: 76445
loss: 0.9734348654747009,grad_norm: 0.9999992678034175, iteration: 76446
loss: 1.0370501279830933,grad_norm: 0.9999994331805764, iteration: 76447
loss: 1.0023659467697144,grad_norm: 0.9181833877641311, iteration: 76448
loss: 1.0095419883728027,grad_norm: 0.9999990928503172, iteration: 76449
loss: 0.9948168992996216,grad_norm: 0.9999991750496079, iteration: 76450
loss: 1.001546859741211,grad_norm: 0.9222082261602506, iteration: 76451
loss: 0.9907682538032532,grad_norm: 0.9340496367877783, iteration: 76452
loss: 1.030255675315857,grad_norm: 0.9680999606968689, iteration: 76453
loss: 1.0325863361358643,grad_norm: 0.9653096750968398, iteration: 76454
loss: 0.9981371164321899,grad_norm: 0.9999992170904396, iteration: 76455
loss: 1.0293302536010742,grad_norm: 0.9999993933033028, iteration: 76456
loss: 1.009507656097412,grad_norm: 0.9999991762821404, iteration: 76457
loss: 1.0056850910186768,grad_norm: 0.9999993016176444, iteration: 76458
loss: 1.0201475620269775,grad_norm: 0.9999990700894551, iteration: 76459
loss: 1.0152357816696167,grad_norm: 0.9999990249596611, iteration: 76460
loss: 1.0000548362731934,grad_norm: 0.9999989911932422, iteration: 76461
loss: 0.9629514813423157,grad_norm: 0.9999993894503404, iteration: 76462
loss: 1.0169532299041748,grad_norm: 0.9999990615875667, iteration: 76463
loss: 1.0356497764587402,grad_norm: 0.9999992584560151, iteration: 76464
loss: 0.9863681197166443,grad_norm: 0.9999991015406738, iteration: 76465
loss: 1.0333292484283447,grad_norm: 0.9999991709898581, iteration: 76466
loss: 1.0154391527175903,grad_norm: 0.9843567062977001, iteration: 76467
loss: 1.015824794769287,grad_norm: 0.9999989653992419, iteration: 76468
loss: 0.9871962070465088,grad_norm: 0.9999990029967726, iteration: 76469
loss: 0.998942494392395,grad_norm: 0.9999989982435384, iteration: 76470
loss: 0.9999324083328247,grad_norm: 0.9999991923195175, iteration: 76471
loss: 1.0142775774002075,grad_norm: 0.9999992278395508, iteration: 76472
loss: 0.9897187948226929,grad_norm: 0.9999990431177109, iteration: 76473
loss: 1.0084317922592163,grad_norm: 0.9999998054009424, iteration: 76474
loss: 0.9845998883247375,grad_norm: 0.9380134829890798, iteration: 76475
loss: 0.9993724226951599,grad_norm: 0.9999992293379241, iteration: 76476
loss: 0.9781198501586914,grad_norm: 0.9999992017890493, iteration: 76477
loss: 1.0121744871139526,grad_norm: 0.9999990663196604, iteration: 76478
loss: 0.988059401512146,grad_norm: 0.9999990334746736, iteration: 76479
loss: 1.0536812543869019,grad_norm: 0.9999995746221778, iteration: 76480
loss: 0.996020495891571,grad_norm: 0.9872755560799837, iteration: 76481
loss: 0.9995967745780945,grad_norm: 0.968564187005552, iteration: 76482
loss: 1.0279645919799805,grad_norm: 0.9999992532376775, iteration: 76483
loss: 1.010648250579834,grad_norm: 0.9999992366297578, iteration: 76484
loss: 0.9949238300323486,grad_norm: 0.9999990746488341, iteration: 76485
loss: 1.0343399047851562,grad_norm: 0.9999995085499818, iteration: 76486
loss: 1.0113310813903809,grad_norm: 0.999999325877559, iteration: 76487
loss: 1.2366193532943726,grad_norm: 0.9999995495179566, iteration: 76488
loss: 0.9786787033081055,grad_norm: 0.9999990703371335, iteration: 76489
loss: 1.053891897201538,grad_norm: 0.999999633945258, iteration: 76490
loss: 1.0025153160095215,grad_norm: 0.9999991141984954, iteration: 76491
loss: 1.0020688772201538,grad_norm: 0.9999992913362278, iteration: 76492
loss: 0.9904370307922363,grad_norm: 0.9999990759941478, iteration: 76493
loss: 1.049789547920227,grad_norm: 0.999999964312218, iteration: 76494
loss: 0.999706506729126,grad_norm: 0.999999131254963, iteration: 76495
loss: 1.009335994720459,grad_norm: 0.9162919931681623, iteration: 76496
loss: 1.0145355463027954,grad_norm: 0.948430284238457, iteration: 76497
loss: 1.0188047885894775,grad_norm: 0.9999992668483636, iteration: 76498
loss: 1.0765652656555176,grad_norm: 0.9999998311210683, iteration: 76499
loss: 1.0204739570617676,grad_norm: 0.9999993353438039, iteration: 76500
loss: 0.9688548445701599,grad_norm: 0.9368722673211314, iteration: 76501
loss: 0.9625707268714905,grad_norm: 0.9999992653665023, iteration: 76502
loss: 1.0150614976882935,grad_norm: 0.9999992166361552, iteration: 76503
loss: 1.0107060670852661,grad_norm: 0.9999993983733123, iteration: 76504
loss: 1.0072773694992065,grad_norm: 0.9664959140444362, iteration: 76505
loss: 1.0031992197036743,grad_norm: 0.999999152550612, iteration: 76506
loss: 0.9897863268852234,grad_norm: 0.9873721441174176, iteration: 76507
loss: 0.9965230226516724,grad_norm: 0.99999909360524, iteration: 76508
loss: 0.9498039484024048,grad_norm: 0.8694425101818849, iteration: 76509
loss: 1.0126912593841553,grad_norm: 0.9999995578346295, iteration: 76510
loss: 1.0013878345489502,grad_norm: 0.9999994565690444, iteration: 76511
loss: 0.9819015264511108,grad_norm: 0.8403104577930566, iteration: 76512
loss: 0.9883135557174683,grad_norm: 0.9999992077134737, iteration: 76513
loss: 0.9962190985679626,grad_norm: 0.9999992587204317, iteration: 76514
loss: 1.022094488143921,grad_norm: 0.9999990804951582, iteration: 76515
loss: 0.9541065692901611,grad_norm: 0.9999992036354496, iteration: 76516
loss: 0.9999037384986877,grad_norm: 0.8882733340481729, iteration: 76517
loss: 1.0156956911087036,grad_norm: 0.9999991913796549, iteration: 76518
loss: 0.990552544593811,grad_norm: 0.9752319121172243, iteration: 76519
loss: 0.99684739112854,grad_norm: 0.999999076711042, iteration: 76520
loss: 1.0207358598709106,grad_norm: 0.9999994488307317, iteration: 76521
loss: 0.9751669764518738,grad_norm: 0.9999991265780471, iteration: 76522
loss: 1.0100330114364624,grad_norm: 0.9999989885569719, iteration: 76523
loss: 0.9868491291999817,grad_norm: 0.99999909088111, iteration: 76524
loss: 1.0197595357894897,grad_norm: 0.9999991244962739, iteration: 76525
loss: 0.9943388104438782,grad_norm: 0.9999996955986288, iteration: 76526
loss: 0.9671854972839355,grad_norm: 0.9999992666784292, iteration: 76527
loss: 1.0022988319396973,grad_norm: 0.999999063999909, iteration: 76528
loss: 1.0128636360168457,grad_norm: 0.9999991963754663, iteration: 76529
loss: 0.9855948686599731,grad_norm: 0.9999992326937129, iteration: 76530
loss: 1.0235776901245117,grad_norm: 0.9999990384596419, iteration: 76531
loss: 0.9555496573448181,grad_norm: 0.9484263263289839, iteration: 76532
loss: 1.0711718797683716,grad_norm: 0.9999992886772534, iteration: 76533
loss: 0.9942561388015747,grad_norm: 0.9286834676674405, iteration: 76534
loss: 1.0102691650390625,grad_norm: 0.9999991209641037, iteration: 76535
loss: 1.0255461931228638,grad_norm: 0.9999990708446901, iteration: 76536
loss: 0.9847061634063721,grad_norm: 0.9999990448223008, iteration: 76537
loss: 1.005115270614624,grad_norm: 0.9823750740757717, iteration: 76538
loss: 1.0360441207885742,grad_norm: 0.9376390048560838, iteration: 76539
loss: 1.0000250339508057,grad_norm: 0.9999997617190325, iteration: 76540
loss: 0.9732692241668701,grad_norm: 0.9167482152906705, iteration: 76541
loss: 1.0093798637390137,grad_norm: 0.999999140698707, iteration: 76542
loss: 0.9764511585235596,grad_norm: 0.9999994725144332, iteration: 76543
loss: 0.9602351188659668,grad_norm: 0.8885497542586156, iteration: 76544
loss: 0.9802789688110352,grad_norm: 0.9999990376807198, iteration: 76545
loss: 1.0074145793914795,grad_norm: 0.9999990439029763, iteration: 76546
loss: 1.0298436880111694,grad_norm: 0.9999990452157365, iteration: 76547
loss: 0.9663154482841492,grad_norm: 0.9999992406856685, iteration: 76548
loss: 0.987372636795044,grad_norm: 0.9691488185648125, iteration: 76549
loss: 0.9919137358665466,grad_norm: 0.9196094816985749, iteration: 76550
loss: 1.0054190158843994,grad_norm: 0.9362584735272574, iteration: 76551
loss: 1.0528604984283447,grad_norm: 0.9999996348925878, iteration: 76552
loss: 0.9832592606544495,grad_norm: 0.9999990933226032, iteration: 76553
loss: 0.9984172582626343,grad_norm: 0.9999998885674048, iteration: 76554
loss: 1.0150760412216187,grad_norm: 0.9436997477886614, iteration: 76555
loss: 0.9830559492111206,grad_norm: 0.9999991593279703, iteration: 76556
loss: 1.0141876935958862,grad_norm: 0.9999989561541169, iteration: 76557
loss: 0.9875606298446655,grad_norm: 0.9999991710922568, iteration: 76558
loss: 0.9969286322593689,grad_norm: 0.9999991641137584, iteration: 76559
loss: 0.9707996249198914,grad_norm: 0.9999991495293132, iteration: 76560
loss: 1.0306700468063354,grad_norm: 0.999999474405305, iteration: 76561
loss: 1.0007964372634888,grad_norm: 0.9999992280966388, iteration: 76562
loss: 0.9834017753601074,grad_norm: 0.9999990937343726, iteration: 76563
loss: 1.1177159547805786,grad_norm: 0.9999997784430014, iteration: 76564
loss: 1.0030900239944458,grad_norm: 0.9838958729862098, iteration: 76565
loss: 0.9872470498085022,grad_norm: 0.9999992546918549, iteration: 76566
loss: 1.0000048875808716,grad_norm: 0.9999991067766627, iteration: 76567
loss: 0.990553081035614,grad_norm: 0.9999991265956886, iteration: 76568
loss: 1.0183987617492676,grad_norm: 0.9999992522563202, iteration: 76569
loss: 0.9927929639816284,grad_norm: 0.9999991136820848, iteration: 76570
loss: 1.028090000152588,grad_norm: 0.9999991719490825, iteration: 76571
loss: 1.026520848274231,grad_norm: 0.9999993258041685, iteration: 76572
loss: 0.9833773374557495,grad_norm: 0.999999047908298, iteration: 76573
loss: 1.0455533266067505,grad_norm: 0.9999992082276485, iteration: 76574
loss: 1.0206513404846191,grad_norm: 0.9091644196084199, iteration: 76575
loss: 1.0243278741836548,grad_norm: 0.9999991769543556, iteration: 76576
loss: 0.9959388375282288,grad_norm: 0.9999992428615468, iteration: 76577
loss: 1.0289093255996704,grad_norm: 0.9999992263393248, iteration: 76578
loss: 1.034881353378296,grad_norm: 0.9999992107138059, iteration: 76579
loss: 1.0669583082199097,grad_norm: 0.9999999225761562, iteration: 76580
loss: 1.0085453987121582,grad_norm: 0.9999999502271291, iteration: 76581
loss: 1.0077064037322998,grad_norm: 0.9999991311793078, iteration: 76582
loss: 1.0386658906936646,grad_norm: 0.9526953991889385, iteration: 76583
loss: 0.9870201349258423,grad_norm: 0.9999989560360619, iteration: 76584
loss: 1.0278387069702148,grad_norm: 0.9999991654499822, iteration: 76585
loss: 0.9951972365379333,grad_norm: 0.9999990788459449, iteration: 76586
loss: 1.0133532285690308,grad_norm: 0.9999992322021976, iteration: 76587
loss: 1.0168360471725464,grad_norm: 0.9999990172829931, iteration: 76588
loss: 0.9920324683189392,grad_norm: 0.872876681137004, iteration: 76589
loss: 0.9878593683242798,grad_norm: 0.9918243872035727, iteration: 76590
loss: 1.0557849407196045,grad_norm: 0.8875186066979281, iteration: 76591
loss: 0.9922428131103516,grad_norm: 0.9999991609550597, iteration: 76592
loss: 1.0073717832565308,grad_norm: 0.9999991684183424, iteration: 76593
loss: 1.0580905675888062,grad_norm: 0.9999992675137179, iteration: 76594
loss: 1.0152403116226196,grad_norm: 0.9999991732236228, iteration: 76595
loss: 1.0265806913375854,grad_norm: 0.9999991419582162, iteration: 76596
loss: 1.002969741821289,grad_norm: 0.9999990938417158, iteration: 76597
loss: 0.9757717847824097,grad_norm: 0.9999992724191334, iteration: 76598
loss: 1.053847312927246,grad_norm: 0.999999497518049, iteration: 76599
loss: 1.0126776695251465,grad_norm: 0.9762329180976618, iteration: 76600
loss: 1.0034164190292358,grad_norm: 0.9999990829654567, iteration: 76601
loss: 1.0087701082229614,grad_norm: 0.9999994107544404, iteration: 76602
loss: 0.9677954912185669,grad_norm: 0.9964872442338345, iteration: 76603
loss: 0.988680899143219,grad_norm: 0.9999992230585967, iteration: 76604
loss: 0.9865794777870178,grad_norm: 0.999999663987867, iteration: 76605
loss: 0.9820088148117065,grad_norm: 0.9139829851921031, iteration: 76606
loss: 0.9588476419448853,grad_norm: 0.9828899800273249, iteration: 76607
loss: 0.9951921701431274,grad_norm: 0.9999992015911836, iteration: 76608
loss: 0.9780629277229309,grad_norm: 0.9999989319891254, iteration: 76609
loss: 0.9917927384376526,grad_norm: 0.9456450087076362, iteration: 76610
loss: 1.0053960084915161,grad_norm: 0.9999990421587596, iteration: 76611
loss: 0.9940990805625916,grad_norm: 0.9999990434219341, iteration: 76612
loss: 1.047174096107483,grad_norm: 0.9722229675490052, iteration: 76613
loss: 1.0026218891143799,grad_norm: 0.9999991220210881, iteration: 76614
loss: 0.9765674471855164,grad_norm: 0.9999990396805162, iteration: 76615
loss: 1.019059419631958,grad_norm: 0.9999997655652715, iteration: 76616
loss: 0.9917822480201721,grad_norm: 0.9999992080147055, iteration: 76617
loss: 1.0580946207046509,grad_norm: 0.9999990962202002, iteration: 76618
loss: 1.0258499383926392,grad_norm: 0.9999990107764961, iteration: 76619
loss: 0.982251763343811,grad_norm: 0.9999990702982366, iteration: 76620
loss: 0.9929578304290771,grad_norm: 0.9810885109607471, iteration: 76621
loss: 1.0237152576446533,grad_norm: 0.8854354454468341, iteration: 76622
loss: 0.9853140115737915,grad_norm: 0.9999989567548115, iteration: 76623
loss: 0.9456008672714233,grad_norm: 0.99999925889558, iteration: 76624
loss: 1.0230844020843506,grad_norm: 0.9999989525939169, iteration: 76625
loss: 0.992957592010498,grad_norm: 0.9986396415355902, iteration: 76626
loss: 1.006880521774292,grad_norm: 0.9891865729293232, iteration: 76627
loss: 0.9968312382698059,grad_norm: 0.9999991164205821, iteration: 76628
loss: 1.0292619466781616,grad_norm: 0.9999990617392657, iteration: 76629
loss: 0.9764590859413147,grad_norm: 0.9999989967783968, iteration: 76630
loss: 0.9656109809875488,grad_norm: 0.9999997031351343, iteration: 76631
loss: 1.0199774503707886,grad_norm: 0.9439702648458734, iteration: 76632
loss: 1.011753797531128,grad_norm: 0.9999991030009173, iteration: 76633
loss: 1.0271728038787842,grad_norm: 0.9084001724498011, iteration: 76634
loss: 0.9829239249229431,grad_norm: 0.8687145917198857, iteration: 76635
loss: 0.9948196411132812,grad_norm: 0.9999990554730052, iteration: 76636
loss: 0.9930723309516907,grad_norm: 0.9999991894410794, iteration: 76637
loss: 0.9973202347755432,grad_norm: 0.9999992189636744, iteration: 76638
loss: 1.0065439939498901,grad_norm: 0.8788373851978245, iteration: 76639
loss: 1.007551670074463,grad_norm: 0.9999992164360149, iteration: 76640
loss: 1.0212810039520264,grad_norm: 0.9999991802632044, iteration: 76641
loss: 1.0095356702804565,grad_norm: 0.9999991000014842, iteration: 76642
loss: 0.9629858136177063,grad_norm: 0.9537889710957099, iteration: 76643
loss: 0.9648867249488831,grad_norm: 0.9999990095324421, iteration: 76644
loss: 0.9941087961196899,grad_norm: 0.9999992701181641, iteration: 76645
loss: 0.9875063896179199,grad_norm: 0.9727594303138782, iteration: 76646
loss: 0.9964296817779541,grad_norm: 0.9341565727529109, iteration: 76647
loss: 1.0318481922149658,grad_norm: 0.9999992105094188, iteration: 76648
loss: 0.9901497960090637,grad_norm: 0.9999991000359799, iteration: 76649
loss: 0.9978654384613037,grad_norm: 0.9585246598143495, iteration: 76650
loss: 0.9605209827423096,grad_norm: 0.9999990897737856, iteration: 76651
loss: 1.0061562061309814,grad_norm: 0.8915121304450865, iteration: 76652
loss: 0.9840255379676819,grad_norm: 0.9163285447539503, iteration: 76653
loss: 1.0068622827529907,grad_norm: 0.9999990815467252, iteration: 76654
loss: 0.9756159782409668,grad_norm: 0.9999990587885265, iteration: 76655
loss: 0.9896267652511597,grad_norm: 0.8796512757503053, iteration: 76656
loss: 0.9936070442199707,grad_norm: 0.8583000827563334, iteration: 76657
loss: 0.9644409418106079,grad_norm: 0.9999989446364992, iteration: 76658
loss: 0.9865427613258362,grad_norm: 0.8587107472693841, iteration: 76659
loss: 0.9809696078300476,grad_norm: 0.9999991316338736, iteration: 76660
loss: 0.9934860467910767,grad_norm: 0.9999990846660519, iteration: 76661
loss: 1.0009154081344604,grad_norm: 0.99999920581471, iteration: 76662
loss: 0.9942384362220764,grad_norm: 0.999999070545817, iteration: 76663
loss: 0.9995979070663452,grad_norm: 0.9999998015275192, iteration: 76664
loss: 1.0028055906295776,grad_norm: 0.965750447752394, iteration: 76665
loss: 0.9676752686500549,grad_norm: 0.9999991073258124, iteration: 76666
loss: 0.9682355523109436,grad_norm: 0.9826384669170097, iteration: 76667
loss: 1.0323888063430786,grad_norm: 0.9999995237727365, iteration: 76668
loss: 0.9779404401779175,grad_norm: 0.9583887942234623, iteration: 76669
loss: 0.9898383021354675,grad_norm: 0.9999991503056737, iteration: 76670
loss: 1.0215610265731812,grad_norm: 0.9949289879072204, iteration: 76671
loss: 1.0438021421432495,grad_norm: 0.9297640788689784, iteration: 76672
loss: 0.9845852255821228,grad_norm: 0.9999990257655155, iteration: 76673
loss: 0.9986608624458313,grad_norm: 0.9999991299745787, iteration: 76674
loss: 0.9822957515716553,grad_norm: 0.9613316755379986, iteration: 76675
loss: 0.9812623858451843,grad_norm: 0.9108432587134635, iteration: 76676
loss: 0.9864898324012756,grad_norm: 0.999999285307326, iteration: 76677
loss: 1.0186707973480225,grad_norm: 0.9029185103173806, iteration: 76678
loss: 1.0034908056259155,grad_norm: 0.9318401743001746, iteration: 76679
loss: 0.9897271394729614,grad_norm: 0.999999308184358, iteration: 76680
loss: 1.0218298435211182,grad_norm: 0.9999991473645545, iteration: 76681
loss: 0.9913814663887024,grad_norm: 0.8845067101991295, iteration: 76682
loss: 0.983773946762085,grad_norm: 0.999999267223613, iteration: 76683
loss: 1.0132523775100708,grad_norm: 0.9999990200349322, iteration: 76684
loss: 0.9679663181304932,grad_norm: 0.9287296825343141, iteration: 76685
loss: 1.008541464805603,grad_norm: 0.9999992600306349, iteration: 76686
loss: 0.9759389162063599,grad_norm: 0.9999992646871497, iteration: 76687
loss: 1.0013763904571533,grad_norm: 0.8650520700162374, iteration: 76688
loss: 0.9901759624481201,grad_norm: 0.9999991817504132, iteration: 76689
loss: 1.024673342704773,grad_norm: 0.9999992334704473, iteration: 76690
loss: 0.9994293451309204,grad_norm: 0.9332725678811261, iteration: 76691
loss: 0.9718500375747681,grad_norm: 0.9999991777334476, iteration: 76692
loss: 1.0455271005630493,grad_norm: 0.9999991472932875, iteration: 76693
loss: 1.0307176113128662,grad_norm: 0.9999990353397995, iteration: 76694
loss: 0.9994217753410339,grad_norm: 0.9999991172548408, iteration: 76695
loss: 1.0351086854934692,grad_norm: 0.9999992196324182, iteration: 76696
loss: 1.0168558359146118,grad_norm: 0.9999992365059626, iteration: 76697
loss: 1.0825965404510498,grad_norm: 0.9999991618142523, iteration: 76698
loss: 0.9905665516853333,grad_norm: 0.9999990435562968, iteration: 76699
loss: 1.0136693716049194,grad_norm: 0.9368922552986487, iteration: 76700
loss: 0.9507699012756348,grad_norm: 0.9999990392665928, iteration: 76701
loss: 1.0203666687011719,grad_norm: 0.9999992319440363, iteration: 76702
loss: 1.0154064893722534,grad_norm: 0.9999992329604727, iteration: 76703
loss: 1.0265003442764282,grad_norm: 0.9999991521577377, iteration: 76704
loss: 1.0053058862686157,grad_norm: 0.9999991366861328, iteration: 76705
loss: 1.0114821195602417,grad_norm: 0.9999992201843863, iteration: 76706
loss: 0.9985263347625732,grad_norm: 0.9999990458661773, iteration: 76707
loss: 1.0173970460891724,grad_norm: 0.9999992935212367, iteration: 76708
loss: 0.9519485235214233,grad_norm: 0.9999990880974104, iteration: 76709
loss: 1.0435478687286377,grad_norm: 0.999999082570806, iteration: 76710
loss: 1.056951880455017,grad_norm: 0.9999990832035042, iteration: 76711
loss: 0.9998716115951538,grad_norm: 0.9056415789886386, iteration: 76712
loss: 0.9973369836807251,grad_norm: 0.9999990960005247, iteration: 76713
loss: 1.0822510719299316,grad_norm: 0.99999912571635, iteration: 76714
loss: 1.0406545400619507,grad_norm: 0.9999996004843427, iteration: 76715
loss: 1.0392255783081055,grad_norm: 0.9999996343289311, iteration: 76716
loss: 1.0270049571990967,grad_norm: 0.9999989557538307, iteration: 76717
loss: 0.9998572468757629,grad_norm: 0.9999990917058588, iteration: 76718
loss: 0.9779413342475891,grad_norm: 0.9999993370767738, iteration: 76719
loss: 1.0128899812698364,grad_norm: 0.999999120296913, iteration: 76720
loss: 1.0076245069503784,grad_norm: 0.9683683727828192, iteration: 76721
loss: 1.0177937746047974,grad_norm: 0.999999248249386, iteration: 76722
loss: 1.0049591064453125,grad_norm: 0.9999990883628928, iteration: 76723
loss: 1.009875774383545,grad_norm: 0.9999990422880344, iteration: 76724
loss: 1.0261993408203125,grad_norm: 0.9903228077880517, iteration: 76725
loss: 1.0225144624710083,grad_norm: 0.9999990324995132, iteration: 76726
loss: 1.021398901939392,grad_norm: 0.9271153658209701, iteration: 76727
loss: 0.9825467467308044,grad_norm: 0.9999991078821997, iteration: 76728
loss: 1.0026448965072632,grad_norm: 0.9999990938969144, iteration: 76729
loss: 1.0107507705688477,grad_norm: 0.9999991032582279, iteration: 76730
loss: 1.0391528606414795,grad_norm: 0.9681853251431504, iteration: 76731
loss: 0.9875490665435791,grad_norm: 0.9999991829344475, iteration: 76732
loss: 0.9999639987945557,grad_norm: 0.9443289139030193, iteration: 76733
loss: 1.01114022731781,grad_norm: 0.9108333599458993, iteration: 76734
loss: 1.0366110801696777,grad_norm: 0.9999990690045779, iteration: 76735
loss: 1.0139789581298828,grad_norm: 0.957065641162617, iteration: 76736
loss: 1.0001113414764404,grad_norm: 0.922469872744302, iteration: 76737
loss: 1.0004494190216064,grad_norm: 0.9999989516217149, iteration: 76738
loss: 1.006338357925415,grad_norm: 0.9866615522388607, iteration: 76739
loss: 0.9994940757751465,grad_norm: 0.9370645916468859, iteration: 76740
loss: 0.9666517376899719,grad_norm: 0.9999991873600645, iteration: 76741
loss: 0.9937667846679688,grad_norm: 0.9999992223315345, iteration: 76742
loss: 0.9712687730789185,grad_norm: 0.9999991228321503, iteration: 76743
loss: 0.9747323393821716,grad_norm: 0.9999990752145274, iteration: 76744
loss: 0.9974832534790039,grad_norm: 0.8370703525972185, iteration: 76745
loss: 0.9962421655654907,grad_norm: 0.9999990069934405, iteration: 76746
loss: 0.9866520166397095,grad_norm: 0.9999991902462805, iteration: 76747
loss: 0.9427307844161987,grad_norm: 0.9999990200095226, iteration: 76748
loss: 1.0118712186813354,grad_norm: 0.9679344807036928, iteration: 76749
loss: 0.9791633486747742,grad_norm: 0.999999246945634, iteration: 76750
loss: 0.987038254737854,grad_norm: 0.9917702196163849, iteration: 76751
loss: 1.0652503967285156,grad_norm: 0.9999998729630765, iteration: 76752
loss: 1.0153435468673706,grad_norm: 0.9999992193586613, iteration: 76753
loss: 0.9273562431335449,grad_norm: 0.9999991149672952, iteration: 76754
loss: 0.9928790330886841,grad_norm: 0.9999991206214887, iteration: 76755
loss: 1.004032015800476,grad_norm: 0.9999991504797123, iteration: 76756
loss: 1.0124205350875854,grad_norm: 0.9999992712608291, iteration: 76757
loss: 1.0284348726272583,grad_norm: 0.9999989900942071, iteration: 76758
loss: 0.9974963068962097,grad_norm: 0.9379038580712549, iteration: 76759
loss: 1.0180015563964844,grad_norm: 0.9999990230580782, iteration: 76760
loss: 0.9803788661956787,grad_norm: 0.9999991767460431, iteration: 76761
loss: 0.9753319621086121,grad_norm: 0.9999993299638036, iteration: 76762
loss: 1.0236847400665283,grad_norm: 0.9999993644734354, iteration: 76763
loss: 0.9986116290092468,grad_norm: 0.9999991427357035, iteration: 76764
loss: 1.005975365638733,grad_norm: 0.999999202756461, iteration: 76765
loss: 0.9553220868110657,grad_norm: 0.9999990702412845, iteration: 76766
loss: 1.0443167686462402,grad_norm: 0.9999989208210935, iteration: 76767
loss: 1.0093575716018677,grad_norm: 0.9999990321271838, iteration: 76768
loss: 0.9684467315673828,grad_norm: 0.998897519560001, iteration: 76769
loss: 0.97761470079422,grad_norm: 0.9999991674566625, iteration: 76770
loss: 0.981701672077179,grad_norm: 0.9392993152720024, iteration: 76771
loss: 0.987853467464447,grad_norm: 0.9999991320768142, iteration: 76772
loss: 0.9693471789360046,grad_norm: 0.9516434601489229, iteration: 76773
loss: 0.9887588620185852,grad_norm: 0.9999993711611694, iteration: 76774
loss: 1.0071032047271729,grad_norm: 0.9302338177282897, iteration: 76775
loss: 0.9639584422111511,grad_norm: 0.9999992920155256, iteration: 76776
loss: 1.0242551565170288,grad_norm: 0.8967775100694451, iteration: 76777
loss: 0.9723126888275146,grad_norm: 0.8555494250946563, iteration: 76778
loss: 1.0148555040359497,grad_norm: 0.9999992143197298, iteration: 76779
loss: 0.9875369071960449,grad_norm: 0.999998908360491, iteration: 76780
loss: 0.9533641338348389,grad_norm: 0.9999992013858748, iteration: 76781
loss: 1.0383169651031494,grad_norm: 0.9999990963347546, iteration: 76782
loss: 0.9687479138374329,grad_norm: 0.9999990064105284, iteration: 76783
loss: 1.0305603742599487,grad_norm: 0.9225312445842417, iteration: 76784
loss: 0.9898112416267395,grad_norm: 0.9999990727770806, iteration: 76785
loss: 0.9963322281837463,grad_norm: 0.959063934729989, iteration: 76786
loss: 0.9929478764533997,grad_norm: 0.9808471942471148, iteration: 76787
loss: 0.9962185621261597,grad_norm: 0.9775821202713469, iteration: 76788
loss: 1.1445695161819458,grad_norm: 0.9999994807321244, iteration: 76789
loss: 1.050341010093689,grad_norm: 0.8437288261078847, iteration: 76790
loss: 1.0087343454360962,grad_norm: 0.9999992211819891, iteration: 76791
loss: 0.9811981320381165,grad_norm: 0.9797621089539318, iteration: 76792
loss: 1.0454578399658203,grad_norm: 0.9954094961954596, iteration: 76793
loss: 1.0139398574829102,grad_norm: 0.9999990764243454, iteration: 76794
loss: 1.0181702375411987,grad_norm: 0.9999997737332588, iteration: 76795
loss: 0.9829544425010681,grad_norm: 0.9999990862873083, iteration: 76796
loss: 1.0106537342071533,grad_norm: 0.991024619875715, iteration: 76797
loss: 1.0392731428146362,grad_norm: 0.9926595223611662, iteration: 76798
loss: 1.0051729679107666,grad_norm: 0.9168310159816785, iteration: 76799
loss: 0.998960554599762,grad_norm: 0.9795621614507379, iteration: 76800
loss: 0.9874860644340515,grad_norm: 0.9392020564299552, iteration: 76801
loss: 0.9870287179946899,grad_norm: 0.9999990483190127, iteration: 76802
loss: 0.9863599538803101,grad_norm: 0.9384404244920129, iteration: 76803
loss: 0.9883158206939697,grad_norm: 0.9999990729740807, iteration: 76804
loss: 0.9974236488342285,grad_norm: 0.9663571424032192, iteration: 76805
loss: 0.9840534925460815,grad_norm: 0.9712252439935026, iteration: 76806
loss: 1.0198259353637695,grad_norm: 0.9999991104966998, iteration: 76807
loss: 1.0030264854431152,grad_norm: 0.9999991134726094, iteration: 76808
loss: 0.9824201464653015,grad_norm: 0.900032346547754, iteration: 76809
loss: 0.9920302033424377,grad_norm: 0.9957372737156034, iteration: 76810
loss: 0.9989243149757385,grad_norm: 0.977479514059686, iteration: 76811
loss: 0.994598925113678,grad_norm: 0.9999990167150803, iteration: 76812
loss: 1.024955153465271,grad_norm: 0.9999992651500682, iteration: 76813
loss: 1.0284926891326904,grad_norm: 0.9999992539244736, iteration: 76814
loss: 1.0044846534729004,grad_norm: 0.9323041828124009, iteration: 76815
loss: 1.0171644687652588,grad_norm: 0.9999991385873686, iteration: 76816
loss: 1.0409936904907227,grad_norm: 0.9999991183120795, iteration: 76817
loss: 1.0112760066986084,grad_norm: 0.7988149860381787, iteration: 76818
loss: 1.0040836334228516,grad_norm: 0.9999990640787197, iteration: 76819
loss: 1.024609088897705,grad_norm: 0.9975332925672139, iteration: 76820
loss: 1.0041571855545044,grad_norm: 0.9846039358016672, iteration: 76821
loss: 1.010155200958252,grad_norm: 0.9999990946878922, iteration: 76822
loss: 1.0000076293945312,grad_norm: 0.9999989923065489, iteration: 76823
loss: 0.9998728632926941,grad_norm: 0.9999991442641273, iteration: 76824
loss: 0.9840545654296875,grad_norm: 0.8330903248816928, iteration: 76825
loss: 1.0205941200256348,grad_norm: 0.9890553220252357, iteration: 76826
loss: 1.0286277532577515,grad_norm: 0.9999993479984535, iteration: 76827
loss: 1.1651768684387207,grad_norm: 0.9999992529135013, iteration: 76828
loss: 0.9780497550964355,grad_norm: 0.999999023550363, iteration: 76829
loss: 0.9811632037162781,grad_norm: 0.9999991189664355, iteration: 76830
loss: 1.0103919506072998,grad_norm: 0.992073150334159, iteration: 76831
loss: 1.025539517402649,grad_norm: 0.9999997369305449, iteration: 76832
loss: 0.9470426440238953,grad_norm: 0.9999990247637106, iteration: 76833
loss: 1.0225096940994263,grad_norm: 0.9999992263873658, iteration: 76834
loss: 1.0094531774520874,grad_norm: 0.9999991003025912, iteration: 76835
loss: 1.0130929946899414,grad_norm: 0.9999994030087623, iteration: 76836
loss: 1.0231585502624512,grad_norm: 0.9999993277894382, iteration: 76837
loss: 0.998185396194458,grad_norm: 0.9999990878987212, iteration: 76838
loss: 0.9744156002998352,grad_norm: 0.9690681518113474, iteration: 76839
loss: 1.0044809579849243,grad_norm: 0.9999991262999613, iteration: 76840
loss: 0.9630245566368103,grad_norm: 0.9897631375544735, iteration: 76841
loss: 0.9643769860267639,grad_norm: 0.9999990648733521, iteration: 76842
loss: 0.9964339137077332,grad_norm: 0.9999991318010568, iteration: 76843
loss: 1.013070821762085,grad_norm: 0.9702505262304668, iteration: 76844
loss: 0.9789369106292725,grad_norm: 0.9966745481473862, iteration: 76845
loss: 1.0194923877716064,grad_norm: 0.9999989947105954, iteration: 76846
loss: 1.0186920166015625,grad_norm: 0.9999991335851575, iteration: 76847
loss: 0.9590058326721191,grad_norm: 0.9491633871988171, iteration: 76848
loss: 0.9659432768821716,grad_norm: 0.9999992139765999, iteration: 76849
loss: 0.9994034171104431,grad_norm: 0.9731400035229725, iteration: 76850
loss: 0.968713104724884,grad_norm: 0.9999990341670061, iteration: 76851
loss: 0.9885523319244385,grad_norm: 0.9717450474541854, iteration: 76852
loss: 1.083211064338684,grad_norm: 0.9999996649776423, iteration: 76853
loss: 0.9961276054382324,grad_norm: 0.9999989477569151, iteration: 76854
loss: 1.0585092306137085,grad_norm: 0.9999990567106651, iteration: 76855
loss: 0.9712430238723755,grad_norm: 0.9067661998881622, iteration: 76856
loss: 0.9732051491737366,grad_norm: 0.9999990665375483, iteration: 76857
loss: 0.9901779890060425,grad_norm: 0.9054813223781607, iteration: 76858
loss: 0.9908531904220581,grad_norm: 0.9999992107876413, iteration: 76859
loss: 1.0484799146652222,grad_norm: 0.9376639360477048, iteration: 76860
loss: 0.9728356599807739,grad_norm: 0.9999990496746247, iteration: 76861
loss: 1.032881736755371,grad_norm: 0.9999990591703678, iteration: 76862
loss: 0.9511308073997498,grad_norm: 0.9999991669230286, iteration: 76863
loss: 0.9934489130973816,grad_norm: 0.9999989882254139, iteration: 76864
loss: 1.0318471193313599,grad_norm: 0.9999992182174303, iteration: 76865
loss: 1.0636768341064453,grad_norm: 0.9999993610019074, iteration: 76866
loss: 0.9866434335708618,grad_norm: 0.9999990669058137, iteration: 76867
loss: 0.9687058925628662,grad_norm: 0.99999918769529, iteration: 76868
loss: 1.02335786819458,grad_norm: 0.971179527379451, iteration: 76869
loss: 1.0091160535812378,grad_norm: 0.9296080780816047, iteration: 76870
loss: 0.9696775078773499,grad_norm: 0.9864204034343664, iteration: 76871
loss: 0.9937615394592285,grad_norm: 0.9858521244915214, iteration: 76872
loss: 1.00548255443573,grad_norm: 0.9999992898213949, iteration: 76873
loss: 1.0070228576660156,grad_norm: 0.9999991731568004, iteration: 76874
loss: 1.0149329900741577,grad_norm: 0.9999991458620787, iteration: 76875
loss: 1.0648027658462524,grad_norm: 0.9999992496804928, iteration: 76876
loss: 0.9669846296310425,grad_norm: 0.9999993174618561, iteration: 76877
loss: 1.0032049417495728,grad_norm: 0.9999991320039803, iteration: 76878
loss: 1.0130223035812378,grad_norm: 0.999999166727343, iteration: 76879
loss: 0.9831063747406006,grad_norm: 0.9999991285143057, iteration: 76880
loss: 1.180177927017212,grad_norm: 0.9999991903909353, iteration: 76881
loss: 0.981493353843689,grad_norm: 0.9999991186212772, iteration: 76882
loss: 0.9866164326667786,grad_norm: 0.9999991773597978, iteration: 76883
loss: 0.9448279738426208,grad_norm: 0.9999991097106414, iteration: 76884
loss: 0.9792261719703674,grad_norm: 0.9999991148146202, iteration: 76885
loss: 1.0245704650878906,grad_norm: 0.9814841916158313, iteration: 76886
loss: 1.0056885480880737,grad_norm: 0.9999990095045691, iteration: 76887
loss: 1.0065418481826782,grad_norm: 0.9999990686887749, iteration: 76888
loss: 0.9845215082168579,grad_norm: 0.9350164909904553, iteration: 76889
loss: 0.9922403693199158,grad_norm: 0.9999994267416026, iteration: 76890
loss: 0.9835982322692871,grad_norm: 0.9999989968861085, iteration: 76891
loss: 1.0393294095993042,grad_norm: 0.9999992227574787, iteration: 76892
loss: 1.0000518560409546,grad_norm: 0.9943367515309974, iteration: 76893
loss: 1.0037707090377808,grad_norm: 0.9716878323283598, iteration: 76894
loss: 1.0324326753616333,grad_norm: 0.9999991487534564, iteration: 76895
loss: 0.9758766889572144,grad_norm: 0.9308580397493877, iteration: 76896
loss: 0.9760677814483643,grad_norm: 0.9999996292596123, iteration: 76897
loss: 0.9787194728851318,grad_norm: 0.9999990394567336, iteration: 76898
loss: 1.0051031112670898,grad_norm: 0.9999990858155111, iteration: 76899
loss: 0.9857910871505737,grad_norm: 0.9999992773880049, iteration: 76900
loss: 1.0527095794677734,grad_norm: 0.9999993398341102, iteration: 76901
loss: 0.964752197265625,grad_norm: 0.9999990731182826, iteration: 76902
loss: 0.9834314584732056,grad_norm: 0.9999993143927166, iteration: 76903
loss: 0.9784124493598938,grad_norm: 0.9999991636807978, iteration: 76904
loss: 1.0436866283416748,grad_norm: 0.9999990996048772, iteration: 76905
loss: 1.0370413064956665,grad_norm: 0.9999995676423308, iteration: 76906
loss: 1.0398504734039307,grad_norm: 0.9999996190685511, iteration: 76907
loss: 0.9968465566635132,grad_norm: 0.9999990121180669, iteration: 76908
loss: 1.0216081142425537,grad_norm: 0.9999993377464498, iteration: 76909
loss: 0.9752057194709778,grad_norm: 0.9055458066784923, iteration: 76910
loss: 1.0490270853042603,grad_norm: 0.9999992196220268, iteration: 76911
loss: 0.9957991242408752,grad_norm: 0.9979450585537253, iteration: 76912
loss: 0.9998131394386292,grad_norm: 0.9889146656730857, iteration: 76913
loss: 0.9753136038780212,grad_norm: 0.9976194500288972, iteration: 76914
loss: 1.0210925340652466,grad_norm: 0.999999167525499, iteration: 76915
loss: 1.0069267749786377,grad_norm: 0.9999994952230649, iteration: 76916
loss: 0.9851797223091125,grad_norm: 0.8633475553646284, iteration: 76917
loss: 0.9920485615730286,grad_norm: 0.9999991080270227, iteration: 76918
loss: 0.972976803779602,grad_norm: 0.9548722249516411, iteration: 76919
loss: 1.0082061290740967,grad_norm: 0.999999152549421, iteration: 76920
loss: 0.9970117807388306,grad_norm: 0.999999078996964, iteration: 76921
loss: 1.0098727941513062,grad_norm: 0.9999991067702128, iteration: 76922
loss: 0.9799792170524597,grad_norm: 0.9999991159100134, iteration: 76923
loss: 0.963459849357605,grad_norm: 0.9999989581275129, iteration: 76924
loss: 0.9862000942230225,grad_norm: 0.9461484626278434, iteration: 76925
loss: 0.9944784641265869,grad_norm: 0.9999990648695968, iteration: 76926
loss: 1.009205937385559,grad_norm: 0.9999991054754958, iteration: 76927
loss: 0.9646329283714294,grad_norm: 0.9999992579290181, iteration: 76928
loss: 0.9811907410621643,grad_norm: 0.999999057881076, iteration: 76929
loss: 1.0273982286453247,grad_norm: 0.9999991403388546, iteration: 76930
loss: 1.0136356353759766,grad_norm: 0.9276818930666821, iteration: 76931
loss: 0.9906956553459167,grad_norm: 0.9999990848754009, iteration: 76932
loss: 1.0197129249572754,grad_norm: 0.999999088436051, iteration: 76933
loss: 1.0095199346542358,grad_norm: 0.8456471903834972, iteration: 76934
loss: 0.9996719360351562,grad_norm: 0.9999991015344417, iteration: 76935
loss: 1.0188801288604736,grad_norm: 0.9999991211969902, iteration: 76936
loss: 0.9922377467155457,grad_norm: 0.9999989454584302, iteration: 76937
loss: 0.9909084439277649,grad_norm: 0.9999990203523604, iteration: 76938
loss: 0.9855015873908997,grad_norm: 0.9512913450490447, iteration: 76939
loss: 1.0222738981246948,grad_norm: 0.9999991188688725, iteration: 76940
loss: 0.9895366430282593,grad_norm: 0.9999990730349834, iteration: 76941
loss: 1.1020139455795288,grad_norm: 0.9999996093499361, iteration: 76942
loss: 1.0523475408554077,grad_norm: 0.9999992278260593, iteration: 76943
loss: 0.9975639581680298,grad_norm: 0.9999991352539487, iteration: 76944
loss: 1.0769248008728027,grad_norm: 0.999999307926412, iteration: 76945
loss: 1.016282558441162,grad_norm: 0.9818056239844221, iteration: 76946
loss: 1.0033708810806274,grad_norm: 0.9230836184899927, iteration: 76947
loss: 0.9893637895584106,grad_norm: 0.8720617758428555, iteration: 76948
loss: 0.9314544796943665,grad_norm: 0.9999991538270816, iteration: 76949
loss: 1.0089491605758667,grad_norm: 0.9801087428036488, iteration: 76950
loss: 0.9865596890449524,grad_norm: 0.9187163689523664, iteration: 76951
loss: 0.9930329918861389,grad_norm: 0.9999990965516741, iteration: 76952
loss: 0.9444336891174316,grad_norm: 0.9999990428276614, iteration: 76953
loss: 0.9801990985870361,grad_norm: 0.8811968733959421, iteration: 76954
loss: 1.0090243816375732,grad_norm: 0.9733595212948811, iteration: 76955
loss: 0.9842677712440491,grad_norm: 0.999999231579119, iteration: 76956
loss: 0.9886245727539062,grad_norm: 0.9999990909171684, iteration: 76957
loss: 1.0151406526565552,grad_norm: 0.9999990688197871, iteration: 76958
loss: 1.0129047632217407,grad_norm: 0.9999992464689973, iteration: 76959
loss: 0.998300313949585,grad_norm: 0.9999990819789345, iteration: 76960
loss: 0.9710549116134644,grad_norm: 0.9999992464983486, iteration: 76961
loss: 0.9699099659919739,grad_norm: 0.8585581902745545, iteration: 76962
loss: 1.0232337713241577,grad_norm: 0.9999993350689568, iteration: 76963
loss: 0.9784291982650757,grad_norm: 0.9999992777824425, iteration: 76964
loss: 0.972467303276062,grad_norm: 0.9999991334961436, iteration: 76965
loss: 1.0634212493896484,grad_norm: 0.9999998808974556, iteration: 76966
loss: 0.9925829768180847,grad_norm: 0.9142736336497551, iteration: 76967
loss: 0.9890962839126587,grad_norm: 0.9999991958768418, iteration: 76968
loss: 1.005139946937561,grad_norm: 0.9999990631965078, iteration: 76969
loss: 0.9831564426422119,grad_norm: 0.9999992060950696, iteration: 76970
loss: 1.0792288780212402,grad_norm: 0.9999991764500061, iteration: 76971
loss: 1.0162599086761475,grad_norm: 0.999999805352408, iteration: 76972
loss: 1.0048434734344482,grad_norm: 0.9999992757291574, iteration: 76973
loss: 1.0052851438522339,grad_norm: 0.8712836083108203, iteration: 76974
loss: 0.9726075530052185,grad_norm: 0.9999999452000613, iteration: 76975
loss: 1.0169857740402222,grad_norm: 0.9999991279992743, iteration: 76976
loss: 0.978181004524231,grad_norm: 0.9999992135988309, iteration: 76977
loss: 0.9616422057151794,grad_norm: 0.9613323525747481, iteration: 76978
loss: 1.039839267730713,grad_norm: 0.9643572370742669, iteration: 76979
loss: 0.9845491051673889,grad_norm: 0.9999989239635061, iteration: 76980
loss: 1.075697422027588,grad_norm: 0.9999994571028992, iteration: 76981
loss: 1.0103495121002197,grad_norm: 0.9999993033399389, iteration: 76982
loss: 0.9877146482467651,grad_norm: 0.9999990748413146, iteration: 76983
loss: 1.009320616722107,grad_norm: 0.9999989879849203, iteration: 76984
loss: 0.998900294303894,grad_norm: 0.9600224289810901, iteration: 76985
loss: 0.9799709320068359,grad_norm: 0.9398175327362673, iteration: 76986
loss: 1.0286576747894287,grad_norm: 0.9999995113100044, iteration: 76987
loss: 1.056048035621643,grad_norm: 0.9999995085676308, iteration: 76988
loss: 1.0058778524398804,grad_norm: 0.9999990848131903, iteration: 76989
loss: 1.0271360874176025,grad_norm: 0.9999991379982989, iteration: 76990
loss: 0.9937854409217834,grad_norm: 0.8520759442030879, iteration: 76991
loss: 1.0000598430633545,grad_norm: 0.9451169660793498, iteration: 76992
loss: 1.0005145072937012,grad_norm: 0.9999994281295018, iteration: 76993
loss: 0.9982087016105652,grad_norm: 0.9995267208953952, iteration: 76994
loss: 0.999377965927124,grad_norm: 0.9999991156729572, iteration: 76995
loss: 0.9866618514060974,grad_norm: 0.8343497518017903, iteration: 76996
loss: 1.046278476715088,grad_norm: 0.9999989784304598, iteration: 76997
loss: 0.9711084365844727,grad_norm: 0.9999990160759412, iteration: 76998
loss: 0.9605228304862976,grad_norm: 0.9855932272738828, iteration: 76999
loss: 1.0455633401870728,grad_norm: 0.9999991323967143, iteration: 77000
loss: 1.0266793966293335,grad_norm: 0.9999989566941268, iteration: 77001
loss: 0.9825513958930969,grad_norm: 0.9058613942988311, iteration: 77002
loss: 1.0265355110168457,grad_norm: 0.9999993871941714, iteration: 77003
loss: 1.0094540119171143,grad_norm: 0.9999991631518313, iteration: 77004
loss: 1.0068413019180298,grad_norm: 0.9999991736665945, iteration: 77005
loss: 1.0397462844848633,grad_norm: 0.9999992342051983, iteration: 77006
loss: 1.0051168203353882,grad_norm: 0.9999991997663891, iteration: 77007
loss: 1.0187420845031738,grad_norm: 0.9824552337934941, iteration: 77008
loss: 1.0091248750686646,grad_norm: 0.9999991832706846, iteration: 77009
loss: 1.0280442237854004,grad_norm: 0.9417796386239812, iteration: 77010
loss: 1.0326787233352661,grad_norm: 0.9999995977569225, iteration: 77011
loss: 0.990208625793457,grad_norm: 0.999999130182625, iteration: 77012
loss: 1.0393048524856567,grad_norm: 0.9999993508255594, iteration: 77013
loss: 1.0156103372573853,grad_norm: 0.9999991850359553, iteration: 77014
loss: 1.0203535556793213,grad_norm: 0.9999992532485465, iteration: 77015
loss: 1.023833990097046,grad_norm: 0.9999991252007517, iteration: 77016
loss: 1.0388020277023315,grad_norm: 0.9999994367778975, iteration: 77017
loss: 1.0207278728485107,grad_norm: 0.8680959934492442, iteration: 77018
loss: 1.0222141742706299,grad_norm: 0.9999991187596248, iteration: 77019
loss: 0.9844942092895508,grad_norm: 0.99999912914072, iteration: 77020
loss: 0.9900338649749756,grad_norm: 0.9999993236000119, iteration: 77021
loss: 0.996107280254364,grad_norm: 0.8476431361559341, iteration: 77022
loss: 1.0612057447433472,grad_norm: 0.9999991632264006, iteration: 77023
loss: 1.0558745861053467,grad_norm: 0.9999997961023607, iteration: 77024
loss: 0.991349995136261,grad_norm: 0.8226010830116816, iteration: 77025
loss: 1.0354095697402954,grad_norm: 0.999999659294887, iteration: 77026
loss: 1.0156919956207275,grad_norm: 0.9999989745930573, iteration: 77027
loss: 0.9689539074897766,grad_norm: 0.9999990728364292, iteration: 77028
loss: 0.9915328025817871,grad_norm: 0.9999990465253363, iteration: 77029
loss: 1.0236635208129883,grad_norm: 0.9999995167843067, iteration: 77030
loss: 1.0540649890899658,grad_norm: 0.9999996734879271, iteration: 77031
loss: 0.9986838102340698,grad_norm: 0.9999990630462144, iteration: 77032
loss: 1.0011862516403198,grad_norm: 0.9445955515016569, iteration: 77033
loss: 0.977583110332489,grad_norm: 0.9999992954811058, iteration: 77034
loss: 1.0112624168395996,grad_norm: 0.9999993827677445, iteration: 77035
loss: 0.9824915528297424,grad_norm: 0.9999992322417558, iteration: 77036
loss: 0.9620009660720825,grad_norm: 0.9999992764256714, iteration: 77037
loss: 0.9932557940483093,grad_norm: 0.9517053409922551, iteration: 77038
loss: 0.991005003452301,grad_norm: 0.9999991335539908, iteration: 77039
loss: 1.012156367301941,grad_norm: 0.9999989934232626, iteration: 77040
loss: 1.053418517112732,grad_norm: 0.999999349563652, iteration: 77041
loss: 0.9840252995491028,grad_norm: 0.9549748939841558, iteration: 77042
loss: 1.0207020044326782,grad_norm: 0.999999141090451, iteration: 77043
loss: 0.9808765053749084,grad_norm: 0.99999918588319, iteration: 77044
loss: 1.034184217453003,grad_norm: 0.9999991878949296, iteration: 77045
loss: 0.9981567859649658,grad_norm: 0.9913399790042965, iteration: 77046
loss: 1.01987886428833,grad_norm: 0.9999989717984495, iteration: 77047
loss: 0.9998060464859009,grad_norm: 0.9999991712017777, iteration: 77048
loss: 1.000097393989563,grad_norm: 0.9999993009059199, iteration: 77049
loss: 0.9780423045158386,grad_norm: 0.8671348836405958, iteration: 77050
loss: 0.997554361820221,grad_norm: 0.9999990631985348, iteration: 77051
loss: 1.0219930410385132,grad_norm: 0.9999991070009024, iteration: 77052
loss: 1.0150700807571411,grad_norm: 0.9999990694295271, iteration: 77053
loss: 1.0128467082977295,grad_norm: 0.9999994729736803, iteration: 77054
loss: 0.9836565256118774,grad_norm: 0.9999990900528191, iteration: 77055
loss: 0.9875486493110657,grad_norm: 0.9999994136646395, iteration: 77056
loss: 1.0427347421646118,grad_norm: 0.9999991263892773, iteration: 77057
loss: 1.0954985618591309,grad_norm: 0.9999996461192505, iteration: 77058
loss: 1.0020551681518555,grad_norm: 0.979824209113074, iteration: 77059
loss: 1.0139602422714233,grad_norm: 0.9999989705387556, iteration: 77060
loss: 1.0017669200897217,grad_norm: 0.8642577973735439, iteration: 77061
loss: 1.0941940546035767,grad_norm: 0.9872811678612667, iteration: 77062
loss: 1.0765783786773682,grad_norm: 0.9999999061902674, iteration: 77063
loss: 1.0005898475646973,grad_norm: 0.9999991995511266, iteration: 77064
loss: 1.0949299335479736,grad_norm: 0.999999158331892, iteration: 77065
loss: 0.9869572520256042,grad_norm: 0.9999991761652568, iteration: 77066
loss: 1.0066512823104858,grad_norm: 0.7884837855556032, iteration: 77067
loss: 0.9707814455032349,grad_norm: 0.999999267959352, iteration: 77068
loss: 1.0097401142120361,grad_norm: 0.9529562935492246, iteration: 77069
loss: 1.0128557682037354,grad_norm: 0.9999991453911432, iteration: 77070
loss: 0.9866777658462524,grad_norm: 0.9655283089423832, iteration: 77071
loss: 1.1044777631759644,grad_norm: 0.999999153383842, iteration: 77072
loss: 0.9929661750793457,grad_norm: 0.9468988934899351, iteration: 77073
loss: 1.006486415863037,grad_norm: 0.9999992238073349, iteration: 77074
loss: 0.9888850450515747,grad_norm: 0.8643019030732125, iteration: 77075
loss: 1.0552940368652344,grad_norm: 0.9999994761149782, iteration: 77076
loss: 0.9807884693145752,grad_norm: 0.9999991768298134, iteration: 77077
loss: 1.0233486890792847,grad_norm: 0.9999992828515267, iteration: 77078
loss: 1.0318875312805176,grad_norm: 0.9999991867552187, iteration: 77079
loss: 1.032443642616272,grad_norm: 0.986661947413014, iteration: 77080
loss: 1.0047430992126465,grad_norm: 0.9999991879780858, iteration: 77081
loss: 1.0106528997421265,grad_norm: 0.8568186230613398, iteration: 77082
loss: 0.9934245347976685,grad_norm: 0.9999990884093597, iteration: 77083
loss: 1.0086942911148071,grad_norm: 0.9999991008471613, iteration: 77084
loss: 1.021819829940796,grad_norm: 0.9749768232913144, iteration: 77085
loss: 0.9891880750656128,grad_norm: 0.9999991994714409, iteration: 77086
loss: 0.9887756705284119,grad_norm: 0.9470381418436967, iteration: 77087
loss: 1.0154857635498047,grad_norm: 0.9999995882907493, iteration: 77088
loss: 1.0185635089874268,grad_norm: 0.9999991434176495, iteration: 77089
loss: 1.0002977848052979,grad_norm: 0.999999438328254, iteration: 77090
loss: 0.9985787272453308,grad_norm: 0.939316328941336, iteration: 77091
loss: 1.0142639875411987,grad_norm: 0.9588563073319398, iteration: 77092
loss: 1.0007922649383545,grad_norm: 0.9999996629397925, iteration: 77093
loss: 0.972917377948761,grad_norm: 0.951668376141304, iteration: 77094
loss: 1.006273627281189,grad_norm: 0.9999992578377772, iteration: 77095
loss: 0.9954754114151001,grad_norm: 0.9999993085048852, iteration: 77096
loss: 1.0062471628189087,grad_norm: 0.9939305846163606, iteration: 77097
loss: 1.0015206336975098,grad_norm: 0.9999990122084091, iteration: 77098
loss: 1.0017814636230469,grad_norm: 0.9999990584449935, iteration: 77099
loss: 1.0255484580993652,grad_norm: 0.9999991107280022, iteration: 77100
loss: 1.0010432004928589,grad_norm: 0.9999990828382636, iteration: 77101
loss: 1.001622200012207,grad_norm: 0.9398855363579702, iteration: 77102
loss: 1.0588531494140625,grad_norm: 0.9999995175757354, iteration: 77103
loss: 0.993300199508667,grad_norm: 0.9600246988286391, iteration: 77104
loss: 0.9889649152755737,grad_norm: 0.999999038585253, iteration: 77105
loss: 0.9987444281578064,grad_norm: 0.9371825291152687, iteration: 77106
loss: 0.9797800779342651,grad_norm: 0.9999992723175615, iteration: 77107
loss: 1.0464483499526978,grad_norm: 0.9999990587171295, iteration: 77108
loss: 0.9877983927726746,grad_norm: 0.999999353357636, iteration: 77109
loss: 0.9961292743682861,grad_norm: 0.9999990145461537, iteration: 77110
loss: 0.9751188158988953,grad_norm: 0.9078771229850928, iteration: 77111
loss: 1.0301785469055176,grad_norm: 0.9999991009942977, iteration: 77112
loss: 1.0243172645568848,grad_norm: 0.9999992579156448, iteration: 77113
loss: 0.9930199384689331,grad_norm: 0.9999990296058697, iteration: 77114
loss: 1.0604356527328491,grad_norm: 0.9999996367695234, iteration: 77115
loss: 1.0207573175430298,grad_norm: 0.9999990585028343, iteration: 77116
loss: 1.0226824283599854,grad_norm: 0.9999991977719049, iteration: 77117
loss: 0.9859426021575928,grad_norm: 0.9999991931025947, iteration: 77118
loss: 0.9839813709259033,grad_norm: 0.9999991785873368, iteration: 77119
loss: 1.0246587991714478,grad_norm: 0.9999991600081335, iteration: 77120
loss: 0.9718501567840576,grad_norm: 0.8904223495933262, iteration: 77121
loss: 0.9715040326118469,grad_norm: 0.9333146688476288, iteration: 77122
loss: 0.9423463940620422,grad_norm: 0.9999991172447273, iteration: 77123
loss: 0.9893738031387329,grad_norm: 0.9999991685562626, iteration: 77124
loss: 1.0114701986312866,grad_norm: 0.8686389309977496, iteration: 77125
loss: 1.0312913656234741,grad_norm: 0.9999995188450089, iteration: 77126
loss: 1.0064024925231934,grad_norm: 0.9999991883666371, iteration: 77127
loss: 1.0132442712783813,grad_norm: 0.8525353352144707, iteration: 77128
loss: 1.009734034538269,grad_norm: 0.9737592207373235, iteration: 77129
loss: 0.9999322891235352,grad_norm: 0.9999991962434311, iteration: 77130
loss: 1.0226457118988037,grad_norm: 0.999999448779939, iteration: 77131
loss: 0.9815307855606079,grad_norm: 0.9198676923144087, iteration: 77132
loss: 1.2975401878356934,grad_norm: 0.999999462193216, iteration: 77133
loss: 1.044800043106079,grad_norm: 0.9999992843783625, iteration: 77134
loss: 1.0006674528121948,grad_norm: 0.9999991261799129, iteration: 77135
loss: 1.0253031253814697,grad_norm: 0.9999990165612624, iteration: 77136
loss: 0.9709781408309937,grad_norm: 0.9133375610581771, iteration: 77137
loss: 0.9895222187042236,grad_norm: 0.9999992338436599, iteration: 77138
loss: 1.0200496912002563,grad_norm: 0.9771730867497782, iteration: 77139
loss: 0.9874418377876282,grad_norm: 0.9999992396165436, iteration: 77140
loss: 0.9963381290435791,grad_norm: 0.999999330406494, iteration: 77141
loss: 1.0005894899368286,grad_norm: 0.9762395816427611, iteration: 77142
loss: 1.027923822402954,grad_norm: 0.8679054270754495, iteration: 77143
loss: 1.0015062093734741,grad_norm: 0.9999992325343866, iteration: 77144
loss: 1.0006853342056274,grad_norm: 0.9999992421660789, iteration: 77145
loss: 1.159321904182434,grad_norm: 1.0000001154115958, iteration: 77146
loss: 1.0422558784484863,grad_norm: 0.9999990734565737, iteration: 77147
loss: 0.9894437789916992,grad_norm: 0.9999991476398759, iteration: 77148
loss: 1.006955623626709,grad_norm: 0.999999075938345, iteration: 77149
loss: 1.2436124086380005,grad_norm: 0.9999992087387527, iteration: 77150
loss: 1.0461912155151367,grad_norm: 0.9999992434580623, iteration: 77151
loss: 1.0801844596862793,grad_norm: 0.9999993178317768, iteration: 77152
loss: 1.3307387828826904,grad_norm: 0.9999998864207831, iteration: 77153
loss: 1.0763100385665894,grad_norm: 0.9999992336224948, iteration: 77154
loss: 1.3123316764831543,grad_norm: 0.9999995580824845, iteration: 77155
loss: 1.0252352952957153,grad_norm: 0.9999993009958349, iteration: 77156
loss: 1.0348392724990845,grad_norm: 0.999999082692017, iteration: 77157
loss: 1.008370041847229,grad_norm: 0.9999990348978014, iteration: 77158
loss: 1.0344411134719849,grad_norm: 0.9999993116545197, iteration: 77159
loss: 1.1862729787826538,grad_norm: 0.9999995731682649, iteration: 77160
loss: 1.1903760433197021,grad_norm: 0.99999981379572, iteration: 77161
loss: 1.156683325767517,grad_norm: 0.9999995367565915, iteration: 77162
loss: 1.0131261348724365,grad_norm: 0.8900954509337987, iteration: 77163
loss: 1.0316308736801147,grad_norm: 0.9999991756134644, iteration: 77164
loss: 1.0164159536361694,grad_norm: 0.9992239084632388, iteration: 77165
loss: 1.0336426496505737,grad_norm: 0.9999996091932637, iteration: 77166
loss: 1.1216037273406982,grad_norm: 0.9999992693224307, iteration: 77167
loss: 1.0400562286376953,grad_norm: 0.9999993270574705, iteration: 77168
loss: 1.1043328046798706,grad_norm: 0.9999992514682099, iteration: 77169
loss: 1.0546040534973145,grad_norm: 0.9999994472108459, iteration: 77170
loss: 1.0160819292068481,grad_norm: 0.9999991532925521, iteration: 77171
loss: 1.0032306909561157,grad_norm: 0.999999300588198, iteration: 77172
loss: 1.1147836446762085,grad_norm: 0.9999998313649807, iteration: 77173
loss: 1.0519388914108276,grad_norm: 0.9999992413687981, iteration: 77174
loss: 1.1076854467391968,grad_norm: 0.9999997260527625, iteration: 77175
loss: 0.9700276851654053,grad_norm: 0.9999995326907167, iteration: 77176
loss: 0.9909254312515259,grad_norm: 0.999999163219993, iteration: 77177
loss: 1.0174891948699951,grad_norm: 0.9999999175033293, iteration: 77178
loss: 1.0143709182739258,grad_norm: 0.9999990883942813, iteration: 77179
loss: 0.9988868832588196,grad_norm: 0.9233636755628186, iteration: 77180
loss: 1.0909931659698486,grad_norm: 0.9999992847823335, iteration: 77181
loss: 0.9865038990974426,grad_norm: 0.9999992794547179, iteration: 77182
loss: 1.1255040168762207,grad_norm: 0.9999999014396382, iteration: 77183
loss: 1.0383985042572021,grad_norm: 0.9999994193144395, iteration: 77184
loss: 1.1346895694732666,grad_norm: 0.9999996778928291, iteration: 77185
loss: 1.0119094848632812,grad_norm: 0.99999907424669, iteration: 77186
loss: 1.0775396823883057,grad_norm: 0.9999998806925086, iteration: 77187
loss: 0.9876893758773804,grad_norm: 0.9999991953437509, iteration: 77188
loss: 1.006243348121643,grad_norm: 0.9999993489446908, iteration: 77189
loss: 1.0318676233291626,grad_norm: 0.9999992195695759, iteration: 77190
loss: 1.0240356922149658,grad_norm: 0.9999992101125948, iteration: 77191
loss: 1.0305171012878418,grad_norm: 0.9999996756308261, iteration: 77192
loss: 1.0167864561080933,grad_norm: 0.9999990939608805, iteration: 77193
loss: 0.9753974080085754,grad_norm: 0.9999989976961773, iteration: 77194
loss: 1.0123975276947021,grad_norm: 0.9999989653998343, iteration: 77195
loss: 1.0303763151168823,grad_norm: 0.9999995299496968, iteration: 77196
loss: 0.9625350832939148,grad_norm: 0.9999994967636167, iteration: 77197
loss: 1.0264089107513428,grad_norm: 0.9539006815538196, iteration: 77198
loss: 1.014168620109558,grad_norm: 0.9999989505988562, iteration: 77199
loss: 1.032788872718811,grad_norm: 0.9999999795610777, iteration: 77200
loss: 0.9978024363517761,grad_norm: 0.9999990309558568, iteration: 77201
loss: 1.010783076286316,grad_norm: 0.9999991429379789, iteration: 77202
loss: 1.0216230154037476,grad_norm: 0.9999990061028942, iteration: 77203
loss: 1.0627814531326294,grad_norm: 0.9999991933682021, iteration: 77204
loss: 0.9941333532333374,grad_norm: 0.999999109041417, iteration: 77205
loss: 1.0741997957229614,grad_norm: 0.9999990677949279, iteration: 77206
loss: 0.9938803315162659,grad_norm: 0.9999991333072548, iteration: 77207
loss: 0.9796874523162842,grad_norm: 0.9999991952730017, iteration: 77208
loss: 1.010596513748169,grad_norm: 0.9999990711386838, iteration: 77209
loss: 1.028328776359558,grad_norm: 0.9999991122111784, iteration: 77210
loss: 1.040297031402588,grad_norm: 0.9999993426411866, iteration: 77211
loss: 1.009065866470337,grad_norm: 0.9999992289353415, iteration: 77212
loss: 1.0318350791931152,grad_norm: 0.999999423407146, iteration: 77213
loss: 1.03010094165802,grad_norm: 0.9999989972628143, iteration: 77214
loss: 1.0097576379776,grad_norm: 0.999999220833006, iteration: 77215
loss: 1.0140249729156494,grad_norm: 0.9789593106539533, iteration: 77216
loss: 1.0424652099609375,grad_norm: 0.9999999460147281, iteration: 77217
loss: 0.9985716938972473,grad_norm: 0.9831101355222258, iteration: 77218
loss: 1.0217370986938477,grad_norm: 0.9999993485125945, iteration: 77219
loss: 1.0213944911956787,grad_norm: 0.9999990336349539, iteration: 77220
loss: 1.0005156993865967,grad_norm: 0.9999991550201145, iteration: 77221
loss: 1.0334988832473755,grad_norm: 0.9999998571113159, iteration: 77222
loss: 0.9996287822723389,grad_norm: 0.9999991037330906, iteration: 77223
loss: 1.018142819404602,grad_norm: 0.9999992118165109, iteration: 77224
loss: 1.0365744829177856,grad_norm: 0.9999996031284606, iteration: 77225
loss: 1.0024076700210571,grad_norm: 0.8975034430531237, iteration: 77226
loss: 0.9696097373962402,grad_norm: 0.9999991842446629, iteration: 77227
loss: 1.025669813156128,grad_norm: 0.930741852604663, iteration: 77228
loss: 1.037514328956604,grad_norm: 0.9999993317210444, iteration: 77229
loss: 0.9888491630554199,grad_norm: 0.9999998638571997, iteration: 77230
loss: 0.9370114207267761,grad_norm: 0.9999990780050173, iteration: 77231
loss: 0.9662250280380249,grad_norm: 0.9999998179277964, iteration: 77232
loss: 1.1300387382507324,grad_norm: 0.9999994155780845, iteration: 77233
loss: 0.9594270586967468,grad_norm: 0.9999992504612059, iteration: 77234
loss: 1.020790696144104,grad_norm: 0.9999992177959806, iteration: 77235
loss: 1.030017375946045,grad_norm: 0.9999991893629515, iteration: 77236
loss: 1.0515997409820557,grad_norm: 0.9605217249970263, iteration: 77237
loss: 0.9735888242721558,grad_norm: 0.9999990626162311, iteration: 77238
loss: 0.951659083366394,grad_norm: 0.999999210821396, iteration: 77239
loss: 0.9988892078399658,grad_norm: 0.9999990281637919, iteration: 77240
loss: 1.0105438232421875,grad_norm: 0.9999992461502929, iteration: 77241
loss: 0.9975720643997192,grad_norm: 0.9837990287291313, iteration: 77242
loss: 1.000537633895874,grad_norm: 0.9856852663634423, iteration: 77243
loss: 1.0152965784072876,grad_norm: 0.9999989309795091, iteration: 77244
loss: 1.0229151248931885,grad_norm: 0.9999991699696824, iteration: 77245
loss: 0.9961826801300049,grad_norm: 0.9999990826141497, iteration: 77246
loss: 1.0240775346755981,grad_norm: 0.9999992184062937, iteration: 77247
loss: 0.9600107669830322,grad_norm: 0.9999993852180009, iteration: 77248
loss: 0.9606900811195374,grad_norm: 0.9999990173388362, iteration: 77249
loss: 0.9853417277336121,grad_norm: 0.8545954011425869, iteration: 77250
loss: 0.991383969783783,grad_norm: 0.9505481556212031, iteration: 77251
loss: 1.0030171871185303,grad_norm: 0.950994849736044, iteration: 77252
loss: 1.0074962377548218,grad_norm: 0.9999993769959873, iteration: 77253
loss: 0.998325765132904,grad_norm: 0.999999252014097, iteration: 77254
loss: 1.1384544372558594,grad_norm: 0.9999991989941726, iteration: 77255
loss: 1.042754888534546,grad_norm: 0.9999990142905327, iteration: 77256
loss: 1.0032154321670532,grad_norm: 0.9464769838467081, iteration: 77257
loss: 1.1264756917953491,grad_norm: 0.9999995870894955, iteration: 77258
loss: 0.9828466773033142,grad_norm: 0.9716345430903333, iteration: 77259
loss: 1.0182102918624878,grad_norm: 0.999999141664642, iteration: 77260
loss: 1.0114936828613281,grad_norm: 0.9999990560557502, iteration: 77261
loss: 1.029778242111206,grad_norm: 0.9999989872583794, iteration: 77262
loss: 1.0778464078903198,grad_norm: 0.9999990446194169, iteration: 77263
loss: 1.0129462480545044,grad_norm: 0.9999991765267248, iteration: 77264
loss: 1.0843095779418945,grad_norm: 0.9999993695104252, iteration: 77265
loss: 0.989986002445221,grad_norm: 0.9999992768719891, iteration: 77266
loss: 1.0711647272109985,grad_norm: 0.9999996085105519, iteration: 77267
loss: 1.0319782495498657,grad_norm: 0.9999994514811942, iteration: 77268
loss: 1.0435916185379028,grad_norm: 0.9999992675619648, iteration: 77269
loss: 0.973928689956665,grad_norm: 0.9999991870400383, iteration: 77270
loss: 1.0069807767868042,grad_norm: 0.9847657198785558, iteration: 77271
loss: 0.9711160063743591,grad_norm: 0.9905536304415486, iteration: 77272
loss: 0.9374988079071045,grad_norm: 0.9999991664970266, iteration: 77273
loss: 1.0782861709594727,grad_norm: 0.9999995470812938, iteration: 77274
loss: 1.003148078918457,grad_norm: 0.9789971843480714, iteration: 77275
loss: 0.9793733358383179,grad_norm: 0.9999991845462849, iteration: 77276
loss: 1.015148639678955,grad_norm: 0.9999992831888418, iteration: 77277
loss: 1.0101836919784546,grad_norm: 0.9999990564992355, iteration: 77278
loss: 0.9965569972991943,grad_norm: 0.9762001588479213, iteration: 77279
loss: 1.0144450664520264,grad_norm: 0.999999107946178, iteration: 77280
loss: 1.0050444602966309,grad_norm: 0.9052950467898949, iteration: 77281
loss: 1.0333362817764282,grad_norm: 0.9999997738063859, iteration: 77282
loss: 0.9679667353630066,grad_norm: 0.838634658186345, iteration: 77283
loss: 1.0275217294692993,grad_norm: 0.9999993071098676, iteration: 77284
loss: 0.9848101735115051,grad_norm: 0.9999990482257807, iteration: 77285
loss: 0.9833207130432129,grad_norm: 0.9999992389542003, iteration: 77286
loss: 1.0330644845962524,grad_norm: 0.999999676854544, iteration: 77287
loss: 0.9895707964897156,grad_norm: 0.9999990899224097, iteration: 77288
loss: 0.9769843816757202,grad_norm: 0.9999992145355915, iteration: 77289
loss: 0.9949825406074524,grad_norm: 0.9284570478043335, iteration: 77290
loss: 0.97779780626297,grad_norm: 0.9065345408872452, iteration: 77291
loss: 1.0040419101715088,grad_norm: 0.9999991181444269, iteration: 77292
loss: 0.9787093997001648,grad_norm: 0.9999992092990998, iteration: 77293
loss: 1.0336501598358154,grad_norm: 0.9999991423349118, iteration: 77294
loss: 1.0554660558700562,grad_norm: 0.9658141746431377, iteration: 77295
loss: 1.1591798067092896,grad_norm: 0.9999995586004808, iteration: 77296
loss: 1.0172151327133179,grad_norm: 0.999999043244383, iteration: 77297
loss: 1.0015678405761719,grad_norm: 0.9999992500740463, iteration: 77298
loss: 1.0751723051071167,grad_norm: 0.99999969777928, iteration: 77299
loss: 1.0391277074813843,grad_norm: 0.9999992420714916, iteration: 77300
loss: 1.0230093002319336,grad_norm: 0.9999994954019714, iteration: 77301
loss: 1.0842617750167847,grad_norm: 0.9999991041939307, iteration: 77302
loss: 1.021897315979004,grad_norm: 0.9999994540349311, iteration: 77303
loss: 0.9682653546333313,grad_norm: 0.9999991915257077, iteration: 77304
loss: 0.9982366561889648,grad_norm: 0.8755768250097035, iteration: 77305
loss: 1.0044063329696655,grad_norm: 0.930568885248441, iteration: 77306
loss: 0.9837716817855835,grad_norm: 0.9999990044509831, iteration: 77307
loss: 0.9932859539985657,grad_norm: 0.8518689395108857, iteration: 77308
loss: 1.1712455749511719,grad_norm: 0.9999992892405661, iteration: 77309
loss: 1.1749261617660522,grad_norm: 0.9999994785060029, iteration: 77310
loss: 1.006899356842041,grad_norm: 0.9999990504802593, iteration: 77311
loss: 1.0250648260116577,grad_norm: 0.9999992596693915, iteration: 77312
loss: 0.9784457683563232,grad_norm: 0.9940782435049729, iteration: 77313
loss: 1.1165213584899902,grad_norm: 0.9999994089429991, iteration: 77314
loss: 1.0523067712783813,grad_norm: 0.9999993353316228, iteration: 77315
loss: 1.0269598960876465,grad_norm: 0.9962953506120547, iteration: 77316
loss: 1.011136770248413,grad_norm: 0.9388165289439581, iteration: 77317
loss: 1.0105434656143188,grad_norm: 0.9499488783869058, iteration: 77318
loss: 1.049272060394287,grad_norm: 0.9999994058436583, iteration: 77319
loss: 1.0343576669692993,grad_norm: 0.9999991688331674, iteration: 77320
loss: 1.0143275260925293,grad_norm: 0.9999991211305488, iteration: 77321
loss: 0.9717366099357605,grad_norm: 0.9999989924282949, iteration: 77322
loss: 0.9565088152885437,grad_norm: 0.9999992426093686, iteration: 77323
loss: 0.9869894981384277,grad_norm: 0.9487831707163821, iteration: 77324
loss: 0.9601777791976929,grad_norm: 0.8970356197020558, iteration: 77325
loss: 1.0197393894195557,grad_norm: 0.9993832979499395, iteration: 77326
loss: 0.9840578436851501,grad_norm: 0.9999991837850255, iteration: 77327
loss: 0.9840077757835388,grad_norm: 0.9627587452657106, iteration: 77328
loss: 0.9762487411499023,grad_norm: 0.999999179450696, iteration: 77329
loss: 1.0137126445770264,grad_norm: 0.9999992717729913, iteration: 77330
loss: 0.9577909708023071,grad_norm: 0.9999992362253884, iteration: 77331
loss: 0.9472048282623291,grad_norm: 0.850797218360456, iteration: 77332
loss: 1.021878719329834,grad_norm: 0.9999991316559056, iteration: 77333
loss: 0.9817330241203308,grad_norm: 0.9999992212426028, iteration: 77334
loss: 1.027671456336975,grad_norm: 0.9999991559804614, iteration: 77335
loss: 1.0897303819656372,grad_norm: 1.0000000234688713, iteration: 77336
loss: 1.0078884363174438,grad_norm: 0.9999990481528682, iteration: 77337
loss: 1.0343096256256104,grad_norm: 0.9999990866340662, iteration: 77338
loss: 1.043843388557434,grad_norm: 0.9999991913338908, iteration: 77339
loss: 1.00797700881958,grad_norm: 0.9999992437899657, iteration: 77340
loss: 1.0089120864868164,grad_norm: 0.9999992071851721, iteration: 77341
loss: 0.9804821610450745,grad_norm: 0.9999990295235234, iteration: 77342
loss: 0.9768798351287842,grad_norm: 0.9777638712803697, iteration: 77343
loss: 1.017505168914795,grad_norm: 0.9999990514797011, iteration: 77344
loss: 0.9905362725257874,grad_norm: 0.9999991385943603, iteration: 77345
loss: 1.0213857889175415,grad_norm: 0.9673396830356243, iteration: 77346
loss: 0.9601240158081055,grad_norm: 0.9999990926996055, iteration: 77347
loss: 0.9910799860954285,grad_norm: 0.999999146561313, iteration: 77348
loss: 1.0184073448181152,grad_norm: 0.9999989691586656, iteration: 77349
loss: 0.9802770614624023,grad_norm: 0.9805614148474842, iteration: 77350
loss: 1.0831325054168701,grad_norm: 0.9999992109988306, iteration: 77351
loss: 0.9911571741104126,grad_norm: 0.9999991365541645, iteration: 77352
loss: 1.03187096118927,grad_norm: 0.9670302288494205, iteration: 77353
loss: 0.9828041195869446,grad_norm: 0.9999992585674049, iteration: 77354
loss: 0.9888871312141418,grad_norm: 0.9166404786497352, iteration: 77355
loss: 1.01473069190979,grad_norm: 0.9999994083658414, iteration: 77356
loss: 1.0196913480758667,grad_norm: 0.9999459594780313, iteration: 77357
loss: 0.9949889183044434,grad_norm: 0.9907793587348024, iteration: 77358
loss: 0.9844511151313782,grad_norm: 0.9999991827751212, iteration: 77359
loss: 0.9817800521850586,grad_norm: 0.9496184648292939, iteration: 77360
loss: 1.032763957977295,grad_norm: 0.999999243630757, iteration: 77361
loss: 1.0241931676864624,grad_norm: 0.97997340594879, iteration: 77362
loss: 1.0088310241699219,grad_norm: 0.9892222758613749, iteration: 77363
loss: 1.0285298824310303,grad_norm: 0.9695786283771194, iteration: 77364
loss: 1.0023889541625977,grad_norm: 0.9999992245877775, iteration: 77365
loss: 1.0244874954223633,grad_norm: 0.9675304739761004, iteration: 77366
loss: 1.003887414932251,grad_norm: 0.9999991169973578, iteration: 77367
loss: 1.0019946098327637,grad_norm: 0.9999991240886547, iteration: 77368
loss: 1.0078566074371338,grad_norm: 0.9999992328076532, iteration: 77369
loss: 1.0807615518569946,grad_norm: 0.9999993262885347, iteration: 77370
loss: 1.022105097770691,grad_norm: 0.8917309460241041, iteration: 77371
loss: 1.0036851167678833,grad_norm: 0.9947129749055981, iteration: 77372
loss: 1.0501130819320679,grad_norm: 0.9999992433411041, iteration: 77373
loss: 0.9918612241744995,grad_norm: 0.8303602994001587, iteration: 77374
loss: 1.0162924528121948,grad_norm: 0.9999991284513088, iteration: 77375
loss: 1.014085054397583,grad_norm: 0.917403760573344, iteration: 77376
loss: 1.000190019607544,grad_norm: 0.9218898569298395, iteration: 77377
loss: 1.0060287714004517,grad_norm: 0.9222321936702849, iteration: 77378
loss: 1.0054856538772583,grad_norm: 0.9999991154304274, iteration: 77379
loss: 1.038596749305725,grad_norm: 0.9999992165315139, iteration: 77380
loss: 0.9992062449455261,grad_norm: 0.9999991576930609, iteration: 77381
loss: 1.0022261142730713,grad_norm: 0.9574972564056385, iteration: 77382
loss: 0.978608250617981,grad_norm: 0.9999992245725386, iteration: 77383
loss: 1.0270401239395142,grad_norm: 0.999998988180943, iteration: 77384
loss: 1.0295876264572144,grad_norm: 0.9999994809964567, iteration: 77385
loss: 0.9837040901184082,grad_norm: 0.9999992169243438, iteration: 77386
loss: 1.0175459384918213,grad_norm: 0.9999990802460611, iteration: 77387
loss: 1.0151009559631348,grad_norm: 0.9999991648966509, iteration: 77388
loss: 1.021657943725586,grad_norm: 0.9956423148286594, iteration: 77389
loss: 1.0006208419799805,grad_norm: 0.9999990968827419, iteration: 77390
loss: 1.0215224027633667,grad_norm: 0.9999991398924943, iteration: 77391
loss: 1.0485905408859253,grad_norm: 0.9999993896081194, iteration: 77392
loss: 0.9898303151130676,grad_norm: 0.9728764280784472, iteration: 77393
loss: 1.0229443311691284,grad_norm: 0.9999993176756529, iteration: 77394
loss: 0.9955045580863953,grad_norm: 0.9999990526674942, iteration: 77395
loss: 0.9800014495849609,grad_norm: 0.9999996458585252, iteration: 77396
loss: 0.9859365224838257,grad_norm: 0.9999991242325724, iteration: 77397
loss: 0.9887245893478394,grad_norm: 0.9999991693419755, iteration: 77398
loss: 1.0430470705032349,grad_norm: 0.893688967573546, iteration: 77399
loss: 1.017093300819397,grad_norm: 0.991835688276682, iteration: 77400
loss: 1.0208508968353271,grad_norm: 0.9707713279067304, iteration: 77401
loss: 0.9782281517982483,grad_norm: 0.9999991193926705, iteration: 77402
loss: 0.9686024785041809,grad_norm: 0.994757980834198, iteration: 77403
loss: 1.039510726928711,grad_norm: 0.9999991117091804, iteration: 77404
loss: 0.9488576054573059,grad_norm: 0.9999990037285019, iteration: 77405
loss: 1.0324478149414062,grad_norm: 0.9057705809754666, iteration: 77406
loss: 0.9932808876037598,grad_norm: 0.999999315989336, iteration: 77407
loss: 0.974055290222168,grad_norm: 0.9999992095667036, iteration: 77408
loss: 1.0253945589065552,grad_norm: 0.9999990513800608, iteration: 77409
loss: 0.9912658929824829,grad_norm: 0.9453884912860556, iteration: 77410
loss: 1.0058581829071045,grad_norm: 0.9762097688932143, iteration: 77411
loss: 1.0136746168136597,grad_norm: 0.9544241855753651, iteration: 77412
loss: 0.9994996786117554,grad_norm: 0.9999990391454004, iteration: 77413
loss: 0.9894071221351624,grad_norm: 0.9610867505900968, iteration: 77414
loss: 0.983666718006134,grad_norm: 0.999999062454627, iteration: 77415
loss: 1.0138368606567383,grad_norm: 0.9999990153188375, iteration: 77416
loss: 1.005963683128357,grad_norm: 0.9999991195913441, iteration: 77417
loss: 0.9988149404525757,grad_norm: 0.9999991627902014, iteration: 77418
loss: 1.0111688375473022,grad_norm: 0.8572660163142471, iteration: 77419
loss: 1.0157265663146973,grad_norm: 0.998354736021089, iteration: 77420
loss: 0.9896197319030762,grad_norm: 0.8678599288898434, iteration: 77421
loss: 1.0059889554977417,grad_norm: 0.9147363170995717, iteration: 77422
loss: 0.9773650169372559,grad_norm: 0.8510129920196834, iteration: 77423
loss: 0.9735475182533264,grad_norm: 0.9999990948600226, iteration: 77424
loss: 0.9783344864845276,grad_norm: 0.9999992290586233, iteration: 77425
loss: 0.9778543710708618,grad_norm: 0.9999991353159356, iteration: 77426
loss: 0.9949769377708435,grad_norm: 0.9751824082638723, iteration: 77427
loss: 1.0171962976455688,grad_norm: 0.9582922572896021, iteration: 77428
loss: 1.0476429462432861,grad_norm: 0.9999991099621408, iteration: 77429
loss: 1.008880853652954,grad_norm: 0.9999992030157301, iteration: 77430
loss: 0.9963261485099792,grad_norm: 0.9999990540311952, iteration: 77431
loss: 1.0199081897735596,grad_norm: 0.9999989806592002, iteration: 77432
loss: 1.0139029026031494,grad_norm: 0.990839356534598, iteration: 77433
loss: 1.0091228485107422,grad_norm: 0.9999990701350777, iteration: 77434
loss: 0.993323802947998,grad_norm: 0.9999990900843309, iteration: 77435
loss: 1.0077452659606934,grad_norm: 0.9999992300352744, iteration: 77436
loss: 0.9886893033981323,grad_norm: 0.9890481254207698, iteration: 77437
loss: 1.006583333015442,grad_norm: 0.9999993666274802, iteration: 77438
loss: 1.0308642387390137,grad_norm: 0.8674805811217529, iteration: 77439
loss: 1.0335561037063599,grad_norm: 0.9999992719322356, iteration: 77440
loss: 1.0134304761886597,grad_norm: 0.9999992049320565, iteration: 77441
loss: 0.9859083294868469,grad_norm: 0.9999990664386639, iteration: 77442
loss: 1.0322126150131226,grad_norm: 0.999999086261271, iteration: 77443
loss: 1.0213333368301392,grad_norm: 0.9374160414725318, iteration: 77444
loss: 0.9918562173843384,grad_norm: 0.9999992120182501, iteration: 77445
loss: 1.0065792798995972,grad_norm: 0.9522166618396862, iteration: 77446
loss: 1.0181347131729126,grad_norm: 0.9509081865636169, iteration: 77447
loss: 0.9848647117614746,grad_norm: 0.999999125505021, iteration: 77448
loss: 1.0104923248291016,grad_norm: 0.9999991568396921, iteration: 77449
loss: 1.0277130603790283,grad_norm: 0.96018838574457, iteration: 77450
loss: 1.0288618803024292,grad_norm: 0.9999991996453997, iteration: 77451
loss: 1.0009812116622925,grad_norm: 0.8828622902349513, iteration: 77452
loss: 0.995512843132019,grad_norm: 0.9926023896960656, iteration: 77453
loss: 0.9965218901634216,grad_norm: 0.999999193586283, iteration: 77454
loss: 0.9919141530990601,grad_norm: 0.999999690381708, iteration: 77455
loss: 0.9772341251373291,grad_norm: 0.9999992340646341, iteration: 77456
loss: 0.9932066202163696,grad_norm: 0.8391680601073164, iteration: 77457
loss: 0.9629235863685608,grad_norm: 0.9999991293808353, iteration: 77458
loss: 0.992779016494751,grad_norm: 0.999999183626252, iteration: 77459
loss: 0.9837357401847839,grad_norm: 0.9693793682036956, iteration: 77460
loss: 0.9939430356025696,grad_norm: 0.9999992088282714, iteration: 77461
loss: 1.0029231309890747,grad_norm: 0.9999990599808518, iteration: 77462
loss: 0.9961838722229004,grad_norm: 0.9999990315058834, iteration: 77463
loss: 1.0040011405944824,grad_norm: 0.8874507551679137, iteration: 77464
loss: 1.0401086807250977,grad_norm: 0.999999066434031, iteration: 77465
loss: 1.002249002456665,grad_norm: 0.9999992086334071, iteration: 77466
loss: 0.9747847318649292,grad_norm: 0.9999993003838266, iteration: 77467
loss: 1.104899525642395,grad_norm: 0.9767879538702299, iteration: 77468
loss: 1.0259206295013428,grad_norm: 0.9999991593337186, iteration: 77469
loss: 1.0175511837005615,grad_norm: 0.9491888347966534, iteration: 77470
loss: 1.0174537897109985,grad_norm: 0.9999991967488643, iteration: 77471
loss: 0.9948477149009705,grad_norm: 0.9999992343878998, iteration: 77472
loss: 0.9943769574165344,grad_norm: 0.999999226531613, iteration: 77473
loss: 1.019752025604248,grad_norm: 0.9999989715491671, iteration: 77474
loss: 1.0303786993026733,grad_norm: 0.999999277936439, iteration: 77475
loss: 0.9789007902145386,grad_norm: 0.9999992215001806, iteration: 77476
loss: 0.9954790472984314,grad_norm: 0.9999990199869656, iteration: 77477
loss: 0.9936067461967468,grad_norm: 0.9999991926122548, iteration: 77478
loss: 1.0036073923110962,grad_norm: 0.9999991186228327, iteration: 77479
loss: 1.0102661848068237,grad_norm: 0.999999088437414, iteration: 77480
loss: 0.9809976816177368,grad_norm: 0.981176482287376, iteration: 77481
loss: 1.0027786493301392,grad_norm: 0.8195904111572333, iteration: 77482
loss: 1.0026713609695435,grad_norm: 0.9999990966134605, iteration: 77483
loss: 1.002351999282837,grad_norm: 0.9999991010353659, iteration: 77484
loss: 1.0491336584091187,grad_norm: 0.9999991373966385, iteration: 77485
loss: 1.0101368427276611,grad_norm: 0.9999991311829287, iteration: 77486
loss: 0.9984683990478516,grad_norm: 0.9999991823632185, iteration: 77487
loss: 1.0018061399459839,grad_norm: 0.9999995191913539, iteration: 77488
loss: 0.977259635925293,grad_norm: 0.9999991569938372, iteration: 77489
loss: 0.9829338788986206,grad_norm: 0.9999991688553875, iteration: 77490
loss: 1.0314425230026245,grad_norm: 0.9999991123982183, iteration: 77491
loss: 0.9848392605781555,grad_norm: 0.9652168223203588, iteration: 77492
loss: 1.0676536560058594,grad_norm: 0.9999992767495777, iteration: 77493
loss: 0.9745482802391052,grad_norm: 0.9999990873363276, iteration: 77494
loss: 0.9863002300262451,grad_norm: 0.9857675425147183, iteration: 77495
loss: 1.016847848892212,grad_norm: 0.9662045433238392, iteration: 77496
loss: 1.0337690114974976,grad_norm: 0.9999991133530285, iteration: 77497
loss: 0.9790946245193481,grad_norm: 0.9567022289593132, iteration: 77498
loss: 1.0140810012817383,grad_norm: 0.9999991446004007, iteration: 77499
loss: 0.9782386422157288,grad_norm: 0.9999992827371266, iteration: 77500
loss: 0.9854413866996765,grad_norm: 0.9999992699156647, iteration: 77501
loss: 0.980718195438385,grad_norm: 0.99193606925008, iteration: 77502
loss: 0.9585402607917786,grad_norm: 0.9662859269647079, iteration: 77503
loss: 0.9389073848724365,grad_norm: 0.9999992723564292, iteration: 77504
loss: 1.0306843519210815,grad_norm: 0.8845862513751395, iteration: 77505
loss: 1.0404921770095825,grad_norm: 0.8014772212639643, iteration: 77506
loss: 1.0103085041046143,grad_norm: 0.8449722207550092, iteration: 77507
loss: 1.0152761936187744,grad_norm: 0.9335666465852834, iteration: 77508
loss: 0.9709783792495728,grad_norm: 0.9999992190421763, iteration: 77509
loss: 0.9816025495529175,grad_norm: 0.999999148805246, iteration: 77510
loss: 0.9982312321662903,grad_norm: 0.9999989739420225, iteration: 77511
loss: 1.0057085752487183,grad_norm: 0.9999997064747781, iteration: 77512
loss: 0.9816821217536926,grad_norm: 0.9434928112299661, iteration: 77513
loss: 1.0168339014053345,grad_norm: 0.9999991321137033, iteration: 77514
loss: 0.9988894462585449,grad_norm: 0.9999990932790345, iteration: 77515
loss: 1.0127168893814087,grad_norm: 0.9614341173394585, iteration: 77516
loss: 0.9954440593719482,grad_norm: 0.9171439142379253, iteration: 77517
loss: 0.9872150421142578,grad_norm: 0.9999995362900329, iteration: 77518
loss: 1.0032931566238403,grad_norm: 0.9999991304444575, iteration: 77519
loss: 0.9801240563392639,grad_norm: 0.9999990884694251, iteration: 77520
loss: 0.9876222610473633,grad_norm: 0.9999992917881425, iteration: 77521
loss: 1.003466010093689,grad_norm: 0.9680611253779202, iteration: 77522
loss: 0.9599410891532898,grad_norm: 0.9999992197872204, iteration: 77523
loss: 0.9982514977455139,grad_norm: 0.9978433254997919, iteration: 77524
loss: 0.9832059144973755,grad_norm: 0.9999990303981527, iteration: 77525
loss: 1.0042158365249634,grad_norm: 0.9999992230576006, iteration: 77526
loss: 0.984107255935669,grad_norm: 0.9999990693156814, iteration: 77527
loss: 1.011347770690918,grad_norm: 0.9999991564485764, iteration: 77528
loss: 1.0373508930206299,grad_norm: 0.9999990286922509, iteration: 77529
loss: 0.9969740509986877,grad_norm: 0.8766446259517547, iteration: 77530
loss: 0.9905546307563782,grad_norm: 0.9999992160503817, iteration: 77531
loss: 1.0162630081176758,grad_norm: 0.9999991999998903, iteration: 77532
loss: 1.012848973274231,grad_norm: 0.999999041367495, iteration: 77533
loss: 0.9890545010566711,grad_norm: 0.9999990593224588, iteration: 77534
loss: 0.9962056279182434,grad_norm: 0.9790964210501456, iteration: 77535
loss: 0.9989314079284668,grad_norm: 0.9906187592137498, iteration: 77536
loss: 1.040286898612976,grad_norm: 0.8886229735231632, iteration: 77537
loss: 0.97980135679245,grad_norm: 0.9177589980514037, iteration: 77538
loss: 0.9808035492897034,grad_norm: 0.999999196158463, iteration: 77539
loss: 1.031237006187439,grad_norm: 0.9999990956649483, iteration: 77540
loss: 1.0327812433242798,grad_norm: 0.9999992200222537, iteration: 77541
loss: 0.9843046069145203,grad_norm: 0.9544055530685572, iteration: 77542
loss: 0.9947789907455444,grad_norm: 0.9435039257179609, iteration: 77543
loss: 1.015626072883606,grad_norm: 0.999999196750775, iteration: 77544
loss: 0.9986775517463684,grad_norm: 0.9999990728635509, iteration: 77545
loss: 1.0202049016952515,grad_norm: 0.9999991476145926, iteration: 77546
loss: 0.9984813332557678,grad_norm: 0.9999990265144075, iteration: 77547
loss: 0.9911637306213379,grad_norm: 0.9999990692046601, iteration: 77548
loss: 1.0450551509857178,grad_norm: 0.9999991014182764, iteration: 77549
loss: 1.011435866355896,grad_norm: 0.9999992427092641, iteration: 77550
loss: 1.0477993488311768,grad_norm: 0.9716177405678069, iteration: 77551
loss: 0.9914194941520691,grad_norm: 0.999999429741889, iteration: 77552
loss: 1.04819917678833,grad_norm: 0.9999991379888096, iteration: 77553
loss: 1.0286164283752441,grad_norm: 0.9999995066262668, iteration: 77554
loss: 0.9883216619491577,grad_norm: 0.9999991009295716, iteration: 77555
loss: 1.0171303749084473,grad_norm: 0.9999989252017472, iteration: 77556
loss: 1.0028976202011108,grad_norm: 0.9999991522130025, iteration: 77557
loss: 1.015123963356018,grad_norm: 0.9980352938342081, iteration: 77558
loss: 1.0114158391952515,grad_norm: 0.989871831915628, iteration: 77559
loss: 1.040541410446167,grad_norm: 0.9999992179369673, iteration: 77560
loss: 1.0256839990615845,grad_norm: 0.9370108352711184, iteration: 77561
loss: 0.9992299675941467,grad_norm: 0.9999991396008735, iteration: 77562
loss: 0.9960773587226868,grad_norm: 0.9943680031952573, iteration: 77563
loss: 1.0042660236358643,grad_norm: 0.9999992694601532, iteration: 77564
loss: 0.9960435628890991,grad_norm: 0.9999992463286821, iteration: 77565
loss: 1.0147030353546143,grad_norm: 0.9729776931058036, iteration: 77566
loss: 1.0017726421356201,grad_norm: 0.9999990334698351, iteration: 77567
loss: 1.0091233253479004,grad_norm: 0.9364598934988865, iteration: 77568
loss: 0.9906631112098694,grad_norm: 0.9999991556596161, iteration: 77569
loss: 1.0180330276489258,grad_norm: 0.9177207676462659, iteration: 77570
loss: 1.0430347919464111,grad_norm: 0.9999992334950524, iteration: 77571
loss: 1.0043392181396484,grad_norm: 0.9999991550720346, iteration: 77572
loss: 0.9750261902809143,grad_norm: 0.999999113746384, iteration: 77573
loss: 0.9880744814872742,grad_norm: 0.999999199998584, iteration: 77574
loss: 1.0111024379730225,grad_norm: 0.9999991509771676, iteration: 77575
loss: 1.034708023071289,grad_norm: 0.9999992613297494, iteration: 77576
loss: 1.097015380859375,grad_norm: 0.9999994443807828, iteration: 77577
loss: 0.9861466288566589,grad_norm: 0.9884106183949101, iteration: 77578
loss: 0.973991334438324,grad_norm: 0.9999992755299575, iteration: 77579
loss: 1.000089406967163,grad_norm: 0.9999992648732049, iteration: 77580
loss: 1.024312973022461,grad_norm: 0.9755392011597396, iteration: 77581
loss: 1.0105853080749512,grad_norm: 0.9667603195949854, iteration: 77582
loss: 1.001670002937317,grad_norm: 0.999999020745545, iteration: 77583
loss: 1.0028687715530396,grad_norm: 0.9999993763616153, iteration: 77584
loss: 1.0193097591400146,grad_norm: 0.9834115778085145, iteration: 77585
loss: 1.0263153314590454,grad_norm: 0.9594456490582581, iteration: 77586
loss: 0.9940928816795349,grad_norm: 0.9999990262732457, iteration: 77587
loss: 1.048567295074463,grad_norm: 0.9762458678695571, iteration: 77588
loss: 0.9661028385162354,grad_norm: 0.9459750846322293, iteration: 77589
loss: 0.9726668000221252,grad_norm: 0.9999990976227936, iteration: 77590
loss: 1.0316522121429443,grad_norm: 0.9648107122722236, iteration: 77591
loss: 0.9821277260780334,grad_norm: 0.8412846365571649, iteration: 77592
loss: 0.965240478515625,grad_norm: 0.999998961991132, iteration: 77593
loss: 1.0196318626403809,grad_norm: 0.9999991515214202, iteration: 77594
loss: 0.9991745948791504,grad_norm: 0.9999992020771669, iteration: 77595
loss: 0.9930847883224487,grad_norm: 0.9999991601235448, iteration: 77596
loss: 0.9842458963394165,grad_norm: 0.9829893793553907, iteration: 77597
loss: 0.9925255179405212,grad_norm: 0.9999991209752267, iteration: 77598
loss: 1.0256414413452148,grad_norm: 0.9325227329494064, iteration: 77599
loss: 0.9711097478866577,grad_norm: 0.999999143810777, iteration: 77600
loss: 0.986042857170105,grad_norm: 0.9999991263051007, iteration: 77601
loss: 1.0180336236953735,grad_norm: 0.99999925986889, iteration: 77602
loss: 1.033982515335083,grad_norm: 0.9999990756849751, iteration: 77603
loss: 1.0095078945159912,grad_norm: 0.9999991917078931, iteration: 77604
loss: 1.0008248090744019,grad_norm: 0.999999147387837, iteration: 77605
loss: 0.9956920742988586,grad_norm: 0.9809522466478503, iteration: 77606
loss: 1.01710045337677,grad_norm: 0.975300886344304, iteration: 77607
loss: 0.9880244731903076,grad_norm: 0.9999991153441443, iteration: 77608
loss: 0.9904765486717224,grad_norm: 0.878629263193874, iteration: 77609
loss: 0.9642598032951355,grad_norm: 0.9999992173710464, iteration: 77610
loss: 1.0079773664474487,grad_norm: 0.9999990572166776, iteration: 77611
loss: 0.9860456585884094,grad_norm: 0.9999990016096906, iteration: 77612
loss: 0.9918862581253052,grad_norm: 0.9910876962571042, iteration: 77613
loss: 1.0062178373336792,grad_norm: 0.8565509541328296, iteration: 77614
loss: 1.0135148763656616,grad_norm: 0.999999162560449, iteration: 77615
loss: 0.9884393215179443,grad_norm: 0.9999991586968643, iteration: 77616
loss: 0.9990047812461853,grad_norm: 0.9954553769232198, iteration: 77617
loss: 0.9700663685798645,grad_norm: 0.9999991866375529, iteration: 77618
loss: 0.9768640995025635,grad_norm: 0.9999991499734966, iteration: 77619
loss: 0.9895909428596497,grad_norm: 0.9028789089443273, iteration: 77620
loss: 0.9943606853485107,grad_norm: 0.9471220015076631, iteration: 77621
loss: 0.9927137494087219,grad_norm: 0.9999991825629536, iteration: 77622
loss: 1.0012234449386597,grad_norm: 0.9999992173321522, iteration: 77623
loss: 0.9767842888832092,grad_norm: 0.9999989848269383, iteration: 77624
loss: 1.0077791213989258,grad_norm: 0.9999991513492321, iteration: 77625
loss: 1.0310345888137817,grad_norm: 0.9999991876725121, iteration: 77626
loss: 0.9842909574508667,grad_norm: 0.9999990296812439, iteration: 77627
loss: 0.9878333806991577,grad_norm: 0.9999991938049464, iteration: 77628
loss: 0.9887690544128418,grad_norm: 0.9698122267138716, iteration: 77629
loss: 1.0407668352127075,grad_norm: 0.9999991834400211, iteration: 77630
loss: 0.9737403392791748,grad_norm: 0.9046729914374803, iteration: 77631
loss: 0.9869579076766968,grad_norm: 0.9627609797023132, iteration: 77632
loss: 0.9901098608970642,grad_norm: 0.9999992956758939, iteration: 77633
loss: 1.0301454067230225,grad_norm: 0.9999992107040928, iteration: 77634
loss: 1.019843578338623,grad_norm: 0.9999990789855545, iteration: 77635
loss: 1.0108922719955444,grad_norm: 0.9999992121074991, iteration: 77636
loss: 0.996762216091156,grad_norm: 0.9999992692791109, iteration: 77637
loss: 1.0278717279434204,grad_norm: 0.9259782978233085, iteration: 77638
loss: 1.026533842086792,grad_norm: 0.9999991522486287, iteration: 77639
loss: 0.9970545768737793,grad_norm: 0.9999990770344344, iteration: 77640
loss: 1.0066145658493042,grad_norm: 0.948246366472223, iteration: 77641
loss: 0.9690356850624084,grad_norm: 0.9413253675924467, iteration: 77642
loss: 0.967699408531189,grad_norm: 0.9999992932281485, iteration: 77643
loss: 1.0767216682434082,grad_norm: 0.999999267216037, iteration: 77644
loss: 1.011884331703186,grad_norm: 0.9999991487717291, iteration: 77645
loss: 0.9751987457275391,grad_norm: 0.9999990658195793, iteration: 77646
loss: 0.9787366390228271,grad_norm: 0.9720446273552814, iteration: 77647
loss: 1.030650019645691,grad_norm: 0.9999992323553303, iteration: 77648
loss: 0.9487504363059998,grad_norm: 0.9999990308116623, iteration: 77649
loss: 0.9773425459861755,grad_norm: 0.999999067212734, iteration: 77650
loss: 0.9545881152153015,grad_norm: 0.9547747043194763, iteration: 77651
loss: 0.9913643598556519,grad_norm: 0.9999990433126422, iteration: 77652
loss: 0.9865875244140625,grad_norm: 0.999999155582403, iteration: 77653
loss: 1.0248371362686157,grad_norm: 0.9999992807161819, iteration: 77654
loss: 0.9988901615142822,grad_norm: 0.9999989517715783, iteration: 77655
loss: 1.0282098054885864,grad_norm: 0.9999996164217171, iteration: 77656
loss: 1.001422643661499,grad_norm: 0.9999991226535071, iteration: 77657
loss: 0.9905202388763428,grad_norm: 0.9803778913313175, iteration: 77658
loss: 0.9737181067466736,grad_norm: 0.9999991878601311, iteration: 77659
loss: 0.9710084795951843,grad_norm: 0.9923558204453539, iteration: 77660
loss: 0.9998632073402405,grad_norm: 0.9999991629526637, iteration: 77661
loss: 1.0107004642486572,grad_norm: 0.9999993657595503, iteration: 77662
loss: 0.9676463007926941,grad_norm: 0.9587467992957543, iteration: 77663
loss: 0.9592939615249634,grad_norm: 0.9999989972142869, iteration: 77664
loss: 0.9792343378067017,grad_norm: 0.9999991219774124, iteration: 77665
loss: 1.0035219192504883,grad_norm: 0.9999994228954416, iteration: 77666
loss: 0.9242298603057861,grad_norm: 0.9999993783685414, iteration: 77667
loss: 0.9686440229415894,grad_norm: 0.996033563699889, iteration: 77668
loss: 0.9988194108009338,grad_norm: 0.9999991927474186, iteration: 77669
loss: 0.9659099578857422,grad_norm: 0.9999991314051896, iteration: 77670
loss: 0.9935516119003296,grad_norm: 0.9999991089608338, iteration: 77671
loss: 0.9874663352966309,grad_norm: 0.9999990974080226, iteration: 77672
loss: 1.0373177528381348,grad_norm: 0.7996569872409333, iteration: 77673
loss: 0.9876420497894287,grad_norm: 0.9999992238360224, iteration: 77674
loss: 0.9972379803657532,grad_norm: 0.99999913957451, iteration: 77675
loss: 0.9783447980880737,grad_norm: 0.9999991573018518, iteration: 77676
loss: 1.015883207321167,grad_norm: 0.9999991459458665, iteration: 77677
loss: 1.0020240545272827,grad_norm: 0.9999991396255443, iteration: 77678
loss: 0.9779913425445557,grad_norm: 0.9999995127156396, iteration: 77679
loss: 0.965027391910553,grad_norm: 0.9999990881554047, iteration: 77680
loss: 0.9955004453659058,grad_norm: 0.9860450513218301, iteration: 77681
loss: 1.003225564956665,grad_norm: 0.9699683387209562, iteration: 77682
loss: 1.0085712671279907,grad_norm: 0.9158408860470488, iteration: 77683
loss: 1.00334632396698,grad_norm: 0.9999990565241059, iteration: 77684
loss: 1.0166093111038208,grad_norm: 0.9467827378784822, iteration: 77685
loss: 0.9932472705841064,grad_norm: 0.9999990826456027, iteration: 77686
loss: 1.0163524150848389,grad_norm: 0.9999990538319276, iteration: 77687
loss: 0.9813066720962524,grad_norm: 0.9801580683558982, iteration: 77688
loss: 0.9926998615264893,grad_norm: 0.9999990698081636, iteration: 77689
loss: 1.0147453546524048,grad_norm: 0.9999992127862429, iteration: 77690
loss: 1.0176502466201782,grad_norm: 0.9999991829285853, iteration: 77691
loss: 1.050850749015808,grad_norm: 0.999999718467084, iteration: 77692
loss: 0.9985504746437073,grad_norm: 0.9999993931158311, iteration: 77693
loss: 0.9948155879974365,grad_norm: 0.9206282978769278, iteration: 77694
loss: 1.008453369140625,grad_norm: 0.9999991784999951, iteration: 77695
loss: 0.9869333505630493,grad_norm: 0.9005967917781602, iteration: 77696
loss: 0.9804618954658508,grad_norm: 0.9999992097865469, iteration: 77697
loss: 1.011534571647644,grad_norm: 0.9999991962955692, iteration: 77698
loss: 1.0327348709106445,grad_norm: 0.9173854285397665, iteration: 77699
loss: 1.0322585105895996,grad_norm: 0.9999989990018754, iteration: 77700
loss: 1.0318683385849,grad_norm: 0.9999989368991821, iteration: 77701
loss: 0.9711160063743591,grad_norm: 0.9999992635679482, iteration: 77702
loss: 0.9842416644096375,grad_norm: 0.9999992231778143, iteration: 77703
loss: 1.0037720203399658,grad_norm: 0.9304360536839958, iteration: 77704
loss: 0.9645708799362183,grad_norm: 0.9999991867294803, iteration: 77705
loss: 1.0211341381072998,grad_norm: 0.9999995940720274, iteration: 77706
loss: 0.9689772725105286,grad_norm: 0.9965879684736045, iteration: 77707
loss: 1.006870150566101,grad_norm: 0.9999990623040252, iteration: 77708
loss: 1.0018537044525146,grad_norm: 0.9767063904911055, iteration: 77709
loss: 0.9789677262306213,grad_norm: 0.9999992006194014, iteration: 77710
loss: 1.0146746635437012,grad_norm: 0.9999991184858554, iteration: 77711
loss: 1.0071356296539307,grad_norm: 0.9975952504117128, iteration: 77712
loss: 1.0165495872497559,grad_norm: 0.9999991322749715, iteration: 77713
loss: 0.9760729670524597,grad_norm: 0.8935674124105004, iteration: 77714
loss: 1.0011037588119507,grad_norm: 0.9999990437326376, iteration: 77715
loss: 0.9602916240692139,grad_norm: 0.9506553514998562, iteration: 77716
loss: 1.0013662576675415,grad_norm: 0.9999991183569988, iteration: 77717
loss: 1.0185633897781372,grad_norm: 0.999999055511717, iteration: 77718
loss: 0.9999138712882996,grad_norm: 0.9999990840913362, iteration: 77719
loss: 1.006778597831726,grad_norm: 0.9692518597906509, iteration: 77720
loss: 0.9988006949424744,grad_norm: 0.9693597318135502, iteration: 77721
loss: 1.0121591091156006,grad_norm: 0.9999991121443347, iteration: 77722
loss: 0.9757576584815979,grad_norm: 0.9999992149578245, iteration: 77723
loss: 1.0188959836959839,grad_norm: 0.999998994525842, iteration: 77724
loss: 1.021498680114746,grad_norm: 0.9029628713700575, iteration: 77725
loss: 0.9887728095054626,grad_norm: 0.9166706599183456, iteration: 77726
loss: 1.0041166543960571,grad_norm: 0.9787512135069205, iteration: 77727
loss: 0.9903203845024109,grad_norm: 0.9999990851822319, iteration: 77728
loss: 0.9365265369415283,grad_norm: 0.9999990869357277, iteration: 77729
loss: 1.0129215717315674,grad_norm: 0.9881952997954637, iteration: 77730
loss: 1.009413719177246,grad_norm: 0.9999989570030636, iteration: 77731
loss: 0.9955779314041138,grad_norm: 0.9999991034879411, iteration: 77732
loss: 1.0228208303451538,grad_norm: 0.9999989343693519, iteration: 77733
loss: 1.01750910282135,grad_norm: 0.9999993219607685, iteration: 77734
loss: 0.988793134689331,grad_norm: 0.9999990604340477, iteration: 77735
loss: 0.9819141030311584,grad_norm: 0.980504853093811, iteration: 77736
loss: 1.0078201293945312,grad_norm: 0.9999992341748895, iteration: 77737
loss: 0.9976433515548706,grad_norm: 0.9999991384596566, iteration: 77738
loss: 0.9719744920730591,grad_norm: 0.9999991554837832, iteration: 77739
loss: 1.0035492181777954,grad_norm: 0.9999992570006546, iteration: 77740
loss: 1.1217527389526367,grad_norm: 0.9999998263703628, iteration: 77741
loss: 1.0048211812973022,grad_norm: 0.8654737570458675, iteration: 77742
loss: 0.9997702836990356,grad_norm: 0.9890022611008265, iteration: 77743
loss: 1.0370385646820068,grad_norm: 0.9999995003939016, iteration: 77744
loss: 1.0250626802444458,grad_norm: 0.9999990772482792, iteration: 77745
loss: 0.9845676422119141,grad_norm: 0.9999990581042975, iteration: 77746
loss: 1.0127242803573608,grad_norm: 0.999999155580849, iteration: 77747
loss: 1.0124714374542236,grad_norm: 0.9735027228793511, iteration: 77748
loss: 1.0185513496398926,grad_norm: 0.9999997449920323, iteration: 77749
loss: 0.9835464358329773,grad_norm: 0.9999991345135631, iteration: 77750
loss: 1.0284550189971924,grad_norm: 0.9999994189230471, iteration: 77751
loss: 0.9884105324745178,grad_norm: 0.9999991292581266, iteration: 77752
loss: 0.9729257822036743,grad_norm: 0.9990369766349826, iteration: 77753
loss: 0.9936844706535339,grad_norm: 0.9999991927273112, iteration: 77754
loss: 1.0096580982208252,grad_norm: 0.999999206377695, iteration: 77755
loss: 1.0176695585250854,grad_norm: 0.9049050696605961, iteration: 77756
loss: 0.9729444980621338,grad_norm: 0.999999276918378, iteration: 77757
loss: 1.0006089210510254,grad_norm: 0.8881457134893077, iteration: 77758
loss: 1.0027307271957397,grad_norm: 0.9999991399239434, iteration: 77759
loss: 1.0100849866867065,grad_norm: 0.9999991686311425, iteration: 77760
loss: 1.0145800113677979,grad_norm: 0.9729109519728965, iteration: 77761
loss: 1.0276905298233032,grad_norm: 0.999999360152172, iteration: 77762
loss: 1.003842830657959,grad_norm: 0.9999991131421363, iteration: 77763
loss: 1.0002820491790771,grad_norm: 0.9809177764442456, iteration: 77764
loss: 0.9990472197532654,grad_norm: 0.9999990824018102, iteration: 77765
loss: 0.9937922358512878,grad_norm: 0.9280389284388474, iteration: 77766
loss: 0.9642534255981445,grad_norm: 0.9999990977875756, iteration: 77767
loss: 0.9472658038139343,grad_norm: 0.9999991005623582, iteration: 77768
loss: 1.0157485008239746,grad_norm: 0.9999988743846884, iteration: 77769
loss: 1.0220668315887451,grad_norm: 0.9999992280573994, iteration: 77770
loss: 0.9858238101005554,grad_norm: 0.9999989962433569, iteration: 77771
loss: 1.0001509189605713,grad_norm: 0.999999070042485, iteration: 77772
loss: 0.9760452508926392,grad_norm: 0.9999991146305526, iteration: 77773
loss: 0.9983736872673035,grad_norm: 0.9999990608423538, iteration: 77774
loss: 0.9863144755363464,grad_norm: 0.999999054798507, iteration: 77775
loss: 1.0071882009506226,grad_norm: 0.9138212832295888, iteration: 77776
loss: 0.9627638459205627,grad_norm: 0.999999154199228, iteration: 77777
loss: 1.0487326383590698,grad_norm: 0.999999528293716, iteration: 77778
loss: 1.0065326690673828,grad_norm: 0.999999080148119, iteration: 77779
loss: 0.9831690192222595,grad_norm: 0.9999992076500605, iteration: 77780
loss: 1.0231248140335083,grad_norm: 0.9999995023597359, iteration: 77781
loss: 1.019006609916687,grad_norm: 0.9641052797132594, iteration: 77782
loss: 1.010301947593689,grad_norm: 0.9999991706796487, iteration: 77783
loss: 1.0224010944366455,grad_norm: 0.9791697613613796, iteration: 77784
loss: 1.0325149297714233,grad_norm: 0.9999991511756285, iteration: 77785
loss: 1.0066871643066406,grad_norm: 0.8921036149775095, iteration: 77786
loss: 1.00197172164917,grad_norm: 0.9999991216940646, iteration: 77787
loss: 0.9905911684036255,grad_norm: 0.9547616435172867, iteration: 77788
loss: 0.9849597811698914,grad_norm: 0.9999992253055168, iteration: 77789
loss: 0.9860638976097107,grad_norm: 0.9999991831830068, iteration: 77790
loss: 1.065375804901123,grad_norm: 0.9999995130673442, iteration: 77791
loss: 1.0304460525512695,grad_norm: 0.9999991632121539, iteration: 77792
loss: 0.9773807525634766,grad_norm: 0.9999991758523963, iteration: 77793
loss: 1.0045727491378784,grad_norm: 0.9354718005568025, iteration: 77794
loss: 0.9928606748580933,grad_norm: 0.9999992385404278, iteration: 77795
loss: 1.0063966512680054,grad_norm: 0.9208252171442717, iteration: 77796
loss: 1.061461329460144,grad_norm: 0.9999991239121234, iteration: 77797
loss: 0.9886518120765686,grad_norm: 0.9999990257248648, iteration: 77798
loss: 1.070689082145691,grad_norm: 0.9999992880869515, iteration: 77799
loss: 1.0370466709136963,grad_norm: 0.9999999192738744, iteration: 77800
loss: 1.0148228406906128,grad_norm: 0.9900996639191948, iteration: 77801
loss: 1.0026752948760986,grad_norm: 0.999999148854557, iteration: 77802
loss: 0.9692080616950989,grad_norm: 0.9999992793115297, iteration: 77803
loss: 1.0236730575561523,grad_norm: 0.9134335361393717, iteration: 77804
loss: 0.9837810397148132,grad_norm: 0.9999991213347142, iteration: 77805
loss: 1.005043625831604,grad_norm: 0.9999993301919915, iteration: 77806
loss: 0.9984650015830994,grad_norm: 0.9999991187733523, iteration: 77807
loss: 0.9988253116607666,grad_norm: 0.8956291119034504, iteration: 77808
loss: 0.997401237487793,grad_norm: 0.9999991311047013, iteration: 77809
loss: 0.9724034667015076,grad_norm: 0.9999991637820286, iteration: 77810
loss: 0.9769771695137024,grad_norm: 0.9999991859730721, iteration: 77811
loss: 0.9850035905838013,grad_norm: 0.99999921190979, iteration: 77812
loss: 0.9965463280677795,grad_norm: 0.9999992959758459, iteration: 77813
loss: 0.9663296341896057,grad_norm: 0.9999991110438748, iteration: 77814
loss: 1.001197338104248,grad_norm: 0.9999991562774874, iteration: 77815
loss: 1.0290080308914185,grad_norm: 0.9999990401067821, iteration: 77816
loss: 1.0158759355545044,grad_norm: 0.9999990793014544, iteration: 77817
loss: 0.9941855072975159,grad_norm: 0.9999989720873849, iteration: 77818
loss: 1.0316882133483887,grad_norm: 0.9039069396649124, iteration: 77819
loss: 0.9913805723190308,grad_norm: 0.99999926002501, iteration: 77820
loss: 0.9725462198257446,grad_norm: 0.8822097912049102, iteration: 77821
loss: 1.066526174545288,grad_norm: 0.9999997831784377, iteration: 77822
loss: 1.0229122638702393,grad_norm: 0.9999995811648789, iteration: 77823
loss: 1.0187491178512573,grad_norm: 0.9464766939730311, iteration: 77824
loss: 1.0130366086959839,grad_norm: 0.9999991128011966, iteration: 77825
loss: 1.0166079998016357,grad_norm: 0.9598515333626029, iteration: 77826
loss: 0.9965518116950989,grad_norm: 0.9999991510261339, iteration: 77827
loss: 1.001405954360962,grad_norm: 0.9255418626669474, iteration: 77828
loss: 0.9568024277687073,grad_norm: 0.8950422106427849, iteration: 77829
loss: 1.0144901275634766,grad_norm: 0.9902762288887299, iteration: 77830
loss: 1.0120834112167358,grad_norm: 0.9999990896171789, iteration: 77831
loss: 1.0111202001571655,grad_norm: 0.9999990035837678, iteration: 77832
loss: 0.9844696521759033,grad_norm: 0.9999991885769902, iteration: 77833
loss: 1.007909893989563,grad_norm: 0.9999992030092955, iteration: 77834
loss: 0.95451420545578,grad_norm: 0.9961396096435463, iteration: 77835
loss: 1.0441087484359741,grad_norm: 0.9999993353366641, iteration: 77836
loss: 0.9686999320983887,grad_norm: 0.9999990270448909, iteration: 77837
loss: 1.039574146270752,grad_norm: 0.9999991900595755, iteration: 77838
loss: 0.9867424368858337,grad_norm: 0.9999991040616092, iteration: 77839
loss: 0.9951488375663757,grad_norm: 0.9999992444459712, iteration: 77840
loss: 0.9771114587783813,grad_norm: 0.9999990083393927, iteration: 77841
loss: 1.0687910318374634,grad_norm: 0.999999450651859, iteration: 77842
loss: 1.0110528469085693,grad_norm: 0.8411430179498061, iteration: 77843
loss: 1.0262658596038818,grad_norm: 0.9999991527275356, iteration: 77844
loss: 1.009143352508545,grad_norm: 0.9999991960878051, iteration: 77845
loss: 1.055802345275879,grad_norm: 0.9920491916993771, iteration: 77846
loss: 1.0320097208023071,grad_norm: 0.9999992302049818, iteration: 77847
loss: 0.9994046092033386,grad_norm: 0.949100961121046, iteration: 77848
loss: 0.999754011631012,grad_norm: 0.9999991122461849, iteration: 77849
loss: 0.9781871438026428,grad_norm: 0.9407843558711696, iteration: 77850
loss: 0.9819570183753967,grad_norm: 0.9687364691292891, iteration: 77851
loss: 1.0058823823928833,grad_norm: 0.9999992265650971, iteration: 77852
loss: 0.9565874338150024,grad_norm: 0.9999991038779493, iteration: 77853
loss: 0.9891453385353088,grad_norm: 0.8275888702041525, iteration: 77854
loss: 0.9748772382736206,grad_norm: 0.8074100535252868, iteration: 77855
loss: 1.023207187652588,grad_norm: 0.9999992518202776, iteration: 77856
loss: 1.0260881185531616,grad_norm: 0.9999995726330634, iteration: 77857
loss: 1.0166027545928955,grad_norm: 0.999999194506269, iteration: 77858
loss: 1.0165715217590332,grad_norm: 0.9466337985826075, iteration: 77859
loss: 0.975092887878418,grad_norm: 0.9812397727321112, iteration: 77860
loss: 1.0242360830307007,grad_norm: 1.0000000444654669, iteration: 77861
loss: 0.9755920767784119,grad_norm: 0.9739726662955961, iteration: 77862
loss: 0.9934775233268738,grad_norm: 0.9999992577187986, iteration: 77863
loss: 0.9978127479553223,grad_norm: 0.9999990732692365, iteration: 77864
loss: 0.9948685765266418,grad_norm: 0.926675715706536, iteration: 77865
loss: 0.9888802766799927,grad_norm: 0.8901283295816855, iteration: 77866
loss: 1.0334144830703735,grad_norm: 0.9656076788322318, iteration: 77867
loss: 0.9877632260322571,grad_norm: 0.9999992075705194, iteration: 77868
loss: 0.9578618407249451,grad_norm: 0.9184839084123451, iteration: 77869
loss: 0.9961897134780884,grad_norm: 0.9245623415934825, iteration: 77870
loss: 0.9930543303489685,grad_norm: 0.9999990697946055, iteration: 77871
loss: 1.0105600357055664,grad_norm: 0.9999993397887331, iteration: 77872
loss: 1.0078145265579224,grad_norm: 0.9999989477160829, iteration: 77873
loss: 1.00164794921875,grad_norm: 0.9999991702983646, iteration: 77874
loss: 0.9923765659332275,grad_norm: 0.9999991926199696, iteration: 77875
loss: 0.9624605178833008,grad_norm: 0.9263973850485345, iteration: 77876
loss: 1.0618743896484375,grad_norm: 0.9999993546240542, iteration: 77877
loss: 0.9902072548866272,grad_norm: 0.9999990576189216, iteration: 77878
loss: 1.0071173906326294,grad_norm: 0.9999991566387358, iteration: 77879
loss: 1.0158367156982422,grad_norm: 0.9999990817380608, iteration: 77880
loss: 1.0320014953613281,grad_norm: 0.9999990242608182, iteration: 77881
loss: 1.0043786764144897,grad_norm: 0.9796043238274604, iteration: 77882
loss: 1.0297064781188965,grad_norm: 0.9999991953055557, iteration: 77883
loss: 1.000126838684082,grad_norm: 0.971665659581415, iteration: 77884
loss: 0.9779505133628845,grad_norm: 0.9999991978266799, iteration: 77885
loss: 1.0374919176101685,grad_norm: 0.9937968328165322, iteration: 77886
loss: 0.9899061322212219,grad_norm: 0.9999990925142119, iteration: 77887
loss: 1.0173864364624023,grad_norm: 0.999999171638294, iteration: 77888
loss: 0.9988047480583191,grad_norm: 0.9999989823616015, iteration: 77889
loss: 1.0098907947540283,grad_norm: 0.9999992278674825, iteration: 77890
loss: 0.9953566193580627,grad_norm: 0.9999992698574846, iteration: 77891
loss: 1.0562283992767334,grad_norm: 0.9999998235447736, iteration: 77892
loss: 1.0247925519943237,grad_norm: 0.9999991576751405, iteration: 77893
loss: 1.0326908826828003,grad_norm: 0.9999997460820714, iteration: 77894
loss: 0.9958237409591675,grad_norm: 0.9999993740582331, iteration: 77895
loss: 1.0120880603790283,grad_norm: 0.9999993511132728, iteration: 77896
loss: 0.9988602995872498,grad_norm: 0.9999991537856456, iteration: 77897
loss: 1.0354074239730835,grad_norm: 0.9999992189048185, iteration: 77898
loss: 0.9860780835151672,grad_norm: 0.9999991750385053, iteration: 77899
loss: 0.9963800311088562,grad_norm: 0.9561804356118746, iteration: 77900
loss: 1.0114344358444214,grad_norm: 0.9999992229018873, iteration: 77901
loss: 0.9828213453292847,grad_norm: 0.9999995709353103, iteration: 77902
loss: 1.0934221744537354,grad_norm: 0.9999998748308186, iteration: 77903
loss: 0.997115433216095,grad_norm: 0.9748256606795445, iteration: 77904
loss: 1.0182075500488281,grad_norm: 0.999999022803071, iteration: 77905
loss: 1.0024824142456055,grad_norm: 0.9533867507845856, iteration: 77906
loss: 0.9836488962173462,grad_norm: 0.9999991445221886, iteration: 77907
loss: 1.0093729496002197,grad_norm: 0.9999990126220756, iteration: 77908
loss: 1.026498556137085,grad_norm: 0.8936661658400221, iteration: 77909
loss: 1.0319398641586304,grad_norm: 0.9999993072049956, iteration: 77910
loss: 1.0319581031799316,grad_norm: 0.9184996021990885, iteration: 77911
loss: 0.9937320351600647,grad_norm: 0.9999992355231716, iteration: 77912
loss: 1.038419485092163,grad_norm: 0.990127765401406, iteration: 77913
loss: 1.0041851997375488,grad_norm: 0.9999990999461003, iteration: 77914
loss: 0.9800835251808167,grad_norm: 0.9999991810309554, iteration: 77915
loss: 1.0255051851272583,grad_norm: 0.9999989919493636, iteration: 77916
loss: 1.0062592029571533,grad_norm: 0.999999605837295, iteration: 77917
loss: 0.9883977770805359,grad_norm: 0.9999990225940849, iteration: 77918
loss: 1.0168616771697998,grad_norm: 0.9999991063036396, iteration: 77919
loss: 0.9882317185401917,grad_norm: 0.9718584700866889, iteration: 77920
loss: 1.0039743185043335,grad_norm: 0.9999991333335826, iteration: 77921
loss: 1.079190969467163,grad_norm: 0.9788712636476086, iteration: 77922
loss: 1.0275882482528687,grad_norm: 0.9945142123268842, iteration: 77923
loss: 0.9577615857124329,grad_norm: 0.9999991427238065, iteration: 77924
loss: 1.0019692182540894,grad_norm: 0.943284068795361, iteration: 77925
loss: 1.0225780010223389,grad_norm: 0.9999993603786617, iteration: 77926
loss: 1.0203505754470825,grad_norm: 0.9999991402396965, iteration: 77927
loss: 0.9631388187408447,grad_norm: 0.9748064583391644, iteration: 77928
loss: 1.0007933378219604,grad_norm: 0.978806354025174, iteration: 77929
loss: 0.9991012811660767,grad_norm: 0.9999990872925147, iteration: 77930
loss: 1.0100010633468628,grad_norm: 0.9999989836856151, iteration: 77931
loss: 0.9886185526847839,grad_norm: 0.999999030654183, iteration: 77932
loss: 1.028372883796692,grad_norm: 0.9987031720226642, iteration: 77933
loss: 0.9738255739212036,grad_norm: 0.893116111738136, iteration: 77934
loss: 0.996421217918396,grad_norm: 0.9633322010203501, iteration: 77935
loss: 0.9980894923210144,grad_norm: 0.9999992152495483, iteration: 77936
loss: 0.973997175693512,grad_norm: 0.9999991026021764, iteration: 77937
loss: 0.9824924468994141,grad_norm: 0.9702326671117358, iteration: 77938
loss: 1.0149028301239014,grad_norm: 0.9999991309026004, iteration: 77939
loss: 1.0353527069091797,grad_norm: 0.9994572787526927, iteration: 77940
loss: 1.0203156471252441,grad_norm: 0.9999990932361095, iteration: 77941
loss: 0.9784458875656128,grad_norm: 0.9999990821089033, iteration: 77942
loss: 1.021947979927063,grad_norm: 0.9999993606816807, iteration: 77943
loss: 0.9663512706756592,grad_norm: 0.9104153523424916, iteration: 77944
loss: 0.9794769883155823,grad_norm: 0.9017929183800156, iteration: 77945
loss: 1.0462417602539062,grad_norm: 0.9999993492599838, iteration: 77946
loss: 1.001479983329773,grad_norm: 0.9999990549565201, iteration: 77947
loss: 0.9710004925727844,grad_norm: 0.9657160550100135, iteration: 77948
loss: 0.9995755553245544,grad_norm: 0.9999990786778177, iteration: 77949
loss: 0.9917254447937012,grad_norm: 0.9999992734651405, iteration: 77950
loss: 0.9936972260475159,grad_norm: 0.9999992395353277, iteration: 77951
loss: 1.0373849868774414,grad_norm: 0.9999992195647867, iteration: 77952
loss: 1.0329416990280151,grad_norm: 0.9999991434774745, iteration: 77953
loss: 0.9926943778991699,grad_norm: 0.9999990506898978, iteration: 77954
loss: 1.0240857601165771,grad_norm: 0.9999994786518673, iteration: 77955
loss: 1.016373634338379,grad_norm: 0.9440858804518623, iteration: 77956
loss: 0.9840230345726013,grad_norm: 0.9999993129916667, iteration: 77957
loss: 1.0029964447021484,grad_norm: 0.8379323862856513, iteration: 77958
loss: 0.9947515726089478,grad_norm: 0.974820090110205, iteration: 77959
loss: 0.9979820847511292,grad_norm: 0.9306513110210947, iteration: 77960
loss: 0.975368857383728,grad_norm: 0.9999990879631325, iteration: 77961
loss: 1.0274815559387207,grad_norm: 0.9999990007548389, iteration: 77962
loss: 1.0514084100723267,grad_norm: 0.9999998445426509, iteration: 77963
loss: 1.0078860521316528,grad_norm: 0.8308392481872398, iteration: 77964
loss: 0.9702227711677551,grad_norm: 0.9999991254239647, iteration: 77965
loss: 1.0396541357040405,grad_norm: 0.9999989491898285, iteration: 77966
loss: 0.9888025522232056,grad_norm: 0.9689409850135716, iteration: 77967
loss: 1.0591301918029785,grad_norm: 0.9999996824550057, iteration: 77968
loss: 0.9589188694953918,grad_norm: 0.9999990512672114, iteration: 77969
loss: 0.9921370148658752,grad_norm: 0.9269564883570351, iteration: 77970
loss: 1.007745385169983,grad_norm: 0.9999989931207772, iteration: 77971
loss: 1.0013362169265747,grad_norm: 0.9938728663141889, iteration: 77972
loss: 0.9866199493408203,grad_norm: 0.9999992248870696, iteration: 77973
loss: 0.9982762932777405,grad_norm: 0.9999991376589299, iteration: 77974
loss: 0.9905580878257751,grad_norm: 0.7894510162416865, iteration: 77975
loss: 0.9785423278808594,grad_norm: 0.9999992687181146, iteration: 77976
loss: 0.977157711982727,grad_norm: 0.9999992813060498, iteration: 77977
loss: 0.9703235626220703,grad_norm: 0.9999990557561106, iteration: 77978
loss: 0.9944328665733337,grad_norm: 0.8973949624669981, iteration: 77979
loss: 0.9803139567375183,grad_norm: 0.9999991425802753, iteration: 77980
loss: 0.9823477268218994,grad_norm: 0.9999991826421639, iteration: 77981
loss: 1.0188974142074585,grad_norm: 0.999999109454067, iteration: 77982
loss: 0.999445915222168,grad_norm: 0.9996058183576475, iteration: 77983
loss: 0.985091507434845,grad_norm: 0.9999991625154098, iteration: 77984
loss: 1.0197277069091797,grad_norm: 0.8937989971396547, iteration: 77985
loss: 0.9849858283996582,grad_norm: 0.9295094919536374, iteration: 77986
loss: 1.0172076225280762,grad_norm: 0.9748708503152058, iteration: 77987
loss: 1.0035865306854248,grad_norm: 0.9723212622178274, iteration: 77988
loss: 1.0019217729568481,grad_norm: 0.999999223732027, iteration: 77989
loss: 0.9688904881477356,grad_norm: 0.9999991464555846, iteration: 77990
loss: 0.9861049652099609,grad_norm: 0.9999990009863077, iteration: 77991
loss: 1.0036695003509521,grad_norm: 0.9999990190051792, iteration: 77992
loss: 1.0205127000808716,grad_norm: 0.9999990465645022, iteration: 77993
loss: 0.9884300231933594,grad_norm: 0.9999990128084244, iteration: 77994
loss: 1.0049293041229248,grad_norm: 0.9311679209643974, iteration: 77995
loss: 1.0197924375534058,grad_norm: 0.9999992738560798, iteration: 77996
loss: 1.0211021900177002,grad_norm: 0.9688223524516456, iteration: 77997
loss: 1.0115230083465576,grad_norm: 0.9999990566139373, iteration: 77998
loss: 1.0246471166610718,grad_norm: 0.9774579556339488, iteration: 77999
loss: 0.975595235824585,grad_norm: 0.9062703189228754, iteration: 78000
loss: 0.989574134349823,grad_norm: 0.887617411713783, iteration: 78001
loss: 0.9833364486694336,grad_norm: 0.9999991922759166, iteration: 78002
loss: 1.0079741477966309,grad_norm: 0.9248749353049419, iteration: 78003
loss: 1.0238301753997803,grad_norm: 0.9946348855528425, iteration: 78004
loss: 1.0046906471252441,grad_norm: 0.9999989996106039, iteration: 78005
loss: 1.012549877166748,grad_norm: 0.9999990996583862, iteration: 78006
loss: 0.9726839661598206,grad_norm: 0.999999188949432, iteration: 78007
loss: 0.987044632434845,grad_norm: 0.9999991887414479, iteration: 78008
loss: 1.0059829950332642,grad_norm: 0.9999991303375412, iteration: 78009
loss: 0.9956320524215698,grad_norm: 0.9999991050983102, iteration: 78010
loss: 1.0332990884780884,grad_norm: 0.943615482760544, iteration: 78011
loss: 1.0130741596221924,grad_norm: 0.9836237498746272, iteration: 78012
loss: 0.997316837310791,grad_norm: 0.999999176213086, iteration: 78013
loss: 0.9932481646537781,grad_norm: 0.9999991121108334, iteration: 78014
loss: 1.0043907165527344,grad_norm: 0.9800864686843296, iteration: 78015
loss: 1.0245569944381714,grad_norm: 0.9999990881355045, iteration: 78016
loss: 0.996130108833313,grad_norm: 0.9734430411636024, iteration: 78017
loss: 1.013649821281433,grad_norm: 0.99999907220913, iteration: 78018
loss: 1.0242161750793457,grad_norm: 0.999999224110369, iteration: 78019
loss: 1.0106559991836548,grad_norm: 0.9999992232558116, iteration: 78020
loss: 1.000705361366272,grad_norm: 0.9999991998117568, iteration: 78021
loss: 0.9886071681976318,grad_norm: 0.8493127156981982, iteration: 78022
loss: 1.0144649744033813,grad_norm: 0.905569637591743, iteration: 78023
loss: 0.9896730780601501,grad_norm: 0.9797931993234614, iteration: 78024
loss: 0.9736183881759644,grad_norm: 0.9859331093762661, iteration: 78025
loss: 0.9941222071647644,grad_norm: 0.9362835257946045, iteration: 78026
loss: 0.9823259115219116,grad_norm: 0.999999125305946, iteration: 78027
loss: 1.0171488523483276,grad_norm: 0.9999989644309802, iteration: 78028
loss: 1.0005995035171509,grad_norm: 0.9905164071371415, iteration: 78029
loss: 0.977287769317627,grad_norm: 0.9999991360864909, iteration: 78030
loss: 0.9761645197868347,grad_norm: 0.952266776368519, iteration: 78031
loss: 1.0529448986053467,grad_norm: 0.9999990498335949, iteration: 78032
loss: 0.9935089349746704,grad_norm: 0.9999990954485526, iteration: 78033
loss: 1.0061126947402954,grad_norm: 0.9999991669353492, iteration: 78034
loss: 0.9651505947113037,grad_norm: 0.9999994651804254, iteration: 78035
loss: 1.0164306163787842,grad_norm: 0.9999990993778333, iteration: 78036
loss: 1.025007724761963,grad_norm: 0.9999990542617492, iteration: 78037
loss: 0.9959719181060791,grad_norm: 0.8871716415317807, iteration: 78038
loss: 0.9833325147628784,grad_norm: 0.9999990634054946, iteration: 78039
loss: 1.034686803817749,grad_norm: 0.9895085229095768, iteration: 78040
loss: 1.0129063129425049,grad_norm: 0.9999993010773587, iteration: 78041
loss: 1.038552165031433,grad_norm: 0.9999990979005634, iteration: 78042
loss: 0.9670383930206299,grad_norm: 0.9999989411664689, iteration: 78043
loss: 0.9999510645866394,grad_norm: 0.9732259686576459, iteration: 78044
loss: 0.991502046585083,grad_norm: 0.8781768278040704, iteration: 78045
loss: 1.0061073303222656,grad_norm: 0.7603555362339912, iteration: 78046
loss: 1.0050756931304932,grad_norm: 0.943830244409619, iteration: 78047
loss: 0.996703565120697,grad_norm: 0.9999991256674498, iteration: 78048
loss: 1.0007541179656982,grad_norm: 0.9443332326393667, iteration: 78049
loss: 1.0278759002685547,grad_norm: 0.9999997642058673, iteration: 78050
loss: 1.0069599151611328,grad_norm: 0.9999989784849974, iteration: 78051
loss: 1.0140671730041504,grad_norm: 0.999999154765113, iteration: 78052
loss: 1.013318657875061,grad_norm: 0.9003106180603254, iteration: 78053
loss: 1.0354804992675781,grad_norm: 0.9999991079961029, iteration: 78054
loss: 1.047856330871582,grad_norm: 0.9999992067116366, iteration: 78055
loss: 0.990685760974884,grad_norm: 0.9999992007907783, iteration: 78056
loss: 0.9845317602157593,grad_norm: 0.9026525469549612, iteration: 78057
loss: 0.9710118174552917,grad_norm: 0.9500680803433912, iteration: 78058
loss: 0.9955726861953735,grad_norm: 0.9999992930098641, iteration: 78059
loss: 0.9663463234901428,grad_norm: 0.999999064747785, iteration: 78060
loss: 1.0842883586883545,grad_norm: 0.9999995876748046, iteration: 78061
loss: 0.9964022636413574,grad_norm: 0.999999182988318, iteration: 78062
loss: 0.9637143611907959,grad_norm: 0.9999990997625274, iteration: 78063
loss: 0.9838614463806152,grad_norm: 0.9999991988838605, iteration: 78064
loss: 0.9880545139312744,grad_norm: 0.8820182057616767, iteration: 78065
loss: 1.0256155729293823,grad_norm: 0.9966438028317164, iteration: 78066
loss: 0.9905492067337036,grad_norm: 0.9707769024202211, iteration: 78067
loss: 0.9763609170913696,grad_norm: 0.9999992390678628, iteration: 78068
loss: 0.9574148654937744,grad_norm: 0.9999990646729257, iteration: 78069
loss: 1.034567952156067,grad_norm: 0.9999990241673907, iteration: 78070
loss: 0.9870356321334839,grad_norm: 0.9999991524556181, iteration: 78071
loss: 1.0171810388565063,grad_norm: 0.903698889019414, iteration: 78072
loss: 1.0133824348449707,grad_norm: 0.9506122675781408, iteration: 78073
loss: 0.9861445426940918,grad_norm: 0.9999990604977586, iteration: 78074
loss: 1.0083122253417969,grad_norm: 0.9999991099298982, iteration: 78075
loss: 1.003292202949524,grad_norm: 0.9536994135225695, iteration: 78076
loss: 0.9506514668464661,grad_norm: 0.9999991086025076, iteration: 78077
loss: 0.9864413142204285,grad_norm: 0.9292178433500199, iteration: 78078
loss: 0.9791770577430725,grad_norm: 0.9999992377856673, iteration: 78079
loss: 0.9975691437721252,grad_norm: 0.9105279238254701, iteration: 78080
loss: 1.014257788658142,grad_norm: 0.9900610201384503, iteration: 78081
loss: 0.9979907870292664,grad_norm: 0.9999992296165202, iteration: 78082
loss: 1.000048041343689,grad_norm: 0.9999991861187751, iteration: 78083
loss: 1.0026402473449707,grad_norm: 0.9999991170232301, iteration: 78084
loss: 0.9987074136734009,grad_norm: 0.99999923874033, iteration: 78085
loss: 1.0457935333251953,grad_norm: 0.999999278887355, iteration: 78086
loss: 1.0002045631408691,grad_norm: 0.8974687232422481, iteration: 78087
loss: 0.9885169863700867,grad_norm: 0.984334800777386, iteration: 78088
loss: 1.0217362642288208,grad_norm: 0.9999992591738598, iteration: 78089
loss: 0.9983603358268738,grad_norm: 0.8543880080591847, iteration: 78090
loss: 1.0182487964630127,grad_norm: 0.999999306611409, iteration: 78091
loss: 0.9685627818107605,grad_norm: 0.9715238126772974, iteration: 78092
loss: 1.0004801750183105,grad_norm: 0.9999988926896881, iteration: 78093
loss: 1.0013364553451538,grad_norm: 0.9999991242383596, iteration: 78094
loss: 1.03619384765625,grad_norm: 0.9999993266678356, iteration: 78095
loss: 1.0220695734024048,grad_norm: 0.9999994421821329, iteration: 78096
loss: 0.9961355328559875,grad_norm: 0.9429209503527574, iteration: 78097
loss: 1.020442008972168,grad_norm: 0.9999988985479139, iteration: 78098
loss: 0.9357979893684387,grad_norm: 0.950767806160017, iteration: 78099
loss: 0.9870359301567078,grad_norm: 0.9999993104165809, iteration: 78100
loss: 1.0216022729873657,grad_norm: 0.9999990243014452, iteration: 78101
loss: 1.0378749370574951,grad_norm: 0.9977711759293165, iteration: 78102
loss: 0.995052695274353,grad_norm: 0.907013038764335, iteration: 78103
loss: 0.995960533618927,grad_norm: 0.9999992595160444, iteration: 78104
loss: 0.9835097193717957,grad_norm: 0.9999991852540616, iteration: 78105
loss: 0.9772281050682068,grad_norm: 0.999999109675925, iteration: 78106
loss: 0.9568199515342712,grad_norm: 0.8937681223685847, iteration: 78107
loss: 0.9866295456886292,grad_norm: 0.9999991025142185, iteration: 78108
loss: 1.0082190036773682,grad_norm: 0.9324002565451934, iteration: 78109
loss: 0.9769917130470276,grad_norm: 0.9999989230283498, iteration: 78110
loss: 0.9685327410697937,grad_norm: 0.9999991220947697, iteration: 78111
loss: 0.9472705721855164,grad_norm: 0.999999129969716, iteration: 78112
loss: 1.0062847137451172,grad_norm: 0.9059356221757648, iteration: 78113
loss: 1.033279538154602,grad_norm: 0.9329733885236674, iteration: 78114
loss: 1.0222893953323364,grad_norm: 0.9999992356912984, iteration: 78115
loss: 0.9659791588783264,grad_norm: 0.9999989518653883, iteration: 78116
loss: 0.9604453444480896,grad_norm: 0.9215144548308004, iteration: 78117
loss: 0.9956234693527222,grad_norm: 0.9999988595964947, iteration: 78118
loss: 0.9718828797340393,grad_norm: 0.9999997663273331, iteration: 78119
loss: 0.9943413734436035,grad_norm: 0.9999991254088421, iteration: 78120
loss: 1.0223208665847778,grad_norm: 0.9999989347490154, iteration: 78121
loss: 1.0173583030700684,grad_norm: 0.9999992192513437, iteration: 78122
loss: 1.0372958183288574,grad_norm: 0.9494909130522461, iteration: 78123
loss: 1.0212844610214233,grad_norm: 0.9999998987017307, iteration: 78124
loss: 0.9822989106178284,grad_norm: 0.9999990728423566, iteration: 78125
loss: 1.0095070600509644,grad_norm: 0.9999989972783299, iteration: 78126
loss: 0.9988333582878113,grad_norm: 0.9999998380928432, iteration: 78127
loss: 0.9752189517021179,grad_norm: 0.9999993115948281, iteration: 78128
loss: 1.0351488590240479,grad_norm: 0.9999992752724361, iteration: 78129
loss: 1.0166242122650146,grad_norm: 0.951643028590891, iteration: 78130
loss: 0.9544121026992798,grad_norm: 0.9999992161868912, iteration: 78131
loss: 1.0100795030593872,grad_norm: 0.8847239598187443, iteration: 78132
loss: 1.0021697282791138,grad_norm: 0.9999991837922352, iteration: 78133
loss: 1.0024104118347168,grad_norm: 0.9999991006049954, iteration: 78134
loss: 1.0271258354187012,grad_norm: 0.999999154557962, iteration: 78135
loss: 1.0050981044769287,grad_norm: 0.9999992682627664, iteration: 78136
loss: 0.9875073432922363,grad_norm: 0.9095098546247949, iteration: 78137
loss: 0.9899329543113708,grad_norm: 0.9999991944216693, iteration: 78138
loss: 1.0245059728622437,grad_norm: 0.9999992041668103, iteration: 78139
loss: 1.0256901979446411,grad_norm: 0.9999990872564559, iteration: 78140
loss: 1.0143887996673584,grad_norm: 0.9999992174235575, iteration: 78141
loss: 0.996964693069458,grad_norm: 0.9999990612813194, iteration: 78142
loss: 0.9931620955467224,grad_norm: 0.9999991689844115, iteration: 78143
loss: 0.968899130821228,grad_norm: 0.9781188111494779, iteration: 78144
loss: 0.9668186902999878,grad_norm: 0.9999992099432322, iteration: 78145
loss: 0.9682658910751343,grad_norm: 0.9999991082497223, iteration: 78146
loss: 1.0579850673675537,grad_norm: 0.9999993126158689, iteration: 78147
loss: 1.0064390897750854,grad_norm: 0.999999033180135, iteration: 78148
loss: 0.9827864766120911,grad_norm: 0.9060831881801931, iteration: 78149
loss: 0.9946657419204712,grad_norm: 0.9640748688860451, iteration: 78150
loss: 0.9670039415359497,grad_norm: 0.9930124051672249, iteration: 78151
loss: 0.9861997365951538,grad_norm: 0.9999991212067831, iteration: 78152
loss: 1.0170997381210327,grad_norm: 0.9999990558103851, iteration: 78153
loss: 1.0358340740203857,grad_norm: 0.9999991532695559, iteration: 78154
loss: 0.9709667563438416,grad_norm: 0.9883454162437383, iteration: 78155
loss: 0.9868298172950745,grad_norm: 0.9503302614543514, iteration: 78156
loss: 1.0195426940917969,grad_norm: 0.9999992344726625, iteration: 78157
loss: 1.0290627479553223,grad_norm: 0.9079468197194192, iteration: 78158
loss: 1.0111926794052124,grad_norm: 0.9999991205709283, iteration: 78159
loss: 1.0067206621170044,grad_norm: 0.9999991077211796, iteration: 78160
loss: 1.0413720607757568,grad_norm: 0.9612588773682846, iteration: 78161
loss: 1.0030282735824585,grad_norm: 0.9999992925699602, iteration: 78162
loss: 0.9905967116355896,grad_norm: 0.9999992077679333, iteration: 78163
loss: 0.9873062968254089,grad_norm: 0.9999990922908572, iteration: 78164
loss: 1.0040130615234375,grad_norm: 0.9812514673116436, iteration: 78165
loss: 1.025506615638733,grad_norm: 0.9999990695264137, iteration: 78166
loss: 1.0001498460769653,grad_norm: 0.9999990496793063, iteration: 78167
loss: 0.99464350938797,grad_norm: 0.9999991224372048, iteration: 78168
loss: 0.9864892363548279,grad_norm: 0.9999990166026862, iteration: 78169
loss: 1.1774139404296875,grad_norm: 0.9999997440932309, iteration: 78170
loss: 1.016991376876831,grad_norm: 0.926077497148241, iteration: 78171
loss: 0.9863668084144592,grad_norm: 0.999999265049298, iteration: 78172
loss: 0.9913596510887146,grad_norm: 0.9188912699490392, iteration: 78173
loss: 1.0103315114974976,grad_norm: 0.98224159068222, iteration: 78174
loss: 1.0059443712234497,grad_norm: 0.9999990007578605, iteration: 78175
loss: 0.9610807299613953,grad_norm: 0.999999088170336, iteration: 78176
loss: 1.0181550979614258,grad_norm: 0.9999991755676834, iteration: 78177
loss: 1.0216869115829468,grad_norm: 0.8190132369057105, iteration: 78178
loss: 0.9977983236312866,grad_norm: 0.9653359160764016, iteration: 78179
loss: 1.014432430267334,grad_norm: 0.9999990106906076, iteration: 78180
loss: 0.998704731464386,grad_norm: 0.9999990365219402, iteration: 78181
loss: 0.9716732501983643,grad_norm: 0.999998983752704, iteration: 78182
loss: 1.0062319040298462,grad_norm: 0.9498675007620734, iteration: 78183
loss: 0.9838988780975342,grad_norm: 0.9999991281354236, iteration: 78184
loss: 1.01072096824646,grad_norm: 0.9999998869662794, iteration: 78185
loss: 1.0280545949935913,grad_norm: 0.9999989109508688, iteration: 78186
loss: 1.0019447803497314,grad_norm: 0.9999991754730247, iteration: 78187
loss: 0.9737598299980164,grad_norm: 0.8887149844816657, iteration: 78188
loss: 0.9915907979011536,grad_norm: 0.9999990724985497, iteration: 78189
loss: 1.0249409675598145,grad_norm: 0.9999991218317366, iteration: 78190
loss: 1.0155409574508667,grad_norm: 0.9260000200542522, iteration: 78191
loss: 0.9349344372749329,grad_norm: 0.9574875552868795, iteration: 78192
loss: 0.993893563747406,grad_norm: 0.9300433313802082, iteration: 78193
loss: 1.0364863872528076,grad_norm: 0.9999991534891408, iteration: 78194
loss: 1.0102081298828125,grad_norm: 0.9999991194987171, iteration: 78195
loss: 0.961215078830719,grad_norm: 0.9983588673428356, iteration: 78196
loss: 1.0116194486618042,grad_norm: 0.9804430144010915, iteration: 78197
loss: 1.0064051151275635,grad_norm: 0.9230469964360213, iteration: 78198
loss: 1.0023216009140015,grad_norm: 0.9331704780055923, iteration: 78199
loss: 0.9956752061843872,grad_norm: 0.9428824695934375, iteration: 78200
loss: 0.986110508441925,grad_norm: 0.8604596967710744, iteration: 78201
loss: 0.9635155200958252,grad_norm: 0.9999990292867591, iteration: 78202
loss: 1.0209696292877197,grad_norm: 0.8815816123613597, iteration: 78203
loss: 0.9906992316246033,grad_norm: 0.9999992502795182, iteration: 78204
loss: 1.056654930114746,grad_norm: 0.9999999214561032, iteration: 78205
loss: 0.9929046630859375,grad_norm: 0.9999992087557946, iteration: 78206
loss: 1.0015504360198975,grad_norm: 0.9999991200037359, iteration: 78207
loss: 0.9765403866767883,grad_norm: 0.9229423195545693, iteration: 78208
loss: 0.999708354473114,grad_norm: 0.9999990650701147, iteration: 78209
loss: 0.9528048634529114,grad_norm: 0.9751550092193748, iteration: 78210
loss: 1.0111756324768066,grad_norm: 0.9253842235666807, iteration: 78211
loss: 0.9548885226249695,grad_norm: 0.999999102344499, iteration: 78212
loss: 0.9651845097541809,grad_norm: 0.8919075693939206, iteration: 78213
loss: 1.0007531642913818,grad_norm: 0.9999992091718551, iteration: 78214
loss: 1.0262593030929565,grad_norm: 0.9999990658781116, iteration: 78215
loss: 1.0750607252120972,grad_norm: 0.9999994657055707, iteration: 78216
loss: 0.9654341340065002,grad_norm: 0.9999992539724372, iteration: 78217
loss: 1.0080980062484741,grad_norm: 0.954040711480855, iteration: 78218
loss: 0.9856245517730713,grad_norm: 0.9999990565937869, iteration: 78219
loss: 1.012445092201233,grad_norm: 0.9999991132332591, iteration: 78220
loss: 0.9728798270225525,grad_norm: 0.9260824725287593, iteration: 78221
loss: 1.0064324140548706,grad_norm: 0.999999100587419, iteration: 78222
loss: 1.034595251083374,grad_norm: 0.9999990749154732, iteration: 78223
loss: 0.9806161522865295,grad_norm: 0.9941068771616308, iteration: 78224
loss: 1.004309058189392,grad_norm: 0.9999990217295612, iteration: 78225
loss: 1.0033800601959229,grad_norm: 0.9999989759934458, iteration: 78226
loss: 1.0246460437774658,grad_norm: 0.8972782286307766, iteration: 78227
loss: 0.9755904078483582,grad_norm: 0.9865296961451867, iteration: 78228
loss: 1.0327707529067993,grad_norm: 0.9999996474408369, iteration: 78229
loss: 1.0280718803405762,grad_norm: 0.9999992121891612, iteration: 78230
loss: 1.0141637325286865,grad_norm: 0.9999994358361207, iteration: 78231
loss: 0.994401216506958,grad_norm: 0.9999990290796442, iteration: 78232
loss: 1.0191255807876587,grad_norm: 0.97073471864574, iteration: 78233
loss: 1.0099526643753052,grad_norm: 0.9999992519416829, iteration: 78234
loss: 1.0113723278045654,grad_norm: 0.9999995728196422, iteration: 78235
loss: 1.0034557580947876,grad_norm: 0.9999992104342476, iteration: 78236
loss: 1.0325883626937866,grad_norm: 0.9999989869684648, iteration: 78237
loss: 0.996772050857544,grad_norm: 0.9449934467664952, iteration: 78238
loss: 1.0009357929229736,grad_norm: 0.9999990920836159, iteration: 78239
loss: 1.0214619636535645,grad_norm: 0.9999991345435806, iteration: 78240
loss: 0.9364864826202393,grad_norm: 0.9467019388428067, iteration: 78241
loss: 0.9912494421005249,grad_norm: 0.8795678603603665, iteration: 78242
loss: 1.02647066116333,grad_norm: 0.9999990588144946, iteration: 78243
loss: 1.0239068269729614,grad_norm: 0.8392289995550444, iteration: 78244
loss: 1.0400193929672241,grad_norm: 0.8322385769913255, iteration: 78245
loss: 0.9997684955596924,grad_norm: 0.9831499741283013, iteration: 78246
loss: 1.0178357362747192,grad_norm: 0.9999992255749832, iteration: 78247
loss: 0.9945839643478394,grad_norm: 0.9591743442417167, iteration: 78248
loss: 1.0066035985946655,grad_norm: 0.9999991741164409, iteration: 78249
loss: 1.0467740297317505,grad_norm: 0.9999993241476496, iteration: 78250
loss: 1.0126749277114868,grad_norm: 0.9999992371070298, iteration: 78251
loss: 0.993242084980011,grad_norm: 0.941055703344817, iteration: 78252
loss: 1.0214027166366577,grad_norm: 0.9541151834963798, iteration: 78253
loss: 0.9790987968444824,grad_norm: 0.9090119738500981, iteration: 78254
loss: 0.9935114979743958,grad_norm: 0.9999992329128355, iteration: 78255
loss: 1.0295523405075073,grad_norm: 0.9984049156757955, iteration: 78256
loss: 1.0171947479248047,grad_norm: 0.999999107484009, iteration: 78257
loss: 0.9874442219734192,grad_norm: 0.9999991319636955, iteration: 78258
loss: 1.019247055053711,grad_norm: 0.9999991032507738, iteration: 78259
loss: 1.010834813117981,grad_norm: 0.9999992049003193, iteration: 78260
loss: 1.026501178741455,grad_norm: 0.999999296247861, iteration: 78261
loss: 0.9892745018005371,grad_norm: 0.9999992869068551, iteration: 78262
loss: 1.0205650329589844,grad_norm: 0.9999992310479388, iteration: 78263
loss: 0.9851822257041931,grad_norm: 0.9999991370555606, iteration: 78264
loss: 0.9877322316169739,grad_norm: 0.9999992612909268, iteration: 78265
loss: 0.9876428246498108,grad_norm: 0.9999990479602977, iteration: 78266
loss: 1.0019079446792603,grad_norm: 0.999999069439922, iteration: 78267
loss: 0.996274471282959,grad_norm: 0.999999100152297, iteration: 78268
loss: 1.001360535621643,grad_norm: 0.999999171673175, iteration: 78269
loss: 0.999686062335968,grad_norm: 0.9999990680410559, iteration: 78270
loss: 0.9877799153327942,grad_norm: 0.9349823751035825, iteration: 78271
loss: 1.0325038433074951,grad_norm: 0.999998997504649, iteration: 78272
loss: 0.9963933229446411,grad_norm: 0.9999990982182817, iteration: 78273
loss: 1.006658673286438,grad_norm: 0.9999990437472955, iteration: 78274
loss: 0.9837877154350281,grad_norm: 0.8385046383023299, iteration: 78275
loss: 0.9771557450294495,grad_norm: 0.9999991951619166, iteration: 78276
loss: 1.0025153160095215,grad_norm: 0.9999991899280686, iteration: 78277
loss: 0.9751604795455933,grad_norm: 0.9999992521913339, iteration: 78278
loss: 1.0163757801055908,grad_norm: 0.9107625022471793, iteration: 78279
loss: 0.9807520508766174,grad_norm: 0.9999990313159639, iteration: 78280
loss: 1.005911946296692,grad_norm: 0.9522843524311485, iteration: 78281
loss: 0.9683432579040527,grad_norm: 0.9999992637480221, iteration: 78282
loss: 1.0205835103988647,grad_norm: 0.9999991527821848, iteration: 78283
loss: 0.9643567800521851,grad_norm: 0.9999992459670739, iteration: 78284
loss: 0.9982307553291321,grad_norm: 0.978168796233883, iteration: 78285
loss: 1.008655071258545,grad_norm: 0.9999993001698184, iteration: 78286
loss: 1.0073413848876953,grad_norm: 0.9999990338188837, iteration: 78287
loss: 0.9808740615844727,grad_norm: 0.999999305365718, iteration: 78288
loss: 1.0236585140228271,grad_norm: 0.9999992156066715, iteration: 78289
loss: 1.0059716701507568,grad_norm: 0.9999990494812113, iteration: 78290
loss: 1.0181668996810913,grad_norm: 0.9999992678974313, iteration: 78291
loss: 0.9913479089736938,grad_norm: 0.9584225791666916, iteration: 78292
loss: 0.9873600006103516,grad_norm: 0.9999990647520435, iteration: 78293
loss: 0.9776201248168945,grad_norm: 0.8996445002652109, iteration: 78294
loss: 0.9797179698944092,grad_norm: 0.9999990775440389, iteration: 78295
loss: 1.032151222229004,grad_norm: 0.9999996797908746, iteration: 78296
loss: 0.9975191950798035,grad_norm: 0.8367552975474702, iteration: 78297
loss: 0.9795204401016235,grad_norm: 0.9206737655284298, iteration: 78298
loss: 0.9679023027420044,grad_norm: 0.9999991085483713, iteration: 78299
loss: 1.01536226272583,grad_norm: 0.9911938380039019, iteration: 78300
loss: 1.0394669771194458,grad_norm: 0.999999194971979, iteration: 78301
loss: 1.0059202909469604,grad_norm: 0.9999992990835155, iteration: 78302
loss: 1.0098202228546143,grad_norm: 0.9999995581031713, iteration: 78303
loss: 0.9788755774497986,grad_norm: 0.9159815856980672, iteration: 78304
loss: 0.9991952180862427,grad_norm: 0.9999991615620892, iteration: 78305
loss: 1.0300241708755493,grad_norm: 0.999999301899213, iteration: 78306
loss: 0.9779304265975952,grad_norm: 0.999999176787667, iteration: 78307
loss: 0.976050853729248,grad_norm: 0.9885569454383263, iteration: 78308
loss: 0.9727365374565125,grad_norm: 0.8941679572613652, iteration: 78309
loss: 0.945617139339447,grad_norm: 0.9999990421776754, iteration: 78310
loss: 0.9925755262374878,grad_norm: 0.9999991382390501, iteration: 78311
loss: 0.9774808883666992,grad_norm: 0.9999991026888305, iteration: 78312
loss: 0.968069851398468,grad_norm: 0.9999991419130749, iteration: 78313
loss: 0.9889875054359436,grad_norm: 0.9999990894258776, iteration: 78314
loss: 0.9790168404579163,grad_norm: 0.9889951356192118, iteration: 78315
loss: 1.0378391742706299,grad_norm: 0.999999121456886, iteration: 78316
loss: 0.9769876003265381,grad_norm: 0.9999990711596216, iteration: 78317
loss: 0.9874498248100281,grad_norm: 0.9999990842548944, iteration: 78318
loss: 1.0318931341171265,grad_norm: 0.8324123086189403, iteration: 78319
loss: 0.9628128409385681,grad_norm: 0.9999990874088108, iteration: 78320
loss: 0.9733257293701172,grad_norm: 0.9999989657557478, iteration: 78321
loss: 1.010266661643982,grad_norm: 0.985148870453276, iteration: 78322
loss: 1.0029181241989136,grad_norm: 0.9999990502686905, iteration: 78323
loss: 1.0120010375976562,grad_norm: 0.9999991281032439, iteration: 78324
loss: 0.9747210144996643,grad_norm: 0.862258923768006, iteration: 78325
loss: 1.0394264459609985,grad_norm: 0.9999993476293906, iteration: 78326
loss: 1.010998010635376,grad_norm: 0.9999992100703687, iteration: 78327
loss: 1.0382903814315796,grad_norm: 0.9999992821674268, iteration: 78328
loss: 0.9743232131004333,grad_norm: 0.9999990691635039, iteration: 78329
loss: 0.9976846575737,grad_norm: 0.9999991075814751, iteration: 78330
loss: 1.0467638969421387,grad_norm: 0.9999992247486018, iteration: 78331
loss: 1.0142180919647217,grad_norm: 0.9999992379131952, iteration: 78332
loss: 1.048818588256836,grad_norm: 0.9258391841731548, iteration: 78333
loss: 1.0212808847427368,grad_norm: 0.9999991915229093, iteration: 78334
loss: 0.9574733376502991,grad_norm: 0.9587811780916049, iteration: 78335
loss: 0.9955571293830872,grad_norm: 0.8426951203328629, iteration: 78336
loss: 1.0136183500289917,grad_norm: 0.9999990681512626, iteration: 78337
loss: 0.9908080101013184,grad_norm: 0.9999992742829218, iteration: 78338
loss: 0.9804403781890869,grad_norm: 0.9011694670738883, iteration: 78339
loss: 1.015006422996521,grad_norm: 0.9634000399789294, iteration: 78340
loss: 0.9756776690483093,grad_norm: 0.9999997978308727, iteration: 78341
loss: 1.0407758951187134,grad_norm: 0.907366641635966, iteration: 78342
loss: 1.023033857345581,grad_norm: 0.9999989434443215, iteration: 78343
loss: 1.002166509628296,grad_norm: 0.9999990397632524, iteration: 78344
loss: 0.9562278389930725,grad_norm: 0.9999991925111847, iteration: 78345
loss: 1.0164750814437866,grad_norm: 0.9999990742620308, iteration: 78346
loss: 1.0019700527191162,grad_norm: 0.9954988819146301, iteration: 78347
loss: 1.0270289182662964,grad_norm: 0.9999990670734119, iteration: 78348
loss: 0.9940791130065918,grad_norm: 0.999999271081218, iteration: 78349
loss: 1.007995843887329,grad_norm: 0.9999991511926575, iteration: 78350
loss: 1.0182139873504639,grad_norm: 0.9999991064586505, iteration: 78351
loss: 1.057318091392517,grad_norm: 0.9999994276789722, iteration: 78352
loss: 0.9913910627365112,grad_norm: 0.99999916072208, iteration: 78353
loss: 0.9672151803970337,grad_norm: 0.993885326966282, iteration: 78354
loss: 0.985113263130188,grad_norm: 0.9999991717290491, iteration: 78355
loss: 0.9820715188980103,grad_norm: 0.9589253149646709, iteration: 78356
loss: 0.9817458987236023,grad_norm: 0.9112291259181758, iteration: 78357
loss: 1.0321860313415527,grad_norm: 0.9999991677315085, iteration: 78358
loss: 0.9863006472587585,grad_norm: 0.9999990028830428, iteration: 78359
loss: 1.0383212566375732,grad_norm: 0.9999997349081382, iteration: 78360
loss: 1.1302666664123535,grad_norm: 0.9999997207539126, iteration: 78361
loss: 0.984358012676239,grad_norm: 0.999999067368468, iteration: 78362
loss: 1.0130982398986816,grad_norm: 0.9999990306201371, iteration: 78363
loss: 1.010896921157837,grad_norm: 0.9999989931913312, iteration: 78364
loss: 1.0367430448532104,grad_norm: 0.9999993350200556, iteration: 78365
loss: 1.0182822942733765,grad_norm: 0.9999991466095864, iteration: 78366
loss: 1.0272846221923828,grad_norm: 0.9999989214837348, iteration: 78367
loss: 1.003578543663025,grad_norm: 0.9999988943578952, iteration: 78368
loss: 1.016576886177063,grad_norm: 0.9999992425051151, iteration: 78369
loss: 0.9893845915794373,grad_norm: 0.9999995187847035, iteration: 78370
loss: 0.989264190196991,grad_norm: 0.8849782270517406, iteration: 78371
loss: 1.0222257375717163,grad_norm: 0.9999994983765408, iteration: 78372
loss: 1.027402639389038,grad_norm: 0.9999989644213335, iteration: 78373
loss: 0.9600460529327393,grad_norm: 0.9999991084345785, iteration: 78374
loss: 0.9641633629798889,grad_norm: 0.999999099234124, iteration: 78375
loss: 1.0208452939987183,grad_norm: 0.9373773752829705, iteration: 78376
loss: 1.0354657173156738,grad_norm: 0.9999991232200168, iteration: 78377
loss: 1.0161564350128174,grad_norm: 0.9999990914944988, iteration: 78378
loss: 1.0229169130325317,grad_norm: 0.9999990913802526, iteration: 78379
loss: 0.9883153438568115,grad_norm: 0.9999989780917078, iteration: 78380
loss: 1.0094451904296875,grad_norm: 0.9468067396931893, iteration: 78381
loss: 0.9791281223297119,grad_norm: 0.9777245392056341, iteration: 78382
loss: 0.9942480325698853,grad_norm: 0.9999991850538503, iteration: 78383
loss: 0.9880480170249939,grad_norm: 0.9999991284974755, iteration: 78384
loss: 0.9958828687667847,grad_norm: 0.9999991059060878, iteration: 78385
loss: 1.2632689476013184,grad_norm: 0.9999992239494346, iteration: 78386
loss: 0.9496583342552185,grad_norm: 0.99999899475721, iteration: 78387
loss: 1.0482161045074463,grad_norm: 0.999999317678992, iteration: 78388
loss: 1.0153144598007202,grad_norm: 0.9999991963265928, iteration: 78389
loss: 0.9407395720481873,grad_norm: 0.9414704961959158, iteration: 78390
loss: 0.9575148224830627,grad_norm: 0.9080619542976331, iteration: 78391
loss: 0.9957066774368286,grad_norm: 0.9999992375212314, iteration: 78392
loss: 1.0398634672164917,grad_norm: 0.9144063875631929, iteration: 78393
loss: 0.9977960586547852,grad_norm: 0.9999990899429859, iteration: 78394
loss: 0.9890883564949036,grad_norm: 0.9893281685210306, iteration: 78395
loss: 1.0592474937438965,grad_norm: 0.9999994030426315, iteration: 78396
loss: 1.0390121936798096,grad_norm: 0.999999116873586, iteration: 78397
loss: 1.0409750938415527,grad_norm: 0.9999990963370027, iteration: 78398
loss: 1.0083110332489014,grad_norm: 0.8489509572293025, iteration: 78399
loss: 0.9741970300674438,grad_norm: 0.9999992001316096, iteration: 78400
loss: 1.0044454336166382,grad_norm: 0.9822584208615635, iteration: 78401
loss: 1.012532114982605,grad_norm: 0.9999991828169363, iteration: 78402
loss: 1.0077036619186401,grad_norm: 0.8765404906743098, iteration: 78403
loss: 0.9888949990272522,grad_norm: 0.9999990460882076, iteration: 78404
loss: 1.0311145782470703,grad_norm: 0.9999992276042516, iteration: 78405
loss: 1.0479010343551636,grad_norm: 0.9999993700109516, iteration: 78406
loss: 0.9901672005653381,grad_norm: 0.999999017670263, iteration: 78407
loss: 1.0048408508300781,grad_norm: 0.9999991375117029, iteration: 78408
loss: 0.9681803584098816,grad_norm: 0.9918911855171161, iteration: 78409
loss: 1.0160802602767944,grad_norm: 0.9999991809155356, iteration: 78410
loss: 0.997109055519104,grad_norm: 0.99999921128583, iteration: 78411
loss: 0.9786796569824219,grad_norm: 0.999999082438258, iteration: 78412
loss: 0.9961191415786743,grad_norm: 0.9999992080696816, iteration: 78413
loss: 1.0171159505844116,grad_norm: 0.9999991809172804, iteration: 78414
loss: 0.992688000202179,grad_norm: 0.999999171249789, iteration: 78415
loss: 1.0083354711532593,grad_norm: 0.9999991367495502, iteration: 78416
loss: 1.0017986297607422,grad_norm: 0.9999991432582883, iteration: 78417
loss: 1.0247149467468262,grad_norm: 0.999999122428679, iteration: 78418
loss: 1.0139142274856567,grad_norm: 0.9307886255088231, iteration: 78419
loss: 0.999969482421875,grad_norm: 0.9999996509207678, iteration: 78420
loss: 1.0227160453796387,grad_norm: 0.9999989786004846, iteration: 78421
loss: 1.0132211446762085,grad_norm: 0.9999990341431715, iteration: 78422
loss: 0.9867551922798157,grad_norm: 0.9284942381008822, iteration: 78423
loss: 0.9827961921691895,grad_norm: 0.9803061013048882, iteration: 78424
loss: 1.0104643106460571,grad_norm: 0.965195173440714, iteration: 78425
loss: 1.0027203559875488,grad_norm: 0.9999999059842287, iteration: 78426
loss: 1.0105458498001099,grad_norm: 0.9999991287498199, iteration: 78427
loss: 1.0183756351470947,grad_norm: 0.9999990803398308, iteration: 78428
loss: 0.9703975319862366,grad_norm: 0.878645093735704, iteration: 78429
loss: 1.0107269287109375,grad_norm: 0.9999992242947766, iteration: 78430
loss: 0.9743056893348694,grad_norm: 0.9999990150982581, iteration: 78431
loss: 1.0015581846237183,grad_norm: 0.7831011624449083, iteration: 78432
loss: 0.9971622228622437,grad_norm: 0.9999992028476492, iteration: 78433
loss: 0.9869170188903809,grad_norm: 0.9616493748135829, iteration: 78434
loss: 0.9660709500312805,grad_norm: 0.9999991811767771, iteration: 78435
loss: 1.0309767723083496,grad_norm: 0.926193316807122, iteration: 78436
loss: 1.0028071403503418,grad_norm: 0.9999990864386807, iteration: 78437
loss: 0.9994779825210571,grad_norm: 0.9999991925251261, iteration: 78438
loss: 0.988847017288208,grad_norm: 0.9999993453450052, iteration: 78439
loss: 1.0081441402435303,grad_norm: 0.9999992118334027, iteration: 78440
loss: 1.0002232789993286,grad_norm: 0.9999991552427089, iteration: 78441
loss: 1.0110818147659302,grad_norm: 0.9999992076411005, iteration: 78442
loss: 1.0309131145477295,grad_norm: 0.999999118348603, iteration: 78443
loss: 1.0250136852264404,grad_norm: 0.999999115704386, iteration: 78444
loss: 0.9864225387573242,grad_norm: 0.9999991402302167, iteration: 78445
loss: 0.9677254557609558,grad_norm: 0.8914749930882752, iteration: 78446
loss: 0.9809298515319824,grad_norm: 0.93163649809854, iteration: 78447
loss: 0.9980567693710327,grad_norm: 0.9477486768606278, iteration: 78448
loss: 0.9906339645385742,grad_norm: 0.9999991745521635, iteration: 78449
loss: 0.9910442233085632,grad_norm: 0.9347036894385101, iteration: 78450
loss: 1.0014317035675049,grad_norm: 0.9999991889305663, iteration: 78451
loss: 0.9553402662277222,grad_norm: 0.9999992833945087, iteration: 78452
loss: 0.995602011680603,grad_norm: 0.9957237584661955, iteration: 78453
loss: 1.0013436079025269,grad_norm: 0.9999990378556621, iteration: 78454
loss: 1.0102146863937378,grad_norm: 0.9996450708356079, iteration: 78455
loss: 1.0111169815063477,grad_norm: 0.9999990371852711, iteration: 78456
loss: 0.9931816458702087,grad_norm: 0.9999990356602121, iteration: 78457
loss: 1.0264261960983276,grad_norm: 0.9999990407829907, iteration: 78458
loss: 0.9864516854286194,grad_norm: 0.9616371551220269, iteration: 78459
loss: 1.0243793725967407,grad_norm: 0.9999991637099108, iteration: 78460
loss: 1.002503752708435,grad_norm: 0.9999993152898842, iteration: 78461
loss: 1.0018543004989624,grad_norm: 0.9999991140564867, iteration: 78462
loss: 0.9902648329734802,grad_norm: 0.9516303043201881, iteration: 78463
loss: 1.0226168632507324,grad_norm: 0.9045818059557775, iteration: 78464
loss: 0.9659973382949829,grad_norm: 0.9999991512650747, iteration: 78465
loss: 1.032402753829956,grad_norm: 0.9999992793543839, iteration: 78466
loss: 1.0072855949401855,grad_norm: 0.9999991959167112, iteration: 78467
loss: 1.0110564231872559,grad_norm: 0.9999992100011281, iteration: 78468
loss: 0.9896707534790039,grad_norm: 0.9031770549098923, iteration: 78469
loss: 0.9899662137031555,grad_norm: 0.9644679017893589, iteration: 78470
loss: 0.9740389585494995,grad_norm: 0.9999990190175183, iteration: 78471
loss: 0.9889907836914062,grad_norm: 0.9999993154755862, iteration: 78472
loss: 0.9919962286949158,grad_norm: 0.999999028281987, iteration: 78473
loss: 0.9731190800666809,grad_norm: 0.9281025149817336, iteration: 78474
loss: 0.9648087024688721,grad_norm: 0.9366124752358733, iteration: 78475
loss: 1.0180155038833618,grad_norm: 0.9999991004551163, iteration: 78476
loss: 1.046101689338684,grad_norm: 0.9999990317423442, iteration: 78477
loss: 0.9736140370368958,grad_norm: 0.9999991807076468, iteration: 78478
loss: 1.0102907419204712,grad_norm: 0.9999993439108896, iteration: 78479
loss: 1.0292619466781616,grad_norm: 0.999999009457684, iteration: 78480
loss: 1.0115625858306885,grad_norm: 0.9546309822009934, iteration: 78481
loss: 1.0106918811798096,grad_norm: 0.9496514787274367, iteration: 78482
loss: 0.9962726831436157,grad_norm: 0.9415349661033823, iteration: 78483
loss: 1.0169041156768799,grad_norm: 0.999999355260013, iteration: 78484
loss: 1.0398229360580444,grad_norm: 0.9999992409876326, iteration: 78485
loss: 1.0392508506774902,grad_norm: 0.9999994212982481, iteration: 78486
loss: 1.0003838539123535,grad_norm: 0.9999992686114471, iteration: 78487
loss: 1.0946595668792725,grad_norm: 0.9999993744712805, iteration: 78488
loss: 1.0043325424194336,grad_norm: 0.8845092877648039, iteration: 78489
loss: 1.0169692039489746,grad_norm: 0.9999990897295441, iteration: 78490
loss: 0.9748366475105286,grad_norm: 0.9999991218637989, iteration: 78491
loss: 0.9668733477592468,grad_norm: 0.9743589654447347, iteration: 78492
loss: 0.9874154925346375,grad_norm: 0.9490375576975758, iteration: 78493
loss: 0.960123598575592,grad_norm: 0.9406485518338161, iteration: 78494
loss: 1.0034493207931519,grad_norm: 0.9774581424963481, iteration: 78495
loss: 1.0127863883972168,grad_norm: 0.9999992441783389, iteration: 78496
loss: 1.011277675628662,grad_norm: 0.999998964693919, iteration: 78497
loss: 1.0094566345214844,grad_norm: 0.999999091667042, iteration: 78498
loss: 1.0461503267288208,grad_norm: 0.9203355566798416, iteration: 78499
loss: 0.9881425499916077,grad_norm: 0.9999992130572901, iteration: 78500
loss: 1.0250362157821655,grad_norm: 0.9999990992544529, iteration: 78501
loss: 0.9898787140846252,grad_norm: 0.999999145160071, iteration: 78502
loss: 1.0085630416870117,grad_norm: 0.9198780744028504, iteration: 78503
loss: 0.9998093247413635,grad_norm: 0.9798216232014927, iteration: 78504
loss: 0.9983495473861694,grad_norm: 0.9999992947317213, iteration: 78505
loss: 1.0556726455688477,grad_norm: 0.9999997186280072, iteration: 78506
loss: 0.9810543656349182,grad_norm: 0.9999991119834948, iteration: 78507
loss: 0.9700067043304443,grad_norm: 0.9597135948492455, iteration: 78508
loss: 0.9878283143043518,grad_norm: 0.9999990876439602, iteration: 78509
loss: 0.9971951842308044,grad_norm: 0.9999992800433259, iteration: 78510
loss: 1.0380488634109497,grad_norm: 0.9999997900259524, iteration: 78511
loss: 0.9837813377380371,grad_norm: 0.9150865963720701, iteration: 78512
loss: 1.0043641328811646,grad_norm: 0.999999329994541, iteration: 78513
loss: 0.9973787069320679,grad_norm: 0.9999992429059368, iteration: 78514
loss: 1.0059090852737427,grad_norm: 0.9999991485504983, iteration: 78515
loss: 1.0027635097503662,grad_norm: 0.9999991224615522, iteration: 78516
loss: 0.9976191520690918,grad_norm: 0.9999992371948512, iteration: 78517
loss: 1.0470187664031982,grad_norm: 0.9989665825093448, iteration: 78518
loss: 1.026421070098877,grad_norm: 0.9999992399191645, iteration: 78519
loss: 0.9917964935302734,grad_norm: 0.9999990271820658, iteration: 78520
loss: 1.008844017982483,grad_norm: 0.9999991878044548, iteration: 78521
loss: 1.110854148864746,grad_norm: 0.9999996973233214, iteration: 78522
loss: 0.9660553336143494,grad_norm: 0.8896200585901202, iteration: 78523
loss: 0.987021803855896,grad_norm: 0.9904042869284306, iteration: 78524
loss: 1.013520359992981,grad_norm: 0.925886435437073, iteration: 78525
loss: 1.0053447484970093,grad_norm: 0.9999997612203471, iteration: 78526
loss: 1.1188840866088867,grad_norm: 0.9999992605668172, iteration: 78527
loss: 1.1615469455718994,grad_norm: 0.9999992533035224, iteration: 78528
loss: 0.949529767036438,grad_norm: 0.9999992836432205, iteration: 78529
loss: 0.9521093368530273,grad_norm: 0.9999991883742217, iteration: 78530
loss: 0.9739623665809631,grad_norm: 0.9999991728548027, iteration: 78531
loss: 1.0243078470230103,grad_norm: 0.9698671809561503, iteration: 78532
loss: 1.0221035480499268,grad_norm: 0.9999999240745164, iteration: 78533
loss: 0.9896449446678162,grad_norm: 0.9999991268026862, iteration: 78534
loss: 1.0350128412246704,grad_norm: 0.9877477808858065, iteration: 78535
loss: 1.069454312324524,grad_norm: 0.9999991891463683, iteration: 78536
loss: 0.9818382859230042,grad_norm: 0.8099473167224375, iteration: 78537
loss: 1.077009916305542,grad_norm: 0.9999992023990235, iteration: 78538
loss: 1.0862456560134888,grad_norm: 0.9999993705731253, iteration: 78539
loss: 0.9953579902648926,grad_norm: 0.9999992065867734, iteration: 78540
loss: 0.9933439493179321,grad_norm: 0.9999992533922037, iteration: 78541
loss: 1.0362924337387085,grad_norm: 0.9999999772768731, iteration: 78542
loss: 1.0188868045806885,grad_norm: 0.9999993305816112, iteration: 78543
loss: 1.011534571647644,grad_norm: 0.9999991331266178, iteration: 78544
loss: 0.9922695755958557,grad_norm: 0.9999991321878838, iteration: 78545
loss: 1.0090607404708862,grad_norm: 0.9999988957368946, iteration: 78546
loss: 0.9558201432228088,grad_norm: 0.9607751035109967, iteration: 78547
loss: 1.0061489343643188,grad_norm: 0.873982098387535, iteration: 78548
loss: 1.025345802307129,grad_norm: 0.9999992463318296, iteration: 78549
loss: 0.9952878355979919,grad_norm: 0.9999991843185931, iteration: 78550
loss: 1.0562103986740112,grad_norm: 0.9999990353511693, iteration: 78551
loss: 1.0041593313217163,grad_norm: 0.9999991327234548, iteration: 78552
loss: 0.9807676672935486,grad_norm: 0.9999991857002973, iteration: 78553
loss: 1.0057177543640137,grad_norm: 0.9999992793063284, iteration: 78554
loss: 1.01166570186615,grad_norm: 0.9999991664021914, iteration: 78555
loss: 1.0043213367462158,grad_norm: 0.9999991758585625, iteration: 78556
loss: 0.981512725353241,grad_norm: 0.9999991825359936, iteration: 78557
loss: 1.0009996891021729,grad_norm: 0.9999989894857754, iteration: 78558
loss: 1.0254943370819092,grad_norm: 0.9999991614665743, iteration: 78559
loss: 0.9851530194282532,grad_norm: 0.9502995542948411, iteration: 78560
loss: 0.9862300157546997,grad_norm: 0.9999991327431522, iteration: 78561
loss: 0.9636898636817932,grad_norm: 0.9931519068498812, iteration: 78562
loss: 0.9772846698760986,grad_norm: 0.999999216091076, iteration: 78563
loss: 0.9953155517578125,grad_norm: 0.999999267670799, iteration: 78564
loss: 1.006500005722046,grad_norm: 0.9999992203795008, iteration: 78565
loss: 1.0102150440216064,grad_norm: 0.9826990517187254, iteration: 78566
loss: 0.9841844439506531,grad_norm: 0.9948259326460248, iteration: 78567
loss: 1.0284353494644165,grad_norm: 0.9999990962501926, iteration: 78568
loss: 1.009093165397644,grad_norm: 0.920707560856649, iteration: 78569
loss: 1.043869972229004,grad_norm: 0.9999991183060121, iteration: 78570
loss: 1.0090287923812866,grad_norm: 0.9264178995809118, iteration: 78571
loss: 1.0129281282424927,grad_norm: 0.9999991033234712, iteration: 78572
loss: 1.0085694789886475,grad_norm: 0.9999992060499834, iteration: 78573
loss: 0.9804556965827942,grad_norm: 0.9562397270809899, iteration: 78574
loss: 1.000993013381958,grad_norm: 0.9375732208382057, iteration: 78575
loss: 0.9702153205871582,grad_norm: 0.9999992314553133, iteration: 78576
loss: 1.044334053993225,grad_norm: 0.9999992333531161, iteration: 78577
loss: 1.0287303924560547,grad_norm: 0.9999995108324375, iteration: 78578
loss: 0.9954098463058472,grad_norm: 0.9999991641987152, iteration: 78579
loss: 0.9677830338478088,grad_norm: 0.9999992041998502, iteration: 78580
loss: 1.0144798755645752,grad_norm: 0.9653776359339671, iteration: 78581
loss: 1.0263794660568237,grad_norm: 0.9999993192924799, iteration: 78582
loss: 1.010082483291626,grad_norm: 0.9999991339853272, iteration: 78583
loss: 1.0669629573822021,grad_norm: 0.9999999002529998, iteration: 78584
loss: 1.0284545421600342,grad_norm: 0.9999991028395103, iteration: 78585
loss: 0.9678717851638794,grad_norm: 0.999999178261962, iteration: 78586
loss: 0.9803546071052551,grad_norm: 0.8865295342326698, iteration: 78587
loss: 1.0710697174072266,grad_norm: 1.0000000227680494, iteration: 78588
loss: 0.9464203119277954,grad_norm: 0.9999991306660307, iteration: 78589
loss: 0.99309903383255,grad_norm: 0.9999991763493912, iteration: 78590
loss: 1.0210189819335938,grad_norm: 0.9625508946056615, iteration: 78591
loss: 1.0424202680587769,grad_norm: 0.9999990552571224, iteration: 78592
loss: 1.000793218612671,grad_norm: 0.9999991672964383, iteration: 78593
loss: 0.9705589413642883,grad_norm: 0.9999990818312946, iteration: 78594
loss: 0.9930773377418518,grad_norm: 0.9999991557984701, iteration: 78595
loss: 1.0171853303909302,grad_norm: 0.999999178312215, iteration: 78596
loss: 1.0188689231872559,grad_norm: 0.9999989977361321, iteration: 78597
loss: 0.9929213523864746,grad_norm: 0.9999990965956265, iteration: 78598
loss: 1.010786771774292,grad_norm: 0.9803406094270178, iteration: 78599
loss: 1.0049474239349365,grad_norm: 0.9999992896692026, iteration: 78600
loss: 1.0721676349639893,grad_norm: 0.9999999852402903, iteration: 78601
loss: 0.9972949624061584,grad_norm: 0.9999990623193348, iteration: 78602
loss: 1.0664786100387573,grad_norm: 0.9999998405696771, iteration: 78603
loss: 1.1250436305999756,grad_norm: 0.9999997401032835, iteration: 78604
loss: 1.2868636846542358,grad_norm: 0.9999996820551103, iteration: 78605
loss: 1.0271189212799072,grad_norm: 0.9999992733734183, iteration: 78606
loss: 0.9845113754272461,grad_norm: 0.9999991651377164, iteration: 78607
loss: 1.0504136085510254,grad_norm: 0.9999990112346033, iteration: 78608
loss: 1.0181423425674438,grad_norm: 0.9944068683262822, iteration: 78609
loss: 1.013726830482483,grad_norm: 0.9892156348761021, iteration: 78610
loss: 1.0104761123657227,grad_norm: 0.9264286562649088, iteration: 78611
loss: 0.9890629053115845,grad_norm: 0.8632598375085831, iteration: 78612
loss: 0.9942983388900757,grad_norm: 0.9999992150380046, iteration: 78613
loss: 1.0147027969360352,grad_norm: 0.9999991718693563, iteration: 78614
loss: 0.9867498278617859,grad_norm: 0.9525664631866531, iteration: 78615
loss: 0.9654586911201477,grad_norm: 0.9999991079276157, iteration: 78616
loss: 1.0215212106704712,grad_norm: 0.9999992237088691, iteration: 78617
loss: 1.0080432891845703,grad_norm: 0.9999992411172168, iteration: 78618
loss: 1.0233092308044434,grad_norm: 0.9999995230964095, iteration: 78619
loss: 0.978699803352356,grad_norm: 0.8668947216713392, iteration: 78620
loss: 1.0280625820159912,grad_norm: 0.9999997555460466, iteration: 78621
loss: 0.9733222723007202,grad_norm: 0.9421305395371631, iteration: 78622
loss: 1.0226466655731201,grad_norm: 0.9999995289231469, iteration: 78623
loss: 1.0179356336593628,grad_norm: 0.9999992013345183, iteration: 78624
loss: 0.9467781782150269,grad_norm: 0.9999991248547883, iteration: 78625
loss: 1.0091279745101929,grad_norm: 0.999999070598085, iteration: 78626
loss: 0.9917048215866089,grad_norm: 0.9999992532138459, iteration: 78627
loss: 1.0152490139007568,grad_norm: 0.9999990232491319, iteration: 78628
loss: 1.025574803352356,grad_norm: 0.9999992826521418, iteration: 78629
loss: 0.9892439246177673,grad_norm: 0.9622961394361816, iteration: 78630
loss: 1.00033438205719,grad_norm: 0.9050965670666469, iteration: 78631
loss: 1.000710129737854,grad_norm: 0.8913684293558126, iteration: 78632
loss: 0.9981619715690613,grad_norm: 0.9974691155828955, iteration: 78633
loss: 1.0737130641937256,grad_norm: 0.9999998915408645, iteration: 78634
loss: 1.0135633945465088,grad_norm: 0.9999991939406934, iteration: 78635
loss: 0.9756330847740173,grad_norm: 0.893616040467424, iteration: 78636
loss: 0.9755000472068787,grad_norm: 0.9999990429381358, iteration: 78637
loss: 1.0027233362197876,grad_norm: 0.999999140810017, iteration: 78638
loss: 0.9983970522880554,grad_norm: 0.9999991098035685, iteration: 78639
loss: 1.0289639234542847,grad_norm: 0.9999991822713423, iteration: 78640
loss: 1.012771725654602,grad_norm: 0.9999992044318194, iteration: 78641
loss: 1.117671012878418,grad_norm: 0.9999992777569401, iteration: 78642
loss: 1.0618635416030884,grad_norm: 0.9999991995747635, iteration: 78643
loss: 1.0025876760482788,grad_norm: 0.9999991696997973, iteration: 78644
loss: 0.9740768074989319,grad_norm: 0.999999151650168, iteration: 78645
loss: 1.0106321573257446,grad_norm: 0.9999990323962643, iteration: 78646
loss: 0.9541166424751282,grad_norm: 0.9696071296621878, iteration: 78647
loss: 1.0263630151748657,grad_norm: 0.9999995094844235, iteration: 78648
loss: 0.9825196862220764,grad_norm: 0.9443659556842182, iteration: 78649
loss: 1.0381017923355103,grad_norm: 0.9812927953072812, iteration: 78650
loss: 0.9988965392112732,grad_norm: 0.9325699763608076, iteration: 78651
loss: 1.0095562934875488,grad_norm: 0.9999990201159381, iteration: 78652
loss: 0.9911951422691345,grad_norm: 0.9999990521378969, iteration: 78653
loss: 1.001851201057434,grad_norm: 0.8999527779754531, iteration: 78654
loss: 1.0173766613006592,grad_norm: 0.9999992957080522, iteration: 78655
loss: 1.0062326192855835,grad_norm: 0.8528645609966239, iteration: 78656
loss: 0.9651741981506348,grad_norm: 0.9999990899950538, iteration: 78657
loss: 1.0314184427261353,grad_norm: 0.9999991422730182, iteration: 78658
loss: 0.9925077557563782,grad_norm: 0.9999992307647665, iteration: 78659
loss: 1.0718753337860107,grad_norm: 0.9999992329723261, iteration: 78660
loss: 1.0514888763427734,grad_norm: 0.9394917765390687, iteration: 78661
loss: 1.0811048746109009,grad_norm: 0.9999991155889488, iteration: 78662
loss: 1.0792111158370972,grad_norm: 0.999999587653349, iteration: 78663
loss: 1.006778359413147,grad_norm: 0.9999992468443626, iteration: 78664
loss: 1.06288743019104,grad_norm: 0.9999996882453372, iteration: 78665
loss: 1.0306066274642944,grad_norm: 0.9999993636354676, iteration: 78666
loss: 1.0101979970932007,grad_norm: 0.9754216754055481, iteration: 78667
loss: 0.9895414710044861,grad_norm: 0.9999992320971903, iteration: 78668
loss: 1.0054291486740112,grad_norm: 0.9731226991881319, iteration: 78669
loss: 0.9960869550704956,grad_norm: 0.9999991707976623, iteration: 78670
loss: 0.9961881041526794,grad_norm: 0.9525121021751073, iteration: 78671
loss: 1.0099964141845703,grad_norm: 0.9723358859520502, iteration: 78672
loss: 0.9988071918487549,grad_norm: 0.9999990344211401, iteration: 78673
loss: 1.031965970993042,grad_norm: 0.9393612755545814, iteration: 78674
loss: 1.007270097732544,grad_norm: 0.9999989833300457, iteration: 78675
loss: 0.999607503414154,grad_norm: 0.8976255889898186, iteration: 78676
loss: 1.0378986597061157,grad_norm: 0.9856812104168701, iteration: 78677
loss: 0.9787379503250122,grad_norm: 0.9516029212072911, iteration: 78678
loss: 1.0568397045135498,grad_norm: 0.9999996674828115, iteration: 78679
loss: 1.003190517425537,grad_norm: 0.9962617991804952, iteration: 78680
loss: 0.9883174896240234,grad_norm: 0.9999989873286597, iteration: 78681
loss: 1.0043182373046875,grad_norm: 0.9375575212124523, iteration: 78682
loss: 0.9989019632339478,grad_norm: 0.9999995424403766, iteration: 78683
loss: 0.9706031680107117,grad_norm: 0.9999999615801067, iteration: 78684
loss: 1.0208278894424438,grad_norm: 0.9999995230731885, iteration: 78685
loss: 1.0517898797988892,grad_norm: 0.9569775473099378, iteration: 78686
loss: 1.0036985874176025,grad_norm: 0.9999992339211874, iteration: 78687
loss: 1.0119396448135376,grad_norm: 0.9999990945607556, iteration: 78688
loss: 0.9996294379234314,grad_norm: 0.9399837071317538, iteration: 78689
loss: 1.1075698137283325,grad_norm: 0.999999104737245, iteration: 78690
loss: 1.0138027667999268,grad_norm: 0.9999991958029617, iteration: 78691
loss: 1.046562671661377,grad_norm: 0.9999999755977573, iteration: 78692
loss: 0.97505784034729,grad_norm: 0.9324945799531268, iteration: 78693
loss: 1.0552245378494263,grad_norm: 0.9999991802728295, iteration: 78694
loss: 1.0112699270248413,grad_norm: 0.9999990418093835, iteration: 78695
loss: 1.021938443183899,grad_norm: 0.9999999250567453, iteration: 78696
loss: 1.0329753160476685,grad_norm: 0.999999512780388, iteration: 78697
loss: 1.0453089475631714,grad_norm: 0.999999194157825, iteration: 78698
loss: 1.0114898681640625,grad_norm: 0.9999995822967875, iteration: 78699
loss: 1.032169222831726,grad_norm: 0.9999994951736456, iteration: 78700
loss: 1.1035449504852295,grad_norm: 0.999999387869619, iteration: 78701
loss: 1.0285851955413818,grad_norm: 0.999999289733616, iteration: 78702
loss: 1.0517808198928833,grad_norm: 0.9999996730419018, iteration: 78703
loss: 1.029636263847351,grad_norm: 0.9999992066191238, iteration: 78704
loss: 1.03140389919281,grad_norm: 0.9999993100191694, iteration: 78705
loss: 0.9692192077636719,grad_norm: 0.9999990447556474, iteration: 78706
loss: 0.9916009306907654,grad_norm: 0.9999992025801996, iteration: 78707
loss: 0.9983963966369629,grad_norm: 0.9999993606324528, iteration: 78708
loss: 1.0520728826522827,grad_norm: 0.9999997977260596, iteration: 78709
loss: 0.9953356385231018,grad_norm: 0.9268731368714115, iteration: 78710
loss: 0.98246169090271,grad_norm: 0.9999991604167134, iteration: 78711
loss: 1.0284864902496338,grad_norm: 0.9999997660064914, iteration: 78712
loss: 1.055087924003601,grad_norm: 0.9999990928247701, iteration: 78713
loss: 1.0210752487182617,grad_norm: 0.9999990593330813, iteration: 78714
loss: 1.0945165157318115,grad_norm: 1.000000062710462, iteration: 78715
loss: 1.1256918907165527,grad_norm: 0.9999993959476309, iteration: 78716
loss: 1.050837755203247,grad_norm: 0.9999991949803168, iteration: 78717
loss: 1.0942859649658203,grad_norm: 0.9999998531228166, iteration: 78718
loss: 1.0941203832626343,grad_norm: 0.9999997526308358, iteration: 78719
loss: 1.2260156869888306,grad_norm: 0.9999997691089, iteration: 78720
loss: 1.0498533248901367,grad_norm: 0.9999994631841579, iteration: 78721
loss: 1.3596220016479492,grad_norm: 0.9999998303209541, iteration: 78722
loss: 1.021783709526062,grad_norm: 0.9999993244036454, iteration: 78723
loss: 1.0610182285308838,grad_norm: 0.9999994348387614, iteration: 78724
loss: 1.1117838621139526,grad_norm: 0.9999991566633352, iteration: 78725
loss: 1.0260673761367798,grad_norm: 0.9999991079346146, iteration: 78726
loss: 1.0058892965316772,grad_norm: 0.9959134037743897, iteration: 78727
loss: 1.03452730178833,grad_norm: 0.9999992745949632, iteration: 78728
loss: 1.0098168849945068,grad_norm: 0.9999992330104693, iteration: 78729
loss: 1.0328351259231567,grad_norm: 0.9999993468568161, iteration: 78730
loss: 1.020849347114563,grad_norm: 0.9999997858859072, iteration: 78731
loss: 0.9899218082427979,grad_norm: 0.9999991972749062, iteration: 78732
loss: 1.1909055709838867,grad_norm: 0.9999997137487876, iteration: 78733
loss: 1.0422121286392212,grad_norm: 0.9999991494292388, iteration: 78734
loss: 1.1989761590957642,grad_norm: 0.9999996565710941, iteration: 78735
loss: 1.0520614385604858,grad_norm: 0.9999990589361398, iteration: 78736
loss: 1.0378894805908203,grad_norm: 0.9999993267098719, iteration: 78737
loss: 0.9613814353942871,grad_norm: 0.9999996971764686, iteration: 78738
loss: 1.031167984008789,grad_norm: 0.9999997340798549, iteration: 78739
loss: 1.0201131105422974,grad_norm: 0.9999998107769095, iteration: 78740
loss: 0.9784059524536133,grad_norm: 0.999999170615893, iteration: 78741
loss: 1.0502058267593384,grad_norm: 0.9999994399184204, iteration: 78742
loss: 1.062721610069275,grad_norm: 1.0000000161594906, iteration: 78743
loss: 0.9630259275436401,grad_norm: 0.9866931845867026, iteration: 78744
loss: 1.03223717212677,grad_norm: 0.9999994799814198, iteration: 78745
loss: 1.121342420578003,grad_norm: 0.9999995757557719, iteration: 78746
loss: 0.9886152744293213,grad_norm: 0.9999991236333668, iteration: 78747
loss: 1.0202343463897705,grad_norm: 0.9999991500647545, iteration: 78748
loss: 1.0363024473190308,grad_norm: 0.999999398586687, iteration: 78749
loss: 0.9942723512649536,grad_norm: 0.9481279410469968, iteration: 78750
loss: 1.0193188190460205,grad_norm: 0.9999994217520365, iteration: 78751
loss: 1.0081666707992554,grad_norm: 0.9999999709945387, iteration: 78752
loss: 0.9874981045722961,grad_norm: 0.9999989321174602, iteration: 78753
loss: 1.072605848312378,grad_norm: 0.999999498247726, iteration: 78754
loss: 1.003881812095642,grad_norm: 0.9408000143290914, iteration: 78755
loss: 1.1149332523345947,grad_norm: 0.9999998516720155, iteration: 78756
loss: 1.0149766206741333,grad_norm: 0.9999991318823478, iteration: 78757
loss: 1.027045726776123,grad_norm: 0.9410695394992298, iteration: 78758
loss: 1.0156186819076538,grad_norm: 0.9999992351036512, iteration: 78759
loss: 0.9947192072868347,grad_norm: 0.9952150808508571, iteration: 78760
loss: 1.0145001411437988,grad_norm: 0.9999991826694905, iteration: 78761
loss: 1.0487841367721558,grad_norm: 0.9999991893314483, iteration: 78762
loss: 1.0074305534362793,grad_norm: 0.9999991173703403, iteration: 78763
loss: 1.00579035282135,grad_norm: 0.9999992657565959, iteration: 78764
loss: 1.002745509147644,grad_norm: 0.9999991423926278, iteration: 78765
loss: 0.9719867706298828,grad_norm: 0.9810833229714668, iteration: 78766
loss: 0.9785197377204895,grad_norm: 0.8738415838429118, iteration: 78767
loss: 1.042078971862793,grad_norm: 0.9999997453186428, iteration: 78768
loss: 1.0243011713027954,grad_norm: 0.9759210871755899, iteration: 78769
loss: 1.004931926727295,grad_norm: 0.9691678706518921, iteration: 78770
loss: 0.9852203726768494,grad_norm: 0.9999993326619466, iteration: 78771
loss: 1.0090128183364868,grad_norm: 0.9999989996528801, iteration: 78772
loss: 1.019839882850647,grad_norm: 0.9999996251428805, iteration: 78773
loss: 0.9892016053199768,grad_norm: 0.9999992057850896, iteration: 78774
loss: 0.9897125363349915,grad_norm: 0.8640270007873314, iteration: 78775
loss: 0.9884690642356873,grad_norm: 0.999998998107296, iteration: 78776
loss: 1.0091496706008911,grad_norm: 0.913905103963319, iteration: 78777
loss: 1.0288876295089722,grad_norm: 0.9302499882493949, iteration: 78778
loss: 1.0455915927886963,grad_norm: 0.9999990970122199, iteration: 78779
loss: 1.0365009307861328,grad_norm: 1.0000000615242843, iteration: 78780
loss: 1.042675256729126,grad_norm: 0.9985091382491965, iteration: 78781
loss: 1.0417207479476929,grad_norm: 0.9999990664894243, iteration: 78782
loss: 1.0297305583953857,grad_norm: 0.9999990585476745, iteration: 78783
loss: 1.0288634300231934,grad_norm: 0.9999991411647763, iteration: 78784
loss: 1.0078070163726807,grad_norm: 0.9999992276097641, iteration: 78785
loss: 1.0066744089126587,grad_norm: 0.9999995086602035, iteration: 78786
loss: 0.9841420650482178,grad_norm: 0.943701003122256, iteration: 78787
loss: 1.0081428289413452,grad_norm: 0.9006976431300021, iteration: 78788
loss: 0.9651646018028259,grad_norm: 0.9308064536143418, iteration: 78789
loss: 1.007251501083374,grad_norm: 0.9999992212853591, iteration: 78790
loss: 1.0162484645843506,grad_norm: 0.9999991372311308, iteration: 78791
loss: 1.0309494733810425,grad_norm: 0.9999997474727189, iteration: 78792
loss: 1.036698579788208,grad_norm: 0.999999638199506, iteration: 78793
loss: 1.0167046785354614,grad_norm: 0.9902495492066605, iteration: 78794
loss: 1.0385830402374268,grad_norm: 0.9999992642112263, iteration: 78795
loss: 0.9884094595909119,grad_norm: 0.9999990792491538, iteration: 78796
loss: 0.9799965620040894,grad_norm: 0.9999991654762715, iteration: 78797
loss: 1.0016728639602661,grad_norm: 0.9999990849978013, iteration: 78798
loss: 0.9726920127868652,grad_norm: 0.8948006962279175, iteration: 78799
loss: 0.9943654537200928,grad_norm: 0.9999991576047602, iteration: 78800
loss: 0.9953453540802002,grad_norm: 0.9999990922920281, iteration: 78801
loss: 0.9920680522918701,grad_norm: 0.9999992790408033, iteration: 78802
loss: 1.0046895742416382,grad_norm: 0.896041617425104, iteration: 78803
loss: 0.9824028611183167,grad_norm: 0.9641375378216469, iteration: 78804
loss: 1.0082577466964722,grad_norm: 0.9067229180571015, iteration: 78805
loss: 0.9880335330963135,grad_norm: 0.8535789839871307, iteration: 78806
loss: 1.0158677101135254,grad_norm: 0.9375466875694513, iteration: 78807
loss: 0.990838348865509,grad_norm: 0.8932942794845239, iteration: 78808
loss: 1.0041284561157227,grad_norm: 0.9999990924737985, iteration: 78809
loss: 0.9641225934028625,grad_norm: 0.9999991192335059, iteration: 78810
loss: 0.9784867167472839,grad_norm: 0.9999993191807786, iteration: 78811
loss: 1.00814688205719,grad_norm: 0.8935768532561645, iteration: 78812
loss: 1.006090760231018,grad_norm: 0.9386861145409512, iteration: 78813
loss: 0.9759964346885681,grad_norm: 0.9999989779194852, iteration: 78814
loss: 0.9823323488235474,grad_norm: 0.965185234437345, iteration: 78815
loss: 0.999566912651062,grad_norm: 0.8882121780984787, iteration: 78816
loss: 0.9854585528373718,grad_norm: 0.9249867907228526, iteration: 78817
loss: 1.0143722295761108,grad_norm: 0.9999990202438466, iteration: 78818
loss: 1.0038232803344727,grad_norm: 0.9999990034711256, iteration: 78819
loss: 1.0063163042068481,grad_norm: 0.8169891655150465, iteration: 78820
loss: 1.022438645362854,grad_norm: 0.9999991254924339, iteration: 78821
loss: 1.0107370615005493,grad_norm: 0.9669547857915494, iteration: 78822
loss: 0.9806631207466125,grad_norm: 0.999999198603517, iteration: 78823
loss: 1.0997971296310425,grad_norm: 0.9999999932782636, iteration: 78824
loss: 1.0148919820785522,grad_norm: 0.9999991572184993, iteration: 78825
loss: 1.0038459300994873,grad_norm: 0.9999990863001871, iteration: 78826
loss: 0.9798555970191956,grad_norm: 0.9999991745209104, iteration: 78827
loss: 1.0074427127838135,grad_norm: 0.9999991073624974, iteration: 78828
loss: 1.0139473676681519,grad_norm: 0.9251610635252939, iteration: 78829
loss: 1.008009433746338,grad_norm: 0.9999990172095293, iteration: 78830
loss: 0.9735158681869507,grad_norm: 0.9850343436241427, iteration: 78831
loss: 0.9996084570884705,grad_norm: 0.9999990518066684, iteration: 78832
loss: 1.0016840696334839,grad_norm: 0.9999990992488414, iteration: 78833
loss: 0.9632279872894287,grad_norm: 0.999999147893125, iteration: 78834
loss: 0.9700700640678406,grad_norm: 0.9124837684422459, iteration: 78835
loss: 1.0374984741210938,grad_norm: 0.8947184156774537, iteration: 78836
loss: 0.9984666705131531,grad_norm: 0.999999066643477, iteration: 78837
loss: 1.0112180709838867,grad_norm: 0.9999991804783964, iteration: 78838
loss: 1.0323246717453003,grad_norm: 0.9999991747822098, iteration: 78839
loss: 0.9949349164962769,grad_norm: 0.9373475933411903, iteration: 78840
loss: 0.9927762746810913,grad_norm: 0.9999990363691647, iteration: 78841
loss: 1.0093823671340942,grad_norm: 0.9999990317839886, iteration: 78842
loss: 1.002478003501892,grad_norm: 0.9462185082148771, iteration: 78843
loss: 1.0904096364974976,grad_norm: 0.9999994781528867, iteration: 78844
loss: 0.9959964752197266,grad_norm: 0.9999992604806647, iteration: 78845
loss: 1.0074514150619507,grad_norm: 0.9999991082407462, iteration: 78846
loss: 0.9645163416862488,grad_norm: 0.937358802338449, iteration: 78847
loss: 0.9757437705993652,grad_norm: 0.9999992699526238, iteration: 78848
loss: 0.9680070877075195,grad_norm: 0.9978141898724238, iteration: 78849
loss: 1.0412366390228271,grad_norm: 0.9831472643831823, iteration: 78850
loss: 0.9990532398223877,grad_norm: 0.9363026106495037, iteration: 78851
loss: 0.9929893612861633,grad_norm: 0.9999992089003538, iteration: 78852
loss: 0.9869744777679443,grad_norm: 0.999999073998701, iteration: 78853
loss: 1.0176537036895752,grad_norm: 0.9999991761961757, iteration: 78854
loss: 0.990778923034668,grad_norm: 0.9468494879351023, iteration: 78855
loss: 1.003435730934143,grad_norm: 0.9999989414576571, iteration: 78856
loss: 0.9945098757743835,grad_norm: 0.9999991531351634, iteration: 78857
loss: 0.9943466782569885,grad_norm: 0.9999990710068372, iteration: 78858
loss: 1.0050162076950073,grad_norm: 0.9999993150736585, iteration: 78859
loss: 1.0143775939941406,grad_norm: 0.9999996743678354, iteration: 78860
loss: 0.978520393371582,grad_norm: 0.9999993127641452, iteration: 78861
loss: 0.9985440969467163,grad_norm: 0.8289780953260419, iteration: 78862
loss: 1.0147209167480469,grad_norm: 0.9999990871341183, iteration: 78863
loss: 0.9743300676345825,grad_norm: 0.9938602640569977, iteration: 78864
loss: 0.9608890414237976,grad_norm: 0.9819718617667387, iteration: 78865
loss: 0.9743740558624268,grad_norm: 0.9478797366931481, iteration: 78866
loss: 1.0184276103973389,grad_norm: 0.8753160068834629, iteration: 78867
loss: 0.9910560846328735,grad_norm: 0.9999991418638768, iteration: 78868
loss: 1.0264636278152466,grad_norm: 0.9987835960922693, iteration: 78869
loss: 1.0074325799942017,grad_norm: 0.9999991331580143, iteration: 78870
loss: 0.9700483083724976,grad_norm: 0.9999990748081127, iteration: 78871
loss: 1.0111451148986816,grad_norm: 0.9999990762011672, iteration: 78872
loss: 1.00888991355896,grad_norm: 0.9460434189539615, iteration: 78873
loss: 1.0254414081573486,grad_norm: 0.9999994617174891, iteration: 78874
loss: 1.0095566511154175,grad_norm: 0.948481137040524, iteration: 78875
loss: 1.0469927787780762,grad_norm: 0.9999990762369791, iteration: 78876
loss: 1.0205838680267334,grad_norm: 0.9999991757012767, iteration: 78877
loss: 0.9740664958953857,grad_norm: 0.9455074087207553, iteration: 78878
loss: 0.9659519791603088,grad_norm: 0.9624103528323972, iteration: 78879
loss: 0.9955206513404846,grad_norm: 0.9680885913707734, iteration: 78880
loss: 1.006903052330017,grad_norm: 0.8103019391602829, iteration: 78881
loss: 1.0039184093475342,grad_norm: 0.9999990446294442, iteration: 78882
loss: 1.0248841047286987,grad_norm: 0.9999996671968328, iteration: 78883
loss: 1.0114951133728027,grad_norm: 0.9999990937880595, iteration: 78884
loss: 0.979733943939209,grad_norm: 0.9999990314622883, iteration: 78885
loss: 1.0265735387802124,grad_norm: 0.9005557719865723, iteration: 78886
loss: 0.9896910786628723,grad_norm: 0.999999021615731, iteration: 78887
loss: 0.9753739833831787,grad_norm: 0.9999990795196549, iteration: 78888
loss: 0.9690971970558167,grad_norm: 0.9999990891868072, iteration: 78889
loss: 0.9697189331054688,grad_norm: 0.9166796203369618, iteration: 78890
loss: 1.0311903953552246,grad_norm: 0.8767698489596135, iteration: 78891
loss: 1.0329965353012085,grad_norm: 0.9999991212186299, iteration: 78892
loss: 0.9928176403045654,grad_norm: 0.9999991764490349, iteration: 78893
loss: 1.0088105201721191,grad_norm: 0.9773667719169221, iteration: 78894
loss: 1.0452247858047485,grad_norm: 0.9553918782871276, iteration: 78895
loss: 0.9864377379417419,grad_norm: 0.9999990400123, iteration: 78896
loss: 1.0111089944839478,grad_norm: 0.9999991105820759, iteration: 78897
loss: 0.9608163833618164,grad_norm: 0.9056529560053862, iteration: 78898
loss: 0.9874516129493713,grad_norm: 0.975135953517976, iteration: 78899
loss: 0.9878896474838257,grad_norm: 0.9866058241881706, iteration: 78900
loss: 0.9671047329902649,grad_norm: 0.9663507657774282, iteration: 78901
loss: 1.0097863674163818,grad_norm: 0.9999991448456786, iteration: 78902
loss: 0.9972057342529297,grad_norm: 0.9999992288395569, iteration: 78903
loss: 1.0109635591506958,grad_norm: 0.9957531258373574, iteration: 78904
loss: 0.9753288626670837,grad_norm: 0.9999991880192008, iteration: 78905
loss: 1.0168932676315308,grad_norm: 0.9152189582126514, iteration: 78906
loss: 1.008408546447754,grad_norm: 0.9999990829527259, iteration: 78907
loss: 1.0078309774398804,grad_norm: 0.9999991080347139, iteration: 78908
loss: 1.0292047262191772,grad_norm: 0.9184855596970118, iteration: 78909
loss: 0.9869269728660583,grad_norm: 0.9999990437470632, iteration: 78910
loss: 0.993811845779419,grad_norm: 0.9999991472480727, iteration: 78911
loss: 0.9706020355224609,grad_norm: 0.9999991799894764, iteration: 78912
loss: 0.9855343699455261,grad_norm: 0.8495888388599987, iteration: 78913
loss: 1.039952278137207,grad_norm: 0.9999993224856674, iteration: 78914
loss: 0.9619266390800476,grad_norm: 0.9999989625995794, iteration: 78915
loss: 1.0257881879806519,grad_norm: 0.9999990141198997, iteration: 78916
loss: 0.9973544478416443,grad_norm: 0.999999135959529, iteration: 78917
loss: 1.0019240379333496,grad_norm: 0.999999068850559, iteration: 78918
loss: 1.0390284061431885,grad_norm: 0.9999992543536557, iteration: 78919
loss: 0.9718942046165466,grad_norm: 0.9905787259011524, iteration: 78920
loss: 0.9978497624397278,grad_norm: 0.9999991257242907, iteration: 78921
loss: 1.0186370611190796,grad_norm: 0.9999993263059969, iteration: 78922
loss: 0.9615791440010071,grad_norm: 0.9999989842017171, iteration: 78923
loss: 0.9908626079559326,grad_norm: 0.9999990275281064, iteration: 78924
loss: 0.9921016693115234,grad_norm: 0.8618650534637093, iteration: 78925
loss: 1.012834906578064,grad_norm: 0.9999993239016233, iteration: 78926
loss: 1.0276837348937988,grad_norm: 0.9999990643591548, iteration: 78927
loss: 0.9868628978729248,grad_norm: 0.9769357733743992, iteration: 78928
loss: 1.0455336570739746,grad_norm: 0.99999919154446, iteration: 78929
loss: 0.979141354560852,grad_norm: 0.9999992327419648, iteration: 78930
loss: 1.0096087455749512,grad_norm: 0.889128699611719, iteration: 78931
loss: 0.9796181321144104,grad_norm: 0.9999990848087817, iteration: 78932
loss: 0.979156494140625,grad_norm: 0.9742471724056875, iteration: 78933
loss: 1.0573949813842773,grad_norm: 0.999999729441105, iteration: 78934
loss: 0.9846903085708618,grad_norm: 0.9999992377696548, iteration: 78935
loss: 0.978929340839386,grad_norm: 0.8951363520239181, iteration: 78936
loss: 0.9757081866264343,grad_norm: 0.9584048850300972, iteration: 78937
loss: 0.9903449416160583,grad_norm: 0.9524388198930618, iteration: 78938
loss: 1.0510778427124023,grad_norm: 0.9456862394207305, iteration: 78939
loss: 0.9844485521316528,grad_norm: 0.9999990378388377, iteration: 78940
loss: 1.054145097732544,grad_norm: 0.999999779918325, iteration: 78941
loss: 1.0241615772247314,grad_norm: 0.9999992122542609, iteration: 78942
loss: 1.002400279045105,grad_norm: 0.9981937056585053, iteration: 78943
loss: 0.9880755543708801,grad_norm: 0.9533988244940481, iteration: 78944
loss: 0.9904685616493225,grad_norm: 0.9999990079944074, iteration: 78945
loss: 0.9661381840705872,grad_norm: 0.9999992710617486, iteration: 78946
loss: 0.9867980480194092,grad_norm: 0.8775825346066538, iteration: 78947
loss: 1.013078212738037,grad_norm: 0.9999991406123557, iteration: 78948
loss: 1.0062751770019531,grad_norm: 0.9999991938398857, iteration: 78949
loss: 1.0129693746566772,grad_norm: 0.9548065474638125, iteration: 78950
loss: 1.0264045000076294,grad_norm: 0.9999990916266538, iteration: 78951
loss: 0.9593647122383118,grad_norm: 0.9999991969834285, iteration: 78952
loss: 1.06916344165802,grad_norm: 0.9999992594932698, iteration: 78953
loss: 1.0152784585952759,grad_norm: 0.991874900543245, iteration: 78954
loss: 1.0012720823287964,grad_norm: 0.8983543176628344, iteration: 78955
loss: 1.027150273323059,grad_norm: 0.9866159552988049, iteration: 78956
loss: 1.0216760635375977,grad_norm: 0.9999990687848818, iteration: 78957
loss: 0.9651374220848083,grad_norm: 0.9999990372501738, iteration: 78958
loss: 1.0123428106307983,grad_norm: 0.9999990497632965, iteration: 78959
loss: 0.9685546159744263,grad_norm: 0.9999992131771669, iteration: 78960
loss: 1.00746750831604,grad_norm: 0.9999990749311378, iteration: 78961
loss: 0.9985582828521729,grad_norm: 0.9604940650351439, iteration: 78962
loss: 0.9875319004058838,grad_norm: 0.9921160862125732, iteration: 78963
loss: 1.0006219148635864,grad_norm: 0.9999991687004043, iteration: 78964
loss: 0.9812976717948914,grad_norm: 0.9999990942822097, iteration: 78965
loss: 1.0048909187316895,grad_norm: 0.999999056407983, iteration: 78966
loss: 0.978320837020874,grad_norm: 0.9999990851324538, iteration: 78967
loss: 1.0049173831939697,grad_norm: 0.943065255330524, iteration: 78968
loss: 0.9934566617012024,grad_norm: 0.9999991007251481, iteration: 78969
loss: 0.9993622303009033,grad_norm: 0.8757514395825404, iteration: 78970
loss: 1.021677851676941,grad_norm: 0.9860185819269653, iteration: 78971
loss: 1.0224432945251465,grad_norm: 0.9999991780974453, iteration: 78972
loss: 1.004764199256897,grad_norm: 0.9734277142946702, iteration: 78973
loss: 1.034055233001709,grad_norm: 0.9999990183443709, iteration: 78974
loss: 1.0005689859390259,grad_norm: 0.9999991896102657, iteration: 78975
loss: 1.0153601169586182,grad_norm: 0.9999991698896097, iteration: 78976
loss: 1.0134555101394653,grad_norm: 0.8976187436212958, iteration: 78977
loss: 0.9829651713371277,grad_norm: 0.9999991957949733, iteration: 78978
loss: 0.9944448471069336,grad_norm: 0.9999995932114898, iteration: 78979
loss: 0.9997555613517761,grad_norm: 0.9999997102176253, iteration: 78980
loss: 1.065524935722351,grad_norm: 0.9999991834370606, iteration: 78981
loss: 0.982991099357605,grad_norm: 0.999999140428801, iteration: 78982
loss: 0.9670180678367615,grad_norm: 0.890840737642691, iteration: 78983
loss: 0.9943901300430298,grad_norm: 0.9999992425048323, iteration: 78984
loss: 1.0035057067871094,grad_norm: 0.9999989332811112, iteration: 78985
loss: 0.9775898456573486,grad_norm: 0.9872570929466127, iteration: 78986
loss: 1.0127201080322266,grad_norm: 0.9555896675845308, iteration: 78987
loss: 1.024543285369873,grad_norm: 0.9999992266952127, iteration: 78988
loss: 0.9907718300819397,grad_norm: 0.9999989469694811, iteration: 78989
loss: 1.0087008476257324,grad_norm: 0.9999992202654987, iteration: 78990
loss: 0.9977647662162781,grad_norm: 0.9999989718870733, iteration: 78991
loss: 1.0399430990219116,grad_norm: 0.8094018704191701, iteration: 78992
loss: 1.0700583457946777,grad_norm: 0.9999992119778691, iteration: 78993
loss: 1.0129578113555908,grad_norm: 0.9285029201203024, iteration: 78994
loss: 0.9970560073852539,grad_norm: 0.9999992383499844, iteration: 78995
loss: 0.9868374466896057,grad_norm: 0.9999991391785785, iteration: 78996
loss: 1.0027962923049927,grad_norm: 0.9879047900027859, iteration: 78997
loss: 1.1417754888534546,grad_norm: 0.9999993228275591, iteration: 78998
loss: 1.0211567878723145,grad_norm: 0.8875525573656488, iteration: 78999
loss: 1.0387468338012695,grad_norm: 0.9999991349313188, iteration: 79000
loss: 1.0971578359603882,grad_norm: 0.9999997270054242, iteration: 79001
loss: 1.033348560333252,grad_norm: 0.9999994585908414, iteration: 79002
loss: 1.0937879085540771,grad_norm: 0.9999997180481061, iteration: 79003
loss: 1.0275872945785522,grad_norm: 0.857464447693909, iteration: 79004
loss: 0.9937520623207092,grad_norm: 0.999999040336305, iteration: 79005
loss: 0.991729736328125,grad_norm: 0.9110396366099167, iteration: 79006
loss: 1.0223625898361206,grad_norm: 0.9999993538443724, iteration: 79007
loss: 1.0044387578964233,grad_norm: 0.9747599279655321, iteration: 79008
loss: 1.0000158548355103,grad_norm: 0.9999991369378448, iteration: 79009
loss: 1.0067296028137207,grad_norm: 0.9889013332575182, iteration: 79010
loss: 0.9727328419685364,grad_norm: 0.9999991678614599, iteration: 79011
loss: 1.0258129835128784,grad_norm: 0.9999989869680115, iteration: 79012
loss: 1.0387177467346191,grad_norm: 0.999999201134373, iteration: 79013
loss: 1.003352403640747,grad_norm: 0.9999992932212919, iteration: 79014
loss: 0.9939281344413757,grad_norm: 0.9999992468073524, iteration: 79015
loss: 0.9985305666923523,grad_norm: 0.9999997708514929, iteration: 79016
loss: 0.9999635219573975,grad_norm: 0.999999325405032, iteration: 79017
loss: 0.971392035484314,grad_norm: 0.9999990639589985, iteration: 79018
loss: 1.0148073434829712,grad_norm: 0.9999997412902704, iteration: 79019
loss: 1.0191760063171387,grad_norm: 0.9723669252235013, iteration: 79020
loss: 0.9913471937179565,grad_norm: 0.9370353844960079, iteration: 79021
loss: 1.0277736186981201,grad_norm: 0.999999178642009, iteration: 79022
loss: 1.0084022283554077,grad_norm: 0.9053997437296838, iteration: 79023
loss: 1.0440336465835571,grad_norm: 0.9999997423689184, iteration: 79024
loss: 1.027204990386963,grad_norm: 0.9999990437552575, iteration: 79025
loss: 0.9866091012954712,grad_norm: 0.9290310336540286, iteration: 79026
loss: 0.9977211356163025,grad_norm: 0.9999991026728047, iteration: 79027
loss: 0.9680042862892151,grad_norm: 0.9999991894440978, iteration: 79028
loss: 0.9767846465110779,grad_norm: 0.9999991303785418, iteration: 79029
loss: 0.9973278641700745,grad_norm: 0.9999990531927122, iteration: 79030
loss: 1.0379252433776855,grad_norm: 0.9999990109809325, iteration: 79031
loss: 1.014105200767517,grad_norm: 0.999999164959076, iteration: 79032
loss: 1.04303777217865,grad_norm: 0.9999996545289845, iteration: 79033
loss: 0.9674056172370911,grad_norm: 0.9999992012076797, iteration: 79034
loss: 1.0142797231674194,grad_norm: 0.9530380873865785, iteration: 79035
loss: 0.9971033334732056,grad_norm: 0.9240984573608088, iteration: 79036
loss: 1.0195939540863037,grad_norm: 0.9999991695405615, iteration: 79037
loss: 0.9787181615829468,grad_norm: 0.9999991006312613, iteration: 79038
loss: 0.9558828473091125,grad_norm: 0.9999990848477208, iteration: 79039
loss: 1.0745254755020142,grad_norm: 0.9999995173471652, iteration: 79040
loss: 1.0025749206542969,grad_norm: 0.9712011009335708, iteration: 79041
loss: 0.9797617793083191,grad_norm: 0.9999991329111286, iteration: 79042
loss: 1.0127861499786377,grad_norm: 0.9999991926329185, iteration: 79043
loss: 0.9848243594169617,grad_norm: 0.9999993577160413, iteration: 79044
loss: 0.9779626131057739,grad_norm: 0.9999991535391803, iteration: 79045
loss: 1.0124986171722412,grad_norm: 0.9852255822091567, iteration: 79046
loss: 1.0136973857879639,grad_norm: 0.9999997898438528, iteration: 79047
loss: 0.9894826412200928,grad_norm: 0.999999504145837, iteration: 79048
loss: 1.036713719367981,grad_norm: 0.9641566440521202, iteration: 79049
loss: 0.9692872762680054,grad_norm: 0.999999060531411, iteration: 79050
loss: 0.9612705111503601,grad_norm: 0.999999566152698, iteration: 79051
loss: 1.0293192863464355,grad_norm: 0.999999138053254, iteration: 79052
loss: 0.979992151260376,grad_norm: 0.9941774202797803, iteration: 79053
loss: 0.9935872554779053,grad_norm: 0.9999990359433907, iteration: 79054
loss: 1.0129997730255127,grad_norm: 0.999999054186616, iteration: 79055
loss: 0.9705132246017456,grad_norm: 0.9999989938758268, iteration: 79056
loss: 0.9969117641448975,grad_norm: 0.9589053335639065, iteration: 79057
loss: 0.9780768752098083,grad_norm: 0.9999991732414703, iteration: 79058
loss: 0.9647103548049927,grad_norm: 0.9739328250267659, iteration: 79059
loss: 1.0423691272735596,grad_norm: 1.0000000028778075, iteration: 79060
loss: 1.0044974088668823,grad_norm: 0.913667667445622, iteration: 79061
loss: 1.0424896478652954,grad_norm: 0.9999990233018249, iteration: 79062
loss: 1.032822847366333,grad_norm: 0.9999990427170887, iteration: 79063
loss: 1.0018677711486816,grad_norm: 0.8873605074692513, iteration: 79064
loss: 0.9883827567100525,grad_norm: 0.9749516424466489, iteration: 79065
loss: 0.9835236668586731,grad_norm: 0.9841110717965075, iteration: 79066
loss: 1.0157725811004639,grad_norm: 0.9999991714925462, iteration: 79067
loss: 1.0238375663757324,grad_norm: 0.9543831507436179, iteration: 79068
loss: 0.9981750845909119,grad_norm: 0.9153180228798691, iteration: 79069
loss: 1.0077223777770996,grad_norm: 0.99999909808671, iteration: 79070
loss: 1.0012075901031494,grad_norm: 0.9441632693204508, iteration: 79071
loss: 0.9852655529975891,grad_norm: 0.9999990540117214, iteration: 79072
loss: 1.0100412368774414,grad_norm: 0.9999991138554426, iteration: 79073
loss: 1.0345605611801147,grad_norm: 0.9999991504664637, iteration: 79074
loss: 0.9954133629798889,grad_norm: 0.9999991576700233, iteration: 79075
loss: 1.0044184923171997,grad_norm: 0.9999992926044612, iteration: 79076
loss: 1.0399020910263062,grad_norm: 0.9999991289231072, iteration: 79077
loss: 1.020634651184082,grad_norm: 0.999999169899671, iteration: 79078
loss: 1.0299711227416992,grad_norm: 0.9999998860324876, iteration: 79079
loss: 1.0121387243270874,grad_norm: 0.9999990971306006, iteration: 79080
loss: 0.9778526425361633,grad_norm: 0.9999991776936537, iteration: 79081
loss: 1.0048166513442993,grad_norm: 0.9999992282287954, iteration: 79082
loss: 1.0529690980911255,grad_norm: 0.999999180335914, iteration: 79083
loss: 1.0005542039871216,grad_norm: 0.9999990574239513, iteration: 79084
loss: 0.994856059551239,grad_norm: 0.9999989664778642, iteration: 79085
loss: 1.0278429985046387,grad_norm: 0.9999990919937379, iteration: 79086
loss: 0.9752680063247681,grad_norm: 0.9243332403785687, iteration: 79087
loss: 0.9861046671867371,grad_norm: 0.9999992323571761, iteration: 79088
loss: 0.9370290637016296,grad_norm: 0.9999991294713928, iteration: 79089
loss: 1.0458184480667114,grad_norm: 0.9940185696361998, iteration: 79090
loss: 0.9973742961883545,grad_norm: 0.92422734392283, iteration: 79091
loss: 0.9926849007606506,grad_norm: 0.9999992226737009, iteration: 79092
loss: 0.9844002723693848,grad_norm: 0.9999998449387305, iteration: 79093
loss: 1.0142868757247925,grad_norm: 0.9999990608910926, iteration: 79094
loss: 1.0606657266616821,grad_norm: 0.999999266906814, iteration: 79095
loss: 1.0314737558364868,grad_norm: 0.9999990219359135, iteration: 79096
loss: 0.9706010222434998,grad_norm: 0.9025529619416565, iteration: 79097
loss: 1.0242393016815186,grad_norm: 0.999999171069956, iteration: 79098
loss: 0.9813550114631653,grad_norm: 0.9999991579522842, iteration: 79099
loss: 1.0742549896240234,grad_norm: 0.9999993646681036, iteration: 79100
loss: 0.9944592714309692,grad_norm: 0.9789810771908705, iteration: 79101
loss: 1.0272459983825684,grad_norm: 0.9999991315284519, iteration: 79102
loss: 0.972162127494812,grad_norm: 0.9999990325005478, iteration: 79103
loss: 0.9853327870368958,grad_norm: 0.979118392196354, iteration: 79104
loss: 0.9922012686729431,grad_norm: 0.9999991301074779, iteration: 79105
loss: 1.1101285219192505,grad_norm: 0.9999993855390451, iteration: 79106
loss: 0.9867078065872192,grad_norm: 0.9999993109983837, iteration: 79107
loss: 1.0235275030136108,grad_norm: 0.9999990412138386, iteration: 79108
loss: 1.0338315963745117,grad_norm: 0.9999992232350554, iteration: 79109
loss: 1.0243630409240723,grad_norm: 0.9999991138862419, iteration: 79110
loss: 0.9923478364944458,grad_norm: 0.9999991005228216, iteration: 79111
loss: 1.0732282400131226,grad_norm: 0.9999999327390278, iteration: 79112
loss: 0.9717071652412415,grad_norm: 0.9999990711257603, iteration: 79113
loss: 1.0104079246520996,grad_norm: 0.9999990148398914, iteration: 79114
loss: 1.0188734531402588,grad_norm: 0.9999994628016908, iteration: 79115
loss: 0.9889851808547974,grad_norm: 0.9230626670858322, iteration: 79116
loss: 1.0373913049697876,grad_norm: 0.9999996128784158, iteration: 79117
loss: 1.025376796722412,grad_norm: 0.9999992153143082, iteration: 79118
loss: 1.1293137073516846,grad_norm: 0.9999994503339183, iteration: 79119
loss: 0.9868283867835999,grad_norm: 0.9999992802475519, iteration: 79120
loss: 1.0264564752578735,grad_norm: 0.9999990648769997, iteration: 79121
loss: 1.0732338428497314,grad_norm: 0.9999993541836197, iteration: 79122
loss: 1.0413919687271118,grad_norm: 0.9698398748940321, iteration: 79123
loss: 1.0033904314041138,grad_norm: 0.8958767600540648, iteration: 79124
loss: 0.9995541572570801,grad_norm: 0.9039596792673126, iteration: 79125
loss: 1.0251628160476685,grad_norm: 0.9292323282785196, iteration: 79126
loss: 1.0174309015274048,grad_norm: 0.9999990515945866, iteration: 79127
loss: 1.0156958103179932,grad_norm: 0.9999990936709209, iteration: 79128
loss: 0.9958420395851135,grad_norm: 0.9472485211284684, iteration: 79129
loss: 0.993628740310669,grad_norm: 0.9672587783069049, iteration: 79130
loss: 0.9694981575012207,grad_norm: 0.9999990107912216, iteration: 79131
loss: 0.9803540110588074,grad_norm: 0.9404959393232681, iteration: 79132
loss: 0.9950786232948303,grad_norm: 0.999999185535845, iteration: 79133
loss: 1.0454002618789673,grad_norm: 0.9999990660200785, iteration: 79134
loss: 1.0031323432922363,grad_norm: 0.999999106425797, iteration: 79135
loss: 1.0320037603378296,grad_norm: 0.9999995140319418, iteration: 79136
loss: 0.9850577712059021,grad_norm: 0.9999994286119909, iteration: 79137
loss: 0.9715598821640015,grad_norm: 0.9969702979082181, iteration: 79138
loss: 0.998658299446106,grad_norm: 0.9416574179458063, iteration: 79139
loss: 1.0632985830307007,grad_norm: 0.9999995120209015, iteration: 79140
loss: 1.0550200939178467,grad_norm: 0.9811955527366121, iteration: 79141
loss: 1.0048468112945557,grad_norm: 0.9999993261082064, iteration: 79142
loss: 1.0071732997894287,grad_norm: 0.9999993535696747, iteration: 79143
loss: 1.06689453125,grad_norm: 1.0000001023194804, iteration: 79144
loss: 1.029366135597229,grad_norm: 0.9999994388541359, iteration: 79145
loss: 1.0007333755493164,grad_norm: 0.9999990772215104, iteration: 79146
loss: 1.0184553861618042,grad_norm: 0.882372783798797, iteration: 79147
loss: 1.0428447723388672,grad_norm: 0.9999996004186977, iteration: 79148
loss: 1.0329828262329102,grad_norm: 0.9220958376740758, iteration: 79149
loss: 0.9669926762580872,grad_norm: 0.9999991236203533, iteration: 79150
loss: 1.0041714906692505,grad_norm: 0.9999990600119051, iteration: 79151
loss: 0.9954002499580383,grad_norm: 0.9999991418120383, iteration: 79152
loss: 1.0004838705062866,grad_norm: 0.9999990398027078, iteration: 79153
loss: 1.058426022529602,grad_norm: 1.0000000561786004, iteration: 79154
loss: 1.0134477615356445,grad_norm: 0.9999993186844756, iteration: 79155
loss: 0.9938766360282898,grad_norm: 0.9999991683842621, iteration: 79156
loss: 1.0217894315719604,grad_norm: 0.9999989848233125, iteration: 79157
loss: 0.9991686344146729,grad_norm: 0.999999007791622, iteration: 79158
loss: 0.9697861671447754,grad_norm: 0.9999991610466016, iteration: 79159
loss: 0.9945787787437439,grad_norm: 0.8763350623293643, iteration: 79160
loss: 1.005372166633606,grad_norm: 0.9999997510787692, iteration: 79161
loss: 1.0390852689743042,grad_norm: 0.9999990521772759, iteration: 79162
loss: 1.0259145498275757,grad_norm: 0.9999991720969553, iteration: 79163
loss: 1.0256280899047852,grad_norm: 0.9462280646740758, iteration: 79164
loss: 0.9954979419708252,grad_norm: 0.999999063887301, iteration: 79165
loss: 0.9708859920501709,grad_norm: 0.9999991710897229, iteration: 79166
loss: 1.0374289751052856,grad_norm: 0.9849816980607022, iteration: 79167
loss: 1.0913608074188232,grad_norm: 0.9999992591226271, iteration: 79168
loss: 1.0129148960113525,grad_norm: 0.9999990737094681, iteration: 79169
loss: 1.011858582496643,grad_norm: 0.9999991605179506, iteration: 79170
loss: 0.981396496295929,grad_norm: 0.9204614899660586, iteration: 79171
loss: 0.9752998948097229,grad_norm: 0.9999989314183937, iteration: 79172
loss: 0.9987725615501404,grad_norm: 0.9867239643130007, iteration: 79173
loss: 0.9995971322059631,grad_norm: 0.9055258133764359, iteration: 79174
loss: 0.9836524128913879,grad_norm: 0.9895073609844762, iteration: 79175
loss: 0.9826587438583374,grad_norm: 0.9304364623434501, iteration: 79176
loss: 0.9930434226989746,grad_norm: 0.9247356023236457, iteration: 79177
loss: 1.0394724607467651,grad_norm: 0.9999992247879748, iteration: 79178
loss: 1.0137732028961182,grad_norm: 0.9133933588717538, iteration: 79179
loss: 0.9906340837478638,grad_norm: 0.9207226428977171, iteration: 79180
loss: 0.9966716766357422,grad_norm: 0.9999992321929905, iteration: 79181
loss: 0.9756842851638794,grad_norm: 0.9999991553432276, iteration: 79182
loss: 1.0194398164749146,grad_norm: 0.999999897297141, iteration: 79183
loss: 0.9997143745422363,grad_norm: 0.9999990397674987, iteration: 79184
loss: 0.9783778190612793,grad_norm: 0.9131386246374992, iteration: 79185
loss: 1.0093979835510254,grad_norm: 0.9070398077632084, iteration: 79186
loss: 1.0121710300445557,grad_norm: 0.957940623833256, iteration: 79187
loss: 1.0189396142959595,grad_norm: 0.9958706199701491, iteration: 79188
loss: 1.0229501724243164,grad_norm: 0.9999990581388737, iteration: 79189
loss: 1.0208745002746582,grad_norm: 0.999999158276122, iteration: 79190
loss: 0.9688737392425537,grad_norm: 0.9999991617913161, iteration: 79191
loss: 1.0181550979614258,grad_norm: 0.9999992546473767, iteration: 79192
loss: 1.017617106437683,grad_norm: 0.9999991244844209, iteration: 79193
loss: 0.9837132096290588,grad_norm: 0.9599514284579083, iteration: 79194
loss: 0.9914984703063965,grad_norm: 0.9999990680283224, iteration: 79195
loss: 0.9565642476081848,grad_norm: 0.9999989190515036, iteration: 79196
loss: 1.0194154977798462,grad_norm: 0.9999991260687591, iteration: 79197
loss: 0.9736735820770264,grad_norm: 0.9999990209167363, iteration: 79198
loss: 0.9962060451507568,grad_norm: 0.9999992162755286, iteration: 79199
loss: 1.0162322521209717,grad_norm: 0.999999102905091, iteration: 79200
loss: 1.0023422241210938,grad_norm: 0.9999990586911074, iteration: 79201
loss: 0.9901914596557617,grad_norm: 0.9999990714589643, iteration: 79202
loss: 0.998923122882843,grad_norm: 0.8342358676671313, iteration: 79203
loss: 0.980461597442627,grad_norm: 0.9999992347178146, iteration: 79204
loss: 1.0572423934936523,grad_norm: 0.9999995102668406, iteration: 79205
loss: 1.00941002368927,grad_norm: 0.9553006808416009, iteration: 79206
loss: 0.97538822889328,grad_norm: 0.9292041472110885, iteration: 79207
loss: 1.0210732221603394,grad_norm: 0.9159212189958317, iteration: 79208
loss: 1.0071418285369873,grad_norm: 0.9999990640009557, iteration: 79209
loss: 0.9814184308052063,grad_norm: 0.9999991751413951, iteration: 79210
loss: 1.0141483545303345,grad_norm: 0.9999989617116251, iteration: 79211
loss: 1.0163484811782837,grad_norm: 0.9018468773218872, iteration: 79212
loss: 0.9929186105728149,grad_norm: 0.9999992321913687, iteration: 79213
loss: 0.979787290096283,grad_norm: 0.9999991260539646, iteration: 79214
loss: 1.0054882764816284,grad_norm: 0.9360356107535031, iteration: 79215
loss: 0.9770175218582153,grad_norm: 0.9999991701687816, iteration: 79216
loss: 1.0351369380950928,grad_norm: 0.9999991188490591, iteration: 79217
loss: 0.9886480569839478,grad_norm: 0.9999992067702382, iteration: 79218
loss: 1.0023497343063354,grad_norm: 0.9999990804820578, iteration: 79219
loss: 1.0427846908569336,grad_norm: 0.9999991364862674, iteration: 79220
loss: 0.9612213969230652,grad_norm: 0.9999990727576578, iteration: 79221
loss: 1.0098737478256226,grad_norm: 0.9999991946552288, iteration: 79222
loss: 0.9591586589813232,grad_norm: 0.9078130961646279, iteration: 79223
loss: 1.0203748941421509,grad_norm: 0.977704669871449, iteration: 79224
loss: 0.9977284669876099,grad_norm: 0.9999991526011169, iteration: 79225
loss: 1.0278509855270386,grad_norm: 0.9999993129879455, iteration: 79226
loss: 1.026842713356018,grad_norm: 0.9999994091529081, iteration: 79227
loss: 1.0301834344863892,grad_norm: 0.9999992471735845, iteration: 79228
loss: 1.028384804725647,grad_norm: 0.9999991698795403, iteration: 79229
loss: 1.044318675994873,grad_norm: 0.9999990675248693, iteration: 79230
loss: 1.0049042701721191,grad_norm: 0.9999991031291232, iteration: 79231
loss: 1.0200979709625244,grad_norm: 0.9999994809858301, iteration: 79232
loss: 1.0275611877441406,grad_norm: 0.9999990875843066, iteration: 79233
loss: 0.9840713143348694,grad_norm: 0.9999992011362775, iteration: 79234
loss: 1.0490301847457886,grad_norm: 0.9999992894571034, iteration: 79235
loss: 1.0222748517990112,grad_norm: 0.9999991325906471, iteration: 79236
loss: 0.9688549041748047,grad_norm: 0.9227107476501302, iteration: 79237
loss: 1.059259295463562,grad_norm: 0.9999991771814032, iteration: 79238
loss: 0.9669976830482483,grad_norm: 0.9848350262791306, iteration: 79239
loss: 1.0425605773925781,grad_norm: 0.9999995814072237, iteration: 79240
loss: 0.9969167113304138,grad_norm: 0.9999991607162739, iteration: 79241
loss: 0.9919113516807556,grad_norm: 0.9825675548623654, iteration: 79242
loss: 1.0213639736175537,grad_norm: 0.9999995990537975, iteration: 79243
loss: 1.0355015993118286,grad_norm: 0.9399449347457943, iteration: 79244
loss: 1.036018967628479,grad_norm: 0.9999993228027577, iteration: 79245
loss: 1.0072662830352783,grad_norm: 0.9999991849520892, iteration: 79246
loss: 1.0049614906311035,grad_norm: 0.9999990756258362, iteration: 79247
loss: 1.004723310470581,grad_norm: 0.9999993068222807, iteration: 79248
loss: 0.9832846522331238,grad_norm: 0.9872078603829002, iteration: 79249
loss: 1.1022629737854004,grad_norm: 0.9999991397924642, iteration: 79250
loss: 1.0167871713638306,grad_norm: 0.8554020788214295, iteration: 79251
loss: 0.9845874309539795,grad_norm: 0.9999989209351757, iteration: 79252
loss: 0.9964364767074585,grad_norm: 0.9108484052715663, iteration: 79253
loss: 0.9665547609329224,grad_norm: 0.9999990863501759, iteration: 79254
loss: 0.9634200930595398,grad_norm: 0.9074814563325092, iteration: 79255
loss: 1.0112831592559814,grad_norm: 0.9999989541218018, iteration: 79256
loss: 1.008336067199707,grad_norm: 0.9999990197286452, iteration: 79257
loss: 1.0394082069396973,grad_norm: 0.9999997071364753, iteration: 79258
loss: 0.9849435687065125,grad_norm: 0.9999989731423354, iteration: 79259
loss: 1.0045192241668701,grad_norm: 0.9999991020542708, iteration: 79260
loss: 1.0045621395111084,grad_norm: 0.9999992542515772, iteration: 79261
loss: 1.0036718845367432,grad_norm: 0.9870567687000381, iteration: 79262
loss: 0.9809560775756836,grad_norm: 0.9999990186762068, iteration: 79263
loss: 1.0407850742340088,grad_norm: 0.9999991003213372, iteration: 79264
loss: 0.9938328266143799,grad_norm: 0.9999991887846773, iteration: 79265
loss: 0.9761152267456055,grad_norm: 0.9999992389038705, iteration: 79266
loss: 1.0035507678985596,grad_norm: 0.9999991965923883, iteration: 79267
loss: 0.9745502471923828,grad_norm: 0.9999990788405171, iteration: 79268
loss: 0.9838099479675293,grad_norm: 0.9999998210607599, iteration: 79269
loss: 0.9648745059967041,grad_norm: 0.9693769361227166, iteration: 79270
loss: 0.9845139980316162,grad_norm: 0.9999990591186727, iteration: 79271
loss: 1.0302220582962036,grad_norm: 0.984919757690974, iteration: 79272
loss: 1.0153815746307373,grad_norm: 0.9999991033756269, iteration: 79273
loss: 0.9851031303405762,grad_norm: 0.9662964411301336, iteration: 79274
loss: 0.9810099601745605,grad_norm: 0.9999991416834976, iteration: 79275
loss: 0.9686269164085388,grad_norm: 0.999999210637065, iteration: 79276
loss: 0.9944792985916138,grad_norm: 0.9999992330545738, iteration: 79277
loss: 1.0170735120773315,grad_norm: 0.9358040817419031, iteration: 79278
loss: 1.0298216342926025,grad_norm: 0.9999989224799304, iteration: 79279
loss: 0.9748883843421936,grad_norm: 0.9561991094429033, iteration: 79280
loss: 0.9637145400047302,grad_norm: 0.9999994357450435, iteration: 79281
loss: 0.9897286891937256,grad_norm: 0.8948261533451417, iteration: 79282
loss: 0.9703248739242554,grad_norm: 0.9889724345335853, iteration: 79283
loss: 1.0164196491241455,grad_norm: 0.9603441407839731, iteration: 79284
loss: 0.9845649600028992,grad_norm: 0.9839384698638273, iteration: 79285
loss: 1.04234778881073,grad_norm: 0.9756953161574833, iteration: 79286
loss: 1.0091121196746826,grad_norm: 0.9055191626796483, iteration: 79287
loss: 0.9525362253189087,grad_norm: 0.9574762385795051, iteration: 79288
loss: 0.9919260740280151,grad_norm: 0.9999992159653363, iteration: 79289
loss: 1.0052449703216553,grad_norm: 0.999999182416945, iteration: 79290
loss: 0.9909907579421997,grad_norm: 0.9528996999141524, iteration: 79291
loss: 0.9859217405319214,grad_norm: 0.9245719623976398, iteration: 79292
loss: 1.0314476490020752,grad_norm: 0.9999990339636471, iteration: 79293
loss: 1.0047258138656616,grad_norm: 0.8556162277412233, iteration: 79294
loss: 1.009238839149475,grad_norm: 0.9094284503899183, iteration: 79295
loss: 1.0232470035552979,grad_norm: 0.9999991201111533, iteration: 79296
loss: 1.0014992952346802,grad_norm: 0.9999990706521215, iteration: 79297
loss: 0.9936622977256775,grad_norm: 0.9745986066576539, iteration: 79298
loss: 1.0080764293670654,grad_norm: 0.9999991583122773, iteration: 79299
loss: 1.0221492052078247,grad_norm: 0.9999993471975579, iteration: 79300
loss: 0.9877746105194092,grad_norm: 0.99999894499919, iteration: 79301
loss: 1.0005841255187988,grad_norm: 0.9999990315005274, iteration: 79302
loss: 1.0166133642196655,grad_norm: 0.9999999582631688, iteration: 79303
loss: 1.0553120374679565,grad_norm: 0.9628755273512468, iteration: 79304
loss: 1.0042520761489868,grad_norm: 0.9999991765247636, iteration: 79305
loss: 0.9457799196243286,grad_norm: 0.9884253945743833, iteration: 79306
loss: 1.0147188901901245,grad_norm: 0.999999439883625, iteration: 79307
loss: 1.0201753377914429,grad_norm: 0.9999992218844951, iteration: 79308
loss: 1.0327401161193848,grad_norm: 0.9027574107081047, iteration: 79309
loss: 0.969170093536377,grad_norm: 0.9999991298367704, iteration: 79310
loss: 1.0185669660568237,grad_norm: 0.871976516280616, iteration: 79311
loss: 1.095353364944458,grad_norm: 0.9999990388842761, iteration: 79312
loss: 1.0416843891143799,grad_norm: 0.996613815252933, iteration: 79313
loss: 0.991104245185852,grad_norm: 0.9999992199850268, iteration: 79314
loss: 0.9836156964302063,grad_norm: 0.9999991713358809, iteration: 79315
loss: 0.9818423390388489,grad_norm: 0.9999991575659131, iteration: 79316
loss: 0.9752166867256165,grad_norm: 0.8327577185593777, iteration: 79317
loss: 0.9536523818969727,grad_norm: 0.9999991896576769, iteration: 79318
loss: 1.0001314878463745,grad_norm: 0.9999991141995829, iteration: 79319
loss: 1.008489966392517,grad_norm: 0.9999996552505795, iteration: 79320
loss: 0.9955052137374878,grad_norm: 0.8906485639116442, iteration: 79321
loss: 1.021458387374878,grad_norm: 0.9208080484055654, iteration: 79322
loss: 1.0466125011444092,grad_norm: 0.9999997635403045, iteration: 79323
loss: 1.0241243839263916,grad_norm: 0.9999990650617382, iteration: 79324
loss: 1.0092296600341797,grad_norm: 0.9999992623475725, iteration: 79325
loss: 1.0228271484375,grad_norm: 0.9385551754982239, iteration: 79326
loss: 1.0221935510635376,grad_norm: 0.9242192324045152, iteration: 79327
loss: 0.988034188747406,grad_norm: 0.897513579410134, iteration: 79328
loss: 0.9904839396476746,grad_norm: 0.9999993311213197, iteration: 79329
loss: 1.0297573804855347,grad_norm: 0.9999991684741081, iteration: 79330
loss: 0.9785980582237244,grad_norm: 0.9999990922114346, iteration: 79331
loss: 0.9941791296005249,grad_norm: 0.9999993197792285, iteration: 79332
loss: 0.9889021515846252,grad_norm: 0.9999994133162703, iteration: 79333
loss: 0.9744324684143066,grad_norm: 0.9999991761680491, iteration: 79334
loss: 0.9954109191894531,grad_norm: 0.9999990176877136, iteration: 79335
loss: 1.0211703777313232,grad_norm: 0.9999992765341266, iteration: 79336
loss: 1.0495308637619019,grad_norm: 0.9961442257000338, iteration: 79337
loss: 1.0234086513519287,grad_norm: 0.9461595892137744, iteration: 79338
loss: 1.0074453353881836,grad_norm: 0.9718829054894375, iteration: 79339
loss: 0.9832428097724915,grad_norm: 0.9999992040319439, iteration: 79340
loss: 0.9985415935516357,grad_norm: 0.999998937505258, iteration: 79341
loss: 0.9887121319770813,grad_norm: 0.9931913613036065, iteration: 79342
loss: 1.0301131010055542,grad_norm: 0.9999991752212585, iteration: 79343
loss: 0.9942286610603333,grad_norm: 0.9999991570971047, iteration: 79344
loss: 0.9939905405044556,grad_norm: 0.9999991452039838, iteration: 79345
loss: 0.9705731868743896,grad_norm: 0.9999991136859299, iteration: 79346
loss: 1.0213840007781982,grad_norm: 0.9999992203655121, iteration: 79347
loss: 1.0086071491241455,grad_norm: 0.9999991985928125, iteration: 79348
loss: 1.0328289270401,grad_norm: 0.9999998394926404, iteration: 79349
loss: 1.0088145732879639,grad_norm: 0.9999990079263766, iteration: 79350
loss: 1.035691261291504,grad_norm: 0.9999990289184384, iteration: 79351
loss: 1.0167309045791626,grad_norm: 0.9999996707682205, iteration: 79352
loss: 0.9914085865020752,grad_norm: 0.9999992292526534, iteration: 79353
loss: 0.9850678443908691,grad_norm: 0.9877494902429904, iteration: 79354
loss: 1.0145405530929565,grad_norm: 0.7954957770211402, iteration: 79355
loss: 0.9773508906364441,grad_norm: 0.9686771819901325, iteration: 79356
loss: 1.0133980512619019,grad_norm: 0.9999992558063554, iteration: 79357
loss: 1.0194387435913086,grad_norm: 0.9570817336123983, iteration: 79358
loss: 1.0335843563079834,grad_norm: 0.9999992535211883, iteration: 79359
loss: 1.017496943473816,grad_norm: 0.9999993012608839, iteration: 79360
loss: 0.9463745951652527,grad_norm: 0.9999991503164236, iteration: 79361
loss: 1.0113017559051514,grad_norm: 0.9999989719263015, iteration: 79362
loss: 0.9757852554321289,grad_norm: 0.9999991317719517, iteration: 79363
loss: 1.0226655006408691,grad_norm: 0.9999992939709905, iteration: 79364
loss: 0.9965019226074219,grad_norm: 0.9999990556780539, iteration: 79365
loss: 0.9969205856323242,grad_norm: 0.9999991226112809, iteration: 79366
loss: 0.992889404296875,grad_norm: 0.9676155244964978, iteration: 79367
loss: 1.042685627937317,grad_norm: 0.999998990261526, iteration: 79368
loss: 0.9927120208740234,grad_norm: 0.9999990559476117, iteration: 79369
loss: 0.9866346716880798,grad_norm: 0.9999991488899336, iteration: 79370
loss: 0.9641979932785034,grad_norm: 0.9999991166953056, iteration: 79371
loss: 1.077612280845642,grad_norm: 0.9999994623454329, iteration: 79372
loss: 0.9792953133583069,grad_norm: 0.9328770381283469, iteration: 79373
loss: 0.9961630702018738,grad_norm: 0.9999992108497071, iteration: 79374
loss: 1.0585989952087402,grad_norm: 0.9999999101563959, iteration: 79375
loss: 0.9891690015792847,grad_norm: 0.9999991931355579, iteration: 79376
loss: 1.0212342739105225,grad_norm: 0.9593897817921613, iteration: 79377
loss: 1.0272982120513916,grad_norm: 0.9811849420675267, iteration: 79378
loss: 0.9988162517547607,grad_norm: 0.9999992394260724, iteration: 79379
loss: 0.9706773161888123,grad_norm: 0.9999991359861179, iteration: 79380
loss: 0.9840494394302368,grad_norm: 0.9999989450268715, iteration: 79381
loss: 0.9894725680351257,grad_norm: 0.9126823710387563, iteration: 79382
loss: 1.0202807188034058,grad_norm: 0.9503576910557439, iteration: 79383
loss: 1.033751130104065,grad_norm: 0.9999994185918515, iteration: 79384
loss: 1.0374528169631958,grad_norm: 0.999999081503261, iteration: 79385
loss: 0.9661235809326172,grad_norm: 0.9429203220268857, iteration: 79386
loss: 0.9777628183364868,grad_norm: 0.9999991910388462, iteration: 79387
loss: 0.9929996132850647,grad_norm: 0.9477956345050743, iteration: 79388
loss: 1.035659909248352,grad_norm: 0.9999991998964685, iteration: 79389
loss: 0.9849860072135925,grad_norm: 0.9999991756576905, iteration: 79390
loss: 0.9916383624076843,grad_norm: 0.9999993572078221, iteration: 79391
loss: 0.9532078504562378,grad_norm: 0.9999990465354439, iteration: 79392
loss: 1.005068063735962,grad_norm: 0.9999990792990264, iteration: 79393
loss: 1.0216270685195923,grad_norm: 0.9999990639516173, iteration: 79394
loss: 1.0444812774658203,grad_norm: 0.9999998637279166, iteration: 79395
loss: 1.0366684198379517,grad_norm: 0.999998988861623, iteration: 79396
loss: 0.994611918926239,grad_norm: 0.9999991288216538, iteration: 79397
loss: 0.9972516894340515,grad_norm: 0.8116741926779442, iteration: 79398
loss: 1.0311585664749146,grad_norm: 0.9999991245313936, iteration: 79399
loss: 1.051931619644165,grad_norm: 0.9999993840308097, iteration: 79400
loss: 1.0321900844573975,grad_norm: 0.9999994665589632, iteration: 79401
loss: 1.0223913192749023,grad_norm: 0.9450640541679117, iteration: 79402
loss: 1.0024218559265137,grad_norm: 0.999999141819465, iteration: 79403
loss: 1.012548804283142,grad_norm: 0.9999990985090097, iteration: 79404
loss: 1.029036045074463,grad_norm: 0.9999991441069235, iteration: 79405
loss: 1.0112584829330444,grad_norm: 0.9999990349421982, iteration: 79406
loss: 0.9594658613204956,grad_norm: 0.9999991578669825, iteration: 79407
loss: 0.9831218123435974,grad_norm: 0.9999991579269093, iteration: 79408
loss: 1.0484750270843506,grad_norm: 0.8978009192339982, iteration: 79409
loss: 0.9711123704910278,grad_norm: 0.8349297532931552, iteration: 79410
loss: 0.9905237555503845,grad_norm: 0.9771622646558713, iteration: 79411
loss: 1.0013130903244019,grad_norm: 0.9268941187246305, iteration: 79412
loss: 1.0132280588150024,grad_norm: 0.9999991980808114, iteration: 79413
loss: 1.023655652999878,grad_norm: 0.9999991836236554, iteration: 79414
loss: 1.0046491622924805,grad_norm: 0.99999919675292, iteration: 79415
loss: 1.1382144689559937,grad_norm: 0.9999995874602636, iteration: 79416
loss: 1.0020897388458252,grad_norm: 0.9866964690650402, iteration: 79417
loss: 0.9971093535423279,grad_norm: 0.9822858180009815, iteration: 79418
loss: 0.9793152809143066,grad_norm: 0.9986013041309812, iteration: 79419
loss: 0.9931633472442627,grad_norm: 0.9999991152851911, iteration: 79420
loss: 1.004096269607544,grad_norm: 0.9499985854999821, iteration: 79421
loss: 1.0520631074905396,grad_norm: 0.9999994296729189, iteration: 79422
loss: 1.007735252380371,grad_norm: 0.999999185429816, iteration: 79423
loss: 0.9802498817443848,grad_norm: 0.9999991825194688, iteration: 79424
loss: 1.0016365051269531,grad_norm: 0.9999993254581648, iteration: 79425
loss: 0.9880098700523376,grad_norm: 0.9999991261853485, iteration: 79426
loss: 0.9959052205085754,grad_norm: 0.9999991901742254, iteration: 79427
loss: 1.0258667469024658,grad_norm: 0.9999993959238336, iteration: 79428
loss: 0.9883930683135986,grad_norm: 0.999999866896374, iteration: 79429
loss: 0.988093376159668,grad_norm: 0.9999992021226338, iteration: 79430
loss: 0.981834352016449,grad_norm: 0.9999992658961476, iteration: 79431
loss: 0.9919935464859009,grad_norm: 0.9999990945547895, iteration: 79432
loss: 0.9918100237846375,grad_norm: 0.9588941580067586, iteration: 79433
loss: 1.0034637451171875,grad_norm: 0.9999991154590926, iteration: 79434
loss: 1.001857042312622,grad_norm: 0.99999922783755, iteration: 79435
loss: 1.0051312446594238,grad_norm: 0.9999991876215862, iteration: 79436
loss: 0.9995469450950623,grad_norm: 0.9999990781865683, iteration: 79437
loss: 0.9995545148849487,grad_norm: 0.9999990684817199, iteration: 79438
loss: 0.9804703593254089,grad_norm: 0.9999990827111702, iteration: 79439
loss: 0.9827051162719727,grad_norm: 0.9999991754212562, iteration: 79440
loss: 1.0131185054779053,grad_norm: 0.9999991999446397, iteration: 79441
loss: 1.0234076976776123,grad_norm: 0.9999992848297118, iteration: 79442
loss: 1.0033529996871948,grad_norm: 0.9999993217572806, iteration: 79443
loss: 0.9990156292915344,grad_norm: 0.9999989857679971, iteration: 79444
loss: 1.0181573629379272,grad_norm: 0.9999990439050854, iteration: 79445
loss: 0.991143524646759,grad_norm: 0.9999991238198821, iteration: 79446
loss: 1.0611889362335205,grad_norm: 0.9999995434594231, iteration: 79447
loss: 0.98211669921875,grad_norm: 0.9999991817116017, iteration: 79448
loss: 0.9902383685112,grad_norm: 0.9999992388044324, iteration: 79449
loss: 1.0340418815612793,grad_norm: 0.9999990070600676, iteration: 79450
loss: 1.0052638053894043,grad_norm: 0.9999993117435612, iteration: 79451
loss: 1.0028918981552124,grad_norm: 0.9999992058671288, iteration: 79452
loss: 0.9893062114715576,grad_norm: 0.8236417475539711, iteration: 79453
loss: 0.9987810254096985,grad_norm: 0.9999998732187673, iteration: 79454
loss: 1.04848313331604,grad_norm: 0.999999223034643, iteration: 79455
loss: 1.1876569986343384,grad_norm: 0.9999994219194819, iteration: 79456
loss: 0.9755614399909973,grad_norm: 0.9999990636432526, iteration: 79457
loss: 0.9804693460464478,grad_norm: 0.9327684043711928, iteration: 79458
loss: 1.0073312520980835,grad_norm: 0.999999309281407, iteration: 79459
loss: 0.9976953864097595,grad_norm: 0.9999994404062256, iteration: 79460
loss: 1.055673360824585,grad_norm: 0.9999993643877036, iteration: 79461
loss: 1.0107022523880005,grad_norm: 0.9999990681784705, iteration: 79462
loss: 0.9942540526390076,grad_norm: 0.9999991853768988, iteration: 79463
loss: 1.0116037130355835,grad_norm: 0.9999993077670486, iteration: 79464
loss: 0.9779035449028015,grad_norm: 0.9999990679900551, iteration: 79465
loss: 0.993675708770752,grad_norm: 0.8560749434826057, iteration: 79466
loss: 0.9838721752166748,grad_norm: 0.9999992800259928, iteration: 79467
loss: 0.9683109521865845,grad_norm: 0.8795819261108417, iteration: 79468
loss: 0.9974634647369385,grad_norm: 0.9460813903264355, iteration: 79469
loss: 0.9990530610084534,grad_norm: 0.9999992172686751, iteration: 79470
loss: 1.0106480121612549,grad_norm: 0.9999992468889095, iteration: 79471
loss: 1.013365387916565,grad_norm: 0.9024326443725277, iteration: 79472
loss: 0.9908534288406372,grad_norm: 0.9320479328168642, iteration: 79473
loss: 0.9788070321083069,grad_norm: 0.9271369285712725, iteration: 79474
loss: 0.9700374007225037,grad_norm: 0.9085886912985897, iteration: 79475
loss: 0.9713466167449951,grad_norm: 0.9487310411920448, iteration: 79476
loss: 1.0235587358474731,grad_norm: 0.9999990507188556, iteration: 79477
loss: 1.0218253135681152,grad_norm: 0.9999990595869999, iteration: 79478
loss: 0.996982991695404,grad_norm: 0.9999991781283326, iteration: 79479
loss: 0.9788106083869934,grad_norm: 0.9762888427297886, iteration: 79480
loss: 0.9728527665138245,grad_norm: 0.9999989431241281, iteration: 79481
loss: 0.9871383905410767,grad_norm: 0.9999991635957988, iteration: 79482
loss: 1.0082274675369263,grad_norm: 0.9240186476588035, iteration: 79483
loss: 1.0158270597457886,grad_norm: 0.999998912030252, iteration: 79484
loss: 1.0102983713150024,grad_norm: 0.9999989793027135, iteration: 79485
loss: 1.020784854888916,grad_norm: 0.9999990897208981, iteration: 79486
loss: 0.9802953600883484,grad_norm: 0.9999990892449768, iteration: 79487
loss: 1.0354286432266235,grad_norm: 0.959283551163776, iteration: 79488
loss: 1.027942419052124,grad_norm: 0.999999096846663, iteration: 79489
loss: 1.0238113403320312,grad_norm: 0.9999991438894559, iteration: 79490
loss: 1.038461685180664,grad_norm: 0.9043534914717682, iteration: 79491
loss: 1.0125138759613037,grad_norm: 0.9205697148079993, iteration: 79492
loss: 1.035563349723816,grad_norm: 0.99999918526701, iteration: 79493
loss: 0.9862457513809204,grad_norm: 0.9999990922526919, iteration: 79494
loss: 0.9686756134033203,grad_norm: 0.8693222019763653, iteration: 79495
loss: 0.9778196811676025,grad_norm: 0.999999127402929, iteration: 79496
loss: 0.9845482707023621,grad_norm: 0.9999990848228971, iteration: 79497
loss: 1.0483359098434448,grad_norm: 0.9989312033015294, iteration: 79498
loss: 1.0032782554626465,grad_norm: 0.9451161054081376, iteration: 79499
loss: 1.014864444732666,grad_norm: 0.8510078929919369, iteration: 79500
loss: 0.9739137887954712,grad_norm: 0.962360396224648, iteration: 79501
loss: 0.9986793994903564,grad_norm: 0.8745891371713528, iteration: 79502
loss: 0.9687702655792236,grad_norm: 0.9584413804337643, iteration: 79503
loss: 1.0145913362503052,grad_norm: 0.9147711658801584, iteration: 79504
loss: 1.0409096479415894,grad_norm: 0.9999992473961627, iteration: 79505
loss: 1.0078164339065552,grad_norm: 0.8902243365302351, iteration: 79506
loss: 1.009520173072815,grad_norm: 0.9999993737213689, iteration: 79507
loss: 0.9677579998970032,grad_norm: 0.9405823716883861, iteration: 79508
loss: 0.9592779278755188,grad_norm: 0.9852113348951598, iteration: 79509
loss: 0.9958614706993103,grad_norm: 0.9999996126423709, iteration: 79510
loss: 1.0259442329406738,grad_norm: 0.9999991543266801, iteration: 79511
loss: 1.0144057273864746,grad_norm: 0.9999991710931254, iteration: 79512
loss: 0.9946500658988953,grad_norm: 0.9999991163424952, iteration: 79513
loss: 0.9922174215316772,grad_norm: 0.9749149335284645, iteration: 79514
loss: 0.9994139671325684,grad_norm: 0.9999991473652409, iteration: 79515
loss: 1.0108449459075928,grad_norm: 0.830744355545696, iteration: 79516
loss: 0.9896948933601379,grad_norm: 0.9999992281995317, iteration: 79517
loss: 0.9892208576202393,grad_norm: 0.961883956874151, iteration: 79518
loss: 1.0070719718933105,grad_norm: 0.9999991676933172, iteration: 79519
loss: 0.9925107359886169,grad_norm: 0.9999992473251329, iteration: 79520
loss: 0.9950251579284668,grad_norm: 0.999999006438227, iteration: 79521
loss: 1.0009639263153076,grad_norm: 0.9999992911915493, iteration: 79522
loss: 1.035849928855896,grad_norm: 0.9999998594478929, iteration: 79523
loss: 0.9963388442993164,grad_norm: 0.999999079160981, iteration: 79524
loss: 1.000587821006775,grad_norm: 0.9999991349125957, iteration: 79525
loss: 1.032150387763977,grad_norm: 0.9999991133129379, iteration: 79526
loss: 1.0014721155166626,grad_norm: 0.9999991683244407, iteration: 79527
loss: 1.008238434791565,grad_norm: 0.9457209315645423, iteration: 79528
loss: 1.0196869373321533,grad_norm: 0.9999990941053998, iteration: 79529
loss: 1.035228967666626,grad_norm: 0.9999993632539456, iteration: 79530
loss: 1.041057825088501,grad_norm: 0.9999991903229517, iteration: 79531
loss: 0.985504686832428,grad_norm: 0.868405877785704, iteration: 79532
loss: 1.0153896808624268,grad_norm: 0.9999990577347916, iteration: 79533
loss: 1.0051305294036865,grad_norm: 0.9999991104824311, iteration: 79534
loss: 0.9848120808601379,grad_norm: 0.9999994147167313, iteration: 79535
loss: 1.000803828239441,grad_norm: 0.9494210404103444, iteration: 79536
loss: 0.983775794506073,grad_norm: 0.968138223896075, iteration: 79537
loss: 1.0071512460708618,grad_norm: 0.9999992091150807, iteration: 79538
loss: 0.9931084513664246,grad_norm: 0.9999991028186132, iteration: 79539
loss: 1.0043113231658936,grad_norm: 0.999999225228393, iteration: 79540
loss: 1.0016881227493286,grad_norm: 0.8812298529461238, iteration: 79541
loss: 0.9977189898490906,grad_norm: 0.9924510959299332, iteration: 79542
loss: 0.9791606068611145,grad_norm: 0.9999991071419663, iteration: 79543
loss: 1.0241092443466187,grad_norm: 0.9999992989014098, iteration: 79544
loss: 0.9994001984596252,grad_norm: 0.8782416209973065, iteration: 79545
loss: 1.0405789613723755,grad_norm: 0.9999989692862846, iteration: 79546
loss: 1.0027836561203003,grad_norm: 0.9999989861784251, iteration: 79547
loss: 0.9819440841674805,grad_norm: 0.9740179957663245, iteration: 79548
loss: 0.9881710410118103,grad_norm: 0.9999991695996359, iteration: 79549
loss: 1.0166698694229126,grad_norm: 0.9999990960549303, iteration: 79550
loss: 1.0131691694259644,grad_norm: 0.9999992564076635, iteration: 79551
loss: 1.025153636932373,grad_norm: 0.9631187660269684, iteration: 79552
loss: 0.9876466989517212,grad_norm: 0.9999991016817772, iteration: 79553
loss: 1.0029122829437256,grad_norm: 0.9327009788557485, iteration: 79554
loss: 1.0229350328445435,grad_norm: 0.9999994293898252, iteration: 79555
loss: 1.0272513628005981,grad_norm: 0.9999991136134921, iteration: 79556
loss: 0.9786115288734436,grad_norm: 0.977431949380475, iteration: 79557
loss: 0.9949859380722046,grad_norm: 0.9999991810893734, iteration: 79558
loss: 0.9905345439910889,grad_norm: 0.8935357613252932, iteration: 79559
loss: 0.9914994239807129,grad_norm: 0.9413756810355463, iteration: 79560
loss: 0.9947101473808289,grad_norm: 0.9769903329522451, iteration: 79561
loss: 1.0298019647598267,grad_norm: 0.9999991146462451, iteration: 79562
loss: 1.0012518167495728,grad_norm: 0.9719592562864654, iteration: 79563
loss: 1.011507511138916,grad_norm: 0.9999993161880415, iteration: 79564
loss: 1.0232669115066528,grad_norm: 0.8757743565299351, iteration: 79565
loss: 1.0124402046203613,grad_norm: 0.9804923940429087, iteration: 79566
loss: 0.9998520612716675,grad_norm: 0.9080579355302754, iteration: 79567
loss: 1.0298112630844116,grad_norm: 0.9999993283846192, iteration: 79568
loss: 0.9672260880470276,grad_norm: 0.9999991314652338, iteration: 79569
loss: 0.9994131922721863,grad_norm: 0.9999990155355127, iteration: 79570
loss: 1.0071910619735718,grad_norm: 0.971216015684134, iteration: 79571
loss: 0.9846969842910767,grad_norm: 0.8310549161715272, iteration: 79572
loss: 0.9847408533096313,grad_norm: 0.999999003436354, iteration: 79573
loss: 0.9739983677864075,grad_norm: 0.9999989302704453, iteration: 79574
loss: 0.9985157251358032,grad_norm: 0.9999989243214807, iteration: 79575
loss: 0.997925341129303,grad_norm: 0.9999994292024378, iteration: 79576
loss: 0.9988102912902832,grad_norm: 0.9999992205521229, iteration: 79577
loss: 1.000796914100647,grad_norm: 0.9999989526238781, iteration: 79578
loss: 1.0327779054641724,grad_norm: 0.9999991821600254, iteration: 79579
loss: 1.0527353286743164,grad_norm: 0.9999995142607836, iteration: 79580
loss: 0.989241898059845,grad_norm: 0.9911225604283985, iteration: 79581
loss: 1.0090930461883545,grad_norm: 0.88548754331856, iteration: 79582
loss: 1.0109530687332153,grad_norm: 0.9887753427027631, iteration: 79583
loss: 0.9701316952705383,grad_norm: 0.9999990261489892, iteration: 79584
loss: 1.0306358337402344,grad_norm: 0.9999992171316853, iteration: 79585
loss: 0.9624956846237183,grad_norm: 0.908056588009875, iteration: 79586
loss: 1.0280117988586426,grad_norm: 0.9999992929375007, iteration: 79587
loss: 1.0153653621673584,grad_norm: 0.9021470901350165, iteration: 79588
loss: 0.9602283239364624,grad_norm: 0.854342740252991, iteration: 79589
loss: 0.9943005442619324,grad_norm: 0.9212113020726261, iteration: 79590
loss: 0.990684449672699,grad_norm: 0.9999990703781608, iteration: 79591
loss: 0.9761366248130798,grad_norm: 0.9999989776535093, iteration: 79592
loss: 1.0419065952301025,grad_norm: 0.9999994670544283, iteration: 79593
loss: 1.0007529258728027,grad_norm: 0.9999994518393397, iteration: 79594
loss: 1.0000301599502563,grad_norm: 0.9999991485000609, iteration: 79595
loss: 0.9758470058441162,grad_norm: 0.9999992397802902, iteration: 79596
loss: 0.994611918926239,grad_norm: 0.9999995675355837, iteration: 79597
loss: 0.9892244338989258,grad_norm: 0.9437892155079541, iteration: 79598
loss: 1.0002233982086182,grad_norm: 0.9265791261406776, iteration: 79599
loss: 1.0255045890808105,grad_norm: 0.9999993175443365, iteration: 79600
loss: 0.9999899864196777,grad_norm: 0.9999993199166676, iteration: 79601
loss: 1.015154242515564,grad_norm: 0.9999989945912894, iteration: 79602
loss: 1.002736210823059,grad_norm: 0.9144074262938379, iteration: 79603
loss: 1.0028029680252075,grad_norm: 0.9999992126817548, iteration: 79604
loss: 0.9622552990913391,grad_norm: 0.9999991436205452, iteration: 79605
loss: 0.9913283586502075,grad_norm: 0.9176718420681407, iteration: 79606
loss: 1.025182843208313,grad_norm: 0.9999991084306864, iteration: 79607
loss: 0.9963088035583496,grad_norm: 0.9946652155943978, iteration: 79608
loss: 1.042529821395874,grad_norm: 0.8923507503133552, iteration: 79609
loss: 0.9991073608398438,grad_norm: 0.9999991877110038, iteration: 79610
loss: 1.0147286653518677,grad_norm: 0.9999990331177672, iteration: 79611
loss: 1.0164936780929565,grad_norm: 0.9999991261228658, iteration: 79612
loss: 1.0203289985656738,grad_norm: 0.9999995862866651, iteration: 79613
loss: 0.9665348529815674,grad_norm: 0.999999035486772, iteration: 79614
loss: 1.0881025791168213,grad_norm: 0.8540082001503885, iteration: 79615
loss: 1.0012710094451904,grad_norm: 0.999999137864966, iteration: 79616
loss: 1.0131309032440186,grad_norm: 0.9999992250947558, iteration: 79617
loss: 0.9880251288414001,grad_norm: 0.9326079951338405, iteration: 79618
loss: 0.99510657787323,grad_norm: 0.9999989798712465, iteration: 79619
loss: 1.003719687461853,grad_norm: 0.9078453663255153, iteration: 79620
loss: 1.01504647731781,grad_norm: 0.9999990449985012, iteration: 79621
loss: 1.0030897855758667,grad_norm: 0.8876248190308447, iteration: 79622
loss: 0.9941219091415405,grad_norm: 0.9628646084529547, iteration: 79623
loss: 0.9980947971343994,grad_norm: 0.9999992323815982, iteration: 79624
loss: 0.9638190269470215,grad_norm: 0.9999991738366154, iteration: 79625
loss: 1.0315066576004028,grad_norm: 0.9999992344980628, iteration: 79626
loss: 1.0429154634475708,grad_norm: 0.999999172686963, iteration: 79627
loss: 1.0094215869903564,grad_norm: 0.999999202316178, iteration: 79628
loss: 0.9865264892578125,grad_norm: 0.9999991551610637, iteration: 79629
loss: 1.036605715751648,grad_norm: 0.9989821782032312, iteration: 79630
loss: 0.9858801364898682,grad_norm: 0.9999989347595132, iteration: 79631
loss: 1.0113568305969238,grad_norm: 0.9999991602205116, iteration: 79632
loss: 1.0527478456497192,grad_norm: 0.9766687738872194, iteration: 79633
loss: 1.0166281461715698,grad_norm: 0.9999988959025364, iteration: 79634
loss: 1.0172419548034668,grad_norm: 0.9999991247942724, iteration: 79635
loss: 1.0173211097717285,grad_norm: 0.9999990837326667, iteration: 79636
loss: 1.0080872774124146,grad_norm: 0.9652360867627519, iteration: 79637
loss: 0.9744929671287537,grad_norm: 0.9999991491043168, iteration: 79638
loss: 1.0118193626403809,grad_norm: 0.9999992388640786, iteration: 79639
loss: 1.0442442893981934,grad_norm: 0.9999990656116075, iteration: 79640
loss: 0.9958604574203491,grad_norm: 0.9999991881785786, iteration: 79641
loss: 0.9998522400856018,grad_norm: 0.9999990214973816, iteration: 79642
loss: 0.9757106900215149,grad_norm: 0.9999991284432949, iteration: 79643
loss: 0.9998054504394531,grad_norm: 0.8593590138311992, iteration: 79644
loss: 1.0055333375930786,grad_norm: 0.9848305820071692, iteration: 79645
loss: 0.9989147782325745,grad_norm: 0.8849956567185409, iteration: 79646
loss: 1.0113823413848877,grad_norm: 0.9999991522328668, iteration: 79647
loss: 0.978029191493988,grad_norm: 0.9999990412079967, iteration: 79648
loss: 0.9878595471382141,grad_norm: 0.9019212290501081, iteration: 79649
loss: 0.9920013546943665,grad_norm: 0.9916036254000278, iteration: 79650
loss: 0.9806093573570251,grad_norm: 0.99999930675452, iteration: 79651
loss: 1.0057348012924194,grad_norm: 0.9223451592868774, iteration: 79652
loss: 0.9979316592216492,grad_norm: 0.9999992544218614, iteration: 79653
loss: 1.0302685499191284,grad_norm: 0.9999991098755783, iteration: 79654
loss: 1.0090569257736206,grad_norm: 0.9999990650384918, iteration: 79655
loss: 0.9658451676368713,grad_norm: 0.9999989574932908, iteration: 79656
loss: 0.9715924859046936,grad_norm: 0.9999990486733273, iteration: 79657
loss: 0.9804695248603821,grad_norm: 0.9999991704603294, iteration: 79658
loss: 1.016929268836975,grad_norm: 0.9999992956808862, iteration: 79659
loss: 1.0142173767089844,grad_norm: 0.9079666713447493, iteration: 79660
loss: 1.0144883394241333,grad_norm: 0.9931583611088336, iteration: 79661
loss: 1.0242770910263062,grad_norm: 0.9999991582960026, iteration: 79662
loss: 0.9816041588783264,grad_norm: 0.8939706320193616, iteration: 79663
loss: 1.0005238056182861,grad_norm: 0.9280456099320041, iteration: 79664
loss: 0.9744478464126587,grad_norm: 0.9922988555241528, iteration: 79665
loss: 0.9800106287002563,grad_norm: 0.9999990486633357, iteration: 79666
loss: 0.9817443490028381,grad_norm: 0.9999992287772899, iteration: 79667
loss: 0.9793911576271057,grad_norm: 0.9817268693002278, iteration: 79668
loss: 1.0232808589935303,grad_norm: 0.9806718080134896, iteration: 79669
loss: 1.036480188369751,grad_norm: 0.9999990619235185, iteration: 79670
loss: 0.9771379828453064,grad_norm: 0.9995601246831823, iteration: 79671
loss: 1.0351414680480957,grad_norm: 0.9999991719559427, iteration: 79672
loss: 0.9902466535568237,grad_norm: 0.9999991396866601, iteration: 79673
loss: 0.9747835397720337,grad_norm: 0.9999990378066711, iteration: 79674
loss: 0.9698520302772522,grad_norm: 0.7723660513494495, iteration: 79675
loss: 0.9926451444625854,grad_norm: 0.9999991034726663, iteration: 79676
loss: 0.9824264049530029,grad_norm: 0.99999925574036, iteration: 79677
loss: 1.0016593933105469,grad_norm: 0.999999152318607, iteration: 79678
loss: 1.015144944190979,grad_norm: 0.9999992266540727, iteration: 79679
loss: 0.9993892908096313,grad_norm: 0.9999991488821467, iteration: 79680
loss: 1.0193344354629517,grad_norm: 0.9494006107144451, iteration: 79681
loss: 0.9985926747322083,grad_norm: 0.999999264186403, iteration: 79682
loss: 1.035771369934082,grad_norm: 0.9999990492036113, iteration: 79683
loss: 1.017966866493225,grad_norm: 0.9999990706927329, iteration: 79684
loss: 0.9886871576309204,grad_norm: 0.9999991903933341, iteration: 79685
loss: 0.9984467029571533,grad_norm: 0.8754235284344098, iteration: 79686
loss: 0.9552610516548157,grad_norm: 0.9999991326757487, iteration: 79687
loss: 1.0219148397445679,grad_norm: 0.9999991943956864, iteration: 79688
loss: 0.9861271977424622,grad_norm: 0.9999999230889314, iteration: 79689
loss: 1.0227627754211426,grad_norm: 0.9188918664554895, iteration: 79690
loss: 1.0211644172668457,grad_norm: 0.8298334738543324, iteration: 79691
loss: 1.0172075033187866,grad_norm: 0.9999991197658017, iteration: 79692
loss: 0.9840389490127563,grad_norm: 0.9859785327221252, iteration: 79693
loss: 1.0112826824188232,grad_norm: 0.8384182570889852, iteration: 79694
loss: 1.0073837041854858,grad_norm: 0.9999990849212766, iteration: 79695
loss: 1.0460929870605469,grad_norm: 0.9999992310606343, iteration: 79696
loss: 0.9959740042686462,grad_norm: 0.9999991599425396, iteration: 79697
loss: 1.0200892686843872,grad_norm: 0.9190029958037211, iteration: 79698
loss: 0.9974661469459534,grad_norm: 0.9392076352479597, iteration: 79699
loss: 1.0015686750411987,grad_norm: 0.9999991563385191, iteration: 79700
loss: 0.9952157735824585,grad_norm: 0.9999995271424689, iteration: 79701
loss: 0.9991111159324646,grad_norm: 0.9017551492764215, iteration: 79702
loss: 0.995400607585907,grad_norm: 0.9776507856315955, iteration: 79703
loss: 0.9644994735717773,grad_norm: 0.9999990962850293, iteration: 79704
loss: 1.02362859249115,grad_norm: 0.9999991638467403, iteration: 79705
loss: 1.0135546922683716,grad_norm: 0.9999992189993122, iteration: 79706
loss: 1.0181684494018555,grad_norm: 0.9416714786082381, iteration: 79707
loss: 1.017574667930603,grad_norm: 0.9698718943747932, iteration: 79708
loss: 0.9867578744888306,grad_norm: 0.9008174075744142, iteration: 79709
loss: 1.001280426979065,grad_norm: 0.9999989929963398, iteration: 79710
loss: 1.0168551206588745,grad_norm: 0.9999992826926372, iteration: 79711
loss: 0.9665295481681824,grad_norm: 0.9999991198268915, iteration: 79712
loss: 0.9815413951873779,grad_norm: 0.9999991579151597, iteration: 79713
loss: 1.0098649263381958,grad_norm: 0.9998645252238786, iteration: 79714
loss: 0.9948659539222717,grad_norm: 0.9999990812329762, iteration: 79715
loss: 1.0361031293869019,grad_norm: 0.8231944872191135, iteration: 79716
loss: 0.9582073092460632,grad_norm: 0.8059473287162746, iteration: 79717
loss: 0.9858577847480774,grad_norm: 0.999999056133892, iteration: 79718
loss: 1.0169684886932373,grad_norm: 0.9999991646152445, iteration: 79719
loss: 1.025343656539917,grad_norm: 0.9999992476443855, iteration: 79720
loss: 0.9942461848258972,grad_norm: 0.9999990058319256, iteration: 79721
loss: 1.0152785778045654,grad_norm: 0.9999991950887059, iteration: 79722
loss: 1.0220454931259155,grad_norm: 0.8170135892890648, iteration: 79723
loss: 1.0198893547058105,grad_norm: 0.9999990836577277, iteration: 79724
loss: 1.0142208337783813,grad_norm: 0.9999992277980376, iteration: 79725
loss: 1.0103307962417603,grad_norm: 0.9999990206921434, iteration: 79726
loss: 0.9853823781013489,grad_norm: 0.9999992245697922, iteration: 79727
loss: 1.0027683973312378,grad_norm: 0.9999991004059241, iteration: 79728
loss: 1.0151290893554688,grad_norm: 0.999999151388449, iteration: 79729
loss: 1.0251290798187256,grad_norm: 0.9999990143298665, iteration: 79730
loss: 1.0115251541137695,grad_norm: 0.812498612913998, iteration: 79731
loss: 0.9883624315261841,grad_norm: 0.9999992294354232, iteration: 79732
loss: 1.0174156427383423,grad_norm: 0.9696606500461037, iteration: 79733
loss: 1.0290532112121582,grad_norm: 0.9599636755044569, iteration: 79734
loss: 1.0090727806091309,grad_norm: 0.9367180073009985, iteration: 79735
loss: 0.9766416549682617,grad_norm: 0.9999990952968347, iteration: 79736
loss: 0.9868386387825012,grad_norm: 0.9999993563105773, iteration: 79737
loss: 1.0087147951126099,grad_norm: 0.9936563199395799, iteration: 79738
loss: 0.9935604333877563,grad_norm: 0.9639582904578032, iteration: 79739
loss: 0.9812218546867371,grad_norm: 0.9999990979393971, iteration: 79740
loss: 1.02193284034729,grad_norm: 0.9999991029408128, iteration: 79741
loss: 1.0461761951446533,grad_norm: 0.9999991221675324, iteration: 79742
loss: 0.9696378111839294,grad_norm: 0.9999991720281076, iteration: 79743
loss: 0.9921585321426392,grad_norm: 0.9999989842567754, iteration: 79744
loss: 0.9587340950965881,grad_norm: 0.9999991733203155, iteration: 79745
loss: 1.0057367086410522,grad_norm: 0.9999994319537534, iteration: 79746
loss: 0.9770054817199707,grad_norm: 0.9999992373056694, iteration: 79747
loss: 0.9967917799949646,grad_norm: 0.9999991622285651, iteration: 79748
loss: 0.9935188889503479,grad_norm: 0.9999989856265934, iteration: 79749
loss: 0.9556586742401123,grad_norm: 0.9999991159114411, iteration: 79750
loss: 1.0129365921020508,grad_norm: 0.9999992006643491, iteration: 79751
loss: 0.9770190119743347,grad_norm: 0.9999991750353936, iteration: 79752
loss: 0.9859723448753357,grad_norm: 0.999999060297136, iteration: 79753
loss: 1.0064584016799927,grad_norm: 0.9999996360354503, iteration: 79754
loss: 1.0258677005767822,grad_norm: 0.9999997420533648, iteration: 79755
loss: 1.0029027462005615,grad_norm: 0.9999992269978218, iteration: 79756
loss: 0.958716094493866,grad_norm: 0.9999992822972228, iteration: 79757
loss: 1.0029397010803223,grad_norm: 0.9470402152989288, iteration: 79758
loss: 1.025925874710083,grad_norm: 0.9999990150178096, iteration: 79759
loss: 0.9904332756996155,grad_norm: 0.9081697532295551, iteration: 79760
loss: 1.0190755128860474,grad_norm: 0.9999990515523645, iteration: 79761
loss: 1.027647614479065,grad_norm: 0.9879133787618716, iteration: 79762
loss: 1.004096269607544,grad_norm: 0.9999989193890049, iteration: 79763
loss: 1.0087016820907593,grad_norm: 0.9999993376368375, iteration: 79764
loss: 1.030549168586731,grad_norm: 0.930358349543452, iteration: 79765
loss: 1.0020087957382202,grad_norm: 0.9999991191279833, iteration: 79766
loss: 0.9611907601356506,grad_norm: 0.9999990864696727, iteration: 79767
loss: 1.0196212530136108,grad_norm: 0.9590473024560994, iteration: 79768
loss: 0.9986346960067749,grad_norm: 0.9043564746955804, iteration: 79769
loss: 0.9934332966804504,grad_norm: 0.9770781708391707, iteration: 79770
loss: 1.0153170824050903,grad_norm: 0.9999990527302419, iteration: 79771
loss: 0.9956852197647095,grad_norm: 0.999999191544744, iteration: 79772
loss: 0.9843178391456604,grad_norm: 0.9999991164365516, iteration: 79773
loss: 0.9714766144752502,grad_norm: 0.9999991460545354, iteration: 79774
loss: 1.066129446029663,grad_norm: 0.9999997431187924, iteration: 79775
loss: 1.015844702720642,grad_norm: 0.9156180280529408, iteration: 79776
loss: 0.9848812222480774,grad_norm: 0.9999991783766939, iteration: 79777
loss: 1.002608060836792,grad_norm: 0.9999992284225252, iteration: 79778
loss: 1.0720480680465698,grad_norm: 0.9999997960814847, iteration: 79779
loss: 1.0549241304397583,grad_norm: 0.9999992884730491, iteration: 79780
loss: 0.9737856388092041,grad_norm: 0.9999990493318794, iteration: 79781
loss: 1.0113633871078491,grad_norm: 0.999999656498265, iteration: 79782
loss: 1.0004690885543823,grad_norm: 0.9851411602125981, iteration: 79783
loss: 1.030272126197815,grad_norm: 0.9999990581052702, iteration: 79784
loss: 0.999171257019043,grad_norm: 0.9999989728978388, iteration: 79785
loss: 1.042699933052063,grad_norm: 0.9999991665171184, iteration: 79786
loss: 0.9951283931732178,grad_norm: 0.9999992221657624, iteration: 79787
loss: 0.9984844923019409,grad_norm: 0.9972319161917377, iteration: 79788
loss: 1.0332176685333252,grad_norm: 0.9193121713463225, iteration: 79789
loss: 1.0397387742996216,grad_norm: 0.9999990227585954, iteration: 79790
loss: 0.9964402318000793,grad_norm: 0.8829303322306606, iteration: 79791
loss: 1.001682162284851,grad_norm: 0.8886499953474076, iteration: 79792
loss: 0.9900045990943909,grad_norm: 0.9585920867125376, iteration: 79793
loss: 0.9845282435417175,grad_norm: 0.9999991297261657, iteration: 79794
loss: 0.9955906867980957,grad_norm: 0.9999989972235838, iteration: 79795
loss: 0.9956584572792053,grad_norm: 0.999999387330512, iteration: 79796
loss: 1.0286178588867188,grad_norm: 0.9474179576343686, iteration: 79797
loss: 0.9894900321960449,grad_norm: 0.8647513501690834, iteration: 79798
loss: 0.9643206596374512,grad_norm: 0.9999991869156164, iteration: 79799
loss: 1.0239473581314087,grad_norm: 0.9265041781797846, iteration: 79800
loss: 0.9828401803970337,grad_norm: 0.9705040272912818, iteration: 79801
loss: 0.984600841999054,grad_norm: 0.9999992034974828, iteration: 79802
loss: 0.9943214058876038,grad_norm: 0.9999992800091866, iteration: 79803
loss: 1.0077314376831055,grad_norm: 0.8969780797354382, iteration: 79804
loss: 0.9974631667137146,grad_norm: 0.9754165877204257, iteration: 79805
loss: 0.9766409993171692,grad_norm: 0.999999258992628, iteration: 79806
loss: 1.0183404684066772,grad_norm: 0.9999991408695372, iteration: 79807
loss: 1.005847692489624,grad_norm: 0.9999992006833983, iteration: 79808
loss: 1.003971815109253,grad_norm: 0.9999991639683468, iteration: 79809
loss: 0.9862159490585327,grad_norm: 0.9999991808946398, iteration: 79810
loss: 1.0082467794418335,grad_norm: 0.9477465571213438, iteration: 79811
loss: 1.0050944089889526,grad_norm: 0.9999991306388433, iteration: 79812
loss: 0.9458118081092834,grad_norm: 0.9167590778729837, iteration: 79813
loss: 0.9925419688224792,grad_norm: 0.9999989595449509, iteration: 79814
loss: 1.0134069919586182,grad_norm: 0.9999992486501469, iteration: 79815
loss: 1.0172263383865356,grad_norm: 0.999999044174248, iteration: 79816
loss: 1.0019999742507935,grad_norm: 0.9338519529861848, iteration: 79817
loss: 1.007620930671692,grad_norm: 0.8991555979937039, iteration: 79818
loss: 0.9527071714401245,grad_norm: 0.9197663484808238, iteration: 79819
loss: 0.9911987781524658,grad_norm: 0.8955375754334023, iteration: 79820
loss: 1.0147666931152344,grad_norm: 0.9999995814729852, iteration: 79821
loss: 0.9406429529190063,grad_norm: 0.9095617420048877, iteration: 79822
loss: 0.9952968955039978,grad_norm: 0.9999991687587862, iteration: 79823
loss: 1.002271056175232,grad_norm: 0.9484967495753415, iteration: 79824
loss: 1.007832646369934,grad_norm: 0.9999992279454399, iteration: 79825
loss: 1.045210838317871,grad_norm: 0.9999992482315163, iteration: 79826
loss: 1.0337591171264648,grad_norm: 0.9999990379017525, iteration: 79827
loss: 1.0067764520645142,grad_norm: 0.9999991875389816, iteration: 79828
loss: 0.9797959923744202,grad_norm: 0.9586830683604083, iteration: 79829
loss: 0.9732500314712524,grad_norm: 0.9999990435816317, iteration: 79830
loss: 0.9641996026039124,grad_norm: 0.9207121512941835, iteration: 79831
loss: 0.9904224872589111,grad_norm: 0.8765085969718888, iteration: 79832
loss: 0.9847689270973206,grad_norm: 0.9999991019423783, iteration: 79833
loss: 1.0078645944595337,grad_norm: 0.9999994302583171, iteration: 79834
loss: 0.9884746670722961,grad_norm: 0.9999997110771192, iteration: 79835
loss: 0.9725326895713806,grad_norm: 0.9999989672900308, iteration: 79836
loss: 0.9535526037216187,grad_norm: 0.9999990166311019, iteration: 79837
loss: 1.0169227123260498,grad_norm: 0.999999302551965, iteration: 79838
loss: 0.9898421168327332,grad_norm: 0.9999992606206806, iteration: 79839
loss: 1.012198567390442,grad_norm: 0.999999311372106, iteration: 79840
loss: 0.9983075261116028,grad_norm: 0.9999989977234923, iteration: 79841
loss: 1.024133563041687,grad_norm: 0.9999990078644615, iteration: 79842
loss: 1.0052284002304077,grad_norm: 0.982709049725431, iteration: 79843
loss: 1.011905550956726,grad_norm: 0.9999990761864779, iteration: 79844
loss: 0.9787883758544922,grad_norm: 0.9389377772871432, iteration: 79845
loss: 1.0002565383911133,grad_norm: 0.9999990670450499, iteration: 79846
loss: 1.059889793395996,grad_norm: 0.999999821815655, iteration: 79847
loss: 0.9550103545188904,grad_norm: 0.9999991906916713, iteration: 79848
loss: 1.0108304023742676,grad_norm: 0.9999990716403323, iteration: 79849
loss: 1.0187076330184937,grad_norm: 0.9471599030010954, iteration: 79850
loss: 0.9957934617996216,grad_norm: 0.9701656679708676, iteration: 79851
loss: 1.01182222366333,grad_norm: 0.8833566685743445, iteration: 79852
loss: 0.9991434812545776,grad_norm: 0.9528285920143857, iteration: 79853
loss: 0.9618423581123352,grad_norm: 0.9999992581675132, iteration: 79854
loss: 1.0073304176330566,grad_norm: 0.9999991277026514, iteration: 79855
loss: 0.9778459668159485,grad_norm: 0.9999993509519804, iteration: 79856
loss: 1.0051252841949463,grad_norm: 0.9950055638625106, iteration: 79857
loss: 0.9685959219932556,grad_norm: 0.9999991753794835, iteration: 79858
loss: 0.9932411313056946,grad_norm: 0.9999992075958236, iteration: 79859
loss: 0.9967557191848755,grad_norm: 0.9329728217721879, iteration: 79860
loss: 1.0039913654327393,grad_norm: 0.9601145061935572, iteration: 79861
loss: 1.0139580965042114,grad_norm: 0.9637078050355259, iteration: 79862
loss: 1.0025367736816406,grad_norm: 0.829009272637687, iteration: 79863
loss: 1.0262190103530884,grad_norm: 0.9430007789923497, iteration: 79864
loss: 1.026123285293579,grad_norm: 0.9356160202404785, iteration: 79865
loss: 0.9704721570014954,grad_norm: 0.8705967593534181, iteration: 79866
loss: 0.9671003818511963,grad_norm: 0.9999990684220482, iteration: 79867
loss: 0.9731109738349915,grad_norm: 0.8162105019274948, iteration: 79868
loss: 1.008263111114502,grad_norm: 0.999999138206759, iteration: 79869
loss: 1.0303773880004883,grad_norm: 0.999999054176038, iteration: 79870
loss: 0.9500048160552979,grad_norm: 0.9999991547785724, iteration: 79871
loss: 0.9890649914741516,grad_norm: 0.9895723323858364, iteration: 79872
loss: 0.9858205914497375,grad_norm: 0.9999991025772049, iteration: 79873
loss: 1.0018516778945923,grad_norm: 0.9999992519937332, iteration: 79874
loss: 1.0289885997772217,grad_norm: 0.999999219020252, iteration: 79875
loss: 1.022022008895874,grad_norm: 0.8868985178506442, iteration: 79876
loss: 0.9781570434570312,grad_norm: 0.9999991110642646, iteration: 79877
loss: 0.9837917685508728,grad_norm: 0.999999177886029, iteration: 79878
loss: 1.0024412870407104,grad_norm: 0.9422100377049965, iteration: 79879
loss: 1.0320781469345093,grad_norm: 0.9101040900838321, iteration: 79880
loss: 1.015669822692871,grad_norm: 0.9623435589098615, iteration: 79881
loss: 0.9979224801063538,grad_norm: 0.9999993319085342, iteration: 79882
loss: 0.9630739092826843,grad_norm: 0.9088908950456288, iteration: 79883
loss: 1.0136624574661255,grad_norm: 0.9999994087943577, iteration: 79884
loss: 0.9743692278862,grad_norm: 0.791145753775191, iteration: 79885
loss: 1.0113694667816162,grad_norm: 0.9999993641879819, iteration: 79886
loss: 0.9871386885643005,grad_norm: 0.8553593824692324, iteration: 79887
loss: 0.9778028130531311,grad_norm: 0.9999990961154862, iteration: 79888
loss: 1.0057915449142456,grad_norm: 0.9524289429476477, iteration: 79889
loss: 1.0094549655914307,grad_norm: 0.9326054728496384, iteration: 79890
loss: 1.0255032777786255,grad_norm: 0.9999992761337485, iteration: 79891
loss: 0.99887615442276,grad_norm: 0.9999990097758894, iteration: 79892
loss: 1.0012236833572388,grad_norm: 0.9077324018619548, iteration: 79893
loss: 1.0404345989227295,grad_norm: 0.9999990112085794, iteration: 79894
loss: 1.0071322917938232,grad_norm: 0.8541374022784488, iteration: 79895
loss: 0.9716041088104248,grad_norm: 0.9999989378479764, iteration: 79896
loss: 1.0078977346420288,grad_norm: 0.9999990505156433, iteration: 79897
loss: 1.0059382915496826,grad_norm: 0.9325232759428467, iteration: 79898
loss: 1.0239347219467163,grad_norm: 0.9999992639900568, iteration: 79899
loss: 0.9876051545143127,grad_norm: 0.9702909705081161, iteration: 79900
loss: 0.9982185959815979,grad_norm: 0.9999990698823575, iteration: 79901
loss: 0.9677321314811707,grad_norm: 0.9999997541467539, iteration: 79902
loss: 1.0207408666610718,grad_norm: 0.9148598351693982, iteration: 79903
loss: 0.9912355542182922,grad_norm: 0.9826164598097196, iteration: 79904
loss: 1.000848650932312,grad_norm: 0.9999990240844905, iteration: 79905
loss: 0.9835497736930847,grad_norm: 0.9999991830882071, iteration: 79906
loss: 1.0637767314910889,grad_norm: 0.9999992440206831, iteration: 79907
loss: 1.0771434307098389,grad_norm: 0.999999071904193, iteration: 79908
loss: 1.02109694480896,grad_norm: 0.9999992826757349, iteration: 79909
loss: 0.9499243497848511,grad_norm: 0.9999991730915744, iteration: 79910
loss: 1.0087515115737915,grad_norm: 0.9999991285933911, iteration: 79911
loss: 1.0137900114059448,grad_norm: 0.9999991896573581, iteration: 79912
loss: 1.007907509803772,grad_norm: 0.9999991633551478, iteration: 79913
loss: 1.003376841545105,grad_norm: 0.9999999068747186, iteration: 79914
loss: 0.9930282831192017,grad_norm: 0.845533515904219, iteration: 79915
loss: 1.0197815895080566,grad_norm: 0.9505020198342282, iteration: 79916
loss: 0.9689826369285583,grad_norm: 0.9999991809588338, iteration: 79917
loss: 1.0174559354782104,grad_norm: 0.9645565466373754, iteration: 79918
loss: 1.018715262413025,grad_norm: 0.8813145290338903, iteration: 79919
loss: 1.0051195621490479,grad_norm: 0.9999991019630919, iteration: 79920
loss: 0.9877870678901672,grad_norm: 0.9849977680412944, iteration: 79921
loss: 0.992443859577179,grad_norm: 0.9774932490136856, iteration: 79922
loss: 1.0064326524734497,grad_norm: 0.9099324386446922, iteration: 79923
loss: 0.9799715876579285,grad_norm: 0.9994509837883154, iteration: 79924
loss: 1.0040391683578491,grad_norm: 0.9999990160854857, iteration: 79925
loss: 0.9854580163955688,grad_norm: 0.99999922426079, iteration: 79926
loss: 0.9933711886405945,grad_norm: 0.8687959589368789, iteration: 79927
loss: 1.0317952632904053,grad_norm: 0.9999991252112619, iteration: 79928
loss: 0.9948285818099976,grad_norm: 0.8267166710398277, iteration: 79929
loss: 1.0774812698364258,grad_norm: 0.9999999419446877, iteration: 79930
loss: 1.02283775806427,grad_norm: 0.9999997892410937, iteration: 79931
loss: 1.0135107040405273,grad_norm: 0.9999995066839578, iteration: 79932
loss: 1.0158724784851074,grad_norm: 0.9993072741655873, iteration: 79933
loss: 0.9950271844863892,grad_norm: 0.9056392369604813, iteration: 79934
loss: 0.9992581605911255,grad_norm: 0.9999993975529937, iteration: 79935
loss: 0.9802213311195374,grad_norm: 0.9999991507014163, iteration: 79936
loss: 0.973341703414917,grad_norm: 0.9828863183850854, iteration: 79937
loss: 1.018784999847412,grad_norm: 0.9999991341160908, iteration: 79938
loss: 0.9610329270362854,grad_norm: 0.9999990798463764, iteration: 79939
loss: 0.9871413111686707,grad_norm: 0.9394504680741443, iteration: 79940
loss: 0.987365186214447,grad_norm: 0.9538070105756776, iteration: 79941
loss: 1.0100007057189941,grad_norm: 0.9999990381348456, iteration: 79942
loss: 1.0138498544692993,grad_norm: 0.9999992532978318, iteration: 79943
loss: 1.0149741172790527,grad_norm: 0.9999991578066852, iteration: 79944
loss: 0.9987311363220215,grad_norm: 0.9999989837245689, iteration: 79945
loss: 0.9935594201087952,grad_norm: 0.9999993313755221, iteration: 79946
loss: 1.0224584341049194,grad_norm: 0.9999989786802764, iteration: 79947
loss: 1.0128428936004639,grad_norm: 0.8804698224318956, iteration: 79948
loss: 0.9804754853248596,grad_norm: 0.9083605347768022, iteration: 79949
loss: 0.9894007444381714,grad_norm: 0.9999992420262845, iteration: 79950
loss: 1.005044937133789,grad_norm: 0.9999991232803134, iteration: 79951
loss: 1.0153214931488037,grad_norm: 0.9239024688015536, iteration: 79952
loss: 1.0235718488693237,grad_norm: 0.9999991378756853, iteration: 79953
loss: 0.9662440419197083,grad_norm: 0.999999094685268, iteration: 79954
loss: 0.9819006323814392,grad_norm: 0.9003463584006711, iteration: 79955
loss: 1.0122601985931396,grad_norm: 0.9999990746332832, iteration: 79956
loss: 1.0051229000091553,grad_norm: 0.9999991393378168, iteration: 79957
loss: 1.0054866075515747,grad_norm: 0.9999992139833367, iteration: 79958
loss: 1.0233547687530518,grad_norm: 0.9431756829869188, iteration: 79959
loss: 0.9916263818740845,grad_norm: 0.999998940680626, iteration: 79960
loss: 1.0335205793380737,grad_norm: 0.9855940087366775, iteration: 79961
loss: 1.0240318775177002,grad_norm: 0.9999999483105342, iteration: 79962
loss: 0.9921281337738037,grad_norm: 0.9999989925768119, iteration: 79963
loss: 1.0382674932479858,grad_norm: 0.9999990759079685, iteration: 79964
loss: 1.0312193632125854,grad_norm: 0.9999991322034271, iteration: 79965
loss: 1.0018837451934814,grad_norm: 0.8702690130887208, iteration: 79966
loss: 1.001183032989502,grad_norm: 0.9999990369230414, iteration: 79967
loss: 1.0589241981506348,grad_norm: 0.8892718262210166, iteration: 79968
loss: 0.9968236088752747,grad_norm: 0.9999990672139748, iteration: 79969
loss: 0.9810574650764465,grad_norm: 0.9999991502683018, iteration: 79970
loss: 0.9892823696136475,grad_norm: 0.9267097186793715, iteration: 79971
loss: 0.9538827538490295,grad_norm: 0.9999991194645493, iteration: 79972
loss: 1.0060298442840576,grad_norm: 0.9999992069864462, iteration: 79973
loss: 0.9867697358131409,grad_norm: 0.9999990794362122, iteration: 79974
loss: 1.112398386001587,grad_norm: 0.9999991692361297, iteration: 79975
loss: 0.9979087710380554,grad_norm: 0.8917243150133194, iteration: 79976
loss: 0.993011474609375,grad_norm: 0.9999989623564113, iteration: 79977
loss: 0.9860067963600159,grad_norm: 0.9999989794205676, iteration: 79978
loss: 1.0238596200942993,grad_norm: 0.9999991648420534, iteration: 79979
loss: 1.0454778671264648,grad_norm: 0.9999995700540417, iteration: 79980
loss: 0.9602574706077576,grad_norm: 0.9999991867301582, iteration: 79981
loss: 0.984131395816803,grad_norm: 0.9999991683048587, iteration: 79982
loss: 1.00139582157135,grad_norm: 0.8698285493446644, iteration: 79983
loss: 1.0459245443344116,grad_norm: 0.9999994494398242, iteration: 79984
loss: 0.9877216815948486,grad_norm: 0.999998987262068, iteration: 79985
loss: 1.0237913131713867,grad_norm: 0.9391483353738922, iteration: 79986
loss: 0.9801994562149048,grad_norm: 0.9999992516963475, iteration: 79987
loss: 0.9874526858329773,grad_norm: 0.9999991993738899, iteration: 79988
loss: 1.036800503730774,grad_norm: 0.9999992892724578, iteration: 79989
loss: 1.004218339920044,grad_norm: 0.9480028208688905, iteration: 79990
loss: 0.9555469155311584,grad_norm: 0.9773243142201392, iteration: 79991
loss: 0.9779579043388367,grad_norm: 0.9999991116963338, iteration: 79992
loss: 1.0158681869506836,grad_norm: 0.999999031058158, iteration: 79993
loss: 0.9679991602897644,grad_norm: 0.9999992604247288, iteration: 79994
loss: 1.0270979404449463,grad_norm: 0.9999995186121021, iteration: 79995
loss: 1.1126716136932373,grad_norm: 0.9999996147113617, iteration: 79996
loss: 0.9927018284797668,grad_norm: 0.9999992807996823, iteration: 79997
loss: 1.0044188499450684,grad_norm: 0.9999991112010439, iteration: 79998
loss: 0.9987183809280396,grad_norm: 0.9425791865541078, iteration: 79999
loss: 0.9820365905761719,grad_norm: 0.9999992048742278, iteration: 80000
Evaluating at step 80000
{'val': 0.9962307680398226, 'test': 2.2093953469309944}
loss: 1.0309429168701172,grad_norm: 0.9021093900259369, iteration: 80001
loss: 0.9898205399513245,grad_norm: 0.9812642395389001, iteration: 80002
loss: 1.0155620574951172,grad_norm: 0.9999992334295914, iteration: 80003
loss: 1.00253164768219,grad_norm: 0.9847684467242502, iteration: 80004
loss: 1.0139882564544678,grad_norm: 0.9999991709962758, iteration: 80005
loss: 0.9952678680419922,grad_norm: 0.9750462967216996, iteration: 80006
loss: 0.984439492225647,grad_norm: 0.999999223348082, iteration: 80007
loss: 0.9966208338737488,grad_norm: 0.999999180299553, iteration: 80008
loss: 0.9685806035995483,grad_norm: 0.9999990111107763, iteration: 80009
loss: 1.001665711402893,grad_norm: 0.9999990918536514, iteration: 80010
loss: 0.992103099822998,grad_norm: 0.9999992937410684, iteration: 80011
loss: 1.0426428318023682,grad_norm: 0.9999992123166501, iteration: 80012
loss: 1.0182512998580933,grad_norm: 0.9999990049945886, iteration: 80013
loss: 0.9481047987937927,grad_norm: 0.9999990187383223, iteration: 80014
loss: 0.9852871894836426,grad_norm: 0.9886933802699853, iteration: 80015
loss: 0.9684382677078247,grad_norm: 0.9999990422373629, iteration: 80016
loss: 1.0266931056976318,grad_norm: 0.9944378630498891, iteration: 80017
loss: 0.992495596408844,grad_norm: 0.99999904184497, iteration: 80018
loss: 0.9894533753395081,grad_norm: 0.9947403780818076, iteration: 80019
loss: 0.9960530996322632,grad_norm: 0.9999992166694616, iteration: 80020
loss: 0.9739250540733337,grad_norm: 0.9999992110408314, iteration: 80021
loss: 1.029006838798523,grad_norm: 0.9608242439333048, iteration: 80022
loss: 0.9881754517555237,grad_norm: 0.9999991963366279, iteration: 80023
loss: 1.017148494720459,grad_norm: 0.9135190583642557, iteration: 80024
loss: 0.9683141112327576,grad_norm: 0.8203880651416818, iteration: 80025
loss: 1.0120524168014526,grad_norm: 0.9871820555535907, iteration: 80026
loss: 1.0067390203475952,grad_norm: 0.9999991299175791, iteration: 80027
loss: 1.0339900255203247,grad_norm: 0.9999990218642224, iteration: 80028
loss: 1.0355360507965088,grad_norm: 0.999999234798411, iteration: 80029
loss: 1.0252505540847778,grad_norm: 0.9999990832717409, iteration: 80030
loss: 1.0234031677246094,grad_norm: 0.9276922521279234, iteration: 80031
loss: 1.032602071762085,grad_norm: 0.9375047229629713, iteration: 80032
loss: 0.9970166087150574,grad_norm: 0.9999992169162913, iteration: 80033
loss: 1.0528303384780884,grad_norm: 0.9999991650007455, iteration: 80034
loss: 0.9876903295516968,grad_norm: 0.8383852500163512, iteration: 80035
loss: 1.006263017654419,grad_norm: 0.9999990990568346, iteration: 80036
loss: 1.0198596715927124,grad_norm: 0.9999990705051176, iteration: 80037
loss: 1.0193400382995605,grad_norm: 0.9654434315351521, iteration: 80038
loss: 0.9952006340026855,grad_norm: 0.9999990261405228, iteration: 80039
loss: 0.9843506813049316,grad_norm: 0.9812368180467939, iteration: 80040
loss: 1.00713312625885,grad_norm: 0.9999996237573715, iteration: 80041
loss: 0.9643391370773315,grad_norm: 0.9999991798125812, iteration: 80042
loss: 1.0429751873016357,grad_norm: 0.9999992721028584, iteration: 80043
loss: 0.9966127276420593,grad_norm: 0.9999993003035226, iteration: 80044
loss: 0.9685598611831665,grad_norm: 0.9999992951141006, iteration: 80045
loss: 1.030165195465088,grad_norm: 0.9999990710512487, iteration: 80046
loss: 1.0034089088439941,grad_norm: 0.9999990569384319, iteration: 80047
loss: 1.020007848739624,grad_norm: 0.9999991331480332, iteration: 80048
loss: 0.9969200491905212,grad_norm: 0.9999991698810106, iteration: 80049
loss: 1.0096817016601562,grad_norm: 0.9578468646876763, iteration: 80050
loss: 1.0087714195251465,grad_norm: 0.935035399707994, iteration: 80051
loss: 1.004260540008545,grad_norm: 0.9999992041350115, iteration: 80052
loss: 1.0014898777008057,grad_norm: 0.8782690284390907, iteration: 80053
loss: 1.0039336681365967,grad_norm: 0.989216173705777, iteration: 80054
loss: 0.9999021887779236,grad_norm: 0.9999990038008002, iteration: 80055
loss: 1.050334095954895,grad_norm: 0.9999996917077433, iteration: 80056
loss: 1.0334255695343018,grad_norm: 0.9999990295481412, iteration: 80057
loss: 0.9932899475097656,grad_norm: 0.9999990444833088, iteration: 80058
loss: 0.9988952875137329,grad_norm: 0.999999182164028, iteration: 80059
loss: 0.9909873604774475,grad_norm: 0.87149333619205, iteration: 80060
loss: 0.9772100448608398,grad_norm: 0.9369769863034202, iteration: 80061
loss: 1.0175553560256958,grad_norm: 0.9628336696395964, iteration: 80062
loss: 0.9878966808319092,grad_norm: 0.9329037483348522, iteration: 80063
loss: 1.0188285112380981,grad_norm: 0.9999989641862822, iteration: 80064
loss: 1.0535036325454712,grad_norm: 0.9999994762147248, iteration: 80065
loss: 1.0152077674865723,grad_norm: 0.9999991976779325, iteration: 80066
loss: 0.9706319570541382,grad_norm: 0.9999993580306572, iteration: 80067
loss: 1.0128835439682007,grad_norm: 0.8955271081408887, iteration: 80068
loss: 0.9899318218231201,grad_norm: 0.9999988874307756, iteration: 80069
loss: 1.010986566543579,grad_norm: 0.961124735830328, iteration: 80070
loss: 0.9887620806694031,grad_norm: 0.9371569153468666, iteration: 80071
loss: 0.994619607925415,grad_norm: 0.9999989881320778, iteration: 80072
loss: 1.0393182039260864,grad_norm: 0.9999991474598174, iteration: 80073
loss: 1.0155699253082275,grad_norm: 0.9213289093363535, iteration: 80074
loss: 1.0360491275787354,grad_norm: 0.9999991272545901, iteration: 80075
loss: 0.9626516103744507,grad_norm: 0.9985738178129052, iteration: 80076
loss: 1.024533748626709,grad_norm: 0.9999991962998113, iteration: 80077
loss: 0.9916548132896423,grad_norm: 0.9600703709383898, iteration: 80078
loss: 1.1026408672332764,grad_norm: 0.9999992247085596, iteration: 80079
loss: 0.9797124266624451,grad_norm: 0.9999991519608186, iteration: 80080
loss: 1.003662109375,grad_norm: 0.9946246191097611, iteration: 80081
loss: 1.0110913515090942,grad_norm: 0.999999335294807, iteration: 80082
loss: 0.9840678572654724,grad_norm: 0.9999991058805507, iteration: 80083
loss: 1.0155624151229858,grad_norm: 0.9999990508681011, iteration: 80084
loss: 1.033043622970581,grad_norm: 0.9999990349270772, iteration: 80085
loss: 0.9976654648780823,grad_norm: 0.9282572851480874, iteration: 80086
loss: 0.9755607843399048,grad_norm: 0.999998959942928, iteration: 80087
loss: 0.9845101833343506,grad_norm: 0.9999990965011549, iteration: 80088
loss: 0.9708635807037354,grad_norm: 0.952094329840279, iteration: 80089
loss: 0.9750635623931885,grad_norm: 0.9702556505981362, iteration: 80090
loss: 1.020873785018921,grad_norm: 0.9642623834196585, iteration: 80091
loss: 1.0297682285308838,grad_norm: 0.99999916694926, iteration: 80092
loss: 1.000550389289856,grad_norm: 0.9485223756074966, iteration: 80093
loss: 1.0039161443710327,grad_norm: 0.9999991584658072, iteration: 80094
loss: 0.9960084557533264,grad_norm: 0.9890635754675791, iteration: 80095
loss: 0.9797264933586121,grad_norm: 0.9999991314651591, iteration: 80096
loss: 1.001184105873108,grad_norm: 0.9999991691975213, iteration: 80097
loss: 1.0132731199264526,grad_norm: 0.999999416011057, iteration: 80098
loss: 1.0193049907684326,grad_norm: 0.8357981468034057, iteration: 80099
loss: 1.0019662380218506,grad_norm: 0.9920128615183705, iteration: 80100
loss: 1.0318630933761597,grad_norm: 0.8851699245408902, iteration: 80101
loss: 0.9882262945175171,grad_norm: 0.9999990366127869, iteration: 80102
loss: 1.038533091545105,grad_norm: 0.9853877612959878, iteration: 80103
loss: 1.0261123180389404,grad_norm: 0.9405034223513248, iteration: 80104
loss: 1.0269298553466797,grad_norm: 0.9999990815148195, iteration: 80105
loss: 0.9820298552513123,grad_norm: 0.9734175085057086, iteration: 80106
loss: 0.9885071516036987,grad_norm: 0.9999990746415705, iteration: 80107
loss: 1.0001753568649292,grad_norm: 0.9537215818154181, iteration: 80108
loss: 1.019636631011963,grad_norm: 0.9999997160223875, iteration: 80109
loss: 1.0013532638549805,grad_norm: 0.9232341143041202, iteration: 80110
loss: 0.9862672686576843,grad_norm: 0.9999992552003412, iteration: 80111
loss: 1.01634681224823,grad_norm: 0.9999991120133722, iteration: 80112
loss: 0.9944846034049988,grad_norm: 0.8694294222036641, iteration: 80113
loss: 0.9867732524871826,grad_norm: 0.9999990998483721, iteration: 80114
loss: 1.0181760787963867,grad_norm: 0.9789769030124573, iteration: 80115
loss: 0.9894347190856934,grad_norm: 0.9914236098363365, iteration: 80116
loss: 1.0377377271652222,grad_norm: 0.9999995690084741, iteration: 80117
loss: 1.01748526096344,grad_norm: 0.9999991847499429, iteration: 80118
loss: 1.0158158540725708,grad_norm: 0.9999989997058012, iteration: 80119
loss: 1.0403615236282349,grad_norm: 0.9999992793287144, iteration: 80120
loss: 1.00552237033844,grad_norm: 0.9999990724677864, iteration: 80121
loss: 0.9814538359642029,grad_norm: 0.9999991939850877, iteration: 80122
loss: 1.036212682723999,grad_norm: 0.9999991138652519, iteration: 80123
loss: 1.0264605283737183,grad_norm: 0.9999993713451002, iteration: 80124
loss: 1.0174545049667358,grad_norm: 0.9311986525412057, iteration: 80125
loss: 1.0247867107391357,grad_norm: 0.9999992500549402, iteration: 80126
loss: 1.0078034400939941,grad_norm: 0.9523778911263158, iteration: 80127
loss: 1.0172793865203857,grad_norm: 0.9860485969715591, iteration: 80128
loss: 1.0175247192382812,grad_norm: 0.9999990698391955, iteration: 80129
loss: 1.0288106203079224,grad_norm: 0.9999991663908427, iteration: 80130
loss: 0.9650904536247253,grad_norm: 0.9699733745859955, iteration: 80131
loss: 0.9739595055580139,grad_norm: 0.9999992736794466, iteration: 80132
loss: 1.041709303855896,grad_norm: 0.9999995801003932, iteration: 80133
loss: 0.9771562218666077,grad_norm: 0.9999990788751583, iteration: 80134
loss: 1.0296626091003418,grad_norm: 0.9999997315055954, iteration: 80135
loss: 1.0221160650253296,grad_norm: 0.9623810666303776, iteration: 80136
loss: 0.9617083072662354,grad_norm: 0.9999992700135902, iteration: 80137
loss: 0.9966477155685425,grad_norm: 0.9999989729698034, iteration: 80138
loss: 0.9818834066390991,grad_norm: 0.9999991233491717, iteration: 80139
loss: 1.0124825239181519,grad_norm: 0.9999992529030182, iteration: 80140
loss: 1.0031943321228027,grad_norm: 0.9033129388516538, iteration: 80141
loss: 1.0121099948883057,grad_norm: 0.9999998416362564, iteration: 80142
loss: 1.0169097185134888,grad_norm: 0.9999990985091816, iteration: 80143
loss: 1.0007171630859375,grad_norm: 0.9999989336566085, iteration: 80144
loss: 1.0343424081802368,grad_norm: 0.9999993087185891, iteration: 80145
loss: 0.9873997569084167,grad_norm: 0.9999990971088757, iteration: 80146
loss: 1.0067745447158813,grad_norm: 0.8458041370992124, iteration: 80147
loss: 0.9967876672744751,grad_norm: 0.999999454353527, iteration: 80148
loss: 1.025223731994629,grad_norm: 0.9999991295765981, iteration: 80149
loss: 0.9934186935424805,grad_norm: 0.9999990452687322, iteration: 80150
loss: 0.9917734265327454,grad_norm: 0.9225728612952546, iteration: 80151
loss: 1.0166085958480835,grad_norm: 0.9999991595166503, iteration: 80152
loss: 0.9861671328544617,grad_norm: 0.9999992924164904, iteration: 80153
loss: 0.9883962273597717,grad_norm: 0.9971719829014437, iteration: 80154
loss: 0.9883734583854675,grad_norm: 0.9999992034619628, iteration: 80155
loss: 0.9641604423522949,grad_norm: 0.9368187877410028, iteration: 80156
loss: 0.9751207232475281,grad_norm: 0.9999991582006782, iteration: 80157
loss: 0.9501426219940186,grad_norm: 0.962654339211614, iteration: 80158
loss: 0.981930673122406,grad_norm: 0.9999991206285063, iteration: 80159
loss: 1.0242891311645508,grad_norm: 0.9999996438520266, iteration: 80160
loss: 1.0259044170379639,grad_norm: 0.9999991335763483, iteration: 80161
loss: 0.9755340814590454,grad_norm: 0.9950799575470404, iteration: 80162
loss: 0.9975793957710266,grad_norm: 0.9999990617397548, iteration: 80163
loss: 1.0095170736312866,grad_norm: 0.8695594118922076, iteration: 80164
loss: 0.9976122379302979,grad_norm: 0.99999896136239, iteration: 80165
loss: 1.0196199417114258,grad_norm: 0.9681672476107718, iteration: 80166
loss: 0.9786238074302673,grad_norm: 0.9999990483719977, iteration: 80167
loss: 0.9842094779014587,grad_norm: 0.9999990556022919, iteration: 80168
loss: 1.0116854906082153,grad_norm: 0.9999991500388089, iteration: 80169
loss: 0.9850694537162781,grad_norm: 0.9284247462229022, iteration: 80170
loss: 1.0188591480255127,grad_norm: 0.9999998608158587, iteration: 80171
loss: 0.9790598154067993,grad_norm: 0.999999293493839, iteration: 80172
loss: 0.9949266910552979,grad_norm: 0.845254456116875, iteration: 80173
loss: 1.0000860691070557,grad_norm: 0.9999991815219398, iteration: 80174
loss: 0.9959249496459961,grad_norm: 0.9999991227324478, iteration: 80175
loss: 0.9938539266586304,grad_norm: 0.9999990000591021, iteration: 80176
loss: 0.9922319054603577,grad_norm: 0.999999200838014, iteration: 80177
loss: 0.981447696685791,grad_norm: 0.9558103170423792, iteration: 80178
loss: 0.989062488079071,grad_norm: 0.9857662750041581, iteration: 80179
loss: 0.971355140209198,grad_norm: 0.9237700643178363, iteration: 80180
loss: 0.9752262830734253,grad_norm: 0.9139175556791034, iteration: 80181
loss: 1.0187571048736572,grad_norm: 0.8397238906833585, iteration: 80182
loss: 0.9634895324707031,grad_norm: 0.9999992249245839, iteration: 80183
loss: 1.0012075901031494,grad_norm: 0.9999991598617941, iteration: 80184
loss: 0.9652310609817505,grad_norm: 0.9887155124636317, iteration: 80185
loss: 0.9819972515106201,grad_norm: 0.9999990524710012, iteration: 80186
loss: 0.9718132615089417,grad_norm: 0.907005601666713, iteration: 80187
loss: 1.0246692895889282,grad_norm: 0.9999992834769118, iteration: 80188
loss: 0.987379789352417,grad_norm: 0.9388819303716638, iteration: 80189
loss: 0.9959065318107605,grad_norm: 0.9637107854410705, iteration: 80190
loss: 0.9462713003158569,grad_norm: 0.9999992476862708, iteration: 80191
loss: 1.0498777627944946,grad_norm: 0.9822961549800677, iteration: 80192
loss: 0.9733304381370544,grad_norm: 0.9557911605999793, iteration: 80193
loss: 0.9987859129905701,grad_norm: 0.9999990607288032, iteration: 80194
loss: 1.0553278923034668,grad_norm: 0.9999995920684239, iteration: 80195
loss: 0.9771815538406372,grad_norm: 0.9999991670693624, iteration: 80196
loss: 1.0413352251052856,grad_norm: 0.9999991958186605, iteration: 80197
loss: 0.9899494647979736,grad_norm: 0.9999990729372296, iteration: 80198
loss: 0.9932833313941956,grad_norm: 0.9787746862787975, iteration: 80199
loss: 0.9993911385536194,grad_norm: 0.9313672445212406, iteration: 80200
loss: 1.0134137868881226,grad_norm: 0.9999992960767454, iteration: 80201
loss: 1.0120338201522827,grad_norm: 0.9999990394293086, iteration: 80202
loss: 0.9528340697288513,grad_norm: 0.9895492680907879, iteration: 80203
loss: 1.1881635189056396,grad_norm: 0.9999998445699981, iteration: 80204
loss: 1.0075255632400513,grad_norm: 0.9999991880497175, iteration: 80205
loss: 0.999884843826294,grad_norm: 0.9781415498572953, iteration: 80206
loss: 1.0334028005599976,grad_norm: 0.9999996157882884, iteration: 80207
loss: 0.9805070757865906,grad_norm: 0.9336885517387942, iteration: 80208
loss: 1.0351393222808838,grad_norm: 0.9999989856642459, iteration: 80209
loss: 1.033104419708252,grad_norm: 0.9999991219735115, iteration: 80210
loss: 0.9691240191459656,grad_norm: 0.9741735705346881, iteration: 80211
loss: 0.9818429350852966,grad_norm: 0.999998997215315, iteration: 80212
loss: 1.0128391981124878,grad_norm: 0.9317700949854707, iteration: 80213
loss: 1.0412864685058594,grad_norm: 0.9999991955093332, iteration: 80214
loss: 0.9739432334899902,grad_norm: 0.9999991867947753, iteration: 80215
loss: 0.9848267436027527,grad_norm: 0.999999370329345, iteration: 80216
loss: 1.0012191534042358,grad_norm: 0.9999989978352681, iteration: 80217
loss: 0.9586170315742493,grad_norm: 0.9999990747318003, iteration: 80218
loss: 1.0551235675811768,grad_norm: 0.9999999197691228, iteration: 80219
loss: 1.0008002519607544,grad_norm: 0.9999990796986483, iteration: 80220
loss: 0.9685929417610168,grad_norm: 0.8951072994769618, iteration: 80221
loss: 0.9982520341873169,grad_norm: 0.99999907103741, iteration: 80222
loss: 1.0149290561676025,grad_norm: 0.9999991229284474, iteration: 80223
loss: 1.0248615741729736,grad_norm: 0.9999992383868804, iteration: 80224
loss: 1.001421570777893,grad_norm: 0.9999991906754814, iteration: 80225
loss: 0.9900321364402771,grad_norm: 0.9074756218698113, iteration: 80226
loss: 0.9956429600715637,grad_norm: 0.9999990962599526, iteration: 80227
loss: 1.0118056535720825,grad_norm: 0.9811155038042788, iteration: 80228
loss: 1.0233968496322632,grad_norm: 0.9999991235289661, iteration: 80229
loss: 0.9896640777587891,grad_norm: 0.9999990579633514, iteration: 80230
loss: 1.007115364074707,grad_norm: 0.9999991116936846, iteration: 80231
loss: 0.9876124858856201,grad_norm: 0.99200222338245, iteration: 80232
loss: 0.9943077564239502,grad_norm: 0.9999990082873377, iteration: 80233
loss: 1.0184473991394043,grad_norm: 0.9999992327138444, iteration: 80234
loss: 0.9524686336517334,grad_norm: 0.8669941411527905, iteration: 80235
loss: 0.990032970905304,grad_norm: 0.9999992530675137, iteration: 80236
loss: 1.041122317314148,grad_norm: 0.9999992425907339, iteration: 80237
loss: 0.9923320412635803,grad_norm: 0.9751483516370635, iteration: 80238
loss: 0.977209210395813,grad_norm: 0.9980767842704321, iteration: 80239
loss: 1.0061904191970825,grad_norm: 0.9999990981228793, iteration: 80240
loss: 0.9881654381752014,grad_norm: 0.8696774296456997, iteration: 80241
loss: 0.9830564856529236,grad_norm: 0.9999991276351914, iteration: 80242
loss: 1.0322272777557373,grad_norm: 0.9999989609985412, iteration: 80243
loss: 1.0118308067321777,grad_norm: 0.9999991934740122, iteration: 80244
loss: 0.9882091283798218,grad_norm: 0.9227553358783676, iteration: 80245
loss: 0.9990596771240234,grad_norm: 0.9999991415556473, iteration: 80246
loss: 1.0507407188415527,grad_norm: 0.9999992160126545, iteration: 80247
loss: 1.0437982082366943,grad_norm: 0.9999992023663216, iteration: 80248
loss: 1.0018290281295776,grad_norm: 0.8260200145317794, iteration: 80249
loss: 1.012033224105835,grad_norm: 0.9999991903836675, iteration: 80250
loss: 1.0094592571258545,grad_norm: 0.9999992836836622, iteration: 80251
loss: 0.9804511666297913,grad_norm: 0.95319035203828, iteration: 80252
loss: 1.006783127784729,grad_norm: 0.9999992752099623, iteration: 80253
loss: 1.069583535194397,grad_norm: 0.9999992862949324, iteration: 80254
loss: 0.9549594521522522,grad_norm: 0.9999990429385748, iteration: 80255
loss: 1.0226409435272217,grad_norm: 0.9999994217219977, iteration: 80256
loss: 1.009580135345459,grad_norm: 0.9999991513844111, iteration: 80257
loss: 1.0181750059127808,grad_norm: 0.9999996941248545, iteration: 80258
loss: 0.998213529586792,grad_norm: 0.9999990307304997, iteration: 80259
loss: 0.9886807203292847,grad_norm: 0.9999993062934233, iteration: 80260
loss: 1.0650819540023804,grad_norm: 0.9999992526534002, iteration: 80261
loss: 1.0123730897903442,grad_norm: 0.9999989512893118, iteration: 80262
loss: 0.9711160659790039,grad_norm: 0.9999989134283778, iteration: 80263
loss: 1.01559317111969,grad_norm: 0.8231411959915558, iteration: 80264
loss: 1.007670521736145,grad_norm: 0.9999990598507911, iteration: 80265
loss: 0.9472637176513672,grad_norm: 0.8398763920144756, iteration: 80266
loss: 0.9803458452224731,grad_norm: 0.9290874529655639, iteration: 80267
loss: 0.9940997362136841,grad_norm: 0.9999993220223309, iteration: 80268
loss: 1.0289642810821533,grad_norm: 0.9999991579076095, iteration: 80269
loss: 0.9953681826591492,grad_norm: 0.9364494639370454, iteration: 80270
loss: 1.01423180103302,grad_norm: 0.9999990796901723, iteration: 80271
loss: 0.9938459992408752,grad_norm: 0.9999991469181134, iteration: 80272
loss: 0.9914633631706238,grad_norm: 0.9999990732937514, iteration: 80273
loss: 1.0153729915618896,grad_norm: 0.9999992016596088, iteration: 80274
loss: 1.0104079246520996,grad_norm: 0.9999990000674791, iteration: 80275
loss: 1.022035837173462,grad_norm: 0.9999996374728443, iteration: 80276
loss: 1.0332930088043213,grad_norm: 0.9999989908471345, iteration: 80277
loss: 0.9963569045066833,grad_norm: 0.9999991362032004, iteration: 80278
loss: 1.0031238794326782,grad_norm: 0.9547187005847356, iteration: 80279
loss: 0.9616560339927673,grad_norm: 0.9179807709480101, iteration: 80280
loss: 0.9788780808448792,grad_norm: 0.9893977282177289, iteration: 80281
loss: 1.0118255615234375,grad_norm: 0.9999990717722093, iteration: 80282
loss: 1.0088616609573364,grad_norm: 0.999999046854559, iteration: 80283
loss: 1.0077813863754272,grad_norm: 0.9999992045549546, iteration: 80284
loss: 1.0129337310791016,grad_norm: 0.9522984234419059, iteration: 80285
loss: 0.9978882074356079,grad_norm: 0.9999991876173427, iteration: 80286
loss: 0.974079430103302,grad_norm: 0.9999991801134023, iteration: 80287
loss: 1.0037553310394287,grad_norm: 0.9954781906409124, iteration: 80288
loss: 1.0016882419586182,grad_norm: 0.9999990930913709, iteration: 80289
loss: 1.0049859285354614,grad_norm: 0.9457623756233903, iteration: 80290
loss: 1.018601417541504,grad_norm: 0.9914633247452979, iteration: 80291
loss: 0.9824421405792236,grad_norm: 0.9999990657061555, iteration: 80292
loss: 1.0142502784729004,grad_norm: 0.9515954713115338, iteration: 80293
loss: 1.0109138488769531,grad_norm: 0.9999989913814068, iteration: 80294
loss: 0.9789167046546936,grad_norm: 0.9251963115440093, iteration: 80295
loss: 1.0693519115447998,grad_norm: 0.9999999784528707, iteration: 80296
loss: 0.9405669569969177,grad_norm: 0.9307059325715412, iteration: 80297
loss: 1.006861686706543,grad_norm: 0.9999992009281227, iteration: 80298
loss: 0.9976155161857605,grad_norm: 0.9999992746902823, iteration: 80299
loss: 1.0135477781295776,grad_norm: 0.9031945109929441, iteration: 80300
loss: 0.9949331879615784,grad_norm: 0.9588394720920637, iteration: 80301
loss: 1.0202772617340088,grad_norm: 0.9999990205160239, iteration: 80302
loss: 1.0202783346176147,grad_norm: 0.9634575466215529, iteration: 80303
loss: 1.0475947856903076,grad_norm: 0.9999992043653838, iteration: 80304
loss: 1.0234414339065552,grad_norm: 0.9999996865016387, iteration: 80305
loss: 1.0190317630767822,grad_norm: 0.9999990442448861, iteration: 80306
loss: 1.000050663948059,grad_norm: 0.8481202549402754, iteration: 80307
loss: 0.9692041277885437,grad_norm: 0.8868523323230346, iteration: 80308
loss: 0.9847357869148254,grad_norm: 0.9999989459976505, iteration: 80309
loss: 1.0043158531188965,grad_norm: 0.9999991260392422, iteration: 80310
loss: 1.0318979024887085,grad_norm: 0.9741311410597514, iteration: 80311
loss: 1.0008665323257446,grad_norm: 0.8952954509300597, iteration: 80312
loss: 1.007615566253662,grad_norm: 0.9596431664682523, iteration: 80313
loss: 1.003942608833313,grad_norm: 0.9999989983543489, iteration: 80314
loss: 0.9749903082847595,grad_norm: 0.9999992567557002, iteration: 80315
loss: 0.9823107719421387,grad_norm: 0.9999990519385544, iteration: 80316
loss: 1.0061854124069214,grad_norm: 0.9937251039877646, iteration: 80317
loss: 1.0142326354980469,grad_norm: 0.9999991894763887, iteration: 80318
loss: 1.016116976737976,grad_norm: 0.9999993251047613, iteration: 80319
loss: 1.014665961265564,grad_norm: 0.9999991919184014, iteration: 80320
loss: 1.0153310298919678,grad_norm: 0.9656889883760066, iteration: 80321
loss: 1.0091060400009155,grad_norm: 0.9343080649758134, iteration: 80322
loss: 1.0003728866577148,grad_norm: 0.9999990802897809, iteration: 80323
loss: 1.0332438945770264,grad_norm: 0.9152604930632936, iteration: 80324
loss: 1.0169185400009155,grad_norm: 0.999998913502769, iteration: 80325
loss: 0.9743361473083496,grad_norm: 0.9801262250100278, iteration: 80326
loss: 0.9991713762283325,grad_norm: 0.9420335726216065, iteration: 80327
loss: 0.9828891754150391,grad_norm: 0.9999991547724574, iteration: 80328
loss: 0.9970932602882385,grad_norm: 0.8504578254162193, iteration: 80329
loss: 1.00981605052948,grad_norm: 0.9999991438118536, iteration: 80330
loss: 1.0190762281417847,grad_norm: 0.9999992286877262, iteration: 80331
loss: 0.9960238337516785,grad_norm: 0.9999991318707697, iteration: 80332
loss: 1.0194576978683472,grad_norm: 0.9999990572028964, iteration: 80333
loss: 0.9968181252479553,grad_norm: 0.9999991796348586, iteration: 80334
loss: 1.0279607772827148,grad_norm: 0.9890572935817223, iteration: 80335
loss: 1.0153731107711792,grad_norm: 0.9999991478437914, iteration: 80336
loss: 0.9719712734222412,grad_norm: 0.9999991301416652, iteration: 80337
loss: 0.9552109241485596,grad_norm: 0.9999990529015672, iteration: 80338
loss: 0.9857622981071472,grad_norm: 0.7791892098332269, iteration: 80339
loss: 0.9906889796257019,grad_norm: 0.9999991249682071, iteration: 80340
loss: 0.9886009097099304,grad_norm: 0.9643867076234504, iteration: 80341
loss: 0.9758703708648682,grad_norm: 0.9204203114649265, iteration: 80342
loss: 0.955247163772583,grad_norm: 0.9999993227965404, iteration: 80343
loss: 0.9873285293579102,grad_norm: 0.9999992679277165, iteration: 80344
loss: 1.0591928958892822,grad_norm: 0.9999994751916955, iteration: 80345
loss: 0.9830074310302734,grad_norm: 0.9999990666538042, iteration: 80346
loss: 0.9863612651824951,grad_norm: 0.999999183280623, iteration: 80347
loss: 1.0213229656219482,grad_norm: 0.9999991724075202, iteration: 80348
loss: 1.0124180316925049,grad_norm: 0.9999991867480379, iteration: 80349
loss: 0.9833970069885254,grad_norm: 0.9999991791190542, iteration: 80350
loss: 1.0085893869400024,grad_norm: 0.9999991121032622, iteration: 80351
loss: 0.982295036315918,grad_norm: 0.9841464831118628, iteration: 80352
loss: 1.010196328163147,grad_norm: 0.9999992139791283, iteration: 80353
loss: 1.0084826946258545,grad_norm: 0.9999992350524658, iteration: 80354
loss: 0.9994016885757446,grad_norm: 0.9267698861468401, iteration: 80355
loss: 1.0130136013031006,grad_norm: 0.8982091731654036, iteration: 80356
loss: 1.0042372941970825,grad_norm: 0.9204161928454159, iteration: 80357
loss: 0.9901014566421509,grad_norm: 0.9999993109836787, iteration: 80358
loss: 0.9857404232025146,grad_norm: 0.9999990846890313, iteration: 80359
loss: 1.0054610967636108,grad_norm: 0.9999992485417605, iteration: 80360
loss: 0.9862208366394043,grad_norm: 0.9040672579581647, iteration: 80361
loss: 1.0253874063491821,grad_norm: 0.9999992084937271, iteration: 80362
loss: 1.0038468837738037,grad_norm: 0.9198118496351975, iteration: 80363
loss: 0.979051947593689,grad_norm: 0.9999991059985087, iteration: 80364
loss: 0.9648944139480591,grad_norm: 0.9999992167810736, iteration: 80365
loss: 1.067476749420166,grad_norm: 0.9999997543776057, iteration: 80366
loss: 0.9882052540779114,grad_norm: 0.9999990730807543, iteration: 80367
loss: 1.0183037519454956,grad_norm: 0.9999991733046693, iteration: 80368
loss: 0.991132378578186,grad_norm: 0.9999992474149633, iteration: 80369
loss: 1.0081521272659302,grad_norm: 0.9999990193734111, iteration: 80370
loss: 1.0191287994384766,grad_norm: 0.9999990694478268, iteration: 80371
loss: 1.0071488618850708,grad_norm: 0.9999990036322539, iteration: 80372
loss: 0.9962915182113647,grad_norm: 0.9999989399738362, iteration: 80373
loss: 0.9852838516235352,grad_norm: 0.9418974883485507, iteration: 80374
loss: 0.9747557044029236,grad_norm: 0.8972739625901449, iteration: 80375
loss: 1.0211997032165527,grad_norm: 0.9999990814252075, iteration: 80376
loss: 0.9674720764160156,grad_norm: 0.9999990993110426, iteration: 80377
loss: 0.9835336804389954,grad_norm: 0.9999993073019737, iteration: 80378
loss: 1.0294114351272583,grad_norm: 0.9999989496328379, iteration: 80379
loss: 0.9942785501480103,grad_norm: 0.9999991599319681, iteration: 80380
loss: 1.0349892377853394,grad_norm: 0.986247170461112, iteration: 80381
loss: 1.011101245880127,grad_norm: 0.9999992910033764, iteration: 80382
loss: 0.9941964149475098,grad_norm: 0.9999991408016763, iteration: 80383
loss: 1.002988338470459,grad_norm: 0.9999990745887272, iteration: 80384
loss: 0.9947121739387512,grad_norm: 0.9999991684753551, iteration: 80385
loss: 0.9593006372451782,grad_norm: 0.8894189938495335, iteration: 80386
loss: 1.0311747789382935,grad_norm: 0.89742534505552, iteration: 80387
loss: 1.0135581493377686,grad_norm: 0.999999252710487, iteration: 80388
loss: 0.9830783605575562,grad_norm: 0.999999275035961, iteration: 80389
loss: 0.996559202671051,grad_norm: 0.9854395740792683, iteration: 80390
loss: 1.0281685590744019,grad_norm: 0.9999990785669445, iteration: 80391
loss: 0.9953670501708984,grad_norm: 0.9999991710210266, iteration: 80392
loss: 0.9994952082633972,grad_norm: 0.9999990647899146, iteration: 80393
loss: 1.0036816596984863,grad_norm: 0.9845391852916727, iteration: 80394
loss: 1.0267246961593628,grad_norm: 0.9921740204896479, iteration: 80395
loss: 1.0274806022644043,grad_norm: 0.9999990966506935, iteration: 80396
loss: 0.9945759773254395,grad_norm: 0.9999991510824199, iteration: 80397
loss: 1.0175553560256958,grad_norm: 0.9529630882186715, iteration: 80398
loss: 0.9937651753425598,grad_norm: 0.9067549443695622, iteration: 80399
loss: 0.9990690350532532,grad_norm: 0.9668053793074032, iteration: 80400
loss: 1.028165578842163,grad_norm: 0.9254930801512107, iteration: 80401
loss: 0.9761665463447571,grad_norm: 0.8532970869956231, iteration: 80402
loss: 1.0118509531021118,grad_norm: 0.9656318526227683, iteration: 80403
loss: 1.0252183675765991,grad_norm: 0.9999992289739829, iteration: 80404
loss: 0.9922780990600586,grad_norm: 0.9828164722109468, iteration: 80405
loss: 0.9804893732070923,grad_norm: 0.9999990131981683, iteration: 80406
loss: 1.0043494701385498,grad_norm: 0.9999995765923949, iteration: 80407
loss: 1.0126886367797852,grad_norm: 0.9999991797158851, iteration: 80408
loss: 1.0127633810043335,grad_norm: 0.999999433438226, iteration: 80409
loss: 0.9823589324951172,grad_norm: 0.9999995504289173, iteration: 80410
loss: 1.0078376531600952,grad_norm: 0.8959388137830336, iteration: 80411
loss: 0.9875547289848328,grad_norm: 0.9999999625875317, iteration: 80412
loss: 0.9733347296714783,grad_norm: 0.9573856354315049, iteration: 80413
loss: 1.0064609050750732,grad_norm: 0.8953819816641208, iteration: 80414
loss: 0.9859308004379272,grad_norm: 0.9972726591263333, iteration: 80415
loss: 0.9991287589073181,grad_norm: 0.9999991085778357, iteration: 80416
loss: 1.0040135383605957,grad_norm: 0.999999217027474, iteration: 80417
loss: 1.002616286277771,grad_norm: 0.9999991786247179, iteration: 80418
loss: 0.9820660948753357,grad_norm: 0.9999990396798472, iteration: 80419
loss: 0.9884653091430664,grad_norm: 0.997641514831751, iteration: 80420
loss: 0.9864938259124756,grad_norm: 0.9999989358791443, iteration: 80421
loss: 0.9777114987373352,grad_norm: 0.9999990797352697, iteration: 80422
loss: 1.000160813331604,grad_norm: 0.845174170662023, iteration: 80423
loss: 1.0343137979507446,grad_norm: 0.9999991807413963, iteration: 80424
loss: 1.0210875272750854,grad_norm: 0.9999993449922316, iteration: 80425
loss: 1.0224589109420776,grad_norm: 0.9999992183806885, iteration: 80426
loss: 1.0073882341384888,grad_norm: 0.9999993423145572, iteration: 80427
loss: 1.0086225271224976,grad_norm: 0.9999991973975785, iteration: 80428
loss: 1.0195763111114502,grad_norm: 0.9999992700650266, iteration: 80429
loss: 1.0165899991989136,grad_norm: 0.9999991580049382, iteration: 80430
loss: 0.9767099618911743,grad_norm: 0.966861084475283, iteration: 80431
loss: 1.0156444311141968,grad_norm: 0.9999990961286895, iteration: 80432
loss: 1.0011721849441528,grad_norm: 0.9999990498353342, iteration: 80433
loss: 0.995211124420166,grad_norm: 0.9390816690130237, iteration: 80434
loss: 0.9978375434875488,grad_norm: 0.9999990255481166, iteration: 80435
loss: 1.0189374685287476,grad_norm: 0.9999991493864916, iteration: 80436
loss: 0.9730836749076843,grad_norm: 0.9999991264739738, iteration: 80437
loss: 1.0178452730178833,grad_norm: 0.9999991045028284, iteration: 80438
loss: 0.997432291507721,grad_norm: 0.9999992377167128, iteration: 80439
loss: 1.0145725011825562,grad_norm: 0.9341782976134626, iteration: 80440
loss: 0.9719781875610352,grad_norm: 0.9431165667619856, iteration: 80441
loss: 1.0053004026412964,grad_norm: 0.9999992819932932, iteration: 80442
loss: 1.0037678480148315,grad_norm: 0.9999989944838605, iteration: 80443
loss: 0.9799968600273132,grad_norm: 0.9999990504440343, iteration: 80444
loss: 0.9963420033454895,grad_norm: 0.9999990486194976, iteration: 80445
loss: 1.0191055536270142,grad_norm: 0.9999991146271308, iteration: 80446
loss: 0.977033793926239,grad_norm: 0.9999991276796754, iteration: 80447
loss: 0.9879454970359802,grad_norm: 0.9820808719255588, iteration: 80448
loss: 0.9670007824897766,grad_norm: 0.999999134741707, iteration: 80449
loss: 0.9961022138595581,grad_norm: 0.9584118294857075, iteration: 80450
loss: 0.9929051399230957,grad_norm: 0.9999990008689273, iteration: 80451
loss: 0.9970877766609192,grad_norm: 0.9782514420226839, iteration: 80452
loss: 1.0028663873672485,grad_norm: 0.8596438630463856, iteration: 80453
loss: 0.9460879564285278,grad_norm: 0.98650733168383, iteration: 80454
loss: 0.9906390309333801,grad_norm: 0.8990834457495457, iteration: 80455
loss: 1.0279362201690674,grad_norm: 0.9999990188656523, iteration: 80456
loss: 0.9818434119224548,grad_norm: 0.9999990114599855, iteration: 80457
loss: 1.007484793663025,grad_norm: 0.9999991429141908, iteration: 80458
loss: 1.0116068124771118,grad_norm: 0.9999990225365866, iteration: 80459
loss: 0.9691142439842224,grad_norm: 0.9999991560890189, iteration: 80460
loss: 1.0156702995300293,grad_norm: 0.9870214266626042, iteration: 80461
loss: 0.9954123497009277,grad_norm: 0.9649633550361254, iteration: 80462
loss: 1.003473162651062,grad_norm: 0.9595978449006751, iteration: 80463
loss: 1.0162965059280396,grad_norm: 0.9389850462773469, iteration: 80464
loss: 1.0004770755767822,grad_norm: 0.9999992616679905, iteration: 80465
loss: 0.989525318145752,grad_norm: 0.9999991962502176, iteration: 80466
loss: 0.9951514601707458,grad_norm: 0.9999992596692131, iteration: 80467
loss: 0.9785589575767517,grad_norm: 0.9838344089787233, iteration: 80468
loss: 1.004823088645935,grad_norm: 0.9286582562864482, iteration: 80469
loss: 1.0138121843338013,grad_norm: 0.9999991299025144, iteration: 80470
loss: 0.9983167052268982,grad_norm: 0.9999990259845213, iteration: 80471
loss: 0.9933589696884155,grad_norm: 0.9999990846841076, iteration: 80472
loss: 1.0325841903686523,grad_norm: 0.9999992326092353, iteration: 80473
loss: 0.9785346388816833,grad_norm: 0.9999989880913994, iteration: 80474
loss: 1.0289297103881836,grad_norm: 0.9557638785960609, iteration: 80475
loss: 0.9946879744529724,grad_norm: 0.9999991146531991, iteration: 80476
loss: 0.9978561997413635,grad_norm: 0.9204599826935093, iteration: 80477
loss: 1.0086920261383057,grad_norm: 0.9999991054853068, iteration: 80478
loss: 1.057277798652649,grad_norm: 0.9999988398394488, iteration: 80479
loss: 1.0163525342941284,grad_norm: 0.9408973560891148, iteration: 80480
loss: 1.002366542816162,grad_norm: 0.9153380097893985, iteration: 80481
loss: 1.019486665725708,grad_norm: 0.9673415655192303, iteration: 80482
loss: 1.0053428411483765,grad_norm: 0.999999007414567, iteration: 80483
loss: 1.0103286504745483,grad_norm: 0.9803964619735254, iteration: 80484
loss: 1.0228676795959473,grad_norm: 0.8556093539370488, iteration: 80485
loss: 1.004583716392517,grad_norm: 0.999999045117497, iteration: 80486
loss: 0.9829182624816895,grad_norm: 0.9999990230073971, iteration: 80487
loss: 0.9724749326705933,grad_norm: 0.9786303162092033, iteration: 80488
loss: 1.044695496559143,grad_norm: 0.9999992265227549, iteration: 80489
loss: 1.0140330791473389,grad_norm: 0.9177697847889409, iteration: 80490
loss: 0.9938660860061646,grad_norm: 0.8756340650700352, iteration: 80491
loss: 1.00709867477417,grad_norm: 0.9999992726276273, iteration: 80492
loss: 1.0060807466506958,grad_norm: 0.9960890540145991, iteration: 80493
loss: 1.011864185333252,grad_norm: 0.9999993491035437, iteration: 80494
loss: 0.9567097425460815,grad_norm: 0.9999991441714324, iteration: 80495
loss: 1.0295977592468262,grad_norm: 0.9999993049338312, iteration: 80496
loss: 1.0137615203857422,grad_norm: 0.8200627955084681, iteration: 80497
loss: 1.0181739330291748,grad_norm: 0.9999991876236838, iteration: 80498
loss: 1.001388430595398,grad_norm: 0.9999989786737131, iteration: 80499
loss: 0.9980929493904114,grad_norm: 0.9999991805338247, iteration: 80500
loss: 1.0050561428070068,grad_norm: 0.9999990697086132, iteration: 80501
loss: 1.0341562032699585,grad_norm: 0.9579750415093017, iteration: 80502
loss: 0.9777664542198181,grad_norm: 0.9999992879110315, iteration: 80503
loss: 1.00434148311615,grad_norm: 0.9372324249461987, iteration: 80504
loss: 0.9685279130935669,grad_norm: 0.9999990523327918, iteration: 80505
loss: 1.0159213542938232,grad_norm: 0.9494202688014887, iteration: 80506
loss: 1.0298610925674438,grad_norm: 0.9999995536355089, iteration: 80507
loss: 0.9783005118370056,grad_norm: 0.9399927396410879, iteration: 80508
loss: 1.0231982469558716,grad_norm: 0.9999991652740818, iteration: 80509
loss: 0.9592662453651428,grad_norm: 0.9999991902360059, iteration: 80510
loss: 0.9813803434371948,grad_norm: 0.8366905478237616, iteration: 80511
loss: 0.9989274144172668,grad_norm: 0.9999991079619203, iteration: 80512
loss: 1.008334994316101,grad_norm: 0.9471893142629432, iteration: 80513
loss: 1.0110220909118652,grad_norm: 0.9999992090045676, iteration: 80514
loss: 1.0130043029785156,grad_norm: 0.9999991271689291, iteration: 80515
loss: 1.01454496383667,grad_norm: 0.9034374038596737, iteration: 80516
loss: 0.9807663559913635,grad_norm: 0.9999990793144777, iteration: 80517
loss: 1.0071419477462769,grad_norm: 0.9999990785797371, iteration: 80518
loss: 0.9803336262702942,grad_norm: 0.9999991352801256, iteration: 80519
loss: 0.9900286793708801,grad_norm: 0.9999990499213052, iteration: 80520
loss: 0.9943091869354248,grad_norm: 0.9528349005429101, iteration: 80521
loss: 1.013108730316162,grad_norm: 0.9999991091715515, iteration: 80522
loss: 1.0080112218856812,grad_norm: 0.9999992843571991, iteration: 80523
loss: 1.0159307718276978,grad_norm: 0.9999991194110286, iteration: 80524
loss: 0.9868260025978088,grad_norm: 0.9933840491222142, iteration: 80525
loss: 0.9870133996009827,grad_norm: 0.999998943106296, iteration: 80526
loss: 1.0494612455368042,grad_norm: 0.9999995832520129, iteration: 80527
loss: 1.0168262720108032,grad_norm: 0.9999991422425722, iteration: 80528
loss: 1.0007271766662598,grad_norm: 0.9999991625569761, iteration: 80529
loss: 0.9907735586166382,grad_norm: 0.9970947501966265, iteration: 80530
loss: 1.0211212635040283,grad_norm: 0.9100202110120369, iteration: 80531
loss: 0.9931926727294922,grad_norm: 0.961473760983052, iteration: 80532
loss: 1.0222340822219849,grad_norm: 0.9898540558063695, iteration: 80533
loss: 1.0319393873214722,grad_norm: 0.9999991037039686, iteration: 80534
loss: 0.9700360894203186,grad_norm: 0.9999994204970132, iteration: 80535
loss: 1.0199558734893799,grad_norm: 0.939721448538486, iteration: 80536
loss: 0.967380166053772,grad_norm: 0.9999991609966293, iteration: 80537
loss: 1.0173590183258057,grad_norm: 0.906829259785241, iteration: 80538
loss: 1.0158288478851318,grad_norm: 0.9999991891934796, iteration: 80539
loss: 1.0364675521850586,grad_norm: 0.9999992744474635, iteration: 80540
loss: 1.0522992610931396,grad_norm: 0.9999991943209866, iteration: 80541
loss: 0.9825583100318909,grad_norm: 0.9999991080859348, iteration: 80542
loss: 0.9907205700874329,grad_norm: 0.9999991962999802, iteration: 80543
loss: 0.9901832342147827,grad_norm: 0.9999991352136316, iteration: 80544
loss: 1.0394598245620728,grad_norm: 0.9653808209009209, iteration: 80545
loss: 0.9853243827819824,grad_norm: 0.9999990954139412, iteration: 80546
loss: 0.9641192555427551,grad_norm: 0.9999991571289716, iteration: 80547
loss: 0.9682623147964478,grad_norm: 0.9999990954917625, iteration: 80548
loss: 1.0070772171020508,grad_norm: 0.9999993297458354, iteration: 80549
loss: 1.0080525875091553,grad_norm: 0.9260668913475003, iteration: 80550
loss: 1.0109753608703613,grad_norm: 0.9999991616988229, iteration: 80551
loss: 0.9829638004302979,grad_norm: 0.8928231276612373, iteration: 80552
loss: 1.0159924030303955,grad_norm: 0.9999992621161162, iteration: 80553
loss: 0.9955161213874817,grad_norm: 0.999999218643883, iteration: 80554
loss: 1.0075417757034302,grad_norm: 0.9999990451813351, iteration: 80555
loss: 0.9883472323417664,grad_norm: 0.9976423292298604, iteration: 80556
loss: 0.9555255174636841,grad_norm: 0.93032519728135, iteration: 80557
loss: 0.99986732006073,grad_norm: 0.9999990544438251, iteration: 80558
loss: 1.0042527914047241,grad_norm: 0.9999992355105967, iteration: 80559
loss: 1.0195127725601196,grad_norm: 0.9999991466061853, iteration: 80560
loss: 1.0878870487213135,grad_norm: 0.9999993531089713, iteration: 80561
loss: 0.9392172694206238,grad_norm: 0.9999992360305775, iteration: 80562
loss: 0.9941350221633911,grad_norm: 0.9859896019932306, iteration: 80563
loss: 1.0030603408813477,grad_norm: 0.9999992422720912, iteration: 80564
loss: 1.0073598623275757,grad_norm: 0.9999992077578689, iteration: 80565
loss: 0.9322599768638611,grad_norm: 0.9999991108726929, iteration: 80566
loss: 1.0094380378723145,grad_norm: 0.9999990288227324, iteration: 80567
loss: 0.9635782241821289,grad_norm: 0.9971336639034746, iteration: 80568
loss: 0.9567776322364807,grad_norm: 0.9999990124234868, iteration: 80569
loss: 1.028603434562683,grad_norm: 0.9999992253617148, iteration: 80570
loss: 1.014238953590393,grad_norm: 0.9697890098911894, iteration: 80571
loss: 0.9695525765419006,grad_norm: 0.9999991261378962, iteration: 80572
loss: 0.9835188388824463,grad_norm: 0.9999989406405239, iteration: 80573
loss: 0.9897484183311462,grad_norm: 0.9027000461332907, iteration: 80574
loss: 1.0312715768814087,grad_norm: 0.9899449456027729, iteration: 80575
loss: 0.9974042177200317,grad_norm: 0.9999989524085998, iteration: 80576
loss: 0.9659603834152222,grad_norm: 0.9703956684251748, iteration: 80577
loss: 0.993004322052002,grad_norm: 0.9606534754430835, iteration: 80578
loss: 1.0055909156799316,grad_norm: 0.8756843315928244, iteration: 80579
loss: 1.002274751663208,grad_norm: 0.882690830447136, iteration: 80580
loss: 0.9766577482223511,grad_norm: 0.9999991263997458, iteration: 80581
loss: 0.999664843082428,grad_norm: 0.9999990649611938, iteration: 80582
loss: 1.0219279527664185,grad_norm: 0.9999991632784527, iteration: 80583
loss: 0.9890701174736023,grad_norm: 0.984116765168019, iteration: 80584
loss: 0.9584082961082458,grad_norm: 0.9628278521429378, iteration: 80585
loss: 1.0200622081756592,grad_norm: 0.9999995553344597, iteration: 80586
loss: 1.027694582939148,grad_norm: 0.9999992395477261, iteration: 80587
loss: 0.9939612746238708,grad_norm: 0.9999992754340045, iteration: 80588
loss: 1.007997989654541,grad_norm: 0.9999991489833918, iteration: 80589
loss: 0.9933347702026367,grad_norm: 0.9866465484116624, iteration: 80590
loss: 1.0083750486373901,grad_norm: 0.8908534195307712, iteration: 80591
loss: 0.993185818195343,grad_norm: 0.8129265876042594, iteration: 80592
loss: 0.9792775511741638,grad_norm: 0.9745337706148881, iteration: 80593
loss: 1.0876214504241943,grad_norm: 0.9999998576954842, iteration: 80594
loss: 0.9589146971702576,grad_norm: 0.9853034679129202, iteration: 80595
loss: 1.0170626640319824,grad_norm: 0.9999991038879303, iteration: 80596
loss: 0.9965635538101196,grad_norm: 0.9999992251478292, iteration: 80597
loss: 1.0263245105743408,grad_norm: 0.9999993770994469, iteration: 80598
loss: 1.0201654434204102,grad_norm: 0.7760236466092326, iteration: 80599
loss: 1.001399278640747,grad_norm: 0.9999992106880222, iteration: 80600
loss: 0.9807327389717102,grad_norm: 0.909843514277018, iteration: 80601
loss: 1.0323371887207031,grad_norm: 0.9999991039311422, iteration: 80602
loss: 0.9877789616584778,grad_norm: 0.9999991672461604, iteration: 80603
loss: 1.0167841911315918,grad_norm: 0.84514812752286, iteration: 80604
loss: 0.9919425845146179,grad_norm: 0.9500861213569155, iteration: 80605
loss: 1.0069905519485474,grad_norm: 0.9222438955860497, iteration: 80606
loss: 1.0055606365203857,grad_norm: 0.9999992690701357, iteration: 80607
loss: 0.978748083114624,grad_norm: 0.9999991754483843, iteration: 80608
loss: 1.016496181488037,grad_norm: 0.9999993082786457, iteration: 80609
loss: 1.048519492149353,grad_norm: 0.9999998161802569, iteration: 80610
loss: 0.9969124794006348,grad_norm: 0.9999989784863572, iteration: 80611
loss: 1.0345560312271118,grad_norm: 0.9999989632027464, iteration: 80612
loss: 1.043809175491333,grad_norm: 0.9999994147196544, iteration: 80613
loss: 1.0012404918670654,grad_norm: 0.9999990252985129, iteration: 80614
loss: 0.9860290288925171,grad_norm: 0.9999990910540141, iteration: 80615
loss: 1.0133017301559448,grad_norm: 0.9999991956643418, iteration: 80616
loss: 0.9912699460983276,grad_norm: 0.8907248811641015, iteration: 80617
loss: 1.0243473052978516,grad_norm: 0.9999991179489095, iteration: 80618
loss: 0.9698073863983154,grad_norm: 0.973277241024858, iteration: 80619
loss: 0.9939351081848145,grad_norm: 0.9999989925588502, iteration: 80620
loss: 0.9691594243049622,grad_norm: 0.9999990993513672, iteration: 80621
loss: 0.9935562610626221,grad_norm: 0.9999991192027577, iteration: 80622
loss: 1.0153785943984985,grad_norm: 0.9999992593679665, iteration: 80623
loss: 0.9884263873100281,grad_norm: 0.9999990427786545, iteration: 80624
loss: 0.9803121089935303,grad_norm: 0.9999991093115147, iteration: 80625
loss: 1.0147267580032349,grad_norm: 0.9258485302762456, iteration: 80626
loss: 1.0527321100234985,grad_norm: 0.9999991083002312, iteration: 80627
loss: 0.9993856549263,grad_norm: 0.9999991260891911, iteration: 80628
loss: 1.019262433052063,grad_norm: 0.9999992112967159, iteration: 80629
loss: 1.0006834268569946,grad_norm: 0.9999991478439845, iteration: 80630
loss: 1.030725121498108,grad_norm: 0.999999428688925, iteration: 80631
loss: 1.0107617378234863,grad_norm: 0.9999990874042471, iteration: 80632
loss: 0.9885688424110413,grad_norm: 0.9999991690034948, iteration: 80633
loss: 0.9808672666549683,grad_norm: 0.992681739750641, iteration: 80634
loss: 1.041399359703064,grad_norm: 0.9999997722558153, iteration: 80635
loss: 1.0245368480682373,grad_norm: 0.99999919461301, iteration: 80636
loss: 1.0413533449172974,grad_norm: 0.9519017959106295, iteration: 80637
loss: 0.9606011509895325,grad_norm: 0.9999990665401083, iteration: 80638
loss: 0.9675565958023071,grad_norm: 0.9999990856989857, iteration: 80639
loss: 0.959396243095398,grad_norm: 0.999999129754001, iteration: 80640
loss: 0.9990653991699219,grad_norm: 0.9999991023743211, iteration: 80641
loss: 0.9845583438873291,grad_norm: 0.9999994965537408, iteration: 80642
loss: 0.9903411865234375,grad_norm: 0.9033637860995595, iteration: 80643
loss: 0.9813100099563599,grad_norm: 0.999999188863873, iteration: 80644
loss: 0.9715096354484558,grad_norm: 0.9999990531271448, iteration: 80645
loss: 1.0014574527740479,grad_norm: 0.9999991964307849, iteration: 80646
loss: 0.9881911277770996,grad_norm: 0.9999992148239022, iteration: 80647
loss: 1.0237997770309448,grad_norm: 0.9999992207426586, iteration: 80648
loss: 1.033791184425354,grad_norm: 0.9999993761495904, iteration: 80649
loss: 0.9739309549331665,grad_norm: 0.9999991734272927, iteration: 80650
loss: 1.005864143371582,grad_norm: 0.9999992286290816, iteration: 80651
loss: 0.9831412434577942,grad_norm: 0.999999347399473, iteration: 80652
loss: 1.0137099027633667,grad_norm: 0.9999990610285229, iteration: 80653
loss: 1.0356781482696533,grad_norm: 0.9999994697522707, iteration: 80654
loss: 1.0245825052261353,grad_norm: 0.9999990369505417, iteration: 80655
loss: 0.9507970213890076,grad_norm: 0.9361131764953323, iteration: 80656
loss: 0.997137188911438,grad_norm: 0.8476022057973126, iteration: 80657
loss: 0.9984121322631836,grad_norm: 0.9999991038916857, iteration: 80658
loss: 1.006615400314331,grad_norm: 0.9999993117858377, iteration: 80659
loss: 1.0046898126602173,grad_norm: 0.9999991009848334, iteration: 80660
loss: 0.988286018371582,grad_norm: 0.9999991187292027, iteration: 80661
loss: 1.0007413625717163,grad_norm: 0.9999992835743374, iteration: 80662
loss: 0.9727217555046082,grad_norm: 0.8975748171977481, iteration: 80663
loss: 1.019737720489502,grad_norm: 0.9999991538971607, iteration: 80664
loss: 1.0197632312774658,grad_norm: 0.9999991557543252, iteration: 80665
loss: 0.984414279460907,grad_norm: 0.9999991064088458, iteration: 80666
loss: 1.0011619329452515,grad_norm: 0.9726150593418899, iteration: 80667
loss: 0.9582470059394836,grad_norm: 0.9999991510836721, iteration: 80668
loss: 1.008634328842163,grad_norm: 0.9999990388822037, iteration: 80669
loss: 1.000514030456543,grad_norm: 0.9941143431672698, iteration: 80670
loss: 1.020234227180481,grad_norm: 0.8844011401344942, iteration: 80671
loss: 0.9783516526222229,grad_norm: 0.8967781511221444, iteration: 80672
loss: 1.0113751888275146,grad_norm: 0.9999994208759287, iteration: 80673
loss: 1.0383166074752808,grad_norm: 0.9606538537375402, iteration: 80674
loss: 0.9719544053077698,grad_norm: 0.9820799535999757, iteration: 80675
loss: 1.0190138816833496,grad_norm: 0.9288661296355789, iteration: 80676
loss: 1.0163962841033936,grad_norm: 0.9999989973221057, iteration: 80677
loss: 0.990222692489624,grad_norm: 0.9999991214503433, iteration: 80678
loss: 1.0201319456100464,grad_norm: 0.999999272965277, iteration: 80679
loss: 1.0139546394348145,grad_norm: 0.9999992153656158, iteration: 80680
loss: 0.9750750660896301,grad_norm: 0.9999990110569092, iteration: 80681
loss: 1.0037819147109985,grad_norm: 0.9999990339010942, iteration: 80682
loss: 1.0456446409225464,grad_norm: 0.9999992731550961, iteration: 80683
loss: 0.9765564203262329,grad_norm: 0.9999991007845151, iteration: 80684
loss: 0.974665641784668,grad_norm: 0.9999991681023112, iteration: 80685
loss: 0.9737799167633057,grad_norm: 0.9999996849637092, iteration: 80686
loss: 0.9908900856971741,grad_norm: 0.9464349747328094, iteration: 80687
loss: 0.9963050484657288,grad_norm: 0.9999990373700698, iteration: 80688
loss: 0.9899080991744995,grad_norm: 0.9999990833813945, iteration: 80689
loss: 1.0582149028778076,grad_norm: 0.999999664195664, iteration: 80690
loss: 0.994561493396759,grad_norm: 0.9999991748626462, iteration: 80691
loss: 0.9955818057060242,grad_norm: 0.9999989107673007, iteration: 80692
loss: 1.0466865301132202,grad_norm: 0.9999996537425738, iteration: 80693
loss: 1.0209294557571411,grad_norm: 0.9999993663807445, iteration: 80694
loss: 0.9900031685829163,grad_norm: 0.8413534081738501, iteration: 80695
loss: 0.9791653752326965,grad_norm: 0.9999991733942711, iteration: 80696
loss: 0.9966976046562195,grad_norm: 0.9508496537074551, iteration: 80697
loss: 1.0015252828598022,grad_norm: 0.9877619050686318, iteration: 80698
loss: 1.028588891029358,grad_norm: 0.9578557593976954, iteration: 80699
loss: 0.9810256958007812,grad_norm: 0.8700343585538397, iteration: 80700
loss: 0.9850402474403381,grad_norm: 0.9994997976471951, iteration: 80701
loss: 0.9946441650390625,grad_norm: 0.8983663196382726, iteration: 80702
loss: 0.9930666089057922,grad_norm: 0.9999991992348135, iteration: 80703
loss: 1.0256011486053467,grad_norm: 0.9999990445220701, iteration: 80704
loss: 0.994134247303009,grad_norm: 0.9999990030513691, iteration: 80705
loss: 1.02899968624115,grad_norm: 0.9999992822196291, iteration: 80706
loss: 1.003179669380188,grad_norm: 0.9999992010430327, iteration: 80707
loss: 0.99417644739151,grad_norm: 0.9999991605358111, iteration: 80708
loss: 1.0002983808517456,grad_norm: 0.9999989761801368, iteration: 80709
loss: 1.0641992092132568,grad_norm: 0.9999992653693569, iteration: 80710
loss: 1.0045839548110962,grad_norm: 0.9999991028306487, iteration: 80711
loss: 0.9754706025123596,grad_norm: 0.9999990607435937, iteration: 80712
loss: 1.0175153017044067,grad_norm: 0.9999991587491573, iteration: 80713
loss: 0.990606427192688,grad_norm: 0.9667029222891169, iteration: 80714
loss: 0.9883797764778137,grad_norm: 0.999999180920867, iteration: 80715
loss: 0.9593688249588013,grad_norm: 0.9421971071673088, iteration: 80716
loss: 0.9848926663398743,grad_norm: 0.9999990440670028, iteration: 80717
loss: 1.0207301378250122,grad_norm: 0.9999993175916752, iteration: 80718
loss: 1.0282057523727417,grad_norm: 0.9999990765889564, iteration: 80719
loss: 0.9892973303794861,grad_norm: 0.9999991101316282, iteration: 80720
loss: 1.0149281024932861,grad_norm: 0.9753705348876027, iteration: 80721
loss: 1.0365840196609497,grad_norm: 0.9999993217358082, iteration: 80722
loss: 1.001518726348877,grad_norm: 0.9997624771947449, iteration: 80723
loss: 1.019190788269043,grad_norm: 0.9999991938588758, iteration: 80724
loss: 0.980358898639679,grad_norm: 0.999999019645318, iteration: 80725
loss: 1.0173429250717163,grad_norm: 0.9999991897313628, iteration: 80726
loss: 0.9956369400024414,grad_norm: 0.9999991472317028, iteration: 80727
loss: 0.9929943084716797,grad_norm: 0.999999059589443, iteration: 80728
loss: 1.0256330966949463,grad_norm: 0.8544298155987725, iteration: 80729
loss: 1.006148099899292,grad_norm: 0.999999111621022, iteration: 80730
loss: 1.028108835220337,grad_norm: 0.9999990525686676, iteration: 80731
loss: 0.9822424054145813,grad_norm: 0.9250374345703427, iteration: 80732
loss: 1.006361961364746,grad_norm: 0.9999996099973153, iteration: 80733
loss: 1.0275098085403442,grad_norm: 0.9077442456283189, iteration: 80734
loss: 1.0235326290130615,grad_norm: 0.9999991112454556, iteration: 80735
loss: 1.0250492095947266,grad_norm: 0.9999990964867893, iteration: 80736
loss: 0.9756984114646912,grad_norm: 0.9999991544705297, iteration: 80737
loss: 0.9718874096870422,grad_norm: 0.9878507078717998, iteration: 80738
loss: 1.0026768445968628,grad_norm: 0.8971766430746587, iteration: 80739
loss: 1.0142892599105835,grad_norm: 0.9999989508866638, iteration: 80740
loss: 1.021807312965393,grad_norm: 0.9999991152556967, iteration: 80741
loss: 1.0305200815200806,grad_norm: 0.9999997905671854, iteration: 80742
loss: 1.0088610649108887,grad_norm: 0.999999115133748, iteration: 80743
loss: 0.9807072281837463,grad_norm: 0.8830889073855337, iteration: 80744
loss: 1.0650838613510132,grad_norm: 0.999999768300886, iteration: 80745
loss: 1.042398452758789,grad_norm: 0.9999993055885382, iteration: 80746
loss: 1.013163685798645,grad_norm: 0.9999990586714349, iteration: 80747
loss: 0.9856442809104919,grad_norm: 0.734689288265105, iteration: 80748
loss: 0.9881980419158936,grad_norm: 0.999999150774313, iteration: 80749
loss: 1.0014601945877075,grad_norm: 0.9999991580948884, iteration: 80750
loss: 0.978546679019928,grad_norm: 0.9999992154536559, iteration: 80751
loss: 0.9976968765258789,grad_norm: 0.9999991015386046, iteration: 80752
loss: 1.0032227039337158,grad_norm: 0.9118341428691036, iteration: 80753
loss: 0.974181056022644,grad_norm: 0.999999039146163, iteration: 80754
loss: 1.0179803371429443,grad_norm: 0.9999990329152595, iteration: 80755
loss: 1.0120410919189453,grad_norm: 0.9999991622375631, iteration: 80756
loss: 0.9465228915214539,grad_norm: 0.9999990957723117, iteration: 80757
loss: 0.9785809516906738,grad_norm: 0.9605902204227386, iteration: 80758
loss: 0.9962065815925598,grad_norm: 0.9999991417147096, iteration: 80759
loss: 1.0085737705230713,grad_norm: 0.9525898919757267, iteration: 80760
loss: 1.0411784648895264,grad_norm: 0.9461027314044036, iteration: 80761
loss: 1.0277067422866821,grad_norm: 0.988483889613997, iteration: 80762
loss: 0.9859441518783569,grad_norm: 0.999999091770119, iteration: 80763
loss: 1.0527065992355347,grad_norm: 0.9999993214144669, iteration: 80764
loss: 1.022220253944397,grad_norm: 0.9414332815668466, iteration: 80765
loss: 1.0279128551483154,grad_norm: 0.9658259087644809, iteration: 80766
loss: 0.9983628988265991,grad_norm: 0.907148336329452, iteration: 80767
loss: 1.051087498664856,grad_norm: 0.9999993815921355, iteration: 80768
loss: 0.9571232795715332,grad_norm: 0.9999992009311923, iteration: 80769
loss: 0.9972735643386841,grad_norm: 0.999999179067446, iteration: 80770
loss: 1.0233250856399536,grad_norm: 0.9128104961081187, iteration: 80771
loss: 1.0171445608139038,grad_norm: 0.9999990515585391, iteration: 80772
loss: 1.0074551105499268,grad_norm: 0.9999988716867415, iteration: 80773
loss: 1.0359190702438354,grad_norm: 0.9999992038780187, iteration: 80774
loss: 0.9809908866882324,grad_norm: 0.9999991552331479, iteration: 80775
loss: 1.00327730178833,grad_norm: 0.9999991741297867, iteration: 80776
loss: 1.0248533487319946,grad_norm: 0.99999911558412, iteration: 80777
loss: 1.0646812915802002,grad_norm: 0.9999995666692097, iteration: 80778
loss: 0.9700373411178589,grad_norm: 0.9999992754756909, iteration: 80779
loss: 1.017645001411438,grad_norm: 0.9999991397377958, iteration: 80780
loss: 0.9817433953285217,grad_norm: 0.9999990003659097, iteration: 80781
loss: 0.990058183670044,grad_norm: 0.9999991904803431, iteration: 80782
loss: 1.0010398626327515,grad_norm: 0.9999993106514357, iteration: 80783
loss: 0.9862000346183777,grad_norm: 0.9240598824273534, iteration: 80784
loss: 1.0091062784194946,grad_norm: 0.9999991830903977, iteration: 80785
loss: 0.9999445080757141,grad_norm: 0.9999991668990476, iteration: 80786
loss: 1.0601484775543213,grad_norm: 0.9999992084787056, iteration: 80787
loss: 1.000247597694397,grad_norm: 0.9999990544110766, iteration: 80788
loss: 1.0183002948760986,grad_norm: 0.999998937814696, iteration: 80789
loss: 0.9872720241546631,grad_norm: 0.999999070377475, iteration: 80790
loss: 1.0068244934082031,grad_norm: 0.9999991414278566, iteration: 80791
loss: 0.9958182573318481,grad_norm: 0.9999992231346801, iteration: 80792
loss: 0.9718064665794373,grad_norm: 0.9999989507898527, iteration: 80793
loss: 0.9716039896011353,grad_norm: 0.9999991731183921, iteration: 80794
loss: 0.9809975028038025,grad_norm: 0.999999095307813, iteration: 80795
loss: 1.0157171487808228,grad_norm: 0.999999008322711, iteration: 80796
loss: 1.0069001913070679,grad_norm: 0.9999989390483945, iteration: 80797
loss: 1.0252604484558105,grad_norm: 0.9999992781300905, iteration: 80798
loss: 1.0042455196380615,grad_norm: 0.9999990215350988, iteration: 80799
loss: 1.000005841255188,grad_norm: 0.9999991488524956, iteration: 80800
loss: 1.0251798629760742,grad_norm: 0.9999990631847323, iteration: 80801
loss: 0.9524941444396973,grad_norm: 0.7745686776268974, iteration: 80802
loss: 0.9753150343894958,grad_norm: 0.9999991386655867, iteration: 80803
loss: 1.0012346506118774,grad_norm: 0.9380918569156314, iteration: 80804
loss: 0.9775953888893127,grad_norm: 0.9999991081463439, iteration: 80805
loss: 0.9277514219284058,grad_norm: 0.9999991990964383, iteration: 80806
loss: 1.0058677196502686,grad_norm: 0.9063255837054592, iteration: 80807
loss: 0.989427924156189,grad_norm: 0.9481648077223347, iteration: 80808
loss: 1.0268924236297607,grad_norm: 0.9308686784202678, iteration: 80809
loss: 0.9623637795448303,grad_norm: 0.9999989871664459, iteration: 80810
loss: 1.046380639076233,grad_norm: 0.9999993427979247, iteration: 80811
loss: 1.0169349908828735,grad_norm: 0.8195705779317027, iteration: 80812
loss: 0.9778003692626953,grad_norm: 0.9999994058863485, iteration: 80813
loss: 1.0127507448196411,grad_norm: 0.9999991507430724, iteration: 80814
loss: 1.0136891603469849,grad_norm: 0.9277010244999517, iteration: 80815
loss: 0.9824874997138977,grad_norm: 0.9517967255274764, iteration: 80816
loss: 0.9897814989089966,grad_norm: 0.9741058171467443, iteration: 80817
loss: 0.9839838147163391,grad_norm: 0.9999990928917801, iteration: 80818
loss: 1.0229406356811523,grad_norm: 0.9999991608610745, iteration: 80819
loss: 0.9964765906333923,grad_norm: 0.999999282383535, iteration: 80820
loss: 0.9696180820465088,grad_norm: 0.9999991253695053, iteration: 80821
loss: 0.9851875305175781,grad_norm: 0.9999991037349693, iteration: 80822
loss: 1.025290846824646,grad_norm: 0.9999996546423082, iteration: 80823
loss: 1.0278722047805786,grad_norm: 0.9880405802773057, iteration: 80824
loss: 0.9949022531509399,grad_norm: 0.9999991091740217, iteration: 80825
loss: 1.0129190683364868,grad_norm: 0.826936952553326, iteration: 80826
loss: 1.033395767211914,grad_norm: 0.999999079805613, iteration: 80827
loss: 1.0210559368133545,grad_norm: 0.9999991999468725, iteration: 80828
loss: 1.065798044204712,grad_norm: 0.9999990625510322, iteration: 80829
loss: 0.9993240237236023,grad_norm: 0.9400153084703552, iteration: 80830
loss: 0.9712451696395874,grad_norm: 0.9999993525792326, iteration: 80831
loss: 1.059908151626587,grad_norm: 0.999998978740551, iteration: 80832
loss: 0.9739916920661926,grad_norm: 0.897315432124834, iteration: 80833
loss: 0.9934453964233398,grad_norm: 0.9999990333905532, iteration: 80834
loss: 1.027765154838562,grad_norm: 0.8919774975861864, iteration: 80835
loss: 0.9518793821334839,grad_norm: 0.9999991713305278, iteration: 80836
loss: 0.9581391215324402,grad_norm: 0.8565415235868793, iteration: 80837
loss: 1.0283981561660767,grad_norm: 0.9999988751091922, iteration: 80838
loss: 0.9885207414627075,grad_norm: 0.8811203232254142, iteration: 80839
loss: 1.0026129484176636,grad_norm: 0.9999990922579793, iteration: 80840
loss: 1.030611276626587,grad_norm: 0.9999991590809452, iteration: 80841
loss: 1.012749195098877,grad_norm: 0.9999992051684622, iteration: 80842
loss: 1.0212838649749756,grad_norm: 0.9908528225256846, iteration: 80843
loss: 0.9958245158195496,grad_norm: 0.9606236329104597, iteration: 80844
loss: 0.9728546142578125,grad_norm: 0.9247664292099882, iteration: 80845
loss: 0.9602847695350647,grad_norm: 0.9999991492695406, iteration: 80846
loss: 1.0082497596740723,grad_norm: 0.956962270115637, iteration: 80847
loss: 0.9825642108917236,grad_norm: 0.8116214826067274, iteration: 80848
loss: 0.9868181347846985,grad_norm: 0.9999991354369638, iteration: 80849
loss: 1.0032397508621216,grad_norm: 0.9999992983017634, iteration: 80850
loss: 1.004725456237793,grad_norm: 0.9053701576105027, iteration: 80851
loss: 1.008406400680542,grad_norm: 0.9719334034927641, iteration: 80852
loss: 0.9348673820495605,grad_norm: 0.9374539268815286, iteration: 80853
loss: 1.0208028554916382,grad_norm: 0.9999995965599979, iteration: 80854
loss: 1.024079442024231,grad_norm: 0.9999991924799745, iteration: 80855
loss: 1.0325343608856201,grad_norm: 0.999999049582819, iteration: 80856
loss: 1.005176067352295,grad_norm: 0.9999990937375469, iteration: 80857
loss: 1.0191880464553833,grad_norm: 0.9999990412290857, iteration: 80858
loss: 1.042770504951477,grad_norm: 0.9999990992318538, iteration: 80859
loss: 1.0272375345230103,grad_norm: 0.9999991552973096, iteration: 80860
loss: 0.9920458793640137,grad_norm: 0.8405935143738561, iteration: 80861
loss: 1.0322984457015991,grad_norm: 0.9999991251334631, iteration: 80862
loss: 1.060190200805664,grad_norm: 0.9999998033490638, iteration: 80863
loss: 1.0266380310058594,grad_norm: 0.9999992341188293, iteration: 80864
loss: 1.0221701860427856,grad_norm: 0.9999991698296594, iteration: 80865
loss: 0.9720451235771179,grad_norm: 0.9301297265864404, iteration: 80866
loss: 1.0151485204696655,grad_norm: 0.9456324707507261, iteration: 80867
loss: 0.969970703125,grad_norm: 0.9999991283950637, iteration: 80868
loss: 0.9911075234413147,grad_norm: 0.9999990861589433, iteration: 80869
loss: 1.0404167175292969,grad_norm: 0.912227003274175, iteration: 80870
loss: 1.0496646165847778,grad_norm: 0.9059896885004088, iteration: 80871
loss: 1.0122010707855225,grad_norm: 0.9092927064777768, iteration: 80872
loss: 0.9915072321891785,grad_norm: 0.9491755297742231, iteration: 80873
loss: 1.0304569005966187,grad_norm: 0.9999990451496334, iteration: 80874
loss: 1.042081594467163,grad_norm: 0.9952341507686976, iteration: 80875
loss: 1.039199948310852,grad_norm: 0.9999991208912601, iteration: 80876
loss: 0.984494149684906,grad_norm: 0.9999992293727991, iteration: 80877
loss: 1.0093762874603271,grad_norm: 0.9487540048248683, iteration: 80878
loss: 1.0017476081848145,grad_norm: 0.9078233944210946, iteration: 80879
loss: 0.9716613292694092,grad_norm: 0.8535103996547235, iteration: 80880
loss: 1.0211405754089355,grad_norm: 0.9878238454776447, iteration: 80881
loss: 1.0020712614059448,grad_norm: 0.999999102046644, iteration: 80882
loss: 0.993995726108551,grad_norm: 0.9999990858323655, iteration: 80883
loss: 0.9810464382171631,grad_norm: 0.9999991308621196, iteration: 80884
loss: 0.9839628338813782,grad_norm: 0.9255211246463219, iteration: 80885
loss: 1.0140764713287354,grad_norm: 0.965824066776849, iteration: 80886
loss: 1.0100250244140625,grad_norm: 0.9999993050475611, iteration: 80887
loss: 0.9943059682846069,grad_norm: 0.9296433590126463, iteration: 80888
loss: 0.9982287287712097,grad_norm: 0.9999996685956591, iteration: 80889
loss: 0.9909136891365051,grad_norm: 0.9201091921312126, iteration: 80890
loss: 0.971146821975708,grad_norm: 0.999999100721577, iteration: 80891
loss: 1.0157266855239868,grad_norm: 0.999999697439132, iteration: 80892
loss: 0.9875202775001526,grad_norm: 0.9999992780508383, iteration: 80893
loss: 0.9816017150878906,grad_norm: 0.9949191082381307, iteration: 80894
loss: 1.0219539403915405,grad_norm: 0.9345121072118064, iteration: 80895
loss: 1.0014320611953735,grad_norm: 0.9999990431303896, iteration: 80896
loss: 1.0253063440322876,grad_norm: 0.9999993438155333, iteration: 80897
loss: 1.0202841758728027,grad_norm: 0.9378194647513349, iteration: 80898
loss: 1.0008054971694946,grad_norm: 0.9999989912588637, iteration: 80899
loss: 1.0179195404052734,grad_norm: 0.858767554021826, iteration: 80900
loss: 1.0016064643859863,grad_norm: 0.9833002935018006, iteration: 80901
loss: 1.0148979425430298,grad_norm: 0.9999994122573462, iteration: 80902
loss: 1.0148855447769165,grad_norm: 0.999999154458524, iteration: 80903
loss: 1.0092707872390747,grad_norm: 0.9520457478408946, iteration: 80904
loss: 0.9730696082115173,grad_norm: 0.9821720002880825, iteration: 80905
loss: 0.9901082515716553,grad_norm: 0.9868667385721918, iteration: 80906
loss: 0.9946750402450562,grad_norm: 0.9999990008567211, iteration: 80907
loss: 1.0365484952926636,grad_norm: 0.999999089165698, iteration: 80908
loss: 1.0324140787124634,grad_norm: 0.9999990654082224, iteration: 80909
loss: 0.9698872566223145,grad_norm: 0.9999992383244418, iteration: 80910
loss: 0.9895491600036621,grad_norm: 0.9999992185685, iteration: 80911
loss: 0.9634907245635986,grad_norm: 0.9999991127730589, iteration: 80912
loss: 0.9976418018341064,grad_norm: 0.9999991592352448, iteration: 80913
loss: 1.0087611675262451,grad_norm: 0.895293507262772, iteration: 80914
loss: 0.9767719507217407,grad_norm: 0.99999908986507, iteration: 80915
loss: 1.031077265739441,grad_norm: 0.999999353053646, iteration: 80916
loss: 1.031878113746643,grad_norm: 0.9011572758929982, iteration: 80917
loss: 1.004345178604126,grad_norm: 0.999999307147883, iteration: 80918
loss: 0.9785892367362976,grad_norm: 0.9999990308151057, iteration: 80919
loss: 1.015493631362915,grad_norm: 0.9967781566186164, iteration: 80920
loss: 1.0096005201339722,grad_norm: 0.9830615736763886, iteration: 80921
loss: 0.9960340261459351,grad_norm: 0.9999990087682866, iteration: 80922
loss: 0.9492709040641785,grad_norm: 0.999999116234764, iteration: 80923
loss: 0.9716410636901855,grad_norm: 0.9448402175457778, iteration: 80924
loss: 0.9967023134231567,grad_norm: 0.9999997982594191, iteration: 80925
loss: 0.9610616564750671,grad_norm: 0.9999989540507774, iteration: 80926
loss: 0.9962285757064819,grad_norm: 0.9999990429614067, iteration: 80927
loss: 1.0392024517059326,grad_norm: 0.92340820930381, iteration: 80928
loss: 0.9509990811347961,grad_norm: 0.9999992174992592, iteration: 80929
loss: 0.9863951802253723,grad_norm: 0.968238653343123, iteration: 80930
loss: 0.9916625618934631,grad_norm: 0.9999993205373516, iteration: 80931
loss: 1.0246516466140747,grad_norm: 0.9999992042020746, iteration: 80932
loss: 1.0517081022262573,grad_norm: 0.9999993451919603, iteration: 80933
loss: 1.0409852266311646,grad_norm: 0.9999992689422541, iteration: 80934
loss: 1.014901876449585,grad_norm: 0.9999991570483889, iteration: 80935
loss: 1.0199819803237915,grad_norm: 0.9999990599888295, iteration: 80936
loss: 0.9627883434295654,grad_norm: 0.9999992359995761, iteration: 80937
loss: 1.0125453472137451,grad_norm: 0.9999992563089611, iteration: 80938
loss: 1.0194708108901978,grad_norm: 0.999999231891871, iteration: 80939
loss: 0.9963539838790894,grad_norm: 0.9999992467349498, iteration: 80940
loss: 0.9728233814239502,grad_norm: 0.9999991934312119, iteration: 80941
loss: 0.9843858480453491,grad_norm: 0.9906596394551161, iteration: 80942
loss: 1.0122429132461548,grad_norm: 0.9999991218609873, iteration: 80943
loss: 1.0329054594039917,grad_norm: 0.9999991237868316, iteration: 80944
loss: 1.0230876207351685,grad_norm: 0.8778536835005857, iteration: 80945
loss: 0.9982045292854309,grad_norm: 0.9598464259162373, iteration: 80946
loss: 0.9760260581970215,grad_norm: 0.9999990959317355, iteration: 80947
loss: 0.9817628264427185,grad_norm: 0.8795376665852833, iteration: 80948
loss: 0.9996686577796936,grad_norm: 0.9174509176252118, iteration: 80949
loss: 1.0356409549713135,grad_norm: 0.9999993342938562, iteration: 80950
loss: 0.99721759557724,grad_norm: 0.9999991215789186, iteration: 80951
loss: 1.0040422677993774,grad_norm: 0.9091384693398306, iteration: 80952
loss: 1.0088456869125366,grad_norm: 0.9999992579786545, iteration: 80953
loss: 0.9694476127624512,grad_norm: 0.9842091426271669, iteration: 80954
loss: 0.9696923494338989,grad_norm: 0.9999990746171562, iteration: 80955
loss: 1.016189455986023,grad_norm: 0.9999990909329259, iteration: 80956
loss: 1.0394096374511719,grad_norm: 0.9999990860962624, iteration: 80957
loss: 0.9674510359764099,grad_norm: 0.9999991062797664, iteration: 80958
loss: 0.992038905620575,grad_norm: 0.9999990024306717, iteration: 80959
loss: 1.0105115175247192,grad_norm: 0.9568207668659352, iteration: 80960
loss: 0.9919747114181519,grad_norm: 0.9999991623517938, iteration: 80961
loss: 1.049638271331787,grad_norm: 0.9999993879910992, iteration: 80962
loss: 1.023872971534729,grad_norm: 0.8697533566633411, iteration: 80963
loss: 0.9858963489532471,grad_norm: 0.9999988851682431, iteration: 80964
loss: 1.022932767868042,grad_norm: 0.9999991882993463, iteration: 80965
loss: 0.9589128494262695,grad_norm: 0.9999992053510216, iteration: 80966
loss: 1.0187255144119263,grad_norm: 0.9999992621377793, iteration: 80967
loss: 0.9796628355979919,grad_norm: 0.9999991106507055, iteration: 80968
loss: 1.0271888971328735,grad_norm: 0.9999990849649041, iteration: 80969
loss: 1.0227984189987183,grad_norm: 0.9801478707357897, iteration: 80970
loss: 1.004218339920044,grad_norm: 0.9999991383313414, iteration: 80971
loss: 0.9874581694602966,grad_norm: 0.8674876593047918, iteration: 80972
loss: 0.9667478203773499,grad_norm: 0.9990569837938231, iteration: 80973
loss: 1.0311217308044434,grad_norm: 0.9795931230221165, iteration: 80974
loss: 0.9870845079421997,grad_norm: 0.9999991128831803, iteration: 80975
loss: 1.025007724761963,grad_norm: 0.830676695722852, iteration: 80976
loss: 0.9805208444595337,grad_norm: 0.9999990858801627, iteration: 80977
loss: 0.9914807677268982,grad_norm: 0.9999992560219481, iteration: 80978
loss: 0.9676135182380676,grad_norm: 0.9999991748069276, iteration: 80979
loss: 0.9596360325813293,grad_norm: 0.9999991192366429, iteration: 80980
loss: 1.0316160917282104,grad_norm: 0.9089247239443938, iteration: 80981
loss: 1.0391275882720947,grad_norm: 0.9999995185657043, iteration: 80982
loss: 1.0148276090621948,grad_norm: 0.9999991489587117, iteration: 80983
loss: 0.9896057844161987,grad_norm: 0.9999992330403611, iteration: 80984
loss: 1.0405789613723755,grad_norm: 0.999999370702345, iteration: 80985
loss: 0.9640941023826599,grad_norm: 0.9766354084547888, iteration: 80986
loss: 1.01539945602417,grad_norm: 0.9999991448731986, iteration: 80987
loss: 1.0395961999893188,grad_norm: 0.9999991506089965, iteration: 80988
loss: 0.9902983903884888,grad_norm: 0.999999105665505, iteration: 80989
loss: 0.9613996148109436,grad_norm: 0.9568263205663577, iteration: 80990
loss: 0.9857413172721863,grad_norm: 0.9999990417893527, iteration: 80991
loss: 0.9858739376068115,grad_norm: 0.9999989861624266, iteration: 80992
loss: 0.9908953905105591,grad_norm: 0.9780944844535867, iteration: 80993
loss: 1.018476128578186,grad_norm: 0.9444996360124387, iteration: 80994
loss: 0.9793933629989624,grad_norm: 0.8353959059761339, iteration: 80995
loss: 0.9881922006607056,grad_norm: 0.9999989822126577, iteration: 80996
loss: 1.0377862453460693,grad_norm: 0.9999992559779216, iteration: 80997
loss: 0.9937699437141418,grad_norm: 0.999999435576439, iteration: 80998
loss: 0.9974958896636963,grad_norm: 0.8502839122104879, iteration: 80999
loss: 1.0093823671340942,grad_norm: 0.9999993381704471, iteration: 81000
loss: 1.0147874355316162,grad_norm: 0.9999991285033841, iteration: 81001
loss: 1.001775860786438,grad_norm: 0.9999990542577989, iteration: 81002
loss: 0.9897854328155518,grad_norm: 0.9999991550079477, iteration: 81003
loss: 0.9982117414474487,grad_norm: 0.9999991061804644, iteration: 81004
loss: 0.9992843866348267,grad_norm: 0.9999993015186288, iteration: 81005
loss: 0.9799652099609375,grad_norm: 0.9995609761606086, iteration: 81006
loss: 1.0245134830474854,grad_norm: 0.9999991775801017, iteration: 81007
loss: 0.9940087199211121,grad_norm: 0.9999990663740206, iteration: 81008
loss: 0.9841169714927673,grad_norm: 0.9888797525164726, iteration: 81009
loss: 0.9915927052497864,grad_norm: 0.9999991417564997, iteration: 81010
loss: 1.0152472257614136,grad_norm: 0.9999990698759149, iteration: 81011
loss: 0.9929070472717285,grad_norm: 0.9999991608672051, iteration: 81012
loss: 0.9696555733680725,grad_norm: 0.9999992384366795, iteration: 81013
loss: 0.9880226850509644,grad_norm: 0.9999991849886234, iteration: 81014
loss: 0.9723115563392639,grad_norm: 0.9117221743289132, iteration: 81015
loss: 1.0013614892959595,grad_norm: 0.9999990335598096, iteration: 81016
loss: 0.9923555254936218,grad_norm: 0.999999784635485, iteration: 81017
loss: 1.0045779943466187,grad_norm: 0.9999992164300717, iteration: 81018
loss: 1.0181330442428589,grad_norm: 0.9999991543590852, iteration: 81019
loss: 0.9773668050765991,grad_norm: 0.9332677170051432, iteration: 81020
loss: 0.9732158184051514,grad_norm: 0.8611607250728311, iteration: 81021
loss: 0.957345187664032,grad_norm: 0.9999990953895515, iteration: 81022
loss: 1.0265793800354004,grad_norm: 0.9999993208805775, iteration: 81023
loss: 0.9593412280082703,grad_norm: 0.9999992522840229, iteration: 81024
loss: 1.0193389654159546,grad_norm: 0.9706156615552735, iteration: 81025
loss: 1.010675072669983,grad_norm: 0.9999993161114026, iteration: 81026
loss: 1.025290608406067,grad_norm: 0.999999121348287, iteration: 81027
loss: 0.9809792041778564,grad_norm: 0.9999990855320342, iteration: 81028
loss: 1.0088783502578735,grad_norm: 0.9371646078752311, iteration: 81029
loss: 0.9898658990859985,grad_norm: 0.9261904227401155, iteration: 81030
loss: 0.9916918873786926,grad_norm: 0.9263557225431495, iteration: 81031
loss: 0.9898488521575928,grad_norm: 0.9999989942542126, iteration: 81032
loss: 1.0180615186691284,grad_norm: 0.999999061267841, iteration: 81033
loss: 0.9750412702560425,grad_norm: 0.9665456282861833, iteration: 81034
loss: 0.9953349232673645,grad_norm: 0.9999989822356975, iteration: 81035
loss: 1.0086572170257568,grad_norm: 0.9963371297910673, iteration: 81036
loss: 1.0082194805145264,grad_norm: 0.9999991354212299, iteration: 81037
loss: 0.9847369194030762,grad_norm: 0.8466028364385637, iteration: 81038
loss: 0.9819977283477783,grad_norm: 0.9592793609909059, iteration: 81039
loss: 0.9532880783081055,grad_norm: 0.9999991064076098, iteration: 81040
loss: 1.0007919073104858,grad_norm: 0.951866904618195, iteration: 81041
loss: 1.0233324766159058,grad_norm: 0.9999995242438879, iteration: 81042
loss: 0.9805731177330017,grad_norm: 0.999999069707465, iteration: 81043
loss: 1.0596917867660522,grad_norm: 0.9999997724035519, iteration: 81044
loss: 1.1029356718063354,grad_norm: 0.9999992510188686, iteration: 81045
loss: 0.9820493459701538,grad_norm: 0.9999990606197922, iteration: 81046
loss: 0.9692139029502869,grad_norm: 0.9909428001639319, iteration: 81047
loss: 1.0504649877548218,grad_norm: 0.9999990617393032, iteration: 81048
loss: 1.0080031156539917,grad_norm: 0.9999990502674744, iteration: 81049
loss: 0.9885901212692261,grad_norm: 0.9999996129366769, iteration: 81050
loss: 1.068529725074768,grad_norm: 0.9999991369280434, iteration: 81051
loss: 1.0052284002304077,grad_norm: 0.9072719248333947, iteration: 81052
loss: 0.9912070035934448,grad_norm: 0.9361989450985758, iteration: 81053
loss: 0.9761319160461426,grad_norm: 0.9999990878274341, iteration: 81054
loss: 0.9955636858940125,grad_norm: 0.9999995445561807, iteration: 81055
loss: 1.0207316875457764,grad_norm: 0.9999991376413367, iteration: 81056
loss: 0.9962636828422546,grad_norm: 0.9999991207210673, iteration: 81057
loss: 0.9904187917709351,grad_norm: 0.8794316783371432, iteration: 81058
loss: 1.0242453813552856,grad_norm: 0.9999994273419451, iteration: 81059
loss: 1.0092055797576904,grad_norm: 0.8935409130462343, iteration: 81060
loss: 1.0127997398376465,grad_norm: 0.9459684936905177, iteration: 81061
loss: 0.9996230006217957,grad_norm: 0.9999991620080652, iteration: 81062
loss: 1.0024148225784302,grad_norm: 0.9999991405597771, iteration: 81063
loss: 1.0087755918502808,grad_norm: 0.9999990931729377, iteration: 81064
loss: 0.9835555553436279,grad_norm: 0.9999991909443978, iteration: 81065
loss: 0.9709779620170593,grad_norm: 0.8826138519119897, iteration: 81066
loss: 0.9672130346298218,grad_norm: 0.905906138042565, iteration: 81067
loss: 1.0248491764068604,grad_norm: 0.999999279536877, iteration: 81068
loss: 1.0529065132141113,grad_norm: 0.9999997507095625, iteration: 81069
loss: 1.0133897066116333,grad_norm: 0.9999990828744177, iteration: 81070
loss: 1.0112544298171997,grad_norm: 0.9999991433098718, iteration: 81071
loss: 0.9767321944236755,grad_norm: 0.9999992376494106, iteration: 81072
loss: 1.032739520072937,grad_norm: 0.999999272708139, iteration: 81073
loss: 1.0158153772354126,grad_norm: 0.9999992494782584, iteration: 81074
loss: 0.9646351337432861,grad_norm: 0.7944699752352343, iteration: 81075
loss: 0.9995590448379517,grad_norm: 0.9073340080303958, iteration: 81076
loss: 0.9659412503242493,grad_norm: 0.99999921395917, iteration: 81077
loss: 1.0186612606048584,grad_norm: 0.962124033699514, iteration: 81078
loss: 1.0296626091003418,grad_norm: 0.9999991680265978, iteration: 81079
loss: 0.9857564568519592,grad_norm: 0.9026609305867043, iteration: 81080
loss: 1.0241222381591797,grad_norm: 0.9999992025281629, iteration: 81081
loss: 0.9893501996994019,grad_norm: 0.9999992045007775, iteration: 81082
loss: 0.9976456165313721,grad_norm: 0.9999993181127431, iteration: 81083
loss: 1.0185511112213135,grad_norm: 0.9999992165025485, iteration: 81084
loss: 0.9762670993804932,grad_norm: 0.9999992127221043, iteration: 81085
loss: 1.001498818397522,grad_norm: 0.9999990865429619, iteration: 81086
loss: 0.9599170088768005,grad_norm: 0.8862392807346715, iteration: 81087
loss: 1.0055853128433228,grad_norm: 0.9817219210702623, iteration: 81088
loss: 0.9808968305587769,grad_norm: 0.9999991468121451, iteration: 81089
loss: 0.9688698649406433,grad_norm: 0.9999992144402595, iteration: 81090
loss: 0.9864060878753662,grad_norm: 0.9479458813065452, iteration: 81091
loss: 1.0268118381500244,grad_norm: 0.9999996209173683, iteration: 81092
loss: 1.0076661109924316,grad_norm: 0.9999991742764545, iteration: 81093
loss: 1.0234713554382324,grad_norm: 0.9282773929006034, iteration: 81094
loss: 1.0078544616699219,grad_norm: 0.9999991207772994, iteration: 81095
loss: 1.0068039894104004,grad_norm: 0.9999990157556703, iteration: 81096
loss: 0.9713016152381897,grad_norm: 0.9512941498238479, iteration: 81097
loss: 0.9705474972724915,grad_norm: 0.8472551478863949, iteration: 81098
loss: 1.0021593570709229,grad_norm: 0.9999992322834966, iteration: 81099
loss: 0.9965019226074219,grad_norm: 0.9999989517036193, iteration: 81100
loss: 1.0166795253753662,grad_norm: 0.9999991967580413, iteration: 81101
loss: 0.971066951751709,grad_norm: 0.9893958632995353, iteration: 81102
loss: 1.017453908920288,grad_norm: 0.9999989329122797, iteration: 81103
loss: 0.9675798416137695,grad_norm: 0.8130030472087677, iteration: 81104
loss: 0.9652796983718872,grad_norm: 0.9153409024119368, iteration: 81105
loss: 1.0063087940216064,grad_norm: 0.8787216133201702, iteration: 81106
loss: 1.0455225706100464,grad_norm: 0.999998954467467, iteration: 81107
loss: 0.9846665263175964,grad_norm: 0.9999990293508962, iteration: 81108
loss: 0.9902538657188416,grad_norm: 0.9999990804817807, iteration: 81109
loss: 0.9982247352600098,grad_norm: 0.9999990211967186, iteration: 81110
loss: 0.9953948855400085,grad_norm: 0.8927217942036804, iteration: 81111
loss: 1.0437837839126587,grad_norm: 0.9999991725187086, iteration: 81112
loss: 1.0696165561676025,grad_norm: 0.9972613119370605, iteration: 81113
loss: 0.9915913343429565,grad_norm: 0.9524044587499271, iteration: 81114
loss: 0.982767641544342,grad_norm: 0.9970819031846045, iteration: 81115
loss: 0.9652625918388367,grad_norm: 0.9999991011530561, iteration: 81116
loss: 1.0175609588623047,grad_norm: 0.9999992564684304, iteration: 81117
loss: 1.0074608325958252,grad_norm: 0.9779009762152836, iteration: 81118
loss: 1.0300863981246948,grad_norm: 0.9999991235577558, iteration: 81119
loss: 1.0011526346206665,grad_norm: 0.9999999177124421, iteration: 81120
loss: 0.9735968112945557,grad_norm: 0.9206148029096938, iteration: 81121
loss: 0.9718528389930725,grad_norm: 0.9607518393429388, iteration: 81122
loss: 1.0549107789993286,grad_norm: 0.9239241079092181, iteration: 81123
loss: 1.031440019607544,grad_norm: 0.9999991633139718, iteration: 81124
loss: 0.9816487431526184,grad_norm: 0.9999990384604116, iteration: 81125
loss: 0.9906218647956848,grad_norm: 0.9999990229498402, iteration: 81126
loss: 0.9961918592453003,grad_norm: 0.99999910189876, iteration: 81127
loss: 1.0149821043014526,grad_norm: 0.9293982851070385, iteration: 81128
loss: 1.0291647911071777,grad_norm: 0.9999994597567096, iteration: 81129
loss: 0.9748761653900146,grad_norm: 0.9748666598015877, iteration: 81130
loss: 0.9814451336860657,grad_norm: 0.9999991065624154, iteration: 81131
loss: 1.0150275230407715,grad_norm: 0.9999992672413551, iteration: 81132
loss: 0.991129994392395,grad_norm: 0.9766429304181921, iteration: 81133
loss: 0.9682918787002563,grad_norm: 0.8947677775320996, iteration: 81134
loss: 0.9957947134971619,grad_norm: 0.8097128226269555, iteration: 81135
loss: 0.9641593098640442,grad_norm: 0.9999952158032609, iteration: 81136
loss: 1.0098655223846436,grad_norm: 0.9999991820146483, iteration: 81137
loss: 1.0102218389511108,grad_norm: 0.9999990539807926, iteration: 81138
loss: 0.9705550670623779,grad_norm: 0.9999989826081112, iteration: 81139
loss: 0.9498188495635986,grad_norm: 0.9448339032288204, iteration: 81140
loss: 1.0505988597869873,grad_norm: 0.9999991013236728, iteration: 81141
loss: 0.9935629367828369,grad_norm: 0.9599017224042877, iteration: 81142
loss: 1.0339146852493286,grad_norm: 0.9999990866811156, iteration: 81143
loss: 1.0241726636886597,grad_norm: 0.9999991514856795, iteration: 81144
loss: 1.020445704460144,grad_norm: 0.9999990578726075, iteration: 81145
loss: 0.9825946092605591,grad_norm: 0.985672013463622, iteration: 81146
loss: 0.9942083954811096,grad_norm: 0.9765721025438777, iteration: 81147
loss: 1.0753538608551025,grad_norm: 0.999999558251369, iteration: 81148
loss: 0.9808274507522583,grad_norm: 0.9139595068045454, iteration: 81149
loss: 1.0135942697525024,grad_norm: 0.9999992585362718, iteration: 81150
loss: 1.0277773141860962,grad_norm: 0.9999991553411143, iteration: 81151
loss: 0.9878262281417847,grad_norm: 0.9999990040268684, iteration: 81152
loss: 1.0087082386016846,grad_norm: 0.9395149017651278, iteration: 81153
loss: 1.0182889699935913,grad_norm: 0.897759746001587, iteration: 81154
loss: 0.9800994992256165,grad_norm: 0.9999991083722227, iteration: 81155
loss: 0.9850772619247437,grad_norm: 0.9999991709948218, iteration: 81156
loss: 1.0338808298110962,grad_norm: 0.907139775788838, iteration: 81157
loss: 1.031839370727539,grad_norm: 0.99999912196869, iteration: 81158
loss: 0.9801968336105347,grad_norm: 0.999998898425839, iteration: 81159
loss: 1.0291212797164917,grad_norm: 0.9656472456802713, iteration: 81160
loss: 1.0001323223114014,grad_norm: 0.9999991280305145, iteration: 81161
loss: 1.0465470552444458,grad_norm: 0.9999989447245364, iteration: 81162
loss: 0.9945275783538818,grad_norm: 0.9999990395852131, iteration: 81163
loss: 0.9817991852760315,grad_norm: 0.9999991667384076, iteration: 81164
loss: 0.9896607398986816,grad_norm: 0.9200004392502026, iteration: 81165
loss: 1.0017644166946411,grad_norm: 0.852057841211124, iteration: 81166
loss: 1.0093472003936768,grad_norm: 0.9999990511615149, iteration: 81167
loss: 0.9794676899909973,grad_norm: 0.9999991162458345, iteration: 81168
loss: 0.9699621796607971,grad_norm: 0.9999989470395201, iteration: 81169
loss: 0.9963265061378479,grad_norm: 0.9999991299580114, iteration: 81170
loss: 0.9620310664176941,grad_norm: 0.9696397999506255, iteration: 81171
loss: 0.9945340156555176,grad_norm: 0.9986463868576364, iteration: 81172
loss: 1.093110203742981,grad_norm: 0.9999997536817127, iteration: 81173
loss: 1.0135531425476074,grad_norm: 0.9546988222483873, iteration: 81174
loss: 0.9896728992462158,grad_norm: 0.999999279477613, iteration: 81175
loss: 1.0197699069976807,grad_norm: 0.999999199526784, iteration: 81176
loss: 1.05990731716156,grad_norm: 0.9065550764474657, iteration: 81177
loss: 1.0156468152999878,grad_norm: 0.9999990348521249, iteration: 81178
loss: 0.990142285823822,grad_norm: 0.9999990796102708, iteration: 81179
loss: 0.9527934789657593,grad_norm: 0.8730569511073898, iteration: 81180
loss: 0.9693578481674194,grad_norm: 0.9999991449405813, iteration: 81181
loss: 1.0137197971343994,grad_norm: 0.9999989291989456, iteration: 81182
loss: 1.05571711063385,grad_norm: 0.9999991499442833, iteration: 81183
loss: 0.9887586832046509,grad_norm: 0.9999991842068863, iteration: 81184
loss: 1.0024923086166382,grad_norm: 0.9999990115100847, iteration: 81185
loss: 1.0040532350540161,grad_norm: 0.7402346504147739, iteration: 81186
loss: 1.0002270936965942,grad_norm: 0.9999989965511495, iteration: 81187
loss: 1.0163143873214722,grad_norm: 0.9999992386490154, iteration: 81188
loss: 1.0321136713027954,grad_norm: 0.9999991810358866, iteration: 81189
loss: 1.0301845073699951,grad_norm: 0.9999992469298928, iteration: 81190
loss: 1.0432413816452026,grad_norm: 0.9999991156265228, iteration: 81191
loss: 0.9864907264709473,grad_norm: 0.9999990316443079, iteration: 81192
loss: 1.0229589939117432,grad_norm: 0.9999992023313284, iteration: 81193
loss: 0.9981517195701599,grad_norm: 0.9298068960311869, iteration: 81194
loss: 1.0135853290557861,grad_norm: 0.958021953016008, iteration: 81195
loss: 1.0160608291625977,grad_norm: 0.9999990398234416, iteration: 81196
loss: 0.9510334730148315,grad_norm: 0.9999991131125012, iteration: 81197
loss: 1.0138212442398071,grad_norm: 0.9946332669385772, iteration: 81198
loss: 1.0502721071243286,grad_norm: 0.9999993253032354, iteration: 81199
loss: 0.972000002861023,grad_norm: 0.9999990477600809, iteration: 81200
loss: 1.0439605712890625,grad_norm: 0.9636444476267663, iteration: 81201
loss: 0.9901697039604187,grad_norm: 0.9999989127535517, iteration: 81202
loss: 1.0169434547424316,grad_norm: 0.9999992209747179, iteration: 81203
loss: 1.0090872049331665,grad_norm: 0.8247812661291214, iteration: 81204
loss: 0.9935322403907776,grad_norm: 0.9999990392723939, iteration: 81205
loss: 0.9616337418556213,grad_norm: 0.8859843219378374, iteration: 81206
loss: 1.0128222703933716,grad_norm: 0.9999991772822728, iteration: 81207
loss: 1.0076236724853516,grad_norm: 0.8896271849282194, iteration: 81208
loss: 0.9667493104934692,grad_norm: 0.9999990700750342, iteration: 81209
loss: 1.0037102699279785,grad_norm: 0.9999990601825824, iteration: 81210
loss: 0.977423369884491,grad_norm: 0.8993993231024031, iteration: 81211
loss: 1.0151840448379517,grad_norm: 0.9999992093572585, iteration: 81212
loss: 0.9914897680282593,grad_norm: 0.9999991924547987, iteration: 81213
loss: 0.9963919520378113,grad_norm: 0.9999991450037348, iteration: 81214
loss: 1.0032219886779785,grad_norm: 0.9947215008163717, iteration: 81215
loss: 0.9847639799118042,grad_norm: 0.9999991635441592, iteration: 81216
loss: 1.0568548440933228,grad_norm: 0.9999990904944133, iteration: 81217
loss: 0.9632798433303833,grad_norm: 0.9393749428857269, iteration: 81218
loss: 0.999945342540741,grad_norm: 0.9202824362779204, iteration: 81219
loss: 1.0435410737991333,grad_norm: 0.9999992776286845, iteration: 81220
loss: 0.9859808087348938,grad_norm: 0.9999992893677051, iteration: 81221
loss: 0.9796865582466125,grad_norm: 0.9544349440353995, iteration: 81222
loss: 0.9623991847038269,grad_norm: 0.9999991157370793, iteration: 81223
loss: 1.023898720741272,grad_norm: 0.9999992174072302, iteration: 81224
loss: 1.017289400100708,grad_norm: 0.9593807729066772, iteration: 81225
loss: 0.9827426671981812,grad_norm: 0.9999991127569667, iteration: 81226
loss: 1.0353620052337646,grad_norm: 0.9999993690308226, iteration: 81227
loss: 1.0080727338790894,grad_norm: 0.9470967271353492, iteration: 81228
loss: 1.0688809156417847,grad_norm: 0.9999995369487497, iteration: 81229
loss: 0.974689781665802,grad_norm: 0.977836923973776, iteration: 81230
loss: 1.0155073404312134,grad_norm: 0.999999364557162, iteration: 81231
loss: 0.9698172807693481,grad_norm: 0.9999989971284037, iteration: 81232
loss: 0.9862962365150452,grad_norm: 0.9086342568737777, iteration: 81233
loss: 1.018452525138855,grad_norm: 0.9999996441800805, iteration: 81234
loss: 1.0234845876693726,grad_norm: 0.9999992393386947, iteration: 81235
loss: 1.0836374759674072,grad_norm: 0.9999994093608335, iteration: 81236
loss: 0.9927586317062378,grad_norm: 0.9222444354929573, iteration: 81237
loss: 0.9934241771697998,grad_norm: 0.9999991035488411, iteration: 81238
loss: 1.0318539142608643,grad_norm: 0.9018240692881818, iteration: 81239
loss: 0.9996762871742249,grad_norm: 0.9999990930704539, iteration: 81240
loss: 0.9839091300964355,grad_norm: 0.9323996875022862, iteration: 81241
loss: 1.039660930633545,grad_norm: 0.9405594659865782, iteration: 81242
loss: 0.9981022477149963,grad_norm: 0.9261959160629214, iteration: 81243
loss: 1.0140584707260132,grad_norm: 0.9999991881178457, iteration: 81244
loss: 1.0260242223739624,grad_norm: 0.9999991057655778, iteration: 81245
loss: 1.008247971534729,grad_norm: 0.9999991469702968, iteration: 81246
loss: 1.0072259902954102,grad_norm: 0.9999991430597356, iteration: 81247
loss: 1.044569492340088,grad_norm: 0.9999995017112526, iteration: 81248
loss: 1.0068135261535645,grad_norm: 0.938172851693893, iteration: 81249
loss: 1.0312817096710205,grad_norm: 0.9999990523036867, iteration: 81250
loss: 0.9900335073471069,grad_norm: 0.827728029278783, iteration: 81251
loss: 1.0279514789581299,grad_norm: 0.9999990748224136, iteration: 81252
loss: 1.0081093311309814,grad_norm: 0.9999990926738825, iteration: 81253
loss: 1.015674114227295,grad_norm: 0.9999991532932282, iteration: 81254
loss: 1.0263521671295166,grad_norm: 0.9999992398392745, iteration: 81255
loss: 0.9882415533065796,grad_norm: 0.9999990019739489, iteration: 81256
loss: 0.9820661544799805,grad_norm: 0.9494195607841016, iteration: 81257
loss: 1.045465350151062,grad_norm: 0.9464385388589337, iteration: 81258
loss: 1.0269532203674316,grad_norm: 0.9963253374634288, iteration: 81259
loss: 0.9963204264640808,grad_norm: 0.9063890925789161, iteration: 81260
loss: 1.0132583379745483,grad_norm: 0.9999990897505322, iteration: 81261
loss: 1.021356463432312,grad_norm: 0.9999993847214578, iteration: 81262
loss: 1.1208857297897339,grad_norm: 0.9999994969838343, iteration: 81263
loss: 1.005183458328247,grad_norm: 0.999999180043665, iteration: 81264
loss: 1.016592264175415,grad_norm: 0.9999990348268677, iteration: 81265
loss: 1.0010716915130615,grad_norm: 0.9999993433155618, iteration: 81266
loss: 0.9956374764442444,grad_norm: 0.9999992494632978, iteration: 81267
loss: 1.0055198669433594,grad_norm: 0.9999991081721975, iteration: 81268
loss: 1.0262445211410522,grad_norm: 0.9999991548050966, iteration: 81269
loss: 1.0118852853775024,grad_norm: 0.9999989881144504, iteration: 81270
loss: 0.9709964990615845,grad_norm: 0.9999992190951779, iteration: 81271
loss: 1.0141080617904663,grad_norm: 0.9999990184055731, iteration: 81272
loss: 0.9771009683609009,grad_norm: 0.9999991178525031, iteration: 81273
loss: 0.9902759790420532,grad_norm: 0.9999991506958651, iteration: 81274
loss: 1.0192506313323975,grad_norm: 0.9579416187486264, iteration: 81275
loss: 0.9918786287307739,grad_norm: 0.9999991233055645, iteration: 81276
loss: 0.9863799810409546,grad_norm: 0.9183410248521724, iteration: 81277
loss: 0.9449817538261414,grad_norm: 0.9447596227783317, iteration: 81278
loss: 1.0635393857955933,grad_norm: 0.9999994827849895, iteration: 81279
loss: 1.011479377746582,grad_norm: 0.9999991989700248, iteration: 81280
loss: 0.990308940410614,grad_norm: 0.9745969968211444, iteration: 81281
loss: 0.9957148432731628,grad_norm: 0.942606694465825, iteration: 81282
loss: 1.01539146900177,grad_norm: 0.9999992028061385, iteration: 81283
loss: 0.9998921155929565,grad_norm: 0.868253609073185, iteration: 81284
loss: 0.9626829624176025,grad_norm: 0.9999991779969986, iteration: 81285
loss: 0.9860342741012573,grad_norm: 0.9099926760275702, iteration: 81286
loss: 0.993172287940979,grad_norm: 0.9999991440484576, iteration: 81287
loss: 1.0032062530517578,grad_norm: 0.9599057920601515, iteration: 81288
loss: 0.9871190190315247,grad_norm: 0.9999990446587963, iteration: 81289
loss: 1.0129667520523071,grad_norm: 0.9999991530856823, iteration: 81290
loss: 1.0110820531845093,grad_norm: 0.9999991756299373, iteration: 81291
loss: 1.0016430616378784,grad_norm: 0.938723973589241, iteration: 81292
loss: 0.9776642918586731,grad_norm: 0.9999989316260864, iteration: 81293
loss: 0.9979797005653381,grad_norm: 0.9999989231185861, iteration: 81294
loss: 1.0174322128295898,grad_norm: 0.9999990720436841, iteration: 81295
loss: 1.0049281120300293,grad_norm: 0.9999992313179787, iteration: 81296
loss: 1.0099033117294312,grad_norm: 0.999999090947944, iteration: 81297
loss: 0.9844063520431519,grad_norm: 0.9999991574418805, iteration: 81298
loss: 0.9450369477272034,grad_norm: 0.9788276693982053, iteration: 81299
loss: 1.014723300933838,grad_norm: 0.9554822622353233, iteration: 81300
loss: 0.9841238260269165,grad_norm: 0.9769162988381217, iteration: 81301
loss: 0.9890526533126831,grad_norm: 0.9228867853523444, iteration: 81302
loss: 0.9915241003036499,grad_norm: 0.9713386949604773, iteration: 81303
loss: 0.991473913192749,grad_norm: 0.9352072083560474, iteration: 81304
loss: 0.9779943823814392,grad_norm: 0.9337025236519502, iteration: 81305
loss: 1.0373677015304565,grad_norm: 0.9807069183673538, iteration: 81306
loss: 1.0035022497177124,grad_norm: 0.9999990622860968, iteration: 81307
loss: 1.0235732793807983,grad_norm: 0.9999993951980419, iteration: 81308
loss: 0.9978330731391907,grad_norm: 0.9999991379934982, iteration: 81309
loss: 1.001480221748352,grad_norm: 0.999999114052314, iteration: 81310
loss: 0.9759247303009033,grad_norm: 0.9999991225759168, iteration: 81311
loss: 1.0127310752868652,grad_norm: 0.9999990268513321, iteration: 81312
loss: 1.004899501800537,grad_norm: 0.9399501521795152, iteration: 81313
loss: 1.0154051780700684,grad_norm: 0.9999990193603453, iteration: 81314
loss: 1.0342915058135986,grad_norm: 0.9999991708411184, iteration: 81315
loss: 0.9870628118515015,grad_norm: 0.9524530416169465, iteration: 81316
loss: 0.9941744804382324,grad_norm: 0.9999993238713779, iteration: 81317
loss: 1.0099916458129883,grad_norm: 0.9999992369429255, iteration: 81318
loss: 0.9855172634124756,grad_norm: 0.9647040890181044, iteration: 81319
loss: 1.0241978168487549,grad_norm: 0.9388277799746464, iteration: 81320
loss: 1.013574242591858,grad_norm: 0.9232340634670586, iteration: 81321
loss: 1.0353000164031982,grad_norm: 0.8914531919033811, iteration: 81322
loss: 1.0301568508148193,grad_norm: 0.9999990651333293, iteration: 81323
loss: 0.9949465990066528,grad_norm: 0.9964267994326826, iteration: 81324
loss: 0.9802045226097107,grad_norm: 0.999999132923572, iteration: 81325
loss: 1.0242904424667358,grad_norm: 0.9999990163300606, iteration: 81326
loss: 0.961734414100647,grad_norm: 0.999998985435477, iteration: 81327
loss: 0.9804354310035706,grad_norm: 0.9999990713677734, iteration: 81328
loss: 0.9916588068008423,grad_norm: 0.9999992855480843, iteration: 81329
loss: 0.9947704076766968,grad_norm: 0.9999990533370144, iteration: 81330
loss: 0.9464190006256104,grad_norm: 0.9999991292420716, iteration: 81331
loss: 0.964277446269989,grad_norm: 0.9999989682711949, iteration: 81332
loss: 1.001438021659851,grad_norm: 0.992495630636643, iteration: 81333
loss: 0.9828333854675293,grad_norm: 0.9624462132810019, iteration: 81334
loss: 1.0077940225601196,grad_norm: 0.8615170059497149, iteration: 81335
loss: 0.9915950298309326,grad_norm: 0.9999992236365314, iteration: 81336
loss: 0.9817253947257996,grad_norm: 0.9999990864348897, iteration: 81337
loss: 0.9749651551246643,grad_norm: 0.9999991460818319, iteration: 81338
loss: 0.9849708676338196,grad_norm: 0.9999991244084435, iteration: 81339
loss: 1.0133336782455444,grad_norm: 0.9999990646668079, iteration: 81340
loss: 1.0366419553756714,grad_norm: 0.9999993746295205, iteration: 81341
loss: 1.0522561073303223,grad_norm: 0.9999990876415223, iteration: 81342
loss: 1.0296497344970703,grad_norm: 0.956421775317851, iteration: 81343
loss: 0.9945077300071716,grad_norm: 0.8323978136602447, iteration: 81344
loss: 1.0214622020721436,grad_norm: 0.9139110465150839, iteration: 81345
loss: 0.9951772093772888,grad_norm: 0.9285579985464322, iteration: 81346
loss: 1.0072952508926392,grad_norm: 0.9233355177262497, iteration: 81347
loss: 0.9990177750587463,grad_norm: 0.9999990961768251, iteration: 81348
loss: 1.0288211107254028,grad_norm: 0.999999227306396, iteration: 81349
loss: 1.0148346424102783,grad_norm: 0.9845317731438583, iteration: 81350
loss: 1.0003243684768677,grad_norm: 0.9999991192333559, iteration: 81351
loss: 0.9912049770355225,grad_norm: 0.9999991907276119, iteration: 81352
loss: 1.0064623355865479,grad_norm: 0.9999991861623126, iteration: 81353
loss: 0.982073187828064,grad_norm: 0.9999989367539374, iteration: 81354
loss: 1.0313215255737305,grad_norm: 0.9999992222231172, iteration: 81355
loss: 1.057754397392273,grad_norm: 0.9999999295233035, iteration: 81356
loss: 0.9895716905593872,grad_norm: 0.8763245986195065, iteration: 81357
loss: 1.0303603410720825,grad_norm: 0.9999990596657888, iteration: 81358
loss: 0.9945670366287231,grad_norm: 0.9999992034461759, iteration: 81359
loss: 1.012815237045288,grad_norm: 0.9999991179690622, iteration: 81360
loss: 0.9897438883781433,grad_norm: 0.9999990093815841, iteration: 81361
loss: 0.9761506915092468,grad_norm: 0.9567308824268658, iteration: 81362
loss: 1.008862018585205,grad_norm: 0.9126352363213828, iteration: 81363
loss: 1.0048211812973022,grad_norm: 0.9999991081411269, iteration: 81364
loss: 0.9761642813682556,grad_norm: 0.8039679522463831, iteration: 81365
loss: 1.0052241086959839,grad_norm: 0.999999229187193, iteration: 81366
loss: 1.017151117324829,grad_norm: 0.8414778797721646, iteration: 81367
loss: 1.0077409744262695,grad_norm: 0.9999992013270302, iteration: 81368
loss: 1.0000450611114502,grad_norm: 0.9999989477368013, iteration: 81369
loss: 1.0036730766296387,grad_norm: 0.9999991665923649, iteration: 81370
loss: 0.98844975233078,grad_norm: 0.9999991465300726, iteration: 81371
loss: 1.011597990989685,grad_norm: 0.9608300781017326, iteration: 81372
loss: 1.0012925863265991,grad_norm: 0.9999993467509523, iteration: 81373
loss: 0.986210823059082,grad_norm: 0.9999990451412395, iteration: 81374
loss: 1.0231834650039673,grad_norm: 0.9999992281678874, iteration: 81375
loss: 1.0000128746032715,grad_norm: 0.9367995313382717, iteration: 81376
loss: 1.0263798236846924,grad_norm: 0.9878681697862148, iteration: 81377
loss: 1.0080716609954834,grad_norm: 0.9999991545934003, iteration: 81378
loss: 0.9934700727462769,grad_norm: 0.9999990105471827, iteration: 81379
loss: 1.0059199333190918,grad_norm: 0.9999993307987021, iteration: 81380
loss: 1.024322509765625,grad_norm: 0.9999992006413396, iteration: 81381
loss: 1.0096322298049927,grad_norm: 0.9999992220047144, iteration: 81382
loss: 0.9954089522361755,grad_norm: 0.9999989878439071, iteration: 81383
loss: 1.0123586654663086,grad_norm: 0.9999989171933753, iteration: 81384
loss: 0.9980563521385193,grad_norm: 0.9999991851857858, iteration: 81385
loss: 0.9966903924942017,grad_norm: 0.9999991446268781, iteration: 81386
loss: 0.9967350959777832,grad_norm: 0.9999990994387472, iteration: 81387
loss: 0.9743741154670715,grad_norm: 0.9746515106645582, iteration: 81388
loss: 0.9730949401855469,grad_norm: 0.9999989686716155, iteration: 81389
loss: 1.0090875625610352,grad_norm: 0.9624223081313883, iteration: 81390
loss: 1.0427215099334717,grad_norm: 0.9999991232192383, iteration: 81391
loss: 0.9970101118087769,grad_norm: 0.9144373005072941, iteration: 81392
loss: 1.0146483182907104,grad_norm: 0.9999999147745119, iteration: 81393
loss: 1.035112738609314,grad_norm: 0.9999992518582642, iteration: 81394
loss: 0.993542492389679,grad_norm: 0.9154480161758978, iteration: 81395
loss: 1.0144168138504028,grad_norm: 0.999998970290141, iteration: 81396
loss: 1.0049445629119873,grad_norm: 0.9000689965676298, iteration: 81397
loss: 1.0176163911819458,grad_norm: 0.999999167036942, iteration: 81398
loss: 0.9979544878005981,grad_norm: 0.9999990359914055, iteration: 81399
loss: 0.9872009754180908,grad_norm: 0.9999990004031142, iteration: 81400
loss: 0.9874253869056702,grad_norm: 0.9999996151541033, iteration: 81401
loss: 1.0218490362167358,grad_norm: 0.9376204872471038, iteration: 81402
loss: 0.980861246585846,grad_norm: 0.9046098269697606, iteration: 81403
loss: 0.9855442643165588,grad_norm: 0.9739382406053861, iteration: 81404
loss: 1.0059919357299805,grad_norm: 0.9999992099015155, iteration: 81405
loss: 0.9807232618331909,grad_norm: 0.935200729972, iteration: 81406
loss: 1.0000433921813965,grad_norm: 0.9999991074649132, iteration: 81407
loss: 1.0129456520080566,grad_norm: 0.9999990683634302, iteration: 81408
loss: 1.009565830230713,grad_norm: 0.9408433997178726, iteration: 81409
loss: 0.995021641254425,grad_norm: 0.9999990752462482, iteration: 81410
loss: 0.9472906589508057,grad_norm: 0.9999990917422747, iteration: 81411
loss: 0.9963172674179077,grad_norm: 0.9999990597626891, iteration: 81412
loss: 1.0062639713287354,grad_norm: 0.9999991553605762, iteration: 81413
loss: 1.0177361965179443,grad_norm: 0.9341558078927834, iteration: 81414
loss: 0.999113917350769,grad_norm: 0.9999991377062252, iteration: 81415
loss: 1.0173982381820679,grad_norm: 0.9522451067598118, iteration: 81416
loss: 1.0109838247299194,grad_norm: 0.8579459981840818, iteration: 81417
loss: 1.0427616834640503,grad_norm: 0.9999992137574033, iteration: 81418
loss: 0.9841997027397156,grad_norm: 0.9785092625400992, iteration: 81419
loss: 1.0025969743728638,grad_norm: 0.9791363104366013, iteration: 81420
loss: 0.9958487153053284,grad_norm: 0.8056472459801646, iteration: 81421
loss: 0.9948091506958008,grad_norm: 0.9999991133901555, iteration: 81422
loss: 0.9814174175262451,grad_norm: 0.9999992288424688, iteration: 81423
loss: 1.0122153759002686,grad_norm: 0.999999108098587, iteration: 81424
loss: 1.037278175354004,grad_norm: 0.9999990623723022, iteration: 81425
loss: 0.9951560497283936,grad_norm: 0.9075576825733728, iteration: 81426
loss: 0.9855144023895264,grad_norm: 0.9999990388528779, iteration: 81427
loss: 1.0012327432632446,grad_norm: 0.9999990312158399, iteration: 81428
loss: 1.006411075592041,grad_norm: 0.937701942282549, iteration: 81429
loss: 1.014870524406433,grad_norm: 0.9489460393209234, iteration: 81430
loss: 1.0075434446334839,grad_norm: 0.9999991328660766, iteration: 81431
loss: 0.9911565780639648,grad_norm: 0.9800315017224606, iteration: 81432
loss: 0.9834372401237488,grad_norm: 0.9999992219288099, iteration: 81433
loss: 0.9746236801147461,grad_norm: 0.9999991605567304, iteration: 81434
loss: 1.0251599550247192,grad_norm: 0.9999991127617703, iteration: 81435
loss: 1.0061253309249878,grad_norm: 0.9999993385835758, iteration: 81436
loss: 1.0140877962112427,grad_norm: 0.910345623874459, iteration: 81437
loss: 1.0080060958862305,grad_norm: 0.8546050293220341, iteration: 81438
loss: 0.9716637134552002,grad_norm: 0.9787417707900451, iteration: 81439
loss: 0.9803791046142578,grad_norm: 0.999999148231749, iteration: 81440
loss: 1.0010372400283813,grad_norm: 0.9992664174562985, iteration: 81441
loss: 0.9977979063987732,grad_norm: 0.999999043421482, iteration: 81442
loss: 0.9642532467842102,grad_norm: 0.9999991145981147, iteration: 81443
loss: 1.0037524700164795,grad_norm: 0.9999989252800137, iteration: 81444
loss: 0.9901773929595947,grad_norm: 0.99999900934475, iteration: 81445
loss: 1.0262807607650757,grad_norm: 0.9999993315804188, iteration: 81446
loss: 1.022929310798645,grad_norm: 0.999999164447285, iteration: 81447
loss: 0.9938847422599792,grad_norm: 0.9999990104649432, iteration: 81448
loss: 0.9828594326972961,grad_norm: 0.9999989438848426, iteration: 81449
loss: 0.9645004272460938,grad_norm: 0.999999138697591, iteration: 81450
loss: 1.0503321886062622,grad_norm: 0.9999990054636141, iteration: 81451
loss: 1.0309090614318848,grad_norm: 0.9999989566819175, iteration: 81452
loss: 1.0364028215408325,grad_norm: 0.9894814290010677, iteration: 81453
loss: 0.9505929350852966,grad_norm: 0.9885036811613709, iteration: 81454
loss: 0.986473023891449,grad_norm: 0.9561381276279283, iteration: 81455
loss: 0.9601931571960449,grad_norm: 0.9999988591188249, iteration: 81456
loss: 1.0242453813552856,grad_norm: 0.9999990564740585, iteration: 81457
loss: 1.0103305578231812,grad_norm: 0.9999991300216017, iteration: 81458
loss: 0.9657873511314392,grad_norm: 0.9999990573323676, iteration: 81459
loss: 0.978049635887146,grad_norm: 0.9999990998015504, iteration: 81460
loss: 1.0239462852478027,grad_norm: 0.9999990248893018, iteration: 81461
loss: 1.0189204216003418,grad_norm: 0.9182609874609327, iteration: 81462
loss: 0.9924083352088928,grad_norm: 0.9999991945362428, iteration: 81463
loss: 0.9414687156677246,grad_norm: 0.9185298771429113, iteration: 81464
loss: 1.000573754310608,grad_norm: 0.909339355385963, iteration: 81465
loss: 0.9935276508331299,grad_norm: 0.8885062996385814, iteration: 81466
loss: 1.046770453453064,grad_norm: 0.9999990913986306, iteration: 81467
loss: 1.0563631057739258,grad_norm: 0.9999991058443353, iteration: 81468
loss: 0.9595588445663452,grad_norm: 0.9999991215402665, iteration: 81469
loss: 1.0193018913269043,grad_norm: 0.9999990670593971, iteration: 81470
loss: 1.0232661962509155,grad_norm: 0.999999162905392, iteration: 81471
loss: 1.0025739669799805,grad_norm: 0.9206452266247909, iteration: 81472
loss: 1.011012077331543,grad_norm: 0.9999991112730962, iteration: 81473
loss: 0.9689809083938599,grad_norm: 0.9010245479154372, iteration: 81474
loss: 1.0133532285690308,grad_norm: 0.9999992746386991, iteration: 81475
loss: 0.9997281432151794,grad_norm: 0.9999991655665456, iteration: 81476
loss: 1.0242102146148682,grad_norm: 0.9999991032835375, iteration: 81477
loss: 1.0056356191635132,grad_norm: 0.999999135243346, iteration: 81478
loss: 0.9653169512748718,grad_norm: 0.9999990437817773, iteration: 81479
loss: 1.0072963237762451,grad_norm: 0.9999990676832987, iteration: 81480
loss: 0.9939713478088379,grad_norm: 0.9064674247114367, iteration: 81481
loss: 0.9764955639839172,grad_norm: 0.9999991098408556, iteration: 81482
loss: 1.0014417171478271,grad_norm: 0.9999991219830917, iteration: 81483
loss: 1.063697099685669,grad_norm: 0.9999991488087258, iteration: 81484
loss: 0.9837689399719238,grad_norm: 0.9999992917544193, iteration: 81485
loss: 1.0050135850906372,grad_norm: 0.9807937120040437, iteration: 81486
loss: 1.025644063949585,grad_norm: 0.9999990351920696, iteration: 81487
loss: 1.0225013494491577,grad_norm: 0.9982506866538197, iteration: 81488
loss: 0.9929661154747009,grad_norm: 0.9999990868188832, iteration: 81489
loss: 0.9924930930137634,grad_norm: 0.9999990692363266, iteration: 81490
loss: 0.9902950525283813,grad_norm: 0.9971557801287466, iteration: 81491
loss: 1.0861434936523438,grad_norm: 0.9999994541056516, iteration: 81492
loss: 0.9996844530105591,grad_norm: 0.8891940462574105, iteration: 81493
loss: 1.0194393396377563,grad_norm: 0.99999920467761, iteration: 81494
loss: 1.013862133026123,grad_norm: 0.9999990573600858, iteration: 81495
loss: 0.9810200929641724,grad_norm: 0.9999994320669234, iteration: 81496
loss: 0.9785482287406921,grad_norm: 0.8960093105725931, iteration: 81497
loss: 0.986544668674469,grad_norm: 0.8921543433325193, iteration: 81498
loss: 1.0304510593414307,grad_norm: 0.9371827093505566, iteration: 81499
loss: 0.9879655241966248,grad_norm: 0.9842767453423006, iteration: 81500
loss: 1.0279215574264526,grad_norm: 0.9999991239441337, iteration: 81501
loss: 0.9555257558822632,grad_norm: 0.9999990277185189, iteration: 81502
loss: 0.9965402483940125,grad_norm: 0.9857377238781997, iteration: 81503
loss: 0.9728716611862183,grad_norm: 0.9935977255077613, iteration: 81504
loss: 0.9864419102668762,grad_norm: 0.9999991664367277, iteration: 81505
loss: 0.977751612663269,grad_norm: 0.9742840282718139, iteration: 81506
loss: 0.9896912574768066,grad_norm: 0.999999069631171, iteration: 81507
loss: 0.991441547870636,grad_norm: 0.8643325241733865, iteration: 81508
loss: 1.0240098237991333,grad_norm: 0.9999991064492875, iteration: 81509
loss: 1.0115951299667358,grad_norm: 0.9999991381718282, iteration: 81510
loss: 0.9888512492179871,grad_norm: 0.9999990179099221, iteration: 81511
loss: 1.0012682676315308,grad_norm: 0.9999990776348586, iteration: 81512
loss: 0.9938017725944519,grad_norm: 0.817466192743983, iteration: 81513
loss: 0.9647884368896484,grad_norm: 0.9999991081105382, iteration: 81514
loss: 0.9699917435646057,grad_norm: 0.9999991276766997, iteration: 81515
loss: 0.9917049407958984,grad_norm: 0.9999991989185633, iteration: 81516
loss: 0.9858546257019043,grad_norm: 0.9478189841973046, iteration: 81517
loss: 1.0030040740966797,grad_norm: 0.9999993542844946, iteration: 81518
loss: 1.0096451044082642,grad_norm: 0.9999993535316416, iteration: 81519
loss: 0.9720939993858337,grad_norm: 0.9999992933538752, iteration: 81520
loss: 1.0237230062484741,grad_norm: 0.9999990059416832, iteration: 81521
loss: 1.0321656465530396,grad_norm: 0.9894902004973494, iteration: 81522
loss: 0.9834584593772888,grad_norm: 0.9999992898356923, iteration: 81523
loss: 1.020456314086914,grad_norm: 0.9999993323290771, iteration: 81524
loss: 1.0247836112976074,grad_norm: 0.979386070387585, iteration: 81525
loss: 0.9678411483764648,grad_norm: 0.9985379716225946, iteration: 81526
loss: 0.9915757775306702,grad_norm: 0.9999991718195478, iteration: 81527
loss: 0.9987941980361938,grad_norm: 0.9999990468397794, iteration: 81528
loss: 0.9891073107719421,grad_norm: 0.9999992042385867, iteration: 81529
loss: 0.9924768209457397,grad_norm: 0.9806977029752623, iteration: 81530
loss: 0.9919086694717407,grad_norm: 0.8967897990619722, iteration: 81531
loss: 0.9738495349884033,grad_norm: 0.9999991021291428, iteration: 81532
loss: 1.1104533672332764,grad_norm: 0.9999998558077093, iteration: 81533
loss: 1.0363621711730957,grad_norm: 0.9999991322121496, iteration: 81534
loss: 1.0095540285110474,grad_norm: 0.9999989903925226, iteration: 81535
loss: 0.9618174433708191,grad_norm: 0.9999991595937985, iteration: 81536
loss: 1.0287666320800781,grad_norm: 0.9140175837390263, iteration: 81537
loss: 1.0190374851226807,grad_norm: 0.9999994751804626, iteration: 81538
loss: 0.9935173988342285,grad_norm: 0.9999990460376939, iteration: 81539
loss: 0.9939490556716919,grad_norm: 0.9315825503128524, iteration: 81540
loss: 1.0028172731399536,grad_norm: 0.9868253801501791, iteration: 81541
loss: 0.9925369620323181,grad_norm: 0.9999990407760231, iteration: 81542
loss: 1.0192389488220215,grad_norm: 0.9907506945702248, iteration: 81543
loss: 1.0088834762573242,grad_norm: 0.9999992261419282, iteration: 81544
loss: 0.9891515374183655,grad_norm: 0.9474806602002804, iteration: 81545
loss: 0.9888137578964233,grad_norm: 0.9779430206074414, iteration: 81546
loss: 0.9986793398857117,grad_norm: 0.8083023966336192, iteration: 81547
loss: 0.9973363876342773,grad_norm: 0.978988800786186, iteration: 81548
loss: 0.9633942246437073,grad_norm: 0.9999995541555782, iteration: 81549
loss: 1.0208364725112915,grad_norm: 0.9799240605060296, iteration: 81550
loss: 1.0374367237091064,grad_norm: 0.9999990321091806, iteration: 81551
loss: 1.0005738735198975,grad_norm: 0.999999356260444, iteration: 81552
loss: 0.9847382307052612,grad_norm: 0.893079161153987, iteration: 81553
loss: 0.9958421587944031,grad_norm: 0.9999990787258742, iteration: 81554
loss: 1.0183908939361572,grad_norm: 0.9999992334738708, iteration: 81555
loss: 1.0917154550552368,grad_norm: 0.9999996033262022, iteration: 81556
loss: 0.9727775454521179,grad_norm: 0.8495503085515279, iteration: 81557
loss: 0.9860745668411255,grad_norm: 0.954151468650316, iteration: 81558
loss: 1.0136288404464722,grad_norm: 0.9999991860156148, iteration: 81559
loss: 1.0212798118591309,grad_norm: 0.9999990751086353, iteration: 81560
loss: 1.0245965719223022,grad_norm: 0.9999991325436282, iteration: 81561
loss: 1.029387354850769,grad_norm: 0.9999989595370162, iteration: 81562
loss: 0.9853265881538391,grad_norm: 0.9433383754373696, iteration: 81563
loss: 0.9805679321289062,grad_norm: 0.9999990786498935, iteration: 81564
loss: 0.9770352244377136,grad_norm: 0.9883530637579953, iteration: 81565
loss: 0.9808031320571899,grad_norm: 0.977607515232006, iteration: 81566
loss: 0.9902217388153076,grad_norm: 0.927264084716544, iteration: 81567
loss: 1.0026334524154663,grad_norm: 0.9805285196743657, iteration: 81568
loss: 1.0410208702087402,grad_norm: 0.9999996178628158, iteration: 81569
loss: 1.060858130455017,grad_norm: 0.9999991067851534, iteration: 81570
loss: 1.004647135734558,grad_norm: 0.9999992036767773, iteration: 81571
loss: 1.019494652748108,grad_norm: 0.9999991263129434, iteration: 81572
loss: 0.9806445837020874,grad_norm: 0.9999991030876021, iteration: 81573
loss: 1.057538628578186,grad_norm: 0.9999991834199871, iteration: 81574
loss: 0.985188364982605,grad_norm: 0.9999992671535202, iteration: 81575
loss: 1.0108494758605957,grad_norm: 0.9999991701422886, iteration: 81576
loss: 0.9863156080245972,grad_norm: 0.9999991550350021, iteration: 81577
loss: 0.9897045493125916,grad_norm: 0.9719469986726672, iteration: 81578
loss: 1.0105901956558228,grad_norm: 0.9999990675188953, iteration: 81579
loss: 0.9606890678405762,grad_norm: 0.9999989486416241, iteration: 81580
loss: 0.9519622325897217,grad_norm: 0.959527023709591, iteration: 81581
loss: 0.9719452857971191,grad_norm: 0.9681863061957792, iteration: 81582
loss: 0.9827840924263,grad_norm: 0.8912853785443363, iteration: 81583
loss: 1.0137561559677124,grad_norm: 0.9999998152651872, iteration: 81584
loss: 1.0096619129180908,grad_norm: 0.9999995346915035, iteration: 81585
loss: 0.9925452470779419,grad_norm: 0.9999992543682585, iteration: 81586
loss: 0.9849076867103577,grad_norm: 0.9316122498691813, iteration: 81587
loss: 0.9955220818519592,grad_norm: 0.9855535847235611, iteration: 81588
loss: 0.9922925233840942,grad_norm: 0.9544976021793731, iteration: 81589
loss: 0.976378321647644,grad_norm: 0.9999991012294822, iteration: 81590
loss: 1.0141977071762085,grad_norm: 0.9679931157044562, iteration: 81591
loss: 1.0113719701766968,grad_norm: 0.978936355416963, iteration: 81592
loss: 1.002047061920166,grad_norm: 0.9999991291492119, iteration: 81593
loss: 0.9969554543495178,grad_norm: 0.9999992143669691, iteration: 81594
loss: 0.997718870639801,grad_norm: 0.9999992951559364, iteration: 81595
loss: 0.9720239639282227,grad_norm: 0.9999991633193537, iteration: 81596
loss: 1.0227570533752441,grad_norm: 0.8595310456645503, iteration: 81597
loss: 1.0597792863845825,grad_norm: 0.9999990871384786, iteration: 81598
loss: 1.0068974494934082,grad_norm: 0.9597639702915096, iteration: 81599
loss: 0.9681029319763184,grad_norm: 0.8832131986809879, iteration: 81600
loss: 0.9835783839225769,grad_norm: 0.9971597373928012, iteration: 81601
loss: 1.0188636779785156,grad_norm: 0.9999988891358688, iteration: 81602
loss: 0.9709381461143494,grad_norm: 0.9999991204900431, iteration: 81603
loss: 1.0368634462356567,grad_norm: 0.9671247928603935, iteration: 81604
loss: 0.9939395785331726,grad_norm: 0.9999991633826403, iteration: 81605
loss: 0.980187177658081,grad_norm: 0.9999990320904216, iteration: 81606
loss: 0.9928568601608276,grad_norm: 0.9999992561074922, iteration: 81607
loss: 0.9726307392120361,grad_norm: 0.9999991416699863, iteration: 81608
loss: 0.995341420173645,grad_norm: 0.9520245314747108, iteration: 81609
loss: 1.049270749092102,grad_norm: 0.9999990753802632, iteration: 81610
loss: 0.9582720994949341,grad_norm: 0.9999990046233078, iteration: 81611
loss: 1.0499495267868042,grad_norm: 0.9999992654578018, iteration: 81612
loss: 0.9954132437705994,grad_norm: 0.9999990746084602, iteration: 81613
loss: 0.9644446969032288,grad_norm: 0.9312571136298698, iteration: 81614
loss: 0.9956445693969727,grad_norm: 0.9999992312961736, iteration: 81615
loss: 0.9704281687736511,grad_norm: 0.9415441790364166, iteration: 81616
loss: 1.044101595878601,grad_norm: 0.999999080862106, iteration: 81617
loss: 0.9870938062667847,grad_norm: 0.9999991240322462, iteration: 81618
loss: 1.103283405303955,grad_norm: 0.9999992856394654, iteration: 81619
loss: 0.980546772480011,grad_norm: 0.9254160885909439, iteration: 81620
loss: 0.9999778270721436,grad_norm: 0.999999210349566, iteration: 81621
loss: 0.9997520446777344,grad_norm: 0.965311192672076, iteration: 81622
loss: 0.9969468116760254,grad_norm: 0.999999322537472, iteration: 81623
loss: 0.9854405522346497,grad_norm: 0.9999990820920001, iteration: 81624
loss: 0.9757012128829956,grad_norm: 0.9999991957840604, iteration: 81625
loss: 1.017458200454712,grad_norm: 0.9999992511590452, iteration: 81626
loss: 0.9999788999557495,grad_norm: 0.9999991195809115, iteration: 81627
loss: 0.9879395961761475,grad_norm: 0.9999990248391882, iteration: 81628
loss: 0.9872578978538513,grad_norm: 0.9999991387142886, iteration: 81629
loss: 0.9716091156005859,grad_norm: 0.9462346369432956, iteration: 81630
loss: 1.0256133079528809,grad_norm: 0.9999995304244738, iteration: 81631
loss: 1.001030445098877,grad_norm: 0.9550198756804481, iteration: 81632
loss: 0.9564733505249023,grad_norm: 0.9999991400947007, iteration: 81633
loss: 0.989631175994873,grad_norm: 0.9999991557996287, iteration: 81634
loss: 1.004581093788147,grad_norm: 0.999999153445485, iteration: 81635
loss: 1.006029725074768,grad_norm: 0.99999915543489, iteration: 81636
loss: 1.019654631614685,grad_norm: 0.9999998895020875, iteration: 81637
loss: 1.0298666954040527,grad_norm: 0.9040071656527322, iteration: 81638
loss: 0.9998922348022461,grad_norm: 0.99999911545854, iteration: 81639
loss: 1.0041764974594116,grad_norm: 0.9999991189716685, iteration: 81640
loss: 0.9856672286987305,grad_norm: 0.951588002976042, iteration: 81641
loss: 0.9771074652671814,grad_norm: 0.9999990411042196, iteration: 81642
loss: 1.0098507404327393,grad_norm: 0.856958155024693, iteration: 81643
loss: 0.9480957388877869,grad_norm: 0.9999991009998894, iteration: 81644
loss: 1.0007611513137817,grad_norm: 0.9999991252640624, iteration: 81645
loss: 0.993620753288269,grad_norm: 0.9999991866928711, iteration: 81646
loss: 0.9970594644546509,grad_norm: 0.9999991473327106, iteration: 81647
loss: 0.9815558791160583,grad_norm: 0.999999276729779, iteration: 81648
loss: 0.9606359004974365,grad_norm: 0.9999992435551264, iteration: 81649
loss: 0.973856508731842,grad_norm: 0.9963882140175936, iteration: 81650
loss: 1.0094811916351318,grad_norm: 0.9999992103777648, iteration: 81651
loss: 0.9828556776046753,grad_norm: 0.9999991249824767, iteration: 81652
loss: 1.021315574645996,grad_norm: 0.9999992141039399, iteration: 81653
loss: 1.0292932987213135,grad_norm: 0.9999990948345273, iteration: 81654
loss: 0.9986734986305237,grad_norm: 0.9999990931752734, iteration: 81655
loss: 0.9887282252311707,grad_norm: 0.9968103279894731, iteration: 81656
loss: 0.9838197827339172,grad_norm: 0.9999991466003794, iteration: 81657
loss: 0.9696606397628784,grad_norm: 0.9999992237435488, iteration: 81658
loss: 1.0017224550247192,grad_norm: 0.8996532939766757, iteration: 81659
loss: 0.9815667271614075,grad_norm: 0.9999990812820505, iteration: 81660
loss: 1.0013203620910645,grad_norm: 0.9174718819681789, iteration: 81661
loss: 0.9764288067817688,grad_norm: 0.9999992467462262, iteration: 81662
loss: 1.0050195455551147,grad_norm: 0.9999990002480993, iteration: 81663
loss: 0.9975639581680298,grad_norm: 0.9999992634568605, iteration: 81664
loss: 1.0052706003189087,grad_norm: 0.9999991373490228, iteration: 81665
loss: 1.0000391006469727,grad_norm: 0.9172775425732989, iteration: 81666
loss: 0.9482539296150208,grad_norm: 0.9999991637268691, iteration: 81667
loss: 1.0251010656356812,grad_norm: 0.9999991013403674, iteration: 81668
loss: 1.0169310569763184,grad_norm: 0.9663449965914283, iteration: 81669
loss: 0.9932559728622437,grad_norm: 0.9999993271025601, iteration: 81670
loss: 1.0614840984344482,grad_norm: 0.9999991622897192, iteration: 81671
loss: 1.0218086242675781,grad_norm: 0.9999990123849162, iteration: 81672
loss: 1.0067851543426514,grad_norm: 0.948871793245026, iteration: 81673
loss: 1.010931134223938,grad_norm: 0.9292694371797204, iteration: 81674
loss: 1.1034404039382935,grad_norm: 0.9999991804079243, iteration: 81675
loss: 1.0292152166366577,grad_norm: 0.9999990351245696, iteration: 81676
loss: 1.0113427639007568,grad_norm: 0.9966206534543852, iteration: 81677
loss: 1.0134602785110474,grad_norm: 0.9999997210027602, iteration: 81678
loss: 0.9919071793556213,grad_norm: 0.9999989719039822, iteration: 81679
loss: 0.9771913290023804,grad_norm: 0.999999178038664, iteration: 81680
loss: 1.0136773586273193,grad_norm: 0.9971717539528888, iteration: 81681
loss: 1.0157253742218018,grad_norm: 0.9704239946429173, iteration: 81682
loss: 0.9627662897109985,grad_norm: 0.9537299625921911, iteration: 81683
loss: 0.9975951313972473,grad_norm: 0.9929564064466337, iteration: 81684
loss: 0.9982800483703613,grad_norm: 0.9999991582641427, iteration: 81685
loss: 0.9867883324623108,grad_norm: 0.9140501980748562, iteration: 81686
loss: 0.9811388254165649,grad_norm: 0.9999990960238675, iteration: 81687
loss: 0.9803211092948914,grad_norm: 0.999998954295706, iteration: 81688
loss: 1.0758310556411743,grad_norm: 0.9999992493731455, iteration: 81689
loss: 0.9974372386932373,grad_norm: 0.8350635277582202, iteration: 81690
loss: 0.9662196040153503,grad_norm: 0.9999989635412477, iteration: 81691
loss: 0.9619632363319397,grad_norm: 0.9999990684355428, iteration: 81692
loss: 1.0013413429260254,grad_norm: 0.9999990890205742, iteration: 81693
loss: 1.0075949430465698,grad_norm: 0.999999134573813, iteration: 81694
loss: 1.0293047428131104,grad_norm: 0.9999992208692848, iteration: 81695
loss: 1.0088838338851929,grad_norm: 0.9999997114047785, iteration: 81696
loss: 1.010938048362732,grad_norm: 0.9999990458215005, iteration: 81697
loss: 1.0320707559585571,grad_norm: 0.9999991365429787, iteration: 81698
loss: 0.98189777135849,grad_norm: 0.8654772361034776, iteration: 81699
loss: 0.9945297241210938,grad_norm: 0.9999991403234282, iteration: 81700
loss: 0.991865336894989,grad_norm: 0.9117501955506007, iteration: 81701
loss: 1.013590693473816,grad_norm: 0.9999991477592943, iteration: 81702
loss: 0.9901981949806213,grad_norm: 0.9156489142059918, iteration: 81703
loss: 1.0515059232711792,grad_norm: 0.9999991246615, iteration: 81704
loss: 1.0135846138000488,grad_norm: 0.9998197818525114, iteration: 81705
loss: 1.0197819471359253,grad_norm: 0.999999265676069, iteration: 81706
loss: 0.9717864990234375,grad_norm: 0.9999991384966543, iteration: 81707
loss: 0.973194420337677,grad_norm: 0.9999989649659959, iteration: 81708
loss: 1.0246397256851196,grad_norm: 0.9999990219765511, iteration: 81709
loss: 1.025245189666748,grad_norm: 0.9999999599554319, iteration: 81710
loss: 1.0061064958572388,grad_norm: 0.999999007602663, iteration: 81711
loss: 1.0452054738998413,grad_norm: 0.9999991798832198, iteration: 81712
loss: 1.007720947265625,grad_norm: 0.9999991336589177, iteration: 81713
loss: 1.0092326402664185,grad_norm: 0.8952948789053231, iteration: 81714
loss: 1.0071629285812378,grad_norm: 0.9999991331565309, iteration: 81715
loss: 0.9947569966316223,grad_norm: 0.9999992494030504, iteration: 81716
loss: 1.0046279430389404,grad_norm: 0.9999993989133994, iteration: 81717
loss: 1.0208629369735718,grad_norm: 0.999999168304999, iteration: 81718
loss: 0.9876754283905029,grad_norm: 0.9421830013744114, iteration: 81719
loss: 0.9636289477348328,grad_norm: 0.9639607029866363, iteration: 81720
loss: 0.9887563586235046,grad_norm: 0.9684288183147015, iteration: 81721
loss: 1.0227891206741333,grad_norm: 0.9999989234186138, iteration: 81722
loss: 0.9977725744247437,grad_norm: 0.9375827914039272, iteration: 81723
loss: 1.0110951662063599,grad_norm: 0.9999991579319636, iteration: 81724
loss: 1.0274279117584229,grad_norm: 0.9999991983758282, iteration: 81725
loss: 0.9902934432029724,grad_norm: 0.9636525644229, iteration: 81726
loss: 0.9961016178131104,grad_norm: 0.9999993162858287, iteration: 81727
loss: 0.990742027759552,grad_norm: 0.9999992249937134, iteration: 81728
loss: 0.9991866946220398,grad_norm: 0.9999991024615424, iteration: 81729
loss: 0.9950675368309021,grad_norm: 0.9999992050101323, iteration: 81730
loss: 0.9922188520431519,grad_norm: 0.9999990600346564, iteration: 81731
loss: 0.9852643609046936,grad_norm: 0.999998979818803, iteration: 81732
loss: 0.9687235951423645,grad_norm: 0.9999989186344086, iteration: 81733
loss: 1.0037486553192139,grad_norm: 0.8893887530762614, iteration: 81734
loss: 1.0013700723648071,grad_norm: 0.9999992263558373, iteration: 81735
loss: 0.9948415756225586,grad_norm: 0.9999990436837828, iteration: 81736
loss: 0.9709174633026123,grad_norm: 0.9999991411066508, iteration: 81737
loss: 0.9934089183807373,grad_norm: 0.9999991914318518, iteration: 81738
loss: 0.9727716445922852,grad_norm: 0.974097290414246, iteration: 81739
loss: 1.0835154056549072,grad_norm: 0.9999998369618517, iteration: 81740
loss: 1.0105171203613281,grad_norm: 0.8678333208037113, iteration: 81741
loss: 0.9941368103027344,grad_norm: 0.9999990252934374, iteration: 81742
loss: 1.1065582036972046,grad_norm: 0.999999816584693, iteration: 81743
loss: 1.0075981616973877,grad_norm: 0.9999991555220574, iteration: 81744
loss: 1.013393521308899,grad_norm: 0.999274166722482, iteration: 81745
loss: 1.0006349086761475,grad_norm: 0.9610580059100113, iteration: 81746
loss: 0.9912853837013245,grad_norm: 0.9999990959444935, iteration: 81747
loss: 0.9655330181121826,grad_norm: 0.9999989671893422, iteration: 81748
loss: 0.9975007772445679,grad_norm: 0.9150251263334638, iteration: 81749
loss: 1.004379153251648,grad_norm: 0.9999990653271178, iteration: 81750
loss: 0.9937090277671814,grad_norm: 0.9136651038783331, iteration: 81751
loss: 1.0479578971862793,grad_norm: 0.9999995694343327, iteration: 81752
loss: 0.9848684668540955,grad_norm: 0.9930589771694129, iteration: 81753
loss: 0.9642845392227173,grad_norm: 0.9853078343834879, iteration: 81754
loss: 1.0028269290924072,grad_norm: 0.9391805973032025, iteration: 81755
loss: 1.0011684894561768,grad_norm: 0.9999992920358131, iteration: 81756
loss: 1.028961181640625,grad_norm: 0.999999422984331, iteration: 81757
loss: 0.9916689395904541,grad_norm: 0.9999990338064838, iteration: 81758
loss: 0.9755754470825195,grad_norm: 0.9999993285944222, iteration: 81759
loss: 0.9861974120140076,grad_norm: 0.9654604862985805, iteration: 81760
loss: 0.9981909990310669,grad_norm: 0.9793226852911832, iteration: 81761
loss: 1.0040748119354248,grad_norm: 0.9681894610550676, iteration: 81762
loss: 1.0204441547393799,grad_norm: 0.9999994281118073, iteration: 81763
loss: 0.9790374636650085,grad_norm: 0.9317598149614821, iteration: 81764
loss: 0.9980279803276062,grad_norm: 0.9999991917766391, iteration: 81765
loss: 1.0319968461990356,grad_norm: 0.9028784776460019, iteration: 81766
loss: 1.0193794965744019,grad_norm: 0.9999989727998414, iteration: 81767
loss: 1.0135736465454102,grad_norm: 0.999999148249293, iteration: 81768
loss: 1.00668466091156,grad_norm: 0.963558948527168, iteration: 81769
loss: 0.9950666427612305,grad_norm: 0.9999990858348063, iteration: 81770
loss: 1.0144355297088623,grad_norm: 0.8926566343650089, iteration: 81771
loss: 0.9958963394165039,grad_norm: 0.9999990845191548, iteration: 81772
loss: 1.0064582824707031,grad_norm: 0.9885704475115079, iteration: 81773
loss: 1.0175049304962158,grad_norm: 0.999999143538503, iteration: 81774
loss: 1.039700984954834,grad_norm: 0.999999197992941, iteration: 81775
loss: 0.975104808807373,grad_norm: 0.9999990916028408, iteration: 81776
loss: 1.0491474866867065,grad_norm: 0.999999129416277, iteration: 81777
loss: 0.995658278465271,grad_norm: 0.9999989712691166, iteration: 81778
loss: 0.9813703298568726,grad_norm: 0.9999991435055702, iteration: 81779
loss: 1.0097211599349976,grad_norm: 0.95995863635173, iteration: 81780
loss: 1.0135759115219116,grad_norm: 0.9999993499892005, iteration: 81781
loss: 1.0167810916900635,grad_norm: 0.8707491467663452, iteration: 81782
loss: 0.975281834602356,grad_norm: 0.9999990849732406, iteration: 81783
loss: 0.9864092469215393,grad_norm: 0.9999992062026507, iteration: 81784
loss: 1.0132761001586914,grad_norm: 0.9116826747841237, iteration: 81785
loss: 0.9593297243118286,grad_norm: 0.9999992126758118, iteration: 81786
loss: 1.0053693056106567,grad_norm: 0.999999173735376, iteration: 81787
loss: 0.9533609747886658,grad_norm: 0.9999990647409476, iteration: 81788
loss: 1.0183552503585815,grad_norm: 0.9999991090951713, iteration: 81789
loss: 0.9975979924201965,grad_norm: 0.9999997012328306, iteration: 81790
loss: 0.988381028175354,grad_norm: 0.9999992296687563, iteration: 81791
loss: 1.0251768827438354,grad_norm: 0.9999990481654429, iteration: 81792
loss: 1.008052945137024,grad_norm: 0.9308980241821718, iteration: 81793
loss: 1.039468765258789,grad_norm: 0.9999992493259191, iteration: 81794
loss: 1.008975625038147,grad_norm: 0.9999990723076841, iteration: 81795
loss: 0.9870598912239075,grad_norm: 0.9999989099310753, iteration: 81796
loss: 0.9422764182090759,grad_norm: 0.9999989740599544, iteration: 81797
loss: 1.0287322998046875,grad_norm: 0.9999991055865112, iteration: 81798
loss: 0.9972946643829346,grad_norm: 0.8653683831154493, iteration: 81799
loss: 0.9745369553565979,grad_norm: 0.9999989285274178, iteration: 81800
loss: 0.9709899425506592,grad_norm: 0.9994543158881787, iteration: 81801
loss: 1.0122270584106445,grad_norm: 0.9999993700753295, iteration: 81802
loss: 0.9647471308708191,grad_norm: 0.9999993299199118, iteration: 81803
loss: 1.0196460485458374,grad_norm: 0.9999992714785189, iteration: 81804
loss: 0.9706003665924072,grad_norm: 0.9587689052593934, iteration: 81805
loss: 1.0269372463226318,grad_norm: 0.8700309755828401, iteration: 81806
loss: 1.0676298141479492,grad_norm: 0.8579827272242606, iteration: 81807
loss: 1.0242571830749512,grad_norm: 0.9999991405379262, iteration: 81808
loss: 0.9863227009773254,grad_norm: 0.9999992303578928, iteration: 81809
loss: 1.0261681079864502,grad_norm: 0.9999991513623182, iteration: 81810
loss: 1.0428277254104614,grad_norm: 0.9999990969894492, iteration: 81811
loss: 0.9892507195472717,grad_norm: 0.9999991238579551, iteration: 81812
loss: 0.9697146415710449,grad_norm: 0.9874088698188903, iteration: 81813
loss: 1.0016586780548096,grad_norm: 0.9999990153999876, iteration: 81814
loss: 1.0122487545013428,grad_norm: 0.9999997372878543, iteration: 81815
loss: 1.0055067539215088,grad_norm: 0.9830269165790434, iteration: 81816
loss: 0.9402390718460083,grad_norm: 0.9825732505986401, iteration: 81817
loss: 1.075291633605957,grad_norm: 0.9999996749539745, iteration: 81818
loss: 0.9848611950874329,grad_norm: 0.9999991095170027, iteration: 81819
loss: 1.0175421237945557,grad_norm: 0.9999994613662944, iteration: 81820
loss: 0.9986754059791565,grad_norm: 0.9901302682188466, iteration: 81821
loss: 0.9931853413581848,grad_norm: 0.9999990666455275, iteration: 81822
loss: 0.9895763993263245,grad_norm: 0.9999992248150317, iteration: 81823
loss: 0.9890380501747131,grad_norm: 0.9999995672504682, iteration: 81824
loss: 1.0582866668701172,grad_norm: 0.9999992222075478, iteration: 81825
loss: 1.0453016757965088,grad_norm: 0.9736520028015738, iteration: 81826
loss: 0.992811381816864,grad_norm: 0.9587383721180763, iteration: 81827
loss: 1.0085711479187012,grad_norm: 0.9999991106093773, iteration: 81828
loss: 1.0131676197052002,grad_norm: 0.9259288694683668, iteration: 81829
loss: 0.9585627913475037,grad_norm: 0.9999991850387634, iteration: 81830
loss: 1.0060815811157227,grad_norm: 0.9904665331835467, iteration: 81831
loss: 1.0227539539337158,grad_norm: 0.9999991249116897, iteration: 81832
loss: 0.9643428921699524,grad_norm: 0.9999990663349319, iteration: 81833
loss: 1.0170060396194458,grad_norm: 0.9999993311517048, iteration: 81834
loss: 1.0254554748535156,grad_norm: 0.9999992384325483, iteration: 81835
loss: 1.0365508794784546,grad_norm: 0.9177782883930031, iteration: 81836
loss: 0.9895984530448914,grad_norm: 0.9504311069583297, iteration: 81837
loss: 1.020858645439148,grad_norm: 0.9999990488495881, iteration: 81838
loss: 1.0140200853347778,grad_norm: 0.9999991325158049, iteration: 81839
loss: 0.9910494089126587,grad_norm: 0.9999995067981865, iteration: 81840
loss: 1.0098057985305786,grad_norm: 0.8805337887351481, iteration: 81841
loss: 1.030948519706726,grad_norm: 0.956236578501834, iteration: 81842
loss: 0.9536861777305603,grad_norm: 0.9999992063032472, iteration: 81843
loss: 1.0246955156326294,grad_norm: 0.9999989744516101, iteration: 81844
loss: 0.9748484492301941,grad_norm: 0.9999990668844824, iteration: 81845
loss: 1.000803828239441,grad_norm: 0.9999990981367014, iteration: 81846
loss: 0.9893975853919983,grad_norm: 0.9999992045420321, iteration: 81847
loss: 1.0173369646072388,grad_norm: 0.9999991140306234, iteration: 81848
loss: 0.9754763841629028,grad_norm: 0.9999990793501922, iteration: 81849
loss: 1.0099769830703735,grad_norm: 0.9999989635133484, iteration: 81850
loss: 1.0558351278305054,grad_norm: 0.9999991929056083, iteration: 81851
loss: 0.9996366500854492,grad_norm: 0.9999992974856035, iteration: 81852
loss: 1.2112374305725098,grad_norm: 0.999999904115817, iteration: 81853
loss: 1.0281869173049927,grad_norm: 0.9999991781312628, iteration: 81854
loss: 0.9942715764045715,grad_norm: 0.945793050521891, iteration: 81855
loss: 1.0035245418548584,grad_norm: 0.9878829918025841, iteration: 81856
loss: 0.9986788630485535,grad_norm: 0.963074164484732, iteration: 81857
loss: 1.0168777704238892,grad_norm: 0.9999996889591638, iteration: 81858
loss: 1.0007301568984985,grad_norm: 0.9075163372677466, iteration: 81859
loss: 1.0188575983047485,grad_norm: 0.9999991148926491, iteration: 81860
loss: 1.0349531173706055,grad_norm: 0.9999991784895278, iteration: 81861
loss: 0.9863748550415039,grad_norm: 0.999999644861986, iteration: 81862
loss: 0.9724411964416504,grad_norm: 0.9999991764338614, iteration: 81863
loss: 0.9946887493133545,grad_norm: 0.9999993550485866, iteration: 81864
loss: 1.008134126663208,grad_norm: 0.9999991829050411, iteration: 81865
loss: 0.9824033379554749,grad_norm: 0.9999992338934925, iteration: 81866
loss: 1.1037282943725586,grad_norm: 0.9999997552840666, iteration: 81867
loss: 1.0172145366668701,grad_norm: 0.999999179161097, iteration: 81868
loss: 0.9842801690101624,grad_norm: 0.9573868392451703, iteration: 81869
loss: 0.9965354800224304,grad_norm: 0.9206379049193097, iteration: 81870
loss: 1.0248738527297974,grad_norm: 0.9999993044403804, iteration: 81871
loss: 1.009651780128479,grad_norm: 0.9999991227381448, iteration: 81872
loss: 1.0497745275497437,grad_norm: 0.9999991830377843, iteration: 81873
loss: 1.0407172441482544,grad_norm: 0.9999992292879325, iteration: 81874
loss: 0.9928978085517883,grad_norm: 0.9999991970669061, iteration: 81875
loss: 1.0376659631729126,grad_norm: 0.9999998708508867, iteration: 81876
loss: 1.0115091800689697,grad_norm: 0.9999990277005939, iteration: 81877
loss: 0.9610161185264587,grad_norm: 0.9301976700290087, iteration: 81878
loss: 1.0871273279190063,grad_norm: 0.9999994386175488, iteration: 81879
loss: 0.998114824295044,grad_norm: 0.9999990971287298, iteration: 81880
loss: 0.9994173049926758,grad_norm: 0.9446838598311696, iteration: 81881
loss: 0.9796541333198547,grad_norm: 0.9958265097575328, iteration: 81882
loss: 1.0295964479446411,grad_norm: 0.9999990399313318, iteration: 81883
loss: 1.0257941484451294,grad_norm: 0.9495213689316837, iteration: 81884
loss: 0.9843558669090271,grad_norm: 0.9999991534496271, iteration: 81885
loss: 1.0034738779067993,grad_norm: 0.9138196853895881, iteration: 81886
loss: 0.9707922339439392,grad_norm: 0.8989303519919095, iteration: 81887
loss: 1.003960371017456,grad_norm: 0.9999990963599985, iteration: 81888
loss: 1.0200014114379883,grad_norm: 0.9999992800059089, iteration: 81889
loss: 0.9612339735031128,grad_norm: 0.8339070137918007, iteration: 81890
loss: 0.9954515099525452,grad_norm: 0.8574070969388998, iteration: 81891
loss: 1.002204179763794,grad_norm: 0.9774321013159609, iteration: 81892
loss: 1.01939857006073,grad_norm: 0.9999990998533462, iteration: 81893
loss: 0.9786758422851562,grad_norm: 0.9999991207522541, iteration: 81894
loss: 1.0104155540466309,grad_norm: 0.9999990098185381, iteration: 81895
loss: 1.0162348747253418,grad_norm: 0.9999990777509687, iteration: 81896
loss: 0.9985495805740356,grad_norm: 0.9999992075259067, iteration: 81897
loss: 0.9957394003868103,grad_norm: 0.9999991138342944, iteration: 81898
loss: 1.021392822265625,grad_norm: 0.99999904663503, iteration: 81899
loss: 1.014114260673523,grad_norm: 0.9999991182038069, iteration: 81900
loss: 1.0272496938705444,grad_norm: 0.999999168968409, iteration: 81901
loss: 0.9937333464622498,grad_norm: 0.9999993072948323, iteration: 81902
loss: 1.0133181810379028,grad_norm: 0.9999991188691897, iteration: 81903
loss: 0.9708690047264099,grad_norm: 0.9999991289563436, iteration: 81904
loss: 0.9733789563179016,grad_norm: 0.9999992712341822, iteration: 81905
loss: 0.9923378825187683,grad_norm: 0.9699915449511195, iteration: 81906
loss: 0.993848443031311,grad_norm: 0.8924815246954276, iteration: 81907
loss: 1.0223512649536133,grad_norm: 0.9999990716671746, iteration: 81908
loss: 1.0043193101882935,grad_norm: 0.9813328948369764, iteration: 81909
loss: 1.0045045614242554,grad_norm: 0.9999990903869658, iteration: 81910
loss: 1.0279600620269775,grad_norm: 0.9847910138092667, iteration: 81911
loss: 0.9424536228179932,grad_norm: 0.999999165411852, iteration: 81912
loss: 0.9864746928215027,grad_norm: 0.9999993268625689, iteration: 81913
loss: 1.0221283435821533,grad_norm: 0.9999991734697732, iteration: 81914
loss: 0.993195652961731,grad_norm: 0.9325289129682566, iteration: 81915
loss: 0.9666004180908203,grad_norm: 0.9959819276595426, iteration: 81916
loss: 1.0330005884170532,grad_norm: 0.9999990322321354, iteration: 81917
loss: 1.0175915956497192,grad_norm: 0.9999991575284634, iteration: 81918
loss: 1.0177342891693115,grad_norm: 0.999999116906702, iteration: 81919
loss: 1.0523146390914917,grad_norm: 0.9999996095014192, iteration: 81920
loss: 1.007017970085144,grad_norm: 0.9999996006997153, iteration: 81921
loss: 1.002027153968811,grad_norm: 0.9999991276639328, iteration: 81922
loss: 1.0114355087280273,grad_norm: 0.999999255066987, iteration: 81923
loss: 1.0257017612457275,grad_norm: 0.9631142386759893, iteration: 81924
loss: 1.0092560052871704,grad_norm: 0.9999990721589472, iteration: 81925
loss: 0.9993736147880554,grad_norm: 0.9999990845620869, iteration: 81926
loss: 0.9918509125709534,grad_norm: 0.9999992072687803, iteration: 81927
loss: 1.042535424232483,grad_norm: 0.966835523737409, iteration: 81928
loss: 1.0361707210540771,grad_norm: 0.999999096514203, iteration: 81929
loss: 1.0231186151504517,grad_norm: 0.8904631676970913, iteration: 81930
loss: 1.0009615421295166,grad_norm: 0.8950499883212046, iteration: 81931
loss: 1.0101118087768555,grad_norm: 0.9169958725726839, iteration: 81932
loss: 1.005899429321289,grad_norm: 0.9999991327367587, iteration: 81933
loss: 0.9889782071113586,grad_norm: 0.9999991713794121, iteration: 81934
loss: 0.9663711786270142,grad_norm: 0.9999991596872454, iteration: 81935
loss: 0.9849836826324463,grad_norm: 0.9754052521017395, iteration: 81936
loss: 1.0419942140579224,grad_norm: 0.9999993581963089, iteration: 81937
loss: 1.011430263519287,grad_norm: 0.9953283114316961, iteration: 81938
loss: 0.9786099195480347,grad_norm: 0.9999990067751379, iteration: 81939
loss: 1.0246150493621826,grad_norm: 0.9999990615754013, iteration: 81940
loss: 1.0347670316696167,grad_norm: 0.9999990890635011, iteration: 81941
loss: 0.9787521362304688,grad_norm: 0.964081395689833, iteration: 81942
loss: 1.001180648803711,grad_norm: 0.976476310148834, iteration: 81943
loss: 0.9931066632270813,grad_norm: 0.9999993124312188, iteration: 81944
loss: 1.1116667985916138,grad_norm: 0.9999991511389671, iteration: 81945
loss: 1.0649067163467407,grad_norm: 0.9999991450458823, iteration: 81946
loss: 0.9863049983978271,grad_norm: 0.9999990631839817, iteration: 81947
loss: 1.0134536027908325,grad_norm: 0.9998313731084271, iteration: 81948
loss: 1.0544328689575195,grad_norm: 0.9999989591122973, iteration: 81949
loss: 0.9844534993171692,grad_norm: 0.9632646023486627, iteration: 81950
loss: 1.0057998895645142,grad_norm: 0.9582364405722563, iteration: 81951
loss: 1.0234349966049194,grad_norm: 0.9877010676043572, iteration: 81952
loss: 0.9676670432090759,grad_norm: 0.9999991206882811, iteration: 81953
loss: 0.99129319190979,grad_norm: 0.9999990461633669, iteration: 81954
loss: 0.9990932941436768,grad_norm: 0.9192832399660683, iteration: 81955
loss: 0.9852924942970276,grad_norm: 0.9999988990957906, iteration: 81956
loss: 0.9976381063461304,grad_norm: 0.9999991550306074, iteration: 81957
loss: 0.9818920493125916,grad_norm: 0.9669162230351133, iteration: 81958
loss: 0.9992067217826843,grad_norm: 0.9999990258574277, iteration: 81959
loss: 1.0003081560134888,grad_norm: 0.9251573031547231, iteration: 81960
loss: 1.0035258531570435,grad_norm: 0.9025105248795822, iteration: 81961
loss: 1.0306107997894287,grad_norm: 0.9999991405190591, iteration: 81962
loss: 0.9826815128326416,grad_norm: 0.999999085523079, iteration: 81963
loss: 1.017472267150879,grad_norm: 0.9999989719624194, iteration: 81964
loss: 1.0176501274108887,grad_norm: 0.9928832597807795, iteration: 81965
loss: 1.0095912218093872,grad_norm: 0.8939383728209667, iteration: 81966
loss: 1.0320782661437988,grad_norm: 0.9999992207189998, iteration: 81967
loss: 0.9887843728065491,grad_norm: 0.8967578262810737, iteration: 81968
loss: 1.0064818859100342,grad_norm: 0.9999990245323923, iteration: 81969
loss: 1.004235863685608,grad_norm: 0.8466609989177342, iteration: 81970
loss: 1.000244379043579,grad_norm: 0.999999140962248, iteration: 81971
loss: 0.9954721331596375,grad_norm: 0.9999990981637386, iteration: 81972
loss: 0.9787794351577759,grad_norm: 0.8467627337264126, iteration: 81973
loss: 1.0167310237884521,grad_norm: 0.9273736928554739, iteration: 81974
loss: 0.9980818629264832,grad_norm: 0.9386227341951242, iteration: 81975
loss: 0.997133195400238,grad_norm: 0.9999991483721892, iteration: 81976
loss: 1.0165581703186035,grad_norm: 0.9002981958032504, iteration: 81977
loss: 1.006037950515747,grad_norm: 0.999999133005536, iteration: 81978
loss: 0.9978870153427124,grad_norm: 0.9999992306939556, iteration: 81979
loss: 1.0104845762252808,grad_norm: 0.9999991247087671, iteration: 81980
loss: 1.0116829872131348,grad_norm: 0.999999086997523, iteration: 81981
loss: 0.9802908301353455,grad_norm: 0.8762528077234505, iteration: 81982
loss: 0.9720416069030762,grad_norm: 0.9999991166110174, iteration: 81983
loss: 1.0207666158676147,grad_norm: 0.9999989797712425, iteration: 81984
loss: 1.0082814693450928,grad_norm: 0.9999990758258215, iteration: 81985
loss: 0.9938277006149292,grad_norm: 0.9999991288695361, iteration: 81986
loss: 1.017427921295166,grad_norm: 0.9999991414516036, iteration: 81987
loss: 1.0290638208389282,grad_norm: 0.9999990403943131, iteration: 81988
loss: 0.9956428408622742,grad_norm: 0.897362085116143, iteration: 81989
loss: 1.0166568756103516,grad_norm: 0.9314307042853378, iteration: 81990
loss: 1.0024768114089966,grad_norm: 0.9999991069960393, iteration: 81991
loss: 1.0337932109832764,grad_norm: 0.9999990037387096, iteration: 81992
loss: 1.0431485176086426,grad_norm: 0.9999992496320982, iteration: 81993
loss: 1.0058382749557495,grad_norm: 0.9999990899621841, iteration: 81994
loss: 1.0032938718795776,grad_norm: 0.9999992348098531, iteration: 81995
loss: 1.0177723169326782,grad_norm: 0.9968610118363772, iteration: 81996
loss: 0.9905033707618713,grad_norm: 0.9999990838626781, iteration: 81997
loss: 0.9703187942504883,grad_norm: 0.9999991392237781, iteration: 81998
loss: 1.0454975366592407,grad_norm: 0.8968415677572945, iteration: 81999
loss: 1.0129026174545288,grad_norm: 0.9436766659828388, iteration: 82000
loss: 1.0229685306549072,grad_norm: 0.9756853704841325, iteration: 82001
loss: 0.9807460904121399,grad_norm: 0.9999991551915589, iteration: 82002
loss: 0.9974845051765442,grad_norm: 0.999999225971472, iteration: 82003
loss: 0.9942951202392578,grad_norm: 0.7806421914313961, iteration: 82004
loss: 1.0481265783309937,grad_norm: 0.9999990862244843, iteration: 82005
loss: 1.0399956703186035,grad_norm: 0.9178567501779555, iteration: 82006
loss: 0.9960905909538269,grad_norm: 0.9999991535871658, iteration: 82007
loss: 0.9865197539329529,grad_norm: 0.9999992526987026, iteration: 82008
loss: 0.979943037033081,grad_norm: 0.9999991115261507, iteration: 82009
loss: 1.0525141954421997,grad_norm: 0.9999989841905174, iteration: 82010
loss: 0.9936316013336182,grad_norm: 0.9999991126995249, iteration: 82011
loss: 1.019163727760315,grad_norm: 0.9267941392698832, iteration: 82012
loss: 1.0028380155563354,grad_norm: 0.8895756982603361, iteration: 82013
loss: 0.9804463386535645,grad_norm: 0.9999990487469821, iteration: 82014
loss: 1.0008947849273682,grad_norm: 0.9999989582256845, iteration: 82015
loss: 0.9977443218231201,grad_norm: 0.9999991396249629, iteration: 82016
loss: 0.9950474500656128,grad_norm: 0.972676178928729, iteration: 82017
loss: 1.0135645866394043,grad_norm: 0.8866119476199426, iteration: 82018
loss: 1.0223393440246582,grad_norm: 0.9999990377738909, iteration: 82019
loss: 1.0038175582885742,grad_norm: 0.9999991391258815, iteration: 82020
loss: 0.9895389080047607,grad_norm: 0.9999989982089429, iteration: 82021
loss: 0.9974668622016907,grad_norm: 0.9999990298739232, iteration: 82022
loss: 0.9921777248382568,grad_norm: 0.9999992016288215, iteration: 82023
loss: 0.9735986590385437,grad_norm: 0.9999992517419639, iteration: 82024
loss: 1.0008323192596436,grad_norm: 0.9999991772489428, iteration: 82025
loss: 1.012158989906311,grad_norm: 0.8500446949307132, iteration: 82026
loss: 1.00111985206604,grad_norm: 0.9662369718524031, iteration: 82027
loss: 1.0231242179870605,grad_norm: 0.9383288593294785, iteration: 82028
loss: 1.0106613636016846,grad_norm: 0.9999991083948055, iteration: 82029
loss: 0.9600456953048706,grad_norm: 0.9999991810780673, iteration: 82030
loss: 0.9810248017311096,grad_norm: 0.99999936940711, iteration: 82031
loss: 0.9843647480010986,grad_norm: 0.9999991684355262, iteration: 82032
loss: 0.9755326509475708,grad_norm: 0.9999989829203512, iteration: 82033
loss: 1.0473408699035645,grad_norm: 0.9999992546302706, iteration: 82034
loss: 0.989742636680603,grad_norm: 0.99999907455369, iteration: 82035
loss: 0.982632577419281,grad_norm: 0.9663685213638853, iteration: 82036
loss: 0.9731513261795044,grad_norm: 0.9999991759126897, iteration: 82037
loss: 1.0173641443252563,grad_norm: 0.9094087150320103, iteration: 82038
loss: 1.0338448286056519,grad_norm: 0.9999991321985415, iteration: 82039
loss: 1.0047900676727295,grad_norm: 0.9999992338701502, iteration: 82040
loss: 1.00786292552948,grad_norm: 0.9999990166820709, iteration: 82041
loss: 0.9957723021507263,grad_norm: 0.9999991011519654, iteration: 82042
loss: 1.0540530681610107,grad_norm: 0.9999996349421894, iteration: 82043
loss: 1.0103201866149902,grad_norm: 0.9999992252396538, iteration: 82044
loss: 1.018094778060913,grad_norm: 0.922235874633762, iteration: 82045
loss: 0.9859544038772583,grad_norm: 0.8643883833897336, iteration: 82046
loss: 1.026565432548523,grad_norm: 0.9999990519281888, iteration: 82047
loss: 0.9807920455932617,grad_norm: 0.9948179430103625, iteration: 82048
loss: 1.0108184814453125,grad_norm: 0.9999990132274638, iteration: 82049
loss: 0.9830187559127808,grad_norm: 0.9654458012395255, iteration: 82050
loss: 0.9637594223022461,grad_norm: 0.9999992028340623, iteration: 82051
loss: 1.014520525932312,grad_norm: 0.9334892769993252, iteration: 82052
loss: 1.000930905342102,grad_norm: 0.8950411227772987, iteration: 82053
loss: 1.0009775161743164,grad_norm: 0.9999990847221416, iteration: 82054
loss: 0.9746586084365845,grad_norm: 0.9877592719992138, iteration: 82055
loss: 0.9930465221405029,grad_norm: 0.9921158033177193, iteration: 82056
loss: 1.0112203359603882,grad_norm: 0.942499207788458, iteration: 82057
loss: 1.0171639919281006,grad_norm: 0.9999990789487946, iteration: 82058
loss: 0.9789065718650818,grad_norm: 0.9999989884990361, iteration: 82059
loss: 0.96597820520401,grad_norm: 0.9999992118837956, iteration: 82060
loss: 0.9724663496017456,grad_norm: 0.9999991515438638, iteration: 82061
loss: 0.978948175907135,grad_norm: 0.9999992191369046, iteration: 82062
loss: 0.9682089686393738,grad_norm: 0.9999992816020747, iteration: 82063
loss: 0.9567318558692932,grad_norm: 0.9999990716771467, iteration: 82064
loss: 0.9780155420303345,grad_norm: 0.977005171920271, iteration: 82065
loss: 0.9859883785247803,grad_norm: 0.999998846912313, iteration: 82066
loss: 1.0279619693756104,grad_norm: 0.9999990033161614, iteration: 82067
loss: 0.9701365232467651,grad_norm: 0.9999991870929268, iteration: 82068
loss: 1.0538078546524048,grad_norm: 0.9999997077090904, iteration: 82069
loss: 0.992539644241333,grad_norm: 0.9999990942803995, iteration: 82070
loss: 1.031625509262085,grad_norm: 0.9999992005600521, iteration: 82071
loss: 1.0351027250289917,grad_norm: 0.9999992231992414, iteration: 82072
loss: 0.9607216119766235,grad_norm: 0.9999992984423867, iteration: 82073
loss: 0.9701306223869324,grad_norm: 0.9999990880722301, iteration: 82074
loss: 0.9890620708465576,grad_norm: 0.999999062099787, iteration: 82075
loss: 1.0252361297607422,grad_norm: 0.9999998070533354, iteration: 82076
loss: 1.0027167797088623,grad_norm: 0.9999990223695041, iteration: 82077
loss: 1.019965410232544,grad_norm: 0.9999990559394459, iteration: 82078
loss: 0.9688827395439148,grad_norm: 0.9999991632679363, iteration: 82079
loss: 0.9491865634918213,grad_norm: 0.8957443750623165, iteration: 82080
loss: 1.0069124698638916,grad_norm: 0.9999990668784751, iteration: 82081
loss: 1.0072945356369019,grad_norm: 0.9999994441605089, iteration: 82082
loss: 0.9974265694618225,grad_norm: 0.9999991598713905, iteration: 82083
loss: 1.0294156074523926,grad_norm: 0.9999990796779709, iteration: 82084
loss: 0.9958763122558594,grad_norm: 0.9999992051516143, iteration: 82085
loss: 0.9809305667877197,grad_norm: 0.9361691401683558, iteration: 82086
loss: 0.987846314907074,grad_norm: 0.9290157158522466, iteration: 82087
loss: 1.0122592449188232,grad_norm: 0.9999993132247753, iteration: 82088
loss: 1.0254911184310913,grad_norm: 0.9999991289598613, iteration: 82089
loss: 1.0094939470291138,grad_norm: 0.989155973007501, iteration: 82090
loss: 0.9856144189834595,grad_norm: 0.9859972945011262, iteration: 82091
loss: 0.9848388433456421,grad_norm: 0.9999991972607465, iteration: 82092
loss: 1.038953185081482,grad_norm: 0.9999989354329331, iteration: 82093
loss: 1.0263426303863525,grad_norm: 0.9999991366184563, iteration: 82094
loss: 1.013257622718811,grad_norm: 0.9999990592948318, iteration: 82095
loss: 1.0084375143051147,grad_norm: 0.8540530645326257, iteration: 82096
loss: 0.9843840599060059,grad_norm: 0.9999990330240067, iteration: 82097
loss: 1.0094196796417236,grad_norm: 0.9583997982846867, iteration: 82098
loss: 0.9850601553916931,grad_norm: 0.9884280068287215, iteration: 82099
loss: 0.9708242416381836,grad_norm: 0.959987518754034, iteration: 82100
loss: 0.9997839331626892,grad_norm: 0.998245215339849, iteration: 82101
loss: 0.9988824129104614,grad_norm: 0.9999991678851323, iteration: 82102
loss: 1.0177863836288452,grad_norm: 0.9836210739880453, iteration: 82103
loss: 0.9972501397132874,grad_norm: 0.9999990129147304, iteration: 82104
loss: 0.9951499104499817,grad_norm: 0.9999991379121698, iteration: 82105
loss: 1.0295745134353638,grad_norm: 0.9999991022747746, iteration: 82106
loss: 0.9961259961128235,grad_norm: 0.9999989981060511, iteration: 82107
loss: 1.009067416191101,grad_norm: 0.9644555560048732, iteration: 82108
loss: 0.9869333505630493,grad_norm: 0.9999992142568471, iteration: 82109
loss: 0.9832255244255066,grad_norm: 0.9881506739346237, iteration: 82110
loss: 0.991043746471405,grad_norm: 0.9999990840376078, iteration: 82111
loss: 1.0154627561569214,grad_norm: 0.9999993035192708, iteration: 82112
loss: 1.0235769748687744,grad_norm: 0.9999991474190161, iteration: 82113
loss: 1.0444241762161255,grad_norm: 0.9999993492684082, iteration: 82114
loss: 1.0199655294418335,grad_norm: 0.9999989743237585, iteration: 82115
loss: 0.9827482104301453,grad_norm: 0.9999992627682217, iteration: 82116
loss: 1.0047193765640259,grad_norm: 0.9806744879952279, iteration: 82117
loss: 1.003488302230835,grad_norm: 0.9999990216279018, iteration: 82118
loss: 1.009792685508728,grad_norm: 0.8508157424169313, iteration: 82119
loss: 1.0210891962051392,grad_norm: 0.9732129696411719, iteration: 82120
loss: 1.0145487785339355,grad_norm: 0.9999990698711131, iteration: 82121
loss: 1.0061408281326294,grad_norm: 0.999998971222043, iteration: 82122
loss: 0.9838323593139648,grad_norm: 0.9294881680030165, iteration: 82123
loss: 1.008363962173462,grad_norm: 0.999999129030277, iteration: 82124
loss: 0.9917013645172119,grad_norm: 0.999999447211021, iteration: 82125
loss: 1.0633219480514526,grad_norm: 0.9999994160069263, iteration: 82126
loss: 0.999637246131897,grad_norm: 0.9999990363484766, iteration: 82127
loss: 1.0210840702056885,grad_norm: 0.9999992625965684, iteration: 82128
loss: 0.9906790852546692,grad_norm: 0.9707348672651496, iteration: 82129
loss: 1.002622127532959,grad_norm: 0.9999992334470337, iteration: 82130
loss: 1.0581687688827515,grad_norm: 0.9999992109060805, iteration: 82131
loss: 0.9917904138565063,grad_norm: 0.9999991117971724, iteration: 82132
loss: 0.9841939806938171,grad_norm: 0.9207215651595486, iteration: 82133
loss: 0.9953274130821228,grad_norm: 0.9779035403625368, iteration: 82134
loss: 0.9747031331062317,grad_norm: 0.9999990357810065, iteration: 82135
loss: 0.9896490573883057,grad_norm: 0.8505724565171128, iteration: 82136
loss: 0.9901061058044434,grad_norm: 0.999999073529643, iteration: 82137
loss: 1.0183966159820557,grad_norm: 0.992395248458844, iteration: 82138
loss: 1.0018479824066162,grad_norm: 0.9999990411575277, iteration: 82139
loss: 0.9882158041000366,grad_norm: 0.9999990676586408, iteration: 82140
loss: 0.9943870306015015,grad_norm: 0.9951522801477964, iteration: 82141
loss: 0.9746848940849304,grad_norm: 0.9999990424496307, iteration: 82142
loss: 0.9976062774658203,grad_norm: 0.999999120217241, iteration: 82143
loss: 1.0073752403259277,grad_norm: 0.9306414588204116, iteration: 82144
loss: 0.9647336602210999,grad_norm: 0.9999992576368429, iteration: 82145
loss: 1.007394552230835,grad_norm: 0.9999990331560659, iteration: 82146
loss: 0.9664896130561829,grad_norm: 0.9999992991073614, iteration: 82147
loss: 0.9835575222969055,grad_norm: 0.9999991390163957, iteration: 82148
loss: 1.0066156387329102,grad_norm: 0.9999992292260439, iteration: 82149
loss: 0.9826067090034485,grad_norm: 0.9999991681704649, iteration: 82150
loss: 0.9617051482200623,grad_norm: 0.8776635508255491, iteration: 82151
loss: 0.9988048672676086,grad_norm: 0.9397544442926873, iteration: 82152
loss: 0.9496585130691528,grad_norm: 0.999999127687906, iteration: 82153
loss: 1.033508539199829,grad_norm: 0.8746274555574013, iteration: 82154
loss: 0.9830887913703918,grad_norm: 0.8663065100411129, iteration: 82155
loss: 1.0126700401306152,grad_norm: 0.9772104227038231, iteration: 82156
loss: 1.0375794172286987,grad_norm: 0.9999998830897497, iteration: 82157
loss: 1.0359643697738647,grad_norm: 0.905443819211645, iteration: 82158
loss: 1.0284606218338013,grad_norm: 0.9999996462114832, iteration: 82159
loss: 0.9872573018074036,grad_norm: 0.9999990775900532, iteration: 82160
loss: 0.9945474863052368,grad_norm: 0.9999989338298667, iteration: 82161
loss: 1.0318434238433838,grad_norm: 0.9999992243855154, iteration: 82162
loss: 0.9620122313499451,grad_norm: 0.9999990662568813, iteration: 82163
loss: 0.990719735622406,grad_norm: 0.9999991440189202, iteration: 82164
loss: 0.9749822020530701,grad_norm: 0.9999990380901597, iteration: 82165
loss: 1.0430994033813477,grad_norm: 0.9999991858750025, iteration: 82166
loss: 1.007610559463501,grad_norm: 0.9999992161609458, iteration: 82167
loss: 1.0359266996383667,grad_norm: 0.9999992433583899, iteration: 82168
loss: 0.9470927119255066,grad_norm: 0.9999990044398468, iteration: 82169
loss: 0.997685968875885,grad_norm: 0.999999317289872, iteration: 82170
loss: 1.044333815574646,grad_norm: 0.999999111369066, iteration: 82171
loss: 1.0278657674789429,grad_norm: 0.9999991738781414, iteration: 82172
loss: 1.026695728302002,grad_norm: 0.9384953828111969, iteration: 82173
loss: 0.9917854070663452,grad_norm: 0.9999990870364411, iteration: 82174
loss: 1.0204274654388428,grad_norm: 0.9999989531814216, iteration: 82175
loss: 1.0172460079193115,grad_norm: 0.9999991205178452, iteration: 82176
loss: 1.0019197463989258,grad_norm: 0.908776202855914, iteration: 82177
loss: 1.0165427923202515,grad_norm: 0.9083152066837956, iteration: 82178
loss: 0.9586284756660461,grad_norm: 0.9999990755376751, iteration: 82179
loss: 1.0276803970336914,grad_norm: 0.9277530233170033, iteration: 82180
loss: 0.9848068356513977,grad_norm: 0.9999993218277017, iteration: 82181
loss: 0.978752851486206,grad_norm: 0.9701675375341938, iteration: 82182
loss: 0.9940118789672852,grad_norm: 0.8965627852374345, iteration: 82183
loss: 0.9795568585395813,grad_norm: 0.955833520655233, iteration: 82184
loss: 0.9947701096534729,grad_norm: 0.9999990654756924, iteration: 82185
loss: 0.976866602897644,grad_norm: 0.9999991762356445, iteration: 82186
loss: 0.9906373620033264,grad_norm: 0.9999992383070748, iteration: 82187
loss: 1.0002738237380981,grad_norm: 0.8973316881952605, iteration: 82188
loss: 1.0034427642822266,grad_norm: 0.9999992429626839, iteration: 82189
loss: 0.9859045743942261,grad_norm: 0.9999990135431445, iteration: 82190
loss: 1.0020579099655151,grad_norm: 0.9066921757972135, iteration: 82191
loss: 1.010712742805481,grad_norm: 0.999999077658072, iteration: 82192
loss: 1.0194218158721924,grad_norm: 0.9999992121982454, iteration: 82193
loss: 1.0431004762649536,grad_norm: 0.9999989735505422, iteration: 82194
loss: 1.0060783624649048,grad_norm: 0.9999990722633028, iteration: 82195
loss: 0.9888750910758972,grad_norm: 0.894710335715015, iteration: 82196
loss: 0.9767556190490723,grad_norm: 0.9359964991632833, iteration: 82197
loss: 1.0289896726608276,grad_norm: 0.9999990955555808, iteration: 82198
loss: 0.9713924527168274,grad_norm: 0.9999992387846993, iteration: 82199
loss: 1.001186490058899,grad_norm: 0.8832228907717826, iteration: 82200
loss: 1.041944146156311,grad_norm: 0.9483018170132359, iteration: 82201
loss: 0.9636470079421997,grad_norm: 0.8920129898843279, iteration: 82202
loss: 1.0156627893447876,grad_norm: 0.9165406288888859, iteration: 82203
loss: 0.9950107336044312,grad_norm: 0.9581479848224779, iteration: 82204
loss: 1.0263458490371704,grad_norm: 0.9999991608458685, iteration: 82205
loss: 0.9628933668136597,grad_norm: 0.9999991999697359, iteration: 82206
loss: 0.9919266700744629,grad_norm: 0.9999990272300746, iteration: 82207
loss: 1.0247571468353271,grad_norm: 0.9999992441368755, iteration: 82208
loss: 0.9896721839904785,grad_norm: 0.9888826050707832, iteration: 82209
loss: 0.9670777320861816,grad_norm: 0.9869266636958022, iteration: 82210
loss: 0.9987157583236694,grad_norm: 0.9873089632091617, iteration: 82211
loss: 0.9911010265350342,grad_norm: 0.9532029690572873, iteration: 82212
loss: 0.9657036066055298,grad_norm: 0.9999993819113635, iteration: 82213
loss: 1.0187493562698364,grad_norm: 0.9999990580363967, iteration: 82214
loss: 1.019468903541565,grad_norm: 0.9999990618022715, iteration: 82215
loss: 0.9754728674888611,grad_norm: 0.9740660380471626, iteration: 82216
loss: 0.9802077412605286,grad_norm: 0.947940064459217, iteration: 82217
loss: 0.9850835800170898,grad_norm: 0.7793389848757547, iteration: 82218
loss: 1.0026684999465942,grad_norm: 0.9419201241327874, iteration: 82219
loss: 0.9553202986717224,grad_norm: 0.999999082174948, iteration: 82220
loss: 1.008547067642212,grad_norm: 0.9999991058806914, iteration: 82221
loss: 0.993627667427063,grad_norm: 0.9999990721470009, iteration: 82222
loss: 0.9906097054481506,grad_norm: 0.9871953742545752, iteration: 82223
loss: 0.9760840535163879,grad_norm: 0.9999992787120408, iteration: 82224
loss: 1.0045526027679443,grad_norm: 0.9999990851991041, iteration: 82225
loss: 1.0038307905197144,grad_norm: 0.9999991361523232, iteration: 82226
loss: 1.01065194606781,grad_norm: 0.9999992551672903, iteration: 82227
loss: 0.9726212620735168,grad_norm: 0.9516837030637614, iteration: 82228
loss: 1.0224385261535645,grad_norm: 0.999999123798527, iteration: 82229
loss: 0.9718528389930725,grad_norm: 0.9999990762121559, iteration: 82230
loss: 1.036145806312561,grad_norm: 0.9999990749405919, iteration: 82231
loss: 1.0179667472839355,grad_norm: 0.999999076763094, iteration: 82232
loss: 1.0040103197097778,grad_norm: 0.9999989703073096, iteration: 82233
loss: 1.0039448738098145,grad_norm: 0.9999992250374731, iteration: 82234
loss: 1.0010532140731812,grad_norm: 0.9999990813726872, iteration: 82235
loss: 0.9945173859596252,grad_norm: 0.9999997213238285, iteration: 82236
loss: 0.9812031984329224,grad_norm: 0.9999990340832958, iteration: 82237
loss: 1.017996072769165,grad_norm: 0.7997233277988384, iteration: 82238
loss: 1.021277904510498,grad_norm: 0.999999116717169, iteration: 82239
loss: 1.0638521909713745,grad_norm: 0.9957705378921203, iteration: 82240
loss: 1.001420021057129,grad_norm: 0.9999991213064913, iteration: 82241
loss: 1.0105388164520264,grad_norm: 0.9999990741608307, iteration: 82242
loss: 1.0201798677444458,grad_norm: 0.999999562073856, iteration: 82243
loss: 1.031803846359253,grad_norm: 0.9999991325987841, iteration: 82244
loss: 0.9924387335777283,grad_norm: 0.9816185635942869, iteration: 82245
loss: 1.0296530723571777,grad_norm: 0.9517668568616918, iteration: 82246
loss: 0.9668325781822205,grad_norm: 0.9999990723719372, iteration: 82247
loss: 0.9853086471557617,grad_norm: 0.9418410310368399, iteration: 82248
loss: 1.0109875202178955,grad_norm: 0.9631878679592857, iteration: 82249
loss: 0.9578765034675598,grad_norm: 0.9999988526313356, iteration: 82250
loss: 1.0045720338821411,grad_norm: 0.9999995546877352, iteration: 82251
loss: 1.0390592813491821,grad_norm: 0.9999993334699435, iteration: 82252
loss: 0.9916255474090576,grad_norm: 0.999999090772404, iteration: 82253
loss: 0.9924494624137878,grad_norm: 0.9999991716158614, iteration: 82254
loss: 0.9988590478897095,grad_norm: 0.8600726433035439, iteration: 82255
loss: 1.01985502243042,grad_norm: 0.94775789205987, iteration: 82256
loss: 0.9866547584533691,grad_norm: 0.9999991899784207, iteration: 82257
loss: 0.9958916306495667,grad_norm: 0.9999992347201516, iteration: 82258
loss: 0.962569534778595,grad_norm: 0.9979867175558137, iteration: 82259
loss: 1.0461971759796143,grad_norm: 0.9999993979444539, iteration: 82260
loss: 0.9933083653450012,grad_norm: 0.9358721036485218, iteration: 82261
loss: 1.0208626985549927,grad_norm: 0.9999991680682031, iteration: 82262
loss: 1.0332255363464355,grad_norm: 0.999999239793054, iteration: 82263
loss: 0.96539306640625,grad_norm: 0.9999990508665528, iteration: 82264
loss: 0.9924048185348511,grad_norm: 0.9999991681146357, iteration: 82265
loss: 0.9963263869285583,grad_norm: 0.9999989821546466, iteration: 82266
loss: 1.0327869653701782,grad_norm: 0.8413620475247626, iteration: 82267
loss: 0.992949903011322,grad_norm: 0.9999990776374665, iteration: 82268
loss: 1.0117213726043701,grad_norm: 0.999999286570945, iteration: 82269
loss: 1.0094255208969116,grad_norm: 0.9999991795652857, iteration: 82270
loss: 1.0154640674591064,grad_norm: 0.9999991570316612, iteration: 82271
loss: 0.9887090921401978,grad_norm: 0.9999992818064913, iteration: 82272
loss: 0.9966585040092468,grad_norm: 0.9999993029981794, iteration: 82273
loss: 0.995637834072113,grad_norm: 0.9724313542755295, iteration: 82274
loss: 0.9906497597694397,grad_norm: 0.999999092218925, iteration: 82275
loss: 1.0020291805267334,grad_norm: 0.9999991830769582, iteration: 82276
loss: 1.0051981210708618,grad_norm: 0.9999994053181438, iteration: 82277
loss: 1.0053691864013672,grad_norm: 0.9999993002908609, iteration: 82278
loss: 1.0145937204360962,grad_norm: 0.965736105471048, iteration: 82279
loss: 0.9870293140411377,grad_norm: 0.9754714746279588, iteration: 82280
loss: 0.9770227074623108,grad_norm: 0.9765210132477911, iteration: 82281
loss: 0.9779095649719238,grad_norm: 0.999999223021369, iteration: 82282
loss: 1.0156912803649902,grad_norm: 0.9584683134269665, iteration: 82283
loss: 0.9874400496482849,grad_norm: 0.9999991986015087, iteration: 82284
loss: 1.0143790245056152,grad_norm: 0.9873302807541369, iteration: 82285
loss: 1.0040249824523926,grad_norm: 0.9264672415231132, iteration: 82286
loss: 0.9782971143722534,grad_norm: 0.9587545043998116, iteration: 82287
loss: 0.9804196357727051,grad_norm: 0.9999989439336401, iteration: 82288
loss: 1.023438811302185,grad_norm: 0.9999991502118808, iteration: 82289
loss: 1.0051735639572144,grad_norm: 0.9999991767608791, iteration: 82290
loss: 1.0150341987609863,grad_norm: 0.8643585343320856, iteration: 82291
loss: 1.0376979112625122,grad_norm: 0.9999992607138147, iteration: 82292
loss: 1.0146209001541138,grad_norm: 0.9465030749530225, iteration: 82293
loss: 1.0246502161026,grad_norm: 0.9999996921136665, iteration: 82294
loss: 0.9904802441596985,grad_norm: 0.9999991351230565, iteration: 82295
loss: 1.0443556308746338,grad_norm: 0.9999994248861362, iteration: 82296
loss: 1.0199836492538452,grad_norm: 0.9999991109528465, iteration: 82297
loss: 0.9616875052452087,grad_norm: 0.9999990802588884, iteration: 82298
loss: 1.0297021865844727,grad_norm: 0.9999990740036869, iteration: 82299
loss: 1.0169378519058228,grad_norm: 0.9999991420740233, iteration: 82300
loss: 0.9758293628692627,grad_norm: 0.9999990202189872, iteration: 82301
loss: 1.0259305238723755,grad_norm: 0.9999992409261148, iteration: 82302
loss: 0.9817357659339905,grad_norm: 0.9668056273114802, iteration: 82303
loss: 1.0262548923492432,grad_norm: 0.9999989641657644, iteration: 82304
loss: 0.9695111513137817,grad_norm: 0.9999990358272107, iteration: 82305
loss: 0.9859462380409241,grad_norm: 0.9808251644957311, iteration: 82306
loss: 0.9937138557434082,grad_norm: 0.7746971387187616, iteration: 82307
loss: 1.0767488479614258,grad_norm: 0.9999992994584924, iteration: 82308
loss: 0.9860519766807556,grad_norm: 0.9999993141315112, iteration: 82309
loss: 0.992179274559021,grad_norm: 0.9999991620384068, iteration: 82310
loss: 1.0311030149459839,grad_norm: 0.999999241280551, iteration: 82311
loss: 0.979997992515564,grad_norm: 0.9813114524281241, iteration: 82312
loss: 1.015152931213379,grad_norm: 0.9999991803648054, iteration: 82313
loss: 0.9920164942741394,grad_norm: 0.889058594880785, iteration: 82314
loss: 1.0124326944351196,grad_norm: 0.8607311214323421, iteration: 82315
loss: 0.9569611549377441,grad_norm: 0.9999990929322613, iteration: 82316
loss: 1.0135976076126099,grad_norm: 0.9967225582951144, iteration: 82317
loss: 1.002220869064331,grad_norm: 0.9420588036103276, iteration: 82318
loss: 1.0382213592529297,grad_norm: 0.9999994376652908, iteration: 82319
loss: 0.9769331216812134,grad_norm: 0.999999210680892, iteration: 82320
loss: 1.036974310874939,grad_norm: 0.9999995304848787, iteration: 82321
loss: 0.9687002897262573,grad_norm: 0.9515475057549905, iteration: 82322
loss: 1.007127046585083,grad_norm: 0.9999990489318997, iteration: 82323
loss: 1.041587471961975,grad_norm: 0.9999992346724127, iteration: 82324
loss: 0.9860643744468689,grad_norm: 0.9330659908588306, iteration: 82325
loss: 1.0346852540969849,grad_norm: 0.9761130430194923, iteration: 82326
loss: 0.9726170897483826,grad_norm: 0.9999991321548093, iteration: 82327
loss: 0.9782793521881104,grad_norm: 0.9921742918357334, iteration: 82328
loss: 0.9947484135627747,grad_norm: 0.9859920611525308, iteration: 82329
loss: 1.0077728033065796,grad_norm: 0.999999059048665, iteration: 82330
loss: 1.0017919540405273,grad_norm: 0.9999991239873043, iteration: 82331
loss: 0.9881532192230225,grad_norm: 0.9877819668797037, iteration: 82332
loss: 1.0222550630569458,grad_norm: 0.9551656809308917, iteration: 82333
loss: 1.0091630220413208,grad_norm: 0.9999990526224625, iteration: 82334
loss: 0.9787406325340271,grad_norm: 0.9999991384920708, iteration: 82335
loss: 0.9882774353027344,grad_norm: 0.9999991624847085, iteration: 82336
loss: 1.0573824644088745,grad_norm: 0.9999996813645035, iteration: 82337
loss: 1.0005546808242798,grad_norm: 0.999999246529156, iteration: 82338
loss: 0.9839661717414856,grad_norm: 0.9999991152318088, iteration: 82339
loss: 1.0115923881530762,grad_norm: 0.9447936679864197, iteration: 82340
loss: 1.037435531616211,grad_norm: 0.9758445392461615, iteration: 82341
loss: 1.0187517404556274,grad_norm: 0.9999999068949804, iteration: 82342
loss: 1.0180020332336426,grad_norm: 0.9999997515471523, iteration: 82343
loss: 1.009355902671814,grad_norm: 0.9999992065468665, iteration: 82344
loss: 0.9876296520233154,grad_norm: 0.9612908823787591, iteration: 82345
loss: 1.0042424201965332,grad_norm: 0.9999991934605627, iteration: 82346
loss: 0.9913989901542664,grad_norm: 0.9882370749235783, iteration: 82347
loss: 0.9690229892730713,grad_norm: 0.9999991955372804, iteration: 82348
loss: 1.0163737535476685,grad_norm: 0.9999991356778444, iteration: 82349
loss: 1.009650468826294,grad_norm: 0.9508375985470475, iteration: 82350
loss: 1.0411713123321533,grad_norm: 0.9999996626568115, iteration: 82351
loss: 1.053668737411499,grad_norm: 0.9999990871458333, iteration: 82352
loss: 0.9988862872123718,grad_norm: 0.9999991825485148, iteration: 82353
loss: 1.0208977460861206,grad_norm: 0.9999991223478358, iteration: 82354
loss: 0.9981677532196045,grad_norm: 0.8840555438858252, iteration: 82355
loss: 0.9763565063476562,grad_norm: 0.9999990385960192, iteration: 82356
loss: 0.9862667322158813,grad_norm: 0.9999990710956211, iteration: 82357
loss: 1.0139553546905518,grad_norm: 0.9999990913376063, iteration: 82358
loss: 1.0293718576431274,grad_norm: 0.91565611528811, iteration: 82359
loss: 1.0095444917678833,grad_norm: 0.9999991160591872, iteration: 82360
loss: 0.967470109462738,grad_norm: 0.9683446329220406, iteration: 82361
loss: 1.028706431388855,grad_norm: 0.9999994964664589, iteration: 82362
loss: 0.9946050047874451,grad_norm: 0.9999998212237866, iteration: 82363
loss: 1.036156415939331,grad_norm: 0.9999991406369014, iteration: 82364
loss: 1.0084186792373657,grad_norm: 0.9999991350023408, iteration: 82365
loss: 0.9771267175674438,grad_norm: 0.8973298514317385, iteration: 82366
loss: 1.0601695775985718,grad_norm: 0.9999996531446962, iteration: 82367
loss: 0.9988752007484436,grad_norm: 0.878809740216668, iteration: 82368
loss: 0.9776296019554138,grad_norm: 0.9999991672605218, iteration: 82369
loss: 0.9706704020500183,grad_norm: 0.9999996881332853, iteration: 82370
loss: 0.9981535077095032,grad_norm: 0.9469537520756801, iteration: 82371
loss: 0.9919552803039551,grad_norm: 0.9917154372050138, iteration: 82372
loss: 1.0318635702133179,grad_norm: 0.9611468787852062, iteration: 82373
loss: 1.0099388360977173,grad_norm: 0.9999991272646643, iteration: 82374
loss: 1.0596696138381958,grad_norm: 0.9064201275767129, iteration: 82375
loss: 0.9828603863716125,grad_norm: 0.9999995167787051, iteration: 82376
loss: 0.9791707992553711,grad_norm: 0.999999141846429, iteration: 82377
loss: 1.0928523540496826,grad_norm: 0.9999997251585958, iteration: 82378
loss: 1.0205488204956055,grad_norm: 0.9999993168382658, iteration: 82379
loss: 1.0033142566680908,grad_norm: 0.9999991993022191, iteration: 82380
loss: 0.9863930344581604,grad_norm: 0.9999992555778661, iteration: 82381
loss: 1.0006531476974487,grad_norm: 0.9999991239740075, iteration: 82382
loss: 1.0214368104934692,grad_norm: 0.9999990693927192, iteration: 82383
loss: 1.072880506515503,grad_norm: 0.9999994241033676, iteration: 82384
loss: 1.0460457801818848,grad_norm: 0.9999991976872344, iteration: 82385
loss: 0.9959785342216492,grad_norm: 0.9999991316496941, iteration: 82386
loss: 1.0035287141799927,grad_norm: 0.999999101849587, iteration: 82387
loss: 1.0058507919311523,grad_norm: 0.999999338013497, iteration: 82388
loss: 1.0306026935577393,grad_norm: 0.9999990483606098, iteration: 82389
loss: 1.0522723197937012,grad_norm: 0.9999991600529621, iteration: 82390
loss: 0.9826456308364868,grad_norm: 0.9999993327621353, iteration: 82391
loss: 1.0188924074172974,grad_norm: 0.9999991625748699, iteration: 82392
loss: 1.0445291996002197,grad_norm: 0.9999997433654025, iteration: 82393
loss: 1.0562936067581177,grad_norm: 0.9999994565638681, iteration: 82394
loss: 0.9620663523674011,grad_norm: 0.9999994363115674, iteration: 82395
loss: 0.9908247590065002,grad_norm: 0.9999992682864848, iteration: 82396
loss: 1.005822777748108,grad_norm: 0.9999990292010846, iteration: 82397
loss: 1.0068291425704956,grad_norm: 0.9999990706056894, iteration: 82398
loss: 1.0219539403915405,grad_norm: 0.9999991446316792, iteration: 82399
loss: 0.9982461333274841,grad_norm: 0.8928331671562353, iteration: 82400
loss: 1.0151230096817017,grad_norm: 0.9718059427844118, iteration: 82401
loss: 0.9983369708061218,grad_norm: 0.9999992194495336, iteration: 82402
loss: 1.0086623430252075,grad_norm: 0.8473401466000187, iteration: 82403
loss: 0.9551348090171814,grad_norm: 0.9965608648821528, iteration: 82404
loss: 1.0013082027435303,grad_norm: 0.9719993416667074, iteration: 82405
loss: 1.0030934810638428,grad_norm: 0.937338932084419, iteration: 82406
loss: 0.9695596098899841,grad_norm: 0.9999991987269568, iteration: 82407
loss: 0.9806750416755676,grad_norm: 0.9995131419426311, iteration: 82408
loss: 1.072400450706482,grad_norm: 1.0000000198000203, iteration: 82409
loss: 0.9698774814605713,grad_norm: 0.9999990703990028, iteration: 82410
loss: 1.0247043371200562,grad_norm: 0.999999144622937, iteration: 82411
loss: 1.0389031171798706,grad_norm: 0.9999993691788118, iteration: 82412
loss: 1.023727536201477,grad_norm: 0.9844273170691126, iteration: 82413
loss: 1.013447642326355,grad_norm: 0.9999992144918807, iteration: 82414
loss: 1.0179767608642578,grad_norm: 0.8142370551352465, iteration: 82415
loss: 0.9677078127861023,grad_norm: 0.9999989459355973, iteration: 82416
loss: 1.0224937200546265,grad_norm: 0.8266191259360383, iteration: 82417
loss: 0.9686260223388672,grad_norm: 0.999999038246517, iteration: 82418
loss: 1.0534385442733765,grad_norm: 0.9999998756661254, iteration: 82419
loss: 0.9860407114028931,grad_norm: 0.9999991280335582, iteration: 82420
loss: 1.0128446817398071,grad_norm: 0.9839950005040521, iteration: 82421
loss: 1.0885380506515503,grad_norm: 0.999999860410683, iteration: 82422
loss: 1.0242000818252563,grad_norm: 0.9999991074698209, iteration: 82423
loss: 1.0461217164993286,grad_norm: 0.99999931075109, iteration: 82424
loss: 0.988863468170166,grad_norm: 0.9999989369574055, iteration: 82425
loss: 1.0005857944488525,grad_norm: 0.9999991425856721, iteration: 82426
loss: 0.9861029386520386,grad_norm: 0.9999991970512343, iteration: 82427
loss: 0.9963915348052979,grad_norm: 0.9999991172657521, iteration: 82428
loss: 1.011267900466919,grad_norm: 0.9999992528817218, iteration: 82429
loss: 0.9956609010696411,grad_norm: 0.969734434634616, iteration: 82430
loss: 1.0192748308181763,grad_norm: 0.9352025925297373, iteration: 82431
loss: 1.0331733226776123,grad_norm: 0.9999993471633889, iteration: 82432
loss: 0.9764407873153687,grad_norm: 0.9999991348691027, iteration: 82433
loss: 0.9954549074172974,grad_norm: 0.9999990787514833, iteration: 82434
loss: 1.0200406312942505,grad_norm: 0.9999993314159964, iteration: 82435
loss: 0.9919009804725647,grad_norm: 0.9647470974280797, iteration: 82436
loss: 1.0391323566436768,grad_norm: 0.9999991507884051, iteration: 82437
loss: 0.9711440801620483,grad_norm: 0.9999991403917147, iteration: 82438
loss: 1.0426799058914185,grad_norm: 0.999999040601271, iteration: 82439
loss: 1.0009249448776245,grad_norm: 0.9999991712446595, iteration: 82440
loss: 0.9782826900482178,grad_norm: 0.9999991702388099, iteration: 82441
loss: 0.9708430171012878,grad_norm: 0.9408784147902282, iteration: 82442
loss: 1.0075411796569824,grad_norm: 0.9999992610759083, iteration: 82443
loss: 1.0237534046173096,grad_norm: 0.999999395412907, iteration: 82444
loss: 1.0101802349090576,grad_norm: 0.9879197331682135, iteration: 82445
loss: 1.0484551191329956,grad_norm: 0.9999990457880626, iteration: 82446
loss: 0.9666458368301392,grad_norm: 0.9556339722159223, iteration: 82447
loss: 0.9962839484214783,grad_norm: 0.9999991686127343, iteration: 82448
loss: 1.002923846244812,grad_norm: 0.999999188367685, iteration: 82449
loss: 0.9954131841659546,grad_norm: 0.9999991582500614, iteration: 82450
loss: 1.0305373668670654,grad_norm: 0.9999990490184261, iteration: 82451
loss: 0.9660208821296692,grad_norm: 0.9999991677960862, iteration: 82452
loss: 1.007728099822998,grad_norm: 0.9999992427180018, iteration: 82453
loss: 1.0051190853118896,grad_norm: 0.9999990171561166, iteration: 82454
loss: 0.9836471080780029,grad_norm: 0.9999989969655138, iteration: 82455
loss: 1.0120350122451782,grad_norm: 0.9999992312399563, iteration: 82456
loss: 0.9763885140419006,grad_norm: 0.9546324182306934, iteration: 82457
loss: 0.9863945245742798,grad_norm: 0.94618641320776, iteration: 82458
loss: 0.9729373455047607,grad_norm: 0.9999991818345791, iteration: 82459
loss: 0.992727518081665,grad_norm: 0.9194068652538895, iteration: 82460
loss: 1.0399518013000488,grad_norm: 0.9999993270416698, iteration: 82461
loss: 1.0099201202392578,grad_norm: 0.9999991267675321, iteration: 82462
loss: 1.0096925497055054,grad_norm: 0.8897143821915741, iteration: 82463
loss: 0.9994229674339294,grad_norm: 0.9395878642241282, iteration: 82464
loss: 0.9741518497467041,grad_norm: 0.9999990786404209, iteration: 82465
loss: 1.0219727754592896,grad_norm: 0.9581175430535368, iteration: 82466
loss: 0.9885282516479492,grad_norm: 0.9712951542157853, iteration: 82467
loss: 1.0011861324310303,grad_norm: 0.8282208977397402, iteration: 82468
loss: 1.0160114765167236,grad_norm: 0.9538073634876569, iteration: 82469
loss: 1.0108296871185303,grad_norm: 0.9999991054746339, iteration: 82470
loss: 1.0093334913253784,grad_norm: 0.9999991939942219, iteration: 82471
loss: 0.9963433742523193,grad_norm: 0.9999992117562998, iteration: 82472
loss: 0.9852294921875,grad_norm: 0.9287978764430356, iteration: 82473
loss: 1.02024507522583,grad_norm: 0.9999993416681969, iteration: 82474
loss: 0.9594796895980835,grad_norm: 0.9185333663285037, iteration: 82475
loss: 0.995119571685791,grad_norm: 0.9999998020749942, iteration: 82476
loss: 0.9968768358230591,grad_norm: 0.9999992670796993, iteration: 82477
loss: 0.9782991409301758,grad_norm: 0.7873550134587062, iteration: 82478
loss: 1.0090912580490112,grad_norm: 0.9457414593639842, iteration: 82479
loss: 0.9635109901428223,grad_norm: 0.9216687090658409, iteration: 82480
loss: 1.0020753145217896,grad_norm: 0.9953528209753801, iteration: 82481
loss: 0.9924083948135376,grad_norm: 0.9999991579509568, iteration: 82482
loss: 1.0236903429031372,grad_norm: 0.9715440803546671, iteration: 82483
loss: 1.015158772468567,grad_norm: 0.9999990045469315, iteration: 82484
loss: 1.0089869499206543,grad_norm: 0.9982831037791001, iteration: 82485
loss: 0.9756212830543518,grad_norm: 0.9999992841449734, iteration: 82486
loss: 1.0264629125595093,grad_norm: 0.999999194548987, iteration: 82487
loss: 1.0313467979431152,grad_norm: 0.999999183492153, iteration: 82488
loss: 1.0181249380111694,grad_norm: 0.999999720726416, iteration: 82489
loss: 1.011199712753296,grad_norm: 0.878332979156443, iteration: 82490
loss: 0.9991828203201294,grad_norm: 0.9999991324698186, iteration: 82491
loss: 0.97199946641922,grad_norm: 0.7961287001753634, iteration: 82492
loss: 1.018381118774414,grad_norm: 0.999999009001089, iteration: 82493
loss: 1.031957745552063,grad_norm: 0.999999327998154, iteration: 82494
loss: 0.9945400357246399,grad_norm: 0.9959082604921934, iteration: 82495
loss: 0.9802472591400146,grad_norm: 0.99999906446708, iteration: 82496
loss: 0.9824467897415161,grad_norm: 0.9999992743719196, iteration: 82497
loss: 0.983664870262146,grad_norm: 0.9999991265236151, iteration: 82498
loss: 0.9634579420089722,grad_norm: 0.9999990612671358, iteration: 82499
loss: 1.0217796564102173,grad_norm: 0.9999991329028572, iteration: 82500
loss: 1.0221595764160156,grad_norm: 0.9999993420538978, iteration: 82501
loss: 0.9820287227630615,grad_norm: 0.9999990689791859, iteration: 82502
loss: 0.9773441553115845,grad_norm: 0.9999989994595768, iteration: 82503
loss: 0.9847479462623596,grad_norm: 0.9147832512324969, iteration: 82504
loss: 0.9972143769264221,grad_norm: 0.9999991344477681, iteration: 82505
loss: 1.0035382509231567,grad_norm: 0.9999989291790937, iteration: 82506
loss: 0.9999361634254456,grad_norm: 0.9999991944198054, iteration: 82507
loss: 0.9891117811203003,grad_norm: 0.9928623771270241, iteration: 82508
loss: 0.9947662949562073,grad_norm: 0.9999991543874017, iteration: 82509
loss: 0.9987491369247437,grad_norm: 0.9999992306262174, iteration: 82510
loss: 0.9996876120567322,grad_norm: 0.9890625444570055, iteration: 82511
loss: 0.9668459296226501,grad_norm: 0.9999991905129068, iteration: 82512
loss: 0.9766821265220642,grad_norm: 0.9617939434622811, iteration: 82513
loss: 1.0229085683822632,grad_norm: 0.9999989169193637, iteration: 82514
loss: 0.9582962393760681,grad_norm: 0.9334565256686028, iteration: 82515
loss: 0.9608781933784485,grad_norm: 0.8751698379097517, iteration: 82516
loss: 1.0116573572158813,grad_norm: 0.9999989967534886, iteration: 82517
loss: 0.99924236536026,grad_norm: 0.9999990955890815, iteration: 82518
loss: 0.9820351004600525,grad_norm: 0.9999992222102574, iteration: 82519
loss: 1.0185831785202026,grad_norm: 0.9999990895323693, iteration: 82520
loss: 1.021335244178772,grad_norm: 0.9435641882686622, iteration: 82521
loss: 1.0056815147399902,grad_norm: 0.9999991982288874, iteration: 82522
loss: 0.9401204586029053,grad_norm: 0.9877620583974136, iteration: 82523
loss: 1.0086510181427002,grad_norm: 0.9999991557321598, iteration: 82524
loss: 1.0001720190048218,grad_norm: 0.9999991865587655, iteration: 82525
loss: 0.982563316822052,grad_norm: 0.9999992163721164, iteration: 82526
loss: 0.9969019889831543,grad_norm: 0.9999991495206887, iteration: 82527
loss: 0.9857595562934875,grad_norm: 0.9999991516252307, iteration: 82528
loss: 0.9911244511604309,grad_norm: 0.9667971368643391, iteration: 82529
loss: 0.9937499761581421,grad_norm: 0.9894172904518175, iteration: 82530
loss: 1.0292078256607056,grad_norm: 0.9502545578541782, iteration: 82531
loss: 1.0306274890899658,grad_norm: 0.9142004284123159, iteration: 82532
loss: 1.006723403930664,grad_norm: 0.9869596162893397, iteration: 82533
loss: 1.0051954984664917,grad_norm: 0.998239804917312, iteration: 82534
loss: 0.9771439433097839,grad_norm: 0.9999990365643272, iteration: 82535
loss: 0.9955050349235535,grad_norm: 0.9630045566919974, iteration: 82536
loss: 0.9805572628974915,grad_norm: 0.9629024130266577, iteration: 82537
loss: 0.9638283252716064,grad_norm: 0.9999993531237579, iteration: 82538
loss: 1.048633337020874,grad_norm: 0.9999993542456775, iteration: 82539
loss: 1.0315289497375488,grad_norm: 0.9999990867077851, iteration: 82540
loss: 0.9708731174468994,grad_norm: 0.9999991530588833, iteration: 82541
loss: 1.0304471254348755,grad_norm: 0.9143802863262463, iteration: 82542
loss: 0.9749118089675903,grad_norm: 0.999999097738765, iteration: 82543
loss: 0.9884214401245117,grad_norm: 0.9999990838189402, iteration: 82544
loss: 0.9983164072036743,grad_norm: 0.9720056699100987, iteration: 82545
loss: 0.9749994277954102,grad_norm: 0.9999991894666884, iteration: 82546
loss: 1.0261565446853638,grad_norm: 0.999999013142053, iteration: 82547
loss: 1.0127959251403809,grad_norm: 0.9999989487115466, iteration: 82548
loss: 0.9901173114776611,grad_norm: 0.9999989414930507, iteration: 82549
loss: 0.9976232051849365,grad_norm: 0.872076491860651, iteration: 82550
loss: 1.0162343978881836,grad_norm: 0.9999988954951915, iteration: 82551
loss: 1.0142767429351807,grad_norm: 0.9999991378171649, iteration: 82552
loss: 0.9808945059776306,grad_norm: 0.999999082944999, iteration: 82553
loss: 1.0103826522827148,grad_norm: 0.9999990419910632, iteration: 82554
loss: 1.0107265710830688,grad_norm: 0.9999991738964457, iteration: 82555
loss: 1.013495922088623,grad_norm: 0.999999171503625, iteration: 82556
loss: 0.98016357421875,grad_norm: 0.9999991718421035, iteration: 82557
loss: 1.0472466945648193,grad_norm: 0.9999992645557184, iteration: 82558
loss: 1.0811550617218018,grad_norm: 0.9999992796118736, iteration: 82559
loss: 1.0056190490722656,grad_norm: 0.9999991305996629, iteration: 82560
loss: 1.0105748176574707,grad_norm: 0.8736091867570622, iteration: 82561
loss: 0.9966111183166504,grad_norm: 0.842134109730756, iteration: 82562
loss: 1.0477380752563477,grad_norm: 0.9999991114953116, iteration: 82563
loss: 0.9869560599327087,grad_norm: 0.9999991201634046, iteration: 82564
loss: 1.0416656732559204,grad_norm: 0.9841039993040396, iteration: 82565
loss: 0.9701793193817139,grad_norm: 0.9999991337802426, iteration: 82566
loss: 0.9863662123680115,grad_norm: 0.9999991558407426, iteration: 82567
loss: 0.9926627278327942,grad_norm: 0.9999993135595704, iteration: 82568
loss: 1.0188798904418945,grad_norm: 0.851598526947104, iteration: 82569
loss: 0.985505998134613,grad_norm: 0.9999992429515514, iteration: 82570
loss: 1.0272958278656006,grad_norm: 0.9896645668292822, iteration: 82571
loss: 0.9981899261474609,grad_norm: 0.9999989709970061, iteration: 82572
loss: 1.003831148147583,grad_norm: 0.9999990867556476, iteration: 82573
loss: 0.9943980574607849,grad_norm: 0.9999989677838564, iteration: 82574
loss: 1.0292630195617676,grad_norm: 0.999999096393063, iteration: 82575
loss: 0.98924320936203,grad_norm: 0.9999990432304484, iteration: 82576
loss: 1.0604137182235718,grad_norm: 0.9999992210339915, iteration: 82577
loss: 1.0037752389907837,grad_norm: 0.9999995490599171, iteration: 82578
loss: 0.9986042976379395,grad_norm: 0.9999990837362192, iteration: 82579
loss: 1.020857810974121,grad_norm: 0.9999990407107654, iteration: 82580
loss: 1.0473225116729736,grad_norm: 0.9166073532097531, iteration: 82581
loss: 1.0177972316741943,grad_norm: 0.9999994603454623, iteration: 82582
loss: 0.9860855340957642,grad_norm: 0.8686177230810206, iteration: 82583
loss: 0.985541582107544,grad_norm: 0.999999070599149, iteration: 82584
loss: 1.0051738023757935,grad_norm: 0.8983236824022542, iteration: 82585
loss: 1.0028537511825562,grad_norm: 0.8757397807154839, iteration: 82586
loss: 0.9978551864624023,grad_norm: 0.9999991030673828, iteration: 82587
loss: 1.0519452095031738,grad_norm: 0.9999991262007683, iteration: 82588
loss: 0.9854186177253723,grad_norm: 0.9999991264869384, iteration: 82589
loss: 0.9949887990951538,grad_norm: 0.9324560905944326, iteration: 82590
loss: 0.9980451464653015,grad_norm: 0.9999992385908429, iteration: 82591
loss: 1.0024694204330444,grad_norm: 0.9999997451095967, iteration: 82592
loss: 1.0394539833068848,grad_norm: 0.9999991276396235, iteration: 82593
loss: 1.0043565034866333,grad_norm: 0.999999196663431, iteration: 82594
loss: 1.0637282133102417,grad_norm: 0.9999999352993356, iteration: 82595
loss: 1.0305358171463013,grad_norm: 0.9999991486708572, iteration: 82596
loss: 0.9938464760780334,grad_norm: 0.9999991445670207, iteration: 82597
loss: 0.969301164150238,grad_norm: 0.9527960691798996, iteration: 82598
loss: 1.0499271154403687,grad_norm: 0.99999939569235, iteration: 82599
loss: 1.0164228677749634,grad_norm: 0.9999995986231113, iteration: 82600
loss: 1.0391104221343994,grad_norm: 0.9999992483555926, iteration: 82601
loss: 0.9941060543060303,grad_norm: 0.9999990866579417, iteration: 82602
loss: 0.9896771907806396,grad_norm: 0.9999990322954128, iteration: 82603
loss: 0.987234354019165,grad_norm: 0.9999991142690403, iteration: 82604
loss: 1.011905550956726,grad_norm: 0.999999876382273, iteration: 82605
loss: 1.0141316652297974,grad_norm: 0.9740658188264517, iteration: 82606
loss: 1.0419294834136963,grad_norm: 0.999999103673815, iteration: 82607
loss: 0.9876760840415955,grad_norm: 0.9550682598396208, iteration: 82608
loss: 0.9954255223274231,grad_norm: 0.9999991305826447, iteration: 82609
loss: 1.0207269191741943,grad_norm: 0.9932026166721971, iteration: 82610
loss: 0.9982883334159851,grad_norm: 0.9632681540056238, iteration: 82611
loss: 1.0122538805007935,grad_norm: 0.9999990836530847, iteration: 82612
loss: 0.9942716360092163,grad_norm: 0.9999992398944116, iteration: 82613
loss: 0.985472559928894,grad_norm: 0.9999991874824957, iteration: 82614
loss: 0.9974930882453918,grad_norm: 0.9999992520297344, iteration: 82615
loss: 0.9980807900428772,grad_norm: 0.9757541242687652, iteration: 82616
loss: 1.0067027807235718,grad_norm: 0.9936992602433652, iteration: 82617
loss: 0.9763646721839905,grad_norm: 0.9999991986555385, iteration: 82618
loss: 1.0129061937332153,grad_norm: 0.8645968095887064, iteration: 82619
loss: 0.99521803855896,grad_norm: 0.9999989981232736, iteration: 82620
loss: 0.9781521558761597,grad_norm: 0.9999992249802816, iteration: 82621
loss: 1.011641025543213,grad_norm: 0.9999991316606932, iteration: 82622
loss: 1.0059094429016113,grad_norm: 0.9999991689422988, iteration: 82623
loss: 0.995545506477356,grad_norm: 0.8785626396477686, iteration: 82624
loss: 0.9969689846038818,grad_norm: 0.9448462478705728, iteration: 82625
loss: 0.9939291477203369,grad_norm: 0.9999991276195227, iteration: 82626
loss: 1.0031514167785645,grad_norm: 0.999999158191008, iteration: 82627
loss: 1.0110838413238525,grad_norm: 0.9785520004835375, iteration: 82628
loss: 1.0152944326400757,grad_norm: 0.982243961192597, iteration: 82629
loss: 0.9892405271530151,grad_norm: 0.9999991738014481, iteration: 82630
loss: 1.0100865364074707,grad_norm: 0.9999991940694011, iteration: 82631
loss: 1.0086712837219238,grad_norm: 0.9999991061306795, iteration: 82632
loss: 0.9712676405906677,grad_norm: 0.9999991552416906, iteration: 82633
loss: 1.027319312095642,grad_norm: 0.9681374243373058, iteration: 82634
loss: 1.0255333185195923,grad_norm: 0.9999994892625289, iteration: 82635
loss: 0.9923173785209656,grad_norm: 0.9999990420934383, iteration: 82636
loss: 1.0001169443130493,grad_norm: 0.9871432787934794, iteration: 82637
loss: 1.0045886039733887,grad_norm: 0.9999991171509995, iteration: 82638
loss: 1.0021233558654785,grad_norm: 0.9999990694198468, iteration: 82639
loss: 1.0011780261993408,grad_norm: 0.9999989883983551, iteration: 82640
loss: 0.9738398790359497,grad_norm: 0.9397590308250238, iteration: 82641
loss: 0.9946120381355286,grad_norm: 0.999999063377225, iteration: 82642
loss: 0.9728177189826965,grad_norm: 0.8120740563456389, iteration: 82643
loss: 1.01150643825531,grad_norm: 0.9999991729433848, iteration: 82644
loss: 0.9765828847885132,grad_norm: 0.9999991988874374, iteration: 82645
loss: 1.0017625093460083,grad_norm: 0.9999991214466014, iteration: 82646
loss: 1.0249214172363281,grad_norm: 0.9999997189634074, iteration: 82647
loss: 0.9822281002998352,grad_norm: 0.9999991824535513, iteration: 82648
loss: 1.0117385387420654,grad_norm: 0.9999991647018209, iteration: 82649
loss: 0.9865300059318542,grad_norm: 0.9999990868147097, iteration: 82650
loss: 1.0103814601898193,grad_norm: 0.9875379598967129, iteration: 82651
loss: 0.9847566485404968,grad_norm: 0.9999991358642562, iteration: 82652
loss: 0.985187828540802,grad_norm: 0.9999991227152275, iteration: 82653
loss: 1.0115290880203247,grad_norm: 0.999999019980147, iteration: 82654
loss: 0.9970734715461731,grad_norm: 0.9999991006885266, iteration: 82655
loss: 1.0355302095413208,grad_norm: 0.9999992584049294, iteration: 82656
loss: 1.001429796218872,grad_norm: 0.8785434181758826, iteration: 82657
loss: 0.9990823864936829,grad_norm: 0.9999991780311583, iteration: 82658
loss: 0.9865485429763794,grad_norm: 0.999999284765312, iteration: 82659
loss: 1.0472878217697144,grad_norm: 0.9999994743076093, iteration: 82660
loss: 1.0146980285644531,grad_norm: 0.9999989927320518, iteration: 82661
loss: 1.0035362243652344,grad_norm: 0.9836973555755226, iteration: 82662
loss: 1.0137115716934204,grad_norm: 0.9999990334735399, iteration: 82663
loss: 0.9959710836410522,grad_norm: 0.9999992389640451, iteration: 82664
loss: 0.9879745244979858,grad_norm: 0.9999990422805389, iteration: 82665
loss: 0.9643605351448059,grad_norm: 0.9999990590779252, iteration: 82666
loss: 1.00882089138031,grad_norm: 0.9481065277921384, iteration: 82667
loss: 1.0027117729187012,grad_norm: 0.9491349666709558, iteration: 82668
loss: 0.9827008843421936,grad_norm: 0.8948190652611557, iteration: 82669
loss: 0.9634273648262024,grad_norm: 0.9306530978024805, iteration: 82670
loss: 1.0384975671768188,grad_norm: 0.9540545270447223, iteration: 82671
loss: 1.0087486505508423,grad_norm: 0.9999991337473574, iteration: 82672
loss: 1.0348907709121704,grad_norm: 0.9999994679674779, iteration: 82673
loss: 0.9826124310493469,grad_norm: 0.9999992389498088, iteration: 82674
loss: 1.0970938205718994,grad_norm: 0.9999989609891097, iteration: 82675
loss: 1.0261396169662476,grad_norm: 0.9999990268935603, iteration: 82676
loss: 0.9896172285079956,grad_norm: 0.9814661316132214, iteration: 82677
loss: 0.9992152452468872,grad_norm: 0.9999996314790162, iteration: 82678
loss: 1.0289676189422607,grad_norm: 0.999999501600445, iteration: 82679
loss: 1.0374343395233154,grad_norm: 0.9999993905778471, iteration: 82680
loss: 1.0822737216949463,grad_norm: 0.9999991338365708, iteration: 82681
loss: 0.997201144695282,grad_norm: 0.9910459811703193, iteration: 82682
loss: 0.9910826086997986,grad_norm: 0.999999357958638, iteration: 82683
loss: 1.0089316368103027,grad_norm: 0.9085772660047071, iteration: 82684
loss: 0.988153338432312,grad_norm: 0.8792815309228745, iteration: 82685
loss: 0.9837493896484375,grad_norm: 0.9007761604689443, iteration: 82686
loss: 1.0950466394424438,grad_norm: 0.9999992148291523, iteration: 82687
loss: 0.9704957008361816,grad_norm: 0.9999993857554352, iteration: 82688
loss: 1.1069117784500122,grad_norm: 0.9999991635685656, iteration: 82689
loss: 0.9884923696517944,grad_norm: 0.9696764511616909, iteration: 82690
loss: 1.0366694927215576,grad_norm: 0.9911736236646378, iteration: 82691
loss: 1.0177029371261597,grad_norm: 0.9535223232663746, iteration: 82692
loss: 1.0003584623336792,grad_norm: 0.9999993380913663, iteration: 82693
loss: 1.0827155113220215,grad_norm: 0.9999993476723819, iteration: 82694
loss: 1.0013991594314575,grad_norm: 0.9999990832880397, iteration: 82695
loss: 1.0580724477767944,grad_norm: 1.0000000060681227, iteration: 82696
loss: 0.9893701672554016,grad_norm: 0.959086666019262, iteration: 82697
loss: 1.0014556646347046,grad_norm: 0.9999992388321497, iteration: 82698
loss: 1.036116123199463,grad_norm: 0.9999989773101037, iteration: 82699
loss: 1.0745844841003418,grad_norm: 0.9999997070956521, iteration: 82700
loss: 1.0451513528823853,grad_norm: 0.9999991523439535, iteration: 82701
loss: 1.0294525623321533,grad_norm: 0.9999992421859036, iteration: 82702
loss: 1.0823323726654053,grad_norm: 0.9999998049112042, iteration: 82703
loss: 0.9823843836784363,grad_norm: 0.9999992669017431, iteration: 82704
loss: 0.9898461103439331,grad_norm: 0.9999989770483696, iteration: 82705
loss: 1.000190258026123,grad_norm: 0.9999992002944085, iteration: 82706
loss: 1.0166385173797607,grad_norm: 0.999999156790098, iteration: 82707
loss: 0.9815350770950317,grad_norm: 0.9999990732512555, iteration: 82708
loss: 0.9809334874153137,grad_norm: 0.9999990596258131, iteration: 82709
loss: 1.026892900466919,grad_norm: 0.9999990869832754, iteration: 82710
loss: 1.0309302806854248,grad_norm: 0.9771078531794599, iteration: 82711
loss: 1.0318835973739624,grad_norm: 0.9999990714894079, iteration: 82712
loss: 0.9891038537025452,grad_norm: 0.9999990877566168, iteration: 82713
loss: 1.0193157196044922,grad_norm: 0.9999990557924592, iteration: 82714
loss: 1.0021121501922607,grad_norm: 0.999999195160479, iteration: 82715
loss: 1.0189552307128906,grad_norm: 0.9999992439102172, iteration: 82716
loss: 0.9822435975074768,grad_norm: 0.9999990828742104, iteration: 82717
loss: 1.0184961557388306,grad_norm: 0.9999993909701707, iteration: 82718
loss: 0.9858914613723755,grad_norm: 0.9999990364810761, iteration: 82719
loss: 0.9899032711982727,grad_norm: 0.99999902821455, iteration: 82720
loss: 0.9866255521774292,grad_norm: 0.9999990010827964, iteration: 82721
loss: 1.0103317499160767,grad_norm: 0.9999988765249358, iteration: 82722
loss: 1.0198750495910645,grad_norm: 0.9999989406222148, iteration: 82723
loss: 1.0122363567352295,grad_norm: 0.9779109953894235, iteration: 82724
loss: 1.0230677127838135,grad_norm: 0.9999991367005684, iteration: 82725
loss: 0.9849745035171509,grad_norm: 0.9999990501612898, iteration: 82726
loss: 0.9936258792877197,grad_norm: 0.9999990347620453, iteration: 82727
loss: 1.031267523765564,grad_norm: 0.999999153884811, iteration: 82728
loss: 0.9880719184875488,grad_norm: 0.9999991826299728, iteration: 82729
loss: 1.0439410209655762,grad_norm: 0.9999991267834233, iteration: 82730
loss: 1.0471800565719604,grad_norm: 0.999999718674766, iteration: 82731
loss: 0.9913110733032227,grad_norm: 0.9999991392352087, iteration: 82732
loss: 1.0473960638046265,grad_norm: 0.8953376683756997, iteration: 82733
loss: 0.9853854775428772,grad_norm: 0.9273997647600587, iteration: 82734
loss: 0.9884594678878784,grad_norm: 0.9853950835038298, iteration: 82735
loss: 0.9913825988769531,grad_norm: 0.9999990205090247, iteration: 82736
loss: 0.9911295175552368,grad_norm: 0.9692633043222685, iteration: 82737
loss: 0.9854585528373718,grad_norm: 0.9382038257465887, iteration: 82738
loss: 1.0346336364746094,grad_norm: 0.9999992413829659, iteration: 82739
loss: 0.993389368057251,grad_norm: 0.9999990508881187, iteration: 82740
loss: 1.0161656141281128,grad_norm: 0.9113833386762417, iteration: 82741
loss: 1.0077482461929321,grad_norm: 0.9999991660401598, iteration: 82742
loss: 0.980945348739624,grad_norm: 0.9999991583985023, iteration: 82743
loss: 1.0195375680923462,grad_norm: 0.9999991779949144, iteration: 82744
loss: 0.9898346066474915,grad_norm: 0.9976953609843224, iteration: 82745
loss: 0.987116277217865,grad_norm: 0.9966646367510711, iteration: 82746
loss: 1.011587381362915,grad_norm: 0.999999282732348, iteration: 82747
loss: 0.9862058758735657,grad_norm: 0.9279441455052494, iteration: 82748
loss: 0.987272322177887,grad_norm: 0.9897228453895135, iteration: 82749
loss: 0.9987218379974365,grad_norm: 0.9373348137308325, iteration: 82750
loss: 1.025355577468872,grad_norm: 0.9999997338484714, iteration: 82751
loss: 1.004263997077942,grad_norm: 0.9999990525595566, iteration: 82752
loss: 0.9895053505897522,grad_norm: 0.9999990785275081, iteration: 82753
loss: 0.9792018532752991,grad_norm: 0.9999992138304886, iteration: 82754
loss: 0.9966373443603516,grad_norm: 0.9999990896915288, iteration: 82755
loss: 1.001094937324524,grad_norm: 0.8998151668149664, iteration: 82756
loss: 1.0025116205215454,grad_norm: 0.9999991463995654, iteration: 82757
loss: 0.9733198285102844,grad_norm: 0.9999990316954213, iteration: 82758
loss: 1.0278228521347046,grad_norm: 0.9700023027490638, iteration: 82759
loss: 1.0159354209899902,grad_norm: 0.9999994635153537, iteration: 82760
loss: 0.986032247543335,grad_norm: 0.9999990922657093, iteration: 82761
loss: 0.9887622594833374,grad_norm: 0.9999991091132774, iteration: 82762
loss: 1.006095290184021,grad_norm: 0.9851779394036214, iteration: 82763
loss: 0.9895017147064209,grad_norm: 0.9889063273166704, iteration: 82764
loss: 0.9769726991653442,grad_norm: 0.9690444659726731, iteration: 82765
loss: 1.0217736959457397,grad_norm: 0.9999994357332368, iteration: 82766
loss: 0.9758037328720093,grad_norm: 0.984411122956407, iteration: 82767
loss: 0.9908863306045532,grad_norm: 0.9999990815371442, iteration: 82768
loss: 0.981967031955719,grad_norm: 0.9999993122792115, iteration: 82769
loss: 0.9843393564224243,grad_norm: 0.9999991964587329, iteration: 82770
loss: 0.9952977895736694,grad_norm: 0.9999992058664423, iteration: 82771
loss: 1.0356268882751465,grad_norm: 0.9999992356636498, iteration: 82772
loss: 0.9868074059486389,grad_norm: 0.9078378266961867, iteration: 82773
loss: 1.0133765935897827,grad_norm: 0.9999994808277471, iteration: 82774
loss: 0.9855597615242004,grad_norm: 0.9999991228613152, iteration: 82775
loss: 0.9883604049682617,grad_norm: 0.9999991414497725, iteration: 82776
loss: 0.9877265095710754,grad_norm: 0.999999025079349, iteration: 82777
loss: 0.9831345081329346,grad_norm: 0.9029410784939244, iteration: 82778
loss: 1.0021936893463135,grad_norm: 0.9999990654045221, iteration: 82779
loss: 1.0584841966629028,grad_norm: 0.9999991445500982, iteration: 82780
loss: 0.9866105914115906,grad_norm: 0.81500064241331, iteration: 82781
loss: 0.9910918474197388,grad_norm: 0.9999991239823217, iteration: 82782
loss: 0.9860301613807678,grad_norm: 0.9999991553594314, iteration: 82783
loss: 1.024977207183838,grad_norm: 0.9033585181866849, iteration: 82784
loss: 0.9750310778617859,grad_norm: 0.9323703961576763, iteration: 82785
loss: 0.9818841814994812,grad_norm: 0.9999990852196743, iteration: 82786
loss: 0.9960654377937317,grad_norm: 0.9999997240147281, iteration: 82787
loss: 0.997288703918457,grad_norm: 0.999999172277676, iteration: 82788
loss: 1.0268417596817017,grad_norm: 0.9698114082694735, iteration: 82789
loss: 0.9977259039878845,grad_norm: 0.9999991956708328, iteration: 82790
loss: 0.9755848050117493,grad_norm: 0.9999990941993213, iteration: 82791
loss: 0.9574688673019409,grad_norm: 0.9999991657779876, iteration: 82792
loss: 0.9899522066116333,grad_norm: 0.9999991512963348, iteration: 82793
loss: 1.0349041223526,grad_norm: 0.9999990824193805, iteration: 82794
loss: 1.007668375968933,grad_norm: 0.9256053202291952, iteration: 82795
loss: 1.0183446407318115,grad_norm: 0.9999990999902882, iteration: 82796
loss: 1.005112648010254,grad_norm: 0.9999998953268076, iteration: 82797
loss: 1.0395786762237549,grad_norm: 0.9999995410067267, iteration: 82798
loss: 1.1325809955596924,grad_norm: 0.9999997233371304, iteration: 82799
loss: 0.9398069977760315,grad_norm: 0.9999990061595346, iteration: 82800
loss: 0.995948076248169,grad_norm: 0.8381272043890683, iteration: 82801
loss: 0.9642671942710876,grad_norm: 0.999999167811868, iteration: 82802
loss: 1.0144493579864502,grad_norm: 0.9164565102845083, iteration: 82803
loss: 0.9988927841186523,grad_norm: 0.934930140725541, iteration: 82804
loss: 1.0251290798187256,grad_norm: 0.9999992786647504, iteration: 82805
loss: 0.998726487159729,grad_norm: 0.9999990317109935, iteration: 82806
loss: 1.0084290504455566,grad_norm: 0.9290482932627784, iteration: 82807
loss: 0.9493448138237,grad_norm: 0.9636192638297731, iteration: 82808
loss: 1.0559619665145874,grad_norm: 0.9999992646055463, iteration: 82809
loss: 0.9885962009429932,grad_norm: 0.9999991750659144, iteration: 82810
loss: 0.9881107211112976,grad_norm: 0.999999473907201, iteration: 82811
loss: 1.0308140516281128,grad_norm: 0.9999991176294618, iteration: 82812
loss: 1.002142071723938,grad_norm: 0.9999990095816844, iteration: 82813
loss: 0.9994625449180603,grad_norm: 0.999999033129648, iteration: 82814
loss: 1.0142912864685059,grad_norm: 0.9999991562026002, iteration: 82815
loss: 0.9856641888618469,grad_norm: 0.9999991028160906, iteration: 82816
loss: 1.011320948600769,grad_norm: 0.9401496580206669, iteration: 82817
loss: 0.9828670620918274,grad_norm: 0.999999217157375, iteration: 82818
loss: 1.0131093263626099,grad_norm: 0.987080990431616, iteration: 82819
loss: 1.1673449277877808,grad_norm: 1.0000000521071828, iteration: 82820
loss: 1.021920084953308,grad_norm: 0.9999997301288311, iteration: 82821
loss: 0.9988493323326111,grad_norm: 0.9999991473456986, iteration: 82822
loss: 1.035329818725586,grad_norm: 0.8940307907675528, iteration: 82823
loss: 0.9651773571968079,grad_norm: 0.8618161217347517, iteration: 82824
loss: 0.9582776427268982,grad_norm: 0.9999991625182232, iteration: 82825
loss: 1.0214825868606567,grad_norm: 0.9406780993113906, iteration: 82826
loss: 0.9969813227653503,grad_norm: 0.9999990600398879, iteration: 82827
loss: 0.9973998665809631,grad_norm: 0.9999997749419489, iteration: 82828
loss: 0.9989827275276184,grad_norm: 0.8339110468726756, iteration: 82829
loss: 1.0195451974868774,grad_norm: 0.9999991025400693, iteration: 82830
loss: 1.0163170099258423,grad_norm: 0.9027395191399912, iteration: 82831
loss: 1.0045342445373535,grad_norm: 0.9999990686657128, iteration: 82832
loss: 0.9902791380882263,grad_norm: 0.999999219198532, iteration: 82833
loss: 0.994967520236969,grad_norm: 0.9999990889630269, iteration: 82834
loss: 0.9961403012275696,grad_norm: 0.7367870405963235, iteration: 82835
loss: 0.9758713841438293,grad_norm: 0.8993037976470738, iteration: 82836
loss: 1.0187561511993408,grad_norm: 0.8293420095374444, iteration: 82837
loss: 0.923674464225769,grad_norm: 0.9400202965203641, iteration: 82838
loss: 1.00273597240448,grad_norm: 0.9359511434001608, iteration: 82839
loss: 0.9699593782424927,grad_norm: 0.8903517008755937, iteration: 82840
loss: 1.0315678119659424,grad_norm: 0.9999991368855893, iteration: 82841
loss: 0.9828212261199951,grad_norm: 0.857744935132202, iteration: 82842
loss: 0.9870392680168152,grad_norm: 0.9999991145693518, iteration: 82843
loss: 1.0005370378494263,grad_norm: 0.9555394523942558, iteration: 82844
loss: 1.0030254125595093,grad_norm: 0.9805800785798777, iteration: 82845
loss: 0.985012948513031,grad_norm: 0.9097618433288641, iteration: 82846
loss: 0.9968909025192261,grad_norm: 0.9999992366605259, iteration: 82847
loss: 0.9747312664985657,grad_norm: 0.9999990560167292, iteration: 82848
loss: 0.9967578649520874,grad_norm: 0.9999990648156388, iteration: 82849
loss: 1.0246269702911377,grad_norm: 0.9999991806764087, iteration: 82850
loss: 0.9967924356460571,grad_norm: 0.9999990357325891, iteration: 82851
loss: 1.0326727628707886,grad_norm: 0.9999991823897977, iteration: 82852
loss: 0.9852719306945801,grad_norm: 0.8429687571916407, iteration: 82853
loss: 1.004892349243164,grad_norm: 0.9999992776036368, iteration: 82854
loss: 0.9871436357498169,grad_norm: 0.953139512045293, iteration: 82855
loss: 0.9601712226867676,grad_norm: 0.9999991343946337, iteration: 82856
loss: 0.9866479635238647,grad_norm: 0.9200466722097624, iteration: 82857
loss: 0.9933345913887024,grad_norm: 0.9808158187376746, iteration: 82858
loss: 1.0039267539978027,grad_norm: 0.9999990456580369, iteration: 82859
loss: 0.9884609580039978,grad_norm: 0.9999990616119654, iteration: 82860
loss: 0.9937140345573425,grad_norm: 0.9999990945691699, iteration: 82861
loss: 1.0099689960479736,grad_norm: 0.9999990598644272, iteration: 82862
loss: 0.9878422617912292,grad_norm: 0.9999990639242436, iteration: 82863
loss: 1.0100376605987549,grad_norm: 0.9999990105448041, iteration: 82864
loss: 1.0647886991500854,grad_norm: 0.9999993118471822, iteration: 82865
loss: 1.0114563703536987,grad_norm: 0.9999991537080849, iteration: 82866
loss: 1.1050983667373657,grad_norm: 0.9999999153916826, iteration: 82867
loss: 1.0067789554595947,grad_norm: 0.9999991001168403, iteration: 82868
loss: 1.0500942468643188,grad_norm: 0.9999990051757739, iteration: 82869
loss: 1.0120959281921387,grad_norm: 0.9292667778148572, iteration: 82870
loss: 0.9887136816978455,grad_norm: 0.9999993191616812, iteration: 82871
loss: 1.014695167541504,grad_norm: 0.9734964875407868, iteration: 82872
loss: 1.0089138746261597,grad_norm: 0.9999991125960409, iteration: 82873
loss: 0.991243839263916,grad_norm: 0.9999991944419504, iteration: 82874
loss: 1.0125412940979004,grad_norm: 0.8183119914810656, iteration: 82875
loss: 0.9966008067131042,grad_norm: 0.9999992871295591, iteration: 82876
loss: 0.9644700884819031,grad_norm: 0.9961768240804312, iteration: 82877
loss: 1.2174755334854126,grad_norm: 0.9999990684872028, iteration: 82878
loss: 1.0054305791854858,grad_norm: 0.9260430662733934, iteration: 82879
loss: 0.9707807302474976,grad_norm: 0.8673020768110655, iteration: 82880
loss: 1.0130115747451782,grad_norm: 0.9999991104552918, iteration: 82881
loss: 1.0087817907333374,grad_norm: 0.9999990166463698, iteration: 82882
loss: 1.208860158920288,grad_norm: 0.999999011656178, iteration: 82883
loss: 1.1555761098861694,grad_norm: 0.9999998768867563, iteration: 82884
loss: 0.9714927673339844,grad_norm: 0.9999992284779503, iteration: 82885
loss: 1.0130319595336914,grad_norm: 0.9999990721894507, iteration: 82886
loss: 0.9898322820663452,grad_norm: 0.9999992038823566, iteration: 82887
loss: 0.994418740272522,grad_norm: 0.999999390453459, iteration: 82888
loss: 1.0170159339904785,grad_norm: 0.9999992161739627, iteration: 82889
loss: 1.048497200012207,grad_norm: 0.9999996672124368, iteration: 82890
loss: 1.1152615547180176,grad_norm: 0.9999994678190821, iteration: 82891
loss: 1.0325292348861694,grad_norm: 0.9498797813438571, iteration: 82892
loss: 1.3736944198608398,grad_norm: 0.9999998744932892, iteration: 82893
loss: 1.1407692432403564,grad_norm: 0.999999848381401, iteration: 82894
loss: 1.2804924249649048,grad_norm: 0.9999996059290863, iteration: 82895
loss: 0.9595224857330322,grad_norm: 0.9452914595764732, iteration: 82896
loss: 1.0413169860839844,grad_norm: 0.9999991360792342, iteration: 82897
loss: 1.5350537300109863,grad_norm: 0.9999998289271922, iteration: 82898
loss: 1.1956748962402344,grad_norm: 0.9999998972580952, iteration: 82899
loss: 1.2429394721984863,grad_norm: 0.9999998002657832, iteration: 82900
loss: 1.0891048908233643,grad_norm: 0.9999998230654875, iteration: 82901
loss: 1.073643445968628,grad_norm: 0.9999999945260944, iteration: 82902
loss: 1.137726068496704,grad_norm: 0.9999999188476684, iteration: 82903
loss: 1.1976566314697266,grad_norm: 0.9999998138641543, iteration: 82904
loss: 1.052778959274292,grad_norm: 0.999999545898516, iteration: 82905
loss: 1.017274022102356,grad_norm: 0.9999990686564388, iteration: 82906
loss: 1.0336748361587524,grad_norm: 0.999999626485597, iteration: 82907
loss: 1.1826704740524292,grad_norm: 0.9999994838508413, iteration: 82908
loss: 0.9770917296409607,grad_norm: 0.9999990537986437, iteration: 82909
loss: 0.9981422424316406,grad_norm: 0.99999948605723, iteration: 82910
loss: 1.2103679180145264,grad_norm: 0.9999996108937926, iteration: 82911
loss: 1.1503572463989258,grad_norm: 0.999999788253356, iteration: 82912
loss: 0.9950258135795593,grad_norm: 0.9439420293827733, iteration: 82913
loss: 0.9754878878593445,grad_norm: 0.9999990353220293, iteration: 82914
loss: 0.9598254561424255,grad_norm: 0.9999993410825617, iteration: 82915
loss: 0.9680517315864563,grad_norm: 0.904892426936751, iteration: 82916
loss: 0.9601978659629822,grad_norm: 0.8717919461811043, iteration: 82917
loss: 1.1066641807556152,grad_norm: 0.9999996654381575, iteration: 82918
loss: 0.98633873462677,grad_norm: 0.9610769346774519, iteration: 82919
loss: 1.0892817974090576,grad_norm: 0.9999996906940599, iteration: 82920
loss: 1.0416239500045776,grad_norm: 0.9999996248440626, iteration: 82921
loss: 1.061334252357483,grad_norm: 0.9999995510482245, iteration: 82922
loss: 1.016069769859314,grad_norm: 0.9607248960612645, iteration: 82923
loss: 1.0177119970321655,grad_norm: 0.9999993647813064, iteration: 82924
loss: 0.9819983243942261,grad_norm: 0.9999991619569801, iteration: 82925
loss: 0.9876990914344788,grad_norm: 0.9999993171811041, iteration: 82926
loss: 1.0673907995224,grad_norm: 0.9999991113651744, iteration: 82927
loss: 1.0355232954025269,grad_norm: 0.9898790062517937, iteration: 82928
loss: 1.0332626104354858,grad_norm: 0.9396379074104129, iteration: 82929
loss: 0.9715792536735535,grad_norm: 0.9660909854823494, iteration: 82930
loss: 1.00606369972229,grad_norm: 0.9999993351290012, iteration: 82931
loss: 1.023492455482483,grad_norm: 0.9999996014305924, iteration: 82932
loss: 0.9863863587379456,grad_norm: 0.9999993517386657, iteration: 82933
loss: 0.9939795136451721,grad_norm: 0.9999993726651695, iteration: 82934
loss: 1.0095024108886719,grad_norm: 0.919698084106192, iteration: 82935
loss: 1.0223815441131592,grad_norm: 0.9999991321384866, iteration: 82936
loss: 1.087803602218628,grad_norm: 0.9999991314621908, iteration: 82937
loss: 1.013764500617981,grad_norm: 0.9999994629310611, iteration: 82938
loss: 1.0224435329437256,grad_norm: 0.9999992568893106, iteration: 82939
loss: 0.9946069121360779,grad_norm: 0.9999990028707146, iteration: 82940
loss: 1.003061056137085,grad_norm: 0.9999992146132752, iteration: 82941
loss: 1.0061328411102295,grad_norm: 0.9999990640710811, iteration: 82942
loss: 0.9682230949401855,grad_norm: 0.9999992375040677, iteration: 82943
loss: 0.9942491054534912,grad_norm: 0.9877379494639903, iteration: 82944
loss: 1.0771657228469849,grad_norm: 0.9999991532077861, iteration: 82945
loss: 0.9860307574272156,grad_norm: 0.9999991809264507, iteration: 82946
loss: 1.0394433736801147,grad_norm: 0.9999994834454475, iteration: 82947
loss: 1.0863388776779175,grad_norm: 0.9999997972016064, iteration: 82948
loss: 0.9671198725700378,grad_norm: 0.999999036433664, iteration: 82949
loss: 1.0250234603881836,grad_norm: 0.8880518266865637, iteration: 82950
loss: 0.9692162871360779,grad_norm: 0.9421978401822152, iteration: 82951
loss: 0.9816813468933105,grad_norm: 0.9812701858308363, iteration: 82952
loss: 0.985466480255127,grad_norm: 0.9999991008435143, iteration: 82953
loss: 1.009462594985962,grad_norm: 0.9999992483721084, iteration: 82954
loss: 0.9897555708885193,grad_norm: 0.999999163540394, iteration: 82955
loss: 1.005684733390808,grad_norm: 0.9781843862440881, iteration: 82956
loss: 0.9977996945381165,grad_norm: 0.9999991763045579, iteration: 82957
loss: 1.0253039598464966,grad_norm: 0.8711749427833276, iteration: 82958
loss: 0.979453444480896,grad_norm: 0.9999991047241087, iteration: 82959
loss: 0.9564811587333679,grad_norm: 0.9999991308776367, iteration: 82960
loss: 0.9965625405311584,grad_norm: 0.9402947380003689, iteration: 82961
loss: 0.9523155689239502,grad_norm: 0.9999992229596554, iteration: 82962
loss: 0.9780198335647583,grad_norm: 0.9999997080827818, iteration: 82963
loss: 1.0044188499450684,grad_norm: 0.9071010936737559, iteration: 82964
loss: 1.0550198554992676,grad_norm: 0.9999992989245335, iteration: 82965
loss: 1.035434365272522,grad_norm: 0.9118679510906071, iteration: 82966
loss: 1.0404833555221558,grad_norm: 0.9928527441853212, iteration: 82967
loss: 0.9989356398582458,grad_norm: 0.9141144565727735, iteration: 82968
loss: 1.000487208366394,grad_norm: 0.9999991286207722, iteration: 82969
loss: 0.987908661365509,grad_norm: 0.9999991954548324, iteration: 82970
loss: 0.9989422559738159,grad_norm: 0.854904398846151, iteration: 82971
loss: 0.9927014112472534,grad_norm: 0.9999991122116605, iteration: 82972
loss: 0.9870710968971252,grad_norm: 0.9676740359639363, iteration: 82973
loss: 1.0047240257263184,grad_norm: 0.9999990726557673, iteration: 82974
loss: 0.9974470138549805,grad_norm: 0.8615032204104679, iteration: 82975
loss: 1.0416066646575928,grad_norm: 0.9053050511808161, iteration: 82976
loss: 1.0216176509857178,grad_norm: 0.999999222421142, iteration: 82977
loss: 0.9893711805343628,grad_norm: 0.9999997948613233, iteration: 82978
loss: 0.9864807724952698,grad_norm: 0.9664684925556808, iteration: 82979
loss: 0.9739862084388733,grad_norm: 0.9498495754955825, iteration: 82980
loss: 1.0112924575805664,grad_norm: 0.9999991201449258, iteration: 82981
loss: 1.0032609701156616,grad_norm: 0.9586105037530506, iteration: 82982
loss: 1.0098073482513428,grad_norm: 0.9999991369557146, iteration: 82983
loss: 1.0432559251785278,grad_norm: 0.999999220070697, iteration: 82984
loss: 1.0054274797439575,grad_norm: 0.9999990661923899, iteration: 82985
loss: 1.0034891366958618,grad_norm: 0.9867259038846514, iteration: 82986
loss: 1.0167431831359863,grad_norm: 0.9597965925765993, iteration: 82987
loss: 1.0388503074645996,grad_norm: 0.9999990454167093, iteration: 82988
loss: 0.9916788339614868,grad_norm: 0.9999990773117992, iteration: 82989
loss: 0.9610306620597839,grad_norm: 0.9999995791465084, iteration: 82990
loss: 0.9878224730491638,grad_norm: 0.999999197602605, iteration: 82991
loss: 0.9855585694313049,grad_norm: 0.9999991593797305, iteration: 82992
loss: 0.9996481537818909,grad_norm: 0.999999633097907, iteration: 82993
loss: 1.0137913227081299,grad_norm: 0.8759906114447601, iteration: 82994
loss: 1.017228126525879,grad_norm: 0.9999992601173097, iteration: 82995
loss: 1.0385650396347046,grad_norm: 0.9999990502538868, iteration: 82996
loss: 1.0806934833526611,grad_norm: 0.9999998430418353, iteration: 82997
loss: 1.0121448040008545,grad_norm: 0.9999988911615272, iteration: 82998
loss: 1.0202423334121704,grad_norm: 0.9999993620217428, iteration: 82999
loss: 0.9674608707427979,grad_norm: 0.9999993459417605, iteration: 83000
loss: 1.0199556350708008,grad_norm: 0.9999991325301654, iteration: 83001
loss: 1.044057011604309,grad_norm: 0.9999991548198149, iteration: 83002
loss: 1.0032720565795898,grad_norm: 0.9999991375805753, iteration: 83003
loss: 1.0191224813461304,grad_norm: 0.9560945320543726, iteration: 83004
loss: 0.973717451095581,grad_norm: 0.9812240152246059, iteration: 83005
loss: 0.9930408596992493,grad_norm: 0.9999990523953032, iteration: 83006
loss: 1.020623803138733,grad_norm: 0.9999996914277706, iteration: 83007
loss: 0.9644735455513,grad_norm: 0.999999126956668, iteration: 83008
loss: 0.9860854148864746,grad_norm: 0.9999991691335437, iteration: 83009
loss: 1.0172375440597534,grad_norm: 0.99999915884012, iteration: 83010
loss: 1.0067681074142456,grad_norm: 0.9999992188284661, iteration: 83011
loss: 1.0007354021072388,grad_norm: 0.8528807085806459, iteration: 83012
loss: 0.9911366105079651,grad_norm: 0.8640717805510795, iteration: 83013
loss: 1.0790860652923584,grad_norm: 0.9999999222036864, iteration: 83014
loss: 0.9865078330039978,grad_norm: 0.9773586339296247, iteration: 83015
loss: 0.9797850251197815,grad_norm: 0.9999992339325099, iteration: 83016
loss: 1.0281493663787842,grad_norm: 0.8792052151774061, iteration: 83017
loss: 1.013585090637207,grad_norm: 0.9548565673611736, iteration: 83018
loss: 0.9849483966827393,grad_norm: 0.8394967876977313, iteration: 83019
loss: 1.0126938819885254,grad_norm: 0.9999990964089217, iteration: 83020
loss: 1.0148553848266602,grad_norm: 0.8510362884604747, iteration: 83021
loss: 0.9988075494766235,grad_norm: 0.999999804531124, iteration: 83022
loss: 1.0041371583938599,grad_norm: 0.9018651236117803, iteration: 83023
loss: 0.9780352711677551,grad_norm: 0.9999992667318803, iteration: 83024
loss: 0.9991057515144348,grad_norm: 0.9554355598806932, iteration: 83025
loss: 1.1197097301483154,grad_norm: 0.9999993083309007, iteration: 83026
loss: 0.993079662322998,grad_norm: 0.9999991935530108, iteration: 83027
loss: 0.999241292476654,grad_norm: 0.999999190427837, iteration: 83028
loss: 0.9906264543533325,grad_norm: 0.999999313576042, iteration: 83029
loss: 0.9986063241958618,grad_norm: 0.9906773069116532, iteration: 83030
loss: 1.0006141662597656,grad_norm: 0.9999997194687283, iteration: 83031
loss: 1.022225022315979,grad_norm: 0.9999990957716062, iteration: 83032
loss: 0.9952632188796997,grad_norm: 0.9999990212077408, iteration: 83033
loss: 1.0228424072265625,grad_norm: 0.9999992007294116, iteration: 83034
loss: 0.9776172041893005,grad_norm: 0.9999992411524451, iteration: 83035
loss: 1.015432357788086,grad_norm: 0.9982671181788313, iteration: 83036
loss: 0.9742738604545593,grad_norm: 0.985566937247412, iteration: 83037
loss: 1.0330424308776855,grad_norm: 0.9999993206512948, iteration: 83038
loss: 0.9642270803451538,grad_norm: 0.9912300279320032, iteration: 83039
loss: 1.0170247554779053,grad_norm: 0.9999993078130578, iteration: 83040
loss: 1.0037269592285156,grad_norm: 0.9320593698159566, iteration: 83041
loss: 0.9899288415908813,grad_norm: 0.9999993231440804, iteration: 83042
loss: 0.9834030866622925,grad_norm: 0.9999988915617206, iteration: 83043
loss: 0.9987128973007202,grad_norm: 0.9999992805913184, iteration: 83044
loss: 0.9937524199485779,grad_norm: 0.9022780126810812, iteration: 83045
loss: 0.973048985004425,grad_norm: 0.9999991431453357, iteration: 83046
loss: 1.0416309833526611,grad_norm: 0.9999992760600145, iteration: 83047
loss: 0.9751824736595154,grad_norm: 0.999999313696711, iteration: 83048
loss: 0.9604247808456421,grad_norm: 0.9999990629101535, iteration: 83049
loss: 1.0067722797393799,grad_norm: 0.9999992524874852, iteration: 83050
loss: 1.10344660282135,grad_norm: 0.9909380638356419, iteration: 83051
loss: 1.017805576324463,grad_norm: 0.9999991366178186, iteration: 83052
loss: 1.0377464294433594,grad_norm: 0.999998991433678, iteration: 83053
loss: 1.021606683731079,grad_norm: 0.9999994129448608, iteration: 83054
loss: 1.0283821821212769,grad_norm: 0.999999479130536, iteration: 83055
loss: 0.9681516289710999,grad_norm: 0.9999992089125549, iteration: 83056
loss: 0.9940480589866638,grad_norm: 0.9199716823443106, iteration: 83057
loss: 1.0239589214324951,grad_norm: 0.9677474873163832, iteration: 83058
loss: 1.0103915929794312,grad_norm: 0.9999993386784938, iteration: 83059
loss: 1.0160880088806152,grad_norm: 0.9999994646506412, iteration: 83060
loss: 1.0114998817443848,grad_norm: 0.9999993921956934, iteration: 83061
loss: 1.0564407110214233,grad_norm: 0.9999991705141403, iteration: 83062
loss: 1.0001739263534546,grad_norm: 0.8802930184120833, iteration: 83063
loss: 0.9999165534973145,grad_norm: 0.9999990482587646, iteration: 83064
loss: 0.965235710144043,grad_norm: 0.9999990827005023, iteration: 83065
loss: 1.007381558418274,grad_norm: 0.8937153783194608, iteration: 83066
loss: 0.9771018624305725,grad_norm: 0.9999992418386052, iteration: 83067
loss: 1.089986801147461,grad_norm: 0.9999992306137141, iteration: 83068
loss: 1.0279953479766846,grad_norm: 0.9999990793014117, iteration: 83069
loss: 0.9944906830787659,grad_norm: 0.8909637418618174, iteration: 83070
loss: 1.0204360485076904,grad_norm: 0.9999990964225665, iteration: 83071
loss: 1.0352414846420288,grad_norm: 0.9999992162568525, iteration: 83072
loss: 1.0368341207504272,grad_norm: 0.9999993541705188, iteration: 83073
loss: 1.0495197772979736,grad_norm: 0.999999158698291, iteration: 83074
loss: 0.9801176190376282,grad_norm: 0.945886018453838, iteration: 83075
loss: 1.006310224533081,grad_norm: 0.9197648366605493, iteration: 83076
loss: 0.9529067277908325,grad_norm: 0.9999992326025314, iteration: 83077
loss: 1.139526605606079,grad_norm: 0.9999994892484123, iteration: 83078
loss: 1.041170597076416,grad_norm: 0.999999129236958, iteration: 83079
loss: 0.9785922765731812,grad_norm: 0.9999991549190363, iteration: 83080
loss: 1.0023446083068848,grad_norm: 0.9999990381103298, iteration: 83081
loss: 0.965669572353363,grad_norm: 0.9999994214307747, iteration: 83082
loss: 1.0038321018218994,grad_norm: 0.9999990924532858, iteration: 83083
loss: 1.0108965635299683,grad_norm: 0.9580428994500896, iteration: 83084
loss: 0.9853619933128357,grad_norm: 0.9197425678883168, iteration: 83085
loss: 0.9947999119758606,grad_norm: 0.9999992408270615, iteration: 83086
loss: 0.9773906469345093,grad_norm: 0.9999992243663312, iteration: 83087
loss: 1.0050880908966064,grad_norm: 0.9999991159312963, iteration: 83088
loss: 0.9796268939971924,grad_norm: 0.9999990859330005, iteration: 83089
loss: 0.9856422543525696,grad_norm: 0.9752051821550155, iteration: 83090
loss: 0.9842784404754639,grad_norm: 0.9999989628865805, iteration: 83091
loss: 1.0048021078109741,grad_norm: 0.9999991400451387, iteration: 83092
loss: 1.0034277439117432,grad_norm: 0.9863951726681235, iteration: 83093
loss: 1.0081170797348022,grad_norm: 0.9999991460045018, iteration: 83094
loss: 0.9676547050476074,grad_norm: 0.9999991264669417, iteration: 83095
loss: 0.992098867893219,grad_norm: 0.9999989335887771, iteration: 83096
loss: 1.0277161598205566,grad_norm: 0.9999991151079634, iteration: 83097
loss: 0.9953297972679138,grad_norm: 0.9999989328816274, iteration: 83098
loss: 0.9490903615951538,grad_norm: 0.999998940395872, iteration: 83099
loss: 1.0168546438217163,grad_norm: 0.9999998713261166, iteration: 83100
loss: 0.999101996421814,grad_norm: 0.9999991767795955, iteration: 83101
loss: 0.9931665062904358,grad_norm: 0.9999991407950012, iteration: 83102
loss: 1.0074406862258911,grad_norm: 0.9999991972670231, iteration: 83103
loss: 0.9912126064300537,grad_norm: 0.9999991026038904, iteration: 83104
loss: 1.011771559715271,grad_norm: 0.9999993077594166, iteration: 83105
loss: 0.9802528023719788,grad_norm: 0.9999991227709716, iteration: 83106
loss: 0.9983167052268982,grad_norm: 0.9490219714736265, iteration: 83107
loss: 0.9803550243377686,grad_norm: 0.9999990353334826, iteration: 83108
loss: 1.0179469585418701,grad_norm: 0.9128960896781587, iteration: 83109
loss: 1.022433876991272,grad_norm: 0.9328865069412086, iteration: 83110
loss: 1.0165349245071411,grad_norm: 0.9999991311044352, iteration: 83111
loss: 0.9794344902038574,grad_norm: 0.9999991321313401, iteration: 83112
loss: 0.9859587550163269,grad_norm: 0.9999992252073303, iteration: 83113
loss: 1.0106451511383057,grad_norm: 0.9999991764926162, iteration: 83114
loss: 1.008181095123291,grad_norm: 0.9999992607573507, iteration: 83115
loss: 1.0084961652755737,grad_norm: 0.9999992031233981, iteration: 83116
loss: 0.9985732436180115,grad_norm: 0.9720401640412787, iteration: 83117
loss: 1.0202821493148804,grad_norm: 0.9956776668135735, iteration: 83118
loss: 1.0182896852493286,grad_norm: 0.9999991626350073, iteration: 83119
loss: 0.9464665055274963,grad_norm: 0.9508162870975229, iteration: 83120
loss: 1.0267313718795776,grad_norm: 0.9999996435631318, iteration: 83121
loss: 1.003010630607605,grad_norm: 0.9999991405447294, iteration: 83122
loss: 1.009477972984314,grad_norm: 0.982047643272974, iteration: 83123
loss: 0.9675784111022949,grad_norm: 0.9846083657504349, iteration: 83124
loss: 0.9828274250030518,grad_norm: 0.9799426826341722, iteration: 83125
loss: 1.0000098943710327,grad_norm: 0.9999991211796956, iteration: 83126
loss: 1.044265866279602,grad_norm: 0.9999992507877554, iteration: 83127
loss: 1.0138404369354248,grad_norm: 0.9951378920040337, iteration: 83128
loss: 0.9913886785507202,grad_norm: 0.9999993099746438, iteration: 83129
loss: 0.9442616701126099,grad_norm: 0.9999992306078783, iteration: 83130
loss: 0.977909505367279,grad_norm: 0.9999991732609377, iteration: 83131
loss: 0.9959731698036194,grad_norm: 0.9999991615535757, iteration: 83132
loss: 1.0230087041854858,grad_norm: 0.9999991942166072, iteration: 83133
loss: 1.0041515827178955,grad_norm: 0.9999989174983562, iteration: 83134
loss: 1.000400424003601,grad_norm: 0.9999991017266575, iteration: 83135
loss: 0.9955700039863586,grad_norm: 0.9072191519953137, iteration: 83136
loss: 1.0131428241729736,grad_norm: 0.999998982686513, iteration: 83137
loss: 1.0571271181106567,grad_norm: 0.99999925585836, iteration: 83138
loss: 1.040468454360962,grad_norm: 0.9860589432341555, iteration: 83139
loss: 1.0336898565292358,grad_norm: 0.9999991119203089, iteration: 83140
loss: 0.9910878539085388,grad_norm: 0.9999990762260192, iteration: 83141
loss: 1.038930058479309,grad_norm: 0.9770622067177747, iteration: 83142
loss: 1.0165280103683472,grad_norm: 0.9999991123177145, iteration: 83143
loss: 1.0011743307113647,grad_norm: 0.9999990277338895, iteration: 83144
loss: 0.9748201966285706,grad_norm: 0.9999991365399609, iteration: 83145
loss: 0.9775908589363098,grad_norm: 0.9408179368376165, iteration: 83146
loss: 1.033862590789795,grad_norm: 0.9999992684081614, iteration: 83147
loss: 1.010358214378357,grad_norm: 0.9999990599356353, iteration: 83148
loss: 1.0092499256134033,grad_norm: 0.977552126410408, iteration: 83149
loss: 0.9954308271408081,grad_norm: 0.9928257018656795, iteration: 83150
loss: 1.0205148458480835,grad_norm: 0.9999992752359573, iteration: 83151
loss: 0.9744184613227844,grad_norm: 0.9020598491014646, iteration: 83152
loss: 1.013005256652832,grad_norm: 0.932064929261863, iteration: 83153
loss: 0.9943288564682007,grad_norm: 0.9999991965563522, iteration: 83154
loss: 1.0074288845062256,grad_norm: 0.9999995874919482, iteration: 83155
loss: 1.0065662860870361,grad_norm: 0.9999994329839654, iteration: 83156
loss: 0.9420874714851379,grad_norm: 0.9822143612418988, iteration: 83157
loss: 1.0311559438705444,grad_norm: 0.9999993877484529, iteration: 83158
loss: 1.0112723112106323,grad_norm: 0.9783000622611024, iteration: 83159
loss: 1.0790815353393555,grad_norm: 0.9999989799624635, iteration: 83160
loss: 1.0358715057373047,grad_norm: 0.9999990020720525, iteration: 83161
loss: 0.9633311033248901,grad_norm: 0.934548371555937, iteration: 83162
loss: 1.0089178085327148,grad_norm: 0.9999990762353875, iteration: 83163
loss: 1.0219398736953735,grad_norm: 0.9938389818087489, iteration: 83164
loss: 1.033754587173462,grad_norm: 0.9999993065882693, iteration: 83165
loss: 0.9921224117279053,grad_norm: 0.9999991952253179, iteration: 83166
loss: 0.9885151386260986,grad_norm: 0.999999154317761, iteration: 83167
loss: 1.0673973560333252,grad_norm: 0.9999999900302879, iteration: 83168
loss: 1.0006855726242065,grad_norm: 0.8512244166780042, iteration: 83169
loss: 0.9828071594238281,grad_norm: 0.9999991009625753, iteration: 83170
loss: 1.0119882822036743,grad_norm: 0.9999990362791611, iteration: 83171
loss: 0.9779129028320312,grad_norm: 0.925642470735191, iteration: 83172
loss: 0.9982227683067322,grad_norm: 0.9999994092366358, iteration: 83173
loss: 1.009360432624817,grad_norm: 0.976334451859974, iteration: 83174
loss: 0.999912679195404,grad_norm: 0.9999993589815253, iteration: 83175
loss: 1.0144764184951782,grad_norm: 0.9999991660895219, iteration: 83176
loss: 1.0030828714370728,grad_norm: 0.9999991862903508, iteration: 83177
loss: 0.9876165986061096,grad_norm: 0.999999139305313, iteration: 83178
loss: 1.0198160409927368,grad_norm: 0.9328902611402111, iteration: 83179
loss: 1.0332915782928467,grad_norm: 0.9999992016403533, iteration: 83180
loss: 1.0199363231658936,grad_norm: 0.9999993699940194, iteration: 83181
loss: 1.0445047616958618,grad_norm: 0.9999991083121232, iteration: 83182
loss: 1.0096104145050049,grad_norm: 0.9999991413368268, iteration: 83183
loss: 0.9838831424713135,grad_norm: 0.9625371952052753, iteration: 83184
loss: 1.001227855682373,grad_norm: 0.999999190286832, iteration: 83185
loss: 1.005659580230713,grad_norm: 0.999999183595054, iteration: 83186
loss: 0.9816632270812988,grad_norm: 0.9999991214047769, iteration: 83187
loss: 1.0059521198272705,grad_norm: 0.9999990646790852, iteration: 83188
loss: 1.0101550817489624,grad_norm: 0.9999989702154466, iteration: 83189
loss: 1.0283498764038086,grad_norm: 0.9999994664346088, iteration: 83190
loss: 1.0055705308914185,grad_norm: 0.9786026316054199, iteration: 83191
loss: 1.0012156963348389,grad_norm: 0.9612049322041086, iteration: 83192
loss: 0.9745813012123108,grad_norm: 0.9999992389177437, iteration: 83193
loss: 1.023844599723816,grad_norm: 0.9999991374403886, iteration: 83194
loss: 0.9845645427703857,grad_norm: 0.9601420103470504, iteration: 83195
loss: 0.9691189527511597,grad_norm: 0.9999989360101423, iteration: 83196
loss: 1.0210624933242798,grad_norm: 0.9999992893647631, iteration: 83197
loss: 1.014196515083313,grad_norm: 0.9961685949208765, iteration: 83198
loss: 0.9935368299484253,grad_norm: 0.9999991035985891, iteration: 83199
loss: 0.9967780113220215,grad_norm: 0.9227889003821843, iteration: 83200
loss: 0.9968722462654114,grad_norm: 0.9999991837123947, iteration: 83201
loss: 1.0052456855773926,grad_norm: 0.9999991902165837, iteration: 83202
loss: 1.0239287614822388,grad_norm: 0.9999991042065517, iteration: 83203
loss: 0.9894838929176331,grad_norm: 0.9386932017677582, iteration: 83204
loss: 1.0711439847946167,grad_norm: 0.9999997173362001, iteration: 83205
loss: 0.9633591771125793,grad_norm: 0.9734369625061791, iteration: 83206
loss: 0.9829893708229065,grad_norm: 0.9999990751558273, iteration: 83207
loss: 0.9970492720603943,grad_norm: 0.955226959744473, iteration: 83208
loss: 0.9918983578681946,grad_norm: 0.9266893643853583, iteration: 83209
loss: 1.0553171634674072,grad_norm: 0.9999991881593158, iteration: 83210
loss: 1.0111113786697388,grad_norm: 0.9999999907853095, iteration: 83211
loss: 0.9893972277641296,grad_norm: 0.9999991551794827, iteration: 83212
loss: 0.9909878969192505,grad_norm: 0.9999998730298661, iteration: 83213
loss: 1.0312020778656006,grad_norm: 0.9934560826763581, iteration: 83214
loss: 1.009699821472168,grad_norm: 0.9692959891955737, iteration: 83215
loss: 1.0006334781646729,grad_norm: 0.9999990685241266, iteration: 83216
loss: 1.0306215286254883,grad_norm: 0.9999993766406663, iteration: 83217
loss: 1.1479885578155518,grad_norm: 0.9999996357258637, iteration: 83218
loss: 0.9943448901176453,grad_norm: 0.9999991976203298, iteration: 83219
loss: 1.0059270858764648,grad_norm: 0.9999991885853786, iteration: 83220
loss: 0.9848476052284241,grad_norm: 0.9999989849821647, iteration: 83221
loss: 0.9936828017234802,grad_norm: 0.9999990524593472, iteration: 83222
loss: 1.0136034488677979,grad_norm: 0.9051693017331917, iteration: 83223
loss: 1.0478944778442383,grad_norm: 0.9877231560016873, iteration: 83224
loss: 1.0133888721466064,grad_norm: 0.9999989343483193, iteration: 83225
loss: 1.001688003540039,grad_norm: 0.9999991734462439, iteration: 83226
loss: 1.0069005489349365,grad_norm: 0.999999275194302, iteration: 83227
loss: 0.9759446382522583,grad_norm: 0.9691854625070825, iteration: 83228
loss: 0.955932080745697,grad_norm: 0.9999991509289611, iteration: 83229
loss: 1.0020660161972046,grad_norm: 0.9536686690037919, iteration: 83230
loss: 0.9963299036026001,grad_norm: 0.9999991614127601, iteration: 83231
loss: 0.985581636428833,grad_norm: 0.8862829365087521, iteration: 83232
loss: 1.020926833152771,grad_norm: 0.9999998695872749, iteration: 83233
loss: 1.028368592262268,grad_norm: 0.9307720939721141, iteration: 83234
loss: 0.9957496523857117,grad_norm: 0.9999989412460484, iteration: 83235
loss: 1.0137507915496826,grad_norm: 0.9633413421824027, iteration: 83236
loss: 0.9970659017562866,grad_norm: 0.9999997387894892, iteration: 83237
loss: 1.007118582725525,grad_norm: 0.9999991951162159, iteration: 83238
loss: 1.0129339694976807,grad_norm: 0.9999991066662566, iteration: 83239
loss: 1.0030437707901,grad_norm: 0.9335651697959441, iteration: 83240
loss: 1.0132434368133545,grad_norm: 0.9999999615415701, iteration: 83241
loss: 1.007567048072815,grad_norm: 0.9999991413828668, iteration: 83242
loss: 1.0261046886444092,grad_norm: 0.9960073540943051, iteration: 83243
loss: 0.982534646987915,grad_norm: 0.9999992294347857, iteration: 83244
loss: 1.002401351928711,grad_norm: 0.9999991417908625, iteration: 83245
loss: 1.0163967609405518,grad_norm: 0.9999992167867826, iteration: 83246
loss: 1.006242036819458,grad_norm: 0.952910919357372, iteration: 83247
loss: 0.9765775203704834,grad_norm: 0.999999010689025, iteration: 83248
loss: 1.033164381980896,grad_norm: 0.9999991490839064, iteration: 83249
loss: 0.9703852534294128,grad_norm: 0.9999992880093849, iteration: 83250
loss: 0.9956845641136169,grad_norm: 0.999999081535171, iteration: 83251
loss: 0.9857291579246521,grad_norm: 0.986137405041037, iteration: 83252
loss: 0.989215075969696,grad_norm: 0.9999992385912408, iteration: 83253
loss: 1.0219424962997437,grad_norm: 0.9999992257788322, iteration: 83254
loss: 0.9970315098762512,grad_norm: 0.9999991289710582, iteration: 83255
loss: 1.0052727460861206,grad_norm: 0.9999992104236678, iteration: 83256
loss: 0.9783160090446472,grad_norm: 0.9986546823318875, iteration: 83257
loss: 0.9964652061462402,grad_norm: 0.9999991988746567, iteration: 83258
loss: 1.032731533050537,grad_norm: 0.999999181955665, iteration: 83259
loss: 1.0182877779006958,grad_norm: 0.9142324678301895, iteration: 83260
loss: 1.0334203243255615,grad_norm: 0.9999989945519498, iteration: 83261
loss: 0.962928056716919,grad_norm: 0.999999225076582, iteration: 83262
loss: 1.022392988204956,grad_norm: 0.999999107027241, iteration: 83263
loss: 0.9876461625099182,grad_norm: 0.9999990810453302, iteration: 83264
loss: 0.982369601726532,grad_norm: 0.9999991678771295, iteration: 83265
loss: 0.9852988719940186,grad_norm: 0.9999992097384176, iteration: 83266
loss: 0.9709646701812744,grad_norm: 0.9999991491445422, iteration: 83267
loss: 1.0056085586547852,grad_norm: 0.9806560538941581, iteration: 83268
loss: 1.007419466972351,grad_norm: 0.9999991715609735, iteration: 83269
loss: 0.9783100485801697,grad_norm: 0.9999992223169716, iteration: 83270
loss: 1.0034760236740112,grad_norm: 0.9999989599015311, iteration: 83271
loss: 1.011904239654541,grad_norm: 0.9065683078824704, iteration: 83272
loss: 1.0225189924240112,grad_norm: 0.9999995306629755, iteration: 83273
loss: 0.9811732769012451,grad_norm: 0.8410768107474549, iteration: 83274
loss: 0.99431973695755,grad_norm: 0.975256355468395, iteration: 83275
loss: 1.0401030778884888,grad_norm: 0.9078767230720116, iteration: 83276
loss: 0.9933967590332031,grad_norm: 0.9420822484272997, iteration: 83277
loss: 0.9837910532951355,grad_norm: 0.9999992063816057, iteration: 83278
loss: 0.9962694048881531,grad_norm: 0.8652749172630813, iteration: 83279
loss: 1.0111942291259766,grad_norm: 0.9269135230532655, iteration: 83280
loss: 1.008818507194519,grad_norm: 0.9999990402178072, iteration: 83281
loss: 0.9832674264907837,grad_norm: 0.9999990336132025, iteration: 83282
loss: 1.0031306743621826,grad_norm: 0.9999989860883668, iteration: 83283
loss: 1.028303623199463,grad_norm: 0.9999990551997698, iteration: 83284
loss: 1.0253632068634033,grad_norm: 0.9999992003898986, iteration: 83285
loss: 0.9917250275611877,grad_norm: 0.8803646487257034, iteration: 83286
loss: 1.0145994424819946,grad_norm: 0.9999992034821228, iteration: 83287
loss: 0.9850125312805176,grad_norm: 0.9999991233808232, iteration: 83288
loss: 1.0045006275177002,grad_norm: 0.9329278957973435, iteration: 83289
loss: 1.0256882905960083,grad_norm: 0.9691008335398579, iteration: 83290
loss: 1.0101081132888794,grad_norm: 0.8976000984146137, iteration: 83291
loss: 0.9802554249763489,grad_norm: 0.9349824988573828, iteration: 83292
loss: 1.0311225652694702,grad_norm: 0.9999992828449639, iteration: 83293
loss: 0.9769492149353027,grad_norm: 0.9774127982665298, iteration: 83294
loss: 1.032369613647461,grad_norm: 0.9999992616689711, iteration: 83295
loss: 1.0080832242965698,grad_norm: 0.9999992228042747, iteration: 83296
loss: 1.0153135061264038,grad_norm: 0.9999993209704412, iteration: 83297
loss: 0.9942020177841187,grad_norm: 0.99999914771594, iteration: 83298
loss: 1.0328106880187988,grad_norm: 0.9999991081472325, iteration: 83299
loss: 1.0501443147659302,grad_norm: 0.9999993335087276, iteration: 83300
loss: 0.9634275436401367,grad_norm: 0.9630524405579911, iteration: 83301
loss: 1.0541629791259766,grad_norm: 0.999999127741846, iteration: 83302
loss: 1.0051233768463135,grad_norm: 0.9999988901522132, iteration: 83303
loss: 1.0251492261886597,grad_norm: 0.8166912405552603, iteration: 83304
loss: 1.0291268825531006,grad_norm: 0.9999991188721011, iteration: 83305
loss: 1.0221370458602905,grad_norm: 0.9726206184639259, iteration: 83306
loss: 0.9955229163169861,grad_norm: 0.9999993257537639, iteration: 83307
loss: 1.0270222425460815,grad_norm: 0.9999991754791241, iteration: 83308
loss: 1.0323054790496826,grad_norm: 0.9999992470213112, iteration: 83309
loss: 0.9922568798065186,grad_norm: 0.977097449345417, iteration: 83310
loss: 1.0082203149795532,grad_norm: 0.9868774126818917, iteration: 83311
loss: 1.021740436553955,grad_norm: 0.9999990460636476, iteration: 83312
loss: 0.9843046069145203,grad_norm: 0.9999989412416588, iteration: 83313
loss: 1.0248968601226807,grad_norm: 0.9999992513876402, iteration: 83314
loss: 0.9515150189399719,grad_norm: 0.9022553503674056, iteration: 83315
loss: 1.0409090518951416,grad_norm: 0.9999990433943476, iteration: 83316
loss: 1.001144528388977,grad_norm: 0.9289735062858288, iteration: 83317
loss: 0.9726102948188782,grad_norm: 0.9999991571451488, iteration: 83318
loss: 0.9654816389083862,grad_norm: 0.9999992743825671, iteration: 83319
loss: 0.987162709236145,grad_norm: 0.9720230039103914, iteration: 83320
loss: 0.9470765590667725,grad_norm: 0.9999988638214545, iteration: 83321
loss: 0.9990052580833435,grad_norm: 0.9999990135401261, iteration: 83322
loss: 1.1738712787628174,grad_norm: 0.9999998081098845, iteration: 83323
loss: 1.0351755619049072,grad_norm: 0.9999990496816753, iteration: 83324
loss: 0.9718939661979675,grad_norm: 0.9999991201527534, iteration: 83325
loss: 1.0334433317184448,grad_norm: 0.9550931627740572, iteration: 83326
loss: 1.0753955841064453,grad_norm: 0.9999992181651668, iteration: 83327
loss: 1.0578885078430176,grad_norm: 0.9999991339943745, iteration: 83328
loss: 1.000562310218811,grad_norm: 0.9999992689615164, iteration: 83329
loss: 0.9922891855239868,grad_norm: 0.9999991409402472, iteration: 83330
loss: 0.9719063639640808,grad_norm: 0.9999993058120551, iteration: 83331
loss: 1.0236139297485352,grad_norm: 0.9619061809731311, iteration: 83332
loss: 0.9830694198608398,grad_norm: 0.9999991037535682, iteration: 83333
loss: 0.9814823865890503,grad_norm: 0.9999991397826945, iteration: 83334
loss: 0.9962725043296814,grad_norm: 0.9534866169441459, iteration: 83335
loss: 1.0048362016677856,grad_norm: 0.9999992404058348, iteration: 83336
loss: 0.9822227954864502,grad_norm: 0.9999991386943249, iteration: 83337
loss: 0.9858067631721497,grad_norm: 0.8562526415522368, iteration: 83338
loss: 0.9886400699615479,grad_norm: 0.9999991602579318, iteration: 83339
loss: 1.0185617208480835,grad_norm: 0.9999989800427671, iteration: 83340
loss: 0.9941931366920471,grad_norm: 0.9999989889946155, iteration: 83341
loss: 1.009734034538269,grad_norm: 0.9999989528878767, iteration: 83342
loss: 0.9987555742263794,grad_norm: 0.98123394857326, iteration: 83343
loss: 1.0270631313323975,grad_norm: 0.9999996657916371, iteration: 83344
loss: 1.007738471031189,grad_norm: 0.99800023694086, iteration: 83345
loss: 0.9740005731582642,grad_norm: 0.8986523252708589, iteration: 83346
loss: 0.9984574317932129,grad_norm: 0.9999991650300155, iteration: 83347
loss: 1.023335337638855,grad_norm: 0.999999203543126, iteration: 83348
loss: 0.9894636869430542,grad_norm: 0.8543769937207265, iteration: 83349
loss: 0.976925253868103,grad_norm: 0.8688634611748843, iteration: 83350
loss: 1.0046942234039307,grad_norm: 0.9999992010658275, iteration: 83351
loss: 0.9927688241004944,grad_norm: 0.9999991591273976, iteration: 83352
loss: 0.9939725995063782,grad_norm: 0.9999990824982606, iteration: 83353
loss: 0.9940588474273682,grad_norm: 0.999999294092241, iteration: 83354
loss: 1.0158045291900635,grad_norm: 0.9999990531526478, iteration: 83355
loss: 0.9862574934959412,grad_norm: 0.9404780343634549, iteration: 83356
loss: 1.0381402969360352,grad_norm: 0.9821847887368086, iteration: 83357
loss: 1.038109540939331,grad_norm: 0.9999990182761942, iteration: 83358
loss: 1.0066547393798828,grad_norm: 0.8435788079883878, iteration: 83359
loss: 1.0226432085037231,grad_norm: 0.9999989725437438, iteration: 83360
loss: 1.0238969326019287,grad_norm: 0.9999997873497465, iteration: 83361
loss: 1.0222318172454834,grad_norm: 0.9999995088102428, iteration: 83362
loss: 1.0115420818328857,grad_norm: 0.9414810113581881, iteration: 83363
loss: 0.9894490242004395,grad_norm: 0.9635780184963678, iteration: 83364
loss: 1.0463982820510864,grad_norm: 0.9999992715907576, iteration: 83365
loss: 1.0221900939941406,grad_norm: 0.9999992274710009, iteration: 83366
loss: 1.0060075521469116,grad_norm: 0.9374612218282932, iteration: 83367
loss: 0.9876077175140381,grad_norm: 0.9921139607246788, iteration: 83368
loss: 0.9736305475234985,grad_norm: 0.999999181742215, iteration: 83369
loss: 0.9852485656738281,grad_norm: 0.9999992721490023, iteration: 83370
loss: 0.9967650771141052,grad_norm: 0.999999135442118, iteration: 83371
loss: 0.9945598840713501,grad_norm: 0.9999992971634404, iteration: 83372
loss: 1.0344595909118652,grad_norm: 0.9999990438199866, iteration: 83373
loss: 1.0432054996490479,grad_norm: 0.9999997338427923, iteration: 83374
loss: 1.0122548341751099,grad_norm: 0.9999991330969341, iteration: 83375
loss: 1.0181533098220825,grad_norm: 0.9999991803556636, iteration: 83376
loss: 0.9951996803283691,grad_norm: 0.8710795621745661, iteration: 83377
loss: 0.9688007831573486,grad_norm: 0.9999989717605776, iteration: 83378
loss: 0.9729380011558533,grad_norm: 0.9999991572550483, iteration: 83379
loss: 0.9898548722267151,grad_norm: 0.9538016282983283, iteration: 83380
loss: 1.0044240951538086,grad_norm: 0.9454023890278455, iteration: 83381
loss: 0.990440845489502,grad_norm: 0.9139280980398442, iteration: 83382
loss: 1.024099349975586,grad_norm: 0.999999005091809, iteration: 83383
loss: 1.00320565700531,grad_norm: 0.9290524613033356, iteration: 83384
loss: 0.9588638544082642,grad_norm: 0.9908171059289042, iteration: 83385
loss: 1.0235543251037598,grad_norm: 0.9999992734762515, iteration: 83386
loss: 1.0279605388641357,grad_norm: 0.897425537783294, iteration: 83387
loss: 1.0183699131011963,grad_norm: 0.9999991992233817, iteration: 83388
loss: 1.0356990098953247,grad_norm: 0.999999167117755, iteration: 83389
loss: 1.0257543325424194,grad_norm: 0.9961581486334317, iteration: 83390
loss: 0.9792380332946777,grad_norm: 0.9999992513331247, iteration: 83391
loss: 0.9854617118835449,grad_norm: 0.9002585033677654, iteration: 83392
loss: 1.025759220123291,grad_norm: 0.9999991934840554, iteration: 83393
loss: 0.9903857707977295,grad_norm: 0.9999990829831854, iteration: 83394
loss: 1.0051777362823486,grad_norm: 0.9999992632798327, iteration: 83395
loss: 1.0194350481033325,grad_norm: 0.9999991249352934, iteration: 83396
loss: 1.0177788734436035,grad_norm: 0.9999990367918926, iteration: 83397
loss: 0.9979828000068665,grad_norm: 0.9999990696750034, iteration: 83398
loss: 1.0236294269561768,grad_norm: 0.9999990125445649, iteration: 83399
loss: 0.9869979619979858,grad_norm: 0.9919863685131048, iteration: 83400
loss: 1.0778025388717651,grad_norm: 0.9999998008973724, iteration: 83401
loss: 0.9634180665016174,grad_norm: 0.9999991253200098, iteration: 83402
loss: 1.007834792137146,grad_norm: 0.9999993360888245, iteration: 83403
loss: 0.9946542978286743,grad_norm: 0.9710543283207852, iteration: 83404
loss: 0.9923356771469116,grad_norm: 0.9999990287461706, iteration: 83405
loss: 0.9781169891357422,grad_norm: 0.9999991763117688, iteration: 83406
loss: 0.9848812222480774,grad_norm: 0.9999990993569272, iteration: 83407
loss: 1.0029189586639404,grad_norm: 0.9999990527960783, iteration: 83408
loss: 1.0171021223068237,grad_norm: 0.9999994142895104, iteration: 83409
loss: 1.091423749923706,grad_norm: 0.9999996573434593, iteration: 83410
loss: 0.9936273694038391,grad_norm: 0.9999991507194016, iteration: 83411
loss: 0.9977018237113953,grad_norm: 0.9999991856916336, iteration: 83412
loss: 0.9887508153915405,grad_norm: 0.9999991189346707, iteration: 83413
loss: 1.0066728591918945,grad_norm: 0.9999992125469767, iteration: 83414
loss: 0.9820060133934021,grad_norm: 0.96518830083725, iteration: 83415
loss: 1.008489966392517,grad_norm: 0.999999015808962, iteration: 83416
loss: 0.9571519494056702,grad_norm: 0.8584202185878886, iteration: 83417
loss: 0.9951070547103882,grad_norm: 0.9745632354905042, iteration: 83418
loss: 1.0090323686599731,grad_norm: 0.9999992051351866, iteration: 83419
loss: 1.01115882396698,grad_norm: 0.9886605776753309, iteration: 83420
loss: 1.0288828611373901,grad_norm: 0.9999991498713989, iteration: 83421
loss: 1.001983404159546,grad_norm: 0.9999991659459267, iteration: 83422
loss: 0.9908409714698792,grad_norm: 0.913593888416728, iteration: 83423
loss: 0.9931938648223877,grad_norm: 0.9999992350751689, iteration: 83424
loss: 1.0032416582107544,grad_norm: 0.982057635498647, iteration: 83425
loss: 1.0175375938415527,grad_norm: 0.9704486832576185, iteration: 83426
loss: 0.9909992218017578,grad_norm: 0.9999992621675966, iteration: 83427
loss: 1.0014662742614746,grad_norm: 0.9999991030855889, iteration: 83428
loss: 0.979707658290863,grad_norm: 0.9012402810126822, iteration: 83429
loss: 1.0177576541900635,grad_norm: 0.9977647460823414, iteration: 83430
loss: 0.9946411848068237,grad_norm: 0.9999991036195304, iteration: 83431
loss: 1.0145149230957031,grad_norm: 0.9999991597657524, iteration: 83432
loss: 1.0115512609481812,grad_norm: 0.9999991280303854, iteration: 83433
loss: 1.0039736032485962,grad_norm: 0.9999991807515782, iteration: 83434
loss: 1.0135058164596558,grad_norm: 0.9999991049433394, iteration: 83435
loss: 0.9616873860359192,grad_norm: 0.9766668988794609, iteration: 83436
loss: 0.9784586429595947,grad_norm: 0.9999992016204002, iteration: 83437
loss: 0.9823179841041565,grad_norm: 0.9999992348128668, iteration: 83438
loss: 0.9891985058784485,grad_norm: 0.9999992089104265, iteration: 83439
loss: 0.9905941486358643,grad_norm: 0.9999992215625446, iteration: 83440
loss: 1.0124900341033936,grad_norm: 0.9999989265736892, iteration: 83441
loss: 0.9931315779685974,grad_norm: 0.9999989841274295, iteration: 83442
loss: 1.0702728033065796,grad_norm: 0.9999995085274699, iteration: 83443
loss: 1.0168876647949219,grad_norm: 0.9999991607620476, iteration: 83444
loss: 1.0295264720916748,grad_norm: 0.8996075200761595, iteration: 83445
loss: 0.9709341526031494,grad_norm: 0.9999991353705395, iteration: 83446
loss: 1.0539593696594238,grad_norm: 0.9999998035280412, iteration: 83447
loss: 0.9426977038383484,grad_norm: 0.9999990345769302, iteration: 83448
loss: 0.9890452027320862,grad_norm: 0.9999990707071305, iteration: 83449
loss: 1.004045009613037,grad_norm: 0.958968631921693, iteration: 83450
loss: 0.9884940981864929,grad_norm: 0.7694775430934283, iteration: 83451
loss: 1.0392082929611206,grad_norm: 0.9999995008975273, iteration: 83452
loss: 0.979066789150238,grad_norm: 0.8450823131921479, iteration: 83453
loss: 0.9957759976387024,grad_norm: 0.9115886456989841, iteration: 83454
loss: 1.0054391622543335,grad_norm: 0.9842516078436122, iteration: 83455
loss: 1.0249853134155273,grad_norm: 0.9999991364724339, iteration: 83456
loss: 1.0215976238250732,grad_norm: 0.9999996830860103, iteration: 83457
loss: 1.009186029434204,grad_norm: 0.9999991710039231, iteration: 83458
loss: 0.9738519787788391,grad_norm: 0.9362442126129548, iteration: 83459
loss: 1.0126267671585083,grad_norm: 0.9382597152497811, iteration: 83460
loss: 1.029079556465149,grad_norm: 0.927858956328686, iteration: 83461
loss: 1.0115063190460205,grad_norm: 0.9999994849527153, iteration: 83462
loss: 1.00879967212677,grad_norm: 0.9999995822243852, iteration: 83463
loss: 0.98470538854599,grad_norm: 0.99999927934506, iteration: 83464
loss: 1.0317103862762451,grad_norm: 0.9999994518581814, iteration: 83465
loss: 1.0525505542755127,grad_norm: 0.999999209481504, iteration: 83466
loss: 0.9928320646286011,grad_norm: 0.9999992772272387, iteration: 83467
loss: 1.0310471057891846,grad_norm: 0.9999991301363046, iteration: 83468
loss: 1.015343427658081,grad_norm: 0.9999989231076051, iteration: 83469
loss: 0.9944726228713989,grad_norm: 0.9999991098922326, iteration: 83470
loss: 1.141549825668335,grad_norm: 0.9999992887435939, iteration: 83471
loss: 1.009454369544983,grad_norm: 0.9999991168797556, iteration: 83472
loss: 1.0034019947052002,grad_norm: 0.9898902106008274, iteration: 83473
loss: 0.9761337041854858,grad_norm: 0.9999992924785174, iteration: 83474
loss: 0.9701666831970215,grad_norm: 0.999999051888284, iteration: 83475
loss: 0.9691029787063599,grad_norm: 0.9999991062066602, iteration: 83476
loss: 1.002519130706787,grad_norm: 0.99999919970066, iteration: 83477
loss: 1.0158374309539795,grad_norm: 0.9999991848707366, iteration: 83478
loss: 0.9996537566184998,grad_norm: 0.9999992588039661, iteration: 83479
loss: 0.9697753190994263,grad_norm: 0.9892947012856642, iteration: 83480
loss: 0.98992919921875,grad_norm: 0.9378630394105967, iteration: 83481
loss: 1.0003916025161743,grad_norm: 0.7935697188318273, iteration: 83482
loss: 1.004050850868225,grad_norm: 0.9999988621055795, iteration: 83483
loss: 1.0273101329803467,grad_norm: 0.9999992796696343, iteration: 83484
loss: 1.0504900217056274,grad_norm: 0.999999090016129, iteration: 83485
loss: 1.0236934423446655,grad_norm: 0.999998909054044, iteration: 83486
loss: 1.0307021141052246,grad_norm: 0.9999991266903794, iteration: 83487
loss: 1.0772169828414917,grad_norm: 0.9999995844839615, iteration: 83488
loss: 0.9889988899230957,grad_norm: 0.9999994171767609, iteration: 83489
loss: 1.003731369972229,grad_norm: 0.9999991445327413, iteration: 83490
loss: 0.9933707118034363,grad_norm: 0.9966248631574542, iteration: 83491
loss: 0.9982077479362488,grad_norm: 0.999999124779289, iteration: 83492
loss: 1.046866774559021,grad_norm: 0.9999994087843718, iteration: 83493
loss: 1.0316170454025269,grad_norm: 0.9999995866356379, iteration: 83494
loss: 1.0140568017959595,grad_norm: 0.999998986717304, iteration: 83495
loss: 1.0041239261627197,grad_norm: 0.9999991107261066, iteration: 83496
loss: 0.9959158897399902,grad_norm: 0.999999137116204, iteration: 83497
loss: 0.9995626211166382,grad_norm: 0.8737533339236186, iteration: 83498
loss: 1.0133389234542847,grad_norm: 0.9999992884359462, iteration: 83499
loss: 0.9896673560142517,grad_norm: 0.9999990977212257, iteration: 83500
loss: 1.0533833503723145,grad_norm: 0.9999998310396019, iteration: 83501
loss: 0.9490997195243835,grad_norm: 0.9999992014591079, iteration: 83502
loss: 0.967688262462616,grad_norm: 0.9999991618385848, iteration: 83503
loss: 0.9767647385597229,grad_norm: 0.9999990484930887, iteration: 83504
loss: 0.9830906391143799,grad_norm: 0.9642303222325264, iteration: 83505
loss: 0.9698208570480347,grad_norm: 0.9985520913961655, iteration: 83506
loss: 0.998677670955658,grad_norm: 0.9999990434423977, iteration: 83507
loss: 0.9766731858253479,grad_norm: 0.9848340382438148, iteration: 83508
loss: 0.9857232570648193,grad_norm: 0.9999991847176897, iteration: 83509
loss: 1.0204157829284668,grad_norm: 0.9999992900479489, iteration: 83510
loss: 1.0161364078521729,grad_norm: 0.9445362814201478, iteration: 83511
loss: 1.0070428848266602,grad_norm: 0.9999991293883834, iteration: 83512
loss: 0.999484121799469,grad_norm: 0.9999990280916377, iteration: 83513
loss: 0.9758066534996033,grad_norm: 0.9999990392198792, iteration: 83514
loss: 1.0124434232711792,grad_norm: 0.9999991075328223, iteration: 83515
loss: 1.0245287418365479,grad_norm: 0.9940630105883762, iteration: 83516
loss: 1.0039454698562622,grad_norm: 0.8537897210885347, iteration: 83517
loss: 0.9872581958770752,grad_norm: 0.9999989317115776, iteration: 83518
loss: 0.9474035501480103,grad_norm: 0.962031543330829, iteration: 83519
loss: 0.9950648546218872,grad_norm: 0.9999990461917636, iteration: 83520
loss: 0.99317467212677,grad_norm: 0.9798886234699569, iteration: 83521
loss: 1.0255897045135498,grad_norm: 0.9999991513988121, iteration: 83522
loss: 0.9900245666503906,grad_norm: 0.9999992426133253, iteration: 83523
loss: 0.9796704053878784,grad_norm: 0.8330224154791035, iteration: 83524
loss: 1.0291154384613037,grad_norm: 0.999999284988335, iteration: 83525
loss: 1.009472131729126,grad_norm: 0.9999989819349043, iteration: 83526
loss: 0.9941380620002747,grad_norm: 0.9999992494461022, iteration: 83527
loss: 1.0085091590881348,grad_norm: 0.9077909974934565, iteration: 83528
loss: 1.0106947422027588,grad_norm: 0.9999990589997194, iteration: 83529
loss: 0.9952722787857056,grad_norm: 0.9999991080858488, iteration: 83530
loss: 1.000656008720398,grad_norm: 0.9314587848630338, iteration: 83531
loss: 1.0294538736343384,grad_norm: 0.9681511776475356, iteration: 83532
loss: 1.0445713996887207,grad_norm: 0.9999991760412638, iteration: 83533
loss: 1.0132373571395874,grad_norm: 0.9424940526911546, iteration: 83534
loss: 1.0059128999710083,grad_norm: 0.9999992247599148, iteration: 83535
loss: 1.0208719968795776,grad_norm: 0.9999991455475381, iteration: 83536
loss: 0.9972871541976929,grad_norm: 0.9999991278208255, iteration: 83537
loss: 0.9552690386772156,grad_norm: 0.954784172547393, iteration: 83538
loss: 1.0195766687393188,grad_norm: 0.9999991323640531, iteration: 83539
loss: 0.996299147605896,grad_norm: 0.9999992691415268, iteration: 83540
loss: 0.998052179813385,grad_norm: 0.9999999117463123, iteration: 83541
loss: 1.0171250104904175,grad_norm: 0.9371840722129929, iteration: 83542
loss: 0.9978369474411011,grad_norm: 0.9999991371056827, iteration: 83543
loss: 0.9987238645553589,grad_norm: 0.9219984347866516, iteration: 83544
loss: 1.0719068050384521,grad_norm: 0.9999995851703698, iteration: 83545
loss: 1.0183604955673218,grad_norm: 0.9999990669427046, iteration: 83546
loss: 0.9680355787277222,grad_norm: 0.9999991309636228, iteration: 83547
loss: 1.0011242628097534,grad_norm: 0.9970227784399599, iteration: 83548
loss: 1.0047850608825684,grad_norm: 0.9999996548365722, iteration: 83549
loss: 0.985921323299408,grad_norm: 0.9999991324755855, iteration: 83550
loss: 0.9756556749343872,grad_norm: 0.8456460081778812, iteration: 83551
loss: 1.0029646158218384,grad_norm: 0.9090769502239655, iteration: 83552
loss: 1.0448952913284302,grad_norm: 0.9999992010905375, iteration: 83553
loss: 0.9833558797836304,grad_norm: 0.9999995180249842, iteration: 83554
loss: 0.9988440275192261,grad_norm: 0.9286697935347642, iteration: 83555
loss: 0.9915408492088318,grad_norm: 0.999999270432511, iteration: 83556
loss: 1.0027515888214111,grad_norm: 0.9999990316480629, iteration: 83557
loss: 1.001777172088623,grad_norm: 0.9999990690219567, iteration: 83558
loss: 0.9960257411003113,grad_norm: 0.9999994077107869, iteration: 83559
loss: 0.9980858564376831,grad_norm: 0.9999991366002425, iteration: 83560
loss: 0.991155743598938,grad_norm: 0.9999990952046374, iteration: 83561
loss: 1.0264458656311035,grad_norm: 0.9999992610170799, iteration: 83562
loss: 1.017637014389038,grad_norm: 0.9999990886760634, iteration: 83563
loss: 1.0308889150619507,grad_norm: 0.9926650149668682, iteration: 83564
loss: 1.0299067497253418,grad_norm: 0.9999993356717498, iteration: 83565
loss: 1.0413880348205566,grad_norm: 0.935877960553065, iteration: 83566
loss: 0.9955825209617615,grad_norm: 0.9999991577679902, iteration: 83567
loss: 0.985024631023407,grad_norm: 0.9999991701998665, iteration: 83568
loss: 0.9978272914886475,grad_norm: 0.9293127364914433, iteration: 83569
loss: 1.0276833772659302,grad_norm: 0.9999990750199751, iteration: 83570
loss: 0.9893465042114258,grad_norm: 0.9999992894946773, iteration: 83571
loss: 0.94898521900177,grad_norm: 0.9999992157639239, iteration: 83572
loss: 1.0205113887786865,grad_norm: 0.9999990637257683, iteration: 83573
loss: 1.0352250337600708,grad_norm: 0.9999990527151967, iteration: 83574
loss: 0.9960095286369324,grad_norm: 0.9999990645024598, iteration: 83575
loss: 0.9862540364265442,grad_norm: 0.9637458509895336, iteration: 83576
loss: 1.007448673248291,grad_norm: 0.9139301525319186, iteration: 83577
loss: 1.004717469215393,grad_norm: 0.999999171905969, iteration: 83578
loss: 0.9820477962493896,grad_norm: 0.9999990456557846, iteration: 83579
loss: 0.9865734577178955,grad_norm: 0.9999991187930725, iteration: 83580
loss: 0.9841187000274658,grad_norm: 0.9199371038200226, iteration: 83581
loss: 0.9992277026176453,grad_norm: 0.9999990874809964, iteration: 83582
loss: 1.000421404838562,grad_norm: 0.9713432195480843, iteration: 83583
loss: 1.0245712995529175,grad_norm: 0.9725811249272567, iteration: 83584
loss: 0.9821761846542358,grad_norm: 0.9999993045561643, iteration: 83585
loss: 0.9831005930900574,grad_norm: 0.9016683280265134, iteration: 83586
loss: 1.0301482677459717,grad_norm: 0.9429776930681639, iteration: 83587
loss: 0.9970979690551758,grad_norm: 0.8551815937740933, iteration: 83588
loss: 0.9993546605110168,grad_norm: 0.9027383443300363, iteration: 83589
loss: 1.0253034830093384,grad_norm: 0.9774140715236815, iteration: 83590
loss: 0.9713501334190369,grad_norm: 0.9986519880233934, iteration: 83591
loss: 0.9538201093673706,grad_norm: 0.9999990925061331, iteration: 83592
loss: 1.0069886445999146,grad_norm: 0.9642331192344684, iteration: 83593
loss: 1.0031033754348755,grad_norm: 0.9999992009711312, iteration: 83594
loss: 1.0064735412597656,grad_norm: 0.9999990903539372, iteration: 83595
loss: 1.0072078704833984,grad_norm: 0.8671607071321086, iteration: 83596
loss: 1.013127088546753,grad_norm: 0.9999991138684318, iteration: 83597
loss: 1.0017552375793457,grad_norm: 0.9999992973570417, iteration: 83598
loss: 0.9908968806266785,grad_norm: 0.9450399984160478, iteration: 83599
loss: 1.0173022747039795,grad_norm: 0.8818613030382731, iteration: 83600
loss: 1.0238624811172485,grad_norm: 0.9313999320255724, iteration: 83601
loss: 1.0478708744049072,grad_norm: 0.9999991386102465, iteration: 83602
loss: 1.0304979085922241,grad_norm: 0.9999994871110588, iteration: 83603
loss: 0.992529034614563,grad_norm: 0.9834117191627062, iteration: 83604
loss: 1.0459555387496948,grad_norm: 0.9782820944272829, iteration: 83605
loss: 1.0051823854446411,grad_norm: 0.9999992253316249, iteration: 83606
loss: 1.0449634790420532,grad_norm: 0.999999292994487, iteration: 83607
loss: 1.0338557958602905,grad_norm: 0.919642475453171, iteration: 83608
loss: 1.0023802518844604,grad_norm: 0.9999990723886713, iteration: 83609
loss: 1.0933465957641602,grad_norm: 0.9999998650184171, iteration: 83610
loss: 1.0666229724884033,grad_norm: 0.999999373861421, iteration: 83611
loss: 0.9954264163970947,grad_norm: 0.999999764238714, iteration: 83612
loss: 1.0266729593276978,grad_norm: 0.9999991625889874, iteration: 83613
loss: 0.9909402132034302,grad_norm: 0.9999999553395201, iteration: 83614
loss: 1.0155459642410278,grad_norm: 0.9999992246908589, iteration: 83615
loss: 1.0017459392547607,grad_norm: 0.9999991658390008, iteration: 83616
loss: 0.9914999604225159,grad_norm: 0.8432262303771764, iteration: 83617
loss: 1.0031813383102417,grad_norm: 0.9999991956864165, iteration: 83618
loss: 1.0127695798873901,grad_norm: 0.9999991534871675, iteration: 83619
loss: 0.96523118019104,grad_norm: 0.9999991275063413, iteration: 83620
loss: 1.0083374977111816,grad_norm: 0.9999991350822648, iteration: 83621
loss: 1.0073878765106201,grad_norm: 0.994514261146885, iteration: 83622
loss: 1.0126276016235352,grad_norm: 0.9156426096572403, iteration: 83623
loss: 0.9898317456245422,grad_norm: 0.985629584384784, iteration: 83624
loss: 0.9845925569534302,grad_norm: 0.999999190459343, iteration: 83625
loss: 1.0362483263015747,grad_norm: 0.9999991879673923, iteration: 83626
loss: 1.007252812385559,grad_norm: 0.9999991497662939, iteration: 83627
loss: 1.0351866483688354,grad_norm: 0.9999990698198504, iteration: 83628
loss: 0.9748137593269348,grad_norm: 0.9773397847176131, iteration: 83629
loss: 1.00774085521698,grad_norm: 0.9999992259352398, iteration: 83630
loss: 0.9855324625968933,grad_norm: 0.8957721062868542, iteration: 83631
loss: 0.9832141995429993,grad_norm: 0.9999992466559056, iteration: 83632
loss: 0.9801694750785828,grad_norm: 0.9999992225851916, iteration: 83633
loss: 0.9688133597373962,grad_norm: 0.9999991143710449, iteration: 83634
loss: 0.9956402778625488,grad_norm: 0.9999992047092571, iteration: 83635
loss: 0.9995005130767822,grad_norm: 0.9999991458763052, iteration: 83636
loss: 0.9983223080635071,grad_norm: 0.999999736684164, iteration: 83637
loss: 1.0519815683364868,grad_norm: 0.9999993886109771, iteration: 83638
loss: 1.0155442953109741,grad_norm: 0.9999991487315537, iteration: 83639
loss: 0.9955816864967346,grad_norm: 0.9999992643433572, iteration: 83640
loss: 1.0822793245315552,grad_norm: 0.9999995306775359, iteration: 83641
loss: 1.0233274698257446,grad_norm: 0.9999992641220238, iteration: 83642
loss: 0.9858362674713135,grad_norm: 0.9939209090417944, iteration: 83643
loss: 1.023321270942688,grad_norm: 0.9999992556690236, iteration: 83644
loss: 0.997870922088623,grad_norm: 0.9999994688741284, iteration: 83645
loss: 1.014958143234253,grad_norm: 0.9999990746912659, iteration: 83646
loss: 0.9928232431411743,grad_norm: 0.9999990485637734, iteration: 83647
loss: 0.9951049089431763,grad_norm: 0.9999992784652018, iteration: 83648
loss: 0.9989151954650879,grad_norm: 0.9999990395071913, iteration: 83649
loss: 0.9737151861190796,grad_norm: 0.9999989612278257, iteration: 83650
loss: 1.0164779424667358,grad_norm: 0.9999991597622913, iteration: 83651
loss: 0.9916285872459412,grad_norm: 0.9999992613840613, iteration: 83652
loss: 1.0289452075958252,grad_norm: 0.9731145603483599, iteration: 83653
loss: 1.034312129020691,grad_norm: 0.9999998277892032, iteration: 83654
loss: 1.1671366691589355,grad_norm: 0.9999992972208193, iteration: 83655
loss: 0.9620999097824097,grad_norm: 0.9723179374353731, iteration: 83656
loss: 1.007020354270935,grad_norm: 0.9999991037795274, iteration: 83657
loss: 0.9659010171890259,grad_norm: 0.9698967352607075, iteration: 83658
loss: 1.0348379611968994,grad_norm: 0.9999993538705354, iteration: 83659
loss: 1.056833028793335,grad_norm: 0.9999991116154008, iteration: 83660
loss: 1.0072224140167236,grad_norm: 0.9999991318286687, iteration: 83661
loss: 1.015424132347107,grad_norm: 0.9999989407192807, iteration: 83662
loss: 1.0105090141296387,grad_norm: 0.9918538135333927, iteration: 83663
loss: 1.0108799934387207,grad_norm: 0.9999991877811737, iteration: 83664
loss: 0.9918031692504883,grad_norm: 0.9999991691683001, iteration: 83665
loss: 0.979736328125,grad_norm: 0.9999990771319539, iteration: 83666
loss: 1.0023066997528076,grad_norm: 0.9580633920699501, iteration: 83667
loss: 1.0073076486587524,grad_norm: 0.9999989829881667, iteration: 83668
loss: 0.9863840937614441,grad_norm: 0.999999097272615, iteration: 83669
loss: 0.9954981207847595,grad_norm: 0.9999989404364749, iteration: 83670
loss: 1.0619583129882812,grad_norm: 0.9999994618213025, iteration: 83671
loss: 1.021316409111023,grad_norm: 0.9999992065203872, iteration: 83672
loss: 1.0100374221801758,grad_norm: 0.9999992199470273, iteration: 83673
loss: 1.0030778646469116,grad_norm: 0.9999992357251074, iteration: 83674
loss: 1.0135271549224854,grad_norm: 0.9999989454569223, iteration: 83675
loss: 0.9940197467803955,grad_norm: 0.929697787936217, iteration: 83676
loss: 0.9783110022544861,grad_norm: 0.9999990548809957, iteration: 83677
loss: 1.0085065364837646,grad_norm: 0.84515546937626, iteration: 83678
loss: 1.0023796558380127,grad_norm: 0.9905947580795667, iteration: 83679
loss: 1.0383394956588745,grad_norm: 0.9999990705262074, iteration: 83680
loss: 0.9475643038749695,grad_norm: 0.9999988646179638, iteration: 83681
loss: 1.001996636390686,grad_norm: 0.9999990851919367, iteration: 83682
loss: 0.9709311723709106,grad_norm: 0.7898229569941043, iteration: 83683
loss: 0.9972842335700989,grad_norm: 0.98077098477707, iteration: 83684
loss: 0.9907925128936768,grad_norm: 0.9999998495811235, iteration: 83685
loss: 0.9930505752563477,grad_norm: 0.9999991252533499, iteration: 83686
loss: 0.9750841856002808,grad_norm: 0.7828363674109603, iteration: 83687
loss: 0.9915059208869934,grad_norm: 0.9999994371444865, iteration: 83688
loss: 1.0025759935379028,grad_norm: 0.9999991264400274, iteration: 83689
loss: 1.0225825309753418,grad_norm: 0.9999991016689818, iteration: 83690
loss: 1.0299618244171143,grad_norm: 0.9999990643127108, iteration: 83691
loss: 1.0133609771728516,grad_norm: 0.9999991575421862, iteration: 83692
loss: 0.9844769239425659,grad_norm: 0.9999992127351827, iteration: 83693
loss: 0.9889330267906189,grad_norm: 0.9999990933181829, iteration: 83694
loss: 1.0212645530700684,grad_norm: 0.9447443961834402, iteration: 83695
loss: 0.9944505095481873,grad_norm: 0.9999992238861996, iteration: 83696
loss: 1.0320557355880737,grad_norm: 0.9999993965608764, iteration: 83697
loss: 1.0050674676895142,grad_norm: 0.999999243433425, iteration: 83698
loss: 0.9973722100257874,grad_norm: 0.9999992715326329, iteration: 83699
loss: 0.9744839668273926,grad_norm: 0.9999990540818974, iteration: 83700
loss: 0.9438009858131409,grad_norm: 0.9999990001854443, iteration: 83701
loss: 1.00667142868042,grad_norm: 0.9148977370738527, iteration: 83702
loss: 1.0223205089569092,grad_norm: 0.9999991945628834, iteration: 83703
loss: 1.0219918489456177,grad_norm: 0.9892397181292953, iteration: 83704
loss: 1.0957599878311157,grad_norm: 0.9999993638049077, iteration: 83705
loss: 0.9958986043930054,grad_norm: 0.9999992817829111, iteration: 83706
loss: 0.9925500154495239,grad_norm: 0.9999991018840539, iteration: 83707
loss: 1.0046905279159546,grad_norm: 0.9999991379285177, iteration: 83708
loss: 0.9802011251449585,grad_norm: 0.9999993339571743, iteration: 83709
loss: 1.061636209487915,grad_norm: 0.9999997320002241, iteration: 83710
loss: 0.9871705174446106,grad_norm: 0.9999991060184348, iteration: 83711
loss: 0.983487069606781,grad_norm: 0.9765209677499189, iteration: 83712
loss: 0.9941728115081787,grad_norm: 0.9999990039516651, iteration: 83713
loss: 1.0212726593017578,grad_norm: 0.9999992875276595, iteration: 83714
loss: 1.0099282264709473,grad_norm: 0.9999991020340409, iteration: 83715
loss: 0.9857423305511475,grad_norm: 0.9049372101790196, iteration: 83716
loss: 1.0249861478805542,grad_norm: 0.9999991497729969, iteration: 83717
loss: 0.9947870969772339,grad_norm: 0.86989435908638, iteration: 83718
loss: 0.9882069230079651,grad_norm: 1.0000000276186196, iteration: 83719
loss: 1.0460349321365356,grad_norm: 0.9999992877532291, iteration: 83720
loss: 0.9998958110809326,grad_norm: 0.9999990512490539, iteration: 83721
loss: 1.0050311088562012,grad_norm: 0.9999991333305566, iteration: 83722
loss: 0.9625371694564819,grad_norm: 0.9999990372769738, iteration: 83723
loss: 1.0339009761810303,grad_norm: 0.9999991809885239, iteration: 83724
loss: 0.9695510268211365,grad_norm: 0.977006875611198, iteration: 83725
loss: 1.0135078430175781,grad_norm: 0.9999990695392438, iteration: 83726
loss: 1.0449085235595703,grad_norm: 0.9999991740023353, iteration: 83727
loss: 1.0660911798477173,grad_norm: 0.9999994349752402, iteration: 83728
loss: 0.9931318759918213,grad_norm: 0.9999991680296851, iteration: 83729
loss: 0.9784706830978394,grad_norm: 0.9999994629826742, iteration: 83730
loss: 1.2574775218963623,grad_norm: 0.999999262063232, iteration: 83731
loss: 1.001257300376892,grad_norm: 0.9999990993849869, iteration: 83732
loss: 1.0112531185150146,grad_norm: 0.9999991350344127, iteration: 83733
loss: 1.143949031829834,grad_norm: 0.9999991610930297, iteration: 83734
loss: 1.0367432832717896,grad_norm: 0.9999991092166152, iteration: 83735
loss: 0.9990344643592834,grad_norm: 0.9377354674095345, iteration: 83736
loss: 1.0380299091339111,grad_norm: 0.9999992306327021, iteration: 83737
loss: 1.0036866664886475,grad_norm: 0.9790626644259324, iteration: 83738
loss: 1.015893816947937,grad_norm: 0.8947471733820669, iteration: 83739
loss: 0.9959762692451477,grad_norm: 0.9999991640278453, iteration: 83740
loss: 0.9859837293624878,grad_norm: 0.9999991678873579, iteration: 83741
loss: 1.038360595703125,grad_norm: 0.999999354506086, iteration: 83742
loss: 1.0347907543182373,grad_norm: 0.999999108516732, iteration: 83743
loss: 0.9883385300636292,grad_norm: 0.8867119908465939, iteration: 83744
loss: 1.0249511003494263,grad_norm: 0.9999989827339992, iteration: 83745
loss: 0.9971463084220886,grad_norm: 0.9999992096528056, iteration: 83746
loss: 1.0133522748947144,grad_norm: 0.9999991011571971, iteration: 83747
loss: 0.992574155330658,grad_norm: 0.999999234642936, iteration: 83748
loss: 1.0100637674331665,grad_norm: 0.9999992996547953, iteration: 83749
loss: 1.0383120775222778,grad_norm: 0.9677233882555807, iteration: 83750
loss: 1.0054562091827393,grad_norm: 0.9999992318905139, iteration: 83751
loss: 1.0536433458328247,grad_norm: 0.9999999179054712, iteration: 83752
loss: 0.9648888111114502,grad_norm: 0.9999992171238914, iteration: 83753
loss: 1.011497139930725,grad_norm: 0.9999994828489621, iteration: 83754
loss: 0.937842071056366,grad_norm: 0.9999991735949111, iteration: 83755
loss: 1.0118085145950317,grad_norm: 0.9999990824996122, iteration: 83756
loss: 1.0102691650390625,grad_norm: 0.9999991664742498, iteration: 83757
loss: 0.9709400534629822,grad_norm: 0.9999989964889188, iteration: 83758
loss: 0.9967181086540222,grad_norm: 0.980447211642955, iteration: 83759
loss: 1.0093681812286377,grad_norm: 0.9999990932336333, iteration: 83760
loss: 1.0061249732971191,grad_norm: 0.9999992360437262, iteration: 83761
loss: 0.9864178895950317,grad_norm: 0.9343828470000082, iteration: 83762
loss: 0.9899699091911316,grad_norm: 0.9999991093876085, iteration: 83763
loss: 0.9846647381782532,grad_norm: 0.9999990862855425, iteration: 83764
loss: 1.0097577571868896,grad_norm: 0.9999990499252301, iteration: 83765
loss: 1.014863133430481,grad_norm: 0.9999992266472668, iteration: 83766
loss: 0.9860196113586426,grad_norm: 0.9999991087538423, iteration: 83767
loss: 1.0106761455535889,grad_norm: 0.9999992049217884, iteration: 83768
loss: 1.0359338521957397,grad_norm: 0.9999994626288581, iteration: 83769
loss: 0.9705812931060791,grad_norm: 0.827722155852077, iteration: 83770
loss: 0.9985054135322571,grad_norm: 0.9999992636715206, iteration: 83771
loss: 1.014318823814392,grad_norm: 0.8806589438351277, iteration: 83772
loss: 1.0114490985870361,grad_norm: 0.9999990931621017, iteration: 83773
loss: 0.9948654770851135,grad_norm: 0.9999991661007827, iteration: 83774
loss: 0.9902743101119995,grad_norm: 0.9999998765006521, iteration: 83775
loss: 0.9881926774978638,grad_norm: 0.9999991948574608, iteration: 83776
loss: 1.026980996131897,grad_norm: 0.9752379923193363, iteration: 83777
loss: 0.9739199280738831,grad_norm: 0.9570615418540731, iteration: 83778
loss: 1.0226181745529175,grad_norm: 0.9999990012446496, iteration: 83779
loss: 0.9672222137451172,grad_norm: 0.9740691652440435, iteration: 83780
loss: 0.9971948266029358,grad_norm: 0.9999994767649485, iteration: 83781
loss: 0.9973470568656921,grad_norm: 0.9999989282260321, iteration: 83782
loss: 1.0043163299560547,grad_norm: 0.9999997882928783, iteration: 83783
loss: 1.0135576725006104,grad_norm: 0.9999992177456412, iteration: 83784
loss: 0.9671120643615723,grad_norm: 0.9999993321052829, iteration: 83785
loss: 0.9653502702713013,grad_norm: 0.999999101664356, iteration: 83786
loss: 0.9965277910232544,grad_norm: 0.9999992175963162, iteration: 83787
loss: 1.0153275728225708,grad_norm: 0.9999991560423503, iteration: 83788
loss: 1.0008840560913086,grad_norm: 0.9999990887054245, iteration: 83789
loss: 0.9755971431732178,grad_norm: 0.917588009505548, iteration: 83790
loss: 1.1656526327133179,grad_norm: 0.9999991888199592, iteration: 83791
loss: 1.0089778900146484,grad_norm: 0.8806400220420566, iteration: 83792
loss: 1.0064553022384644,grad_norm: 0.9999990900253892, iteration: 83793
loss: 1.0314409732818604,grad_norm: 0.9586255264628516, iteration: 83794
loss: 1.0145065784454346,grad_norm: 0.9999992315407451, iteration: 83795
loss: 1.0056345462799072,grad_norm: 0.9999990158034282, iteration: 83796
loss: 1.0385621786117554,grad_norm: 0.9999993792777105, iteration: 83797
loss: 1.0026590824127197,grad_norm: 0.9999993150131496, iteration: 83798
loss: 0.9925917983055115,grad_norm: 0.9999991831072855, iteration: 83799
loss: 1.0047945976257324,grad_norm: 0.9999991357414263, iteration: 83800
loss: 1.1676980257034302,grad_norm: 0.9999996326671651, iteration: 83801
loss: 0.9797897338867188,grad_norm: 0.9898997270379374, iteration: 83802
loss: 0.9930738806724548,grad_norm: 0.9999992417811261, iteration: 83803
loss: 0.9968631863594055,grad_norm: 0.9999992303669399, iteration: 83804
loss: 1.0229841470718384,grad_norm: 0.997337967014438, iteration: 83805
loss: 0.9919242262840271,grad_norm: 0.9640831907410915, iteration: 83806
loss: 1.0037921667099,grad_norm: 0.9223801586721581, iteration: 83807
loss: 1.0767802000045776,grad_norm: 0.9999991998338983, iteration: 83808
loss: 1.0649493932724,grad_norm: 0.9999990436656773, iteration: 83809
loss: 1.0206056833267212,grad_norm: 0.9999989342549759, iteration: 83810
loss: 1.0206749439239502,grad_norm: 0.9999989698114032, iteration: 83811
loss: 0.9749999046325684,grad_norm: 0.9999991487077162, iteration: 83812
loss: 0.991001307964325,grad_norm: 0.9999990636377682, iteration: 83813
loss: 1.0290744304656982,grad_norm: 0.9999992878210017, iteration: 83814
loss: 0.9675383567810059,grad_norm: 0.8774045910677571, iteration: 83815
loss: 1.0434931516647339,grad_norm: 0.9999992202607012, iteration: 83816
loss: 1.0070710182189941,grad_norm: 0.949281170640354, iteration: 83817
loss: 0.9872193932533264,grad_norm: 0.9999990648503442, iteration: 83818
loss: 0.9835122227668762,grad_norm: 0.9999991653873264, iteration: 83819
loss: 0.969940721988678,grad_norm: 0.9999990613829942, iteration: 83820
loss: 0.9813152551651001,grad_norm: 0.9057432048318267, iteration: 83821
loss: 1.0431103706359863,grad_norm: 0.8290359470373715, iteration: 83822
loss: 0.9890838265419006,grad_norm: 0.9999992957262372, iteration: 83823
loss: 1.0151511430740356,grad_norm: 0.9584491387404753, iteration: 83824
loss: 1.0161875486373901,grad_norm: 0.9999992990588548, iteration: 83825
loss: 0.9824436902999878,grad_norm: 0.9768165399213562, iteration: 83826
loss: 1.0209577083587646,grad_norm: 0.9999992449695102, iteration: 83827
loss: 0.9992600083351135,grad_norm: 0.9999990987476133, iteration: 83828
loss: 1.0125713348388672,grad_norm: 0.9999992411800164, iteration: 83829
loss: 1.0264768600463867,grad_norm: 0.9999992688649949, iteration: 83830
loss: 0.9853044152259827,grad_norm: 0.8797675150311002, iteration: 83831
loss: 0.98503178358078,grad_norm: 0.9999991790838203, iteration: 83832
loss: 1.031611680984497,grad_norm: 0.9999990542948239, iteration: 83833
loss: 1.0179904699325562,grad_norm: 0.9999996621545441, iteration: 83834
loss: 1.1020333766937256,grad_norm: 1.0000000442141965, iteration: 83835
loss: 1.0091798305511475,grad_norm: 0.9236065569662701, iteration: 83836
loss: 1.01824951171875,grad_norm: 0.8531385409272241, iteration: 83837
loss: 0.9914354085922241,grad_norm: 0.9999992354682464, iteration: 83838
loss: 0.9923266768455505,grad_norm: 0.9896651134664547, iteration: 83839
loss: 1.0168825387954712,grad_norm: 0.8662213719066985, iteration: 83840
loss: 0.9965020418167114,grad_norm: 0.999999466245528, iteration: 83841
loss: 1.0452219247817993,grad_norm: 0.9742945068407867, iteration: 83842
loss: 1.0086263418197632,grad_norm: 0.9999991219730618, iteration: 83843
loss: 0.976224958896637,grad_norm: 0.9999990622867605, iteration: 83844
loss: 1.1299364566802979,grad_norm: 0.9999993367806005, iteration: 83845
loss: 0.9756662845611572,grad_norm: 0.9999995559898563, iteration: 83846
loss: 1.1798278093338013,grad_norm: 0.9999995941702567, iteration: 83847
loss: 1.154051661491394,grad_norm: 0.9999993212492483, iteration: 83848
loss: 1.0065836906433105,grad_norm: 0.9259350870013955, iteration: 83849
loss: 1.0002988576889038,grad_norm: 0.9999990845375918, iteration: 83850
loss: 1.0477420091629028,grad_norm: 0.9999993494075841, iteration: 83851
loss: 1.1606806516647339,grad_norm: 0.9999992774289443, iteration: 83852
loss: 1.2624258995056152,grad_norm: 0.9999998887552974, iteration: 83853
loss: 0.990753710269928,grad_norm: 0.9999992000194227, iteration: 83854
loss: 1.0103055238723755,grad_norm: 0.9999994797302746, iteration: 83855
loss: 1.0221501588821411,grad_norm: 0.9999991131760285, iteration: 83856
loss: 1.014225721359253,grad_norm: 0.999999165890491, iteration: 83857
loss: 0.9931743741035461,grad_norm: 0.999999372967811, iteration: 83858
loss: 0.9965556263923645,grad_norm: 0.9999991972654378, iteration: 83859
loss: 1.016745924949646,grad_norm: 0.8567189808406585, iteration: 83860
loss: 1.0171704292297363,grad_norm: 0.9999990378012438, iteration: 83861
loss: 1.050665020942688,grad_norm: 0.9999990233633357, iteration: 83862
loss: 1.0115240812301636,grad_norm: 0.999999166809532, iteration: 83863
loss: 0.99130779504776,grad_norm: 0.9655027276446628, iteration: 83864
loss: 0.9718071222305298,grad_norm: 0.9999992114587187, iteration: 83865
loss: 0.9547718167304993,grad_norm: 0.9115504485762433, iteration: 83866
loss: 0.9393697381019592,grad_norm: 0.9999991871160504, iteration: 83867
loss: 1.0281285047531128,grad_norm: 0.9999998118474194, iteration: 83868
loss: 0.9894527792930603,grad_norm: 0.9999990599632257, iteration: 83869
loss: 1.0322085618972778,grad_norm: 0.9837413251466972, iteration: 83870
loss: 0.970338761806488,grad_norm: 0.8898032864953846, iteration: 83871
loss: 1.0771510601043701,grad_norm: 0.9999998157969993, iteration: 83872
loss: 0.9995421171188354,grad_norm: 0.99999912589906, iteration: 83873
loss: 1.0060480833053589,grad_norm: 0.9999990734721533, iteration: 83874
loss: 0.9906136989593506,grad_norm: 0.9787580684773888, iteration: 83875
loss: 1.0123569965362549,grad_norm: 0.9999990960553267, iteration: 83876
loss: 1.0629757642745972,grad_norm: 0.999999140081373, iteration: 83877
loss: 0.9951233267784119,grad_norm: 0.9999989767354739, iteration: 83878
loss: 0.9738561511039734,grad_norm: 0.9999993424645058, iteration: 83879
loss: 0.989468514919281,grad_norm: 0.9999991654060111, iteration: 83880
loss: 1.0284526348114014,grad_norm: 0.9999991839557693, iteration: 83881
loss: 1.0046355724334717,grad_norm: 0.99999918205613, iteration: 83882
loss: 1.024268627166748,grad_norm: 0.9999997674172061, iteration: 83883
loss: 1.010689616203308,grad_norm: 0.999999444831085, iteration: 83884
loss: 1.0058525800704956,grad_norm: 0.9572352481293982, iteration: 83885
loss: 1.0334807634353638,grad_norm: 0.9947828399782295, iteration: 83886
loss: 0.9697434306144714,grad_norm: 0.9999991815440763, iteration: 83887
loss: 1.0895695686340332,grad_norm: 0.999999888431801, iteration: 83888
loss: 1.108715534210205,grad_norm: 0.9999998606373245, iteration: 83889
loss: 1.0128366947174072,grad_norm: 0.9999990813465092, iteration: 83890
loss: 1.0168894529342651,grad_norm: 0.9967087932440682, iteration: 83891
loss: 1.0124847888946533,grad_norm: 0.9999991039948188, iteration: 83892
loss: 0.9975922703742981,grad_norm: 0.8757213361803986, iteration: 83893
loss: 1.0182856321334839,grad_norm: 0.9706097445714867, iteration: 83894
loss: 1.0115077495574951,grad_norm: 0.9660388890137434, iteration: 83895
loss: 0.954682469367981,grad_norm: 0.9999990931602415, iteration: 83896
loss: 1.0178968906402588,grad_norm: 0.9999989404843129, iteration: 83897
loss: 0.9721552729606628,grad_norm: 0.9483000822929024, iteration: 83898
loss: 1.017124056816101,grad_norm: 0.999999293292325, iteration: 83899
loss: 0.9863654971122742,grad_norm: 0.9999992506933498, iteration: 83900
loss: 1.0148279666900635,grad_norm: 0.9999990065816053, iteration: 83901
loss: 1.0014797449111938,grad_norm: 0.9999991939751717, iteration: 83902
loss: 1.0050996541976929,grad_norm: 0.9999991147903412, iteration: 83903
loss: 0.9725343585014343,grad_norm: 0.8968160715632432, iteration: 83904
loss: 0.9944570064544678,grad_norm: 0.9999992265061118, iteration: 83905
loss: 1.0087085962295532,grad_norm: 0.999999155547964, iteration: 83906
loss: 1.174873948097229,grad_norm: 0.9999999265531724, iteration: 83907
loss: 0.9490299224853516,grad_norm: 0.9899956737209342, iteration: 83908
loss: 0.9990590214729309,grad_norm: 0.9999991952826073, iteration: 83909
loss: 1.0345511436462402,grad_norm: 0.9999991099525998, iteration: 83910
loss: 1.0167856216430664,grad_norm: 0.9470026954454409, iteration: 83911
loss: 0.9844008088111877,grad_norm: 0.8939110326360349, iteration: 83912
loss: 0.9752653241157532,grad_norm: 0.9999990801231511, iteration: 83913
loss: 0.9612574577331543,grad_norm: 0.9662658955131195, iteration: 83914
loss: 1.0021657943725586,grad_norm: 0.9999991588467367, iteration: 83915
loss: 0.9841838479042053,grad_norm: 0.9999992328554974, iteration: 83916
loss: 0.9855248332023621,grad_norm: 0.9884778885756108, iteration: 83917
loss: 0.9797707796096802,grad_norm: 0.9241891153797007, iteration: 83918
loss: 0.9999435544013977,grad_norm: 0.7882638398821673, iteration: 83919
loss: 1.0296872854232788,grad_norm: 0.9999991163744738, iteration: 83920
loss: 0.9953462481498718,grad_norm: 0.9748917412884848, iteration: 83921
loss: 0.9667344689369202,grad_norm: 0.9999991259722312, iteration: 83922
loss: 1.0099008083343506,grad_norm: 0.9999991500198164, iteration: 83923
loss: 0.9967188239097595,grad_norm: 0.9999991035603988, iteration: 83924
loss: 1.00147545337677,grad_norm: 0.9999992517085811, iteration: 83925
loss: 1.0071269273757935,grad_norm: 0.9999991936870306, iteration: 83926
loss: 0.9898485541343689,grad_norm: 0.9413247660350056, iteration: 83927
loss: 0.976483941078186,grad_norm: 0.9999992439229305, iteration: 83928
loss: 1.038967490196228,grad_norm: 0.9999997918436633, iteration: 83929
loss: 0.9912099242210388,grad_norm: 0.9999992292366578, iteration: 83930
loss: 1.0070236921310425,grad_norm: 0.9999992123919115, iteration: 83931
loss: 0.9824534058570862,grad_norm: 0.9531504406497925, iteration: 83932
loss: 1.002612590789795,grad_norm: 0.9999989941689401, iteration: 83933
loss: 0.9589414596557617,grad_norm: 0.9999999672624328, iteration: 83934
loss: 0.987252414226532,grad_norm: 0.8804915873438632, iteration: 83935
loss: 1.000559687614441,grad_norm: 0.9999992762477403, iteration: 83936
loss: 1.0380895137786865,grad_norm: 0.9999991205472808, iteration: 83937
loss: 0.9860685467720032,grad_norm: 0.9506409906157601, iteration: 83938
loss: 1.0089598894119263,grad_norm: 0.999999243443733, iteration: 83939
loss: 0.9839613437652588,grad_norm: 0.9999990989090595, iteration: 83940
loss: 0.9939889311790466,grad_norm: 0.9999992285853432, iteration: 83941
loss: 0.9805690050125122,grad_norm: 0.9999989745472085, iteration: 83942
loss: 1.0126025676727295,grad_norm: 0.9112113862500212, iteration: 83943
loss: 0.9879213571548462,grad_norm: 0.9999989990037267, iteration: 83944
loss: 0.9941645264625549,grad_norm: 0.9999992200487764, iteration: 83945
loss: 1.0004560947418213,grad_norm: 0.9999990702459531, iteration: 83946
loss: 1.0180100202560425,grad_norm: 0.9999996035190759, iteration: 83947
loss: 0.9853541851043701,grad_norm: 0.9999990922015705, iteration: 83948
loss: 0.9582548141479492,grad_norm: 0.999999213251223, iteration: 83949
loss: 0.9680101871490479,grad_norm: 0.9999991244320519, iteration: 83950
loss: 1.0218875408172607,grad_norm: 0.9999992368826423, iteration: 83951
loss: 1.0041782855987549,grad_norm: 0.9999990424669425, iteration: 83952
loss: 1.0028769969940186,grad_norm: 0.9999992408588586, iteration: 83953
loss: 0.9651877880096436,grad_norm: 0.9999991229002122, iteration: 83954
loss: 0.9980894923210144,grad_norm: 0.9789878833127662, iteration: 83955
loss: 1.010603427886963,grad_norm: 0.9636895550836019, iteration: 83956
loss: 0.9814344048500061,grad_norm: 0.8427815456594414, iteration: 83957
loss: 1.0045639276504517,grad_norm: 0.9186150452681227, iteration: 83958
loss: 1.021966814994812,grad_norm: 0.9999994662165443, iteration: 83959
loss: 1.0052624940872192,grad_norm: 0.9999991349719844, iteration: 83960
loss: 1.0019935369491577,grad_norm: 0.9999991105664895, iteration: 83961
loss: 1.0168241262435913,grad_norm: 0.9004599718990735, iteration: 83962
loss: 1.0473589897155762,grad_norm: 0.999999091832196, iteration: 83963
loss: 0.9874395728111267,grad_norm: 0.9573334828123392, iteration: 83964
loss: 1.001078486442566,grad_norm: 0.9999990659219508, iteration: 83965
loss: 0.9631726145744324,grad_norm: 0.9999992696673023, iteration: 83966
loss: 1.0071392059326172,grad_norm: 0.9999992231435494, iteration: 83967
loss: 0.9731104969978333,grad_norm: 0.8511501398407622, iteration: 83968
loss: 1.009151577949524,grad_norm: 0.9999990147256675, iteration: 83969
loss: 1.0270097255706787,grad_norm: 0.9999991122806724, iteration: 83970
loss: 0.9751906991004944,grad_norm: 0.9999990625769241, iteration: 83971
loss: 1.007314682006836,grad_norm: 0.9999992443540343, iteration: 83972
loss: 0.9758211374282837,grad_norm: 0.9789551499344489, iteration: 83973
loss: 0.9718651175498962,grad_norm: 0.9766169661346829, iteration: 83974
loss: 1.0423402786254883,grad_norm: 0.9999989877294229, iteration: 83975
loss: 1.1199922561645508,grad_norm: 0.9999998018206374, iteration: 83976
loss: 1.1306251287460327,grad_norm: 0.9999996447260975, iteration: 83977
loss: 0.9854009747505188,grad_norm: 0.9999991476961344, iteration: 83978
loss: 1.0157719850540161,grad_norm: 0.9999990128449587, iteration: 83979
loss: 0.9875065088272095,grad_norm: 0.9999991597489916, iteration: 83980
loss: 0.964989423751831,grad_norm: 0.8960867095484705, iteration: 83981
loss: 1.0285789966583252,grad_norm: 0.9999993193596345, iteration: 83982
loss: 0.9675702452659607,grad_norm: 0.9999991785606405, iteration: 83983
loss: 1.00969398021698,grad_norm: 0.9771426932559955, iteration: 83984
loss: 1.0166051387786865,grad_norm: 0.9999991106078099, iteration: 83985
loss: 1.002265214920044,grad_norm: 0.9999991025100465, iteration: 83986
loss: 1.0135266780853271,grad_norm: 0.9999991064930601, iteration: 83987
loss: 0.9887282252311707,grad_norm: 0.9999989923277688, iteration: 83988
loss: 1.038425087928772,grad_norm: 0.9999990487023139, iteration: 83989
loss: 1.0172555446624756,grad_norm: 0.999999115193932, iteration: 83990
loss: 0.9883065223693848,grad_norm: 0.9999992061657138, iteration: 83991
loss: 1.0050091743469238,grad_norm: 0.9024054438038526, iteration: 83992
loss: 1.0165899991989136,grad_norm: 0.8628513876627681, iteration: 83993
loss: 1.0204209089279175,grad_norm: 0.9999992707486554, iteration: 83994
loss: 0.9938873648643494,grad_norm: 0.9999992087018552, iteration: 83995
loss: 0.9677181243896484,grad_norm: 0.999999109088817, iteration: 83996
loss: 0.9802994728088379,grad_norm: 0.9999989121004423, iteration: 83997
loss: 1.0100758075714111,grad_norm: 0.9999990605792123, iteration: 83998
loss: 0.9935932159423828,grad_norm: 0.9999991466786511, iteration: 83999
loss: 1.0110349655151367,grad_norm: 0.9999991227002141, iteration: 84000
loss: 1.0349801778793335,grad_norm: 0.9999991772359776, iteration: 84001
loss: 0.9925685524940491,grad_norm: 0.9253803130653961, iteration: 84002
loss: 1.0110559463500977,grad_norm: 0.9999991876640387, iteration: 84003
loss: 1.011857032775879,grad_norm: 0.8127663580215789, iteration: 84004
loss: 0.996859073638916,grad_norm: 0.9750817794020041, iteration: 84005
loss: 1.0343024730682373,grad_norm: 0.999999208114978, iteration: 84006
loss: 1.009755253791809,grad_norm: 0.9999991173899737, iteration: 84007
loss: 1.0051634311676025,grad_norm: 0.999999144258669, iteration: 84008
loss: 0.9751042723655701,grad_norm: 0.9497552118135183, iteration: 84009
loss: 0.9924968481063843,grad_norm: 0.9944679309508901, iteration: 84010
loss: 0.9848862290382385,grad_norm: 0.9731191240447223, iteration: 84011
loss: 0.9698509573936462,grad_norm: 0.9999992068451623, iteration: 84012
loss: 1.0202608108520508,grad_norm: 0.9999992458713823, iteration: 84013
loss: 0.9915463328361511,grad_norm: 0.9999990399969053, iteration: 84014
loss: 0.9921742677688599,grad_norm: 0.9999991828907023, iteration: 84015
loss: 0.9892701506614685,grad_norm: 0.9999990406978528, iteration: 84016
loss: 1.0263887643814087,grad_norm: 0.999999100837657, iteration: 84017
loss: 1.0046788454055786,grad_norm: 0.9967055483450082, iteration: 84018
loss: 1.0010709762573242,grad_norm: 0.9999989825285504, iteration: 84019
loss: 0.991240918636322,grad_norm: 0.9999990719732031, iteration: 84020
loss: 0.9846359491348267,grad_norm: 0.9999992297870273, iteration: 84021
loss: 1.025319218635559,grad_norm: 0.9999989744671208, iteration: 84022
loss: 1.0105334520339966,grad_norm: 0.9999989830232933, iteration: 84023
loss: 0.9989455938339233,grad_norm: 0.8126137419927462, iteration: 84024
loss: 1.003402829170227,grad_norm: 0.9317650372804216, iteration: 84025
loss: 0.994419276714325,grad_norm: 0.9999991146140764, iteration: 84026
loss: 0.9973999857902527,grad_norm: 0.8297634765993442, iteration: 84027
loss: 0.9913510084152222,grad_norm: 0.9999991035284768, iteration: 84028
loss: 1.0103248357772827,grad_norm: 0.9999990910740143, iteration: 84029
loss: 1.0119527578353882,grad_norm: 0.9554097106615743, iteration: 84030
loss: 0.9951647520065308,grad_norm: 0.9999991332641816, iteration: 84031
loss: 0.9997633099555969,grad_norm: 0.9999990910774792, iteration: 84032
loss: 0.9891036748886108,grad_norm: 0.9999992127509871, iteration: 84033
loss: 0.9829144477844238,grad_norm: 0.9999991816964332, iteration: 84034
loss: 1.0076990127563477,grad_norm: 0.9999991962562758, iteration: 84035
loss: 1.0037118196487427,grad_norm: 0.9999990899298977, iteration: 84036
loss: 1.0174309015274048,grad_norm: 0.9999993514264607, iteration: 84037
loss: 1.0026214122772217,grad_norm: 0.9999991808768988, iteration: 84038
loss: 1.0234043598175049,grad_norm: 0.9999993319466742, iteration: 84039
loss: 0.9967964291572571,grad_norm: 0.976309313779113, iteration: 84040
loss: 1.0464681386947632,grad_norm: 0.9825185302274401, iteration: 84041
loss: 0.9781326651573181,grad_norm: 0.9400717347766011, iteration: 84042
loss: 1.0199735164642334,grad_norm: 0.9039984003170003, iteration: 84043
loss: 0.9668484926223755,grad_norm: 0.9999991357085088, iteration: 84044
loss: 0.9912189245223999,grad_norm: 0.9999992646004726, iteration: 84045
loss: 1.0197196006774902,grad_norm: 0.9999989684623176, iteration: 84046
loss: 1.0171005725860596,grad_norm: 0.8924626231146847, iteration: 84047
loss: 0.9892618656158447,grad_norm: 0.8475743591118726, iteration: 84048
loss: 0.9995461106300354,grad_norm: 0.9251548816916094, iteration: 84049
loss: 0.9903793334960938,grad_norm: 0.9878183637166732, iteration: 84050
loss: 0.9941915273666382,grad_norm: 0.8868574256766114, iteration: 84051
loss: 0.9964832663536072,grad_norm: 0.8824126571689082, iteration: 84052
loss: 1.037730097770691,grad_norm: 0.9999991169272174, iteration: 84053
loss: 1.011346459388733,grad_norm: 0.9999991925975342, iteration: 84054
loss: 1.0138062238693237,grad_norm: 0.9999991815818122, iteration: 84055
loss: 1.0181787014007568,grad_norm: 0.9999991818174597, iteration: 84056
loss: 0.9780484437942505,grad_norm: 0.983475682595416, iteration: 84057
loss: 0.9870508909225464,grad_norm: 0.9999991937698646, iteration: 84058
loss: 1.0104846954345703,grad_norm: 0.999999196218026, iteration: 84059
loss: 0.9976263642311096,grad_norm: 0.9999991390021156, iteration: 84060
loss: 1.0214476585388184,grad_norm: 0.9999999353558557, iteration: 84061
loss: 0.9641804695129395,grad_norm: 0.8641470192446885, iteration: 84062
loss: 1.005800485610962,grad_norm: 0.9969321920404182, iteration: 84063
loss: 0.976584255695343,grad_norm: 0.9999991259390357, iteration: 84064
loss: 1.0152621269226074,grad_norm: 0.9999991401422165, iteration: 84065
loss: 0.9853301048278809,grad_norm: 0.9999997411452428, iteration: 84066
loss: 1.0154893398284912,grad_norm: 0.8516312383322802, iteration: 84067
loss: 1.0410276651382446,grad_norm: 0.9999989885922533, iteration: 84068
loss: 0.9983299374580383,grad_norm: 0.9985984919764307, iteration: 84069
loss: 0.9968900084495544,grad_norm: 0.8868895119217104, iteration: 84070
loss: 1.0091828107833862,grad_norm: 0.9999992489554517, iteration: 84071
loss: 0.9830473065376282,grad_norm: 0.9294510509591281, iteration: 84072
loss: 1.0087419748306274,grad_norm: 0.9232156074153098, iteration: 84073
loss: 1.0061763525009155,grad_norm: 0.9999990219882305, iteration: 84074
loss: 0.97889244556427,grad_norm: 0.999999218224869, iteration: 84075
loss: 0.9768771529197693,grad_norm: 0.9355216801067185, iteration: 84076
loss: 0.998010516166687,grad_norm: 0.9999991465119821, iteration: 84077
loss: 0.9888780117034912,grad_norm: 0.9999989960160287, iteration: 84078
loss: 1.0255584716796875,grad_norm: 0.9431360087237665, iteration: 84079
loss: 1.0495996475219727,grad_norm: 0.9999999173014671, iteration: 84080
loss: 1.0477995872497559,grad_norm: 0.9075028208629725, iteration: 84081
loss: 0.9934176802635193,grad_norm: 0.9395656640136298, iteration: 84082
loss: 0.9970188140869141,grad_norm: 0.9999991605223529, iteration: 84083
loss: 1.0155205726623535,grad_norm: 0.9639799667788256, iteration: 84084
loss: 1.0159735679626465,grad_norm: 0.9999992487680024, iteration: 84085
loss: 0.9821943640708923,grad_norm: 0.9999991690281944, iteration: 84086
loss: 1.0326159000396729,grad_norm: 0.9999993383637855, iteration: 84087
loss: 0.9990099668502808,grad_norm: 0.9999992805958063, iteration: 84088
loss: 1.006177306175232,grad_norm: 0.9999993163567795, iteration: 84089
loss: 1.090570092201233,grad_norm: 0.9999990930281141, iteration: 84090
loss: 1.002466082572937,grad_norm: 0.999999196863912, iteration: 84091
loss: 1.0014617443084717,grad_norm: 0.9999990714597502, iteration: 84092
loss: 0.9653459787368774,grad_norm: 0.8724406585113897, iteration: 84093
loss: 0.9827945828437805,grad_norm: 0.9906822978926457, iteration: 84094
loss: 0.9923447370529175,grad_norm: 0.9999994299812723, iteration: 84095
loss: 1.0163613557815552,grad_norm: 0.9999991354542663, iteration: 84096
loss: 1.021109938621521,grad_norm: 0.99999903476492, iteration: 84097
loss: 0.9853731989860535,grad_norm: 0.9885587319540241, iteration: 84098
loss: 0.9976257085800171,grad_norm: 0.9999991719792017, iteration: 84099
loss: 1.0050650835037231,grad_norm: 0.9176773836453489, iteration: 84100
loss: 0.9915668964385986,grad_norm: 0.999999130940896, iteration: 84101
loss: 1.0120952129364014,grad_norm: 0.9999991891411312, iteration: 84102
loss: 0.9724655151367188,grad_norm: 0.9999992383721329, iteration: 84103
loss: 0.9534910917282104,grad_norm: 0.9653522729672506, iteration: 84104
loss: 1.0088622570037842,grad_norm: 0.9999991895929167, iteration: 84105
loss: 0.9976099729537964,grad_norm: 0.9967572813886415, iteration: 84106
loss: 0.9613240361213684,grad_norm: 0.9609151100071808, iteration: 84107
loss: 0.9918836355209351,grad_norm: 0.9999991986891937, iteration: 84108
loss: 1.0295631885528564,grad_norm: 0.999999389668643, iteration: 84109
loss: 1.0390934944152832,grad_norm: 0.999999229849507, iteration: 84110
loss: 0.9784688949584961,grad_norm: 0.9563389137745362, iteration: 84111
loss: 1.0156915187835693,grad_norm: 0.9389630091110313, iteration: 84112
loss: 1.0030258893966675,grad_norm: 0.999999206975743, iteration: 84113
loss: 1.0114736557006836,grad_norm: 0.8481056164290004, iteration: 84114
loss: 0.9848110675811768,grad_norm: 0.9641910538665118, iteration: 84115
loss: 1.0369043350219727,grad_norm: 0.9999990596562033, iteration: 84116
loss: 0.9739603996276855,grad_norm: 0.9999989984973958, iteration: 84117
loss: 1.016893982887268,grad_norm: 0.9999990695156865, iteration: 84118
loss: 1.036198616027832,grad_norm: 0.8989177688896843, iteration: 84119
loss: 1.0524550676345825,grad_norm: 0.9999997074573316, iteration: 84120
loss: 0.9933976531028748,grad_norm: 0.9999990226743402, iteration: 84121
loss: 0.9895175695419312,grad_norm: 0.9572845832157616, iteration: 84122
loss: 0.9721577167510986,grad_norm: 0.9999992096645558, iteration: 84123
loss: 0.9869529008865356,grad_norm: 0.9999989920839514, iteration: 84124
loss: 1.0146760940551758,grad_norm: 0.9999989027241126, iteration: 84125
loss: 1.0071841478347778,grad_norm: 0.9999992277568013, iteration: 84126
loss: 1.0073950290679932,grad_norm: 0.9999990106789934, iteration: 84127
loss: 1.0484485626220703,grad_norm: 0.9999991812689342, iteration: 84128
loss: 0.9859291911125183,grad_norm: 0.849689766795456, iteration: 84129
loss: 1.016809344291687,grad_norm: 0.9999991667142014, iteration: 84130
loss: 0.9836611747741699,grad_norm: 0.9999990941111789, iteration: 84131
loss: 1.0110238790512085,grad_norm: 0.9970223512474199, iteration: 84132
loss: 0.9987298250198364,grad_norm: 0.8727534401077619, iteration: 84133
loss: 0.9706404209136963,grad_norm: 0.8903800188616778, iteration: 84134
loss: 0.988223135471344,grad_norm: 0.8633941992213924, iteration: 84135
loss: 1.0128669738769531,grad_norm: 0.999999094081645, iteration: 84136
loss: 1.0270382165908813,grad_norm: 0.9999994353653289, iteration: 84137
loss: 0.9997606873512268,grad_norm: 0.99220671290552, iteration: 84138
loss: 1.0262752771377563,grad_norm: 0.9999991265820164, iteration: 84139
loss: 0.9921485781669617,grad_norm: 0.9938096383457351, iteration: 84140
loss: 1.0159721374511719,grad_norm: 0.9999992001048227, iteration: 84141
loss: 1.0216630697250366,grad_norm: 0.911506654800754, iteration: 84142
loss: 0.9956611394882202,grad_norm: 0.9999989923848335, iteration: 84143
loss: 0.9817175269126892,grad_norm: 0.9999989999282732, iteration: 84144
loss: 0.9946603178977966,grad_norm: 0.9999990063644945, iteration: 84145
loss: 1.058205485343933,grad_norm: 0.9999992510352913, iteration: 84146
loss: 0.9834337830543518,grad_norm: 0.9999990065427609, iteration: 84147
loss: 0.9990265369415283,grad_norm: 0.9999995026250391, iteration: 84148
loss: 0.9887824058532715,grad_norm: 0.9861105973133553, iteration: 84149
loss: 1.00270414352417,grad_norm: 0.9999991842539186, iteration: 84150
loss: 1.0066486597061157,grad_norm: 0.7755185875633479, iteration: 84151
loss: 1.026099443435669,grad_norm: 0.9999991855291219, iteration: 84152
loss: 0.9763603210449219,grad_norm: 0.9999994302676205, iteration: 84153
loss: 0.9973533153533936,grad_norm: 0.99999920720449, iteration: 84154
loss: 1.0018107891082764,grad_norm: 0.9999990869763007, iteration: 84155
loss: 1.0298309326171875,grad_norm: 0.9999989335425973, iteration: 84156
loss: 0.9490430951118469,grad_norm: 0.9999990062432105, iteration: 84157
loss: 1.0011178255081177,grad_norm: 0.9665040153510549, iteration: 84158
loss: 1.0129163265228271,grad_norm: 0.999999150366684, iteration: 84159
loss: 1.0435343980789185,grad_norm: 0.9999990781615508, iteration: 84160
loss: 1.0237873792648315,grad_norm: 0.9999990489564118, iteration: 84161
loss: 0.9999503493309021,grad_norm: 0.9532372932749652, iteration: 84162
loss: 0.9679551124572754,grad_norm: 0.9873019455113136, iteration: 84163
loss: 0.9956437945365906,grad_norm: 0.9735340909389687, iteration: 84164
loss: 1.0113017559051514,grad_norm: 0.9515028584042089, iteration: 84165
loss: 1.0213828086853027,grad_norm: 0.9487408856182497, iteration: 84166
loss: 1.0456125736236572,grad_norm: 0.9999993374406925, iteration: 84167
loss: 0.9964478015899658,grad_norm: 0.9999990775640487, iteration: 84168
loss: 0.9841559529304504,grad_norm: 0.9999991089079011, iteration: 84169
loss: 0.9795265793800354,grad_norm: 0.9999990495236565, iteration: 84170
loss: 0.9731563329696655,grad_norm: 0.9325818135995236, iteration: 84171
loss: 0.9780309200286865,grad_norm: 0.9722849060635568, iteration: 84172
loss: 0.96697998046875,grad_norm: 0.9024733515159781, iteration: 84173
loss: 0.955574095249176,grad_norm: 0.9880540562141079, iteration: 84174
loss: 1.0432745218276978,grad_norm: 0.9288239564385518, iteration: 84175
loss: 1.044220209121704,grad_norm: 0.9999992199182862, iteration: 84176
loss: 1.0100935697555542,grad_norm: 0.8845437682797092, iteration: 84177
loss: 0.9707812666893005,grad_norm: 0.9697819874548227, iteration: 84178
loss: 1.0036540031433105,grad_norm: 0.9999992240316146, iteration: 84179
loss: 0.9869817495346069,grad_norm: 0.8866356386430057, iteration: 84180
loss: 1.0267810821533203,grad_norm: 0.999999577863749, iteration: 84181
loss: 1.0041911602020264,grad_norm: 0.9999991144912795, iteration: 84182
loss: 0.9689221382141113,grad_norm: 0.9999990395325727, iteration: 84183
loss: 1.010783314704895,grad_norm: 0.993966824944125, iteration: 84184
loss: 1.0080329179763794,grad_norm: 0.9999993747989995, iteration: 84185
loss: 0.9688765406608582,grad_norm: 0.9999991571607405, iteration: 84186
loss: 1.0109055042266846,grad_norm: 0.9999991522314893, iteration: 84187
loss: 1.0033972263336182,grad_norm: 0.9999991122948496, iteration: 84188
loss: 1.0089808702468872,grad_norm: 0.9999991549496262, iteration: 84189
loss: 0.9962198138237,grad_norm: 0.999999052910046, iteration: 84190
loss: 0.9895475506782532,grad_norm: 0.9999991906347485, iteration: 84191
loss: 1.0273873805999756,grad_norm: 0.9331106812099352, iteration: 84192
loss: 0.9957998394966125,grad_norm: 0.999999080483866, iteration: 84193
loss: 0.9891709089279175,grad_norm: 0.9902824145867162, iteration: 84194
loss: 1.002500057220459,grad_norm: 0.9999989600507042, iteration: 84195
loss: 0.9944303631782532,grad_norm: 0.9999991090913437, iteration: 84196
loss: 1.0108529329299927,grad_norm: 0.999999055226777, iteration: 84197
loss: 1.00273859500885,grad_norm: 0.9999990622538284, iteration: 84198
loss: 1.0445555448532104,grad_norm: 0.9944015717028301, iteration: 84199
loss: 0.9753636121749878,grad_norm: 0.9999990135247226, iteration: 84200
loss: 1.0165120363235474,grad_norm: 0.9999991147714361, iteration: 84201
loss: 0.9803591966629028,grad_norm: 0.9418952057298292, iteration: 84202
loss: 0.9938088059425354,grad_norm: 0.9999990121822976, iteration: 84203
loss: 1.0297328233718872,grad_norm: 0.9999992287626039, iteration: 84204
loss: 1.0114264488220215,grad_norm: 0.9999990482933342, iteration: 84205
loss: 0.9810847043991089,grad_norm: 0.999998948916845, iteration: 84206
loss: 0.9899954199790955,grad_norm: 0.8025273769805014, iteration: 84207
loss: 1.0023680925369263,grad_norm: 0.999999127712175, iteration: 84208
loss: 1.0337711572647095,grad_norm: 0.999999088567226, iteration: 84209
loss: 0.9900406002998352,grad_norm: 0.9999992383119506, iteration: 84210
loss: 0.9871448278427124,grad_norm: 0.9999992156489054, iteration: 84211
loss: 1.0484557151794434,grad_norm: 0.9999991523350859, iteration: 84212
loss: 1.003774642944336,grad_norm: 0.999999130867671, iteration: 84213
loss: 1.0207709074020386,grad_norm: 0.9999991730465914, iteration: 84214
loss: 0.9614348411560059,grad_norm: 0.9999990777977167, iteration: 84215
loss: 1.0304638147354126,grad_norm: 0.9999993270799417, iteration: 84216
loss: 1.0056214332580566,grad_norm: 0.897677269128877, iteration: 84217
loss: 0.9646183252334595,grad_norm: 0.9999990328873902, iteration: 84218
loss: 1.0397320985794067,grad_norm: 0.9493868983239819, iteration: 84219
loss: 1.13168203830719,grad_norm: 0.9999994786865191, iteration: 84220
loss: 0.9881911873817444,grad_norm: 0.9999993888303121, iteration: 84221
loss: 0.9836304783821106,grad_norm: 0.9249716979597443, iteration: 84222
loss: 1.0188798904418945,grad_norm: 0.9215571415984817, iteration: 84223
loss: 0.9681110382080078,grad_norm: 0.9999990116127879, iteration: 84224
loss: 1.0217037200927734,grad_norm: 0.9999990378782944, iteration: 84225
loss: 0.9786303639411926,grad_norm: 0.9999991113389434, iteration: 84226
loss: 1.0013662576675415,grad_norm: 0.9999989121156907, iteration: 84227
loss: 1.0083074569702148,grad_norm: 0.9354444809708853, iteration: 84228
loss: 0.9987128973007202,grad_norm: 0.9999991787704757, iteration: 84229
loss: 0.9859841465950012,grad_norm: 0.9999991799593461, iteration: 84230
loss: 0.9477548003196716,grad_norm: 0.9999991100551869, iteration: 84231
loss: 1.0182197093963623,grad_norm: 0.9999991131878537, iteration: 84232
loss: 0.9702733159065247,grad_norm: 0.9999993441094446, iteration: 84233
loss: 1.0285730361938477,grad_norm: 0.9670172808262444, iteration: 84234
loss: 0.9806748032569885,grad_norm: 0.9999991184428891, iteration: 84235
loss: 0.9721776843070984,grad_norm: 0.9117066920938908, iteration: 84236
loss: 1.0107641220092773,grad_norm: 0.999999021186756, iteration: 84237
loss: 1.011564016342163,grad_norm: 0.9999991196558763, iteration: 84238
loss: 1.0516693592071533,grad_norm: 0.9999990807597836, iteration: 84239
loss: 0.9910435676574707,grad_norm: 0.9999991011748391, iteration: 84240
loss: 1.011086106300354,grad_norm: 0.9999991878414399, iteration: 84241
loss: 1.007431149482727,grad_norm: 0.9715219424813712, iteration: 84242
loss: 0.9910624623298645,grad_norm: 0.9999990552742303, iteration: 84243
loss: 0.9626146554946899,grad_norm: 0.9664516255283796, iteration: 84244
loss: 1.0108940601348877,grad_norm: 0.9753217004144723, iteration: 84245
loss: 0.993570864200592,grad_norm: 0.9999990755823337, iteration: 84246
loss: 0.9882645010948181,grad_norm: 0.9999991242636092, iteration: 84247
loss: 0.9903318285942078,grad_norm: 0.9999992875836808, iteration: 84248
loss: 1.001103401184082,grad_norm: 0.9656988101046655, iteration: 84249
loss: 1.035715937614441,grad_norm: 0.9999992912363518, iteration: 84250
loss: 0.9966782927513123,grad_norm: 0.978398840565254, iteration: 84251
loss: 0.9809960126876831,grad_norm: 0.9999991203679903, iteration: 84252
loss: 1.0081475973129272,grad_norm: 0.9999990530449602, iteration: 84253
loss: 0.9797297716140747,grad_norm: 0.999999194434713, iteration: 84254
loss: 0.9955599904060364,grad_norm: 0.9945009114172801, iteration: 84255
loss: 0.9995459914207458,grad_norm: 0.9999991388055328, iteration: 84256
loss: 1.0111356973648071,grad_norm: 0.8893152576451945, iteration: 84257
loss: 1.0061988830566406,grad_norm: 0.9999993104839963, iteration: 84258
loss: 0.9852511286735535,grad_norm: 0.9999992848341419, iteration: 84259
loss: 0.9702608585357666,grad_norm: 0.9395347784893673, iteration: 84260
loss: 0.9954800009727478,grad_norm: 0.9999992803239599, iteration: 84261
loss: 1.0398074388504028,grad_norm: 0.9999991574832188, iteration: 84262
loss: 1.0206608772277832,grad_norm: 0.8838019127433144, iteration: 84263
loss: 1.0106226205825806,grad_norm: 0.9999989744820357, iteration: 84264
loss: 1.0167664289474487,grad_norm: 0.9223631757635968, iteration: 84265
loss: 0.9971072673797607,grad_norm: 0.9999991813100615, iteration: 84266
loss: 1.0131863355636597,grad_norm: 0.9348270723464523, iteration: 84267
loss: 0.9937809109687805,grad_norm: 0.9439842273575941, iteration: 84268
loss: 1.021034598350525,grad_norm: 0.8460530462805876, iteration: 84269
loss: 1.0272972583770752,grad_norm: 0.9462225870394003, iteration: 84270
loss: 1.0230962038040161,grad_norm: 0.9999990986160009, iteration: 84271
loss: 1.0383751392364502,grad_norm: 0.9999992371762622, iteration: 84272
loss: 1.0141855478286743,grad_norm: 0.9999992742682913, iteration: 84273
loss: 1.0023908615112305,grad_norm: 0.9999992741621078, iteration: 84274
loss: 1.0612281560897827,grad_norm: 0.9999991930111694, iteration: 84275
loss: 0.9700837731361389,grad_norm: 0.999999011859065, iteration: 84276
loss: 0.9870287179946899,grad_norm: 0.995992481148992, iteration: 84277
loss: 1.0407356023788452,grad_norm: 0.9999994264822076, iteration: 84278
loss: 1.0002293586730957,grad_norm: 0.9999990943100341, iteration: 84279
loss: 0.9761554002761841,grad_norm: 0.9999991644745415, iteration: 84280
loss: 1.0062103271484375,grad_norm: 0.9999990706139961, iteration: 84281
loss: 1.0012394189834595,grad_norm: 0.948026662843261, iteration: 84282
loss: 0.9880341291427612,grad_norm: 0.9516517706093444, iteration: 84283
loss: 1.0057322978973389,grad_norm: 0.927517192844115, iteration: 84284
loss: 0.9905440807342529,grad_norm: 0.9999991555192136, iteration: 84285
loss: 0.9825173616409302,grad_norm: 0.9999991199397329, iteration: 84286
loss: 1.0028483867645264,grad_norm: 0.9999990016214042, iteration: 84287
loss: 0.9959654211997986,grad_norm: 0.8296423130461511, iteration: 84288
loss: 1.0035816431045532,grad_norm: 0.9999989821179461, iteration: 84289
loss: 1.0445709228515625,grad_norm: 0.9999994865394134, iteration: 84290
loss: 0.9845505356788635,grad_norm: 0.9999991573167069, iteration: 84291
loss: 0.9933680891990662,grad_norm: 0.9999990141007968, iteration: 84292
loss: 1.033294439315796,grad_norm: 0.9999992835662858, iteration: 84293
loss: 0.9856783151626587,grad_norm: 0.9999990935125513, iteration: 84294
loss: 0.9906942844390869,grad_norm: 0.9999990754956987, iteration: 84295
loss: 1.003821849822998,grad_norm: 0.9999991559294583, iteration: 84296
loss: 0.9898253679275513,grad_norm: 0.970289898744574, iteration: 84297
loss: 0.9942123889923096,grad_norm: 0.9330950641773027, iteration: 84298
loss: 0.9734418392181396,grad_norm: 0.9999991229246279, iteration: 84299
loss: 1.0102283954620361,grad_norm: 0.9610652441138312, iteration: 84300
loss: 1.0220552682876587,grad_norm: 0.9999991783135951, iteration: 84301
loss: 1.0057322978973389,grad_norm: 0.9999991284653492, iteration: 84302
loss: 0.9949535131454468,grad_norm: 0.8125898833497803, iteration: 84303
loss: 0.9721359610557556,grad_norm: 0.9999991280007821, iteration: 84304
loss: 0.9916656017303467,grad_norm: 0.891341072716566, iteration: 84305
loss: 1.0247381925582886,grad_norm: 0.9999991889108858, iteration: 84306
loss: 1.0195376873016357,grad_norm: 0.9999991260902359, iteration: 84307
loss: 0.9945655465126038,grad_norm: 0.9999991293099856, iteration: 84308
loss: 0.979049026966095,grad_norm: 0.924017047284127, iteration: 84309
loss: 1.022958755493164,grad_norm: 0.999999215664291, iteration: 84310
loss: 1.0099291801452637,grad_norm: 0.9999990293930501, iteration: 84311
loss: 1.0207774639129639,grad_norm: 0.9999990789625289, iteration: 84312
loss: 1.008272409439087,grad_norm: 0.9999990262579692, iteration: 84313
loss: 1.0024274587631226,grad_norm: 0.9999991499340312, iteration: 84314
loss: 0.9951329231262207,grad_norm: 0.9771347982852562, iteration: 84315
loss: 1.0470441579818726,grad_norm: 0.9470000930146041, iteration: 84316
loss: 1.0268930196762085,grad_norm: 0.9549066439889148, iteration: 84317
loss: 0.997509777545929,grad_norm: 0.8555044238125428, iteration: 84318
loss: 1.002091646194458,grad_norm: 0.9862567137068031, iteration: 84319
loss: 1.023282527923584,grad_norm: 0.9999992214565112, iteration: 84320
loss: 1.0130695104599,grad_norm: 0.9999995614926228, iteration: 84321
loss: 1.0395396947860718,grad_norm: 0.9999994579829526, iteration: 84322
loss: 0.9925021529197693,grad_norm: 0.9500242056977515, iteration: 84323
loss: 1.005878210067749,grad_norm: 0.9999991862860698, iteration: 84324
loss: 0.9868007898330688,grad_norm: 0.9999992049991798, iteration: 84325
loss: 1.0076876878738403,grad_norm: 0.874367524981318, iteration: 84326
loss: 1.1093915700912476,grad_norm: 0.9999992398263491, iteration: 84327
loss: 0.9984483122825623,grad_norm: 0.9096477800352182, iteration: 84328
loss: 1.0156327486038208,grad_norm: 0.999999232042562, iteration: 84329
loss: 1.0351107120513916,grad_norm: 0.9999992708544507, iteration: 84330
loss: 1.0108665227890015,grad_norm: 0.8505358737691123, iteration: 84331
loss: 1.003240942955017,grad_norm: 0.9999991465425698, iteration: 84332
loss: 0.9964091181755066,grad_norm: 0.9110977529643862, iteration: 84333
loss: 1.0134698152542114,grad_norm: 0.8943098537243818, iteration: 84334
loss: 0.9987562298774719,grad_norm: 0.9748111271886548, iteration: 84335
loss: 1.027144193649292,grad_norm: 0.9999989680959165, iteration: 84336
loss: 1.0287479162216187,grad_norm: 0.9999992602481537, iteration: 84337
loss: 0.9842249155044556,grad_norm: 0.9999991046944096, iteration: 84338
loss: 0.9883490204811096,grad_norm: 0.9876438584538928, iteration: 84339
loss: 0.988486111164093,grad_norm: 0.9757519123969204, iteration: 84340
loss: 0.9790699481964111,grad_norm: 0.9473108865737759, iteration: 84341
loss: 1.0037729740142822,grad_norm: 0.9790945728998685, iteration: 84342
loss: 0.986440896987915,grad_norm: 0.99999899696338, iteration: 84343
loss: 0.9908906817436218,grad_norm: 0.9999990159762606, iteration: 84344
loss: 1.009324312210083,grad_norm: 0.9999993119861582, iteration: 84345
loss: 1.01564359664917,grad_norm: 0.9999991199523776, iteration: 84346
loss: 0.9843902587890625,grad_norm: 0.999999055534789, iteration: 84347
loss: 1.0136358737945557,grad_norm: 0.99999904169137, iteration: 84348
loss: 1.0105278491973877,grad_norm: 0.9999998606005496, iteration: 84349
loss: 1.0096848011016846,grad_norm: 0.9999990729719844, iteration: 84350
loss: 1.007501244544983,grad_norm: 0.8783414613402826, iteration: 84351
loss: 0.9742048382759094,grad_norm: 0.9999990597885102, iteration: 84352
loss: 1.00371515750885,grad_norm: 0.9999991007109599, iteration: 84353
loss: 1.0052666664123535,grad_norm: 0.999999087835853, iteration: 84354
loss: 0.9846946597099304,grad_norm: 0.9999990459185322, iteration: 84355
loss: 1.0324939489364624,grad_norm: 0.9999992112451587, iteration: 84356
loss: 1.0010851621627808,grad_norm: 0.9999990958205467, iteration: 84357
loss: 0.9886631965637207,grad_norm: 0.9911683967717398, iteration: 84358
loss: 1.000705599784851,grad_norm: 0.9999992590405177, iteration: 84359
loss: 1.004189372062683,grad_norm: 0.9999991887308451, iteration: 84360
loss: 0.9765552282333374,grad_norm: 0.8361637208522469, iteration: 84361
loss: 0.9893433451652527,grad_norm: 0.9241290814609472, iteration: 84362
loss: 1.0345687866210938,grad_norm: 0.9999990718618776, iteration: 84363
loss: 0.9764950275421143,grad_norm: 0.8539161134161323, iteration: 84364
loss: 0.9724759459495544,grad_norm: 0.9999992515222043, iteration: 84365
loss: 0.9654956459999084,grad_norm: 0.9999990426192039, iteration: 84366
loss: 1.0583113431930542,grad_norm: 0.9999993983581286, iteration: 84367
loss: 0.9801086187362671,grad_norm: 0.9999991135635398, iteration: 84368
loss: 1.0302281379699707,grad_norm: 0.9999992378834847, iteration: 84369
loss: 0.9870412945747375,grad_norm: 0.9999990368652816, iteration: 84370
loss: 0.9744643568992615,grad_norm: 0.9090452906231402, iteration: 84371
loss: 0.9994892477989197,grad_norm: 0.9566164951218198, iteration: 84372
loss: 0.9483792185783386,grad_norm: 0.999999124937557, iteration: 84373
loss: 0.9901953935623169,grad_norm: 0.9006687186744176, iteration: 84374
loss: 1.0026534795761108,grad_norm: 0.9999992392741912, iteration: 84375
loss: 0.9610595107078552,grad_norm: 0.922941548788869, iteration: 84376
loss: 1.022990345954895,grad_norm: 0.9937500874555214, iteration: 84377
loss: 0.9735256433486938,grad_norm: 0.8598679543433012, iteration: 84378
loss: 0.9886044263839722,grad_norm: 0.9999991666057589, iteration: 84379
loss: 1.0211925506591797,grad_norm: 0.9420843950581621, iteration: 84380
loss: 0.9775304794311523,grad_norm: 0.9172572716420385, iteration: 84381
loss: 0.9893317818641663,grad_norm: 0.999999118169321, iteration: 84382
loss: 0.9289699196815491,grad_norm: 0.9999992247170088, iteration: 84383
loss: 1.016616940498352,grad_norm: 0.9999992863793208, iteration: 84384
loss: 1.0238237380981445,grad_norm: 0.9730928109886755, iteration: 84385
loss: 1.016624093055725,grad_norm: 0.9999993531806227, iteration: 84386
loss: 0.9838687181472778,grad_norm: 0.9755199538870866, iteration: 84387
loss: 0.9910181164741516,grad_norm: 0.999999078924282, iteration: 84388
loss: 1.0256297588348389,grad_norm: 0.9255308841469477, iteration: 84389
loss: 0.9990437626838684,grad_norm: 0.9999991836838465, iteration: 84390
loss: 0.9940540790557861,grad_norm: 0.9393161078098802, iteration: 84391
loss: 1.0145494937896729,grad_norm: 0.9999990865946624, iteration: 84392
loss: 0.9307249188423157,grad_norm: 0.9999992420621115, iteration: 84393
loss: 1.0313222408294678,grad_norm: 0.9999989499852836, iteration: 84394
loss: 0.9949682354927063,grad_norm: 0.9999990601110137, iteration: 84395
loss: 0.9916166663169861,grad_norm: 0.9999990085795876, iteration: 84396
loss: 0.9820589423179626,grad_norm: 0.9999991209517713, iteration: 84397
loss: 1.0210731029510498,grad_norm: 0.9708218301010945, iteration: 84398
loss: 1.022629737854004,grad_norm: 0.9029239238059509, iteration: 84399
loss: 1.0065432786941528,grad_norm: 0.9999992924060279, iteration: 84400
loss: 0.9687004685401917,grad_norm: 0.8815913468815801, iteration: 84401
loss: 1.034456729888916,grad_norm: 0.9999990559234269, iteration: 84402
loss: 1.0121155977249146,grad_norm: 0.8957338088173665, iteration: 84403
loss: 1.0023448467254639,grad_norm: 0.999999137596917, iteration: 84404
loss: 1.0016460418701172,grad_norm: 0.8841190570893581, iteration: 84405
loss: 0.9967899918556213,grad_norm: 0.9999990656711624, iteration: 84406
loss: 1.0105711221694946,grad_norm: 0.9999993020407523, iteration: 84407
loss: 1.0089905261993408,grad_norm: 0.999999220459354, iteration: 84408
loss: 1.019640326499939,grad_norm: 0.9496265618070019, iteration: 84409
loss: 1.0071481466293335,grad_norm: 0.9999991492760605, iteration: 84410
loss: 1.031548261642456,grad_norm: 0.999999047749296, iteration: 84411
loss: 0.9779855012893677,grad_norm: 0.9999990867804224, iteration: 84412
loss: 0.9992737770080566,grad_norm: 0.9854306992363808, iteration: 84413
loss: 0.9899634122848511,grad_norm: 0.8737195283012804, iteration: 84414
loss: 1.0029857158660889,grad_norm: 0.9999992273595923, iteration: 84415
loss: 1.0006096363067627,grad_norm: 0.9999990209155692, iteration: 84416
loss: 0.9816840291023254,grad_norm: 0.9999991812025315, iteration: 84417
loss: 0.998327374458313,grad_norm: 0.9999992120543965, iteration: 84418
loss: 1.0114474296569824,grad_norm: 0.9999990841281265, iteration: 84419
loss: 1.0031497478485107,grad_norm: 0.9999990749538638, iteration: 84420
loss: 0.9725257158279419,grad_norm: 0.999999210843357, iteration: 84421
loss: 1.0055334568023682,grad_norm: 0.9669707141731494, iteration: 84422
loss: 1.0017311573028564,grad_norm: 0.9471823539672429, iteration: 84423
loss: 0.9711011052131653,grad_norm: 0.9894164902793158, iteration: 84424
loss: 1.0296955108642578,grad_norm: 0.9901924757208685, iteration: 84425
loss: 1.030271291732788,grad_norm: 0.994740582276015, iteration: 84426
loss: 0.9922000765800476,grad_norm: 0.9999993778098714, iteration: 84427
loss: 0.969102680683136,grad_norm: 0.999998951075183, iteration: 84428
loss: 0.9930590391159058,grad_norm: 0.9999992266745044, iteration: 84429
loss: 0.9970454573631287,grad_norm: 0.9999991866770057, iteration: 84430
loss: 0.9858812689781189,grad_norm: 0.9719926160404967, iteration: 84431
loss: 1.0351194143295288,grad_norm: 0.9999993208111471, iteration: 84432
loss: 0.9903010129928589,grad_norm: 0.9999989604182715, iteration: 84433
loss: 1.0111942291259766,grad_norm: 0.9999990443751483, iteration: 84434
loss: 0.9843018054962158,grad_norm: 0.9999992844950621, iteration: 84435
loss: 1.026178002357483,grad_norm: 0.7748082751667751, iteration: 84436
loss: 1.0352764129638672,grad_norm: 0.872983020664206, iteration: 84437
loss: 0.9953541159629822,grad_norm: 0.9799127135812765, iteration: 84438
loss: 1.0281003713607788,grad_norm: 0.9999991309374999, iteration: 84439
loss: 1.1936591863632202,grad_norm: 0.999999265082922, iteration: 84440
loss: 1.0585722923278809,grad_norm: 0.9999990804589074, iteration: 84441
loss: 0.9686985611915588,grad_norm: 0.9999990901151612, iteration: 84442
loss: 0.9696303606033325,grad_norm: 0.9999991341607185, iteration: 84443
loss: 0.9294640421867371,grad_norm: 0.9999990595584635, iteration: 84444
loss: 0.9875709414482117,grad_norm: 0.9999992185839173, iteration: 84445
loss: 0.9909059405326843,grad_norm: 0.9999991863500998, iteration: 84446
loss: 1.0358580350875854,grad_norm: 0.9999989979369481, iteration: 84447
loss: 1.0351463556289673,grad_norm: 0.9999991630424044, iteration: 84448
loss: 0.9631226658821106,grad_norm: 0.9150441458332577, iteration: 84449
loss: 0.9928913712501526,grad_norm: 0.9686154374622118, iteration: 84450
loss: 0.9975038170814514,grad_norm: 0.9999992048686539, iteration: 84451
loss: 1.0046720504760742,grad_norm: 0.9999993163936524, iteration: 84452
loss: 0.994432806968689,grad_norm: 0.9999990392129601, iteration: 84453
loss: 0.9775293469429016,grad_norm: 0.9603436105131774, iteration: 84454
loss: 1.0157365798950195,grad_norm: 0.9999993426733007, iteration: 84455
loss: 1.0167851448059082,grad_norm: 0.9999993716016576, iteration: 84456
loss: 0.9807082414627075,grad_norm: 0.9999991105693964, iteration: 84457
loss: 0.9819881319999695,grad_norm: 0.9999990347149398, iteration: 84458
loss: 1.0205715894699097,grad_norm: 0.9999992311967106, iteration: 84459
loss: 0.997427225112915,grad_norm: 0.9999990298440323, iteration: 84460
loss: 0.9833754897117615,grad_norm: 0.9492479516014238, iteration: 84461
loss: 1.0277526378631592,grad_norm: 0.9999991363205306, iteration: 84462
loss: 1.0098644495010376,grad_norm: 0.9776201271702574, iteration: 84463
loss: 1.0124839544296265,grad_norm: 0.9999991110942961, iteration: 84464
loss: 1.0117521286010742,grad_norm: 0.9999991444610642, iteration: 84465
loss: 0.9663983583450317,grad_norm: 0.9921532008768519, iteration: 84466
loss: 1.0179599523544312,grad_norm: 0.9999997842193192, iteration: 84467
loss: 0.9737812876701355,grad_norm: 0.9999990042078211, iteration: 84468
loss: 0.983039379119873,grad_norm: 0.9999994581869911, iteration: 84469
loss: 0.9694333672523499,grad_norm: 0.9656395003068765, iteration: 84470
loss: 1.0128694772720337,grad_norm: 0.971855406876303, iteration: 84471
loss: 1.0056498050689697,grad_norm: 0.862648417087181, iteration: 84472
loss: 1.0096688270568848,grad_norm: 0.9999991608537381, iteration: 84473
loss: 0.9585905075073242,grad_norm: 0.9834046414394891, iteration: 84474
loss: 0.9489956498146057,grad_norm: 0.999999098503554, iteration: 84475
loss: 1.011206030845642,grad_norm: 0.9999989815565916, iteration: 84476
loss: 1.005246639251709,grad_norm: 0.9841974551793399, iteration: 84477
loss: 1.1486432552337646,grad_norm: 0.9999994497883037, iteration: 84478
loss: 1.0171005725860596,grad_norm: 0.9999992014293, iteration: 84479
loss: 0.9936860203742981,grad_norm: 0.9236830705728081, iteration: 84480
loss: 0.9777709245681763,grad_norm: 0.9999991815598835, iteration: 84481
loss: 0.9648005962371826,grad_norm: 0.9999992479488012, iteration: 84482
loss: 1.000866413116455,grad_norm: 0.999999127717914, iteration: 84483
loss: 1.0140526294708252,grad_norm: 0.965254130066709, iteration: 84484
loss: 1.0224193334579468,grad_norm: 0.9999991270702643, iteration: 84485
loss: 1.0165679454803467,grad_norm: 0.99999911931731, iteration: 84486
loss: 1.016140103340149,grad_norm: 0.999999026852923, iteration: 84487
loss: 0.9862687587738037,grad_norm: 0.8556164349252064, iteration: 84488
loss: 1.007361650466919,grad_norm: 0.9999990417335376, iteration: 84489
loss: 1.023923635482788,grad_norm: 0.9999991689694543, iteration: 84490
loss: 0.9424235224723816,grad_norm: 0.9765407125120373, iteration: 84491
loss: 1.0379587411880493,grad_norm: 0.9838332813598392, iteration: 84492
loss: 1.0018059015274048,grad_norm: 0.999999006243662, iteration: 84493
loss: 1.000365138053894,grad_norm: 0.9999991950235959, iteration: 84494
loss: 1.0139143466949463,grad_norm: 0.9999991040930049, iteration: 84495
loss: 1.0198801755905151,grad_norm: 0.9999989723972118, iteration: 84496
loss: 0.9974315762519836,grad_norm: 0.9999991279110862, iteration: 84497
loss: 1.058730125427246,grad_norm: 0.9999991953039955, iteration: 84498
loss: 0.9758493900299072,grad_norm: 0.9999991091062307, iteration: 84499
loss: 0.9720920920372009,grad_norm: 0.9999992003969933, iteration: 84500
loss: 0.9770656824111938,grad_norm: 0.9999989618582777, iteration: 84501
loss: 1.0151875019073486,grad_norm: 0.9999993210263213, iteration: 84502
loss: 1.0080829858779907,grad_norm: 0.9999989169132525, iteration: 84503
loss: 1.0189318656921387,grad_norm: 0.9212947153684502, iteration: 84504
loss: 1.0120184421539307,grad_norm: 0.9999991811436126, iteration: 84505
loss: 1.0107587575912476,grad_norm: 0.9999994447422126, iteration: 84506
loss: 1.0163795948028564,grad_norm: 0.9703175370749914, iteration: 84507
loss: 0.9940969944000244,grad_norm: 0.9676198510006712, iteration: 84508
loss: 0.9784567952156067,grad_norm: 0.9999990235843058, iteration: 84509
loss: 1.0236637592315674,grad_norm: 0.9999992757547248, iteration: 84510
loss: 1.014443039894104,grad_norm: 0.9060408512655814, iteration: 84511
loss: 1.0344493389129639,grad_norm: 0.9999992671109813, iteration: 84512
loss: 1.0111438035964966,grad_norm: 0.9999992897500573, iteration: 84513
loss: 0.9760231375694275,grad_norm: 0.9999990693852031, iteration: 84514
loss: 1.0030591487884521,grad_norm: 0.9999990307772155, iteration: 84515
loss: 1.0026179552078247,grad_norm: 0.974288159157561, iteration: 84516
loss: 1.0059552192687988,grad_norm: 0.9999999081309555, iteration: 84517
loss: 1.0309332609176636,grad_norm: 0.9557070541910967, iteration: 84518
loss: 1.0347867012023926,grad_norm: 0.9746315536903639, iteration: 84519
loss: 1.0357158184051514,grad_norm: 0.9999991021054139, iteration: 84520
loss: 0.9842020869255066,grad_norm: 0.9999991632254981, iteration: 84521
loss: 0.9810035228729248,grad_norm: 0.9114421411647281, iteration: 84522
loss: 1.0017061233520508,grad_norm: 0.9999990690416246, iteration: 84523
loss: 0.9812886714935303,grad_norm: 0.9549532305840344, iteration: 84524
loss: 1.0041775703430176,grad_norm: 0.9620109204200613, iteration: 84525
loss: 0.9878409504890442,grad_norm: 0.9622804460338721, iteration: 84526
loss: 0.9825658798217773,grad_norm: 0.889870519936464, iteration: 84527
loss: 0.9767751693725586,grad_norm: 0.9639044999540177, iteration: 84528
loss: 0.9825002551078796,grad_norm: 0.9999990276944056, iteration: 84529
loss: 0.979548990726471,grad_norm: 0.9999990082347003, iteration: 84530
loss: 1.0013868808746338,grad_norm: 0.9979359479831583, iteration: 84531
loss: 0.9753839373588562,grad_norm: 0.8719824654770436, iteration: 84532
loss: 0.9451718926429749,grad_norm: 0.9455369593575421, iteration: 84533
loss: 1.0040711164474487,grad_norm: 0.885345604437362, iteration: 84534
loss: 0.9709892868995667,grad_norm: 0.9999993379458785, iteration: 84535
loss: 0.9732072353363037,grad_norm: 0.9999989659333236, iteration: 84536
loss: 1.0192501544952393,grad_norm: 0.8022204449747774, iteration: 84537
loss: 1.0199854373931885,grad_norm: 0.9999992948556098, iteration: 84538
loss: 1.0274032354354858,grad_norm: 0.9999991026102385, iteration: 84539
loss: 1.040973424911499,grad_norm: 0.9999990632024506, iteration: 84540
loss: 1.0014880895614624,grad_norm: 0.9946398761205841, iteration: 84541
loss: 0.978166401386261,grad_norm: 0.83318486626617, iteration: 84542
loss: 0.9717991948127747,grad_norm: 0.9999990393869201, iteration: 84543
loss: 0.9753470420837402,grad_norm: 0.9607206691017028, iteration: 84544
loss: 0.9892502427101135,grad_norm: 0.9999990941556156, iteration: 84545
loss: 0.984946608543396,grad_norm: 0.9999992280749707, iteration: 84546
loss: 1.0071992874145508,grad_norm: 0.9118245887420503, iteration: 84547
loss: 1.0071460008621216,grad_norm: 0.9999992103339951, iteration: 84548
loss: 0.98926842212677,grad_norm: 0.965954934903047, iteration: 84549
loss: 1.001009225845337,grad_norm: 0.999999301379131, iteration: 84550
loss: 0.983832061290741,grad_norm: 0.9999991481424124, iteration: 84551
loss: 1.0007885694503784,grad_norm: 0.9999992310890197, iteration: 84552
loss: 1.019242525100708,grad_norm: 0.9999992733949875, iteration: 84553
loss: 1.0279343128204346,grad_norm: 0.9999991239415735, iteration: 84554
loss: 0.9944586753845215,grad_norm: 0.9999991620821977, iteration: 84555
loss: 0.9703190326690674,grad_norm: 0.9999990767844565, iteration: 84556
loss: 0.9609596729278564,grad_norm: 0.9815375093680662, iteration: 84557
loss: 1.0054402351379395,grad_norm: 0.9999990026984746, iteration: 84558
loss: 0.9711192846298218,grad_norm: 0.9999991813632081, iteration: 84559
loss: 0.9575963020324707,grad_norm: 0.9731732532189763, iteration: 84560
loss: 0.9832945466041565,grad_norm: 0.9999996467251302, iteration: 84561
loss: 1.0036665201187134,grad_norm: 0.9672008502109857, iteration: 84562
loss: 0.9976993799209595,grad_norm: 0.9999991427515366, iteration: 84563
loss: 1.0004576444625854,grad_norm: 0.9210662691020663, iteration: 84564
loss: 1.0121278762817383,grad_norm: 0.9999991931230854, iteration: 84565
loss: 1.0438002347946167,grad_norm: 0.9999990281375837, iteration: 84566
loss: 0.9861932396888733,grad_norm: 0.9980309036172268, iteration: 84567
loss: 1.0017211437225342,grad_norm: 0.9906407459307142, iteration: 84568
loss: 1.004012942314148,grad_norm: 0.8841922016739866, iteration: 84569
loss: 0.9970739483833313,grad_norm: 0.948772209932437, iteration: 84570
loss: 0.9883958697319031,grad_norm: 0.9999990755229888, iteration: 84571
loss: 1.0025197267532349,grad_norm: 0.955575457860714, iteration: 84572
loss: 1.0186805725097656,grad_norm: 0.9999990345943254, iteration: 84573
loss: 1.0296274423599243,grad_norm: 0.953571170004594, iteration: 84574
loss: 1.0419915914535522,grad_norm: 0.9999989387865028, iteration: 84575
loss: 1.0040580034255981,grad_norm: 0.9999991593429747, iteration: 84576
loss: 1.014929175376892,grad_norm: 0.9844792502372617, iteration: 84577
loss: 1.0033599138259888,grad_norm: 0.9999991554323959, iteration: 84578
loss: 1.0012105703353882,grad_norm: 0.9999992067325, iteration: 84579
loss: 1.0493519306182861,grad_norm: 0.9992512255835007, iteration: 84580
loss: 1.0383484363555908,grad_norm: 0.9999991890555199, iteration: 84581
loss: 1.014866590499878,grad_norm: 0.9999992248674187, iteration: 84582
loss: 0.9695278406143188,grad_norm: 0.9868842514655991, iteration: 84583
loss: 1.0225896835327148,grad_norm: 0.9999993179422424, iteration: 84584
loss: 1.05138099193573,grad_norm: 0.999999130294518, iteration: 84585
loss: 0.9896873235702515,grad_norm: 0.9999990886334325, iteration: 84586
loss: 0.9752234816551208,grad_norm: 0.9999990833038713, iteration: 84587
loss: 0.973853349685669,grad_norm: 0.9999991933082509, iteration: 84588
loss: 1.0041639804840088,grad_norm: 0.9682814110492507, iteration: 84589
loss: 0.983748197555542,grad_norm: 0.9855257560545825, iteration: 84590
loss: 0.9697529077529907,grad_norm: 0.9999990448368807, iteration: 84591
loss: 1.0062602758407593,grad_norm: 0.9999991555359948, iteration: 84592
loss: 1.0062127113342285,grad_norm: 0.9999990535820242, iteration: 84593
loss: 1.0075732469558716,grad_norm: 0.9999991095497524, iteration: 84594
loss: 1.0052976608276367,grad_norm: 0.9176483277774419, iteration: 84595
loss: 0.9853589534759521,grad_norm: 0.9999991123896198, iteration: 84596
loss: 0.9759690165519714,grad_norm: 0.9743792956698778, iteration: 84597
loss: 1.0134482383728027,grad_norm: 0.9999992115231746, iteration: 84598
loss: 1.0067477226257324,grad_norm: 0.9999990106423932, iteration: 84599
loss: 0.9927456378936768,grad_norm: 0.9562127803092668, iteration: 84600
loss: 1.0169612169265747,grad_norm: 0.9069272422717854, iteration: 84601
loss: 1.009822964668274,grad_norm: 0.9999990880266852, iteration: 84602
loss: 1.0197726488113403,grad_norm: 0.9999990221211014, iteration: 84603
loss: 0.9936130046844482,grad_norm: 0.9999992585425901, iteration: 84604
loss: 0.990422248840332,grad_norm: 0.9596822682527009, iteration: 84605
loss: 0.9958856701850891,grad_norm: 0.9999991111010267, iteration: 84606
loss: 1.044312596321106,grad_norm: 0.9999990634208158, iteration: 84607
loss: 1.0263923406600952,grad_norm: 0.999999647379279, iteration: 84608
loss: 1.033585548400879,grad_norm: 0.9824233933920815, iteration: 84609
loss: 1.0457425117492676,grad_norm: 0.9999991402545181, iteration: 84610
loss: 0.9932063221931458,grad_norm: 0.9999994896333545, iteration: 84611
loss: 1.0095735788345337,grad_norm: 0.9326072247508609, iteration: 84612
loss: 1.001641869544983,grad_norm: 0.9807644632626605, iteration: 84613
loss: 1.0216974020004272,grad_norm: 0.9002971739350347, iteration: 84614
loss: 0.9549229741096497,grad_norm: 0.9999990646796467, iteration: 84615
loss: 0.98051518201828,grad_norm: 0.9279706787989485, iteration: 84616
loss: 0.992306649684906,grad_norm: 0.9117713857719804, iteration: 84617
loss: 0.9921454787254333,grad_norm: 0.9999991708812049, iteration: 84618
loss: 0.9960399270057678,grad_norm: 0.9999990449826388, iteration: 84619
loss: 1.0134915113449097,grad_norm: 0.9576752071922082, iteration: 84620
loss: 0.9960391521453857,grad_norm: 0.9999991172604468, iteration: 84621
loss: 0.9629484415054321,grad_norm: 0.9999992434057139, iteration: 84622
loss: 0.9784786701202393,grad_norm: 0.9827095112041472, iteration: 84623
loss: 0.9970220923423767,grad_norm: 0.9999996843969003, iteration: 84624
loss: 0.9976020455360413,grad_norm: 0.9999992160243352, iteration: 84625
loss: 0.9755991697311401,grad_norm: 0.9149738355668207, iteration: 84626
loss: 1.0082145929336548,grad_norm: 0.9647023772640455, iteration: 84627
loss: 1.0074005126953125,grad_norm: 0.9999991687693153, iteration: 84628
loss: 0.9169304966926575,grad_norm: 0.9999991554920967, iteration: 84629
loss: 1.0060452222824097,grad_norm: 0.9999989390773752, iteration: 84630
loss: 1.022557020187378,grad_norm: 0.999999102218595, iteration: 84631
loss: 1.0322438478469849,grad_norm: 0.901066377116224, iteration: 84632
loss: 1.0197511911392212,grad_norm: 0.9999990059330293, iteration: 84633
loss: 0.9989563226699829,grad_norm: 0.9424879291791666, iteration: 84634
loss: 1.0388151407241821,grad_norm: 0.999998882625765, iteration: 84635
loss: 1.028288722038269,grad_norm: 0.9999990715648686, iteration: 84636
loss: 0.9965084195137024,grad_norm: 0.9999989993233183, iteration: 84637
loss: 0.9834315776824951,grad_norm: 0.9999991028569861, iteration: 84638
loss: 0.985025942325592,grad_norm: 0.9718679549783805, iteration: 84639
loss: 1.0069613456726074,grad_norm: 0.9999990116383015, iteration: 84640
loss: 1.0674426555633545,grad_norm: 0.9999995614146948, iteration: 84641
loss: 1.009006381034851,grad_norm: 0.8017653763107875, iteration: 84642
loss: 1.0459452867507935,grad_norm: 0.999999254626394, iteration: 84643
loss: 1.0224196910858154,grad_norm: 0.9999991160184813, iteration: 84644
loss: 0.9767833948135376,grad_norm: 0.972598125568511, iteration: 84645
loss: 0.9777094721794128,grad_norm: 0.9999990973599299, iteration: 84646
loss: 0.9963574409484863,grad_norm: 0.999999040259129, iteration: 84647
loss: 0.9610941410064697,grad_norm: 0.9999991593131734, iteration: 84648
loss: 1.0067096948623657,grad_norm: 0.8806553205336217, iteration: 84649
loss: 1.0186619758605957,grad_norm: 0.9999991436970646, iteration: 84650
loss: 0.9812875390052795,grad_norm: 0.9999991717581613, iteration: 84651
loss: 0.9842810034751892,grad_norm: 0.89665136607106, iteration: 84652
loss: 0.9549278020858765,grad_norm: 0.9999991125219981, iteration: 84653
loss: 0.9896186590194702,grad_norm: 0.9115837470341094, iteration: 84654
loss: 0.9941809773445129,grad_norm: 0.9999991716285402, iteration: 84655
loss: 0.9495602250099182,grad_norm: 0.999999145922709, iteration: 84656
loss: 1.0289340019226074,grad_norm: 0.9999991973069395, iteration: 84657
loss: 0.9731071591377258,grad_norm: 0.9999991433081956, iteration: 84658
loss: 1.0025163888931274,grad_norm: 0.9999991682983705, iteration: 84659
loss: 0.9972331523895264,grad_norm: 0.9269017421949454, iteration: 84660
loss: 1.0148664712905884,grad_norm: 0.9999989889103321, iteration: 84661
loss: 1.0007617473602295,grad_norm: 0.9026021087860637, iteration: 84662
loss: 0.9922564029693604,grad_norm: 0.9999990554010424, iteration: 84663
loss: 0.9888004660606384,grad_norm: 0.9999990472314351, iteration: 84664
loss: 1.0156660079956055,grad_norm: 0.9999991133602691, iteration: 84665
loss: 1.046449065208435,grad_norm: 0.8702295446134034, iteration: 84666
loss: 0.9927988648414612,grad_norm: 0.9700223125243527, iteration: 84667
loss: 0.9657976031303406,grad_norm: 0.999999243753252, iteration: 84668
loss: 0.9832862019538879,grad_norm: 0.9587690916945951, iteration: 84669
loss: 1.008782148361206,grad_norm: 0.9368274026680352, iteration: 84670
loss: 0.9937703609466553,grad_norm: 0.9061918961439646, iteration: 84671
loss: 1.0057355165481567,grad_norm: 0.9999993259412079, iteration: 84672
loss: 1.0086029767990112,grad_norm: 0.9999995178776098, iteration: 84673
loss: 1.0022722482681274,grad_norm: 0.9999989750041457, iteration: 84674
loss: 1.005688190460205,grad_norm: 0.9999992531468113, iteration: 84675
loss: 0.987109363079071,grad_norm: 0.9999989833450131, iteration: 84676
loss: 0.9958831071853638,grad_norm: 0.9999993101164998, iteration: 84677
loss: 1.0006811618804932,grad_norm: 0.9656720945249995, iteration: 84678
loss: 0.982537031173706,grad_norm: 0.9804142589431641, iteration: 84679
loss: 0.9941015839576721,grad_norm: 0.9999992229478394, iteration: 84680
loss: 0.9985013604164124,grad_norm: 0.999998935123621, iteration: 84681
loss: 0.9919655323028564,grad_norm: 0.9999990247435089, iteration: 84682
loss: 1.0041977167129517,grad_norm: 0.9999992128519603, iteration: 84683
loss: 1.0083160400390625,grad_norm: 0.9999991493513902, iteration: 84684
loss: 1.0062799453735352,grad_norm: 0.9999991178871537, iteration: 84685
loss: 0.9899011254310608,grad_norm: 0.9999989464020264, iteration: 84686
loss: 1.0194083452224731,grad_norm: 0.9999991759837044, iteration: 84687
loss: 1.0338799953460693,grad_norm: 0.9999991615552835, iteration: 84688
loss: 1.0113474130630493,grad_norm: 0.9999992144024138, iteration: 84689
loss: 0.9877499938011169,grad_norm: 0.999999225066315, iteration: 84690
loss: 1.0077970027923584,grad_norm: 0.9829525560473592, iteration: 84691
loss: 0.9592646360397339,grad_norm: 0.999999262416078, iteration: 84692
loss: 0.9728900194168091,grad_norm: 0.9999992013337202, iteration: 84693
loss: 0.9770816564559937,grad_norm: 0.999999123759714, iteration: 84694
loss: 1.0124133825302124,grad_norm: 0.9665782463346524, iteration: 84695
loss: 0.9832493662834167,grad_norm: 0.999999164633663, iteration: 84696
loss: 0.9997021555900574,grad_norm: 0.9999990328765401, iteration: 84697
loss: 0.9996407628059387,grad_norm: 0.9999991500673109, iteration: 84698
loss: 0.9925560355186462,grad_norm: 0.9404096492673333, iteration: 84699
loss: 1.0242975950241089,grad_norm: 0.9999990848160489, iteration: 84700
loss: 1.014443039894104,grad_norm: 0.9999990654647273, iteration: 84701
loss: 1.0001957416534424,grad_norm: 0.9999990853834291, iteration: 84702
loss: 0.9995139837265015,grad_norm: 0.9088440282712504, iteration: 84703
loss: 0.9587275385856628,grad_norm: 0.9187741851766243, iteration: 84704
loss: 1.003887414932251,grad_norm: 0.9999989560008662, iteration: 84705
loss: 0.9842061400413513,grad_norm: 0.9999991016457516, iteration: 84706
loss: 0.970190167427063,grad_norm: 0.9548979580777828, iteration: 84707
loss: 1.0080467462539673,grad_norm: 0.8700070724095146, iteration: 84708
loss: 0.9915629029273987,grad_norm: 0.9027970611466637, iteration: 84709
loss: 1.016591191291809,grad_norm: 0.9999991362730994, iteration: 84710
loss: 0.9505297541618347,grad_norm: 0.9999993061521377, iteration: 84711
loss: 0.9785935282707214,grad_norm: 0.9999989763795909, iteration: 84712
loss: 1.0013010501861572,grad_norm: 0.9999992805492651, iteration: 84713
loss: 1.0119017362594604,grad_norm: 0.9452860356135057, iteration: 84714
loss: 0.9458610415458679,grad_norm: 0.9521633305097219, iteration: 84715
loss: 1.0315312147140503,grad_norm: 0.9999990861360181, iteration: 84716
loss: 0.9788569211959839,grad_norm: 0.9999991608882425, iteration: 84717
loss: 0.9725300073623657,grad_norm: 0.9598843774976507, iteration: 84718
loss: 0.9538753628730774,grad_norm: 0.9999990759510284, iteration: 84719
loss: 0.9772796034812927,grad_norm: 0.9999991062542977, iteration: 84720
loss: 0.981789231300354,grad_norm: 0.9999990261758693, iteration: 84721
loss: 0.9903762340545654,grad_norm: 0.9999990221866482, iteration: 84722
loss: 1.0048021078109741,grad_norm: 0.8710520559845075, iteration: 84723
loss: 1.0009833574295044,grad_norm: 0.9999991047244703, iteration: 84724
loss: 1.0230237245559692,grad_norm: 0.8869769717094693, iteration: 84725
loss: 0.9863482117652893,grad_norm: 0.9793674993645359, iteration: 84726
loss: 0.9661845564842224,grad_norm: 0.9999989157494509, iteration: 84727
loss: 0.9399030804634094,grad_norm: 0.9999989984532505, iteration: 84728
loss: 0.9991657137870789,grad_norm: 0.9999992211305704, iteration: 84729
loss: 0.9926092028617859,grad_norm: 0.9999992704857665, iteration: 84730
loss: 1.0023508071899414,grad_norm: 0.9218146935701558, iteration: 84731
loss: 0.9831228256225586,grad_norm: 0.9976911438476465, iteration: 84732
loss: 0.9599499702453613,grad_norm: 0.9999991395867789, iteration: 84733
loss: 1.0758249759674072,grad_norm: 1.0000000253023407, iteration: 84734
loss: 0.9938122034072876,grad_norm: 0.9799437638243876, iteration: 84735
loss: 0.9558998942375183,grad_norm: 0.9999991753939076, iteration: 84736
loss: 0.9861422181129456,grad_norm: 0.9246252087952561, iteration: 84737
loss: 1.006402611732483,grad_norm: 0.9999991687223189, iteration: 84738
loss: 0.9697932004928589,grad_norm: 0.999999113382455, iteration: 84739
loss: 1.0113575458526611,grad_norm: 0.9566161080406567, iteration: 84740
loss: 0.9835829734802246,grad_norm: 0.9999992401890475, iteration: 84741
loss: 0.9859128594398499,grad_norm: 0.999999101703662, iteration: 84742
loss: 1.00580894947052,grad_norm: 0.9999991285818023, iteration: 84743
loss: 0.9866544008255005,grad_norm: 0.964549568048695, iteration: 84744
loss: 1.017120122909546,grad_norm: 0.9999992040697899, iteration: 84745
loss: 1.0849748849868774,grad_norm: 0.9999992838241957, iteration: 84746
loss: 1.0136224031448364,grad_norm: 0.9999992404946643, iteration: 84747
loss: 1.0076419115066528,grad_norm: 0.9999992676635278, iteration: 84748
loss: 0.9865964651107788,grad_norm: 0.9999992020866371, iteration: 84749
loss: 0.9814068675041199,grad_norm: 0.9641698681925106, iteration: 84750
loss: 1.0328161716461182,grad_norm: 0.9999991582526268, iteration: 84751
loss: 1.0104780197143555,grad_norm: 0.9999992545495366, iteration: 84752
loss: 1.004888653755188,grad_norm: 0.9999992273567819, iteration: 84753
loss: 0.9607740044593811,grad_norm: 0.9047744807900291, iteration: 84754
loss: 1.0208566188812256,grad_norm: 0.9999991128133933, iteration: 84755
loss: 1.0169661045074463,grad_norm: 0.9999990280178398, iteration: 84756
loss: 0.9760492444038391,grad_norm: 0.9999991824465769, iteration: 84757
loss: 0.9664867520332336,grad_norm: 0.999999222115632, iteration: 84758
loss: 1.0016796588897705,grad_norm: 0.9644188167955219, iteration: 84759
loss: 0.9707634449005127,grad_norm: 0.9999992470925975, iteration: 84760
loss: 0.9937283396720886,grad_norm: 0.9999989962408198, iteration: 84761
loss: 1.0038328170776367,grad_norm: 0.999999157775588, iteration: 84762
loss: 1.0283005237579346,grad_norm: 0.9999991257360296, iteration: 84763
loss: 1.0371688604354858,grad_norm: 0.9999991787426514, iteration: 84764
loss: 0.996300220489502,grad_norm: 0.9919886805503193, iteration: 84765
loss: 0.9664878249168396,grad_norm: 0.9999993986233784, iteration: 84766
loss: 1.026469349861145,grad_norm: 0.9999989795321621, iteration: 84767
loss: 0.9957903623580933,grad_norm: 0.9955604033089185, iteration: 84768
loss: 1.0070899724960327,grad_norm: 0.9043530002599065, iteration: 84769
loss: 0.9938949346542358,grad_norm: 0.9999990730162716, iteration: 84770
loss: 0.9812260270118713,grad_norm: 0.9894957688458185, iteration: 84771
loss: 0.9976962208747864,grad_norm: 0.9999991164671249, iteration: 84772
loss: 0.9996356964111328,grad_norm: 0.9000536767829093, iteration: 84773
loss: 1.0138014554977417,grad_norm: 0.9999989886077687, iteration: 84774
loss: 0.9729771614074707,grad_norm: 0.8836904049000021, iteration: 84775
loss: 1.0336443185806274,grad_norm: 0.994785459873845, iteration: 84776
loss: 0.9786788821220398,grad_norm: 0.9999992846786052, iteration: 84777
loss: 0.9590738415718079,grad_norm: 0.9051114443068143, iteration: 84778
loss: 0.9646891355514526,grad_norm: 0.9999990774712627, iteration: 84779
loss: 1.001034140586853,grad_norm: 0.9420087559873823, iteration: 84780
loss: 1.0537031888961792,grad_norm: 0.9999992417092437, iteration: 84781
loss: 0.9693032503128052,grad_norm: 0.9288746006362856, iteration: 84782
loss: 0.9800699949264526,grad_norm: 0.869037568810333, iteration: 84783
loss: 1.0209095478057861,grad_norm: 0.9369128733311113, iteration: 84784
loss: 0.9942628145217896,grad_norm: 0.9999993139680606, iteration: 84785
loss: 1.0048389434814453,grad_norm: 0.9999989859984385, iteration: 84786
loss: 0.9950762987136841,grad_norm: 0.9999990734641916, iteration: 84787
loss: 1.0212327241897583,grad_norm: 0.999999814574814, iteration: 84788
loss: 1.0268994569778442,grad_norm: 0.9999997852652535, iteration: 84789
loss: 1.0052950382232666,grad_norm: 0.9999993092526336, iteration: 84790
loss: 0.9930434823036194,grad_norm: 0.9741316044286593, iteration: 84791
loss: 0.9272894263267517,grad_norm: 0.9999991693344366, iteration: 84792
loss: 1.007106900215149,grad_norm: 0.999999199740772, iteration: 84793
loss: 1.0294528007507324,grad_norm: 0.9999989207961053, iteration: 84794
loss: 1.0090032815933228,grad_norm: 0.9601235946230926, iteration: 84795
loss: 0.983855664730072,grad_norm: 0.9367634218345572, iteration: 84796
loss: 1.0478363037109375,grad_norm: 0.999999284313573, iteration: 84797
loss: 0.997639536857605,grad_norm: 0.982068715271698, iteration: 84798
loss: 1.0150272846221924,grad_norm: 0.8756451645993966, iteration: 84799
loss: 1.003149151802063,grad_norm: 0.9999989965118102, iteration: 84800
loss: 1.0319738388061523,grad_norm: 0.9904176772336699, iteration: 84801
loss: 1.0018103122711182,grad_norm: 0.9999991520364306, iteration: 84802
loss: 1.0387123823165894,grad_norm: 0.9999988631469829, iteration: 84803
loss: 1.027551531791687,grad_norm: 0.9999989023369842, iteration: 84804
loss: 1.0129036903381348,grad_norm: 0.8660934234956623, iteration: 84805
loss: 1.0311503410339355,grad_norm: 0.8988208772292657, iteration: 84806
loss: 1.0020357370376587,grad_norm: 0.9305014263872199, iteration: 84807
loss: 0.9947651624679565,grad_norm: 0.9999991129370799, iteration: 84808
loss: 1.0184450149536133,grad_norm: 0.9608609170790011, iteration: 84809
loss: 1.0153038501739502,grad_norm: 0.9999990750356322, iteration: 84810
loss: 1.005306601524353,grad_norm: 0.8470128123897905, iteration: 84811
loss: 1.0082167387008667,grad_norm: 0.9999994253812207, iteration: 84812
loss: 1.0087673664093018,grad_norm: 0.9999991573077309, iteration: 84813
loss: 1.0104670524597168,grad_norm: 0.9999996437237713, iteration: 84814
loss: 1.0494070053100586,grad_norm: 0.9999992015675524, iteration: 84815
loss: 0.9868084192276001,grad_norm: 0.9500264696006312, iteration: 84816
loss: 0.9899337887763977,grad_norm: 0.9908911347836639, iteration: 84817
loss: 1.0152803659439087,grad_norm: 0.9999997774925046, iteration: 84818
loss: 0.9949480891227722,grad_norm: 0.9999991201795643, iteration: 84819
loss: 0.9920951128005981,grad_norm: 0.9999990967624794, iteration: 84820
loss: 0.9847935438156128,grad_norm: 0.9559302469433841, iteration: 84821
loss: 0.9702898859977722,grad_norm: 0.9999991088513802, iteration: 84822
loss: 1.0118906497955322,grad_norm: 0.9999991856464694, iteration: 84823
loss: 0.9962216019630432,grad_norm: 0.9999991678784783, iteration: 84824
loss: 1.0101675987243652,grad_norm: 0.9966418426519184, iteration: 84825
loss: 0.9953812956809998,grad_norm: 0.9999991486445311, iteration: 84826
loss: 1.0015084743499756,grad_norm: 0.9999996712802948, iteration: 84827
loss: 1.021958827972412,grad_norm: 0.9999990497684516, iteration: 84828
loss: 0.9903209805488586,grad_norm: 0.9999990538771591, iteration: 84829
loss: 1.002083659172058,grad_norm: 0.9407378490514463, iteration: 84830
loss: 1.0231108665466309,grad_norm: 0.9611531985795856, iteration: 84831
loss: 0.9803026914596558,grad_norm: 0.9999991801928199, iteration: 84832
loss: 0.9892941117286682,grad_norm: 0.9958770470575359, iteration: 84833
loss: 1.0285924673080444,grad_norm: 0.999999216329838, iteration: 84834
loss: 0.9846790432929993,grad_norm: 0.9999989761157876, iteration: 84835
loss: 0.9856594204902649,grad_norm: 0.9999992165827472, iteration: 84836
loss: 1.0275213718414307,grad_norm: 0.8998599393555857, iteration: 84837
loss: 0.9947725534439087,grad_norm: 0.9999998906377674, iteration: 84838
loss: 1.0088400840759277,grad_norm: 0.9628794901545022, iteration: 84839
loss: 0.9875879287719727,grad_norm: 0.9999991407106504, iteration: 84840
loss: 1.0030592679977417,grad_norm: 0.846737970862332, iteration: 84841
loss: 0.9850471019744873,grad_norm: 0.9641832105394322, iteration: 84842
loss: 0.9596805572509766,grad_norm: 0.9779918983337575, iteration: 84843
loss: 1.0298413038253784,grad_norm: 0.9999994362748, iteration: 84844
loss: 0.9823889136314392,grad_norm: 0.9866537764145848, iteration: 84845
loss: 0.9874641299247742,grad_norm: 0.9999990706622334, iteration: 84846
loss: 0.9840338230133057,grad_norm: 0.8607512257981508, iteration: 84847
loss: 0.989409863948822,grad_norm: 0.8565230730477591, iteration: 84848
loss: 0.977550208568573,grad_norm: 0.9999998030680131, iteration: 84849
loss: 1.0013573169708252,grad_norm: 0.8855850485782452, iteration: 84850
loss: 1.0229471921920776,grad_norm: 0.9999991100306469, iteration: 84851
loss: 0.9863541722297668,grad_norm: 0.9524035741190667, iteration: 84852
loss: 0.9770275354385376,grad_norm: 0.8787920051191492, iteration: 84853
loss: 1.0139892101287842,grad_norm: 0.9999992294857537, iteration: 84854
loss: 1.1017881631851196,grad_norm: 0.9999992572036698, iteration: 84855
loss: 1.022697925567627,grad_norm: 0.9999991501508251, iteration: 84856
loss: 1.000560998916626,grad_norm: 0.9999992144906192, iteration: 84857
loss: 1.0085504055023193,grad_norm: 0.9999990817090225, iteration: 84858
loss: 0.9948910474777222,grad_norm: 0.9999991458533557, iteration: 84859
loss: 0.9532904624938965,grad_norm: 0.9999991724750523, iteration: 84860
loss: 0.9716033339500427,grad_norm: 0.9999991824403999, iteration: 84861
loss: 1.003033995628357,grad_norm: 0.9999990743316868, iteration: 84862
loss: 1.0157743692398071,grad_norm: 0.999999068227492, iteration: 84863
loss: 1.010095477104187,grad_norm: 0.937659270766529, iteration: 84864
loss: 1.031234860420227,grad_norm: 0.9999991635696578, iteration: 84865
loss: 0.9939596652984619,grad_norm: 0.9999990531330885, iteration: 84866
loss: 1.0237292051315308,grad_norm: 0.9217696929784958, iteration: 84867
loss: 0.9901139736175537,grad_norm: 0.9999991501257337, iteration: 84868
loss: 0.9848614931106567,grad_norm: 0.9999991364682329, iteration: 84869
loss: 0.9880526661872864,grad_norm: 0.9999988767137651, iteration: 84870
loss: 0.9667794108390808,grad_norm: 0.9999990119243646, iteration: 84871
loss: 0.9689882397651672,grad_norm: 0.9999990321530838, iteration: 84872
loss: 1.0086288452148438,grad_norm: 0.9999992129659653, iteration: 84873
loss: 1.0117924213409424,grad_norm: 0.9842591200904349, iteration: 84874
loss: 1.0524243116378784,grad_norm: 0.9999991501473305, iteration: 84875
loss: 0.9870217442512512,grad_norm: 0.9999992871817903, iteration: 84876
loss: 1.0103827714920044,grad_norm: 0.9999996181053956, iteration: 84877
loss: 1.01163911819458,grad_norm: 0.9999990139654941, iteration: 84878
loss: 1.0523568391799927,grad_norm: 0.9999995430728845, iteration: 84879
loss: 0.989266574382782,grad_norm: 0.9999990676473158, iteration: 84880
loss: 1.003753423690796,grad_norm: 0.9368163145726264, iteration: 84881
loss: 1.0282697677612305,grad_norm: 0.9999989320209677, iteration: 84882
loss: 0.9812539219856262,grad_norm: 0.9999991096074012, iteration: 84883
loss: 0.9935348629951477,grad_norm: 0.7966169149852306, iteration: 84884
loss: 1.001694917678833,grad_norm: 0.9394986263307898, iteration: 84885
loss: 0.9940754771232605,grad_norm: 0.9999995275307396, iteration: 84886
loss: 1.0097603797912598,grad_norm: 0.99999897115101, iteration: 84887
loss: 1.0966769456863403,grad_norm: 0.9999995898117008, iteration: 84888
loss: 1.01169753074646,grad_norm: 0.9999995966769637, iteration: 84889
loss: 1.0111027956008911,grad_norm: 0.9999990836603314, iteration: 84890
loss: 1.0176125764846802,grad_norm: 0.9330895362323829, iteration: 84891
loss: 0.9809001088142395,grad_norm: 0.9999992482164675, iteration: 84892
loss: 1.0118227005004883,grad_norm: 0.9999990671570413, iteration: 84893
loss: 1.0047801733016968,grad_norm: 0.9999993270019629, iteration: 84894
loss: 1.0210098028182983,grad_norm: 0.9999990681418903, iteration: 84895
loss: 0.9766256809234619,grad_norm: 0.9999992917375725, iteration: 84896
loss: 1.075013279914856,grad_norm: 0.9999992682535724, iteration: 84897
loss: 1.0033447742462158,grad_norm: 0.9999992270037936, iteration: 84898
loss: 1.0009734630584717,grad_norm: 0.9999991043557481, iteration: 84899
loss: 1.008644461631775,grad_norm: 0.999999235767626, iteration: 84900
loss: 0.9975476264953613,grad_norm: 0.9999991065687349, iteration: 84901
loss: 0.9973936080932617,grad_norm: 0.9999991795459574, iteration: 84902
loss: 0.9911336302757263,grad_norm: 0.9994638089301779, iteration: 84903
loss: 1.002018928527832,grad_norm: 0.999999322991674, iteration: 84904
loss: 1.0193111896514893,grad_norm: 0.9999991386225535, iteration: 84905
loss: 1.0040884017944336,grad_norm: 0.9999991095770403, iteration: 84906
loss: 1.0187962055206299,grad_norm: 0.9999997429666753, iteration: 84907
loss: 1.0091936588287354,grad_norm: 0.9999996661926567, iteration: 84908
loss: 1.0303142070770264,grad_norm: 0.9999991386338859, iteration: 84909
loss: 1.0107364654541016,grad_norm: 0.9999990922544842, iteration: 84910
loss: 0.9974368214607239,grad_norm: 0.9350453318882633, iteration: 84911
loss: 0.9713322520256042,grad_norm: 0.9999992585094072, iteration: 84912
loss: 1.057252287864685,grad_norm: 0.999999211198467, iteration: 84913
loss: 1.000834584236145,grad_norm: 0.9999990792588944, iteration: 84914
loss: 0.986058235168457,grad_norm: 0.9999992985594024, iteration: 84915
loss: 1.144945740699768,grad_norm: 0.9999996735067838, iteration: 84916
loss: 0.9523494839668274,grad_norm: 0.9493828768861433, iteration: 84917
loss: 1.0029807090759277,grad_norm: 0.8960667656490023, iteration: 84918
loss: 1.0049810409545898,grad_norm: 0.9999992902386461, iteration: 84919
loss: 1.0108691453933716,grad_norm: 0.9999989161849325, iteration: 84920
loss: 1.0453044176101685,grad_norm: 0.9999990240913641, iteration: 84921
loss: 1.031559944152832,grad_norm: 0.9999991909669473, iteration: 84922
loss: 0.9513075947761536,grad_norm: 0.8925450721142189, iteration: 84923
loss: 0.9661785960197449,grad_norm: 0.999999207086379, iteration: 84924
loss: 1.0048117637634277,grad_norm: 0.9999991846194396, iteration: 84925
loss: 1.0169014930725098,grad_norm: 0.9547106747524857, iteration: 84926
loss: 0.9646969437599182,grad_norm: 0.951065503807313, iteration: 84927
loss: 1.0236923694610596,grad_norm: 0.9999993216836857, iteration: 84928
loss: 1.0138053894042969,grad_norm: 0.9139461048467993, iteration: 84929
loss: 1.0167039632797241,grad_norm: 0.999999082744182, iteration: 84930
loss: 0.9862068891525269,grad_norm: 0.9264730743190183, iteration: 84931
loss: 0.9800083041191101,grad_norm: 0.9999991535589332, iteration: 84932
loss: 0.9985281825065613,grad_norm: 0.9999991333580374, iteration: 84933
loss: 1.0013954639434814,grad_norm: 0.9999991255639585, iteration: 84934
loss: 0.949252188205719,grad_norm: 0.9999991065303917, iteration: 84935
loss: 1.0150943994522095,grad_norm: 0.9718952730555548, iteration: 84936
loss: 1.015053391456604,grad_norm: 0.9988012978184904, iteration: 84937
loss: 0.9770161509513855,grad_norm: 0.9085420594420924, iteration: 84938
loss: 1.0562012195587158,grad_norm: 0.9999991540561556, iteration: 84939
loss: 0.9930978417396545,grad_norm: 0.9508647546647235, iteration: 84940
loss: 1.0419089794158936,grad_norm: 0.9999997198118679, iteration: 84941
loss: 1.0256727933883667,grad_norm: 0.999999070427256, iteration: 84942
loss: 1.0026150941848755,grad_norm: 0.9999991042409485, iteration: 84943
loss: 1.0017963647842407,grad_norm: 0.9999991366691265, iteration: 84944
loss: 0.9968189001083374,grad_norm: 0.9999992617824465, iteration: 84945
loss: 1.0778003931045532,grad_norm: 0.9999998858279453, iteration: 84946
loss: 1.0398954153060913,grad_norm: 0.9999990306670649, iteration: 84947
loss: 1.0058807134628296,grad_norm: 0.9999991665291803, iteration: 84948
loss: 1.0113134384155273,grad_norm: 0.9999990990251144, iteration: 84949
loss: 0.9879570007324219,grad_norm: 0.9999991052652588, iteration: 84950
loss: 1.015621304512024,grad_norm: 0.9999992228719776, iteration: 84951
loss: 1.0091527700424194,grad_norm: 0.9999993218356056, iteration: 84952
loss: 0.9626096487045288,grad_norm: 0.9999989936200671, iteration: 84953
loss: 0.9947099685668945,grad_norm: 0.9772628385183593, iteration: 84954
loss: 1.0024837255477905,grad_norm: 0.9999992343649783, iteration: 84955
loss: 0.9592054486274719,grad_norm: 0.9999991909682684, iteration: 84956
loss: 1.0118541717529297,grad_norm: 0.9999990537272317, iteration: 84957
loss: 1.0292097330093384,grad_norm: 0.9999994000685121, iteration: 84958
loss: 1.014058232307434,grad_norm: 0.9827009412569666, iteration: 84959
loss: 1.0110414028167725,grad_norm: 0.7906538740996345, iteration: 84960
loss: 1.0528966188430786,grad_norm: 0.9999992470807302, iteration: 84961
loss: 1.0200505256652832,grad_norm: 0.8814371994433589, iteration: 84962
loss: 1.0124554634094238,grad_norm: 0.8362611318477812, iteration: 84963
loss: 0.9792212247848511,grad_norm: 0.8717920681744034, iteration: 84964
loss: 1.0057952404022217,grad_norm: 0.8919305443579376, iteration: 84965
loss: 1.0111690759658813,grad_norm: 0.9999992938529327, iteration: 84966
loss: 0.9760054349899292,grad_norm: 0.9999991783078299, iteration: 84967
loss: 0.9573236703872681,grad_norm: 0.9399035447653085, iteration: 84968
loss: 0.9983495473861694,grad_norm: 0.9999992437872739, iteration: 84969
loss: 0.9852895736694336,grad_norm: 0.9999993720779861, iteration: 84970
loss: 0.9831811189651489,grad_norm: 0.9999990982387384, iteration: 84971
loss: 0.9896098971366882,grad_norm: 0.9999993377882005, iteration: 84972
loss: 0.9945533871650696,grad_norm: 0.9999992179664848, iteration: 84973
loss: 1.0000486373901367,grad_norm: 0.8619872954853975, iteration: 84974
loss: 0.9976514577865601,grad_norm: 0.9999989518886969, iteration: 84975
loss: 0.9650927782058716,grad_norm: 0.9920700908082754, iteration: 84976
loss: 0.9822291135787964,grad_norm: 0.9999991309298742, iteration: 84977
loss: 1.015492558479309,grad_norm: 0.999998928803408, iteration: 84978
loss: 1.0218111276626587,grad_norm: 0.9437398264909234, iteration: 84979
loss: 1.0071358680725098,grad_norm: 0.9817093154690907, iteration: 84980
loss: 1.0103527307510376,grad_norm: 0.9848821779441156, iteration: 84981
loss: 1.0129351615905762,grad_norm: 0.9999991070883374, iteration: 84982
loss: 1.012282133102417,grad_norm: 0.9999994858987558, iteration: 84983
loss: 1.0044978857040405,grad_norm: 0.9999991119447166, iteration: 84984
loss: 1.0040837526321411,grad_norm: 0.9787970865293147, iteration: 84985
loss: 0.9889672994613647,grad_norm: 0.9999992818365753, iteration: 84986
loss: 1.0137875080108643,grad_norm: 0.9916813686758409, iteration: 84987
loss: 1.0128434896469116,grad_norm: 0.9985986517930888, iteration: 84988
loss: 0.9928128719329834,grad_norm: 0.9999991495854995, iteration: 84989
loss: 0.9987714886665344,grad_norm: 0.9999990240243505, iteration: 84990
loss: 1.0052752494812012,grad_norm: 0.9999996240112505, iteration: 84991
loss: 1.01833176612854,grad_norm: 0.9999992559190631, iteration: 84992
loss: 0.9919085502624512,grad_norm: 0.9206773349408217, iteration: 84993
loss: 0.9957262873649597,grad_norm: 0.9999990428821548, iteration: 84994
loss: 0.9635596871376038,grad_norm: 0.9999992442537203, iteration: 84995
loss: 1.0510144233703613,grad_norm: 0.9999991941537054, iteration: 84996
loss: 1.0277131795883179,grad_norm: 0.8885324201606946, iteration: 84997
loss: 0.9778540134429932,grad_norm: 0.9999990527938332, iteration: 84998
loss: 1.019871473312378,grad_norm: 0.9999990694716211, iteration: 84999
loss: 1.0110039710998535,grad_norm: 0.8940972754229475, iteration: 85000
loss: 1.115364670753479,grad_norm: 0.9999996622296252, iteration: 85001
loss: 0.9861522316932678,grad_norm: 0.9999992278664002, iteration: 85002
loss: 1.0612908601760864,grad_norm: 0.9999991087243563, iteration: 85003
loss: 1.0107920169830322,grad_norm: 0.9460915080731441, iteration: 85004
loss: 1.010221242904663,grad_norm: 0.9999991069769087, iteration: 85005
loss: 0.9755550622940063,grad_norm: 0.9112136306494659, iteration: 85006
loss: 0.9587425589561462,grad_norm: 0.9999990952999904, iteration: 85007
loss: 1.0085885524749756,grad_norm: 0.854897393453727, iteration: 85008
loss: 0.9792234301567078,grad_norm: 0.9999992826268943, iteration: 85009
loss: 0.9870848655700684,grad_norm: 0.9999990204077834, iteration: 85010
loss: 1.035606026649475,grad_norm: 0.8764101651153525, iteration: 85011
loss: 0.9658454656600952,grad_norm: 0.999999082396073, iteration: 85012
loss: 0.9419728517532349,grad_norm: 0.8690374926971574, iteration: 85013
loss: 1.0163443088531494,grad_norm: 0.9825777102504248, iteration: 85014
loss: 0.9957735538482666,grad_norm: 0.9999992143158715, iteration: 85015
loss: 1.0216846466064453,grad_norm: 0.9999992923149162, iteration: 85016
loss: 1.0776251554489136,grad_norm: 0.999999528450946, iteration: 85017
loss: 0.9654014110565186,grad_norm: 0.9999989779946972, iteration: 85018
loss: 1.0011855363845825,grad_norm: 0.9228288003388229, iteration: 85019
loss: 1.0564206838607788,grad_norm: 0.999999847866578, iteration: 85020
loss: 1.0063711404800415,grad_norm: 0.9999991358005232, iteration: 85021
loss: 1.0687669515609741,grad_norm: 0.9999998037078038, iteration: 85022
loss: 0.9832676649093628,grad_norm: 0.9999991646936233, iteration: 85023
loss: 1.0119624137878418,grad_norm: 0.9510597779887235, iteration: 85024
loss: 0.9861797094345093,grad_norm: 0.999999097660131, iteration: 85025
loss: 0.9854391813278198,grad_norm: 0.9552900511519873, iteration: 85026
loss: 0.995183527469635,grad_norm: 0.9999992312692678, iteration: 85027
loss: 0.985566258430481,grad_norm: 0.9999990730686978, iteration: 85028
loss: 1.0228271484375,grad_norm: 0.9252905521203342, iteration: 85029
loss: 1.005806565284729,grad_norm: 0.9999989764790128, iteration: 85030
loss: 1.0200324058532715,grad_norm: 0.9999992137112346, iteration: 85031
loss: 0.9704182147979736,grad_norm: 0.9999990864699583, iteration: 85032
loss: 1.0105888843536377,grad_norm: 0.9999991483563571, iteration: 85033
loss: 0.9643403887748718,grad_norm: 0.9999991643768892, iteration: 85034
loss: 0.9761795401573181,grad_norm: 0.999999238151043, iteration: 85035
loss: 1.0208520889282227,grad_norm: 0.9999991806342059, iteration: 85036
loss: 1.016481637954712,grad_norm: 0.9623969363670585, iteration: 85037
loss: 1.0268467664718628,grad_norm: 0.9552818859252938, iteration: 85038
loss: 1.0093055963516235,grad_norm: 0.9999991209402221, iteration: 85039
loss: 0.9856441020965576,grad_norm: 0.9999991430210072, iteration: 85040
loss: 0.9753932356834412,grad_norm: 0.9951587861062796, iteration: 85041
loss: 0.9995023012161255,grad_norm: 0.9999991243575183, iteration: 85042
loss: 0.9791660904884338,grad_norm: 0.9958222531072657, iteration: 85043
loss: 0.9880247116088867,grad_norm: 0.9999991630859364, iteration: 85044
loss: 1.0185565948486328,grad_norm: 0.9999992549071645, iteration: 85045
loss: 1.012426733970642,grad_norm: 0.999999097014817, iteration: 85046
loss: 1.0335131883621216,grad_norm: 0.9999995768606067, iteration: 85047
loss: 1.0289369821548462,grad_norm: 0.9999991568749936, iteration: 85048
loss: 0.9733073711395264,grad_norm: 0.9999991284023564, iteration: 85049
loss: 0.9816107749938965,grad_norm: 0.9109571119834716, iteration: 85050
loss: 1.008267879486084,grad_norm: 0.9754844624246756, iteration: 85051
loss: 0.9883029460906982,grad_norm: 0.9444501947368295, iteration: 85052
loss: 1.0093848705291748,grad_norm: 0.9409768764948084, iteration: 85053
loss: 0.9962073564529419,grad_norm: 0.9999990283942249, iteration: 85054
loss: 0.9551658034324646,grad_norm: 0.9999992377091064, iteration: 85055
loss: 0.9952374696731567,grad_norm: 0.9999989248589867, iteration: 85056
loss: 0.993913471698761,grad_norm: 0.9999992164302981, iteration: 85057
loss: 0.9703108668327332,grad_norm: 0.9999990919945887, iteration: 85058
loss: 0.9626920223236084,grad_norm: 0.9999991855722137, iteration: 85059
loss: 1.0423550605773926,grad_norm: 0.9999994805667912, iteration: 85060
loss: 1.043798804283142,grad_norm: 0.9999991258225728, iteration: 85061
loss: 0.964652955532074,grad_norm: 0.9999992730226428, iteration: 85062
loss: 0.9983958005905151,grad_norm: 0.9999992425005084, iteration: 85063
loss: 1.0084385871887207,grad_norm: 0.9999992722305323, iteration: 85064
loss: 0.9772300720214844,grad_norm: 0.9787622307460387, iteration: 85065
loss: 1.0208029747009277,grad_norm: 0.9863559505546727, iteration: 85066
loss: 0.9991962909698486,grad_norm: 0.9648223222603418, iteration: 85067
loss: 0.9851982593536377,grad_norm: 0.9999991127606568, iteration: 85068
loss: 0.9865712523460388,grad_norm: 0.9999992103539508, iteration: 85069
loss: 0.9671171307563782,grad_norm: 0.999999049406095, iteration: 85070
loss: 1.0030688047409058,grad_norm: 0.9999990937446107, iteration: 85071
loss: 0.993581235408783,grad_norm: 0.9003060587631796, iteration: 85072
loss: 0.9622000455856323,grad_norm: 0.9999993164696372, iteration: 85073
loss: 1.0862064361572266,grad_norm: 0.9999996048307199, iteration: 85074
loss: 1.0220469236373901,grad_norm: 0.9999991670418368, iteration: 85075
loss: 1.0182914733886719,grad_norm: 0.9999991789604675, iteration: 85076
loss: 0.984510064125061,grad_norm: 0.999999177913308, iteration: 85077
loss: 0.9511818289756775,grad_norm: 0.9999990872451473, iteration: 85078
loss: 0.9962878227233887,grad_norm: 0.9999990442050639, iteration: 85079
loss: 1.0012257099151611,grad_norm: 0.955493742361736, iteration: 85080
loss: 1.0025776624679565,grad_norm: 0.9999989464782664, iteration: 85081
loss: 0.9838769435882568,grad_norm: 0.999999151333577, iteration: 85082
loss: 0.9730132222175598,grad_norm: 0.9999990365730098, iteration: 85083
loss: 0.9386646747589111,grad_norm: 0.9537000909069598, iteration: 85084
loss: 0.9803438782691956,grad_norm: 0.999999084816083, iteration: 85085
loss: 0.9915314316749573,grad_norm: 0.9234491849882506, iteration: 85086
loss: 1.002094030380249,grad_norm: 0.9416664976518586, iteration: 85087
loss: 0.9720687866210938,grad_norm: 0.9999991431653348, iteration: 85088
loss: 1.0159615278244019,grad_norm: 0.9999993419163989, iteration: 85089
loss: 0.9736813902854919,grad_norm: 0.9999992213447566, iteration: 85090
loss: 0.9588807225227356,grad_norm: 0.9999994612035301, iteration: 85091
loss: 1.0035518407821655,grad_norm: 0.999999070466502, iteration: 85092
loss: 0.9735320210456848,grad_norm: 0.9999990506551375, iteration: 85093
loss: 0.9573840498924255,grad_norm: 0.9999992273470116, iteration: 85094
loss: 1.0110180377960205,grad_norm: 0.9999992812861672, iteration: 85095
loss: 1.0039695501327515,grad_norm: 0.9999990108185134, iteration: 85096
loss: 1.0475668907165527,grad_norm: 0.9999991211229224, iteration: 85097
loss: 0.9875816106796265,grad_norm: 0.9999991442826887, iteration: 85098
loss: 0.9582864046096802,grad_norm: 0.9999990849442097, iteration: 85099
loss: 0.98116534948349,grad_norm: 0.9999990338005115, iteration: 85100
loss: 1.0225868225097656,grad_norm: 0.9355899721159086, iteration: 85101
loss: 1.0029921531677246,grad_norm: 0.949601996749186, iteration: 85102
loss: 1.018229365348816,grad_norm: 0.9999991908570158, iteration: 85103
loss: 0.9693425893783569,grad_norm: 0.999999146539649, iteration: 85104
loss: 1.0231026411056519,grad_norm: 0.9980491783739907, iteration: 85105
loss: 1.0149245262145996,grad_norm: 0.9999993795707006, iteration: 85106
loss: 1.0470107793807983,grad_norm: 0.9150596571526549, iteration: 85107
loss: 0.9973565936088562,grad_norm: 0.9999988332418124, iteration: 85108
loss: 1.007371425628662,grad_norm: 0.9999991324311491, iteration: 85109
loss: 0.9882326126098633,grad_norm: 0.9999989768249717, iteration: 85110
loss: 0.9742512702941895,grad_norm: 0.999999036365158, iteration: 85111
loss: 1.0101898908615112,grad_norm: 0.9999991178876534, iteration: 85112
loss: 0.9943861365318298,grad_norm: 0.908772384687845, iteration: 85113
loss: 1.0555055141448975,grad_norm: 0.9999998659561165, iteration: 85114
loss: 0.9622900485992432,grad_norm: 0.9999989741709977, iteration: 85115
loss: 1.0454847812652588,grad_norm: 0.9999994198765869, iteration: 85116
loss: 0.959162175655365,grad_norm: 0.9223465109343437, iteration: 85117
loss: 1.0164417028427124,grad_norm: 0.9191299334297112, iteration: 85118
loss: 0.9881845712661743,grad_norm: 0.999999052414369, iteration: 85119
loss: 0.9727570414543152,grad_norm: 0.9999990931675707, iteration: 85120
loss: 1.016811490058899,grad_norm: 0.9999999267657438, iteration: 85121
loss: 1.009777545928955,grad_norm: 0.9999992884337054, iteration: 85122
loss: 1.0115431547164917,grad_norm: 0.9999990563773552, iteration: 85123
loss: 1.0252000093460083,grad_norm: 0.9999991006349475, iteration: 85124
loss: 0.9841468334197998,grad_norm: 0.9999990969449507, iteration: 85125
loss: 0.9732828140258789,grad_norm: 0.9999989265252399, iteration: 85126
loss: 0.9901226162910461,grad_norm: 0.9999992671269846, iteration: 85127
loss: 1.034879446029663,grad_norm: 0.9999991336882795, iteration: 85128
loss: 0.9698315858840942,grad_norm: 0.9992591020196929, iteration: 85129
loss: 0.9823626279830933,grad_norm: 0.9999991766601788, iteration: 85130
loss: 1.013628363609314,grad_norm: 0.9668450200250823, iteration: 85131
loss: 0.9729723334312439,grad_norm: 0.9409425790367376, iteration: 85132
loss: 1.0019562244415283,grad_norm: 0.9246959475645017, iteration: 85133
loss: 0.9803371429443359,grad_norm: 0.9999990650868978, iteration: 85134
loss: 0.9882550239562988,grad_norm: 0.9999990753313444, iteration: 85135
loss: 1.0288288593292236,grad_norm: 0.9999991559332964, iteration: 85136
loss: 0.9602146148681641,grad_norm: 0.884408520615383, iteration: 85137
loss: 0.99833083152771,grad_norm: 0.9999991475508069, iteration: 85138
loss: 1.0104948282241821,grad_norm: 0.9999990032067454, iteration: 85139
loss: 1.0280355215072632,grad_norm: 0.9818297306238601, iteration: 85140
loss: 1.0231750011444092,grad_norm: 0.9645771814998534, iteration: 85141
loss: 1.0016132593154907,grad_norm: 0.9999991011887532, iteration: 85142
loss: 0.9893417358398438,grad_norm: 0.9999991081393605, iteration: 85143
loss: 1.017195224761963,grad_norm: 0.9999990424038557, iteration: 85144
loss: 1.0994573831558228,grad_norm: 0.9999991001352243, iteration: 85145
loss: 0.974632978439331,grad_norm: 0.9999993382194695, iteration: 85146
loss: 1.0346877574920654,grad_norm: 0.9999990900726898, iteration: 85147
loss: 0.9850141406059265,grad_norm: 0.8653982511034981, iteration: 85148
loss: 0.9715280532836914,grad_norm: 0.9999991744452904, iteration: 85149
loss: 0.9773540496826172,grad_norm: 0.952334509606596, iteration: 85150
loss: 0.9782214760780334,grad_norm: 0.9999992147975185, iteration: 85151
loss: 1.0092884302139282,grad_norm: 0.9773627696513618, iteration: 85152
loss: 1.0044074058532715,grad_norm: 0.9997251841348995, iteration: 85153
loss: 1.0065436363220215,grad_norm: 0.999999658317179, iteration: 85154
loss: 1.0141644477844238,grad_norm: 0.9999990249201555, iteration: 85155
loss: 0.9795993566513062,grad_norm: 0.9999990519718575, iteration: 85156
loss: 0.9620254635810852,grad_norm: 0.9170519839131638, iteration: 85157
loss: 0.9635124802589417,grad_norm: 0.992984586217225, iteration: 85158
loss: 0.9875471591949463,grad_norm: 0.8461441937792807, iteration: 85159
loss: 1.0295368432998657,grad_norm: 0.9999992154995024, iteration: 85160
loss: 0.9701617360115051,grad_norm: 0.999999056918679, iteration: 85161
loss: 1.0243585109710693,grad_norm: 0.999999132071241, iteration: 85162
loss: 0.9688960313796997,grad_norm: 0.9999992381993085, iteration: 85163
loss: 1.006434679031372,grad_norm: 0.9919307359144149, iteration: 85164
loss: 0.9606348872184753,grad_norm: 0.9999990680822382, iteration: 85165
loss: 0.9857611060142517,grad_norm: 0.9999991072677314, iteration: 85166
loss: 1.0003852844238281,grad_norm: 0.961133170571834, iteration: 85167
loss: 1.0293822288513184,grad_norm: 0.9999991898927312, iteration: 85168
loss: 1.0389080047607422,grad_norm: 0.9999991435156834, iteration: 85169
loss: 0.9824588894844055,grad_norm: 0.9999993788376845, iteration: 85170
loss: 0.9955065250396729,grad_norm: 0.9999991651402708, iteration: 85171
loss: 0.9483250975608826,grad_norm: 0.999999218754178, iteration: 85172
loss: 0.9929175972938538,grad_norm: 0.9455940817294959, iteration: 85173
loss: 1.0215723514556885,grad_norm: 0.9999991599798775, iteration: 85174
loss: 1.0141098499298096,grad_norm: 0.9999991014734981, iteration: 85175
loss: 1.031453251838684,grad_norm: 0.9999991836981975, iteration: 85176
loss: 0.976870059967041,grad_norm: 0.9832734900148223, iteration: 85177
loss: 1.0163394212722778,grad_norm: 0.9051974851546359, iteration: 85178
loss: 1.0106645822525024,grad_norm: 0.9338250917315772, iteration: 85179
loss: 0.9997699856758118,grad_norm: 0.9999991218590965, iteration: 85180
loss: 0.9688100814819336,grad_norm: 0.9999992460954636, iteration: 85181
loss: 1.0207512378692627,grad_norm: 0.9999990964111254, iteration: 85182
loss: 1.0078158378601074,grad_norm: 0.9999991964041203, iteration: 85183
loss: 1.060318946838379,grad_norm: 0.9999993557156106, iteration: 85184
loss: 1.0352253913879395,grad_norm: 0.9999991783230123, iteration: 85185
loss: 1.0394827127456665,grad_norm: 0.9999996317108749, iteration: 85186
loss: 1.0017622709274292,grad_norm: 0.9841738115207492, iteration: 85187
loss: 0.9725795984268188,grad_norm: 0.9800214165507756, iteration: 85188
loss: 1.0325007438659668,grad_norm: 0.9856628496304997, iteration: 85189
loss: 1.0092453956604004,grad_norm: 0.9999992063037698, iteration: 85190
loss: 1.0011725425720215,grad_norm: 0.999999444858879, iteration: 85191
loss: 0.9713472723960876,grad_norm: 0.9999992556273547, iteration: 85192
loss: 0.9949662685394287,grad_norm: 0.999999130856909, iteration: 85193
loss: 1.028339147567749,grad_norm: 0.9645707416360075, iteration: 85194
loss: 1.0048316717147827,grad_norm: 0.9891487443098259, iteration: 85195
loss: 0.9881399273872375,grad_norm: 0.9999991558480189, iteration: 85196
loss: 0.9933724403381348,grad_norm: 0.8874414992674813, iteration: 85197
loss: 0.981942892074585,grad_norm: 0.9999989713166735, iteration: 85198
loss: 1.013579249382019,grad_norm: 0.9999991289728972, iteration: 85199
loss: 1.0301414728164673,grad_norm: 0.9999990779151197, iteration: 85200
loss: 1.0246562957763672,grad_norm: 0.958658210631492, iteration: 85201
loss: 0.9992235898971558,grad_norm: 0.9999991233364428, iteration: 85202
loss: 1.0083879232406616,grad_norm: 0.9999992772593972, iteration: 85203
loss: 1.014111876487732,grad_norm: 0.9999992008220915, iteration: 85204
loss: 0.9924726486206055,grad_norm: 0.9999997203556301, iteration: 85205
loss: 0.9660859704017639,grad_norm: 0.9205667250502239, iteration: 85206
loss: 0.9757823348045349,grad_norm: 0.9999993127899314, iteration: 85207
loss: 1.011933445930481,grad_norm: 0.9999992349654946, iteration: 85208
loss: 0.99843829870224,grad_norm: 0.9999997265266123, iteration: 85209
loss: 1.0185681581497192,grad_norm: 0.9999990650714734, iteration: 85210
loss: 1.0153863430023193,grad_norm: 0.999999200985696, iteration: 85211
loss: 0.9924988746643066,grad_norm: 0.9999994660664381, iteration: 85212
loss: 1.000868320465088,grad_norm: 0.9999991416429881, iteration: 85213
loss: 0.9752560257911682,grad_norm: 0.9999993311543257, iteration: 85214
loss: 1.0016998052597046,grad_norm: 0.897041835447578, iteration: 85215
loss: 1.0488531589508057,grad_norm: 0.9999999264013858, iteration: 85216
loss: 1.0154402256011963,grad_norm: 0.9999991598561744, iteration: 85217
loss: 0.9859220385551453,grad_norm: 0.9999990343211935, iteration: 85218
loss: 1.0245418548583984,grad_norm: 0.9999991503183641, iteration: 85219
loss: 0.9918044209480286,grad_norm: 0.9999990064474894, iteration: 85220
loss: 1.0380827188491821,grad_norm: 0.9483433448062987, iteration: 85221
loss: 1.022390365600586,grad_norm: 0.9999991100624597, iteration: 85222
loss: 1.0466570854187012,grad_norm: 0.9999991731292576, iteration: 85223
loss: 0.9943549633026123,grad_norm: 0.999999435562963, iteration: 85224
loss: 1.071821689605713,grad_norm: 0.9999992282818171, iteration: 85225
loss: 0.9816170334815979,grad_norm: 0.9999990733399071, iteration: 85226
loss: 1.019237995147705,grad_norm: 0.970090915695398, iteration: 85227
loss: 1.0074971914291382,grad_norm: 0.9036497186760641, iteration: 85228
loss: 0.9744232296943665,grad_norm: 0.9397331469837022, iteration: 85229
loss: 0.9910563230514526,grad_norm: 0.8995690329346535, iteration: 85230
loss: 1.014110803604126,grad_norm: 0.9643933332678597, iteration: 85231
loss: 1.014288067817688,grad_norm: 0.8833617675345137, iteration: 85232
loss: 1.022723913192749,grad_norm: 0.8986938024297032, iteration: 85233
loss: 1.023438572883606,grad_norm: 0.9999993690052167, iteration: 85234
loss: 1.0331964492797852,grad_norm: 0.9999992602626792, iteration: 85235
loss: 0.9830981492996216,grad_norm: 0.9999990665611849, iteration: 85236
loss: 0.990881085395813,grad_norm: 0.9999990615190072, iteration: 85237
loss: 1.0119282007217407,grad_norm: 0.9600991527678793, iteration: 85238
loss: 1.0606093406677246,grad_norm: 0.9999991283295303, iteration: 85239
loss: 1.0116682052612305,grad_norm: 0.999999150251405, iteration: 85240
loss: 0.9834259748458862,grad_norm: 0.9999990548891593, iteration: 85241
loss: 1.0085960626602173,grad_norm: 0.9466841846582799, iteration: 85242
loss: 1.0315386056900024,grad_norm: 0.9999995709982707, iteration: 85243
loss: 0.9684082269668579,grad_norm: 0.9217845055976636, iteration: 85244
loss: 1.0188989639282227,grad_norm: 0.9999990182262279, iteration: 85245
loss: 1.0164568424224854,grad_norm: 0.9805869990438018, iteration: 85246
loss: 0.9367771148681641,grad_norm: 0.9999991712916424, iteration: 85247
loss: 1.0395954847335815,grad_norm: 0.9986650013775286, iteration: 85248
loss: 1.0054676532745361,grad_norm: 0.9999991392546287, iteration: 85249
loss: 0.9835224151611328,grad_norm: 0.9759764769143456, iteration: 85250
loss: 1.0247684717178345,grad_norm: 0.9999990596347087, iteration: 85251
loss: 1.016304612159729,grad_norm: 0.9338693020477837, iteration: 85252
loss: 0.9757903218269348,grad_norm: 0.9999990910639092, iteration: 85253
loss: 0.9741056561470032,grad_norm: 0.9999991293326742, iteration: 85254
loss: 0.9895590543746948,grad_norm: 0.9999991214524286, iteration: 85255
loss: 1.0418217182159424,grad_norm: 0.9999991583593443, iteration: 85256
loss: 1.0102351903915405,grad_norm: 0.8095719144255611, iteration: 85257
loss: 1.0380357503890991,grad_norm: 0.9999994051257499, iteration: 85258
loss: 0.9688963890075684,grad_norm: 0.9999991774799968, iteration: 85259
loss: 1.006477952003479,grad_norm: 0.9016541952157708, iteration: 85260
loss: 1.0070991516113281,grad_norm: 0.999999761664519, iteration: 85261
loss: 1.0171259641647339,grad_norm: 0.9999990451670757, iteration: 85262
loss: 0.9779820442199707,grad_norm: 0.9999990726101746, iteration: 85263
loss: 1.0140241384506226,grad_norm: 0.999999197263954, iteration: 85264
loss: 1.0406779050827026,grad_norm: 0.9999992438800769, iteration: 85265
loss: 0.9904681444168091,grad_norm: 0.9999990157462957, iteration: 85266
loss: 0.9900980591773987,grad_norm: 0.9447843459743774, iteration: 85267
loss: 0.9985058307647705,grad_norm: 0.9047150627496207, iteration: 85268
loss: 0.977037250995636,grad_norm: 0.8858186546776023, iteration: 85269
loss: 1.053749680519104,grad_norm: 0.9999993983531951, iteration: 85270
loss: 1.0577116012573242,grad_norm: 0.9999991812993645, iteration: 85271
loss: 0.9887837767601013,grad_norm: 0.9926435478197436, iteration: 85272
loss: 0.977544367313385,grad_norm: 0.9999992579816808, iteration: 85273
loss: 0.9991172552108765,grad_norm: 0.9999993046441354, iteration: 85274
loss: 1.0028899908065796,grad_norm: 0.9999991230692504, iteration: 85275
loss: 0.9657111763954163,grad_norm: 0.9478535427911797, iteration: 85276
loss: 0.9793877005577087,grad_norm: 0.8682977349585376, iteration: 85277
loss: 1.009540319442749,grad_norm: 0.9691622767443079, iteration: 85278
loss: 0.9840270280838013,grad_norm: 0.9934021191073829, iteration: 85279
loss: 0.9936049580574036,grad_norm: 0.8288511899356753, iteration: 85280
loss: 0.9924353957176208,grad_norm: 0.9999992238228954, iteration: 85281
loss: 1.013497233390808,grad_norm: 0.9999992619549868, iteration: 85282
loss: 0.96368807554245,grad_norm: 0.999999180039158, iteration: 85283
loss: 1.0087025165557861,grad_norm: 0.9862924942521153, iteration: 85284
loss: 1.009596347808838,grad_norm: 0.9575015341632171, iteration: 85285
loss: 0.9936896562576294,grad_norm: 0.7950789793019578, iteration: 85286
loss: 1.0552579164505005,grad_norm: 0.9999993903132041, iteration: 85287
loss: 1.025378704071045,grad_norm: 0.9895912247306026, iteration: 85288
loss: 1.0047972202301025,grad_norm: 0.9999990359116615, iteration: 85289
loss: 0.9703747630119324,grad_norm: 0.9999990200286675, iteration: 85290
loss: 0.975084662437439,grad_norm: 0.9643577200210527, iteration: 85291
loss: 0.9927976727485657,grad_norm: 0.9698312621959264, iteration: 85292
loss: 0.9794238209724426,grad_norm: 0.9999990822781327, iteration: 85293
loss: 0.9808610677719116,grad_norm: 0.9999992759928776, iteration: 85294
loss: 0.9879843592643738,grad_norm: 0.9999990799738427, iteration: 85295
loss: 0.9639651775360107,grad_norm: 0.999999125877149, iteration: 85296
loss: 0.995976984500885,grad_norm: 0.9999991486128952, iteration: 85297
loss: 0.9958785772323608,grad_norm: 0.9999992739726137, iteration: 85298
loss: 0.9880412817001343,grad_norm: 0.9999990215001321, iteration: 85299
loss: 0.9828144907951355,grad_norm: 0.9999991171196188, iteration: 85300
loss: 1.000950574874878,grad_norm: 0.9999991514317074, iteration: 85301
loss: 0.9836719036102295,grad_norm: 0.9999990537171035, iteration: 85302
loss: 0.9787657260894775,grad_norm: 0.9999991708027215, iteration: 85303
loss: 1.0777926445007324,grad_norm: 0.9097968983417214, iteration: 85304
loss: 1.021388053894043,grad_norm: 0.9789897584565508, iteration: 85305
loss: 0.9705749154090881,grad_norm: 0.8815944393612786, iteration: 85306
loss: 1.0263454914093018,grad_norm: 0.918344606165926, iteration: 85307
loss: 0.9969236254692078,grad_norm: 0.9999990155805694, iteration: 85308
loss: 0.985788106918335,grad_norm: 0.9999995662088215, iteration: 85309
loss: 0.9668270349502563,grad_norm: 0.9460336389302332, iteration: 85310
loss: 0.9987048506736755,grad_norm: 0.9743026019514178, iteration: 85311
loss: 1.0389608144760132,grad_norm: 0.9505239910378299, iteration: 85312
loss: 0.9683353900909424,grad_norm: 0.9999991434787032, iteration: 85313
loss: 1.008764624595642,grad_norm: 0.9822133531081594, iteration: 85314
loss: 1.0111348628997803,grad_norm: 0.9999991971490936, iteration: 85315
loss: 1.0019516944885254,grad_norm: 0.9940844427980993, iteration: 85316
loss: 0.9934062361717224,grad_norm: 0.9420014851679978, iteration: 85317
loss: 1.0556029081344604,grad_norm: 0.9999991108392032, iteration: 85318
loss: 1.0375746488571167,grad_norm: 0.9355254507082273, iteration: 85319
loss: 0.9848488569259644,grad_norm: 0.9999991202795517, iteration: 85320
loss: 0.992396354675293,grad_norm: 0.9431723443632166, iteration: 85321
loss: 0.9761504530906677,grad_norm: 0.957909315808861, iteration: 85322
loss: 1.0129988193511963,grad_norm: 0.9999989870838867, iteration: 85323
loss: 0.9770334959030151,grad_norm: 0.9999990214188051, iteration: 85324
loss: 0.9707803726196289,grad_norm: 0.9999992320627987, iteration: 85325
loss: 1.0456008911132812,grad_norm: 0.9999994472838833, iteration: 85326
loss: 0.9909376502037048,grad_norm: 0.999999073081501, iteration: 85327
loss: 0.9766455292701721,grad_norm: 0.9999991906589611, iteration: 85328
loss: 0.9943366646766663,grad_norm: 0.956049647041325, iteration: 85329
loss: 0.9894204139709473,grad_norm: 0.8973922422746954, iteration: 85330
loss: 1.0212137699127197,grad_norm: 0.9660514190029565, iteration: 85331
loss: 1.0153366327285767,grad_norm: 0.9999991984646925, iteration: 85332
loss: 1.0184566974639893,grad_norm: 0.7993607555212768, iteration: 85333
loss: 0.9874412417411804,grad_norm: 0.8314631483465457, iteration: 85334
loss: 0.9784888625144958,grad_norm: 0.9999989171769983, iteration: 85335
loss: 0.968915581703186,grad_norm: 0.8204828144885211, iteration: 85336
loss: 0.995674192905426,grad_norm: 0.9252473397176242, iteration: 85337
loss: 1.0128087997436523,grad_norm: 0.9999991258549, iteration: 85338
loss: 1.006998062133789,grad_norm: 0.999999085252204, iteration: 85339
loss: 1.0177992582321167,grad_norm: 0.9999991423493132, iteration: 85340
loss: 0.9587382674217224,grad_norm: 0.987418258295119, iteration: 85341
loss: 1.0202351808547974,grad_norm: 0.8962394403964604, iteration: 85342
loss: 1.0680451393127441,grad_norm: 0.999999478288555, iteration: 85343
loss: 1.0076028108596802,grad_norm: 0.9999997239526215, iteration: 85344
loss: 0.9925345182418823,grad_norm: 0.999999187006692, iteration: 85345
loss: 0.9791674613952637,grad_norm: 0.9999990410562968, iteration: 85346
loss: 0.995789110660553,grad_norm: 0.9093064666565932, iteration: 85347
loss: 1.0546798706054688,grad_norm: 0.8887113604350586, iteration: 85348
loss: 1.0001531839370728,grad_norm: 0.8899267358011052, iteration: 85349
loss: 1.0498778820037842,grad_norm: 0.999999235061611, iteration: 85350
loss: 1.0154155492782593,grad_norm: 0.9999990366906601, iteration: 85351
loss: 0.9887571930885315,grad_norm: 0.9047793190284376, iteration: 85352
loss: 0.9968528151512146,grad_norm: 0.8979716690806852, iteration: 85353
loss: 1.0161479711532593,grad_norm: 0.9999991437851845, iteration: 85354
loss: 0.9907867908477783,grad_norm: 0.9586362164994918, iteration: 85355
loss: 1.0159225463867188,grad_norm: 0.9999990049251024, iteration: 85356
loss: 1.007023811340332,grad_norm: 0.9207704626217906, iteration: 85357
loss: 0.9885478019714355,grad_norm: 0.9999990443273511, iteration: 85358
loss: 1.004248023033142,grad_norm: 0.954175680130871, iteration: 85359
loss: 0.9781290888786316,grad_norm: 0.8554135194821672, iteration: 85360
loss: 0.9814882874488831,grad_norm: 0.9999990440732252, iteration: 85361
loss: 0.9870359301567078,grad_norm: 0.9057687189057807, iteration: 85362
loss: 1.0033352375030518,grad_norm: 0.9999993092646782, iteration: 85363
loss: 1.0134862661361694,grad_norm: 0.9741757397573544, iteration: 85364
loss: 1.0123391151428223,grad_norm: 0.99999930631285, iteration: 85365
loss: 1.0289415121078491,grad_norm: 0.9999994320099307, iteration: 85366
loss: 0.9872731566429138,grad_norm: 0.9944140287769792, iteration: 85367
loss: 0.9739977717399597,grad_norm: 0.999999379420083, iteration: 85368
loss: 0.9589642882347107,grad_norm: 0.973304877786471, iteration: 85369
loss: 1.0118316411972046,grad_norm: 0.9999994370397753, iteration: 85370
loss: 0.9823282957077026,grad_norm: 0.9999991676368505, iteration: 85371
loss: 0.9886208176612854,grad_norm: 0.8900171923624055, iteration: 85372
loss: 0.9894197583198547,grad_norm: 0.9999990231380096, iteration: 85373
loss: 0.9880766868591309,grad_norm: 0.9999996910792223, iteration: 85374
loss: 0.9985252618789673,grad_norm: 0.8899548216011113, iteration: 85375
loss: 1.004028558731079,grad_norm: 0.9999991092898756, iteration: 85376
loss: 0.9726392030715942,grad_norm: 0.986412322177051, iteration: 85377
loss: 1.0278385877609253,grad_norm: 0.9544253636147682, iteration: 85378
loss: 1.0250478982925415,grad_norm: 0.861469614955623, iteration: 85379
loss: 0.9766085743904114,grad_norm: 0.7973924597839931, iteration: 85380
loss: 0.9807021021842957,grad_norm: 0.9999990515184434, iteration: 85381
loss: 0.98836350440979,grad_norm: 0.9999992980010066, iteration: 85382
loss: 1.0152751207351685,grad_norm: 0.9999991827854187, iteration: 85383
loss: 0.9956050515174866,grad_norm: 0.8297574226718202, iteration: 85384
loss: 0.976095974445343,grad_norm: 0.9999992917886812, iteration: 85385
loss: 0.9722068309783936,grad_norm: 0.9999994013014522, iteration: 85386
loss: 1.0026129484176636,grad_norm: 0.9999992045392222, iteration: 85387
loss: 0.9927319884300232,grad_norm: 0.9800168578329809, iteration: 85388
loss: 0.9691089987754822,grad_norm: 0.9999992029506654, iteration: 85389
loss: 1.0165213346481323,grad_norm: 0.8831429630078539, iteration: 85390
loss: 1.041040062904358,grad_norm: 0.999999965148296, iteration: 85391
loss: 1.010170817375183,grad_norm: 0.9999992305588274, iteration: 85392
loss: 0.9880379438400269,grad_norm: 0.9581437159052647, iteration: 85393
loss: 0.9766052961349487,grad_norm: 0.9689568636697428, iteration: 85394
loss: 0.9721447229385376,grad_norm: 0.9999990525884916, iteration: 85395
loss: 1.0259684324264526,grad_norm: 0.9999992495879219, iteration: 85396
loss: 0.991030216217041,grad_norm: 0.9999993090462285, iteration: 85397
loss: 0.9732804894447327,grad_norm: 0.9092531582957762, iteration: 85398
loss: 1.0390880107879639,grad_norm: 0.9999990061651371, iteration: 85399
loss: 0.984135627746582,grad_norm: 0.9889580111506757, iteration: 85400
loss: 0.994244396686554,grad_norm: 0.9999990806469186, iteration: 85401
loss: 1.013628363609314,grad_norm: 0.9999990844778167, iteration: 85402
loss: 1.0303254127502441,grad_norm: 0.9999990698934932, iteration: 85403
loss: 1.013856291770935,grad_norm: 0.9776260975984273, iteration: 85404
loss: 0.9746695756912231,grad_norm: 0.9999991887903203, iteration: 85405
loss: 1.0034288167953491,grad_norm: 0.9987042292810977, iteration: 85406
loss: 0.9907503128051758,grad_norm: 0.9999992875474816, iteration: 85407
loss: 0.9936724305152893,grad_norm: 0.9999992359233425, iteration: 85408
loss: 0.9903460144996643,grad_norm: 0.9999991926559292, iteration: 85409
loss: 0.978747546672821,grad_norm: 0.9999991513080694, iteration: 85410
loss: 0.9996718764305115,grad_norm: 0.9999992177054938, iteration: 85411
loss: 1.002911925315857,grad_norm: 0.9333580551116488, iteration: 85412
loss: 1.0047799348831177,grad_norm: 0.9999992549352426, iteration: 85413
loss: 0.9872987270355225,grad_norm: 0.9999991409683292, iteration: 85414
loss: 0.9802958965301514,grad_norm: 0.9999990096508211, iteration: 85415
loss: 0.9822275042533875,grad_norm: 0.9171232014233861, iteration: 85416
loss: 1.0484037399291992,grad_norm: 0.9999992528973648, iteration: 85417
loss: 0.9869760274887085,grad_norm: 0.8693341519414438, iteration: 85418
loss: 0.9919523000717163,grad_norm: 0.9999993325310561, iteration: 85419
loss: 1.0432804822921753,grad_norm: 0.984691238139641, iteration: 85420
loss: 0.9909243583679199,grad_norm: 0.9999990722176795, iteration: 85421
loss: 1.0056899785995483,grad_norm: 0.9999989453344802, iteration: 85422
loss: 0.9926977753639221,grad_norm: 0.9999992633215482, iteration: 85423
loss: 1.0094056129455566,grad_norm: 0.9999993020784873, iteration: 85424
loss: 1.0133092403411865,grad_norm: 0.8767694934498473, iteration: 85425
loss: 0.9676324725151062,grad_norm: 0.8835315154672498, iteration: 85426
loss: 0.973842978477478,grad_norm: 0.9999991480012613, iteration: 85427
loss: 0.9540520906448364,grad_norm: 0.9999990753694914, iteration: 85428
loss: 0.9910764098167419,grad_norm: 0.8438436578006838, iteration: 85429
loss: 0.9553194642066956,grad_norm: 0.9999993158967114, iteration: 85430
loss: 0.9856644868850708,grad_norm: 0.9999989650890404, iteration: 85431
loss: 0.9843363761901855,grad_norm: 0.9512559380366062, iteration: 85432
loss: 1.0115057229995728,grad_norm: 0.9999992268094772, iteration: 85433
loss: 0.9800915718078613,grad_norm: 0.9999989831539726, iteration: 85434
loss: 1.0202878713607788,grad_norm: 0.9999990926974939, iteration: 85435
loss: 1.0360347032546997,grad_norm: 0.9999989641052982, iteration: 85436
loss: 0.9966917037963867,grad_norm: 0.9999992077088553, iteration: 85437
loss: 1.025671124458313,grad_norm: 0.9999992617993945, iteration: 85438
loss: 0.994388997554779,grad_norm: 0.997363052599463, iteration: 85439
loss: 0.9975770711898804,grad_norm: 0.9999991720479756, iteration: 85440
loss: 1.0411440134048462,grad_norm: 0.9999990316695954, iteration: 85441
loss: 1.0083489418029785,grad_norm: 0.8724364771418484, iteration: 85442
loss: 1.0507800579071045,grad_norm: 0.9999990987241487, iteration: 85443
loss: 1.0011249780654907,grad_norm: 0.9999990443781, iteration: 85444
loss: 1.046907663345337,grad_norm: 0.9844723524814802, iteration: 85445
loss: 0.9998955130577087,grad_norm: 0.9879307796795348, iteration: 85446
loss: 1.0149881839752197,grad_norm: 0.9999992987679125, iteration: 85447
loss: 1.0237491130828857,grad_norm: 0.9999991123533584, iteration: 85448
loss: 1.0322785377502441,grad_norm: 0.999999564270448, iteration: 85449
loss: 1.0213119983673096,grad_norm: 0.9354000565434976, iteration: 85450
loss: 1.0189995765686035,grad_norm: 0.9999991153599987, iteration: 85451
loss: 1.045872688293457,grad_norm: 0.9999990056090206, iteration: 85452
loss: 0.9564196467399597,grad_norm: 0.9999990026619722, iteration: 85453
loss: 1.0075011253356934,grad_norm: 0.9999990264482084, iteration: 85454
loss: 1.0042786598205566,grad_norm: 0.999998945232922, iteration: 85455
loss: 1.0257774591445923,grad_norm: 0.9464970838390766, iteration: 85456
loss: 0.9956657886505127,grad_norm: 0.9999991365318336, iteration: 85457
loss: 1.026347279548645,grad_norm: 0.9999990760621715, iteration: 85458
loss: 0.981478750705719,grad_norm: 0.8513608630353154, iteration: 85459
loss: 1.0348769426345825,grad_norm: 0.9999996583952644, iteration: 85460
loss: 1.014443039894104,grad_norm: 0.9965575333386426, iteration: 85461
loss: 1.0179599523544312,grad_norm: 0.9999989905681483, iteration: 85462
loss: 1.0145384073257446,grad_norm: 0.8893364008854163, iteration: 85463
loss: 0.9972216486930847,grad_norm: 0.9999990479476395, iteration: 85464
loss: 1.0114566087722778,grad_norm: 0.9999990069689898, iteration: 85465
loss: 1.0148591995239258,grad_norm: 0.9999992170391352, iteration: 85466
loss: 0.9918882846832275,grad_norm: 0.8791395937825367, iteration: 85467
loss: 1.0028027296066284,grad_norm: 0.9999992946209091, iteration: 85468
loss: 0.9833481311798096,grad_norm: 0.9999990854171932, iteration: 85469
loss: 0.973243236541748,grad_norm: 0.9999992387077162, iteration: 85470
loss: 0.9708371162414551,grad_norm: 0.9286007447328329, iteration: 85471
loss: 0.9895458221435547,grad_norm: 0.9999992447216502, iteration: 85472
loss: 1.0135165452957153,grad_norm: 0.99999917544349, iteration: 85473
loss: 1.0191466808319092,grad_norm: 0.9216754501052882, iteration: 85474
loss: 0.9915316700935364,grad_norm: 0.9999991029664862, iteration: 85475
loss: 1.0180546045303345,grad_norm: 0.9349636836633872, iteration: 85476
loss: 1.014032244682312,grad_norm: 0.8901585140669402, iteration: 85477
loss: 1.0173604488372803,grad_norm: 0.9999996478758683, iteration: 85478
loss: 0.9878966212272644,grad_norm: 0.9999992331882555, iteration: 85479
loss: 1.0019667148590088,grad_norm: 0.9999991945607886, iteration: 85480
loss: 0.9893529415130615,grad_norm: 0.9999991468063489, iteration: 85481
loss: 1.0399924516677856,grad_norm: 0.9999992436704784, iteration: 85482
loss: 0.9638479351997375,grad_norm: 0.9999991217272904, iteration: 85483
loss: 0.9916755557060242,grad_norm: 0.9999990431929245, iteration: 85484
loss: 0.9725806713104248,grad_norm: 0.999999255889892, iteration: 85485
loss: 1.01558518409729,grad_norm: 0.9828613393844308, iteration: 85486
loss: 1.0381149053573608,grad_norm: 0.9266476418696108, iteration: 85487
loss: 0.9733649492263794,grad_norm: 0.99999947818817, iteration: 85488
loss: 1.011086106300354,grad_norm: 0.8030124082047987, iteration: 85489
loss: 0.9893544912338257,grad_norm: 0.9240993560937643, iteration: 85490
loss: 1.0222371816635132,grad_norm: 0.9799653127857678, iteration: 85491
loss: 0.941580593585968,grad_norm: 0.9999991757314587, iteration: 85492
loss: 0.9891480803489685,grad_norm: 0.9999991915421422, iteration: 85493
loss: 0.9814632534980774,grad_norm: 0.9994994873678406, iteration: 85494
loss: 1.0098029375076294,grad_norm: 0.9023240785131099, iteration: 85495
loss: 1.0336476564407349,grad_norm: 0.9999990873698557, iteration: 85496
loss: 1.0032167434692383,grad_norm: 0.9999991031146114, iteration: 85497
loss: 1.018743634223938,grad_norm: 0.999999355431902, iteration: 85498
loss: 0.9850426316261292,grad_norm: 0.9999992410548371, iteration: 85499
loss: 1.0116983652114868,grad_norm: 0.9999990855517614, iteration: 85500
loss: 0.9994651079177856,grad_norm: 0.9334441111225996, iteration: 85501
loss: 1.0176602602005005,grad_norm: 0.9999991514279299, iteration: 85502
loss: 1.006156086921692,grad_norm: 0.8961879265297689, iteration: 85503
loss: 1.0193547010421753,grad_norm: 0.9932514516996194, iteration: 85504
loss: 0.9867121577262878,grad_norm: 0.9999990514111996, iteration: 85505
loss: 0.9975504279136658,grad_norm: 0.999999021343315, iteration: 85506
loss: 1.018310785293579,grad_norm: 0.9999991855031651, iteration: 85507
loss: 0.978220522403717,grad_norm: 0.9999991438119116, iteration: 85508
loss: 0.9957722425460815,grad_norm: 0.9999992183801549, iteration: 85509
loss: 0.9997416734695435,grad_norm: 0.9405847271378074, iteration: 85510
loss: 1.0460032224655151,grad_norm: 0.999999123927921, iteration: 85511
loss: 0.9792414903640747,grad_norm: 0.9999991301994064, iteration: 85512
loss: 0.9807494282722473,grad_norm: 0.9826858050685687, iteration: 85513
loss: 1.0035821199417114,grad_norm: 0.9999991922194885, iteration: 85514
loss: 1.0120570659637451,grad_norm: 0.9999990143985857, iteration: 85515
loss: 0.9832867383956909,grad_norm: 0.9651811841303798, iteration: 85516
loss: 0.9961058497428894,grad_norm: 0.9838975818939427, iteration: 85517
loss: 1.001639485359192,grad_norm: 0.8637452140567862, iteration: 85518
loss: 0.9852830767631531,grad_norm: 0.9999990055039564, iteration: 85519
loss: 1.046141505241394,grad_norm: 0.9999993044524396, iteration: 85520
loss: 0.9702327847480774,grad_norm: 0.9999990754429428, iteration: 85521
loss: 0.9711756706237793,grad_norm: 0.9176831821960759, iteration: 85522
loss: 1.0105359554290771,grad_norm: 0.9345727434752851, iteration: 85523
loss: 1.0026333332061768,grad_norm: 0.9999991282584091, iteration: 85524
loss: 1.012924313545227,grad_norm: 0.9918951733656685, iteration: 85525
loss: 0.9896506071090698,grad_norm: 0.999999110217959, iteration: 85526
loss: 1.015522837638855,grad_norm: 0.9999995032200303, iteration: 85527
loss: 0.9877886176109314,grad_norm: 0.9999992384905633, iteration: 85528
loss: 0.9802038073539734,grad_norm: 0.9693751250887471, iteration: 85529
loss: 1.0408607721328735,grad_norm: 0.9999990662093904, iteration: 85530
loss: 1.0223501920700073,grad_norm: 0.9999997760056303, iteration: 85531
loss: 0.9826858043670654,grad_norm: 0.8685965841832759, iteration: 85532
loss: 1.0139529705047607,grad_norm: 0.9999991134269746, iteration: 85533
loss: 1.0218048095703125,grad_norm: 0.9660903531572044, iteration: 85534
loss: 0.9470191597938538,grad_norm: 0.9999993712406683, iteration: 85535
loss: 1.009482979774475,grad_norm: 0.9999990776945293, iteration: 85536
loss: 0.9956855773925781,grad_norm: 0.9999992298071211, iteration: 85537
loss: 1.0163261890411377,grad_norm: 0.9999991227301043, iteration: 85538
loss: 0.9823247194290161,grad_norm: 0.9999993239515523, iteration: 85539
loss: 1.045295000076294,grad_norm: 0.8554727050039047, iteration: 85540
loss: 1.0298832654953003,grad_norm: 0.9999993957988377, iteration: 85541
loss: 0.9837668538093567,grad_norm: 0.9999991786606007, iteration: 85542
loss: 0.9979520440101624,grad_norm: 0.9999989972683632, iteration: 85543
loss: 1.0074667930603027,grad_norm: 0.8885981193358449, iteration: 85544
loss: 1.0117913484573364,grad_norm: 0.9380952018093235, iteration: 85545
loss: 1.0136709213256836,grad_norm: 0.9890904617574937, iteration: 85546
loss: 0.9793475270271301,grad_norm: 0.9999992278643784, iteration: 85547
loss: 0.9981935024261475,grad_norm: 0.9999992032560193, iteration: 85548
loss: 0.9889326095581055,grad_norm: 0.9999992202058605, iteration: 85549
loss: 0.9934809803962708,grad_norm: 0.9957826239599832, iteration: 85550
loss: 0.9667359590530396,grad_norm: 0.9999990981554203, iteration: 85551
loss: 0.9862163662910461,grad_norm: 0.9999990969295524, iteration: 85552
loss: 1.0349345207214355,grad_norm: 0.9999990567835995, iteration: 85553
loss: 0.9990934133529663,grad_norm: 0.999999180670883, iteration: 85554
loss: 1.0612069368362427,grad_norm: 0.9999996441288562, iteration: 85555
loss: 1.015803575515747,grad_norm: 0.8639433936685385, iteration: 85556
loss: 1.0302734375,grad_norm: 0.9999991166449982, iteration: 85557
loss: 1.0179837942123413,grad_norm: 0.9999990882486652, iteration: 85558
loss: 0.9934616088867188,grad_norm: 0.9999992458130516, iteration: 85559
loss: 1.0029749870300293,grad_norm: 0.9999990275752547, iteration: 85560
loss: 1.0245471000671387,grad_norm: 0.8458230983323507, iteration: 85561
loss: 1.0046006441116333,grad_norm: 0.9999991917297686, iteration: 85562
loss: 1.0193661451339722,grad_norm: 0.9999990953935177, iteration: 85563
loss: 1.0011564493179321,grad_norm: 0.9999991257484905, iteration: 85564
loss: 1.010970115661621,grad_norm: 0.9674841538414707, iteration: 85565
loss: 1.0111536979675293,grad_norm: 0.9999989929640264, iteration: 85566
loss: 1.0347524881362915,grad_norm: 0.9999992213248395, iteration: 85567
loss: 0.9824221730232239,grad_norm: 0.999999208667132, iteration: 85568
loss: 0.9908999800682068,grad_norm: 0.9910008470379874, iteration: 85569
loss: 0.9977652430534363,grad_norm: 0.9999990347346217, iteration: 85570
loss: 1.027541160583496,grad_norm: 0.9999996345275363, iteration: 85571
loss: 1.0066041946411133,grad_norm: 0.9999990131009486, iteration: 85572
loss: 1.0443570613861084,grad_norm: 0.9999992261893581, iteration: 85573
loss: 1.014168381690979,grad_norm: 0.9955791075819863, iteration: 85574
loss: 0.9667291641235352,grad_norm: 0.9999992368077039, iteration: 85575
loss: 1.0294506549835205,grad_norm: 0.9999999221086115, iteration: 85576
loss: 0.9844587445259094,grad_norm: 0.9999990462698184, iteration: 85577
loss: 0.9944460988044739,grad_norm: 0.948156771887082, iteration: 85578
loss: 1.0026311874389648,grad_norm: 0.9999991747127881, iteration: 85579
loss: 1.037068247795105,grad_norm: 0.9999996557360026, iteration: 85580
loss: 1.0029505491256714,grad_norm: 0.9686346284624772, iteration: 85581
loss: 0.983220636844635,grad_norm: 0.9674514458998419, iteration: 85582
loss: 0.9856931567192078,grad_norm: 0.9999991344880823, iteration: 85583
loss: 1.0186011791229248,grad_norm: 0.909304120412699, iteration: 85584
loss: 1.020323634147644,grad_norm: 0.9999996917555571, iteration: 85585
loss: 0.9971892237663269,grad_norm: 0.9110040736964996, iteration: 85586
loss: 1.001334309577942,grad_norm: 0.9999992150337473, iteration: 85587
loss: 1.0337977409362793,grad_norm: 0.9999990055499177, iteration: 85588
loss: 0.989511251449585,grad_norm: 0.9999991627179605, iteration: 85589
loss: 0.9879935383796692,grad_norm: 0.9663997947160102, iteration: 85590
loss: 0.9691591858863831,grad_norm: 0.9999990420504509, iteration: 85591
loss: 0.9932610988616943,grad_norm: 0.9999991500798833, iteration: 85592
loss: 1.0113896131515503,grad_norm: 0.9984607635539258, iteration: 85593
loss: 1.0155996084213257,grad_norm: 0.9999990774386869, iteration: 85594
loss: 0.9898369908332825,grad_norm: 0.9999991722348025, iteration: 85595
loss: 0.9814809560775757,grad_norm: 0.9999990601044508, iteration: 85596
loss: 0.9871252179145813,grad_norm: 0.8968067876278006, iteration: 85597
loss: 1.0721917152404785,grad_norm: 0.9894773193729944, iteration: 85598
loss: 1.0218629837036133,grad_norm: 0.9573718408186364, iteration: 85599
loss: 1.0130897760391235,grad_norm: 0.9575511287729637, iteration: 85600
loss: 0.9986237287521362,grad_norm: 0.9909932525497995, iteration: 85601
loss: 1.0005494356155396,grad_norm: 0.9999991409865002, iteration: 85602
loss: 0.979227602481842,grad_norm: 0.9780618735282519, iteration: 85603
loss: 1.0145347118377686,grad_norm: 0.9999990424832805, iteration: 85604
loss: 0.9867686629295349,grad_norm: 0.9999992622606713, iteration: 85605
loss: 1.0258785486221313,grad_norm: 0.9430199511286708, iteration: 85606
loss: 1.027571678161621,grad_norm: 0.9999990254098187, iteration: 85607
loss: 1.021944522857666,grad_norm: 0.9999992704859607, iteration: 85608
loss: 1.0031005144119263,grad_norm: 0.9999990282741318, iteration: 85609
loss: 1.0382157564163208,grad_norm: 0.9999993765108912, iteration: 85610
loss: 1.0235722064971924,grad_norm: 0.9999990938934298, iteration: 85611
loss: 1.0022573471069336,grad_norm: 0.9999991487137967, iteration: 85612
loss: 1.0171257257461548,grad_norm: 0.999999200270782, iteration: 85613
loss: 1.0164765119552612,grad_norm: 0.9999990564786172, iteration: 85614
loss: 1.016697883605957,grad_norm: 0.9999997918585568, iteration: 85615
loss: 0.9887925982475281,grad_norm: 0.9999991860972569, iteration: 85616
loss: 0.9887707829475403,grad_norm: 0.9999991555563007, iteration: 85617
loss: 0.9847897291183472,grad_norm: 0.942057099885594, iteration: 85618
loss: 1.0148065090179443,grad_norm: 0.8762795583337016, iteration: 85619
loss: 1.0136085748672485,grad_norm: 0.9999990931444857, iteration: 85620
loss: 0.9872145056724548,grad_norm: 0.999999125990988, iteration: 85621
loss: 0.9745733141899109,grad_norm: 0.9999992769724174, iteration: 85622
loss: 0.9803332090377808,grad_norm: 0.9999990982384773, iteration: 85623
loss: 0.984771192073822,grad_norm: 0.9999991483144334, iteration: 85624
loss: 1.0983775854110718,grad_norm: 0.9999998077223019, iteration: 85625
loss: 0.9557064771652222,grad_norm: 0.9999992081560143, iteration: 85626
loss: 0.9995108842849731,grad_norm: 0.9642581841868751, iteration: 85627
loss: 0.9945855736732483,grad_norm: 0.8809847023977068, iteration: 85628
loss: 1.00807523727417,grad_norm: 0.9999990256320702, iteration: 85629
loss: 1.013405442237854,grad_norm: 0.9999990242787179, iteration: 85630
loss: 1.012994647026062,grad_norm: 0.9999989794864764, iteration: 85631
loss: 1.0185009241104126,grad_norm: 0.9999998432481781, iteration: 85632
loss: 0.9851179122924805,grad_norm: 0.9999989796021548, iteration: 85633
loss: 1.0153932571411133,grad_norm: 0.9094304451754777, iteration: 85634
loss: 1.0143084526062012,grad_norm: 0.9999990700115284, iteration: 85635
loss: 1.0277142524719238,grad_norm: 0.9999993070772758, iteration: 85636
loss: 0.9655673503875732,grad_norm: 0.9699658035945451, iteration: 85637
loss: 0.9543516039848328,grad_norm: 0.9241154352936133, iteration: 85638
loss: 1.0052071809768677,grad_norm: 0.9999989291276833, iteration: 85639
loss: 1.0172879695892334,grad_norm: 0.9999991506949677, iteration: 85640
loss: 0.9997280240058899,grad_norm: 0.9999992409703531, iteration: 85641
loss: 1.0154156684875488,grad_norm: 0.9604640705739924, iteration: 85642
loss: 0.996469259262085,grad_norm: 0.8275331667433219, iteration: 85643
loss: 0.9932835698127747,grad_norm: 0.9912052238476321, iteration: 85644
loss: 1.0002681016921997,grad_norm: 0.999999248672191, iteration: 85645
loss: 1.009726881980896,grad_norm: 0.9999991048330363, iteration: 85646
loss: 0.9691658020019531,grad_norm: 0.9999990937533237, iteration: 85647
loss: 0.9627878069877625,grad_norm: 0.9771836593419267, iteration: 85648
loss: 0.993912935256958,grad_norm: 0.9999991889809141, iteration: 85649
loss: 0.9685367345809937,grad_norm: 0.9999990354220568, iteration: 85650
loss: 0.9774589538574219,grad_norm: 0.9722983243811104, iteration: 85651
loss: 0.9894052743911743,grad_norm: 0.9695613462908538, iteration: 85652
loss: 1.0531580448150635,grad_norm: 0.9999992273045563, iteration: 85653
loss: 1.0385090112686157,grad_norm: 0.9999992083891115, iteration: 85654
loss: 0.9662935137748718,grad_norm: 0.9999991566795416, iteration: 85655
loss: 0.9788314700126648,grad_norm: 0.9999991839418813, iteration: 85656
loss: 1.000389575958252,grad_norm: 0.9999991403840585, iteration: 85657
loss: 1.0235153436660767,grad_norm: 0.9999990605952046, iteration: 85658
loss: 1.0184577703475952,grad_norm: 0.9999991491052943, iteration: 85659
loss: 1.0050817728042603,grad_norm: 0.9999990175008872, iteration: 85660
loss: 0.9263859391212463,grad_norm: 0.9999991754272689, iteration: 85661
loss: 1.010686993598938,grad_norm: 0.9999997036160012, iteration: 85662
loss: 1.0393975973129272,grad_norm: 0.9425130873969967, iteration: 85663
loss: 1.0029903650283813,grad_norm: 0.9720989712870779, iteration: 85664
loss: 1.032158613204956,grad_norm: 0.9838508658858357, iteration: 85665
loss: 0.9811710715293884,grad_norm: 0.9999989867794208, iteration: 85666
loss: 1.042592167854309,grad_norm: 0.9522147262713334, iteration: 85667
loss: 0.9849016666412354,grad_norm: 0.8974658147581026, iteration: 85668
loss: 0.9983432292938232,grad_norm: 0.9999990537906284, iteration: 85669
loss: 0.9722924828529358,grad_norm: 0.9999991546993109, iteration: 85670
loss: 0.9888508915901184,grad_norm: 0.9999990557893856, iteration: 85671
loss: 1.1708600521087646,grad_norm: 0.9999991877337827, iteration: 85672
loss: 1.0227917432785034,grad_norm: 0.9729138105950084, iteration: 85673
loss: 1.0010603666305542,grad_norm: 0.9999991290978505, iteration: 85674
loss: 0.9693351984024048,grad_norm: 0.9999991450658218, iteration: 85675
loss: 1.0197149515151978,grad_norm: 0.9999991905347978, iteration: 85676
loss: 1.0195938348770142,grad_norm: 0.9999991402624673, iteration: 85677
loss: 0.9902970790863037,grad_norm: 0.9999991691298366, iteration: 85678
loss: 0.9909083843231201,grad_norm: 0.9999990043770778, iteration: 85679
loss: 1.0101598501205444,grad_norm: 0.9999991038126185, iteration: 85680
loss: 0.9973778128623962,grad_norm: 0.9082136899136593, iteration: 85681
loss: 1.031225562095642,grad_norm: 0.999999088396416, iteration: 85682
loss: 1.073911428451538,grad_norm: 0.9999992752881957, iteration: 85683
loss: 1.0097618103027344,grad_norm: 0.9999992058757212, iteration: 85684
loss: 1.0321118831634521,grad_norm: 0.9999993695581382, iteration: 85685
loss: 0.9821125864982605,grad_norm: 0.9999991437111203, iteration: 85686
loss: 1.0253551006317139,grad_norm: 0.9999992294686092, iteration: 85687
loss: 0.9883289337158203,grad_norm: 0.9459776751306277, iteration: 85688
loss: 1.020041584968567,grad_norm: 0.9999991685324361, iteration: 85689
loss: 0.9948126673698425,grad_norm: 0.9515672561773123, iteration: 85690
loss: 1.0232000350952148,grad_norm: 0.9999989964788494, iteration: 85691
loss: 1.011001706123352,grad_norm: 0.9808872099301217, iteration: 85692
loss: 1.0140380859375,grad_norm: 0.9999990189018696, iteration: 85693
loss: 1.0398633480072021,grad_norm: 0.9999992243721407, iteration: 85694
loss: 1.097544550895691,grad_norm: 0.9999992463566503, iteration: 85695
loss: 0.9749819040298462,grad_norm: 0.9999993274009872, iteration: 85696
loss: 0.9669466614723206,grad_norm: 0.9999992587460741, iteration: 85697
loss: 0.9968183636665344,grad_norm: 0.9673282859298321, iteration: 85698
loss: 0.9897475838661194,grad_norm: 0.9999991253180995, iteration: 85699
loss: 0.9971725344657898,grad_norm: 0.9598024392302819, iteration: 85700
loss: 1.012969732284546,grad_norm: 0.9999990625880899, iteration: 85701
loss: 1.0227129459381104,grad_norm: 0.9456647554869124, iteration: 85702
loss: 1.0202968120574951,grad_norm: 0.9999991469245126, iteration: 85703
loss: 1.0058388710021973,grad_norm: 0.9319838750982358, iteration: 85704
loss: 1.035516381263733,grad_norm: 0.9999990170175823, iteration: 85705
loss: 1.0062326192855835,grad_norm: 0.9999991650462449, iteration: 85706
loss: 0.974942147731781,grad_norm: 0.9999991484823465, iteration: 85707
loss: 1.0385695695877075,grad_norm: 0.9999992154376869, iteration: 85708
loss: 1.027039647102356,grad_norm: 0.9999991767827, iteration: 85709
loss: 1.0451319217681885,grad_norm: 0.9204618697072576, iteration: 85710
loss: 1.004285216331482,grad_norm: 0.9999989613867856, iteration: 85711
loss: 0.9850055575370789,grad_norm: 0.9999992258016975, iteration: 85712
loss: 1.0109566450119019,grad_norm: 0.9999990177526893, iteration: 85713
loss: 0.9911444783210754,grad_norm: 0.9999992286503143, iteration: 85714
loss: 1.0361862182617188,grad_norm: 0.999999128345046, iteration: 85715
loss: 1.0366084575653076,grad_norm: 0.9353110791432834, iteration: 85716
loss: 0.9896742701530457,grad_norm: 0.9999991710727094, iteration: 85717
loss: 0.9768069982528687,grad_norm: 0.9999990767658107, iteration: 85718
loss: 0.9978564381599426,grad_norm: 0.9986086995549456, iteration: 85719
loss: 0.9666095972061157,grad_norm: 0.9693491634134301, iteration: 85720
loss: 1.0181639194488525,grad_norm: 0.9999990524014467, iteration: 85721
loss: 0.9988489151000977,grad_norm: 0.9999992367877094, iteration: 85722
loss: 1.021016240119934,grad_norm: 0.9999991301634001, iteration: 85723
loss: 1.009456753730774,grad_norm: 0.9999990157083851, iteration: 85724
loss: 1.0318615436553955,grad_norm: 0.9999991781481701, iteration: 85725
loss: 0.9880812764167786,grad_norm: 0.9999992386699004, iteration: 85726
loss: 0.9987980723381042,grad_norm: 0.806098926467445, iteration: 85727
loss: 1.0487899780273438,grad_norm: 0.9999994162430633, iteration: 85728
loss: 1.0113725662231445,grad_norm: 0.99999905557088, iteration: 85729
loss: 1.0050393342971802,grad_norm: 0.9999990215276909, iteration: 85730
loss: 1.0145832300186157,grad_norm: 0.9127635443537573, iteration: 85731
loss: 1.0070394277572632,grad_norm: 0.9321034612000936, iteration: 85732
loss: 1.0193678140640259,grad_norm: 0.9999991122325795, iteration: 85733
loss: 1.003799319267273,grad_norm: 0.9999993176732822, iteration: 85734
loss: 0.9967526793479919,grad_norm: 0.9999990203058865, iteration: 85735
loss: 1.0075608491897583,grad_norm: 0.9999991137076746, iteration: 85736
loss: 0.988473117351532,grad_norm: 0.9999990345496925, iteration: 85737
loss: 1.0133851766586304,grad_norm: 0.992225181245412, iteration: 85738
loss: 1.0005944967269897,grad_norm: 0.9999990870411994, iteration: 85739
loss: 0.9895232915878296,grad_norm: 0.8977362430881392, iteration: 85740
loss: 1.0038697719573975,grad_norm: 0.9999990393236807, iteration: 85741
loss: 1.0127023458480835,grad_norm: 0.9999991898861886, iteration: 85742
loss: 0.9846411943435669,grad_norm: 0.9999991406235753, iteration: 85743
loss: 0.9964562058448792,grad_norm: 0.9999991195986264, iteration: 85744
loss: 1.0020747184753418,grad_norm: 0.9146153622463664, iteration: 85745
loss: 1.0124953985214233,grad_norm: 0.9197285308195003, iteration: 85746
loss: 0.96536785364151,grad_norm: 0.9999992118497139, iteration: 85747
loss: 0.9801172614097595,grad_norm: 0.9999992123397554, iteration: 85748
loss: 1.0194432735443115,grad_norm: 0.927827166824631, iteration: 85749
loss: 0.9637555480003357,grad_norm: 0.9999992429764704, iteration: 85750
loss: 1.0373024940490723,grad_norm: 0.9999992673769293, iteration: 85751
loss: 1.0003694295883179,grad_norm: 0.9035918764230125, iteration: 85752
loss: 1.0348814725875854,grad_norm: 0.9999990690454768, iteration: 85753
loss: 1.002744436264038,grad_norm: 0.9999990820951631, iteration: 85754
loss: 1.0241705179214478,grad_norm: 0.9999992544466809, iteration: 85755
loss: 1.0077784061431885,grad_norm: 0.9999990030509476, iteration: 85756
loss: 1.0297977924346924,grad_norm: 0.9999992651568828, iteration: 85757
loss: 1.1017305850982666,grad_norm: 0.9999994986657109, iteration: 85758
loss: 1.015905499458313,grad_norm: 0.999999527074843, iteration: 85759
loss: 1.021561622619629,grad_norm: 0.9999991188504005, iteration: 85760
loss: 1.0150185823440552,grad_norm: 0.9999990948760621, iteration: 85761
loss: 1.0155799388885498,grad_norm: 0.9999993038753561, iteration: 85762
loss: 1.0103604793548584,grad_norm: 0.9999991072798875, iteration: 85763
loss: 1.0085654258728027,grad_norm: 0.944784629519831, iteration: 85764
loss: 1.0173372030258179,grad_norm: 0.9134089503648325, iteration: 85765
loss: 0.9834526181221008,grad_norm: 0.9999992611174123, iteration: 85766
loss: 1.0002734661102295,grad_norm: 0.9999991397886346, iteration: 85767
loss: 0.9927408695220947,grad_norm: 0.9999991595718891, iteration: 85768
loss: 0.972069501876831,grad_norm: 0.9999990357073192, iteration: 85769
loss: 1.0085300207138062,grad_norm: 0.9999989712392514, iteration: 85770
loss: 0.9997252225875854,grad_norm: 0.9351245381629305, iteration: 85771
loss: 0.9721566438674927,grad_norm: 0.9999991882185006, iteration: 85772
loss: 0.9324649572372437,grad_norm: 0.9999990576650707, iteration: 85773
loss: 0.9722622036933899,grad_norm: 0.9999990488254189, iteration: 85774
loss: 1.018768072128296,grad_norm: 0.8567812138924552, iteration: 85775
loss: 1.0167351961135864,grad_norm: 0.9857721539796106, iteration: 85776
loss: 1.0200848579406738,grad_norm: 0.9999992039667124, iteration: 85777
loss: 1.0016961097717285,grad_norm: 0.999999199750781, iteration: 85778
loss: 0.9855569005012512,grad_norm: 0.9999989710302294, iteration: 85779
loss: 0.9837223887443542,grad_norm: 0.9999991208572553, iteration: 85780
loss: 1.0052685737609863,grad_norm: 0.9999992708106051, iteration: 85781
loss: 0.9818806648254395,grad_norm: 0.9303564324679678, iteration: 85782
loss: 0.9792776703834534,grad_norm: 0.8400566220202987, iteration: 85783
loss: 1.0447139739990234,grad_norm: 0.9999991342977818, iteration: 85784
loss: 1.0132460594177246,grad_norm: 0.9834909060472095, iteration: 85785
loss: 0.9905590415000916,grad_norm: 0.9635995978678255, iteration: 85786
loss: 0.973207414150238,grad_norm: 0.9999989615207371, iteration: 85787
loss: 0.9923602938652039,grad_norm: 0.9999992796565323, iteration: 85788
loss: 0.9857640862464905,grad_norm: 0.9999991070111592, iteration: 85789
loss: 1.027347207069397,grad_norm: 0.9999992288889719, iteration: 85790
loss: 1.0200048685073853,grad_norm: 0.9775371134654984, iteration: 85791
loss: 0.9904683828353882,grad_norm: 0.9999991802835785, iteration: 85792
loss: 0.9960778951644897,grad_norm: 0.9999992724828408, iteration: 85793
loss: 1.025443434715271,grad_norm: 0.9999994543257407, iteration: 85794
loss: 1.0057495832443237,grad_norm: 0.9999991116318727, iteration: 85795
loss: 1.020592451095581,grad_norm: 0.9999990677266287, iteration: 85796
loss: 1.0255626440048218,grad_norm: 0.9999991643675922, iteration: 85797
loss: 1.0315150022506714,grad_norm: 0.9999990424544332, iteration: 85798
loss: 0.9967260956764221,grad_norm: 0.9999990203618563, iteration: 85799
loss: 1.00198233127594,grad_norm: 0.9999990566690083, iteration: 85800
loss: 1.0205250978469849,grad_norm: 0.9413259632855893, iteration: 85801
loss: 1.029101014137268,grad_norm: 0.9999989222534795, iteration: 85802
loss: 0.9893196821212769,grad_norm: 0.9914149056832015, iteration: 85803
loss: 1.0097378492355347,grad_norm: 0.9442189034363458, iteration: 85804
loss: 0.9894453287124634,grad_norm: 0.9999992124465287, iteration: 85805
loss: 0.9749576449394226,grad_norm: 0.9999991046779889, iteration: 85806
loss: 1.020458459854126,grad_norm: 0.9834607190263146, iteration: 85807
loss: 0.9779552817344666,grad_norm: 0.9999989921890049, iteration: 85808
loss: 0.9511492848396301,grad_norm: 0.9999991224883921, iteration: 85809
loss: 1.0919629335403442,grad_norm: 0.9999997792225266, iteration: 85810
loss: 0.9980922937393188,grad_norm: 0.9999992357278727, iteration: 85811
loss: 1.0281583070755005,grad_norm: 0.9999991494705939, iteration: 85812
loss: 1.0388809442520142,grad_norm: 0.9999994771936438, iteration: 85813
loss: 1.0294355154037476,grad_norm: 0.9999999390314328, iteration: 85814
loss: 0.9997283816337585,grad_norm: 0.9999991187879164, iteration: 85815
loss: 0.9721359610557556,grad_norm: 0.8196586723210261, iteration: 85816
loss: 0.9719880819320679,grad_norm: 0.9999991170292586, iteration: 85817
loss: 1.0056418180465698,grad_norm: 0.9980846389436766, iteration: 85818
loss: 0.9860665202140808,grad_norm: 0.9573276694151902, iteration: 85819
loss: 1.0199203491210938,grad_norm: 0.9999990549789447, iteration: 85820
loss: 1.000129222869873,grad_norm: 0.9264763431190178, iteration: 85821
loss: 0.9902957081794739,grad_norm: 0.9999993662491639, iteration: 85822
loss: 0.9974567294120789,grad_norm: 0.9578028181888847, iteration: 85823
loss: 0.985943078994751,grad_norm: 0.9999991214368629, iteration: 85824
loss: 1.0946087837219238,grad_norm: 0.9999994225000159, iteration: 85825
loss: 1.0059807300567627,grad_norm: 0.936995090937009, iteration: 85826
loss: 0.985243022441864,grad_norm: 0.9999991408172695, iteration: 85827
loss: 0.9915033578872681,grad_norm: 0.9999991200378756, iteration: 85828
loss: 1.0241881608963013,grad_norm: 0.9999990921582267, iteration: 85829
loss: 0.9950870275497437,grad_norm: 0.9380813219754535, iteration: 85830
loss: 0.9702383279800415,grad_norm: 0.9999991829274071, iteration: 85831
loss: 1.0166124105453491,grad_norm: 0.9999991567931542, iteration: 85832
loss: 0.9993759393692017,grad_norm: 0.9952449292811952, iteration: 85833
loss: 0.9815466403961182,grad_norm: 0.8983864950320336, iteration: 85834
loss: 1.0036675930023193,grad_norm: 0.9999992027693084, iteration: 85835
loss: 0.9954466819763184,grad_norm: 0.9747759439185409, iteration: 85836
loss: 0.9770137071609497,grad_norm: 0.9999989820190454, iteration: 85837
loss: 0.9849790334701538,grad_norm: 0.9940777023736206, iteration: 85838
loss: 1.0279306173324585,grad_norm: 0.9999991202625268, iteration: 85839
loss: 1.0194090604782104,grad_norm: 0.915995986720661, iteration: 85840
loss: 1.0018707513809204,grad_norm: 0.9999992095361951, iteration: 85841
loss: 1.057694435119629,grad_norm: 0.9999999531022828, iteration: 85842
loss: 1.002943992614746,grad_norm: 0.9999990087269343, iteration: 85843
loss: 1.0351916551589966,grad_norm: 0.9184983252737138, iteration: 85844
loss: 0.9964307546615601,grad_norm: 0.9999990073826889, iteration: 85845
loss: 0.9982410669326782,grad_norm: 0.9999990591812714, iteration: 85846
loss: 1.0141078233718872,grad_norm: 0.9999989688060971, iteration: 85847
loss: 1.010892629623413,grad_norm: 0.9999991648058358, iteration: 85848
loss: 1.0006051063537598,grad_norm: 0.9999991122470843, iteration: 85849
loss: 1.0576504468917847,grad_norm: 0.9999999404559954, iteration: 85850
loss: 0.9869900941848755,grad_norm: 0.9999989234914419, iteration: 85851
loss: 1.0108344554901123,grad_norm: 0.9999994157254303, iteration: 85852
loss: 0.9953905344009399,grad_norm: 0.8613685312440779, iteration: 85853
loss: 1.0033833980560303,grad_norm: 0.9999990682238248, iteration: 85854
loss: 1.0101088285446167,grad_norm: 0.9999991922467253, iteration: 85855
loss: 1.0175615549087524,grad_norm: 0.9712331876226905, iteration: 85856
loss: 0.9576955437660217,grad_norm: 0.98998944998963, iteration: 85857
loss: 1.0067709684371948,grad_norm: 0.9531237073004245, iteration: 85858
loss: 1.0031368732452393,grad_norm: 0.9999991494531197, iteration: 85859
loss: 0.9958574771881104,grad_norm: 0.9999991248375328, iteration: 85860
loss: 1.0127089023590088,grad_norm: 0.9999992435575203, iteration: 85861
loss: 0.9908324480056763,grad_norm: 0.9999991716247235, iteration: 85862
loss: 1.0226761102676392,grad_norm: 0.9999992497217703, iteration: 85863
loss: 1.0477451086044312,grad_norm: 0.9999992257677476, iteration: 85864
loss: 1.0020015239715576,grad_norm: 0.9520334267586144, iteration: 85865
loss: 1.0036815404891968,grad_norm: 0.8725453615658714, iteration: 85866
loss: 0.9842429161071777,grad_norm: 0.9872784904492656, iteration: 85867
loss: 0.9860512018203735,grad_norm: 0.9999990852241663, iteration: 85868
loss: 1.0104373693466187,grad_norm: 0.9999990123251026, iteration: 85869
loss: 0.9855672717094421,grad_norm: 0.9999991270933745, iteration: 85870
loss: 1.035904884338379,grad_norm: 0.9999992236122014, iteration: 85871
loss: 0.974651038646698,grad_norm: 0.9999994367505436, iteration: 85872
loss: 0.9965065121650696,grad_norm: 0.9999989794967608, iteration: 85873
loss: 1.0432722568511963,grad_norm: 0.9974087421226346, iteration: 85874
loss: 1.0288573503494263,grad_norm: 0.9461736845004377, iteration: 85875
loss: 0.9898133873939514,grad_norm: 0.9999990569638723, iteration: 85876
loss: 1.0009465217590332,grad_norm: 0.9999991399853909, iteration: 85877
loss: 0.9730551838874817,grad_norm: 0.9999991446016538, iteration: 85878
loss: 1.023311734199524,grad_norm: 0.9320844660654539, iteration: 85879
loss: 1.0010250806808472,grad_norm: 0.9999991467892948, iteration: 85880
loss: 0.9987283945083618,grad_norm: 0.8372001206349307, iteration: 85881
loss: 0.9830924272537231,grad_norm: 0.999999253671694, iteration: 85882
loss: 1.0151838064193726,grad_norm: 0.9999992528338182, iteration: 85883
loss: 1.0066394805908203,grad_norm: 0.9999990654680364, iteration: 85884
loss: 0.9815508127212524,grad_norm: 0.9999990117161184, iteration: 85885
loss: 0.9975720643997192,grad_norm: 0.9999998232666166, iteration: 85886
loss: 1.0033907890319824,grad_norm: 0.9999990256159289, iteration: 85887
loss: 0.9982765913009644,grad_norm: 0.9999990744953532, iteration: 85888
loss: 0.9999240040779114,grad_norm: 0.9999990872167825, iteration: 85889
loss: 1.022063136100769,grad_norm: 0.9999990852238194, iteration: 85890
loss: 1.011501431465149,grad_norm: 0.99999914379376, iteration: 85891
loss: 1.0284637212753296,grad_norm: 0.999999360702235, iteration: 85892
loss: 1.024442434310913,grad_norm: 0.9999996722291137, iteration: 85893
loss: 0.9950066208839417,grad_norm: 0.9999989920682643, iteration: 85894
loss: 0.9728673696517944,grad_norm: 0.9999990952157082, iteration: 85895
loss: 0.9902001023292542,grad_norm: 0.99999921122029, iteration: 85896
loss: 0.9773760437965393,grad_norm: 0.9999989183582036, iteration: 85897
loss: 0.9921107292175293,grad_norm: 0.9999990904438525, iteration: 85898
loss: 0.9805609583854675,grad_norm: 0.9546206525627859, iteration: 85899
loss: 0.9803133606910706,grad_norm: 0.9999990224923707, iteration: 85900
loss: 1.0390589237213135,grad_norm: 0.8910206750984208, iteration: 85901
loss: 1.029969334602356,grad_norm: 0.9999992310522497, iteration: 85902
loss: 1.02541184425354,grad_norm: 0.9999991752564976, iteration: 85903
loss: 1.0124733448028564,grad_norm: 0.9999993098801188, iteration: 85904
loss: 1.0262932777404785,grad_norm: 0.999999123914279, iteration: 85905
loss: 0.9940224289894104,grad_norm: 0.9394258837365043, iteration: 85906
loss: 1.0071542263031006,grad_norm: 0.9852268782026462, iteration: 85907
loss: 1.0113940238952637,grad_norm: 0.9999990851176834, iteration: 85908
loss: 1.0006002187728882,grad_norm: 0.8829795768262207, iteration: 85909
loss: 1.0105289220809937,grad_norm: 0.9999994132391142, iteration: 85910
loss: 1.0132843255996704,grad_norm: 0.9999994765252753, iteration: 85911
loss: 1.0414539575576782,grad_norm: 0.9999992187108726, iteration: 85912
loss: 0.9997006058692932,grad_norm: 0.9999991494097293, iteration: 85913
loss: 1.0162423849105835,grad_norm: 0.9999992432930254, iteration: 85914
loss: 1.0005344152450562,grad_norm: 0.8769814027410788, iteration: 85915
loss: 1.006391167640686,grad_norm: 0.9999991867109995, iteration: 85916
loss: 1.0411542654037476,grad_norm: 0.9999991748145439, iteration: 85917
loss: 0.9950449466705322,grad_norm: 0.9999991333259144, iteration: 85918
loss: 1.0189096927642822,grad_norm: 0.9999990457180085, iteration: 85919
loss: 0.9839282035827637,grad_norm: 0.9999992950644779, iteration: 85920
loss: 1.0142796039581299,grad_norm: 0.9776707007181933, iteration: 85921
loss: 0.9981123805046082,grad_norm: 0.9999992841535117, iteration: 85922
loss: 0.955993115901947,grad_norm: 0.9999990795963762, iteration: 85923
loss: 0.9962259531021118,grad_norm: 0.8852575010795928, iteration: 85924
loss: 1.004323959350586,grad_norm: 0.999999048427604, iteration: 85925
loss: 1.0003811120986938,grad_norm: 0.8934768302830552, iteration: 85926
loss: 1.0266233682632446,grad_norm: 0.9999989027646203, iteration: 85927
loss: 1.0004607439041138,grad_norm: 0.943282157013084, iteration: 85928
loss: 1.034179449081421,grad_norm: 0.9999991636781553, iteration: 85929
loss: 1.0066100358963013,grad_norm: 0.9999997355083049, iteration: 85930
loss: 0.9931255578994751,grad_norm: 0.9675646534728317, iteration: 85931
loss: 1.0332508087158203,grad_norm: 0.9999991750553344, iteration: 85932
loss: 0.9769258499145508,grad_norm: 0.9999991190976618, iteration: 85933
loss: 0.9969757795333862,grad_norm: 0.9999990833432895, iteration: 85934
loss: 1.0163911581039429,grad_norm: 0.9999990802175238, iteration: 85935
loss: 0.9938762784004211,grad_norm: 0.9999992224706369, iteration: 85936
loss: 0.9940405488014221,grad_norm: 0.9894391163507914, iteration: 85937
loss: 0.9895021915435791,grad_norm: 0.9999990389240149, iteration: 85938
loss: 0.9871035218238831,grad_norm: 0.9999991187938437, iteration: 85939
loss: 0.997378945350647,grad_norm: 0.9999991556578667, iteration: 85940
loss: 1.0033155679702759,grad_norm: 0.999999034723042, iteration: 85941
loss: 1.0059665441513062,grad_norm: 0.8949300262823148, iteration: 85942
loss: 1.0184006690979004,grad_norm: 0.9999990509238987, iteration: 85943
loss: 0.9882030487060547,grad_norm: 0.9999990761639217, iteration: 85944
loss: 1.0127122402191162,grad_norm: 0.9999991595503939, iteration: 85945
loss: 0.998786449432373,grad_norm: 0.9999990121469639, iteration: 85946
loss: 0.998893678188324,grad_norm: 0.9972974287739925, iteration: 85947
loss: 1.021437644958496,grad_norm: 0.946267967497284, iteration: 85948
loss: 1.0334709882736206,grad_norm: 0.9999991527912871, iteration: 85949
loss: 0.9782732725143433,grad_norm: 0.9999991233995655, iteration: 85950
loss: 0.969491720199585,grad_norm: 0.9999990947498179, iteration: 85951
loss: 1.0329550504684448,grad_norm: 0.999999095241953, iteration: 85952
loss: 0.9915040135383606,grad_norm: 0.9999991912236563, iteration: 85953
loss: 0.9871405959129333,grad_norm: 0.9999990726114528, iteration: 85954
loss: 0.9689319133758545,grad_norm: 0.9419660629890758, iteration: 85955
loss: 1.0338164567947388,grad_norm: 0.9999991837439183, iteration: 85956
loss: 1.0185906887054443,grad_norm: 0.9999990820289041, iteration: 85957
loss: 0.9546305537223816,grad_norm: 0.9999989633003289, iteration: 85958
loss: 1.0116357803344727,grad_norm: 0.999999035762729, iteration: 85959
loss: 1.0051926374435425,grad_norm: 0.9539966713603851, iteration: 85960
loss: 0.9879314303398132,grad_norm: 0.9999991218368379, iteration: 85961
loss: 0.9779117107391357,grad_norm: 0.9593728692370415, iteration: 85962
loss: 0.9943898916244507,grad_norm: 0.9999992717410487, iteration: 85963
loss: 1.0143986940383911,grad_norm: 0.9999992005498596, iteration: 85964
loss: 1.0031198263168335,grad_norm: 0.9999990001751833, iteration: 85965
loss: 1.0310170650482178,grad_norm: 0.9999990981075759, iteration: 85966
loss: 1.0190178155899048,grad_norm: 0.9999992304597428, iteration: 85967
loss: 1.0344263315200806,grad_norm: 0.9999990635295377, iteration: 85968
loss: 1.0299409627914429,grad_norm: 0.9001233821054931, iteration: 85969
loss: 0.9849773645401001,grad_norm: 0.9999992035849913, iteration: 85970
loss: 1.015724539756775,grad_norm: 0.9614588784773167, iteration: 85971
loss: 1.019351601600647,grad_norm: 0.9943983606040986, iteration: 85972
loss: 1.003646731376648,grad_norm: 0.9999989137676918, iteration: 85973
loss: 0.9665042757987976,grad_norm: 0.9999991966458869, iteration: 85974
loss: 0.9816333055496216,grad_norm: 0.9604207875287668, iteration: 85975
loss: 0.9544579982757568,grad_norm: 0.9999992165193626, iteration: 85976
loss: 1.0247830152511597,grad_norm: 0.9999990125401839, iteration: 85977
loss: 0.9787696003913879,grad_norm: 0.9999991735257387, iteration: 85978
loss: 1.020309567451477,grad_norm: 0.9999991346795531, iteration: 85979
loss: 0.9757059216499329,grad_norm: 0.9999991738502779, iteration: 85980
loss: 1.003193974494934,grad_norm: 0.99999918274711, iteration: 85981
loss: 1.0106900930404663,grad_norm: 0.9999992674862473, iteration: 85982
loss: 1.046354055404663,grad_norm: 0.971435976453634, iteration: 85983
loss: 0.9994351267814636,grad_norm: 0.9999991383426377, iteration: 85984
loss: 1.0030429363250732,grad_norm: 0.9999992238257218, iteration: 85985
loss: 0.976492166519165,grad_norm: 0.9999992491636711, iteration: 85986
loss: 1.0510011911392212,grad_norm: 0.9999997963657804, iteration: 85987
loss: 0.9599516987800598,grad_norm: 0.9909955072941096, iteration: 85988
loss: 1.0249311923980713,grad_norm: 0.999999431092676, iteration: 85989
loss: 0.9745371341705322,grad_norm: 0.9025046732999134, iteration: 85990
loss: 1.0213488340377808,grad_norm: 0.9731454259406768, iteration: 85991
loss: 1.0079034566879272,grad_norm: 0.9999992390428163, iteration: 85992
loss: 1.020698070526123,grad_norm: 0.9999991257866946, iteration: 85993
loss: 1.0231084823608398,grad_norm: 0.8423539567073909, iteration: 85994
loss: 1.0163722038269043,grad_norm: 0.9999991390786663, iteration: 85995
loss: 0.9808078408241272,grad_norm: 0.9999990821714306, iteration: 85996
loss: 1.038133978843689,grad_norm: 0.9999998239245177, iteration: 85997
loss: 0.9943011999130249,grad_norm: 0.9999990865747157, iteration: 85998
loss: 0.995469868183136,grad_norm: 0.9999992065914626, iteration: 85999
loss: 0.9751651287078857,grad_norm: 0.9982373314683746, iteration: 86000
loss: 0.9988576769828796,grad_norm: 0.9999991955828229, iteration: 86001
loss: 0.9712122082710266,grad_norm: 0.9999991495777788, iteration: 86002
loss: 0.9856850504875183,grad_norm: 0.9240499471184382, iteration: 86003
loss: 1.0184191465377808,grad_norm: 0.9999992444113464, iteration: 86004
loss: 0.937526285648346,grad_norm: 0.9999994768279986, iteration: 86005
loss: 0.9935256242752075,grad_norm: 0.9999989243242535, iteration: 86006
loss: 0.9853021502494812,grad_norm: 0.8484131497734848, iteration: 86007
loss: 1.00477135181427,grad_norm: 0.8637325882733544, iteration: 86008
loss: 0.9622114300727844,grad_norm: 0.9999992227251707, iteration: 86009
loss: 1.0365170240402222,grad_norm: 0.9999991169261693, iteration: 86010
loss: 1.0100717544555664,grad_norm: 0.9999990913027286, iteration: 86011
loss: 1.024498701095581,grad_norm: 0.9999992310621526, iteration: 86012
loss: 0.9754084944725037,grad_norm: 0.999999055874348, iteration: 86013
loss: 0.9758936166763306,grad_norm: 0.9999992082945102, iteration: 86014
loss: 0.9544742703437805,grad_norm: 0.9999992230254963, iteration: 86015
loss: 1.0194107294082642,grad_norm: 0.8796743086098019, iteration: 86016
loss: 0.9702901840209961,grad_norm: 0.9999991988875159, iteration: 86017
loss: 0.9683830142021179,grad_norm: 0.9796572738825456, iteration: 86018
loss: 1.0145021677017212,grad_norm: 0.9034631093661126, iteration: 86019
loss: 0.9855347275733948,grad_norm: 0.9965292517282023, iteration: 86020
loss: 0.984186053276062,grad_norm: 0.999998987384273, iteration: 86021
loss: 1.0015125274658203,grad_norm: 0.9230420399956705, iteration: 86022
loss: 1.0288100242614746,grad_norm: 0.9999991921289931, iteration: 86023
loss: 1.022188425064087,grad_norm: 0.9999990665496862, iteration: 86024
loss: 0.9397710561752319,grad_norm: 0.9776325555126105, iteration: 86025
loss: 1.0117249488830566,grad_norm: 0.976206689954174, iteration: 86026
loss: 0.9992759227752686,grad_norm: 0.9999991388045707, iteration: 86027
loss: 0.9910649657249451,grad_norm: 0.9999991259013952, iteration: 86028
loss: 0.9933063983917236,grad_norm: 0.9999991150415884, iteration: 86029
loss: 0.9313395619392395,grad_norm: 0.999999214053465, iteration: 86030
loss: 1.0472300052642822,grad_norm: 0.9999991097008263, iteration: 86031
loss: 1.0332108736038208,grad_norm: 0.9755669340981387, iteration: 86032
loss: 0.9833908677101135,grad_norm: 0.9999992360614194, iteration: 86033
loss: 0.9993757009506226,grad_norm: 0.9060282063754103, iteration: 86034
loss: 1.006143569946289,grad_norm: 0.9999992827506047, iteration: 86035
loss: 1.0206947326660156,grad_norm: 0.9999988920890053, iteration: 86036
loss: 1.0078011751174927,grad_norm: 0.999999131970549, iteration: 86037
loss: 0.9833210706710815,grad_norm: 0.990842237239517, iteration: 86038
loss: 1.0184212923049927,grad_norm: 0.9999992345786191, iteration: 86039
loss: 0.9661238193511963,grad_norm: 0.9999990746013893, iteration: 86040
loss: 1.013869285583496,grad_norm: 0.9999993926441698, iteration: 86041
loss: 1.032199501991272,grad_norm: 0.9802738643428465, iteration: 86042
loss: 1.0026262998580933,grad_norm: 0.9999990287224085, iteration: 86043
loss: 1.005502462387085,grad_norm: 0.9999991132943938, iteration: 86044
loss: 1.0195008516311646,grad_norm: 0.9999990543051979, iteration: 86045
loss: 1.063337802886963,grad_norm: 0.9999995423972156, iteration: 86046
loss: 0.9869027733802795,grad_norm: 0.9446072539125308, iteration: 86047
loss: 1.0054290294647217,grad_norm: 0.9999992616189757, iteration: 86048
loss: 1.0271068811416626,grad_norm: 0.9449408728188202, iteration: 86049
loss: 0.9958783984184265,grad_norm: 0.9999990279816461, iteration: 86050
loss: 0.9988981485366821,grad_norm: 0.9999993152811536, iteration: 86051
loss: 0.9950752854347229,grad_norm: 0.9515923287935829, iteration: 86052
loss: 0.9813060164451599,grad_norm: 0.9999991305383726, iteration: 86053
loss: 1.0633196830749512,grad_norm: 0.9999996691719021, iteration: 86054
loss: 1.0272830724716187,grad_norm: 0.9999996615328215, iteration: 86055
loss: 1.0224883556365967,grad_norm: 0.9999991002247289, iteration: 86056
loss: 0.9718160033226013,grad_norm: 0.9999989208583459, iteration: 86057
loss: 1.0058916807174683,grad_norm: 0.9999990291577125, iteration: 86058
loss: 1.0032895803451538,grad_norm: 0.9850931464269855, iteration: 86059
loss: 1.009993553161621,grad_norm: 0.9382907856138265, iteration: 86060
loss: 0.9944230914115906,grad_norm: 0.9999991065937384, iteration: 86061
loss: 1.021876335144043,grad_norm: 0.9999991181473601, iteration: 86062
loss: 0.9962210059165955,grad_norm: 0.9580344239646698, iteration: 86063
loss: 0.9942428469657898,grad_norm: 0.9999990570353778, iteration: 86064
loss: 1.004183053970337,grad_norm: 0.9999992354493985, iteration: 86065
loss: 0.9918139576911926,grad_norm: 0.9462974699507184, iteration: 86066
loss: 0.982914924621582,grad_norm: 0.9999991768608358, iteration: 86067
loss: 0.9860714673995972,grad_norm: 0.99213303593211, iteration: 86068
loss: 1.0441449880599976,grad_norm: 0.9999991738860439, iteration: 86069
loss: 0.985240638256073,grad_norm: 0.9999990653157893, iteration: 86070
loss: 0.9847569465637207,grad_norm: 0.9450283684809869, iteration: 86071
loss: 0.9828197360038757,grad_norm: 0.9999992637783738, iteration: 86072
loss: 1.0079412460327148,grad_norm: 0.9999990994400436, iteration: 86073
loss: 1.0163209438323975,grad_norm: 0.9999990320094292, iteration: 86074
loss: 1.0186418294906616,grad_norm: 0.9981224206461287, iteration: 86075
loss: 0.9608083367347717,grad_norm: 0.8852230566188033, iteration: 86076
loss: 1.0250405073165894,grad_norm: 0.9999991931478314, iteration: 86077
loss: 1.004346251487732,grad_norm: 0.9999992292913745, iteration: 86078
loss: 1.0076860189437866,grad_norm: 0.999998956279439, iteration: 86079
loss: 1.026297688484192,grad_norm: 0.9999991860851, iteration: 86080
loss: 0.9993526935577393,grad_norm: 0.9999991642170375, iteration: 86081
loss: 1.0266196727752686,grad_norm: 0.9999990267168221, iteration: 86082
loss: 1.0032601356506348,grad_norm: 0.9999990625636639, iteration: 86083
loss: 1.033929705619812,grad_norm: 0.9999998362180678, iteration: 86084
loss: 1.007611632347107,grad_norm: 0.9999990958270537, iteration: 86085
loss: 1.0298709869384766,grad_norm: 0.9999992563748109, iteration: 86086
loss: 0.9848313331604004,grad_norm: 0.9999992375451651, iteration: 86087
loss: 0.9627622961997986,grad_norm: 0.977348437508076, iteration: 86088
loss: 0.9777599573135376,grad_norm: 0.9999991214973911, iteration: 86089
loss: 1.0069057941436768,grad_norm: 0.9999990980496387, iteration: 86090
loss: 1.0217792987823486,grad_norm: 0.9468269976028885, iteration: 86091
loss: 0.9872744679450989,grad_norm: 0.9999992086827209, iteration: 86092
loss: 0.9939163327217102,grad_norm: 0.9999997684390768, iteration: 86093
loss: 0.9835171699523926,grad_norm: 0.9999991571046318, iteration: 86094
loss: 1.0290194749832153,grad_norm: 0.8464883064290629, iteration: 86095
loss: 0.9747316241264343,grad_norm: 0.9999992162866703, iteration: 86096
loss: 0.9953110814094543,grad_norm: 0.9999990744309547, iteration: 86097
loss: 1.0050938129425049,grad_norm: 0.9999991706906489, iteration: 86098
loss: 1.000306487083435,grad_norm: 0.9999990270503972, iteration: 86099
loss: 1.0474766492843628,grad_norm: 0.9999993650768253, iteration: 86100
loss: 0.972650408744812,grad_norm: 0.9999992127618962, iteration: 86101
loss: 1.0099856853485107,grad_norm: 0.9999990723476552, iteration: 86102
loss: 1.020890474319458,grad_norm: 0.9916814911387172, iteration: 86103
loss: 1.0096079111099243,grad_norm: 0.9999996160944838, iteration: 86104
loss: 1.0090689659118652,grad_norm: 0.9999991326168786, iteration: 86105
loss: 1.0909768342971802,grad_norm: 0.9999998272194295, iteration: 86106
loss: 1.0254054069519043,grad_norm: 0.9999996635204258, iteration: 86107
loss: 0.9972956776618958,grad_norm: 0.9999995151333088, iteration: 86108
loss: 0.9974689483642578,grad_norm: 0.9999994417962261, iteration: 86109
loss: 1.050952434539795,grad_norm: 0.999999477244463, iteration: 86110
loss: 1.2504221200942993,grad_norm: 0.9999999536258429, iteration: 86111
loss: 0.9957468509674072,grad_norm: 0.9999991488795821, iteration: 86112
loss: 1.0146955251693726,grad_norm: 0.9999990219730388, iteration: 86113
loss: 1.0076781511306763,grad_norm: 0.9999996611183672, iteration: 86114
loss: 1.004281997680664,grad_norm: 0.9999991618815192, iteration: 86115
loss: 1.0041989088058472,grad_norm: 0.9999997633075505, iteration: 86116
loss: 1.0018681287765503,grad_norm: 0.9999990891939061, iteration: 86117
loss: 0.9497013092041016,grad_norm: 0.9999988582261273, iteration: 86118
loss: 1.0187289714813232,grad_norm: 0.9999995188771534, iteration: 86119
loss: 1.0856846570968628,grad_norm: 0.9999998559224615, iteration: 86120
loss: 0.9948303699493408,grad_norm: 0.9117302593062502, iteration: 86121
loss: 0.9800223708152771,grad_norm: 0.9990713653437341, iteration: 86122
loss: 1.0112236738204956,grad_norm: 0.9999992009224079, iteration: 86123
loss: 1.0034419298171997,grad_norm: 0.9999991522318087, iteration: 86124
loss: 1.0198503732681274,grad_norm: 0.999999323036477, iteration: 86125
loss: 1.0250333547592163,grad_norm: 0.9999994719295755, iteration: 86126
loss: 0.9719831943511963,grad_norm: 0.9999991908419733, iteration: 86127
loss: 1.0068359375,grad_norm: 0.9791350583816291, iteration: 86128
loss: 0.9977126121520996,grad_norm: 0.84514734919012, iteration: 86129
loss: 0.9829540252685547,grad_norm: 0.9999991708407124, iteration: 86130
loss: 0.9847592115402222,grad_norm: 0.9999990971210749, iteration: 86131
loss: 1.002564549446106,grad_norm: 0.9999997417890621, iteration: 86132
loss: 1.040392279624939,grad_norm: 0.9999995816256506, iteration: 86133
loss: 0.9882937073707581,grad_norm: 0.9320311400701891, iteration: 86134
loss: 1.0462520122528076,grad_norm: 0.9999990169115348, iteration: 86135
loss: 1.0248243808746338,grad_norm: 0.9999989852467475, iteration: 86136
loss: 1.0218770503997803,grad_norm: 0.9999991450017884, iteration: 86137
loss: 1.0062034130096436,grad_norm: 0.9999991040998497, iteration: 86138
loss: 0.998553454875946,grad_norm: 0.9229797729982242, iteration: 86139
loss: 1.0916094779968262,grad_norm: 0.9999997836418859, iteration: 86140
loss: 1.0183148384094238,grad_norm: 0.9585490089076502, iteration: 86141
loss: 1.0633338689804077,grad_norm: 0.9999994170109507, iteration: 86142
loss: 0.992882490158081,grad_norm: 0.9969403848866051, iteration: 86143
loss: 0.9889132380485535,grad_norm: 0.9390292884889431, iteration: 86144
loss: 0.9692172408103943,grad_norm: 0.9999988895192666, iteration: 86145
loss: 0.9824525713920593,grad_norm: 0.9825547475191216, iteration: 86146
loss: 1.0219111442565918,grad_norm: 0.9999992085474709, iteration: 86147
loss: 1.039829134941101,grad_norm: 0.9999990714284339, iteration: 86148
loss: 1.0410548448562622,grad_norm: 0.9999995788536202, iteration: 86149
loss: 0.9730716943740845,grad_norm: 0.9999991863889147, iteration: 86150
loss: 1.0119978189468384,grad_norm: 0.9999992594379341, iteration: 86151
loss: 1.0038883686065674,grad_norm: 0.9999991789272018, iteration: 86152
loss: 1.0023783445358276,grad_norm: 0.9999993921502681, iteration: 86153
loss: 1.02658212184906,grad_norm: 0.9999991834857223, iteration: 86154
loss: 1.005332589149475,grad_norm: 0.9999991966900459, iteration: 86155
loss: 1.0130054950714111,grad_norm: 0.9999991611865325, iteration: 86156
loss: 1.0003349781036377,grad_norm: 0.9999991120039377, iteration: 86157
loss: 1.0342222452163696,grad_norm: 0.9999996103295065, iteration: 86158
loss: 1.0092517137527466,grad_norm: 0.9999992177878299, iteration: 86159
loss: 1.0042206048965454,grad_norm: 0.9999992420425484, iteration: 86160
loss: 1.026168704032898,grad_norm: 0.9999999006374404, iteration: 86161
loss: 1.0777260065078735,grad_norm: 0.9999994553782805, iteration: 86162
loss: 1.0214862823486328,grad_norm: 0.861792788207501, iteration: 86163
loss: 0.9867914915084839,grad_norm: 0.999999003987531, iteration: 86164
loss: 0.9856993556022644,grad_norm: 0.9999991361176463, iteration: 86165
loss: 1.0017991065979004,grad_norm: 0.9719941416318945, iteration: 86166
loss: 1.0231926441192627,grad_norm: 0.9999990254274427, iteration: 86167
loss: 1.061062216758728,grad_norm: 0.9999992405586411, iteration: 86168
loss: 0.9850000143051147,grad_norm: 0.9999991800994495, iteration: 86169
loss: 1.043694257736206,grad_norm: 0.8804618211451458, iteration: 86170
loss: 1.0019067525863647,grad_norm: 0.9999990289639444, iteration: 86171
loss: 1.0167803764343262,grad_norm: 0.9999990795801664, iteration: 86172
loss: 0.9985035061836243,grad_norm: 0.9999989708882389, iteration: 86173
loss: 1.0090253353118896,grad_norm: 0.999999396171613, iteration: 86174
loss: 0.9835997223854065,grad_norm: 0.9999992989495853, iteration: 86175
loss: 1.0005357265472412,grad_norm: 0.8981468144216405, iteration: 86176
loss: 1.0758687257766724,grad_norm: 0.9999992337353804, iteration: 86177
loss: 0.9826278686523438,grad_norm: 0.9999991127761642, iteration: 86178
loss: 0.9880319833755493,grad_norm: 0.9999992008995914, iteration: 86179
loss: 1.0051376819610596,grad_norm: 0.9408352012406224, iteration: 86180
loss: 0.9828023910522461,grad_norm: 0.9999991903270481, iteration: 86181
loss: 1.0050299167633057,grad_norm: 0.9999995271612736, iteration: 86182
loss: 0.9868465662002563,grad_norm: 0.9999991180573922, iteration: 86183
loss: 1.0092864036560059,grad_norm: 0.9292460799065402, iteration: 86184
loss: 1.001893401145935,grad_norm: 0.999999177162167, iteration: 86185
loss: 1.010440468788147,grad_norm: 0.7741918642576134, iteration: 86186
loss: 0.9943994879722595,grad_norm: 0.9999992463245472, iteration: 86187
loss: 1.034305214881897,grad_norm: 0.9999995464696225, iteration: 86188
loss: 1.0166796445846558,grad_norm: 0.9999992379238634, iteration: 86189
loss: 0.9870324730873108,grad_norm: 0.9999989946478444, iteration: 86190
loss: 1.0911898612976074,grad_norm: 0.9999993603126975, iteration: 86191
loss: 0.9956195950508118,grad_norm: 0.9600275381624314, iteration: 86192
loss: 0.9935774207115173,grad_norm: 0.9220023027717654, iteration: 86193
loss: 0.9529551863670349,grad_norm: 0.9972109921211066, iteration: 86194
loss: 1.040167212486267,grad_norm: 0.9999991257553632, iteration: 86195
loss: 1.0012860298156738,grad_norm: 0.854929454106749, iteration: 86196
loss: 1.3273065090179443,grad_norm: 0.9999998218236293, iteration: 86197
loss: 1.0249016284942627,grad_norm: 0.9836485392590933, iteration: 86198
loss: 0.9841105341911316,grad_norm: 0.9999992169663589, iteration: 86199
loss: 1.0973858833312988,grad_norm: 0.9999992282515582, iteration: 86200
loss: 0.9911799430847168,grad_norm: 0.9999992033851303, iteration: 86201
loss: 1.0016376972198486,grad_norm: 0.9999990366321511, iteration: 86202
loss: 1.0220544338226318,grad_norm: 0.9999990838086625, iteration: 86203
loss: 1.0034533739089966,grad_norm: 0.9999990317770445, iteration: 86204
loss: 0.9857501983642578,grad_norm: 0.999999034838583, iteration: 86205
loss: 0.9905863404273987,grad_norm: 0.9275925210592184, iteration: 86206
loss: 0.992881715297699,grad_norm: 0.9999991660383675, iteration: 86207
loss: 1.0141487121582031,grad_norm: 0.9999996413633354, iteration: 86208
loss: 0.9631828665733337,grad_norm: 0.9683437957200601, iteration: 86209
loss: 0.9753479361534119,grad_norm: 0.999999032591761, iteration: 86210
loss: 1.022691011428833,grad_norm: 0.9999993921994206, iteration: 86211
loss: 1.0151513814926147,grad_norm: 0.9633063175157386, iteration: 86212
loss: 0.9794310927391052,grad_norm: 0.9493009222217891, iteration: 86213
loss: 1.1015921831130981,grad_norm: 0.9999992719972255, iteration: 86214
loss: 1.036404013633728,grad_norm: 0.9999990288035141, iteration: 86215
loss: 1.0102946758270264,grad_norm: 0.999999370100643, iteration: 86216
loss: 1.0546773672103882,grad_norm: 0.9999991910665795, iteration: 86217
loss: 0.9984778165817261,grad_norm: 0.9999992421283169, iteration: 86218
loss: 1.0164272785186768,grad_norm: 0.9697482759792383, iteration: 86219
loss: 0.9776586294174194,grad_norm: 0.9397383925664158, iteration: 86220
loss: 0.9896945357322693,grad_norm: 0.9620306386347534, iteration: 86221
loss: 0.9995138645172119,grad_norm: 0.999999340432659, iteration: 86222
loss: 1.0026298761367798,grad_norm: 0.9999991197577348, iteration: 86223
loss: 1.0137698650360107,grad_norm: 0.9999990507324844, iteration: 86224
loss: 1.0153762102127075,grad_norm: 0.9999991065183705, iteration: 86225
loss: 1.06187105178833,grad_norm: 0.9999994204296838, iteration: 86226
loss: 1.0057868957519531,grad_norm: 0.9796963243371757, iteration: 86227
loss: 1.0265766382217407,grad_norm: 0.9980946764759792, iteration: 86228
loss: 1.0413910150527954,grad_norm: 0.9999994769751323, iteration: 86229
loss: 1.0289223194122314,grad_norm: 0.9514542855842782, iteration: 86230
loss: 1.009186029434204,grad_norm: 0.9999990752896943, iteration: 86231
loss: 1.0194077491760254,grad_norm: 0.9998509939721243, iteration: 86232
loss: 1.010399580001831,grad_norm: 0.9999991118283046, iteration: 86233
loss: 0.992706298828125,grad_norm: 0.9999991679072912, iteration: 86234
loss: 0.9961994290351868,grad_norm: 0.9999992343238229, iteration: 86235
loss: 1.0269981622695923,grad_norm: 0.9850763367404285, iteration: 86236
loss: 0.989034116268158,grad_norm: 0.9999990120575949, iteration: 86237
loss: 1.0193971395492554,grad_norm: 0.9999991416113101, iteration: 86238
loss: 0.9628630876541138,grad_norm: 0.9007690754978074, iteration: 86239
loss: 1.011767864227295,grad_norm: 0.9999994297987008, iteration: 86240
loss: 1.045317530632019,grad_norm: 0.9999992750488573, iteration: 86241
loss: 0.9849916696548462,grad_norm: 0.9999991484815008, iteration: 86242
loss: 1.0634092092514038,grad_norm: 0.9999994061850361, iteration: 86243
loss: 1.0892945528030396,grad_norm: 0.9999992612637425, iteration: 86244
loss: 0.9951534867286682,grad_norm: 0.9999990760743694, iteration: 86245
loss: 1.0202648639678955,grad_norm: 0.9999991608074038, iteration: 86246
loss: 0.9684703946113586,grad_norm: 0.9999990275570889, iteration: 86247
loss: 1.0052886009216309,grad_norm: 0.9999989171384333, iteration: 86248
loss: 1.0036230087280273,grad_norm: 0.9999989915530456, iteration: 86249
loss: 0.9949960708618164,grad_norm: 0.9999990568665759, iteration: 86250
loss: 1.004262924194336,grad_norm: 0.9882190466034744, iteration: 86251
loss: 0.9916344881057739,grad_norm: 0.9999990625454741, iteration: 86252
loss: 0.9647493958473206,grad_norm: 0.9973699776304631, iteration: 86253
loss: 0.994580090045929,grad_norm: 0.9999989220359486, iteration: 86254
loss: 0.9950965642929077,grad_norm: 0.8620065114662376, iteration: 86255
loss: 0.968763530254364,grad_norm: 0.9999991967444548, iteration: 86256
loss: 1.0201557874679565,grad_norm: 0.9999991566987364, iteration: 86257
loss: 1.0982658863067627,grad_norm: 0.9999994529738236, iteration: 86258
loss: 0.9966686367988586,grad_norm: 0.9951046273395067, iteration: 86259
loss: 1.0010510683059692,grad_norm: 0.9999990817274242, iteration: 86260
loss: 1.0206665992736816,grad_norm: 0.9999991470354211, iteration: 86261
loss: 0.9948968887329102,grad_norm: 0.9999989610175829, iteration: 86262
loss: 1.0103739500045776,grad_norm: 0.9999991060200947, iteration: 86263
loss: 1.0173425674438477,grad_norm: 0.9333797889984385, iteration: 86264
loss: 0.9792441725730896,grad_norm: 0.9805786208068163, iteration: 86265
loss: 1.0022615194320679,grad_norm: 0.9560166432274305, iteration: 86266
loss: 0.990706205368042,grad_norm: 0.9621690635951056, iteration: 86267
loss: 0.9950289130210876,grad_norm: 0.9681807872636385, iteration: 86268
loss: 0.9915644526481628,grad_norm: 0.9999989743348725, iteration: 86269
loss: 1.004330039024353,grad_norm: 0.9999989929146178, iteration: 86270
loss: 1.0206120014190674,grad_norm: 0.9999991141212623, iteration: 86271
loss: 1.1090527772903442,grad_norm: 0.9999991006860224, iteration: 86272
loss: 1.0158370733261108,grad_norm: 0.9999991348776869, iteration: 86273
loss: 1.0117738246917725,grad_norm: 0.9999990990628572, iteration: 86274
loss: 0.995795488357544,grad_norm: 0.9999992552072975, iteration: 86275
loss: 0.9950281381607056,grad_norm: 0.9999991659842978, iteration: 86276
loss: 0.9965055584907532,grad_norm: 0.9597208794384354, iteration: 86277
loss: 1.0269606113433838,grad_norm: 0.999998997243362, iteration: 86278
loss: 0.9991347193717957,grad_norm: 0.9999991522298114, iteration: 86279
loss: 1.0149275064468384,grad_norm: 0.9999991068734764, iteration: 86280
loss: 1.03269624710083,grad_norm: 0.9999992667456804, iteration: 86281
loss: 0.9665729999542236,grad_norm: 0.9999992599769167, iteration: 86282
loss: 0.988665759563446,grad_norm: 0.9999991941036033, iteration: 86283
loss: 0.9854881167411804,grad_norm: 0.8896463168068396, iteration: 86284
loss: 1.0697193145751953,grad_norm: 0.9999997012942391, iteration: 86285
loss: 0.9966440200805664,grad_norm: 0.9845723045098499, iteration: 86286
loss: 1.0171114206314087,grad_norm: 0.9999989768361643, iteration: 86287
loss: 1.0040090084075928,grad_norm: 0.9999991901723961, iteration: 86288
loss: 0.9913551211357117,grad_norm: 0.9999991374236347, iteration: 86289
loss: 1.0125441551208496,grad_norm: 0.9999991866822648, iteration: 86290
loss: 1.0124558210372925,grad_norm: 0.9999995117980814, iteration: 86291
loss: 0.9940685033798218,grad_norm: 0.9999991230008786, iteration: 86292
loss: 1.0149067640304565,grad_norm: 0.9999991101802769, iteration: 86293
loss: 0.9995720982551575,grad_norm: 0.9999992237895071, iteration: 86294
loss: 0.999904215335846,grad_norm: 0.9999994853879792, iteration: 86295
loss: 1.0030475854873657,grad_norm: 0.9999993263411947, iteration: 86296
loss: 0.985231876373291,grad_norm: 0.9999991296391554, iteration: 86297
loss: 1.0003061294555664,grad_norm: 0.9999989069951293, iteration: 86298
loss: 1.037688136100769,grad_norm: 0.9700012737163209, iteration: 86299
loss: 0.9940400719642639,grad_norm: 0.999999099754379, iteration: 86300
loss: 0.9859036207199097,grad_norm: 0.9999991362247297, iteration: 86301
loss: 0.9923557639122009,grad_norm: 0.9999992673334375, iteration: 86302
loss: 1.0010284185409546,grad_norm: 0.9704053095950114, iteration: 86303
loss: 0.9695432782173157,grad_norm: 0.9300773452606534, iteration: 86304
loss: 1.0056414604187012,grad_norm: 0.9295977860887595, iteration: 86305
loss: 1.0010137557983398,grad_norm: 0.9999993267741278, iteration: 86306
loss: 1.0009194612503052,grad_norm: 0.999999671068919, iteration: 86307
loss: 0.9804852604866028,grad_norm: 0.9999990788502519, iteration: 86308
loss: 0.9964633584022522,grad_norm: 0.9921708759081795, iteration: 86309
loss: 1.0043299198150635,grad_norm: 0.999999833554848, iteration: 86310
loss: 0.9786105751991272,grad_norm: 0.9999990251959286, iteration: 86311
loss: 1.0156265497207642,grad_norm: 0.9999990732942027, iteration: 86312
loss: 1.0384763479232788,grad_norm: 0.9999998243164764, iteration: 86313
loss: 0.9858676791191101,grad_norm: 0.999999005786389, iteration: 86314
loss: 0.9792413711547852,grad_norm: 0.9999997429477746, iteration: 86315
loss: 1.0019118785858154,grad_norm: 0.9999990851763852, iteration: 86316
loss: 1.01194429397583,grad_norm: 0.9999992135265311, iteration: 86317
loss: 1.021053433418274,grad_norm: 0.9999998177318686, iteration: 86318
loss: 0.9582422375679016,grad_norm: 0.9999990092404293, iteration: 86319
loss: 0.9777012467384338,grad_norm: 0.9999992766727631, iteration: 86320
loss: 1.0417910814285278,grad_norm: 0.9999993527023017, iteration: 86321
loss: 0.9993264079093933,grad_norm: 0.9999991245971053, iteration: 86322
loss: 1.0532461404800415,grad_norm: 0.9999991074164125, iteration: 86323
loss: 1.0036466121673584,grad_norm: 0.8918440531554085, iteration: 86324
loss: 0.9985615015029907,grad_norm: 0.9977039198096178, iteration: 86325
loss: 0.9917811751365662,grad_norm: 0.9999989917443226, iteration: 86326
loss: 0.966522753238678,grad_norm: 0.9260258560168166, iteration: 86327
loss: 0.983561098575592,grad_norm: 0.9999990809368489, iteration: 86328
loss: 1.0294400453567505,grad_norm: 0.9999991992988336, iteration: 86329
loss: 1.0685924291610718,grad_norm: 0.9999995008860008, iteration: 86330
loss: 0.9893792867660522,grad_norm: 0.9999990917330649, iteration: 86331
loss: 1.003864049911499,grad_norm: 0.9999990066781097, iteration: 86332
loss: 1.039616584777832,grad_norm: 0.9999996461097489, iteration: 86333
loss: 1.0178239345550537,grad_norm: 0.9999991549929079, iteration: 86334
loss: 1.0105133056640625,grad_norm: 0.9999991483960503, iteration: 86335
loss: 0.9553999304771423,grad_norm: 0.9557094905401343, iteration: 86336
loss: 1.1458168029785156,grad_norm: 0.9999994016163046, iteration: 86337
loss: 1.0854153633117676,grad_norm: 0.9999993588984226, iteration: 86338
loss: 1.0024383068084717,grad_norm: 0.9999989209143135, iteration: 86339
loss: 1.0336841344833374,grad_norm: 0.9999991344708075, iteration: 86340
loss: 0.9975123405456543,grad_norm: 0.999999123965723, iteration: 86341
loss: 1.105301022529602,grad_norm: 0.9999996127474415, iteration: 86342
loss: 0.992158055305481,grad_norm: 0.9943557908296707, iteration: 86343
loss: 1.0104938745498657,grad_norm: 0.9999990892749238, iteration: 86344
loss: 1.022533893585205,grad_norm: 0.9999990616856449, iteration: 86345
loss: 1.0327833890914917,grad_norm: 0.9999991520392598, iteration: 86346
loss: 1.0017653703689575,grad_norm: 0.9999991269353586, iteration: 86347
loss: 1.016886591911316,grad_norm: 0.999999142389579, iteration: 86348
loss: 1.0014674663543701,grad_norm: 0.9999989379840536, iteration: 86349
loss: 1.0121734142303467,grad_norm: 0.9499728407906443, iteration: 86350
loss: 0.9840729236602783,grad_norm: 0.9632808746374667, iteration: 86351
loss: 1.0060827732086182,grad_norm: 0.9999992326761785, iteration: 86352
loss: 1.02614164352417,grad_norm: 0.9999989846412418, iteration: 86353
loss: 1.0473233461380005,grad_norm: 0.9999989857356172, iteration: 86354
loss: 1.0388435125350952,grad_norm: 0.9999994227689154, iteration: 86355
loss: 0.9895362257957458,grad_norm: 0.9999990057838267, iteration: 86356
loss: 0.9854003190994263,grad_norm: 0.9999993489351154, iteration: 86357
loss: 1.0193732976913452,grad_norm: 0.9999991541285718, iteration: 86358
loss: 0.9717651605606079,grad_norm: 0.9999989875176768, iteration: 86359
loss: 1.008458137512207,grad_norm: 0.9999993385574997, iteration: 86360
loss: 1.000740647315979,grad_norm: 0.9024966697021025, iteration: 86361
loss: 0.9947960376739502,grad_norm: 0.9999990952745387, iteration: 86362
loss: 1.0121220350265503,grad_norm: 0.9999989457540188, iteration: 86363
loss: 1.014395833015442,grad_norm: 0.9310729921060998, iteration: 86364
loss: 0.9872017502784729,grad_norm: 0.9999990162849465, iteration: 86365
loss: 0.9803135991096497,grad_norm: 0.999999064524006, iteration: 86366
loss: 0.9975774884223938,grad_norm: 0.953711850149531, iteration: 86367
loss: 0.9787704348564148,grad_norm: 0.9999991824969195, iteration: 86368
loss: 0.9896940588951111,grad_norm: 0.9639247783725675, iteration: 86369
loss: 0.9970719814300537,grad_norm: 0.9094748357576372, iteration: 86370
loss: 0.9888922572135925,grad_norm: 0.9999990968029255, iteration: 86371
loss: 1.0031030178070068,grad_norm: 0.9999991995353902, iteration: 86372
loss: 0.9809370636940002,grad_norm: 0.9999991796463102, iteration: 86373
loss: 0.9904441237449646,grad_norm: 0.9828793103300465, iteration: 86374
loss: 0.985005259513855,grad_norm: 0.9999990932626083, iteration: 86375
loss: 0.9694629907608032,grad_norm: 0.9999990909050671, iteration: 86376
loss: 0.9861384034156799,grad_norm: 0.9999991217594073, iteration: 86377
loss: 1.0169600248336792,grad_norm: 0.9909067668875157, iteration: 86378
loss: 1.0066039562225342,grad_norm: 0.9999991550055142, iteration: 86379
loss: 1.0114363431930542,grad_norm: 0.9999990289282273, iteration: 86380
loss: 0.9891021847724915,grad_norm: 0.9511446133280653, iteration: 86381
loss: 0.9766926765441895,grad_norm: 0.9594076974602475, iteration: 86382
loss: 1.0055263042449951,grad_norm: 0.9999995686296507, iteration: 86383
loss: 1.0195454359054565,grad_norm: 0.9999992136856389, iteration: 86384
loss: 1.013129711151123,grad_norm: 0.9999989987093764, iteration: 86385
loss: 0.9732391834259033,grad_norm: 0.9649062017584534, iteration: 86386
loss: 0.9910409450531006,grad_norm: 0.9829320407058568, iteration: 86387
loss: 0.973473310470581,grad_norm: 0.9999990453537041, iteration: 86388
loss: 1.0315240621566772,grad_norm: 0.9985883407772983, iteration: 86389
loss: 0.9969315528869629,grad_norm: 0.9999990327740361, iteration: 86390
loss: 0.9689518213272095,grad_norm: 0.9626957341655217, iteration: 86391
loss: 0.9772965908050537,grad_norm: 0.9999991849096745, iteration: 86392
loss: 1.0089589357376099,grad_norm: 0.9999992746672107, iteration: 86393
loss: 1.2236063480377197,grad_norm: 0.9999997182901507, iteration: 86394
loss: 0.9980847835540771,grad_norm: 0.9999991998452225, iteration: 86395
loss: 1.0223190784454346,grad_norm: 0.9999990670836618, iteration: 86396
loss: 0.992802619934082,grad_norm: 0.9001488185246386, iteration: 86397
loss: 0.9909549355506897,grad_norm: 0.9999990487891733, iteration: 86398
loss: 1.0048502683639526,grad_norm: 0.972419641974003, iteration: 86399
loss: 1.0415040254592896,grad_norm: 0.9750505870761023, iteration: 86400
loss: 1.0017632246017456,grad_norm: 0.9999991529797109, iteration: 86401
loss: 1.001646637916565,grad_norm: 0.9532397474631978, iteration: 86402
loss: 0.9806593060493469,grad_norm: 0.966191989995707, iteration: 86403
loss: 0.9696486592292786,grad_norm: 0.9871462491996549, iteration: 86404
loss: 1.03494131565094,grad_norm: 0.9650384921902088, iteration: 86405
loss: 0.9789878726005554,grad_norm: 0.9999990297612489, iteration: 86406
loss: 1.0175223350524902,grad_norm: 0.9999992529625068, iteration: 86407
loss: 1.0003350973129272,grad_norm: 0.9999989679813801, iteration: 86408
loss: 0.9696825742721558,grad_norm: 0.9999991089504633, iteration: 86409
loss: 1.0315759181976318,grad_norm: 0.9999990628812211, iteration: 86410
loss: 1.0063879489898682,grad_norm: 0.9652814292725008, iteration: 86411
loss: 0.9859601855278015,grad_norm: 0.9999991222289634, iteration: 86412
loss: 0.9788395762443542,grad_norm: 0.9999990266931404, iteration: 86413
loss: 1.0396127700805664,grad_norm: 0.999999200410012, iteration: 86414
loss: 0.9495947957038879,grad_norm: 0.9999991674147828, iteration: 86415
loss: 1.008919596672058,grad_norm: 0.9883364793107122, iteration: 86416
loss: 0.9846165180206299,grad_norm: 0.9999991979385896, iteration: 86417
loss: 1.0134474039077759,grad_norm: 0.9999990419647032, iteration: 86418
loss: 0.9934951663017273,grad_norm: 0.999999205391743, iteration: 86419
loss: 0.9963056445121765,grad_norm: 0.9166837122592608, iteration: 86420
loss: 1.007902979850769,grad_norm: 0.9999990585851173, iteration: 86421
loss: 0.973286509513855,grad_norm: 0.9438893266900165, iteration: 86422
loss: 1.0157532691955566,grad_norm: 0.9999991156366593, iteration: 86423
loss: 1.0035889148712158,grad_norm: 0.9999990844165756, iteration: 86424
loss: 1.0305368900299072,grad_norm: 0.9999992428662475, iteration: 86425
loss: 0.9879658222198486,grad_norm: 0.9249205300066415, iteration: 86426
loss: 0.9847539663314819,grad_norm: 0.9999990190275161, iteration: 86427
loss: 0.9815186858177185,grad_norm: 0.9999991778480606, iteration: 86428
loss: 0.9912328124046326,grad_norm: 0.9999990185935131, iteration: 86429
loss: 1.0099549293518066,grad_norm: 0.9999991025845465, iteration: 86430
loss: 1.040606141090393,grad_norm: 0.9999991565858267, iteration: 86431
loss: 0.9787885546684265,grad_norm: 0.9960939319874005, iteration: 86432
loss: 1.0073556900024414,grad_norm: 0.9999992322491119, iteration: 86433
loss: 0.986994206905365,grad_norm: 0.8661827565768057, iteration: 86434
loss: 0.9780166149139404,grad_norm: 0.9999991213883214, iteration: 86435
loss: 1.0465655326843262,grad_norm: 0.999999235947244, iteration: 86436
loss: 0.9860283136367798,grad_norm: 0.9675697366606772, iteration: 86437
loss: 0.9758234620094299,grad_norm: 0.9999989848273013, iteration: 86438
loss: 1.0059716701507568,grad_norm: 0.864646732938055, iteration: 86439
loss: 0.9895882606506348,grad_norm: 0.9999992876791004, iteration: 86440
loss: 0.9971867799758911,grad_norm: 0.999999095936568, iteration: 86441
loss: 0.9711245894432068,grad_norm: 0.9999990318444444, iteration: 86442
loss: 1.0420955419540405,grad_norm: 0.9999990429557163, iteration: 86443
loss: 1.018204689025879,grad_norm: 0.9999992427316833, iteration: 86444
loss: 0.9809244871139526,grad_norm: 0.9999991203420238, iteration: 86445
loss: 1.022753357887268,grad_norm: 0.999999069415624, iteration: 86446
loss: 1.0226243734359741,grad_norm: 0.9999989776863741, iteration: 86447
loss: 0.9831392168998718,grad_norm: 0.8938971109119765, iteration: 86448
loss: 1.0051987171173096,grad_norm: 0.9999992520332437, iteration: 86449
loss: 0.9788930416107178,grad_norm: 0.9999993188126687, iteration: 86450
loss: 0.9849640727043152,grad_norm: 0.8271028804959555, iteration: 86451
loss: 1.0211161375045776,grad_norm: 0.9999995929530233, iteration: 86452
loss: 0.9812152981758118,grad_norm: 0.9534960722983225, iteration: 86453
loss: 1.0127214193344116,grad_norm: 0.9999992563763642, iteration: 86454
loss: 1.01173996925354,grad_norm: 0.9999991621094008, iteration: 86455
loss: 1.0083264112472534,grad_norm: 0.9999990625076034, iteration: 86456
loss: 1.022382378578186,grad_norm: 0.9999989946976164, iteration: 86457
loss: 1.00661301612854,grad_norm: 0.9999991526376376, iteration: 86458
loss: 0.9940314888954163,grad_norm: 0.9347565389621575, iteration: 86459
loss: 1.051173448562622,grad_norm: 0.9999993666951662, iteration: 86460
loss: 1.0123976469039917,grad_norm: 0.9999992282308913, iteration: 86461
loss: 0.9822345972061157,grad_norm: 0.9999991256155345, iteration: 86462
loss: 1.0091198682785034,grad_norm: 0.9999993550666203, iteration: 86463
loss: 1.0146656036376953,grad_norm: 0.9999989278204703, iteration: 86464
loss: 1.013179898262024,grad_norm: 0.9999991596035426, iteration: 86465
loss: 0.9465156197547913,grad_norm: 0.999999123378804, iteration: 86466
loss: 0.9855878353118896,grad_norm: 0.9999990271339483, iteration: 86467
loss: 0.9844313263893127,grad_norm: 0.8959264644007094, iteration: 86468
loss: 0.9734959006309509,grad_norm: 0.9999990106669212, iteration: 86469
loss: 0.9763853549957275,grad_norm: 0.9999988931643858, iteration: 86470
loss: 1.0249204635620117,grad_norm: 0.9999991661212365, iteration: 86471
loss: 0.9830591082572937,grad_norm: 0.941051337225334, iteration: 86472
loss: 0.9815846085548401,grad_norm: 0.9999990203102695, iteration: 86473
loss: 0.9821646809577942,grad_norm: 0.9235939660303997, iteration: 86474
loss: 1.0286544561386108,grad_norm: 0.9999991322567041, iteration: 86475
loss: 0.968981146812439,grad_norm: 0.8660203324311434, iteration: 86476
loss: 1.00667405128479,grad_norm: 0.9999990130455735, iteration: 86477
loss: 0.9981257319450378,grad_norm: 0.9999999713341028, iteration: 86478
loss: 0.9936118721961975,grad_norm: 0.9999991656275465, iteration: 86479
loss: 0.99459308385849,grad_norm: 0.999999166985873, iteration: 86480
loss: 0.9548373818397522,grad_norm: 0.892462699627022, iteration: 86481
loss: 1.010451316833496,grad_norm: 0.9999990712103717, iteration: 86482
loss: 1.0008479356765747,grad_norm: 0.9439390557078353, iteration: 86483
loss: 1.0177068710327148,grad_norm: 0.999999509335616, iteration: 86484
loss: 1.0191478729248047,grad_norm: 0.9999992221458721, iteration: 86485
loss: 0.9946666359901428,grad_norm: 0.9015957730814227, iteration: 86486
loss: 1.019578218460083,grad_norm: 0.9700599497655454, iteration: 86487
loss: 0.9935072660446167,grad_norm: 0.9999991224496689, iteration: 86488
loss: 0.9937235713005066,grad_norm: 0.9999990215489334, iteration: 86489
loss: 0.9636395573616028,grad_norm: 0.999999098555727, iteration: 86490
loss: 0.9940252900123596,grad_norm: 0.9999990983473555, iteration: 86491
loss: 1.0179041624069214,grad_norm: 0.9513970948614474, iteration: 86492
loss: 0.9820019006729126,grad_norm: 0.9999992428895548, iteration: 86493
loss: 0.9884055256843567,grad_norm: 0.999999095110621, iteration: 86494
loss: 1.008061408996582,grad_norm: 0.9577778700080443, iteration: 86495
loss: 1.0005532503128052,grad_norm: 0.9999991255718566, iteration: 86496
loss: 1.0026066303253174,grad_norm: 0.9999992005471313, iteration: 86497
loss: 0.9816862940788269,grad_norm: 0.9999990396246401, iteration: 86498
loss: 0.9887466430664062,grad_norm: 0.9908919225541606, iteration: 86499
loss: 1.0066370964050293,grad_norm: 0.940902640606705, iteration: 86500
loss: 1.0040510892868042,grad_norm: 0.999998971844839, iteration: 86501
loss: 1.0171868801116943,grad_norm: 0.999999176139377, iteration: 86502
loss: 0.9942346215248108,grad_norm: 0.9999990091840725, iteration: 86503
loss: 1.0235711336135864,grad_norm: 0.9999990861680471, iteration: 86504
loss: 0.9768361449241638,grad_norm: 0.9611962636045004, iteration: 86505
loss: 1.0606988668441772,grad_norm: 0.9999992993846974, iteration: 86506
loss: 0.9951178431510925,grad_norm: 0.9244253174778673, iteration: 86507
loss: 1.0111331939697266,grad_norm: 0.9354986907657745, iteration: 86508
loss: 1.0163575410842896,grad_norm: 0.9999991673004175, iteration: 86509
loss: 0.9910338521003723,grad_norm: 0.9999992795983936, iteration: 86510
loss: 1.0084120035171509,grad_norm: 0.9536190655859849, iteration: 86511
loss: 1.0127724409103394,grad_norm: 0.8239653683538268, iteration: 86512
loss: 0.9999030232429504,grad_norm: 0.999999095929103, iteration: 86513
loss: 0.98659747838974,grad_norm: 0.9999990752859479, iteration: 86514
loss: 0.9917930960655212,grad_norm: 0.9999994835033884, iteration: 86515
loss: 0.9896778464317322,grad_norm: 0.9999991733164624, iteration: 86516
loss: 0.9939804077148438,grad_norm: 0.9999992961930297, iteration: 86517
loss: 1.0122861862182617,grad_norm: 0.9948963397074941, iteration: 86518
loss: 0.9660087823867798,grad_norm: 0.9999990242012013, iteration: 86519
loss: 1.018530249595642,grad_norm: 0.9999991720259342, iteration: 86520
loss: 0.9963372945785522,grad_norm: 0.9706832485280339, iteration: 86521
loss: 0.9608046412467957,grad_norm: 0.9999990893094657, iteration: 86522
loss: 1.0153166055679321,grad_norm: 0.9999990832107926, iteration: 86523
loss: 1.0318971872329712,grad_norm: 0.8824916486357479, iteration: 86524
loss: 1.0040086507797241,grad_norm: 0.9490074867541227, iteration: 86525
loss: 1.0334872007369995,grad_norm: 0.9999992185390983, iteration: 86526
loss: 0.9921419024467468,grad_norm: 0.9999990523200325, iteration: 86527
loss: 0.983106255531311,grad_norm: 0.9999992824729461, iteration: 86528
loss: 1.013169288635254,grad_norm: 0.999999027710929, iteration: 86529
loss: 0.993352472782135,grad_norm: 0.9999992581394556, iteration: 86530
loss: 0.9881443977355957,grad_norm: 0.9999992608327699, iteration: 86531
loss: 1.0365065336227417,grad_norm: 0.9999993493621024, iteration: 86532
loss: 1.022445559501648,grad_norm: 0.9999992011688307, iteration: 86533
loss: 1.0102332830429077,grad_norm: 0.9999991422696486, iteration: 86534
loss: 1.031996488571167,grad_norm: 0.9999998043746166, iteration: 86535
loss: 1.0088540315628052,grad_norm: 0.9022460136838736, iteration: 86536
loss: 1.0117093324661255,grad_norm: 0.9999991341670941, iteration: 86537
loss: 1.040839433670044,grad_norm: 0.9999990627897438, iteration: 86538
loss: 0.9932719469070435,grad_norm: 0.9612335701893048, iteration: 86539
loss: 1.0196329355239868,grad_norm: 0.9999990641424941, iteration: 86540
loss: 1.0471575260162354,grad_norm: 0.9999992406890058, iteration: 86541
loss: 0.9813013076782227,grad_norm: 0.9499457158003162, iteration: 86542
loss: 1.000364065170288,grad_norm: 0.9999992341107431, iteration: 86543
loss: 1.026226282119751,grad_norm: 0.9999992172336056, iteration: 86544
loss: 1.0079437494277954,grad_norm: 0.9999989433311792, iteration: 86545
loss: 0.9716887474060059,grad_norm: 0.9343472158286431, iteration: 86546
loss: 1.0145480632781982,grad_norm: 0.9999991665644434, iteration: 86547
loss: 1.0094107389450073,grad_norm: 0.9999992147192138, iteration: 86548
loss: 1.0104761123657227,grad_norm: 0.9764483907651448, iteration: 86549
loss: 1.023830533027649,grad_norm: 0.9482279297774644, iteration: 86550
loss: 1.0090335607528687,grad_norm: 0.9999991467898436, iteration: 86551
loss: 1.0075104236602783,grad_norm: 0.9999991323839772, iteration: 86552
loss: 0.9972526431083679,grad_norm: 0.9237795110732047, iteration: 86553
loss: 0.9925278425216675,grad_norm: 0.9999991165805312, iteration: 86554
loss: 0.9807095527648926,grad_norm: 0.9999992597085625, iteration: 86555
loss: 0.9518058896064758,grad_norm: 0.989237289396165, iteration: 86556
loss: 0.9882296919822693,grad_norm: 0.9999991337367036, iteration: 86557
loss: 1.0148324966430664,grad_norm: 0.9796243336228567, iteration: 86558
loss: 1.0018247365951538,grad_norm: 0.9949574273554088, iteration: 86559
loss: 1.0337185859680176,grad_norm: 0.9999991816676782, iteration: 86560
loss: 0.9857590198516846,grad_norm: 0.8981544258465864, iteration: 86561
loss: 1.0166401863098145,grad_norm: 0.9999990076542603, iteration: 86562
loss: 1.0030090808868408,grad_norm: 0.9999990095622806, iteration: 86563
loss: 1.0320117473602295,grad_norm: 0.9999996075078622, iteration: 86564
loss: 0.9928255677223206,grad_norm: 0.9999991962062765, iteration: 86565
loss: 1.0078645944595337,grad_norm: 0.999999112985136, iteration: 86566
loss: 1.0142370462417603,grad_norm: 0.9999997361993338, iteration: 86567
loss: 1.0085707902908325,grad_norm: 0.9539514600110022, iteration: 86568
loss: 0.9813280701637268,grad_norm: 0.9999992309700533, iteration: 86569
loss: 1.0039156675338745,grad_norm: 0.9331372997480067, iteration: 86570
loss: 0.9983412623405457,grad_norm: 0.9328075776159497, iteration: 86571
loss: 1.0149033069610596,grad_norm: 0.9999989613244016, iteration: 86572
loss: 0.9875649809837341,grad_norm: 0.9999992464137418, iteration: 86573
loss: 0.9936113357543945,grad_norm: 0.999999051487071, iteration: 86574
loss: 1.0163493156433105,grad_norm: 0.8901094032584527, iteration: 86575
loss: 1.0539053678512573,grad_norm: 0.9999995350813556, iteration: 86576
loss: 0.9860630631446838,grad_norm: 0.878531740638882, iteration: 86577
loss: 0.9863966107368469,grad_norm: 0.9672702106191166, iteration: 86578
loss: 0.9921257495880127,grad_norm: 0.9999991007638108, iteration: 86579
loss: 0.9972338676452637,grad_norm: 0.9999991073000988, iteration: 86580
loss: 1.0187888145446777,grad_norm: 0.9501271903756231, iteration: 86581
loss: 1.0839014053344727,grad_norm: 0.9999995744881639, iteration: 86582
loss: 0.9856367111206055,grad_norm: 0.9999991657226427, iteration: 86583
loss: 0.994351863861084,grad_norm: 0.9999989398532698, iteration: 86584
loss: 1.0044312477111816,grad_norm: 0.9999991901529661, iteration: 86585
loss: 0.9899728298187256,grad_norm: 0.9999990865489727, iteration: 86586
loss: 0.9754182696342468,grad_norm: 0.9999993805044683, iteration: 86587
loss: 0.9900754690170288,grad_norm: 0.9735375476152898, iteration: 86588
loss: 1.0067126750946045,grad_norm: 0.9999991434502046, iteration: 86589
loss: 1.0551403760910034,grad_norm: 0.9999998604065385, iteration: 86590
loss: 0.9991225600242615,grad_norm: 0.999999037067759, iteration: 86591
loss: 1.0336847305297852,grad_norm: 0.9999990392966133, iteration: 86592
loss: 0.9632487893104553,grad_norm: 0.9999992363054551, iteration: 86593
loss: 1.0840864181518555,grad_norm: 0.9999993705873292, iteration: 86594
loss: 1.0129377841949463,grad_norm: 0.9999990131104116, iteration: 86595
loss: 1.0082863569259644,grad_norm: 0.8589106706614343, iteration: 86596
loss: 0.9806711077690125,grad_norm: 0.9289576459776463, iteration: 86597
loss: 0.9628559947013855,grad_norm: 0.9999990626451758, iteration: 86598
loss: 1.0083321332931519,grad_norm: 0.9999991007841952, iteration: 86599
loss: 0.9756649136543274,grad_norm: 0.9999990531340472, iteration: 86600
loss: 0.9771230220794678,grad_norm: 0.9415227931006624, iteration: 86601
loss: 0.9902727007865906,grad_norm: 0.9999990613760236, iteration: 86602
loss: 0.9812510013580322,grad_norm: 0.9884916097770476, iteration: 86603
loss: 0.9579795598983765,grad_norm: 0.9999992831379273, iteration: 86604
loss: 1.0692901611328125,grad_norm: 0.9589581654796916, iteration: 86605
loss: 1.0192209482192993,grad_norm: 0.8306945440910016, iteration: 86606
loss: 0.993141233921051,grad_norm: 0.9999991500690525, iteration: 86607
loss: 1.0243669748306274,grad_norm: 0.9999992525670359, iteration: 86608
loss: 1.0135166645050049,grad_norm: 0.9999991164591904, iteration: 86609
loss: 0.9875576496124268,grad_norm: 0.9999992766393512, iteration: 86610
loss: 1.0583487749099731,grad_norm: 0.9999995344426688, iteration: 86611
loss: 0.9819148778915405,grad_norm: 0.9490288231553505, iteration: 86612
loss: 1.024527668952942,grad_norm: 0.9999991269150323, iteration: 86613
loss: 1.0218201875686646,grad_norm: 0.999999059850638, iteration: 86614
loss: 0.9905194044113159,grad_norm: 0.9999990868683551, iteration: 86615
loss: 0.9812514781951904,grad_norm: 0.9999994104593164, iteration: 86616
loss: 1.012722373008728,grad_norm: 0.9999991905328116, iteration: 86617
loss: 1.0252063274383545,grad_norm: 0.9999994780327162, iteration: 86618
loss: 1.0033221244812012,grad_norm: 0.9999990532953942, iteration: 86619
loss: 1.039617657661438,grad_norm: 0.9999991916101806, iteration: 86620
loss: 1.0056685209274292,grad_norm: 0.9999989517924767, iteration: 86621
loss: 1.0161798000335693,grad_norm: 0.9999991841254798, iteration: 86622
loss: 1.0219826698303223,grad_norm: 0.9999998232783647, iteration: 86623
loss: 1.0351181030273438,grad_norm: 0.9999991956644082, iteration: 86624
loss: 1.0444402694702148,grad_norm: 0.9999991512294727, iteration: 86625
loss: 0.9998478293418884,grad_norm: 0.9999990585322681, iteration: 86626
loss: 1.03158438205719,grad_norm: 0.99999917989347, iteration: 86627
loss: 0.9907581806182861,grad_norm: 0.8983231669565993, iteration: 86628
loss: 1.0232269763946533,grad_norm: 0.9999992396870524, iteration: 86629
loss: 1.0993883609771729,grad_norm: 0.9999991706515559, iteration: 86630
loss: 0.9936062097549438,grad_norm: 0.8057603627321583, iteration: 86631
loss: 0.9795510172843933,grad_norm: 0.99999898417455, iteration: 86632
loss: 0.998619019985199,grad_norm: 0.9999997037170301, iteration: 86633
loss: 0.9593815803527832,grad_norm: 0.9999991810804034, iteration: 86634
loss: 1.0380494594573975,grad_norm: 0.9208803276058387, iteration: 86635
loss: 0.9946263432502747,grad_norm: 0.9503942459572414, iteration: 86636
loss: 1.023756980895996,grad_norm: 0.9999990441703108, iteration: 86637
loss: 1.0213195085525513,grad_norm: 0.9999996833186594, iteration: 86638
loss: 1.0577795505523682,grad_norm: 0.9999990295262096, iteration: 86639
loss: 0.9962977170944214,grad_norm: 0.9999991604034968, iteration: 86640
loss: 1.0146033763885498,grad_norm: 0.8840233479700719, iteration: 86641
loss: 0.9721124172210693,grad_norm: 0.9961049879360696, iteration: 86642
loss: 0.9615554809570312,grad_norm: 0.9999990754641801, iteration: 86643
loss: 1.1248258352279663,grad_norm: 0.9999990739887096, iteration: 86644
loss: 0.9805803298950195,grad_norm: 0.9230476596644134, iteration: 86645
loss: 0.9968932867050171,grad_norm: 0.9891501471979061, iteration: 86646
loss: 1.0126399993896484,grad_norm: 0.9999990558145302, iteration: 86647
loss: 1.0098220109939575,grad_norm: 0.9800817799108666, iteration: 86648
loss: 1.0019229650497437,grad_norm: 0.9999990483256042, iteration: 86649
loss: 1.0388942956924438,grad_norm: 0.999999034234196, iteration: 86650
loss: 0.9781298041343689,grad_norm: 0.9886143557079806, iteration: 86651
loss: 0.9899826049804688,grad_norm: 0.9999991674370444, iteration: 86652
loss: 1.0215235948562622,grad_norm: 0.9999997054363051, iteration: 86653
loss: 0.9783201217651367,grad_norm: 0.9999994859007048, iteration: 86654
loss: 1.0223618745803833,grad_norm: 0.9999992057942411, iteration: 86655
loss: 1.00507652759552,grad_norm: 0.8780931540878139, iteration: 86656
loss: 0.9883461594581604,grad_norm: 0.9417379690080447, iteration: 86657
loss: 0.9806908965110779,grad_norm: 0.9999992126630615, iteration: 86658
loss: 1.0125362873077393,grad_norm: 0.999999165899992, iteration: 86659
loss: 0.9912409782409668,grad_norm: 0.9999995802955595, iteration: 86660
loss: 0.9896808862686157,grad_norm: 0.999999206562743, iteration: 86661
loss: 1.0001990795135498,grad_norm: 0.999999231273549, iteration: 86662
loss: 1.0161343812942505,grad_norm: 0.9999991771344157, iteration: 86663
loss: 1.0075972080230713,grad_norm: 0.9715129792114288, iteration: 86664
loss: 1.006221890449524,grad_norm: 0.9999993917159666, iteration: 86665
loss: 1.0436127185821533,grad_norm: 0.9834912411303451, iteration: 86666
loss: 0.9849414229393005,grad_norm: 0.9510263553668825, iteration: 86667
loss: 0.9757333397865295,grad_norm: 0.9999995041215108, iteration: 86668
loss: 1.0341849327087402,grad_norm: 0.9840689255519242, iteration: 86669
loss: 1.0341585874557495,grad_norm: 0.9157308028099391, iteration: 86670
loss: 0.9845978617668152,grad_norm: 0.9999992811833934, iteration: 86671
loss: 1.0208548307418823,grad_norm: 0.9999992544548898, iteration: 86672
loss: 0.9804486036300659,grad_norm: 0.9999991604429291, iteration: 86673
loss: 1.007120966911316,grad_norm: 0.9999988935942089, iteration: 86674
loss: 0.9889547228813171,grad_norm: 0.9999991860030352, iteration: 86675
loss: 1.042587399482727,grad_norm: 0.9999991346483781, iteration: 86676
loss: 0.9833816885948181,grad_norm: 0.999999375118447, iteration: 86677
loss: 0.9970090985298157,grad_norm: 0.9999991245094916, iteration: 86678
loss: 1.0206159353256226,grad_norm: 0.9999991135307348, iteration: 86679
loss: 1.0181690454483032,grad_norm: 0.9999992052072413, iteration: 86680
loss: 0.9977707862854004,grad_norm: 0.9999990269295814, iteration: 86681
loss: 0.9934247136116028,grad_norm: 0.9967070267008998, iteration: 86682
loss: 1.0151727199554443,grad_norm: 0.9999990690873831, iteration: 86683
loss: 0.9590566754341125,grad_norm: 0.9143127726733963, iteration: 86684
loss: 1.0270357131958008,grad_norm: 0.9999996733653999, iteration: 86685
loss: 0.9999322891235352,grad_norm: 0.999999083765194, iteration: 86686
loss: 0.9883200526237488,grad_norm: 0.9999992320232697, iteration: 86687
loss: 1.0251679420471191,grad_norm: 0.999999170828352, iteration: 86688
loss: 1.0281703472137451,grad_norm: 0.9999992795181429, iteration: 86689
loss: 1.0215678215026855,grad_norm: 0.9999991955428608, iteration: 86690
loss: 1.0058043003082275,grad_norm: 0.9999997100349493, iteration: 86691
loss: 0.9841570854187012,grad_norm: 0.9110163109002387, iteration: 86692
loss: 0.9767859578132629,grad_norm: 0.9971737089141123, iteration: 86693
loss: 1.030147671699524,grad_norm: 0.999999013851281, iteration: 86694
loss: 1.013566493988037,grad_norm: 0.9999991749480758, iteration: 86695
loss: 1.007942795753479,grad_norm: 0.9744644215728699, iteration: 86696
loss: 0.9610034227371216,grad_norm: 0.9443238074887251, iteration: 86697
loss: 1.0165529251098633,grad_norm: 0.9999989583518012, iteration: 86698
loss: 0.9912399649620056,grad_norm: 0.9999991382557, iteration: 86699
loss: 0.9925700426101685,grad_norm: 0.9446920727006355, iteration: 86700
loss: 0.9917392134666443,grad_norm: 0.9999992704081124, iteration: 86701
loss: 0.9880692362785339,grad_norm: 0.9999991629297748, iteration: 86702
loss: 1.050864338874817,grad_norm: 0.999999215595387, iteration: 86703
loss: 1.00587797164917,grad_norm: 0.9512397141571142, iteration: 86704
loss: 0.9940482974052429,grad_norm: 0.9999992144452601, iteration: 86705
loss: 1.0246936082839966,grad_norm: 0.9999990521263628, iteration: 86706
loss: 1.0161203145980835,grad_norm: 0.9999991735547579, iteration: 86707
loss: 1.0222110748291016,grad_norm: 0.9999991756511207, iteration: 86708
loss: 1.0059303045272827,grad_norm: 0.9999991667463032, iteration: 86709
loss: 0.994340181350708,grad_norm: 0.9409700004794452, iteration: 86710
loss: 1.0001978874206543,grad_norm: 0.9999995828627045, iteration: 86711
loss: 0.9965286254882812,grad_norm: 0.9940728192601238, iteration: 86712
loss: 1.0486044883728027,grad_norm: 0.9999996325434548, iteration: 86713
loss: 0.9949014186859131,grad_norm: 0.9999992153248497, iteration: 86714
loss: 1.0698508024215698,grad_norm: 0.9999992735109544, iteration: 86715
loss: 1.0258573293685913,grad_norm: 0.9999991546828269, iteration: 86716
loss: 1.0136843919754028,grad_norm: 0.9999989771181512, iteration: 86717
loss: 0.9998390078544617,grad_norm: 0.9999990713207295, iteration: 86718
loss: 1.0249112844467163,grad_norm: 0.9999990761569479, iteration: 86719
loss: 0.9850851893424988,grad_norm: 0.9999991066190284, iteration: 86720
loss: 0.9858471751213074,grad_norm: 0.9999990629692862, iteration: 86721
loss: 0.9811450242996216,grad_norm: 0.9999989447086497, iteration: 86722
loss: 1.0000497102737427,grad_norm: 0.9999997985719764, iteration: 86723
loss: 1.0041230916976929,grad_norm: 0.9163627124720742, iteration: 86724
loss: 1.0289816856384277,grad_norm: 0.9999992506302174, iteration: 86725
loss: 0.9824780821800232,grad_norm: 0.8981286758033821, iteration: 86726
loss: 1.0213334560394287,grad_norm: 0.9999990709690749, iteration: 86727
loss: 1.0011096000671387,grad_norm: 0.9738194210827904, iteration: 86728
loss: 1.0102813243865967,grad_norm: 0.9999993826953149, iteration: 86729
loss: 1.0144895315170288,grad_norm: 0.9999990603839005, iteration: 86730
loss: 1.0147100687026978,grad_norm: 0.9999993102230061, iteration: 86731
loss: 0.9933878779411316,grad_norm: 0.8991336006371906, iteration: 86732
loss: 0.9555615782737732,grad_norm: 0.9999991631313537, iteration: 86733
loss: 0.9937379360198975,grad_norm: 0.9999991961880412, iteration: 86734
loss: 1.0300568342208862,grad_norm: 0.9999993786663119, iteration: 86735
loss: 1.0125426054000854,grad_norm: 0.9826590453368504, iteration: 86736
loss: 1.0186103582382202,grad_norm: 0.981544044523459, iteration: 86737
loss: 0.9773542284965515,grad_norm: 0.9999991556707511, iteration: 86738
loss: 1.0139442682266235,grad_norm: 0.8247595691952923, iteration: 86739
loss: 0.9840469360351562,grad_norm: 0.9999990301481738, iteration: 86740
loss: 1.0443609952926636,grad_norm: 0.9999992239843594, iteration: 86741
loss: 0.9446128606796265,grad_norm: 0.9999989612458591, iteration: 86742
loss: 0.9824463129043579,grad_norm: 0.9999989679637551, iteration: 86743
loss: 0.9614971280097961,grad_norm: 0.9999991186088211, iteration: 86744
loss: 0.9866816401481628,grad_norm: 0.9999991419105411, iteration: 86745
loss: 0.9903889894485474,grad_norm: 0.9999992038520213, iteration: 86746
loss: 0.9929502606391907,grad_norm: 0.9999993142382047, iteration: 86747
loss: 0.9961839318275452,grad_norm: 0.883916032432776, iteration: 86748
loss: 1.0694209337234497,grad_norm: 0.9999996117546293, iteration: 86749
loss: 1.0006823539733887,grad_norm: 0.9999994326775067, iteration: 86750
loss: 1.004966378211975,grad_norm: 0.9999991850136262, iteration: 86751
loss: 1.0097148418426514,grad_norm: 0.9999993339964932, iteration: 86752
loss: 1.0232828855514526,grad_norm: 0.9999992180951489, iteration: 86753
loss: 1.0011247396469116,grad_norm: 0.9999993671798157, iteration: 86754
loss: 1.012140154838562,grad_norm: 0.9999991780130001, iteration: 86755
loss: 0.9999470114707947,grad_norm: 0.9338635125413852, iteration: 86756
loss: 1.005881905555725,grad_norm: 0.9454191059105844, iteration: 86757
loss: 0.9900281429290771,grad_norm: 0.976271715377711, iteration: 86758
loss: 1.023666501045227,grad_norm: 0.9999991651343256, iteration: 86759
loss: 1.027505874633789,grad_norm: 0.999999228854205, iteration: 86760
loss: 0.9797142148017883,grad_norm: 0.9731680741826977, iteration: 86761
loss: 1.0175342559814453,grad_norm: 0.9329148140400366, iteration: 86762
loss: 0.979369044303894,grad_norm: 0.999999108876067, iteration: 86763
loss: 1.0205730199813843,grad_norm: 0.9999999847134433, iteration: 86764
loss: 0.9571622014045715,grad_norm: 0.9999990543457328, iteration: 86765
loss: 1.0066512823104858,grad_norm: 0.9670663453652146, iteration: 86766
loss: 1.0170438289642334,grad_norm: 0.9999992469478887, iteration: 86767
loss: 0.9653538465499878,grad_norm: 0.8408489035041193, iteration: 86768
loss: 0.9503049850463867,grad_norm: 0.9931344564145, iteration: 86769
loss: 0.988706111907959,grad_norm: 0.9999991502167784, iteration: 86770
loss: 1.0102159976959229,grad_norm: 0.97128165424995, iteration: 86771
loss: 1.0633625984191895,grad_norm: 0.999999149793958, iteration: 86772
loss: 1.011926293373108,grad_norm: 0.9768534828424739, iteration: 86773
loss: 1.0072823762893677,grad_norm: 0.9999991663889934, iteration: 86774
loss: 1.0149554014205933,grad_norm: 0.9999991399886826, iteration: 86775
loss: 1.0237334966659546,grad_norm: 0.9562947203725424, iteration: 86776
loss: 1.0108320713043213,grad_norm: 0.9999993783844239, iteration: 86777
loss: 0.9811176061630249,grad_norm: 0.9999989539358951, iteration: 86778
loss: 0.9521194100379944,grad_norm: 0.9999990897783524, iteration: 86779
loss: 0.9551102519035339,grad_norm: 0.999998959853169, iteration: 86780
loss: 0.9782682061195374,grad_norm: 0.9999990544961871, iteration: 86781
loss: 1.02626633644104,grad_norm: 0.999999141172708, iteration: 86782
loss: 1.014397144317627,grad_norm: 0.9999990407928097, iteration: 86783
loss: 1.0117812156677246,grad_norm: 0.9999991378936088, iteration: 86784
loss: 0.9814466834068298,grad_norm: 0.9999991010067794, iteration: 86785
loss: 1.0863149166107178,grad_norm: 0.9999998093277175, iteration: 86786
loss: 1.0566939115524292,grad_norm: 0.9999995844912402, iteration: 86787
loss: 0.9591977596282959,grad_norm: 0.9999992689223338, iteration: 86788
loss: 0.9824605584144592,grad_norm: 0.9999991006962234, iteration: 86789
loss: 0.9921948909759521,grad_norm: 0.999999379559267, iteration: 86790
loss: 1.0002721548080444,grad_norm: 0.9786938272961406, iteration: 86791
loss: 0.9841855764389038,grad_norm: 0.9006024533922712, iteration: 86792
loss: 1.0124130249023438,grad_norm: 0.9068038100321196, iteration: 86793
loss: 1.020853042602539,grad_norm: 0.9081858452145567, iteration: 86794
loss: 1.0799334049224854,grad_norm: 0.999999420928747, iteration: 86795
loss: 0.9938966631889343,grad_norm: 0.9999991833739295, iteration: 86796
loss: 0.9924039244651794,grad_norm: 0.832481309281882, iteration: 86797
loss: 0.9935324788093567,grad_norm: 0.9784329535667258, iteration: 86798
loss: 1.0094903707504272,grad_norm: 0.9999990461994573, iteration: 86799
loss: 1.0020283460617065,grad_norm: 0.999998990657173, iteration: 86800
loss: 1.0160895586013794,grad_norm: 0.9999991242321963, iteration: 86801
loss: 0.9943288564682007,grad_norm: 0.9606629014191288, iteration: 86802
loss: 1.0231245756149292,grad_norm: 0.9999991618269121, iteration: 86803
loss: 1.0310176610946655,grad_norm: 0.9999991195067889, iteration: 86804
loss: 0.9739839434623718,grad_norm: 0.9999995315198676, iteration: 86805
loss: 0.9784165620803833,grad_norm: 0.9999999481938207, iteration: 86806
loss: 0.9961315989494324,grad_norm: 0.7581333109291628, iteration: 86807
loss: 1.0402796268463135,grad_norm: 0.9736252015026549, iteration: 86808
loss: 0.9818006753921509,grad_norm: 0.9999990874507717, iteration: 86809
loss: 0.9883579015731812,grad_norm: 0.9999992417459665, iteration: 86810
loss: 1.0347644090652466,grad_norm: 0.9518960317598599, iteration: 86811
loss: 0.9687870144844055,grad_norm: 0.9999992593639485, iteration: 86812
loss: 1.0093624591827393,grad_norm: 0.9819612330148296, iteration: 86813
loss: 1.0211269855499268,grad_norm: 0.9999990907963273, iteration: 86814
loss: 1.0075633525848389,grad_norm: 0.9999991888869366, iteration: 86815
loss: 1.0038872957229614,grad_norm: 0.9999992753066216, iteration: 86816
loss: 1.0444097518920898,grad_norm: 0.9999994432102015, iteration: 86817
loss: 1.0149344205856323,grad_norm: 0.999999939755484, iteration: 86818
loss: 0.9989430904388428,grad_norm: 0.9513409987070095, iteration: 86819
loss: 0.9755947589874268,grad_norm: 0.999999232365263, iteration: 86820
loss: 1.0144048929214478,grad_norm: 0.9862034885201421, iteration: 86821
loss: 1.0313999652862549,grad_norm: 0.9999989549935843, iteration: 86822
loss: 1.013383388519287,grad_norm: 0.9999991608331498, iteration: 86823
loss: 0.9850543141365051,grad_norm: 0.9999994307263921, iteration: 86824
loss: 1.0039764642715454,grad_norm: 0.9999995787205515, iteration: 86825
loss: 1.0203759670257568,grad_norm: 0.9198819800119041, iteration: 86826
loss: 1.0203627347946167,grad_norm: 0.9796122110579416, iteration: 86827
loss: 1.0279122591018677,grad_norm: 0.999999167023378, iteration: 86828
loss: 0.9984499216079712,grad_norm: 0.9999989851948883, iteration: 86829
loss: 1.0244030952453613,grad_norm: 0.8424197663556511, iteration: 86830
loss: 0.968437135219574,grad_norm: 0.9489413893505878, iteration: 86831
loss: 0.976708710193634,grad_norm: 0.9999991966786216, iteration: 86832
loss: 1.0183002948760986,grad_norm: 0.9999991291209295, iteration: 86833
loss: 1.0361945629119873,grad_norm: 0.9017040500736284, iteration: 86834
loss: 1.027422308921814,grad_norm: 0.9999990748370654, iteration: 86835
loss: 0.9953498244285583,grad_norm: 0.9999990110137261, iteration: 86836
loss: 1.0362915992736816,grad_norm: 0.9999992744500268, iteration: 86837
loss: 1.0869373083114624,grad_norm: 0.9999999723959878, iteration: 86838
loss: 0.9912205934524536,grad_norm: 0.8817960783669884, iteration: 86839
loss: 1.00287926197052,grad_norm: 0.9999992266351966, iteration: 86840
loss: 1.0031485557556152,grad_norm: 0.9999992686221951, iteration: 86841
loss: 0.9877024292945862,grad_norm: 0.9999990814229408, iteration: 86842
loss: 0.9677043557167053,grad_norm: 0.9999992058728708, iteration: 86843
loss: 1.0149940252304077,grad_norm: 0.9999990009350692, iteration: 86844
loss: 1.0328620672225952,grad_norm: 0.9999991397968491, iteration: 86845
loss: 1.0370044708251953,grad_norm: 0.9999992307002104, iteration: 86846
loss: 1.009675145149231,grad_norm: 0.9999990550869918, iteration: 86847
loss: 1.0226242542266846,grad_norm: 0.9999992340773376, iteration: 86848
loss: 1.0428496599197388,grad_norm: 0.9999991887388593, iteration: 86849
loss: 1.0843178033828735,grad_norm: 0.9999989557199551, iteration: 86850
loss: 1.0532358884811401,grad_norm: 0.999999305345553, iteration: 86851
loss: 0.9839994311332703,grad_norm: 0.9999992894702672, iteration: 86852
loss: 1.0160585641860962,grad_norm: 0.9999993160545273, iteration: 86853
loss: 0.9782705307006836,grad_norm: 0.9999990505464172, iteration: 86854
loss: 0.9985816478729248,grad_norm: 0.9999990780624649, iteration: 86855
loss: 1.0405573844909668,grad_norm: 0.9999990503336698, iteration: 86856
loss: 1.1109087467193604,grad_norm: 0.9999996213642922, iteration: 86857
loss: 0.9900010824203491,grad_norm: 0.9999991722233474, iteration: 86858
loss: 1.0076754093170166,grad_norm: 0.9999990305004033, iteration: 86859
loss: 1.0178370475769043,grad_norm: 0.9999991381103726, iteration: 86860
loss: 1.0892279148101807,grad_norm: 0.9999994615171542, iteration: 86861
loss: 0.9795404672622681,grad_norm: 0.9999995384518756, iteration: 86862
loss: 1.0649646520614624,grad_norm: 0.9999998950079261, iteration: 86863
loss: 1.0403164625167847,grad_norm: 0.9999991476566908, iteration: 86864
loss: 1.0063143968582153,grad_norm: 0.9999992027400116, iteration: 86865
loss: 1.0187194347381592,grad_norm: 0.999999045142535, iteration: 86866
loss: 1.0049283504486084,grad_norm: 0.9999993079308845, iteration: 86867
loss: 0.9765570163726807,grad_norm: 0.9999991294526807, iteration: 86868
loss: 1.0208184719085693,grad_norm: 0.9999991924714268, iteration: 86869
loss: 1.0378451347351074,grad_norm: 0.9999992548799564, iteration: 86870
loss: 1.016428828239441,grad_norm: 0.9999991455684514, iteration: 86871
loss: 0.9877907037734985,grad_norm: 0.9999990101960138, iteration: 86872
loss: 1.0130864381790161,grad_norm: 0.9713011832208931, iteration: 86873
loss: 0.9734167456626892,grad_norm: 0.999999247464724, iteration: 86874
loss: 0.9989270567893982,grad_norm: 0.917124247467971, iteration: 86875
loss: 0.9643887877464294,grad_norm: 0.999999058436251, iteration: 86876
loss: 0.9964585900306702,grad_norm: 0.9818361644658594, iteration: 86877
loss: 1.0064207315444946,grad_norm: 0.9999989094761499, iteration: 86878
loss: 1.0142887830734253,grad_norm: 0.9999989626921503, iteration: 86879
loss: 1.0224984884262085,grad_norm: 0.9051489758143703, iteration: 86880
loss: 1.0797381401062012,grad_norm: 0.9999990885956838, iteration: 86881
loss: 1.00243079662323,grad_norm: 0.8639680840196616, iteration: 86882
loss: 1.0671473741531372,grad_norm: 0.9999990064074826, iteration: 86883
loss: 1.020168662071228,grad_norm: 0.9999990874809713, iteration: 86884
loss: 0.994864284992218,grad_norm: 0.99999914083577, iteration: 86885
loss: 1.0130804777145386,grad_norm: 0.869563867710447, iteration: 86886
loss: 0.9397903084754944,grad_norm: 0.9999993255586126, iteration: 86887
loss: 1.0155340433120728,grad_norm: 0.9999991344892327, iteration: 86888
loss: 0.9944426417350769,grad_norm: 0.9349639318838119, iteration: 86889
loss: 0.9947148561477661,grad_norm: 0.9999992718756685, iteration: 86890
loss: 1.0337719917297363,grad_norm: 0.9999990906722512, iteration: 86891
loss: 1.0191227197647095,grad_norm: 0.9433247561463669, iteration: 86892
loss: 1.0031887292861938,grad_norm: 0.8463029998653095, iteration: 86893
loss: 0.9905334115028381,grad_norm: 0.9999990546457317, iteration: 86894
loss: 0.9762786626815796,grad_norm: 0.9508578049326563, iteration: 86895
loss: 1.0071284770965576,grad_norm: 0.9999990634460639, iteration: 86896
loss: 1.0150001049041748,grad_norm: 0.9999990930194947, iteration: 86897
loss: 1.012189269065857,grad_norm: 0.9261464530020077, iteration: 86898
loss: 1.0108864307403564,grad_norm: 0.9466550182836984, iteration: 86899
loss: 1.0675029754638672,grad_norm: 0.9999992631659218, iteration: 86900
loss: 1.0137041807174683,grad_norm: 0.9999991803864839, iteration: 86901
loss: 0.9680173993110657,grad_norm: 0.9749869348616628, iteration: 86902
loss: 0.97917240858078,grad_norm: 0.9999998911732898, iteration: 86903
loss: 0.9749646782875061,grad_norm: 0.9999991416641778, iteration: 86904
loss: 1.0255764722824097,grad_norm: 0.9999990007059258, iteration: 86905
loss: 0.950097918510437,grad_norm: 0.9499680782764349, iteration: 86906
loss: 0.975990355014801,grad_norm: 0.9999991515634952, iteration: 86907
loss: 0.9814835786819458,grad_norm: 0.9999990882175377, iteration: 86908
loss: 0.9866153001785278,grad_norm: 0.8837825861427568, iteration: 86909
loss: 0.9918212294578552,grad_norm: 0.8175362882200798, iteration: 86910
loss: 1.0087350606918335,grad_norm: 0.966900401472752, iteration: 86911
loss: 1.0104798078536987,grad_norm: 0.8921354274516757, iteration: 86912
loss: 0.9738284945487976,grad_norm: 0.9999992630338677, iteration: 86913
loss: 0.9754151105880737,grad_norm: 0.9999990661840568, iteration: 86914
loss: 0.9426418542861938,grad_norm: 0.9386702567711044, iteration: 86915
loss: 1.0435490608215332,grad_norm: 0.941339011816915, iteration: 86916
loss: 1.0210782289505005,grad_norm: 0.999999131524786, iteration: 86917
loss: 1.0406450033187866,grad_norm: 0.9999991000074151, iteration: 86918
loss: 1.0014219284057617,grad_norm: 0.9999991709005328, iteration: 86919
loss: 1.016554355621338,grad_norm: 0.9273356735444485, iteration: 86920
loss: 0.9962412118911743,grad_norm: 0.9681962430342504, iteration: 86921
loss: 1.0067721605300903,grad_norm: 0.9999990091138063, iteration: 86922
loss: 0.9891272783279419,grad_norm: 0.9999990934572849, iteration: 86923
loss: 0.9480317831039429,grad_norm: 0.9999991524830214, iteration: 86924
loss: 1.026483178138733,grad_norm: 0.9999993759950568, iteration: 86925
loss: 1.0236272811889648,grad_norm: 0.9251446581783067, iteration: 86926
loss: 0.9592812657356262,grad_norm: 0.9826209552851934, iteration: 86927
loss: 1.0074561834335327,grad_norm: 0.9589118557523455, iteration: 86928
loss: 0.9843646883964539,grad_norm: 0.9999991765055645, iteration: 86929
loss: 1.0043011903762817,grad_norm: 0.9999989305353827, iteration: 86930
loss: 1.0086997747421265,grad_norm: 0.9396278553835836, iteration: 86931
loss: 0.9908961057662964,grad_norm: 0.9999998334125875, iteration: 86932
loss: 0.985012412071228,grad_norm: 0.9544990936293134, iteration: 86933
loss: 1.0920180082321167,grad_norm: 0.9999995522691724, iteration: 86934
loss: 0.961022138595581,grad_norm: 0.8728209099303539, iteration: 86935
loss: 1.0096404552459717,grad_norm: 0.9999990435298906, iteration: 86936
loss: 1.0082570314407349,grad_norm: 0.9999995261662845, iteration: 86937
loss: 1.0201659202575684,grad_norm: 0.9999991411375447, iteration: 86938
loss: 0.9439578056335449,grad_norm: 0.9999991785052239, iteration: 86939
loss: 1.0073966979980469,grad_norm: 0.9999992190153176, iteration: 86940
loss: 0.9570167064666748,grad_norm: 0.9999992327177804, iteration: 86941
loss: 1.0070807933807373,grad_norm: 0.9999991846648689, iteration: 86942
loss: 0.9948671460151672,grad_norm: 0.9999990299168028, iteration: 86943
loss: 1.0019845962524414,grad_norm: 0.9999999127006848, iteration: 86944
loss: 0.975679874420166,grad_norm: 0.9706557300289724, iteration: 86945
loss: 0.9908055663108826,grad_norm: 0.9999991187019349, iteration: 86946
loss: 0.973442018032074,grad_norm: 0.9999990849754725, iteration: 86947
loss: 1.010856032371521,grad_norm: 0.9259002601238967, iteration: 86948
loss: 0.9921268820762634,grad_norm: 0.9999989751170736, iteration: 86949
loss: 1.0990104675292969,grad_norm: 0.9999991258118163, iteration: 86950
loss: 1.0448435544967651,grad_norm: 0.999999128995179, iteration: 86951
loss: 1.0039902925491333,grad_norm: 0.9999991593955592, iteration: 86952
loss: 1.0503134727478027,grad_norm: 0.9999990235154931, iteration: 86953
loss: 1.0068485736846924,grad_norm: 0.9999990409020032, iteration: 86954
loss: 0.9947212934494019,grad_norm: 0.999999449140797, iteration: 86955
loss: 0.9900919795036316,grad_norm: 0.9999989793452915, iteration: 86956
loss: 0.9648343920707703,grad_norm: 0.9999992369296579, iteration: 86957
loss: 1.0076402425765991,grad_norm: 0.9999992488583418, iteration: 86958
loss: 1.0080170631408691,grad_norm: 0.946999290611635, iteration: 86959
loss: 0.9954792261123657,grad_norm: 0.9999990252292748, iteration: 86960
loss: 0.985880434513092,grad_norm: 0.9999992621344761, iteration: 86961
loss: 1.0113246440887451,grad_norm: 0.9506059361336303, iteration: 86962
loss: 0.974736750125885,grad_norm: 0.9999990245765845, iteration: 86963
loss: 1.007575511932373,grad_norm: 0.9114255167892261, iteration: 86964
loss: 0.9727599024772644,grad_norm: 0.9914832704632143, iteration: 86965
loss: 0.9749691486358643,grad_norm: 0.9501077896351415, iteration: 86966
loss: 1.0313053131103516,grad_norm: 0.9999992218382593, iteration: 86967
loss: 1.0232107639312744,grad_norm: 0.9351860492386477, iteration: 86968
loss: 0.9715259671211243,grad_norm: 0.9999989272308514, iteration: 86969
loss: 0.9859017729759216,grad_norm: 0.9999992019807021, iteration: 86970
loss: 1.0104308128356934,grad_norm: 0.9298855225826899, iteration: 86971
loss: 0.9885360598564148,grad_norm: 0.8219461167417901, iteration: 86972
loss: 1.0111706256866455,grad_norm: 0.9860370528494092, iteration: 86973
loss: 0.9945229291915894,grad_norm: 0.9999991484451337, iteration: 86974
loss: 0.9839836359024048,grad_norm: 0.9999991267346117, iteration: 86975
loss: 0.9674763679504395,grad_norm: 0.9374022297100977, iteration: 86976
loss: 0.9830440282821655,grad_norm: 0.9999993438957738, iteration: 86977
loss: 0.9882767200469971,grad_norm: 0.9999989529665431, iteration: 86978
loss: 0.9983929395675659,grad_norm: 0.9157890014060939, iteration: 86979
loss: 1.0128731727600098,grad_norm: 0.9999992090566993, iteration: 86980
loss: 1.0110572576522827,grad_norm: 0.9999992213966394, iteration: 86981
loss: 0.9588073492050171,grad_norm: 0.9999991647431347, iteration: 86982
loss: 0.9710425734519958,grad_norm: 0.9999992409298185, iteration: 86983
loss: 0.9926062822341919,grad_norm: 0.9999989741187095, iteration: 86984
loss: 1.0020345449447632,grad_norm: 0.9999992676577285, iteration: 86985
loss: 1.0114105939865112,grad_norm: 0.9766259484779821, iteration: 86986
loss: 0.9906699657440186,grad_norm: 0.9999991125182061, iteration: 86987
loss: 0.9851700663566589,grad_norm: 0.999999260980481, iteration: 86988
loss: 1.0152052640914917,grad_norm: 0.9043954796777119, iteration: 86989
loss: 1.0037617683410645,grad_norm: 0.9999991330803313, iteration: 86990
loss: 1.0507241487503052,grad_norm: 0.9999993272218338, iteration: 86991
loss: 1.012537956237793,grad_norm: 0.9999994154008854, iteration: 86992
loss: 1.1035740375518799,grad_norm: 0.9999998184066591, iteration: 86993
loss: 1.0257887840270996,grad_norm: 0.9999988974629608, iteration: 86994
loss: 1.0349270105361938,grad_norm: 0.9999992861104275, iteration: 86995
loss: 0.9900946021080017,grad_norm: 0.9999991275628458, iteration: 86996
loss: 0.959281325340271,grad_norm: 0.9999991791302193, iteration: 86997
loss: 1.007208228111267,grad_norm: 0.9999990496165461, iteration: 86998
loss: 0.9909627437591553,grad_norm: 0.8817151880457511, iteration: 86999
loss: 1.0330700874328613,grad_norm: 0.9999990016738273, iteration: 87000
loss: 1.0118321180343628,grad_norm: 0.9999991374068412, iteration: 87001
loss: 1.0300730466842651,grad_norm: 0.9999992689681411, iteration: 87002
loss: 0.9965823888778687,grad_norm: 0.9042987523897291, iteration: 87003
loss: 0.9942618608474731,grad_norm: 0.9999991035569559, iteration: 87004
loss: 1.0200536251068115,grad_norm: 0.9999992302373027, iteration: 87005
loss: 1.0419901609420776,grad_norm: 0.999999111759418, iteration: 87006
loss: 1.0024007558822632,grad_norm: 0.9999991215831175, iteration: 87007
loss: 1.010710597038269,grad_norm: 0.9999990734044795, iteration: 87008
loss: 1.01943838596344,grad_norm: 0.8597060279094121, iteration: 87009
loss: 1.0290024280548096,grad_norm: 0.9796045331108151, iteration: 87010
loss: 0.9826774597167969,grad_norm: 0.9999991134806487, iteration: 87011
loss: 1.0268644094467163,grad_norm: 0.9999992933565863, iteration: 87012
loss: 0.9628624320030212,grad_norm: 0.9516162604750673, iteration: 87013
loss: 1.0323625802993774,grad_norm: 0.9999991403121952, iteration: 87014
loss: 1.0222378969192505,grad_norm: 0.9756548211076551, iteration: 87015
loss: 1.010571837425232,grad_norm: 0.9999991502990846, iteration: 87016
loss: 1.0199580192565918,grad_norm: 0.996267451873969, iteration: 87017
loss: 0.9776515364646912,grad_norm: 0.9999996130774074, iteration: 87018
loss: 1.0052481889724731,grad_norm: 0.9999991594178896, iteration: 87019
loss: 0.9914644360542297,grad_norm: 0.9999992983868653, iteration: 87020
loss: 1.0024362802505493,grad_norm: 0.9723309479437979, iteration: 87021
loss: 1.0030601024627686,grad_norm: 0.9999990984264105, iteration: 87022
loss: 1.0029480457305908,grad_norm: 0.8960354893320617, iteration: 87023
loss: 1.0554752349853516,grad_norm: 0.9999995369029925, iteration: 87024
loss: 0.9981881976127625,grad_norm: 0.9068780305267897, iteration: 87025
loss: 1.0302737951278687,grad_norm: 0.9431628018196241, iteration: 87026
loss: 1.0120389461517334,grad_norm: 0.999999052141674, iteration: 87027
loss: 1.0077978372573853,grad_norm: 0.9999991316926051, iteration: 87028
loss: 0.9901342988014221,grad_norm: 0.9167803671422732, iteration: 87029
loss: 0.9906846284866333,grad_norm: 0.9999990582272211, iteration: 87030
loss: 1.0212948322296143,grad_norm: 0.9999992380845464, iteration: 87031
loss: 1.0392711162567139,grad_norm: 0.9999993869011208, iteration: 87032
loss: 1.0125844478607178,grad_norm: 0.9999990666942462, iteration: 87033
loss: 0.9893988966941833,grad_norm: 0.9076742842607712, iteration: 87034
loss: 1.0080029964447021,grad_norm: 0.9999999740687924, iteration: 87035
loss: 0.989646315574646,grad_norm: 0.9999991620536612, iteration: 87036
loss: 0.9718216061592102,grad_norm: 0.9879676997331205, iteration: 87037
loss: 0.9671189188957214,grad_norm: 0.9999990152334133, iteration: 87038
loss: 0.9689623713493347,grad_norm: 0.9391248368511043, iteration: 87039
loss: 0.9426610469818115,grad_norm: 0.9999990745880801, iteration: 87040
loss: 1.0209952592849731,grad_norm: 0.999999174098894, iteration: 87041
loss: 1.0283479690551758,grad_norm: 0.9999990885936862, iteration: 87042
loss: 0.9978247284889221,grad_norm: 0.8733896297738689, iteration: 87043
loss: 1.0169181823730469,grad_norm: 0.9999990367747333, iteration: 87044
loss: 1.0192897319793701,grad_norm: 0.999999084746949, iteration: 87045
loss: 0.983637809753418,grad_norm: 0.9101857294132429, iteration: 87046
loss: 1.028446078300476,grad_norm: 0.9999992176988035, iteration: 87047
loss: 1.015850305557251,grad_norm: 0.955271450990543, iteration: 87048
loss: 0.9992356896400452,grad_norm: 0.9999989763043851, iteration: 87049
loss: 0.9659972190856934,grad_norm: 0.9999993360932679, iteration: 87050
loss: 0.9984837770462036,grad_norm: 0.9999993960760819, iteration: 87051
loss: 1.0324227809906006,grad_norm: 0.9999997179560297, iteration: 87052
loss: 1.1515008211135864,grad_norm: 0.9999994437235384, iteration: 87053
loss: 0.9881287217140198,grad_norm: 0.9999991148833018, iteration: 87054
loss: 0.9952381253242493,grad_norm: 0.9999992690535497, iteration: 87055
loss: 1.0139384269714355,grad_norm: 0.9999990627401207, iteration: 87056
loss: 0.9871318936347961,grad_norm: 0.9332862557976341, iteration: 87057
loss: 1.021170735359192,grad_norm: 0.939523306298164, iteration: 87058
loss: 1.0091464519500732,grad_norm: 0.9999992496161664, iteration: 87059
loss: 0.9935604333877563,grad_norm: 0.9999991884349421, iteration: 87060
loss: 0.9932358860969543,grad_norm: 0.9999991182168675, iteration: 87061
loss: 1.0120069980621338,grad_norm: 0.9732134162557309, iteration: 87062
loss: 0.9636982679367065,grad_norm: 0.9999991378398826, iteration: 87063
loss: 0.9861195087432861,grad_norm: 0.9991393351212731, iteration: 87064
loss: 0.9994733929634094,grad_norm: 0.9999991544101442, iteration: 87065
loss: 0.9812400937080383,grad_norm: 0.9999991242279014, iteration: 87066
loss: 0.9987548589706421,grad_norm: 0.9188662982651599, iteration: 87067
loss: 0.9866853356361389,grad_norm: 0.9999991133804639, iteration: 87068
loss: 1.0179227590560913,grad_norm: 0.8756023303380204, iteration: 87069
loss: 1.0013025999069214,grad_norm: 0.9999998094526459, iteration: 87070
loss: 1.0122160911560059,grad_norm: 0.9999991151707796, iteration: 87071
loss: 1.013713002204895,grad_norm: 0.9775986361286546, iteration: 87072
loss: 0.9905502200126648,grad_norm: 0.999999199644872, iteration: 87073
loss: 0.9716733694076538,grad_norm: 0.999999128310128, iteration: 87074
loss: 0.9923316240310669,grad_norm: 0.9999990265681855, iteration: 87075
loss: 1.0344282388687134,grad_norm: 0.9431177841882961, iteration: 87076
loss: 1.0202399492263794,grad_norm: 0.999999417438412, iteration: 87077
loss: 0.9844902157783508,grad_norm: 0.904352583764468, iteration: 87078
loss: 1.0362101793289185,grad_norm: 0.9999992832347228, iteration: 87079
loss: 1.0044682025909424,grad_norm: 0.9999991242184041, iteration: 87080
loss: 0.9998193979263306,grad_norm: 0.995920865674771, iteration: 87081
loss: 0.9936020970344543,grad_norm: 0.9743313463659902, iteration: 87082
loss: 0.9713433384895325,grad_norm: 0.945053372958682, iteration: 87083
loss: 1.022588849067688,grad_norm: 0.9999990391397122, iteration: 87084
loss: 1.0052882432937622,grad_norm: 0.999999197729882, iteration: 87085
loss: 0.9937117695808411,grad_norm: 0.9999992767760176, iteration: 87086
loss: 0.9819885492324829,grad_norm: 0.9999992042564066, iteration: 87087
loss: 0.9908508658409119,grad_norm: 0.9849797061635324, iteration: 87088
loss: 1.0351195335388184,grad_norm: 0.9999991672132413, iteration: 87089
loss: 1.0405791997909546,grad_norm: 0.9999990742336132, iteration: 87090
loss: 0.950812041759491,grad_norm: 0.9999992250612857, iteration: 87091
loss: 1.012742280960083,grad_norm: 0.9968175184497247, iteration: 87092
loss: 1.007005214691162,grad_norm: 0.9999991350875391, iteration: 87093
loss: 1.0048911571502686,grad_norm: 0.9999999253705882, iteration: 87094
loss: 0.9627994298934937,grad_norm: 0.999999154174, iteration: 87095
loss: 0.9867701530456543,grad_norm: 0.9999991673458467, iteration: 87096
loss: 0.9736697673797607,grad_norm: 0.9999990879280966, iteration: 87097
loss: 0.9958086013793945,grad_norm: 0.9999992027013744, iteration: 87098
loss: 1.0110548734664917,grad_norm: 0.9999988787294615, iteration: 87099
loss: 0.9908852577209473,grad_norm: 0.9999993204423407, iteration: 87100
loss: 1.0078434944152832,grad_norm: 0.997006941308342, iteration: 87101
loss: 1.0153064727783203,grad_norm: 0.9999991658560239, iteration: 87102
loss: 0.9763537049293518,grad_norm: 0.9999992209635239, iteration: 87103
loss: 0.9667510986328125,grad_norm: 0.9999990877024704, iteration: 87104
loss: 0.9921196103096008,grad_norm: 0.9999990149731395, iteration: 87105
loss: 1.0220589637756348,grad_norm: 0.9999990862879776, iteration: 87106
loss: 1.0229582786560059,grad_norm: 0.9999991022526297, iteration: 87107
loss: 0.9795565605163574,grad_norm: 0.9069812361994894, iteration: 87108
loss: 0.9691537022590637,grad_norm: 0.9999991832656732, iteration: 87109
loss: 1.0218757390975952,grad_norm: 0.9550788072115072, iteration: 87110
loss: 1.0262126922607422,grad_norm: 0.9297246326838114, iteration: 87111
loss: 0.9682084321975708,grad_norm: 0.9999993368086542, iteration: 87112
loss: 1.0580514669418335,grad_norm: 0.9999990885952499, iteration: 87113
loss: 0.9817963242530823,grad_norm: 0.9999992084961951, iteration: 87114
loss: 0.9631145596504211,grad_norm: 0.9999991403778296, iteration: 87115
loss: 1.0009464025497437,grad_norm: 0.9999993490961876, iteration: 87116
loss: 0.9570170640945435,grad_norm: 0.999999163397758, iteration: 87117
loss: 0.9906436204910278,grad_norm: 0.9999989947725366, iteration: 87118
loss: 1.017002820968628,grad_norm: 0.9999990544209006, iteration: 87119
loss: 0.9712105989456177,grad_norm: 0.9999990101282605, iteration: 87120
loss: 1.0002472400665283,grad_norm: 0.8227213849850032, iteration: 87121
loss: 0.9786762595176697,grad_norm: 0.9999991395608286, iteration: 87122
loss: 0.9662517309188843,grad_norm: 0.9987419076262152, iteration: 87123
loss: 0.982871949672699,grad_norm: 0.9999991339814038, iteration: 87124
loss: 1.0555109977722168,grad_norm: 0.9999992790407034, iteration: 87125
loss: 1.0207762718200684,grad_norm: 0.9999994122449166, iteration: 87126
loss: 1.0357521772384644,grad_norm: 0.9999990614070127, iteration: 87127
loss: 1.0004522800445557,grad_norm: 0.9563018340153389, iteration: 87128
loss: 0.9765560030937195,grad_norm: 0.9999991155280755, iteration: 87129
loss: 0.9729061722755432,grad_norm: 0.9999991486169701, iteration: 87130
loss: 0.9265519976615906,grad_norm: 0.9808529443700894, iteration: 87131
loss: 0.9528382420539856,grad_norm: 0.9999992023108033, iteration: 87132
loss: 0.9863672256469727,grad_norm: 0.9545711879006575, iteration: 87133
loss: 0.9834235310554504,grad_norm: 0.9999991809634259, iteration: 87134
loss: 1.0007117986679077,grad_norm: 0.999999150342953, iteration: 87135
loss: 1.0180587768554688,grad_norm: 0.9999992838341208, iteration: 87136
loss: 1.0022989511489868,grad_norm: 0.9999990747716772, iteration: 87137
loss: 0.9728407859802246,grad_norm: 0.9884134088706362, iteration: 87138
loss: 0.9757766723632812,grad_norm: 0.8519540892257444, iteration: 87139
loss: 0.9849622249603271,grad_norm: 0.9227425715024558, iteration: 87140
loss: 0.9969142079353333,grad_norm: 0.889456458516008, iteration: 87141
loss: 1.0178147554397583,grad_norm: 0.9763307833545628, iteration: 87142
loss: 0.972754716873169,grad_norm: 0.9999992390245069, iteration: 87143
loss: 0.997214138507843,grad_norm: 0.9999992984861558, iteration: 87144
loss: 1.0014822483062744,grad_norm: 0.9999989748074778, iteration: 87145
loss: 1.0032931566238403,grad_norm: 0.9495560400440977, iteration: 87146
loss: 0.9963692426681519,grad_norm: 0.9999991179907611, iteration: 87147
loss: 0.9600790143013,grad_norm: 0.9999993338507133, iteration: 87148
loss: 0.9861940145492554,grad_norm: 0.9753519210638946, iteration: 87149
loss: 1.009103775024414,grad_norm: 0.999999185744057, iteration: 87150
loss: 0.9666643142700195,grad_norm: 0.996498831435504, iteration: 87151
loss: 0.9816482663154602,grad_norm: 0.9999991069005143, iteration: 87152
loss: 0.9575376510620117,grad_norm: 0.9275952227534068, iteration: 87153
loss: 0.9865925312042236,grad_norm: 0.999999087278344, iteration: 87154
loss: 0.9473168253898621,grad_norm: 0.9999990901102341, iteration: 87155
loss: 1.0158182382583618,grad_norm: 0.9382716334559159, iteration: 87156
loss: 1.0068343877792358,grad_norm: 0.9999991705319942, iteration: 87157
loss: 1.0084830522537231,grad_norm: 0.8842879687437576, iteration: 87158
loss: 0.9829930663108826,grad_norm: 0.999999192537149, iteration: 87159
loss: 1.0063022375106812,grad_norm: 0.999999143264901, iteration: 87160
loss: 1.04390549659729,grad_norm: 0.9999991434831708, iteration: 87161
loss: 0.9924460053443909,grad_norm: 0.8980644714506059, iteration: 87162
loss: 0.9912092685699463,grad_norm: 0.9999991299245679, iteration: 87163
loss: 0.9971332550048828,grad_norm: 0.999999148868982, iteration: 87164
loss: 1.0178515911102295,grad_norm: 0.9069831283426117, iteration: 87165
loss: 1.012493371963501,grad_norm: 0.9999992385942815, iteration: 87166
loss: 0.9657605886459351,grad_norm: 0.9999991152483692, iteration: 87167
loss: 0.9732086658477783,grad_norm: 0.9254913123281928, iteration: 87168
loss: 1.0052472352981567,grad_norm: 0.999999108496264, iteration: 87169
loss: 0.9761068224906921,grad_norm: 0.9999991816664449, iteration: 87170
loss: 0.9921320676803589,grad_norm: 0.9999991455693614, iteration: 87171
loss: 0.9984416365623474,grad_norm: 0.9596694023288854, iteration: 87172
loss: 1.001861333847046,grad_norm: 0.9999997632016667, iteration: 87173
loss: 1.0162636041641235,grad_norm: 0.9999991302941801, iteration: 87174
loss: 1.0035929679870605,grad_norm: 0.9999990904012382, iteration: 87175
loss: 1.0287450551986694,grad_norm: 0.961408150619161, iteration: 87176
loss: 0.9740235209465027,grad_norm: 0.9091934315956557, iteration: 87177
loss: 1.0123083591461182,grad_norm: 0.9999991738087286, iteration: 87178
loss: 1.028709888458252,grad_norm: 0.999999233587774, iteration: 87179
loss: 1.0054802894592285,grad_norm: 0.9465839159166521, iteration: 87180
loss: 1.0111664533615112,grad_norm: 0.9999992304581783, iteration: 87181
loss: 1.0316705703735352,grad_norm: 0.9999990326487725, iteration: 87182
loss: 0.9897811412811279,grad_norm: 0.9999990470285242, iteration: 87183
loss: 1.0198615789413452,grad_norm: 0.9005824853340593, iteration: 87184
loss: 1.2033894062042236,grad_norm: 0.9999999148488274, iteration: 87185
loss: 1.0325590372085571,grad_norm: 0.9999990335216217, iteration: 87186
loss: 0.9695959687232971,grad_norm: 0.999999236581614, iteration: 87187
loss: 0.9458714723587036,grad_norm: 0.9002756136729257, iteration: 87188
loss: 0.9760578274726868,grad_norm: 0.9999992155206366, iteration: 87189
loss: 0.9826334118843079,grad_norm: 0.9999991754781602, iteration: 87190
loss: 1.0263218879699707,grad_norm: 0.9736985995747953, iteration: 87191
loss: 1.0386656522750854,grad_norm: 0.999998994541668, iteration: 87192
loss: 1.0102481842041016,grad_norm: 0.8477767085729745, iteration: 87193
loss: 0.980226993560791,grad_norm: 0.9999990805317731, iteration: 87194
loss: 0.9840590953826904,grad_norm: 0.999998978396772, iteration: 87195
loss: 1.0144109725952148,grad_norm: 0.9999991052087553, iteration: 87196
loss: 1.021855354309082,grad_norm: 0.9862525765350594, iteration: 87197
loss: 1.0272151231765747,grad_norm: 0.9635673755130675, iteration: 87198
loss: 1.0173348188400269,grad_norm: 0.9999990825240317, iteration: 87199
loss: 1.0058759450912476,grad_norm: 0.9999991805402935, iteration: 87200
loss: 1.058922529220581,grad_norm: 0.9999991789544087, iteration: 87201
loss: 0.9904980063438416,grad_norm: 0.999999129116064, iteration: 87202
loss: 1.0084587335586548,grad_norm: 0.9999990551520012, iteration: 87203
loss: 0.9797220826148987,grad_norm: 0.9999993595330933, iteration: 87204
loss: 0.9707854986190796,grad_norm: 0.9216930671751509, iteration: 87205
loss: 1.0535145998001099,grad_norm: 0.9999991211141407, iteration: 87206
loss: 1.0103733539581299,grad_norm: 0.9999992020116089, iteration: 87207
loss: 0.9792844653129578,grad_norm: 0.8962990800649607, iteration: 87208
loss: 1.016297459602356,grad_norm: 0.9999992731058541, iteration: 87209
loss: 0.9880794286727905,grad_norm: 0.847759853630772, iteration: 87210
loss: 0.9974703192710876,grad_norm: 0.9999991621054247, iteration: 87211
loss: 1.032307505607605,grad_norm: 0.9999991973845763, iteration: 87212
loss: 0.9916378855705261,grad_norm: 0.8255199242856115, iteration: 87213
loss: 0.9969284534454346,grad_norm: 0.9999991498072414, iteration: 87214
loss: 1.0018569231033325,grad_norm: 0.9593629916769576, iteration: 87215
loss: 1.0035245418548584,grad_norm: 0.9999992356967694, iteration: 87216
loss: 0.9951137900352478,grad_norm: 0.99999917309338, iteration: 87217
loss: 0.978771984577179,grad_norm: 0.9999991625827969, iteration: 87218
loss: 1.0321632623672485,grad_norm: 0.9999991048553359, iteration: 87219
loss: 1.0136386156082153,grad_norm: 0.9891394565920475, iteration: 87220
loss: 0.9993494749069214,grad_norm: 0.9999989681291394, iteration: 87221
loss: 0.9998476505279541,grad_norm: 0.9953370677100389, iteration: 87222
loss: 0.988015353679657,grad_norm: 0.9864870048622918, iteration: 87223
loss: 0.9782962203025818,grad_norm: 0.9999991402587968, iteration: 87224
loss: 1.0254896879196167,grad_norm: 0.823372642168654, iteration: 87225
loss: 1.0101794004440308,grad_norm: 0.921541258995017, iteration: 87226
loss: 1.0205239057540894,grad_norm: 0.9999991995877385, iteration: 87227
loss: 0.9951194524765015,grad_norm: 0.8595818780543099, iteration: 87228
loss: 0.9862177968025208,grad_norm: 0.9999990218312532, iteration: 87229
loss: 0.9894513487815857,grad_norm: 0.999999195009971, iteration: 87230
loss: 0.9805492162704468,grad_norm: 0.8530633879996514, iteration: 87231
loss: 0.9863548874855042,grad_norm: 0.9612228212099542, iteration: 87232
loss: 1.0246312618255615,grad_norm: 0.9999991318770485, iteration: 87233
loss: 1.0228946208953857,grad_norm: 0.788703781978827, iteration: 87234
loss: 0.9741469025611877,grad_norm: 0.8731013489877084, iteration: 87235
loss: 0.9845647811889648,grad_norm: 0.9795482986985958, iteration: 87236
loss: 0.9684908390045166,grad_norm: 0.999999097157539, iteration: 87237
loss: 1.0012933015823364,grad_norm: 0.969774343967279, iteration: 87238
loss: 0.9700630307197571,grad_norm: 0.9957417312396803, iteration: 87239
loss: 0.990440845489502,grad_norm: 0.9999989961653417, iteration: 87240
loss: 1.004393219947815,grad_norm: 0.9288805956105652, iteration: 87241
loss: 1.025421380996704,grad_norm: 0.9999991211754827, iteration: 87242
loss: 1.0242894887924194,grad_norm: 0.9999993448233779, iteration: 87243
loss: 0.9819010496139526,grad_norm: 0.9999992581061192, iteration: 87244
loss: 1.0020685195922852,grad_norm: 0.9999992076160161, iteration: 87245
loss: 0.9997586607933044,grad_norm: 0.9999991384199176, iteration: 87246
loss: 0.9946149587631226,grad_norm: 0.9999996186676545, iteration: 87247
loss: 1.0295755863189697,grad_norm: 0.9999991749615883, iteration: 87248
loss: 0.9891756772994995,grad_norm: 0.9999990666710951, iteration: 87249
loss: 0.9789164662361145,grad_norm: 0.9571550172090868, iteration: 87250
loss: 0.9982757568359375,grad_norm: 0.7751421154759432, iteration: 87251
loss: 0.9928106665611267,grad_norm: 0.9127461691354162, iteration: 87252
loss: 0.9830291271209717,grad_norm: 0.9999991882004938, iteration: 87253
loss: 1.0125735998153687,grad_norm: 0.9795507090683423, iteration: 87254
loss: 0.9879167079925537,grad_norm: 0.9999990745486702, iteration: 87255
loss: 0.9815517663955688,grad_norm: 0.9954971872846446, iteration: 87256
loss: 1.0087909698486328,grad_norm: 0.8024142789801788, iteration: 87257
loss: 1.019381046295166,grad_norm: 0.9999991036502118, iteration: 87258
loss: 1.0630568265914917,grad_norm: 0.9999997858148867, iteration: 87259
loss: 1.0008506774902344,grad_norm: 0.9999990727799756, iteration: 87260
loss: 0.9856992959976196,grad_norm: 0.9999990566325507, iteration: 87261
loss: 1.009124755859375,grad_norm: 0.9409860179359733, iteration: 87262
loss: 0.9962112903594971,grad_norm: 0.8944027367134121, iteration: 87263
loss: 0.9814736247062683,grad_norm: 0.9999990684648463, iteration: 87264
loss: 1.0268161296844482,grad_norm: 0.9999992662469851, iteration: 87265
loss: 1.0331809520721436,grad_norm: 0.9999991491062081, iteration: 87266
loss: 0.9848943948745728,grad_norm: 0.8798600406175138, iteration: 87267
loss: 1.0436651706695557,grad_norm: 0.9999991256287653, iteration: 87268
loss: 0.9856967926025391,grad_norm: 0.9999991840334551, iteration: 87269
loss: 0.9923427700996399,grad_norm: 0.9999996578821185, iteration: 87270
loss: 1.006284236907959,grad_norm: 0.913999838630846, iteration: 87271
loss: 0.9612436890602112,grad_norm: 0.9999991374023354, iteration: 87272
loss: 1.0160043239593506,grad_norm: 0.999998936375514, iteration: 87273
loss: 0.9929073452949524,grad_norm: 0.9999992434174654, iteration: 87274
loss: 1.0066969394683838,grad_norm: 0.9999990585832468, iteration: 87275
loss: 0.9683018922805786,grad_norm: 0.9999991836007542, iteration: 87276
loss: 1.0889731645584106,grad_norm: 0.9999994323766173, iteration: 87277
loss: 0.9898152947425842,grad_norm: 0.9999990325324088, iteration: 87278
loss: 1.0161043405532837,grad_norm: 0.9999991282111976, iteration: 87279
loss: 0.986660361289978,grad_norm: 0.9999991619155179, iteration: 87280
loss: 0.9620462656021118,grad_norm: 0.9999990903254168, iteration: 87281
loss: 0.9722941517829895,grad_norm: 0.9999991472267098, iteration: 87282
loss: 1.0112160444259644,grad_norm: 0.8325887100873931, iteration: 87283
loss: 0.9851782917976379,grad_norm: 0.8803971067140123, iteration: 87284
loss: 1.0312085151672363,grad_norm: 0.9999991460305516, iteration: 87285
loss: 0.9866310358047485,grad_norm: 0.9999992006286483, iteration: 87286
loss: 0.9564080238342285,grad_norm: 0.9999990484336001, iteration: 87287
loss: 0.9989752173423767,grad_norm: 0.8798924004286458, iteration: 87288
loss: 1.0095924139022827,grad_norm: 0.9999991026882062, iteration: 87289
loss: 0.9810618758201599,grad_norm: 0.8394776303958882, iteration: 87290
loss: 1.001538634300232,grad_norm: 0.9999997722330921, iteration: 87291
loss: 1.007087230682373,grad_norm: 0.9999990838577268, iteration: 87292
loss: 1.058518409729004,grad_norm: 0.9999991765657202, iteration: 87293
loss: 1.065004825592041,grad_norm: 0.9696044673583861, iteration: 87294
loss: 1.0220335721969604,grad_norm: 0.9999990465291623, iteration: 87295
loss: 0.9972553849220276,grad_norm: 0.902435368330884, iteration: 87296
loss: 1.0193908214569092,grad_norm: 0.9999991688918562, iteration: 87297
loss: 1.0155879259109497,grad_norm: 0.9999991147450928, iteration: 87298
loss: 1.022946834564209,grad_norm: 0.9999990010773343, iteration: 87299
loss: 0.9922301769256592,grad_norm: 0.9999991418423755, iteration: 87300
loss: 0.9643648862838745,grad_norm: 0.9999991012492835, iteration: 87301
loss: 0.9826003909111023,grad_norm: 0.8844878518402848, iteration: 87302
loss: 1.0046801567077637,grad_norm: 0.9708257477908285, iteration: 87303
loss: 0.9689106941223145,grad_norm: 0.9800231229931385, iteration: 87304
loss: 0.9779037237167358,grad_norm: 0.9452091307032462, iteration: 87305
loss: 0.9983617067337036,grad_norm: 0.9999991377729102, iteration: 87306
loss: 1.036778450012207,grad_norm: 0.9999994650591432, iteration: 87307
loss: 1.0010793209075928,grad_norm: 0.9999991887596895, iteration: 87308
loss: 0.9444357752799988,grad_norm: 0.9999992749927719, iteration: 87309
loss: 0.985327959060669,grad_norm: 0.9999992162582564, iteration: 87310
loss: 1.0278594493865967,grad_norm: 0.9999998022068948, iteration: 87311
loss: 1.0205086469650269,grad_norm: 0.972677415288668, iteration: 87312
loss: 0.9848955869674683,grad_norm: 0.999999077964301, iteration: 87313
loss: 1.0073384046554565,grad_norm: 0.9999991133663014, iteration: 87314
loss: 1.0130568742752075,grad_norm: 0.9999992146712525, iteration: 87315
loss: 0.9628525972366333,grad_norm: 0.9409469026821581, iteration: 87316
loss: 0.9986090064048767,grad_norm: 0.9999992142326751, iteration: 87317
loss: 0.9933573007583618,grad_norm: 0.9877604844207514, iteration: 87318
loss: 1.0362688302993774,grad_norm: 0.9999993532654371, iteration: 87319
loss: 1.0091303586959839,grad_norm: 0.9999990413928406, iteration: 87320
loss: 1.1075023412704468,grad_norm: 0.9999996264695415, iteration: 87321
loss: 0.9848313927650452,grad_norm: 0.9289959880763371, iteration: 87322
loss: 1.0314924716949463,grad_norm: 0.9999992271074736, iteration: 87323
loss: 1.0566742420196533,grad_norm: 0.9999991640010982, iteration: 87324
loss: 1.0027095079421997,grad_norm: 0.9913024540808781, iteration: 87325
loss: 0.9776046276092529,grad_norm: 0.9999991926874023, iteration: 87326
loss: 0.9784641861915588,grad_norm: 0.9974219166396634, iteration: 87327
loss: 0.9662278294563293,grad_norm: 0.972867042229652, iteration: 87328
loss: 1.0196704864501953,grad_norm: 0.9999991709374996, iteration: 87329
loss: 0.9912020564079285,grad_norm: 0.9999990643041157, iteration: 87330
loss: 0.9761129021644592,grad_norm: 0.9999992360493272, iteration: 87331
loss: 1.0296754837036133,grad_norm: 0.9999993535963955, iteration: 87332
loss: 0.9971152544021606,grad_norm: 0.999999081265641, iteration: 87333
loss: 1.0143203735351562,grad_norm: 0.999999232246882, iteration: 87334
loss: 1.023521065711975,grad_norm: 0.9999991215903726, iteration: 87335
loss: 1.0001522302627563,grad_norm: 0.9999990875115201, iteration: 87336
loss: 0.9972009062767029,grad_norm: 0.9999990781931812, iteration: 87337
loss: 1.0225199460983276,grad_norm: 0.9999990930083533, iteration: 87338
loss: 1.0289429426193237,grad_norm: 0.9999991853279777, iteration: 87339
loss: 1.0292752981185913,grad_norm: 0.9999989908048328, iteration: 87340
loss: 1.0086427927017212,grad_norm: 0.9999992709088942, iteration: 87341
loss: 0.9769535660743713,grad_norm: 0.9370390664904704, iteration: 87342
loss: 1.0157595872879028,grad_norm: 0.9999992370137677, iteration: 87343
loss: 0.9917885065078735,grad_norm: 0.9999990644987147, iteration: 87344
loss: 1.0103546380996704,grad_norm: 0.9999990926126526, iteration: 87345
loss: 0.9869308471679688,grad_norm: 0.999999345347389, iteration: 87346
loss: 1.0057878494262695,grad_norm: 0.999999036548947, iteration: 87347
loss: 1.005456566810608,grad_norm: 0.9695654678887605, iteration: 87348
loss: 0.9781554341316223,grad_norm: 0.99999922383983, iteration: 87349
loss: 0.9715580940246582,grad_norm: 0.9999992338398872, iteration: 87350
loss: 1.0493717193603516,grad_norm: 0.9999990344159069, iteration: 87351
loss: 1.0215955972671509,grad_norm: 0.9478249660706565, iteration: 87352
loss: 0.9709587693214417,grad_norm: 0.9067143670177696, iteration: 87353
loss: 0.965034544467926,grad_norm: 0.9309674926356692, iteration: 87354
loss: 1.0214792490005493,grad_norm: 0.9495560316865815, iteration: 87355
loss: 0.988195538520813,grad_norm: 0.8706083440620976, iteration: 87356
loss: 0.9992712140083313,grad_norm: 0.9999992913153359, iteration: 87357
loss: 0.9969310164451599,grad_norm: 0.9847348303552119, iteration: 87358
loss: 0.9835371971130371,grad_norm: 0.9999989921131933, iteration: 87359
loss: 1.0019246339797974,grad_norm: 0.9999991624057689, iteration: 87360
loss: 1.0327587127685547,grad_norm: 0.9999991784323635, iteration: 87361
loss: 0.9918566346168518,grad_norm: 0.7538643153478618, iteration: 87362
loss: 1.0465316772460938,grad_norm: 0.9999993118122971, iteration: 87363
loss: 1.0008432865142822,grad_norm: 0.9999991348761089, iteration: 87364
loss: 0.995435357093811,grad_norm: 0.9999989377060102, iteration: 87365
loss: 1.0183137655258179,grad_norm: 0.9999989226815874, iteration: 87366
loss: 0.9820351004600525,grad_norm: 0.9999991811338024, iteration: 87367
loss: 1.013349175453186,grad_norm: 0.9198908679980933, iteration: 87368
loss: 1.0191057920455933,grad_norm: 0.9566468536194951, iteration: 87369
loss: 1.0790756940841675,grad_norm: 0.999999060219595, iteration: 87370
loss: 0.9595486521720886,grad_norm: 0.9999990327258607, iteration: 87371
loss: 1.0072035789489746,grad_norm: 0.9668921990563082, iteration: 87372
loss: 0.9945562481880188,grad_norm: 0.9999993264162552, iteration: 87373
loss: 0.9510307312011719,grad_norm: 0.9999991360083118, iteration: 87374
loss: 0.9684931635856628,grad_norm: 0.9901785722969212, iteration: 87375
loss: 0.9863793253898621,grad_norm: 0.9242023266431325, iteration: 87376
loss: 1.006942629814148,grad_norm: 0.9099617252780899, iteration: 87377
loss: 0.9954412579536438,grad_norm: 0.9677091302356634, iteration: 87378
loss: 1.022323727607727,grad_norm: 0.9999989876546057, iteration: 87379
loss: 1.0338808298110962,grad_norm: 0.9999994726200894, iteration: 87380
loss: 1.0320805311203003,grad_norm: 0.9751757579076046, iteration: 87381
loss: 1.0012160539627075,grad_norm: 0.9999991659789225, iteration: 87382
loss: 0.9568571448326111,grad_norm: 0.9999990314387849, iteration: 87383
loss: 1.009475827217102,grad_norm: 0.9537790672038683, iteration: 87384
loss: 1.054520606994629,grad_norm: 0.9999990678557689, iteration: 87385
loss: 0.9902809858322144,grad_norm: 0.9999989973144295, iteration: 87386
loss: 0.9887312054634094,grad_norm: 0.936750782033248, iteration: 87387
loss: 1.0127298831939697,grad_norm: 0.9999991862487915, iteration: 87388
loss: 0.9928660988807678,grad_norm: 0.9999991400864857, iteration: 87389
loss: 1.000746250152588,grad_norm: 0.9999992157786181, iteration: 87390
loss: 1.0324811935424805,grad_norm: 0.9999990194255699, iteration: 87391
loss: 0.9867745041847229,grad_norm: 0.9999991653567417, iteration: 87392
loss: 1.0005989074707031,grad_norm: 0.8522862464901348, iteration: 87393
loss: 1.0200494527816772,grad_norm: 0.9999994598997229, iteration: 87394
loss: 0.9648531675338745,grad_norm: 0.999999126771644, iteration: 87395
loss: 1.0158846378326416,grad_norm: 0.9999991520035859, iteration: 87396
loss: 0.9903091788291931,grad_norm: 0.9999991161833679, iteration: 87397
loss: 1.0202891826629639,grad_norm: 0.9999991349989072, iteration: 87398
loss: 1.032062292098999,grad_norm: 0.9487869564979844, iteration: 87399
loss: 1.0141711235046387,grad_norm: 0.9303583115748758, iteration: 87400
loss: 1.0097178220748901,grad_norm: 0.9999993668425428, iteration: 87401
loss: 1.0910959243774414,grad_norm: 0.9999991481671597, iteration: 87402
loss: 0.9840955138206482,grad_norm: 0.9999990930275879, iteration: 87403
loss: 0.9747827649116516,grad_norm: 0.9999991538775134, iteration: 87404
loss: 0.9926549196243286,grad_norm: 0.9999991922542388, iteration: 87405
loss: 1.001369833946228,grad_norm: 0.9999990988773841, iteration: 87406
loss: 1.0078039169311523,grad_norm: 0.9122926333200094, iteration: 87407
loss: 1.026474952697754,grad_norm: 0.9999993535656344, iteration: 87408
loss: 1.0034013986587524,grad_norm: 0.9999992553491871, iteration: 87409
loss: 0.986532986164093,grad_norm: 0.9709336910499723, iteration: 87410
loss: 0.9865095615386963,grad_norm: 0.9999990581759612, iteration: 87411
loss: 0.9720520973205566,grad_norm: 0.9999991149217969, iteration: 87412
loss: 1.0396195650100708,grad_norm: 0.9999991704273613, iteration: 87413
loss: 1.0227634906768799,grad_norm: 0.9999991490572839, iteration: 87414
loss: 1.0201102495193481,grad_norm: 0.9999991482321342, iteration: 87415
loss: 0.9745211005210876,grad_norm: 0.999999165390485, iteration: 87416
loss: 0.9961745142936707,grad_norm: 0.9999990418857863, iteration: 87417
loss: 0.9669533967971802,grad_norm: 0.9970707665646309, iteration: 87418
loss: 1.0157554149627686,grad_norm: 0.9999990183939651, iteration: 87419
loss: 1.0050770044326782,grad_norm: 0.9273868287617716, iteration: 87420
loss: 1.0187280178070068,grad_norm: 0.9999991507049489, iteration: 87421
loss: 1.005823016166687,grad_norm: 0.8283738418310631, iteration: 87422
loss: 1.0038120746612549,grad_norm: 0.9264402091915428, iteration: 87423
loss: 0.950069010257721,grad_norm: 0.9999990475570566, iteration: 87424
loss: 1.0154911279678345,grad_norm: 0.9999990978654367, iteration: 87425
loss: 0.9920188188552856,grad_norm: 0.9999991749512861, iteration: 87426
loss: 1.0199177265167236,grad_norm: 0.9357828487838548, iteration: 87427
loss: 1.06243097782135,grad_norm: 0.9999991831564147, iteration: 87428
loss: 0.9954143166542053,grad_norm: 0.9999991007591502, iteration: 87429
loss: 0.9901048541069031,grad_norm: 0.9999992242159104, iteration: 87430
loss: 1.0001221895217896,grad_norm: 0.9895822425315594, iteration: 87431
loss: 1.0040535926818848,grad_norm: 0.9733047329673881, iteration: 87432
loss: 0.9661861062049866,grad_norm: 0.9999992426170686, iteration: 87433
loss: 0.9839326739311218,grad_norm: 0.9162824334618207, iteration: 87434
loss: 1.0045039653778076,grad_norm: 0.8991109651686929, iteration: 87435
loss: 0.9852210283279419,grad_norm: 0.9999990424321892, iteration: 87436
loss: 0.9930248260498047,grad_norm: 0.9652573291734183, iteration: 87437
loss: 1.0077773332595825,grad_norm: 0.9986092497168724, iteration: 87438
loss: 0.9853286743164062,grad_norm: 0.9999990434541101, iteration: 87439
loss: 1.010227084159851,grad_norm: 0.9999992277838355, iteration: 87440
loss: 0.9977594614028931,grad_norm: 0.9999991898180316, iteration: 87441
loss: 1.0241448879241943,grad_norm: 0.9999990592353649, iteration: 87442
loss: 1.0207571983337402,grad_norm: 0.9999991580024219, iteration: 87443
loss: 0.9899029731750488,grad_norm: 0.9743489019467543, iteration: 87444
loss: 0.9851531982421875,grad_norm: 0.9587378945281796, iteration: 87445
loss: 0.9905235171318054,grad_norm: 0.9999990235884367, iteration: 87446
loss: 0.9827394485473633,grad_norm: 0.999999135354806, iteration: 87447
loss: 0.9936667680740356,grad_norm: 0.9999989694058886, iteration: 87448
loss: 1.0065418481826782,grad_norm: 0.9942898603250879, iteration: 87449
loss: 1.0363324880599976,grad_norm: 0.9999992870240987, iteration: 87450
loss: 1.0078341960906982,grad_norm: 0.9999990318756204, iteration: 87451
loss: 0.9974294304847717,grad_norm: 0.9999988116086811, iteration: 87452
loss: 0.9986319541931152,grad_norm: 0.9999992596481481, iteration: 87453
loss: 1.02402663230896,grad_norm: 0.999999290102952, iteration: 87454
loss: 0.9837355613708496,grad_norm: 0.9096537653202722, iteration: 87455
loss: 0.9817656874656677,grad_norm: 0.9999991188575601, iteration: 87456
loss: 1.0260612964630127,grad_norm: 0.996321872760836, iteration: 87457
loss: 1.0509519577026367,grad_norm: 0.912131044815558, iteration: 87458
loss: 1.0645315647125244,grad_norm: 0.9999995080344735, iteration: 87459
loss: 1.0027737617492676,grad_norm: 0.9999994629318852, iteration: 87460
loss: 1.0205825567245483,grad_norm: 0.9999993938687938, iteration: 87461
loss: 1.0027267932891846,grad_norm: 0.9872124281937964, iteration: 87462
loss: 1.0323758125305176,grad_norm: 0.9999997745224525, iteration: 87463
loss: 1.0043749809265137,grad_norm: 0.9822929824681367, iteration: 87464
loss: 0.9812354445457458,grad_norm: 0.9999990964403936, iteration: 87465
loss: 1.0031087398529053,grad_norm: 0.9999990594788603, iteration: 87466
loss: 0.9871934652328491,grad_norm: 0.9999992010737884, iteration: 87467
loss: 0.9979858994483948,grad_norm: 0.9999992168288425, iteration: 87468
loss: 0.9971305727958679,grad_norm: 0.927182147512143, iteration: 87469
loss: 0.9673599600791931,grad_norm: 0.9999991644264493, iteration: 87470
loss: 0.9925987720489502,grad_norm: 0.9587148128584985, iteration: 87471
loss: 1.0017409324645996,grad_norm: 0.9490547689661798, iteration: 87472
loss: 1.0189944505691528,grad_norm: 0.9999991189973282, iteration: 87473
loss: 1.004589557647705,grad_norm: 0.976229864368418, iteration: 87474
loss: 0.9913088083267212,grad_norm: 0.9818020672066275, iteration: 87475
loss: 0.9919811487197876,grad_norm: 0.9799131307621745, iteration: 87476
loss: 1.0023270845413208,grad_norm: 0.9999991284801485, iteration: 87477
loss: 0.9916287064552307,grad_norm: 0.9999990958651337, iteration: 87478
loss: 1.006104588508606,grad_norm: 0.9137467233093183, iteration: 87479
loss: 1.021903157234192,grad_norm: 0.9999989105409192, iteration: 87480
loss: 1.0335532426834106,grad_norm: 0.9999996600508723, iteration: 87481
loss: 1.024210810661316,grad_norm: 0.969839409088579, iteration: 87482
loss: 1.0186150074005127,grad_norm: 0.9999990600442774, iteration: 87483
loss: 1.0335688591003418,grad_norm: 0.9393053459063758, iteration: 87484
loss: 1.0389704704284668,grad_norm: 0.9583391204910707, iteration: 87485
loss: 1.0109888315200806,grad_norm: 0.999999068645332, iteration: 87486
loss: 1.0837944746017456,grad_norm: 0.9999992041763754, iteration: 87487
loss: 0.9974790811538696,grad_norm: 0.9999990907332942, iteration: 87488
loss: 0.9685850143432617,grad_norm: 0.922431230142448, iteration: 87489
loss: 0.9617785811424255,grad_norm: 0.9999989970797235, iteration: 87490
loss: 0.9865787625312805,grad_norm: 0.9999992365294367, iteration: 87491
loss: 0.9674121141433716,grad_norm: 0.9999991125210423, iteration: 87492
loss: 1.0061990022659302,grad_norm: 0.9999991196801772, iteration: 87493
loss: 1.0012133121490479,grad_norm: 0.9845172700814401, iteration: 87494
loss: 0.9938437342643738,grad_norm: 0.9420607442695842, iteration: 87495
loss: 1.0200496912002563,grad_norm: 0.9999990326152857, iteration: 87496
loss: 0.9861263632774353,grad_norm: 0.9999991146715309, iteration: 87497
loss: 0.9968791007995605,grad_norm: 0.9999992263271169, iteration: 87498
loss: 0.9975936412811279,grad_norm: 0.9999992962080445, iteration: 87499
loss: 1.0330696105957031,grad_norm: 0.9999991529329323, iteration: 87500
loss: 1.0043965578079224,grad_norm: 0.9676060239161085, iteration: 87501
loss: 0.9958303570747375,grad_norm: 0.9999991536295462, iteration: 87502
loss: 1.025773525238037,grad_norm: 0.999999108823859, iteration: 87503
loss: 1.0237808227539062,grad_norm: 0.999999146749626, iteration: 87504
loss: 0.9921382069587708,grad_norm: 0.9407356336064358, iteration: 87505
loss: 0.9733601212501526,grad_norm: 0.9999992700194363, iteration: 87506
loss: 1.0434625148773193,grad_norm: 0.9999993535514636, iteration: 87507
loss: 0.9733781218528748,grad_norm: 0.9999992181646362, iteration: 87508
loss: 1.010812759399414,grad_norm: 0.9999997059159623, iteration: 87509
loss: 1.0069578886032104,grad_norm: 0.9999993783004495, iteration: 87510
loss: 0.9987314343452454,grad_norm: 0.9052013023149786, iteration: 87511
loss: 1.093503475189209,grad_norm: 0.9999991597596906, iteration: 87512
loss: 1.0105440616607666,grad_norm: 0.9999997583301099, iteration: 87513
loss: 1.024656057357788,grad_norm: 0.9390442712140522, iteration: 87514
loss: 1.0338034629821777,grad_norm: 0.9999990436175787, iteration: 87515
loss: 0.9912249445915222,grad_norm: 0.999999175310588, iteration: 87516
loss: 1.0114831924438477,grad_norm: 0.999999209134548, iteration: 87517
loss: 0.973066508769989,grad_norm: 0.9999991469723871, iteration: 87518
loss: 1.0208549499511719,grad_norm: 0.9999993613188574, iteration: 87519
loss: 1.0204265117645264,grad_norm: 0.9999996227278801, iteration: 87520
loss: 1.014241099357605,grad_norm: 0.999999181677031, iteration: 87521
loss: 0.9764776229858398,grad_norm: 0.967425724028144, iteration: 87522
loss: 1.0212194919586182,grad_norm: 0.851852651185654, iteration: 87523
loss: 1.0208768844604492,grad_norm: 0.9999991353554977, iteration: 87524
loss: 0.9541616439819336,grad_norm: 0.9601299879575813, iteration: 87525
loss: 1.0104330778121948,grad_norm: 0.9999992238458021, iteration: 87526
loss: 0.95114666223526,grad_norm: 0.9272395700656014, iteration: 87527
loss: 0.9752885699272156,grad_norm: 0.9999990467939358, iteration: 87528
loss: 1.0013643503189087,grad_norm: 0.9999990996097359, iteration: 87529
loss: 0.9869833588600159,grad_norm: 0.9999990588848806, iteration: 87530
loss: 1.0205702781677246,grad_norm: 0.9294654379855123, iteration: 87531
loss: 0.9694979190826416,grad_norm: 0.9650006159184131, iteration: 87532
loss: 0.9911965727806091,grad_norm: 0.9999989152137325, iteration: 87533
loss: 1.051299810409546,grad_norm: 0.999999288984008, iteration: 87534
loss: 0.9893482327461243,grad_norm: 0.9999990536272458, iteration: 87535
loss: 1.047356367111206,grad_norm: 0.936551696994283, iteration: 87536
loss: 1.0031311511993408,grad_norm: 0.9999991136384703, iteration: 87537
loss: 0.9481900334358215,grad_norm: 0.9999992981219519, iteration: 87538
loss: 1.0303579568862915,grad_norm: 0.9999991354823292, iteration: 87539
loss: 1.0296494960784912,grad_norm: 0.9533559208588653, iteration: 87540
loss: 0.9930236339569092,grad_norm: 0.9652804160301657, iteration: 87541
loss: 1.047849416732788,grad_norm: 0.9999993392319863, iteration: 87542
loss: 1.0060720443725586,grad_norm: 0.9392448039511593, iteration: 87543
loss: 0.9977678060531616,grad_norm: 0.962370282432167, iteration: 87544
loss: 1.0044522285461426,grad_norm: 0.9999989816061653, iteration: 87545
loss: 0.9645922183990479,grad_norm: 0.9812905874428907, iteration: 87546
loss: 1.0255552530288696,grad_norm: 0.838194045569782, iteration: 87547
loss: 0.974137544631958,grad_norm: 0.9999990748703868, iteration: 87548
loss: 0.9923921823501587,grad_norm: 0.9855179448924704, iteration: 87549
loss: 1.0466630458831787,grad_norm: 0.9823916047988999, iteration: 87550
loss: 0.9957742094993591,grad_norm: 0.9999992672870014, iteration: 87551
loss: 1.0034481287002563,grad_norm: 0.999999232719096, iteration: 87552
loss: 1.013253092765808,grad_norm: 0.9999991109984201, iteration: 87553
loss: 1.2385467290878296,grad_norm: 0.9999998155856041, iteration: 87554
loss: 1.0142278671264648,grad_norm: 0.9999991764918714, iteration: 87555
loss: 1.0052001476287842,grad_norm: 0.9999997151238271, iteration: 87556
loss: 1.0441075563430786,grad_norm: 0.7632694197117039, iteration: 87557
loss: 0.9976285696029663,grad_norm: 0.9999995977591489, iteration: 87558
loss: 1.0025041103363037,grad_norm: 0.9647153032340445, iteration: 87559
loss: 1.002269983291626,grad_norm: 0.9648071319461937, iteration: 87560
loss: 1.0034078359603882,grad_norm: 0.9999999328539522, iteration: 87561
loss: 1.0345383882522583,grad_norm: 0.9999997257420259, iteration: 87562
loss: 1.0194652080535889,grad_norm: 0.9999991862264107, iteration: 87563
loss: 1.016583800315857,grad_norm: 0.9999992074544979, iteration: 87564
loss: 0.9890906810760498,grad_norm: 0.9999991615736586, iteration: 87565
loss: 0.991523027420044,grad_norm: 0.9999991296583188, iteration: 87566
loss: 1.00385582447052,grad_norm: 0.9676465815815344, iteration: 87567
loss: 0.9673545956611633,grad_norm: 0.7700727914463058, iteration: 87568
loss: 1.0154517889022827,grad_norm: 0.9711749019321879, iteration: 87569
loss: 0.9460413455963135,grad_norm: 0.9999989637504181, iteration: 87570
loss: 1.0268564224243164,grad_norm: 0.9999992682171838, iteration: 87571
loss: 0.986780047416687,grad_norm: 0.9999992182634607, iteration: 87572
loss: 1.0456278324127197,grad_norm: 0.9999998833922138, iteration: 87573
loss: 0.9877370595932007,grad_norm: 0.9601536363081763, iteration: 87574
loss: 0.9895952343940735,grad_norm: 0.9226479776007964, iteration: 87575
loss: 1.0125032663345337,grad_norm: 0.9999991928149328, iteration: 87576
loss: 0.9729108214378357,grad_norm: 0.9999989284328555, iteration: 87577
loss: 1.083361268043518,grad_norm: 0.9999990590520892, iteration: 87578
loss: 0.9781165719032288,grad_norm: 0.7816845831666854, iteration: 87579
loss: 1.0068844556808472,grad_norm: 0.9999991953716836, iteration: 87580
loss: 1.0026962757110596,grad_norm: 0.9999991691680166, iteration: 87581
loss: 1.0178102254867554,grad_norm: 0.9999989936634158, iteration: 87582
loss: 1.0114268064498901,grad_norm: 0.9999991488899951, iteration: 87583
loss: 1.0032306909561157,grad_norm: 0.9999993100072242, iteration: 87584
loss: 0.9967542290687561,grad_norm: 0.9105610600916353, iteration: 87585
loss: 1.0904591083526611,grad_norm: 0.9999996275865537, iteration: 87586
loss: 1.0190809965133667,grad_norm: 0.9999991036010404, iteration: 87587
loss: 0.9322568774223328,grad_norm: 0.9548223232520936, iteration: 87588
loss: 0.9974080324172974,grad_norm: 0.9999991471355243, iteration: 87589
loss: 1.064445972442627,grad_norm: 0.9999990605707763, iteration: 87590
loss: 1.2458149194717407,grad_norm: 0.9999997232336575, iteration: 87591
loss: 0.9857698082923889,grad_norm: 0.9921510135737134, iteration: 87592
loss: 0.9983878135681152,grad_norm: 0.9999991604924878, iteration: 87593
loss: 1.0031765699386597,grad_norm: 0.9999991851383977, iteration: 87594
loss: 1.0123707056045532,grad_norm: 0.9999991606101724, iteration: 87595
loss: 1.0230507850646973,grad_norm: 0.8371922358480661, iteration: 87596
loss: 0.9919518232345581,grad_norm: 0.999999120627179, iteration: 87597
loss: 0.9941907525062561,grad_norm: 0.999999111679789, iteration: 87598
loss: 1.0667773485183716,grad_norm: 0.9999997440570195, iteration: 87599
loss: 1.0446263551712036,grad_norm: 0.999999010645716, iteration: 87600
loss: 1.018162727355957,grad_norm: 0.9999990507402238, iteration: 87601
loss: 1.0236501693725586,grad_norm: 0.9694963879835277, iteration: 87602
loss: 0.9984835386276245,grad_norm: 0.924098662991517, iteration: 87603
loss: 0.9944170117378235,grad_norm: 0.999998888417944, iteration: 87604
loss: 0.9942624568939209,grad_norm: 0.9994069562232769, iteration: 87605
loss: 1.0117056369781494,grad_norm: 0.9999991753370802, iteration: 87606
loss: 1.0799586772918701,grad_norm: 0.9999996853730952, iteration: 87607
loss: 1.0127171277999878,grad_norm: 0.9999991821593036, iteration: 87608
loss: 1.021430253982544,grad_norm: 0.9999994928105214, iteration: 87609
loss: 0.9743645191192627,grad_norm: 0.9999991466348717, iteration: 87610
loss: 0.9703574776649475,grad_norm: 0.9999992005342779, iteration: 87611
loss: 1.000557541847229,grad_norm: 0.9999991158781015, iteration: 87612
loss: 0.9996663331985474,grad_norm: 0.9999991702407103, iteration: 87613
loss: 0.9654437303543091,grad_norm: 0.9999992080354481, iteration: 87614
loss: 1.0165220499038696,grad_norm: 0.9485226863217784, iteration: 87615
loss: 1.00238835811615,grad_norm: 0.9999991288011421, iteration: 87616
loss: 1.006759524345398,grad_norm: 0.990689314843699, iteration: 87617
loss: 1.0000965595245361,grad_norm: 0.8941699533647837, iteration: 87618
loss: 1.0150054693222046,grad_norm: 0.9983315186919223, iteration: 87619
loss: 0.9776173830032349,grad_norm: 0.9999991422012494, iteration: 87620
loss: 0.9816515445709229,grad_norm: 0.9999992788460834, iteration: 87621
loss: 1.0039398670196533,grad_norm: 0.9999990337097857, iteration: 87622
loss: 1.0053824186325073,grad_norm: 0.9999991789481909, iteration: 87623
loss: 1.0142871141433716,grad_norm: 0.9999997233606832, iteration: 87624
loss: 1.0213474035263062,grad_norm: 0.9923475985745305, iteration: 87625
loss: 0.9872542023658752,grad_norm: 0.9898205030392287, iteration: 87626
loss: 1.0255413055419922,grad_norm: 0.9999990388035016, iteration: 87627
loss: 0.9978315830230713,grad_norm: 0.9413208148453367, iteration: 87628
loss: 1.0016974210739136,grad_norm: 0.9713380247556578, iteration: 87629
loss: 0.9958091378211975,grad_norm: 0.8093861782023899, iteration: 87630
loss: 1.0352622270584106,grad_norm: 0.9999998217286211, iteration: 87631
loss: 1.0510663986206055,grad_norm: 0.9999992549741304, iteration: 87632
loss: 1.006955862045288,grad_norm: 0.9999994252674761, iteration: 87633
loss: 0.9740465879440308,grad_norm: 0.9999991983585284, iteration: 87634
loss: 1.0062750577926636,grad_norm: 0.9999991720239363, iteration: 87635
loss: 0.9741843342781067,grad_norm: 0.8899203497575648, iteration: 87636
loss: 0.9993373155593872,grad_norm: 0.9999991926478053, iteration: 87637
loss: 0.9722720980644226,grad_norm: 0.9906178598144035, iteration: 87638
loss: 0.9833204746246338,grad_norm: 0.9999992978455283, iteration: 87639
loss: 1.0443239212036133,grad_norm: 0.9696859125370634, iteration: 87640
loss: 1.0011613368988037,grad_norm: 0.9999990949806321, iteration: 87641
loss: 0.9780603051185608,grad_norm: 0.9999990876584193, iteration: 87642
loss: 1.0223079919815063,grad_norm: 0.9999991090579597, iteration: 87643
loss: 1.0176061391830444,grad_norm: 0.999999103324884, iteration: 87644
loss: 0.9777543544769287,grad_norm: 0.9999991607145431, iteration: 87645
loss: 1.0141154527664185,grad_norm: 0.9999990969261892, iteration: 87646
loss: 0.9896961450576782,grad_norm: 0.9921325897581114, iteration: 87647
loss: 1.0374149084091187,grad_norm: 0.9999991688889114, iteration: 87648
loss: 1.0147780179977417,grad_norm: 0.9999990434818605, iteration: 87649
loss: 0.9986881017684937,grad_norm: 0.999999191368983, iteration: 87650
loss: 0.9747521877288818,grad_norm: 0.9999991372222782, iteration: 87651
loss: 1.0247218608856201,grad_norm: 0.999999185642358, iteration: 87652
loss: 1.022797703742981,grad_norm: 0.9999992249999871, iteration: 87653
loss: 1.0371161699295044,grad_norm: 0.9999991230821103, iteration: 87654
loss: 0.9831086993217468,grad_norm: 0.9999991457367798, iteration: 87655
loss: 0.9981390237808228,grad_norm: 0.9999992217032625, iteration: 87656
loss: 1.0132615566253662,grad_norm: 0.951585230342546, iteration: 87657
loss: 1.000119924545288,grad_norm: 0.9513850447246883, iteration: 87658
loss: 0.9899458289146423,grad_norm: 0.9655452382382912, iteration: 87659
loss: 0.9785661697387695,grad_norm: 0.9999992648180666, iteration: 87660
loss: 0.9746277928352356,grad_norm: 0.99999916117463, iteration: 87661
loss: 1.0067147016525269,grad_norm: 0.9999990914750836, iteration: 87662
loss: 1.011350154876709,grad_norm: 0.9348745966631427, iteration: 87663
loss: 0.9883120656013489,grad_norm: 0.99999915763775, iteration: 87664
loss: 0.999723494052887,grad_norm: 0.9344927824884859, iteration: 87665
loss: 0.9980310797691345,grad_norm: 0.9732408742596756, iteration: 87666
loss: 1.0005769729614258,grad_norm: 0.9999988792159431, iteration: 87667
loss: 0.9899325370788574,grad_norm: 0.9794798810946659, iteration: 87668
loss: 1.0208410024642944,grad_norm: 0.999999230960056, iteration: 87669
loss: 1.0365623235702515,grad_norm: 0.9999991290773096, iteration: 87670
loss: 1.0379822254180908,grad_norm: 0.9999991040605264, iteration: 87671
loss: 0.9923137426376343,grad_norm: 0.9999991701185553, iteration: 87672
loss: 1.0168074369430542,grad_norm: 0.999999359935024, iteration: 87673
loss: 0.9878562092781067,grad_norm: 0.9999990150799412, iteration: 87674
loss: 0.9861851930618286,grad_norm: 0.9999992422108199, iteration: 87675
loss: 1.0285807847976685,grad_norm: 0.9999994900096396, iteration: 87676
loss: 0.9763424396514893,grad_norm: 0.9999990638379592, iteration: 87677
loss: 1.11307692527771,grad_norm: 0.9999994710626908, iteration: 87678
loss: 1.0086445808410645,grad_norm: 0.9999992038338564, iteration: 87679
loss: 1.0183744430541992,grad_norm: 0.9005622585363328, iteration: 87680
loss: 1.0159502029418945,grad_norm: 0.9999990113820959, iteration: 87681
loss: 1.036095380783081,grad_norm: 0.9656892220583567, iteration: 87682
loss: 1.0780823230743408,grad_norm: 0.9999998029798458, iteration: 87683
loss: 0.9936333298683167,grad_norm: 0.9603753018170182, iteration: 87684
loss: 0.9836946129798889,grad_norm: 0.9999991243881868, iteration: 87685
loss: 1.011672019958496,grad_norm: 0.9999989645519094, iteration: 87686
loss: 1.0026323795318604,grad_norm: 0.9853394556458172, iteration: 87687
loss: 0.9943888187408447,grad_norm: 0.9999990609052769, iteration: 87688
loss: 1.023445725440979,grad_norm: 0.9360139600358903, iteration: 87689
loss: 1.0239770412445068,grad_norm: 0.9999992536554311, iteration: 87690
loss: 0.9846888780593872,grad_norm: 0.9999990272488699, iteration: 87691
loss: 1.0150127410888672,grad_norm: 0.9605799201913219, iteration: 87692
loss: 0.9731486439704895,grad_norm: 0.9999991538241215, iteration: 87693
loss: 1.0122603178024292,grad_norm: 0.9999990641214441, iteration: 87694
loss: 1.0259664058685303,grad_norm: 0.9999990166781831, iteration: 87695
loss: 1.0528196096420288,grad_norm: 0.9999995727129322, iteration: 87696
loss: 0.9691346883773804,grad_norm: 0.9999990702353229, iteration: 87697
loss: 0.9962809085845947,grad_norm: 0.9999992361270306, iteration: 87698
loss: 0.9844122529029846,grad_norm: 0.9999990545588675, iteration: 87699
loss: 1.0033074617385864,grad_norm: 0.999999254884664, iteration: 87700
loss: 0.9672009348869324,grad_norm: 0.9999990608469624, iteration: 87701
loss: 0.9979056715965271,grad_norm: 0.9999991962461444, iteration: 87702
loss: 1.0556615591049194,grad_norm: 0.9999991483605484, iteration: 87703
loss: 0.9778045415878296,grad_norm: 0.9999990990327564, iteration: 87704
loss: 0.9832786917686462,grad_norm: 0.9999990902061586, iteration: 87705
loss: 1.0046072006225586,grad_norm: 0.823724524798063, iteration: 87706
loss: 0.9781951308250427,grad_norm: 0.999998997425178, iteration: 87707
loss: 0.9779000282287598,grad_norm: 0.999999638960919, iteration: 87708
loss: 0.9793775677680969,grad_norm: 0.9999991984197997, iteration: 87709
loss: 1.0732457637786865,grad_norm: 0.9999992881751676, iteration: 87710
loss: 1.0613003969192505,grad_norm: 0.9619057923309711, iteration: 87711
loss: 0.9943872690200806,grad_norm: 0.9999992583433753, iteration: 87712
loss: 1.0110366344451904,grad_norm: 0.9999991761543097, iteration: 87713
loss: 1.030720829963684,grad_norm: 0.8311326135720412, iteration: 87714
loss: 1.0180913209915161,grad_norm: 0.9999991269803761, iteration: 87715
loss: 1.0523059368133545,grad_norm: 0.9999991448847768, iteration: 87716
loss: 1.0364633798599243,grad_norm: 0.9999990869251563, iteration: 87717
loss: 0.994500458240509,grad_norm: 0.9864442274538427, iteration: 87718
loss: 0.9955254793167114,grad_norm: 0.9999991259057408, iteration: 87719
loss: 1.0144590139389038,grad_norm: 0.9321254311210008, iteration: 87720
loss: 1.0124733448028564,grad_norm: 0.9999993237998744, iteration: 87721
loss: 1.0015214681625366,grad_norm: 0.999999101982897, iteration: 87722
loss: 1.0055664777755737,grad_norm: 0.9657218105573145, iteration: 87723
loss: 0.9845641255378723,grad_norm: 0.9999991378582767, iteration: 87724
loss: 0.9810805916786194,grad_norm: 0.9999991676750191, iteration: 87725
loss: 0.9257012009620667,grad_norm: 0.9999991802624114, iteration: 87726
loss: 0.9991980195045471,grad_norm: 0.9336235814008269, iteration: 87727
loss: 1.0752909183502197,grad_norm: 0.9999996971005336, iteration: 87728
loss: 1.0059705972671509,grad_norm: 0.8790089373092699, iteration: 87729
loss: 0.9839261174201965,grad_norm: 0.9999991182111324, iteration: 87730
loss: 1.0155680179595947,grad_norm: 0.9999991035798653, iteration: 87731
loss: 0.9686612486839294,grad_norm: 0.9999990719392755, iteration: 87732
loss: 1.0086097717285156,grad_norm: 0.9222314999229773, iteration: 87733
loss: 0.9961811304092407,grad_norm: 0.9726383980749327, iteration: 87734
loss: 1.005261778831482,grad_norm: 0.9999995700325467, iteration: 87735
loss: 0.9952870607376099,grad_norm: 0.999999223900987, iteration: 87736
loss: 0.9965900182723999,grad_norm: 0.9999991851185331, iteration: 87737
loss: 0.9795992970466614,grad_norm: 0.9999991657490599, iteration: 87738
loss: 0.9996426105499268,grad_norm: 0.9999995416808256, iteration: 87739
loss: 1.0276368856430054,grad_norm: 0.999999001904971, iteration: 87740
loss: 1.095275640487671,grad_norm: 0.9999998005352194, iteration: 87741
loss: 0.9538329839706421,grad_norm: 0.9999992235439373, iteration: 87742
loss: 1.0144555568695068,grad_norm: 0.9999991847517026, iteration: 87743
loss: 1.038467526435852,grad_norm: 0.9999991739333836, iteration: 87744
loss: 0.9993427991867065,grad_norm: 0.999999691685883, iteration: 87745
loss: 1.0309706926345825,grad_norm: 0.999999731937007, iteration: 87746
loss: 1.0324149131774902,grad_norm: 0.9999992794392161, iteration: 87747
loss: 1.0130398273468018,grad_norm: 0.9879702033296696, iteration: 87748
loss: 1.184706687927246,grad_norm: 0.9999994401878208, iteration: 87749
loss: 0.9718184471130371,grad_norm: 0.9999990917230723, iteration: 87750
loss: 1.0462321043014526,grad_norm: 0.9859164902422167, iteration: 87751
loss: 1.0343513488769531,grad_norm: 0.9999990414195744, iteration: 87752
loss: 1.0287768840789795,grad_norm: 0.9632928847829102, iteration: 87753
loss: 1.0390853881835938,grad_norm: 0.9608524039903968, iteration: 87754
loss: 1.037194848060608,grad_norm: 0.9999990605208556, iteration: 87755
loss: 0.9996601343154907,grad_norm: 0.9845191346557588, iteration: 87756
loss: 1.0551862716674805,grad_norm: 0.9999992615870626, iteration: 87757
loss: 0.9980855584144592,grad_norm: 0.9999992068592586, iteration: 87758
loss: 0.9882287383079529,grad_norm: 0.9867047275028309, iteration: 87759
loss: 1.0203484296798706,grad_norm: 0.96532864135391, iteration: 87760
loss: 1.0207266807556152,grad_norm: 0.9999990843257198, iteration: 87761
loss: 0.997603178024292,grad_norm: 0.9325409880465382, iteration: 87762
loss: 1.0083004236221313,grad_norm: 0.9999993573995295, iteration: 87763
loss: 1.0228735208511353,grad_norm: 0.999999462876532, iteration: 87764
loss: 0.952357828617096,grad_norm: 0.9999990961912917, iteration: 87765
loss: 1.0194793939590454,grad_norm: 0.9999991533651515, iteration: 87766
loss: 1.0052707195281982,grad_norm: 0.9999992086205781, iteration: 87767
loss: 0.9977937340736389,grad_norm: 0.9999990138666006, iteration: 87768
loss: 0.9900820255279541,grad_norm: 0.9999992049055549, iteration: 87769
loss: 0.9983546733856201,grad_norm: 0.9999988910459513, iteration: 87770
loss: 1.037514090538025,grad_norm: 0.9999990737132326, iteration: 87771
loss: 1.0248578786849976,grad_norm: 0.9999991996711979, iteration: 87772
loss: 0.9870439171791077,grad_norm: 0.9999991579285249, iteration: 87773
loss: 0.9739394783973694,grad_norm: 0.9999992584091932, iteration: 87774
loss: 1.0155178308486938,grad_norm: 0.9999994781840603, iteration: 87775
loss: 1.000091552734375,grad_norm: 0.9999991959663306, iteration: 87776
loss: 1.000868320465088,grad_norm: 0.9999990814392805, iteration: 87777
loss: 0.9732897877693176,grad_norm: 0.9999991844335074, iteration: 87778
loss: 1.0017735958099365,grad_norm: 0.8661927534433418, iteration: 87779
loss: 1.0427204370498657,grad_norm: 0.9614482086260451, iteration: 87780
loss: 1.0197118520736694,grad_norm: 0.9999993285895835, iteration: 87781
loss: 0.9943263530731201,grad_norm: 0.957812201825094, iteration: 87782
loss: 0.9872146844863892,grad_norm: 0.9999990941524627, iteration: 87783
loss: 0.9987990260124207,grad_norm: 0.8535311468543806, iteration: 87784
loss: 1.0167688131332397,grad_norm: 0.9968853213231141, iteration: 87785
loss: 0.9887735843658447,grad_norm: 0.9999991004378811, iteration: 87786
loss: 0.9776456356048584,grad_norm: 0.9999990540498923, iteration: 87787
loss: 1.0505342483520508,grad_norm: 0.999999438358988, iteration: 87788
loss: 0.9816527962684631,grad_norm: 0.9276586151781305, iteration: 87789
loss: 0.9908000826835632,grad_norm: 0.9999991809899423, iteration: 87790
loss: 1.0068867206573486,grad_norm: 0.8899276367934937, iteration: 87791
loss: 1.0112448930740356,grad_norm: 0.9830270794391947, iteration: 87792
loss: 1.043355941772461,grad_norm: 0.9719029468260476, iteration: 87793
loss: 0.9636939764022827,grad_norm: 0.9912322172226082, iteration: 87794
loss: 0.9951350688934326,grad_norm: 0.9966513291185681, iteration: 87795
loss: 0.9786208868026733,grad_norm: 0.9809903510925966, iteration: 87796
loss: 0.9725377559661865,grad_norm: 0.9999989928764746, iteration: 87797
loss: 1.0066028833389282,grad_norm: 0.968712669786606, iteration: 87798
loss: 0.992119550704956,grad_norm: 0.9999990696258114, iteration: 87799
loss: 1.0124238729476929,grad_norm: 0.9999990057274541, iteration: 87800
loss: 0.9791175723075867,grad_norm: 0.9999991730232319, iteration: 87801
loss: 0.9916437864303589,grad_norm: 0.9913650462494105, iteration: 87802
loss: 0.995887279510498,grad_norm: 0.8809034317230131, iteration: 87803
loss: 1.0073904991149902,grad_norm: 0.999999047157891, iteration: 87804
loss: 1.0199456214904785,grad_norm: 0.9999991838229909, iteration: 87805
loss: 0.9787957072257996,grad_norm: 0.9999989850956557, iteration: 87806
loss: 0.9939857721328735,grad_norm: 0.999999362676363, iteration: 87807
loss: 0.9693013429641724,grad_norm: 0.9228617302595269, iteration: 87808
loss: 0.9951486587524414,grad_norm: 0.9999990732963933, iteration: 87809
loss: 0.9686819314956665,grad_norm: 0.9999990508441053, iteration: 87810
loss: 0.9725097417831421,grad_norm: 0.9999992529610962, iteration: 87811
loss: 0.975375235080719,grad_norm: 0.9620429967816105, iteration: 87812
loss: 0.9753856062889099,grad_norm: 0.9976271298521944, iteration: 87813
loss: 1.0040682554244995,grad_norm: 0.9167645366460128, iteration: 87814
loss: 1.0153793096542358,grad_norm: 0.9999990260935943, iteration: 87815
loss: 1.0002162456512451,grad_norm: 0.999998970458407, iteration: 87816
loss: 0.9678917527198792,grad_norm: 0.9999991512454478, iteration: 87817
loss: 1.0194404125213623,grad_norm: 0.7938025628150202, iteration: 87818
loss: 1.0145708322525024,grad_norm: 0.9999991572638398, iteration: 87819
loss: 0.9950032830238342,grad_norm: 0.9999991394979841, iteration: 87820
loss: 0.9700244665145874,grad_norm: 0.999999076259749, iteration: 87821
loss: 1.0173110961914062,grad_norm: 0.9999990298732025, iteration: 87822
loss: 0.9773143529891968,grad_norm: 0.9149946475207977, iteration: 87823
loss: 1.0056288242340088,grad_norm: 0.9231926615599555, iteration: 87824
loss: 1.0239641666412354,grad_norm: 0.9999991361132848, iteration: 87825
loss: 0.9608230590820312,grad_norm: 0.9925054250884012, iteration: 87826
loss: 0.9951212406158447,grad_norm: 0.999999141125305, iteration: 87827
loss: 0.9963826537132263,grad_norm: 0.9338107589730272, iteration: 87828
loss: 0.9738439321517944,grad_norm: 0.9896529205169191, iteration: 87829
loss: 1.007142186164856,grad_norm: 0.999998945231136, iteration: 87830
loss: 1.001994013786316,grad_norm: 0.9293208732364748, iteration: 87831
loss: 0.9625409245491028,grad_norm: 0.9999991427724018, iteration: 87832
loss: 1.0015881061553955,grad_norm: 0.9994399947867633, iteration: 87833
loss: 1.0210285186767578,grad_norm: 0.9999991334379652, iteration: 87834
loss: 0.9636974334716797,grad_norm: 0.999999225235238, iteration: 87835
loss: 0.9837903380393982,grad_norm: 0.9154209808575949, iteration: 87836
loss: 1.038119912147522,grad_norm: 0.9342211737175565, iteration: 87837
loss: 1.0055413246154785,grad_norm: 0.9999991327826894, iteration: 87838
loss: 0.9703505635261536,grad_norm: 0.9492620274459416, iteration: 87839
loss: 0.9773399829864502,grad_norm: 0.9999991579764604, iteration: 87840
loss: 1.0089672803878784,grad_norm: 0.9474605792680387, iteration: 87841
loss: 1.0281405448913574,grad_norm: 0.9999989817059325, iteration: 87842
loss: 0.9741893410682678,grad_norm: 0.9999990609972736, iteration: 87843
loss: 1.013594150543213,grad_norm: 0.9999992153810113, iteration: 87844
loss: 1.040972113609314,grad_norm: 0.9999996217395813, iteration: 87845
loss: 1.08287513256073,grad_norm: 1.0000000100428525, iteration: 87846
loss: 1.0058485269546509,grad_norm: 0.9999992013888874, iteration: 87847
loss: 0.9827168583869934,grad_norm: 0.9999991146005199, iteration: 87848
loss: 0.9961217045783997,grad_norm: 0.9999990849289422, iteration: 87849
loss: 0.991502046585083,grad_norm: 0.8249720893384231, iteration: 87850
loss: 1.0117491483688354,grad_norm: 0.9999991018017095, iteration: 87851
loss: 1.0132290124893188,grad_norm: 0.9999992071052863, iteration: 87852
loss: 0.9786012172698975,grad_norm: 0.9999990903621512, iteration: 87853
loss: 0.9780629873275757,grad_norm: 0.9999992712689473, iteration: 87854
loss: 0.9290624260902405,grad_norm: 0.9999992354546723, iteration: 87855
loss: 0.9998608827590942,grad_norm: 0.999999257188449, iteration: 87856
loss: 1.0065233707427979,grad_norm: 0.9249592357384769, iteration: 87857
loss: 0.980236828327179,grad_norm: 0.9999992061473614, iteration: 87858
loss: 0.9895422458648682,grad_norm: 0.9999990854872494, iteration: 87859
loss: 0.9967138171195984,grad_norm: 0.8720830124681719, iteration: 87860
loss: 0.9363064169883728,grad_norm: 0.9808795440325906, iteration: 87861
loss: 0.981413722038269,grad_norm: 0.9236860710067779, iteration: 87862
loss: 0.9685994386672974,grad_norm: 0.9999991747103449, iteration: 87863
loss: 0.9768596887588501,grad_norm: 0.9859089965103973, iteration: 87864
loss: 0.982517659664154,grad_norm: 0.9999990076771127, iteration: 87865
loss: 0.9555914998054504,grad_norm: 0.9409888494904797, iteration: 87866
loss: 1.0222935676574707,grad_norm: 0.9999991541366577, iteration: 87867
loss: 1.0348960161209106,grad_norm: 0.9999990162259799, iteration: 87868
loss: 0.9619529247283936,grad_norm: 0.9999991419695037, iteration: 87869
loss: 1.0001908540725708,grad_norm: 0.9999989706690136, iteration: 87870
loss: 0.9786431789398193,grad_norm: 0.9999991403531742, iteration: 87871
loss: 1.0393929481506348,grad_norm: 0.9999990868345202, iteration: 87872
loss: 1.0086997747421265,grad_norm: 0.9335243646784863, iteration: 87873
loss: 0.9914455413818359,grad_norm: 0.9960795475754466, iteration: 87874
loss: 1.0271406173706055,grad_norm: 0.9999992059927976, iteration: 87875
loss: 0.9823397397994995,grad_norm: 0.9999993012131603, iteration: 87876
loss: 1.041062355041504,grad_norm: 0.9999990564111931, iteration: 87877
loss: 0.9790521860122681,grad_norm: 0.9002269990753867, iteration: 87878
loss: 1.0004502534866333,grad_norm: 0.999999048325944, iteration: 87879
loss: 0.9739733934402466,grad_norm: 0.9999990373182432, iteration: 87880
loss: 1.0008052587509155,grad_norm: 0.9999991271275814, iteration: 87881
loss: 1.0006099939346313,grad_norm: 0.8787824651621641, iteration: 87882
loss: 1.0028513669967651,grad_norm: 0.999999080973259, iteration: 87883
loss: 0.94285649061203,grad_norm: 0.999999279040403, iteration: 87884
loss: 0.9542590379714966,grad_norm: 0.9835947944296356, iteration: 87885
loss: 1.002678632736206,grad_norm: 0.9999992698496598, iteration: 87886
loss: 0.9910352826118469,grad_norm: 0.999999132410804, iteration: 87887
loss: 1.0050407648086548,grad_norm: 0.999999018378058, iteration: 87888
loss: 1.0089061260223389,grad_norm: 0.9050933154931059, iteration: 87889
loss: 0.9701002836227417,grad_norm: 0.9999991018597438, iteration: 87890
loss: 0.9995687007904053,grad_norm: 0.9856645331248255, iteration: 87891
loss: 0.9686335921287537,grad_norm: 0.999999111967245, iteration: 87892
loss: 1.0060795545578003,grad_norm: 0.9999992655205823, iteration: 87893
loss: 0.9854654669761658,grad_norm: 0.9999991153177527, iteration: 87894
loss: 0.9685759544372559,grad_norm: 0.9999990493333228, iteration: 87895
loss: 0.9982064962387085,grad_norm: 0.9999989577505562, iteration: 87896
loss: 0.9687781929969788,grad_norm: 0.9500319054943985, iteration: 87897
loss: 1.0163460969924927,grad_norm: 0.9999993822044434, iteration: 87898
loss: 1.0095655918121338,grad_norm: 0.9999990834482354, iteration: 87899
loss: 1.0220812559127808,grad_norm: 0.9999989570881561, iteration: 87900
loss: 1.0097146034240723,grad_norm: 0.9999989820383469, iteration: 87901
loss: 1.0004173517227173,grad_norm: 0.9999991365018573, iteration: 87902
loss: 1.00435471534729,grad_norm: 0.8996010966722503, iteration: 87903
loss: 0.9579213857650757,grad_norm: 0.9999990334080788, iteration: 87904
loss: 1.032965898513794,grad_norm: 0.9999992037453561, iteration: 87905
loss: 0.9597975611686707,grad_norm: 0.8666751860190945, iteration: 87906
loss: 0.9804686903953552,grad_norm: 0.9999991085838346, iteration: 87907
loss: 0.9950369000434875,grad_norm: 0.9999990787759935, iteration: 87908
loss: 1.0092676877975464,grad_norm: 0.9999992247609261, iteration: 87909
loss: 0.9803524017333984,grad_norm: 0.9999992299947417, iteration: 87910
loss: 0.9926472306251526,grad_norm: 0.9999990858547535, iteration: 87911
loss: 1.093483328819275,grad_norm: 0.9999999038616133, iteration: 87912
loss: 1.0094013214111328,grad_norm: 0.9847997929692933, iteration: 87913
loss: 0.9892339706420898,grad_norm: 0.9999991800195782, iteration: 87914
loss: 0.9957007765769958,grad_norm: 0.8970974928757501, iteration: 87915
loss: 1.0080232620239258,grad_norm: 0.9881799882918614, iteration: 87916
loss: 1.0104151964187622,grad_norm: 0.999999110643416, iteration: 87917
loss: 0.9969819784164429,grad_norm: 0.9999992146342876, iteration: 87918
loss: 1.0239249467849731,grad_norm: 0.9750830724448549, iteration: 87919
loss: 1.0225945711135864,grad_norm: 0.9408957258963041, iteration: 87920
loss: 1.0112744569778442,grad_norm: 0.9999991973934523, iteration: 87921
loss: 0.9454044103622437,grad_norm: 0.9811384032687736, iteration: 87922
loss: 1.016887903213501,grad_norm: 0.9236141242369013, iteration: 87923
loss: 0.9953737854957581,grad_norm: 0.9935297778813871, iteration: 87924
loss: 1.0042695999145508,grad_norm: 0.9999992062988852, iteration: 87925
loss: 1.0174205303192139,grad_norm: 0.9999992498264628, iteration: 87926
loss: 1.0109434127807617,grad_norm: 0.9532042580697927, iteration: 87927
loss: 0.9883593916893005,grad_norm: 0.9389474172436194, iteration: 87928
loss: 1.0243974924087524,grad_norm: 0.9999990586534783, iteration: 87929
loss: 0.983235776424408,grad_norm: 0.9000296007694234, iteration: 87930
loss: 0.9959664940834045,grad_norm: 0.9999990626295144, iteration: 87931
loss: 1.0146989822387695,grad_norm: 0.9999989717164084, iteration: 87932
loss: 0.9986042976379395,grad_norm: 0.9999990001222617, iteration: 87933
loss: 1.0045772790908813,grad_norm: 0.9902452323472523, iteration: 87934
loss: 0.9848097562789917,grad_norm: 0.9999990984097884, iteration: 87935
loss: 0.9937012195587158,grad_norm: 0.9999991322196653, iteration: 87936
loss: 0.9972547888755798,grad_norm: 0.9999990713299397, iteration: 87937
loss: 1.0100373029708862,grad_norm: 0.9379911694671501, iteration: 87938
loss: 1.0041532516479492,grad_norm: 0.9794298907343946, iteration: 87939
loss: 1.001076579093933,grad_norm: 0.9288980135154838, iteration: 87940
loss: 1.0128613710403442,grad_norm: 0.9999992192061885, iteration: 87941
loss: 1.0136674642562866,grad_norm: 0.9947092498487651, iteration: 87942
loss: 1.0090597867965698,grad_norm: 0.999998955471446, iteration: 87943
loss: 0.9926736950874329,grad_norm: 0.9999990559888718, iteration: 87944
loss: 1.0041788816452026,grad_norm: 0.9999990914206177, iteration: 87945
loss: 1.0252556800842285,grad_norm: 0.9198721469349125, iteration: 87946
loss: 1.0297770500183105,grad_norm: 0.999999083469079, iteration: 87947
loss: 1.0272791385650635,grad_norm: 0.9743316589970147, iteration: 87948
loss: 0.988438606262207,grad_norm: 0.9985178392973084, iteration: 87949
loss: 0.986945629119873,grad_norm: 0.9999992893413422, iteration: 87950
loss: 1.0239686965942383,grad_norm: 0.9563091596021954, iteration: 87951
loss: 1.018930196762085,grad_norm: 0.9999990749769143, iteration: 87952
loss: 1.0152682065963745,grad_norm: 0.9999992195959932, iteration: 87953
loss: 1.0476168394088745,grad_norm: 0.9999999239139189, iteration: 87954
loss: 1.019368052482605,grad_norm: 0.9891705332270706, iteration: 87955
loss: 1.004482626914978,grad_norm: 0.9922301839606602, iteration: 87956
loss: 0.98468017578125,grad_norm: 0.947719496738452, iteration: 87957
loss: 0.9821358323097229,grad_norm: 0.8560497251591479, iteration: 87958
loss: 0.9876623153686523,grad_norm: 0.9999992901879141, iteration: 87959
loss: 1.0107747316360474,grad_norm: 0.8404556378999317, iteration: 87960
loss: 0.9929744601249695,grad_norm: 0.9999989170161828, iteration: 87961
loss: 0.9862311482429504,grad_norm: 0.9999991852536506, iteration: 87962
loss: 0.9913002252578735,grad_norm: 0.999999248664731, iteration: 87963
loss: 0.9879125356674194,grad_norm: 0.8949801691068517, iteration: 87964
loss: 1.0195499658584595,grad_norm: 0.9999990250948797, iteration: 87965
loss: 1.0011812448501587,grad_norm: 0.9999991718327539, iteration: 87966
loss: 1.0213301181793213,grad_norm: 0.999999670986719, iteration: 87967
loss: 0.9965396523475647,grad_norm: 0.9999991832427229, iteration: 87968
loss: 0.9760885834693909,grad_norm: 0.9421612795912332, iteration: 87969
loss: 0.9663402438163757,grad_norm: 0.9999991816179165, iteration: 87970
loss: 1.010268211364746,grad_norm: 0.9999989821094567, iteration: 87971
loss: 0.9911543726921082,grad_norm: 0.9334853865785487, iteration: 87972
loss: 1.0127077102661133,grad_norm: 0.934198917196202, iteration: 87973
loss: 0.974797785282135,grad_norm: 0.9332302932679359, iteration: 87974
loss: 0.9517483115196228,grad_norm: 0.8792784701721759, iteration: 87975
loss: 1.1184163093566895,grad_norm: 0.9999991225119672, iteration: 87976
loss: 1.027098298072815,grad_norm: 0.9999991431920835, iteration: 87977
loss: 1.0417240858078003,grad_norm: 0.8765728105310225, iteration: 87978
loss: 0.9781169891357422,grad_norm: 0.9999992099483552, iteration: 87979
loss: 0.988961935043335,grad_norm: 0.9999991297429282, iteration: 87980
loss: 0.9975666403770447,grad_norm: 0.9683020237413027, iteration: 87981
loss: 1.0083051919937134,grad_norm: 0.9999992558172635, iteration: 87982
loss: 1.0799139738082886,grad_norm: 0.9999990256423295, iteration: 87983
loss: 0.989660918712616,grad_norm: 0.9999992090932474, iteration: 87984
loss: 0.9899002909660339,grad_norm: 0.9236394579296167, iteration: 87985
loss: 1.0256985425949097,grad_norm: 0.9805525484489116, iteration: 87986
loss: 0.9733394980430603,grad_norm: 0.9999991491070537, iteration: 87987
loss: 1.0252307653427124,grad_norm: 0.9883908737512321, iteration: 87988
loss: 1.0096518993377686,grad_norm: 0.9744137157680877, iteration: 87989
loss: 0.9999814629554749,grad_norm: 0.934187310981408, iteration: 87990
loss: 0.9963067173957825,grad_norm: 0.9999989937613362, iteration: 87991
loss: 0.947765588760376,grad_norm: 0.9999992167300097, iteration: 87992
loss: 1.0137803554534912,grad_norm: 0.9999990999008165, iteration: 87993
loss: 0.9978458881378174,grad_norm: 0.987740628302377, iteration: 87994
loss: 1.010890245437622,grad_norm: 0.9999993233812761, iteration: 87995
loss: 1.0782256126403809,grad_norm: 0.9999991581272385, iteration: 87996
loss: 0.9979723691940308,grad_norm: 0.9999990593587587, iteration: 87997
loss: 0.9775670766830444,grad_norm: 0.9999991450353557, iteration: 87998
loss: 1.014338731765747,grad_norm: 0.9878562736325494, iteration: 87999
loss: 0.9884693622589111,grad_norm: 0.9999992960199735, iteration: 88000
loss: 0.9869810938835144,grad_norm: 0.9999991812552886, iteration: 88001
loss: 1.0085738897323608,grad_norm: 0.9999992353551996, iteration: 88002
loss: 0.967059850692749,grad_norm: 0.9999992462965163, iteration: 88003
loss: 1.0104738473892212,grad_norm: 0.9351809824158815, iteration: 88004
loss: 0.9821175932884216,grad_norm: 0.9981099675375611, iteration: 88005
loss: 1.0264132022857666,grad_norm: 0.7970330329921937, iteration: 88006
loss: 0.9611579179763794,grad_norm: 0.9999991336862661, iteration: 88007
loss: 0.9938181638717651,grad_norm: 0.9422186473448794, iteration: 88008
loss: 0.990591287612915,grad_norm: 0.9690244606101655, iteration: 88009
loss: 0.9905457496643066,grad_norm: 0.946085158881939, iteration: 88010
loss: 1.0191141366958618,grad_norm: 0.9999989394403883, iteration: 88011
loss: 0.9959997534751892,grad_norm: 0.9641254796217058, iteration: 88012
loss: 0.990178644657135,grad_norm: 0.9642939440600595, iteration: 88013
loss: 1.0452944040298462,grad_norm: 0.999999188022364, iteration: 88014
loss: 1.0133334398269653,grad_norm: 0.9999993493296668, iteration: 88015
loss: 0.9774967432022095,grad_norm: 0.9909597527869594, iteration: 88016
loss: 1.0318055152893066,grad_norm: 0.999999468772268, iteration: 88017
loss: 0.9852869510650635,grad_norm: 0.8925584785902896, iteration: 88018
loss: 0.9925283193588257,grad_norm: 0.9999991220471514, iteration: 88019
loss: 1.022019386291504,grad_norm: 0.9999990115186399, iteration: 88020
loss: 1.004328966140747,grad_norm: 0.9999991430829135, iteration: 88021
loss: 1.0092766284942627,grad_norm: 0.9885797470597784, iteration: 88022
loss: 0.9623156189918518,grad_norm: 0.9999992560279953, iteration: 88023
loss: 0.9860001802444458,grad_norm: 0.9999259679795602, iteration: 88024
loss: 0.9765992164611816,grad_norm: 0.9999993193706349, iteration: 88025
loss: 0.994688093662262,grad_norm: 0.8992691191214642, iteration: 88026
loss: 0.9664528965950012,grad_norm: 0.9999991578923472, iteration: 88027
loss: 0.9818136692047119,grad_norm: 0.9999990723107914, iteration: 88028
loss: 1.0152194499969482,grad_norm: 0.9999991036841599, iteration: 88029
loss: 0.9735699892044067,grad_norm: 0.9999991123135386, iteration: 88030
loss: 1.040900468826294,grad_norm: 0.9093244109143734, iteration: 88031
loss: 1.0214194059371948,grad_norm: 0.9999997739313077, iteration: 88032
loss: 0.9800205826759338,grad_norm: 0.9381560001844098, iteration: 88033
loss: 1.014973759651184,grad_norm: 0.9999990658207552, iteration: 88034
loss: 1.005489706993103,grad_norm: 0.9999989617494659, iteration: 88035
loss: 0.988508939743042,grad_norm: 0.9432417937043538, iteration: 88036
loss: 0.9967823028564453,grad_norm: 0.9432238717658925, iteration: 88037
loss: 0.9740135669708252,grad_norm: 0.9999991762388023, iteration: 88038
loss: 1.0164310932159424,grad_norm: 0.8556566691151161, iteration: 88039
loss: 1.0022916793823242,grad_norm: 0.9999992983096654, iteration: 88040
loss: 1.008975625038147,grad_norm: 0.9999991734900613, iteration: 88041
loss: 0.9509258270263672,grad_norm: 0.999999013375377, iteration: 88042
loss: 0.981174647808075,grad_norm: 0.9576607925240932, iteration: 88043
loss: 1.0239254236221313,grad_norm: 0.9999990799502972, iteration: 88044
loss: 1.0134196281433105,grad_norm: 0.9999991542667209, iteration: 88045
loss: 0.9932942986488342,grad_norm: 0.9848778797778467, iteration: 88046
loss: 1.009145975112915,grad_norm: 0.9867010238449808, iteration: 88047
loss: 1.0176610946655273,grad_norm: 0.9999996924852214, iteration: 88048
loss: 1.025877833366394,grad_norm: 0.967639215856506, iteration: 88049
loss: 1.017974615097046,grad_norm: 0.9582334589074454, iteration: 88050
loss: 1.0196987390518188,grad_norm: 0.9999991504423156, iteration: 88051
loss: 1.0098580121994019,grad_norm: 0.9999992098423484, iteration: 88052
loss: 0.9916598796844482,grad_norm: 0.9999990624744524, iteration: 88053
loss: 0.9636560082435608,grad_norm: 0.999999088942209, iteration: 88054
loss: 1.041117787361145,grad_norm: 0.9999992376467539, iteration: 88055
loss: 0.9936006665229797,grad_norm: 0.9999989980702517, iteration: 88056
loss: 0.9837373495101929,grad_norm: 0.9999990382599415, iteration: 88057
loss: 0.9706404805183411,grad_norm: 0.969297048353151, iteration: 88058
loss: 1.0577890872955322,grad_norm: 0.9999991946637722, iteration: 88059
loss: 0.9894285798072815,grad_norm: 0.9999991120640886, iteration: 88060
loss: 0.9785354137420654,grad_norm: 0.9999991018013256, iteration: 88061
loss: 1.0104808807373047,grad_norm: 0.8951399830695643, iteration: 88062
loss: 1.0297610759735107,grad_norm: 0.999999230033557, iteration: 88063
loss: 1.0109858512878418,grad_norm: 0.9775557348546536, iteration: 88064
loss: 1.029316782951355,grad_norm: 0.9999991348129935, iteration: 88065
loss: 1.0134716033935547,grad_norm: 0.99999921505352, iteration: 88066
loss: 0.9880920648574829,grad_norm: 0.9999989915554632, iteration: 88067
loss: 1.0137722492218018,grad_norm: 0.9999992725980924, iteration: 88068
loss: 0.9745970368385315,grad_norm: 0.9999991822030981, iteration: 88069
loss: 1.033829927444458,grad_norm: 0.9715342379599299, iteration: 88070
loss: 1.0196478366851807,grad_norm: 0.9708183413881798, iteration: 88071
loss: 0.9775761961936951,grad_norm: 0.9548982057291746, iteration: 88072
loss: 1.005536675453186,grad_norm: 0.9999991089945214, iteration: 88073
loss: 1.011199951171875,grad_norm: 0.9999991581691126, iteration: 88074
loss: 1.021925687789917,grad_norm: 0.9999993029684683, iteration: 88075
loss: 0.9800631999969482,grad_norm: 0.9999991379045365, iteration: 88076
loss: 0.9933350682258606,grad_norm: 0.9999990969800225, iteration: 88077
loss: 1.0207403898239136,grad_norm: 0.9999990428433972, iteration: 88078
loss: 1.0272232294082642,grad_norm: 0.9999992889015629, iteration: 88079
loss: 1.0152363777160645,grad_norm: 0.9865815140912088, iteration: 88080
loss: 1.0205111503601074,grad_norm: 0.9999990992479078, iteration: 88081
loss: 1.0167027711868286,grad_norm: 0.9999991664401136, iteration: 88082
loss: 0.9880985617637634,grad_norm: 0.999999167088045, iteration: 88083
loss: 1.0122106075286865,grad_norm: 0.9999991453559496, iteration: 88084
loss: 1.0458182096481323,grad_norm: 0.999999181282245, iteration: 88085
loss: 0.9868813157081604,grad_norm: 0.9999991841020406, iteration: 88086
loss: 1.0036333799362183,grad_norm: 0.9999991870840211, iteration: 88087
loss: 0.9834301471710205,grad_norm: 0.9999991561935011, iteration: 88088
loss: 0.9971486330032349,grad_norm: 0.9999989819437389, iteration: 88089
loss: 1.0162686109542847,grad_norm: 0.9271079879459402, iteration: 88090
loss: 0.9689467549324036,grad_norm: 0.999999077906337, iteration: 88091
loss: 0.9470067024230957,grad_norm: 0.8878873371416757, iteration: 88092
loss: 0.9705318212509155,grad_norm: 0.9806492456536189, iteration: 88093
loss: 0.9840754866600037,grad_norm: 0.9999989539708665, iteration: 88094
loss: 1.0137940645217896,grad_norm: 0.9999992091934393, iteration: 88095
loss: 0.9926542639732361,grad_norm: 0.9493023541082418, iteration: 88096
loss: 1.0157445669174194,grad_norm: 0.9999991005100385, iteration: 88097
loss: 0.9756798148155212,grad_norm: 0.9999991768673624, iteration: 88098
loss: 1.0204124450683594,grad_norm: 0.9999990711996468, iteration: 88099
loss: 1.0065174102783203,grad_norm: 0.9999992147504637, iteration: 88100
loss: 0.9931966662406921,grad_norm: 0.9999991440488293, iteration: 88101
loss: 0.9757110476493835,grad_norm: 0.9999992288693963, iteration: 88102
loss: 0.989868700504303,grad_norm: 0.9999991377869104, iteration: 88103
loss: 1.0059694051742554,grad_norm: 0.9999990627944436, iteration: 88104
loss: 0.9877672791481018,grad_norm: 0.9091189749449028, iteration: 88105
loss: 0.9883131980895996,grad_norm: 0.9501106223066573, iteration: 88106
loss: 0.9529499411582947,grad_norm: 0.9999993245157006, iteration: 88107
loss: 1.0127683877944946,grad_norm: 0.9057668129977458, iteration: 88108
loss: 0.9686827659606934,grad_norm: 0.9513059958525136, iteration: 88109
loss: 0.9945327043533325,grad_norm: 0.999999288627686, iteration: 88110
loss: 0.9992571473121643,grad_norm: 0.993612301729124, iteration: 88111
loss: 1.0245965719223022,grad_norm: 0.9983362554722232, iteration: 88112
loss: 0.9489666223526001,grad_norm: 0.9999992416907093, iteration: 88113
loss: 1.0216437578201294,grad_norm: 0.981407580632409, iteration: 88114
loss: 1.0017565488815308,grad_norm: 0.9999989525182852, iteration: 88115
loss: 1.0030338764190674,grad_norm: 0.9999990838321673, iteration: 88116
loss: 1.0134685039520264,grad_norm: 0.9999991947992595, iteration: 88117
loss: 0.9738725423812866,grad_norm: 0.9999992350566868, iteration: 88118
loss: 1.007007122039795,grad_norm: 0.8749783122984194, iteration: 88119
loss: 0.9702074527740479,grad_norm: 0.999999271952723, iteration: 88120
loss: 0.9514320492744446,grad_norm: 0.9999992940300653, iteration: 88121
loss: 1.0114420652389526,grad_norm: 0.8541605506568498, iteration: 88122
loss: 1.0150058269500732,grad_norm: 0.9999991208241398, iteration: 88123
loss: 0.9906765818595886,grad_norm: 0.9999991997782759, iteration: 88124
loss: 0.9984729290008545,grad_norm: 0.9268762992597588, iteration: 88125
loss: 0.9881325364112854,grad_norm: 0.9362679165705448, iteration: 88126
loss: 0.9828330278396606,grad_norm: 0.9999990374378638, iteration: 88127
loss: 1.0190064907073975,grad_norm: 0.9803506898346963, iteration: 88128
loss: 1.0089958906173706,grad_norm: 0.9999991625004219, iteration: 88129
loss: 1.0059720277786255,grad_norm: 0.9999994379625964, iteration: 88130
loss: 0.9918910264968872,grad_norm: 0.9999989272087518, iteration: 88131
loss: 1.0116934776306152,grad_norm: 0.9679281928005772, iteration: 88132
loss: 0.956511378288269,grad_norm: 0.9999991603730036, iteration: 88133
loss: 0.9969307780265808,grad_norm: 0.9693622245760212, iteration: 88134
loss: 0.9953423142433167,grad_norm: 0.9115497415725623, iteration: 88135
loss: 1.0166689157485962,grad_norm: 0.9999992075289956, iteration: 88136
loss: 1.0132098197937012,grad_norm: 0.9999990961668368, iteration: 88137
loss: 0.9300554394721985,grad_norm: 0.9999992455626535, iteration: 88138
loss: 0.9931278824806213,grad_norm: 0.9565204961531104, iteration: 88139
loss: 0.9971687197685242,grad_norm: 0.9999992617715454, iteration: 88140
loss: 1.0248467922210693,grad_norm: 0.999999148936029, iteration: 88141
loss: 1.0109379291534424,grad_norm: 0.8937946105347527, iteration: 88142
loss: 0.9938128590583801,grad_norm: 0.9999990195972046, iteration: 88143
loss: 0.9748168587684631,grad_norm: 0.8735224265504977, iteration: 88144
loss: 0.9996955990791321,grad_norm: 0.9999990438895404, iteration: 88145
loss: 1.0199549198150635,grad_norm: 0.9644882837583028, iteration: 88146
loss: 0.9745115637779236,grad_norm: 0.9999991678102871, iteration: 88147
loss: 1.0058138370513916,grad_norm: 0.9999990253467745, iteration: 88148
loss: 1.001401662826538,grad_norm: 0.8796100133525306, iteration: 88149
loss: 0.9734016060829163,grad_norm: 0.9999990591705668, iteration: 88150
loss: 0.9996904730796814,grad_norm: 0.9999994691756937, iteration: 88151
loss: 0.9988820552825928,grad_norm: 0.9999991846574937, iteration: 88152
loss: 1.0039303302764893,grad_norm: 0.9999992286779915, iteration: 88153
loss: 1.0049396753311157,grad_norm: 0.9999988995306607, iteration: 88154
loss: 1.0022735595703125,grad_norm: 0.9999991813953104, iteration: 88155
loss: 0.9851019382476807,grad_norm: 0.9999991067602757, iteration: 88156
loss: 0.957461416721344,grad_norm: 0.9056116503254863, iteration: 88157
loss: 0.9932095408439636,grad_norm: 0.9999992821852158, iteration: 88158
loss: 1.0539816617965698,grad_norm: 0.9625578976925295, iteration: 88159
loss: 1.026839017868042,grad_norm: 0.9554708089365296, iteration: 88160
loss: 1.0157047510147095,grad_norm: 0.9628886243357873, iteration: 88161
loss: 1.0255690813064575,grad_norm: 0.9689839730728611, iteration: 88162
loss: 0.9816984534263611,grad_norm: 0.9999989651450603, iteration: 88163
loss: 1.0239696502685547,grad_norm: 0.9999994852251652, iteration: 88164
loss: 1.0046989917755127,grad_norm: 0.9999991525565503, iteration: 88165
loss: 1.0075957775115967,grad_norm: 0.7765503560413355, iteration: 88166
loss: 0.9864434003829956,grad_norm: 0.9999990869671154, iteration: 88167
loss: 1.026785135269165,grad_norm: 0.999999162682779, iteration: 88168
loss: 0.9830275774002075,grad_norm: 0.9999992727233532, iteration: 88169
loss: 1.0154986381530762,grad_norm: 0.9448287473638929, iteration: 88170
loss: 1.0080316066741943,grad_norm: 0.989606035217305, iteration: 88171
loss: 0.9758877754211426,grad_norm: 0.9999991885979164, iteration: 88172
loss: 1.0009292364120483,grad_norm: 0.9300114793076047, iteration: 88173
loss: 1.032381296157837,grad_norm: 0.9999990308845149, iteration: 88174
loss: 1.0213584899902344,grad_norm: 0.9999994068862588, iteration: 88175
loss: 1.003311038017273,grad_norm: 0.9661794158925362, iteration: 88176
loss: 1.0207805633544922,grad_norm: 0.8856209675260607, iteration: 88177
loss: 1.020628809928894,grad_norm: 0.953430206062614, iteration: 88178
loss: 0.9727469682693481,grad_norm: 0.9722249692364291, iteration: 88179
loss: 0.9623988270759583,grad_norm: 0.9302786713530974, iteration: 88180
loss: 1.013736367225647,grad_norm: 0.9999990068839243, iteration: 88181
loss: 1.0011444091796875,grad_norm: 0.9999991145233397, iteration: 88182
loss: 0.9828221797943115,grad_norm: 0.9999992099276989, iteration: 88183
loss: 1.0299373865127563,grad_norm: 0.9999992391717891, iteration: 88184
loss: 1.008779764175415,grad_norm: 0.999999095605074, iteration: 88185
loss: 0.9920193552970886,grad_norm: 0.9999991255634374, iteration: 88186
loss: 0.9940087795257568,grad_norm: 0.9999992562849118, iteration: 88187
loss: 0.9934954643249512,grad_norm: 0.9999989531580461, iteration: 88188
loss: 1.00578773021698,grad_norm: 0.9999991886291356, iteration: 88189
loss: 0.9840369820594788,grad_norm: 0.8475913933277985, iteration: 88190
loss: 1.0125113725662231,grad_norm: 0.9999989306384031, iteration: 88191
loss: 1.007503867149353,grad_norm: 0.9999991821858455, iteration: 88192
loss: 0.9904924035072327,grad_norm: 0.9999991117583195, iteration: 88193
loss: 1.0044440031051636,grad_norm: 0.9999991875714763, iteration: 88194
loss: 0.985876202583313,grad_norm: 0.9472524526887908, iteration: 88195
loss: 0.996512234210968,grad_norm: 0.9999992303670756, iteration: 88196
loss: 1.0160622596740723,grad_norm: 0.9999991897901008, iteration: 88197
loss: 1.0069454908370972,grad_norm: 0.9999992235070956, iteration: 88198
loss: 1.004544734954834,grad_norm: 0.9999992056359034, iteration: 88199
loss: 1.0026681423187256,grad_norm: 0.8751781239337344, iteration: 88200
loss: 1.0097063779830933,grad_norm: 0.9999990468132814, iteration: 88201
loss: 0.9746205806732178,grad_norm: 0.9761724771424976, iteration: 88202
loss: 0.9811216592788696,grad_norm: 0.9999990670243044, iteration: 88203
loss: 0.9953136444091797,grad_norm: 0.9307117952359026, iteration: 88204
loss: 0.9961465001106262,grad_norm: 0.9813924252060607, iteration: 88205
loss: 0.9953718185424805,grad_norm: 0.9925750741112565, iteration: 88206
loss: 0.9923477172851562,grad_norm: 0.923691384503353, iteration: 88207
loss: 0.9713322520256042,grad_norm: 0.9437889024706326, iteration: 88208
loss: 0.9807709455490112,grad_norm: 0.8717453914673268, iteration: 88209
loss: 1.0006663799285889,grad_norm: 0.9999990829397803, iteration: 88210
loss: 0.9601506590843201,grad_norm: 0.9999992146413436, iteration: 88211
loss: 1.0154062509536743,grad_norm: 0.9999990802363413, iteration: 88212
loss: 1.001842975616455,grad_norm: 0.9999991843112536, iteration: 88213
loss: 0.986958920955658,grad_norm: 0.9708677423646169, iteration: 88214
loss: 1.030131459236145,grad_norm: 0.9999993222245369, iteration: 88215
loss: 1.0146446228027344,grad_norm: 0.9987141911503126, iteration: 88216
loss: 1.0201627016067505,grad_norm: 0.9999990281090455, iteration: 88217
loss: 0.9913825988769531,grad_norm: 0.8743372864201525, iteration: 88218
loss: 0.9878699779510498,grad_norm: 0.9999991764292272, iteration: 88219
loss: 0.9900294542312622,grad_norm: 0.9999992725892506, iteration: 88220
loss: 0.9884156584739685,grad_norm: 0.9488635959138132, iteration: 88221
loss: 0.9853387475013733,grad_norm: 0.9647539708942959, iteration: 88222
loss: 1.0102770328521729,grad_norm: 0.9966555663093922, iteration: 88223
loss: 1.0005570650100708,grad_norm: 0.9575743306776795, iteration: 88224
loss: 1.0096259117126465,grad_norm: 0.9999991693988257, iteration: 88225
loss: 1.0142056941986084,grad_norm: 0.9140520397002286, iteration: 88226
loss: 0.9964241981506348,grad_norm: 0.999999234550359, iteration: 88227
loss: 0.9970633387565613,grad_norm: 0.9999991407937363, iteration: 88228
loss: 0.9577780961990356,grad_norm: 0.9999989890639139, iteration: 88229
loss: 1.0007035732269287,grad_norm: 0.9999991724164059, iteration: 88230
loss: 0.9877296686172485,grad_norm: 0.9999988864455224, iteration: 88231
loss: 1.0142700672149658,grad_norm: 0.9999992324283952, iteration: 88232
loss: 1.0103521347045898,grad_norm: 0.9999990284209233, iteration: 88233
loss: 1.0041075944900513,grad_norm: 0.999999103595763, iteration: 88234
loss: 0.9862821102142334,grad_norm: 0.9999990807549777, iteration: 88235
loss: 0.9839690327644348,grad_norm: 0.9999991602591904, iteration: 88236
loss: 1.1055659055709839,grad_norm: 0.9999990438872621, iteration: 88237
loss: 1.0105600357055664,grad_norm: 0.9999990750753703, iteration: 88238
loss: 1.0234214067459106,grad_norm: 0.9999990770084999, iteration: 88239
loss: 1.0035643577575684,grad_norm: 0.9999991891493498, iteration: 88240
loss: 0.9581133723258972,grad_norm: 0.8990799094200013, iteration: 88241
loss: 0.9753488898277283,grad_norm: 0.999999050525517, iteration: 88242
loss: 0.9867997169494629,grad_norm: 0.999999024818807, iteration: 88243
loss: 0.9742658734321594,grad_norm: 0.9999990966539026, iteration: 88244
loss: 0.9385099411010742,grad_norm: 0.9999991167572191, iteration: 88245
loss: 0.9982006549835205,grad_norm: 0.9999991348478817, iteration: 88246
loss: 0.9887453317642212,grad_norm: 0.9474808767478179, iteration: 88247
loss: 1.0084348917007446,grad_norm: 0.9999990247885471, iteration: 88248
loss: 1.0051355361938477,grad_norm: 0.9999991145641243, iteration: 88249
loss: 0.9679577946662903,grad_norm: 0.9999992365915195, iteration: 88250
loss: 1.007526159286499,grad_norm: 0.9999992029350944, iteration: 88251
loss: 1.054174542427063,grad_norm: 0.9999993657784398, iteration: 88252
loss: 1.0232294797897339,grad_norm: 0.9999991737977251, iteration: 88253
loss: 1.0682547092437744,grad_norm: 0.9999993050120055, iteration: 88254
loss: 1.01788330078125,grad_norm: 0.9999991060661918, iteration: 88255
loss: 0.9772898554801941,grad_norm: 0.9999992026344343, iteration: 88256
loss: 0.9680080413818359,grad_norm: 0.9355257238567201, iteration: 88257
loss: 1.0228468179702759,grad_norm: 0.9873395429572486, iteration: 88258
loss: 0.993370771408081,grad_norm: 0.9806822461392266, iteration: 88259
loss: 0.9981088638305664,grad_norm: 0.960996303795209, iteration: 88260
loss: 1.0217481851577759,grad_norm: 0.999999235743363, iteration: 88261
loss: 0.9836902618408203,grad_norm: 0.9999991098506381, iteration: 88262
loss: 1.0002970695495605,grad_norm: 0.9999991172416464, iteration: 88263
loss: 0.9848027229309082,grad_norm: 0.9999991094083455, iteration: 88264
loss: 1.0425739288330078,grad_norm: 0.9999990853068385, iteration: 88265
loss: 1.0306305885314941,grad_norm: 0.8607818879267801, iteration: 88266
loss: 0.9850108623504639,grad_norm: 0.8965218018319492, iteration: 88267
loss: 1.0152841806411743,grad_norm: 0.9999991968717219, iteration: 88268
loss: 1.0153543949127197,grad_norm: 0.9686854009969124, iteration: 88269
loss: 1.0000982284545898,grad_norm: 0.9999990272442763, iteration: 88270
loss: 1.0082789659500122,grad_norm: 0.999999049031431, iteration: 88271
loss: 1.0117403268814087,grad_norm: 0.999998947160258, iteration: 88272
loss: 1.0275053977966309,grad_norm: 0.9999991133209012, iteration: 88273
loss: 0.99140864610672,grad_norm: 0.9794469244611924, iteration: 88274
loss: 1.024369716644287,grad_norm: 0.9963041994918598, iteration: 88275
loss: 1.0018855333328247,grad_norm: 0.9999992617402914, iteration: 88276
loss: 0.9910469055175781,grad_norm: 0.967398744350659, iteration: 88277
loss: 0.9816804528236389,grad_norm: 0.8852456337083477, iteration: 88278
loss: 1.0336191654205322,grad_norm: 0.9999991651952015, iteration: 88279
loss: 1.0123409032821655,grad_norm: 0.8856589119503679, iteration: 88280
loss: 1.0153406858444214,grad_norm: 0.9999990428960812, iteration: 88281
loss: 1.0024118423461914,grad_norm: 0.9999991092826157, iteration: 88282
loss: 1.0164926052093506,grad_norm: 0.9584253669510205, iteration: 88283
loss: 0.9918422102928162,grad_norm: 0.9999990704932462, iteration: 88284
loss: 0.989409863948822,grad_norm: 0.9999989390757327, iteration: 88285
loss: 0.994385838508606,grad_norm: 0.9808610000008622, iteration: 88286
loss: 1.0093801021575928,grad_norm: 0.9999989779466757, iteration: 88287
loss: 1.003156304359436,grad_norm: 0.9999991818044268, iteration: 88288
loss: 1.0159611701965332,grad_norm: 0.9962493331056455, iteration: 88289
loss: 0.9516137838363647,grad_norm: 0.9999991284437624, iteration: 88290
loss: 0.9933173656463623,grad_norm: 0.9999990879623495, iteration: 88291
loss: 0.9827275276184082,grad_norm: 0.9999992553498763, iteration: 88292
loss: 1.0091326236724854,grad_norm: 0.9999990891213746, iteration: 88293
loss: 1.0048649311065674,grad_norm: 0.9999991911984686, iteration: 88294
loss: 1.0185699462890625,grad_norm: 0.9999990266352035, iteration: 88295
loss: 0.9947481155395508,grad_norm: 0.9999991422483125, iteration: 88296
loss: 1.0232969522476196,grad_norm: 0.9999992099034513, iteration: 88297
loss: 1.0260480642318726,grad_norm: 0.9999991247667731, iteration: 88298
loss: 1.01451575756073,grad_norm: 0.9999990302082505, iteration: 88299
loss: 0.9843374490737915,grad_norm: 0.9999990952106942, iteration: 88300
loss: 0.9886587858200073,grad_norm: 0.9216824824678037, iteration: 88301
loss: 0.9610611796379089,grad_norm: 0.9999992273093858, iteration: 88302
loss: 1.0219875574111938,grad_norm: 0.8972137318080645, iteration: 88303
loss: 0.9987788796424866,grad_norm: 0.9890943296844673, iteration: 88304
loss: 1.0110431909561157,grad_norm: 0.9224951125597544, iteration: 88305
loss: 1.0212193727493286,grad_norm: 0.8944110174409219, iteration: 88306
loss: 0.9626615643501282,grad_norm: 0.9672397568956883, iteration: 88307
loss: 0.9996179342269897,grad_norm: 0.8382676127765591, iteration: 88308
loss: 1.0036017894744873,grad_norm: 0.9999989102857032, iteration: 88309
loss: 1.0147989988327026,grad_norm: 0.999999066462543, iteration: 88310
loss: 0.9803489446640015,grad_norm: 0.9999989464737916, iteration: 88311
loss: 1.050049066543579,grad_norm: 0.9999997058711063, iteration: 88312
loss: 1.0058741569519043,grad_norm: 0.9942746110156558, iteration: 88313
loss: 0.9878020882606506,grad_norm: 0.9138150478524321, iteration: 88314
loss: 1.0167485475540161,grad_norm: 0.9439315404106847, iteration: 88315
loss: 1.0008169412612915,grad_norm: 0.9999993283006501, iteration: 88316
loss: 1.0266242027282715,grad_norm: 0.9032748241146121, iteration: 88317
loss: 1.0191015005111694,grad_norm: 0.9486434551212668, iteration: 88318
loss: 1.0043139457702637,grad_norm: 0.9999992099757096, iteration: 88319
loss: 0.9910450577735901,grad_norm: 0.9999990129578665, iteration: 88320
loss: 0.9735644459724426,grad_norm: 0.9999991277746197, iteration: 88321
loss: 1.0099968910217285,grad_norm: 0.9099542914692231, iteration: 88322
loss: 0.975750207901001,grad_norm: 0.9793692943961487, iteration: 88323
loss: 1.013344645500183,grad_norm: 0.9999990178729485, iteration: 88324
loss: 0.9959312677383423,grad_norm: 0.9999990519060885, iteration: 88325
loss: 0.9601039290428162,grad_norm: 0.9558921892477049, iteration: 88326
loss: 1.034670114517212,grad_norm: 0.9999990919000481, iteration: 88327
loss: 0.9847886562347412,grad_norm: 0.999999115662614, iteration: 88328
loss: 0.9803163409233093,grad_norm: 0.9999992844650581, iteration: 88329
loss: 1.0031013488769531,grad_norm: 0.813289646431225, iteration: 88330
loss: 1.0930018424987793,grad_norm: 0.9999990537068029, iteration: 88331
loss: 1.0211364030838013,grad_norm: 0.9999990958291144, iteration: 88332
loss: 0.972768247127533,grad_norm: 0.9999993292556842, iteration: 88333
loss: 0.9971274733543396,grad_norm: 0.9999992498267343, iteration: 88334
loss: 1.1077569723129272,grad_norm: 0.9999989808118982, iteration: 88335
loss: 1.0254422426223755,grad_norm: 0.9999999087537538, iteration: 88336
loss: 0.9965229630470276,grad_norm: 0.99999924648156, iteration: 88337
loss: 0.9780139327049255,grad_norm: 0.9999991340878427, iteration: 88338
loss: 0.998681902885437,grad_norm: 0.9999991247031078, iteration: 88339
loss: 1.0007438659667969,grad_norm: 0.9999991820128119, iteration: 88340
loss: 1.0007916688919067,grad_norm: 0.9999990922227141, iteration: 88341
loss: 0.9906845688819885,grad_norm: 0.9999991076595, iteration: 88342
loss: 0.9668790102005005,grad_norm: 0.9999991621627785, iteration: 88343
loss: 0.964686393737793,grad_norm: 0.8595151164532555, iteration: 88344
loss: 1.0303711891174316,grad_norm: 0.9999992102109563, iteration: 88345
loss: 1.0351848602294922,grad_norm: 0.9999990947712116, iteration: 88346
loss: 1.015083909034729,grad_norm: 0.9999991195176272, iteration: 88347
loss: 1.0297105312347412,grad_norm: 0.9999992153748735, iteration: 88348
loss: 1.0274968147277832,grad_norm: 0.9189951787855863, iteration: 88349
loss: 0.956475019454956,grad_norm: 0.8314858221567308, iteration: 88350
loss: 0.9818276166915894,grad_norm: 0.9999990782402812, iteration: 88351
loss: 1.0868865251541138,grad_norm: 0.9999992010760959, iteration: 88352
loss: 0.9773908257484436,grad_norm: 0.9679932662153232, iteration: 88353
loss: 1.0047940015792847,grad_norm: 0.9999990843757414, iteration: 88354
loss: 1.0065414905548096,grad_norm: 0.9999998050079926, iteration: 88355
loss: 1.00140380859375,grad_norm: 0.9328808458107283, iteration: 88356
loss: 0.9740351438522339,grad_norm: 0.99999898921883, iteration: 88357
loss: 1.0260123014450073,grad_norm: 0.9999991730891753, iteration: 88358
loss: 1.0143578052520752,grad_norm: 0.9870208178849886, iteration: 88359
loss: 1.004288911819458,grad_norm: 0.9379877681827745, iteration: 88360
loss: 0.9986372590065002,grad_norm: 0.9999991133860355, iteration: 88361
loss: 1.0253567695617676,grad_norm: 0.9704739283638383, iteration: 88362
loss: 1.021836280822754,grad_norm: 0.999999316086725, iteration: 88363
loss: 0.9596114158630371,grad_norm: 0.9150777621693297, iteration: 88364
loss: 1.0136756896972656,grad_norm: 0.9999990901897181, iteration: 88365
loss: 1.0198088884353638,grad_norm: 0.999999145015701, iteration: 88366
loss: 1.085017204284668,grad_norm: 0.9999991027042333, iteration: 88367
loss: 1.0133790969848633,grad_norm: 0.9999990791266602, iteration: 88368
loss: 0.9880010485649109,grad_norm: 0.9999991818973477, iteration: 88369
loss: 0.9653899669647217,grad_norm: 0.9999992805841575, iteration: 88370
loss: 0.996317446231842,grad_norm: 0.9999991151245554, iteration: 88371
loss: 0.9865061044692993,grad_norm: 0.9999991155903186, iteration: 88372
loss: 1.0081048011779785,grad_norm: 0.9999991991364178, iteration: 88373
loss: 1.0329759120941162,grad_norm: 0.9999991417422099, iteration: 88374
loss: 0.9901058077812195,grad_norm: 0.9165133765413408, iteration: 88375
loss: 1.0294212102890015,grad_norm: 0.9999990289531276, iteration: 88376
loss: 1.0003018379211426,grad_norm: 0.9999992476540623, iteration: 88377
loss: 0.9959505200386047,grad_norm: 0.8735434477313877, iteration: 88378
loss: 1.0000722408294678,grad_norm: 0.9999992151135857, iteration: 88379
loss: 1.036955714225769,grad_norm: 0.9800599724059138, iteration: 88380
loss: 1.0150563716888428,grad_norm: 0.9999991886328603, iteration: 88381
loss: 1.0360063314437866,grad_norm: 0.9999999158599747, iteration: 88382
loss: 1.0003676414489746,grad_norm: 0.983915866180522, iteration: 88383
loss: 0.9919676780700684,grad_norm: 0.9094690211594482, iteration: 88384
loss: 1.0000300407409668,grad_norm: 0.9999991072152872, iteration: 88385
loss: 0.9835126996040344,grad_norm: 0.9999992021391896, iteration: 88386
loss: 1.006514549255371,grad_norm: 0.9999993023115697, iteration: 88387
loss: 1.0220965147018433,grad_norm: 0.999999260523756, iteration: 88388
loss: 0.9644624590873718,grad_norm: 0.9999991522810194, iteration: 88389
loss: 1.0170832872390747,grad_norm: 0.952010517240019, iteration: 88390
loss: 1.0029984712600708,grad_norm: 0.9999992508549863, iteration: 88391
loss: 0.9674851894378662,grad_norm: 0.9999990022553721, iteration: 88392
loss: 0.9989847540855408,grad_norm: 0.9999989678404414, iteration: 88393
loss: 1.0746393203735352,grad_norm: 0.9999996406650723, iteration: 88394
loss: 1.0082734823226929,grad_norm: 0.8857313095672131, iteration: 88395
loss: 0.9387009143829346,grad_norm: 0.9744236341147459, iteration: 88396
loss: 1.002145528793335,grad_norm: 0.9999990676018433, iteration: 88397
loss: 1.0492411851882935,grad_norm: 0.9999994106650436, iteration: 88398
loss: 1.0092893838882446,grad_norm: 0.9999991828539296, iteration: 88399
loss: 0.9954935908317566,grad_norm: 0.9972032193667364, iteration: 88400
loss: 1.0152901411056519,grad_norm: 0.9911553253767182, iteration: 88401
loss: 0.9877479076385498,grad_norm: 0.9197684705126214, iteration: 88402
loss: 0.9641398787498474,grad_norm: 0.9999992200745409, iteration: 88403
loss: 1.0210038423538208,grad_norm: 0.9999991392515798, iteration: 88404
loss: 1.0222989320755005,grad_norm: 0.8729751146322366, iteration: 88405
loss: 1.073300838470459,grad_norm: 0.9999994464635238, iteration: 88406
loss: 0.9540480375289917,grad_norm: 0.9999990171071702, iteration: 88407
loss: 0.9892991781234741,grad_norm: 0.9471371174930149, iteration: 88408
loss: 1.0046206712722778,grad_norm: 0.8329183781176693, iteration: 88409
loss: 0.9287656545639038,grad_norm: 0.999999105965487, iteration: 88410
loss: 1.045334815979004,grad_norm: 0.9793985026404277, iteration: 88411
loss: 1.116221308708191,grad_norm: 0.9999990902628048, iteration: 88412
loss: 1.0298088788986206,grad_norm: 0.9861516612665595, iteration: 88413
loss: 0.9972352981567383,grad_norm: 0.9999990492314094, iteration: 88414
loss: 0.9916691780090332,grad_norm: 0.9999989962029515, iteration: 88415
loss: 0.9927734732627869,grad_norm: 0.9999991069555986, iteration: 88416
loss: 1.0000371932983398,grad_norm: 0.9999990131997227, iteration: 88417
loss: 1.0284672975540161,grad_norm: 0.9999991249857925, iteration: 88418
loss: 1.0267616510391235,grad_norm: 0.9999992845357412, iteration: 88419
loss: 1.0174925327301025,grad_norm: 0.9999991211213469, iteration: 88420
loss: 1.0702831745147705,grad_norm: 0.9999991523474282, iteration: 88421
loss: 1.015324592590332,grad_norm: 0.9999991998553597, iteration: 88422
loss: 1.0186525583267212,grad_norm: 0.9999989591992261, iteration: 88423
loss: 1.0119088888168335,grad_norm: 0.9738094979778633, iteration: 88424
loss: 1.003116488456726,grad_norm: 0.9528391687777454, iteration: 88425
loss: 1.0094239711761475,grad_norm: 0.9999991060326924, iteration: 88426
loss: 1.0014729499816895,grad_norm: 0.8769036914263513, iteration: 88427
loss: 1.001753807067871,grad_norm: 0.9999992598255387, iteration: 88428
loss: 0.9905940890312195,grad_norm: 0.8679961633494634, iteration: 88429
loss: 1.01338529586792,grad_norm: 0.9692823498796841, iteration: 88430
loss: 1.0198326110839844,grad_norm: 0.9999991246448665, iteration: 88431
loss: 0.9878494143486023,grad_norm: 0.9303083388970032, iteration: 88432
loss: 0.9911415576934814,grad_norm: 0.9999989400696635, iteration: 88433
loss: 0.9972902536392212,grad_norm: 0.8840983655309772, iteration: 88434
loss: 1.0023940801620483,grad_norm: 0.9412838994753346, iteration: 88435
loss: 1.0036919116973877,grad_norm: 0.9999992886084185, iteration: 88436
loss: 0.9828377366065979,grad_norm: 0.873259088605051, iteration: 88437
loss: 0.9895262122154236,grad_norm: 0.9999989959043457, iteration: 88438
loss: 1.0142309665679932,grad_norm: 0.9999991940866699, iteration: 88439
loss: 1.0011159181594849,grad_norm: 0.9793347750972793, iteration: 88440
loss: 1.03713858127594,grad_norm: 0.999999146427431, iteration: 88441
loss: 1.0267504453659058,grad_norm: 0.9573544408476043, iteration: 88442
loss: 1.054485559463501,grad_norm: 0.9999994919138993, iteration: 88443
loss: 1.009222388267517,grad_norm: 0.9629536150813527, iteration: 88444
loss: 1.003253698348999,grad_norm: 0.9999992690909039, iteration: 88445
loss: 1.0036886930465698,grad_norm: 0.9999990535361594, iteration: 88446
loss: 1.0276163816452026,grad_norm: 0.9999992977534472, iteration: 88447
loss: 1.0187287330627441,grad_norm: 0.9853154664418846, iteration: 88448
loss: 1.015447974205017,grad_norm: 0.9999991788881815, iteration: 88449
loss: 1.0137132406234741,grad_norm: 0.9727269111827146, iteration: 88450
loss: 1.0095304250717163,grad_norm: 0.9888884799955681, iteration: 88451
loss: 0.9930267333984375,grad_norm: 0.941847346640638, iteration: 88452
loss: 0.969512939453125,grad_norm: 0.9844743323915415, iteration: 88453
loss: 0.9946728348731995,grad_norm: 0.8865066179718138, iteration: 88454
loss: 1.0159850120544434,grad_norm: 0.9999992368798194, iteration: 88455
loss: 0.9973644018173218,grad_norm: 0.8895884500565754, iteration: 88456
loss: 1.1458154916763306,grad_norm: 0.9999991947041339, iteration: 88457
loss: 1.003516674041748,grad_norm: 0.9999991299618747, iteration: 88458
loss: 1.002537727355957,grad_norm: 0.999999015732554, iteration: 88459
loss: 0.9543049335479736,grad_norm: 0.9999990936877764, iteration: 88460
loss: 0.9940406084060669,grad_norm: 0.9999989940011935, iteration: 88461
loss: 0.9989703297615051,grad_norm: 0.9999990101565832, iteration: 88462
loss: 1.0080013275146484,grad_norm: 0.9264321909939809, iteration: 88463
loss: 0.9932956695556641,grad_norm: 0.9999990359524813, iteration: 88464
loss: 1.0260725021362305,grad_norm: 0.999999304607168, iteration: 88465
loss: 0.9916722774505615,grad_norm: 0.9106493113032751, iteration: 88466
loss: 0.9612520933151245,grad_norm: 0.9999991769375798, iteration: 88467
loss: 0.970196008682251,grad_norm: 0.9999991157511118, iteration: 88468
loss: 1.033258080482483,grad_norm: 0.9999991493817442, iteration: 88469
loss: 1.0152398347854614,grad_norm: 0.9999989859517862, iteration: 88470
loss: 0.9571328163146973,grad_norm: 0.9999990678316703, iteration: 88471
loss: 1.00602126121521,grad_norm: 0.982265279536814, iteration: 88472
loss: 0.996895432472229,grad_norm: 0.9999991926739754, iteration: 88473
loss: 0.9753562211990356,grad_norm: 0.8956367828792773, iteration: 88474
loss: 0.9487618207931519,grad_norm: 0.999999102103874, iteration: 88475
loss: 0.9819778203964233,grad_norm: 0.9999991401498621, iteration: 88476
loss: 0.9621832370758057,grad_norm: 0.9804823229754087, iteration: 88477
loss: 1.016137719154358,grad_norm: 0.9999998201583884, iteration: 88478
loss: 1.0026214122772217,grad_norm: 0.9999991156338148, iteration: 88479
loss: 0.9948400855064392,grad_norm: 0.999999062759478, iteration: 88480
loss: 0.952804684638977,grad_norm: 0.999999147893295, iteration: 88481
loss: 1.0352284908294678,grad_norm: 0.9999990328068561, iteration: 88482
loss: 0.9476750493049622,grad_norm: 0.8970872615678651, iteration: 88483
loss: 0.9443262815475464,grad_norm: 0.9999989997123216, iteration: 88484
loss: 1.0320230722427368,grad_norm: 0.999999135202963, iteration: 88485
loss: 1.016241192817688,grad_norm: 0.8609057527267238, iteration: 88486
loss: 1.005223035812378,grad_norm: 0.9258463306975415, iteration: 88487
loss: 0.9845947027206421,grad_norm: 0.9999990769271538, iteration: 88488
loss: 1.0193358659744263,grad_norm: 0.9635490420922459, iteration: 88489
loss: 1.0225602388381958,grad_norm: 0.901543569918766, iteration: 88490
loss: 1.0203992128372192,grad_norm: 0.9946827161804714, iteration: 88491
loss: 0.9947574138641357,grad_norm: 0.9999991463248301, iteration: 88492
loss: 0.9822610020637512,grad_norm: 0.9999993074998363, iteration: 88493
loss: 0.9876803755760193,grad_norm: 0.9999990770129906, iteration: 88494
loss: 1.0541043281555176,grad_norm: 0.9999991029901952, iteration: 88495
loss: 1.0330471992492676,grad_norm: 0.9999989798523133, iteration: 88496
loss: 1.023173451423645,grad_norm: 0.9999991603206385, iteration: 88497
loss: 1.0244429111480713,grad_norm: 0.9999991011234326, iteration: 88498
loss: 0.9810213446617126,grad_norm: 0.9999990560420412, iteration: 88499
loss: 1.0094447135925293,grad_norm: 0.9999992105150268, iteration: 88500
loss: 1.0511120557785034,grad_norm: 0.9999993255742214, iteration: 88501
loss: 1.0124746561050415,grad_norm: 0.9806874568304599, iteration: 88502
loss: 1.0220154523849487,grad_norm: 0.9279838450703214, iteration: 88503
loss: 0.9946483373641968,grad_norm: 0.9999990187758, iteration: 88504
loss: 0.9886812567710876,grad_norm: 0.9798050730128443, iteration: 88505
loss: 0.9987944960594177,grad_norm: 0.9999991724333992, iteration: 88506
loss: 1.009827733039856,grad_norm: 0.7927252770024487, iteration: 88507
loss: 0.9679962396621704,grad_norm: 0.9999990992495136, iteration: 88508
loss: 0.9951691031455994,grad_norm: 0.8912235182136364, iteration: 88509
loss: 1.017014503479004,grad_norm: 0.9999992106702504, iteration: 88510
loss: 0.9890428781509399,grad_norm: 0.7940515975012886, iteration: 88511
loss: 0.9957464337348938,grad_norm: 0.9999991441821938, iteration: 88512
loss: 1.028494954109192,grad_norm: 0.9999991184649529, iteration: 88513
loss: 1.0008511543273926,grad_norm: 0.9238438224730806, iteration: 88514
loss: 0.972472071647644,grad_norm: 0.9999997113518075, iteration: 88515
loss: 1.0343290567398071,grad_norm: 0.9999990941037159, iteration: 88516
loss: 0.9881437420845032,grad_norm: 0.9999995226893585, iteration: 88517
loss: 0.9726858735084534,grad_norm: 0.9999991874525992, iteration: 88518
loss: 1.0140308141708374,grad_norm: 0.8768662068435102, iteration: 88519
loss: 1.0145848989486694,grad_norm: 0.9999991699503814, iteration: 88520
loss: 0.9431702494621277,grad_norm: 0.9909385365398543, iteration: 88521
loss: 1.0209554433822632,grad_norm: 0.9999991983283224, iteration: 88522
loss: 1.0182926654815674,grad_norm: 0.9999991343045354, iteration: 88523
loss: 1.0010977983474731,grad_norm: 0.969107475840062, iteration: 88524
loss: 0.9657312631607056,grad_norm: 0.8317993389264904, iteration: 88525
loss: 0.9848253130912781,grad_norm: 0.9999991406434402, iteration: 88526
loss: 1.0419256687164307,grad_norm: 0.9999991845259891, iteration: 88527
loss: 0.9860073924064636,grad_norm: 0.9258966733800348, iteration: 88528
loss: 1.0191349983215332,grad_norm: 0.9999988967222871, iteration: 88529
loss: 0.9883418679237366,grad_norm: 0.9227897313419032, iteration: 88530
loss: 0.991902232170105,grad_norm: 0.9514197263590697, iteration: 88531
loss: 0.9833656549453735,grad_norm: 0.9999990103643167, iteration: 88532
loss: 0.9816582798957825,grad_norm: 0.9999992477626785, iteration: 88533
loss: 0.9690106511116028,grad_norm: 0.9926373290559655, iteration: 88534
loss: 0.9683899283409119,grad_norm: 0.9969684165615055, iteration: 88535
loss: 1.0173311233520508,grad_norm: 0.999999043763939, iteration: 88536
loss: 0.9955068230628967,grad_norm: 0.9999991805125548, iteration: 88537
loss: 0.97023606300354,grad_norm: 0.9999989953729398, iteration: 88538
loss: 0.9879485964775085,grad_norm: 0.9375453378973857, iteration: 88539
loss: 1.0122684240341187,grad_norm: 0.9999996784069345, iteration: 88540
loss: 0.9967119097709656,grad_norm: 0.8842581568721926, iteration: 88541
loss: 0.9910503625869751,grad_norm: 0.9999990861441379, iteration: 88542
loss: 1.0200464725494385,grad_norm: 0.9999990190781013, iteration: 88543
loss: 0.9955677390098572,grad_norm: 0.9999995217985995, iteration: 88544
loss: 1.0031639337539673,grad_norm: 0.9999990819392155, iteration: 88545
loss: 1.029767394065857,grad_norm: 0.9999993856271847, iteration: 88546
loss: 0.9888767004013062,grad_norm: 0.999999054081224, iteration: 88547
loss: 0.9755656123161316,grad_norm: 0.9418145714681707, iteration: 88548
loss: 0.9703691005706787,grad_norm: 0.8716801433181761, iteration: 88549
loss: 1.019781231880188,grad_norm: 0.9999989316536634, iteration: 88550
loss: 0.9724218845367432,grad_norm: 0.8835407922983736, iteration: 88551
loss: 0.9615390300750732,grad_norm: 0.9999990002065469, iteration: 88552
loss: 1.006178379058838,grad_norm: 0.9999996382230062, iteration: 88553
loss: 1.0051507949829102,grad_norm: 0.9999991460178631, iteration: 88554
loss: 0.9751264452934265,grad_norm: 0.988113703244448, iteration: 88555
loss: 1.0216766595840454,grad_norm: 0.9483392967690563, iteration: 88556
loss: 0.9993696212768555,grad_norm: 0.9421423434454629, iteration: 88557
loss: 1.0186529159545898,grad_norm: 0.9999991991461082, iteration: 88558
loss: 1.0313215255737305,grad_norm: 0.9999997323878421, iteration: 88559
loss: 0.9779253005981445,grad_norm: 0.999999157839071, iteration: 88560
loss: 1.0220258235931396,grad_norm: 0.9999989591327709, iteration: 88561
loss: 1.0047974586486816,grad_norm: 0.9999992126953564, iteration: 88562
loss: 1.010340690612793,grad_norm: 0.999999173608421, iteration: 88563
loss: 1.0098973512649536,grad_norm: 0.9999990406119506, iteration: 88564
loss: 1.0347318649291992,grad_norm: 0.99999948431143, iteration: 88565
loss: 0.9650799632072449,grad_norm: 0.9120786757508274, iteration: 88566
loss: 1.007373332977295,grad_norm: 0.9999991841635381, iteration: 88567
loss: 1.007522702217102,grad_norm: 0.9906996668674919, iteration: 88568
loss: 1.006848931312561,grad_norm: 0.9999992980574961, iteration: 88569
loss: 0.9785894155502319,grad_norm: 0.9275563571136121, iteration: 88570
loss: 0.992892324924469,grad_norm: 0.999999266286472, iteration: 88571
loss: 1.0289326906204224,grad_norm: 0.9999991982359568, iteration: 88572
loss: 1.044387936592102,grad_norm: 0.999999505051957, iteration: 88573
loss: 0.980444610118866,grad_norm: 0.9999990521304924, iteration: 88574
loss: 0.9914072155952454,grad_norm: 0.9999990856685108, iteration: 88575
loss: 0.9979700446128845,grad_norm: 0.9999991957578016, iteration: 88576
loss: 1.0331989526748657,grad_norm: 0.9999992502501649, iteration: 88577
loss: 1.0181732177734375,grad_norm: 0.9425395479394839, iteration: 88578
loss: 0.9934969544410706,grad_norm: 0.8975423550026825, iteration: 88579
loss: 0.9972582459449768,grad_norm: 0.9999990493031006, iteration: 88580
loss: 0.9494097232818604,grad_norm: 0.9094204898910719, iteration: 88581
loss: 1.0017495155334473,grad_norm: 0.9333484828618351, iteration: 88582
loss: 1.0235347747802734,grad_norm: 0.9999992644848659, iteration: 88583
loss: 0.9765490293502808,grad_norm: 0.8590114894288418, iteration: 88584
loss: 0.9677346348762512,grad_norm: 0.9999990818877315, iteration: 88585
loss: 1.0115364789962769,grad_norm: 0.9999990176064346, iteration: 88586
loss: 1.006399154663086,grad_norm: 0.9999990888720818, iteration: 88587
loss: 0.9757670760154724,grad_norm: 0.9999992055742053, iteration: 88588
loss: 0.9915890097618103,grad_norm: 0.9999991308056863, iteration: 88589
loss: 1.0287628173828125,grad_norm: 0.9999996005147064, iteration: 88590
loss: 1.0022339820861816,grad_norm: 0.9484300625329087, iteration: 88591
loss: 1.0221914052963257,grad_norm: 0.9999991112827056, iteration: 88592
loss: 0.9781949520111084,grad_norm: 0.8922185599115311, iteration: 88593
loss: 0.991930365562439,grad_norm: 0.9999993437140008, iteration: 88594
loss: 1.063606858253479,grad_norm: 0.9578866913538605, iteration: 88595
loss: 1.0204523801803589,grad_norm: 0.9999990696965871, iteration: 88596
loss: 1.009539246559143,grad_norm: 0.9999991678069657, iteration: 88597
loss: 1.0414155721664429,grad_norm: 0.9999993197951417, iteration: 88598
loss: 0.9949629306793213,grad_norm: 0.969175133298598, iteration: 88599
loss: 1.0652092695236206,grad_norm: 0.9999992269118482, iteration: 88600
loss: 1.0191832780838013,grad_norm: 0.9999992460276247, iteration: 88601
loss: 1.0335164070129395,grad_norm: 0.9999993523753512, iteration: 88602
loss: 1.0148305892944336,grad_norm: 0.917650639136765, iteration: 88603
loss: 1.0415239334106445,grad_norm: 0.9999992193010601, iteration: 88604
loss: 1.0191960334777832,grad_norm: 0.9999992250470394, iteration: 88605
loss: 1.015373706817627,grad_norm: 0.9477969094608062, iteration: 88606
loss: 1.0543683767318726,grad_norm: 0.9503695422160083, iteration: 88607
loss: 1.0160261392593384,grad_norm: 0.9999991023168673, iteration: 88608
loss: 1.0425556898117065,grad_norm: 0.8949685625763274, iteration: 88609
loss: 1.0158123970031738,grad_norm: 0.9999990807771547, iteration: 88610
loss: 1.0054242610931396,grad_norm: 0.9999990976727764, iteration: 88611
loss: 1.0511889457702637,grad_norm: 0.9999995525375321, iteration: 88612
loss: 1.017052173614502,grad_norm: 0.9999992606945598, iteration: 88613
loss: 1.0241913795471191,grad_norm: 0.9999992340120489, iteration: 88614
loss: 1.0219956636428833,grad_norm: 0.999999287773897, iteration: 88615
loss: 1.0141693353652954,grad_norm: 0.9989777892745457, iteration: 88616
loss: 0.967673659324646,grad_norm: 0.9999991639669489, iteration: 88617
loss: 0.9964338541030884,grad_norm: 0.9999991705013134, iteration: 88618
loss: 1.0148694515228271,grad_norm: 0.9999991162572424, iteration: 88619
loss: 0.9860997200012207,grad_norm: 0.9999991435158447, iteration: 88620
loss: 1.0200409889221191,grad_norm: 0.9999992702841358, iteration: 88621
loss: 1.0255396366119385,grad_norm: 0.9999991775153034, iteration: 88622
loss: 0.9975628852844238,grad_norm: 0.9999991359524639, iteration: 88623
loss: 0.9819768071174622,grad_norm: 0.9999991305676286, iteration: 88624
loss: 1.0098376274108887,grad_norm: 0.9999991108927186, iteration: 88625
loss: 0.9987761974334717,grad_norm: 0.9999990532859505, iteration: 88626
loss: 1.0100314617156982,grad_norm: 0.9999990928421, iteration: 88627
loss: 1.0281027555465698,grad_norm: 0.8917184133482616, iteration: 88628
loss: 0.9880090355873108,grad_norm: 0.9999990786368023, iteration: 88629
loss: 1.0103459358215332,grad_norm: 0.999998986515633, iteration: 88630
loss: 0.9727185964584351,grad_norm: 0.9999989882262792, iteration: 88631
loss: 1.005713939666748,grad_norm: 0.9999991869466125, iteration: 88632
loss: 1.0561318397521973,grad_norm: 0.9999996880434427, iteration: 88633
loss: 1.0127601623535156,grad_norm: 0.8962401448544105, iteration: 88634
loss: 1.0104221105575562,grad_norm: 0.9558281840596483, iteration: 88635
loss: 0.9994113445281982,grad_norm: 0.9999989439781385, iteration: 88636
loss: 0.9903815388679504,grad_norm: 0.9664855186808851, iteration: 88637
loss: 1.0079050064086914,grad_norm: 0.9999993969496025, iteration: 88638
loss: 0.997958779335022,grad_norm: 0.8808174751008062, iteration: 88639
loss: 0.998874306678772,grad_norm: 0.999998982748818, iteration: 88640
loss: 0.977284848690033,grad_norm: 0.9999991872198871, iteration: 88641
loss: 0.9542966485023499,grad_norm: 0.9999990870749104, iteration: 88642
loss: 0.9605294466018677,grad_norm: 0.9609421146121306, iteration: 88643
loss: 0.9994991421699524,grad_norm: 0.9999991761118657, iteration: 88644
loss: 1.0098659992218018,grad_norm: 0.9329166332599769, iteration: 88645
loss: 0.994505763053894,grad_norm: 0.9999991337908146, iteration: 88646
loss: 0.9920876622200012,grad_norm: 0.9999993182622139, iteration: 88647
loss: 1.023877501487732,grad_norm: 0.9999992011999045, iteration: 88648
loss: 1.021072506904602,grad_norm: 0.9999993312317537, iteration: 88649
loss: 1.0088835954666138,grad_norm: 0.9999990717537218, iteration: 88650
loss: 0.9918392300605774,grad_norm: 0.9999991165438776, iteration: 88651
loss: 1.038009762763977,grad_norm: 0.9349629124753754, iteration: 88652
loss: 1.0185530185699463,grad_norm: 0.9999989675864315, iteration: 88653
loss: 0.9595394134521484,grad_norm: 0.9999991057866064, iteration: 88654
loss: 1.0306259393692017,grad_norm: 0.9999991117476548, iteration: 88655
loss: 1.019026756286621,grad_norm: 0.9999989534484334, iteration: 88656
loss: 1.0273171663284302,grad_norm: 0.999999329442203, iteration: 88657
loss: 0.9787417054176331,grad_norm: 0.9999990789239864, iteration: 88658
loss: 1.0431749820709229,grad_norm: 0.9999990755228948, iteration: 88659
loss: 1.0182043313980103,grad_norm: 0.935123345392462, iteration: 88660
loss: 1.0339834690093994,grad_norm: 0.9999992699739674, iteration: 88661
loss: 1.0085980892181396,grad_norm: 0.9999992008937747, iteration: 88662
loss: 0.9867781400680542,grad_norm: 0.9999990424285163, iteration: 88663
loss: 1.0192192792892456,grad_norm: 0.9999997882118551, iteration: 88664
loss: 1.0129753351211548,grad_norm: 0.9999991439701147, iteration: 88665
loss: 0.9961754083633423,grad_norm: 0.9999990850421733, iteration: 88666
loss: 1.0203721523284912,grad_norm: 0.9999992196914483, iteration: 88667
loss: 0.957126796245575,grad_norm: 0.9999992661666223, iteration: 88668
loss: 0.984175443649292,grad_norm: 0.9999991286215534, iteration: 88669
loss: 1.022711157798767,grad_norm: 0.9999993472443542, iteration: 88670
loss: 0.9995458722114563,grad_norm: 0.8103095990847375, iteration: 88671
loss: 0.9973788261413574,grad_norm: 0.936151225416385, iteration: 88672
loss: 1.0007860660552979,grad_norm: 0.999999016376864, iteration: 88673
loss: 1.009664535522461,grad_norm: 0.9931677422412274, iteration: 88674
loss: 1.0138849020004272,grad_norm: 0.999999317411907, iteration: 88675
loss: 1.0055007934570312,grad_norm: 0.9999990240401001, iteration: 88676
loss: 1.0185848474502563,grad_norm: 0.9999990474562022, iteration: 88677
loss: 1.0035139322280884,grad_norm: 0.9999990637522087, iteration: 88678
loss: 0.9782841205596924,grad_norm: 0.9076551349695618, iteration: 88679
loss: 0.9839630126953125,grad_norm: 0.9999990377582577, iteration: 88680
loss: 1.0149495601654053,grad_norm: 0.9999989779181362, iteration: 88681
loss: 0.992020308971405,grad_norm: 0.9999992240750064, iteration: 88682
loss: 0.9924651980400085,grad_norm: 0.951542808357835, iteration: 88683
loss: 1.0057895183563232,grad_norm: 0.9392841030354343, iteration: 88684
loss: 0.9952402114868164,grad_norm: 0.9999990358426594, iteration: 88685
loss: 1.0005168914794922,grad_norm: 0.8859424496122571, iteration: 88686
loss: 0.9980583190917969,grad_norm: 0.9999992329218961, iteration: 88687
loss: 0.9958803057670593,grad_norm: 0.9650772642889618, iteration: 88688
loss: 1.0364748239517212,grad_norm: 0.9999991711321485, iteration: 88689
loss: 0.9831370711326599,grad_norm: 0.9302216864543863, iteration: 88690
loss: 1.0014041662216187,grad_norm: 0.9999992237538468, iteration: 88691
loss: 1.0229741334915161,grad_norm: 0.9999990125065006, iteration: 88692
loss: 1.0189155340194702,grad_norm: 0.8847346969545063, iteration: 88693
loss: 1.0014692544937134,grad_norm: 0.9999990237825098, iteration: 88694
loss: 1.0007718801498413,grad_norm: 0.9999990503399849, iteration: 88695
loss: 1.036087989807129,grad_norm: 0.9999994640404949, iteration: 88696
loss: 0.9967507719993591,grad_norm: 0.999999057237571, iteration: 88697
loss: 0.9641157984733582,grad_norm: 0.9999991447858049, iteration: 88698
loss: 0.9679561853408813,grad_norm: 0.9999991950280163, iteration: 88699
loss: 0.987733006477356,grad_norm: 0.8860839879962323, iteration: 88700
loss: 0.9996991157531738,grad_norm: 0.999999102637537, iteration: 88701
loss: 1.0093735456466675,grad_norm: 0.9999990768807825, iteration: 88702
loss: 1.00690758228302,grad_norm: 0.9999991278548609, iteration: 88703
loss: 0.990132212638855,grad_norm: 0.9999991944402213, iteration: 88704
loss: 1.003919243812561,grad_norm: 0.9999989502812914, iteration: 88705
loss: 1.0087368488311768,grad_norm: 0.9999992266823887, iteration: 88706
loss: 0.9880082607269287,grad_norm: 0.9999991113873289, iteration: 88707
loss: 0.9723660349845886,grad_norm: 0.9304793574216388, iteration: 88708
loss: 1.0166387557983398,grad_norm: 0.9264890174856544, iteration: 88709
loss: 0.9524061679840088,grad_norm: 0.9999991939225059, iteration: 88710
loss: 0.9819308519363403,grad_norm: 0.9669322162655037, iteration: 88711
loss: 1.018585205078125,grad_norm: 0.999999187169338, iteration: 88712
loss: 0.9915915727615356,grad_norm: 0.9999991543810106, iteration: 88713
loss: 0.9896674752235413,grad_norm: 0.9999991971006559, iteration: 88714
loss: 1.0100326538085938,grad_norm: 0.999999112402868, iteration: 88715
loss: 0.969811737537384,grad_norm: 0.9999991447069659, iteration: 88716
loss: 0.974504292011261,grad_norm: 0.8411778686239421, iteration: 88717
loss: 1.0048491954803467,grad_norm: 0.9999991047031981, iteration: 88718
loss: 0.972093939781189,grad_norm: 0.9653977204620181, iteration: 88719
loss: 1.0062462091445923,grad_norm: 0.9999992248146532, iteration: 88720
loss: 1.0002351999282837,grad_norm: 0.9901448955728291, iteration: 88721
loss: 0.9913132190704346,grad_norm: 0.8897854225801878, iteration: 88722
loss: 1.0027028322219849,grad_norm: 0.9999991985524556, iteration: 88723
loss: 0.9994423985481262,grad_norm: 0.9999991209599408, iteration: 88724
loss: 0.9857645034790039,grad_norm: 0.9999991159200357, iteration: 88725
loss: 1.0252383947372437,grad_norm: 0.9999991350752061, iteration: 88726
loss: 0.996033251285553,grad_norm: 0.9999992594025584, iteration: 88727
loss: 0.9971647262573242,grad_norm: 0.9999990612441979, iteration: 88728
loss: 1.0283136367797852,grad_norm: 0.9999992057612324, iteration: 88729
loss: 1.0341534614562988,grad_norm: 0.999999068865198, iteration: 88730
loss: 0.9464580416679382,grad_norm: 0.951440503340641, iteration: 88731
loss: 1.0085341930389404,grad_norm: 0.9999991089189278, iteration: 88732
loss: 1.030949354171753,grad_norm: 0.9415355919794092, iteration: 88733
loss: 1.033730149269104,grad_norm: 0.9883573872879476, iteration: 88734
loss: 1.004784107208252,grad_norm: 0.8547765291502983, iteration: 88735
loss: 0.9906607866287231,grad_norm: 0.8826928196778574, iteration: 88736
loss: 1.0190893411636353,grad_norm: 0.99999898622046, iteration: 88737
loss: 1.019897699356079,grad_norm: 0.9999990916617228, iteration: 88738
loss: 1.0140315294265747,grad_norm: 0.946837723166698, iteration: 88739
loss: 0.9626988172531128,grad_norm: 0.9694807061284387, iteration: 88740
loss: 1.0119967460632324,grad_norm: 0.9999989407339148, iteration: 88741
loss: 0.9887397289276123,grad_norm: 0.9999991567835823, iteration: 88742
loss: 0.9521611332893372,grad_norm: 0.9999991350613227, iteration: 88743
loss: 0.9880905747413635,grad_norm: 0.9999990880976316, iteration: 88744
loss: 1.0196138620376587,grad_norm: 0.9999992579349269, iteration: 88745
loss: 1.0128076076507568,grad_norm: 0.9284492032823972, iteration: 88746
loss: 1.0016950368881226,grad_norm: 0.9999992744142533, iteration: 88747
loss: 1.010270118713379,grad_norm: 0.9999990846692444, iteration: 88748
loss: 1.022507905960083,grad_norm: 0.9999991010578587, iteration: 88749
loss: 0.9631814360618591,grad_norm: 0.9999990329573345, iteration: 88750
loss: 1.016878604888916,grad_norm: 0.9999991375881804, iteration: 88751
loss: 0.9755045175552368,grad_norm: 0.970996342368611, iteration: 88752
loss: 0.9846093654632568,grad_norm: 0.9999990508298514, iteration: 88753
loss: 1.0106655359268188,grad_norm: 0.9999990876677225, iteration: 88754
loss: 0.9697939157485962,grad_norm: 0.9895906496284136, iteration: 88755
loss: 1.0080633163452148,grad_norm: 0.9999990409767896, iteration: 88756
loss: 1.032354712486267,grad_norm: 0.9783537760642247, iteration: 88757
loss: 0.9703476428985596,grad_norm: 0.9999990481028747, iteration: 88758
loss: 1.0056313276290894,grad_norm: 0.9999992626907731, iteration: 88759
loss: 1.0066441297531128,grad_norm: 0.9999992069193975, iteration: 88760
loss: 1.0072686672210693,grad_norm: 0.9999992330096298, iteration: 88761
loss: 1.0244474411010742,grad_norm: 0.9192833776460759, iteration: 88762
loss: 1.0274039506912231,grad_norm: 0.9631680310937077, iteration: 88763
loss: 0.9933199882507324,grad_norm: 0.8511055275955374, iteration: 88764
loss: 0.979983389377594,grad_norm: 0.9999992781910433, iteration: 88765
loss: 0.9977593421936035,grad_norm: 0.9999989592009332, iteration: 88766
loss: 1.0327644348144531,grad_norm: 0.9288255912490492, iteration: 88767
loss: 1.0417189598083496,grad_norm: 0.9353939162795921, iteration: 88768
loss: 0.9995613694190979,grad_norm: 0.9659949569439946, iteration: 88769
loss: 1.0269218683242798,grad_norm: 0.999999153975867, iteration: 88770
loss: 1.0281400680541992,grad_norm: 0.999999064342294, iteration: 88771
loss: 1.0284138917922974,grad_norm: 0.996330753412984, iteration: 88772
loss: 1.0495052337646484,grad_norm: 0.9999990505626472, iteration: 88773
loss: 0.9961613416671753,grad_norm: 0.9999991994169531, iteration: 88774
loss: 1.0160971879959106,grad_norm: 0.9714633308371322, iteration: 88775
loss: 1.0040322542190552,grad_norm: 0.9360122115460058, iteration: 88776
loss: 0.9916394948959351,grad_norm: 0.9999991730418045, iteration: 88777
loss: 1.0138921737670898,grad_norm: 0.9999992049427302, iteration: 88778
loss: 1.0002903938293457,grad_norm: 0.9999989710846354, iteration: 88779
loss: 0.9823155403137207,grad_norm: 0.999998976148673, iteration: 88780
loss: 1.0051343441009521,grad_norm: 0.9999990783740441, iteration: 88781
loss: 0.9952780604362488,grad_norm: 0.9497838889911437, iteration: 88782
loss: 0.989952564239502,grad_norm: 0.9999989638259424, iteration: 88783
loss: 0.9950905442237854,grad_norm: 0.9999991683186463, iteration: 88784
loss: 1.0114506483078003,grad_norm: 0.9037778052878439, iteration: 88785
loss: 1.0144734382629395,grad_norm: 0.9730943833732486, iteration: 88786
loss: 0.989528477191925,grad_norm: 0.8252387831946468, iteration: 88787
loss: 0.9761002659797668,grad_norm: 0.9999991279534702, iteration: 88788
loss: 1.0207736492156982,grad_norm: 0.9600266164861828, iteration: 88789
loss: 1.0160547494888306,grad_norm: 0.9762958043457554, iteration: 88790
loss: 1.0005688667297363,grad_norm: 0.8896584095243143, iteration: 88791
loss: 0.9760028719902039,grad_norm: 0.9786868630646154, iteration: 88792
loss: 0.9976292848587036,grad_norm: 0.9999990522976184, iteration: 88793
loss: 0.9607253670692444,grad_norm: 0.970098407483211, iteration: 88794
loss: 0.9827471375465393,grad_norm: 0.9000701869690707, iteration: 88795
loss: 0.9794296026229858,grad_norm: 0.9999991269562365, iteration: 88796
loss: 0.9862074851989746,grad_norm: 0.9999989736115059, iteration: 88797
loss: 0.9835571050643921,grad_norm: 0.9999991096086374, iteration: 88798
loss: 1.0169165134429932,grad_norm: 0.9999992475397724, iteration: 88799
loss: 0.990425705909729,grad_norm: 0.9528962312166306, iteration: 88800
loss: 0.9623615145683289,grad_norm: 0.9999992164088006, iteration: 88801
loss: 0.9869722127914429,grad_norm: 0.9999990674942957, iteration: 88802
loss: 0.9718741178512573,grad_norm: 0.9999991481430655, iteration: 88803
loss: 1.024994969367981,grad_norm: 0.9999991166817794, iteration: 88804
loss: 0.9686625003814697,grad_norm: 0.8864465789846524, iteration: 88805
loss: 0.994473934173584,grad_norm: 0.8927243537803606, iteration: 88806
loss: 0.9612246751785278,grad_norm: 0.9836121201402174, iteration: 88807
loss: 1.0050803422927856,grad_norm: 0.9999999415771215, iteration: 88808
loss: 0.9927348494529724,grad_norm: 0.8291773537258981, iteration: 88809
loss: 0.9483146071434021,grad_norm: 0.9999990295816141, iteration: 88810
loss: 1.0088582038879395,grad_norm: 0.9634608145991062, iteration: 88811
loss: 0.9595232605934143,grad_norm: 0.9750266230141796, iteration: 88812
loss: 1.008799433708191,grad_norm: 0.9999990535777032, iteration: 88813
loss: 1.0007063150405884,grad_norm: 0.9999991967556378, iteration: 88814
loss: 1.035679578781128,grad_norm: 0.9999992211128709, iteration: 88815
loss: 1.042214035987854,grad_norm: 0.9999990419564002, iteration: 88816
loss: 1.0120307207107544,grad_norm: 0.999998971308336, iteration: 88817
loss: 0.9676263332366943,grad_norm: 0.9999992438377059, iteration: 88818
loss: 1.007484793663025,grad_norm: 0.9999989467709662, iteration: 88819
loss: 0.9873201847076416,grad_norm: 0.9203877115367004, iteration: 88820
loss: 0.9999796748161316,grad_norm: 0.9949398680809848, iteration: 88821
loss: 0.9525825381278992,grad_norm: 0.9188767757795968, iteration: 88822
loss: 1.0154122114181519,grad_norm: 0.95512148115647, iteration: 88823
loss: 1.0484012365341187,grad_norm: 0.9999991426907494, iteration: 88824
loss: 1.0005230903625488,grad_norm: 0.9999995425943086, iteration: 88825
loss: 1.0091931819915771,grad_norm: 0.880400517974514, iteration: 88826
loss: 1.0054019689559937,grad_norm: 0.9999990632829636, iteration: 88827
loss: 1.014489769935608,grad_norm: 0.999999136744662, iteration: 88828
loss: 0.9938012361526489,grad_norm: 0.8780780075121118, iteration: 88829
loss: 1.0040031671524048,grad_norm: 0.901540598001014, iteration: 88830
loss: 1.0253784656524658,grad_norm: 0.9999991149311029, iteration: 88831
loss: 1.027612328529358,grad_norm: 0.9999990352836173, iteration: 88832
loss: 0.9939697980880737,grad_norm: 0.9999989900048412, iteration: 88833
loss: 0.9774015545845032,grad_norm: 0.9999992839445127, iteration: 88834
loss: 0.9759016633033752,grad_norm: 0.9451688879900496, iteration: 88835
loss: 0.984472393989563,grad_norm: 0.9149710265668433, iteration: 88836
loss: 1.0284727811813354,grad_norm: 0.9999991661574149, iteration: 88837
loss: 0.9854061603546143,grad_norm: 0.999999123313118, iteration: 88838
loss: 1.0106960535049438,grad_norm: 0.9999990789488011, iteration: 88839
loss: 0.9519060254096985,grad_norm: 0.9999990261732158, iteration: 88840
loss: 0.9956788420677185,grad_norm: 0.9999990344305667, iteration: 88841
loss: 1.0570192337036133,grad_norm: 0.9999990339315229, iteration: 88842
loss: 0.9923564195632935,grad_norm: 0.9999993744707415, iteration: 88843
loss: 1.0160640478134155,grad_norm: 0.9999992056882943, iteration: 88844
loss: 0.9947394728660583,grad_norm: 0.9819553998274388, iteration: 88845
loss: 0.9713464975357056,grad_norm: 0.9999990461053168, iteration: 88846
loss: 1.011175274848938,grad_norm: 0.9999992725750705, iteration: 88847
loss: 0.9837830662727356,grad_norm: 0.9999991172435706, iteration: 88848
loss: 0.9980823397636414,grad_norm: 0.9999990172777877, iteration: 88849
loss: 1.0298014879226685,grad_norm: 0.8765753744999243, iteration: 88850
loss: 1.013184905052185,grad_norm: 0.9999991264146437, iteration: 88851
loss: 0.9846341013908386,grad_norm: 0.9999991234486476, iteration: 88852
loss: 1.026465892791748,grad_norm: 0.9384128066696769, iteration: 88853
loss: 1.01137375831604,grad_norm: 0.7767167529217568, iteration: 88854
loss: 0.9880450963973999,grad_norm: 0.79931515733377, iteration: 88855
loss: 1.0203336477279663,grad_norm: 0.9877521725563495, iteration: 88856
loss: 0.996974527835846,grad_norm: 0.9999990679952679, iteration: 88857
loss: 0.9895633459091187,grad_norm: 0.9477036925700669, iteration: 88858
loss: 1.030646562576294,grad_norm: 0.9224662687863265, iteration: 88859
loss: 0.9900579452514648,grad_norm: 0.873783710633511, iteration: 88860
loss: 0.9969763159751892,grad_norm: 0.9982977181463083, iteration: 88861
loss: 1.0309902429580688,grad_norm: 0.8676581935685092, iteration: 88862
loss: 1.0468060970306396,grad_norm: 0.999999079568075, iteration: 88863
loss: 1.022611379623413,grad_norm: 0.9999991120306483, iteration: 88864
loss: 0.9738826751708984,grad_norm: 0.9670264980452973, iteration: 88865
loss: 0.9832367300987244,grad_norm: 0.9999991548650821, iteration: 88866
loss: 1.0515849590301514,grad_norm: 0.9999993993289436, iteration: 88867
loss: 0.9651329517364502,grad_norm: 0.9999991677024103, iteration: 88868
loss: 1.0340030193328857,grad_norm: 0.9342578951649297, iteration: 88869
loss: 0.9670885801315308,grad_norm: 0.9999990783486479, iteration: 88870
loss: 0.9619276523590088,grad_norm: 0.9999991610086817, iteration: 88871
loss: 0.9810205698013306,grad_norm: 0.9999989822862944, iteration: 88872
loss: 0.9530776739120483,grad_norm: 0.9999991183939666, iteration: 88873
loss: 1.0309653282165527,grad_norm: 0.999999236681435, iteration: 88874
loss: 0.9666386246681213,grad_norm: 0.9999991111309795, iteration: 88875
loss: 0.9650374054908752,grad_norm: 0.9999990573587862, iteration: 88876
loss: 0.9879912734031677,grad_norm: 0.909932077617588, iteration: 88877
loss: 1.0091301202774048,grad_norm: 0.9999994044448571, iteration: 88878
loss: 0.9689933061599731,grad_norm: 0.999999179001561, iteration: 88879
loss: 0.9845603704452515,grad_norm: 0.9999992588412829, iteration: 88880
loss: 1.0172021389007568,grad_norm: 0.9999990311005988, iteration: 88881
loss: 0.9958853721618652,grad_norm: 0.9999991661239527, iteration: 88882
loss: 1.0081943273544312,grad_norm: 0.9620171970867639, iteration: 88883
loss: 0.9847394227981567,grad_norm: 0.9920303498616049, iteration: 88884
loss: 1.006122350692749,grad_norm: 0.8350738616564316, iteration: 88885
loss: 0.9878996014595032,grad_norm: 0.9999991431495261, iteration: 88886
loss: 1.0004899501800537,grad_norm: 0.9999991873817266, iteration: 88887
loss: 1.0192688703536987,grad_norm: 0.8933371200610725, iteration: 88888
loss: 1.0176417827606201,grad_norm: 0.9999990406505045, iteration: 88889
loss: 0.9839102029800415,grad_norm: 0.9999990625812429, iteration: 88890
loss: 0.994600772857666,grad_norm: 0.8622367153578036, iteration: 88891
loss: 1.0167990922927856,grad_norm: 0.9999990729918476, iteration: 88892
loss: 1.0013340711593628,grad_norm: 0.9999990225823701, iteration: 88893
loss: 0.9989880323410034,grad_norm: 0.9040193403382291, iteration: 88894
loss: 1.0000544786453247,grad_norm: 0.9999992922255196, iteration: 88895
loss: 0.9978445172309875,grad_norm: 0.9999990907119847, iteration: 88896
loss: 0.9902040958404541,grad_norm: 0.9999992473609068, iteration: 88897
loss: 1.0258597135543823,grad_norm: 0.9390848627959487, iteration: 88898
loss: 1.0311174392700195,grad_norm: 0.9999991792255183, iteration: 88899
loss: 0.9860115051269531,grad_norm: 0.9999990899905795, iteration: 88900
loss: 0.9977660179138184,grad_norm: 0.999999110705587, iteration: 88901
loss: 1.02333402633667,grad_norm: 0.9691208205987389, iteration: 88902
loss: 0.9821692705154419,grad_norm: 0.992740704382125, iteration: 88903
loss: 0.9711373448371887,grad_norm: 0.9999991815158346, iteration: 88904
loss: 0.978915810585022,grad_norm: 0.9321158266504416, iteration: 88905
loss: 1.0399094820022583,grad_norm: 0.9999992996255948, iteration: 88906
loss: 0.9771395921707153,grad_norm: 0.9999990537911153, iteration: 88907
loss: 1.0028166770935059,grad_norm: 0.9999991046541308, iteration: 88908
loss: 0.9909836053848267,grad_norm: 0.9660568858701283, iteration: 88909
loss: 1.0279539823532104,grad_norm: 0.9405807994230567, iteration: 88910
loss: 0.9475474953651428,grad_norm: 0.9999992033445003, iteration: 88911
loss: 1.0073730945587158,grad_norm: 0.999999250561569, iteration: 88912
loss: 0.9787061214447021,grad_norm: 0.9999990551844702, iteration: 88913
loss: 0.977063775062561,grad_norm: 0.9999990672173507, iteration: 88914
loss: 1.032679796218872,grad_norm: 0.9999990545370321, iteration: 88915
loss: 0.9853127002716064,grad_norm: 0.9999991464068888, iteration: 88916
loss: 1.0052881240844727,grad_norm: 0.9999993042744407, iteration: 88917
loss: 1.0451672077178955,grad_norm: 0.9999994295507723, iteration: 88918
loss: 0.997763454914093,grad_norm: 0.9441587394673051, iteration: 88919
loss: 1.0104771852493286,grad_norm: 0.9999992509363723, iteration: 88920
loss: 1.0264850854873657,grad_norm: 0.9999993086928622, iteration: 88921
loss: 0.9949612021446228,grad_norm: 0.9999990016456866, iteration: 88922
loss: 1.014958381652832,grad_norm: 0.9189614227296813, iteration: 88923
loss: 0.9984299540519714,grad_norm: 0.9809046888242071, iteration: 88924
loss: 1.0644453763961792,grad_norm: 0.9999991662421601, iteration: 88925
loss: 0.9836523532867432,grad_norm: 0.9999991305990773, iteration: 88926
loss: 0.9902482628822327,grad_norm: 0.9468039856211387, iteration: 88927
loss: 1.000349521636963,grad_norm: 0.9999990997230765, iteration: 88928
loss: 0.9775750041007996,grad_norm: 0.9728788262135387, iteration: 88929
loss: 0.9937082529067993,grad_norm: 0.9999991433203674, iteration: 88930
loss: 1.0218627452850342,grad_norm: 0.9999990523003013, iteration: 88931
loss: 1.0104531049728394,grad_norm: 0.9999992459693556, iteration: 88932
loss: 0.9893254041671753,grad_norm: 0.9999991327154811, iteration: 88933
loss: 0.9798980951309204,grad_norm: 0.9999992230461605, iteration: 88934
loss: 1.0464341640472412,grad_norm: 0.9999991383542614, iteration: 88935
loss: 1.0118986368179321,grad_norm: 0.999999236635254, iteration: 88936
loss: 0.9828574657440186,grad_norm: 0.9999989957567611, iteration: 88937
loss: 1.0049227476119995,grad_norm: 0.9364446146873001, iteration: 88938
loss: 1.0356096029281616,grad_norm: 0.9552902892127176, iteration: 88939
loss: 1.0211782455444336,grad_norm: 0.9999992817123423, iteration: 88940
loss: 0.9722108244895935,grad_norm: 0.9999996554388426, iteration: 88941
loss: 0.9745791554450989,grad_norm: 0.9999991048038318, iteration: 88942
loss: 0.9929171800613403,grad_norm: 0.988058456077301, iteration: 88943
loss: 1.0205209255218506,grad_norm: 0.9999990540064361, iteration: 88944
loss: 0.9701150059700012,grad_norm: 0.9999992454656947, iteration: 88945
loss: 1.007312536239624,grad_norm: 0.9999991509199971, iteration: 88946
loss: 1.1363282203674316,grad_norm: 0.9999996504664881, iteration: 88947
loss: 0.9720370769500732,grad_norm: 0.978517097340566, iteration: 88948
loss: 0.9834315180778503,grad_norm: 0.9565134588677492, iteration: 88949
loss: 1.0136229991912842,grad_norm: 0.9127821126943566, iteration: 88950
loss: 1.0046348571777344,grad_norm: 0.9999990395389577, iteration: 88951
loss: 0.9779713153839111,grad_norm: 0.9999991298209702, iteration: 88952
loss: 0.9995219111442566,grad_norm: 0.9101506657328349, iteration: 88953
loss: 1.0118972063064575,grad_norm: 0.9315392989692302, iteration: 88954
loss: 0.9844810366630554,grad_norm: 0.9999991992393641, iteration: 88955
loss: 1.000373125076294,grad_norm: 0.9999995301701278, iteration: 88956
loss: 0.9868718385696411,grad_norm: 0.9925579987597049, iteration: 88957
loss: 1.0216526985168457,grad_norm: 0.9999992544009381, iteration: 88958
loss: 1.0481171607971191,grad_norm: 0.9895069437867123, iteration: 88959
loss: 0.9946341514587402,grad_norm: 0.9999992218639161, iteration: 88960
loss: 0.9640514254570007,grad_norm: 0.871873454647893, iteration: 88961
loss: 1.0018196105957031,grad_norm: 0.9999991711067452, iteration: 88962
loss: 1.0233948230743408,grad_norm: 0.9999993705392174, iteration: 88963
loss: 1.0159260034561157,grad_norm: 0.9999990912889924, iteration: 88964
loss: 1.0334805250167847,grad_norm: 0.9999990705475083, iteration: 88965
loss: 1.0063139200210571,grad_norm: 0.9999999596442397, iteration: 88966
loss: 1.009075403213501,grad_norm: 0.9218349608592782, iteration: 88967
loss: 1.006871223449707,grad_norm: 0.9959634222964732, iteration: 88968
loss: 0.990614116191864,grad_norm: 0.9999992758282708, iteration: 88969
loss: 0.9924030303955078,grad_norm: 0.99999919231981, iteration: 88970
loss: 0.9734516143798828,grad_norm: 0.9210850330893168, iteration: 88971
loss: 1.0123844146728516,grad_norm: 0.9999991856503626, iteration: 88972
loss: 0.9698632955551147,grad_norm: 0.9434230214934388, iteration: 88973
loss: 0.9977545142173767,grad_norm: 0.9999992120130908, iteration: 88974
loss: 0.9419118762016296,grad_norm: 0.9999992291812931, iteration: 88975
loss: 0.9674590826034546,grad_norm: 0.999999316898219, iteration: 88976
loss: 1.003096580505371,grad_norm: 0.9999990521095704, iteration: 88977
loss: 1.0132319927215576,grad_norm: 0.9999992032650528, iteration: 88978
loss: 1.006538987159729,grad_norm: 0.9999992383837429, iteration: 88979
loss: 1.0002236366271973,grad_norm: 0.9999990803018025, iteration: 88980
loss: 1.0443204641342163,grad_norm: 1.00000000727558, iteration: 88981
loss: 1.0615266561508179,grad_norm: 0.9999992357144598, iteration: 88982
loss: 0.9767149090766907,grad_norm: 0.9703410714386235, iteration: 88983
loss: 1.02973473072052,grad_norm: 0.9999992034274957, iteration: 88984
loss: 1.0166034698486328,grad_norm: 0.9999991694328413, iteration: 88985
loss: 0.9953567981719971,grad_norm: 0.9999996321639416, iteration: 88986
loss: 1.0318125486373901,grad_norm: 0.9999994402307485, iteration: 88987
loss: 0.9966116547584534,grad_norm: 0.9999990891933863, iteration: 88988
loss: 0.9710490107536316,grad_norm: 0.9097139499932676, iteration: 88989
loss: 0.9804171919822693,grad_norm: 0.9200591272595628, iteration: 88990
loss: 1.0517332553863525,grad_norm: 0.9999994232418702, iteration: 88991
loss: 0.9579211473464966,grad_norm: 0.9999991312066533, iteration: 88992
loss: 1.0470327138900757,grad_norm: 0.9999991521972432, iteration: 88993
loss: 1.014953851699829,grad_norm: 0.9999992438519858, iteration: 88994
loss: 0.981172502040863,grad_norm: 0.9999989993486171, iteration: 88995
loss: 0.9840167164802551,grad_norm: 0.9999993225153975, iteration: 88996
loss: 1.034009337425232,grad_norm: 0.9999990796739642, iteration: 88997
loss: 0.970801055431366,grad_norm: 0.9999992811116313, iteration: 88998
loss: 1.0548808574676514,grad_norm: 0.9999990170204108, iteration: 88999
loss: 1.0247262716293335,grad_norm: 0.9999990710546188, iteration: 89000
loss: 1.040858268737793,grad_norm: 0.9170038119463448, iteration: 89001
loss: 0.9816005229949951,grad_norm: 0.9999996972466356, iteration: 89002
loss: 0.9834780693054199,grad_norm: 0.9999994985173307, iteration: 89003
loss: 1.077389121055603,grad_norm: 0.9999997458076569, iteration: 89004
loss: 0.9997755289077759,grad_norm: 0.9999990955726957, iteration: 89005
loss: 1.010352373123169,grad_norm: 0.9999992152624008, iteration: 89006
loss: 1.0532054901123047,grad_norm: 0.9999993020035459, iteration: 89007
loss: 1.015007495880127,grad_norm: 0.9999992478758142, iteration: 89008
loss: 0.9815012216567993,grad_norm: 0.9999991634338282, iteration: 89009
loss: 0.977940022945404,grad_norm: 0.9999990247819487, iteration: 89010
loss: 0.9724803566932678,grad_norm: 0.9999991546441229, iteration: 89011
loss: 1.0124847888946533,grad_norm: 0.9999991668304312, iteration: 89012
loss: 1.0239074230194092,grad_norm: 0.9999991049207871, iteration: 89013
loss: 0.9739969968795776,grad_norm: 0.9999991786368956, iteration: 89014
loss: 1.0244337320327759,grad_norm: 0.9999990714790764, iteration: 89015
loss: 0.9986643195152283,grad_norm: 0.9999992740548604, iteration: 89016
loss: 0.9898017048835754,grad_norm: 0.9999993701705766, iteration: 89017
loss: 0.9604475498199463,grad_norm: 0.9717297043734003, iteration: 89018
loss: 0.9786843657493591,grad_norm: 0.9895983669277287, iteration: 89019
loss: 1.0216479301452637,grad_norm: 0.985023927653364, iteration: 89020
loss: 1.0184893608093262,grad_norm: 0.849309815976527, iteration: 89021
loss: 1.0392332077026367,grad_norm: 0.9999990980010507, iteration: 89022
loss: 1.0149022340774536,grad_norm: 0.9683358547140224, iteration: 89023
loss: 1.0146044492721558,grad_norm: 0.9999990786254859, iteration: 89024
loss: 1.066746473312378,grad_norm: 1.0000000570713186, iteration: 89025
loss: 0.9951143264770508,grad_norm: 0.9999991183685331, iteration: 89026
loss: 1.0282676219940186,grad_norm: 0.8736575301786976, iteration: 89027
loss: 1.0163781642913818,grad_norm: 0.9999993331350376, iteration: 89028
loss: 0.986611545085907,grad_norm: 0.9134142963283329, iteration: 89029
loss: 1.0113840103149414,grad_norm: 0.9999992824478495, iteration: 89030
loss: 1.0182596445083618,grad_norm: 0.9999991802104047, iteration: 89031
loss: 1.0683103799819946,grad_norm: 0.9999995822386036, iteration: 89032
loss: 0.9971765875816345,grad_norm: 0.9999992006050572, iteration: 89033
loss: 1.0036776065826416,grad_norm: 0.9999990282902591, iteration: 89034
loss: 0.9542765021324158,grad_norm: 0.9999991346043018, iteration: 89035
loss: 0.9738888144493103,grad_norm: 0.9999995200625336, iteration: 89036
loss: 1.01154363155365,grad_norm: 1.0000000099629676, iteration: 89037
loss: 1.019959807395935,grad_norm: 0.9999996768755459, iteration: 89038
loss: 0.9962878227233887,grad_norm: 0.9999988994292107, iteration: 89039
loss: 0.978813648223877,grad_norm: 0.9999993348867253, iteration: 89040
loss: 1.021492600440979,grad_norm: 0.9999991390406541, iteration: 89041
loss: 1.009436011314392,grad_norm: 0.9999991482537578, iteration: 89042
loss: 0.9719542860984802,grad_norm: 0.9098206299735664, iteration: 89043
loss: 1.0208284854888916,grad_norm: 0.97638812364056, iteration: 89044
loss: 1.0326334238052368,grad_norm: 0.9999991319093048, iteration: 89045
loss: 1.0104947090148926,grad_norm: 0.9999990857173142, iteration: 89046
loss: 0.9900773763656616,grad_norm: 0.9596367669665751, iteration: 89047
loss: 1.0121033191680908,grad_norm: 0.9999991957733217, iteration: 89048
loss: 1.0145726203918457,grad_norm: 0.9999992148391273, iteration: 89049
loss: 0.9882148504257202,grad_norm: 0.9999992162348278, iteration: 89050
loss: 1.0115902423858643,grad_norm: 0.9329671082469086, iteration: 89051
loss: 0.9852399826049805,grad_norm: 0.9999990685062554, iteration: 89052
loss: 1.0121753215789795,grad_norm: 0.9999990466634279, iteration: 89053
loss: 1.011748194694519,grad_norm: 0.9999995107077413, iteration: 89054
loss: 0.9430684447288513,grad_norm: 0.9999991304036926, iteration: 89055
loss: 1.0312092304229736,grad_norm: 0.9999990915497209, iteration: 89056
loss: 0.9645769000053406,grad_norm: 0.9280401683205238, iteration: 89057
loss: 0.9906572699546814,grad_norm: 0.9999991703364047, iteration: 89058
loss: 1.0095502138137817,grad_norm: 0.9999994020883372, iteration: 89059
loss: 1.018959641456604,grad_norm: 0.8981056762565477, iteration: 89060
loss: 0.9793083667755127,grad_norm: 0.9658114937925918, iteration: 89061
loss: 1.008471965789795,grad_norm: 0.9999990432844151, iteration: 89062
loss: 1.035973310470581,grad_norm: 0.9331127165569523, iteration: 89063
loss: 1.006690263748169,grad_norm: 0.9890138727650691, iteration: 89064
loss: 0.9926754832267761,grad_norm: 0.992776496924244, iteration: 89065
loss: 1.0080381631851196,grad_norm: 0.9999991590104955, iteration: 89066
loss: 1.0326058864593506,grad_norm: 0.9050734962572942, iteration: 89067
loss: 1.0364657640457153,grad_norm: 0.9999995581712146, iteration: 89068
loss: 1.0048213005065918,grad_norm: 0.9999991871246922, iteration: 89069
loss: 0.9940818548202515,grad_norm: 0.9999991533474811, iteration: 89070
loss: 0.9914417266845703,grad_norm: 0.9999991281159433, iteration: 89071
loss: 1.0080891847610474,grad_norm: 0.9068595318577484, iteration: 89072
loss: 1.032514214515686,grad_norm: 0.9999990353853908, iteration: 89073
loss: 0.9881699681282043,grad_norm: 0.9999992444438909, iteration: 89074
loss: 1.022929310798645,grad_norm: 0.9181056475979971, iteration: 89075
loss: 0.982921838760376,grad_norm: 0.999999300317222, iteration: 89076
loss: 1.003267765045166,grad_norm: 0.9999989691552537, iteration: 89077
loss: 1.007508397102356,grad_norm: 0.9269372388027217, iteration: 89078
loss: 1.020699381828308,grad_norm: 0.990653343072663, iteration: 89079
loss: 1.0395381450653076,grad_norm: 0.9999992037208285, iteration: 89080
loss: 1.023738145828247,grad_norm: 0.999999149310789, iteration: 89081
loss: 1.0250385999679565,grad_norm: 0.999999104213325, iteration: 89082
loss: 1.006108283996582,grad_norm: 0.9426473116195616, iteration: 89083
loss: 0.9991095066070557,grad_norm: 0.999999335656461, iteration: 89084
loss: 0.9959200024604797,grad_norm: 0.999999134428989, iteration: 89085
loss: 1.0142371654510498,grad_norm: 0.9999990886301794, iteration: 89086
loss: 0.9650833010673523,grad_norm: 0.9999995007510252, iteration: 89087
loss: 0.9878070950508118,grad_norm: 0.9276869080430721, iteration: 89088
loss: 0.9981197118759155,grad_norm: 0.9999991256537234, iteration: 89089
loss: 0.984961986541748,grad_norm: 0.9513861848017253, iteration: 89090
loss: 0.9788960814476013,grad_norm: 0.9999991031855746, iteration: 89091
loss: 1.0042469501495361,grad_norm: 0.984503847220917, iteration: 89092
loss: 0.9811532497406006,grad_norm: 0.9777139662964717, iteration: 89093
loss: 0.9715801477432251,grad_norm: 0.9503183200468176, iteration: 89094
loss: 1.0273562669754028,grad_norm: 0.9921306626077154, iteration: 89095
loss: 0.9993545413017273,grad_norm: 0.9999988406542374, iteration: 89096
loss: 1.029412865638733,grad_norm: 0.9999991761722562, iteration: 89097
loss: 1.0181061029434204,grad_norm: 0.9999992049744773, iteration: 89098
loss: 0.9996423721313477,grad_norm: 0.9999990839594814, iteration: 89099
loss: 0.9850997924804688,grad_norm: 0.9999991386379025, iteration: 89100
loss: 1.0011723041534424,grad_norm: 0.9328115100999939, iteration: 89101
loss: 1.1059166193008423,grad_norm: 0.999999302311364, iteration: 89102
loss: 0.9947019219398499,grad_norm: 0.9999991749502898, iteration: 89103
loss: 0.9915141463279724,grad_norm: 0.9506934457696168, iteration: 89104
loss: 1.0584948062896729,grad_norm: 0.9999995631090848, iteration: 89105
loss: 0.9873786568641663,grad_norm: 0.9999991376305934, iteration: 89106
loss: 1.006206750869751,grad_norm: 0.972412295021583, iteration: 89107
loss: 0.9893278479576111,grad_norm: 0.9999992540703826, iteration: 89108
loss: 0.9995040893554688,grad_norm: 0.9999990799216217, iteration: 89109
loss: 1.0825575590133667,grad_norm: 0.9999992066206134, iteration: 89110
loss: 0.9874977469444275,grad_norm: 0.9286886535657893, iteration: 89111
loss: 1.0047508478164673,grad_norm: 0.9999995888332599, iteration: 89112
loss: 1.0112866163253784,grad_norm: 0.9360695669325587, iteration: 89113
loss: 1.0489133596420288,grad_norm: 0.9999994545957575, iteration: 89114
loss: 0.9869599938392639,grad_norm: 0.9485380137969229, iteration: 89115
loss: 0.9922618865966797,grad_norm: 0.9999989048447324, iteration: 89116
loss: 1.0377029180526733,grad_norm: 0.9997252887664487, iteration: 89117
loss: 1.0351053476333618,grad_norm: 0.9999992551728533, iteration: 89118
loss: 1.0288132429122925,grad_norm: 0.9826634799658336, iteration: 89119
loss: 1.066183090209961,grad_norm: 0.9999997209988996, iteration: 89120
loss: 1.005608320236206,grad_norm: 0.9725485240509715, iteration: 89121
loss: 1.0192210674285889,grad_norm: 0.9999991974834213, iteration: 89122
loss: 0.9791073203086853,grad_norm: 0.957391491735704, iteration: 89123
loss: 0.967376708984375,grad_norm: 0.9999989395065108, iteration: 89124
loss: 0.999477207660675,grad_norm: 0.9999989692078484, iteration: 89125
loss: 0.9898275136947632,grad_norm: 0.9329977711697028, iteration: 89126
loss: 1.075748085975647,grad_norm: 0.999999302690776, iteration: 89127
loss: 0.9940022230148315,grad_norm: 0.9999991183888218, iteration: 89128
loss: 0.9946380257606506,grad_norm: 0.9999990488727262, iteration: 89129
loss: 1.005334496498108,grad_norm: 0.8440748685689785, iteration: 89130
loss: 0.9949033856391907,grad_norm: 0.9983714454509683, iteration: 89131
loss: 0.9822347164154053,grad_norm: 0.9999990292338531, iteration: 89132
loss: 1.0085606575012207,grad_norm: 0.9883412089811783, iteration: 89133
loss: 1.0012065172195435,grad_norm: 0.9999991011586453, iteration: 89134
loss: 0.993919849395752,grad_norm: 0.9877420610279201, iteration: 89135
loss: 1.014419674873352,grad_norm: 0.9999990800272991, iteration: 89136
loss: 0.9784911274909973,grad_norm: 0.9054549647160772, iteration: 89137
loss: 0.9972718954086304,grad_norm: 0.9999991613465576, iteration: 89138
loss: 1.0116249322891235,grad_norm: 0.9999990760719886, iteration: 89139
loss: 0.983442485332489,grad_norm: 0.999999040804483, iteration: 89140
loss: 1.0199170112609863,grad_norm: 0.9612619740771111, iteration: 89141
loss: 1.0207139253616333,grad_norm: 0.9999990850594144, iteration: 89142
loss: 0.9897818565368652,grad_norm: 0.9999991132025036, iteration: 89143
loss: 1.002432107925415,grad_norm: 0.9999992508621479, iteration: 89144
loss: 0.9877044558525085,grad_norm: 0.8374830588372459, iteration: 89145
loss: 0.9863770604133606,grad_norm: 0.9960654435766332, iteration: 89146
loss: 1.0404943227767944,grad_norm: 0.9999993305902535, iteration: 89147
loss: 1.0152848958969116,grad_norm: 0.9999995557142133, iteration: 89148
loss: 1.1242437362670898,grad_norm: 0.9999992590902183, iteration: 89149
loss: 1.0055750608444214,grad_norm: 0.9999990916157568, iteration: 89150
loss: 0.971668004989624,grad_norm: 0.9999991455950052, iteration: 89151
loss: 1.0127147436141968,grad_norm: 0.9455335948303191, iteration: 89152
loss: 1.0204657316207886,grad_norm: 0.9999998855834704, iteration: 89153
loss: 0.996925413608551,grad_norm: 0.925542146397082, iteration: 89154
loss: 0.9903536438941956,grad_norm: 0.9999990548442386, iteration: 89155
loss: 0.9988640546798706,grad_norm: 0.9965482937905845, iteration: 89156
loss: 1.0012285709381104,grad_norm: 0.9243230417340729, iteration: 89157
loss: 0.9906418323516846,grad_norm: 0.9999991070231513, iteration: 89158
loss: 0.9969112873077393,grad_norm: 0.9999993214970001, iteration: 89159
loss: 1.0352689027786255,grad_norm: 0.9999991381749183, iteration: 89160
loss: 0.985796332359314,grad_norm: 0.9999990630572111, iteration: 89161
loss: 1.0353599786758423,grad_norm: 0.9999991675894142, iteration: 89162
loss: 0.968526303768158,grad_norm: 0.999999066082187, iteration: 89163
loss: 1.0026406049728394,grad_norm: 0.9999999524272135, iteration: 89164
loss: 1.0012881755828857,grad_norm: 0.9999991361031643, iteration: 89165
loss: 1.0023115873336792,grad_norm: 0.9999991527483757, iteration: 89166
loss: 1.0050657987594604,grad_norm: 0.8909500938868128, iteration: 89167
loss: 1.000779390335083,grad_norm: 0.999999175228631, iteration: 89168
loss: 1.0102561712265015,grad_norm: 0.9999991095370839, iteration: 89169
loss: 0.9777311682701111,grad_norm: 0.9999991823120777, iteration: 89170
loss: 1.0129750967025757,grad_norm: 0.999999277336823, iteration: 89171
loss: 1.0008541345596313,grad_norm: 0.9999990807779918, iteration: 89172
loss: 1.0121276378631592,grad_norm: 0.9776505227142487, iteration: 89173
loss: 1.0095223188400269,grad_norm: 0.9970227935660391, iteration: 89174
loss: 0.9700838923454285,grad_norm: 0.9644917667093936, iteration: 89175
loss: 1.0098897218704224,grad_norm: 0.9999992395332552, iteration: 89176
loss: 0.9807563424110413,grad_norm: 0.9999990201122299, iteration: 89177
loss: 0.9906694889068604,grad_norm: 0.9999990924436066, iteration: 89178
loss: 1.0140440464019775,grad_norm: 0.8620288195887094, iteration: 89179
loss: 1.0342048406600952,grad_norm: 0.9999991508663899, iteration: 89180
loss: 1.0257112979888916,grad_norm: 0.8913884914470407, iteration: 89181
loss: 0.9869034290313721,grad_norm: 0.8857471124040006, iteration: 89182
loss: 1.0081522464752197,grad_norm: 0.9999990486122312, iteration: 89183
loss: 0.9801709651947021,grad_norm: 0.9999988884621326, iteration: 89184
loss: 0.9942536950111389,grad_norm: 0.9999990306025895, iteration: 89185
loss: 0.9951657652854919,grad_norm: 0.9799197342072811, iteration: 89186
loss: 0.9788057208061218,grad_norm: 0.9802508311102607, iteration: 89187
loss: 0.9970200061798096,grad_norm: 0.8784745685201815, iteration: 89188
loss: 1.052996277809143,grad_norm: 0.9999998153893597, iteration: 89189
loss: 1.057838797569275,grad_norm: 0.9999992664775833, iteration: 89190
loss: 0.9866653084754944,grad_norm: 0.9999992379503736, iteration: 89191
loss: 1.0228281021118164,grad_norm: 0.9967517527509127, iteration: 89192
loss: 0.954704225063324,grad_norm: 0.8466141409033935, iteration: 89193
loss: 1.0137227773666382,grad_norm: 0.9157490740640988, iteration: 89194
loss: 0.9862499237060547,grad_norm: 0.9999991184221916, iteration: 89195
loss: 1.03572416305542,grad_norm: 0.9999990917051979, iteration: 89196
loss: 0.999097466468811,grad_norm: 0.9354369987876893, iteration: 89197
loss: 1.0090973377227783,grad_norm: 0.999999213400164, iteration: 89198
loss: 0.9991304278373718,grad_norm: 0.9147945525180576, iteration: 89199
loss: 1.014100193977356,grad_norm: 0.999999205857045, iteration: 89200
loss: 1.0465189218521118,grad_norm: 0.9314234328616523, iteration: 89201
loss: 0.9858881235122681,grad_norm: 0.944202003139881, iteration: 89202
loss: 0.9683734178543091,grad_norm: 0.999999094148782, iteration: 89203
loss: 0.9861939549446106,grad_norm: 0.9999990444492795, iteration: 89204
loss: 0.976936936378479,grad_norm: 0.961652474767391, iteration: 89205
loss: 0.9982858300209045,grad_norm: 0.9999990941546759, iteration: 89206
loss: 1.0055149793624878,grad_norm: 0.9999991284313309, iteration: 89207
loss: 0.9942666292190552,grad_norm: 0.999999185325782, iteration: 89208
loss: 0.9931807518005371,grad_norm: 0.8649687990506366, iteration: 89209
loss: 0.9781770706176758,grad_norm: 0.9999990815185152, iteration: 89210
loss: 0.9845998883247375,grad_norm: 0.9872783797407209, iteration: 89211
loss: 0.9772321581840515,grad_norm: 0.9999991634659581, iteration: 89212
loss: 0.9634507894515991,grad_norm: 0.8677574420851474, iteration: 89213
loss: 1.0080183744430542,grad_norm: 0.9803279439762499, iteration: 89214
loss: 1.0030198097229004,grad_norm: 0.9999990190787229, iteration: 89215
loss: 1.022095799446106,grad_norm: 0.9999999673854829, iteration: 89216
loss: 0.9846688508987427,grad_norm: 0.9999992126459825, iteration: 89217
loss: 0.9828801155090332,grad_norm: 0.9999989853151942, iteration: 89218
loss: 1.031483769416809,grad_norm: 0.8974061799380869, iteration: 89219
loss: 1.0195008516311646,grad_norm: 0.9999989888163906, iteration: 89220
loss: 1.0079845190048218,grad_norm: 0.9999992380140239, iteration: 89221
loss: 0.9900856018066406,grad_norm: 0.9999991484804989, iteration: 89222
loss: 0.9706059098243713,grad_norm: 0.9999992453235083, iteration: 89223
loss: 1.012545108795166,grad_norm: 0.9999991126499241, iteration: 89224
loss: 1.0007171630859375,grad_norm: 0.9858995244761463, iteration: 89225
loss: 1.0129393339157104,grad_norm: 0.9371460215174937, iteration: 89226
loss: 1.009000301361084,grad_norm: 0.9999991687233613, iteration: 89227
loss: 0.9655942320823669,grad_norm: 0.840779626269155, iteration: 89228
loss: 1.0069453716278076,grad_norm: 0.9999994357985591, iteration: 89229
loss: 1.009079933166504,grad_norm: 0.9999991975495051, iteration: 89230
loss: 1.0305883884429932,grad_norm: 0.8968236240899776, iteration: 89231
loss: 1.0311068296432495,grad_norm: 0.9999991389765579, iteration: 89232
loss: 1.035656452178955,grad_norm: 0.9999991382486886, iteration: 89233
loss: 0.9738253951072693,grad_norm: 0.9302401547385164, iteration: 89234
loss: 1.0147091150283813,grad_norm: 0.9722297821486598, iteration: 89235
loss: 0.9987254738807678,grad_norm: 0.8463096050955253, iteration: 89236
loss: 1.0065826177597046,grad_norm: 0.9462608198148226, iteration: 89237
loss: 0.985659658908844,grad_norm: 0.9999990175863382, iteration: 89238
loss: 1.0204554796218872,grad_norm: 0.9999992581309213, iteration: 89239
loss: 1.0283197164535522,grad_norm: 0.9943452833249447, iteration: 89240
loss: 1.0281981229782104,grad_norm: 0.9999990976004378, iteration: 89241
loss: 0.9999446868896484,grad_norm: 0.8913257031655358, iteration: 89242
loss: 0.9716172814369202,grad_norm: 0.9999990770871268, iteration: 89243
loss: 1.018890619277954,grad_norm: 0.999999116576562, iteration: 89244
loss: 1.0230023860931396,grad_norm: 0.99999931271016, iteration: 89245
loss: 0.9917128086090088,grad_norm: 0.9999991725020146, iteration: 89246
loss: 1.0044535398483276,grad_norm: 0.9999991501683596, iteration: 89247
loss: 1.049323320388794,grad_norm: 0.9999991816241262, iteration: 89248
loss: 0.969139814376831,grad_norm: 0.9999990926949551, iteration: 89249
loss: 0.9804215431213379,grad_norm: 0.9999998509498876, iteration: 89250
loss: 1.0259382724761963,grad_norm: 0.9999992196341007, iteration: 89251
loss: 1.0028586387634277,grad_norm: 0.9905192811968042, iteration: 89252
loss: 1.0735362768173218,grad_norm: 0.9999996487917318, iteration: 89253
loss: 0.9953808784484863,grad_norm: 0.9999990871426847, iteration: 89254
loss: 1.0240564346313477,grad_norm: 0.9999990570446082, iteration: 89255
loss: 1.035781741142273,grad_norm: 0.9999992148042475, iteration: 89256
loss: 0.9927150011062622,grad_norm: 0.9175544984999102, iteration: 89257
loss: 0.9888842105865479,grad_norm: 0.9520133041865726, iteration: 89258
loss: 0.9569370746612549,grad_norm: 0.9999990565122805, iteration: 89259
loss: 0.9976860284805298,grad_norm: 0.9999990820316768, iteration: 89260
loss: 1.0303691625595093,grad_norm: 0.9999989196762304, iteration: 89261
loss: 1.0225927829742432,grad_norm: 0.936063277571222, iteration: 89262
loss: 1.0330140590667725,grad_norm: 0.9999998818527943, iteration: 89263
loss: 1.0208452939987183,grad_norm: 0.9999991523180157, iteration: 89264
loss: 0.9751026034355164,grad_norm: 0.9999991936390716, iteration: 89265
loss: 0.9814372658729553,grad_norm: 0.9999990253318357, iteration: 89266
loss: 0.9928873777389526,grad_norm: 0.9999993165846475, iteration: 89267
loss: 1.0389258861541748,grad_norm: 0.9999991574085858, iteration: 89268
loss: 1.021794319152832,grad_norm: 0.8540576566172403, iteration: 89269
loss: 0.9690537452697754,grad_norm: 0.9999992487117612, iteration: 89270
loss: 0.9869975447654724,grad_norm: 0.99999917733113, iteration: 89271
loss: 0.9859243631362915,grad_norm: 0.9999990150055661, iteration: 89272
loss: 1.0052529573440552,grad_norm: 0.9999989513897836, iteration: 89273
loss: 0.9677660465240479,grad_norm: 0.9748939723914134, iteration: 89274
loss: 1.0445691347122192,grad_norm: 0.999999143698336, iteration: 89275
loss: 1.0416779518127441,grad_norm: 0.999999700504318, iteration: 89276
loss: 1.0209766626358032,grad_norm: 0.9999991230333684, iteration: 89277
loss: 1.0018019676208496,grad_norm: 0.8944781762943415, iteration: 89278
loss: 0.9851737022399902,grad_norm: 0.9999989922945598, iteration: 89279
loss: 0.9943812489509583,grad_norm: 0.9999991788542097, iteration: 89280
loss: 1.0324792861938477,grad_norm: 0.9999992391451463, iteration: 89281
loss: 1.0306369066238403,grad_norm: 0.9999990061256028, iteration: 89282
loss: 0.9738739132881165,grad_norm: 0.9637038078421668, iteration: 89283
loss: 0.9713148474693298,grad_norm: 0.8356072467375925, iteration: 89284
loss: 0.9995181560516357,grad_norm: 0.9999992582554408, iteration: 89285
loss: 1.022670030593872,grad_norm: 0.9999991186500092, iteration: 89286
loss: 1.0064021348953247,grad_norm: 0.9999992464299918, iteration: 89287
loss: 0.9897109866142273,grad_norm: 0.9999990941324249, iteration: 89288
loss: 0.9710748791694641,grad_norm: 0.9720299237274892, iteration: 89289
loss: 1.1047778129577637,grad_norm: 0.9999995356048647, iteration: 89290
loss: 0.9965773224830627,grad_norm: 0.8176463985514054, iteration: 89291
loss: 0.9915050864219666,grad_norm: 0.9999990610106589, iteration: 89292
loss: 1.0048587322235107,grad_norm: 0.9999990380211304, iteration: 89293
loss: 1.005595088005066,grad_norm: 0.999999223050363, iteration: 89294
loss: 0.9855024218559265,grad_norm: 0.9278413521096702, iteration: 89295
loss: 0.9716044664382935,grad_norm: 0.9999992899902621, iteration: 89296
loss: 1.025856614112854,grad_norm: 0.9999990708900011, iteration: 89297
loss: 0.9989214539527893,grad_norm: 0.9025642722995144, iteration: 89298
loss: 1.003095269203186,grad_norm: 0.8364649038745052, iteration: 89299
loss: 0.9908744096755981,grad_norm: 0.9138595040940718, iteration: 89300
loss: 0.998084545135498,grad_norm: 0.961577485016733, iteration: 89301
loss: 1.0197097063064575,grad_norm: 0.9602706289249892, iteration: 89302
loss: 0.9977039694786072,grad_norm: 0.9999992745432298, iteration: 89303
loss: 1.0335673093795776,grad_norm: 0.8893850784066315, iteration: 89304
loss: 0.9840607047080994,grad_norm: 0.8641574337852436, iteration: 89305
loss: 0.9898134469985962,grad_norm: 0.9770560698862955, iteration: 89306
loss: 1.0173574686050415,grad_norm: 0.9999995419491652, iteration: 89307
loss: 1.0236475467681885,grad_norm: 0.9999991882371541, iteration: 89308
loss: 0.9735709428787231,grad_norm: 0.999999043713728, iteration: 89309
loss: 1.0315383672714233,grad_norm: 0.9999990514656292, iteration: 89310
loss: 1.0362478494644165,grad_norm: 0.9999991159044215, iteration: 89311
loss: 0.9735943675041199,grad_norm: 0.9931171099846925, iteration: 89312
loss: 1.0122458934783936,grad_norm: 0.9062417919688299, iteration: 89313
loss: 0.9833347201347351,grad_norm: 0.8400421113932667, iteration: 89314
loss: 0.9979459643363953,grad_norm: 0.99999905671008, iteration: 89315
loss: 1.0075781345367432,grad_norm: 0.9999991776118388, iteration: 89316
loss: 1.0231013298034668,grad_norm: 0.9912426421161874, iteration: 89317
loss: 0.9604172110557556,grad_norm: 0.9999991865770755, iteration: 89318
loss: 1.0281130075454712,grad_norm: 0.9999990422035837, iteration: 89319
loss: 0.9899193048477173,grad_norm: 0.9999991520143469, iteration: 89320
loss: 1.003568172454834,grad_norm: 0.9715634695145895, iteration: 89321
loss: 0.991656482219696,grad_norm: 0.9999996399590122, iteration: 89322
loss: 0.9983372688293457,grad_norm: 0.9999991278614748, iteration: 89323
loss: 0.9905321002006531,grad_norm: 0.9672043382454915, iteration: 89324
loss: 1.0042933225631714,grad_norm: 0.8947854186832055, iteration: 89325
loss: 0.9469770789146423,grad_norm: 0.9616802933361491, iteration: 89326
loss: 1.0054081678390503,grad_norm: 0.9494451205399287, iteration: 89327
loss: 1.0070013999938965,grad_norm: 0.9999992385201838, iteration: 89328
loss: 0.9629018306732178,grad_norm: 0.9999991115998489, iteration: 89329
loss: 1.023806095123291,grad_norm: 0.9999990918235204, iteration: 89330
loss: 1.0120701789855957,grad_norm: 0.9999991496088159, iteration: 89331
loss: 0.9597334861755371,grad_norm: 0.9780083332521134, iteration: 89332
loss: 0.9926303029060364,grad_norm: 0.9268498772766112, iteration: 89333
loss: 0.9733319878578186,grad_norm: 0.9234306832392731, iteration: 89334
loss: 0.9699833393096924,grad_norm: 0.9999991538692019, iteration: 89335
loss: 0.992026686668396,grad_norm: 0.9999991109641442, iteration: 89336
loss: 0.9769318103790283,grad_norm: 0.9999991800429, iteration: 89337
loss: 0.9660364389419556,grad_norm: 0.9997113025034718, iteration: 89338
loss: 1.0617938041687012,grad_norm: 0.9999996568948628, iteration: 89339
loss: 1.025494933128357,grad_norm: 0.9999991150934043, iteration: 89340
loss: 1.0372644662857056,grad_norm: 0.9999990805260697, iteration: 89341
loss: 0.9864412546157837,grad_norm: 0.9999990429048661, iteration: 89342
loss: 1.006474256515503,grad_norm: 0.9703226370845335, iteration: 89343
loss: 1.0495766401290894,grad_norm: 0.9999992431461289, iteration: 89344
loss: 0.9807068705558777,grad_norm: 0.9866515237236992, iteration: 89345
loss: 1.0247657299041748,grad_norm: 0.9999993328202793, iteration: 89346
loss: 0.9867597818374634,grad_norm: 0.9999991640402812, iteration: 89347
loss: 0.9736461043357849,grad_norm: 0.9999991223111406, iteration: 89348
loss: 0.9987939596176147,grad_norm: 0.9512362390604235, iteration: 89349
loss: 1.0041147470474243,grad_norm: 0.9999992212439807, iteration: 89350
loss: 0.9756885170936584,grad_norm: 0.9703692270162206, iteration: 89351
loss: 0.9831138253211975,grad_norm: 0.9206991338048034, iteration: 89352
loss: 1.0384172201156616,grad_norm: 0.9999988433660346, iteration: 89353
loss: 1.020300030708313,grad_norm: 0.9999992138187103, iteration: 89354
loss: 0.9721686244010925,grad_norm: 0.8782732059911647, iteration: 89355
loss: 1.0065279006958008,grad_norm: 0.9999991698519014, iteration: 89356
loss: 0.9843525886535645,grad_norm: 0.9999991832705972, iteration: 89357
loss: 0.9707394242286682,grad_norm: 0.9844142209978393, iteration: 89358
loss: 0.9685086607933044,grad_norm: 0.9999990815501967, iteration: 89359
loss: 1.0024197101593018,grad_norm: 0.9999989659440043, iteration: 89360
loss: 0.9862818121910095,grad_norm: 0.9781155526172778, iteration: 89361
loss: 1.0132858753204346,grad_norm: 0.9999990510995083, iteration: 89362
loss: 1.0075594186782837,grad_norm: 0.9999992125744581, iteration: 89363
loss: 1.011741042137146,grad_norm: 0.9999990769231001, iteration: 89364
loss: 0.9830837845802307,grad_norm: 0.9906672428943253, iteration: 89365
loss: 0.982579231262207,grad_norm: 0.9999991802508226, iteration: 89366
loss: 0.9829313158988953,grad_norm: 0.9999998617931916, iteration: 89367
loss: 0.9966984391212463,grad_norm: 0.999999003082828, iteration: 89368
loss: 1.0375264883041382,grad_norm: 0.9639174799457081, iteration: 89369
loss: 0.9959617853164673,grad_norm: 0.9999991438883897, iteration: 89370
loss: 0.9887818694114685,grad_norm: 0.9999990983117703, iteration: 89371
loss: 1.0094348192214966,grad_norm: 0.9999992468841474, iteration: 89372
loss: 0.9720507264137268,grad_norm: 0.9999991431313646, iteration: 89373
loss: 1.0206838846206665,grad_norm: 0.9999990649203909, iteration: 89374
loss: 0.9661661982536316,grad_norm: 0.9999991243637948, iteration: 89375
loss: 1.4961470365524292,grad_norm: 0.9999995992788325, iteration: 89376
loss: 0.9750043749809265,grad_norm: 0.9999991162398661, iteration: 89377
loss: 0.956519365310669,grad_norm: 0.9999992171588502, iteration: 89378
loss: 0.9774437546730042,grad_norm: 0.9143514089027274, iteration: 89379
loss: 0.9610189199447632,grad_norm: 0.999999091097513, iteration: 89380
loss: 0.9957587718963623,grad_norm: 0.9999991112924446, iteration: 89381
loss: 0.9695301651954651,grad_norm: 0.9999991759387126, iteration: 89382
loss: 0.9998032450675964,grad_norm: 0.9999990722158255, iteration: 89383
loss: 0.9973477125167847,grad_norm: 0.8777709761259267, iteration: 89384
loss: 0.9943176507949829,grad_norm: 0.999999445461582, iteration: 89385
loss: 1.032922625541687,grad_norm: 0.9999991133249159, iteration: 89386
loss: 1.0606470108032227,grad_norm: 0.9999995097054716, iteration: 89387
loss: 1.0368486642837524,grad_norm: 0.9999991995716735, iteration: 89388
loss: 0.9959395527839661,grad_norm: 0.9999997442663203, iteration: 89389
loss: 1.0403828620910645,grad_norm: 0.9999990814964341, iteration: 89390
loss: 1.0614327192306519,grad_norm: 0.9999994200553106, iteration: 89391
loss: 0.9856633543968201,grad_norm: 0.9999991802133074, iteration: 89392
loss: 0.9741399884223938,grad_norm: 0.9999990902294065, iteration: 89393
loss: 0.9820775985717773,grad_norm: 0.9999993192197631, iteration: 89394
loss: 0.996718168258667,grad_norm: 0.9999991436980695, iteration: 89395
loss: 1.0330467224121094,grad_norm: 0.9999992040174065, iteration: 89396
loss: 1.0053772926330566,grad_norm: 0.9999992310158433, iteration: 89397
loss: 0.9975595474243164,grad_norm: 0.9999990670281041, iteration: 89398
loss: 1.0171912908554077,grad_norm: 0.9999991454933504, iteration: 89399
loss: 0.9887136220932007,grad_norm: 0.9999990776566878, iteration: 89400
loss: 0.9958648085594177,grad_norm: 0.989123307431973, iteration: 89401
loss: 0.9757853746414185,grad_norm: 0.9894874986357768, iteration: 89402
loss: 0.9911128878593445,grad_norm: 0.940979779459152, iteration: 89403
loss: 0.9719693064689636,grad_norm: 0.999999171138906, iteration: 89404
loss: 1.022769570350647,grad_norm: 0.9999991604479068, iteration: 89405
loss: 0.9718407392501831,grad_norm: 0.9999990823462103, iteration: 89406
loss: 0.9745717644691467,grad_norm: 0.9999990559856875, iteration: 89407
loss: 0.995095431804657,grad_norm: 0.9999992260582462, iteration: 89408
loss: 1.0145479440689087,grad_norm: 0.9999990261410882, iteration: 89409
loss: 0.9863049983978271,grad_norm: 0.9599295532486452, iteration: 89410
loss: 1.0178698301315308,grad_norm: 0.9269190341542017, iteration: 89411
loss: 1.0748469829559326,grad_norm: 0.9999995521550906, iteration: 89412
loss: 0.9906649589538574,grad_norm: 0.99999910221422, iteration: 89413
loss: 0.9662473797798157,grad_norm: 0.9999992614023865, iteration: 89414
loss: 0.9770401120185852,grad_norm: 0.9999992182073383, iteration: 89415
loss: 0.9920748472213745,grad_norm: 0.9999989628016648, iteration: 89416
loss: 1.0266709327697754,grad_norm: 0.9836801819329948, iteration: 89417
loss: 1.0035943984985352,grad_norm: 0.9751499678626085, iteration: 89418
loss: 0.990338146686554,grad_norm: 0.9999991181258626, iteration: 89419
loss: 1.0272080898284912,grad_norm: 0.9999997099498258, iteration: 89420
loss: 1.0197111368179321,grad_norm: 0.9059348881951278, iteration: 89421
loss: 0.9954392910003662,grad_norm: 0.9999990924241796, iteration: 89422
loss: 0.9673848152160645,grad_norm: 0.9999991619967068, iteration: 89423
loss: 0.9988015294075012,grad_norm: 0.9999999202055818, iteration: 89424
loss: 0.9400073289871216,grad_norm: 0.9822196010988615, iteration: 89425
loss: 0.9323784112930298,grad_norm: 0.9999989917283909, iteration: 89426
loss: 1.0170494318008423,grad_norm: 0.9014763940848219, iteration: 89427
loss: 0.98602694272995,grad_norm: 0.9999991682618258, iteration: 89428
loss: 0.9966715574264526,grad_norm: 0.999999165474936, iteration: 89429
loss: 1.0825436115264893,grad_norm: 0.9999996717194063, iteration: 89430
loss: 1.0391221046447754,grad_norm: 0.9999991570460134, iteration: 89431
loss: 0.9887309670448303,grad_norm: 0.9999992519945172, iteration: 89432
loss: 0.9935577511787415,grad_norm: 0.9581937529374682, iteration: 89433
loss: 1.0168269872665405,grad_norm: 0.9999991485365977, iteration: 89434
loss: 1.0000958442687988,grad_norm: 0.9999990770523068, iteration: 89435
loss: 0.996290385723114,grad_norm: 0.9999996059545646, iteration: 89436
loss: 1.0146585702896118,grad_norm: 0.999999217692285, iteration: 89437
loss: 1.0237125158309937,grad_norm: 0.9999990704121696, iteration: 89438
loss: 0.9752805829048157,grad_norm: 0.9937694372021035, iteration: 89439
loss: 1.0183247327804565,grad_norm: 0.9999991574150353, iteration: 89440
loss: 1.0204811096191406,grad_norm: 0.999998933255765, iteration: 89441
loss: 0.9928273558616638,grad_norm: 0.9999990606602255, iteration: 89442
loss: 0.997021496295929,grad_norm: 0.9999991508078576, iteration: 89443
loss: 1.0244630575180054,grad_norm: 0.9332162876511942, iteration: 89444
loss: 0.971237063407898,grad_norm: 0.9999990507283285, iteration: 89445
loss: 1.0277607440948486,grad_norm: 0.9999991044605611, iteration: 89446
loss: 1.0030759572982788,grad_norm: 0.9999992296274007, iteration: 89447
loss: 0.9706358313560486,grad_norm: 0.9999990885120312, iteration: 89448
loss: 1.0146669149398804,grad_norm: 0.82077410403984, iteration: 89449
loss: 0.9927226305007935,grad_norm: 0.9999992589235219, iteration: 89450
loss: 1.0163915157318115,grad_norm: 0.9948240322936819, iteration: 89451
loss: 1.0663057565689087,grad_norm: 0.9999991163268697, iteration: 89452
loss: 1.0080877542495728,grad_norm: 0.9999990166973216, iteration: 89453
loss: 1.016970157623291,grad_norm: 0.9625119794634134, iteration: 89454
loss: 1.002976417541504,grad_norm: 0.997898915392311, iteration: 89455
loss: 1.0252126455307007,grad_norm: 0.9999999246798534, iteration: 89456
loss: 0.979503870010376,grad_norm: 0.9999991778733971, iteration: 89457
loss: 1.0384067296981812,grad_norm: 0.9835851036196371, iteration: 89458
loss: 1.0214924812316895,grad_norm: 0.9999991302030066, iteration: 89459
loss: 1.0067849159240723,grad_norm: 0.9994042324963048, iteration: 89460
loss: 1.0164834260940552,grad_norm: 0.9999991801773574, iteration: 89461
loss: 0.9537597894668579,grad_norm: 0.999999313187308, iteration: 89462
loss: 1.025248646736145,grad_norm: 0.9223191891282584, iteration: 89463
loss: 1.0163133144378662,grad_norm: 0.9999991962356933, iteration: 89464
loss: 0.9688519835472107,grad_norm: 0.9999998281759345, iteration: 89465
loss: 0.9670146107673645,grad_norm: 0.999999191226563, iteration: 89466
loss: 1.0549564361572266,grad_norm: 0.9999992154568849, iteration: 89467
loss: 1.0054861307144165,grad_norm: 0.9999989320935836, iteration: 89468
loss: 1.0110093355178833,grad_norm: 0.9999991070352007, iteration: 89469
loss: 1.0194849967956543,grad_norm: 0.9418723905691029, iteration: 89470
loss: 0.9811010360717773,grad_norm: 0.9995570500540617, iteration: 89471
loss: 1.0341475009918213,grad_norm: 0.9429800198045933, iteration: 89472
loss: 1.0191359519958496,grad_norm: 0.99999902641253, iteration: 89473
loss: 1.0374687910079956,grad_norm: 0.8463946483045021, iteration: 89474
loss: 0.980191707611084,grad_norm: 0.9201209642381902, iteration: 89475
loss: 0.9924877285957336,grad_norm: 0.9999991821442397, iteration: 89476
loss: 0.9874036908149719,grad_norm: 0.9999994691244455, iteration: 89477
loss: 1.0101128816604614,grad_norm: 0.9919072350493717, iteration: 89478
loss: 1.0100914239883423,grad_norm: 0.9319073513769499, iteration: 89479
loss: 1.0065301656723022,grad_norm: 0.9999991876493243, iteration: 89480
loss: 0.9694586396217346,grad_norm: 0.8664669841987342, iteration: 89481
loss: 0.9917905926704407,grad_norm: 0.9881789764094026, iteration: 89482
loss: 0.9808148741722107,grad_norm: 0.999999008849885, iteration: 89483
loss: 1.0129660367965698,grad_norm: 0.999999177812618, iteration: 89484
loss: 0.9820020794868469,grad_norm: 0.9967471819008085, iteration: 89485
loss: 0.9811855554580688,grad_norm: 0.9999993318713741, iteration: 89486
loss: 0.9819191098213196,grad_norm: 0.9999990774399568, iteration: 89487
loss: 1.006177306175232,grad_norm: 0.9999991538223058, iteration: 89488
loss: 1.0249674320220947,grad_norm: 0.9696692443409555, iteration: 89489
loss: 0.9899755120277405,grad_norm: 0.912719111976498, iteration: 89490
loss: 1.0625125169754028,grad_norm: 0.9999998199187053, iteration: 89491
loss: 1.001595139503479,grad_norm: 0.9999995047211416, iteration: 89492
loss: 1.0097795724868774,grad_norm: 0.9999995803974311, iteration: 89493
loss: 0.9794614911079407,grad_norm: 0.9122717893723827, iteration: 89494
loss: 1.0147355794906616,grad_norm: 0.9519266867503271, iteration: 89495
loss: 1.0074564218521118,grad_norm: 0.999998915494009, iteration: 89496
loss: 0.9628736972808838,grad_norm: 0.9999990964767514, iteration: 89497
loss: 0.9671775698661804,grad_norm: 0.9999991107288292, iteration: 89498
loss: 1.0288877487182617,grad_norm: 0.9999990649867048, iteration: 89499
loss: 0.9837846159934998,grad_norm: 0.9999989763192906, iteration: 89500
loss: 0.9517063498497009,grad_norm: 0.9999991142736698, iteration: 89501
loss: 1.0099343061447144,grad_norm: 0.9999991460614561, iteration: 89502
loss: 1.0493659973144531,grad_norm: 0.9999990897972788, iteration: 89503
loss: 1.0106171369552612,grad_norm: 0.999512627573879, iteration: 89504
loss: 0.9962402582168579,grad_norm: 0.9448935132890994, iteration: 89505
loss: 0.9882968068122864,grad_norm: 0.9999991090468668, iteration: 89506
loss: 0.9918514490127563,grad_norm: 0.9670834972560362, iteration: 89507
loss: 1.0289102792739868,grad_norm: 0.9771973867805208, iteration: 89508
loss: 0.9735485315322876,grad_norm: 0.999999018039668, iteration: 89509
loss: 1.0052285194396973,grad_norm: 0.9999990925663873, iteration: 89510
loss: 0.9955164790153503,grad_norm: 0.9999991603269647, iteration: 89511
loss: 1.0275601148605347,grad_norm: 0.9999993179192109, iteration: 89512
loss: 0.9527025818824768,grad_norm: 0.9999988668016573, iteration: 89513
loss: 1.0473253726959229,grad_norm: 0.9999994099249057, iteration: 89514
loss: 1.0349899530410767,grad_norm: 0.9999996938752997, iteration: 89515
loss: 1.0210187435150146,grad_norm: 0.9999989614270649, iteration: 89516
loss: 0.9925374388694763,grad_norm: 0.8546561034903088, iteration: 89517
loss: 0.9754651784896851,grad_norm: 0.9999991497737505, iteration: 89518
loss: 1.0155326128005981,grad_norm: 0.9776997911095676, iteration: 89519
loss: 1.0207258462905884,grad_norm: 0.9999998246270412, iteration: 89520
loss: 0.9975629448890686,grad_norm: 0.9999991800245951, iteration: 89521
loss: 1.0109230279922485,grad_norm: 0.9999992087318819, iteration: 89522
loss: 0.9710067510604858,grad_norm: 0.9999991743421803, iteration: 89523
loss: 1.0307652950286865,grad_norm: 0.838992216959679, iteration: 89524
loss: 0.9968416690826416,grad_norm: 0.9999990897154656, iteration: 89525
loss: 1.0190898180007935,grad_norm: 0.9999991793516897, iteration: 89526
loss: 0.9839308857917786,grad_norm: 0.9228400492239819, iteration: 89527
loss: 0.9767771363258362,grad_norm: 0.9999989592314777, iteration: 89528
loss: 1.0012362003326416,grad_norm: 0.999999290375005, iteration: 89529
loss: 1.0182944536209106,grad_norm: 0.8863036345021337, iteration: 89530
loss: 0.9577134251594543,grad_norm: 0.991834829730441, iteration: 89531
loss: 1.0419490337371826,grad_norm: 0.9999991970792483, iteration: 89532
loss: 1.0107611417770386,grad_norm: 0.887117737698462, iteration: 89533
loss: 1.0599958896636963,grad_norm: 0.9999991259193675, iteration: 89534
loss: 1.02842116355896,grad_norm: 0.9999991376574189, iteration: 89535
loss: 1.0268635749816895,grad_norm: 0.9999996538119279, iteration: 89536
loss: 1.0020610094070435,grad_norm: 0.9999992394463099, iteration: 89537
loss: 1.0185374021530151,grad_norm: 0.9999993205485714, iteration: 89538
loss: 1.0216256380081177,grad_norm: 0.999999213220043, iteration: 89539
loss: 0.9858959317207336,grad_norm: 0.9927563242441728, iteration: 89540
loss: 1.0142021179199219,grad_norm: 0.8676231935520929, iteration: 89541
loss: 1.062572717666626,grad_norm: 0.9999998218064782, iteration: 89542
loss: 0.9910244941711426,grad_norm: 0.9999995138263258, iteration: 89543
loss: 1.018871784210205,grad_norm: 0.9999992994395448, iteration: 89544
loss: 0.9960220456123352,grad_norm: 0.9999991327949135, iteration: 89545
loss: 0.9487848877906799,grad_norm: 0.9999992458371663, iteration: 89546
loss: 0.9917711615562439,grad_norm: 0.9234613588870061, iteration: 89547
loss: 1.0133084058761597,grad_norm: 0.9999990428681158, iteration: 89548
loss: 0.9997709393501282,grad_norm: 0.9773790764428516, iteration: 89549
loss: 1.0001423358917236,grad_norm: 0.9872009895277879, iteration: 89550
loss: 1.017130970954895,grad_norm: 0.9999992162093279, iteration: 89551
loss: 1.0138704776763916,grad_norm: 0.9999993037872841, iteration: 89552
loss: 1.0081161260604858,grad_norm: 0.9999990695415454, iteration: 89553
loss: 1.017651081085205,grad_norm: 0.9999991650579204, iteration: 89554
loss: 0.9829007983207703,grad_norm: 0.9999990851977854, iteration: 89555
loss: 0.9744027256965637,grad_norm: 0.9999992533496356, iteration: 89556
loss: 0.9888492822647095,grad_norm: 0.999999450868151, iteration: 89557
loss: 1.0233763456344604,grad_norm: 0.97229867542861, iteration: 89558
loss: 0.9955639243125916,grad_norm: 0.9999993074008265, iteration: 89559
loss: 1.038724660873413,grad_norm: 0.9999992338390977, iteration: 89560
loss: 1.012455940246582,grad_norm: 0.9999991840875124, iteration: 89561
loss: 0.9980131983757019,grad_norm: 0.9999994270382369, iteration: 89562
loss: 0.9855025410652161,grad_norm: 0.9999991700293811, iteration: 89563
loss: 1.0021024942398071,grad_norm: 0.9891985419721114, iteration: 89564
loss: 1.0067030191421509,grad_norm: 0.9999993130103493, iteration: 89565
loss: 1.0358375310897827,grad_norm: 0.8635091430036296, iteration: 89566
loss: 0.9934759140014648,grad_norm: 0.9999990002888776, iteration: 89567
loss: 0.9892860054969788,grad_norm: 0.9256281368843072, iteration: 89568
loss: 1.0068531036376953,grad_norm: 0.9999991063957645, iteration: 89569
loss: 0.9863511919975281,grad_norm: 0.9692873606243746, iteration: 89570
loss: 0.9643054008483887,grad_norm: 0.9999992098396638, iteration: 89571
loss: 0.9945273995399475,grad_norm: 0.999999165222347, iteration: 89572
loss: 0.9921514391899109,grad_norm: 0.9999990792131196, iteration: 89573
loss: 0.9922735095024109,grad_norm: 0.9999990809521092, iteration: 89574
loss: 1.0338488817214966,grad_norm: 0.9999991095908597, iteration: 89575
loss: 1.0310490131378174,grad_norm: 0.9999990902698147, iteration: 89576
loss: 1.0263417959213257,grad_norm: 0.9999992401368228, iteration: 89577
loss: 0.9973616003990173,grad_norm: 0.9999990890077856, iteration: 89578
loss: 1.0098698139190674,grad_norm: 0.9999994626225651, iteration: 89579
loss: 1.0209290981292725,grad_norm: 0.9999990861956896, iteration: 89580
loss: 1.0019702911376953,grad_norm: 0.9999993464056646, iteration: 89581
loss: 1.0035626888275146,grad_norm: 0.9999991445138857, iteration: 89582
loss: 1.0465093851089478,grad_norm: 0.9798563544100912, iteration: 89583
loss: 1.0118253231048584,grad_norm: 0.9999991420606028, iteration: 89584
loss: 1.0198190212249756,grad_norm: 0.9999991180505476, iteration: 89585
loss: 1.007283091545105,grad_norm: 0.9999992263849877, iteration: 89586
loss: 1.0020288228988647,grad_norm: 0.9999992090542729, iteration: 89587
loss: 0.9863346219062805,grad_norm: 0.999999098835246, iteration: 89588
loss: 1.0046968460083008,grad_norm: 0.9999990468994079, iteration: 89589
loss: 0.9674062132835388,grad_norm: 0.9999992312942225, iteration: 89590
loss: 1.0115164518356323,grad_norm: 0.9999990758307412, iteration: 89591
loss: 1.0015314817428589,grad_norm: 0.9999992464085298, iteration: 89592
loss: 1.0225390195846558,grad_norm: 0.9999995743194762, iteration: 89593
loss: 0.9882445335388184,grad_norm: 0.9999991639569165, iteration: 89594
loss: 0.9942747950553894,grad_norm: 0.978420696065993, iteration: 89595
loss: 0.9983922243118286,grad_norm: 0.9999992965766913, iteration: 89596
loss: 1.0040535926818848,grad_norm: 0.907240077462083, iteration: 89597
loss: 0.986268162727356,grad_norm: 0.9948678544820527, iteration: 89598
loss: 0.9592841267585754,grad_norm: 0.9884848728536677, iteration: 89599
loss: 1.0412627458572388,grad_norm: 0.9999993716988014, iteration: 89600
loss: 0.9845027923583984,grad_norm: 0.9999991878098704, iteration: 89601
loss: 0.9765805602073669,grad_norm: 0.9999991103904707, iteration: 89602
loss: 0.9758016467094421,grad_norm: 0.9999992618623351, iteration: 89603
loss: 1.0100629329681396,grad_norm: 0.961164721558301, iteration: 89604
loss: 1.03663969039917,grad_norm: 0.9999992588225767, iteration: 89605
loss: 1.011831283569336,grad_norm: 0.9563766451019348, iteration: 89606
loss: 0.9874042868614197,grad_norm: 0.9458816664127221, iteration: 89607
loss: 1.0117062330245972,grad_norm: 0.9544679969454798, iteration: 89608
loss: 0.9985161423683167,grad_norm: 0.8889942217499062, iteration: 89609
loss: 1.0090316534042358,grad_norm: 0.9648312130628631, iteration: 89610
loss: 1.0172638893127441,grad_norm: 0.8583658554489378, iteration: 89611
loss: 0.9744865894317627,grad_norm: 0.8740071563158609, iteration: 89612
loss: 1.013274908065796,grad_norm: 0.9513966683850884, iteration: 89613
loss: 1.0264850854873657,grad_norm: 0.9999990593691239, iteration: 89614
loss: 1.0030840635299683,grad_norm: 0.9933716553975661, iteration: 89615
loss: 0.9851943850517273,grad_norm: 0.9999997938547112, iteration: 89616
loss: 1.0110523700714111,grad_norm: 0.9835036000707044, iteration: 89617
loss: 1.004632830619812,grad_norm: 0.9999992089312739, iteration: 89618
loss: 1.00308096408844,grad_norm: 0.9330414665209714, iteration: 89619
loss: 0.9956227540969849,grad_norm: 0.9999991644002907, iteration: 89620
loss: 1.0018126964569092,grad_norm: 0.9999991221535544, iteration: 89621
loss: 0.9913334250450134,grad_norm: 0.999999175505385, iteration: 89622
loss: 1.0175448656082153,grad_norm: 0.9999990952734961, iteration: 89623
loss: 1.0054757595062256,grad_norm: 0.9999990989371411, iteration: 89624
loss: 0.97187739610672,grad_norm: 0.9594391615566482, iteration: 89625
loss: 0.9931110143661499,grad_norm: 0.9999995027750775, iteration: 89626
loss: 1.0063494443893433,grad_norm: 0.9857910716325993, iteration: 89627
loss: 1.005307912826538,grad_norm: 0.9999992121431168, iteration: 89628
loss: 1.0005826950073242,grad_norm: 0.9999991822095349, iteration: 89629
loss: 0.9803693294525146,grad_norm: 0.999999135892882, iteration: 89630
loss: 1.059146761894226,grad_norm: 0.9999996775410388, iteration: 89631
loss: 0.9858642816543579,grad_norm: 0.9999992023603725, iteration: 89632
loss: 1.0005446672439575,grad_norm: 0.949225268092855, iteration: 89633
loss: 1.0185060501098633,grad_norm: 0.9999995092090332, iteration: 89634
loss: 0.9806816577911377,grad_norm: 0.9999990195295705, iteration: 89635
loss: 1.0087202787399292,grad_norm: 0.9999991231297387, iteration: 89636
loss: 1.0193049907684326,grad_norm: 0.9999990844721509, iteration: 89637
loss: 1.0275388956069946,grad_norm: 0.9999993182176063, iteration: 89638
loss: 1.0092226266860962,grad_norm: 0.9133616797529013, iteration: 89639
loss: 1.084791898727417,grad_norm: 0.9999990152034732, iteration: 89640
loss: 1.0473392009735107,grad_norm: 0.9999995909082333, iteration: 89641
loss: 0.9726096987724304,grad_norm: 0.9721603452342746, iteration: 89642
loss: 1.073999285697937,grad_norm: 0.9999998737218408, iteration: 89643
loss: 1.0132191181182861,grad_norm: 0.9999990356451776, iteration: 89644
loss: 0.9925327897071838,grad_norm: 0.8577069192097818, iteration: 89645
loss: 0.9842842221260071,grad_norm: 0.9046474014561537, iteration: 89646
loss: 1.018854022026062,grad_norm: 0.9999993720739575, iteration: 89647
loss: 1.0145756006240845,grad_norm: 0.9999994692821854, iteration: 89648
loss: 0.9921320080757141,grad_norm: 0.9819383486703138, iteration: 89649
loss: 1.0229521989822388,grad_norm: 0.851831691037691, iteration: 89650
loss: 0.994269609451294,grad_norm: 0.99999910922012, iteration: 89651
loss: 1.0690299272537231,grad_norm: 0.9999996182094416, iteration: 89652
loss: 1.0808075666427612,grad_norm: 0.9999991100177965, iteration: 89653
loss: 1.001771092414856,grad_norm: 0.9999993645822556, iteration: 89654
loss: 0.9849458932876587,grad_norm: 0.9999994747627227, iteration: 89655
loss: 0.9641662836074829,grad_norm: 0.971913169140666, iteration: 89656
loss: 0.9937993288040161,grad_norm: 0.9999991507316849, iteration: 89657
loss: 0.9959123730659485,grad_norm: 0.9964824566856488, iteration: 89658
loss: 1.0066280364990234,grad_norm: 0.9999990686958815, iteration: 89659
loss: 1.0519717931747437,grad_norm: 0.9999992298650475, iteration: 89660
loss: 1.026118278503418,grad_norm: 0.9999992068770241, iteration: 89661
loss: 1.004702091217041,grad_norm: 0.9999989206397355, iteration: 89662
loss: 0.9938962459564209,grad_norm: 0.999999210988494, iteration: 89663
loss: 0.9744767546653748,grad_norm: 0.9590844746346309, iteration: 89664
loss: 0.977613091468811,grad_norm: 0.9389904249138162, iteration: 89665
loss: 0.9869207739830017,grad_norm: 0.9999996050836394, iteration: 89666
loss: 0.9965944886207581,grad_norm: 0.9999993052368787, iteration: 89667
loss: 0.9763815402984619,grad_norm: 0.9029156557494732, iteration: 89668
loss: 1.007198452949524,grad_norm: 0.9999990672792675, iteration: 89669
loss: 0.9799579977989197,grad_norm: 0.9999991614959602, iteration: 89670
loss: 1.003859519958496,grad_norm: 0.9538512587292967, iteration: 89671
loss: 1.0327565670013428,grad_norm: 0.999999551692249, iteration: 89672
loss: 1.0401082038879395,grad_norm: 0.9999994567353506, iteration: 89673
loss: 0.9943450689315796,grad_norm: 0.9634043479191725, iteration: 89674
loss: 0.9776611328125,grad_norm: 0.9999991395443851, iteration: 89675
loss: 1.0314524173736572,grad_norm: 0.9794832698439553, iteration: 89676
loss: 1.014420509338379,grad_norm: 0.9999996622017635, iteration: 89677
loss: 0.9826809167861938,grad_norm: 0.999999241077989, iteration: 89678
loss: 0.9874129295349121,grad_norm: 0.9999993709772045, iteration: 89679
loss: 0.997628927230835,grad_norm: 0.9999989578286933, iteration: 89680
loss: 1.0211856365203857,grad_norm: 0.9999993745727489, iteration: 89681
loss: 1.0030184984207153,grad_norm: 0.9999990345984043, iteration: 89682
loss: 1.0089763402938843,grad_norm: 0.9773678271939539, iteration: 89683
loss: 0.9934796094894409,grad_norm: 0.999999225513825, iteration: 89684
loss: 1.0031659603118896,grad_norm: 0.99999928342517, iteration: 89685
loss: 0.9803640842437744,grad_norm: 0.9999991350768715, iteration: 89686
loss: 0.9668452143669128,grad_norm: 0.9999990939798098, iteration: 89687
loss: 1.0048909187316895,grad_norm: 0.9803706314142657, iteration: 89688
loss: 1.0266480445861816,grad_norm: 0.9999997853083576, iteration: 89689
loss: 0.9933817386627197,grad_norm: 0.9999992665221851, iteration: 89690
loss: 0.9825780391693115,grad_norm: 0.9489709073169964, iteration: 89691
loss: 0.9993966221809387,grad_norm: 0.7786226593193991, iteration: 89692
loss: 0.9549038410186768,grad_norm: 0.9999992138003383, iteration: 89693
loss: 0.9836095571517944,grad_norm: 0.9999993192945112, iteration: 89694
loss: 0.9498152732849121,grad_norm: 0.9765912362584814, iteration: 89695
loss: 1.0121220350265503,grad_norm: 0.9999991874696361, iteration: 89696
loss: 0.9902555346488953,grad_norm: 0.9895949332139303, iteration: 89697
loss: 1.0083378553390503,grad_norm: 0.9999992938754135, iteration: 89698
loss: 0.9974546432495117,grad_norm: 0.9999988983876927, iteration: 89699
loss: 0.9924346208572388,grad_norm: 0.9999991733673116, iteration: 89700
loss: 1.0028430223464966,grad_norm: 0.9572473991528115, iteration: 89701
loss: 1.0239176750183105,grad_norm: 0.9999989737581425, iteration: 89702
loss: 0.9795362949371338,grad_norm: 0.9999993389543917, iteration: 89703
loss: 0.9799954295158386,grad_norm: 0.9999991883830984, iteration: 89704
loss: 1.0785523653030396,grad_norm: 0.9999992940242376, iteration: 89705
loss: 1.0063037872314453,grad_norm: 0.9999992708713393, iteration: 89706
loss: 0.9728882908821106,grad_norm: 0.9999990631910801, iteration: 89707
loss: 0.9797008633613586,grad_norm: 0.8873856061589683, iteration: 89708
loss: 0.9804236888885498,grad_norm: 0.9999991651288139, iteration: 89709
loss: 0.9843116402626038,grad_norm: 0.9956946002705066, iteration: 89710
loss: 1.0008050203323364,grad_norm: 0.9761812512232246, iteration: 89711
loss: 0.9789512753486633,grad_norm: 0.999999184386754, iteration: 89712
loss: 1.0284374952316284,grad_norm: 0.90631285788922, iteration: 89713
loss: 1.023215651512146,grad_norm: 0.9999997553647848, iteration: 89714
loss: 0.9734625220298767,grad_norm: 0.9999993596307329, iteration: 89715
loss: 0.9921121001243591,grad_norm: 0.975231574707109, iteration: 89716
loss: 0.9946416020393372,grad_norm: 0.9999991152393841, iteration: 89717
loss: 0.9888834953308105,grad_norm: 0.9560423099778123, iteration: 89718
loss: 1.0090906620025635,grad_norm: 0.9999990985379987, iteration: 89719
loss: 1.025174617767334,grad_norm: 0.9999992015211805, iteration: 89720
loss: 0.9808345437049866,grad_norm: 0.9172435174399308, iteration: 89721
loss: 0.9856531620025635,grad_norm: 0.8982654602752388, iteration: 89722
loss: 1.0167908668518066,grad_norm: 0.947362652716012, iteration: 89723
loss: 1.0315971374511719,grad_norm: 0.9999992146622928, iteration: 89724
loss: 0.9743383526802063,grad_norm: 0.9864106479648458, iteration: 89725
loss: 0.9917567372322083,grad_norm: 0.9999990738506398, iteration: 89726
loss: 0.9811618328094482,grad_norm: 0.9414958227950969, iteration: 89727
loss: 0.9809460639953613,grad_norm: 0.9999989941046274, iteration: 89728
loss: 1.0303441286087036,grad_norm: 0.9742198800483078, iteration: 89729
loss: 1.0154640674591064,grad_norm: 0.9999995998439718, iteration: 89730
loss: 0.9928923845291138,grad_norm: 0.968386494837295, iteration: 89731
loss: 0.9719802737236023,grad_norm: 0.9999990286392366, iteration: 89732
loss: 1.0197386741638184,grad_norm: 0.9037727235905112, iteration: 89733
loss: 1.0134838819503784,grad_norm: 0.975309020861083, iteration: 89734
loss: 1.0080351829528809,grad_norm: 0.9999990830202089, iteration: 89735
loss: 1.0296108722686768,grad_norm: 0.9999998943136827, iteration: 89736
loss: 0.9631097912788391,grad_norm: 0.999999284376467, iteration: 89737
loss: 1.015734314918518,grad_norm: 0.9999992436156392, iteration: 89738
loss: 0.9935442805290222,grad_norm: 0.9337596840635047, iteration: 89739
loss: 0.9880055785179138,grad_norm: 0.9999992288076339, iteration: 89740
loss: 0.9974110126495361,grad_norm: 0.9173876150465039, iteration: 89741
loss: 1.0862102508544922,grad_norm: 0.9999995443413224, iteration: 89742
loss: 1.0367443561553955,grad_norm: 0.9999996425021065, iteration: 89743
loss: 0.9871340990066528,grad_norm: 0.9452643024231988, iteration: 89744
loss: 1.024880051612854,grad_norm: 0.9999993478867468, iteration: 89745
loss: 1.0135494470596313,grad_norm: 0.999999097373788, iteration: 89746
loss: 0.9909384250640869,grad_norm: 0.9359383583590078, iteration: 89747
loss: 0.9615917205810547,grad_norm: 0.9999991207364803, iteration: 89748
loss: 1.034366488456726,grad_norm: 0.9999992109778026, iteration: 89749
loss: 1.00958251953125,grad_norm: 0.9999992008324867, iteration: 89750
loss: 0.9874769449234009,grad_norm: 0.9999991239914814, iteration: 89751
loss: 0.9755887389183044,grad_norm: 0.9999992414691729, iteration: 89752
loss: 0.975236177444458,grad_norm: 0.9375768100264368, iteration: 89753
loss: 1.0438064336776733,grad_norm: 0.9999991079661797, iteration: 89754
loss: 1.081316351890564,grad_norm: 0.999999047596173, iteration: 89755
loss: 1.0089720487594604,grad_norm: 0.9999997515730292, iteration: 89756
loss: 1.0107216835021973,grad_norm: 0.8614218054941771, iteration: 89757
loss: 1.0017807483673096,grad_norm: 0.9999997629323814, iteration: 89758
loss: 0.9586483836174011,grad_norm: 0.9999991367171094, iteration: 89759
loss: 0.9786237478256226,grad_norm: 0.8966818832340364, iteration: 89760
loss: 0.9847915768623352,grad_norm: 0.9999989738120799, iteration: 89761
loss: 0.9642685055732727,grad_norm: 0.9999991762604852, iteration: 89762
loss: 0.9984134435653687,grad_norm: 0.9999991185538878, iteration: 89763
loss: 1.009142279624939,grad_norm: 0.9579525785407325, iteration: 89764
loss: 0.9795016646385193,grad_norm: 0.9697854468638394, iteration: 89765
loss: 0.9529904127120972,grad_norm: 0.9999990845119358, iteration: 89766
loss: 0.9586509466171265,grad_norm: 0.9367243346727068, iteration: 89767
loss: 1.0293916463851929,grad_norm: 0.9999992344058701, iteration: 89768
loss: 1.0073330402374268,grad_norm: 0.9999989666029064, iteration: 89769
loss: 0.9947343468666077,grad_norm: 0.9999990274469744, iteration: 89770
loss: 0.9978840947151184,grad_norm: 0.9293901267559843, iteration: 89771
loss: 1.0020514726638794,grad_norm: 0.9999990423584162, iteration: 89772
loss: 1.0030514001846313,grad_norm: 0.9999989880647998, iteration: 89773
loss: 0.9754945635795593,grad_norm: 0.9999991005972726, iteration: 89774
loss: 1.0210398435592651,grad_norm: 0.9999992513890175, iteration: 89775
loss: 1.0214033126831055,grad_norm: 0.9999993610368507, iteration: 89776
loss: 1.0185688734054565,grad_norm: 0.9999990124636052, iteration: 89777
loss: 0.9930760264396667,grad_norm: 0.9999989613924649, iteration: 89778
loss: 1.012307047843933,grad_norm: 0.9999990107057499, iteration: 89779
loss: 0.9583402872085571,grad_norm: 0.9999993344015665, iteration: 89780
loss: 0.9932941794395447,grad_norm: 0.9424027446392825, iteration: 89781
loss: 1.0145460367202759,grad_norm: 0.9999991307606759, iteration: 89782
loss: 1.0274291038513184,grad_norm: 0.9999994884790673, iteration: 89783
loss: 1.0739741325378418,grad_norm: 0.9999993436210786, iteration: 89784
loss: 1.1104187965393066,grad_norm: 0.999999274576675, iteration: 89785
loss: 0.9702128767967224,grad_norm: 0.999999112779258, iteration: 89786
loss: 1.0238910913467407,grad_norm: 0.9999998745259228, iteration: 89787
loss: 0.962702751159668,grad_norm: 0.9999991192709086, iteration: 89788
loss: 0.9955549240112305,grad_norm: 0.9999991195973136, iteration: 89789
loss: 1.0339860916137695,grad_norm: 0.9999992679018467, iteration: 89790
loss: 1.0188850164413452,grad_norm: 0.9999990098061566, iteration: 89791
loss: 1.0198287963867188,grad_norm: 0.9999990508110116, iteration: 89792
loss: 0.9831647276878357,grad_norm: 0.921499493845111, iteration: 89793
loss: 0.9560738205909729,grad_norm: 0.999999089959926, iteration: 89794
loss: 0.9841720461845398,grad_norm: 0.9999991743545271, iteration: 89795
loss: 0.9853314161300659,grad_norm: 0.9357196260953503, iteration: 89796
loss: 1.020490050315857,grad_norm: 0.9999993365322961, iteration: 89797
loss: 0.9829548001289368,grad_norm: 0.9554252155336422, iteration: 89798
loss: 1.0039629936218262,grad_norm: 0.8949571618461754, iteration: 89799
loss: 0.9959511160850525,grad_norm: 0.999999177782193, iteration: 89800
loss: 0.9883812665939331,grad_norm: 0.9999989704543111, iteration: 89801
loss: 0.980670690536499,grad_norm: 0.9716849688718461, iteration: 89802
loss: 0.9675239324569702,grad_norm: 0.9999991661348765, iteration: 89803
loss: 1.0063459873199463,grad_norm: 0.9636358925228976, iteration: 89804
loss: 1.0168120861053467,grad_norm: 0.9266769733202874, iteration: 89805
loss: 0.9943369030952454,grad_norm: 0.9999991279278654, iteration: 89806
loss: 1.0167391300201416,grad_norm: 0.9999991714285883, iteration: 89807
loss: 0.9953564405441284,grad_norm: 0.9999990430284339, iteration: 89808
loss: 1.015942931175232,grad_norm: 0.9999991761408001, iteration: 89809
loss: 1.0099223852157593,grad_norm: 0.9999992918488757, iteration: 89810
loss: 1.0025662183761597,grad_norm: 0.9827196792909629, iteration: 89811
loss: 1.0079317092895508,grad_norm: 0.9607164584360401, iteration: 89812
loss: 1.0213669538497925,grad_norm: 0.9208823258626775, iteration: 89813
loss: 0.9831048250198364,grad_norm: 0.9999990252078121, iteration: 89814
loss: 0.9857478737831116,grad_norm: 0.9999991768926895, iteration: 89815
loss: 1.0289297103881836,grad_norm: 0.9999989985204929, iteration: 89816
loss: 1.0424087047576904,grad_norm: 0.9999996285373024, iteration: 89817
loss: 1.0130079984664917,grad_norm: 0.8315301106400106, iteration: 89818
loss: 1.0487128496170044,grad_norm: 0.9999991712328444, iteration: 89819
loss: 0.9908213019371033,grad_norm: 0.9999992747155494, iteration: 89820
loss: 0.9679768085479736,grad_norm: 0.9999992515542532, iteration: 89821
loss: 0.9956066608428955,grad_norm: 0.9634462253446164, iteration: 89822
loss: 1.024155616760254,grad_norm: 0.9999991061399351, iteration: 89823
loss: 1.0584543943405151,grad_norm: 0.9999993437426509, iteration: 89824
loss: 0.9672820568084717,grad_norm: 0.9999990005925732, iteration: 89825
loss: 1.0175656080245972,grad_norm: 0.9999994798365698, iteration: 89826
loss: 0.9521869421005249,grad_norm: 0.9339816484529335, iteration: 89827
loss: 1.0089709758758545,grad_norm: 0.9091332952225473, iteration: 89828
loss: 0.9591320753097534,grad_norm: 0.9999991401112499, iteration: 89829
loss: 1.0751725435256958,grad_norm: 0.9999995153536836, iteration: 89830
loss: 0.9785573482513428,grad_norm: 0.8753084666184361, iteration: 89831
loss: 1.0053683519363403,grad_norm: 0.999999369347588, iteration: 89832
loss: 1.00143301486969,grad_norm: 0.9975461735411306, iteration: 89833
loss: 1.0127253532409668,grad_norm: 0.9665211326650172, iteration: 89834
loss: 1.0180442333221436,grad_norm: 0.9145509885404279, iteration: 89835
loss: 1.0045799016952515,grad_norm: 0.9999991551308753, iteration: 89836
loss: 0.9794450402259827,grad_norm: 0.999999112916125, iteration: 89837
loss: 0.9938015341758728,grad_norm: 0.9999991551903927, iteration: 89838
loss: 0.9659517407417297,grad_norm: 0.9999992930382194, iteration: 89839
loss: 1.040673851966858,grad_norm: 0.9999992682177347, iteration: 89840
loss: 1.0024937391281128,grad_norm: 0.9851011455170023, iteration: 89841
loss: 1.0393705368041992,grad_norm: 0.9646508478122187, iteration: 89842
loss: 0.9843066334724426,grad_norm: 0.9999989371791463, iteration: 89843
loss: 0.99322509765625,grad_norm: 0.9868973345766751, iteration: 89844
loss: 1.0250695943832397,grad_norm: 0.9999991024916154, iteration: 89845
loss: 1.0076285600662231,grad_norm: 0.9396201609318333, iteration: 89846
loss: 0.9985294342041016,grad_norm: 0.9999991263312935, iteration: 89847
loss: 1.0252701044082642,grad_norm: 0.8756381199116773, iteration: 89848
loss: 1.0450974702835083,grad_norm: 0.9999991465039506, iteration: 89849
loss: 1.002349615097046,grad_norm: 0.9999992511586765, iteration: 89850
loss: 0.9375893473625183,grad_norm: 0.8371912183024094, iteration: 89851
loss: 0.9987233281135559,grad_norm: 0.9999991518678458, iteration: 89852
loss: 0.9978967308998108,grad_norm: 0.9999992931008215, iteration: 89853
loss: 1.0001744031906128,grad_norm: 0.9999989790232391, iteration: 89854
loss: 1.0335266590118408,grad_norm: 0.9999993327879173, iteration: 89855
loss: 0.9871711134910583,grad_norm: 0.9999991670486056, iteration: 89856
loss: 1.0238178968429565,grad_norm: 0.9999992316562676, iteration: 89857
loss: 0.9936399459838867,grad_norm: 0.9525418324902872, iteration: 89858
loss: 1.0187658071517944,grad_norm: 0.9999992134254273, iteration: 89859
loss: 0.9884822964668274,grad_norm: 0.9999992872338996, iteration: 89860
loss: 0.9846816062927246,grad_norm: 0.999999102344297, iteration: 89861
loss: 0.9984970092773438,grad_norm: 0.9999991587536317, iteration: 89862
loss: 1.0069020986557007,grad_norm: 0.999999145678281, iteration: 89863
loss: 1.0070933103561401,grad_norm: 0.9999991879539853, iteration: 89864
loss: 0.9750679731369019,grad_norm: 0.9523073156308169, iteration: 89865
loss: 1.000622034072876,grad_norm: 0.9460292648571033, iteration: 89866
loss: 1.0005048513412476,grad_norm: 0.9111245501722697, iteration: 89867
loss: 0.9968416094779968,grad_norm: 0.9999990652312681, iteration: 89868
loss: 1.0086402893066406,grad_norm: 0.9466259673654244, iteration: 89869
loss: 0.989872395992279,grad_norm: 0.8714156863967062, iteration: 89870
loss: 1.036367654800415,grad_norm: 0.9999990506957469, iteration: 89871
loss: 1.071946620941162,grad_norm: 0.9999995963912376, iteration: 89872
loss: 1.0508862733840942,grad_norm: 0.999999049389198, iteration: 89873
loss: 0.949022114276886,grad_norm: 0.9999990803328213, iteration: 89874
loss: 0.9826802015304565,grad_norm: 0.999998960780454, iteration: 89875
loss: 1.0094618797302246,grad_norm: 0.9907645441850986, iteration: 89876
loss: 1.0161343812942505,grad_norm: 0.9999990431656497, iteration: 89877
loss: 1.0180763006210327,grad_norm: 0.9999991773132043, iteration: 89878
loss: 0.972717821598053,grad_norm: 0.9999990679351004, iteration: 89879
loss: 1.0386182069778442,grad_norm: 0.9999996117610072, iteration: 89880
loss: 1.0196589231491089,grad_norm: 0.9999989840939719, iteration: 89881
loss: 0.9754526019096375,grad_norm: 0.9999991350304224, iteration: 89882
loss: 0.9427341222763062,grad_norm: 0.9999990644611648, iteration: 89883
loss: 0.9946422576904297,grad_norm: 0.9999991724508024, iteration: 89884
loss: 1.0052438974380493,grad_norm: 0.993348801671314, iteration: 89885
loss: 0.998188316822052,grad_norm: 0.9999990412916949, iteration: 89886
loss: 1.022208571434021,grad_norm: 0.9526821551560052, iteration: 89887
loss: 0.9836054444313049,grad_norm: 0.999999136192406, iteration: 89888
loss: 1.05551278591156,grad_norm: 0.9999995864957342, iteration: 89889
loss: 0.9901425838470459,grad_norm: 0.999999155901467, iteration: 89890
loss: 1.005947232246399,grad_norm: 0.9999990419367267, iteration: 89891
loss: 1.0322908163070679,grad_norm: 0.999999215662492, iteration: 89892
loss: 1.0273689031600952,grad_norm: 0.9999991305129085, iteration: 89893
loss: 1.024142861366272,grad_norm: 0.9804482737835324, iteration: 89894
loss: 0.982318639755249,grad_norm: 0.9999991401139788, iteration: 89895
loss: 1.0734161138534546,grad_norm: 0.9999997325397613, iteration: 89896
loss: 1.0382047891616821,grad_norm: 0.999999025644743, iteration: 89897
loss: 0.9766610264778137,grad_norm: 0.9999992297060627, iteration: 89898
loss: 1.0153614282608032,grad_norm: 0.9264533345069871, iteration: 89899
loss: 1.0089575052261353,grad_norm: 0.9999991884228161, iteration: 89900
loss: 0.9931691884994507,grad_norm: 0.9999990627844791, iteration: 89901
loss: 0.9659674763679504,grad_norm: 0.9999991720303542, iteration: 89902
loss: 1.0099737644195557,grad_norm: 0.9999992421759812, iteration: 89903
loss: 1.0071461200714111,grad_norm: 0.9999992057317423, iteration: 89904
loss: 0.9695246815681458,grad_norm: 0.999998967451893, iteration: 89905
loss: 1.0049649477005005,grad_norm: 0.9133813481558893, iteration: 89906
loss: 1.0139639377593994,grad_norm: 0.9999991983570369, iteration: 89907
loss: 1.0398273468017578,grad_norm: 0.9999994771449366, iteration: 89908
loss: 0.9747665524482727,grad_norm: 0.9999992570845702, iteration: 89909
loss: 0.9830199480056763,grad_norm: 0.99999890662134, iteration: 89910
loss: 0.9713169932365417,grad_norm: 0.9615256996189893, iteration: 89911
loss: 1.033377766609192,grad_norm: 0.9677748212720804, iteration: 89912
loss: 1.0408003330230713,grad_norm: 0.999999249594821, iteration: 89913
loss: 1.013049602508545,grad_norm: 0.999999715179326, iteration: 89914
loss: 0.9990902543067932,grad_norm: 0.9999992099338929, iteration: 89915
loss: 0.9826329350471497,grad_norm: 0.9445150880466064, iteration: 89916
loss: 1.0207397937774658,grad_norm: 0.9999991902694804, iteration: 89917
loss: 0.9784149527549744,grad_norm: 0.9999990748270353, iteration: 89918
loss: 0.9769598245620728,grad_norm: 0.9999991473451981, iteration: 89919
loss: 0.9947032332420349,grad_norm: 0.9999990840282573, iteration: 89920
loss: 0.9986538290977478,grad_norm: 0.893026066308274, iteration: 89921
loss: 1.033272385597229,grad_norm: 0.9999990290708447, iteration: 89922
loss: 1.0320385694503784,grad_norm: 0.9999991218256813, iteration: 89923
loss: 0.9786039590835571,grad_norm: 0.9999991561342659, iteration: 89924
loss: 1.0386323928833008,grad_norm: 0.9999991406475681, iteration: 89925
loss: 1.018877387046814,grad_norm: 0.9999991595400101, iteration: 89926
loss: 1.0043545961380005,grad_norm: 0.9999990630063541, iteration: 89927
loss: 0.984300434589386,grad_norm: 0.9999990328117406, iteration: 89928
loss: 1.0181217193603516,grad_norm: 0.9999990777544351, iteration: 89929
loss: 1.0059936046600342,grad_norm: 0.9999992706790686, iteration: 89930
loss: 1.0010597705841064,grad_norm: 0.9934236982544887, iteration: 89931
loss: 1.035916805267334,grad_norm: 0.9999997075526701, iteration: 89932
loss: 1.000485897064209,grad_norm: 0.8929479199273876, iteration: 89933
loss: 1.0138828754425049,grad_norm: 0.9100729923630864, iteration: 89934
loss: 1.0177044868469238,grad_norm: 0.8429857749489071, iteration: 89935
loss: 1.0095574855804443,grad_norm: 0.9999991201516221, iteration: 89936
loss: 0.9983897805213928,grad_norm: 0.9999990496461819, iteration: 89937
loss: 1.0023725032806396,grad_norm: 0.9999991778318824, iteration: 89938
loss: 0.9918602705001831,grad_norm: 0.9999990508498416, iteration: 89939
loss: 1.043073296546936,grad_norm: 0.8670856352073101, iteration: 89940
loss: 0.9542520046234131,grad_norm: 0.9999992200552577, iteration: 89941
loss: 0.973878800868988,grad_norm: 0.974983909169709, iteration: 89942
loss: 1.0020897388458252,grad_norm: 0.9999992131570187, iteration: 89943
loss: 1.013582468032837,grad_norm: 0.9999991804809708, iteration: 89944
loss: 1.0294002294540405,grad_norm: 0.999999117819889, iteration: 89945
loss: 0.9963682293891907,grad_norm: 0.9152697188810793, iteration: 89946
loss: 1.0703502893447876,grad_norm: 0.9999996868818376, iteration: 89947
loss: 0.9791604280471802,grad_norm: 0.9980579801438274, iteration: 89948
loss: 0.9883043766021729,grad_norm: 0.9999991384170496, iteration: 89949
loss: 1.0509227514266968,grad_norm: 0.9999994550110873, iteration: 89950
loss: 0.9804211258888245,grad_norm: 0.999999081029243, iteration: 89951
loss: 0.9786567091941833,grad_norm: 0.9999990867021048, iteration: 89952
loss: 1.0280630588531494,grad_norm: 0.9264285875411797, iteration: 89953
loss: 1.0111979246139526,grad_norm: 0.9698685657409721, iteration: 89954
loss: 1.0111985206604004,grad_norm: 0.9999990426627547, iteration: 89955
loss: 0.9908210039138794,grad_norm: 0.9490148833439015, iteration: 89956
loss: 1.0391062498092651,grad_norm: 0.9365035993884651, iteration: 89957
loss: 1.017043113708496,grad_norm: 0.9999990555767179, iteration: 89958
loss: 1.0003955364227295,grad_norm: 0.9780401499141186, iteration: 89959
loss: 1.0228594541549683,grad_norm: 0.9368199787126587, iteration: 89960
loss: 1.0058116912841797,grad_norm: 0.9699261092307816, iteration: 89961
loss: 0.9196183085441589,grad_norm: 0.9999991796025882, iteration: 89962
loss: 1.0073398351669312,grad_norm: 0.9999992450094083, iteration: 89963
loss: 0.9905810356140137,grad_norm: 0.9903042789820405, iteration: 89964
loss: 1.0153474807739258,grad_norm: 0.9999989950089926, iteration: 89965
loss: 0.9726722240447998,grad_norm: 0.9999989901631678, iteration: 89966
loss: 0.9907805919647217,grad_norm: 0.9999992306289911, iteration: 89967
loss: 1.0022344589233398,grad_norm: 0.999999076185372, iteration: 89968
loss: 0.9746087789535522,grad_norm: 0.9999990163864831, iteration: 89969
loss: 1.0126134157180786,grad_norm: 0.9999991186587145, iteration: 89970
loss: 0.9881961345672607,grad_norm: 0.9322075313068348, iteration: 89971
loss: 0.9883340001106262,grad_norm: 0.9366820611530591, iteration: 89972
loss: 0.9906191825866699,grad_norm: 0.9279800923815771, iteration: 89973
loss: 0.9916168451309204,grad_norm: 0.9999991949290098, iteration: 89974
loss: 0.9794531464576721,grad_norm: 0.9999991148444065, iteration: 89975
loss: 1.0377649068832397,grad_norm: 0.8674832149114332, iteration: 89976
loss: 1.0068360567092896,grad_norm: 0.9532673288282674, iteration: 89977
loss: 0.9742324352264404,grad_norm: 0.9999991075931937, iteration: 89978
loss: 1.019083857536316,grad_norm: 0.9999991024719663, iteration: 89979
loss: 1.007749319076538,grad_norm: 0.9999991010491661, iteration: 89980
loss: 1.0084128379821777,grad_norm: 0.9999990875257627, iteration: 89981
loss: 1.0051190853118896,grad_norm: 0.9174684747541836, iteration: 89982
loss: 1.0284172296524048,grad_norm: 0.9999997074395336, iteration: 89983
loss: 0.984067440032959,grad_norm: 0.9999992042338438, iteration: 89984
loss: 0.9976534247398376,grad_norm: 0.9999990444839231, iteration: 89985
loss: 0.990230143070221,grad_norm: 0.9538935698454879, iteration: 89986
loss: 1.0148777961730957,grad_norm: 0.9999991734192405, iteration: 89987
loss: 1.0131736993789673,grad_norm: 0.8977486927789122, iteration: 89988
loss: 0.9986783862113953,grad_norm: 0.9999992687280104, iteration: 89989
loss: 1.0296415090560913,grad_norm: 0.999999126978634, iteration: 89990
loss: 1.0077790021896362,grad_norm: 0.9999990138075953, iteration: 89991
loss: 0.9929254055023193,grad_norm: 0.9763381309760942, iteration: 89992
loss: 0.9636491537094116,grad_norm: 0.9999990600840389, iteration: 89993
loss: 1.0096759796142578,grad_norm: 0.9999991568251353, iteration: 89994
loss: 1.0077637434005737,grad_norm: 0.9999991234128572, iteration: 89995
loss: 1.0096299648284912,grad_norm: 0.9999991612575895, iteration: 89996
loss: 0.9901438355445862,grad_norm: 0.9999992237414959, iteration: 89997
loss: 1.0248216390609741,grad_norm: 0.999999294459054, iteration: 89998
loss: 1.0058376789093018,grad_norm: 0.9492592680284285, iteration: 89999
loss: 1.0462123155593872,grad_norm: 0.9999990485855587, iteration: 90000
Evaluating at step 90000
{'val': 0.9954276587814093, 'test': 2.1945327718417866}
loss: 0.9966423511505127,grad_norm: 0.9999992535719121, iteration: 90001
loss: 0.9697644114494324,grad_norm: 0.9999991045226461, iteration: 90002
loss: 1.0068747997283936,grad_norm: 0.9319704507111946, iteration: 90003
loss: 1.0206295251846313,grad_norm: 0.9765841103419418, iteration: 90004
loss: 1.0248318910598755,grad_norm: 0.9999994196371623, iteration: 90005
loss: 0.9940899014472961,grad_norm: 0.9968613182079942, iteration: 90006
loss: 1.007351040840149,grad_norm: 0.9999990494533526, iteration: 90007
loss: 1.0181190967559814,grad_norm: 0.9227677120415999, iteration: 90008
loss: 1.0335184335708618,grad_norm: 0.9999991584289236, iteration: 90009
loss: 1.0176030397415161,grad_norm: 0.8781808673966929, iteration: 90010
loss: 1.042479157447815,grad_norm: 0.9999992398927724, iteration: 90011
loss: 1.0289074182510376,grad_norm: 0.9999992845147821, iteration: 90012
loss: 1.0056791305541992,grad_norm: 0.9999993282761503, iteration: 90013
loss: 0.9740614295005798,grad_norm: 0.9999991977088374, iteration: 90014
loss: 1.0145286321640015,grad_norm: 0.9999990802680733, iteration: 90015
loss: 0.9861037135124207,grad_norm: 0.9907361262862152, iteration: 90016
loss: 1.044211983680725,grad_norm: 0.9999997336828418, iteration: 90017
loss: 1.0314940214157104,grad_norm: 0.9999991218740694, iteration: 90018
loss: 0.9795976877212524,grad_norm: 0.9421071431167442, iteration: 90019
loss: 1.0077623128890991,grad_norm: 0.9999990093787063, iteration: 90020
loss: 0.9991743564605713,grad_norm: 0.9999991216929814, iteration: 90021
loss: 1.0043329000473022,grad_norm: 0.9499278914522125, iteration: 90022
loss: 0.9955573081970215,grad_norm: 0.9999990761119826, iteration: 90023
loss: 1.0273141860961914,grad_norm: 0.9907153985011793, iteration: 90024
loss: 0.9678081274032593,grad_norm: 0.9999992103561864, iteration: 90025
loss: 1.0388545989990234,grad_norm: 0.9872498425490381, iteration: 90026
loss: 0.984089732170105,grad_norm: 0.9999989968330243, iteration: 90027
loss: 0.9833964705467224,grad_norm: 0.9683594163418062, iteration: 90028
loss: 0.9908463954925537,grad_norm: 0.9999990016453078, iteration: 90029
loss: 0.9570158123970032,grad_norm: 0.9193151789004665, iteration: 90030
loss: 0.9923620820045471,grad_norm: 0.9386083020663775, iteration: 90031
loss: 0.9799532890319824,grad_norm: 0.9999991387409097, iteration: 90032
loss: 1.0343260765075684,grad_norm: 0.9999999177820786, iteration: 90033
loss: 0.996299684047699,grad_norm: 0.9999991170022012, iteration: 90034
loss: 0.9732010364532471,grad_norm: 0.9999991619369853, iteration: 90035
loss: 0.997652530670166,grad_norm: 0.9999991344580007, iteration: 90036
loss: 0.9677271246910095,grad_norm: 0.8913698959431515, iteration: 90037
loss: 0.9939373731613159,grad_norm: 0.8393040647286735, iteration: 90038
loss: 1.0197659730911255,grad_norm: 0.967552165260224, iteration: 90039
loss: 1.0040818452835083,grad_norm: 0.9999990532038582, iteration: 90040
loss: 1.0314704179763794,grad_norm: 0.9837993714864868, iteration: 90041
loss: 0.991596519947052,grad_norm: 0.9999989907755803, iteration: 90042
loss: 1.0331668853759766,grad_norm: 0.9999989965911809, iteration: 90043
loss: 0.9868748188018799,grad_norm: 0.9999991061268224, iteration: 90044
loss: 0.9767991304397583,grad_norm: 0.9999991052803074, iteration: 90045
loss: 0.9917778372764587,grad_norm: 0.9999991623687282, iteration: 90046
loss: 1.0202618837356567,grad_norm: 0.898003735932991, iteration: 90047
loss: 0.9737281203269958,grad_norm: 0.9999992228077713, iteration: 90048
loss: 0.9976527690887451,grad_norm: 0.999999097934085, iteration: 90049
loss: 0.9768863320350647,grad_norm: 0.9798055542915072, iteration: 90050
loss: 1.018259048461914,grad_norm: 0.8973916752541549, iteration: 90051
loss: 0.988619863986969,grad_norm: 0.9273089091302882, iteration: 90052
loss: 1.0346314907073975,grad_norm: 0.9999992176823288, iteration: 90053
loss: 1.0121781826019287,grad_norm: 0.9999989292519622, iteration: 90054
loss: 1.0209935903549194,grad_norm: 0.9684753248669171, iteration: 90055
loss: 1.002446174621582,grad_norm: 0.8985966125654516, iteration: 90056
loss: 1.023700475692749,grad_norm: 0.9999992587764723, iteration: 90057
loss: 1.0436867475509644,grad_norm: 0.9999989965141134, iteration: 90058
loss: 1.0282033681869507,grad_norm: 0.9120257635946732, iteration: 90059
loss: 0.9901612997055054,grad_norm: 0.974837590034742, iteration: 90060
loss: 0.9837108254432678,grad_norm: 0.9557304803308801, iteration: 90061
loss: 1.023573637008667,grad_norm: 0.9999996714603429, iteration: 90062
loss: 0.9831817150115967,grad_norm: 0.9358251361610344, iteration: 90063
loss: 0.9969035387039185,grad_norm: 0.9931525308256954, iteration: 90064
loss: 1.013015627861023,grad_norm: 0.8775415194910181, iteration: 90065
loss: 0.9727342128753662,grad_norm: 0.9890983690644813, iteration: 90066
loss: 0.9904627203941345,grad_norm: 0.9999991188597749, iteration: 90067
loss: 0.985408365726471,grad_norm: 0.9999990946900532, iteration: 90068
loss: 0.9957560896873474,grad_norm: 0.9888984981194123, iteration: 90069
loss: 0.9971134662628174,grad_norm: 0.8943991678832155, iteration: 90070
loss: 0.9751439690589905,grad_norm: 0.9190763893508608, iteration: 90071
loss: 1.0153838396072388,grad_norm: 0.9999991844956374, iteration: 90072
loss: 0.9814023971557617,grad_norm: 0.9999990683138067, iteration: 90073
loss: 1.0272198915481567,grad_norm: 0.9999990666155494, iteration: 90074
loss: 1.0266884565353394,grad_norm: 0.9999990459099095, iteration: 90075
loss: 1.0082148313522339,grad_norm: 0.9967186597899017, iteration: 90076
loss: 1.014767050743103,grad_norm: 0.978340025325711, iteration: 90077
loss: 1.021448016166687,grad_norm: 0.9414002285809396, iteration: 90078
loss: 1.0150238275527954,grad_norm: 0.9999999150468797, iteration: 90079
loss: 1.0350793600082397,grad_norm: 0.9999998856353587, iteration: 90080
loss: 1.0039949417114258,grad_norm: 0.9440362687686871, iteration: 90081
loss: 0.982326328754425,grad_norm: 0.9999991964114626, iteration: 90082
loss: 1.0037516355514526,grad_norm: 0.999999095382652, iteration: 90083
loss: 1.0676542520523071,grad_norm: 0.9999998795213276, iteration: 90084
loss: 1.0063823461532593,grad_norm: 0.9999991170009521, iteration: 90085
loss: 0.9953585267066956,grad_norm: 0.8794675801237289, iteration: 90086
loss: 0.9958491325378418,grad_norm: 0.9381331694069265, iteration: 90087
loss: 1.026079773902893,grad_norm: 0.9999991037890472, iteration: 90088
loss: 0.9952763915061951,grad_norm: 0.932480632120864, iteration: 90089
loss: 0.9800269603729248,grad_norm: 0.9999991223750672, iteration: 90090
loss: 1.0130788087844849,grad_norm: 0.8920825857911302, iteration: 90091
loss: 0.9657639265060425,grad_norm: 0.9999992296966762, iteration: 90092
loss: 0.9824901819229126,grad_norm: 0.9999992386510759, iteration: 90093
loss: 0.9979024529457092,grad_norm: 0.9999991074061512, iteration: 90094
loss: 1.00514817237854,grad_norm: 0.9999991497438666, iteration: 90095
loss: 0.9731451272964478,grad_norm: 0.9999990958360674, iteration: 90096
loss: 1.0246870517730713,grad_norm: 0.9999993372341645, iteration: 90097
loss: 1.0271618366241455,grad_norm: 0.999999047434512, iteration: 90098
loss: 0.9726279377937317,grad_norm: 0.8907440107399588, iteration: 90099
loss: 1.0284440517425537,grad_norm: 0.9999989864064447, iteration: 90100
loss: 1.058207631111145,grad_norm: 0.9999990328838089, iteration: 90101
loss: 1.0617023706436157,grad_norm: 0.9999995740913084, iteration: 90102
loss: 1.008227825164795,grad_norm: 0.9999989954428697, iteration: 90103
loss: 1.0934500694274902,grad_norm: 0.9999997094175174, iteration: 90104
loss: 1.065340280532837,grad_norm: 0.9999991825720007, iteration: 90105
loss: 0.996491551399231,grad_norm: 0.999999127710554, iteration: 90106
loss: 0.9723653197288513,grad_norm: 0.9999991468060156, iteration: 90107
loss: 0.9931740164756775,grad_norm: 0.9999990727317319, iteration: 90108
loss: 1.0094248056411743,grad_norm: 0.9554460238562735, iteration: 90109
loss: 0.9832971096038818,grad_norm: 0.8667502193056693, iteration: 90110
loss: 0.9847646951675415,grad_norm: 0.9999990370534523, iteration: 90111
loss: 1.0596150159835815,grad_norm: 0.9999997005430943, iteration: 90112
loss: 1.010849118232727,grad_norm: 0.9585485006460597, iteration: 90113
loss: 0.9723235368728638,grad_norm: 0.9523969992933063, iteration: 90114
loss: 1.014938235282898,grad_norm: 0.9999993336520249, iteration: 90115
loss: 0.9832977056503296,grad_norm: 0.9960265442905809, iteration: 90116
loss: 0.9618539810180664,grad_norm: 0.9999990105534493, iteration: 90117
loss: 1.0578410625457764,grad_norm: 0.9999990712621611, iteration: 90118
loss: 0.9853982925415039,grad_norm: 0.8427150207428953, iteration: 90119
loss: 0.9582783579826355,grad_norm: 0.9999990393053292, iteration: 90120
loss: 1.0072587728500366,grad_norm: 0.9999992748703257, iteration: 90121
loss: 1.0380101203918457,grad_norm: 0.9999991502099237, iteration: 90122
loss: 1.0050047636032104,grad_norm: 0.9999991424660697, iteration: 90123
loss: 0.9856219291687012,grad_norm: 0.9999991570784201, iteration: 90124
loss: 1.0069293975830078,grad_norm: 0.999999178132602, iteration: 90125
loss: 1.0036070346832275,grad_norm: 0.9973473480563642, iteration: 90126
loss: 1.0175532102584839,grad_norm: 0.9999993067944682, iteration: 90127
loss: 0.9751399159431458,grad_norm: 0.9999991338604004, iteration: 90128
loss: 0.9968199133872986,grad_norm: 0.900307904106541, iteration: 90129
loss: 1.0283585786819458,grad_norm: 0.9495186480192814, iteration: 90130
loss: 1.0016529560089111,grad_norm: 0.9999997849326253, iteration: 90131
loss: 0.9882419109344482,grad_norm: 0.9999992077632525, iteration: 90132
loss: 1.0386462211608887,grad_norm: 0.9999998401955488, iteration: 90133
loss: 1.0231090784072876,grad_norm: 0.9999993655367414, iteration: 90134
loss: 0.9887968301773071,grad_norm: 0.9333250724431499, iteration: 90135
loss: 0.9982585310935974,grad_norm: 0.9999991157374657, iteration: 90136
loss: 1.033197283744812,grad_norm: 0.9999999100197564, iteration: 90137
loss: 0.9978046417236328,grad_norm: 0.9999991157695475, iteration: 90138
loss: 0.9711150527000427,grad_norm: 0.93990686087569, iteration: 90139
loss: 0.9909909963607788,grad_norm: 0.9999991814874555, iteration: 90140
loss: 0.9711039066314697,grad_norm: 0.89368356521711, iteration: 90141
loss: 0.9858251810073853,grad_norm: 0.9999991988006304, iteration: 90142
loss: 1.0117082595825195,grad_norm: 0.9774248519096949, iteration: 90143
loss: 0.9517061710357666,grad_norm: 0.999999172387169, iteration: 90144
loss: 1.0090779066085815,grad_norm: 0.9769265312097324, iteration: 90145
loss: 1.0124372243881226,grad_norm: 0.999999056910315, iteration: 90146
loss: 1.0447795391082764,grad_norm: 0.9999994635901446, iteration: 90147
loss: 1.0008747577667236,grad_norm: 0.9687137801797977, iteration: 90148
loss: 1.0261952877044678,grad_norm: 0.9999996100959747, iteration: 90149
loss: 1.0188409090042114,grad_norm: 0.9999991864925745, iteration: 90150
loss: 1.008016586303711,grad_norm: 0.9999989548978606, iteration: 90151
loss: 0.9869933724403381,grad_norm: 0.9999990699854641, iteration: 90152
loss: 1.0109013319015503,grad_norm: 0.9999990405269873, iteration: 90153
loss: 0.9761386513710022,grad_norm: 0.9999993374376446, iteration: 90154
loss: 1.0404258966445923,grad_norm: 0.9999991357530482, iteration: 90155
loss: 1.0205068588256836,grad_norm: 0.9184080789021077, iteration: 90156
loss: 1.015473484992981,grad_norm: 0.9953213107927414, iteration: 90157
loss: 1.0132406949996948,grad_norm: 0.9999993693817397, iteration: 90158
loss: 1.0017006397247314,grad_norm: 0.9999990485193566, iteration: 90159
loss: 1.0032005310058594,grad_norm: 0.9999991963435219, iteration: 90160
loss: 0.9435899257659912,grad_norm: 0.9999991641933724, iteration: 90161
loss: 0.9935541749000549,grad_norm: 0.9999992538211062, iteration: 90162
loss: 1.009914517402649,grad_norm: 0.9999992517677144, iteration: 90163
loss: 1.0133116245269775,grad_norm: 0.9999996667284776, iteration: 90164
loss: 0.9964243173599243,grad_norm: 0.9446595256036041, iteration: 90165
loss: 1.0232981443405151,grad_norm: 0.999999043505392, iteration: 90166
loss: 0.9825807213783264,grad_norm: 0.9999989805994839, iteration: 90167
loss: 1.0187383890151978,grad_norm: 0.9999992112623796, iteration: 90168
loss: 1.0017088651657104,grad_norm: 0.9999990542741736, iteration: 90169
loss: 1.0032011270523071,grad_norm: 0.9999993586021566, iteration: 90170
loss: 0.961987316608429,grad_norm: 0.9188111880911326, iteration: 90171
loss: 1.0264253616333008,grad_norm: 0.9999992131081595, iteration: 90172
loss: 1.0480430126190186,grad_norm: 0.9999992571536204, iteration: 90173
loss: 0.9916470646858215,grad_norm: 0.9999990017443919, iteration: 90174
loss: 0.9625236988067627,grad_norm: 0.9999992055034698, iteration: 90175
loss: 1.0135046243667603,grad_norm: 0.9999990926322557, iteration: 90176
loss: 0.9732052683830261,grad_norm: 0.9999991931556389, iteration: 90177
loss: 0.9845727682113647,grad_norm: 0.9238028732945436, iteration: 90178
loss: 1.0102678537368774,grad_norm: 0.9999991860287014, iteration: 90179
loss: 0.996185302734375,grad_norm: 0.9999991172176278, iteration: 90180
loss: 0.9894437789916992,grad_norm: 0.9999990694359617, iteration: 90181
loss: 0.9989082217216492,grad_norm: 0.9999990138803335, iteration: 90182
loss: 1.0178996324539185,grad_norm: 0.9999990433904747, iteration: 90183
loss: 0.9925791621208191,grad_norm: 0.9999989852383012, iteration: 90184
loss: 0.9878087639808655,grad_norm: 0.96388393588732, iteration: 90185
loss: 0.9895612597465515,grad_norm: 0.9999991685434918, iteration: 90186
loss: 0.9641438722610474,grad_norm: 0.9999991992716146, iteration: 90187
loss: 1.0030015707015991,grad_norm: 0.999999052212472, iteration: 90188
loss: 0.9570978283882141,grad_norm: 0.9999991343490452, iteration: 90189
loss: 0.9923338294029236,grad_norm: 0.999999206920653, iteration: 90190
loss: 1.0206023454666138,grad_norm: 0.9999992377149819, iteration: 90191
loss: 0.9709786176681519,grad_norm: 0.9999990751321, iteration: 90192
loss: 1.048760175704956,grad_norm: 0.9999989580284384, iteration: 90193
loss: 0.9977808594703674,grad_norm: 0.9091496924811778, iteration: 90194
loss: 1.0139164924621582,grad_norm: 0.9275118308317587, iteration: 90195
loss: 1.0193310976028442,grad_norm: 0.8916497098283419, iteration: 90196
loss: 1.0263231992721558,grad_norm: 0.9999991494081056, iteration: 90197
loss: 1.0134390592575073,grad_norm: 0.999999206376896, iteration: 90198
loss: 0.9862174391746521,grad_norm: 0.9999990407360494, iteration: 90199
loss: 0.978527307510376,grad_norm: 0.9999991978906528, iteration: 90200
loss: 1.0202685594558716,grad_norm: 0.9999992244751049, iteration: 90201
loss: 1.0230450630187988,grad_norm: 0.9231989938947833, iteration: 90202
loss: 1.003060221672058,grad_norm: 0.9999992128812557, iteration: 90203
loss: 0.9792057871818542,grad_norm: 0.9999992052332571, iteration: 90204
loss: 0.9490116834640503,grad_norm: 0.9137927951826438, iteration: 90205
loss: 1.0315263271331787,grad_norm: 0.9999991056049486, iteration: 90206
loss: 0.9820237159729004,grad_norm: 0.9999992545334399, iteration: 90207
loss: 0.97853684425354,grad_norm: 0.9968711147111035, iteration: 90208
loss: 1.0280556678771973,grad_norm: 0.9999991588745523, iteration: 90209
loss: 1.002441167831421,grad_norm: 0.999999264088384, iteration: 90210
loss: 0.969174861907959,grad_norm: 0.8799377898899687, iteration: 90211
loss: 0.9721591472625732,grad_norm: 0.907643164352133, iteration: 90212
loss: 0.9971035718917847,grad_norm: 0.9786666481229854, iteration: 90213
loss: 1.006377100944519,grad_norm: 0.9999993085348046, iteration: 90214
loss: 0.9980493783950806,grad_norm: 0.8666579824842099, iteration: 90215
loss: 0.9850356578826904,grad_norm: 0.9072499046445264, iteration: 90216
loss: 0.9791086912155151,grad_norm: 0.8827265903639442, iteration: 90217
loss: 1.0098835229873657,grad_norm: 0.9999989664403286, iteration: 90218
loss: 0.9761229753494263,grad_norm: 0.8434856899969503, iteration: 90219
loss: 0.973844051361084,grad_norm: 0.8520996932941476, iteration: 90220
loss: 1.040318250656128,grad_norm: 0.9999991588853009, iteration: 90221
loss: 0.9983604550361633,grad_norm: 0.9999996217688105, iteration: 90222
loss: 0.961742639541626,grad_norm: 0.8545462935387586, iteration: 90223
loss: 1.0288716554641724,grad_norm: 0.9999991469232481, iteration: 90224
loss: 1.01383376121521,grad_norm: 0.9859156707324886, iteration: 90225
loss: 1.0137174129486084,grad_norm: 0.9999993438540699, iteration: 90226
loss: 1.0090196132659912,grad_norm: 0.9051954067658052, iteration: 90227
loss: 1.0301592350006104,grad_norm: 0.9999990143275476, iteration: 90228
loss: 0.9977728724479675,grad_norm: 0.9069772350847568, iteration: 90229
loss: 0.9976270198822021,grad_norm: 0.9381019585814345, iteration: 90230
loss: 0.9892570972442627,grad_norm: 0.9999992371086607, iteration: 90231
loss: 1.0115538835525513,grad_norm: 0.993948876476789, iteration: 90232
loss: 1.0026434659957886,grad_norm: 0.9999993045991471, iteration: 90233
loss: 0.9985159635543823,grad_norm: 0.9147295744482035, iteration: 90234
loss: 0.9672555923461914,grad_norm: 0.9999989643420606, iteration: 90235
loss: 1.0137311220169067,grad_norm: 0.9999989857202384, iteration: 90236
loss: 1.001214623451233,grad_norm: 0.9999991203748678, iteration: 90237
loss: 0.9743101596832275,grad_norm: 0.9999990935639335, iteration: 90238
loss: 1.0086432695388794,grad_norm: 0.8444372856194894, iteration: 90239
loss: 0.9859092831611633,grad_norm: 0.9999993407942254, iteration: 90240
loss: 1.001296043395996,grad_norm: 0.9999992285374214, iteration: 90241
loss: 0.985717236995697,grad_norm: 0.9999991630115769, iteration: 90242
loss: 1.004995584487915,grad_norm: 0.9999991358976367, iteration: 90243
loss: 1.042328953742981,grad_norm: 0.9990159310071842, iteration: 90244
loss: 0.9978564381599426,grad_norm: 0.9999990881669787, iteration: 90245
loss: 0.9495583176612854,grad_norm: 0.9019606406321771, iteration: 90246
loss: 1.0024441480636597,grad_norm: 0.9999990696440484, iteration: 90247
loss: 0.9920602440834045,grad_norm: 0.9376704677410944, iteration: 90248
loss: 0.985932469367981,grad_norm: 0.9999989725657582, iteration: 90249
loss: 0.9916790127754211,grad_norm: 0.9999992318635015, iteration: 90250
loss: 1.0158394575119019,grad_norm: 0.9999990212296407, iteration: 90251
loss: 1.022140383720398,grad_norm: 0.9999994519167518, iteration: 90252
loss: 0.9920989871025085,grad_norm: 0.9332346114455032, iteration: 90253
loss: 1.011467695236206,grad_norm: 0.9999991842233142, iteration: 90254
loss: 0.9840153455734253,grad_norm: 0.9999991465241402, iteration: 90255
loss: 0.9670295119285583,grad_norm: 0.9777110400090417, iteration: 90256
loss: 0.9681956768035889,grad_norm: 0.999999186933703, iteration: 90257
loss: 0.9932469725608826,grad_norm: 0.9999990191565115, iteration: 90258
loss: 0.9356988072395325,grad_norm: 0.9999990060441107, iteration: 90259
loss: 0.9810342788696289,grad_norm: 0.9471390717425083, iteration: 90260
loss: 1.0107935667037964,grad_norm: 0.9874589832274905, iteration: 90261
loss: 0.9639676213264465,grad_norm: 0.9070518698926585, iteration: 90262
loss: 1.006753921508789,grad_norm: 0.9999991803618101, iteration: 90263
loss: 1.0260145664215088,grad_norm: 0.999999181800658, iteration: 90264
loss: 0.9934288859367371,grad_norm: 0.9999990934742355, iteration: 90265
loss: 1.0212587118148804,grad_norm: 0.9158102807664793, iteration: 90266
loss: 1.012010931968689,grad_norm: 0.986295235070206, iteration: 90267
loss: 1.0255550146102905,grad_norm: 0.9999990003286269, iteration: 90268
loss: 1.0040745735168457,grad_norm: 0.8581797314661745, iteration: 90269
loss: 1.0113531351089478,grad_norm: 0.9999991002170417, iteration: 90270
loss: 1.0136868953704834,grad_norm: 0.9412286971371419, iteration: 90271
loss: 0.9831559062004089,grad_norm: 0.9999989943025052, iteration: 90272
loss: 1.0041128396987915,grad_norm: 0.9999990372417449, iteration: 90273
loss: 0.9729058146476746,grad_norm: 0.9999991810536166, iteration: 90274
loss: 0.9877563118934631,grad_norm: 0.9999991468099526, iteration: 90275
loss: 0.9709075689315796,grad_norm: 0.999998989845169, iteration: 90276
loss: 0.971159815788269,grad_norm: 0.9643513608216181, iteration: 90277
loss: 1.034773349761963,grad_norm: 0.9999991267995167, iteration: 90278
loss: 1.0008004903793335,grad_norm: 0.9999992863733647, iteration: 90279
loss: 1.019569754600525,grad_norm: 0.9999990688635244, iteration: 90280
loss: 1.0167248249053955,grad_norm: 0.9999989971600863, iteration: 90281
loss: 0.9574931859970093,grad_norm: 0.9536986019902542, iteration: 90282
loss: 0.9818952679634094,grad_norm: 0.9999990921753139, iteration: 90283
loss: 1.0053938627243042,grad_norm: 0.8393378005917598, iteration: 90284
loss: 1.033412218093872,grad_norm: 0.999999115825595, iteration: 90285
loss: 1.0096967220306396,grad_norm: 0.9460733202004219, iteration: 90286
loss: 1.0303760766983032,grad_norm: 0.9999991440283111, iteration: 90287
loss: 0.9984346628189087,grad_norm: 0.9865776945315502, iteration: 90288
loss: 0.998423159122467,grad_norm: 0.9825714783204552, iteration: 90289
loss: 0.9956893920898438,grad_norm: 0.99999904409291, iteration: 90290
loss: 1.0151023864746094,grad_norm: 0.9999992064821068, iteration: 90291
loss: 1.004015326499939,grad_norm: 0.9999991917685349, iteration: 90292
loss: 1.031199336051941,grad_norm: 0.985367506751171, iteration: 90293
loss: 1.0101211071014404,grad_norm: 0.9999997065515396, iteration: 90294
loss: 0.9797534942626953,grad_norm: 0.9999994381102958, iteration: 90295
loss: 0.9608790278434753,grad_norm: 0.9999992023080994, iteration: 90296
loss: 0.9733931422233582,grad_norm: 0.9133665925203504, iteration: 90297
loss: 1.0112950801849365,grad_norm: 0.9999990432028086, iteration: 90298
loss: 1.0257282257080078,grad_norm: 0.999999508563614, iteration: 90299
loss: 0.9772459268569946,grad_norm: 0.9775892196985593, iteration: 90300
loss: 0.979842483997345,grad_norm: 0.9999989280883255, iteration: 90301
loss: 1.024900197982788,grad_norm: 0.9999992696064118, iteration: 90302
loss: 0.982567310333252,grad_norm: 0.9999991181641777, iteration: 90303
loss: 1.022450566291809,grad_norm: 0.9999989874865602, iteration: 90304
loss: 0.9941397905349731,grad_norm: 0.9999990571867309, iteration: 90305
loss: 0.9930939078330994,grad_norm: 0.9959783581091007, iteration: 90306
loss: 1.0164860486984253,grad_norm: 0.9969156755798356, iteration: 90307
loss: 0.976962685585022,grad_norm: 0.971370545726823, iteration: 90308
loss: 1.020093321800232,grad_norm: 0.9317968216707104, iteration: 90309
loss: 0.9906461238861084,grad_norm: 0.8667643191163263, iteration: 90310
loss: 0.9974003434181213,grad_norm: 0.8907094842022018, iteration: 90311
loss: 0.988568902015686,grad_norm: 0.9999992265754667, iteration: 90312
loss: 1.038879156112671,grad_norm: 0.9999999486545125, iteration: 90313
loss: 0.9905130863189697,grad_norm: 0.9999992107000022, iteration: 90314
loss: 0.9532246589660645,grad_norm: 0.9999990474362587, iteration: 90315
loss: 1.0144131183624268,grad_norm: 0.9999992011222444, iteration: 90316
loss: 0.9776061773300171,grad_norm: 0.9999992269228923, iteration: 90317
loss: 0.9862174391746521,grad_norm: 0.9767840689454519, iteration: 90318
loss: 0.9724960327148438,grad_norm: 0.9999991093914989, iteration: 90319
loss: 1.0082721710205078,grad_norm: 0.9999992554469906, iteration: 90320
loss: 0.9783434867858887,grad_norm: 0.9999990674785422, iteration: 90321
loss: 0.9667096138000488,grad_norm: 0.9999992063026706, iteration: 90322
loss: 0.9350759387016296,grad_norm: 0.9999989695207697, iteration: 90323
loss: 1.029937744140625,grad_norm: 0.9999990764555194, iteration: 90324
loss: 1.0022425651550293,grad_norm: 0.9999991063103238, iteration: 90325
loss: 1.0308057069778442,grad_norm: 0.9999992062156331, iteration: 90326
loss: 0.9734455347061157,grad_norm: 0.9640061305649881, iteration: 90327
loss: 0.9763874411582947,grad_norm: 0.9999992508981294, iteration: 90328
loss: 0.9599772691726685,grad_norm: 0.9999991046703731, iteration: 90329
loss: 1.021000623703003,grad_norm: 0.9999991471652175, iteration: 90330
loss: 1.0069419145584106,grad_norm: 0.9999991483894658, iteration: 90331
loss: 0.9981677532196045,grad_norm: 0.9999991654535105, iteration: 90332
loss: 1.0241978168487549,grad_norm: 0.999999196859573, iteration: 90333
loss: 1.0144379138946533,grad_norm: 0.9623184603877873, iteration: 90334
loss: 0.9950600862503052,grad_norm: 0.9999992039906143, iteration: 90335
loss: 1.0169857740402222,grad_norm: 0.9999991871768885, iteration: 90336
loss: 1.0083571672439575,grad_norm: 0.9984091858696624, iteration: 90337
loss: 1.008420467376709,grad_norm: 0.967582044198216, iteration: 90338
loss: 0.9657507538795471,grad_norm: 0.8074330404514605, iteration: 90339
loss: 0.9846846461296082,grad_norm: 0.9999990999912896, iteration: 90340
loss: 0.9796181917190552,grad_norm: 0.9819831430250114, iteration: 90341
loss: 1.0184552669525146,grad_norm: 0.9999991155687288, iteration: 90342
loss: 1.0183104276657104,grad_norm: 0.9999992367507021, iteration: 90343
loss: 1.013041615486145,grad_norm: 0.9999989242414475, iteration: 90344
loss: 1.0182478427886963,grad_norm: 0.9383499963379421, iteration: 90345
loss: 1.0040768384933472,grad_norm: 0.9612396502413058, iteration: 90346
loss: 1.0105952024459839,grad_norm: 0.9999990307267151, iteration: 90347
loss: 0.9955451488494873,grad_norm: 0.9874085385096474, iteration: 90348
loss: 1.0309761762619019,grad_norm: 0.9999991788294993, iteration: 90349
loss: 1.0004726648330688,grad_norm: 0.9999991138452254, iteration: 90350
loss: 1.0117887258529663,grad_norm: 0.999999402455257, iteration: 90351
loss: 0.9886332750320435,grad_norm: 0.9201344327121375, iteration: 90352
loss: 0.9587751030921936,grad_norm: 0.9999992186427497, iteration: 90353
loss: 0.9977129697799683,grad_norm: 0.88525083423837, iteration: 90354
loss: 1.0065041780471802,grad_norm: 0.9999992315836531, iteration: 90355
loss: 0.9924076795578003,grad_norm: 0.8360026421201292, iteration: 90356
loss: 1.0137786865234375,grad_norm: 0.9945336146884004, iteration: 90357
loss: 0.9660355448722839,grad_norm: 0.859020335014082, iteration: 90358
loss: 0.9862324595451355,grad_norm: 0.9999990117577877, iteration: 90359
loss: 0.9914826154708862,grad_norm: 0.9999993567259614, iteration: 90360
loss: 0.9937293529510498,grad_norm: 0.8961199405365967, iteration: 90361
loss: 1.020701289176941,grad_norm: 0.9999992681590394, iteration: 90362
loss: 0.9772217273712158,grad_norm: 0.9999991104149621, iteration: 90363
loss: 1.0022927522659302,grad_norm: 0.999999178603741, iteration: 90364
loss: 0.9793955683708191,grad_norm: 0.9999992087030003, iteration: 90365
loss: 0.9508845210075378,grad_norm: 0.9689735427857464, iteration: 90366
loss: 0.9989311695098877,grad_norm: 0.9599752845018191, iteration: 90367
loss: 0.9841711521148682,grad_norm: 0.9620722721180814, iteration: 90368
loss: 0.9947289228439331,grad_norm: 0.9999993018246452, iteration: 90369
loss: 1.074548602104187,grad_norm: 0.999999945759104, iteration: 90370
loss: 1.0340043306350708,grad_norm: 0.9999998342866152, iteration: 90371
loss: 0.9970685839653015,grad_norm: 0.957984432068511, iteration: 90372
loss: 1.0083926916122437,grad_norm: 0.9759434898851036, iteration: 90373
loss: 1.0384531021118164,grad_norm: 0.9999995797483975, iteration: 90374
loss: 0.9157113432884216,grad_norm: 0.9737817722991786, iteration: 90375
loss: 0.9884803891181946,grad_norm: 0.9691630382084212, iteration: 90376
loss: 0.9866325259208679,grad_norm: 0.9999991001098507, iteration: 90377
loss: 1.102144479751587,grad_norm: 0.9999991508589349, iteration: 90378
loss: 1.0478345155715942,grad_norm: 0.9999994016324107, iteration: 90379
loss: 0.9949199557304382,grad_norm: 0.8807138665860815, iteration: 90380
loss: 1.0439666509628296,grad_norm: 0.9999991878316471, iteration: 90381
loss: 1.0297588109970093,grad_norm: 0.9999989154023253, iteration: 90382
loss: 0.9800630211830139,grad_norm: 0.9999996172988845, iteration: 90383
loss: 0.9748207926750183,grad_norm: 0.999999304077523, iteration: 90384
loss: 0.9752057790756226,grad_norm: 0.9999992921546286, iteration: 90385
loss: 0.99797123670578,grad_norm: 0.9999991077707391, iteration: 90386
loss: 1.0558428764343262,grad_norm: 0.9225829191599578, iteration: 90387
loss: 0.994399905204773,grad_norm: 0.9400617283154304, iteration: 90388
loss: 1.0433337688446045,grad_norm: 0.9999992500173294, iteration: 90389
loss: 1.0722930431365967,grad_norm: 0.9999993728196311, iteration: 90390
loss: 1.1863936185836792,grad_norm: 0.9999993328896684, iteration: 90391
loss: 1.0175814628601074,grad_norm: 0.9999989996526667, iteration: 90392
loss: 1.0201327800750732,grad_norm: 0.9999992372888874, iteration: 90393
loss: 0.9897042512893677,grad_norm: 0.9999991251443416, iteration: 90394
loss: 1.0060045719146729,grad_norm: 0.9999991011298512, iteration: 90395
loss: 1.0429482460021973,grad_norm: 0.9999990852292802, iteration: 90396
loss: 1.0785411596298218,grad_norm: 0.9999993079142395, iteration: 90397
loss: 1.0070021152496338,grad_norm: 0.9999990304947638, iteration: 90398
loss: 1.016001582145691,grad_norm: 0.999999131550913, iteration: 90399
loss: 0.9895090460777283,grad_norm: 0.9711587674501091, iteration: 90400
loss: 1.0401301383972168,grad_norm: 0.9999990435724042, iteration: 90401
loss: 1.0537199974060059,grad_norm: 0.9999999545570646, iteration: 90402
loss: 0.977557361125946,grad_norm: 0.9999992854598394, iteration: 90403
loss: 1.0132261514663696,grad_norm: 0.9999993117120797, iteration: 90404
loss: 0.9910259246826172,grad_norm: 0.999999289383105, iteration: 90405
loss: 1.0564683675765991,grad_norm: 0.9999998109350116, iteration: 90406
loss: 0.986431896686554,grad_norm: 0.9999989606867921, iteration: 90407
loss: 0.9579343795776367,grad_norm: 0.9999990702856775, iteration: 90408
loss: 0.9908419847488403,grad_norm: 0.9999991409544047, iteration: 90409
loss: 1.0061522722244263,grad_norm: 0.9999990664115135, iteration: 90410
loss: 1.003053903579712,grad_norm: 0.9999990868821687, iteration: 90411
loss: 1.0014582872390747,grad_norm: 0.9999991561264504, iteration: 90412
loss: 0.9944551587104797,grad_norm: 0.9430636673796965, iteration: 90413
loss: 1.0031483173370361,grad_norm: 0.9999990754155463, iteration: 90414
loss: 0.9527894854545593,grad_norm: 0.9999992273553573, iteration: 90415
loss: 1.0016313791275024,grad_norm: 0.9999991407625677, iteration: 90416
loss: 0.9948632121086121,grad_norm: 0.999999122657313, iteration: 90417
loss: 1.029475212097168,grad_norm: 0.9371981529812884, iteration: 90418
loss: 0.9444848895072937,grad_norm: 0.99999905244582, iteration: 90419
loss: 1.0042741298675537,grad_norm: 0.9999992162942466, iteration: 90420
loss: 0.9591591358184814,grad_norm: 0.9999991100161246, iteration: 90421
loss: 0.9520734548568726,grad_norm: 0.8981461041314432, iteration: 90422
loss: 1.0232436656951904,grad_norm: 0.9999990304670828, iteration: 90423
loss: 0.9731127023696899,grad_norm: 0.9999990233438694, iteration: 90424
loss: 0.9636931419372559,grad_norm: 0.9999990558015693, iteration: 90425
loss: 1.0444116592407227,grad_norm: 0.9999991086629986, iteration: 90426
loss: 1.0080125331878662,grad_norm: 0.9999992442558282, iteration: 90427
loss: 1.0137958526611328,grad_norm: 0.9999991293650968, iteration: 90428
loss: 1.00105619430542,grad_norm: 0.9999991150719203, iteration: 90429
loss: 1.0050536394119263,grad_norm: 0.9999991749237243, iteration: 90430
loss: 0.968059778213501,grad_norm: 0.999999142207863, iteration: 90431
loss: 1.034372329711914,grad_norm: 0.9999992257396377, iteration: 90432
loss: 0.9993948340415955,grad_norm: 0.999999086694134, iteration: 90433
loss: 0.9745380282402039,grad_norm: 0.9999995333882802, iteration: 90434
loss: 1.0045720338821411,grad_norm: 0.9999990443649568, iteration: 90435
loss: 0.9958158731460571,grad_norm: 0.9999990102949701, iteration: 90436
loss: 1.0089564323425293,grad_norm: 0.9970254115569449, iteration: 90437
loss: 0.9852215051651001,grad_norm: 0.99999925947059, iteration: 90438
loss: 1.0051521062850952,grad_norm: 0.9999991458628998, iteration: 90439
loss: 0.9725008606910706,grad_norm: 0.9926606082115054, iteration: 90440
loss: 0.9809344410896301,grad_norm: 0.9999993047040702, iteration: 90441
loss: 1.009792447090149,grad_norm: 0.9999990880975462, iteration: 90442
loss: 0.9963643550872803,grad_norm: 0.9999990669560178, iteration: 90443
loss: 1.0023622512817383,grad_norm: 0.9999990591990653, iteration: 90444
loss: 0.9801889657974243,grad_norm: 0.8948439346245612, iteration: 90445
loss: 0.9875246286392212,grad_norm: 0.9575877387918472, iteration: 90446
loss: 1.0246105194091797,grad_norm: 0.9656942236015876, iteration: 90447
loss: 0.9845316410064697,grad_norm: 0.9999990100263438, iteration: 90448
loss: 1.039001226425171,grad_norm: 0.9999991910111611, iteration: 90449
loss: 1.0071064233779907,grad_norm: 0.9999992436176551, iteration: 90450
loss: 0.9959750771522522,grad_norm: 0.9999991765202626, iteration: 90451
loss: 1.0251777172088623,grad_norm: 0.9928416794077033, iteration: 90452
loss: 1.0430479049682617,grad_norm: 0.937068746614668, iteration: 90453
loss: 0.9975720643997192,grad_norm: 0.9999990762681031, iteration: 90454
loss: 0.9983302354812622,grad_norm: 0.9999991958112212, iteration: 90455
loss: 0.9989655613899231,grad_norm: 0.9999991215646757, iteration: 90456
loss: 1.0242866277694702,grad_norm: 0.9999990618040518, iteration: 90457
loss: 0.9813156127929688,grad_norm: 0.999999123776835, iteration: 90458
loss: 0.9768416285514832,grad_norm: 0.9999991069069095, iteration: 90459
loss: 1.01823890209198,grad_norm: 0.9999991323151743, iteration: 90460
loss: 1.0096757411956787,grad_norm: 0.9999991558318344, iteration: 90461
loss: 0.9615799188613892,grad_norm: 0.9999991378720087, iteration: 90462
loss: 0.9745779037475586,grad_norm: 0.8287195168167226, iteration: 90463
loss: 0.9662588834762573,grad_norm: 0.8181405041083112, iteration: 90464
loss: 0.9958537220954895,grad_norm: 0.9999991009672428, iteration: 90465
loss: 0.9960837960243225,grad_norm: 0.9999992680400435, iteration: 90466
loss: 0.9685475826263428,grad_norm: 0.9617032615371484, iteration: 90467
loss: 1.0171127319335938,grad_norm: 0.9999993559243595, iteration: 90468
loss: 0.9697635769844055,grad_norm: 0.9999992738470924, iteration: 90469
loss: 1.0021406412124634,grad_norm: 0.9999992028660561, iteration: 90470
loss: 0.9506176114082336,grad_norm: 0.9999991225665473, iteration: 90471
loss: 1.0011577606201172,grad_norm: 0.9999990601319816, iteration: 90472
loss: 1.0024772882461548,grad_norm: 0.9999992144725605, iteration: 90473
loss: 1.0055255889892578,grad_norm: 0.9751175775872372, iteration: 90474
loss: 1.0153316259384155,grad_norm: 0.9999990208161054, iteration: 90475
loss: 0.9956363439559937,grad_norm: 0.8901633853577025, iteration: 90476
loss: 1.0119115114212036,grad_norm: 0.9999991561967451, iteration: 90477
loss: 0.9779565930366516,grad_norm: 0.9999991191107223, iteration: 90478
loss: 1.0236307382583618,grad_norm: 0.999999068908242, iteration: 90479
loss: 1.007501482963562,grad_norm: 0.9999990714199662, iteration: 90480
loss: 1.019455909729004,grad_norm: 0.8446801473029666, iteration: 90481
loss: 0.9709134697914124,grad_norm: 0.9880947738351769, iteration: 90482
loss: 1.0212180614471436,grad_norm: 0.9999990414759252, iteration: 90483
loss: 0.9724486470222473,grad_norm: 0.9315201572587208, iteration: 90484
loss: 1.0339864492416382,grad_norm: 0.9999992227208191, iteration: 90485
loss: 0.9882169365882874,grad_norm: 0.9778908403543787, iteration: 90486
loss: 1.0451873540878296,grad_norm: 0.9999990402284983, iteration: 90487
loss: 0.9875507950782776,grad_norm: 0.9999989988785778, iteration: 90488
loss: 0.978103518486023,grad_norm: 0.9999992177226238, iteration: 90489
loss: 1.014388084411621,grad_norm: 0.904347131799016, iteration: 90490
loss: 1.0043632984161377,grad_norm: 0.9999990854350724, iteration: 90491
loss: 0.9979992508888245,grad_norm: 0.9999991064900362, iteration: 90492
loss: 1.0011318922042847,grad_norm: 0.9999990359645908, iteration: 90493
loss: 1.010089635848999,grad_norm: 0.9999994042523898, iteration: 90494
loss: 1.0374889373779297,grad_norm: 0.9999990720779088, iteration: 90495
loss: 1.1968750953674316,grad_norm: 1.000000089593857, iteration: 90496
loss: 1.0312275886535645,grad_norm: 0.9999993053790388, iteration: 90497
loss: 1.026415467262268,grad_norm: 0.9999990913664741, iteration: 90498
loss: 1.004881739616394,grad_norm: 0.9999992170903422, iteration: 90499
loss: 1.0389927625656128,grad_norm: 0.9798459187831562, iteration: 90500
loss: 1.0122077465057373,grad_norm: 0.9999991782399501, iteration: 90501
loss: 0.9934036731719971,grad_norm: 0.8426826537126437, iteration: 90502
loss: 1.0234277248382568,grad_norm: 0.9999992046077381, iteration: 90503
loss: 0.9810295701026917,grad_norm: 0.9393100480707137, iteration: 90504
loss: 1.040238380432129,grad_norm: 0.961853689732475, iteration: 90505
loss: 1.0249364376068115,grad_norm: 0.9999991737808533, iteration: 90506
loss: 1.0098215341567993,grad_norm: 0.999999050823112, iteration: 90507
loss: 1.0008838176727295,grad_norm: 0.9999990869003242, iteration: 90508
loss: 0.9523738622665405,grad_norm: 0.999999003636254, iteration: 90509
loss: 0.9976274371147156,grad_norm: 0.999999082879569, iteration: 90510
loss: 1.013772964477539,grad_norm: 0.9407162317369767, iteration: 90511
loss: 0.9879710078239441,grad_norm: 0.9554384732515204, iteration: 90512
loss: 1.0053151845932007,grad_norm: 0.9640945089195151, iteration: 90513
loss: 1.0025004148483276,grad_norm: 0.9999991249286667, iteration: 90514
loss: 0.9769206643104553,grad_norm: 0.9999991783368697, iteration: 90515
loss: 1.0299104452133179,grad_norm: 0.8405692232880059, iteration: 90516
loss: 0.9866016507148743,grad_norm: 0.9888078105752188, iteration: 90517
loss: 1.0105993747711182,grad_norm: 0.9747681111350438, iteration: 90518
loss: 0.9830074906349182,grad_norm: 0.9999991396653352, iteration: 90519
loss: 0.9565448760986328,grad_norm: 0.9999991161701519, iteration: 90520
loss: 0.9795580506324768,grad_norm: 0.9999990996098103, iteration: 90521
loss: 0.9775140285491943,grad_norm: 0.9999991966305581, iteration: 90522
loss: 0.9975204467773438,grad_norm: 0.9972634732158815, iteration: 90523
loss: 1.0153915882110596,grad_norm: 0.8206216679588079, iteration: 90524
loss: 1.0416508913040161,grad_norm: 0.8778730912407462, iteration: 90525
loss: 1.0001513957977295,grad_norm: 0.9999989937488653, iteration: 90526
loss: 1.0308380126953125,grad_norm: 0.8707980264059684, iteration: 90527
loss: 0.982499361038208,grad_norm: 0.9272977633240637, iteration: 90528
loss: 1.003738284111023,grad_norm: 0.9999992108225961, iteration: 90529
loss: 0.9811280369758606,grad_norm: 0.9999991348764743, iteration: 90530
loss: 0.9856183528900146,grad_norm: 0.9999989805484658, iteration: 90531
loss: 0.965205192565918,grad_norm: 0.9999992643215999, iteration: 90532
loss: 1.0230116844177246,grad_norm: 0.9999990311877355, iteration: 90533
loss: 1.015159249305725,grad_norm: 0.9999990289277979, iteration: 90534
loss: 1.000749945640564,grad_norm: 0.9999991593308093, iteration: 90535
loss: 1.0100080966949463,grad_norm: 0.9031789716972999, iteration: 90536
loss: 0.9907717704772949,grad_norm: 0.9632058520730835, iteration: 90537
loss: 1.0107018947601318,grad_norm: 0.9999990668862517, iteration: 90538
loss: 1.0203731060028076,grad_norm: 0.9999990129820562, iteration: 90539
loss: 1.0216461420059204,grad_norm: 0.9999991955212013, iteration: 90540
loss: 0.9938440918922424,grad_norm: 0.9611581589615048, iteration: 90541
loss: 0.9857895970344543,grad_norm: 0.9999992810036554, iteration: 90542
loss: 1.0060632228851318,grad_norm: 0.9999990694154918, iteration: 90543
loss: 0.9891811609268188,grad_norm: 0.9999990155073041, iteration: 90544
loss: 1.010627269744873,grad_norm: 0.9999990954204174, iteration: 90545
loss: 1.0236934423446655,grad_norm: 0.950543476928659, iteration: 90546
loss: 0.9451383948326111,grad_norm: 0.9999991320186938, iteration: 90547
loss: 1.0181763172149658,grad_norm: 0.8765722284849098, iteration: 90548
loss: 1.005684733390808,grad_norm: 0.9999993800409505, iteration: 90549
loss: 0.9919612407684326,grad_norm: 0.9999990068952417, iteration: 90550
loss: 0.9642455577850342,grad_norm: 0.9999991754773339, iteration: 90551
loss: 0.9569398164749146,grad_norm: 0.9343426803979313, iteration: 90552
loss: 0.9980546832084656,grad_norm: 0.9175937908348614, iteration: 90553
loss: 0.9531463384628296,grad_norm: 0.9999990158461649, iteration: 90554
loss: 1.0173754692077637,grad_norm: 0.9999991691933905, iteration: 90555
loss: 0.99845951795578,grad_norm: 0.9263491132547851, iteration: 90556
loss: 0.9799279570579529,grad_norm: 0.9999991985151359, iteration: 90557
loss: 1.0012295246124268,grad_norm: 0.9999997303210642, iteration: 90558
loss: 0.9824230670928955,grad_norm: 0.9999993256357225, iteration: 90559
loss: 1.0401912927627563,grad_norm: 0.9999992171516144, iteration: 90560
loss: 0.9456518292427063,grad_norm: 0.9839030813128185, iteration: 90561
loss: 1.0043851137161255,grad_norm: 0.999999171990085, iteration: 90562
loss: 1.0167710781097412,grad_norm: 0.9999992201956401, iteration: 90563
loss: 0.9828299880027771,grad_norm: 0.9999991082675886, iteration: 90564
loss: 1.0564560890197754,grad_norm: 0.9999993339601349, iteration: 90565
loss: 1.0531059503555298,grad_norm: 0.9999990735448931, iteration: 90566
loss: 1.0307340621948242,grad_norm: 0.9999991665284877, iteration: 90567
loss: 0.9659823179244995,grad_norm: 0.9999991927424979, iteration: 90568
loss: 0.9932283759117126,grad_norm: 0.9999989890140024, iteration: 90569
loss: 1.0190337896347046,grad_norm: 0.9611090853536838, iteration: 90570
loss: 0.980466902256012,grad_norm: 0.9827434838187274, iteration: 90571
loss: 1.0012880563735962,grad_norm: 0.9999992921063656, iteration: 90572
loss: 0.9859784245491028,grad_norm: 0.9999989321861722, iteration: 90573
loss: 1.0124300718307495,grad_norm: 0.9999992614093453, iteration: 90574
loss: 1.0299930572509766,grad_norm: 0.9999990692667708, iteration: 90575
loss: 1.002477765083313,grad_norm: 0.9652874598439618, iteration: 90576
loss: 1.017723560333252,grad_norm: 0.9231582727992665, iteration: 90577
loss: 1.0272495746612549,grad_norm: 0.999999033391897, iteration: 90578
loss: 1.0252448320388794,grad_norm: 0.999999262534557, iteration: 90579
loss: 0.9801448583602905,grad_norm: 0.999999183355282, iteration: 90580
loss: 0.9974001049995422,grad_norm: 0.9999994160334129, iteration: 90581
loss: 1.1160590648651123,grad_norm: 0.9999996675321895, iteration: 90582
loss: 0.9874693155288696,grad_norm: 0.9062071839552635, iteration: 90583
loss: 0.9942291975021362,grad_norm: 0.9999992180988272, iteration: 90584
loss: 0.9723185896873474,grad_norm: 0.97145686564441, iteration: 90585
loss: 0.9672409892082214,grad_norm: 0.9999990768374621, iteration: 90586
loss: 0.9722432494163513,grad_norm: 0.9958918222192348, iteration: 90587
loss: 0.953554093837738,grad_norm: 0.9999992882127375, iteration: 90588
loss: 1.0015015602111816,grad_norm: 0.9999990978769563, iteration: 90589
loss: 0.9722601771354675,grad_norm: 0.9999990717025561, iteration: 90590
loss: 0.9990831613540649,grad_norm: 0.999999027137982, iteration: 90591
loss: 1.0984082221984863,grad_norm: 0.9999991759100579, iteration: 90592
loss: 0.9839136004447937,grad_norm: 0.9999992139137672, iteration: 90593
loss: 0.9880886077880859,grad_norm: 0.9999991658813326, iteration: 90594
loss: 0.9963168501853943,grad_norm: 0.9999990846909323, iteration: 90595
loss: 0.9784356355667114,grad_norm: 0.9757353970632191, iteration: 90596
loss: 1.005216121673584,grad_norm: 0.9556986011332393, iteration: 90597
loss: 0.9586769938468933,grad_norm: 0.9999991636912305, iteration: 90598
loss: 0.9747800230979919,grad_norm: 0.9999990672543443, iteration: 90599
loss: 1.0432339906692505,grad_norm: 0.9999991660415443, iteration: 90600
loss: 1.0281345844268799,grad_norm: 0.999999165596847, iteration: 90601
loss: 1.004090666770935,grad_norm: 0.9253044057497257, iteration: 90602
loss: 1.0229371786117554,grad_norm: 0.9999998355328016, iteration: 90603
loss: 1.0076875686645508,grad_norm: 0.9999991311337052, iteration: 90604
loss: 1.0222358703613281,grad_norm: 0.9999990634243582, iteration: 90605
loss: 0.9993534684181213,grad_norm: 0.9999993609400725, iteration: 90606
loss: 1.0273164510726929,grad_norm: 0.999999127108495, iteration: 90607
loss: 1.0644456148147583,grad_norm: 0.999999643607064, iteration: 90608
loss: 0.9848660826683044,grad_norm: 0.9117336428957964, iteration: 90609
loss: 1.2553714513778687,grad_norm: 1.0000000167913534, iteration: 90610
loss: 0.9700364470481873,grad_norm: 0.9999991241674754, iteration: 90611
loss: 1.0165355205535889,grad_norm: 0.9999991950902428, iteration: 90612
loss: 1.005576252937317,grad_norm: 0.9999989995783354, iteration: 90613
loss: 0.9949372410774231,grad_norm: 0.9999993867221701, iteration: 90614
loss: 1.0757663249969482,grad_norm: 0.9999992168044599, iteration: 90615
loss: 0.993272602558136,grad_norm: 0.9999990466142405, iteration: 90616
loss: 1.024529218673706,grad_norm: 0.9999992306186299, iteration: 90617
loss: 1.0208876132965088,grad_norm: 0.9428101024859578, iteration: 90618
loss: 1.1767441034317017,grad_norm: 0.9999994694413491, iteration: 90619
loss: 1.0310953855514526,grad_norm: 0.9201311775110542, iteration: 90620
loss: 1.0086745023727417,grad_norm: 0.9999991827204647, iteration: 90621
loss: 1.0048811435699463,grad_norm: 0.8717552867954085, iteration: 90622
loss: 0.9844323992729187,grad_norm: 0.9999991092803405, iteration: 90623
loss: 0.9687068462371826,grad_norm: 0.9999990907114908, iteration: 90624
loss: 1.0423561334609985,grad_norm: 0.9999989986066389, iteration: 90625
loss: 1.0028917789459229,grad_norm: 0.9999990225848873, iteration: 90626
loss: 1.0190356969833374,grad_norm: 0.9417947139258297, iteration: 90627
loss: 0.9997034072875977,grad_norm: 0.9999997849861479, iteration: 90628
loss: 1.0036500692367554,grad_norm: 0.9275839925887964, iteration: 90629
loss: 1.0162359476089478,grad_norm: 0.9999990094815688, iteration: 90630
loss: 1.0113168954849243,grad_norm: 0.9999989582999236, iteration: 90631
loss: 1.013856053352356,grad_norm: 0.9999991782230258, iteration: 90632
loss: 1.0071496963500977,grad_norm: 0.9999990856438312, iteration: 90633
loss: 0.973230242729187,grad_norm: 0.9999991122220736, iteration: 90634
loss: 1.038003921508789,grad_norm: 0.9999992937445076, iteration: 90635
loss: 0.9798767566680908,grad_norm: 0.9999990664835963, iteration: 90636
loss: 0.9897788763046265,grad_norm: 0.9999990868739134, iteration: 90637
loss: 0.9960853457450867,grad_norm: 0.9999994351683227, iteration: 90638
loss: 0.9564158916473389,grad_norm: 0.9999990376194182, iteration: 90639
loss: 1.0281435251235962,grad_norm: 0.9999992230420904, iteration: 90640
loss: 0.977830708026886,grad_norm: 0.9999992444438461, iteration: 90641
loss: 1.0133830308914185,grad_norm: 0.9999989951327108, iteration: 90642
loss: 0.9857668876647949,grad_norm: 0.9999992425608909, iteration: 90643
loss: 0.9988706111907959,grad_norm: 0.9999991474221976, iteration: 90644
loss: 1.0230528116226196,grad_norm: 0.9999991736719608, iteration: 90645
loss: 1.0183188915252686,grad_norm: 0.9999991998670644, iteration: 90646
loss: 1.0085994005203247,grad_norm: 0.9785400193147958, iteration: 90647
loss: 0.9960971474647522,grad_norm: 0.9999992530193936, iteration: 90648
loss: 0.9800129532814026,grad_norm: 0.9999991830005298, iteration: 90649
loss: 0.9856102466583252,grad_norm: 0.9999992061345746, iteration: 90650
loss: 1.017211675643921,grad_norm: 0.9999992553297242, iteration: 90651
loss: 1.0036628246307373,grad_norm: 0.9999989951162542, iteration: 90652
loss: 1.000217318534851,grad_norm: 0.9999991698969878, iteration: 90653
loss: 1.0132052898406982,grad_norm: 0.9999990413432344, iteration: 90654
loss: 1.0451748371124268,grad_norm: 0.9999990499568486, iteration: 90655
loss: 1.0099544525146484,grad_norm: 0.999999095236543, iteration: 90656
loss: 1.0215040445327759,grad_norm: 0.9999991791730188, iteration: 90657
loss: 1.0539283752441406,grad_norm: 0.9999993096172571, iteration: 90658
loss: 1.0096763372421265,grad_norm: 0.9999990460174918, iteration: 90659
loss: 0.9773741364479065,grad_norm: 0.9180309155237191, iteration: 90660
loss: 1.0707106590270996,grad_norm: 0.9999993212984827, iteration: 90661
loss: 0.975821852684021,grad_norm: 0.8843716209606477, iteration: 90662
loss: 1.009556531906128,grad_norm: 0.9999992791734802, iteration: 90663
loss: 0.9918307065963745,grad_norm: 0.9999990986657944, iteration: 90664
loss: 1.0000241994857788,grad_norm: 0.8478474846909535, iteration: 90665
loss: 1.1391109228134155,grad_norm: 0.9999994841082389, iteration: 90666
loss: 1.0026934146881104,grad_norm: 0.9999999731690553, iteration: 90667
loss: 1.0215617418289185,grad_norm: 0.9999996623076111, iteration: 90668
loss: 1.0273247957229614,grad_norm: 0.9888811649398358, iteration: 90669
loss: 0.9898427724838257,grad_norm: 0.9999990723637018, iteration: 90670
loss: 1.0210171937942505,grad_norm: 0.9670727061797566, iteration: 90671
loss: 1.0367170572280884,grad_norm: 0.9999991843257686, iteration: 90672
loss: 1.0099050998687744,grad_norm: 0.9999992966621898, iteration: 90673
loss: 1.058421015739441,grad_norm: 0.9999994267383087, iteration: 90674
loss: 1.030712366104126,grad_norm: 0.9999991616336725, iteration: 90675
loss: 0.9964102506637573,grad_norm: 0.9700259425825948, iteration: 90676
loss: 1.0127931833267212,grad_norm: 0.9999990609194378, iteration: 90677
loss: 1.0216425657272339,grad_norm: 0.999999245251921, iteration: 90678
loss: 1.0009691715240479,grad_norm: 0.9262446574085479, iteration: 90679
loss: 1.0077612400054932,grad_norm: 0.8526120084505614, iteration: 90680
loss: 0.9985453486442566,grad_norm: 0.9447426109324696, iteration: 90681
loss: 1.004441738128662,grad_norm: 0.994219438701407, iteration: 90682
loss: 1.017453908920288,grad_norm: 0.9999991610431266, iteration: 90683
loss: 0.9762347340583801,grad_norm: 0.9999989915748738, iteration: 90684
loss: 1.007365345954895,grad_norm: 0.9999997811279192, iteration: 90685
loss: 1.0413810014724731,grad_norm: 0.9999998654267175, iteration: 90686
loss: 0.9640582799911499,grad_norm: 0.9999989746591558, iteration: 90687
loss: 1.03917396068573,grad_norm: 0.9999997116910257, iteration: 90688
loss: 1.009764552116394,grad_norm: 0.9999998262598908, iteration: 90689
loss: 0.9913018345832825,grad_norm: 0.9999990397720318, iteration: 90690
loss: 1.0552875995635986,grad_norm: 0.9999996711502918, iteration: 90691
loss: 0.9968846440315247,grad_norm: 0.9999995972646473, iteration: 90692
loss: 1.0246679782867432,grad_norm: 0.9999991164609375, iteration: 90693
loss: 0.9931175112724304,grad_norm: 0.99999898057416, iteration: 90694
loss: 1.06548011302948,grad_norm: 0.9999991612835437, iteration: 90695
loss: 0.9922822117805481,grad_norm: 0.9999991597162979, iteration: 90696
loss: 1.0011717081069946,grad_norm: 0.9999991797828277, iteration: 90697
loss: 0.9945683479309082,grad_norm: 0.9999991700230203, iteration: 90698
loss: 1.0412381887435913,grad_norm: 0.9999992397784241, iteration: 90699
loss: 1.0076587200164795,grad_norm: 0.9999990427127511, iteration: 90700
loss: 1.036444902420044,grad_norm: 0.9183106754213598, iteration: 90701
loss: 1.0052618980407715,grad_norm: 0.999999032762179, iteration: 90702
loss: 1.0080249309539795,grad_norm: 0.9999992353055996, iteration: 90703
loss: 0.945380687713623,grad_norm: 0.999999078398272, iteration: 90704
loss: 0.990985631942749,grad_norm: 0.9999990455490031, iteration: 90705
loss: 1.009322166442871,grad_norm: 0.9999990366873073, iteration: 90706
loss: 0.9706274271011353,grad_norm: 0.9999992837414117, iteration: 90707
loss: 0.9782792925834656,grad_norm: 0.9999990804712561, iteration: 90708
loss: 1.0183219909667969,grad_norm: 0.9309863404024712, iteration: 90709
loss: 1.0020769834518433,grad_norm: 0.9999992826246483, iteration: 90710
loss: 1.0184417963027954,grad_norm: 0.9940652065035795, iteration: 90711
loss: 1.0364930629730225,grad_norm: 0.972663152591304, iteration: 90712
loss: 0.9438164830207825,grad_norm: 0.9773743421098693, iteration: 90713
loss: 0.9934079051017761,grad_norm: 0.86560456760663, iteration: 90714
loss: 1.0084819793701172,grad_norm: 0.9367658645020059, iteration: 90715
loss: 1.0166234970092773,grad_norm: 0.9999991178352228, iteration: 90716
loss: 0.9574382901191711,grad_norm: 0.9320986645169286, iteration: 90717
loss: 0.9881007671356201,grad_norm: 0.9709158253393836, iteration: 90718
loss: 0.9772454500198364,grad_norm: 0.9293275047175901, iteration: 90719
loss: 0.9545546770095825,grad_norm: 0.8923851477712501, iteration: 90720
loss: 0.9897711277008057,grad_norm: 0.9999990446142977, iteration: 90721
loss: 1.0189547538757324,grad_norm: 0.9999990405928695, iteration: 90722
loss: 0.9826348423957825,grad_norm: 0.9999989944472241, iteration: 90723
loss: 0.9933550357818604,grad_norm: 0.9884571844615768, iteration: 90724
loss: 0.9994496703147888,grad_norm: 0.9271421085093199, iteration: 90725
loss: 0.9914030432701111,grad_norm: 0.9513898633775643, iteration: 90726
loss: 1.0291932821273804,grad_norm: 0.9250885619665897, iteration: 90727
loss: 1.080026388168335,grad_norm: 0.9999993974437973, iteration: 90728
loss: 0.991703450679779,grad_norm: 0.9999990938195057, iteration: 90729
loss: 0.9941139817237854,grad_norm: 0.7561592283329855, iteration: 90730
loss: 0.9939197897911072,grad_norm: 0.8564595006146641, iteration: 90731
loss: 0.9694300293922424,grad_norm: 0.9999991045694193, iteration: 90732
loss: 1.0163129568099976,grad_norm: 0.9999991035514512, iteration: 90733
loss: 1.0050994157791138,grad_norm: 0.9999991235530785, iteration: 90734
loss: 0.9936703443527222,grad_norm: 0.9999991532338369, iteration: 90735
loss: 1.0121791362762451,grad_norm: 0.9999997039079009, iteration: 90736
loss: 1.0253294706344604,grad_norm: 0.9999998991195919, iteration: 90737
loss: 0.9921717643737793,grad_norm: 0.9999991373201914, iteration: 90738
loss: 1.0857391357421875,grad_norm: 0.9999998649438848, iteration: 90739
loss: 1.0274289846420288,grad_norm: 0.9548813844302492, iteration: 90740
loss: 0.9807505011558533,grad_norm: 0.9999989915037295, iteration: 90741
loss: 1.0157661437988281,grad_norm: 0.9851043591923495, iteration: 90742
loss: 0.9974198341369629,grad_norm: 0.9999991662554554, iteration: 90743
loss: 0.9976699948310852,grad_norm: 0.9643798850745491, iteration: 90744
loss: 0.9718887209892273,grad_norm: 0.9999990631336819, iteration: 90745
loss: 1.0439167022705078,grad_norm: 0.9999991656544189, iteration: 90746
loss: 1.0279176235198975,grad_norm: 0.8978234770786013, iteration: 90747
loss: 1.0394006967544556,grad_norm: 0.9999995777913455, iteration: 90748
loss: 0.9935586452484131,grad_norm: 0.9999996042442254, iteration: 90749
loss: 0.986370861530304,grad_norm: 0.9153823415653071, iteration: 90750
loss: 0.9792466759681702,grad_norm: 0.9070709161004504, iteration: 90751
loss: 0.966496467590332,grad_norm: 0.9474233905926712, iteration: 90752
loss: 1.0191386938095093,grad_norm: 0.9999992709521778, iteration: 90753
loss: 1.0190339088439941,grad_norm: 0.9999991577185514, iteration: 90754
loss: 1.0141454935073853,grad_norm: 0.9999995895964505, iteration: 90755
loss: 1.0114496946334839,grad_norm: 0.9999992372753593, iteration: 90756
loss: 0.9762519001960754,grad_norm: 0.9999991200495606, iteration: 90757
loss: 0.9869800209999084,grad_norm: 0.8838515419285983, iteration: 90758
loss: 0.9868360757827759,grad_norm: 0.9758884190132375, iteration: 90759
loss: 1.0080751180648804,grad_norm: 0.9910630116242535, iteration: 90760
loss: 1.0055307149887085,grad_norm: 0.9999992282200828, iteration: 90761
loss: 0.9896014332771301,grad_norm: 0.9999990521229992, iteration: 90762
loss: 1.0034546852111816,grad_norm: 0.9999991889639636, iteration: 90763
loss: 0.9677702188491821,grad_norm: 0.9517230098670578, iteration: 90764
loss: 1.015029788017273,grad_norm: 0.9999991374263532, iteration: 90765
loss: 1.0082786083221436,grad_norm: 0.9999990631986654, iteration: 90766
loss: 0.9965781569480896,grad_norm: 0.9999992477049318, iteration: 90767
loss: 0.9544992446899414,grad_norm: 0.9589295023416582, iteration: 90768
loss: 0.9864969253540039,grad_norm: 0.9999992054496103, iteration: 90769
loss: 0.9748283624649048,grad_norm: 0.9747779739556023, iteration: 90770
loss: 0.9806720614433289,grad_norm: 0.8537316010534967, iteration: 90771
loss: 1.0422433614730835,grad_norm: 0.9999998147085398, iteration: 90772
loss: 1.0178898572921753,grad_norm: 0.999999140607513, iteration: 90773
loss: 1.0183645486831665,grad_norm: 0.9999990231706468, iteration: 90774
loss: 1.0333906412124634,grad_norm: 0.9999990642206665, iteration: 90775
loss: 0.9788145422935486,grad_norm: 0.9759436958605733, iteration: 90776
loss: 1.0314174890518188,grad_norm: 0.999999111748339, iteration: 90777
loss: 1.029176950454712,grad_norm: 0.9999992235829501, iteration: 90778
loss: 0.9578819870948792,grad_norm: 0.999999001254086, iteration: 90779
loss: 1.0036035776138306,grad_norm: 0.952715911427547, iteration: 90780
loss: 1.0279240608215332,grad_norm: 0.9339448642917018, iteration: 90781
loss: 1.0211971998214722,grad_norm: 0.9999991995606707, iteration: 90782
loss: 1.040906310081482,grad_norm: 0.9999993422780964, iteration: 90783
loss: 1.0032916069030762,grad_norm: 0.999999525185443, iteration: 90784
loss: 0.9907663464546204,grad_norm: 0.990832131263672, iteration: 90785
loss: 1.009270191192627,grad_norm: 0.9999991698137474, iteration: 90786
loss: 0.9997264742851257,grad_norm: 0.9971442859250215, iteration: 90787
loss: 1.1231106519699097,grad_norm: 0.9999998108250283, iteration: 90788
loss: 0.9837737083435059,grad_norm: 0.9999991702451658, iteration: 90789
loss: 1.0184428691864014,grad_norm: 0.9999999470074443, iteration: 90790
loss: 1.0339595079421997,grad_norm: 0.9999991622984898, iteration: 90791
loss: 0.9854524731636047,grad_norm: 0.9999991949693722, iteration: 90792
loss: 0.9737036824226379,grad_norm: 0.9999989408884199, iteration: 90793
loss: 1.0130846500396729,grad_norm: 0.9882095844223161, iteration: 90794
loss: 0.9755426645278931,grad_norm: 0.9999992967079187, iteration: 90795
loss: 0.9927292466163635,grad_norm: 0.9999992597509387, iteration: 90796
loss: 1.040734052658081,grad_norm: 0.9821947409990203, iteration: 90797
loss: 1.0644150972366333,grad_norm: 0.9296507610979631, iteration: 90798
loss: 1.0100666284561157,grad_norm: 0.9811864204818741, iteration: 90799
loss: 0.9860220551490784,grad_norm: 0.9757517210904214, iteration: 90800
loss: 0.9709845185279846,grad_norm: 0.9746329379737786, iteration: 90801
loss: 0.9990049600601196,grad_norm: 0.9999991005643449, iteration: 90802
loss: 1.0050286054611206,grad_norm: 0.9999991711809095, iteration: 90803
loss: 1.0177505016326904,grad_norm: 0.9999991532923285, iteration: 90804
loss: 0.9998035430908203,grad_norm: 0.9999991090386162, iteration: 90805
loss: 0.9921464323997498,grad_norm: 0.9339330893063081, iteration: 90806
loss: 0.9963363409042358,grad_norm: 0.999999127208258, iteration: 90807
loss: 0.9964163899421692,grad_norm: 0.9878636229542889, iteration: 90808
loss: 0.9726240634918213,grad_norm: 0.9999993040886823, iteration: 90809
loss: 0.9835720062255859,grad_norm: 0.9999991893705352, iteration: 90810
loss: 1.006364345550537,grad_norm: 0.9999992982037448, iteration: 90811
loss: 1.0085759162902832,grad_norm: 0.9999991324355968, iteration: 90812
loss: 0.9762975573539734,grad_norm: 0.9647315142598126, iteration: 90813
loss: 0.9991392493247986,grad_norm: 0.9999991446106525, iteration: 90814
loss: 1.0226972103118896,grad_norm: 0.9366396136368916, iteration: 90815
loss: 1.0152064561843872,grad_norm: 0.9999992933370231, iteration: 90816
loss: 0.9755277633666992,grad_norm: 0.8950091071757474, iteration: 90817
loss: 1.030503749847412,grad_norm: 0.9999997896264936, iteration: 90818
loss: 1.013633370399475,grad_norm: 0.9303953370103507, iteration: 90819
loss: 0.9966931343078613,grad_norm: 0.9999991291242644, iteration: 90820
loss: 0.9995742440223694,grad_norm: 0.928251714162318, iteration: 90821
loss: 0.9605401158332825,grad_norm: 0.9999989839163921, iteration: 90822
loss: 1.0211752653121948,grad_norm: 0.9999992134572405, iteration: 90823
loss: 1.0161218643188477,grad_norm: 0.995424011132102, iteration: 90824
loss: 0.9896693825721741,grad_norm: 0.8279008782221059, iteration: 90825
loss: 1.0081963539123535,grad_norm: 0.9529007735400222, iteration: 90826
loss: 1.0014842748641968,grad_norm: 0.9999992180190144, iteration: 90827
loss: 1.0239399671554565,grad_norm: 0.9539069370470624, iteration: 90828
loss: 1.0478447675704956,grad_norm: 0.9999992805850272, iteration: 90829
loss: 1.0140645503997803,grad_norm: 0.9999991900686185, iteration: 90830
loss: 0.9929875731468201,grad_norm: 0.8729843998920441, iteration: 90831
loss: 0.9816517233848572,grad_norm: 0.9999992826476739, iteration: 90832
loss: 1.039462685585022,grad_norm: 0.9999990965813873, iteration: 90833
loss: 1.0317212343215942,grad_norm: 0.9999995372625395, iteration: 90834
loss: 1.0039949417114258,grad_norm: 0.8973420553391938, iteration: 90835
loss: 1.005836844444275,grad_norm: 0.9999993013263153, iteration: 90836
loss: 1.0020866394042969,grad_norm: 0.9907343040741684, iteration: 90837
loss: 0.988103449344635,grad_norm: 0.9076505916840383, iteration: 90838
loss: 0.9839579463005066,grad_norm: 0.9999990621464098, iteration: 90839
loss: 0.9906876087188721,grad_norm: 0.9595836459541933, iteration: 90840
loss: 0.9698089361190796,grad_norm: 0.8767855614639222, iteration: 90841
loss: 0.9833418130874634,grad_norm: 0.9999991810824078, iteration: 90842
loss: 0.9976263046264648,grad_norm: 0.8858010446597174, iteration: 90843
loss: 0.9923121929168701,grad_norm: 0.9999992398884361, iteration: 90844
loss: 1.0221785306930542,grad_norm: 0.9715650427305544, iteration: 90845
loss: 1.0139319896697998,grad_norm: 0.9999992123256755, iteration: 90846
loss: 1.0354604721069336,grad_norm: 0.9697375683382067, iteration: 90847
loss: 1.0199699401855469,grad_norm: 0.9999991694637822, iteration: 90848
loss: 1.0208969116210938,grad_norm: 0.999999207202403, iteration: 90849
loss: 0.9635990262031555,grad_norm: 0.9250089109881462, iteration: 90850
loss: 0.991522490978241,grad_norm: 0.9999990369356595, iteration: 90851
loss: 0.9925510883331299,grad_norm: 0.9743479932511069, iteration: 90852
loss: 0.9839653372764587,grad_norm: 0.999999112889018, iteration: 90853
loss: 0.9904720187187195,grad_norm: 0.9999989703515657, iteration: 90854
loss: 0.9818454384803772,grad_norm: 0.9780090656217574, iteration: 90855
loss: 1.0054984092712402,grad_norm: 0.9999990824335325, iteration: 90856
loss: 1.0124764442443848,grad_norm: 0.9882423412201494, iteration: 90857
loss: 1.0359933376312256,grad_norm: 0.9999991668018138, iteration: 90858
loss: 0.9751021265983582,grad_norm: 0.9965770547688233, iteration: 90859
loss: 0.991420328617096,grad_norm: 0.999999086045111, iteration: 90860
loss: 0.9914801716804504,grad_norm: 0.9899066340837108, iteration: 90861
loss: 0.9582889080047607,grad_norm: 0.9999992238373081, iteration: 90862
loss: 0.9823188185691833,grad_norm: 0.91568646512699, iteration: 90863
loss: 1.030762791633606,grad_norm: 0.9301517709637814, iteration: 90864
loss: 0.9806904792785645,grad_norm: 0.9202213025503807, iteration: 90865
loss: 0.9784619808197021,grad_norm: 0.8624665514901458, iteration: 90866
loss: 1.0401345491409302,grad_norm: 0.9999991011971493, iteration: 90867
loss: 0.9898392558097839,grad_norm: 0.8938543477800599, iteration: 90868
loss: 1.013791561126709,grad_norm: 0.9999992610567859, iteration: 90869
loss: 1.0006781816482544,grad_norm: 0.9999992457117193, iteration: 90870
loss: 1.003898024559021,grad_norm: 0.9999990260781615, iteration: 90871
loss: 1.0107972621917725,grad_norm: 0.9999993121942871, iteration: 90872
loss: 0.9982349872589111,grad_norm: 0.962528638085095, iteration: 90873
loss: 0.9968903064727783,grad_norm: 0.9240336645139924, iteration: 90874
loss: 0.9523596167564392,grad_norm: 0.8900782813420294, iteration: 90875
loss: 1.0236433744430542,grad_norm: 0.9786371784593991, iteration: 90876
loss: 0.9710373878479004,grad_norm: 0.9999990344673412, iteration: 90877
loss: 0.9628708362579346,grad_norm: 0.8950723735060756, iteration: 90878
loss: 1.0223307609558105,grad_norm: 0.9999990831602398, iteration: 90879
loss: 1.0352669954299927,grad_norm: 0.9999989917882571, iteration: 90880
loss: 1.024114966392517,grad_norm: 0.9999989348935429, iteration: 90881
loss: 1.0012397766113281,grad_norm: 0.9999991610353972, iteration: 90882
loss: 1.0217427015304565,grad_norm: 0.9999990970418413, iteration: 90883
loss: 0.98703533411026,grad_norm: 0.9666374023223631, iteration: 90884
loss: 0.9850353002548218,grad_norm: 0.9999989880030007, iteration: 90885
loss: 0.9952682256698608,grad_norm: 0.9999990612617131, iteration: 90886
loss: 0.9992883205413818,grad_norm: 0.9999991480994627, iteration: 90887
loss: 1.0046547651290894,grad_norm: 0.9787796702880146, iteration: 90888
loss: 1.0015798807144165,grad_norm: 0.9430675194170789, iteration: 90889
loss: 0.972447395324707,grad_norm: 0.9999991355317844, iteration: 90890
loss: 1.0249050855636597,grad_norm: 0.9999991023370365, iteration: 90891
loss: 1.0385305881500244,grad_norm: 0.9999997662069967, iteration: 90892
loss: 0.9981695413589478,grad_norm: 0.9701866872670296, iteration: 90893
loss: 0.9872831702232361,grad_norm: 0.9999991184582923, iteration: 90894
loss: 1.0030895471572876,grad_norm: 0.8857168306164013, iteration: 90895
loss: 1.0487792491912842,grad_norm: 0.9999992598413238, iteration: 90896
loss: 1.012039065361023,grad_norm: 0.9655388176193503, iteration: 90897
loss: 0.9774115085601807,grad_norm: 0.9999991442978, iteration: 90898
loss: 0.9707415103912354,grad_norm: 0.9632264525894476, iteration: 90899
loss: 1.0276219844818115,grad_norm: 0.9999990817141322, iteration: 90900
loss: 1.0642417669296265,grad_norm: 0.9999994680581047, iteration: 90901
loss: 0.9672443866729736,grad_norm: 0.9999989141332926, iteration: 90902
loss: 0.9488875269889832,grad_norm: 0.8987063379146901, iteration: 90903
loss: 1.0182750225067139,grad_norm: 0.9999990906075863, iteration: 90904
loss: 0.9679868817329407,grad_norm: 0.9000700646200085, iteration: 90905
loss: 1.014764666557312,grad_norm: 0.9939803120182426, iteration: 90906
loss: 1.0023272037506104,grad_norm: 0.9999990692504547, iteration: 90907
loss: 1.0212517976760864,grad_norm: 0.9999992432612747, iteration: 90908
loss: 0.9857341051101685,grad_norm: 0.9999992948710139, iteration: 90909
loss: 0.9753659963607788,grad_norm: 0.9999991870291887, iteration: 90910
loss: 0.9798485636711121,grad_norm: 0.9624612578335933, iteration: 90911
loss: 0.9773375391960144,grad_norm: 0.9999991167461375, iteration: 90912
loss: 0.9449363350868225,grad_norm: 0.9276145121782564, iteration: 90913
loss: 1.0039465427398682,grad_norm: 0.9664499970595946, iteration: 90914
loss: 1.0172433853149414,grad_norm: 0.9999991882031274, iteration: 90915
loss: 0.9829058647155762,grad_norm: 0.9850370008152934, iteration: 90916
loss: 0.9592883586883545,grad_norm: 0.9999991293749345, iteration: 90917
loss: 0.9908698201179504,grad_norm: 0.999999097882926, iteration: 90918
loss: 1.0146701335906982,grad_norm: 0.9556518234090929, iteration: 90919
loss: 1.027175784111023,grad_norm: 0.9999993822320254, iteration: 90920
loss: 1.051469087600708,grad_norm: 0.9999995829336134, iteration: 90921
loss: 1.015353798866272,grad_norm: 0.9999991205513606, iteration: 90922
loss: 1.0192720890045166,grad_norm: 0.9999994392855248, iteration: 90923
loss: 1.0072168111801147,grad_norm: 0.9799641844808332, iteration: 90924
loss: 1.0283299684524536,grad_norm: 0.9999991362971359, iteration: 90925
loss: 1.0238194465637207,grad_norm: 0.9454369754260277, iteration: 90926
loss: 1.0117881298065186,grad_norm: 0.891281650160845, iteration: 90927
loss: 0.9817742705345154,grad_norm: 0.9999992081272124, iteration: 90928
loss: 1.0359408855438232,grad_norm: 0.9999990027649316, iteration: 90929
loss: 1.0156772136688232,grad_norm: 0.999999231924826, iteration: 90930
loss: 0.9659945964813232,grad_norm: 0.9411541631215915, iteration: 90931
loss: 0.9687348008155823,grad_norm: 0.9999991445015292, iteration: 90932
loss: 0.9778799414634705,grad_norm: 0.9530439747940308, iteration: 90933
loss: 1.0145241022109985,grad_norm: 0.9377475365749665, iteration: 90934
loss: 1.0031206607818604,grad_norm: 0.9999997378834223, iteration: 90935
loss: 0.9854077696800232,grad_norm: 0.9999990054305606, iteration: 90936
loss: 1.0000072717666626,grad_norm: 0.9999989242860876, iteration: 90937
loss: 0.996649980545044,grad_norm: 0.8766670652864972, iteration: 90938
loss: 0.9875934720039368,grad_norm: 0.9999990479401499, iteration: 90939
loss: 0.9756360054016113,grad_norm: 0.9752795651939419, iteration: 90940
loss: 1.0069712400436401,grad_norm: 0.8793142295028678, iteration: 90941
loss: 1.0132559537887573,grad_norm: 0.9999991927535536, iteration: 90942
loss: 1.0418144464492798,grad_norm: 0.9642255150943224, iteration: 90943
loss: 0.9947678446769714,grad_norm: 0.9999990302235239, iteration: 90944
loss: 0.9871922731399536,grad_norm: 0.8977655603644871, iteration: 90945
loss: 1.0085763931274414,grad_norm: 0.9999991513889227, iteration: 90946
loss: 0.968303382396698,grad_norm: 0.9999990490462423, iteration: 90947
loss: 0.9961031675338745,grad_norm: 0.9999993213204739, iteration: 90948
loss: 0.9737677574157715,grad_norm: 0.999999264169291, iteration: 90949
loss: 1.079685091972351,grad_norm: 0.9999995514409209, iteration: 90950
loss: 1.018234133720398,grad_norm: 0.9999990565135529, iteration: 90951
loss: 1.019338846206665,grad_norm: 0.9868959774571723, iteration: 90952
loss: 0.9820464849472046,grad_norm: 0.98218810003283, iteration: 90953
loss: 1.0063979625701904,grad_norm: 0.9999990176706768, iteration: 90954
loss: 1.0422452688217163,grad_norm: 0.9999993685397415, iteration: 90955
loss: 1.0016154050827026,grad_norm: 0.9488253578819121, iteration: 90956
loss: 1.0332797765731812,grad_norm: 0.9999995303544489, iteration: 90957
loss: 1.003366470336914,grad_norm: 0.9999994333184333, iteration: 90958
loss: 1.0120826959609985,grad_norm: 0.9999992036792636, iteration: 90959
loss: 0.9838131070137024,grad_norm: 0.9999998840653357, iteration: 90960
loss: 1.0236669778823853,grad_norm: 0.9999995424844149, iteration: 90961
loss: 0.9865581393241882,grad_norm: 0.9999991207553083, iteration: 90962
loss: 1.0188833475112915,grad_norm: 0.9783499318118241, iteration: 90963
loss: 1.0065233707427979,grad_norm: 0.9999991709998665, iteration: 90964
loss: 1.0170944929122925,grad_norm: 0.999999672787739, iteration: 90965
loss: 1.0057264566421509,grad_norm: 0.9999990043446655, iteration: 90966
loss: 0.9756442308425903,grad_norm: 0.9999991096658641, iteration: 90967
loss: 1.0331063270568848,grad_norm: 0.9999998884612613, iteration: 90968
loss: 0.9838793277740479,grad_norm: 0.9999990579799628, iteration: 90969
loss: 1.0986913442611694,grad_norm: 0.9999998234053165, iteration: 90970
loss: 1.0298000574111938,grad_norm: 0.8385587365688245, iteration: 90971
loss: 1.0148249864578247,grad_norm: 0.9999992070609756, iteration: 90972
loss: 1.0294458866119385,grad_norm: 0.9999990419915579, iteration: 90973
loss: 0.9845452308654785,grad_norm: 0.9999991147652448, iteration: 90974
loss: 0.990028977394104,grad_norm: 0.9999991079991544, iteration: 90975
loss: 0.9980632662773132,grad_norm: 0.9999991877701954, iteration: 90976
loss: 1.0277332067489624,grad_norm: 0.9999992559818323, iteration: 90977
loss: 1.04606294631958,grad_norm: 0.999999711892103, iteration: 90978
loss: 0.9500117301940918,grad_norm: 0.9726019403891292, iteration: 90979
loss: 0.9925817847251892,grad_norm: 0.9999991053045236, iteration: 90980
loss: 1.0060640573501587,grad_norm: 0.999999275219836, iteration: 90981
loss: 0.9859515428543091,grad_norm: 0.9999993639279029, iteration: 90982
loss: 0.9822023510932922,grad_norm: 0.9999990634364269, iteration: 90983
loss: 1.001725196838379,grad_norm: 0.8865350954927157, iteration: 90984
loss: 1.0145487785339355,grad_norm: 0.9999991273178939, iteration: 90985
loss: 1.0034210681915283,grad_norm: 0.9999991994453309, iteration: 90986
loss: 1.0077379941940308,grad_norm: 0.9999996582112869, iteration: 90987
loss: 1.0016173124313354,grad_norm: 0.8749893265209542, iteration: 90988
loss: 0.9942547082901001,grad_norm: 0.9977309405832782, iteration: 90989
loss: 1.0339393615722656,grad_norm: 0.9999991881856856, iteration: 90990
loss: 1.0050212144851685,grad_norm: 0.9181167728820606, iteration: 90991
loss: 1.0063507556915283,grad_norm: 0.9999995104881864, iteration: 90992
loss: 1.0237340927124023,grad_norm: 0.9999998841646386, iteration: 90993
loss: 1.0140974521636963,grad_norm: 0.9999995751909294, iteration: 90994
loss: 1.0989240407943726,grad_norm: 0.9999990363922375, iteration: 90995
loss: 0.9760096669197083,grad_norm: 0.836587161948731, iteration: 90996
loss: 1.002057433128357,grad_norm: 0.9999990040200172, iteration: 90997
loss: 0.9786882400512695,grad_norm: 0.9999991379026119, iteration: 90998
loss: 0.9882918000221252,grad_norm: 0.9999997409950936, iteration: 90999
loss: 1.0047807693481445,grad_norm: 0.912417650064835, iteration: 91000
loss: 1.007690668106079,grad_norm: 0.9999993163903325, iteration: 91001
loss: 0.9961898326873779,grad_norm: 0.9999989758929342, iteration: 91002
loss: 0.9852282404899597,grad_norm: 0.9177866312856594, iteration: 91003
loss: 0.998536229133606,grad_norm: 0.9999990490709193, iteration: 91004
loss: 0.9820108413696289,grad_norm: 0.9999990753426224, iteration: 91005
loss: 0.9821419715881348,grad_norm: 0.8678709134278518, iteration: 91006
loss: 1.0813109874725342,grad_norm: 0.9999997384256863, iteration: 91007
loss: 1.0309960842132568,grad_norm: 0.9999991764250535, iteration: 91008
loss: 1.0009158849716187,grad_norm: 0.9830486153959733, iteration: 91009
loss: 0.9934318661689758,grad_norm: 0.9999989954618383, iteration: 91010
loss: 1.0423945188522339,grad_norm: 0.9999992756808715, iteration: 91011
loss: 1.000274658203125,grad_norm: 0.9999992508298163, iteration: 91012
loss: 1.0078762769699097,grad_norm: 0.9999992389086357, iteration: 91013
loss: 1.0327855348587036,grad_norm: 0.9999992700593129, iteration: 91014
loss: 0.9923210144042969,grad_norm: 0.9999991011715152, iteration: 91015
loss: 1.0560020208358765,grad_norm: 0.9999994642452728, iteration: 91016
loss: 0.9920019507408142,grad_norm: 0.9999992203284694, iteration: 91017
loss: 0.9697610139846802,grad_norm: 0.9301826798458748, iteration: 91018
loss: 1.0170958042144775,grad_norm: 0.9999991671713051, iteration: 91019
loss: 0.9956446886062622,grad_norm: 0.9999990317075612, iteration: 91020
loss: 0.9823917746543884,grad_norm: 0.9999995218996163, iteration: 91021
loss: 1.009986400604248,grad_norm: 0.9289543424273031, iteration: 91022
loss: 1.008596420288086,grad_norm: 0.9580122283406114, iteration: 91023
loss: 0.9904637336730957,grad_norm: 0.9999991591561964, iteration: 91024
loss: 1.0162131786346436,grad_norm: 0.9999994936682862, iteration: 91025
loss: 1.0140749216079712,grad_norm: 0.9999995878707262, iteration: 91026
loss: 1.0751440525054932,grad_norm: 0.9999994025897798, iteration: 91027
loss: 1.0534237623214722,grad_norm: 0.999999221802169, iteration: 91028
loss: 0.956868588924408,grad_norm: 0.9742626543287786, iteration: 91029
loss: 0.9348305463790894,grad_norm: 0.9055335085924032, iteration: 91030
loss: 0.9991621971130371,grad_norm: 0.9892081850996156, iteration: 91031
loss: 1.0113540887832642,grad_norm: 0.9999991274404733, iteration: 91032
loss: 0.9739790558815002,grad_norm: 0.9999989557547618, iteration: 91033
loss: 0.9934287667274475,grad_norm: 0.99999928996956, iteration: 91034
loss: 0.9637858867645264,grad_norm: 0.999998965440619, iteration: 91035
loss: 1.0360760688781738,grad_norm: 0.9999989698087532, iteration: 91036
loss: 0.9956812262535095,grad_norm: 0.9999992016621663, iteration: 91037
loss: 1.0235332250595093,grad_norm: 0.9533287025343916, iteration: 91038
loss: 1.0372356176376343,grad_norm: 0.9999992012657221, iteration: 91039
loss: 1.0182300806045532,grad_norm: 0.9835046237362336, iteration: 91040
loss: 0.9790098667144775,grad_norm: 0.9999989704048774, iteration: 91041
loss: 1.0124081373214722,grad_norm: 0.9999990694999139, iteration: 91042
loss: 0.9571990370750427,grad_norm: 0.9999991860157342, iteration: 91043
loss: 0.9937063455581665,grad_norm: 0.9999991865155449, iteration: 91044
loss: 1.025039553642273,grad_norm: 0.9999990834439199, iteration: 91045
loss: 0.9589654207229614,grad_norm: 0.9999992158578289, iteration: 91046
loss: 1.0253794193267822,grad_norm: 0.9999992560159874, iteration: 91047
loss: 1.0050289630889893,grad_norm: 0.9999991149863454, iteration: 91048
loss: 1.013365387916565,grad_norm: 0.9999991649972885, iteration: 91049
loss: 1.0521880388259888,grad_norm: 0.9999990104730948, iteration: 91050
loss: 0.9892150163650513,grad_norm: 0.9999991042295306, iteration: 91051
loss: 1.0005507469177246,grad_norm: 0.9999990482873642, iteration: 91052
loss: 1.0044074058532715,grad_norm: 0.9314555687372886, iteration: 91053
loss: 0.9981722235679626,grad_norm: 0.9873723989132025, iteration: 91054
loss: 1.0006157159805298,grad_norm: 0.9999992975425926, iteration: 91055
loss: 1.0115156173706055,grad_norm: 0.9999991468736257, iteration: 91056
loss: 1.0474278926849365,grad_norm: 0.9999997781833344, iteration: 91057
loss: 1.0431095361709595,grad_norm: 0.9999998174989996, iteration: 91058
loss: 0.9596108198165894,grad_norm: 0.9716354380790526, iteration: 91059
loss: 0.9787582159042358,grad_norm: 0.9999991078687228, iteration: 91060
loss: 1.0204100608825684,grad_norm: 0.9999990245713701, iteration: 91061
loss: 1.0016180276870728,grad_norm: 0.9336062093799895, iteration: 91062
loss: 0.9918949604034424,grad_norm: 0.9793416399133589, iteration: 91063
loss: 1.0107862949371338,grad_norm: 0.9999992877822705, iteration: 91064
loss: 1.1219133138656616,grad_norm: 0.9999999728077711, iteration: 91065
loss: 1.0007200241088867,grad_norm: 0.9999992072132019, iteration: 91066
loss: 1.0111513137817383,grad_norm: 0.9999991281659045, iteration: 91067
loss: 0.9625231623649597,grad_norm: 0.9999994303286055, iteration: 91068
loss: 0.9586607813835144,grad_norm: 0.9999993029415324, iteration: 91069
loss: 1.0190538167953491,grad_norm: 0.9999991580094423, iteration: 91070
loss: 0.9558987617492676,grad_norm: 0.9999991334785291, iteration: 91071
loss: 0.9817891716957092,grad_norm: 0.9999991597846103, iteration: 91072
loss: 0.948174774646759,grad_norm: 0.9588236713860999, iteration: 91073
loss: 1.0234521627426147,grad_norm: 0.9999990692101345, iteration: 91074
loss: 1.0026719570159912,grad_norm: 0.9999991801024588, iteration: 91075
loss: 1.0660005807876587,grad_norm: 0.9999990618006229, iteration: 91076
loss: 1.0534498691558838,grad_norm: 0.9999995252596228, iteration: 91077
loss: 0.980016827583313,grad_norm: 0.987517578995371, iteration: 91078
loss: 1.0463409423828125,grad_norm: 0.999999131152895, iteration: 91079
loss: 0.9741305708885193,grad_norm: 0.9999992291550035, iteration: 91080
loss: 1.0075286626815796,grad_norm: 0.9999990246136731, iteration: 91081
loss: 1.0006178617477417,grad_norm: 0.9999990587115032, iteration: 91082
loss: 1.0229448080062866,grad_norm: 0.9999996982999461, iteration: 91083
loss: 1.0248202085494995,grad_norm: 0.9999991265131428, iteration: 91084
loss: 1.0218491554260254,grad_norm: 0.9999990894933792, iteration: 91085
loss: 0.9662054181098938,grad_norm: 0.8504179261232777, iteration: 91086
loss: 1.0040149688720703,grad_norm: 0.9999990546314349, iteration: 91087
loss: 1.005183219909668,grad_norm: 0.9999991321313019, iteration: 91088
loss: 1.0094594955444336,grad_norm: 0.9999991929563191, iteration: 91089
loss: 0.9998928308486938,grad_norm: 0.9999990772377696, iteration: 91090
loss: 0.9942814707756042,grad_norm: 0.9999991550057078, iteration: 91091
loss: 1.0077308416366577,grad_norm: 0.9999990588595559, iteration: 91092
loss: 0.9763701558113098,grad_norm: 0.9999990433155524, iteration: 91093
loss: 0.9927470088005066,grad_norm: 0.9166067547908952, iteration: 91094
loss: 1.0951876640319824,grad_norm: 0.999999486900489, iteration: 91095
loss: 1.0408601760864258,grad_norm: 0.9999990697991568, iteration: 91096
loss: 1.0109037160873413,grad_norm: 0.9599164561444922, iteration: 91097
loss: 0.9996083974838257,grad_norm: 0.9999990528420024, iteration: 91098
loss: 1.0333516597747803,grad_norm: 0.9999991436518729, iteration: 91099
loss: 0.9949688911437988,grad_norm: 0.9999991803466882, iteration: 91100
loss: 1.0097230672836304,grad_norm: 0.9551053200748091, iteration: 91101
loss: 1.0130845308303833,grad_norm: 0.9059021526702172, iteration: 91102
loss: 0.9861356019973755,grad_norm: 0.9999991665709871, iteration: 91103
loss: 1.0100444555282593,grad_norm: 0.999999153456114, iteration: 91104
loss: 1.0150882005691528,grad_norm: 0.9999996523947517, iteration: 91105
loss: 1.027455449104309,grad_norm: 0.9999995905820401, iteration: 91106
loss: 1.0188984870910645,grad_norm: 0.9533008651498763, iteration: 91107
loss: 1.0016205310821533,grad_norm: 0.9999993347610477, iteration: 91108
loss: 1.032999038696289,grad_norm: 0.9706677061214006, iteration: 91109
loss: 0.9849222302436829,grad_norm: 0.9232657549898946, iteration: 91110
loss: 0.9495677947998047,grad_norm: 0.9994840719683145, iteration: 91111
loss: 0.9793468117713928,grad_norm: 0.9139101082721831, iteration: 91112
loss: 1.0103063583374023,grad_norm: 0.9999993126538773, iteration: 91113
loss: 0.9797926545143127,grad_norm: 0.9128135499276219, iteration: 91114
loss: 1.0387699604034424,grad_norm: 0.9247700556226809, iteration: 91115
loss: 1.0078741312026978,grad_norm: 0.9999991525944734, iteration: 91116
loss: 0.9970999956130981,grad_norm: 0.9741506393207549, iteration: 91117
loss: 0.9858680367469788,grad_norm: 0.9887278844974329, iteration: 91118
loss: 0.9996100068092346,grad_norm: 0.9513223346720797, iteration: 91119
loss: 1.0186619758605957,grad_norm: 0.9999989806443715, iteration: 91120
loss: 1.013737440109253,grad_norm: 0.9999991287663959, iteration: 91121
loss: 0.9663362503051758,grad_norm: 0.9150447192420798, iteration: 91122
loss: 1.0313599109649658,grad_norm: 0.9999991832488832, iteration: 91123
loss: 0.9971885681152344,grad_norm: 0.9999991255056445, iteration: 91124
loss: 0.9746520519256592,grad_norm: 0.8969062076371285, iteration: 91125
loss: 1.0302531719207764,grad_norm: 0.9999994321168666, iteration: 91126
loss: 1.0089672803878784,grad_norm: 0.9999991919471266, iteration: 91127
loss: 0.9918079972267151,grad_norm: 0.9999991351348935, iteration: 91128
loss: 0.9963480830192566,grad_norm: 0.9533521848979222, iteration: 91129
loss: 0.9994447827339172,grad_norm: 0.9999992832548835, iteration: 91130
loss: 1.007570505142212,grad_norm: 0.8830401010015885, iteration: 91131
loss: 0.9701571464538574,grad_norm: 0.9999989463212029, iteration: 91132
loss: 0.9902418255805969,grad_norm: 0.917756573526445, iteration: 91133
loss: 0.9563549757003784,grad_norm: 0.999999206064595, iteration: 91134
loss: 0.9788876175880432,grad_norm: 0.962573696224436, iteration: 91135
loss: 1.0093697309494019,grad_norm: 0.999999224464376, iteration: 91136
loss: 0.992363452911377,grad_norm: 0.9942540051283786, iteration: 91137
loss: 1.0271130800247192,grad_norm: 0.9378542085671876, iteration: 91138
loss: 0.9993100166320801,grad_norm: 0.9999990668487483, iteration: 91139
loss: 1.0048075914382935,grad_norm: 0.9999991821260332, iteration: 91140
loss: 0.9619706869125366,grad_norm: 0.9607945826003996, iteration: 91141
loss: 1.037163496017456,grad_norm: 0.9999993826187791, iteration: 91142
loss: 1.0115861892700195,grad_norm: 0.9999990882773384, iteration: 91143
loss: 1.00631844997406,grad_norm: 0.9999991087106466, iteration: 91144
loss: 1.0523862838745117,grad_norm: 0.9999991084120904, iteration: 91145
loss: 0.981791615486145,grad_norm: 0.9857664904217279, iteration: 91146
loss: 1.0173592567443848,grad_norm: 0.9423088970551552, iteration: 91147
loss: 0.9815927147865295,grad_norm: 0.9999991556336659, iteration: 91148
loss: 1.0198535919189453,grad_norm: 0.9999996031127701, iteration: 91149
loss: 0.9910426735877991,grad_norm: 0.9581793836067701, iteration: 91150
loss: 0.9843342304229736,grad_norm: 0.9999991020938421, iteration: 91151
loss: 1.0128991603851318,grad_norm: 0.9999989372964028, iteration: 91152
loss: 1.0160125494003296,grad_norm: 0.9162545870041822, iteration: 91153
loss: 1.0191261768341064,grad_norm: 0.9999991188446106, iteration: 91154
loss: 1.006399393081665,grad_norm: 0.9999990496535519, iteration: 91155
loss: 1.059817910194397,grad_norm: 0.9999990634795705, iteration: 91156
loss: 0.982532799243927,grad_norm: 0.9506459577295457, iteration: 91157
loss: 1.018763542175293,grad_norm: 0.9999991506305528, iteration: 91158
loss: 0.9865293502807617,grad_norm: 0.9999991434532818, iteration: 91159
loss: 1.0004382133483887,grad_norm: 0.999999046901671, iteration: 91160
loss: 0.9982984662055969,grad_norm: 0.9999991773202254, iteration: 91161
loss: 1.028863549232483,grad_norm: 0.9999990940682848, iteration: 91162
loss: 1.0265432596206665,grad_norm: 0.9999989875249374, iteration: 91163
loss: 1.109892725944519,grad_norm: 0.9999994660928032, iteration: 91164
loss: 1.0019587278366089,grad_norm: 0.9999990517161255, iteration: 91165
loss: 0.9656188488006592,grad_norm: 0.999999023416302, iteration: 91166
loss: 0.9715373516082764,grad_norm: 0.999999260574934, iteration: 91167
loss: 0.9903669357299805,grad_norm: 0.9999991137747237, iteration: 91168
loss: 1.0097349882125854,grad_norm: 0.9999997771485386, iteration: 91169
loss: 1.0066827535629272,grad_norm: 0.8948340882804382, iteration: 91170
loss: 1.0549734830856323,grad_norm: 0.9999996415066318, iteration: 91171
loss: 1.075577735900879,grad_norm: 0.9999991470030212, iteration: 91172
loss: 0.9570513963699341,grad_norm: 0.9100846377311939, iteration: 91173
loss: 1.0376513004302979,grad_norm: 0.9999992036596841, iteration: 91174
loss: 1.0663316249847412,grad_norm: 0.9999994408359268, iteration: 91175
loss: 0.9797683358192444,grad_norm: 0.9574265067469906, iteration: 91176
loss: 0.9845443367958069,grad_norm: 0.9766968775138644, iteration: 91177
loss: 0.9947154521942139,grad_norm: 0.9661664932693332, iteration: 91178
loss: 1.0099409818649292,grad_norm: 0.9999992038789375, iteration: 91179
loss: 1.002753734588623,grad_norm: 0.9977798363834323, iteration: 91180
loss: 1.007034182548523,grad_norm: 0.8910278627063056, iteration: 91181
loss: 1.0584030151367188,grad_norm: 0.9999996191767335, iteration: 91182
loss: 0.9759814143180847,grad_norm: 0.9218016585549543, iteration: 91183
loss: 1.0101065635681152,grad_norm: 0.9446691096564063, iteration: 91184
loss: 1.0164722204208374,grad_norm: 0.9999991480541173, iteration: 91185
loss: 1.0263882875442505,grad_norm: 0.9999990319172295, iteration: 91186
loss: 1.0005688667297363,grad_norm: 0.9999992469923841, iteration: 91187
loss: 1.0026530027389526,grad_norm: 0.9999991286440356, iteration: 91188
loss: 0.9986082315444946,grad_norm: 0.999999502908189, iteration: 91189
loss: 0.9957554936408997,grad_norm: 0.9999990416941321, iteration: 91190
loss: 0.9893475770950317,grad_norm: 0.8646134446481862, iteration: 91191
loss: 0.9933621287345886,grad_norm: 0.9094644506876182, iteration: 91192
loss: 1.0083445310592651,grad_norm: 0.99999900720828, iteration: 91193
loss: 1.0100986957550049,grad_norm: 0.9999993297327378, iteration: 91194
loss: 1.0217119455337524,grad_norm: 0.9999991371730332, iteration: 91195
loss: 1.0011733770370483,grad_norm: 0.9595292896844491, iteration: 91196
loss: 0.9736912846565247,grad_norm: 0.9999990780102298, iteration: 91197
loss: 0.9865312576293945,grad_norm: 0.9834419615419577, iteration: 91198
loss: 0.9851166605949402,grad_norm: 0.928105540381249, iteration: 91199
loss: 0.9805238246917725,grad_norm: 0.9999992790699747, iteration: 91200
loss: 0.9847232103347778,grad_norm: 0.9999990825061553, iteration: 91201
loss: 0.9892027974128723,grad_norm: 0.9999991628123895, iteration: 91202
loss: 1.0128753185272217,grad_norm: 0.9999992923351405, iteration: 91203
loss: 0.9697834253311157,grad_norm: 0.9999990834448964, iteration: 91204
loss: 0.9710744023323059,grad_norm: 0.999999067354262, iteration: 91205
loss: 1.0268948078155518,grad_norm: 0.9999993113751229, iteration: 91206
loss: 0.9794869422912598,grad_norm: 0.9999992352020105, iteration: 91207
loss: 1.0204198360443115,grad_norm: 0.9999990940703724, iteration: 91208
loss: 0.9882413148880005,grad_norm: 0.99999907010414, iteration: 91209
loss: 1.0287599563598633,grad_norm: 0.8895569151378347, iteration: 91210
loss: 1.0334888696670532,grad_norm: 0.9999995218835707, iteration: 91211
loss: 0.9834440350532532,grad_norm: 0.9338164814360983, iteration: 91212
loss: 0.932948887348175,grad_norm: 0.999999045476515, iteration: 91213
loss: 1.020632028579712,grad_norm: 0.9999990291866022, iteration: 91214
loss: 0.9782304763793945,grad_norm: 0.9999990707444617, iteration: 91215
loss: 1.0029561519622803,grad_norm: 0.9999989072439643, iteration: 91216
loss: 1.0171897411346436,grad_norm: 0.9953595273114038, iteration: 91217
loss: 1.021986722946167,grad_norm: 0.9999992434317272, iteration: 91218
loss: 0.9874591827392578,grad_norm: 0.8873124248266402, iteration: 91219
loss: 1.0081117153167725,grad_norm: 0.9999992443348565, iteration: 91220
loss: 0.9687773585319519,grad_norm: 0.9999999187390841, iteration: 91221
loss: 1.049479603767395,grad_norm: 0.9999991361278512, iteration: 91222
loss: 1.0118348598480225,grad_norm: 0.9999992768565623, iteration: 91223
loss: 0.9962451457977295,grad_norm: 0.9999991588581902, iteration: 91224
loss: 1.0202993154525757,grad_norm: 0.9334600995056045, iteration: 91225
loss: 1.0308103561401367,grad_norm: 0.9999991599845648, iteration: 91226
loss: 1.0046268701553345,grad_norm: 0.9999991476528084, iteration: 91227
loss: 0.9936405420303345,grad_norm: 0.999999528112154, iteration: 91228
loss: 1.026870608329773,grad_norm: 0.9999990754597373, iteration: 91229
loss: 1.0226109027862549,grad_norm: 0.9999992358068599, iteration: 91230
loss: 0.9772785305976868,grad_norm: 0.999999147163031, iteration: 91231
loss: 1.0362476110458374,grad_norm: 0.9999991902972701, iteration: 91232
loss: 1.0009739398956299,grad_norm: 0.9999992687113718, iteration: 91233
loss: 1.00974440574646,grad_norm: 0.9999989867193197, iteration: 91234
loss: 1.0315096378326416,grad_norm: 0.9999993305514537, iteration: 91235
loss: 0.9841217398643494,grad_norm: 0.9999992201430038, iteration: 91236
loss: 1.0029665231704712,grad_norm: 0.9999989534605193, iteration: 91237
loss: 0.993657112121582,grad_norm: 0.99359794662681, iteration: 91238
loss: 1.012977957725525,grad_norm: 0.9759445274196672, iteration: 91239
loss: 0.9744962453842163,grad_norm: 0.9999991984480306, iteration: 91240
loss: 0.9737681150436401,grad_norm: 0.999999177858246, iteration: 91241
loss: 1.0233296155929565,grad_norm: 0.9999991890813987, iteration: 91242
loss: 1.010536551475525,grad_norm: 0.9999991414150563, iteration: 91243
loss: 1.026898741722107,grad_norm: 0.9999990971287054, iteration: 91244
loss: 0.9747264385223389,grad_norm: 0.9999993072979142, iteration: 91245
loss: 1.0235382318496704,grad_norm: 0.954823384457777, iteration: 91246
loss: 0.9618945121765137,grad_norm: 0.9152054652936187, iteration: 91247
loss: 0.9704274535179138,grad_norm: 0.9999989827756585, iteration: 91248
loss: 0.9887131452560425,grad_norm: 0.9051945032732911, iteration: 91249
loss: 1.0336925983428955,grad_norm: 0.9620181785034327, iteration: 91250
loss: 0.9935476779937744,grad_norm: 0.999999231005714, iteration: 91251
loss: 0.9680129289627075,grad_norm: 0.999999249453561, iteration: 91252
loss: 0.9754888415336609,grad_norm: 0.9999993714375468, iteration: 91253
loss: 0.9747163653373718,grad_norm: 0.9999991733368758, iteration: 91254
loss: 0.98513263463974,grad_norm: 0.9386723440917488, iteration: 91255
loss: 1.0061771869659424,grad_norm: 0.9108592504865424, iteration: 91256
loss: 0.9921895861625671,grad_norm: 0.9999990163755674, iteration: 91257
loss: 1.0154401063919067,grad_norm: 0.9806587175212074, iteration: 91258
loss: 1.0157980918884277,grad_norm: 0.9999992043771264, iteration: 91259
loss: 1.003940224647522,grad_norm: 0.9999992066299431, iteration: 91260
loss: 1.0231983661651611,grad_norm: 0.9025874020830508, iteration: 91261
loss: 1.0085344314575195,grad_norm: 0.9999990049934112, iteration: 91262
loss: 0.9681223034858704,grad_norm: 0.9999990007895933, iteration: 91263
loss: 0.9935663342475891,grad_norm: 0.9999991734458465, iteration: 91264
loss: 0.987821638584137,grad_norm: 0.9999991460482534, iteration: 91265
loss: 1.011916160583496,grad_norm: 0.8982293071831641, iteration: 91266
loss: 0.9651947021484375,grad_norm: 0.9999991907259589, iteration: 91267
loss: 0.991637110710144,grad_norm: 0.9999994954918976, iteration: 91268
loss: 1.0394837856292725,grad_norm: 0.9999992704571011, iteration: 91269
loss: 0.9911020398139954,grad_norm: 0.9999992122567265, iteration: 91270
loss: 1.0304484367370605,grad_norm: 0.9688755144949416, iteration: 91271
loss: 1.005591869354248,grad_norm: 0.9999993462220794, iteration: 91272
loss: 0.9979695677757263,grad_norm: 0.9999992114561251, iteration: 91273
loss: 0.9976144433021545,grad_norm: 0.9999991843426416, iteration: 91274
loss: 1.0361199378967285,grad_norm: 0.9999991702956524, iteration: 91275
loss: 1.0046098232269287,grad_norm: 0.9999990378522813, iteration: 91276
loss: 1.0106046199798584,grad_norm: 0.9999992397982873, iteration: 91277
loss: 0.9721035361289978,grad_norm: 0.882948441719112, iteration: 91278
loss: 0.9551530480384827,grad_norm: 0.9999992009692359, iteration: 91279
loss: 1.0044808387756348,grad_norm: 0.9999990291049696, iteration: 91280
loss: 1.0088404417037964,grad_norm: 0.9999990258485586, iteration: 91281
loss: 1.0105475187301636,grad_norm: 0.999999000547833, iteration: 91282
loss: 1.0040301084518433,grad_norm: 0.9992570903292257, iteration: 91283
loss: 1.0069102048873901,grad_norm: 0.9999990162630199, iteration: 91284
loss: 0.9828636050224304,grad_norm: 0.9999992701100304, iteration: 91285
loss: 0.99229896068573,grad_norm: 0.9999991003217925, iteration: 91286
loss: 1.0096181631088257,grad_norm: 0.9230136166405487, iteration: 91287
loss: 1.0109094381332397,grad_norm: 0.7800865368564661, iteration: 91288
loss: 1.045411229133606,grad_norm: 0.999999121677778, iteration: 91289
loss: 0.9714897274971008,grad_norm: 0.9999991315667566, iteration: 91290
loss: 0.984219491481781,grad_norm: 0.9999990642001314, iteration: 91291
loss: 0.9673972129821777,grad_norm: 0.9999992287795069, iteration: 91292
loss: 0.9955807328224182,grad_norm: 0.9196055876033121, iteration: 91293
loss: 1.0006080865859985,grad_norm: 0.9923002670177395, iteration: 91294
loss: 0.9897869825363159,grad_norm: 0.9999990836476276, iteration: 91295
loss: 0.964350163936615,grad_norm: 0.9999988853195146, iteration: 91296
loss: 1.0185126066207886,grad_norm: 0.9999992122170838, iteration: 91297
loss: 1.038763403892517,grad_norm: 0.9618139085941367, iteration: 91298
loss: 1.0333008766174316,grad_norm: 0.9999992657859645, iteration: 91299
loss: 1.0407531261444092,grad_norm: 0.9999990522708903, iteration: 91300
loss: 1.0405930280685425,grad_norm: 0.9999990650611195, iteration: 91301
loss: 1.022932529449463,grad_norm: 0.9695615062090611, iteration: 91302
loss: 1.0792503356933594,grad_norm: 0.999999430381954, iteration: 91303
loss: 0.9998781085014343,grad_norm: 0.9266863724923812, iteration: 91304
loss: 1.023633360862732,grad_norm: 0.9999992236125683, iteration: 91305
loss: 0.9928773641586304,grad_norm: 0.9999991610999752, iteration: 91306
loss: 0.9990116357803345,grad_norm: 0.9261030252328486, iteration: 91307
loss: 1.0049868822097778,grad_norm: 0.9999990400917435, iteration: 91308
loss: 0.9964076280593872,grad_norm: 0.9999990951799609, iteration: 91309
loss: 0.9854520559310913,grad_norm: 0.9999993709880896, iteration: 91310
loss: 1.0017954111099243,grad_norm: 0.999999083736976, iteration: 91311
loss: 0.999366283416748,grad_norm: 0.8062409452247903, iteration: 91312
loss: 0.9608436226844788,grad_norm: 0.9999990410110117, iteration: 91313
loss: 1.0015809535980225,grad_norm: 0.9803767905814443, iteration: 91314
loss: 1.023889183998108,grad_norm: 0.9999990915723245, iteration: 91315
loss: 0.934130072593689,grad_norm: 0.9999991414792629, iteration: 91316
loss: 0.9620997309684753,grad_norm: 0.9999990670973857, iteration: 91317
loss: 0.99854576587677,grad_norm: 0.8811355246967205, iteration: 91318
loss: 0.9680662155151367,grad_norm: 0.9999990595366723, iteration: 91319
loss: 0.9800746440887451,grad_norm: 0.9999990562441239, iteration: 91320
loss: 1.0258874893188477,grad_norm: 0.9609938367984937, iteration: 91321
loss: 1.009368896484375,grad_norm: 0.999999003685692, iteration: 91322
loss: 1.0707786083221436,grad_norm: 0.9999996846882147, iteration: 91323
loss: 1.0104944705963135,grad_norm: 0.9999991147597814, iteration: 91324
loss: 0.9818827509880066,grad_norm: 0.809096548332683, iteration: 91325
loss: 1.0158662796020508,grad_norm: 0.9999991980982944, iteration: 91326
loss: 0.9887022972106934,grad_norm: 0.9928419741279833, iteration: 91327
loss: 0.9785047769546509,grad_norm: 0.8416789009077823, iteration: 91328
loss: 1.0073859691619873,grad_norm: 0.9999991082695039, iteration: 91329
loss: 1.0195281505584717,grad_norm: 0.999999108572938, iteration: 91330
loss: 1.003408670425415,grad_norm: 0.9999992709924814, iteration: 91331
loss: 0.9825084209442139,grad_norm: 0.9999993255145886, iteration: 91332
loss: 1.0056945085525513,grad_norm: 0.9999991934369805, iteration: 91333
loss: 1.0084218978881836,grad_norm: 0.9999992088196986, iteration: 91334
loss: 1.020613431930542,grad_norm: 0.9999991205485158, iteration: 91335
loss: 0.9899044632911682,grad_norm: 0.9999990983673114, iteration: 91336
loss: 0.9636680483818054,grad_norm: 0.9771426865566312, iteration: 91337
loss: 1.0210984945297241,grad_norm: 0.9999991994427292, iteration: 91338
loss: 1.0210459232330322,grad_norm: 0.999999172101963, iteration: 91339
loss: 0.986152172088623,grad_norm: 0.9999991651278134, iteration: 91340
loss: 0.9859892725944519,grad_norm: 0.9999994646001089, iteration: 91341
loss: 0.972876787185669,grad_norm: 0.9999991417876406, iteration: 91342
loss: 1.001174807548523,grad_norm: 0.9999992095215545, iteration: 91343
loss: 1.0116974115371704,grad_norm: 0.9544912678830798, iteration: 91344
loss: 0.9969355463981628,grad_norm: 0.9999990700295919, iteration: 91345
loss: 1.0355453491210938,grad_norm: 0.9999991072852759, iteration: 91346
loss: 0.9989794492721558,grad_norm: 0.9999990326944492, iteration: 91347
loss: 0.9948211908340454,grad_norm: 0.9858950522881667, iteration: 91348
loss: 1.0138367414474487,grad_norm: 0.9999991255904385, iteration: 91349
loss: 1.016539454460144,grad_norm: 0.9999991102804474, iteration: 91350
loss: 1.0160059928894043,grad_norm: 0.9999993646997458, iteration: 91351
loss: 0.9767231345176697,grad_norm: 0.9543286074220135, iteration: 91352
loss: 1.0160151720046997,grad_norm: 0.9999991205655423, iteration: 91353
loss: 1.049749493598938,grad_norm: 0.9358546534724113, iteration: 91354
loss: 1.0128661394119263,grad_norm: 0.9857807997479271, iteration: 91355
loss: 0.9985882639884949,grad_norm: 0.999998995086718, iteration: 91356
loss: 0.9912886023521423,grad_norm: 0.8933154830775174, iteration: 91357
loss: 0.9859082698822021,grad_norm: 0.9037177403108305, iteration: 91358
loss: 0.9802044630050659,grad_norm: 0.9999991869965348, iteration: 91359
loss: 1.0041669607162476,grad_norm: 0.999999321904014, iteration: 91360
loss: 0.9921935200691223,grad_norm: 0.9999991002949977, iteration: 91361
loss: 1.0538674592971802,grad_norm: 0.9999997411528181, iteration: 91362
loss: 0.987644612789154,grad_norm: 0.9715194806799107, iteration: 91363
loss: 1.0294393301010132,grad_norm: 0.9999989874221019, iteration: 91364
loss: 0.9984882473945618,grad_norm: 0.9999991998598761, iteration: 91365
loss: 1.0161824226379395,grad_norm: 0.9999991946297175, iteration: 91366
loss: 0.975203275680542,grad_norm: 0.9999991363709361, iteration: 91367
loss: 1.02211594581604,grad_norm: 0.9999991161862269, iteration: 91368
loss: 0.9470043182373047,grad_norm: 0.9999990633802539, iteration: 91369
loss: 0.9925952553749084,grad_norm: 0.9999991367942499, iteration: 91370
loss: 0.9660027623176575,grad_norm: 0.9999992211306474, iteration: 91371
loss: 1.015408992767334,grad_norm: 0.8988966891284312, iteration: 91372
loss: 0.9588168263435364,grad_norm: 0.9999991188558275, iteration: 91373
loss: 0.975753664970398,grad_norm: 0.8760797310679669, iteration: 91374
loss: 0.9407820105552673,grad_norm: 0.9999992919897072, iteration: 91375
loss: 0.9911611080169678,grad_norm: 0.9999994011595693, iteration: 91376
loss: 1.0154799222946167,grad_norm: 0.999999527280798, iteration: 91377
loss: 0.9515513181686401,grad_norm: 0.9630043464399032, iteration: 91378
loss: 1.0152517557144165,grad_norm: 0.9999990380285734, iteration: 91379
loss: 1.0174956321716309,grad_norm: 0.9999995494180784, iteration: 91380
loss: 1.0013391971588135,grad_norm: 0.9999991167536608, iteration: 91381
loss: 1.0003385543823242,grad_norm: 0.9966746039115765, iteration: 91382
loss: 0.9748314619064331,grad_norm: 0.9999990925637979, iteration: 91383
loss: 0.9963059425354004,grad_norm: 0.9999992222806758, iteration: 91384
loss: 1.0042701959609985,grad_norm: 0.9999991042959911, iteration: 91385
loss: 1.0440118312835693,grad_norm: 0.9854684741836214, iteration: 91386
loss: 1.0265355110168457,grad_norm: 0.9999989767018262, iteration: 91387
loss: 1.0151294469833374,grad_norm: 0.9999991523243343, iteration: 91388
loss: 1.0355849266052246,grad_norm: 0.9999993202143088, iteration: 91389
loss: 1.0168510675430298,grad_norm: 0.9999992867092379, iteration: 91390
loss: 0.9823644161224365,grad_norm: 0.989471459110995, iteration: 91391
loss: 0.9851648211479187,grad_norm: 0.999999183584442, iteration: 91392
loss: 0.9902644157409668,grad_norm: 0.9999991440241415, iteration: 91393
loss: 1.0366753339767456,grad_norm: 0.8984302296182647, iteration: 91394
loss: 0.9886791706085205,grad_norm: 0.9999992334431125, iteration: 91395
loss: 1.0164709091186523,grad_norm: 0.9519904738274463, iteration: 91396
loss: 0.9805566072463989,grad_norm: 0.9104998737438023, iteration: 91397
loss: 1.0318394899368286,grad_norm: 0.9999999598664636, iteration: 91398
loss: 0.9673777222633362,grad_norm: 0.9999990928412315, iteration: 91399
loss: 0.9706489443778992,grad_norm: 0.9999993476485168, iteration: 91400
loss: 1.0064845085144043,grad_norm: 0.999999124700631, iteration: 91401
loss: 1.0300260782241821,grad_norm: 0.9999991754030202, iteration: 91402
loss: 1.010002851486206,grad_norm: 0.9999993105408401, iteration: 91403
loss: 1.02500319480896,grad_norm: 0.9999992884623873, iteration: 91404
loss: 1.0232471227645874,grad_norm: 0.9999991317114072, iteration: 91405
loss: 1.0056358575820923,grad_norm: 0.9999990192638114, iteration: 91406
loss: 0.9968997240066528,grad_norm: 0.9351099978906946, iteration: 91407
loss: 1.000580906867981,grad_norm: 0.9529244388934589, iteration: 91408
loss: 1.0793497562408447,grad_norm: 0.9999992380792947, iteration: 91409
loss: 1.0223610401153564,grad_norm: 0.9999989470534665, iteration: 91410
loss: 0.9699172973632812,grad_norm: 0.9999990772493359, iteration: 91411
loss: 1.0374656915664673,grad_norm: 0.9999991922153781, iteration: 91412
loss: 0.9841974377632141,grad_norm: 0.9999993652216798, iteration: 91413
loss: 0.9884244203567505,grad_norm: 0.9459787176968517, iteration: 91414
loss: 1.0028691291809082,grad_norm: 0.999999164049343, iteration: 91415
loss: 1.0394392013549805,grad_norm: 0.9999995747075608, iteration: 91416
loss: 1.0260684490203857,grad_norm: 0.9999992620959114, iteration: 91417
loss: 1.0093200206756592,grad_norm: 0.9999990885435041, iteration: 91418
loss: 1.0094256401062012,grad_norm: 0.9999992507738774, iteration: 91419
loss: 1.0062586069107056,grad_norm: 0.9999991330737732, iteration: 91420
loss: 0.9938801527023315,grad_norm: 0.9999990674080208, iteration: 91421
loss: 0.9983018040657043,grad_norm: 0.999999384265211, iteration: 91422
loss: 0.9818284511566162,grad_norm: 0.9934740790496891, iteration: 91423
loss: 1.037821888923645,grad_norm: 0.9999991099563025, iteration: 91424
loss: 1.0208543539047241,grad_norm: 0.9999999369993073, iteration: 91425
loss: 0.9937600493431091,grad_norm: 0.9999992168450612, iteration: 91426
loss: 1.0084372758865356,grad_norm: 0.9999990005552641, iteration: 91427
loss: 1.0162912607192993,grad_norm: 0.9999996899011151, iteration: 91428
loss: 1.0099092721939087,grad_norm: 0.9999990998343112, iteration: 91429
loss: 1.0410950183868408,grad_norm: 0.9999995872896598, iteration: 91430
loss: 1.0110214948654175,grad_norm: 0.9999988951505547, iteration: 91431
loss: 1.0024769306182861,grad_norm: 0.9999991683490708, iteration: 91432
loss: 1.0157326459884644,grad_norm: 0.9523319642424608, iteration: 91433
loss: 0.9871645569801331,grad_norm: 0.968905702813443, iteration: 91434
loss: 1.0093179941177368,grad_norm: 0.9999991211168024, iteration: 91435
loss: 1.006745457649231,grad_norm: 0.9999992973513251, iteration: 91436
loss: 1.0475437641143799,grad_norm: 0.9999994620545659, iteration: 91437
loss: 1.0565106868743896,grad_norm: 0.9999992567695205, iteration: 91438
loss: 0.9886458516120911,grad_norm: 0.9999994521281842, iteration: 91439
loss: 0.9874887466430664,grad_norm: 0.999999145358637, iteration: 91440
loss: 0.9999045133590698,grad_norm: 0.9999991896308096, iteration: 91441
loss: 1.0186212062835693,grad_norm: 0.8760635154441316, iteration: 91442
loss: 1.0247553586959839,grad_norm: 0.9999991711104206, iteration: 91443
loss: 0.9984075427055359,grad_norm: 0.9999989673341797, iteration: 91444
loss: 1.0147645473480225,grad_norm: 0.9678535999381529, iteration: 91445
loss: 1.0246120691299438,grad_norm: 0.9999993365805656, iteration: 91446
loss: 1.033141016960144,grad_norm: 0.9999991453663865, iteration: 91447
loss: 1.0571651458740234,grad_norm: 0.999999376780712, iteration: 91448
loss: 1.0141326189041138,grad_norm: 0.9374963899241646, iteration: 91449
loss: 1.011506199836731,grad_norm: 0.9999993022997137, iteration: 91450
loss: 1.0447200536727905,grad_norm: 0.9999990562535575, iteration: 91451
loss: 0.961497962474823,grad_norm: 0.9999990442885098, iteration: 91452
loss: 1.0286329984664917,grad_norm: 0.8739653795565336, iteration: 91453
loss: 0.9732914566993713,grad_norm: 0.9999989466450174, iteration: 91454
loss: 1.0196642875671387,grad_norm: 0.9999992059897113, iteration: 91455
loss: 0.9928359389305115,grad_norm: 0.99999913686861, iteration: 91456
loss: 0.98305743932724,grad_norm: 0.9999990611297651, iteration: 91457
loss: 0.9925467371940613,grad_norm: 0.9999990531357418, iteration: 91458
loss: 1.113324761390686,grad_norm: 0.9999997336999945, iteration: 91459
loss: 1.0296034812927246,grad_norm: 0.9999993113976366, iteration: 91460
loss: 0.9791597127914429,grad_norm: 0.9999991929800274, iteration: 91461
loss: 1.0192389488220215,grad_norm: 0.9999991458995946, iteration: 91462
loss: 0.9509780406951904,grad_norm: 0.9999992642090881, iteration: 91463
loss: 1.00648832321167,grad_norm: 0.9544946418756949, iteration: 91464
loss: 1.0135518312454224,grad_norm: 0.9999992402585839, iteration: 91465
loss: 0.9903343915939331,grad_norm: 0.9999990812511319, iteration: 91466
loss: 1.0133986473083496,grad_norm: 0.8841048010772835, iteration: 91467
loss: 1.0008054971694946,grad_norm: 0.9999991483981157, iteration: 91468
loss: 0.9863125681877136,grad_norm: 0.9696275695380874, iteration: 91469
loss: 1.0190857648849487,grad_norm: 0.9999997678988974, iteration: 91470
loss: 0.9905376434326172,grad_norm: 0.9999992626480098, iteration: 91471
loss: 1.0054994821548462,grad_norm: 0.8917568433322938, iteration: 91472
loss: 0.9914999604225159,grad_norm: 0.9481459904407844, iteration: 91473
loss: 0.9911127090454102,grad_norm: 0.999999413062097, iteration: 91474
loss: 1.0368036031723022,grad_norm: 0.9999993990732703, iteration: 91475
loss: 1.0364891290664673,grad_norm: 0.9270761891596057, iteration: 91476
loss: 1.0190894603729248,grad_norm: 0.9999991233125343, iteration: 91477
loss: 0.9622191190719604,grad_norm: 0.9999989697587276, iteration: 91478
loss: 1.02554452419281,grad_norm: 0.9999990721478174, iteration: 91479
loss: 1.090740442276001,grad_norm: 0.9999995141392984, iteration: 91480
loss: 0.9994110465049744,grad_norm: 0.9999991542552387, iteration: 91481
loss: 0.983766496181488,grad_norm: 0.9579803229119976, iteration: 91482
loss: 0.9947506785392761,grad_norm: 0.9720410829753977, iteration: 91483
loss: 0.9839834570884705,grad_norm: 0.9703223475610445, iteration: 91484
loss: 0.9884291291236877,grad_norm: 0.9999991766014122, iteration: 91485
loss: 0.9962171912193298,grad_norm: 0.9622443204962163, iteration: 91486
loss: 0.9670054912567139,grad_norm: 0.9999991566814882, iteration: 91487
loss: 0.9772089719772339,grad_norm: 0.9499967303498458, iteration: 91488
loss: 1.0210691690444946,grad_norm: 0.924059470002864, iteration: 91489
loss: 0.9702897071838379,grad_norm: 0.9999993528938771, iteration: 91490
loss: 1.017179250717163,grad_norm: 0.9999990801769636, iteration: 91491
loss: 0.9844310283660889,grad_norm: 0.9999991111545634, iteration: 91492
loss: 1.0154314041137695,grad_norm: 0.9999990796828567, iteration: 91493
loss: 1.0135247707366943,grad_norm: 0.9999991146406121, iteration: 91494
loss: 0.9798175096511841,grad_norm: 0.9999989986401213, iteration: 91495
loss: 1.0028928518295288,grad_norm: 0.942883016898102, iteration: 91496
loss: 0.9942090511322021,grad_norm: 0.9999991332088933, iteration: 91497
loss: 0.9849627614021301,grad_norm: 0.9999991444167101, iteration: 91498
loss: 1.0124620199203491,grad_norm: 0.8561095106238891, iteration: 91499
loss: 0.9958212971687317,grad_norm: 0.9999990811894164, iteration: 91500
loss: 1.0118651390075684,grad_norm: 0.9999990901667805, iteration: 91501
loss: 0.9787219166755676,grad_norm: 0.9999991378657554, iteration: 91502
loss: 1.0166041851043701,grad_norm: 0.9999990864758789, iteration: 91503
loss: 1.0028934478759766,grad_norm: 0.9999992142509375, iteration: 91504
loss: 1.0033090114593506,grad_norm: 0.899931099911505, iteration: 91505
loss: 0.9889176487922668,grad_norm: 0.9990201263422002, iteration: 91506
loss: 0.9944282174110413,grad_norm: 0.9999991495697559, iteration: 91507
loss: 1.0418111085891724,grad_norm: 0.9999990138325016, iteration: 91508
loss: 0.977401852607727,grad_norm: 0.9130366232579205, iteration: 91509
loss: 0.9845735430717468,grad_norm: 0.8662043833087532, iteration: 91510
loss: 0.9956871271133423,grad_norm: 0.9544843931417901, iteration: 91511
loss: 0.9733883142471313,grad_norm: 0.9999992437089064, iteration: 91512
loss: 0.9981570839881897,grad_norm: 0.9999989581149532, iteration: 91513
loss: 1.0116430521011353,grad_norm: 0.9999992222335548, iteration: 91514
loss: 1.0402634143829346,grad_norm: 0.9862171447964933, iteration: 91515
loss: 1.0016319751739502,grad_norm: 0.9999990215827478, iteration: 91516
loss: 1.0098552703857422,grad_norm: 0.9999990654108563, iteration: 91517
loss: 0.9627090692520142,grad_norm: 0.9999990543886245, iteration: 91518
loss: 1.0078753232955933,grad_norm: 0.9265925126586452, iteration: 91519
loss: 0.9840415716171265,grad_norm: 0.9999993217399522, iteration: 91520
loss: 0.9993373155593872,grad_norm: 0.9460238702410545, iteration: 91521
loss: 1.0189729928970337,grad_norm: 0.9999990420790591, iteration: 91522
loss: 0.9984534382820129,grad_norm: 0.8998659105754434, iteration: 91523
loss: 1.0132087469100952,grad_norm: 0.992227599270899, iteration: 91524
loss: 1.0059105157852173,grad_norm: 0.9999989843779131, iteration: 91525
loss: 1.0209290981292725,grad_norm: 0.9999995033151436, iteration: 91526
loss: 1.0420525074005127,grad_norm: 0.9999990720060818, iteration: 91527
loss: 1.0157660245895386,grad_norm: 0.9975768946874771, iteration: 91528
loss: 0.9946768879890442,grad_norm: 0.9909160981392505, iteration: 91529
loss: 0.9953840374946594,grad_norm: 0.9808603135611482, iteration: 91530
loss: 1.0068804025650024,grad_norm: 0.9999991504137944, iteration: 91531
loss: 1.0381231307983398,grad_norm: 0.9999994991961516, iteration: 91532
loss: 1.083328127861023,grad_norm: 0.9999991820059652, iteration: 91533
loss: 1.0298064947128296,grad_norm: 0.9415693598861192, iteration: 91534
loss: 0.992493748664856,grad_norm: 0.9999993321891145, iteration: 91535
loss: 0.9762312173843384,grad_norm: 0.9708491700044317, iteration: 91536
loss: 1.0187803506851196,grad_norm: 0.9999991335216548, iteration: 91537
loss: 1.0350595712661743,grad_norm: 0.9999992496612721, iteration: 91538
loss: 1.0043036937713623,grad_norm: 0.911719487252388, iteration: 91539
loss: 1.0368657112121582,grad_norm: 0.9999992484017386, iteration: 91540
loss: 1.0138274431228638,grad_norm: 0.9999992410234445, iteration: 91541
loss: 0.9819916486740112,grad_norm: 0.9591905167671372, iteration: 91542
loss: 0.9657054543495178,grad_norm: 0.9999992071701537, iteration: 91543
loss: 1.0266331434249878,grad_norm: 0.9473052235007626, iteration: 91544
loss: 0.9822229743003845,grad_norm: 0.9999990396886245, iteration: 91545
loss: 1.0068460702896118,grad_norm: 0.9999990957348659, iteration: 91546
loss: 0.9927908778190613,grad_norm: 0.9999989997458245, iteration: 91547
loss: 1.0344113111495972,grad_norm: 0.8801522682725896, iteration: 91548
loss: 1.0014113187789917,grad_norm: 0.9999991770190801, iteration: 91549
loss: 1.007379174232483,grad_norm: 0.9999992100029451, iteration: 91550
loss: 0.994867205619812,grad_norm: 0.9999991901081844, iteration: 91551
loss: 0.9919772148132324,grad_norm: 0.9999989579246539, iteration: 91552
loss: 0.9809063673019409,grad_norm: 0.9999990462767678, iteration: 91553
loss: 1.0152735710144043,grad_norm: 0.9999993933589816, iteration: 91554
loss: 0.9681998491287231,grad_norm: 0.9999992084893139, iteration: 91555
loss: 1.0030579566955566,grad_norm: 0.9999991601608906, iteration: 91556
loss: 0.9791173934936523,grad_norm: 0.9999993308791959, iteration: 91557
loss: 0.9759166240692139,grad_norm: 0.9763253806649647, iteration: 91558
loss: 0.9915485382080078,grad_norm: 0.8869464506248055, iteration: 91559
loss: 1.0409188270568848,grad_norm: 0.9999993018568561, iteration: 91560
loss: 0.9648740887641907,grad_norm: 0.9424293709512134, iteration: 91561
loss: 1.0120298862457275,grad_norm: 0.8217694423007768, iteration: 91562
loss: 1.0134583711624146,grad_norm: 0.9034280281917247, iteration: 91563
loss: 0.9956990480422974,grad_norm: 0.9999991680385553, iteration: 91564
loss: 0.9983896613121033,grad_norm: 0.9999991473446858, iteration: 91565
loss: 0.9847562909126282,grad_norm: 0.8850790722751947, iteration: 91566
loss: 1.0174741744995117,grad_norm: 0.9999993275211606, iteration: 91567
loss: 0.9971831440925598,grad_norm: 0.9221369015631296, iteration: 91568
loss: 0.9993777871131897,grad_norm: 0.9421560436430396, iteration: 91569
loss: 1.016546368598938,grad_norm: 0.9999991523355567, iteration: 91570
loss: 0.993908166885376,grad_norm: 0.8947098644098785, iteration: 91571
loss: 1.0346084833145142,grad_norm: 0.9999990363724234, iteration: 91572
loss: 1.0614628791809082,grad_norm: 0.9999991852384585, iteration: 91573
loss: 0.9675157070159912,grad_norm: 0.9999991341942573, iteration: 91574
loss: 0.9943602681159973,grad_norm: 0.9418255295721273, iteration: 91575
loss: 1.0138652324676514,grad_norm: 0.8694897488941599, iteration: 91576
loss: 1.0285512208938599,grad_norm: 0.9999992313808436, iteration: 91577
loss: 1.0059421062469482,grad_norm: 0.8925929103468824, iteration: 91578
loss: 1.0040256977081299,grad_norm: 0.9999990871851767, iteration: 91579
loss: 1.0163146257400513,grad_norm: 0.9999991512520039, iteration: 91580
loss: 0.9528084397315979,grad_norm: 0.9999989643665982, iteration: 91581
loss: 1.0080140829086304,grad_norm: 0.9999991541472099, iteration: 91582
loss: 1.016897439956665,grad_norm: 0.9999991570859809, iteration: 91583
loss: 1.01759672164917,grad_norm: 0.9996040973703867, iteration: 91584
loss: 1.0069622993469238,grad_norm: 0.9999991254185917, iteration: 91585
loss: 1.0192086696624756,grad_norm: 0.8302346977818804, iteration: 91586
loss: 1.0162017345428467,grad_norm: 0.9999991463065937, iteration: 91587
loss: 1.0117883682250977,grad_norm: 0.9577523810565792, iteration: 91588
loss: 0.9990450143814087,grad_norm: 0.9999992465540622, iteration: 91589
loss: 1.0144593715667725,grad_norm: 0.997569852450619, iteration: 91590
loss: 0.986924409866333,grad_norm: 0.8588149224829563, iteration: 91591
loss: 1.0196025371551514,grad_norm: 0.9999990376648348, iteration: 91592
loss: 0.9775413870811462,grad_norm: 0.9999991434323894, iteration: 91593
loss: 0.9801096320152283,grad_norm: 0.933698989807821, iteration: 91594
loss: 1.013350009918213,grad_norm: 0.9999991178268194, iteration: 91595
loss: 1.0000877380371094,grad_norm: 0.9040301429583573, iteration: 91596
loss: 1.0032005310058594,grad_norm: 0.9999991926159258, iteration: 91597
loss: 1.0099774599075317,grad_norm: 0.9400211950662095, iteration: 91598
loss: 1.0264208316802979,grad_norm: 0.9999991411264135, iteration: 91599
loss: 0.9970887899398804,grad_norm: 0.9999991990867406, iteration: 91600
loss: 0.9945584535598755,grad_norm: 0.9999991735906558, iteration: 91601
loss: 0.9868929386138916,grad_norm: 0.8866256388456163, iteration: 91602
loss: 0.9593608379364014,grad_norm: 0.9999991881955616, iteration: 91603
loss: 0.9863441586494446,grad_norm: 0.9999991507470429, iteration: 91604
loss: 1.0115764141082764,grad_norm: 0.904423275636477, iteration: 91605
loss: 1.0083460807800293,grad_norm: 0.9999992336794087, iteration: 91606
loss: 1.0163021087646484,grad_norm: 0.9999991705180095, iteration: 91607
loss: 1.0041521787643433,grad_norm: 0.9764311767965055, iteration: 91608
loss: 1.0036592483520508,grad_norm: 0.9080573000041723, iteration: 91609
loss: 0.9778862595558167,grad_norm: 0.8069312339164575, iteration: 91610
loss: 0.9986705780029297,grad_norm: 0.9999992872954069, iteration: 91611
loss: 1.027211308479309,grad_norm: 0.9999991489837721, iteration: 91612
loss: 0.9994420409202576,grad_norm: 0.9846882217993761, iteration: 91613
loss: 1.0233139991760254,grad_norm: 0.9999992067703916, iteration: 91614
loss: 0.9988048076629639,grad_norm: 0.9999990914129817, iteration: 91615
loss: 1.0321683883666992,grad_norm: 0.9421366042495293, iteration: 91616
loss: 0.9913452863693237,grad_norm: 0.8009215080464057, iteration: 91617
loss: 0.981285035610199,grad_norm: 0.9999988405169351, iteration: 91618
loss: 0.9656773209571838,grad_norm: 0.8845662232255985, iteration: 91619
loss: 1.0714609622955322,grad_norm: 0.999999126465412, iteration: 91620
loss: 0.9880222678184509,grad_norm: 0.9095994367627569, iteration: 91621
loss: 0.9855266809463501,grad_norm: 0.999999026821118, iteration: 91622
loss: 0.988898515701294,grad_norm: 0.8992178751742284, iteration: 91623
loss: 0.9887976050376892,grad_norm: 0.999999103475092, iteration: 91624
loss: 1.0107585191726685,grad_norm: 0.970920356681758, iteration: 91625
loss: 1.0287421941757202,grad_norm: 0.9999991977112729, iteration: 91626
loss: 0.9841122627258301,grad_norm: 0.9999991155107105, iteration: 91627
loss: 0.9532429575920105,grad_norm: 0.9999992536373631, iteration: 91628
loss: 0.9892792701721191,grad_norm: 0.9484894850617128, iteration: 91629
loss: 1.0097521543502808,grad_norm: 0.9999991494129612, iteration: 91630
loss: 0.9590481519699097,grad_norm: 0.9999990630945597, iteration: 91631
loss: 0.9800875782966614,grad_norm: 0.9999991042163356, iteration: 91632
loss: 1.0090197324752808,grad_norm: 0.9999990621871474, iteration: 91633
loss: 1.031904697418213,grad_norm: 0.9999998223251394, iteration: 91634
loss: 1.0305014848709106,grad_norm: 0.9999992201881427, iteration: 91635
loss: 1.0316591262817383,grad_norm: 0.999999188582607, iteration: 91636
loss: 1.015194058418274,grad_norm: 0.999999172064386, iteration: 91637
loss: 1.004342794418335,grad_norm: 0.9999989559577458, iteration: 91638
loss: 1.0087898969650269,grad_norm: 0.9999991809226545, iteration: 91639
loss: 1.0123893022537231,grad_norm: 0.8587780794989437, iteration: 91640
loss: 1.018472671508789,grad_norm: 0.8805013089629818, iteration: 91641
loss: 1.044155478477478,grad_norm: 0.8877807093011671, iteration: 91642
loss: 0.997698187828064,grad_norm: 0.9999990390664353, iteration: 91643
loss: 0.9945797920227051,grad_norm: 0.9999990893117344, iteration: 91644
loss: 0.9823975563049316,grad_norm: 0.9999990971235548, iteration: 91645
loss: 1.0247585773468018,grad_norm: 0.898848695902086, iteration: 91646
loss: 0.9805145859718323,grad_norm: 0.9351201238842421, iteration: 91647
loss: 1.028527855873108,grad_norm: 0.9999992270833196, iteration: 91648
loss: 0.9972518086433411,grad_norm: 0.9999990119850978, iteration: 91649
loss: 1.0382639169692993,grad_norm: 0.9999994758821206, iteration: 91650
loss: 0.9636654257774353,grad_norm: 0.9999991593670108, iteration: 91651
loss: 0.989569365978241,grad_norm: 0.9999989763550684, iteration: 91652
loss: 1.0181034803390503,grad_norm: 0.7834839242390182, iteration: 91653
loss: 1.008538007736206,grad_norm: 0.998780274391834, iteration: 91654
loss: 0.9878984689712524,grad_norm: 0.9999990469483825, iteration: 91655
loss: 0.9839654564857483,grad_norm: 0.9999991978040121, iteration: 91656
loss: 1.0092203617095947,grad_norm: 0.9999991476602186, iteration: 91657
loss: 0.9629673957824707,grad_norm: 0.9999992081671154, iteration: 91658
loss: 0.9974838495254517,grad_norm: 0.9999992492426173, iteration: 91659
loss: 0.9901523590087891,grad_norm: 0.9048201430729046, iteration: 91660
loss: 1.0149056911468506,grad_norm: 0.9999991368143866, iteration: 91661
loss: 1.003328561782837,grad_norm: 0.9678586280460099, iteration: 91662
loss: 1.0257441997528076,grad_norm: 0.9999999707947494, iteration: 91663
loss: 1.0077457427978516,grad_norm: 0.9999991009849615, iteration: 91664
loss: 0.9661599397659302,grad_norm: 0.9999991512479859, iteration: 91665
loss: 0.9842963814735413,grad_norm: 0.9999990236452646, iteration: 91666
loss: 0.9970384240150452,grad_norm: 0.956599673044919, iteration: 91667
loss: 1.0458043813705444,grad_norm: 0.9999991853192058, iteration: 91668
loss: 1.0027772188186646,grad_norm: 0.9999990356978131, iteration: 91669
loss: 0.9549288153648376,grad_norm: 0.9999991545272386, iteration: 91670
loss: 1.030933141708374,grad_norm: 0.9999989921150091, iteration: 91671
loss: 0.9774885177612305,grad_norm: 0.9999989599075493, iteration: 91672
loss: 0.9807827472686768,grad_norm: 0.9999992937244834, iteration: 91673
loss: 0.9790734648704529,grad_norm: 0.9999991349123045, iteration: 91674
loss: 0.9773377776145935,grad_norm: 0.9999989842176031, iteration: 91675
loss: 0.9590441584587097,grad_norm: 0.9328291271255132, iteration: 91676
loss: 1.0048668384552002,grad_norm: 0.9999990715096395, iteration: 91677
loss: 0.9697737097740173,grad_norm: 0.9709465780444617, iteration: 91678
loss: 1.016845703125,grad_norm: 0.9999990985130367, iteration: 91679
loss: 1.0387024879455566,grad_norm: 0.9613897179917669, iteration: 91680
loss: 0.9994078874588013,grad_norm: 0.9999994132159562, iteration: 91681
loss: 1.0341012477874756,grad_norm: 0.999999350301517, iteration: 91682
loss: 1.0248554944992065,grad_norm: 0.9999991307124692, iteration: 91683
loss: 0.9782435894012451,grad_norm: 0.9999991596722546, iteration: 91684
loss: 1.0467334985733032,grad_norm: 0.8793060434792305, iteration: 91685
loss: 1.0462840795516968,grad_norm: 0.9999996599816303, iteration: 91686
loss: 1.0306246280670166,grad_norm: 0.9999990870961265, iteration: 91687
loss: 1.0044623613357544,grad_norm: 0.9999992041553512, iteration: 91688
loss: 1.0230830907821655,grad_norm: 0.9999992863610682, iteration: 91689
loss: 1.0224874019622803,grad_norm: 0.972720441013757, iteration: 91690
loss: 1.0163424015045166,grad_norm: 0.9999991121924813, iteration: 91691
loss: 0.9681318402290344,grad_norm: 0.9999990273438648, iteration: 91692
loss: 1.003055214881897,grad_norm: 0.9999991235769105, iteration: 91693
loss: 0.9732872843742371,grad_norm: 0.9999992350849422, iteration: 91694
loss: 0.9717409014701843,grad_norm: 0.9999992386506832, iteration: 91695
loss: 0.9782737493515015,grad_norm: 0.9999989407687074, iteration: 91696
loss: 1.0268831253051758,grad_norm: 0.972920584570665, iteration: 91697
loss: 1.009278416633606,grad_norm: 0.9999991474125687, iteration: 91698
loss: 1.015694499015808,grad_norm: 0.9999990810666853, iteration: 91699
loss: 0.9835795760154724,grad_norm: 0.9999990200336815, iteration: 91700
loss: 1.029284119606018,grad_norm: 0.9999991427167981, iteration: 91701
loss: 1.0125044584274292,grad_norm: 0.9999998619463201, iteration: 91702
loss: 1.0013171434402466,grad_norm: 0.9570231275355191, iteration: 91703
loss: 1.0484248399734497,grad_norm: 0.9999992030427693, iteration: 91704
loss: 0.9862887859344482,grad_norm: 0.9999992391702044, iteration: 91705
loss: 0.9868700504302979,grad_norm: 0.999999107028925, iteration: 91706
loss: 0.9981942772865295,grad_norm: 0.9999992324826602, iteration: 91707
loss: 1.023072361946106,grad_norm: 0.9991412354049736, iteration: 91708
loss: 0.9875690937042236,grad_norm: 0.9650004638690278, iteration: 91709
loss: 0.9825951457023621,grad_norm: 0.9999991434880864, iteration: 91710
loss: 1.0453848838806152,grad_norm: 0.9999990833305784, iteration: 91711
loss: 0.9700876474380493,grad_norm: 0.8831124038083604, iteration: 91712
loss: 0.9833922982215881,grad_norm: 0.999999199710836, iteration: 91713
loss: 0.9753074049949646,grad_norm: 0.9999991283053432, iteration: 91714
loss: 1.013967514038086,grad_norm: 0.9999991811939443, iteration: 91715
loss: 0.9941127300262451,grad_norm: 0.9999992326729223, iteration: 91716
loss: 1.007697343826294,grad_norm: 0.8950224185321877, iteration: 91717
loss: 1.0326566696166992,grad_norm: 0.9999990804627756, iteration: 91718
loss: 1.0050835609436035,grad_norm: 0.9999990262967099, iteration: 91719
loss: 1.0079121589660645,grad_norm: 0.970478138361648, iteration: 91720
loss: 1.0284843444824219,grad_norm: 0.9999991030131873, iteration: 91721
loss: 1.0244722366333008,grad_norm: 0.9999991583162388, iteration: 91722
loss: 1.021097183227539,grad_norm: 0.9999991311610407, iteration: 91723
loss: 0.9692001342773438,grad_norm: 0.945935032660932, iteration: 91724
loss: 0.9624559879302979,grad_norm: 0.999999032408382, iteration: 91725
loss: 1.0114648342132568,grad_norm: 0.9999992286403594, iteration: 91726
loss: 0.9968612194061279,grad_norm: 0.9830950565477605, iteration: 91727
loss: 1.022907018661499,grad_norm: 0.9999991197892059, iteration: 91728
loss: 1.0316097736358643,grad_norm: 0.9999990832352437, iteration: 91729
loss: 1.0022865533828735,grad_norm: 0.9999989136565121, iteration: 91730
loss: 0.997653603553772,grad_norm: 0.9999990648306764, iteration: 91731
loss: 0.9922865033149719,grad_norm: 0.9993829416792392, iteration: 91732
loss: 0.9930859804153442,grad_norm: 0.9999990066705654, iteration: 91733
loss: 0.9768902063369751,grad_norm: 0.847125174665419, iteration: 91734
loss: 0.9913021922111511,grad_norm: 0.9365504429533995, iteration: 91735
loss: 0.9817607998847961,grad_norm: 0.9937139956606799, iteration: 91736
loss: 0.9934013485908508,grad_norm: 0.884377916063402, iteration: 91737
loss: 0.9863128662109375,grad_norm: 0.9592640900837367, iteration: 91738
loss: 0.9959221482276917,grad_norm: 0.9999991954819948, iteration: 91739
loss: 0.980670154094696,grad_norm: 0.9999990719903954, iteration: 91740
loss: 1.0124598741531372,grad_norm: 0.9999990000737577, iteration: 91741
loss: 1.014966368675232,grad_norm: 0.9999991578747287, iteration: 91742
loss: 1.0369726419448853,grad_norm: 0.9879507033078461, iteration: 91743
loss: 0.9889217615127563,grad_norm: 0.9999991732596296, iteration: 91744
loss: 0.9770588874816895,grad_norm: 0.854793876369545, iteration: 91745
loss: 0.9635990858078003,grad_norm: 0.8446664516526122, iteration: 91746
loss: 0.9542168378829956,grad_norm: 0.9195855715049445, iteration: 91747
loss: 1.0220592021942139,grad_norm: 0.9999990778955187, iteration: 91748
loss: 0.9945656061172485,grad_norm: 0.9999993093871917, iteration: 91749
loss: 1.0120117664337158,grad_norm: 0.9999990993165825, iteration: 91750
loss: 0.9908744096755981,grad_norm: 0.9999991798268502, iteration: 91751
loss: 0.9607498049736023,grad_norm: 0.9999991602534071, iteration: 91752
loss: 1.0174490213394165,grad_norm: 0.9999991652189278, iteration: 91753
loss: 1.0409003496170044,grad_norm: 0.9999991078826509, iteration: 91754
loss: 0.9962013363838196,grad_norm: 0.9999991368999949, iteration: 91755
loss: 1.0140933990478516,grad_norm: 0.9955976677499769, iteration: 91756
loss: 1.0074204206466675,grad_norm: 0.9999991666410416, iteration: 91757
loss: 1.0057436227798462,grad_norm: 0.9999990642057072, iteration: 91758
loss: 1.0354540348052979,grad_norm: 0.9720753209742062, iteration: 91759
loss: 1.0421034097671509,grad_norm: 0.9138470792326322, iteration: 91760
loss: 1.008172631263733,grad_norm: 0.928590396924403, iteration: 91761
loss: 1.0035216808319092,grad_norm: 0.9999991990563978, iteration: 91762
loss: 1.0165144205093384,grad_norm: 0.9999992815841305, iteration: 91763
loss: 1.0215368270874023,grad_norm: 0.9999992040653377, iteration: 91764
loss: 1.006892442703247,grad_norm: 0.9999992101196695, iteration: 91765
loss: 1.0197739601135254,grad_norm: 0.9999990258611945, iteration: 91766
loss: 0.9611199498176575,grad_norm: 0.9999991952359812, iteration: 91767
loss: 0.9743301868438721,grad_norm: 0.9999991662824291, iteration: 91768
loss: 0.9693413376808167,grad_norm: 0.9999991128885518, iteration: 91769
loss: 0.9815627336502075,grad_norm: 0.9999990108121706, iteration: 91770
loss: 0.9940978288650513,grad_norm: 0.9999990595810625, iteration: 91771
loss: 0.9810922145843506,grad_norm: 0.9999990333400782, iteration: 91772
loss: 1.013070821762085,grad_norm: 0.9681187130681764, iteration: 91773
loss: 1.0489928722381592,grad_norm: 0.9999991448694531, iteration: 91774
loss: 0.9893381595611572,grad_norm: 0.9999990094367134, iteration: 91775
loss: 1.0074726343154907,grad_norm: 0.9999990961035455, iteration: 91776
loss: 0.9846453070640564,grad_norm: 0.9999991383752465, iteration: 91777
loss: 0.9791251420974731,grad_norm: 0.9519470042457644, iteration: 91778
loss: 0.9835944175720215,grad_norm: 0.9998057798312538, iteration: 91779
loss: 1.0076695680618286,grad_norm: 0.9756901173114878, iteration: 91780
loss: 0.9632735252380371,grad_norm: 0.8949904807247302, iteration: 91781
loss: 0.9924870729446411,grad_norm: 0.8789024086613493, iteration: 91782
loss: 0.9871895909309387,grad_norm: 0.9999991429627182, iteration: 91783
loss: 0.9511551856994629,grad_norm: 0.9999991987149119, iteration: 91784
loss: 0.9944648146629333,grad_norm: 0.9999996290423724, iteration: 91785
loss: 1.013008952140808,grad_norm: 0.9377238382125452, iteration: 91786
loss: 0.9753605127334595,grad_norm: 0.9999991256815152, iteration: 91787
loss: 1.0137869119644165,grad_norm: 0.9999996703960558, iteration: 91788
loss: 0.9895682334899902,grad_norm: 0.9999991434673688, iteration: 91789
loss: 1.033324122428894,grad_norm: 0.9276252106681011, iteration: 91790
loss: 1.0014134645462036,grad_norm: 0.9999990918839378, iteration: 91791
loss: 0.96847003698349,grad_norm: 0.9999990244511039, iteration: 91792
loss: 0.9844307899475098,grad_norm: 0.9033178140861929, iteration: 91793
loss: 0.992381751537323,grad_norm: 0.9999992228551239, iteration: 91794
loss: 0.9932195544242859,grad_norm: 0.999999307169593, iteration: 91795
loss: 1.012408971786499,grad_norm: 0.9718675451416835, iteration: 91796
loss: 1.0560613870620728,grad_norm: 0.9488537048862749, iteration: 91797
loss: 1.0459316968917847,grad_norm: 0.9999991507928744, iteration: 91798
loss: 1.0056253671646118,grad_norm: 0.8624349295178925, iteration: 91799
loss: 0.9897978901863098,grad_norm: 0.9999992560519152, iteration: 91800
loss: 0.9704222679138184,grad_norm: 0.9999991970433855, iteration: 91801
loss: 1.0101139545440674,grad_norm: 0.9999990349582963, iteration: 91802
loss: 0.9883332252502441,grad_norm: 0.8325909253484286, iteration: 91803
loss: 1.0065734386444092,grad_norm: 0.999999619550697, iteration: 91804
loss: 0.9990317225456238,grad_norm: 0.9999991601501361, iteration: 91805
loss: 1.0041475296020508,grad_norm: 0.9860132858515236, iteration: 91806
loss: 1.0220699310302734,grad_norm: 0.9739707407638055, iteration: 91807
loss: 0.9948129057884216,grad_norm: 0.9927916489405886, iteration: 91808
loss: 1.0071237087249756,grad_norm: 0.9999992100493226, iteration: 91809
loss: 1.0134726762771606,grad_norm: 0.9905188426528645, iteration: 91810
loss: 1.0261855125427246,grad_norm: 0.9999992672296291, iteration: 91811
loss: 1.0316078662872314,grad_norm: 0.9999990567063456, iteration: 91812
loss: 0.995184063911438,grad_norm: 0.999999020469596, iteration: 91813
loss: 0.9987063407897949,grad_norm: 0.9999989413488879, iteration: 91814
loss: 0.9520207047462463,grad_norm: 0.9999990836393498, iteration: 91815
loss: 1.037920355796814,grad_norm: 0.9999991249134137, iteration: 91816
loss: 1.0263259410858154,grad_norm: 0.9999992168828341, iteration: 91817
loss: 1.0027827024459839,grad_norm: 0.9999993177544143, iteration: 91818
loss: 0.9947801232337952,grad_norm: 0.9999991140651918, iteration: 91819
loss: 0.9795485734939575,grad_norm: 0.9938670353397558, iteration: 91820
loss: 0.9919897317886353,grad_norm: 0.9999991169957837, iteration: 91821
loss: 0.9437150359153748,grad_norm: 0.995181840342287, iteration: 91822
loss: 1.0156407356262207,grad_norm: 0.9999992084864923, iteration: 91823
loss: 1.0181163549423218,grad_norm: 0.9999991074364297, iteration: 91824
loss: 1.020912766456604,grad_norm: 0.9751678142722197, iteration: 91825
loss: 1.0193971395492554,grad_norm: 0.9999990656431889, iteration: 91826
loss: 1.0213686227798462,grad_norm: 0.9999991675517081, iteration: 91827
loss: 1.0003077983856201,grad_norm: 0.999999202752307, iteration: 91828
loss: 1.0041528940200806,grad_norm: 0.946416956827787, iteration: 91829
loss: 1.011633038520813,grad_norm: 0.9999992201179452, iteration: 91830
loss: 0.9838910102844238,grad_norm: 0.8699526574409411, iteration: 91831
loss: 1.0014383792877197,grad_norm: 0.9471169703175665, iteration: 91832
loss: 0.9768704175949097,grad_norm: 0.9999990665958367, iteration: 91833
loss: 0.9905108213424683,grad_norm: 0.9890489767064572, iteration: 91834
loss: 0.9402371048927307,grad_norm: 0.9974566220894975, iteration: 91835
loss: 0.97894686460495,grad_norm: 0.9999990616627717, iteration: 91836
loss: 1.0354691743850708,grad_norm: 0.9999991531867506, iteration: 91837
loss: 0.9508525729179382,grad_norm: 0.9999992064165422, iteration: 91838
loss: 1.0020679235458374,grad_norm: 0.9213729908716242, iteration: 91839
loss: 1.0070754289627075,grad_norm: 0.9431513734601418, iteration: 91840
loss: 1.015976071357727,grad_norm: 0.9680673327319129, iteration: 91841
loss: 1.035229206085205,grad_norm: 0.9999990480269911, iteration: 91842
loss: 0.987845242023468,grad_norm: 0.9999992262167701, iteration: 91843
loss: 1.0064213275909424,grad_norm: 0.9999992122737532, iteration: 91844
loss: 0.9608728289604187,grad_norm: 0.9999991408764609, iteration: 91845
loss: 1.0325348377227783,grad_norm: 0.999999040803503, iteration: 91846
loss: 0.9842438697814941,grad_norm: 0.9999989458916831, iteration: 91847
loss: 0.9923996329307556,grad_norm: 0.9999990968816498, iteration: 91848
loss: 1.0060924291610718,grad_norm: 0.9391199892779653, iteration: 91849
loss: 1.0491607189178467,grad_norm: 0.9999996037883934, iteration: 91850
loss: 1.0053380727767944,grad_norm: 0.9999991126352116, iteration: 91851
loss: 1.0163031816482544,grad_norm: 0.8579552171455141, iteration: 91852
loss: 1.0289102792739868,grad_norm: 0.9999990803849017, iteration: 91853
loss: 1.039833903312683,grad_norm: 0.9999990852667056, iteration: 91854
loss: 1.0310940742492676,grad_norm: 0.9999992502787752, iteration: 91855
loss: 1.0008529424667358,grad_norm: 0.9060328221808774, iteration: 91856
loss: 0.9622400403022766,grad_norm: 0.9999991433527028, iteration: 91857
loss: 1.020630955696106,grad_norm: 0.8980225715908629, iteration: 91858
loss: 1.0070891380310059,grad_norm: 0.9999990850783412, iteration: 91859
loss: 0.9749453067779541,grad_norm: 0.9999991232384793, iteration: 91860
loss: 0.9811173677444458,grad_norm: 0.9999990264824757, iteration: 91861
loss: 0.9817615747451782,grad_norm: 0.9999992927094243, iteration: 91862
loss: 1.0364971160888672,grad_norm: 0.9999990754681545, iteration: 91863
loss: 0.9872199892997742,grad_norm: 0.9999997547641416, iteration: 91864
loss: 0.9873964190483093,grad_norm: 0.9999989927274329, iteration: 91865
loss: 1.0087031126022339,grad_norm: 0.999999042520668, iteration: 91866
loss: 1.007083535194397,grad_norm: 0.8993357581664936, iteration: 91867
loss: 0.9937874674797058,grad_norm: 0.9147882542155434, iteration: 91868
loss: 1.0203922986984253,grad_norm: 0.9999990984220825, iteration: 91869
loss: 1.0022497177124023,grad_norm: 0.9999991141268818, iteration: 91870
loss: 0.9636662602424622,grad_norm: 0.9999991540703398, iteration: 91871
loss: 1.0364872217178345,grad_norm: 0.9999999211923715, iteration: 91872
loss: 1.0117992162704468,grad_norm: 0.9999992725842355, iteration: 91873
loss: 1.00180983543396,grad_norm: 0.9999992382861106, iteration: 91874
loss: 1.0490633249282837,grad_norm: 0.9999991598503312, iteration: 91875
loss: 0.9903990030288696,grad_norm: 0.9999992170742358, iteration: 91876
loss: 0.9986379146575928,grad_norm: 0.9288988612389467, iteration: 91877
loss: 1.0258309841156006,grad_norm: 0.9403130725304805, iteration: 91878
loss: 1.030657172203064,grad_norm: 0.9999991061029994, iteration: 91879
loss: 0.9924994707107544,grad_norm: 0.9999990706456996, iteration: 91880
loss: 0.9808244109153748,grad_norm: 0.999999165193726, iteration: 91881
loss: 1.0107766389846802,grad_norm: 0.9999990945060878, iteration: 91882
loss: 0.9496945738792419,grad_norm: 0.9999991721843849, iteration: 91883
loss: 0.9705360531806946,grad_norm: 0.9644814421321841, iteration: 91884
loss: 1.0010055303573608,grad_norm: 0.9999989978004854, iteration: 91885
loss: 0.9779176712036133,grad_norm: 0.9999992382101963, iteration: 91886
loss: 1.030659794807434,grad_norm: 0.9999991262140344, iteration: 91887
loss: 0.9887976050376892,grad_norm: 0.9999991705418432, iteration: 91888
loss: 1.0082672834396362,grad_norm: 0.9278370796408579, iteration: 91889
loss: 0.9973843097686768,grad_norm: 0.9739588827632967, iteration: 91890
loss: 0.9607053995132446,grad_norm: 0.9673016724795473, iteration: 91891
loss: 0.9739705920219421,grad_norm: 0.9824295513507355, iteration: 91892
loss: 0.9764383435249329,grad_norm: 0.9999993411909461, iteration: 91893
loss: 1.0191667079925537,grad_norm: 0.9999992962606153, iteration: 91894
loss: 1.0110349655151367,grad_norm: 0.8772107578648394, iteration: 91895
loss: 0.9988487362861633,grad_norm: 0.9272073542920255, iteration: 91896
loss: 1.0217593908309937,grad_norm: 0.999999209195136, iteration: 91897
loss: 0.9968730211257935,grad_norm: 0.9999992045874125, iteration: 91898
loss: 0.9968082308769226,grad_norm: 0.9999992245257651, iteration: 91899
loss: 1.02367103099823,grad_norm: 0.9718726579550826, iteration: 91900
loss: 1.0057787895202637,grad_norm: 0.9999990789025592, iteration: 91901
loss: 0.9672373533248901,grad_norm: 0.987982833679966, iteration: 91902
loss: 1.008184552192688,grad_norm: 0.9221341158133582, iteration: 91903
loss: 0.9970478415489197,grad_norm: 0.9999991800771455, iteration: 91904
loss: 0.9913588762283325,grad_norm: 0.9999991373368721, iteration: 91905
loss: 0.9995025992393494,grad_norm: 0.9977431240046151, iteration: 91906
loss: 0.969332754611969,grad_norm: 0.9999990491566676, iteration: 91907
loss: 0.968678891658783,grad_norm: 0.9999989391778327, iteration: 91908
loss: 1.0688049793243408,grad_norm: 0.9999992472922844, iteration: 91909
loss: 1.0270094871520996,grad_norm: 0.9510800154435047, iteration: 91910
loss: 1.0423855781555176,grad_norm: 0.9902518674592454, iteration: 91911
loss: 0.96041339635849,grad_norm: 0.999999302077022, iteration: 91912
loss: 0.9846436977386475,grad_norm: 0.9999991709202376, iteration: 91913
loss: 1.0103719234466553,grad_norm: 0.8822748664142606, iteration: 91914
loss: 1.0370758771896362,grad_norm: 0.9999991229202099, iteration: 91915
loss: 1.0253709554672241,grad_norm: 0.9319451018389795, iteration: 91916
loss: 1.0011014938354492,grad_norm: 0.9999991233992188, iteration: 91917
loss: 1.0033323764801025,grad_norm: 0.9999991140642198, iteration: 91918
loss: 1.0308761596679688,grad_norm: 0.9999991322202308, iteration: 91919
loss: 1.0046522617340088,grad_norm: 0.9999991027832916, iteration: 91920
loss: 0.995760977268219,grad_norm: 0.9999990698818133, iteration: 91921
loss: 1.0122705698013306,grad_norm: 0.9999991897378387, iteration: 91922
loss: 1.0100860595703125,grad_norm: 0.9999992200293416, iteration: 91923
loss: 0.956180214881897,grad_norm: 0.9999991060486532, iteration: 91924
loss: 1.005631446838379,grad_norm: 0.9954901526202103, iteration: 91925
loss: 0.9670029282569885,grad_norm: 0.9940097600548718, iteration: 91926
loss: 1.007042407989502,grad_norm: 0.9999991433072725, iteration: 91927
loss: 1.0013948678970337,grad_norm: 0.9583227234579432, iteration: 91928
loss: 0.982170820236206,grad_norm: 0.9999992840627243, iteration: 91929
loss: 1.004291296005249,grad_norm: 0.9337397897099043, iteration: 91930
loss: 0.9800776243209839,grad_norm: 0.9641804381220179, iteration: 91931
loss: 0.9952887296676636,grad_norm: 0.9999990416874132, iteration: 91932
loss: 0.9852343201637268,grad_norm: 0.9999991748190952, iteration: 91933
loss: 1.0005367994308472,grad_norm: 0.8632942908922407, iteration: 91934
loss: 1.0043339729309082,grad_norm: 0.9455457074989357, iteration: 91935
loss: 0.9920654296875,grad_norm: 0.9693925172918277, iteration: 91936
loss: 0.9644898772239685,grad_norm: 0.8756343548717725, iteration: 91937
loss: 0.9788281917572021,grad_norm: 0.9296694256534315, iteration: 91938
loss: 1.015472650527954,grad_norm: 0.8631897097124437, iteration: 91939
loss: 0.9912177324295044,grad_norm: 0.9999990277218032, iteration: 91940
loss: 0.9554573893547058,grad_norm: 0.9999991487612704, iteration: 91941
loss: 0.9966180324554443,grad_norm: 0.9999991224186673, iteration: 91942
loss: 0.9943458437919617,grad_norm: 0.9999990773174434, iteration: 91943
loss: 1.0137698650360107,grad_norm: 0.9999998966349334, iteration: 91944
loss: 1.0053027868270874,grad_norm: 0.9518538254380158, iteration: 91945
loss: 0.9843412637710571,grad_norm: 0.9999991780637306, iteration: 91946
loss: 1.0409045219421387,grad_norm: 0.9888953028295452, iteration: 91947
loss: 1.0004470348358154,grad_norm: 0.9999991887821842, iteration: 91948
loss: 0.9725152850151062,grad_norm: 0.8890968040009958, iteration: 91949
loss: 1.0259710550308228,grad_norm: 0.9999991182758153, iteration: 91950
loss: 0.9948903322219849,grad_norm: 0.9669796495717825, iteration: 91951
loss: 1.042867660522461,grad_norm: 0.9999997459869413, iteration: 91952
loss: 0.9879005551338196,grad_norm: 0.9999992072763542, iteration: 91953
loss: 0.9898303747177124,grad_norm: 0.8851167271169127, iteration: 91954
loss: 0.9592449069023132,grad_norm: 0.9999991219728721, iteration: 91955
loss: 1.0357667207717896,grad_norm: 0.9999992908101326, iteration: 91956
loss: 1.0161668062210083,grad_norm: 0.9474434543866442, iteration: 91957
loss: 1.0095645189285278,grad_norm: 0.9999990465517423, iteration: 91958
loss: 1.0209615230560303,grad_norm: 0.9999997947615322, iteration: 91959
loss: 0.9980707168579102,grad_norm: 0.8493367660125168, iteration: 91960
loss: 1.0617945194244385,grad_norm: 0.9999993119821351, iteration: 91961
loss: 0.9632707834243774,grad_norm: 0.9999992797529863, iteration: 91962
loss: 1.0046483278274536,grad_norm: 0.9999991797221948, iteration: 91963
loss: 0.977262556552887,grad_norm: 0.9999992172577599, iteration: 91964
loss: 1.0024101734161377,grad_norm: 0.9559189741578755, iteration: 91965
loss: 0.9826316833496094,grad_norm: 0.999999112529259, iteration: 91966
loss: 1.045937418937683,grad_norm: 0.9999995729231602, iteration: 91967
loss: 0.9737123250961304,grad_norm: 0.9752488827368945, iteration: 91968
loss: 0.9895374774932861,grad_norm: 0.9999992307258506, iteration: 91969
loss: 0.9719944596290588,grad_norm: 0.977271395445136, iteration: 91970
loss: 0.9619277119636536,grad_norm: 0.9999989533078097, iteration: 91971
loss: 1.01664137840271,grad_norm: 0.9999991918131202, iteration: 91972
loss: 1.0052608251571655,grad_norm: 0.8740483058604154, iteration: 91973
loss: 0.9846508502960205,grad_norm: 0.9999990006564732, iteration: 91974
loss: 1.0338287353515625,grad_norm: 0.9999992114423056, iteration: 91975
loss: 1.0293439626693726,grad_norm: 0.9755078092751348, iteration: 91976
loss: 1.039250135421753,grad_norm: 0.9999990677562336, iteration: 91977
loss: 1.063179612159729,grad_norm: 0.9999991356560538, iteration: 91978
loss: 0.9538759589195251,grad_norm: 0.9999989917442866, iteration: 91979
loss: 1.017412543296814,grad_norm: 0.9999991749495124, iteration: 91980
loss: 1.0546680688858032,grad_norm: 0.999999757177884, iteration: 91981
loss: 1.017135500907898,grad_norm: 0.9999991672698196, iteration: 91982
loss: 1.017514944076538,grad_norm: 0.9999990132462288, iteration: 91983
loss: 0.9797189831733704,grad_norm: 0.9999991576914717, iteration: 91984
loss: 1.0124366283416748,grad_norm: 0.9999991355168348, iteration: 91985
loss: 1.0613949298858643,grad_norm: 0.9999990211478239, iteration: 91986
loss: 0.9950472116470337,grad_norm: 0.9460158780986679, iteration: 91987
loss: 1.033389687538147,grad_norm: 0.979202925793542, iteration: 91988
loss: 1.0144332647323608,grad_norm: 0.9999990297429454, iteration: 91989
loss: 0.9753816723823547,grad_norm: 0.9999990618865705, iteration: 91990
loss: 1.0363857746124268,grad_norm: 0.9999992307930939, iteration: 91991
loss: 0.9733696579933167,grad_norm: 0.8594797246108797, iteration: 91992
loss: 1.0001742839813232,grad_norm: 0.9999991394486331, iteration: 91993
loss: 1.0261435508728027,grad_norm: 0.9999991053374153, iteration: 91994
loss: 0.9928150177001953,grad_norm: 0.9830766388723754, iteration: 91995
loss: 0.9972867965698242,grad_norm: 0.9560702293331733, iteration: 91996
loss: 0.9778580665588379,grad_norm: 0.9316535394261325, iteration: 91997
loss: 0.9798614978790283,grad_norm: 0.9999991578728635, iteration: 91998
loss: 0.9890706539154053,grad_norm: 0.999999271707957, iteration: 91999
loss: 1.0244855880737305,grad_norm: 0.9768996643296598, iteration: 92000
loss: 0.9997686147689819,grad_norm: 0.9999991421431044, iteration: 92001
loss: 0.9918280839920044,grad_norm: 0.943537510970525, iteration: 92002
loss: 0.9891365766525269,grad_norm: 0.9999995096150557, iteration: 92003
loss: 0.9832857847213745,grad_norm: 0.9999990792774517, iteration: 92004
loss: 1.0138171911239624,grad_norm: 0.9999992970809548, iteration: 92005
loss: 0.9901610612869263,grad_norm: 0.9595457781618746, iteration: 92006
loss: 1.0319229364395142,grad_norm: 0.9999990511436725, iteration: 92007
loss: 1.0056105852127075,grad_norm: 0.8990552872427623, iteration: 92008
loss: 1.0246247053146362,grad_norm: 0.9999991763811847, iteration: 92009
loss: 1.0270551443099976,grad_norm: 0.9999989135554859, iteration: 92010
loss: 0.9630818963050842,grad_norm: 0.9999993186607912, iteration: 92011
loss: 1.0317281484603882,grad_norm: 0.9999993286653699, iteration: 92012
loss: 0.9699718952178955,grad_norm: 0.9999991311664973, iteration: 92013
loss: 0.9601823091506958,grad_norm: 0.9999990399884021, iteration: 92014
loss: 0.9848443865776062,grad_norm: 0.9999992060325407, iteration: 92015
loss: 1.0115458965301514,grad_norm: 0.9999990857457269, iteration: 92016
loss: 1.0248812437057495,grad_norm: 0.9914881078270494, iteration: 92017
loss: 0.9831859469413757,grad_norm: 0.9507326223929838, iteration: 92018
loss: 1.010528326034546,grad_norm: 0.9745857616284731, iteration: 92019
loss: 1.0252500772476196,grad_norm: 0.9999991901143677, iteration: 92020
loss: 0.9983088374137878,grad_norm: 0.9999991118182597, iteration: 92021
loss: 1.0218018293380737,grad_norm: 0.9282163831282684, iteration: 92022
loss: 1.0096120834350586,grad_norm: 0.9107615616019218, iteration: 92023
loss: 1.026017665863037,grad_norm: 0.9999991010235628, iteration: 92024
loss: 0.990931510925293,grad_norm: 0.9978780364032936, iteration: 92025
loss: 0.9829192757606506,grad_norm: 0.9999989354809907, iteration: 92026
loss: 0.9744055271148682,grad_norm: 0.9920405382856231, iteration: 92027
loss: 0.9827978014945984,grad_norm: 0.9059671804459715, iteration: 92028
loss: 1.014638066291809,grad_norm: 0.9999991043061875, iteration: 92029
loss: 1.0623794794082642,grad_norm: 0.9999991818182776, iteration: 92030
loss: 1.009148120880127,grad_norm: 0.9999991124955446, iteration: 92031
loss: 1.0378634929656982,grad_norm: 0.9999990960026279, iteration: 92032
loss: 0.9670935273170471,grad_norm: 0.9471208350287789, iteration: 92033
loss: 1.021152138710022,grad_norm: 0.9526189082796738, iteration: 92034
loss: 1.0100593566894531,grad_norm: 0.9416883296880937, iteration: 92035
loss: 1.0097581148147583,grad_norm: 0.9999990689629646, iteration: 92036
loss: 0.9880531430244446,grad_norm: 0.9962668214537763, iteration: 92037
loss: 0.9950432181358337,grad_norm: 0.9090160389779832, iteration: 92038
loss: 0.9698569774627686,grad_norm: 0.9999990979928649, iteration: 92039
loss: 1.0157748460769653,grad_norm: 0.9999992369332701, iteration: 92040
loss: 0.9970667958259583,grad_norm: 0.9916308350090796, iteration: 92041
loss: 1.0273207426071167,grad_norm: 0.9999992291241474, iteration: 92042
loss: 0.9865977168083191,grad_norm: 0.9999992318624212, iteration: 92043
loss: 1.004135012626648,grad_norm: 0.9999992239028862, iteration: 92044
loss: 0.9829510450363159,grad_norm: 0.838347404732058, iteration: 92045
loss: 0.9946047067642212,grad_norm: 0.9965218250072416, iteration: 92046
loss: 1.0244852304458618,grad_norm: 0.9999990746787158, iteration: 92047
loss: 1.030160665512085,grad_norm: 0.9177042392552796, iteration: 92048
loss: 0.984466016292572,grad_norm: 0.9159184741984057, iteration: 92049
loss: 0.9989071488380432,grad_norm: 0.9988512784366395, iteration: 92050
loss: 1.0654470920562744,grad_norm: 0.9999993929233614, iteration: 92051
loss: 0.9898893237113953,grad_norm: 0.999999277687349, iteration: 92052
loss: 0.9692178964614868,grad_norm: 0.9999991334459017, iteration: 92053
loss: 0.9814432859420776,grad_norm: 0.9694292123055213, iteration: 92054
loss: 1.0031946897506714,grad_norm: 0.9999990307121609, iteration: 92055
loss: 0.9883776307106018,grad_norm: 0.9999989241927547, iteration: 92056
loss: 1.0290082693099976,grad_norm: 0.9999992550149348, iteration: 92057
loss: 0.9791895151138306,grad_norm: 0.9999990196845494, iteration: 92058
loss: 0.995911180973053,grad_norm: 0.9999990990087051, iteration: 92059
loss: 0.9688334465026855,grad_norm: 0.9999991082783083, iteration: 92060
loss: 0.9727357625961304,grad_norm: 0.9999990913285174, iteration: 92061
loss: 1.0232442617416382,grad_norm: 0.9910769761439532, iteration: 92062
loss: 1.0567548274993896,grad_norm: 0.8801130763980066, iteration: 92063
loss: 0.995699942111969,grad_norm: 0.9999992741115178, iteration: 92064
loss: 1.0050824880599976,grad_norm: 0.998232220171992, iteration: 92065
loss: 0.9826149940490723,grad_norm: 0.999999243069207, iteration: 92066
loss: 0.9721003174781799,grad_norm: 0.856317526994391, iteration: 92067
loss: 1.0192402601242065,grad_norm: 0.9999990589205363, iteration: 92068
loss: 1.091861605644226,grad_norm: 0.9999992803519006, iteration: 92069
loss: 1.0086592435836792,grad_norm: 0.9999993711239122, iteration: 92070
loss: 1.023108959197998,grad_norm: 0.9999992954274592, iteration: 92071
loss: 1.0617910623550415,grad_norm: 0.9999998362113243, iteration: 92072
loss: 0.9825351238250732,grad_norm: 0.9999992045105922, iteration: 92073
loss: 1.0189526081085205,grad_norm: 0.9766813770747661, iteration: 92074
loss: 1.0751760005950928,grad_norm: 0.9999990510289237, iteration: 92075
loss: 1.0164602994918823,grad_norm: 0.9999992574903458, iteration: 92076
loss: 0.9888060688972473,grad_norm: 0.9999991082956922, iteration: 92077
loss: 1.0259618759155273,grad_norm: 0.9999990879115396, iteration: 92078
loss: 1.0047158002853394,grad_norm: 0.9999989555649226, iteration: 92079
loss: 1.087764859199524,grad_norm: 0.9999992844355142, iteration: 92080
loss: 1.0030341148376465,grad_norm: 0.999998934660225, iteration: 92081
loss: 1.0016900300979614,grad_norm: 0.8643901992007584, iteration: 92082
loss: 1.0008600950241089,grad_norm: 0.9999992180287913, iteration: 92083
loss: 1.060105800628662,grad_norm: 0.999999211995811, iteration: 92084
loss: 0.9934192299842834,grad_norm: 0.9999990773407563, iteration: 92085
loss: 0.97153639793396,grad_norm: 0.9320915953875031, iteration: 92086
loss: 0.985494077205658,grad_norm: 0.9999991010295384, iteration: 92087
loss: 0.9758598804473877,grad_norm: 0.9999991210143678, iteration: 92088
loss: 0.961910605430603,grad_norm: 0.9999991863923948, iteration: 92089
loss: 0.9659634828567505,grad_norm: 0.9999990254964357, iteration: 92090
loss: 0.998076856136322,grad_norm: 0.9999993218079587, iteration: 92091
loss: 1.040088176727295,grad_norm: 0.9999993056871762, iteration: 92092
loss: 1.0092589855194092,grad_norm: 0.9999991087978477, iteration: 92093
loss: 1.0180721282958984,grad_norm: 0.9961860617527407, iteration: 92094
loss: 0.9932931661605835,grad_norm: 0.9162969092910247, iteration: 92095
loss: 1.0001407861709595,grad_norm: 0.9999992159147919, iteration: 92096
loss: 1.0065407752990723,grad_norm: 0.9951618301661783, iteration: 92097
loss: 0.9883633255958557,grad_norm: 0.8863107213702999, iteration: 92098
loss: 0.9996180534362793,grad_norm: 0.9999990436514499, iteration: 92099
loss: 1.0279797315597534,grad_norm: 0.9999992198742953, iteration: 92100
loss: 1.006218671798706,grad_norm: 0.9999994283667182, iteration: 92101
loss: 0.9629030227661133,grad_norm: 0.9999990937041152, iteration: 92102
loss: 1.00105881690979,grad_norm: 0.9161812459156129, iteration: 92103
loss: 1.0003306865692139,grad_norm: 0.9470093235865794, iteration: 92104
loss: 1.0371495485305786,grad_norm: 0.9999994092249078, iteration: 92105
loss: 0.9839373230934143,grad_norm: 0.9526902030118315, iteration: 92106
loss: 1.008554458618164,grad_norm: 0.9999991811261794, iteration: 92107
loss: 1.0289207696914673,grad_norm: 0.9999992525198851, iteration: 92108
loss: 0.9859439134597778,grad_norm: 0.9710814741366347, iteration: 92109
loss: 1.009983777999878,grad_norm: 0.9999997106082078, iteration: 92110
loss: 0.9999808669090271,grad_norm: 0.9999993027410644, iteration: 92111
loss: 1.0331768989562988,grad_norm: 0.8915576794132686, iteration: 92112
loss: 1.0134841203689575,grad_norm: 0.9999989560104551, iteration: 92113
loss: 0.9925599098205566,grad_norm: 0.999999164005291, iteration: 92114
loss: 0.9773788452148438,grad_norm: 0.8426358390582616, iteration: 92115
loss: 0.9681975245475769,grad_norm: 0.9999992288545418, iteration: 92116
loss: 1.0131165981292725,grad_norm: 0.9999993894134793, iteration: 92117
loss: 1.049048900604248,grad_norm: 0.9999994992365362, iteration: 92118
loss: 0.9591835141181946,grad_norm: 0.9999992218972626, iteration: 92119
loss: 1.0003737211227417,grad_norm: 0.9763301656407232, iteration: 92120
loss: 1.0186150074005127,grad_norm: 0.8727342625787917, iteration: 92121
loss: 1.0896978378295898,grad_norm: 0.999999211479694, iteration: 92122
loss: 1.0125876665115356,grad_norm: 0.8815617297259314, iteration: 92123
loss: 0.973990261554718,grad_norm: 0.9999991866072557, iteration: 92124
loss: 0.9691677093505859,grad_norm: 0.9999990961611978, iteration: 92125
loss: 1.0308831930160522,grad_norm: 0.9999992074937485, iteration: 92126
loss: 0.9667066335678101,grad_norm: 0.9999990102902961, iteration: 92127
loss: 0.972018837928772,grad_norm: 0.9999989722017373, iteration: 92128
loss: 1.0393892526626587,grad_norm: 0.9999989451440909, iteration: 92129
loss: 1.0011591911315918,grad_norm: 0.9999992083225476, iteration: 92130
loss: 0.9513241648674011,grad_norm: 0.9999990789359744, iteration: 92131
loss: 0.9978820085525513,grad_norm: 0.9999991558808511, iteration: 92132
loss: 0.9796252846717834,grad_norm: 0.999999050332988, iteration: 92133
loss: 0.9569315910339355,grad_norm: 0.9999993383137838, iteration: 92134
loss: 1.0318838357925415,grad_norm: 0.9999992137443884, iteration: 92135
loss: 1.0091145038604736,grad_norm: 0.99999914389952, iteration: 92136
loss: 1.0279181003570557,grad_norm: 0.9999991052605629, iteration: 92137
loss: 0.9624884724617004,grad_norm: 0.9999990977147958, iteration: 92138
loss: 1.0041406154632568,grad_norm: 0.9999994669446838, iteration: 92139
loss: 0.9491470456123352,grad_norm: 0.9840833899434697, iteration: 92140
loss: 0.955040454864502,grad_norm: 0.9311674291392971, iteration: 92141
loss: 0.9923805594444275,grad_norm: 0.9860140405757426, iteration: 92142
loss: 1.0285670757293701,grad_norm: 0.98743047464517, iteration: 92143
loss: 0.9388816356658936,grad_norm: 0.9999993508631394, iteration: 92144
loss: 1.0108836889266968,grad_norm: 0.9999991368551596, iteration: 92145
loss: 1.0027732849121094,grad_norm: 0.999999455065935, iteration: 92146
loss: 1.0067219734191895,grad_norm: 0.955263893735046, iteration: 92147
loss: 1.038321852684021,grad_norm: 0.9999998417514491, iteration: 92148
loss: 0.9964909553527832,grad_norm: 0.9999992465246806, iteration: 92149
loss: 0.9784057140350342,grad_norm: 0.9999992513938853, iteration: 92150
loss: 1.0002914667129517,grad_norm: 0.999999070057929, iteration: 92151
loss: 0.9996141195297241,grad_norm: 0.999999054860993, iteration: 92152
loss: 1.0144751071929932,grad_norm: 0.9999990966805222, iteration: 92153
loss: 0.9663541316986084,grad_norm: 0.9999991184207885, iteration: 92154
loss: 0.9872673153877258,grad_norm: 0.986180679324742, iteration: 92155
loss: 1.0150020122528076,grad_norm: 0.9999990001710509, iteration: 92156
loss: 0.9689053297042847,grad_norm: 0.9829095132599597, iteration: 92157
loss: 0.975619375705719,grad_norm: 0.9999991582869707, iteration: 92158
loss: 1.0136936902999878,grad_norm: 0.9999991005932387, iteration: 92159
loss: 0.9783515334129333,grad_norm: 0.9357939225958175, iteration: 92160
loss: 0.967734158039093,grad_norm: 0.9999991810031545, iteration: 92161
loss: 1.0760189294815063,grad_norm: 0.9707082840180218, iteration: 92162
loss: 1.0180810689926147,grad_norm: 0.9999989492212397, iteration: 92163
loss: 1.0452332496643066,grad_norm: 0.9871973674229917, iteration: 92164
loss: 1.0559731721878052,grad_norm: 0.9999993242122462, iteration: 92165
loss: 1.0111887454986572,grad_norm: 0.9999992078732487, iteration: 92166
loss: 0.9697020053863525,grad_norm: 0.9999991903719637, iteration: 92167
loss: 1.0058258771896362,grad_norm: 0.9999991475257332, iteration: 92168
loss: 1.0646686553955078,grad_norm: 0.9999995944798495, iteration: 92169
loss: 0.9921037554740906,grad_norm: 0.9759996881580094, iteration: 92170
loss: 1.0321775674819946,grad_norm: 0.9999994744800247, iteration: 92171
loss: 1.0033360719680786,grad_norm: 0.9671281534147657, iteration: 92172
loss: 1.0188357830047607,grad_norm: 0.9999990910870993, iteration: 92173
loss: 0.9835664629936218,grad_norm: 0.9999992317650332, iteration: 92174
loss: 0.9748945236206055,grad_norm: 0.9999989919306949, iteration: 92175
loss: 1.0057308673858643,grad_norm: 0.9127555711075332, iteration: 92176
loss: 1.0158108472824097,grad_norm: 0.9999990634651783, iteration: 92177
loss: 0.980228841304779,grad_norm: 0.9999992847004943, iteration: 92178
loss: 1.003195881843567,grad_norm: 0.958903294271372, iteration: 92179
loss: 0.9631341099739075,grad_norm: 0.9999991633302585, iteration: 92180
loss: 1.0137938261032104,grad_norm: 0.976332904587111, iteration: 92181
loss: 0.9805389046669006,grad_norm: 0.9999991709938325, iteration: 92182
loss: 1.0471243858337402,grad_norm: 0.999999101080872, iteration: 92183
loss: 0.9817289113998413,grad_norm: 0.9999992782363263, iteration: 92184
loss: 0.9981426000595093,grad_norm: 0.9999994648699633, iteration: 92185
loss: 0.9736813306808472,grad_norm: 0.9999991075469414, iteration: 92186
loss: 0.9877743124961853,grad_norm: 0.9557473249354836, iteration: 92187
loss: 1.0124592781066895,grad_norm: 0.9999991111681203, iteration: 92188
loss: 0.978342592716217,grad_norm: 0.9999990926088167, iteration: 92189
loss: 1.0131055116653442,grad_norm: 0.9999989233682665, iteration: 92190
loss: 0.9599608778953552,grad_norm: 0.9999992280596823, iteration: 92191
loss: 1.0358788967132568,grad_norm: 0.9999990675978988, iteration: 92192
loss: 1.01322603225708,grad_norm: 0.999999093462988, iteration: 92193
loss: 0.9915283918380737,grad_norm: 0.9232931167544058, iteration: 92194
loss: 1.0305054187774658,grad_norm: 0.9999992432179136, iteration: 92195
loss: 1.0176359415054321,grad_norm: 0.9262880404494688, iteration: 92196
loss: 1.0134202241897583,grad_norm: 0.9999990809499297, iteration: 92197
loss: 0.9789798259735107,grad_norm: 0.9925085380938483, iteration: 92198
loss: 0.9995706081390381,grad_norm: 0.9999991158617383, iteration: 92199
loss: 1.0103158950805664,grad_norm: 0.999999176140376, iteration: 92200
loss: 0.9942852854728699,grad_norm: 0.9999991845563044, iteration: 92201
loss: 0.988645076751709,grad_norm: 0.8603755142846297, iteration: 92202
loss: 1.0035347938537598,grad_norm: 0.9999988892552224, iteration: 92203
loss: 0.9914911389350891,grad_norm: 0.9999990794585065, iteration: 92204
loss: 0.9655988812446594,grad_norm: 0.9999991022926091, iteration: 92205
loss: 1.0073271989822388,grad_norm: 0.9999994214946786, iteration: 92206
loss: 0.9925491213798523,grad_norm: 0.9764751362651792, iteration: 92207
loss: 1.0036320686340332,grad_norm: 0.9999992314309635, iteration: 92208
loss: 0.9947106838226318,grad_norm: 0.9838079653156231, iteration: 92209
loss: 0.9999181032180786,grad_norm: 0.9999990732851515, iteration: 92210
loss: 1.037740707397461,grad_norm: 0.9834330155584976, iteration: 92211
loss: 0.9964235424995422,grad_norm: 0.8763009290678335, iteration: 92212
loss: 1.0007858276367188,grad_norm: 0.9960992519794346, iteration: 92213
loss: 1.0224690437316895,grad_norm: 0.9999992926546878, iteration: 92214
loss: 1.0129687786102295,grad_norm: 0.9537077061775129, iteration: 92215
loss: 1.0026706457138062,grad_norm: 0.9056193510800952, iteration: 92216
loss: 0.9829237461090088,grad_norm: 0.9999990676358324, iteration: 92217
loss: 0.9602553248405457,grad_norm: 0.9999992103566874, iteration: 92218
loss: 0.9715520143508911,grad_norm: 0.9999990743998604, iteration: 92219
loss: 1.048561930656433,grad_norm: 0.9999989978964055, iteration: 92220
loss: 0.9663311243057251,grad_norm: 0.9999991954389494, iteration: 92221
loss: 1.007975459098816,grad_norm: 0.9999990907921227, iteration: 92222
loss: 0.9925374984741211,grad_norm: 0.854155367157986, iteration: 92223
loss: 0.951478898525238,grad_norm: 0.9999990078110729, iteration: 92224
loss: 1.0226167440414429,grad_norm: 0.9988258886707742, iteration: 92225
loss: 0.9922307133674622,grad_norm: 0.970620134333859, iteration: 92226
loss: 1.0220284461975098,grad_norm: 0.9999991042558324, iteration: 92227
loss: 1.0168589353561401,grad_norm: 0.999999355366005, iteration: 92228
loss: 1.0209025144577026,grad_norm: 0.9999991816825394, iteration: 92229
loss: 1.0032811164855957,grad_norm: 0.999999105516458, iteration: 92230
loss: 1.0605748891830444,grad_norm: 0.9999991734411501, iteration: 92231
loss: 1.0214731693267822,grad_norm: 0.9999992004855777, iteration: 92232
loss: 1.0110576152801514,grad_norm: 0.9999993534437978, iteration: 92233
loss: 0.973480224609375,grad_norm: 0.9999991371571252, iteration: 92234
loss: 1.0527018308639526,grad_norm: 0.8910141067042034, iteration: 92235
loss: 0.9994352459907532,grad_norm: 0.9747291218974777, iteration: 92236
loss: 1.0011175870895386,grad_norm: 0.9456796530205903, iteration: 92237
loss: 0.9908368587493896,grad_norm: 0.9363855518227171, iteration: 92238
loss: 1.0202662944793701,grad_norm: 0.9330589921495006, iteration: 92239
loss: 0.9788793325424194,grad_norm: 0.9649094915186003, iteration: 92240
loss: 1.0355525016784668,grad_norm: 0.9999990658975889, iteration: 92241
loss: 1.024169683456421,grad_norm: 0.9999988102738007, iteration: 92242
loss: 1.0148292779922485,grad_norm: 0.9999989218703345, iteration: 92243
loss: 0.9799577593803406,grad_norm: 0.9999996843063089, iteration: 92244
loss: 1.0215661525726318,grad_norm: 0.9999993325850042, iteration: 92245
loss: 1.0129045248031616,grad_norm: 0.9999996906608454, iteration: 92246
loss: 0.9469282627105713,grad_norm: 0.9999991053478273, iteration: 92247
loss: 0.9816555380821228,grad_norm: 0.9999992642359368, iteration: 92248
loss: 0.9694741368293762,grad_norm: 0.9999992470537202, iteration: 92249
loss: 1.1065115928649902,grad_norm: 0.9999998793185112, iteration: 92250
loss: 1.0025253295898438,grad_norm: 0.9757816765989795, iteration: 92251
loss: 0.9934752583503723,grad_norm: 0.9999991623958067, iteration: 92252
loss: 1.047250747680664,grad_norm: 0.9999992000873337, iteration: 92253
loss: 1.0139715671539307,grad_norm: 0.9999990285949982, iteration: 92254
loss: 1.0175728797912598,grad_norm: 0.9999990624222618, iteration: 92255
loss: 1.0157784223556519,grad_norm: 0.9999992143823111, iteration: 92256
loss: 0.948691189289093,grad_norm: 0.962406122370692, iteration: 92257
loss: 0.9841848015785217,grad_norm: 0.9999991376070304, iteration: 92258
loss: 1.0279942750930786,grad_norm: 0.999999869551204, iteration: 92259
loss: 0.9945005178451538,grad_norm: 0.8878248173914072, iteration: 92260
loss: 1.0157984495162964,grad_norm: 0.9999989826415426, iteration: 92261
loss: 0.9849377870559692,grad_norm: 0.9684465802538085, iteration: 92262
loss: 0.9754409790039062,grad_norm: 0.9999990695361888, iteration: 92263
loss: 0.9977664947509766,grad_norm: 0.9618735247010386, iteration: 92264
loss: 0.9894438982009888,grad_norm: 0.9999992135358563, iteration: 92265
loss: 0.9980348348617554,grad_norm: 0.9999990649083892, iteration: 92266
loss: 1.0123063325881958,grad_norm: 0.9696984157407794, iteration: 92267
loss: 0.9727477431297302,grad_norm: 0.9621775312328642, iteration: 92268
loss: 1.0107859373092651,grad_norm: 0.9944311639922483, iteration: 92269
loss: 0.9778647422790527,grad_norm: 0.999999130450245, iteration: 92270
loss: 1.0577833652496338,grad_norm: 0.9999993314505045, iteration: 92271
loss: 0.9812471270561218,grad_norm: 0.9758664750991327, iteration: 92272
loss: 1.0093164443969727,grad_norm: 0.9999991077966612, iteration: 92273
loss: 0.9853107929229736,grad_norm: 0.999999207919347, iteration: 92274
loss: 0.9714813828468323,grad_norm: 0.9773744914894646, iteration: 92275
loss: 1.0224823951721191,grad_norm: 0.9999991999119725, iteration: 92276
loss: 1.0070117712020874,grad_norm: 0.9999996404846897, iteration: 92277
loss: 1.0102996826171875,grad_norm: 0.9999991774142484, iteration: 92278
loss: 1.0205374956130981,grad_norm: 0.9659274486979196, iteration: 92279
loss: 1.016412377357483,grad_norm: 0.8840801325118496, iteration: 92280
loss: 1.0073164701461792,grad_norm: 0.9999991842337895, iteration: 92281
loss: 0.9621186852455139,grad_norm: 0.9673970196415996, iteration: 92282
loss: 1.0189480781555176,grad_norm: 0.9999990022440504, iteration: 92283
loss: 1.007480502128601,grad_norm: 0.9999992305528664, iteration: 92284
loss: 0.9950792193412781,grad_norm: 0.9504512609548434, iteration: 92285
loss: 0.974820613861084,grad_norm: 0.9999990707689154, iteration: 92286
loss: 1.007099986076355,grad_norm: 0.9400546867092605, iteration: 92287
loss: 0.9720310568809509,grad_norm: 0.875968456496115, iteration: 92288
loss: 1.0176807641983032,grad_norm: 0.863433419558198, iteration: 92289
loss: 1.0144753456115723,grad_norm: 0.9999990981926026, iteration: 92290
loss: 0.9551263451576233,grad_norm: 0.9999991485793094, iteration: 92291
loss: 1.0148935317993164,grad_norm: 0.999998988599498, iteration: 92292
loss: 1.061995267868042,grad_norm: 0.9999991862606238, iteration: 92293
loss: 1.024354100227356,grad_norm: 0.9999991068775735, iteration: 92294
loss: 1.0074886083602905,grad_norm: 0.9675169685709403, iteration: 92295
loss: 1.0446642637252808,grad_norm: 0.9999994918209009, iteration: 92296
loss: 0.995607852935791,grad_norm: 0.9999992299554025, iteration: 92297
loss: 1.0876796245574951,grad_norm: 0.9630223059699579, iteration: 92298
loss: 1.0377368927001953,grad_norm: 0.9999991331684926, iteration: 92299
loss: 0.997096061706543,grad_norm: 0.9688884311885233, iteration: 92300
loss: 1.0070112943649292,grad_norm: 0.9999990655087921, iteration: 92301
loss: 1.0089166164398193,grad_norm: 0.9999991787981115, iteration: 92302
loss: 0.9982715249061584,grad_norm: 0.9134224153951848, iteration: 92303
loss: 1.0093101263046265,grad_norm: 0.9999990906572556, iteration: 92304
loss: 0.9709906578063965,grad_norm: 0.9599338471727846, iteration: 92305
loss: 0.9950997233390808,grad_norm: 0.9999992915784395, iteration: 92306
loss: 1.029771327972412,grad_norm: 0.999999216377816, iteration: 92307
loss: 1.021596074104309,grad_norm: 0.9999989762963265, iteration: 92308
loss: 1.0049268007278442,grad_norm: 0.999999099757057, iteration: 92309
loss: 0.9878222346305847,grad_norm: 0.9340083590942465, iteration: 92310
loss: 0.9982410669326782,grad_norm: 0.9999991638218462, iteration: 92311
loss: 1.018219232559204,grad_norm: 0.9999999006406464, iteration: 92312
loss: 0.9981061816215515,grad_norm: 0.9999990928845944, iteration: 92313
loss: 1.0208691358566284,grad_norm: 0.9999998056512138, iteration: 92314
loss: 1.0274397134780884,grad_norm: 0.9431539062665438, iteration: 92315
loss: 1.0132604837417603,grad_norm: 0.9999992830652744, iteration: 92316
loss: 0.9989720582962036,grad_norm: 0.9999991936308769, iteration: 92317
loss: 1.018302321434021,grad_norm: 0.9999992590818623, iteration: 92318
loss: 0.9914928674697876,grad_norm: 0.9999991032239961, iteration: 92319
loss: 0.9953678250312805,grad_norm: 0.9999990251480841, iteration: 92320
loss: 0.9890172481536865,grad_norm: 0.999999105407024, iteration: 92321
loss: 1.0157928466796875,grad_norm: 0.999999304248798, iteration: 92322
loss: 0.9600095152854919,grad_norm: 0.9999991128042397, iteration: 92323
loss: 1.0595556497573853,grad_norm: 0.9999992512152337, iteration: 92324
loss: 0.9754889607429504,grad_norm: 0.9999989224852998, iteration: 92325
loss: 0.9621204137802124,grad_norm: 0.999999014420602, iteration: 92326
loss: 0.9746395945549011,grad_norm: 0.909809098895298, iteration: 92327
loss: 1.0244077444076538,grad_norm: 0.9999991242884632, iteration: 92328
loss: 1.040820598602295,grad_norm: 0.9999997467838204, iteration: 92329
loss: 0.9946971535682678,grad_norm: 0.9999991565705342, iteration: 92330
loss: 1.0151522159576416,grad_norm: 0.9603299275428816, iteration: 92331
loss: 0.9730578660964966,grad_norm: 0.9999991153838104, iteration: 92332
loss: 1.0375968217849731,grad_norm: 0.9999992846780087, iteration: 92333
loss: 0.9780735373497009,grad_norm: 0.8826330007448776, iteration: 92334
loss: 1.0077184438705444,grad_norm: 0.9999991110937779, iteration: 92335
loss: 1.0155407190322876,grad_norm: 0.9728983218780333, iteration: 92336
loss: 1.0157419443130493,grad_norm: 0.9999991581716573, iteration: 92337
loss: 0.9671532511711121,grad_norm: 0.9999991142551108, iteration: 92338
loss: 1.0346250534057617,grad_norm: 0.9999995606363677, iteration: 92339
loss: 0.9988248944282532,grad_norm: 0.9999990892082548, iteration: 92340
loss: 1.015682339668274,grad_norm: 0.9999993148880651, iteration: 92341
loss: 0.9988139271736145,grad_norm: 0.9999991849310522, iteration: 92342
loss: 1.0037039518356323,grad_norm: 0.9999990466606483, iteration: 92343
loss: 1.0166985988616943,grad_norm: 0.9999992387432755, iteration: 92344
loss: 1.0546293258666992,grad_norm: 0.9999996776521437, iteration: 92345
loss: 0.9607036709785461,grad_norm: 0.9999990762801166, iteration: 92346
loss: 0.9767115116119385,grad_norm: 0.9309239844832637, iteration: 92347
loss: 1.020728349685669,grad_norm: 0.9999990850987021, iteration: 92348
loss: 1.0219507217407227,grad_norm: 0.9999991247999399, iteration: 92349
loss: 1.0186501741409302,grad_norm: 0.9999993653454654, iteration: 92350
loss: 0.9878835082054138,grad_norm: 0.9021107286792419, iteration: 92351
loss: 1.0081021785736084,grad_norm: 0.9999991501908895, iteration: 92352
loss: 0.9941053986549377,grad_norm: 0.999999014055173, iteration: 92353
loss: 1.000881552696228,grad_norm: 0.9999993481319442, iteration: 92354
loss: 0.9815922975540161,grad_norm: 0.999999438001333, iteration: 92355
loss: 1.030734658241272,grad_norm: 0.9220563286990245, iteration: 92356
loss: 0.9875200986862183,grad_norm: 0.9974435717217502, iteration: 92357
loss: 1.0205050706863403,grad_norm: 0.9999990251852774, iteration: 92358
loss: 1.015897274017334,grad_norm: 0.9999990165058616, iteration: 92359
loss: 1.005114197731018,grad_norm: 0.9104520700121407, iteration: 92360
loss: 0.9908084869384766,grad_norm: 0.8828087758937939, iteration: 92361
loss: 0.990060567855835,grad_norm: 0.966216434945739, iteration: 92362
loss: 1.0429660081863403,grad_norm: 0.99999914071001, iteration: 92363
loss: 1.0262479782104492,grad_norm: 0.9999990534808086, iteration: 92364
loss: 0.961547315120697,grad_norm: 0.9999992956422886, iteration: 92365
loss: 0.9808939695358276,grad_norm: 0.9999992012581597, iteration: 92366
loss: 0.9872223138809204,grad_norm: 0.9999991736288963, iteration: 92367
loss: 0.9954471588134766,grad_norm: 0.9999992025886647, iteration: 92368
loss: 1.0143786668777466,grad_norm: 0.9876059439451875, iteration: 92369
loss: 1.0055484771728516,grad_norm: 0.9999991572729034, iteration: 92370
loss: 1.001449465751648,grad_norm: 0.9999990226895538, iteration: 92371
loss: 1.0407370328903198,grad_norm: 0.9999991476966917, iteration: 92372
loss: 0.9987861514091492,grad_norm: 0.9999989444649124, iteration: 92373
loss: 0.9563884735107422,grad_norm: 0.9086865582169295, iteration: 92374
loss: 0.942743182182312,grad_norm: 0.9999990745978717, iteration: 92375
loss: 0.9933161735534668,grad_norm: 0.9999991518339021, iteration: 92376
loss: 1.0409255027770996,grad_norm: 0.9999998266558476, iteration: 92377
loss: 0.9884657263755798,grad_norm: 0.9999990216277783, iteration: 92378
loss: 0.9983786344528198,grad_norm: 0.9999991401840905, iteration: 92379
loss: 1.0147509574890137,grad_norm: 0.9999990147097189, iteration: 92380
loss: 0.9751476645469666,grad_norm: 0.9999991043915376, iteration: 92381
loss: 1.0083906650543213,grad_norm: 0.9999990766988522, iteration: 92382
loss: 1.0022515058517456,grad_norm: 0.9561305542259956, iteration: 92383
loss: 0.9904636144638062,grad_norm: 0.9999992286060758, iteration: 92384
loss: 0.9825578331947327,grad_norm: 0.9999990632446083, iteration: 92385
loss: 0.9699060320854187,grad_norm: 0.9999990003286197, iteration: 92386
loss: 0.9857523441314697,grad_norm: 0.9895765422286871, iteration: 92387
loss: 1.016140341758728,grad_norm: 0.9999990641135729, iteration: 92388
loss: 1.029186487197876,grad_norm: 0.9495273202460911, iteration: 92389
loss: 0.9859773516654968,grad_norm: 0.9959366341380965, iteration: 92390
loss: 0.9935795664787292,grad_norm: 0.9999993676687757, iteration: 92391
loss: 1.0071057081222534,grad_norm: 0.8834300619567347, iteration: 92392
loss: 0.970762312412262,grad_norm: 0.9854020238057277, iteration: 92393
loss: 0.9766334891319275,grad_norm: 0.9999990744593852, iteration: 92394
loss: 1.016338586807251,grad_norm: 0.9999992482218422, iteration: 92395
loss: 1.0058555603027344,grad_norm: 0.9999990560117918, iteration: 92396
loss: 0.9531501531600952,grad_norm: 0.9344430168355025, iteration: 92397
loss: 1.0019148588180542,grad_norm: 0.9488070781101775, iteration: 92398
loss: 0.9814914464950562,grad_norm: 0.9483244275956185, iteration: 92399
loss: 0.9712324142456055,grad_norm: 0.9999989988220129, iteration: 92400
loss: 0.9844126105308533,grad_norm: 0.9999989806853727, iteration: 92401
loss: 1.0284262895584106,grad_norm: 0.9999992119373174, iteration: 92402
loss: 0.994827389717102,grad_norm: 0.9511596786160491, iteration: 92403
loss: 1.003032922744751,grad_norm: 0.9999990585536549, iteration: 92404
loss: 1.0042692422866821,grad_norm: 0.9069833962357322, iteration: 92405
loss: 1.0148420333862305,grad_norm: 0.9999990718012872, iteration: 92406
loss: 0.9855716228485107,grad_norm: 0.9999992858109669, iteration: 92407
loss: 1.0067998170852661,grad_norm: 0.9999990165595353, iteration: 92408
loss: 1.001352071762085,grad_norm: 0.9602396073696071, iteration: 92409
loss: 0.9924225211143494,grad_norm: 0.9026124849170942, iteration: 92410
loss: 0.9770578742027283,grad_norm: 0.9999993600859635, iteration: 92411
loss: 0.9798253774642944,grad_norm: 0.9367690573760961, iteration: 92412
loss: 1.009160041809082,grad_norm: 0.999999173012535, iteration: 92413
loss: 0.9673553705215454,grad_norm: 0.9999992444947289, iteration: 92414
loss: 0.9814901351928711,grad_norm: 0.9732486039903165, iteration: 92415
loss: 0.986179769039154,grad_norm: 0.9999996902801594, iteration: 92416
loss: 1.0013078451156616,grad_norm: 0.9999991798837875, iteration: 92417
loss: 1.0256808996200562,grad_norm: 0.9999991398588564, iteration: 92418
loss: 1.0109773874282837,grad_norm: 0.9999991834849979, iteration: 92419
loss: 0.9928593635559082,grad_norm: 0.9884932785173353, iteration: 92420
loss: 0.9884105920791626,grad_norm: 0.9999989116958631, iteration: 92421
loss: 1.0441558361053467,grad_norm: 0.9999992022209406, iteration: 92422
loss: 0.9963138699531555,grad_norm: 0.9999991377529929, iteration: 92423
loss: 1.0433531999588013,grad_norm: 0.999999024955382, iteration: 92424
loss: 1.0135416984558105,grad_norm: 0.9999990612524948, iteration: 92425
loss: 1.0124313831329346,grad_norm: 0.9999990183501227, iteration: 92426
loss: 1.0006135702133179,grad_norm: 0.8816699905842955, iteration: 92427
loss: 1.0413198471069336,grad_norm: 0.9999991634030487, iteration: 92428
loss: 1.031254768371582,grad_norm: 0.999999150542227, iteration: 92429
loss: 0.9923040866851807,grad_norm: 0.9986031253819588, iteration: 92430
loss: 1.022140383720398,grad_norm: 0.9999991362234767, iteration: 92431
loss: 1.0074400901794434,grad_norm: 0.9770623026785513, iteration: 92432
loss: 1.0311415195465088,grad_norm: 0.9008356576940233, iteration: 92433
loss: 1.0697296857833862,grad_norm: 0.9999991433935735, iteration: 92434
loss: 0.9684497117996216,grad_norm: 0.9999991248172732, iteration: 92435
loss: 0.9821844696998596,grad_norm: 0.9999994382122924, iteration: 92436
loss: 1.0039148330688477,grad_norm: 0.993048321588073, iteration: 92437
loss: 1.011396884918213,grad_norm: 0.9999990732824822, iteration: 92438
loss: 0.9699404835700989,grad_norm: 0.9999990457435803, iteration: 92439
loss: 1.0343987941741943,grad_norm: 0.969054481625008, iteration: 92440
loss: 0.9864770174026489,grad_norm: 0.9632267965121918, iteration: 92441
loss: 1.0027159452438354,grad_norm: 0.9779887762078953, iteration: 92442
loss: 0.993259072303772,grad_norm: 0.9999993427976807, iteration: 92443
loss: 0.9475550055503845,grad_norm: 0.9999991840863531, iteration: 92444
loss: 0.993767261505127,grad_norm: 0.9999991769092452, iteration: 92445
loss: 1.0012589693069458,grad_norm: 0.9999991942890776, iteration: 92446
loss: 0.9986279010772705,grad_norm: 0.9999990461990282, iteration: 92447
loss: 1.03853440284729,grad_norm: 0.999999047878587, iteration: 92448
loss: 0.9981988668441772,grad_norm: 0.9999992342660831, iteration: 92449
loss: 1.034417748451233,grad_norm: 0.9999991750504172, iteration: 92450
loss: 1.0387324094772339,grad_norm: 0.9999996102795146, iteration: 92451
loss: 0.9863796234130859,grad_norm: 0.9999991822079211, iteration: 92452
loss: 1.0317013263702393,grad_norm: 0.9999992083314303, iteration: 92453
loss: 0.9812617301940918,grad_norm: 0.9999991861108928, iteration: 92454
loss: 1.0065194368362427,grad_norm: 0.8620447580706768, iteration: 92455
loss: 1.0359798669815063,grad_norm: 0.973157489163211, iteration: 92456
loss: 0.9916595816612244,grad_norm: 0.9218878044263296, iteration: 92457
loss: 0.930012583732605,grad_norm: 0.9999990118882017, iteration: 92458
loss: 1.0190815925598145,grad_norm: 0.9999990867086389, iteration: 92459
loss: 0.9908803701400757,grad_norm: 0.999999030104767, iteration: 92460
loss: 1.001336932182312,grad_norm: 0.9999991534497905, iteration: 92461
loss: 1.015945315361023,grad_norm: 0.9311718180848108, iteration: 92462
loss: 1.0111198425292969,grad_norm: 0.9999992375247857, iteration: 92463
loss: 1.0015161037445068,grad_norm: 0.9999993426597918, iteration: 92464
loss: 1.0036016702651978,grad_norm: 0.9999989543260052, iteration: 92465
loss: 1.0245903730392456,grad_norm: 0.999998903652881, iteration: 92466
loss: 0.9248796105384827,grad_norm: 0.9999994418261154, iteration: 92467
loss: 0.9880809187889099,grad_norm: 0.9596990856049721, iteration: 92468
loss: 1.0267502069473267,grad_norm: 0.9999991374023088, iteration: 92469
loss: 0.9907695651054382,grad_norm: 0.9999991285943057, iteration: 92470
loss: 0.9675397276878357,grad_norm: 0.999999097130006, iteration: 92471
loss: 0.9641959071159363,grad_norm: 0.9470940776334421, iteration: 92472
loss: 0.9923819899559021,grad_norm: 0.973421797993593, iteration: 92473
loss: 0.9962605834007263,grad_norm: 0.9628053935759283, iteration: 92474
loss: 1.0140072107315063,grad_norm: 0.999999069093738, iteration: 92475
loss: 0.9405299425125122,grad_norm: 0.9999991781088843, iteration: 92476
loss: 0.9988313317298889,grad_norm: 0.9999990219818099, iteration: 92477
loss: 0.9542436599731445,grad_norm: 0.9999989695700309, iteration: 92478
loss: 0.9834738969802856,grad_norm: 0.9976573603863438, iteration: 92479
loss: 0.9729810953140259,grad_norm: 0.9810173288284941, iteration: 92480
loss: 0.970322847366333,grad_norm: 0.9707446089799674, iteration: 92481
loss: 0.9434030055999756,grad_norm: 0.9679107354874171, iteration: 92482
loss: 0.9588509202003479,grad_norm: 0.9696464005481414, iteration: 92483
loss: 0.9940889477729797,grad_norm: 0.9999992825948405, iteration: 92484
loss: 1.0096548795700073,grad_norm: 0.9999990665555799, iteration: 92485
loss: 1.0200656652450562,grad_norm: 0.9999990768765764, iteration: 92486
loss: 1.018312931060791,grad_norm: 0.9999991036364313, iteration: 92487
loss: 0.9579341411590576,grad_norm: 0.9952156811065256, iteration: 92488
loss: 0.9728084802627563,grad_norm: 0.9484248828555968, iteration: 92489
loss: 0.999472439289093,grad_norm: 0.9160535163142575, iteration: 92490
loss: 1.001179814338684,grad_norm: 0.9999990665842107, iteration: 92491
loss: 0.9894712567329407,grad_norm: 0.9818074121450309, iteration: 92492
loss: 1.0311219692230225,grad_norm: 0.9999991272378771, iteration: 92493
loss: 1.0075466632843018,grad_norm: 0.9999991098288892, iteration: 92494
loss: 0.972653329372406,grad_norm: 0.9403997495719988, iteration: 92495
loss: 0.9799278974533081,grad_norm: 0.999999121220645, iteration: 92496
loss: 0.9838342070579529,grad_norm: 0.9999991009677283, iteration: 92497
loss: 1.0255643129348755,grad_norm: 0.9999991963848376, iteration: 92498
loss: 1.0037434101104736,grad_norm: 0.9999991155243401, iteration: 92499
loss: 1.0235520601272583,grad_norm: 0.9999991840825003, iteration: 92500
loss: 1.0212407112121582,grad_norm: 0.9999991323471435, iteration: 92501
loss: 0.9792341589927673,grad_norm: 0.8390090176559734, iteration: 92502
loss: 1.0507612228393555,grad_norm: 0.9999993734534555, iteration: 92503
loss: 1.023619294166565,grad_norm: 0.9928302716533913, iteration: 92504
loss: 0.9492405652999878,grad_norm: 0.9999992743868822, iteration: 92505
loss: 1.0179033279418945,grad_norm: 0.9999991180423963, iteration: 92506
loss: 0.9970278143882751,grad_norm: 0.9999990837504903, iteration: 92507
loss: 0.9734850525856018,grad_norm: 0.8176223063611058, iteration: 92508
loss: 0.9906807541847229,grad_norm: 0.9760730854163449, iteration: 92509
loss: 1.028583288192749,grad_norm: 0.9999991709173822, iteration: 92510
loss: 0.9737313389778137,grad_norm: 0.9999991411445408, iteration: 92511
loss: 1.0263667106628418,grad_norm: 0.9999991956257109, iteration: 92512
loss: 1.0188641548156738,grad_norm: 0.9999992493249804, iteration: 92513
loss: 0.9842103123664856,grad_norm: 0.867372113920803, iteration: 92514
loss: 0.98375403881073,grad_norm: 0.9999991782642043, iteration: 92515
loss: 0.9803605675697327,grad_norm: 0.9999989878309846, iteration: 92516
loss: 1.0345969200134277,grad_norm: 0.9999994310759329, iteration: 92517
loss: 1.0316036939620972,grad_norm: 0.999999150172915, iteration: 92518
loss: 1.0216686725616455,grad_norm: 0.9999990641489851, iteration: 92519
loss: 1.0058990716934204,grad_norm: 0.8605164117578995, iteration: 92520
loss: 0.9953467845916748,grad_norm: 0.9977734858711437, iteration: 92521
loss: 0.9905731081962585,grad_norm: 0.9363808341193584, iteration: 92522
loss: 0.9896113276481628,grad_norm: 0.9999991244769525, iteration: 92523
loss: 0.9849169254302979,grad_norm: 0.9999990226082818, iteration: 92524
loss: 0.971815824508667,grad_norm: 0.9999990551902144, iteration: 92525
loss: 1.0321040153503418,grad_norm: 0.9999992246329671, iteration: 92526
loss: 0.9775493144989014,grad_norm: 0.999999204565251, iteration: 92527
loss: 0.9681081771850586,grad_norm: 0.9999990343407404, iteration: 92528
loss: 1.0025655031204224,grad_norm: 0.9999990487986198, iteration: 92529
loss: 0.9844315052032471,grad_norm: 0.9173389587385438, iteration: 92530
loss: 1.0042872428894043,grad_norm: 0.9904257377491043, iteration: 92531
loss: 0.9908220171928406,grad_norm: 0.9999991081773677, iteration: 92532
loss: 0.9732881784439087,grad_norm: 0.999999100129086, iteration: 92533
loss: 1.0114121437072754,grad_norm: 0.9999990810857727, iteration: 92534
loss: 1.0424938201904297,grad_norm: 0.9999992619561351, iteration: 92535
loss: 0.9919757843017578,grad_norm: 0.9999993834906004, iteration: 92536
loss: 0.9877656102180481,grad_norm: 0.999999161774977, iteration: 92537
loss: 0.9985044002532959,grad_norm: 0.9999991733567368, iteration: 92538
loss: 1.017505407333374,grad_norm: 0.9999991131770556, iteration: 92539
loss: 1.0140125751495361,grad_norm: 0.9999993331619936, iteration: 92540
loss: 1.0824483633041382,grad_norm: 0.9999996693861269, iteration: 92541
loss: 1.0459237098693848,grad_norm: 0.9999990414576584, iteration: 92542
loss: 0.9644103646278381,grad_norm: 0.9999992600994522, iteration: 92543
loss: 1.0363746881484985,grad_norm: 0.927387911996755, iteration: 92544
loss: 1.0264402627944946,grad_norm: 0.9999991169541874, iteration: 92545
loss: 0.9647132754325867,grad_norm: 0.999999105662092, iteration: 92546
loss: 1.0225213766098022,grad_norm: 0.9999991649851067, iteration: 92547
loss: 0.9942195415496826,grad_norm: 0.9999990491877048, iteration: 92548
loss: 0.9889963269233704,grad_norm: 0.9744886152955066, iteration: 92549
loss: 1.0202853679656982,grad_norm: 0.9999991825546178, iteration: 92550
loss: 0.997923731803894,grad_norm: 0.9547941850635935, iteration: 92551
loss: 1.0017340183258057,grad_norm: 0.9999991068068121, iteration: 92552
loss: 1.0201313495635986,grad_norm: 0.9999991547070707, iteration: 92553
loss: 0.9812778830528259,grad_norm: 0.9552131799723614, iteration: 92554
loss: 1.0443735122680664,grad_norm: 0.9999990725465706, iteration: 92555
loss: 1.0035251379013062,grad_norm: 0.9999991234556792, iteration: 92556
loss: 0.9808760285377502,grad_norm: 0.99999917046945, iteration: 92557
loss: 0.9808483123779297,grad_norm: 0.9999989843906012, iteration: 92558
loss: 1.0061026811599731,grad_norm: 0.9999998068326853, iteration: 92559
loss: 1.0059094429016113,grad_norm: 0.9331097593268597, iteration: 92560
loss: 0.9707365036010742,grad_norm: 0.9857137399616017, iteration: 92561
loss: 0.9714330434799194,grad_norm: 0.8737173385926558, iteration: 92562
loss: 0.9784159064292908,grad_norm: 0.9999991451409332, iteration: 92563
loss: 1.0121631622314453,grad_norm: 0.999999295702976, iteration: 92564
loss: 0.9840835332870483,grad_norm: 0.9098366811201665, iteration: 92565
loss: 1.0000476837158203,grad_norm: 0.9999990429507343, iteration: 92566
loss: 1.002519965171814,grad_norm: 0.9006723489896244, iteration: 92567
loss: 1.0050450563430786,grad_norm: 0.9999991201821424, iteration: 92568
loss: 1.0003199577331543,grad_norm: 0.9999991197573596, iteration: 92569
loss: 1.0116039514541626,grad_norm: 0.9999992225913195, iteration: 92570
loss: 1.023245930671692,grad_norm: 0.9999990880987253, iteration: 92571
loss: 1.0003892183303833,grad_norm: 0.9999990905354521, iteration: 92572
loss: 0.9757556915283203,grad_norm: 0.9999990299460495, iteration: 92573
loss: 0.9817138910293579,grad_norm: 0.9011673633919928, iteration: 92574
loss: 1.0000522136688232,grad_norm: 0.968355702578748, iteration: 92575
loss: 1.0230247974395752,grad_norm: 0.9999993214538494, iteration: 92576
loss: 1.0326054096221924,grad_norm: 0.9999992702108613, iteration: 92577
loss: 1.00835120677948,grad_norm: 0.8301452523161826, iteration: 92578
loss: 1.00883150100708,grad_norm: 0.9709089355146038, iteration: 92579
loss: 1.0076838731765747,grad_norm: 0.9999990994651249, iteration: 92580
loss: 1.0145524740219116,grad_norm: 0.8383365689891581, iteration: 92581
loss: 1.023810863494873,grad_norm: 0.9999991119889412, iteration: 92582
loss: 1.0211564302444458,grad_norm: 0.9999992393494985, iteration: 92583
loss: 0.9977244734764099,grad_norm: 0.9999990622838307, iteration: 92584
loss: 1.0246896743774414,grad_norm: 0.9999991781576838, iteration: 92585
loss: 0.9919579029083252,grad_norm: 0.9999989835785372, iteration: 92586
loss: 1.0215744972229004,grad_norm: 0.9723323347538388, iteration: 92587
loss: 1.032289743423462,grad_norm: 0.9999993181582602, iteration: 92588
loss: 1.0207366943359375,grad_norm: 0.9485648284417172, iteration: 92589
loss: 1.0139175653457642,grad_norm: 0.9999991566008625, iteration: 92590
loss: 1.0073901414871216,grad_norm: 0.9999991634363417, iteration: 92591
loss: 1.0269726514816284,grad_norm: 0.9999991016196466, iteration: 92592
loss: 0.9949983954429626,grad_norm: 0.9999989885236432, iteration: 92593
loss: 0.9910587668418884,grad_norm: 0.9999989806345364, iteration: 92594
loss: 0.969226062297821,grad_norm: 0.999999058557611, iteration: 92595
loss: 1.0017839670181274,grad_norm: 0.9999991757922451, iteration: 92596
loss: 0.986663818359375,grad_norm: 0.9999990647576461, iteration: 92597
loss: 1.009204387664795,grad_norm: 0.9999990758249443, iteration: 92598
loss: 1.00017511844635,grad_norm: 0.99999907748786, iteration: 92599
loss: 1.0235947370529175,grad_norm: 0.9680720804252323, iteration: 92600
loss: 0.991269588470459,grad_norm: 0.9408364715236404, iteration: 92601
loss: 1.0112022161483765,grad_norm: 0.978203300567582, iteration: 92602
loss: 1.038286805152893,grad_norm: 0.9999990963492202, iteration: 92603
loss: 0.9670515656471252,grad_norm: 0.9999992115234211, iteration: 92604
loss: 1.0023900270462036,grad_norm: 0.999999238625937, iteration: 92605
loss: 1.004427194595337,grad_norm: 0.9999990218975012, iteration: 92606
loss: 1.0069633722305298,grad_norm: 0.9999990976878141, iteration: 92607
loss: 0.9891230463981628,grad_norm: 0.999999252398655, iteration: 92608
loss: 0.9761223793029785,grad_norm: 0.9999990431541421, iteration: 92609
loss: 1.0013247728347778,grad_norm: 0.9999993003369203, iteration: 92610
loss: 0.98808753490448,grad_norm: 0.9403096192127156, iteration: 92611
loss: 1.045835256576538,grad_norm: 0.9999992792304575, iteration: 92612
loss: 1.0066851377487183,grad_norm: 0.9999990977051432, iteration: 92613
loss: 1.0059199333190918,grad_norm: 0.9999990331187605, iteration: 92614
loss: 0.9740886092185974,grad_norm: 0.9999990812544121, iteration: 92615
loss: 0.9768199324607849,grad_norm: 0.9999991387139743, iteration: 92616
loss: 0.9863120317459106,grad_norm: 0.9371691576943854, iteration: 92617
loss: 1.0113767385482788,grad_norm: 0.8981483591100656, iteration: 92618
loss: 1.0156135559082031,grad_norm: 0.8991749084276602, iteration: 92619
loss: 0.9922636151313782,grad_norm: 0.9407714145853294, iteration: 92620
loss: 0.9838583469390869,grad_norm: 0.9999997303861969, iteration: 92621
loss: 0.9817500114440918,grad_norm: 0.900096806102103, iteration: 92622
loss: 0.990780770778656,grad_norm: 0.9999990972241255, iteration: 92623
loss: 0.990079939365387,grad_norm: 0.9999990281555001, iteration: 92624
loss: 0.9784027338027954,grad_norm: 0.947716595142776, iteration: 92625
loss: 0.9893901348114014,grad_norm: 0.8844989343169908, iteration: 92626
loss: 1.0165867805480957,grad_norm: 0.988369755442689, iteration: 92627
loss: 1.0067247152328491,grad_norm: 0.9999991496811917, iteration: 92628
loss: 0.99190753698349,grad_norm: 0.9999989656318369, iteration: 92629
loss: 1.0001137256622314,grad_norm: 0.8506629309630621, iteration: 92630
loss: 0.9826456904411316,grad_norm: 0.9999990730647673, iteration: 92631
loss: 1.032577395439148,grad_norm: 0.9999991252289651, iteration: 92632
loss: 1.0212037563323975,grad_norm: 0.9999990451308298, iteration: 92633
loss: 1.0108606815338135,grad_norm: 0.9999991209895551, iteration: 92634
loss: 0.9908411502838135,grad_norm: 0.9999992238536289, iteration: 92635
loss: 0.9999833106994629,grad_norm: 0.9999989946917146, iteration: 92636
loss: 1.0266045331954956,grad_norm: 0.9999991877578264, iteration: 92637
loss: 1.0095683336257935,grad_norm: 0.9999992029374193, iteration: 92638
loss: 0.9704539179801941,grad_norm: 0.9999989327983256, iteration: 92639
loss: 0.9622493982315063,grad_norm: 0.99999931508581, iteration: 92640
loss: 1.0187666416168213,grad_norm: 0.9999991614506211, iteration: 92641
loss: 0.9613844156265259,grad_norm: 0.9999992614515675, iteration: 92642
loss: 0.960649847984314,grad_norm: 0.9999992267486378, iteration: 92643
loss: 1.0178776979446411,grad_norm: 0.9999992529999454, iteration: 92644
loss: 1.0306259393692017,grad_norm: 0.9999991580895297, iteration: 92645
loss: 1.006753921508789,grad_norm: 0.9898993478361785, iteration: 92646
loss: 0.9906023740768433,grad_norm: 0.963038439066821, iteration: 92647
loss: 0.9832945466041565,grad_norm: 0.9999990587126488, iteration: 92648
loss: 1.0040892362594604,grad_norm: 0.9999990994142341, iteration: 92649
loss: 0.9859488606452942,grad_norm: 0.9999990245077756, iteration: 92650
loss: 1.0356109142303467,grad_norm: 0.9494193862999892, iteration: 92651
loss: 0.9804345965385437,grad_norm: 0.9999992405809173, iteration: 92652
loss: 1.0323375463485718,grad_norm: 0.9999990526584613, iteration: 92653
loss: 0.9549761414527893,grad_norm: 0.9736525520406005, iteration: 92654
loss: 0.970343828201294,grad_norm: 0.9999989823432706, iteration: 92655
loss: 0.9810717105865479,grad_norm: 0.9999991362849202, iteration: 92656
loss: 0.9903813600540161,grad_norm: 0.9999989805508485, iteration: 92657
loss: 1.0098650455474854,grad_norm: 0.9999989914542526, iteration: 92658
loss: 0.9625120162963867,grad_norm: 0.8666760180074192, iteration: 92659
loss: 0.9953714609146118,grad_norm: 0.9734347366168995, iteration: 92660
loss: 0.9825421571731567,grad_norm: 0.999999078804396, iteration: 92661
loss: 0.9958242177963257,grad_norm: 0.999999209904377, iteration: 92662
loss: 1.0188652276992798,grad_norm: 0.9999991468740598, iteration: 92663
loss: 0.9936051964759827,grad_norm: 0.9999992713456264, iteration: 92664
loss: 1.0034101009368896,grad_norm: 0.9999993235307533, iteration: 92665
loss: 0.9518197774887085,grad_norm: 0.9999991902587357, iteration: 92666
loss: 0.9766427874565125,grad_norm: 0.9999991826681093, iteration: 92667
loss: 1.0124033689498901,grad_norm: 0.9999991858195202, iteration: 92668
loss: 1.0008496046066284,grad_norm: 0.9999991718663191, iteration: 92669
loss: 0.9777297973632812,grad_norm: 0.9999990887033474, iteration: 92670
loss: 1.0289525985717773,grad_norm: 0.9999991386456459, iteration: 92671
loss: 1.0045448541641235,grad_norm: 0.9999991743650959, iteration: 92672
loss: 0.9987595677375793,grad_norm: 0.9638533968931683, iteration: 92673
loss: 1.0131349563598633,grad_norm: 0.9999991031784975, iteration: 92674
loss: 1.0003325939178467,grad_norm: 0.9999991924759154, iteration: 92675
loss: 1.027587652206421,grad_norm: 0.999999227687838, iteration: 92676
loss: 1.0038025379180908,grad_norm: 0.9999992060850815, iteration: 92677
loss: 0.9758936166763306,grad_norm: 0.999999190895156, iteration: 92678
loss: 0.9527108073234558,grad_norm: 0.9999991583796186, iteration: 92679
loss: 0.9966549277305603,grad_norm: 0.9999992751375029, iteration: 92680
loss: 0.9792234301567078,grad_norm: 0.9999990920076921, iteration: 92681
loss: 1.0002201795578003,grad_norm: 0.9999991032191935, iteration: 92682
loss: 0.9825687408447266,grad_norm: 0.9999991125574587, iteration: 92683
loss: 1.0022810697555542,grad_norm: 0.9999991801133462, iteration: 92684
loss: 1.0277997255325317,grad_norm: 0.9999991280185069, iteration: 92685
loss: 1.0104122161865234,grad_norm: 0.9999991367281004, iteration: 92686
loss: 0.9918512105941772,grad_norm: 0.9999991582004693, iteration: 92687
loss: 1.0412096977233887,grad_norm: 0.999999214257209, iteration: 92688
loss: 1.0356816053390503,grad_norm: 0.9999991681967577, iteration: 92689
loss: 1.017997145652771,grad_norm: 0.9999991308931355, iteration: 92690
loss: 1.0114392042160034,grad_norm: 0.9796582652494763, iteration: 92691
loss: 1.001201868057251,grad_norm: 0.9964706189425903, iteration: 92692
loss: 0.9792599081993103,grad_norm: 0.9999989706669733, iteration: 92693
loss: 0.9941698908805847,grad_norm: 0.9999991735934033, iteration: 92694
loss: 0.9807167053222656,grad_norm: 0.9209432943422282, iteration: 92695
loss: 1.0196250677108765,grad_norm: 0.9719568841115321, iteration: 92696
loss: 1.047993540763855,grad_norm: 0.982999812355911, iteration: 92697
loss: 1.0032460689544678,grad_norm: 0.987843847277599, iteration: 92698
loss: 1.0342421531677246,grad_norm: 0.9999992170085984, iteration: 92699
loss: 1.01350736618042,grad_norm: 0.9999991530415581, iteration: 92700
loss: 1.029680609703064,grad_norm: 0.8875107761266462, iteration: 92701
loss: 1.0002342462539673,grad_norm: 0.9999990086112754, iteration: 92702
loss: 1.0048081874847412,grad_norm: 0.9314915713104235, iteration: 92703
loss: 0.9819372296333313,grad_norm: 0.9999990470930085, iteration: 92704
loss: 0.9848966598510742,grad_norm: 0.999999096724988, iteration: 92705
loss: 0.9877878427505493,grad_norm: 0.9999996935964264, iteration: 92706
loss: 0.9810936450958252,grad_norm: 0.9282380733846459, iteration: 92707
loss: 1.005651831626892,grad_norm: 0.9999991396388083, iteration: 92708
loss: 0.9673405289649963,grad_norm: 0.9593970960711143, iteration: 92709
loss: 0.9676856398582458,grad_norm: 0.9999990579448418, iteration: 92710
loss: 0.9900705814361572,grad_norm: 0.9999990795377979, iteration: 92711
loss: 1.0093209743499756,grad_norm: 0.9999991239480588, iteration: 92712
loss: 0.975303590297699,grad_norm: 0.999998961392897, iteration: 92713
loss: 1.0745735168457031,grad_norm: 0.9999994061435783, iteration: 92714
loss: 0.9891289472579956,grad_norm: 0.966711013082825, iteration: 92715
loss: 0.9698323607444763,grad_norm: 0.9647650911199076, iteration: 92716
loss: 0.9916538000106812,grad_norm: 0.9905862333375254, iteration: 92717
loss: 0.978032648563385,grad_norm: 0.9999990978660681, iteration: 92718
loss: 1.0071955919265747,grad_norm: 0.9517958664397197, iteration: 92719
loss: 1.0111277103424072,grad_norm: 0.9999991835525591, iteration: 92720
loss: 0.9983934760093689,grad_norm: 0.9895456942703226, iteration: 92721
loss: 0.9903308153152466,grad_norm: 0.9999989251043588, iteration: 92722
loss: 1.0369867086410522,grad_norm: 0.9999991029072198, iteration: 92723
loss: 0.9994682669639587,grad_norm: 0.9419901951213963, iteration: 92724
loss: 0.9447876214981079,grad_norm: 0.8915052299779207, iteration: 92725
loss: 1.004770040512085,grad_norm: 0.9999991131388388, iteration: 92726
loss: 0.996552050113678,grad_norm: 0.9999990105739891, iteration: 92727
loss: 1.00236976146698,grad_norm: 0.942400678880742, iteration: 92728
loss: 1.0237008333206177,grad_norm: 0.9999990383601808, iteration: 92729
loss: 1.0190670490264893,grad_norm: 0.999999154777769, iteration: 92730
loss: 0.9998576641082764,grad_norm: 0.9999990698451423, iteration: 92731
loss: 1.0071234703063965,grad_norm: 0.8357313748283522, iteration: 92732
loss: 0.9905518293380737,grad_norm: 0.9999991610919376, iteration: 92733
loss: 0.9706222414970398,grad_norm: 0.9999990628787756, iteration: 92734
loss: 0.9718424677848816,grad_norm: 0.9999989560799767, iteration: 92735
loss: 1.0045799016952515,grad_norm: 0.9999990626081813, iteration: 92736
loss: 0.976828932762146,grad_norm: 0.9999990480690537, iteration: 92737
loss: 1.0150638818740845,grad_norm: 0.9999990964308275, iteration: 92738
loss: 1.0060999393463135,grad_norm: 0.8285247708429468, iteration: 92739
loss: 1.0234280824661255,grad_norm: 0.9999991523120574, iteration: 92740
loss: 0.9916884899139404,grad_norm: 0.9999991640006368, iteration: 92741
loss: 1.0384682416915894,grad_norm: 0.999999046937933, iteration: 92742
loss: 0.964545488357544,grad_norm: 0.9999991021630443, iteration: 92743
loss: 0.9519410729408264,grad_norm: 0.8035792538726478, iteration: 92744
loss: 0.9842405915260315,grad_norm: 0.9999992573209918, iteration: 92745
loss: 0.9642200469970703,grad_norm: 0.9999991828817033, iteration: 92746
loss: 0.9872915744781494,grad_norm: 0.8546193110685122, iteration: 92747
loss: 1.0242419242858887,grad_norm: 0.999999376866445, iteration: 92748
loss: 0.9866009950637817,grad_norm: 0.9567049715149171, iteration: 92749
loss: 0.9965758323669434,grad_norm: 0.9999991438761521, iteration: 92750
loss: 1.0126765966415405,grad_norm: 0.9999989993742724, iteration: 92751
loss: 1.026023268699646,grad_norm: 0.9999995987430184, iteration: 92752
loss: 1.0121181011199951,grad_norm: 0.9999991154294281, iteration: 92753
loss: 1.0184015035629272,grad_norm: 0.999999114976165, iteration: 92754
loss: 0.9872804880142212,grad_norm: 0.9999992187017619, iteration: 92755
loss: 0.9903516173362732,grad_norm: 0.9999991985150674, iteration: 92756
loss: 0.9497203826904297,grad_norm: 0.9923020817893706, iteration: 92757
loss: 1.0079678297042847,grad_norm: 0.9209782102386415, iteration: 92758
loss: 1.0193302631378174,grad_norm: 0.9999999109053691, iteration: 92759
loss: 1.0017298460006714,grad_norm: 0.9999991198042304, iteration: 92760
loss: 1.0504881143569946,grad_norm: 0.999999101878777, iteration: 92761
loss: 0.9618207812309265,grad_norm: 0.999998968339647, iteration: 92762
loss: 1.0092560052871704,grad_norm: 0.9586648191545164, iteration: 92763
loss: 0.9913711547851562,grad_norm: 0.9511978082169815, iteration: 92764
loss: 1.0035864114761353,grad_norm: 0.9865850845284992, iteration: 92765
loss: 0.9937694072723389,grad_norm: 0.8724366845863694, iteration: 92766
loss: 0.9982545971870422,grad_norm: 0.9999993130228738, iteration: 92767
loss: 0.9957917332649231,grad_norm: 0.9999993817613939, iteration: 92768
loss: 0.9951203465461731,grad_norm: 0.81595509207285, iteration: 92769
loss: 0.971314013004303,grad_norm: 0.9954279640053458, iteration: 92770
loss: 0.9936146140098572,grad_norm: 0.9999990043872684, iteration: 92771
loss: 0.9903686046600342,grad_norm: 0.9999989299441958, iteration: 92772
loss: 0.9950429797172546,grad_norm: 0.9999994970892415, iteration: 92773
loss: 0.9787073731422424,grad_norm: 0.9709292960864221, iteration: 92774
loss: 1.0282478332519531,grad_norm: 0.9999993063986982, iteration: 92775
loss: 0.999495267868042,grad_norm: 0.9820938267213983, iteration: 92776
loss: 1.0139392614364624,grad_norm: 0.9999990778808966, iteration: 92777
loss: 1.0112500190734863,grad_norm: 0.9999990789835584, iteration: 92778
loss: 1.0331368446350098,grad_norm: 0.9567524084275012, iteration: 92779
loss: 1.0135760307312012,grad_norm: 0.9999991259624288, iteration: 92780
loss: 0.9988695979118347,grad_norm: 0.9999989957133378, iteration: 92781
loss: 0.9775711894035339,grad_norm: 0.9999990216192339, iteration: 92782
loss: 0.9890105724334717,grad_norm: 0.9071003856981675, iteration: 92783
loss: 0.9580501914024353,grad_norm: 0.9999992502797496, iteration: 92784
loss: 0.9864479899406433,grad_norm: 0.999999185739509, iteration: 92785
loss: 0.953673779964447,grad_norm: 0.9697207783867271, iteration: 92786
loss: 1.0215637683868408,grad_norm: 0.9999990416200608, iteration: 92787
loss: 1.006971001625061,grad_norm: 0.9999990744939418, iteration: 92788
loss: 1.0098520517349243,grad_norm: 0.9999990884508401, iteration: 92789
loss: 0.9920876622200012,grad_norm: 0.999999128619392, iteration: 92790
loss: 0.9966538548469543,grad_norm: 0.9999991852717165, iteration: 92791
loss: 0.9757386445999146,grad_norm: 0.9069728303146571, iteration: 92792
loss: 1.0054086446762085,grad_norm: 0.9999990943424478, iteration: 92793
loss: 1.022275447845459,grad_norm: 0.9999999473396309, iteration: 92794
loss: 0.9619938731193542,grad_norm: 0.9999991729396844, iteration: 92795
loss: 0.9933282732963562,grad_norm: 0.9999989716735109, iteration: 92796
loss: 1.021843433380127,grad_norm: 0.9999991094673035, iteration: 92797
loss: 0.9813825488090515,grad_norm: 0.9999990982757437, iteration: 92798
loss: 1.0260984897613525,grad_norm: 0.8597095815574634, iteration: 92799
loss: 1.0053496360778809,grad_norm: 0.9999989414169657, iteration: 92800
loss: 0.9846738576889038,grad_norm: 0.9650717839683808, iteration: 92801
loss: 0.9858002662658691,grad_norm: 0.999998997051315, iteration: 92802
loss: 1.0357729196548462,grad_norm: 0.999999255645658, iteration: 92803
loss: 1.047002911567688,grad_norm: 0.9999990961040327, iteration: 92804
loss: 1.0263490676879883,grad_norm: 0.9999991002005368, iteration: 92805
loss: 1.0225857496261597,grad_norm: 0.9999991213385254, iteration: 92806
loss: 0.9813642501831055,grad_norm: 0.9999990971602561, iteration: 92807
loss: 0.9937047362327576,grad_norm: 0.9999990476949907, iteration: 92808
loss: 0.9803664088249207,grad_norm: 0.9107310988432392, iteration: 92809
loss: 0.9964854717254639,grad_norm: 0.9122940290205779, iteration: 92810
loss: 0.9837002158164978,grad_norm: 0.9999993924585568, iteration: 92811
loss: 0.9900783896446228,grad_norm: 0.9999991205460991, iteration: 92812
loss: 0.9667671918869019,grad_norm: 0.9999990424811805, iteration: 92813
loss: 0.97298264503479,grad_norm: 0.9999991357547944, iteration: 92814
loss: 1.013000726699829,grad_norm: 0.9999990456349823, iteration: 92815
loss: 1.010929822921753,grad_norm: 0.9596933573406358, iteration: 92816
loss: 1.0230127573013306,grad_norm: 0.9999991649110144, iteration: 92817
loss: 0.9940564632415771,grad_norm: 0.9999989742388803, iteration: 92818
loss: 0.9815099835395813,grad_norm: 0.9082449503637208, iteration: 92819
loss: 1.0332518815994263,grad_norm: 0.8929810153713713, iteration: 92820
loss: 0.995000958442688,grad_norm: 0.9999996549463643, iteration: 92821
loss: 1.0072425603866577,grad_norm: 0.9515884238412126, iteration: 92822
loss: 0.9826880693435669,grad_norm: 0.9999991380805826, iteration: 92823
loss: 1.006568431854248,grad_norm: 0.999999241390993, iteration: 92824
loss: 1.0143718719482422,grad_norm: 0.9999989711720048, iteration: 92825
loss: 0.9913829565048218,grad_norm: 0.999999185582561, iteration: 92826
loss: 0.9699103832244873,grad_norm: 0.940065548565762, iteration: 92827
loss: 0.9881934523582458,grad_norm: 0.9456060298549829, iteration: 92828
loss: 1.0027031898498535,grad_norm: 0.9999991119153738, iteration: 92829
loss: 1.0481610298156738,grad_norm: 0.9999991232263371, iteration: 92830
loss: 1.0154037475585938,grad_norm: 0.9420192538918694, iteration: 92831
loss: 1.0237131118774414,grad_norm: 0.9999992883587893, iteration: 92832
loss: 0.9794087409973145,grad_norm: 0.9857831746395114, iteration: 92833
loss: 0.9193769097328186,grad_norm: 0.9999990618726524, iteration: 92834
loss: 0.9551700353622437,grad_norm: 0.9999991588740172, iteration: 92835
loss: 0.9725515842437744,grad_norm: 0.9999991915890316, iteration: 92836
loss: 1.0072293281555176,grad_norm: 0.887769281005965, iteration: 92837
loss: 0.9787561297416687,grad_norm: 0.9999990779596916, iteration: 92838
loss: 1.0059815645217896,grad_norm: 0.9373975420146455, iteration: 92839
loss: 0.9667134881019592,grad_norm: 0.9999989774575602, iteration: 92840
loss: 0.9742224812507629,grad_norm: 0.999999151352821, iteration: 92841
loss: 0.9897295832633972,grad_norm: 0.8046857099103312, iteration: 92842
loss: 1.2184361219406128,grad_norm: 0.9999999248915136, iteration: 92843
loss: 0.9737743735313416,grad_norm: 0.8866072278155782, iteration: 92844
loss: 1.0359214544296265,grad_norm: 0.9824974524770625, iteration: 92845
loss: 0.9707513451576233,grad_norm: 0.991756589705811, iteration: 92846
loss: 0.9605768322944641,grad_norm: 0.9999992748182437, iteration: 92847
loss: 1.0124808549880981,grad_norm: 0.999999208657127, iteration: 92848
loss: 0.9949194192886353,grad_norm: 0.9211806561800859, iteration: 92849
loss: 0.9578080773353577,grad_norm: 0.9999992302960867, iteration: 92850
loss: 0.9758709669113159,grad_norm: 0.9999991869505008, iteration: 92851
loss: 0.9940801858901978,grad_norm: 0.9999991448578391, iteration: 92852
loss: 0.9753929972648621,grad_norm: 0.9999991538879152, iteration: 92853
loss: 0.9662408232688904,grad_norm: 0.9999990832420785, iteration: 92854
loss: 1.038558840751648,grad_norm: 0.9999992186207461, iteration: 92855
loss: 0.9988356232643127,grad_norm: 0.9999992207489187, iteration: 92856
loss: 0.9881739020347595,grad_norm: 0.9323293528124856, iteration: 92857
loss: 0.9639869928359985,grad_norm: 0.930543121716328, iteration: 92858
loss: 0.9946901202201843,grad_norm: 0.9999989427326151, iteration: 92859
loss: 0.9646722674369812,grad_norm: 0.9999989932507396, iteration: 92860
loss: 0.972964882850647,grad_norm: 0.9999991596488061, iteration: 92861
loss: 1.0080900192260742,grad_norm: 0.8506327761194993, iteration: 92862
loss: 1.0066795349121094,grad_norm: 0.9999992691253444, iteration: 92863
loss: 1.0009329319000244,grad_norm: 0.9999991286990787, iteration: 92864
loss: 1.0189286470413208,grad_norm: 0.999999997167508, iteration: 92865
loss: 1.0009676218032837,grad_norm: 0.9999991598753464, iteration: 92866
loss: 1.0160918235778809,grad_norm: 0.9999990509199069, iteration: 92867
loss: 1.007818341255188,grad_norm: 0.9567543383756661, iteration: 92868
loss: 1.038017988204956,grad_norm: 0.9999996545272429, iteration: 92869
loss: 0.986543595790863,grad_norm: 0.9992077693093544, iteration: 92870
loss: 1.150031328201294,grad_norm: 0.9999998023541032, iteration: 92871
loss: 0.9888243675231934,grad_norm: 0.9547713277047518, iteration: 92872
loss: 1.2220690250396729,grad_norm: 0.9999996084905669, iteration: 92873
loss: 1.0162197351455688,grad_norm: 0.99999912904604, iteration: 92874
loss: 1.0223788022994995,grad_norm: 0.9999990369312491, iteration: 92875
loss: 1.0304741859436035,grad_norm: 0.8486947507128971, iteration: 92876
loss: 1.0243550539016724,grad_norm: 0.999999140877497, iteration: 92877
loss: 1.0087834596633911,grad_norm: 0.9999994506460436, iteration: 92878
loss: 0.9658747315406799,grad_norm: 0.999999258052465, iteration: 92879
loss: 1.0275501012802124,grad_norm: 0.9649593291408064, iteration: 92880
loss: 1.0161069631576538,grad_norm: 0.9999992327448383, iteration: 92881
loss: 1.029097080230713,grad_norm: 0.9999993348544093, iteration: 92882
loss: 0.9907565116882324,grad_norm: 0.9999991957627673, iteration: 92883
loss: 0.9893748760223389,grad_norm: 0.9021728952096333, iteration: 92884
loss: 0.9785316586494446,grad_norm: 0.9999991933177594, iteration: 92885
loss: 0.9887368083000183,grad_norm: 0.9999991456140387, iteration: 92886
loss: 1.1566779613494873,grad_norm: 0.9999998087930513, iteration: 92887
loss: 0.9922891855239868,grad_norm: 0.986851942150305, iteration: 92888
loss: 0.9868246912956238,grad_norm: 0.9999992084212567, iteration: 92889
loss: 0.9952854514122009,grad_norm: 0.9999990750694274, iteration: 92890
loss: 1.0796974897384644,grad_norm: 0.999999488810181, iteration: 92891
loss: 1.0291037559509277,grad_norm: 0.9244635628038884, iteration: 92892
loss: 1.0025290250778198,grad_norm: 0.9999992862443269, iteration: 92893
loss: 0.9756436347961426,grad_norm: 0.9999996188882878, iteration: 92894
loss: 1.0265172719955444,grad_norm: 0.9960321091530074, iteration: 92895
loss: 1.148688793182373,grad_norm: 1.000000047629337, iteration: 92896
loss: 1.004741907119751,grad_norm: 0.9999991258899099, iteration: 92897
loss: 0.987987220287323,grad_norm: 0.9716204329388312, iteration: 92898
loss: 0.9814603924751282,grad_norm: 0.9457195972628191, iteration: 92899
loss: 0.9663060307502747,grad_norm: 0.9999990226683779, iteration: 92900
loss: 1.0331043004989624,grad_norm: 0.9999992082172617, iteration: 92901
loss: 0.9972435235977173,grad_norm: 0.9155499329264479, iteration: 92902
loss: 0.984559953212738,grad_norm: 0.999999137067002, iteration: 92903
loss: 0.9632367491722107,grad_norm: 0.9999990252469916, iteration: 92904
loss: 0.9999713897705078,grad_norm: 0.9999992370721136, iteration: 92905
loss: 0.9970309138298035,grad_norm: 0.9999991065694117, iteration: 92906
loss: 0.98012775182724,grad_norm: 0.9825989183243405, iteration: 92907
loss: 1.0166561603546143,grad_norm: 0.901620230110977, iteration: 92908
loss: 0.9886124730110168,grad_norm: 0.9934312526902336, iteration: 92909
loss: 1.0023210048675537,grad_norm: 0.9071660612510634, iteration: 92910
loss: 1.0248807668685913,grad_norm: 0.8992092126883201, iteration: 92911
loss: 0.9964330196380615,grad_norm: 0.9179562783724675, iteration: 92912
loss: 1.0073292255401611,grad_norm: 0.8847540997292762, iteration: 92913
loss: 0.9915909171104431,grad_norm: 0.9999993701147972, iteration: 92914
loss: 0.9440931677818298,grad_norm: 0.8657783761446499, iteration: 92915
loss: 1.0352195501327515,grad_norm: 0.9999995386576795, iteration: 92916
loss: 1.050406813621521,grad_norm: 0.9999999011353423, iteration: 92917
loss: 1.0039931535720825,grad_norm: 0.9999991837640658, iteration: 92918
loss: 0.9912514090538025,grad_norm: 0.9991198944409583, iteration: 92919
loss: 0.9954203963279724,grad_norm: 0.9012743296999105, iteration: 92920
loss: 0.9700934886932373,grad_norm: 0.9929628143162434, iteration: 92921
loss: 0.946811318397522,grad_norm: 0.970634815817881, iteration: 92922
loss: 1.0079303979873657,grad_norm: 0.9949170204940625, iteration: 92923
loss: 0.9878067374229431,grad_norm: 0.9999992590422316, iteration: 92924
loss: 0.9849519729614258,grad_norm: 0.8692216410360377, iteration: 92925
loss: 0.986768901348114,grad_norm: 0.9472320236473022, iteration: 92926
loss: 0.9837933778762817,grad_norm: 0.9999992982438707, iteration: 92927
loss: 0.9905314445495605,grad_norm: 0.8685487171078787, iteration: 92928
loss: 0.9878003597259521,grad_norm: 0.8881661904577517, iteration: 92929
loss: 0.9987843036651611,grad_norm: 0.999999226053126, iteration: 92930
loss: 0.9989123344421387,grad_norm: 0.9999992164676201, iteration: 92931
loss: 0.9795932173728943,grad_norm: 0.9999990510775844, iteration: 92932
loss: 1.0145138502120972,grad_norm: 0.9625647056279244, iteration: 92933
loss: 1.0478719472885132,grad_norm: 0.9999993381506143, iteration: 92934
loss: 0.9570043087005615,grad_norm: 0.9999992272151569, iteration: 92935
loss: 0.9568479657173157,grad_norm: 0.96444782777309, iteration: 92936
loss: 1.0011439323425293,grad_norm: 0.9999991227786558, iteration: 92937
loss: 1.0436521768569946,grad_norm: 0.9999991503384652, iteration: 92938
loss: 0.9946470856666565,grad_norm: 0.9738638030787236, iteration: 92939
loss: 0.9722055792808533,grad_norm: 0.9999990538985886, iteration: 92940
loss: 0.9963838458061218,grad_norm: 0.9999991239298894, iteration: 92941
loss: 1.032987117767334,grad_norm: 0.9999990985212893, iteration: 92942
loss: 0.9951885938644409,grad_norm: 0.902247708498093, iteration: 92943
loss: 1.0762683153152466,grad_norm: 0.9999995419795348, iteration: 92944
loss: 0.9949934482574463,grad_norm: 0.9367559771889931, iteration: 92945
loss: 1.061875581741333,grad_norm: 0.9999991463937874, iteration: 92946
loss: 0.9986225366592407,grad_norm: 0.9999992544526598, iteration: 92947
loss: 1.0130115747451782,grad_norm: 0.9999992113180926, iteration: 92948
loss: 0.9813941717147827,grad_norm: 0.9999991250574943, iteration: 92949
loss: 0.9920412302017212,grad_norm: 0.9999992885207449, iteration: 92950
loss: 0.9985866546630859,grad_norm: 0.9122498290501445, iteration: 92951
loss: 0.9373664259910583,grad_norm: 0.9999990938234481, iteration: 92952
loss: 1.0322141647338867,grad_norm: 0.9999991834632479, iteration: 92953
loss: 1.0235527753829956,grad_norm: 0.9999992149589022, iteration: 92954
loss: 1.009220838546753,grad_norm: 0.9999989535091403, iteration: 92955
loss: 1.0739800930023193,grad_norm: 0.9999991749730822, iteration: 92956
loss: 1.000181794166565,grad_norm: 0.9848579762757431, iteration: 92957
loss: 0.996669590473175,grad_norm: 0.9577813712204848, iteration: 92958
loss: 0.9954276084899902,grad_norm: 0.9452176560397045, iteration: 92959
loss: 0.9797216653823853,grad_norm: 0.9999991504134521, iteration: 92960
loss: 0.9994242191314697,grad_norm: 0.9145766339403403, iteration: 92961
loss: 1.0216684341430664,grad_norm: 0.999999199765382, iteration: 92962
loss: 1.0831812620162964,grad_norm: 0.9999991103038568, iteration: 92963
loss: 0.9929768443107605,grad_norm: 0.9999991680797982, iteration: 92964
loss: 0.9596208333969116,grad_norm: 0.9182497155441472, iteration: 92965
loss: 1.008897304534912,grad_norm: 0.784542741073696, iteration: 92966
loss: 0.9924373030662537,grad_norm: 0.9999990190888394, iteration: 92967
loss: 1.0720064640045166,grad_norm: 0.9999997196834357, iteration: 92968
loss: 1.0177043676376343,grad_norm: 0.9999991967112207, iteration: 92969
loss: 1.0014818906784058,grad_norm: 0.9999992915460182, iteration: 92970
loss: 1.044089913368225,grad_norm: 0.9965279580211156, iteration: 92971
loss: 0.9994099140167236,grad_norm: 0.9999990335628604, iteration: 92972
loss: 0.9786304831504822,grad_norm: 0.9999993930157988, iteration: 92973
loss: 1.025996208190918,grad_norm: 0.9751650054021814, iteration: 92974
loss: 1.0620231628417969,grad_norm: 0.9999992746305145, iteration: 92975
loss: 0.9761921763420105,grad_norm: 0.9999991884624067, iteration: 92976
loss: 1.0458813905715942,grad_norm: 0.9999992120011584, iteration: 92977
loss: 1.019598364830017,grad_norm: 0.9999989924836705, iteration: 92978
loss: 0.9875125885009766,grad_norm: 0.9999992502701684, iteration: 92979
loss: 1.0085996389389038,grad_norm: 0.9999995914155029, iteration: 92980
loss: 1.0003842115402222,grad_norm: 0.9999991345191583, iteration: 92981
loss: 0.9470481872558594,grad_norm: 0.9999992721757698, iteration: 92982
loss: 1.0668599605560303,grad_norm: 0.9999991887483076, iteration: 92983
loss: 0.9829237461090088,grad_norm: 0.985663096024906, iteration: 92984
loss: 1.022114872932434,grad_norm: 0.9977156546150567, iteration: 92985
loss: 0.9686350226402283,grad_norm: 0.9999991980798576, iteration: 92986
loss: 1.002163052558899,grad_norm: 0.9949341402970078, iteration: 92987
loss: 1.0012385845184326,grad_norm: 0.9651991117212791, iteration: 92988
loss: 0.9358823299407959,grad_norm: 0.9658043591394874, iteration: 92989
loss: 1.0378258228302002,grad_norm: 0.9999990039731138, iteration: 92990
loss: 1.0109479427337646,grad_norm: 0.8672046109151301, iteration: 92991
loss: 0.9800280928611755,grad_norm: 0.9292307685673814, iteration: 92992
loss: 1.0010790824890137,grad_norm: 0.9999992471880378, iteration: 92993
loss: 1.022804617881775,grad_norm: 0.8940211688734543, iteration: 92994
loss: 1.0053592920303345,grad_norm: 0.983825432163915, iteration: 92995
loss: 0.9958816766738892,grad_norm: 0.9999990627341045, iteration: 92996
loss: 1.0683153867721558,grad_norm: 0.9753649829265522, iteration: 92997
loss: 1.0061217546463013,grad_norm: 0.9999991992880032, iteration: 92998
loss: 0.9622980356216431,grad_norm: 0.9999991085948106, iteration: 92999
loss: 0.9699172973632812,grad_norm: 0.9703343566252348, iteration: 93000
loss: 1.0179238319396973,grad_norm: 0.9999990652473192, iteration: 93001
loss: 0.9945802092552185,grad_norm: 0.9999990654864608, iteration: 93002
loss: 1.0437301397323608,grad_norm: 0.9999991786633065, iteration: 93003
loss: 1.0046130418777466,grad_norm: 0.999999134736376, iteration: 93004
loss: 1.0181714296340942,grad_norm: 0.9999991682807022, iteration: 93005
loss: 0.9672845005989075,grad_norm: 0.961618462318391, iteration: 93006
loss: 1.0265836715698242,grad_norm: 0.9999994099105829, iteration: 93007
loss: 1.0064529180526733,grad_norm: 0.99999911942154, iteration: 93008
loss: 0.988562285900116,grad_norm: 0.9999991683518509, iteration: 93009
loss: 1.0050277709960938,grad_norm: 0.9999996444410039, iteration: 93010
loss: 0.9848094582557678,grad_norm: 0.9999990097085469, iteration: 93011
loss: 0.968353271484375,grad_norm: 0.9999992219678038, iteration: 93012
loss: 0.9499832391738892,grad_norm: 0.9999990767197091, iteration: 93013
loss: 0.9708816409111023,grad_norm: 0.9999989196916418, iteration: 93014
loss: 1.0457966327667236,grad_norm: 0.9732768459485344, iteration: 93015
loss: 1.0135912895202637,grad_norm: 0.9999990420288829, iteration: 93016
loss: 1.0019699335098267,grad_norm: 0.9999990788112885, iteration: 93017
loss: 1.0264778137207031,grad_norm: 0.9999990631627872, iteration: 93018
loss: 0.9950795769691467,grad_norm: 0.9999991908576178, iteration: 93019
loss: 0.9870525002479553,grad_norm: 0.9118396941402089, iteration: 93020
loss: 1.0478318929672241,grad_norm: 0.9999996859423519, iteration: 93021
loss: 1.0075018405914307,grad_norm: 0.9999997416206686, iteration: 93022
loss: 0.9734926819801331,grad_norm: 0.9999991106003501, iteration: 93023
loss: 0.9787419438362122,grad_norm: 0.9999990405195551, iteration: 93024
loss: 1.0011472702026367,grad_norm: 0.9999993156969916, iteration: 93025
loss: 0.9764919877052307,grad_norm: 0.9999992612811627, iteration: 93026
loss: 1.0207104682922363,grad_norm: 0.9999995955301995, iteration: 93027
loss: 1.0418121814727783,grad_norm: 0.9999991550182881, iteration: 93028
loss: 1.0138734579086304,grad_norm: 0.9999989460418741, iteration: 93029
loss: 1.0313501358032227,grad_norm: 0.9999989898781828, iteration: 93030
loss: 0.9847621917724609,grad_norm: 0.9999990769077978, iteration: 93031
loss: 0.9476227164268494,grad_norm: 0.9999990782356505, iteration: 93032
loss: 1.043191909790039,grad_norm: 0.9999992157621578, iteration: 93033
loss: 0.9969642162322998,grad_norm: 0.9999991591770664, iteration: 93034
loss: 1.0282728672027588,grad_norm: 0.9999998432377097, iteration: 93035
loss: 1.006531000137329,grad_norm: 0.9116174527795358, iteration: 93036
loss: 1.040883183479309,grad_norm: 0.9999991180316347, iteration: 93037
loss: 1.0663692951202393,grad_norm: 0.9999990385840053, iteration: 93038
loss: 0.9749505519866943,grad_norm: 0.9999992820623647, iteration: 93039
loss: 1.0208454132080078,grad_norm: 0.9999990989861314, iteration: 93040
loss: 1.0847465991973877,grad_norm: 0.9999999205163596, iteration: 93041
loss: 1.0343650579452515,grad_norm: 0.9264610816494303, iteration: 93042
loss: 1.0582889318466187,grad_norm: 0.9999999321443446, iteration: 93043
loss: 0.996884286403656,grad_norm: 0.9999992243986223, iteration: 93044
loss: 1.010921835899353,grad_norm: 0.9999990515713393, iteration: 93045
loss: 1.0122082233428955,grad_norm: 0.9242899488368995, iteration: 93046
loss: 0.9775214195251465,grad_norm: 0.9999989624694738, iteration: 93047
loss: 1.0267796516418457,grad_norm: 0.7831589256400224, iteration: 93048
loss: 0.9789947271347046,grad_norm: 0.9999990169507132, iteration: 93049
loss: 0.9944143891334534,grad_norm: 0.9826840667203084, iteration: 93050
loss: 1.0007243156433105,grad_norm: 0.9194629820280334, iteration: 93051
loss: 0.9960642457008362,grad_norm: 0.999999062467714, iteration: 93052
loss: 1.066019058227539,grad_norm: 0.9999991163874355, iteration: 93053
loss: 0.998560905456543,grad_norm: 0.9999991223167708, iteration: 93054
loss: 0.9917522668838501,grad_norm: 0.9999991744795241, iteration: 93055
loss: 1.032231330871582,grad_norm: 0.9999993684722462, iteration: 93056
loss: 0.9869146943092346,grad_norm: 0.9999991207713015, iteration: 93057
loss: 1.0070619583129883,grad_norm: 0.9751617274093505, iteration: 93058
loss: 1.0199463367462158,grad_norm: 0.9999994375153689, iteration: 93059
loss: 1.0466649532318115,grad_norm: 0.9999988777720015, iteration: 93060
loss: 0.9584600925445557,grad_norm: 0.8973102033621049, iteration: 93061
loss: 1.0265849828720093,grad_norm: 0.9403842959865428, iteration: 93062
loss: 0.953135073184967,grad_norm: 0.9447873473282025, iteration: 93063
loss: 0.9934402108192444,grad_norm: 0.9999991678431577, iteration: 93064
loss: 0.9962857365608215,grad_norm: 0.999999184143767, iteration: 93065
loss: 1.0244166851043701,grad_norm: 0.9959247117168323, iteration: 93066
loss: 1.0670586824417114,grad_norm: 0.9999995578834902, iteration: 93067
loss: 0.9962446689605713,grad_norm: 0.9999991337934182, iteration: 93068
loss: 1.002289891242981,grad_norm: 0.9999991690974495, iteration: 93069
loss: 1.0120784044265747,grad_norm: 0.9999991585563179, iteration: 93070
loss: 1.0087388753890991,grad_norm: 0.9999993063615892, iteration: 93071
loss: 1.0370292663574219,grad_norm: 0.9999991071530923, iteration: 93072
loss: 1.004464030265808,grad_norm: 0.8624158733815143, iteration: 93073
loss: 0.9880495071411133,grad_norm: 0.9999990130894806, iteration: 93074
loss: 1.0033481121063232,grad_norm: 0.9863059073013137, iteration: 93075
loss: 0.9960243105888367,grad_norm: 0.9999990746409345, iteration: 93076
loss: 1.0407757759094238,grad_norm: 0.9999991443256088, iteration: 93077
loss: 1.020751714706421,grad_norm: 0.9999992713281626, iteration: 93078
loss: 1.0029693841934204,grad_norm: 0.8660015599498814, iteration: 93079
loss: 1.0596356391906738,grad_norm: 0.9999993105014758, iteration: 93080
loss: 0.9697846174240112,grad_norm: 0.9404158582260251, iteration: 93081
loss: 0.9952303171157837,grad_norm: 0.9999989544935524, iteration: 93082
loss: 1.2530282735824585,grad_norm: 0.9999992358414369, iteration: 93083
loss: 0.9746200442314148,grad_norm: 0.9999991219576861, iteration: 93084
loss: 0.9993875622749329,grad_norm: 0.9999995995547462, iteration: 93085
loss: 0.9648786187171936,grad_norm: 0.8934289334111766, iteration: 93086
loss: 0.9707943201065063,grad_norm: 0.9999991585306488, iteration: 93087
loss: 1.077629566192627,grad_norm: 0.9999990582977323, iteration: 93088
loss: 1.082942247390747,grad_norm: 0.982996572202072, iteration: 93089
loss: 1.1033695936203003,grad_norm: 0.9509785625132565, iteration: 93090
loss: 0.9963283538818359,grad_norm: 0.9329121749155718, iteration: 93091
loss: 1.2423700094223022,grad_norm: 0.999999355972236, iteration: 93092
loss: 1.0224260091781616,grad_norm: 0.9999991634478924, iteration: 93093
loss: 1.0214457511901855,grad_norm: 0.9999992480185333, iteration: 93094
loss: 0.9699212908744812,grad_norm: 0.9999991556248515, iteration: 93095
loss: 1.0316764116287231,grad_norm: 0.9999990944021387, iteration: 93096
loss: 0.9682669043540955,grad_norm: 0.9999995476603567, iteration: 93097
loss: 1.0421348810195923,grad_norm: 0.9999999550649611, iteration: 93098
loss: 1.0006022453308105,grad_norm: 0.9999990176801321, iteration: 93099
loss: 1.0275191068649292,grad_norm: 0.9432559250243305, iteration: 93100
loss: 1.0232435464859009,grad_norm: 0.9327143795018946, iteration: 93101
loss: 1.0193811655044556,grad_norm: 0.9999990665853946, iteration: 93102
loss: 1.0064018964767456,grad_norm: 0.9911511292821417, iteration: 93103
loss: 0.9990953803062439,grad_norm: 0.9999992188568549, iteration: 93104
loss: 1.0010590553283691,grad_norm: 0.9999991250532027, iteration: 93105
loss: 1.0238409042358398,grad_norm: 0.9543421843376485, iteration: 93106
loss: 1.0193966627120972,grad_norm: 0.9999995744437462, iteration: 93107
loss: 1.0154860019683838,grad_norm: 0.9999993297217132, iteration: 93108
loss: 1.0525797605514526,grad_norm: 0.999999641451718, iteration: 93109
loss: 1.0036406517028809,grad_norm: 0.9999992173260948, iteration: 93110
loss: 1.008002758026123,grad_norm: 0.9999990749281926, iteration: 93111
loss: 0.9866657257080078,grad_norm: 0.9999990928306179, iteration: 93112
loss: 1.0150089263916016,grad_norm: 0.9999992369459322, iteration: 93113
loss: 1.0006815195083618,grad_norm: 0.9620554381698597, iteration: 93114
loss: 0.9578508734703064,grad_norm: 0.9999992397546698, iteration: 93115
loss: 0.9896760582923889,grad_norm: 0.9999992752025715, iteration: 93116
loss: 1.0633599758148193,grad_norm: 0.999999305814778, iteration: 93117
loss: 0.9980865120887756,grad_norm: 0.9999993804704091, iteration: 93118
loss: 1.01250422000885,grad_norm: 0.9968444037526811, iteration: 93119
loss: 0.9523495435714722,grad_norm: 0.9999992123562556, iteration: 93120
loss: 1.0052508115768433,grad_norm: 0.9999991873017974, iteration: 93121
loss: 1.007452368736267,grad_norm: 0.999999004511772, iteration: 93122
loss: 1.017072319984436,grad_norm: 0.999999900308703, iteration: 93123
loss: 1.0068215131759644,grad_norm: 0.9999991552254323, iteration: 93124
loss: 0.9820204377174377,grad_norm: 0.9999991308981737, iteration: 93125
loss: 0.9730870127677917,grad_norm: 0.9999992472073148, iteration: 93126
loss: 0.9748530387878418,grad_norm: 0.999999107512114, iteration: 93127
loss: 1.037612795829773,grad_norm: 0.9999991517893381, iteration: 93128
loss: 1.0641529560089111,grad_norm: 0.9999991329366401, iteration: 93129
loss: 1.0038100481033325,grad_norm: 0.9999990660028161, iteration: 93130
loss: 0.9828807711601257,grad_norm: 0.9999993464422817, iteration: 93131
loss: 0.9694474339485168,grad_norm: 0.9999989945900875, iteration: 93132
loss: 1.0135807991027832,grad_norm: 0.9417743582920218, iteration: 93133
loss: 1.0180503129959106,grad_norm: 0.932110030577365, iteration: 93134
loss: 0.9841969609260559,grad_norm: 0.9210281238640741, iteration: 93135
loss: 0.961489737033844,grad_norm: 0.9100490318522767, iteration: 93136
loss: 1.0320589542388916,grad_norm: 0.9999989428989426, iteration: 93137
loss: 1.0414057970046997,grad_norm: 0.9999989867423771, iteration: 93138
loss: 1.0322370529174805,grad_norm: 0.9999991512706193, iteration: 93139
loss: 0.9931720495223999,grad_norm: 0.9999992328413514, iteration: 93140
loss: 0.9962543249130249,grad_norm: 0.9312960866996162, iteration: 93141
loss: 1.0242751836776733,grad_norm: 0.9999992745322105, iteration: 93142
loss: 1.002742886543274,grad_norm: 0.9999989689336815, iteration: 93143
loss: 1.0079331398010254,grad_norm: 0.9999990891175077, iteration: 93144
loss: 0.9641236662864685,grad_norm: 0.999999089909394, iteration: 93145
loss: 0.9886415004730225,grad_norm: 0.9999991606744215, iteration: 93146
loss: 0.9798524975776672,grad_norm: 0.9999990374851087, iteration: 93147
loss: 0.9876406192779541,grad_norm: 0.970663779070864, iteration: 93148
loss: 1.0273842811584473,grad_norm: 0.9742328971395121, iteration: 93149
loss: 0.9923961758613586,grad_norm: 0.9999992745944787, iteration: 93150
loss: 1.0054717063903809,grad_norm: 0.9999992154658485, iteration: 93151
loss: 1.0130788087844849,grad_norm: 0.9729032055486635, iteration: 93152
loss: 0.9966423511505127,grad_norm: 0.9999993081906299, iteration: 93153
loss: 0.9956892728805542,grad_norm: 0.9150903953203117, iteration: 93154
loss: 1.0222015380859375,grad_norm: 0.9554766771514058, iteration: 93155
loss: 0.9915645122528076,grad_norm: 0.8943486216962455, iteration: 93156
loss: 1.0327203273773193,grad_norm: 0.9625609950561979, iteration: 93157
loss: 1.0130494832992554,grad_norm: 0.9999992568650011, iteration: 93158
loss: 1.0198086500167847,grad_norm: 0.9371640476398388, iteration: 93159
loss: 0.9530534744262695,grad_norm: 0.9999990834179752, iteration: 93160
loss: 1.0085008144378662,grad_norm: 0.9999991810282064, iteration: 93161
loss: 1.016108512878418,grad_norm: 0.9999989953587888, iteration: 93162
loss: 0.9910879135131836,grad_norm: 0.9999991152676526, iteration: 93163
loss: 1.025780200958252,grad_norm: 0.9999991073595337, iteration: 93164
loss: 1.0177099704742432,grad_norm: 0.8744852247553648, iteration: 93165
loss: 1.034338355064392,grad_norm: 0.9999991754832801, iteration: 93166
loss: 1.0316838026046753,grad_norm: 0.9214420205284811, iteration: 93167
loss: 0.97945636510849,grad_norm: 0.9602853626822225, iteration: 93168
loss: 1.0051088333129883,grad_norm: 0.999999371639335, iteration: 93169
loss: 0.9700729846954346,grad_norm: 0.9527868222780146, iteration: 93170
loss: 0.9853439927101135,grad_norm: 0.9999989799978207, iteration: 93171
loss: 0.969287097454071,grad_norm: 0.999999042254907, iteration: 93172
loss: 0.9740251302719116,grad_norm: 0.9999991529709544, iteration: 93173
loss: 0.9962158203125,grad_norm: 0.9999991071174058, iteration: 93174
loss: 1.032826542854309,grad_norm: 0.9999995801146914, iteration: 93175
loss: 1.0288078784942627,grad_norm: 0.999999189377255, iteration: 93176
loss: 1.0090129375457764,grad_norm: 0.9999991389646613, iteration: 93177
loss: 1.03923499584198,grad_norm: 0.9999997920300053, iteration: 93178
loss: 1.0015411376953125,grad_norm: 0.9932146766751768, iteration: 93179
loss: 0.9721477031707764,grad_norm: 0.9999991776701295, iteration: 93180
loss: 0.9595032334327698,grad_norm: 0.9999991362013327, iteration: 93181
loss: 1.032015323638916,grad_norm: 0.9477229907364962, iteration: 93182
loss: 1.0206273794174194,grad_norm: 0.9999998809485615, iteration: 93183
loss: 1.0104084014892578,grad_norm: 0.9999993107272287, iteration: 93184
loss: 1.0310977697372437,grad_norm: 0.9999993736788971, iteration: 93185
loss: 0.9965412020683289,grad_norm: 0.9373325803187303, iteration: 93186
loss: 0.9572867155075073,grad_norm: 0.9999991996274799, iteration: 93187
loss: 1.0775877237319946,grad_norm: 0.9999995366454985, iteration: 93188
loss: 1.0405149459838867,grad_norm: 0.9999991620836844, iteration: 93189
loss: 1.031813383102417,grad_norm: 0.9215005735625267, iteration: 93190
loss: 0.989044189453125,grad_norm: 0.8591471982448117, iteration: 93191
loss: 1.0195344686508179,grad_norm: 0.9999992719432232, iteration: 93192
loss: 0.9840512871742249,grad_norm: 0.9999990928873719, iteration: 93193
loss: 0.9821683168411255,grad_norm: 0.9999991722615892, iteration: 93194
loss: 0.9805396795272827,grad_norm: 0.9999990890992788, iteration: 93195
loss: 1.0395351648330688,grad_norm: 0.9999991756013772, iteration: 93196
loss: 1.0039044618606567,grad_norm: 0.9999991479778061, iteration: 93197
loss: 0.9878852367401123,grad_norm: 0.9567883509211705, iteration: 93198
loss: 0.9945532083511353,grad_norm: 0.999999244127767, iteration: 93199
loss: 1.0392730236053467,grad_norm: 0.9999995679450997, iteration: 93200
loss: 1.0091240406036377,grad_norm: 0.797597984072229, iteration: 93201
loss: 0.9936703443527222,grad_norm: 0.9820498263721399, iteration: 93202
loss: 1.0848712921142578,grad_norm: 0.9999997026263727, iteration: 93203
loss: 1.1094285249710083,grad_norm: 0.9999992279182411, iteration: 93204
loss: 0.9846398234367371,grad_norm: 0.9999991584310652, iteration: 93205
loss: 0.9909306168556213,grad_norm: 0.9999992562753143, iteration: 93206
loss: 1.0200047492980957,grad_norm: 0.9999990706779183, iteration: 93207
loss: 0.9986642003059387,grad_norm: 0.9815032138590458, iteration: 93208
loss: 1.0106542110443115,grad_norm: 0.9501946360618619, iteration: 93209
loss: 0.9917328953742981,grad_norm: 0.9999997089670616, iteration: 93210
loss: 1.0003248453140259,grad_norm: 0.999999654259681, iteration: 93211
loss: 0.9727550148963928,grad_norm: 0.9999991332675008, iteration: 93212
loss: 1.008513331413269,grad_norm: 0.9999993869698813, iteration: 93213
loss: 1.0500218868255615,grad_norm: 0.9999992254380523, iteration: 93214
loss: 1.0230883359909058,grad_norm: 0.9999997635876239, iteration: 93215
loss: 1.0651664733886719,grad_norm: 0.9999991881208173, iteration: 93216
loss: 1.035198450088501,grad_norm: 0.9351211654969969, iteration: 93217
loss: 1.020883560180664,grad_norm: 0.9999994281506824, iteration: 93218
loss: 0.9629286527633667,grad_norm: 0.9999993448906298, iteration: 93219
loss: 1.017075538635254,grad_norm: 0.999999624833622, iteration: 93220
loss: 1.0431286096572876,grad_norm: 0.9999995744860343, iteration: 93221
loss: 1.0113950967788696,grad_norm: 0.9999991443257326, iteration: 93222
loss: 0.9717814922332764,grad_norm: 0.9999996273435162, iteration: 93223
loss: 0.9880962371826172,grad_norm: 0.9999992412087947, iteration: 93224
loss: 1.0001522302627563,grad_norm: 0.9556644570250402, iteration: 93225
loss: 1.0162327289581299,grad_norm: 0.9999995968848807, iteration: 93226
loss: 1.0823475122451782,grad_norm: 0.9999993341517401, iteration: 93227
loss: 1.0423502922058105,grad_norm: 0.9999993025392512, iteration: 93228
loss: 0.9897062182426453,grad_norm: 0.9797804748470278, iteration: 93229
loss: 1.00675630569458,grad_norm: 0.9999992313028337, iteration: 93230
loss: 1.013261318206787,grad_norm: 0.9768875304646134, iteration: 93231
loss: 0.9842766523361206,grad_norm: 0.985453788062603, iteration: 93232
loss: 0.9998124837875366,grad_norm: 0.999999143046195, iteration: 93233
loss: 1.0164570808410645,grad_norm: 0.9999991156894914, iteration: 93234
loss: 1.020245909690857,grad_norm: 0.9999990665953249, iteration: 93235
loss: 1.0089677572250366,grad_norm: 0.9999991368538532, iteration: 93236
loss: 0.9941392540931702,grad_norm: 0.9999993058052393, iteration: 93237
loss: 0.9960829615592957,grad_norm: 0.9999992356010121, iteration: 93238
loss: 1.0214545726776123,grad_norm: 0.9076058184288728, iteration: 93239
loss: 0.9810648560523987,grad_norm: 0.9792885391681089, iteration: 93240
loss: 1.0313634872436523,grad_norm: 0.9999991415510544, iteration: 93241
loss: 1.0444270372390747,grad_norm: 0.9999991401566237, iteration: 93242
loss: 1.029423475265503,grad_norm: 0.9999996822386741, iteration: 93243
loss: 1.0098341703414917,grad_norm: 0.9981850009180975, iteration: 93244
loss: 1.0137156248092651,grad_norm: 0.9266283622298715, iteration: 93245
loss: 1.0195735692977905,grad_norm: 0.9999991245219411, iteration: 93246
loss: 0.9769078493118286,grad_norm: 0.99999917619319, iteration: 93247
loss: 1.003282904624939,grad_norm: 0.9767851358160435, iteration: 93248
loss: 0.9737347960472107,grad_norm: 0.9999991375504595, iteration: 93249
loss: 1.0253249406814575,grad_norm: 0.9999994617660827, iteration: 93250
loss: 1.0033470392227173,grad_norm: 0.9999990180845774, iteration: 93251
loss: 1.0325019359588623,grad_norm: 0.8829087371812737, iteration: 93252
loss: 0.9777353405952454,grad_norm: 0.9908082952094455, iteration: 93253
loss: 0.9954001903533936,grad_norm: 0.9999990937223937, iteration: 93254
loss: 1.016963243484497,grad_norm: 0.8362856158514763, iteration: 93255
loss: 1.007200837135315,grad_norm: 0.9999999288703105, iteration: 93256
loss: 1.0023791790008545,grad_norm: 0.9999989568357579, iteration: 93257
loss: 1.0190012454986572,grad_norm: 0.9999996801386399, iteration: 93258
loss: 1.0008559226989746,grad_norm: 0.8500637866987327, iteration: 93259
loss: 1.0019747018814087,grad_norm: 0.9326356638733475, iteration: 93260
loss: 0.9737915396690369,grad_norm: 0.9999992371495345, iteration: 93261
loss: 0.9898117184638977,grad_norm: 0.9999993540337443, iteration: 93262
loss: 0.9887850880622864,grad_norm: 0.99999901576386, iteration: 93263
loss: 0.9635633826255798,grad_norm: 0.9999998904063235, iteration: 93264
loss: 0.9804837107658386,grad_norm: 0.9999989604586281, iteration: 93265
loss: 1.012367606163025,grad_norm: 0.9999990443557595, iteration: 93266
loss: 0.9984776377677917,grad_norm: 0.9999990856645137, iteration: 93267
loss: 0.9964666962623596,grad_norm: 0.9999990852392323, iteration: 93268
loss: 1.0363774299621582,grad_norm: 0.9999996428283444, iteration: 93269
loss: 1.014471173286438,grad_norm: 0.9999991360518323, iteration: 93270
loss: 1.0001935958862305,grad_norm: 0.9999990560811154, iteration: 93271
loss: 1.0344420671463013,grad_norm: 0.9999994160944989, iteration: 93272
loss: 0.9482568502426147,grad_norm: 0.9999991773912263, iteration: 93273
loss: 1.0207297801971436,grad_norm: 0.9999992400908203, iteration: 93274
loss: 1.0163569450378418,grad_norm: 0.9999989861463016, iteration: 93275
loss: 1.0037243366241455,grad_norm: 0.9743630832340969, iteration: 93276
loss: 1.1040146350860596,grad_norm: 0.9999993843934889, iteration: 93277
loss: 1.073182463645935,grad_norm: 0.9601744233172511, iteration: 93278
loss: 1.0339816808700562,grad_norm: 0.9999990985131535, iteration: 93279
loss: 1.0276752710342407,grad_norm: 0.999999510460812, iteration: 93280
loss: 1.1883306503295898,grad_norm: 0.9999996710182893, iteration: 93281
loss: 0.9924109578132629,grad_norm: 0.9999991629003943, iteration: 93282
loss: 0.977128267288208,grad_norm: 0.9523888836158736, iteration: 93283
loss: 1.0320292711257935,grad_norm: 0.9999991183620929, iteration: 93284
loss: 0.9922611713409424,grad_norm: 0.9999991779294557, iteration: 93285
loss: 0.9692794680595398,grad_norm: 0.9999991841531902, iteration: 93286
loss: 0.9687906503677368,grad_norm: 0.9999991566181511, iteration: 93287
loss: 0.986316442489624,grad_norm: 0.9999990820223722, iteration: 93288
loss: 1.0149742364883423,grad_norm: 0.9254104997301588, iteration: 93289
loss: 0.9948669075965881,grad_norm: 0.9999991231149197, iteration: 93290
loss: 1.0393073558807373,grad_norm: 0.8590796544803886, iteration: 93291
loss: 0.9816501140594482,grad_norm: 0.8962193191496475, iteration: 93292
loss: 0.9825695753097534,grad_norm: 0.9673804052622, iteration: 93293
loss: 0.9793724417686462,grad_norm: 0.9899362669056182, iteration: 93294
loss: 1.0212559700012207,grad_norm: 0.999998978600965, iteration: 93295
loss: 1.0024023056030273,grad_norm: 0.9072005988359294, iteration: 93296
loss: 1.0073760747909546,grad_norm: 0.9999992520901836, iteration: 93297
loss: 0.9911076426506042,grad_norm: 0.9999994860593925, iteration: 93298
loss: 1.0239170789718628,grad_norm: 0.9723701493379251, iteration: 93299
loss: 1.0041409730911255,grad_norm: 0.9999991679453412, iteration: 93300
loss: 1.067116141319275,grad_norm: 0.9999991199226537, iteration: 93301
loss: 1.0123697519302368,grad_norm: 0.9999991090441743, iteration: 93302
loss: 1.0099722146987915,grad_norm: 0.9999993019791611, iteration: 93303
loss: 0.9995777010917664,grad_norm: 0.9999991950315623, iteration: 93304
loss: 0.9965044856071472,grad_norm: 0.8611180699819128, iteration: 93305
loss: 0.9693576097488403,grad_norm: 0.9528521434261025, iteration: 93306
loss: 0.9552222490310669,grad_norm: 0.9999991398055371, iteration: 93307
loss: 1.013732671737671,grad_norm: 0.9999991153056633, iteration: 93308
loss: 1.0365201234817505,grad_norm: 0.9999992734455504, iteration: 93309
loss: 0.9990347623825073,grad_norm: 0.9104288708404037, iteration: 93310
loss: 1.0264638662338257,grad_norm: 0.9515093739105288, iteration: 93311
loss: 0.9956783056259155,grad_norm: 0.9516178407251478, iteration: 93312
loss: 0.9849395751953125,grad_norm: 0.999998982269801, iteration: 93313
loss: 0.9979998469352722,grad_norm: 0.9999991092382574, iteration: 93314
loss: 0.9848645329475403,grad_norm: 0.9999991856989657, iteration: 93315
loss: 0.9864318370819092,grad_norm: 0.9999993694308722, iteration: 93316
loss: 1.0151007175445557,grad_norm: 0.9999990613969375, iteration: 93317
loss: 1.0051716566085815,grad_norm: 0.9999991829457473, iteration: 93318
loss: 1.0384914875030518,grad_norm: 0.9999997953206147, iteration: 93319
loss: 0.995126485824585,grad_norm: 0.9999990649393635, iteration: 93320
loss: 0.9863343834877014,grad_norm: 0.9999992119095013, iteration: 93321
loss: 0.9656581282615662,grad_norm: 0.9999992008510425, iteration: 93322
loss: 0.9748733639717102,grad_norm: 0.905914297335541, iteration: 93323
loss: 1.0099661350250244,grad_norm: 0.9999993570596926, iteration: 93324
loss: 1.0491160154342651,grad_norm: 0.9999992933334042, iteration: 93325
loss: 1.0247211456298828,grad_norm: 0.9999990145167487, iteration: 93326
loss: 1.0233205556869507,grad_norm: 0.970943982294917, iteration: 93327
loss: 0.9736128449440002,grad_norm: 0.99999914617669, iteration: 93328
loss: 1.007374882698059,grad_norm: 0.9999992532341535, iteration: 93329
loss: 1.0169332027435303,grad_norm: 0.9999989592834344, iteration: 93330
loss: 0.9941702485084534,grad_norm: 0.9999992553045665, iteration: 93331
loss: 1.0115971565246582,grad_norm: 0.999999420872747, iteration: 93332
loss: 0.9787877798080444,grad_norm: 0.9999990551699713, iteration: 93333
loss: 1.0066722631454468,grad_norm: 0.9999992455857095, iteration: 93334
loss: 1.0497246980667114,grad_norm: 0.9999992593783615, iteration: 93335
loss: 0.9916021823883057,grad_norm: 0.8692237077156019, iteration: 93336
loss: 0.9771188497543335,grad_norm: 0.9999990994939041, iteration: 93337
loss: 1.038747787475586,grad_norm: 0.9999991667530902, iteration: 93338
loss: 1.0364603996276855,grad_norm: 0.937702387979865, iteration: 93339
loss: 1.0253149271011353,grad_norm: 0.994826851625251, iteration: 93340
loss: 0.9764242768287659,grad_norm: 0.9999992630256106, iteration: 93341
loss: 1.0294736623764038,grad_norm: 0.9956457947147003, iteration: 93342
loss: 0.982751190662384,grad_norm: 0.9438067480717351, iteration: 93343
loss: 1.0009760856628418,grad_norm: 0.9999992433695615, iteration: 93344
loss: 0.9948328137397766,grad_norm: 0.9954384990044601, iteration: 93345
loss: 1.0629576444625854,grad_norm: 0.9999990978318034, iteration: 93346
loss: 1.0102627277374268,grad_norm: 0.9584905529387938, iteration: 93347
loss: 1.0139135122299194,grad_norm: 0.9999992434337028, iteration: 93348
loss: 1.0028831958770752,grad_norm: 0.9999990513916818, iteration: 93349
loss: 1.0056555271148682,grad_norm: 0.9758838356036357, iteration: 93350
loss: 1.0014139413833618,grad_norm: 0.9999991219821963, iteration: 93351
loss: 0.9991275668144226,grad_norm: 0.9999991223688885, iteration: 93352
loss: 1.0161093473434448,grad_norm: 0.9999988843140037, iteration: 93353
loss: 0.9929502606391907,grad_norm: 0.8906779086482691, iteration: 93354
loss: 1.047736406326294,grad_norm: 0.9999991944398583, iteration: 93355
loss: 1.0159871578216553,grad_norm: 0.9007756684460669, iteration: 93356
loss: 0.9878858327865601,grad_norm: 0.9777946859149615, iteration: 93357
loss: 0.9977990388870239,grad_norm: 0.9999990808865798, iteration: 93358
loss: 0.971152126789093,grad_norm: 0.9999991873314973, iteration: 93359
loss: 1.0115270614624023,grad_norm: 0.9999991006687268, iteration: 93360
loss: 0.9374809861183167,grad_norm: 0.9884931526190097, iteration: 93361
loss: 0.9925230145454407,grad_norm: 0.953123340405992, iteration: 93362
loss: 1.0279909372329712,grad_norm: 0.9999991990723411, iteration: 93363
loss: 0.9763522148132324,grad_norm: 0.9999991143791275, iteration: 93364
loss: 0.9683104753494263,grad_norm: 0.9999992446232605, iteration: 93365
loss: 0.9905825853347778,grad_norm: 0.9999989802995568, iteration: 93366
loss: 0.9958552122116089,grad_norm: 0.9999990845525508, iteration: 93367
loss: 0.9969486594200134,grad_norm: 0.9999991644516194, iteration: 93368
loss: 0.9908925890922546,grad_norm: 0.943348902489886, iteration: 93369
loss: 0.9999107122421265,grad_norm: 0.9999989774871595, iteration: 93370
loss: 0.9674030542373657,grad_norm: 0.9999989593360809, iteration: 93371
loss: 0.9389904737472534,grad_norm: 0.9999990563687272, iteration: 93372
loss: 1.0165399312973022,grad_norm: 0.9999990503317073, iteration: 93373
loss: 1.0534228086471558,grad_norm: 0.9173566509084299, iteration: 93374
loss: 1.0013024806976318,grad_norm: 0.9700220348361199, iteration: 93375
loss: 0.9984707236289978,grad_norm: 0.9495250780664861, iteration: 93376
loss: 1.00295090675354,grad_norm: 0.9999993318049936, iteration: 93377
loss: 0.9859517216682434,grad_norm: 0.9999991688861208, iteration: 93378
loss: 0.991269052028656,grad_norm: 0.9999991333187541, iteration: 93379
loss: 1.1856415271759033,grad_norm: 0.9999997365260537, iteration: 93380
loss: 0.9823763966560364,grad_norm: 0.9124908005044744, iteration: 93381
loss: 0.9828251004219055,grad_norm: 0.999999234111561, iteration: 93382
loss: 0.993312418460846,grad_norm: 0.9999991589713968, iteration: 93383
loss: 1.0145412683486938,grad_norm: 0.9999992944474128, iteration: 93384
loss: 1.0512933731079102,grad_norm: 0.9999991526466481, iteration: 93385
loss: 1.0176889896392822,grad_norm: 0.9486744960708879, iteration: 93386
loss: 1.007307767868042,grad_norm: 0.9999990895960423, iteration: 93387
loss: 1.002679467201233,grad_norm: 0.9999991381797968, iteration: 93388
loss: 0.9954599738121033,grad_norm: 0.9999990969602653, iteration: 93389
loss: 0.9941697120666504,grad_norm: 0.9999990769646485, iteration: 93390
loss: 1.023215889930725,grad_norm: 0.9999990594276349, iteration: 93391
loss: 0.9994359612464905,grad_norm: 0.9879916434778085, iteration: 93392
loss: 0.9903179407119751,grad_norm: 0.9982677049774227, iteration: 93393
loss: 0.9968672394752502,grad_norm: 0.999998972636286, iteration: 93394
loss: 1.0194976329803467,grad_norm: 0.8282226481377709, iteration: 93395
loss: 0.9830507636070251,grad_norm: 0.9845997400809527, iteration: 93396
loss: 1.0070477724075317,grad_norm: 0.9958680477459756, iteration: 93397
loss: 1.0147346258163452,grad_norm: 0.999999729170384, iteration: 93398
loss: 1.018759846687317,grad_norm: 0.9999994431510375, iteration: 93399
loss: 0.9946320056915283,grad_norm: 0.9431064530684862, iteration: 93400
loss: 0.9810532331466675,grad_norm: 0.99999922264432, iteration: 93401
loss: 1.0295159816741943,grad_norm: 0.9524821982576926, iteration: 93402
loss: 0.9755845069885254,grad_norm: 0.9406273722616877, iteration: 93403
loss: 1.0047911405563354,grad_norm: 0.9997449823221359, iteration: 93404
loss: 1.0129599571228027,grad_norm: 0.9977317945140077, iteration: 93405
loss: 0.9827486276626587,grad_norm: 0.9369137172962307, iteration: 93406
loss: 1.0406501293182373,grad_norm: 0.9999994817829749, iteration: 93407
loss: 1.0302326679229736,grad_norm: 0.9999992239557607, iteration: 93408
loss: 0.9927163124084473,grad_norm: 0.9999990924090587, iteration: 93409
loss: 1.01618492603302,grad_norm: 0.9999993639723708, iteration: 93410
loss: 1.001720666885376,grad_norm: 0.9999992406414921, iteration: 93411
loss: 1.0138477087020874,grad_norm: 0.9999990421125647, iteration: 93412
loss: 1.0512617826461792,grad_norm: 0.9999990846695217, iteration: 93413
loss: 0.9786871075630188,grad_norm: 0.9999991383775786, iteration: 93414
loss: 1.0023596286773682,grad_norm: 0.9999992125060622, iteration: 93415
loss: 1.013838529586792,grad_norm: 0.9999993616557277, iteration: 93416
loss: 1.010506510734558,grad_norm: 0.9999990379236013, iteration: 93417
loss: 1.035585641860962,grad_norm: 0.9999995751546624, iteration: 93418
loss: 1.032942771911621,grad_norm: 0.9999997991461037, iteration: 93419
loss: 1.0254459381103516,grad_norm: 0.9999992213416737, iteration: 93420
loss: 0.9926164150238037,grad_norm: 0.9426041682867111, iteration: 93421
loss: 0.9983665943145752,grad_norm: 0.9999992031133499, iteration: 93422
loss: 1.0162866115570068,grad_norm: 0.9999991869099195, iteration: 93423
loss: 1.0118138790130615,grad_norm: 0.9164149766561792, iteration: 93424
loss: 0.9923480749130249,grad_norm: 0.9999991221172192, iteration: 93425
loss: 0.9858932495117188,grad_norm: 0.9495280765424038, iteration: 93426
loss: 0.9971018433570862,grad_norm: 0.9548744950448294, iteration: 93427
loss: 0.9929727911949158,grad_norm: 0.9999991250278537, iteration: 93428
loss: 1.010318636894226,grad_norm: 0.9106699843048431, iteration: 93429
loss: 1.0010172128677368,grad_norm: 0.9999991480863806, iteration: 93430
loss: 0.9960405230522156,grad_norm: 0.9999991739297149, iteration: 93431
loss: 1.0216045379638672,grad_norm: 0.9511478267968172, iteration: 93432
loss: 1.0172219276428223,grad_norm: 0.9999993009743601, iteration: 93433
loss: 1.0379709005355835,grad_norm: 0.999999188781103, iteration: 93434
loss: 1.026921033859253,grad_norm: 0.9999992631383937, iteration: 93435
loss: 0.9891765713691711,grad_norm: 0.9999991196639525, iteration: 93436
loss: 1.0197032690048218,grad_norm: 0.999999846837022, iteration: 93437
loss: 0.9926728010177612,grad_norm: 0.9999991270017159, iteration: 93438
loss: 1.014115571975708,grad_norm: 0.999999048588359, iteration: 93439
loss: 1.0594102144241333,grad_norm: 0.9999991281806158, iteration: 93440
loss: 0.9999114274978638,grad_norm: 0.9999991957396318, iteration: 93441
loss: 1.0237329006195068,grad_norm: 0.9999990531842261, iteration: 93442
loss: 0.9993449449539185,grad_norm: 0.9999990400058367, iteration: 93443
loss: 0.973555326461792,grad_norm: 0.9999989056777634, iteration: 93444
loss: 1.012392520904541,grad_norm: 0.9999992941987949, iteration: 93445
loss: 1.0172114372253418,grad_norm: 0.999999163134099, iteration: 93446
loss: 0.989876389503479,grad_norm: 0.9680398604486109, iteration: 93447
loss: 1.003053903579712,grad_norm: 0.999999297332351, iteration: 93448
loss: 1.01942777633667,grad_norm: 0.9999991100523413, iteration: 93449
loss: 0.9897202253341675,grad_norm: 0.9377090769476564, iteration: 93450
loss: 0.9939543008804321,grad_norm: 0.9191238796679362, iteration: 93451
loss: 1.0207172632217407,grad_norm: 0.9694417021247815, iteration: 93452
loss: 1.0124953985214233,grad_norm: 0.9498255778439139, iteration: 93453
loss: 1.0116137266159058,grad_norm: 0.974765776702878, iteration: 93454
loss: 1.024885892868042,grad_norm: 0.9687653702304485, iteration: 93455
loss: 1.0122573375701904,grad_norm: 0.9999991130127595, iteration: 93456
loss: 0.9906429648399353,grad_norm: 0.9999991391937879, iteration: 93457
loss: 0.9676238894462585,grad_norm: 0.8763930624493708, iteration: 93458
loss: 0.968565046787262,grad_norm: 0.9999991781585805, iteration: 93459
loss: 0.9486491084098816,grad_norm: 0.9999991568378874, iteration: 93460
loss: 1.0221881866455078,grad_norm: 0.9999990430635028, iteration: 93461
loss: 0.9784812927246094,grad_norm: 0.9999992805511565, iteration: 93462
loss: 0.9969362616539001,grad_norm: 0.99999914647989, iteration: 93463
loss: 1.0115654468536377,grad_norm: 0.882639255607125, iteration: 93464
loss: 0.9579414129257202,grad_norm: 0.9999991788863049, iteration: 93465
loss: 1.0156970024108887,grad_norm: 0.999999198862965, iteration: 93466
loss: 1.0658036470413208,grad_norm: 0.9105880870819515, iteration: 93467
loss: 0.9914616346359253,grad_norm: 0.9364792625999013, iteration: 93468
loss: 0.9919999241828918,grad_norm: 0.9999990570425104, iteration: 93469
loss: 0.9998212456703186,grad_norm: 0.9999990777592916, iteration: 93470
loss: 1.023879885673523,grad_norm: 0.9999992722451584, iteration: 93471
loss: 1.0001931190490723,grad_norm: 0.9999991400824426, iteration: 93472
loss: 1.0077613592147827,grad_norm: 0.9127211821758643, iteration: 93473
loss: 1.0160797834396362,grad_norm: 0.9999992758716043, iteration: 93474
loss: 1.0062503814697266,grad_norm: 0.892308798124788, iteration: 93475
loss: 0.998839795589447,grad_norm: 0.955630649016606, iteration: 93476
loss: 1.0134679079055786,grad_norm: 0.9999994399531937, iteration: 93477
loss: 0.9785830974578857,grad_norm: 0.999998974571868, iteration: 93478
loss: 1.026997447013855,grad_norm: 0.9883743068359658, iteration: 93479
loss: 1.0109434127807617,grad_norm: 0.9999991820687125, iteration: 93480
loss: 0.9814286231994629,grad_norm: 0.9999991400655207, iteration: 93481
loss: 1.0030044317245483,grad_norm: 0.9999991598362626, iteration: 93482
loss: 0.9913654327392578,grad_norm: 0.9999992004597783, iteration: 93483
loss: 0.9938011765480042,grad_norm: 0.999999104253457, iteration: 93484
loss: 1.0131497383117676,grad_norm: 0.968688552227418, iteration: 93485
loss: 0.9883466958999634,grad_norm: 0.9999992686672025, iteration: 93486
loss: 0.9560902118682861,grad_norm: 0.8651467183092574, iteration: 93487
loss: 1.01608145236969,grad_norm: 0.9999990802075698, iteration: 93488
loss: 0.9657623171806335,grad_norm: 0.9999990147303947, iteration: 93489
loss: 0.9936408400535583,grad_norm: 0.9999990861303764, iteration: 93490
loss: 0.9769344329833984,grad_norm: 0.9742820118416765, iteration: 93491
loss: 0.9830507636070251,grad_norm: 0.9999991788966796, iteration: 93492
loss: 1.006514549255371,grad_norm: 0.9999992562513268, iteration: 93493
loss: 1.030139446258545,grad_norm: 0.9999991160155101, iteration: 93494
loss: 1.082617998123169,grad_norm: 0.9999991608398141, iteration: 93495
loss: 0.986754298210144,grad_norm: 0.9619119764409897, iteration: 93496
loss: 0.9986079931259155,grad_norm: 0.8845540788244892, iteration: 93497
loss: 1.0050923824310303,grad_norm: 0.9999990860934085, iteration: 93498
loss: 1.0204235315322876,grad_norm: 0.9781896840422555, iteration: 93499
loss: 1.0054717063903809,grad_norm: 0.9619713637205196, iteration: 93500
loss: 1.0006316900253296,grad_norm: 0.9999990351302985, iteration: 93501
loss: 1.010414719581604,grad_norm: 0.9999991408330622, iteration: 93502
loss: 0.9835610389709473,grad_norm: 0.9999996911441442, iteration: 93503
loss: 1.003023386001587,grad_norm: 0.9999992538329339, iteration: 93504
loss: 1.0761739015579224,grad_norm: 0.9999999204842327, iteration: 93505
loss: 1.0063763856887817,grad_norm: 0.9748170506639868, iteration: 93506
loss: 1.0081793069839478,grad_norm: 0.9299030873550588, iteration: 93507
loss: 0.9581615924835205,grad_norm: 0.9999990916837249, iteration: 93508
loss: 0.9924092292785645,grad_norm: 0.9999990659905055, iteration: 93509
loss: 1.022125005722046,grad_norm: 0.9999990565889616, iteration: 93510
loss: 1.0085766315460205,grad_norm: 0.9275626472475961, iteration: 93511
loss: 1.002274990081787,grad_norm: 0.9999993096738667, iteration: 93512
loss: 0.9917771816253662,grad_norm: 0.9754759146127425, iteration: 93513
loss: 1.0306694507598877,grad_norm: 0.9999991171832132, iteration: 93514
loss: 1.0718849897384644,grad_norm: 0.9999991238742512, iteration: 93515
loss: 0.987144410610199,grad_norm: 0.9275258151674896, iteration: 93516
loss: 1.006296157836914,grad_norm: 0.9999991538901954, iteration: 93517
loss: 0.9437990188598633,grad_norm: 0.9999992132512737, iteration: 93518
loss: 0.9878376722335815,grad_norm: 0.9999990388529857, iteration: 93519
loss: 1.007583737373352,grad_norm: 0.9999995004968658, iteration: 93520
loss: 1.0216970443725586,grad_norm: 0.9710815225790835, iteration: 93521
loss: 1.0004228353500366,grad_norm: 0.9999991549332558, iteration: 93522
loss: 0.9726684093475342,grad_norm: 0.9999993563459952, iteration: 93523
loss: 1.0243889093399048,grad_norm: 0.9788629778388321, iteration: 93524
loss: 1.079933762550354,grad_norm: 0.9999996671291157, iteration: 93525
loss: 1.000828742980957,grad_norm: 0.999999287983087, iteration: 93526
loss: 1.015679955482483,grad_norm: 0.9999992054304706, iteration: 93527
loss: 1.0084073543548584,grad_norm: 0.9999992664632961, iteration: 93528
loss: 1.0262771844863892,grad_norm: 0.9782623802420295, iteration: 93529
loss: 0.9573715925216675,grad_norm: 0.9999992718095592, iteration: 93530
loss: 0.9757206439971924,grad_norm: 0.8829557523112294, iteration: 93531
loss: 0.988783597946167,grad_norm: 0.9999995947578941, iteration: 93532
loss: 1.0083417892456055,grad_norm: 0.9999989399578971, iteration: 93533
loss: 0.980767011642456,grad_norm: 0.9937317183046219, iteration: 93534
loss: 1.0269678831100464,grad_norm: 0.9999991951336741, iteration: 93535
loss: 1.026002049446106,grad_norm: 0.9999992415060563, iteration: 93536
loss: 1.1417039632797241,grad_norm: 0.9999995656537704, iteration: 93537
loss: 1.02793288230896,grad_norm: 0.9859673631857948, iteration: 93538
loss: 1.0026330947875977,grad_norm: 0.9999990845716417, iteration: 93539
loss: 1.0572669506072998,grad_norm: 0.9999992471572428, iteration: 93540
loss: 1.0080537796020508,grad_norm: 0.8902484870009356, iteration: 93541
loss: 0.9812031388282776,grad_norm: 0.9999991213091146, iteration: 93542
loss: 1.0002565383911133,grad_norm: 0.9999990219983859, iteration: 93543
loss: 1.0022814273834229,grad_norm: 0.9999992334874936, iteration: 93544
loss: 1.014886498451233,grad_norm: 0.9999990812956334, iteration: 93545
loss: 1.0376172065734863,grad_norm: 0.9894465128405846, iteration: 93546
loss: 1.026699423789978,grad_norm: 0.9957274954288303, iteration: 93547
loss: 1.0066330432891846,grad_norm: 0.9999993300414761, iteration: 93548
loss: 1.024573802947998,grad_norm: 0.9999991467968178, iteration: 93549
loss: 0.9844257235527039,grad_norm: 0.9999991461756113, iteration: 93550
loss: 0.9816857576370239,grad_norm: 0.9999991178361904, iteration: 93551
loss: 0.9870492815971375,grad_norm: 0.9599943848259116, iteration: 93552
loss: 1.0020644664764404,grad_norm: 0.9999992293850953, iteration: 93553
loss: 1.0052133798599243,grad_norm: 0.9999990245280194, iteration: 93554
loss: 0.9775709509849548,grad_norm: 0.9299149335978184, iteration: 93555
loss: 0.9830393195152283,grad_norm: 0.9999994206825141, iteration: 93556
loss: 0.9946334362030029,grad_norm: 0.999999156468279, iteration: 93557
loss: 1.002342700958252,grad_norm: 0.9987891766575313, iteration: 93558
loss: 0.9539690613746643,grad_norm: 0.9999990322638621, iteration: 93559
loss: 0.9976598620414734,grad_norm: 0.9605473624763416, iteration: 93560
loss: 1.0041598081588745,grad_norm: 0.9999990755158418, iteration: 93561
loss: 1.0014245510101318,grad_norm: 0.9999994282886615, iteration: 93562
loss: 1.0002214908599854,grad_norm: 0.8846563369009564, iteration: 93563
loss: 0.9875612258911133,grad_norm: 0.9951917178603646, iteration: 93564
loss: 1.0435216426849365,grad_norm: 0.9999995422193921, iteration: 93565
loss: 1.0056498050689697,grad_norm: 0.9999990729936851, iteration: 93566
loss: 0.9981414675712585,grad_norm: 0.9472117481845985, iteration: 93567
loss: 1.029007911682129,grad_norm: 0.9999991485300009, iteration: 93568
loss: 0.9924516677856445,grad_norm: 0.9967498966261721, iteration: 93569
loss: 1.010665774345398,grad_norm: 0.9999990182841332, iteration: 93570
loss: 1.0066816806793213,grad_norm: 0.9999991846363172, iteration: 93571
loss: 0.9866018295288086,grad_norm: 0.964308303127903, iteration: 93572
loss: 1.0269466638565063,grad_norm: 0.9999992692130807, iteration: 93573
loss: 1.0283606052398682,grad_norm: 0.9999992009684993, iteration: 93574
loss: 0.9826204180717468,grad_norm: 0.9999991764266164, iteration: 93575
loss: 0.9978141188621521,grad_norm: 0.9999992132146204, iteration: 93576
loss: 1.0186408758163452,grad_norm: 0.9999992316622229, iteration: 93577
loss: 0.9905204772949219,grad_norm: 0.969122370824084, iteration: 93578
loss: 1.0294337272644043,grad_norm: 0.9999990611102875, iteration: 93579
loss: 1.0179955959320068,grad_norm: 0.9999990387713263, iteration: 93580
loss: 0.9996750354766846,grad_norm: 0.9161049371953344, iteration: 93581
loss: 0.9982724785804749,grad_norm: 0.8800734832411641, iteration: 93582
loss: 0.977994978427887,grad_norm: 0.994009846215758, iteration: 93583
loss: 1.020811915397644,grad_norm: 0.9999992183336407, iteration: 93584
loss: 0.9591093063354492,grad_norm: 0.9996522870795219, iteration: 93585
loss: 0.9673672318458557,grad_norm: 0.8934030267305773, iteration: 93586
loss: 1.0084619522094727,grad_norm: 0.9999991596168994, iteration: 93587
loss: 1.0437254905700684,grad_norm: 0.8676096201071265, iteration: 93588
loss: 1.0021346807479858,grad_norm: 0.9972015454258498, iteration: 93589
loss: 1.0106067657470703,grad_norm: 0.9999993494343158, iteration: 93590
loss: 1.0030524730682373,grad_norm: 0.9999991097413287, iteration: 93591
loss: 1.0354647636413574,grad_norm: 0.9999990181277963, iteration: 93592
loss: 1.0015360116958618,grad_norm: 0.9999991963824749, iteration: 93593
loss: 0.951123833656311,grad_norm: 0.8788391376801765, iteration: 93594
loss: 0.9939786195755005,grad_norm: 0.9428629841045447, iteration: 93595
loss: 1.0077245235443115,grad_norm: 0.999999231723445, iteration: 93596
loss: 0.973193883895874,grad_norm: 0.9999992998324002, iteration: 93597
loss: 1.0143243074417114,grad_norm: 0.9152618136855516, iteration: 93598
loss: 0.9814127087593079,grad_norm: 0.9990948314546868, iteration: 93599
loss: 1.0152767896652222,grad_norm: 0.9999991288882372, iteration: 93600
loss: 1.0071958303451538,grad_norm: 0.9999991670215616, iteration: 93601
loss: 1.0055559873580933,grad_norm: 0.9999990676090745, iteration: 93602
loss: 1.0252302885055542,grad_norm: 0.9999991838124429, iteration: 93603
loss: 1.0111068487167358,grad_norm: 0.9999989218013782, iteration: 93604
loss: 0.9695329070091248,grad_norm: 0.8714106084504945, iteration: 93605
loss: 1.0174319744110107,grad_norm: 0.9999990700956408, iteration: 93606
loss: 0.9743322730064392,grad_norm: 0.9999992086040772, iteration: 93607
loss: 1.01707923412323,grad_norm: 0.9999993481735611, iteration: 93608
loss: 0.9994019269943237,grad_norm: 0.9999990381362758, iteration: 93609
loss: 0.9729524850845337,grad_norm: 0.857781020167086, iteration: 93610
loss: 0.99197918176651,grad_norm: 0.9999990384654921, iteration: 93611
loss: 1.081825852394104,grad_norm: 0.9999994204874034, iteration: 93612
loss: 0.9801232814788818,grad_norm: 0.9740609046788935, iteration: 93613
loss: 1.0039303302764893,grad_norm: 0.9793072960243815, iteration: 93614
loss: 1.0230900049209595,grad_norm: 0.9537894724605482, iteration: 93615
loss: 1.0066133737564087,grad_norm: 0.9616377837693825, iteration: 93616
loss: 0.9817503094673157,grad_norm: 0.9999992518364582, iteration: 93617
loss: 1.0266767740249634,grad_norm: 0.9999995138203706, iteration: 93618
loss: 0.9923707246780396,grad_norm: 0.9999992936925987, iteration: 93619
loss: 0.9861962795257568,grad_norm: 0.9999991678199478, iteration: 93620
loss: 1.0039433240890503,grad_norm: 0.99999932099801, iteration: 93621
loss: 1.0083658695220947,grad_norm: 0.9999994236303542, iteration: 93622
loss: 1.0204038619995117,grad_norm: 0.9999991637084031, iteration: 93623
loss: 1.0223228931427002,grad_norm: 0.9999993077358359, iteration: 93624
loss: 0.987557053565979,grad_norm: 0.9999990788797043, iteration: 93625
loss: 0.9934203624725342,grad_norm: 0.9999991322839763, iteration: 93626
loss: 1.0125477313995361,grad_norm: 0.9999989646824086, iteration: 93627
loss: 1.0073646306991577,grad_norm: 0.9365975758487005, iteration: 93628
loss: 0.9789670705795288,grad_norm: 0.9999990336734079, iteration: 93629
loss: 1.0222647190093994,grad_norm: 0.9999991838233834, iteration: 93630
loss: 0.9605228900909424,grad_norm: 0.9999993281151744, iteration: 93631
loss: 1.0147143602371216,grad_norm: 0.9999992653304993, iteration: 93632
loss: 1.0094988346099854,grad_norm: 0.9999990602866801, iteration: 93633
loss: 1.0117363929748535,grad_norm: 0.9936126995090755, iteration: 93634
loss: 1.0241670608520508,grad_norm: 0.9869823715787686, iteration: 93635
loss: 0.9783931970596313,grad_norm: 0.9999990452718555, iteration: 93636
loss: 0.997158408164978,grad_norm: 0.9999989721095683, iteration: 93637
loss: 0.9857498407363892,grad_norm: 0.9999991375502926, iteration: 93638
loss: 0.9817789793014526,grad_norm: 0.9861873375223408, iteration: 93639
loss: 1.0078917741775513,grad_norm: 0.8695321625533227, iteration: 93640
loss: 1.0006732940673828,grad_norm: 0.999999142240206, iteration: 93641
loss: 1.050243616104126,grad_norm: 0.9999991821716783, iteration: 93642
loss: 0.9963364601135254,grad_norm: 0.9999992639750758, iteration: 93643
loss: 0.9865272045135498,grad_norm: 0.9999990468544189, iteration: 93644
loss: 0.9567018747329712,grad_norm: 0.9999988640581552, iteration: 93645
loss: 0.9625760316848755,grad_norm: 0.9999990828302483, iteration: 93646
loss: 1.0289777517318726,grad_norm: 0.999999249536455, iteration: 93647
loss: 1.0121407508850098,grad_norm: 0.999999120223316, iteration: 93648
loss: 0.9883813858032227,grad_norm: 0.9859556462314781, iteration: 93649
loss: 0.9862821698188782,grad_norm: 0.9337014305084677, iteration: 93650
loss: 0.9789778590202332,grad_norm: 0.9999992200056741, iteration: 93651
loss: 1.0476454496383667,grad_norm: 0.9999994698604063, iteration: 93652
loss: 1.0152943134307861,grad_norm: 0.9958136455763372, iteration: 93653
loss: 1.0112178325653076,grad_norm: 0.9999992054762922, iteration: 93654
loss: 0.9887275099754333,grad_norm: 0.9999991741722497, iteration: 93655
loss: 1.015758991241455,grad_norm: 0.9623085481573369, iteration: 93656
loss: 1.011512041091919,grad_norm: 0.9999990205255118, iteration: 93657
loss: 0.9951807260513306,grad_norm: 0.9999990890854946, iteration: 93658
loss: 1.0136429071426392,grad_norm: 0.9999993754667632, iteration: 93659
loss: 1.012878656387329,grad_norm: 0.8870746953643244, iteration: 93660
loss: 1.0043092966079712,grad_norm: 0.9999991191945006, iteration: 93661
loss: 0.9964895844459534,grad_norm: 0.9999991882573709, iteration: 93662
loss: 0.9902366399765015,grad_norm: 0.9999992255044755, iteration: 93663
loss: 0.999326229095459,grad_norm: 0.9999990491033313, iteration: 93664
loss: 1.0231702327728271,grad_norm: 0.999999182591461, iteration: 93665
loss: 1.0380946397781372,grad_norm: 0.993821892599419, iteration: 93666
loss: 1.0457987785339355,grad_norm: 0.9999992023618729, iteration: 93667
loss: 0.982904851436615,grad_norm: 0.9999990299935511, iteration: 93668
loss: 0.976292073726654,grad_norm: 0.9782521510732111, iteration: 93669
loss: 1.0137245655059814,grad_norm: 0.9999992582188002, iteration: 93670
loss: 1.0062869787216187,grad_norm: 0.9497001860498809, iteration: 93671
loss: 1.0142459869384766,grad_norm: 0.9999992193895144, iteration: 93672
loss: 0.9811868071556091,grad_norm: 0.9999990892680727, iteration: 93673
loss: 0.9773375391960144,grad_norm: 0.9521152235298166, iteration: 93674
loss: 0.9788316488265991,grad_norm: 0.9999991878789007, iteration: 93675
loss: 0.9865140318870544,grad_norm: 0.9999993060267128, iteration: 93676
loss: 1.0132306814193726,grad_norm: 0.9999991125763784, iteration: 93677
loss: 1.01494300365448,grad_norm: 0.9999991964060526, iteration: 93678
loss: 1.004209280014038,grad_norm: 0.9142718028560877, iteration: 93679
loss: 1.0340521335601807,grad_norm: 0.999998978507294, iteration: 93680
loss: 1.0400514602661133,grad_norm: 0.9999990997042423, iteration: 93681
loss: 0.9975547194480896,grad_norm: 0.9999991935760979, iteration: 93682
loss: 0.992861270904541,grad_norm: 0.9999993083023585, iteration: 93683
loss: 1.0014488697052002,grad_norm: 0.9999992568691274, iteration: 93684
loss: 0.9399954676628113,grad_norm: 0.9999991947442621, iteration: 93685
loss: 0.98004150390625,grad_norm: 0.9999992452386868, iteration: 93686
loss: 1.0393552780151367,grad_norm: 0.9999991239766673, iteration: 93687
loss: 0.9883068799972534,grad_norm: 0.9593558521850106, iteration: 93688
loss: 0.966454029083252,grad_norm: 0.9999991782798954, iteration: 93689
loss: 0.9676517248153687,grad_norm: 0.9999991103390601, iteration: 93690
loss: 0.985776424407959,grad_norm: 0.9274745093012946, iteration: 93691
loss: 0.9816186428070068,grad_norm: 0.9999990156565475, iteration: 93692
loss: 0.9544667601585388,grad_norm: 0.999999186111125, iteration: 93693
loss: 1.0141265392303467,grad_norm: 0.9999991978442225, iteration: 93694
loss: 1.0260095596313477,grad_norm: 0.9999991558183828, iteration: 93695
loss: 1.0397242307662964,grad_norm: 0.9999991086118287, iteration: 93696
loss: 1.0308362245559692,grad_norm: 0.9232475487773859, iteration: 93697
loss: 0.9935503602027893,grad_norm: 0.9999993320593763, iteration: 93698
loss: 0.9827069044113159,grad_norm: 0.9999991040966849, iteration: 93699
loss: 1.0540096759796143,grad_norm: 0.9999995811698122, iteration: 93700
loss: 1.015901803970337,grad_norm: 0.9999992410935675, iteration: 93701
loss: 1.0151575803756714,grad_norm: 0.9999990264680949, iteration: 93702
loss: 0.973543107509613,grad_norm: 0.9999991864740196, iteration: 93703
loss: 1.0265307426452637,grad_norm: 0.9999990983068338, iteration: 93704
loss: 1.0296385288238525,grad_norm: 0.8401403637978817, iteration: 93705
loss: 0.9621357321739197,grad_norm: 0.9999991519516309, iteration: 93706
loss: 0.969339907169342,grad_norm: 0.9999990081208991, iteration: 93707
loss: 0.9745140671730042,grad_norm: 0.955268040747148, iteration: 93708
loss: 1.0353540182113647,grad_norm: 0.9999995127794673, iteration: 93709
loss: 0.996046781539917,grad_norm: 0.9999995682527968, iteration: 93710
loss: 0.9803600907325745,grad_norm: 0.999999312106115, iteration: 93711
loss: 1.0104848146438599,grad_norm: 0.9999991319436602, iteration: 93712
loss: 0.9869376420974731,grad_norm: 0.9999992098346312, iteration: 93713
loss: 0.989238977432251,grad_norm: 0.9999991568948643, iteration: 93714
loss: 0.9864823818206787,grad_norm: 0.9999993014961636, iteration: 93715
loss: 1.0094894170761108,grad_norm: 0.9999990627519387, iteration: 93716
loss: 0.9946708679199219,grad_norm: 0.8619051556443238, iteration: 93717
loss: 1.0307296514511108,grad_norm: 0.9999992303559625, iteration: 93718
loss: 0.953332245349884,grad_norm: 0.9999990223827893, iteration: 93719
loss: 0.9662231802940369,grad_norm: 0.9999989953352599, iteration: 93720
loss: 0.9964737296104431,grad_norm: 0.9999992153805953, iteration: 93721
loss: 1.0006498098373413,grad_norm: 0.8593276103046622, iteration: 93722
loss: 0.9988800883293152,grad_norm: 0.9666984239288927, iteration: 93723
loss: 0.9938985109329224,grad_norm: 0.9364809904735424, iteration: 93724
loss: 1.0025733709335327,grad_norm: 0.9999990944559948, iteration: 93725
loss: 1.002101182937622,grad_norm: 0.8450182712315664, iteration: 93726
loss: 0.9909106492996216,grad_norm: 0.9999991686477914, iteration: 93727
loss: 0.9735518097877502,grad_norm: 0.9607942624543448, iteration: 93728
loss: 0.9925603270530701,grad_norm: 0.9853025724010664, iteration: 93729
loss: 0.9670013785362244,grad_norm: 0.9952029106696442, iteration: 93730
loss: 1.0185216665267944,grad_norm: 0.9332908354269547, iteration: 93731
loss: 1.0064810514450073,grad_norm: 0.9999991896685575, iteration: 93732
loss: 1.0306129455566406,grad_norm: 0.9999990055523541, iteration: 93733
loss: 0.9822431802749634,grad_norm: 0.9999991826477291, iteration: 93734
loss: 1.001485824584961,grad_norm: 0.9999990966992305, iteration: 93735
loss: 1.0012277364730835,grad_norm: 0.9999991703599239, iteration: 93736
loss: 1.0264079570770264,grad_norm: 0.9999992147377574, iteration: 93737
loss: 1.0037919282913208,grad_norm: 0.9999990344332421, iteration: 93738
loss: 1.027053952217102,grad_norm: 0.9999992385672087, iteration: 93739
loss: 1.0142573118209839,grad_norm: 0.9721219877231485, iteration: 93740
loss: 1.0168485641479492,grad_norm: 0.9999990681410719, iteration: 93741
loss: 0.9793569445610046,grad_norm: 0.9999992039639699, iteration: 93742
loss: 1.0107917785644531,grad_norm: 0.8553414872149818, iteration: 93743
loss: 1.0017973184585571,grad_norm: 0.9999989822617453, iteration: 93744
loss: 1.019935131072998,grad_norm: 0.9999991599036689, iteration: 93745
loss: 1.0152370929718018,grad_norm: 0.8163671531589272, iteration: 93746
loss: 1.0505406856536865,grad_norm: 0.999999245264604, iteration: 93747
loss: 1.0097211599349976,grad_norm: 0.9999990407763646, iteration: 93748
loss: 0.9655985236167908,grad_norm: 0.9999992272631713, iteration: 93749
loss: 1.0501967668533325,grad_norm: 0.9999991697220101, iteration: 93750
loss: 0.9687321186065674,grad_norm: 0.8963423877203751, iteration: 93751
loss: 1.020584225654602,grad_norm: 0.9999994717056627, iteration: 93752
loss: 1.0122963190078735,grad_norm: 0.9999991932049624, iteration: 93753
loss: 1.0234187841415405,grad_norm: 0.999999256905793, iteration: 93754
loss: 1.0247246026992798,grad_norm: 0.9999992546828333, iteration: 93755
loss: 1.0421372652053833,grad_norm: 0.9999990261490236, iteration: 93756
loss: 1.0421793460845947,grad_norm: 0.9413387608162267, iteration: 93757
loss: 1.0182799100875854,grad_norm: 0.9999989265322566, iteration: 93758
loss: 1.0370193719863892,grad_norm: 0.9999990618117917, iteration: 93759
loss: 1.0096276998519897,grad_norm: 0.9999989379330636, iteration: 93760
loss: 1.0083030462265015,grad_norm: 0.999999236679615, iteration: 93761
loss: 0.9828208684921265,grad_norm: 0.9999993283853238, iteration: 93762
loss: 0.9860151410102844,grad_norm: 0.9861299603793791, iteration: 93763
loss: 0.966044545173645,grad_norm: 0.9999992989131385, iteration: 93764
loss: 0.9947758913040161,grad_norm: 0.9999989608839172, iteration: 93765
loss: 1.0123310089111328,grad_norm: 0.9999991740500543, iteration: 93766
loss: 1.0311411619186401,grad_norm: 0.9999993878901745, iteration: 93767
loss: 0.9984509944915771,grad_norm: 0.9408899013548715, iteration: 93768
loss: 0.9755911827087402,grad_norm: 0.8921874805164096, iteration: 93769
loss: 0.9939078688621521,grad_norm: 0.9999990868244871, iteration: 93770
loss: 1.062718391418457,grad_norm: 0.9999992333957372, iteration: 93771
loss: 1.015775442123413,grad_norm: 0.9999991164256804, iteration: 93772
loss: 0.9745963215827942,grad_norm: 0.8473427889155658, iteration: 93773
loss: 1.0407440662384033,grad_norm: 0.999999168737925, iteration: 93774
loss: 1.0295524597167969,grad_norm: 0.9999992393405283, iteration: 93775
loss: 1.0099111795425415,grad_norm: 0.9452742541247109, iteration: 93776
loss: 1.0155280828475952,grad_norm: 0.9999991713886934, iteration: 93777
loss: 1.0019735097885132,grad_norm: 0.999999241021342, iteration: 93778
loss: 1.0068267583847046,grad_norm: 0.889594483023556, iteration: 93779
loss: 0.9802170395851135,grad_norm: 0.9999990608907401, iteration: 93780
loss: 1.0337779521942139,grad_norm: 0.999998942933397, iteration: 93781
loss: 0.9984299540519714,grad_norm: 0.9999990386698685, iteration: 93782
loss: 1.0162503719329834,grad_norm: 0.9889559998179235, iteration: 93783
loss: 0.9982393980026245,grad_norm: 0.9999989495602232, iteration: 93784
loss: 1.0254465341567993,grad_norm: 0.9999993606895023, iteration: 93785
loss: 1.0161021947860718,grad_norm: 0.8410588945676047, iteration: 93786
loss: 0.9757843017578125,grad_norm: 0.999998951023357, iteration: 93787
loss: 1.0086320638656616,grad_norm: 0.999999457819889, iteration: 93788
loss: 0.982048749923706,grad_norm: 0.9999992191221795, iteration: 93789
loss: 1.0049110651016235,grad_norm: 0.9786779898668232, iteration: 93790
loss: 0.9944884181022644,grad_norm: 0.9507648909645431, iteration: 93791
loss: 1.0112602710723877,grad_norm: 0.9999991849994829, iteration: 93792
loss: 1.0368404388427734,grad_norm: 0.9999992113996057, iteration: 93793
loss: 1.0237535238265991,grad_norm: 0.8142140855128401, iteration: 93794
loss: 1.0050048828125,grad_norm: 0.9999990261841786, iteration: 93795
loss: 1.0330618619918823,grad_norm: 0.9927902204356596, iteration: 93796
loss: 0.9949414134025574,grad_norm: 0.9999991360971733, iteration: 93797
loss: 1.0286130905151367,grad_norm: 0.9999991017754926, iteration: 93798
loss: 1.0057804584503174,grad_norm: 0.9391453977128511, iteration: 93799
loss: 0.9894061088562012,grad_norm: 0.9999990727486603, iteration: 93800
loss: 0.970659613609314,grad_norm: 0.9999990079979431, iteration: 93801
loss: 1.0194951295852661,grad_norm: 0.9947294815238471, iteration: 93802
loss: 0.9975084066390991,grad_norm: 0.9999990797082722, iteration: 93803
loss: 1.022800087928772,grad_norm: 0.9999993780223001, iteration: 93804
loss: 0.9754591584205627,grad_norm: 0.9999991258237662, iteration: 93805
loss: 0.9856606125831604,grad_norm: 0.9606915540031001, iteration: 93806
loss: 0.9679206013679504,grad_norm: 0.9003884716152075, iteration: 93807
loss: 0.9544914364814758,grad_norm: 0.9999991263106409, iteration: 93808
loss: 1.0234330892562866,grad_norm: 0.8653413894518655, iteration: 93809
loss: 0.9500702619552612,grad_norm: 0.9999991824590828, iteration: 93810
loss: 0.9626510739326477,grad_norm: 0.995400719468931, iteration: 93811
loss: 0.9861433506011963,grad_norm: 0.9999990134479214, iteration: 93812
loss: 1.023010015487671,grad_norm: 0.9999992053636604, iteration: 93813
loss: 0.9867009520530701,grad_norm: 0.9363166226931573, iteration: 93814
loss: 0.9891646504402161,grad_norm: 0.9999991045669654, iteration: 93815
loss: 0.9716307520866394,grad_norm: 0.9999990012059917, iteration: 93816
loss: 1.0147236585617065,grad_norm: 0.9499346466772606, iteration: 93817
loss: 0.9857788681983948,grad_norm: 0.9811711288059485, iteration: 93818
loss: 1.025927186012268,grad_norm: 0.9790787900214444, iteration: 93819
loss: 1.020317792892456,grad_norm: 0.9999990771406337, iteration: 93820
loss: 1.0067799091339111,grad_norm: 0.9999990273266716, iteration: 93821
loss: 1.0106310844421387,grad_norm: 0.9999990961456586, iteration: 93822
loss: 0.9793664813041687,grad_norm: 0.9256150984420808, iteration: 93823
loss: 0.9891731142997742,grad_norm: 0.9999991356893777, iteration: 93824
loss: 0.9697340130805969,grad_norm: 0.9999991509411263, iteration: 93825
loss: 0.9930281639099121,grad_norm: 0.9999989576660312, iteration: 93826
loss: 0.99139404296875,grad_norm: 0.9812668208437934, iteration: 93827
loss: 1.0297720432281494,grad_norm: 0.9999992231647552, iteration: 93828
loss: 1.0106744766235352,grad_norm: 0.9041697466465526, iteration: 93829
loss: 1.0378246307373047,grad_norm: 0.999999146103299, iteration: 93830
loss: 1.02237868309021,grad_norm: 0.9999993338831293, iteration: 93831
loss: 1.0439292192459106,grad_norm: 0.9999993232186467, iteration: 93832
loss: 1.0207864046096802,grad_norm: 0.9999994579559051, iteration: 93833
loss: 1.0033831596374512,grad_norm: 0.9999990127560338, iteration: 93834
loss: 1.0474417209625244,grad_norm: 0.9999991263672076, iteration: 93835
loss: 1.0346723794937134,grad_norm: 0.9999992021470802, iteration: 93836
loss: 0.9821557402610779,grad_norm: 0.9999992008621046, iteration: 93837
loss: 1.0092447996139526,grad_norm: 0.9999992160926418, iteration: 93838
loss: 1.0402424335479736,grad_norm: 0.9999991954931061, iteration: 93839
loss: 1.009484052658081,grad_norm: 0.9999991101951473, iteration: 93840
loss: 0.9898005723953247,grad_norm: 0.9999991884289713, iteration: 93841
loss: 1.0114734172821045,grad_norm: 0.9999991464509107, iteration: 93842
loss: 1.0033358335494995,grad_norm: 0.9999991514887557, iteration: 93843
loss: 1.004721999168396,grad_norm: 0.9999991239818443, iteration: 93844
loss: 1.0244922637939453,grad_norm: 0.9999993631151737, iteration: 93845
loss: 1.0076165199279785,grad_norm: 0.9999990025749136, iteration: 93846
loss: 0.9755685329437256,grad_norm: 0.9761920685751435, iteration: 93847
loss: 1.0268070697784424,grad_norm: 0.9999990565525292, iteration: 93848
loss: 1.0079553127288818,grad_norm: 0.9999990411462207, iteration: 93849
loss: 0.9998296499252319,grad_norm: 0.9999989664224681, iteration: 93850
loss: 1.0171653032302856,grad_norm: 0.9999992296438255, iteration: 93851
loss: 1.017066240310669,grad_norm: 0.9999991980250559, iteration: 93852
loss: 1.0138198137283325,grad_norm: 0.9072625712661578, iteration: 93853
loss: 0.9998658299446106,grad_norm: 0.9999990377747444, iteration: 93854
loss: 0.9816502332687378,grad_norm: 0.8507208656066966, iteration: 93855
loss: 1.007971167564392,grad_norm: 0.9474524992569983, iteration: 93856
loss: 0.9803243279457092,grad_norm: 0.9999991051107231, iteration: 93857
loss: 1.0138635635375977,grad_norm: 0.9640833023922679, iteration: 93858
loss: 1.0004109144210815,grad_norm: 0.9843098978541179, iteration: 93859
loss: 0.9950532913208008,grad_norm: 0.9999993083883463, iteration: 93860
loss: 0.9860327243804932,grad_norm: 0.9404012577717309, iteration: 93861
loss: 0.9871278405189514,grad_norm: 0.9999990314180195, iteration: 93862
loss: 0.994144082069397,grad_norm: 0.9665539341327617, iteration: 93863
loss: 0.9491842985153198,grad_norm: 0.9528400760164719, iteration: 93864
loss: 0.9999825954437256,grad_norm: 0.9453434431513215, iteration: 93865
loss: 1.0483288764953613,grad_norm: 0.9999998425392677, iteration: 93866
loss: 1.0034279823303223,grad_norm: 0.9999990131269856, iteration: 93867
loss: 1.0055856704711914,grad_norm: 0.9885472966467754, iteration: 93868
loss: 1.032370686531067,grad_norm: 0.999998952067428, iteration: 93869
loss: 1.0190277099609375,grad_norm: 0.9999990583428366, iteration: 93870
loss: 0.9951302409172058,grad_norm: 0.9999992631467435, iteration: 93871
loss: 1.0003718137741089,grad_norm: 0.9999994180674437, iteration: 93872
loss: 1.0152558088302612,grad_norm: 0.9999991421807387, iteration: 93873
loss: 1.024818778038025,grad_norm: 0.8794738228165986, iteration: 93874
loss: 1.0138877630233765,grad_norm: 0.9999992294010338, iteration: 93875
loss: 0.9921644330024719,grad_norm: 0.9999989459876139, iteration: 93876
loss: 1.0418481826782227,grad_norm: 0.9999993757497797, iteration: 93877
loss: 0.9750846028327942,grad_norm: 0.9999991155863914, iteration: 93878
loss: 1.0421918630599976,grad_norm: 0.9999990166674666, iteration: 93879
loss: 0.9898161888122559,grad_norm: 0.9999992151390318, iteration: 93880
loss: 0.9611075520515442,grad_norm: 0.9520496010280738, iteration: 93881
loss: 0.9590263962745667,grad_norm: 0.9999992265386014, iteration: 93882
loss: 1.012227177619934,grad_norm: 0.8722823099213147, iteration: 93883
loss: 0.9929340481758118,grad_norm: 0.9999990492743245, iteration: 93884
loss: 1.0003892183303833,grad_norm: 0.9999992544888199, iteration: 93885
loss: 1.026623010635376,grad_norm: 0.9999992273253304, iteration: 93886
loss: 1.0336161851882935,grad_norm: 0.9942323348200981, iteration: 93887
loss: 0.9572427868843079,grad_norm: 0.9324379883799506, iteration: 93888
loss: 0.9893906116485596,grad_norm: 0.9628692330855687, iteration: 93889
loss: 1.0221281051635742,grad_norm: 0.9914666783416509, iteration: 93890
loss: 1.0374093055725098,grad_norm: 0.9999992103963362, iteration: 93891
loss: 0.9888499975204468,grad_norm: 0.9999993124159315, iteration: 93892
loss: 0.9990001916885376,grad_norm: 0.9869756862471407, iteration: 93893
loss: 0.9538851380348206,grad_norm: 0.9999990421935449, iteration: 93894
loss: 0.9841850996017456,grad_norm: 0.9322028039861157, iteration: 93895
loss: 1.007816195487976,grad_norm: 0.999999076146224, iteration: 93896
loss: 1.0287396907806396,grad_norm: 0.9999991218837867, iteration: 93897
loss: 1.0117254257202148,grad_norm: 0.9999991410246295, iteration: 93898
loss: 0.9833179116249084,grad_norm: 0.9999989288343546, iteration: 93899
loss: 0.9988691210746765,grad_norm: 0.8879548435723777, iteration: 93900
loss: 0.9995603561401367,grad_norm: 0.9999989944047546, iteration: 93901
loss: 1.0226796865463257,grad_norm: 0.9999996684605288, iteration: 93902
loss: 1.0171278715133667,grad_norm: 0.9999989665944599, iteration: 93903
loss: 0.9905226230621338,grad_norm: 0.9999990157713082, iteration: 93904
loss: 1.0000638961791992,grad_norm: 0.9999990990744606, iteration: 93905
loss: 1.013403296470642,grad_norm: 0.9999992390346146, iteration: 93906
loss: 0.9722027778625488,grad_norm: 0.9827304648605224, iteration: 93907
loss: 1.005507469177246,grad_norm: 0.9999992030867423, iteration: 93908
loss: 1.0367566347122192,grad_norm: 0.9999990562470968, iteration: 93909
loss: 0.9818084836006165,grad_norm: 0.9999990236302982, iteration: 93910
loss: 0.9648398756980896,grad_norm: 0.9176347049474981, iteration: 93911
loss: 0.9984298944473267,grad_norm: 0.9999991330651488, iteration: 93912
loss: 1.056612253189087,grad_norm: 0.9999991829467916, iteration: 93913
loss: 1.0049681663513184,grad_norm: 0.9999992225324345, iteration: 93914
loss: 0.9979251623153687,grad_norm: 0.9534048302757087, iteration: 93915
loss: 1.0076783895492554,grad_norm: 0.9137553113033882, iteration: 93916
loss: 1.0215718746185303,grad_norm: 0.9999999337364471, iteration: 93917
loss: 1.012761116027832,grad_norm: 0.9999991838555119, iteration: 93918
loss: 0.9959495067596436,grad_norm: 0.9999991417346497, iteration: 93919
loss: 1.0087652206420898,grad_norm: 0.863076065140545, iteration: 93920
loss: 0.9783912301063538,grad_norm: 0.9999990869562735, iteration: 93921
loss: 0.9750739336013794,grad_norm: 0.9999991006060446, iteration: 93922
loss: 1.013342261314392,grad_norm: 0.9999994915260276, iteration: 93923
loss: 0.9958369731903076,grad_norm: 0.9315393827361056, iteration: 93924
loss: 0.9728747606277466,grad_norm: 0.9999990786657343, iteration: 93925
loss: 0.9927683472633362,grad_norm: 0.9481879964818133, iteration: 93926
loss: 1.0145128965377808,grad_norm: 0.9116398611230991, iteration: 93927
loss: 0.9886159300804138,grad_norm: 0.9999991018677289, iteration: 93928
loss: 1.0170753002166748,grad_norm: 0.9490234465069335, iteration: 93929
loss: 1.0140297412872314,grad_norm: 0.9999996065892223, iteration: 93930
loss: 1.0024847984313965,grad_norm: 0.9999990257206438, iteration: 93931
loss: 0.9901233315467834,grad_norm: 0.9465184728256352, iteration: 93932
loss: 0.9517377614974976,grad_norm: 0.9999992343890284, iteration: 93933
loss: 1.0052176713943481,grad_norm: 0.99999914326477, iteration: 93934
loss: 0.9899957180023193,grad_norm: 0.9613379201253528, iteration: 93935
loss: 1.0050934553146362,grad_norm: 0.8844766693243397, iteration: 93936
loss: 1.0260990858078003,grad_norm: 0.9999992492502711, iteration: 93937
loss: 1.014338731765747,grad_norm: 0.9999991547392664, iteration: 93938
loss: 0.9996487498283386,grad_norm: 0.999999246515049, iteration: 93939
loss: 0.9983008503913879,grad_norm: 0.9635189334447869, iteration: 93940
loss: 1.009583830833435,grad_norm: 0.9999999537387299, iteration: 93941
loss: 1.0142686367034912,grad_norm: 0.8944650425828263, iteration: 93942
loss: 1.052381157875061,grad_norm: 0.9431638407189213, iteration: 93943
loss: 1.009752869606018,grad_norm: 0.9027666297833761, iteration: 93944
loss: 0.9925505518913269,grad_norm: 0.9708997314888413, iteration: 93945
loss: 1.0436631441116333,grad_norm: 0.939944825050215, iteration: 93946
loss: 1.0009764432907104,grad_norm: 0.9999991063560187, iteration: 93947
loss: 1.0247242450714111,grad_norm: 0.999999108279997, iteration: 93948
loss: 1.0094834566116333,grad_norm: 0.9999992418203656, iteration: 93949
loss: 1.0167707204818726,grad_norm: 0.9695926839435157, iteration: 93950
loss: 1.0381064414978027,grad_norm: 0.9452456569910705, iteration: 93951
loss: 0.9327930212020874,grad_norm: 0.9999992158786365, iteration: 93952
loss: 1.0183134078979492,grad_norm: 0.9999997822894611, iteration: 93953
loss: 0.9858033061027527,grad_norm: 0.8764900856797748, iteration: 93954
loss: 1.0127707719802856,grad_norm: 0.9999992391020959, iteration: 93955
loss: 1.0228211879730225,grad_norm: 0.9999990435886486, iteration: 93956
loss: 1.0020787715911865,grad_norm: 0.999999208417648, iteration: 93957
loss: 0.965226411819458,grad_norm: 0.9821746846369941, iteration: 93958
loss: 0.9889771938323975,grad_norm: 0.9999990848562494, iteration: 93959
loss: 0.9869783520698547,grad_norm: 0.9999991396606153, iteration: 93960
loss: 0.987160325050354,grad_norm: 0.999999098541453, iteration: 93961
loss: 0.9957689046859741,grad_norm: 0.9999991881659406, iteration: 93962
loss: 0.989764928817749,grad_norm: 0.9999991807083909, iteration: 93963
loss: 0.9927847385406494,grad_norm: 0.9999992199182657, iteration: 93964
loss: 0.9912531971931458,grad_norm: 0.9828377096049925, iteration: 93965
loss: 0.9913769960403442,grad_norm: 0.9388372914023525, iteration: 93966
loss: 1.0146400928497314,grad_norm: 0.9999989947286795, iteration: 93967
loss: 1.0482226610183716,grad_norm: 0.9999992229068777, iteration: 93968
loss: 1.0029575824737549,grad_norm: 0.9835915757139746, iteration: 93969
loss: 0.9970194697380066,grad_norm: 0.999998960577275, iteration: 93970
loss: 0.9895688891410828,grad_norm: 0.9672660095739716, iteration: 93971
loss: 0.987064003944397,grad_norm: 0.9999991345935457, iteration: 93972
loss: 0.9696255326271057,grad_norm: 0.999999211936519, iteration: 93973
loss: 1.0039578676223755,grad_norm: 0.8384764244265115, iteration: 93974
loss: 1.0163935422897339,grad_norm: 0.9999989996361061, iteration: 93975
loss: 1.0221285820007324,grad_norm: 0.9999991582634757, iteration: 93976
loss: 1.0059432983398438,grad_norm: 0.9229562815847131, iteration: 93977
loss: 0.9977497458457947,grad_norm: 0.9999990146304311, iteration: 93978
loss: 0.9934741258621216,grad_norm: 0.9999992718619207, iteration: 93979
loss: 1.008610486984253,grad_norm: 0.993559564147927, iteration: 93980
loss: 0.9962558746337891,grad_norm: 0.9999990114684121, iteration: 93981
loss: 1.0112941265106201,grad_norm: 0.8228361318457205, iteration: 93982
loss: 1.0083506107330322,grad_norm: 0.9999992391709719, iteration: 93983
loss: 0.9644507169723511,grad_norm: 0.9999990819872332, iteration: 93984
loss: 0.9894210696220398,grad_norm: 0.8604122496394987, iteration: 93985
loss: 1.0074020624160767,grad_norm: 0.9999990371293658, iteration: 93986
loss: 0.9953398704528809,grad_norm: 0.9999991302977692, iteration: 93987
loss: 1.0029898881912231,grad_norm: 0.8996964652045859, iteration: 93988
loss: 1.0217385292053223,grad_norm: 0.999999181065773, iteration: 93989
loss: 0.9961269497871399,grad_norm: 0.9999993740142472, iteration: 93990
loss: 1.030988335609436,grad_norm: 0.999999078130428, iteration: 93991
loss: 0.9951351881027222,grad_norm: 0.9686680349877107, iteration: 93992
loss: 0.9856753945350647,grad_norm: 0.9021217653777736, iteration: 93993
loss: 1.000544786453247,grad_norm: 0.9999989748516042, iteration: 93994
loss: 0.963264524936676,grad_norm: 0.9999990288396204, iteration: 93995
loss: 0.9932100176811218,grad_norm: 0.9999990110271216, iteration: 93996
loss: 1.023350715637207,grad_norm: 0.9999991802449257, iteration: 93997
loss: 0.9918041229248047,grad_norm: 0.9999993063643844, iteration: 93998
loss: 1.011323094367981,grad_norm: 0.9120572196918817, iteration: 93999
loss: 0.9690310955047607,grad_norm: 0.9116886590231795, iteration: 94000
loss: 0.9983542561531067,grad_norm: 0.9999992322313993, iteration: 94001
loss: 0.9690520763397217,grad_norm: 0.9994785098244914, iteration: 94002
loss: 1.0193979740142822,grad_norm: 0.9454368161876013, iteration: 94003
loss: 1.0007109642028809,grad_norm: 0.9999991186176087, iteration: 94004
loss: 1.017661213874817,grad_norm: 0.9999990809795456, iteration: 94005
loss: 1.018535852432251,grad_norm: 0.9999991689053578, iteration: 94006
loss: 1.0009040832519531,grad_norm: 0.9999992486700302, iteration: 94007
loss: 0.9986457824707031,grad_norm: 0.9633210513918155, iteration: 94008
loss: 0.9745141267776489,grad_norm: 0.9285162117187893, iteration: 94009
loss: 0.9631561040878296,grad_norm: 0.9999992392418183, iteration: 94010
loss: 1.0065326690673828,grad_norm: 0.9142340397212099, iteration: 94011
loss: 0.9933356046676636,grad_norm: 0.9503896794362425, iteration: 94012
loss: 1.0276589393615723,grad_norm: 0.9999990816156319, iteration: 94013
loss: 1.0529582500457764,grad_norm: 0.9999990366662324, iteration: 94014
loss: 1.0287549495697021,grad_norm: 0.9999992657158925, iteration: 94015
loss: 0.9976229667663574,grad_norm: 0.9999990267256532, iteration: 94016
loss: 1.0065830945968628,grad_norm: 0.9881866973360633, iteration: 94017
loss: 0.9500541090965271,grad_norm: 0.9567365126464272, iteration: 94018
loss: 1.0057562589645386,grad_norm: 0.999999128975022, iteration: 94019
loss: 1.0252197980880737,grad_norm: 0.9999992395282133, iteration: 94020
loss: 1.015138030052185,grad_norm: 0.9999992408592153, iteration: 94021
loss: 1.0164302587509155,grad_norm: 0.9711284046017902, iteration: 94022
loss: 0.9846317768096924,grad_norm: 0.9999992438557014, iteration: 94023
loss: 1.002873420715332,grad_norm: 0.9973036374945795, iteration: 94024
loss: 1.0087509155273438,grad_norm: 0.9999992170250311, iteration: 94025
loss: 0.988008975982666,grad_norm: 0.9999992531934802, iteration: 94026
loss: 1.006395697593689,grad_norm: 0.9999991467028452, iteration: 94027
loss: 1.0199133157730103,grad_norm: 0.9999991024823199, iteration: 94028
loss: 0.972501814365387,grad_norm: 0.9999991464484642, iteration: 94029
loss: 1.000447392463684,grad_norm: 0.9999990237453594, iteration: 94030
loss: 0.9790002107620239,grad_norm: 0.874927636269438, iteration: 94031
loss: 0.9724465012550354,grad_norm: 0.98561118996463, iteration: 94032
loss: 0.9676690101623535,grad_norm: 0.9999991064082704, iteration: 94033
loss: 0.994141161441803,grad_norm: 0.9999990980569501, iteration: 94034
loss: 1.0402947664260864,grad_norm: 0.9999989658254178, iteration: 94035
loss: 0.9976958632469177,grad_norm: 0.9999990987682129, iteration: 94036
loss: 0.9739679098129272,grad_norm: 0.9999991295031606, iteration: 94037
loss: 0.9537479281425476,grad_norm: 0.9597083485517894, iteration: 94038
loss: 1.0397149324417114,grad_norm: 0.9590155220204876, iteration: 94039
loss: 0.9997375011444092,grad_norm: 0.9259379422999816, iteration: 94040
loss: 1.010450839996338,grad_norm: 0.9999990024008606, iteration: 94041
loss: 0.9521851539611816,grad_norm: 0.9999991406565842, iteration: 94042
loss: 1.016055703163147,grad_norm: 0.999999150609657, iteration: 94043
loss: 0.9843166470527649,grad_norm: 0.8559457667660401, iteration: 94044
loss: 1.0207698345184326,grad_norm: 0.9595493809764971, iteration: 94045
loss: 0.9642534255981445,grad_norm: 0.9149638922780946, iteration: 94046
loss: 0.9921290874481201,grad_norm: 0.8848293797645638, iteration: 94047
loss: 1.0057430267333984,grad_norm: 0.9999989694543038, iteration: 94048
loss: 0.9990274310112,grad_norm: 0.9622391666008315, iteration: 94049
loss: 0.9918949007987976,grad_norm: 0.9999991429673764, iteration: 94050
loss: 0.9846623539924622,grad_norm: 0.999999216986261, iteration: 94051
loss: 0.997620165348053,grad_norm: 0.9999991428089738, iteration: 94052
loss: 0.9747133851051331,grad_norm: 0.9999991658507489, iteration: 94053
loss: 1.0046966075897217,grad_norm: 0.999999060557998, iteration: 94054
loss: 1.0034009218215942,grad_norm: 0.9999990664493205, iteration: 94055
loss: 0.9774948954582214,grad_norm: 0.9807850455822253, iteration: 94056
loss: 1.0067167282104492,grad_norm: 0.9999992983185877, iteration: 94057
loss: 0.9309895634651184,grad_norm: 0.9999990818894706, iteration: 94058
loss: 1.029085636138916,grad_norm: 0.9999990047177958, iteration: 94059
loss: 0.9772469401359558,grad_norm: 0.8676166161104294, iteration: 94060
loss: 1.01998770236969,grad_norm: 0.9999990132776035, iteration: 94061
loss: 1.002519965171814,grad_norm: 0.9999991449877396, iteration: 94062
loss: 1.0001192092895508,grad_norm: 0.8690286703748799, iteration: 94063
loss: 1.0131423473358154,grad_norm: 0.9682428299584045, iteration: 94064
loss: 0.9901973009109497,grad_norm: 0.9458422709669443, iteration: 94065
loss: 1.0138070583343506,grad_norm: 0.999999162962931, iteration: 94066
loss: 0.9960646033287048,grad_norm: 0.9999990908412794, iteration: 94067
loss: 0.9937162399291992,grad_norm: 0.9999992543311969, iteration: 94068
loss: 1.0292787551879883,grad_norm: 0.969950428867641, iteration: 94069
loss: 0.9910007119178772,grad_norm: 0.999999264176821, iteration: 94070
loss: 0.9923297762870789,grad_norm: 0.999999039848413, iteration: 94071
loss: 0.9978747963905334,grad_norm: 0.9906340455087794, iteration: 94072
loss: 1.0026943683624268,grad_norm: 0.9028864890239855, iteration: 94073
loss: 0.9960973858833313,grad_norm: 0.9999991550792869, iteration: 94074
loss: 1.0258378982543945,grad_norm: 0.8695147816366229, iteration: 94075
loss: 0.9955623149871826,grad_norm: 0.999999179097353, iteration: 94076
loss: 0.9931732416152954,grad_norm: 0.999999106774355, iteration: 94077
loss: 0.9529995918273926,grad_norm: 0.9817150383560728, iteration: 94078
loss: 1.002364158630371,grad_norm: 0.9999991654606938, iteration: 94079
loss: 1.0065823793411255,grad_norm: 0.9811319076301686, iteration: 94080
loss: 1.0145701169967651,grad_norm: 0.9980730496120305, iteration: 94081
loss: 0.9694327712059021,grad_norm: 0.9999991847537999, iteration: 94082
loss: 1.028698205947876,grad_norm: 0.9999991929853246, iteration: 94083
loss: 1.0086082220077515,grad_norm: 0.9999991082011389, iteration: 94084
loss: 0.9980278611183167,grad_norm: 0.8655855510958586, iteration: 94085
loss: 0.9731008410453796,grad_norm: 0.9999991255341303, iteration: 94086
loss: 1.0093541145324707,grad_norm: 0.9999991623616071, iteration: 94087
loss: 1.0192183256149292,grad_norm: 0.9999990594290136, iteration: 94088
loss: 1.02370285987854,grad_norm: 0.9999991322076425, iteration: 94089
loss: 0.9625762701034546,grad_norm: 0.9999991216415182, iteration: 94090
loss: 0.9928362369537354,grad_norm: 0.9225225610478902, iteration: 94091
loss: 0.9813564419746399,grad_norm: 0.9999991324203127, iteration: 94092
loss: 0.9951918125152588,grad_norm: 0.9999991961723168, iteration: 94093
loss: 0.9907665252685547,grad_norm: 0.9999991594549511, iteration: 94094
loss: 1.0100356340408325,grad_norm: 0.922501202098246, iteration: 94095
loss: 0.9764600396156311,grad_norm: 0.9568777087603043, iteration: 94096
loss: 0.9811499118804932,grad_norm: 0.9784035152090401, iteration: 94097
loss: 0.9637140035629272,grad_norm: 0.999999254412849, iteration: 94098
loss: 1.0262627601623535,grad_norm: 0.999999051414998, iteration: 94099
loss: 1.003267765045166,grad_norm: 0.9540996667120072, iteration: 94100
loss: 1.0196046829223633,grad_norm: 0.9399814585839943, iteration: 94101
loss: 0.9795765280723572,grad_norm: 0.9999990345769773, iteration: 94102
loss: 0.9906549453735352,grad_norm: 0.8608312452821444, iteration: 94103
loss: 0.9773274660110474,grad_norm: 0.9474683502660202, iteration: 94104
loss: 1.0469919443130493,grad_norm: 0.9999991629785888, iteration: 94105
loss: 0.998838484287262,grad_norm: 0.8978670081484911, iteration: 94106
loss: 0.9975818991661072,grad_norm: 0.9999990456489195, iteration: 94107
loss: 1.0024265050888062,grad_norm: 0.9999992155105092, iteration: 94108
loss: 0.9864422082901001,grad_norm: 0.9999989817615456, iteration: 94109
loss: 0.9924051761627197,grad_norm: 0.841815908173267, iteration: 94110
loss: 1.003524661064148,grad_norm: 0.9999991326698588, iteration: 94111
loss: 0.971943736076355,grad_norm: 0.9999990105007459, iteration: 94112
loss: 1.022637128829956,grad_norm: 0.999348765603382, iteration: 94113
loss: 1.0006756782531738,grad_norm: 0.9456753223049249, iteration: 94114
loss: 0.9887447953224182,grad_norm: 0.999999135152324, iteration: 94115
loss: 1.0138388872146606,grad_norm: 0.9999992723272334, iteration: 94116
loss: 0.9683207273483276,grad_norm: 0.9999992124269751, iteration: 94117
loss: 0.9710981249809265,grad_norm: 0.9999993272142008, iteration: 94118
loss: 1.0153690576553345,grad_norm: 0.9999989810488064, iteration: 94119
loss: 0.9859333038330078,grad_norm: 0.9999990341685213, iteration: 94120
loss: 0.9907954931259155,grad_norm: 0.9999992892384376, iteration: 94121
loss: 0.9740837812423706,grad_norm: 0.9560882483121481, iteration: 94122
loss: 0.9755403399467468,grad_norm: 0.9999992008534407, iteration: 94123
loss: 1.0047022104263306,grad_norm: 0.9999990859557297, iteration: 94124
loss: 1.0221550464630127,grad_norm: 0.999999007797846, iteration: 94125
loss: 0.9988489747047424,grad_norm: 0.9999990442503567, iteration: 94126
loss: 0.9862743616104126,grad_norm: 0.9999990340299375, iteration: 94127
loss: 1.0065069198608398,grad_norm: 0.9999991922157805, iteration: 94128
loss: 1.0285145044326782,grad_norm: 0.9999989799512984, iteration: 94129
loss: 0.9829881191253662,grad_norm: 0.9999992552845561, iteration: 94130
loss: 0.9866173267364502,grad_norm: 0.9999990895563925, iteration: 94131
loss: 1.0258365869522095,grad_norm: 0.9999990802245229, iteration: 94132
loss: 1.0255637168884277,grad_norm: 0.9999990627789994, iteration: 94133
loss: 0.9838283658027649,grad_norm: 0.9999990797322755, iteration: 94134
loss: 1.0002682209014893,grad_norm: 0.9863007740392793, iteration: 94135
loss: 0.9941621422767639,grad_norm: 0.9999992260243933, iteration: 94136
loss: 0.9707345366477966,grad_norm: 0.9999990522146593, iteration: 94137
loss: 0.9978945851325989,grad_norm: 0.9999991468946342, iteration: 94138
loss: 1.0106350183486938,grad_norm: 0.9999991212991796, iteration: 94139
loss: 1.007926344871521,grad_norm: 0.9481391885630808, iteration: 94140
loss: 1.005393385887146,grad_norm: 0.8829513572489732, iteration: 94141
loss: 0.9956642985343933,grad_norm: 0.9689252731838419, iteration: 94142
loss: 1.0199477672576904,grad_norm: 0.9999995678065358, iteration: 94143
loss: 0.9737431406974792,grad_norm: 0.9471294594133459, iteration: 94144
loss: 1.0066334009170532,grad_norm: 0.9999991680673894, iteration: 94145
loss: 1.03439199924469,grad_norm: 0.999999525554195, iteration: 94146
loss: 0.9763592481613159,grad_norm: 0.9999992087193679, iteration: 94147
loss: 1.0020047426223755,grad_norm: 0.9999990645913566, iteration: 94148
loss: 1.031867265701294,grad_norm: 0.9307298528729787, iteration: 94149
loss: 1.0017755031585693,grad_norm: 0.9999990262773032, iteration: 94150
loss: 1.0127215385437012,grad_norm: 0.999999050924641, iteration: 94151
loss: 0.9848873615264893,grad_norm: 0.9803595883672079, iteration: 94152
loss: 1.0091084241867065,grad_norm: 0.9251167031805962, iteration: 94153
loss: 0.9531151652336121,grad_norm: 0.9999992433144839, iteration: 94154
loss: 0.9866440892219543,grad_norm: 0.8816330441102044, iteration: 94155
loss: 1.0011540651321411,grad_norm: 0.9999992312377087, iteration: 94156
loss: 1.0186293125152588,grad_norm: 0.9514202276032199, iteration: 94157
loss: 0.9990823268890381,grad_norm: 0.9999991567485463, iteration: 94158
loss: 1.0502015352249146,grad_norm: 0.999999751738255, iteration: 94159
loss: 0.9879075884819031,grad_norm: 0.999999129105743, iteration: 94160
loss: 1.0022969245910645,grad_norm: 0.9999991423325844, iteration: 94161
loss: 1.0139894485473633,grad_norm: 0.958058686274266, iteration: 94162
loss: 0.9966387748718262,grad_norm: 0.9999998000451301, iteration: 94163
loss: 1.015461802482605,grad_norm: 0.9999997669302998, iteration: 94164
loss: 0.9969321489334106,grad_norm: 0.99999907777694, iteration: 94165
loss: 1.016497015953064,grad_norm: 0.9999991196319009, iteration: 94166
loss: 1.0402061939239502,grad_norm: 0.9999995977288367, iteration: 94167
loss: 0.9693906307220459,grad_norm: 0.9500572374066629, iteration: 94168
loss: 1.0559556484222412,grad_norm: 0.9999994787194038, iteration: 94169
loss: 0.9892644286155701,grad_norm: 0.9983316825801132, iteration: 94170
loss: 1.0516929626464844,grad_norm: 0.9999991450510037, iteration: 94171
loss: 0.997275173664093,grad_norm: 0.9999990270512437, iteration: 94172
loss: 1.016021728515625,grad_norm: 0.9847147514889028, iteration: 94173
loss: 0.9977734088897705,grad_norm: 0.9022221068507741, iteration: 94174
loss: 0.9864546656608582,grad_norm: 0.959080315587862, iteration: 94175
loss: 0.9621949791908264,grad_norm: 0.9999989959960398, iteration: 94176
loss: 0.9733378887176514,grad_norm: 0.9999991221451713, iteration: 94177
loss: 1.0144017934799194,grad_norm: 0.9999991094875403, iteration: 94178
loss: 1.0027509927749634,grad_norm: 0.9443442465139601, iteration: 94179
loss: 1.0163003206253052,grad_norm: 0.8339315848821698, iteration: 94180
loss: 0.9783352017402649,grad_norm: 0.9999989402231098, iteration: 94181
loss: 0.9753689765930176,grad_norm: 0.9999991499786917, iteration: 94182
loss: 1.067096471786499,grad_norm: 0.9999991971081396, iteration: 94183
loss: 1.0073785781860352,grad_norm: 0.9999991984879311, iteration: 94184
loss: 0.9908058643341064,grad_norm: 0.999998967290039, iteration: 94185
loss: 1.0082156658172607,grad_norm: 0.9999990938516836, iteration: 94186
loss: 0.9904406666755676,grad_norm: 0.959316504363466, iteration: 94187
loss: 0.9662111401557922,grad_norm: 0.9999990665671148, iteration: 94188
loss: 0.9641764163970947,grad_norm: 0.9999990168359691, iteration: 94189
loss: 1.0304042100906372,grad_norm: 0.8786205697401724, iteration: 94190
loss: 0.9790486693382263,grad_norm: 0.9236455602921818, iteration: 94191
loss: 0.9729912877082825,grad_norm: 0.9999990295093908, iteration: 94192
loss: 0.9840995669364929,grad_norm: 0.9999990487072532, iteration: 94193
loss: 0.9734724760055542,grad_norm: 0.9999991308881512, iteration: 94194
loss: 1.0319039821624756,grad_norm: 0.999999034061492, iteration: 94195
loss: 1.0247489213943481,grad_norm: 0.999998997899335, iteration: 94196
loss: 0.98856121301651,grad_norm: 0.9999990669019551, iteration: 94197
loss: 1.0232430696487427,grad_norm: 0.9280381508046577, iteration: 94198
loss: 0.9954051971435547,grad_norm: 0.999999049724293, iteration: 94199
loss: 0.9664711356163025,grad_norm: 0.9581149228125624, iteration: 94200
loss: 1.0217190980911255,grad_norm: 0.8915980333987314, iteration: 94201
loss: 0.9694463014602661,grad_norm: 0.9999991831753998, iteration: 94202
loss: 0.9932264685630798,grad_norm: 0.9839799763093797, iteration: 94203
loss: 0.9703157544136047,grad_norm: 0.9092604725782296, iteration: 94204
loss: 1.0073835849761963,grad_norm: 0.978331871200201, iteration: 94205
loss: 0.9750270247459412,grad_norm: 0.8869351990415131, iteration: 94206
loss: 1.0049601793289185,grad_norm: 0.9999991236877606, iteration: 94207
loss: 0.9826976656913757,grad_norm: 0.9999991859674823, iteration: 94208
loss: 0.947790265083313,grad_norm: 0.9524754773444981, iteration: 94209
loss: 1.068678617477417,grad_norm: 0.9999993043686976, iteration: 94210
loss: 1.0450221300125122,grad_norm: 0.9999994073286314, iteration: 94211
loss: 1.0162514448165894,grad_norm: 0.9999992830950021, iteration: 94212
loss: 1.0036333799362183,grad_norm: 0.9999992113963808, iteration: 94213
loss: 1.021531343460083,grad_norm: 0.9999992637196736, iteration: 94214
loss: 1.0253554582595825,grad_norm: 0.99999925027767, iteration: 94215
loss: 1.0245345830917358,grad_norm: 0.9663015911075353, iteration: 94216
loss: 0.9713155031204224,grad_norm: 0.8963838796854272, iteration: 94217
loss: 0.9891769886016846,grad_norm: 0.9778505944173751, iteration: 94218
loss: 1.0126214027404785,grad_norm: 0.9999994950287124, iteration: 94219
loss: 1.0244828462600708,grad_norm: 0.9999991052703776, iteration: 94220
loss: 1.009468674659729,grad_norm: 0.9999990470062585, iteration: 94221
loss: 1.0077009201049805,grad_norm: 0.9999989812945366, iteration: 94222
loss: 0.9647198915481567,grad_norm: 0.9968440499765545, iteration: 94223
loss: 0.9700871109962463,grad_norm: 0.9999990985538331, iteration: 94224
loss: 1.02849543094635,grad_norm: 0.928458449326812, iteration: 94225
loss: 0.9869349002838135,grad_norm: 0.9999991513389355, iteration: 94226
loss: 1.0084879398345947,grad_norm: 0.9999989974294559, iteration: 94227
loss: 1.0130654573440552,grad_norm: 0.9999992106179795, iteration: 94228
loss: 1.0067416429519653,grad_norm: 0.9999992170979376, iteration: 94229
loss: 1.005815863609314,grad_norm: 0.9489511349522102, iteration: 94230
loss: 0.9773193001747131,grad_norm: 0.9999992453086216, iteration: 94231
loss: 1.0381529331207275,grad_norm: 0.9999999188477816, iteration: 94232
loss: 0.9579592943191528,grad_norm: 0.9057945599310437, iteration: 94233
loss: 1.0171825885772705,grad_norm: 0.9999991458510755, iteration: 94234
loss: 0.9913482666015625,grad_norm: 0.8867795774787635, iteration: 94235
loss: 0.9980708360671997,grad_norm: 0.9999990490464565, iteration: 94236
loss: 0.9915995597839355,grad_norm: 0.9940015793608322, iteration: 94237
loss: 0.983741819858551,grad_norm: 0.9999991941258766, iteration: 94238
loss: 0.990309476852417,grad_norm: 0.9713267118139522, iteration: 94239
loss: 1.0152901411056519,grad_norm: 0.999999182637452, iteration: 94240
loss: 0.9769646525382996,grad_norm: 0.9914263859504746, iteration: 94241
loss: 1.0460871458053589,grad_norm: 0.9999991857473363, iteration: 94242
loss: 1.0214096307754517,grad_norm: 0.9448072928447633, iteration: 94243
loss: 1.0275192260742188,grad_norm: 0.9999988928214913, iteration: 94244
loss: 0.9950727224349976,grad_norm: 0.9785699379810817, iteration: 94245
loss: 1.0199335813522339,grad_norm: 0.8251528112590325, iteration: 94246
loss: 1.0046700239181519,grad_norm: 0.9999994862575253, iteration: 94247
loss: 0.9808013439178467,grad_norm: 0.9999989609187513, iteration: 94248
loss: 0.9628864526748657,grad_norm: 0.9999992100065875, iteration: 94249
loss: 1.0308051109313965,grad_norm: 0.9999991922218725, iteration: 94250
loss: 1.005185604095459,grad_norm: 0.9999989979380117, iteration: 94251
loss: 1.0274981260299683,grad_norm: 0.9312717810546352, iteration: 94252
loss: 1.0292255878448486,grad_norm: 0.9999996065995082, iteration: 94253
loss: 1.1163160800933838,grad_norm: 0.999999167522647, iteration: 94254
loss: 0.9844129681587219,grad_norm: 0.9086189368538988, iteration: 94255
loss: 0.9638885855674744,grad_norm: 0.9999989285318874, iteration: 94256
loss: 0.9840802550315857,grad_norm: 0.9999990542183934, iteration: 94257
loss: 1.077815055847168,grad_norm: 0.99999929555858, iteration: 94258
loss: 0.9788582921028137,grad_norm: 0.9999992360087552, iteration: 94259
loss: 1.0325944423675537,grad_norm: 0.9999991935998332, iteration: 94260
loss: 1.0063650608062744,grad_norm: 0.9999991549193437, iteration: 94261
loss: 1.0198265314102173,grad_norm: 0.9999989811802626, iteration: 94262
loss: 1.004909873008728,grad_norm: 0.9999990909137074, iteration: 94263
loss: 0.9567179679870605,grad_norm: 0.9999991629122059, iteration: 94264
loss: 0.9868943095207214,grad_norm: 0.9999989759344473, iteration: 94265
loss: 1.0226622819900513,grad_norm: 0.9999991991937416, iteration: 94266
loss: 0.9954918026924133,grad_norm: 0.9730276924445059, iteration: 94267
loss: 0.9691576957702637,grad_norm: 0.9999990022260296, iteration: 94268
loss: 1.0153719186782837,grad_norm: 0.9999992650213665, iteration: 94269
loss: 0.9801139235496521,grad_norm: 0.9999993716620296, iteration: 94270
loss: 0.9876354336738586,grad_norm: 0.9212995295904762, iteration: 94271
loss: 0.9832841753959656,grad_norm: 0.9999991425361117, iteration: 94272
loss: 1.0093140602111816,grad_norm: 0.9999991856179171, iteration: 94273
loss: 1.019665002822876,grad_norm: 0.9999990378064666, iteration: 94274
loss: 1.004840612411499,grad_norm: 0.9869997016143882, iteration: 94275
loss: 0.9819157123565674,grad_norm: 0.9999989920670146, iteration: 94276
loss: 1.019397258758545,grad_norm: 0.9999992204768595, iteration: 94277
loss: 0.9796678423881531,grad_norm: 0.948177653117096, iteration: 94278
loss: 1.0161851644515991,grad_norm: 0.9999991798215851, iteration: 94279
loss: 0.9960600137710571,grad_norm: 0.9999991621593194, iteration: 94280
loss: 1.0224287509918213,grad_norm: 0.999999245308229, iteration: 94281
loss: 1.0308514833450317,grad_norm: 0.9999990514650547, iteration: 94282
loss: 0.9629848599433899,grad_norm: 0.9999990825918752, iteration: 94283
loss: 1.0182874202728271,grad_norm: 0.9999991850809522, iteration: 94284
loss: 1.009110927581787,grad_norm: 0.9999992369791839, iteration: 94285
loss: 1.016254186630249,grad_norm: 0.9810365226024685, iteration: 94286
loss: 1.0144304037094116,grad_norm: 0.9999991560200681, iteration: 94287
loss: 1.004381775856018,grad_norm: 0.9999998178417021, iteration: 94288
loss: 1.0010603666305542,grad_norm: 0.9999990812214063, iteration: 94289
loss: 0.9718844890594482,grad_norm: 0.9512429256899025, iteration: 94290
loss: 1.0174031257629395,grad_norm: 0.9999992630150235, iteration: 94291
loss: 1.0209773778915405,grad_norm: 0.9999990044059148, iteration: 94292
loss: 0.9726848602294922,grad_norm: 0.999999286113701, iteration: 94293
loss: 0.9691187739372253,grad_norm: 0.9566423807810841, iteration: 94294
loss: 1.0265311002731323,grad_norm: 0.9999991275881616, iteration: 94295
loss: 0.9917264580726624,grad_norm: 0.9999991317576286, iteration: 94296
loss: 1.018834114074707,grad_norm: 0.9999991521785304, iteration: 94297
loss: 1.0182043313980103,grad_norm: 0.999999037097832, iteration: 94298
loss: 1.0154197216033936,grad_norm: 0.9999991263140343, iteration: 94299
loss: 1.0378962755203247,grad_norm: 0.9624511496528995, iteration: 94300
loss: 1.023179292678833,grad_norm: 0.9999991053491749, iteration: 94301
loss: 0.9769992232322693,grad_norm: 0.971507524304698, iteration: 94302
loss: 0.9728191494941711,grad_norm: 0.9860940270966162, iteration: 94303
loss: 1.0130839347839355,grad_norm: 0.9999991239487888, iteration: 94304
loss: 0.9885773658752441,grad_norm: 0.9258371527510737, iteration: 94305
loss: 1.0330549478530884,grad_norm: 0.8972580600419425, iteration: 94306
loss: 1.0160648822784424,grad_norm: 0.9999991550451689, iteration: 94307
loss: 1.030764102935791,grad_norm: 0.9999992411575886, iteration: 94308
loss: 1.009178638458252,grad_norm: 0.9999991978199643, iteration: 94309
loss: 0.9997361898422241,grad_norm: 0.9999990417422904, iteration: 94310
loss: 0.9834229946136475,grad_norm: 0.8406774663404154, iteration: 94311
loss: 0.9785928726196289,grad_norm: 0.9398521780492698, iteration: 94312
loss: 0.9809393286705017,grad_norm: 0.9999991232409475, iteration: 94313
loss: 0.9933517575263977,grad_norm: 0.9780040422090951, iteration: 94314
loss: 0.9584529995918274,grad_norm: 0.9999991292675054, iteration: 94315
loss: 0.976658821105957,grad_norm: 0.9999991154946875, iteration: 94316
loss: 0.982635498046875,grad_norm: 0.9999990564295361, iteration: 94317
loss: 1.0289345979690552,grad_norm: 0.9999991220078606, iteration: 94318
loss: 1.0129420757293701,grad_norm: 0.8810448879877104, iteration: 94319
loss: 1.0302722454071045,grad_norm: 0.981602036270434, iteration: 94320
loss: 0.9686877727508545,grad_norm: 0.9700635095916822, iteration: 94321
loss: 0.9826738238334656,grad_norm: 0.8865706045992965, iteration: 94322
loss: 1.0223701000213623,grad_norm: 0.9999990666121835, iteration: 94323
loss: 1.0043811798095703,grad_norm: 0.9999995333405078, iteration: 94324
loss: 1.029998779296875,grad_norm: 0.9309979916557728, iteration: 94325
loss: 1.006223440170288,grad_norm: 0.9999992171208519, iteration: 94326
loss: 0.9795942902565002,grad_norm: 0.9999991262909839, iteration: 94327
loss: 1.022477388381958,grad_norm: 0.9374167420078625, iteration: 94328
loss: 1.0298547744750977,grad_norm: 0.8672546098414528, iteration: 94329
loss: 1.0067261457443237,grad_norm: 0.9999992930785584, iteration: 94330
loss: 0.9891168475151062,grad_norm: 0.981432352132275, iteration: 94331
loss: 1.00314199924469,grad_norm: 0.9635931138065912, iteration: 94332
loss: 1.0321660041809082,grad_norm: 0.9999990872904188, iteration: 94333
loss: 1.0377267599105835,grad_norm: 0.9999989823453163, iteration: 94334
loss: 1.0525386333465576,grad_norm: 0.9999992820750259, iteration: 94335
loss: 0.9935877323150635,grad_norm: 0.9849624169536261, iteration: 94336
loss: 0.9787582159042358,grad_norm: 0.9999992669401461, iteration: 94337
loss: 1.0165685415267944,grad_norm: 0.9999989922781392, iteration: 94338
loss: 0.9850139021873474,grad_norm: 0.9999992380678004, iteration: 94339
loss: 1.0008829832077026,grad_norm: 0.9999991232374393, iteration: 94340
loss: 1.0198237895965576,grad_norm: 0.960598039944811, iteration: 94341
loss: 1.002743124961853,grad_norm: 0.9999992622123458, iteration: 94342
loss: 0.9920014142990112,grad_norm: 0.9999992399075299, iteration: 94343
loss: 0.9784685373306274,grad_norm: 0.9999993510853415, iteration: 94344
loss: 1.0008593797683716,grad_norm: 0.9999990323134035, iteration: 94345
loss: 0.9874194264411926,grad_norm: 0.9999991220698261, iteration: 94346
loss: 0.9538877010345459,grad_norm: 0.9999990658297898, iteration: 94347
loss: 0.9977366924285889,grad_norm: 0.9999992093500584, iteration: 94348
loss: 1.0216810703277588,grad_norm: 0.8341064193302364, iteration: 94349
loss: 0.9747189879417419,grad_norm: 0.999999296322154, iteration: 94350
loss: 1.0018917322158813,grad_norm: 0.9999990669189787, iteration: 94351
loss: 0.9869468212127686,grad_norm: 0.9999991636219897, iteration: 94352
loss: 1.0321850776672363,grad_norm: 0.959330049403783, iteration: 94353
loss: 0.99173903465271,grad_norm: 0.9578429799269084, iteration: 94354
loss: 0.9548528790473938,grad_norm: 0.999999115163545, iteration: 94355
loss: 1.0236924886703491,grad_norm: 0.9999990411555172, iteration: 94356
loss: 0.9980736970901489,grad_norm: 0.9999991003272153, iteration: 94357
loss: 0.9647539258003235,grad_norm: 0.9999992672933508, iteration: 94358
loss: 0.9982969164848328,grad_norm: 0.9999993778256586, iteration: 94359
loss: 0.9804167151451111,grad_norm: 0.9999990847721323, iteration: 94360
loss: 0.9822201728820801,grad_norm: 0.9223577162968339, iteration: 94361
loss: 1.0132215023040771,grad_norm: 0.9999990387039304, iteration: 94362
loss: 0.9686427116394043,grad_norm: 0.9999990655619595, iteration: 94363
loss: 1.0122032165527344,grad_norm: 0.9999990144124524, iteration: 94364
loss: 0.9811229109764099,grad_norm: 0.9999989598620538, iteration: 94365
loss: 1.0133109092712402,grad_norm: 0.9999990029206308, iteration: 94366
loss: 0.9520813822746277,grad_norm: 0.9182716903669522, iteration: 94367
loss: 1.0256009101867676,grad_norm: 0.9999993722661583, iteration: 94368
loss: 1.0641549825668335,grad_norm: 0.999999151079365, iteration: 94369
loss: 0.9700002074241638,grad_norm: 0.9999991458331298, iteration: 94370
loss: 1.05547034740448,grad_norm: 0.9999991876657527, iteration: 94371
loss: 1.0073312520980835,grad_norm: 0.9791211564350717, iteration: 94372
loss: 0.9795066118240356,grad_norm: 0.9999990649054062, iteration: 94373
loss: 0.9646285176277161,grad_norm: 0.9999992196477265, iteration: 94374
loss: 1.0445194244384766,grad_norm: 0.9999989547843884, iteration: 94375
loss: 0.9730089902877808,grad_norm: 0.9999993533854095, iteration: 94376
loss: 1.0098448991775513,grad_norm: 0.9999991494466978, iteration: 94377
loss: 0.9867419004440308,grad_norm: 0.9999990718871511, iteration: 94378
loss: 1.0297479629516602,grad_norm: 0.9999998221617178, iteration: 94379
loss: 0.9988661408424377,grad_norm: 0.9999992714881062, iteration: 94380
loss: 1.019742727279663,grad_norm: 0.9999991217559183, iteration: 94381
loss: 1.0019043684005737,grad_norm: 0.9966600441089665, iteration: 94382
loss: 0.9987374544143677,grad_norm: 0.9999990628111116, iteration: 94383
loss: 0.9858024716377258,grad_norm: 0.9999997890000215, iteration: 94384
loss: 1.036830186843872,grad_norm: 0.896384110653436, iteration: 94385
loss: 1.084702491760254,grad_norm: 0.9999997721842016, iteration: 94386
loss: 1.003191351890564,grad_norm: 0.999999427533225, iteration: 94387
loss: 1.0243316888809204,grad_norm: 0.8844776950746408, iteration: 94388
loss: 0.9739444255828857,grad_norm: 0.9516531969195222, iteration: 94389
loss: 1.0018349885940552,grad_norm: 0.8036883642325561, iteration: 94390
loss: 0.9982683658599854,grad_norm: 0.9999989814257153, iteration: 94391
loss: 0.9856385588645935,grad_norm: 0.9999994012165927, iteration: 94392
loss: 1.0006273984909058,grad_norm: 0.9999990954070239, iteration: 94393
loss: 1.0216269493103027,grad_norm: 0.8233043364016429, iteration: 94394
loss: 1.0104808807373047,grad_norm: 0.9999992217846481, iteration: 94395
loss: 0.9911417365074158,grad_norm: 0.999999025980781, iteration: 94396
loss: 1.0100886821746826,grad_norm: 0.9999997522471624, iteration: 94397
loss: 1.040822982788086,grad_norm: 0.9999993238605382, iteration: 94398
loss: 1.0056803226470947,grad_norm: 0.9108306407402676, iteration: 94399
loss: 1.0155497789382935,grad_norm: 0.922973727785934, iteration: 94400
loss: 0.9914669990539551,grad_norm: 0.999999200769652, iteration: 94401
loss: 0.9988179206848145,grad_norm: 0.9999991913339648, iteration: 94402
loss: 0.9894180297851562,grad_norm: 0.9999990417466804, iteration: 94403
loss: 0.9979847073554993,grad_norm: 0.9999995675811724, iteration: 94404
loss: 0.9493803381919861,grad_norm: 0.9999991717442868, iteration: 94405
loss: 1.0314384698867798,grad_norm: 0.9563772192424147, iteration: 94406
loss: 0.9971082806587219,grad_norm: 0.993410755313309, iteration: 94407
loss: 1.0231382846832275,grad_norm: 0.9082671044616444, iteration: 94408
loss: 1.0401371717453003,grad_norm: 0.9999990811931468, iteration: 94409
loss: 0.9937487244606018,grad_norm: 0.9909818123407453, iteration: 94410
loss: 1.014089822769165,grad_norm: 0.9999998050413739, iteration: 94411
loss: 0.9809096455574036,grad_norm: 0.9999991608895561, iteration: 94412
loss: 0.9836506247520447,grad_norm: 0.999999228278726, iteration: 94413
loss: 0.9939795732498169,grad_norm: 0.9999991561941439, iteration: 94414
loss: 1.0253769159317017,grad_norm: 0.9999990987971358, iteration: 94415
loss: 1.005987524986267,grad_norm: 0.9999991125555494, iteration: 94416
loss: 0.9650581479072571,grad_norm: 0.9855137242760397, iteration: 94417
loss: 1.0138603448867798,grad_norm: 0.9999992497213886, iteration: 94418
loss: 0.9898079037666321,grad_norm: 0.9020868660345025, iteration: 94419
loss: 1.0072697401046753,grad_norm: 0.9830493720337992, iteration: 94420
loss: 0.9863929748535156,grad_norm: 0.9999991279475734, iteration: 94421
loss: 0.9970964193344116,grad_norm: 0.9999990729214521, iteration: 94422
loss: 0.9729058742523193,grad_norm: 0.9999990536022518, iteration: 94423
loss: 0.9954811334609985,grad_norm: 0.9298943347851939, iteration: 94424
loss: 1.0397957563400269,grad_norm: 0.9999996169220345, iteration: 94425
loss: 1.0088622570037842,grad_norm: 0.9999988814170657, iteration: 94426
loss: 1.0496599674224854,grad_norm: 0.9999992473083928, iteration: 94427
loss: 0.9939046502113342,grad_norm: 0.9873230619140437, iteration: 94428
loss: 1.0627747774124146,grad_norm: 0.9999994792576662, iteration: 94429
loss: 1.0060755014419556,grad_norm: 0.9999992061445678, iteration: 94430
loss: 1.0068897008895874,grad_norm: 0.9999991471473443, iteration: 94431
loss: 1.0108572244644165,grad_norm: 0.9999989910235132, iteration: 94432
loss: 1.0967068672180176,grad_norm: 0.9999993516311899, iteration: 94433
loss: 1.0405195951461792,grad_norm: 0.9999992533383825, iteration: 94434
loss: 0.998604416847229,grad_norm: 0.9999993128473895, iteration: 94435
loss: 1.039036750793457,grad_norm: 0.9999990405296463, iteration: 94436
loss: 1.0117875337600708,grad_norm: 0.9999989473357017, iteration: 94437
loss: 0.9496344923973083,grad_norm: 0.9999992776582356, iteration: 94438
loss: 1.050761342048645,grad_norm: 0.9999991600143746, iteration: 94439
loss: 0.9731723666191101,grad_norm: 0.9999991626830795, iteration: 94440
loss: 0.9368338584899902,grad_norm: 0.9999993287369794, iteration: 94441
loss: 1.0314257144927979,grad_norm: 0.9999993977079278, iteration: 94442
loss: 0.977026641368866,grad_norm: 0.96112431223991, iteration: 94443
loss: 0.9814085960388184,grad_norm: 0.9390847998214118, iteration: 94444
loss: 1.0332540273666382,grad_norm: 0.9999993048989811, iteration: 94445
loss: 1.0015968084335327,grad_norm: 0.9999992336280008, iteration: 94446
loss: 1.001991868019104,grad_norm: 0.894104900142814, iteration: 94447
loss: 0.9846401214599609,grad_norm: 0.8779893341861558, iteration: 94448
loss: 0.976125955581665,grad_norm: 0.9999990213784712, iteration: 94449
loss: 1.0148624181747437,grad_norm: 0.9999994118812683, iteration: 94450
loss: 1.0333939790725708,grad_norm: 0.9999992069820381, iteration: 94451
loss: 1.0282173156738281,grad_norm: 0.9638632091845697, iteration: 94452
loss: 1.0164036750793457,grad_norm: 0.9719002584069741, iteration: 94453
loss: 1.0101932287216187,grad_norm: 0.9800367048056798, iteration: 94454
loss: 0.9952834248542786,grad_norm: 0.9999991991904064, iteration: 94455
loss: 1.038723349571228,grad_norm: 0.9999992991993182, iteration: 94456
loss: 0.9756531119346619,grad_norm: 0.9999993481592795, iteration: 94457
loss: 1.0660080909729004,grad_norm: 0.9999994983125513, iteration: 94458
loss: 0.9597488641738892,grad_norm: 0.9086805694823621, iteration: 94459
loss: 1.039854645729065,grad_norm: 0.9999991711634955, iteration: 94460
loss: 1.0174769163131714,grad_norm: 0.8435361356419623, iteration: 94461
loss: 0.9697872996330261,grad_norm: 0.994650037864005, iteration: 94462
loss: 0.9676989316940308,grad_norm: 0.9999990306872493, iteration: 94463
loss: 0.9992011785507202,grad_norm: 0.9999997191108124, iteration: 94464
loss: 0.9910265803337097,grad_norm: 0.9999993035608655, iteration: 94465
loss: 1.0353525876998901,grad_norm: 0.9999991442385213, iteration: 94466
loss: 0.9892924427986145,grad_norm: 1.0000000652675258, iteration: 94467
loss: 0.9835551977157593,grad_norm: 0.9999991611600391, iteration: 94468
loss: 1.0157779455184937,grad_norm: 0.9653759521075117, iteration: 94469
loss: 1.0270304679870605,grad_norm: 0.9999994323112977, iteration: 94470
loss: 0.9565623998641968,grad_norm: 0.9999990239005191, iteration: 94471
loss: 0.9740133881568909,grad_norm: 0.9770279632959328, iteration: 94472
loss: 0.9721547961235046,grad_norm: 0.9999990993000768, iteration: 94473
loss: 0.9993687272071838,grad_norm: 0.9999990138038598, iteration: 94474
loss: 1.0396690368652344,grad_norm: 0.9999991967716688, iteration: 94475
loss: 0.9708458185195923,grad_norm: 0.9999991184552219, iteration: 94476
loss: 0.9818406105041504,grad_norm: 0.9999989528192029, iteration: 94477
loss: 0.9770544171333313,grad_norm: 0.9999989857298976, iteration: 94478
loss: 0.959118664264679,grad_norm: 0.9999991420826686, iteration: 94479
loss: 1.0204132795333862,grad_norm: 0.9999992971274431, iteration: 94480
loss: 0.9517350792884827,grad_norm: 0.9999990295915654, iteration: 94481
loss: 1.0256125926971436,grad_norm: 0.9013581208004584, iteration: 94482
loss: 1.0203866958618164,grad_norm: 0.9490431806768116, iteration: 94483
loss: 1.0083144903182983,grad_norm: 0.9999991806699847, iteration: 94484
loss: 1.037980556488037,grad_norm: 0.9999992511185699, iteration: 94485
loss: 1.0043236017227173,grad_norm: 0.9048259084504142, iteration: 94486
loss: 0.9839010238647461,grad_norm: 0.900654545889559, iteration: 94487
loss: 1.0154697895050049,grad_norm: 0.9999989665767102, iteration: 94488
loss: 1.0777537822723389,grad_norm: 0.9999996995431568, iteration: 94489
loss: 0.9900718927383423,grad_norm: 0.9999991824875852, iteration: 94490
loss: 0.969775915145874,grad_norm: 0.9999991805560274, iteration: 94491
loss: 0.9882118105888367,grad_norm: 0.958563523928617, iteration: 94492
loss: 1.0278940200805664,grad_norm: 0.9999992781115461, iteration: 94493
loss: 1.0074317455291748,grad_norm: 0.9999991517099482, iteration: 94494
loss: 1.0402851104736328,grad_norm: 0.9999989758661617, iteration: 94495
loss: 1.0132944583892822,grad_norm: 0.9999990630637162, iteration: 94496
loss: 1.0019090175628662,grad_norm: 0.9201482286406049, iteration: 94497
loss: 0.985113799571991,grad_norm: 0.9882721288084702, iteration: 94498
loss: 0.997882604598999,grad_norm: 0.9802424150154457, iteration: 94499
loss: 1.011562705039978,grad_norm: 0.9848113044325395, iteration: 94500
loss: 0.9773169159889221,grad_norm: 0.9999991238345791, iteration: 94501
loss: 0.981833279132843,grad_norm: 0.9999990499089602, iteration: 94502
loss: 1.0048191547393799,grad_norm: 0.9999992015066935, iteration: 94503
loss: 0.9853309392929077,grad_norm: 0.9999992663432968, iteration: 94504
loss: 1.0409055948257446,grad_norm: 0.999999030735232, iteration: 94505
loss: 0.9928655624389648,grad_norm: 0.9999992002683461, iteration: 94506
loss: 0.9852995276451111,grad_norm: 0.9999990702864882, iteration: 94507
loss: 0.9832388162612915,grad_norm: 0.9999991102654805, iteration: 94508
loss: 0.9804427623748779,grad_norm: 0.9999991202251943, iteration: 94509
loss: 0.9957733750343323,grad_norm: 0.880140652187167, iteration: 94510
loss: 1.0061204433441162,grad_norm: 0.9999989195640268, iteration: 94511
loss: 0.9816126227378845,grad_norm: 0.9999992225965043, iteration: 94512
loss: 0.9909278154373169,grad_norm: 0.9999992593178522, iteration: 94513
loss: 0.9757290482521057,grad_norm: 0.9101022862972612, iteration: 94514
loss: 1.0020685195922852,grad_norm: 0.9999989370178038, iteration: 94515
loss: 0.9531338810920715,grad_norm: 0.9825090817240434, iteration: 94516
loss: 1.0423508882522583,grad_norm: 0.9999993107475987, iteration: 94517
loss: 1.0268462896347046,grad_norm: 0.9999990642293611, iteration: 94518
loss: 0.9626838564872742,grad_norm: 0.9999990343983173, iteration: 94519
loss: 1.0141180753707886,grad_norm: 0.9999992600032175, iteration: 94520
loss: 0.999794065952301,grad_norm: 0.9110768732911727, iteration: 94521
loss: 0.9966463446617126,grad_norm: 0.9999991588102528, iteration: 94522
loss: 1.0091514587402344,grad_norm: 0.910326101688705, iteration: 94523
loss: 0.9887006282806396,grad_norm: 0.9999992435553668, iteration: 94524
loss: 0.9735271334648132,grad_norm: 0.8205341167954535, iteration: 94525
loss: 1.005695104598999,grad_norm: 0.882792670003778, iteration: 94526
loss: 1.0480650663375854,grad_norm: 0.9999995965952041, iteration: 94527
loss: 1.0044993162155151,grad_norm: 0.9999998671157, iteration: 94528
loss: 1.0251731872558594,grad_norm: 0.9367551668395814, iteration: 94529
loss: 1.0256762504577637,grad_norm: 0.9999992419260239, iteration: 94530
loss: 0.9763501286506653,grad_norm: 0.9999992051616434, iteration: 94531
loss: 0.9967799186706543,grad_norm: 0.9135247942993727, iteration: 94532
loss: 0.9802667498588562,grad_norm: 0.9941267256812585, iteration: 94533
loss: 1.0296019315719604,grad_norm: 0.9999992367991338, iteration: 94534
loss: 0.937874972820282,grad_norm: 0.8932652942747051, iteration: 94535
loss: 1.0510451793670654,grad_norm: 0.9999993439376483, iteration: 94536
loss: 1.0210925340652466,grad_norm: 0.9999990205105771, iteration: 94537
loss: 1.0155867338180542,grad_norm: 0.9999990337672702, iteration: 94538
loss: 0.9922880530357361,grad_norm: 0.9999991308659476, iteration: 94539
loss: 0.9732931852340698,grad_norm: 0.8429770619902839, iteration: 94540
loss: 0.9958471059799194,grad_norm: 0.9999992190751001, iteration: 94541
loss: 1.0067499876022339,grad_norm: 0.9999991182808751, iteration: 94542
loss: 1.0619359016418457,grad_norm: 0.99999980841421, iteration: 94543
loss: 0.98450767993927,grad_norm: 0.9999992305894037, iteration: 94544
loss: 1.0171058177947998,grad_norm: 0.9539517090075531, iteration: 94545
loss: 0.9421933889389038,grad_norm: 0.9420122134004012, iteration: 94546
loss: 0.9957811236381531,grad_norm: 0.9999992750440041, iteration: 94547
loss: 1.027856707572937,grad_norm: 0.9999991941930949, iteration: 94548
loss: 0.9903733134269714,grad_norm: 0.8449059585384274, iteration: 94549
loss: 0.9770965576171875,grad_norm: 0.999999047622641, iteration: 94550
loss: 0.9979488253593445,grad_norm: 0.9906108412426202, iteration: 94551
loss: 0.9975447654724121,grad_norm: 0.9015840851316049, iteration: 94552
loss: 1.0060169696807861,grad_norm: 0.9999991335190785, iteration: 94553
loss: 1.0328670740127563,grad_norm: 0.9999992057762678, iteration: 94554
loss: 0.9776334762573242,grad_norm: 0.9999990018435109, iteration: 94555
loss: 0.979827880859375,grad_norm: 0.9999991505878887, iteration: 94556
loss: 0.9748445749282837,grad_norm: 0.9191579297418336, iteration: 94557
loss: 1.020137906074524,grad_norm: 0.9999992571100214, iteration: 94558
loss: 0.9858552813529968,grad_norm: 0.9691956069499851, iteration: 94559
loss: 0.9964570999145508,grad_norm: 0.9999990106495893, iteration: 94560
loss: 0.9816316366195679,grad_norm: 0.9999991535588048, iteration: 94561
loss: 0.9936760663986206,grad_norm: 0.9999991486748439, iteration: 94562
loss: 0.9731954336166382,grad_norm: 0.9999992364163128, iteration: 94563
loss: 0.9994794130325317,grad_norm: 0.9999991303527835, iteration: 94564
loss: 1.016882061958313,grad_norm: 0.9999992153542159, iteration: 94565
loss: 1.0188429355621338,grad_norm: 0.999999015111892, iteration: 94566
loss: 1.0126523971557617,grad_norm: 0.999576906121106, iteration: 94567
loss: 1.0020567178726196,grad_norm: 0.9999991357909971, iteration: 94568
loss: 1.0098979473114014,grad_norm: 0.9999993413120293, iteration: 94569
loss: 0.9983172416687012,grad_norm: 0.9999990756184112, iteration: 94570
loss: 0.9674997925758362,grad_norm: 0.9074695487578854, iteration: 94571
loss: 0.9708178043365479,grad_norm: 0.999999260015626, iteration: 94572
loss: 0.9950256943702698,grad_norm: 0.9999989216398337, iteration: 94573
loss: 0.9749673008918762,grad_norm: 0.9771486975332516, iteration: 94574
loss: 1.010675072669983,grad_norm: 0.9999990850760146, iteration: 94575
loss: 1.0102031230926514,grad_norm: 0.9999993626714117, iteration: 94576
loss: 1.050320029258728,grad_norm: 0.9999992444687553, iteration: 94577
loss: 0.9987208247184753,grad_norm: 0.8852259054664839, iteration: 94578
loss: 1.0561295747756958,grad_norm: 0.9999990720501915, iteration: 94579
loss: 0.9730823040008545,grad_norm: 0.965080281690487, iteration: 94580
loss: 0.998089611530304,grad_norm: 0.9705782255879849, iteration: 94581
loss: 0.9782947897911072,grad_norm: 0.9999991691831701, iteration: 94582
loss: 0.9833293557167053,grad_norm: 0.973621578817563, iteration: 94583
loss: 1.008573293685913,grad_norm: 0.9999993393024356, iteration: 94584
loss: 0.9943101406097412,grad_norm: 0.9999991758357314, iteration: 94585
loss: 0.955813467502594,grad_norm: 0.9999990459196104, iteration: 94586
loss: 1.0226109027862549,grad_norm: 0.8978698810069965, iteration: 94587
loss: 0.9954121112823486,grad_norm: 0.9999990428711252, iteration: 94588
loss: 1.0254013538360596,grad_norm: 0.9868991158476071, iteration: 94589
loss: 0.9915421009063721,grad_norm: 0.9999991866653487, iteration: 94590
loss: 0.9782311916351318,grad_norm: 0.9999993986239232, iteration: 94591
loss: 1.0519020557403564,grad_norm: 0.9999998048983444, iteration: 94592
loss: 0.9722508788108826,grad_norm: 0.9999990447935448, iteration: 94593
loss: 0.9647848606109619,grad_norm: 0.9537393247967498, iteration: 94594
loss: 0.9989660382270813,grad_norm: 0.9662985615664901, iteration: 94595
loss: 1.0034416913986206,grad_norm: 0.999999033746802, iteration: 94596
loss: 0.9900293350219727,grad_norm: 0.9999990788646316, iteration: 94597
loss: 1.0027687549591064,grad_norm: 0.8980132047293986, iteration: 94598
loss: 0.953224241733551,grad_norm: 0.9255628230819447, iteration: 94599
loss: 1.0113511085510254,grad_norm: 0.9999991370933853, iteration: 94600
loss: 0.9932560920715332,grad_norm: 0.943424821731883, iteration: 94601
loss: 1.0178347826004028,grad_norm: 0.9195543101041525, iteration: 94602
loss: 1.0015411376953125,grad_norm: 0.9999992082813577, iteration: 94603
loss: 0.9990056157112122,grad_norm: 0.9999990619473152, iteration: 94604
loss: 1.0227247476577759,grad_norm: 0.9999992051107544, iteration: 94605
loss: 0.9790621995925903,grad_norm: 0.9999989868437619, iteration: 94606
loss: 1.029488444328308,grad_norm: 0.9999992867099322, iteration: 94607
loss: 0.9905312061309814,grad_norm: 0.9999990397307537, iteration: 94608
loss: 1.0007824897766113,grad_norm: 0.9999993660508512, iteration: 94609
loss: 1.0208942890167236,grad_norm: 0.999999069536249, iteration: 94610
loss: 1.0198826789855957,grad_norm: 0.9981614075158046, iteration: 94611
loss: 0.9967958927154541,grad_norm: 0.9999992973457498, iteration: 94612
loss: 0.9914411902427673,grad_norm: 0.9999991637732696, iteration: 94613
loss: 0.9884442090988159,grad_norm: 0.9999990291266077, iteration: 94614
loss: 1.0015263557434082,grad_norm: 0.9999990331000762, iteration: 94615
loss: 0.9717510938644409,grad_norm: 0.999999063886162, iteration: 94616
loss: 1.0067380666732788,grad_norm: 0.9023014107969376, iteration: 94617
loss: 0.9989987015724182,grad_norm: 0.9999990558248124, iteration: 94618
loss: 0.9790170788764954,grad_norm: 0.9752156111199684, iteration: 94619
loss: 1.030987024307251,grad_norm: 0.9999991779973765, iteration: 94620
loss: 1.0352011919021606,grad_norm: 0.9999993082597196, iteration: 94621
loss: 1.0325125455856323,grad_norm: 0.9999993464316778, iteration: 94622
loss: 1.0047051906585693,grad_norm: 0.9999991064771068, iteration: 94623
loss: 0.9736601114273071,grad_norm: 0.9999992268698498, iteration: 94624
loss: 0.9919509887695312,grad_norm: 0.9999994379249814, iteration: 94625
loss: 0.9804799556732178,grad_norm: 0.9999990456210199, iteration: 94626
loss: 1.0095021724700928,grad_norm: 0.9999990394033028, iteration: 94627
loss: 1.0005695819854736,grad_norm: 0.999999318673189, iteration: 94628
loss: 1.014541506767273,grad_norm: 0.9822918072289336, iteration: 94629
loss: 0.9947717785835266,grad_norm: 0.9999991077731936, iteration: 94630
loss: 1.0148955583572388,grad_norm: 0.9999991257742346, iteration: 94631
loss: 1.0407400131225586,grad_norm: 0.9142610396297581, iteration: 94632
loss: 0.9931433796882629,grad_norm: 0.9999991423087622, iteration: 94633
loss: 1.0124891996383667,grad_norm: 0.8975928482383788, iteration: 94634
loss: 0.9812151789665222,grad_norm: 0.8897802706638783, iteration: 94635
loss: 1.0251356363296509,grad_norm: 0.9999992457425371, iteration: 94636
loss: 0.988791286945343,grad_norm: 0.9999991261864373, iteration: 94637
loss: 1.0062910318374634,grad_norm: 0.9535501082672263, iteration: 94638
loss: 1.0393214225769043,grad_norm: 0.9999993630605689, iteration: 94639
loss: 1.0043919086456299,grad_norm: 0.9999992386253161, iteration: 94640
loss: 1.012359619140625,grad_norm: 0.933459502758837, iteration: 94641
loss: 0.9842324256896973,grad_norm: 0.9999991553270711, iteration: 94642
loss: 0.9823805689811707,grad_norm: 0.9999993274157246, iteration: 94643
loss: 0.9972848296165466,grad_norm: 0.9652206601658068, iteration: 94644
loss: 0.9859199523925781,grad_norm: 0.9999993205193622, iteration: 94645
loss: 1.0054898262023926,grad_norm: 0.9879137818029244, iteration: 94646
loss: 1.0192248821258545,grad_norm: 0.9999990028921107, iteration: 94647
loss: 0.9830735921859741,grad_norm: 0.9999992247378561, iteration: 94648
loss: 1.0119433403015137,grad_norm: 0.9999992597704683, iteration: 94649
loss: 0.9826756715774536,grad_norm: 0.9392999468867316, iteration: 94650
loss: 1.0267047882080078,grad_norm: 0.9999999525144246, iteration: 94651
loss: 1.0104315280914307,grad_norm: 0.9999992168909735, iteration: 94652
loss: 0.972980797290802,grad_norm: 0.9367146945113348, iteration: 94653
loss: 0.9611666798591614,grad_norm: 0.959839759930852, iteration: 94654
loss: 1.012477159500122,grad_norm: 0.9652677819663512, iteration: 94655
loss: 1.0039616823196411,grad_norm: 0.9913652781109851, iteration: 94656
loss: 1.0198488235473633,grad_norm: 0.9999990264765289, iteration: 94657
loss: 0.983452558517456,grad_norm: 0.9999991794714734, iteration: 94658
loss: 0.978693425655365,grad_norm: 0.9956608303635924, iteration: 94659
loss: 1.024624228477478,grad_norm: 0.9891168027044698, iteration: 94660
loss: 0.9839172959327698,grad_norm: 0.9822281487494529, iteration: 94661
loss: 0.9893638491630554,grad_norm: 0.9999993024797141, iteration: 94662
loss: 0.9826930165290833,grad_norm: 0.8515631192329479, iteration: 94663
loss: 1.0162891149520874,grad_norm: 0.9999990551773714, iteration: 94664
loss: 0.9885392785072327,grad_norm: 0.9999989918091734, iteration: 94665
loss: 1.0076133012771606,grad_norm: 0.9999992090571825, iteration: 94666
loss: 1.0449447631835938,grad_norm: 0.9999989727419302, iteration: 94667
loss: 0.9841285347938538,grad_norm: 0.9999990436837497, iteration: 94668
loss: 1.001618504524231,grad_norm: 0.9863347217357405, iteration: 94669
loss: 0.9786177277565002,grad_norm: 0.900355862027792, iteration: 94670
loss: 1.0072495937347412,grad_norm: 0.9074157388494671, iteration: 94671
loss: 0.9480137825012207,grad_norm: 0.9999993382299691, iteration: 94672
loss: 0.9843908548355103,grad_norm: 0.999998990473949, iteration: 94673
loss: 1.0512306690216064,grad_norm: 0.9999992234698448, iteration: 94674
loss: 1.0249497890472412,grad_norm: 0.9999991953480094, iteration: 94675
loss: 1.011122465133667,grad_norm: 0.9999991401632585, iteration: 94676
loss: 0.9645788669586182,grad_norm: 0.8928684849634455, iteration: 94677
loss: 0.9638970494270325,grad_norm: 0.9323869006028115, iteration: 94678
loss: 0.9941513538360596,grad_norm: 0.9999990819998248, iteration: 94679
loss: 0.9937019944190979,grad_norm: 0.9743234894096674, iteration: 94680
loss: 0.9854604005813599,grad_norm: 0.9999990608140864, iteration: 94681
loss: 1.0201308727264404,grad_norm: 0.9999990681019475, iteration: 94682
loss: 0.9658963680267334,grad_norm: 0.9999992616707576, iteration: 94683
loss: 0.9986387491226196,grad_norm: 0.9999990874098043, iteration: 94684
loss: 1.0075196027755737,grad_norm: 0.9999992419431112, iteration: 94685
loss: 0.978639543056488,grad_norm: 0.9999991493078515, iteration: 94686
loss: 1.0397660732269287,grad_norm: 0.9999991865304753, iteration: 94687
loss: 0.9992681741714478,grad_norm: 0.909224670199001, iteration: 94688
loss: 0.9804499745368958,grad_norm: 0.9999990482913618, iteration: 94689
loss: 0.9937846660614014,grad_norm: 0.8962230047224681, iteration: 94690
loss: 0.9937602877616882,grad_norm: 0.9923424184469554, iteration: 94691
loss: 0.9930496215820312,grad_norm: 0.8722182783393049, iteration: 94692
loss: 0.9932872653007507,grad_norm: 0.8859293830662911, iteration: 94693
loss: 1.0238689184188843,grad_norm: 0.9999993531593199, iteration: 94694
loss: 0.9892266988754272,grad_norm: 0.9999990507690568, iteration: 94695
loss: 0.9907387495040894,grad_norm: 0.9636057281220497, iteration: 94696
loss: 1.0110818147659302,grad_norm: 0.9999991148619746, iteration: 94697
loss: 1.0156272649765015,grad_norm: 0.9999990308934754, iteration: 94698
loss: 0.994016170501709,grad_norm: 0.9999990743277829, iteration: 94699
loss: 1.0772989988327026,grad_norm: 0.9999992489127238, iteration: 94700
loss: 0.9683208465576172,grad_norm: 0.9999990286426969, iteration: 94701
loss: 0.9911738038063049,grad_norm: 0.999999393280877, iteration: 94702
loss: 1.0035268068313599,grad_norm: 0.9106104820790293, iteration: 94703
loss: 1.0009593963623047,grad_norm: 0.999999134051928, iteration: 94704
loss: 0.9623669385910034,grad_norm: 0.9999990139021232, iteration: 94705
loss: 1.0162067413330078,grad_norm: 0.9999992635065886, iteration: 94706
loss: 1.0237696170806885,grad_norm: 0.9999992426564064, iteration: 94707
loss: 1.0024135112762451,grad_norm: 0.999999089890284, iteration: 94708
loss: 0.9881994128227234,grad_norm: 0.9999992485774991, iteration: 94709
loss: 0.9990522265434265,grad_norm: 0.999999087947951, iteration: 94710
loss: 1.007959246635437,grad_norm: 0.9999991329479095, iteration: 94711
loss: 1.0030596256256104,grad_norm: 0.9999991153822244, iteration: 94712
loss: 1.0069093704223633,grad_norm: 0.999998885833896, iteration: 94713
loss: 1.0025748014450073,grad_norm: 0.9680232223727026, iteration: 94714
loss: 0.9878747463226318,grad_norm: 0.9999988676213546, iteration: 94715
loss: 0.9675474762916565,grad_norm: 0.9945906226523454, iteration: 94716
loss: 0.9789786338806152,grad_norm: 0.9999990528469188, iteration: 94717
loss: 1.0237343311309814,grad_norm: 0.9999993447515486, iteration: 94718
loss: 1.0059481859207153,grad_norm: 0.9999990801339147, iteration: 94719
loss: 1.006247878074646,grad_norm: 0.9899870156572214, iteration: 94720
loss: 0.9980321526527405,grad_norm: 0.9999991477919339, iteration: 94721
loss: 0.9875150322914124,grad_norm: 0.8849528435304671, iteration: 94722
loss: 0.9668455719947815,grad_norm: 0.9867427375203203, iteration: 94723
loss: 1.0395463705062866,grad_norm: 0.8975841317116078, iteration: 94724
loss: 1.0265213251113892,grad_norm: 0.9999991204713807, iteration: 94725
loss: 1.0068297386169434,grad_norm: 0.9999991529232702, iteration: 94726
loss: 1.014612078666687,grad_norm: 0.8766335950185682, iteration: 94727
loss: 0.9517702460289001,grad_norm: 0.9999991347715059, iteration: 94728
loss: 1.0020869970321655,grad_norm: 0.9999992370883637, iteration: 94729
loss: 1.02456533908844,grad_norm: 0.9999992149233733, iteration: 94730
loss: 1.0549031496047974,grad_norm: 0.9999991411731205, iteration: 94731
loss: 1.0111713409423828,grad_norm: 0.999999192595988, iteration: 94732
loss: 0.9885654449462891,grad_norm: 0.9275900159667435, iteration: 94733
loss: 0.9984604716300964,grad_norm: 0.9344383779950789, iteration: 94734
loss: 0.9911919236183167,grad_norm: 0.9843187535658152, iteration: 94735
loss: 0.9918375611305237,grad_norm: 0.999999160920562, iteration: 94736
loss: 1.0151675939559937,grad_norm: 0.9924705704301571, iteration: 94737
loss: 0.9856071472167969,grad_norm: 0.9999990810038532, iteration: 94738
loss: 1.0032681226730347,grad_norm: 0.9999991165613625, iteration: 94739
loss: 1.036259651184082,grad_norm: 0.9999990933173626, iteration: 94740
loss: 1.0098012685775757,grad_norm: 0.9999990705477183, iteration: 94741
loss: 1.0570075511932373,grad_norm: 0.9999994768221894, iteration: 94742
loss: 0.9727739691734314,grad_norm: 0.9999990055603997, iteration: 94743
loss: 0.9900392889976501,grad_norm: 0.9184690097669911, iteration: 94744
loss: 0.9789183139801025,grad_norm: 0.999999225282573, iteration: 94745
loss: 0.9768985509872437,grad_norm: 0.9999991537038927, iteration: 94746
loss: 1.0037105083465576,grad_norm: 0.9999990728224404, iteration: 94747
loss: 1.0601565837860107,grad_norm: 0.9999996433859657, iteration: 94748
loss: 0.9889373183250427,grad_norm: 0.9456709824744768, iteration: 94749
loss: 1.0089739561080933,grad_norm: 0.9503727825363446, iteration: 94750
loss: 1.020346999168396,grad_norm: 0.999999060358933, iteration: 94751
loss: 0.9759663343429565,grad_norm: 0.999998947065048, iteration: 94752
loss: 1.0053585767745972,grad_norm: 0.9999991029876784, iteration: 94753
loss: 1.0263005495071411,grad_norm: 0.9999992481949116, iteration: 94754
loss: 0.9765148758888245,grad_norm: 0.999998968161206, iteration: 94755
loss: 1.023476243019104,grad_norm: 0.9999990682627444, iteration: 94756
loss: 1.0023155212402344,grad_norm: 0.9999990936990532, iteration: 94757
loss: 1.0071513652801514,grad_norm: 0.9963166873281788, iteration: 94758
loss: 1.000722885131836,grad_norm: 0.9999992205159264, iteration: 94759
loss: 1.006899118423462,grad_norm: 0.9999991353616051, iteration: 94760
loss: 1.0441774129867554,grad_norm: 0.9999990490785278, iteration: 94761
loss: 1.009726643562317,grad_norm: 0.9587391830579217, iteration: 94762
loss: 0.9873008131980896,grad_norm: 0.9775897517593245, iteration: 94763
loss: 0.9667407274246216,grad_norm: 0.9999991657927171, iteration: 94764
loss: 1.0191386938095093,grad_norm: 0.9999992642075509, iteration: 94765
loss: 0.9308783411979675,grad_norm: 0.9999992094319422, iteration: 94766
loss: 0.9755008220672607,grad_norm: 0.9999991136916714, iteration: 94767
loss: 1.0118517875671387,grad_norm: 0.9999990146207147, iteration: 94768
loss: 1.0388480424880981,grad_norm: 0.9251749187798536, iteration: 94769
loss: 0.9716220498085022,grad_norm: 0.9999990440460411, iteration: 94770
loss: 0.9688493609428406,grad_norm: 0.999998981521244, iteration: 94771
loss: 1.0010887384414673,grad_norm: 0.9999993000860297, iteration: 94772
loss: 0.9896805882453918,grad_norm: 0.9999992165387119, iteration: 94773
loss: 0.9728116989135742,grad_norm: 0.9999991330526588, iteration: 94774
loss: 1.0041500329971313,grad_norm: 0.9999992045975787, iteration: 94775
loss: 1.0048925876617432,grad_norm: 0.9999990736674155, iteration: 94776
loss: 0.9759766459465027,grad_norm: 0.9999990079638601, iteration: 94777
loss: 1.032370924949646,grad_norm: 0.9999991162727955, iteration: 94778
loss: 1.0130525827407837,grad_norm: 0.9999989671368938, iteration: 94779
loss: 0.9990343451499939,grad_norm: 0.999999196724921, iteration: 94780
loss: 0.9903389811515808,grad_norm: 0.9923209834175493, iteration: 94781
loss: 0.9706662893295288,grad_norm: 0.9999989682022404, iteration: 94782
loss: 0.9605671763420105,grad_norm: 0.9999990286449745, iteration: 94783
loss: 0.9476616382598877,grad_norm: 0.9999990080113981, iteration: 94784
loss: 1.0020831823349,grad_norm: 0.9138638348548273, iteration: 94785
loss: 0.9971857666969299,grad_norm: 0.9999990618065887, iteration: 94786
loss: 1.0079731941223145,grad_norm: 0.9999991345892677, iteration: 94787
loss: 1.0408906936645508,grad_norm: 0.9999996224820512, iteration: 94788
loss: 0.9838221669197083,grad_norm: 0.8935790846364333, iteration: 94789
loss: 0.972167432308197,grad_norm: 0.999999272755787, iteration: 94790
loss: 0.9792691469192505,grad_norm: 0.8808324901975108, iteration: 94791
loss: 0.9693716764450073,grad_norm: 0.9999992355790697, iteration: 94792
loss: 0.9687732458114624,grad_norm: 0.9999993037494165, iteration: 94793
loss: 1.015622854232788,grad_norm: 0.9999992978003239, iteration: 94794
loss: 0.978578507900238,grad_norm: 0.9048213753391089, iteration: 94795
loss: 0.9890480041503906,grad_norm: 0.9654943948594309, iteration: 94796
loss: 0.9638586640357971,grad_norm: 0.9999992501421895, iteration: 94797
loss: 0.9291225671768188,grad_norm: 0.9999990209178918, iteration: 94798
loss: 0.97919100522995,grad_norm: 0.9999991754097836, iteration: 94799
loss: 1.0186110734939575,grad_norm: 0.9010233282247588, iteration: 94800
loss: 0.970221221446991,grad_norm: 0.9999991610704977, iteration: 94801
loss: 1.0022000074386597,grad_norm: 0.9999992833288734, iteration: 94802
loss: 0.9791215658187866,grad_norm: 0.9075791711183592, iteration: 94803
loss: 0.9802839159965515,grad_norm: 0.9999990020998789, iteration: 94804
loss: 0.9723955392837524,grad_norm: 0.942594627812299, iteration: 94805
loss: 1.0272598266601562,grad_norm: 0.9999991676823359, iteration: 94806
loss: 0.952690064907074,grad_norm: 0.9340590387955795, iteration: 94807
loss: 1.0298893451690674,grad_norm: 0.9786089679854636, iteration: 94808
loss: 0.9628357291221619,grad_norm: 0.9999991230699266, iteration: 94809
loss: 0.9591608643531799,grad_norm: 0.9999991029918875, iteration: 94810
loss: 1.0198400020599365,grad_norm: 0.9999995937636613, iteration: 94811
loss: 1.0383957624435425,grad_norm: 0.9998114136460142, iteration: 94812
loss: 1.0229990482330322,grad_norm: 0.9999991598924808, iteration: 94813
loss: 0.9812449812889099,grad_norm: 0.9999991741118552, iteration: 94814
loss: 0.9960794448852539,grad_norm: 0.999999000761524, iteration: 94815
loss: 0.963699996471405,grad_norm: 0.9999990762380809, iteration: 94816
loss: 0.974431037902832,grad_norm: 0.9999990867576877, iteration: 94817
loss: 0.9442440271377563,grad_norm: 0.9999990225832225, iteration: 94818
loss: 0.9832960963249207,grad_norm: 0.9999991089940639, iteration: 94819
loss: 1.0153815746307373,grad_norm: 0.9999991185089312, iteration: 94820
loss: 1.0137019157409668,grad_norm: 0.9999993234476088, iteration: 94821
loss: 1.0137109756469727,grad_norm: 0.998945239404891, iteration: 94822
loss: 1.0481150150299072,grad_norm: 0.914682656659778, iteration: 94823
loss: 0.9748517870903015,grad_norm: 0.9651033875632955, iteration: 94824
loss: 1.0103049278259277,grad_norm: 0.804846378923891, iteration: 94825
loss: 1.0172070264816284,grad_norm: 0.8787496154440156, iteration: 94826
loss: 1.0064986944198608,grad_norm: 0.9913336952916437, iteration: 94827
loss: 0.9966033697128296,grad_norm: 0.999998960830319, iteration: 94828
loss: 0.9568495750427246,grad_norm: 0.917314620950243, iteration: 94829
loss: 1.0140422582626343,grad_norm: 0.9999990999334656, iteration: 94830
loss: 1.0353507995605469,grad_norm: 0.9999991348962374, iteration: 94831
loss: 0.9913129210472107,grad_norm: 0.9985805064362044, iteration: 94832
loss: 0.9726851582527161,grad_norm: 0.9999989345284478, iteration: 94833
loss: 0.9799061417579651,grad_norm: 0.9999990814790323, iteration: 94834
loss: 1.0277035236358643,grad_norm: 0.9999992748410351, iteration: 94835
loss: 0.9880755543708801,grad_norm: 0.8839869089581326, iteration: 94836
loss: 0.9969075322151184,grad_norm: 0.8956580802662876, iteration: 94837
loss: 1.0106500387191772,grad_norm: 0.9813492028809822, iteration: 94838
loss: 1.0863276720046997,grad_norm: 0.9999996929163015, iteration: 94839
loss: 1.0311182737350464,grad_norm: 0.9999992337566125, iteration: 94840
loss: 1.00167977809906,grad_norm: 0.9999994960586651, iteration: 94841
loss: 0.977130115032196,grad_norm: 0.9999991468606213, iteration: 94842
loss: 1.0294963121414185,grad_norm: 0.9999992087473634, iteration: 94843
loss: 0.9979468584060669,grad_norm: 0.999999067517553, iteration: 94844
loss: 1.01815664768219,grad_norm: 0.9999989495259916, iteration: 94845
loss: 1.0120398998260498,grad_norm: 0.9999990541777648, iteration: 94846
loss: 0.9981266856193542,grad_norm: 0.9999991883758075, iteration: 94847
loss: 0.9815807342529297,grad_norm: 0.9999990834547229, iteration: 94848
loss: 1.0254608392715454,grad_norm: 0.9999993132906372, iteration: 94849
loss: 0.995826005935669,grad_norm: 0.9999991140718503, iteration: 94850
loss: 0.9795417785644531,grad_norm: 0.9999991406948662, iteration: 94851
loss: 1.0079498291015625,grad_norm: 0.9999989720941461, iteration: 94852
loss: 1.0217968225479126,grad_norm: 0.8302671921596462, iteration: 94853
loss: 0.9815304279327393,grad_norm: 0.9999991933207154, iteration: 94854
loss: 0.9953963756561279,grad_norm: 0.9999989677518044, iteration: 94855
loss: 0.9952726364135742,grad_norm: 0.9999990935791263, iteration: 94856
loss: 0.976442813873291,grad_norm: 0.999999022259779, iteration: 94857
loss: 1.010256290435791,grad_norm: 0.9999992119271558, iteration: 94858
loss: 1.1247390508651733,grad_norm: 0.9999990172300601, iteration: 94859
loss: 0.9870789647102356,grad_norm: 0.9999992298447582, iteration: 94860
loss: 0.9988104701042175,grad_norm: 0.9999990951932698, iteration: 94861
loss: 0.9781317710876465,grad_norm: 0.9411069726409537, iteration: 94862
loss: 0.9914465546607971,grad_norm: 0.9952251045042724, iteration: 94863
loss: 1.0341575145721436,grad_norm: 0.9393648041017102, iteration: 94864
loss: 1.004814863204956,grad_norm: 0.9999991944781487, iteration: 94865
loss: 0.9896304607391357,grad_norm: 0.9999996491806434, iteration: 94866
loss: 1.0588666200637817,grad_norm: 0.9999993184449204, iteration: 94867
loss: 0.9990816712379456,grad_norm: 0.941999530378084, iteration: 94868
loss: 0.9923815131187439,grad_norm: 0.9999991182913269, iteration: 94869
loss: 1.0122692584991455,grad_norm: 0.8827136006955314, iteration: 94870
loss: 0.9579835534095764,grad_norm: 0.9999990663787777, iteration: 94871
loss: 1.0035322904586792,grad_norm: 0.9143150931687989, iteration: 94872
loss: 1.0048470497131348,grad_norm: 0.9836012634783383, iteration: 94873
loss: 0.9863620400428772,grad_norm: 0.9999992148795978, iteration: 94874
loss: 1.0057497024536133,grad_norm: 0.9999990559378448, iteration: 94875
loss: 1.2178195714950562,grad_norm: 0.9999995744579944, iteration: 94876
loss: 1.0266318321228027,grad_norm: 0.9999991116110993, iteration: 94877
loss: 0.9939510226249695,grad_norm: 0.9999992565012805, iteration: 94878
loss: 0.9773625731468201,grad_norm: 0.999999095756483, iteration: 94879
loss: 0.9722062945365906,grad_norm: 0.9999991788439101, iteration: 94880
loss: 1.0165637731552124,grad_norm: 0.9999991893638077, iteration: 94881
loss: 1.0588572025299072,grad_norm: 0.9960339507572888, iteration: 94882
loss: 1.0056277513504028,grad_norm: 0.9999989932051552, iteration: 94883
loss: 1.023025631904602,grad_norm: 0.9999991039374736, iteration: 94884
loss: 0.9918565154075623,grad_norm: 0.9967808909564104, iteration: 94885
loss: 0.9476453065872192,grad_norm: 0.9999991745962106, iteration: 94886
loss: 1.0141373872756958,grad_norm: 0.9999992421205535, iteration: 94887
loss: 0.9297489523887634,grad_norm: 0.9753290011915939, iteration: 94888
loss: 1.0523415803909302,grad_norm: 0.9999992153315957, iteration: 94889
loss: 1.0172494649887085,grad_norm: 0.8919977278722644, iteration: 94890
loss: 1.0117682218551636,grad_norm: 0.999999136928246, iteration: 94891
loss: 0.9586607217788696,grad_norm: 0.9999989717295195, iteration: 94892
loss: 1.0471693277359009,grad_norm: 0.9937516300921693, iteration: 94893
loss: 0.9961513876914978,grad_norm: 0.9282054879823887, iteration: 94894
loss: 0.9833005666732788,grad_norm: 0.9664410293084089, iteration: 94895
loss: 1.0311452150344849,grad_norm: 0.9628600701508906, iteration: 94896
loss: 1.0157607793807983,grad_norm: 0.9999990843309323, iteration: 94897
loss: 0.9632617235183716,grad_norm: 0.9999990443519841, iteration: 94898
loss: 0.9497606158256531,grad_norm: 0.9999991831378424, iteration: 94899
loss: 1.0194326639175415,grad_norm: 0.9793666480282279, iteration: 94900
loss: 0.9814648628234863,grad_norm: 0.9999993276476011, iteration: 94901
loss: 0.9714788794517517,grad_norm: 0.9999989978367219, iteration: 94902
loss: 1.0175082683563232,grad_norm: 0.9999990520258597, iteration: 94903
loss: 1.0088735818862915,grad_norm: 0.9999991377526137, iteration: 94904
loss: 1.0391223430633545,grad_norm: 0.999999268199795, iteration: 94905
loss: 0.9901405572891235,grad_norm: 0.9999991299587933, iteration: 94906
loss: 0.9679986238479614,grad_norm: 0.9999991099460928, iteration: 94907
loss: 1.018605351448059,grad_norm: 0.9999991223470868, iteration: 94908
loss: 0.9797046780586243,grad_norm: 0.9866172698960447, iteration: 94909
loss: 1.0307483673095703,grad_norm: 0.9999993008605531, iteration: 94910
loss: 1.0095547437667847,grad_norm: 0.9999992860019278, iteration: 94911
loss: 1.0162872076034546,grad_norm: 0.9999990059499857, iteration: 94912
loss: 1.0210877656936646,grad_norm: 0.9691775244552027, iteration: 94913
loss: 1.0299789905548096,grad_norm: 0.9999989931003127, iteration: 94914
loss: 1.0243502855300903,grad_norm: 0.9999991112256584, iteration: 94915
loss: 0.9905811548233032,grad_norm: 0.9981686925775307, iteration: 94916
loss: 1.022899866104126,grad_norm: 0.999999399474711, iteration: 94917
loss: 0.9856936931610107,grad_norm: 0.999999154482807, iteration: 94918
loss: 0.9840123653411865,grad_norm: 0.9999991382779931, iteration: 94919
loss: 1.0465096235275269,grad_norm: 0.970128755880881, iteration: 94920
loss: 0.998731791973114,grad_norm: 0.9999990346253443, iteration: 94921
loss: 1.0418716669082642,grad_norm: 0.9999992776879292, iteration: 94922
loss: 1.0472971200942993,grad_norm: 0.9999991414304575, iteration: 94923
loss: 1.012176752090454,grad_norm: 0.9999991276547652, iteration: 94924
loss: 0.9953300952911377,grad_norm: 0.9355115370181235, iteration: 94925
loss: 0.9720129370689392,grad_norm: 0.8010888892163535, iteration: 94926
loss: 0.9791578650474548,grad_norm: 0.9999991004062673, iteration: 94927
loss: 1.0056155920028687,grad_norm: 0.9999990761340015, iteration: 94928
loss: 0.9767970442771912,grad_norm: 0.9999990601751125, iteration: 94929
loss: 0.9892244338989258,grad_norm: 0.9999991078183262, iteration: 94930
loss: 0.9827321767807007,grad_norm: 0.9999991432959185, iteration: 94931
loss: 0.971727728843689,grad_norm: 0.9999990723650034, iteration: 94932
loss: 1.029421091079712,grad_norm: 0.9999997050716354, iteration: 94933
loss: 0.9992680549621582,grad_norm: 0.9550579569839139, iteration: 94934
loss: 1.008327841758728,grad_norm: 0.9999992755972136, iteration: 94935
loss: 1.0090113878250122,grad_norm: 0.9999990747864235, iteration: 94936
loss: 1.0077937841415405,grad_norm: 0.9999991824308342, iteration: 94937
loss: 1.0443607568740845,grad_norm: 0.9999990643322965, iteration: 94938
loss: 1.0106748342514038,grad_norm: 0.9433798802323126, iteration: 94939
loss: 0.970177173614502,grad_norm: 0.9999991520655525, iteration: 94940
loss: 1.0615224838256836,grad_norm: 0.9999991950244819, iteration: 94941
loss: 1.01149582862854,grad_norm: 0.9999991929612747, iteration: 94942
loss: 0.9955872297286987,grad_norm: 0.9999990679103082, iteration: 94943
loss: 0.9810081720352173,grad_norm: 0.9999989689785996, iteration: 94944
loss: 1.023022174835205,grad_norm: 0.9999992736348786, iteration: 94945
loss: 0.9696474671363831,grad_norm: 0.9999989590853507, iteration: 94946
loss: 0.9865128993988037,grad_norm: 0.9999992202559173, iteration: 94947
loss: 1.0327296257019043,grad_norm: 0.9999990330689663, iteration: 94948
loss: 0.9933584928512573,grad_norm: 0.8691657846127238, iteration: 94949
loss: 0.9981254935264587,grad_norm: 0.9674332300230654, iteration: 94950
loss: 1.0163381099700928,grad_norm: 0.9999990505404999, iteration: 94951
loss: 1.008582592010498,grad_norm: 0.931534986866526, iteration: 94952
loss: 0.9892950057983398,grad_norm: 0.9999991570414375, iteration: 94953
loss: 0.9938721060752869,grad_norm: 0.9999989584912162, iteration: 94954
loss: 1.0231695175170898,grad_norm: 0.9469923421273233, iteration: 94955
loss: 0.9948813319206238,grad_norm: 0.8115903084972046, iteration: 94956
loss: 1.0356329679489136,grad_norm: 0.999999269611487, iteration: 94957
loss: 0.9912074208259583,grad_norm: 0.8886788007649844, iteration: 94958
loss: 1.006890058517456,grad_norm: 0.9259206576132087, iteration: 94959
loss: 0.9906154870986938,grad_norm: 0.8239080145686357, iteration: 94960
loss: 1.0448213815689087,grad_norm: 0.999999178088407, iteration: 94961
loss: 0.9945244789123535,grad_norm: 0.9537239639703013, iteration: 94962
loss: 1.0189186334609985,grad_norm: 0.9999990281434682, iteration: 94963
loss: 0.9991163611412048,grad_norm: 0.9999990631321972, iteration: 94964
loss: 0.9909906387329102,grad_norm: 0.9999991106993836, iteration: 94965
loss: 1.0151928663253784,grad_norm: 0.9691488836305204, iteration: 94966
loss: 0.9963015913963318,grad_norm: 0.9909589944927645, iteration: 94967
loss: 1.0248998403549194,grad_norm: 0.9902374388709354, iteration: 94968
loss: 1.0335607528686523,grad_norm: 0.920236273236829, iteration: 94969
loss: 0.9808401465415955,grad_norm: 0.9974988691274753, iteration: 94970
loss: 1.0436384677886963,grad_norm: 0.9999992343497534, iteration: 94971
loss: 1.0219151973724365,grad_norm: 0.9901335108998054, iteration: 94972
loss: 1.0132043361663818,grad_norm: 0.9966684070897286, iteration: 94973
loss: 0.9727174043655396,grad_norm: 0.9172218442171662, iteration: 94974
loss: 1.0066914558410645,grad_norm: 0.9999990290436658, iteration: 94975
loss: 1.0219157934188843,grad_norm: 0.99607430353317, iteration: 94976
loss: 1.0222077369689941,grad_norm: 0.999999097010233, iteration: 94977
loss: 0.9647220969200134,grad_norm: 0.9999991959888818, iteration: 94978
loss: 1.0106372833251953,grad_norm: 0.9927743891497225, iteration: 94979
loss: 1.0156034231185913,grad_norm: 0.9999994394616029, iteration: 94980
loss: 1.0244823694229126,grad_norm: 0.9630029835459835, iteration: 94981
loss: 1.0238487720489502,grad_norm: 0.9365405247031813, iteration: 94982
loss: 0.9916714429855347,grad_norm: 0.9999990842808617, iteration: 94983
loss: 1.1195077896118164,grad_norm: 0.9999999264756668, iteration: 94984
loss: 0.9522320628166199,grad_norm: 0.999999272893835, iteration: 94985
loss: 1.0283396244049072,grad_norm: 0.953699291944486, iteration: 94986
loss: 0.9953224658966064,grad_norm: 0.9999991805424628, iteration: 94987
loss: 0.9769327640533447,grad_norm: 0.9618480545477521, iteration: 94988
loss: 1.0107417106628418,grad_norm: 0.9605396319519638, iteration: 94989
loss: 1.00682532787323,grad_norm: 0.9999991265234219, iteration: 94990
loss: 0.9809193015098572,grad_norm: 0.9999992785097246, iteration: 94991
loss: 1.0500218868255615,grad_norm: 0.99704141727125, iteration: 94992
loss: 1.0416302680969238,grad_norm: 0.9999990888992608, iteration: 94993
loss: 1.0344080924987793,grad_norm: 0.9999992009189275, iteration: 94994
loss: 1.00484299659729,grad_norm: 0.9999990762527806, iteration: 94995
loss: 1.0456092357635498,grad_norm: 0.9999992610896525, iteration: 94996
loss: 0.976482629776001,grad_norm: 0.9999990888246284, iteration: 94997
loss: 1.0039007663726807,grad_norm: 0.9099804532821812, iteration: 94998
loss: 0.9908061623573303,grad_norm: 0.933934305969112, iteration: 94999
loss: 0.9923981428146362,grad_norm: 0.9999991147380144, iteration: 95000
loss: 0.9838414192199707,grad_norm: 0.9389928304937203, iteration: 95001
loss: 1.0165960788726807,grad_norm: 0.999999098579112, iteration: 95002
loss: 1.013998031616211,grad_norm: 0.8233422236405099, iteration: 95003
loss: 0.9920583367347717,grad_norm: 0.9274663953318342, iteration: 95004
loss: 0.9847314953804016,grad_norm: 0.9999991712048975, iteration: 95005
loss: 0.9507390856742859,grad_norm: 0.9860570825875524, iteration: 95006
loss: 0.9921815395355225,grad_norm: 0.9999991062117546, iteration: 95007
loss: 0.9692753553390503,grad_norm: 0.8695905475025497, iteration: 95008
loss: 0.9822531342506409,grad_norm: 0.8999023872853407, iteration: 95009
loss: 1.0087100267410278,grad_norm: 0.9999991568161832, iteration: 95010
loss: 0.9741494059562683,grad_norm: 0.9381585730923416, iteration: 95011
loss: 1.017771601676941,grad_norm: 0.9501365077912217, iteration: 95012
loss: 1.0077792406082153,grad_norm: 0.9873186826318, iteration: 95013
loss: 1.0182338953018188,grad_norm: 0.9256561748979877, iteration: 95014
loss: 0.999125599861145,grad_norm: 0.9999990044142649, iteration: 95015
loss: 0.9977996945381165,grad_norm: 0.9226621938976948, iteration: 95016
loss: 1.0105760097503662,grad_norm: 0.9999991244421246, iteration: 95017
loss: 0.9855141639709473,grad_norm: 0.9999992271153101, iteration: 95018
loss: 1.0018953084945679,grad_norm: 0.9270357177561362, iteration: 95019
loss: 0.9416976571083069,grad_norm: 0.9999990876681093, iteration: 95020
loss: 0.9928657412528992,grad_norm: 0.9664938550411661, iteration: 95021
loss: 0.9838810563087463,grad_norm: 0.9319372133057912, iteration: 95022
loss: 1.0213661193847656,grad_norm: 0.9999990381786118, iteration: 95023
loss: 1.0064748525619507,grad_norm: 0.9999989737709593, iteration: 95024
loss: 1.0126969814300537,grad_norm: 0.9774041042258234, iteration: 95025
loss: 1.0134540796279907,grad_norm: 0.9999990841080112, iteration: 95026
loss: 1.0252496004104614,grad_norm: 0.8838318864509734, iteration: 95027
loss: 0.9817848801612854,grad_norm: 0.9999990212627642, iteration: 95028
loss: 0.982823371887207,grad_norm: 0.9877062246196664, iteration: 95029
loss: 0.9686300754547119,grad_norm: 0.9999992415358369, iteration: 95030
loss: 0.9897756576538086,grad_norm: 0.9999990971145138, iteration: 95031
loss: 1.0052498579025269,grad_norm: 0.9999990574185624, iteration: 95032
loss: 0.9810678362846375,grad_norm: 0.9999993516438244, iteration: 95033
loss: 0.9791098237037659,grad_norm: 0.9201037439992739, iteration: 95034
loss: 0.9618197679519653,grad_norm: 0.999999002654423, iteration: 95035
loss: 1.0091900825500488,grad_norm: 0.8917175150332021, iteration: 95036
loss: 1.007569670677185,grad_norm: 0.9999990604672323, iteration: 95037
loss: 0.9703335165977478,grad_norm: 0.9999991329098679, iteration: 95038
loss: 0.9736849665641785,grad_norm: 0.9999989913600169, iteration: 95039
loss: 0.9811890125274658,grad_norm: 0.848815040943414, iteration: 95040
loss: 1.0048705339431763,grad_norm: 0.8866584875597131, iteration: 95041
loss: 1.0203778743743896,grad_norm: 0.9999992278354657, iteration: 95042
loss: 0.9851696491241455,grad_norm: 0.955514122805389, iteration: 95043
loss: 0.9911684393882751,grad_norm: 0.9591075197702758, iteration: 95044
loss: 0.9882153868675232,grad_norm: 0.9999991130995979, iteration: 95045
loss: 1.0134904384613037,grad_norm: 0.9220420271239117, iteration: 95046
loss: 1.0036697387695312,grad_norm: 0.9211141803750835, iteration: 95047
loss: 1.017326831817627,grad_norm: 0.9999989804949359, iteration: 95048
loss: 0.9876842498779297,grad_norm: 0.999999063315372, iteration: 95049
loss: 0.9675232172012329,grad_norm: 0.9999989496092012, iteration: 95050
loss: 1.0006054639816284,grad_norm: 0.9999991386850233, iteration: 95051
loss: 1.009596586227417,grad_norm: 0.9999992476300018, iteration: 95052
loss: 0.947003185749054,grad_norm: 0.9999989443506511, iteration: 95053
loss: 0.9889349341392517,grad_norm: 0.9999990737658845, iteration: 95054
loss: 1.0027779340744019,grad_norm: 0.9999991021068408, iteration: 95055
loss: 1.0245901346206665,grad_norm: 0.939046454210905, iteration: 95056
loss: 1.0180636644363403,grad_norm: 0.9999991366808821, iteration: 95057
loss: 1.0028698444366455,grad_norm: 0.9999993304800727, iteration: 95058
loss: 1.0194905996322632,grad_norm: 0.9999991620340742, iteration: 95059
loss: 0.9829643368721008,grad_norm: 0.9848367160082167, iteration: 95060
loss: 0.9996548891067505,grad_norm: 0.9999993895276147, iteration: 95061
loss: 0.977570652961731,grad_norm: 0.9999990131955864, iteration: 95062
loss: 1.019622564315796,grad_norm: 0.9999991158876235, iteration: 95063
loss: 0.973686158657074,grad_norm: 0.9999991266459142, iteration: 95064
loss: 1.0304923057556152,grad_norm: 0.9151432811654995, iteration: 95065
loss: 1.0015411376953125,grad_norm: 0.9999990694290924, iteration: 95066
loss: 0.9884876012802124,grad_norm: 0.9999992618845397, iteration: 95067
loss: 1.0046846866607666,grad_norm: 0.9760830297585433, iteration: 95068
loss: 1.0443304777145386,grad_norm: 0.9999991431308065, iteration: 95069
loss: 0.995216965675354,grad_norm: 0.9999992076461053, iteration: 95070
loss: 1.0102362632751465,grad_norm: 0.99999924211928, iteration: 95071
loss: 0.9944143295288086,grad_norm: 0.8178990005402152, iteration: 95072
loss: 1.0382615327835083,grad_norm: 0.9999989892740825, iteration: 95073
loss: 1.049389362335205,grad_norm: 0.999999123877252, iteration: 95074
loss: 0.9852415919303894,grad_norm: 0.999999096278828, iteration: 95075
loss: 0.9674249291419983,grad_norm: 0.9999990247325676, iteration: 95076
loss: 0.9562578797340393,grad_norm: 0.9999990706788063, iteration: 95077
loss: 0.983447790145874,grad_norm: 0.9999991859996814, iteration: 95078
loss: 0.9695264101028442,grad_norm: 0.94789233579364, iteration: 95079
loss: 0.9760896563529968,grad_norm: 0.9256780227845474, iteration: 95080
loss: 1.0284388065338135,grad_norm: 0.9999991138688944, iteration: 95081
loss: 1.034271240234375,grad_norm: 0.9999992523901873, iteration: 95082
loss: 1.0042343139648438,grad_norm: 0.9999991120198115, iteration: 95083
loss: 1.002164363861084,grad_norm: 0.8792114259816828, iteration: 95084
loss: 1.0070985555648804,grad_norm: 0.9999990131149156, iteration: 95085
loss: 1.0366274118423462,grad_norm: 0.9999989706118362, iteration: 95086
loss: 1.0154612064361572,grad_norm: 0.9999991585754993, iteration: 95087
loss: 0.9859927296638489,grad_norm: 0.99999903459129, iteration: 95088
loss: 0.9935175776481628,grad_norm: 0.9649528232962938, iteration: 95089
loss: 0.9769288897514343,grad_norm: 0.999999103574935, iteration: 95090
loss: 1.007203221321106,grad_norm: 0.9999990628965096, iteration: 95091
loss: 0.9916196465492249,grad_norm: 0.9999990752976687, iteration: 95092
loss: 1.0188724994659424,grad_norm: 0.8547981453089363, iteration: 95093
loss: 0.9629677534103394,grad_norm: 0.9446026377835179, iteration: 95094
loss: 1.0137057304382324,grad_norm: 0.9999992868027184, iteration: 95095
loss: 1.0403587818145752,grad_norm: 0.9416207488257682, iteration: 95096
loss: 0.9858123064041138,grad_norm: 0.9999990115451579, iteration: 95097
loss: 1.0115054845809937,grad_norm: 0.9999991492712272, iteration: 95098
loss: 1.0111268758773804,grad_norm: 0.9999990555855178, iteration: 95099
loss: 1.0035998821258545,grad_norm: 0.9999991191522518, iteration: 95100
loss: 1.0050183534622192,grad_norm: 0.999999134863414, iteration: 95101
loss: 1.027579426765442,grad_norm: 0.8811576302656303, iteration: 95102
loss: 1.0164482593536377,grad_norm: 0.9817569024202669, iteration: 95103
loss: 0.9877235889434814,grad_norm: 0.9982819004373727, iteration: 95104
loss: 0.9781985282897949,grad_norm: 0.9508708522293136, iteration: 95105
loss: 0.983485996723175,grad_norm: 0.9726290922824973, iteration: 95106
loss: 1.006851315498352,grad_norm: 0.9999992301513999, iteration: 95107
loss: 1.0187281370162964,grad_norm: 0.9999991812809458, iteration: 95108
loss: 0.9475643038749695,grad_norm: 0.9999990713343594, iteration: 95109
loss: 0.9968941807746887,grad_norm: 0.9954260238697906, iteration: 95110
loss: 1.0102390050888062,grad_norm: 0.9999990528302793, iteration: 95111
loss: 1.0424580574035645,grad_norm: 0.9092291312769266, iteration: 95112
loss: 1.0054620504379272,grad_norm: 0.9999989932167304, iteration: 95113
loss: 0.9870851039886475,grad_norm: 0.8527996032030369, iteration: 95114
loss: 0.9979168772697449,grad_norm: 0.9999989376798756, iteration: 95115
loss: 0.9909906983375549,grad_norm: 0.9694935704065839, iteration: 95116
loss: 0.9731965065002441,grad_norm: 0.9749357674995667, iteration: 95117
loss: 1.0182362794876099,grad_norm: 0.9126611045107166, iteration: 95118
loss: 1.0042164325714111,grad_norm: 0.9999991437815736, iteration: 95119
loss: 0.9992204904556274,grad_norm: 0.9999990643902276, iteration: 95120
loss: 0.9938040971755981,grad_norm: 0.9999991529605577, iteration: 95121
loss: 0.9678358435630798,grad_norm: 0.9999991097619064, iteration: 95122
loss: 1.0334880352020264,grad_norm: 0.9999991990038946, iteration: 95123
loss: 0.9863993525505066,grad_norm: 0.9999993135216613, iteration: 95124
loss: 1.0238059759140015,grad_norm: 0.9999991891319198, iteration: 95125
loss: 0.9851147532463074,grad_norm: 0.9999991666612393, iteration: 95126
loss: 1.005702257156372,grad_norm: 0.9404407036441149, iteration: 95127
loss: 1.0155787467956543,grad_norm: 0.9542811434025101, iteration: 95128
loss: 0.9581195116043091,grad_norm: 0.9554735637210571, iteration: 95129
loss: 0.9884195327758789,grad_norm: 0.9999992018536031, iteration: 95130
loss: 1.0065171718597412,grad_norm: 0.9999992215885926, iteration: 95131
loss: 1.015668272972107,grad_norm: 0.9999990164336992, iteration: 95132
loss: 0.9734085202217102,grad_norm: 0.953340113402641, iteration: 95133
loss: 1.0190174579620361,grad_norm: 0.9999999546773006, iteration: 95134
loss: 1.0325711965560913,grad_norm: 0.9999992719308024, iteration: 95135
loss: 1.0255870819091797,grad_norm: 0.9999991291764286, iteration: 95136
loss: 1.0117591619491577,grad_norm: 0.999999211248837, iteration: 95137
loss: 0.99831622838974,grad_norm: 0.9958114836115509, iteration: 95138
loss: 1.0382815599441528,grad_norm: 0.9999993176519767, iteration: 95139
loss: 0.9735474586486816,grad_norm: 0.9999991257487233, iteration: 95140
loss: 1.0217721462249756,grad_norm: 0.9999992214304767, iteration: 95141
loss: 0.9532000422477722,grad_norm: 0.8399330088942447, iteration: 95142
loss: 1.0241358280181885,grad_norm: 0.9999991815487252, iteration: 95143
loss: 0.9977710843086243,grad_norm: 0.9999992537706676, iteration: 95144
loss: 0.9632078409194946,grad_norm: 0.9999990068998824, iteration: 95145
loss: 1.0064369440078735,grad_norm: 0.898031101119601, iteration: 95146
loss: 0.9739981889724731,grad_norm: 0.999999215656401, iteration: 95147
loss: 0.9711217880249023,grad_norm: 0.9379080125434602, iteration: 95148
loss: 0.9941580891609192,grad_norm: 0.9999990536676097, iteration: 95149
loss: 0.9795128703117371,grad_norm: 0.9999991400165398, iteration: 95150
loss: 0.9810482859611511,grad_norm: 0.9999991886293854, iteration: 95151
loss: 0.9747641086578369,grad_norm: 0.9999991819245639, iteration: 95152
loss: 0.9725601077079773,grad_norm: 0.9999992021953531, iteration: 95153
loss: 1.0048015117645264,grad_norm: 0.9999996332828825, iteration: 95154
loss: 1.0043306350708008,grad_norm: 0.9999991963488697, iteration: 95155
loss: 1.0285274982452393,grad_norm: 0.9999991591481218, iteration: 95156
loss: 1.0129094123840332,grad_norm: 0.8922256117794144, iteration: 95157
loss: 0.9825228452682495,grad_norm: 0.999999214764324, iteration: 95158
loss: 0.9922890067100525,grad_norm: 0.9490697747740867, iteration: 95159
loss: 1.008332371711731,grad_norm: 0.8773539499599546, iteration: 95160
loss: 0.9603871703147888,grad_norm: 0.9999993185842688, iteration: 95161
loss: 1.0027334690093994,grad_norm: 0.9999990594250858, iteration: 95162
loss: 0.9985182881355286,grad_norm: 0.9999990795623313, iteration: 95163
loss: 1.0231688022613525,grad_norm: 0.9659614519533876, iteration: 95164
loss: 0.9739633202552795,grad_norm: 0.9999991819846135, iteration: 95165
loss: 0.97020423412323,grad_norm: 0.9999991814094347, iteration: 95166
loss: 1.0287226438522339,grad_norm: 0.9999990174611622, iteration: 95167
loss: 0.9954343438148499,grad_norm: 0.9992579901207173, iteration: 95168
loss: 1.0158265829086304,grad_norm: 0.9999992748286702, iteration: 95169
loss: 0.9630129933357239,grad_norm: 0.9999991337560888, iteration: 95170
loss: 0.9785564541816711,grad_norm: 0.9999992810664792, iteration: 95171
loss: 0.9907029271125793,grad_norm: 0.9999992283812057, iteration: 95172
loss: 1.0035102367401123,grad_norm: 0.8617889171253943, iteration: 95173
loss: 1.0079915523529053,grad_norm: 0.999999238497883, iteration: 95174
loss: 1.0065659284591675,grad_norm: 0.8326967180446976, iteration: 95175
loss: 1.0031007528305054,grad_norm: 0.9999993037583019, iteration: 95176
loss: 0.9886181950569153,grad_norm: 0.9250981074570493, iteration: 95177
loss: 1.008792757987976,grad_norm: 0.9999991836446174, iteration: 95178
loss: 0.997187614440918,grad_norm: 0.9999993022533812, iteration: 95179
loss: 0.9970606565475464,grad_norm: 0.9999991462808081, iteration: 95180
loss: 1.0619796514511108,grad_norm: 0.9198581802445409, iteration: 95181
loss: 0.9849159717559814,grad_norm: 0.9999990888545766, iteration: 95182
loss: 1.0076271295547485,grad_norm: 0.9999990987219255, iteration: 95183
loss: 1.007703423500061,grad_norm: 0.9999993263190544, iteration: 95184
loss: 0.9945998787879944,grad_norm: 0.9999996413264165, iteration: 95185
loss: 1.0401159524917603,grad_norm: 0.9999991365855229, iteration: 95186
loss: 1.0248888731002808,grad_norm: 0.999999184869196, iteration: 95187
loss: 1.0520235300064087,grad_norm: 0.9999997339187577, iteration: 95188
loss: 0.9905650019645691,grad_norm: 0.9702586136470992, iteration: 95189
loss: 0.9862884879112244,grad_norm: 0.9783273558718756, iteration: 95190
loss: 1.0526546239852905,grad_norm: 0.9999999382192986, iteration: 95191
loss: 0.9825457334518433,grad_norm: 0.9999992024624134, iteration: 95192
loss: 0.9971869587898254,grad_norm: 0.9999990619652211, iteration: 95193
loss: 1.017938256263733,grad_norm: 0.9999991362313274, iteration: 95194
loss: 1.010583519935608,grad_norm: 0.9941753716431766, iteration: 95195
loss: 1.010593056678772,grad_norm: 0.919576017794426, iteration: 95196
loss: 1.0146594047546387,grad_norm: 0.9999990922640288, iteration: 95197
loss: 0.9714663028717041,grad_norm: 0.9999991076325583, iteration: 95198
loss: 1.0132204294204712,grad_norm: 0.9999991080791223, iteration: 95199
loss: 0.968163788318634,grad_norm: 0.9999991552523094, iteration: 95200
loss: 1.0090776681900024,grad_norm: 0.9532922071806106, iteration: 95201
loss: 0.9889129400253296,grad_norm: 0.9966437047965337, iteration: 95202
loss: 0.9846562147140503,grad_norm: 0.9999991051938355, iteration: 95203
loss: 0.9773196578025818,grad_norm: 0.9999993259324009, iteration: 95204
loss: 0.9959178566932678,grad_norm: 0.8680822113416184, iteration: 95205
loss: 1.0025721788406372,grad_norm: 0.9162254637510987, iteration: 95206
loss: 1.0192313194274902,grad_norm: 0.9999991444341068, iteration: 95207
loss: 1.0324182510375977,grad_norm: 0.9999991439346174, iteration: 95208
loss: 0.9826557040214539,grad_norm: 0.8900850227466695, iteration: 95209
loss: 1.0028715133666992,grad_norm: 0.9999991513334415, iteration: 95210
loss: 1.013135552406311,grad_norm: 0.924332277391454, iteration: 95211
loss: 0.9997209906578064,grad_norm: 0.832229418831427, iteration: 95212
loss: 0.994703471660614,grad_norm: 0.9999991872568309, iteration: 95213
loss: 0.9839834570884705,grad_norm: 0.9706991193238932, iteration: 95214
loss: 1.003694772720337,grad_norm: 0.9720947338822121, iteration: 95215
loss: 0.9789898991584778,grad_norm: 0.9999992034331074, iteration: 95216
loss: 1.0381489992141724,grad_norm: 0.9999993102008153, iteration: 95217
loss: 1.0153075456619263,grad_norm: 0.9999991897474602, iteration: 95218
loss: 1.008455514907837,grad_norm: 0.8654764595045239, iteration: 95219
loss: 0.9592829942703247,grad_norm: 0.9333554053767166, iteration: 95220
loss: 0.9898145794868469,grad_norm: 0.9999992029109134, iteration: 95221
loss: 0.9650691747665405,grad_norm: 0.9999989429501346, iteration: 95222
loss: 1.0078660249710083,grad_norm: 0.9999991989632204, iteration: 95223
loss: 0.9430313110351562,grad_norm: 0.9999990620534729, iteration: 95224
loss: 1.013806939125061,grad_norm: 0.9785157616538679, iteration: 95225
loss: 1.0045582056045532,grad_norm: 0.8783988935217213, iteration: 95226
loss: 0.9979183673858643,grad_norm: 0.9999991039923342, iteration: 95227
loss: 0.9821112155914307,grad_norm: 0.9999990424381955, iteration: 95228
loss: 0.9738285541534424,grad_norm: 0.9999990448263049, iteration: 95229
loss: 0.9751959443092346,grad_norm: 0.9999992132596889, iteration: 95230
loss: 1.0276296138763428,grad_norm: 0.9999992868077576, iteration: 95231
loss: 0.9859985113143921,grad_norm: 0.9999990778721312, iteration: 95232
loss: 0.9817687273025513,grad_norm: 0.8960401763511056, iteration: 95233
loss: 1.0079611539840698,grad_norm: 0.9999991105515247, iteration: 95234
loss: 0.9626470804214478,grad_norm: 0.9999991118836431, iteration: 95235
loss: 0.975149393081665,grad_norm: 0.9282066730640512, iteration: 95236
loss: 1.0153285264968872,grad_norm: 0.966449501026846, iteration: 95237
loss: 1.0238090753555298,grad_norm: 0.9869131087657357, iteration: 95238
loss: 0.977935254573822,grad_norm: 0.9999990392734236, iteration: 95239
loss: 1.0045446157455444,grad_norm: 0.9809058994671733, iteration: 95240
loss: 0.9810215830802917,grad_norm: 0.9168771900292612, iteration: 95241
loss: 0.9858909249305725,grad_norm: 0.999999039470551, iteration: 95242
loss: 0.9770380258560181,grad_norm: 0.9488460036981933, iteration: 95243
loss: 1.0008631944656372,grad_norm: 0.9999990418713739, iteration: 95244
loss: 1.0231245756149292,grad_norm: 0.9999992146186509, iteration: 95245
loss: 0.9909991025924683,grad_norm: 0.9728594779196051, iteration: 95246
loss: 1.0324052572250366,grad_norm: 0.9999991120506644, iteration: 95247
loss: 1.0112512111663818,grad_norm: 0.9567554987307746, iteration: 95248
loss: 1.0090667009353638,grad_norm: 0.9999994003877931, iteration: 95249
loss: 0.9849057793617249,grad_norm: 0.9999989993138606, iteration: 95250
loss: 0.9749511480331421,grad_norm: 0.8990548791286432, iteration: 95251
loss: 1.0315797328948975,grad_norm: 0.9955841019198817, iteration: 95252
loss: 0.9995049238204956,grad_norm: 0.9999991141238921, iteration: 95253
loss: 1.0425904989242554,grad_norm: 0.999999047711678, iteration: 95254
loss: 0.9990504384040833,grad_norm: 0.9999992625417308, iteration: 95255
loss: 1.0063508749008179,grad_norm: 0.9999991471736887, iteration: 95256
loss: 0.9831971526145935,grad_norm: 0.9999992532403301, iteration: 95257
loss: 1.014243721961975,grad_norm: 0.9448178872992522, iteration: 95258
loss: 0.9901801943778992,grad_norm: 0.848447441856165, iteration: 95259
loss: 1.0035251379013062,grad_norm: 0.9999991340835601, iteration: 95260
loss: 1.013354778289795,grad_norm: 0.8100749896366335, iteration: 95261
loss: 0.9786339402198792,grad_norm: 0.9169868340144159, iteration: 95262
loss: 0.971588134765625,grad_norm: 0.9999990629647849, iteration: 95263
loss: 0.9921825528144836,grad_norm: 0.9999992083177236, iteration: 95264
loss: 0.9925220012664795,grad_norm: 0.9824566637500164, iteration: 95265
loss: 1.021958827972412,grad_norm: 0.9200971423953771, iteration: 95266
loss: 1.0136164426803589,grad_norm: 0.9947252955001249, iteration: 95267
loss: 1.0091633796691895,grad_norm: 0.9562469171931748, iteration: 95268
loss: 0.9772428274154663,grad_norm: 0.9999990010116048, iteration: 95269
loss: 1.006620168685913,grad_norm: 0.9999992034463276, iteration: 95270
loss: 0.960483968257904,grad_norm: 0.9368014832591105, iteration: 95271
loss: 0.9835045337677002,grad_norm: 0.903852832289332, iteration: 95272
loss: 0.9569187760353088,grad_norm: 0.9999991046571577, iteration: 95273
loss: 0.9932063221931458,grad_norm: 0.8861416868852364, iteration: 95274
loss: 0.9855706691741943,grad_norm: 0.9971961336438567, iteration: 95275
loss: 1.008383870124817,grad_norm: 0.8435416462820728, iteration: 95276
loss: 1.033220887184143,grad_norm: 0.9999992098927408, iteration: 95277
loss: 0.9687067270278931,grad_norm: 0.9999992121763046, iteration: 95278
loss: 1.015060544013977,grad_norm: 0.9999991545111239, iteration: 95279
loss: 1.0016385316848755,grad_norm: 0.9999990271311229, iteration: 95280
loss: 0.9951528906822205,grad_norm: 0.9999992628923152, iteration: 95281
loss: 1.0192830562591553,grad_norm: 0.9782119412776418, iteration: 95282
loss: 1.004594326019287,grad_norm: 0.9999991909398374, iteration: 95283
loss: 1.0052192211151123,grad_norm: 0.860352471437505, iteration: 95284
loss: 0.9902675747871399,grad_norm: 0.9865723962511884, iteration: 95285
loss: 1.0213061571121216,grad_norm: 0.9999989871505602, iteration: 95286
loss: 1.00106942653656,grad_norm: 0.9999991710420735, iteration: 95287
loss: 1.0090854167938232,grad_norm: 0.9999991222434101, iteration: 95288
loss: 1.0053493976593018,grad_norm: 0.9999990568689652, iteration: 95289
loss: 1.022382140159607,grad_norm: 0.9999990603033139, iteration: 95290
loss: 1.0353137254714966,grad_norm: 0.9999994087628157, iteration: 95291
loss: 0.988699197769165,grad_norm: 0.999999063434206, iteration: 95292
loss: 0.990257978439331,grad_norm: 0.9999992249885052, iteration: 95293
loss: 0.9460665583610535,grad_norm: 0.9999990815021848, iteration: 95294
loss: 1.0242775678634644,grad_norm: 0.9468144159238298, iteration: 95295
loss: 1.0075030326843262,grad_norm: 0.9999992334397497, iteration: 95296
loss: 0.9680126309394836,grad_norm: 0.9999992580712809, iteration: 95297
loss: 0.9841764569282532,grad_norm: 0.9130932513226169, iteration: 95298
loss: 0.9902622103691101,grad_norm: 0.9151372493289857, iteration: 95299
loss: 1.0569599866867065,grad_norm: 0.9994083672069448, iteration: 95300
loss: 1.0317751169204712,grad_norm: 0.9373911293031323, iteration: 95301
loss: 1.0011332035064697,grad_norm: 0.999999141871844, iteration: 95302
loss: 0.9795703291893005,grad_norm: 0.9125193747025656, iteration: 95303
loss: 1.020198106765747,grad_norm: 0.9775043454896009, iteration: 95304
loss: 1.019999384880066,grad_norm: 0.9999993904890868, iteration: 95305
loss: 0.9801173210144043,grad_norm: 0.999999435299469, iteration: 95306
loss: 0.9959221482276917,grad_norm: 0.9537636236136642, iteration: 95307
loss: 1.0171465873718262,grad_norm: 0.9361478800044801, iteration: 95308
loss: 1.0083880424499512,grad_norm: 0.9999992110978376, iteration: 95309
loss: 0.96482914686203,grad_norm: 0.9999993191815113, iteration: 95310
loss: 1.2245731353759766,grad_norm: 0.9999996665016381, iteration: 95311
loss: 0.9854009747505188,grad_norm: 0.8881853480491284, iteration: 95312
loss: 0.9973467588424683,grad_norm: 0.9999993497270473, iteration: 95313
loss: 1.0245003700256348,grad_norm: 0.9999991323228921, iteration: 95314
loss: 0.9680745601654053,grad_norm: 0.8425283869759562, iteration: 95315
loss: 1.0294302701950073,grad_norm: 0.9916785221562524, iteration: 95316
loss: 1.0148718357086182,grad_norm: 0.9999990096467305, iteration: 95317
loss: 0.98269122838974,grad_norm: 0.9999991963409166, iteration: 95318
loss: 1.0210672616958618,grad_norm: 0.9999990048523258, iteration: 95319
loss: 1.0138393640518188,grad_norm: 0.9030555784595561, iteration: 95320
loss: 1.0039384365081787,grad_norm: 0.99999906487679, iteration: 95321
loss: 0.981245219707489,grad_norm: 0.9999991185694977, iteration: 95322
loss: 0.9987226724624634,grad_norm: 0.9999992595210679, iteration: 95323
loss: 1.0219500064849854,grad_norm: 0.9847675454145687, iteration: 95324
loss: 1.034413456916809,grad_norm: 0.9999992326300121, iteration: 95325
loss: 1.0028903484344482,grad_norm: 0.9999991373734723, iteration: 95326
loss: 1.0104230642318726,grad_norm: 0.9676749996914825, iteration: 95327
loss: 1.0169422626495361,grad_norm: 0.999999112479791, iteration: 95328
loss: 1.0055474042892456,grad_norm: 0.9787273854822204, iteration: 95329
loss: 1.0988337993621826,grad_norm: 0.9999992557784301, iteration: 95330
loss: 0.9785434007644653,grad_norm: 0.9999991517030665, iteration: 95331
loss: 1.0079379081726074,grad_norm: 0.9999992223553673, iteration: 95332
loss: 1.0241587162017822,grad_norm: 0.9637756838373599, iteration: 95333
loss: 0.9771682620048523,grad_norm: 0.9999998999207025, iteration: 95334
loss: 0.9596121907234192,grad_norm: 0.9999991464501463, iteration: 95335
loss: 1.0283652544021606,grad_norm: 0.9999999203805463, iteration: 95336
loss: 0.9891968965530396,grad_norm: 0.867764536850768, iteration: 95337
loss: 1.0290642976760864,grad_norm: 0.9999996003215332, iteration: 95338
loss: 1.023284912109375,grad_norm: 0.9999990113209194, iteration: 95339
loss: 0.985207200050354,grad_norm: 0.8492315051510612, iteration: 95340
loss: 1.0310413837432861,grad_norm: 0.9999991063637594, iteration: 95341
loss: 1.002186894416809,grad_norm: 0.8054261919257191, iteration: 95342
loss: 0.99234539270401,grad_norm: 0.9999992240758552, iteration: 95343
loss: 1.0202412605285645,grad_norm: 0.9181459797826407, iteration: 95344
loss: 0.9845024347305298,grad_norm: 0.9999992244971841, iteration: 95345
loss: 1.0261688232421875,grad_norm: 0.9999991373031797, iteration: 95346
loss: 1.0596094131469727,grad_norm: 0.9999996310185739, iteration: 95347
loss: 0.9894527196884155,grad_norm: 0.9999990997705008, iteration: 95348
loss: 1.037327766418457,grad_norm: 0.9999992108611641, iteration: 95349
loss: 1.0026748180389404,grad_norm: 0.9999992046558459, iteration: 95350
loss: 1.0122321844100952,grad_norm: 0.9999991406928739, iteration: 95351
loss: 1.0035985708236694,grad_norm: 0.9999990659150402, iteration: 95352
loss: 1.0095895528793335,grad_norm: 0.9999991672634032, iteration: 95353
loss: 0.9887256026268005,grad_norm: 0.9999990753734985, iteration: 95354
loss: 0.9794027805328369,grad_norm: 0.9999990481696008, iteration: 95355
loss: 1.074852466583252,grad_norm: 0.9999993137441007, iteration: 95356
loss: 0.9964724183082581,grad_norm: 0.9816086340666748, iteration: 95357
loss: 1.0243475437164307,grad_norm: 0.9999991059191222, iteration: 95358
loss: 0.9904206395149231,grad_norm: 0.9999989046789618, iteration: 95359
loss: 0.9938635230064392,grad_norm: 0.999999165386805, iteration: 95360
loss: 0.9947810769081116,grad_norm: 0.9999991257719207, iteration: 95361
loss: 0.9938908815383911,grad_norm: 0.999998877520184, iteration: 95362
loss: 1.0094311237335205,grad_norm: 0.9999990858671127, iteration: 95363
loss: 1.0226303339004517,grad_norm: 0.9999996955229988, iteration: 95364
loss: 0.9924284219741821,grad_norm: 0.9999991132466214, iteration: 95365
loss: 1.0108060836791992,grad_norm: 0.9999993307149937, iteration: 95366
loss: 0.9849441647529602,grad_norm: 0.9999992061363626, iteration: 95367
loss: 0.9940159320831299,grad_norm: 0.9999990494675665, iteration: 95368
loss: 1.0440459251403809,grad_norm: 0.9999991735755104, iteration: 95369
loss: 0.9315704107284546,grad_norm: 0.9999991543998991, iteration: 95370
loss: 1.0633172988891602,grad_norm: 0.9999993272243234, iteration: 95371
loss: 1.0258811712265015,grad_norm: 0.9676507805314263, iteration: 95372
loss: 0.9871034026145935,grad_norm: 0.9239467317513508, iteration: 95373
loss: 0.9947252869606018,grad_norm: 0.9999990256143043, iteration: 95374
loss: 0.9603174924850464,grad_norm: 0.9861466763225007, iteration: 95375
loss: 0.9854308366775513,grad_norm: 0.9999991611868557, iteration: 95376
loss: 0.9500628709793091,grad_norm: 0.9999990035315375, iteration: 95377
loss: 1.0366644859313965,grad_norm: 0.9884069000818594, iteration: 95378
loss: 1.0176948308944702,grad_norm: 0.999999108431885, iteration: 95379
loss: 0.9924630522727966,grad_norm: 0.999999179820938, iteration: 95380
loss: 1.0098665952682495,grad_norm: 0.9999991520645668, iteration: 95381
loss: 1.0198805332183838,grad_norm: 0.9999990973361779, iteration: 95382
loss: 1.0077507495880127,grad_norm: 0.9999991545620865, iteration: 95383
loss: 1.0003341436386108,grad_norm: 0.9999991473914389, iteration: 95384
loss: 0.9990251660346985,grad_norm: 0.9999992203058785, iteration: 95385
loss: 0.9988453388214111,grad_norm: 0.9999992665162251, iteration: 95386
loss: 0.9859622716903687,grad_norm: 0.9999991339258161, iteration: 95387
loss: 1.011290192604065,grad_norm: 0.9999990555458188, iteration: 95388
loss: 1.0585523843765259,grad_norm: 0.9999992075077494, iteration: 95389
loss: 1.0218422412872314,grad_norm: 0.9999990966329305, iteration: 95390
loss: 0.9537314772605896,grad_norm: 0.9999991155694302, iteration: 95391
loss: 1.003690481185913,grad_norm: 0.9401904037008537, iteration: 95392
loss: 0.9882057905197144,grad_norm: 0.9820505940253142, iteration: 95393
loss: 0.9848195314407349,grad_norm: 0.9999992084327823, iteration: 95394
loss: 1.01864755153656,grad_norm: 0.9999992337119258, iteration: 95395
loss: 0.978493332862854,grad_norm: 0.980894564476387, iteration: 95396
loss: 1.0115399360656738,grad_norm: 0.9999991285635164, iteration: 95397
loss: 1.014832854270935,grad_norm: 0.9621925171066287, iteration: 95398
loss: 1.0144293308258057,grad_norm: 0.999999681923133, iteration: 95399
loss: 1.0005013942718506,grad_norm: 0.9999991526175267, iteration: 95400
loss: 0.9998881220817566,grad_norm: 0.9999997099911546, iteration: 95401
loss: 0.9865524768829346,grad_norm: 0.9175493761629551, iteration: 95402
loss: 0.9990473985671997,grad_norm: 0.999999031914737, iteration: 95403
loss: 0.9910492300987244,grad_norm: 0.9999992898723932, iteration: 95404
loss: 1.010638952255249,grad_norm: 0.9070939622112911, iteration: 95405
loss: 1.0105960369110107,grad_norm: 0.9368654720874003, iteration: 95406
loss: 0.999823808670044,grad_norm: 0.9378232178478765, iteration: 95407
loss: 1.0931422710418701,grad_norm: 0.9999993339918649, iteration: 95408
loss: 1.0152504444122314,grad_norm: 0.999998977697176, iteration: 95409
loss: 1.001226782798767,grad_norm: 0.9999991104753123, iteration: 95410
loss: 1.0279945135116577,grad_norm: 0.8841744282863807, iteration: 95411
loss: 1.0035961866378784,grad_norm: 0.9999990381859739, iteration: 95412
loss: 1.0209510326385498,grad_norm: 0.9999992054481209, iteration: 95413
loss: 1.0374729633331299,grad_norm: 0.9999992256845446, iteration: 95414
loss: 1.045303463935852,grad_norm: 0.9999994296915482, iteration: 95415
loss: 0.9810853600502014,grad_norm: 0.9999989468445845, iteration: 95416
loss: 0.983512818813324,grad_norm: 0.9999990858178038, iteration: 95417
loss: 1.047766923904419,grad_norm: 0.9999996536740983, iteration: 95418
loss: 0.9886139631271362,grad_norm: 0.9712577124779411, iteration: 95419
loss: 1.0120943784713745,grad_norm: 0.9999995236542241, iteration: 95420
loss: 1.0141797065734863,grad_norm: 0.9545764211921337, iteration: 95421
loss: 1.0088165998458862,grad_norm: 0.9983993674774613, iteration: 95422
loss: 0.9866558909416199,grad_norm: 0.9680001443363846, iteration: 95423
loss: 1.0164365768432617,grad_norm: 0.9999992630994348, iteration: 95424
loss: 1.0363109111785889,grad_norm: 0.9999990560240184, iteration: 95425
loss: 1.016982078552246,grad_norm: 0.9999992518927205, iteration: 95426
loss: 1.0151927471160889,grad_norm: 0.9720606745316194, iteration: 95427
loss: 0.9985184669494629,grad_norm: 0.9999990703482401, iteration: 95428
loss: 1.0002399682998657,grad_norm: 0.9999991813661836, iteration: 95429
loss: 1.005877137184143,grad_norm: 0.9999991951888699, iteration: 95430
loss: 1.0313043594360352,grad_norm: 0.9999991682512333, iteration: 95431
loss: 0.9917054772377014,grad_norm: 0.9999991171775774, iteration: 95432
loss: 1.0370286703109741,grad_norm: 0.9999990808307179, iteration: 95433
loss: 1.032840609550476,grad_norm: 0.9999998890666936, iteration: 95434
loss: 1.0187594890594482,grad_norm: 0.9999993927025277, iteration: 95435
loss: 1.0149286985397339,grad_norm: 0.9999992309757871, iteration: 95436
loss: 1.017357349395752,grad_norm: 0.9999990906911228, iteration: 95437
loss: 0.9785787463188171,grad_norm: 0.999999171363172, iteration: 95438
loss: 1.0122904777526855,grad_norm: 0.9273222551351338, iteration: 95439
loss: 0.9718655347824097,grad_norm: 0.9185439115214632, iteration: 95440
loss: 0.9704267382621765,grad_norm: 0.9856512714152263, iteration: 95441
loss: 0.9906909465789795,grad_norm: 0.9707625870801342, iteration: 95442
loss: 0.9999175071716309,grad_norm: 0.9999991369311484, iteration: 95443
loss: 1.0215449333190918,grad_norm: 0.912372313223556, iteration: 95444
loss: 1.0265172719955444,grad_norm: 0.8357538046033296, iteration: 95445
loss: 1.0036793947219849,grad_norm: 0.9999989891102277, iteration: 95446
loss: 1.0372728109359741,grad_norm: 0.9999992387721165, iteration: 95447
loss: 0.9933290481567383,grad_norm: 0.9999990078223323, iteration: 95448
loss: 1.0075358152389526,grad_norm: 0.9999991265777911, iteration: 95449
loss: 0.9977139830589294,grad_norm: 0.999999157297321, iteration: 95450
loss: 1.0091928243637085,grad_norm: 0.9999994228515607, iteration: 95451
loss: 0.9808388352394104,grad_norm: 0.9822933595028607, iteration: 95452
loss: 0.9925700426101685,grad_norm: 0.925288982570247, iteration: 95453
loss: 0.9870336055755615,grad_norm: 0.9999991284086764, iteration: 95454
loss: 0.9746100902557373,grad_norm: 0.9999990156342989, iteration: 95455
loss: 1.0132416486740112,grad_norm: 0.9999990797603727, iteration: 95456
loss: 0.995506227016449,grad_norm: 0.9999989991169719, iteration: 95457
loss: 1.003725290298462,grad_norm: 0.9999991640560159, iteration: 95458
loss: 0.9857121109962463,grad_norm: 0.9999991655594724, iteration: 95459
loss: 1.0378469228744507,grad_norm: 0.9999991347177315, iteration: 95460
loss: 0.988297700881958,grad_norm: 0.9256437342800599, iteration: 95461
loss: 0.9947673678398132,grad_norm: 0.9999990698125684, iteration: 95462
loss: 0.9824913144111633,grad_norm: 0.9455049742879421, iteration: 95463
loss: 0.9962134957313538,grad_norm: 0.9999990055078051, iteration: 95464
loss: 1.0352526903152466,grad_norm: 0.9999996616720794, iteration: 95465
loss: 0.9836402535438538,grad_norm: 0.9999991242490495, iteration: 95466
loss: 1.0190290212631226,grad_norm: 0.999999266229174, iteration: 95467
loss: 1.0130521059036255,grad_norm: 0.9999991927032469, iteration: 95468
loss: 1.0328364372253418,grad_norm: 0.999999042710609, iteration: 95469
loss: 0.9774160981178284,grad_norm: 0.9999990049104356, iteration: 95470
loss: 0.9782191514968872,grad_norm: 0.981453558005887, iteration: 95471
loss: 0.9982157945632935,grad_norm: 0.9039734122024812, iteration: 95472
loss: 1.0352716445922852,grad_norm: 0.9999990921822209, iteration: 95473
loss: 0.9959805607795715,grad_norm: 0.9999990822905238, iteration: 95474
loss: 1.018788456916809,grad_norm: 0.9999990623851159, iteration: 95475
loss: 0.9879565834999084,grad_norm: 0.9930700789058085, iteration: 95476
loss: 1.027658462524414,grad_norm: 0.9999992406905108, iteration: 95477
loss: 1.0017964839935303,grad_norm: 0.9999991026440673, iteration: 95478
loss: 1.0173739194869995,grad_norm: 0.9999990225980494, iteration: 95479
loss: 1.0173975229263306,grad_norm: 0.9999992630758241, iteration: 95480
loss: 0.984235405921936,grad_norm: 0.9999991500687931, iteration: 95481
loss: 0.9926071166992188,grad_norm: 0.9999989389357615, iteration: 95482
loss: 1.0115330219268799,grad_norm: 0.9999990132210252, iteration: 95483
loss: 0.9908667802810669,grad_norm: 0.9999990727768543, iteration: 95484
loss: 0.9996597170829773,grad_norm: 0.9939089588705148, iteration: 95485
loss: 1.012092113494873,grad_norm: 0.8636104357168424, iteration: 95486
loss: 0.956099271774292,grad_norm: 0.9999992237190574, iteration: 95487
loss: 0.9756912589073181,grad_norm: 0.9999990787431549, iteration: 95488
loss: 1.0070363283157349,grad_norm: 0.9041635753572245, iteration: 95489
loss: 0.999187707901001,grad_norm: 0.9760012675386487, iteration: 95490
loss: 1.0206899642944336,grad_norm: 0.9999992000835807, iteration: 95491
loss: 1.0173534154891968,grad_norm: 0.9999988622879137, iteration: 95492
loss: 0.9990319609642029,grad_norm: 0.9048208721422732, iteration: 95493
loss: 0.9782549142837524,grad_norm: 0.9999989017780414, iteration: 95494
loss: 1.1721657514572144,grad_norm: 0.9999994208810875, iteration: 95495
loss: 1.0130083560943604,grad_norm: 0.9999992207008851, iteration: 95496
loss: 1.0024439096450806,grad_norm: 0.9733920240273523, iteration: 95497
loss: 1.0049326419830322,grad_norm: 0.9784735838952155, iteration: 95498
loss: 1.138348937034607,grad_norm: 0.999999243554174, iteration: 95499
loss: 0.9800406098365784,grad_norm: 0.9999995606796124, iteration: 95500
loss: 1.011244773864746,grad_norm: 0.9999991918397444, iteration: 95501
loss: 1.1987110376358032,grad_norm: 0.999999679437857, iteration: 95502
loss: 0.9879714846611023,grad_norm: 0.8527310292867207, iteration: 95503
loss: 0.9980685710906982,grad_norm: 0.9331039561005596, iteration: 95504
loss: 1.0012589693069458,grad_norm: 0.9999991934171037, iteration: 95505
loss: 1.0045545101165771,grad_norm: 0.9999992275768472, iteration: 95506
loss: 1.0022939443588257,grad_norm: 0.999999008600261, iteration: 95507
loss: 0.9725415706634521,grad_norm: 0.9999988912752314, iteration: 95508
loss: 1.010361909866333,grad_norm: 0.9765700420152047, iteration: 95509
loss: 1.018894076347351,grad_norm: 0.9703988260237835, iteration: 95510
loss: 1.0768107175827026,grad_norm: 0.9999995398700905, iteration: 95511
loss: 1.0484888553619385,grad_norm: 0.9999997652171074, iteration: 95512
loss: 0.9999769926071167,grad_norm: 0.999999825355558, iteration: 95513
loss: 1.1055063009262085,grad_norm: 0.9999992448369853, iteration: 95514
loss: 0.9679664969444275,grad_norm: 0.9827148896247845, iteration: 95515
loss: 0.9645599722862244,grad_norm: 0.9999991924979187, iteration: 95516
loss: 1.047808289527893,grad_norm: 0.9999991547160999, iteration: 95517
loss: 0.9893336892127991,grad_norm: 0.8584913749456456, iteration: 95518
loss: 0.9871158599853516,grad_norm: 0.9999989983216513, iteration: 95519
loss: 0.9987077713012695,grad_norm: 0.9999992344711263, iteration: 95520
loss: 1.070568561553955,grad_norm: 0.9999991741878784, iteration: 95521
loss: 1.130497932434082,grad_norm: 0.9999991924177144, iteration: 95522
loss: 1.0077317953109741,grad_norm: 0.9999990688685799, iteration: 95523
loss: 1.0353859663009644,grad_norm: 0.9999992559405372, iteration: 95524
loss: 0.9666834473609924,grad_norm: 0.9999989957848918, iteration: 95525
loss: 1.1311784982681274,grad_norm: 0.9999992596642198, iteration: 95526
loss: 1.0066277980804443,grad_norm: 0.9802842187954258, iteration: 95527
loss: 1.115865707397461,grad_norm: 0.9999998534887693, iteration: 95528
loss: 0.9827760457992554,grad_norm: 0.9999994990898072, iteration: 95529
loss: 1.0212520360946655,grad_norm: 0.9999990822918722, iteration: 95530
loss: 0.9927074313163757,grad_norm: 0.999999162206624, iteration: 95531
loss: 1.0057940483093262,grad_norm: 0.9490789619866731, iteration: 95532
loss: 1.0629695653915405,grad_norm: 0.9999991290534105, iteration: 95533
loss: 1.056093454360962,grad_norm: 0.9999993252554852, iteration: 95534
loss: 1.0414834022521973,grad_norm: 0.9999992696020076, iteration: 95535
loss: 1.0020421743392944,grad_norm: 0.8979995167967353, iteration: 95536
loss: 1.0138798952102661,grad_norm: 0.9999989993312168, iteration: 95537
loss: 1.014109492301941,grad_norm: 0.9999991657514724, iteration: 95538
loss: 1.021822214126587,grad_norm: 0.8677128263451195, iteration: 95539
loss: 1.001225233078003,grad_norm: 0.9999991717280466, iteration: 95540
loss: 1.0013078451156616,grad_norm: 0.8239325048665458, iteration: 95541
loss: 1.0379259586334229,grad_norm: 0.9999990866466026, iteration: 95542
loss: 1.0143698453903198,grad_norm: 0.9999991237740302, iteration: 95543
loss: 1.0090341567993164,grad_norm: 0.999999003652236, iteration: 95544
loss: 1.0354762077331543,grad_norm: 0.9999990424598435, iteration: 95545
loss: 1.0821025371551514,grad_norm: 0.9999994251399603, iteration: 95546
loss: 1.086097002029419,grad_norm: 0.9999997025542827, iteration: 95547
loss: 1.000852108001709,grad_norm: 0.9411994847786371, iteration: 95548
loss: 0.9745596647262573,grad_norm: 0.9259399841465897, iteration: 95549
loss: 0.9770825505256653,grad_norm: 0.9999991701663871, iteration: 95550
loss: 1.0244722366333008,grad_norm: 0.9733346982682176, iteration: 95551
loss: 1.0161514282226562,grad_norm: 0.9379365291495956, iteration: 95552
loss: 1.050274133682251,grad_norm: 0.8690729779649926, iteration: 95553
loss: 0.9980737566947937,grad_norm: 0.9999990299550364, iteration: 95554
loss: 1.025787591934204,grad_norm: 0.9999994464002488, iteration: 95555
loss: 1.0034817457199097,grad_norm: 0.9487386639615379, iteration: 95556
loss: 0.9951044917106628,grad_norm: 0.9999990705119355, iteration: 95557
loss: 0.9862182140350342,grad_norm: 0.9999990649094695, iteration: 95558
loss: 1.0340652465820312,grad_norm: 0.9999997916847677, iteration: 95559
loss: 1.096638560295105,grad_norm: 0.9999994424936012, iteration: 95560
loss: 1.0800466537475586,grad_norm: 0.9999996574893939, iteration: 95561
loss: 1.048548698425293,grad_norm: 0.9999991477843356, iteration: 95562
loss: 1.010155439376831,grad_norm: 0.9999991333131426, iteration: 95563
loss: 1.1034996509552002,grad_norm: 0.9999992057212912, iteration: 95564
loss: 1.0470861196517944,grad_norm: 0.9999992442290419, iteration: 95565
loss: 0.97984379529953,grad_norm: 0.9935117862931927, iteration: 95566
loss: 1.118687629699707,grad_norm: 0.9999995881090696, iteration: 95567
loss: 1.0219491720199585,grad_norm: 0.999999199642589, iteration: 95568
loss: 1.0942542552947998,grad_norm: 0.9999996615003837, iteration: 95569
loss: 1.0486457347869873,grad_norm: 0.999998894098791, iteration: 95570
loss: 0.980778694152832,grad_norm: 0.9999993418659412, iteration: 95571
loss: 0.9971683621406555,grad_norm: 0.9999992203108853, iteration: 95572
loss: 1.0593395233154297,grad_norm: 0.9999990823112581, iteration: 95573
loss: 1.0841048955917358,grad_norm: 0.9999999667461323, iteration: 95574
loss: 0.9684692025184631,grad_norm: 0.999999068744355, iteration: 95575
loss: 0.9863559007644653,grad_norm: 0.9999990850608824, iteration: 95576
loss: 1.0221608877182007,grad_norm: 0.9865381489800769, iteration: 95577
loss: 0.9700126051902771,grad_norm: 0.9999993166633114, iteration: 95578
loss: 1.0163843631744385,grad_norm: 0.9999991032430158, iteration: 95579
loss: 0.9688935279846191,grad_norm: 0.9999990487512046, iteration: 95580
loss: 1.0219364166259766,grad_norm: 0.9125024523956561, iteration: 95581
loss: 0.9944741129875183,grad_norm: 0.9999990844730048, iteration: 95582
loss: 0.996880829334259,grad_norm: 0.9769851889645915, iteration: 95583
loss: 1.128180980682373,grad_norm: 0.9999999077219608, iteration: 95584
loss: 1.2425546646118164,grad_norm: 0.9999992530655213, iteration: 95585
loss: 1.012572169303894,grad_norm: 0.9999994440268217, iteration: 95586
loss: 1.0015380382537842,grad_norm: 0.9999998118755481, iteration: 95587
loss: 0.9882343411445618,grad_norm: 0.9999991494262536, iteration: 95588
loss: 1.0002704858779907,grad_norm: 0.9717875965391903, iteration: 95589
loss: 1.0105360746383667,grad_norm: 0.9366286637168162, iteration: 95590
loss: 0.9703999161720276,grad_norm: 0.9999990569367915, iteration: 95591
loss: 0.9513662457466125,grad_norm: 0.9999990952179127, iteration: 95592
loss: 1.0344349145889282,grad_norm: 0.9999995349299856, iteration: 95593
loss: 0.9810487627983093,grad_norm: 0.9999990830411452, iteration: 95594
loss: 0.9908187985420227,grad_norm: 0.9999991394870047, iteration: 95595
loss: 1.0028882026672363,grad_norm: 0.8748899503804276, iteration: 95596
loss: 1.146788239479065,grad_norm: 0.9999999042244919, iteration: 95597
loss: 1.02931809425354,grad_norm: 0.9999994528454269, iteration: 95598
loss: 1.0516510009765625,grad_norm: 0.9999993329413835, iteration: 95599
loss: 1.016857385635376,grad_norm: 0.9999992368661208, iteration: 95600
loss: 1.0282479524612427,grad_norm: 0.9999996361765481, iteration: 95601
loss: 1.0952527523040771,grad_norm: 0.9999992072199582, iteration: 95602
loss: 1.002835988998413,grad_norm: 0.9999990599614476, iteration: 95603
loss: 0.9456691145896912,grad_norm: 0.9999991918443578, iteration: 95604
loss: 1.1827040910720825,grad_norm: 1.0000000141858185, iteration: 95605
loss: 1.0232161283493042,grad_norm: 0.9999991564592952, iteration: 95606
loss: 1.0292831659317017,grad_norm: 0.9999991949621782, iteration: 95607
loss: 0.9494644999504089,grad_norm: 0.9999990925281976, iteration: 95608
loss: 0.9927631616592407,grad_norm: 0.995433557882978, iteration: 95609
loss: 1.016537070274353,grad_norm: 0.9348175550093721, iteration: 95610
loss: 1.027276635169983,grad_norm: 0.9785527617175321, iteration: 95611
loss: 1.0214656591415405,grad_norm: 0.9999993347429817, iteration: 95612
loss: 1.026650309562683,grad_norm: 0.9999989220222992, iteration: 95613
loss: 1.0725723505020142,grad_norm: 0.9999995546159717, iteration: 95614
loss: 0.9600744843482971,grad_norm: 0.9999990696172459, iteration: 95615
loss: 1.0178930759429932,grad_norm: 0.9999996275151553, iteration: 95616
loss: 0.9834854006767273,grad_norm: 0.999999185892536, iteration: 95617
loss: 0.9753230214118958,grad_norm: 0.9999990403535265, iteration: 95618
loss: 1.0309627056121826,grad_norm: 0.9999994107004989, iteration: 95619
loss: 0.9987779855728149,grad_norm: 0.9999992099944068, iteration: 95620
loss: 0.9818552732467651,grad_norm: 0.8862672593043933, iteration: 95621
loss: 0.9816969633102417,grad_norm: 0.9999990302888964, iteration: 95622
loss: 1.0139340162277222,grad_norm: 0.9999990726311802, iteration: 95623
loss: 0.9820598363876343,grad_norm: 0.9092347887398154, iteration: 95624
loss: 0.974524736404419,grad_norm: 0.9901433877472134, iteration: 95625
loss: 0.986829936504364,grad_norm: 0.9999990126949887, iteration: 95626
loss: 0.9914676547050476,grad_norm: 0.9287837847020401, iteration: 95627
loss: 1.027742624282837,grad_norm: 0.9999990629701064, iteration: 95628
loss: 1.0423792600631714,grad_norm: 0.999999607057389, iteration: 95629
loss: 1.030515193939209,grad_norm: 0.9999994961069093, iteration: 95630
loss: 1.0102448463439941,grad_norm: 0.9999994557823266, iteration: 95631
loss: 1.0394585132598877,grad_norm: 0.9999991531674178, iteration: 95632
loss: 0.9742519855499268,grad_norm: 0.8864169093977596, iteration: 95633
loss: 1.006828784942627,grad_norm: 0.9999991152742111, iteration: 95634
loss: 1.031697154045105,grad_norm: 0.976813312547772, iteration: 95635
loss: 1.0118069648742676,grad_norm: 0.9999991029055081, iteration: 95636
loss: 0.976111650466919,grad_norm: 0.999999160833866, iteration: 95637
loss: 1.0010530948638916,grad_norm: 0.9999990507012955, iteration: 95638
loss: 0.9687588214874268,grad_norm: 0.9999990384527169, iteration: 95639
loss: 0.9981781840324402,grad_norm: 0.9999991880889626, iteration: 95640
loss: 0.9662765264511108,grad_norm: 0.9999990501259106, iteration: 95641
loss: 1.110224723815918,grad_norm: 0.999999235677498, iteration: 95642
loss: 1.005900502204895,grad_norm: 0.7914507938328661, iteration: 95643
loss: 1.0482800006866455,grad_norm: 0.9999998309413759, iteration: 95644
loss: 1.0848628282546997,grad_norm: 0.9999995571258103, iteration: 95645
loss: 1.0399373769760132,grad_norm: 0.9999998714138629, iteration: 95646
loss: 1.0055968761444092,grad_norm: 0.9999997136493536, iteration: 95647
loss: 0.9888806343078613,grad_norm: 0.9999992255940499, iteration: 95648
loss: 0.9968889951705933,grad_norm: 0.9999990118706276, iteration: 95649
loss: 1.0116368532180786,grad_norm: 0.8534450626221461, iteration: 95650
loss: 0.9926585555076599,grad_norm: 0.9999990572904843, iteration: 95651
loss: 1.0165914297103882,grad_norm: 0.9999988336049928, iteration: 95652
loss: 1.0085731744766235,grad_norm: 0.9999993608504334, iteration: 95653
loss: 1.0057626962661743,grad_norm: 0.9770596413252923, iteration: 95654
loss: 1.0057791471481323,grad_norm: 0.9999991581397663, iteration: 95655
loss: 0.9939212799072266,grad_norm: 0.8812208928692059, iteration: 95656
loss: 1.018717885017395,grad_norm: 0.999999239773387, iteration: 95657
loss: 0.9953638911247253,grad_norm: 0.9999990913648682, iteration: 95658
loss: 0.9880056977272034,grad_norm: 0.9945977480994342, iteration: 95659
loss: 1.0279566049575806,grad_norm: 0.999999254786754, iteration: 95660
loss: 1.0023120641708374,grad_norm: 0.9999991714301488, iteration: 95661
loss: 1.019521951675415,grad_norm: 0.9999991089959269, iteration: 95662
loss: 1.0359230041503906,grad_norm: 0.9999993890479991, iteration: 95663
loss: 0.9758128523826599,grad_norm: 0.9999990047636869, iteration: 95664
loss: 1.0611813068389893,grad_norm: 0.9999997578062712, iteration: 95665
loss: 0.9833475947380066,grad_norm: 0.9999991968750123, iteration: 95666
loss: 0.9768378138542175,grad_norm: 0.9999989770532953, iteration: 95667
loss: 1.0044448375701904,grad_norm: 0.9999991448896417, iteration: 95668
loss: 1.0032410621643066,grad_norm: 0.999999573813169, iteration: 95669
loss: 0.9959958791732788,grad_norm: 0.9999992405029365, iteration: 95670
loss: 0.9978132247924805,grad_norm: 0.999999228654273, iteration: 95671
loss: 1.0199412107467651,grad_norm: 0.9741135902934707, iteration: 95672
loss: 0.9901418685913086,grad_norm: 0.963310621035953, iteration: 95673
loss: 1.0086383819580078,grad_norm: 0.9999992043023208, iteration: 95674
loss: 1.0738486051559448,grad_norm: 0.9999997716104978, iteration: 95675
loss: 1.0150691270828247,grad_norm: 0.977078654067729, iteration: 95676
loss: 0.9797219038009644,grad_norm: 0.8924496484098665, iteration: 95677
loss: 0.9947963356971741,grad_norm: 0.8212224081939606, iteration: 95678
loss: 1.018581748008728,grad_norm: 0.9999993320287651, iteration: 95679
loss: 0.9914792776107788,grad_norm: 0.9999991949180861, iteration: 95680
loss: 1.0017375946044922,grad_norm: 0.7995626951193726, iteration: 95681
loss: 0.994606077671051,grad_norm: 0.999999240057375, iteration: 95682
loss: 1.0017551183700562,grad_norm: 0.9999989901146158, iteration: 95683
loss: 0.9509754180908203,grad_norm: 0.9999992970168048, iteration: 95684
loss: 1.00959312915802,grad_norm: 0.9999990245378299, iteration: 95685
loss: 0.9863831400871277,grad_norm: 0.999999367618989, iteration: 95686
loss: 1.0219281911849976,grad_norm: 0.9999995480927616, iteration: 95687
loss: 1.0753628015518188,grad_norm: 0.9999990712980268, iteration: 95688
loss: 1.0121251344680786,grad_norm: 0.9496008854961321, iteration: 95689
loss: 0.9854551553726196,grad_norm: 0.9417512265396198, iteration: 95690
loss: 0.9797788858413696,grad_norm: 0.776514222998384, iteration: 95691
loss: 0.9973063468933105,grad_norm: 0.9999990504628642, iteration: 95692
loss: 1.0113145112991333,grad_norm: 0.9999989827220193, iteration: 95693
loss: 0.9893804788589478,grad_norm: 0.999999274854788, iteration: 95694
loss: 1.0510001182556152,grad_norm: 0.9999992368213689, iteration: 95695
loss: 1.0315070152282715,grad_norm: 0.99999915579546, iteration: 95696
loss: 0.9980382919311523,grad_norm: 0.9999992146941705, iteration: 95697
loss: 1.0249260663986206,grad_norm: 0.9999989028943537, iteration: 95698
loss: 0.9959003329277039,grad_norm: 0.9999991362733223, iteration: 95699
loss: 1.019994854927063,grad_norm: 0.8631434652155403, iteration: 95700
loss: 1.0335205793380737,grad_norm: 0.9999994273099491, iteration: 95701
loss: 0.9768964052200317,grad_norm: 0.9471024273714679, iteration: 95702
loss: 1.0564326047897339,grad_norm: 0.9999994457971582, iteration: 95703
loss: 1.0481648445129395,grad_norm: 0.9745039849389707, iteration: 95704
loss: 1.16936194896698,grad_norm: 0.9999999221147473, iteration: 95705
loss: 1.00355064868927,grad_norm: 0.9999990824962167, iteration: 95706
loss: 0.9773115515708923,grad_norm: 0.999999215012457, iteration: 95707
loss: 0.9988875389099121,grad_norm: 0.9999994683456962, iteration: 95708
loss: 1.0036677122116089,grad_norm: 0.9999995821400927, iteration: 95709
loss: 0.9954293370246887,grad_norm: 0.999999414388176, iteration: 95710
loss: 0.9979649186134338,grad_norm: 0.921353686088685, iteration: 95711
loss: 1.0036998987197876,grad_norm: 0.9902407498154093, iteration: 95712
loss: 1.0174626111984253,grad_norm: 0.9999995734564089, iteration: 95713
loss: 1.0045146942138672,grad_norm: 0.9999990948009628, iteration: 95714
loss: 0.9737651944160461,grad_norm: 0.999999479427532, iteration: 95715
loss: 1.0037208795547485,grad_norm: 0.9999996874852825, iteration: 95716
loss: 0.992368757724762,grad_norm: 0.999998952493364, iteration: 95717
loss: 0.976142942905426,grad_norm: 0.9999991509556329, iteration: 95718
loss: 1.0041472911834717,grad_norm: 0.9999992020892504, iteration: 95719
loss: 0.9806618094444275,grad_norm: 0.9726381572907469, iteration: 95720
loss: 1.0034682750701904,grad_norm: 0.9999992268325973, iteration: 95721
loss: 0.9832814335823059,grad_norm: 0.9999991758651855, iteration: 95722
loss: 1.048290729522705,grad_norm: 0.9999998404773096, iteration: 95723
loss: 0.984086811542511,grad_norm: 0.8757868975539792, iteration: 95724
loss: 1.0061391592025757,grad_norm: 0.9095836597476916, iteration: 95725
loss: 1.0301506519317627,grad_norm: 0.9999999111697999, iteration: 95726
loss: 1.000435471534729,grad_norm: 0.9999994715202744, iteration: 95727
loss: 1.0031511783599854,grad_norm: 0.9999990368429343, iteration: 95728
loss: 0.9971250891685486,grad_norm: 0.9999990967369126, iteration: 95729
loss: 0.9981843829154968,grad_norm: 0.8403816806117281, iteration: 95730
loss: 0.9756629467010498,grad_norm: 0.9999989366582526, iteration: 95731
loss: 0.9975664019584656,grad_norm: 0.9999992164642012, iteration: 95732
loss: 0.993152916431427,grad_norm: 0.9999993175009595, iteration: 95733
loss: 0.9978601336479187,grad_norm: 0.9395539176350413, iteration: 95734
loss: 1.0188027620315552,grad_norm: 0.9999990442120139, iteration: 95735
loss: 0.9950097799301147,grad_norm: 0.9999992433115672, iteration: 95736
loss: 0.9907587766647339,grad_norm: 0.9999992726001841, iteration: 95737
loss: 1.0231993198394775,grad_norm: 0.9938766577310121, iteration: 95738
loss: 1.0164783000946045,grad_norm: 0.9999996517287312, iteration: 95739
loss: 0.979812502861023,grad_norm: 0.9999992910177332, iteration: 95740
loss: 0.9663762450218201,grad_norm: 0.9999990813260664, iteration: 95741
loss: 1.014624834060669,grad_norm: 0.9999991539395048, iteration: 95742
loss: 0.9965455532073975,grad_norm: 0.9792302664879388, iteration: 95743
loss: 0.9820153117179871,grad_norm: 0.8484056918077334, iteration: 95744
loss: 0.9883091449737549,grad_norm: 0.9999990236006382, iteration: 95745
loss: 0.995225191116333,grad_norm: 0.9531166187227811, iteration: 95746
loss: 0.9797710180282593,grad_norm: 0.9999993510188772, iteration: 95747
loss: 1.040536642074585,grad_norm: 0.9999997354675906, iteration: 95748
loss: 1.011254906654358,grad_norm: 0.9999989810788876, iteration: 95749
loss: 0.9508524537086487,grad_norm: 0.8948722454009881, iteration: 95750
loss: 0.9933632612228394,grad_norm: 0.9999991897342149, iteration: 95751
loss: 1.0437341928482056,grad_norm: 0.9999992098676802, iteration: 95752
loss: 0.9947829842567444,grad_norm: 0.9999989837000167, iteration: 95753
loss: 1.0014166831970215,grad_norm: 0.9999991634225033, iteration: 95754
loss: 1.0259127616882324,grad_norm: 0.9572145724208492, iteration: 95755
loss: 1.04422926902771,grad_norm: 0.9999997232146202, iteration: 95756
loss: 1.0756919384002686,grad_norm: 0.9999998381980024, iteration: 95757
loss: 1.039085030555725,grad_norm: 0.9999994914234301, iteration: 95758
loss: 0.9818690419197083,grad_norm: 0.9887178762802404, iteration: 95759
loss: 0.9672834873199463,grad_norm: 0.9999991455995799, iteration: 95760
loss: 1.0079033374786377,grad_norm: 0.9999993293497877, iteration: 95761
loss: 1.0467368364334106,grad_norm: 0.9999989957408584, iteration: 95762
loss: 0.993888795375824,grad_norm: 0.9999991261858115, iteration: 95763
loss: 0.9974009990692139,grad_norm: 0.9151875076603064, iteration: 95764
loss: 1.01292884349823,grad_norm: 0.9999990913489119, iteration: 95765
loss: 1.0297677516937256,grad_norm: 0.9999992994572562, iteration: 95766
loss: 1.040390133857727,grad_norm: 0.9999993548176356, iteration: 95767
loss: 1.0071076154708862,grad_norm: 0.8461303316616497, iteration: 95768
loss: 1.008610725402832,grad_norm: 0.9999991297912909, iteration: 95769
loss: 0.9957500696182251,grad_norm: 0.9970624668560157, iteration: 95770
loss: 1.0137606859207153,grad_norm: 0.9999992746076185, iteration: 95771
loss: 0.9889029860496521,grad_norm: 0.8919091566725093, iteration: 95772
loss: 1.022383451461792,grad_norm: 0.9999990965518288, iteration: 95773
loss: 0.9884195923805237,grad_norm: 0.9999992269526665, iteration: 95774
loss: 0.9780583381652832,grad_norm: 0.999999231574644, iteration: 95775
loss: 1.0028221607208252,grad_norm: 0.9527944629672188, iteration: 95776
loss: 1.0172098875045776,grad_norm: 0.9999993022537917, iteration: 95777
loss: 1.0676013231277466,grad_norm: 0.9999991425459317, iteration: 95778
loss: 0.9603347778320312,grad_norm: 0.9999992135872611, iteration: 95779
loss: 0.9938762187957764,grad_norm: 0.9197983947241241, iteration: 95780
loss: 0.9718717932701111,grad_norm: 0.9999990774726933, iteration: 95781
loss: 1.0096327066421509,grad_norm: 0.9999990289028275, iteration: 95782
loss: 1.0254169702529907,grad_norm: 0.999999032508746, iteration: 95783
loss: 1.0267646312713623,grad_norm: 0.999999193811308, iteration: 95784
loss: 1.0087956190109253,grad_norm: 0.9055615632281829, iteration: 95785
loss: 1.0239719152450562,grad_norm: 0.9999990090387738, iteration: 95786
loss: 0.9745759963989258,grad_norm: 0.9269170488823504, iteration: 95787
loss: 1.0060949325561523,grad_norm: 0.9999997206126429, iteration: 95788
loss: 1.0526045560836792,grad_norm: 0.9999990475291708, iteration: 95789
loss: 1.007794976234436,grad_norm: 0.9999990750755642, iteration: 95790
loss: 1.007309913635254,grad_norm: 0.9999992180787783, iteration: 95791
loss: 0.9695466756820679,grad_norm: 0.9999992387265668, iteration: 95792
loss: 1.0874079465866089,grad_norm: 0.999999292973227, iteration: 95793
loss: 1.0008426904678345,grad_norm: 0.9999991195378689, iteration: 95794
loss: 0.9939299821853638,grad_norm: 0.9999990917433149, iteration: 95795
loss: 0.9924952983856201,grad_norm: 0.9999989152879404, iteration: 95796
loss: 1.0367176532745361,grad_norm: 0.9999993844605088, iteration: 95797
loss: 0.969693660736084,grad_norm: 0.9999992547593063, iteration: 95798
loss: 1.0382380485534668,grad_norm: 0.9861844208157154, iteration: 95799
loss: 0.9977133274078369,grad_norm: 0.9845160260475526, iteration: 95800
loss: 0.9942023754119873,grad_norm: 0.9999991486013817, iteration: 95801
loss: 0.9977236986160278,grad_norm: 0.9999991057832731, iteration: 95802
loss: 1.0330411195755005,grad_norm: 0.9492403958208606, iteration: 95803
loss: 1.0256186723709106,grad_norm: 0.9999991353643461, iteration: 95804
loss: 0.9930196404457092,grad_norm: 0.9999991662054528, iteration: 95805
loss: 0.9862827658653259,grad_norm: 0.957606540867226, iteration: 95806
loss: 1.0081640481948853,grad_norm: 0.8906644593496899, iteration: 95807
loss: 0.9890167713165283,grad_norm: 0.9999990630892139, iteration: 95808
loss: 1.014335036277771,grad_norm: 0.999999274904948, iteration: 95809
loss: 1.0175962448120117,grad_norm: 0.9999996253761567, iteration: 95810
loss: 0.9577440023422241,grad_norm: 0.8972745047126338, iteration: 95811
loss: 1.0083142518997192,grad_norm: 0.9796818551496258, iteration: 95812
loss: 1.0182337760925293,grad_norm: 0.9876950018222597, iteration: 95813
loss: 1.024527668952942,grad_norm: 0.999999179899288, iteration: 95814
loss: 0.9853910207748413,grad_norm: 0.9999991723527446, iteration: 95815
loss: 0.9596703052520752,grad_norm: 0.9999992235916074, iteration: 95816
loss: 0.9641445875167847,grad_norm: 0.9999991627747665, iteration: 95817
loss: 1.033860445022583,grad_norm: 0.9999992382327739, iteration: 95818
loss: 1.001258134841919,grad_norm: 0.9999992111471543, iteration: 95819
loss: 1.0092995166778564,grad_norm: 0.9999990058295792, iteration: 95820
loss: 1.0605659484863281,grad_norm: 0.9999999030742257, iteration: 95821
loss: 1.0111520290374756,grad_norm: 0.9277242100333171, iteration: 95822
loss: 1.010335087776184,grad_norm: 0.9999991215673516, iteration: 95823
loss: 0.9519037008285522,grad_norm: 0.9999992149930433, iteration: 95824
loss: 1.0003451108932495,grad_norm: 0.8530292896253211, iteration: 95825
loss: 0.9786577224731445,grad_norm: 0.9999992162699212, iteration: 95826
loss: 1.04379403591156,grad_norm: 0.9999990154415066, iteration: 95827
loss: 1.023372769355774,grad_norm: 0.9999991043593144, iteration: 95828
loss: 0.9939209222793579,grad_norm: 0.9207756499287819, iteration: 95829
loss: 0.9949080944061279,grad_norm: 0.9999999317237898, iteration: 95830
loss: 0.9717015027999878,grad_norm: 0.9999992909305178, iteration: 95831
loss: 0.9863287806510925,grad_norm: 0.9999992337126228, iteration: 95832
loss: 0.9903075695037842,grad_norm: 0.9999991132985454, iteration: 95833
loss: 1.0107530355453491,grad_norm: 0.9999990586702138, iteration: 95834
loss: 1.0564395189285278,grad_norm: 0.9999998241183692, iteration: 95835
loss: 1.0042619705200195,grad_norm: 0.9999992012571811, iteration: 95836
loss: 1.038649320602417,grad_norm: 0.9999991540304692, iteration: 95837
loss: 0.9694928526878357,grad_norm: 0.9999991764538375, iteration: 95838
loss: 1.037681221961975,grad_norm: 0.9999993309729888, iteration: 95839
loss: 1.0275505781173706,grad_norm: 0.9700521369550261, iteration: 95840
loss: 0.9876700639724731,grad_norm: 0.9999992533125641, iteration: 95841
loss: 1.0305050611495972,grad_norm: 0.9999996342510179, iteration: 95842
loss: 0.9891233444213867,grad_norm: 0.979860702494118, iteration: 95843
loss: 1.0298314094543457,grad_norm: 0.999999067510843, iteration: 95844
loss: 0.9929710626602173,grad_norm: 0.972778188639896, iteration: 95845
loss: 0.9706045985221863,grad_norm: 0.9999990341507269, iteration: 95846
loss: 1.2129955291748047,grad_norm: 0.999999811474852, iteration: 95847
loss: 1.0180505514144897,grad_norm: 0.9053298882228369, iteration: 95848
loss: 0.9889146685600281,grad_norm: 0.9999993145432515, iteration: 95849
loss: 1.1180217266082764,grad_norm: 0.9999997193553536, iteration: 95850
loss: 0.99576336145401,grad_norm: 0.9999992944037841, iteration: 95851
loss: 1.01762056350708,grad_norm: 0.9999991452106765, iteration: 95852
loss: 0.9726809859275818,grad_norm: 0.9999990276658557, iteration: 95853
loss: 1.0256227254867554,grad_norm: 0.9999992514486972, iteration: 95854
loss: 0.9874330163002014,grad_norm: 0.9999991755206312, iteration: 95855
loss: 1.022312879562378,grad_norm: 0.9999991784224359, iteration: 95856
loss: 1.0292882919311523,grad_norm: 0.99999909270891, iteration: 95857
loss: 0.9752829670906067,grad_norm: 0.999999131841542, iteration: 95858
loss: 0.9800463914871216,grad_norm: 0.999999251905998, iteration: 95859
loss: 1.093444585800171,grad_norm: 0.9999998296755254, iteration: 95860
loss: 0.999411940574646,grad_norm: 0.9999993160405939, iteration: 95861
loss: 1.0064606666564941,grad_norm: 0.9527551619065552, iteration: 95862
loss: 1.0023938417434692,grad_norm: 0.8421601063626788, iteration: 95863
loss: 0.9974700808525085,grad_norm: 0.9999992478032782, iteration: 95864
loss: 0.9919622540473938,grad_norm: 0.9967604838065982, iteration: 95865
loss: 0.9973371028900146,grad_norm: 0.9999991779252877, iteration: 95866
loss: 0.9878827929496765,grad_norm: 0.9999990815961428, iteration: 95867
loss: 0.9979422092437744,grad_norm: 0.9999991930797985, iteration: 95868
loss: 1.0099148750305176,grad_norm: 0.9524257140262699, iteration: 95869
loss: 0.9994198083877563,grad_norm: 0.968146886146648, iteration: 95870
loss: 0.9971412420272827,grad_norm: 0.99999928590679, iteration: 95871
loss: 1.0285520553588867,grad_norm: 0.9999991169477181, iteration: 95872
loss: 1.0188322067260742,grad_norm: 0.9999990938894563, iteration: 95873
loss: 1.0039176940917969,grad_norm: 0.9999994285435723, iteration: 95874
loss: 0.990977942943573,grad_norm: 0.9999994033460423, iteration: 95875
loss: 1.0240098237991333,grad_norm: 0.9999994386432277, iteration: 95876
loss: 1.0898936986923218,grad_norm: 0.999999619696383, iteration: 95877
loss: 1.0219991207122803,grad_norm: 0.9999996863498489, iteration: 95878
loss: 1.0601673126220703,grad_norm: 0.9999999625121614, iteration: 95879
loss: 1.0120495557785034,grad_norm: 0.9999989987005753, iteration: 95880
loss: 1.0396910905838013,grad_norm: 0.9999993627146299, iteration: 95881
loss: 0.9749072790145874,grad_norm: 0.9422752316431231, iteration: 95882
loss: 0.9909067153930664,grad_norm: 0.9746207995670164, iteration: 95883
loss: 0.9635151624679565,grad_norm: 0.9469276414735662, iteration: 95884
loss: 1.0140348672866821,grad_norm: 0.9999990239517672, iteration: 95885
loss: 1.0381214618682861,grad_norm: 0.9570690205299828, iteration: 95886
loss: 1.0137715339660645,grad_norm: 0.9999990579820676, iteration: 95887
loss: 1.0140691995620728,grad_norm: 0.9339687589238886, iteration: 95888
loss: 1.0036060810089111,grad_norm: 0.9631490119033532, iteration: 95889
loss: 1.0253981351852417,grad_norm: 0.9999993026046179, iteration: 95890
loss: 1.1159048080444336,grad_norm: 0.9999998997171052, iteration: 95891
loss: 1.0371367931365967,grad_norm: 0.9744703951819769, iteration: 95892
loss: 1.0362130403518677,grad_norm: 0.9999992193001669, iteration: 95893
loss: 1.1241341829299927,grad_norm: 0.9999997214612275, iteration: 95894
loss: 1.1731593608856201,grad_norm: 0.999999585560567, iteration: 95895
loss: 1.0240222215652466,grad_norm: 0.9999990643454227, iteration: 95896
loss: 1.0229610204696655,grad_norm: 0.9439375772423401, iteration: 95897
loss: 0.9947108030319214,grad_norm: 0.8946415404368417, iteration: 95898
loss: 1.0098696947097778,grad_norm: 0.999999104900907, iteration: 95899
loss: 1.0075584650039673,grad_norm: 0.973117080462007, iteration: 95900
loss: 1.1577110290527344,grad_norm: 0.9999995494055909, iteration: 95901
loss: 1.2088087797164917,grad_norm: 0.9999996163282635, iteration: 95902
loss: 0.9941462278366089,grad_norm: 0.9999994107043714, iteration: 95903
loss: 1.2606481313705444,grad_norm: 0.9999997172401489, iteration: 95904
loss: 1.0208990573883057,grad_norm: 0.9999990405425752, iteration: 95905
loss: 1.0523656606674194,grad_norm: 0.9999992803460919, iteration: 95906
loss: 0.9885982275009155,grad_norm: 0.9999990792816768, iteration: 95907
loss: 1.1287473440170288,grad_norm: 0.9999994854168696, iteration: 95908
loss: 1.0182477235794067,grad_norm: 0.9999993501327675, iteration: 95909
loss: 1.0360573530197144,grad_norm: 0.9999992091976111, iteration: 95910
loss: 1.0279186964035034,grad_norm: 0.9999991073095518, iteration: 95911
loss: 1.145444631576538,grad_norm: 0.9999996846893162, iteration: 95912
loss: 1.0207812786102295,grad_norm: 0.9999993070497323, iteration: 95913
loss: 1.0132120847702026,grad_norm: 0.9999990875739585, iteration: 95914
loss: 0.9893598556518555,grad_norm: 0.9999990272534408, iteration: 95915
loss: 0.9721510410308838,grad_norm: 0.9999992044744224, iteration: 95916
loss: 1.0029950141906738,grad_norm: 0.9999992414460503, iteration: 95917
loss: 0.9820981621742249,grad_norm: 0.9999991149281805, iteration: 95918
loss: 1.0318065881729126,grad_norm: 0.9999989761941516, iteration: 95919
loss: 1.0343291759490967,grad_norm: 0.9999990761705448, iteration: 95920
loss: 0.9819971919059753,grad_norm: 0.8550168815433001, iteration: 95921
loss: 0.9858987927436829,grad_norm: 0.9949531598070221, iteration: 95922
loss: 0.9886889457702637,grad_norm: 0.9475946554116628, iteration: 95923
loss: 1.009361982345581,grad_norm: 0.999999175875881, iteration: 95924
loss: 0.9782323241233826,grad_norm: 0.9999992410822086, iteration: 95925
loss: 1.1182379722595215,grad_norm: 0.9999995147100416, iteration: 95926
loss: 0.9989650845527649,grad_norm: 0.9624827649525451, iteration: 95927
loss: 1.051261305809021,grad_norm: 0.9999994514819925, iteration: 95928
loss: 0.995702862739563,grad_norm: 0.9999991331255449, iteration: 95929
loss: 1.0040234327316284,grad_norm: 0.9999999491585925, iteration: 95930
loss: 0.9920443296432495,grad_norm: 0.9822536942570582, iteration: 95931
loss: 0.9614988565444946,grad_norm: 0.9999989517236557, iteration: 95932
loss: 0.993141233921051,grad_norm: 0.9999990838587236, iteration: 95933
loss: 1.0119044780731201,grad_norm: 0.9999992452301856, iteration: 95934
loss: 0.9957977533340454,grad_norm: 0.9999991943857175, iteration: 95935
loss: 0.9737050533294678,grad_norm: 0.9999991620066311, iteration: 95936
loss: 1.0243823528289795,grad_norm: 0.9298831786998898, iteration: 95937
loss: 0.9905151724815369,grad_norm: 0.9999992400470915, iteration: 95938
loss: 1.0020309686660767,grad_norm: 0.9999990094571048, iteration: 95939
loss: 1.0232938528060913,grad_norm: 0.9999990899466509, iteration: 95940
loss: 0.997574508190155,grad_norm: 0.9515760361811849, iteration: 95941
loss: 0.9632708430290222,grad_norm: 0.8670100903076746, iteration: 95942
loss: 1.0260460376739502,grad_norm: 0.9999990701778337, iteration: 95943
loss: 1.0875524282455444,grad_norm: 0.9999992752059736, iteration: 95944
loss: 0.9858996868133545,grad_norm: 0.9999991313245974, iteration: 95945
loss: 0.9660466909408569,grad_norm: 0.9999991567922408, iteration: 95946
loss: 0.9994338154792786,grad_norm: 0.9859545505490086, iteration: 95947
loss: 1.0090832710266113,grad_norm: 0.9999991440866728, iteration: 95948
loss: 1.0480833053588867,grad_norm: 0.999999795824449, iteration: 95949
loss: 1.011181354522705,grad_norm: 0.9721943849764612, iteration: 95950
loss: 0.9840664267539978,grad_norm: 0.9999992481692513, iteration: 95951
loss: 0.9969791173934937,grad_norm: 0.9999991231102716, iteration: 95952
loss: 0.9832178950309753,grad_norm: 0.9999991981496358, iteration: 95953
loss: 1.0306785106658936,grad_norm: 0.9999991926706028, iteration: 95954
loss: 1.018561601638794,grad_norm: 0.9558778744221517, iteration: 95955
loss: 0.9941836595535278,grad_norm: 0.9819951110309355, iteration: 95956
loss: 0.9900089502334595,grad_norm: 0.9023187870493053, iteration: 95957
loss: 0.9819409251213074,grad_norm: 0.9999991086449128, iteration: 95958
loss: 1.040982961654663,grad_norm: 0.9999990671300265, iteration: 95959
loss: 1.0546951293945312,grad_norm: 0.9999992283278245, iteration: 95960
loss: 0.9883202314376831,grad_norm: 0.9999992407210428, iteration: 95961
loss: 1.028861165046692,grad_norm: 0.9999993453805384, iteration: 95962
loss: 1.0269278287887573,grad_norm: 0.9999992870721878, iteration: 95963
loss: 0.9779661297798157,grad_norm: 0.999998938137629, iteration: 95964
loss: 0.9961045384407043,grad_norm: 0.9999990961067131, iteration: 95965
loss: 1.004319190979004,grad_norm: 0.9343360226596439, iteration: 95966
loss: 0.9952595829963684,grad_norm: 0.8863596492953015, iteration: 95967
loss: 1.0050384998321533,grad_norm: 0.9781140877390279, iteration: 95968
loss: 0.9956739544868469,grad_norm: 0.9999992780720671, iteration: 95969
loss: 1.0379841327667236,grad_norm: 0.9999993815194694, iteration: 95970
loss: 0.9604650139808655,grad_norm: 0.9999991007891422, iteration: 95971
loss: 0.9891888499259949,grad_norm: 0.9177066057321404, iteration: 95972
loss: 1.025689721107483,grad_norm: 0.9999993306736586, iteration: 95973
loss: 0.9698696136474609,grad_norm: 0.9899111996759065, iteration: 95974
loss: 0.9964627623558044,grad_norm: 0.9384230433327228, iteration: 95975
loss: 0.9796915650367737,grad_norm: 0.9999993325958713, iteration: 95976
loss: 1.0080692768096924,grad_norm: 0.9068103431595247, iteration: 95977
loss: 1.0304332971572876,grad_norm: 0.9999993818320699, iteration: 95978
loss: 0.9984345436096191,grad_norm: 0.9999993004251099, iteration: 95979
loss: 1.0057520866394043,grad_norm: 0.9999991495070206, iteration: 95980
loss: 1.0395028591156006,grad_norm: 0.999998998710975, iteration: 95981
loss: 0.9593126773834229,grad_norm: 0.9999994794845302, iteration: 95982
loss: 0.9725217223167419,grad_norm: 0.9999989360075865, iteration: 95983
loss: 0.9782262444496155,grad_norm: 0.9999991909264809, iteration: 95984
loss: 1.0285485982894897,grad_norm: 0.9034325289050054, iteration: 95985
loss: 1.0712312459945679,grad_norm: 0.9999993541153561, iteration: 95986
loss: 1.0129268169403076,grad_norm: 0.9999990739135145, iteration: 95987
loss: 1.0546547174453735,grad_norm: 0.9999994860357829, iteration: 95988
loss: 1.0417174100875854,grad_norm: 0.9999994356324138, iteration: 95989
loss: 1.0137677192687988,grad_norm: 0.9023726631311964, iteration: 95990
loss: 1.0146610736846924,grad_norm: 0.927419514298848, iteration: 95991
loss: 0.9957608580589294,grad_norm: 0.9214748821686949, iteration: 95992
loss: 0.9700732231140137,grad_norm: 0.9999992287795176, iteration: 95993
loss: 1.028775930404663,grad_norm: 0.9999992471097886, iteration: 95994
loss: 0.9699344038963318,grad_norm: 0.8797489429468026, iteration: 95995
loss: 0.9828998446464539,grad_norm: 0.9999991715892625, iteration: 95996
loss: 1.0068387985229492,grad_norm: 0.9999991347798594, iteration: 95997
loss: 1.0012606382369995,grad_norm: 0.9999991290580306, iteration: 95998
loss: 1.0271660089492798,grad_norm: 0.863668551042535, iteration: 95999
loss: 1.037111759185791,grad_norm: 0.9999989624868078, iteration: 96000
loss: 1.0063248872756958,grad_norm: 0.8802154100668007, iteration: 96001
loss: 1.0249894857406616,grad_norm: 0.9999990385579393, iteration: 96002
loss: 0.9874356985092163,grad_norm: 0.9466039980625502, iteration: 96003
loss: 1.0102996826171875,grad_norm: 0.9999992532943422, iteration: 96004
loss: 1.0134690999984741,grad_norm: 0.9999991247929615, iteration: 96005
loss: 0.966189444065094,grad_norm: 0.9999994703079267, iteration: 96006
loss: 1.0019513368606567,grad_norm: 0.9999991673246953, iteration: 96007
loss: 0.9792216420173645,grad_norm: 0.9999991486263516, iteration: 96008
loss: 1.03392493724823,grad_norm: 0.9999998176338953, iteration: 96009
loss: 0.9961312413215637,grad_norm: 0.9999992669998313, iteration: 96010
loss: 0.9721721410751343,grad_norm: 0.9999994846512035, iteration: 96011
loss: 0.9977021217346191,grad_norm: 0.9999991674412682, iteration: 96012
loss: 1.027705430984497,grad_norm: 0.9689604119038229, iteration: 96013
loss: 1.0010396242141724,grad_norm: 0.9589141024069154, iteration: 96014
loss: 0.9971935749053955,grad_norm: 0.9999990635698272, iteration: 96015
loss: 1.016334891319275,grad_norm: 0.9999991071216546, iteration: 96016
loss: 0.9797447323799133,grad_norm: 0.9999992010158892, iteration: 96017
loss: 0.9815662503242493,grad_norm: 0.9999990785532328, iteration: 96018
loss: 0.9806009531021118,grad_norm: 0.9547432941091292, iteration: 96019
loss: 1.0315489768981934,grad_norm: 0.9999990558234965, iteration: 96020
loss: 1.0260086059570312,grad_norm: 0.9349677519249441, iteration: 96021
loss: 1.0166996717453003,grad_norm: 0.9625135539138407, iteration: 96022
loss: 0.9821687340736389,grad_norm: 0.9999990329922847, iteration: 96023
loss: 1.00321364402771,grad_norm: 0.9999990507120089, iteration: 96024
loss: 0.9669581055641174,grad_norm: 0.9687000642403348, iteration: 96025
loss: 0.9847939610481262,grad_norm: 0.9999991092811775, iteration: 96026
loss: 1.009677529335022,grad_norm: 0.9999989631489041, iteration: 96027
loss: 1.0165228843688965,grad_norm: 0.9999990934621136, iteration: 96028
loss: 0.9705755114555359,grad_norm: 0.9999992412795103, iteration: 96029
loss: 1.0266004800796509,grad_norm: 0.9999992408154295, iteration: 96030
loss: 0.9724204540252686,grad_norm: 0.999999275912583, iteration: 96031
loss: 1.0161455869674683,grad_norm: 0.9109397679041542, iteration: 96032
loss: 0.9865706562995911,grad_norm: 0.9999993542126038, iteration: 96033
loss: 1.0123950242996216,grad_norm: 0.9999992650679722, iteration: 96034
loss: 1.0031054019927979,grad_norm: 0.9999990959433708, iteration: 96035
loss: 1.0178829431533813,grad_norm: 0.9999991641271043, iteration: 96036
loss: 0.9972748756408691,grad_norm: 0.8478056599865487, iteration: 96037
loss: 1.0052334070205688,grad_norm: 0.9999991867690747, iteration: 96038
loss: 1.0010939836502075,grad_norm: 0.9999991239718319, iteration: 96039
loss: 1.0103496313095093,grad_norm: 0.8550785983088514, iteration: 96040
loss: 1.022769570350647,grad_norm: 0.9999994501009836, iteration: 96041
loss: 0.9798908829689026,grad_norm: 0.9999991092345081, iteration: 96042
loss: 1.0155900716781616,grad_norm: 0.9999992102022961, iteration: 96043
loss: 0.9863958358764648,grad_norm: 0.9777443587624023, iteration: 96044
loss: 1.0100599527359009,grad_norm: 0.9498954315427299, iteration: 96045
loss: 1.0069457292556763,grad_norm: 0.9157993302988279, iteration: 96046
loss: 0.9937582015991211,grad_norm: 0.9999990394587516, iteration: 96047
loss: 0.9755497574806213,grad_norm: 0.9847191833893907, iteration: 96048
loss: 1.0259556770324707,grad_norm: 0.8561345732502444, iteration: 96049
loss: 1.1480802297592163,grad_norm: 0.9999995833177678, iteration: 96050
loss: 1.012120008468628,grad_norm: 0.948349067497003, iteration: 96051
loss: 1.0079346895217896,grad_norm: 0.9578644347534572, iteration: 96052
loss: 0.9849475622177124,grad_norm: 0.9999990668893638, iteration: 96053
loss: 1.024479627609253,grad_norm: 0.9123441568948603, iteration: 96054
loss: 0.9996024966239929,grad_norm: 0.9999991979722587, iteration: 96055
loss: 1.0211780071258545,grad_norm: 0.8693025729442796, iteration: 96056
loss: 1.0025444030761719,grad_norm: 0.9999990841053217, iteration: 96057
loss: 0.9834194183349609,grad_norm: 0.9999989676565593, iteration: 96058
loss: 1.0090864896774292,grad_norm: 0.9447968468375287, iteration: 96059
loss: 1.0051720142364502,grad_norm: 0.9976161690515094, iteration: 96060
loss: 1.0118818283081055,grad_norm: 0.8216166089646891, iteration: 96061
loss: 1.0014911890029907,grad_norm: 0.9999990732228448, iteration: 96062
loss: 0.9895434975624084,grad_norm: 0.9999991187531191, iteration: 96063
loss: 1.0006353855133057,grad_norm: 0.9394515028184097, iteration: 96064
loss: 1.0177732706069946,grad_norm: 0.9999992630773136, iteration: 96065
loss: 1.0233938694000244,grad_norm: 0.9999991020298759, iteration: 96066
loss: 1.015398621559143,grad_norm: 0.9999992555176592, iteration: 96067
loss: 1.0507526397705078,grad_norm: 0.9999992716072598, iteration: 96068
loss: 1.0329339504241943,grad_norm: 0.9999993219320336, iteration: 96069
loss: 0.9893002510070801,grad_norm: 0.9999992407460288, iteration: 96070
loss: 0.9618449211120605,grad_norm: 0.9999990274169648, iteration: 96071
loss: 1.0056132078170776,grad_norm: 0.9999993976037248, iteration: 96072
loss: 1.0343126058578491,grad_norm: 0.999999020611382, iteration: 96073
loss: 0.9768596291542053,grad_norm: 0.9999989843128476, iteration: 96074
loss: 0.9741635918617249,grad_norm: 0.9999991564781638, iteration: 96075
loss: 0.9450481534004211,grad_norm: 0.9999991222104735, iteration: 96076
loss: 1.0237205028533936,grad_norm: 0.9999991394038339, iteration: 96077
loss: 0.969978392124176,grad_norm: 0.9972062131427324, iteration: 96078
loss: 1.0124708414077759,grad_norm: 0.9736866558366535, iteration: 96079
loss: 0.995469331741333,grad_norm: 0.999999233963972, iteration: 96080
loss: 0.9794757962226868,grad_norm: 0.9697101349087276, iteration: 96081
loss: 0.9818017482757568,grad_norm: 0.9322023294662596, iteration: 96082
loss: 0.9904230833053589,grad_norm: 0.999999220628794, iteration: 96083
loss: 0.9988000988960266,grad_norm: 0.9385757464384114, iteration: 96084
loss: 1.0138938426971436,grad_norm: 0.9999991967103676, iteration: 96085
loss: 1.0207384824752808,grad_norm: 0.9999993093723065, iteration: 96086
loss: 0.9913580417633057,grad_norm: 0.999999170149523, iteration: 96087
loss: 1.0305615663528442,grad_norm: 0.9483462274511728, iteration: 96088
loss: 1.012749433517456,grad_norm: 0.9999992212559097, iteration: 96089
loss: 0.9790785312652588,grad_norm: 0.9999992210970492, iteration: 96090
loss: 1.037765622138977,grad_norm: 0.9999990294722767, iteration: 96091
loss: 0.9423717856407166,grad_norm: 0.9999991741845469, iteration: 96092
loss: 1.0312670469284058,grad_norm: 0.9999993770821922, iteration: 96093
loss: 1.011027455329895,grad_norm: 0.8724423354412989, iteration: 96094
loss: 0.9963065981864929,grad_norm: 0.9661767137292456, iteration: 96095
loss: 1.0368669033050537,grad_norm: 0.999999135931476, iteration: 96096
loss: 1.0046128034591675,grad_norm: 0.9999991224794589, iteration: 96097
loss: 1.0332649946212769,grad_norm: 0.9420922755485424, iteration: 96098
loss: 1.0110281705856323,grad_norm: 0.9464373387467966, iteration: 96099
loss: 0.9897857904434204,grad_norm: 0.9999991387514281, iteration: 96100
loss: 0.985822319984436,grad_norm: 0.9999991682358469, iteration: 96101
loss: 1.0704439878463745,grad_norm: 0.9999995981914089, iteration: 96102
loss: 1.0047610998153687,grad_norm: 0.9982355868627537, iteration: 96103
loss: 0.9826257228851318,grad_norm: 0.9999991702122044, iteration: 96104
loss: 1.0300137996673584,grad_norm: 0.9340253841713456, iteration: 96105
loss: 0.9736730456352234,grad_norm: 0.9153565830280703, iteration: 96106
loss: 1.0078176259994507,grad_norm: 0.9857201661949017, iteration: 96107
loss: 1.0184054374694824,grad_norm: 0.9999990631045428, iteration: 96108
loss: 0.9970431923866272,grad_norm: 0.9999993349305503, iteration: 96109
loss: 0.9948943853378296,grad_norm: 0.999999177832099, iteration: 96110
loss: 1.0100001096725464,grad_norm: 0.999999048893432, iteration: 96111
loss: 0.9878666400909424,grad_norm: 0.9999991147972809, iteration: 96112
loss: 1.0208593606948853,grad_norm: 0.9999990121189789, iteration: 96113
loss: 1.0469034910202026,grad_norm: 0.9999991233382833, iteration: 96114
loss: 0.9854970574378967,grad_norm: 0.9999991557256069, iteration: 96115
loss: 0.9794702529907227,grad_norm: 0.9999992503099414, iteration: 96116
loss: 1.013510823249817,grad_norm: 0.9999993176646192, iteration: 96117
loss: 1.0182547569274902,grad_norm: 0.9766466241262134, iteration: 96118
loss: 1.0492581129074097,grad_norm: 0.8917339629156015, iteration: 96119
loss: 1.0325872898101807,grad_norm: 0.9999995465251152, iteration: 96120
loss: 1.0201221704483032,grad_norm: 0.9868227585957645, iteration: 96121
loss: 0.9922510385513306,grad_norm: 0.9999991163984623, iteration: 96122
loss: 0.9938607215881348,grad_norm: 0.9999990830285094, iteration: 96123
loss: 1.0053924322128296,grad_norm: 0.9999991661106175, iteration: 96124
loss: 0.9917002320289612,grad_norm: 0.9999992199839434, iteration: 96125
loss: 0.9780215620994568,grad_norm: 0.999999206228334, iteration: 96126
loss: 1.0154333114624023,grad_norm: 0.9874908345410335, iteration: 96127
loss: 1.006094217300415,grad_norm: 0.9205898402596984, iteration: 96128
loss: 1.0302938222885132,grad_norm: 0.9949247505185543, iteration: 96129
loss: 0.9869940280914307,grad_norm: 0.9999990937135853, iteration: 96130
loss: 1.0143529176712036,grad_norm: 0.8907493213071453, iteration: 96131
loss: 0.9929596781730652,grad_norm: 0.9625117054290587, iteration: 96132
loss: 0.9926549196243286,grad_norm: 0.999999033219934, iteration: 96133
loss: 0.9984795451164246,grad_norm: 0.9871357597055597, iteration: 96134
loss: 1.0040805339813232,grad_norm: 0.999999162243653, iteration: 96135
loss: 0.9934168457984924,grad_norm: 0.9655084921740613, iteration: 96136
loss: 0.9768504500389099,grad_norm: 0.9337402209333099, iteration: 96137
loss: 0.9868925213813782,grad_norm: 0.9999992124220937, iteration: 96138
loss: 1.021910548210144,grad_norm: 0.9999991651969087, iteration: 96139
loss: 1.0231983661651611,grad_norm: 0.9145735276938234, iteration: 96140
loss: 0.9912104606628418,grad_norm: 0.99999905319203, iteration: 96141
loss: 1.0435242652893066,grad_norm: 0.9999992485521838, iteration: 96142
loss: 0.9992169141769409,grad_norm: 0.9999992213336832, iteration: 96143
loss: 0.9986960291862488,grad_norm: 0.9999991602395186, iteration: 96144
loss: 1.0739587545394897,grad_norm: 0.9999992668220503, iteration: 96145
loss: 0.9961029291152954,grad_norm: 0.9999993167522483, iteration: 96146
loss: 1.0075047016143799,grad_norm: 0.964586117670495, iteration: 96147
loss: 0.9884569644927979,grad_norm: 0.9622567825472422, iteration: 96148
loss: 1.05659818649292,grad_norm: 0.9999996506092973, iteration: 96149
loss: 0.9736478924751282,grad_norm: 0.9999990746188047, iteration: 96150
loss: 1.0142475366592407,grad_norm: 0.9999991659042408, iteration: 96151
loss: 0.990501344203949,grad_norm: 0.9564043302685269, iteration: 96152
loss: 1.030845046043396,grad_norm: 0.9999990962755146, iteration: 96153
loss: 1.0763345956802368,grad_norm: 0.9999993312000863, iteration: 96154
loss: 1.0394593477249146,grad_norm: 0.8376327949535999, iteration: 96155
loss: 0.9745889902114868,grad_norm: 0.9999991903872416, iteration: 96156
loss: 0.955569326877594,grad_norm: 0.9999990959469519, iteration: 96157
loss: 1.012279987335205,grad_norm: 0.9999993074523594, iteration: 96158
loss: 0.9734192490577698,grad_norm: 0.999999182999273, iteration: 96159
loss: 1.0362693071365356,grad_norm: 0.9999991986615857, iteration: 96160
loss: 0.9762149453163147,grad_norm: 0.99999924259341, iteration: 96161
loss: 0.9954466819763184,grad_norm: 0.9796412661667585, iteration: 96162
loss: 1.0200221538543701,grad_norm: 0.9346615176047639, iteration: 96163
loss: 1.0046180486679077,grad_norm: 0.999999310627343, iteration: 96164
loss: 1.0268491506576538,grad_norm: 0.8666852836997522, iteration: 96165
loss: 0.9959262013435364,grad_norm: 0.9999988904861647, iteration: 96166
loss: 0.9967535138130188,grad_norm: 0.9279440774535967, iteration: 96167
loss: 1.0390843152999878,grad_norm: 0.9999989370884375, iteration: 96168
loss: 0.9693810939788818,grad_norm: 0.9999992400162943, iteration: 96169
loss: 0.9583014249801636,grad_norm: 0.9274384182261776, iteration: 96170
loss: 1.0072122812271118,grad_norm: 0.8422327970610537, iteration: 96171
loss: 1.026384949684143,grad_norm: 0.9999990522521686, iteration: 96172
loss: 0.9876907467842102,grad_norm: 0.9999991982463688, iteration: 96173
loss: 0.9896129369735718,grad_norm: 0.9999992238704184, iteration: 96174
loss: 0.9520898461341858,grad_norm: 0.8768479050145546, iteration: 96175
loss: 1.009878158569336,grad_norm: 0.9999992226963641, iteration: 96176
loss: 0.9906488060951233,grad_norm: 0.9999990622755269, iteration: 96177
loss: 1.0004281997680664,grad_norm: 0.9999990431926961, iteration: 96178
loss: 0.9741114974021912,grad_norm: 0.9999991273968595, iteration: 96179
loss: 1.0156829357147217,grad_norm: 0.9999991642335399, iteration: 96180
loss: 0.9971985220909119,grad_norm: 0.9710039599651917, iteration: 96181
loss: 0.991299569606781,grad_norm: 0.9999993224936042, iteration: 96182
loss: 1.035585880279541,grad_norm: 0.9999992329040003, iteration: 96183
loss: 0.9668760895729065,grad_norm: 0.8852123337370902, iteration: 96184
loss: 1.0276638269424438,grad_norm: 0.9999990082295308, iteration: 96185
loss: 0.9994690418243408,grad_norm: 0.8799659780650535, iteration: 96186
loss: 0.9876275062561035,grad_norm: 0.999999197807686, iteration: 96187
loss: 1.005679726600647,grad_norm: 0.9999989612970925, iteration: 96188
loss: 0.9866508841514587,grad_norm: 0.9506071280642031, iteration: 96189
loss: 0.9608011245727539,grad_norm: 0.9999990149192891, iteration: 96190
loss: 0.9765688180923462,grad_norm: 0.9999989944157501, iteration: 96191
loss: 0.995326817035675,grad_norm: 0.9999992044527696, iteration: 96192
loss: 1.0510673522949219,grad_norm: 0.9999990930763082, iteration: 96193
loss: 1.0064013004302979,grad_norm: 0.9999991128708609, iteration: 96194
loss: 1.0009230375289917,grad_norm: 0.9999992178617277, iteration: 96195
loss: 0.9959139227867126,grad_norm: 0.9363589071782915, iteration: 96196
loss: 0.9653500914573669,grad_norm: 0.9032135191453553, iteration: 96197
loss: 0.9905509948730469,grad_norm: 0.9334440898094534, iteration: 96198
loss: 1.0240274667739868,grad_norm: 0.9999992959967726, iteration: 96199
loss: 1.011523962020874,grad_norm: 0.8842244169714808, iteration: 96200
loss: 0.964441180229187,grad_norm: 0.9999990933273408, iteration: 96201
loss: 0.9901654720306396,grad_norm: 0.9999989551678025, iteration: 96202
loss: 0.9872646331787109,grad_norm: 0.9999991815585786, iteration: 96203
loss: 1.013153076171875,grad_norm: 0.9999990487854263, iteration: 96204
loss: 1.0046148300170898,grad_norm: 0.9192731412693348, iteration: 96205
loss: 1.0111862421035767,grad_norm: 0.9999992731014588, iteration: 96206
loss: 0.9767118692398071,grad_norm: 0.9999990483194956, iteration: 96207
loss: 0.9976041316986084,grad_norm: 0.9999991663496746, iteration: 96208
loss: 0.9973692893981934,grad_norm: 0.9695338310495899, iteration: 96209
loss: 0.9401464462280273,grad_norm: 0.8713518578061181, iteration: 96210
loss: 1.0032230615615845,grad_norm: 0.9488789642545441, iteration: 96211
loss: 0.9967161417007446,grad_norm: 0.9999992770784003, iteration: 96212
loss: 0.9773366451263428,grad_norm: 0.999999370258284, iteration: 96213
loss: 0.9876450300216675,grad_norm: 0.9999990650754856, iteration: 96214
loss: 1.0087388753890991,grad_norm: 0.9999991087216038, iteration: 96215
loss: 1.0123859643936157,grad_norm: 0.9999992159858377, iteration: 96216
loss: 0.9620999097824097,grad_norm: 0.9999990368735052, iteration: 96217
loss: 0.9906361103057861,grad_norm: 0.9999991351989261, iteration: 96218
loss: 0.9660758972167969,grad_norm: 0.9623278314049492, iteration: 96219
loss: 1.0161973237991333,grad_norm: 0.9999991365097216, iteration: 96220
loss: 0.9828702211380005,grad_norm: 0.9999991097347228, iteration: 96221
loss: 0.9619535207748413,grad_norm: 0.9296020455035218, iteration: 96222
loss: 0.9773337841033936,grad_norm: 0.9999988947743115, iteration: 96223
loss: 0.9844970703125,grad_norm: 0.9999991862540921, iteration: 96224
loss: 0.9821397662162781,grad_norm: 0.9999990955412875, iteration: 96225
loss: 1.0367393493652344,grad_norm: 0.9999991133568417, iteration: 96226
loss: 1.008712649345398,grad_norm: 0.9999992782296999, iteration: 96227
loss: 0.99932861328125,grad_norm: 0.9999991583769106, iteration: 96228
loss: 0.9873209595680237,grad_norm: 0.9999990740074682, iteration: 96229
loss: 1.0014172792434692,grad_norm: 0.9999991047270576, iteration: 96230
loss: 0.989651083946228,grad_norm: 0.9116293046886385, iteration: 96231
loss: 0.9865381121635437,grad_norm: 0.9613178750425704, iteration: 96232
loss: 1.0204695463180542,grad_norm: 0.9445281361391163, iteration: 96233
loss: 0.9566101431846619,grad_norm: 0.9936710191545326, iteration: 96234
loss: 1.0385648012161255,grad_norm: 0.9999992245577726, iteration: 96235
loss: 1.022545576095581,grad_norm: 0.9999991206784719, iteration: 96236
loss: 1.031922698020935,grad_norm: 0.9828863598979645, iteration: 96237
loss: 0.9844506978988647,grad_norm: 0.9999990557395355, iteration: 96238
loss: 0.9531704783439636,grad_norm: 0.9999990678366656, iteration: 96239
loss: 0.9741070866584778,grad_norm: 0.9999991780799439, iteration: 96240
loss: 1.0105947256088257,grad_norm: 0.9999988465540866, iteration: 96241
loss: 0.9971732497215271,grad_norm: 0.9999992324510737, iteration: 96242
loss: 1.0137580633163452,grad_norm: 0.9999990394643009, iteration: 96243
loss: 0.9743972420692444,grad_norm: 0.9999991198908952, iteration: 96244
loss: 0.9976772665977478,grad_norm: 0.9387907612458816, iteration: 96245
loss: 1.0166363716125488,grad_norm: 0.9999991896814782, iteration: 96246
loss: 1.0136874914169312,grad_norm: 0.9999993262610535, iteration: 96247
loss: 1.0288469791412354,grad_norm: 0.9999991431311396, iteration: 96248
loss: 0.9809116125106812,grad_norm: 0.9583961526731688, iteration: 96249
loss: 0.9630225896835327,grad_norm: 0.9999991340149991, iteration: 96250
loss: 0.9938334226608276,grad_norm: 0.9999991286462225, iteration: 96251
loss: 0.9805805683135986,grad_norm: 0.9999991354194916, iteration: 96252
loss: 1.0116859674453735,grad_norm: 0.9999990634216263, iteration: 96253
loss: 0.9973369836807251,grad_norm: 0.9999992160941569, iteration: 96254
loss: 1.0040209293365479,grad_norm: 0.9999992689229575, iteration: 96255
loss: 0.9864988327026367,grad_norm: 0.9769348835988169, iteration: 96256
loss: 1.0235424041748047,grad_norm: 0.9999990742575094, iteration: 96257
loss: 0.9774259328842163,grad_norm: 0.9999992184853477, iteration: 96258
loss: 0.9797766804695129,grad_norm: 0.9999991582306199, iteration: 96259
loss: 0.9791726469993591,grad_norm: 0.9999991633556199, iteration: 96260
loss: 1.0089315176010132,grad_norm: 0.9999993050657306, iteration: 96261
loss: 0.9854704737663269,grad_norm: 0.9999990292228405, iteration: 96262
loss: 1.0432817935943604,grad_norm: 0.9999992035990924, iteration: 96263
loss: 0.9549648761749268,grad_norm: 0.9999991368096828, iteration: 96264
loss: 0.9910919666290283,grad_norm: 0.9593596524393413, iteration: 96265
loss: 1.0032373666763306,grad_norm: 0.999758234961587, iteration: 96266
loss: 1.0330034494400024,grad_norm: 0.9999989727274398, iteration: 96267
loss: 0.99960857629776,grad_norm: 0.9180732401706181, iteration: 96268
loss: 0.9837868213653564,grad_norm: 0.9999993812477933, iteration: 96269
loss: 0.9792331457138062,grad_norm: 0.9999991921735786, iteration: 96270
loss: 0.9881976842880249,grad_norm: 0.9999990667834142, iteration: 96271
loss: 1.0179234743118286,grad_norm: 0.9999992429202583, iteration: 96272
loss: 1.0172755718231201,grad_norm: 0.8460672736028135, iteration: 96273
loss: 1.017591118812561,grad_norm: 0.9804116638745101, iteration: 96274
loss: 0.9966689944267273,grad_norm: 0.9996880611824545, iteration: 96275
loss: 0.9879503846168518,grad_norm: 0.99999904232197, iteration: 96276
loss: 1.0150054693222046,grad_norm: 0.9999991787343332, iteration: 96277
loss: 0.9718188047409058,grad_norm: 0.9934644367727671, iteration: 96278
loss: 1.002798318862915,grad_norm: 0.9612882994596114, iteration: 96279
loss: 1.0046316385269165,grad_norm: 0.9999993404087615, iteration: 96280
loss: 1.0721631050109863,grad_norm: 0.9999996613333789, iteration: 96281
loss: 0.9983578324317932,grad_norm: 0.9720358158789788, iteration: 96282
loss: 1.036192774772644,grad_norm: 0.9999991166334401, iteration: 96283
loss: 0.9912220239639282,grad_norm: 0.9092823914480672, iteration: 96284
loss: 1.0033305883407593,grad_norm: 0.9548417707199686, iteration: 96285
loss: 1.0234253406524658,grad_norm: 0.9999992441107654, iteration: 96286
loss: 1.007168173789978,grad_norm: 0.9999990129746821, iteration: 96287
loss: 1.0075645446777344,grad_norm: 0.9999991133539146, iteration: 96288
loss: 1.0006986856460571,grad_norm: 0.9999992140724053, iteration: 96289
loss: 0.9879651665687561,grad_norm: 0.9999989693212988, iteration: 96290
loss: 0.9585878849029541,grad_norm: 0.9999991762824763, iteration: 96291
loss: 1.0131239891052246,grad_norm: 0.9999992275511138, iteration: 96292
loss: 0.9646746516227722,grad_norm: 0.9999990915269893, iteration: 96293
loss: 0.9743875861167908,grad_norm: 0.9999988994586632, iteration: 96294
loss: 1.0169830322265625,grad_norm: 0.9999991755547591, iteration: 96295
loss: 1.0196270942687988,grad_norm: 0.999999167874024, iteration: 96296
loss: 1.016439437866211,grad_norm: 0.9999991334465994, iteration: 96297
loss: 1.0091173648834229,grad_norm: 0.9999991193115585, iteration: 96298
loss: 0.9817991256713867,grad_norm: 0.9999993358365241, iteration: 96299
loss: 0.9610598683357239,grad_norm: 0.9984109389507441, iteration: 96300
loss: 0.9771003723144531,grad_norm: 0.9999994703424583, iteration: 96301
loss: 1.0315133333206177,grad_norm: 0.9999991639409329, iteration: 96302
loss: 1.0004216432571411,grad_norm: 0.9576916638558811, iteration: 96303
loss: 0.9722024202346802,grad_norm: 0.9999991834404184, iteration: 96304
loss: 0.9549522399902344,grad_norm: 0.9999990231534821, iteration: 96305
loss: 0.9959235787391663,grad_norm: 0.9999993102828958, iteration: 96306
loss: 1.0297011137008667,grad_norm: 0.999999093550608, iteration: 96307
loss: 0.9749841690063477,grad_norm: 0.9631840060722963, iteration: 96308
loss: 0.9827010631561279,grad_norm: 0.9999990105518469, iteration: 96309
loss: 1.0063073635101318,grad_norm: 0.9780827086202823, iteration: 96310
loss: 0.9855974316596985,grad_norm: 0.9999993349225418, iteration: 96311
loss: 0.9869553446769714,grad_norm: 0.9999991977253894, iteration: 96312
loss: 1.030977487564087,grad_norm: 0.9999992718399919, iteration: 96313
loss: 1.0132416486740112,grad_norm: 0.9236880227720474, iteration: 96314
loss: 0.9863107800483704,grad_norm: 0.9999991797951049, iteration: 96315
loss: 0.9796819090843201,grad_norm: 0.9999990140385931, iteration: 96316
loss: 1.0021239519119263,grad_norm: 0.9498052612857404, iteration: 96317
loss: 1.0067867040634155,grad_norm: 0.999998875962338, iteration: 96318
loss: 1.0088986158370972,grad_norm: 0.9999993481077561, iteration: 96319
loss: 0.9606987833976746,grad_norm: 0.9999991964032857, iteration: 96320
loss: 1.0160168409347534,grad_norm: 0.999998932357916, iteration: 96321
loss: 1.0238398313522339,grad_norm: 0.9735823571732177, iteration: 96322
loss: 1.0048577785491943,grad_norm: 0.9934174401669217, iteration: 96323
loss: 0.9952443838119507,grad_norm: 0.9999991045220172, iteration: 96324
loss: 1.0000361204147339,grad_norm: 0.9577012801689448, iteration: 96325
loss: 1.0244723558425903,grad_norm: 0.9999990651247095, iteration: 96326
loss: 1.0558303594589233,grad_norm: 0.9999991585697007, iteration: 96327
loss: 0.9698813557624817,grad_norm: 0.9999991051786049, iteration: 96328
loss: 1.0060228109359741,grad_norm: 0.9999995740817862, iteration: 96329
loss: 1.0230255126953125,grad_norm: 0.9999990272348137, iteration: 96330
loss: 0.9893560409545898,grad_norm: 0.999999063079622, iteration: 96331
loss: 0.9845357537269592,grad_norm: 0.9999992070610186, iteration: 96332
loss: 1.0141315460205078,grad_norm: 0.9402540412491686, iteration: 96333
loss: 0.9799590110778809,grad_norm: 0.9999990972524538, iteration: 96334
loss: 1.0294215679168701,grad_norm: 0.9999992460753381, iteration: 96335
loss: 0.9894751906394958,grad_norm: 0.9999994516907941, iteration: 96336
loss: 0.9918938279151917,grad_norm: 0.9999990611669461, iteration: 96337
loss: 1.042403221130371,grad_norm: 0.9999991189563666, iteration: 96338
loss: 1.008962869644165,grad_norm: 0.9999990747472155, iteration: 96339
loss: 0.9742371439933777,grad_norm: 0.9999993258480137, iteration: 96340
loss: 1.0123851299285889,grad_norm: 0.9999993528476118, iteration: 96341
loss: 0.9692367315292358,grad_norm: 0.9999992048549526, iteration: 96342
loss: 1.0100986957550049,grad_norm: 0.984630758178574, iteration: 96343
loss: 1.0105403661727905,grad_norm: 0.9999993890684012, iteration: 96344
loss: 1.0025455951690674,grad_norm: 0.9999990995050576, iteration: 96345
loss: 1.0071215629577637,grad_norm: 0.9999992191398186, iteration: 96346
loss: 1.010403037071228,grad_norm: 0.9624290460672565, iteration: 96347
loss: 0.9877204895019531,grad_norm: 0.9999993627016444, iteration: 96348
loss: 0.9572573900222778,grad_norm: 0.9999990269337906, iteration: 96349
loss: 1.0140717029571533,grad_norm: 0.9336479347909358, iteration: 96350
loss: 0.9769699573516846,grad_norm: 0.9579233982850229, iteration: 96351
loss: 1.045593023300171,grad_norm: 0.9999995438131869, iteration: 96352
loss: 0.9909732937812805,grad_norm: 0.9999991986735759, iteration: 96353
loss: 0.9714248776435852,grad_norm: 0.9534861153764546, iteration: 96354
loss: 1.0295944213867188,grad_norm: 0.9999993543934309, iteration: 96355
loss: 0.9842652678489685,grad_norm: 0.9288159317456222, iteration: 96356
loss: 1.0401540994644165,grad_norm: 0.9999991952972279, iteration: 96357
loss: 1.0261883735656738,grad_norm: 0.9999996698567625, iteration: 96358
loss: 0.9937037825584412,grad_norm: 0.9999992750372774, iteration: 96359
loss: 1.0121800899505615,grad_norm: 0.9514612615069096, iteration: 96360
loss: 0.9810402393341064,grad_norm: 0.9229851950320987, iteration: 96361
loss: 1.0075981616973877,grad_norm: 0.9236860465417819, iteration: 96362
loss: 1.0726919174194336,grad_norm: 0.9999994122851975, iteration: 96363
loss: 1.031042218208313,grad_norm: 0.9999992511130548, iteration: 96364
loss: 1.0219980478286743,grad_norm: 0.7642249327269282, iteration: 96365
loss: 0.9927363991737366,grad_norm: 0.9999989741117417, iteration: 96366
loss: 0.963526725769043,grad_norm: 0.9999993121860378, iteration: 96367
loss: 0.9902251958847046,grad_norm: 0.9999991184855214, iteration: 96368
loss: 1.0280190706253052,grad_norm: 0.9999992056635839, iteration: 96369
loss: 1.021246314048767,grad_norm: 0.9999994849070916, iteration: 96370
loss: 0.994549572467804,grad_norm: 0.9999990942608598, iteration: 96371
loss: 1.095719575881958,grad_norm: 0.9999998801845037, iteration: 96372
loss: 0.9867410659790039,grad_norm: 0.987094387781662, iteration: 96373
loss: 0.9883737564086914,grad_norm: 0.999999189848349, iteration: 96374
loss: 0.9940242767333984,grad_norm: 0.9138202849280029, iteration: 96375
loss: 0.9774296283721924,grad_norm: 0.9531659794481283, iteration: 96376
loss: 0.9925432801246643,grad_norm: 0.9999992060617949, iteration: 96377
loss: 0.9587975144386292,grad_norm: 0.9472031977732177, iteration: 96378
loss: 0.9925427436828613,grad_norm: 0.9999990638511383, iteration: 96379
loss: 0.9859458804130554,grad_norm: 0.9720504245701016, iteration: 96380
loss: 0.9925472736358643,grad_norm: 0.9999992694460098, iteration: 96381
loss: 0.9910534620285034,grad_norm: 0.9999989835349127, iteration: 96382
loss: 1.0203619003295898,grad_norm: 0.9999990185186189, iteration: 96383
loss: 1.0576999187469482,grad_norm: 0.9999992990542345, iteration: 96384
loss: 1.0316380262374878,grad_norm: 0.9999992504989912, iteration: 96385
loss: 1.089174747467041,grad_norm: 0.9999990539054201, iteration: 96386
loss: 1.0349942445755005,grad_norm: 0.9414367005643225, iteration: 96387
loss: 1.0222831964492798,grad_norm: 0.9640427465506316, iteration: 96388
loss: 0.9781389236450195,grad_norm: 0.9999990335788573, iteration: 96389
loss: 0.9662278890609741,grad_norm: 0.9999990014402895, iteration: 96390
loss: 1.041127324104309,grad_norm: 0.9999990544422117, iteration: 96391
loss: 1.0213794708251953,grad_norm: 0.9999995123035664, iteration: 96392
loss: 1.0070024728775024,grad_norm: 0.9601813525512823, iteration: 96393
loss: 1.0057144165039062,grad_norm: 0.9999995355442699, iteration: 96394
loss: 0.9979169964790344,grad_norm: 0.999999067601192, iteration: 96395
loss: 0.961833119392395,grad_norm: 0.9999989787544997, iteration: 96396
loss: 1.0207206010818481,grad_norm: 0.9999991817310903, iteration: 96397
loss: 1.047359824180603,grad_norm: 0.9999992778793534, iteration: 96398
loss: 0.9924131035804749,grad_norm: 0.9662223908541768, iteration: 96399
loss: 0.9931818246841431,grad_norm: 0.9556871374418705, iteration: 96400
loss: 0.9686635732650757,grad_norm: 0.9999992907979773, iteration: 96401
loss: 1.0081247091293335,grad_norm: 0.99999915586332, iteration: 96402
loss: 0.9878543019294739,grad_norm: 0.9999989760069399, iteration: 96403
loss: 0.9994997978210449,grad_norm: 0.910822142984231, iteration: 96404
loss: 0.9986006617546082,grad_norm: 0.9999990284001989, iteration: 96405
loss: 0.9986478090286255,grad_norm: 0.9999991536605424, iteration: 96406
loss: 1.014384388923645,grad_norm: 0.9594002367098134, iteration: 96407
loss: 0.9811888337135315,grad_norm: 0.9999991559852888, iteration: 96408
loss: 1.0046321153640747,grad_norm: 0.9999990433642902, iteration: 96409
loss: 1.001417636871338,grad_norm: 0.9999992985286645, iteration: 96410
loss: 1.0151909589767456,grad_norm: 0.9999995291898268, iteration: 96411
loss: 0.9936195015907288,grad_norm: 0.9676642109906455, iteration: 96412
loss: 0.9892536401748657,grad_norm: 0.9999990171111123, iteration: 96413
loss: 0.992043673992157,grad_norm: 0.9584886604527891, iteration: 96414
loss: 1.0782315731048584,grad_norm: 0.9999995819768209, iteration: 96415
loss: 1.0227736234664917,grad_norm: 0.926102800664335, iteration: 96416
loss: 1.0245145559310913,grad_norm: 0.9999993116411084, iteration: 96417
loss: 1.0266526937484741,grad_norm: 0.9999990669550715, iteration: 96418
loss: 0.9833207726478577,grad_norm: 0.9999991398080361, iteration: 96419
loss: 0.9955126643180847,grad_norm: 0.9999992424294801, iteration: 96420
loss: 0.9921696186065674,grad_norm: 0.9670552699710877, iteration: 96421
loss: 1.0190643072128296,grad_norm: 0.9565853742679025, iteration: 96422
loss: 0.9475157856941223,grad_norm: 0.9999991590017079, iteration: 96423
loss: 1.011123776435852,grad_norm: 0.999999107283145, iteration: 96424
loss: 0.9418267011642456,grad_norm: 0.9868493510244948, iteration: 96425
loss: 0.9829077124595642,grad_norm: 0.9005669182495644, iteration: 96426
loss: 1.0195728540420532,grad_norm: 0.8186910335231168, iteration: 96427
loss: 1.0177936553955078,grad_norm: 0.9999993225354534, iteration: 96428
loss: 1.0101045370101929,grad_norm: 0.9999991740864477, iteration: 96429
loss: 1.0033713579177856,grad_norm: 0.993832584642801, iteration: 96430
loss: 1.0588572025299072,grad_norm: 0.9999992704415963, iteration: 96431
loss: 1.0173219442367554,grad_norm: 0.9999991402151317, iteration: 96432
loss: 0.9978698492050171,grad_norm: 0.9999991661796351, iteration: 96433
loss: 0.9753167629241943,grad_norm: 0.9999991310296132, iteration: 96434
loss: 1.0082858800888062,grad_norm: 0.9554162302600752, iteration: 96435
loss: 1.038813829421997,grad_norm: 0.9999992505303263, iteration: 96436
loss: 0.9997091293334961,grad_norm: 0.8789369224826139, iteration: 96437
loss: 0.9764688014984131,grad_norm: 0.9516637736072469, iteration: 96438
loss: 0.9612239599227905,grad_norm: 0.999999100009732, iteration: 96439
loss: 1.0153560638427734,grad_norm: 0.9999991179928125, iteration: 96440
loss: 1.1502947807312012,grad_norm: 0.9999995165669469, iteration: 96441
loss: 1.0564786195755005,grad_norm: 0.9999993713727534, iteration: 96442
loss: 0.9694558382034302,grad_norm: 0.9754537137411833, iteration: 96443
loss: 1.0100789070129395,grad_norm: 0.9999989938521159, iteration: 96444
loss: 1.0144344568252563,grad_norm: 0.9999990604334229, iteration: 96445
loss: 1.0062119960784912,grad_norm: 0.952905210500817, iteration: 96446
loss: 1.0535712242126465,grad_norm: 0.999999286354694, iteration: 96447
loss: 0.9847895503044128,grad_norm: 0.9999990547529102, iteration: 96448
loss: 0.9699978828430176,grad_norm: 0.9978191917502736, iteration: 96449
loss: 0.9666200876235962,grad_norm: 0.999999165393536, iteration: 96450
loss: 0.954251229763031,grad_norm: 0.9999998066788013, iteration: 96451
loss: 1.021620512008667,grad_norm: 0.9999993339768128, iteration: 96452
loss: 1.0217150449752808,grad_norm: 0.9999990229729935, iteration: 96453
loss: 0.9935430288314819,grad_norm: 0.8798143567987479, iteration: 96454
loss: 1.0022345781326294,grad_norm: 0.9999995099415921, iteration: 96455
loss: 1.0039817094802856,grad_norm: 0.999999162901365, iteration: 96456
loss: 1.0046255588531494,grad_norm: 0.7822817102637335, iteration: 96457
loss: 1.0015753507614136,grad_norm: 0.9357407268586855, iteration: 96458
loss: 1.0286391973495483,grad_norm: 0.9999992117274981, iteration: 96459
loss: 1.0147676467895508,grad_norm: 0.9999990283572623, iteration: 96460
loss: 1.265256404876709,grad_norm: 0.9999992370800241, iteration: 96461
loss: 0.9867629408836365,grad_norm: 0.9999990779603715, iteration: 96462
loss: 1.0132678747177124,grad_norm: 0.9507777037927974, iteration: 96463
loss: 1.0465469360351562,grad_norm: 0.9999992512151761, iteration: 96464
loss: 0.9668533205986023,grad_norm: 0.9999991210335398, iteration: 96465
loss: 0.9667642116546631,grad_norm: 0.9994810438063892, iteration: 96466
loss: 1.04168701171875,grad_norm: 0.9999992099602685, iteration: 96467
loss: 0.9905229210853577,grad_norm: 0.999999867381853, iteration: 96468
loss: 1.0115834474563599,grad_norm: 0.9999996228151232, iteration: 96469
loss: 1.0100891590118408,grad_norm: 0.999999113679745, iteration: 96470
loss: 1.0178086757659912,grad_norm: 0.999999221693163, iteration: 96471
loss: 1.00701904296875,grad_norm: 0.9999993229701738, iteration: 96472
loss: 1.0267858505249023,grad_norm: 0.9999991849034454, iteration: 96473
loss: 0.9892784953117371,grad_norm: 0.9999990669169435, iteration: 96474
loss: 0.9958375096321106,grad_norm: 0.9999991150829455, iteration: 96475
loss: 0.996625542640686,grad_norm: 0.9999989014640847, iteration: 96476
loss: 0.9770010709762573,grad_norm: 0.9999991614174435, iteration: 96477
loss: 0.9662831425666809,grad_norm: 0.999999344652466, iteration: 96478
loss: 1.0118235349655151,grad_norm: 0.9999992752289422, iteration: 96479
loss: 0.9773072004318237,grad_norm: 0.9983650035248312, iteration: 96480
loss: 0.9865585565567017,grad_norm: 0.9999990625335125, iteration: 96481
loss: 0.9916847348213196,grad_norm: 0.9953527765570586, iteration: 96482
loss: 0.9967285394668579,grad_norm: 0.9493914224731584, iteration: 96483
loss: 1.0102349519729614,grad_norm: 0.999999282329303, iteration: 96484
loss: 1.0210062265396118,grad_norm: 0.9999992391649403, iteration: 96485
loss: 1.0146576166152954,grad_norm: 0.974830684614303, iteration: 96486
loss: 1.020120620727539,grad_norm: 0.9252157951657702, iteration: 96487
loss: 0.9889247417449951,grad_norm: 0.9766630280491211, iteration: 96488
loss: 1.0427465438842773,grad_norm: 0.9999990386069303, iteration: 96489
loss: 1.0001271963119507,grad_norm: 0.9999992777899118, iteration: 96490
loss: 0.9935832619667053,grad_norm: 0.993535463612494, iteration: 96491
loss: 1.0224840641021729,grad_norm: 0.8789096742890072, iteration: 96492
loss: 1.0213329792022705,grad_norm: 0.9999990194675262, iteration: 96493
loss: 0.9843763113021851,grad_norm: 0.9999990914981367, iteration: 96494
loss: 0.9914511442184448,grad_norm: 0.9999992647354379, iteration: 96495
loss: 1.0279905796051025,grad_norm: 0.9999992814837787, iteration: 96496
loss: 0.9852116703987122,grad_norm: 0.9661400982761589, iteration: 96497
loss: 0.9886496067047119,grad_norm: 0.9999992176310296, iteration: 96498
loss: 0.9989410638809204,grad_norm: 0.9999996670081108, iteration: 96499
loss: 1.0199581384658813,grad_norm: 0.9999990893660895, iteration: 96500
loss: 0.9874681830406189,grad_norm: 0.9999992300576481, iteration: 96501
loss: 0.9852821826934814,grad_norm: 0.9999992195141371, iteration: 96502
loss: 0.9804063439369202,grad_norm: 0.9999991624787729, iteration: 96503
loss: 1.0276916027069092,grad_norm: 0.9122027520723314, iteration: 96504
loss: 0.9939357042312622,grad_norm: 0.9999997715420567, iteration: 96505
loss: 0.985471248626709,grad_norm: 0.999999065645306, iteration: 96506
loss: 1.014334797859192,grad_norm: 0.9999992005305466, iteration: 96507
loss: 0.9918283224105835,grad_norm: 0.9999990856583472, iteration: 96508
loss: 1.0107245445251465,grad_norm: 0.8855773899836771, iteration: 96509
loss: 1.041631817817688,grad_norm: 0.9999992433507068, iteration: 96510
loss: 1.011052131652832,grad_norm: 0.9999990981470777, iteration: 96511
loss: 0.9855468273162842,grad_norm: 0.999999157168941, iteration: 96512
loss: 0.9681137204170227,grad_norm: 0.9999990434216587, iteration: 96513
loss: 1.0304170846939087,grad_norm: 0.9999991144022835, iteration: 96514
loss: 1.0228064060211182,grad_norm: 0.9293878392380881, iteration: 96515
loss: 0.9827536344528198,grad_norm: 0.999999099411359, iteration: 96516
loss: 1.0249217748641968,grad_norm: 0.9848884339544538, iteration: 96517
loss: 0.9791784286499023,grad_norm: 0.9390193854589796, iteration: 96518
loss: 0.9799668192863464,grad_norm: 0.9999990954438052, iteration: 96519
loss: 0.994096040725708,grad_norm: 0.9594299483650265, iteration: 96520
loss: 0.9766915440559387,grad_norm: 0.9999991228111432, iteration: 96521
loss: 1.024053931236267,grad_norm: 0.9999991178973464, iteration: 96522
loss: 1.1161439418792725,grad_norm: 0.9999994009239939, iteration: 96523
loss: 0.9755321145057678,grad_norm: 0.9999991453175027, iteration: 96524
loss: 1.0083547830581665,grad_norm: 0.887756429986338, iteration: 96525
loss: 1.012328028678894,grad_norm: 0.8976846763807368, iteration: 96526
loss: 1.0090596675872803,grad_norm: 0.8974092039091125, iteration: 96527
loss: 1.013723611831665,grad_norm: 0.9999991156570379, iteration: 96528
loss: 0.9731345176696777,grad_norm: 0.9441883395874068, iteration: 96529
loss: 1.0071347951889038,grad_norm: 0.9126492964981034, iteration: 96530
loss: 0.9848504662513733,grad_norm: 0.924530678161597, iteration: 96531
loss: 0.996619701385498,grad_norm: 0.9183582292200592, iteration: 96532
loss: 0.9628965854644775,grad_norm: 0.999998943657197, iteration: 96533
loss: 0.9838005304336548,grad_norm: 0.999999340154458, iteration: 96534
loss: 0.9875602126121521,grad_norm: 0.9999992424368024, iteration: 96535
loss: 0.9860169887542725,grad_norm: 0.9999989967641648, iteration: 96536
loss: 0.994047999382019,grad_norm: 0.9295935674761914, iteration: 96537
loss: 1.0041990280151367,grad_norm: 0.9999991627764959, iteration: 96538
loss: 1.0483620166778564,grad_norm: 0.999999234162174, iteration: 96539
loss: 1.1695854663848877,grad_norm: 0.9999992886659327, iteration: 96540
loss: 1.029036045074463,grad_norm: 0.9999992641749449, iteration: 96541
loss: 0.9792928099632263,grad_norm: 0.8846667632621259, iteration: 96542
loss: 1.0494675636291504,grad_norm: 0.8165426711428344, iteration: 96543
loss: 0.9834901094436646,grad_norm: 0.9999989993613668, iteration: 96544
loss: 0.9877472519874573,grad_norm: 0.9999990740616891, iteration: 96545
loss: 1.010117769241333,grad_norm: 0.9211669769750154, iteration: 96546
loss: 1.0027744770050049,grad_norm: 0.9999991910020848, iteration: 96547
loss: 0.9853795170783997,grad_norm: 0.8469490292395189, iteration: 96548
loss: 1.0118927955627441,grad_norm: 0.999999316082761, iteration: 96549
loss: 1.0212901830673218,grad_norm: 0.9294173201834616, iteration: 96550
loss: 0.9983615875244141,grad_norm: 0.9999991957763033, iteration: 96551
loss: 1.0263326168060303,grad_norm: 0.9999990093398554, iteration: 96552
loss: 0.9799726009368896,grad_norm: 0.9119663325367487, iteration: 96553
loss: 1.0228753089904785,grad_norm: 0.9999990092133861, iteration: 96554
loss: 0.9918912053108215,grad_norm: 0.9999991096489973, iteration: 96555
loss: 1.0068697929382324,grad_norm: 0.9999992040502662, iteration: 96556
loss: 1.0161526203155518,grad_norm: 0.9999991308204009, iteration: 96557
loss: 0.9952083826065063,grad_norm: 0.9999991501220916, iteration: 96558
loss: 0.9950492978096008,grad_norm: 0.8586288855387515, iteration: 96559
loss: 0.9881671071052551,grad_norm: 0.9711141706832519, iteration: 96560
loss: 1.013082504272461,grad_norm: 0.9999991703058185, iteration: 96561
loss: 1.0208306312561035,grad_norm: 0.9230443281862156, iteration: 96562
loss: 1.0488070249557495,grad_norm: 0.9999992503234207, iteration: 96563
loss: 1.009680151939392,grad_norm: 0.9999992316154903, iteration: 96564
loss: 1.0240495204925537,grad_norm: 0.9999994017453869, iteration: 96565
loss: 0.9906681776046753,grad_norm: 0.9944992626900441, iteration: 96566
loss: 1.001183271408081,grad_norm: 0.9143925217320407, iteration: 96567
loss: 1.0337828397750854,grad_norm: 0.9999992678510623, iteration: 96568
loss: 1.0337982177734375,grad_norm: 0.9999997703556045, iteration: 96569
loss: 1.0575133562088013,grad_norm: 0.9505805311506434, iteration: 96570
loss: 0.9617268443107605,grad_norm: 0.9895249037739868, iteration: 96571
loss: 1.001450777053833,grad_norm: 0.9999994331951035, iteration: 96572
loss: 0.9832314252853394,grad_norm: 0.9999990665078983, iteration: 96573
loss: 0.9885182976722717,grad_norm: 0.9669088800093163, iteration: 96574
loss: 0.991676390171051,grad_norm: 0.8657387459121535, iteration: 96575
loss: 1.006138563156128,grad_norm: 0.8442546942962135, iteration: 96576
loss: 0.9886364340782166,grad_norm: 0.8496099347240226, iteration: 96577
loss: 1.0156075954437256,grad_norm: 0.8371240794476753, iteration: 96578
loss: 0.9810328483581543,grad_norm: 0.9594423584387264, iteration: 96579
loss: 1.0097107887268066,grad_norm: 0.958936092142571, iteration: 96580
loss: 0.9889987707138062,grad_norm: 0.9763990309012797, iteration: 96581
loss: 1.009885549545288,grad_norm: 0.9999989470192407, iteration: 96582
loss: 1.0130473375320435,grad_norm: 0.9727839264404112, iteration: 96583
loss: 0.9655367136001587,grad_norm: 0.859971622765709, iteration: 96584
loss: 0.9828542470932007,grad_norm: 0.8805460812661325, iteration: 96585
loss: 0.9827077984809875,grad_norm: 0.999999026380425, iteration: 96586
loss: 0.9723101854324341,grad_norm: 0.9430418274444927, iteration: 96587
loss: 0.9989511370658875,grad_norm: 0.9999991931995256, iteration: 96588
loss: 0.9578527808189392,grad_norm: 0.9942590363379101, iteration: 96589
loss: 0.9722006916999817,grad_norm: 0.9402523959287867, iteration: 96590
loss: 0.9898000955581665,grad_norm: 0.9999990978476806, iteration: 96591
loss: 0.9789310693740845,grad_norm: 0.9659432709633697, iteration: 96592
loss: 0.9895097613334656,grad_norm: 0.9999991470988238, iteration: 96593
loss: 0.9592249393463135,grad_norm: 0.9999990042737763, iteration: 96594
loss: 1.0199246406555176,grad_norm: 0.9684918928127708, iteration: 96595
loss: 1.0069608688354492,grad_norm: 0.9455729382448664, iteration: 96596
loss: 1.0173442363739014,grad_norm: 0.999999094445529, iteration: 96597
loss: 1.0080747604370117,grad_norm: 0.9999990990747425, iteration: 96598
loss: 0.9951472282409668,grad_norm: 0.9999992845266552, iteration: 96599
loss: 0.9970876574516296,grad_norm: 0.9513694242245749, iteration: 96600
loss: 0.9928717017173767,grad_norm: 0.9999992046590178, iteration: 96601
loss: 1.02766752243042,grad_norm: 0.9999993465898406, iteration: 96602
loss: 0.9851140379905701,grad_norm: 0.9010273267787308, iteration: 96603
loss: 1.0006521940231323,grad_norm: 0.9999992133131639, iteration: 96604
loss: 0.9912317991256714,grad_norm: 0.9999989610376232, iteration: 96605
loss: 1.0162488222122192,grad_norm: 0.9150660277563838, iteration: 96606
loss: 1.037471890449524,grad_norm: 0.9999999228285684, iteration: 96607
loss: 0.9781538248062134,grad_norm: 0.8808423579289482, iteration: 96608
loss: 0.9794849753379822,grad_norm: 0.9471013517716619, iteration: 96609
loss: 1.0195494890213013,grad_norm: 0.9999989997242776, iteration: 96610
loss: 0.9991089701652527,grad_norm: 0.9999989950838927, iteration: 96611
loss: 0.9658244848251343,grad_norm: 0.9999990530211258, iteration: 96612
loss: 1.0259068012237549,grad_norm: 0.9999991756700702, iteration: 96613
loss: 1.005332112312317,grad_norm: 0.999999120691114, iteration: 96614
loss: 1.0091302394866943,grad_norm: 0.9999990143651056, iteration: 96615
loss: 1.0437204837799072,grad_norm: 0.9999992547929105, iteration: 96616
loss: 0.985325813293457,grad_norm: 0.9999994283568264, iteration: 96617
loss: 0.9988520741462708,grad_norm: 0.9999991722086594, iteration: 96618
loss: 1.0184606313705444,grad_norm: 0.9999990700899226, iteration: 96619
loss: 1.0091837644577026,grad_norm: 0.999999280362448, iteration: 96620
loss: 1.0041773319244385,grad_norm: 0.9590562316558447, iteration: 96621
loss: 1.002658724784851,grad_norm: 0.9999990855761668, iteration: 96622
loss: 1.0381157398223877,grad_norm: 0.950196097026116, iteration: 96623
loss: 1.0061434507369995,grad_norm: 0.9882951852393852, iteration: 96624
loss: 0.9342027306556702,grad_norm: 0.9404255471777325, iteration: 96625
loss: 0.9955810308456421,grad_norm: 0.9691797706116702, iteration: 96626
loss: 1.0449433326721191,grad_norm: 0.9999992658162377, iteration: 96627
loss: 0.9995096921920776,grad_norm: 0.999999114930212, iteration: 96628
loss: 0.997505247592926,grad_norm: 0.9999991891211776, iteration: 96629
loss: 0.9845775961875916,grad_norm: 0.8815419087856011, iteration: 96630
loss: 0.9705846905708313,grad_norm: 0.9648239323529795, iteration: 96631
loss: 0.9866605997085571,grad_norm: 0.9286518199723296, iteration: 96632
loss: 1.036281704902649,grad_norm: 0.9104348040643172, iteration: 96633
loss: 0.979349672794342,grad_norm: 0.9883466408450327, iteration: 96634
loss: 1.0054281949996948,grad_norm: 0.8715989574431565, iteration: 96635
loss: 1.01943838596344,grad_norm: 0.9999991158078091, iteration: 96636
loss: 1.0027151107788086,grad_norm: 0.9999991114693262, iteration: 96637
loss: 1.0135287046432495,grad_norm: 0.9818541738164186, iteration: 96638
loss: 0.9998340606689453,grad_norm: 0.9401642577500213, iteration: 96639
loss: 1.0140187740325928,grad_norm: 0.9999990402291925, iteration: 96640
loss: 1.0211652517318726,grad_norm: 0.9999994874450846, iteration: 96641
loss: 0.9841657280921936,grad_norm: 0.8631203996793272, iteration: 96642
loss: 1.0285353660583496,grad_norm: 0.9999992625836045, iteration: 96643
loss: 1.0260450839996338,grad_norm: 0.9999994385125422, iteration: 96644
loss: 0.9817043542861938,grad_norm: 0.9999991955222605, iteration: 96645
loss: 0.9790549278259277,grad_norm: 0.9999992854708895, iteration: 96646
loss: 0.9596958756446838,grad_norm: 0.9506490817764651, iteration: 96647
loss: 0.9823095202445984,grad_norm: 0.8045509858878028, iteration: 96648
loss: 1.0254452228546143,grad_norm: 0.999999218883417, iteration: 96649
loss: 0.9712205529212952,grad_norm: 0.9999991989957847, iteration: 96650
loss: 1.0035237073898315,grad_norm: 0.9999989851885063, iteration: 96651
loss: 0.9720867872238159,grad_norm: 0.9921103970389857, iteration: 96652
loss: 1.0081876516342163,grad_norm: 0.999999080578937, iteration: 96653
loss: 0.9972020387649536,grad_norm: 0.999999051132597, iteration: 96654
loss: 1.0265458822250366,grad_norm: 0.9999991472265161, iteration: 96655
loss: 1.0219674110412598,grad_norm: 0.9999994452775842, iteration: 96656
loss: 0.9834983348846436,grad_norm: 0.9999991181306584, iteration: 96657
loss: 1.0420514345169067,grad_norm: 0.9999991555595309, iteration: 96658
loss: 0.9992182850837708,grad_norm: 0.8955750302522674, iteration: 96659
loss: 1.0123742818832397,grad_norm: 0.9999990573745916, iteration: 96660
loss: 1.0545141696929932,grad_norm: 0.9999991547026708, iteration: 96661
loss: 0.9677460789680481,grad_norm: 0.9999992104271633, iteration: 96662
loss: 1.0355167388916016,grad_norm: 0.9999998102084098, iteration: 96663
loss: 1.005196452140808,grad_norm: 0.9999990756960828, iteration: 96664
loss: 0.9984569549560547,grad_norm: 0.9999991732249169, iteration: 96665
loss: 1.0033491849899292,grad_norm: 0.9292847679185519, iteration: 96666
loss: 1.0230599641799927,grad_norm: 0.9225303858225865, iteration: 96667
loss: 1.0076338052749634,grad_norm: 0.9806653161745102, iteration: 96668
loss: 1.0179779529571533,grad_norm: 0.9999990416318576, iteration: 96669
loss: 1.0088038444519043,grad_norm: 0.9999992283560184, iteration: 96670
loss: 0.9958813190460205,grad_norm: 0.835327070809423, iteration: 96671
loss: 1.022647738456726,grad_norm: 0.9999990125238156, iteration: 96672
loss: 1.0181736946105957,grad_norm: 0.9999990254564539, iteration: 96673
loss: 1.0013867616653442,grad_norm: 0.9711194504377991, iteration: 96674
loss: 1.02552330493927,grad_norm: 0.9859333463540729, iteration: 96675
loss: 1.000334620475769,grad_norm: 0.9999992926718396, iteration: 96676
loss: 0.9895676374435425,grad_norm: 0.9999991426145842, iteration: 96677
loss: 1.0183578729629517,grad_norm: 0.9999995815765107, iteration: 96678
loss: 0.9675107598304749,grad_norm: 0.9999990486134237, iteration: 96679
loss: 1.011922001838684,grad_norm: 0.9999991638468583, iteration: 96680
loss: 0.9712183475494385,grad_norm: 0.9999992876975248, iteration: 96681
loss: 0.9901724457740784,grad_norm: 0.9999996676234253, iteration: 96682
loss: 1.003947138786316,grad_norm: 0.9999992480012011, iteration: 96683
loss: 0.9962887167930603,grad_norm: 0.9999992159529892, iteration: 96684
loss: 1.0156701803207397,grad_norm: 0.99251208710895, iteration: 96685
loss: 1.0260347127914429,grad_norm: 0.9405620289742116, iteration: 96686
loss: 0.9576331377029419,grad_norm: 0.9646076865784533, iteration: 96687
loss: 1.012264370918274,grad_norm: 0.9786743441076042, iteration: 96688
loss: 1.0141793489456177,grad_norm: 0.9772557142291185, iteration: 96689
loss: 0.9844141602516174,grad_norm: 0.9999990842769314, iteration: 96690
loss: 1.004970908164978,grad_norm: 0.9999989715760101, iteration: 96691
loss: 1.0046815872192383,grad_norm: 0.9999991645827365, iteration: 96692
loss: 1.0227843523025513,grad_norm: 0.9999990219480577, iteration: 96693
loss: 0.9676083326339722,grad_norm: 0.9999990999837116, iteration: 96694
loss: 1.0103745460510254,grad_norm: 0.9999990885824782, iteration: 96695
loss: 1.0067468881607056,grad_norm: 0.9999991529902532, iteration: 96696
loss: 0.9721699357032776,grad_norm: 0.9999991022791471, iteration: 96697
loss: 1.009094476699829,grad_norm: 0.9925701395009262, iteration: 96698
loss: 1.0042170286178589,grad_norm: 0.9999990236187105, iteration: 96699
loss: 1.0225746631622314,grad_norm: 0.919924255763145, iteration: 96700
loss: 1.0122870206832886,grad_norm: 0.9999992980882275, iteration: 96701
loss: 0.9811506867408752,grad_norm: 0.9999990216316518, iteration: 96702
loss: 1.0339982509613037,grad_norm: 0.9999990747718502, iteration: 96703
loss: 0.9932481050491333,grad_norm: 0.9999991332660053, iteration: 96704
loss: 1.0251307487487793,grad_norm: 0.9999989584751081, iteration: 96705
loss: 1.0102887153625488,grad_norm: 0.8612322216025247, iteration: 96706
loss: 0.975773811340332,grad_norm: 0.9999991879019349, iteration: 96707
loss: 1.0034905672073364,grad_norm: 0.9999991451628338, iteration: 96708
loss: 1.0006941556930542,grad_norm: 0.9999992628961285, iteration: 96709
loss: 0.9715375900268555,grad_norm: 0.8873370174387997, iteration: 96710
loss: 0.9997643828392029,grad_norm: 0.9963450917169986, iteration: 96711
loss: 0.9566662907600403,grad_norm: 0.9999990675380642, iteration: 96712
loss: 1.0034675598144531,grad_norm: 0.9999991041521227, iteration: 96713
loss: 1.0318615436553955,grad_norm: 0.9999990294122977, iteration: 96714
loss: 1.0064641237258911,grad_norm: 0.9999990603867851, iteration: 96715
loss: 1.0146766901016235,grad_norm: 0.9999996875323215, iteration: 96716
loss: 1.0013365745544434,grad_norm: 0.9008803124055728, iteration: 96717
loss: 0.9374139904975891,grad_norm: 0.9863281364063784, iteration: 96718
loss: 0.9730434417724609,grad_norm: 0.9999991701041969, iteration: 96719
loss: 1.0144177675247192,grad_norm: 0.9999990117186514, iteration: 96720
loss: 0.9981361031532288,grad_norm: 0.9331633312560978, iteration: 96721
loss: 0.9981562495231628,grad_norm: 0.9973162050088367, iteration: 96722
loss: 1.0210953950881958,grad_norm: 0.9999991514069626, iteration: 96723
loss: 0.9760968089103699,grad_norm: 0.9999992290841373, iteration: 96724
loss: 1.0083363056182861,grad_norm: 0.8482403267245693, iteration: 96725
loss: 1.0139621496200562,grad_norm: 0.9999991077097246, iteration: 96726
loss: 0.9905725121498108,grad_norm: 0.7999772925355423, iteration: 96727
loss: 0.9583033323287964,grad_norm: 0.9999991893352757, iteration: 96728
loss: 1.0259813070297241,grad_norm: 0.9999990028890292, iteration: 96729
loss: 1.0488940477371216,grad_norm: 0.9762860307087704, iteration: 96730
loss: 0.9852417707443237,grad_norm: 0.9628128503009283, iteration: 96731
loss: 0.9746087193489075,grad_norm: 0.9999989801779664, iteration: 96732
loss: 0.9750656485557556,grad_norm: 0.9999993668451761, iteration: 96733
loss: 0.9952409267425537,grad_norm: 0.953739571275287, iteration: 96734
loss: 1.084221363067627,grad_norm: 0.9999998791831984, iteration: 96735
loss: 1.000853419303894,grad_norm: 0.9999991967451818, iteration: 96736
loss: 0.9937061071395874,grad_norm: 0.9649726589090759, iteration: 96737
loss: 0.996826708316803,grad_norm: 0.920857815119505, iteration: 96738
loss: 1.0168414115905762,grad_norm: 0.9999992941475797, iteration: 96739
loss: 0.9955249428749084,grad_norm: 0.9999989798684774, iteration: 96740
loss: 1.028403401374817,grad_norm: 0.9999992476619264, iteration: 96741
loss: 0.9895660877227783,grad_norm: 0.899516261715008, iteration: 96742
loss: 0.9704228639602661,grad_norm: 0.9630216215072508, iteration: 96743
loss: 1.0277100801467896,grad_norm: 0.9176147353662862, iteration: 96744
loss: 0.9817354083061218,grad_norm: 0.9756840343117824, iteration: 96745
loss: 0.9891077280044556,grad_norm: 0.9649111882640045, iteration: 96746
loss: 1.0151981115341187,grad_norm: 0.9999990346151768, iteration: 96747
loss: 0.9746298789978027,grad_norm: 0.9999991996009746, iteration: 96748
loss: 0.9523818492889404,grad_norm: 0.9999993642951618, iteration: 96749
loss: 0.9887537360191345,grad_norm: 0.9999991175173526, iteration: 96750
loss: 1.0407512187957764,grad_norm: 0.9673615454008657, iteration: 96751
loss: 1.0105687379837036,grad_norm: 0.9885160403404337, iteration: 96752
loss: 0.9979763627052307,grad_norm: 0.9999992258872261, iteration: 96753
loss: 0.9543718695640564,grad_norm: 0.9999991257630791, iteration: 96754
loss: 0.9472590684890747,grad_norm: 0.9999991126582084, iteration: 96755
loss: 0.9838749170303345,grad_norm: 0.9999990816281841, iteration: 96756
loss: 0.9977569580078125,grad_norm: 0.9144296048230456, iteration: 96757
loss: 1.0156840085983276,grad_norm: 0.9999993500498552, iteration: 96758
loss: 0.9791615605354309,grad_norm: 0.9999991524199447, iteration: 96759
loss: 1.0219188928604126,grad_norm: 0.9999991989092339, iteration: 96760
loss: 0.9891263842582703,grad_norm: 0.9382793433754355, iteration: 96761
loss: 0.9969958066940308,grad_norm: 0.9999989861368708, iteration: 96762
loss: 0.9759156703948975,grad_norm: 0.9081412086080648, iteration: 96763
loss: 1.0631011724472046,grad_norm: 0.9999996735665625, iteration: 96764
loss: 1.0006282329559326,grad_norm: 0.9782549161967935, iteration: 96765
loss: 1.0350126028060913,grad_norm: 0.9987732785442976, iteration: 96766
loss: 0.9739055633544922,grad_norm: 0.9999991735892231, iteration: 96767
loss: 1.0227758884429932,grad_norm: 0.9999992244521012, iteration: 96768
loss: 1.0074368715286255,grad_norm: 0.9999997448241158, iteration: 96769
loss: 0.9914615154266357,grad_norm: 0.9999991603588775, iteration: 96770
loss: 0.96943598985672,grad_norm: 0.9999992004443563, iteration: 96771
loss: 1.0045019388198853,grad_norm: 0.9999992057708992, iteration: 96772
loss: 0.9912136793136597,grad_norm: 0.9999990985539498, iteration: 96773
loss: 0.9968242645263672,grad_norm: 0.9999989722741619, iteration: 96774
loss: 0.9879379868507385,grad_norm: 0.9999991158348135, iteration: 96775
loss: 1.0174733400344849,grad_norm: 0.9835538881649827, iteration: 96776
loss: 0.9779406785964966,grad_norm: 0.9707166552214713, iteration: 96777
loss: 0.9822498559951782,grad_norm: 0.9999991330320597, iteration: 96778
loss: 0.9866194725036621,grad_norm: 0.9582883731557938, iteration: 96779
loss: 0.9863336682319641,grad_norm: 0.9326346189136344, iteration: 96780
loss: 0.9867626428604126,grad_norm: 0.9999989988268132, iteration: 96781
loss: 0.9557082653045654,grad_norm: 0.9969439707170414, iteration: 96782
loss: 0.9713394641876221,grad_norm: 0.9999992594323958, iteration: 96783
loss: 1.0185233354568481,grad_norm: 0.9999991360219243, iteration: 96784
loss: 0.9935935735702515,grad_norm: 0.9999991722854361, iteration: 96785
loss: 1.0107877254486084,grad_norm: 0.9999990413538153, iteration: 96786
loss: 0.9740111827850342,grad_norm: 0.9475636446732493, iteration: 96787
loss: 1.0212925672531128,grad_norm: 0.9838248390045475, iteration: 96788
loss: 0.996613085269928,grad_norm: 0.9999989191984501, iteration: 96789
loss: 1.0157629251480103,grad_norm: 0.9564829227291735, iteration: 96790
loss: 0.9901023507118225,grad_norm: 0.9999990441527128, iteration: 96791
loss: 1.001117467880249,grad_norm: 0.9575144027166966, iteration: 96792
loss: 0.9432567358016968,grad_norm: 0.9901723799143825, iteration: 96793
loss: 0.9899520874023438,grad_norm: 0.9999992654858961, iteration: 96794
loss: 1.007820963859558,grad_norm: 0.8740735939736022, iteration: 96795
loss: 0.9872720837593079,grad_norm: 0.9999990191487838, iteration: 96796
loss: 0.9769364595413208,grad_norm: 0.9999990549464988, iteration: 96797
loss: 1.0780508518218994,grad_norm: 0.9999992762028541, iteration: 96798
loss: 0.9809690713882446,grad_norm: 0.9588933824504887, iteration: 96799
loss: 0.9828358292579651,grad_norm: 0.9999991305856001, iteration: 96800
loss: 1.038608193397522,grad_norm: 0.999999253761307, iteration: 96801
loss: 1.0000871419906616,grad_norm: 0.9999996186067721, iteration: 96802
loss: 0.9924070835113525,grad_norm: 0.9530973510639833, iteration: 96803
loss: 0.9758957624435425,grad_norm: 0.887654009798206, iteration: 96804
loss: 1.0031718015670776,grad_norm: 0.965810900513353, iteration: 96805
loss: 0.9993404150009155,grad_norm: 0.9999993187521993, iteration: 96806
loss: 0.9724374413490295,grad_norm: 0.999800463617988, iteration: 96807
loss: 1.0472103357315063,grad_norm: 0.9999999209378304, iteration: 96808
loss: 1.0173836946487427,grad_norm: 0.9724497072323687, iteration: 96809
loss: 0.9836329221725464,grad_norm: 0.9999991086265605, iteration: 96810
loss: 0.9771532416343689,grad_norm: 0.9999991704484729, iteration: 96811
loss: 1.0213184356689453,grad_norm: 0.9999990634094437, iteration: 96812
loss: 1.0086466073989868,grad_norm: 0.8614987670464666, iteration: 96813
loss: 1.0010231733322144,grad_norm: 0.9651201828653864, iteration: 96814
loss: 1.0109282732009888,grad_norm: 0.9999992655243143, iteration: 96815
loss: 1.0115983486175537,grad_norm: 0.9999990975377483, iteration: 96816
loss: 0.989771842956543,grad_norm: 0.9999991990745227, iteration: 96817
loss: 1.0196093320846558,grad_norm: 0.8839425733707521, iteration: 96818
loss: 1.0361965894699097,grad_norm: 0.9999990894160481, iteration: 96819
loss: 1.0085004568099976,grad_norm: 0.9999990935128353, iteration: 96820
loss: 0.9907813668251038,grad_norm: 0.9999990797429973, iteration: 96821
loss: 0.9986732006072998,grad_norm: 0.9999991751010879, iteration: 96822
loss: 0.985499918460846,grad_norm: 0.9488466017951916, iteration: 96823
loss: 0.9991539716720581,grad_norm: 0.963511723387741, iteration: 96824
loss: 0.9937546253204346,grad_norm: 0.9999992358415182, iteration: 96825
loss: 0.9979397058486938,grad_norm: 0.9999990133707908, iteration: 96826
loss: 1.0241992473602295,grad_norm: 0.9999991038701217, iteration: 96827
loss: 1.0085831880569458,grad_norm: 0.9920425142693237, iteration: 96828
loss: 0.9959329962730408,grad_norm: 0.9999990104344663, iteration: 96829
loss: 1.0486868619918823,grad_norm: 0.9999990876124849, iteration: 96830
loss: 0.9873008131980896,grad_norm: 0.9298360104760602, iteration: 96831
loss: 1.0272072553634644,grad_norm: 0.9999992772903934, iteration: 96832
loss: 1.0369625091552734,grad_norm: 0.9999990855407495, iteration: 96833
loss: 1.0509122610092163,grad_norm: 0.9999991662335874, iteration: 96834
loss: 0.9504916071891785,grad_norm: 0.9848371181501104, iteration: 96835
loss: 1.0150457620620728,grad_norm: 0.9999991412944281, iteration: 96836
loss: 1.018139123916626,grad_norm: 0.9999992870147041, iteration: 96837
loss: 0.9679248332977295,grad_norm: 0.9999991908020183, iteration: 96838
loss: 1.0058397054672241,grad_norm: 0.9999992041527788, iteration: 96839
loss: 0.9768730998039246,grad_norm: 0.9999991394528881, iteration: 96840
loss: 0.9916728138923645,grad_norm: 0.9999991315901408, iteration: 96841
loss: 1.0068304538726807,grad_norm: 0.9999991701131901, iteration: 96842
loss: 0.9854678511619568,grad_norm: 0.9656823834958994, iteration: 96843
loss: 0.9919589161872864,grad_norm: 0.9999990191847874, iteration: 96844
loss: 1.0294384956359863,grad_norm: 0.9999992869245868, iteration: 96845
loss: 1.0370582342147827,grad_norm: 0.9999992778392113, iteration: 96846
loss: 0.9994655847549438,grad_norm: 0.9999991673819639, iteration: 96847
loss: 1.0256924629211426,grad_norm: 0.999999273386965, iteration: 96848
loss: 1.0151728391647339,grad_norm: 0.9999992483467932, iteration: 96849
loss: 0.9778557419776917,grad_norm: 0.9999991811251553, iteration: 96850
loss: 0.9501883387565613,grad_norm: 0.9999991959335965, iteration: 96851
loss: 1.0056837797164917,grad_norm: 0.8317243779242637, iteration: 96852
loss: 0.9628623723983765,grad_norm: 0.9999990871597891, iteration: 96853
loss: 0.9996597766876221,grad_norm: 0.9999990571738329, iteration: 96854
loss: 0.9923304319381714,grad_norm: 0.9999992892787313, iteration: 96855
loss: 1.051196813583374,grad_norm: 0.9162899811164246, iteration: 96856
loss: 1.012959599494934,grad_norm: 0.9999990480101328, iteration: 96857
loss: 1.0323609113693237,grad_norm: 0.9999999932923355, iteration: 96858
loss: 1.0178241729736328,grad_norm: 0.9999991473228215, iteration: 96859
loss: 1.006418228149414,grad_norm: 0.9999991345775959, iteration: 96860
loss: 0.9992482662200928,grad_norm: 0.9999990558330398, iteration: 96861
loss: 0.9682766199111938,grad_norm: 0.9999990899967858, iteration: 96862
loss: 0.94503253698349,grad_norm: 0.8738781843068301, iteration: 96863
loss: 1.0155715942382812,grad_norm: 0.8970425183898381, iteration: 96864
loss: 1.027754306793213,grad_norm: 0.9999992576328843, iteration: 96865
loss: 1.0347565412521362,grad_norm: 0.9999995964966558, iteration: 96866
loss: 1.0284829139709473,grad_norm: 0.999999322942396, iteration: 96867
loss: 1.00398588180542,grad_norm: 0.9999990826413521, iteration: 96868
loss: 1.043391227722168,grad_norm: 0.9999995909411439, iteration: 96869
loss: 1.0077792406082153,grad_norm: 0.9999991986401262, iteration: 96870
loss: 0.994299054145813,grad_norm: 0.9999991548239061, iteration: 96871
loss: 0.9839184284210205,grad_norm: 0.9999991277391448, iteration: 96872
loss: 1.0069738626480103,grad_norm: 0.914089147711215, iteration: 96873
loss: 1.0006177425384521,grad_norm: 0.9930036789241554, iteration: 96874
loss: 1.007602334022522,grad_norm: 0.9999990054238491, iteration: 96875
loss: 1.021748423576355,grad_norm: 0.9999992653318605, iteration: 96876
loss: 0.986748456954956,grad_norm: 0.979759457932238, iteration: 96877
loss: 0.9865745306015015,grad_norm: 0.9999992696257457, iteration: 96878
loss: 1.006992220878601,grad_norm: 0.9999991595042436, iteration: 96879
loss: 1.0015084743499756,grad_norm: 0.9999993895065831, iteration: 96880
loss: 1.0561012029647827,grad_norm: 0.9999993459462378, iteration: 96881
loss: 0.9986890554428101,grad_norm: 0.9999992315456696, iteration: 96882
loss: 0.9732688069343567,grad_norm: 0.9999989903169126, iteration: 96883
loss: 0.9851282238960266,grad_norm: 0.9856924632780905, iteration: 96884
loss: 0.9825598001480103,grad_norm: 0.9999992447420041, iteration: 96885
loss: 1.0338594913482666,grad_norm: 0.9999993097052502, iteration: 96886
loss: 1.004577398300171,grad_norm: 0.999999043353612, iteration: 96887
loss: 0.9812451601028442,grad_norm: 0.9999991395495637, iteration: 96888
loss: 1.011689305305481,grad_norm: 0.9999990787624303, iteration: 96889
loss: 0.9764672517776489,grad_norm: 0.9113441330538287, iteration: 96890
loss: 1.0204414129257202,grad_norm: 0.8651528195642584, iteration: 96891
loss: 1.0045374631881714,grad_norm: 0.9479988009300467, iteration: 96892
loss: 1.0559138059616089,grad_norm: 0.9999995422508415, iteration: 96893
loss: 1.0159916877746582,grad_norm: 0.9999990110876704, iteration: 96894
loss: 0.9763968586921692,grad_norm: 0.9259493291415503, iteration: 96895
loss: 1.0157781839370728,grad_norm: 0.8124397842585207, iteration: 96896
loss: 0.9849122762680054,grad_norm: 0.9999990665020554, iteration: 96897
loss: 1.0029784440994263,grad_norm: 0.9680194428791352, iteration: 96898
loss: 0.9874091744422913,grad_norm: 0.9999991474323083, iteration: 96899
loss: 0.9747505784034729,grad_norm: 0.9999991479597838, iteration: 96900
loss: 0.9864802956581116,grad_norm: 0.9703812023408853, iteration: 96901
loss: 0.9668950438499451,grad_norm: 0.9999991040315619, iteration: 96902
loss: 1.0137741565704346,grad_norm: 0.999999157572072, iteration: 96903
loss: 0.9814252853393555,grad_norm: 0.9999991008961671, iteration: 96904
loss: 0.959644615650177,grad_norm: 0.8951081782162085, iteration: 96905
loss: 0.9916757941246033,grad_norm: 0.9999990978419129, iteration: 96906
loss: 0.9675398468971252,grad_norm: 0.9999991545632868, iteration: 96907
loss: 0.9832745790481567,grad_norm: 0.9999991036888134, iteration: 96908
loss: 0.9782652258872986,grad_norm: 0.889784962601597, iteration: 96909
loss: 1.1058030128479004,grad_norm: 0.9999994581175389, iteration: 96910
loss: 0.989435076713562,grad_norm: 0.9999992587002529, iteration: 96911
loss: 1.0064661502838135,grad_norm: 0.999999085852877, iteration: 96912
loss: 1.012047290802002,grad_norm: 0.9999991637799288, iteration: 96913
loss: 0.9934292435646057,grad_norm: 0.9999990548890328, iteration: 96914
loss: 0.9595677256584167,grad_norm: 0.9999990649738131, iteration: 96915
loss: 0.9847817420959473,grad_norm: 0.9999991417800951, iteration: 96916
loss: 0.9951386451721191,grad_norm: 0.9549937314475011, iteration: 96917
loss: 1.0036287307739258,grad_norm: 0.9999990653649726, iteration: 96918
loss: 0.9808785319328308,grad_norm: 0.9999990103114442, iteration: 96919
loss: 0.9967729449272156,grad_norm: 0.9999991332155421, iteration: 96920
loss: 0.999487578868866,grad_norm: 0.9999992797871128, iteration: 96921
loss: 1.0097249746322632,grad_norm: 0.999999032512154, iteration: 96922
loss: 1.014407753944397,grad_norm: 0.9863122154770341, iteration: 96923
loss: 1.0262378454208374,grad_norm: 0.9999990902574943, iteration: 96924
loss: 0.9465854167938232,grad_norm: 0.9999991065497309, iteration: 96925
loss: 1.040368676185608,grad_norm: 0.9999993821533019, iteration: 96926
loss: 0.9641695618629456,grad_norm: 0.9812782966987447, iteration: 96927
loss: 1.0165660381317139,grad_norm: 0.9999991253081242, iteration: 96928
loss: 0.9605206847190857,grad_norm: 0.9983548957893755, iteration: 96929
loss: 1.0313671827316284,grad_norm: 0.8868430051198916, iteration: 96930
loss: 1.0006948709487915,grad_norm: 0.9999991735048667, iteration: 96931
loss: 1.0341299772262573,grad_norm: 0.9999990767790874, iteration: 96932
loss: 1.0059478282928467,grad_norm: 0.9390404337958671, iteration: 96933
loss: 0.9746847152709961,grad_norm: 0.9999991108370826, iteration: 96934
loss: 1.0345327854156494,grad_norm: 0.9999991624252427, iteration: 96935
loss: 0.9890255331993103,grad_norm: 0.999999068993857, iteration: 96936
loss: 1.0126020908355713,grad_norm: 0.9999992116811609, iteration: 96937
loss: 1.0068906545639038,grad_norm: 0.9999990667975627, iteration: 96938
loss: 1.0274195671081543,grad_norm: 0.9999991398004229, iteration: 96939
loss: 1.0215896368026733,grad_norm: 0.9297524735714989, iteration: 96940
loss: 0.9540280103683472,grad_norm: 0.9999990636923519, iteration: 96941
loss: 1.0168299674987793,grad_norm: 0.9999990413176533, iteration: 96942
loss: 1.0164484977722168,grad_norm: 0.8743646365133079, iteration: 96943
loss: 0.9940517544746399,grad_norm: 0.9999995574091267, iteration: 96944
loss: 1.0132046937942505,grad_norm: 0.9999993060858208, iteration: 96945
loss: 1.0009455680847168,grad_norm: 0.9999990493927623, iteration: 96946
loss: 1.0239149332046509,grad_norm: 0.9143529708070183, iteration: 96947
loss: 1.0219110250473022,grad_norm: 0.9999993851211516, iteration: 96948
loss: 0.9634288549423218,grad_norm: 0.9230550754583462, iteration: 96949
loss: 1.0132490396499634,grad_norm: 0.9999991789665494, iteration: 96950
loss: 0.9870071411132812,grad_norm: 0.9999990333151167, iteration: 96951
loss: 1.0094516277313232,grad_norm: 0.9999990654880068, iteration: 96952
loss: 1.1597133874893188,grad_norm: 0.9999995159015876, iteration: 96953
loss: 1.0115318298339844,grad_norm: 0.9999991989814878, iteration: 96954
loss: 1.0058791637420654,grad_norm: 0.9787909954997271, iteration: 96955
loss: 1.057159662246704,grad_norm: 0.9999996857336544, iteration: 96956
loss: 1.0014439821243286,grad_norm: 0.9999992758180082, iteration: 96957
loss: 1.014012336730957,grad_norm: 0.9999992145126968, iteration: 96958
loss: 1.0016192197799683,grad_norm: 0.8765578071811088, iteration: 96959
loss: 1.0481147766113281,grad_norm: 0.9999989768348012, iteration: 96960
loss: 0.9271465539932251,grad_norm: 0.99999933127822, iteration: 96961
loss: 0.9716976881027222,grad_norm: 0.9235596516443296, iteration: 96962
loss: 0.9811728000640869,grad_norm: 0.9999991910054019, iteration: 96963
loss: 1.0197224617004395,grad_norm: 0.9999991415346057, iteration: 96964
loss: 1.086681842803955,grad_norm: 0.9999997794360364, iteration: 96965
loss: 1.0268667936325073,grad_norm: 0.9999989960005184, iteration: 96966
loss: 0.9954169988632202,grad_norm: 0.9999992313687778, iteration: 96967
loss: 0.9733032584190369,grad_norm: 0.9999991255924794, iteration: 96968
loss: 1.01932954788208,grad_norm: 0.9999992162744505, iteration: 96969
loss: 1.0170823335647583,grad_norm: 0.9999992085830249, iteration: 96970
loss: 0.9937240481376648,grad_norm: 0.9999990595351613, iteration: 96971
loss: 1.0012423992156982,grad_norm: 0.9248762764406226, iteration: 96972
loss: 0.9632375836372375,grad_norm: 0.999999153558963, iteration: 96973
loss: 1.0227572917938232,grad_norm: 0.9999992566282327, iteration: 96974
loss: 1.0453691482543945,grad_norm: 0.9999991855512399, iteration: 96975
loss: 1.013258457183838,grad_norm: 0.9882078602323534, iteration: 96976
loss: 1.0336567163467407,grad_norm: 0.9999992088862092, iteration: 96977
loss: 0.9894353151321411,grad_norm: 0.9698342230881387, iteration: 96978
loss: 1.035807490348816,grad_norm: 0.9999995044206262, iteration: 96979
loss: 1.0209871530532837,grad_norm: 0.9999990202521585, iteration: 96980
loss: 1.054011583328247,grad_norm: 0.9999999877877396, iteration: 96981
loss: 1.0344655513763428,grad_norm: 0.999999700242394, iteration: 96982
loss: 1.0051263570785522,grad_norm: 0.9328802131624895, iteration: 96983
loss: 0.9734303951263428,grad_norm: 0.9999991824179573, iteration: 96984
loss: 0.9775996804237366,grad_norm: 0.9999992332450645, iteration: 96985
loss: 0.9629201889038086,grad_norm: 0.9999990482186453, iteration: 96986
loss: 1.0133577585220337,grad_norm: 0.9999990521010378, iteration: 96987
loss: 1.006789207458496,grad_norm: 0.9383245916628992, iteration: 96988
loss: 1.0024396181106567,grad_norm: 0.9999990905852916, iteration: 96989
loss: 0.9916577339172363,grad_norm: 0.999999094570857, iteration: 96990
loss: 0.975047767162323,grad_norm: 0.9459614414960248, iteration: 96991
loss: 1.0001857280731201,grad_norm: 0.9999994366407916, iteration: 96992
loss: 1.0118284225463867,grad_norm: 0.9999992537992145, iteration: 96993
loss: 0.9686596989631653,grad_norm: 0.8754745850308956, iteration: 96994
loss: 1.0051332712173462,grad_norm: 0.999999118729626, iteration: 96995
loss: 1.0171198844909668,grad_norm: 0.999999107377886, iteration: 96996
loss: 1.0119414329528809,grad_norm: 0.9999991967917395, iteration: 96997
loss: 0.9802207350730896,grad_norm: 0.9861336238992697, iteration: 96998
loss: 1.108202338218689,grad_norm: 0.9999999681469093, iteration: 96999
loss: 1.1188935041427612,grad_norm: 0.9999995906798548, iteration: 97000
loss: 0.999252200126648,grad_norm: 0.957419432820124, iteration: 97001
loss: 0.9746654033660889,grad_norm: 0.9999991672465842, iteration: 97002
loss: 1.0312824249267578,grad_norm: 0.9999998085864331, iteration: 97003
loss: 1.0288411378860474,grad_norm: 0.9613677078198718, iteration: 97004
loss: 1.006287932395935,grad_norm: 0.9999994143024771, iteration: 97005
loss: 0.9603095650672913,grad_norm: 0.9960082566059794, iteration: 97006
loss: 1.0100549459457397,grad_norm: 0.999999624204342, iteration: 97007
loss: 0.9959731101989746,grad_norm: 0.936051996123519, iteration: 97008
loss: 1.0284485816955566,grad_norm: 0.9999994019653311, iteration: 97009
loss: 1.075372576713562,grad_norm: 0.9999991652288035, iteration: 97010
loss: 1.000441551208496,grad_norm: 0.9999990910351522, iteration: 97011
loss: 0.9851437211036682,grad_norm: 0.9999991071546506, iteration: 97012
loss: 1.0684376955032349,grad_norm: 0.9999991437735486, iteration: 97013
loss: 1.0259950160980225,grad_norm: 0.8140949215971173, iteration: 97014
loss: 1.0501986742019653,grad_norm: 0.9999998363884823, iteration: 97015
loss: 1.0106009244918823,grad_norm: 0.9919765435707355, iteration: 97016
loss: 1.0600553750991821,grad_norm: 0.9999991263270595, iteration: 97017
loss: 1.0069373846054077,grad_norm: 0.9999989766867385, iteration: 97018
loss: 1.040867805480957,grad_norm: 0.9999990524381922, iteration: 97019
loss: 0.9611673951148987,grad_norm: 0.9999991207323095, iteration: 97020
loss: 0.9792933464050293,grad_norm: 0.9999991235671755, iteration: 97021
loss: 1.013755440711975,grad_norm: 0.9999990806414625, iteration: 97022
loss: 0.9954716563224792,grad_norm: 0.9999991446673522, iteration: 97023
loss: 1.039613962173462,grad_norm: 0.999999046924074, iteration: 97024
loss: 1.0000970363616943,grad_norm: 0.9999991514045647, iteration: 97025
loss: 0.9908722639083862,grad_norm: 0.9999991384210942, iteration: 97026
loss: 0.9761574864387512,grad_norm: 0.9999992910175646, iteration: 97027
loss: 0.9933454990386963,grad_norm: 0.9421892527336514, iteration: 97028
loss: 0.9799067974090576,grad_norm: 0.99999903942643, iteration: 97029
loss: 1.0114706754684448,grad_norm: 0.8431129241720975, iteration: 97030
loss: 1.03634774684906,grad_norm: 0.9790396861754218, iteration: 97031
loss: 1.0372467041015625,grad_norm: 0.9999991097060442, iteration: 97032
loss: 0.9332205057144165,grad_norm: 0.9999992856933672, iteration: 97033
loss: 0.9591318964958191,grad_norm: 0.9172525769184807, iteration: 97034
loss: 1.0010956525802612,grad_norm: 0.9999990856564417, iteration: 97035
loss: 0.9655888676643372,grad_norm: 0.99999897011288, iteration: 97036
loss: 0.9573503732681274,grad_norm: 0.9408543869761594, iteration: 97037
loss: 0.9660858511924744,grad_norm: 0.8454693388073419, iteration: 97038
loss: 0.9766841530799866,grad_norm: 0.9999991026834678, iteration: 97039
loss: 1.0100328922271729,grad_norm: 0.9999992116506327, iteration: 97040
loss: 1.018045425415039,grad_norm: 0.9999991239131628, iteration: 97041
loss: 0.9852464199066162,grad_norm: 0.9879088392376831, iteration: 97042
loss: 1.018650770187378,grad_norm: 0.9999990670693759, iteration: 97043
loss: 0.9880800843238831,grad_norm: 0.9999991705407011, iteration: 97044
loss: 0.9874340891838074,grad_norm: 0.8735199720512777, iteration: 97045
loss: 0.9791486859321594,grad_norm: 0.9999990901532027, iteration: 97046
loss: 0.9914519190788269,grad_norm: 0.9999995905284048, iteration: 97047
loss: 0.9636114835739136,grad_norm: 0.999999174014436, iteration: 97048
loss: 1.000944972038269,grad_norm: 0.9999990801451636, iteration: 97049
loss: 0.9636706709861755,grad_norm: 0.9974211708769131, iteration: 97050
loss: 0.9978682398796082,grad_norm: 0.9999993615012123, iteration: 97051
loss: 1.1336696147918701,grad_norm: 0.9999994116681268, iteration: 97052
loss: 1.000150203704834,grad_norm: 0.9999993755930487, iteration: 97053
loss: 1.0036489963531494,grad_norm: 0.9123128501074194, iteration: 97054
loss: 0.9985862374305725,grad_norm: 0.9999992174668534, iteration: 97055
loss: 1.0339206457138062,grad_norm: 0.9999991078092475, iteration: 97056
loss: 0.9851346611976624,grad_norm: 0.9999989590582724, iteration: 97057
loss: 0.957830011844635,grad_norm: 0.9999991483053287, iteration: 97058
loss: 1.0078794956207275,grad_norm: 0.9999991162795557, iteration: 97059
loss: 0.9720927476882935,grad_norm: 0.8673790544582063, iteration: 97060
loss: 0.9793062210083008,grad_norm: 0.9999990848353831, iteration: 97061
loss: 1.0215363502502441,grad_norm: 0.9999991661653483, iteration: 97062
loss: 1.0412812232971191,grad_norm: 0.9999991947216572, iteration: 97063
loss: 1.0052772760391235,grad_norm: 0.9999989530973671, iteration: 97064
loss: 0.9723157286643982,grad_norm: 0.8942480267428126, iteration: 97065
loss: 0.9943720102310181,grad_norm: 0.9999991734855735, iteration: 97066
loss: 0.9811457395553589,grad_norm: 0.9999990313430791, iteration: 97067
loss: 1.003981351852417,grad_norm: 0.9999991685990105, iteration: 97068
loss: 0.9805387854576111,grad_norm: 0.9313309914462777, iteration: 97069
loss: 1.0177658796310425,grad_norm: 0.9999990312104293, iteration: 97070
loss: 1.0523350238800049,grad_norm: 1.0000000744384416, iteration: 97071
loss: 0.9905297756195068,grad_norm: 0.9951288980654691, iteration: 97072
loss: 0.9872444868087769,grad_norm: 0.9999996066348784, iteration: 97073
loss: 1.0028709173202515,grad_norm: 0.9855107258029454, iteration: 97074
loss: 0.9842404723167419,grad_norm: 0.9999993415979889, iteration: 97075
loss: 1.0157625675201416,grad_norm: 0.9999992044332521, iteration: 97076
loss: 1.011550784111023,grad_norm: 0.9999991722250624, iteration: 97077
loss: 0.9670478105545044,grad_norm: 0.9868988009755273, iteration: 97078
loss: 0.9719892740249634,grad_norm: 0.9259416223628771, iteration: 97079
loss: 0.9991353750228882,grad_norm: 0.9538595013996541, iteration: 97080
loss: 0.9663577079772949,grad_norm: 0.9999990972520446, iteration: 97081
loss: 1.0016721487045288,grad_norm: 0.9999990548191394, iteration: 97082
loss: 1.012314796447754,grad_norm: 0.8755406729812182, iteration: 97083
loss: 0.9655026197433472,grad_norm: 0.9999990022207486, iteration: 97084
loss: 0.9640153646469116,grad_norm: 0.9999991049596814, iteration: 97085
loss: 1.009863018989563,grad_norm: 0.999999289923999, iteration: 97086
loss: 1.0000782012939453,grad_norm: 0.9999992342887992, iteration: 97087
loss: 1.019122838973999,grad_norm: 0.9999991640778677, iteration: 97088
loss: 1.0317424535751343,grad_norm: 0.9913156805059496, iteration: 97089
loss: 0.9905402064323425,grad_norm: 0.9386609428499066, iteration: 97090
loss: 1.006419062614441,grad_norm: 0.9999991190069224, iteration: 97091
loss: 1.0137253999710083,grad_norm: 0.9999992025349259, iteration: 97092
loss: 0.9846282005310059,grad_norm: 0.9034995115206137, iteration: 97093
loss: 0.990334689617157,grad_norm: 0.9086486815109897, iteration: 97094
loss: 1.0066227912902832,grad_norm: 0.907132952729782, iteration: 97095
loss: 1.0540584325790405,grad_norm: 0.9710691151547378, iteration: 97096
loss: 1.080979585647583,grad_norm: 0.9999992623529111, iteration: 97097
loss: 1.0150766372680664,grad_norm: 0.9999989572620742, iteration: 97098
loss: 1.0073927640914917,grad_norm: 0.9999992518154361, iteration: 97099
loss: 0.9894142746925354,grad_norm: 0.9999992338101731, iteration: 97100
loss: 1.0504587888717651,grad_norm: 0.9999991652987157, iteration: 97101
loss: 0.9832401871681213,grad_norm: 0.9999993031267416, iteration: 97102
loss: 1.040653109550476,grad_norm: 0.9999993880231025, iteration: 97103
loss: 1.0172052383422852,grad_norm: 0.9820713906504219, iteration: 97104
loss: 0.9734964966773987,grad_norm: 0.999999002675744, iteration: 97105
loss: 0.9887696504592896,grad_norm: 0.9677070597279606, iteration: 97106
loss: 1.0024832487106323,grad_norm: 0.9999990196141897, iteration: 97107
loss: 1.0164788961410522,grad_norm: 0.8444750189767596, iteration: 97108
loss: 0.993488609790802,grad_norm: 0.9159518070888717, iteration: 97109
loss: 1.0186283588409424,grad_norm: 0.9999991364465548, iteration: 97110
loss: 0.9802202582359314,grad_norm: 0.9999990229310692, iteration: 97111
loss: 1.0410223007202148,grad_norm: 0.9999992778327929, iteration: 97112
loss: 0.9966160655021667,grad_norm: 0.9999991585947218, iteration: 97113
loss: 0.9976063966751099,grad_norm: 0.9976589222615554, iteration: 97114
loss: 1.0044734477996826,grad_norm: 0.9999989974427939, iteration: 97115
loss: 1.0002148151397705,grad_norm: 0.9310671372671715, iteration: 97116
loss: 1.0113774538040161,grad_norm: 0.9999990818125735, iteration: 97117
loss: 1.0200611352920532,grad_norm: 0.9999991081308189, iteration: 97118
loss: 1.0344274044036865,grad_norm: 0.9999992480412465, iteration: 97119
loss: 1.0282773971557617,grad_norm: 0.9999992335074924, iteration: 97120
loss: 1.0741407871246338,grad_norm: 0.9999992551175005, iteration: 97121
loss: 1.0014771223068237,grad_norm: 0.9925039532851522, iteration: 97122
loss: 0.9909257888793945,grad_norm: 0.9999998540621403, iteration: 97123
loss: 1.017586588859558,grad_norm: 0.9999998577212487, iteration: 97124
loss: 1.028338074684143,grad_norm: 0.9999992015511109, iteration: 97125
loss: 0.9739070534706116,grad_norm: 0.9999991566221864, iteration: 97126
loss: 1.0066386461257935,grad_norm: 0.9999999214785539, iteration: 97127
loss: 0.9861606359481812,grad_norm: 0.9999990922117016, iteration: 97128
loss: 1.166749119758606,grad_norm: 0.9999999467318822, iteration: 97129
loss: 0.993384599685669,grad_norm: 0.9999990917228865, iteration: 97130
loss: 0.9858470559120178,grad_norm: 0.8513727802258664, iteration: 97131
loss: 1.0134556293487549,grad_norm: 0.8750129944078558, iteration: 97132
loss: 0.9857977628707886,grad_norm: 0.9999991813357252, iteration: 97133
loss: 1.0245503187179565,grad_norm: 0.9477415164898703, iteration: 97134
loss: 1.005845546722412,grad_norm: 0.9999991039157986, iteration: 97135
loss: 1.0573335886001587,grad_norm: 0.9999996987317147, iteration: 97136
loss: 1.0133183002471924,grad_norm: 0.999999146531099, iteration: 97137
loss: 1.0146570205688477,grad_norm: 0.9999990052529352, iteration: 97138
loss: 1.023378849029541,grad_norm: 0.8912651433485889, iteration: 97139
loss: 1.0007370710372925,grad_norm: 0.9999990632223246, iteration: 97140
loss: 0.991671085357666,grad_norm: 0.9999990845952149, iteration: 97141
loss: 0.9523483514785767,grad_norm: 0.999999067786166, iteration: 97142
loss: 0.9615720510482788,grad_norm: 0.9999992165998265, iteration: 97143
loss: 0.9819832444190979,grad_norm: 0.9999991066379988, iteration: 97144
loss: 0.9814032912254333,grad_norm: 0.9680482725015579, iteration: 97145
loss: 0.9726994037628174,grad_norm: 0.999999090574817, iteration: 97146
loss: 0.9836230278015137,grad_norm: 0.9999993375116554, iteration: 97147
loss: 1.0655186176300049,grad_norm: 0.9999992437570072, iteration: 97148
loss: 1.0270068645477295,grad_norm: 0.9999990225294546, iteration: 97149
loss: 1.0555508136749268,grad_norm: 0.8976287710454748, iteration: 97150
loss: 1.0176289081573486,grad_norm: 0.9999993057256192, iteration: 97151
loss: 1.0202641487121582,grad_norm: 0.9999992014774122, iteration: 97152
loss: 0.9757595658302307,grad_norm: 0.9999990645386045, iteration: 97153
loss: 0.9558416604995728,grad_norm: 0.9744844280084082, iteration: 97154
loss: 1.0526312589645386,grad_norm: 0.9776292254151477, iteration: 97155
loss: 1.000715970993042,grad_norm: 0.9999991322434368, iteration: 97156
loss: 1.0242512226104736,grad_norm: 0.9999989978675624, iteration: 97157
loss: 1.0189788341522217,grad_norm: 0.9183842244194252, iteration: 97158
loss: 1.0443458557128906,grad_norm: 0.999999752787205, iteration: 97159
loss: 0.9845037460327148,grad_norm: 0.9999991787615217, iteration: 97160
loss: 0.9935466647148132,grad_norm: 0.8512742621403155, iteration: 97161
loss: 1.0439385175704956,grad_norm: 0.9999990996460746, iteration: 97162
loss: 1.0057690143585205,grad_norm: 0.9999991855195187, iteration: 97163
loss: 0.9998635649681091,grad_norm: 0.8734970167132303, iteration: 97164
loss: 0.9965713024139404,grad_norm: 0.8527705697600255, iteration: 97165
loss: 0.975188672542572,grad_norm: 0.9999991848738601, iteration: 97166
loss: 0.9846917986869812,grad_norm: 0.9999992095991214, iteration: 97167
loss: 0.9797256588935852,grad_norm: 0.9608572695499259, iteration: 97168
loss: 1.0326311588287354,grad_norm: 0.9999991907868304, iteration: 97169
loss: 1.0369093418121338,grad_norm: 0.9596164974768089, iteration: 97170
loss: 0.989910364151001,grad_norm: 0.9928996451164913, iteration: 97171
loss: 1.001548409461975,grad_norm: 0.9999990561067641, iteration: 97172
loss: 1.0409852266311646,grad_norm: 0.9999990957082978, iteration: 97173
loss: 1.0101674795150757,grad_norm: 0.9999992561965896, iteration: 97174
loss: 1.0145188570022583,grad_norm: 0.8545429220404474, iteration: 97175
loss: 1.016116738319397,grad_norm: 0.9999990875326293, iteration: 97176
loss: 1.0011069774627686,grad_norm: 0.8388873387136471, iteration: 97177
loss: 1.0386464595794678,grad_norm: 0.9999990992848432, iteration: 97178
loss: 0.9904821515083313,grad_norm: 0.9939229352606489, iteration: 97179
loss: 1.0411183834075928,grad_norm: 0.999999496084718, iteration: 97180
loss: 1.1005362272262573,grad_norm: 1.000000023936182, iteration: 97181
loss: 1.0256916284561157,grad_norm: 0.9999991036474172, iteration: 97182
loss: 1.0149327516555786,grad_norm: 0.999999164986255, iteration: 97183
loss: 1.002730369567871,grad_norm: 0.9999990718448986, iteration: 97184
loss: 1.0137786865234375,grad_norm: 0.9999991435813234, iteration: 97185
loss: 1.055011510848999,grad_norm: 0.9383793788626448, iteration: 97186
loss: 1.0061267614364624,grad_norm: 0.9999990755821176, iteration: 97187
loss: 0.9954325556755066,grad_norm: 0.9999990760766677, iteration: 97188
loss: 1.0041184425354004,grad_norm: 0.9999992554232154, iteration: 97189
loss: 1.0004774332046509,grad_norm: 0.9999990041485637, iteration: 97190
loss: 0.9787479639053345,grad_norm: 0.9350254911763148, iteration: 97191
loss: 1.0289653539657593,grad_norm: 0.9999991337137502, iteration: 97192
loss: 1.0087019205093384,grad_norm: 0.9216230158938311, iteration: 97193
loss: 1.0488375425338745,grad_norm: 0.9756192090002708, iteration: 97194
loss: 0.9548521041870117,grad_norm: 0.8369453252918745, iteration: 97195
loss: 0.9938392639160156,grad_norm: 0.9286985466854037, iteration: 97196
loss: 1.036943793296814,grad_norm: 0.9999989963688628, iteration: 97197
loss: 0.9613780379295349,grad_norm: 0.9911658600098107, iteration: 97198
loss: 0.9838498830795288,grad_norm: 0.9999991792259116, iteration: 97199
loss: 1.0013635158538818,grad_norm: 0.9999992330174862, iteration: 97200
loss: 0.9907383918762207,grad_norm: 0.9582965565666686, iteration: 97201
loss: 1.017074465751648,grad_norm: 0.9999991189084948, iteration: 97202
loss: 0.9943839907646179,grad_norm: 0.9999990618416059, iteration: 97203
loss: 0.9941832423210144,grad_norm: 0.816512915866426, iteration: 97204
loss: 1.0192755460739136,grad_norm: 0.9999991532447843, iteration: 97205
loss: 0.9965763688087463,grad_norm: 0.999999266471873, iteration: 97206
loss: 0.9982340931892395,grad_norm: 0.999999275607813, iteration: 97207
loss: 0.9781253337860107,grad_norm: 0.9416409444948411, iteration: 97208
loss: 0.9888670444488525,grad_norm: 0.8932687999500921, iteration: 97209
loss: 0.9706560373306274,grad_norm: 0.9999992706855285, iteration: 97210
loss: 1.0297977924346924,grad_norm: 0.9999991206194666, iteration: 97211
loss: 0.9930707216262817,grad_norm: 0.9999992470351441, iteration: 97212
loss: 0.961062490940094,grad_norm: 0.9999990642133693, iteration: 97213
loss: 1.0338364839553833,grad_norm: 0.9999993213734056, iteration: 97214
loss: 1.0117400884628296,grad_norm: 0.9999991221119391, iteration: 97215
loss: 0.9776422381401062,grad_norm: 0.9999991406089928, iteration: 97216
loss: 0.9830834269523621,grad_norm: 0.9999992086408455, iteration: 97217
loss: 0.977429211139679,grad_norm: 0.9999992343772256, iteration: 97218
loss: 1.0255409479141235,grad_norm: 0.9999991878880757, iteration: 97219
loss: 1.0121724605560303,grad_norm: 0.996685300566633, iteration: 97220
loss: 1.015021800994873,grad_norm: 0.889800520862838, iteration: 97221
loss: 1.011959433555603,grad_norm: 0.9999991123939844, iteration: 97222
loss: 0.9769078493118286,grad_norm: 0.9426649060260968, iteration: 97223
loss: 1.0129718780517578,grad_norm: 0.9999991165013888, iteration: 97224
loss: 1.0008158683776855,grad_norm: 0.9379186424961944, iteration: 97225
loss: 0.975048303604126,grad_norm: 0.9999991832214571, iteration: 97226
loss: 1.0127915143966675,grad_norm: 0.9999993227001639, iteration: 97227
loss: 0.9897596836090088,grad_norm: 0.9373988727222728, iteration: 97228
loss: 0.9888330101966858,grad_norm: 0.9999990644874303, iteration: 97229
loss: 1.0151112079620361,grad_norm: 0.8521582338594725, iteration: 97230
loss: 0.9934597611427307,grad_norm: 0.9999989253555248, iteration: 97231
loss: 0.9723982810974121,grad_norm: 0.9999990300057878, iteration: 97232
loss: 1.030211329460144,grad_norm: 0.9649319860018484, iteration: 97233
loss: 0.9874246716499329,grad_norm: 0.9275831933701676, iteration: 97234
loss: 0.9983882308006287,grad_norm: 0.9899777328978226, iteration: 97235
loss: 1.0268572568893433,grad_norm: 0.9999993182377527, iteration: 97236
loss: 0.9941394925117493,grad_norm: 0.9999991997259867, iteration: 97237
loss: 0.9856503009796143,grad_norm: 0.9791709647980936, iteration: 97238
loss: 0.9994020462036133,grad_norm: 0.9017569059668233, iteration: 97239
loss: 0.9415462613105774,grad_norm: 0.9999991706234016, iteration: 97240
loss: 1.0207170248031616,grad_norm: 0.9999991139405767, iteration: 97241
loss: 0.9625821709632874,grad_norm: 0.9999990805250748, iteration: 97242
loss: 1.042272925376892,grad_norm: 0.9999990581445342, iteration: 97243
loss: 0.9965850114822388,grad_norm: 0.9999989728574047, iteration: 97244
loss: 1.0355587005615234,grad_norm: 0.9999991920204692, iteration: 97245
loss: 1.0115281343460083,grad_norm: 0.9999992352877842, iteration: 97246
loss: 1.0183993577957153,grad_norm: 0.9999991513095977, iteration: 97247
loss: 1.0452806949615479,grad_norm: 0.9999992033216244, iteration: 97248
loss: 1.0013654232025146,grad_norm: 0.9999991560429758, iteration: 97249
loss: 0.9748202562332153,grad_norm: 0.9999992135702923, iteration: 97250
loss: 0.9983856081962585,grad_norm: 0.9999991753578155, iteration: 97251
loss: 1.006410837173462,grad_norm: 0.9999990459064279, iteration: 97252
loss: 0.9747053980827332,grad_norm: 0.9999990255727872, iteration: 97253
loss: 1.0327017307281494,grad_norm: 0.9999991517892525, iteration: 97254
loss: 1.005635380744934,grad_norm: 0.9287793090699665, iteration: 97255
loss: 0.9853739142417908,grad_norm: 0.9841992079349895, iteration: 97256
loss: 0.9955428838729858,grad_norm: 0.9999990746558968, iteration: 97257
loss: 0.9693823456764221,grad_norm: 0.9999990473412576, iteration: 97258
loss: 1.0125987529754639,grad_norm: 0.9999992274433257, iteration: 97259
loss: 1.0381349325180054,grad_norm: 0.9999993122633648, iteration: 97260
loss: 0.9850547909736633,grad_norm: 0.999999256718068, iteration: 97261
loss: 0.9911708831787109,grad_norm: 0.8894037459939318, iteration: 97262
loss: 1.013292670249939,grad_norm: 0.9999990862450865, iteration: 97263
loss: 1.0200567245483398,grad_norm: 0.9999991054036695, iteration: 97264
loss: 0.961055338382721,grad_norm: 0.9999990877997567, iteration: 97265
loss: 0.9957448244094849,grad_norm: 0.9999991683655217, iteration: 97266
loss: 0.9922550320625305,grad_norm: 0.9999990642329453, iteration: 97267
loss: 1.0045266151428223,grad_norm: 0.999999186549961, iteration: 97268
loss: 1.0250073671340942,grad_norm: 0.9194522487884403, iteration: 97269
loss: 1.0007832050323486,grad_norm: 0.9999991004226026, iteration: 97270
loss: 0.9856915473937988,grad_norm: 0.9411713260342421, iteration: 97271
loss: 1.0159265995025635,grad_norm: 0.9999992508092993, iteration: 97272
loss: 1.0243985652923584,grad_norm: 0.9904687722893796, iteration: 97273
loss: 0.9915232062339783,grad_norm: 0.9999992616864198, iteration: 97274
loss: 0.9601869583129883,grad_norm: 0.9999991689525057, iteration: 97275
loss: 0.9686729907989502,grad_norm: 0.999999122939568, iteration: 97276
loss: 0.9689102172851562,grad_norm: 0.9999990539911663, iteration: 97277
loss: 0.9788665771484375,grad_norm: 0.9999991807769686, iteration: 97278
loss: 0.9992848038673401,grad_norm: 0.9964622283793216, iteration: 97279
loss: 1.002443790435791,grad_norm: 0.97392898883194, iteration: 97280
loss: 1.0071840286254883,grad_norm: 0.9633459343580669, iteration: 97281
loss: 1.0020713806152344,grad_norm: 0.9474319191465138, iteration: 97282
loss: 1.0238136053085327,grad_norm: 0.999999247665533, iteration: 97283
loss: 1.060691237449646,grad_norm: 0.9999989910255872, iteration: 97284
loss: 1.011911392211914,grad_norm: 0.9999991446315885, iteration: 97285
loss: 1.0174654722213745,grad_norm: 0.9935041310796212, iteration: 97286
loss: 1.0045679807662964,grad_norm: 0.9999992625914197, iteration: 97287
loss: 0.9794182181358337,grad_norm: 0.975782943399211, iteration: 97288
loss: 1.0050815343856812,grad_norm: 0.9662248148322289, iteration: 97289
loss: 1.005158543586731,grad_norm: 0.9999990110645557, iteration: 97290
loss: 1.0251230001449585,grad_norm: 0.9999995435127071, iteration: 97291
loss: 0.9816579222679138,grad_norm: 0.9999991869233265, iteration: 97292
loss: 1.0078006982803345,grad_norm: 0.9999991024929957, iteration: 97293
loss: 0.9927995204925537,grad_norm: 0.9999989984028299, iteration: 97294
loss: 0.996729850769043,grad_norm: 0.9365385239804357, iteration: 97295
loss: 1.0010353326797485,grad_norm: 0.9999990552851187, iteration: 97296
loss: 1.020106315612793,grad_norm: 0.9999991327386598, iteration: 97297
loss: 1.018775224685669,grad_norm: 0.9999989673344333, iteration: 97298
loss: 1.020529866218567,grad_norm: 0.99999928686501, iteration: 97299
loss: 1.0087757110595703,grad_norm: 0.9167602574999646, iteration: 97300
loss: 1.0311391353607178,grad_norm: 0.9985917709848634, iteration: 97301
loss: 1.0119822025299072,grad_norm: 0.9229357599653286, iteration: 97302
loss: 1.0100936889648438,grad_norm: 0.8331410758098867, iteration: 97303
loss: 1.025259256362915,grad_norm: 0.9999991207670937, iteration: 97304
loss: 1.0022908449172974,grad_norm: 0.9999992135796925, iteration: 97305
loss: 1.0122863054275513,grad_norm: 0.9999990662443499, iteration: 97306
loss: 1.0000150203704834,grad_norm: 0.9999992656443298, iteration: 97307
loss: 0.9926266670227051,grad_norm: 0.9999990148429214, iteration: 97308
loss: 0.9581672549247742,grad_norm: 0.9248043122055368, iteration: 97309
loss: 0.9272575974464417,grad_norm: 0.9999991549243546, iteration: 97310
loss: 1.0022716522216797,grad_norm: 0.900537082560039, iteration: 97311
loss: 1.0281130075454712,grad_norm: 0.9999990393613827, iteration: 97312
loss: 0.9979031085968018,grad_norm: 0.9819975816937758, iteration: 97313
loss: 0.9928820729255676,grad_norm: 0.9999991287925944, iteration: 97314
loss: 0.9883149266242981,grad_norm: 0.9999993240520396, iteration: 97315
loss: 1.007806420326233,grad_norm: 0.9999990326524484, iteration: 97316
loss: 0.9984716176986694,grad_norm: 0.999999041271162, iteration: 97317
loss: 0.9826399683952332,grad_norm: 0.9251601230549679, iteration: 97318
loss: 1.0053352117538452,grad_norm: 0.999999116564634, iteration: 97319
loss: 0.9770002365112305,grad_norm: 0.964677092122105, iteration: 97320
loss: 1.0054214000701904,grad_norm: 0.9999992330909471, iteration: 97321
loss: 0.9868733882904053,grad_norm: 0.9999991513877963, iteration: 97322
loss: 1.0090203285217285,grad_norm: 0.9999993205092675, iteration: 97323
loss: 0.9665647149085999,grad_norm: 0.9999992006376972, iteration: 97324
loss: 1.0073251724243164,grad_norm: 0.9999992563688166, iteration: 97325
loss: 1.004866361618042,grad_norm: 0.8648835904440411, iteration: 97326
loss: 0.9624668955802917,grad_norm: 0.9135562763203753, iteration: 97327
loss: 0.9957674145698547,grad_norm: 0.9999990527285013, iteration: 97328
loss: 0.9727017283439636,grad_norm: 0.9999991880899144, iteration: 97329
loss: 1.033329725265503,grad_norm: 0.9999992421995254, iteration: 97330
loss: 1.0153024196624756,grad_norm: 0.9999990377851184, iteration: 97331
loss: 1.002112627029419,grad_norm: 0.9999991257114886, iteration: 97332
loss: 1.0588016510009766,grad_norm: 0.9999991715529221, iteration: 97333
loss: 0.9840453863143921,grad_norm: 0.9999991389148829, iteration: 97334
loss: 1.0326099395751953,grad_norm: 0.9999992300530677, iteration: 97335
loss: 1.0010986328125,grad_norm: 0.9999991396412942, iteration: 97336
loss: 0.9968592524528503,grad_norm: 0.9620138809184016, iteration: 97337
loss: 1.0009641647338867,grad_norm: 0.9803532101952609, iteration: 97338
loss: 0.9976394772529602,grad_norm: 0.9999998983532151, iteration: 97339
loss: 1.0079963207244873,grad_norm: 0.9999990938873957, iteration: 97340
loss: 0.9997586607933044,grad_norm: 0.9999990176792806, iteration: 97341
loss: 1.030516505241394,grad_norm: 0.9467641598241187, iteration: 97342
loss: 1.0038468837738037,grad_norm: 0.9808700608758061, iteration: 97343
loss: 0.99808669090271,grad_norm: 0.99999979873046, iteration: 97344
loss: 1.0132471323013306,grad_norm: 0.9485223184173934, iteration: 97345
loss: 0.9690728783607483,grad_norm: 0.9999992480844754, iteration: 97346
loss: 1.0664018392562866,grad_norm: 0.9999991777168123, iteration: 97347
loss: 0.9989526271820068,grad_norm: 0.946056198621974, iteration: 97348
loss: 0.9924154877662659,grad_norm: 0.8613469782688692, iteration: 97349
loss: 1.0071461200714111,grad_norm: 0.9151196265806788, iteration: 97350
loss: 1.0155482292175293,grad_norm: 0.8879099566444532, iteration: 97351
loss: 1.032909631729126,grad_norm: 0.9999992214181025, iteration: 97352
loss: 0.9940885901451111,grad_norm: 0.9999990881832589, iteration: 97353
loss: 0.9746081233024597,grad_norm: 0.9999997860935573, iteration: 97354
loss: 1.0011521577835083,grad_norm: 0.9999989950649294, iteration: 97355
loss: 1.032741904258728,grad_norm: 0.9761185718725865, iteration: 97356
loss: 0.9818459153175354,grad_norm: 0.9344851743181266, iteration: 97357
loss: 0.9767774939537048,grad_norm: 0.9999992462017387, iteration: 97358
loss: 1.0417224168777466,grad_norm: 0.999999241499009, iteration: 97359
loss: 1.020311713218689,grad_norm: 0.9999991276215682, iteration: 97360
loss: 1.0111802816390991,grad_norm: 0.9999990775085682, iteration: 97361
loss: 1.017666220664978,grad_norm: 0.9999992427612544, iteration: 97362
loss: 1.0071351528167725,grad_norm: 0.9307776063761259, iteration: 97363
loss: 0.9790628552436829,grad_norm: 0.9390722194647806, iteration: 97364
loss: 0.9838061332702637,grad_norm: 0.999999183392079, iteration: 97365
loss: 0.9932398796081543,grad_norm: 0.9382003198203313, iteration: 97366
loss: 1.011705994606018,grad_norm: 0.9999989730077374, iteration: 97367
loss: 0.9998667240142822,grad_norm: 0.9999990604782806, iteration: 97368
loss: 1.0327757596969604,grad_norm: 0.999999145488848, iteration: 97369
loss: 0.9824513792991638,grad_norm: 0.9702371410148358, iteration: 97370
loss: 0.9566847085952759,grad_norm: 0.9999991904940365, iteration: 97371
loss: 1.0221116542816162,grad_norm: 0.9999991380082175, iteration: 97372
loss: 0.9801062941551208,grad_norm: 0.9999991602713765, iteration: 97373
loss: 0.9596551656723022,grad_norm: 0.9999990549226806, iteration: 97374
loss: 1.003077745437622,grad_norm: 0.9999990504286594, iteration: 97375
loss: 1.0192536115646362,grad_norm: 0.9999991744103743, iteration: 97376
loss: 1.0080550909042358,grad_norm: 0.9999991091310813, iteration: 97377
loss: 0.9802132844924927,grad_norm: 0.9177165965605638, iteration: 97378
loss: 1.004979133605957,grad_norm: 0.9141365070677916, iteration: 97379
loss: 1.0113517045974731,grad_norm: 0.8717527706759375, iteration: 97380
loss: 0.9757771492004395,grad_norm: 0.9999992306266472, iteration: 97381
loss: 1.0079153776168823,grad_norm: 0.9999990146895703, iteration: 97382
loss: 1.0334644317626953,grad_norm: 0.947337307102354, iteration: 97383
loss: 0.9862953424453735,grad_norm: 0.8883138347121587, iteration: 97384
loss: 0.9706189036369324,grad_norm: 0.9999991659046766, iteration: 97385
loss: 0.9601457118988037,grad_norm: 0.9999991479597993, iteration: 97386
loss: 1.0042033195495605,grad_norm: 0.9999990311684852, iteration: 97387
loss: 1.0241446495056152,grad_norm: 0.9965472313351407, iteration: 97388
loss: 1.0227398872375488,grad_norm: 0.9957009219190608, iteration: 97389
loss: 1.0015833377838135,grad_norm: 0.9262963646716321, iteration: 97390
loss: 1.0594956874847412,grad_norm: 0.999999657806796, iteration: 97391
loss: 1.0219109058380127,grad_norm: 0.9999991192790136, iteration: 97392
loss: 0.9654025435447693,grad_norm: 0.9999990718595183, iteration: 97393
loss: 1.003199815750122,grad_norm: 0.9999990219362, iteration: 97394
loss: 1.0029829740524292,grad_norm: 0.9999991007995004, iteration: 97395
loss: 1.013845682144165,grad_norm: 0.9999991098808952, iteration: 97396
loss: 1.0294760465621948,grad_norm: 0.9347850371292759, iteration: 97397
loss: 1.0302109718322754,grad_norm: 0.9999995892696936, iteration: 97398
loss: 1.002399206161499,grad_norm: 0.9999992596975282, iteration: 97399
loss: 1.0153532028198242,grad_norm: 0.9567556622678083, iteration: 97400
loss: 1.0006474256515503,grad_norm: 0.9999990443302043, iteration: 97401
loss: 1.0028189420700073,grad_norm: 0.9633307596955679, iteration: 97402
loss: 0.9972426295280457,grad_norm: 0.9999990697809302, iteration: 97403
loss: 0.974768877029419,grad_norm: 0.9999993386253506, iteration: 97404
loss: 1.0087900161743164,grad_norm: 0.9999990127940718, iteration: 97405
loss: 1.00841224193573,grad_norm: 0.9991829417600959, iteration: 97406
loss: 0.9911678433418274,grad_norm: 0.9999990658872979, iteration: 97407
loss: 1.0095043182373047,grad_norm: 0.9999989816039554, iteration: 97408
loss: 0.9942419528961182,grad_norm: 0.9093656043521461, iteration: 97409
loss: 1.0233310461044312,grad_norm: 0.9999990965022256, iteration: 97410
loss: 1.013738989830017,grad_norm: 0.9999992567835979, iteration: 97411
loss: 1.0062624216079712,grad_norm: 0.9518123941101129, iteration: 97412
loss: 1.0289616584777832,grad_norm: 0.9067310263446915, iteration: 97413
loss: 1.0279206037521362,grad_norm: 0.999999011646003, iteration: 97414
loss: 1.0149728059768677,grad_norm: 0.9999990958413452, iteration: 97415
loss: 0.9802020788192749,grad_norm: 0.9479188286376042, iteration: 97416
loss: 1.0127238035202026,grad_norm: 1.000000033056662, iteration: 97417
loss: 1.0066564083099365,grad_norm: 0.9168041891509074, iteration: 97418
loss: 1.0584641695022583,grad_norm: 0.9999992621394119, iteration: 97419
loss: 0.9825330972671509,grad_norm: 0.9999993239670442, iteration: 97420
loss: 0.9688981175422668,grad_norm: 0.9999990802139902, iteration: 97421
loss: 1.0408847332000732,grad_norm: 0.9630389848662808, iteration: 97422
loss: 0.9984362125396729,grad_norm: 0.9999990314845867, iteration: 97423
loss: 1.0264873504638672,grad_norm: 0.9999990847380594, iteration: 97424
loss: 0.9677926898002625,grad_norm: 0.9999990407159233, iteration: 97425
loss: 1.0453007221221924,grad_norm: 0.9999991664701138, iteration: 97426
loss: 1.0026752948760986,grad_norm: 0.9999990325810608, iteration: 97427
loss: 0.979342520236969,grad_norm: 0.9999990911560411, iteration: 97428
loss: 1.0040438175201416,grad_norm: 0.9999992279770723, iteration: 97429
loss: 1.0011959075927734,grad_norm: 0.795192412154122, iteration: 97430
loss: 1.0667698383331299,grad_norm: 0.9999993445672994, iteration: 97431
loss: 0.9844886660575867,grad_norm: 0.8617640457864693, iteration: 97432
loss: 1.05131196975708,grad_norm: 0.999999094337507, iteration: 97433
loss: 0.9793422222137451,grad_norm: 0.9372379894894876, iteration: 97434
loss: 0.9941903948783875,grad_norm: 0.8958734485260946, iteration: 97435
loss: 1.0288077592849731,grad_norm: 0.9999991244633177, iteration: 97436
loss: 1.0206067562103271,grad_norm: 0.9999995840898873, iteration: 97437
loss: 0.976039707660675,grad_norm: 0.9999992284862836, iteration: 97438
loss: 0.9869725108146667,grad_norm: 0.9999991088137967, iteration: 97439
loss: 0.9933393597602844,grad_norm: 0.9999991925230515, iteration: 97440
loss: 1.0311402082443237,grad_norm: 0.9999991519746654, iteration: 97441
loss: 0.9954805374145508,grad_norm: 0.9636594485941483, iteration: 97442
loss: 0.9991264343261719,grad_norm: 0.9999990258784309, iteration: 97443
loss: 0.9189378619194031,grad_norm: 0.9999991624564688, iteration: 97444
loss: 1.0124224424362183,grad_norm: 0.999999021805041, iteration: 97445
loss: 1.0992792844772339,grad_norm: 0.9999994459929289, iteration: 97446
loss: 1.0054410696029663,grad_norm: 0.9999991404949932, iteration: 97447
loss: 0.9879357218742371,grad_norm: 0.9476009559824207, iteration: 97448
loss: 0.9766411185264587,grad_norm: 0.9999990091975456, iteration: 97449
loss: 0.9969912767410278,grad_norm: 0.9999995521851412, iteration: 97450
loss: 1.0200889110565186,grad_norm: 0.999999240939215, iteration: 97451
loss: 0.9862627387046814,grad_norm: 0.999999041479079, iteration: 97452
loss: 1.0133671760559082,grad_norm: 0.9530710458713347, iteration: 97453
loss: 0.9763371348381042,grad_norm: 0.9999990528030398, iteration: 97454
loss: 0.9834882020950317,grad_norm: 0.9999993383617536, iteration: 97455
loss: 1.0661461353302002,grad_norm: 0.9999989530195518, iteration: 97456
loss: 0.9996827840805054,grad_norm: 0.9188242238638458, iteration: 97457
loss: 1.008553385734558,grad_norm: 0.9920545617042485, iteration: 97458
loss: 1.003804087638855,grad_norm: 0.9658031710871775, iteration: 97459
loss: 1.0158895254135132,grad_norm: 0.9999995472027767, iteration: 97460
loss: 1.0632132291793823,grad_norm: 0.9999992337217988, iteration: 97461
loss: 0.9990189671516418,grad_norm: 0.9870240578493724, iteration: 97462
loss: 1.0240437984466553,grad_norm: 0.9999991194848531, iteration: 97463
loss: 1.0301645994186401,grad_norm: 0.999999174273261, iteration: 97464
loss: 0.9779589176177979,grad_norm: 0.9051640865940892, iteration: 97465
loss: 0.9821659326553345,grad_norm: 0.9999990701795929, iteration: 97466
loss: 1.0177103281021118,grad_norm: 0.9385311096212555, iteration: 97467
loss: 0.9858472347259521,grad_norm: 0.9999992605880714, iteration: 97468
loss: 1.0230298042297363,grad_norm: 0.9538278469319498, iteration: 97469
loss: 0.9900908470153809,grad_norm: 0.99999897647738, iteration: 97470
loss: 0.9637379050254822,grad_norm: 0.9999991899302284, iteration: 97471
loss: 1.0047292709350586,grad_norm: 0.9999991783876419, iteration: 97472
loss: 0.9813041687011719,grad_norm: 0.9791280876495755, iteration: 97473
loss: 1.0470871925354004,grad_norm: 0.9419424260665007, iteration: 97474
loss: 0.9846662282943726,grad_norm: 0.9999990317282162, iteration: 97475
loss: 0.9822070598602295,grad_norm: 0.9278923515514448, iteration: 97476
loss: 0.9959428310394287,grad_norm: 0.9999992923583321, iteration: 97477
loss: 1.03846275806427,grad_norm: 0.9999999157871535, iteration: 97478
loss: 0.9735761880874634,grad_norm: 0.9999991194228074, iteration: 97479
loss: 0.9684280753135681,grad_norm: 0.921801270604449, iteration: 97480
loss: 1.0260615348815918,grad_norm: 0.9999993136766075, iteration: 97481
loss: 1.016979455947876,grad_norm: 0.8710889983615302, iteration: 97482
loss: 1.0120247602462769,grad_norm: 0.9999992916589023, iteration: 97483
loss: 0.9658918380737305,grad_norm: 0.9999991172839077, iteration: 97484
loss: 1.1017565727233887,grad_norm: 0.9714250452433266, iteration: 97485
loss: 1.0173593759536743,grad_norm: 0.9999991372290198, iteration: 97486
loss: 1.0139540433883667,grad_norm: 0.9999991129615771, iteration: 97487
loss: 0.9989430904388428,grad_norm: 0.9999991157181141, iteration: 97488
loss: 1.0403263568878174,grad_norm: 0.9999990989410192, iteration: 97489
loss: 1.0519731044769287,grad_norm: 0.999999842381471, iteration: 97490
loss: 1.0112135410308838,grad_norm: 0.9999990777207731, iteration: 97491
loss: 1.0996540784835815,grad_norm: 0.9999993368791034, iteration: 97492
loss: 0.9901465177536011,grad_norm: 0.9999991901654822, iteration: 97493
loss: 0.9732486009597778,grad_norm: 0.999999565361314, iteration: 97494
loss: 0.9982679486274719,grad_norm: 0.9606761437921015, iteration: 97495
loss: 0.9993219375610352,grad_norm: 0.9999991339234785, iteration: 97496
loss: 0.9580519199371338,grad_norm: 0.9806453621224882, iteration: 97497
loss: 1.0008862018585205,grad_norm: 0.9999990865109122, iteration: 97498
loss: 0.9652371406555176,grad_norm: 0.9999990871583944, iteration: 97499
loss: 0.9582467079162598,grad_norm: 0.9999989962726937, iteration: 97500
loss: 1.0115928649902344,grad_norm: 0.9771207458001765, iteration: 97501
loss: 1.0019580125808716,grad_norm: 0.9999990971259967, iteration: 97502
loss: 1.0267876386642456,grad_norm: 0.9999990056873947, iteration: 97503
loss: 0.9955425262451172,grad_norm: 0.9999991324790461, iteration: 97504
loss: 0.9898860454559326,grad_norm: 0.9411769736168869, iteration: 97505
loss: 1.0034441947937012,grad_norm: 0.999999014309931, iteration: 97506
loss: 1.0028356313705444,grad_norm: 0.9999992053061421, iteration: 97507
loss: 0.9746177196502686,grad_norm: 0.999999145400927, iteration: 97508
loss: 0.9729008078575134,grad_norm: 0.999999090998128, iteration: 97509
loss: 1.0080773830413818,grad_norm: 0.889221827663303, iteration: 97510
loss: 0.9761696457862854,grad_norm: 0.92377358755697, iteration: 97511
loss: 0.972529947757721,grad_norm: 0.8088145148536294, iteration: 97512
loss: 0.9716156125068665,grad_norm: 0.9999992187814143, iteration: 97513
loss: 1.0161346197128296,grad_norm: 0.999999246018256, iteration: 97514
loss: 0.9940595030784607,grad_norm: 0.9999996521358272, iteration: 97515
loss: 1.006373405456543,grad_norm: 0.9212804408291673, iteration: 97516
loss: 0.9974223971366882,grad_norm: 0.9905763944532062, iteration: 97517
loss: 0.9536195993423462,grad_norm: 0.9999991094159044, iteration: 97518
loss: 0.9946600198745728,grad_norm: 0.999999168847983, iteration: 97519
loss: 1.014722228050232,grad_norm: 0.9999991878189555, iteration: 97520
loss: 1.0325329303741455,grad_norm: 0.9999989855828944, iteration: 97521
loss: 0.9942182898521423,grad_norm: 0.999999225508394, iteration: 97522
loss: 1.033664584159851,grad_norm: 0.999998960602301, iteration: 97523
loss: 0.9412587881088257,grad_norm: 0.9999992110513689, iteration: 97524
loss: 1.0088716745376587,grad_norm: 0.999999393168474, iteration: 97525
loss: 0.9866171479225159,grad_norm: 0.9520612516506037, iteration: 97526
loss: 0.9706549644470215,grad_norm: 0.9999991991921444, iteration: 97527
loss: 0.9834104180335999,grad_norm: 0.9305365306123378, iteration: 97528
loss: 0.9906530976295471,grad_norm: 0.9115454666996609, iteration: 97529
loss: 1.0071258544921875,grad_norm: 0.8923200375904462, iteration: 97530
loss: 0.9950714111328125,grad_norm: 0.9999991859940044, iteration: 97531
loss: 1.0046820640563965,grad_norm: 0.9618499596453143, iteration: 97532
loss: 1.0136048793792725,grad_norm: 0.9999992346515201, iteration: 97533
loss: 1.0354183912277222,grad_norm: 0.999999116749565, iteration: 97534
loss: 1.041491150856018,grad_norm: 0.9999993399066199, iteration: 97535
loss: 0.9904665946960449,grad_norm: 0.9505452953478547, iteration: 97536
loss: 1.0413023233413696,grad_norm: 0.9999991137914649, iteration: 97537
loss: 1.002389669418335,grad_norm: 0.8289631526282321, iteration: 97538
loss: 0.9956369996070862,grad_norm: 0.9983326780688695, iteration: 97539
loss: 0.9923574328422546,grad_norm: 0.9656684407902484, iteration: 97540
loss: 1.0481475591659546,grad_norm: 0.9999990655166534, iteration: 97541
loss: 0.9911299347877502,grad_norm: 0.9999991845263677, iteration: 97542
loss: 0.9860254526138306,grad_norm: 0.9999991683671011, iteration: 97543
loss: 1.0178357362747192,grad_norm: 0.9999992348595772, iteration: 97544
loss: 1.0045777559280396,grad_norm: 0.9999990722348414, iteration: 97545
loss: 0.9949190020561218,grad_norm: 0.9421587154213782, iteration: 97546
loss: 1.005979299545288,grad_norm: 0.9999994414122272, iteration: 97547
loss: 1.0236961841583252,grad_norm: 0.9999990354880959, iteration: 97548
loss: 1.0057123899459839,grad_norm: 0.9999990548638652, iteration: 97549
loss: 0.9811620116233826,grad_norm: 0.9057902854775988, iteration: 97550
loss: 1.0081603527069092,grad_norm: 0.999998886954575, iteration: 97551
loss: 1.0025743246078491,grad_norm: 0.9999990713738661, iteration: 97552
loss: 1.0098563432693481,grad_norm: 0.9999992066490702, iteration: 97553
loss: 1.0082173347473145,grad_norm: 0.9999991419977496, iteration: 97554
loss: 1.0277193784713745,grad_norm: 0.9999990260992281, iteration: 97555
loss: 0.9933329820632935,grad_norm: 0.9999991345576845, iteration: 97556
loss: 1.0279194116592407,grad_norm: 0.9468217888354372, iteration: 97557
loss: 0.9768417477607727,grad_norm: 0.9999992462377604, iteration: 97558
loss: 0.9597172737121582,grad_norm: 0.9999991202137153, iteration: 97559
loss: 1.0259596109390259,grad_norm: 0.9999990858959228, iteration: 97560
loss: 1.014066219329834,grad_norm: 0.9999990472859202, iteration: 97561
loss: 0.9938027858734131,grad_norm: 0.9999992278909985, iteration: 97562
loss: 0.9820458292961121,grad_norm: 0.9283907359245052, iteration: 97563
loss: 0.994450032711029,grad_norm: 0.9714491512071074, iteration: 97564
loss: 1.0002479553222656,grad_norm: 0.9999989900342696, iteration: 97565
loss: 1.0080519914627075,grad_norm: 0.9999992189944372, iteration: 97566
loss: 1.033577799797058,grad_norm: 0.9999990420114564, iteration: 97567
loss: 1.014649748802185,grad_norm: 0.9907138654923141, iteration: 97568
loss: 1.0201044082641602,grad_norm: 0.9999990385218295, iteration: 97569
loss: 1.0065762996673584,grad_norm: 0.9999991272861927, iteration: 97570
loss: 0.9928703904151917,grad_norm: 0.9145336492344268, iteration: 97571
loss: 0.9988250732421875,grad_norm: 0.999999149224411, iteration: 97572
loss: 1.03628408908844,grad_norm: 0.9999993124852308, iteration: 97573
loss: 1.004770278930664,grad_norm: 0.9999998499817667, iteration: 97574
loss: 0.9950629472732544,grad_norm: 0.9592526894804436, iteration: 97575
loss: 1.0383039712905884,grad_norm: 0.9805113829956733, iteration: 97576
loss: 0.9833980202674866,grad_norm: 0.9698007111118649, iteration: 97577
loss: 1.0108177661895752,grad_norm: 0.917004365167324, iteration: 97578
loss: 1.020265817642212,grad_norm: 0.9017811126411976, iteration: 97579
loss: 1.0681931972503662,grad_norm: 0.9999992799650125, iteration: 97580
loss: 0.9798705577850342,grad_norm: 0.9999991243141209, iteration: 97581
loss: 0.9954485297203064,grad_norm: 0.9650696423453128, iteration: 97582
loss: 0.9734557867050171,grad_norm: 0.9999991514022686, iteration: 97583
loss: 0.9881792068481445,grad_norm: 0.9999991265372907, iteration: 97584
loss: 1.0024458169937134,grad_norm: 0.9999992161466764, iteration: 97585
loss: 1.0299921035766602,grad_norm: 0.9999992251439291, iteration: 97586
loss: 0.9644066095352173,grad_norm: 0.9999992099421017, iteration: 97587
loss: 1.0165175199508667,grad_norm: 0.9999999545873055, iteration: 97588
loss: 1.0093214511871338,grad_norm: 0.9807545711618292, iteration: 97589
loss: 0.9512640237808228,grad_norm: 0.9999991040682121, iteration: 97590
loss: 1.0220342874526978,grad_norm: 0.9999990848362396, iteration: 97591
loss: 0.9981833696365356,grad_norm: 0.8535136385157249, iteration: 97592
loss: 0.9953103065490723,grad_norm: 0.9999992821124671, iteration: 97593
loss: 1.0161770582199097,grad_norm: 0.8261444967047867, iteration: 97594
loss: 1.0201482772827148,grad_norm: 0.9999991689070237, iteration: 97595
loss: 0.9983727335929871,grad_norm: 0.9999992467698603, iteration: 97596
loss: 0.9945594668388367,grad_norm: 0.9659870875539631, iteration: 97597
loss: 1.0319021940231323,grad_norm: 0.9574072606893027, iteration: 97598
loss: 0.9996255040168762,grad_norm: 0.8664766383009445, iteration: 97599
loss: 1.0065124034881592,grad_norm: 0.9763188914522697, iteration: 97600
loss: 0.963214635848999,grad_norm: 0.9528747758460221, iteration: 97601
loss: 1.0093148946762085,grad_norm: 0.9999991522645508, iteration: 97602
loss: 0.9799728989601135,grad_norm: 0.9999994875966135, iteration: 97603
loss: 0.9943941235542297,grad_norm: 0.9121374732655451, iteration: 97604
loss: 1.0090081691741943,grad_norm: 0.9999990330012449, iteration: 97605
loss: 0.9997460842132568,grad_norm: 0.9999991693119032, iteration: 97606
loss: 0.9610313177108765,grad_norm: 0.900995939719869, iteration: 97607
loss: 1.0136535167694092,grad_norm: 0.9999990654698284, iteration: 97608
loss: 1.010290265083313,grad_norm: 0.9999991557563804, iteration: 97609
loss: 0.996897280216217,grad_norm: 0.8693750392512483, iteration: 97610
loss: 0.9469262957572937,grad_norm: 0.9999991972748752, iteration: 97611
loss: 1.0031116008758545,grad_norm: 0.9999991906395738, iteration: 97612
loss: 1.0268034934997559,grad_norm: 0.9999990290499854, iteration: 97613
loss: 0.9810981154441833,grad_norm: 0.8631493091749304, iteration: 97614
loss: 1.0124551057815552,grad_norm: 0.9999991873855946, iteration: 97615
loss: 1.037853479385376,grad_norm: 0.999999208764132, iteration: 97616
loss: 1.0600239038467407,grad_norm: 0.9999991131438263, iteration: 97617
loss: 1.0050232410430908,grad_norm: 0.9899585751645552, iteration: 97618
loss: 0.9923676252365112,grad_norm: 0.9812111673804422, iteration: 97619
loss: 1.023822546005249,grad_norm: 0.9999990600637984, iteration: 97620
loss: 0.9990324378013611,grad_norm: 0.9999991042413866, iteration: 97621
loss: 1.028396725654602,grad_norm: 0.9999991107849976, iteration: 97622
loss: 0.9803272485733032,grad_norm: 0.9999990475075364, iteration: 97623
loss: 1.0043421983718872,grad_norm: 0.9999990690464619, iteration: 97624
loss: 0.9807010889053345,grad_norm: 0.953582717563763, iteration: 97625
loss: 0.9926744699478149,grad_norm: 0.9855927848832005, iteration: 97626
loss: 1.0180145502090454,grad_norm: 0.8905120903701699, iteration: 97627
loss: 1.0315141677856445,grad_norm: 0.9711074543721522, iteration: 97628
loss: 1.0744329690933228,grad_norm: 0.99999917865677, iteration: 97629
loss: 0.9602332711219788,grad_norm: 0.9999991457919654, iteration: 97630
loss: 0.9932749271392822,grad_norm: 0.9999993176156902, iteration: 97631
loss: 1.0100185871124268,grad_norm: 0.9999991225692263, iteration: 97632
loss: 0.9865909218788147,grad_norm: 0.9999990980128315, iteration: 97633
loss: 0.9516096115112305,grad_norm: 0.9790188947055916, iteration: 97634
loss: 0.9956753253936768,grad_norm: 0.9999991997069047, iteration: 97635
loss: 1.009736180305481,grad_norm: 0.9999992180190732, iteration: 97636
loss: 1.0330642461776733,grad_norm: 0.9999998720226704, iteration: 97637
loss: 1.0314139127731323,grad_norm: 0.8015908291723842, iteration: 97638
loss: 1.0196291208267212,grad_norm: 0.9738440527718897, iteration: 97639
loss: 1.0391111373901367,grad_norm: 0.9999994317996347, iteration: 97640
loss: 1.0357211828231812,grad_norm: 0.9999997575341051, iteration: 97641
loss: 0.9668660759925842,grad_norm: 0.9293290084472737, iteration: 97642
loss: 0.988063633441925,grad_norm: 0.9853354207291345, iteration: 97643
loss: 0.9937182664871216,grad_norm: 0.8415962628307384, iteration: 97644
loss: 1.0182551145553589,grad_norm: 0.9999990695508854, iteration: 97645
loss: 0.9740915894508362,grad_norm: 0.9892660923440418, iteration: 97646
loss: 1.0013319253921509,grad_norm: 0.999999112033129, iteration: 97647
loss: 1.03086519241333,grad_norm: 0.9999997609040524, iteration: 97648
loss: 0.9519004821777344,grad_norm: 0.9999989996815207, iteration: 97649
loss: 1.0127536058425903,grad_norm: 0.9200105453014998, iteration: 97650
loss: 0.9658617377281189,grad_norm: 0.9999990884521682, iteration: 97651
loss: 0.9911978840827942,grad_norm: 0.9999991419279015, iteration: 97652
loss: 0.984633207321167,grad_norm: 0.9526080236853995, iteration: 97653
loss: 0.9949961304664612,grad_norm: 0.9999990268389888, iteration: 97654
loss: 0.990946352481842,grad_norm: 0.9999989042976432, iteration: 97655
loss: 0.9681949615478516,grad_norm: 0.9999991666639019, iteration: 97656
loss: 1.1790016889572144,grad_norm: 0.999999476720006, iteration: 97657
loss: 1.0195516347885132,grad_norm: 0.9633229927553981, iteration: 97658
loss: 1.2225695848464966,grad_norm: 0.9999996394113866, iteration: 97659
loss: 1.189851999282837,grad_norm: 0.9999991758336468, iteration: 97660
loss: 1.030086874961853,grad_norm: 0.9554040775341408, iteration: 97661
loss: 1.0001939535140991,grad_norm: 0.9999990269583718, iteration: 97662
loss: 1.0836007595062256,grad_norm: 0.8861494396392952, iteration: 97663
loss: 1.2824684381484985,grad_norm: 0.9999998330139624, iteration: 97664
loss: 1.0589501857757568,grad_norm: 0.9999997388498908, iteration: 97665
loss: 1.1534935235977173,grad_norm: 0.9999993533779861, iteration: 97666
loss: 1.0246877670288086,grad_norm: 0.9999991374766033, iteration: 97667
loss: 1.0312418937683105,grad_norm: 0.9999990710716858, iteration: 97668
loss: 0.9810763597488403,grad_norm: 0.9999990310941307, iteration: 97669
loss: 0.9976813197135925,grad_norm: 0.9999990416090624, iteration: 97670
loss: 1.0444631576538086,grad_norm: 0.9999994505556365, iteration: 97671
loss: 1.110927700996399,grad_norm: 0.9999995778759164, iteration: 97672
loss: 1.0271929502487183,grad_norm: 0.9999991668203105, iteration: 97673
loss: 1.0456149578094482,grad_norm: 0.9999992373523172, iteration: 97674
loss: 0.9636073708534241,grad_norm: 0.999999090407229, iteration: 97675
loss: 0.9887226819992065,grad_norm: 0.9999992701794124, iteration: 97676
loss: 1.0049666166305542,grad_norm: 0.999999045603702, iteration: 97677
loss: 1.0519670248031616,grad_norm: 0.9057310616147893, iteration: 97678
loss: 1.038642168045044,grad_norm: 0.9999996515475259, iteration: 97679
loss: 1.0322381258010864,grad_norm: 0.9999988487659992, iteration: 97680
loss: 1.0051110982894897,grad_norm: 0.999999307244502, iteration: 97681
loss: 1.0095008611679077,grad_norm: 0.9999991351615559, iteration: 97682
loss: 0.980171263217926,grad_norm: 0.9999990899098434, iteration: 97683
loss: 1.1074265241622925,grad_norm: 0.9999998080360882, iteration: 97684
loss: 1.0910468101501465,grad_norm: 0.9999998155392603, iteration: 97685
loss: 1.0235073566436768,grad_norm: 0.9999997031664519, iteration: 97686
loss: 1.06812584400177,grad_norm: 0.9999996800629658, iteration: 97687
loss: 1.023828387260437,grad_norm: 0.9999993378600431, iteration: 97688
loss: 1.0340914726257324,grad_norm: 0.9999992338762822, iteration: 97689
loss: 1.0465930700302124,grad_norm: 0.999999927987177, iteration: 97690
loss: 1.025523066520691,grad_norm: 0.9999992299630263, iteration: 97691
loss: 1.0226699113845825,grad_norm: 0.9999990590478567, iteration: 97692
loss: 1.0203258991241455,grad_norm: 0.9999991134160059, iteration: 97693
loss: 1.0150283575057983,grad_norm: 0.9999990026532349, iteration: 97694
loss: 1.0066251754760742,grad_norm: 0.9999993963472892, iteration: 97695
loss: 0.9947410225868225,grad_norm: 0.9999993309679854, iteration: 97696
loss: 1.013538122177124,grad_norm: 0.9344748015609489, iteration: 97697
loss: 1.0030803680419922,grad_norm: 0.9999990463212366, iteration: 97698
loss: 0.9961100816726685,grad_norm: 0.9999992394285884, iteration: 97699
loss: 0.9352356195449829,grad_norm: 0.9371541261117585, iteration: 97700
loss: 1.0036749839782715,grad_norm: 0.8804915455868212, iteration: 97701
loss: 1.0785244703292847,grad_norm: 0.9999992897708376, iteration: 97702
loss: 1.0100939273834229,grad_norm: 0.9999998846863704, iteration: 97703
loss: 0.9814919829368591,grad_norm: 0.9999990214185331, iteration: 97704
loss: 1.0168095827102661,grad_norm: 0.9501939265984295, iteration: 97705
loss: 0.9882891774177551,grad_norm: 0.9999995049642871, iteration: 97706
loss: 1.0043050050735474,grad_norm: 0.9934387250985056, iteration: 97707
loss: 1.0563818216323853,grad_norm: 0.9999998196534275, iteration: 97708
loss: 1.048187494277954,grad_norm: 0.9999993816567114, iteration: 97709
loss: 1.1240381002426147,grad_norm: 0.9999999322603748, iteration: 97710
loss: 1.0744690895080566,grad_norm: 0.9999991362903569, iteration: 97711
loss: 1.1739587783813477,grad_norm: 0.999999805495282, iteration: 97712
loss: 1.0225297212600708,grad_norm: 0.9999995665185774, iteration: 97713
loss: 0.969678521156311,grad_norm: 0.9999990705184271, iteration: 97714
loss: 1.0517327785491943,grad_norm: 0.9999993860739013, iteration: 97715
loss: 1.1275434494018555,grad_norm: 0.9999995983571066, iteration: 97716
loss: 1.1113256216049194,grad_norm: 0.999999702535739, iteration: 97717
loss: 1.0553444623947144,grad_norm: 0.9999993062118278, iteration: 97718
loss: 1.1290775537490845,grad_norm: 1.0000000449878568, iteration: 97719
loss: 1.1964548826217651,grad_norm: 0.9999998887957159, iteration: 97720
loss: 1.0825202465057373,grad_norm: 0.9999994808190276, iteration: 97721
loss: 0.9930800199508667,grad_norm: 0.9999992920575383, iteration: 97722
loss: 0.962668240070343,grad_norm: 0.9999991899208165, iteration: 97723
loss: 0.9685741662979126,grad_norm: 0.9999993243640752, iteration: 97724
loss: 1.082166075706482,grad_norm: 0.9999994076660821, iteration: 97725
loss: 0.9718459248542786,grad_norm: 0.9999991600829485, iteration: 97726
loss: 1.0056958198547363,grad_norm: 0.9999991669769323, iteration: 97727
loss: 1.0247609615325928,grad_norm: 0.9999991901867309, iteration: 97728
loss: 1.052424430847168,grad_norm: 0.9999996725950435, iteration: 97729
loss: 1.0178645849227905,grad_norm: 0.9999991213184941, iteration: 97730
loss: 1.1129111051559448,grad_norm: 0.9999997382018666, iteration: 97731
loss: 1.0255182981491089,grad_norm: 0.999999361525467, iteration: 97732
loss: 1.0003808736801147,grad_norm: 0.9999991766816252, iteration: 97733
loss: 0.9870739579200745,grad_norm: 0.9999991741865453, iteration: 97734
loss: 1.0225151777267456,grad_norm: 0.8716990687498318, iteration: 97735
loss: 1.009886622428894,grad_norm: 0.9999991261681479, iteration: 97736
loss: 1.0700442790985107,grad_norm: 0.9999994442104525, iteration: 97737
loss: 1.0470552444458008,grad_norm: 0.9999992157506334, iteration: 97738
loss: 1.0185089111328125,grad_norm: 0.9999994646632347, iteration: 97739
loss: 1.0725752115249634,grad_norm: 0.9999993774395989, iteration: 97740
loss: 0.995544970035553,grad_norm: 0.9999992096105752, iteration: 97741
loss: 1.0098432302474976,grad_norm: 0.9257750746102402, iteration: 97742
loss: 1.0005184412002563,grad_norm: 0.9999990593130886, iteration: 97743
loss: 0.9736632704734802,grad_norm: 0.8876597658749416, iteration: 97744
loss: 0.9633119106292725,grad_norm: 0.9999993708612536, iteration: 97745
loss: 1.0066571235656738,grad_norm: 0.999999120488784, iteration: 97746
loss: 1.0278973579406738,grad_norm: 0.9999994088538752, iteration: 97747
loss: 0.9915000200271606,grad_norm: 0.999999286746239, iteration: 97748
loss: 1.0077043771743774,grad_norm: 0.9999993781740594, iteration: 97749
loss: 0.9896377325057983,grad_norm: 0.9212232151217568, iteration: 97750
loss: 1.0459829568862915,grad_norm: 0.9999991392269014, iteration: 97751
loss: 0.9604374766349792,grad_norm: 0.9795803884997094, iteration: 97752
loss: 0.9995514154434204,grad_norm: 0.9964852189082022, iteration: 97753
loss: 0.9750881195068359,grad_norm: 0.9610760764399857, iteration: 97754
loss: 0.988465428352356,grad_norm: 0.9999991233416223, iteration: 97755
loss: 0.9991354942321777,grad_norm: 0.9999991479413575, iteration: 97756
loss: 1.0005414485931396,grad_norm: 0.9021893682488396, iteration: 97757
loss: 1.016360878944397,grad_norm: 0.9999996548400005, iteration: 97758
loss: 0.9863005876541138,grad_norm: 0.9999991219283084, iteration: 97759
loss: 0.9509760141372681,grad_norm: 0.9769984787426188, iteration: 97760
loss: 1.0157753229141235,grad_norm: 0.9999991541606399, iteration: 97761
loss: 0.9817271828651428,grad_norm: 0.9999991014408465, iteration: 97762
loss: 0.978359580039978,grad_norm: 0.999999273320149, iteration: 97763
loss: 0.9842064380645752,grad_norm: 0.9186887376377959, iteration: 97764
loss: 0.9871569275856018,grad_norm: 0.9999994381479637, iteration: 97765
loss: 1.0060456991195679,grad_norm: 0.9999990283585715, iteration: 97766
loss: 0.9786520600318909,grad_norm: 0.8680788657891669, iteration: 97767
loss: 1.000002145767212,grad_norm: 0.9548096485399725, iteration: 97768
loss: 0.9953566789627075,grad_norm: 0.9999991071521591, iteration: 97769
loss: 0.9652987122535706,grad_norm: 0.9999992042475951, iteration: 97770
loss: 1.002016544342041,grad_norm: 0.9999990645500615, iteration: 97771
loss: 0.9935575723648071,grad_norm: 0.9609843426351645, iteration: 97772
loss: 1.0065996646881104,grad_norm: 0.9779020226121118, iteration: 97773
loss: 0.9855870008468628,grad_norm: 0.8863542162610303, iteration: 97774
loss: 1.0462095737457275,grad_norm: 0.9999991207571832, iteration: 97775
loss: 0.9801310300827026,grad_norm: 0.9999991774631222, iteration: 97776
loss: 1.034588098526001,grad_norm: 0.9441218259677409, iteration: 97777
loss: 0.997025191783905,grad_norm: 0.9704135596942781, iteration: 97778
loss: 1.0183805227279663,grad_norm: 0.999999773119528, iteration: 97779
loss: 0.986580491065979,grad_norm: 0.9949806073547807, iteration: 97780
loss: 1.0336863994598389,grad_norm: 0.9999990966435554, iteration: 97781
loss: 1.0171197652816772,grad_norm: 0.9999991790227076, iteration: 97782
loss: 0.973473846912384,grad_norm: 0.9999993706018526, iteration: 97783
loss: 1.0116342306137085,grad_norm: 0.9999990419514874, iteration: 97784
loss: 1.001521348953247,grad_norm: 0.9999992673688799, iteration: 97785
loss: 0.9822004437446594,grad_norm: 0.9999991794848611, iteration: 97786
loss: 1.0369133949279785,grad_norm: 0.9999991802660143, iteration: 97787
loss: 1.0524787902832031,grad_norm: 0.9999990749801777, iteration: 97788
loss: 1.0213122367858887,grad_norm: 0.9999990215181483, iteration: 97789
loss: 1.068234920501709,grad_norm: 0.9999994931814554, iteration: 97790
loss: 1.033574104309082,grad_norm: 0.999999453635866, iteration: 97791
loss: 1.0376523733139038,grad_norm: 0.9999991379288665, iteration: 97792
loss: 0.9941489100456238,grad_norm: 0.9999991264692044, iteration: 97793
loss: 1.0113682746887207,grad_norm: 0.9162737578629007, iteration: 97794
loss: 1.0107530355453491,grad_norm: 0.9946645772465256, iteration: 97795
loss: 0.9642616510391235,grad_norm: 0.9999990814724482, iteration: 97796
loss: 1.0524089336395264,grad_norm: 0.999999747725431, iteration: 97797
loss: 0.9747862219810486,grad_norm: 0.999999249296853, iteration: 97798
loss: 1.0196895599365234,grad_norm: 0.9999994230199298, iteration: 97799
loss: 0.9820157885551453,grad_norm: 0.9999991108423976, iteration: 97800
loss: 1.051508903503418,grad_norm: 0.9999992001693325, iteration: 97801
loss: 1.0164703130722046,grad_norm: 0.99999983632463, iteration: 97802
loss: 1.0291616916656494,grad_norm: 0.9999991094253361, iteration: 97803
loss: 0.9730435013771057,grad_norm: 0.9999991810216634, iteration: 97804
loss: 1.0327293872833252,grad_norm: 0.9999991260116287, iteration: 97805
loss: 0.9712316989898682,grad_norm: 0.9488528811004221, iteration: 97806
loss: 0.9947836995124817,grad_norm: 0.9970374933556629, iteration: 97807
loss: 1.0031739473342896,grad_norm: 0.9999991106806583, iteration: 97808
loss: 1.0116710662841797,grad_norm: 0.9999991946110606, iteration: 97809
loss: 1.0231399536132812,grad_norm: 0.9999992817874448, iteration: 97810
loss: 1.0007960796356201,grad_norm: 0.9627703364934049, iteration: 97811
loss: 0.9951367378234863,grad_norm: 0.9999991392886969, iteration: 97812
loss: 0.9902030229568481,grad_norm: 0.9999991950026859, iteration: 97813
loss: 1.0261741876602173,grad_norm: 0.999999131975944, iteration: 97814
loss: 0.9992761015892029,grad_norm: 0.9999995591992774, iteration: 97815
loss: 0.991996169090271,grad_norm: 0.9999997116859767, iteration: 97816
loss: 1.020103931427002,grad_norm: 0.999999054079348, iteration: 97817
loss: 1.0665791034698486,grad_norm: 0.9999998701653038, iteration: 97818
loss: 1.0214025974273682,grad_norm: 0.999998955500424, iteration: 97819
loss: 0.992097795009613,grad_norm: 0.9066633492907047, iteration: 97820
loss: 0.9837542176246643,grad_norm: 0.9999990432727525, iteration: 97821
loss: 1.0153082609176636,grad_norm: 0.9857855577007442, iteration: 97822
loss: 0.9614325165748596,grad_norm: 0.914068242189934, iteration: 97823
loss: 0.9879938960075378,grad_norm: 0.9999991513514942, iteration: 97824
loss: 1.0344233512878418,grad_norm: 0.9999995220970214, iteration: 97825
loss: 1.0061500072479248,grad_norm: 0.9999992806946215, iteration: 97826
loss: 1.0219414234161377,grad_norm: 0.99999916091515, iteration: 97827
loss: 1.020838737487793,grad_norm: 0.9999991600990996, iteration: 97828
loss: 1.0027011632919312,grad_norm: 0.8956473303018351, iteration: 97829
loss: 1.0160129070281982,grad_norm: 0.9999993179495762, iteration: 97830
loss: 1.0154107809066772,grad_norm: 0.9999992250773966, iteration: 97831
loss: 0.9916012287139893,grad_norm: 0.9634025589872537, iteration: 97832
loss: 1.0063741207122803,grad_norm: 0.9999991561679628, iteration: 97833
loss: 1.0084422826766968,grad_norm: 0.9999989999612338, iteration: 97834
loss: 1.1414568424224854,grad_norm: 0.9999999157098176, iteration: 97835
loss: 0.9763656258583069,grad_norm: 0.9999992068082489, iteration: 97836
loss: 0.9875059127807617,grad_norm: 0.9904815261149228, iteration: 97837
loss: 1.0063161849975586,grad_norm: 0.9999991803469503, iteration: 97838
loss: 1.0348933935165405,grad_norm: 0.9544141729352088, iteration: 97839
loss: 1.0062984228134155,grad_norm: 0.9112525761524544, iteration: 97840
loss: 1.0071539878845215,grad_norm: 0.9999990609542941, iteration: 97841
loss: 1.0116569995880127,grad_norm: 0.9999992179105787, iteration: 97842
loss: 0.9911870360374451,grad_norm: 0.9999988850383733, iteration: 97843
loss: 1.0191739797592163,grad_norm: 0.999999020067594, iteration: 97844
loss: 0.9991762042045593,grad_norm: 0.971943442142007, iteration: 97845
loss: 1.0055012702941895,grad_norm: 0.9922536160165204, iteration: 97846
loss: 1.2206581830978394,grad_norm: 0.9999998704443891, iteration: 97847
loss: 0.9876499176025391,grad_norm: 0.999999100445884, iteration: 97848
loss: 0.9526057243347168,grad_norm: 0.999999288276462, iteration: 97849
loss: 1.0052870512008667,grad_norm: 0.999999135445965, iteration: 97850
loss: 1.0598286390304565,grad_norm: 0.9999992828449101, iteration: 97851
loss: 1.0287470817565918,grad_norm: 0.9032341367685441, iteration: 97852
loss: 1.0092171430587769,grad_norm: 0.999999017277149, iteration: 97853
loss: 1.0162996053695679,grad_norm: 0.999999146187527, iteration: 97854
loss: 1.0092811584472656,grad_norm: 0.9887591525933166, iteration: 97855
loss: 1.0204771757125854,grad_norm: 0.99999904692889, iteration: 97856
loss: 1.0320391654968262,grad_norm: 0.9999991289960112, iteration: 97857
loss: 0.9833534955978394,grad_norm: 0.9999991294591852, iteration: 97858
loss: 0.9765638113021851,grad_norm: 0.919941032829917, iteration: 97859
loss: 1.0014160871505737,grad_norm: 0.9999991853554642, iteration: 97860
loss: 1.0171597003936768,grad_norm: 0.9999991451698795, iteration: 97861
loss: 1.0082143545150757,grad_norm: 0.9999990806446271, iteration: 97862
loss: 1.0054515600204468,grad_norm: 0.921261930951319, iteration: 97863
loss: 1.001589298248291,grad_norm: 0.9999990541940184, iteration: 97864
loss: 0.9734272360801697,grad_norm: 0.8807959811086216, iteration: 97865
loss: 0.9773717522621155,grad_norm: 0.9999991677642247, iteration: 97866
loss: 1.022940754890442,grad_norm: 0.9999995343069448, iteration: 97867
loss: 1.0144565105438232,grad_norm: 0.9999991736107764, iteration: 97868
loss: 1.0124261379241943,grad_norm: 0.9999991753510213, iteration: 97869
loss: 1.0273358821868896,grad_norm: 0.999998998239134, iteration: 97870
loss: 0.9713536500930786,grad_norm: 0.9999991132350069, iteration: 97871
loss: 0.9729403853416443,grad_norm: 0.9999991713581396, iteration: 97872
loss: 0.9877635836601257,grad_norm: 0.9999991154734751, iteration: 97873
loss: 1.0105719566345215,grad_norm: 0.9473577889631274, iteration: 97874
loss: 1.0005143880844116,grad_norm: 0.9883907288702152, iteration: 97875
loss: 1.0114151239395142,grad_norm: 0.9488302981326426, iteration: 97876
loss: 1.021785020828247,grad_norm: 0.9999993321759925, iteration: 97877
loss: 1.0075154304504395,grad_norm: 0.9999990379262983, iteration: 97878
loss: 1.0103334188461304,grad_norm: 0.9999990759543266, iteration: 97879
loss: 0.9822603464126587,grad_norm: 0.9999991986584233, iteration: 97880
loss: 1.0045734643936157,grad_norm: 0.9999991432423914, iteration: 97881
loss: 0.9981828331947327,grad_norm: 0.9999990534704888, iteration: 97882
loss: 1.0231459140777588,grad_norm: 0.9999991599190803, iteration: 97883
loss: 1.0081068277359009,grad_norm: 0.9999993504855199, iteration: 97884
loss: 1.0182088613510132,grad_norm: 0.9348879759897909, iteration: 97885
loss: 0.9720691442489624,grad_norm: 0.999999088521921, iteration: 97886
loss: 0.9792801737785339,grad_norm: 0.9999992111208124, iteration: 97887
loss: 0.9833836555480957,grad_norm: 0.9336488044385216, iteration: 97888
loss: 0.9909452199935913,grad_norm: 0.9999991521596424, iteration: 97889
loss: 1.0196740627288818,grad_norm: 0.9798835667541538, iteration: 97890
loss: 1.0285794734954834,grad_norm: 0.999999225155915, iteration: 97891
loss: 1.0020253658294678,grad_norm: 0.9033160992534671, iteration: 97892
loss: 1.0382102727890015,grad_norm: 0.9999991047036967, iteration: 97893
loss: 0.9972208738327026,grad_norm: 0.9941452545641932, iteration: 97894
loss: 1.0014269351959229,grad_norm: 0.9999992958035158, iteration: 97895
loss: 1.02499258518219,grad_norm: 0.9140817472180451, iteration: 97896
loss: 0.9885101914405823,grad_norm: 0.9999991092073162, iteration: 97897
loss: 1.0029058456420898,grad_norm: 0.9999991365837153, iteration: 97898
loss: 0.9909103512763977,grad_norm: 0.9919549008845467, iteration: 97899
loss: 1.0006405115127563,grad_norm: 0.9999991707457492, iteration: 97900
loss: 1.0037643909454346,grad_norm: 0.8538861027316142, iteration: 97901
loss: 1.0092062950134277,grad_norm: 0.999999584155774, iteration: 97902
loss: 1.0076072216033936,grad_norm: 0.9999990137399306, iteration: 97903
loss: 1.021799087524414,grad_norm: 0.9999991127628217, iteration: 97904
loss: 0.9769287109375,grad_norm: 0.9999992214929669, iteration: 97905
loss: 1.0278598070144653,grad_norm: 0.9999996036887516, iteration: 97906
loss: 1.00639009475708,grad_norm: 0.9999990579614293, iteration: 97907
loss: 0.9655075073242188,grad_norm: 0.9999993234166529, iteration: 97908
loss: 0.9744077920913696,grad_norm: 0.9999992954264003, iteration: 97909
loss: 1.004977822303772,grad_norm: 0.9999992826693667, iteration: 97910
loss: 1.0008864402770996,grad_norm: 0.9999994088737499, iteration: 97911
loss: 0.9486293792724609,grad_norm: 0.9999991806937463, iteration: 97912
loss: 0.9947975277900696,grad_norm: 0.9999990889160826, iteration: 97913
loss: 0.998246967792511,grad_norm: 0.9999990960624306, iteration: 97914
loss: 1.001903772354126,grad_norm: 0.9999991441937557, iteration: 97915
loss: 1.0244518518447876,grad_norm: 0.9999992048940608, iteration: 97916
loss: 1.0410692691802979,grad_norm: 0.999999124660371, iteration: 97917
loss: 0.955115020275116,grad_norm: 0.9999991311802005, iteration: 97918
loss: 1.010992169380188,grad_norm: 0.9999991219841375, iteration: 97919
loss: 1.0162853002548218,grad_norm: 0.9367337914810656, iteration: 97920
loss: 0.9875292778015137,grad_norm: 0.9999994567486715, iteration: 97921
loss: 0.9778084754943848,grad_norm: 0.9999990593710498, iteration: 97922
loss: 1.0309712886810303,grad_norm: 0.9999992913589019, iteration: 97923
loss: 1.0072890520095825,grad_norm: 0.9999992187727943, iteration: 97924
loss: 0.9747509360313416,grad_norm: 0.9999990561409922, iteration: 97925
loss: 0.9886459112167358,grad_norm: 0.999999189819555, iteration: 97926
loss: 1.0358283519744873,grad_norm: 0.999999007984223, iteration: 97927
loss: 1.0613363981246948,grad_norm: 0.9999996732353021, iteration: 97928
loss: 0.9893379807472229,grad_norm: 0.9999991704086941, iteration: 97929
loss: 1.007421851158142,grad_norm: 0.9999991153740725, iteration: 97930
loss: 0.981075644493103,grad_norm: 0.9999990947225423, iteration: 97931
loss: 0.9816130995750427,grad_norm: 0.9999992216702676, iteration: 97932
loss: 1.011932134628296,grad_norm: 0.9438674570180933, iteration: 97933
loss: 0.9716466069221497,grad_norm: 0.9807131889266645, iteration: 97934
loss: 0.986784040927887,grad_norm: 0.9043595207046463, iteration: 97935
loss: 1.015465497970581,grad_norm: 0.9999992122959382, iteration: 97936
loss: 0.979732871055603,grad_norm: 0.9198883981830949, iteration: 97937
loss: 0.9896865487098694,grad_norm: 0.9999991686581593, iteration: 97938
loss: 0.9568332433700562,grad_norm: 0.9672426567727548, iteration: 97939
loss: 0.9976304769515991,grad_norm: 0.9946936333292585, iteration: 97940
loss: 0.976967453956604,grad_norm: 0.9999990797923264, iteration: 97941
loss: 1.00803542137146,grad_norm: 0.9999992921390126, iteration: 97942
loss: 1.0607380867004395,grad_norm: 0.9930686100368046, iteration: 97943
loss: 0.9718307852745056,grad_norm: 0.9999991347725875, iteration: 97944
loss: 0.9715508818626404,grad_norm: 0.9607371845389862, iteration: 97945
loss: 1.048279047012329,grad_norm: 0.9999989872140547, iteration: 97946
loss: 1.0203449726104736,grad_norm: 0.9459033874279884, iteration: 97947
loss: 0.9800631999969482,grad_norm: 0.9525153124442659, iteration: 97948
loss: 0.9996448755264282,grad_norm: 0.958186600267062, iteration: 97949
loss: 0.9899930357933044,grad_norm: 0.9999991765588264, iteration: 97950
loss: 1.0120259523391724,grad_norm: 0.9999990637158971, iteration: 97951
loss: 1.0041744709014893,grad_norm: 0.9999991264549714, iteration: 97952
loss: 0.9774041175842285,grad_norm: 0.9999988648306677, iteration: 97953
loss: 1.0391736030578613,grad_norm: 0.9479958468161634, iteration: 97954
loss: 0.9877873659133911,grad_norm: 0.9999992511738064, iteration: 97955
loss: 1.015938639640808,grad_norm: 0.9999991874223866, iteration: 97956
loss: 1.0015217065811157,grad_norm: 0.9943359191964102, iteration: 97957
loss: 0.9989210367202759,grad_norm: 0.9980189771307363, iteration: 97958
loss: 0.9461125731468201,grad_norm: 0.9611939572791184, iteration: 97959
loss: 0.9913879632949829,grad_norm: 0.9999990245644151, iteration: 97960
loss: 1.0233445167541504,grad_norm: 0.9816928052808651, iteration: 97961
loss: 0.9885556101799011,grad_norm: 0.9999991038827131, iteration: 97962
loss: 0.9893739223480225,grad_norm: 0.9999991072000664, iteration: 97963
loss: 1.0241044759750366,grad_norm: 0.9064034557248863, iteration: 97964
loss: 1.003393530845642,grad_norm: 0.8883387122392693, iteration: 97965
loss: 1.0041192770004272,grad_norm: 0.9999990694875039, iteration: 97966
loss: 0.9677424430847168,grad_norm: 0.9999991236715696, iteration: 97967
loss: 0.9862228035926819,grad_norm: 0.9999991853213843, iteration: 97968
loss: 1.0216941833496094,grad_norm: 0.9999991793134582, iteration: 97969
loss: 1.0141575336456299,grad_norm: 0.9999990525388737, iteration: 97970
loss: 1.0012589693069458,grad_norm: 0.9999991590392553, iteration: 97971
loss: 0.9582982659339905,grad_norm: 0.9430234158741356, iteration: 97972
loss: 1.0174943208694458,grad_norm: 0.9999992857751784, iteration: 97973
loss: 0.9839836955070496,grad_norm: 0.9999991682224745, iteration: 97974
loss: 0.9850793480873108,grad_norm: 0.9584192539973234, iteration: 97975
loss: 1.0064446926116943,grad_norm: 0.9999990582933855, iteration: 97976
loss: 1.0036109685897827,grad_norm: 0.9999991832648841, iteration: 97977
loss: 1.024236798286438,grad_norm: 0.99999914766877, iteration: 97978
loss: 1.007264494895935,grad_norm: 0.9999989677194311, iteration: 97979
loss: 1.0177632570266724,grad_norm: 0.9999991265907866, iteration: 97980
loss: 0.9909747838973999,grad_norm: 0.9999991293689187, iteration: 97981
loss: 0.9841302037239075,grad_norm: 0.9999990973308178, iteration: 97982
loss: 0.9969601035118103,grad_norm: 0.9999993431722471, iteration: 97983
loss: 1.023476481437683,grad_norm: 0.9999992145064802, iteration: 97984
loss: 1.0155720710754395,grad_norm: 0.9999992033033296, iteration: 97985
loss: 1.0155243873596191,grad_norm: 0.994476731866798, iteration: 97986
loss: 1.0199044942855835,grad_norm: 0.9999991494492958, iteration: 97987
loss: 1.007212519645691,grad_norm: 0.9999999682380842, iteration: 97988
loss: 1.0141345262527466,grad_norm: 0.9999991278900235, iteration: 97989
loss: 0.9871944189071655,grad_norm: 0.9329128443366396, iteration: 97990
loss: 0.9813671708106995,grad_norm: 0.9999991029288875, iteration: 97991
loss: 0.9686527848243713,grad_norm: 0.9999991183438278, iteration: 97992
loss: 1.0084500312805176,grad_norm: 0.8407958063330652, iteration: 97993
loss: 1.0020426511764526,grad_norm: 0.9999989541552244, iteration: 97994
loss: 0.9881972670555115,grad_norm: 0.9457165258310011, iteration: 97995
loss: 0.9842422604560852,grad_norm: 0.9875980112723373, iteration: 97996
loss: 0.9916347861289978,grad_norm: 0.9999996308189406, iteration: 97997
loss: 1.005761742591858,grad_norm: 0.9712229212499326, iteration: 97998
loss: 0.9653640985488892,grad_norm: 0.9999992603611843, iteration: 97999
loss: 1.0101643800735474,grad_norm: 0.9999992672951218, iteration: 98000
loss: 0.9720210433006287,grad_norm: 0.9836087740326869, iteration: 98001
loss: 0.9659721851348877,grad_norm: 0.9999991501037445, iteration: 98002
loss: 1.0014187097549438,grad_norm: 0.9999990101893481, iteration: 98003
loss: 0.9841932058334351,grad_norm: 0.9379175593118847, iteration: 98004
loss: 0.9859673380851746,grad_norm: 0.9999992170602009, iteration: 98005
loss: 0.9851422905921936,grad_norm: 0.999998950715896, iteration: 98006
loss: 0.9908296465873718,grad_norm: 0.9880891208655271, iteration: 98007
loss: 1.0215306282043457,grad_norm: 0.9999995973275435, iteration: 98008
loss: 1.0296939611434937,grad_norm: 0.9999991913173262, iteration: 98009
loss: 0.9729469418525696,grad_norm: 0.9837693395511519, iteration: 98010
loss: 0.9636995792388916,grad_norm: 0.9654216473436491, iteration: 98011
loss: 0.9344614148139954,grad_norm: 0.911261014020377, iteration: 98012
loss: 1.0401875972747803,grad_norm: 0.9214212868593671, iteration: 98013
loss: 1.0268874168395996,grad_norm: 0.9999996923918801, iteration: 98014
loss: 1.0010852813720703,grad_norm: 0.9740923641083529, iteration: 98015
loss: 0.999299168586731,grad_norm: 0.9999991432172165, iteration: 98016
loss: 0.9714233875274658,grad_norm: 0.9999992069086009, iteration: 98017
loss: 1.0349435806274414,grad_norm: 0.999999032577358, iteration: 98018
loss: 1.0164042711257935,grad_norm: 0.99999928403201, iteration: 98019
loss: 0.9968339204788208,grad_norm: 0.9999991758938175, iteration: 98020
loss: 1.0050292015075684,grad_norm: 0.9999992256341015, iteration: 98021
loss: 1.0113712549209595,grad_norm: 0.9999989870157571, iteration: 98022
loss: 0.9727200865745544,grad_norm: 0.8537652730465958, iteration: 98023
loss: 1.0227800607681274,grad_norm: 0.999999034522988, iteration: 98024
loss: 0.9949764013290405,grad_norm: 0.999998988695314, iteration: 98025
loss: 0.9582526087760925,grad_norm: 0.9979474825829643, iteration: 98026
loss: 1.0064144134521484,grad_norm: 0.9558220553849014, iteration: 98027
loss: 0.9635249972343445,grad_norm: 0.9999991861722608, iteration: 98028
loss: 1.0085817575454712,grad_norm: 0.9999991205986137, iteration: 98029
loss: 1.0393933057785034,grad_norm: 0.9999989411076521, iteration: 98030
loss: 1.015466570854187,grad_norm: 0.9999990583526722, iteration: 98031
loss: 1.0435421466827393,grad_norm: 0.9999990812022452, iteration: 98032
loss: 0.9387941360473633,grad_norm: 0.9999990195614658, iteration: 98033
loss: 1.002018928527832,grad_norm: 0.8811093989749654, iteration: 98034
loss: 0.9929203987121582,grad_norm: 0.999999110107708, iteration: 98035
loss: 1.017352819442749,grad_norm: 0.9999992898993804, iteration: 98036
loss: 1.036531925201416,grad_norm: 0.9999992389309424, iteration: 98037
loss: 0.9870105981826782,grad_norm: 0.9999991965485542, iteration: 98038
loss: 1.0127025842666626,grad_norm: 0.9999989782406663, iteration: 98039
loss: 0.9827110767364502,grad_norm: 0.9999991467316826, iteration: 98040
loss: 0.9612445831298828,grad_norm: 0.9999989781328145, iteration: 98041
loss: 1.0790504217147827,grad_norm: 0.999999439342921, iteration: 98042
loss: 1.0275732278823853,grad_norm: 0.817127662064058, iteration: 98043
loss: 0.9824408888816833,grad_norm: 0.9408389564283375, iteration: 98044
loss: 1.0185141563415527,grad_norm: 0.999999692220854, iteration: 98045
loss: 1.0250095129013062,grad_norm: 0.9999992610066133, iteration: 98046
loss: 0.9912897348403931,grad_norm: 0.9999991816998911, iteration: 98047
loss: 1.0100514888763428,grad_norm: 0.9985186570124392, iteration: 98048
loss: 1.0115711688995361,grad_norm: 0.9999991519103825, iteration: 98049
loss: 1.0214306116104126,grad_norm: 0.999999624589375, iteration: 98050
loss: 0.9942979216575623,grad_norm: 0.999999093887357, iteration: 98051
loss: 1.0291736125946045,grad_norm: 0.9362753144797327, iteration: 98052
loss: 1.0252034664154053,grad_norm: 0.9999990519641359, iteration: 98053
loss: 0.9773358702659607,grad_norm: 0.9391119251622009, iteration: 98054
loss: 1.04263174533844,grad_norm: 0.9999990727901515, iteration: 98055
loss: 1.0144540071487427,grad_norm: 0.9999995006379759, iteration: 98056
loss: 1.0407625436782837,grad_norm: 0.9999998603099328, iteration: 98057
loss: 0.9868271946907043,grad_norm: 0.8298989541177211, iteration: 98058
loss: 1.0114768743515015,grad_norm: 0.8885680087167116, iteration: 98059
loss: 1.0290734767913818,grad_norm: 0.9999992537401958, iteration: 98060
loss: 1.0122411251068115,grad_norm: 0.9999991578415491, iteration: 98061
loss: 0.9853228330612183,grad_norm: 0.9999991588886562, iteration: 98062
loss: 0.9807694554328918,grad_norm: 0.999999214590777, iteration: 98063
loss: 1.0156643390655518,grad_norm: 0.9426971164997149, iteration: 98064
loss: 1.024666428565979,grad_norm: 0.9850026044697792, iteration: 98065
loss: 1.0365468263626099,grad_norm: 0.9999990953840365, iteration: 98066
loss: 1.0091012716293335,grad_norm: 0.9760446571293129, iteration: 98067
loss: 0.9772018194198608,grad_norm: 0.9611751250151911, iteration: 98068
loss: 1.0078848600387573,grad_norm: 0.9999989559811936, iteration: 98069
loss: 0.9887057542800903,grad_norm: 0.8545275985089188, iteration: 98070
loss: 0.9932238459587097,grad_norm: 0.9999991932135129, iteration: 98071
loss: 0.999468207359314,grad_norm: 0.9999991891559393, iteration: 98072
loss: 1.002335548400879,grad_norm: 0.9999991396640188, iteration: 98073
loss: 1.0095188617706299,grad_norm: 0.9695223137873965, iteration: 98074
loss: 1.0082532167434692,grad_norm: 0.9999990842051878, iteration: 98075
loss: 1.00685453414917,grad_norm: 0.9999991594302009, iteration: 98076
loss: 0.9982936978340149,grad_norm: 0.9999992398018162, iteration: 98077
loss: 0.972785472869873,grad_norm: 0.9999991357376783, iteration: 98078
loss: 1.0670918226242065,grad_norm: 0.9999991886301403, iteration: 98079
loss: 0.9372488856315613,grad_norm: 0.9999991509657729, iteration: 98080
loss: 1.02494478225708,grad_norm: 0.999999036773755, iteration: 98081
loss: 0.9824010729789734,grad_norm: 0.9795735616503601, iteration: 98082
loss: 0.9911640286445618,grad_norm: 0.9887994339015721, iteration: 98083
loss: 0.984739363193512,grad_norm: 0.980189292219414, iteration: 98084
loss: 1.0270484685897827,grad_norm: 0.9484513304032298, iteration: 98085
loss: 0.981995165348053,grad_norm: 0.9999990248937417, iteration: 98086
loss: 1.0407601594924927,grad_norm: 0.9999991712170727, iteration: 98087
loss: 1.0583381652832031,grad_norm: 0.9999990185213854, iteration: 98088
loss: 0.9822308421134949,grad_norm: 0.8472227426440605, iteration: 98089
loss: 0.9734581112861633,grad_norm: 0.9999991346124164, iteration: 98090
loss: 1.044843316078186,grad_norm: 0.999999614185592, iteration: 98091
loss: 0.9554870128631592,grad_norm: 0.9817771108493677, iteration: 98092
loss: 0.9914501905441284,grad_norm: 0.9322009106801274, iteration: 98093
loss: 1.0090230703353882,grad_norm: 0.9999992025340547, iteration: 98094
loss: 1.0051299333572388,grad_norm: 0.9372384223106592, iteration: 98095
loss: 1.0091359615325928,grad_norm: 0.9999992423233662, iteration: 98096
loss: 1.0024811029434204,grad_norm: 0.9999991038872926, iteration: 98097
loss: 0.9777751564979553,grad_norm: 0.9999992435771777, iteration: 98098
loss: 0.9916459918022156,grad_norm: 0.999999163060198, iteration: 98099
loss: 1.018134593963623,grad_norm: 0.9999991684956544, iteration: 98100
loss: 0.9767568707466125,grad_norm: 0.9999991165616889, iteration: 98101
loss: 1.0113465785980225,grad_norm: 0.978872502714417, iteration: 98102
loss: 0.9622047543525696,grad_norm: 0.8983207308027535, iteration: 98103
loss: 0.9994007349014282,grad_norm: 0.9999994678371321, iteration: 98104
loss: 1.0376534461975098,grad_norm: 0.9999991069240036, iteration: 98105
loss: 0.9645688533782959,grad_norm: 0.9999992920791382, iteration: 98106
loss: 0.9796687364578247,grad_norm: 0.8995954244603601, iteration: 98107
loss: 1.0147525072097778,grad_norm: 0.999999176111016, iteration: 98108
loss: 1.006011724472046,grad_norm: 0.9844590314425342, iteration: 98109
loss: 1.0417622327804565,grad_norm: 0.9622442263497099, iteration: 98110
loss: 1.0059449672698975,grad_norm: 0.9999990224019092, iteration: 98111
loss: 1.025607705116272,grad_norm: 0.999999053019191, iteration: 98112
loss: 1.0327295064926147,grad_norm: 0.9439138369629978, iteration: 98113
loss: 0.993211567401886,grad_norm: 0.9999991186596829, iteration: 98114
loss: 0.925849437713623,grad_norm: 0.9999991488510807, iteration: 98115
loss: 0.9973412156105042,grad_norm: 0.9675566208951635, iteration: 98116
loss: 1.0053033828735352,grad_norm: 0.9999991772050392, iteration: 98117
loss: 1.0137550830841064,grad_norm: 0.9702682304265566, iteration: 98118
loss: 1.010177731513977,grad_norm: 0.999999040671761, iteration: 98119
loss: 1.0053720474243164,grad_norm: 0.9991415308983638, iteration: 98120
loss: 1.0310583114624023,grad_norm: 0.9819606238611981, iteration: 98121
loss: 0.9860111474990845,grad_norm: 0.9732776082560585, iteration: 98122
loss: 1.0143139362335205,grad_norm: 0.999999316124358, iteration: 98123
loss: 1.0490190982818604,grad_norm: 0.8163251685434614, iteration: 98124
loss: 0.9900452494621277,grad_norm: 0.9999991252291154, iteration: 98125
loss: 1.02561354637146,grad_norm: 0.9999992579702731, iteration: 98126
loss: 0.9960658550262451,grad_norm: 0.9999990827375507, iteration: 98127
loss: 1.0072652101516724,grad_norm: 0.9596349665679262, iteration: 98128
loss: 1.0493512153625488,grad_norm: 0.9999992170878189, iteration: 98129
loss: 1.0073310136795044,grad_norm: 0.9798077601615048, iteration: 98130
loss: 1.0166842937469482,grad_norm: 0.9640149296303683, iteration: 98131
loss: 1.0286113023757935,grad_norm: 0.9999991543414541, iteration: 98132
loss: 1.0140438079833984,grad_norm: 0.9604859349845135, iteration: 98133
loss: 1.0206077098846436,grad_norm: 0.9999991448076541, iteration: 98134
loss: 0.9885547161102295,grad_norm: 0.9047501595782502, iteration: 98135
loss: 0.9772061705589294,grad_norm: 0.999999037678986, iteration: 98136
loss: 1.0103334188461304,grad_norm: 0.9999997716039705, iteration: 98137
loss: 0.9728423357009888,grad_norm: 0.9999991617914219, iteration: 98138
loss: 0.9875151515007019,grad_norm: 0.9821124582003765, iteration: 98139
loss: 0.9897416830062866,grad_norm: 0.974520121666267, iteration: 98140
loss: 1.0178799629211426,grad_norm: 0.927222253879903, iteration: 98141
loss: 1.0344890356063843,grad_norm: 0.9999990806631432, iteration: 98142
loss: 0.9938101172447205,grad_norm: 0.8964540362955263, iteration: 98143
loss: 1.036172866821289,grad_norm: 0.9999990550472316, iteration: 98144
loss: 1.0242700576782227,grad_norm: 0.9999991995535957, iteration: 98145
loss: 0.9407881498336792,grad_norm: 0.9999992861763355, iteration: 98146
loss: 0.9883973598480225,grad_norm: 0.9999989680041798, iteration: 98147
loss: 0.9906830787658691,grad_norm: 0.9841927516451947, iteration: 98148
loss: 0.9888858795166016,grad_norm: 0.999999235291135, iteration: 98149
loss: 1.005341649055481,grad_norm: 0.9999991611194028, iteration: 98150
loss: 1.0586967468261719,grad_norm: 0.9999990875010396, iteration: 98151
loss: 0.9930932521820068,grad_norm: 0.9999995694039103, iteration: 98152
loss: 0.994120180606842,grad_norm: 0.9162370988033612, iteration: 98153
loss: 0.998817503452301,grad_norm: 0.9999989350442924, iteration: 98154
loss: 1.0254600048065186,grad_norm: 0.9999988897589495, iteration: 98155
loss: 1.0009719133377075,grad_norm: 0.9999990982286465, iteration: 98156
loss: 0.9939212799072266,grad_norm: 0.9999991792600693, iteration: 98157
loss: 0.987866997718811,grad_norm: 0.999998984020676, iteration: 98158
loss: 0.9765191674232483,grad_norm: 0.9999992064377626, iteration: 98159
loss: 1.0423587560653687,grad_norm: 0.9999991258937511, iteration: 98160
loss: 0.979447603225708,grad_norm: 0.9999990432273066, iteration: 98161
loss: 0.9956965446472168,grad_norm: 0.9999990892248019, iteration: 98162
loss: 1.034096360206604,grad_norm: 0.9999991018826357, iteration: 98163
loss: 1.015690565109253,grad_norm: 0.9999993734197978, iteration: 98164
loss: 1.0358500480651855,grad_norm: 0.9999990912340387, iteration: 98165
loss: 0.9862701892852783,grad_norm: 0.9638467783655696, iteration: 98166
loss: 0.9947487115859985,grad_norm: 0.9646776538210708, iteration: 98167
loss: 1.0132492780685425,grad_norm: 0.9911513795804121, iteration: 98168
loss: 0.9901683926582336,grad_norm: 0.9868816147789912, iteration: 98169
loss: 0.982466459274292,grad_norm: 0.8914993419501899, iteration: 98170
loss: 1.0385828018188477,grad_norm: 0.9999990739615526, iteration: 98171
loss: 0.9755874872207642,grad_norm: 0.9999989952482309, iteration: 98172
loss: 0.9823361039161682,grad_norm: 0.9999992776542848, iteration: 98173
loss: 1.0033223628997803,grad_norm: 0.9999990841622529, iteration: 98174
loss: 1.0093283653259277,grad_norm: 0.9999992429962702, iteration: 98175
loss: 1.005851149559021,grad_norm: 0.9999991680437407, iteration: 98176
loss: 0.9830104112625122,grad_norm: 0.9999993294631928, iteration: 98177
loss: 1.0315231084823608,grad_norm: 0.9999992991698433, iteration: 98178
loss: 0.9608609676361084,grad_norm: 0.9447015260672555, iteration: 98179
loss: 0.9906555414199829,grad_norm: 0.9999991605583523, iteration: 98180
loss: 0.990199863910675,grad_norm: 0.999999190601697, iteration: 98181
loss: 1.047262191772461,grad_norm: 0.9999992348030414, iteration: 98182
loss: 1.0594298839569092,grad_norm: 0.9999993639638548, iteration: 98183
loss: 1.0174471139907837,grad_norm: 0.9901882103342381, iteration: 98184
loss: 0.9836896657943726,grad_norm: 0.9999990888136562, iteration: 98185
loss: 1.0047308206558228,grad_norm: 0.9999992591165212, iteration: 98186
loss: 1.028842806816101,grad_norm: 0.9999992693916734, iteration: 98187
loss: 1.0946449041366577,grad_norm: 0.9999997376551367, iteration: 98188
loss: 0.9968600869178772,grad_norm: 0.999999192054312, iteration: 98189
loss: 1.0577300786972046,grad_norm: 0.9999991629308398, iteration: 98190
loss: 0.9930862188339233,grad_norm: 0.99999906618664, iteration: 98191
loss: 0.9816554188728333,grad_norm: 0.9426676985889554, iteration: 98192
loss: 1.014544129371643,grad_norm: 0.9999999144178954, iteration: 98193
loss: 1.0065845251083374,grad_norm: 0.9999992125581036, iteration: 98194
loss: 0.9982139468193054,grad_norm: 0.9713177752698476, iteration: 98195
loss: 0.9617864489555359,grad_norm: 0.9597968960367963, iteration: 98196
loss: 1.0125495195388794,grad_norm: 0.9999992852252614, iteration: 98197
loss: 1.0506199598312378,grad_norm: 0.9999991253091173, iteration: 98198
loss: 1.002968430519104,grad_norm: 0.9376518513350827, iteration: 98199
loss: 0.9775651693344116,grad_norm: 0.9728588592023337, iteration: 98200
loss: 0.9996225833892822,grad_norm: 0.9999991790322288, iteration: 98201
loss: 1.0096789598464966,grad_norm: 0.9999991188592837, iteration: 98202
loss: 0.9917526245117188,grad_norm: 0.9753641504339008, iteration: 98203
loss: 0.9744659066200256,grad_norm: 0.999999261120522, iteration: 98204
loss: 0.994320809841156,grad_norm: 0.9683970373262631, iteration: 98205
loss: 0.9974446296691895,grad_norm: 0.9999992175865626, iteration: 98206
loss: 0.9923179149627686,grad_norm: 0.9061710919664465, iteration: 98207
loss: 0.9924833178520203,grad_norm: 0.999999342055349, iteration: 98208
loss: 1.021521806716919,grad_norm: 0.9999990295320917, iteration: 98209
loss: 0.9997197985649109,grad_norm: 0.9999991292435374, iteration: 98210
loss: 1.0589452981948853,grad_norm: 0.9999991321459959, iteration: 98211
loss: 1.0271319150924683,grad_norm: 0.9999991700142934, iteration: 98212
loss: 1.008574366569519,grad_norm: 0.9999992047107386, iteration: 98213
loss: 1.0177773237228394,grad_norm: 0.9999993089275969, iteration: 98214
loss: 1.016351580619812,grad_norm: 0.9847630938127687, iteration: 98215
loss: 0.9991385340690613,grad_norm: 0.9995329887259216, iteration: 98216
loss: 1.0110255479812622,grad_norm: 0.9473617630236593, iteration: 98217
loss: 1.0203945636749268,grad_norm: 0.9956777188264027, iteration: 98218
loss: 0.9930805563926697,grad_norm: 0.9396910982037596, iteration: 98219
loss: 1.027262806892395,grad_norm: 0.9999990478357939, iteration: 98220
loss: 1.011329174041748,grad_norm: 0.9999992327669267, iteration: 98221
loss: 1.001110315322876,grad_norm: 0.9999991733785693, iteration: 98222
loss: 0.9890396595001221,grad_norm: 0.9999989305109425, iteration: 98223
loss: 1.0329269170761108,grad_norm: 0.9999990473817415, iteration: 98224
loss: 1.0223863124847412,grad_norm: 0.99999892229828, iteration: 98225
loss: 0.9917373657226562,grad_norm: 0.8171589486402069, iteration: 98226
loss: 1.035207748413086,grad_norm: 0.96026851615275, iteration: 98227
loss: 1.0127507448196411,grad_norm: 0.9999992151115481, iteration: 98228
loss: 1.012810230255127,grad_norm: 0.9999991579258737, iteration: 98229
loss: 0.9995713829994202,grad_norm: 0.9999990974246377, iteration: 98230
loss: 1.015710711479187,grad_norm: 0.999999076496489, iteration: 98231
loss: 0.9999579787254333,grad_norm: 0.9732625065990796, iteration: 98232
loss: 0.9836676120758057,grad_norm: 0.9444461818386461, iteration: 98233
loss: 0.9959560036659241,grad_norm: 0.9999990657806903, iteration: 98234
loss: 0.972629964351654,grad_norm: 0.9797096398426728, iteration: 98235
loss: 1.0210089683532715,grad_norm: 0.9027224613253214, iteration: 98236
loss: 1.0123859643936157,grad_norm: 0.9953502040098371, iteration: 98237
loss: 1.0092400312423706,grad_norm: 0.9999991481855287, iteration: 98238
loss: 1.0149282217025757,grad_norm: 0.8966954212132697, iteration: 98239
loss: 1.0009894371032715,grad_norm: 0.9999990941000666, iteration: 98240
loss: 1.0027472972869873,grad_norm: 0.9999990257108345, iteration: 98241
loss: 1.0132980346679688,grad_norm: 0.9999992438141158, iteration: 98242
loss: 1.010185956954956,grad_norm: 0.9999991278951899, iteration: 98243
loss: 0.9978909492492676,grad_norm: 0.9999995970185812, iteration: 98244
loss: 0.9894408583641052,grad_norm: 0.9999989668385879, iteration: 98245
loss: 0.9973390102386475,grad_norm: 0.9999991025627892, iteration: 98246
loss: 0.9717819094657898,grad_norm: 0.9999991393976031, iteration: 98247
loss: 1.0196335315704346,grad_norm: 0.9725344895053387, iteration: 98248
loss: 0.9861712455749512,grad_norm: 0.9999990790451584, iteration: 98249
loss: 0.9980327486991882,grad_norm: 0.9999990898922652, iteration: 98250
loss: 1.0145972967147827,grad_norm: 0.9575688344080688, iteration: 98251
loss: 1.0114514827728271,grad_norm: 0.8809764390838426, iteration: 98252
loss: 1.0096591711044312,grad_norm: 0.9353056409531599, iteration: 98253
loss: 1.0141769647598267,grad_norm: 0.9101694412622563, iteration: 98254
loss: 0.9885060787200928,grad_norm: 0.9999990281717496, iteration: 98255
loss: 1.0236332416534424,grad_norm: 0.999999165837819, iteration: 98256
loss: 1.0123012065887451,grad_norm: 0.9959984794129937, iteration: 98257
loss: 0.9887120127677917,grad_norm: 0.9999990413245796, iteration: 98258
loss: 0.9935535192489624,grad_norm: 0.9835873314646195, iteration: 98259
loss: 0.9790811538696289,grad_norm: 0.927252811363018, iteration: 98260
loss: 1.0194188356399536,grad_norm: 0.9999991079329054, iteration: 98261
loss: 0.9949073791503906,grad_norm: 0.9999988812664901, iteration: 98262
loss: 0.9961457252502441,grad_norm: 0.9999989337705142, iteration: 98263
loss: 0.9837767481803894,grad_norm: 0.9999992186119617, iteration: 98264
loss: 0.981716513633728,grad_norm: 0.9999993400363283, iteration: 98265
loss: 1.0105464458465576,grad_norm: 0.9999989758239132, iteration: 98266
loss: 1.0111514329910278,grad_norm: 0.9601496273652121, iteration: 98267
loss: 1.0007832050323486,grad_norm: 0.9054836287640111, iteration: 98268
loss: 1.0019887685775757,grad_norm: 0.9999993303387134, iteration: 98269
loss: 0.9961026310920715,grad_norm: 0.999999130784962, iteration: 98270
loss: 1.007581353187561,grad_norm: 0.9999991999533366, iteration: 98271
loss: 1.003450870513916,grad_norm: 0.9643802198228413, iteration: 98272
loss: 0.9611240029335022,grad_norm: 0.9999993427866685, iteration: 98273
loss: 0.947991132736206,grad_norm: 0.9497501419707174, iteration: 98274
loss: 1.0245815515518188,grad_norm: 0.9999991995981068, iteration: 98275
loss: 1.0188984870910645,grad_norm: 0.99999914170688, iteration: 98276
loss: 0.9923171401023865,grad_norm: 0.891216030196678, iteration: 98277
loss: 0.9968157410621643,grad_norm: 0.9999991962452045, iteration: 98278
loss: 0.9750827550888062,grad_norm: 0.9999991554489759, iteration: 98279
loss: 0.9606582522392273,grad_norm: 0.9440275039700262, iteration: 98280
loss: 0.9606692790985107,grad_norm: 0.9999991766586748, iteration: 98281
loss: 1.0135767459869385,grad_norm: 0.9999991313437049, iteration: 98282
loss: 0.9919735789299011,grad_norm: 0.9999992740675858, iteration: 98283
loss: 1.0091413259506226,grad_norm: 0.9999991521715095, iteration: 98284
loss: 1.0223597288131714,grad_norm: 0.97436151215098, iteration: 98285
loss: 1.0104326009750366,grad_norm: 0.9999991861624697, iteration: 98286
loss: 0.9955442547798157,grad_norm: 0.9228750417218965, iteration: 98287
loss: 0.9981424808502197,grad_norm: 0.9089965976605001, iteration: 98288
loss: 1.0032752752304077,grad_norm: 0.9999991912171563, iteration: 98289
loss: 0.9937822818756104,grad_norm: 0.8745673330709438, iteration: 98290
loss: 1.0244112014770508,grad_norm: 0.9667715117486818, iteration: 98291
loss: 1.0273550748825073,grad_norm: 0.9999991725606907, iteration: 98292
loss: 0.9921426177024841,grad_norm: 0.9734744990850728, iteration: 98293
loss: 1.0003715753555298,grad_norm: 0.9999992780912865, iteration: 98294
loss: 1.0099693536758423,grad_norm: 0.9999992463862666, iteration: 98295
loss: 0.9947583675384521,grad_norm: 0.8729810519973438, iteration: 98296
loss: 1.0221104621887207,grad_norm: 0.9999992654311519, iteration: 98297
loss: 1.0326646566390991,grad_norm: 0.9999989580894416, iteration: 98298
loss: 1.0020335912704468,grad_norm: 0.9999990682517162, iteration: 98299
loss: 0.9763404726982117,grad_norm: 0.9999991346102688, iteration: 98300
loss: 0.9942707419395447,grad_norm: 0.999999203401344, iteration: 98301
loss: 1.0197263956069946,grad_norm: 0.9999991209906502, iteration: 98302
loss: 1.0329958200454712,grad_norm: 0.999999240814371, iteration: 98303
loss: 0.9881187677383423,grad_norm: 0.9620027598115304, iteration: 98304
loss: 1.0081846714019775,grad_norm: 0.999999130339518, iteration: 98305
loss: 0.980591356754303,grad_norm: 0.999999025120671, iteration: 98306
loss: 0.9873452186584473,grad_norm: 0.9723803900408706, iteration: 98307
loss: 1.0133137702941895,grad_norm: 0.9999990564870465, iteration: 98308
loss: 1.0092028379440308,grad_norm: 0.9772105000639651, iteration: 98309
loss: 1.0227168798446655,grad_norm: 0.9999990953934715, iteration: 98310
loss: 1.0085350275039673,grad_norm: 0.947653321476373, iteration: 98311
loss: 1.0164952278137207,grad_norm: 0.9999990962507903, iteration: 98312
loss: 1.0096228122711182,grad_norm: 0.9999991247123786, iteration: 98313
loss: 0.9657595753669739,grad_norm: 0.8580453409835469, iteration: 98314
loss: 1.004733681678772,grad_norm: 0.938546401626287, iteration: 98315
loss: 0.9622159600257874,grad_norm: 0.9999991138369176, iteration: 98316
loss: 1.0136178731918335,grad_norm: 0.8696782343880826, iteration: 98317
loss: 1.0026419162750244,grad_norm: 0.9999990875946883, iteration: 98318
loss: 1.00834321975708,grad_norm: 0.9999992159034559, iteration: 98319
loss: 0.9856191277503967,grad_norm: 0.9999990995599781, iteration: 98320
loss: 1.0003780126571655,grad_norm: 0.9999994770628031, iteration: 98321
loss: 0.9698214530944824,grad_norm: 0.9581515549807186, iteration: 98322
loss: 1.0108397006988525,grad_norm: 0.9999991829613162, iteration: 98323
loss: 1.0231993198394775,grad_norm: 0.9999992182149859, iteration: 98324
loss: 0.9960790872573853,grad_norm: 0.9999991019082114, iteration: 98325
loss: 0.9610199928283691,grad_norm: 0.9999991982376311, iteration: 98326
loss: 0.9771259427070618,grad_norm: 0.9999991060576925, iteration: 98327
loss: 0.9988497495651245,grad_norm: 0.9999993012006567, iteration: 98328
loss: 1.0198224782943726,grad_norm: 0.9520867913106168, iteration: 98329
loss: 1.0119675397872925,grad_norm: 0.9243655230714325, iteration: 98330
loss: 1.0161094665527344,grad_norm: 0.9999992917976468, iteration: 98331
loss: 1.006371021270752,grad_norm: 0.9999991673942176, iteration: 98332
loss: 1.0447235107421875,grad_norm: 0.9999993537143235, iteration: 98333
loss: 0.9899899959564209,grad_norm: 0.9315904572401158, iteration: 98334
loss: 1.0105594396591187,grad_norm: 0.9999991292851197, iteration: 98335
loss: 1.0211867094039917,grad_norm: 0.9999992484361934, iteration: 98336
loss: 1.0203073024749756,grad_norm: 0.9999992333700541, iteration: 98337
loss: 0.982057511806488,grad_norm: 0.9999991559233655, iteration: 98338
loss: 1.018760085105896,grad_norm: 0.9999991396888521, iteration: 98339
loss: 0.9825912117958069,grad_norm: 0.9999992671374598, iteration: 98340
loss: 0.9909827709197998,grad_norm: 0.9999990945419789, iteration: 98341
loss: 1.0313068628311157,grad_norm: 0.9999992855347989, iteration: 98342
loss: 1.0391281843185425,grad_norm: 0.9761807415448325, iteration: 98343
loss: 1.0053857564926147,grad_norm: 0.8059746590059154, iteration: 98344
loss: 0.9569873809814453,grad_norm: 0.9191258349213599, iteration: 98345
loss: 0.9447999596595764,grad_norm: 0.8841723597855072, iteration: 98346
loss: 1.0063108205795288,grad_norm: 0.9999990739711063, iteration: 98347
loss: 0.9937210083007812,grad_norm: 0.9999991521787147, iteration: 98348
loss: 0.9624173641204834,grad_norm: 0.9999989094773132, iteration: 98349
loss: 0.9951969981193542,grad_norm: 0.9999992140568763, iteration: 98350
loss: 0.9858068823814392,grad_norm: 0.9999991702824381, iteration: 98351
loss: 1.0437262058258057,grad_norm: 0.9999992681550769, iteration: 98352
loss: 1.0196627378463745,grad_norm: 0.9793082319277514, iteration: 98353
loss: 0.9944366812705994,grad_norm: 0.9999991909418575, iteration: 98354
loss: 1.0095114707946777,grad_norm: 0.9999990946466145, iteration: 98355
loss: 1.0336694717407227,grad_norm: 0.9558041162613521, iteration: 98356
loss: 1.0227159261703491,grad_norm: 0.9999991742076743, iteration: 98357
loss: 0.9956285953521729,grad_norm: 0.999999041745077, iteration: 98358
loss: 1.0042617321014404,grad_norm: 0.9999991244063032, iteration: 98359
loss: 1.0309869050979614,grad_norm: 0.9583221116059856, iteration: 98360
loss: 1.0431416034698486,grad_norm: 0.9354057190406159, iteration: 98361
loss: 0.9769759178161621,grad_norm: 0.9999990771655739, iteration: 98362
loss: 0.9821134209632874,grad_norm: 0.999999024672416, iteration: 98363
loss: 0.963351309299469,grad_norm: 0.9999992752198232, iteration: 98364
loss: 1.0057778358459473,grad_norm: 0.9999990976287103, iteration: 98365
loss: 1.0133737325668335,grad_norm: 0.9999993493294649, iteration: 98366
loss: 1.0293878316879272,grad_norm: 0.9161592656845006, iteration: 98367
loss: 1.0270484685897827,grad_norm: 0.9432573479100522, iteration: 98368
loss: 0.9718499779701233,grad_norm: 0.979590687817296, iteration: 98369
loss: 0.9572129845619202,grad_norm: 0.9407162706490111, iteration: 98370
loss: 0.9667742848396301,grad_norm: 0.9616752680539525, iteration: 98371
loss: 0.993333101272583,grad_norm: 0.9999990834857522, iteration: 98372
loss: 1.0056419372558594,grad_norm: 0.9999991156714918, iteration: 98373
loss: 0.990908145904541,grad_norm: 0.999999158779823, iteration: 98374
loss: 1.016122817993164,grad_norm: 0.9999990721154679, iteration: 98375
loss: 0.9924514889717102,grad_norm: 0.859224209796685, iteration: 98376
loss: 0.9967761635780334,grad_norm: 0.9999990859389263, iteration: 98377
loss: 1.0357015132904053,grad_norm: 0.9999990398705999, iteration: 98378
loss: 0.9817768335342407,grad_norm: 0.999999132992723, iteration: 98379
loss: 0.9835596680641174,grad_norm: 0.9802420124958985, iteration: 98380
loss: 0.9797570705413818,grad_norm: 0.940189266223142, iteration: 98381
loss: 0.966299295425415,grad_norm: 0.9543743050580681, iteration: 98382
loss: 1.0003238916397095,grad_norm: 0.9999991718233955, iteration: 98383
loss: 1.0338513851165771,grad_norm: 0.9999992080936523, iteration: 98384
loss: 0.9997174143791199,grad_norm: 0.999999123134257, iteration: 98385
loss: 0.9798556566238403,grad_norm: 0.9832362336645365, iteration: 98386
loss: 1.011162519454956,grad_norm: 0.9620427401044769, iteration: 98387
loss: 0.960040807723999,grad_norm: 0.9999990430157712, iteration: 98388
loss: 1.0569748878479004,grad_norm: 0.9883019153056343, iteration: 98389
loss: 1.030829906463623,grad_norm: 0.9451379414585355, iteration: 98390
loss: 1.0585347414016724,grad_norm: 0.9999993093097067, iteration: 98391
loss: 0.9689541459083557,grad_norm: 0.9529026553732171, iteration: 98392
loss: 0.9904893636703491,grad_norm: 0.9056133050970953, iteration: 98393
loss: 1.024076223373413,grad_norm: 0.9999991498137393, iteration: 98394
loss: 1.0080589056015015,grad_norm: 0.9999992070347998, iteration: 98395
loss: 0.9849587082862854,grad_norm: 0.9999990439918004, iteration: 98396
loss: 1.0088298320770264,grad_norm: 0.9999989896103305, iteration: 98397
loss: 1.0071723461151123,grad_norm: 0.927027279755743, iteration: 98398
loss: 1.0364580154418945,grad_norm: 0.9808560222531096, iteration: 98399
loss: 0.9824861288070679,grad_norm: 0.9999992419867989, iteration: 98400
loss: 0.999937891960144,grad_norm: 0.9999988103450347, iteration: 98401
loss: 1.018924355506897,grad_norm: 0.9981302248957434, iteration: 98402
loss: 1.0036572217941284,grad_norm: 0.9999990779055745, iteration: 98403
loss: 0.9700077176094055,grad_norm: 0.9999990182485697, iteration: 98404
loss: 0.9951342940330505,grad_norm: 0.9999993122495892, iteration: 98405
loss: 1.0123052597045898,grad_norm: 0.9999993034767519, iteration: 98406
loss: 1.0384598970413208,grad_norm: 0.9862716715657394, iteration: 98407
loss: 1.0041066408157349,grad_norm: 0.9790185103801201, iteration: 98408
loss: 0.9566237330436707,grad_norm: 0.9999990949378855, iteration: 98409
loss: 0.9960474371910095,grad_norm: 0.9999992164932997, iteration: 98410
loss: 0.9949357509613037,grad_norm: 0.9999991539692945, iteration: 98411
loss: 1.0069299936294556,grad_norm: 0.9999992199564898, iteration: 98412
loss: 1.0006343126296997,grad_norm: 0.9999991057144973, iteration: 98413
loss: 1.0049043893814087,grad_norm: 0.927810014511905, iteration: 98414
loss: 0.9648119211196899,grad_norm: 0.8943373988655915, iteration: 98415
loss: 0.9847924709320068,grad_norm: 0.9999990586597384, iteration: 98416
loss: 1.020092487335205,grad_norm: 0.9810082527546461, iteration: 98417
loss: 1.0232681035995483,grad_norm: 0.9999991631323413, iteration: 98418
loss: 1.0009573698043823,grad_norm: 0.978997567245449, iteration: 98419
loss: 0.9969213604927063,grad_norm: 0.9563953300290067, iteration: 98420
loss: 0.985000729560852,grad_norm: 0.9999992078046972, iteration: 98421
loss: 1.0025862455368042,grad_norm: 0.9999991823818212, iteration: 98422
loss: 1.0081007480621338,grad_norm: 0.9999991387890897, iteration: 98423
loss: 1.0248886346817017,grad_norm: 0.9999992699898396, iteration: 98424
loss: 0.9773157835006714,grad_norm: 0.9999992522324069, iteration: 98425
loss: 1.0271737575531006,grad_norm: 0.9999991679394074, iteration: 98426
loss: 0.9840697050094604,grad_norm: 0.9803267138404254, iteration: 98427
loss: 1.0091105699539185,grad_norm: 0.9339245793944336, iteration: 98428
loss: 0.9778412580490112,grad_norm: 0.874781922372059, iteration: 98429
loss: 0.9595895409584045,grad_norm: 0.9999992254377638, iteration: 98430
loss: 1.031390905380249,grad_norm: 0.9969058801824242, iteration: 98431
loss: 0.9944723844528198,grad_norm: 0.999999166555921, iteration: 98432
loss: 1.0111351013183594,grad_norm: 0.9350972852597992, iteration: 98433
loss: 0.9730316996574402,grad_norm: 0.9999992903442354, iteration: 98434
loss: 1.0149908065795898,grad_norm: 0.9999990007897193, iteration: 98435
loss: 0.9801905751228333,grad_norm: 0.9999991900807708, iteration: 98436
loss: 0.9936617612838745,grad_norm: 0.9177582367136449, iteration: 98437
loss: 0.9801737070083618,grad_norm: 0.9999990779958564, iteration: 98438
loss: 0.9416071176528931,grad_norm: 0.8853619414090227, iteration: 98439
loss: 1.0210354328155518,grad_norm: 0.9999997672297523, iteration: 98440
loss: 1.016337513923645,grad_norm: 0.9999991661853942, iteration: 98441
loss: 1.0284620523452759,grad_norm: 0.9477051197436676, iteration: 98442
loss: 1.0467618703842163,grad_norm: 0.999999185311721, iteration: 98443
loss: 0.9917809367179871,grad_norm: 0.9655126466598906, iteration: 98444
loss: 1.0110759735107422,grad_norm: 0.9999992284223456, iteration: 98445
loss: 0.9724607467651367,grad_norm: 0.8345722509516971, iteration: 98446
loss: 1.0093660354614258,grad_norm: 0.9999993038109987, iteration: 98447
loss: 0.9883341193199158,grad_norm: 0.9999991683705325, iteration: 98448
loss: 1.0339150428771973,grad_norm: 0.9999989860132652, iteration: 98449
loss: 0.9758399724960327,grad_norm: 0.9999991932225171, iteration: 98450
loss: 0.9828891158103943,grad_norm: 0.9384407248211497, iteration: 98451
loss: 0.991790235042572,grad_norm: 0.9999989826517257, iteration: 98452
loss: 1.0322895050048828,grad_norm: 0.9999992986427441, iteration: 98453
loss: 0.9876338839530945,grad_norm: 0.9999992308242055, iteration: 98454
loss: 1.0256296396255493,grad_norm: 0.9659307161991944, iteration: 98455
loss: 0.9512805938720703,grad_norm: 0.9999992066575146, iteration: 98456
loss: 0.9920265674591064,grad_norm: 0.9999991801636817, iteration: 98457
loss: 1.0223249197006226,grad_norm: 0.999999090097004, iteration: 98458
loss: 1.017223834991455,grad_norm: 0.9999989809832789, iteration: 98459
loss: 0.9848726987838745,grad_norm: 0.9999991814311433, iteration: 98460
loss: 1.0100795030593872,grad_norm: 0.9999991348789092, iteration: 98461
loss: 1.066080927848816,grad_norm: 0.9999992392179479, iteration: 98462
loss: 0.9795905351638794,grad_norm: 0.9999990018232051, iteration: 98463
loss: 0.9939342141151428,grad_norm: 0.918713792885231, iteration: 98464
loss: 1.0217888355255127,grad_norm: 0.9999993353499764, iteration: 98465
loss: 1.0068027973175049,grad_norm: 0.9999989493278557, iteration: 98466
loss: 0.9678608775138855,grad_norm: 0.9999991894729524, iteration: 98467
loss: 0.9750028848648071,grad_norm: 0.9999990306320347, iteration: 98468
loss: 0.9698630571365356,grad_norm: 0.9999992627485041, iteration: 98469
loss: 0.9980926513671875,grad_norm: 0.8424758814036285, iteration: 98470
loss: 0.9710139036178589,grad_norm: 0.9585125878241029, iteration: 98471
loss: 0.9973440766334534,grad_norm: 0.9999990952611733, iteration: 98472
loss: 1.0055793523788452,grad_norm: 0.9999991904861701, iteration: 98473
loss: 0.9878736138343811,grad_norm: 0.9999992667033688, iteration: 98474
loss: 1.0449823141098022,grad_norm: 0.8366083811164896, iteration: 98475
loss: 0.9790663719177246,grad_norm: 0.9624575106684692, iteration: 98476
loss: 0.9812332391738892,grad_norm: 0.9999992853291557, iteration: 98477
loss: 1.030976414680481,grad_norm: 0.9999990222698001, iteration: 98478
loss: 0.9948086142539978,grad_norm: 0.9999989295946012, iteration: 98479
loss: 1.025244116783142,grad_norm: 0.9788916234308036, iteration: 98480
loss: 1.0055888891220093,grad_norm: 0.9999992422341939, iteration: 98481
loss: 0.983357846736908,grad_norm: 0.9999992890808985, iteration: 98482
loss: 0.9836372137069702,grad_norm: 0.9383704551220444, iteration: 98483
loss: 1.0111297369003296,grad_norm: 0.9999991378490731, iteration: 98484
loss: 1.015364170074463,grad_norm: 0.8610716600829281, iteration: 98485
loss: 1.0269027948379517,grad_norm: 0.999999110949716, iteration: 98486
loss: 0.9766913056373596,grad_norm: 0.9370343658618605, iteration: 98487
loss: 0.9840571284294128,grad_norm: 0.9999992273525331, iteration: 98488
loss: 1.0214571952819824,grad_norm: 0.9999991680261985, iteration: 98489
loss: 0.9857547283172607,grad_norm: 0.9571318689639811, iteration: 98490
loss: 0.9967312216758728,grad_norm: 0.9306956090624882, iteration: 98491
loss: 1.0304667949676514,grad_norm: 0.9999990455092913, iteration: 98492
loss: 1.0110141038894653,grad_norm: 0.9999995212831472, iteration: 98493
loss: 1.020493745803833,grad_norm: 0.9999996276761813, iteration: 98494
loss: 0.9731385707855225,grad_norm: 0.9999992412242951, iteration: 98495
loss: 0.9540221691131592,grad_norm: 0.9423638397464031, iteration: 98496
loss: 1.0130916833877563,grad_norm: 0.9999992063043798, iteration: 98497
loss: 0.9739424586296082,grad_norm: 0.9196241442104699, iteration: 98498
loss: 1.0443414449691772,grad_norm: 0.9999992168196711, iteration: 98499
loss: 0.9652788639068604,grad_norm: 0.9999990365990636, iteration: 98500
loss: 1.0156513452529907,grad_norm: 0.9785607873943334, iteration: 98501
loss: 0.9851325750350952,grad_norm: 0.9999990414338968, iteration: 98502
loss: 0.9920744895935059,grad_norm: 0.9849856326904974, iteration: 98503
loss: 0.998471200466156,grad_norm: 0.9149384355364628, iteration: 98504
loss: 0.9955095052719116,grad_norm: 0.999999060330563, iteration: 98505
loss: 0.971086323261261,grad_norm: 0.9999991503060248, iteration: 98506
loss: 0.9830039739608765,grad_norm: 0.9999990191011418, iteration: 98507
loss: 0.9906148910522461,grad_norm: 0.9999990858028449, iteration: 98508
loss: 0.9991735219955444,grad_norm: 0.9999992678089237, iteration: 98509
loss: 1.0109119415283203,grad_norm: 0.9212805963241675, iteration: 98510
loss: 1.0469937324523926,grad_norm: 0.938156518183511, iteration: 98511
loss: 0.9906027913093567,grad_norm: 0.8800584510563587, iteration: 98512
loss: 0.985284149646759,grad_norm: 0.821080778380096, iteration: 98513
loss: 1.0064622163772583,grad_norm: 0.999999070960301, iteration: 98514
loss: 1.0093567371368408,grad_norm: 0.9974857124952433, iteration: 98515
loss: 1.0080265998840332,grad_norm: 0.9163992621500059, iteration: 98516
loss: 0.9953098893165588,grad_norm: 0.9206258961539534, iteration: 98517
loss: 0.9533271193504333,grad_norm: 0.9999990201283259, iteration: 98518
loss: 0.9765850901603699,grad_norm: 0.8387046880361814, iteration: 98519
loss: 1.0187510251998901,grad_norm: 0.9999991530923743, iteration: 98520
loss: 0.9775859713554382,grad_norm: 0.9999991182564096, iteration: 98521
loss: 0.9945042133331299,grad_norm: 0.9339481170292324, iteration: 98522
loss: 1.0046113729476929,grad_norm: 0.9999991282120848, iteration: 98523
loss: 0.9645337462425232,grad_norm: 0.9112868161823676, iteration: 98524
loss: 0.981640636920929,grad_norm: 0.9999991547483348, iteration: 98525
loss: 1.00481116771698,grad_norm: 0.999999092760941, iteration: 98526
loss: 0.9926607012748718,grad_norm: 0.9999991052406861, iteration: 98527
loss: 1.0097999572753906,grad_norm: 0.9999992734565912, iteration: 98528
loss: 1.007394790649414,grad_norm: 0.9756101733768137, iteration: 98529
loss: 1.0313993692398071,grad_norm: 0.9999991444783887, iteration: 98530
loss: 0.9836936593055725,grad_norm: 0.9789856236568437, iteration: 98531
loss: 1.021532416343689,grad_norm: 0.9999990711617096, iteration: 98532
loss: 1.0087389945983887,grad_norm: 0.9772984129420945, iteration: 98533
loss: 0.9599046111106873,grad_norm: 0.9999991232814808, iteration: 98534
loss: 1.0039067268371582,grad_norm: 0.9936198721185011, iteration: 98535
loss: 1.0067636966705322,grad_norm: 0.891466288445976, iteration: 98536
loss: 0.985971212387085,grad_norm: 0.9999990305284925, iteration: 98537
loss: 1.0132533311843872,grad_norm: 0.8238173184308847, iteration: 98538
loss: 0.9824733138084412,grad_norm: 0.9999991522713911, iteration: 98539
loss: 0.9974262714385986,grad_norm: 0.9999990126017647, iteration: 98540
loss: 1.0114941596984863,grad_norm: 0.9510234326944521, iteration: 98541
loss: 1.056100845336914,grad_norm: 0.999999645732839, iteration: 98542
loss: 0.9506053328514099,grad_norm: 0.9999991627983866, iteration: 98543
loss: 1.0052833557128906,grad_norm: 0.9999991383343517, iteration: 98544
loss: 0.9980766773223877,grad_norm: 0.9999991982356731, iteration: 98545
loss: 1.0029250383377075,grad_norm: 0.9999993176541024, iteration: 98546
loss: 1.0072016716003418,grad_norm: 0.9999991947340627, iteration: 98547
loss: 1.0014617443084717,grad_norm: 0.9999992757485618, iteration: 98548
loss: 1.0005295276641846,grad_norm: 0.9999989939798444, iteration: 98549
loss: 1.0403306484222412,grad_norm: 0.999999125898548, iteration: 98550
loss: 1.0037938356399536,grad_norm: 0.9999989273631, iteration: 98551
loss: 1.0144636631011963,grad_norm: 0.9999994074445071, iteration: 98552
loss: 1.0151159763336182,grad_norm: 0.9999991960630038, iteration: 98553
loss: 0.9891350865364075,grad_norm: 0.9189062727049927, iteration: 98554
loss: 1.0163452625274658,grad_norm: 0.9999991519234711, iteration: 98555
loss: 0.9917460680007935,grad_norm: 0.9999992469981058, iteration: 98556
loss: 1.0102157592773438,grad_norm: 0.999999120864422, iteration: 98557
loss: 0.9950249791145325,grad_norm: 0.9999992274500907, iteration: 98558
loss: 1.074398398399353,grad_norm: 0.9999991033727896, iteration: 98559
loss: 1.036037564277649,grad_norm: 0.9277595402620548, iteration: 98560
loss: 1.0512975454330444,grad_norm: 0.9999991285474437, iteration: 98561
loss: 0.9984321594238281,grad_norm: 0.9999992023414732, iteration: 98562
loss: 1.0190112590789795,grad_norm: 0.9999990438976062, iteration: 98563
loss: 1.0283524990081787,grad_norm: 0.9999990518705671, iteration: 98564
loss: 0.9836861491203308,grad_norm: 0.9999996014556567, iteration: 98565
loss: 1.1206973791122437,grad_norm: 0.9999993120039979, iteration: 98566
loss: 1.018714189529419,grad_norm: 0.9999989515646891, iteration: 98567
loss: 0.9864714741706848,grad_norm: 0.9999990778500055, iteration: 98568
loss: 0.9586290717124939,grad_norm: 0.9999990187323858, iteration: 98569
loss: 1.0061346292495728,grad_norm: 0.9999990935153192, iteration: 98570
loss: 0.9390268325805664,grad_norm: 0.9999990895817336, iteration: 98571
loss: 1.0012160539627075,grad_norm: 0.9917313928317435, iteration: 98572
loss: 1.0766292810440063,grad_norm: 0.9999997901074942, iteration: 98573
loss: 0.9939982891082764,grad_norm: 0.923656692712815, iteration: 98574
loss: 1.0120079517364502,grad_norm: 0.9999990774218535, iteration: 98575
loss: 0.9908955097198486,grad_norm: 0.9999992190223687, iteration: 98576
loss: 1.017960786819458,grad_norm: 0.9998335481132871, iteration: 98577
loss: 0.9979637265205383,grad_norm: 0.9999991528011657, iteration: 98578
loss: 0.9966137409210205,grad_norm: 0.9999991860635424, iteration: 98579
loss: 1.0088194608688354,grad_norm: 0.9999992585871357, iteration: 98580
loss: 1.0037060976028442,grad_norm: 0.999999022686768, iteration: 98581
loss: 0.9968772530555725,grad_norm: 0.9999991562946977, iteration: 98582
loss: 1.015268087387085,grad_norm: 0.9999992620112277, iteration: 98583
loss: 1.0035648345947266,grad_norm: 0.9999991957426075, iteration: 98584
loss: 0.9894117116928101,grad_norm: 0.8821570979439983, iteration: 98585
loss: 1.0288918018341064,grad_norm: 0.9965625928637118, iteration: 98586
loss: 0.9735797643661499,grad_norm: 0.9999991399521858, iteration: 98587
loss: 0.9731269478797913,grad_norm: 0.9999991472443388, iteration: 98588
loss: 1.0003169775009155,grad_norm: 0.999999087754457, iteration: 98589
loss: 1.007674217224121,grad_norm: 0.9417347394968334, iteration: 98590
loss: 1.0093713998794556,grad_norm: 0.9188406882171902, iteration: 98591
loss: 0.9850783348083496,grad_norm: 0.999999243292225, iteration: 98592
loss: 1.0289623737335205,grad_norm: 0.9999993296817647, iteration: 98593
loss: 0.9824541807174683,grad_norm: 0.9999990066752258, iteration: 98594
loss: 0.9989685416221619,grad_norm: 0.9259703677257308, iteration: 98595
loss: 1.0107835531234741,grad_norm: 0.9776936855626939, iteration: 98596
loss: 0.9916095733642578,grad_norm: 0.9671310777690004, iteration: 98597
loss: 1.0263909101486206,grad_norm: 0.9906836378544174, iteration: 98598
loss: 0.9904824495315552,grad_norm: 0.999999180787607, iteration: 98599
loss: 0.9786785244941711,grad_norm: 0.9875169389337859, iteration: 98600
loss: 0.971596896648407,grad_norm: 0.9999990390140447, iteration: 98601
loss: 0.9964014291763306,grad_norm: 0.982156478300478, iteration: 98602
loss: 1.008349895477295,grad_norm: 0.9999991709033683, iteration: 98603
loss: 0.9854410886764526,grad_norm: 0.999999180664278, iteration: 98604
loss: 0.9839628338813782,grad_norm: 0.9290475366857581, iteration: 98605
loss: 0.9876154661178589,grad_norm: 0.9999991630451627, iteration: 98606
loss: 0.9955330491065979,grad_norm: 0.9780594138518098, iteration: 98607
loss: 1.018121361732483,grad_norm: 0.9819874082376104, iteration: 98608
loss: 0.9973735809326172,grad_norm: 0.8596717291014836, iteration: 98609
loss: 0.9705489873886108,grad_norm: 0.9999990716869817, iteration: 98610
loss: 1.0030125379562378,grad_norm: 0.9496921620965875, iteration: 98611
loss: 0.9922875761985779,grad_norm: 0.999999202679523, iteration: 98612
loss: 0.9592774510383606,grad_norm: 0.9018792792811131, iteration: 98613
loss: 0.998803436756134,grad_norm: 0.9999991683596305, iteration: 98614
loss: 0.9878905415534973,grad_norm: 0.9999990577469958, iteration: 98615
loss: 1.02589750289917,grad_norm: 0.9504806139206972, iteration: 98616
loss: 0.9854469895362854,grad_norm: 0.8501005910231001, iteration: 98617
loss: 0.9955832958221436,grad_norm: 0.8941680016965884, iteration: 98618
loss: 0.9723185896873474,grad_norm: 0.999999151155211, iteration: 98619
loss: 0.9984163045883179,grad_norm: 0.9999992677893828, iteration: 98620
loss: 0.9974468946456909,grad_norm: 0.9999990245526901, iteration: 98621
loss: 0.9993651509284973,grad_norm: 0.9999992040628651, iteration: 98622
loss: 1.0527102947235107,grad_norm: 0.9999991659988424, iteration: 98623
loss: 0.991241991519928,grad_norm: 0.9999990232654805, iteration: 98624
loss: 1.0138239860534668,grad_norm: 0.9832777146408146, iteration: 98625
loss: 0.9655188918113708,grad_norm: 0.9999993898829641, iteration: 98626
loss: 1.0435585975646973,grad_norm: 0.9999993678772553, iteration: 98627
loss: 0.9753987789154053,grad_norm: 0.9999990344962633, iteration: 98628
loss: 0.9855342507362366,grad_norm: 0.9999991820777012, iteration: 98629
loss: 0.9623823761940002,grad_norm: 0.9999992694925237, iteration: 98630
loss: 1.0251394510269165,grad_norm: 0.914543284938116, iteration: 98631
loss: 1.0256459712982178,grad_norm: 0.9999991385152183, iteration: 98632
loss: 1.0453144311904907,grad_norm: 0.9999996243586142, iteration: 98633
loss: 0.9763970971107483,grad_norm: 0.9768468043742737, iteration: 98634
loss: 0.9440534114837646,grad_norm: 0.9999991453766658, iteration: 98635
loss: 0.9882743954658508,grad_norm: 0.9999992353843473, iteration: 98636
loss: 0.9800601601600647,grad_norm: 0.9999992939803257, iteration: 98637
loss: 0.9982680082321167,grad_norm: 0.9945781221825732, iteration: 98638
loss: 1.0058726072311401,grad_norm: 0.9999991576049757, iteration: 98639
loss: 0.9898942112922668,grad_norm: 0.9999990645793727, iteration: 98640
loss: 0.9989820718765259,grad_norm: 0.9999990843053697, iteration: 98641
loss: 0.9956327676773071,grad_norm: 0.9999990696472737, iteration: 98642
loss: 0.9888842105865479,grad_norm: 0.9999991095542746, iteration: 98643
loss: 0.9909213185310364,grad_norm: 0.9786464548006847, iteration: 98644
loss: 1.0196412801742554,grad_norm: 0.950760820943353, iteration: 98645
loss: 1.0193212032318115,grad_norm: 0.9852014626323212, iteration: 98646
loss: 1.0179110765457153,grad_norm: 0.9506776562408331, iteration: 98647
loss: 1.0647484064102173,grad_norm: 0.999999848672384, iteration: 98648
loss: 1.0084331035614014,grad_norm: 0.9999992187951853, iteration: 98649
loss: 1.0112882852554321,grad_norm: 0.9999991008527537, iteration: 98650
loss: 1.0160363912582397,grad_norm: 0.9737287747394785, iteration: 98651
loss: 0.9813077449798584,grad_norm: 0.9999993036965104, iteration: 98652
loss: 1.0126640796661377,grad_norm: 0.9441960061762523, iteration: 98653
loss: 0.9793024063110352,grad_norm: 0.8766221842485026, iteration: 98654
loss: 1.075665831565857,grad_norm: 0.9999998851325221, iteration: 98655
loss: 0.9994528293609619,grad_norm: 0.9760048624350474, iteration: 98656
loss: 1.040558934211731,grad_norm: 0.9999992536587704, iteration: 98657
loss: 0.9636067152023315,grad_norm: 0.9999991902554577, iteration: 98658
loss: 0.991216242313385,grad_norm: 0.923880765843838, iteration: 98659
loss: 0.9957425594329834,grad_norm: 0.8533437703814746, iteration: 98660
loss: 1.0100769996643066,grad_norm: 0.9999990531346036, iteration: 98661
loss: 1.1360220909118652,grad_norm: 0.9999992968596785, iteration: 98662
loss: 0.9906619191169739,grad_norm: 0.9999991124486943, iteration: 98663
loss: 0.9902849793434143,grad_norm: 0.9999991458790478, iteration: 98664
loss: 0.9989650249481201,grad_norm: 0.9891440309343029, iteration: 98665
loss: 0.9917585253715515,grad_norm: 0.9999991532907175, iteration: 98666
loss: 0.9903669357299805,grad_norm: 0.931187761673398, iteration: 98667
loss: 1.0143057107925415,grad_norm: 0.9999991559527401, iteration: 98668
loss: 0.9840953350067139,grad_norm: 0.9999991907526103, iteration: 98669
loss: 1.001364827156067,grad_norm: 0.9999990873065807, iteration: 98670
loss: 1.0543043613433838,grad_norm: 0.9999999146079974, iteration: 98671
loss: 1.0306895971298218,grad_norm: 0.999999182404421, iteration: 98672
loss: 0.9929903745651245,grad_norm: 0.9492327484858722, iteration: 98673
loss: 1.0139809846878052,grad_norm: 0.9542829640441794, iteration: 98674
loss: 0.982923686504364,grad_norm: 0.9832140615956585, iteration: 98675
loss: 0.9894583225250244,grad_norm: 0.9999991112126018, iteration: 98676
loss: 1.0005383491516113,grad_norm: 0.8936580308715686, iteration: 98677
loss: 1.0099273920059204,grad_norm: 0.9999990662421478, iteration: 98678
loss: 1.0747499465942383,grad_norm: 0.9999993957142153, iteration: 98679
loss: 0.9892929792404175,grad_norm: 0.9834992899307264, iteration: 98680
loss: 0.9622418880462646,grad_norm: 0.9139714492489036, iteration: 98681
loss: 1.0262682437896729,grad_norm: 0.9999991399771709, iteration: 98682
loss: 0.9910431504249573,grad_norm: 0.9849485672839301, iteration: 98683
loss: 1.009627103805542,grad_norm: 0.9999991064681789, iteration: 98684
loss: 1.0800304412841797,grad_norm: 0.9999993380257644, iteration: 98685
loss: 0.9843618273735046,grad_norm: 0.9172686317375672, iteration: 98686
loss: 1.0011848211288452,grad_norm: 0.9837987558673988, iteration: 98687
loss: 0.9921768307685852,grad_norm: 0.9689167940358994, iteration: 98688
loss: 1.1613333225250244,grad_norm: 0.9999992065247476, iteration: 98689
loss: 1.003672480583191,grad_norm: 0.9999990216273502, iteration: 98690
loss: 1.0300489664077759,grad_norm: 0.9999991365339832, iteration: 98691
loss: 1.010805368423462,grad_norm: 0.969889659576512, iteration: 98692
loss: 0.994283139705658,grad_norm: 0.9061522129282945, iteration: 98693
loss: 1.026145100593567,grad_norm: 0.9597711543709749, iteration: 98694
loss: 1.033165454864502,grad_norm: 0.999999260062509, iteration: 98695
loss: 1.013792872428894,grad_norm: 0.9999991171402267, iteration: 98696
loss: 1.0425724983215332,grad_norm: 0.9999992411500453, iteration: 98697
loss: 0.999926745891571,grad_norm: 0.9999992667650759, iteration: 98698
loss: 1.031922459602356,grad_norm: 0.9999995688218353, iteration: 98699
loss: 0.9645105004310608,grad_norm: 0.9999990030792559, iteration: 98700
loss: 1.0209864377975464,grad_norm: 0.9999999695202494, iteration: 98701
loss: 1.020501732826233,grad_norm: 0.9999992796461423, iteration: 98702
loss: 0.9985847473144531,grad_norm: 0.9999989589839628, iteration: 98703
loss: 1.0069849491119385,grad_norm: 0.9999993826130484, iteration: 98704
loss: 0.9930715560913086,grad_norm: 0.9999991629053919, iteration: 98705
loss: 1.0012062788009644,grad_norm: 0.9999993529801227, iteration: 98706
loss: 1.0780415534973145,grad_norm: 0.9999995690039162, iteration: 98707
loss: 0.993121325969696,grad_norm: 0.9199114100584185, iteration: 98708
loss: 0.960846483707428,grad_norm: 0.9999990232148281, iteration: 98709
loss: 0.9682723879814148,grad_norm: 0.9999993209554917, iteration: 98710
loss: 1.0104979276657104,grad_norm: 0.900826120884251, iteration: 98711
loss: 1.0133999586105347,grad_norm: 0.9999992205876117, iteration: 98712
loss: 0.9341955780982971,grad_norm: 0.9999992923967215, iteration: 98713
loss: 1.001321792602539,grad_norm: 0.9999992408779024, iteration: 98714
loss: 1.0029813051223755,grad_norm: 0.9999991820690975, iteration: 98715
loss: 0.9846102595329285,grad_norm: 0.9999991618018402, iteration: 98716
loss: 1.0449317693710327,grad_norm: 0.9999991556190424, iteration: 98717
loss: 1.0054351091384888,grad_norm: 0.999998949696118, iteration: 98718
loss: 0.9792619347572327,grad_norm: 0.999999181258193, iteration: 98719
loss: 1.0068655014038086,grad_norm: 0.999999289462847, iteration: 98720
loss: 0.9996476173400879,grad_norm: 0.999999186722782, iteration: 98721
loss: 0.9983088374137878,grad_norm: 0.9999990603503994, iteration: 98722
loss: 0.9925105571746826,grad_norm: 0.8907063240797021, iteration: 98723
loss: 1.0420613288879395,grad_norm: 0.9999991532721624, iteration: 98724
loss: 1.0719640254974365,grad_norm: 0.9999990884991745, iteration: 98725
loss: 1.0232878923416138,grad_norm: 0.9999992477332342, iteration: 98726
loss: 1.0193471908569336,grad_norm: 0.9999993638389516, iteration: 98727
loss: 0.9753817915916443,grad_norm: 0.9999989837565667, iteration: 98728
loss: 1.0011118650436401,grad_norm: 0.9999991498442586, iteration: 98729
loss: 0.9795419573783875,grad_norm: 0.8399474684081701, iteration: 98730
loss: 0.9818559885025024,grad_norm: 0.8842797363965698, iteration: 98731
loss: 1.009201169013977,grad_norm: 0.9486268182250865, iteration: 98732
loss: 1.026185393333435,grad_norm: 0.9999990499613816, iteration: 98733
loss: 1.0204044580459595,grad_norm: 0.9999992473323511, iteration: 98734
loss: 1.018079400062561,grad_norm: 0.9999994757818059, iteration: 98735
loss: 0.9536855220794678,grad_norm: 0.9999992036036542, iteration: 98736
loss: 0.9936637878417969,grad_norm: 0.9999990951226684, iteration: 98737
loss: 1.0394483804702759,grad_norm: 0.9999998629445276, iteration: 98738
loss: 1.017170786857605,grad_norm: 0.9999991320480177, iteration: 98739
loss: 1.0283057689666748,grad_norm: 0.9999998392381136, iteration: 98740
loss: 1.0447609424591064,grad_norm: 0.999999069789992, iteration: 98741
loss: 1.0294815301895142,grad_norm: 0.9999991232004954, iteration: 98742
loss: 1.0380803346633911,grad_norm: 0.9999992651593439, iteration: 98743
loss: 0.9978702664375305,grad_norm: 0.9102429150791181, iteration: 98744
loss: 0.9711478352546692,grad_norm: 0.8644394581975722, iteration: 98745
loss: 1.0089821815490723,grad_norm: 0.999999375093647, iteration: 98746
loss: 0.9593620896339417,grad_norm: 0.9999991592156466, iteration: 98747
loss: 1.024643063545227,grad_norm: 0.9999989900227075, iteration: 98748
loss: 0.9731485843658447,grad_norm: 0.9999993326682248, iteration: 98749
loss: 1.0257580280303955,grad_norm: 0.9938152123494969, iteration: 98750
loss: 1.0283865928649902,grad_norm: 0.9999991065292306, iteration: 98751
loss: 1.0072252750396729,grad_norm: 0.9999991293630837, iteration: 98752
loss: 1.0066150426864624,grad_norm: 0.8543954169951077, iteration: 98753
loss: 1.0547237396240234,grad_norm: 0.9999995382673458, iteration: 98754
loss: 1.0165959596633911,grad_norm: 0.9999991374330128, iteration: 98755
loss: 0.9975867867469788,grad_norm: 0.9999990761398946, iteration: 98756
loss: 0.9771490693092346,grad_norm: 0.8569837413440415, iteration: 98757
loss: 0.9645094871520996,grad_norm: 0.9518721438410105, iteration: 98758
loss: 1.0253217220306396,grad_norm: 0.9999993394289395, iteration: 98759
loss: 0.9776275753974915,grad_norm: 0.999999178476033, iteration: 98760
loss: 1.0079985857009888,grad_norm: 0.9999990240007613, iteration: 98761
loss: 0.9326110482215881,grad_norm: 0.9409583400776297, iteration: 98762
loss: 1.0356736183166504,grad_norm: 0.9999991402377121, iteration: 98763
loss: 0.9985257387161255,grad_norm: 0.9999990970461708, iteration: 98764
loss: 1.0091203451156616,grad_norm: 0.9999993203736822, iteration: 98765
loss: 0.9898115396499634,grad_norm: 0.9999992943312426, iteration: 98766
loss: 0.9697174429893494,grad_norm: 0.9999990118447853, iteration: 98767
loss: 0.9783779382705688,grad_norm: 0.9999991253052812, iteration: 98768
loss: 1.0204567909240723,grad_norm: 0.9999992722889922, iteration: 98769
loss: 0.9882962703704834,grad_norm: 0.9999994064335359, iteration: 98770
loss: 1.0320369005203247,grad_norm: 0.9999991789701206, iteration: 98771
loss: 1.106967806816101,grad_norm: 0.9999994733200804, iteration: 98772
loss: 0.994685173034668,grad_norm: 0.9814758972289311, iteration: 98773
loss: 1.0433086156845093,grad_norm: 0.9999989973545872, iteration: 98774
loss: 0.9883876442909241,grad_norm: 0.9999991446491495, iteration: 98775
loss: 0.9995188117027283,grad_norm: 0.9039566642218134, iteration: 98776
loss: 0.9690009951591492,grad_norm: 0.9999990962486761, iteration: 98777
loss: 0.9799527525901794,grad_norm: 0.8345727848197438, iteration: 98778
loss: 0.9887709617614746,grad_norm: 0.9999989855248557, iteration: 98779
loss: 1.0219142436981201,grad_norm: 0.9999990901005226, iteration: 98780
loss: 1.0023216009140015,grad_norm: 0.9999991381429655, iteration: 98781
loss: 1.020983099937439,grad_norm: 0.9999991423883517, iteration: 98782
loss: 1.00303053855896,grad_norm: 0.9999992242603544, iteration: 98783
loss: 1.0027536153793335,grad_norm: 0.9999992373840567, iteration: 98784
loss: 1.0100151300430298,grad_norm: 0.9999991774540536, iteration: 98785
loss: 1.0103633403778076,grad_norm: 0.9999991689814748, iteration: 98786
loss: 0.9763533473014832,grad_norm: 0.9999989158930382, iteration: 98787
loss: 0.9990000128746033,grad_norm: 0.9050148126169967, iteration: 98788
loss: 1.0550135374069214,grad_norm: 0.9999994312560851, iteration: 98789
loss: 0.991954505443573,grad_norm: 0.9999990841370149, iteration: 98790
loss: 1.026053786277771,grad_norm: 0.9643624402549412, iteration: 98791
loss: 1.018662452697754,grad_norm: 0.9999988784620982, iteration: 98792
loss: 1.0104252099990845,grad_norm: 0.9999992225438271, iteration: 98793
loss: 1.0069271326065063,grad_norm: 0.9986797777819744, iteration: 98794
loss: 1.0018987655639648,grad_norm: 0.9999991926253591, iteration: 98795
loss: 0.985709547996521,grad_norm: 0.9999991223420108, iteration: 98796
loss: 0.9960047006607056,grad_norm: 0.9999992478288584, iteration: 98797
loss: 1.0168629884719849,grad_norm: 0.999999590586578, iteration: 98798
loss: 1.0176565647125244,grad_norm: 0.9356115209937308, iteration: 98799
loss: 1.018330693244934,grad_norm: 0.9421299388252607, iteration: 98800
loss: 1.0184836387634277,grad_norm: 0.9758365807202869, iteration: 98801
loss: 0.964087963104248,grad_norm: 0.9999992028101949, iteration: 98802
loss: 1.0074952840805054,grad_norm: 0.860461983888111, iteration: 98803
loss: 0.9659151434898376,grad_norm: 0.9666960287822524, iteration: 98804
loss: 0.9894640445709229,grad_norm: 0.9418654202490899, iteration: 98805
loss: 1.0092288255691528,grad_norm: 0.9999993246366984, iteration: 98806
loss: 1.006730556488037,grad_norm: 0.8742097482863384, iteration: 98807
loss: 1.1683636903762817,grad_norm: 0.9999999480082357, iteration: 98808
loss: 0.9922555088996887,grad_norm: 0.9999990947523318, iteration: 98809
loss: 0.9968092441558838,grad_norm: 0.9225453818795937, iteration: 98810
loss: 1.0006030797958374,grad_norm: 0.9999990565639088, iteration: 98811
loss: 1.0010688304901123,grad_norm: 0.9767920732553179, iteration: 98812
loss: 0.9989056587219238,grad_norm: 0.9919661525683928, iteration: 98813
loss: 1.0374006032943726,grad_norm: 0.9999993523675054, iteration: 98814
loss: 0.9886530637741089,grad_norm: 0.8734164671602535, iteration: 98815
loss: 1.0652693510055542,grad_norm: 0.9999994816379417, iteration: 98816
loss: 1.020232081413269,grad_norm: 0.999998997795417, iteration: 98817
loss: 1.0249279737472534,grad_norm: 0.999999226624805, iteration: 98818
loss: 0.9977713227272034,grad_norm: 0.9875289874377169, iteration: 98819
loss: 0.9982664585113525,grad_norm: 0.9999992505368673, iteration: 98820
loss: 0.9795249700546265,grad_norm: 0.9999990808449856, iteration: 98821
loss: 1.0117071866989136,grad_norm: 0.9999991231067613, iteration: 98822
loss: 0.9717922806739807,grad_norm: 0.991760489816679, iteration: 98823
loss: 0.9926934838294983,grad_norm: 0.9999989931692156, iteration: 98824
loss: 0.9773803353309631,grad_norm: 0.9999992243294206, iteration: 98825
loss: 0.9838524460792542,grad_norm: 0.9999990853790107, iteration: 98826
loss: 1.1150307655334473,grad_norm: 0.9999995320671974, iteration: 98827
loss: 1.1052329540252686,grad_norm: 0.9999999404757342, iteration: 98828
loss: 1.1128543615341187,grad_norm: 0.9999994636460506, iteration: 98829
loss: 1.0477503538131714,grad_norm: 0.9999989779131288, iteration: 98830
loss: 1.0215179920196533,grad_norm: 0.9999997147754826, iteration: 98831
loss: 1.1279690265655518,grad_norm: 0.9999999133339553, iteration: 98832
loss: 0.9939950108528137,grad_norm: 0.9999991457593799, iteration: 98833
loss: 0.946574866771698,grad_norm: 0.9999992036663046, iteration: 98834
loss: 0.9880165457725525,grad_norm: 0.9254811372084845, iteration: 98835
loss: 1.0871280431747437,grad_norm: 0.9999992837876954, iteration: 98836
loss: 0.9793070554733276,grad_norm: 0.9926184066259324, iteration: 98837
loss: 0.9866963028907776,grad_norm: 0.9999991307811329, iteration: 98838
loss: 1.111341118812561,grad_norm: 0.9999992406952128, iteration: 98839
loss: 1.0577517747879028,grad_norm: 0.9999992005708359, iteration: 98840
loss: 1.0011324882507324,grad_norm: 0.9419972955075139, iteration: 98841
loss: 1.0102221965789795,grad_norm: 0.8882311002168027, iteration: 98842
loss: 1.0040531158447266,grad_norm: 0.9999990251611425, iteration: 98843
loss: 1.0315830707550049,grad_norm: 0.9999993297202596, iteration: 98844
loss: 1.0136116743087769,grad_norm: 0.9999993032486267, iteration: 98845
loss: 1.0162073373794556,grad_norm: 0.9999991795085549, iteration: 98846
loss: 0.9920520186424255,grad_norm: 0.9999991488078309, iteration: 98847
loss: 1.0423663854599,grad_norm: 0.9999991120326299, iteration: 98848
loss: 1.0031371116638184,grad_norm: 0.9999992989150807, iteration: 98849
loss: 1.0231664180755615,grad_norm: 0.9999990917807019, iteration: 98850
loss: 1.0331779718399048,grad_norm: 0.9999992635565335, iteration: 98851
loss: 0.9849969744682312,grad_norm: 0.9999996230671053, iteration: 98852
loss: 0.9652451872825623,grad_norm: 0.9225276163313376, iteration: 98853
loss: 0.9669379591941833,grad_norm: 0.9999991744327691, iteration: 98854
loss: 0.990653932094574,grad_norm: 0.9608901398557154, iteration: 98855
loss: 0.9798445105552673,grad_norm: 0.9958333072405887, iteration: 98856
loss: 1.0095492601394653,grad_norm: 0.9999990165173802, iteration: 98857
loss: 0.9799596071243286,grad_norm: 0.9894450733485214, iteration: 98858
loss: 0.9475394487380981,grad_norm: 0.9999995697641707, iteration: 98859
loss: 1.0030033588409424,grad_norm: 0.9876903623635137, iteration: 98860
loss: 1.0007621049880981,grad_norm: 0.9999991506173979, iteration: 98861
loss: 0.9650834202766418,grad_norm: 0.9999992557923951, iteration: 98862
loss: 0.992875337600708,grad_norm: 0.8784545907070433, iteration: 98863
loss: 1.037591576576233,grad_norm: 0.9990885769836455, iteration: 98864
loss: 1.0283489227294922,grad_norm: 0.8345560444489895, iteration: 98865
loss: 1.001824140548706,grad_norm: 0.945262206472651, iteration: 98866
loss: 0.9818038940429688,grad_norm: 1.0000000051972364, iteration: 98867
loss: 0.9834895730018616,grad_norm: 0.9999991988819651, iteration: 98868
loss: 1.0075247287750244,grad_norm: 0.9999992514444863, iteration: 98869
loss: 0.9952456951141357,grad_norm: 0.9999992662137058, iteration: 98870
loss: 0.9995023608207703,grad_norm: 0.9999991380372382, iteration: 98871
loss: 1.0864403247833252,grad_norm: 0.9999994317396758, iteration: 98872
loss: 0.9844019412994385,grad_norm: 0.9999995695906453, iteration: 98873
loss: 0.9882287979125977,grad_norm: 0.9999990970007254, iteration: 98874
loss: 1.0124021768569946,grad_norm: 0.9999993094944666, iteration: 98875
loss: 1.0289254188537598,grad_norm: 0.9999991084652896, iteration: 98876
loss: 0.99005126953125,grad_norm: 0.9999991826546825, iteration: 98877
loss: 1.014451503753662,grad_norm: 0.9999991423335695, iteration: 98878
loss: 1.0049954652786255,grad_norm: 0.9999991173895167, iteration: 98879
loss: 1.011566400527954,grad_norm: 0.9999990537940855, iteration: 98880
loss: 1.037237286567688,grad_norm: 0.9999998612569562, iteration: 98881
loss: 1.0989302396774292,grad_norm: 0.9999991857650877, iteration: 98882
loss: 1.0060089826583862,grad_norm: 0.9411133035418272, iteration: 98883
loss: 0.9894865155220032,grad_norm: 0.9999992018061851, iteration: 98884
loss: 1.0748909711837769,grad_norm: 0.999999078795933, iteration: 98885
loss: 0.9843973517417908,grad_norm: 0.9722404770246085, iteration: 98886
loss: 1.0344884395599365,grad_norm: 1.0000000755277012, iteration: 98887
loss: 1.0436229705810547,grad_norm: 0.9059306162670988, iteration: 98888
loss: 1.0129948854446411,grad_norm: 0.9999991914303126, iteration: 98889
loss: 1.0383975505828857,grad_norm: 0.9999996632708237, iteration: 98890
loss: 1.0404667854309082,grad_norm: 0.9999994865674439, iteration: 98891
loss: 1.0216611623764038,grad_norm: 0.9419519047582121, iteration: 98892
loss: 1.0190554857254028,grad_norm: 0.9999991341359846, iteration: 98893
loss: 0.9896873831748962,grad_norm: 0.9999991637855961, iteration: 98894
loss: 1.1382043361663818,grad_norm: 0.9999999220377128, iteration: 98895
loss: 1.0234938859939575,grad_norm: 0.9999992282258056, iteration: 98896
loss: 1.0118205547332764,grad_norm: 0.9961405134112091, iteration: 98897
loss: 1.020044207572937,grad_norm: 0.999998959933145, iteration: 98898
loss: 1.0213844776153564,grad_norm: 0.9999991066324965, iteration: 98899
loss: 0.9763203263282776,grad_norm: 0.9999991637907204, iteration: 98900
loss: 0.9917935729026794,grad_norm: 0.9999991514176148, iteration: 98901
loss: 1.0202654600143433,grad_norm: 0.9999996425087402, iteration: 98902
loss: 1.0153850317001343,grad_norm: 0.9999990891724391, iteration: 98903
loss: 1.049222707748413,grad_norm: 0.9999996560981652, iteration: 98904
loss: 0.9800065159797668,grad_norm: 0.8944282127815014, iteration: 98905
loss: 1.017345905303955,grad_norm: 0.9999992305102425, iteration: 98906
loss: 0.9946838021278381,grad_norm: 0.9999991981653156, iteration: 98907
loss: 1.009744644165039,grad_norm: 0.9999992097934294, iteration: 98908
loss: 1.0026990175247192,grad_norm: 0.9999990257407421, iteration: 98909
loss: 0.9984928369522095,grad_norm: 0.9999991557248662, iteration: 98910
loss: 0.9230797290802002,grad_norm: 0.9999991524895203, iteration: 98911
loss: 1.022143006324768,grad_norm: 0.9410187409411717, iteration: 98912
loss: 1.0103981494903564,grad_norm: 0.8400394718372619, iteration: 98913
loss: 0.9884448051452637,grad_norm: 0.9239593821031471, iteration: 98914
loss: 0.9571651816368103,grad_norm: 0.9954964474980199, iteration: 98915
loss: 1.0046082735061646,grad_norm: 0.9863550825969415, iteration: 98916
loss: 1.0056653022766113,grad_norm: 0.9999990542637023, iteration: 98917
loss: 1.0289350748062134,grad_norm: 0.9999991134895657, iteration: 98918
loss: 0.9685206413269043,grad_norm: 0.9143845507175823, iteration: 98919
loss: 1.0021692514419556,grad_norm: 0.8413877951068406, iteration: 98920
loss: 1.001858115196228,grad_norm: 0.9999992114899927, iteration: 98921
loss: 0.9764429330825806,grad_norm: 0.999999292593156, iteration: 98922
loss: 1.0157115459442139,grad_norm: 0.9999991072438359, iteration: 98923
loss: 1.0069998502731323,grad_norm: 0.9999993177696875, iteration: 98924
loss: 0.962226927280426,grad_norm: 0.9999991118233075, iteration: 98925
loss: 1.0107358694076538,grad_norm: 0.9999990850672266, iteration: 98926
loss: 0.999514102935791,grad_norm: 0.9999989914354063, iteration: 98927
loss: 1.0073294639587402,grad_norm: 0.9999995391521495, iteration: 98928
loss: 0.9938499331474304,grad_norm: 0.9613486153685576, iteration: 98929
loss: 1.0454574823379517,grad_norm: 0.9999999419497648, iteration: 98930
loss: 1.1063307523727417,grad_norm: 0.9999999732947144, iteration: 98931
loss: 0.9890964031219482,grad_norm: 0.9999991058614308, iteration: 98932
loss: 0.9853276610374451,grad_norm: 0.9599575454324306, iteration: 98933
loss: 0.9941697120666504,grad_norm: 0.9999990843582076, iteration: 98934
loss: 1.000580906867981,grad_norm: 0.9999997519713902, iteration: 98935
loss: 1.012141466140747,grad_norm: 0.9999991883431185, iteration: 98936
loss: 1.0093696117401123,grad_norm: 0.9999994518754614, iteration: 98937
loss: 0.975609302520752,grad_norm: 0.9508687334818269, iteration: 98938
loss: 0.976833164691925,grad_norm: 0.9999991026959529, iteration: 98939
loss: 1.0183924436569214,grad_norm: 0.999999215608236, iteration: 98940
loss: 1.0453360080718994,grad_norm: 0.9999991878001705, iteration: 98941
loss: 0.9768688082695007,grad_norm: 0.9276612684190964, iteration: 98942
loss: 0.993769645690918,grad_norm: 0.9999991717965512, iteration: 98943
loss: 1.013024091720581,grad_norm: 0.9999991124708281, iteration: 98944
loss: 0.9910848140716553,grad_norm: 0.9999992958016201, iteration: 98945
loss: 1.0697996616363525,grad_norm: 0.999999333221597, iteration: 98946
loss: 0.9848257899284363,grad_norm: 0.9999991067010094, iteration: 98947
loss: 0.9904634356498718,grad_norm: 0.9999990994906803, iteration: 98948
loss: 0.9967910051345825,grad_norm: 0.9622606460533193, iteration: 98949
loss: 1.0135104656219482,grad_norm: 0.9999992206622044, iteration: 98950
loss: 1.0071414709091187,grad_norm: 0.999999322998061, iteration: 98951
loss: 0.9649030566215515,grad_norm: 0.9742522778783868, iteration: 98952
loss: 0.9986679553985596,grad_norm: 0.9869571952231165, iteration: 98953
loss: 1.0040831565856934,grad_norm: 0.9999989848782983, iteration: 98954
loss: 1.0422780513763428,grad_norm: 0.9999995301268129, iteration: 98955
loss: 0.9901259541511536,grad_norm: 0.9999992065651804, iteration: 98956
loss: 1.0085930824279785,grad_norm: 0.9999992286950722, iteration: 98957
loss: 1.0289671421051025,grad_norm: 0.9999990299790003, iteration: 98958
loss: 0.9507215023040771,grad_norm: 0.9999994691402545, iteration: 98959
loss: 1.022756814956665,grad_norm: 0.9999991142867822, iteration: 98960
loss: 0.9778066277503967,grad_norm: 0.9206525771410512, iteration: 98961
loss: 0.9588512182235718,grad_norm: 0.9999990639604962, iteration: 98962
loss: 0.9827858805656433,grad_norm: 0.9999990328559158, iteration: 98963
loss: 0.9578525424003601,grad_norm: 0.9999989815305314, iteration: 98964
loss: 1.0077873468399048,grad_norm: 0.9201967875029342, iteration: 98965
loss: 1.0103577375411987,grad_norm: 0.9999990071568918, iteration: 98966
loss: 0.9860464930534363,grad_norm: 0.9999990950326892, iteration: 98967
loss: 1.026528000831604,grad_norm: 0.9999992035205849, iteration: 98968
loss: 1.0081247091293335,grad_norm: 0.9999991379451214, iteration: 98969
loss: 1.0388628244400024,grad_norm: 0.9917411917361062, iteration: 98970
loss: 0.9675894975662231,grad_norm: 0.9999990569766177, iteration: 98971
loss: 1.008169174194336,grad_norm: 0.999999172276275, iteration: 98972
loss: 1.0158542394638062,grad_norm: 0.9447012833016164, iteration: 98973
loss: 1.0041555166244507,grad_norm: 0.9999990297924253, iteration: 98974
loss: 0.9581241607666016,grad_norm: 0.9999992274442165, iteration: 98975
loss: 1.0057452917099,grad_norm: 0.9999991199001006, iteration: 98976
loss: 0.9973041415214539,grad_norm: 0.9999992707085895, iteration: 98977
loss: 1.0390346050262451,grad_norm: 0.9999992944492488, iteration: 98978
loss: 0.9724076390266418,grad_norm: 0.9999990555270192, iteration: 98979
loss: 1.039987325668335,grad_norm: 0.9665547987144371, iteration: 98980
loss: 0.9934027791023254,grad_norm: 0.8991632508542473, iteration: 98981
loss: 1.0081188678741455,grad_norm: 0.9999990883426919, iteration: 98982
loss: 0.9519942998886108,grad_norm: 0.9999991164000421, iteration: 98983
loss: 1.030785322189331,grad_norm: 0.9999990364754959, iteration: 98984
loss: 0.9763561487197876,grad_norm: 0.9999991544651154, iteration: 98985
loss: 0.963756799697876,grad_norm: 0.999999259442106, iteration: 98986
loss: 1.0028821229934692,grad_norm: 0.9999991861132338, iteration: 98987
loss: 1.0060758590698242,grad_norm: 0.9999991104618854, iteration: 98988
loss: 1.0492054224014282,grad_norm: 0.9999991608287007, iteration: 98989
loss: 0.9874855279922485,grad_norm: 0.9999991660823242, iteration: 98990
loss: 1.0389858484268188,grad_norm: 0.8781920424030942, iteration: 98991
loss: 0.9900500178337097,grad_norm: 0.9999992642921506, iteration: 98992
loss: 1.0288453102111816,grad_norm: 0.9999990518540108, iteration: 98993
loss: 0.988777756690979,grad_norm: 0.9999993793676629, iteration: 98994
loss: 1.0168360471725464,grad_norm: 0.903788698942292, iteration: 98995
loss: 1.013674259185791,grad_norm: 0.968229357231425, iteration: 98996
loss: 0.9975379705429077,grad_norm: 0.9999992300086195, iteration: 98997
loss: 1.0257898569107056,grad_norm: 0.8534151767173215, iteration: 98998
loss: 1.0497108697891235,grad_norm: 0.9999993524689331, iteration: 98999
loss: 1.0236445665359497,grad_norm: 0.9999991951160433, iteration: 99000
loss: 1.038020133972168,grad_norm: 0.9999991235921606, iteration: 99001
loss: 0.9855095744132996,grad_norm: 0.9999992113673477, iteration: 99002
loss: 1.0105544328689575,grad_norm: 0.9999991525066664, iteration: 99003
loss: 1.0060579776763916,grad_norm: 0.999999091707463, iteration: 99004
loss: 0.9868139624595642,grad_norm: 0.9548639115274117, iteration: 99005
loss: 1.0077401399612427,grad_norm: 0.9999991028970228, iteration: 99006
loss: 1.0250312089920044,grad_norm: 0.935381418470647, iteration: 99007
loss: 1.0156704187393188,grad_norm: 0.9999990862379996, iteration: 99008
loss: 0.9948709011077881,grad_norm: 0.8804103745571864, iteration: 99009
loss: 1.0075308084487915,grad_norm: 0.9999991591080596, iteration: 99010
loss: 1.0151489973068237,grad_norm: 0.9325430263737218, iteration: 99011
loss: 1.0063176155090332,grad_norm: 0.8075045906909191, iteration: 99012
loss: 1.0246232748031616,grad_norm: 0.9999991457031534, iteration: 99013
loss: 0.9847978353500366,grad_norm: 0.9459007448407113, iteration: 99014
loss: 1.015203595161438,grad_norm: 0.9999991797401063, iteration: 99015
loss: 1.0075924396514893,grad_norm: 0.9999989912301408, iteration: 99016
loss: 1.014738917350769,grad_norm: 0.9999990699490243, iteration: 99017
loss: 0.9916720390319824,grad_norm: 0.9999990192718843, iteration: 99018
loss: 0.9553748369216919,grad_norm: 0.9643294966589857, iteration: 99019
loss: 0.9940593242645264,grad_norm: 0.9999990991166999, iteration: 99020
loss: 0.9963382482528687,grad_norm: 0.903450916591427, iteration: 99021
loss: 0.9714957475662231,grad_norm: 0.9999990393592536, iteration: 99022
loss: 0.9948790073394775,grad_norm: 0.9999991715014913, iteration: 99023
loss: 1.038357138633728,grad_norm: 0.9708025304212964, iteration: 99024
loss: 1.0056545734405518,grad_norm: 0.9495947618877469, iteration: 99025
loss: 1.0088237524032593,grad_norm: 0.9586153177734249, iteration: 99026
loss: 1.0302592515945435,grad_norm: 0.9182269482550518, iteration: 99027
loss: 1.0143122673034668,grad_norm: 0.9999991813379884, iteration: 99028
loss: 0.9940484762191772,grad_norm: 0.9167654198576536, iteration: 99029
loss: 0.9956493973731995,grad_norm: 0.9999991997778794, iteration: 99030
loss: 0.9512485265731812,grad_norm: 0.9999990100636372, iteration: 99031
loss: 1.0371812582015991,grad_norm: 0.9999991344130693, iteration: 99032
loss: 0.9934337735176086,grad_norm: 0.9999990989569659, iteration: 99033
loss: 1.0184922218322754,grad_norm: 0.9999992200795734, iteration: 99034
loss: 1.0072038173675537,grad_norm: 0.9466333847826193, iteration: 99035
loss: 0.9959200620651245,grad_norm: 0.9999991521603238, iteration: 99036
loss: 0.9723029136657715,grad_norm: 0.9999990518670586, iteration: 99037
loss: 1.005859613418579,grad_norm: 0.9999992270916762, iteration: 99038
loss: 1.0082833766937256,grad_norm: 0.9999991616131692, iteration: 99039
loss: 0.989036500453949,grad_norm: 0.9962996724527955, iteration: 99040
loss: 1.0279121398925781,grad_norm: 0.9795023343626409, iteration: 99041
loss: 1.054070234298706,grad_norm: 0.9999993254097941, iteration: 99042
loss: 1.0248035192489624,grad_norm: 0.9999992134529135, iteration: 99043
loss: 0.9906923174858093,grad_norm: 0.9999989968277421, iteration: 99044
loss: 1.0130478143692017,grad_norm: 0.9385856876823802, iteration: 99045
loss: 0.9609177708625793,grad_norm: 0.9999990226023944, iteration: 99046
loss: 0.9697853922843933,grad_norm: 0.9999990585318984, iteration: 99047
loss: 1.0140721797943115,grad_norm: 0.9999989411186743, iteration: 99048
loss: 1.0395170450210571,grad_norm: 0.9681386226354961, iteration: 99049
loss: 1.0001988410949707,grad_norm: 0.999999115122801, iteration: 99050
loss: 1.0251197814941406,grad_norm: 0.999999100099507, iteration: 99051
loss: 1.0022635459899902,grad_norm: 0.9999990856797116, iteration: 99052
loss: 1.0149253606796265,grad_norm: 0.9999990605448199, iteration: 99053
loss: 0.9988020658493042,grad_norm: 0.9999991769018836, iteration: 99054
loss: 0.9881709814071655,grad_norm: 0.9999990384657126, iteration: 99055
loss: 0.9821680784225464,grad_norm: 0.903748008207057, iteration: 99056
loss: 1.000309705734253,grad_norm: 0.9621745490614366, iteration: 99057
loss: 0.986874520778656,grad_norm: 0.9706296531079538, iteration: 99058
loss: 1.0068249702453613,grad_norm: 0.9999990119660479, iteration: 99059
loss: 0.9844202995300293,grad_norm: 0.9999990182452175, iteration: 99060
loss: 0.9725948572158813,grad_norm: 0.9682640481271767, iteration: 99061
loss: 0.9850981831550598,grad_norm: 0.9999991645456969, iteration: 99062
loss: 0.9854529500007629,grad_norm: 0.9207139056381496, iteration: 99063
loss: 0.9923221468925476,grad_norm: 0.9999991014163289, iteration: 99064
loss: 1.0321226119995117,grad_norm: 0.9999992095831729, iteration: 99065
loss: 0.9795278906822205,grad_norm: 0.9999990622836029, iteration: 99066
loss: 1.0608172416687012,grad_norm: 0.9999994093892437, iteration: 99067
loss: 1.0027319192886353,grad_norm: 0.8383939093694291, iteration: 99068
loss: 0.995684802532196,grad_norm: 0.9999998593398747, iteration: 99069
loss: 0.9813427925109863,grad_norm: 0.7419742002393281, iteration: 99070
loss: 0.9689293503761292,grad_norm: 0.999999163879536, iteration: 99071
loss: 1.013980507850647,grad_norm: 0.9999992227169451, iteration: 99072
loss: 1.0315556526184082,grad_norm: 0.9999990790022637, iteration: 99073
loss: 1.0183968544006348,grad_norm: 0.9358463548073891, iteration: 99074
loss: 1.0350127220153809,grad_norm: 0.9999993249841534, iteration: 99075
loss: 0.9766412973403931,grad_norm: 0.9999993432721649, iteration: 99076
loss: 0.9967483878135681,grad_norm: 0.9957984979602363, iteration: 99077
loss: 1.008800983428955,grad_norm: 0.9999990408225933, iteration: 99078
loss: 1.0187196731567383,grad_norm: 0.9999992646382355, iteration: 99079
loss: 0.982748806476593,grad_norm: 0.9999991315077743, iteration: 99080
loss: 1.0315784215927124,grad_norm: 0.9474856733769539, iteration: 99081
loss: 1.0028971433639526,grad_norm: 0.9104897009212257, iteration: 99082
loss: 0.9957751631736755,grad_norm: 0.9999991197454684, iteration: 99083
loss: 0.9893057346343994,grad_norm: 0.9999991310712724, iteration: 99084
loss: 1.0195080041885376,grad_norm: 0.8996916372938849, iteration: 99085
loss: 1.0218703746795654,grad_norm: 0.9999996353223041, iteration: 99086
loss: 0.9893648028373718,grad_norm: 0.9999990331263167, iteration: 99087
loss: 0.997734546661377,grad_norm: 0.9761966951153578, iteration: 99088
loss: 1.0208967924118042,grad_norm: 0.9999991835996523, iteration: 99089
loss: 1.0135847330093384,grad_norm: 0.9854882565219572, iteration: 99090
loss: 1.0352470874786377,grad_norm: 0.9603054641417323, iteration: 99091
loss: 0.9973608255386353,grad_norm: 0.9999995280455816, iteration: 99092
loss: 1.00729501247406,grad_norm: 0.9999991968390254, iteration: 99093
loss: 0.9859634637832642,grad_norm: 0.9999989434888472, iteration: 99094
loss: 0.9761463403701782,grad_norm: 0.9999991531495587, iteration: 99095
loss: 0.9989545345306396,grad_norm: 0.9999991345408318, iteration: 99096
loss: 0.9980682730674744,grad_norm: 0.9999991227431806, iteration: 99097
loss: 1.0140568017959595,grad_norm: 0.9999990772350412, iteration: 99098
loss: 1.0194607973098755,grad_norm: 0.9999992745482104, iteration: 99099
loss: 1.0067447423934937,grad_norm: 0.9999990760409986, iteration: 99100
loss: 0.9761188626289368,grad_norm: 0.999999181218599, iteration: 99101
loss: 1.0267919301986694,grad_norm: 0.9999992625594121, iteration: 99102
loss: 1.0181636810302734,grad_norm: 0.9999991545810536, iteration: 99103
loss: 1.01809561252594,grad_norm: 0.999999326348412, iteration: 99104
loss: 0.9956733584403992,grad_norm: 0.8410741509059585, iteration: 99105
loss: 1.0185703039169312,grad_norm: 0.9999992943481739, iteration: 99106
loss: 0.9727335572242737,grad_norm: 0.9999990947258679, iteration: 99107
loss: 0.9912815690040588,grad_norm: 0.9999990186672539, iteration: 99108
loss: 1.0219191312789917,grad_norm: 0.9999991365440023, iteration: 99109
loss: 1.0671261548995972,grad_norm: 0.9999998929065574, iteration: 99110
loss: 1.0278522968292236,grad_norm: 0.9999990502643714, iteration: 99111
loss: 1.016083836555481,grad_norm: 0.991149853781286, iteration: 99112
loss: 0.9720435738563538,grad_norm: 0.999998988494137, iteration: 99113
loss: 0.98735511302948,grad_norm: 0.9999992274741626, iteration: 99114
loss: 0.9877457022666931,grad_norm: 0.9999989912936318, iteration: 99115
loss: 0.9871059060096741,grad_norm: 0.9999991550942584, iteration: 99116
loss: 1.0238678455352783,grad_norm: 0.9999992442142588, iteration: 99117
loss: 0.9697095155715942,grad_norm: 0.9999991450353792, iteration: 99118
loss: 1.052024245262146,grad_norm: 0.9999999730947039, iteration: 99119
loss: 0.964935302734375,grad_norm: 0.8996638660437359, iteration: 99120
loss: 0.9894641041755676,grad_norm: 0.9025493374459986, iteration: 99121
loss: 0.9854404926300049,grad_norm: 0.9662015839437766, iteration: 99122
loss: 1.005394458770752,grad_norm: 0.9999991722403113, iteration: 99123
loss: 1.0186302661895752,grad_norm: 0.974893184090771, iteration: 99124
loss: 0.9874498844146729,grad_norm: 0.9874716627315087, iteration: 99125
loss: 1.0297245979309082,grad_norm: 0.9999989683575811, iteration: 99126
loss: 0.9995853900909424,grad_norm: 0.9999991654462242, iteration: 99127
loss: 0.9724902510643005,grad_norm: 0.9999991671371433, iteration: 99128
loss: 1.0708378553390503,grad_norm: 0.9999991434708614, iteration: 99129
loss: 1.0062003135681152,grad_norm: 0.9999990330204661, iteration: 99130
loss: 0.976673424243927,grad_norm: 0.9999991659140588, iteration: 99131
loss: 1.0185045003890991,grad_norm: 0.9999992777479996, iteration: 99132
loss: 1.0002107620239258,grad_norm: 0.999999080953213, iteration: 99133
loss: 0.9844241142272949,grad_norm: 0.9999993303402895, iteration: 99134
loss: 0.9307073354721069,grad_norm: 0.9999991268173777, iteration: 99135
loss: 1.0022728443145752,grad_norm: 0.9999992046069854, iteration: 99136
loss: 1.017301082611084,grad_norm: 0.9999992653262236, iteration: 99137
loss: 0.9628897905349731,grad_norm: 0.9999991445455082, iteration: 99138
loss: 0.9969317317008972,grad_norm: 0.9187705135042906, iteration: 99139
loss: 1.0028520822525024,grad_norm: 0.9999991732275548, iteration: 99140
loss: 1.0351543426513672,grad_norm: 0.9422598313399704, iteration: 99141
loss: 0.9932124614715576,grad_norm: 0.9999991151712979, iteration: 99142
loss: 1.011986255645752,grad_norm: 0.9737059685659437, iteration: 99143
loss: 1.0138369798660278,grad_norm: 0.999999610653614, iteration: 99144
loss: 1.036826729774475,grad_norm: 0.9999999354187585, iteration: 99145
loss: 1.0040009021759033,grad_norm: 0.999999004251774, iteration: 99146
loss: 0.9910657405853271,grad_norm: 0.9999990679120443, iteration: 99147
loss: 1.032965898513794,grad_norm: 0.9999993381752138, iteration: 99148
loss: 0.9962472319602966,grad_norm: 0.8481132672430566, iteration: 99149
loss: 0.9542383551597595,grad_norm: 0.9999992662710885, iteration: 99150
loss: 0.9930597543716431,grad_norm: 0.9019570382733998, iteration: 99151
loss: 0.9955428838729858,grad_norm: 0.9892947010508332, iteration: 99152
loss: 0.9843162298202515,grad_norm: 0.9908234252829237, iteration: 99153
loss: 1.0280030965805054,grad_norm: 0.89136144064245, iteration: 99154
loss: 0.9879885315895081,grad_norm: 0.9999991076463751, iteration: 99155
loss: 0.9742582440376282,grad_norm: 0.8893066418206352, iteration: 99156
loss: 0.9914795756340027,grad_norm: 0.8963835225348672, iteration: 99157
loss: 1.0000693798065186,grad_norm: 0.999999178640921, iteration: 99158
loss: 0.9746014475822449,grad_norm: 0.9999990777590184, iteration: 99159
loss: 0.9655165076255798,grad_norm: 0.9999991278546386, iteration: 99160
loss: 0.9997841119766235,grad_norm: 0.9999995238005683, iteration: 99161
loss: 1.015566110610962,grad_norm: 0.9268162694674206, iteration: 99162
loss: 0.9680635929107666,grad_norm: 0.9999999050979702, iteration: 99163
loss: 0.9788822531700134,grad_norm: 0.9999990609266027, iteration: 99164
loss: 0.9631062746047974,grad_norm: 0.9999992238437354, iteration: 99165
loss: 1.0151402950286865,grad_norm: 0.9999990193065217, iteration: 99166
loss: 1.0072169303894043,grad_norm: 0.9999996779832311, iteration: 99167
loss: 0.9825977087020874,grad_norm: 0.9999991239127147, iteration: 99168
loss: 1.016098141670227,grad_norm: 0.9312112907847591, iteration: 99169
loss: 1.0221084356307983,grad_norm: 0.9999993382622459, iteration: 99170
loss: 0.9916306138038635,grad_norm: 0.9810761034674581, iteration: 99171
loss: 0.9967411756515503,grad_norm: 0.9999992744363755, iteration: 99172
loss: 1.054587483406067,grad_norm: 0.9999992265984061, iteration: 99173
loss: 0.9603878259658813,grad_norm: 0.9999991777943789, iteration: 99174
loss: 0.9885292649269104,grad_norm: 0.9999989466441787, iteration: 99175
loss: 0.998238742351532,grad_norm: 0.9999992572013557, iteration: 99176
loss: 0.9895467162132263,grad_norm: 0.9839289280620847, iteration: 99177
loss: 1.0121434926986694,grad_norm: 0.8451390838161417, iteration: 99178
loss: 1.0087250471115112,grad_norm: 0.9999994299414318, iteration: 99179
loss: 1.002418875694275,grad_norm: 0.9414145940902507, iteration: 99180
loss: 1.0089290142059326,grad_norm: 0.999999220254228, iteration: 99181
loss: 0.9838482737541199,grad_norm: 0.9999992075539977, iteration: 99182
loss: 1.0157774686813354,grad_norm: 0.9999991919663331, iteration: 99183
loss: 0.9981633424758911,grad_norm: 0.9999991874927361, iteration: 99184
loss: 1.006493330001831,grad_norm: 0.9999990261732055, iteration: 99185
loss: 1.014150857925415,grad_norm: 0.99999911480042, iteration: 99186
loss: 0.9864979982376099,grad_norm: 0.9999991597566198, iteration: 99187
loss: 1.0107403993606567,grad_norm: 0.993553646649848, iteration: 99188
loss: 1.0111887454986572,grad_norm: 0.9999989601741093, iteration: 99189
loss: 0.9668605327606201,grad_norm: 0.9999991088482807, iteration: 99190
loss: 1.0206395387649536,grad_norm: 0.9999994754485043, iteration: 99191
loss: 0.9740003347396851,grad_norm: 0.999999170703706, iteration: 99192
loss: 0.9787282943725586,grad_norm: 0.9999991766478078, iteration: 99193
loss: 1.0845186710357666,grad_norm: 0.9999993486415027, iteration: 99194
loss: 1.0426884889602661,grad_norm: 0.9999993068668658, iteration: 99195
loss: 1.0326870679855347,grad_norm: 0.9999991645329948, iteration: 99196
loss: 1.0104246139526367,grad_norm: 0.9909556900631491, iteration: 99197
loss: 1.0141069889068604,grad_norm: 0.9990731488064131, iteration: 99198
loss: 0.9756583571434021,grad_norm: 0.8180936028840871, iteration: 99199
loss: 1.0296173095703125,grad_norm: 0.9999989999307128, iteration: 99200
loss: 0.9952796101570129,grad_norm: 0.9999992960123064, iteration: 99201
loss: 1.0246362686157227,grad_norm: 0.9999990140372744, iteration: 99202
loss: 1.0324552059173584,grad_norm: 0.9999992633609335, iteration: 99203
loss: 1.0393602848052979,grad_norm: 0.999999216403477, iteration: 99204
loss: 0.9996021389961243,grad_norm: 0.9999991958269137, iteration: 99205
loss: 1.0021222829818726,grad_norm: 0.999999040548219, iteration: 99206
loss: 1.0057251453399658,grad_norm: 0.923980928605283, iteration: 99207
loss: 0.9898499250411987,grad_norm: 0.9999991799811005, iteration: 99208
loss: 1.0030534267425537,grad_norm: 0.9999991987043385, iteration: 99209
loss: 1.0119218826293945,grad_norm: 0.9999998243452083, iteration: 99210
loss: 1.0204812288284302,grad_norm: 0.9999991291583421, iteration: 99211
loss: 1.0157874822616577,grad_norm: 0.999998968302858, iteration: 99212
loss: 1.0361157655715942,grad_norm: 0.9999993866923587, iteration: 99213
loss: 1.001305103302002,grad_norm: 0.9735885312360518, iteration: 99214
loss: 1.005716323852539,grad_norm: 0.9999993219758053, iteration: 99215
loss: 1.0146162509918213,grad_norm: 0.8322668717718109, iteration: 99216
loss: 0.9818649291992188,grad_norm: 0.9999991052777336, iteration: 99217
loss: 0.9723325967788696,grad_norm: 0.9999990263262761, iteration: 99218
loss: 0.9621509313583374,grad_norm: 0.9999991545134063, iteration: 99219
loss: 1.0171878337860107,grad_norm: 0.9999990844670807, iteration: 99220
loss: 1.0208287239074707,grad_norm: 0.8703634386032196, iteration: 99221
loss: 1.0033276081085205,grad_norm: 0.9999991988409035, iteration: 99222
loss: 1.0113760232925415,grad_norm: 0.9462260605974724, iteration: 99223
loss: 1.0072089433670044,grad_norm: 0.9999998553229859, iteration: 99224
loss: 1.0142782926559448,grad_norm: 0.999999048756165, iteration: 99225
loss: 1.0044299364089966,grad_norm: 0.9999989785161105, iteration: 99226
loss: 0.9696767926216125,grad_norm: 0.999998954724788, iteration: 99227
loss: 1.0352987051010132,grad_norm: 0.999999097882352, iteration: 99228
loss: 1.0027658939361572,grad_norm: 0.9777928877634179, iteration: 99229
loss: 0.9824156165122986,grad_norm: 0.9999992591743924, iteration: 99230
loss: 1.0003248453140259,grad_norm: 0.9700335452003297, iteration: 99231
loss: 1.003237247467041,grad_norm: 0.9999992502461023, iteration: 99232
loss: 1.010401964187622,grad_norm: 0.9999999490535404, iteration: 99233
loss: 0.9811289310455322,grad_norm: 0.9999990734435723, iteration: 99234
loss: 1.003305196762085,grad_norm: 0.9999992945656935, iteration: 99235
loss: 0.9817428588867188,grad_norm: 0.8766805688060098, iteration: 99236
loss: 0.9978268146514893,grad_norm: 0.786564668016468, iteration: 99237
loss: 1.0025303363800049,grad_norm: 0.9999992971531118, iteration: 99238
loss: 0.9893032908439636,grad_norm: 0.9999990414910073, iteration: 99239
loss: 1.037292242050171,grad_norm: 0.9999991116515393, iteration: 99240
loss: 1.0337072610855103,grad_norm: 0.9999990931127859, iteration: 99241
loss: 0.9991516470909119,grad_norm: 0.9999994991176742, iteration: 99242
loss: 0.9681254625320435,grad_norm: 0.999999158613486, iteration: 99243
loss: 1.0162765979766846,grad_norm: 0.9999992493265693, iteration: 99244
loss: 0.9677976369857788,grad_norm: 0.9999992444289552, iteration: 99245
loss: 0.9989400506019592,grad_norm: 0.9999993250508536, iteration: 99246
loss: 1.0078694820404053,grad_norm: 0.9735991135697508, iteration: 99247
loss: 0.9980699419975281,grad_norm: 0.9453460333402337, iteration: 99248
loss: 1.0295783281326294,grad_norm: 0.9999992578405984, iteration: 99249
loss: 1.0006412267684937,grad_norm: 0.8981382554559463, iteration: 99250
loss: 1.0174198150634766,grad_norm: 0.9999992814482838, iteration: 99251
loss: 1.0154225826263428,grad_norm: 0.9816781000479159, iteration: 99252
loss: 0.9843713045120239,grad_norm: 0.9999991789112397, iteration: 99253
loss: 0.9902818202972412,grad_norm: 0.977017992598579, iteration: 99254
loss: 1.0057626962661743,grad_norm: 0.9999990464775703, iteration: 99255
loss: 0.9712876677513123,grad_norm: 0.9999989813627068, iteration: 99256
loss: 0.9767071008682251,grad_norm: 0.9999992485684234, iteration: 99257
loss: 1.039405345916748,grad_norm: 0.9999992997407947, iteration: 99258
loss: 1.0031801462173462,grad_norm: 0.9430824235560414, iteration: 99259
loss: 0.9966403245925903,grad_norm: 0.9999991940894066, iteration: 99260
loss: 1.0092761516571045,grad_norm: 0.9999990368602188, iteration: 99261
loss: 0.9999693036079407,grad_norm: 0.9590012768225233, iteration: 99262
loss: 0.994555652141571,grad_norm: 0.9999990954146334, iteration: 99263
loss: 1.0120831727981567,grad_norm: 0.9786231825967643, iteration: 99264
loss: 1.0070968866348267,grad_norm: 0.9999998105476319, iteration: 99265
loss: 0.9946886301040649,grad_norm: 0.9288224315658665, iteration: 99266
loss: 1.02569580078125,grad_norm: 0.9999990949546654, iteration: 99267
loss: 0.9822274446487427,grad_norm: 0.9999988895621833, iteration: 99268
loss: 0.979765772819519,grad_norm: 0.9113426668772802, iteration: 99269
loss: 0.9737423062324524,grad_norm: 0.9999992202320364, iteration: 99270
loss: 1.0370333194732666,grad_norm: 0.9999993835465399, iteration: 99271
loss: 1.0242551565170288,grad_norm: 0.9179118164447172, iteration: 99272
loss: 0.984614908695221,grad_norm: 0.9999991884712428, iteration: 99273
loss: 0.9913251399993896,grad_norm: 0.9875912268196714, iteration: 99274
loss: 0.9880648255348206,grad_norm: 0.9999989356314952, iteration: 99275
loss: 1.001301646232605,grad_norm: 0.9053640472583663, iteration: 99276
loss: 0.9718354344367981,grad_norm: 0.9999989795973002, iteration: 99277
loss: 1.0029009580612183,grad_norm: 0.9999992540818218, iteration: 99278
loss: 0.9782069325447083,grad_norm: 0.8889846372546321, iteration: 99279
loss: 1.049608826637268,grad_norm: 0.999999250766945, iteration: 99280
loss: 1.0070573091506958,grad_norm: 0.999999824191388, iteration: 99281
loss: 0.9516751170158386,grad_norm: 0.951610695410864, iteration: 99282
loss: 1.0156841278076172,grad_norm: 0.9999990885497911, iteration: 99283
loss: 1.0044323205947876,grad_norm: 0.9328335681856653, iteration: 99284
loss: 1.0154584646224976,grad_norm: 0.9683075770814721, iteration: 99285
loss: 0.9682673811912537,grad_norm: 0.9999993611020774, iteration: 99286
loss: 1.0129393339157104,grad_norm: 0.9999992957351039, iteration: 99287
loss: 1.0013091564178467,grad_norm: 0.9999990975260752, iteration: 99288
loss: 0.9753463864326477,grad_norm: 0.9999991609579241, iteration: 99289
loss: 0.9978094100952148,grad_norm: 0.9343319023158694, iteration: 99290
loss: 0.994141161441803,grad_norm: 0.9999991348222911, iteration: 99291
loss: 0.9685925245285034,grad_norm: 0.9552843517484537, iteration: 99292
loss: 1.0196576118469238,grad_norm: 0.9999993113256511, iteration: 99293
loss: 1.0221648216247559,grad_norm: 0.9999991540221508, iteration: 99294
loss: 1.0040620565414429,grad_norm: 0.7998319039978364, iteration: 99295
loss: 1.0043154954910278,grad_norm: 0.9999992142340356, iteration: 99296
loss: 1.0204145908355713,grad_norm: 0.9999991582165358, iteration: 99297
loss: 0.9615846276283264,grad_norm: 0.999999309348537, iteration: 99298
loss: 1.024933099746704,grad_norm: 0.999999367583639, iteration: 99299
loss: 0.9928367137908936,grad_norm: 0.9999991551074151, iteration: 99300
loss: 0.9690853357315063,grad_norm: 0.8973923332394526, iteration: 99301
loss: 1.0110923051834106,grad_norm: 0.9938575628667605, iteration: 99302
loss: 0.9913527369499207,grad_norm: 0.999999218236137, iteration: 99303
loss: 0.9998297095298767,grad_norm: 0.9379554466251356, iteration: 99304
loss: 1.0361955165863037,grad_norm: 0.9999989227418551, iteration: 99305
loss: 1.0076406002044678,grad_norm: 0.9999999437757958, iteration: 99306
loss: 0.9839060306549072,grad_norm: 0.9999991323603906, iteration: 99307
loss: 1.0089969635009766,grad_norm: 0.9999993071335589, iteration: 99308
loss: 1.0543627738952637,grad_norm: 0.9999991084532951, iteration: 99309
loss: 0.9657151699066162,grad_norm: 0.9697693884805966, iteration: 99310
loss: 1.023431658744812,grad_norm: 0.9471363968492589, iteration: 99311
loss: 0.9708983302116394,grad_norm: 0.999999002545695, iteration: 99312
loss: 1.006235957145691,grad_norm: 0.9999991607787221, iteration: 99313
loss: 1.0318372249603271,grad_norm: 0.9569605421861487, iteration: 99314
loss: 0.9931371212005615,grad_norm: 0.9999990873912347, iteration: 99315
loss: 1.0296657085418701,grad_norm: 0.999999186666539, iteration: 99316
loss: 0.9590479135513306,grad_norm: 0.9999992200121646, iteration: 99317
loss: 1.0549147129058838,grad_norm: 0.9999996514526066, iteration: 99318
loss: 1.0267387628555298,grad_norm: 0.9999990775762638, iteration: 99319
loss: 1.0459485054016113,grad_norm: 0.9999994924304147, iteration: 99320
loss: 1.089038372039795,grad_norm: 0.9999994904025088, iteration: 99321
loss: 1.0200074911117554,grad_norm: 0.9999991847735605, iteration: 99322
loss: 0.9998615384101868,grad_norm: 0.9999993839766137, iteration: 99323
loss: 0.9992691278457642,grad_norm: 0.9999990865827155, iteration: 99324
loss: 0.9801473021507263,grad_norm: 0.9902097746221474, iteration: 99325
loss: 0.9987931251525879,grad_norm: 0.8654095408375352, iteration: 99326
loss: 0.9846265316009521,grad_norm: 0.9999989278957152, iteration: 99327
loss: 1.0010544061660767,grad_norm: 0.9999991419624491, iteration: 99328
loss: 0.9577016234397888,grad_norm: 0.9999990966196353, iteration: 99329
loss: 0.965773344039917,grad_norm: 0.9999990669476039, iteration: 99330
loss: 1.0114672183990479,grad_norm: 0.8997503173270026, iteration: 99331
loss: 0.9678860306739807,grad_norm: 0.9999990076465937, iteration: 99332
loss: 1.0017304420471191,grad_norm: 0.9999992637372966, iteration: 99333
loss: 0.9917653799057007,grad_norm: 0.9999992006949743, iteration: 99334
loss: 1.0259668827056885,grad_norm: 0.9999992996578442, iteration: 99335
loss: 1.0128291845321655,grad_norm: 0.9999994934685315, iteration: 99336
loss: 0.9967256188392639,grad_norm: 0.9999990769086871, iteration: 99337
loss: 0.9494202733039856,grad_norm: 0.9999990803554427, iteration: 99338
loss: 1.0222184658050537,grad_norm: 0.9999988664603073, iteration: 99339
loss: 0.9914868474006653,grad_norm: 0.9999990750351009, iteration: 99340
loss: 0.9974938035011292,grad_norm: 0.9999991888837865, iteration: 99341
loss: 0.9920898079872131,grad_norm: 0.9393562775127207, iteration: 99342
loss: 1.0079857110977173,grad_norm: 0.953935886183591, iteration: 99343
loss: 0.9581375122070312,grad_norm: 0.9516648254639442, iteration: 99344
loss: 1.0161550045013428,grad_norm: 0.9999989993694687, iteration: 99345
loss: 1.0184975862503052,grad_norm: 0.999999217068274, iteration: 99346
loss: 0.9661158919334412,grad_norm: 0.9999991362453157, iteration: 99347
loss: 1.0059480667114258,grad_norm: 0.9865755436755574, iteration: 99348
loss: 1.0219181776046753,grad_norm: 0.9999990889108279, iteration: 99349
loss: 1.071372389793396,grad_norm: 0.9999992358959443, iteration: 99350
loss: 0.9955031871795654,grad_norm: 0.9999990168480469, iteration: 99351
loss: 1.0055607557296753,grad_norm: 0.9999990278076367, iteration: 99352
loss: 0.9999430775642395,grad_norm: 0.8990057894872496, iteration: 99353
loss: 0.9646391272544861,grad_norm: 0.8626012776456699, iteration: 99354
loss: 1.0243312120437622,grad_norm: 0.9999995979165016, iteration: 99355
loss: 1.015977382659912,grad_norm: 0.9999992577765703, iteration: 99356
loss: 1.000732183456421,grad_norm: 0.9866405124612598, iteration: 99357
loss: 0.9901100397109985,grad_norm: 0.9999991806283465, iteration: 99358
loss: 0.9954584836959839,grad_norm: 0.9108815370449036, iteration: 99359
loss: 1.0087194442749023,grad_norm: 0.9999990394030154, iteration: 99360
loss: 0.9851159453392029,grad_norm: 0.9999991887575098, iteration: 99361
loss: 0.9782881736755371,grad_norm: 0.9526093233355434, iteration: 99362
loss: 0.9843114018440247,grad_norm: 0.9959899782865955, iteration: 99363
loss: 0.9854809641838074,grad_norm: 0.9950656298536541, iteration: 99364
loss: 0.9793047904968262,grad_norm: 0.9999991161056666, iteration: 99365
loss: 1.0219204425811768,grad_norm: 0.8624238377600347, iteration: 99366
loss: 1.0011540651321411,grad_norm: 0.9577921706996161, iteration: 99367
loss: 0.983623743057251,grad_norm: 0.998023822306413, iteration: 99368
loss: 0.9855032563209534,grad_norm: 0.9999990724967345, iteration: 99369
loss: 1.0052570104599,grad_norm: 0.9999991935873244, iteration: 99370
loss: 1.0172017812728882,grad_norm: 0.999999185862591, iteration: 99371
loss: 0.9800183773040771,grad_norm: 0.9999991227181496, iteration: 99372
loss: 0.9765676856040955,grad_norm: 0.8888634345993318, iteration: 99373
loss: 1.0106083154678345,grad_norm: 0.9888502108778152, iteration: 99374
loss: 1.0309115648269653,grad_norm: 0.9999995905601189, iteration: 99375
loss: 1.012948751449585,grad_norm: 0.9999990469614707, iteration: 99376
loss: 1.0222554206848145,grad_norm: 0.9999991555136797, iteration: 99377
loss: 0.9759347438812256,grad_norm: 0.9441708992150906, iteration: 99378
loss: 0.9795880913734436,grad_norm: 0.9999992084622135, iteration: 99379
loss: 0.9873748421669006,grad_norm: 0.9378816663598323, iteration: 99380
loss: 1.0321170091629028,grad_norm: 0.9999989838572125, iteration: 99381
loss: 0.9926427602767944,grad_norm: 0.9999992542978001, iteration: 99382
loss: 1.0426801443099976,grad_norm: 0.9999992098589241, iteration: 99383
loss: 1.005265474319458,grad_norm: 0.9589876064403844, iteration: 99384
loss: 0.968795120716095,grad_norm: 0.888685323674871, iteration: 99385
loss: 0.9781797528266907,grad_norm: 0.9999991244794214, iteration: 99386
loss: 0.9971227645874023,grad_norm: 0.9449673932145329, iteration: 99387
loss: 1.0215286016464233,grad_norm: 0.9999991448005359, iteration: 99388
loss: 0.9902566075325012,grad_norm: 0.882137912416214, iteration: 99389
loss: 0.9762065410614014,grad_norm: 0.9064631714236947, iteration: 99390
loss: 1.0069514513015747,grad_norm: 0.9999992672235679, iteration: 99391
loss: 1.0070608854293823,grad_norm: 0.9999992870867582, iteration: 99392
loss: 1.050034761428833,grad_norm: 0.9999990309260273, iteration: 99393
loss: 0.981323778629303,grad_norm: 0.9331331128471265, iteration: 99394
loss: 0.9863261580467224,grad_norm: 0.9999991614594967, iteration: 99395
loss: 0.9705637693405151,grad_norm: 0.9999990843240113, iteration: 99396
loss: 0.9950186014175415,grad_norm: 0.9999991809617171, iteration: 99397
loss: 1.0099619626998901,grad_norm: 0.9999990161935294, iteration: 99398
loss: 1.0149253606796265,grad_norm: 0.9999989088586093, iteration: 99399
loss: 1.0363529920578003,grad_norm: 0.9999993196533362, iteration: 99400
loss: 1.051798701286316,grad_norm: 0.9999989508090972, iteration: 99401
loss: 0.9950134754180908,grad_norm: 0.9675794810624683, iteration: 99402
loss: 1.028425693511963,grad_norm: 0.999999228692032, iteration: 99403
loss: 0.9902375340461731,grad_norm: 0.9459845283288385, iteration: 99404
loss: 1.0171536207199097,grad_norm: 0.9999991397590405, iteration: 99405
loss: 0.987126886844635,grad_norm: 0.9999992282989872, iteration: 99406
loss: 0.9806001782417297,grad_norm: 0.7584679692101925, iteration: 99407
loss: 1.0002412796020508,grad_norm: 0.9554149967872658, iteration: 99408
loss: 0.966854453086853,grad_norm: 0.999998923395537, iteration: 99409
loss: 1.0165458917617798,grad_norm: 0.999998956389942, iteration: 99410
loss: 0.997551679611206,grad_norm: 0.9492577615951964, iteration: 99411
loss: 0.9879632592201233,grad_norm: 0.9295419354375549, iteration: 99412
loss: 1.0078442096710205,grad_norm: 0.9999992441557968, iteration: 99413
loss: 0.9982713460922241,grad_norm: 0.8652458848137202, iteration: 99414
loss: 0.9860138893127441,grad_norm: 0.9240231887723829, iteration: 99415
loss: 1.0403481721878052,grad_norm: 0.9999994357529275, iteration: 99416
loss: 1.0000529289245605,grad_norm: 0.9999990632085243, iteration: 99417
loss: 1.3884949684143066,grad_norm: 0.9999999233714822, iteration: 99418
loss: 1.0028098821640015,grad_norm: 0.9127029389892017, iteration: 99419
loss: 1.02665114402771,grad_norm: 0.9999990927335201, iteration: 99420
loss: 1.00083589553833,grad_norm: 0.9999993042428529, iteration: 99421
loss: 1.0246652364730835,grad_norm: 0.8992463330343594, iteration: 99422
loss: 0.960947573184967,grad_norm: 0.9820846840045457, iteration: 99423
loss: 0.9938950538635254,grad_norm: 0.9999998272142091, iteration: 99424
loss: 1.0164589881896973,grad_norm: 0.9166065191494226, iteration: 99425
loss: 1.0648607015609741,grad_norm: 0.9999995651516699, iteration: 99426
loss: 1.0450233221054077,grad_norm: 0.9999991810303737, iteration: 99427
loss: 1.006467342376709,grad_norm: 0.999999289193798, iteration: 99428
loss: 1.053345799446106,grad_norm: 0.9999990606502631, iteration: 99429
loss: 0.9778233766555786,grad_norm: 0.9999991976492472, iteration: 99430
loss: 0.9916784167289734,grad_norm: 0.9999991459111458, iteration: 99431
loss: 1.0255286693572998,grad_norm: 0.9999991686572034, iteration: 99432
loss: 0.9946688413619995,grad_norm: 0.9327432902437083, iteration: 99433
loss: 0.9776421189308167,grad_norm: 0.999999147838661, iteration: 99434
loss: 0.9815177917480469,grad_norm: 0.9999994173297883, iteration: 99435
loss: 0.986968994140625,grad_norm: 0.999999146902961, iteration: 99436
loss: 0.9757190942764282,grad_norm: 0.9999991886624158, iteration: 99437
loss: 0.9787675142288208,grad_norm: 0.9999990223984003, iteration: 99438
loss: 0.9894055724143982,grad_norm: 0.9999994388250572, iteration: 99439
loss: 0.9894460439682007,grad_norm: 0.8969412536613836, iteration: 99440
loss: 1.0465738773345947,grad_norm: 0.999999088316479, iteration: 99441
loss: 0.9966237545013428,grad_norm: 0.9999991257547003, iteration: 99442
loss: 0.9810851216316223,grad_norm: 0.920437257074515, iteration: 99443
loss: 1.0028274059295654,grad_norm: 0.8958715803676263, iteration: 99444
loss: 0.9676435589790344,grad_norm: 0.9999991034644576, iteration: 99445
loss: 0.9580172896385193,grad_norm: 0.9999992964527441, iteration: 99446
loss: 1.0037027597427368,grad_norm: 0.9067337764722171, iteration: 99447
loss: 1.019161343574524,grad_norm: 0.9999991908292033, iteration: 99448
loss: 0.9668973088264465,grad_norm: 0.9999991670222934, iteration: 99449
loss: 1.005859613418579,grad_norm: 0.8319337118121883, iteration: 99450
loss: 1.0275815725326538,grad_norm: 0.8643714619308291, iteration: 99451
loss: 1.0087512731552124,grad_norm: 0.812643534636208, iteration: 99452
loss: 1.0335761308670044,grad_norm: 0.9999991757142839, iteration: 99453
loss: 1.008337378501892,grad_norm: 0.9020901189150214, iteration: 99454
loss: 0.9819879531860352,grad_norm: 0.9999991913131095, iteration: 99455
loss: 0.9641367197036743,grad_norm: 0.999999265609533, iteration: 99456
loss: 1.0289862155914307,grad_norm: 0.9999990914502433, iteration: 99457
loss: 0.9733820557594299,grad_norm: 0.9999991813808655, iteration: 99458
loss: 0.9880697131156921,grad_norm: 0.9983984274472676, iteration: 99459
loss: 1.0658787488937378,grad_norm: 0.9999999361415592, iteration: 99460
loss: 1.0011436939239502,grad_norm: 0.9999994559728268, iteration: 99461
loss: 0.9928540587425232,grad_norm: 0.9434000268341367, iteration: 99462
loss: 1.0023722648620605,grad_norm: 0.9808556970123972, iteration: 99463
loss: 1.004353642463684,grad_norm: 0.9999991571562243, iteration: 99464
loss: 0.978416919708252,grad_norm: 0.9999991367802389, iteration: 99465
loss: 0.9960743188858032,grad_norm: 0.8783049760188303, iteration: 99466
loss: 1.001193642616272,grad_norm: 0.9797770733889538, iteration: 99467
loss: 0.9763659834861755,grad_norm: 0.9999999496622057, iteration: 99468
loss: 1.0082147121429443,grad_norm: 0.9999990329676257, iteration: 99469
loss: 0.9911928176879883,grad_norm: 0.9117273193268307, iteration: 99470
loss: 0.992864191532135,grad_norm: 0.9999993733081611, iteration: 99471
loss: 1.0098856687545776,grad_norm: 0.9999992218656224, iteration: 99472
loss: 1.007955551147461,grad_norm: 0.9999992796674497, iteration: 99473
loss: 0.9858860373497009,grad_norm: 0.9999990568830993, iteration: 99474
loss: 0.9951307773590088,grad_norm: 0.999999164146811, iteration: 99475
loss: 0.9939399361610413,grad_norm: 0.9702071543314259, iteration: 99476
loss: 1.0178170204162598,grad_norm: 0.9076249661924607, iteration: 99477
loss: 0.9823407530784607,grad_norm: 0.9999993996996892, iteration: 99478
loss: 0.9996671676635742,grad_norm: 0.9999995407049507, iteration: 99479
loss: 1.0275137424468994,grad_norm: 0.9999991884447632, iteration: 99480
loss: 0.9965934157371521,grad_norm: 0.9784650927451358, iteration: 99481
loss: 1.007553219795227,grad_norm: 0.9999990614566935, iteration: 99482
loss: 0.9758481979370117,grad_norm: 0.9999991909360217, iteration: 99483
loss: 1.0110599994659424,grad_norm: 0.9999991669649121, iteration: 99484
loss: 1.0166884660720825,grad_norm: 0.9999991251913776, iteration: 99485
loss: 0.9921172857284546,grad_norm: 0.9999992310316108, iteration: 99486
loss: 1.0231294631958008,grad_norm: 0.9999992213372031, iteration: 99487
loss: 1.0280909538269043,grad_norm: 0.9954804080753945, iteration: 99488
loss: 1.0166449546813965,grad_norm: 0.8619947623319665, iteration: 99489
loss: 1.0053514242172241,grad_norm: 0.9999991491162076, iteration: 99490
loss: 1.0379960536956787,grad_norm: 0.9999991974792809, iteration: 99491
loss: 0.9572564959526062,grad_norm: 0.9999991900852493, iteration: 99492
loss: 0.9732679128646851,grad_norm: 0.999999321720559, iteration: 99493
loss: 1.0165972709655762,grad_norm: 0.945329034985867, iteration: 99494
loss: 0.9963315725326538,grad_norm: 0.9999990725302565, iteration: 99495
loss: 1.1058670282363892,grad_norm: 0.999999252514672, iteration: 99496
loss: 0.9672727584838867,grad_norm: 0.9581150704453885, iteration: 99497
loss: 0.9676344990730286,grad_norm: 0.9834420199271318, iteration: 99498
loss: 1.0094592571258545,grad_norm: 0.8083850639114114, iteration: 99499
loss: 1.0060049295425415,grad_norm: 0.9999990408972312, iteration: 99500
loss: 1.0260193347930908,grad_norm: 0.9999991639824402, iteration: 99501
loss: 1.0101351737976074,grad_norm: 0.9952648401049703, iteration: 99502
loss: 1.0079307556152344,grad_norm: 0.9999991092254193, iteration: 99503
loss: 0.9760988354682922,grad_norm: 0.9999992561854572, iteration: 99504
loss: 1.00357985496521,grad_norm: 0.9999996099932342, iteration: 99505
loss: 0.9877384305000305,grad_norm: 0.9999989666674151, iteration: 99506
loss: 0.9978712201118469,grad_norm: 0.999999050786876, iteration: 99507
loss: 0.9820854663848877,grad_norm: 0.9999992181030013, iteration: 99508
loss: 0.9794579744338989,grad_norm: 0.9999991953777854, iteration: 99509
loss: 0.9683421850204468,grad_norm: 0.999999115296375, iteration: 99510
loss: 1.0113378763198853,grad_norm: 0.9999990722826586, iteration: 99511
loss: 0.979714035987854,grad_norm: 0.9999990573943157, iteration: 99512
loss: 0.9901073575019836,grad_norm: 0.9999993417862825, iteration: 99513
loss: 1.0345842838287354,grad_norm: 0.9999992666204822, iteration: 99514
loss: 0.993306040763855,grad_norm: 0.9217845511168691, iteration: 99515
loss: 0.9854834675788879,grad_norm: 0.9999991481439323, iteration: 99516
loss: 1.0201945304870605,grad_norm: 0.9820456849477003, iteration: 99517
loss: 0.9835255146026611,grad_norm: 0.9999992153328807, iteration: 99518
loss: 1.0491620302200317,grad_norm: 0.9999992561916671, iteration: 99519
loss: 1.0360698699951172,grad_norm: 0.9603205464591359, iteration: 99520
loss: 1.0027331113815308,grad_norm: 0.9120380193472355, iteration: 99521
loss: 0.9867244362831116,grad_norm: 0.819852182722524, iteration: 99522
loss: 0.9766391515731812,grad_norm: 0.9563779828274256, iteration: 99523
loss: 0.9824472069740295,grad_norm: 0.9559303871808112, iteration: 99524
loss: 0.995480477809906,grad_norm: 0.9999989819513653, iteration: 99525
loss: 1.0016229152679443,grad_norm: 0.9999990374795236, iteration: 99526
loss: 0.9935855269432068,grad_norm: 0.999999085287409, iteration: 99527
loss: 0.9978393316268921,grad_norm: 0.9999997099640673, iteration: 99528
loss: 1.0436012744903564,grad_norm: 0.9999990952925059, iteration: 99529
loss: 1.0269298553466797,grad_norm: 0.999999165687498, iteration: 99530
loss: 1.0093201398849487,grad_norm: 0.9273689508090597, iteration: 99531
loss: 1.025429368019104,grad_norm: 0.9999991585642909, iteration: 99532
loss: 0.9959456324577332,grad_norm: 0.7936568550903011, iteration: 99533
loss: 0.9996522068977356,grad_norm: 0.9713609986538658, iteration: 99534
loss: 1.0208978652954102,grad_norm: 0.9999996529012306, iteration: 99535
loss: 0.9869392514228821,grad_norm: 0.9895243426003537, iteration: 99536
loss: 1.0389041900634766,grad_norm: 0.9999992001201615, iteration: 99537
loss: 1.0045878887176514,grad_norm: 0.9422985479500373, iteration: 99538
loss: 0.9943562150001526,grad_norm: 0.9999992000907427, iteration: 99539
loss: 1.0104551315307617,grad_norm: 0.9999991718706748, iteration: 99540
loss: 0.9791043400764465,grad_norm: 0.9079748786236457, iteration: 99541
loss: 1.0154743194580078,grad_norm: 0.9999989288075035, iteration: 99542
loss: 1.0165616273880005,grad_norm: 0.9969082279907516, iteration: 99543
loss: 1.0048638582229614,grad_norm: 0.9999990254559626, iteration: 99544
loss: 1.010446310043335,grad_norm: 0.9312273576250494, iteration: 99545
loss: 0.9978363513946533,grad_norm: 0.9999992460755425, iteration: 99546
loss: 0.9808275699615479,grad_norm: 0.9999991029106547, iteration: 99547
loss: 1.009117603302002,grad_norm: 0.9613189596464936, iteration: 99548
loss: 1.0029760599136353,grad_norm: 0.9999992663472014, iteration: 99549
loss: 0.98653244972229,grad_norm: 0.9999990802461538, iteration: 99550
loss: 1.066162109375,grad_norm: 0.9999992447782888, iteration: 99551
loss: 0.9575315117835999,grad_norm: 0.9999991505000554, iteration: 99552
loss: 0.9765474796295166,grad_norm: 0.9181270840098342, iteration: 99553
loss: 1.0284072160720825,grad_norm: 0.9070071958634143, iteration: 99554
loss: 1.0139154195785522,grad_norm: 0.9999991014466466, iteration: 99555
loss: 1.0423022508621216,grad_norm: 0.9999995128343596, iteration: 99556
loss: 0.9934731125831604,grad_norm: 0.999999109174847, iteration: 99557
loss: 0.9944430589675903,grad_norm: 0.9999991632979286, iteration: 99558
loss: 1.0106571912765503,grad_norm: 0.957852949744632, iteration: 99559
loss: 0.9535266160964966,grad_norm: 0.9999992243748543, iteration: 99560
loss: 0.9498307704925537,grad_norm: 0.9999990575555288, iteration: 99561
loss: 1.0059469938278198,grad_norm: 0.953482934394655, iteration: 99562
loss: 1.0031869411468506,grad_norm: 0.9999990468594694, iteration: 99563
loss: 1.0354710817337036,grad_norm: 0.9999993057307285, iteration: 99564
loss: 1.030295491218567,grad_norm: 0.9999992109364754, iteration: 99565
loss: 1.008649468421936,grad_norm: 0.99999898280845, iteration: 99566
loss: 0.9692148566246033,grad_norm: 0.9999992926917491, iteration: 99567
loss: 0.9902703166007996,grad_norm: 0.9140877796519848, iteration: 99568
loss: 1.0242410898208618,grad_norm: 0.85291703935905, iteration: 99569
loss: 1.0318539142608643,grad_norm: 0.9282899736094959, iteration: 99570
loss: 0.9977059364318848,grad_norm: 0.9999990184847339, iteration: 99571
loss: 0.9618803262710571,grad_norm: 0.8341139639343264, iteration: 99572
loss: 0.9867724180221558,grad_norm: 0.822612321535033, iteration: 99573
loss: 0.9891427755355835,grad_norm: 0.9999991236317739, iteration: 99574
loss: 0.9702422022819519,grad_norm: 0.9999991473842529, iteration: 99575
loss: 1.0389379262924194,grad_norm: 0.9796304210967529, iteration: 99576
loss: 0.9840635061264038,grad_norm: 0.9119900581950109, iteration: 99577
loss: 0.9903388023376465,grad_norm: 0.999999175149137, iteration: 99578
loss: 1.0263899564743042,grad_norm: 0.9999990900913797, iteration: 99579
loss: 1.008613109588623,grad_norm: 0.9999990240944163, iteration: 99580
loss: 0.9860408306121826,grad_norm: 0.8601157746109485, iteration: 99581
loss: 1.010534405708313,grad_norm: 0.8880918562758522, iteration: 99582
loss: 1.0613453388214111,grad_norm: 0.9999995641823268, iteration: 99583
loss: 1.0024164915084839,grad_norm: 0.9999992130550578, iteration: 99584
loss: 0.9929701089859009,grad_norm: 0.9999992848727527, iteration: 99585
loss: 0.9858289361000061,grad_norm: 0.8906999456797435, iteration: 99586
loss: 0.997134268283844,grad_norm: 0.9999991811353316, iteration: 99587
loss: 0.9813281893730164,grad_norm: 0.9738788278078463, iteration: 99588
loss: 0.9461641907691956,grad_norm: 0.9394476297456129, iteration: 99589
loss: 0.9801439046859741,grad_norm: 0.8753519633524867, iteration: 99590
loss: 1.0360888242721558,grad_norm: 0.9999992238664591, iteration: 99591
loss: 1.014578938484192,grad_norm: 0.9999991498953142, iteration: 99592
loss: 1.0101596117019653,grad_norm: 0.9999992692036564, iteration: 99593
loss: 0.9610812664031982,grad_norm: 0.9999991344183423, iteration: 99594
loss: 0.9887803792953491,grad_norm: 0.9213268093992368, iteration: 99595
loss: 0.9727296829223633,grad_norm: 0.9999992098490438, iteration: 99596
loss: 0.9533283710479736,grad_norm: 0.999999186633531, iteration: 99597
loss: 1.0165642499923706,grad_norm: 0.9093854148860362, iteration: 99598
loss: 1.0231982469558716,grad_norm: 0.9999989485252362, iteration: 99599
loss: 1.0293800830841064,grad_norm: 0.9999991981550252, iteration: 99600
loss: 0.9818190932273865,grad_norm: 0.999999125968014, iteration: 99601
loss: 1.0199944972991943,grad_norm: 0.9675222943611074, iteration: 99602
loss: 1.0354076623916626,grad_norm: 0.9999992105492728, iteration: 99603
loss: 1.033004879951477,grad_norm: 0.9246241852510076, iteration: 99604
loss: 1.014937400817871,grad_norm: 0.9999994199682288, iteration: 99605
loss: 0.9750145673751831,grad_norm: 0.9999990584094626, iteration: 99606
loss: 1.0372214317321777,grad_norm: 0.9999997432427049, iteration: 99607
loss: 1.004099726676941,grad_norm: 0.7998283115641505, iteration: 99608
loss: 1.0322388410568237,grad_norm: 0.9022758218990775, iteration: 99609
loss: 1.0022684335708618,grad_norm: 0.9999992204420964, iteration: 99610
loss: 0.9856209754943848,grad_norm: 0.9999990809494188, iteration: 99611
loss: 0.9716982245445251,grad_norm: 0.9726955441294615, iteration: 99612
loss: 1.0452570915222168,grad_norm: 0.9999991832092631, iteration: 99613
loss: 0.9832324981689453,grad_norm: 0.9233955041516695, iteration: 99614
loss: 1.0345033407211304,grad_norm: 0.9999991932357049, iteration: 99615
loss: 1.041462779045105,grad_norm: 0.9999992923062306, iteration: 99616
loss: 1.0113154649734497,grad_norm: 0.99999920767751, iteration: 99617
loss: 0.9819556474685669,grad_norm: 0.9321037298871324, iteration: 99618
loss: 1.0116771459579468,grad_norm: 0.9999992150349538, iteration: 99619
loss: 0.9858748912811279,grad_norm: 0.9999992405796984, iteration: 99620
loss: 1.0055861473083496,grad_norm: 0.9663432407607043, iteration: 99621
loss: 0.9703949689865112,grad_norm: 0.9999990365166223, iteration: 99622
loss: 0.9918943643569946,grad_norm: 0.9572542575638001, iteration: 99623
loss: 1.0246024131774902,grad_norm: 0.9999996355958575, iteration: 99624
loss: 0.9849507808685303,grad_norm: 0.9999993367750392, iteration: 99625
loss: 0.9765524864196777,grad_norm: 0.9999990781103762, iteration: 99626
loss: 1.0349795818328857,grad_norm: 0.8747363949628694, iteration: 99627
loss: 1.0150104761123657,grad_norm: 0.9999990737718453, iteration: 99628
loss: 1.0190876722335815,grad_norm: 0.9999991513672426, iteration: 99629
loss: 1.0244141817092896,grad_norm: 0.9778517801793962, iteration: 99630
loss: 0.9889628887176514,grad_norm: 0.9999990633219072, iteration: 99631
loss: 0.9876732230186462,grad_norm: 0.855762897327577, iteration: 99632
loss: 1.0129057168960571,grad_norm: 0.9999991865895665, iteration: 99633
loss: 1.0546317100524902,grad_norm: 0.9121848075692106, iteration: 99634
loss: 0.9984140992164612,grad_norm: 0.999998974215553, iteration: 99635
loss: 1.002625584602356,grad_norm: 0.999999204235508, iteration: 99636
loss: 1.0009764432907104,grad_norm: 0.9999991113348836, iteration: 99637
loss: 0.9887363910675049,grad_norm: 0.9999990618610793, iteration: 99638
loss: 1.0149672031402588,grad_norm: 0.9999992077306683, iteration: 99639
loss: 0.9474782943725586,grad_norm: 0.9877723867491022, iteration: 99640
loss: 1.0188426971435547,grad_norm: 0.9999992117576672, iteration: 99641
loss: 1.0111600160598755,grad_norm: 0.9999990154985025, iteration: 99642
loss: 1.025272011756897,grad_norm: 0.999999224901862, iteration: 99643
loss: 1.005469799041748,grad_norm: 0.999999188207474, iteration: 99644
loss: 1.0084508657455444,grad_norm: 0.9145019819221322, iteration: 99645
loss: 0.9894424080848694,grad_norm: 0.9999992207274896, iteration: 99646
loss: 1.0080997943878174,grad_norm: 0.9113209147029193, iteration: 99647
loss: 0.9757984280586243,grad_norm: 0.9999990010814813, iteration: 99648
loss: 0.9884346127510071,grad_norm: 0.9999992452279935, iteration: 99649
loss: 1.007909893989563,grad_norm: 0.8388859303437719, iteration: 99650
loss: 1.0160515308380127,grad_norm: 0.9999990660038169, iteration: 99651
loss: 1.0031791925430298,grad_norm: 0.9999990496670556, iteration: 99652
loss: 1.0101158618927002,grad_norm: 0.9999990437017086, iteration: 99653
loss: 1.0381298065185547,grad_norm: 0.9999992135253013, iteration: 99654
loss: 1.0012633800506592,grad_norm: 0.9999992042303627, iteration: 99655
loss: 1.0067038536071777,grad_norm: 0.9999992673636853, iteration: 99656
loss: 0.9752176403999329,grad_norm: 0.8604214785023809, iteration: 99657
loss: 1.0236157178878784,grad_norm: 0.9687591926851268, iteration: 99658
loss: 1.0027347803115845,grad_norm: 0.9999991617738764, iteration: 99659
loss: 1.005990982055664,grad_norm: 0.9565881319438266, iteration: 99660
loss: 1.000826358795166,grad_norm: 0.9999993418555015, iteration: 99661
loss: 1.0075665712356567,grad_norm: 0.924747464865579, iteration: 99662
loss: 1.0273550748825073,grad_norm: 0.9999992105709534, iteration: 99663
loss: 0.940686821937561,grad_norm: 0.9392264846934616, iteration: 99664
loss: 1.0399078130722046,grad_norm: 0.9999992407491867, iteration: 99665
loss: 0.9413993954658508,grad_norm: 0.9861640503883422, iteration: 99666
loss: 1.0247431993484497,grad_norm: 0.999999197145962, iteration: 99667
loss: 0.9972896575927734,grad_norm: 0.9999991512234502, iteration: 99668
loss: 0.9941390752792358,grad_norm: 0.999999174439327, iteration: 99669
loss: 1.0480632781982422,grad_norm: 0.9999990910472475, iteration: 99670
loss: 1.0395129919052124,grad_norm: 0.9999991755410077, iteration: 99671
loss: 1.015487551689148,grad_norm: 0.8005165843655648, iteration: 99672
loss: 0.9890745282173157,grad_norm: 0.9999990314568702, iteration: 99673
loss: 0.9534905552864075,grad_norm: 0.9354046441592109, iteration: 99674
loss: 0.986099123954773,grad_norm: 0.8920246712818942, iteration: 99675
loss: 0.9864128232002258,grad_norm: 0.9999991446403792, iteration: 99676
loss: 1.0073851346969604,grad_norm: 0.9999993341514254, iteration: 99677
loss: 1.0213050842285156,grad_norm: 0.9463418728577073, iteration: 99678
loss: 1.002394199371338,grad_norm: 0.9594328543511572, iteration: 99679
loss: 0.9896736741065979,grad_norm: 0.9999990450775254, iteration: 99680
loss: 1.0048012733459473,grad_norm: 0.9999991079958388, iteration: 99681
loss: 0.9973008036613464,grad_norm: 0.9999992054264728, iteration: 99682
loss: 0.97318434715271,grad_norm: 0.9811023091010896, iteration: 99683
loss: 1.023488998413086,grad_norm: 0.9085234770730332, iteration: 99684
loss: 0.9788550734519958,grad_norm: 0.9580989125087387, iteration: 99685
loss: 1.0106793642044067,grad_norm: 0.9999988665787193, iteration: 99686
loss: 1.0295830965042114,grad_norm: 0.9999990332098643, iteration: 99687
loss: 0.9790216088294983,grad_norm: 0.999999246681939, iteration: 99688
loss: 0.9541976451873779,grad_norm: 0.9999990487797157, iteration: 99689
loss: 1.0158665180206299,grad_norm: 0.9999989888639772, iteration: 99690
loss: 1.0137746334075928,grad_norm: 0.9999992279552291, iteration: 99691
loss: 0.9956450462341309,grad_norm: 0.9571811253900395, iteration: 99692
loss: 1.0151468515396118,grad_norm: 0.9999991228836609, iteration: 99693
loss: 1.0097265243530273,grad_norm: 0.944825716616617, iteration: 99694
loss: 0.9936593770980835,grad_norm: 0.9999990460166783, iteration: 99695
loss: 1.0287240743637085,grad_norm: 0.9999993205059796, iteration: 99696
loss: 1.0100409984588623,grad_norm: 0.8466195861568043, iteration: 99697
loss: 1.024328351020813,grad_norm: 0.9819451162771643, iteration: 99698
loss: 0.9535003304481506,grad_norm: 0.9999990433775174, iteration: 99699
loss: 1.0186576843261719,grad_norm: 0.973059235546912, iteration: 99700
loss: 1.0064692497253418,grad_norm: 0.9999990844439278, iteration: 99701
loss: 1.0048677921295166,grad_norm: 0.999999116153091, iteration: 99702
loss: 1.0089236497879028,grad_norm: 0.9999990895103131, iteration: 99703
loss: 0.9606148600578308,grad_norm: 0.9376429570824903, iteration: 99704
loss: 0.9909349679946899,grad_norm: 0.9918176822368998, iteration: 99705
loss: 0.9986703991889954,grad_norm: 0.9500286537078025, iteration: 99706
loss: 1.0318799018859863,grad_norm: 0.9999990890701923, iteration: 99707
loss: 1.0168691873550415,grad_norm: 0.992175710041818, iteration: 99708
loss: 0.9977906346321106,grad_norm: 0.9806548047651686, iteration: 99709
loss: 1.0042970180511475,grad_norm: 0.9999993526561165, iteration: 99710
loss: 1.0187079906463623,grad_norm: 0.9771772460891911, iteration: 99711
loss: 0.9961625933647156,grad_norm: 0.9639512262275174, iteration: 99712
loss: 1.0280842781066895,grad_norm: 0.9999993538584592, iteration: 99713
loss: 0.9845519065856934,grad_norm: 0.9999992813202254, iteration: 99714
loss: 1.0185128450393677,grad_norm: 0.9999992086178995, iteration: 99715
loss: 0.9609772562980652,grad_norm: 0.9999991498181109, iteration: 99716
loss: 1.0050934553146362,grad_norm: 0.9999991098644463, iteration: 99717
loss: 0.9906898140907288,grad_norm: 0.9760498929190551, iteration: 99718
loss: 1.092795729637146,grad_norm: 0.9999996096160554, iteration: 99719
loss: 1.0377259254455566,grad_norm: 0.9218493120811793, iteration: 99720
loss: 1.0198055505752563,grad_norm: 0.9999991070807506, iteration: 99721
loss: 0.9816523790359497,grad_norm: 0.9374296316495471, iteration: 99722
loss: 1.0272059440612793,grad_norm: 0.9999991327641297, iteration: 99723
loss: 0.981322169303894,grad_norm: 0.9999991132279794, iteration: 99724
loss: 0.999957799911499,grad_norm: 0.995466309532377, iteration: 99725
loss: 0.9711641073226929,grad_norm: 0.9999990554714813, iteration: 99726
loss: 0.997406542301178,grad_norm: 0.9999990792767882, iteration: 99727
loss: 0.9569250345230103,grad_norm: 0.9999991515082268, iteration: 99728
loss: 1.042431116104126,grad_norm: 0.9999996909732419, iteration: 99729
loss: 0.9969834685325623,grad_norm: 0.9999991273337693, iteration: 99730
loss: 0.9895637631416321,grad_norm: 0.9248530182878734, iteration: 99731
loss: 0.9794032573699951,grad_norm: 0.9909665392103616, iteration: 99732
loss: 1.0073713064193726,grad_norm: 0.999999213454434, iteration: 99733
loss: 0.9828593730926514,grad_norm: 0.9999991542581415, iteration: 99734
loss: 1.0250803232192993,grad_norm: 0.999999183538235, iteration: 99735
loss: 0.9719035625457764,grad_norm: 0.9999991616369293, iteration: 99736
loss: 1.0415678024291992,grad_norm: 0.9999997850154584, iteration: 99737
loss: 0.9869565963745117,grad_norm: 0.9999989052737521, iteration: 99738
loss: 1.0213773250579834,grad_norm: 0.9999999204714116, iteration: 99739
loss: 0.9969316124916077,grad_norm: 0.9801393996321943, iteration: 99740
loss: 0.9952884912490845,grad_norm: 0.9999991576276052, iteration: 99741
loss: 1.0385860204696655,grad_norm: 0.999999168210285, iteration: 99742
loss: 1.008481502532959,grad_norm: 0.9999990368170526, iteration: 99743
loss: 0.9996098875999451,grad_norm: 0.9999991711244728, iteration: 99744
loss: 1.0212464332580566,grad_norm: 0.9591650766413699, iteration: 99745
loss: 0.9733633399009705,grad_norm: 0.9999991601086723, iteration: 99746
loss: 1.000816822052002,grad_norm: 0.9999992748915434, iteration: 99747
loss: 0.9714975357055664,grad_norm: 0.9999989028799205, iteration: 99748
loss: 0.979063868522644,grad_norm: 0.9999992020121635, iteration: 99749
loss: 0.9884598851203918,grad_norm: 0.9999992013562097, iteration: 99750
loss: 1.0180604457855225,grad_norm: 0.9999991500116006, iteration: 99751
loss: 0.9932829141616821,grad_norm: 0.9786694635074026, iteration: 99752
loss: 1.0065898895263672,grad_norm: 0.9438543815021503, iteration: 99753
loss: 1.0176399946212769,grad_norm: 0.81603088363487, iteration: 99754
loss: 0.9940009713172913,grad_norm: 0.9058457238758858, iteration: 99755
loss: 0.9534100294113159,grad_norm: 0.9999990321942555, iteration: 99756
loss: 0.9899277687072754,grad_norm: 0.9738285506803752, iteration: 99757
loss: 0.986903727054596,grad_norm: 0.9999991947121077, iteration: 99758
loss: 1.0041112899780273,grad_norm: 0.9422649590279389, iteration: 99759
loss: 0.9727852940559387,grad_norm: 0.963062627575332, iteration: 99760
loss: 0.9906579256057739,grad_norm: 0.7907249712529897, iteration: 99761
loss: 1.0139228105545044,grad_norm: 0.9999989512560556, iteration: 99762
loss: 1.0133849382400513,grad_norm: 0.980292584171711, iteration: 99763
loss: 0.9924988150596619,grad_norm: 0.8694768600885541, iteration: 99764
loss: 0.9987680315971375,grad_norm: 0.9200845224693209, iteration: 99765
loss: 1.006149411201477,grad_norm: 0.8944094196321445, iteration: 99766
loss: 0.977281928062439,grad_norm: 0.999999055567525, iteration: 99767
loss: 1.046283483505249,grad_norm: 0.9999990368150191, iteration: 99768
loss: 0.9941912293434143,grad_norm: 0.9740363709190005, iteration: 99769
loss: 1.0584636926651,grad_norm: 0.9999992337248935, iteration: 99770
loss: 0.9817456007003784,grad_norm: 0.9999990479911218, iteration: 99771
loss: 0.9952183961868286,grad_norm: 0.999999044070463, iteration: 99772
loss: 0.9593010544776917,grad_norm: 0.9106479417396278, iteration: 99773
loss: 0.9932920932769775,grad_norm: 0.99999916994447, iteration: 99774
loss: 1.0224072933197021,grad_norm: 0.9909619047131056, iteration: 99775
loss: 0.9958964586257935,grad_norm: 0.9068584133463752, iteration: 99776
loss: 0.9901351928710938,grad_norm: 0.9999992020845435, iteration: 99777
loss: 0.9999592304229736,grad_norm: 0.9999990996803234, iteration: 99778
loss: 1.0150865316390991,grad_norm: 0.9999991996166069, iteration: 99779
loss: 0.9871397018432617,grad_norm: 0.9999992092657327, iteration: 99780
loss: 0.9964509010314941,grad_norm: 0.9999991641064954, iteration: 99781
loss: 1.000719428062439,grad_norm: 0.9290341364484744, iteration: 99782
loss: 0.9869922399520874,grad_norm: 0.9833523025382497, iteration: 99783
loss: 0.9745928645133972,grad_norm: 0.8720996816824038, iteration: 99784
loss: 1.0107102394104004,grad_norm: 0.9999990402712422, iteration: 99785
loss: 0.985399067401886,grad_norm: 0.9999989987581817, iteration: 99786
loss: 1.0111459493637085,grad_norm: 0.9999991484892675, iteration: 99787
loss: 1.0168801546096802,grad_norm: 0.9999990158438102, iteration: 99788
loss: 1.008634090423584,grad_norm: 0.8469667136181533, iteration: 99789
loss: 1.0065886974334717,grad_norm: 0.9999990705098653, iteration: 99790
loss: 0.9542973041534424,grad_norm: 0.9318672867312237, iteration: 99791
loss: 0.9705095291137695,grad_norm: 0.9719149214758591, iteration: 99792
loss: 1.01957106590271,grad_norm: 0.9248294304288598, iteration: 99793
loss: 1.0407062768936157,grad_norm: 0.9999998122195066, iteration: 99794
loss: 1.0211964845657349,grad_norm: 0.9999992104647669, iteration: 99795
loss: 1.0130363702774048,grad_norm: 0.9999992749019009, iteration: 99796
loss: 1.0021196603775024,grad_norm: 0.8403380608804742, iteration: 99797
loss: 0.9946643710136414,grad_norm: 0.9999991560580872, iteration: 99798
loss: 1.0216158628463745,grad_norm: 0.9999990772335593, iteration: 99799
loss: 1.0372984409332275,grad_norm: 0.9842149106628115, iteration: 99800
loss: 1.0037096738815308,grad_norm: 0.898505961609872, iteration: 99801
loss: 0.9898306727409363,grad_norm: 0.9999990656728736, iteration: 99802
loss: 0.9630327820777893,grad_norm: 0.9999991862824751, iteration: 99803
loss: 0.9703040719032288,grad_norm: 0.9999990127621874, iteration: 99804
loss: 0.9892162084579468,grad_norm: 0.9999990991932957, iteration: 99805
loss: 0.9931477904319763,grad_norm: 0.9999990223662119, iteration: 99806
loss: 1.0132548809051514,grad_norm: 0.9956495839748116, iteration: 99807
loss: 1.0005723237991333,grad_norm: 0.9999993053305005, iteration: 99808
loss: 0.9824665784835815,grad_norm: 0.9356147532724108, iteration: 99809
loss: 1.0101791620254517,grad_norm: 0.9389539277178218, iteration: 99810
loss: 0.9646507501602173,grad_norm: 0.9999991590974416, iteration: 99811
loss: 1.0048407316207886,grad_norm: 0.999999058522944, iteration: 99812
loss: 0.9557328820228577,grad_norm: 0.9999993469340603, iteration: 99813
loss: 0.94482421875,grad_norm: 0.9833420843004543, iteration: 99814
loss: 0.9770187139511108,grad_norm: 0.9344142141989836, iteration: 99815
loss: 0.9935089349746704,grad_norm: 0.9557912078290005, iteration: 99816
loss: 0.9758867621421814,grad_norm: 0.9999992148609279, iteration: 99817
loss: 1.0198622941970825,grad_norm: 0.9674386307793564, iteration: 99818
loss: 1.0029546022415161,grad_norm: 0.9999991902707882, iteration: 99819
loss: 0.9872015118598938,grad_norm: 0.882850919123239, iteration: 99820
loss: 1.0129644870758057,grad_norm: 0.9269453270649078, iteration: 99821
loss: 1.014347791671753,grad_norm: 0.9174217450345611, iteration: 99822
loss: 1.011365294456482,grad_norm: 0.9999991489422919, iteration: 99823
loss: 1.0115361213684082,grad_norm: 0.9999990875383746, iteration: 99824
loss: 0.9896235466003418,grad_norm: 0.9999990638262943, iteration: 99825
loss: 1.0313429832458496,grad_norm: 0.9999991433532376, iteration: 99826
loss: 1.0110398530960083,grad_norm: 0.9907862612788504, iteration: 99827
loss: 1.0344511270523071,grad_norm: 0.9999990363335656, iteration: 99828
loss: 0.9620401859283447,grad_norm: 0.9999989769263198, iteration: 99829
loss: 0.9829639196395874,grad_norm: 0.9999993041625762, iteration: 99830
loss: 0.980747640132904,grad_norm: 0.9999991716590678, iteration: 99831
loss: 1.0013229846954346,grad_norm: 0.9999992348865259, iteration: 99832
loss: 0.9909160137176514,grad_norm: 0.9257092519259387, iteration: 99833
loss: 1.0057703256607056,grad_norm: 0.9999993025812784, iteration: 99834
loss: 0.9726647138595581,grad_norm: 0.8993798306512358, iteration: 99835
loss: 0.9837324619293213,grad_norm: 0.999999064857415, iteration: 99836
loss: 0.9656081795692444,grad_norm: 0.9999991279199046, iteration: 99837
loss: 0.9730826616287231,grad_norm: 0.9999989519688397, iteration: 99838
loss: 0.9973564743995667,grad_norm: 0.8978294929049709, iteration: 99839
loss: 1.0468239784240723,grad_norm: 0.9999991954989467, iteration: 99840
loss: 1.0164058208465576,grad_norm: 0.9454908777799236, iteration: 99841
loss: 0.9760580062866211,grad_norm: 0.9999991715494003, iteration: 99842
loss: 0.9846420288085938,grad_norm: 0.9999991623450325, iteration: 99843
loss: 1.0001364946365356,grad_norm: 0.9999991104575451, iteration: 99844
loss: 1.0083951950073242,grad_norm: 0.9999990443753841, iteration: 99845
loss: 0.9289889335632324,grad_norm: 0.9688212997822991, iteration: 99846
loss: 0.9943429827690125,grad_norm: 0.9999996991871576, iteration: 99847
loss: 1.0300688743591309,grad_norm: 0.999998936629284, iteration: 99848
loss: 1.0054378509521484,grad_norm: 0.9999991245140873, iteration: 99849
loss: 1.0020982027053833,grad_norm: 0.9048185369569529, iteration: 99850
loss: 1.0007919073104858,grad_norm: 0.8860180646584709, iteration: 99851
loss: 1.0044245719909668,grad_norm: 0.9999989563687341, iteration: 99852
loss: 1.0294241905212402,grad_norm: 0.9999992429581572, iteration: 99853
loss: 0.97797030210495,grad_norm: 0.9256385450831031, iteration: 99854
loss: 0.9866015911102295,grad_norm: 0.9762067070227656, iteration: 99855
loss: 1.017107605934143,grad_norm: 0.8989756747495145, iteration: 99856
loss: 1.0249830484390259,grad_norm: 0.9999995303378821, iteration: 99857
loss: 1.0093857049942017,grad_norm: 0.9999990289259761, iteration: 99858
loss: 0.9823523163795471,grad_norm: 0.9999991238514646, iteration: 99859
loss: 0.9913313984870911,grad_norm: 0.999999060158995, iteration: 99860
loss: 1.1068414449691772,grad_norm: 0.9999993936115997, iteration: 99861
loss: 0.9951449632644653,grad_norm: 0.9774357762349178, iteration: 99862
loss: 0.9950129389762878,grad_norm: 0.9215606818883472, iteration: 99863
loss: 0.9711228013038635,grad_norm: 0.9999989992552102, iteration: 99864
loss: 0.9998844265937805,grad_norm: 0.9999989650678796, iteration: 99865
loss: 1.0491245985031128,grad_norm: 0.9999994436123585, iteration: 99866
loss: 0.9988700151443481,grad_norm: 0.9687231184576599, iteration: 99867
loss: 1.0440750122070312,grad_norm: 0.9464776600871713, iteration: 99868
loss: 1.033400535583496,grad_norm: 0.9999991238412964, iteration: 99869
loss: 1.01993727684021,grad_norm: 0.9015912426898492, iteration: 99870
loss: 0.977932333946228,grad_norm: 0.9999992188867651, iteration: 99871
loss: 0.9596717953681946,grad_norm: 0.9999991793699078, iteration: 99872
loss: 0.9856705665588379,grad_norm: 0.9999989984687014, iteration: 99873
loss: 0.9837190508842468,grad_norm: 0.9284205691116503, iteration: 99874
loss: 0.9903550744056702,grad_norm: 0.977616611705588, iteration: 99875
loss: 1.006533145904541,grad_norm: 0.9999991868344352, iteration: 99876
loss: 0.9623218178749084,grad_norm: 0.999999189492979, iteration: 99877
loss: 0.9884060025215149,grad_norm: 0.8605439736885683, iteration: 99878
loss: 1.0276645421981812,grad_norm: 0.9999994630842374, iteration: 99879
loss: 1.0299433469772339,grad_norm: 0.999999095016112, iteration: 99880
loss: 1.011036992073059,grad_norm: 0.9999993239820962, iteration: 99881
loss: 1.0006414651870728,grad_norm: 0.9281859390645075, iteration: 99882
loss: 0.9840801954269409,grad_norm: 0.9866948301878493, iteration: 99883
loss: 1.0154999494552612,grad_norm: 0.9679580149899402, iteration: 99884
loss: 0.975376307964325,grad_norm: 0.86074771097266, iteration: 99885
loss: 0.988202691078186,grad_norm: 0.8751431329247374, iteration: 99886
loss: 1.0427141189575195,grad_norm: 0.9999998818771365, iteration: 99887
loss: 1.006549596786499,grad_norm: 0.999999077327392, iteration: 99888
loss: 0.9824031591415405,grad_norm: 0.999999085104118, iteration: 99889
loss: 1.0267168283462524,grad_norm: 0.9352851380424146, iteration: 99890
loss: 1.0400712490081787,grad_norm: 0.9999990894673645, iteration: 99891
loss: 0.9877609014511108,grad_norm: 0.9999990120077579, iteration: 99892
loss: 1.016571044921875,grad_norm: 0.9999991467171581, iteration: 99893
loss: 0.9773359298706055,grad_norm: 0.9999992124733762, iteration: 99894
loss: 0.9771178960800171,grad_norm: 0.9911177602162757, iteration: 99895
loss: 0.9888870120048523,grad_norm: 0.9019927311918932, iteration: 99896
loss: 0.9711933732032776,grad_norm: 0.9999990105053358, iteration: 99897
loss: 1.0243513584136963,grad_norm: 0.9999990861633936, iteration: 99898
loss: 0.975682258605957,grad_norm: 0.9999991652321005, iteration: 99899
loss: 1.026044249534607,grad_norm: 0.9999990544600181, iteration: 99900
loss: 1.0026123523712158,grad_norm: 0.9999990415514013, iteration: 99901
loss: 0.9880185723304749,grad_norm: 0.9999992966377138, iteration: 99902
loss: 0.9776521921157837,grad_norm: 0.933954129567302, iteration: 99903
loss: 1.0351471900939941,grad_norm: 0.9999991440107752, iteration: 99904
loss: 0.9851921796798706,grad_norm: 0.9999991726147033, iteration: 99905
loss: 1.0351076126098633,grad_norm: 0.9999991915592273, iteration: 99906
loss: 1.0166715383529663,grad_norm: 0.9999990410541532, iteration: 99907
loss: 1.0135831832885742,grad_norm: 0.9999990108765273, iteration: 99908
loss: 1.0189963579177856,grad_norm: 0.9999992076840512, iteration: 99909
loss: 0.9543992877006531,grad_norm: 0.9999992185069367, iteration: 99910
loss: 0.9894365072250366,grad_norm: 0.9317133882906815, iteration: 99911
loss: 1.0318493843078613,grad_norm: 0.9999999296326567, iteration: 99912
loss: 1.027719497680664,grad_norm: 0.9999991431631676, iteration: 99913
loss: 1.0232902765274048,grad_norm: 0.9999991949929703, iteration: 99914
loss: 0.9993940591812134,grad_norm: 0.9999991754429646, iteration: 99915
loss: 0.9851423501968384,grad_norm: 0.9380866112491533, iteration: 99916
loss: 0.971838116645813,grad_norm: 0.999999541860899, iteration: 99917
loss: 0.9992051720619202,grad_norm: 0.9999991198453383, iteration: 99918
loss: 0.998529851436615,grad_norm: 0.9999991497206044, iteration: 99919
loss: 0.9748883843421936,grad_norm: 0.9019367507158289, iteration: 99920
loss: 0.9727150201797485,grad_norm: 0.9999990207884354, iteration: 99921
loss: 0.9946177005767822,grad_norm: 0.9999992672314626, iteration: 99922
loss: 0.9832417964935303,grad_norm: 0.9061554911686924, iteration: 99923
loss: 0.9903853535652161,grad_norm: 0.9999991214527566, iteration: 99924
loss: 0.9914069771766663,grad_norm: 0.9528170680961222, iteration: 99925
loss: 1.0034862756729126,grad_norm: 0.9999991195728866, iteration: 99926
loss: 1.0034624338150024,grad_norm: 0.9999990355371303, iteration: 99927
loss: 1.0147851705551147,grad_norm: 0.9999993793365126, iteration: 99928
loss: 0.9946308135986328,grad_norm: 0.9999990979415075, iteration: 99929
loss: 0.9638225436210632,grad_norm: 0.9928412618802435, iteration: 99930
loss: 1.0180706977844238,grad_norm: 0.9999989958229394, iteration: 99931
loss: 1.009371280670166,grad_norm: 0.9999992772593141, iteration: 99932
loss: 0.9803739786148071,grad_norm: 0.9999991441950709, iteration: 99933
loss: 1.0226119756698608,grad_norm: 0.9964456124282746, iteration: 99934
loss: 1.0171456336975098,grad_norm: 0.9876005467874098, iteration: 99935
loss: 0.992340624332428,grad_norm: 0.934607375069214, iteration: 99936
loss: 0.978375256061554,grad_norm: 0.8362705270800002, iteration: 99937
loss: 1.0126370191574097,grad_norm: 0.9999995966584531, iteration: 99938
loss: 0.998004674911499,grad_norm: 0.99999919602382, iteration: 99939
loss: 1.049859881401062,grad_norm: 0.9999990442437382, iteration: 99940
loss: 0.99552983045578,grad_norm: 0.9999991810128884, iteration: 99941
loss: 1.0291036367416382,grad_norm: 0.9999990297734987, iteration: 99942
loss: 0.994655966758728,grad_norm: 0.9999991416918279, iteration: 99943
loss: 0.9796062707901001,grad_norm: 0.9999991978200801, iteration: 99944
loss: 0.9860131740570068,grad_norm: 0.999999178685292, iteration: 99945
loss: 0.9844223260879517,grad_norm: 0.999999115592348, iteration: 99946
loss: 0.9655660390853882,grad_norm: 0.9557953594335881, iteration: 99947
loss: 0.9840638637542725,grad_norm: 0.8256023126056343, iteration: 99948
loss: 0.9554522633552551,grad_norm: 0.9999989821281884, iteration: 99949
loss: 0.9839540123939514,grad_norm: 0.9999992619927738, iteration: 99950
loss: 1.0034618377685547,grad_norm: 0.9999989968091478, iteration: 99951
loss: 1.0187959671020508,grad_norm: 0.9999990543196506, iteration: 99952
loss: 0.9863060116767883,grad_norm: 0.9999991594174085, iteration: 99953
loss: 1.0189944505691528,grad_norm: 0.9999991523704447, iteration: 99954
loss: 1.0040156841278076,grad_norm: 0.872833498905112, iteration: 99955
loss: 0.9945787191390991,grad_norm: 0.9999991774749901, iteration: 99956
loss: 1.003217339515686,grad_norm: 0.9999992659638752, iteration: 99957
loss: 0.9976351857185364,grad_norm: 0.9848916044636588, iteration: 99958
loss: 0.989958643913269,grad_norm: 0.9999991295950315, iteration: 99959
loss: 1.0549784898757935,grad_norm: 0.9999991694130511, iteration: 99960
loss: 1.0223816633224487,grad_norm: 0.9999992281513628, iteration: 99961
loss: 0.9781301617622375,grad_norm: 0.9999991653283576, iteration: 99962
loss: 1.0401660203933716,grad_norm: 0.9253718030392308, iteration: 99963
loss: 0.9818946123123169,grad_norm: 0.999998981231249, iteration: 99964
loss: 0.9706321954727173,grad_norm: 0.9388358288000096, iteration: 99965
loss: 1.006080985069275,grad_norm: 0.9649820092007566, iteration: 99966
loss: 0.9668610095977783,grad_norm: 0.9519769698517578, iteration: 99967
loss: 1.0107581615447998,grad_norm: 0.9625521874732786, iteration: 99968
loss: 1.009482979774475,grad_norm: 0.9999989646701513, iteration: 99969
loss: 1.03578782081604,grad_norm: 0.8085251153812821, iteration: 99970
loss: 0.9848265051841736,grad_norm: 0.9806984520533565, iteration: 99971
loss: 0.9889637231826782,grad_norm: 0.9856185236499546, iteration: 99972
loss: 1.0156502723693848,grad_norm: 0.9999991901172004, iteration: 99973
loss: 1.0013988018035889,grad_norm: 0.9639810801207433, iteration: 99974
loss: 0.988548994064331,grad_norm: 0.9999991077066607, iteration: 99975
loss: 0.9698997735977173,grad_norm: 0.9999992391825363, iteration: 99976
loss: 1.021422266960144,grad_norm: 0.9553518694259219, iteration: 99977
loss: 0.9878873229026794,grad_norm: 0.9999991821748032, iteration: 99978
loss: 0.9705761075019836,grad_norm: 0.9999993103492545, iteration: 99979
loss: 1.0367437601089478,grad_norm: 0.9999992237587052, iteration: 99980
loss: 0.9793495535850525,grad_norm: 0.9999990132011131, iteration: 99981
loss: 1.0141464471817017,grad_norm: 0.9710649496334222, iteration: 99982
loss: 0.9911620020866394,grad_norm: 0.8155060187463739, iteration: 99983
loss: 0.994355320930481,grad_norm: 0.9999991695342396, iteration: 99984
loss: 1.0019025802612305,grad_norm: 0.9999991005227931, iteration: 99985
loss: 1.0116477012634277,grad_norm: 0.9999989268939841, iteration: 99986
loss: 1.0052708387374878,grad_norm: 0.9999991509952256, iteration: 99987
loss: 1.0222970247268677,grad_norm: 0.9109230176451533, iteration: 99988
loss: 1.007393717765808,grad_norm: 0.9585870412807672, iteration: 99989
loss: 1.0239861011505127,grad_norm: 0.9833638831440048, iteration: 99990
loss: 1.0382918119430542,grad_norm: 0.999999035969077, iteration: 99991
loss: 1.0194028615951538,grad_norm: 0.8304017765063889, iteration: 99992
loss: 0.9998434782028198,grad_norm: 0.9999991941631867, iteration: 99993
loss: 1.037073016166687,grad_norm: 0.9999991932361074, iteration: 99994
loss: 0.987479567527771,grad_norm: 0.9999989997426169, iteration: 99995
loss: 0.9844464063644409,grad_norm: 0.9999990277685888, iteration: 99996
loss: 1.0084418058395386,grad_norm: 0.9369949970513928, iteration: 99997
loss: 0.9797036647796631,grad_norm: 0.9842725394899234, iteration: 99998
loss: 1.0075467824935913,grad_norm: 0.9999990743656596, iteration: 99999
loss: 0.9987297654151917,grad_norm: 0.9999990491372511, iteration: 100000
Evaluating at step 100000
{'val': 0.9947670493274927, 'test': 2.3568780742133386}
loss: 0.9697540998458862,grad_norm: 0.8776827073548056, iteration: 100001
loss: 0.9758906364440918,grad_norm: 0.9999990221008829, iteration: 100002
loss: 0.9756128191947937,grad_norm: 0.9999992705058303, iteration: 100003
loss: 0.9932296872138977,grad_norm: 0.9999991457691382, iteration: 100004
loss: 1.0312529802322388,grad_norm: 0.9999989797954684, iteration: 100005
loss: 1.0242843627929688,grad_norm: 0.9046691072623222, iteration: 100006
loss: 0.997592031955719,grad_norm: 0.8020329786254377, iteration: 100007
loss: 0.9824527502059937,grad_norm: 0.9433471329747432, iteration: 100008
loss: 1.0249470472335815,grad_norm: 0.9999992166683974, iteration: 100009
loss: 0.9995794892311096,grad_norm: 0.9882439316663576, iteration: 100010
loss: 0.9957879781723022,grad_norm: 0.999999096114605, iteration: 100011
loss: 0.9929065704345703,grad_norm: 0.9999991369331692, iteration: 100012
loss: 0.9891824126243591,grad_norm: 0.9185615112220775, iteration: 100013
loss: 1.074696660041809,grad_norm: 0.9999993083970975, iteration: 100014
loss: 0.9768206477165222,grad_norm: 0.9999991305700691, iteration: 100015
loss: 1.0017703771591187,grad_norm: 0.9999996941139844, iteration: 100016
loss: 1.0087825059890747,grad_norm: 0.9623099437901715, iteration: 100017
loss: 1.0209189653396606,grad_norm: 0.9999991897557284, iteration: 100018
loss: 1.0062637329101562,grad_norm: 0.9999990537807264, iteration: 100019
loss: 0.9821515083312988,grad_norm: 0.9885189908115675, iteration: 100020
loss: 1.0249292850494385,grad_norm: 0.9999991669235795, iteration: 100021
loss: 0.991546094417572,grad_norm: 0.999999570213302, iteration: 100022
loss: 0.9895809888839722,grad_norm: 0.9999990629048818, iteration: 100023
loss: 0.9941601157188416,grad_norm: 0.9795686953226757, iteration: 100024
loss: 1.0075137615203857,grad_norm: 0.9756298389790464, iteration: 100025
loss: 0.979347288608551,grad_norm: 0.944288160450235, iteration: 100026
loss: 1.0361789464950562,grad_norm: 0.9999989353719684, iteration: 100027
loss: 0.9738051295280457,grad_norm: 0.9999992275110422, iteration: 100028
loss: 0.9943072199821472,grad_norm: 0.9682450751755391, iteration: 100029
loss: 0.9886301159858704,grad_norm: 0.9999991797536941, iteration: 100030
loss: 1.0177433490753174,grad_norm: 0.999999198069642, iteration: 100031
loss: 0.9826002717018127,grad_norm: 0.9678985498610126, iteration: 100032
loss: 1.0049740076065063,grad_norm: 0.9999992103304057, iteration: 100033
loss: 1.0772807598114014,grad_norm: 0.9999992651145592, iteration: 100034
loss: 0.9780452251434326,grad_norm: 0.9999992039290245, iteration: 100035
loss: 1.0081820487976074,grad_norm: 0.9655818550443078, iteration: 100036
loss: 1.0108603239059448,grad_norm: 0.9999991586226316, iteration: 100037
loss: 1.020554780960083,grad_norm: 0.8455596633470853, iteration: 100038
loss: 0.9838148355484009,grad_norm: 0.9903862176869058, iteration: 100039
loss: 1.0382466316223145,grad_norm: 0.9999989833969924, iteration: 100040
loss: 0.9838167428970337,grad_norm: 0.9999991314016504, iteration: 100041
loss: 1.0411584377288818,grad_norm: 0.9999991762122187, iteration: 100042
loss: 1.014928936958313,grad_norm: 0.9999991325966744, iteration: 100043
loss: 1.0341449975967407,grad_norm: 0.9999990478401352, iteration: 100044
loss: 0.9852719902992249,grad_norm: 0.854502552112864, iteration: 100045
loss: 0.991872251033783,grad_norm: 0.9999989646536734, iteration: 100046
loss: 1.00513756275177,grad_norm: 0.927073296350462, iteration: 100047
loss: 0.9713197946548462,grad_norm: 0.9999990410152346, iteration: 100048
loss: 0.9931954145431519,grad_norm: 0.9016232833315438, iteration: 100049
loss: 0.9784227609634399,grad_norm: 0.9999990027620537, iteration: 100050
loss: 0.9764111638069153,grad_norm: 0.9537574130232748, iteration: 100051
loss: 0.970806896686554,grad_norm: 0.9999990448512274, iteration: 100052
loss: 1.0256797075271606,grad_norm: 0.9999991704020399, iteration: 100053
loss: 1.0248242616653442,grad_norm: 0.9588962028059166, iteration: 100054
loss: 0.9574910998344421,grad_norm: 0.9257236649586704, iteration: 100055
loss: 0.9703834652900696,grad_norm: 0.9999992077844981, iteration: 100056
loss: 1.0129910707473755,grad_norm: 0.9269357546226189, iteration: 100057
loss: 1.001238465309143,grad_norm: 0.9025564343428146, iteration: 100058
loss: 1.0093672275543213,grad_norm: 0.9397528286656712, iteration: 100059
loss: 0.9714044332504272,grad_norm: 0.923005502419114, iteration: 100060
loss: 1.0196418762207031,grad_norm: 0.9747989846734303, iteration: 100061
loss: 0.9998921751976013,grad_norm: 0.9999992594238145, iteration: 100062
loss: 0.9942725896835327,grad_norm: 0.9999990770328385, iteration: 100063
loss: 1.0135700702667236,grad_norm: 0.9691274745927333, iteration: 100064
loss: 1.0142723321914673,grad_norm: 0.9999991410496789, iteration: 100065
loss: 0.9817980527877808,grad_norm: 0.9999991512768337, iteration: 100066
loss: 0.9892930388450623,grad_norm: 0.954208558554461, iteration: 100067
loss: 1.0031756162643433,grad_norm: 0.999999234667761, iteration: 100068
loss: 1.0122222900390625,grad_norm: 0.9546447079019003, iteration: 100069
loss: 0.9685832858085632,grad_norm: 0.9456112687207981, iteration: 100070
loss: 0.9961841106414795,grad_norm: 0.9999991866058594, iteration: 100071
loss: 0.9948803782463074,grad_norm: 0.9999993211999763, iteration: 100072
loss: 0.9930319786071777,grad_norm: 0.9999990578956414, iteration: 100073
loss: 0.9823122620582581,grad_norm: 0.9999992204996813, iteration: 100074
loss: 0.978875458240509,grad_norm: 0.9999990991820242, iteration: 100075
loss: 1.0488287210464478,grad_norm: 0.9999996561103881, iteration: 100076
loss: 0.9812945127487183,grad_norm: 0.8683630779687234, iteration: 100077
loss: 0.9803542494773865,grad_norm: 0.9999991817681616, iteration: 100078
loss: 0.9971246123313904,grad_norm: 0.9999989698706061, iteration: 100079
loss: 0.9709404706954956,grad_norm: 0.9999993606790347, iteration: 100080
loss: 0.9931550025939941,grad_norm: 0.8135188394553351, iteration: 100081
loss: 0.9993229508399963,grad_norm: 0.9999990146227135, iteration: 100082
loss: 1.0284053087234497,grad_norm: 0.9835541931840721, iteration: 100083
loss: 0.9616527557373047,grad_norm: 0.9999989701715293, iteration: 100084
loss: 0.961206316947937,grad_norm: 0.985127773177884, iteration: 100085
loss: 1.0238195657730103,grad_norm: 0.9999990820171392, iteration: 100086
loss: 0.9919688701629639,grad_norm: 0.999999268652321, iteration: 100087
loss: 1.0032556056976318,grad_norm: 0.8807078204144174, iteration: 100088
loss: 1.0079816579818726,grad_norm: 0.9999993071665527, iteration: 100089
loss: 0.995063066482544,grad_norm: 0.8638140683719517, iteration: 100090
loss: 0.9781932234764099,grad_norm: 0.9999990122702349, iteration: 100091
loss: 1.0156638622283936,grad_norm: 0.9226629221743351, iteration: 100092
loss: 0.9950659871101379,grad_norm: 0.9401255142242068, iteration: 100093
loss: 1.008201003074646,grad_norm: 0.9999990770984477, iteration: 100094
loss: 1.0130888223648071,grad_norm: 0.9999993090879074, iteration: 100095
loss: 1.0489089488983154,grad_norm: 0.9999991754209334, iteration: 100096
loss: 1.0616041421890259,grad_norm: 0.9999990496388985, iteration: 100097
loss: 0.9879850745201111,grad_norm: 0.9999991237269268, iteration: 100098
loss: 0.9629318118095398,grad_norm: 0.9999991439366679, iteration: 100099
loss: 1.1061896085739136,grad_norm: 0.9999994094358765, iteration: 100100
loss: 1.014739990234375,grad_norm: 0.9483686523684739, iteration: 100101
loss: 1.030035376548767,grad_norm: 0.9999991595260086, iteration: 100102
loss: 1.0654665231704712,grad_norm: 0.9999992073760107, iteration: 100103
loss: 0.9733470678329468,grad_norm: 0.9999992065024027, iteration: 100104
loss: 1.021365761756897,grad_norm: 0.9999990367010064, iteration: 100105
loss: 0.9637651443481445,grad_norm: 0.9999992026500688, iteration: 100106
loss: 0.9815771579742432,grad_norm: 0.9999991974990562, iteration: 100107
loss: 1.0000513792037964,grad_norm: 0.9816110242058265, iteration: 100108
loss: 0.9770461320877075,grad_norm: 0.9999990226163561, iteration: 100109
loss: 1.0057247877120972,grad_norm: 0.9999990430618383, iteration: 100110
loss: 1.0508350133895874,grad_norm: 0.999999045424888, iteration: 100111
loss: 1.0072462558746338,grad_norm: 0.9999990591032112, iteration: 100112
loss: 1.0281546115875244,grad_norm: 0.9999989948278228, iteration: 100113
loss: 1.0148916244506836,grad_norm: 0.941386776076436, iteration: 100114
loss: 0.9951991438865662,grad_norm: 0.9999991578943748, iteration: 100115
loss: 0.972957968711853,grad_norm: 0.9999991115632019, iteration: 100116
loss: 0.9918321371078491,grad_norm: 0.9999991914479475, iteration: 100117
loss: 0.9934791922569275,grad_norm: 0.9052461881962556, iteration: 100118
loss: 1.0001475811004639,grad_norm: 0.9999990827347212, iteration: 100119
loss: 0.9875109195709229,grad_norm: 0.9999991937462981, iteration: 100120
loss: 1.0319278240203857,grad_norm: 0.9999993677770651, iteration: 100121
loss: 1.0155891180038452,grad_norm: 0.9999991583736685, iteration: 100122
loss: 1.0234601497650146,grad_norm: 0.9999992983259451, iteration: 100123
loss: 1.0158417224884033,grad_norm: 0.9806504024295583, iteration: 100124
loss: 1.0171452760696411,grad_norm: 0.9750062631431965, iteration: 100125
loss: 0.9390339851379395,grad_norm: 0.999999031648653, iteration: 100126
loss: 0.9761366248130798,grad_norm: 0.9999991225954962, iteration: 100127
loss: 0.9754684567451477,grad_norm: 0.9999990568300636, iteration: 100128
loss: 0.9956551194190979,grad_norm: 0.8606381768635886, iteration: 100129
loss: 1.014733910560608,grad_norm: 0.9999988320443575, iteration: 100130
loss: 0.9978635907173157,grad_norm: 0.9999991175947576, iteration: 100131
loss: 0.9972043037414551,grad_norm: 0.9094297670390377, iteration: 100132
loss: 0.9981889724731445,grad_norm: 0.9999991074908663, iteration: 100133
loss: 0.9651282429695129,grad_norm: 0.965617645696228, iteration: 100134
loss: 0.9984219074249268,grad_norm: 0.9999990394146125, iteration: 100135
loss: 1.0297324657440186,grad_norm: 0.9999991780238997, iteration: 100136
loss: 0.9661509394645691,grad_norm: 0.9941947120789466, iteration: 100137
loss: 1.038352131843567,grad_norm: 0.999999126668033, iteration: 100138
loss: 0.9824317097663879,grad_norm: 0.9999990352607089, iteration: 100139
loss: 0.976952850818634,grad_norm: 0.9999990702746887, iteration: 100140
loss: 0.9782335162162781,grad_norm: 0.9343439812847885, iteration: 100141
loss: 0.9904935359954834,grad_norm: 0.9239196907969919, iteration: 100142
loss: 1.015526294708252,grad_norm: 0.9999990434685748, iteration: 100143
loss: 1.0654767751693726,grad_norm: 0.9999990935215249, iteration: 100144
loss: 1.0081363916397095,grad_norm: 0.999999225017101, iteration: 100145
loss: 0.9762497544288635,grad_norm: 0.9589072455312212, iteration: 100146
loss: 0.993584930896759,grad_norm: 0.9999990967506942, iteration: 100147
loss: 1.0309983491897583,grad_norm: 0.9585379979258548, iteration: 100148
loss: 1.025464653968811,grad_norm: 0.9999998462451818, iteration: 100149
loss: 0.987917959690094,grad_norm: 0.9999990941537149, iteration: 100150
loss: 1.030768871307373,grad_norm: 0.9999990963891311, iteration: 100151
loss: 0.9785193204879761,grad_norm: 0.9999990024565361, iteration: 100152
loss: 1.007531762123108,grad_norm: 0.9999991632662819, iteration: 100153
loss: 1.0209577083587646,grad_norm: 0.9999991308785177, iteration: 100154
loss: 0.9679696559906006,grad_norm: 0.9999990145575167, iteration: 100155
loss: 1.0100212097167969,grad_norm: 0.9402497664141553, iteration: 100156
loss: 1.0112040042877197,grad_norm: 0.9999991408915498, iteration: 100157
loss: 1.0047903060913086,grad_norm: 0.8858739326232271, iteration: 100158
loss: 0.992652416229248,grad_norm: 0.9999990826541547, iteration: 100159
loss: 1.0433859825134277,grad_norm: 0.9999996045536244, iteration: 100160
loss: 0.9765467643737793,grad_norm: 0.942283938023452, iteration: 100161
loss: 1.0257279872894287,grad_norm: 0.9999990394054196, iteration: 100162
loss: 1.00642991065979,grad_norm: 0.9999993206112665, iteration: 100163
loss: 0.9683666229248047,grad_norm: 0.989282879917105, iteration: 100164
loss: 1.0134999752044678,grad_norm: 0.9999992872418169, iteration: 100165
loss: 0.9895594120025635,grad_norm: 0.9119840089460121, iteration: 100166
loss: 0.9908782243728638,grad_norm: 0.8819665297404048, iteration: 100167
loss: 0.986095666885376,grad_norm: 0.982186241962647, iteration: 100168
loss: 0.9867497682571411,grad_norm: 0.9999992469982713, iteration: 100169
loss: 0.9890320897102356,grad_norm: 0.9999992175267519, iteration: 100170
loss: 1.0194518566131592,grad_norm: 0.9999998726398532, iteration: 100171
loss: 0.955927848815918,grad_norm: 0.9456100736151474, iteration: 100172
loss: 0.9963797330856323,grad_norm: 0.99852600910836, iteration: 100173
loss: 0.9652381539344788,grad_norm: 0.9999991957824228, iteration: 100174
loss: 1.0441644191741943,grad_norm: 0.8331987090076262, iteration: 100175
loss: 1.0219706296920776,grad_norm: 0.9999991130394561, iteration: 100176
loss: 0.9352924823760986,grad_norm: 0.9999991276082463, iteration: 100177
loss: 1.0079909563064575,grad_norm: 0.9999990920334678, iteration: 100178
loss: 1.0208383798599243,grad_norm: 0.9999991762225839, iteration: 100179
loss: 1.0003963708877563,grad_norm: 0.9999991113996717, iteration: 100180
loss: 0.9852505326271057,grad_norm: 0.999999238595323, iteration: 100181
loss: 1.0074093341827393,grad_norm: 0.9999990520266192, iteration: 100182
loss: 0.978726327419281,grad_norm: 0.8656509960401046, iteration: 100183
loss: 0.9868372678756714,grad_norm: 0.9838933066927554, iteration: 100184
loss: 0.9899387955665588,grad_norm: 0.9999992200351422, iteration: 100185
loss: 0.9780889749526978,grad_norm: 0.999999048374321, iteration: 100186
loss: 0.9861647486686707,grad_norm: 0.9327084442134826, iteration: 100187
loss: 0.9895032048225403,grad_norm: 0.9988150050042311, iteration: 100188
loss: 1.0056337118148804,grad_norm: 0.9999990152410136, iteration: 100189
loss: 0.9969048500061035,grad_norm: 0.9840686760735765, iteration: 100190
loss: 0.9734407663345337,grad_norm: 0.9441043304817422, iteration: 100191
loss: 0.9987290501594543,grad_norm: 0.9442582698425764, iteration: 100192
loss: 1.0395718812942505,grad_norm: 0.9999990868326275, iteration: 100193
loss: 1.0013784170150757,grad_norm: 0.9861833793259703, iteration: 100194
loss: 0.9928424954414368,grad_norm: 0.9999992119508727, iteration: 100195
loss: 0.9774124026298523,grad_norm: 0.9999990378844656, iteration: 100196
loss: 1.0106109380722046,grad_norm: 0.9645341073327349, iteration: 100197
loss: 0.9923585653305054,grad_norm: 0.8803732066384437, iteration: 100198
loss: 1.0324994325637817,grad_norm: 0.9999990572773314, iteration: 100199
loss: 0.9940504431724548,grad_norm: 0.9999990302171968, iteration: 100200
loss: 1.0247759819030762,grad_norm: 0.9999992014163873, iteration: 100201
loss: 0.9861130714416504,grad_norm: 0.8836569602908498, iteration: 100202
loss: 0.9983550310134888,grad_norm: 0.9999992527241145, iteration: 100203
loss: 0.9953257441520691,grad_norm: 0.9999991021661887, iteration: 100204
loss: 0.9826647639274597,grad_norm: 0.9999991258497981, iteration: 100205
loss: 0.9982960224151611,grad_norm: 0.9999991910220456, iteration: 100206
loss: 1.0092856884002686,grad_norm: 0.9999990302149359, iteration: 100207
loss: 0.9623836278915405,grad_norm: 0.9999990041594397, iteration: 100208
loss: 0.9810795783996582,grad_norm: 0.9999991317076833, iteration: 100209
loss: 0.976300835609436,grad_norm: 0.7593206894525281, iteration: 100210
loss: 0.9865072965621948,grad_norm: 0.9999992229889894, iteration: 100211
loss: 0.9791129231452942,grad_norm: 0.9795369754778532, iteration: 100212
loss: 1.0335402488708496,grad_norm: 0.9999990440134631, iteration: 100213
loss: 0.9930989742279053,grad_norm: 0.9999991746482914, iteration: 100214
loss: 0.9967057108879089,grad_norm: 0.9913503556090761, iteration: 100215
loss: 1.0042036771774292,grad_norm: 0.9999991463620931, iteration: 100216
loss: 0.9703441262245178,grad_norm: 0.9999991408773824, iteration: 100217
loss: 1.0131869316101074,grad_norm: 0.9999992593449549, iteration: 100218
loss: 0.9970603585243225,grad_norm: 0.9999993692847456, iteration: 100219
loss: 0.9895965456962585,grad_norm: 0.9999990710992589, iteration: 100220
loss: 1.0006393194198608,grad_norm: 0.9999990665277277, iteration: 100221
loss: 0.9899691343307495,grad_norm: 0.9999992789855261, iteration: 100222
loss: 0.9805776476860046,grad_norm: 0.9191291901730491, iteration: 100223
loss: 0.9845626950263977,grad_norm: 0.9999990479247394, iteration: 100224
loss: 0.9938451647758484,grad_norm: 0.9999992402384201, iteration: 100225
loss: 1.0175474882125854,grad_norm: 0.999999211025987, iteration: 100226
loss: 0.9794594049453735,grad_norm: 0.9999990012393741, iteration: 100227
loss: 0.969407856464386,grad_norm: 0.8854251337320509, iteration: 100228
loss: 1.0140964984893799,grad_norm: 0.9999990388007214, iteration: 100229
loss: 1.0139590501785278,grad_norm: 0.9999997827855418, iteration: 100230
loss: 1.0483852624893188,grad_norm: 0.9999988251189991, iteration: 100231
loss: 1.0008000135421753,grad_norm: 0.9941411911050425, iteration: 100232
loss: 0.9799553155899048,grad_norm: 0.9999992903138941, iteration: 100233
loss: 1.00199556350708,grad_norm: 0.9999990528112728, iteration: 100234
loss: 0.9930917620658875,grad_norm: 0.913899617025669, iteration: 100235
loss: 0.985352098941803,grad_norm: 0.9999991452024288, iteration: 100236
loss: 1.0178006887435913,grad_norm: 0.951303331731927, iteration: 100237
loss: 0.9808606505393982,grad_norm: 0.9999990458442042, iteration: 100238
loss: 0.9847113490104675,grad_norm: 0.9926987057669971, iteration: 100239
loss: 1.0121046304702759,grad_norm: 0.8942233575369116, iteration: 100240
loss: 1.0299816131591797,grad_norm: 0.9999992224746848, iteration: 100241
loss: 1.041054368019104,grad_norm: 0.9999997482802596, iteration: 100242
loss: 0.9674127697944641,grad_norm: 0.9999991556045816, iteration: 100243
loss: 1.003737211227417,grad_norm: 0.9999993781612991, iteration: 100244
loss: 1.0181100368499756,grad_norm: 0.9999990881257725, iteration: 100245
loss: 0.9982052445411682,grad_norm: 0.9999990364995177, iteration: 100246
loss: 1.0057061910629272,grad_norm: 0.9999991955788546, iteration: 100247
loss: 1.000075340270996,grad_norm: 0.9999990828042089, iteration: 100248
loss: 0.9967973232269287,grad_norm: 0.9982143972008584, iteration: 100249
loss: 0.9752788543701172,grad_norm: 0.9999991266562843, iteration: 100250
loss: 0.9707374572753906,grad_norm: 0.9999991338954024, iteration: 100251
loss: 0.9985977411270142,grad_norm: 0.9264612915996786, iteration: 100252
loss: 0.9895148873329163,grad_norm: 0.9999996379034669, iteration: 100253
loss: 1.0005031824111938,grad_norm: 0.8969317735093599, iteration: 100254
loss: 0.9610841274261475,grad_norm: 0.9999990795587249, iteration: 100255
loss: 0.9780842065811157,grad_norm: 0.9194028628879075, iteration: 100256
loss: 1.035722017288208,grad_norm: 0.9999991332709683, iteration: 100257
loss: 1.0066717863082886,grad_norm: 0.9724794972069608, iteration: 100258
loss: 0.9926273226737976,grad_norm: 0.9999992652489059, iteration: 100259
loss: 0.9705930948257446,grad_norm: 0.9999991415574041, iteration: 100260
loss: 0.9729132652282715,grad_norm: 0.9467951190315155, iteration: 100261
loss: 1.0109763145446777,grad_norm: 0.8629655717923341, iteration: 100262
loss: 0.9758808612823486,grad_norm: 0.9446685142798285, iteration: 100263
loss: 1.0022534132003784,grad_norm: 0.9999991838506482, iteration: 100264
loss: 1.008731484413147,grad_norm: 0.9894449994907797, iteration: 100265
loss: 0.9867534041404724,grad_norm: 0.9999992515792231, iteration: 100266
loss: 1.0247621536254883,grad_norm: 0.9999996701645263, iteration: 100267
loss: 1.0012428760528564,grad_norm: 0.9999990938214496, iteration: 100268
loss: 1.020167589187622,grad_norm: 0.9999990163327401, iteration: 100269
loss: 1.0043630599975586,grad_norm: 0.9999996293036222, iteration: 100270
loss: 1.0180563926696777,grad_norm: 0.9999998134597121, iteration: 100271
loss: 1.0079675912857056,grad_norm: 0.9999991980709502, iteration: 100272
loss: 1.0423223972320557,grad_norm: 0.9756754227323339, iteration: 100273
loss: 0.955745279788971,grad_norm: 0.9268748095072756, iteration: 100274
loss: 0.9799632430076599,grad_norm: 0.999999237794878, iteration: 100275
loss: 0.9923865795135498,grad_norm: 0.9999990972187062, iteration: 100276
loss: 1.0006167888641357,grad_norm: 0.9893645313736376, iteration: 100277
loss: 0.9657644629478455,grad_norm: 0.9999990728322137, iteration: 100278
loss: 0.9821152687072754,grad_norm: 0.9121856988149616, iteration: 100279
loss: 0.9998334050178528,grad_norm: 0.9999991446404479, iteration: 100280
loss: 1.0018844604492188,grad_norm: 0.9912251558071569, iteration: 100281
loss: 1.0253993272781372,grad_norm: 0.9999991444456029, iteration: 100282
loss: 1.0085865259170532,grad_norm: 0.999999317032142, iteration: 100283
loss: 0.9592710733413696,grad_norm: 0.9642541048952178, iteration: 100284
loss: 1.002273440361023,grad_norm: 0.9999992200757952, iteration: 100285
loss: 1.0141509771347046,grad_norm: 0.9999992116284646, iteration: 100286
loss: 0.9586133360862732,grad_norm: 0.9999990783359153, iteration: 100287
loss: 0.9726153612136841,grad_norm: 0.9572112744919982, iteration: 100288
loss: 0.9744365811347961,grad_norm: 0.9999990195325064, iteration: 100289
loss: 1.0216325521469116,grad_norm: 0.9999991922626247, iteration: 100290
loss: 1.0276678800582886,grad_norm: 0.8291135544518368, iteration: 100291
loss: 0.9907756447792053,grad_norm: 0.9999991602531789, iteration: 100292
loss: 1.0537621974945068,grad_norm: 0.9999997303646263, iteration: 100293
loss: 0.9997093081474304,grad_norm: 0.999999220728352, iteration: 100294
loss: 1.0040267705917358,grad_norm: 0.9999990156302716, iteration: 100295
loss: 1.0225508213043213,grad_norm: 0.9999990994161295, iteration: 100296
loss: 1.023520827293396,grad_norm: 0.9999991383993708, iteration: 100297
loss: 1.0055491924285889,grad_norm: 0.9384609007059187, iteration: 100298
loss: 1.0200947523117065,grad_norm: 0.9989075052643797, iteration: 100299
loss: 1.0056718587875366,grad_norm: 0.8647239722086311, iteration: 100300
loss: 0.9816523194313049,grad_norm: 0.9011595438934176, iteration: 100301
loss: 0.9838865399360657,grad_norm: 0.9416941349813635, iteration: 100302
loss: 1.0489575862884521,grad_norm: 0.8590127092171399, iteration: 100303
loss: 1.0051499605178833,grad_norm: 0.9669635866685201, iteration: 100304
loss: 1.0033155679702759,grad_norm: 0.9999989735106987, iteration: 100305
loss: 1.0129166841506958,grad_norm: 0.9999994669296415, iteration: 100306
loss: 0.978778064250946,grad_norm: 0.9999992658132879, iteration: 100307
loss: 1.012682318687439,grad_norm: 0.8902654342592866, iteration: 100308
loss: 0.9998109936714172,grad_norm: 0.9999991429473307, iteration: 100309
loss: 0.982191801071167,grad_norm: 0.9006111551903652, iteration: 100310
loss: 1.001373529434204,grad_norm: 0.908833393156826, iteration: 100311
loss: 1.0373362302780151,grad_norm: 0.9999992392051279, iteration: 100312
loss: 0.9962360858917236,grad_norm: 0.8947023606969715, iteration: 100313
loss: 0.9791979193687439,grad_norm: 0.999999421112117, iteration: 100314
loss: 1.0291742086410522,grad_norm: 0.9999993105909054, iteration: 100315
loss: 1.008266806602478,grad_norm: 0.9916857704023706, iteration: 100316
loss: 1.016697645187378,grad_norm: 0.9999992106870372, iteration: 100317
loss: 0.9431809186935425,grad_norm: 0.999999106518404, iteration: 100318
loss: 1.0266220569610596,grad_norm: 0.9999992734275912, iteration: 100319
loss: 1.019017219543457,grad_norm: 0.9999991029469284, iteration: 100320
loss: 1.0153769254684448,grad_norm: 0.9373714926087084, iteration: 100321
loss: 0.9740882515907288,grad_norm: 0.9999990497775691, iteration: 100322
loss: 0.9907287359237671,grad_norm: 0.9999991415864948, iteration: 100323
loss: 1.0390889644622803,grad_norm: 0.8569874335478677, iteration: 100324
loss: 1.0103991031646729,grad_norm: 0.9833723887299204, iteration: 100325
loss: 1.0265249013900757,grad_norm: 0.9999990835959153, iteration: 100326
loss: 1.021254539489746,grad_norm: 0.9999988732522599, iteration: 100327
loss: 0.9992724061012268,grad_norm: 0.9842971338589157, iteration: 100328
loss: 1.0001070499420166,grad_norm: 0.9730094221515917, iteration: 100329
loss: 1.0010437965393066,grad_norm: 0.9898326574444114, iteration: 100330
loss: 0.9829374551773071,grad_norm: 0.9788273265545908, iteration: 100331
loss: 0.9907127618789673,grad_norm: 0.9999991880148074, iteration: 100332
loss: 1.0271159410476685,grad_norm: 0.9999992404645163, iteration: 100333
loss: 0.994615912437439,grad_norm: 0.934643589118887, iteration: 100334
loss: 0.9960938096046448,grad_norm: 0.9999992205390068, iteration: 100335
loss: 1.0040292739868164,grad_norm: 0.9999990068129868, iteration: 100336
loss: 1.0235729217529297,grad_norm: 0.9236562830986206, iteration: 100337
loss: 1.0090680122375488,grad_norm: 0.8356799200169663, iteration: 100338
loss: 1.0488892793655396,grad_norm: 0.9286946883809707, iteration: 100339
loss: 1.0464427471160889,grad_norm: 0.99999934423402, iteration: 100340
loss: 1.0082621574401855,grad_norm: 0.9999990541123074, iteration: 100341
loss: 0.9918390512466431,grad_norm: 0.7934096808992359, iteration: 100342
loss: 1.0172172784805298,grad_norm: 0.9999991518700859, iteration: 100343
loss: 1.0128064155578613,grad_norm: 0.9470956597847996, iteration: 100344
loss: 0.9670731425285339,grad_norm: 0.990303087663784, iteration: 100345
loss: 0.9876685738563538,grad_norm: 0.829701233972114, iteration: 100346
loss: 1.0458773374557495,grad_norm: 0.9999995161115824, iteration: 100347
loss: 1.0053421258926392,grad_norm: 0.999999232949533, iteration: 100348
loss: 0.9773014187812805,grad_norm: 0.9999990812637669, iteration: 100349
loss: 0.9980548024177551,grad_norm: 0.9999991324741283, iteration: 100350
loss: 1.020930528640747,grad_norm: 0.9999992996728588, iteration: 100351
loss: 0.9825180172920227,grad_norm: 0.9999990406928688, iteration: 100352
loss: 1.009583830833435,grad_norm: 0.9999992070013637, iteration: 100353
loss: 0.981700599193573,grad_norm: 0.996085778901364, iteration: 100354
loss: 0.992647647857666,grad_norm: 0.999999212487447, iteration: 100355
loss: 1.0007330179214478,grad_norm: 0.9967667674826213, iteration: 100356
loss: 1.0148990154266357,grad_norm: 0.9999991076144059, iteration: 100357
loss: 0.9662541151046753,grad_norm: 0.9694963305179086, iteration: 100358
loss: 0.9833959937095642,grad_norm: 0.9999990056909643, iteration: 100359
loss: 1.0522968769073486,grad_norm: 0.9999992462177084, iteration: 100360
loss: 0.9817354083061218,grad_norm: 0.999999263084166, iteration: 100361
loss: 1.009474515914917,grad_norm: 0.9979978279047493, iteration: 100362
loss: 0.9821258187294006,grad_norm: 0.9835571141103441, iteration: 100363
loss: 0.9964714050292969,grad_norm: 0.999999223016713, iteration: 100364
loss: 1.050205111503601,grad_norm: 0.9999991325679267, iteration: 100365
loss: 1.0096945762634277,grad_norm: 0.9316854304513059, iteration: 100366
loss: 0.9864248633384705,grad_norm: 0.9309759409461795, iteration: 100367
loss: 0.9932624697685242,grad_norm: 0.9701492321171574, iteration: 100368
loss: 1.0236055850982666,grad_norm: 0.999999123089697, iteration: 100369
loss: 0.9998018145561218,grad_norm: 0.8846199975973141, iteration: 100370
loss: 0.9903874397277832,grad_norm: 0.9999991477136491, iteration: 100371
loss: 1.0076512098312378,grad_norm: 0.9116158237305737, iteration: 100372
loss: 1.0134296417236328,grad_norm: 0.9999996444728895, iteration: 100373
loss: 1.01582932472229,grad_norm: 0.8836680563618529, iteration: 100374
loss: 0.9962545037269592,grad_norm: 0.9885295199109723, iteration: 100375
loss: 0.9915891885757446,grad_norm: 0.9741133104937573, iteration: 100376
loss: 1.0013012886047363,grad_norm: 0.9999991962099337, iteration: 100377
loss: 0.9808638095855713,grad_norm: 0.9871846445970577, iteration: 100378
loss: 1.0358906984329224,grad_norm: 0.9999989350203249, iteration: 100379
loss: 0.9988610148429871,grad_norm: 0.9361963478395795, iteration: 100380
loss: 0.9990687966346741,grad_norm: 0.9999992160396481, iteration: 100381
loss: 0.9977773427963257,grad_norm: 0.9999992198814932, iteration: 100382
loss: 1.0124260187149048,grad_norm: 0.9792432749886227, iteration: 100383
loss: 0.9903748035430908,grad_norm: 0.9999991813144634, iteration: 100384
loss: 1.0018223524093628,grad_norm: 0.9999992294591099, iteration: 100385
loss: 1.000507116317749,grad_norm: 0.9999990506655569, iteration: 100386
loss: 0.9733824729919434,grad_norm: 0.9999989663498378, iteration: 100387
loss: 1.0005697011947632,grad_norm: 0.999999079181722, iteration: 100388
loss: 1.038636326789856,grad_norm: 0.9826133707355174, iteration: 100389
loss: 0.9817665219306946,grad_norm: 0.9999993195612481, iteration: 100390
loss: 0.9453614354133606,grad_norm: 0.9999992150532532, iteration: 100391
loss: 0.959609866142273,grad_norm: 0.9999990150909485, iteration: 100392
loss: 1.0140513181686401,grad_norm: 0.9508481387659574, iteration: 100393
loss: 1.0245012044906616,grad_norm: 0.9999990441786071, iteration: 100394
loss: 1.045046091079712,grad_norm: 0.9999997110078152, iteration: 100395
loss: 0.9398695826530457,grad_norm: 0.9495077781508223, iteration: 100396
loss: 1.0348700284957886,grad_norm: 0.9999990236641326, iteration: 100397
loss: 1.0314066410064697,grad_norm: 0.9106583034259573, iteration: 100398
loss: 0.9500287771224976,grad_norm: 0.9999991481143891, iteration: 100399
loss: 0.9767678380012512,grad_norm: 0.9999991755884888, iteration: 100400
loss: 0.9987471699714661,grad_norm: 0.951845821321853, iteration: 100401
loss: 0.9791775941848755,grad_norm: 0.999999150816193, iteration: 100402
loss: 1.0196484327316284,grad_norm: 0.9999991595228187, iteration: 100403
loss: 1.0180160999298096,grad_norm: 0.9999990811311588, iteration: 100404
loss: 0.954182505607605,grad_norm: 0.9300999536928167, iteration: 100405
loss: 1.0192915201187134,grad_norm: 0.9999992993678074, iteration: 100406
loss: 1.006560206413269,grad_norm: 0.9999992212507217, iteration: 100407
loss: 1.0037270784378052,grad_norm: 0.9999991849230546, iteration: 100408
loss: 0.9995937943458557,grad_norm: 0.9158347001254579, iteration: 100409
loss: 1.0452882051467896,grad_norm: 0.999999146887146, iteration: 100410
loss: 1.0117855072021484,grad_norm: 0.9999990269154594, iteration: 100411
loss: 0.9458765387535095,grad_norm: 0.9999992858901299, iteration: 100412
loss: 0.9959401488304138,grad_norm: 0.9999990217015919, iteration: 100413
loss: 0.9932816624641418,grad_norm: 0.9999990826594929, iteration: 100414
loss: 1.0520633459091187,grad_norm: 0.9999989860171781, iteration: 100415
loss: 1.0103086233139038,grad_norm: 0.9999993350010719, iteration: 100416
loss: 0.9984337091445923,grad_norm: 0.9999992268329656, iteration: 100417
loss: 1.0072206258773804,grad_norm: 0.9999990635002427, iteration: 100418
loss: 1.0486576557159424,grad_norm: 0.9999990848446172, iteration: 100419
loss: 0.9834485650062561,grad_norm: 0.9999992518944147, iteration: 100420
loss: 1.033032774925232,grad_norm: 0.9918415065709645, iteration: 100421
loss: 0.980630099773407,grad_norm: 0.9999991479530634, iteration: 100422
loss: 0.9966908693313599,grad_norm: 0.9999990716048343, iteration: 100423
loss: 0.9906983375549316,grad_norm: 0.9999991514858259, iteration: 100424
loss: 0.9865956902503967,grad_norm: 0.9999992143139204, iteration: 100425
loss: 1.0057098865509033,grad_norm: 0.8847608130785356, iteration: 100426
loss: 0.9801708459854126,grad_norm: 0.9999991291359384, iteration: 100427
loss: 1.0224578380584717,grad_norm: 0.9999990848869316, iteration: 100428
loss: 1.032693862915039,grad_norm: 0.9999992420076214, iteration: 100429
loss: 0.9677941799163818,grad_norm: 0.9999990947446488, iteration: 100430
loss: 1.0264946222305298,grad_norm: 0.9999991738306904, iteration: 100431
loss: 0.9986804127693176,grad_norm: 0.9999993352390905, iteration: 100432
loss: 0.994143545627594,grad_norm: 0.9999992197039801, iteration: 100433
loss: 0.9981951117515564,grad_norm: 0.9999991875950207, iteration: 100434
loss: 1.014722466468811,grad_norm: 0.99999930213505, iteration: 100435
loss: 0.9934362769126892,grad_norm: 0.9999992505456328, iteration: 100436
loss: 1.0221105813980103,grad_norm: 0.9999990498160068, iteration: 100437
loss: 1.0234616994857788,grad_norm: 0.9999990804501109, iteration: 100438
loss: 0.9905778765678406,grad_norm: 0.9999991492782847, iteration: 100439
loss: 0.9876742362976074,grad_norm: 0.9303112406732301, iteration: 100440
loss: 1.0200587511062622,grad_norm: 0.996666132540443, iteration: 100441
loss: 0.9922710657119751,grad_norm: 0.9999990487899687, iteration: 100442
loss: 0.9816349148750305,grad_norm: 0.9380998707385372, iteration: 100443
loss: 1.0006283521652222,grad_norm: 0.999999179934495, iteration: 100444
loss: 0.98843914270401,grad_norm: 0.8762769124737108, iteration: 100445
loss: 0.9918906092643738,grad_norm: 0.9938608562372476, iteration: 100446
loss: 1.0009888410568237,grad_norm: 0.9999990442066697, iteration: 100447
loss: 0.9961643815040588,grad_norm: 0.9994189487972427, iteration: 100448
loss: 0.9652066230773926,grad_norm: 0.9770689354422315, iteration: 100449
loss: 1.0105217695236206,grad_norm: 0.999999040096491, iteration: 100450
loss: 0.978165328502655,grad_norm: 0.9999989297977394, iteration: 100451
loss: 1.0333830118179321,grad_norm: 0.999999040007891, iteration: 100452
loss: 1.0289021730422974,grad_norm: 0.9999991820960084, iteration: 100453
loss: 0.9930737018585205,grad_norm: 0.8372488677762466, iteration: 100454
loss: 1.0228381156921387,grad_norm: 0.999999279758071, iteration: 100455
loss: 1.0105109214782715,grad_norm: 0.9999991906216518, iteration: 100456
loss: 1.0026676654815674,grad_norm: 0.7898691705353651, iteration: 100457
loss: 0.9875333309173584,grad_norm: 0.99999923460197, iteration: 100458
loss: 0.9939268231391907,grad_norm: 0.986323045665643, iteration: 100459
loss: 1.0005519390106201,grad_norm: 0.9166827663946242, iteration: 100460
loss: 0.9865924119949341,grad_norm: 0.9898494836284536, iteration: 100461
loss: 0.9882150292396545,grad_norm: 0.9732648803570093, iteration: 100462
loss: 1.018082857131958,grad_norm: 0.9999991974041765, iteration: 100463
loss: 1.0059345960617065,grad_norm: 0.9999990786686653, iteration: 100464
loss: 1.026477336883545,grad_norm: 0.9999992231799006, iteration: 100465
loss: 1.023231029510498,grad_norm: 0.9694200839479684, iteration: 100466
loss: 0.9941848516464233,grad_norm: 0.912958693479858, iteration: 100467
loss: 0.9894821643829346,grad_norm: 0.8951186672386507, iteration: 100468
loss: 1.0357624292373657,grad_norm: 0.9999990734525458, iteration: 100469
loss: 0.9894800186157227,grad_norm: 0.9277580546585946, iteration: 100470
loss: 1.00413179397583,grad_norm: 0.9232878336659706, iteration: 100471
loss: 0.9749981760978699,grad_norm: 0.91289826181383, iteration: 100472
loss: 0.9669010043144226,grad_norm: 0.9438766361354944, iteration: 100473
loss: 1.0135613679885864,grad_norm: 0.9768858103386002, iteration: 100474
loss: 0.9911015629768372,grad_norm: 0.9999990506403554, iteration: 100475
loss: 0.9721595048904419,grad_norm: 0.9248135148674362, iteration: 100476
loss: 1.0041500329971313,grad_norm: 0.9999991260360657, iteration: 100477
loss: 0.9793084263801575,grad_norm: 0.8037831235993542, iteration: 100478
loss: 1.0386019945144653,grad_norm: 0.9932685679553424, iteration: 100479
loss: 0.9910953044891357,grad_norm: 0.9727962404575722, iteration: 100480
loss: 1.0174342393875122,grad_norm: 0.8625717257238458, iteration: 100481
loss: 0.9959509372711182,grad_norm: 0.9999992387197749, iteration: 100482
loss: 0.9634508490562439,grad_norm: 0.9999992316526538, iteration: 100483
loss: 0.9836867451667786,grad_norm: 0.9999992202650703, iteration: 100484
loss: 1.034583330154419,grad_norm: 0.9999991632066197, iteration: 100485
loss: 1.00436270236969,grad_norm: 0.9999989138228873, iteration: 100486
loss: 1.014380931854248,grad_norm: 0.9999991571568252, iteration: 100487
loss: 1.0024968385696411,grad_norm: 0.9999990470557948, iteration: 100488
loss: 0.9773508310317993,grad_norm: 0.9999990229346984, iteration: 100489
loss: 1.0100212097167969,grad_norm: 0.8989711919367062, iteration: 100490
loss: 0.9880247712135315,grad_norm: 0.9680422591384324, iteration: 100491
loss: 0.9827060103416443,grad_norm: 0.9247778329330779, iteration: 100492
loss: 1.0159687995910645,grad_norm: 0.9430867522717051, iteration: 100493
loss: 0.9850742220878601,grad_norm: 0.9999992805321547, iteration: 100494
loss: 0.9791871905326843,grad_norm: 0.9999990900841451, iteration: 100495
loss: 0.9969008564949036,grad_norm: 0.9401793356734514, iteration: 100496
loss: 1.0167982578277588,grad_norm: 0.9999990823406143, iteration: 100497
loss: 1.0266380310058594,grad_norm: 0.9656299627869255, iteration: 100498
loss: 1.0167871713638306,grad_norm: 0.8797197036482152, iteration: 100499
loss: 1.039914846420288,grad_norm: 0.9320643583206956, iteration: 100500
loss: 0.9805648922920227,grad_norm: 0.9999991950555001, iteration: 100501
loss: 1.0211628675460815,grad_norm: 0.9999990788359636, iteration: 100502
loss: 0.9675543308258057,grad_norm: 0.9999990650394237, iteration: 100503
loss: 0.990900993347168,grad_norm: 0.9999991501651506, iteration: 100504
loss: 1.0070191621780396,grad_norm: 0.9999991617123232, iteration: 100505
loss: 1.0111818313598633,grad_norm: 0.9731556615922984, iteration: 100506
loss: 0.9830155968666077,grad_norm: 0.9999088869745002, iteration: 100507
loss: 0.9934266805648804,grad_norm: 0.9999993726559606, iteration: 100508
loss: 1.0000842809677124,grad_norm: 0.9999991604456554, iteration: 100509
loss: 1.002255916595459,grad_norm: 0.9999996852959563, iteration: 100510
loss: 0.9673739075660706,grad_norm: 0.9999990213596487, iteration: 100511
loss: 0.9533746242523193,grad_norm: 0.9999990337946598, iteration: 100512
loss: 0.9907525777816772,grad_norm: 0.9999990992550761, iteration: 100513
loss: 1.0118038654327393,grad_norm: 0.9564488778539674, iteration: 100514
loss: 0.9923086762428284,grad_norm: 0.9040898765336891, iteration: 100515
loss: 0.9677651524543762,grad_norm: 0.9999993708647088, iteration: 100516
loss: 0.9630262851715088,grad_norm: 0.9999990479416262, iteration: 100517
loss: 1.0134552717208862,grad_norm: 0.9999989311153721, iteration: 100518
loss: 0.9863318800926208,grad_norm: 0.9999991210390413, iteration: 100519
loss: 1.0337194204330444,grad_norm: 0.999999134483506, iteration: 100520
loss: 0.9903056621551514,grad_norm: 0.9999989135788215, iteration: 100521
loss: 1.0074738264083862,grad_norm: 0.999999065305586, iteration: 100522
loss: 0.9922624826431274,grad_norm: 0.9999992343440783, iteration: 100523
loss: 0.9621270895004272,grad_norm: 0.9999991441392605, iteration: 100524
loss: 1.0197827816009521,grad_norm: 0.9999991157237796, iteration: 100525
loss: 1.0335657596588135,grad_norm: 0.954002945510795, iteration: 100526
loss: 0.9777877330780029,grad_norm: 0.999999155616291, iteration: 100527
loss: 0.9704598784446716,grad_norm: 0.8456023963519934, iteration: 100528
loss: 1.0034759044647217,grad_norm: 0.9999992346076868, iteration: 100529
loss: 0.9971116185188293,grad_norm: 0.9999990121700506, iteration: 100530
loss: 1.026328206062317,grad_norm: 0.9999991546591845, iteration: 100531
loss: 0.967289388179779,grad_norm: 0.9821463199220882, iteration: 100532
loss: 0.9994256496429443,grad_norm: 0.999999134651239, iteration: 100533
loss: 1.0068365335464478,grad_norm: 0.9053919415706044, iteration: 100534
loss: 0.9861279726028442,grad_norm: 0.9999990495338275, iteration: 100535
loss: 1.0338586568832397,grad_norm: 0.9999991575266935, iteration: 100536
loss: 0.9946167469024658,grad_norm: 0.9999990787227034, iteration: 100537
loss: 0.9974616765975952,grad_norm: 0.9999989900564852, iteration: 100538
loss: 0.9787418842315674,grad_norm: 0.999999139308101, iteration: 100539
loss: 1.0109139680862427,grad_norm: 0.9616263127682132, iteration: 100540
loss: 1.038102149963379,grad_norm: 0.9999991506351616, iteration: 100541
loss: 0.9744182825088501,grad_norm: 0.9187302080483447, iteration: 100542
loss: 1.034497857093811,grad_norm: 0.9999991685628857, iteration: 100543
loss: 1.028039574623108,grad_norm: 0.9999991542528639, iteration: 100544
loss: 1.0229943990707397,grad_norm: 0.9999990754908358, iteration: 100545
loss: 1.009795069694519,grad_norm: 0.9999991083090647, iteration: 100546
loss: 0.99430912733078,grad_norm: 0.9999991718518303, iteration: 100547
loss: 0.9900203347206116,grad_norm: 0.9999990400171845, iteration: 100548
loss: 0.9872447848320007,grad_norm: 0.9999990127779362, iteration: 100549
loss: 1.0117677450180054,grad_norm: 0.9999991831870759, iteration: 100550
loss: 0.9854771494865417,grad_norm: 0.9999991567443431, iteration: 100551
loss: 1.018020510673523,grad_norm: 0.9999991574221285, iteration: 100552
loss: 1.0100693702697754,grad_norm: 0.9999992537724127, iteration: 100553
loss: 1.0100436210632324,grad_norm: 0.9471577918220954, iteration: 100554
loss: 0.9828070402145386,grad_norm: 0.9999991541320817, iteration: 100555
loss: 0.9873262643814087,grad_norm: 0.9999991135109719, iteration: 100556
loss: 0.9852142333984375,grad_norm: 0.9262701980716798, iteration: 100557
loss: 0.982582151889801,grad_norm: 0.9706655757897289, iteration: 100558
loss: 0.9953619837760925,grad_norm: 0.9876768820063442, iteration: 100559
loss: 1.018398404121399,grad_norm: 0.932094445572051, iteration: 100560
loss: 0.9927733540534973,grad_norm: 0.9999991704064883, iteration: 100561
loss: 0.9802500605583191,grad_norm: 0.9999991943785536, iteration: 100562
loss: 1.017109751701355,grad_norm: 0.9999991459468819, iteration: 100563
loss: 0.9762246012687683,grad_norm: 0.9999990990657411, iteration: 100564
loss: 1.0108882188796997,grad_norm: 0.9899578555336668, iteration: 100565
loss: 1.0089378356933594,grad_norm: 0.9999992313457252, iteration: 100566
loss: 0.9530718326568604,grad_norm: 0.8788158685767723, iteration: 100567
loss: 0.9864785075187683,grad_norm: 0.9999991497940668, iteration: 100568
loss: 0.9705318808555603,grad_norm: 0.9999990093091982, iteration: 100569
loss: 0.951660692691803,grad_norm: 0.9999991038654309, iteration: 100570
loss: 1.0117148160934448,grad_norm: 0.9334141971793462, iteration: 100571
loss: 0.9705037474632263,grad_norm: 0.913496475697727, iteration: 100572
loss: 0.9807711243629456,grad_norm: 0.9999991057067646, iteration: 100573
loss: 1.0208197832107544,grad_norm: 0.9999991433650838, iteration: 100574
loss: 0.9612455368041992,grad_norm: 0.9999991903229569, iteration: 100575
loss: 0.9745655059814453,grad_norm: 0.8186754967114785, iteration: 100576
loss: 1.0396332740783691,grad_norm: 0.9869051224246912, iteration: 100577
loss: 0.9977346062660217,grad_norm: 0.9729429526243286, iteration: 100578
loss: 0.9870214462280273,grad_norm: 0.9999991955726887, iteration: 100579
loss: 1.0497684478759766,grad_norm: 0.9999993245146644, iteration: 100580
loss: 1.0086532831192017,grad_norm: 0.9220990003516858, iteration: 100581
loss: 1.0266988277435303,grad_norm: 0.9999990747736583, iteration: 100582
loss: 0.9764679074287415,grad_norm: 0.9999991557896705, iteration: 100583
loss: 1.0190340280532837,grad_norm: 0.999999296251018, iteration: 100584
loss: 1.0087025165557861,grad_norm: 0.9999990276711315, iteration: 100585
loss: 0.9720836281776428,grad_norm: 0.9678020920363375, iteration: 100586
loss: 1.000900387763977,grad_norm: 0.9999993183986299, iteration: 100587
loss: 1.0030741691589355,grad_norm: 0.9198745259457644, iteration: 100588
loss: 1.0045007467269897,grad_norm: 0.9254254980654514, iteration: 100589
loss: 1.0014859437942505,grad_norm: 0.999999313851746, iteration: 100590
loss: 0.992703378200531,grad_norm: 0.9999991169330723, iteration: 100591
loss: 0.9693363308906555,grad_norm: 0.9287577401700067, iteration: 100592
loss: 0.9888287782669067,grad_norm: 0.9228350746862318, iteration: 100593
loss: 0.9979698657989502,grad_norm: 0.9859746492842325, iteration: 100594
loss: 0.994928777217865,grad_norm: 0.9999989747695449, iteration: 100595
loss: 1.059852957725525,grad_norm: 0.9999990864708017, iteration: 100596
loss: 1.010298252105713,grad_norm: 0.9999992048043437, iteration: 100597
loss: 0.9661363959312439,grad_norm: 0.9999989648697094, iteration: 100598
loss: 0.9954602718353271,grad_norm: 0.9756653193374354, iteration: 100599
loss: 1.025740385055542,grad_norm: 0.9999990980564626, iteration: 100600
loss: 0.9645828008651733,grad_norm: 0.9769725432420242, iteration: 100601
loss: 1.017944574356079,grad_norm: 0.9999990706060184, iteration: 100602
loss: 0.997818112373352,grad_norm: 0.9999990708423115, iteration: 100603
loss: 1.0234122276306152,grad_norm: 0.9999991333718654, iteration: 100604
loss: 0.9668753743171692,grad_norm: 0.9999989340555492, iteration: 100605
loss: 0.9959452748298645,grad_norm: 0.8718620896897631, iteration: 100606
loss: 0.9956783652305603,grad_norm: 0.9999990033244831, iteration: 100607
loss: 1.0258127450942993,grad_norm: 0.9049285349163645, iteration: 100608
loss: 1.0048301219940186,grad_norm: 0.9999991217613506, iteration: 100609
loss: 0.9991061687469482,grad_norm: 0.999999317915161, iteration: 100610
loss: 0.9976486563682556,grad_norm: 0.9999991720656719, iteration: 100611
loss: 1.0187662839889526,grad_norm: 0.9999990368510043, iteration: 100612
loss: 0.954046368598938,grad_norm: 0.9999992799007082, iteration: 100613
loss: 0.9707514643669128,grad_norm: 0.9999991351830713, iteration: 100614
loss: 1.0089939832687378,grad_norm: 0.9999990357577065, iteration: 100615
loss: 0.9718755483627319,grad_norm: 0.9176749107507268, iteration: 100616
loss: 1.0222128629684448,grad_norm: 0.9999990857309373, iteration: 100617
loss: 0.9878951907157898,grad_norm: 0.9999990439859346, iteration: 100618
loss: 1.0111589431762695,grad_norm: 0.9366250068938913, iteration: 100619
loss: 0.9811868667602539,grad_norm: 0.892389885700957, iteration: 100620
loss: 0.9569373726844788,grad_norm: 0.9999991565161477, iteration: 100621
loss: 1.0067665576934814,grad_norm: 0.9999991789176096, iteration: 100622
loss: 0.9705446362495422,grad_norm: 0.9999991559027589, iteration: 100623
loss: 1.0170016288757324,grad_norm: 0.8367940485763028, iteration: 100624
loss: 0.9726082682609558,grad_norm: 0.9999993162916678, iteration: 100625
loss: 1.0097359418869019,grad_norm: 0.9094799257969137, iteration: 100626
loss: 1.0393887758255005,grad_norm: 0.9479693252813516, iteration: 100627
loss: 0.9808324575424194,grad_norm: 0.9973033218237691, iteration: 100628
loss: 1.03709077835083,grad_norm: 0.9983043330809475, iteration: 100629
loss: 0.993086576461792,grad_norm: 0.9999991702787422, iteration: 100630
loss: 1.0983082056045532,grad_norm: 0.9999992571758294, iteration: 100631
loss: 0.9973029494285583,grad_norm: 0.9999992364647726, iteration: 100632
loss: 1.0008620023727417,grad_norm: 0.9999990519193894, iteration: 100633
loss: 1.0200589895248413,grad_norm: 0.9311108718184753, iteration: 100634
loss: 1.0560011863708496,grad_norm: 0.9999992319530909, iteration: 100635
loss: 1.0100200176239014,grad_norm: 0.9999993281144578, iteration: 100636
loss: 1.0344855785369873,grad_norm: 0.970032883505968, iteration: 100637
loss: 1.0562864542007446,grad_norm: 0.999999932030597, iteration: 100638
loss: 1.030275583267212,grad_norm: 0.9999992200429327, iteration: 100639
loss: 1.0093897581100464,grad_norm: 0.9898768436240125, iteration: 100640
loss: 0.9768344163894653,grad_norm: 0.9999990740294045, iteration: 100641
loss: 0.9577437043190002,grad_norm: 0.9999991561497021, iteration: 100642
loss: 1.0145474672317505,grad_norm: 0.9999992761991258, iteration: 100643
loss: 1.0106523036956787,grad_norm: 0.999999336350773, iteration: 100644
loss: 0.9957531094551086,grad_norm: 0.9999991982573607, iteration: 100645
loss: 0.9651386737823486,grad_norm: 0.9876314476280297, iteration: 100646
loss: 1.0032421350479126,grad_norm: 0.9999990615191223, iteration: 100647
loss: 1.0080783367156982,grad_norm: 0.9999991328522113, iteration: 100648
loss: 0.9755846858024597,grad_norm: 0.9291329610179997, iteration: 100649
loss: 1.022664189338684,grad_norm: 0.9999990395645973, iteration: 100650
loss: 1.017478346824646,grad_norm: 0.9873551125913477, iteration: 100651
loss: 0.9928444623947144,grad_norm: 0.9171877309487906, iteration: 100652
loss: 0.9834074974060059,grad_norm: 0.9999992301576826, iteration: 100653
loss: 0.9999685883522034,grad_norm: 0.9999991747731171, iteration: 100654
loss: 0.9791092276573181,grad_norm: 0.9375151118991305, iteration: 100655
loss: 0.987663984298706,grad_norm: 0.9999989988991121, iteration: 100656
loss: 0.9912493824958801,grad_norm: 0.9774778293527974, iteration: 100657
loss: 1.0063179731369019,grad_norm: 0.9999990884109742, iteration: 100658
loss: 0.9880849719047546,grad_norm: 0.9999990751283886, iteration: 100659
loss: 0.9948894381523132,grad_norm: 0.9760866350956853, iteration: 100660
loss: 0.984056293964386,grad_norm: 0.9999991850276774, iteration: 100661
loss: 0.9426628947257996,grad_norm: 0.9999993189757712, iteration: 100662
loss: 1.0133808851242065,grad_norm: 0.9999990995312564, iteration: 100663
loss: 1.016783356666565,grad_norm: 0.8899812695255568, iteration: 100664
loss: 0.9717347025871277,grad_norm: 0.9999991257995781, iteration: 100665
loss: 1.0246117115020752,grad_norm: 0.9999992082106123, iteration: 100666
loss: 0.9582791924476624,grad_norm: 0.9999996803885602, iteration: 100667
loss: 1.0308077335357666,grad_norm: 0.999999181712685, iteration: 100668
loss: 0.9910105466842651,grad_norm: 0.9700858643876623, iteration: 100669
loss: 1.0379008054733276,grad_norm: 0.9999092863168432, iteration: 100670
loss: 1.0291651487350464,grad_norm: 0.9570884605081418, iteration: 100671
loss: 0.996487557888031,grad_norm: 0.9999991333256594, iteration: 100672
loss: 0.9871554374694824,grad_norm: 0.9406800302407801, iteration: 100673
loss: 1.0213333368301392,grad_norm: 0.9999995845943396, iteration: 100674
loss: 0.9797332286834717,grad_norm: 0.9999990833928505, iteration: 100675
loss: 1.0043671131134033,grad_norm: 0.9999992555367481, iteration: 100676
loss: 1.0022213459014893,grad_norm: 0.999999139100076, iteration: 100677
loss: 0.9739313125610352,grad_norm: 0.9999990795643491, iteration: 100678
loss: 0.9996118545532227,grad_norm: 0.8718090793768044, iteration: 100679
loss: 1.0091168880462646,grad_norm: 0.9999990151675787, iteration: 100680
loss: 0.9447748064994812,grad_norm: 0.9999991248042273, iteration: 100681
loss: 0.9991918206214905,grad_norm: 0.9999990764304236, iteration: 100682
loss: 1.0139859914779663,grad_norm: 0.9999991085072857, iteration: 100683
loss: 0.9865666627883911,grad_norm: 0.9999990346520518, iteration: 100684
loss: 1.0037634372711182,grad_norm: 0.9983305474867475, iteration: 100685
loss: 1.0306212902069092,grad_norm: 0.999998981641553, iteration: 100686
loss: 1.0825804471969604,grad_norm: 0.9999992730800672, iteration: 100687
loss: 0.955858588218689,grad_norm: 0.9839514996386306, iteration: 100688
loss: 0.9531775712966919,grad_norm: 0.9999990931402315, iteration: 100689
loss: 1.0058231353759766,grad_norm: 0.9999992099300998, iteration: 100690
loss: 1.0068641901016235,grad_norm: 0.9999990568565418, iteration: 100691
loss: 1.0112993717193604,grad_norm: 0.9999989662241848, iteration: 100692
loss: 0.996035099029541,grad_norm: 0.9790277144908549, iteration: 100693
loss: 0.997606635093689,grad_norm: 0.9999990941065376, iteration: 100694
loss: 1.0041801929473877,grad_norm: 0.9999992967405333, iteration: 100695
loss: 0.9774453639984131,grad_norm: 0.9877959451315431, iteration: 100696
loss: 0.9800862073898315,grad_norm: 0.9832118434037135, iteration: 100697
loss: 0.9323868751525879,grad_norm: 0.9506470946336817, iteration: 100698
loss: 0.9918003082275391,grad_norm: 0.999998992254698, iteration: 100699
loss: 1.0087254047393799,grad_norm: 0.999999057889403, iteration: 100700
loss: 0.9811617732048035,grad_norm: 0.9472732563949909, iteration: 100701
loss: 0.9608161449432373,grad_norm: 0.99999912188482, iteration: 100702
loss: 0.9917665719985962,grad_norm: 0.9877620885581495, iteration: 100703
loss: 1.0147844552993774,grad_norm: 0.9999993651059913, iteration: 100704
loss: 1.006309151649475,grad_norm: 0.9999991212358114, iteration: 100705
loss: 0.9847769141197205,grad_norm: 0.9277976378197049, iteration: 100706
loss: 0.9830304384231567,grad_norm: 0.9959820108403342, iteration: 100707
loss: 1.0162471532821655,grad_norm: 0.9999990763754529, iteration: 100708
loss: 1.0261974334716797,grad_norm: 0.9999991221872996, iteration: 100709
loss: 1.019679307937622,grad_norm: 0.9999991025234002, iteration: 100710
loss: 0.9603952169418335,grad_norm: 0.9999989470069203, iteration: 100711
loss: 0.9676270484924316,grad_norm: 0.9999993644944027, iteration: 100712
loss: 0.9877024292945862,grad_norm: 0.9999990272579342, iteration: 100713
loss: 1.005238652229309,grad_norm: 0.9999992146196757, iteration: 100714
loss: 1.098933458328247,grad_norm: 0.9999990853226768, iteration: 100715
loss: 1.0270969867706299,grad_norm: 0.9999992526400634, iteration: 100716
loss: 0.9929710626602173,grad_norm: 0.9639959568891896, iteration: 100717
loss: 0.9942131042480469,grad_norm: 0.7938859641746238, iteration: 100718
loss: 0.9954236149787903,grad_norm: 0.852764869130997, iteration: 100719
loss: 0.992819607257843,grad_norm: 0.999999121547315, iteration: 100720
loss: 0.9918611645698547,grad_norm: 0.9999991625906125, iteration: 100721
loss: 0.9730717539787292,grad_norm: 0.9999990640200688, iteration: 100722
loss: 0.9943460822105408,grad_norm: 0.9999991145982612, iteration: 100723
loss: 1.0022878646850586,grad_norm: 0.9576574040064991, iteration: 100724
loss: 1.0155353546142578,grad_norm: 0.9999990041582235, iteration: 100725
loss: 1.0195587873458862,grad_norm: 0.9637889698408532, iteration: 100726
loss: 0.9778537750244141,grad_norm: 0.9571081925164775, iteration: 100727
loss: 1.0128840208053589,grad_norm: 0.9999992025958319, iteration: 100728
loss: 0.9909761548042297,grad_norm: 0.9999992139442425, iteration: 100729
loss: 1.0152758359909058,grad_norm: 0.9999991069705544, iteration: 100730
loss: 0.9896112680435181,grad_norm: 0.9999990160824641, iteration: 100731
loss: 0.9630597233772278,grad_norm: 0.990068442925642, iteration: 100732
loss: 0.9707795977592468,grad_norm: 0.9999991289653675, iteration: 100733
loss: 0.9993038177490234,grad_norm: 0.9484150554998099, iteration: 100734
loss: 1.0083262920379639,grad_norm: 0.9999991880918868, iteration: 100735
loss: 0.979636549949646,grad_norm: 0.9999991992651044, iteration: 100736
loss: 0.9736377000808716,grad_norm: 0.9698725015728318, iteration: 100737
loss: 0.975988507270813,grad_norm: 0.9999992875791114, iteration: 100738
loss: 0.9987660050392151,grad_norm: 0.9999990321957227, iteration: 100739
loss: 0.9884788393974304,grad_norm: 0.9999990688590523, iteration: 100740
loss: 1.0170780420303345,grad_norm: 0.9999992194152002, iteration: 100741
loss: 1.0099965333938599,grad_norm: 0.9999990928357365, iteration: 100742
loss: 1.0193816423416138,grad_norm: 0.9999993061979038, iteration: 100743
loss: 0.9698591232299805,grad_norm: 0.9536500553160426, iteration: 100744
loss: 1.0056034326553345,grad_norm: 0.999999204529781, iteration: 100745
loss: 0.9722484946250916,grad_norm: 0.999999010049182, iteration: 100746
loss: 0.9749434590339661,grad_norm: 0.9999990727659445, iteration: 100747
loss: 0.9832605123519897,grad_norm: 0.9999993329148743, iteration: 100748
loss: 1.002545952796936,grad_norm: 0.879188275163636, iteration: 100749
loss: 1.0004403591156006,grad_norm: 0.9307882297835294, iteration: 100750
loss: 0.9884741902351379,grad_norm: 0.9999991504292327, iteration: 100751
loss: 1.0148688554763794,grad_norm: 0.9999992479695684, iteration: 100752
loss: 1.0218571424484253,grad_norm: 0.9999993206894555, iteration: 100753
loss: 0.9791265726089478,grad_norm: 0.9187916454531948, iteration: 100754
loss: 1.0054576396942139,grad_norm: 0.986720446631041, iteration: 100755
loss: 1.0135564804077148,grad_norm: 0.9999991495983075, iteration: 100756
loss: 1.01812744140625,grad_norm: 0.9999989692021818, iteration: 100757
loss: 0.9928122758865356,grad_norm: 0.9793509308124243, iteration: 100758
loss: 1.011501669883728,grad_norm: 0.8426431444375075, iteration: 100759
loss: 0.9785169363021851,grad_norm: 0.9999991085370322, iteration: 100760
loss: 0.9951052069664001,grad_norm: 0.999999100977986, iteration: 100761
loss: 0.9826576709747314,grad_norm: 0.9999988985231013, iteration: 100762
loss: 1.0152515172958374,grad_norm: 0.9729852038656236, iteration: 100763
loss: 0.9800041317939758,grad_norm: 0.999999078877931, iteration: 100764
loss: 0.9634676575660706,grad_norm: 0.9417752024679098, iteration: 100765
loss: 1.0309573411941528,grad_norm: 0.9999990928094907, iteration: 100766
loss: 0.9961944818496704,grad_norm: 0.999999056776921, iteration: 100767
loss: 0.9882186055183411,grad_norm: 0.9399505322491188, iteration: 100768
loss: 0.9742575287818909,grad_norm: 0.9999991315851398, iteration: 100769
loss: 0.9977437853813171,grad_norm: 0.9999991097490992, iteration: 100770
loss: 1.0110464096069336,grad_norm: 0.9999992242602507, iteration: 100771
loss: 1.037196159362793,grad_norm: 0.9578655381187449, iteration: 100772
loss: 0.9720093011856079,grad_norm: 0.9056345364413733, iteration: 100773
loss: 0.995662271976471,grad_norm: 0.9067177263338653, iteration: 100774
loss: 0.9848535656929016,grad_norm: 0.9923911459125616, iteration: 100775
loss: 0.9917262196540833,grad_norm: 0.9999991066664092, iteration: 100776
loss: 1.0121172666549683,grad_norm: 0.9999990847998919, iteration: 100777
loss: 1.038859486579895,grad_norm: 0.9999989625329729, iteration: 100778
loss: 0.9855870604515076,grad_norm: 0.9999993612159859, iteration: 100779
loss: 1.027999997138977,grad_norm: 0.9999992148595035, iteration: 100780
loss: 0.9736067056655884,grad_norm: 0.9904953358917815, iteration: 100781
loss: 1.0076243877410889,grad_norm: 0.9999992627999524, iteration: 100782
loss: 1.056540608406067,grad_norm: 0.9999993446828793, iteration: 100783
loss: 0.9954221248626709,grad_norm: 0.9941773667540161, iteration: 100784
loss: 0.97799152135849,grad_norm: 0.9858894420235833, iteration: 100785
loss: 1.0025064945220947,grad_norm: 0.9999990142935323, iteration: 100786
loss: 0.9938775897026062,grad_norm: 0.999999186357418, iteration: 100787
loss: 1.0044690370559692,grad_norm: 0.9771063459642648, iteration: 100788
loss: 0.9991930723190308,grad_norm: 0.9999993071966476, iteration: 100789
loss: 0.9712158441543579,grad_norm: 0.9999991530069555, iteration: 100790
loss: 0.9838764071464539,grad_norm: 0.9376180671665149, iteration: 100791
loss: 1.0088963508605957,grad_norm: 0.7588777483381638, iteration: 100792
loss: 1.0120677947998047,grad_norm: 0.9999991955136712, iteration: 100793
loss: 1.0081199407577515,grad_norm: 0.999999115581406, iteration: 100794
loss: 1.0068167448043823,grad_norm: 0.9999989314317062, iteration: 100795
loss: 0.9685900211334229,grad_norm: 0.9999989498258895, iteration: 100796
loss: 1.0218839645385742,grad_norm: 0.9999992380592216, iteration: 100797
loss: 0.974035382270813,grad_norm: 0.9999998307355457, iteration: 100798
loss: 1.018681526184082,grad_norm: 0.8981980292329812, iteration: 100799
loss: 0.9890885949134827,grad_norm: 0.9999991906185155, iteration: 100800
loss: 1.0127564668655396,grad_norm: 0.999999005551483, iteration: 100801
loss: 0.9683270454406738,grad_norm: 0.9999990418225553, iteration: 100802
loss: 0.997859537601471,grad_norm: 0.9999991094725658, iteration: 100803
loss: 1.0012848377227783,grad_norm: 0.9776404158630504, iteration: 100804
loss: 1.0369280576705933,grad_norm: 0.9999992382268175, iteration: 100805
loss: 1.0037235021591187,grad_norm: 0.8843468278073524, iteration: 100806
loss: 0.9595097303390503,grad_norm: 0.9999991682764341, iteration: 100807
loss: 1.0025417804718018,grad_norm: 0.9999990520440568, iteration: 100808
loss: 1.0096485614776611,grad_norm: 0.839167645964841, iteration: 100809
loss: 1.0145854949951172,grad_norm: 0.8861443560062884, iteration: 100810
loss: 1.0144740343093872,grad_norm: 0.9999990767395391, iteration: 100811
loss: 0.9863529205322266,grad_norm: 0.9999989858288965, iteration: 100812
loss: 1.0348049402236938,grad_norm: 0.9999991648358125, iteration: 100813
loss: 0.9617728590965271,grad_norm: 0.9473267965653721, iteration: 100814
loss: 1.016282081604004,grad_norm: 0.9067920078567001, iteration: 100815
loss: 1.028390645980835,grad_norm: 0.9999992310978825, iteration: 100816
loss: 1.0084935426712036,grad_norm: 0.9999991568827111, iteration: 100817
loss: 1.0189108848571777,grad_norm: 0.9999993053150789, iteration: 100818
loss: 1.046730637550354,grad_norm: 0.9999998782625549, iteration: 100819
loss: 0.9977744221687317,grad_norm: 0.9699759092387149, iteration: 100820
loss: 1.0351885557174683,grad_norm: 0.9742278936592701, iteration: 100821
loss: 1.0273789167404175,grad_norm: 0.8702296823599749, iteration: 100822
loss: 0.9918736219406128,grad_norm: 0.9614976092178295, iteration: 100823
loss: 1.0111476182937622,grad_norm: 0.9999991069319291, iteration: 100824
loss: 0.9737590551376343,grad_norm: 0.9999989959167375, iteration: 100825
loss: 0.9521772861480713,grad_norm: 0.8889118963548779, iteration: 100826
loss: 0.9837711453437805,grad_norm: 0.9797090494162393, iteration: 100827
loss: 0.996533215045929,grad_norm: 0.9999992292572493, iteration: 100828
loss: 0.9908953905105591,grad_norm: 0.9999992070509727, iteration: 100829
loss: 1.0104976892471313,grad_norm: 0.999999285336959, iteration: 100830
loss: 0.963711142539978,grad_norm: 0.9999991916305927, iteration: 100831
loss: 0.9926812052726746,grad_norm: 0.999999014050249, iteration: 100832
loss: 1.0254278182983398,grad_norm: 0.999999091654842, iteration: 100833
loss: 0.9898188710212708,grad_norm: 0.9999990552512634, iteration: 100834
loss: 1.0398467779159546,grad_norm: 0.9999990988870232, iteration: 100835
loss: 0.9912973642349243,grad_norm: 0.9999989985237147, iteration: 100836
loss: 0.9910276532173157,grad_norm: 0.9999992272763192, iteration: 100837
loss: 0.9793336391448975,grad_norm: 0.9999991657158187, iteration: 100838
loss: 1.001978874206543,grad_norm: 0.8649191775590483, iteration: 100839
loss: 1.0237284898757935,grad_norm: 0.964503288109379, iteration: 100840
loss: 1.016709804534912,grad_norm: 0.9999992596474575, iteration: 100841
loss: 0.9870209097862244,grad_norm: 0.8724621600323185, iteration: 100842
loss: 0.9883249998092651,grad_norm: 0.9999992010969735, iteration: 100843
loss: 0.9967086315155029,grad_norm: 0.9999991934169231, iteration: 100844
loss: 0.9673975110054016,grad_norm: 0.9999990476253611, iteration: 100845
loss: 0.9689001441001892,grad_norm: 0.8909073263159439, iteration: 100846
loss: 1.0094788074493408,grad_norm: 0.9999997201313469, iteration: 100847
loss: 1.0196565389633179,grad_norm: 0.9704904852932407, iteration: 100848
loss: 1.0054737329483032,grad_norm: 0.999999127471012, iteration: 100849
loss: 1.0324796438217163,grad_norm: 0.9999990197244113, iteration: 100850
loss: 1.0138760805130005,grad_norm: 0.9734492747986375, iteration: 100851
loss: 1.0253815650939941,grad_norm: 0.999999232104933, iteration: 100852
loss: 0.9830590486526489,grad_norm: 0.9894934033011274, iteration: 100853
loss: 1.0004991292953491,grad_norm: 0.9663929914990105, iteration: 100854
loss: 1.0293172597885132,grad_norm: 0.9999991358132004, iteration: 100855
loss: 1.0018383264541626,grad_norm: 0.9999990250976553, iteration: 100856
loss: 0.9829955697059631,grad_norm: 0.938887177864408, iteration: 100857
loss: 1.048338532447815,grad_norm: 0.9587648336466857, iteration: 100858
loss: 1.028682827949524,grad_norm: 0.9999993295609119, iteration: 100859
loss: 1.0176602602005005,grad_norm: 0.9999991253286631, iteration: 100860
loss: 0.989191472530365,grad_norm: 0.9999990035121439, iteration: 100861
loss: 0.9970532059669495,grad_norm: 0.972527768786955, iteration: 100862
loss: 0.996120810508728,grad_norm: 0.9999992879549134, iteration: 100863
loss: 0.9881129264831543,grad_norm: 0.9829836409181248, iteration: 100864
loss: 0.9948521256446838,grad_norm: 0.9999991669243131, iteration: 100865
loss: 1.0390712022781372,grad_norm: 0.9999989206860076, iteration: 100866
loss: 0.9776896238327026,grad_norm: 0.9999991681595427, iteration: 100867
loss: 1.0032756328582764,grad_norm: 0.9999990482301666, iteration: 100868
loss: 1.0038032531738281,grad_norm: 0.9999992183315471, iteration: 100869
loss: 1.052525520324707,grad_norm: 0.9999999081568456, iteration: 100870
loss: 1.0021655559539795,grad_norm: 0.9999991653151145, iteration: 100871
loss: 1.0200822353363037,grad_norm: 0.8863081862251069, iteration: 100872
loss: 0.9750005602836609,grad_norm: 0.9999990805845893, iteration: 100873
loss: 0.9902709126472473,grad_norm: 0.9999990923493786, iteration: 100874
loss: 1.0231165885925293,grad_norm: 0.9811370872831165, iteration: 100875
loss: 1.0359113216400146,grad_norm: 0.9456282612837372, iteration: 100876
loss: 1.0279521942138672,grad_norm: 0.999999143121018, iteration: 100877
loss: 1.028660535812378,grad_norm: 0.9999990570799344, iteration: 100878
loss: 1.025678038597107,grad_norm: 0.9999991199783236, iteration: 100879
loss: 0.9875653386116028,grad_norm: 0.999999113144271, iteration: 100880
loss: 0.9950464963912964,grad_norm: 0.9999989798505275, iteration: 100881
loss: 1.004605770111084,grad_norm: 0.9999990825409039, iteration: 100882
loss: 1.008385419845581,grad_norm: 0.9999990637827894, iteration: 100883
loss: 0.9947198629379272,grad_norm: 0.9693530531084066, iteration: 100884
loss: 1.0410264730453491,grad_norm: 0.9999995778305333, iteration: 100885
loss: 0.9653270244598389,grad_norm: 0.9984151993487486, iteration: 100886
loss: 0.9888487458229065,grad_norm: 0.9999990750229157, iteration: 100887
loss: 0.9859671592712402,grad_norm: 0.9999991602105776, iteration: 100888
loss: 0.9884858727455139,grad_norm: 0.9999993506196256, iteration: 100889
loss: 1.0003494024276733,grad_norm: 0.9999990453520952, iteration: 100890
loss: 1.0235832929611206,grad_norm: 0.9408767981976772, iteration: 100891
loss: 0.9758543372154236,grad_norm: 0.99999896839845, iteration: 100892
loss: 0.9850112795829773,grad_norm: 0.981663038748346, iteration: 100893
loss: 1.0234061479568481,grad_norm: 0.9999990377374374, iteration: 100894
loss: 1.0049811601638794,grad_norm: 0.9999990893787077, iteration: 100895
loss: 0.9729851484298706,grad_norm: 0.9132174604786925, iteration: 100896
loss: 0.9805150628089905,grad_norm: 0.9322574253000788, iteration: 100897
loss: 1.0294431447982788,grad_norm: 0.9999990956804388, iteration: 100898
loss: 1.018437385559082,grad_norm: 0.9025629516361955, iteration: 100899
loss: 1.0081932544708252,grad_norm: 0.9999997143751287, iteration: 100900
loss: 1.0004701614379883,grad_norm: 0.9999993024930472, iteration: 100901
loss: 1.0159603357315063,grad_norm: 0.9827985043347047, iteration: 100902
loss: 0.9690708518028259,grad_norm: 0.9999990984156645, iteration: 100903
loss: 0.9890816807746887,grad_norm: 0.9999991545933281, iteration: 100904
loss: 1.0084705352783203,grad_norm: 0.9999991226900968, iteration: 100905
loss: 1.0191919803619385,grad_norm: 0.9999992648556157, iteration: 100906
loss: 1.0305055379867554,grad_norm: 0.999999426880341, iteration: 100907
loss: 0.9811341762542725,grad_norm: 0.999998937054038, iteration: 100908
loss: 0.9688311219215393,grad_norm: 0.9556500441479807, iteration: 100909
loss: 0.9966738820075989,grad_norm: 0.838342816397993, iteration: 100910
loss: 1.0028289556503296,grad_norm: 0.9800154567656493, iteration: 100911
loss: 1.0211238861083984,grad_norm: 0.9999992626772406, iteration: 100912
loss: 1.004164695739746,grad_norm: 0.9999991398397672, iteration: 100913
loss: 1.0076745748519897,grad_norm: 0.895364396672729, iteration: 100914
loss: 0.9609392881393433,grad_norm: 0.9979512439394282, iteration: 100915
loss: 0.9997463226318359,grad_norm: 0.9765267616071308, iteration: 100916
loss: 0.9861771464347839,grad_norm: 0.946935728047666, iteration: 100917
loss: 1.0095752477645874,grad_norm: 0.986070720718906, iteration: 100918
loss: 1.0013691186904907,grad_norm: 0.9425608218814241, iteration: 100919
loss: 0.9821871519088745,grad_norm: 0.9999991148063203, iteration: 100920
loss: 1.0033385753631592,grad_norm: 0.9999991370320119, iteration: 100921
loss: 1.0013318061828613,grad_norm: 0.9999990625918455, iteration: 100922
loss: 1.0134912729263306,grad_norm: 0.9999992100723267, iteration: 100923
loss: 0.9992006421089172,grad_norm: 0.999999130510735, iteration: 100924
loss: 0.9950122237205505,grad_norm: 0.9999990503045676, iteration: 100925
loss: 0.9920994639396667,grad_norm: 0.8594426267036642, iteration: 100926
loss: 0.9710801243782043,grad_norm: 0.9753783893971181, iteration: 100927
loss: 1.0173331499099731,grad_norm: 0.9999990493262834, iteration: 100928
loss: 1.0375723838806152,grad_norm: 0.9999994239449636, iteration: 100929
loss: 0.9701624512672424,grad_norm: 0.999999195708881, iteration: 100930
loss: 0.991371750831604,grad_norm: 0.9999993352199292, iteration: 100931
loss: 1.0180113315582275,grad_norm: 0.9999990694156216, iteration: 100932
loss: 1.008716106414795,grad_norm: 0.8997571859611329, iteration: 100933
loss: 0.9756503105163574,grad_norm: 0.9749532973261865, iteration: 100934
loss: 1.0185083150863647,grad_norm: 0.9999990409373571, iteration: 100935
loss: 1.0091627836227417,grad_norm: 0.9999991891633905, iteration: 100936
loss: 0.9774088263511658,grad_norm: 0.9999990667948582, iteration: 100937
loss: 1.1370487213134766,grad_norm: 0.9999999256297247, iteration: 100938
loss: 1.020414113998413,grad_norm: 0.9999992984336258, iteration: 100939
loss: 1.017637848854065,grad_norm: 0.9999990818060719, iteration: 100940
loss: 1.0293477773666382,grad_norm: 0.999999196429784, iteration: 100941
loss: 1.0168242454528809,grad_norm: 0.9602127669948075, iteration: 100942
loss: 1.009525179862976,grad_norm: 0.8834641324709674, iteration: 100943
loss: 0.997231125831604,grad_norm: 0.9999992355624255, iteration: 100944
loss: 1.0150675773620605,grad_norm: 0.8712651427921919, iteration: 100945
loss: 0.9734567999839783,grad_norm: 0.9999991518847484, iteration: 100946
loss: 1.0106115341186523,grad_norm: 0.9999992191486813, iteration: 100947
loss: 0.9387500286102295,grad_norm: 0.9424019533784935, iteration: 100948
loss: 0.9980707168579102,grad_norm: 0.9674082058846795, iteration: 100949
loss: 0.9897192716598511,grad_norm: 0.8609127712625594, iteration: 100950
loss: 1.00514554977417,grad_norm: 0.9999990639816106, iteration: 100951
loss: 1.0247457027435303,grad_norm: 0.9999991097969556, iteration: 100952
loss: 1.0148303508758545,grad_norm: 0.9999989438631214, iteration: 100953
loss: 0.9725197553634644,grad_norm: 0.9999990163982364, iteration: 100954
loss: 0.9897740483283997,grad_norm: 0.9999991128991602, iteration: 100955
loss: 1.0218076705932617,grad_norm: 0.8881430487392095, iteration: 100956
loss: 0.9894638061523438,grad_norm: 0.9999993131071778, iteration: 100957
loss: 1.0017451047897339,grad_norm: 0.9999996986450871, iteration: 100958
loss: 1.053472876548767,grad_norm: 0.9999998501909011, iteration: 100959
loss: 1.0213367938995361,grad_norm: 0.9999992481399969, iteration: 100960
loss: 1.0300776958465576,grad_norm: 0.9999992858901642, iteration: 100961
loss: 1.058728575706482,grad_norm: 0.999999500554344, iteration: 100962
loss: 1.0390844345092773,grad_norm: 0.9999994462691171, iteration: 100963
loss: 1.0028352737426758,grad_norm: 0.9999989151950373, iteration: 100964
loss: 0.9680559635162354,grad_norm: 0.9999997834802251, iteration: 100965
loss: 1.0259952545166016,grad_norm: 0.9999996563548199, iteration: 100966
loss: 0.9958308935165405,grad_norm: 0.9999993036351266, iteration: 100967
loss: 1.0010137557983398,grad_norm: 0.9999992416741333, iteration: 100968
loss: 0.9924066662788391,grad_norm: 0.926589159284671, iteration: 100969
loss: 1.2994731664657593,grad_norm: 0.9999995578033564, iteration: 100970
loss: 0.9751059412956238,grad_norm: 0.9999992266214703, iteration: 100971
loss: 0.9819561243057251,grad_norm: 0.9999991528339649, iteration: 100972
loss: 0.9969030022621155,grad_norm: 0.9999992561009112, iteration: 100973
loss: 0.9499289393424988,grad_norm: 0.999999227350983, iteration: 100974
loss: 1.0094372034072876,grad_norm: 0.9999990733559189, iteration: 100975
loss: 1.0509161949157715,grad_norm: 0.9999996883424641, iteration: 100976
loss: 0.9856594800949097,grad_norm: 0.8461175268904781, iteration: 100977
loss: 1.0007572174072266,grad_norm: 0.999999169527852, iteration: 100978
loss: 1.0001755952835083,grad_norm: 0.9904104351544213, iteration: 100979
loss: 1.0177488327026367,grad_norm: 0.9999990788830478, iteration: 100980
loss: 1.013655424118042,grad_norm: 0.9999991680511358, iteration: 100981
loss: 1.0030068159103394,grad_norm: 0.8994417568023867, iteration: 100982
loss: 1.0419371128082275,grad_norm: 0.9501475757898384, iteration: 100983
loss: 0.9890562891960144,grad_norm: 0.9608496278079907, iteration: 100984
loss: 1.0136582851409912,grad_norm: 0.9999990984180561, iteration: 100985
loss: 1.0141384601593018,grad_norm: 0.8648658061384055, iteration: 100986
loss: 0.9840070009231567,grad_norm: 0.9607283576209453, iteration: 100987
loss: 0.986305832862854,grad_norm: 0.8787239924135434, iteration: 100988
loss: 0.9913507103919983,grad_norm: 0.917866316331155, iteration: 100989
loss: 1.0283199548721313,grad_norm: 0.999999072200181, iteration: 100990
loss: 0.9690984487533569,grad_norm: 0.9999992185040186, iteration: 100991
loss: 1.0204229354858398,grad_norm: 0.9999991287078533, iteration: 100992
loss: 0.9880573749542236,grad_norm: 0.9999990503921647, iteration: 100993
loss: 1.0104303359985352,grad_norm: 0.9385477574208365, iteration: 100994
loss: 0.9910732507705688,grad_norm: 0.9112517023918258, iteration: 100995
loss: 1.0107271671295166,grad_norm: 0.9999992542668745, iteration: 100996
loss: 0.9863483309745789,grad_norm: 0.9999991476925819, iteration: 100997
loss: 0.9828717112541199,grad_norm: 0.9999992506708966, iteration: 100998
loss: 0.9749350547790527,grad_norm: 0.9999991712035922, iteration: 100999
loss: 0.9926671981811523,grad_norm: 0.9634362084365585, iteration: 101000
loss: 1.0166223049163818,grad_norm: 0.999999154291423, iteration: 101001
loss: 0.9789645075798035,grad_norm: 0.9999991705497714, iteration: 101002
loss: 0.9666405320167542,grad_norm: 0.9999992253383632, iteration: 101003
loss: 0.9580211639404297,grad_norm: 0.9999990380559992, iteration: 101004
loss: 0.9987622499465942,grad_norm: 0.9999990824455678, iteration: 101005
loss: 1.0041899681091309,grad_norm: 0.9999991687976616, iteration: 101006
loss: 0.9576699137687683,grad_norm: 0.9999990212628348, iteration: 101007
loss: 0.9999784827232361,grad_norm: 0.9999992820521071, iteration: 101008
loss: 1.0105364322662354,grad_norm: 0.9999991801934924, iteration: 101009
loss: 1.0125932693481445,grad_norm: 0.9860549296034996, iteration: 101010
loss: 0.9802983999252319,grad_norm: 0.999999064753789, iteration: 101011
loss: 1.002772331237793,grad_norm: 1.0000000012262942, iteration: 101012
loss: 0.9644101858139038,grad_norm: 0.9999990911203083, iteration: 101013
loss: 0.9745820164680481,grad_norm: 0.9999992345192817, iteration: 101014
loss: 1.0451115369796753,grad_norm: 0.9999991901797312, iteration: 101015
loss: 1.0200762748718262,grad_norm: 0.9999989602416186, iteration: 101016
loss: 1.0466341972351074,grad_norm: 0.974831957752426, iteration: 101017
loss: 0.9975478649139404,grad_norm: 0.9943487868274286, iteration: 101018
loss: 1.0161988735198975,grad_norm: 0.9999990652359488, iteration: 101019
loss: 1.029327154159546,grad_norm: 0.9999992411413521, iteration: 101020
loss: 1.0099488496780396,grad_norm: 0.9999991209568551, iteration: 101021
loss: 0.9969908595085144,grad_norm: 0.9765343229947081, iteration: 101022
loss: 0.9575542211532593,grad_norm: 0.9999990916859389, iteration: 101023
loss: 1.042911171913147,grad_norm: 0.9999991782322507, iteration: 101024
loss: 0.9968249201774597,grad_norm: 0.8869606435886855, iteration: 101025
loss: 1.010265827178955,grad_norm: 0.9999992298497792, iteration: 101026
loss: 0.9873502254486084,grad_norm: 0.8931534088520507, iteration: 101027
loss: 1.0492072105407715,grad_norm: 0.9999991195651307, iteration: 101028
loss: 1.0100520849227905,grad_norm: 0.9999990884824631, iteration: 101029
loss: 1.0093382596969604,grad_norm: 0.9775469156619376, iteration: 101030
loss: 0.9856639504432678,grad_norm: 0.9999990425363765, iteration: 101031
loss: 1.0002561807632446,grad_norm: 0.9999997440736714, iteration: 101032
loss: 1.016987681388855,grad_norm: 0.9999992382919467, iteration: 101033
loss: 1.0114187002182007,grad_norm: 0.8995943709217906, iteration: 101034
loss: 0.9814155697822571,grad_norm: 0.9999991331679284, iteration: 101035
loss: 0.9959713816642761,grad_norm: 0.9291821383204721, iteration: 101036
loss: 1.004620909690857,grad_norm: 0.9999989744776965, iteration: 101037
loss: 1.0073508024215698,grad_norm: 0.9999991264733769, iteration: 101038
loss: 0.9920855760574341,grad_norm: 0.9999991977200071, iteration: 101039
loss: 1.0101529359817505,grad_norm: 0.999999366269122, iteration: 101040
loss: 1.0213514566421509,grad_norm: 0.9999990535364104, iteration: 101041
loss: 0.9912600517272949,grad_norm: 0.9999992381471912, iteration: 101042
loss: 1.0063939094543457,grad_norm: 0.9067645340787814, iteration: 101043
loss: 1.0255155563354492,grad_norm: 0.9824138960043167, iteration: 101044
loss: 1.0120165348052979,grad_norm: 0.9999993252066679, iteration: 101045
loss: 0.9860507249832153,grad_norm: 0.9999991243198612, iteration: 101046
loss: 1.0071625709533691,grad_norm: 0.9151089531862922, iteration: 101047
loss: 0.9813905954360962,grad_norm: 0.999999090362071, iteration: 101048
loss: 0.9784253835678101,grad_norm: 0.9999992538124914, iteration: 101049
loss: 1.0007812976837158,grad_norm: 0.9999991635731701, iteration: 101050
loss: 1.0075578689575195,grad_norm: 0.9950389221226581, iteration: 101051
loss: 0.9847211241722107,grad_norm: 0.9870866494672398, iteration: 101052
loss: 0.9932281970977783,grad_norm: 0.9999991972581037, iteration: 101053
loss: 0.9974318742752075,grad_norm: 0.9205385199081807, iteration: 101054
loss: 0.9875447154045105,grad_norm: 0.9999991266896255, iteration: 101055
loss: 1.0081324577331543,grad_norm: 0.9999990674899958, iteration: 101056
loss: 1.002588152885437,grad_norm: 0.9646512464042748, iteration: 101057
loss: 1.023050308227539,grad_norm: 0.9999989690296781, iteration: 101058
loss: 0.9811074733734131,grad_norm: 0.8972028160572633, iteration: 101059
loss: 0.9950776100158691,grad_norm: 0.9999991424129198, iteration: 101060
loss: 1.0152419805526733,grad_norm: 0.9999991648307683, iteration: 101061
loss: 1.0269278287887573,grad_norm: 0.9999990807400675, iteration: 101062
loss: 0.990715742111206,grad_norm: 0.9999991829508872, iteration: 101063
loss: 1.0095322132110596,grad_norm: 0.9042065207342612, iteration: 101064
loss: 0.9864804744720459,grad_norm: 0.9912491587074608, iteration: 101065
loss: 0.9747911095619202,grad_norm: 0.8572597841562969, iteration: 101066
loss: 0.975642740726471,grad_norm: 0.9432869649154015, iteration: 101067
loss: 1.0089207887649536,grad_norm: 0.9999997587986424, iteration: 101068
loss: 1.0089833736419678,grad_norm: 0.9506788953252547, iteration: 101069
loss: 0.9978696703910828,grad_norm: 0.999999037811939, iteration: 101070
loss: 1.0280736684799194,grad_norm: 0.9378950596249974, iteration: 101071
loss: 1.0123015642166138,grad_norm: 0.9999990602243989, iteration: 101072
loss: 0.9990378022193909,grad_norm: 0.999999300775994, iteration: 101073
loss: 1.0229239463806152,grad_norm: 0.9855603991467642, iteration: 101074
loss: 0.9125239253044128,grad_norm: 0.9999991957651388, iteration: 101075
loss: 1.0253762006759644,grad_norm: 0.9241094040024351, iteration: 101076
loss: 1.0415223836898804,grad_norm: 0.9133786621231766, iteration: 101077
loss: 0.9844052195549011,grad_norm: 0.9025690808585309, iteration: 101078
loss: 1.0183138847351074,grad_norm: 0.9999991215947016, iteration: 101079
loss: 1.0065374374389648,grad_norm: 0.9997587072888022, iteration: 101080
loss: 1.000452995300293,grad_norm: 0.959259958692369, iteration: 101081
loss: 0.9944155812263489,grad_norm: 0.9999990788160885, iteration: 101082
loss: 1.015778660774231,grad_norm: 0.9999991489924495, iteration: 101083
loss: 0.991237998008728,grad_norm: 0.99999913002651, iteration: 101084
loss: 0.9815550446510315,grad_norm: 0.9638089154721197, iteration: 101085
loss: 1.0318481922149658,grad_norm: 0.9999992156949747, iteration: 101086
loss: 1.030754566192627,grad_norm: 0.9999991469330917, iteration: 101087
loss: 0.9979459643363953,grad_norm: 0.9999989486915707, iteration: 101088
loss: 0.9894043207168579,grad_norm: 0.999999085417571, iteration: 101089
loss: 1.0035632848739624,grad_norm: 0.9999991133053359, iteration: 101090
loss: 0.9849722385406494,grad_norm: 0.9999992082770908, iteration: 101091
loss: 1.0070257186889648,grad_norm: 0.9913359818605311, iteration: 101092
loss: 1.0128562450408936,grad_norm: 0.9999990634864524, iteration: 101093
loss: 1.026698112487793,grad_norm: 0.9999991261414147, iteration: 101094
loss: 1.0030951499938965,grad_norm: 0.9999997888356149, iteration: 101095
loss: 0.9991366863250732,grad_norm: 0.9504752036602349, iteration: 101096
loss: 0.9809941649436951,grad_norm: 0.9926969203072364, iteration: 101097
loss: 0.998023271560669,grad_norm: 0.9712082166655058, iteration: 101098
loss: 1.0241856575012207,grad_norm: 0.9926691297304703, iteration: 101099
loss: 1.0258610248565674,grad_norm: 0.9999992087928874, iteration: 101100
loss: 1.0249600410461426,grad_norm: 0.9999992053019305, iteration: 101101
loss: 1.0093655586242676,grad_norm: 0.9793601268950995, iteration: 101102
loss: 0.9924048185348511,grad_norm: 0.9707609966481183, iteration: 101103
loss: 1.0101526975631714,grad_norm: 0.8825813257493279, iteration: 101104
loss: 0.9713715314865112,grad_norm: 0.9048833516445882, iteration: 101105
loss: 0.9841175079345703,grad_norm: 0.9834884183964782, iteration: 101106
loss: 0.9871450066566467,grad_norm: 0.9999995436123046, iteration: 101107
loss: 0.9838675856590271,grad_norm: 0.9999990013248224, iteration: 101108
loss: 1.0201514959335327,grad_norm: 0.9999990675029622, iteration: 101109
loss: 1.0109422206878662,grad_norm: 0.9999991536845385, iteration: 101110
loss: 1.013577938079834,grad_norm: 0.9779080641179758, iteration: 101111
loss: 1.0190856456756592,grad_norm: 0.9999991963797377, iteration: 101112
loss: 1.0113334655761719,grad_norm: 0.9999992301522524, iteration: 101113
loss: 0.9912192225456238,grad_norm: 0.9999989585618644, iteration: 101114
loss: 0.9900655746459961,grad_norm: 0.9679828608988139, iteration: 101115
loss: 1.003380298614502,grad_norm: 0.9999991381876198, iteration: 101116
loss: 0.9877404570579529,grad_norm: 0.891896471669784, iteration: 101117
loss: 0.9600411057472229,grad_norm: 0.9999991177924606, iteration: 101118
loss: 1.00467050075531,grad_norm: 0.9999990053786205, iteration: 101119
loss: 1.0001370906829834,grad_norm: 0.9447562151534483, iteration: 101120
loss: 1.0198681354522705,grad_norm: 0.893817568235382, iteration: 101121
loss: 1.003056526184082,grad_norm: 0.9288368604363354, iteration: 101122
loss: 0.9981451630592346,grad_norm: 0.9654075747297026, iteration: 101123
loss: 0.9680016040802002,grad_norm: 0.8698954302551253, iteration: 101124
loss: 1.003271460533142,grad_norm: 0.8918355157894833, iteration: 101125
loss: 1.0054142475128174,grad_norm: 0.9999990640768169, iteration: 101126
loss: 0.9892075657844543,grad_norm: 0.9999992914639658, iteration: 101127
loss: 1.0038481950759888,grad_norm: 0.9999991293830409, iteration: 101128
loss: 0.9587342739105225,grad_norm: 0.9999990974690335, iteration: 101129
loss: 0.9416124224662781,grad_norm: 0.9161426023755647, iteration: 101130
loss: 1.0176626443862915,grad_norm: 0.9999991331680372, iteration: 101131
loss: 0.9738016724586487,grad_norm: 0.9932648789957541, iteration: 101132
loss: 0.9762327075004578,grad_norm: 0.9557988793980238, iteration: 101133
loss: 0.9711548089981079,grad_norm: 0.9999991881410325, iteration: 101134
loss: 1.0413614511489868,grad_norm: 0.9020788490701283, iteration: 101135
loss: 1.040583848953247,grad_norm: 0.9999990937097459, iteration: 101136
loss: 1.008371114730835,grad_norm: 0.9999993046030253, iteration: 101137
loss: 0.996509850025177,grad_norm: 0.9999991356846468, iteration: 101138
loss: 1.0172250270843506,grad_norm: 0.9667398166736191, iteration: 101139
loss: 0.9435406923294067,grad_norm: 0.9999991666912771, iteration: 101140
loss: 0.9623737335205078,grad_norm: 0.8238938193303034, iteration: 101141
loss: 0.9990443587303162,grad_norm: 0.9999989753253731, iteration: 101142
loss: 0.9953179359436035,grad_norm: 0.9999991816749886, iteration: 101143
loss: 0.979745090007782,grad_norm: 0.9999991165867698, iteration: 101144
loss: 1.0228713750839233,grad_norm: 0.9999992193259588, iteration: 101145
loss: 1.0216608047485352,grad_norm: 0.9999992906799919, iteration: 101146
loss: 1.0082778930664062,grad_norm: 0.9999991130780721, iteration: 101147
loss: 1.025207757949829,grad_norm: 0.9999991453399548, iteration: 101148
loss: 0.9880119562149048,grad_norm: 0.9999990532497579, iteration: 101149
loss: 1.0025509595870972,grad_norm: 0.999999215735935, iteration: 101150
loss: 1.0179907083511353,grad_norm: 0.8033904420894451, iteration: 101151
loss: 1.0117180347442627,grad_norm: 0.9999991028869051, iteration: 101152
loss: 1.028153896331787,grad_norm: 0.9999991266677293, iteration: 101153
loss: 1.0097508430480957,grad_norm: 0.9999989938617163, iteration: 101154
loss: 0.9817891120910645,grad_norm: 0.9999991096072686, iteration: 101155
loss: 1.013928771018982,grad_norm: 0.9869942120600252, iteration: 101156
loss: 0.987880527973175,grad_norm: 0.9784949871920448, iteration: 101157
loss: 0.9668435454368591,grad_norm: 0.9999991495526234, iteration: 101158
loss: 1.0096371173858643,grad_norm: 0.914705746184984, iteration: 101159
loss: 1.019296407699585,grad_norm: 0.9999991521146484, iteration: 101160
loss: 0.9835488200187683,grad_norm: 0.9999992046047113, iteration: 101161
loss: 1.006787896156311,grad_norm: 0.9999996672697096, iteration: 101162
loss: 1.026729941368103,grad_norm: 0.9999992866226179, iteration: 101163
loss: 0.9779488444328308,grad_norm: 0.999999183901294, iteration: 101164
loss: 1.045827031135559,grad_norm: 0.9999991678515538, iteration: 101165
loss: 0.9982151985168457,grad_norm: 0.9999991872318179, iteration: 101166
loss: 1.0220075845718384,grad_norm: 0.9999990544830168, iteration: 101167
loss: 1.004885196685791,grad_norm: 0.9999991573893242, iteration: 101168
loss: 1.0049400329589844,grad_norm: 0.9854684241201712, iteration: 101169
loss: 1.0312683582305908,grad_norm: 0.9685964697343136, iteration: 101170
loss: 1.0208536386489868,grad_norm: 0.9910402277756416, iteration: 101171
loss: 0.9526028633117676,grad_norm: 0.9818080194792759, iteration: 101172
loss: 1.029578447341919,grad_norm: 0.9999992212586218, iteration: 101173
loss: 0.9866792559623718,grad_norm: 0.9988279594817303, iteration: 101174
loss: 1.003345251083374,grad_norm: 0.9145409981878053, iteration: 101175
loss: 0.9988117814064026,grad_norm: 0.9999992310494845, iteration: 101176
loss: 1.016549825668335,grad_norm: 0.7890868818666522, iteration: 101177
loss: 1.005798101425171,grad_norm: 0.9416388524333187, iteration: 101178
loss: 1.0151817798614502,grad_norm: 0.9999991165168167, iteration: 101179
loss: 1.0233222246170044,grad_norm: 0.8890705156116334, iteration: 101180
loss: 1.0107851028442383,grad_norm: 0.9850210966769154, iteration: 101181
loss: 0.9963700175285339,grad_norm: 0.9999991622165505, iteration: 101182
loss: 1.0042020082473755,grad_norm: 0.9999993132421636, iteration: 101183
loss: 0.9704352021217346,grad_norm: 0.999999267076106, iteration: 101184
loss: 0.9724349975585938,grad_norm: 0.9999992261387943, iteration: 101185
loss: 1.0035234689712524,grad_norm: 0.9811389622037472, iteration: 101186
loss: 1.010160207748413,grad_norm: 0.9999990727409571, iteration: 101187
loss: 1.0911813974380493,grad_norm: 0.9999995216010286, iteration: 101188
loss: 0.9996272325515747,grad_norm: 0.9999991253858515, iteration: 101189
loss: 1.000881314277649,grad_norm: 0.9999991474250077, iteration: 101190
loss: 0.9930475950241089,grad_norm: 0.9397910541622078, iteration: 101191
loss: 1.020944595336914,grad_norm: 0.999999165557058, iteration: 101192
loss: 1.014693260192871,grad_norm: 0.9999991039587969, iteration: 101193
loss: 0.9919843077659607,grad_norm: 0.9658500015369519, iteration: 101194
loss: 0.9871878623962402,grad_norm: 0.9999992207007432, iteration: 101195
loss: 0.9910979270935059,grad_norm: 0.9999991359860465, iteration: 101196
loss: 0.9940508604049683,grad_norm: 0.9999991392160802, iteration: 101197
loss: 1.0055657625198364,grad_norm: 0.9999990075026012, iteration: 101198
loss: 0.9869194626808167,grad_norm: 0.915286240136929, iteration: 101199
loss: 1.0053900480270386,grad_norm: 0.9999992941414204, iteration: 101200
loss: 0.9912504553794861,grad_norm: 0.9259466414823665, iteration: 101201
loss: 0.9932101964950562,grad_norm: 0.9999990982380319, iteration: 101202
loss: 1.0147466659545898,grad_norm: 0.9999992438915527, iteration: 101203
loss: 1.0194896459579468,grad_norm: 0.9999990228010397, iteration: 101204
loss: 0.9797283411026001,grad_norm: 0.9034139771265329, iteration: 101205
loss: 1.015273928642273,grad_norm: 0.9999991352271146, iteration: 101206
loss: 1.02353835105896,grad_norm: 0.9999991467998037, iteration: 101207
loss: 0.9828245043754578,grad_norm: 0.9999992640486695, iteration: 101208
loss: 1.005953311920166,grad_norm: 0.999999073183317, iteration: 101209
loss: 1.0471341609954834,grad_norm: 0.9999991533424663, iteration: 101210
loss: 1.0015861988067627,grad_norm: 0.9999991012373483, iteration: 101211
loss: 0.9784866571426392,grad_norm: 0.9999991863138409, iteration: 101212
loss: 1.0172374248504639,grad_norm: 0.9999991565973143, iteration: 101213
loss: 1.002098798751831,grad_norm: 0.8873144766974141, iteration: 101214
loss: 1.000806212425232,grad_norm: 0.9999990027186269, iteration: 101215
loss: 0.9948123693466187,grad_norm: 0.9885235088279857, iteration: 101216
loss: 1.0098226070404053,grad_norm: 0.9999992793363197, iteration: 101217
loss: 1.0287058353424072,grad_norm: 0.999999236830476, iteration: 101218
loss: 1.0389677286148071,grad_norm: 0.9999991207439115, iteration: 101219
loss: 1.0331417322158813,grad_norm: 0.9999992366448381, iteration: 101220
loss: 0.9942018389701843,grad_norm: 0.9999992903253456, iteration: 101221
loss: 0.9796654582023621,grad_norm: 0.9999991608911897, iteration: 101222
loss: 1.0109046697616577,grad_norm: 0.9999990393761062, iteration: 101223
loss: 0.9815925359725952,grad_norm: 0.9999991261177684, iteration: 101224
loss: 1.0269737243652344,grad_norm: 0.9999996511186703, iteration: 101225
loss: 0.9899526238441467,grad_norm: 0.8974467735119107, iteration: 101226
loss: 1.0024853944778442,grad_norm: 0.999999052096468, iteration: 101227
loss: 1.005537748336792,grad_norm: 0.925089076241344, iteration: 101228
loss: 1.0313197374343872,grad_norm: 0.9999991768165635, iteration: 101229
loss: 1.0238746404647827,grad_norm: 0.9999993584424721, iteration: 101230
loss: 0.9844110012054443,grad_norm: 0.8556613657613608, iteration: 101231
loss: 1.001673698425293,grad_norm: 0.9999991293008849, iteration: 101232
loss: 1.0140079259872437,grad_norm: 0.9999990690585372, iteration: 101233
loss: 1.0675889253616333,grad_norm: 0.9007684248350113, iteration: 101234
loss: 1.0328770875930786,grad_norm: 0.9796444984415033, iteration: 101235
loss: 0.9785281419754028,grad_norm: 0.9999990502242686, iteration: 101236
loss: 0.9975709915161133,grad_norm: 0.9999990527768888, iteration: 101237
loss: 1.0064418315887451,grad_norm: 0.9236565888140077, iteration: 101238
loss: 1.0178096294403076,grad_norm: 0.9394519544130354, iteration: 101239
loss: 1.0078039169311523,grad_norm: 0.9396431237269774, iteration: 101240
loss: 0.9805216193199158,grad_norm: 0.9999993035006408, iteration: 101241
loss: 1.0371228456497192,grad_norm: 0.9999991368825787, iteration: 101242
loss: 0.9903377890586853,grad_norm: 0.9999990831454382, iteration: 101243
loss: 1.0088706016540527,grad_norm: 0.9999992183462703, iteration: 101244
loss: 0.9648330211639404,grad_norm: 0.9999991475198574, iteration: 101245
loss: 0.9934412837028503,grad_norm: 0.9999991669967391, iteration: 101246
loss: 0.9879257082939148,grad_norm: 0.9879307470314902, iteration: 101247
loss: 1.0099506378173828,grad_norm: 0.9999991104521384, iteration: 101248
loss: 1.0061570405960083,grad_norm: 0.8641752824470654, iteration: 101249
loss: 1.0075218677520752,grad_norm: 0.9999991464235352, iteration: 101250
loss: 0.99173903465271,grad_norm: 0.9999991278372319, iteration: 101251
loss: 0.9655537009239197,grad_norm: 0.9999991284423555, iteration: 101252
loss: 1.0053085088729858,grad_norm: 0.9999991058439105, iteration: 101253
loss: 0.980593204498291,grad_norm: 0.9999991549708994, iteration: 101254
loss: 0.9978848099708557,grad_norm: 0.9996492704346115, iteration: 101255
loss: 1.0240434408187866,grad_norm: 0.8122218540899413, iteration: 101256
loss: 1.016272783279419,grad_norm: 0.9999991166489192, iteration: 101257
loss: 1.0038907527923584,grad_norm: 0.9999991721901822, iteration: 101258
loss: 0.974923849105835,grad_norm: 0.9515347414342145, iteration: 101259
loss: 0.9833059310913086,grad_norm: 0.9999991274351656, iteration: 101260
loss: 0.9827224016189575,grad_norm: 0.8309429354988038, iteration: 101261
loss: 0.9818079471588135,grad_norm: 0.9999990274684338, iteration: 101262
loss: 0.9915849566459656,grad_norm: 0.8389181434062003, iteration: 101263
loss: 0.983077347278595,grad_norm: 0.9619558681352732, iteration: 101264
loss: 0.9473223090171814,grad_norm: 0.9999992381503116, iteration: 101265
loss: 0.9520358443260193,grad_norm: 0.9999991243635434, iteration: 101266
loss: 1.00386643409729,grad_norm: 0.9999992359673316, iteration: 101267
loss: 1.0125148296356201,grad_norm: 0.9256540900627964, iteration: 101268
loss: 1.0107437372207642,grad_norm: 0.9999990078486795, iteration: 101269
loss: 1.039846658706665,grad_norm: 0.9661759013201932, iteration: 101270
loss: 1.0357656478881836,grad_norm: 0.9999990367428908, iteration: 101271
loss: 1.0048072338104248,grad_norm: 0.9948883826213648, iteration: 101272
loss: 1.0119622945785522,grad_norm: 0.9999992184128403, iteration: 101273
loss: 0.986998438835144,grad_norm: 0.999999023838888, iteration: 101274
loss: 1.0014923810958862,grad_norm: 0.9999990785512228, iteration: 101275
loss: 1.0061391592025757,grad_norm: 0.9999990790462152, iteration: 101276
loss: 0.9852507710456848,grad_norm: 0.9617184560532256, iteration: 101277
loss: 1.0528652667999268,grad_norm: 0.9999992891592602, iteration: 101278
loss: 1.0099215507507324,grad_norm: 0.9779299977280214, iteration: 101279
loss: 0.9747912287712097,grad_norm: 0.9999990793714165, iteration: 101280
loss: 1.0076966285705566,grad_norm: 0.9999990349151082, iteration: 101281
loss: 1.0203502178192139,grad_norm: 0.9999991902678494, iteration: 101282
loss: 0.988271951675415,grad_norm: 0.999999187503192, iteration: 101283
loss: 1.060436725616455,grad_norm: 0.9999998171287476, iteration: 101284
loss: 0.9648693203926086,grad_norm: 0.9999992469109926, iteration: 101285
loss: 1.0028384923934937,grad_norm: 0.9999991173468604, iteration: 101286
loss: 1.0405882596969604,grad_norm: 0.9999989630372187, iteration: 101287
loss: 1.039795994758606,grad_norm: 0.999999257366305, iteration: 101288
loss: 1.032203197479248,grad_norm: 0.9706240485955218, iteration: 101289
loss: 1.0011215209960938,grad_norm: 0.9999990995553927, iteration: 101290
loss: 0.9607245326042175,grad_norm: 0.999998994863036, iteration: 101291
loss: 1.010733962059021,grad_norm: 0.9860633232494295, iteration: 101292
loss: 1.0139853954315186,grad_norm: 0.9999995090850196, iteration: 101293
loss: 0.9881657361984253,grad_norm: 0.9999998099305899, iteration: 101294
loss: 0.9983758926391602,grad_norm: 0.935471732273617, iteration: 101295
loss: 1.0404384136199951,grad_norm: 0.9999992798805404, iteration: 101296
loss: 0.970870852470398,grad_norm: 0.9532232332513364, iteration: 101297
loss: 0.9984124898910522,grad_norm: 0.9999991446013443, iteration: 101298
loss: 1.2385615110397339,grad_norm: 1.0000000152149002, iteration: 101299
loss: 0.9990084767341614,grad_norm: 0.9999993584025308, iteration: 101300
loss: 0.9863218069076538,grad_norm: 0.9999989859211155, iteration: 101301
loss: 1.0246833562850952,grad_norm: 0.9667787475291291, iteration: 101302
loss: 1.0131834745407104,grad_norm: 0.9999999280945889, iteration: 101303
loss: 0.994271993637085,grad_norm: 0.9999990763105386, iteration: 101304
loss: 1.083264946937561,grad_norm: 0.9999993195364724, iteration: 101305
loss: 1.0055170059204102,grad_norm: 0.999999262058995, iteration: 101306
loss: 0.9724310636520386,grad_norm: 0.943377429642762, iteration: 101307
loss: 1.0419577360153198,grad_norm: 0.9999993465609149, iteration: 101308
loss: 0.9859268665313721,grad_norm: 0.9202873575823565, iteration: 101309
loss: 1.036287546157837,grad_norm: 0.984593137667888, iteration: 101310
loss: 0.9817017912864685,grad_norm: 0.9999991929399256, iteration: 101311
loss: 1.0318655967712402,grad_norm: 0.9999992604667013, iteration: 101312
loss: 0.9865367412567139,grad_norm: 0.9999992666694401, iteration: 101313
loss: 1.0259267091751099,grad_norm: 0.9412188526355844, iteration: 101314
loss: 0.9924556612968445,grad_norm: 0.9673189937616798, iteration: 101315
loss: 1.0283970832824707,grad_norm: 0.9999993072179587, iteration: 101316
loss: 0.984041690826416,grad_norm: 0.9999990212912918, iteration: 101317
loss: 1.0114113092422485,grad_norm: 0.9999993124326406, iteration: 101318
loss: 1.0266876220703125,grad_norm: 0.9432151539043804, iteration: 101319
loss: 1.0259045362472534,grad_norm: 0.9683430449781005, iteration: 101320
loss: 1.0316133499145508,grad_norm: 0.9999992329174274, iteration: 101321
loss: 1.0209954977035522,grad_norm: 0.9999992504944184, iteration: 101322
loss: 1.0072938203811646,grad_norm: 0.9964235207216096, iteration: 101323
loss: 1.1386768817901611,grad_norm: 0.9999998468563281, iteration: 101324
loss: 0.9399824142456055,grad_norm: 0.9999990462074393, iteration: 101325
loss: 1.0074232816696167,grad_norm: 0.9999993358923468, iteration: 101326
loss: 1.0037299394607544,grad_norm: 0.9999990710108861, iteration: 101327
loss: 0.9926179647445679,grad_norm: 0.999999346519651, iteration: 101328
loss: 0.9907265901565552,grad_norm: 0.9999991508662138, iteration: 101329
loss: 1.0384780168533325,grad_norm: 0.9999999343914475, iteration: 101330
loss: 1.109696626663208,grad_norm: 0.9999998303662394, iteration: 101331
loss: 1.0231356620788574,grad_norm: 0.9999991966852151, iteration: 101332
loss: 0.9393468499183655,grad_norm: 0.9999990665030716, iteration: 101333
loss: 0.9709108471870422,grad_norm: 0.9999992706531455, iteration: 101334
loss: 0.9849562048912048,grad_norm: 0.9649651885905963, iteration: 101335
loss: 0.9793716073036194,grad_norm: 0.9530856815917317, iteration: 101336
loss: 1.0121439695358276,grad_norm: 0.9999994201684367, iteration: 101337
loss: 1.031184434890747,grad_norm: 0.9999990136640526, iteration: 101338
loss: 0.9840038418769836,grad_norm: 0.9161065692105087, iteration: 101339
loss: 1.013285756111145,grad_norm: 0.9999991451247773, iteration: 101340
loss: 1.0250457525253296,grad_norm: 0.9999998771477895, iteration: 101341
loss: 1.0188955068588257,grad_norm: 0.999999308958589, iteration: 101342
loss: 1.01020085811615,grad_norm: 0.999999179136325, iteration: 101343
loss: 1.016562581062317,grad_norm: 0.9959443713375384, iteration: 101344
loss: 1.0071457624435425,grad_norm: 0.999999011987062, iteration: 101345
loss: 1.0041203498840332,grad_norm: 0.9999992572317983, iteration: 101346
loss: 0.9809304475784302,grad_norm: 0.9999990470525296, iteration: 101347
loss: 1.0008043050765991,grad_norm: 0.9966626464663589, iteration: 101348
loss: 1.1086935997009277,grad_norm: 0.9999994871491092, iteration: 101349
loss: 0.9976829886436462,grad_norm: 0.9999991835554084, iteration: 101350
loss: 1.0185432434082031,grad_norm: 0.9999990938278245, iteration: 101351
loss: 0.9745694994926453,grad_norm: 0.999999120213794, iteration: 101352
loss: 0.9824784398078918,grad_norm: 0.8841975847142793, iteration: 101353
loss: 0.995071530342102,grad_norm: 0.9999989807042426, iteration: 101354
loss: 0.9986010789871216,grad_norm: 0.9999991660694337, iteration: 101355
loss: 0.9965848922729492,grad_norm: 0.9999989357864622, iteration: 101356
loss: 1.0529308319091797,grad_norm: 0.9999991227066451, iteration: 101357
loss: 1.0146880149841309,grad_norm: 0.9999990953981703, iteration: 101358
loss: 1.0174875259399414,grad_norm: 0.9999990382915397, iteration: 101359
loss: 1.0277127027511597,grad_norm: 0.9999992400052004, iteration: 101360
loss: 0.9642482995986938,grad_norm: 0.9358194219293501, iteration: 101361
loss: 0.9935535788536072,grad_norm: 0.8805597974428016, iteration: 101362
loss: 0.9979960918426514,grad_norm: 0.9196782220435761, iteration: 101363
loss: 0.9987303018569946,grad_norm: 0.9999989919765947, iteration: 101364
loss: 1.021695613861084,grad_norm: 0.9468621832225781, iteration: 101365
loss: 1.0078070163726807,grad_norm: 0.9999991846722716, iteration: 101366
loss: 0.9985228180885315,grad_norm: 0.9999991144597059, iteration: 101367
loss: 1.0000724792480469,grad_norm: 0.9999990749764021, iteration: 101368
loss: 1.0119818449020386,grad_norm: 0.9999991758152103, iteration: 101369
loss: 1.0200614929199219,grad_norm: 0.9260102556516676, iteration: 101370
loss: 0.9722058176994324,grad_norm: 0.9999994865921138, iteration: 101371
loss: 0.9708228707313538,grad_norm: 0.9596057608687301, iteration: 101372
loss: 1.0288968086242676,grad_norm: 0.9999992451941889, iteration: 101373
loss: 1.0006695985794067,grad_norm: 0.9999992101380282, iteration: 101374
loss: 0.9748911261558533,grad_norm: 0.999999062148625, iteration: 101375
loss: 0.9949216246604919,grad_norm: 0.9927494994920022, iteration: 101376
loss: 1.038620114326477,grad_norm: 0.9999990594274905, iteration: 101377
loss: 0.9899930953979492,grad_norm: 0.9999991375822933, iteration: 101378
loss: 1.0463321208953857,grad_norm: 0.9999993399237876, iteration: 101379
loss: 1.0087890625,grad_norm: 0.9999990513709067, iteration: 101380
loss: 0.9693864583969116,grad_norm: 0.9999992222830427, iteration: 101381
loss: 0.9875451326370239,grad_norm: 0.9999990745934236, iteration: 101382
loss: 0.9754005670547485,grad_norm: 0.9928732748609587, iteration: 101383
loss: 0.989582896232605,grad_norm: 0.9999992525082307, iteration: 101384
loss: 1.126229166984558,grad_norm: 0.9999996414438235, iteration: 101385
loss: 1.021449089050293,grad_norm: 0.9999992758060368, iteration: 101386
loss: 1.2465885877609253,grad_norm: 0.9999994796125187, iteration: 101387
loss: 1.0472960472106934,grad_norm: 0.9999997107605257, iteration: 101388
loss: 0.9794168472290039,grad_norm: 0.9538666523025439, iteration: 101389
loss: 1.0347979068756104,grad_norm: 0.999999114912879, iteration: 101390
loss: 0.9969722628593445,grad_norm: 0.9861414459057968, iteration: 101391
loss: 1.0310667753219604,grad_norm: 0.9999991570969072, iteration: 101392
loss: 1.0094670057296753,grad_norm: 0.999999207038637, iteration: 101393
loss: 1.0404144525527954,grad_norm: 0.9999992096498587, iteration: 101394
loss: 1.0081983804702759,grad_norm: 0.9999996902069149, iteration: 101395
loss: 0.9760165810585022,grad_norm: 0.9999991085530069, iteration: 101396
loss: 0.9941433668136597,grad_norm: 0.9999996197385947, iteration: 101397
loss: 0.9927783608436584,grad_norm: 0.962054767134649, iteration: 101398
loss: 1.0013115406036377,grad_norm: 0.9999991569240758, iteration: 101399
loss: 0.9859594702720642,grad_norm: 0.999999135303901, iteration: 101400
loss: 1.0149444341659546,grad_norm: 0.9999996516898936, iteration: 101401
loss: 0.9997146725654602,grad_norm: 0.9999990957850579, iteration: 101402
loss: 1.0012013912200928,grad_norm: 0.9999993176536349, iteration: 101403
loss: 1.0003799200057983,grad_norm: 0.9809994313908881, iteration: 101404
loss: 1.0099974870681763,grad_norm: 0.97514667015132, iteration: 101405
loss: 0.9916211366653442,grad_norm: 0.9999989497794004, iteration: 101406
loss: 1.0051076412200928,grad_norm: 0.9290148706096462, iteration: 101407
loss: 1.0184645652770996,grad_norm: 0.9966654253124708, iteration: 101408
loss: 1.020458698272705,grad_norm: 0.9999992088433344, iteration: 101409
loss: 1.0199285745620728,grad_norm: 0.999999186037063, iteration: 101410
loss: 1.0452333688735962,grad_norm: 0.9999992630696762, iteration: 101411
loss: 1.046116590499878,grad_norm: 0.9999990060895526, iteration: 101412
loss: 1.000036597251892,grad_norm: 0.9999990428081358, iteration: 101413
loss: 0.9955214858055115,grad_norm: 0.9999991288635496, iteration: 101414
loss: 0.9903994202613831,grad_norm: 0.9999990690785081, iteration: 101415
loss: 0.9989806413650513,grad_norm: 0.9750375062254415, iteration: 101416
loss: 1.0045503377914429,grad_norm: 0.999999142926045, iteration: 101417
loss: 1.0746692419052124,grad_norm: 0.9999996153236999, iteration: 101418
loss: 1.0059928894042969,grad_norm: 0.9999991251404843, iteration: 101419
loss: 0.9821826815605164,grad_norm: 0.9708655511511988, iteration: 101420
loss: 1.0006439685821533,grad_norm: 0.9999989370636928, iteration: 101421
loss: 1.0032788515090942,grad_norm: 0.9883520202064636, iteration: 101422
loss: 1.04116690158844,grad_norm: 0.9775582162157563, iteration: 101423
loss: 1.0238221883773804,grad_norm: 0.9999991191769968, iteration: 101424
loss: 1.0049067735671997,grad_norm: 0.9999990114579105, iteration: 101425
loss: 0.9599367380142212,grad_norm: 0.99999910733763, iteration: 101426
loss: 0.9738764762878418,grad_norm: 0.9999991668765462, iteration: 101427
loss: 0.9915177822113037,grad_norm: 0.9999991481170134, iteration: 101428
loss: 0.9977070093154907,grad_norm: 0.9187452869218721, iteration: 101429
loss: 1.0549911260604858,grad_norm: 0.9999992773350685, iteration: 101430
loss: 1.0000731945037842,grad_norm: 0.9999992239950243, iteration: 101431
loss: 1.0111428499221802,grad_norm: 0.9575806416873901, iteration: 101432
loss: 1.0186185836791992,grad_norm: 0.9762577183632333, iteration: 101433
loss: 1.0473819971084595,grad_norm: 0.9999993045440574, iteration: 101434
loss: 0.9974998235702515,grad_norm: 0.9999990765175959, iteration: 101435
loss: 0.9876816868782043,grad_norm: 0.999999297545306, iteration: 101436
loss: 0.976123034954071,grad_norm: 0.999999148465434, iteration: 101437
loss: 0.9763239622116089,grad_norm: 0.9999991316683404, iteration: 101438
loss: 1.0010508298873901,grad_norm: 0.9999992848990382, iteration: 101439
loss: 0.9750946164131165,grad_norm: 0.9999991130480212, iteration: 101440
loss: 0.9968053102493286,grad_norm: 0.8717244323073222, iteration: 101441
loss: 0.9773244857788086,grad_norm: 0.9999990331461946, iteration: 101442
loss: 0.9981529116630554,grad_norm: 0.9041933218796812, iteration: 101443
loss: 1.0787456035614014,grad_norm: 0.9999992618347211, iteration: 101444
loss: 0.9901602864265442,grad_norm: 0.9999990348630776, iteration: 101445
loss: 0.993181049823761,grad_norm: 0.9999989972151249, iteration: 101446
loss: 0.9875057339668274,grad_norm: 0.9999991574281623, iteration: 101447
loss: 0.9968958497047424,grad_norm: 0.9806709082285148, iteration: 101448
loss: 1.0283870697021484,grad_norm: 0.9999989512463744, iteration: 101449
loss: 1.0293715000152588,grad_norm: 0.8776120993976142, iteration: 101450
loss: 1.0118169784545898,grad_norm: 0.9999990837234617, iteration: 101451
loss: 1.0152149200439453,grad_norm: 0.9999992555963626, iteration: 101452
loss: 0.9963402152061462,grad_norm: 0.9643590935192566, iteration: 101453
loss: 1.021726131439209,grad_norm: 0.999999116799867, iteration: 101454
loss: 0.9662712216377258,grad_norm: 0.9987125888108217, iteration: 101455
loss: 1.0035473108291626,grad_norm: 0.9748036903853229, iteration: 101456
loss: 0.9665595293045044,grad_norm: 0.9999991175020135, iteration: 101457
loss: 1.0065733194351196,grad_norm: 0.9999998441998273, iteration: 101458
loss: 0.9527274370193481,grad_norm: 0.9999991527849449, iteration: 101459
loss: 0.9857890605926514,grad_norm: 0.8956833628051037, iteration: 101460
loss: 1.0159475803375244,grad_norm: 0.9999992648811002, iteration: 101461
loss: 0.975609540939331,grad_norm: 0.9999990709268046, iteration: 101462
loss: 0.9555748701095581,grad_norm: 0.9742280406315047, iteration: 101463
loss: 1.0072472095489502,grad_norm: 0.999999157323878, iteration: 101464
loss: 0.9790142774581909,grad_norm: 0.9999993472317438, iteration: 101465
loss: 0.9920399188995361,grad_norm: 0.9622802343627329, iteration: 101466
loss: 0.9979254603385925,grad_norm: 0.9999992177283589, iteration: 101467
loss: 1.0436842441558838,grad_norm: 0.9999996625988802, iteration: 101468
loss: 1.0166890621185303,grad_norm: 0.9999991880195065, iteration: 101469
loss: 0.9733118414878845,grad_norm: 0.9999992344089689, iteration: 101470
loss: 1.0273686647415161,grad_norm: 0.9619461714889866, iteration: 101471
loss: 1.0029168128967285,grad_norm: 0.9204096550412039, iteration: 101472
loss: 1.0064154863357544,grad_norm: 0.9999990101362872, iteration: 101473
loss: 1.0001745223999023,grad_norm: 0.9999989643745907, iteration: 101474
loss: 1.0264058113098145,grad_norm: 0.9999990141452281, iteration: 101475
loss: 1.0073376893997192,grad_norm: 0.999999191722053, iteration: 101476
loss: 0.9874809980392456,grad_norm: 0.9999991037665109, iteration: 101477
loss: 0.9880794882774353,grad_norm: 0.9514873634834269, iteration: 101478
loss: 1.0153850317001343,grad_norm: 0.9034851868972009, iteration: 101479
loss: 0.967894971370697,grad_norm: 0.9999991570483622, iteration: 101480
loss: 1.027345895767212,grad_norm: 0.9999992430849732, iteration: 101481
loss: 0.9740081429481506,grad_norm: 0.9999991166493363, iteration: 101482
loss: 0.9948084950447083,grad_norm: 0.9999991236757376, iteration: 101483
loss: 1.0232287645339966,grad_norm: 0.9073274026062549, iteration: 101484
loss: 1.0199271440505981,grad_norm: 0.9864911318496347, iteration: 101485
loss: 0.9811534285545349,grad_norm: 0.9999990308884937, iteration: 101486
loss: 1.026506781578064,grad_norm: 0.9999991027567331, iteration: 101487
loss: 1.0134822130203247,grad_norm: 0.9999992682616258, iteration: 101488
loss: 0.9932116270065308,grad_norm: 0.9993636569654559, iteration: 101489
loss: 1.0291334390640259,grad_norm: 0.9999991360160057, iteration: 101490
loss: 0.9890011548995972,grad_norm: 0.9999989631010732, iteration: 101491
loss: 0.9853352308273315,grad_norm: 0.9999991007521981, iteration: 101492
loss: 1.0551906824111938,grad_norm: 0.9999993922466291, iteration: 101493
loss: 1.0060136318206787,grad_norm: 0.9999991083236519, iteration: 101494
loss: 1.0060226917266846,grad_norm: 0.9999990782509645, iteration: 101495
loss: 0.9927024245262146,grad_norm: 0.8543529133932373, iteration: 101496
loss: 1.0550060272216797,grad_norm: 0.9300496323554238, iteration: 101497
loss: 0.9778395891189575,grad_norm: 0.999999039796452, iteration: 101498
loss: 0.9877128601074219,grad_norm: 0.9999992519179962, iteration: 101499
loss: 1.0167263746261597,grad_norm: 0.9999992323206309, iteration: 101500
loss: 0.9604099988937378,grad_norm: 0.9449571608948941, iteration: 101501
loss: 1.019443392753601,grad_norm: 0.9878172554077381, iteration: 101502
loss: 0.9730639457702637,grad_norm: 0.9999990797552698, iteration: 101503
loss: 0.9462327361106873,grad_norm: 0.9999990350959488, iteration: 101504
loss: 0.9654040336608887,grad_norm: 0.9999990765144591, iteration: 101505
loss: 1.026512861251831,grad_norm: 0.8084447134426073, iteration: 101506
loss: 0.9546328186988831,grad_norm: 0.9999990109198563, iteration: 101507
loss: 1.033873200416565,grad_norm: 0.9999995733605771, iteration: 101508
loss: 1.0426007509231567,grad_norm: 0.9999996375221512, iteration: 101509
loss: 1.0088491439819336,grad_norm: 0.9999990852528795, iteration: 101510
loss: 0.9804041385650635,grad_norm: 0.9999991363825839, iteration: 101511
loss: 0.9686437249183655,grad_norm: 0.9999992305794841, iteration: 101512
loss: 0.9870484471321106,grad_norm: 0.9338511304448016, iteration: 101513
loss: 1.0125675201416016,grad_norm: 0.9851345187703409, iteration: 101514
loss: 1.0058603286743164,grad_norm: 0.9145585897604112, iteration: 101515
loss: 0.9821125864982605,grad_norm: 0.9999991874461304, iteration: 101516
loss: 1.0020976066589355,grad_norm: 0.9944033529532506, iteration: 101517
loss: 0.9541658163070679,grad_norm: 0.9999991795813843, iteration: 101518
loss: 1.020960807800293,grad_norm: 0.9973822285757342, iteration: 101519
loss: 0.9504404664039612,grad_norm: 0.9999992291405199, iteration: 101520
loss: 0.9621538519859314,grad_norm: 0.8546000426199205, iteration: 101521
loss: 1.0429133176803589,grad_norm: 0.9999991285263219, iteration: 101522
loss: 0.9593068957328796,grad_norm: 0.9999992242560442, iteration: 101523
loss: 1.0387393236160278,grad_norm: 0.9999991583667761, iteration: 101524
loss: 1.0224794149398804,grad_norm: 0.9999990927723492, iteration: 101525
loss: 0.9527748823165894,grad_norm: 0.999999001311734, iteration: 101526
loss: 0.9630300998687744,grad_norm: 0.9999991471214618, iteration: 101527
loss: 1.032724142074585,grad_norm: 0.8611120763267875, iteration: 101528
loss: 1.0113540887832642,grad_norm: 0.999999048034831, iteration: 101529
loss: 0.9587774872779846,grad_norm: 0.9999991735831625, iteration: 101530
loss: 0.9803523421287537,grad_norm: 0.9999993031846792, iteration: 101531
loss: 1.003139853477478,grad_norm: 0.9999992549788568, iteration: 101532
loss: 1.0222917795181274,grad_norm: 0.9999991718971544, iteration: 101533
loss: 1.0063022375106812,grad_norm: 0.9224133886032198, iteration: 101534
loss: 1.0035778284072876,grad_norm: 0.9633789173525299, iteration: 101535
loss: 1.0233558416366577,grad_norm: 0.9999991236779437, iteration: 101536
loss: 0.9878787398338318,grad_norm: 0.8937307351452792, iteration: 101537
loss: 1.0042046308517456,grad_norm: 0.9999993391557235, iteration: 101538
loss: 0.994600772857666,grad_norm: 0.9999991535609836, iteration: 101539
loss: 0.9535821080207825,grad_norm: 0.9999992634697152, iteration: 101540
loss: 1.0024142265319824,grad_norm: 0.9999991656333224, iteration: 101541
loss: 1.0444477796554565,grad_norm: 0.9999991472320128, iteration: 101542
loss: 0.999051034450531,grad_norm: 0.9181174479816718, iteration: 101543
loss: 1.002760648727417,grad_norm: 0.8996493979775491, iteration: 101544
loss: 0.9990553259849548,grad_norm: 0.9999991967592892, iteration: 101545
loss: 1.0146358013153076,grad_norm: 0.999999134965177, iteration: 101546
loss: 1.007369875907898,grad_norm: 0.9999990272952994, iteration: 101547
loss: 0.9674977660179138,grad_norm: 0.9999990300699177, iteration: 101548
loss: 1.0051401853561401,grad_norm: 0.9999990386474582, iteration: 101549
loss: 0.9682130217552185,grad_norm: 0.9999992704936475, iteration: 101550
loss: 0.983122706413269,grad_norm: 0.9999990515963099, iteration: 101551
loss: 1.0058884620666504,grad_norm: 0.9999992096816839, iteration: 101552
loss: 1.0002669095993042,grad_norm: 0.8978602882585044, iteration: 101553
loss: 0.9956104755401611,grad_norm: 0.999999220343657, iteration: 101554
loss: 0.9949795007705688,grad_norm: 0.9999991150536965, iteration: 101555
loss: 0.9685285687446594,grad_norm: 0.971038443750789, iteration: 101556
loss: 0.9769273400306702,grad_norm: 0.9999992400471667, iteration: 101557
loss: 1.0240336656570435,grad_norm: 0.9999992544264704, iteration: 101558
loss: 0.9685655236244202,grad_norm: 0.9999992320172602, iteration: 101559
loss: 1.037487268447876,grad_norm: 0.9957403363960977, iteration: 101560
loss: 1.0219172239303589,grad_norm: 0.9999989849401288, iteration: 101561
loss: 0.978753924369812,grad_norm: 0.9999991811020316, iteration: 101562
loss: 1.0072509050369263,grad_norm: 0.9999990753178493, iteration: 101563
loss: 1.0048481225967407,grad_norm: 0.9999991784781609, iteration: 101564
loss: 0.986386239528656,grad_norm: 0.9999992779028605, iteration: 101565
loss: 1.0125221014022827,grad_norm: 0.9867014580921457, iteration: 101566
loss: 0.9733237028121948,grad_norm: 0.9999990175874912, iteration: 101567
loss: 1.011668086051941,grad_norm: 0.9999992100611304, iteration: 101568
loss: 1.0013600587844849,grad_norm: 0.9999990552923496, iteration: 101569
loss: 0.9829698801040649,grad_norm: 0.9999991916405543, iteration: 101570
loss: 0.995356023311615,grad_norm: 0.8930040872439623, iteration: 101571
loss: 0.9984198212623596,grad_norm: 0.9999995200755977, iteration: 101572
loss: 1.0056570768356323,grad_norm: 0.9870180913949543, iteration: 101573
loss: 1.0053399801254272,grad_norm: 0.9999991285464457, iteration: 101574
loss: 1.0250755548477173,grad_norm: 0.8828269425867308, iteration: 101575
loss: 1.0076351165771484,grad_norm: 0.9999991832788019, iteration: 101576
loss: 1.0302417278289795,grad_norm: 0.9999996640118683, iteration: 101577
loss: 1.0827587842941284,grad_norm: 0.9999996315130627, iteration: 101578
loss: 0.9897984266281128,grad_norm: 0.9999990640970892, iteration: 101579
loss: 1.008811593055725,grad_norm: 0.9999990283368398, iteration: 101580
loss: 1.0623494386672974,grad_norm: 0.9999995930223022, iteration: 101581
loss: 0.9883056282997131,grad_norm: 0.9999991455518455, iteration: 101582
loss: 1.0024986267089844,grad_norm: 0.9999990902804019, iteration: 101583
loss: 0.9762964844703674,grad_norm: 0.9999991647574349, iteration: 101584
loss: 1.0920261144638062,grad_norm: 0.9999999197062245, iteration: 101585
loss: 0.9837487936019897,grad_norm: 0.9292590806688971, iteration: 101586
loss: 0.9990432262420654,grad_norm: 0.9999990947673867, iteration: 101587
loss: 1.0218653678894043,grad_norm: 0.9999989840280337, iteration: 101588
loss: 0.9666359424591064,grad_norm: 0.9999991202517494, iteration: 101589
loss: 0.9789130687713623,grad_norm: 0.9999992409228022, iteration: 101590
loss: 1.0018950700759888,grad_norm: 0.9999992889792682, iteration: 101591
loss: 0.989356279373169,grad_norm: 0.9999991687278548, iteration: 101592
loss: 0.9604199528694153,grad_norm: 0.9622695051815366, iteration: 101593
loss: 1.0365766286849976,grad_norm: 0.9999994520514186, iteration: 101594
loss: 1.0134656429290771,grad_norm: 0.9999992490505555, iteration: 101595
loss: 0.9735293984413147,grad_norm: 0.9999991173961688, iteration: 101596
loss: 0.9923868775367737,grad_norm: 0.9730499284630292, iteration: 101597
loss: 1.0274899005889893,grad_norm: 0.9999991020546775, iteration: 101598
loss: 0.9693660736083984,grad_norm: 0.9999990680601658, iteration: 101599
loss: 1.0002816915512085,grad_norm: 0.9999993995649246, iteration: 101600
loss: 0.9948888421058655,grad_norm: 0.9999991219036486, iteration: 101601
loss: 1.006203055381775,grad_norm: 0.9999990246947147, iteration: 101602
loss: 0.9865686893463135,grad_norm: 0.9999991934350023, iteration: 101603
loss: 0.979654848575592,grad_norm: 0.9999990268969593, iteration: 101604
loss: 0.9674928188323975,grad_norm: 0.9999991012969871, iteration: 101605
loss: 0.975766122341156,grad_norm: 0.9999991058258711, iteration: 101606
loss: 1.0311299562454224,grad_norm: 0.9999992494900379, iteration: 101607
loss: 0.9975883364677429,grad_norm: 0.8640284946446785, iteration: 101608
loss: 1.0316678285598755,grad_norm: 0.9999990854972449, iteration: 101609
loss: 1.0247448682785034,grad_norm: 0.9999996670832076, iteration: 101610
loss: 0.9739723205566406,grad_norm: 0.9980806477055774, iteration: 101611
loss: 0.9748570919036865,grad_norm: 0.9999991308426434, iteration: 101612
loss: 0.9582173824310303,grad_norm: 0.9039320904095106, iteration: 101613
loss: 0.9678696393966675,grad_norm: 0.9999992116734886, iteration: 101614
loss: 0.9739698767662048,grad_norm: 0.9999989285317415, iteration: 101615
loss: 1.0168368816375732,grad_norm: 0.9999992608135657, iteration: 101616
loss: 1.0301587581634521,grad_norm: 0.9999990446840629, iteration: 101617
loss: 1.0075949430465698,grad_norm: 0.9999989919968009, iteration: 101618
loss: 0.9746966361999512,grad_norm: 0.9999991698380942, iteration: 101619
loss: 0.9836006760597229,grad_norm: 0.8445752681501981, iteration: 101620
loss: 1.0027726888656616,grad_norm: 0.9999989970235933, iteration: 101621
loss: 0.978393018245697,grad_norm: 0.9999990515601629, iteration: 101622
loss: 0.9889956712722778,grad_norm: 0.9999993147559805, iteration: 101623
loss: 0.954804539680481,grad_norm: 0.8737670204854419, iteration: 101624
loss: 1.0056792497634888,grad_norm: 0.9322636934350995, iteration: 101625
loss: 0.9947808384895325,grad_norm: 0.9999991389214028, iteration: 101626
loss: 0.9866759777069092,grad_norm: 0.9999994063259247, iteration: 101627
loss: 1.0434695482254028,grad_norm: 0.9549330174130704, iteration: 101628
loss: 1.0207021236419678,grad_norm: 0.9999992760486552, iteration: 101629
loss: 1.01505446434021,grad_norm: 0.9999991813122279, iteration: 101630
loss: 0.9847068190574646,grad_norm: 0.9508165926349131, iteration: 101631
loss: 1.0354424715042114,grad_norm: 0.9813142965662481, iteration: 101632
loss: 1.0200952291488647,grad_norm: 0.8540307468072532, iteration: 101633
loss: 1.0162322521209717,grad_norm: 0.9999991833929331, iteration: 101634
loss: 1.0235555171966553,grad_norm: 0.9748187251717347, iteration: 101635
loss: 1.0070977210998535,grad_norm: 0.9999992409615045, iteration: 101636
loss: 0.9769899249076843,grad_norm: 0.9999991594662933, iteration: 101637
loss: 0.9757223129272461,grad_norm: 0.8395908063158978, iteration: 101638
loss: 0.9900005459785461,grad_norm: 0.8364965564104997, iteration: 101639
loss: 1.030745267868042,grad_norm: 0.9999990825772271, iteration: 101640
loss: 0.9741438031196594,grad_norm: 0.8926426318446162, iteration: 101641
loss: 1.0024960041046143,grad_norm: 0.9892771496463769, iteration: 101642
loss: 1.0117844343185425,grad_norm: 0.9286547330406678, iteration: 101643
loss: 0.9801433086395264,grad_norm: 0.8263198307391284, iteration: 101644
loss: 0.9808405637741089,grad_norm: 0.9980004279322476, iteration: 101645
loss: 0.9985809922218323,grad_norm: 0.999998932511225, iteration: 101646
loss: 0.9881495237350464,grad_norm: 0.999999098540069, iteration: 101647
loss: 1.0140390396118164,grad_norm: 0.9999990427621479, iteration: 101648
loss: 1.003371000289917,grad_norm: 0.9999996719770172, iteration: 101649
loss: 1.021294116973877,grad_norm: 0.999999317112411, iteration: 101650
loss: 0.9991825819015503,grad_norm: 0.9167551563587868, iteration: 101651
loss: 0.9841925501823425,grad_norm: 0.9443949088918404, iteration: 101652
loss: 0.996105968952179,grad_norm: 0.9999991394590924, iteration: 101653
loss: 0.9888505935668945,grad_norm: 0.9999992202103455, iteration: 101654
loss: 0.9905012249946594,grad_norm: 0.9770088755368569, iteration: 101655
loss: 1.0159857273101807,grad_norm: 0.9999990767713229, iteration: 101656
loss: 1.0090374946594238,grad_norm: 0.9999990714090271, iteration: 101657
loss: 1.0034879446029663,grad_norm: 0.9896312409376518, iteration: 101658
loss: 1.011453628540039,grad_norm: 0.9999990601128995, iteration: 101659
loss: 0.9427267909049988,grad_norm: 0.9181987886277075, iteration: 101660
loss: 1.0323387384414673,grad_norm: 0.8967905230227167, iteration: 101661
loss: 0.9965338110923767,grad_norm: 0.9236595777320834, iteration: 101662
loss: 1.0156728029251099,grad_norm: 0.9999989752808094, iteration: 101663
loss: 1.0124537944793701,grad_norm: 0.9999990666182595, iteration: 101664
loss: 1.0173683166503906,grad_norm: 0.9999990619025495, iteration: 101665
loss: 0.9928238391876221,grad_norm: 0.9999991528367432, iteration: 101666
loss: 1.0383330583572388,grad_norm: 0.9999992768433913, iteration: 101667
loss: 0.9757294654846191,grad_norm: 0.9999991502482531, iteration: 101668
loss: 1.0409022569656372,grad_norm: 0.9999991463109628, iteration: 101669
loss: 0.9635418653488159,grad_norm: 0.9999990164812259, iteration: 101670
loss: 0.9698278903961182,grad_norm: 0.9999990463230991, iteration: 101671
loss: 0.9792492389678955,grad_norm: 0.9999990909241262, iteration: 101672
loss: 1.0261062383651733,grad_norm: 0.9999999096512752, iteration: 101673
loss: 1.007272481918335,grad_norm: 0.9674719590380237, iteration: 101674
loss: 0.9903042912483215,grad_norm: 0.9999991546771547, iteration: 101675
loss: 1.0411564111709595,grad_norm: 0.9999991988221603, iteration: 101676
loss: 1.0015597343444824,grad_norm: 0.9999990768952796, iteration: 101677
loss: 0.957048237323761,grad_norm: 0.9999991525340866, iteration: 101678
loss: 0.9857763648033142,grad_norm: 0.895910043698342, iteration: 101679
loss: 1.0320287942886353,grad_norm: 0.9999991199402902, iteration: 101680
loss: 0.9924031496047974,grad_norm: 0.9999991137493326, iteration: 101681
loss: 1.011016607284546,grad_norm: 0.9999992005729402, iteration: 101682
loss: 0.9678623080253601,grad_norm: 0.9999991819058681, iteration: 101683
loss: 1.0225101709365845,grad_norm: 0.9999992087236225, iteration: 101684
loss: 1.0054346323013306,grad_norm: 0.9999993242593652, iteration: 101685
loss: 0.9967806935310364,grad_norm: 0.9999990241288927, iteration: 101686
loss: 1.0086053609848022,grad_norm: 0.9795723983397854, iteration: 101687
loss: 0.9933534264564514,grad_norm: 0.9999991797709593, iteration: 101688
loss: 0.993719756603241,grad_norm: 0.9999991163922254, iteration: 101689
loss: 0.981261134147644,grad_norm: 0.9999991773285536, iteration: 101690
loss: 0.9790160655975342,grad_norm: 0.9696805312771335, iteration: 101691
loss: 0.9748579859733582,grad_norm: 0.9402713236620582, iteration: 101692
loss: 0.994234561920166,grad_norm: 0.9999992569019046, iteration: 101693
loss: 0.9711408019065857,grad_norm: 0.9999991042038407, iteration: 101694
loss: 0.9805171489715576,grad_norm: 0.9999993166336635, iteration: 101695
loss: 1.0058789253234863,grad_norm: 0.9181366821603245, iteration: 101696
loss: 0.9797125458717346,grad_norm: 0.9999993544310253, iteration: 101697
loss: 1.1514161825180054,grad_norm: 0.9999993323311689, iteration: 101698
loss: 1.0144754648208618,grad_norm: 0.9999991554130727, iteration: 101699
loss: 1.0004668235778809,grad_norm: 0.9965302821852382, iteration: 101700
loss: 0.9968256950378418,grad_norm: 0.914749508561236, iteration: 101701
loss: 1.014013409614563,grad_norm: 0.9999991665651438, iteration: 101702
loss: 0.9875198006629944,grad_norm: 0.8830779787887355, iteration: 101703
loss: 1.0025224685668945,grad_norm: 0.9999991369200979, iteration: 101704
loss: 1.0338106155395508,grad_norm: 0.9999995889466121, iteration: 101705
loss: 1.0139919519424438,grad_norm: 0.9587736336251471, iteration: 101706
loss: 1.0205340385437012,grad_norm: 0.9999990358744495, iteration: 101707
loss: 1.0172425508499146,grad_norm: 0.9316383014815782, iteration: 101708
loss: 0.9908470511436462,grad_norm: 0.999999119662201, iteration: 101709
loss: 1.0270936489105225,grad_norm: 0.9999992262253389, iteration: 101710
loss: 1.0111876726150513,grad_norm: 0.9999991756360235, iteration: 101711
loss: 0.9876990914344788,grad_norm: 0.9999992580336615, iteration: 101712
loss: 1.0192352533340454,grad_norm: 0.9999991043559743, iteration: 101713
loss: 1.0017404556274414,grad_norm: 0.9999990228326054, iteration: 101714
loss: 1.0343273878097534,grad_norm: 0.9999997054277491, iteration: 101715
loss: 1.0318949222564697,grad_norm: 0.9999991430199833, iteration: 101716
loss: 1.0075634717941284,grad_norm: 0.8731175228645025, iteration: 101717
loss: 0.9952502846717834,grad_norm: 0.9999991336636636, iteration: 101718
loss: 0.9910721182823181,grad_norm: 0.9999992881893354, iteration: 101719
loss: 0.9837973713874817,grad_norm: 0.9999990324775029, iteration: 101720
loss: 0.9783046841621399,grad_norm: 0.9524679861557754, iteration: 101721
loss: 0.9958443641662598,grad_norm: 0.9980442017418119, iteration: 101722
loss: 0.9721938967704773,grad_norm: 0.9999992296197723, iteration: 101723
loss: 0.9947926998138428,grad_norm: 0.9442488365767278, iteration: 101724
loss: 1.0213065147399902,grad_norm: 0.9999992069981556, iteration: 101725
loss: 1.0202586650848389,grad_norm: 0.9243243940561997, iteration: 101726
loss: 1.0388588905334473,grad_norm: 0.9999991907788467, iteration: 101727
loss: 0.9957048892974854,grad_norm: 0.9999990946303922, iteration: 101728
loss: 1.0370237827301025,grad_norm: 0.9999990789143426, iteration: 101729
loss: 1.0322630405426025,grad_norm: 0.9999991254803683, iteration: 101730
loss: 0.9864754676818848,grad_norm: 0.999999143825868, iteration: 101731
loss: 1.0083171129226685,grad_norm: 0.9999989377095785, iteration: 101732
loss: 0.9832198023796082,grad_norm: 0.8921824748945276, iteration: 101733
loss: 1.0189158916473389,grad_norm: 0.952521361139981, iteration: 101734
loss: 1.0356258153915405,grad_norm: 0.9999991927068581, iteration: 101735
loss: 0.9742360711097717,grad_norm: 0.9999990396048738, iteration: 101736
loss: 0.9931422472000122,grad_norm: 0.9999992995415105, iteration: 101737
loss: 1.0060076713562012,grad_norm: 0.8689571735500055, iteration: 101738
loss: 1.0015156269073486,grad_norm: 0.9999989746247004, iteration: 101739
loss: 1.005283236503601,grad_norm: 0.9999992072070988, iteration: 101740
loss: 0.9967392683029175,grad_norm: 0.8482299036110309, iteration: 101741
loss: 1.0111035108566284,grad_norm: 0.9812431888596069, iteration: 101742
loss: 0.9938638806343079,grad_norm: 0.9904561541481945, iteration: 101743
loss: 1.0099010467529297,grad_norm: 0.8674784617953747, iteration: 101744
loss: 0.9826234579086304,grad_norm: 0.9999990893728112, iteration: 101745
loss: 0.948915958404541,grad_norm: 0.999999103181338, iteration: 101746
loss: 0.9961433410644531,grad_norm: 0.9999992447337143, iteration: 101747
loss: 0.9554536938667297,grad_norm: 0.8880936071829796, iteration: 101748
loss: 1.038856863975525,grad_norm: 0.9999990584133693, iteration: 101749
loss: 1.0109418630599976,grad_norm: 0.9011752332587915, iteration: 101750
loss: 1.0026671886444092,grad_norm: 0.9999990391285826, iteration: 101751
loss: 0.9940754175186157,grad_norm: 0.9999992362991136, iteration: 101752
loss: 0.9869390726089478,grad_norm: 0.9999990490554639, iteration: 101753
loss: 0.9749908447265625,grad_norm: 0.9999990743102981, iteration: 101754
loss: 1.0141477584838867,grad_norm: 0.9663843561568334, iteration: 101755
loss: 1.0324149131774902,grad_norm: 0.9999993380349228, iteration: 101756
loss: 0.9833743572235107,grad_norm: 0.9999991686967817, iteration: 101757
loss: 0.9923338294029236,grad_norm: 0.9885791392849539, iteration: 101758
loss: 0.9930180907249451,grad_norm: 0.9999991260793825, iteration: 101759
loss: 1.0210710763931274,grad_norm: 0.9999990892714518, iteration: 101760
loss: 0.9763777256011963,grad_norm: 0.9865975037612362, iteration: 101761
loss: 1.0000479221343994,grad_norm: 0.9999991047439364, iteration: 101762
loss: 1.0163350105285645,grad_norm: 0.8822895763178576, iteration: 101763
loss: 0.9718786478042603,grad_norm: 0.8953545700546612, iteration: 101764
loss: 1.0090796947479248,grad_norm: 0.99999913915019, iteration: 101765
loss: 1.021111011505127,grad_norm: 0.9120019934879531, iteration: 101766
loss: 0.9675514101982117,grad_norm: 0.9999990535539595, iteration: 101767
loss: 1.031023621559143,grad_norm: 0.9999993993542119, iteration: 101768
loss: 0.9940069317817688,grad_norm: 0.9999990385625751, iteration: 101769
loss: 1.0073624849319458,grad_norm: 0.9999991638361287, iteration: 101770
loss: 1.0321379899978638,grad_norm: 0.9154356956486395, iteration: 101771
loss: 0.953477680683136,grad_norm: 0.9999993635575262, iteration: 101772
loss: 1.0302817821502686,grad_norm: 0.9999991954861032, iteration: 101773
loss: 1.0043559074401855,grad_norm: 0.9823748686019655, iteration: 101774
loss: 0.9233654141426086,grad_norm: 0.9031222726152078, iteration: 101775
loss: 0.9777591824531555,grad_norm: 0.9813607385184772, iteration: 101776
loss: 0.9915334582328796,grad_norm: 0.9999991166317378, iteration: 101777
loss: 1.0102251768112183,grad_norm: 0.9999990578307971, iteration: 101778
loss: 0.9846210479736328,grad_norm: 0.9883671653529094, iteration: 101779
loss: 1.017522931098938,grad_norm: 0.8670529318176399, iteration: 101780
loss: 0.993428111076355,grad_norm: 0.9999991700375092, iteration: 101781
loss: 1.0240273475646973,grad_norm: 0.9999992306466559, iteration: 101782
loss: 1.0114166736602783,grad_norm: 0.9999990933101476, iteration: 101783
loss: 0.9567511677742004,grad_norm: 0.9235549042091039, iteration: 101784
loss: 1.0063458681106567,grad_norm: 0.9884780085794194, iteration: 101785
loss: 0.9787693023681641,grad_norm: 0.9825891988928844, iteration: 101786
loss: 0.9897624850273132,grad_norm: 0.9526315443004179, iteration: 101787
loss: 1.020278811454773,grad_norm: 0.9999991063509277, iteration: 101788
loss: 1.0083439350128174,grad_norm: 0.9999992891646167, iteration: 101789
loss: 1.0262110233306885,grad_norm: 0.9146758588709502, iteration: 101790
loss: 0.959882915019989,grad_norm: 0.9999990520858163, iteration: 101791
loss: 1.0068228244781494,grad_norm: 0.9983115847202676, iteration: 101792
loss: 0.9516721367835999,grad_norm: 0.9999990802074051, iteration: 101793
loss: 0.9966539144515991,grad_norm: 0.954645655355276, iteration: 101794
loss: 0.9890590310096741,grad_norm: 0.922681272043246, iteration: 101795
loss: 0.9802751541137695,grad_norm: 0.9999990825311202, iteration: 101796
loss: 0.9639929533004761,grad_norm: 0.9161329111889356, iteration: 101797
loss: 1.0102581977844238,grad_norm: 0.9999991593182836, iteration: 101798
loss: 1.018366813659668,grad_norm: 0.9599137243622098, iteration: 101799
loss: 1.0469238758087158,grad_norm: 0.999999217702843, iteration: 101800
loss: 0.9810813665390015,grad_norm: 0.9548104382636431, iteration: 101801
loss: 0.9973505139350891,grad_norm: 0.9589219719791608, iteration: 101802
loss: 0.9777549505233765,grad_norm: 0.9999992350426352, iteration: 101803
loss: 1.0132174491882324,grad_norm: 0.9999990627372767, iteration: 101804
loss: 0.9809522032737732,grad_norm: 0.9999992758329698, iteration: 101805
loss: 1.0309876203536987,grad_norm: 0.9999991578359199, iteration: 101806
loss: 1.01514732837677,grad_norm: 0.9999994009194566, iteration: 101807
loss: 1.0498051643371582,grad_norm: 0.999998956955631, iteration: 101808
loss: 1.0416984558105469,grad_norm: 0.9232842504757898, iteration: 101809
loss: 0.9857399463653564,grad_norm: 0.999999294153454, iteration: 101810
loss: 1.0026623010635376,grad_norm: 0.9999990716628941, iteration: 101811
loss: 1.0073509216308594,grad_norm: 0.999999076645567, iteration: 101812
loss: 1.0100525617599487,grad_norm: 0.9999991857318866, iteration: 101813
loss: 0.998056173324585,grad_norm: 0.8313192826337301, iteration: 101814
loss: 1.0503507852554321,grad_norm: 0.9999990918566608, iteration: 101815
loss: 0.9774966239929199,grad_norm: 0.8453215543548506, iteration: 101816
loss: 0.9847770929336548,grad_norm: 0.9618109918001134, iteration: 101817
loss: 0.9994849562644958,grad_norm: 0.9999990672452018, iteration: 101818
loss: 1.0155417919158936,grad_norm: 0.9999990167795878, iteration: 101819
loss: 0.9781871438026428,grad_norm: 0.9999990639571785, iteration: 101820
loss: 1.014211654663086,grad_norm: 0.9999990570373523, iteration: 101821
loss: 1.016084909439087,grad_norm: 0.999999603422256, iteration: 101822
loss: 0.9841222763061523,grad_norm: 0.8816067717253854, iteration: 101823
loss: 1.0418744087219238,grad_norm: 0.9999992739941652, iteration: 101824
loss: 0.9677329659461975,grad_norm: 0.9999991626645105, iteration: 101825
loss: 1.0109666585922241,grad_norm: 0.9999991376291867, iteration: 101826
loss: 1.005688190460205,grad_norm: 0.999999179642025, iteration: 101827
loss: 0.9885220527648926,grad_norm: 0.9468312729520381, iteration: 101828
loss: 1.0616713762283325,grad_norm: 0.9999993030886608, iteration: 101829
loss: 0.9881467819213867,grad_norm: 0.893485943696982, iteration: 101830
loss: 0.9898083806037903,grad_norm: 0.9443001801683337, iteration: 101831
loss: 0.954023540019989,grad_norm: 0.9999989355614302, iteration: 101832
loss: 0.9920710325241089,grad_norm: 0.9311647986014193, iteration: 101833
loss: 1.007174015045166,grad_norm: 0.9999991772392274, iteration: 101834
loss: 1.0190532207489014,grad_norm: 0.9622553649811295, iteration: 101835
loss: 0.9921594262123108,grad_norm: 0.9999992289928197, iteration: 101836
loss: 0.9836111068725586,grad_norm: 0.9999991977235191, iteration: 101837
loss: 0.9956928491592407,grad_norm: 0.9775988806743686, iteration: 101838
loss: 1.0091091394424438,grad_norm: 0.9773742090732126, iteration: 101839
loss: 1.0187506675720215,grad_norm: 0.9999990966494645, iteration: 101840
loss: 1.004380464553833,grad_norm: 0.9999991067834917, iteration: 101841
loss: 1.0125397443771362,grad_norm: 0.9799312838153698, iteration: 101842
loss: 1.0320161581039429,grad_norm: 0.9935148320611661, iteration: 101843
loss: 1.003884196281433,grad_norm: 0.8904898753056301, iteration: 101844
loss: 1.0119489431381226,grad_norm: 0.9999992187526674, iteration: 101845
loss: 1.030792236328125,grad_norm: 0.954754377018086, iteration: 101846
loss: 1.003381609916687,grad_norm: 0.9999990412064375, iteration: 101847
loss: 1.0113797187805176,grad_norm: 0.9999992400287063, iteration: 101848
loss: 0.9638674855232239,grad_norm: 0.9999992955495294, iteration: 101849
loss: 1.0364781618118286,grad_norm: 0.9496126435024645, iteration: 101850
loss: 1.0246076583862305,grad_norm: 0.9999996482964573, iteration: 101851
loss: 1.0024495124816895,grad_norm: 0.9692390299648315, iteration: 101852
loss: 1.0145349502563477,grad_norm: 0.9999996862257766, iteration: 101853
loss: 1.001128077507019,grad_norm: 0.9999992090584123, iteration: 101854
loss: 0.9956414103507996,grad_norm: 0.9480090649136855, iteration: 101855
loss: 1.0079699754714966,grad_norm: 0.9999990537292307, iteration: 101856
loss: 1.0383909940719604,grad_norm: 0.9999999300697158, iteration: 101857
loss: 0.9897153973579407,grad_norm: 0.9999991284771914, iteration: 101858
loss: 1.0178053379058838,grad_norm: 0.8234148384279814, iteration: 101859
loss: 1.00255286693573,grad_norm: 0.9871101077720736, iteration: 101860
loss: 1.0052523612976074,grad_norm: 0.9999993365342488, iteration: 101861
loss: 0.9694212079048157,grad_norm: 0.9212228472146625, iteration: 101862
loss: 0.9738225340843201,grad_norm: 0.9999990198246774, iteration: 101863
loss: 1.0364803075790405,grad_norm: 0.9372969563848518, iteration: 101864
loss: 1.0038775205612183,grad_norm: 0.9999992139317757, iteration: 101865
loss: 1.04330313205719,grad_norm: 0.9999993948517129, iteration: 101866
loss: 1.0244412422180176,grad_norm: 0.9999991180544177, iteration: 101867
loss: 1.00716233253479,grad_norm: 0.9999992218045408, iteration: 101868
loss: 0.9896636605262756,grad_norm: 0.9999990294527026, iteration: 101869
loss: 1.0014046430587769,grad_norm: 0.9999989763943037, iteration: 101870
loss: 1.0061713457107544,grad_norm: 0.9999990779279763, iteration: 101871
loss: 0.9710400700569153,grad_norm: 0.9999991641379866, iteration: 101872
loss: 1.0201853513717651,grad_norm: 0.9999992082147072, iteration: 101873
loss: 0.9752541184425354,grad_norm: 0.9999992420913898, iteration: 101874
loss: 1.0175987482070923,grad_norm: 0.9999992015501662, iteration: 101875
loss: 0.9842847585678101,grad_norm: 0.9242820905457256, iteration: 101876
loss: 1.110046625137329,grad_norm: 0.999999343725905, iteration: 101877
loss: 1.0340566635131836,grad_norm: 0.9999990883650722, iteration: 101878
loss: 1.062361240386963,grad_norm: 0.9999992477096858, iteration: 101879
loss: 1.0211931467056274,grad_norm: 0.9999995911554956, iteration: 101880
loss: 1.0341289043426514,grad_norm: 0.9999992223925057, iteration: 101881
loss: 1.000136375427246,grad_norm: 0.9999990194850792, iteration: 101882
loss: 1.0066332817077637,grad_norm: 0.9999991052800231, iteration: 101883
loss: 0.9867053627967834,grad_norm: 0.9651097290788125, iteration: 101884
loss: 0.9659828543663025,grad_norm: 0.9999991564566925, iteration: 101885
loss: 0.9761240482330322,grad_norm: 0.999999021054945, iteration: 101886
loss: 1.0421990156173706,grad_norm: 0.9999997754409594, iteration: 101887
loss: 0.985539972782135,grad_norm: 0.9088740045851866, iteration: 101888
loss: 1.0013505220413208,grad_norm: 0.9571499391514423, iteration: 101889
loss: 1.0315667390823364,grad_norm: 0.9999997595055229, iteration: 101890
loss: 1.0017167329788208,grad_norm: 0.8910979835748599, iteration: 101891
loss: 0.9689474105834961,grad_norm: 0.999999103291596, iteration: 101892
loss: 1.0037753582000732,grad_norm: 0.9999990724680858, iteration: 101893
loss: 1.0063143968582153,grad_norm: 0.9999992130309087, iteration: 101894
loss: 0.9682231545448303,grad_norm: 0.9999992090751076, iteration: 101895
loss: 1.0223830938339233,grad_norm: 0.9999992502583391, iteration: 101896
loss: 0.9390744566917419,grad_norm: 0.9999991103537754, iteration: 101897
loss: 1.0129624605178833,grad_norm: 0.9999992567831004, iteration: 101898
loss: 1.0088304281234741,grad_norm: 0.9999992393885461, iteration: 101899
loss: 1.0173614025115967,grad_norm: 0.9999991346167296, iteration: 101900
loss: 1.0166467428207397,grad_norm: 0.9999991446469634, iteration: 101901
loss: 1.0133678913116455,grad_norm: 0.9113476193539122, iteration: 101902
loss: 1.0525298118591309,grad_norm: 0.9999998728103082, iteration: 101903
loss: 0.9955244660377502,grad_norm: 0.9999991475283678, iteration: 101904
loss: 1.015235185623169,grad_norm: 0.999998963278005, iteration: 101905
loss: 0.9956430196762085,grad_norm: 0.9999989551907762, iteration: 101906
loss: 1.0607612133026123,grad_norm: 0.9999991788526836, iteration: 101907
loss: 0.9636234045028687,grad_norm: 0.9999991742681107, iteration: 101908
loss: 0.989362359046936,grad_norm: 0.9999991774062345, iteration: 101909
loss: 1.0260896682739258,grad_norm: 0.9999995538807944, iteration: 101910
loss: 1.0047037601470947,grad_norm: 0.8881231680526939, iteration: 101911
loss: 0.9770553708076477,grad_norm: 0.9199525268270047, iteration: 101912
loss: 0.9668949246406555,grad_norm: 0.9266902505923966, iteration: 101913
loss: 1.0298027992248535,grad_norm: 0.9722361868382667, iteration: 101914
loss: 1.011983871459961,grad_norm: 0.8518806903042564, iteration: 101915
loss: 1.0035613775253296,grad_norm: 0.9999991273326423, iteration: 101916
loss: 0.9946408271789551,grad_norm: 0.9999991119425198, iteration: 101917
loss: 0.9380499124526978,grad_norm: 0.9999991304456217, iteration: 101918
loss: 0.9937496781349182,grad_norm: 0.9999990500140781, iteration: 101919
loss: 1.0069605112075806,grad_norm: 0.9999991036596597, iteration: 101920
loss: 0.9867410063743591,grad_norm: 0.9718669323847274, iteration: 101921
loss: 1.0081151723861694,grad_norm: 0.9871626853815647, iteration: 101922
loss: 0.9919928908348083,grad_norm: 0.9999990749811756, iteration: 101923
loss: 0.9813734889030457,grad_norm: 0.999998996557549, iteration: 101924
loss: 1.0395931005477905,grad_norm: 0.99999914465898, iteration: 101925
loss: 0.9904794692993164,grad_norm: 0.93409064111195, iteration: 101926
loss: 1.0059324502944946,grad_norm: 0.9854734624818507, iteration: 101927
loss: 1.0320487022399902,grad_norm: 0.9999991779364946, iteration: 101928
loss: 1.0380115509033203,grad_norm: 0.9999989611431287, iteration: 101929
loss: 1.0712815523147583,grad_norm: 0.9999991277223208, iteration: 101930
loss: 0.9670810699462891,grad_norm: 0.9999990974586042, iteration: 101931
loss: 1.034488320350647,grad_norm: 0.9999991679168815, iteration: 101932
loss: 1.021627426147461,grad_norm: 0.8361998849958225, iteration: 101933
loss: 1.0081028938293457,grad_norm: 0.9999989846478845, iteration: 101934
loss: 1.007752537727356,grad_norm: 0.999999034683204, iteration: 101935
loss: 1.0081979036331177,grad_norm: 0.9999991689499397, iteration: 101936
loss: 1.007175326347351,grad_norm: 0.9999997930074959, iteration: 101937
loss: 0.991064727306366,grad_norm: 0.9814502821878579, iteration: 101938
loss: 1.0225708484649658,grad_norm: 0.9999991467137355, iteration: 101939
loss: 0.9995577931404114,grad_norm: 0.8154867411731165, iteration: 101940
loss: 0.9631186127662659,grad_norm: 0.9999991250118027, iteration: 101941
loss: 0.9786849617958069,grad_norm: 0.9879129938691475, iteration: 101942
loss: 0.9785440564155579,grad_norm: 0.901658794927665, iteration: 101943
loss: 1.0273258686065674,grad_norm: 0.9999990797682523, iteration: 101944
loss: 0.9663102030754089,grad_norm: 0.9999993265353048, iteration: 101945
loss: 1.0141429901123047,grad_norm: 0.9356968059523313, iteration: 101946
loss: 1.0449975728988647,grad_norm: 0.9999994849637714, iteration: 101947
loss: 1.0042864084243774,grad_norm: 0.9999990291355837, iteration: 101948
loss: 1.0127283334732056,grad_norm: 0.9999991656709797, iteration: 101949
loss: 1.0345555543899536,grad_norm: 0.9999991061112135, iteration: 101950
loss: 1.0331991910934448,grad_norm: 0.8760797539304495, iteration: 101951
loss: 0.9804863929748535,grad_norm: 0.8853272188197697, iteration: 101952
loss: 0.9982912540435791,grad_norm: 0.897073135580424, iteration: 101953
loss: 1.030930995941162,grad_norm: 0.9999991115390767, iteration: 101954
loss: 0.9705271124839783,grad_norm: 0.9999989465919326, iteration: 101955
loss: 1.0236769914627075,grad_norm: 0.9657033878374326, iteration: 101956
loss: 1.0101760625839233,grad_norm: 0.925296539053595, iteration: 101957
loss: 1.0002425909042358,grad_norm: 0.930187987653823, iteration: 101958
loss: 1.0238195657730103,grad_norm: 0.9999990087265076, iteration: 101959
loss: 1.0216443538665771,grad_norm: 0.8618522884923457, iteration: 101960
loss: 0.9887347221374512,grad_norm: 0.9999990797203677, iteration: 101961
loss: 1.0058976411819458,grad_norm: 0.999999087264392, iteration: 101962
loss: 0.9943354725837708,grad_norm: 0.907672596829997, iteration: 101963
loss: 0.9872263669967651,grad_norm: 0.9607600145935445, iteration: 101964
loss: 1.0177595615386963,grad_norm: 0.9388325188278641, iteration: 101965
loss: 0.9758076667785645,grad_norm: 0.9648768792689216, iteration: 101966
loss: 0.9940130114555359,grad_norm: 0.9999990502159665, iteration: 101967
loss: 1.0291781425476074,grad_norm: 0.999999192225384, iteration: 101968
loss: 0.9956908822059631,grad_norm: 0.9999991100581247, iteration: 101969
loss: 1.0008448362350464,grad_norm: 0.9890358657670308, iteration: 101970
loss: 0.9672374725341797,grad_norm: 0.9999991172940853, iteration: 101971
loss: 1.0139031410217285,grad_norm: 0.9964630982121997, iteration: 101972
loss: 1.0222737789154053,grad_norm: 0.9999991799995245, iteration: 101973
loss: 1.0172346830368042,grad_norm: 0.9999990725064114, iteration: 101974
loss: 1.0151689052581787,grad_norm: 0.9999989899035704, iteration: 101975
loss: 0.9412714242935181,grad_norm: 0.999999068181359, iteration: 101976
loss: 0.9962065815925598,grad_norm: 0.9426989441042203, iteration: 101977
loss: 1.0494918823242188,grad_norm: 0.9999992207673719, iteration: 101978
loss: 1.0585672855377197,grad_norm: 0.9999995480378066, iteration: 101979
loss: 1.0361312627792358,grad_norm: 0.9999998548971138, iteration: 101980
loss: 0.9697058796882629,grad_norm: 0.9999990634797429, iteration: 101981
loss: 0.987099826335907,grad_norm: 0.9999991732682478, iteration: 101982
loss: 1.0030792951583862,grad_norm: 0.9999991145722353, iteration: 101983
loss: 1.0100218057632446,grad_norm: 0.9999990701962345, iteration: 101984
loss: 1.0101382732391357,grad_norm: 0.9055886950653229, iteration: 101985
loss: 1.0306651592254639,grad_norm: 0.999999198825053, iteration: 101986
loss: 0.962022066116333,grad_norm: 0.9107083807726253, iteration: 101987
loss: 0.9915011525154114,grad_norm: 0.9999991665166598, iteration: 101988
loss: 1.0042366981506348,grad_norm: 0.9999992730059173, iteration: 101989
loss: 0.9857414364814758,grad_norm: 0.9910236682589216, iteration: 101990
loss: 1.000689148902893,grad_norm: 0.999999198543518, iteration: 101991
loss: 0.9831058979034424,grad_norm: 0.9999992566573079, iteration: 101992
loss: 0.9957268834114075,grad_norm: 0.9843686692593039, iteration: 101993
loss: 1.0169448852539062,grad_norm: 0.9999992205500877, iteration: 101994
loss: 0.9929466843605042,grad_norm: 0.9130072414363966, iteration: 101995
loss: 0.9935522675514221,grad_norm: 0.8561952440257775, iteration: 101996
loss: 1.0050828456878662,grad_norm: 0.9999991604344969, iteration: 101997
loss: 1.00764000415802,grad_norm: 0.9178264323684442, iteration: 101998
loss: 1.1109925508499146,grad_norm: 0.9999994029361711, iteration: 101999
loss: 0.9899564385414124,grad_norm: 0.9999992173511082, iteration: 102000
loss: 1.0200185775756836,grad_norm: 0.9999989764900081, iteration: 102001
loss: 1.0151984691619873,grad_norm: 0.9999990478632683, iteration: 102002
loss: 1.0108484029769897,grad_norm: 0.9275094344101905, iteration: 102003
loss: 0.9994867444038391,grad_norm: 0.9999995790356555, iteration: 102004
loss: 0.9957060813903809,grad_norm: 0.9999991601065851, iteration: 102005
loss: 1.0468180179595947,grad_norm: 0.999999085259738, iteration: 102006
loss: 1.0345934629440308,grad_norm: 0.9999991256338643, iteration: 102007
loss: 1.0130412578582764,grad_norm: 0.9999991050785475, iteration: 102008
loss: 1.009932041168213,grad_norm: 0.9999991408612848, iteration: 102009
loss: 1.0234514474868774,grad_norm: 0.9846106572828478, iteration: 102010
loss: 1.0086439847946167,grad_norm: 0.9999990220679255, iteration: 102011
loss: 0.9530179500579834,grad_norm: 0.9840538339181352, iteration: 102012
loss: 0.9981437921524048,grad_norm: 0.9690436139185973, iteration: 102013
loss: 0.9913844466209412,grad_norm: 0.99999914899728, iteration: 102014
loss: 1.0207089185714722,grad_norm: 0.9950326863774782, iteration: 102015
loss: 1.0229853391647339,grad_norm: 0.9999997794099788, iteration: 102016
loss: 1.0085104703903198,grad_norm: 0.9999990789843615, iteration: 102017
loss: 1.021965503692627,grad_norm: 0.9204203504147463, iteration: 102018
loss: 0.9899342060089111,grad_norm: 0.9999990378972192, iteration: 102019
loss: 1.0022404193878174,grad_norm: 0.9999990044805727, iteration: 102020
loss: 1.0111576318740845,grad_norm: 0.9363677689396284, iteration: 102021
loss: 1.0210843086242676,grad_norm: 0.9999992661281563, iteration: 102022
loss: 0.992728590965271,grad_norm: 0.9187138936062201, iteration: 102023
loss: 0.9900389313697815,grad_norm: 0.999999232157094, iteration: 102024
loss: 1.0674169063568115,grad_norm: 0.9999994381554249, iteration: 102025
loss: 0.9860833883285522,grad_norm: 0.9999991269336188, iteration: 102026
loss: 1.03069269657135,grad_norm: 0.9491350766499939, iteration: 102027
loss: 0.9967361688613892,grad_norm: 0.9999997656077039, iteration: 102028
loss: 1.006070613861084,grad_norm: 0.9999989679289565, iteration: 102029
loss: 0.9855601191520691,grad_norm: 0.9728036383282571, iteration: 102030
loss: 0.9511476159095764,grad_norm: 0.999999209171215, iteration: 102031
loss: 1.0040980577468872,grad_norm: 0.9999991777953666, iteration: 102032
loss: 1.0400927066802979,grad_norm: 0.9999990842383958, iteration: 102033
loss: 1.03374445438385,grad_norm: 0.9939076176314384, iteration: 102034
loss: 1.0035103559494019,grad_norm: 0.9999989879320026, iteration: 102035
loss: 1.0477783679962158,grad_norm: 0.9999990717268122, iteration: 102036
loss: 1.0332465171813965,grad_norm: 0.9999990689156071, iteration: 102037
loss: 1.0022228956222534,grad_norm: 0.9999994139354595, iteration: 102038
loss: 1.048742651939392,grad_norm: 0.999999670986079, iteration: 102039
loss: 1.054416298866272,grad_norm: 0.9999993539710288, iteration: 102040
loss: 0.9701525568962097,grad_norm: 0.9088432062626909, iteration: 102041
loss: 1.0136094093322754,grad_norm: 0.9999991574885361, iteration: 102042
loss: 1.0338232517242432,grad_norm: 0.8245384072959164, iteration: 102043
loss: 0.987262487411499,grad_norm: 0.9999991356084217, iteration: 102044
loss: 1.0109004974365234,grad_norm: 0.999999249214184, iteration: 102045
loss: 1.2388393878936768,grad_norm: 0.9999998246471183, iteration: 102046
loss: 1.1184813976287842,grad_norm: 0.9999999621496476, iteration: 102047
loss: 1.0162439346313477,grad_norm: 0.9581167072923595, iteration: 102048
loss: 1.0155924558639526,grad_norm: 0.886968612572653, iteration: 102049
loss: 1.245238184928894,grad_norm: 0.9999999388929368, iteration: 102050
loss: 1.096628189086914,grad_norm: 0.9999992418871159, iteration: 102051
loss: 1.017139196395874,grad_norm: 0.9999990403015112, iteration: 102052
loss: 1.020422339439392,grad_norm: 0.9999991815429229, iteration: 102053
loss: 0.9726570248603821,grad_norm: 0.9514598455643728, iteration: 102054
loss: 1.0066728591918945,grad_norm: 0.9542682095250958, iteration: 102055
loss: 1.015528678894043,grad_norm: 0.999999969568161, iteration: 102056
loss: 0.9901779890060425,grad_norm: 0.9999990915069372, iteration: 102057
loss: 0.9685109257698059,grad_norm: 0.9999992366919481, iteration: 102058
loss: 1.0088833570480347,grad_norm: 0.9999994506984119, iteration: 102059
loss: 0.9864875078201294,grad_norm: 0.9999991655321249, iteration: 102060
loss: 1.0116647481918335,grad_norm: 0.9877474776745556, iteration: 102061
loss: 0.9680353999137878,grad_norm: 0.9999990498247286, iteration: 102062
loss: 0.9957767724990845,grad_norm: 0.9999993085222719, iteration: 102063
loss: 0.988440752029419,grad_norm: 0.9999991198136237, iteration: 102064
loss: 0.9828781485557556,grad_norm: 0.9685087992810495, iteration: 102065
loss: 1.0187076330184937,grad_norm: 0.9999991969543223, iteration: 102066
loss: 1.0207842588424683,grad_norm: 0.9999996830074325, iteration: 102067
loss: 0.9841456413269043,grad_norm: 0.9999995901259895, iteration: 102068
loss: 1.0151550769805908,grad_norm: 0.9999991322924271, iteration: 102069
loss: 0.9886910915374756,grad_norm: 0.9515310246701933, iteration: 102070
loss: 0.986934244632721,grad_norm: 0.9999990481144552, iteration: 102071
loss: 1.0096304416656494,grad_norm: 0.999999237649663, iteration: 102072
loss: 0.9881647825241089,grad_norm: 0.999999280224146, iteration: 102073
loss: 0.9894940853118896,grad_norm: 0.9504365086472992, iteration: 102074
loss: 0.9937040209770203,grad_norm: 0.9999990760383997, iteration: 102075
loss: 0.9998266696929932,grad_norm: 0.9999990090093624, iteration: 102076
loss: 0.983331024646759,grad_norm: 0.9999992206969932, iteration: 102077
loss: 1.0171782970428467,grad_norm: 0.9999992462369064, iteration: 102078
loss: 1.0289878845214844,grad_norm: 0.9999991146241506, iteration: 102079
loss: 1.0025712251663208,grad_norm: 0.9999989964772269, iteration: 102080
loss: 1.0103217363357544,grad_norm: 0.8406581786586789, iteration: 102081
loss: 1.001815676689148,grad_norm: 0.999999208466684, iteration: 102082
loss: 1.0346035957336426,grad_norm: 0.9999990437414055, iteration: 102083
loss: 0.990990400314331,grad_norm: 0.9999990159726091, iteration: 102084
loss: 1.0193792581558228,grad_norm: 0.8334350508853723, iteration: 102085
loss: 0.9998905062675476,grad_norm: 0.9999991556260297, iteration: 102086
loss: 0.9902950525283813,grad_norm: 0.9999991441426576, iteration: 102087
loss: 0.9998345375061035,grad_norm: 0.9999991997874154, iteration: 102088
loss: 0.9843911528587341,grad_norm: 0.9557869205350726, iteration: 102089
loss: 1.0571930408477783,grad_norm: 0.9999993200863643, iteration: 102090
loss: 1.0064181089401245,grad_norm: 0.9496988907780973, iteration: 102091
loss: 1.024753212928772,grad_norm: 0.999999312431582, iteration: 102092
loss: 1.017750859260559,grad_norm: 0.9999991743832571, iteration: 102093
loss: 1.1969374418258667,grad_norm: 0.9999999936940053, iteration: 102094
loss: 0.9740449786186218,grad_norm: 0.9999991438873739, iteration: 102095
loss: 1.0878297090530396,grad_norm: 0.9999990740308886, iteration: 102096
loss: 1.0073407888412476,grad_norm: 0.9999992299764776, iteration: 102097
loss: 1.0118234157562256,grad_norm: 0.9999992766634526, iteration: 102098
loss: 1.0203114748001099,grad_norm: 0.9063516727302494, iteration: 102099
loss: 1.0533324480056763,grad_norm: 0.9999992684324637, iteration: 102100
loss: 1.0223134756088257,grad_norm: 0.9999990434407809, iteration: 102101
loss: 0.9974725842475891,grad_norm: 0.966746637817501, iteration: 102102
loss: 0.9736010432243347,grad_norm: 0.9999991678623017, iteration: 102103
loss: 1.0289063453674316,grad_norm: 0.8996911509852765, iteration: 102104
loss: 0.9945366978645325,grad_norm: 0.9999990918209372, iteration: 102105
loss: 1.0208274126052856,grad_norm: 0.9999990723103585, iteration: 102106
loss: 1.0146490335464478,grad_norm: 0.9913816810873176, iteration: 102107
loss: 1.004911184310913,grad_norm: 0.9999991761013354, iteration: 102108
loss: 0.9881250262260437,grad_norm: 0.9999991520056628, iteration: 102109
loss: 0.9613121747970581,grad_norm: 0.9999992104128574, iteration: 102110
loss: 1.0422896146774292,grad_norm: 0.9999991225240396, iteration: 102111
loss: 0.9941398501396179,grad_norm: 0.9727823388728999, iteration: 102112
loss: 1.0077733993530273,grad_norm: 0.9999991746883014, iteration: 102113
loss: 0.9967040419578552,grad_norm: 0.9999990920652354, iteration: 102114
loss: 1.0145459175109863,grad_norm: 0.9924252717961097, iteration: 102115
loss: 1.002079725265503,grad_norm: 0.9999992613458786, iteration: 102116
loss: 0.9987190365791321,grad_norm: 0.9081807727086718, iteration: 102117
loss: 0.9445471167564392,grad_norm: 0.9999991545743386, iteration: 102118
loss: 1.092729926109314,grad_norm: 0.9999992915169734, iteration: 102119
loss: 0.9817811846733093,grad_norm: 0.9999990900978992, iteration: 102120
loss: 1.043561339378357,grad_norm: 0.9999993164077446, iteration: 102121
loss: 0.989640474319458,grad_norm: 0.9999989387061138, iteration: 102122
loss: 0.9673963785171509,grad_norm: 0.9679127200774038, iteration: 102123
loss: 1.023188829421997,grad_norm: 0.99999916133226, iteration: 102124
loss: 0.9946388006210327,grad_norm: 0.999999169493768, iteration: 102125
loss: 1.1122190952301025,grad_norm: 0.9999994655479936, iteration: 102126
loss: 1.014073133468628,grad_norm: 0.9999990926828979, iteration: 102127
loss: 1.004052996635437,grad_norm: 0.9999991896673532, iteration: 102128
loss: 0.9991559386253357,grad_norm: 0.9886413884168703, iteration: 102129
loss: 1.0008560419082642,grad_norm: 0.9682375381609565, iteration: 102130
loss: 1.0182175636291504,grad_norm: 0.9999990302387743, iteration: 102131
loss: 1.0805933475494385,grad_norm: 0.9999992971698564, iteration: 102132
loss: 1.1304728984832764,grad_norm: 0.9999992261100288, iteration: 102133
loss: 1.0083658695220947,grad_norm: 0.9610843539036878, iteration: 102134
loss: 0.9752236008644104,grad_norm: 0.9999990623139751, iteration: 102135
loss: 0.9987440705299377,grad_norm: 0.9999993864725838, iteration: 102136
loss: 0.9948322176933289,grad_norm: 0.9999992056889744, iteration: 102137
loss: 0.9793058037757874,grad_norm: 0.9999992367010214, iteration: 102138
loss: 0.9878663420677185,grad_norm: 0.999999093983336, iteration: 102139
loss: 0.9832534790039062,grad_norm: 0.9999991294291308, iteration: 102140
loss: 0.9874917268753052,grad_norm: 0.9999990491728687, iteration: 102141
loss: 0.9936010837554932,grad_norm: 0.9999991196090126, iteration: 102142
loss: 0.9757285118103027,grad_norm: 0.9999992871047273, iteration: 102143
loss: 1.0000641345977783,grad_norm: 0.9214821380548331, iteration: 102144
loss: 0.9804348349571228,grad_norm: 0.9999990869327138, iteration: 102145
loss: 0.9825283885002136,grad_norm: 0.9348417292464277, iteration: 102146
loss: 0.995042622089386,grad_norm: 0.8741379977209845, iteration: 102147
loss: 0.9467459917068481,grad_norm: 0.8846436396233742, iteration: 102148
loss: 1.0234017372131348,grad_norm: 0.9999998938651533, iteration: 102149
loss: 1.0019450187683105,grad_norm: 0.9999990721705975, iteration: 102150
loss: 0.9817934036254883,grad_norm: 0.9999991842020213, iteration: 102151
loss: 1.037864327430725,grad_norm: 0.9999989841464871, iteration: 102152
loss: 1.0006486177444458,grad_norm: 0.9999989794619841, iteration: 102153
loss: 0.9910653233528137,grad_norm: 0.9999990753278672, iteration: 102154
loss: 0.9965338110923767,grad_norm: 0.9839329248375243, iteration: 102155
loss: 1.0098552703857422,grad_norm: 0.9939936894858328, iteration: 102156
loss: 0.971699059009552,grad_norm: 0.9953410553113214, iteration: 102157
loss: 0.9525511860847473,grad_norm: 0.9627319318197758, iteration: 102158
loss: 0.971612274646759,grad_norm: 0.9535303541834136, iteration: 102159
loss: 0.9856564998626709,grad_norm: 0.9999993543673591, iteration: 102160
loss: 1.0017906427383423,grad_norm: 0.9999991258567941, iteration: 102161
loss: 1.0066758394241333,grad_norm: 0.9999994818914077, iteration: 102162
loss: 1.0026676654815674,grad_norm: 0.9540557597399137, iteration: 102163
loss: 1.0010125637054443,grad_norm: 0.877123794856619, iteration: 102164
loss: 1.0411494970321655,grad_norm: 0.9999991692770199, iteration: 102165
loss: 1.0356460809707642,grad_norm: 0.999999317389029, iteration: 102166
loss: 0.978034257888794,grad_norm: 0.8959620746189849, iteration: 102167
loss: 0.9847850799560547,grad_norm: 0.9435214990522613, iteration: 102168
loss: 1.0179812908172607,grad_norm: 0.98662191894966, iteration: 102169
loss: 0.9902758002281189,grad_norm: 0.9999991920642651, iteration: 102170
loss: 1.043462872505188,grad_norm: 0.9999991873922581, iteration: 102171
loss: 0.9572479724884033,grad_norm: 0.9999991120276972, iteration: 102172
loss: 0.9771621227264404,grad_norm: 0.9638311076734443, iteration: 102173
loss: 1.0454336404800415,grad_norm: 0.9999991419073417, iteration: 102174
loss: 0.9910732507705688,grad_norm: 0.9506764440352587, iteration: 102175
loss: 0.9954380989074707,grad_norm: 0.9694264033713473, iteration: 102176
loss: 0.981499433517456,grad_norm: 0.9999994503306441, iteration: 102177
loss: 0.9844008088111877,grad_norm: 0.9999991739962263, iteration: 102178
loss: 0.9880657196044922,grad_norm: 0.999999069543717, iteration: 102179
loss: 0.9938262701034546,grad_norm: 0.9999988940133268, iteration: 102180
loss: 0.9809116125106812,grad_norm: 0.9999991802079014, iteration: 102181
loss: 1.155867099761963,grad_norm: 0.9999997145748051, iteration: 102182
loss: 1.0113215446472168,grad_norm: 0.9999998074599473, iteration: 102183
loss: 0.9866155982017517,grad_norm: 0.9700035743561208, iteration: 102184
loss: 0.9714279174804688,grad_norm: 0.9999991114814858, iteration: 102185
loss: 1.0448108911514282,grad_norm: 0.8867431557721062, iteration: 102186
loss: 0.9832440614700317,grad_norm: 0.9999992100974137, iteration: 102187
loss: 1.0288734436035156,grad_norm: 0.9999998580632228, iteration: 102188
loss: 0.985114336013794,grad_norm: 0.9137323930141826, iteration: 102189
loss: 1.0062756538391113,grad_norm: 0.9999992486739279, iteration: 102190
loss: 0.982728898525238,grad_norm: 0.9999995911323797, iteration: 102191
loss: 0.9831821322441101,grad_norm: 0.9999990208665155, iteration: 102192
loss: 1.0147920846939087,grad_norm: 0.9999992554869831, iteration: 102193
loss: 0.9686740040779114,grad_norm: 0.9999990045578956, iteration: 102194
loss: 0.9664549827575684,grad_norm: 0.8615994073271002, iteration: 102195
loss: 1.0173946619033813,grad_norm: 0.9999990525682232, iteration: 102196
loss: 1.0404552221298218,grad_norm: 0.9999990752248084, iteration: 102197
loss: 0.9652772545814514,grad_norm: 0.9999994564223181, iteration: 102198
loss: 1.0062792301177979,grad_norm: 0.9701055162422623, iteration: 102199
loss: 0.9921835660934448,grad_norm: 0.97321899551643, iteration: 102200
loss: 0.9923419952392578,grad_norm: 0.997636984631273, iteration: 102201
loss: 1.0115488767623901,grad_norm: 0.999999132494312, iteration: 102202
loss: 1.0467711687088013,grad_norm: 0.927117230436563, iteration: 102203
loss: 1.0119165182113647,grad_norm: 0.9999991293861262, iteration: 102204
loss: 0.9778693318367004,grad_norm: 0.9999990419731614, iteration: 102205
loss: 0.9789426922798157,grad_norm: 0.8497504254246564, iteration: 102206
loss: 0.9750457406044006,grad_norm: 0.9999989336174028, iteration: 102207
loss: 0.9771957993507385,grad_norm: 0.9687813235740892, iteration: 102208
loss: 0.9814845323562622,grad_norm: 0.9999989613265692, iteration: 102209
loss: 1.0332281589508057,grad_norm: 0.9999990827961477, iteration: 102210
loss: 0.9656280875205994,grad_norm: 0.8851428719881848, iteration: 102211
loss: 1.0084830522537231,grad_norm: 0.9591783518957089, iteration: 102212
loss: 1.0188803672790527,grad_norm: 0.9565977276266551, iteration: 102213
loss: 0.997359573841095,grad_norm: 0.9999990141207464, iteration: 102214
loss: 0.9920150637626648,grad_norm: 0.9999990090376932, iteration: 102215
loss: 1.024212121963501,grad_norm: 0.9999990843016925, iteration: 102216
loss: 1.007109522819519,grad_norm: 0.9999992728936374, iteration: 102217
loss: 0.9722767472267151,grad_norm: 0.9999991844348471, iteration: 102218
loss: 0.966177761554718,grad_norm: 0.9821256351865618, iteration: 102219
loss: 0.974575936794281,grad_norm: 0.9999989863095312, iteration: 102220
loss: 0.9925786256790161,grad_norm: 0.9532840959632849, iteration: 102221
loss: 0.9528008699417114,grad_norm: 0.9999991669833082, iteration: 102222
loss: 1.0238909721374512,grad_norm: 0.9999991723198363, iteration: 102223
loss: 1.014652967453003,grad_norm: 0.9999991521619067, iteration: 102224
loss: 1.0033295154571533,grad_norm: 0.9999992638759384, iteration: 102225
loss: 0.9883149266242981,grad_norm: 0.9999991692239912, iteration: 102226
loss: 1.0089528560638428,grad_norm: 0.9999990474691813, iteration: 102227
loss: 1.0231329202651978,grad_norm: 0.9801643505999786, iteration: 102228
loss: 0.9803816080093384,grad_norm: 0.9424711428356559, iteration: 102229
loss: 0.9773833751678467,grad_norm: 0.9999991667443423, iteration: 102230
loss: 1.0318820476531982,grad_norm: 0.9999991557932125, iteration: 102231
loss: 0.9900365471839905,grad_norm: 0.9999992101080196, iteration: 102232
loss: 1.0566895008087158,grad_norm: 0.9999991720814697, iteration: 102233
loss: 1.0080233812332153,grad_norm: 0.9999994478967497, iteration: 102234
loss: 1.0115584135055542,grad_norm: 0.9922294124677266, iteration: 102235
loss: 0.9858258366584778,grad_norm: 0.9682516665564314, iteration: 102236
loss: 1.000914216041565,grad_norm: 0.9007455903966929, iteration: 102237
loss: 0.9974995851516724,grad_norm: 0.9999990507064205, iteration: 102238
loss: 1.0088903903961182,grad_norm: 0.9234694742129906, iteration: 102239
loss: 1.0419864654541016,grad_norm: 0.9670790038128868, iteration: 102240
loss: 1.0029499530792236,grad_norm: 0.9229470918204905, iteration: 102241
loss: 1.048830270767212,grad_norm: 0.9999992933701108, iteration: 102242
loss: 1.0201234817504883,grad_norm: 0.9999997420872924, iteration: 102243
loss: 1.0235621929168701,grad_norm: 0.9364771266227209, iteration: 102244
loss: 0.9812919497489929,grad_norm: 0.9378657932989022, iteration: 102245
loss: 1.0004476308822632,grad_norm: 0.8808366963895793, iteration: 102246
loss: 0.9667724370956421,grad_norm: 0.9999991483091276, iteration: 102247
loss: 0.9873436689376831,grad_norm: 0.9999991016787162, iteration: 102248
loss: 1.0469043254852295,grad_norm: 0.9999992031479399, iteration: 102249
loss: 0.9606133699417114,grad_norm: 0.9753297097888846, iteration: 102250
loss: 0.9922798871994019,grad_norm: 0.9999991520369472, iteration: 102251
loss: 0.9927080869674683,grad_norm: 0.9778706218918246, iteration: 102252
loss: 0.9852163195610046,grad_norm: 0.9999993235267078, iteration: 102253
loss: 1.0177733898162842,grad_norm: 0.9953321502215412, iteration: 102254
loss: 0.9797040820121765,grad_norm: 0.9999991596493379, iteration: 102255
loss: 1.007731318473816,grad_norm: 0.9999990632483793, iteration: 102256
loss: 1.0132111310958862,grad_norm: 0.9737789550314463, iteration: 102257
loss: 1.0185308456420898,grad_norm: 0.9999990557642696, iteration: 102258
loss: 0.9981737732887268,grad_norm: 0.9665317520652374, iteration: 102259
loss: 0.9874470233917236,grad_norm: 0.999999181702386, iteration: 102260
loss: 0.9598195552825928,grad_norm: 0.9999991194962206, iteration: 102261
loss: 1.0193601846694946,grad_norm: 0.999999277656876, iteration: 102262
loss: 0.983770489692688,grad_norm: 0.9999992036518426, iteration: 102263
loss: 1.0142130851745605,grad_norm: 0.9999991361783546, iteration: 102264
loss: 1.0110644102096558,grad_norm: 0.9999990893490738, iteration: 102265
loss: 0.982018232345581,grad_norm: 0.9961422160712295, iteration: 102266
loss: 0.994881272315979,grad_norm: 0.9999992325277335, iteration: 102267
loss: 0.9952718019485474,grad_norm: 0.9999990843856306, iteration: 102268
loss: 1.0101330280303955,grad_norm: 0.9999996657895917, iteration: 102269
loss: 0.9813017249107361,grad_norm: 0.9487170244376079, iteration: 102270
loss: 0.9789602756500244,grad_norm: 0.9975203492557023, iteration: 102271
loss: 1.00545072555542,grad_norm: 0.9930897506281197, iteration: 102272
loss: 1.0108147859573364,grad_norm: 0.8816559622075105, iteration: 102273
loss: 1.0121352672576904,grad_norm: 0.9686935939319765, iteration: 102274
loss: 0.9909254908561707,grad_norm: 0.9999991452177055, iteration: 102275
loss: 1.0270745754241943,grad_norm: 0.9999990964167403, iteration: 102276
loss: 0.9801285862922668,grad_norm: 0.9999991983781933, iteration: 102277
loss: 0.9834678769111633,grad_norm: 0.9619654423753082, iteration: 102278
loss: 1.01316237449646,grad_norm: 0.9999997592276277, iteration: 102279
loss: 0.9702164530754089,grad_norm: 0.9999989353639429, iteration: 102280
loss: 1.009407639503479,grad_norm: 0.99362664880921, iteration: 102281
loss: 1.0144984722137451,grad_norm: 0.9147553697517503, iteration: 102282
loss: 0.9943467974662781,grad_norm: 0.9385022858976787, iteration: 102283
loss: 0.9927761554718018,grad_norm: 0.8954153324709826, iteration: 102284
loss: 0.9617412686347961,grad_norm: 0.9999992537796083, iteration: 102285
loss: 0.9922084808349609,grad_norm: 0.9914209089498266, iteration: 102286
loss: 0.9828866124153137,grad_norm: 0.922221682634473, iteration: 102287
loss: 0.9992395639419556,grad_norm: 0.9551713332554735, iteration: 102288
loss: 1.0002882480621338,grad_norm: 0.9999992074126455, iteration: 102289
loss: 1.0130168199539185,grad_norm: 0.9999992485108943, iteration: 102290
loss: 0.9883772134780884,grad_norm: 0.9999992551481525, iteration: 102291
loss: 0.9840223789215088,grad_norm: 0.9999991746760849, iteration: 102292
loss: 0.9958049654960632,grad_norm: 0.9999989964417254, iteration: 102293
loss: 1.0116515159606934,grad_norm: 0.999999197807059, iteration: 102294
loss: 1.0135470628738403,grad_norm: 0.9094902647794965, iteration: 102295
loss: 1.0323219299316406,grad_norm: 0.9999992299820213, iteration: 102296
loss: 1.034816026687622,grad_norm: 0.9999991135104757, iteration: 102297
loss: 0.9912402033805847,grad_norm: 0.9999990697188449, iteration: 102298
loss: 0.9941004514694214,grad_norm: 0.9999992292893477, iteration: 102299
loss: 0.9802708029747009,grad_norm: 0.9667945411710266, iteration: 102300
loss: 1.0462934970855713,grad_norm: 0.9999991309283878, iteration: 102301
loss: 1.0063079595565796,grad_norm: 0.9466921781934915, iteration: 102302
loss: 1.0047515630722046,grad_norm: 0.9999989255796569, iteration: 102303
loss: 1.0205833911895752,grad_norm: 0.9999991490470226, iteration: 102304
loss: 1.007664442062378,grad_norm: 0.9920216153498977, iteration: 102305
loss: 0.9664125442504883,grad_norm: 0.9999992040459301, iteration: 102306
loss: 0.9625366926193237,grad_norm: 0.9777112629505333, iteration: 102307
loss: 1.0005700588226318,grad_norm: 0.999999646969233, iteration: 102308
loss: 0.95823073387146,grad_norm: 0.9999991742303298, iteration: 102309
loss: 0.9845938086509705,grad_norm: 0.9532760327646451, iteration: 102310
loss: 1.0205390453338623,grad_norm: 0.9999991672623868, iteration: 102311
loss: 1.0088615417480469,grad_norm: 0.9671825294026171, iteration: 102312
loss: 0.9907367825508118,grad_norm: 0.9999989603348725, iteration: 102313
loss: 0.9962252974510193,grad_norm: 0.9999992195488953, iteration: 102314
loss: 1.0123199224472046,grad_norm: 0.9999991247831375, iteration: 102315
loss: 0.9793894290924072,grad_norm: 0.9092520905316516, iteration: 102316
loss: 1.0107307434082031,grad_norm: 0.9999991921696937, iteration: 102317
loss: 1.0045175552368164,grad_norm: 0.9999992899265034, iteration: 102318
loss: 0.999992847442627,grad_norm: 0.9999989353507521, iteration: 102319
loss: 1.0087372064590454,grad_norm: 0.9999989927421475, iteration: 102320
loss: 1.0218936204910278,grad_norm: 0.9999990643626442, iteration: 102321
loss: 0.9822571277618408,grad_norm: 0.9999989704015653, iteration: 102322
loss: 1.029238224029541,grad_norm: 0.9477749194733414, iteration: 102323
loss: 1.0263171195983887,grad_norm: 0.999999312742051, iteration: 102324
loss: 1.0318611860275269,grad_norm: 0.9841567228964982, iteration: 102325
loss: 1.0024319887161255,grad_norm: 0.9999990893032661, iteration: 102326
loss: 0.9742422103881836,grad_norm: 0.9992272222377987, iteration: 102327
loss: 1.015834927558899,grad_norm: 0.9096871194475199, iteration: 102328
loss: 0.9462343454360962,grad_norm: 0.9999992638902591, iteration: 102329
loss: 0.9840458035469055,grad_norm: 0.9411969155783185, iteration: 102330
loss: 1.0090672969818115,grad_norm: 0.9312283004143317, iteration: 102331
loss: 1.0061891078948975,grad_norm: 0.9885003325069882, iteration: 102332
loss: 1.002464771270752,grad_norm: 0.9999992270992287, iteration: 102333
loss: 1.0095125436782837,grad_norm: 0.9999992309756729, iteration: 102334
loss: 1.0157575607299805,grad_norm: 0.9554432290309333, iteration: 102335
loss: 1.0355361700057983,grad_norm: 0.9999991953901765, iteration: 102336
loss: 1.0094777345657349,grad_norm: 0.999998945014078, iteration: 102337
loss: 0.9954159259796143,grad_norm: 0.8986703545712071, iteration: 102338
loss: 1.0229747295379639,grad_norm: 0.9999991308725465, iteration: 102339
loss: 0.9770193099975586,grad_norm: 0.9999991460843097, iteration: 102340
loss: 1.0205588340759277,grad_norm: 0.9999991460462896, iteration: 102341
loss: 0.9861947894096375,grad_norm: 0.8313389925120604, iteration: 102342
loss: 1.056719183921814,grad_norm: 0.9999990144423561, iteration: 102343
loss: 1.0137656927108765,grad_norm: 0.9159948229305106, iteration: 102344
loss: 0.9703569412231445,grad_norm: 0.9183074060209718, iteration: 102345
loss: 1.0162888765335083,grad_norm: 0.9999992141480604, iteration: 102346
loss: 1.0230700969696045,grad_norm: 0.999999045227664, iteration: 102347
loss: 0.9859583973884583,grad_norm: 0.9617288289136791, iteration: 102348
loss: 0.9726231694221497,grad_norm: 0.9999991040753105, iteration: 102349
loss: 0.9870497584342957,grad_norm: 0.9523415739360693, iteration: 102350
loss: 1.0151267051696777,grad_norm: 0.9999989994648889, iteration: 102351
loss: 0.9953915476799011,grad_norm: 0.9113019077588481, iteration: 102352
loss: 0.9743550419807434,grad_norm: 0.8591168282658758, iteration: 102353
loss: 0.9822027087211609,grad_norm: 0.9692553546098177, iteration: 102354
loss: 0.9952749013900757,grad_norm: 0.8960028421481188, iteration: 102355
loss: 0.9718480706214905,grad_norm: 0.999998956405919, iteration: 102356
loss: 0.9443076252937317,grad_norm: 0.9999991679002915, iteration: 102357
loss: 0.9745519757270813,grad_norm: 0.8796700135543388, iteration: 102358
loss: 1.0545889139175415,grad_norm: 0.9999992929838402, iteration: 102359
loss: 1.019714117050171,grad_norm: 0.9999991616117103, iteration: 102360
loss: 0.9937071204185486,grad_norm: 0.9999992933710605, iteration: 102361
loss: 0.9808594584465027,grad_norm: 0.9946181918758422, iteration: 102362
loss: 1.0233978033065796,grad_norm: 0.9999990500833014, iteration: 102363
loss: 0.9945252537727356,grad_norm: 0.9999992369543254, iteration: 102364
loss: 1.0126268863677979,grad_norm: 0.9718734810553007, iteration: 102365
loss: 0.9367596507072449,grad_norm: 0.9999991127769832, iteration: 102366
loss: 0.986029863357544,grad_norm: 0.9999991444782005, iteration: 102367
loss: 0.9787160754203796,grad_norm: 0.9225543127934202, iteration: 102368
loss: 1.0388168096542358,grad_norm: 0.999999511695688, iteration: 102369
loss: 1.0024369955062866,grad_norm: 0.9408424541777259, iteration: 102370
loss: 1.036189079284668,grad_norm: 0.9999996785559312, iteration: 102371
loss: 1.0125281810760498,grad_norm: 0.9999989820844235, iteration: 102372
loss: 0.9536364078521729,grad_norm: 0.8768336261961412, iteration: 102373
loss: 0.9828260540962219,grad_norm: 0.9999990463691325, iteration: 102374
loss: 0.997367262840271,grad_norm: 0.9548040219911181, iteration: 102375
loss: 0.9791734218597412,grad_norm: 0.9007112483533637, iteration: 102376
loss: 1.0048593282699585,grad_norm: 0.9999991229408132, iteration: 102377
loss: 1.0045268535614014,grad_norm: 0.9999994363118969, iteration: 102378
loss: 1.0376043319702148,grad_norm: 0.9999991026546028, iteration: 102379
loss: 0.9882744550704956,grad_norm: 0.9999991366547525, iteration: 102380
loss: 0.9906812906265259,grad_norm: 0.986325793805646, iteration: 102381
loss: 1.0116645097732544,grad_norm: 0.9301612015561743, iteration: 102382
loss: 0.9795721173286438,grad_norm: 0.9999990049133557, iteration: 102383
loss: 0.9630351066589355,grad_norm: 0.9897871795589197, iteration: 102384
loss: 1.0110549926757812,grad_norm: 0.9985231763581148, iteration: 102385
loss: 0.9775712490081787,grad_norm: 0.8099892743221102, iteration: 102386
loss: 1.0272401571273804,grad_norm: 0.9838501046990253, iteration: 102387
loss: 1.0081266164779663,grad_norm: 0.9999991497786777, iteration: 102388
loss: 1.0155059099197388,grad_norm: 0.9999993193392712, iteration: 102389
loss: 1.0033855438232422,grad_norm: 0.9531015353843585, iteration: 102390
loss: 1.0207215547561646,grad_norm: 0.9999991502801747, iteration: 102391
loss: 1.0281531810760498,grad_norm: 0.9999994443801925, iteration: 102392
loss: 0.9995011687278748,grad_norm: 0.9999991770057471, iteration: 102393
loss: 1.0295753479003906,grad_norm: 0.999999310436162, iteration: 102394
loss: 0.980229914188385,grad_norm: 0.9999990556495734, iteration: 102395
loss: 0.9887266755104065,grad_norm: 0.9999992035433662, iteration: 102396
loss: 0.962997555732727,grad_norm: 0.9483363775893869, iteration: 102397
loss: 0.9897313714027405,grad_norm: 0.9999990046667792, iteration: 102398
loss: 1.0366371870040894,grad_norm: 0.9999992439223298, iteration: 102399
loss: 1.0241519212722778,grad_norm: 0.9992554145289871, iteration: 102400
loss: 0.99079430103302,grad_norm: 0.9999993631373567, iteration: 102401
loss: 1.0327308177947998,grad_norm: 0.9999992885645974, iteration: 102402
loss: 1.0060569047927856,grad_norm: 0.9528149737884114, iteration: 102403
loss: 1.003511905670166,grad_norm: 0.9334562079102455, iteration: 102404
loss: 1.0188490152359009,grad_norm: 0.9999991491607695, iteration: 102405
loss: 1.0343937873840332,grad_norm: 0.8034483458369764, iteration: 102406
loss: 0.9945772290229797,grad_norm: 0.9999993661419241, iteration: 102407
loss: 0.9861170649528503,grad_norm: 0.9999990669830264, iteration: 102408
loss: 1.0136103630065918,grad_norm: 0.9999993387125842, iteration: 102409
loss: 0.9994049072265625,grad_norm: 0.9532524110959355, iteration: 102410
loss: 1.0049623250961304,grad_norm: 0.9999992228450463, iteration: 102411
loss: 1.0100537538528442,grad_norm: 0.9999993803076781, iteration: 102412
loss: 1.0127818584442139,grad_norm: 0.9999990512957827, iteration: 102413
loss: 1.031590461730957,grad_norm: 0.9999990606089132, iteration: 102414
loss: 0.9579149484634399,grad_norm: 0.9999992979547128, iteration: 102415
loss: 1.0481311082839966,grad_norm: 0.9999996646926785, iteration: 102416
loss: 0.9751710891723633,grad_norm: 0.9864754888814065, iteration: 102417
loss: 0.9808900356292725,grad_norm: 0.9999991780658767, iteration: 102418
loss: 0.989008903503418,grad_norm: 0.9999993821823994, iteration: 102419
loss: 1.0206068754196167,grad_norm: 0.979344546958158, iteration: 102420
loss: 0.9764983654022217,grad_norm: 0.9999990427995584, iteration: 102421
loss: 1.014804720878601,grad_norm: 0.9999991668859469, iteration: 102422
loss: 1.027679681777954,grad_norm: 0.9999989916849739, iteration: 102423
loss: 0.9808337688446045,grad_norm: 0.9999990235117128, iteration: 102424
loss: 1.0370539426803589,grad_norm: 0.9638294625394653, iteration: 102425
loss: 0.9959522485733032,grad_norm: 0.9844579655448444, iteration: 102426
loss: 1.0062984228134155,grad_norm: 0.9999991467779615, iteration: 102427
loss: 1.0075427293777466,grad_norm: 0.9999990799638953, iteration: 102428
loss: 0.9736853241920471,grad_norm: 0.9333941943198628, iteration: 102429
loss: 1.0125170946121216,grad_norm: 0.9999993983330302, iteration: 102430
loss: 1.0287578105926514,grad_norm: 0.8761729199311851, iteration: 102431
loss: 1.034974455833435,grad_norm: 0.9999995989899486, iteration: 102432
loss: 0.9846185445785522,grad_norm: 0.9153244337547052, iteration: 102433
loss: 1.0079386234283447,grad_norm: 0.9506866638301262, iteration: 102434
loss: 1.0701649188995361,grad_norm: 0.999999459618202, iteration: 102435
loss: 0.9587646722793579,grad_norm: 0.9562560418756364, iteration: 102436
loss: 1.0114293098449707,grad_norm: 0.9999993140227713, iteration: 102437
loss: 0.9807549715042114,grad_norm: 0.9999991662540304, iteration: 102438
loss: 1.0308994054794312,grad_norm: 0.9999992657004719, iteration: 102439
loss: 0.9693077206611633,grad_norm: 0.9999990714420565, iteration: 102440
loss: 0.9787553548812866,grad_norm: 0.999999151035318, iteration: 102441
loss: 1.0069572925567627,grad_norm: 0.9999997976010738, iteration: 102442
loss: 1.0152455568313599,grad_norm: 0.9999990830293491, iteration: 102443
loss: 1.008485198020935,grad_norm: 0.9999992014036448, iteration: 102444
loss: 0.999739408493042,grad_norm: 0.9999991660200905, iteration: 102445
loss: 0.9953305721282959,grad_norm: 0.92610861159646, iteration: 102446
loss: 0.9784497618675232,grad_norm: 0.8325765735103358, iteration: 102447
loss: 1.0050699710845947,grad_norm: 0.9999996610557773, iteration: 102448
loss: 0.991357147693634,grad_norm: 0.9999991459545323, iteration: 102449
loss: 0.9714794158935547,grad_norm: 0.9547225027127889, iteration: 102450
loss: 1.0084694623947144,grad_norm: 0.8979569179413427, iteration: 102451
loss: 0.9756937623023987,grad_norm: 0.9999992742864954, iteration: 102452
loss: 0.9966281056404114,grad_norm: 0.9414116632947439, iteration: 102453
loss: 1.017903208732605,grad_norm: 0.8060882429495649, iteration: 102454
loss: 0.9661423563957214,grad_norm: 0.9433537406896835, iteration: 102455
loss: 0.9912519454956055,grad_norm: 0.9999991629274985, iteration: 102456
loss: 0.9777171015739441,grad_norm: 0.8832472590703395, iteration: 102457
loss: 0.9997827410697937,grad_norm: 0.9999992142050131, iteration: 102458
loss: 0.9780849814414978,grad_norm: 0.9821596681773747, iteration: 102459
loss: 1.0057542324066162,grad_norm: 0.9999990953228279, iteration: 102460
loss: 1.0180792808532715,grad_norm: 0.9999991557618577, iteration: 102461
loss: 0.9962418079376221,grad_norm: 0.9999989857812372, iteration: 102462
loss: 1.0179848670959473,grad_norm: 0.9999992597837299, iteration: 102463
loss: 1.0097527503967285,grad_norm: 0.9999991446844183, iteration: 102464
loss: 0.9835307598114014,grad_norm: 0.9999992610750302, iteration: 102465
loss: 1.0322012901306152,grad_norm: 0.9999994207511931, iteration: 102466
loss: 1.031502604484558,grad_norm: 0.9999991058132247, iteration: 102467
loss: 1.0065683126449585,grad_norm: 0.9999991931437545, iteration: 102468
loss: 0.9666905999183655,grad_norm: 0.7901867815621064, iteration: 102469
loss: 0.9947527050971985,grad_norm: 0.9999990487776562, iteration: 102470
loss: 1.0336986780166626,grad_norm: 0.9770718840389655, iteration: 102471
loss: 0.9982818365097046,grad_norm: 0.8951525253267224, iteration: 102472
loss: 1.052272081375122,grad_norm: 0.9567608940041697, iteration: 102473
loss: 1.1081948280334473,grad_norm: 0.9999996749088031, iteration: 102474
loss: 0.9722051620483398,grad_norm: 0.8704110456679609, iteration: 102475
loss: 1.0008488893508911,grad_norm: 0.9999990483516589, iteration: 102476
loss: 1.0023962259292603,grad_norm: 0.999999108229407, iteration: 102477
loss: 1.0275899171829224,grad_norm: 0.9999991735850994, iteration: 102478
loss: 0.941919207572937,grad_norm: 0.9999990770352029, iteration: 102479
loss: 0.9827132821083069,grad_norm: 0.9706115507584726, iteration: 102480
loss: 1.024329423904419,grad_norm: 0.9999994139371651, iteration: 102481
loss: 1.0359046459197998,grad_norm: 0.9999992560884938, iteration: 102482
loss: 1.005635142326355,grad_norm: 0.9999992177393168, iteration: 102483
loss: 0.9810082316398621,grad_norm: 0.8861413850912507, iteration: 102484
loss: 0.9909330010414124,grad_norm: 0.9158686077584411, iteration: 102485
loss: 1.0148712396621704,grad_norm: 0.9999991837126254, iteration: 102486
loss: 0.9948018193244934,grad_norm: 0.9582315208513232, iteration: 102487
loss: 1.0004502534866333,grad_norm: 0.9712757063057008, iteration: 102488
loss: 1.0511634349822998,grad_norm: 0.9999997783314828, iteration: 102489
loss: 1.011112093925476,grad_norm: 0.9080255171829832, iteration: 102490
loss: 0.989791214466095,grad_norm: 0.9999992178969128, iteration: 102491
loss: 1.0224725008010864,grad_norm: 0.954772595974038, iteration: 102492
loss: 1.022795557975769,grad_norm: 0.9999992132786738, iteration: 102493
loss: 0.9539090991020203,grad_norm: 0.979481315359413, iteration: 102494
loss: 1.047078013420105,grad_norm: 0.9999991470000827, iteration: 102495
loss: 0.9971670508384705,grad_norm: 0.9999991666246566, iteration: 102496
loss: 1.004539132118225,grad_norm: 0.999999187679035, iteration: 102497
loss: 0.9530803561210632,grad_norm: 0.8357180039432123, iteration: 102498
loss: 0.9419930577278137,grad_norm: 0.9531362439706748, iteration: 102499
loss: 1.0232816934585571,grad_norm: 0.9999992379008443, iteration: 102500
loss: 0.9684798717498779,grad_norm: 0.9999992257341493, iteration: 102501
loss: 1.0472790002822876,grad_norm: 0.998382955672114, iteration: 102502
loss: 1.037813663482666,grad_norm: 0.9999991714499067, iteration: 102503
loss: 1.0010961294174194,grad_norm: 0.9999990859933279, iteration: 102504
loss: 0.9833129048347473,grad_norm: 0.999998923563447, iteration: 102505
loss: 1.0553090572357178,grad_norm: 0.9999989328865904, iteration: 102506
loss: 0.9866214990615845,grad_norm: 0.927635787559863, iteration: 102507
loss: 0.9738185405731201,grad_norm: 0.9999990066800316, iteration: 102508
loss: 1.0126912593841553,grad_norm: 0.999999073620848, iteration: 102509
loss: 0.9925195574760437,grad_norm: 0.9135501621084376, iteration: 102510
loss: 1.0054329633712769,grad_norm: 0.9999992057725687, iteration: 102511
loss: 1.0194364786148071,grad_norm: 0.9999989623727662, iteration: 102512
loss: 0.9714292883872986,grad_norm: 0.9999991042070837, iteration: 102513
loss: 1.0280917882919312,grad_norm: 0.9999989646087454, iteration: 102514
loss: 0.9878801703453064,grad_norm: 0.9999991668702584, iteration: 102515
loss: 0.9910110235214233,grad_norm: 0.9999992962123158, iteration: 102516
loss: 0.9842306971549988,grad_norm: 0.999999098378864, iteration: 102517
loss: 1.034613013267517,grad_norm: 0.9999991806031732, iteration: 102518
loss: 0.9431774020195007,grad_norm: 0.9999992004539429, iteration: 102519
loss: 1.0012149810791016,grad_norm: 0.9999992200763729, iteration: 102520
loss: 0.9936165809631348,grad_norm: 0.9999991663205146, iteration: 102521
loss: 0.9791032671928406,grad_norm: 0.9999992416370306, iteration: 102522
loss: 1.0444475412368774,grad_norm: 0.9999993570693908, iteration: 102523
loss: 1.0341864824295044,grad_norm: 0.9920323669009662, iteration: 102524
loss: 1.0397785902023315,grad_norm: 0.9999992291264592, iteration: 102525
loss: 1.0255316495895386,grad_norm: 0.9999990196213758, iteration: 102526
loss: 0.9878314137458801,grad_norm: 0.9999991568442954, iteration: 102527
loss: 1.0208344459533691,grad_norm: 0.9999992761521069, iteration: 102528
loss: 1.0024067163467407,grad_norm: 0.9999992411964316, iteration: 102529
loss: 1.020506739616394,grad_norm: 0.9851439145967762, iteration: 102530
loss: 1.0352020263671875,grad_norm: 0.9999993951453142, iteration: 102531
loss: 1.0172871351242065,grad_norm: 0.9999991628718036, iteration: 102532
loss: 1.0029617547988892,grad_norm: 0.9496309532952985, iteration: 102533
loss: 0.9941131472587585,grad_norm: 0.9345206795803003, iteration: 102534
loss: 1.0154622793197632,grad_norm: 0.9795967556272879, iteration: 102535
loss: 0.9643980264663696,grad_norm: 0.9999990196283687, iteration: 102536
loss: 1.0090378522872925,grad_norm: 0.9325762596522962, iteration: 102537
loss: 1.0231882333755493,grad_norm: 0.9999993923749613, iteration: 102538
loss: 1.004140853881836,grad_norm: 0.8020419204114619, iteration: 102539
loss: 0.9881977438926697,grad_norm: 0.9999991367601301, iteration: 102540
loss: 1.0275286436080933,grad_norm: 0.894044754033113, iteration: 102541
loss: 1.0526096820831299,grad_norm: 0.9654314575356958, iteration: 102542
loss: 0.9913186430931091,grad_norm: 0.960542514707317, iteration: 102543
loss: 0.9944305419921875,grad_norm: 0.9999990871581841, iteration: 102544
loss: 0.9999855160713196,grad_norm: 0.9999991344491014, iteration: 102545
loss: 0.9880669713020325,grad_norm: 0.953938996567532, iteration: 102546
loss: 0.9729148745536804,grad_norm: 0.9999990593700844, iteration: 102547
loss: 1.0168287754058838,grad_norm: 0.9285257012390287, iteration: 102548
loss: 1.0228928327560425,grad_norm: 0.9999989706951277, iteration: 102549
loss: 1.1475790739059448,grad_norm: 0.9999998688863879, iteration: 102550
loss: 0.9414297938346863,grad_norm: 0.905274656788288, iteration: 102551
loss: 1.0028090476989746,grad_norm: 0.9999990334990315, iteration: 102552
loss: 1.0232865810394287,grad_norm: 0.9999991111475223, iteration: 102553
loss: 0.9918338656425476,grad_norm: 0.9999991517313473, iteration: 102554
loss: 1.0160146951675415,grad_norm: 0.9999992989028395, iteration: 102555
loss: 1.014591097831726,grad_norm: 0.9999993353771948, iteration: 102556
loss: 0.9972289800643921,grad_norm: 0.8597116877495623, iteration: 102557
loss: 1.0041142702102661,grad_norm: 0.9999992448141622, iteration: 102558
loss: 0.9926884174346924,grad_norm: 0.9009614897303254, iteration: 102559
loss: 1.0123984813690186,grad_norm: 0.999999027597843, iteration: 102560
loss: 0.9672762751579285,grad_norm: 0.999999067033154, iteration: 102561
loss: 0.9978964328765869,grad_norm: 0.9999990702556054, iteration: 102562
loss: 1.0056036710739136,grad_norm: 0.9999991710769297, iteration: 102563
loss: 0.9950231313705444,grad_norm: 0.9999993552500659, iteration: 102564
loss: 0.9900872707366943,grad_norm: 0.999999328112323, iteration: 102565
loss: 0.982230007648468,grad_norm: 0.9999991634282015, iteration: 102566
loss: 1.023752212524414,grad_norm: 0.999999751676581, iteration: 102567
loss: 0.9732843637466431,grad_norm: 0.9999991701117402, iteration: 102568
loss: 1.0307785272598267,grad_norm: 0.9999992534093949, iteration: 102569
loss: 0.9720306396484375,grad_norm: 0.9962829438351819, iteration: 102570
loss: 1.0314184427261353,grad_norm: 0.9999991610008541, iteration: 102571
loss: 1.0319370031356812,grad_norm: 0.9999991910560146, iteration: 102572
loss: 1.0601378679275513,grad_norm: 0.999999301279279, iteration: 102573
loss: 1.0100988149642944,grad_norm: 0.9999991911777416, iteration: 102574
loss: 0.9980823993682861,grad_norm: 0.9999991876293874, iteration: 102575
loss: 1.0170109272003174,grad_norm: 0.8208553793782246, iteration: 102576
loss: 0.9510490894317627,grad_norm: 0.9999991984928062, iteration: 102577
loss: 0.9634628295898438,grad_norm: 0.9999991327008747, iteration: 102578
loss: 0.9639372229576111,grad_norm: 0.9999990252860997, iteration: 102579
loss: 1.039614200592041,grad_norm: 0.9999990370346484, iteration: 102580
loss: 0.9960955381393433,grad_norm: 0.9999990854318276, iteration: 102581
loss: 0.9733537435531616,grad_norm: 0.9880595672708636, iteration: 102582
loss: 0.9748220443725586,grad_norm: 0.9213579755134168, iteration: 102583
loss: 1.0432673692703247,grad_norm: 0.985470043445466, iteration: 102584
loss: 0.996810257434845,grad_norm: 0.9999992403432001, iteration: 102585
loss: 1.0176588296890259,grad_norm: 0.9999991385940019, iteration: 102586
loss: 0.9900217652320862,grad_norm: 0.9999992369629761, iteration: 102587
loss: 0.9946427345275879,grad_norm: 0.9999992399971533, iteration: 102588
loss: 1.0083329677581787,grad_norm: 0.9999990531006442, iteration: 102589
loss: 0.9896491169929504,grad_norm: 0.9999992348773689, iteration: 102590
loss: 0.9788493514060974,grad_norm: 0.999999224757603, iteration: 102591
loss: 0.9966455101966858,grad_norm: 0.999999100653734, iteration: 102592
loss: 0.9896453619003296,grad_norm: 0.9999992582294334, iteration: 102593
loss: 0.9901126027107239,grad_norm: 0.9999990928810886, iteration: 102594
loss: 1.0211117267608643,grad_norm: 0.9999990606886984, iteration: 102595
loss: 0.9711787700653076,grad_norm: 0.999999108873917, iteration: 102596
loss: 1.01357901096344,grad_norm: 0.9999994653623084, iteration: 102597
loss: 1.0169364213943481,grad_norm: 0.9999992665190179, iteration: 102598
loss: 1.0158474445343018,grad_norm: 0.9999991959116759, iteration: 102599
loss: 0.9989882707595825,grad_norm: 0.9999991902331694, iteration: 102600
loss: 1.0300201177597046,grad_norm: 0.9999990970492756, iteration: 102601
loss: 1.0174143314361572,grad_norm: 0.9999993155175035, iteration: 102602
loss: 1.0136973857879639,grad_norm: 0.9999989843865763, iteration: 102603
loss: 1.0238710641860962,grad_norm: 0.9999989869947625, iteration: 102604
loss: 0.9958721995353699,grad_norm: 0.99999901341128, iteration: 102605
loss: 0.9980252981185913,grad_norm: 0.9999992139806332, iteration: 102606
loss: 1.0305695533752441,grad_norm: 0.9999992061296062, iteration: 102607
loss: 0.9965137839317322,grad_norm: 0.9999990907234355, iteration: 102608
loss: 1.218420386314392,grad_norm: 0.999999586862679, iteration: 102609
loss: 0.9996182322502136,grad_norm: 0.9636142657675235, iteration: 102610
loss: 1.001196265220642,grad_norm: 0.9759160074061889, iteration: 102611
loss: 1.0072962045669556,grad_norm: 0.9659910654750602, iteration: 102612
loss: 0.9900592565536499,grad_norm: 0.9999990421083171, iteration: 102613
loss: 0.9594780206680298,grad_norm: 0.999999097638363, iteration: 102614
loss: 1.0190569162368774,grad_norm: 0.9999993068606569, iteration: 102615
loss: 0.9920125603675842,grad_norm: 0.999999155736343, iteration: 102616
loss: 0.9929682016372681,grad_norm: 0.9962633378362886, iteration: 102617
loss: 1.0240943431854248,grad_norm: 0.999999111464082, iteration: 102618
loss: 1.0678777694702148,grad_norm: 0.9999990685585427, iteration: 102619
loss: 1.0240639448165894,grad_norm: 0.9999992751725855, iteration: 102620
loss: 0.976204514503479,grad_norm: 0.999999196384322, iteration: 102621
loss: 1.0099010467529297,grad_norm: 0.9999992293729695, iteration: 102622
loss: 1.0402013063430786,grad_norm: 0.999999118826412, iteration: 102623
loss: 1.0250694751739502,grad_norm: 0.9999991459846923, iteration: 102624
loss: 0.9604447484016418,grad_norm: 0.9999990242573342, iteration: 102625
loss: 1.0091572999954224,grad_norm: 0.9999990971619998, iteration: 102626
loss: 1.0160025358200073,grad_norm: 0.9852010076291777, iteration: 102627
loss: 0.9985566735267639,grad_norm: 0.9840441280048637, iteration: 102628
loss: 0.9685623049736023,grad_norm: 0.9999991674898796, iteration: 102629
loss: 1.0196621417999268,grad_norm: 0.9999991332362127, iteration: 102630
loss: 0.9882278442382812,grad_norm: 0.9999994938623958, iteration: 102631
loss: 1.0077967643737793,grad_norm: 0.9999990051037296, iteration: 102632
loss: 0.9932447075843811,grad_norm: 0.9999990040005324, iteration: 102633
loss: 1.025925874710083,grad_norm: 0.9999991654129682, iteration: 102634
loss: 0.9629185795783997,grad_norm: 0.9999990455758948, iteration: 102635
loss: 1.006176233291626,grad_norm: 0.9999991922571053, iteration: 102636
loss: 0.9279167652130127,grad_norm: 0.9939228574688445, iteration: 102637
loss: 0.9792013764381409,grad_norm: 0.9999990269610114, iteration: 102638
loss: 1.0035176277160645,grad_norm: 0.950803583284268, iteration: 102639
loss: 1.005792260169983,grad_norm: 0.8480624253252145, iteration: 102640
loss: 1.0194047689437866,grad_norm: 0.999998959121968, iteration: 102641
loss: 1.0063005685806274,grad_norm: 0.9999989615981929, iteration: 102642
loss: 0.9729040861129761,grad_norm: 0.9999991346738942, iteration: 102643
loss: 1.0204312801361084,grad_norm: 0.9999992321008487, iteration: 102644
loss: 0.9798404574394226,grad_norm: 0.9999991924733246, iteration: 102645
loss: 1.000223159790039,grad_norm: 0.9999991788785361, iteration: 102646
loss: 1.0493861436843872,grad_norm: 0.9999991527482989, iteration: 102647
loss: 1.0162348747253418,grad_norm: 0.9700340328153338, iteration: 102648
loss: 1.0180983543395996,grad_norm: 0.9999991112907719, iteration: 102649
loss: 1.0186306238174438,grad_norm: 0.9999990099649133, iteration: 102650
loss: 1.0002543926239014,grad_norm: 0.9999991953681842, iteration: 102651
loss: 0.9779294729232788,grad_norm: 0.9999994745871864, iteration: 102652
loss: 1.0065312385559082,grad_norm: 0.9999991134479471, iteration: 102653
loss: 1.0471502542495728,grad_norm: 0.9999998807586776, iteration: 102654
loss: 0.9734338521957397,grad_norm: 0.95281031464649, iteration: 102655
loss: 0.9989131689071655,grad_norm: 0.8542671415928527, iteration: 102656
loss: 0.9710336923599243,grad_norm: 0.9328086734051132, iteration: 102657
loss: 0.9813518524169922,grad_norm: 0.9999990590786455, iteration: 102658
loss: 1.0018842220306396,grad_norm: 0.9999992035439403, iteration: 102659
loss: 1.0172514915466309,grad_norm: 0.9444831772332993, iteration: 102660
loss: 0.984246015548706,grad_norm: 0.999999263724018, iteration: 102661
loss: 0.9958741068840027,grad_norm: 0.9999991830150751, iteration: 102662
loss: 1.0093101263046265,grad_norm: 0.9999991050266981, iteration: 102663
loss: 0.9935047030448914,grad_norm: 0.9999991435593084, iteration: 102664
loss: 1.0074750185012817,grad_norm: 0.9084125452144176, iteration: 102665
loss: 0.9702022671699524,grad_norm: 0.9999993618638846, iteration: 102666
loss: 0.983589231967926,grad_norm: 0.9999990464174818, iteration: 102667
loss: 0.9933264851570129,grad_norm: 0.881047943766057, iteration: 102668
loss: 0.9900675415992737,grad_norm: 0.9999992478979538, iteration: 102669
loss: 0.9682740569114685,grad_norm: 0.9999992734436997, iteration: 102670
loss: 0.9819565415382385,grad_norm: 0.9999993366711305, iteration: 102671
loss: 1.0286003351211548,grad_norm: 0.9999993240935314, iteration: 102672
loss: 0.988224446773529,grad_norm: 0.9999991716705109, iteration: 102673
loss: 1.0033265352249146,grad_norm: 0.9999993343150065, iteration: 102674
loss: 0.9877480268478394,grad_norm: 0.999999040281796, iteration: 102675
loss: 0.9784451723098755,grad_norm: 0.9999990936863943, iteration: 102676
loss: 0.9986013770103455,grad_norm: 0.9999990302463436, iteration: 102677
loss: 0.9622010588645935,grad_norm: 0.9999992571609517, iteration: 102678
loss: 1.003449559211731,grad_norm: 0.9999989246966203, iteration: 102679
loss: 0.9887036085128784,grad_norm: 0.9502313923532373, iteration: 102680
loss: 0.9910492897033691,grad_norm: 0.9999991496133924, iteration: 102681
loss: 1.0192927122116089,grad_norm: 0.9999993867111633, iteration: 102682
loss: 0.9694808721542358,grad_norm: 0.9999993167982509, iteration: 102683
loss: 1.0058846473693848,grad_norm: 0.9999991349871712, iteration: 102684
loss: 0.979037880897522,grad_norm: 0.966898220374922, iteration: 102685
loss: 0.9761027693748474,grad_norm: 0.9262452063646128, iteration: 102686
loss: 1.0242840051651,grad_norm: 0.9999990372586901, iteration: 102687
loss: 1.025331735610962,grad_norm: 0.9999992629478244, iteration: 102688
loss: 0.9991613030433655,grad_norm: 0.999999104335438, iteration: 102689
loss: 0.9905962347984314,grad_norm: 0.9265322033910706, iteration: 102690
loss: 1.01389741897583,grad_norm: 0.9999992301765431, iteration: 102691
loss: 1.0097980499267578,grad_norm: 0.9999991599780069, iteration: 102692
loss: 1.0375114679336548,grad_norm: 0.9999991229922024, iteration: 102693
loss: 0.9928483963012695,grad_norm: 0.999999047940883, iteration: 102694
loss: 1.0015043020248413,grad_norm: 0.9999990947301758, iteration: 102695
loss: 1.0200456380844116,grad_norm: 0.9999991418516352, iteration: 102696
loss: 1.0034250020980835,grad_norm: 0.9999992462050924, iteration: 102697
loss: 1.0086758136749268,grad_norm: 0.999999049147042, iteration: 102698
loss: 0.9615058898925781,grad_norm: 0.9629659460142661, iteration: 102699
loss: 1.0100970268249512,grad_norm: 0.9999991472028952, iteration: 102700
loss: 0.9999540448188782,grad_norm: 0.9793304777280446, iteration: 102701
loss: 0.9601384401321411,grad_norm: 0.9999989584027469, iteration: 102702
loss: 1.1060847043991089,grad_norm: 0.9999992581601056, iteration: 102703
loss: 0.9796440601348877,grad_norm: 0.9999990907288532, iteration: 102704
loss: 0.998308539390564,grad_norm: 0.8786831692587377, iteration: 102705
loss: 0.9873497486114502,grad_norm: 0.9724187118756624, iteration: 102706
loss: 1.005813717842102,grad_norm: 0.8483600647196304, iteration: 102707
loss: 0.9793849587440491,grad_norm: 0.9999993721685247, iteration: 102708
loss: 1.0560814142227173,grad_norm: 0.999999262697365, iteration: 102709
loss: 0.9955834746360779,grad_norm: 0.975237077280933, iteration: 102710
loss: 0.9848939776420593,grad_norm: 0.8244637383144106, iteration: 102711
loss: 1.0294898748397827,grad_norm: 0.9999992084297358, iteration: 102712
loss: 0.9923492670059204,grad_norm: 0.9999990984284716, iteration: 102713
loss: 1.0296255350112915,grad_norm: 0.9587577455585511, iteration: 102714
loss: 1.0017434358596802,grad_norm: 0.9999991023419343, iteration: 102715
loss: 1.0187236070632935,grad_norm: 0.9999991596289172, iteration: 102716
loss: 1.0053714513778687,grad_norm: 0.9999993829164697, iteration: 102717
loss: 0.989225447177887,grad_norm: 0.8945188412068773, iteration: 102718
loss: 1.0168308019638062,grad_norm: 0.9941153052543563, iteration: 102719
loss: 0.9824512004852295,grad_norm: 0.9809350214477635, iteration: 102720
loss: 1.0301392078399658,grad_norm: 0.8514938556901211, iteration: 102721
loss: 0.9927732944488525,grad_norm: 0.9604969584328896, iteration: 102722
loss: 0.9862142205238342,grad_norm: 0.999999003630322, iteration: 102723
loss: 1.0075252056121826,grad_norm: 0.999999087422404, iteration: 102724
loss: 1.0128493309020996,grad_norm: 0.9429974215037077, iteration: 102725
loss: 1.0137501955032349,grad_norm: 0.9999992609925268, iteration: 102726
loss: 0.9684475064277649,grad_norm: 0.9999991515842178, iteration: 102727
loss: 1.0126365423202515,grad_norm: 0.904411505818947, iteration: 102728
loss: 1.0291221141815186,grad_norm: 0.9999993221810535, iteration: 102729
loss: 0.9782347083091736,grad_norm: 0.8627998655961462, iteration: 102730
loss: 0.9660171866416931,grad_norm: 0.9359184920408941, iteration: 102731
loss: 1.0262528657913208,grad_norm: 0.985878656752144, iteration: 102732
loss: 1.003465175628662,grad_norm: 0.9999991383993557, iteration: 102733
loss: 0.9916245341300964,grad_norm: 0.9660978023994142, iteration: 102734
loss: 1.0065075159072876,grad_norm: 0.9999991963804934, iteration: 102735
loss: 0.9611064791679382,grad_norm: 0.9999991325426324, iteration: 102736
loss: 1.0101779699325562,grad_norm: 0.9469589418847367, iteration: 102737
loss: 0.9956930875778198,grad_norm: 0.9999991852346625, iteration: 102738
loss: 0.9658622741699219,grad_norm: 0.8305520815270031, iteration: 102739
loss: 0.9922029972076416,grad_norm: 0.8982537076252052, iteration: 102740
loss: 0.9925864338874817,grad_norm: 0.9223656099055072, iteration: 102741
loss: 0.9896472692489624,grad_norm: 0.9665368612358749, iteration: 102742
loss: 0.967962384223938,grad_norm: 0.9854304879480985, iteration: 102743
loss: 1.0291388034820557,grad_norm: 0.9112798971260784, iteration: 102744
loss: 0.9900157451629639,grad_norm: 0.9999992431771582, iteration: 102745
loss: 1.026681900024414,grad_norm: 0.9519791913229829, iteration: 102746
loss: 0.9881739616394043,grad_norm: 0.9999991250968681, iteration: 102747
loss: 0.9961515665054321,grad_norm: 0.9123445018831237, iteration: 102748
loss: 1.011588215827942,grad_norm: 0.9999993204448957, iteration: 102749
loss: 1.0044617652893066,grad_norm: 0.9999991026162338, iteration: 102750
loss: 0.9851426482200623,grad_norm: 0.9999990607601738, iteration: 102751
loss: 0.9918726682662964,grad_norm: 0.9999990664685687, iteration: 102752
loss: 0.9871963262557983,grad_norm: 0.9838494058372061, iteration: 102753
loss: 1.0197652578353882,grad_norm: 0.999999154025754, iteration: 102754
loss: 0.9962031245231628,grad_norm: 0.9999991387048025, iteration: 102755
loss: 0.9824221134185791,grad_norm: 0.962856298595844, iteration: 102756
loss: 0.9652470350265503,grad_norm: 0.9999990523713527, iteration: 102757
loss: 0.9923393130302429,grad_norm: 0.9999991912021505, iteration: 102758
loss: 1.0295383930206299,grad_norm: 0.9999992238041499, iteration: 102759
loss: 0.9756527543067932,grad_norm: 0.9999992773110441, iteration: 102760
loss: 1.0238832235336304,grad_norm: 0.999999197771437, iteration: 102761
loss: 0.9900036454200745,grad_norm: 0.949895363823182, iteration: 102762
loss: 0.9811937808990479,grad_norm: 0.9999992787786205, iteration: 102763
loss: 0.9208052158355713,grad_norm: 0.8923025400821621, iteration: 102764
loss: 0.9983070492744446,grad_norm: 0.9999991863898557, iteration: 102765
loss: 1.0165218114852905,grad_norm: 0.9999992154269576, iteration: 102766
loss: 0.996195912361145,grad_norm: 0.9988791355368721, iteration: 102767
loss: 0.9726073145866394,grad_norm: 0.9999992231804415, iteration: 102768
loss: 1.006677508354187,grad_norm: 0.9841972920024434, iteration: 102769
loss: 1.0109885931015015,grad_norm: 0.9173418937486953, iteration: 102770
loss: 1.0055017471313477,grad_norm: 0.9999992794275139, iteration: 102771
loss: 0.9907877445220947,grad_norm: 0.9999991933750805, iteration: 102772
loss: 0.9765623211860657,grad_norm: 0.9999991278438232, iteration: 102773
loss: 1.0022834539413452,grad_norm: 0.9834672894844878, iteration: 102774
loss: 0.9973894953727722,grad_norm: 0.9999993936141813, iteration: 102775
loss: 0.9766797423362732,grad_norm: 0.9999990843037321, iteration: 102776
loss: 1.1407698392868042,grad_norm: 0.9999996849428727, iteration: 102777
loss: 0.985632061958313,grad_norm: 0.9731803877536525, iteration: 102778
loss: 0.9905279874801636,grad_norm: 0.9999990502192957, iteration: 102779
loss: 1.0169384479522705,grad_norm: 0.9999990764407115, iteration: 102780
loss: 0.9734650254249573,grad_norm: 0.9216861798794179, iteration: 102781
loss: 0.9941264390945435,grad_norm: 0.9999991228066428, iteration: 102782
loss: 1.0101430416107178,grad_norm: 0.954741724007528, iteration: 102783
loss: 0.9876866340637207,grad_norm: 0.9999990731386487, iteration: 102784
loss: 0.9793546795845032,grad_norm: 0.9999991841623871, iteration: 102785
loss: 1.0203479528427124,grad_norm: 0.9438135325195512, iteration: 102786
loss: 1.0233190059661865,grad_norm: 0.9229824762306642, iteration: 102787
loss: 0.9788487553596497,grad_norm: 0.9189287024658728, iteration: 102788
loss: 1.0147923231124878,grad_norm: 0.999999296415621, iteration: 102789
loss: 1.0108906030654907,grad_norm: 0.9999991608187965, iteration: 102790
loss: 1.0176187753677368,grad_norm: 0.9999990283104075, iteration: 102791
loss: 0.9861754775047302,grad_norm: 0.9999991706902163, iteration: 102792
loss: 0.9741254448890686,grad_norm: 0.9448847392893693, iteration: 102793
loss: 1.002644419670105,grad_norm: 0.9999990794376222, iteration: 102794
loss: 1.0436255931854248,grad_norm: 0.9999992389414363, iteration: 102795
loss: 0.99859219789505,grad_norm: 0.962580564559698, iteration: 102796
loss: 1.0102851390838623,grad_norm: 0.9999992455566143, iteration: 102797
loss: 1.009894609451294,grad_norm: 0.999999233094842, iteration: 102798
loss: 0.9929421544075012,grad_norm: 0.9999991825454393, iteration: 102799
loss: 0.9602856040000916,grad_norm: 0.9839253365076283, iteration: 102800
loss: 0.9625001549720764,grad_norm: 0.9999991919923055, iteration: 102801
loss: 0.9891759753227234,grad_norm: 0.9999991343399033, iteration: 102802
loss: 0.9573501348495483,grad_norm: 0.9999991754680015, iteration: 102803
loss: 1.0265122652053833,grad_norm: 0.9999991670151682, iteration: 102804
loss: 0.9973716139793396,grad_norm: 0.9384750825696665, iteration: 102805
loss: 1.0227441787719727,grad_norm: 0.9999992259149163, iteration: 102806
loss: 1.0112494230270386,grad_norm: 0.8899600288343354, iteration: 102807
loss: 1.0095714330673218,grad_norm: 0.9999999845328726, iteration: 102808
loss: 0.9467014670372009,grad_norm: 0.9999991313360139, iteration: 102809
loss: 0.9791274666786194,grad_norm: 0.9764373587178043, iteration: 102810
loss: 1.0162020921707153,grad_norm: 0.9758916098301055, iteration: 102811
loss: 0.9973927140235901,grad_norm: 0.9999991154427957, iteration: 102812
loss: 0.9647703766822815,grad_norm: 0.9036631716488746, iteration: 102813
loss: 0.9868022799491882,grad_norm: 0.869799168123523, iteration: 102814
loss: 0.9896771311759949,grad_norm: 0.9999993009279498, iteration: 102815
loss: 1.0156505107879639,grad_norm: 0.9999989869140644, iteration: 102816
loss: 0.9771323800086975,grad_norm: 0.9399920735815738, iteration: 102817
loss: 1.0084627866744995,grad_norm: 0.9999992019839679, iteration: 102818
loss: 1.0153918266296387,grad_norm: 0.9656300304346741, iteration: 102819
loss: 1.026724934577942,grad_norm: 0.9999995632265236, iteration: 102820
loss: 1.0258476734161377,grad_norm: 0.9999991197219377, iteration: 102821
loss: 0.9777211546897888,grad_norm: 0.9999994925518508, iteration: 102822
loss: 0.9585741758346558,grad_norm: 0.9999993909892405, iteration: 102823
loss: 1.011552095413208,grad_norm: 0.967010354953428, iteration: 102824
loss: 1.0190753936767578,grad_norm: 0.9999992171773653, iteration: 102825
loss: 1.0137717723846436,grad_norm: 0.9999989601123519, iteration: 102826
loss: 1.001743197441101,grad_norm: 0.999999129676305, iteration: 102827
loss: 1.0114654302597046,grad_norm: 0.9164750192140695, iteration: 102828
loss: 0.9885287880897522,grad_norm: 0.9999991943016214, iteration: 102829
loss: 1.024166464805603,grad_norm: 0.9498467277910461, iteration: 102830
loss: 0.9903280735015869,grad_norm: 0.9999990928131168, iteration: 102831
loss: 0.9712720513343811,grad_norm: 0.9999991113531188, iteration: 102832
loss: 0.9969453811645508,grad_norm: 0.9299643513655798, iteration: 102833
loss: 1.0013960599899292,grad_norm: 0.9842048424791621, iteration: 102834
loss: 1.0167009830474854,grad_norm: 0.9434950663724286, iteration: 102835
loss: 0.9991039633750916,grad_norm: 0.961998159675124, iteration: 102836
loss: 1.009562373161316,grad_norm: 0.8936599706219722, iteration: 102837
loss: 0.971881091594696,grad_norm: 0.9800325236161538, iteration: 102838
loss: 1.0076804161071777,grad_norm: 0.9991746321973407, iteration: 102839
loss: 0.9650706648826599,grad_norm: 0.9486366310119977, iteration: 102840
loss: 0.9303817749023438,grad_norm: 0.9999991879948313, iteration: 102841
loss: 0.9653910994529724,grad_norm: 0.9999990177807784, iteration: 102842
loss: 1.012069582939148,grad_norm: 0.9999994325153115, iteration: 102843
loss: 0.9839345216751099,grad_norm: 0.9761861242322054, iteration: 102844
loss: 1.0156365633010864,grad_norm: 0.9999990948024832, iteration: 102845
loss: 0.9734169244766235,grad_norm: 0.9999990630843142, iteration: 102846
loss: 1.0304356813430786,grad_norm: 0.7859472183280337, iteration: 102847
loss: 1.0101875066757202,grad_norm: 0.9999993261441953, iteration: 102848
loss: 1.0166561603546143,grad_norm: 0.9051894165219799, iteration: 102849
loss: 1.0076136589050293,grad_norm: 0.9766549058426679, iteration: 102850
loss: 1.0033669471740723,grad_norm: 0.9999991445579579, iteration: 102851
loss: 0.9942789673805237,grad_norm: 0.951822497720098, iteration: 102852
loss: 0.9970224499702454,grad_norm: 0.9999989477172239, iteration: 102853
loss: 1.016693353652954,grad_norm: 0.9999991010756994, iteration: 102854
loss: 0.975749135017395,grad_norm: 0.99999908868121, iteration: 102855
loss: 0.9893141984939575,grad_norm: 0.999999215049325, iteration: 102856
loss: 1.028754711151123,grad_norm: 0.9999990447818631, iteration: 102857
loss: 1.016424298286438,grad_norm: 0.999999110470788, iteration: 102858
loss: 1.0097736120224,grad_norm: 0.8660269362871387, iteration: 102859
loss: 0.9894909262657166,grad_norm: 0.9999991205780856, iteration: 102860
loss: 0.9897144436836243,grad_norm: 0.943957113107345, iteration: 102861
loss: 0.9938016533851624,grad_norm: 0.8932495137298595, iteration: 102862
loss: 0.972731351852417,grad_norm: 0.9999991668598042, iteration: 102863
loss: 0.9699561595916748,grad_norm: 0.9720412304159738, iteration: 102864
loss: 1.0297136306762695,grad_norm: 0.9999994401634961, iteration: 102865
loss: 1.0467698574066162,grad_norm: 0.9999992713800622, iteration: 102866
loss: 0.961117684841156,grad_norm: 0.9999991491796807, iteration: 102867
loss: 0.962898313999176,grad_norm: 0.9396066626950117, iteration: 102868
loss: 1.0055745840072632,grad_norm: 0.9999991056194144, iteration: 102869
loss: 0.9818406701087952,grad_norm: 0.9999990606940643, iteration: 102870
loss: 1.0021991729736328,grad_norm: 0.9999997026927689, iteration: 102871
loss: 1.0132211446762085,grad_norm: 0.9999992095946945, iteration: 102872
loss: 1.0337615013122559,grad_norm: 0.9999992374394072, iteration: 102873
loss: 1.002703070640564,grad_norm: 0.9999990226295575, iteration: 102874
loss: 1.0146738290786743,grad_norm: 0.9999990973220934, iteration: 102875
loss: 0.9786215424537659,grad_norm: 0.9999990166760768, iteration: 102876
loss: 1.0111968517303467,grad_norm: 0.8974562969548152, iteration: 102877
loss: 1.002180814743042,grad_norm: 0.9904630621543127, iteration: 102878
loss: 1.0063366889953613,grad_norm: 0.9999991299418038, iteration: 102879
loss: 0.9819058179855347,grad_norm: 0.9999990512376186, iteration: 102880
loss: 1.2458739280700684,grad_norm: 0.9999996729711035, iteration: 102881
loss: 0.97492915391922,grad_norm: 0.9999990549442946, iteration: 102882
loss: 0.9967166185379028,grad_norm: 0.9999992841428669, iteration: 102883
loss: 0.9858532547950745,grad_norm: 0.9999990846657604, iteration: 102884
loss: 1.0121923685073853,grad_norm: 0.9999992946879621, iteration: 102885
loss: 0.9551649689674377,grad_norm: 0.9999992389691855, iteration: 102886
loss: 0.9930097460746765,grad_norm: 0.9999990437811013, iteration: 102887
loss: 1.005270004272461,grad_norm: 0.9999991238228733, iteration: 102888
loss: 1.0087648630142212,grad_norm: 0.9999996377080083, iteration: 102889
loss: 0.9670807123184204,grad_norm: 0.9999992181588251, iteration: 102890
loss: 0.9811086654663086,grad_norm: 0.9999991614441657, iteration: 102891
loss: 1.030692458152771,grad_norm: 0.9999991044386962, iteration: 102892
loss: 0.9947206974029541,grad_norm: 0.9999991941445696, iteration: 102893
loss: 0.9978417158126831,grad_norm: 0.9999991216914531, iteration: 102894
loss: 0.9449399709701538,grad_norm: 0.9999991782491929, iteration: 102895
loss: 0.9787793755531311,grad_norm: 0.9999992121687996, iteration: 102896
loss: 0.9954407215118408,grad_norm: 0.999999059803708, iteration: 102897
loss: 1.0204136371612549,grad_norm: 0.9999992193379413, iteration: 102898
loss: 0.9908277988433838,grad_norm: 0.9999991432680883, iteration: 102899
loss: 0.9999789595603943,grad_norm: 0.9999992058892109, iteration: 102900
loss: 1.058384656906128,grad_norm: 0.999999151614561, iteration: 102901
loss: 0.9965190887451172,grad_norm: 0.9403213720047013, iteration: 102902
loss: 0.9816601872444153,grad_norm: 0.9495488719111514, iteration: 102903
loss: 1.043460488319397,grad_norm: 0.9628979490608037, iteration: 102904
loss: 0.9856718182563782,grad_norm: 0.9999991310826345, iteration: 102905
loss: 1.0087110996246338,grad_norm: 0.9999990570568572, iteration: 102906
loss: 1.0404945611953735,grad_norm: 0.8756405044826218, iteration: 102907
loss: 0.992297351360321,grad_norm: 0.9999991316584994, iteration: 102908
loss: 1.0207399129867554,grad_norm: 0.9999991029587085, iteration: 102909
loss: 1.0474961996078491,grad_norm: 0.9999997763483611, iteration: 102910
loss: 1.0555846691131592,grad_norm: 0.9999990603191833, iteration: 102911
loss: 0.9643831849098206,grad_norm: 0.9999992734568468, iteration: 102912
loss: 1.0117484331130981,grad_norm: 0.9999991491193141, iteration: 102913
loss: 0.9824146032333374,grad_norm: 0.9999992889042185, iteration: 102914
loss: 1.0108150243759155,grad_norm: 0.8996746174329644, iteration: 102915
loss: 0.9916326403617859,grad_norm: 0.9408952465467543, iteration: 102916
loss: 1.0246638059616089,grad_norm: 0.9999992383356243, iteration: 102917
loss: 1.0027865171432495,grad_norm: 0.9999991417825017, iteration: 102918
loss: 0.9887163639068604,grad_norm: 0.9237150785584991, iteration: 102919
loss: 1.031974196434021,grad_norm: 0.9999994529152415, iteration: 102920
loss: 0.992256224155426,grad_norm: 0.9824342075739032, iteration: 102921
loss: 1.0350431203842163,grad_norm: 0.9999989771603661, iteration: 102922
loss: 1.0241550207138062,grad_norm: 0.999999489558422, iteration: 102923
loss: 1.0153566598892212,grad_norm: 0.970955401619947, iteration: 102924
loss: 1.0089622735977173,grad_norm: 0.9999992291384957, iteration: 102925
loss: 1.0047119855880737,grad_norm: 0.9999991877432266, iteration: 102926
loss: 0.9807907342910767,grad_norm: 0.9999989820595981, iteration: 102927
loss: 1.062603235244751,grad_norm: 0.9999991566648243, iteration: 102928
loss: 0.9743179678916931,grad_norm: 0.8856053442747944, iteration: 102929
loss: 0.9974614381790161,grad_norm: 0.9249604142495413, iteration: 102930
loss: 0.9979523420333862,grad_norm: 0.999999356112577, iteration: 102931
loss: 1.040805697441101,grad_norm: 0.9999992177892845, iteration: 102932
loss: 1.0523160696029663,grad_norm: 0.9535598195736396, iteration: 102933
loss: 1.0399960279464722,grad_norm: 0.9999993557495119, iteration: 102934
loss: 1.0274461507797241,grad_norm: 0.9594442152645003, iteration: 102935
loss: 1.0060309171676636,grad_norm: 0.9999990684669497, iteration: 102936
loss: 1.0292894840240479,grad_norm: 0.9999990233344807, iteration: 102937
loss: 1.0137121677398682,grad_norm: 0.8999385381809363, iteration: 102938
loss: 1.0180515050888062,grad_norm: 0.9999990421111511, iteration: 102939
loss: 1.0045804977416992,grad_norm: 0.9999990244208637, iteration: 102940
loss: 0.9711248874664307,grad_norm: 0.9999991484154567, iteration: 102941
loss: 1.0458252429962158,grad_norm: 0.9999995723803435, iteration: 102942
loss: 0.9990808367729187,grad_norm: 0.9999989984612659, iteration: 102943
loss: 1.0031194686889648,grad_norm: 0.9999991894424763, iteration: 102944
loss: 1.0144813060760498,grad_norm: 0.9999990845903383, iteration: 102945
loss: 1.0104711055755615,grad_norm: 0.8935569763180661, iteration: 102946
loss: 0.9556846618652344,grad_norm: 0.99999907334331, iteration: 102947
loss: 0.9979142546653748,grad_norm: 0.9999990605976645, iteration: 102948
loss: 0.9939425587654114,grad_norm: 0.9999991258203271, iteration: 102949
loss: 0.997610867023468,grad_norm: 0.9999991426696625, iteration: 102950
loss: 0.9857122898101807,grad_norm: 0.9999990673550164, iteration: 102951
loss: 1.0093134641647339,grad_norm: 0.999999223849109, iteration: 102952
loss: 1.0220324993133545,grad_norm: 0.9002378314659149, iteration: 102953
loss: 0.9944716691970825,grad_norm: 0.9999992557422102, iteration: 102954
loss: 1.001279354095459,grad_norm: 0.9582282623207393, iteration: 102955
loss: 1.0211809873580933,grad_norm: 0.9999992306973039, iteration: 102956
loss: 1.0203651189804077,grad_norm: 0.923470338965887, iteration: 102957
loss: 1.0001671314239502,grad_norm: 0.9999993313027344, iteration: 102958
loss: 0.9999005198478699,grad_norm: 0.9999990842001917, iteration: 102959
loss: 1.0273489952087402,grad_norm: 0.9999991120483118, iteration: 102960
loss: 0.9987103939056396,grad_norm: 0.9999990362588823, iteration: 102961
loss: 1.0209400653839111,grad_norm: 0.99999906245721, iteration: 102962
loss: 0.9789884090423584,grad_norm: 0.9999992229122147, iteration: 102963
loss: 1.010326623916626,grad_norm: 0.9999990274854045, iteration: 102964
loss: 1.0009527206420898,grad_norm: 0.9999992231054474, iteration: 102965
loss: 1.0481764078140259,grad_norm: 0.9999996986689884, iteration: 102966
loss: 1.0366770029067993,grad_norm: 0.9433712142833637, iteration: 102967
loss: 1.012471079826355,grad_norm: 0.9751529407178281, iteration: 102968
loss: 0.9660365581512451,grad_norm: 0.9405643744038429, iteration: 102969
loss: 1.0155318975448608,grad_norm: 0.9999990298475773, iteration: 102970
loss: 1.0363742113113403,grad_norm: 0.9999995811233082, iteration: 102971
loss: 1.0033403635025024,grad_norm: 0.9272780178914237, iteration: 102972
loss: 0.9948626756668091,grad_norm: 0.9999991393778491, iteration: 102973
loss: 0.9892276525497437,grad_norm: 0.9999992238292795, iteration: 102974
loss: 0.9939442873001099,grad_norm: 0.9999991583995282, iteration: 102975
loss: 0.9843332171440125,grad_norm: 0.9726918567869666, iteration: 102976
loss: 0.9920656085014343,grad_norm: 0.8969553837567968, iteration: 102977
loss: 1.0326131582260132,grad_norm: 0.9808515207519687, iteration: 102978
loss: 1.0293595790863037,grad_norm: 0.9999991287850728, iteration: 102979
loss: 1.0059553384780884,grad_norm: 0.9999989174816553, iteration: 102980
loss: 1.0061935186386108,grad_norm: 0.999999213261867, iteration: 102981
loss: 1.025467872619629,grad_norm: 0.9999989979771787, iteration: 102982
loss: 1.0077251195907593,grad_norm: 0.9999991112930764, iteration: 102983
loss: 1.017879843711853,grad_norm: 0.9885393393495928, iteration: 102984
loss: 1.0148447751998901,grad_norm: 0.9548511123261336, iteration: 102985
loss: 1.0323055982589722,grad_norm: 0.9999996355242224, iteration: 102986
loss: 1.0012518167495728,grad_norm: 0.9999989178621218, iteration: 102987
loss: 1.0227779150009155,grad_norm: 0.999999120803243, iteration: 102988
loss: 1.0002573728561401,grad_norm: 0.9154405607338888, iteration: 102989
loss: 1.0106297731399536,grad_norm: 0.999999041416846, iteration: 102990
loss: 1.015123724937439,grad_norm: 0.9999992512779787, iteration: 102991
loss: 0.9828966856002808,grad_norm: 0.9901863449910794, iteration: 102992
loss: 1.0135283470153809,grad_norm: 0.9999992073196967, iteration: 102993
loss: 1.0281898975372314,grad_norm: 0.9999991435958733, iteration: 102994
loss: 1.0209407806396484,grad_norm: 0.9999992039128879, iteration: 102995
loss: 0.9772814512252808,grad_norm: 0.9677784720729397, iteration: 102996
loss: 1.004280924797058,grad_norm: 0.9999991947683439, iteration: 102997
loss: 1.0054291486740112,grad_norm: 0.9999990414057157, iteration: 102998
loss: 0.9911794066429138,grad_norm: 0.9999992706850229, iteration: 102999
loss: 0.9990332126617432,grad_norm: 0.902532572312387, iteration: 103000
loss: 0.9662797451019287,grad_norm: 0.9975989795340997, iteration: 103001
loss: 0.9833245277404785,grad_norm: 0.9999991327624976, iteration: 103002
loss: 0.9653910994529724,grad_norm: 0.9999991744722261, iteration: 103003
loss: 1.0317879915237427,grad_norm: 0.9207770763801195, iteration: 103004
loss: 1.0142996311187744,grad_norm: 0.9999994845471504, iteration: 103005
loss: 1.028319239616394,grad_norm: 0.9999990695980325, iteration: 103006
loss: 1.018466830253601,grad_norm: 0.9999991487666435, iteration: 103007
loss: 0.9892289042472839,grad_norm: 0.9999993368911487, iteration: 103008
loss: 0.99321448802948,grad_norm: 0.9999991012209091, iteration: 103009
loss: 0.9868746995925903,grad_norm: 0.9999990463992658, iteration: 103010
loss: 1.0376801490783691,grad_norm: 0.9999991049402219, iteration: 103011
loss: 1.0284109115600586,grad_norm: 0.9923729701880172, iteration: 103012
loss: 0.9709548354148865,grad_norm: 0.9999991403477093, iteration: 103013
loss: 0.9813483953475952,grad_norm: 0.9999990762528659, iteration: 103014
loss: 0.9749470353126526,grad_norm: 0.9999991263865746, iteration: 103015
loss: 1.003395438194275,grad_norm: 0.9999990733468614, iteration: 103016
loss: 1.0144091844558716,grad_norm: 0.9999991659038426, iteration: 103017
loss: 0.9849867820739746,grad_norm: 0.9501042250520318, iteration: 103018
loss: 0.9833822250366211,grad_norm: 0.9999990894982177, iteration: 103019
loss: 0.9869551062583923,grad_norm: 0.9999989208939971, iteration: 103020
loss: 1.0012069940567017,grad_norm: 0.9936036394377386, iteration: 103021
loss: 1.0144271850585938,grad_norm: 0.9999993364962687, iteration: 103022
loss: 0.9985268115997314,grad_norm: 0.8499414252056552, iteration: 103023
loss: 1.0263972282409668,grad_norm: 0.9999992681903606, iteration: 103024
loss: 1.005676031112671,grad_norm: 0.9762037634003445, iteration: 103025
loss: 1.0025681257247925,grad_norm: 0.999999083031489, iteration: 103026
loss: 1.0302646160125732,grad_norm: 0.9999990433823611, iteration: 103027
loss: 1.0518913269042969,grad_norm: 0.999999215229042, iteration: 103028
loss: 1.0450104475021362,grad_norm: 0.999999115746643, iteration: 103029
loss: 1.0340858697891235,grad_norm: 0.9999990981343354, iteration: 103030
loss: 0.9909710884094238,grad_norm: 0.9999992214233798, iteration: 103031
loss: 1.0272536277770996,grad_norm: 0.9999991735712385, iteration: 103032
loss: 0.9967787265777588,grad_norm: 0.8659118121488101, iteration: 103033
loss: 1.19717538356781,grad_norm: 1.0000000588006532, iteration: 103034
loss: 0.9768209457397461,grad_norm: 0.9999991683263821, iteration: 103035
loss: 1.0068256855010986,grad_norm: 0.9190728233018322, iteration: 103036
loss: 1.0056849718093872,grad_norm: 0.9999990221617572, iteration: 103037
loss: 0.9916356205940247,grad_norm: 0.9655306285501312, iteration: 103038
loss: 0.9816873073577881,grad_norm: 0.9999990974733587, iteration: 103039
loss: 1.0006341934204102,grad_norm: 0.9999992855519547, iteration: 103040
loss: 0.9898312091827393,grad_norm: 0.9999990188295295, iteration: 103041
loss: 0.9917111396789551,grad_norm: 0.999999303469636, iteration: 103042
loss: 1.002111792564392,grad_norm: 0.9999992282667164, iteration: 103043
loss: 0.975313663482666,grad_norm: 0.9059305104890345, iteration: 103044
loss: 0.9645470976829529,grad_norm: 0.9999991587035176, iteration: 103045
loss: 0.977050244808197,grad_norm: 0.9999991403569417, iteration: 103046
loss: 1.031606912612915,grad_norm: 0.9999989806628083, iteration: 103047
loss: 1.0001246929168701,grad_norm: 0.9256956888930754, iteration: 103048
loss: 1.0168441534042358,grad_norm: 0.875526713405506, iteration: 103049
loss: 1.023530125617981,grad_norm: 0.9999991963166289, iteration: 103050
loss: 0.9829537272453308,grad_norm: 0.9999990925399824, iteration: 103051
loss: 0.9868466854095459,grad_norm: 0.9958469031627832, iteration: 103052
loss: 0.9810391664505005,grad_norm: 0.9999989045383528, iteration: 103053
loss: 0.9926770925521851,grad_norm: 0.9999990446830624, iteration: 103054
loss: 1.0010091066360474,grad_norm: 0.9999990449514928, iteration: 103055
loss: 0.9843631982803345,grad_norm: 0.999999207894822, iteration: 103056
loss: 1.0162302255630493,grad_norm: 0.9999991008397544, iteration: 103057
loss: 1.018514633178711,grad_norm: 0.9999992920723833, iteration: 103058
loss: 0.9919905066490173,grad_norm: 0.9999990278539573, iteration: 103059
loss: 1.0747627019882202,grad_norm: 0.9999996173285733, iteration: 103060
loss: 0.9822269082069397,grad_norm: 0.9454077035722646, iteration: 103061
loss: 0.9837852120399475,grad_norm: 0.9999993017825672, iteration: 103062
loss: 0.9825856685638428,grad_norm: 0.9039392881196775, iteration: 103063
loss: 0.9812072515487671,grad_norm: 0.9999990851031215, iteration: 103064
loss: 1.0343701839447021,grad_norm: 0.9627850991687547, iteration: 103065
loss: 1.0496866703033447,grad_norm: 0.9999991165546323, iteration: 103066
loss: 0.984801709651947,grad_norm: 0.999999100036984, iteration: 103067
loss: 0.9961244463920593,grad_norm: 0.9999990329183003, iteration: 103068
loss: 0.9896935820579529,grad_norm: 0.9999993064251159, iteration: 103069
loss: 1.009920597076416,grad_norm: 0.9999992596719379, iteration: 103070
loss: 0.9941419363021851,grad_norm: 0.9999990030735806, iteration: 103071
loss: 0.9921920895576477,grad_norm: 0.8668324989352648, iteration: 103072
loss: 1.0090610980987549,grad_norm: 0.9999990807996019, iteration: 103073
loss: 1.0267677307128906,grad_norm: 0.9999990801245163, iteration: 103074
loss: 1.0151196718215942,grad_norm: 0.9999990374661837, iteration: 103075
loss: 0.997017502784729,grad_norm: 0.9999991062685174, iteration: 103076
loss: 0.9705038666725159,grad_norm: 0.9874864504003488, iteration: 103077
loss: 1.0061486959457397,grad_norm: 0.9999992168390287, iteration: 103078
loss: 0.9809423685073853,grad_norm: 0.999999042672839, iteration: 103079
loss: 0.9591983556747437,grad_norm: 0.9774789374482891, iteration: 103080
loss: 0.9918199181556702,grad_norm: 0.9389701948015373, iteration: 103081
loss: 1.029567837715149,grad_norm: 0.9248558737759006, iteration: 103082
loss: 1.0237466096878052,grad_norm: 0.9999991929946942, iteration: 103083
loss: 1.0791393518447876,grad_norm: 0.9999989775736634, iteration: 103084
loss: 1.0125232934951782,grad_norm: 0.9999992038307939, iteration: 103085
loss: 1.0165044069290161,grad_norm: 0.9917813332271005, iteration: 103086
loss: 1.0360645055770874,grad_norm: 0.9999991764879179, iteration: 103087
loss: 1.073973536491394,grad_norm: 0.8026666485234708, iteration: 103088
loss: 1.0077437162399292,grad_norm: 0.9999990737846498, iteration: 103089
loss: 0.9845099449157715,grad_norm: 0.9999992252458777, iteration: 103090
loss: 0.9845805168151855,grad_norm: 0.9707864799477413, iteration: 103091
loss: 0.9999246597290039,grad_norm: 0.9999991241035816, iteration: 103092
loss: 1.0115559101104736,grad_norm: 0.9987753941881791, iteration: 103093
loss: 0.9987455010414124,grad_norm: 0.9999994993845814, iteration: 103094
loss: 1.0437930822372437,grad_norm: 0.9999997405830039, iteration: 103095
loss: 1.0036624670028687,grad_norm: 0.9999991789352334, iteration: 103096
loss: 0.9838052988052368,grad_norm: 0.9999990308359126, iteration: 103097
loss: 0.9945576786994934,grad_norm: 0.9999991136088318, iteration: 103098
loss: 0.9997572302818298,grad_norm: 0.9999991725253152, iteration: 103099
loss: 0.9744135737419128,grad_norm: 0.9999993379299644, iteration: 103100
loss: 0.9973146915435791,grad_norm: 0.9999991669144115, iteration: 103101
loss: 1.0006572008132935,grad_norm: 0.9024768670777807, iteration: 103102
loss: 1.0414103269577026,grad_norm: 0.9999992936971577, iteration: 103103
loss: 1.0094033479690552,grad_norm: 0.9948029732563178, iteration: 103104
loss: 1.0092120170593262,grad_norm: 0.9280227261236269, iteration: 103105
loss: 0.9872548580169678,grad_norm: 0.9999989531057106, iteration: 103106
loss: 1.0199055671691895,grad_norm: 0.999999534114932, iteration: 103107
loss: 0.9847058653831482,grad_norm: 0.9999990169501668, iteration: 103108
loss: 0.9896469712257385,grad_norm: 0.987929713514633, iteration: 103109
loss: 0.976523756980896,grad_norm: 0.9999991081716054, iteration: 103110
loss: 1.0390636920928955,grad_norm: 0.9999990406136859, iteration: 103111
loss: 0.974014937877655,grad_norm: 0.999999083982559, iteration: 103112
loss: 1.0100438594818115,grad_norm: 0.9999991673372616, iteration: 103113
loss: 0.9973739385604858,grad_norm: 0.9999991830160143, iteration: 103114
loss: 0.9936211109161377,grad_norm: 0.9999991285864589, iteration: 103115
loss: 1.0207127332687378,grad_norm: 0.9999991826074963, iteration: 103116
loss: 1.0139881372451782,grad_norm: 0.9513238484315865, iteration: 103117
loss: 0.9976215958595276,grad_norm: 0.9999991443637152, iteration: 103118
loss: 1.0201678276062012,grad_norm: 0.9054907685670252, iteration: 103119
loss: 1.0091902017593384,grad_norm: 0.9999992340808479, iteration: 103120
loss: 1.0162678956985474,grad_norm: 0.9999990791787162, iteration: 103121
loss: 1.0013484954833984,grad_norm: 0.9947988669224289, iteration: 103122
loss: 0.9695088863372803,grad_norm: 0.9999990807666239, iteration: 103123
loss: 0.9870323538780212,grad_norm: 0.9999993488540696, iteration: 103124
loss: 1.041064977645874,grad_norm: 0.9999990887099902, iteration: 103125
loss: 0.9902511835098267,grad_norm: 0.9858736142784517, iteration: 103126
loss: 1.0320162773132324,grad_norm: 0.999999284975663, iteration: 103127
loss: 1.012163519859314,grad_norm: 0.923190871294339, iteration: 103128
loss: 1.0191024541854858,grad_norm: 0.9999991562571008, iteration: 103129
loss: 1.0232386589050293,grad_norm: 0.9999992154500472, iteration: 103130
loss: 0.9681044220924377,grad_norm: 0.9761337856034168, iteration: 103131
loss: 1.0114747285842896,grad_norm: 0.9912174938608671, iteration: 103132
loss: 1.0330519676208496,grad_norm: 0.9999991112997179, iteration: 103133
loss: 1.0109797716140747,grad_norm: 0.9999991039101815, iteration: 103134
loss: 1.0069859027862549,grad_norm: 0.999999194358828, iteration: 103135
loss: 0.9812883138656616,grad_norm: 0.8767950845671473, iteration: 103136
loss: 0.9937233924865723,grad_norm: 0.99999911312019, iteration: 103137
loss: 0.9853176474571228,grad_norm: 0.9808206323344192, iteration: 103138
loss: 1.0231690406799316,grad_norm: 0.9999991218682026, iteration: 103139
loss: 0.9801459312438965,grad_norm: 0.9999990728635733, iteration: 103140
loss: 0.956346333026886,grad_norm: 0.9999990671323699, iteration: 103141
loss: 1.007753610610962,grad_norm: 0.9999991585225827, iteration: 103142
loss: 0.9681872129440308,grad_norm: 0.9999992558546239, iteration: 103143
loss: 1.009919285774231,grad_norm: 0.9999993086294034, iteration: 103144
loss: 0.9966000318527222,grad_norm: 0.9240534947448443, iteration: 103145
loss: 1.018612027168274,grad_norm: 0.9999989741451373, iteration: 103146
loss: 0.9712728261947632,grad_norm: 0.950941935491013, iteration: 103147
loss: 1.0298259258270264,grad_norm: 0.9999994578301012, iteration: 103148
loss: 0.9747864603996277,grad_norm: 0.9495716984226097, iteration: 103149
loss: 1.0081626176834106,grad_norm: 0.9999990733163556, iteration: 103150
loss: 1.0045138597488403,grad_norm: 0.9640540418563592, iteration: 103151
loss: 0.9723250269889832,grad_norm: 0.9999991985547316, iteration: 103152
loss: 1.034632921218872,grad_norm: 0.9999992919996968, iteration: 103153
loss: 1.0157580375671387,grad_norm: 0.9999992662882499, iteration: 103154
loss: 1.0307345390319824,grad_norm: 0.9999990311428468, iteration: 103155
loss: 0.9516729116439819,grad_norm: 0.9999991054400577, iteration: 103156
loss: 1.016241192817688,grad_norm: 0.9018093936629906, iteration: 103157
loss: 0.9847294688224792,grad_norm: 0.9999989994535486, iteration: 103158
loss: 0.9786643981933594,grad_norm: 0.9999991716853089, iteration: 103159
loss: 0.9964846968650818,grad_norm: 0.9999990492505302, iteration: 103160
loss: 1.017278790473938,grad_norm: 0.9812759865546993, iteration: 103161
loss: 0.989990234375,grad_norm: 0.9972215741478347, iteration: 103162
loss: 1.0100065469741821,grad_norm: 0.9999990915492378, iteration: 103163
loss: 0.9918907284736633,grad_norm: 0.8867981722448659, iteration: 103164
loss: 0.9841976165771484,grad_norm: 0.8835733080850516, iteration: 103165
loss: 1.0066684484481812,grad_norm: 0.999999075739511, iteration: 103166
loss: 1.0196136236190796,grad_norm: 0.960478426724942, iteration: 103167
loss: 0.9720944762229919,grad_norm: 0.9944009458781092, iteration: 103168
loss: 0.959933876991272,grad_norm: 0.999999249188015, iteration: 103169
loss: 0.9765639901161194,grad_norm: 0.8794257014079324, iteration: 103170
loss: 1.0252995491027832,grad_norm: 0.9999992602420923, iteration: 103171
loss: 1.005435585975647,grad_norm: 0.9999991159525987, iteration: 103172
loss: 0.999701201915741,grad_norm: 0.9999990609941956, iteration: 103173
loss: 0.9767044186592102,grad_norm: 0.9999990701656699, iteration: 103174
loss: 0.9878096580505371,grad_norm: 0.9999991189210115, iteration: 103175
loss: 0.9618847370147705,grad_norm: 0.9999991920832086, iteration: 103176
loss: 1.0162466764450073,grad_norm: 0.9999991904680029, iteration: 103177
loss: 1.020251989364624,grad_norm: 0.9999991684884525, iteration: 103178
loss: 1.0225045680999756,grad_norm: 0.9956775062855032, iteration: 103179
loss: 0.9941095113754272,grad_norm: 0.9999990150096125, iteration: 103180
loss: 0.9971009492874146,grad_norm: 0.9999992782042744, iteration: 103181
loss: 1.0032763481140137,grad_norm: 0.999999330639968, iteration: 103182
loss: 0.9916067123413086,grad_norm: 0.9999992405054584, iteration: 103183
loss: 0.9719942808151245,grad_norm: 0.9999989729641956, iteration: 103184
loss: 0.9898914694786072,grad_norm: 0.9999993077601216, iteration: 103185
loss: 0.9708938002586365,grad_norm: 0.9999990405724127, iteration: 103186
loss: 1.0280344486236572,grad_norm: 0.9999989820354337, iteration: 103187
loss: 0.9917997717857361,grad_norm: 0.9404483982397273, iteration: 103188
loss: 0.9841365218162537,grad_norm: 0.9999992843941811, iteration: 103189
loss: 0.992401123046875,grad_norm: 0.8876757179743674, iteration: 103190
loss: 0.9974240064620972,grad_norm: 0.9999993750771373, iteration: 103191
loss: 0.9985927939414978,grad_norm: 0.9999992881772396, iteration: 103192
loss: 0.9650729894638062,grad_norm: 0.8979613859482397, iteration: 103193
loss: 0.9910062551498413,grad_norm: 0.9999989927979049, iteration: 103194
loss: 1.0325473546981812,grad_norm: 0.9999992824171834, iteration: 103195
loss: 1.0185884237289429,grad_norm: 0.9999991843097077, iteration: 103196
loss: 1.0228382349014282,grad_norm: 0.8979167217577425, iteration: 103197
loss: 0.9934818744659424,grad_norm: 0.9999990753186706, iteration: 103198
loss: 1.0178611278533936,grad_norm: 0.9999992603641283, iteration: 103199
loss: 0.9922595620155334,grad_norm: 0.9999989969942313, iteration: 103200
loss: 0.9489544034004211,grad_norm: 0.9593251842384533, iteration: 103201
loss: 1.0417168140411377,grad_norm: 0.9484792776726111, iteration: 103202
loss: 0.9601156115531921,grad_norm: 0.9999992442280784, iteration: 103203
loss: 1.0103760957717896,grad_norm: 0.9690803297979484, iteration: 103204
loss: 1.0272283554077148,grad_norm: 0.9999990823132102, iteration: 103205
loss: 1.0105011463165283,grad_norm: 0.9999989460539831, iteration: 103206
loss: 0.9720515012741089,grad_norm: 0.9999991574908645, iteration: 103207
loss: 1.0283845663070679,grad_norm: 0.9999991854418694, iteration: 103208
loss: 0.9911105036735535,grad_norm: 0.9999990787138073, iteration: 103209
loss: 0.9816978573799133,grad_norm: 0.9023524877677027, iteration: 103210
loss: 1.0096780061721802,grad_norm: 0.9999991587946041, iteration: 103211
loss: 0.9679293632507324,grad_norm: 0.9768080068287839, iteration: 103212
loss: 0.9953398108482361,grad_norm: 0.9999990310366605, iteration: 103213
loss: 0.9907050728797913,grad_norm: 0.999999106157352, iteration: 103214
loss: 0.9839543700218201,grad_norm: 0.9999991646504338, iteration: 103215
loss: 0.9593364000320435,grad_norm: 0.878385219912834, iteration: 103216
loss: 1.022415041923523,grad_norm: 0.9999992760265604, iteration: 103217
loss: 1.0219957828521729,grad_norm: 0.9999991057298667, iteration: 103218
loss: 0.9825348258018494,grad_norm: 0.9310711673587252, iteration: 103219
loss: 0.9967932105064392,grad_norm: 0.9999990503829391, iteration: 103220
loss: 0.9495258927345276,grad_norm: 0.9403856990597963, iteration: 103221
loss: 0.987057626247406,grad_norm: 0.9999991760193007, iteration: 103222
loss: 0.9766563773155212,grad_norm: 0.9999990415734651, iteration: 103223
loss: 1.0133541822433472,grad_norm: 0.9321717862033649, iteration: 103224
loss: 1.0148319005966187,grad_norm: 0.9999991709641884, iteration: 103225
loss: 0.9904493093490601,grad_norm: 0.9999990955977407, iteration: 103226
loss: 1.0334148406982422,grad_norm: 0.843097475811572, iteration: 103227
loss: 0.9655916094779968,grad_norm: 0.9999990949550616, iteration: 103228
loss: 1.0266574621200562,grad_norm: 0.9011783180835504, iteration: 103229
loss: 1.0377514362335205,grad_norm: 0.9999992259344517, iteration: 103230
loss: 1.012103796005249,grad_norm: 0.9999989598677174, iteration: 103231
loss: 0.9857719540596008,grad_norm: 0.9999991921066501, iteration: 103232
loss: 1.0237367153167725,grad_norm: 0.9999991212473898, iteration: 103233
loss: 1.023519515991211,grad_norm: 0.9999991588142395, iteration: 103234
loss: 1.0037106275558472,grad_norm: 0.9700057505044529, iteration: 103235
loss: 0.9804628491401672,grad_norm: 0.999999065427993, iteration: 103236
loss: 1.0069478750228882,grad_norm: 0.8811543971536682, iteration: 103237
loss: 0.9765108823776245,grad_norm: 0.9999994223505394, iteration: 103238
loss: 1.0063282251358032,grad_norm: 0.9999992063608623, iteration: 103239
loss: 1.0340781211853027,grad_norm: 0.8631194571774128, iteration: 103240
loss: 0.9782136678695679,grad_norm: 0.9999991538478931, iteration: 103241
loss: 0.9957764744758606,grad_norm: 0.9999991161910858, iteration: 103242
loss: 1.0136531591415405,grad_norm: 0.9999991221243293, iteration: 103243
loss: 1.0364630222320557,grad_norm: 0.9999989968958087, iteration: 103244
loss: 0.9915258884429932,grad_norm: 0.8964528523643868, iteration: 103245
loss: 0.9911428093910217,grad_norm: 0.999999093954074, iteration: 103246
loss: 0.9561532139778137,grad_norm: 0.9999991472278102, iteration: 103247
loss: 0.9769850373268127,grad_norm: 0.8739589166734473, iteration: 103248
loss: 1.0035804510116577,grad_norm: 0.9710516771879607, iteration: 103249
loss: 0.9984005093574524,grad_norm: 0.9999992914067878, iteration: 103250
loss: 0.9932027459144592,grad_norm: 0.9022070556435043, iteration: 103251
loss: 1.0054705142974854,grad_norm: 0.9366642826532742, iteration: 103252
loss: 1.0080742835998535,grad_norm: 0.9992919576484165, iteration: 103253
loss: 1.012671709060669,grad_norm: 0.9999991312810242, iteration: 103254
loss: 0.9976444840431213,grad_norm: 0.9999993053972323, iteration: 103255
loss: 1.007236123085022,grad_norm: 0.9999990869758003, iteration: 103256
loss: 0.9980041980743408,grad_norm: 0.9999992302989719, iteration: 103257
loss: 0.9773213863372803,grad_norm: 0.9256091436816403, iteration: 103258
loss: 1.0061448812484741,grad_norm: 0.9812966116303142, iteration: 103259
loss: 0.9666285514831543,grad_norm: 0.999999218293605, iteration: 103260
loss: 0.9943100214004517,grad_norm: 0.9269034952270871, iteration: 103261
loss: 1.0145255327224731,grad_norm: 0.9999993164617454, iteration: 103262
loss: 1.0035778284072876,grad_norm: 0.8095627350147563, iteration: 103263
loss: 1.0131088495254517,grad_norm: 0.9474935342999047, iteration: 103264
loss: 0.9734973907470703,grad_norm: 0.9999990123873467, iteration: 103265
loss: 0.9541478157043457,grad_norm: 0.9999992560036746, iteration: 103266
loss: 1.0265918970108032,grad_norm: 0.9999990181171872, iteration: 103267
loss: 1.0117095708847046,grad_norm: 0.9819434079912773, iteration: 103268
loss: 0.9883990287780762,grad_norm: 0.9839906474036942, iteration: 103269
loss: 1.015854835510254,grad_norm: 0.9999991095750881, iteration: 103270
loss: 0.9926391243934631,grad_norm: 0.9999991176836468, iteration: 103271
loss: 1.0082777738571167,grad_norm: 0.9999991299396378, iteration: 103272
loss: 1.008603811264038,grad_norm: 0.9999990884277259, iteration: 103273
loss: 0.9961369633674622,grad_norm: 0.9894621438199721, iteration: 103274
loss: 0.9837384819984436,grad_norm: 0.9216939310118218, iteration: 103275
loss: 1.0082677602767944,grad_norm: 0.9999992477963385, iteration: 103276
loss: 1.0182796716690063,grad_norm: 0.9991143893234858, iteration: 103277
loss: 1.0090322494506836,grad_norm: 0.8938441179404124, iteration: 103278
loss: 1.0257222652435303,grad_norm: 0.9372685004622702, iteration: 103279
loss: 0.9831496477127075,grad_norm: 0.9999989734276857, iteration: 103280
loss: 1.0119799375534058,grad_norm: 0.9979341381924068, iteration: 103281
loss: 1.0014352798461914,grad_norm: 0.9999990141958724, iteration: 103282
loss: 0.9893960952758789,grad_norm: 0.9268903944041135, iteration: 103283
loss: 0.9803969264030457,grad_norm: 0.9999992662963648, iteration: 103284
loss: 0.986475944519043,grad_norm: 0.9032372404893118, iteration: 103285
loss: 1.0077539682388306,grad_norm: 0.8974431840266888, iteration: 103286
loss: 0.962779700756073,grad_norm: 0.9916125715343177, iteration: 103287
loss: 0.9905667901039124,grad_norm: 0.9808857536308446, iteration: 103288
loss: 0.9980950951576233,grad_norm: 0.9152878082611673, iteration: 103289
loss: 1.0188559293746948,grad_norm: 0.9999990512901734, iteration: 103290
loss: 0.9879980087280273,grad_norm: 0.9578629471635249, iteration: 103291
loss: 0.9978604912757874,grad_norm: 0.9999992508643727, iteration: 103292
loss: 0.9946860074996948,grad_norm: 0.9999989396747201, iteration: 103293
loss: 1.0236660242080688,grad_norm: 0.9999991026845974, iteration: 103294
loss: 1.0224766731262207,grad_norm: 0.999100138262906, iteration: 103295
loss: 1.0050835609436035,grad_norm: 0.9999990873289715, iteration: 103296
loss: 0.9843834042549133,grad_norm: 0.963086990607813, iteration: 103297
loss: 1.009130835533142,grad_norm: 0.999999228465877, iteration: 103298
loss: 1.0039384365081787,grad_norm: 0.9999989966599812, iteration: 103299
loss: 0.9675017595291138,grad_norm: 0.999999173399835, iteration: 103300
loss: 0.9671045541763306,grad_norm: 0.9981825486463822, iteration: 103301
loss: 1.0278830528259277,grad_norm: 0.9827730597634938, iteration: 103302
loss: 0.9763539433479309,grad_norm: 0.8548451028310879, iteration: 103303
loss: 0.9620725512504578,grad_norm: 0.9095298877414004, iteration: 103304
loss: 1.004027247428894,grad_norm: 0.9999991245405456, iteration: 103305
loss: 0.9665489792823792,grad_norm: 0.9593884891182297, iteration: 103306
loss: 1.0020469427108765,grad_norm: 0.9999993276066778, iteration: 103307
loss: 0.9701754450798035,grad_norm: 0.9999990807268027, iteration: 103308
loss: 1.0103410482406616,grad_norm: 0.9999991148071169, iteration: 103309
loss: 1.0242401361465454,grad_norm: 0.9999992994519994, iteration: 103310
loss: 0.9863731861114502,grad_norm: 0.936480070532124, iteration: 103311
loss: 0.9719384908676147,grad_norm: 0.9999990987627299, iteration: 103312
loss: 1.001412034034729,grad_norm: 0.9999989380334173, iteration: 103313
loss: 1.0004448890686035,grad_norm: 0.9999991868289051, iteration: 103314
loss: 0.9656877517700195,grad_norm: 0.9999990233861625, iteration: 103315
loss: 0.9747059345245361,grad_norm: 0.9690199126460636, iteration: 103316
loss: 0.9637040495872498,grad_norm: 0.8630696219151034, iteration: 103317
loss: 0.9693922996520996,grad_norm: 0.9999990837642478, iteration: 103318
loss: 1.0248217582702637,grad_norm: 0.8586076770878376, iteration: 103319
loss: 0.9977311491966248,grad_norm: 0.9999992673500329, iteration: 103320
loss: 1.008103609085083,grad_norm: 0.9999990642787236, iteration: 103321
loss: 0.9676998853683472,grad_norm: 0.9547258642062489, iteration: 103322
loss: 1.0026875734329224,grad_norm: 0.9999993143514653, iteration: 103323
loss: 0.9889285564422607,grad_norm: 0.9999993048730133, iteration: 103324
loss: 1.0150799751281738,grad_norm: 0.9623923415311181, iteration: 103325
loss: 1.0040256977081299,grad_norm: 0.9999991753859384, iteration: 103326
loss: 0.964026689529419,grad_norm: 0.9999989161458666, iteration: 103327
loss: 0.9860243201255798,grad_norm: 0.9767191725179196, iteration: 103328
loss: 0.9913596510887146,grad_norm: 0.9092141852928078, iteration: 103329
loss: 0.9877064824104309,grad_norm: 0.9102659318093587, iteration: 103330
loss: 1.0004328489303589,grad_norm: 0.9999992693898377, iteration: 103331
loss: 1.0437979698181152,grad_norm: 0.9999997298752713, iteration: 103332
loss: 0.982082724571228,grad_norm: 0.8945136176220203, iteration: 103333
loss: 1.0168671607971191,grad_norm: 0.9999991638085608, iteration: 103334
loss: 0.9528908133506775,grad_norm: 0.8520054796862081, iteration: 103335
loss: 0.994504988193512,grad_norm: 0.9999991029247075, iteration: 103336
loss: 1.038316249847412,grad_norm: 0.9999991175730509, iteration: 103337
loss: 0.9652685523033142,grad_norm: 0.9999991814273382, iteration: 103338
loss: 0.9915449023246765,grad_norm: 0.9999991536837561, iteration: 103339
loss: 0.9946907758712769,grad_norm: 0.9999992553247284, iteration: 103340
loss: 1.0380842685699463,grad_norm: 0.9286017229025908, iteration: 103341
loss: 1.0385394096374512,grad_norm: 0.9999989688541048, iteration: 103342
loss: 1.0075628757476807,grad_norm: 0.9926375983646464, iteration: 103343
loss: 0.9877066612243652,grad_norm: 0.9512185163068865, iteration: 103344
loss: 1.0070408582687378,grad_norm: 0.9422250042418094, iteration: 103345
loss: 1.0134456157684326,grad_norm: 0.9938203861756484, iteration: 103346
loss: 0.9801861047744751,grad_norm: 0.9999989663402653, iteration: 103347
loss: 1.0155315399169922,grad_norm: 0.9999992258228296, iteration: 103348
loss: 1.0302550792694092,grad_norm: 0.915504676912477, iteration: 103349
loss: 0.9805788397789001,grad_norm: 0.9254382258756109, iteration: 103350
loss: 0.9873591661453247,grad_norm: 0.9999990682770058, iteration: 103351
loss: 0.9948034286499023,grad_norm: 0.9999989649623064, iteration: 103352
loss: 1.0276509523391724,grad_norm: 0.9999991685243627, iteration: 103353
loss: 1.0262404680252075,grad_norm: 0.999999185796207, iteration: 103354
loss: 0.9711815118789673,grad_norm: 0.9999991007403022, iteration: 103355
loss: 1.001402735710144,grad_norm: 0.9885226846574856, iteration: 103356
loss: 0.9959844350814819,grad_norm: 0.999999138201674, iteration: 103357
loss: 1.0058341026306152,grad_norm: 0.8565082011363765, iteration: 103358
loss: 0.9706019163131714,grad_norm: 0.9557667121278166, iteration: 103359
loss: 1.0007926225662231,grad_norm: 0.9637255508236239, iteration: 103360
loss: 1.0373111963272095,grad_norm: 0.9999991454872462, iteration: 103361
loss: 0.9852696657180786,grad_norm: 0.9999990819689074, iteration: 103362
loss: 0.9952632784843445,grad_norm: 0.7515446761890735, iteration: 103363
loss: 1.0052191019058228,grad_norm: 0.9482786603108082, iteration: 103364
loss: 1.0234007835388184,grad_norm: 0.9103111088595375, iteration: 103365
loss: 0.9977425336837769,grad_norm: 0.9279488002199399, iteration: 103366
loss: 0.9763370752334595,grad_norm: 0.999999117874893, iteration: 103367
loss: 1.1574233770370483,grad_norm: 1.0000000373344573, iteration: 103368
loss: 0.9725221395492554,grad_norm: 0.9999991202670887, iteration: 103369
loss: 0.9978319406509399,grad_norm: 0.9999990438162318, iteration: 103370
loss: 0.9993646740913391,grad_norm: 0.8472722290380097, iteration: 103371
loss: 0.9854410886764526,grad_norm: 0.8486245647829934, iteration: 103372
loss: 0.9879909157752991,grad_norm: 0.9999994654239466, iteration: 103373
loss: 0.9645765423774719,grad_norm: 0.9999991330671172, iteration: 103374
loss: 1.0130444765090942,grad_norm: 0.94221919257016, iteration: 103375
loss: 1.0316053628921509,grad_norm: 0.9736958657598023, iteration: 103376
loss: 1.0287258625030518,grad_norm: 0.9434848148932325, iteration: 103377
loss: 0.9950113892555237,grad_norm: 0.99999912439314, iteration: 103378
loss: 1.0103402137756348,grad_norm: 0.9999992891531844, iteration: 103379
loss: 0.9849163293838501,grad_norm: 0.9430788598909169, iteration: 103380
loss: 0.994446337223053,grad_norm: 0.9346061383958242, iteration: 103381
loss: 0.9904583692550659,grad_norm: 0.9999996092882043, iteration: 103382
loss: 1.0218758583068848,grad_norm: 0.9999995911467795, iteration: 103383
loss: 1.0053907632827759,grad_norm: 0.9999994021060852, iteration: 103384
loss: 1.027768611907959,grad_norm: 0.9999991767388267, iteration: 103385
loss: 0.9879669547080994,grad_norm: 0.9900689449965763, iteration: 103386
loss: 0.9724884033203125,grad_norm: 0.9999990266270894, iteration: 103387
loss: 0.9769702553749084,grad_norm: 0.9829176952587428, iteration: 103388
loss: 0.9591705799102783,grad_norm: 0.9999990713719701, iteration: 103389
loss: 1.0344486236572266,grad_norm: 0.9999991319479309, iteration: 103390
loss: 0.9914031624794006,grad_norm: 0.9251990559877107, iteration: 103391
loss: 1.0257863998413086,grad_norm: 0.9999989681728392, iteration: 103392
loss: 1.0045859813690186,grad_norm: 0.9999989767800868, iteration: 103393
loss: 0.9642207622528076,grad_norm: 0.9777943864273966, iteration: 103394
loss: 0.9904349446296692,grad_norm: 0.9004744938542815, iteration: 103395
loss: 0.9995949864387512,grad_norm: 0.9999992424349144, iteration: 103396
loss: 0.9933618307113647,grad_norm: 0.9281873989734442, iteration: 103397
loss: 0.999029815196991,grad_norm: 0.986171755921531, iteration: 103398
loss: 0.9850045442581177,grad_norm: 0.9641625043628603, iteration: 103399
loss: 1.0079044103622437,grad_norm: 0.9999992544687344, iteration: 103400
loss: 1.0351662635803223,grad_norm: 0.9999991088981066, iteration: 103401
loss: 1.0130254030227661,grad_norm: 0.9520317532033408, iteration: 103402
loss: 1.0733999013900757,grad_norm: 0.9999992611544204, iteration: 103403
loss: 0.9999578595161438,grad_norm: 0.9999989929589879, iteration: 103404
loss: 1.0116249322891235,grad_norm: 0.999999272990621, iteration: 103405
loss: 0.9951616525650024,grad_norm: 0.9572327879536763, iteration: 103406
loss: 0.9596304297447205,grad_norm: 0.9999989480457945, iteration: 103407
loss: 0.9851745963096619,grad_norm: 0.9999990533613524, iteration: 103408
loss: 0.9700245261192322,grad_norm: 0.9917999388035484, iteration: 103409
loss: 1.0613369941711426,grad_norm: 0.9999990207409252, iteration: 103410
loss: 0.9996049404144287,grad_norm: 0.9563275631470982, iteration: 103411
loss: 1.047257423400879,grad_norm: 0.9999990803592897, iteration: 103412
loss: 1.0165716409683228,grad_norm: 0.8480756172464344, iteration: 103413
loss: 0.9587522745132446,grad_norm: 0.9999991558511268, iteration: 103414
loss: 0.9978790283203125,grad_norm: 0.994807555546995, iteration: 103415
loss: 1.00632905960083,grad_norm: 0.999999121890873, iteration: 103416
loss: 0.9809623956680298,grad_norm: 0.9999991767569356, iteration: 103417
loss: 1.034563660621643,grad_norm: 0.9438168220037639, iteration: 103418
loss: 0.9953370690345764,grad_norm: 0.9999990745786833, iteration: 103419
loss: 0.9879555106163025,grad_norm: 0.9999992230202609, iteration: 103420
loss: 0.9739225506782532,grad_norm: 0.8661662971074215, iteration: 103421
loss: 0.9923728704452515,grad_norm: 0.8027006660578865, iteration: 103422
loss: 0.9775197505950928,grad_norm: 0.986027002698598, iteration: 103423
loss: 1.0107802152633667,grad_norm: 0.937723291696786, iteration: 103424
loss: 0.9930797219276428,grad_norm: 0.9628667018471448, iteration: 103425
loss: 0.9986252784729004,grad_norm: 0.9947377562041978, iteration: 103426
loss: 0.9829254150390625,grad_norm: 0.9137169975841483, iteration: 103427
loss: 0.9650669693946838,grad_norm: 0.8056452672757467, iteration: 103428
loss: 0.9839935898780823,grad_norm: 0.9999991636677934, iteration: 103429
loss: 0.9930152297019958,grad_norm: 0.9999993364711627, iteration: 103430
loss: 0.9956226348876953,grad_norm: 0.999999052291431, iteration: 103431
loss: 0.9427705407142639,grad_norm: 0.9999990802370212, iteration: 103432
loss: 1.0029828548431396,grad_norm: 0.999999091201527, iteration: 103433
loss: 1.013075590133667,grad_norm: 0.9784621899950696, iteration: 103434
loss: 1.0181379318237305,grad_norm: 0.9999991769897103, iteration: 103435
loss: 0.9731734395027161,grad_norm: 0.9999991863669498, iteration: 103436
loss: 0.9874503016471863,grad_norm: 0.9999991646879424, iteration: 103437
loss: 1.0317031145095825,grad_norm: 0.9742406642459277, iteration: 103438
loss: 1.0195960998535156,grad_norm: 0.9489797428747007, iteration: 103439
loss: 1.031265139579773,grad_norm: 0.9999999700463861, iteration: 103440
loss: 1.0038186311721802,grad_norm: 0.9852084691523094, iteration: 103441
loss: 1.0017988681793213,grad_norm: 0.999999072981465, iteration: 103442
loss: 1.077984094619751,grad_norm: 0.9999999478137123, iteration: 103443
loss: 1.0028128623962402,grad_norm: 0.9999990478871195, iteration: 103444
loss: 1.0104360580444336,grad_norm: 0.9999991264561441, iteration: 103445
loss: 1.0212204456329346,grad_norm: 0.9438575388628969, iteration: 103446
loss: 1.03779935836792,grad_norm: 0.8586268958791384, iteration: 103447
loss: 0.9661232233047485,grad_norm: 0.9999992248577859, iteration: 103448
loss: 1.0172747373580933,grad_norm: 0.9999991474239125, iteration: 103449
loss: 0.993535041809082,grad_norm: 0.9999992052738911, iteration: 103450
loss: 1.016945719718933,grad_norm: 0.9999992951598418, iteration: 103451
loss: 1.0431032180786133,grad_norm: 0.9999990655196794, iteration: 103452
loss: 1.0397578477859497,grad_norm: 0.999999205396152, iteration: 103453
loss: 0.978844165802002,grad_norm: 0.9999994521499838, iteration: 103454
loss: 0.9601216316223145,grad_norm: 0.9935266877774763, iteration: 103455
loss: 0.9882238507270813,grad_norm: 0.9999991088009106, iteration: 103456
loss: 1.0705095529556274,grad_norm: 0.99999909687697, iteration: 103457
loss: 1.0239853858947754,grad_norm: 0.9999997768020946, iteration: 103458
loss: 1.007199764251709,grad_norm: 0.9772186854330792, iteration: 103459
loss: 1.0196040868759155,grad_norm: 0.9883562639332952, iteration: 103460
loss: 1.0231308937072754,grad_norm: 0.9999991061494026, iteration: 103461
loss: 0.9857222437858582,grad_norm: 0.9147837613158069, iteration: 103462
loss: 1.0071253776550293,grad_norm: 0.9999992009250973, iteration: 103463
loss: 0.9948035478591919,grad_norm: 0.9999991783072716, iteration: 103464
loss: 0.9968536496162415,grad_norm: 0.8961223126582267, iteration: 103465
loss: 1.0342612266540527,grad_norm: 0.9999990546366344, iteration: 103466
loss: 0.9623922109603882,grad_norm: 0.9999991569605347, iteration: 103467
loss: 1.0036674737930298,grad_norm: 0.9999990632446274, iteration: 103468
loss: 1.0112296342849731,grad_norm: 0.9999991238660888, iteration: 103469
loss: 1.0122061967849731,grad_norm: 0.9999991556265777, iteration: 103470
loss: 1.1003895998001099,grad_norm: 0.9999993095422447, iteration: 103471
loss: 0.9878589510917664,grad_norm: 0.9999991158169023, iteration: 103472
loss: 1.0067583322525024,grad_norm: 0.9999990531743633, iteration: 103473
loss: 0.9925646185874939,grad_norm: 0.9999992000308283, iteration: 103474
loss: 0.9968026876449585,grad_norm: 0.9999992748447595, iteration: 103475
loss: 1.157138705253601,grad_norm: 0.9999993561423055, iteration: 103476
loss: 0.9917671084403992,grad_norm: 0.9999992069772731, iteration: 103477
loss: 1.012516975402832,grad_norm: 0.999999259061591, iteration: 103478
loss: 1.0050323009490967,grad_norm: 0.9999992155293369, iteration: 103479
loss: 1.03396737575531,grad_norm: 0.9999991729662091, iteration: 103480
loss: 0.9707716107368469,grad_norm: 0.9999991824414496, iteration: 103481
loss: 0.9653190970420837,grad_norm: 0.9999992500593916, iteration: 103482
loss: 1.0314242839813232,grad_norm: 0.9999992100408355, iteration: 103483
loss: 1.0310299396514893,grad_norm: 0.9999991470853224, iteration: 103484
loss: 1.0214840173721313,grad_norm: 0.9999990929066412, iteration: 103485
loss: 1.0088651180267334,grad_norm: 0.9149667099097485, iteration: 103486
loss: 0.9895583391189575,grad_norm: 0.9999991337347129, iteration: 103487
loss: 0.99828040599823,grad_norm: 0.9999990825590327, iteration: 103488
loss: 0.9525392055511475,grad_norm: 0.9999990769411313, iteration: 103489
loss: 0.9990076422691345,grad_norm: 0.9999990942067734, iteration: 103490
loss: 1.0098797082901,grad_norm: 0.999999073424816, iteration: 103491
loss: 1.0069639682769775,grad_norm: 0.9413096167321979, iteration: 103492
loss: 1.0070722103118896,grad_norm: 0.999999122128093, iteration: 103493
loss: 1.0074034929275513,grad_norm: 0.9999990062876576, iteration: 103494
loss: 1.028404951095581,grad_norm: 0.9249478361067817, iteration: 103495
loss: 0.976614236831665,grad_norm: 0.9999991329529746, iteration: 103496
loss: 0.9778639674186707,grad_norm: 0.988328047650255, iteration: 103497
loss: 0.9688605666160583,grad_norm: 0.9999992179164908, iteration: 103498
loss: 1.1133358478546143,grad_norm: 0.9999990531321795, iteration: 103499
loss: 1.008400559425354,grad_norm: 0.9999992168240354, iteration: 103500
loss: 0.9991357922554016,grad_norm: 0.9999989664590629, iteration: 103501
loss: 0.9784964919090271,grad_norm: 0.999999259708851, iteration: 103502
loss: 1.0125296115875244,grad_norm: 0.9999993423232549, iteration: 103503
loss: 1.0157132148742676,grad_norm: 0.9417981045203055, iteration: 103504
loss: 1.0176880359649658,grad_norm: 0.9518728148926573, iteration: 103505
loss: 0.9888330698013306,grad_norm: 0.9866291870735314, iteration: 103506
loss: 0.9911689162254333,grad_norm: 0.9999990412911967, iteration: 103507
loss: 0.9628629684448242,grad_norm: 0.9999990602335211, iteration: 103508
loss: 1.0444862842559814,grad_norm: 0.9999992391823025, iteration: 103509
loss: 1.0180509090423584,grad_norm: 0.9999991163836265, iteration: 103510
loss: 1.0083006620407104,grad_norm: 0.9999989968450347, iteration: 103511
loss: 0.9951043725013733,grad_norm: 0.9136707759357618, iteration: 103512
loss: 0.9936188459396362,grad_norm: 0.9999989982456328, iteration: 103513
loss: 0.9894180297851562,grad_norm: 0.9999991300147892, iteration: 103514
loss: 1.0345953702926636,grad_norm: 0.999999186524073, iteration: 103515
loss: 0.9874328374862671,grad_norm: 0.9999990362029026, iteration: 103516
loss: 1.0159673690795898,grad_norm: 0.9616960501851429, iteration: 103517
loss: 1.015660285949707,grad_norm: 0.999999173664681, iteration: 103518
loss: 1.0035439729690552,grad_norm: 0.9999991281530538, iteration: 103519
loss: 1.0132200717926025,grad_norm: 0.9204463728563452, iteration: 103520
loss: 1.0790835618972778,grad_norm: 0.9999994860739934, iteration: 103521
loss: 1.0467333793640137,grad_norm: 0.9999998613561787, iteration: 103522
loss: 1.036756992340088,grad_norm: 0.9999991834818146, iteration: 103523
loss: 0.9831763505935669,grad_norm: 0.9999990539543789, iteration: 103524
loss: 0.9799491763114929,grad_norm: 0.9999992568810965, iteration: 103525
loss: 1.031987190246582,grad_norm: 0.9999993101323201, iteration: 103526
loss: 0.9697434306144714,grad_norm: 0.934599800791445, iteration: 103527
loss: 0.97464919090271,grad_norm: 0.9999992189901622, iteration: 103528
loss: 1.0419940948486328,grad_norm: 0.9999990006031673, iteration: 103529
loss: 1.0013612508773804,grad_norm: 0.9999991674273259, iteration: 103530
loss: 1.004751205444336,grad_norm: 0.9999992720598357, iteration: 103531
loss: 0.9897986650466919,grad_norm: 0.9999989705136284, iteration: 103532
loss: 1.004929780960083,grad_norm: 0.9999995375225085, iteration: 103533
loss: 1.0236637592315674,grad_norm: 0.9999993959575334, iteration: 103534
loss: 1.0279557704925537,grad_norm: 0.9999993635706647, iteration: 103535
loss: 1.0325313806533813,grad_norm: 0.9930782313524349, iteration: 103536
loss: 0.9696590900421143,grad_norm: 0.9447130353960862, iteration: 103537
loss: 1.0118416547775269,grad_norm: 0.999999103523442, iteration: 103538
loss: 0.9885421395301819,grad_norm: 0.9716519586556647, iteration: 103539
loss: 1.0527763366699219,grad_norm: 0.9999996938993061, iteration: 103540
loss: 1.0005073547363281,grad_norm: 0.9999992866800098, iteration: 103541
loss: 0.997322678565979,grad_norm: 0.9633975681451196, iteration: 103542
loss: 1.0212023258209229,grad_norm: 0.9414622105579117, iteration: 103543
loss: 1.0015361309051514,grad_norm: 0.9999989941669701, iteration: 103544
loss: 1.0179023742675781,grad_norm: 0.9999991881885208, iteration: 103545
loss: 0.9898790121078491,grad_norm: 0.8652958967113007, iteration: 103546
loss: 0.9978920817375183,grad_norm: 0.9999991014281971, iteration: 103547
loss: 0.9931977987289429,grad_norm: 0.9109246558742329, iteration: 103548
loss: 1.0469721555709839,grad_norm: 0.9999991156092325, iteration: 103549
loss: 1.0288971662521362,grad_norm: 0.9236512294897323, iteration: 103550
loss: 1.0118812322616577,grad_norm: 0.956914172011654, iteration: 103551
loss: 1.002677321434021,grad_norm: 0.9999997656126653, iteration: 103552
loss: 1.010522723197937,grad_norm: 0.9999992819786736, iteration: 103553
loss: 1.0199944972991943,grad_norm: 0.9999991807763708, iteration: 103554
loss: 0.981140673160553,grad_norm: 0.9999991616018671, iteration: 103555
loss: 0.9829363226890564,grad_norm: 0.9471835762223104, iteration: 103556
loss: 0.9896731376647949,grad_norm: 0.999999192701147, iteration: 103557
loss: 1.0110126733779907,grad_norm: 0.9999992343000971, iteration: 103558
loss: 1.0077983140945435,grad_norm: 0.999796905559691, iteration: 103559
loss: 1.0136216878890991,grad_norm: 0.9999997622524132, iteration: 103560
loss: 0.9978422522544861,grad_norm: 0.9211124824843322, iteration: 103561
loss: 1.002482533454895,grad_norm: 0.9999990739194373, iteration: 103562
loss: 1.0070103406906128,grad_norm: 0.8428332363369372, iteration: 103563
loss: 1.001974105834961,grad_norm: 0.9423979911627852, iteration: 103564
loss: 1.0142583847045898,grad_norm: 0.9384537639827487, iteration: 103565
loss: 0.9691876769065857,grad_norm: 0.9999991460584505, iteration: 103566
loss: 0.9986233115196228,grad_norm: 0.9999990993435893, iteration: 103567
loss: 0.9821675419807434,grad_norm: 0.9999991039944993, iteration: 103568
loss: 1.0061825513839722,grad_norm: 0.9999991215242829, iteration: 103569
loss: 0.9644045829772949,grad_norm: 0.9575271363968425, iteration: 103570
loss: 0.9879535436630249,grad_norm: 0.9999991328776561, iteration: 103571
loss: 1.0783555507659912,grad_norm: 0.999999079506364, iteration: 103572
loss: 1.0128867626190186,grad_norm: 0.9999990864884554, iteration: 103573
loss: 0.9745864272117615,grad_norm: 0.9131626300704333, iteration: 103574
loss: 0.9425756931304932,grad_norm: 0.8919494018052535, iteration: 103575
loss: 1.0004780292510986,grad_norm: 0.9999992011496743, iteration: 103576
loss: 1.0010920763015747,grad_norm: 0.9999991954850364, iteration: 103577
loss: 0.9887529015541077,grad_norm: 0.9999992829932881, iteration: 103578
loss: 1.0202698707580566,grad_norm: 0.9834993461991962, iteration: 103579
loss: 0.9952793717384338,grad_norm: 0.9999990895371713, iteration: 103580
loss: 1.0734500885009766,grad_norm: 0.9999995495157994, iteration: 103581
loss: 0.9948086142539978,grad_norm: 0.9999993630572616, iteration: 103582
loss: 1.0268536806106567,grad_norm: 0.9697176827087626, iteration: 103583
loss: 0.9976431131362915,grad_norm: 0.9999991361540179, iteration: 103584
loss: 1.0131251811981201,grad_norm: 0.9999992179419274, iteration: 103585
loss: 0.9923372864723206,grad_norm: 0.9999993603596709, iteration: 103586
loss: 0.9826143980026245,grad_norm: 0.934413894772768, iteration: 103587
loss: 1.0236355066299438,grad_norm: 0.9999994755468308, iteration: 103588
loss: 1.0086696147918701,grad_norm: 0.9999991547866299, iteration: 103589
loss: 0.9819823503494263,grad_norm: 0.9214631404090612, iteration: 103590
loss: 1.009291172027588,grad_norm: 0.9590793271468351, iteration: 103591
loss: 1.0164880752563477,grad_norm: 0.9999994066331058, iteration: 103592
loss: 1.024892807006836,grad_norm: 0.9999991487760145, iteration: 103593
loss: 1.0070995092391968,grad_norm: 0.991833239048122, iteration: 103594
loss: 0.9955257177352905,grad_norm: 0.9374719925392565, iteration: 103595
loss: 1.0701043605804443,grad_norm: 0.9999991479988161, iteration: 103596
loss: 1.0136442184448242,grad_norm: 0.9999990944350282, iteration: 103597
loss: 1.0006043910980225,grad_norm: 0.999999585256203, iteration: 103598
loss: 0.9944674968719482,grad_norm: 0.9999989691220189, iteration: 103599
loss: 1.001673936843872,grad_norm: 0.9999990944508454, iteration: 103600
loss: 0.9751501083374023,grad_norm: 0.9999990757696352, iteration: 103601
loss: 0.9728856682777405,grad_norm: 0.9999993687098089, iteration: 103602
loss: 1.0201106071472168,grad_norm: 0.9350102032742478, iteration: 103603
loss: 0.9950792789459229,grad_norm: 0.9999992538701223, iteration: 103604
loss: 1.0023521184921265,grad_norm: 0.9999989895374977, iteration: 103605
loss: 0.9677943587303162,grad_norm: 0.9999992785952128, iteration: 103606
loss: 1.0132578611373901,grad_norm: 0.9887479000242304, iteration: 103607
loss: 1.002756953239441,grad_norm: 0.9208695234089027, iteration: 103608
loss: 1.0029231309890747,grad_norm: 0.9857031967169112, iteration: 103609
loss: 1.0296902656555176,grad_norm: 0.9999992502073106, iteration: 103610
loss: 1.02922523021698,grad_norm: 0.9330907730065143, iteration: 103611
loss: 1.006460189819336,grad_norm: 0.9999992627541769, iteration: 103612
loss: 1.040390133857727,grad_norm: 0.9163937573200125, iteration: 103613
loss: 0.9985325336456299,grad_norm: 0.9999991834098864, iteration: 103614
loss: 1.0001593828201294,grad_norm: 0.9411795979388455, iteration: 103615
loss: 1.0258748531341553,grad_norm: 0.9999990289284638, iteration: 103616
loss: 1.0123673677444458,grad_norm: 0.9999991729438662, iteration: 103617
loss: 1.0394463539123535,grad_norm: 0.9687279165180108, iteration: 103618
loss: 1.0605946779251099,grad_norm: 0.99999949321274, iteration: 103619
loss: 0.9766194224357605,grad_norm: 0.9999998268511988, iteration: 103620
loss: 0.9921535849571228,grad_norm: 0.9999991601707084, iteration: 103621
loss: 1.0078740119934082,grad_norm: 0.9999991804035667, iteration: 103622
loss: 0.9839239716529846,grad_norm: 0.9999990421186455, iteration: 103623
loss: 1.011922836303711,grad_norm: 0.9999993916488322, iteration: 103624
loss: 1.013215184211731,grad_norm: 0.9999997032523444, iteration: 103625
loss: 0.9777776598930359,grad_norm: 0.9999991456741804, iteration: 103626
loss: 1.0352494716644287,grad_norm: 0.9999992296829325, iteration: 103627
loss: 1.0080947875976562,grad_norm: 0.9593400900407771, iteration: 103628
loss: 1.0266674757003784,grad_norm: 0.9999991921267402, iteration: 103629
loss: 0.9830087423324585,grad_norm: 0.9999991278621101, iteration: 103630
loss: 1.0447618961334229,grad_norm: 0.9999992308910137, iteration: 103631
loss: 1.0054792165756226,grad_norm: 0.9837543729498639, iteration: 103632
loss: 1.0338335037231445,grad_norm: 0.9999992783593091, iteration: 103633
loss: 1.0280933380126953,grad_norm: 0.9999996527259739, iteration: 103634
loss: 1.015052318572998,grad_norm: 0.9999992021968103, iteration: 103635
loss: 0.9676978588104248,grad_norm: 0.9999990331051927, iteration: 103636
loss: 1.0825284719467163,grad_norm: 0.9999996570474834, iteration: 103637
loss: 1.0089365243911743,grad_norm: 0.9999988730685111, iteration: 103638
loss: 0.9969512224197388,grad_norm: 0.9999991378227937, iteration: 103639
loss: 1.017697811126709,grad_norm: 0.9999990556623559, iteration: 103640
loss: 0.9849735498428345,grad_norm: 0.9999990921254761, iteration: 103641
loss: 1.0051945447921753,grad_norm: 0.999999136935778, iteration: 103642
loss: 1.1463016271591187,grad_norm: 0.9999997925873338, iteration: 103643
loss: 1.0095442533493042,grad_norm: 0.9999991789682107, iteration: 103644
loss: 1.0638784170150757,grad_norm: 0.9999993006596363, iteration: 103645
loss: 0.9954656958580017,grad_norm: 0.9999991196616325, iteration: 103646
loss: 0.9864241480827332,grad_norm: 0.9999991600214897, iteration: 103647
loss: 0.9949662685394287,grad_norm: 0.9999999432742197, iteration: 103648
loss: 1.0385689735412598,grad_norm: 0.9999994588147367, iteration: 103649
loss: 0.9833289980888367,grad_norm: 0.9999989657468533, iteration: 103650
loss: 1.0283029079437256,grad_norm: 0.9999992454913695, iteration: 103651
loss: 1.0479378700256348,grad_norm: 0.9999991727599321, iteration: 103652
loss: 0.990201473236084,grad_norm: 0.9999993506461063, iteration: 103653
loss: 0.9902471303939819,grad_norm: 0.9999990530208509, iteration: 103654
loss: 0.9772319793701172,grad_norm: 0.9999990579986522, iteration: 103655
loss: 0.9872668385505676,grad_norm: 0.9999991151793662, iteration: 103656
loss: 0.9845635890960693,grad_norm: 0.9999990531809598, iteration: 103657
loss: 1.0005016326904297,grad_norm: 0.999999105006389, iteration: 103658
loss: 1.0061267614364624,grad_norm: 0.999999160153843, iteration: 103659
loss: 0.95444655418396,grad_norm: 0.9223787135544577, iteration: 103660
loss: 1.0129989385604858,grad_norm: 0.9999991542554812, iteration: 103661
loss: 0.9632033109664917,grad_norm: 0.9999992207399513, iteration: 103662
loss: 0.9775340557098389,grad_norm: 0.9999990652082605, iteration: 103663
loss: 1.0120890140533447,grad_norm: 0.9999993909089742, iteration: 103664
loss: 1.034813642501831,grad_norm: 0.999999220937327, iteration: 103665
loss: 1.0911027193069458,grad_norm: 0.9999993692422208, iteration: 103666
loss: 1.0622650384902954,grad_norm: 0.999999997034607, iteration: 103667
loss: 1.0246151685714722,grad_norm: 0.9999992728861997, iteration: 103668
loss: 1.0042681694030762,grad_norm: 0.9999996418939644, iteration: 103669
loss: 1.0444436073303223,grad_norm: 0.999999421698109, iteration: 103670
loss: 0.9804626703262329,grad_norm: 0.9999993284532585, iteration: 103671
loss: 0.9846097826957703,grad_norm: 0.9886655035624677, iteration: 103672
loss: 0.9833973050117493,grad_norm: 0.9904548812538068, iteration: 103673
loss: 1.0136513710021973,grad_norm: 0.999999245080052, iteration: 103674
loss: 1.0177178382873535,grad_norm: 0.914225599471466, iteration: 103675
loss: 1.0022698640823364,grad_norm: 0.999999018300578, iteration: 103676
loss: 0.9731491804122925,grad_norm: 0.9778390515401587, iteration: 103677
loss: 1.007552146911621,grad_norm: 0.999999062069354, iteration: 103678
loss: 0.977152943611145,grad_norm: 0.9131429189973274, iteration: 103679
loss: 0.945526123046875,grad_norm: 0.9999990059822615, iteration: 103680
loss: 1.0047893524169922,grad_norm: 0.9999992112375141, iteration: 103681
loss: 0.9630988836288452,grad_norm: 0.9999992014654225, iteration: 103682
loss: 1.0640000104904175,grad_norm: 0.9999996342207664, iteration: 103683
loss: 0.9972924590110779,grad_norm: 0.934693017821503, iteration: 103684
loss: 1.0232644081115723,grad_norm: 0.9543102963619561, iteration: 103685
loss: 0.9864168167114258,grad_norm: 0.9800779344626128, iteration: 103686
loss: 1.0503681898117065,grad_norm: 0.999999810273702, iteration: 103687
loss: 1.0107674598693848,grad_norm: 0.999999467804803, iteration: 103688
loss: 1.0138435363769531,grad_norm: 0.9999994068206848, iteration: 103689
loss: 1.0076897144317627,grad_norm: 0.9999992916892789, iteration: 103690
loss: 1.0212886333465576,grad_norm: 0.9578734927919634, iteration: 103691
loss: 1.0236217975616455,grad_norm: 0.9999990662556448, iteration: 103692
loss: 0.9583965539932251,grad_norm: 0.999999183589768, iteration: 103693
loss: 1.0080913305282593,grad_norm: 0.9999990628043423, iteration: 103694
loss: 1.024349331855774,grad_norm: 0.9785212691188495, iteration: 103695
loss: 0.9951303005218506,grad_norm: 0.9999993406973138, iteration: 103696
loss: 1.038055419921875,grad_norm: 0.9999992052687742, iteration: 103697
loss: 1.0146108865737915,grad_norm: 0.9906147104620621, iteration: 103698
loss: 0.9964035153388977,grad_norm: 0.9999992200058164, iteration: 103699
loss: 1.025857925415039,grad_norm: 0.9999990662696715, iteration: 103700
loss: 1.009154200553894,grad_norm: 0.9999998299940461, iteration: 103701
loss: 0.9925372004508972,grad_norm: 0.9999990733028685, iteration: 103702
loss: 1.0707097053527832,grad_norm: 0.9999990975802947, iteration: 103703
loss: 0.9793779253959656,grad_norm: 0.9999992472749659, iteration: 103704
loss: 0.9688779711723328,grad_norm: 0.9999992174506264, iteration: 103705
loss: 1.0058143138885498,grad_norm: 0.999999117882301, iteration: 103706
loss: 1.0086300373077393,grad_norm: 0.9999992287845644, iteration: 103707
loss: 0.9881817102432251,grad_norm: 0.9999991310450072, iteration: 103708
loss: 1.0132976770401,grad_norm: 0.9999991738288655, iteration: 103709
loss: 0.9694979786872864,grad_norm: 0.9814112975633278, iteration: 103710
loss: 1.0179368257522583,grad_norm: 0.795649757096357, iteration: 103711
loss: 1.0382788181304932,grad_norm: 0.8783433766886671, iteration: 103712
loss: 1.0097355842590332,grad_norm: 0.9999992480938569, iteration: 103713
loss: 0.9854301810264587,grad_norm: 0.9999991559303396, iteration: 103714
loss: 0.9918039441108704,grad_norm: 0.9999992189670853, iteration: 103715
loss: 0.9617071747779846,grad_norm: 0.9999991892079529, iteration: 103716
loss: 0.9818036556243896,grad_norm: 0.9705287695497438, iteration: 103717
loss: 1.0103837251663208,grad_norm: 0.9999989593006932, iteration: 103718
loss: 1.015748143196106,grad_norm: 0.9999994586340034, iteration: 103719
loss: 1.0137547254562378,grad_norm: 0.8839961556949198, iteration: 103720
loss: 1.0130908489227295,grad_norm: 0.9999990338795538, iteration: 103721
loss: 1.0378717184066772,grad_norm: 0.9999990706792627, iteration: 103722
loss: 0.961569607257843,grad_norm: 0.9999991180158604, iteration: 103723
loss: 1.0126121044158936,grad_norm: 0.9999991366469838, iteration: 103724
loss: 0.9638338685035706,grad_norm: 0.9999990454003238, iteration: 103725
loss: 1.014945149421692,grad_norm: 0.9999990704476788, iteration: 103726
loss: 0.9972093105316162,grad_norm: 0.9999991451049189, iteration: 103727
loss: 1.0160081386566162,grad_norm: 0.9999992389632555, iteration: 103728
loss: 1.0132824182510376,grad_norm: 0.9999991638915013, iteration: 103729
loss: 0.988858699798584,grad_norm: 0.9674232143647342, iteration: 103730
loss: 0.9907205700874329,grad_norm: 0.9470488076524296, iteration: 103731
loss: 1.0064542293548584,grad_norm: 0.9999991559760045, iteration: 103732
loss: 1.0003024339675903,grad_norm: 0.999999160640962, iteration: 103733
loss: 1.0085225105285645,grad_norm: 0.9769517932101834, iteration: 103734
loss: 1.0022481679916382,grad_norm: 0.9455647860907629, iteration: 103735
loss: 0.9914851188659668,grad_norm: 0.9592897801152352, iteration: 103736
loss: 0.9662503004074097,grad_norm: 0.8638712639138038, iteration: 103737
loss: 0.9959978461265564,grad_norm: 0.8400537475901686, iteration: 103738
loss: 0.9912430644035339,grad_norm: 0.9999990653157054, iteration: 103739
loss: 1.0270527601242065,grad_norm: 0.9999991018213087, iteration: 103740
loss: 0.9782804846763611,grad_norm: 0.9869108287684519, iteration: 103741
loss: 0.9841068387031555,grad_norm: 0.9999995521502014, iteration: 103742
loss: 1.0010665655136108,grad_norm: 0.9336545312717018, iteration: 103743
loss: 1.0504149198532104,grad_norm: 0.9999992151392777, iteration: 103744
loss: 0.9970808625221252,grad_norm: 0.9999992702219614, iteration: 103745
loss: 0.9937173128128052,grad_norm: 0.999999081752932, iteration: 103746
loss: 1.009171724319458,grad_norm: 0.9999992945526726, iteration: 103747
loss: 1.002377986907959,grad_norm: 0.9721094247487888, iteration: 103748
loss: 1.0172446966171265,grad_norm: 0.9999991505545558, iteration: 103749
loss: 1.0078073740005493,grad_norm: 0.9735491278318547, iteration: 103750
loss: 0.9999362230300903,grad_norm: 0.9193379431004735, iteration: 103751
loss: 1.0257951021194458,grad_norm: 0.9999990687324762, iteration: 103752
loss: 0.9646852612495422,grad_norm: 0.999999135543438, iteration: 103753
loss: 0.9715070724487305,grad_norm: 0.9999991385576525, iteration: 103754
loss: 0.9819180369377136,grad_norm: 0.9999990192629364, iteration: 103755
loss: 1.0082286596298218,grad_norm: 0.999999169685953, iteration: 103756
loss: 0.9815334677696228,grad_norm: 0.999999137122371, iteration: 103757
loss: 0.9805455207824707,grad_norm: 0.9999990981822642, iteration: 103758
loss: 1.0054664611816406,grad_norm: 0.9999991706354341, iteration: 103759
loss: 1.0227892398834229,grad_norm: 0.9999990849491546, iteration: 103760
loss: 1.0091803073883057,grad_norm: 0.9999991986326402, iteration: 103761
loss: 0.9718754887580872,grad_norm: 0.9127095290863543, iteration: 103762
loss: 1.003960371017456,grad_norm: 0.9999992360113124, iteration: 103763
loss: 0.9881913661956787,grad_norm: 0.9999992704452974, iteration: 103764
loss: 1.0013154745101929,grad_norm: 0.9999991534768673, iteration: 103765
loss: 1.0291792154312134,grad_norm: 0.9133235431994531, iteration: 103766
loss: 1.0112956762313843,grad_norm: 0.9999990985821318, iteration: 103767
loss: 0.9950896501541138,grad_norm: 0.9978044229882066, iteration: 103768
loss: 1.0009781122207642,grad_norm: 0.9999992526220488, iteration: 103769
loss: 1.0559141635894775,grad_norm: 0.9999991708916638, iteration: 103770
loss: 0.9925361275672913,grad_norm: 0.9746224733672639, iteration: 103771
loss: 0.9984913468360901,grad_norm: 0.9999991257156703, iteration: 103772
loss: 1.0131598711013794,grad_norm: 0.9963152629998016, iteration: 103773
loss: 0.9622794389724731,grad_norm: 0.9999992129638007, iteration: 103774
loss: 0.9658662676811218,grad_norm: 0.9999996272829795, iteration: 103775
loss: 1.028383731842041,grad_norm: 0.999999291090089, iteration: 103776
loss: 0.9944645762443542,grad_norm: 0.9999993546584097, iteration: 103777
loss: 0.9705881476402283,grad_norm: 0.8948548317308622, iteration: 103778
loss: 0.9393997192382812,grad_norm: 0.9999990517413351, iteration: 103779
loss: 0.997943639755249,grad_norm: 0.8971246665190871, iteration: 103780
loss: 0.9754683375358582,grad_norm: 0.9999991168375668, iteration: 103781
loss: 1.028711199760437,grad_norm: 0.9999992279713958, iteration: 103782
loss: 0.9821262955665588,grad_norm: 0.9999991509327093, iteration: 103783
loss: 0.9850482940673828,grad_norm: 0.9999989556917014, iteration: 103784
loss: 1.0207868814468384,grad_norm: 0.9999990263527838, iteration: 103785
loss: 1.0015050172805786,grad_norm: 0.9999991367163177, iteration: 103786
loss: 0.9528807997703552,grad_norm: 0.9999992009218412, iteration: 103787
loss: 1.0396648645401,grad_norm: 0.9999990457454172, iteration: 103788
loss: 1.0213278532028198,grad_norm: 0.9505511670399854, iteration: 103789
loss: 1.0062822103500366,grad_norm: 0.9999990715161918, iteration: 103790
loss: 1.000123381614685,grad_norm: 0.9658626246524975, iteration: 103791
loss: 1.0130784511566162,grad_norm: 0.9333885973147008, iteration: 103792
loss: 0.9911586046218872,grad_norm: 0.9999991662660331, iteration: 103793
loss: 1.0115481615066528,grad_norm: 0.8218072856035402, iteration: 103794
loss: 0.9891853928565979,grad_norm: 0.927907440605336, iteration: 103795
loss: 1.033217191696167,grad_norm: 0.9999989535752366, iteration: 103796
loss: 1.0165860652923584,grad_norm: 0.9999990377954415, iteration: 103797
loss: 0.9892852902412415,grad_norm: 0.9999990789621008, iteration: 103798
loss: 0.9971578121185303,grad_norm: 0.9938120402587379, iteration: 103799
loss: 0.9836838245391846,grad_norm: 0.9999990969518036, iteration: 103800
loss: 1.022202491760254,grad_norm: 0.9974358390238356, iteration: 103801
loss: 0.9701554775238037,grad_norm: 0.9999991390728966, iteration: 103802
loss: 0.9616611003875732,grad_norm: 0.9442517576447271, iteration: 103803
loss: 0.9989235401153564,grad_norm: 0.9710759200279275, iteration: 103804
loss: 0.9909449815750122,grad_norm: 0.9999990534086622, iteration: 103805
loss: 1.012628436088562,grad_norm: 0.9999992015406525, iteration: 103806
loss: 0.9981631636619568,grad_norm: 0.9999993245962562, iteration: 103807
loss: 1.0008291006088257,grad_norm: 0.9999991464760483, iteration: 103808
loss: 1.0078171491622925,grad_norm: 0.9999991348408823, iteration: 103809
loss: 1.0027834177017212,grad_norm: 0.9999991015953958, iteration: 103810
loss: 0.9664286971092224,grad_norm: 0.999999017048072, iteration: 103811
loss: 0.9940467476844788,grad_norm: 0.9999994173326407, iteration: 103812
loss: 0.9830939173698425,grad_norm: 0.9999991926520513, iteration: 103813
loss: 0.9833711385726929,grad_norm: 0.9838982134060387, iteration: 103814
loss: 1.02801513671875,grad_norm: 0.8433372547319471, iteration: 103815
loss: 0.9866510629653931,grad_norm: 0.740214801161944, iteration: 103816
loss: 1.0253061056137085,grad_norm: 0.9775924262149048, iteration: 103817
loss: 0.9827750325202942,grad_norm: 0.8876381110725102, iteration: 103818
loss: 1.021757960319519,grad_norm: 0.9740060216642878, iteration: 103819
loss: 1.0133968591690063,grad_norm: 0.9999990901753492, iteration: 103820
loss: 1.0360733270645142,grad_norm: 0.9999991196085267, iteration: 103821
loss: 0.9633344411849976,grad_norm: 0.9999990274118011, iteration: 103822
loss: 1.007683277130127,grad_norm: 0.9386246343637842, iteration: 103823
loss: 1.064307451248169,grad_norm: 0.9999995174418381, iteration: 103824
loss: 1.0122917890548706,grad_norm: 0.9999991140766362, iteration: 103825
loss: 0.9889870882034302,grad_norm: 0.9999991396114197, iteration: 103826
loss: 0.9710702300071716,grad_norm: 0.9307993190840471, iteration: 103827
loss: 0.9922811388969421,grad_norm: 0.9999992239914471, iteration: 103828
loss: 1.0104666948318481,grad_norm: 0.9999992143885861, iteration: 103829
loss: 1.0213600397109985,grad_norm: 0.9999994318584418, iteration: 103830
loss: 0.9735522866249084,grad_norm: 0.9999992062490752, iteration: 103831
loss: 0.9948237538337708,grad_norm: 0.9097380324224671, iteration: 103832
loss: 0.9860697984695435,grad_norm: 0.9818649323397965, iteration: 103833
loss: 0.958056628704071,grad_norm: 0.9287263417009112, iteration: 103834
loss: 1.0041393041610718,grad_norm: 0.9999989737986851, iteration: 103835
loss: 0.9921737909317017,grad_norm: 0.9657746092229661, iteration: 103836
loss: 0.9991682171821594,grad_norm: 0.9999992408475544, iteration: 103837
loss: 1.0068995952606201,grad_norm: 0.9999990475418111, iteration: 103838
loss: 0.9810258150100708,grad_norm: 0.999999200072418, iteration: 103839
loss: 1.0127235651016235,grad_norm: 0.9999991744793804, iteration: 103840
loss: 0.9920321106910706,grad_norm: 0.9999990299874338, iteration: 103841
loss: 0.9983101487159729,grad_norm: 0.999999071643921, iteration: 103842
loss: 1.0365828275680542,grad_norm: 0.9740651244261767, iteration: 103843
loss: 0.988287627696991,grad_norm: 0.9443802894846794, iteration: 103844
loss: 0.9590360522270203,grad_norm: 0.9999991009349668, iteration: 103845
loss: 0.9697586894035339,grad_norm: 0.9214117600048051, iteration: 103846
loss: 1.0237267017364502,grad_norm: 0.9999992986059727, iteration: 103847
loss: 1.0994763374328613,grad_norm: 0.9999995178782772, iteration: 103848
loss: 1.0447666645050049,grad_norm: 0.9858524995397364, iteration: 103849
loss: 1.0108898878097534,grad_norm: 0.8562419508996644, iteration: 103850
loss: 0.980910062789917,grad_norm: 0.8034252293881605, iteration: 103851
loss: 0.9730778336524963,grad_norm: 0.9854729949470236, iteration: 103852
loss: 1.058300256729126,grad_norm: 0.9999993026244841, iteration: 103853
loss: 0.993046760559082,grad_norm: 0.9999992355232425, iteration: 103854
loss: 0.9336748719215393,grad_norm: 0.9999991910217284, iteration: 103855
loss: 1.0201003551483154,grad_norm: 0.999999102449567, iteration: 103856
loss: 1.012001872062683,grad_norm: 0.9107956176085845, iteration: 103857
loss: 0.9863579273223877,grad_norm: 0.999998967817471, iteration: 103858
loss: 0.9989705681800842,grad_norm: 0.9999989296227867, iteration: 103859
loss: 1.0057957172393799,grad_norm: 0.9544757736151489, iteration: 103860
loss: 1.0149269104003906,grad_norm: 0.9999991010240876, iteration: 103861
loss: 0.9717968702316284,grad_norm: 0.9999991166672839, iteration: 103862
loss: 0.9826650023460388,grad_norm: 0.9809553898211423, iteration: 103863
loss: 0.9974188804626465,grad_norm: 0.9999990661612314, iteration: 103864
loss: 0.9666500687599182,grad_norm: 0.9999997348222556, iteration: 103865
loss: 1.0081839561462402,grad_norm: 0.9957283832474456, iteration: 103866
loss: 1.004563570022583,grad_norm: 0.9667664842384395, iteration: 103867
loss: 1.0339685678482056,grad_norm: 0.9730596594258277, iteration: 103868
loss: 1.0157713890075684,grad_norm: 0.999999209005953, iteration: 103869
loss: 0.9866849184036255,grad_norm: 0.9999990650188434, iteration: 103870
loss: 0.984140157699585,grad_norm: 0.9570728784355288, iteration: 103871
loss: 1.025305986404419,grad_norm: 0.9064355714726059, iteration: 103872
loss: 0.9856318831443787,grad_norm: 0.8771466902143397, iteration: 103873
loss: 1.0000554323196411,grad_norm: 0.9999992764334994, iteration: 103874
loss: 1.0495659112930298,grad_norm: 0.9999992478603317, iteration: 103875
loss: 0.9290376305580139,grad_norm: 0.9999990043476081, iteration: 103876
loss: 1.0169662237167358,grad_norm: 0.9999992873897634, iteration: 103877
loss: 0.9730533361434937,grad_norm: 0.9999991857265338, iteration: 103878
loss: 1.0039336681365967,grad_norm: 0.9010903839153365, iteration: 103879
loss: 1.02436101436615,grad_norm: 0.953999375843849, iteration: 103880
loss: 0.9943450093269348,grad_norm: 0.9999990627268517, iteration: 103881
loss: 1.0349775552749634,grad_norm: 0.9999991324497501, iteration: 103882
loss: 0.9839576482772827,grad_norm: 0.9020732617777997, iteration: 103883
loss: 0.9810318350791931,grad_norm: 0.9999992733212801, iteration: 103884
loss: 1.0071974992752075,grad_norm: 0.9999996577971855, iteration: 103885
loss: 1.0053009986877441,grad_norm: 0.9999991687182598, iteration: 103886
loss: 0.9710993766784668,grad_norm: 0.9212872459550501, iteration: 103887
loss: 0.9902634620666504,grad_norm: 0.9999990526236064, iteration: 103888
loss: 1.0030639171600342,grad_norm: 0.9999990201512035, iteration: 103889
loss: 1.0042691230773926,grad_norm: 0.9983068213711239, iteration: 103890
loss: 0.9590793251991272,grad_norm: 0.999999250496857, iteration: 103891
loss: 1.043772578239441,grad_norm: 0.9999992489315451, iteration: 103892
loss: 0.9955915212631226,grad_norm: 0.971368155872093, iteration: 103893
loss: 0.9879698157310486,grad_norm: 0.9999990391174759, iteration: 103894
loss: 1.031874179840088,grad_norm: 0.9999990696518388, iteration: 103895
loss: 0.9904808402061462,grad_norm: 0.9999991659909823, iteration: 103896
loss: 1.0284491777420044,grad_norm: 0.9999990913658808, iteration: 103897
loss: 1.0364962816238403,grad_norm: 0.9999990680898395, iteration: 103898
loss: 0.957809329032898,grad_norm: 0.9999991777032021, iteration: 103899
loss: 0.9866276383399963,grad_norm: 0.9999991495440886, iteration: 103900
loss: 1.0066266059875488,grad_norm: 0.9902778511886315, iteration: 103901
loss: 0.965122640132904,grad_norm: 0.999999097154326, iteration: 103902
loss: 0.9773176312446594,grad_norm: 0.9999990127439439, iteration: 103903
loss: 0.9969583749771118,grad_norm: 0.9381468493062318, iteration: 103904
loss: 1.0033141374588013,grad_norm: 0.9999991131545374, iteration: 103905
loss: 0.9789130091667175,grad_norm: 0.9999990639526424, iteration: 103906
loss: 1.002728819847107,grad_norm: 0.8297502626249369, iteration: 103907
loss: 1.003381609916687,grad_norm: 0.9999990902369732, iteration: 103908
loss: 0.9743524789810181,grad_norm: 0.9999991446597888, iteration: 103909
loss: 0.9832717776298523,grad_norm: 0.9195228517844449, iteration: 103910
loss: 0.9996014833450317,grad_norm: 0.999999261531543, iteration: 103911
loss: 0.9614168405532837,grad_norm: 0.9999992237115874, iteration: 103912
loss: 0.9654195308685303,grad_norm: 0.9999994325771482, iteration: 103913
loss: 1.020154356956482,grad_norm: 0.9146655070633041, iteration: 103914
loss: 1.0145334005355835,grad_norm: 0.956104947084722, iteration: 103915
loss: 1.0074971914291382,grad_norm: 0.9999991129196455, iteration: 103916
loss: 0.9990218877792358,grad_norm: 0.999999212023436, iteration: 103917
loss: 1.0096032619476318,grad_norm: 0.999999079688095, iteration: 103918
loss: 1.021346092224121,grad_norm: 0.999999071084818, iteration: 103919
loss: 1.0070441961288452,grad_norm: 0.999999245177466, iteration: 103920
loss: 0.9831692576408386,grad_norm: 0.8838470184236874, iteration: 103921
loss: 0.980195164680481,grad_norm: 0.9999991179235495, iteration: 103922
loss: 1.0028029680252075,grad_norm: 0.9999992584103813, iteration: 103923
loss: 1.0002690553665161,grad_norm: 0.9999990468347697, iteration: 103924
loss: 1.030167579650879,grad_norm: 0.9999991600331847, iteration: 103925
loss: 0.9776748418807983,grad_norm: 0.9999991709582287, iteration: 103926
loss: 1.0316721200942993,grad_norm: 0.934015916720174, iteration: 103927
loss: 0.9815160632133484,grad_norm: 0.9999989663898201, iteration: 103928
loss: 0.9983264803886414,grad_norm: 0.999999123732531, iteration: 103929
loss: 1.0144414901733398,grad_norm: 0.9693232172736204, iteration: 103930
loss: 1.0110028982162476,grad_norm: 0.9999990400119053, iteration: 103931
loss: 1.0059669017791748,grad_norm: 0.9999991987365076, iteration: 103932
loss: 1.0170639753341675,grad_norm: 0.9999989268699537, iteration: 103933
loss: 0.9705023169517517,grad_norm: 0.9999990400360855, iteration: 103934
loss: 0.9844420552253723,grad_norm: 0.9999991482222604, iteration: 103935
loss: 0.9882280230522156,grad_norm: 0.9999991609688627, iteration: 103936
loss: 1.0526577234268188,grad_norm: 0.8966905904460541, iteration: 103937
loss: 0.9866762757301331,grad_norm: 0.9476362378569864, iteration: 103938
loss: 1.0207107067108154,grad_norm: 0.9999991561375599, iteration: 103939
loss: 0.9667662978172302,grad_norm: 0.9999989995682919, iteration: 103940
loss: 1.0023287534713745,grad_norm: 0.9371591401754251, iteration: 103941
loss: 0.9966073632240295,grad_norm: 0.9999991713273634, iteration: 103942
loss: 1.0402727127075195,grad_norm: 0.9999991331594891, iteration: 103943
loss: 1.0054712295532227,grad_norm: 0.9999991989257515, iteration: 103944
loss: 0.9914116263389587,grad_norm: 0.8197527572558552, iteration: 103945
loss: 0.9709378480911255,grad_norm: 0.8372041271475023, iteration: 103946
loss: 0.9910083413124084,grad_norm: 0.9999991133598767, iteration: 103947
loss: 0.9893102049827576,grad_norm: 0.9999990417450331, iteration: 103948
loss: 0.9796848297119141,grad_norm: 0.9999994697467617, iteration: 103949
loss: 1.0070843696594238,grad_norm: 0.999999096180405, iteration: 103950
loss: 0.9865143299102783,grad_norm: 0.9999991003580638, iteration: 103951
loss: 1.0000089406967163,grad_norm: 0.955408428821105, iteration: 103952
loss: 1.020034909248352,grad_norm: 0.9999992215246502, iteration: 103953
loss: 1.0046581029891968,grad_norm: 0.9999989999948692, iteration: 103954
loss: 0.986561119556427,grad_norm: 0.9554515504400833, iteration: 103955
loss: 0.9697738885879517,grad_norm: 0.9328894579461796, iteration: 103956
loss: 0.9926270842552185,grad_norm: 0.9999991107444641, iteration: 103957
loss: 0.9743474721908569,grad_norm: 0.9754249302956274, iteration: 103958
loss: 1.0253950357437134,grad_norm: 0.9999992372618202, iteration: 103959
loss: 1.0122584104537964,grad_norm: 0.9844601768530397, iteration: 103960
loss: 0.9905940294265747,grad_norm: 0.9999989910530227, iteration: 103961
loss: 0.9740815162658691,grad_norm: 0.9999989755377533, iteration: 103962
loss: 0.9989751577377319,grad_norm: 0.8896010065041525, iteration: 103963
loss: 0.9993594884872437,grad_norm: 0.9999992500628555, iteration: 103964
loss: 0.9904896020889282,grad_norm: 0.8589465740280176, iteration: 103965
loss: 0.9764955639839172,grad_norm: 0.9708296386658182, iteration: 103966
loss: 0.9618444442749023,grad_norm: 0.9999991206026803, iteration: 103967
loss: 1.0300776958465576,grad_norm: 0.9999991109788938, iteration: 103968
loss: 1.0126959085464478,grad_norm: 0.9870797856545201, iteration: 103969
loss: 1.0023698806762695,grad_norm: 0.9999991196025431, iteration: 103970
loss: 1.0088038444519043,grad_norm: 0.9999991421161629, iteration: 103971
loss: 1.0080227851867676,grad_norm: 0.9999991098309229, iteration: 103972
loss: 1.0060781240463257,grad_norm: 0.9377337874196765, iteration: 103973
loss: 0.9870319366455078,grad_norm: 0.9927393390013421, iteration: 103974
loss: 1.0472800731658936,grad_norm: 0.9999992599832629, iteration: 103975
loss: 1.0110549926757812,grad_norm: 0.9999992137647086, iteration: 103976
loss: 1.008515477180481,grad_norm: 0.9999992119096832, iteration: 103977
loss: 1.0346760749816895,grad_norm: 0.9999992361481448, iteration: 103978
loss: 1.0113314390182495,grad_norm: 0.9999990523426532, iteration: 103979
loss: 0.9529209136962891,grad_norm: 0.9999991189887041, iteration: 103980
loss: 1.008989691734314,grad_norm: 0.8917574603903199, iteration: 103981
loss: 0.9356566667556763,grad_norm: 0.9999991406856361, iteration: 103982
loss: 1.003853440284729,grad_norm: 0.918648138457757, iteration: 103983
loss: 1.0194519758224487,grad_norm: 0.9999991050842024, iteration: 103984
loss: 1.0024141073226929,grad_norm: 0.9267576470578645, iteration: 103985
loss: 1.0071582794189453,grad_norm: 0.9999991393692285, iteration: 103986
loss: 1.0133943557739258,grad_norm: 0.9999991053321544, iteration: 103987
loss: 1.0351253747940063,grad_norm: 0.9999992040593524, iteration: 103988
loss: 1.0183556079864502,grad_norm: 0.924452106591322, iteration: 103989
loss: 1.0250154733657837,grad_norm: 0.9999990878006862, iteration: 103990
loss: 1.0444366931915283,grad_norm: 0.9999993635405694, iteration: 103991
loss: 0.9446063041687012,grad_norm: 0.9999991846085164, iteration: 103992
loss: 1.0012125968933105,grad_norm: 0.9999990326692478, iteration: 103993
loss: 0.9989648461341858,grad_norm: 0.9999990966894544, iteration: 103994
loss: 1.046383023262024,grad_norm: 0.9999993335313527, iteration: 103995
loss: 1.0142390727996826,grad_norm: 0.9999992360524811, iteration: 103996
loss: 1.017810583114624,grad_norm: 0.9999990246838005, iteration: 103997
loss: 1.0050830841064453,grad_norm: 0.9849641470022622, iteration: 103998
loss: 0.9930731654167175,grad_norm: 0.9630842110624362, iteration: 103999
loss: 1.021223783493042,grad_norm: 0.9572799333569028, iteration: 104000
loss: 0.9792554974555969,grad_norm: 0.7730233651286932, iteration: 104001
loss: 1.0225690603256226,grad_norm: 0.9999993473072025, iteration: 104002
loss: 1.0014764070510864,grad_norm: 0.9999991665023735, iteration: 104003
loss: 0.9664523005485535,grad_norm: 0.999999247157688, iteration: 104004
loss: 0.9685578942298889,grad_norm: 0.9999990649363752, iteration: 104005
loss: 1.025061011314392,grad_norm: 0.916470504237125, iteration: 104006
loss: 1.020318865776062,grad_norm: 0.9999992528788395, iteration: 104007
loss: 1.0071896314620972,grad_norm: 0.9999990024649527, iteration: 104008
loss: 1.007712483406067,grad_norm: 0.9999991050043411, iteration: 104009
loss: 1.0091878175735474,grad_norm: 0.9999990758272674, iteration: 104010
loss: 0.9771574139595032,grad_norm: 0.9999991230269324, iteration: 104011
loss: 0.9937642812728882,grad_norm: 0.9999990971162833, iteration: 104012
loss: 0.9912645816802979,grad_norm: 0.8162371939112264, iteration: 104013
loss: 1.0067007541656494,grad_norm: 0.9999991358615219, iteration: 104014
loss: 1.0114094018936157,grad_norm: 0.9999991700527253, iteration: 104015
loss: 0.9760331511497498,grad_norm: 0.9999990924955687, iteration: 104016
loss: 1.0157028436660767,grad_norm: 0.9692058114966732, iteration: 104017
loss: 0.9441086053848267,grad_norm: 0.9999992943207049, iteration: 104018
loss: 1.0318539142608643,grad_norm: 0.8679589009551938, iteration: 104019
loss: 0.9835005402565002,grad_norm: 0.9999991225093879, iteration: 104020
loss: 1.0090124607086182,grad_norm: 0.957683668253077, iteration: 104021
loss: 1.0011283159255981,grad_norm: 0.999999123674685, iteration: 104022
loss: 0.9984933137893677,grad_norm: 0.9999990863064261, iteration: 104023
loss: 0.9836113452911377,grad_norm: 0.9999989832969467, iteration: 104024
loss: 0.9787346124649048,grad_norm: 0.9969446656857611, iteration: 104025
loss: 1.0059128999710083,grad_norm: 0.999999197132311, iteration: 104026
loss: 0.9814906120300293,grad_norm: 0.9836668885483996, iteration: 104027
loss: 1.0468287467956543,grad_norm: 0.9999991933373283, iteration: 104028
loss: 0.949739396572113,grad_norm: 0.9999990176392183, iteration: 104029
loss: 1.0121030807495117,grad_norm: 0.9999993928267056, iteration: 104030
loss: 1.0031031370162964,grad_norm: 0.9999991190502642, iteration: 104031
loss: 0.9765943288803101,grad_norm: 0.954600025391895, iteration: 104032
loss: 0.9876829981803894,grad_norm: 0.9276836684985499, iteration: 104033
loss: 1.0361109972000122,grad_norm: 0.9999992875642687, iteration: 104034
loss: 0.9654948711395264,grad_norm: 0.9999990209403056, iteration: 104035
loss: 1.012843370437622,grad_norm: 0.9249909489543824, iteration: 104036
loss: 1.0118876695632935,grad_norm: 0.9393265890044367, iteration: 104037
loss: 1.0180854797363281,grad_norm: 0.9999990608708733, iteration: 104038
loss: 1.010107398033142,grad_norm: 0.99999934657202, iteration: 104039
loss: 1.0142439603805542,grad_norm: 0.9999991656141266, iteration: 104040
loss: 1.0110118389129639,grad_norm: 0.9999991428111551, iteration: 104041
loss: 0.9983935356140137,grad_norm: 0.9295548596570616, iteration: 104042
loss: 1.0751583576202393,grad_norm: 0.9999997427099302, iteration: 104043
loss: 1.0237343311309814,grad_norm: 0.9999989684742986, iteration: 104044
loss: 1.0118557214736938,grad_norm: 0.9443834923947225, iteration: 104045
loss: 0.9845374822616577,grad_norm: 0.9999991828951221, iteration: 104046
loss: 0.966884434223175,grad_norm: 0.9999991704310024, iteration: 104047
loss: 1.0180141925811768,grad_norm: 0.9999988991692171, iteration: 104048
loss: 0.9946132302284241,grad_norm: 0.9999990479162741, iteration: 104049
loss: 0.9952662587165833,grad_norm: 0.9999990754231844, iteration: 104050
loss: 0.985727846622467,grad_norm: 0.9999990595127843, iteration: 104051
loss: 1.0264174938201904,grad_norm: 0.9999991631227683, iteration: 104052
loss: 1.000947117805481,grad_norm: 0.9986077851479193, iteration: 104053
loss: 1.0129424333572388,grad_norm: 0.9999991432706337, iteration: 104054
loss: 0.9917860627174377,grad_norm: 0.9727632750076639, iteration: 104055
loss: 1.0087089538574219,grad_norm: 0.9999992934090244, iteration: 104056
loss: 0.9971709847450256,grad_norm: 0.9999989683000137, iteration: 104057
loss: 0.950411319732666,grad_norm: 0.9999992670534859, iteration: 104058
loss: 1.0347862243652344,grad_norm: 0.9999997466411852, iteration: 104059
loss: 1.0032848119735718,grad_norm: 0.9999989725685529, iteration: 104060
loss: 1.0351450443267822,grad_norm: 0.9999989244797605, iteration: 104061
loss: 0.9767921566963196,grad_norm: 0.9999991122367204, iteration: 104062
loss: 1.0083831548690796,grad_norm: 0.9999996680570321, iteration: 104063
loss: 1.0032217502593994,grad_norm: 0.9999991687677701, iteration: 104064
loss: 1.017471432685852,grad_norm: 0.9999990925854947, iteration: 104065
loss: 0.982404351234436,grad_norm: 0.9494437550269034, iteration: 104066
loss: 1.0764728784561157,grad_norm: 0.9999992885187147, iteration: 104067
loss: 0.9937491416931152,grad_norm: 0.9999991257399252, iteration: 104068
loss: 1.0045167207717896,grad_norm: 0.999999020609778, iteration: 104069
loss: 0.994723379611969,grad_norm: 0.9805798449311375, iteration: 104070
loss: 1.0197031497955322,grad_norm: 0.9999992599222491, iteration: 104071
loss: 0.9891958832740784,grad_norm: 0.9999991787129555, iteration: 104072
loss: 1.0449682474136353,grad_norm: 0.9999996628705777, iteration: 104073
loss: 1.0403409004211426,grad_norm: 0.9999991678687434, iteration: 104074
loss: 1.0126558542251587,grad_norm: 0.9643653169814164, iteration: 104075
loss: 0.9929832816123962,grad_norm: 0.9999991924880678, iteration: 104076
loss: 0.9826310873031616,grad_norm: 0.9754069332451178, iteration: 104077
loss: 0.9893207550048828,grad_norm: 0.9999992341434707, iteration: 104078
loss: 0.9918716549873352,grad_norm: 0.9999990343843285, iteration: 104079
loss: 1.0177792310714722,grad_norm: 0.9999990923880921, iteration: 104080
loss: 0.9824735522270203,grad_norm: 0.9999991489811619, iteration: 104081
loss: 1.0085482597351074,grad_norm: 0.9213025951171482, iteration: 104082
loss: 1.0813605785369873,grad_norm: 0.9604164615954075, iteration: 104083
loss: 1.052405834197998,grad_norm: 0.9999991181385027, iteration: 104084
loss: 0.9828236103057861,grad_norm: 0.9999991300521142, iteration: 104085
loss: 1.0296114683151245,grad_norm: 0.9999992091236851, iteration: 104086
loss: 0.9757789373397827,grad_norm: 0.9999990747523476, iteration: 104087
loss: 1.0290111303329468,grad_norm: 0.9999991363746344, iteration: 104088
loss: 0.9897589087486267,grad_norm: 0.9999991679052483, iteration: 104089
loss: 0.996838390827179,grad_norm: 0.9843376218225101, iteration: 104090
loss: 0.9981268048286438,grad_norm: 0.9999994718844488, iteration: 104091
loss: 0.9800030589103699,grad_norm: 0.9837055449973434, iteration: 104092
loss: 1.0080928802490234,grad_norm: 0.9999991694489493, iteration: 104093
loss: 1.002960443496704,grad_norm: 0.9999991984550631, iteration: 104094
loss: 1.1340700387954712,grad_norm: 0.9999997694893439, iteration: 104095
loss: 1.0006808042526245,grad_norm: 0.8762209971100257, iteration: 104096
loss: 1.036237359046936,grad_norm: 0.933005822095495, iteration: 104097
loss: 0.9887453317642212,grad_norm: 0.9999990148967863, iteration: 104098
loss: 0.9934701323509216,grad_norm: 0.9608154148812739, iteration: 104099
loss: 0.9899508953094482,grad_norm: 0.999999025337836, iteration: 104100
loss: 1.01253080368042,grad_norm: 0.9999990992236238, iteration: 104101
loss: 1.0012943744659424,grad_norm: 0.8955855580375162, iteration: 104102
loss: 0.9877822995185852,grad_norm: 0.999999055139007, iteration: 104103
loss: 0.980543315410614,grad_norm: 0.994724496260033, iteration: 104104
loss: 0.9743051528930664,grad_norm: 0.8353734394733706, iteration: 104105
loss: 1.0008702278137207,grad_norm: 0.9999992061294295, iteration: 104106
loss: 0.9953132271766663,grad_norm: 0.9999992020745727, iteration: 104107
loss: 0.9699234962463379,grad_norm: 0.9999992416248155, iteration: 104108
loss: 0.9715439677238464,grad_norm: 0.9999991071685607, iteration: 104109
loss: 1.0333119630813599,grad_norm: 0.9999991505764938, iteration: 104110
loss: 0.9962301254272461,grad_norm: 0.9927409571021176, iteration: 104111
loss: 0.975719153881073,grad_norm: 0.999999127513902, iteration: 104112
loss: 0.9979564547538757,grad_norm: 0.9999993997966832, iteration: 104113
loss: 0.9857578277587891,grad_norm: 0.9999992026869656, iteration: 104114
loss: 1.0277048349380493,grad_norm: 0.9999991828660489, iteration: 104115
loss: 1.0474112033843994,grad_norm: 0.999999013982079, iteration: 104116
loss: 1.012036919593811,grad_norm: 0.9999992727961571, iteration: 104117
loss: 1.0233256816864014,grad_norm: 0.9121818042961842, iteration: 104118
loss: 0.9906566143035889,grad_norm: 0.9999991403495789, iteration: 104119
loss: 1.0041557550430298,grad_norm: 0.9999993447590553, iteration: 104120
loss: 0.9955511689186096,grad_norm: 0.9999991571856752, iteration: 104121
loss: 0.974183976650238,grad_norm: 0.9999990806239883, iteration: 104122
loss: 0.9964601397514343,grad_norm: 0.883435350995514, iteration: 104123
loss: 1.0076422691345215,grad_norm: 0.999999311414951, iteration: 104124
loss: 0.9625746011734009,grad_norm: 0.9999991910781364, iteration: 104125
loss: 0.9878737330436707,grad_norm: 0.9288011908183756, iteration: 104126
loss: 1.0152729749679565,grad_norm: 0.999999274766253, iteration: 104127
loss: 1.0047451257705688,grad_norm: 0.9999990040228894, iteration: 104128
loss: 0.9956321120262146,grad_norm: 0.980626874805133, iteration: 104129
loss: 1.0005502700805664,grad_norm: 0.9443262359769052, iteration: 104130
loss: 1.078436017036438,grad_norm: 0.9872263679464321, iteration: 104131
loss: 1.0015780925750732,grad_norm: 0.9999992005112791, iteration: 104132
loss: 1.015432357788086,grad_norm: 0.9392439744761774, iteration: 104133
loss: 1.0339545011520386,grad_norm: 0.9601222602306471, iteration: 104134
loss: 0.9722166657447815,grad_norm: 0.9999990806189518, iteration: 104135
loss: 0.9775539636611938,grad_norm: 0.9999990493602683, iteration: 104136
loss: 1.018861174583435,grad_norm: 0.9999993066670346, iteration: 104137
loss: 1.0193992853164673,grad_norm: 0.9752684380764877, iteration: 104138
loss: 1.0496413707733154,grad_norm: 0.9999992530498654, iteration: 104139
loss: 1.038781762123108,grad_norm: 0.9999996864336924, iteration: 104140
loss: 0.9924513101577759,grad_norm: 0.9445676824278454, iteration: 104141
loss: 0.9882515072822571,grad_norm: 0.9999991992625694, iteration: 104142
loss: 0.9827229976654053,grad_norm: 0.9999992409084675, iteration: 104143
loss: 0.9798347353935242,grad_norm: 0.9391609984575211, iteration: 104144
loss: 1.0095785856246948,grad_norm: 0.9396875368338462, iteration: 104145
loss: 0.9804220795631409,grad_norm: 0.9999993358645888, iteration: 104146
loss: 0.9865893125534058,grad_norm: 0.9999991588492845, iteration: 104147
loss: 0.9672597646713257,grad_norm: 0.9853595335523374, iteration: 104148
loss: 1.0060508251190186,grad_norm: 0.9695245931774185, iteration: 104149
loss: 1.0331462621688843,grad_norm: 0.9999989955528343, iteration: 104150
loss: 0.9852787852287292,grad_norm: 0.9999991701475839, iteration: 104151
loss: 1.0143343210220337,grad_norm: 0.9999991455520411, iteration: 104152
loss: 0.9686851501464844,grad_norm: 0.9999992376943811, iteration: 104153
loss: 0.990702211856842,grad_norm: 0.9999991605752637, iteration: 104154
loss: 1.0187549591064453,grad_norm: 0.9999991003308658, iteration: 104155
loss: 1.0915216207504272,grad_norm: 0.9999997499143702, iteration: 104156
loss: 1.0149848461151123,grad_norm: 0.9345290372209885, iteration: 104157
loss: 1.0141462087631226,grad_norm: 0.9999991862952589, iteration: 104158
loss: 1.0164870023727417,grad_norm: 0.9999990904536809, iteration: 104159
loss: 0.9959108233451843,grad_norm: 0.9999990868933282, iteration: 104160
loss: 0.9869459271430969,grad_norm: 0.9999991545946019, iteration: 104161
loss: 0.9686115384101868,grad_norm: 0.9999991262101057, iteration: 104162
loss: 1.00132155418396,grad_norm: 0.9999990922787911, iteration: 104163
loss: 0.9934395551681519,grad_norm: 0.9999992186764849, iteration: 104164
loss: 0.9915871024131775,grad_norm: 0.9999991257892394, iteration: 104165
loss: 1.0322120189666748,grad_norm: 0.9999993209008294, iteration: 104166
loss: 1.0648808479309082,grad_norm: 0.9999989593071139, iteration: 104167
loss: 0.9773374795913696,grad_norm: 0.9999991889236242, iteration: 104168
loss: 1.0177398920059204,grad_norm: 0.9999992196145071, iteration: 104169
loss: 0.9868525266647339,grad_norm: 0.9999990691808217, iteration: 104170
loss: 0.9907869100570679,grad_norm: 0.8495477924915673, iteration: 104171
loss: 0.9892770051956177,grad_norm: 0.9999990944386248, iteration: 104172
loss: 1.0007027387619019,grad_norm: 0.9999991784579955, iteration: 104173
loss: 1.0158311128616333,grad_norm: 0.9175961158568311, iteration: 104174
loss: 0.9842435717582703,grad_norm: 0.9999989766755875, iteration: 104175
loss: 1.072544813156128,grad_norm: 0.9999991816759485, iteration: 104176
loss: 0.9702867269515991,grad_norm: 0.9258779266589381, iteration: 104177
loss: 1.0292342901229858,grad_norm: 0.9888798480374374, iteration: 104178
loss: 0.9940096139907837,grad_norm: 0.9999990039081981, iteration: 104179
loss: 1.0379271507263184,grad_norm: 0.9999992100877291, iteration: 104180
loss: 1.0242565870285034,grad_norm: 0.9999993632441646, iteration: 104181
loss: 0.9746649861335754,grad_norm: 0.9999991437540535, iteration: 104182
loss: 0.9732868075370789,grad_norm: 0.8889915246623402, iteration: 104183
loss: 0.995418906211853,grad_norm: 0.999999154404657, iteration: 104184
loss: 0.9931433796882629,grad_norm: 0.9204309249236674, iteration: 104185
loss: 0.983184278011322,grad_norm: 0.9999992752525539, iteration: 104186
loss: 0.9547503590583801,grad_norm: 0.9999992335143926, iteration: 104187
loss: 1.0310587882995605,grad_norm: 0.8499837359181974, iteration: 104188
loss: 0.9829922318458557,grad_norm: 0.9608009543930969, iteration: 104189
loss: 0.9851680994033813,grad_norm: 0.9999992345918078, iteration: 104190
loss: 1.0211104154586792,grad_norm: 0.9999991151115917, iteration: 104191
loss: 0.9783385396003723,grad_norm: 0.999999053795278, iteration: 104192
loss: 1.0129293203353882,grad_norm: 0.9999991435728725, iteration: 104193
loss: 1.0220224857330322,grad_norm: 0.9910076726548273, iteration: 104194
loss: 0.992517352104187,grad_norm: 0.9374292508816123, iteration: 104195
loss: 1.0166523456573486,grad_norm: 0.9999989942452562, iteration: 104196
loss: 0.9965370297431946,grad_norm: 0.9999991097792985, iteration: 104197
loss: 0.9781880974769592,grad_norm: 0.9649401111284072, iteration: 104198
loss: 0.9837305545806885,grad_norm: 0.9999989420681555, iteration: 104199
loss: 1.026233196258545,grad_norm: 0.999999090773243, iteration: 104200
loss: 0.9613921642303467,grad_norm: 0.999999119559794, iteration: 104201
loss: 0.9843716025352478,grad_norm: 0.9999990601874715, iteration: 104202
loss: 0.9612407684326172,grad_norm: 0.9999991419943036, iteration: 104203
loss: 1.006973147392273,grad_norm: 0.9931433833189743, iteration: 104204
loss: 0.9980453848838806,grad_norm: 0.9699518182253494, iteration: 104205
loss: 1.008140206336975,grad_norm: 0.9999994891851237, iteration: 104206
loss: 1.0141483545303345,grad_norm: 0.9999990769030633, iteration: 104207
loss: 1.0498627424240112,grad_norm: 0.9999991121372955, iteration: 104208
loss: 1.0019947290420532,grad_norm: 0.9999990507596555, iteration: 104209
loss: 1.0149027109146118,grad_norm: 0.9999993137813398, iteration: 104210
loss: 1.0090415477752686,grad_norm: 0.9999992198882046, iteration: 104211
loss: 1.0036990642547607,grad_norm: 0.9999989663812623, iteration: 104212
loss: 1.0311317443847656,grad_norm: 0.9999992497100246, iteration: 104213
loss: 1.0056886672973633,grad_norm: 0.930525856622119, iteration: 104214
loss: 0.9678198099136353,grad_norm: 0.9999991630073387, iteration: 104215
loss: 0.9642478823661804,grad_norm: 0.9999991036789202, iteration: 104216
loss: 0.999597430229187,grad_norm: 0.9999991430651037, iteration: 104217
loss: 1.020504117012024,grad_norm: 0.9526069728799923, iteration: 104218
loss: 1.012121558189392,grad_norm: 0.99999915011044, iteration: 104219
loss: 1.0669152736663818,grad_norm: 0.9999998444967284, iteration: 104220
loss: 0.9748016595840454,grad_norm: 0.9230236196504141, iteration: 104221
loss: 1.0111908912658691,grad_norm: 0.9999991874257189, iteration: 104222
loss: 0.995577335357666,grad_norm: 0.9999992078611283, iteration: 104223
loss: 0.9815623760223389,grad_norm: 0.9677059804696567, iteration: 104224
loss: 0.9876194596290588,grad_norm: 0.9999990370758587, iteration: 104225
loss: 1.0213621854782104,grad_norm: 0.9999991858336755, iteration: 104226
loss: 0.9994544982910156,grad_norm: 0.9999991178205834, iteration: 104227
loss: 0.9824288487434387,grad_norm: 0.9870307887046555, iteration: 104228
loss: 1.002233862876892,grad_norm: 0.9716981043768073, iteration: 104229
loss: 1.0203628540039062,grad_norm: 0.9999990717598208, iteration: 104230
loss: 1.0094025135040283,grad_norm: 0.9999990250502064, iteration: 104231
loss: 1.0156477689743042,grad_norm: 0.9185874032988814, iteration: 104232
loss: 0.9939346313476562,grad_norm: 0.8845804116320302, iteration: 104233
loss: 0.9548768997192383,grad_norm: 0.9999990638670017, iteration: 104234
loss: 0.9756737947463989,grad_norm: 0.8780628786605501, iteration: 104235
loss: 0.9958732724189758,grad_norm: 0.9996014319062142, iteration: 104236
loss: 1.0142414569854736,grad_norm: 0.9999998471331829, iteration: 104237
loss: 1.021984338760376,grad_norm: 0.9999990080141374, iteration: 104238
loss: 1.0382165908813477,grad_norm: 0.9487974120152426, iteration: 104239
loss: 1.0177000761032104,grad_norm: 0.9999991217293991, iteration: 104240
loss: 0.9657811522483826,grad_norm: 0.9999993291331302, iteration: 104241
loss: 0.9562230706214905,grad_norm: 0.969581582898702, iteration: 104242
loss: 0.9818089008331299,grad_norm: 0.9999992824698033, iteration: 104243
loss: 0.9749700427055359,grad_norm: 0.9999991914997223, iteration: 104244
loss: 1.0230845212936401,grad_norm: 0.9779063914483686, iteration: 104245
loss: 1.0048943758010864,grad_norm: 0.9999992073692952, iteration: 104246
loss: 1.0175285339355469,grad_norm: 0.9999993304995146, iteration: 104247
loss: 0.9988605976104736,grad_norm: 0.9814153524145736, iteration: 104248
loss: 0.9563574194908142,grad_norm: 0.9806875006988413, iteration: 104249
loss: 1.0157803297042847,grad_norm: 0.9953843959619872, iteration: 104250
loss: 0.9762184023857117,grad_norm: 0.9999991765088533, iteration: 104251
loss: 1.0064702033996582,grad_norm: 0.9999991160281189, iteration: 104252
loss: 1.0040316581726074,grad_norm: 0.9867712745400434, iteration: 104253
loss: 1.0328840017318726,grad_norm: 0.9999990549354257, iteration: 104254
loss: 0.979922890663147,grad_norm: 0.999999119434414, iteration: 104255
loss: 0.980566680431366,grad_norm: 0.9999988281098533, iteration: 104256
loss: 1.0241130590438843,grad_norm: 0.9999997683259143, iteration: 104257
loss: 0.9941519498825073,grad_norm: 0.8508671739138842, iteration: 104258
loss: 0.9872064590454102,grad_norm: 0.9999991583821278, iteration: 104259
loss: 0.9855257868766785,grad_norm: 0.9999991289519684, iteration: 104260
loss: 1.0266594886779785,grad_norm: 0.9999991618209892, iteration: 104261
loss: 1.0654841661453247,grad_norm: 0.9999993994345664, iteration: 104262
loss: 0.9893734455108643,grad_norm: 0.9999989652231671, iteration: 104263
loss: 0.9931354522705078,grad_norm: 0.999999083205656, iteration: 104264
loss: 0.9934632182121277,grad_norm: 0.9999989223607406, iteration: 104265
loss: 0.963891327381134,grad_norm: 0.9999991759173158, iteration: 104266
loss: 1.0050852298736572,grad_norm: 0.9999998222817863, iteration: 104267
loss: 1.0232057571411133,grad_norm: 0.9999996315864224, iteration: 104268
loss: 1.0362050533294678,grad_norm: 0.9999998942549407, iteration: 104269
loss: 0.960912823677063,grad_norm: 0.9999991020394827, iteration: 104270
loss: 0.9961528778076172,grad_norm: 0.9999990444863356, iteration: 104271
loss: 0.9924954771995544,grad_norm: 0.8981857533104539, iteration: 104272
loss: 1.1898822784423828,grad_norm: 0.9999993613395481, iteration: 104273
loss: 1.020031452178955,grad_norm: 0.9999994777234443, iteration: 104274
loss: 1.0009331703186035,grad_norm: 0.9999989453616818, iteration: 104275
loss: 1.0355983972549438,grad_norm: 0.9999993827772091, iteration: 104276
loss: 1.005802869796753,grad_norm: 0.9999993371181168, iteration: 104277
loss: 0.9718385338783264,grad_norm: 0.9999991969862329, iteration: 104278
loss: 1.0586711168289185,grad_norm: 0.9999994968019558, iteration: 104279
loss: 1.0109165906906128,grad_norm: 0.9999991005536658, iteration: 104280
loss: 1.0182571411132812,grad_norm: 0.9999992322193242, iteration: 104281
loss: 1.007212519645691,grad_norm: 0.9444792695288409, iteration: 104282
loss: 1.0358891487121582,grad_norm: 0.9875756627454544, iteration: 104283
loss: 1.0278669595718384,grad_norm: 0.9999991856478927, iteration: 104284
loss: 1.0070931911468506,grad_norm: 0.9813765877280748, iteration: 104285
loss: 1.030912160873413,grad_norm: 0.9999990600115799, iteration: 104286
loss: 1.0172940492630005,grad_norm: 0.9999991621217605, iteration: 104287
loss: 0.9765043258666992,grad_norm: 0.9135289474567552, iteration: 104288
loss: 0.9947835206985474,grad_norm: 0.9174349445933379, iteration: 104289
loss: 1.0131930112838745,grad_norm: 0.9999993614458457, iteration: 104290
loss: 0.9725300669670105,grad_norm: 0.9229252351428257, iteration: 104291
loss: 1.0356533527374268,grad_norm: 0.9999994399048573, iteration: 104292
loss: 1.0047187805175781,grad_norm: 0.9739315497593918, iteration: 104293
loss: 1.0003145933151245,grad_norm: 0.954512097158989, iteration: 104294
loss: 1.0084233283996582,grad_norm: 0.9999992733911556, iteration: 104295
loss: 1.014630913734436,grad_norm: 0.983733218843296, iteration: 104296
loss: 1.0144233703613281,grad_norm: 0.9250875766549398, iteration: 104297
loss: 1.029835820198059,grad_norm: 0.9999993519422251, iteration: 104298
loss: 1.0116682052612305,grad_norm: 0.999999221527411, iteration: 104299
loss: 0.996614396572113,grad_norm: 0.9999991114791849, iteration: 104300
loss: 0.9525723457336426,grad_norm: 0.9564123221255794, iteration: 104301
loss: 0.9910576343536377,grad_norm: 0.9886606886191285, iteration: 104302
loss: 0.9725225567817688,grad_norm: 0.9999991417916998, iteration: 104303
loss: 0.9442416429519653,grad_norm: 0.9999990290801888, iteration: 104304
loss: 1.0043582916259766,grad_norm: 0.9999994650607649, iteration: 104305
loss: 1.0074200630187988,grad_norm: 0.9999991557912725, iteration: 104306
loss: 0.9962827563285828,grad_norm: 0.9999992475556306, iteration: 104307
loss: 0.9729596376419067,grad_norm: 0.9922650816098533, iteration: 104308
loss: 1.0039174556732178,grad_norm: 0.9999993331203518, iteration: 104309
loss: 0.9917694926261902,grad_norm: 0.9999991050281127, iteration: 104310
loss: 1.0615698099136353,grad_norm: 0.9999993017961017, iteration: 104311
loss: 1.0084247589111328,grad_norm: 0.9999991072159875, iteration: 104312
loss: 1.0221799612045288,grad_norm: 0.9999992862382591, iteration: 104313
loss: 0.9996911883354187,grad_norm: 0.9938419409905919, iteration: 104314
loss: 1.0566390752792358,grad_norm: 0.9999999041667266, iteration: 104315
loss: 0.9689445495605469,grad_norm: 0.9999992284132752, iteration: 104316
loss: 0.9903210401535034,grad_norm: 0.9999989785820421, iteration: 104317
loss: 1.0351142883300781,grad_norm: 0.9999991868739758, iteration: 104318
loss: 1.0049892663955688,grad_norm: 0.9898076354492709, iteration: 104319
loss: 0.9770582318305969,grad_norm: 0.9999989651619758, iteration: 104320
loss: 0.9975020289421082,grad_norm: 0.8134835552805662, iteration: 104321
loss: 0.9507181644439697,grad_norm: 0.9999992632612583, iteration: 104322
loss: 1.0189156532287598,grad_norm: 0.8134929818212364, iteration: 104323
loss: 1.0771814584732056,grad_norm: 0.9999992477907086, iteration: 104324
loss: 0.9880350232124329,grad_norm: 0.8644204025117759, iteration: 104325
loss: 0.9934016466140747,grad_norm: 0.9999993228783856, iteration: 104326
loss: 1.0112996101379395,grad_norm: 0.937880325149679, iteration: 104327
loss: 0.9775243997573853,grad_norm: 0.9999991593156982, iteration: 104328
loss: 0.9869288206100464,grad_norm: 0.9999989523646888, iteration: 104329
loss: 0.9967280626296997,grad_norm: 0.999999248389193, iteration: 104330
loss: 0.9795292019844055,grad_norm: 0.9999990132165737, iteration: 104331
loss: 0.9934699535369873,grad_norm: 0.958230773313344, iteration: 104332
loss: 1.0125524997711182,grad_norm: 0.999999114972464, iteration: 104333
loss: 1.022261142730713,grad_norm: 0.9775633289509357, iteration: 104334
loss: 1.0362943410873413,grad_norm: 0.8522569650350413, iteration: 104335
loss: 0.9735150933265686,grad_norm: 0.9999990777506618, iteration: 104336
loss: 0.9696845412254333,grad_norm: 0.9999990266076402, iteration: 104337
loss: 1.0212191343307495,grad_norm: 0.9999992763475134, iteration: 104338
loss: 0.9796945452690125,grad_norm: 0.9999991053854527, iteration: 104339
loss: 0.9608166217803955,grad_norm: 0.9999989237274162, iteration: 104340
loss: 0.943960964679718,grad_norm: 0.8608128051618869, iteration: 104341
loss: 0.9854814410209656,grad_norm: 0.8446787292333279, iteration: 104342
loss: 1.0114343166351318,grad_norm: 0.9999992730006568, iteration: 104343
loss: 1.0112686157226562,grad_norm: 0.8679954582684166, iteration: 104344
loss: 1.0026401281356812,grad_norm: 0.9516727055147046, iteration: 104345
loss: 1.0162409543991089,grad_norm: 0.9999991814903586, iteration: 104346
loss: 1.0112624168395996,grad_norm: 0.9999992242717319, iteration: 104347
loss: 1.0115281343460083,grad_norm: 0.9999990974915587, iteration: 104348
loss: 0.9978181719779968,grad_norm: 0.9880055177818472, iteration: 104349
loss: 0.9704040288925171,grad_norm: 0.9999992018240965, iteration: 104350
loss: 0.9754360914230347,grad_norm: 0.9312522388340556, iteration: 104351
loss: 0.9855754971504211,grad_norm: 0.9256761459613365, iteration: 104352
loss: 1.0253640413284302,grad_norm: 0.9999991679022047, iteration: 104353
loss: 1.0268384218215942,grad_norm: 0.9999991226987476, iteration: 104354
loss: 1.0227965116500854,grad_norm: 0.999999128786361, iteration: 104355
loss: 1.0114972591400146,grad_norm: 0.9999991888298425, iteration: 104356
loss: 1.0853089094161987,grad_norm: 0.9999992220246582, iteration: 104357
loss: 1.0028308629989624,grad_norm: 0.9999992678965814, iteration: 104358
loss: 1.01332688331604,grad_norm: 0.9157781334774804, iteration: 104359
loss: 0.9916491508483887,grad_norm: 0.9999992293619485, iteration: 104360
loss: 1.010754108428955,grad_norm: 0.9999991238734469, iteration: 104361
loss: 1.0108435153961182,grad_norm: 0.961896007047425, iteration: 104362
loss: 1.0011435747146606,grad_norm: 0.9999991395080636, iteration: 104363
loss: 1.0628173351287842,grad_norm: 0.9999996087538179, iteration: 104364
loss: 0.98859041929245,grad_norm: 0.9629125845022907, iteration: 104365
loss: 0.9938672780990601,grad_norm: 0.9999989930690903, iteration: 104366
loss: 0.9593592882156372,grad_norm: 0.9999991604498333, iteration: 104367
loss: 1.0025891065597534,grad_norm: 0.9999990547322207, iteration: 104368
loss: 0.9900045990943909,grad_norm: 0.9999990496790316, iteration: 104369
loss: 0.9828311800956726,grad_norm: 0.9999996366363997, iteration: 104370
loss: 0.9842198491096497,grad_norm: 0.9999990691952563, iteration: 104371
loss: 1.0279101133346558,grad_norm: 0.9999992137787139, iteration: 104372
loss: 0.9837527275085449,grad_norm: 0.9999989986010527, iteration: 104373
loss: 1.025687575340271,grad_norm: 0.999999433588587, iteration: 104374
loss: 0.9567093253135681,grad_norm: 0.9999992678728405, iteration: 104375
loss: 1.0087283849716187,grad_norm: 0.9883894122987745, iteration: 104376
loss: 0.9650176167488098,grad_norm: 0.9999990213443832, iteration: 104377
loss: 1.0253245830535889,grad_norm: 0.9999989245216739, iteration: 104378
loss: 0.9887028336524963,grad_norm: 0.9999990110563131, iteration: 104379
loss: 1.0191714763641357,grad_norm: 0.9999991161475221, iteration: 104380
loss: 1.0176604986190796,grad_norm: 0.9999991396992591, iteration: 104381
loss: 0.9775881767272949,grad_norm: 0.9999991327050038, iteration: 104382
loss: 1.0096020698547363,grad_norm: 0.9999995798559641, iteration: 104383
loss: 1.006282091140747,grad_norm: 0.9042115614048756, iteration: 104384
loss: 0.9720799922943115,grad_norm: 0.9492539541418913, iteration: 104385
loss: 1.000258445739746,grad_norm: 0.999999285112341, iteration: 104386
loss: 1.0133497714996338,grad_norm: 0.9999991340618802, iteration: 104387
loss: 0.9621616005897522,grad_norm: 0.9405311162645864, iteration: 104388
loss: 1.0689449310302734,grad_norm: 0.999999400000247, iteration: 104389
loss: 0.9874707460403442,grad_norm: 0.8852668785494867, iteration: 104390
loss: 0.9694622159004211,grad_norm: 0.9310266937042508, iteration: 104391
loss: 1.008675217628479,grad_norm: 0.9843620006008174, iteration: 104392
loss: 1.009833812713623,grad_norm: 0.999999083791775, iteration: 104393
loss: 1.01447594165802,grad_norm: 0.9999992071945294, iteration: 104394
loss: 1.0156043767929077,grad_norm: 0.9999990162696565, iteration: 104395
loss: 0.9967910647392273,grad_norm: 0.9412348407573295, iteration: 104396
loss: 1.0061219930648804,grad_norm: 0.9999994167689438, iteration: 104397
loss: 0.9745923280715942,grad_norm: 0.9999992588918364, iteration: 104398
loss: 1.0085853338241577,grad_norm: 0.9999991012527157, iteration: 104399
loss: 1.0044803619384766,grad_norm: 0.9751560245624475, iteration: 104400
loss: 0.998657763004303,grad_norm: 0.8958124689313629, iteration: 104401
loss: 1.0171327590942383,grad_norm: 0.9999991068496097, iteration: 104402
loss: 1.0375334024429321,grad_norm: 0.9999989563697753, iteration: 104403
loss: 0.9814121723175049,grad_norm: 0.9999989923208014, iteration: 104404
loss: 1.0154571533203125,grad_norm: 0.9663578767381907, iteration: 104405
loss: 0.9887215495109558,grad_norm: 0.9999991489283374, iteration: 104406
loss: 0.9956029057502747,grad_norm: 0.9999992415255441, iteration: 104407
loss: 1.0177198648452759,grad_norm: 0.9999992802186024, iteration: 104408
loss: 1.0275593996047974,grad_norm: 0.9999991274047081, iteration: 104409
loss: 1.013444185256958,grad_norm: 0.8916574490793387, iteration: 104410
loss: 0.9721215963363647,grad_norm: 0.9999989973898655, iteration: 104411
loss: 1.0024077892303467,grad_norm: 0.9999988571171927, iteration: 104412
loss: 0.9988729953765869,grad_norm: 0.9542400002803955, iteration: 104413
loss: 1.0023934841156006,grad_norm: 0.9999991470024256, iteration: 104414
loss: 1.0263381004333496,grad_norm: 0.9999991492692515, iteration: 104415
loss: 1.0112981796264648,grad_norm: 0.9999991836354339, iteration: 104416
loss: 0.999846339225769,grad_norm: 0.9999991291374034, iteration: 104417
loss: 1.0097403526306152,grad_norm: 0.9999991020501335, iteration: 104418
loss: 0.9976755976676941,grad_norm: 0.9999996279844893, iteration: 104419
loss: 0.976675271987915,grad_norm: 0.9999994158300631, iteration: 104420
loss: 0.9865260124206543,grad_norm: 0.9575119174317789, iteration: 104421
loss: 0.9948577880859375,grad_norm: 0.9999991712339843, iteration: 104422
loss: 0.9901215434074402,grad_norm: 0.9999990675631972, iteration: 104423
loss: 0.9827454090118408,grad_norm: 0.9999991731827684, iteration: 104424
loss: 0.9854899644851685,grad_norm: 0.9111033671361665, iteration: 104425
loss: 0.9615681171417236,grad_norm: 0.9999991348533742, iteration: 104426
loss: 1.0072119235992432,grad_norm: 0.9346153290728431, iteration: 104427
loss: 0.9929724335670471,grad_norm: 0.9999991419065419, iteration: 104428
loss: 0.9824650287628174,grad_norm: 0.9999990740254509, iteration: 104429
loss: 0.9536154866218567,grad_norm: 0.9999992056039007, iteration: 104430
loss: 0.9970788955688477,grad_norm: 0.9743864771552762, iteration: 104431
loss: 0.976617693901062,grad_norm: 0.9999990174899775, iteration: 104432
loss: 0.9630628824234009,grad_norm: 0.9999991953103224, iteration: 104433
loss: 0.9878994226455688,grad_norm: 0.9999991135005893, iteration: 104434
loss: 1.017284870147705,grad_norm: 0.9999992205656013, iteration: 104435
loss: 0.9826908707618713,grad_norm: 0.9999989925264401, iteration: 104436
loss: 1.0259135961532593,grad_norm: 0.9697589244187225, iteration: 104437
loss: 1.0493558645248413,grad_norm: 0.9999996344701755, iteration: 104438
loss: 0.9943487644195557,grad_norm: 0.9470037564818482, iteration: 104439
loss: 1.043254017829895,grad_norm: 0.9999992384827036, iteration: 104440
loss: 0.9823637008666992,grad_norm: 0.9132235403943049, iteration: 104441
loss: 1.012182354927063,grad_norm: 0.9999991057624784, iteration: 104442
loss: 1.0141699314117432,grad_norm: 0.9999991526194606, iteration: 104443
loss: 1.0412040948867798,grad_norm: 0.9999991849943549, iteration: 104444
loss: 1.007102608680725,grad_norm: 0.999999172928161, iteration: 104445
loss: 1.0232044458389282,grad_norm: 0.9999991900443902, iteration: 104446
loss: 0.9986766576766968,grad_norm: 0.9908537452162771, iteration: 104447
loss: 1.0052399635314941,grad_norm: 0.9999992272691341, iteration: 104448
loss: 1.0260200500488281,grad_norm: 0.9590188403745716, iteration: 104449
loss: 1.0010906457901,grad_norm: 0.9999991505593091, iteration: 104450
loss: 0.9903542995452881,grad_norm: 0.9868889466237052, iteration: 104451
loss: 1.0511903762817383,grad_norm: 0.9999997732477064, iteration: 104452
loss: 1.0095882415771484,grad_norm: 0.999999840373967, iteration: 104453
loss: 0.9781348705291748,grad_norm: 0.9999991067493212, iteration: 104454
loss: 0.9878791570663452,grad_norm: 0.900549768791291, iteration: 104455
loss: 0.9951077699661255,grad_norm: 0.9311239338141375, iteration: 104456
loss: 0.9873462319374084,grad_norm: 0.9999992940878222, iteration: 104457
loss: 1.0367631912231445,grad_norm: 0.9999992262483522, iteration: 104458
loss: 0.9832173585891724,grad_norm: 0.9999991902924529, iteration: 104459
loss: 1.050819993019104,grad_norm: 0.9999991645249275, iteration: 104460
loss: 0.9675765633583069,grad_norm: 0.9999991519617437, iteration: 104461
loss: 0.9865559935569763,grad_norm: 0.907905289621176, iteration: 104462
loss: 0.9959047436714172,grad_norm: 0.9960532859561069, iteration: 104463
loss: 1.0260099172592163,grad_norm: 0.9665472891685031, iteration: 104464
loss: 0.9622353911399841,grad_norm: 0.9999992410120007, iteration: 104465
loss: 0.9920081496238708,grad_norm: 0.9999991707786434, iteration: 104466
loss: 0.9917234778404236,grad_norm: 0.9999991023586746, iteration: 104467
loss: 0.974575400352478,grad_norm: 0.9999990944648133, iteration: 104468
loss: 1.0552003383636475,grad_norm: 0.9999991542620725, iteration: 104469
loss: 1.0213992595672607,grad_norm: 0.8551920074195588, iteration: 104470
loss: 1.0034170150756836,grad_norm: 0.9999991540308, iteration: 104471
loss: 1.0026111602783203,grad_norm: 0.983907374374328, iteration: 104472
loss: 1.01786208152771,grad_norm: 0.9022106131200326, iteration: 104473
loss: 0.9814087152481079,grad_norm: 0.9999991689327637, iteration: 104474
loss: 1.0046205520629883,grad_norm: 0.9999992327349955, iteration: 104475
loss: 0.995022177696228,grad_norm: 0.9999989306647754, iteration: 104476
loss: 1.021889328956604,grad_norm: 0.9999998895250808, iteration: 104477
loss: 0.9827399849891663,grad_norm: 0.9999991776924907, iteration: 104478
loss: 1.0139193534851074,grad_norm: 0.9999992844961443, iteration: 104479
loss: 1.0270782709121704,grad_norm: 0.999999685280603, iteration: 104480
loss: 1.0245999097824097,grad_norm: 0.9999990918025843, iteration: 104481
loss: 1.203474521636963,grad_norm: 0.9999995574627236, iteration: 104482
loss: 0.9784689545631409,grad_norm: 0.999999113486465, iteration: 104483
loss: 0.9881895184516907,grad_norm: 0.871472419168361, iteration: 104484
loss: 0.9855787754058838,grad_norm: 0.9804702349772015, iteration: 104485
loss: 1.0150446891784668,grad_norm: 0.9999991237060155, iteration: 104486
loss: 0.9841724634170532,grad_norm: 0.9999991760330526, iteration: 104487
loss: 1.0044853687286377,grad_norm: 0.999999029030496, iteration: 104488
loss: 1.0101302862167358,grad_norm: 0.9714259596890761, iteration: 104489
loss: 0.9519355297088623,grad_norm: 0.9999991923243471, iteration: 104490
loss: 1.0145481824874878,grad_norm: 0.9999994304461762, iteration: 104491
loss: 0.9833275675773621,grad_norm: 0.9999992130851721, iteration: 104492
loss: 0.9854438900947571,grad_norm: 0.9226479010017417, iteration: 104493
loss: 1.004181981086731,grad_norm: 0.9938490889438717, iteration: 104494
loss: 0.9966335892677307,grad_norm: 0.9864544453389847, iteration: 104495
loss: 1.001295566558838,grad_norm: 0.9999991158132937, iteration: 104496
loss: 1.01503324508667,grad_norm: 0.9999991615907478, iteration: 104497
loss: 1.156034231185913,grad_norm: 0.9999992687606929, iteration: 104498
loss: 1.0073192119598389,grad_norm: 0.9999993317334177, iteration: 104499
loss: 1.0441968441009521,grad_norm: 0.9999998567449264, iteration: 104500
loss: 0.9767065644264221,grad_norm: 0.9999991458637144, iteration: 104501
loss: 1.0137683153152466,grad_norm: 0.9999992899288983, iteration: 104502
loss: 1.132409691810608,grad_norm: 0.9999992070057838, iteration: 104503
loss: 1.1902093887329102,grad_norm: 0.9999998658915032, iteration: 104504
loss: 1.0495977401733398,grad_norm: 0.9999995461412775, iteration: 104505
loss: 1.0303587913513184,grad_norm: 0.9999992549652577, iteration: 104506
loss: 0.998060941696167,grad_norm: 0.9999998504462766, iteration: 104507
loss: 1.3611310720443726,grad_norm: 0.9999998559977152, iteration: 104508
loss: 1.1778351068496704,grad_norm: 0.9999993710785543, iteration: 104509
loss: 1.2727471590042114,grad_norm: 0.9999999874900085, iteration: 104510
loss: 1.2772984504699707,grad_norm: 0.9999999724601245, iteration: 104511
loss: 1.1243292093276978,grad_norm: 0.9999999565115028, iteration: 104512
loss: 1.2642245292663574,grad_norm: 0.9999998001060559, iteration: 104513
loss: 1.075838565826416,grad_norm: 0.9999998661785465, iteration: 104514
loss: 1.2064659595489502,grad_norm: 0.999999479445465, iteration: 104515
loss: 1.021802544593811,grad_norm: 0.9999990946177416, iteration: 104516
loss: 1.0227094888687134,grad_norm: 0.9999991431677355, iteration: 104517
loss: 1.0977239608764648,grad_norm: 0.999999274994301, iteration: 104518
loss: 0.9814273118972778,grad_norm: 0.9999991281782263, iteration: 104519
loss: 1.1109932661056519,grad_norm: 0.9999997534590708, iteration: 104520
loss: 1.101998209953308,grad_norm: 0.999999174543688, iteration: 104521
loss: 1.021524429321289,grad_norm: 0.9999994884707561, iteration: 104522
loss: 1.0266187191009521,grad_norm: 0.9999993443781188, iteration: 104523
loss: 1.1219180822372437,grad_norm: 0.999999848208017, iteration: 104524
loss: 1.0413999557495117,grad_norm: 0.9999999787472327, iteration: 104525
loss: 1.000563621520996,grad_norm: 0.9937506783684252, iteration: 104526
loss: 1.0623141527175903,grad_norm: 0.999999261056566, iteration: 104527
loss: 1.0754272937774658,grad_norm: 0.9999994751582417, iteration: 104528
loss: 1.012067437171936,grad_norm: 0.9404974798529431, iteration: 104529
loss: 1.0804215669631958,grad_norm: 0.9999997962410708, iteration: 104530
loss: 1.0236896276474,grad_norm: 0.9999991377757366, iteration: 104531
loss: 1.031530499458313,grad_norm: 0.9999992148299466, iteration: 104532
loss: 1.0694578886032104,grad_norm: 0.9999992480384763, iteration: 104533
loss: 1.077824592590332,grad_norm: 0.9999994707470606, iteration: 104534
loss: 1.1436889171600342,grad_norm: 0.9999989537787829, iteration: 104535
loss: 1.024363398551941,grad_norm: 0.9999995370115896, iteration: 104536
loss: 1.0123069286346436,grad_norm: 0.9999991379010518, iteration: 104537
loss: 0.9835381507873535,grad_norm: 0.9999989336684109, iteration: 104538
loss: 1.005325198173523,grad_norm: 0.9999998976748884, iteration: 104539
loss: 0.9979032874107361,grad_norm: 0.9999991955054645, iteration: 104540
loss: 1.020730972290039,grad_norm: 0.9999991614390656, iteration: 104541
loss: 0.9969200491905212,grad_norm: 0.9999992484375018, iteration: 104542
loss: 1.147911787033081,grad_norm: 0.9999995529652589, iteration: 104543
loss: 1.000834584236145,grad_norm: 0.9695005635070485, iteration: 104544
loss: 1.0432041883468628,grad_norm: 0.9999990763211182, iteration: 104545
loss: 1.1547209024429321,grad_norm: 0.9999992161655744, iteration: 104546
loss: 1.0375285148620605,grad_norm: 0.9999991569486334, iteration: 104547
loss: 1.0287058353424072,grad_norm: 0.9999990781721878, iteration: 104548
loss: 1.0762733221054077,grad_norm: 0.9999997948959264, iteration: 104549
loss: 1.0359126329421997,grad_norm: 0.9999993479385914, iteration: 104550
loss: 1.0365281105041504,grad_norm: 0.9999991200397681, iteration: 104551
loss: 1.3525152206420898,grad_norm: 1.0000000207513267, iteration: 104552
loss: 1.0583257675170898,grad_norm: 0.9999995808874655, iteration: 104553
loss: 1.0251027345657349,grad_norm: 0.9945696449325534, iteration: 104554
loss: 1.0364878177642822,grad_norm: 0.9999998224117367, iteration: 104555
loss: 1.0088255405426025,grad_norm: 0.9999990869884319, iteration: 104556
loss: 1.0677446126937866,grad_norm: 1.0000000158421063, iteration: 104557
loss: 1.0340111255645752,grad_norm: 0.9999998316795503, iteration: 104558
loss: 1.000096321105957,grad_norm: 0.9941962448423496, iteration: 104559
loss: 1.0193920135498047,grad_norm: 0.9998146683929845, iteration: 104560
loss: 1.0196701288223267,grad_norm: 0.9999990754228438, iteration: 104561
loss: 1.0639753341674805,grad_norm: 0.9999991807673796, iteration: 104562
loss: 1.0111325979232788,grad_norm: 0.9999998823159751, iteration: 104563
loss: 0.9734131693840027,grad_norm: 0.9999990680647309, iteration: 104564
loss: 0.9761715531349182,grad_norm: 0.9999992404808753, iteration: 104565
loss: 1.0400574207305908,grad_norm: 0.9999994665831775, iteration: 104566
loss: 1.2011854648590088,grad_norm: 0.9999998000291139, iteration: 104567
loss: 1.026054859161377,grad_norm: 0.9999992135847926, iteration: 104568
loss: 1.0264521837234497,grad_norm: 0.9999994493020589, iteration: 104569
loss: 1.0195125341415405,grad_norm: 0.9999994256109744, iteration: 104570
loss: 0.9618943929672241,grad_norm: 0.9475958500781644, iteration: 104571
loss: 1.0358222723007202,grad_norm: 0.999999519872147, iteration: 104572
loss: 0.9932247400283813,grad_norm: 0.9999992450607729, iteration: 104573
loss: 1.071090579032898,grad_norm: 0.999999245856872, iteration: 104574
loss: 0.9933809638023376,grad_norm: 0.9542047537782657, iteration: 104575
loss: 1.1340337991714478,grad_norm: 0.9999991432883157, iteration: 104576
loss: 1.026033639907837,grad_norm: 0.9999991811451294, iteration: 104577
loss: 1.1074191331863403,grad_norm: 0.999999494114411, iteration: 104578
loss: 1.0786091089248657,grad_norm: 0.9999990563538537, iteration: 104579
loss: 1.0815176963806152,grad_norm: 0.9999994806245486, iteration: 104580
loss: 0.9924304485321045,grad_norm: 0.9613399404211547, iteration: 104581
loss: 0.978874921798706,grad_norm: 0.9164409645209126, iteration: 104582
loss: 1.0001592636108398,grad_norm: 0.9271105384947418, iteration: 104583
loss: 1.0026285648345947,grad_norm: 0.9999993050089071, iteration: 104584
loss: 1.032818078994751,grad_norm: 0.9116482879567656, iteration: 104585
loss: 1.0381193161010742,grad_norm: 0.999999160282208, iteration: 104586
loss: 0.9749210476875305,grad_norm: 0.9999992184980245, iteration: 104587
loss: 0.9869113564491272,grad_norm: 0.999999084649042, iteration: 104588
loss: 0.9971289038658142,grad_norm: 0.9999990922942118, iteration: 104589
loss: 1.0250591039657593,grad_norm: 0.9999992137434207, iteration: 104590
loss: 1.0306495428085327,grad_norm: 0.999999560212482, iteration: 104591
loss: 0.9847756028175354,grad_norm: 0.9999992594389618, iteration: 104592
loss: 0.984420120716095,grad_norm: 0.9742192149322738, iteration: 104593
loss: 1.0543731451034546,grad_norm: 0.9582103787379985, iteration: 104594
loss: 0.994594156742096,grad_norm: 0.9999991025731074, iteration: 104595
loss: 1.0745548009872437,grad_norm: 0.9999992933942596, iteration: 104596
loss: 0.9733372330665588,grad_norm: 0.9999989143927711, iteration: 104597
loss: 0.9721806049346924,grad_norm: 0.9463477497886849, iteration: 104598
loss: 1.0014846324920654,grad_norm: 0.9999990955134275, iteration: 104599
loss: 0.972862958908081,grad_norm: 0.9481710405167015, iteration: 104600
loss: 0.9837540984153748,grad_norm: 0.9999992207678478, iteration: 104601
loss: 0.9636035561561584,grad_norm: 0.9999989546999221, iteration: 104602
loss: 0.9905484914779663,grad_norm: 0.9999990684757922, iteration: 104603
loss: 0.9975549578666687,grad_norm: 0.993795003807795, iteration: 104604
loss: 1.0134272575378418,grad_norm: 0.9999990395608375, iteration: 104605
loss: 1.0049175024032593,grad_norm: 0.9999991320879844, iteration: 104606
loss: 0.9872218370437622,grad_norm: 0.9999996469836279, iteration: 104607
loss: 1.0403800010681152,grad_norm: 0.9999988724341445, iteration: 104608
loss: 0.999654233455658,grad_norm: 0.9999995413787481, iteration: 104609
loss: 0.9961135983467102,grad_norm: 0.9999992428311065, iteration: 104610
loss: 1.0151652097702026,grad_norm: 0.9999990080757138, iteration: 104611
loss: 1.109790563583374,grad_norm: 0.9999990696542985, iteration: 104612
loss: 1.0021672248840332,grad_norm: 0.9999990918116198, iteration: 104613
loss: 1.0165812969207764,grad_norm: 0.972203511423179, iteration: 104614
loss: 1.0087413787841797,grad_norm: 0.9999991391394386, iteration: 104615
loss: 0.9970443844795227,grad_norm: 0.9999990206491425, iteration: 104616
loss: 1.0191736221313477,grad_norm: 0.975608320654209, iteration: 104617
loss: 1.0227856636047363,grad_norm: 0.9710759932011589, iteration: 104618
loss: 1.1536482572555542,grad_norm: 0.999999749331628, iteration: 104619
loss: 0.9934204816818237,grad_norm: 0.9692981365988694, iteration: 104620
loss: 0.9987926483154297,grad_norm: 0.9999990081407925, iteration: 104621
loss: 1.0176316499710083,grad_norm: 0.8770687754686965, iteration: 104622
loss: 1.0214956998825073,grad_norm: 0.9723996550198041, iteration: 104623
loss: 1.0064661502838135,grad_norm: 0.9999991152492427, iteration: 104624
loss: 1.0502986907958984,grad_norm: 0.9999996888229399, iteration: 104625
loss: 1.000953197479248,grad_norm: 0.9999991132620083, iteration: 104626
loss: 1.040543556213379,grad_norm: 0.9999996344956402, iteration: 104627
loss: 1.0213686227798462,grad_norm: 0.9826791573186567, iteration: 104628
loss: 1.0290322303771973,grad_norm: 0.9999992071598394, iteration: 104629
loss: 1.0172206163406372,grad_norm: 0.9999991619575319, iteration: 104630
loss: 1.0167784690856934,grad_norm: 0.9999990897539885, iteration: 104631
loss: 1.0459883213043213,grad_norm: 0.9999991822615488, iteration: 104632
loss: 1.0519593954086304,grad_norm: 0.9999995375388874, iteration: 104633
loss: 0.9909955263137817,grad_norm: 0.9246555769926244, iteration: 104634
loss: 1.00027334690094,grad_norm: 0.9682779182411605, iteration: 104635
loss: 1.017261028289795,grad_norm: 0.9999997378138782, iteration: 104636
loss: 1.0058355331420898,grad_norm: 0.9331122257196166, iteration: 104637
loss: 1.0164453983306885,grad_norm: 0.9950906004367459, iteration: 104638
loss: 1.002310037612915,grad_norm: 0.9999989349343807, iteration: 104639
loss: 0.984178364276886,grad_norm: 0.9967047443491384, iteration: 104640
loss: 1.0410960912704468,grad_norm: 0.9999992845320949, iteration: 104641
loss: 1.009409785270691,grad_norm: 0.9999991974732537, iteration: 104642
loss: 0.9859784841537476,grad_norm: 0.9999991465349705, iteration: 104643
loss: 1.0096375942230225,grad_norm: 0.9537104669244582, iteration: 104644
loss: 1.026004672050476,grad_norm: 0.999999174550859, iteration: 104645
loss: 0.9894406199455261,grad_norm: 0.999999029316639, iteration: 104646
loss: 0.986135721206665,grad_norm: 0.9999994311532152, iteration: 104647
loss: 1.2973631620407104,grad_norm: 0.9999996601204395, iteration: 104648
loss: 1.0208245515823364,grad_norm: 0.9999992033457019, iteration: 104649
loss: 1.0382180213928223,grad_norm: 0.9999992489430177, iteration: 104650
loss: 1.0075243711471558,grad_norm: 0.9999991975964183, iteration: 104651
loss: 1.0485813617706299,grad_norm: 1.0000000464790337, iteration: 104652
loss: 0.988548994064331,grad_norm: 0.9999992005236666, iteration: 104653
loss: 0.9684914946556091,grad_norm: 0.9999992483776238, iteration: 104654
loss: 0.9632824659347534,grad_norm: 0.9412806264263569, iteration: 104655
loss: 1.0067856311798096,grad_norm: 0.9999990709025394, iteration: 104656
loss: 1.0323548316955566,grad_norm: 0.9999990394692951, iteration: 104657
loss: 1.020404577255249,grad_norm: 0.914534598301907, iteration: 104658
loss: 0.996199905872345,grad_norm: 0.9999992205355067, iteration: 104659
loss: 1.03079354763031,grad_norm: 0.9999993145692471, iteration: 104660
loss: 1.0238689184188843,grad_norm: 0.8864214160426332, iteration: 104661
loss: 0.9857380986213684,grad_norm: 0.9089733047863491, iteration: 104662
loss: 1.032320261001587,grad_norm: 0.9927851309366665, iteration: 104663
loss: 1.00240159034729,grad_norm: 0.9190080971511686, iteration: 104664
loss: 1.0717499256134033,grad_norm: 0.9999990743013686, iteration: 104665
loss: 1.0044161081314087,grad_norm: 0.9287285888444154, iteration: 104666
loss: 1.0110716819763184,grad_norm: 0.9999992428631371, iteration: 104667
loss: 1.0013628005981445,grad_norm: 0.9999992802762694, iteration: 104668
loss: 0.9759202003479004,grad_norm: 0.9999990766549472, iteration: 104669
loss: 1.0113049745559692,grad_norm: 0.8808190316342485, iteration: 104670
loss: 1.005312204360962,grad_norm: 0.9445240614105989, iteration: 104671
loss: 0.9904810190200806,grad_norm: 0.9999991185892757, iteration: 104672
loss: 1.0717902183532715,grad_norm: 0.9999990422419817, iteration: 104673
loss: 0.9889602661132812,grad_norm: 0.9999991161545354, iteration: 104674
loss: 1.0090323686599731,grad_norm: 0.9999991086939463, iteration: 104675
loss: 1.0296809673309326,grad_norm: 0.9999994645165705, iteration: 104676
loss: 0.9712466597557068,grad_norm: 0.9999991240407428, iteration: 104677
loss: 0.9555140733718872,grad_norm: 0.9999997252047679, iteration: 104678
loss: 1.0120645761489868,grad_norm: 0.9999992148985971, iteration: 104679
loss: 0.9855810403823853,grad_norm: 0.9999989785148029, iteration: 104680
loss: 0.9989078044891357,grad_norm: 0.9999990790934904, iteration: 104681
loss: 1.0273641347885132,grad_norm: 0.9999993942448667, iteration: 104682
loss: 1.0747156143188477,grad_norm: 0.9999995097496418, iteration: 104683
loss: 0.9719160199165344,grad_norm: 0.9999991279367935, iteration: 104684
loss: 0.9781343340873718,grad_norm: 0.9999992261509055, iteration: 104685
loss: 0.9837173819541931,grad_norm: 0.9287121289054118, iteration: 104686
loss: 0.9907659888267517,grad_norm: 0.9999992303926085, iteration: 104687
loss: 1.0512089729309082,grad_norm: 0.9999990642204968, iteration: 104688
loss: 0.9946295022964478,grad_norm: 0.9999992698006991, iteration: 104689
loss: 0.9717883467674255,grad_norm: 0.9999990404759852, iteration: 104690
loss: 1.0296050310134888,grad_norm: 0.9999994151777557, iteration: 104691
loss: 0.9842548966407776,grad_norm: 0.9999990754019255, iteration: 104692
loss: 1.018518090248108,grad_norm: 0.9999990480341233, iteration: 104693
loss: 0.9728770852088928,grad_norm: 0.9456045824317826, iteration: 104694
loss: 0.9836934208869934,grad_norm: 0.9999990950062532, iteration: 104695
loss: 1.0076167583465576,grad_norm: 0.9999995185336957, iteration: 104696
loss: 0.9771609306335449,grad_norm: 0.999999065959762, iteration: 104697
loss: 0.9845077395439148,grad_norm: 0.9999992338968303, iteration: 104698
loss: 1.0079705715179443,grad_norm: 0.9903922157850715, iteration: 104699
loss: 1.0068342685699463,grad_norm: 0.9999993270386061, iteration: 104700
loss: 1.016530990600586,grad_norm: 0.9999992643142556, iteration: 104701
loss: 0.9970484972000122,grad_norm: 0.9999992232201927, iteration: 104702
loss: 1.0166536569595337,grad_norm: 0.9999993886405427, iteration: 104703
loss: 0.9985032081604004,grad_norm: 0.999999219102852, iteration: 104704
loss: 0.9745668768882751,grad_norm: 0.9999991603087528, iteration: 104705
loss: 1.0269217491149902,grad_norm: 0.9999996458919854, iteration: 104706
loss: 1.0686602592468262,grad_norm: 0.9999996426311036, iteration: 104707
loss: 1.004858136177063,grad_norm: 0.9567974718105304, iteration: 104708
loss: 0.9977017641067505,grad_norm: 0.9999991320142841, iteration: 104709
loss: 1.0805468559265137,grad_norm: 0.8229482888016953, iteration: 104710
loss: 0.9765077233314514,grad_norm: 0.9999991208694182, iteration: 104711
loss: 0.9830135703086853,grad_norm: 0.9999992670971519, iteration: 104712
loss: 1.0418474674224854,grad_norm: 0.9999999092368129, iteration: 104713
loss: 0.9997171759605408,grad_norm: 0.9970059406857091, iteration: 104714
loss: 1.016321063041687,grad_norm: 0.9999991700630524, iteration: 104715
loss: 1.0093384981155396,grad_norm: 0.9333624086912495, iteration: 104716
loss: 1.0162763595581055,grad_norm: 0.9999990476333741, iteration: 104717
loss: 0.9999774694442749,grad_norm: 0.9728835538002846, iteration: 104718
loss: 1.0142720937728882,grad_norm: 0.999999223328405, iteration: 104719
loss: 1.0048799514770508,grad_norm: 0.9999993996551431, iteration: 104720
loss: 1.0048110485076904,grad_norm: 0.983312462841552, iteration: 104721
loss: 1.0137205123901367,grad_norm: 0.9999992918643008, iteration: 104722
loss: 1.0380967855453491,grad_norm: 0.9632440964337394, iteration: 104723
loss: 1.0263296365737915,grad_norm: 0.9999990920669191, iteration: 104724
loss: 0.969639241695404,grad_norm: 0.8918046968961825, iteration: 104725
loss: 0.9644514322280884,grad_norm: 0.9871266655889134, iteration: 104726
loss: 1.0156430006027222,grad_norm: 0.999999625916192, iteration: 104727
loss: 1.002077579498291,grad_norm: 0.9999994483585812, iteration: 104728
loss: 1.0125553607940674,grad_norm: 0.9999998241028519, iteration: 104729
loss: 1.0126818418502808,grad_norm: 0.9049036225167904, iteration: 104730
loss: 1.0223586559295654,grad_norm: 0.9421684524190027, iteration: 104731
loss: 0.9943216443061829,grad_norm: 0.9933230751639691, iteration: 104732
loss: 0.9921954274177551,grad_norm: 0.9999993437708751, iteration: 104733
loss: 1.026138424873352,grad_norm: 0.9999993986335913, iteration: 104734
loss: 0.9986113905906677,grad_norm: 0.9999993973123016, iteration: 104735
loss: 0.9848078489303589,grad_norm: 0.9058464374616882, iteration: 104736
loss: 0.9546217918395996,grad_norm: 0.9999991461603486, iteration: 104737
loss: 1.0004162788391113,grad_norm: 0.9999994006280191, iteration: 104738
loss: 0.9681084752082825,grad_norm: 0.9999994196836572, iteration: 104739
loss: 1.0012776851654053,grad_norm: 0.9999991814328387, iteration: 104740
loss: 1.0223904848098755,grad_norm: 0.9999992179528051, iteration: 104741
loss: 0.9592868685722351,grad_norm: 0.999999114797715, iteration: 104742
loss: 0.9941482543945312,grad_norm: 0.9999990659333601, iteration: 104743
loss: 1.0210529565811157,grad_norm: 0.9999990952113617, iteration: 104744
loss: 1.0010920763015747,grad_norm: 0.8968942310576407, iteration: 104745
loss: 0.9797157049179077,grad_norm: 0.9999991411303861, iteration: 104746
loss: 0.9764143824577332,grad_norm: 0.946465128945403, iteration: 104747
loss: 0.9856181144714355,grad_norm: 0.874838074606061, iteration: 104748
loss: 0.9748607277870178,grad_norm: 0.9999990025025418, iteration: 104749
loss: 1.032564640045166,grad_norm: 0.9999992474669194, iteration: 104750
loss: 0.9867584705352783,grad_norm: 0.93089791374307, iteration: 104751
loss: 0.9396349787712097,grad_norm: 0.9894588368959122, iteration: 104752
loss: 0.9727928042411804,grad_norm: 0.9999991749969113, iteration: 104753
loss: 1.0194207429885864,grad_norm: 0.9999990031780812, iteration: 104754
loss: 0.9678372144699097,grad_norm: 0.9726433071195402, iteration: 104755
loss: 1.0221657752990723,grad_norm: 0.9500564379252646, iteration: 104756
loss: 1.0214481353759766,grad_norm: 0.999998966074509, iteration: 104757
loss: 1.0284901857376099,grad_norm: 0.9999991767029083, iteration: 104758
loss: 0.9870069622993469,grad_norm: 0.8264279132882348, iteration: 104759
loss: 1.125461459159851,grad_norm: 0.9999991070878801, iteration: 104760
loss: 1.0013023614883423,grad_norm: 0.846611220017174, iteration: 104761
loss: 1.0228497982025146,grad_norm: 0.9999990646795911, iteration: 104762
loss: 1.0853129625320435,grad_norm: 0.9999998596418237, iteration: 104763
loss: 1.022752046585083,grad_norm: 0.9999991174863191, iteration: 104764
loss: 1.0141098499298096,grad_norm: 0.9999993677521607, iteration: 104765
loss: 1.037203311920166,grad_norm: 0.9416929579890397, iteration: 104766
loss: 1.0234140157699585,grad_norm: 0.9999992022149811, iteration: 104767
loss: 0.9577073454856873,grad_norm: 0.9999991032490065, iteration: 104768
loss: 1.004878044128418,grad_norm: 0.922207575652875, iteration: 104769
loss: 1.0006569623947144,grad_norm: 0.9932465394747485, iteration: 104770
loss: 1.0059900283813477,grad_norm: 0.9999991745824365, iteration: 104771
loss: 1.0106887817382812,grad_norm: 0.9565667136061258, iteration: 104772
loss: 1.0233019590377808,grad_norm: 0.999999487285151, iteration: 104773
loss: 0.9934589266777039,grad_norm: 0.9999991311051848, iteration: 104774
loss: 0.941620409488678,grad_norm: 0.9999991999931042, iteration: 104775
loss: 0.9970704317092896,grad_norm: 0.9683608147050562, iteration: 104776
loss: 1.0318723917007446,grad_norm: 0.999999275664293, iteration: 104777
loss: 1.0753388404846191,grad_norm: 0.9999991988077964, iteration: 104778
loss: 1.0189543962478638,grad_norm: 0.9999991279634963, iteration: 104779
loss: 1.0149521827697754,grad_norm: 0.9999991578024696, iteration: 104780
loss: 0.9958589673042297,grad_norm: 0.9999990612814212, iteration: 104781
loss: 0.9757077097892761,grad_norm: 0.9999992122245187, iteration: 104782
loss: 0.9940614700317383,grad_norm: 0.9999991487805269, iteration: 104783
loss: 1.0729469060897827,grad_norm: 0.9999991618285109, iteration: 104784
loss: 1.0194028615951538,grad_norm: 0.9999991705595612, iteration: 104785
loss: 0.9759156107902527,grad_norm: 0.9999993446081873, iteration: 104786
loss: 1.016542673110962,grad_norm: 0.9999990780446402, iteration: 104787
loss: 0.998028039932251,grad_norm: 0.9740788518976273, iteration: 104788
loss: 1.017197608947754,grad_norm: 0.9999993021389475, iteration: 104789
loss: 0.9988937377929688,grad_norm: 0.9999990998843887, iteration: 104790
loss: 0.9775453209877014,grad_norm: 0.9999991580551806, iteration: 104791
loss: 1.0070594549179077,grad_norm: 0.9999995857690358, iteration: 104792
loss: 1.0471484661102295,grad_norm: 0.9999995186421969, iteration: 104793
loss: 1.0139449834823608,grad_norm: 0.9999990250739706, iteration: 104794
loss: 0.9760333299636841,grad_norm: 0.9999990000693613, iteration: 104795
loss: 1.003411054611206,grad_norm: 0.9999991295605482, iteration: 104796
loss: 1.0197466611862183,grad_norm: 0.9999990931651869, iteration: 104797
loss: 1.0113474130630493,grad_norm: 0.9497305132951005, iteration: 104798
loss: 0.9931302666664124,grad_norm: 0.9999990855015708, iteration: 104799
loss: 1.010536789894104,grad_norm: 0.9999991592377054, iteration: 104800
loss: 1.1210663318634033,grad_norm: 0.9999993236470345, iteration: 104801
loss: 0.9980993866920471,grad_norm: 0.9254472847396025, iteration: 104802
loss: 0.9732841849327087,grad_norm: 0.9999989565017355, iteration: 104803
loss: 0.9949461221694946,grad_norm: 0.9999990901611071, iteration: 104804
loss: 0.9870882630348206,grad_norm: 0.9999990663104069, iteration: 104805
loss: 1.012147307395935,grad_norm: 0.9852987718346378, iteration: 104806
loss: 0.9886308312416077,grad_norm: 0.966682880273954, iteration: 104807
loss: 1.0063130855560303,grad_norm: 0.999999111868603, iteration: 104808
loss: 1.0412442684173584,grad_norm: 0.898321121518496, iteration: 104809
loss: 0.9643001556396484,grad_norm: 0.9999987715175556, iteration: 104810
loss: 0.9530057907104492,grad_norm: 0.9999992120207734, iteration: 104811
loss: 0.9636626243591309,grad_norm: 0.9999991272209592, iteration: 104812
loss: 0.9955921769142151,grad_norm: 0.995825660932976, iteration: 104813
loss: 0.9813128709793091,grad_norm: 0.9369638437262364, iteration: 104814
loss: 0.9645018577575684,grad_norm: 0.9999990173515838, iteration: 104815
loss: 0.9988289475440979,grad_norm: 0.9887528233462837, iteration: 104816
loss: 1.0089777708053589,grad_norm: 0.9999992152562959, iteration: 104817
loss: 1.0533087253570557,grad_norm: 0.9999998062756554, iteration: 104818
loss: 1.0013962984085083,grad_norm: 0.9999991445388778, iteration: 104819
loss: 0.9853653311729431,grad_norm: 0.9999991451252139, iteration: 104820
loss: 1.0215940475463867,grad_norm: 0.9999990767536512, iteration: 104821
loss: 0.9540413618087769,grad_norm: 0.9750546183087565, iteration: 104822
loss: 1.0276789665222168,grad_norm: 0.9999994545628749, iteration: 104823
loss: 1.0021414756774902,grad_norm: 0.9936620575755151, iteration: 104824
loss: 0.9942149519920349,grad_norm: 0.9999991618660139, iteration: 104825
loss: 0.9838589429855347,grad_norm: 0.9270330902364008, iteration: 104826
loss: 1.038596510887146,grad_norm: 0.9999991697181325, iteration: 104827
loss: 1.001715064048767,grad_norm: 0.9999993110475816, iteration: 104828
loss: 1.0165594816207886,grad_norm: 0.9297595860089147, iteration: 104829
loss: 0.9849794507026672,grad_norm: 0.9999996831742427, iteration: 104830
loss: 1.0083271265029907,grad_norm: 0.9999991430600692, iteration: 104831
loss: 1.0931270122528076,grad_norm: 0.9999991642408841, iteration: 104832
loss: 1.0566656589508057,grad_norm: 0.9999991722963365, iteration: 104833
loss: 1.0128360986709595,grad_norm: 0.9737698882647646, iteration: 104834
loss: 1.0061028003692627,grad_norm: 0.8265089475574898, iteration: 104835
loss: 0.9684628248214722,grad_norm: 0.9999990990906942, iteration: 104836
loss: 1.020788550376892,grad_norm: 0.9999991941067179, iteration: 104837
loss: 1.010909914970398,grad_norm: 0.9999993006408862, iteration: 104838
loss: 0.977275013923645,grad_norm: 0.9785488437020229, iteration: 104839
loss: 0.9830700159072876,grad_norm: 0.9698828298082689, iteration: 104840
loss: 0.9906888604164124,grad_norm: 0.9999991244974608, iteration: 104841
loss: 1.0234483480453491,grad_norm: 0.9345097404549324, iteration: 104842
loss: 0.9572699069976807,grad_norm: 0.7445330534879688, iteration: 104843
loss: 1.0086095333099365,grad_norm: 0.9999992243254336, iteration: 104844
loss: 0.9734200835227966,grad_norm: 0.9999991505506832, iteration: 104845
loss: 1.014014720916748,grad_norm: 0.911078178241282, iteration: 104846
loss: 0.9937347173690796,grad_norm: 0.9139870505381319, iteration: 104847
loss: 1.0175371170043945,grad_norm: 0.9999991538414297, iteration: 104848
loss: 1.012302041053772,grad_norm: 0.9999990513504056, iteration: 104849
loss: 1.0284215211868286,grad_norm: 0.9999996503871904, iteration: 104850
loss: 0.9981141090393066,grad_norm: 0.9999991693569192, iteration: 104851
loss: 0.9937608242034912,grad_norm: 0.8979357941332199, iteration: 104852
loss: 0.9612725973129272,grad_norm: 0.9999992073976459, iteration: 104853
loss: 0.981812059879303,grad_norm: 0.9999990683645946, iteration: 104854
loss: 1.0171335935592651,grad_norm: 0.9999992027191733, iteration: 104855
loss: 0.9810596108436584,grad_norm: 0.9999992126756708, iteration: 104856
loss: 0.9899025559425354,grad_norm: 0.9999992706090693, iteration: 104857
loss: 0.9914495944976807,grad_norm: 0.99958303396546, iteration: 104858
loss: 0.9673774242401123,grad_norm: 0.9398743225565026, iteration: 104859
loss: 0.9814766645431519,grad_norm: 0.9999991929307138, iteration: 104860
loss: 0.9950211644172668,grad_norm: 0.8900808405839004, iteration: 104861
loss: 0.993791401386261,grad_norm: 0.9965579913512107, iteration: 104862
loss: 1.0001111030578613,grad_norm: 0.9952066123529394, iteration: 104863
loss: 0.994422972202301,grad_norm: 0.964912787261246, iteration: 104864
loss: 0.9941504001617432,grad_norm: 0.9999990278241512, iteration: 104865
loss: 1.010574221611023,grad_norm: 0.9999996541416233, iteration: 104866
loss: 0.9815658926963806,grad_norm: 0.8982184413897215, iteration: 104867
loss: 0.9379696846008301,grad_norm: 0.999999268760665, iteration: 104868
loss: 0.9896135926246643,grad_norm: 0.9999991925705931, iteration: 104869
loss: 1.0207512378692627,grad_norm: 0.9772110193163417, iteration: 104870
loss: 1.002427101135254,grad_norm: 0.9999990802229841, iteration: 104871
loss: 0.9897782206535339,grad_norm: 0.9824497505177302, iteration: 104872
loss: 0.9956520199775696,grad_norm: 0.9999996665740561, iteration: 104873
loss: 1.0209509134292603,grad_norm: 0.9999990180002974, iteration: 104874
loss: 1.0667942762374878,grad_norm: 0.9999995343651911, iteration: 104875
loss: 1.0226479768753052,grad_norm: 0.9999990564106048, iteration: 104876
loss: 1.134476661682129,grad_norm: 0.9128434720054707, iteration: 104877
loss: 1.0032016038894653,grad_norm: 0.9999995819502991, iteration: 104878
loss: 1.0566552877426147,grad_norm: 0.999999029982816, iteration: 104879
loss: 0.9685672521591187,grad_norm: 0.9999990988318636, iteration: 104880
loss: 1.0318174362182617,grad_norm: 0.9999991725152507, iteration: 104881
loss: 1.113656759262085,grad_norm: 0.9999992460781534, iteration: 104882
loss: 1.0204548835754395,grad_norm: 0.9999992034567764, iteration: 104883
loss: 0.9924368262290955,grad_norm: 0.9999991520836611, iteration: 104884
loss: 0.9712982773780823,grad_norm: 0.999998919525513, iteration: 104885
loss: 1.0015205144882202,grad_norm: 0.9999990403473538, iteration: 104886
loss: 1.0155032873153687,grad_norm: 0.9147091089695663, iteration: 104887
loss: 0.9915208220481873,grad_norm: 0.8821391986274683, iteration: 104888
loss: 1.0460773706436157,grad_norm: 0.9999992790799959, iteration: 104889
loss: 0.9907236099243164,grad_norm: 0.9999995011380644, iteration: 104890
loss: 1.0271793603897095,grad_norm: 0.954763251016113, iteration: 104891
loss: 0.9698305130004883,grad_norm: 0.905798187203691, iteration: 104892
loss: 1.0268583297729492,grad_norm: 0.9999993089671013, iteration: 104893
loss: 1.1148974895477295,grad_norm: 0.9999993173827436, iteration: 104894
loss: 1.021783471107483,grad_norm: 0.9999991832938521, iteration: 104895
loss: 1.0828765630722046,grad_norm: 0.9999992592094064, iteration: 104896
loss: 1.0433133840560913,grad_norm: 0.9999992393559106, iteration: 104897
loss: 0.9723579287528992,grad_norm: 0.9172771877752774, iteration: 104898
loss: 0.9851179122924805,grad_norm: 0.999999168090469, iteration: 104899
loss: 1.0183888673782349,grad_norm: 0.9999989963127849, iteration: 104900
loss: 1.0281481742858887,grad_norm: 0.9999991773352132, iteration: 104901
loss: 1.022849440574646,grad_norm: 0.9999992336006508, iteration: 104902
loss: 0.9816153645515442,grad_norm: 0.918619513703886, iteration: 104903
loss: 0.9775827527046204,grad_norm: 0.9999992260102094, iteration: 104904
loss: 0.991357684135437,grad_norm: 0.9999990495378204, iteration: 104905
loss: 1.0108006000518799,grad_norm: 0.8800872583228223, iteration: 104906
loss: 0.9958484172821045,grad_norm: 0.999999144699528, iteration: 104907
loss: 0.9863942265510559,grad_norm: 0.9999989560515528, iteration: 104908
loss: 0.9901193380355835,grad_norm: 0.9999990000291166, iteration: 104909
loss: 0.9708830118179321,grad_norm: 0.9681777332142789, iteration: 104910
loss: 1.0047738552093506,grad_norm: 0.9999990079618929, iteration: 104911
loss: 1.0212798118591309,grad_norm: 0.9999992253503811, iteration: 104912
loss: 0.9858242273330688,grad_norm: 0.9839235116309192, iteration: 104913
loss: 0.9920560121536255,grad_norm: 0.9881656616037733, iteration: 104914
loss: 0.990843653678894,grad_norm: 0.9525304750548568, iteration: 104915
loss: 1.0373417139053345,grad_norm: 0.999999000089114, iteration: 104916
loss: 1.0165188312530518,grad_norm: 0.9999990108785021, iteration: 104917
loss: 1.0005556344985962,grad_norm: 0.8552110711751902, iteration: 104918
loss: 1.0170246362686157,grad_norm: 0.9999989609645111, iteration: 104919
loss: 1.027073621749878,grad_norm: 0.8678270062163359, iteration: 104920
loss: 1.0152355432510376,grad_norm: 0.9661586380860566, iteration: 104921
loss: 1.0139795541763306,grad_norm: 0.999999082593361, iteration: 104922
loss: 0.9790289998054504,grad_norm: 0.9999991509517917, iteration: 104923
loss: 1.00728178024292,grad_norm: 0.9999991692188472, iteration: 104924
loss: 1.0234882831573486,grad_norm: 0.9999990269912942, iteration: 104925
loss: 1.004549503326416,grad_norm: 0.9999989476461447, iteration: 104926
loss: 1.0655580759048462,grad_norm: 0.999999574648478, iteration: 104927
loss: 1.0209468603134155,grad_norm: 0.999999149889977, iteration: 104928
loss: 1.0181732177734375,grad_norm: 0.9235164297941606, iteration: 104929
loss: 1.1636427640914917,grad_norm: 0.9999999781398846, iteration: 104930
loss: 0.9929273724555969,grad_norm: 0.9999990653053318, iteration: 104931
loss: 0.984269917011261,grad_norm: 0.999998950030754, iteration: 104932
loss: 0.9906665086746216,grad_norm: 0.9999991027316206, iteration: 104933
loss: 0.9725026488304138,grad_norm: 0.9999991913357145, iteration: 104934
loss: 0.9948899745941162,grad_norm: 0.9918131195660541, iteration: 104935
loss: 1.053370714187622,grad_norm: 0.9999991957499125, iteration: 104936
loss: 0.9555068016052246,grad_norm: 0.9115863432853216, iteration: 104937
loss: 1.024965524673462,grad_norm: 0.9999992546666836, iteration: 104938
loss: 1.0379765033721924,grad_norm: 0.9639354970806213, iteration: 104939
loss: 0.9918453097343445,grad_norm: 0.9999991591677874, iteration: 104940
loss: 1.0203757286071777,grad_norm: 0.96650755311564, iteration: 104941
loss: 0.9742411971092224,grad_norm: 0.9999992537149567, iteration: 104942
loss: 0.9731972217559814,grad_norm: 0.9999993697192264, iteration: 104943
loss: 1.1914286613464355,grad_norm: 0.9999990282974762, iteration: 104944
loss: 1.0047687292099,grad_norm: 0.9999991943861299, iteration: 104945
loss: 0.9845879077911377,grad_norm: 0.9999990764451964, iteration: 104946
loss: 1.0108808279037476,grad_norm: 0.9999991724583178, iteration: 104947
loss: 1.0073895454406738,grad_norm: 0.9745137894864172, iteration: 104948
loss: 0.9822564125061035,grad_norm: 0.9526143746403218, iteration: 104949
loss: 1.0272839069366455,grad_norm: 0.977107140068234, iteration: 104950
loss: 1.0100706815719604,grad_norm: 0.9999992076300257, iteration: 104951
loss: 0.9879505038261414,grad_norm: 0.9549955682701402, iteration: 104952
loss: 1.0279443264007568,grad_norm: 0.9999990654840235, iteration: 104953
loss: 0.9571179747581482,grad_norm: 0.9999991539864658, iteration: 104954
loss: 1.0049060583114624,grad_norm: 0.9811032402471799, iteration: 104955
loss: 1.003255844116211,grad_norm: 0.999999191629753, iteration: 104956
loss: 1.0175758600234985,grad_norm: 0.9218156778478668, iteration: 104957
loss: 1.0098133087158203,grad_norm: 0.9999993516940852, iteration: 104958
loss: 1.012833595275879,grad_norm: 0.9777366628353087, iteration: 104959
loss: 0.9948408007621765,grad_norm: 0.9999991838873105, iteration: 104960
loss: 1.0222638845443726,grad_norm: 0.9999991382981237, iteration: 104961
loss: 1.0241152048110962,grad_norm: 0.9999991999031806, iteration: 104962
loss: 0.9965941309928894,grad_norm: 0.8717806203419922, iteration: 104963
loss: 0.9999046921730042,grad_norm: 0.9999993580594374, iteration: 104964
loss: 0.9520329236984253,grad_norm: 0.9999990519383719, iteration: 104965
loss: 0.9976372122764587,grad_norm: 0.94122959910169, iteration: 104966
loss: 0.9950724244117737,grad_norm: 0.9468548648449849, iteration: 104967
loss: 0.9978811144828796,grad_norm: 0.9999989208391176, iteration: 104968
loss: 0.999638557434082,grad_norm: 0.8776047928124009, iteration: 104969
loss: 0.9863471984863281,grad_norm: 0.9999991017144737, iteration: 104970
loss: 0.9822984933853149,grad_norm: 0.9999992457335842, iteration: 104971
loss: 0.9807407259941101,grad_norm: 0.9999991892313864, iteration: 104972
loss: 1.0551260709762573,grad_norm: 0.9999994440672411, iteration: 104973
loss: 0.963513195514679,grad_norm: 0.9289637833155525, iteration: 104974
loss: 1.0077953338623047,grad_norm: 0.8417740520915215, iteration: 104975
loss: 0.9858324527740479,grad_norm: 0.999999080703952, iteration: 104976
loss: 0.9419257044792175,grad_norm: 0.9999990431487401, iteration: 104977
loss: 1.0030219554901123,grad_norm: 0.9824902878838577, iteration: 104978
loss: 1.076511025428772,grad_norm: 0.9999998534015947, iteration: 104979
loss: 1.010154128074646,grad_norm: 0.999999336384659, iteration: 104980
loss: 1.0117862224578857,grad_norm: 0.9985918723488522, iteration: 104981
loss: 1.0032718181610107,grad_norm: 0.9999990561339476, iteration: 104982
loss: 1.0099109411239624,grad_norm: 0.9999995266008499, iteration: 104983
loss: 0.973038375377655,grad_norm: 0.9077271190003203, iteration: 104984
loss: 0.9548859596252441,grad_norm: 0.9999991356436553, iteration: 104985
loss: 1.007253885269165,grad_norm: 0.920863060524822, iteration: 104986
loss: 1.046156644821167,grad_norm: 0.999999726312823, iteration: 104987
loss: 0.9625653028488159,grad_norm: 0.985085660439622, iteration: 104988
loss: 1.0174355506896973,grad_norm: 0.9999991638165664, iteration: 104989
loss: 0.9996741414070129,grad_norm: 0.8837340479003057, iteration: 104990
loss: 1.0190097093582153,grad_norm: 0.999999312870821, iteration: 104991
loss: 1.005536675453186,grad_norm: 0.9931168334766955, iteration: 104992
loss: 1.0360777378082275,grad_norm: 0.9999992394633431, iteration: 104993
loss: 0.9741900563240051,grad_norm: 0.9999990600832075, iteration: 104994
loss: 1.044417142868042,grad_norm: 0.999999249975816, iteration: 104995
loss: 1.037761926651001,grad_norm: 0.8747775877690949, iteration: 104996
loss: 1.0625593662261963,grad_norm: 0.999999396198018, iteration: 104997
loss: 0.9663114547729492,grad_norm: 0.9999998789510863, iteration: 104998
loss: 0.9802361130714417,grad_norm: 0.9788138418351581, iteration: 104999
loss: 0.9969413876533508,grad_norm: 0.9999990811933469, iteration: 105000
loss: 0.9939067363739014,grad_norm: 0.9013483669637691, iteration: 105001
loss: 0.9920800924301147,grad_norm: 0.8833607984807963, iteration: 105002
loss: 1.0142245292663574,grad_norm: 0.9999990870451906, iteration: 105003
loss: 1.0499482154846191,grad_norm: 0.9999996113423484, iteration: 105004
loss: 1.0288227796554565,grad_norm: 0.9999993847713806, iteration: 105005
loss: 1.0308016538619995,grad_norm: 0.9409236510368445, iteration: 105006
loss: 1.0127935409545898,grad_norm: 0.8915655144715408, iteration: 105007
loss: 1.01394784450531,grad_norm: 0.9999991443989827, iteration: 105008
loss: 1.034590721130371,grad_norm: 0.9999989911930388, iteration: 105009
loss: 0.9981474876403809,grad_norm: 0.9999993898872922, iteration: 105010
loss: 0.9963824152946472,grad_norm: 0.9999996919543396, iteration: 105011
loss: 1.027195692062378,grad_norm: 0.9999991221502164, iteration: 105012
loss: 1.043144702911377,grad_norm: 0.9999991738932518, iteration: 105013
loss: 1.0439170598983765,grad_norm: 0.9999996057001613, iteration: 105014
loss: 0.9948747754096985,grad_norm: 0.9999992869484318, iteration: 105015
loss: 0.9942080974578857,grad_norm: 0.9209077449911655, iteration: 105016
loss: 1.003202199935913,grad_norm: 0.9999992238749875, iteration: 105017
loss: 0.9910826683044434,grad_norm: 0.9999991688241137, iteration: 105018
loss: 0.9872879385948181,grad_norm: 0.9999992161352071, iteration: 105019
loss: 1.0456286668777466,grad_norm: 0.9999992036830131, iteration: 105020
loss: 1.107853889465332,grad_norm: 0.999999638694634, iteration: 105021
loss: 0.9822843670845032,grad_norm: 0.9299172436945331, iteration: 105022
loss: 1.0681089162826538,grad_norm: 0.999999188873444, iteration: 105023
loss: 1.0015519857406616,grad_norm: 0.9801071011441074, iteration: 105024
loss: 0.9768655896186829,grad_norm: 0.9999990979771302, iteration: 105025
loss: 1.016371488571167,grad_norm: 0.9999992280127035, iteration: 105026
loss: 1.0050866603851318,grad_norm: 0.8591662025539027, iteration: 105027
loss: 1.040452003479004,grad_norm: 0.9271465326420615, iteration: 105028
loss: 1.0564380884170532,grad_norm: 0.9999996207409394, iteration: 105029
loss: 1.0151028633117676,grad_norm: 0.9778529207819621, iteration: 105030
loss: 1.0091421604156494,grad_norm: 0.9999991612637157, iteration: 105031
loss: 0.966205894947052,grad_norm: 0.9860144210713876, iteration: 105032
loss: 1.0220261812210083,grad_norm: 0.8477875593649395, iteration: 105033
loss: 0.9772818088531494,grad_norm: 0.9833106096488432, iteration: 105034
loss: 1.0442235469818115,grad_norm: 0.9540090753755033, iteration: 105035
loss: 0.9679857492446899,grad_norm: 0.9999993658868507, iteration: 105036
loss: 0.9821723699569702,grad_norm: 0.999999281676183, iteration: 105037
loss: 1.008596658706665,grad_norm: 0.9999990577559879, iteration: 105038
loss: 0.9838040471076965,grad_norm: 0.9731228090734945, iteration: 105039
loss: 1.0234441757202148,grad_norm: 0.9581128259584107, iteration: 105040
loss: 0.9999901652336121,grad_norm: 0.9999991809457845, iteration: 105041
loss: 1.0347093343734741,grad_norm: 0.9999991570075709, iteration: 105042
loss: 0.9811927080154419,grad_norm: 0.9999990524107434, iteration: 105043
loss: 1.0429273843765259,grad_norm: 0.9999991825825476, iteration: 105044
loss: 0.9832929372787476,grad_norm: 0.9317310460137515, iteration: 105045
loss: 0.9995245337486267,grad_norm: 0.8717032025670987, iteration: 105046
loss: 1.007652759552002,grad_norm: 0.9966314043575086, iteration: 105047
loss: 1.0213470458984375,grad_norm: 0.8539474795883885, iteration: 105048
loss: 1.01551353931427,grad_norm: 0.9999991076348894, iteration: 105049
loss: 1.0268303155899048,grad_norm: 0.9999992627651945, iteration: 105050
loss: 0.9980316162109375,grad_norm: 0.9235528147478022, iteration: 105051
loss: 0.9822781682014465,grad_norm: 0.9999990850244821, iteration: 105052
loss: 1.0118939876556396,grad_norm: 0.9499699983594818, iteration: 105053
loss: 1.0015878677368164,grad_norm: 0.9999991399220723, iteration: 105054
loss: 1.0317295789718628,grad_norm: 0.9999993053238833, iteration: 105055
loss: 1.09141206741333,grad_norm: 0.9999996330471642, iteration: 105056
loss: 1.0425769090652466,grad_norm: 0.9380498979897632, iteration: 105057
loss: 1.0163508653640747,grad_norm: 0.9999989546142165, iteration: 105058
loss: 0.9789669513702393,grad_norm: 0.9422744918870817, iteration: 105059
loss: 1.034030556678772,grad_norm: 0.9999990363312647, iteration: 105060
loss: 0.9590287208557129,grad_norm: 0.9999991142038177, iteration: 105061
loss: 0.9851398468017578,grad_norm: 0.9999992387054509, iteration: 105062
loss: 0.9994324445724487,grad_norm: 0.9999992003317939, iteration: 105063
loss: 0.9945115447044373,grad_norm: 0.9999990881295199, iteration: 105064
loss: 0.981134295463562,grad_norm: 0.9999991122686998, iteration: 105065
loss: 0.9873674511909485,grad_norm: 0.999999128296175, iteration: 105066
loss: 1.0032774209976196,grad_norm: 0.9999989036099624, iteration: 105067
loss: 1.0061911344528198,grad_norm: 0.9733786880077909, iteration: 105068
loss: 1.002906322479248,grad_norm: 0.9768476802911841, iteration: 105069
loss: 0.9544433951377869,grad_norm: 0.9908249044109798, iteration: 105070
loss: 1.0108823776245117,grad_norm: 0.9999991326178568, iteration: 105071
loss: 1.1141808032989502,grad_norm: 0.9999996721577271, iteration: 105072
loss: 1.0239158868789673,grad_norm: 0.9999996525961699, iteration: 105073
loss: 1.04020094871521,grad_norm: 0.9999990988758879, iteration: 105074
loss: 1.0009069442749023,grad_norm: 0.9999991419458625, iteration: 105075
loss: 1.0048702955245972,grad_norm: 0.9999991377633715, iteration: 105076
loss: 1.0114251375198364,grad_norm: 0.9304040216383764, iteration: 105077
loss: 1.0024584531784058,grad_norm: 0.9999991883501375, iteration: 105078
loss: 0.9834616780281067,grad_norm: 0.9999990027716836, iteration: 105079
loss: 0.9709806442260742,grad_norm: 0.9999991000011784, iteration: 105080
loss: 0.9562875032424927,grad_norm: 0.9999989944030713, iteration: 105081
loss: 1.0022921562194824,grad_norm: 0.932484083434534, iteration: 105082
loss: 0.9908726215362549,grad_norm: 0.8911104023187532, iteration: 105083
loss: 1.0039900541305542,grad_norm: 0.9867584180999357, iteration: 105084
loss: 1.0068094730377197,grad_norm: 0.9999995384554623, iteration: 105085
loss: 0.973668098449707,grad_norm: 0.9951050430740849, iteration: 105086
loss: 1.0075609683990479,grad_norm: 0.9999992769584033, iteration: 105087
loss: 0.9772416949272156,grad_norm: 0.9999989240810898, iteration: 105088
loss: 0.9885739684104919,grad_norm: 0.9999990727565303, iteration: 105089
loss: 1.0060575008392334,grad_norm: 0.9999991288316646, iteration: 105090
loss: 1.0277024507522583,grad_norm: 0.999999260559419, iteration: 105091
loss: 1.030830979347229,grad_norm: 0.9939030503988731, iteration: 105092
loss: 0.9880505204200745,grad_norm: 0.9999990682557166, iteration: 105093
loss: 1.0182949304580688,grad_norm: 0.9999992830244002, iteration: 105094
loss: 1.032146692276001,grad_norm: 0.9999991137799511, iteration: 105095
loss: 1.0029480457305908,grad_norm: 0.9999994123647113, iteration: 105096
loss: 1.001380443572998,grad_norm: 0.9999991998004324, iteration: 105097
loss: 1.0985043048858643,grad_norm: 0.9999996312013782, iteration: 105098
loss: 0.9938369989395142,grad_norm: 0.9999989731924152, iteration: 105099
loss: 1.006959319114685,grad_norm: 0.9999991966055285, iteration: 105100
loss: 1.0198798179626465,grad_norm: 0.9600634907912108, iteration: 105101
loss: 0.9882715940475464,grad_norm: 0.999998972651983, iteration: 105102
loss: 1.0203089714050293,grad_norm: 0.9999990496776562, iteration: 105103
loss: 0.9795565605163574,grad_norm: 0.9999990295943126, iteration: 105104
loss: 0.9964977502822876,grad_norm: 0.9999989584886043, iteration: 105105
loss: 0.9727495908737183,grad_norm: 0.9999990827097717, iteration: 105106
loss: 1.0048288106918335,grad_norm: 0.9999992726673075, iteration: 105107
loss: 0.9858664274215698,grad_norm: 0.9999994280019955, iteration: 105108
loss: 0.9950756430625916,grad_norm: 0.9461985870251792, iteration: 105109
loss: 0.9465163946151733,grad_norm: 0.9595327248726687, iteration: 105110
loss: 1.0251210927963257,grad_norm: 0.9150399551945527, iteration: 105111
loss: 0.9849554896354675,grad_norm: 0.9999990359384594, iteration: 105112
loss: 1.0005518198013306,grad_norm: 0.999999028823635, iteration: 105113
loss: 0.9977961182594299,grad_norm: 0.9869206987391771, iteration: 105114
loss: 0.9986969828605652,grad_norm: 0.9335301156640398, iteration: 105115
loss: 0.9788891077041626,grad_norm: 0.9999991806098383, iteration: 105116
loss: 0.9562857747077942,grad_norm: 0.9999992017969378, iteration: 105117
loss: 0.9907326698303223,grad_norm: 0.9999995062002968, iteration: 105118
loss: 1.036684274673462,grad_norm: 0.999999097215583, iteration: 105119
loss: 0.9575147032737732,grad_norm: 0.9999991152590306, iteration: 105120
loss: 0.9940721392631531,grad_norm: 0.8607495730928237, iteration: 105121
loss: 1.0431641340255737,grad_norm: 0.9999992328310378, iteration: 105122
loss: 0.9821172952651978,grad_norm: 0.9999992090065198, iteration: 105123
loss: 0.9858630895614624,grad_norm: 0.9999990672157802, iteration: 105124
loss: 0.992824137210846,grad_norm: 0.8402115306264992, iteration: 105125
loss: 1.013769507408142,grad_norm: 0.999999362904688, iteration: 105126
loss: 0.9731255173683167,grad_norm: 0.9999991376718927, iteration: 105127
loss: 1.0040274858474731,grad_norm: 0.9873501301627043, iteration: 105128
loss: 0.9672876000404358,grad_norm: 0.999999046537033, iteration: 105129
loss: 1.023595929145813,grad_norm: 0.903906821193531, iteration: 105130
loss: 1.0136637687683105,grad_norm: 0.8995513631409237, iteration: 105131
loss: 1.032077670097351,grad_norm: 0.999999195114714, iteration: 105132
loss: 0.9824966788291931,grad_norm: 0.8648477620879377, iteration: 105133
loss: 0.9628188610076904,grad_norm: 0.9999992342624278, iteration: 105134
loss: 1.0278743505477905,grad_norm: 0.9999989336759678, iteration: 105135
loss: 0.995493471622467,grad_norm: 0.9999990895544782, iteration: 105136
loss: 0.9857107996940613,grad_norm: 0.8669563609075666, iteration: 105137
loss: 0.9686128497123718,grad_norm: 0.9999990995707848, iteration: 105138
loss: 1.0250027179718018,grad_norm: 0.9999989984942104, iteration: 105139
loss: 1.048290729522705,grad_norm: 0.999999346818127, iteration: 105140
loss: 1.0038063526153564,grad_norm: 0.9999991921455256, iteration: 105141
loss: 0.9807472229003906,grad_norm: 0.9999990210466109, iteration: 105142
loss: 1.0153145790100098,grad_norm: 0.991241984074482, iteration: 105143
loss: 1.04135262966156,grad_norm: 0.9999990685307429, iteration: 105144
loss: 0.9693683981895447,grad_norm: 0.9780319154502743, iteration: 105145
loss: 1.0333540439605713,grad_norm: 0.9829065856721018, iteration: 105146
loss: 1.0238128900527954,grad_norm: 0.9999991109589057, iteration: 105147
loss: 0.9870396852493286,grad_norm: 0.8964566197146336, iteration: 105148
loss: 1.0088081359863281,grad_norm: 0.9999992705398972, iteration: 105149
loss: 0.9874969124794006,grad_norm: 0.9999991689730292, iteration: 105150
loss: 1.0083882808685303,grad_norm: 0.999999352614773, iteration: 105151
loss: 1.0164557695388794,grad_norm: 0.9148634698423151, iteration: 105152
loss: 0.9829811453819275,grad_norm: 0.9999992047487479, iteration: 105153
loss: 1.0045826435089111,grad_norm: 0.999999307118509, iteration: 105154
loss: 1.028028964996338,grad_norm: 0.9999994441252362, iteration: 105155
loss: 1.0031532049179077,grad_norm: 0.9999990867194051, iteration: 105156
loss: 1.0154014825820923,grad_norm: 0.9999992729988807, iteration: 105157
loss: 1.0107605457305908,grad_norm: 0.9999991876027458, iteration: 105158
loss: 1.0057594776153564,grad_norm: 0.9849156251811386, iteration: 105159
loss: 0.9828469753265381,grad_norm: 0.9999992355502192, iteration: 105160
loss: 1.0081344842910767,grad_norm: 0.9757447375722763, iteration: 105161
loss: 1.022559642791748,grad_norm: 0.9999992147382358, iteration: 105162
loss: 1.0129356384277344,grad_norm: 0.9999991357724529, iteration: 105163
loss: 0.9918606281280518,grad_norm: 0.9999990284949121, iteration: 105164
loss: 0.9917038679122925,grad_norm: 0.9458168225318343, iteration: 105165
loss: 0.9897881150245667,grad_norm: 0.9999989688129006, iteration: 105166
loss: 0.9956878423690796,grad_norm: 0.9999990590666067, iteration: 105167
loss: 1.0277830362319946,grad_norm: 0.9999993014931498, iteration: 105168
loss: 1.0191177129745483,grad_norm: 0.9999989951162471, iteration: 105169
loss: 0.9825100898742676,grad_norm: 0.9999991612062064, iteration: 105170
loss: 0.9889277815818787,grad_norm: 0.9999991677942359, iteration: 105171
loss: 1.0118557214736938,grad_norm: 0.9318843369136192, iteration: 105172
loss: 1.0003021955490112,grad_norm: 0.9999990533948826, iteration: 105173
loss: 0.9821919202804565,grad_norm: 0.9999992217661366, iteration: 105174
loss: 0.9853918552398682,grad_norm: 0.907422341641699, iteration: 105175
loss: 0.9647843837738037,grad_norm: 0.9999991110289523, iteration: 105176
loss: 1.0119086503982544,grad_norm: 0.8820962604207185, iteration: 105177
loss: 1.008105993270874,grad_norm: 0.9609388307178159, iteration: 105178
loss: 0.9831816554069519,grad_norm: 0.824964010385589, iteration: 105179
loss: 0.995973527431488,grad_norm: 0.9102943583780183, iteration: 105180
loss: 0.9934054017066956,grad_norm: 0.888051282577088, iteration: 105181
loss: 1.0240569114685059,grad_norm: 0.9999992361005995, iteration: 105182
loss: 0.9802637100219727,grad_norm: 0.9814818584021893, iteration: 105183
loss: 1.0730565786361694,grad_norm: 0.9999998349272542, iteration: 105184
loss: 0.9924460649490356,grad_norm: 0.8693580866364167, iteration: 105185
loss: 1.0394141674041748,grad_norm: 0.9693934671702651, iteration: 105186
loss: 0.9888803958892822,grad_norm: 0.9863049326948766, iteration: 105187
loss: 1.00324285030365,grad_norm: 0.8570386248562241, iteration: 105188
loss: 1.0039855241775513,grad_norm: 0.9999991678532452, iteration: 105189
loss: 0.9786666631698608,grad_norm: 0.9331518047841998, iteration: 105190
loss: 0.9846451282501221,grad_norm: 0.9624061950539982, iteration: 105191
loss: 0.9986509680747986,grad_norm: 0.9999990256002333, iteration: 105192
loss: 1.0395041704177856,grad_norm: 0.9999990976122715, iteration: 105193
loss: 0.9843884110450745,grad_norm: 0.8162815552032157, iteration: 105194
loss: 0.9693145751953125,grad_norm: 0.9907882334065213, iteration: 105195
loss: 0.9949215650558472,grad_norm: 0.9999990356559851, iteration: 105196
loss: 0.9817835092544556,grad_norm: 0.9999992067570401, iteration: 105197
loss: 0.9973464012145996,grad_norm: 0.9999991657161604, iteration: 105198
loss: 1.029526948928833,grad_norm: 0.9999990540245245, iteration: 105199
loss: 0.9984243512153625,grad_norm: 0.7862525001890367, iteration: 105200
loss: 0.9858874082565308,grad_norm: 0.9999990726982463, iteration: 105201
loss: 1.0108364820480347,grad_norm: 0.9858022725994897, iteration: 105202
loss: 1.0165621042251587,grad_norm: 0.9227870791576929, iteration: 105203
loss: 0.9807566404342651,grad_norm: 0.976576999887001, iteration: 105204
loss: 0.9884835481643677,grad_norm: 0.9999991172578987, iteration: 105205
loss: 1.0078259706497192,grad_norm: 0.9908110237214921, iteration: 105206
loss: 0.980469286441803,grad_norm: 0.9030579826075487, iteration: 105207
loss: 1.0061252117156982,grad_norm: 0.9999990758948194, iteration: 105208
loss: 1.0015374422073364,grad_norm: 0.9999991135333088, iteration: 105209
loss: 1.020227074623108,grad_norm: 0.9999993077948328, iteration: 105210
loss: 1.0114763975143433,grad_norm: 0.9324646829331296, iteration: 105211
loss: 1.011045217514038,grad_norm: 0.9080436306497647, iteration: 105212
loss: 1.0430690050125122,grad_norm: 0.9948786303026806, iteration: 105213
loss: 1.0551486015319824,grad_norm: 0.9999991478512807, iteration: 105214
loss: 1.0157840251922607,grad_norm: 0.9999996477913403, iteration: 105215
loss: 1.0067509412765503,grad_norm: 0.9399002209593604, iteration: 105216
loss: 0.9583143591880798,grad_norm: 0.9755815349115798, iteration: 105217
loss: 1.0122864246368408,grad_norm: 0.999999171122755, iteration: 105218
loss: 1.032580018043518,grad_norm: 0.9999992024734827, iteration: 105219
loss: 0.9967968463897705,grad_norm: 0.8808304760476355, iteration: 105220
loss: 0.971411406993866,grad_norm: 0.9999991109815326, iteration: 105221
loss: 0.9984508752822876,grad_norm: 0.9999990181996217, iteration: 105222
loss: 1.0152761936187744,grad_norm: 0.9999992080607116, iteration: 105223
loss: 1.0083281993865967,grad_norm: 0.9999991041136617, iteration: 105224
loss: 0.9538991451263428,grad_norm: 0.9493207929847807, iteration: 105225
loss: 1.0061743259429932,grad_norm: 0.999999194946327, iteration: 105226
loss: 1.0060169696807861,grad_norm: 0.9999992031975254, iteration: 105227
loss: 0.9998730421066284,grad_norm: 0.9105939636220698, iteration: 105228
loss: 0.9985464811325073,grad_norm: 0.999999031672017, iteration: 105229
loss: 0.9981787800788879,grad_norm: 0.9037248123204437, iteration: 105230
loss: 1.0172450542449951,grad_norm: 0.9999990782731846, iteration: 105231
loss: 0.9836620688438416,grad_norm: 0.9999991270237578, iteration: 105232
loss: 0.9781016707420349,grad_norm: 0.9731575688473039, iteration: 105233
loss: 0.9871073365211487,grad_norm: 0.9999989791285203, iteration: 105234
loss: 0.9840213656425476,grad_norm: 0.9946549662877807, iteration: 105235
loss: 1.0176070928573608,grad_norm: 0.9999990903195681, iteration: 105236
loss: 0.9895144104957581,grad_norm: 0.923049026612062, iteration: 105237
loss: 1.0177141427993774,grad_norm: 0.9049627982911976, iteration: 105238
loss: 0.9694783091545105,grad_norm: 0.8876268656478513, iteration: 105239
loss: 0.9692046046257019,grad_norm: 0.9999990698898455, iteration: 105240
loss: 0.9614277482032776,grad_norm: 0.9999990527800073, iteration: 105241
loss: 1.0359387397766113,grad_norm: 0.9999992867082172, iteration: 105242
loss: 0.956333577632904,grad_norm: 0.9999990413987186, iteration: 105243
loss: 1.0226558446884155,grad_norm: 0.9999990716719218, iteration: 105244
loss: 1.0016008615493774,grad_norm: 0.9999992028461859, iteration: 105245
loss: 1.0140025615692139,grad_norm: 0.9999993091749273, iteration: 105246
loss: 0.9999492168426514,grad_norm: 0.999999735355443, iteration: 105247
loss: 1.0118451118469238,grad_norm: 0.9999991261012472, iteration: 105248
loss: 1.0068680047988892,grad_norm: 0.999999185768601, iteration: 105249
loss: 0.9821674227714539,grad_norm: 0.9692075455142053, iteration: 105250
loss: 1.0221986770629883,grad_norm: 0.999999069747323, iteration: 105251
loss: 0.9917696118354797,grad_norm: 0.9999991387399491, iteration: 105252
loss: 0.968677818775177,grad_norm: 0.9999990159067674, iteration: 105253
loss: 1.0308350324630737,grad_norm: 0.9239647077244562, iteration: 105254
loss: 0.9716047048568726,grad_norm: 0.9999990822625208, iteration: 105255
loss: 0.979557454586029,grad_norm: 0.9999992774089438, iteration: 105256
loss: 0.9921807050704956,grad_norm: 0.9999992530366397, iteration: 105257
loss: 0.9878820776939392,grad_norm: 0.8888412274717253, iteration: 105258
loss: 0.9925068616867065,grad_norm: 0.9999991203992263, iteration: 105259
loss: 1.0394597053527832,grad_norm: 0.900490015477468, iteration: 105260
loss: 0.9793621301651001,grad_norm: 0.9999990966108654, iteration: 105261
loss: 0.9879506826400757,grad_norm: 0.9999991020003309, iteration: 105262
loss: 0.9670124650001526,grad_norm: 0.9999992097813832, iteration: 105263
loss: 1.0351375341415405,grad_norm: 0.9999991268137924, iteration: 105264
loss: 0.9870648980140686,grad_norm: 0.9755314341868749, iteration: 105265
loss: 0.9870988726615906,grad_norm: 0.8150158596839612, iteration: 105266
loss: 0.9867305159568787,grad_norm: 0.9999990487993402, iteration: 105267
loss: 1.0050406455993652,grad_norm: 0.9947696775783621, iteration: 105268
loss: 0.9513320326805115,grad_norm: 0.9638210080410166, iteration: 105269
loss: 0.9806164503097534,grad_norm: 0.9999990266278677, iteration: 105270
loss: 1.0122281312942505,grad_norm: 0.9773852554973396, iteration: 105271
loss: 1.0146762132644653,grad_norm: 0.9999991962794033, iteration: 105272
loss: 0.9944650530815125,grad_norm: 0.9939984157323991, iteration: 105273
loss: 0.992011547088623,grad_norm: 0.9999992350006227, iteration: 105274
loss: 0.9735313057899475,grad_norm: 0.9999991686444218, iteration: 105275
loss: 1.0094516277313232,grad_norm: 0.9999992232084381, iteration: 105276
loss: 0.9903069138526917,grad_norm: 0.9999990175382936, iteration: 105277
loss: 0.9729247689247131,grad_norm: 0.9999991761447717, iteration: 105278
loss: 1.0038214921951294,grad_norm: 0.9715448280829072, iteration: 105279
loss: 1.050733208656311,grad_norm: 0.999999623001962, iteration: 105280
loss: 0.9661770462989807,grad_norm: 0.9375307462772914, iteration: 105281
loss: 1.005185604095459,grad_norm: 0.8757345167739664, iteration: 105282
loss: 1.0262360572814941,grad_norm: 0.9628604786117664, iteration: 105283
loss: 1.0147241353988647,grad_norm: 0.9999990555880317, iteration: 105284
loss: 0.9958862066268921,grad_norm: 0.9657824584033782, iteration: 105285
loss: 0.9786149263381958,grad_norm: 0.9999993390878009, iteration: 105286
loss: 0.9764164090156555,grad_norm: 0.9999989749690699, iteration: 105287
loss: 1.0160342454910278,grad_norm: 0.9999989938567398, iteration: 105288
loss: 0.9869535565376282,grad_norm: 0.9999990834264924, iteration: 105289
loss: 1.0146244764328003,grad_norm: 0.9999990518598864, iteration: 105290
loss: 1.070813536643982,grad_norm: 0.9999991133775213, iteration: 105291
loss: 1.009110927581787,grad_norm: 0.9999991542151072, iteration: 105292
loss: 1.0115008354187012,grad_norm: 0.999839800384357, iteration: 105293
loss: 0.9942209124565125,grad_norm: 0.9759251428522591, iteration: 105294
loss: 1.025589942932129,grad_norm: 0.9999991646200739, iteration: 105295
loss: 0.9654910564422607,grad_norm: 0.9999990675901207, iteration: 105296
loss: 1.0283665657043457,grad_norm: 0.9999992206635713, iteration: 105297
loss: 0.9985353350639343,grad_norm: 0.9325836122400636, iteration: 105298
loss: 1.0105016231536865,grad_norm: 0.9363399617545222, iteration: 105299
loss: 0.9572351574897766,grad_norm: 0.9327921646072399, iteration: 105300
loss: 1.017729640007019,grad_norm: 0.9999991540473138, iteration: 105301
loss: 0.9693790674209595,grad_norm: 0.9999990865593877, iteration: 105302
loss: 0.9334250688552856,grad_norm: 0.9999990075407708, iteration: 105303
loss: 1.029776692390442,grad_norm: 0.9999991033976572, iteration: 105304
loss: 0.9877321124076843,grad_norm: 0.8777650567646449, iteration: 105305
loss: 0.9751935005187988,grad_norm: 0.9999992307511362, iteration: 105306
loss: 0.964773416519165,grad_norm: 0.9954247490954645, iteration: 105307
loss: 0.9708383083343506,grad_norm: 0.9747107423500527, iteration: 105308
loss: 1.0264718532562256,grad_norm: 0.9999990470793688, iteration: 105309
loss: 0.9937466382980347,grad_norm: 0.9080144624935781, iteration: 105310
loss: 1.0087695121765137,grad_norm: 0.9999991140388903, iteration: 105311
loss: 1.081032156944275,grad_norm: 0.9999992480944103, iteration: 105312
loss: 1.010183334350586,grad_norm: 0.9907790463583303, iteration: 105313
loss: 0.9838740229606628,grad_norm: 0.9630098380907995, iteration: 105314
loss: 1.0286989212036133,grad_norm: 0.9999992218166194, iteration: 105315
loss: 1.0025023221969604,grad_norm: 0.9999992370158776, iteration: 105316
loss: 1.030076265335083,grad_norm: 0.9999992616850356, iteration: 105317
loss: 1.00515878200531,grad_norm: 0.9999991187605793, iteration: 105318
loss: 1.0115742683410645,grad_norm: 0.9838614314304795, iteration: 105319
loss: 0.9777349233627319,grad_norm: 0.9999991163173222, iteration: 105320
loss: 0.9726654291152954,grad_norm: 0.9799090691194676, iteration: 105321
loss: 0.971993088722229,grad_norm: 0.9999990971370488, iteration: 105322
loss: 0.9962040185928345,grad_norm: 0.9627740993185502, iteration: 105323
loss: 0.9988276958465576,grad_norm: 0.9999992430514376, iteration: 105324
loss: 0.9927442073822021,grad_norm: 0.9999990767672494, iteration: 105325
loss: 0.9735302925109863,grad_norm: 0.9801508516012661, iteration: 105326
loss: 1.031873345375061,grad_norm: 0.9999992351766506, iteration: 105327
loss: 0.9958965182304382,grad_norm: 0.8931212331396338, iteration: 105328
loss: 1.0108439922332764,grad_norm: 0.9796227932751181, iteration: 105329
loss: 0.9586529731750488,grad_norm: 0.9438660504418169, iteration: 105330
loss: 1.021462321281433,grad_norm: 0.9999990202294068, iteration: 105331
loss: 1.2475063800811768,grad_norm: 0.9999991410718677, iteration: 105332
loss: 0.9691808819770813,grad_norm: 0.8603156831053169, iteration: 105333
loss: 0.9985785484313965,grad_norm: 0.9999993120163981, iteration: 105334
loss: 1.0196309089660645,grad_norm: 0.9999991981486117, iteration: 105335
loss: 1.09414803981781,grad_norm: 0.9999996860938646, iteration: 105336
loss: 1.0274162292480469,grad_norm: 0.8433752260198648, iteration: 105337
loss: 1.0081462860107422,grad_norm: 0.9999991329022624, iteration: 105338
loss: 1.0041033029556274,grad_norm: 0.9999990123243202, iteration: 105339
loss: 0.9636937379837036,grad_norm: 0.9999991975967281, iteration: 105340
loss: 1.00007963180542,grad_norm: 0.99999907840916, iteration: 105341
loss: 0.9806649088859558,grad_norm: 0.9999990919403099, iteration: 105342
loss: 1.004764437675476,grad_norm: 0.9363238288241337, iteration: 105343
loss: 1.017435073852539,grad_norm: 0.8943592566013365, iteration: 105344
loss: 0.9982370734214783,grad_norm: 0.9784408367677513, iteration: 105345
loss: 1.0284266471862793,grad_norm: 0.9999993386559386, iteration: 105346
loss: 1.0042768716812134,grad_norm: 0.9999991545361355, iteration: 105347
loss: 0.9783146381378174,grad_norm: 0.9999990196591164, iteration: 105348
loss: 1.0294885635375977,grad_norm: 0.9999990219003092, iteration: 105349
loss: 1.0815211534500122,grad_norm: 0.9999993010347454, iteration: 105350
loss: 0.9891586899757385,grad_norm: 0.9999992924293709, iteration: 105351
loss: 1.0091012716293335,grad_norm: 0.9336821961133569, iteration: 105352
loss: 1.0098495483398438,grad_norm: 0.9999990383201632, iteration: 105353
loss: 1.034136414527893,grad_norm: 0.9481107394903765, iteration: 105354
loss: 0.9626299142837524,grad_norm: 0.9999990857309176, iteration: 105355
loss: 0.984169602394104,grad_norm: 0.9999992686115632, iteration: 105356
loss: 0.9884974360466003,grad_norm: 0.9999990481548745, iteration: 105357
loss: 0.9988459348678589,grad_norm: 0.9999992410307604, iteration: 105358
loss: 0.9822224974632263,grad_norm: 0.9999993111454075, iteration: 105359
loss: 1.017555594444275,grad_norm: 0.9999991849160089, iteration: 105360
loss: 1.0044748783111572,grad_norm: 0.9999989150930126, iteration: 105361
loss: 0.9758915305137634,grad_norm: 0.9999990160471875, iteration: 105362
loss: 0.9930639266967773,grad_norm: 0.9999990662094261, iteration: 105363
loss: 1.0068963766098022,grad_norm: 0.9999990054475782, iteration: 105364
loss: 1.0348925590515137,grad_norm: 0.999999241184389, iteration: 105365
loss: 1.0073727369308472,grad_norm: 0.9999991443873171, iteration: 105366
loss: 0.9977360367774963,grad_norm: 0.8596785093248536, iteration: 105367
loss: 1.0119684934616089,grad_norm: 0.9999989941845513, iteration: 105368
loss: 1.0436571836471558,grad_norm: 0.9999991121765087, iteration: 105369
loss: 1.0028049945831299,grad_norm: 0.8914909935307476, iteration: 105370
loss: 1.0039342641830444,grad_norm: 0.8903880813708591, iteration: 105371
loss: 0.9946627020835876,grad_norm: 0.9999991456185325, iteration: 105372
loss: 1.050516963005066,grad_norm: 0.914328983287018, iteration: 105373
loss: 0.9757993817329407,grad_norm: 0.9999991620945736, iteration: 105374
loss: 0.9703083634376526,grad_norm: 0.9999990798305513, iteration: 105375
loss: 1.0243834257125854,grad_norm: 0.9999992037759096, iteration: 105376
loss: 0.9918224811553955,grad_norm: 0.9999991615357883, iteration: 105377
loss: 0.999875009059906,grad_norm: 0.8894985752922557, iteration: 105378
loss: 1.0334594249725342,grad_norm: 0.999999668480252, iteration: 105379
loss: 1.0129694938659668,grad_norm: 0.9519662043433788, iteration: 105380
loss: 0.9968952536582947,grad_norm: 0.9934228560007452, iteration: 105381
loss: 0.9810376167297363,grad_norm: 0.9999991264219803, iteration: 105382
loss: 0.9929710030555725,grad_norm: 0.9999991153043816, iteration: 105383
loss: 1.0333712100982666,grad_norm: 0.8461689521954434, iteration: 105384
loss: 0.9845112562179565,grad_norm: 0.9999990324481286, iteration: 105385
loss: 0.9867144227027893,grad_norm: 0.9999991222351998, iteration: 105386
loss: 1.0329233407974243,grad_norm: 0.9999991672877392, iteration: 105387
loss: 0.9945722818374634,grad_norm: 0.8692193086182531, iteration: 105388
loss: 0.9744905233383179,grad_norm: 0.9999991418769507, iteration: 105389
loss: 1.0194453001022339,grad_norm: 0.9999989886851739, iteration: 105390
loss: 1.0263841152191162,grad_norm: 0.9999991933930428, iteration: 105391
loss: 0.9854015707969666,grad_norm: 0.8217735370391472, iteration: 105392
loss: 1.045506477355957,grad_norm: 0.9999989945579997, iteration: 105393
loss: 1.0177968740463257,grad_norm: 0.9999989968628837, iteration: 105394
loss: 0.9695888161659241,grad_norm: 0.9753105779534662, iteration: 105395
loss: 1.0113483667373657,grad_norm: 0.9999991380235641, iteration: 105396
loss: 1.0079734325408936,grad_norm: 0.9999990077058071, iteration: 105397
loss: 0.9812252521514893,grad_norm: 0.9999990655504759, iteration: 105398
loss: 0.9516398906707764,grad_norm: 0.9999991711637682, iteration: 105399
loss: 0.9933517575263977,grad_norm: 0.9999991194956962, iteration: 105400
loss: 0.9685044884681702,grad_norm: 0.9321847465139406, iteration: 105401
loss: 0.9884211421012878,grad_norm: 0.9093568856393918, iteration: 105402
loss: 1.005354642868042,grad_norm: 0.9242450770016333, iteration: 105403
loss: 0.994161069393158,grad_norm: 0.9999991846189344, iteration: 105404
loss: 1.0109022855758667,grad_norm: 0.9999991669928374, iteration: 105405
loss: 0.9803330898284912,grad_norm: 0.9999990571405708, iteration: 105406
loss: 1.0334715843200684,grad_norm: 0.9999989891891305, iteration: 105407
loss: 0.9602324962615967,grad_norm: 0.9999991104768774, iteration: 105408
loss: 0.99860018491745,grad_norm: 0.9807046654835023, iteration: 105409
loss: 1.0157179832458496,grad_norm: 0.9999993290195953, iteration: 105410
loss: 1.0318405628204346,grad_norm: 0.9807120735479424, iteration: 105411
loss: 1.0154255628585815,grad_norm: 0.9999990835430954, iteration: 105412
loss: 1.0130990743637085,grad_norm: 0.9149371662564949, iteration: 105413
loss: 0.9883331656455994,grad_norm: 0.999999251295349, iteration: 105414
loss: 1.0254440307617188,grad_norm: 0.9999990915371407, iteration: 105415
loss: 1.0002154111862183,grad_norm: 0.9999991022493977, iteration: 105416
loss: 1.008348822593689,grad_norm: 0.9999993099500111, iteration: 105417
loss: 0.9729991555213928,grad_norm: 0.9407887422230151, iteration: 105418
loss: 0.99161696434021,grad_norm: 0.9999991289982291, iteration: 105419
loss: 0.9665754437446594,grad_norm: 0.9999991308371208, iteration: 105420
loss: 0.9857520461082458,grad_norm: 0.9999994214844219, iteration: 105421
loss: 1.0294793844223022,grad_norm: 0.9999995127907049, iteration: 105422
loss: 1.0186315774917603,grad_norm: 0.9999995028241969, iteration: 105423
loss: 1.0304890871047974,grad_norm: 0.9999991730714471, iteration: 105424
loss: 0.9725517630577087,grad_norm: 0.999999015382789, iteration: 105425
loss: 0.9884614944458008,grad_norm: 0.9305458655703348, iteration: 105426
loss: 0.9764351844787598,grad_norm: 0.999999019597871, iteration: 105427
loss: 1.0314674377441406,grad_norm: 0.9920186049401255, iteration: 105428
loss: 1.0049675703048706,grad_norm: 0.9999990590347285, iteration: 105429
loss: 0.9683163166046143,grad_norm: 0.9999990431834148, iteration: 105430
loss: 0.9546058773994446,grad_norm: 0.9381069418112676, iteration: 105431
loss: 1.019479513168335,grad_norm: 0.9999993123420314, iteration: 105432
loss: 1.0316903591156006,grad_norm: 0.9605480714498603, iteration: 105433
loss: 0.9804744124412537,grad_norm: 0.9999990525703127, iteration: 105434
loss: 0.9937022924423218,grad_norm: 0.9999990803695006, iteration: 105435
loss: 1.0147384405136108,grad_norm: 0.9999990420255925, iteration: 105436
loss: 1.0144572257995605,grad_norm: 0.9999996144574412, iteration: 105437
loss: 1.022504448890686,grad_norm: 0.9757918739902668, iteration: 105438
loss: 1.017029047012329,grad_norm: 0.9571477356490227, iteration: 105439
loss: 1.0042914152145386,grad_norm: 0.9999990893144455, iteration: 105440
loss: 1.0787206888198853,grad_norm: 0.8690745574152829, iteration: 105441
loss: 0.9883161783218384,grad_norm: 0.9999991047291344, iteration: 105442
loss: 0.9953365325927734,grad_norm: 0.9571840629668128, iteration: 105443
loss: 1.0079231262207031,grad_norm: 0.9999990630510334, iteration: 105444
loss: 1.015643835067749,grad_norm: 0.9379309605123602, iteration: 105445
loss: 0.9878653883934021,grad_norm: 0.9010689306647471, iteration: 105446
loss: 0.9945140480995178,grad_norm: 0.999999042536962, iteration: 105447
loss: 1.0188090801239014,grad_norm: 0.9999990762971191, iteration: 105448
loss: 1.0344456434249878,grad_norm: 0.999999189900481, iteration: 105449
loss: 1.0001078844070435,grad_norm: 0.999999085136105, iteration: 105450
loss: 1.0869730710983276,grad_norm: 0.9999997806977932, iteration: 105451
loss: 0.9574883580207825,grad_norm: 0.9999991397069454, iteration: 105452
loss: 1.0092010498046875,grad_norm: 0.9999991915013756, iteration: 105453
loss: 0.9962414503097534,grad_norm: 0.9999990734078587, iteration: 105454
loss: 0.9922960996627808,grad_norm: 0.9999990835249987, iteration: 105455
loss: 1.000064730644226,grad_norm: 0.9999990628399753, iteration: 105456
loss: 0.9734925031661987,grad_norm: 0.9999991623374566, iteration: 105457
loss: 1.036849021911621,grad_norm: 0.9999992796091759, iteration: 105458
loss: 1.006227731704712,grad_norm: 0.9999992992171335, iteration: 105459
loss: 1.025195837020874,grad_norm: 0.9999990802274533, iteration: 105460
loss: 1.0281935930252075,grad_norm: 0.9999991243834553, iteration: 105461
loss: 0.9828378558158875,grad_norm: 0.8919106579971268, iteration: 105462
loss: 1.0397529602050781,grad_norm: 0.9999997633864213, iteration: 105463
loss: 0.9973639845848083,grad_norm: 0.9999990648947082, iteration: 105464
loss: 0.9970991611480713,grad_norm: 0.9999991513629286, iteration: 105465
loss: 1.0082601308822632,grad_norm: 0.9999990179295932, iteration: 105466
loss: 0.9526650309562683,grad_norm: 0.9999991753868228, iteration: 105467
loss: 1.0052505731582642,grad_norm: 0.9620281553594888, iteration: 105468
loss: 0.9980816841125488,grad_norm: 0.999999929283772, iteration: 105469
loss: 1.0387706756591797,grad_norm: 0.9684531872237101, iteration: 105470
loss: 0.9895570874214172,grad_norm: 0.9999990260880945, iteration: 105471
loss: 1.026546835899353,grad_norm: 0.9999992385199095, iteration: 105472
loss: 1.015990138053894,grad_norm: 0.9999991332676605, iteration: 105473
loss: 0.9872205853462219,grad_norm: 0.9999989750609313, iteration: 105474
loss: 0.9711814522743225,grad_norm: 0.9999992536857721, iteration: 105475
loss: 1.0010895729064941,grad_norm: 0.9999990691486143, iteration: 105476
loss: 1.0023384094238281,grad_norm: 0.9999990576524963, iteration: 105477
loss: 0.9699125289916992,grad_norm: 0.9999991779037749, iteration: 105478
loss: 1.0444759130477905,grad_norm: 0.9999996373182339, iteration: 105479
loss: 1.0115087032318115,grad_norm: 0.9060406168199208, iteration: 105480
loss: 1.0242708921432495,grad_norm: 0.99999956770532, iteration: 105481
loss: 0.9902232885360718,grad_norm: 0.8946036764449977, iteration: 105482
loss: 1.0021870136260986,grad_norm: 0.9633914556787782, iteration: 105483
loss: 0.9792109131813049,grad_norm: 0.9879056275635303, iteration: 105484
loss: 1.0148736238479614,grad_norm: 0.9192081696626422, iteration: 105485
loss: 0.9997193813323975,grad_norm: 0.9999991551583406, iteration: 105486
loss: 0.9725710153579712,grad_norm: 0.9999991746005447, iteration: 105487
loss: 0.9693213701248169,grad_norm: 0.9109553979716825, iteration: 105488
loss: 0.9882468581199646,grad_norm: 0.999999361049689, iteration: 105489
loss: 0.9947363138198853,grad_norm: 0.9999991877480084, iteration: 105490
loss: 1.0368250608444214,grad_norm: 0.9999991521981103, iteration: 105491
loss: 1.0025302171707153,grad_norm: 0.9999991073696749, iteration: 105492
loss: 1.0032246112823486,grad_norm: 0.9254029553578386, iteration: 105493
loss: 0.9898822903633118,grad_norm: 0.9999992230742978, iteration: 105494
loss: 0.9924468398094177,grad_norm: 0.9999989577152015, iteration: 105495
loss: 1.023876428604126,grad_norm: 0.9999991119987975, iteration: 105496
loss: 1.0002758502960205,grad_norm: 0.9364370070328802, iteration: 105497
loss: 0.9670584201812744,grad_norm: 0.824018172016435, iteration: 105498
loss: 1.01568603515625,grad_norm: 0.9999991381494213, iteration: 105499
loss: 0.9827755689620972,grad_norm: 0.956811474697517, iteration: 105500
loss: 1.0368772745132446,grad_norm: 0.9999998721266173, iteration: 105501
loss: 0.9978066086769104,grad_norm: 0.9999991171757128, iteration: 105502
loss: 1.005585789680481,grad_norm: 0.9901523615255161, iteration: 105503
loss: 1.0149474143981934,grad_norm: 0.9999993087126052, iteration: 105504
loss: 1.0016967058181763,grad_norm: 0.845535596170356, iteration: 105505
loss: 1.01662278175354,grad_norm: 0.920732126343002, iteration: 105506
loss: 1.0181596279144287,grad_norm: 0.9069850372981789, iteration: 105507
loss: 0.9924332499504089,grad_norm: 0.9999993541738404, iteration: 105508
loss: 1.0020750761032104,grad_norm: 0.9999990820193265, iteration: 105509
loss: 1.0327297449111938,grad_norm: 0.9999992050264414, iteration: 105510
loss: 0.9729421734809875,grad_norm: 0.999999044020152, iteration: 105511
loss: 0.9989316463470459,grad_norm: 0.9999989546130663, iteration: 105512
loss: 1.0132359266281128,grad_norm: 0.9999991091221148, iteration: 105513
loss: 1.0361855030059814,grad_norm: 0.999999128935099, iteration: 105514
loss: 1.0037050247192383,grad_norm: 0.9999992381162848, iteration: 105515
loss: 1.0285463333129883,grad_norm: 0.9999991230455546, iteration: 105516
loss: 1.015294075012207,grad_norm: 0.9912784125668688, iteration: 105517
loss: 0.9913267493247986,grad_norm: 0.9853820187930773, iteration: 105518
loss: 1.0009909868240356,grad_norm: 0.9999991192931138, iteration: 105519
loss: 0.9837009906768799,grad_norm: 0.9999991122732574, iteration: 105520
loss: 0.9258595108985901,grad_norm: 0.9999992509888588, iteration: 105521
loss: 0.984738826751709,grad_norm: 0.8486502003224498, iteration: 105522
loss: 1.0065900087356567,grad_norm: 0.9999991631307195, iteration: 105523
loss: 0.9677123427391052,grad_norm: 0.9999990871664259, iteration: 105524
loss: 0.9863266348838806,grad_norm: 0.8477936420221099, iteration: 105525
loss: 1.0394580364227295,grad_norm: 0.9370451106773918, iteration: 105526
loss: 0.9917292594909668,grad_norm: 0.9999992359592433, iteration: 105527
loss: 1.004592776298523,grad_norm: 0.8805916597175854, iteration: 105528
loss: 0.9739911556243896,grad_norm: 0.9999991883629422, iteration: 105529
loss: 0.9807300567626953,grad_norm: 0.9999994093408222, iteration: 105530
loss: 1.0018404722213745,grad_norm: 0.9999990441677741, iteration: 105531
loss: 1.0164614915847778,grad_norm: 0.9999991772014656, iteration: 105532
loss: 0.9919275045394897,grad_norm: 0.9999991149169987, iteration: 105533
loss: 1.0006176233291626,grad_norm: 0.9999989952768193, iteration: 105534
loss: 0.9891409277915955,grad_norm: 0.9286835254973964, iteration: 105535
loss: 0.9729260802268982,grad_norm: 0.950903118005869, iteration: 105536
loss: 1.0043786764144897,grad_norm: 0.99999905756967, iteration: 105537
loss: 0.973787248134613,grad_norm: 0.9999992076045707, iteration: 105538
loss: 1.0173261165618896,grad_norm: 0.9744038549774567, iteration: 105539
loss: 1.0449177026748657,grad_norm: 0.9999990637593145, iteration: 105540
loss: 0.9908420443534851,grad_norm: 0.9999990987695639, iteration: 105541
loss: 0.9946684837341309,grad_norm: 0.9999990903173788, iteration: 105542
loss: 0.9699739217758179,grad_norm: 0.9999989909194866, iteration: 105543
loss: 1.0246927738189697,grad_norm: 0.9999991743901807, iteration: 105544
loss: 1.0069289207458496,grad_norm: 0.998161950277485, iteration: 105545
loss: 0.9496588706970215,grad_norm: 0.9999992419088939, iteration: 105546
loss: 1.0156289339065552,grad_norm: 0.9070095477303136, iteration: 105547
loss: 1.1097975969314575,grad_norm: 0.9999994757510334, iteration: 105548
loss: 0.9866622090339661,grad_norm: 0.9999991405864459, iteration: 105549
loss: 0.983222246170044,grad_norm: 0.9999992225855571, iteration: 105550
loss: 0.9906177520751953,grad_norm: 0.9999992009401079, iteration: 105551
loss: 0.9866104125976562,grad_norm: 0.8912356975734841, iteration: 105552
loss: 0.9902254939079285,grad_norm: 0.9999990162694675, iteration: 105553
loss: 1.011553406715393,grad_norm: 0.9999991095787758, iteration: 105554
loss: 1.0821115970611572,grad_norm: 0.9999991543759443, iteration: 105555
loss: 1.0048152208328247,grad_norm: 0.9541088143289879, iteration: 105556
loss: 1.0284961462020874,grad_norm: 0.9999990739321248, iteration: 105557
loss: 0.9815866351127625,grad_norm: 0.9999991438572969, iteration: 105558
loss: 1.0158206224441528,grad_norm: 0.9999991987514665, iteration: 105559
loss: 1.0156587362289429,grad_norm: 0.8887179663613644, iteration: 105560
loss: 0.9744115471839905,grad_norm: 0.9999991369018042, iteration: 105561
loss: 1.0011967420578003,grad_norm: 0.9999991762285075, iteration: 105562
loss: 1.0007394552230835,grad_norm: 0.9999990589160573, iteration: 105563
loss: 0.9901741147041321,grad_norm: 0.9999989215396416, iteration: 105564
loss: 1.0000739097595215,grad_norm: 0.9999992151494741, iteration: 105565
loss: 0.9949809908866882,grad_norm: 0.955058072778584, iteration: 105566
loss: 1.0342566967010498,grad_norm: 0.9999991913968683, iteration: 105567
loss: 1.0012118816375732,grad_norm: 0.9999991833906764, iteration: 105568
loss: 0.9936152100563049,grad_norm: 0.9999991406905746, iteration: 105569
loss: 0.9931975603103638,grad_norm: 0.9580222045179534, iteration: 105570
loss: 1.0098345279693604,grad_norm: 0.9388662185148824, iteration: 105571
loss: 1.0004520416259766,grad_norm: 0.9999990562713937, iteration: 105572
loss: 0.9676364660263062,grad_norm: 0.9999990409882138, iteration: 105573
loss: 0.9785282611846924,grad_norm: 0.9999992450213124, iteration: 105574
loss: 0.999975860118866,grad_norm: 0.9999988600769618, iteration: 105575
loss: 0.9444222450256348,grad_norm: 0.9999991636543046, iteration: 105576
loss: 0.9818974733352661,grad_norm: 0.9228727120209426, iteration: 105577
loss: 1.034104824066162,grad_norm: 0.9069438662161164, iteration: 105578
loss: 1.01459538936615,grad_norm: 0.9999991354508413, iteration: 105579
loss: 0.9772871136665344,grad_norm: 0.9420253880974078, iteration: 105580
loss: 0.9737507104873657,grad_norm: 0.9999990681527099, iteration: 105581
loss: 0.9617049098014832,grad_norm: 0.9999992713040862, iteration: 105582
loss: 1.0074280500411987,grad_norm: 0.999999054588002, iteration: 105583
loss: 0.9972137212753296,grad_norm: 0.9149738189243615, iteration: 105584
loss: 0.9961453080177307,grad_norm: 0.9999989876622368, iteration: 105585
loss: 0.9638824462890625,grad_norm: 0.9467385049197314, iteration: 105586
loss: 1.005603313446045,grad_norm: 0.9999991234014238, iteration: 105587
loss: 1.008695125579834,grad_norm: 0.9266030171431685, iteration: 105588
loss: 0.9858812093734741,grad_norm: 0.9999992626198909, iteration: 105589
loss: 0.987799346446991,grad_norm: 0.9999989654570669, iteration: 105590
loss: 1.0321037769317627,grad_norm: 0.9999991047606737, iteration: 105591
loss: 0.9917890429496765,grad_norm: 0.9691083661151347, iteration: 105592
loss: 0.9606339931488037,grad_norm: 0.9922033205301042, iteration: 105593
loss: 1.0019034147262573,grad_norm: 0.9999992600468893, iteration: 105594
loss: 1.0040699243545532,grad_norm: 0.9999990261675065, iteration: 105595
loss: 1.01077401638031,grad_norm: 0.999999188368254, iteration: 105596
loss: 1.0127310752868652,grad_norm: 0.8565061292867893, iteration: 105597
loss: 0.9832913875579834,grad_norm: 0.9609031778202989, iteration: 105598
loss: 1.0071214437484741,grad_norm: 0.9880000844369664, iteration: 105599
loss: 0.9903717041015625,grad_norm: 0.9999990465880014, iteration: 105600
loss: 1.0329638719558716,grad_norm: 0.9999993003970777, iteration: 105601
loss: 0.995259702205658,grad_norm: 0.9999991469882543, iteration: 105602
loss: 0.9935053586959839,grad_norm: 0.9409980577049522, iteration: 105603
loss: 1.022057294845581,grad_norm: 0.9999992348259931, iteration: 105604
loss: 0.9817792773246765,grad_norm: 0.9999991972164719, iteration: 105605
loss: 0.9773290753364563,grad_norm: 0.9646154831320822, iteration: 105606
loss: 0.9813912510871887,grad_norm: 0.9555856922921944, iteration: 105607
loss: 0.9848878979682922,grad_norm: 0.9936134769462841, iteration: 105608
loss: 1.0328067541122437,grad_norm: 0.9999991008760625, iteration: 105609
loss: 1.0344005823135376,grad_norm: 0.9999994174419881, iteration: 105610
loss: 0.9777149558067322,grad_norm: 0.9999990332892752, iteration: 105611
loss: 0.968336820602417,grad_norm: 0.8951697299268372, iteration: 105612
loss: 1.0020819902420044,grad_norm: 0.9631949150556312, iteration: 105613
loss: 0.9895609617233276,grad_norm: 0.9999990681600154, iteration: 105614
loss: 0.9794652462005615,grad_norm: 0.9999989696288221, iteration: 105615
loss: 0.9518032073974609,grad_norm: 0.999999058927471, iteration: 105616
loss: 0.9971517324447632,grad_norm: 0.9999993495732494, iteration: 105617
loss: 0.961261510848999,grad_norm: 0.8736098823930317, iteration: 105618
loss: 1.0057660341262817,grad_norm: 0.9437988224489371, iteration: 105619
loss: 0.960955798625946,grad_norm: 0.9999991398019307, iteration: 105620
loss: 1.1046833992004395,grad_norm: 0.9999992290137127, iteration: 105621
loss: 1.0110900402069092,grad_norm: 0.9999992821964192, iteration: 105622
loss: 1.0167641639709473,grad_norm: 0.9999991041468388, iteration: 105623
loss: 1.0019049644470215,grad_norm: 0.9756367518486325, iteration: 105624
loss: 1.0112130641937256,grad_norm: 0.9078632429647479, iteration: 105625
loss: 0.9797589182853699,grad_norm: 0.9999999012614917, iteration: 105626
loss: 1.017028570175171,grad_norm: 0.9999992187259806, iteration: 105627
loss: 1.007511854171753,grad_norm: 0.9999994754003615, iteration: 105628
loss: 0.9968176484107971,grad_norm: 0.9999989926819229, iteration: 105629
loss: 0.9997185468673706,grad_norm: 0.999999199683217, iteration: 105630
loss: 0.9655295014381409,grad_norm: 0.9667363947803927, iteration: 105631
loss: 1.0412204265594482,grad_norm: 0.9999991765123467, iteration: 105632
loss: 1.0200474262237549,grad_norm: 0.908461569808288, iteration: 105633
loss: 1.004473090171814,grad_norm: 0.9261665441980632, iteration: 105634
loss: 1.0035126209259033,grad_norm: 0.9119608776579505, iteration: 105635
loss: 1.0313581228256226,grad_norm: 0.9999991069074231, iteration: 105636
loss: 1.0034505128860474,grad_norm: 0.943133402976402, iteration: 105637
loss: 1.0371956825256348,grad_norm: 0.9729198556901908, iteration: 105638
loss: 0.9719641804695129,grad_norm: 0.9999991392018059, iteration: 105639
loss: 0.9840675592422485,grad_norm: 0.9999991016460658, iteration: 105640
loss: 0.9512263536453247,grad_norm: 0.9999992995840149, iteration: 105641
loss: 1.004101276397705,grad_norm: 0.9999991340935117, iteration: 105642
loss: 0.9902130365371704,grad_norm: 0.8495302084011811, iteration: 105643
loss: 1.0254241228103638,grad_norm: 0.999999189250625, iteration: 105644
loss: 1.0030597448349,grad_norm: 0.9999990105231807, iteration: 105645
loss: 1.0239697694778442,grad_norm: 0.966183635113376, iteration: 105646
loss: 1.0163589715957642,grad_norm: 0.9901112125387957, iteration: 105647
loss: 1.020987868309021,grad_norm: 0.934931171842612, iteration: 105648
loss: 0.9980675578117371,grad_norm: 0.9533477574913966, iteration: 105649
loss: 0.9836602210998535,grad_norm: 0.9090322379580739, iteration: 105650
loss: 1.0488553047180176,grad_norm: 0.9999991160967752, iteration: 105651
loss: 1.0005205869674683,grad_norm: 0.8192188527627382, iteration: 105652
loss: 1.012837290763855,grad_norm: 0.9994091256335514, iteration: 105653
loss: 1.0107121467590332,grad_norm: 0.9999990913233785, iteration: 105654
loss: 0.9668256044387817,grad_norm: 0.9745660355789554, iteration: 105655
loss: 0.9975627064704895,grad_norm: 0.9999989541335831, iteration: 105656
loss: 0.9846435785293579,grad_norm: 0.9356382260793125, iteration: 105657
loss: 1.0252145528793335,grad_norm: 0.9999993646947737, iteration: 105658
loss: 0.9828715920448303,grad_norm: 0.9999991524715848, iteration: 105659
loss: 1.0202997922897339,grad_norm: 0.9999991845773172, iteration: 105660
loss: 1.0281540155410767,grad_norm: 0.9999993915215143, iteration: 105661
loss: 1.0242767333984375,grad_norm: 0.8134791001752049, iteration: 105662
loss: 1.0031172037124634,grad_norm: 0.9999990062480042, iteration: 105663
loss: 0.9652032852172852,grad_norm: 0.9602340490003345, iteration: 105664
loss: 0.9814533591270447,grad_norm: 0.9999989991774612, iteration: 105665
loss: 0.9866532683372498,grad_norm: 0.9999991552736118, iteration: 105666
loss: 1.0152169466018677,grad_norm: 0.8243072273188834, iteration: 105667
loss: 0.9982433319091797,grad_norm: 0.9999991365842907, iteration: 105668
loss: 0.9767777919769287,grad_norm: 0.9999991682266393, iteration: 105669
loss: 0.9658493399620056,grad_norm: 0.9873653291806554, iteration: 105670
loss: 0.9600232243537903,grad_norm: 0.9092616839143106, iteration: 105671
loss: 0.9926740527153015,grad_norm: 0.8504861264714735, iteration: 105672
loss: 0.9770179986953735,grad_norm: 0.8316524313586203, iteration: 105673
loss: 1.0189290046691895,grad_norm: 0.9999992467957831, iteration: 105674
loss: 0.9814881086349487,grad_norm: 0.9827960233811971, iteration: 105675
loss: 1.0165410041809082,grad_norm: 0.9256668635032815, iteration: 105676
loss: 0.9952329397201538,grad_norm: 0.8930584820879759, iteration: 105677
loss: 0.9973512887954712,grad_norm: 0.9999991431938162, iteration: 105678
loss: 0.9834346771240234,grad_norm: 0.9864529405304095, iteration: 105679
loss: 1.0570060014724731,grad_norm: 0.8144181034990583, iteration: 105680
loss: 0.999599277973175,grad_norm: 0.9999989313385251, iteration: 105681
loss: 0.9796387553215027,grad_norm: 0.9999989703525314, iteration: 105682
loss: 0.9946839809417725,grad_norm: 0.9999992209970782, iteration: 105683
loss: 0.9874997138977051,grad_norm: 0.9999989894626612, iteration: 105684
loss: 0.9715180993080139,grad_norm: 0.9999991531741474, iteration: 105685
loss: 0.9718047380447388,grad_norm: 0.9999991251131546, iteration: 105686
loss: 1.0506075620651245,grad_norm: 0.9999991375221141, iteration: 105687
loss: 0.995232343673706,grad_norm: 0.9999992926335761, iteration: 105688
loss: 1.0059640407562256,grad_norm: 0.999999032241248, iteration: 105689
loss: 0.9956778287887573,grad_norm: 0.9999992370418852, iteration: 105690
loss: 1.0005754232406616,grad_norm: 0.9886663895985452, iteration: 105691
loss: 0.9955124258995056,grad_norm: 0.9999991749883306, iteration: 105692
loss: 1.0101522207260132,grad_norm: 0.9202234332372481, iteration: 105693
loss: 0.9908297657966614,grad_norm: 0.9999990264512676, iteration: 105694
loss: 1.0209344625473022,grad_norm: 0.9620820549069088, iteration: 105695
loss: 1.0182273387908936,grad_norm: 0.9999992485218391, iteration: 105696
loss: 0.9793437719345093,grad_norm: 0.9885974373664969, iteration: 105697
loss: 0.9664463400840759,grad_norm: 0.9999992401075656, iteration: 105698
loss: 0.9762496948242188,grad_norm: 0.9999991976127439, iteration: 105699
loss: 1.000240683555603,grad_norm: 0.9999990860625833, iteration: 105700
loss: 0.9712029695510864,grad_norm: 0.989677283999204, iteration: 105701
loss: 1.0004379749298096,grad_norm: 0.9999990338664951, iteration: 105702
loss: 0.9810733199119568,grad_norm: 0.9999989934674339, iteration: 105703
loss: 1.0070966482162476,grad_norm: 0.9999990564625839, iteration: 105704
loss: 0.9871898889541626,grad_norm: 0.9999989710542208, iteration: 105705
loss: 1.0151326656341553,grad_norm: 0.9999992445716216, iteration: 105706
loss: 0.9857210516929626,grad_norm: 0.9999990108564142, iteration: 105707
loss: 0.9487723708152771,grad_norm: 0.9999990046239965, iteration: 105708
loss: 0.9723047614097595,grad_norm: 0.9999991449895307, iteration: 105709
loss: 0.9990283846855164,grad_norm: 0.9999991444414709, iteration: 105710
loss: 0.9512642025947571,grad_norm: 0.9999992588605319, iteration: 105711
loss: 1.0086256265640259,grad_norm: 0.9999990578818162, iteration: 105712
loss: 0.9986045956611633,grad_norm: 0.9999996594706966, iteration: 105713
loss: 0.9684579372406006,grad_norm: 0.9478194240887753, iteration: 105714
loss: 0.9754777550697327,grad_norm: 0.9999992328900298, iteration: 105715
loss: 0.9814376831054688,grad_norm: 0.9362126464122363, iteration: 105716
loss: 0.963553786277771,grad_norm: 0.999999093143069, iteration: 105717
loss: 0.9889816045761108,grad_norm: 0.9999990848285785, iteration: 105718
loss: 0.9733198285102844,grad_norm: 0.9999989788980554, iteration: 105719
loss: 1.0220489501953125,grad_norm: 0.924436392037688, iteration: 105720
loss: 1.0180575847625732,grad_norm: 0.9999993102228715, iteration: 105721
loss: 0.9746444225311279,grad_norm: 0.9999991502382924, iteration: 105722
loss: 1.0248680114746094,grad_norm: 0.9999992494970539, iteration: 105723
loss: 1.0111701488494873,grad_norm: 0.9999992119216674, iteration: 105724
loss: 1.0099296569824219,grad_norm: 0.8542644736241568, iteration: 105725
loss: 1.0246162414550781,grad_norm: 0.9999991867155127, iteration: 105726
loss: 1.0091156959533691,grad_norm: 0.9999998283268569, iteration: 105727
loss: 1.01767098903656,grad_norm: 0.9995402151172047, iteration: 105728
loss: 0.9638363122940063,grad_norm: 0.9697503250281683, iteration: 105729
loss: 0.9876375794410706,grad_norm: 0.919034553687537, iteration: 105730
loss: 0.9913330674171448,grad_norm: 0.999999172677343, iteration: 105731
loss: 0.9711622595787048,grad_norm: 0.9999992264506948, iteration: 105732
loss: 1.0455676317214966,grad_norm: 0.9692940177488081, iteration: 105733
loss: 0.998752236366272,grad_norm: 0.9999995274364684, iteration: 105734
loss: 1.09669828414917,grad_norm: 0.9999997351341315, iteration: 105735
loss: 1.0484083890914917,grad_norm: 0.9999990916057829, iteration: 105736
loss: 1.0107686519622803,grad_norm: 0.9999990891912395, iteration: 105737
loss: 1.0223939418792725,grad_norm: 0.9567099336088051, iteration: 105738
loss: 1.0256515741348267,grad_norm: 0.9999992753634274, iteration: 105739
loss: 0.9913452863693237,grad_norm: 0.9694614515707548, iteration: 105740
loss: 0.9793104529380798,grad_norm: 0.9999990767657927, iteration: 105741
loss: 1.0010604858398438,grad_norm: 0.9324747676372657, iteration: 105742
loss: 0.9812352061271667,grad_norm: 0.9919087040858517, iteration: 105743
loss: 0.9639006853103638,grad_norm: 0.9999991373032373, iteration: 105744
loss: 1.0043507814407349,grad_norm: 0.9999989942839381, iteration: 105745
loss: 0.9793927073478699,grad_norm: 0.9999991430197361, iteration: 105746
loss: 1.0226906538009644,grad_norm: 0.9999991707255639, iteration: 105747
loss: 1.0203306674957275,grad_norm: 0.9433043240572486, iteration: 105748
loss: 1.0048127174377441,grad_norm: 0.9258946194446814, iteration: 105749
loss: 0.9979783892631531,grad_norm: 0.9999992300128534, iteration: 105750
loss: 0.9890389442443848,grad_norm: 0.9121468306469723, iteration: 105751
loss: 0.9986568689346313,grad_norm: 0.9999991088718403, iteration: 105752
loss: 1.075065016746521,grad_norm: 0.9999990256659239, iteration: 105753
loss: 0.9917856454849243,grad_norm: 0.9999991213027878, iteration: 105754
loss: 0.9982737898826599,grad_norm: 0.9360113886543352, iteration: 105755
loss: 0.9990692734718323,grad_norm: 0.9999990699555414, iteration: 105756
loss: 0.991725742816925,grad_norm: 0.99999919654616, iteration: 105757
loss: 1.0036426782608032,grad_norm: 0.999999165568696, iteration: 105758
loss: 1.0282825231552124,grad_norm: 0.9999991654938786, iteration: 105759
loss: 0.9815636277198792,grad_norm: 0.9999990958868631, iteration: 105760
loss: 0.962556779384613,grad_norm: 0.9999991534895616, iteration: 105761
loss: 1.0127686262130737,grad_norm: 0.999999040266756, iteration: 105762
loss: 1.0175340175628662,grad_norm: 0.9999992774557899, iteration: 105763
loss: 0.997902512550354,grad_norm: 0.976275055749645, iteration: 105764
loss: 1.021660327911377,grad_norm: 0.9999992755775989, iteration: 105765
loss: 1.042922019958496,grad_norm: 0.9999992105096116, iteration: 105766
loss: 0.9911645650863647,grad_norm: 0.9999990857152714, iteration: 105767
loss: 1.0096038579940796,grad_norm: 0.9171068476204769, iteration: 105768
loss: 1.0323864221572876,grad_norm: 0.999999073336863, iteration: 105769
loss: 0.9814412593841553,grad_norm: 0.8995307200973945, iteration: 105770
loss: 1.0139036178588867,grad_norm: 0.8037498492579795, iteration: 105771
loss: 0.9512225389480591,grad_norm: 0.9999991673843694, iteration: 105772
loss: 1.0124480724334717,grad_norm: 0.9999994091511571, iteration: 105773
loss: 0.9775021076202393,grad_norm: 0.9999991134554006, iteration: 105774
loss: 1.0271128416061401,grad_norm: 0.9999991083183091, iteration: 105775
loss: 0.9921226501464844,grad_norm: 0.9900038873568365, iteration: 105776
loss: 1.0266276597976685,grad_norm: 0.9999992527806971, iteration: 105777
loss: 1.0011215209960938,grad_norm: 0.9999994410627933, iteration: 105778
loss: 1.0055581331253052,grad_norm: 0.9999994393976696, iteration: 105779
loss: 1.0176302194595337,grad_norm: 0.9761938326206565, iteration: 105780
loss: 0.9927588701248169,grad_norm: 0.9999992050184296, iteration: 105781
loss: 0.9776976704597473,grad_norm: 0.8440006913275354, iteration: 105782
loss: 0.9948395490646362,grad_norm: 0.9364767908868121, iteration: 105783
loss: 1.0124045610427856,grad_norm: 0.9999992947561132, iteration: 105784
loss: 0.9958417415618896,grad_norm: 0.9999990990658434, iteration: 105785
loss: 1.0200328826904297,grad_norm: 0.9999990941729945, iteration: 105786
loss: 1.0004140138626099,grad_norm: 0.9999990680893851, iteration: 105787
loss: 1.0307296514511108,grad_norm: 0.8981504852407455, iteration: 105788
loss: 1.0146201848983765,grad_norm: 0.9999991771323666, iteration: 105789
loss: 0.9944918155670166,grad_norm: 0.9947785544110218, iteration: 105790
loss: 0.978722333908081,grad_norm: 0.8698015968216417, iteration: 105791
loss: 1.0262254476547241,grad_norm: 0.999999213191607, iteration: 105792
loss: 1.0075644254684448,grad_norm: 0.9357847412689384, iteration: 105793
loss: 1.019810438156128,grad_norm: 0.9999992046067576, iteration: 105794
loss: 0.9865186810493469,grad_norm: 0.9816664633972448, iteration: 105795
loss: 1.0069355964660645,grad_norm: 0.9999993600501875, iteration: 105796
loss: 0.9988909959793091,grad_norm: 0.9999991838276661, iteration: 105797
loss: 1.0155694484710693,grad_norm: 0.9999990472827842, iteration: 105798
loss: 1.0676559209823608,grad_norm: 0.9872550084328828, iteration: 105799
loss: 1.036232829093933,grad_norm: 0.9999991451477223, iteration: 105800
loss: 0.9907904863357544,grad_norm: 0.8576779924342635, iteration: 105801
loss: 1.000544548034668,grad_norm: 0.9999990907772978, iteration: 105802
loss: 0.9968410134315491,grad_norm: 0.9999991904251644, iteration: 105803
loss: 1.0550198554992676,grad_norm: 0.9999991163493439, iteration: 105804
loss: 1.0023072957992554,grad_norm: 0.9999991365466671, iteration: 105805
loss: 0.9829306602478027,grad_norm: 0.9999990601182137, iteration: 105806
loss: 0.9823803901672363,grad_norm: 0.9137084283533005, iteration: 105807
loss: 1.0150717496871948,grad_norm: 0.9091813409106104, iteration: 105808
loss: 1.0005336999893188,grad_norm: 0.9999995088325259, iteration: 105809
loss: 0.9797308444976807,grad_norm: 0.999999285893684, iteration: 105810
loss: 0.9878109097480774,grad_norm: 0.9967917021418241, iteration: 105811
loss: 1.062983751296997,grad_norm: 0.999999831010448, iteration: 105812
loss: 1.0113292932510376,grad_norm: 0.9999990488080621, iteration: 105813
loss: 0.969299852848053,grad_norm: 0.9999990833423095, iteration: 105814
loss: 0.9991313815116882,grad_norm: 0.9999990644188328, iteration: 105815
loss: 0.9763830304145813,grad_norm: 0.9999992645032049, iteration: 105816
loss: 0.9810351729393005,grad_norm: 0.999999128586785, iteration: 105817
loss: 1.063533067703247,grad_norm: 0.9999995239291551, iteration: 105818
loss: 0.9599083662033081,grad_norm: 0.9999990981096122, iteration: 105819
loss: 1.0409064292907715,grad_norm: 0.9999991569399556, iteration: 105820
loss: 1.0339654684066772,grad_norm: 0.999999200486336, iteration: 105821
loss: 0.9815118312835693,grad_norm: 0.8464513542041437, iteration: 105822
loss: 1.015660285949707,grad_norm: 0.9882854406346271, iteration: 105823
loss: 0.9506640434265137,grad_norm: 0.9999989510144651, iteration: 105824
loss: 1.0302213430404663,grad_norm: 0.9999992020843298, iteration: 105825
loss: 0.9676420092582703,grad_norm: 0.9999992521063501, iteration: 105826
loss: 1.0283863544464111,grad_norm: 0.9999991520527423, iteration: 105827
loss: 1.0155665874481201,grad_norm: 0.9309835668232065, iteration: 105828
loss: 1.0100412368774414,grad_norm: 0.9999990885132554, iteration: 105829
loss: 1.0316627025604248,grad_norm: 0.9999993181936033, iteration: 105830
loss: 1.0205227136611938,grad_norm: 0.9999993103571857, iteration: 105831
loss: 0.9806159734725952,grad_norm: 0.9999990328700775, iteration: 105832
loss: 1.0085208415985107,grad_norm: 0.9999990442806107, iteration: 105833
loss: 1.0010371208190918,grad_norm: 0.9999989991805251, iteration: 105834
loss: 1.0193400382995605,grad_norm: 0.9999992782698852, iteration: 105835
loss: 1.0009253025054932,grad_norm: 0.9999993736922365, iteration: 105836
loss: 1.0469956398010254,grad_norm: 0.9999992632741185, iteration: 105837
loss: 1.0125131607055664,grad_norm: 0.9026230293288314, iteration: 105838
loss: 0.981323778629303,grad_norm: 0.9999989997037922, iteration: 105839
loss: 0.9855799674987793,grad_norm: 0.849661953226791, iteration: 105840
loss: 0.9999357461929321,grad_norm: 0.9999993231852986, iteration: 105841
loss: 0.9893119931221008,grad_norm: 0.9999990917958029, iteration: 105842
loss: 1.0211243629455566,grad_norm: 0.9999991941655579, iteration: 105843
loss: 0.9994667768478394,grad_norm: 0.9999990427153157, iteration: 105844
loss: 0.9725631475448608,grad_norm: 0.999999265428599, iteration: 105845
loss: 0.9842381477355957,grad_norm: 0.9999989783538337, iteration: 105846
loss: 0.9809509515762329,grad_norm: 0.9999992261964038, iteration: 105847
loss: 1.001067042350769,grad_norm: 0.9999992188741185, iteration: 105848
loss: 0.9653478264808655,grad_norm: 0.9999991400966558, iteration: 105849
loss: 1.021010160446167,grad_norm: 0.9999990447747474, iteration: 105850
loss: 0.9888324737548828,grad_norm: 0.8953869493989457, iteration: 105851
loss: 1.0111603736877441,grad_norm: 0.999999383437603, iteration: 105852
loss: 0.9975094795227051,grad_norm: 0.9529469101302274, iteration: 105853
loss: 0.9520584940910339,grad_norm: 0.9792543209123541, iteration: 105854
loss: 1.0104758739471436,grad_norm: 0.8991475238127804, iteration: 105855
loss: 1.0241416692733765,grad_norm: 0.9811942501045442, iteration: 105856
loss: 1.0203548669815063,grad_norm: 0.9999992174976449, iteration: 105857
loss: 0.9643909335136414,grad_norm: 0.9999991304572188, iteration: 105858
loss: 0.9821605086326599,grad_norm: 0.9999992917614929, iteration: 105859
loss: 0.9880800247192383,grad_norm: 0.9999990544416172, iteration: 105860
loss: 1.0172797441482544,grad_norm: 0.9999992079285774, iteration: 105861
loss: 0.981977641582489,grad_norm: 0.9999992041490029, iteration: 105862
loss: 1.0325990915298462,grad_norm: 0.9999992193377697, iteration: 105863
loss: 0.9890283942222595,grad_norm: 0.9999997723649158, iteration: 105864
loss: 1.0139191150665283,grad_norm: 0.9356952672538549, iteration: 105865
loss: 1.0284072160720825,grad_norm: 0.9706674425932451, iteration: 105866
loss: 0.9806797504425049,grad_norm: 0.9999992457674028, iteration: 105867
loss: 0.9985613226890564,grad_norm: 0.9999990934924208, iteration: 105868
loss: 1.0104520320892334,grad_norm: 0.9999990814035378, iteration: 105869
loss: 0.9706611633300781,grad_norm: 0.9560445400392225, iteration: 105870
loss: 1.0017671585083008,grad_norm: 0.9806142647884188, iteration: 105871
loss: 1.0219908952713013,grad_norm: 0.999999172277219, iteration: 105872
loss: 1.0101767778396606,grad_norm: 0.9707998075141164, iteration: 105873
loss: 1.014998197555542,grad_norm: 0.9999989987124549, iteration: 105874
loss: 0.9494448900222778,grad_norm: 0.9999990149391357, iteration: 105875
loss: 0.9935201406478882,grad_norm: 0.9999993191440145, iteration: 105876
loss: 1.007026195526123,grad_norm: 0.9887718214577769, iteration: 105877
loss: 0.9924332499504089,grad_norm: 0.9999992409195483, iteration: 105878
loss: 1.0522117614746094,grad_norm: 0.9999992104945786, iteration: 105879
loss: 1.031628966331482,grad_norm: 0.9999992118890217, iteration: 105880
loss: 1.0019803047180176,grad_norm: 0.9971882380589494, iteration: 105881
loss: 1.0531877279281616,grad_norm: 0.999999165764704, iteration: 105882
loss: 0.997241735458374,grad_norm: 0.8884109637132516, iteration: 105883
loss: 1.0107835531234741,grad_norm: 0.9199541496378005, iteration: 105884
loss: 1.017421007156372,grad_norm: 0.7965365742480305, iteration: 105885
loss: 0.9901787042617798,grad_norm: 0.9999993455327063, iteration: 105886
loss: 1.0281713008880615,grad_norm: 0.999999403559432, iteration: 105887
loss: 0.9998437166213989,grad_norm: 0.9573090602855214, iteration: 105888
loss: 0.9901310801506042,grad_norm: 0.999999606314292, iteration: 105889
loss: 1.017470359802246,grad_norm: 0.8819538154401813, iteration: 105890
loss: 1.0270991325378418,grad_norm: 0.9999991171056635, iteration: 105891
loss: 1.0041630268096924,grad_norm: 0.9999991085636029, iteration: 105892
loss: 0.9825385212898254,grad_norm: 0.9999992882828167, iteration: 105893
loss: 0.9687522053718567,grad_norm: 0.9999990443347483, iteration: 105894
loss: 0.9778632521629333,grad_norm: 0.9706792958093116, iteration: 105895
loss: 0.9952592849731445,grad_norm: 0.9804310606482574, iteration: 105896
loss: 1.0493190288543701,grad_norm: 0.9397607196494446, iteration: 105897
loss: 1.0451723337173462,grad_norm: 0.9999991168063299, iteration: 105898
loss: 0.9916331171989441,grad_norm: 0.9999990505584411, iteration: 105899
loss: 1.0137189626693726,grad_norm: 0.9999991513501573, iteration: 105900
loss: 0.9595465660095215,grad_norm: 0.9999989445354207, iteration: 105901
loss: 1.0009866952896118,grad_norm: 0.9243362061817404, iteration: 105902
loss: 0.9621953368186951,grad_norm: 0.9686649818523188, iteration: 105903
loss: 1.011116862297058,grad_norm: 0.999999163535686, iteration: 105904
loss: 0.9737521409988403,grad_norm: 0.9170014427226212, iteration: 105905
loss: 0.9863256812095642,grad_norm: 0.9999991573368955, iteration: 105906
loss: 1.0102348327636719,grad_norm: 0.9369960342451366, iteration: 105907
loss: 0.9695486426353455,grad_norm: 0.8753831377024747, iteration: 105908
loss: 0.999042809009552,grad_norm: 0.999998955951837, iteration: 105909
loss: 1.0425193309783936,grad_norm: 0.9999992656093961, iteration: 105910
loss: 1.0042567253112793,grad_norm: 0.9999991324888041, iteration: 105911
loss: 1.014072299003601,grad_norm: 0.9999989731272535, iteration: 105912
loss: 1.1454951763153076,grad_norm: 0.9999994214782818, iteration: 105913
loss: 0.9520758986473083,grad_norm: 0.9323699597376024, iteration: 105914
loss: 0.9969722628593445,grad_norm: 0.9165178751577745, iteration: 105915
loss: 1.029158115386963,grad_norm: 0.9964813813130089, iteration: 105916
loss: 1.0052130222320557,grad_norm: 0.9999991433584584, iteration: 105917
loss: 1.0393993854522705,grad_norm: 0.9999994539647532, iteration: 105918
loss: 1.0314033031463623,grad_norm: 0.8596401665274257, iteration: 105919
loss: 1.0496346950531006,grad_norm: 0.9999997227181264, iteration: 105920
loss: 0.9947972297668457,grad_norm: 0.9999991299694444, iteration: 105921
loss: 0.9803736805915833,grad_norm: 0.9999991255353181, iteration: 105922
loss: 1.0024235248565674,grad_norm: 0.8489883372120338, iteration: 105923
loss: 0.9870905876159668,grad_norm: 0.9999993883606371, iteration: 105924
loss: 0.9995439052581787,grad_norm: 0.999621966468564, iteration: 105925
loss: 1.0029230117797852,grad_norm: 0.9858233408289048, iteration: 105926
loss: 0.9611226916313171,grad_norm: 0.9999991359536696, iteration: 105927
loss: 0.9977887272834778,grad_norm: 0.9999990706348187, iteration: 105928
loss: 0.9924951791763306,grad_norm: 0.9137095999388464, iteration: 105929
loss: 0.9803985953330994,grad_norm: 0.9999991554436004, iteration: 105930
loss: 0.9572911262512207,grad_norm: 0.838953989363361, iteration: 105931
loss: 1.0295709371566772,grad_norm: 0.999999678631061, iteration: 105932
loss: 1.0178143978118896,grad_norm: 0.9999991683687041, iteration: 105933
loss: 0.9781246781349182,grad_norm: 0.9786924372990853, iteration: 105934
loss: 0.987088143825531,grad_norm: 0.9999991202788855, iteration: 105935
loss: 1.0206278562545776,grad_norm: 0.9999992521004935, iteration: 105936
loss: 0.9937201142311096,grad_norm: 0.9999991456467177, iteration: 105937
loss: 0.9780547618865967,grad_norm: 0.9999991120655163, iteration: 105938
loss: 1.0322136878967285,grad_norm: 0.8806066334077666, iteration: 105939
loss: 0.9872685074806213,grad_norm: 0.9999991922853643, iteration: 105940
loss: 1.0247645378112793,grad_norm: 0.9111229086479619, iteration: 105941
loss: 1.0489925146102905,grad_norm: 0.9999991594544115, iteration: 105942
loss: 1.014757513999939,grad_norm: 0.9999991745630075, iteration: 105943
loss: 0.9852426648139954,grad_norm: 0.9999990316139115, iteration: 105944
loss: 0.96051025390625,grad_norm: 0.8807049943282618, iteration: 105945
loss: 0.9990896582603455,grad_norm: 0.9999990254391365, iteration: 105946
loss: 1.0075112581253052,grad_norm: 0.8384078307660878, iteration: 105947
loss: 0.9877751469612122,grad_norm: 0.999999112773356, iteration: 105948
loss: 1.0446076393127441,grad_norm: 0.999999338594301, iteration: 105949
loss: 0.9903907775878906,grad_norm: 0.9999991957793319, iteration: 105950
loss: 0.9991722106933594,grad_norm: 0.9999989885045169, iteration: 105951
loss: 1.0253421068191528,grad_norm: 0.9999991396242839, iteration: 105952
loss: 1.0202770233154297,grad_norm: 0.9999991634804278, iteration: 105953
loss: 0.9674604535102844,grad_norm: 0.9999992180637366, iteration: 105954
loss: 1.0063453912734985,grad_norm: 0.9726512761578581, iteration: 105955
loss: 1.041654109954834,grad_norm: 0.9999990082456452, iteration: 105956
loss: 0.9737313389778137,grad_norm: 0.9999992081121594, iteration: 105957
loss: 1.0481959581375122,grad_norm: 0.9999990621331348, iteration: 105958
loss: 0.9605761766433716,grad_norm: 0.9782879885314069, iteration: 105959
loss: 0.9969954490661621,grad_norm: 0.9878588331200411, iteration: 105960
loss: 1.0499935150146484,grad_norm: 0.9999990699968715, iteration: 105961
loss: 1.0386830568313599,grad_norm: 0.9999990460333649, iteration: 105962
loss: 0.9621468186378479,grad_norm: 0.9999990798148424, iteration: 105963
loss: 1.0350844860076904,grad_norm: 0.999999057445074, iteration: 105964
loss: 0.9527718424797058,grad_norm: 0.9999990921029319, iteration: 105965
loss: 1.0190391540527344,grad_norm: 0.9916395554429471, iteration: 105966
loss: 0.992919385433197,grad_norm: 0.8934608199925161, iteration: 105967
loss: 1.0047587156295776,grad_norm: 0.9999989920787782, iteration: 105968
loss: 1.0172359943389893,grad_norm: 0.9999990902252677, iteration: 105969
loss: 1.0472652912139893,grad_norm: 0.999999152796596, iteration: 105970
loss: 1.030930995941162,grad_norm: 0.9999991658184688, iteration: 105971
loss: 0.9775028824806213,grad_norm: 0.9999989414427958, iteration: 105972
loss: 0.9884141087532043,grad_norm: 0.9224010399178971, iteration: 105973
loss: 1.0163112878799438,grad_norm: 0.9999991854842578, iteration: 105974
loss: 0.9991417527198792,grad_norm: 0.9999993312912029, iteration: 105975
loss: 0.9772124886512756,grad_norm: 0.9538351955247251, iteration: 105976
loss: 1.0456825494766235,grad_norm: 0.9999991354204317, iteration: 105977
loss: 0.9892151951789856,grad_norm: 0.9661554459746893, iteration: 105978
loss: 0.9973664879798889,grad_norm: 0.9999990939825848, iteration: 105979
loss: 0.9790995121002197,grad_norm: 0.9966674614247605, iteration: 105980
loss: 1.0166704654693604,grad_norm: 0.9697321720780282, iteration: 105981
loss: 0.9715643525123596,grad_norm: 0.999999703951548, iteration: 105982
loss: 0.9879962205886841,grad_norm: 0.9345970671959232, iteration: 105983
loss: 0.965057373046875,grad_norm: 0.999999186853075, iteration: 105984
loss: 0.9781938195228577,grad_norm: 0.9999991879435385, iteration: 105985
loss: 0.9790157675743103,grad_norm: 0.9999991743169219, iteration: 105986
loss: 1.0019696950912476,grad_norm: 0.9999990643116099, iteration: 105987
loss: 1.0005654096603394,grad_norm: 0.9999990718235497, iteration: 105988
loss: 0.9956451654434204,grad_norm: 0.9999990957177955, iteration: 105989
loss: 0.993882417678833,grad_norm: 0.9860840834670734, iteration: 105990
loss: 1.0312539339065552,grad_norm: 0.8081241072245425, iteration: 105991
loss: 0.992508590221405,grad_norm: 0.932928292010738, iteration: 105992
loss: 1.0046098232269287,grad_norm: 0.9038673762878973, iteration: 105993
loss: 0.968514084815979,grad_norm: 0.9999990599629972, iteration: 105994
loss: 0.9833725690841675,grad_norm: 0.999999032817631, iteration: 105995
loss: 1.0015991926193237,grad_norm: 0.9999989227974512, iteration: 105996
loss: 0.9844544529914856,grad_norm: 0.9711331400108713, iteration: 105997
loss: 0.9729390144348145,grad_norm: 0.9999991500037709, iteration: 105998
loss: 0.9957961440086365,grad_norm: 0.9511266513296707, iteration: 105999
loss: 1.00045907497406,grad_norm: 0.9999991056466547, iteration: 106000
loss: 0.9970905780792236,grad_norm: 0.9131772539028408, iteration: 106001
loss: 0.9927175641059875,grad_norm: 0.9999992016982994, iteration: 106002
loss: 0.9965844750404358,grad_norm: 0.9999992219116294, iteration: 106003
loss: 1.036922574043274,grad_norm: 0.9999990907211636, iteration: 106004
loss: 1.0992814302444458,grad_norm: 0.9999991606531474, iteration: 106005
loss: 0.9793020486831665,grad_norm: 0.9813183094156336, iteration: 106006
loss: 0.9912946820259094,grad_norm: 0.9982754650245644, iteration: 106007
loss: 0.9960789084434509,grad_norm: 0.9778645116515903, iteration: 106008
loss: 1.018926739692688,grad_norm: 0.9999991915599356, iteration: 106009
loss: 1.000009536743164,grad_norm: 0.9999989432005457, iteration: 106010
loss: 0.9977461099624634,grad_norm: 0.9999991478682038, iteration: 106011
loss: 0.9591643214225769,grad_norm: 0.9999991341311737, iteration: 106012
loss: 0.9917052388191223,grad_norm: 0.9956105494055207, iteration: 106013
loss: 1.0159697532653809,grad_norm: 0.8035480931385426, iteration: 106014
loss: 1.02275812625885,grad_norm: 0.9999995892612987, iteration: 106015
loss: 1.0102607011795044,grad_norm: 0.9167922973442404, iteration: 106016
loss: 0.9861718416213989,grad_norm: 0.9999990615769135, iteration: 106017
loss: 1.007026195526123,grad_norm: 0.9171774505224128, iteration: 106018
loss: 0.9812820553779602,grad_norm: 0.9999991954382326, iteration: 106019
loss: 1.006419062614441,grad_norm: 0.9871385642119688, iteration: 106020
loss: 1.0101702213287354,grad_norm: 0.9999992893007629, iteration: 106021
loss: 1.0224838256835938,grad_norm: 0.9999992743111165, iteration: 106022
loss: 0.956465482711792,grad_norm: 0.9999990858174365, iteration: 106023
loss: 0.9660760760307312,grad_norm: 0.967061238923921, iteration: 106024
loss: 0.999354362487793,grad_norm: 0.999999108717499, iteration: 106025
loss: 1.0056389570236206,grad_norm: 0.9999991446781752, iteration: 106026
loss: 0.9949062466621399,grad_norm: 0.9277431105483284, iteration: 106027
loss: 1.002278447151184,grad_norm: 0.9999991050937321, iteration: 106028
loss: 0.9838887453079224,grad_norm: 0.9999992213970315, iteration: 106029
loss: 1.0002413988113403,grad_norm: 0.952368319070583, iteration: 106030
loss: 0.9965254664421082,grad_norm: 0.8298764358701372, iteration: 106031
loss: 0.9777225255966187,grad_norm: 0.9999990161000684, iteration: 106032
loss: 1.0136768817901611,grad_norm: 0.9999993331255613, iteration: 106033
loss: 1.0214149951934814,grad_norm: 0.999999051868316, iteration: 106034
loss: 1.017984390258789,grad_norm: 0.9999991350365743, iteration: 106035
loss: 0.9853695631027222,grad_norm: 0.9999992093076621, iteration: 106036
loss: 1.0005837678909302,grad_norm: 0.9999991115482592, iteration: 106037
loss: 0.9809371829032898,grad_norm: 0.9875208437826736, iteration: 106038
loss: 1.0251673460006714,grad_norm: 0.9999992259353447, iteration: 106039
loss: 0.9730680584907532,grad_norm: 0.983613299406712, iteration: 106040
loss: 0.9999701380729675,grad_norm: 0.9999990610674957, iteration: 106041
loss: 1.007649302482605,grad_norm: 0.9999992678452633, iteration: 106042
loss: 1.0090891122817993,grad_norm: 0.950404702628249, iteration: 106043
loss: 0.9861965775489807,grad_norm: 0.9999989792243463, iteration: 106044
loss: 1.0007439851760864,grad_norm: 0.9999990421619087, iteration: 106045
loss: 0.9991742372512817,grad_norm: 0.8650411010146843, iteration: 106046
loss: 1.0089335441589355,grad_norm: 0.8463767705087526, iteration: 106047
loss: 0.9686987400054932,grad_norm: 0.9540932790886417, iteration: 106048
loss: 1.010874629020691,grad_norm: 0.9999990502571562, iteration: 106049
loss: 0.9696146249771118,grad_norm: 0.9478319425213929, iteration: 106050
loss: 0.9901256561279297,grad_norm: 0.9999992513392475, iteration: 106051
loss: 1.015099048614502,grad_norm: 0.9983162122761114, iteration: 106052
loss: 0.9946344494819641,grad_norm: 0.9359269482544502, iteration: 106053
loss: 0.9886558055877686,grad_norm: 0.9327367151245689, iteration: 106054
loss: 0.9958775043487549,grad_norm: 0.9999990088734829, iteration: 106055
loss: 1.0362145900726318,grad_norm: 0.9999992054237644, iteration: 106056
loss: 1.0107439756393433,grad_norm: 0.9999990389000681, iteration: 106057
loss: 1.0192581415176392,grad_norm: 0.8677042134648403, iteration: 106058
loss: 0.9847531318664551,grad_norm: 0.9521774537964749, iteration: 106059
loss: 1.0294948816299438,grad_norm: 0.9999990730851426, iteration: 106060
loss: 0.9986832737922668,grad_norm: 0.9690789008879876, iteration: 106061
loss: 0.9580323696136475,grad_norm: 0.9999990852245283, iteration: 106062
loss: 1.018414855003357,grad_norm: 0.9999992184744768, iteration: 106063
loss: 0.9658607840538025,grad_norm: 0.9173627802853886, iteration: 106064
loss: 1.0232306718826294,grad_norm: 0.8464464704204872, iteration: 106065
loss: 1.003680944442749,grad_norm: 0.9999992170454974, iteration: 106066
loss: 1.0302419662475586,grad_norm: 0.9338997373835821, iteration: 106067
loss: 1.0356035232543945,grad_norm: 0.9999991579939628, iteration: 106068
loss: 1.0155243873596191,grad_norm: 0.9999990779649102, iteration: 106069
loss: 0.9888553023338318,grad_norm: 0.9776281738462027, iteration: 106070
loss: 1.0640567541122437,grad_norm: 0.8522181054768923, iteration: 106071
loss: 0.9805966019630432,grad_norm: 0.9999991616702928, iteration: 106072
loss: 1.020426630973816,grad_norm: 0.9870336216966579, iteration: 106073
loss: 0.9872966408729553,grad_norm: 0.9119957493929717, iteration: 106074
loss: 0.9702708721160889,grad_norm: 0.9047721547608665, iteration: 106075
loss: 1.0007524490356445,grad_norm: 0.9999990391632573, iteration: 106076
loss: 1.0086581707000732,grad_norm: 0.9999990024557351, iteration: 106077
loss: 1.0185344219207764,grad_norm: 0.9999991969252822, iteration: 106078
loss: 0.9847566485404968,grad_norm: 0.999357602988804, iteration: 106079
loss: 0.9500112533569336,grad_norm: 0.9999991773149927, iteration: 106080
loss: 1.0332186222076416,grad_norm: 0.9038357739514016, iteration: 106081
loss: 0.9537703394889832,grad_norm: 0.9999991946009696, iteration: 106082
loss: 1.0165635347366333,grad_norm: 0.9999991352316197, iteration: 106083
loss: 0.9912669658660889,grad_norm: 0.9999991856774937, iteration: 106084
loss: 0.9771809577941895,grad_norm: 0.9113114714329872, iteration: 106085
loss: 0.9923700094223022,grad_norm: 0.9322982922194304, iteration: 106086
loss: 1.0002416372299194,grad_norm: 0.8105217537017125, iteration: 106087
loss: 0.9741104245185852,grad_norm: 0.9999991293504841, iteration: 106088
loss: 1.0009987354278564,grad_norm: 0.9848334895669992, iteration: 106089
loss: 0.9802525639533997,grad_norm: 0.991084540831037, iteration: 106090
loss: 0.9923522472381592,grad_norm: 0.9999989354562823, iteration: 106091
loss: 0.9927845597267151,grad_norm: 0.9345966738125767, iteration: 106092
loss: 0.9598537683486938,grad_norm: 0.9999991827072876, iteration: 106093
loss: 0.9838594198226929,grad_norm: 0.8816050502363059, iteration: 106094
loss: 1.0200895071029663,grad_norm: 0.916582192973084, iteration: 106095
loss: 0.9943634867668152,grad_norm: 0.9999992339854762, iteration: 106096
loss: 0.9718534350395203,grad_norm: 0.9405021767368348, iteration: 106097
loss: 1.0418205261230469,grad_norm: 0.9999993176727083, iteration: 106098
loss: 1.027132511138916,grad_norm: 0.9999990051324038, iteration: 106099
loss: 1.047523856163025,grad_norm: 0.9999991262913531, iteration: 106100
loss: 0.998661994934082,grad_norm: 0.9999990036718162, iteration: 106101
loss: 1.0126986503601074,grad_norm: 0.9914312479921547, iteration: 106102
loss: 0.9970747232437134,grad_norm: 0.9999992265589482, iteration: 106103
loss: 1.0142052173614502,grad_norm: 0.9999992226368979, iteration: 106104
loss: 0.9866673946380615,grad_norm: 0.8271706395433834, iteration: 106105
loss: 0.9737825393676758,grad_norm: 0.904423708482479, iteration: 106106
loss: 0.9923127889633179,grad_norm: 0.9327991553864465, iteration: 106107
loss: 0.9294708967208862,grad_norm: 0.999999083736651, iteration: 106108
loss: 1.0062651634216309,grad_norm: 0.9511849953806821, iteration: 106109
loss: 0.9905562996864319,grad_norm: 0.9999991352622191, iteration: 106110
loss: 0.9978216290473938,grad_norm: 0.9551409619842915, iteration: 106111
loss: 1.0392645597457886,grad_norm: 0.9272559395656583, iteration: 106112
loss: 0.9857254028320312,grad_norm: 0.9999991219238715, iteration: 106113
loss: 0.9733346104621887,grad_norm: 0.9649311351488901, iteration: 106114
loss: 1.0084577798843384,grad_norm: 0.9367755081113096, iteration: 106115
loss: 0.948429524898529,grad_norm: 0.9999992721586828, iteration: 106116
loss: 1.0251638889312744,grad_norm: 0.8860326635618415, iteration: 106117
loss: 1.013826608657837,grad_norm: 0.9999992723452293, iteration: 106118
loss: 0.9990533590316772,grad_norm: 0.9999991017139127, iteration: 106119
loss: 0.9655227065086365,grad_norm: 0.9999990420603445, iteration: 106120
loss: 1.0319104194641113,grad_norm: 0.9999992042142163, iteration: 106121
loss: 0.9858909249305725,grad_norm: 0.9612297457918834, iteration: 106122
loss: 1.0022865533828735,grad_norm: 0.999999218761095, iteration: 106123
loss: 0.9746948480606079,grad_norm: 0.8369815528678047, iteration: 106124
loss: 1.0329493284225464,grad_norm: 0.9999994132786855, iteration: 106125
loss: 1.0590859651565552,grad_norm: 0.9999992716463013, iteration: 106126
loss: 0.9935230016708374,grad_norm: 0.9999990998726956, iteration: 106127
loss: 1.012941837310791,grad_norm: 0.9999992857352145, iteration: 106128
loss: 0.9793139100074768,grad_norm: 0.9375908996040176, iteration: 106129
loss: 1.0059823989868164,grad_norm: 0.9999992310223844, iteration: 106130
loss: 1.0030772686004639,grad_norm: 0.9025461425705178, iteration: 106131
loss: 1.0355676412582397,grad_norm: 0.9999991478750455, iteration: 106132
loss: 1.0016820430755615,grad_norm: 0.9999990379953979, iteration: 106133
loss: 1.009027123451233,grad_norm: 0.9919527566689779, iteration: 106134
loss: 0.9803773760795593,grad_norm: 0.9999991186982489, iteration: 106135
loss: 0.9768975377082825,grad_norm: 0.9400146840917123, iteration: 106136
loss: 1.0095024108886719,grad_norm: 0.9357583499679946, iteration: 106137
loss: 0.9996545910835266,grad_norm: 0.9375354333856435, iteration: 106138
loss: 0.9477694630622864,grad_norm: 0.9999989600698159, iteration: 106139
loss: 1.0160529613494873,grad_norm: 0.9999992463161098, iteration: 106140
loss: 0.9883008003234863,grad_norm: 0.9999989536759841, iteration: 106141
loss: 0.9906632900238037,grad_norm: 0.9999990650177623, iteration: 106142
loss: 0.945865273475647,grad_norm: 0.8769952555533493, iteration: 106143
loss: 0.9860036373138428,grad_norm: 0.9999991812305373, iteration: 106144
loss: 0.9789419174194336,grad_norm: 0.9999991489167, iteration: 106145
loss: 1.0288900136947632,grad_norm: 0.9999992434215709, iteration: 106146
loss: 0.9705245494842529,grad_norm: 0.9421903337987755, iteration: 106147
loss: 0.9937823414802551,grad_norm: 0.9999990056746929, iteration: 106148
loss: 1.0095458030700684,grad_norm: 0.9510442241714848, iteration: 106149
loss: 1.0581327676773071,grad_norm: 0.999999129227096, iteration: 106150
loss: 0.9857553839683533,grad_norm: 0.9999992986891878, iteration: 106151
loss: 1.0041433572769165,grad_norm: 0.9194898799944957, iteration: 106152
loss: 0.9971227645874023,grad_norm: 0.9999991743893062, iteration: 106153
loss: 1.016769528388977,grad_norm: 0.9999991058487246, iteration: 106154
loss: 1.0057456493377686,grad_norm: 0.9999991791231022, iteration: 106155
loss: 0.9782189130783081,grad_norm: 0.999999122260759, iteration: 106156
loss: 1.067033290863037,grad_norm: 0.9999990801259379, iteration: 106157
loss: 1.0617247819900513,grad_norm: 0.9999992219830517, iteration: 106158
loss: 0.9417822957038879,grad_norm: 0.9999991175494796, iteration: 106159
loss: 0.9949994683265686,grad_norm: 0.9233770709911325, iteration: 106160
loss: 1.013662338256836,grad_norm: 0.9999992667079306, iteration: 106161
loss: 0.9805192947387695,grad_norm: 0.9682852187838153, iteration: 106162
loss: 1.01357102394104,grad_norm: 0.9999991533198871, iteration: 106163
loss: 0.9616120457649231,grad_norm: 0.9999991726501587, iteration: 106164
loss: 0.99771648645401,grad_norm: 0.9282262797616012, iteration: 106165
loss: 0.9870957136154175,grad_norm: 0.9999990891309037, iteration: 106166
loss: 1.0276014804840088,grad_norm: 0.999998993649884, iteration: 106167
loss: 0.9756500124931335,grad_norm: 0.9999993160531672, iteration: 106168
loss: 0.9578800797462463,grad_norm: 0.9999990385101917, iteration: 106169
loss: 0.9963131546974182,grad_norm: 0.9999991687456599, iteration: 106170
loss: 1.0465511083602905,grad_norm: 0.9999992321184883, iteration: 106171
loss: 1.0023123025894165,grad_norm: 0.9647640026805323, iteration: 106172
loss: 0.994987964630127,grad_norm: 0.9999991841620278, iteration: 106173
loss: 1.0141404867172241,grad_norm: 0.9999990152266149, iteration: 106174
loss: 0.9732311964035034,grad_norm: 0.9999989841826952, iteration: 106175
loss: 1.0016789436340332,grad_norm: 0.9662523822951319, iteration: 106176
loss: 0.9670133590698242,grad_norm: 0.9198774681591751, iteration: 106177
loss: 1.0530771017074585,grad_norm: 0.99999950307556, iteration: 106178
loss: 0.9864862561225891,grad_norm: 0.9999991248653507, iteration: 106179
loss: 1.0009263753890991,grad_norm: 0.9999990805060571, iteration: 106180
loss: 1.003962516784668,grad_norm: 0.9999992691615601, iteration: 106181
loss: 0.9696997404098511,grad_norm: 0.9659764454839289, iteration: 106182
loss: 1.0070375204086304,grad_norm: 0.9999991490578876, iteration: 106183
loss: 0.9890131950378418,grad_norm: 0.9978828419773325, iteration: 106184
loss: 1.001145839691162,grad_norm: 0.9049935361398832, iteration: 106185
loss: 1.0183485746383667,grad_norm: 0.9999990731618895, iteration: 106186
loss: 0.9673008322715759,grad_norm: 0.9492358730933963, iteration: 106187
loss: 1.009358286857605,grad_norm: 0.8695917149606488, iteration: 106188
loss: 1.027860164642334,grad_norm: 0.9999991224524876, iteration: 106189
loss: 0.9661041498184204,grad_norm: 0.958293263209468, iteration: 106190
loss: 0.979444682598114,grad_norm: 0.999999674317964, iteration: 106191
loss: 1.0147932767868042,grad_norm: 0.9024775466370205, iteration: 106192
loss: 1.0240247249603271,grad_norm: 0.9999989858294747, iteration: 106193
loss: 1.0842911005020142,grad_norm: 0.9999992763848551, iteration: 106194
loss: 1.0114184617996216,grad_norm: 0.999999202551102, iteration: 106195
loss: 0.9742196798324585,grad_norm: 0.9999990460164938, iteration: 106196
loss: 0.9698051810264587,grad_norm: 0.9999991711676894, iteration: 106197
loss: 0.9526130557060242,grad_norm: 0.9999993432338268, iteration: 106198
loss: 1.0029951333999634,grad_norm: 0.987802934178022, iteration: 106199
loss: 0.9644572138786316,grad_norm: 0.9999989727804526, iteration: 106200
loss: 1.0405629873275757,grad_norm: 0.9999990588044602, iteration: 106201
loss: 0.9628040790557861,grad_norm: 0.9999990554089186, iteration: 106202
loss: 1.0339558124542236,grad_norm: 0.9353575265880183, iteration: 106203
loss: 0.9828460812568665,grad_norm: 0.9999990310771115, iteration: 106204
loss: 1.021346092224121,grad_norm: 0.9999992330857717, iteration: 106205
loss: 0.9778115749359131,grad_norm: 0.8579834968069672, iteration: 106206
loss: 1.1070101261138916,grad_norm: 0.9999997554209991, iteration: 106207
loss: 1.0162299871444702,grad_norm: 0.9999991296680293, iteration: 106208
loss: 1.0178604125976562,grad_norm: 0.9999991664515241, iteration: 106209
loss: 1.020438551902771,grad_norm: 0.8885081960860769, iteration: 106210
loss: 1.000123381614685,grad_norm: 0.9999991351166454, iteration: 106211
loss: 0.9608567357063293,grad_norm: 0.9999990604580611, iteration: 106212
loss: 1.0150229930877686,grad_norm: 0.9999991002367187, iteration: 106213
loss: 0.9912259578704834,grad_norm: 0.9999991672659143, iteration: 106214
loss: 0.9962376356124878,grad_norm: 0.9999992153768528, iteration: 106215
loss: 1.0079896450042725,grad_norm: 0.9999991529236815, iteration: 106216
loss: 1.0146387815475464,grad_norm: 0.9980404489413124, iteration: 106217
loss: 0.9741739630699158,grad_norm: 0.9999990482754527, iteration: 106218
loss: 0.9775993824005127,grad_norm: 0.9999990887835998, iteration: 106219
loss: 0.9715843200683594,grad_norm: 0.9431615514537882, iteration: 106220
loss: 0.9970492720603943,grad_norm: 0.9999992490692379, iteration: 106221
loss: 1.0146962404251099,grad_norm: 0.9999991866737631, iteration: 106222
loss: 1.0050597190856934,grad_norm: 0.9999992018071592, iteration: 106223
loss: 1.0054640769958496,grad_norm: 0.8415762052547925, iteration: 106224
loss: 1.0089836120605469,grad_norm: 0.8854724571099126, iteration: 106225
loss: 0.9988918900489807,grad_norm: 0.9999992006348085, iteration: 106226
loss: 0.9785373210906982,grad_norm: 0.9540691457313014, iteration: 106227
loss: 1.000213384628296,grad_norm: 0.9999989865426757, iteration: 106228
loss: 1.0332362651824951,grad_norm: 0.999999237734663, iteration: 106229
loss: 0.9721169471740723,grad_norm: 0.9999989823464233, iteration: 106230
loss: 0.9751251935958862,grad_norm: 0.9653546642082007, iteration: 106231
loss: 0.9929667115211487,grad_norm: 0.9156933281914701, iteration: 106232
loss: 1.03200101852417,grad_norm: 0.9999991153389783, iteration: 106233
loss: 0.9839187264442444,grad_norm: 0.9680335646799981, iteration: 106234
loss: 0.986549437046051,grad_norm: 0.9999992947112474, iteration: 106235
loss: 1.018092393875122,grad_norm: 0.9999991043723049, iteration: 106236
loss: 0.9644789695739746,grad_norm: 0.9467341306846242, iteration: 106237
loss: 1.0320284366607666,grad_norm: 0.8819031275310532, iteration: 106238
loss: 0.9914546608924866,grad_norm: 0.9999991368128043, iteration: 106239
loss: 1.002514362335205,grad_norm: 0.9282128597795309, iteration: 106240
loss: 0.9711624979972839,grad_norm: 0.9999991987592182, iteration: 106241
loss: 1.0173649787902832,grad_norm: 0.9602513391552259, iteration: 106242
loss: 0.9928969144821167,grad_norm: 0.8713331294423349, iteration: 106243
loss: 0.9791414737701416,grad_norm: 0.9999990702669512, iteration: 106244
loss: 1.0007683038711548,grad_norm: 0.9999992718731501, iteration: 106245
loss: 1.023836612701416,grad_norm: 0.9999991781725486, iteration: 106246
loss: 1.0019246339797974,grad_norm: 0.9779285105833516, iteration: 106247
loss: 0.9876446723937988,grad_norm: 0.9999991118705267, iteration: 106248
loss: 0.9563513994216919,grad_norm: 0.9999991942117568, iteration: 106249
loss: 1.0228697061538696,grad_norm: 0.9999991005667531, iteration: 106250
loss: 0.9664762616157532,grad_norm: 0.9822262876159013, iteration: 106251
loss: 1.0239571332931519,grad_norm: 0.9999996163391672, iteration: 106252
loss: 0.9993157386779785,grad_norm: 0.9999991514229141, iteration: 106253
loss: 0.9894680976867676,grad_norm: 0.9999991528460452, iteration: 106254
loss: 0.9608579874038696,grad_norm: 0.978987344839315, iteration: 106255
loss: 1.0169508457183838,grad_norm: 0.999999163242356, iteration: 106256
loss: 0.990410327911377,grad_norm: 0.9801304585697503, iteration: 106257
loss: 0.987913191318512,grad_norm: 0.9999990007315396, iteration: 106258
loss: 0.9928603172302246,grad_norm: 0.9999991401238464, iteration: 106259
loss: 1.0451587438583374,grad_norm: 0.9999992427833371, iteration: 106260
loss: 0.9892807006835938,grad_norm: 0.9999991817972923, iteration: 106261
loss: 1.0165218114852905,grad_norm: 0.9999992380093866, iteration: 106262
loss: 0.9803693890571594,grad_norm: 0.9999991532592942, iteration: 106263
loss: 0.9755935072898865,grad_norm: 0.9999991272158848, iteration: 106264
loss: 1.025702953338623,grad_norm: 0.99999899238148, iteration: 106265
loss: 1.037400245666504,grad_norm: 0.9999991795600116, iteration: 106266
loss: 1.0032570362091064,grad_norm: 0.9125713680886279, iteration: 106267
loss: 1.020448088645935,grad_norm: 0.9999992281434288, iteration: 106268
loss: 0.9826934337615967,grad_norm: 0.995804254697833, iteration: 106269
loss: 1.0523624420166016,grad_norm: 0.9999996235869799, iteration: 106270
loss: 0.9825906157493591,grad_norm: 0.999999149087639, iteration: 106271
loss: 0.9906815886497498,grad_norm: 0.9442254578189114, iteration: 106272
loss: 1.008836269378662,grad_norm: 0.9681117526553166, iteration: 106273
loss: 1.0498557090759277,grad_norm: 0.9999991633227211, iteration: 106274
loss: 1.0262571573257446,grad_norm: 0.9911739659512585, iteration: 106275
loss: 1.0075805187225342,grad_norm: 0.9999991263091155, iteration: 106276
loss: 0.9929395318031311,grad_norm: 0.9482072836064792, iteration: 106277
loss: 1.0195982456207275,grad_norm: 0.9746335514282966, iteration: 106278
loss: 0.975257396697998,grad_norm: 0.8711525734897698, iteration: 106279
loss: 1.035346269607544,grad_norm: 0.9685983220284429, iteration: 106280
loss: 1.0184394121170044,grad_norm: 0.9999991606668654, iteration: 106281
loss: 1.0795358419418335,grad_norm: 0.999999308079419, iteration: 106282
loss: 0.9698259830474854,grad_norm: 0.9670406474297083, iteration: 106283
loss: 0.9994686841964722,grad_norm: 0.9999992474925579, iteration: 106284
loss: 1.006349802017212,grad_norm: 0.9999992705816158, iteration: 106285
loss: 0.9436183571815491,grad_norm: 0.9999990416028136, iteration: 106286
loss: 1.0113694667816162,grad_norm: 0.9999990682615657, iteration: 106287
loss: 1.0262632369995117,grad_norm: 0.9999992218364546, iteration: 106288
loss: 1.0277414321899414,grad_norm: 0.9999992315669818, iteration: 106289
loss: 1.0056794881820679,grad_norm: 0.9893043057087202, iteration: 106290
loss: 1.0176620483398438,grad_norm: 0.999999261194073, iteration: 106291
loss: 0.990216851234436,grad_norm: 0.9898255802507462, iteration: 106292
loss: 0.9656105637550354,grad_norm: 0.9999992145788908, iteration: 106293
loss: 0.9721692204475403,grad_norm: 0.9999991737818009, iteration: 106294
loss: 1.015103816986084,grad_norm: 0.9999992506032725, iteration: 106295
loss: 1.0482709407806396,grad_norm: 0.9999990947300836, iteration: 106296
loss: 1.0193132162094116,grad_norm: 0.9999989867380306, iteration: 106297
loss: 1.027040719985962,grad_norm: 0.9999992100602229, iteration: 106298
loss: 0.988406777381897,grad_norm: 0.9999990509909972, iteration: 106299
loss: 0.9779632687568665,grad_norm: 0.9999991609092969, iteration: 106300
loss: 1.004576563835144,grad_norm: 0.9999990405979371, iteration: 106301
loss: 0.9793257713317871,grad_norm: 0.9999990386096715, iteration: 106302
loss: 0.9926936030387878,grad_norm: 0.9999991359585153, iteration: 106303
loss: 0.9937896728515625,grad_norm: 0.9999992601665205, iteration: 106304
loss: 1.012696385383606,grad_norm: 0.9768899873381739, iteration: 106305
loss: 1.026597023010254,grad_norm: 0.9999991871550774, iteration: 106306
loss: 1.0091804265975952,grad_norm: 0.9999992354836788, iteration: 106307
loss: 0.9818181395530701,grad_norm: 0.9999989668722574, iteration: 106308
loss: 0.975870668888092,grad_norm: 0.9277671714288606, iteration: 106309
loss: 0.9528242945671082,grad_norm: 0.9999993199776953, iteration: 106310
loss: 1.0156216621398926,grad_norm: 0.9999990784624042, iteration: 106311
loss: 1.0328223705291748,grad_norm: 0.9437490933462338, iteration: 106312
loss: 1.0264883041381836,grad_norm: 0.9288212599482223, iteration: 106313
loss: 0.9932158589363098,grad_norm: 0.9999992785223671, iteration: 106314
loss: 1.0138604640960693,grad_norm: 0.9999991316181206, iteration: 106315
loss: 1.0104740858078003,grad_norm: 0.9999988501400646, iteration: 106316
loss: 1.0124469995498657,grad_norm: 0.9999992358960156, iteration: 106317
loss: 0.980779230594635,grad_norm: 0.9999991881844147, iteration: 106318
loss: 0.9970836043357849,grad_norm: 0.9999991648897798, iteration: 106319
loss: 1.0089439153671265,grad_norm: 0.9999990528158286, iteration: 106320
loss: 0.9792605638504028,grad_norm: 0.988175670699713, iteration: 106321
loss: 0.9639156460762024,grad_norm: 0.9171995250436713, iteration: 106322
loss: 0.9981070756912231,grad_norm: 0.9570738131097064, iteration: 106323
loss: 1.0712051391601562,grad_norm: 0.9999995480238502, iteration: 106324
loss: 1.005494475364685,grad_norm: 0.9999990331032047, iteration: 106325
loss: 0.9920200109481812,grad_norm: 0.9999991157296325, iteration: 106326
loss: 1.007653832435608,grad_norm: 0.9016940160515532, iteration: 106327
loss: 0.9653149843215942,grad_norm: 0.8740808875806777, iteration: 106328
loss: 0.9963837265968323,grad_norm: 0.848952434951209, iteration: 106329
loss: 1.009007453918457,grad_norm: 0.9417383856763688, iteration: 106330
loss: 0.9816319346427917,grad_norm: 0.8531316958619477, iteration: 106331
loss: 0.9933213591575623,grad_norm: 0.8689139086266101, iteration: 106332
loss: 0.9661221504211426,grad_norm: 0.9191213850714123, iteration: 106333
loss: 0.9792709946632385,grad_norm: 0.9999991177842101, iteration: 106334
loss: 1.0071747303009033,grad_norm: 0.9847262276890355, iteration: 106335
loss: 0.9874811768531799,grad_norm: 0.9253545613598174, iteration: 106336
loss: 0.9981668591499329,grad_norm: 0.9172081073177248, iteration: 106337
loss: 1.0070635080337524,grad_norm: 0.9999991091516518, iteration: 106338
loss: 1.0235562324523926,grad_norm: 0.9999992223904662, iteration: 106339
loss: 0.9858072400093079,grad_norm: 0.8967069267822025, iteration: 106340
loss: 0.9815627336502075,grad_norm: 0.999999241585573, iteration: 106341
loss: 1.026574969291687,grad_norm: 0.9999992865801803, iteration: 106342
loss: 0.9858745336532593,grad_norm: 0.9974020480903921, iteration: 106343
loss: 0.9948917627334595,grad_norm: 0.8318373953345727, iteration: 106344
loss: 1.0085722208023071,grad_norm: 0.9960325421624391, iteration: 106345
loss: 1.0004318952560425,grad_norm: 0.9999991096043045, iteration: 106346
loss: 1.047426700592041,grad_norm: 0.9999990532329981, iteration: 106347
loss: 1.0325520038604736,grad_norm: 0.8764446685415297, iteration: 106348
loss: 0.9976759552955627,grad_norm: 0.999998937240428, iteration: 106349
loss: 1.005507230758667,grad_norm: 0.9999992833871634, iteration: 106350
loss: 1.0088826417922974,grad_norm: 0.8196989242657516, iteration: 106351
loss: 0.9721737504005432,grad_norm: 0.9999990520368979, iteration: 106352
loss: 1.0158025026321411,grad_norm: 0.8972994065257515, iteration: 106353
loss: 0.9930694699287415,grad_norm: 0.9090828459082545, iteration: 106354
loss: 0.9891932606697083,grad_norm: 0.8235236959451243, iteration: 106355
loss: 1.0672202110290527,grad_norm: 0.9999997650525807, iteration: 106356
loss: 0.9983141422271729,grad_norm: 0.9999991515317328, iteration: 106357
loss: 1.0364880561828613,grad_norm: 0.9432257003612924, iteration: 106358
loss: 0.9611691236495972,grad_norm: 0.9999991316127175, iteration: 106359
loss: 0.9674346446990967,grad_norm: 0.9969063527742957, iteration: 106360
loss: 0.9950471520423889,grad_norm: 0.9237008227404842, iteration: 106361
loss: 1.0049118995666504,grad_norm: 0.9999989943059279, iteration: 106362
loss: 1.036138653755188,grad_norm: 0.9999993302301526, iteration: 106363
loss: 1.0143929719924927,grad_norm: 0.9999991916144759, iteration: 106364
loss: 0.9426769614219666,grad_norm: 0.9447393441930849, iteration: 106365
loss: 1.000731348991394,grad_norm: 0.9512690266471776, iteration: 106366
loss: 1.0252310037612915,grad_norm: 0.946999503780556, iteration: 106367
loss: 0.9825649261474609,grad_norm: 0.9367674428179049, iteration: 106368
loss: 1.0240509510040283,grad_norm: 0.9999991371463983, iteration: 106369
loss: 1.0295097827911377,grad_norm: 0.9999991119099422, iteration: 106370
loss: 0.9819450378417969,grad_norm: 0.977834935864109, iteration: 106371
loss: 1.0064516067504883,grad_norm: 0.9999990901272586, iteration: 106372
loss: 0.9970491528511047,grad_norm: 0.9999992228013628, iteration: 106373
loss: 0.9799751043319702,grad_norm: 0.9999992553768635, iteration: 106374
loss: 1.0015442371368408,grad_norm: 0.9684749156874864, iteration: 106375
loss: 1.010396122932434,grad_norm: 0.9999991451746818, iteration: 106376
loss: 0.9855355620384216,grad_norm: 0.9999992236952617, iteration: 106377
loss: 1.0236870050430298,grad_norm: 0.9999990630584025, iteration: 106378
loss: 1.0303274393081665,grad_norm: 0.9999991874791717, iteration: 106379
loss: 0.9757359027862549,grad_norm: 0.9509152340857996, iteration: 106380
loss: 1.0001444816589355,grad_norm: 0.999999136050811, iteration: 106381
loss: 1.0082709789276123,grad_norm: 0.916509512380245, iteration: 106382
loss: 0.9881286025047302,grad_norm: 0.999999083521769, iteration: 106383
loss: 0.9750206470489502,grad_norm: 0.9999991104435314, iteration: 106384
loss: 1.0053876638412476,grad_norm: 0.8948703598240612, iteration: 106385
loss: 0.9721269607543945,grad_norm: 0.9256410896260849, iteration: 106386
loss: 1.025709867477417,grad_norm: 0.9999991598648339, iteration: 106387
loss: 0.9648720622062683,grad_norm: 0.8972943691142783, iteration: 106388
loss: 0.9682003259658813,grad_norm: 0.9999992342027907, iteration: 106389
loss: 0.9466401934623718,grad_norm: 0.9999991878586016, iteration: 106390
loss: 0.9869367480278015,grad_norm: 0.9999991780025809, iteration: 106391
loss: 1.0464255809783936,grad_norm: 0.9352736940933787, iteration: 106392
loss: 0.9812629222869873,grad_norm: 0.9999991849288941, iteration: 106393
loss: 1.008625864982605,grad_norm: 0.9999991597386658, iteration: 106394
loss: 1.0115188360214233,grad_norm: 0.9999991685430423, iteration: 106395
loss: 0.9414898753166199,grad_norm: 0.9950806478727334, iteration: 106396
loss: 1.0100562572479248,grad_norm: 0.9999991682186504, iteration: 106397
loss: 1.009710669517517,grad_norm: 0.9999994113725745, iteration: 106398
loss: 0.9765568375587463,grad_norm: 0.9811295090880631, iteration: 106399
loss: 1.0240908861160278,grad_norm: 0.9999990625304361, iteration: 106400
loss: 1.014131784439087,grad_norm: 0.9999991539423807, iteration: 106401
loss: 1.0288434028625488,grad_norm: 0.9999991397330625, iteration: 106402
loss: 0.9737505912780762,grad_norm: 0.9774644801359013, iteration: 106403
loss: 1.04302179813385,grad_norm: 0.9999993248338753, iteration: 106404
loss: 1.0290987491607666,grad_norm: 0.9999988946619277, iteration: 106405
loss: 1.0057826042175293,grad_norm: 0.999999484530755, iteration: 106406
loss: 0.9967693090438843,grad_norm: 0.9999990765441016, iteration: 106407
loss: 1.0233581066131592,grad_norm: 0.999999076668571, iteration: 106408
loss: 1.0072460174560547,grad_norm: 0.9999989878712642, iteration: 106409
loss: 0.9977321028709412,grad_norm: 0.9999991867177839, iteration: 106410
loss: 1.003617525100708,grad_norm: 0.999999289862167, iteration: 106411
loss: 0.9603475332260132,grad_norm: 0.9579853218016249, iteration: 106412
loss: 1.0262701511383057,grad_norm: 0.9999991060799385, iteration: 106413
loss: 0.9773544669151306,grad_norm: 0.9999992411102883, iteration: 106414
loss: 0.9937940239906311,grad_norm: 0.9999990750629852, iteration: 106415
loss: 0.9687352776527405,grad_norm: 0.9246087398633339, iteration: 106416
loss: 1.0042177438735962,grad_norm: 0.9999990714851406, iteration: 106417
loss: 1.0003182888031006,grad_norm: 0.9999993409842852, iteration: 106418
loss: 0.9856826663017273,grad_norm: 0.9999991797506651, iteration: 106419
loss: 0.9768649935722351,grad_norm: 0.9999991500946009, iteration: 106420
loss: 1.010677456855774,grad_norm: 0.970477887466217, iteration: 106421
loss: 0.9694026708602905,grad_norm: 0.999999173860856, iteration: 106422
loss: 1.016991376876831,grad_norm: 0.9999993181396011, iteration: 106423
loss: 1.0042332410812378,grad_norm: 0.9999991812883711, iteration: 106424
loss: 1.0301456451416016,grad_norm: 0.9999991425458588, iteration: 106425
loss: 0.9856728911399841,grad_norm: 0.7917141110761428, iteration: 106426
loss: 1.0058612823486328,grad_norm: 0.9999991147307699, iteration: 106427
loss: 0.995938777923584,grad_norm: 0.9999994742058701, iteration: 106428
loss: 1.004111886024475,grad_norm: 0.9250736127206404, iteration: 106429
loss: 1.0182116031646729,grad_norm: 0.9999991312729163, iteration: 106430
loss: 1.0149483680725098,grad_norm: 0.9999991014453168, iteration: 106431
loss: 1.0095171928405762,grad_norm: 0.9999991486528164, iteration: 106432
loss: 0.9706165790557861,grad_norm: 0.9794442223648797, iteration: 106433
loss: 1.0034962892532349,grad_norm: 0.9298023451764595, iteration: 106434
loss: 1.0002210140228271,grad_norm: 0.9999991072993561, iteration: 106435
loss: 1.0080225467681885,grad_norm: 0.8849218201041479, iteration: 106436
loss: 0.9972627758979797,grad_norm: 0.9999992330031975, iteration: 106437
loss: 1.0052235126495361,grad_norm: 0.9999990849893935, iteration: 106438
loss: 0.9688714146614075,grad_norm: 0.9479945485560904, iteration: 106439
loss: 1.0084723234176636,grad_norm: 0.8354862037929635, iteration: 106440
loss: 0.9891479015350342,grad_norm: 0.8922063539029333, iteration: 106441
loss: 1.0346652269363403,grad_norm: 0.9999992153880727, iteration: 106442
loss: 0.9978936314582825,grad_norm: 0.999999216921449, iteration: 106443
loss: 0.9989529252052307,grad_norm: 0.9834265419434899, iteration: 106444
loss: 0.9952176809310913,grad_norm: 0.9999992204233185, iteration: 106445
loss: 1.0356491804122925,grad_norm: 0.999999082321306, iteration: 106446
loss: 1.010515570640564,grad_norm: 0.9999991595581164, iteration: 106447
loss: 1.042578101158142,grad_norm: 0.9999994013155045, iteration: 106448
loss: 1.0305505990982056,grad_norm: 0.999999463696076, iteration: 106449
loss: 0.9938444495201111,grad_norm: 0.9831663345402453, iteration: 106450
loss: 0.9673060178756714,grad_norm: 0.8893280599564277, iteration: 106451
loss: 1.0006273984909058,grad_norm: 0.9999991341640183, iteration: 106452
loss: 0.9858158230781555,grad_norm: 0.9964493228849944, iteration: 106453
loss: 1.0405091047286987,grad_norm: 0.9999992184734219, iteration: 106454
loss: 0.9941585659980774,grad_norm: 0.8583687299230488, iteration: 106455
loss: 1.0073935985565186,grad_norm: 0.9999989569654497, iteration: 106456
loss: 0.9583616256713867,grad_norm: 0.9797397287232299, iteration: 106457
loss: 0.9766274690628052,grad_norm: 0.999999138817676, iteration: 106458
loss: 0.9876370429992676,grad_norm: 0.9999991219632877, iteration: 106459
loss: 0.9803880453109741,grad_norm: 0.9999995904427438, iteration: 106460
loss: 1.034434199333191,grad_norm: 0.9999991194102712, iteration: 106461
loss: 0.9462673664093018,grad_norm: 0.9999992014186421, iteration: 106462
loss: 0.9926138520240784,grad_norm: 0.9999991659337746, iteration: 106463
loss: 0.9774709343910217,grad_norm: 0.9999992971948654, iteration: 106464
loss: 1.0020703077316284,grad_norm: 0.9782358731869553, iteration: 106465
loss: 0.9992528557777405,grad_norm: 0.9999990584597297, iteration: 106466
loss: 0.9869750738143921,grad_norm: 0.930880213354904, iteration: 106467
loss: 0.9932662844657898,grad_norm: 0.9999993368368557, iteration: 106468
loss: 0.9777488112449646,grad_norm: 0.9999990888330268, iteration: 106469
loss: 1.0392411947250366,grad_norm: 0.9999998232858044, iteration: 106470
loss: 1.0324602127075195,grad_norm: 0.9999991581346215, iteration: 106471
loss: 0.9854248762130737,grad_norm: 0.9855500418874302, iteration: 106472
loss: 1.0019996166229248,grad_norm: 0.9999990288246454, iteration: 106473
loss: 1.0249766111373901,grad_norm: 0.9999990774020555, iteration: 106474
loss: 0.9947388172149658,grad_norm: 0.9999990961988207, iteration: 106475
loss: 0.9937515258789062,grad_norm: 0.9999991166516742, iteration: 106476
loss: 1.0117720365524292,grad_norm: 0.9288644999147433, iteration: 106477
loss: 0.9889479279518127,grad_norm: 0.9151081551303344, iteration: 106478
loss: 1.039384365081787,grad_norm: 0.9999992286074274, iteration: 106479
loss: 0.981982946395874,grad_norm: 0.9346086514779972, iteration: 106480
loss: 1.0330690145492554,grad_norm: 0.999999054224875, iteration: 106481
loss: 0.9751613736152649,grad_norm: 0.9388868068149139, iteration: 106482
loss: 0.9943846464157104,grad_norm: 0.8574084302034481, iteration: 106483
loss: 1.030527949333191,grad_norm: 0.9683258296542968, iteration: 106484
loss: 0.9917548894882202,grad_norm: 0.9999991508109106, iteration: 106485
loss: 0.9827206134796143,grad_norm: 0.9999990718453443, iteration: 106486
loss: 0.9852275252342224,grad_norm: 0.941574439050769, iteration: 106487
loss: 1.005039095878601,grad_norm: 0.8876288389129731, iteration: 106488
loss: 0.9935495853424072,grad_norm: 0.9999990874596506, iteration: 106489
loss: 1.007882833480835,grad_norm: 0.8683317762441655, iteration: 106490
loss: 0.9823136329650879,grad_norm: 0.9968402547483779, iteration: 106491
loss: 0.9638574719429016,grad_norm: 0.9999991856845087, iteration: 106492
loss: 0.9986645579338074,grad_norm: 0.9867304914498092, iteration: 106493
loss: 1.0116502046585083,grad_norm: 0.8985688477609755, iteration: 106494
loss: 1.0188777446746826,grad_norm: 0.9999991337577675, iteration: 106495
loss: 1.0138728618621826,grad_norm: 0.9999992067652786, iteration: 106496
loss: 1.0324692726135254,grad_norm: 0.9999991601144422, iteration: 106497
loss: 1.0089210271835327,grad_norm: 0.9999992271165691, iteration: 106498
loss: 0.9833593964576721,grad_norm: 0.9999991070300631, iteration: 106499
loss: 0.9546322226524353,grad_norm: 0.9999991051906317, iteration: 106500
loss: 0.9621099233627319,grad_norm: 0.9999990669479822, iteration: 106501
loss: 1.0174380540847778,grad_norm: 0.9999990648214344, iteration: 106502
loss: 0.9880792498588562,grad_norm: 0.9999989504853124, iteration: 106503
loss: 1.0267685651779175,grad_norm: 0.9621722922347098, iteration: 106504
loss: 1.0043723583221436,grad_norm: 0.9245953527414764, iteration: 106505
loss: 1.015805721282959,grad_norm: 0.9999989091871753, iteration: 106506
loss: 1.0287106037139893,grad_norm: 0.9255934684796583, iteration: 106507
loss: 0.9650967121124268,grad_norm: 0.9999991497232145, iteration: 106508
loss: 0.9983600974082947,grad_norm: 0.9337053940916057, iteration: 106509
loss: 1.0922096967697144,grad_norm: 0.979840270376836, iteration: 106510
loss: 1.0291099548339844,grad_norm: 0.9301061859853413, iteration: 106511
loss: 1.0283561944961548,grad_norm: 0.9999990774948023, iteration: 106512
loss: 0.971924364566803,grad_norm: 0.934458339964021, iteration: 106513
loss: 1.0329848527908325,grad_norm: 0.9999992739495007, iteration: 106514
loss: 0.9888999462127686,grad_norm: 0.9999991341998634, iteration: 106515
loss: 0.9805245995521545,grad_norm: 0.9999991781650021, iteration: 106516
loss: 0.9896811842918396,grad_norm: 0.8144258118389401, iteration: 106517
loss: 1.0195527076721191,grad_norm: 0.9999990698099677, iteration: 106518
loss: 0.9849242568016052,grad_norm: 0.9115057944882428, iteration: 106519
loss: 0.9873983263969421,grad_norm: 0.9999993103092502, iteration: 106520
loss: 1.0093128681182861,grad_norm: 0.999999220186899, iteration: 106521
loss: 0.9837980270385742,grad_norm: 0.9999991530340065, iteration: 106522
loss: 0.9579827189445496,grad_norm: 0.9999992164721238, iteration: 106523
loss: 1.0253329277038574,grad_norm: 0.9999991157124618, iteration: 106524
loss: 0.9617339372634888,grad_norm: 0.9804383221965534, iteration: 106525
loss: 0.9510820508003235,grad_norm: 0.9999991614708199, iteration: 106526
loss: 1.0096173286437988,grad_norm: 0.9999995384608883, iteration: 106527
loss: 1.0388903617858887,grad_norm: 0.9999995224764995, iteration: 106528
loss: 1.013107180595398,grad_norm: 0.9999991106582402, iteration: 106529
loss: 1.1908442974090576,grad_norm: 0.9999998875922409, iteration: 106530
loss: 0.9904298782348633,grad_norm: 0.958843368055417, iteration: 106531
loss: 0.9920793175697327,grad_norm: 0.9982420211750308, iteration: 106532
loss: 1.0045878887176514,grad_norm: 0.9999990597838557, iteration: 106533
loss: 0.9890851974487305,grad_norm: 0.9100901122190876, iteration: 106534
loss: 0.9870403409004211,grad_norm: 0.999999045699385, iteration: 106535
loss: 1.0061542987823486,grad_norm: 0.9999991206265241, iteration: 106536
loss: 0.9853817224502563,grad_norm: 0.9448401142327731, iteration: 106537
loss: 1.047270655632019,grad_norm: 0.9999989915254894, iteration: 106538
loss: 0.9854366779327393,grad_norm: 0.9169529630772418, iteration: 106539
loss: 1.0371042490005493,grad_norm: 0.9999991702084003, iteration: 106540
loss: 0.9956883788108826,grad_norm: 0.9999992478788342, iteration: 106541
loss: 1.0071066617965698,grad_norm: 0.9702917148066997, iteration: 106542
loss: 0.9852647185325623,grad_norm: 0.971013083811016, iteration: 106543
loss: 0.9803470969200134,grad_norm: 0.9376758239090334, iteration: 106544
loss: 1.0246894359588623,grad_norm: 0.999999019048082, iteration: 106545
loss: 1.0132544040679932,grad_norm: 0.9999993339306458, iteration: 106546
loss: 0.9832323789596558,grad_norm: 0.969495235054959, iteration: 106547
loss: 0.9791354537010193,grad_norm: 0.9999991484311515, iteration: 106548
loss: 1.0226496458053589,grad_norm: 0.999999192835911, iteration: 106549
loss: 0.9890814423561096,grad_norm: 0.869613970443299, iteration: 106550
loss: 0.9772706031799316,grad_norm: 0.999999177871459, iteration: 106551
loss: 1.0078426599502563,grad_norm: 0.9999990223607363, iteration: 106552
loss: 0.9953998923301697,grad_norm: 0.9999992162441864, iteration: 106553
loss: 1.0368531942367554,grad_norm: 0.7793139839443916, iteration: 106554
loss: 0.9480859637260437,grad_norm: 0.9999991367638478, iteration: 106555
loss: 1.0195633172988892,grad_norm: 0.9999991190377066, iteration: 106556
loss: 0.9822561144828796,grad_norm: 0.9999991946512442, iteration: 106557
loss: 1.0051902532577515,grad_norm: 0.9999992485177622, iteration: 106558
loss: 1.0171043872833252,grad_norm: 0.9167318009713961, iteration: 106559
loss: 1.005608081817627,grad_norm: 0.9999991503688219, iteration: 106560
loss: 0.9910547137260437,grad_norm: 0.9999992145169022, iteration: 106561
loss: 1.0100616216659546,grad_norm: 0.9282354254480857, iteration: 106562
loss: 0.9994854927062988,grad_norm: 0.8577864625109374, iteration: 106563
loss: 1.0270036458969116,grad_norm: 0.9805001254106797, iteration: 106564
loss: 0.993928849697113,grad_norm: 0.9008234529977461, iteration: 106565
loss: 1.0099314451217651,grad_norm: 0.9999990228911937, iteration: 106566
loss: 1.0005911588668823,grad_norm: 0.9999992556794206, iteration: 106567
loss: 1.029249906539917,grad_norm: 0.9999990024513, iteration: 106568
loss: 0.9916728734970093,grad_norm: 0.999999019380962, iteration: 106569
loss: 0.981416642665863,grad_norm: 0.9999995012925215, iteration: 106570
loss: 1.0159257650375366,grad_norm: 0.9189808266813768, iteration: 106571
loss: 0.9907204508781433,grad_norm: 0.9999997010189285, iteration: 106572
loss: 0.9878389239311218,grad_norm: 0.9999991380410286, iteration: 106573
loss: 0.9986745715141296,grad_norm: 0.9999990757652264, iteration: 106574
loss: 0.9976866841316223,grad_norm: 0.9886526893853512, iteration: 106575
loss: 1.0140845775604248,grad_norm: 0.9999991356971512, iteration: 106576
loss: 1.0026110410690308,grad_norm: 0.9999992176468971, iteration: 106577
loss: 1.0148862600326538,grad_norm: 0.9999989283013265, iteration: 106578
loss: 1.0236279964447021,grad_norm: 0.9999995668719112, iteration: 106579
loss: 0.9718700051307678,grad_norm: 0.9154371244396385, iteration: 106580
loss: 0.9838137030601501,grad_norm: 0.9906102127882446, iteration: 106581
loss: 0.9915109276771545,grad_norm: 0.9999998712905245, iteration: 106582
loss: 0.9975749850273132,grad_norm: 0.9899580367642201, iteration: 106583
loss: 0.9690815210342407,grad_norm: 0.9999992749943105, iteration: 106584
loss: 0.9872981309890747,grad_norm: 0.9894888984447118, iteration: 106585
loss: 0.99784255027771,grad_norm: 0.9999989569337809, iteration: 106586
loss: 1.0172628164291382,grad_norm: 0.9954567807256984, iteration: 106587
loss: 0.9815439581871033,grad_norm: 0.9999991322101931, iteration: 106588
loss: 1.0156946182250977,grad_norm: 0.8949216129863338, iteration: 106589
loss: 0.962232768535614,grad_norm: 0.9999999187556624, iteration: 106590
loss: 0.9852215051651001,grad_norm: 0.9979699137370753, iteration: 106591
loss: 1.005486249923706,grad_norm: 0.9999993012775373, iteration: 106592
loss: 1.0288047790527344,grad_norm: 0.999999386243067, iteration: 106593
loss: 1.0500563383102417,grad_norm: 0.9999997092952259, iteration: 106594
loss: 1.009247899055481,grad_norm: 0.9999993783571643, iteration: 106595
loss: 1.0166959762573242,grad_norm: 0.9999989970764879, iteration: 106596
loss: 1.0226484537124634,grad_norm: 0.9999990427170328, iteration: 106597
loss: 0.9946572780609131,grad_norm: 0.9876508594207689, iteration: 106598
loss: 1.0013904571533203,grad_norm: 0.9999991539015302, iteration: 106599
loss: 1.014830231666565,grad_norm: 0.8477629186609971, iteration: 106600
loss: 0.9553667902946472,grad_norm: 0.9999991709765897, iteration: 106601
loss: 0.9823613166809082,grad_norm: 0.952723307417783, iteration: 106602
loss: 0.9978030323982239,grad_norm: 0.8630982102289353, iteration: 106603
loss: 1.0096455812454224,grad_norm: 0.9999990219247803, iteration: 106604
loss: 1.0165297985076904,grad_norm: 0.9999990366380829, iteration: 106605
loss: 0.9903295040130615,grad_norm: 0.9399483613732939, iteration: 106606
loss: 1.0204699039459229,grad_norm: 0.9999992841142215, iteration: 106607
loss: 0.977312445640564,grad_norm: 0.9999990754463673, iteration: 106608
loss: 1.008560299873352,grad_norm: 0.861819912756997, iteration: 106609
loss: 1.0123732089996338,grad_norm: 0.999999061857165, iteration: 106610
loss: 0.993349015712738,grad_norm: 0.9999992096394178, iteration: 106611
loss: 1.0387580394744873,grad_norm: 0.9999996656230289, iteration: 106612
loss: 0.982133686542511,grad_norm: 0.9999991050502176, iteration: 106613
loss: 0.9744548797607422,grad_norm: 0.8757917082191802, iteration: 106614
loss: 0.9728153347969055,grad_norm: 0.9999991940821832, iteration: 106615
loss: 1.0392922163009644,grad_norm: 0.889760772293643, iteration: 106616
loss: 0.9901771545410156,grad_norm: 0.9999992097866656, iteration: 106617
loss: 0.9733948707580566,grad_norm: 0.9846412563579813, iteration: 106618
loss: 1.0146058797836304,grad_norm: 0.9999993164015162, iteration: 106619
loss: 0.9757459163665771,grad_norm: 0.9641814240171162, iteration: 106620
loss: 0.9891215562820435,grad_norm: 0.9999990692786134, iteration: 106621
loss: 0.9897429943084717,grad_norm: 0.982930754818072, iteration: 106622
loss: 0.9973065853118896,grad_norm: 0.742685215712347, iteration: 106623
loss: 1.0357708930969238,grad_norm: 0.9999996526123056, iteration: 106624
loss: 0.9814084768295288,grad_norm: 0.9999992139454694, iteration: 106625
loss: 1.003459095954895,grad_norm: 0.9999990770462688, iteration: 106626
loss: 1.0314823389053345,grad_norm: 0.9999992756321937, iteration: 106627
loss: 0.9910528659820557,grad_norm: 0.933905325388729, iteration: 106628
loss: 0.9901488423347473,grad_norm: 0.9945115603419505, iteration: 106629
loss: 0.9834326505661011,grad_norm: 0.9718589888776195, iteration: 106630
loss: 0.9855695366859436,grad_norm: 0.9999990269074402, iteration: 106631
loss: 0.994770884513855,grad_norm: 0.9794364186329406, iteration: 106632
loss: 1.0079631805419922,grad_norm: 0.9999990046030814, iteration: 106633
loss: 0.9791802763938904,grad_norm: 0.9999992090272408, iteration: 106634
loss: 1.0163190364837646,grad_norm: 0.9999992304262322, iteration: 106635
loss: 0.9672601819038391,grad_norm: 0.9999990920540089, iteration: 106636
loss: 0.9633122086524963,grad_norm: 0.9999997416578849, iteration: 106637
loss: 0.9930747747421265,grad_norm: 0.9999991137947163, iteration: 106638
loss: 0.9740704298019409,grad_norm: 0.9957175942185553, iteration: 106639
loss: 1.0137686729431152,grad_norm: 0.9999991982840453, iteration: 106640
loss: 0.95635986328125,grad_norm: 0.999999134767901, iteration: 106641
loss: 0.9926362633705139,grad_norm: 0.9999991295188907, iteration: 106642
loss: 0.9771921634674072,grad_norm: 0.9976166565779004, iteration: 106643
loss: 0.9870224595069885,grad_norm: 0.9999994664439047, iteration: 106644
loss: 1.01162850856781,grad_norm: 0.9999991255716132, iteration: 106645
loss: 0.9726175665855408,grad_norm: 0.9513538438187701, iteration: 106646
loss: 0.9765750169754028,grad_norm: 0.9547970043796725, iteration: 106647
loss: 0.9834654331207275,grad_norm: 0.9510578638903889, iteration: 106648
loss: 1.0094337463378906,grad_norm: 0.9886620951133762, iteration: 106649
loss: 1.0280624628067017,grad_norm: 0.9999990579721924, iteration: 106650
loss: 1.0348551273345947,grad_norm: 0.9999990535592007, iteration: 106651
loss: 0.9932257533073425,grad_norm: 0.9999991302659063, iteration: 106652
loss: 1.0918781757354736,grad_norm: 0.9999993232367731, iteration: 106653
loss: 1.0279419422149658,grad_norm: 0.9173629805155163, iteration: 106654
loss: 1.0973052978515625,grad_norm: 0.9351327718814766, iteration: 106655
loss: 1.0943001508712769,grad_norm: 0.9999991732462779, iteration: 106656
loss: 0.9713895916938782,grad_norm: 0.9476136204162705, iteration: 106657
loss: 1.0036619901657104,grad_norm: 0.9546012148096137, iteration: 106658
loss: 1.0011924505233765,grad_norm: 0.9999990569918049, iteration: 106659
loss: 0.9672436714172363,grad_norm: 0.9999991856671522, iteration: 106660
loss: 0.9845272302627563,grad_norm: 0.9999989217467367, iteration: 106661
loss: 0.9896338582038879,grad_norm: 0.9999991968748673, iteration: 106662
loss: 1.0072691440582275,grad_norm: 0.9999990234558874, iteration: 106663
loss: 1.0245893001556396,grad_norm: 0.9999992756563547, iteration: 106664
loss: 1.0571902990341187,grad_norm: 0.9570319526132913, iteration: 106665
loss: 1.0196644067764282,grad_norm: 0.9999991242895657, iteration: 106666
loss: 1.0321909189224243,grad_norm: 0.9999999372653331, iteration: 106667
loss: 0.9379620552062988,grad_norm: 0.9999989402738906, iteration: 106668
loss: 1.0084404945373535,grad_norm: 0.9999990703109618, iteration: 106669
loss: 0.9773561358451843,grad_norm: 0.9999991807082912, iteration: 106670
loss: 0.9602214097976685,grad_norm: 0.989145480498591, iteration: 106671
loss: 1.0231950283050537,grad_norm: 0.9999991265887709, iteration: 106672
loss: 0.9839893579483032,grad_norm: 0.9937770191374331, iteration: 106673
loss: 0.9958057403564453,grad_norm: 0.9999998733528801, iteration: 106674
loss: 0.9963439106941223,grad_norm: 0.9999998176926645, iteration: 106675
loss: 1.026713490486145,grad_norm: 0.92643979240968, iteration: 106676
loss: 0.9985895752906799,grad_norm: 0.8997846709151668, iteration: 106677
loss: 1.0098541975021362,grad_norm: 0.9999992237241654, iteration: 106678
loss: 1.0495030879974365,grad_norm: 0.9999995802242077, iteration: 106679
loss: 0.9653673768043518,grad_norm: 0.9999991798446236, iteration: 106680
loss: 0.9929001927375793,grad_norm: 0.9839814432212745, iteration: 106681
loss: 1.0207220315933228,grad_norm: 0.9999991551559019, iteration: 106682
loss: 1.0040205717086792,grad_norm: 0.9464758432672701, iteration: 106683
loss: 0.959018886089325,grad_norm: 0.9999990219005627, iteration: 106684
loss: 0.9989699721336365,grad_norm: 0.9999991533090318, iteration: 106685
loss: 1.0030035972595215,grad_norm: 0.99999896241008, iteration: 106686
loss: 1.043961763381958,grad_norm: 0.999999434838582, iteration: 106687
loss: 1.0121341943740845,grad_norm: 0.9999992455585518, iteration: 106688
loss: 1.0417677164077759,grad_norm: 0.9999991159483528, iteration: 106689
loss: 1.087367057800293,grad_norm: 0.9999994402427443, iteration: 106690
loss: 1.0336952209472656,grad_norm: 0.8777959784820888, iteration: 106691
loss: 1.0049629211425781,grad_norm: 0.999999104085531, iteration: 106692
loss: 1.000632882118225,grad_norm: 0.9999991743752868, iteration: 106693
loss: 1.0262839794158936,grad_norm: 0.9978078459957339, iteration: 106694
loss: 0.9973558187484741,grad_norm: 0.9999992263564058, iteration: 106695
loss: 1.0089530944824219,grad_norm: 0.9009803052062051, iteration: 106696
loss: 1.003080129623413,grad_norm: 0.9999993977035373, iteration: 106697
loss: 1.0088415145874023,grad_norm: 0.8972499705545888, iteration: 106698
loss: 1.0165942907333374,grad_norm: 0.9999992489433791, iteration: 106699
loss: 0.9796456098556519,grad_norm: 0.9689780481193137, iteration: 106700
loss: 1.0260732173919678,grad_norm: 0.8769289633706858, iteration: 106701
loss: 1.006884217262268,grad_norm: 0.9999992550890362, iteration: 106702
loss: 1.0068484544754028,grad_norm: 0.9999991227857916, iteration: 106703
loss: 1.0176419019699097,grad_norm: 0.9999992433028126, iteration: 106704
loss: 1.0153417587280273,grad_norm: 0.8725533065447209, iteration: 106705
loss: 1.0016330480575562,grad_norm: 0.9999992644718824, iteration: 106706
loss: 1.1110397577285767,grad_norm: 0.9999992944928923, iteration: 106707
loss: 1.07792329788208,grad_norm: 0.9999992468153642, iteration: 106708
loss: 1.0156611204147339,grad_norm: 0.9999991756449434, iteration: 106709
loss: 1.0268081426620483,grad_norm: 0.9523488716857846, iteration: 106710
loss: 1.0053330659866333,grad_norm: 0.9999990935172978, iteration: 106711
loss: 1.0000531673431396,grad_norm: 0.99999921629415, iteration: 106712
loss: 1.0234267711639404,grad_norm: 0.9999991277628997, iteration: 106713
loss: 1.0041605234146118,grad_norm: 0.9778842691761016, iteration: 106714
loss: 0.9921838045120239,grad_norm: 0.9999990270924036, iteration: 106715
loss: 1.0017157793045044,grad_norm: 0.9999991901090368, iteration: 106716
loss: 0.9991084337234497,grad_norm: 0.9999992025836001, iteration: 106717
loss: 1.0530883073806763,grad_norm: 0.9999996451882605, iteration: 106718
loss: 1.0195088386535645,grad_norm: 0.9327949704313317, iteration: 106719
loss: 1.006629228591919,grad_norm: 0.99999903394676, iteration: 106720
loss: 1.063776969909668,grad_norm: 0.9999994984462364, iteration: 106721
loss: 1.0289626121520996,grad_norm: 0.9999988626882512, iteration: 106722
loss: 0.9710065126419067,grad_norm: 0.9999990790369288, iteration: 106723
loss: 0.9973477721214294,grad_norm: 0.9999992466210589, iteration: 106724
loss: 1.0116068124771118,grad_norm: 0.9999990106928172, iteration: 106725
loss: 0.991639256477356,grad_norm: 0.999999195835103, iteration: 106726
loss: 1.0316228866577148,grad_norm: 0.9999992325156588, iteration: 106727
loss: 1.0852025747299194,grad_norm: 0.9999990915955512, iteration: 106728
loss: 0.9866310954093933,grad_norm: 0.9641837630426572, iteration: 106729
loss: 1.1627320051193237,grad_norm: 0.9999998763859164, iteration: 106730
loss: 1.0236669778823853,grad_norm: 0.9791524777780368, iteration: 106731
loss: 1.0363616943359375,grad_norm: 0.9999992027715241, iteration: 106732
loss: 0.9701444506645203,grad_norm: 0.9999993031708274, iteration: 106733
loss: 0.9677556753158569,grad_norm: 0.9999990033025367, iteration: 106734
loss: 1.0168545246124268,grad_norm: 0.9179974648779653, iteration: 106735
loss: 1.0880918502807617,grad_norm: 0.9999994981619305, iteration: 106736
loss: 1.0301436185836792,grad_norm: 0.9999993917011565, iteration: 106737
loss: 0.9622792601585388,grad_norm: 0.9937930309341737, iteration: 106738
loss: 1.0172017812728882,grad_norm: 0.9999990738824147, iteration: 106739
loss: 0.9957725405693054,grad_norm: 0.9999991086542159, iteration: 106740
loss: 1.0296101570129395,grad_norm: 0.9999991482539499, iteration: 106741
loss: 1.0138261318206787,grad_norm: 0.9999993543610857, iteration: 106742
loss: 1.0295383930206299,grad_norm: 0.9999992205051322, iteration: 106743
loss: 1.0077425241470337,grad_norm: 0.9999990767749741, iteration: 106744
loss: 1.0101350545883179,grad_norm: 0.9999998303609736, iteration: 106745
loss: 0.9986381530761719,grad_norm: 0.9999990312995128, iteration: 106746
loss: 0.991621196269989,grad_norm: 0.9999990384118091, iteration: 106747
loss: 1.032063364982605,grad_norm: 0.9999991278872158, iteration: 106748
loss: 1.0233341455459595,grad_norm: 0.999999220157401, iteration: 106749
loss: 1.0030092000961304,grad_norm: 0.9999993153136058, iteration: 106750
loss: 0.9992418885231018,grad_norm: 0.9999991002721924, iteration: 106751
loss: 1.028595209121704,grad_norm: 0.9999989839633149, iteration: 106752
loss: 1.0248734951019287,grad_norm: 0.9873274058628609, iteration: 106753
loss: 1.0805892944335938,grad_norm: 0.999999189714687, iteration: 106754
loss: 0.9892144203186035,grad_norm: 0.9999991523344722, iteration: 106755
loss: 1.0351558923721313,grad_norm: 0.9486848319356214, iteration: 106756
loss: 1.0222684144973755,grad_norm: 0.8408845545995881, iteration: 106757
loss: 1.0838419198989868,grad_norm: 0.9999992942485058, iteration: 106758
loss: 0.996468186378479,grad_norm: 0.9999991036866472, iteration: 106759
loss: 1.0072590112686157,grad_norm: 0.8402324713522535, iteration: 106760
loss: 0.9835443496704102,grad_norm: 0.9999992272846785, iteration: 106761
loss: 1.0040032863616943,grad_norm: 0.9404486084834348, iteration: 106762
loss: 0.9963909983634949,grad_norm: 0.9999992148950704, iteration: 106763
loss: 1.0025163888931274,grad_norm: 0.7947718901683269, iteration: 106764
loss: 0.97829669713974,grad_norm: 0.9999996338546706, iteration: 106765
loss: 0.9740607738494873,grad_norm: 0.9999992911766429, iteration: 106766
loss: 0.9992246031761169,grad_norm: 0.9999990599570739, iteration: 106767
loss: 0.9686021208763123,grad_norm: 0.9044640254083943, iteration: 106768
loss: 0.9841738939285278,grad_norm: 0.9999991195144455, iteration: 106769
loss: 1.0148781538009644,grad_norm: 0.8558096843351556, iteration: 106770
loss: 0.9939277172088623,grad_norm: 0.9509191976563043, iteration: 106771
loss: 1.0185985565185547,grad_norm: 0.9999999472395014, iteration: 106772
loss: 1.0331913232803345,grad_norm: 0.9999993193074279, iteration: 106773
loss: 1.0457342863082886,grad_norm: 0.9999995851408111, iteration: 106774
loss: 1.0600486993789673,grad_norm: 0.9999995918920089, iteration: 106775
loss: 1.0415208339691162,grad_norm: 0.9999991854496303, iteration: 106776
loss: 0.99086993932724,grad_norm: 0.9422547015378737, iteration: 106777
loss: 1.0135618448257446,grad_norm: 0.9999992281640102, iteration: 106778
loss: 1.0198646783828735,grad_norm: 0.9999990197325608, iteration: 106779
loss: 0.9919312000274658,grad_norm: 0.9999993182401214, iteration: 106780
loss: 1.0105631351470947,grad_norm: 0.9865977697497739, iteration: 106781
loss: 1.0066149234771729,grad_norm: 0.9999990725011221, iteration: 106782
loss: 1.0087993144989014,grad_norm: 0.999999012594571, iteration: 106783
loss: 1.0197964906692505,grad_norm: 0.9888138192106494, iteration: 106784
loss: 0.9731168746948242,grad_norm: 0.9278385236125127, iteration: 106785
loss: 0.9788064956665039,grad_norm: 0.9999991446310202, iteration: 106786
loss: 1.0278979539871216,grad_norm: 0.9999991022489141, iteration: 106787
loss: 0.9568845629692078,grad_norm: 0.9999990860535979, iteration: 106788
loss: 1.0175457000732422,grad_norm: 0.9999992100254105, iteration: 106789
loss: 1.0020177364349365,grad_norm: 0.9952633387429518, iteration: 106790
loss: 1.0362051725387573,grad_norm: 0.9442496643068028, iteration: 106791
loss: 0.9767029285430908,grad_norm: 0.9999992546274358, iteration: 106792
loss: 0.9957494735717773,grad_norm: 0.9999990623332204, iteration: 106793
loss: 0.9846290946006775,grad_norm: 0.8204425086794699, iteration: 106794
loss: 0.9872809052467346,grad_norm: 0.9758844742892824, iteration: 106795
loss: 0.9995412826538086,grad_norm: 0.9511830697286433, iteration: 106796
loss: 1.0072691440582275,grad_norm: 0.9999990662639723, iteration: 106797
loss: 0.9946995973587036,grad_norm: 0.9437535651168756, iteration: 106798
loss: 0.980266809463501,grad_norm: 0.9999990651090103, iteration: 106799
loss: 1.0143834352493286,grad_norm: 0.9999992111125245, iteration: 106800
loss: 1.0132174491882324,grad_norm: 0.9999990592677377, iteration: 106801
loss: 1.0066317319869995,grad_norm: 0.9999991061329805, iteration: 106802
loss: 0.9914641380310059,grad_norm: 0.9999994782947559, iteration: 106803
loss: 0.9847924113273621,grad_norm: 0.9835058621891599, iteration: 106804
loss: 0.9858424067497253,grad_norm: 0.9795836007837057, iteration: 106805
loss: 0.9955036640167236,grad_norm: 0.9999989090910079, iteration: 106806
loss: 1.0063735246658325,grad_norm: 0.9888965598355461, iteration: 106807
loss: 1.0213918685913086,grad_norm: 0.9999992512413087, iteration: 106808
loss: 1.041649341583252,grad_norm: 0.9539965122459334, iteration: 106809
loss: 0.9770249724388123,grad_norm: 0.9999991080985522, iteration: 106810
loss: 0.9953559637069702,grad_norm: 0.9501396981325096, iteration: 106811
loss: 1.0288866758346558,grad_norm: 0.8597958363708819, iteration: 106812
loss: 0.9953795075416565,grad_norm: 0.926269532525493, iteration: 106813
loss: 0.9900016188621521,grad_norm: 0.9999991943968009, iteration: 106814
loss: 0.9921705722808838,grad_norm: 0.8791391535100416, iteration: 106815
loss: 0.995049238204956,grad_norm: 0.9999991940660754, iteration: 106816
loss: 0.9665167927742004,grad_norm: 0.8809966754868417, iteration: 106817
loss: 1.022973895072937,grad_norm: 0.9999990958186807, iteration: 106818
loss: 0.9833865761756897,grad_norm: 0.9999999662891887, iteration: 106819
loss: 0.9960182309150696,grad_norm: 0.9999992451200255, iteration: 106820
loss: 1.0066827535629272,grad_norm: 0.944968647312934, iteration: 106821
loss: 1.0092380046844482,grad_norm: 0.9999999274967151, iteration: 106822
loss: 0.9898681640625,grad_norm: 0.9999991499873092, iteration: 106823
loss: 0.9942278265953064,grad_norm: 0.999999258246405, iteration: 106824
loss: 1.0082831382751465,grad_norm: 0.936075008605243, iteration: 106825
loss: 1.0209980010986328,grad_norm: 0.9907162703856279, iteration: 106826
loss: 1.0193709135055542,grad_norm: 0.9469225029720492, iteration: 106827
loss: 1.0075527429580688,grad_norm: 0.9999990751359055, iteration: 106828
loss: 0.9881131649017334,grad_norm: 0.9725916312720112, iteration: 106829
loss: 0.9433305859565735,grad_norm: 0.9682754601602349, iteration: 106830
loss: 1.0046888589859009,grad_norm: 0.9712515317761278, iteration: 106831
loss: 0.992010235786438,grad_norm: 0.9999991848888673, iteration: 106832
loss: 1.0605401992797852,grad_norm: 0.9999991435027501, iteration: 106833
loss: 0.969435453414917,grad_norm: 0.9999990451180273, iteration: 106834
loss: 0.978324830532074,grad_norm: 0.999999059402741, iteration: 106835
loss: 1.019544005393982,grad_norm: 0.9442004779811116, iteration: 106836
loss: 0.9648756384849548,grad_norm: 0.9999991583780554, iteration: 106837
loss: 0.9953333139419556,grad_norm: 0.9393679373364263, iteration: 106838
loss: 0.998317539691925,grad_norm: 0.9529758314188441, iteration: 106839
loss: 1.0275721549987793,grad_norm: 0.9330920212611854, iteration: 106840
loss: 0.987186074256897,grad_norm: 0.9999992319289186, iteration: 106841
loss: 0.9859380722045898,grad_norm: 0.9999991047340645, iteration: 106842
loss: 1.0076247453689575,grad_norm: 0.9999990229669022, iteration: 106843
loss: 0.9492428302764893,grad_norm: 0.999999174423109, iteration: 106844
loss: 1.0111310482025146,grad_norm: 0.9999992153807032, iteration: 106845
loss: 1.0290814638137817,grad_norm: 0.9999990441410522, iteration: 106846
loss: 0.9908146262168884,grad_norm: 0.963072417595095, iteration: 106847
loss: 0.9988872408866882,grad_norm: 0.9999993168637071, iteration: 106848
loss: 0.9761248826980591,grad_norm: 0.9876417261712251, iteration: 106849
loss: 0.9944469928741455,grad_norm: 0.99999912086958, iteration: 106850
loss: 0.990355372428894,grad_norm: 0.9999991967458682, iteration: 106851
loss: 1.0170077085494995,grad_norm: 0.9999990634958705, iteration: 106852
loss: 0.9925535321235657,grad_norm: 0.9999996115890233, iteration: 106853
loss: 1.0220112800598145,grad_norm: 0.9999994785591986, iteration: 106854
loss: 1.0081731081008911,grad_norm: 0.9999991621343576, iteration: 106855
loss: 0.9859777092933655,grad_norm: 0.9999991857501144, iteration: 106856
loss: 1.0415679216384888,grad_norm: 0.9999999560519461, iteration: 106857
loss: 1.0298811197280884,grad_norm: 0.9999990955025807, iteration: 106858
loss: 0.981397271156311,grad_norm: 0.8460762981690328, iteration: 106859
loss: 0.9873705506324768,grad_norm: 0.999998931553081, iteration: 106860
loss: 0.9888461232185364,grad_norm: 0.8567569721417796, iteration: 106861
loss: 1.0161535739898682,grad_norm: 0.9386111269788826, iteration: 106862
loss: 1.0095691680908203,grad_norm: 0.9999990481417267, iteration: 106863
loss: 1.009292483329773,grad_norm: 0.9999990618196706, iteration: 106864
loss: 1.0126932859420776,grad_norm: 0.8354415509178365, iteration: 106865
loss: 1.045297622680664,grad_norm: 0.9831896618331859, iteration: 106866
loss: 1.011256456375122,grad_norm: 0.9999992303353376, iteration: 106867
loss: 0.991302490234375,grad_norm: 0.999999178964045, iteration: 106868
loss: 0.9804402589797974,grad_norm: 0.9605507507960924, iteration: 106869
loss: 1.0042431354522705,grad_norm: 0.9999993189192927, iteration: 106870
loss: 1.0346678495407104,grad_norm: 0.9999992439635133, iteration: 106871
loss: 1.0002576112747192,grad_norm: 0.9462587211152542, iteration: 106872
loss: 0.9851370453834534,grad_norm: 0.9999991083601916, iteration: 106873
loss: 1.0095576047897339,grad_norm: 0.9999992274660311, iteration: 106874
loss: 0.9936243295669556,grad_norm: 0.9999990364486104, iteration: 106875
loss: 1.0220744609832764,grad_norm: 0.9999990381781406, iteration: 106876
loss: 0.9833365678787231,grad_norm: 0.9884836340956519, iteration: 106877
loss: 1.0000330209732056,grad_norm: 0.8206785287013121, iteration: 106878
loss: 0.9949459433555603,grad_norm: 0.9999990354329581, iteration: 106879
loss: 1.0140634775161743,grad_norm: 0.937402012936646, iteration: 106880
loss: 0.9940845370292664,grad_norm: 0.9999991429350644, iteration: 106881
loss: 0.9848844408988953,grad_norm: 0.9999991381411798, iteration: 106882
loss: 1.0190244913101196,grad_norm: 0.9999990031469234, iteration: 106883
loss: 0.979448139667511,grad_norm: 0.9999989348789663, iteration: 106884
loss: 1.0044312477111816,grad_norm: 0.85765807090848, iteration: 106885
loss: 0.9949248433113098,grad_norm: 0.9976534817528635, iteration: 106886
loss: 0.9974789023399353,grad_norm: 0.9999991331773387, iteration: 106887
loss: 1.0155611038208008,grad_norm: 0.9999991230347688, iteration: 106888
loss: 0.9838587045669556,grad_norm: 0.9827854695796503, iteration: 106889
loss: 1.0143691301345825,grad_norm: 0.9999993077970252, iteration: 106890
loss: 0.9994571805000305,grad_norm: 0.8778179171540773, iteration: 106891
loss: 0.981512725353241,grad_norm: 0.9347410511428317, iteration: 106892
loss: 1.0191874504089355,grad_norm: 0.9999996697968159, iteration: 106893
loss: 1.0290330648422241,grad_norm: 0.9004487575097057, iteration: 106894
loss: 0.9948348999023438,grad_norm: 0.9999991437266681, iteration: 106895
loss: 0.9626482129096985,grad_norm: 0.9999990965772881, iteration: 106896
loss: 0.9856733679771423,grad_norm: 0.9999990618575971, iteration: 106897
loss: 1.0217139720916748,grad_norm: 0.9999991290046703, iteration: 106898
loss: 0.9783783555030823,grad_norm: 0.9999990145622639, iteration: 106899
loss: 0.9691082835197449,grad_norm: 0.9999990596378366, iteration: 106900
loss: 1.043947458267212,grad_norm: 0.9158146906010635, iteration: 106901
loss: 0.9754115343093872,grad_norm: 0.9999991972464399, iteration: 106902
loss: 0.9772011637687683,grad_norm: 0.9999991789427398, iteration: 106903
loss: 1.013411521911621,grad_norm: 0.9999990299027676, iteration: 106904
loss: 0.9585543870925903,grad_norm: 0.8813895943950834, iteration: 106905
loss: 0.9910504817962646,grad_norm: 0.9431411652844537, iteration: 106906
loss: 1.0399349927902222,grad_norm: 0.9893344207730577, iteration: 106907
loss: 0.9941127896308899,grad_norm: 0.9999991749090941, iteration: 106908
loss: 1.00198233127594,grad_norm: 0.9567325530775274, iteration: 106909
loss: 0.96633380651474,grad_norm: 0.9999991126983458, iteration: 106910
loss: 1.008757472038269,grad_norm: 0.9999992531690687, iteration: 106911
loss: 1.0246025323867798,grad_norm: 0.9999990297239402, iteration: 106912
loss: 0.9878256916999817,grad_norm: 0.9999990186471048, iteration: 106913
loss: 1.0290511846542358,grad_norm: 0.9999992917729033, iteration: 106914
loss: 1.0386921167373657,grad_norm: 0.9999991709073708, iteration: 106915
loss: 0.9850747585296631,grad_norm: 0.9999991436169416, iteration: 106916
loss: 1.0120999813079834,grad_norm: 0.8236417374322335, iteration: 106917
loss: 1.0197783708572388,grad_norm: 0.8331895186749655, iteration: 106918
loss: 1.0007508993148804,grad_norm: 0.9999994255913232, iteration: 106919
loss: 1.0213124752044678,grad_norm: 0.9999992004129133, iteration: 106920
loss: 0.9893988966941833,grad_norm: 0.9999991915030265, iteration: 106921
loss: 1.0251778364181519,grad_norm: 0.9980743852133116, iteration: 106922
loss: 1.0291078090667725,grad_norm: 0.9999991298408902, iteration: 106923
loss: 1.0025824308395386,grad_norm: 0.9652348660618661, iteration: 106924
loss: 1.0029845237731934,grad_norm: 0.9999989694447715, iteration: 106925
loss: 0.9691030383110046,grad_norm: 0.9798044043746902, iteration: 106926
loss: 1.0118622779846191,grad_norm: 0.999999072725977, iteration: 106927
loss: 1.0291507244110107,grad_norm: 0.9999990794043898, iteration: 106928
loss: 1.0318710803985596,grad_norm: 0.9999990472869806, iteration: 106929
loss: 1.0138580799102783,grad_norm: 0.9999992193006211, iteration: 106930
loss: 0.9839038252830505,grad_norm: 0.9999996925094355, iteration: 106931
loss: 0.9894271492958069,grad_norm: 0.9056863102812219, iteration: 106932
loss: 1.0208454132080078,grad_norm: 0.9999991112620874, iteration: 106933
loss: 0.9816205501556396,grad_norm: 0.9999990669319869, iteration: 106934
loss: 0.9802786707878113,grad_norm: 0.9968554005649825, iteration: 106935
loss: 0.9880319833755493,grad_norm: 0.9999990529041326, iteration: 106936
loss: 1.1053982973098755,grad_norm: 0.9999992004742507, iteration: 106937
loss: 1.0417128801345825,grad_norm: 0.9509562783979312, iteration: 106938
loss: 0.9794683456420898,grad_norm: 0.9849471769961989, iteration: 106939
loss: 0.9992986917495728,grad_norm: 0.9999992044390027, iteration: 106940
loss: 1.0028529167175293,grad_norm: 0.9999992063434058, iteration: 106941
loss: 1.0253901481628418,grad_norm: 0.9999991073442966, iteration: 106942
loss: 0.9850934147834778,grad_norm: 0.999999078047126, iteration: 106943
loss: 0.9997681379318237,grad_norm: 0.9267811351881156, iteration: 106944
loss: 0.9960824847221375,grad_norm: 0.8237533260675098, iteration: 106945
loss: 1.0116682052612305,grad_norm: 0.9999990610598076, iteration: 106946
loss: 0.9971408843994141,grad_norm: 0.9999996369246027, iteration: 106947
loss: 1.0144479274749756,grad_norm: 0.9196469747893485, iteration: 106948
loss: 0.9693801999092102,grad_norm: 0.9999990893748725, iteration: 106949
loss: 0.9941766858100891,grad_norm: 0.999999179694517, iteration: 106950
loss: 0.9877861142158508,grad_norm: 0.9999990761645834, iteration: 106951
loss: 0.982118546962738,grad_norm: 0.9999990489072954, iteration: 106952
loss: 1.0114471912384033,grad_norm: 0.999999280142008, iteration: 106953
loss: 0.9751250743865967,grad_norm: 0.8709868373708003, iteration: 106954
loss: 0.9594914317131042,grad_norm: 0.9999991386249721, iteration: 106955
loss: 1.023290991783142,grad_norm: 0.8866904205867085, iteration: 106956
loss: 0.9907312989234924,grad_norm: 0.9440305344811518, iteration: 106957
loss: 1.012518286705017,grad_norm: 0.9999992362568295, iteration: 106958
loss: 0.9479519128799438,grad_norm: 0.9563482706078676, iteration: 106959
loss: 1.0097590684890747,grad_norm: 0.9999991875244667, iteration: 106960
loss: 0.9864705204963684,grad_norm: 0.9999992067315658, iteration: 106961
loss: 0.9688831567764282,grad_norm: 0.9601837084285065, iteration: 106962
loss: 0.9646367430686951,grad_norm: 0.9999991133521746, iteration: 106963
loss: 0.9310379028320312,grad_norm: 0.9590274024652617, iteration: 106964
loss: 1.0077954530715942,grad_norm: 0.8743751875019306, iteration: 106965
loss: 0.9979506134986877,grad_norm: 0.9999991182532794, iteration: 106966
loss: 0.9954973459243774,grad_norm: 0.9999991879768769, iteration: 106967
loss: 0.9889966249465942,grad_norm: 0.9999990459457717, iteration: 106968
loss: 1.0483242273330688,grad_norm: 0.9999993987192143, iteration: 106969
loss: 0.9696722030639648,grad_norm: 0.9999991088819509, iteration: 106970
loss: 0.9991428852081299,grad_norm: 0.999999206494026, iteration: 106971
loss: 0.9888566732406616,grad_norm: 0.999999216237832, iteration: 106972
loss: 0.9794275164604187,grad_norm: 0.9999993046422689, iteration: 106973
loss: 1.0325298309326172,grad_norm: 0.9999993863304938, iteration: 106974
loss: 1.0054000616073608,grad_norm: 0.9999990302357501, iteration: 106975
loss: 0.9681710600852966,grad_norm: 0.9560411336017796, iteration: 106976
loss: 0.9804548025131226,grad_norm: 0.999999336767423, iteration: 106977
loss: 1.009992241859436,grad_norm: 0.9767953538858328, iteration: 106978
loss: 1.0597877502441406,grad_norm: 0.9999992348510466, iteration: 106979
loss: 0.9499837756156921,grad_norm: 0.9555930303891185, iteration: 106980
loss: 0.9801738262176514,grad_norm: 0.9613039020916183, iteration: 106981
loss: 1.0209197998046875,grad_norm: 0.9999990573457249, iteration: 106982
loss: 1.0134384632110596,grad_norm: 0.8757846279831255, iteration: 106983
loss: 1.0051593780517578,grad_norm: 0.9438228677149879, iteration: 106984
loss: 0.9824623465538025,grad_norm: 0.8697760080100491, iteration: 106985
loss: 0.9851490259170532,grad_norm: 0.9999993639115141, iteration: 106986
loss: 1.0268266201019287,grad_norm: 0.9999990648616577, iteration: 106987
loss: 1.0169532299041748,grad_norm: 0.999999198477396, iteration: 106988
loss: 1.0271656513214111,grad_norm: 0.9999991379669576, iteration: 106989
loss: 0.9463051557540894,grad_norm: 0.9999992346334584, iteration: 106990
loss: 1.051658272743225,grad_norm: 0.9999994162264891, iteration: 106991
loss: 1.0353471040725708,grad_norm: 0.9999991319793647, iteration: 106992
loss: 1.0025025606155396,grad_norm: 0.9999989672289131, iteration: 106993
loss: 1.0269125699996948,grad_norm: 0.9903391671405154, iteration: 106994
loss: 1.0036448240280151,grad_norm: 0.9426179463901333, iteration: 106995
loss: 0.987859308719635,grad_norm: 0.9999992117782754, iteration: 106996
loss: 1.0361982583999634,grad_norm: 0.8126460420843383, iteration: 106997
loss: 1.0117526054382324,grad_norm: 0.9544783874306056, iteration: 106998
loss: 0.991321861743927,grad_norm: 0.929874329241411, iteration: 106999
loss: 1.0104142427444458,grad_norm: 0.999999146899264, iteration: 107000
loss: 0.9603309631347656,grad_norm: 0.9999990275623039, iteration: 107001
loss: 0.9883005023002625,grad_norm: 0.9184852295764838, iteration: 107002
loss: 0.9885157346725464,grad_norm: 0.9999992026073901, iteration: 107003
loss: 1.0353024005889893,grad_norm: 0.9505653125241423, iteration: 107004
loss: 1.0158380270004272,grad_norm: 0.9999990648882279, iteration: 107005
loss: 1.0176682472229004,grad_norm: 0.9827561417223415, iteration: 107006
loss: 1.005452275276184,grad_norm: 0.9999996745062862, iteration: 107007
loss: 0.9968923926353455,grad_norm: 0.9919852615208924, iteration: 107008
loss: 0.9927994012832642,grad_norm: 0.9999990824426481, iteration: 107009
loss: 1.0101741552352905,grad_norm: 0.9999991690340945, iteration: 107010
loss: 0.9998254776000977,grad_norm: 0.9999991024518482, iteration: 107011
loss: 1.0044487714767456,grad_norm: 0.9396823306253163, iteration: 107012
loss: 0.9999904036521912,grad_norm: 0.8785527958045476, iteration: 107013
loss: 0.9793003797531128,grad_norm: 0.9999991170877909, iteration: 107014
loss: 0.9821829199790955,grad_norm: 0.9062147946126294, iteration: 107015
loss: 0.9824244379997253,grad_norm: 0.9750572619650362, iteration: 107016
loss: 1.0286626815795898,grad_norm: 0.9999991791500091, iteration: 107017
loss: 1.0148247480392456,grad_norm: 0.9611308246815252, iteration: 107018
loss: 1.0003912448883057,grad_norm: 0.9999990631917532, iteration: 107019
loss: 1.0000041723251343,grad_norm: 0.9999992066842407, iteration: 107020
loss: 1.007798671722412,grad_norm: 0.9850427853662496, iteration: 107021
loss: 0.9981023669242859,grad_norm: 0.9999990525138758, iteration: 107022
loss: 1.0223201513290405,grad_norm: 0.9999990742982303, iteration: 107023
loss: 1.0004287958145142,grad_norm: 0.9999991298757838, iteration: 107024
loss: 0.9801498055458069,grad_norm: 0.9999991190538972, iteration: 107025
loss: 1.020961046218872,grad_norm: 0.999999006706338, iteration: 107026
loss: 1.0059993267059326,grad_norm: 0.9999990133359408, iteration: 107027
loss: 1.0200083255767822,grad_norm: 0.9999994499739747, iteration: 107028
loss: 0.9759793877601624,grad_norm: 0.9999991450540626, iteration: 107029
loss: 0.9852347373962402,grad_norm: 0.9138129644665773, iteration: 107030
loss: 0.9486586451530457,grad_norm: 0.9999990971629518, iteration: 107031
loss: 0.9987329840660095,grad_norm: 0.9999992151953528, iteration: 107032
loss: 0.9900619387626648,grad_norm: 0.9999989672362718, iteration: 107033
loss: 0.9873833656311035,grad_norm: 0.9999992169687846, iteration: 107034
loss: 0.9901525974273682,grad_norm: 0.9999991161443486, iteration: 107035
loss: 1.007880449295044,grad_norm: 0.999999170416081, iteration: 107036
loss: 0.9912096858024597,grad_norm: 0.9999990892150392, iteration: 107037
loss: 1.006981611251831,grad_norm: 0.9999991580633176, iteration: 107038
loss: 0.9788001775741577,grad_norm: 0.9492627897018748, iteration: 107039
loss: 0.9976715445518494,grad_norm: 0.999999049497839, iteration: 107040
loss: 1.027113914489746,grad_norm: 0.9999990502892849, iteration: 107041
loss: 0.9912376403808594,grad_norm: 0.8682767030564497, iteration: 107042
loss: 0.9931408166885376,grad_norm: 0.8754554808395563, iteration: 107043
loss: 0.9788753986358643,grad_norm: 0.9999990941027321, iteration: 107044
loss: 1.1163192987442017,grad_norm: 0.9999992795406961, iteration: 107045
loss: 0.9943683743476868,grad_norm: 0.9999993038756955, iteration: 107046
loss: 1.011754035949707,grad_norm: 0.9999990697782375, iteration: 107047
loss: 1.001293420791626,grad_norm: 0.924553348252928, iteration: 107048
loss: 1.0063382387161255,grad_norm: 0.9999991325089829, iteration: 107049
loss: 1.0157647132873535,grad_norm: 0.981605546278253, iteration: 107050
loss: 0.9643423557281494,grad_norm: 0.886037616277282, iteration: 107051
loss: 0.9654691815376282,grad_norm: 0.9799424396934517, iteration: 107052
loss: 1.0215200185775757,grad_norm: 0.9999991294815046, iteration: 107053
loss: 0.993828296661377,grad_norm: 0.9999991486540948, iteration: 107054
loss: 1.0152997970581055,grad_norm: 0.9614669532369637, iteration: 107055
loss: 0.9952640533447266,grad_norm: 0.9999991060556251, iteration: 107056
loss: 0.967911422252655,grad_norm: 0.9640113115600288, iteration: 107057
loss: 0.9744589328765869,grad_norm: 0.9999991389921375, iteration: 107058
loss: 1.0161327123641968,grad_norm: 0.9999991879350539, iteration: 107059
loss: 1.0178260803222656,grad_norm: 0.9999994995890372, iteration: 107060
loss: 0.9555671215057373,grad_norm: 0.9163553669710953, iteration: 107061
loss: 1.0353641510009766,grad_norm: 0.9999990313165694, iteration: 107062
loss: 0.9675167202949524,grad_norm: 0.9531316636923002, iteration: 107063
loss: 0.9854400753974915,grad_norm: 0.9999994926436392, iteration: 107064
loss: 1.0348774194717407,grad_norm: 0.9999998280298624, iteration: 107065
loss: 1.0058653354644775,grad_norm: 0.993247176433456, iteration: 107066
loss: 0.9865279793739319,grad_norm: 0.9999992170548455, iteration: 107067
loss: 0.9765394330024719,grad_norm: 0.9313582959639005, iteration: 107068
loss: 0.9501287341117859,grad_norm: 0.9523933223339205, iteration: 107069
loss: 0.9989876747131348,grad_norm: 0.9733961142954128, iteration: 107070
loss: 0.9719322919845581,grad_norm: 0.9182166856037721, iteration: 107071
loss: 1.020243525505066,grad_norm: 0.9650836204512998, iteration: 107072
loss: 0.9995518326759338,grad_norm: 0.7922870898874846, iteration: 107073
loss: 0.9895569086074829,grad_norm: 0.9999990569001942, iteration: 107074
loss: 0.9999247193336487,grad_norm: 0.9242672801084415, iteration: 107075
loss: 1.0022289752960205,grad_norm: 0.9999990844824728, iteration: 107076
loss: 1.0008189678192139,grad_norm: 0.9930653336771883, iteration: 107077
loss: 0.9697904586791992,grad_norm: 0.9999992108898924, iteration: 107078
loss: 1.0174274444580078,grad_norm: 0.9999990191450883, iteration: 107079
loss: 1.0370149612426758,grad_norm: 0.999999033895815, iteration: 107080
loss: 0.9651387333869934,grad_norm: 0.9999991576233785, iteration: 107081
loss: 1.0537010431289673,grad_norm: 0.9411401466466149, iteration: 107082
loss: 1.0021995306015015,grad_norm: 0.9999991695315676, iteration: 107083
loss: 1.0108234882354736,grad_norm: 0.9999990868761298, iteration: 107084
loss: 1.0070054531097412,grad_norm: 0.9999991925878426, iteration: 107085
loss: 0.9982037544250488,grad_norm: 0.9999991595202089, iteration: 107086
loss: 0.99451744556427,grad_norm: 0.9999992228802446, iteration: 107087
loss: 0.963382363319397,grad_norm: 0.8200177913207141, iteration: 107088
loss: 0.9578206539154053,grad_norm: 0.9999992266671179, iteration: 107089
loss: 1.0221946239471436,grad_norm: 0.9999991426419832, iteration: 107090
loss: 1.02066969871521,grad_norm: 0.9999991131591045, iteration: 107091
loss: 0.988584578037262,grad_norm: 0.9422152310483141, iteration: 107092
loss: 0.9931111335754395,grad_norm: 0.9999992077749235, iteration: 107093
loss: 1.0066889524459839,grad_norm: 0.9999991729390872, iteration: 107094
loss: 1.0137566328048706,grad_norm: 0.9173279688251418, iteration: 107095
loss: 1.0133472681045532,grad_norm: 0.9999992396229437, iteration: 107096
loss: 1.0023411512374878,grad_norm: 0.8944726276851213, iteration: 107097
loss: 1.017687439918518,grad_norm: 0.99999902298916, iteration: 107098
loss: 1.0016592741012573,grad_norm: 0.8750140481341004, iteration: 107099
loss: 1.0293506383895874,grad_norm: 0.9705338912331853, iteration: 107100
loss: 0.9968771934509277,grad_norm: 0.9999991313228573, iteration: 107101
loss: 1.0171760320663452,grad_norm: 0.9999991763686946, iteration: 107102
loss: 0.9961270689964294,grad_norm: 0.997319433338937, iteration: 107103
loss: 1.0199800729751587,grad_norm: 0.9999991052954117, iteration: 107104
loss: 1.020379662513733,grad_norm: 0.9140067105255522, iteration: 107105
loss: 1.0015292167663574,grad_norm: 0.9999992711842832, iteration: 107106
loss: 1.0177942514419556,grad_norm: 0.9999992589886337, iteration: 107107
loss: 1.0212255716323853,grad_norm: 0.9833391973676158, iteration: 107108
loss: 0.9917488694190979,grad_norm: 0.8392185567475926, iteration: 107109
loss: 1.023454189300537,grad_norm: 0.9999991103997155, iteration: 107110
loss: 0.9834237694740295,grad_norm: 0.9999990097624197, iteration: 107111
loss: 0.9711220860481262,grad_norm: 0.9998376507182367, iteration: 107112
loss: 0.998586893081665,grad_norm: 0.9999991275342324, iteration: 107113
loss: 1.016161322593689,grad_norm: 0.963724286770964, iteration: 107114
loss: 1.024070382118225,grad_norm: 0.9745567422159288, iteration: 107115
loss: 1.0467904806137085,grad_norm: 0.9999990093634861, iteration: 107116
loss: 1.013256549835205,grad_norm: 0.9999992883175133, iteration: 107117
loss: 1.0020503997802734,grad_norm: 0.9412886870431602, iteration: 107118
loss: 1.0220776796340942,grad_norm: 0.9999991322999272, iteration: 107119
loss: 1.0107736587524414,grad_norm: 0.9999992319616001, iteration: 107120
loss: 0.9737502932548523,grad_norm: 0.9488916151478182, iteration: 107121
loss: 0.9752305150032043,grad_norm: 0.9999989451257489, iteration: 107122
loss: 0.996713399887085,grad_norm: 0.9999990534443878, iteration: 107123
loss: 0.9972816705703735,grad_norm: 0.9999998152447587, iteration: 107124
loss: 0.9783421754837036,grad_norm: 0.9999991842383626, iteration: 107125
loss: 1.0493460893630981,grad_norm: 0.9999991190822533, iteration: 107126
loss: 0.9993046522140503,grad_norm: 0.9999991723852534, iteration: 107127
loss: 1.0273774862289429,grad_norm: 0.9999992227262233, iteration: 107128
loss: 1.0140835046768188,grad_norm: 0.999999232805131, iteration: 107129
loss: 1.0344170331954956,grad_norm: 0.999999131267718, iteration: 107130
loss: 1.020511269569397,grad_norm: 0.9548237649708179, iteration: 107131
loss: 1.00947105884552,grad_norm: 0.9999990418975246, iteration: 107132
loss: 0.968290388584137,grad_norm: 0.9999991545842606, iteration: 107133
loss: 1.02411687374115,grad_norm: 0.9318049874695306, iteration: 107134
loss: 1.005335807800293,grad_norm: 0.9999990907017637, iteration: 107135
loss: 1.0179839134216309,grad_norm: 0.904774493822333, iteration: 107136
loss: 1.0074704885482788,grad_norm: 0.9999990319144638, iteration: 107137
loss: 1.0296568870544434,grad_norm: 0.9999991177193018, iteration: 107138
loss: 0.9810611605644226,grad_norm: 0.9999990295565895, iteration: 107139
loss: 1.0163580179214478,grad_norm: 0.9999998504847891, iteration: 107140
loss: 0.992009162902832,grad_norm: 0.9999990685926012, iteration: 107141
loss: 1.0219388008117676,grad_norm: 0.9999992653322728, iteration: 107142
loss: 0.986053466796875,grad_norm: 0.9169986364892307, iteration: 107143
loss: 1.0355530977249146,grad_norm: 0.9999991734968365, iteration: 107144
loss: 0.9679107666015625,grad_norm: 0.999999036117758, iteration: 107145
loss: 0.9972753524780273,grad_norm: 0.9999989518755341, iteration: 107146
loss: 0.9734308123588562,grad_norm: 0.9960683125292279, iteration: 107147
loss: 1.011198878288269,grad_norm: 0.9999992969586503, iteration: 107148
loss: 1.034334659576416,grad_norm: 0.999999606749167, iteration: 107149
loss: 0.9982420206069946,grad_norm: 0.9573944151404231, iteration: 107150
loss: 0.9960725903511047,grad_norm: 0.9320493515192064, iteration: 107151
loss: 0.9767321348190308,grad_norm: 0.9999992280791502, iteration: 107152
loss: 1.0706336498260498,grad_norm: 0.9999995542427924, iteration: 107153
loss: 1.0146383047103882,grad_norm: 0.9999989919711295, iteration: 107154
loss: 1.002644658088684,grad_norm: 0.9999991195421271, iteration: 107155
loss: 1.007239818572998,grad_norm: 0.8961532146147747, iteration: 107156
loss: 0.9709068536758423,grad_norm: 0.86639021157876, iteration: 107157
loss: 1.0237600803375244,grad_norm: 0.9999991433839749, iteration: 107158
loss: 0.963161826133728,grad_norm: 0.9097324088199096, iteration: 107159
loss: 0.9750853180885315,grad_norm: 0.9261573206066807, iteration: 107160
loss: 1.003493309020996,grad_norm: 0.9999990285653576, iteration: 107161
loss: 1.0327688455581665,grad_norm: 0.9999995517183158, iteration: 107162
loss: 0.9944534301757812,grad_norm: 0.9738088804318017, iteration: 107163
loss: 1.0172570943832397,grad_norm: 0.9999990262828187, iteration: 107164
loss: 1.056396722793579,grad_norm: 0.9999995287140594, iteration: 107165
loss: 0.9939046502113342,grad_norm: 0.9088757648297755, iteration: 107166
loss: 0.9689561724662781,grad_norm: 0.9999991877069375, iteration: 107167
loss: 0.9943429231643677,grad_norm: 0.9999991287233895, iteration: 107168
loss: 1.0045242309570312,grad_norm: 0.9999991788845076, iteration: 107169
loss: 1.0177854299545288,grad_norm: 0.8703153580710256, iteration: 107170
loss: 0.9716177582740784,grad_norm: 0.9999992525461318, iteration: 107171
loss: 0.9879247546195984,grad_norm: 0.9794986881655703, iteration: 107172
loss: 1.0234746932983398,grad_norm: 0.9999989656865685, iteration: 107173
loss: 1.040022850036621,grad_norm: 0.9999992566706845, iteration: 107174
loss: 0.9790438413619995,grad_norm: 0.9999992745100554, iteration: 107175
loss: 1.0158888101577759,grad_norm: 0.9999991689687769, iteration: 107176
loss: 1.014775037765503,grad_norm: 0.9554602158678064, iteration: 107177
loss: 1.0105630159378052,grad_norm: 0.9999989473901817, iteration: 107178
loss: 1.0427340269088745,grad_norm: 0.9999990610704468, iteration: 107179
loss: 1.0236806869506836,grad_norm: 0.9999991323568269, iteration: 107180
loss: 1.0017588138580322,grad_norm: 0.9999990059107114, iteration: 107181
loss: 1.0217351913452148,grad_norm: 0.9999991509674334, iteration: 107182
loss: 1.0060012340545654,grad_norm: 0.9752017786765174, iteration: 107183
loss: 0.9663851261138916,grad_norm: 0.9999991089950543, iteration: 107184
loss: 0.991416335105896,grad_norm: 0.999999330795021, iteration: 107185
loss: 0.9669458270072937,grad_norm: 0.9999992237119921, iteration: 107186
loss: 1.0288891792297363,grad_norm: 0.9999997347613211, iteration: 107187
loss: 1.0081769227981567,grad_norm: 0.926634704498013, iteration: 107188
loss: 1.01063072681427,grad_norm: 0.9041343916309562, iteration: 107189
loss: 1.0187150239944458,grad_norm: 0.9999991215390548, iteration: 107190
loss: 1.0251662731170654,grad_norm: 0.9999989674096692, iteration: 107191
loss: 0.9950320720672607,grad_norm: 0.9999990781549808, iteration: 107192
loss: 0.9683418869972229,grad_norm: 0.9999990296734852, iteration: 107193
loss: 0.9917457103729248,grad_norm: 0.9999989770550717, iteration: 107194
loss: 1.0102338790893555,grad_norm: 0.9999991379742621, iteration: 107195
loss: 1.0043697357177734,grad_norm: 0.9999990310447678, iteration: 107196
loss: 1.0334995985031128,grad_norm: 0.9999992775830837, iteration: 107197
loss: 1.0022342205047607,grad_norm: 0.9999990907262698, iteration: 107198
loss: 1.0628688335418701,grad_norm: 0.9999997693988586, iteration: 107199
loss: 0.9626798629760742,grad_norm: 0.9999991071852773, iteration: 107200
loss: 0.9998176693916321,grad_norm: 0.8662901720141742, iteration: 107201
loss: 1.0293185710906982,grad_norm: 0.999999277224692, iteration: 107202
loss: 1.0051528215408325,grad_norm: 0.9999991713923415, iteration: 107203
loss: 0.9780355095863342,grad_norm: 0.9949553985062756, iteration: 107204
loss: 1.0045181512832642,grad_norm: 0.9918567173326253, iteration: 107205
loss: 1.023482084274292,grad_norm: 0.9999991786649349, iteration: 107206
loss: 0.9576132893562317,grad_norm: 0.9999991156647519, iteration: 107207
loss: 1.0115920305252075,grad_norm: 0.8690858037276534, iteration: 107208
loss: 1.0111888647079468,grad_norm: 0.9999991659408954, iteration: 107209
loss: 1.0318363904953003,grad_norm: 0.9999992183549006, iteration: 107210
loss: 1.0058687925338745,grad_norm: 0.9999992342123434, iteration: 107211
loss: 1.079209327697754,grad_norm: 0.9999996284787944, iteration: 107212
loss: 1.014539361000061,grad_norm: 0.8693372255091757, iteration: 107213
loss: 0.9823926687240601,grad_norm: 0.9999991329109985, iteration: 107214
loss: 1.0316165685653687,grad_norm: 0.9999997709626351, iteration: 107215
loss: 1.0386343002319336,grad_norm: 0.9999991670817281, iteration: 107216
loss: 1.0295939445495605,grad_norm: 0.9999990925295178, iteration: 107217
loss: 1.0610400438308716,grad_norm: 0.9999996956566053, iteration: 107218
loss: 1.015481948852539,grad_norm: 0.9999991363946849, iteration: 107219
loss: 0.9798206686973572,grad_norm: 0.8916623758766025, iteration: 107220
loss: 1.000370740890503,grad_norm: 0.9999990627208973, iteration: 107221
loss: 1.0058505535125732,grad_norm: 0.9999989801780453, iteration: 107222
loss: 1.0056408643722534,grad_norm: 0.905507737587384, iteration: 107223
loss: 1.0032774209976196,grad_norm: 0.999999199270323, iteration: 107224
loss: 1.1149481534957886,grad_norm: 0.9999990864116742, iteration: 107225
loss: 0.986852765083313,grad_norm: 0.9899709059151665, iteration: 107226
loss: 1.0295768976211548,grad_norm: 0.8946552765031888, iteration: 107227
loss: 1.0133686065673828,grad_norm: 0.9999991605327644, iteration: 107228
loss: 1.085433006286621,grad_norm: 0.9999998560221581, iteration: 107229
loss: 0.9986449480056763,grad_norm: 0.9999991515754988, iteration: 107230
loss: 1.0363298654556274,grad_norm: 0.941900992441192, iteration: 107231
loss: 1.0017149448394775,grad_norm: 0.9999990114839513, iteration: 107232
loss: 0.9591728448867798,grad_norm: 0.9471678180832807, iteration: 107233
loss: 0.9804191589355469,grad_norm: 0.9509575442197553, iteration: 107234
loss: 1.0037230253219604,grad_norm: 0.9175947640520812, iteration: 107235
loss: 0.9773067235946655,grad_norm: 0.9999991315100213, iteration: 107236
loss: 1.027549386024475,grad_norm: 0.9941698476931448, iteration: 107237
loss: 1.0105358362197876,grad_norm: 0.9417354614198141, iteration: 107238
loss: 0.9809758067131042,grad_norm: 0.9236894413280505, iteration: 107239
loss: 1.008666753768921,grad_norm: 0.9999991731306924, iteration: 107240
loss: 0.9920455813407898,grad_norm: 0.9999992819005794, iteration: 107241
loss: 1.0045150518417358,grad_norm: 0.9999989875453181, iteration: 107242
loss: 0.985396146774292,grad_norm: 0.9625597839640978, iteration: 107243
loss: 1.0099998712539673,grad_norm: 0.8652987065577813, iteration: 107244
loss: 1.0084770917892456,grad_norm: 0.9999992137187118, iteration: 107245
loss: 1.0235071182250977,grad_norm: 0.9999992267379278, iteration: 107246
loss: 0.9428818225860596,grad_norm: 0.9999991882219946, iteration: 107247
loss: 1.0134094953536987,grad_norm: 0.928486054936841, iteration: 107248
loss: 1.023921251296997,grad_norm: 0.9999990526605156, iteration: 107249
loss: 0.9960469603538513,grad_norm: 0.9999990864220498, iteration: 107250
loss: 0.9863784313201904,grad_norm: 0.9169491244562827, iteration: 107251
loss: 0.9961951375007629,grad_norm: 0.9999992167254782, iteration: 107252
loss: 0.973915159702301,grad_norm: 0.9999992148204807, iteration: 107253
loss: 1.0047687292099,grad_norm: 0.8548201799457908, iteration: 107254
loss: 1.009203314781189,grad_norm: 0.9875702041299649, iteration: 107255
loss: 1.0287363529205322,grad_norm: 0.9650242322448757, iteration: 107256
loss: 1.042302131652832,grad_norm: 0.8575501850173782, iteration: 107257
loss: 1.00855553150177,grad_norm: 0.9778723431394877, iteration: 107258
loss: 1.0044583082199097,grad_norm: 0.999999075055624, iteration: 107259
loss: 0.9911499619483948,grad_norm: 0.9999990659610196, iteration: 107260
loss: 1.012758731842041,grad_norm: 0.9999992804616574, iteration: 107261
loss: 1.021907091140747,grad_norm: 0.9542549870190273, iteration: 107262
loss: 0.9789443612098694,grad_norm: 0.9999991567931716, iteration: 107263
loss: 1.022896409034729,grad_norm: 0.9999992897389194, iteration: 107264
loss: 1.0343682765960693,grad_norm: 0.9277706746263407, iteration: 107265
loss: 0.9649026989936829,grad_norm: 0.9761700806314377, iteration: 107266
loss: 1.0111312866210938,grad_norm: 0.9999991975273295, iteration: 107267
loss: 1.0111836194992065,grad_norm: 0.999999075399934, iteration: 107268
loss: 1.0335825681686401,grad_norm: 0.9999992335564425, iteration: 107269
loss: 0.9918259382247925,grad_norm: 0.9999991581341244, iteration: 107270
loss: 0.977813720703125,grad_norm: 0.9999990202258664, iteration: 107271
loss: 1.0499482154846191,grad_norm: 0.9999990482059721, iteration: 107272
loss: 0.9972063302993774,grad_norm: 0.9168306140341727, iteration: 107273
loss: 0.9874401688575745,grad_norm: 0.9999992808896604, iteration: 107274
loss: 0.9961059093475342,grad_norm: 0.9999991514167614, iteration: 107275
loss: 1.0150086879730225,grad_norm: 0.9383647387440504, iteration: 107276
loss: 1.0272798538208008,grad_norm: 0.9109553649795442, iteration: 107277
loss: 1.0034947395324707,grad_norm: 0.9999990998847003, iteration: 107278
loss: 1.0055756568908691,grad_norm: 0.9987745456276343, iteration: 107279
loss: 0.963285505771637,grad_norm: 0.9476817300184558, iteration: 107280
loss: 1.0162708759307861,grad_norm: 0.9999991628989486, iteration: 107281
loss: 0.9762132167816162,grad_norm: 0.99999902014613, iteration: 107282
loss: 0.9582163691520691,grad_norm: 0.9999990920394063, iteration: 107283
loss: 1.0463688373565674,grad_norm: 0.9999991101826339, iteration: 107284
loss: 0.9773764610290527,grad_norm: 0.9999990711497857, iteration: 107285
loss: 0.9944263696670532,grad_norm: 0.999999071181263, iteration: 107286
loss: 1.0329203605651855,grad_norm: 0.9999998540279882, iteration: 107287
loss: 1.0167639255523682,grad_norm: 0.999999383456267, iteration: 107288
loss: 0.9885954260826111,grad_norm: 0.8963148120884041, iteration: 107289
loss: 0.9904537200927734,grad_norm: 0.9999990407620032, iteration: 107290
loss: 1.0296216011047363,grad_norm: 0.999999117066679, iteration: 107291
loss: 0.9906514883041382,grad_norm: 0.9369827127176649, iteration: 107292
loss: 0.9775627851486206,grad_norm: 0.999999319658291, iteration: 107293
loss: 0.9668772220611572,grad_norm: 0.9999991676607702, iteration: 107294
loss: 1.0282273292541504,grad_norm: 0.9999992026190523, iteration: 107295
loss: 1.0239909887313843,grad_norm: 0.999999272753825, iteration: 107296
loss: 0.9673116207122803,grad_norm: 0.905042633400306, iteration: 107297
loss: 0.9929467439651489,grad_norm: 0.9999991619009836, iteration: 107298
loss: 1.0008089542388916,grad_norm: 0.9999991538187485, iteration: 107299
loss: 0.9699769616127014,grad_norm: 0.9276721512471162, iteration: 107300
loss: 1.0108814239501953,grad_norm: 0.9999991832286892, iteration: 107301
loss: 1.0121444463729858,grad_norm: 0.9999989410899948, iteration: 107302
loss: 1.0101127624511719,grad_norm: 0.9999991305515741, iteration: 107303
loss: 0.9976312518119812,grad_norm: 0.9999990799054349, iteration: 107304
loss: 1.0401383638381958,grad_norm: 0.9999990963783847, iteration: 107305
loss: 0.9878817200660706,grad_norm: 0.92045593695935, iteration: 107306
loss: 0.9911549687385559,grad_norm: 0.9999992071974889, iteration: 107307
loss: 1.0046333074569702,grad_norm: 0.9999992096587426, iteration: 107308
loss: 0.9689437747001648,grad_norm: 0.9999991601719141, iteration: 107309
loss: 1.0290722846984863,grad_norm: 0.9999992763202354, iteration: 107310
loss: 1.0179673433303833,grad_norm: 0.9999992000460157, iteration: 107311
loss: 0.9896685481071472,grad_norm: 0.9999991624087916, iteration: 107312
loss: 1.0355982780456543,grad_norm: 0.9999994221527808, iteration: 107313
loss: 1.013953447341919,grad_norm: 0.999999426362694, iteration: 107314
loss: 1.0172992944717407,grad_norm: 0.9984584471276674, iteration: 107315
loss: 0.9824007153511047,grad_norm: 0.9999989788931649, iteration: 107316
loss: 1.0035117864608765,grad_norm: 0.9341371833832453, iteration: 107317
loss: 0.956573486328125,grad_norm: 0.9999990736885702, iteration: 107318
loss: 1.0203005075454712,grad_norm: 0.9999992448163999, iteration: 107319
loss: 0.9962447285652161,grad_norm: 0.9999991887053564, iteration: 107320
loss: 1.024112582206726,grad_norm: 0.9999991957633959, iteration: 107321
loss: 0.983474612236023,grad_norm: 0.9999991252870584, iteration: 107322
loss: 1.0058887004852295,grad_norm: 0.9999992844206643, iteration: 107323
loss: 1.0254521369934082,grad_norm: 0.9999991237305194, iteration: 107324
loss: 0.9725755453109741,grad_norm: 0.9999993018576826, iteration: 107325
loss: 0.9812467694282532,grad_norm: 0.9999995057109057, iteration: 107326
loss: 0.9668267369270325,grad_norm: 0.9999992411994131, iteration: 107327
loss: 0.9828813672065735,grad_norm: 0.980915687355404, iteration: 107328
loss: 0.9916240572929382,grad_norm: 0.9999991160420787, iteration: 107329
loss: 1.0093250274658203,grad_norm: 0.999998951632691, iteration: 107330
loss: 1.0246134996414185,grad_norm: 0.9999990158704888, iteration: 107331
loss: 1.0112550258636475,grad_norm: 0.908411243380315, iteration: 107332
loss: 1.067531704902649,grad_norm: 0.9999996826673345, iteration: 107333
loss: 1.0029969215393066,grad_norm: 0.9999991202525553, iteration: 107334
loss: 0.9946123957633972,grad_norm: 0.9999990932244177, iteration: 107335
loss: 0.9976356029510498,grad_norm: 0.9999990333614964, iteration: 107336
loss: 0.9938596487045288,grad_norm: 0.9999991514131755, iteration: 107337
loss: 1.023193359375,grad_norm: 0.9999990593482234, iteration: 107338
loss: 0.9800736308097839,grad_norm: 0.9999992078949292, iteration: 107339
loss: 1.0059301853179932,grad_norm: 0.9999990975583979, iteration: 107340
loss: 0.9956314563751221,grad_norm: 0.9999995261986212, iteration: 107341
loss: 1.0387409925460815,grad_norm: 0.9999991749979784, iteration: 107342
loss: 1.0191453695297241,grad_norm: 0.9470715409216315, iteration: 107343
loss: 0.9942173361778259,grad_norm: 0.999999004641357, iteration: 107344
loss: 1.0170537233352661,grad_norm: 0.9999993020826219, iteration: 107345
loss: 1.008750081062317,grad_norm: 0.8528539315820354, iteration: 107346
loss: 1.0019035339355469,grad_norm: 0.999999020852191, iteration: 107347
loss: 1.03456711769104,grad_norm: 0.9999991751330976, iteration: 107348
loss: 1.0005388259887695,grad_norm: 0.9444257385483606, iteration: 107349
loss: 0.9899654388427734,grad_norm: 0.8733763219152981, iteration: 107350
loss: 0.9994732141494751,grad_norm: 0.9999989878240103, iteration: 107351
loss: 1.0221047401428223,grad_norm: 0.9246276772411006, iteration: 107352
loss: 0.9659634232521057,grad_norm: 0.9908970597425417, iteration: 107353
loss: 1.0460230112075806,grad_norm: 0.9999992649824114, iteration: 107354
loss: 0.9603498578071594,grad_norm: 0.999999046494361, iteration: 107355
loss: 1.1223526000976562,grad_norm: 0.999999110847901, iteration: 107356
loss: 1.0103821754455566,grad_norm: 0.9999990848466884, iteration: 107357
loss: 1.0151879787445068,grad_norm: 0.9999992758607246, iteration: 107358
loss: 1.1056437492370605,grad_norm: 0.9999989675745729, iteration: 107359
loss: 1.0544345378875732,grad_norm: 0.9999990220121391, iteration: 107360
loss: 0.9971935153007507,grad_norm: 0.9999993225392428, iteration: 107361
loss: 1.0181928873062134,grad_norm: 0.9311209851898185, iteration: 107362
loss: 1.0204441547393799,grad_norm: 0.9999989574458701, iteration: 107363
loss: 0.9974439740180969,grad_norm: 0.9889295761333413, iteration: 107364
loss: 1.023006558418274,grad_norm: 0.9909344785613355, iteration: 107365
loss: 0.9732379913330078,grad_norm: 0.9335969320431148, iteration: 107366
loss: 0.9755945205688477,grad_norm: 0.9999990884689128, iteration: 107367
loss: 0.984752893447876,grad_norm: 0.9999989252471816, iteration: 107368
loss: 0.9915187954902649,grad_norm: 0.9999991247350513, iteration: 107369
loss: 1.0537043809890747,grad_norm: 0.9999991031053493, iteration: 107370
loss: 1.0107667446136475,grad_norm: 0.9580673244486825, iteration: 107371
loss: 0.9877498745918274,grad_norm: 0.9464707410633766, iteration: 107372
loss: 1.0040119886398315,grad_norm: 0.9609993492005195, iteration: 107373
loss: 0.9898790717124939,grad_norm: 0.9994857485584425, iteration: 107374
loss: 0.9961459636688232,grad_norm: 0.9461250972696749, iteration: 107375
loss: 0.9979246258735657,grad_norm: 0.8849078624241855, iteration: 107376
loss: 0.9708201885223389,grad_norm: 0.9999991201740123, iteration: 107377
loss: 0.9887444972991943,grad_norm: 0.9999990829971785, iteration: 107378
loss: 0.9768275618553162,grad_norm: 0.8718828267240026, iteration: 107379
loss: 1.0069226026535034,grad_norm: 0.9439953061937775, iteration: 107380
loss: 0.987845242023468,grad_norm: 0.9999991521118661, iteration: 107381
loss: 0.9850670099258423,grad_norm: 0.9999991254402496, iteration: 107382
loss: 1.0374552011489868,grad_norm: 0.8537893588910597, iteration: 107383
loss: 1.005509376525879,grad_norm: 0.9999991547140702, iteration: 107384
loss: 1.024520754814148,grad_norm: 0.9999992323406346, iteration: 107385
loss: 1.0033270120620728,grad_norm: 0.9238647040931248, iteration: 107386
loss: 0.97821044921875,grad_norm: 0.8559967669421072, iteration: 107387
loss: 0.9678938388824463,grad_norm: 0.9999992697035218, iteration: 107388
loss: 0.9649152159690857,grad_norm: 0.9999990981289469, iteration: 107389
loss: 1.028794527053833,grad_norm: 0.9999991843176977, iteration: 107390
loss: 0.9875268936157227,grad_norm: 0.8885550494920065, iteration: 107391
loss: 0.9992680549621582,grad_norm: 0.9597526206641124, iteration: 107392
loss: 0.9582987427711487,grad_norm: 0.999999198100374, iteration: 107393
loss: 0.9995267987251282,grad_norm: 0.9999992394461932, iteration: 107394
loss: 0.9994627237319946,grad_norm: 0.9999992200859926, iteration: 107395
loss: 0.9888668060302734,grad_norm: 0.9999990103326298, iteration: 107396
loss: 1.003924012184143,grad_norm: 0.9999994651512183, iteration: 107397
loss: 0.9993363618850708,grad_norm: 0.9999990242688371, iteration: 107398
loss: 0.9928460121154785,grad_norm: 0.9999991075698109, iteration: 107399
loss: 0.9898959994316101,grad_norm: 0.9999990601942169, iteration: 107400
loss: 0.9705080389976501,grad_norm: 0.9920644073601935, iteration: 107401
loss: 0.9706234931945801,grad_norm: 0.9313847629590122, iteration: 107402
loss: 1.0102555751800537,grad_norm: 0.9289008146243196, iteration: 107403
loss: 1.0067819356918335,grad_norm: 0.9875863160365969, iteration: 107404
loss: 0.9782339334487915,grad_norm: 0.9999993694965246, iteration: 107405
loss: 1.0247540473937988,grad_norm: 0.8468839044760722, iteration: 107406
loss: 0.9756390452384949,grad_norm: 0.9999991248679516, iteration: 107407
loss: 1.0019986629486084,grad_norm: 0.8933580286645051, iteration: 107408
loss: 1.0231069326400757,grad_norm: 0.9135446795161141, iteration: 107409
loss: 0.9888566732406616,grad_norm: 0.9999990030628606, iteration: 107410
loss: 1.0028908252716064,grad_norm: 0.9999990851966857, iteration: 107411
loss: 1.004870891571045,grad_norm: 0.9289638017329844, iteration: 107412
loss: 1.0207617282867432,grad_norm: 0.9920452885872046, iteration: 107413
loss: 1.0200594663619995,grad_norm: 0.9999991720575413, iteration: 107414
loss: 0.9982702136039734,grad_norm: 0.99999912019927, iteration: 107415
loss: 1.0088603496551514,grad_norm: 0.8660004930272571, iteration: 107416
loss: 1.0002986192703247,grad_norm: 0.9743314140098895, iteration: 107417
loss: 1.0379399061203003,grad_norm: 0.9999990615775877, iteration: 107418
loss: 1.0483845472335815,grad_norm: 0.9875741129494034, iteration: 107419
loss: 1.0040110349655151,grad_norm: 0.9999990633041896, iteration: 107420
loss: 0.9558505415916443,grad_norm: 0.9611042759533742, iteration: 107421
loss: 1.0312482118606567,grad_norm: 0.9999990704962212, iteration: 107422
loss: 0.9924885034561157,grad_norm: 0.9999990408272514, iteration: 107423
loss: 0.9850736260414124,grad_norm: 0.9999990144637614, iteration: 107424
loss: 0.9806850552558899,grad_norm: 0.9999991773090581, iteration: 107425
loss: 1.0023094415664673,grad_norm: 0.9999990257326581, iteration: 107426
loss: 1.0168874263763428,grad_norm: 0.9999992319436996, iteration: 107427
loss: 1.0191476345062256,grad_norm: 0.9999991409500439, iteration: 107428
loss: 0.9821439981460571,grad_norm: 0.9999992082073174, iteration: 107429
loss: 1.017403244972229,grad_norm: 0.9825211516552289, iteration: 107430
loss: 1.0269500017166138,grad_norm: 0.9597385194653004, iteration: 107431
loss: 0.9839154481887817,grad_norm: 0.9930811961246443, iteration: 107432
loss: 1.0063409805297852,grad_norm: 0.9987589602498566, iteration: 107433
loss: 1.0208815336227417,grad_norm: 0.9999991200773896, iteration: 107434
loss: 0.9639002084732056,grad_norm: 0.9999992547126698, iteration: 107435
loss: 1.0012156963348389,grad_norm: 0.9999993531572392, iteration: 107436
loss: 1.0292412042617798,grad_norm: 0.9999991091918065, iteration: 107437
loss: 1.0103567838668823,grad_norm: 0.9999991953146061, iteration: 107438
loss: 1.0100775957107544,grad_norm: 0.9999992550672749, iteration: 107439
loss: 0.9609419107437134,grad_norm: 0.9999989663710883, iteration: 107440
loss: 0.986924409866333,grad_norm: 0.9999992222880599, iteration: 107441
loss: 1.027677059173584,grad_norm: 0.9999990968294964, iteration: 107442
loss: 0.9671667814254761,grad_norm: 0.9528692097608407, iteration: 107443
loss: 0.9742448925971985,grad_norm: 0.9999991909974915, iteration: 107444
loss: 1.019640564918518,grad_norm: 0.9999991604044319, iteration: 107445
loss: 1.0040477514266968,grad_norm: 0.999999143502307, iteration: 107446
loss: 0.9910261034965515,grad_norm: 0.999999006362205, iteration: 107447
loss: 1.0109057426452637,grad_norm: 0.9999991518491437, iteration: 107448
loss: 1.02690851688385,grad_norm: 0.9999990674633583, iteration: 107449
loss: 0.969700276851654,grad_norm: 0.9999994112852606, iteration: 107450
loss: 1.0063093900680542,grad_norm: 0.9999992115855458, iteration: 107451
loss: 0.9963070750236511,grad_norm: 0.999999073607546, iteration: 107452
loss: 0.9567310214042664,grad_norm: 0.9999991988534699, iteration: 107453
loss: 1.026285171508789,grad_norm: 0.9999990836243906, iteration: 107454
loss: 0.9862616062164307,grad_norm: 0.973026396086695, iteration: 107455
loss: 1.001001238822937,grad_norm: 0.8512278892195617, iteration: 107456
loss: 0.9933706521987915,grad_norm: 0.9999991247969469, iteration: 107457
loss: 0.9957575798034668,grad_norm: 0.9951421877471773, iteration: 107458
loss: 1.0037063360214233,grad_norm: 0.9999991391762486, iteration: 107459
loss: 0.9635081887245178,grad_norm: 0.9117582901246717, iteration: 107460
loss: 1.0226658582687378,grad_norm: 0.9999990025487726, iteration: 107461
loss: 1.0002869367599487,grad_norm: 0.8912769069856462, iteration: 107462
loss: 0.9588063359260559,grad_norm: 0.9999989659694888, iteration: 107463
loss: 1.0033149719238281,grad_norm: 0.9999991774134418, iteration: 107464
loss: 0.9995862245559692,grad_norm: 0.9899489708000833, iteration: 107465
loss: 0.980664074420929,grad_norm: 0.9999990294205223, iteration: 107466
loss: 0.9674462080001831,grad_norm: 0.9999991266157081, iteration: 107467
loss: 0.9786103963851929,grad_norm: 0.9956978914277218, iteration: 107468
loss: 1.0120809078216553,grad_norm: 0.9076668972023018, iteration: 107469
loss: 1.0137909650802612,grad_norm: 0.9999991180726688, iteration: 107470
loss: 1.0143072605133057,grad_norm: 0.9214945663077135, iteration: 107471
loss: 0.9880087971687317,grad_norm: 0.9643693618756565, iteration: 107472
loss: 1.0291632413864136,grad_norm: 0.9999991008881237, iteration: 107473
loss: 0.9788632392883301,grad_norm: 0.9999992166424585, iteration: 107474
loss: 0.9731260538101196,grad_norm: 0.8860817339960662, iteration: 107475
loss: 1.0266326665878296,grad_norm: 0.9733515794170173, iteration: 107476
loss: 1.0489025115966797,grad_norm: 0.9999992480588019, iteration: 107477
loss: 1.0040940046310425,grad_norm: 0.9999990117373325, iteration: 107478
loss: 1.002233862876892,grad_norm: 0.9743403789062955, iteration: 107479
loss: 1.0020086765289307,grad_norm: 0.9999990140714614, iteration: 107480
loss: 0.9773811101913452,grad_norm: 0.9999991628482967, iteration: 107481
loss: 1.0174545049667358,grad_norm: 0.8461558985310368, iteration: 107482
loss: 0.9922848343849182,grad_norm: 0.8006967328180608, iteration: 107483
loss: 0.9987302422523499,grad_norm: 0.9999992212902703, iteration: 107484
loss: 0.982960045337677,grad_norm: 0.9999990342368399, iteration: 107485
loss: 0.9902520775794983,grad_norm: 0.9999992463697536, iteration: 107486
loss: 0.9991081953048706,grad_norm: 0.9999991984006257, iteration: 107487
loss: 1.0024083852767944,grad_norm: 0.9999990800063046, iteration: 107488
loss: 1.001631498336792,grad_norm: 0.9999990699281104, iteration: 107489
loss: 1.0238769054412842,grad_norm: 0.9999990324116861, iteration: 107490
loss: 0.9704445004463196,grad_norm: 0.9666521565308146, iteration: 107491
loss: 0.9976087808609009,grad_norm: 0.9607721579568133, iteration: 107492
loss: 1.0088609457015991,grad_norm: 0.9615937155710885, iteration: 107493
loss: 0.9673202037811279,grad_norm: 0.8791879323153378, iteration: 107494
loss: 0.9722223281860352,grad_norm: 0.9500419271169216, iteration: 107495
loss: 0.9623165130615234,grad_norm: 0.9023675376373034, iteration: 107496
loss: 1.0429010391235352,grad_norm: 0.9334589873551437, iteration: 107497
loss: 1.049075961112976,grad_norm: 0.9999991311026333, iteration: 107498
loss: 0.9887644648551941,grad_norm: 0.999999147823336, iteration: 107499
loss: 1.0385361909866333,grad_norm: 0.9999990608226141, iteration: 107500
loss: 0.994307816028595,grad_norm: 0.9999992078506234, iteration: 107501
loss: 1.0167425870895386,grad_norm: 0.9999990749673657, iteration: 107502
loss: 1.007450819015503,grad_norm: 0.9508011285287176, iteration: 107503
loss: 0.9582814574241638,grad_norm: 0.9999992582556247, iteration: 107504
loss: 1.0284756422042847,grad_norm: 0.9999991254907042, iteration: 107505
loss: 1.0123419761657715,grad_norm: 0.999999156842362, iteration: 107506
loss: 0.9662553668022156,grad_norm: 0.9418827165076332, iteration: 107507
loss: 0.9992982745170593,grad_norm: 0.9688887933899757, iteration: 107508
loss: 1.0329196453094482,grad_norm: 0.9999990094617978, iteration: 107509
loss: 0.9898950457572937,grad_norm: 0.999999171532538, iteration: 107510
loss: 0.9873458743095398,grad_norm: 0.9999993259418375, iteration: 107511
loss: 0.968548059463501,grad_norm: 0.9525700657396199, iteration: 107512
loss: 1.0468072891235352,grad_norm: 0.9999991505995334, iteration: 107513
loss: 1.0127853155136108,grad_norm: 0.9999993135798967, iteration: 107514
loss: 1.0022435188293457,grad_norm: 0.9999992481922562, iteration: 107515
loss: 0.9898078441619873,grad_norm: 0.9999992152802865, iteration: 107516
loss: 0.9845150113105774,grad_norm: 0.9999996126590853, iteration: 107517
loss: 0.989396333694458,grad_norm: 0.8911141478057418, iteration: 107518
loss: 1.0124353170394897,grad_norm: 0.9580427218864407, iteration: 107519
loss: 0.990796685218811,grad_norm: 0.972427027193929, iteration: 107520
loss: 1.0044313669204712,grad_norm: 0.9388724397713133, iteration: 107521
loss: 0.9590973854064941,grad_norm: 0.9999991209643154, iteration: 107522
loss: 0.9951053857803345,grad_norm: 0.9999990915990377, iteration: 107523
loss: 0.9778566956520081,grad_norm: 0.9394962207525447, iteration: 107524
loss: 0.9918527007102966,grad_norm: 0.9999989882790327, iteration: 107525
loss: 1.0227360725402832,grad_norm: 0.9885538515033788, iteration: 107526
loss: 0.9813397526741028,grad_norm: 0.9999991606119278, iteration: 107527
loss: 1.0478509664535522,grad_norm: 0.9999993691121934, iteration: 107528
loss: 1.017174482345581,grad_norm: 0.9999991764908092, iteration: 107529
loss: 1.023144006729126,grad_norm: 0.9999989538491706, iteration: 107530
loss: 1.0022649765014648,grad_norm: 0.886165648792125, iteration: 107531
loss: 0.9722198843955994,grad_norm: 0.9999991185837812, iteration: 107532
loss: 1.0323491096496582,grad_norm: 0.9999995373043454, iteration: 107533
loss: 0.9893066883087158,grad_norm: 0.9413253606275543, iteration: 107534
loss: 1.0018669366836548,grad_norm: 0.9071792619945473, iteration: 107535
loss: 0.9790668487548828,grad_norm: 0.856963198844218, iteration: 107536
loss: 1.0176141262054443,grad_norm: 0.9795461872926463, iteration: 107537
loss: 0.9946158528327942,grad_norm: 0.9170445245207587, iteration: 107538
loss: 1.027191162109375,grad_norm: 0.9999993654487818, iteration: 107539
loss: 0.9989618062973022,grad_norm: 0.9999989994584493, iteration: 107540
loss: 1.0185117721557617,grad_norm: 0.9999990925142198, iteration: 107541
loss: 0.9820957183837891,grad_norm: 0.9380220941003662, iteration: 107542
loss: 1.0100808143615723,grad_norm: 0.9999990996179492, iteration: 107543
loss: 1.007469892501831,grad_norm: 0.9999988268551455, iteration: 107544
loss: 1.0136638879776,grad_norm: 0.8777875926052383, iteration: 107545
loss: 1.0009288787841797,grad_norm: 0.997406768717614, iteration: 107546
loss: 1.002254843711853,grad_norm: 0.9999991039006118, iteration: 107547
loss: 1.0153311491012573,grad_norm: 0.9999988382796716, iteration: 107548
loss: 0.9976021647453308,grad_norm: 0.9999990414769551, iteration: 107549
loss: 0.9793681502342224,grad_norm: 0.8823307319945327, iteration: 107550
loss: 0.9975757598876953,grad_norm: 0.9999991862586632, iteration: 107551
loss: 1.001226782798767,grad_norm: 0.9999990968004465, iteration: 107552
loss: 0.9990673065185547,grad_norm: 0.953950854261802, iteration: 107553
loss: 0.9937059879302979,grad_norm: 0.9999990665852728, iteration: 107554
loss: 1.0062365531921387,grad_norm: 0.9999992738675054, iteration: 107555
loss: 0.9702364802360535,grad_norm: 0.9999990882061291, iteration: 107556
loss: 0.9934685826301575,grad_norm: 0.999999129055635, iteration: 107557
loss: 1.0060887336730957,grad_norm: 0.9999992568374302, iteration: 107558
loss: 1.018127679824829,grad_norm: 0.9999992248598776, iteration: 107559
loss: 0.9957112073898315,grad_norm: 0.9999993317734504, iteration: 107560
loss: 1.017547369003296,grad_norm: 0.999999064179706, iteration: 107561
loss: 1.0292041301727295,grad_norm: 0.9999992672075737, iteration: 107562
loss: 0.9896243810653687,grad_norm: 0.921927276806557, iteration: 107563
loss: 1.0225261449813843,grad_norm: 0.9999991890797335, iteration: 107564
loss: 1.0125107765197754,grad_norm: 0.9999991764669744, iteration: 107565
loss: 1.0050311088562012,grad_norm: 0.9999991036922795, iteration: 107566
loss: 1.00441575050354,grad_norm: 0.9082851552347146, iteration: 107567
loss: 0.9832425713539124,grad_norm: 0.999999191558146, iteration: 107568
loss: 0.9723895788192749,grad_norm: 0.9999992412508325, iteration: 107569
loss: 0.9925699830055237,grad_norm: 0.9999991426011796, iteration: 107570
loss: 1.0080095529556274,grad_norm: 0.9999991026667167, iteration: 107571
loss: 0.9727583527565002,grad_norm: 0.9999993003760645, iteration: 107572
loss: 0.9717983603477478,grad_norm: 0.999999106346534, iteration: 107573
loss: 1.0482133626937866,grad_norm: 0.999999290568117, iteration: 107574
loss: 1.0159876346588135,grad_norm: 0.9486725912077338, iteration: 107575
loss: 0.9875537157058716,grad_norm: 0.9083064770459748, iteration: 107576
loss: 0.9912463426589966,grad_norm: 0.9421199908288589, iteration: 107577
loss: 1.0134005546569824,grad_norm: 0.9999990620065597, iteration: 107578
loss: 1.0173128843307495,grad_norm: 0.8486345628887642, iteration: 107579
loss: 1.0222669839859009,grad_norm: 0.999999174945243, iteration: 107580
loss: 1.0026438236236572,grad_norm: 0.972780164715772, iteration: 107581
loss: 0.9945791959762573,grad_norm: 0.9315075005169378, iteration: 107582
loss: 1.0124340057373047,grad_norm: 0.999999190044106, iteration: 107583
loss: 1.008224606513977,grad_norm: 0.9663292787876551, iteration: 107584
loss: 0.9975683093070984,grad_norm: 0.9999991259148389, iteration: 107585
loss: 1.0359348058700562,grad_norm: 0.8569626969862084, iteration: 107586
loss: 1.0054631233215332,grad_norm: 0.9999991972895107, iteration: 107587
loss: 0.9890936613082886,grad_norm: 0.9999990551709764, iteration: 107588
loss: 0.9915860295295715,grad_norm: 0.9726392194286283, iteration: 107589
loss: 1.0162100791931152,grad_norm: 0.9999990620316536, iteration: 107590
loss: 1.0157873630523682,grad_norm: 0.8960393967630959, iteration: 107591
loss: 1.0040783882141113,grad_norm: 0.9999991506595117, iteration: 107592
loss: 0.9940082430839539,grad_norm: 0.9283632627188679, iteration: 107593
loss: 0.9519456624984741,grad_norm: 0.9960628397289715, iteration: 107594
loss: 1.0215905904769897,grad_norm: 0.9999989909149442, iteration: 107595
loss: 1.0502463579177856,grad_norm: 0.9999992828887962, iteration: 107596
loss: 0.9889093041419983,grad_norm: 0.9314462691532553, iteration: 107597
loss: 0.9744194149971008,grad_norm: 0.9999991054272953, iteration: 107598
loss: 1.0028984546661377,grad_norm: 0.9999991467274496, iteration: 107599
loss: 1.017861247062683,grad_norm: 0.9017354461412274, iteration: 107600
loss: 1.0246467590332031,grad_norm: 0.9999992608685238, iteration: 107601
loss: 0.9740388989448547,grad_norm: 0.9999990260813824, iteration: 107602
loss: 0.9753902554512024,grad_norm: 0.9672784060628253, iteration: 107603
loss: 0.9906893968582153,grad_norm: 0.9999991177960277, iteration: 107604
loss: 0.9841213822364807,grad_norm: 0.999999135513729, iteration: 107605
loss: 0.9940868020057678,grad_norm: 0.9650960627827954, iteration: 107606
loss: 0.9969022870063782,grad_norm: 0.9711812358616645, iteration: 107607
loss: 1.0250571966171265,grad_norm: 0.9999992296314215, iteration: 107608
loss: 1.0051724910736084,grad_norm: 0.9999993407535533, iteration: 107609
loss: 1.0182856321334839,grad_norm: 0.9999991828764935, iteration: 107610
loss: 0.9674003720283508,grad_norm: 0.9999990726968262, iteration: 107611
loss: 0.9961629509925842,grad_norm: 0.9999992305586822, iteration: 107612
loss: 1.0194318294525146,grad_norm: 0.9999990882208077, iteration: 107613
loss: 0.986892819404602,grad_norm: 0.8658739420624305, iteration: 107614
loss: 0.9958735704421997,grad_norm: 0.9999992376721182, iteration: 107615
loss: 0.9970707893371582,grad_norm: 0.9339748487158701, iteration: 107616
loss: 0.9822368621826172,grad_norm: 0.9999991313000525, iteration: 107617
loss: 0.9979591369628906,grad_norm: 0.9999991326852403, iteration: 107618
loss: 1.0353295803070068,grad_norm: 0.9999992498975927, iteration: 107619
loss: 1.0211992263793945,grad_norm: 0.9999991443433892, iteration: 107620
loss: 0.9446814656257629,grad_norm: 0.9535460336980679, iteration: 107621
loss: 0.9593523740768433,grad_norm: 0.9999990439930934, iteration: 107622
loss: 1.0097087621688843,grad_norm: 0.9999992121498538, iteration: 107623
loss: 1.011298656463623,grad_norm: 0.9999992846960147, iteration: 107624
loss: 1.0141769647598267,grad_norm: 0.9999992712775211, iteration: 107625
loss: 1.0248736143112183,grad_norm: 0.9417273948957066, iteration: 107626
loss: 0.9874339699745178,grad_norm: 0.9999991483423272, iteration: 107627
loss: 0.9794417023658752,grad_norm: 0.9007389443650313, iteration: 107628
loss: 1.0260884761810303,grad_norm: 0.9999990351445909, iteration: 107629
loss: 0.9676135778427124,grad_norm: 0.9999991998077291, iteration: 107630
loss: 1.0287448167800903,grad_norm: 0.9772700361889455, iteration: 107631
loss: 0.9916315674781799,grad_norm: 0.9832536788048014, iteration: 107632
loss: 0.9817808270454407,grad_norm: 0.9999990446903094, iteration: 107633
loss: 1.006129503250122,grad_norm: 0.9999990301708045, iteration: 107634
loss: 0.9853420853614807,grad_norm: 0.9999992458412451, iteration: 107635
loss: 0.9808985590934753,grad_norm: 0.9625950805788861, iteration: 107636
loss: 0.9639305472373962,grad_norm: 0.9999991034184551, iteration: 107637
loss: 1.008249044418335,grad_norm: 0.9999992531608188, iteration: 107638
loss: 1.0307949781417847,grad_norm: 0.9999990413353536, iteration: 107639
loss: 1.0069427490234375,grad_norm: 0.9999990627108358, iteration: 107640
loss: 1.0308308601379395,grad_norm: 0.9999992171240288, iteration: 107641
loss: 1.0111231803894043,grad_norm: 0.9999991475970317, iteration: 107642
loss: 0.9860621094703674,grad_norm: 0.9999991314782679, iteration: 107643
loss: 0.9921833276748657,grad_norm: 0.9999991247455975, iteration: 107644
loss: 1.0006582736968994,grad_norm: 0.999999179541215, iteration: 107645
loss: 1.0560873746871948,grad_norm: 0.9999990569823102, iteration: 107646
loss: 0.9665085077285767,grad_norm: 0.8722709084783999, iteration: 107647
loss: 1.0186829566955566,grad_norm: 0.831661779948012, iteration: 107648
loss: 1.0122896432876587,grad_norm: 0.9999991112121115, iteration: 107649
loss: 0.9910294413566589,grad_norm: 0.9999991208478836, iteration: 107650
loss: 0.9963016510009766,grad_norm: 0.9999991973349358, iteration: 107651
loss: 1.0089889764785767,grad_norm: 0.9999992517186972, iteration: 107652
loss: 0.9876596331596375,grad_norm: 0.9005079449613561, iteration: 107653
loss: 0.9841157793998718,grad_norm: 0.9724022578911358, iteration: 107654
loss: 0.9533820748329163,grad_norm: 0.9999991539004555, iteration: 107655
loss: 0.999917209148407,grad_norm: 0.9921012048873198, iteration: 107656
loss: 0.9665371775627136,grad_norm: 0.9196412010487409, iteration: 107657
loss: 1.0159904956817627,grad_norm: 0.8712936363450359, iteration: 107658
loss: 0.9631015658378601,grad_norm: 0.9999990031510337, iteration: 107659
loss: 0.9912385940551758,grad_norm: 0.8123855067778218, iteration: 107660
loss: 1.0188716650009155,grad_norm: 0.8698855282178021, iteration: 107661
loss: 0.9919556379318237,grad_norm: 0.9999990894441255, iteration: 107662
loss: 0.9911377429962158,grad_norm: 0.9157634676090597, iteration: 107663
loss: 0.9496873617172241,grad_norm: 0.9999990614701412, iteration: 107664
loss: 0.9986803531646729,grad_norm: 0.9603110367018086, iteration: 107665
loss: 1.0047587156295776,grad_norm: 0.9999990563369774, iteration: 107666
loss: 1.0260629653930664,grad_norm: 0.8492969677129862, iteration: 107667
loss: 1.014398455619812,grad_norm: 0.9999991260385263, iteration: 107668
loss: 1.0369446277618408,grad_norm: 0.9999990704092376, iteration: 107669
loss: 1.0915793180465698,grad_norm: 0.9999999005632911, iteration: 107670
loss: 1.0168943405151367,grad_norm: 0.9929262756776214, iteration: 107671
loss: 1.0077677965164185,grad_norm: 0.9999991632283098, iteration: 107672
loss: 0.9663545489311218,grad_norm: 0.8829925715156915, iteration: 107673
loss: 0.9958760738372803,grad_norm: 0.9999991855921392, iteration: 107674
loss: 1.1175566911697388,grad_norm: 0.999999849830549, iteration: 107675
loss: 1.0137509107589722,grad_norm: 0.9999990484517958, iteration: 107676
loss: 0.9903993010520935,grad_norm: 0.9945188107671774, iteration: 107677
loss: 0.9765815734863281,grad_norm: 0.9999991273617899, iteration: 107678
loss: 1.0073392391204834,grad_norm: 0.9999990943619494, iteration: 107679
loss: 1.0242549180984497,grad_norm: 0.9999990810096244, iteration: 107680
loss: 1.001847743988037,grad_norm: 0.989625527115044, iteration: 107681
loss: 0.9755107760429382,grad_norm: 0.9290477009951644, iteration: 107682
loss: 1.0689417123794556,grad_norm: 0.9999992777425337, iteration: 107683
loss: 1.0137779712677002,grad_norm: 0.974025411827479, iteration: 107684
loss: 1.0211749076843262,grad_norm: 0.9999991237803892, iteration: 107685
loss: 1.0489696264266968,grad_norm: 0.9999999186421609, iteration: 107686
loss: 1.0140173435211182,grad_norm: 0.9999990901589344, iteration: 107687
loss: 0.9803431034088135,grad_norm: 0.8648088453614684, iteration: 107688
loss: 0.9884067177772522,grad_norm: 0.9999990839572411, iteration: 107689
loss: 1.0229986906051636,grad_norm: 0.999999077100001, iteration: 107690
loss: 1.0020077228546143,grad_norm: 0.9133959079244497, iteration: 107691
loss: 1.0083566904067993,grad_norm: 0.8946418747141062, iteration: 107692
loss: 1.0236085653305054,grad_norm: 0.9999991583073868, iteration: 107693
loss: 0.975533127784729,grad_norm: 0.9999991602289383, iteration: 107694
loss: 0.9948922991752625,grad_norm: 0.934160943028345, iteration: 107695
loss: 0.9947803616523743,grad_norm: 0.9999993673222773, iteration: 107696
loss: 0.9441403746604919,grad_norm: 0.9999991153998417, iteration: 107697
loss: 0.9864524602890015,grad_norm: 0.9999991862330287, iteration: 107698
loss: 1.0139964818954468,grad_norm: 0.999999168187504, iteration: 107699
loss: 0.9695548415184021,grad_norm: 0.9999992182566372, iteration: 107700
loss: 1.0383508205413818,grad_norm: 0.999999138130149, iteration: 107701
loss: 0.9413347840309143,grad_norm: 0.9999991177816979, iteration: 107702
loss: 0.9686154723167419,grad_norm: 0.9999989647445832, iteration: 107703
loss: 0.9967882037162781,grad_norm: 0.969429299046355, iteration: 107704
loss: 0.9890007972717285,grad_norm: 0.9999991662764829, iteration: 107705
loss: 0.9680627584457397,grad_norm: 0.9325229529232072, iteration: 107706
loss: 1.0086816549301147,grad_norm: 0.9999992263138784, iteration: 107707
loss: 1.033349633216858,grad_norm: 0.999999150212289, iteration: 107708
loss: 0.996270477771759,grad_norm: 0.9240591802758089, iteration: 107709
loss: 0.9985233545303345,grad_norm: 0.9999991700487681, iteration: 107710
loss: 1.0031627416610718,grad_norm: 0.8941088502028501, iteration: 107711
loss: 0.9971358776092529,grad_norm: 0.9695971401306838, iteration: 107712
loss: 0.999508261680603,grad_norm: 0.9999991537900907, iteration: 107713
loss: 1.0059064626693726,grad_norm: 0.999999076800909, iteration: 107714
loss: 0.9707471132278442,grad_norm: 0.999999098750354, iteration: 107715
loss: 0.9903706312179565,grad_norm: 0.999999130115898, iteration: 107716
loss: 1.021731972694397,grad_norm: 0.9999991776087722, iteration: 107717
loss: 0.9924799799919128,grad_norm: 0.9999991013070947, iteration: 107718
loss: 0.9982903003692627,grad_norm: 0.9999989781127885, iteration: 107719
loss: 1.0031747817993164,grad_norm: 0.9999990832004322, iteration: 107720
loss: 1.019472360610962,grad_norm: 0.9999991335826582, iteration: 107721
loss: 0.9825940132141113,grad_norm: 0.999999198835409, iteration: 107722
loss: 1.0247684717178345,grad_norm: 0.9999993234292488, iteration: 107723
loss: 1.0042558908462524,grad_norm: 0.8776710898805808, iteration: 107724
loss: 1.006475567817688,grad_norm: 0.9999990298763551, iteration: 107725
loss: 1.0337599515914917,grad_norm: 0.9999994994234369, iteration: 107726
loss: 0.9910925626754761,grad_norm: 0.9999991736545253, iteration: 107727
loss: 0.998322606086731,grad_norm: 0.9999991208934786, iteration: 107728
loss: 0.9399006962776184,grad_norm: 0.9999989991972301, iteration: 107729
loss: 1.023844838142395,grad_norm: 0.999999115744317, iteration: 107730
loss: 1.0106465816497803,grad_norm: 0.9999992228238757, iteration: 107731
loss: 1.0384633541107178,grad_norm: 0.999999129381514, iteration: 107732
loss: 1.0128103494644165,grad_norm: 0.9999989385191145, iteration: 107733
loss: 0.9888675212860107,grad_norm: 0.9999992337620659, iteration: 107734
loss: 1.0102922916412354,grad_norm: 0.9999991852620399, iteration: 107735
loss: 1.0157759189605713,grad_norm: 0.9999991445321837, iteration: 107736
loss: 1.0160841941833496,grad_norm: 0.9999991669729453, iteration: 107737
loss: 0.9953763484954834,grad_norm: 0.9999992559920107, iteration: 107738
loss: 0.9926374554634094,grad_norm: 0.9748783126276532, iteration: 107739
loss: 0.9957753419876099,grad_norm: 0.9999991604015087, iteration: 107740
loss: 1.0321849584579468,grad_norm: 0.865469695267278, iteration: 107741
loss: 1.030314326286316,grad_norm: 0.9999991783590395, iteration: 107742
loss: 1.003322958946228,grad_norm: 0.9999991232918167, iteration: 107743
loss: 1.0080264806747437,grad_norm: 0.9999995083494988, iteration: 107744
loss: 0.9960217475891113,grad_norm: 0.9811591822450613, iteration: 107745
loss: 1.007973313331604,grad_norm: 0.9999991808603949, iteration: 107746
loss: 1.0010327100753784,grad_norm: 0.9999992428597229, iteration: 107747
loss: 0.995707631111145,grad_norm: 0.9999991014864857, iteration: 107748
loss: 0.9980009198188782,grad_norm: 0.999999095821444, iteration: 107749
loss: 1.0066075325012207,grad_norm: 0.8536015110135113, iteration: 107750
loss: 1.0076801776885986,grad_norm: 0.9999990796490366, iteration: 107751
loss: 0.9780011773109436,grad_norm: 0.9555059005241919, iteration: 107752
loss: 0.9951300024986267,grad_norm: 0.9999991769964381, iteration: 107753
loss: 1.0136380195617676,grad_norm: 0.9999992946437208, iteration: 107754
loss: 0.9806944727897644,grad_norm: 0.999999111570888, iteration: 107755
loss: 1.0353130102157593,grad_norm: 0.9015879730623119, iteration: 107756
loss: 0.9858886003494263,grad_norm: 0.9845561842964303, iteration: 107757
loss: 0.9909003376960754,grad_norm: 0.9988630741094009, iteration: 107758
loss: 1.027880311012268,grad_norm: 0.9999991115385829, iteration: 107759
loss: 0.9905939698219299,grad_norm: 0.9278435155402924, iteration: 107760
loss: 1.0034959316253662,grad_norm: 0.9999992414783289, iteration: 107761
loss: 1.0044605731964111,grad_norm: 0.9999992006552776, iteration: 107762
loss: 0.9784391522407532,grad_norm: 0.9999991100008384, iteration: 107763
loss: 0.9934684038162231,grad_norm: 0.8668921066659334, iteration: 107764
loss: 0.969097912311554,grad_norm: 0.8903995311978807, iteration: 107765
loss: 0.9819076061248779,grad_norm: 0.8668335913463798, iteration: 107766
loss: 1.0170373916625977,grad_norm: 0.9999989418538697, iteration: 107767
loss: 0.9887977242469788,grad_norm: 0.9999990587586289, iteration: 107768
loss: 0.9701579213142395,grad_norm: 0.9999989407803982, iteration: 107769
loss: 0.9987298250198364,grad_norm: 0.9688957339013059, iteration: 107770
loss: 1.0470131635665894,grad_norm: 0.999999127488422, iteration: 107771
loss: 0.996677577495575,grad_norm: 0.8867610020958149, iteration: 107772
loss: 1.0083376169204712,grad_norm: 0.9999993936627564, iteration: 107773
loss: 0.9903297424316406,grad_norm: 0.9398195613166308, iteration: 107774
loss: 1.010528564453125,grad_norm: 0.9999990790263339, iteration: 107775
loss: 1.0194456577301025,grad_norm: 0.999999051975045, iteration: 107776
loss: 0.9749109148979187,grad_norm: 0.9366626781496725, iteration: 107777
loss: 1.0395705699920654,grad_norm: 0.999999077770775, iteration: 107778
loss: 0.9986985921859741,grad_norm: 0.9999990291711314, iteration: 107779
loss: 0.9764454960823059,grad_norm: 0.9999992880075103, iteration: 107780
loss: 0.9988633394241333,grad_norm: 0.9999990245590824, iteration: 107781
loss: 0.9507507085800171,grad_norm: 0.9999991580667476, iteration: 107782
loss: 1.010973334312439,grad_norm: 0.9999989562503461, iteration: 107783
loss: 1.0039265155792236,grad_norm: 0.9999990802428151, iteration: 107784
loss: 0.9965755343437195,grad_norm: 0.99999904834319, iteration: 107785
loss: 1.025256633758545,grad_norm: 0.9999991338074472, iteration: 107786
loss: 0.9978184103965759,grad_norm: 0.9999991684895903, iteration: 107787
loss: 0.9607529640197754,grad_norm: 0.9999991428373938, iteration: 107788
loss: 1.0166373252868652,grad_norm: 0.9999991667657934, iteration: 107789
loss: 0.9823241233825684,grad_norm: 0.8872117393495658, iteration: 107790
loss: 1.0159200429916382,grad_norm: 0.9999990958736008, iteration: 107791
loss: 1.0087049007415771,grad_norm: 0.9951252607685883, iteration: 107792
loss: 1.008371353149414,grad_norm: 0.9999994765292599, iteration: 107793
loss: 1.000068187713623,grad_norm: 0.9999991011476179, iteration: 107794
loss: 1.0422688722610474,grad_norm: 0.9999994094248428, iteration: 107795
loss: 1.043473243713379,grad_norm: 0.999999252472465, iteration: 107796
loss: 0.9895723462104797,grad_norm: 0.999999146494223, iteration: 107797
loss: 1.0031579732894897,grad_norm: 0.9999991254647224, iteration: 107798
loss: 0.976495087146759,grad_norm: 0.9999991000627737, iteration: 107799
loss: 1.0105388164520264,grad_norm: 0.9999990354093543, iteration: 107800
loss: 1.115431308746338,grad_norm: 0.9999991331679685, iteration: 107801
loss: 1.0265341997146606,grad_norm: 0.9999992262725044, iteration: 107802
loss: 1.0225071907043457,grad_norm: 0.9999992194340606, iteration: 107803
loss: 0.9903675317764282,grad_norm: 0.9244430602211563, iteration: 107804
loss: 0.9771351218223572,grad_norm: 0.9588165542201001, iteration: 107805
loss: 0.9702017903327942,grad_norm: 0.9195276326419922, iteration: 107806
loss: 1.0173858404159546,grad_norm: 0.9999990525107003, iteration: 107807
loss: 0.9976275563240051,grad_norm: 0.9999991440062149, iteration: 107808
loss: 1.0159623622894287,grad_norm: 0.9999990599833588, iteration: 107809
loss: 1.0003384351730347,grad_norm: 0.9308781052774896, iteration: 107810
loss: 1.0424401760101318,grad_norm: 0.999999132737896, iteration: 107811
loss: 1.0224056243896484,grad_norm: 0.9999992676056393, iteration: 107812
loss: 0.9860495924949646,grad_norm: 0.8273576728815232, iteration: 107813
loss: 0.9945446848869324,grad_norm: 0.9999990466428232, iteration: 107814
loss: 1.0543040037155151,grad_norm: 0.9999993733054459, iteration: 107815
loss: 1.0121732950210571,grad_norm: 0.8345052866774704, iteration: 107816
loss: 1.0130118131637573,grad_norm: 0.8577880531157095, iteration: 107817
loss: 0.996165931224823,grad_norm: 0.9394597705234428, iteration: 107818
loss: 1.0520622730255127,grad_norm: 0.9999993126859188, iteration: 107819
loss: 0.9968979358673096,grad_norm: 0.9999993680537749, iteration: 107820
loss: 1.0008724927902222,grad_norm: 0.999999273484509, iteration: 107821
loss: 1.0216169357299805,grad_norm: 0.9999992705167766, iteration: 107822
loss: 1.0497223138809204,grad_norm: 0.9999990692215358, iteration: 107823
loss: 0.9829387664794922,grad_norm: 0.999999104096407, iteration: 107824
loss: 0.9895511269569397,grad_norm: 0.9999991445013372, iteration: 107825
loss: 1.0242726802825928,grad_norm: 0.9999992148246158, iteration: 107826
loss: 1.0137418508529663,grad_norm: 0.9999991870696926, iteration: 107827
loss: 1.0355465412139893,grad_norm: 0.9999991075778886, iteration: 107828
loss: 0.9851513504981995,grad_norm: 0.8909624089770534, iteration: 107829
loss: 1.0308877229690552,grad_norm: 0.9763446299957835, iteration: 107830
loss: 0.9969546794891357,grad_norm: 0.9999991793983634, iteration: 107831
loss: 0.991811215877533,grad_norm: 0.9999989955630263, iteration: 107832
loss: 1.0018482208251953,grad_norm: 0.9999992869706028, iteration: 107833
loss: 0.9696013927459717,grad_norm: 0.9628986520953066, iteration: 107834
loss: 1.0180343389511108,grad_norm: 0.8999707429553042, iteration: 107835
loss: 1.0058661699295044,grad_norm: 0.9999991037740198, iteration: 107836
loss: 0.9776855111122131,grad_norm: 0.8869716708042915, iteration: 107837
loss: 0.9804307818412781,grad_norm: 0.9999992642137963, iteration: 107838
loss: 0.9984193444252014,grad_norm: 0.9999991601631588, iteration: 107839
loss: 1.0029089450836182,grad_norm: 0.9999991318937729, iteration: 107840
loss: 1.0514500141143799,grad_norm: 0.999999662337614, iteration: 107841
loss: 1.00742506980896,grad_norm: 0.9999990472645566, iteration: 107842
loss: 0.9826813340187073,grad_norm: 0.9999992154047646, iteration: 107843
loss: 1.0355126857757568,grad_norm: 0.9999997866408822, iteration: 107844
loss: 0.9818674921989441,grad_norm: 0.9999991045875805, iteration: 107845
loss: 1.035435438156128,grad_norm: 0.9999990338419406, iteration: 107846
loss: 0.9864053130149841,grad_norm: 0.99999922814327, iteration: 107847
loss: 1.0091098546981812,grad_norm: 0.9999990562500202, iteration: 107848
loss: 1.0418574810028076,grad_norm: 0.9999995434855009, iteration: 107849
loss: 0.9458597302436829,grad_norm: 0.9830192223588846, iteration: 107850
loss: 0.9798198938369751,grad_norm: 0.9999991324133267, iteration: 107851
loss: 0.9730793833732605,grad_norm: 0.9999990511375211, iteration: 107852
loss: 1.0250097513198853,grad_norm: 0.9999991848807614, iteration: 107853
loss: 1.010299801826477,grad_norm: 0.9999991306548995, iteration: 107854
loss: 0.9793297052383423,grad_norm: 0.9949210266729662, iteration: 107855
loss: 1.0056546926498413,grad_norm: 0.9999991810626716, iteration: 107856
loss: 1.0251832008361816,grad_norm: 0.9820027279971304, iteration: 107857
loss: 0.9830125570297241,grad_norm: 0.9999994544542337, iteration: 107858
loss: 0.9898049831390381,grad_norm: 0.9999989632416734, iteration: 107859
loss: 1.02463698387146,grad_norm: 0.9778773269467138, iteration: 107860
loss: 0.9919134974479675,grad_norm: 0.9999992451896796, iteration: 107861
loss: 1.0606045722961426,grad_norm: 0.9999993241131611, iteration: 107862
loss: 1.0609233379364014,grad_norm: 0.999999414003888, iteration: 107863
loss: 1.0064616203308105,grad_norm: 0.9999992466122993, iteration: 107864
loss: 0.9952214360237122,grad_norm: 0.873470746774515, iteration: 107865
loss: 1.025566577911377,grad_norm: 0.9187154392771174, iteration: 107866
loss: 1.0182603597640991,grad_norm: 0.9999992955686328, iteration: 107867
loss: 1.031285047531128,grad_norm: 0.9999991973379871, iteration: 107868
loss: 0.989763081073761,grad_norm: 0.9086566936813786, iteration: 107869
loss: 0.9947635531425476,grad_norm: 0.9999992054756746, iteration: 107870
loss: 1.0263203382492065,grad_norm: 0.9999990418738972, iteration: 107871
loss: 1.0295945405960083,grad_norm: 0.9999992186017765, iteration: 107872
loss: 0.9610527753829956,grad_norm: 0.9999990748010219, iteration: 107873
loss: 0.9851265549659729,grad_norm: 0.999999127006909, iteration: 107874
loss: 0.9761387705802917,grad_norm: 0.9999990987103601, iteration: 107875
loss: 1.0445834398269653,grad_norm: 0.9999992851088407, iteration: 107876
loss: 1.039526104927063,grad_norm: 0.999999198083079, iteration: 107877
loss: 1.0008137226104736,grad_norm: 0.9214561100448432, iteration: 107878
loss: 0.9934021830558777,grad_norm: 0.999999237423141, iteration: 107879
loss: 1.0165202617645264,grad_norm: 0.8887313056307441, iteration: 107880
loss: 1.0297892093658447,grad_norm: 0.9999993127351144, iteration: 107881
loss: 0.9899048209190369,grad_norm: 0.9999989050423049, iteration: 107882
loss: 1.0003386735916138,grad_norm: 0.9679324654761606, iteration: 107883
loss: 0.9691261649131775,grad_norm: 0.9999991670230793, iteration: 107884
loss: 0.9958775043487549,grad_norm: 0.9597038428169015, iteration: 107885
loss: 1.0261493921279907,grad_norm: 0.9952900567595713, iteration: 107886
loss: 0.9794872403144836,grad_norm: 0.9420313448280272, iteration: 107887
loss: 1.0437411069869995,grad_norm: 0.9999990419910597, iteration: 107888
loss: 1.0038018226623535,grad_norm: 0.9116801766947273, iteration: 107889
loss: 0.9909607768058777,grad_norm: 0.9999991407388953, iteration: 107890
loss: 1.0228087902069092,grad_norm: 0.9999988914193505, iteration: 107891
loss: 1.0058997869491577,grad_norm: 0.9202650717704215, iteration: 107892
loss: 0.9866207242012024,grad_norm: 0.999999079586389, iteration: 107893
loss: 0.9922196865081787,grad_norm: 0.9999995034755088, iteration: 107894
loss: 1.0282106399536133,grad_norm: 0.959102940493549, iteration: 107895
loss: 0.9838186502456665,grad_norm: 0.9999991944471651, iteration: 107896
loss: 1.00735604763031,grad_norm: 0.9999991382897983, iteration: 107897
loss: 0.9997901916503906,grad_norm: 0.9999991643020004, iteration: 107898
loss: 0.9754000902175903,grad_norm: 0.908922964936244, iteration: 107899
loss: 1.0044314861297607,grad_norm: 0.9999990943441839, iteration: 107900
loss: 0.9808877110481262,grad_norm: 0.9046958326028326, iteration: 107901
loss: 0.9765433073043823,grad_norm: 0.9999992072586218, iteration: 107902
loss: 1.007506012916565,grad_norm: 0.9999993162557941, iteration: 107903
loss: 0.9812015891075134,grad_norm: 0.9999990635970225, iteration: 107904
loss: 0.9797992706298828,grad_norm: 0.9999990119910155, iteration: 107905
loss: 0.9930029511451721,grad_norm: 0.9999992919413068, iteration: 107906
loss: 1.0020116567611694,grad_norm: 0.9999991738868712, iteration: 107907
loss: 1.0036587715148926,grad_norm: 0.9999992282071115, iteration: 107908
loss: 1.0182825326919556,grad_norm: 0.9999992058010301, iteration: 107909
loss: 0.9817142486572266,grad_norm: 0.9999991358023422, iteration: 107910
loss: 1.0158933401107788,grad_norm: 0.9999992692816765, iteration: 107911
loss: 1.0014495849609375,grad_norm: 0.9755702119274672, iteration: 107912
loss: 1.008811593055725,grad_norm: 0.9999991685157992, iteration: 107913
loss: 0.9888922572135925,grad_norm: 0.8751475275343158, iteration: 107914
loss: 1.0107561349868774,grad_norm: 0.9999993032773123, iteration: 107915
loss: 0.9974471926689148,grad_norm: 0.8927093550175956, iteration: 107916
loss: 1.0287247896194458,grad_norm: 0.9999989117191326, iteration: 107917
loss: 1.0009204149246216,grad_norm: 0.9999992149115875, iteration: 107918
loss: 0.9628962278366089,grad_norm: 0.9667458999793759, iteration: 107919
loss: 0.962990939617157,grad_norm: 0.9999991864200211, iteration: 107920
loss: 0.9776623249053955,grad_norm: 0.9999990677424245, iteration: 107921
loss: 0.9849079251289368,grad_norm: 0.9667154402884756, iteration: 107922
loss: 1.0219464302062988,grad_norm: 0.9999992402075751, iteration: 107923
loss: 1.0370999574661255,grad_norm: 0.9999990447774242, iteration: 107924
loss: 1.0272282361984253,grad_norm: 0.999999102827184, iteration: 107925
loss: 1.0895642042160034,grad_norm: 0.9999995459830665, iteration: 107926
loss: 1.034922480583191,grad_norm: 0.9999989874932406, iteration: 107927
loss: 1.026968002319336,grad_norm: 0.9999992210997738, iteration: 107928
loss: 0.9955998659133911,grad_norm: 0.9131174156511898, iteration: 107929
loss: 1.013283610343933,grad_norm: 0.999999177650421, iteration: 107930
loss: 1.004802942276001,grad_norm: 0.999999390791974, iteration: 107931
loss: 0.999653697013855,grad_norm: 0.9999990002035043, iteration: 107932
loss: 0.9374207258224487,grad_norm: 0.9731087232456411, iteration: 107933
loss: 0.9817613363265991,grad_norm: 0.9464030331479512, iteration: 107934
loss: 0.9436655044555664,grad_norm: 0.9195308511681723, iteration: 107935
loss: 0.9902186989784241,grad_norm: 0.9999993197562617, iteration: 107936
loss: 0.9905986785888672,grad_norm: 0.9844829509182882, iteration: 107937
loss: 1.0580288171768188,grad_norm: 0.960439256261275, iteration: 107938
loss: 1.0030454397201538,grad_norm: 0.9909248927401296, iteration: 107939
loss: 1.0247645378112793,grad_norm: 0.9992301631825309, iteration: 107940
loss: 1.0104107856750488,grad_norm: 0.9999991331807684, iteration: 107941
loss: 0.989854633808136,grad_norm: 0.9837425848560707, iteration: 107942
loss: 0.9653252363204956,grad_norm: 0.9501602532700223, iteration: 107943
loss: 0.9633811116218567,grad_norm: 0.9999991919878544, iteration: 107944
loss: 1.0256956815719604,grad_norm: 0.9999990547396868, iteration: 107945
loss: 0.9656592607498169,grad_norm: 0.9999990440426919, iteration: 107946
loss: 1.0098589658737183,grad_norm: 0.9999992557742642, iteration: 107947
loss: 1.0712711811065674,grad_norm: 0.9999993277513852, iteration: 107948
loss: 1.0168397426605225,grad_norm: 0.9999992753687895, iteration: 107949
loss: 0.9782876372337341,grad_norm: 0.9999992895270997, iteration: 107950
loss: 0.9877157807350159,grad_norm: 0.9999992504471716, iteration: 107951
loss: 0.9881629943847656,grad_norm: 0.9999991858753944, iteration: 107952
loss: 1.014974594116211,grad_norm: 0.9999991412192317, iteration: 107953
loss: 0.9836300611495972,grad_norm: 0.8935626386489008, iteration: 107954
loss: 0.9804095029830933,grad_norm: 0.8710428247612827, iteration: 107955
loss: 1.1223382949829102,grad_norm: 0.9999999422951262, iteration: 107956
loss: 0.9830671548843384,grad_norm: 0.9969052543928576, iteration: 107957
loss: 1.0026687383651733,grad_norm: 0.995141205452037, iteration: 107958
loss: 0.9972791075706482,grad_norm: 0.8955198956347086, iteration: 107959
loss: 1.0335277318954468,grad_norm: 0.999999423990628, iteration: 107960
loss: 1.0526325702667236,grad_norm: 0.9999989891790614, iteration: 107961
loss: 1.019951343536377,grad_norm: 0.9999991245457295, iteration: 107962
loss: 1.0436290502548218,grad_norm: 0.9999993134616164, iteration: 107963
loss: 1.0102288722991943,grad_norm: 0.9693649510494365, iteration: 107964
loss: 1.0680522918701172,grad_norm: 0.9999992500601397, iteration: 107965
loss: 1.040076732635498,grad_norm: 0.9418556141226581, iteration: 107966
loss: 0.9547327160835266,grad_norm: 0.9999990186394657, iteration: 107967
loss: 1.090357780456543,grad_norm: 0.9700127163147282, iteration: 107968
loss: 1.0175279378890991,grad_norm: 0.9276440062137689, iteration: 107969
loss: 1.0420387983322144,grad_norm: 0.8805421659989416, iteration: 107970
loss: 1.021003007888794,grad_norm: 0.9000819311713814, iteration: 107971
loss: 0.9836409687995911,grad_norm: 0.9313030476696311, iteration: 107972
loss: 1.0493477582931519,grad_norm: 0.9999990965482329, iteration: 107973
loss: 1.0048719644546509,grad_norm: 0.9999990967762044, iteration: 107974
loss: 0.9833884239196777,grad_norm: 0.9999994495995139, iteration: 107975
loss: 1.0280234813690186,grad_norm: 0.9999992158086579, iteration: 107976
loss: 0.9479179382324219,grad_norm: 0.999998992861821, iteration: 107977
loss: 1.0417916774749756,grad_norm: 0.9999992774547586, iteration: 107978
loss: 1.0266886949539185,grad_norm: 0.9999991010184288, iteration: 107979
loss: 0.9718547463417053,grad_norm: 0.9999991219459112, iteration: 107980
loss: 0.9810612201690674,grad_norm: 0.9999990853650591, iteration: 107981
loss: 1.0163047313690186,grad_norm: 0.9999992243437774, iteration: 107982
loss: 1.019436001777649,grad_norm: 0.7892692455465199, iteration: 107983
loss: 0.9711980819702148,grad_norm: 0.9999991136667428, iteration: 107984
loss: 1.0183236598968506,grad_norm: 0.9943924653760607, iteration: 107985
loss: 1.00748610496521,grad_norm: 0.8625327385766336, iteration: 107986
loss: 1.0140513181686401,grad_norm: 0.9999990912222564, iteration: 107987
loss: 0.9776332378387451,grad_norm: 0.9999992387848291, iteration: 107988
loss: 0.9864797592163086,grad_norm: 0.999999205445872, iteration: 107989
loss: 0.9946755766868591,grad_norm: 0.9999991226181763, iteration: 107990
loss: 0.9664913415908813,grad_norm: 0.9999990963898806, iteration: 107991
loss: 1.0032567977905273,grad_norm: 0.9999990352300712, iteration: 107992
loss: 1.009081244468689,grad_norm: 0.9139258834739814, iteration: 107993
loss: 1.0268999338150024,grad_norm: 0.957863442057082, iteration: 107994
loss: 0.9817785024642944,grad_norm: 0.9999990473835415, iteration: 107995
loss: 1.0001243352890015,grad_norm: 0.99999926859727, iteration: 107996
loss: 0.9743078947067261,grad_norm: 0.9184947413031949, iteration: 107997
loss: 1.0062246322631836,grad_norm: 0.9999989706038633, iteration: 107998
loss: 0.9910410642623901,grad_norm: 0.9999991744796893, iteration: 107999
loss: 0.9999175667762756,grad_norm: 0.9999989891917406, iteration: 108000
loss: 1.00165593624115,grad_norm: 0.9999990260618535, iteration: 108001
loss: 1.0570223331451416,grad_norm: 0.9999993650254287, iteration: 108002
loss: 1.014639139175415,grad_norm: 0.9999990222082173, iteration: 108003
loss: 0.9905823469161987,grad_norm: 0.9999991384048825, iteration: 108004
loss: 1.0493202209472656,grad_norm: 0.9999992412238567, iteration: 108005
loss: 0.9797102808952332,grad_norm: 0.999999328621993, iteration: 108006
loss: 1.0054543018341064,grad_norm: 0.9999991387041247, iteration: 108007
loss: 0.9960450530052185,grad_norm: 0.9999991633494842, iteration: 108008
loss: 1.0129587650299072,grad_norm: 0.9054704575467756, iteration: 108009
loss: 0.9962736368179321,grad_norm: 0.9999990984083029, iteration: 108010
loss: 0.9961678385734558,grad_norm: 0.9999990641106419, iteration: 108011
loss: 1.0325788259506226,grad_norm: 0.9999991880258102, iteration: 108012
loss: 1.0114789009094238,grad_norm: 0.9999989726076439, iteration: 108013
loss: 0.994845449924469,grad_norm: 0.9999991839316867, iteration: 108014
loss: 0.9995577335357666,grad_norm: 0.9910432521126332, iteration: 108015
loss: 0.9682191610336304,grad_norm: 0.9999991163053932, iteration: 108016
loss: 0.9903774261474609,grad_norm: 0.9913498667231097, iteration: 108017
loss: 0.9911074638366699,grad_norm: 0.9999991171863039, iteration: 108018
loss: 1.0069048404693604,grad_norm: 0.9481569019179832, iteration: 108019
loss: 1.0001163482666016,grad_norm: 0.9658542530252889, iteration: 108020
loss: 1.002744436264038,grad_norm: 0.9999992162642025, iteration: 108021
loss: 1.0286154747009277,grad_norm: 0.9571014580613962, iteration: 108022
loss: 1.0288420915603638,grad_norm: 0.9906481879044863, iteration: 108023
loss: 1.0179821252822876,grad_norm: 0.8923571540864256, iteration: 108024
loss: 1.0304075479507446,grad_norm: 0.9999989828816194, iteration: 108025
loss: 1.0052794218063354,grad_norm: 0.9999991742604702, iteration: 108026
loss: 1.0029816627502441,grad_norm: 0.9999990754943187, iteration: 108027
loss: 0.9669381380081177,grad_norm: 0.9999990851889139, iteration: 108028
loss: 1.0187736749649048,grad_norm: 0.9999990663584947, iteration: 108029
loss: 0.983738124370575,grad_norm: 0.9116830634443994, iteration: 108030
loss: 1.0295647382736206,grad_norm: 0.999999150943037, iteration: 108031
loss: 0.9973088502883911,grad_norm: 0.9999990383658306, iteration: 108032
loss: 0.972390353679657,grad_norm: 0.9999991607220712, iteration: 108033
loss: 1.0067874193191528,grad_norm: 0.9999995719019414, iteration: 108034
loss: 0.968729555606842,grad_norm: 0.9999990588254727, iteration: 108035
loss: 0.986775815486908,grad_norm: 0.9999993055125814, iteration: 108036
loss: 0.9923607110977173,grad_norm: 0.9495675639863428, iteration: 108037
loss: 0.9843984246253967,grad_norm: 0.9895761544635138, iteration: 108038
loss: 0.9927830696105957,grad_norm: 0.9123396144507225, iteration: 108039
loss: 1.0050894021987915,grad_norm: 0.9999991137951587, iteration: 108040
loss: 0.9688953161239624,grad_norm: 0.9999991501432919, iteration: 108041
loss: 1.0026772022247314,grad_norm: 0.999999246392242, iteration: 108042
loss: 1.012299656867981,grad_norm: 0.999999267403511, iteration: 108043
loss: 0.9835636019706726,grad_norm: 0.9280399801390504, iteration: 108044
loss: 1.0070905685424805,grad_norm: 0.8907020558976256, iteration: 108045
loss: 1.0261207818984985,grad_norm: 0.8661239080202642, iteration: 108046
loss: 1.0253527164459229,grad_norm: 0.9999990959223024, iteration: 108047
loss: 0.9697480797767639,grad_norm: 0.9999994067772852, iteration: 108048
loss: 1.0146640539169312,grad_norm: 0.9905933330179304, iteration: 108049
loss: 0.9646274447441101,grad_norm: 0.9999991347005328, iteration: 108050
loss: 1.0008329153060913,grad_norm: 0.9999998428729487, iteration: 108051
loss: 1.0241011381149292,grad_norm: 0.999999112672451, iteration: 108052
loss: 1.016640543937683,grad_norm: 0.9999991475440396, iteration: 108053
loss: 1.0008633136749268,grad_norm: 0.97978063203577, iteration: 108054
loss: 1.0431458950042725,grad_norm: 0.9999995390573249, iteration: 108055
loss: 1.008667230606079,grad_norm: 0.9465561437422548, iteration: 108056
loss: 0.9781920313835144,grad_norm: 0.9999991053538294, iteration: 108057
loss: 1.0357494354248047,grad_norm: 0.9999993386214814, iteration: 108058
loss: 0.9687855243682861,grad_norm: 0.9999990392453078, iteration: 108059
loss: 0.994250476360321,grad_norm: 0.9848678281287196, iteration: 108060
loss: 1.0182253122329712,grad_norm: 0.9999991872944716, iteration: 108061
loss: 1.0245317220687866,grad_norm: 0.9420982380444699, iteration: 108062
loss: 0.9541229009628296,grad_norm: 0.9999990562471591, iteration: 108063
loss: 0.9646835327148438,grad_norm: 0.9922653029790183, iteration: 108064
loss: 1.0145319700241089,grad_norm: 0.9576743071075402, iteration: 108065
loss: 1.0135974884033203,grad_norm: 0.9999991011608278, iteration: 108066
loss: 1.0253647565841675,grad_norm: 0.8869198107631948, iteration: 108067
loss: 1.0204335451126099,grad_norm: 0.8993756049706382, iteration: 108068
loss: 1.0068491697311401,grad_norm: 0.9726747426381799, iteration: 108069
loss: 1.0408942699432373,grad_norm: 0.999912576235917, iteration: 108070
loss: 0.9817401766777039,grad_norm: 0.9999991699634232, iteration: 108071
loss: 0.9964107871055603,grad_norm: 0.9999992359581149, iteration: 108072
loss: 1.033250093460083,grad_norm: 0.9999992749823139, iteration: 108073
loss: 1.0103051662445068,grad_norm: 0.7788535884246435, iteration: 108074
loss: 0.9551907181739807,grad_norm: 0.9545820633202716, iteration: 108075
loss: 1.0230817794799805,grad_norm: 0.999999638441461, iteration: 108076
loss: 0.9783448576927185,grad_norm: 0.999999281082153, iteration: 108077
loss: 1.0023137331008911,grad_norm: 0.9999992208000125, iteration: 108078
loss: 0.999054491519928,grad_norm: 0.9999990537413345, iteration: 108079
loss: 1.0055718421936035,grad_norm: 0.9999990642857756, iteration: 108080
loss: 1.0147759914398193,grad_norm: 0.9999990839511176, iteration: 108081
loss: 0.9996059536933899,grad_norm: 0.9999991625219116, iteration: 108082
loss: 1.023107647895813,grad_norm: 0.9999992268701715, iteration: 108083
loss: 0.9849154949188232,grad_norm: 0.966786670687135, iteration: 108084
loss: 0.9909604787826538,grad_norm: 0.9999990558206637, iteration: 108085
loss: 1.0132070779800415,grad_norm: 0.987826368152273, iteration: 108086
loss: 1.006765604019165,grad_norm: 0.9999991649932003, iteration: 108087
loss: 1.0298166275024414,grad_norm: 0.8866083561594106, iteration: 108088
loss: 0.9974281191825867,grad_norm: 0.9999991455487427, iteration: 108089
loss: 0.9762714505195618,grad_norm: 0.9999990903126122, iteration: 108090
loss: 1.052802324295044,grad_norm: 0.9999992811477724, iteration: 108091
loss: 0.9957961440086365,grad_norm: 0.9999991047668441, iteration: 108092
loss: 1.1470502614974976,grad_norm: 0.999999752991998, iteration: 108093
loss: 1.0732005834579468,grad_norm: 0.9999993696967804, iteration: 108094
loss: 0.970011830329895,grad_norm: 0.9999991127007084, iteration: 108095
loss: 1.0367292165756226,grad_norm: 0.9996138824690209, iteration: 108096
loss: 1.0151368379592896,grad_norm: 0.9999991575644689, iteration: 108097
loss: 1.0310144424438477,grad_norm: 0.9999992776340187, iteration: 108098
loss: 1.018872618675232,grad_norm: 0.9999990516863672, iteration: 108099
loss: 0.9838702082633972,grad_norm: 0.9779857828656592, iteration: 108100
loss: 1.0219978094100952,grad_norm: 0.9999992258284961, iteration: 108101
loss: 0.9975360631942749,grad_norm: 0.9619292152960339, iteration: 108102
loss: 1.0033870935440063,grad_norm: 0.999999010096721, iteration: 108103
loss: 0.9928531646728516,grad_norm: 0.9999990810901369, iteration: 108104
loss: 0.9923460483551025,grad_norm: 0.9999991424384772, iteration: 108105
loss: 1.0122209787368774,grad_norm: 0.9341647353254315, iteration: 108106
loss: 1.0559277534484863,grad_norm: 0.9999996198540412, iteration: 108107
loss: 1.0691848993301392,grad_norm: 0.9999995062055925, iteration: 108108
loss: 1.0007997751235962,grad_norm: 0.9999990151436497, iteration: 108109
loss: 1.043624758720398,grad_norm: 0.9999992573285053, iteration: 108110
loss: 0.9855048060417175,grad_norm: 0.9999991141994575, iteration: 108111
loss: 1.0168828964233398,grad_norm: 0.9999991567131158, iteration: 108112
loss: 1.0120500326156616,grad_norm: 0.999999522503766, iteration: 108113
loss: 0.982893168926239,grad_norm: 0.8812432129634561, iteration: 108114
loss: 0.9902564883232117,grad_norm: 0.9415908270231459, iteration: 108115
loss: 1.050696849822998,grad_norm: 0.9999994338074351, iteration: 108116
loss: 0.9998995065689087,grad_norm: 0.9999990987584089, iteration: 108117
loss: 1.0191887617111206,grad_norm: 0.9999992371942241, iteration: 108118
loss: 0.9719200730323792,grad_norm: 0.9999993355986339, iteration: 108119
loss: 1.176620602607727,grad_norm: 0.9999992052409813, iteration: 108120
loss: 0.9979488849639893,grad_norm: 0.9466624759835282, iteration: 108121
loss: 0.9928176403045654,grad_norm: 0.9999991905799182, iteration: 108122
loss: 0.9839030504226685,grad_norm: 0.9999991933339522, iteration: 108123
loss: 1.0153541564941406,grad_norm: 0.9999991484213213, iteration: 108124
loss: 1.0075020790100098,grad_norm: 0.9999991015590907, iteration: 108125
loss: 0.9769307971000671,grad_norm: 0.9999990228282333, iteration: 108126
loss: 0.9958146214485168,grad_norm: 0.9979437125858514, iteration: 108127
loss: 1.0393086671829224,grad_norm: 0.9999991978587786, iteration: 108128
loss: 0.9822785258293152,grad_norm: 0.9999989313178602, iteration: 108129
loss: 0.9675399661064148,grad_norm: 0.9999990400987134, iteration: 108130
loss: 1.044482946395874,grad_norm: 0.9999998212089756, iteration: 108131
loss: 0.9676329493522644,grad_norm: 0.890148899900376, iteration: 108132
loss: 0.9968237280845642,grad_norm: 0.9999992233899049, iteration: 108133
loss: 0.9927741289138794,grad_norm: 0.9999993742082797, iteration: 108134
loss: 0.9763532280921936,grad_norm: 0.9110696929472528, iteration: 108135
loss: 1.0118815898895264,grad_norm: 0.9748142524512354, iteration: 108136
loss: 1.0347946882247925,grad_norm: 0.9999992524815935, iteration: 108137
loss: 1.0190523862838745,grad_norm: 0.9999996179790263, iteration: 108138
loss: 1.0008995532989502,grad_norm: 0.9848421781156337, iteration: 108139
loss: 1.0120806694030762,grad_norm: 0.9584427175484791, iteration: 108140
loss: 1.0368479490280151,grad_norm: 0.9999990075578722, iteration: 108141
loss: 0.9967489838600159,grad_norm: 0.9999992372830837, iteration: 108142
loss: 0.9530662298202515,grad_norm: 0.9899216245101526, iteration: 108143
loss: 1.0115702152252197,grad_norm: 0.8463652188712378, iteration: 108144
loss: 1.0198389291763306,grad_norm: 0.9999989958905215, iteration: 108145
loss: 1.0618356466293335,grad_norm: 0.9999998528494577, iteration: 108146
loss: 0.9843140840530396,grad_norm: 0.9999990602669603, iteration: 108147
loss: 0.995997965335846,grad_norm: 0.9027506066213172, iteration: 108148
loss: 1.0046584606170654,grad_norm: 0.9562298937548492, iteration: 108149
loss: 1.0167953968048096,grad_norm: 0.9716851305634346, iteration: 108150
loss: 1.0122078657150269,grad_norm: 0.9478791448156889, iteration: 108151
loss: 0.9841505289077759,grad_norm: 0.9681334595890153, iteration: 108152
loss: 0.9508475661277771,grad_norm: 0.9999992202698923, iteration: 108153
loss: 1.021017074584961,grad_norm: 0.9999991837356634, iteration: 108154
loss: 1.0227452516555786,grad_norm: 0.9325626803838237, iteration: 108155
loss: 1.004825472831726,grad_norm: 0.9999992088910964, iteration: 108156
loss: 1.011589527130127,grad_norm: 0.8602512160165645, iteration: 108157
loss: 1.0030380487442017,grad_norm: 0.9999990290526006, iteration: 108158
loss: 1.0059515237808228,grad_norm: 0.9148643862184044, iteration: 108159
loss: 1.0065383911132812,grad_norm: 0.8180300868496214, iteration: 108160
loss: 0.9722169041633606,grad_norm: 0.8855167985791979, iteration: 108161
loss: 0.9963880777359009,grad_norm: 0.9999990085316232, iteration: 108162
loss: 1.0263019800186157,grad_norm: 0.9999990908393882, iteration: 108163
loss: 1.004870057106018,grad_norm: 0.9999992610824551, iteration: 108164
loss: 0.9975261688232422,grad_norm: 0.9999991047761856, iteration: 108165
loss: 0.9732041358947754,grad_norm: 0.9606917033724521, iteration: 108166
loss: 1.000967025756836,grad_norm: 0.999999185531246, iteration: 108167
loss: 1.0263280868530273,grad_norm: 0.9999992429791034, iteration: 108168
loss: 0.9713351726531982,grad_norm: 0.9627250238454147, iteration: 108169
loss: 0.9984031915664673,grad_norm: 0.9999993373439895, iteration: 108170
loss: 1.0074249505996704,grad_norm: 0.9999990112803668, iteration: 108171
loss: 0.9937856197357178,grad_norm: 0.9999992602091261, iteration: 108172
loss: 1.0275815725326538,grad_norm: 0.9999990159423312, iteration: 108173
loss: 0.9849773645401001,grad_norm: 0.9591597847110388, iteration: 108174
loss: 1.0076216459274292,grad_norm: 0.9999992699134728, iteration: 108175
loss: 1.0002416372299194,grad_norm: 0.9999991379276014, iteration: 108176
loss: 1.022157907485962,grad_norm: 0.9999993480558006, iteration: 108177
loss: 0.9929060339927673,grad_norm: 0.899188970634558, iteration: 108178
loss: 1.0210473537445068,grad_norm: 0.999999252827806, iteration: 108179
loss: 1.016281247138977,grad_norm: 0.9999989827290842, iteration: 108180
loss: 0.9425223469734192,grad_norm: 0.9640020076783082, iteration: 108181
loss: 0.9878343343734741,grad_norm: 0.9736419041662387, iteration: 108182
loss: 1.0159474611282349,grad_norm: 0.9999991394668288, iteration: 108183
loss: 0.9876195192337036,grad_norm: 0.9610671576765124, iteration: 108184
loss: 1.035963773727417,grad_norm: 0.9999996708818174, iteration: 108185
loss: 0.9736495018005371,grad_norm: 0.999999156799686, iteration: 108186
loss: 0.9952410459518433,grad_norm: 0.9654946670741018, iteration: 108187
loss: 1.0012907981872559,grad_norm: 0.9999991517702304, iteration: 108188
loss: 0.9969667792320251,grad_norm: 0.9999992068217353, iteration: 108189
loss: 1.0115094184875488,grad_norm: 0.9552910208789784, iteration: 108190
loss: 1.0170681476593018,grad_norm: 0.9999992869415909, iteration: 108191
loss: 1.0057127475738525,grad_norm: 0.9216918119568714, iteration: 108192
loss: 1.0142822265625,grad_norm: 0.9999991485960835, iteration: 108193
loss: 0.9740302562713623,grad_norm: 0.9999989364117865, iteration: 108194
loss: 1.028311014175415,grad_norm: 0.9999992068832934, iteration: 108195
loss: 1.0259662866592407,grad_norm: 0.9999990974413449, iteration: 108196
loss: 1.0045981407165527,grad_norm: 0.9605704914855587, iteration: 108197
loss: 1.0088098049163818,grad_norm: 0.9999991605756223, iteration: 108198
loss: 1.0054140090942383,grad_norm: 0.9999991283527296, iteration: 108199
loss: 1.0217933654785156,grad_norm: 0.9118481023883639, iteration: 108200
loss: 1.046976923942566,grad_norm: 0.9677615889179599, iteration: 108201
loss: 1.004294514656067,grad_norm: 0.9999997289189392, iteration: 108202
loss: 1.0030862092971802,grad_norm: 0.9251833958549195, iteration: 108203
loss: 1.0126041173934937,grad_norm: 0.9999992463721676, iteration: 108204
loss: 0.9876217246055603,grad_norm: 0.9524441960473731, iteration: 108205
loss: 0.9818760752677917,grad_norm: 0.9999992075881113, iteration: 108206
loss: 0.9766838550567627,grad_norm: 0.9951137793405754, iteration: 108207
loss: 1.0017627477645874,grad_norm: 0.9510704197973802, iteration: 108208
loss: 0.9932955503463745,grad_norm: 0.9086697718169375, iteration: 108209
loss: 0.9660245776176453,grad_norm: 0.9999990808729363, iteration: 108210
loss: 1.0019103288650513,grad_norm: 0.9758140586022995, iteration: 108211
loss: 1.0253578424453735,grad_norm: 0.9787223392354281, iteration: 108212
loss: 0.9971111416816711,grad_norm: 0.9999992567419991, iteration: 108213
loss: 1.013306975364685,grad_norm: 0.9130032043040963, iteration: 108214
loss: 1.0196278095245361,grad_norm: 0.9132421742507171, iteration: 108215
loss: 1.038386344909668,grad_norm: 0.9999990452065357, iteration: 108216
loss: 1.0138676166534424,grad_norm: 0.999999210433788, iteration: 108217
loss: 0.9378275275230408,grad_norm: 0.967753124474282, iteration: 108218
loss: 1.0153158903121948,grad_norm: 0.9999991466295722, iteration: 108219
loss: 0.9776442050933838,grad_norm: 0.9999990874644161, iteration: 108220
loss: 0.9653063416481018,grad_norm: 0.977842095097318, iteration: 108221
loss: 0.9717133641242981,grad_norm: 0.9597199927811025, iteration: 108222
loss: 1.0091569423675537,grad_norm: 0.999999156799769, iteration: 108223
loss: 0.9954741597175598,grad_norm: 0.9562135376177295, iteration: 108224
loss: 1.0181702375411987,grad_norm: 0.9706757683935285, iteration: 108225
loss: 1.0153566598892212,grad_norm: 0.9207900942596375, iteration: 108226
loss: 1.0058096647262573,grad_norm: 0.9999991572538128, iteration: 108227
loss: 1.0099667310714722,grad_norm: 0.967096718956212, iteration: 108228
loss: 0.9963968396186829,grad_norm: 0.9999990308950227, iteration: 108229
loss: 0.9897083044052124,grad_norm: 0.9673371866777857, iteration: 108230
loss: 0.9836704134941101,grad_norm: 0.9999991480449824, iteration: 108231
loss: 1.004485011100769,grad_norm: 0.9999990851443714, iteration: 108232
loss: 0.954930305480957,grad_norm: 0.9927985466927796, iteration: 108233
loss: 0.9838230013847351,grad_norm: 0.9999991150044308, iteration: 108234
loss: 1.0278221368789673,grad_norm: 0.9705829169136075, iteration: 108235
loss: 0.9990065097808838,grad_norm: 0.9999992301725356, iteration: 108236
loss: 0.9738062024116516,grad_norm: 0.9999992230071105, iteration: 108237
loss: 0.980640172958374,grad_norm: 0.999999047175359, iteration: 108238
loss: 0.9754960536956787,grad_norm: 0.9759135614959932, iteration: 108239
loss: 0.9525008201599121,grad_norm: 0.9999991532870042, iteration: 108240
loss: 1.0103049278259277,grad_norm: 0.9999992100676706, iteration: 108241
loss: 1.0139755010604858,grad_norm: 0.9999996726149821, iteration: 108242
loss: 0.9851821064949036,grad_norm: 0.999999266482824, iteration: 108243
loss: 1.0125638246536255,grad_norm: 0.9999990658489153, iteration: 108244
loss: 0.9961285591125488,grad_norm: 0.9324499045370372, iteration: 108245
loss: 0.9879869222640991,grad_norm: 0.9999990867032349, iteration: 108246
loss: 0.9780433177947998,grad_norm: 0.9999990783383462, iteration: 108247
loss: 0.9691577553749084,grad_norm: 0.9871937133467792, iteration: 108248
loss: 0.9792191386222839,grad_norm: 0.994781913699039, iteration: 108249
loss: 0.9901514053344727,grad_norm: 0.9999993338282548, iteration: 108250
loss: 1.0092395544052124,grad_norm: 0.9999992929000567, iteration: 108251
loss: 0.9808576703071594,grad_norm: 0.8279184215002077, iteration: 108252
loss: 0.9655054807662964,grad_norm: 0.9999991791854482, iteration: 108253
loss: 0.9915012121200562,grad_norm: 0.9999992723201963, iteration: 108254
loss: 0.9985710978507996,grad_norm: 0.9999992561680514, iteration: 108255
loss: 0.9939888119697571,grad_norm: 0.999999159622627, iteration: 108256
loss: 0.9687207937240601,grad_norm: 0.9999992177741002, iteration: 108257
loss: 1.0442874431610107,grad_norm: 0.9999991202083379, iteration: 108258
loss: 1.0068875551223755,grad_norm: 0.9999992433023986, iteration: 108259
loss: 1.0166301727294922,grad_norm: 0.8711781765311178, iteration: 108260
loss: 0.9820072054862976,grad_norm: 0.9999990735748651, iteration: 108261
loss: 0.9876982569694519,grad_norm: 0.925185878341179, iteration: 108262
loss: 1.0231555700302124,grad_norm: 0.9999991860361751, iteration: 108263
loss: 1.0480186939239502,grad_norm: 0.9999991626814078, iteration: 108264
loss: 0.9709439873695374,grad_norm: 0.9494149321642481, iteration: 108265
loss: 1.0120151042938232,grad_norm: 0.9999989971371869, iteration: 108266
loss: 1.02731454372406,grad_norm: 0.999999059337582, iteration: 108267
loss: 1.0195608139038086,grad_norm: 0.8840479480636937, iteration: 108268
loss: 0.9824665188789368,grad_norm: 0.9999991977915728, iteration: 108269
loss: 1.0133718252182007,grad_norm: 0.9999990739235008, iteration: 108270
loss: 0.9887998700141907,grad_norm: 0.9999990861226812, iteration: 108271
loss: 1.0305266380310059,grad_norm: 0.907847117106035, iteration: 108272
loss: 1.0150949954986572,grad_norm: 0.9999991359557358, iteration: 108273
loss: 0.9717861413955688,grad_norm: 0.9594475035814157, iteration: 108274
loss: 0.9696580171585083,grad_norm: 0.9999992798080646, iteration: 108275
loss: 1.0255125761032104,grad_norm: 0.893519665227442, iteration: 108276
loss: 0.9842382669448853,grad_norm: 0.9999990586028672, iteration: 108277
loss: 1.0510616302490234,grad_norm: 0.9999990930931034, iteration: 108278
loss: 0.9985185265541077,grad_norm: 0.9999991835513777, iteration: 108279
loss: 1.024450659751892,grad_norm: 0.9999991039415423, iteration: 108280
loss: 1.0158520936965942,grad_norm: 0.9999991133477582, iteration: 108281
loss: 1.0173921585083008,grad_norm: 0.9403318631521282, iteration: 108282
loss: 1.0154666900634766,grad_norm: 0.9999991265485609, iteration: 108283
loss: 1.019670009613037,grad_norm: 0.9018519625165003, iteration: 108284
loss: 0.9538118243217468,grad_norm: 0.9999991912989993, iteration: 108285
loss: 0.9786999225616455,grad_norm: 0.9859849966440811, iteration: 108286
loss: 1.033245325088501,grad_norm: 0.9999990367791277, iteration: 108287
loss: 1.0056178569793701,grad_norm: 0.9999992249165853, iteration: 108288
loss: 0.9973779916763306,grad_norm: 0.8544852162640281, iteration: 108289
loss: 1.013596773147583,grad_norm: 0.9999992050624114, iteration: 108290
loss: 0.9983149170875549,grad_norm: 0.9999991816570786, iteration: 108291
loss: 1.0480449199676514,grad_norm: 0.9999990886016413, iteration: 108292
loss: 0.9889426231384277,grad_norm: 0.9999989668644705, iteration: 108293
loss: 1.0384016036987305,grad_norm: 0.9999990970906606, iteration: 108294
loss: 1.0337640047073364,grad_norm: 0.9311539729886956, iteration: 108295
loss: 1.0240312814712524,grad_norm: 0.9680165645610488, iteration: 108296
loss: 0.9754189848899841,grad_norm: 0.9999990967353651, iteration: 108297
loss: 0.9684393405914307,grad_norm: 0.9999993781241076, iteration: 108298
loss: 0.9802384376525879,grad_norm: 0.9881666098925577, iteration: 108299
loss: 1.0051190853118896,grad_norm: 0.9124524721697497, iteration: 108300
loss: 1.0195064544677734,grad_norm: 0.9541261512164096, iteration: 108301
loss: 1.0811922550201416,grad_norm: 0.9999992351588968, iteration: 108302
loss: 1.0418312549591064,grad_norm: 0.9999991281723458, iteration: 108303
loss: 1.0010631084442139,grad_norm: 0.9699189263069875, iteration: 108304
loss: 0.9765158295631409,grad_norm: 0.999999275070684, iteration: 108305
loss: 0.9804439544677734,grad_norm: 0.9999990158842322, iteration: 108306
loss: 1.0013643503189087,grad_norm: 0.9008254658256653, iteration: 108307
loss: 0.9474645256996155,grad_norm: 0.9999992386098215, iteration: 108308
loss: 1.0280543565750122,grad_norm: 0.999999113076408, iteration: 108309
loss: 0.9731612205505371,grad_norm: 0.9999991686496708, iteration: 108310
loss: 0.9756883382797241,grad_norm: 0.9824950981266173, iteration: 108311
loss: 1.0043423175811768,grad_norm: 0.999998924907587, iteration: 108312
loss: 0.9779479503631592,grad_norm: 0.8415603959837551, iteration: 108313
loss: 0.9947448968887329,grad_norm: 0.9843996764808451, iteration: 108314
loss: 1.0053099393844604,grad_norm: 0.9999991157784053, iteration: 108315
loss: 1.0455646514892578,grad_norm: 0.9089423674332012, iteration: 108316
loss: 1.0204988718032837,grad_norm: 0.9622601492244276, iteration: 108317
loss: 1.0185267925262451,grad_norm: 0.9999992285304814, iteration: 108318
loss: 0.9638348817825317,grad_norm: 0.9999990820701606, iteration: 108319
loss: 0.9999073147773743,grad_norm: 0.9999990468389249, iteration: 108320
loss: 1.000974178314209,grad_norm: 0.9192625005380196, iteration: 108321
loss: 0.9829066395759583,grad_norm: 0.9999991475580323, iteration: 108322
loss: 0.9728584289550781,grad_norm: 0.99999902232153, iteration: 108323
loss: 0.9804533123970032,grad_norm: 0.9999990895656323, iteration: 108324
loss: 0.9987910389900208,grad_norm: 0.9709564170375672, iteration: 108325
loss: 0.9792979955673218,grad_norm: 0.9999992470039785, iteration: 108326
loss: 0.981217622756958,grad_norm: 0.9999991776195345, iteration: 108327
loss: 1.0017168521881104,grad_norm: 0.9999990931017451, iteration: 108328
loss: 0.9737921953201294,grad_norm: 0.9999990589944343, iteration: 108329
loss: 1.0033546686172485,grad_norm: 0.9999991106133455, iteration: 108330
loss: 0.9667540192604065,grad_norm: 0.999999035850263, iteration: 108331
loss: 1.0317251682281494,grad_norm: 0.9443053739163739, iteration: 108332
loss: 1.0010825395584106,grad_norm: 0.9999992151936302, iteration: 108333
loss: 0.9694033861160278,grad_norm: 0.9447809302332797, iteration: 108334
loss: 0.9698898196220398,grad_norm: 0.963641736544792, iteration: 108335
loss: 0.9860795736312866,grad_norm: 0.9312775087439266, iteration: 108336
loss: 1.0059640407562256,grad_norm: 0.8890448886686686, iteration: 108337
loss: 0.9840164184570312,grad_norm: 0.9349319521173999, iteration: 108338
loss: 0.9852659702301025,grad_norm: 0.9999992518344656, iteration: 108339
loss: 0.9722076058387756,grad_norm: 0.9999991384847015, iteration: 108340
loss: 1.0065381526947021,grad_norm: 0.9999992214383815, iteration: 108341
loss: 0.990960955619812,grad_norm: 0.9999990964416167, iteration: 108342
loss: 0.9660115838050842,grad_norm: 0.99050796737676, iteration: 108343
loss: 0.9619243741035461,grad_norm: 0.9999989918443706, iteration: 108344
loss: 1.011758804321289,grad_norm: 0.9933805653509337, iteration: 108345
loss: 1.0415968894958496,grad_norm: 0.9999991607766312, iteration: 108346
loss: 1.0349758863449097,grad_norm: 0.923535977291752, iteration: 108347
loss: 0.9957935214042664,grad_norm: 0.9999992024357189, iteration: 108348
loss: 0.9602922797203064,grad_norm: 0.9690239131953903, iteration: 108349
loss: 0.992692768573761,grad_norm: 0.999999063253021, iteration: 108350
loss: 0.9971491694450378,grad_norm: 0.9425170735092757, iteration: 108351
loss: 1.0170143842697144,grad_norm: 0.9999992174039616, iteration: 108352
loss: 0.994954526424408,grad_norm: 0.9076674817702219, iteration: 108353
loss: 0.9794042706489563,grad_norm: 0.999999037262054, iteration: 108354
loss: 1.0144069194793701,grad_norm: 0.9999992295890517, iteration: 108355
loss: 0.9914364218711853,grad_norm: 0.9999991390343488, iteration: 108356
loss: 1.0303068161010742,grad_norm: 0.922674323710044, iteration: 108357
loss: 0.9996408224105835,grad_norm: 0.9999990021979711, iteration: 108358
loss: 0.9940868616104126,grad_norm: 0.9999990135817477, iteration: 108359
loss: 1.0120912790298462,grad_norm: 0.9999992322274969, iteration: 108360
loss: 0.9748150110244751,grad_norm: 0.8475086383899995, iteration: 108361
loss: 1.0293245315551758,grad_norm: 0.9999999848373147, iteration: 108362
loss: 1.0686835050582886,grad_norm: 0.9999993452785503, iteration: 108363
loss: 1.006743311882019,grad_norm: 0.9957931159726642, iteration: 108364
loss: 0.9943369030952454,grad_norm: 0.9999990776672598, iteration: 108365
loss: 0.9781134724617004,grad_norm: 0.9323878861153938, iteration: 108366
loss: 1.030727744102478,grad_norm: 0.9656298742597271, iteration: 108367
loss: 0.9879961013793945,grad_norm: 0.9999990652555192, iteration: 108368
loss: 0.9901869893074036,grad_norm: 0.9999991274220158, iteration: 108369
loss: 1.0015785694122314,grad_norm: 0.9999991254850441, iteration: 108370
loss: 1.0368671417236328,grad_norm: 0.999999218310767, iteration: 108371
loss: 1.0128810405731201,grad_norm: 0.949563717561932, iteration: 108372
loss: 1.0003893375396729,grad_norm: 0.8618377312349138, iteration: 108373
loss: 1.0039606094360352,grad_norm: 0.9519334908388541, iteration: 108374
loss: 1.0322215557098389,grad_norm: 0.9643463155940696, iteration: 108375
loss: 0.9841932654380798,grad_norm: 0.9516639433856119, iteration: 108376
loss: 0.9843198657035828,grad_norm: 0.911887564493934, iteration: 108377
loss: 1.006314754486084,grad_norm: 0.9999992487316434, iteration: 108378
loss: 0.9806821346282959,grad_norm: 0.8750685895003005, iteration: 108379
loss: 0.9975878000259399,grad_norm: 0.9801501904576473, iteration: 108380
loss: 0.9682232141494751,grad_norm: 0.9999992514786468, iteration: 108381
loss: 1.0017958879470825,grad_norm: 0.9999990099477728, iteration: 108382
loss: 1.0210106372833252,grad_norm: 0.9999991802018374, iteration: 108383
loss: 1.0174901485443115,grad_norm: 0.9999989956191979, iteration: 108384
loss: 1.0124456882476807,grad_norm: 0.9999995499483676, iteration: 108385
loss: 1.1016361713409424,grad_norm: 0.8942047145561216, iteration: 108386
loss: 0.9516876339912415,grad_norm: 0.9997690652789989, iteration: 108387
loss: 0.9812833666801453,grad_norm: 0.9731648151687072, iteration: 108388
loss: 0.9851322174072266,grad_norm: 0.9597815010316784, iteration: 108389
loss: 1.0082508325576782,grad_norm: 0.9999990684907588, iteration: 108390
loss: 1.064038872718811,grad_norm: 0.9999992118982052, iteration: 108391
loss: 1.0273324251174927,grad_norm: 0.9786873425721027, iteration: 108392
loss: 0.9930137991905212,grad_norm: 0.9071714597943094, iteration: 108393
loss: 0.9533431529998779,grad_norm: 0.9999993010489712, iteration: 108394
loss: 0.9652655124664307,grad_norm: 0.9999991065586128, iteration: 108395
loss: 1.0038357973098755,grad_norm: 0.9999992265138589, iteration: 108396
loss: 1.0221904516220093,grad_norm: 0.9999992777514702, iteration: 108397
loss: 1.0057578086853027,grad_norm: 0.9999992167180732, iteration: 108398
loss: 1.0186551809310913,grad_norm: 0.9528163151077939, iteration: 108399
loss: 0.9988914132118225,grad_norm: 0.9999994759209136, iteration: 108400
loss: 0.9831199049949646,grad_norm: 0.9999992561616614, iteration: 108401
loss: 1.0121508836746216,grad_norm: 0.9999993152197671, iteration: 108402
loss: 0.9970188736915588,grad_norm: 0.9278053582064419, iteration: 108403
loss: 1.0387529134750366,grad_norm: 0.9999995157292002, iteration: 108404
loss: 0.986518383026123,grad_norm: 0.9999992727782154, iteration: 108405
loss: 0.9963229894638062,grad_norm: 0.999999048767809, iteration: 108406
loss: 0.9655573964118958,grad_norm: 0.9999991012893392, iteration: 108407
loss: 1.0147074460983276,grad_norm: 0.9999990852681223, iteration: 108408
loss: 1.0247960090637207,grad_norm: 0.9999992111598808, iteration: 108409
loss: 1.0943198204040527,grad_norm: 0.9999990595191396, iteration: 108410
loss: 0.9816363453865051,grad_norm: 0.9999993972077642, iteration: 108411
loss: 1.0275908708572388,grad_norm: 0.9999991507178415, iteration: 108412
loss: 1.0058510303497314,grad_norm: 0.9815524458625834, iteration: 108413
loss: 1.0163260698318481,grad_norm: 0.9999992178606572, iteration: 108414
loss: 0.9910026788711548,grad_norm: 0.9999991686876009, iteration: 108415
loss: 0.980478048324585,grad_norm: 0.9850216133670978, iteration: 108416
loss: 1.0135231018066406,grad_norm: 0.9999993517760976, iteration: 108417
loss: 1.0692932605743408,grad_norm: 0.9999999324571412, iteration: 108418
loss: 1.021631121635437,grad_norm: 0.8305624376721462, iteration: 108419
loss: 1.0034465789794922,grad_norm: 0.9794508939754207, iteration: 108420
loss: 0.9982775449752808,grad_norm: 0.9877992573979201, iteration: 108421
loss: 1.0281938314437866,grad_norm: 0.9705826429723732, iteration: 108422
loss: 1.0362732410430908,grad_norm: 0.9483057185518635, iteration: 108423
loss: 0.9505686163902283,grad_norm: 0.9999990890733522, iteration: 108424
loss: 0.9437468647956848,grad_norm: 0.9999990686634701, iteration: 108425
loss: 1.0282665491104126,grad_norm: 0.9360368562926497, iteration: 108426
loss: 1.0002384185791016,grad_norm: 0.9517873454336123, iteration: 108427
loss: 1.0294376611709595,grad_norm: 0.9999995009093637, iteration: 108428
loss: 1.0065982341766357,grad_norm: 0.9999996134114962, iteration: 108429
loss: 1.026559591293335,grad_norm: 0.9733695971714768, iteration: 108430
loss: 0.9690858721733093,grad_norm: 0.9482650318929939, iteration: 108431
loss: 0.993170440196991,grad_norm: 0.9999991796220599, iteration: 108432
loss: 0.9969015121459961,grad_norm: 0.999999006327015, iteration: 108433
loss: 1.0407363176345825,grad_norm: 0.9999992770991665, iteration: 108434
loss: 0.9723270535469055,grad_norm: 0.9999991208674218, iteration: 108435
loss: 1.0110832452774048,grad_norm: 0.9141402434836068, iteration: 108436
loss: 0.9825214147567749,grad_norm: 0.9999993046908315, iteration: 108437
loss: 1.0030200481414795,grad_norm: 0.999999249496594, iteration: 108438
loss: 0.9937150478363037,grad_norm: 0.9585811662148518, iteration: 108439
loss: 1.02892005443573,grad_norm: 0.9999988764152558, iteration: 108440
loss: 1.0189696550369263,grad_norm: 0.9567396907571141, iteration: 108441
loss: 1.008719801902771,grad_norm: 0.9999995445407903, iteration: 108442
loss: 1.0086638927459717,grad_norm: 0.9559146889241634, iteration: 108443
loss: 0.97677081823349,grad_norm: 0.9999989989699994, iteration: 108444
loss: 0.9986822009086609,grad_norm: 0.8984899561079067, iteration: 108445
loss: 0.9822075366973877,grad_norm: 0.9999992053343004, iteration: 108446
loss: 1.0266269445419312,grad_norm: 0.9999990851822502, iteration: 108447
loss: 1.0161486864089966,grad_norm: 0.999999067943308, iteration: 108448
loss: 1.001407265663147,grad_norm: 0.9352362108010011, iteration: 108449
loss: 0.9748468995094299,grad_norm: 0.9621026956168622, iteration: 108450
loss: 1.0375337600708008,grad_norm: 0.9663645696169689, iteration: 108451
loss: 1.0226376056671143,grad_norm: 0.9999993191535684, iteration: 108452
loss: 1.0418363809585571,grad_norm: 0.9900827602537702, iteration: 108453
loss: 1.0132811069488525,grad_norm: 0.9999992126611957, iteration: 108454
loss: 0.9947234988212585,grad_norm: 0.9003025882206698, iteration: 108455
loss: 1.0512226819992065,grad_norm: 0.9999994481645529, iteration: 108456
loss: 1.007019281387329,grad_norm: 0.9690692187817914, iteration: 108457
loss: 1.0156941413879395,grad_norm: 0.9999992413936754, iteration: 108458
loss: 1.048613429069519,grad_norm: 0.9999991950623539, iteration: 108459
loss: 0.9869162440299988,grad_norm: 0.9999990288452171, iteration: 108460
loss: 0.9943649768829346,grad_norm: 0.9999992630163138, iteration: 108461
loss: 0.986949622631073,grad_norm: 0.9999990603963745, iteration: 108462
loss: 0.9685191512107849,grad_norm: 0.9999991615450321, iteration: 108463
loss: 1.0219004154205322,grad_norm: 0.8262409419849304, iteration: 108464
loss: 1.0048351287841797,grad_norm: 0.9999993336128956, iteration: 108465
loss: 0.9815686941146851,grad_norm: 0.8909858344917374, iteration: 108466
loss: 1.0067391395568848,grad_norm: 0.9914120297160761, iteration: 108467
loss: 1.0064870119094849,grad_norm: 0.9999990718140886, iteration: 108468
loss: 1.0242267847061157,grad_norm: 0.9999993572364303, iteration: 108469
loss: 0.9862809777259827,grad_norm: 0.991164422021822, iteration: 108470
loss: 1.0147373676300049,grad_norm: 0.9999994957431589, iteration: 108471
loss: 0.9851574897766113,grad_norm: 0.9999992193424553, iteration: 108472
loss: 0.984550416469574,grad_norm: 0.9411377415431951, iteration: 108473
loss: 1.0129457712173462,grad_norm: 0.8795645826251319, iteration: 108474
loss: 0.9929314851760864,grad_norm: 0.9999989857546212, iteration: 108475
loss: 1.0225880146026611,grad_norm: 0.9999989895003507, iteration: 108476
loss: 0.9981739521026611,grad_norm: 0.9999991653158659, iteration: 108477
loss: 1.010111689567566,grad_norm: 0.8810319109146713, iteration: 108478
loss: 1.0102684497833252,grad_norm: 0.9999991568213877, iteration: 108479
loss: 0.9706951975822449,grad_norm: 0.9999993469622421, iteration: 108480
loss: 1.0312230587005615,grad_norm: 0.8958024096885437, iteration: 108481
loss: 0.9866779446601868,grad_norm: 0.9999992278822251, iteration: 108482
loss: 1.0492054224014282,grad_norm: 0.9999990832140627, iteration: 108483
loss: 1.017937421798706,grad_norm: 0.999999180735578, iteration: 108484
loss: 0.9794673323631287,grad_norm: 0.9999989428086109, iteration: 108485
loss: 1.0299708843231201,grad_norm: 0.9999992287163384, iteration: 108486
loss: 1.0403436422348022,grad_norm: 0.9999991284250772, iteration: 108487
loss: 1.0269525051116943,grad_norm: 0.9999996804884991, iteration: 108488
loss: 1.0143332481384277,grad_norm: 0.9999991947407769, iteration: 108489
loss: 1.0326297283172607,grad_norm: 0.9999993723349849, iteration: 108490
loss: 1.0666987895965576,grad_norm: 0.9999990887345768, iteration: 108491
loss: 1.0318024158477783,grad_norm: 0.999999934950341, iteration: 108492
loss: 0.9716493487358093,grad_norm: 0.9999991300009079, iteration: 108493
loss: 0.990388035774231,grad_norm: 0.9999991081052093, iteration: 108494
loss: 0.9768691658973694,grad_norm: 0.9999992353609211, iteration: 108495
loss: 0.9905319809913635,grad_norm: 0.9999990842433772, iteration: 108496
loss: 1.0236246585845947,grad_norm: 0.9999991699123467, iteration: 108497
loss: 1.0056513547897339,grad_norm: 0.9999991204082262, iteration: 108498
loss: 1.0096371173858643,grad_norm: 0.9999995196186979, iteration: 108499
loss: 0.9628274440765381,grad_norm: 0.9714831493294864, iteration: 108500
loss: 0.9889807105064392,grad_norm: 0.94905357159499, iteration: 108501
loss: 0.972061812877655,grad_norm: 0.9695891068475146, iteration: 108502
loss: 1.042277455329895,grad_norm: 0.999999386423492, iteration: 108503
loss: 1.0440268516540527,grad_norm: 1.0000000478156252, iteration: 108504
loss: 1.0162461996078491,grad_norm: 0.9999992141959797, iteration: 108505
loss: 1.0174448490142822,grad_norm: 0.9999990366622343, iteration: 108506
loss: 0.9794869422912598,grad_norm: 0.786050348920563, iteration: 108507
loss: 1.0418192148208618,grad_norm: 0.9999991755794421, iteration: 108508
loss: 0.975934624671936,grad_norm: 0.999999237443126, iteration: 108509
loss: 1.1513844728469849,grad_norm: 0.999999486614459, iteration: 108510
loss: 1.0009294748306274,grad_norm: 0.9999992536223904, iteration: 108511
loss: 1.0287173986434937,grad_norm: 0.999999559744132, iteration: 108512
loss: 1.019914984703064,grad_norm: 0.999999324887942, iteration: 108513
loss: 0.9934070706367493,grad_norm: 0.9999989767348448, iteration: 108514
loss: 1.0187631845474243,grad_norm: 0.999999159906306, iteration: 108515
loss: 1.0091699361801147,grad_norm: 0.9999992244362578, iteration: 108516
loss: 0.9508816003799438,grad_norm: 0.9999991213228108, iteration: 108517
loss: 1.0085216760635376,grad_norm: 0.9999989219254396, iteration: 108518
loss: 0.98634272813797,grad_norm: 0.9999990125130916, iteration: 108519
loss: 1.0046950578689575,grad_norm: 0.8575677926867156, iteration: 108520
loss: 0.9916788339614868,grad_norm: 0.9999990524033384, iteration: 108521
loss: 1.0410363674163818,grad_norm: 0.9999998651316722, iteration: 108522
loss: 0.9805933833122253,grad_norm: 0.9999990877357947, iteration: 108523
loss: 0.9888173937797546,grad_norm: 0.9999990972277482, iteration: 108524
loss: 1.0284875631332397,grad_norm: 0.9999993734801395, iteration: 108525
loss: 1.0490896701812744,grad_norm: 0.9999998877914592, iteration: 108526
loss: 0.9884759187698364,grad_norm: 0.999998965449302, iteration: 108527
loss: 0.9770167469978333,grad_norm: 0.9999991832111425, iteration: 108528
loss: 0.971922755241394,grad_norm: 0.9570381244227928, iteration: 108529
loss: 1.0171711444854736,grad_norm: 0.8008460542104131, iteration: 108530
loss: 1.0097554922103882,grad_norm: 0.9212051310700913, iteration: 108531
loss: 0.9875792860984802,grad_norm: 0.8547928963099681, iteration: 108532
loss: 1.0005772113800049,grad_norm: 0.9999991668489184, iteration: 108533
loss: 0.9795578718185425,grad_norm: 0.9999990368481938, iteration: 108534
loss: 1.0081733465194702,grad_norm: 0.9999990948842407, iteration: 108535
loss: 0.9972501993179321,grad_norm: 0.9999992810681911, iteration: 108536
loss: 1.0616954565048218,grad_norm: 0.9999997833025998, iteration: 108537
loss: 0.986832320690155,grad_norm: 0.9999991091456185, iteration: 108538
loss: 0.9818418622016907,grad_norm: 0.999999274656897, iteration: 108539
loss: 1.0004886388778687,grad_norm: 0.9999991544447813, iteration: 108540
loss: 1.058206558227539,grad_norm: 0.9999992626244529, iteration: 108541
loss: 0.9879406094551086,grad_norm: 0.999999072146038, iteration: 108542
loss: 1.003387689590454,grad_norm: 0.9999991551539389, iteration: 108543
loss: 1.0463602542877197,grad_norm: 0.9999998606732674, iteration: 108544
loss: 1.0341295003890991,grad_norm: 0.9999994642508775, iteration: 108545
loss: 1.0346390008926392,grad_norm: 0.9999991509560765, iteration: 108546
loss: 0.9512879252433777,grad_norm: 0.9999991091337872, iteration: 108547
loss: 1.0174955129623413,grad_norm: 0.9274967875483295, iteration: 108548
loss: 0.9936051368713379,grad_norm: 0.9538865337502924, iteration: 108549
loss: 1.0156519412994385,grad_norm: 0.8914205258988563, iteration: 108550
loss: 0.9832314252853394,grad_norm: 0.9999990554843221, iteration: 108551
loss: 1.0170738697052002,grad_norm: 0.9999991342604992, iteration: 108552
loss: 1.0564417839050293,grad_norm: 0.9999995536273235, iteration: 108553
loss: 0.9844014048576355,grad_norm: 0.9999990980934461, iteration: 108554
loss: 0.9546116590499878,grad_norm: 0.8909012234038483, iteration: 108555
loss: 1.0072638988494873,grad_norm: 0.9999991454559933, iteration: 108556
loss: 1.202440619468689,grad_norm: 0.9999993806770953, iteration: 108557
loss: 1.0305043458938599,grad_norm: 0.9999993801312563, iteration: 108558
loss: 0.9817622900009155,grad_norm: 0.9999992089833956, iteration: 108559
loss: 1.0369088649749756,grad_norm: 0.9999995797556516, iteration: 108560
loss: 0.9759743809700012,grad_norm: 0.9999991815096058, iteration: 108561
loss: 1.0122981071472168,grad_norm: 0.9999991081394339, iteration: 108562
loss: 1.0380704402923584,grad_norm: 0.9999999056953834, iteration: 108563
loss: 0.9997355341911316,grad_norm: 0.9611341184728234, iteration: 108564
loss: 1.0063343048095703,grad_norm: 0.9999989285668234, iteration: 108565
loss: 1.0043412446975708,grad_norm: 0.999999123084599, iteration: 108566
loss: 1.0285496711730957,grad_norm: 0.9885289540335699, iteration: 108567
loss: 0.9506804347038269,grad_norm: 0.9999991420622076, iteration: 108568
loss: 1.0231928825378418,grad_norm: 0.8885977653054261, iteration: 108569
loss: 1.0931220054626465,grad_norm: 0.9999999113070219, iteration: 108570
loss: 1.0031698942184448,grad_norm: 0.8035105805179316, iteration: 108571
loss: 1.0435000658035278,grad_norm: 0.9999991168306268, iteration: 108572
loss: 0.980901300907135,grad_norm: 0.9999993193309628, iteration: 108573
loss: 1.0900744199752808,grad_norm: 0.9999998297896207, iteration: 108574
loss: 1.057193636894226,grad_norm: 0.9999994313422288, iteration: 108575
loss: 1.0179401636123657,grad_norm: 0.9999995582179099, iteration: 108576
loss: 1.017987608909607,grad_norm: 0.9999991846512953, iteration: 108577
loss: 1.0085793733596802,grad_norm: 0.9999989511675931, iteration: 108578
loss: 0.9798928499221802,grad_norm: 0.9999996104055884, iteration: 108579
loss: 1.0144363641738892,grad_norm: 0.9999117702175977, iteration: 108580
loss: 0.9858869314193726,grad_norm: 0.999999102179095, iteration: 108581
loss: 1.019818902015686,grad_norm: 0.9999994794000936, iteration: 108582
loss: 0.9741967916488647,grad_norm: 0.9999991651585566, iteration: 108583
loss: 0.9726189970970154,grad_norm: 0.9999990845507748, iteration: 108584
loss: 0.9986321330070496,grad_norm: 0.9999992662150842, iteration: 108585
loss: 1.0439726114273071,grad_norm: 0.9999993516494553, iteration: 108586
loss: 1.0416359901428223,grad_norm: 0.9891505784947875, iteration: 108587
loss: 1.0384202003479004,grad_norm: 0.999999142720078, iteration: 108588
loss: 1.0383987426757812,grad_norm: 0.9999993096207157, iteration: 108589
loss: 0.9916781187057495,grad_norm: 0.9999991784488587, iteration: 108590
loss: 0.9760459661483765,grad_norm: 0.9999990877024492, iteration: 108591
loss: 0.9759792685508728,grad_norm: 0.9999990743684282, iteration: 108592
loss: 1.030009388923645,grad_norm: 0.9999994293326886, iteration: 108593
loss: 1.0622190237045288,grad_norm: 0.9999995391428563, iteration: 108594
loss: 0.9885385036468506,grad_norm: 0.9999989964976627, iteration: 108595
loss: 0.9971473217010498,grad_norm: 0.9999990301768816, iteration: 108596
loss: 0.9964861869812012,grad_norm: 0.9999990789343184, iteration: 108597
loss: 0.9753232002258301,grad_norm: 0.9999989877684311, iteration: 108598
loss: 1.0172585248947144,grad_norm: 0.999998967469825, iteration: 108599
loss: 1.0111511945724487,grad_norm: 0.999999020747255, iteration: 108600
loss: 1.0576528310775757,grad_norm: 0.9999992669029573, iteration: 108601
loss: 0.9934948086738586,grad_norm: 0.9999990579941455, iteration: 108602
loss: 1.024724006652832,grad_norm: 0.999999414965663, iteration: 108603
loss: 1.1934272050857544,grad_norm: 0.9999996145060217, iteration: 108604
loss: 0.9873820543289185,grad_norm: 0.9999992466098525, iteration: 108605
loss: 0.9879647493362427,grad_norm: 0.999999278645903, iteration: 108606
loss: 0.9683468341827393,grad_norm: 0.9999990193072139, iteration: 108607
loss: 1.0110881328582764,grad_norm: 0.9999993746203079, iteration: 108608
loss: 0.9653847813606262,grad_norm: 0.8489735953658331, iteration: 108609
loss: 1.0125102996826172,grad_norm: 0.9999991197602676, iteration: 108610
loss: 1.0080763101577759,grad_norm: 0.9999991196664089, iteration: 108611
loss: 0.9980217218399048,grad_norm: 0.990346165496939, iteration: 108612
loss: 1.055306315422058,grad_norm: 0.9999992150687976, iteration: 108613
loss: 1.021531581878662,grad_norm: 0.8634232407223745, iteration: 108614
loss: 0.973890483379364,grad_norm: 0.9999991119817656, iteration: 108615
loss: 1.0221977233886719,grad_norm: 0.9999994608971126, iteration: 108616
loss: 1.000419020652771,grad_norm: 0.9999991792595143, iteration: 108617
loss: 1.0174545049667358,grad_norm: 0.9999992537605598, iteration: 108618
loss: 1.0176622867584229,grad_norm: 0.9999991031152013, iteration: 108619
loss: 1.0149537324905396,grad_norm: 0.9999990435673314, iteration: 108620
loss: 1.0003527402877808,grad_norm: 0.9999993709734053, iteration: 108621
loss: 1.0739847421646118,grad_norm: 0.9999996310509007, iteration: 108622
loss: 1.007593035697937,grad_norm: 0.9830032261798559, iteration: 108623
loss: 0.9957422614097595,grad_norm: 0.999999126340549, iteration: 108624
loss: 1.0370938777923584,grad_norm: 0.9999994313364072, iteration: 108625
loss: 1.0384896993637085,grad_norm: 0.9999994528273349, iteration: 108626
loss: 0.9894788861274719,grad_norm: 0.8763775202565617, iteration: 108627
loss: 1.007637858390808,grad_norm: 0.9701263738412419, iteration: 108628
loss: 0.9968590140342712,grad_norm: 0.9182669153721681, iteration: 108629
loss: 1.03069269657135,grad_norm: 0.999999629485968, iteration: 108630
loss: 0.9823686480522156,grad_norm: 0.999999441404101, iteration: 108631
loss: 1.0128107070922852,grad_norm: 0.9681683247563971, iteration: 108632
loss: 0.9975241422653198,grad_norm: 0.9999993584048628, iteration: 108633
loss: 0.9830438494682312,grad_norm: 0.9999991017565514, iteration: 108634
loss: 1.016406774520874,grad_norm: 0.9003770779562768, iteration: 108635
loss: 1.0445809364318848,grad_norm: 0.9999989995685676, iteration: 108636
loss: 0.9930376410484314,grad_norm: 0.9999991516278949, iteration: 108637
loss: 1.0045095682144165,grad_norm: 0.9999998169010401, iteration: 108638
loss: 0.9960961937904358,grad_norm: 0.9999993464938838, iteration: 108639
loss: 0.9921386241912842,grad_norm: 0.9999992075450423, iteration: 108640
loss: 1.0141096115112305,grad_norm: 0.9999998020846197, iteration: 108641
loss: 1.0136842727661133,grad_norm: 0.9957788823136, iteration: 108642
loss: 1.0077738761901855,grad_norm: 0.9999990765251893, iteration: 108643
loss: 1.0465954542160034,grad_norm: 0.9633558707193555, iteration: 108644
loss: 1.0013022422790527,grad_norm: 0.9999991237172486, iteration: 108645
loss: 1.0327298641204834,grad_norm: 0.999999168920376, iteration: 108646
loss: 1.0189130306243896,grad_norm: 0.9813766843772123, iteration: 108647
loss: 0.9878822565078735,grad_norm: 0.9866586593995322, iteration: 108648
loss: 1.0008846521377563,grad_norm: 0.9401747507249105, iteration: 108649
loss: 0.9568728804588318,grad_norm: 0.9703481685311077, iteration: 108650
loss: 1.0085504055023193,grad_norm: 0.9999991345943896, iteration: 108651
loss: 1.0531349182128906,grad_norm: 0.9999997726207628, iteration: 108652
loss: 0.9744840264320374,grad_norm: 0.9999991349660872, iteration: 108653
loss: 1.0191668272018433,grad_norm: 0.8025590377115098, iteration: 108654
loss: 0.9845386743545532,grad_norm: 0.9999989516176901, iteration: 108655
loss: 0.9854996204376221,grad_norm: 0.9999991158966461, iteration: 108656
loss: 1.0232938528060913,grad_norm: 0.9999994381202923, iteration: 108657
loss: 0.9720260500907898,grad_norm: 0.9003505434513431, iteration: 108658
loss: 1.013237714767456,grad_norm: 0.9999991897575518, iteration: 108659
loss: 1.0128432512283325,grad_norm: 0.8047413260321407, iteration: 108660
loss: 1.0263135433197021,grad_norm: 0.9999990517288486, iteration: 108661
loss: 0.9883109927177429,grad_norm: 0.9999990805126092, iteration: 108662
loss: 0.9933941960334778,grad_norm: 0.9999991710515076, iteration: 108663
loss: 0.9954769611358643,grad_norm: 0.999594651717241, iteration: 108664
loss: 0.9875842928886414,grad_norm: 0.9999992130726723, iteration: 108665
loss: 0.9702665209770203,grad_norm: 0.9999991790094004, iteration: 108666
loss: 1.0245707035064697,grad_norm: 0.9999992796036115, iteration: 108667
loss: 0.9636093378067017,grad_norm: 0.9999992800804958, iteration: 108668
loss: 1.0269734859466553,grad_norm: 0.9999991762504398, iteration: 108669
loss: 1.0013495683670044,grad_norm: 0.9999995396622223, iteration: 108670
loss: 1.0141969919204712,grad_norm: 0.9718293643437357, iteration: 108671
loss: 1.0089153051376343,grad_norm: 0.9999992065858905, iteration: 108672
loss: 0.9778719544410706,grad_norm: 0.9098032152001027, iteration: 108673
loss: 0.9903733730316162,grad_norm: 0.9999991218942942, iteration: 108674
loss: 1.0003522634506226,grad_norm: 0.9462411985632329, iteration: 108675
loss: 1.0078799724578857,grad_norm: 0.9999993150027032, iteration: 108676
loss: 0.9674832224845886,grad_norm: 0.9999991211687583, iteration: 108677
loss: 0.9713449478149414,grad_norm: 0.9999991349465361, iteration: 108678
loss: 1.014357089996338,grad_norm: 0.9930413624844519, iteration: 108679
loss: 1.011156678199768,grad_norm: 0.9999992063727442, iteration: 108680
loss: 1.0285422801971436,grad_norm: 0.9999996864505414, iteration: 108681
loss: 1.0221874713897705,grad_norm: 0.9999998169042084, iteration: 108682
loss: 0.9709676504135132,grad_norm: 0.9999990391438961, iteration: 108683
loss: 1.003265380859375,grad_norm: 0.9999992213406456, iteration: 108684
loss: 1.003656029701233,grad_norm: 0.9543973653272606, iteration: 108685
loss: 0.9980711936950684,grad_norm: 0.9999990085999917, iteration: 108686
loss: 0.9945913553237915,grad_norm: 0.9999992633665823, iteration: 108687
loss: 1.015761375427246,grad_norm: 0.9999994763533865, iteration: 108688
loss: 0.997980535030365,grad_norm: 0.9999995409856057, iteration: 108689
loss: 0.9609792232513428,grad_norm: 0.9999990717857261, iteration: 108690
loss: 1.0038917064666748,grad_norm: 0.9054365012799116, iteration: 108691
loss: 1.0136209726333618,grad_norm: 0.9999991553482422, iteration: 108692
loss: 0.993768572807312,grad_norm: 0.9835625518505847, iteration: 108693
loss: 1.0024495124816895,grad_norm: 0.8615885172304958, iteration: 108694
loss: 1.046126365661621,grad_norm: 0.9999992756035626, iteration: 108695
loss: 1.0163191556930542,grad_norm: 0.9999992812718526, iteration: 108696
loss: 1.046228051185608,grad_norm: 0.9999993738937574, iteration: 108697
loss: 1.0069780349731445,grad_norm: 0.9999990565628851, iteration: 108698
loss: 1.0018678903579712,grad_norm: 0.8804426252077306, iteration: 108699
loss: 0.9983761310577393,grad_norm: 0.9780759588662608, iteration: 108700
loss: 1.0276083946228027,grad_norm: 0.9999990953437736, iteration: 108701
loss: 1.0442436933517456,grad_norm: 0.9078061177786403, iteration: 108702
loss: 0.9902690052986145,grad_norm: 0.999999394896068, iteration: 108703
loss: 0.9765492677688599,grad_norm: 0.8219714003341454, iteration: 108704
loss: 1.0034148693084717,grad_norm: 0.9991956102472577, iteration: 108705
loss: 1.052734375,grad_norm: 0.9999998968716269, iteration: 108706
loss: 1.0234142541885376,grad_norm: 0.983290560253424, iteration: 108707
loss: 0.9623508453369141,grad_norm: 0.9618312074881107, iteration: 108708
loss: 0.9793934226036072,grad_norm: 0.9999990893537882, iteration: 108709
loss: 0.9821878671646118,grad_norm: 0.999999131842163, iteration: 108710
loss: 1.014277696609497,grad_norm: 0.9999992047991302, iteration: 108711
loss: 0.9917375445365906,grad_norm: 0.9940241933876434, iteration: 108712
loss: 1.0296753644943237,grad_norm: 0.9999991539015723, iteration: 108713
loss: 1.0098159313201904,grad_norm: 0.9999992600638183, iteration: 108714
loss: 1.0103504657745361,grad_norm: 0.8247178832238209, iteration: 108715
loss: 1.0239518880844116,grad_norm: 0.9999992559736084, iteration: 108716
loss: 1.0118433237075806,grad_norm: 0.999998965950988, iteration: 108717
loss: 0.9752522706985474,grad_norm: 0.9999991967436305, iteration: 108718
loss: 1.0336666107177734,grad_norm: 0.8968194017750422, iteration: 108719
loss: 1.0069248676300049,grad_norm: 0.9999989530211558, iteration: 108720
loss: 0.9824574589729309,grad_norm: 0.9999990405802773, iteration: 108721
loss: 0.973203182220459,grad_norm: 0.9168153333765063, iteration: 108722
loss: 0.9929145574569702,grad_norm: 0.9582899722928444, iteration: 108723
loss: 0.9960981607437134,grad_norm: 0.9999991757708515, iteration: 108724
loss: 1.019400715827942,grad_norm: 0.9999992347022799, iteration: 108725
loss: 1.0448719263076782,grad_norm: 0.9999992102741209, iteration: 108726
loss: 0.9793031215667725,grad_norm: 0.9586857604323, iteration: 108727
loss: 0.9874613285064697,grad_norm: 0.9395576329588471, iteration: 108728
loss: 1.0049524307250977,grad_norm: 0.9999992496925948, iteration: 108729
loss: 1.0016759634017944,grad_norm: 0.9651982975124808, iteration: 108730
loss: 1.0048203468322754,grad_norm: 0.9789349840035418, iteration: 108731
loss: 0.9957360625267029,grad_norm: 0.9999992157134373, iteration: 108732
loss: 1.0010794401168823,grad_norm: 0.8408329982741705, iteration: 108733
loss: 1.093496322631836,grad_norm: 0.9999998135338756, iteration: 108734
loss: 1.0052833557128906,grad_norm: 0.8840361959567679, iteration: 108735
loss: 1.0265001058578491,grad_norm: 0.9999989754242059, iteration: 108736
loss: 1.015187382698059,grad_norm: 0.9999991856913583, iteration: 108737
loss: 1.0232553482055664,grad_norm: 0.9999992245071789, iteration: 108738
loss: 1.008084774017334,grad_norm: 0.9999992080754846, iteration: 108739
loss: 0.9626516103744507,grad_norm: 0.9999991650653772, iteration: 108740
loss: 0.9943961501121521,grad_norm: 0.9999996113671772, iteration: 108741
loss: 0.9950945377349854,grad_norm: 0.8964249208289639, iteration: 108742
loss: 1.0149027109146118,grad_norm: 0.944353589059046, iteration: 108743
loss: 0.9987385869026184,grad_norm: 0.999999220299691, iteration: 108744
loss: 0.996788740158081,grad_norm: 0.9314954521246954, iteration: 108745
loss: 1.0205119848251343,grad_norm: 0.9930466665399823, iteration: 108746
loss: 0.9890336990356445,grad_norm: 0.9167254611127358, iteration: 108747
loss: 1.023750901222229,grad_norm: 0.9999991014623923, iteration: 108748
loss: 1.0115485191345215,grad_norm: 0.8127158973255495, iteration: 108749
loss: 0.9802557229995728,grad_norm: 0.9999989840752574, iteration: 108750
loss: 0.9896234273910522,grad_norm: 0.9999991206576725, iteration: 108751
loss: 0.9959849715232849,grad_norm: 0.9999989786159614, iteration: 108752
loss: 1.00026273727417,grad_norm: 0.9999996183106962, iteration: 108753
loss: 1.004062294960022,grad_norm: 0.9999997704751369, iteration: 108754
loss: 0.9925347566604614,grad_norm: 0.9825156197072462, iteration: 108755
loss: 1.0154118537902832,grad_norm: 0.9947064672812741, iteration: 108756
loss: 0.9840657114982605,grad_norm: 0.999999151452798, iteration: 108757
loss: 0.9788088798522949,grad_norm: 0.9999992007926147, iteration: 108758
loss: 1.0124176740646362,grad_norm: 0.999999146923711, iteration: 108759
loss: 1.0211504697799683,grad_norm: 0.9999988813360062, iteration: 108760
loss: 0.9742600917816162,grad_norm: 0.9999991622818064, iteration: 108761
loss: 0.9747522473335266,grad_norm: 0.9999990216825991, iteration: 108762
loss: 1.0060770511627197,grad_norm: 0.898347070468502, iteration: 108763
loss: 1.0177485942840576,grad_norm: 0.9999994618660312, iteration: 108764
loss: 1.00810968875885,grad_norm: 0.9999990791167833, iteration: 108765
loss: 0.9893642663955688,grad_norm: 0.9999990916794188, iteration: 108766
loss: 0.9779914021492004,grad_norm: 0.8928525452738156, iteration: 108767
loss: 1.0558909177780151,grad_norm: 0.9999999013198018, iteration: 108768
loss: 1.0104323625564575,grad_norm: 0.9999993583029835, iteration: 108769
loss: 0.9627558588981628,grad_norm: 0.9393408036873582, iteration: 108770
loss: 0.9858514666557312,grad_norm: 0.9268944337793429, iteration: 108771
loss: 0.986774206161499,grad_norm: 0.9999991480622789, iteration: 108772
loss: 1.0452381372451782,grad_norm: 0.9999992246719918, iteration: 108773
loss: 1.0051524639129639,grad_norm: 0.9999990371296417, iteration: 108774
loss: 1.083358645439148,grad_norm: 0.999999951225951, iteration: 108775
loss: 1.0309104919433594,grad_norm: 0.9999991554063711, iteration: 108776
loss: 1.0303356647491455,grad_norm: 0.9999995982010905, iteration: 108777
loss: 0.9835951328277588,grad_norm: 0.8747151509485658, iteration: 108778
loss: 1.0030009746551514,grad_norm: 0.9999991868257178, iteration: 108779
loss: 1.0108696222305298,grad_norm: 0.9999992544116307, iteration: 108780
loss: 1.0378034114837646,grad_norm: 0.9890236076207799, iteration: 108781
loss: 0.993073046207428,grad_norm: 0.9999996461257551, iteration: 108782
loss: 0.975584089756012,grad_norm: 0.9999991210027193, iteration: 108783
loss: 0.9838501811027527,grad_norm: 0.9814919471598786, iteration: 108784
loss: 1.0199695825576782,grad_norm: 0.9442727760012738, iteration: 108785
loss: 1.0188815593719482,grad_norm: 0.9999995981055503, iteration: 108786
loss: 0.9755831360816956,grad_norm: 0.9716092318028791, iteration: 108787
loss: 1.0074626207351685,grad_norm: 0.9999991574439007, iteration: 108788
loss: 1.003071904182434,grad_norm: 0.9999991834962356, iteration: 108789
loss: 1.021618127822876,grad_norm: 0.9964297252106316, iteration: 108790
loss: 0.9988954663276672,grad_norm: 0.840072958010386, iteration: 108791
loss: 1.0140223503112793,grad_norm: 0.9999992336628873, iteration: 108792
loss: 0.982363224029541,grad_norm: 0.8548615445218735, iteration: 108793
loss: 1.0080504417419434,grad_norm: 0.9999991931655252, iteration: 108794
loss: 0.9827409386634827,grad_norm: 0.9523470333719959, iteration: 108795
loss: 1.0220317840576172,grad_norm: 0.9999990578640773, iteration: 108796
loss: 1.0034310817718506,grad_norm: 0.9133515627807662, iteration: 108797
loss: 1.0185387134552002,grad_norm: 0.9999991793989379, iteration: 108798
loss: 1.0082013607025146,grad_norm: 0.8359228897595132, iteration: 108799
loss: 0.9993883371353149,grad_norm: 0.9999993215184588, iteration: 108800
loss: 1.000187873840332,grad_norm: 0.9999990067090546, iteration: 108801
loss: 1.019690990447998,grad_norm: 0.9999992340969199, iteration: 108802
loss: 0.9623399376869202,grad_norm: 0.9999992559622326, iteration: 108803
loss: 0.970275342464447,grad_norm: 0.999999046435354, iteration: 108804
loss: 0.9633810520172119,grad_norm: 0.9999991740849392, iteration: 108805
loss: 0.9925916194915771,grad_norm: 0.9756220878304734, iteration: 108806
loss: 0.979457437992096,grad_norm: 0.9999991342796377, iteration: 108807
loss: 0.9948176145553589,grad_norm: 0.9999991204762897, iteration: 108808
loss: 1.0291365385055542,grad_norm: 0.9394659527624896, iteration: 108809
loss: 0.9642652273178101,grad_norm: 0.9999990800286079, iteration: 108810
loss: 1.0155600309371948,grad_norm: 0.9999992971214556, iteration: 108811
loss: 0.9788634181022644,grad_norm: 0.9944320731075191, iteration: 108812
loss: 1.0116645097732544,grad_norm: 0.9398360293745188, iteration: 108813
loss: 0.9690501093864441,grad_norm: 0.9661286450821726, iteration: 108814
loss: 0.9837571978569031,grad_norm: 0.9999992382915022, iteration: 108815
loss: 0.9597753286361694,grad_norm: 0.9999990950604274, iteration: 108816
loss: 1.0580661296844482,grad_norm: 0.9999995506200877, iteration: 108817
loss: 1.0052624940872192,grad_norm: 0.9432278718742323, iteration: 108818
loss: 0.9875838756561279,grad_norm: 0.9999990805406289, iteration: 108819
loss: 1.0564069747924805,grad_norm: 0.9999990448366889, iteration: 108820
loss: 1.0173457860946655,grad_norm: 0.9999991738341729, iteration: 108821
loss: 0.9866246581077576,grad_norm: 0.9999991345286771, iteration: 108822
loss: 1.019970178604126,grad_norm: 0.9684473174890161, iteration: 108823
loss: 1.0529245138168335,grad_norm: 0.9999991063399267, iteration: 108824
loss: 1.0140941143035889,grad_norm: 0.9968945729000387, iteration: 108825
loss: 1.0542869567871094,grad_norm: 0.9999990928639917, iteration: 108826
loss: 0.986380934715271,grad_norm: 0.9999991876585278, iteration: 108827
loss: 0.9620608687400818,grad_norm: 0.9128129464668392, iteration: 108828
loss: 1.007333755493164,grad_norm: 0.9860856290166906, iteration: 108829
loss: 1.0472534894943237,grad_norm: 0.9739485836757927, iteration: 108830
loss: 1.039608120918274,grad_norm: 0.9999990389885209, iteration: 108831
loss: 1.0011601448059082,grad_norm: 0.9999989506290591, iteration: 108832
loss: 1.044187307357788,grad_norm: 0.9999996663623384, iteration: 108833
loss: 1.0003563165664673,grad_norm: 0.9999989836724166, iteration: 108834
loss: 0.99740070104599,grad_norm: 0.9999992347435273, iteration: 108835
loss: 1.0104798078536987,grad_norm: 0.9421015071722316, iteration: 108836
loss: 0.9955560564994812,grad_norm: 0.9999992225199115, iteration: 108837
loss: 1.01932692527771,grad_norm: 0.999999687907134, iteration: 108838
loss: 0.9680614471435547,grad_norm: 0.8912132813434239, iteration: 108839
loss: 0.9896258115768433,grad_norm: 0.9806425847024806, iteration: 108840
loss: 0.9702021479606628,grad_norm: 0.9999990283903603, iteration: 108841
loss: 1.0161652565002441,grad_norm: 0.9999992341871554, iteration: 108842
loss: 0.9858956933021545,grad_norm: 0.9999991628521935, iteration: 108843
loss: 1.0170851945877075,grad_norm: 0.9767371930484957, iteration: 108844
loss: 0.9999632835388184,grad_norm: 0.9999992504614039, iteration: 108845
loss: 0.9919548630714417,grad_norm: 0.9999990547471176, iteration: 108846
loss: 0.9751888513565063,grad_norm: 0.9729488645398504, iteration: 108847
loss: 1.0169892311096191,grad_norm: 0.9999991055560132, iteration: 108848
loss: 0.9968820810317993,grad_norm: 0.9999990269509887, iteration: 108849
loss: 1.0137516260147095,grad_norm: 0.9890008814374394, iteration: 108850
loss: 1.0193456411361694,grad_norm: 0.9999990570527117, iteration: 108851
loss: 1.0178025960922241,grad_norm: 0.9999990922606511, iteration: 108852
loss: 0.9895286560058594,grad_norm: 0.9999992058078813, iteration: 108853
loss: 0.9897913932800293,grad_norm: 0.9331245174506416, iteration: 108854
loss: 0.9727458953857422,grad_norm: 0.9999990931729131, iteration: 108855
loss: 0.9689671993255615,grad_norm: 0.9999991907033967, iteration: 108856
loss: 0.9834039807319641,grad_norm: 0.9999990556230672, iteration: 108857
loss: 0.9730988144874573,grad_norm: 0.9999991779968402, iteration: 108858
loss: 1.022441029548645,grad_norm: 0.9999991836546142, iteration: 108859
loss: 1.014296293258667,grad_norm: 0.9999991297828438, iteration: 108860
loss: 0.9804983735084534,grad_norm: 0.9999990228971921, iteration: 108861
loss: 1.024933934211731,grad_norm: 0.9366660175857506, iteration: 108862
loss: 1.0115129947662354,grad_norm: 0.9999992674035094, iteration: 108863
loss: 1.0326919555664062,grad_norm: 0.9999997944344522, iteration: 108864
loss: 0.9766796827316284,grad_norm: 0.9999991637020708, iteration: 108865
loss: 0.9991545081138611,grad_norm: 0.9999992534811681, iteration: 108866
loss: 0.9965380430221558,grad_norm: 0.9732023390717417, iteration: 108867
loss: 1.008430004119873,grad_norm: 0.9999990609693602, iteration: 108868
loss: 1.0184056758880615,grad_norm: 0.9999991171435996, iteration: 108869
loss: 0.9778962731361389,grad_norm: 0.9934913997146189, iteration: 108870
loss: 1.0164684057235718,grad_norm: 0.9999992291216395, iteration: 108871
loss: 1.0243085622787476,grad_norm: 0.9999991559259606, iteration: 108872
loss: 0.9836805462837219,grad_norm: 0.9801988581633427, iteration: 108873
loss: 1.0068607330322266,grad_norm: 0.9608208620921984, iteration: 108874
loss: 0.9975726008415222,grad_norm: 0.9999991693224787, iteration: 108875
loss: 1.0551213026046753,grad_norm: 0.9999998898939988, iteration: 108876
loss: 1.011915922164917,grad_norm: 0.999999142238481, iteration: 108877
loss: 1.0006632804870605,grad_norm: 0.999999058723968, iteration: 108878
loss: 1.0148921012878418,grad_norm: 0.9541229415840042, iteration: 108879
loss: 1.002185344696045,grad_norm: 0.9999992404418034, iteration: 108880
loss: 0.9916517734527588,grad_norm: 0.9999991800182341, iteration: 108881
loss: 0.9542945623397827,grad_norm: 0.9999993627041511, iteration: 108882
loss: 1.0163651704788208,grad_norm: 0.9999991161583026, iteration: 108883
loss: 1.0143288373947144,grad_norm: 0.93092814302161, iteration: 108884
loss: 0.9897686243057251,grad_norm: 0.99999905655826, iteration: 108885
loss: 0.9996159672737122,grad_norm: 0.9065473309201408, iteration: 108886
loss: 1.1068090200424194,grad_norm: 0.9999991428238483, iteration: 108887
loss: 0.975283145904541,grad_norm: 0.9999990658390282, iteration: 108888
loss: 0.9920712113380432,grad_norm: 0.9999991934725557, iteration: 108889
loss: 0.9954673647880554,grad_norm: 0.9806895771091027, iteration: 108890
loss: 0.9643005728721619,grad_norm: 0.9302973241768877, iteration: 108891
loss: 0.9894182682037354,grad_norm: 0.9999992564732718, iteration: 108892
loss: 1.0017913579940796,grad_norm: 0.9999990549641178, iteration: 108893
loss: 1.0435611009597778,grad_norm: 0.9999991800037428, iteration: 108894
loss: 1.1816481351852417,grad_norm: 0.9999999090895576, iteration: 108895
loss: 1.0273029804229736,grad_norm: 0.9999991692191628, iteration: 108896
loss: 1.0005121231079102,grad_norm: 0.9336608218165813, iteration: 108897
loss: 1.0003100633621216,grad_norm: 0.9999991087550133, iteration: 108898
loss: 0.9772574305534363,grad_norm: 0.9522631717212432, iteration: 108899
loss: 0.9833413362503052,grad_norm: 0.9999991254896073, iteration: 108900
loss: 0.9823101162910461,grad_norm: 0.999999192663962, iteration: 108901
loss: 1.0206085443496704,grad_norm: 0.9726576192347673, iteration: 108902
loss: 1.0579466819763184,grad_norm: 0.9999990353621565, iteration: 108903
loss: 1.0149016380310059,grad_norm: 0.9735768511950932, iteration: 108904
loss: 1.024880290031433,grad_norm: 0.9999991442604798, iteration: 108905
loss: 0.996300458908081,grad_norm: 0.9661773029285046, iteration: 108906
loss: 1.0507365465164185,grad_norm: 0.9698490027120401, iteration: 108907
loss: 0.9949228167533875,grad_norm: 0.999999138649815, iteration: 108908
loss: 0.9785951972007751,grad_norm: 0.9657047634612136, iteration: 108909
loss: 1.0284459590911865,grad_norm: 0.9870213512209376, iteration: 108910
loss: 0.9984301328659058,grad_norm: 0.9999991455383853, iteration: 108911
loss: 0.9928829669952393,grad_norm: 0.9074255186460282, iteration: 108912
loss: 1.008466124534607,grad_norm: 0.9999992550243126, iteration: 108913
loss: 0.9927809238433838,grad_norm: 0.9999990187785035, iteration: 108914
loss: 0.9778308868408203,grad_norm: 0.9999991190475431, iteration: 108915
loss: 1.038301706314087,grad_norm: 0.9999994339436201, iteration: 108916
loss: 1.0162039995193481,grad_norm: 0.8903974630269517, iteration: 108917
loss: 0.9740891456604004,grad_norm: 0.9696478758111191, iteration: 108918
loss: 0.9902715682983398,grad_norm: 0.9999995361815747, iteration: 108919
loss: 0.9998812675476074,grad_norm: 0.9999991427983121, iteration: 108920
loss: 1.0816015005111694,grad_norm: 0.9999992304359503, iteration: 108921
loss: 0.9741977453231812,grad_norm: 0.8869618227655478, iteration: 108922
loss: 0.9998064041137695,grad_norm: 0.993198458579689, iteration: 108923
loss: 1.0075281858444214,grad_norm: 0.9936161257468109, iteration: 108924
loss: 1.0201929807662964,grad_norm: 0.9999991381697886, iteration: 108925
loss: 1.1075100898742676,grad_norm: 0.9999992606286772, iteration: 108926
loss: 0.9775390625,grad_norm: 0.9999992224880353, iteration: 108927
loss: 0.9723334908485413,grad_norm: 0.9999991473928964, iteration: 108928
loss: 1.0080313682556152,grad_norm: 0.9763627498742861, iteration: 108929
loss: 1.0011729001998901,grad_norm: 0.9515171909509096, iteration: 108930
loss: 1.0006388425827026,grad_norm: 0.9999990819260827, iteration: 108931
loss: 1.0310190916061401,grad_norm: 0.9999990633908126, iteration: 108932
loss: 1.0406423807144165,grad_norm: 0.9999991317317528, iteration: 108933
loss: 1.001552939414978,grad_norm: 0.9885669660745638, iteration: 108934
loss: 1.0008763074874878,grad_norm: 0.9999990711516263, iteration: 108935
loss: 0.9825512170791626,grad_norm: 0.8207283203058091, iteration: 108936
loss: 1.0097064971923828,grad_norm: 0.9771752872494357, iteration: 108937
loss: 1.0228873491287231,grad_norm: 0.9999992370542768, iteration: 108938
loss: 0.9948075413703918,grad_norm: 0.9999991549554355, iteration: 108939
loss: 1.1004056930541992,grad_norm: 0.9999998100139634, iteration: 108940
loss: 0.994767963886261,grad_norm: 0.9918957402895132, iteration: 108941
loss: 1.0131466388702393,grad_norm: 0.999999163193172, iteration: 108942
loss: 1.075971007347107,grad_norm: 0.9770629021519827, iteration: 108943
loss: 1.0381253957748413,grad_norm: 0.9900167947631132, iteration: 108944
loss: 1.0101821422576904,grad_norm: 0.9999990576657742, iteration: 108945
loss: 0.990811824798584,grad_norm: 0.9999998104232511, iteration: 108946
loss: 1.0440627336502075,grad_norm: 0.9999997426777907, iteration: 108947
loss: 0.9878111481666565,grad_norm: 1.0000000265902584, iteration: 108948
loss: 1.0056445598602295,grad_norm: 0.9999989927589483, iteration: 108949
loss: 1.0163089036941528,grad_norm: 0.8928878705692622, iteration: 108950
loss: 1.0165156126022339,grad_norm: 0.9577027294009358, iteration: 108951
loss: 0.9892607927322388,grad_norm: 0.9999990388104893, iteration: 108952
loss: 1.0241953134536743,grad_norm: 0.9999991040205732, iteration: 108953
loss: 1.0634942054748535,grad_norm: 0.9999991963062358, iteration: 108954
loss: 0.9992809295654297,grad_norm: 0.9817878182222103, iteration: 108955
loss: 0.9875149726867676,grad_norm: 0.9999992694113894, iteration: 108956
loss: 1.0413987636566162,grad_norm: 0.9999991300971035, iteration: 108957
loss: 0.9871067404747009,grad_norm: 0.999999358397523, iteration: 108958
loss: 0.9781743288040161,grad_norm: 0.9999992051672901, iteration: 108959
loss: 0.9777690768241882,grad_norm: 0.9658482147762248, iteration: 108960
loss: 1.0110886096954346,grad_norm: 0.9999993085619429, iteration: 108961
loss: 1.0120787620544434,grad_norm: 0.7856633976863197, iteration: 108962
loss: 0.9773229360580444,grad_norm: 0.9808801463177772, iteration: 108963
loss: 1.010080337524414,grad_norm: 0.9999998569603102, iteration: 108964
loss: 0.9820098876953125,grad_norm: 0.9999990199605296, iteration: 108965
loss: 0.9783056974411011,grad_norm: 0.9999990573557709, iteration: 108966
loss: 1.0048774480819702,grad_norm: 0.9762443666377456, iteration: 108967
loss: 1.0081593990325928,grad_norm: 0.9999990470731691, iteration: 108968
loss: 0.9947631359100342,grad_norm: 0.8100847672527045, iteration: 108969
loss: 1.0041970014572144,grad_norm: 0.930516526105132, iteration: 108970
loss: 0.9837123155593872,grad_norm: 0.9915132994942487, iteration: 108971
loss: 1.0238882303237915,grad_norm: 0.9999998360464551, iteration: 108972
loss: 1.006699562072754,grad_norm: 0.9999992914039769, iteration: 108973
loss: 0.9940750002861023,grad_norm: 0.99069209648461, iteration: 108974
loss: 1.0002778768539429,grad_norm: 0.9379904516791593, iteration: 108975
loss: 1.0050545930862427,grad_norm: 0.9999992365695578, iteration: 108976
loss: 1.0006877183914185,grad_norm: 0.9999990997513946, iteration: 108977
loss: 0.9697186946868896,grad_norm: 0.9999991285205627, iteration: 108978
loss: 0.9984389543533325,grad_norm: 0.9102505004308145, iteration: 108979
loss: 0.9797581434249878,grad_norm: 0.9999992787546187, iteration: 108980
loss: 1.016448736190796,grad_norm: 0.9896016553128067, iteration: 108981
loss: 0.9996829032897949,grad_norm: 0.9999990905235232, iteration: 108982
loss: 1.0268199443817139,grad_norm: 0.8747099920204163, iteration: 108983
loss: 1.0272865295410156,grad_norm: 0.9882278465978588, iteration: 108984
loss: 1.020804762840271,grad_norm: 0.8780081637545903, iteration: 108985
loss: 0.9584516882896423,grad_norm: 0.9999991168429715, iteration: 108986
loss: 1.0186491012573242,grad_norm: 0.9999992079732668, iteration: 108987
loss: 0.9918769001960754,grad_norm: 0.9999991530886011, iteration: 108988
loss: 1.010772466659546,grad_norm: 0.9607571708377323, iteration: 108989
loss: 1.0133739709854126,grad_norm: 0.9999991387147167, iteration: 108990
loss: 0.9657280445098877,grad_norm: 0.9896004239173282, iteration: 108991
loss: 1.0065170526504517,grad_norm: 0.9999992240002329, iteration: 108992
loss: 0.977446973323822,grad_norm: 0.9999992442646298, iteration: 108993
loss: 1.036460041999817,grad_norm: 0.999999375928896, iteration: 108994
loss: 1.0105574131011963,grad_norm: 0.9706804597447093, iteration: 108995
loss: 0.9972164034843445,grad_norm: 0.9999992971425773, iteration: 108996
loss: 1.0485386848449707,grad_norm: 0.999999318946008, iteration: 108997
loss: 1.0107818841934204,grad_norm: 0.9999991752848445, iteration: 108998
loss: 1.0012723207473755,grad_norm: 0.8927412614149365, iteration: 108999
loss: 0.9982239603996277,grad_norm: 0.9923944652066596, iteration: 109000
loss: 0.9862210154533386,grad_norm: 0.9999991086703032, iteration: 109001
loss: 1.0478969812393188,grad_norm: 0.9241066905709555, iteration: 109002
loss: 0.968376100063324,grad_norm: 0.9999991404959041, iteration: 109003
loss: 1.0441523790359497,grad_norm: 0.9725496229284719, iteration: 109004
loss: 1.0204358100891113,grad_norm: 0.8764431489154076, iteration: 109005
loss: 0.9936940670013428,grad_norm: 0.9234035837828964, iteration: 109006
loss: 1.0249978303909302,grad_norm: 0.9999991236825676, iteration: 109007
loss: 0.952566385269165,grad_norm: 0.9999991796292013, iteration: 109008
loss: 1.0184437036514282,grad_norm: 0.9999988737507988, iteration: 109009
loss: 1.0013078451156616,grad_norm: 0.9999991304768965, iteration: 109010
loss: 1.0150728225708008,grad_norm: 0.8923638909170016, iteration: 109011
loss: 1.006206750869751,grad_norm: 0.9999991383247254, iteration: 109012
loss: 0.9617061018943787,grad_norm: 0.9999991299189108, iteration: 109013
loss: 1.0085400342941284,grad_norm: 0.9999990884040558, iteration: 109014
loss: 0.9830593466758728,grad_norm: 0.9017593299201665, iteration: 109015
loss: 1.0008316040039062,grad_norm: 0.9999991818202626, iteration: 109016
loss: 1.0151039361953735,grad_norm: 0.9999990963259063, iteration: 109017
loss: 0.9929954409599304,grad_norm: 0.9021512546217471, iteration: 109018
loss: 1.020405650138855,grad_norm: 0.9591087577819727, iteration: 109019
loss: 1.0205936431884766,grad_norm: 0.9999992103324481, iteration: 109020
loss: 0.9739969968795776,grad_norm: 0.8178040771877717, iteration: 109021
loss: 0.9970176815986633,grad_norm: 0.9999990053683051, iteration: 109022
loss: 0.9785918593406677,grad_norm: 0.9862263213738489, iteration: 109023
loss: 0.968618631362915,grad_norm: 0.9835560114345209, iteration: 109024
loss: 0.9867485761642456,grad_norm: 0.9512732323762713, iteration: 109025
loss: 1.0196043252944946,grad_norm: 0.9999991939419701, iteration: 109026
loss: 0.961247980594635,grad_norm: 0.8959675561793244, iteration: 109027
loss: 1.0132962465286255,grad_norm: 0.9999990604396941, iteration: 109028
loss: 0.9788148999214172,grad_norm: 0.9864166302720251, iteration: 109029
loss: 1.0100951194763184,grad_norm: 0.9999991688763394, iteration: 109030
loss: 1.0078271627426147,grad_norm: 0.999999085047954, iteration: 109031
loss: 1.0042847394943237,grad_norm: 0.9999989901090901, iteration: 109032
loss: 0.9995420575141907,grad_norm: 0.9999991586324227, iteration: 109033
loss: 0.994310736656189,grad_norm: 0.9999991820590329, iteration: 109034
loss: 1.0005542039871216,grad_norm: 0.9999990690319884, iteration: 109035
loss: 0.9637097716331482,grad_norm: 0.9999991219174124, iteration: 109036
loss: 0.9708157181739807,grad_norm: 0.999999155390211, iteration: 109037
loss: 0.9729043245315552,grad_norm: 0.9999991886678272, iteration: 109038
loss: 0.950485348701477,grad_norm: 0.9999992060532529, iteration: 109039
loss: 1.0082679986953735,grad_norm: 0.9589036271510716, iteration: 109040
loss: 0.9863221645355225,grad_norm: 0.9999991637587173, iteration: 109041
loss: 1.0124000310897827,grad_norm: 0.9658335311127161, iteration: 109042
loss: 0.9855867028236389,grad_norm: 0.9999992188971545, iteration: 109043
loss: 1.0300648212432861,grad_norm: 0.9999992360599674, iteration: 109044
loss: 1.0325944423675537,grad_norm: 0.973166494000859, iteration: 109045
loss: 1.0097033977508545,grad_norm: 0.8905226095460904, iteration: 109046
loss: 0.9783746600151062,grad_norm: 0.9999990475243742, iteration: 109047
loss: 0.992919921875,grad_norm: 0.8953522223183135, iteration: 109048
loss: 1.0648704767227173,grad_norm: 0.9999992082777693, iteration: 109049
loss: 0.980974555015564,grad_norm: 0.9509773540926783, iteration: 109050
loss: 0.9987947344779968,grad_norm: 0.9999991113378022, iteration: 109051
loss: 0.9944672584533691,grad_norm: 0.940028576854257, iteration: 109052
loss: 1.0065243244171143,grad_norm: 0.7851446041227371, iteration: 109053
loss: 1.0096062421798706,grad_norm: 0.9999989615367504, iteration: 109054
loss: 0.9915016889572144,grad_norm: 0.9052098617954962, iteration: 109055
loss: 1.00515878200531,grad_norm: 0.9999993091673077, iteration: 109056
loss: 0.9911097288131714,grad_norm: 0.9999992165369802, iteration: 109057
loss: 0.999059796333313,grad_norm: 0.9070372370619231, iteration: 109058
loss: 0.9840760231018066,grad_norm: 0.883882047680593, iteration: 109059
loss: 1.052720069885254,grad_norm: 0.9999995698724806, iteration: 109060
loss: 0.9886330366134644,grad_norm: 0.9999991575872037, iteration: 109061
loss: 0.989655077457428,grad_norm: 0.9999991146907619, iteration: 109062
loss: 1.0077649354934692,grad_norm: 0.9999990765535354, iteration: 109063
loss: 0.9978147149085999,grad_norm: 0.9999990379391863, iteration: 109064
loss: 1.0023787021636963,grad_norm: 0.999999059157036, iteration: 109065
loss: 1.0261973142623901,grad_norm: 0.9999991066422852, iteration: 109066
loss: 0.992503821849823,grad_norm: 0.9999990396013876, iteration: 109067
loss: 0.9703048467636108,grad_norm: 0.9250569258105215, iteration: 109068
loss: 0.9772158861160278,grad_norm: 0.9999990867996114, iteration: 109069
loss: 0.981202244758606,grad_norm: 0.9999991396434705, iteration: 109070
loss: 0.9887785911560059,grad_norm: 0.9999991099114006, iteration: 109071
loss: 1.0207561254501343,grad_norm: 0.9999992727358706, iteration: 109072
loss: 1.0712502002716064,grad_norm: 0.999999375075111, iteration: 109073
loss: 1.0433663129806519,grad_norm: 0.9999990741996632, iteration: 109074
loss: 1.0003036260604858,grad_norm: 0.9999993880185449, iteration: 109075
loss: 0.9582661986351013,grad_norm: 0.9999992329517249, iteration: 109076
loss: 0.9704100489616394,grad_norm: 0.9999991158230829, iteration: 109077
loss: 0.9833064079284668,grad_norm: 0.9465978660582759, iteration: 109078
loss: 0.9696656465530396,grad_norm: 0.9999990953698271, iteration: 109079
loss: 0.9959312677383423,grad_norm: 0.9999990116972999, iteration: 109080
loss: 0.9990547895431519,grad_norm: 0.9999990802085296, iteration: 109081
loss: 1.0147558450698853,grad_norm: 0.9999991490308551, iteration: 109082
loss: 0.9888601303100586,grad_norm: 0.9999991129734374, iteration: 109083
loss: 1.0232691764831543,grad_norm: 0.9999993417006576, iteration: 109084
loss: 1.0101534128189087,grad_norm: 0.9870084541607136, iteration: 109085
loss: 0.9870505928993225,grad_norm: 0.9999991754913766, iteration: 109086
loss: 1.009548306465149,grad_norm: 0.975219705673946, iteration: 109087
loss: 0.9864895939826965,grad_norm: 0.9970681955554199, iteration: 109088
loss: 0.976201593875885,grad_norm: 0.9999991585594633, iteration: 109089
loss: 0.9691824316978455,grad_norm: 0.9426717564523627, iteration: 109090
loss: 0.9875668883323669,grad_norm: 0.9999990467935473, iteration: 109091
loss: 1.0001673698425293,grad_norm: 0.8979091596265838, iteration: 109092
loss: 1.006307601928711,grad_norm: 0.96863559139609, iteration: 109093
loss: 1.030669093132019,grad_norm: 0.9999990811976291, iteration: 109094
loss: 0.9874177575111389,grad_norm: 0.9186848108405553, iteration: 109095
loss: 0.958324670791626,grad_norm: 0.9999992438680639, iteration: 109096
loss: 1.004095196723938,grad_norm: 0.867831123851157, iteration: 109097
loss: 1.0193172693252563,grad_norm: 0.9999991186330504, iteration: 109098
loss: 1.0474082231521606,grad_norm: 0.9999993020775654, iteration: 109099
loss: 0.9982050657272339,grad_norm: 0.9833477219721831, iteration: 109100
loss: 0.9842403531074524,grad_norm: 0.9999989793825652, iteration: 109101
loss: 0.9931268692016602,grad_norm: 0.8870863688771518, iteration: 109102
loss: 0.9748607873916626,grad_norm: 0.9999990743074011, iteration: 109103
loss: 1.0048396587371826,grad_norm: 0.9999991226490315, iteration: 109104
loss: 0.9950506091117859,grad_norm: 0.864426093320991, iteration: 109105
loss: 0.9613409042358398,grad_norm: 0.9999990419364845, iteration: 109106
loss: 1.0023794174194336,grad_norm: 0.9784067612062823, iteration: 109107
loss: 0.9751536846160889,grad_norm: 0.9999990131027348, iteration: 109108
loss: 0.9939187169075012,grad_norm: 0.9999988917502773, iteration: 109109
loss: 1.0157018899917603,grad_norm: 0.9999991952614494, iteration: 109110
loss: 0.991560697555542,grad_norm: 0.9999996854599863, iteration: 109111
loss: 1.003818392753601,grad_norm: 0.9999991026434294, iteration: 109112
loss: 1.0289934873580933,grad_norm: 0.8837295584819069, iteration: 109113
loss: 0.9713467359542847,grad_norm: 0.9999991076790227, iteration: 109114
loss: 1.0319205522537231,grad_norm: 0.9999991633649019, iteration: 109115
loss: 1.012171983718872,grad_norm: 0.8429465668440338, iteration: 109116
loss: 0.9755401015281677,grad_norm: 0.9049844968993677, iteration: 109117
loss: 0.9886313080787659,grad_norm: 0.9999992744938478, iteration: 109118
loss: 1.0440694093704224,grad_norm: 0.9706830070587273, iteration: 109119
loss: 0.9935454726219177,grad_norm: 0.9999991030111046, iteration: 109120
loss: 0.9815025925636292,grad_norm: 0.9999998143475192, iteration: 109121
loss: 1.0264568328857422,grad_norm: 0.9929112954813928, iteration: 109122
loss: 0.9882382750511169,grad_norm: 0.8711598266916785, iteration: 109123
loss: 0.9899337291717529,grad_norm: 0.9999992882169934, iteration: 109124
loss: 0.9887596964836121,grad_norm: 0.9999991931060135, iteration: 109125
loss: 1.000010371208191,grad_norm: 0.999999180724748, iteration: 109126
loss: 1.0176876783370972,grad_norm: 0.9226371724372194, iteration: 109127
loss: 1.0060955286026,grad_norm: 0.9999993021677482, iteration: 109128
loss: 0.99643474817276,grad_norm: 0.999999097282613, iteration: 109129
loss: 1.0079954862594604,grad_norm: 0.9627827803010346, iteration: 109130
loss: 1.086864709854126,grad_norm: 0.9999998045791594, iteration: 109131
loss: 1.0118173360824585,grad_norm: 0.9999991391770423, iteration: 109132
loss: 1.0172255039215088,grad_norm: 0.9578878348210879, iteration: 109133
loss: 1.0052021741867065,grad_norm: 0.9999992058041733, iteration: 109134
loss: 1.0008466243743896,grad_norm: 0.9999992623952975, iteration: 109135
loss: 1.0028016567230225,grad_norm: 0.815907449331095, iteration: 109136
loss: 0.9898330569267273,grad_norm: 0.9844175931549584, iteration: 109137
loss: 1.0186653137207031,grad_norm: 0.9999990142804365, iteration: 109138
loss: 1.0345176458358765,grad_norm: 0.9999995640805242, iteration: 109139
loss: 1.0707926750183105,grad_norm: 0.9936276316369871, iteration: 109140
loss: 0.9859941005706787,grad_norm: 0.9999991248290216, iteration: 109141
loss: 0.9898440837860107,grad_norm: 0.9999991888938567, iteration: 109142
loss: 0.9964292645454407,grad_norm: 0.9684288591965415, iteration: 109143
loss: 1.0266412496566772,grad_norm: 0.9999989928046764, iteration: 109144
loss: 1.047861933708191,grad_norm: 0.9999994565899021, iteration: 109145
loss: 0.9999935626983643,grad_norm: 0.8979442940176618, iteration: 109146
loss: 1.0190030336380005,grad_norm: 0.99999923599147, iteration: 109147
loss: 1.0749001502990723,grad_norm: 0.9999997203951683, iteration: 109148
loss: 1.0070624351501465,grad_norm: 0.9479921342000267, iteration: 109149
loss: 1.0214512348175049,grad_norm: 0.8571063436617591, iteration: 109150
loss: 0.9679887890815735,grad_norm: 0.9999996038726194, iteration: 109151
loss: 1.005112648010254,grad_norm: 0.8048525258888803, iteration: 109152
loss: 0.9751739501953125,grad_norm: 0.99999932019657, iteration: 109153
loss: 1.0051813125610352,grad_norm: 0.9999992054300436, iteration: 109154
loss: 0.99156653881073,grad_norm: 0.9999990960293749, iteration: 109155
loss: 0.9891532063484192,grad_norm: 0.9999990244468564, iteration: 109156
loss: 1.0224120616912842,grad_norm: 0.9999993062674155, iteration: 109157
loss: 0.984863817691803,grad_norm: 0.9650932626738118, iteration: 109158
loss: 1.0059987306594849,grad_norm: 0.9014961784221392, iteration: 109159
loss: 0.9793319702148438,grad_norm: 0.9999991128479487, iteration: 109160
loss: 0.9905959963798523,grad_norm: 0.9776232607179928, iteration: 109161
loss: 0.9956072568893433,grad_norm: 0.9999990873422654, iteration: 109162
loss: 0.9749935269355774,grad_norm: 0.9381098256582237, iteration: 109163
loss: 1.007378101348877,grad_norm: 0.9830697995416336, iteration: 109164
loss: 0.9653235673904419,grad_norm: 0.993434211038039, iteration: 109165
loss: 0.9619583487510681,grad_norm: 0.8882645099260723, iteration: 109166
loss: 0.9933224320411682,grad_norm: 0.8916344053261571, iteration: 109167
loss: 0.9564001560211182,grad_norm: 0.9501981562102965, iteration: 109168
loss: 0.990128755569458,grad_norm: 0.9391343229353722, iteration: 109169
loss: 1.0100122690200806,grad_norm: 0.9999991795923434, iteration: 109170
loss: 0.9934669137001038,grad_norm: 0.9383458248678009, iteration: 109171
loss: 0.976285457611084,grad_norm: 0.9430613763344822, iteration: 109172
loss: 0.9483705163002014,grad_norm: 0.999999187738271, iteration: 109173
loss: 1.007933259010315,grad_norm: 0.9999991124838422, iteration: 109174
loss: 1.1688957214355469,grad_norm: 0.9999998704219758, iteration: 109175
loss: 1.0265769958496094,grad_norm: 0.9999990264045291, iteration: 109176
loss: 0.9500536918640137,grad_norm: 0.9999990695514587, iteration: 109177
loss: 0.9985416531562805,grad_norm: 0.9999990583346514, iteration: 109178
loss: 1.0020015239715576,grad_norm: 0.9999991350933977, iteration: 109179
loss: 1.0092517137527466,grad_norm: 0.9320960080097538, iteration: 109180
loss: 0.971827507019043,grad_norm: 0.9999991938896082, iteration: 109181
loss: 1.0321341753005981,grad_norm: 0.9999992529087754, iteration: 109182
loss: 0.9966648817062378,grad_norm: 0.9831732847507594, iteration: 109183
loss: 0.9874605536460876,grad_norm: 0.9693606427262317, iteration: 109184
loss: 0.9964873194694519,grad_norm: 0.9999991979913845, iteration: 109185
loss: 0.9861679673194885,grad_norm: 0.9215865444348379, iteration: 109186
loss: 0.9911280274391174,grad_norm: 0.9534702913461668, iteration: 109187
loss: 0.9828596711158752,grad_norm: 0.9999991165050011, iteration: 109188
loss: 1.0074803829193115,grad_norm: 0.9999991760663989, iteration: 109189
loss: 1.0199692249298096,grad_norm: 0.999999181758926, iteration: 109190
loss: 0.9652978181838989,grad_norm: 0.999999104999139, iteration: 109191
loss: 1.017892837524414,grad_norm: 0.9999991763504754, iteration: 109192
loss: 0.9846301674842834,grad_norm: 0.9999991978373831, iteration: 109193
loss: 0.9551219940185547,grad_norm: 0.9325598858687576, iteration: 109194
loss: 1.0076823234558105,grad_norm: 0.9913648582990383, iteration: 109195
loss: 0.9739091992378235,grad_norm: 0.9999990849140369, iteration: 109196
loss: 0.9905979633331299,grad_norm: 0.9991423056075728, iteration: 109197
loss: 1.0109565258026123,grad_norm: 0.9999991921417524, iteration: 109198
loss: 1.019383192062378,grad_norm: 0.9999991206454936, iteration: 109199
loss: 0.9996687173843384,grad_norm: 0.9999991689093931, iteration: 109200
loss: 0.9720786809921265,grad_norm: 0.9999991762325466, iteration: 109201
loss: 0.9667927622795105,grad_norm: 0.9999991481815933, iteration: 109202
loss: 1.0423299074172974,grad_norm: 0.9963998102260326, iteration: 109203
loss: 0.9815670847892761,grad_norm: 0.9999993055949342, iteration: 109204
loss: 1.0098021030426025,grad_norm: 0.9999992372393482, iteration: 109205
loss: 0.9977161288261414,grad_norm: 0.9999992044485133, iteration: 109206
loss: 0.9727649092674255,grad_norm: 0.9918951798205189, iteration: 109207
loss: 0.9953507781028748,grad_norm: 0.8836556419863797, iteration: 109208
loss: 1.0291078090667725,grad_norm: 0.9515755757380072, iteration: 109209
loss: 0.990203857421875,grad_norm: 0.9928455421937828, iteration: 109210
loss: 0.9918604493141174,grad_norm: 0.9999997601097391, iteration: 109211
loss: 0.9875542521476746,grad_norm: 0.9581730619988487, iteration: 109212
loss: 0.9827031493186951,grad_norm: 0.9097843189305008, iteration: 109213
loss: 0.983117401599884,grad_norm: 0.9999990810332742, iteration: 109214
loss: 1.0209476947784424,grad_norm: 0.9999992982942191, iteration: 109215
loss: 0.9845319390296936,grad_norm: 0.9999990267552479, iteration: 109216
loss: 1.0060173273086548,grad_norm: 0.9148535198435981, iteration: 109217
loss: 1.0038189888000488,grad_norm: 0.9999991672264247, iteration: 109218
loss: 1.0029088258743286,grad_norm: 0.910596730787985, iteration: 109219
loss: 0.9834094643592834,grad_norm: 0.9999990795298158, iteration: 109220
loss: 1.0085536241531372,grad_norm: 0.9153859507874316, iteration: 109221
loss: 0.9728938341140747,grad_norm: 0.9999990595214993, iteration: 109222
loss: 1.0068669319152832,grad_norm: 0.9999991196652249, iteration: 109223
loss: 0.9510217905044556,grad_norm: 0.9999992905881412, iteration: 109224
loss: 0.9877778887748718,grad_norm: 0.999999161496662, iteration: 109225
loss: 1.0188244581222534,grad_norm: 0.9999992870661293, iteration: 109226
loss: 1.0265125036239624,grad_norm: 0.9999991484434996, iteration: 109227
loss: 1.0193346738815308,grad_norm: 0.8714804233384501, iteration: 109228
loss: 1.0082272291183472,grad_norm: 0.999999066900928, iteration: 109229
loss: 0.9889228940010071,grad_norm: 0.9999990558632132, iteration: 109230
loss: 1.0243834257125854,grad_norm: 0.8972215606580384, iteration: 109231
loss: 1.000681757926941,grad_norm: 0.96766269748314, iteration: 109232
loss: 1.0140101909637451,grad_norm: 0.9999993065434657, iteration: 109233
loss: 1.0092369318008423,grad_norm: 0.9999990058212573, iteration: 109234
loss: 0.95066899061203,grad_norm: 0.9999989979001346, iteration: 109235
loss: 0.9664921164512634,grad_norm: 0.999998858246966, iteration: 109236
loss: 0.9935831427574158,grad_norm: 0.9999989882732638, iteration: 109237
loss: 1.0306998491287231,grad_norm: 0.9999992473025091, iteration: 109238
loss: 0.9944786429405212,grad_norm: 0.9283498709152095, iteration: 109239
loss: 0.9991943836212158,grad_norm: 0.9999992081820389, iteration: 109240
loss: 0.9774160385131836,grad_norm: 0.9999991047356662, iteration: 109241
loss: 1.0145642757415771,grad_norm: 0.9999994130307943, iteration: 109242
loss: 0.9931637644767761,grad_norm: 0.9730018562983765, iteration: 109243
loss: 0.9665647149085999,grad_norm: 0.8700059421428383, iteration: 109244
loss: 0.9877511262893677,grad_norm: 0.9999992142949399, iteration: 109245
loss: 1.0322037935256958,grad_norm: 0.9999995985105494, iteration: 109246
loss: 1.0142408609390259,grad_norm: 0.9999992144221777, iteration: 109247
loss: 0.977303683757782,grad_norm: 0.8708410737012894, iteration: 109248
loss: 0.9443403482437134,grad_norm: 0.9999991493093154, iteration: 109249
loss: 0.96884685754776,grad_norm: 0.9559398333196428, iteration: 109250
loss: 1.0126981735229492,grad_norm: 0.9999990992691035, iteration: 109251
loss: 0.9679650664329529,grad_norm: 0.9999990405355956, iteration: 109252
loss: 1.024821400642395,grad_norm: 0.9999991575635343, iteration: 109253
loss: 1.1425671577453613,grad_norm: 0.9999996938653557, iteration: 109254
loss: 1.0149705410003662,grad_norm: 0.9999993231366247, iteration: 109255
loss: 1.003757119178772,grad_norm: 0.8011810874919367, iteration: 109256
loss: 1.012276291847229,grad_norm: 0.9999990712858993, iteration: 109257
loss: 1.0301709175109863,grad_norm: 0.961088767819813, iteration: 109258
loss: 1.0015581846237183,grad_norm: 0.9721343386628625, iteration: 109259
loss: 0.9974135160446167,grad_norm: 0.9999991282936874, iteration: 109260
loss: 0.9590979814529419,grad_norm: 0.9057037968107671, iteration: 109261
loss: 1.0116099119186401,grad_norm: 0.9999991827977734, iteration: 109262
loss: 0.9609671235084534,grad_norm: 0.9999991162495464, iteration: 109263
loss: 1.0235543251037598,grad_norm: 0.9999992076739507, iteration: 109264
loss: 1.0388472080230713,grad_norm: 0.9999990627701043, iteration: 109265
loss: 1.0269041061401367,grad_norm: 0.9999994433209898, iteration: 109266
loss: 0.9918361306190491,grad_norm: 0.9999991034045613, iteration: 109267
loss: 0.9884002804756165,grad_norm: 0.9266516428473706, iteration: 109268
loss: 1.0129411220550537,grad_norm: 0.9999991601342312, iteration: 109269
loss: 0.9958949685096741,grad_norm: 0.8705366662476873, iteration: 109270
loss: 0.9866570234298706,grad_norm: 0.9999990193592824, iteration: 109271
loss: 0.9678078889846802,grad_norm: 0.9999992256835168, iteration: 109272
loss: 1.0223675966262817,grad_norm: 0.9999992195116948, iteration: 109273
loss: 0.9892323613166809,grad_norm: 0.9999991345163743, iteration: 109274
loss: 0.9837673902511597,grad_norm: 0.9967057572989757, iteration: 109275
loss: 0.9989531636238098,grad_norm: 0.9999993440636464, iteration: 109276
loss: 1.019525170326233,grad_norm: 0.9466655188371826, iteration: 109277
loss: 0.9915500283241272,grad_norm: 0.9856459894865818, iteration: 109278
loss: 1.0119096040725708,grad_norm: 0.9999992402100785, iteration: 109279
loss: 1.0363962650299072,grad_norm: 0.9382863434094446, iteration: 109280
loss: 0.9980250000953674,grad_norm: 0.9439925077857871, iteration: 109281
loss: 1.0100935697555542,grad_norm: 0.9999992892025334, iteration: 109282
loss: 0.9850484728813171,grad_norm: 0.9999992287917686, iteration: 109283
loss: 1.0270673036575317,grad_norm: 0.9999991097392463, iteration: 109284
loss: 0.9911613464355469,grad_norm: 0.9999991974336057, iteration: 109285
loss: 0.996795654296875,grad_norm: 0.8814687582129258, iteration: 109286
loss: 0.9985501170158386,grad_norm: 0.9118639084868555, iteration: 109287
loss: 1.0083736181259155,grad_norm: 0.9999992254341276, iteration: 109288
loss: 0.9659909605979919,grad_norm: 0.9999992519494108, iteration: 109289
loss: 0.9962109327316284,grad_norm: 0.9863953107974296, iteration: 109290
loss: 0.9606752395629883,grad_norm: 0.958618463727475, iteration: 109291
loss: 1.0298242568969727,grad_norm: 0.9342657135988606, iteration: 109292
loss: 1.0327439308166504,grad_norm: 0.9999989810125111, iteration: 109293
loss: 0.966256320476532,grad_norm: 0.999999319427375, iteration: 109294
loss: 0.993979811668396,grad_norm: 0.9999992442084894, iteration: 109295
loss: 0.9880733489990234,grad_norm: 0.8937101396093092, iteration: 109296
loss: 0.9835193157196045,grad_norm: 0.9999992001155222, iteration: 109297
loss: 0.9377557039260864,grad_norm: 0.944191192616081, iteration: 109298
loss: 0.9974791407585144,grad_norm: 0.8379945356425876, iteration: 109299
loss: 0.9943810701370239,grad_norm: 0.9999990562021592, iteration: 109300
loss: 1.1067519187927246,grad_norm: 0.9999992085343005, iteration: 109301
loss: 1.0476011037826538,grad_norm: 0.999999198553511, iteration: 109302
loss: 0.9914647936820984,grad_norm: 0.8927327115623799, iteration: 109303
loss: 1.0042613744735718,grad_norm: 0.9999992912197652, iteration: 109304
loss: 1.005754828453064,grad_norm: 0.9999992803769588, iteration: 109305
loss: 1.0113916397094727,grad_norm: 0.999998983793566, iteration: 109306
loss: 0.9888224601745605,grad_norm: 0.9889984178047979, iteration: 109307
loss: 0.9772242307662964,grad_norm: 0.9033202060014274, iteration: 109308
loss: 0.9867137670516968,grad_norm: 0.9999992167904799, iteration: 109309
loss: 1.0098844766616821,grad_norm: 0.9999991275805609, iteration: 109310
loss: 1.0179804563522339,grad_norm: 0.9428146170663145, iteration: 109311
loss: 0.9886890649795532,grad_norm: 0.9999991552441647, iteration: 109312
loss: 0.9707391858100891,grad_norm: 0.9985496657430628, iteration: 109313
loss: 1.0007189512252808,grad_norm: 0.9999989507512133, iteration: 109314
loss: 1.015609622001648,grad_norm: 0.949775640377649, iteration: 109315
loss: 1.0290031433105469,grad_norm: 0.9999993160817123, iteration: 109316
loss: 1.0234750509262085,grad_norm: 0.9999991261328899, iteration: 109317
loss: 1.0119130611419678,grad_norm: 0.9999991563622553, iteration: 109318
loss: 1.011321783065796,grad_norm: 0.9999992119585043, iteration: 109319
loss: 0.9948474168777466,grad_norm: 0.9999991216325884, iteration: 109320
loss: 0.9876925349235535,grad_norm: 0.9999989839298945, iteration: 109321
loss: 0.9549182653427124,grad_norm: 0.992827227885119, iteration: 109322
loss: 0.9932603240013123,grad_norm: 0.9999991052455793, iteration: 109323
loss: 0.9841611385345459,grad_norm: 0.9265683166885695, iteration: 109324
loss: 1.0037654638290405,grad_norm: 0.9813946494806662, iteration: 109325
loss: 1.0201324224472046,grad_norm: 0.9999991729641436, iteration: 109326
loss: 0.9587965607643127,grad_norm: 0.9999992664430696, iteration: 109327
loss: 1.009049654006958,grad_norm: 0.999999103339538, iteration: 109328
loss: 1.000234603881836,grad_norm: 0.9999992025949626, iteration: 109329
loss: 0.9801768660545349,grad_norm: 0.9284146439713509, iteration: 109330
loss: 1.0244770050048828,grad_norm: 0.9670686267730695, iteration: 109331
loss: 0.978346586227417,grad_norm: 0.9999992116738619, iteration: 109332
loss: 1.0210299491882324,grad_norm: 0.9999991070461932, iteration: 109333
loss: 0.9768586754798889,grad_norm: 0.9034735554274241, iteration: 109334
loss: 1.0181481838226318,grad_norm: 0.9999992658964223, iteration: 109335
loss: 0.9731050729751587,grad_norm: 0.9999991709615824, iteration: 109336
loss: 0.9996298551559448,grad_norm: 0.9999991602296587, iteration: 109337
loss: 0.9934541583061218,grad_norm: 0.9590183945101687, iteration: 109338
loss: 1.0476480722427368,grad_norm: 0.967844949739699, iteration: 109339
loss: 0.9828222990036011,grad_norm: 0.9999990724588342, iteration: 109340
loss: 1.0078561305999756,grad_norm: 0.8933415157890726, iteration: 109341
loss: 1.0199484825134277,grad_norm: 0.9999993218443213, iteration: 109342
loss: 1.0452375411987305,grad_norm: 0.9999996432665451, iteration: 109343
loss: 1.003368854522705,grad_norm: 0.9999991488122496, iteration: 109344
loss: 1.004472017288208,grad_norm: 0.9999991612418825, iteration: 109345
loss: 0.9868329167366028,grad_norm: 0.9999993430465945, iteration: 109346
loss: 1.0212125778198242,grad_norm: 0.9385496466209412, iteration: 109347
loss: 1.0201963186264038,grad_norm: 0.9999991947914902, iteration: 109348
loss: 1.0097570419311523,grad_norm: 0.9185878334119942, iteration: 109349
loss: 0.9868542551994324,grad_norm: 0.9317239394665575, iteration: 109350
loss: 0.9713696241378784,grad_norm: 0.9999991419519465, iteration: 109351
loss: 0.978753387928009,grad_norm: 0.9435422122628344, iteration: 109352
loss: 0.9826650619506836,grad_norm: 0.9999992466955313, iteration: 109353
loss: 1.0109435319900513,grad_norm: 0.9723330274618502, iteration: 109354
loss: 0.9900833368301392,grad_norm: 0.9271630990078992, iteration: 109355
loss: 1.0077427625656128,grad_norm: 0.9999991447662551, iteration: 109356
loss: 0.9892810583114624,grad_norm: 0.9671116452526912, iteration: 109357
loss: 0.997093141078949,grad_norm: 0.9896839950289145, iteration: 109358
loss: 0.9823740124702454,grad_norm: 0.960005078822178, iteration: 109359
loss: 1.0978412628173828,grad_norm: 0.9999992899432683, iteration: 109360
loss: 1.0262935161590576,grad_norm: 0.9999995566938721, iteration: 109361
loss: 0.9677855968475342,grad_norm: 0.9999991884159025, iteration: 109362
loss: 0.9775984287261963,grad_norm: 0.9999989725073597, iteration: 109363
loss: 1.0326470136642456,grad_norm: 0.9999991820937892, iteration: 109364
loss: 1.0239441394805908,grad_norm: 0.9999990788705294, iteration: 109365
loss: 0.9829662442207336,grad_norm: 0.8423351627525507, iteration: 109366
loss: 0.9564533233642578,grad_norm: 0.9194526020116784, iteration: 109367
loss: 1.0334300994873047,grad_norm: 0.8274462710168909, iteration: 109368
loss: 0.9530385732650757,grad_norm: 0.9990929249375854, iteration: 109369
loss: 0.9922048449516296,grad_norm: 0.999999287658102, iteration: 109370
loss: 1.0047520399093628,grad_norm: 0.9108052860630359, iteration: 109371
loss: 0.9594200253486633,grad_norm: 0.9999989880169067, iteration: 109372
loss: 1.0349313020706177,grad_norm: 0.9999990494028346, iteration: 109373
loss: 1.0100957155227661,grad_norm: 0.9999992313832347, iteration: 109374
loss: 0.9735387563705444,grad_norm: 0.9999990803752623, iteration: 109375
loss: 0.9806491136550903,grad_norm: 0.9494151703132513, iteration: 109376
loss: 0.9980682730674744,grad_norm: 0.9145678496421982, iteration: 109377
loss: 1.0185909271240234,grad_norm: 0.9999992639552371, iteration: 109378
loss: 0.9858988523483276,grad_norm: 0.999999212674731, iteration: 109379
loss: 0.9819273352622986,grad_norm: 0.9999990036783635, iteration: 109380
loss: 0.9820699691772461,grad_norm: 0.9999989955235756, iteration: 109381
loss: 0.9742689728736877,grad_norm: 0.9999990837211434, iteration: 109382
loss: 1.0130637884140015,grad_norm: 0.9658106476922234, iteration: 109383
loss: 0.9751310348510742,grad_norm: 0.9679656272180673, iteration: 109384
loss: 0.9596512317657471,grad_norm: 0.9999992558938079, iteration: 109385
loss: 1.0071375370025635,grad_norm: 0.9999990308141984, iteration: 109386
loss: 1.0366873741149902,grad_norm: 0.9999992221191227, iteration: 109387
loss: 1.0369396209716797,grad_norm: 0.999999050584815, iteration: 109388
loss: 1.011855125427246,grad_norm: 0.9999992334961997, iteration: 109389
loss: 1.0007383823394775,grad_norm: 0.8984066675871184, iteration: 109390
loss: 0.9939952492713928,grad_norm: 0.999999083101825, iteration: 109391
loss: 1.0252361297607422,grad_norm: 0.9999990602058875, iteration: 109392
loss: 0.9620612263679504,grad_norm: 0.9315604288622643, iteration: 109393
loss: 1.0134997367858887,grad_norm: 0.9445467205383306, iteration: 109394
loss: 0.9915005564689636,grad_norm: 0.9999991008407432, iteration: 109395
loss: 1.0188881158828735,grad_norm: 0.9514373653164225, iteration: 109396
loss: 1.0226171016693115,grad_norm: 0.9999997527975351, iteration: 109397
loss: 0.9853919148445129,grad_norm: 0.9999990580427369, iteration: 109398
loss: 0.9905708432197571,grad_norm: 0.9999992477823954, iteration: 109399
loss: 1.01499605178833,grad_norm: 0.7984107592283753, iteration: 109400
loss: 0.9823939204216003,grad_norm: 0.9999993431995652, iteration: 109401
loss: 0.9795710444450378,grad_norm: 0.9999994955122945, iteration: 109402
loss: 0.9951344728469849,grad_norm: 0.9999991719920218, iteration: 109403
loss: 0.9781613349914551,grad_norm: 0.9999992325782381, iteration: 109404
loss: 0.9777904152870178,grad_norm: 0.9099348710647038, iteration: 109405
loss: 1.0325496196746826,grad_norm: 0.9999990753163931, iteration: 109406
loss: 0.986763060092926,grad_norm: 0.862062753340291, iteration: 109407
loss: 0.9818404912948608,grad_norm: 0.9861983428428991, iteration: 109408
loss: 0.9995580315589905,grad_norm: 0.9999992160478569, iteration: 109409
loss: 1.0057225227355957,grad_norm: 0.9999996930820321, iteration: 109410
loss: 1.008676528930664,grad_norm: 0.9756015145034255, iteration: 109411
loss: 1.097103476524353,grad_norm: 0.9999998440274515, iteration: 109412
loss: 1.0025652647018433,grad_norm: 0.8644039992204816, iteration: 109413
loss: 0.9876220226287842,grad_norm: 0.9593991422475957, iteration: 109414
loss: 0.9910019636154175,grad_norm: 0.9697223921340957, iteration: 109415
loss: 1.0135383605957031,grad_norm: 0.7685299064307024, iteration: 109416
loss: 0.9750345349311829,grad_norm: 0.8720163220000942, iteration: 109417
loss: 1.0054295063018799,grad_norm: 0.9999992624712881, iteration: 109418
loss: 1.0096702575683594,grad_norm: 0.9551336638137911, iteration: 109419
loss: 1.01621675491333,grad_norm: 0.7282741206158571, iteration: 109420
loss: 1.0168542861938477,grad_norm: 0.9999991549805535, iteration: 109421
loss: 1.0019129514694214,grad_norm: 0.9999991234989788, iteration: 109422
loss: 1.0168012380599976,grad_norm: 0.9999990054655825, iteration: 109423
loss: 0.9970951080322266,grad_norm: 0.9999998669476493, iteration: 109424
loss: 1.000397801399231,grad_norm: 0.9999989731639897, iteration: 109425
loss: 1.0684064626693726,grad_norm: 0.9999992179682365, iteration: 109426
loss: 0.9989434480667114,grad_norm: 0.9993506324448514, iteration: 109427
loss: 1.0818336009979248,grad_norm: 0.9999999330112697, iteration: 109428
loss: 0.98576420545578,grad_norm: 0.8556384026638117, iteration: 109429
loss: 1.014512538909912,grad_norm: 0.9999991312142076, iteration: 109430
loss: 0.9987651705741882,grad_norm: 0.8794379061547243, iteration: 109431
loss: 1.0255073308944702,grad_norm: 0.9280838236092033, iteration: 109432
loss: 0.9706521034240723,grad_norm: 0.9530020825173783, iteration: 109433
loss: 1.0923027992248535,grad_norm: 0.9999997371788605, iteration: 109434
loss: 1.1660581827163696,grad_norm: 0.9999996539155379, iteration: 109435
loss: 0.9999240636825562,grad_norm: 0.999999208073349, iteration: 109436
loss: 1.0157604217529297,grad_norm: 0.9999989872766476, iteration: 109437
loss: 1.1221472024917603,grad_norm: 0.9999994782044738, iteration: 109438
loss: 1.074235439300537,grad_norm: 0.9999992922062956, iteration: 109439
loss: 0.9759733080863953,grad_norm: 0.9999993393009242, iteration: 109440
loss: 1.002426266670227,grad_norm: 0.9999993164871216, iteration: 109441
loss: 0.9653815627098083,grad_norm: 0.9999992330160902, iteration: 109442
loss: 1.1625168323516846,grad_norm: 1.0000000048839677, iteration: 109443
loss: 1.0104076862335205,grad_norm: 0.9999991254087706, iteration: 109444
loss: 0.9886360168457031,grad_norm: 0.9999995635845532, iteration: 109445
loss: 1.0306857824325562,grad_norm: 0.9999990707299397, iteration: 109446
loss: 1.0327131748199463,grad_norm: 0.9906112507857564, iteration: 109447
loss: 0.9946885108947754,grad_norm: 0.9538312823265656, iteration: 109448
loss: 0.9984927773475647,grad_norm: 0.9999991752996034, iteration: 109449
loss: 1.004704236984253,grad_norm: 0.9999990828358944, iteration: 109450
loss: 1.013231873512268,grad_norm: 0.9999990686980984, iteration: 109451
loss: 0.9915076494216919,grad_norm: 0.9999991925982374, iteration: 109452
loss: 1.0098519325256348,grad_norm: 0.9886600412728361, iteration: 109453
loss: 0.9874075055122375,grad_norm: 0.9867971346795249, iteration: 109454
loss: 0.9775192141532898,grad_norm: 0.9936804455728728, iteration: 109455
loss: 1.0006853342056274,grad_norm: 0.9999990659114223, iteration: 109456
loss: 1.0526182651519775,grad_norm: 0.9999990624597724, iteration: 109457
loss: 1.0046409368515015,grad_norm: 0.9999990562300574, iteration: 109458
loss: 0.9859804511070251,grad_norm: 0.9999989725707492, iteration: 109459
loss: 1.0161044597625732,grad_norm: 0.9995240979946488, iteration: 109460
loss: 1.0337965488433838,grad_norm: 0.9999994800985461, iteration: 109461
loss: 1.0412733554840088,grad_norm: 0.9999992499561743, iteration: 109462
loss: 0.9678760170936584,grad_norm: 0.8732637939685236, iteration: 109463
loss: 0.9924585223197937,grad_norm: 0.9999991380363241, iteration: 109464
loss: 0.9744266271591187,grad_norm: 0.9999991845411348, iteration: 109465
loss: 1.087878704071045,grad_norm: 0.9999992304864735, iteration: 109466
loss: 0.9954668879508972,grad_norm: 0.9999991497444324, iteration: 109467
loss: 0.9486895203590393,grad_norm: 0.9999990415551769, iteration: 109468
loss: 1.024656891822815,grad_norm: 0.9999995166818133, iteration: 109469
loss: 1.023704171180725,grad_norm: 0.9999992093509619, iteration: 109470
loss: 0.9962772130966187,grad_norm: 0.9999991987366348, iteration: 109471
loss: 1.0426092147827148,grad_norm: 0.9999990490859307, iteration: 109472
loss: 0.9797201752662659,grad_norm: 0.999998980951074, iteration: 109473
loss: 1.0240675210952759,grad_norm: 0.9999994195605788, iteration: 109474
loss: 0.9714255332946777,grad_norm: 0.9999990808938857, iteration: 109475
loss: 1.0091156959533691,grad_norm: 0.9999990553316211, iteration: 109476
loss: 0.9963726997375488,grad_norm: 0.9999991330944615, iteration: 109477
loss: 0.9680870175361633,grad_norm: 0.9999990848193195, iteration: 109478
loss: 1.0464777946472168,grad_norm: 0.9999990732724349, iteration: 109479
loss: 1.0269378423690796,grad_norm: 0.8457320522826005, iteration: 109480
loss: 1.0223145484924316,grad_norm: 0.9999991152081694, iteration: 109481
loss: 1.0373691320419312,grad_norm: 0.9999991698525816, iteration: 109482
loss: 1.0147666931152344,grad_norm: 0.9999989909553554, iteration: 109483
loss: 1.0458048582077026,grad_norm: 0.9999990813476319, iteration: 109484
loss: 0.9944830536842346,grad_norm: 0.9999989724726241, iteration: 109485
loss: 1.025827169418335,grad_norm: 0.9539530839468557, iteration: 109486
loss: 1.0135859251022339,grad_norm: 0.999999123511646, iteration: 109487
loss: 0.9559515714645386,grad_norm: 0.9999991619606702, iteration: 109488
loss: 0.9706523418426514,grad_norm: 0.9999991120916578, iteration: 109489
loss: 0.9835664629936218,grad_norm: 0.9999991418787293, iteration: 109490
loss: 1.0133558511734009,grad_norm: 0.9999992928182071, iteration: 109491
loss: 1.0464304685592651,grad_norm: 0.9999990127297643, iteration: 109492
loss: 1.0010236501693726,grad_norm: 0.9999992615229595, iteration: 109493
loss: 0.9708229899406433,grad_norm: 0.9999991438352787, iteration: 109494
loss: 0.9998800158500671,grad_norm: 0.9999991658396143, iteration: 109495
loss: 1.0508836507797241,grad_norm: 0.9287202580746083, iteration: 109496
loss: 0.9749084711074829,grad_norm: 0.9999990198334727, iteration: 109497
loss: 0.9820266962051392,grad_norm: 0.9999992631977396, iteration: 109498
loss: 1.0668649673461914,grad_norm: 0.999999149918109, iteration: 109499
loss: 1.0601145029067993,grad_norm: 0.9999997720016917, iteration: 109500
loss: 1.0250581502914429,grad_norm: 0.9150785370770582, iteration: 109501
loss: 0.9940564036369324,grad_norm: 0.94289681220383, iteration: 109502
loss: 0.988654375076294,grad_norm: 0.9999991333237332, iteration: 109503
loss: 0.9659609198570251,grad_norm: 0.9415098721086053, iteration: 109504
loss: 0.990843653678894,grad_norm: 0.9805832785411593, iteration: 109505
loss: 1.0034176111221313,grad_norm: 0.9999991931031046, iteration: 109506
loss: 0.9720937609672546,grad_norm: 0.8999811250632375, iteration: 109507
loss: 1.0078511238098145,grad_norm: 0.9800834828197463, iteration: 109508
loss: 1.0122299194335938,grad_norm: 0.9999995983852192, iteration: 109509
loss: 1.0530263185501099,grad_norm: 0.9475742541250068, iteration: 109510
loss: 0.9601999521255493,grad_norm: 0.9636023341458133, iteration: 109511
loss: 0.9948470592498779,grad_norm: 0.9676427085628336, iteration: 109512
loss: 1.0163977146148682,grad_norm: 0.9999991877431887, iteration: 109513
loss: 1.0136443376541138,grad_norm: 0.9999990389008311, iteration: 109514
loss: 1.015606164932251,grad_norm: 0.9999991620235695, iteration: 109515
loss: 0.9608867764472961,grad_norm: 0.999998993738621, iteration: 109516
loss: 0.976553738117218,grad_norm: 0.9530556021899514, iteration: 109517
loss: 0.9833940267562866,grad_norm: 0.9999992096924706, iteration: 109518
loss: 1.0041255950927734,grad_norm: 0.9999990996616716, iteration: 109519
loss: 1.0089257955551147,grad_norm: 0.9999991248904139, iteration: 109520
loss: 0.9992485642433167,grad_norm: 0.999999261101965, iteration: 109521
loss: 1.0218584537506104,grad_norm: 0.9999992922095988, iteration: 109522
loss: 0.9924842715263367,grad_norm: 0.9999991695290593, iteration: 109523
loss: 1.0083106756210327,grad_norm: 0.8813665879696795, iteration: 109524
loss: 0.9830167293548584,grad_norm: 0.9999991006349405, iteration: 109525
loss: 0.9672536849975586,grad_norm: 0.9729843747082232, iteration: 109526
loss: 1.0206024646759033,grad_norm: 0.9999991530788924, iteration: 109527
loss: 1.0253933668136597,grad_norm: 0.9999990263131505, iteration: 109528
loss: 0.9805517792701721,grad_norm: 0.9999992046085828, iteration: 109529
loss: 1.0060893297195435,grad_norm: 0.9868457920977229, iteration: 109530
loss: 0.9945772886276245,grad_norm: 0.9759181949753213, iteration: 109531
loss: 0.9932743906974792,grad_norm: 0.999999181261793, iteration: 109532
loss: 0.9840238094329834,grad_norm: 0.9498307824653532, iteration: 109533
loss: 1.0202124118804932,grad_norm: 0.9999991179388984, iteration: 109534
loss: 1.0098261833190918,grad_norm: 0.9595851053497315, iteration: 109535
loss: 1.0198382139205933,grad_norm: 0.912729239960286, iteration: 109536
loss: 0.9978251457214355,grad_norm: 0.9999990438300987, iteration: 109537
loss: 1.0362356901168823,grad_norm: 0.9999990341480923, iteration: 109538
loss: 0.9973000884056091,grad_norm: 0.9999992202123654, iteration: 109539
loss: 1.0281546115875244,grad_norm: 0.9999996781706891, iteration: 109540
loss: 1.0142500400543213,grad_norm: 0.8812126425076031, iteration: 109541
loss: 0.9910845756530762,grad_norm: 0.9113718521992058, iteration: 109542
loss: 1.00307297706604,grad_norm: 0.9999990209623526, iteration: 109543
loss: 1.0090947151184082,grad_norm: 0.9999993070268168, iteration: 109544
loss: 0.990251898765564,grad_norm: 0.9535766065768189, iteration: 109545
loss: 0.9740695357322693,grad_norm: 0.8766560011607328, iteration: 109546
loss: 1.0362155437469482,grad_norm: 0.9999998974150276, iteration: 109547
loss: 0.9965125918388367,grad_norm: 0.999999214272032, iteration: 109548
loss: 1.0213360786437988,grad_norm: 0.99999908098821, iteration: 109549
loss: 1.0178802013397217,grad_norm: 0.9999993032014104, iteration: 109550
loss: 0.998748779296875,grad_norm: 0.9999991717602058, iteration: 109551
loss: 0.9903990030288696,grad_norm: 0.9799211699225711, iteration: 109552
loss: 0.9844993948936462,grad_norm: 0.9999995869136705, iteration: 109553
loss: 1.0101723670959473,grad_norm: 0.9579310760106893, iteration: 109554
loss: 0.9952630996704102,grad_norm: 0.933257577162626, iteration: 109555
loss: 1.0283410549163818,grad_norm: 0.9999990723576101, iteration: 109556
loss: 0.9964407086372375,grad_norm: 0.9999996657461643, iteration: 109557
loss: 0.9823188185691833,grad_norm: 0.9723978415441807, iteration: 109558
loss: 0.9988808035850525,grad_norm: 0.9999991283958726, iteration: 109559
loss: 1.0042771100997925,grad_norm: 0.9999992562941681, iteration: 109560
loss: 1.0641462802886963,grad_norm: 0.9999996182029065, iteration: 109561
loss: 1.1552199125289917,grad_norm: 0.9999999453446444, iteration: 109562
loss: 0.9925453066825867,grad_norm: 0.9999990459036905, iteration: 109563
loss: 1.0050569772720337,grad_norm: 0.9937736145098993, iteration: 109564
loss: 0.9852831363677979,grad_norm: 0.9999991244793252, iteration: 109565
loss: 1.0122400522232056,grad_norm: 0.9999990816744702, iteration: 109566
loss: 0.990088701248169,grad_norm: 0.9999990339903414, iteration: 109567
loss: 0.9940831661224365,grad_norm: 0.9999991641135856, iteration: 109568
loss: 1.0056618452072144,grad_norm: 0.999999253507844, iteration: 109569
loss: 1.066301941871643,grad_norm: 0.9999991797789134, iteration: 109570
loss: 0.9885197877883911,grad_norm: 0.9239211391962239, iteration: 109571
loss: 0.9794202446937561,grad_norm: 0.9999991282148574, iteration: 109572
loss: 1.0149562358856201,grad_norm: 0.999999116997673, iteration: 109573
loss: 1.174393892288208,grad_norm: 0.9999996048129178, iteration: 109574
loss: 1.0077670812606812,grad_norm: 0.9999992242189113, iteration: 109575
loss: 1.0206788778305054,grad_norm: 0.8741920259442445, iteration: 109576
loss: 1.0677775144577026,grad_norm: 0.999999162826881, iteration: 109577
loss: 1.0227084159851074,grad_norm: 0.9954339884292671, iteration: 109578
loss: 1.0111416578292847,grad_norm: 0.9999993136958604, iteration: 109579
loss: 0.9953190684318542,grad_norm: 0.9999992988849749, iteration: 109580
loss: 1.01853346824646,grad_norm: 0.9763858659428564, iteration: 109581
loss: 1.0056103467941284,grad_norm: 0.9661351241701599, iteration: 109582
loss: 0.9894173741340637,grad_norm: 0.999999075936215, iteration: 109583
loss: 1.0002374649047852,grad_norm: 0.9999998243526168, iteration: 109584
loss: 1.0370391607284546,grad_norm: 0.9999993430670189, iteration: 109585
loss: 0.9852845668792725,grad_norm: 0.9999991042905514, iteration: 109586
loss: 1.0354512929916382,grad_norm: 0.9999994340223717, iteration: 109587
loss: 0.9656690359115601,grad_norm: 0.9999993680686766, iteration: 109588
loss: 0.9925782084465027,grad_norm: 0.9944588492434783, iteration: 109589
loss: 1.0166205167770386,grad_norm: 0.9877560541484355, iteration: 109590
loss: 0.988078236579895,grad_norm: 0.9280182555996431, iteration: 109591
loss: 0.9742273092269897,grad_norm: 0.9999991519219715, iteration: 109592
loss: 1.0039392709732056,grad_norm: 0.9999992105289254, iteration: 109593
loss: 1.0387219190597534,grad_norm: 0.9999991062031224, iteration: 109594
loss: 1.024104118347168,grad_norm: 0.9673732621927565, iteration: 109595
loss: 0.986686646938324,grad_norm: 0.9999993400877147, iteration: 109596
loss: 0.966948926448822,grad_norm: 0.9999992693651619, iteration: 109597
loss: 0.998218297958374,grad_norm: 0.9999990463787944, iteration: 109598
loss: 0.993964672088623,grad_norm: 0.9999990802662988, iteration: 109599
loss: 0.9711849093437195,grad_norm: 0.9223647010166542, iteration: 109600
loss: 1.0247061252593994,grad_norm: 0.9999990527296865, iteration: 109601
loss: 1.0225961208343506,grad_norm: 0.9999992085804665, iteration: 109602
loss: 1.0306980609893799,grad_norm: 0.9999992454208132, iteration: 109603
loss: 0.9986200332641602,grad_norm: 0.9948113041957388, iteration: 109604
loss: 1.0030419826507568,grad_norm: 0.9999992104809229, iteration: 109605
loss: 1.0176756381988525,grad_norm: 0.9999990646263646, iteration: 109606
loss: 1.0487407445907593,grad_norm: 0.9999992178870734, iteration: 109607
loss: 0.9732779264450073,grad_norm: 0.9999991404469, iteration: 109608
loss: 1.0482672452926636,grad_norm: 0.9999994278272059, iteration: 109609
loss: 0.9648044109344482,grad_norm: 0.9999992452275759, iteration: 109610
loss: 1.0174579620361328,grad_norm: 0.8835163400411283, iteration: 109611
loss: 1.0323981046676636,grad_norm: 0.9999990441836528, iteration: 109612
loss: 1.0238635540008545,grad_norm: 0.999999240460099, iteration: 109613
loss: 0.9950334429740906,grad_norm: 0.9999991393593434, iteration: 109614
loss: 1.001808524131775,grad_norm: 0.9124498175778444, iteration: 109615
loss: 0.9853797554969788,grad_norm: 0.8878543724013752, iteration: 109616
loss: 1.010636568069458,grad_norm: 0.9742865846170431, iteration: 109617
loss: 1.0224961042404175,grad_norm: 0.9999991290494676, iteration: 109618
loss: 1.0020396709442139,grad_norm: 0.9625555645658371, iteration: 109619
loss: 1.016343593597412,grad_norm: 0.9999991322777603, iteration: 109620
loss: 1.025484561920166,grad_norm: 0.9999990736405889, iteration: 109621
loss: 0.9729572534561157,grad_norm: 0.9999991219120135, iteration: 109622
loss: 1.0113332271575928,grad_norm: 0.9999990305318525, iteration: 109623
loss: 0.9787417650222778,grad_norm: 0.999998957251533, iteration: 109624
loss: 0.9893903136253357,grad_norm: 0.8958706548102114, iteration: 109625
loss: 1.0018417835235596,grad_norm: 0.9999990918213686, iteration: 109626
loss: 0.9871660470962524,grad_norm: 0.8680508224150002, iteration: 109627
loss: 1.0107389688491821,grad_norm: 0.9999997896451291, iteration: 109628
loss: 0.9831289052963257,grad_norm: 0.9999993965326097, iteration: 109629
loss: 1.0099458694458008,grad_norm: 0.8798482373123239, iteration: 109630
loss: 0.984053373336792,grad_norm: 0.9999994595905518, iteration: 109631
loss: 0.9832898378372192,grad_norm: 0.9584135970563474, iteration: 109632
loss: 0.9944214224815369,grad_norm: 0.9999993171194926, iteration: 109633
loss: 1.0237903594970703,grad_norm: 0.9215158382856482, iteration: 109634
loss: 1.0011534690856934,grad_norm: 0.8778094778777812, iteration: 109635
loss: 1.0909159183502197,grad_norm: 0.9999999098476318, iteration: 109636
loss: 1.0347238779067993,grad_norm: 0.9999989576825787, iteration: 109637
loss: 1.1336948871612549,grad_norm: 0.9999993626379956, iteration: 109638
loss: 0.993808925151825,grad_norm: 0.9999991121096792, iteration: 109639
loss: 0.9918273687362671,grad_norm: 0.9813168415362413, iteration: 109640
loss: 1.0628447532653809,grad_norm: 0.9999992012904163, iteration: 109641
loss: 1.02599036693573,grad_norm: 0.9999992444545476, iteration: 109642
loss: 0.9972997903823853,grad_norm: 0.9139729518904448, iteration: 109643
loss: 1.0109751224517822,grad_norm: 0.999999058831818, iteration: 109644
loss: 1.0310184955596924,grad_norm: 0.9378523034498069, iteration: 109645
loss: 1.009210228919983,grad_norm: 0.974199202894135, iteration: 109646
loss: 1.0254911184310913,grad_norm: 0.9999992795754545, iteration: 109647
loss: 1.004179835319519,grad_norm: 0.9733221413940792, iteration: 109648
loss: 1.0219941139221191,grad_norm: 0.9999990829845691, iteration: 109649
loss: 1.0370838642120361,grad_norm: 0.9999992458251572, iteration: 109650
loss: 1.0110191106796265,grad_norm: 0.9999991227191187, iteration: 109651
loss: 1.0070786476135254,grad_norm: 0.9999990342995042, iteration: 109652
loss: 1.0267151594161987,grad_norm: 0.9999991463481361, iteration: 109653
loss: 0.9919830560684204,grad_norm: 0.9995055976279144, iteration: 109654
loss: 1.0319558382034302,grad_norm: 0.9999990741094478, iteration: 109655
loss: 0.9913010001182556,grad_norm: 0.8905356433292595, iteration: 109656
loss: 1.0009853839874268,grad_norm: 0.9999991474286473, iteration: 109657
loss: 1.0555404424667358,grad_norm: 0.9999992015554612, iteration: 109658
loss: 1.0154577493667603,grad_norm: 0.9941994253124317, iteration: 109659
loss: 0.9702786207199097,grad_norm: 0.9999992634363503, iteration: 109660
loss: 0.979569673538208,grad_norm: 0.8896091309627113, iteration: 109661
loss: 1.0275700092315674,grad_norm: 0.9999991861621789, iteration: 109662
loss: 1.0007104873657227,grad_norm: 0.9743189782863081, iteration: 109663
loss: 1.0438146591186523,grad_norm: 0.9880330527901708, iteration: 109664
loss: 0.9883579015731812,grad_norm: 0.9999990756033791, iteration: 109665
loss: 0.9967052340507507,grad_norm: 0.9999991716381396, iteration: 109666
loss: 0.9919148683547974,grad_norm: 0.9999990938041242, iteration: 109667
loss: 0.9686445593833923,grad_norm: 0.9999990254820252, iteration: 109668
loss: 0.9925801753997803,grad_norm: 0.9999990626801858, iteration: 109669
loss: 1.0237911939620972,grad_norm: 0.9813771468324638, iteration: 109670
loss: 1.0285868644714355,grad_norm: 0.9999990912486588, iteration: 109671
loss: 1.0219371318817139,grad_norm: 0.9999991214727822, iteration: 109672
loss: 0.9809812307357788,grad_norm: 0.9551670211508876, iteration: 109673
loss: 0.9678968787193298,grad_norm: 0.9999990725808187, iteration: 109674
loss: 1.0175213813781738,grad_norm: 0.9999990261775814, iteration: 109675
loss: 0.9956697821617126,grad_norm: 0.9999991041451899, iteration: 109676
loss: 0.9450150728225708,grad_norm: 0.9999991877203105, iteration: 109677
loss: 0.9602539539337158,grad_norm: 0.9887933252523773, iteration: 109678
loss: 1.0020427703857422,grad_norm: 0.8201151418010508, iteration: 109679
loss: 0.9829261898994446,grad_norm: 0.9994483965491581, iteration: 109680
loss: 0.9689544439315796,grad_norm: 0.9801903626145696, iteration: 109681
loss: 1.0292257070541382,grad_norm: 0.8114062316998597, iteration: 109682
loss: 1.0036005973815918,grad_norm: 0.9323898556866779, iteration: 109683
loss: 0.9956612586975098,grad_norm: 0.9267629653993104, iteration: 109684
loss: 0.9827418923377991,grad_norm: 0.8803031426821413, iteration: 109685
loss: 1.0176844596862793,grad_norm: 0.9999990176231429, iteration: 109686
loss: 0.9863457679748535,grad_norm: 0.9950693814053071, iteration: 109687
loss: 1.0243916511535645,grad_norm: 0.9999995103230049, iteration: 109688
loss: 0.9812933206558228,grad_norm: 0.9999991636644733, iteration: 109689
loss: 1.012733817100525,grad_norm: 0.9999991593568531, iteration: 109690
loss: 1.0000419616699219,grad_norm: 0.999999101507226, iteration: 109691
loss: 0.9596700668334961,grad_norm: 0.9999991376190215, iteration: 109692
loss: 0.9884222745895386,grad_norm: 0.9999992098411205, iteration: 109693
loss: 0.9987853169441223,grad_norm: 0.9999991739184844, iteration: 109694
loss: 1.0052733421325684,grad_norm: 0.9119462022309096, iteration: 109695
loss: 0.9987187385559082,grad_norm: 0.9999990164811229, iteration: 109696
loss: 1.0314918756484985,grad_norm: 0.9999997317020647, iteration: 109697
loss: 0.9885881543159485,grad_norm: 0.9999992063609144, iteration: 109698
loss: 1.0066004991531372,grad_norm: 0.9999990637348575, iteration: 109699
loss: 1.0449459552764893,grad_norm: 0.9999989866438782, iteration: 109700
loss: 0.9814788699150085,grad_norm: 0.9999992588866133, iteration: 109701
loss: 1.0009911060333252,grad_norm: 0.9999992784515507, iteration: 109702
loss: 0.9962409138679504,grad_norm: 0.999999020061056, iteration: 109703
loss: 1.0114973783493042,grad_norm: 0.9999990419640515, iteration: 109704
loss: 1.006632685661316,grad_norm: 0.9775121080881305, iteration: 109705
loss: 1.0014368295669556,grad_norm: 0.9999991238943955, iteration: 109706
loss: 1.0483224391937256,grad_norm: 0.9999991704958509, iteration: 109707
loss: 1.0147387981414795,grad_norm: 0.999999163100002, iteration: 109708
loss: 0.9900306463241577,grad_norm: 0.9999992051188428, iteration: 109709
loss: 0.99806809425354,grad_norm: 0.999999006189951, iteration: 109710
loss: 1.0017207860946655,grad_norm: 0.9999991787221003, iteration: 109711
loss: 1.030813455581665,grad_norm: 0.9999990919777683, iteration: 109712
loss: 1.0261656045913696,grad_norm: 0.9999992229886645, iteration: 109713
loss: 1.013555645942688,grad_norm: 0.9999991474020486, iteration: 109714
loss: 1.0039221048355103,grad_norm: 0.9640420878689804, iteration: 109715
loss: 1.0054335594177246,grad_norm: 0.872050295868979, iteration: 109716
loss: 1.014763355255127,grad_norm: 0.9447696290255599, iteration: 109717
loss: 1.0127314329147339,grad_norm: 0.9999992339188296, iteration: 109718
loss: 1.0096158981323242,grad_norm: 0.9724018232966096, iteration: 109719
loss: 0.977666437625885,grad_norm: 0.9305303722119042, iteration: 109720
loss: 1.0170607566833496,grad_norm: 0.9999990706031009, iteration: 109721
loss: 0.9880983233451843,grad_norm: 0.9999992274949965, iteration: 109722
loss: 0.9997533559799194,grad_norm: 0.9999991703230515, iteration: 109723
loss: 0.9923433661460876,grad_norm: 0.9999991473187216, iteration: 109724
loss: 0.9723321199417114,grad_norm: 0.9999991838445765, iteration: 109725
loss: 1.0133272409439087,grad_norm: 0.9999993155969003, iteration: 109726
loss: 1.0040074586868286,grad_norm: 0.9999990508625717, iteration: 109727
loss: 0.9715781211853027,grad_norm: 0.9999990208341843, iteration: 109728
loss: 1.004517674446106,grad_norm: 0.9999989006955111, iteration: 109729
loss: 1.0078083276748657,grad_norm: 0.9999992134445385, iteration: 109730
loss: 0.9898428320884705,grad_norm: 0.9999993295681574, iteration: 109731
loss: 1.0025230646133423,grad_norm: 0.9999991068974297, iteration: 109732
loss: 0.9795935750007629,grad_norm: 0.8870479837932556, iteration: 109733
loss: 1.0398199558258057,grad_norm: 0.9999991293237084, iteration: 109734
loss: 0.9788026809692383,grad_norm: 0.9999990410369901, iteration: 109735
loss: 1.0248836278915405,grad_norm: 0.8520484348188401, iteration: 109736
loss: 1.0118727684020996,grad_norm: 0.999999072421254, iteration: 109737
loss: 1.0042915344238281,grad_norm: 0.9999992824952741, iteration: 109738
loss: 1.0369346141815186,grad_norm: 0.9999991865912192, iteration: 109739
loss: 1.0375896692276,grad_norm: 0.9673830009942311, iteration: 109740
loss: 0.9924604296684265,grad_norm: 0.9999990413657163, iteration: 109741
loss: 0.9657669067382812,grad_norm: 0.9999991115510236, iteration: 109742
loss: 1.0183014869689941,grad_norm: 0.9999993338718589, iteration: 109743
loss: 1.0107672214508057,grad_norm: 0.9999991821634534, iteration: 109744
loss: 0.9366309642791748,grad_norm: 0.9999991882579952, iteration: 109745
loss: 1.0924612283706665,grad_norm: 0.9999992322811821, iteration: 109746
loss: 0.9947420358657837,grad_norm: 0.999999335130756, iteration: 109747
loss: 0.9838432669639587,grad_norm: 0.9999989715351812, iteration: 109748
loss: 1.0382388830184937,grad_norm: 0.9999997546792632, iteration: 109749
loss: 0.9972169399261475,grad_norm: 0.9999990658751576, iteration: 109750
loss: 0.9768306016921997,grad_norm: 0.9999990768301634, iteration: 109751
loss: 0.9922789335250854,grad_norm: 0.999999158106562, iteration: 109752
loss: 1.0445529222488403,grad_norm: 0.9999992024468496, iteration: 109753
loss: 1.0053749084472656,grad_norm: 0.9999990927451738, iteration: 109754
loss: 0.9635890126228333,grad_norm: 0.9999992335825109, iteration: 109755
loss: 0.9820222854614258,grad_norm: 0.9522077575945715, iteration: 109756
loss: 0.9644957780838013,grad_norm: 0.984516885665108, iteration: 109757
loss: 0.982804000377655,grad_norm: 0.9999989222031651, iteration: 109758
loss: 0.9910982251167297,grad_norm: 0.9999991147162897, iteration: 109759
loss: 0.9930562973022461,grad_norm: 0.9278007855270554, iteration: 109760
loss: 1.0274274349212646,grad_norm: 0.999999084739622, iteration: 109761
loss: 1.0170199871063232,grad_norm: 0.7508420960433708, iteration: 109762
loss: 1.0145944356918335,grad_norm: 0.9999998116500385, iteration: 109763
loss: 1.046678900718689,grad_norm: 0.9843484062685668, iteration: 109764
loss: 1.0417066812515259,grad_norm: 0.9999992083106852, iteration: 109765
loss: 1.0120446681976318,grad_norm: 0.8860079256839223, iteration: 109766
loss: 1.0255956649780273,grad_norm: 0.9999992485191637, iteration: 109767
loss: 0.9885675311088562,grad_norm: 0.9999990826024321, iteration: 109768
loss: 1.0248833894729614,grad_norm: 0.9725524492844918, iteration: 109769
loss: 1.020043134689331,grad_norm: 0.9999991972518925, iteration: 109770
loss: 1.00846529006958,grad_norm: 0.9999991190249296, iteration: 109771
loss: 1.0061954259872437,grad_norm: 0.9999994628029709, iteration: 109772
loss: 0.9986735582351685,grad_norm: 0.9999990885738823, iteration: 109773
loss: 1.0257344245910645,grad_norm: 0.9999989571402365, iteration: 109774
loss: 1.0163304805755615,grad_norm: 0.9999990699488527, iteration: 109775
loss: 0.9944083094596863,grad_norm: 0.806006507351926, iteration: 109776
loss: 0.9944539666175842,grad_norm: 0.958031174587829, iteration: 109777
loss: 1.0001410245895386,grad_norm: 0.9999991884000003, iteration: 109778
loss: 1.0014415979385376,grad_norm: 0.9223729143953601, iteration: 109779
loss: 1.0167429447174072,grad_norm: 0.9999991231636107, iteration: 109780
loss: 1.0137258768081665,grad_norm: 0.9999990542174233, iteration: 109781
loss: 1.0208468437194824,grad_norm: 0.9381450604772426, iteration: 109782
loss: 0.9669365882873535,grad_norm: 0.9999989089604924, iteration: 109783
loss: 0.9582003951072693,grad_norm: 0.9999990980132072, iteration: 109784
loss: 1.0067211389541626,grad_norm: 0.9357204566082636, iteration: 109785
loss: 1.011739730834961,grad_norm: 0.9999992714402116, iteration: 109786
loss: 0.9950101971626282,grad_norm: 0.9999991840629792, iteration: 109787
loss: 0.9929566979408264,grad_norm: 0.8793517905217265, iteration: 109788
loss: 1.0023574829101562,grad_norm: 0.9999991490521822, iteration: 109789
loss: 1.0189229249954224,grad_norm: 0.9999991069378716, iteration: 109790
loss: 1.014193058013916,grad_norm: 0.8227310975212739, iteration: 109791
loss: 1.0246589183807373,grad_norm: 0.9999991970758519, iteration: 109792
loss: 0.967094361782074,grad_norm: 0.9426140330762014, iteration: 109793
loss: 1.022922158241272,grad_norm: 0.9999996874275562, iteration: 109794
loss: 1.0395852327346802,grad_norm: 0.9999991333783526, iteration: 109795
loss: 1.004657506942749,grad_norm: 0.9319157073643357, iteration: 109796
loss: 0.9795390963554382,grad_norm: 0.9999991224412905, iteration: 109797
loss: 1.0342458486557007,grad_norm: 0.9999991383185152, iteration: 109798
loss: 1.0165181159973145,grad_norm: 0.9379259999708548, iteration: 109799
loss: 1.0243866443634033,grad_norm: 0.9999990236706574, iteration: 109800
loss: 0.9812663793563843,grad_norm: 0.9999991482557581, iteration: 109801
loss: 0.993730902671814,grad_norm: 0.9738979993614041, iteration: 109802
loss: 0.9500837326049805,grad_norm: 0.9999990778262643, iteration: 109803
loss: 1.0139647722244263,grad_norm: 0.9054216309090853, iteration: 109804
loss: 1.0212440490722656,grad_norm: 0.9999994456429767, iteration: 109805
loss: 1.0057011842727661,grad_norm: 0.9457589266046121, iteration: 109806
loss: 1.0434116125106812,grad_norm: 0.9999991627630518, iteration: 109807
loss: 0.975824773311615,grad_norm: 0.9720421837088891, iteration: 109808
loss: 1.034830927848816,grad_norm: 0.9999991229303515, iteration: 109809
loss: 1.0096601247787476,grad_norm: 0.9999989012178139, iteration: 109810
loss: 1.0005698204040527,grad_norm: 0.9999991542676572, iteration: 109811
loss: 1.0381733179092407,grad_norm: 0.999999150817242, iteration: 109812
loss: 0.9965920448303223,grad_norm: 0.9128415590606687, iteration: 109813
loss: 0.9985867738723755,grad_norm: 0.9999991760159377, iteration: 109814
loss: 0.975238025188446,grad_norm: 0.9999991714827441, iteration: 109815
loss: 1.0308042764663696,grad_norm: 0.9999990374490219, iteration: 109816
loss: 0.9819163084030151,grad_norm: 0.9999990930798903, iteration: 109817
loss: 0.9999046921730042,grad_norm: 0.9988121003608982, iteration: 109818
loss: 0.9895970225334167,grad_norm: 0.9999991601132463, iteration: 109819
loss: 1.0100454092025757,grad_norm: 0.9999990855012402, iteration: 109820
loss: 0.9866085648536682,grad_norm: 0.9999992551682884, iteration: 109821
loss: 1.001025676727295,grad_norm: 0.9999994000998761, iteration: 109822
loss: 1.0083330869674683,grad_norm: 0.9999990592184415, iteration: 109823
loss: 1.027452826499939,grad_norm: 0.9999991501118249, iteration: 109824
loss: 0.99284428358078,grad_norm: 0.9999991275346609, iteration: 109825
loss: 1.0030196905136108,grad_norm: 0.9999991613681014, iteration: 109826
loss: 0.9998018145561218,grad_norm: 0.9999990861291268, iteration: 109827
loss: 0.9811228513717651,grad_norm: 0.9999990898029854, iteration: 109828
loss: 0.9975333213806152,grad_norm: 0.9464111201561815, iteration: 109829
loss: 1.0297977924346924,grad_norm: 0.9999991378241567, iteration: 109830
loss: 1.0051665306091309,grad_norm: 0.982993259028224, iteration: 109831
loss: 0.9969832897186279,grad_norm: 0.9451256870117368, iteration: 109832
loss: 1.012236475944519,grad_norm: 0.9999990392083048, iteration: 109833
loss: 1.0133726596832275,grad_norm: 0.9999992159190673, iteration: 109834
loss: 0.9902588725090027,grad_norm: 0.9999991128492989, iteration: 109835
loss: 1.0110455751419067,grad_norm: 0.9999993292533342, iteration: 109836
loss: 1.0116677284240723,grad_norm: 0.9999990876076901, iteration: 109837
loss: 1.0365097522735596,grad_norm: 0.9999993787177712, iteration: 109838
loss: 1.0060453414916992,grad_norm: 0.9999991181387791, iteration: 109839
loss: 1.0297001600265503,grad_norm: 0.9999992361945654, iteration: 109840
loss: 0.9623879194259644,grad_norm: 0.9999991230625405, iteration: 109841
loss: 1.0153743028640747,grad_norm: 0.9999991304643188, iteration: 109842
loss: 0.9725808501243591,grad_norm: 0.9627404126722752, iteration: 109843
loss: 1.010254144668579,grad_norm: 0.9679768119728137, iteration: 109844
loss: 1.0062730312347412,grad_norm: 0.9074644891653931, iteration: 109845
loss: 0.9790154099464417,grad_norm: 0.9999991533243653, iteration: 109846
loss: 0.9911466836929321,grad_norm: 0.998684960154222, iteration: 109847
loss: 1.0013113021850586,grad_norm: 0.9015493006364692, iteration: 109848
loss: 0.9791512489318848,grad_norm: 0.9999991538911843, iteration: 109849
loss: 0.9791645407676697,grad_norm: 0.9999992040426673, iteration: 109850
loss: 0.9977251291275024,grad_norm: 0.9999989473320029, iteration: 109851
loss: 1.024596929550171,grad_norm: 0.9294128280834382, iteration: 109852
loss: 0.9949290752410889,grad_norm: 0.9642454811934872, iteration: 109853
loss: 1.0005759000778198,grad_norm: 0.9806120104069026, iteration: 109854
loss: 1.0180646181106567,grad_norm: 0.9999991514116725, iteration: 109855
loss: 1.0133343935012817,grad_norm: 0.9999992775142115, iteration: 109856
loss: 1.0029553174972534,grad_norm: 0.9999990770592775, iteration: 109857
loss: 0.9938974380493164,grad_norm: 0.9999992334485235, iteration: 109858
loss: 1.0082288980484009,grad_norm: 0.9924078075378568, iteration: 109859
loss: 1.0062639713287354,grad_norm: 0.9335820707437272, iteration: 109860
loss: 0.9910881519317627,grad_norm: 0.9999992182553661, iteration: 109861
loss: 0.9744461178779602,grad_norm: 0.9999990343162973, iteration: 109862
loss: 1.0332841873168945,grad_norm: 0.9889526549066717, iteration: 109863
loss: 1.0043448209762573,grad_norm: 0.9999994003692296, iteration: 109864
loss: 0.9830891489982605,grad_norm: 0.9999990486327391, iteration: 109865
loss: 0.9841320514678955,grad_norm: 0.9999990888986359, iteration: 109866
loss: 1.007279872894287,grad_norm: 0.9030881074652704, iteration: 109867
loss: 1.0132639408111572,grad_norm: 0.9999990741475598, iteration: 109868
loss: 1.0025190114974976,grad_norm: 0.9999993955849757, iteration: 109869
loss: 0.9842900633811951,grad_norm: 0.9999992077369484, iteration: 109870
loss: 1.0034528970718384,grad_norm: 0.966367373426474, iteration: 109871
loss: 0.9968991875648499,grad_norm: 0.9999989275687468, iteration: 109872
loss: 0.9739837646484375,grad_norm: 0.9999991574570634, iteration: 109873
loss: 0.9944815635681152,grad_norm: 0.9594739690387549, iteration: 109874
loss: 1.0185602903366089,grad_norm: 0.9999996666510216, iteration: 109875
loss: 0.9697474241256714,grad_norm: 0.9999990515403115, iteration: 109876
loss: 1.0367814302444458,grad_norm: 0.9906578682086842, iteration: 109877
loss: 0.9638379812240601,grad_norm: 0.9384299286378888, iteration: 109878
loss: 0.9853131175041199,grad_norm: 0.9054546969521842, iteration: 109879
loss: 0.9698250889778137,grad_norm: 0.999999231713289, iteration: 109880
loss: 1.012382984161377,grad_norm: 0.9999990883550954, iteration: 109881
loss: 1.0374335050582886,grad_norm: 0.9999994250154166, iteration: 109882
loss: 0.9732612371444702,grad_norm: 0.9999991885703857, iteration: 109883
loss: 0.9880583882331848,grad_norm: 0.987011174647366, iteration: 109884
loss: 1.0381641387939453,grad_norm: 0.9934802272939106, iteration: 109885
loss: 1.0171452760696411,grad_norm: 0.999999031532792, iteration: 109886
loss: 0.9406650066375732,grad_norm: 0.9063904856033, iteration: 109887
loss: 0.9998635053634644,grad_norm: 0.9999991504836626, iteration: 109888
loss: 0.9824411273002625,grad_norm: 0.8564504673811603, iteration: 109889
loss: 1.0178943872451782,grad_norm: 0.9999992122191216, iteration: 109890
loss: 0.9758608937263489,grad_norm: 0.999999578963502, iteration: 109891
loss: 0.9878777265548706,grad_norm: 0.7593424278583749, iteration: 109892
loss: 0.9891600608825684,grad_norm: 0.9999990987417264, iteration: 109893
loss: 1.0155036449432373,grad_norm: 0.9999989822563294, iteration: 109894
loss: 1.0249758958816528,grad_norm: 0.9999992084222589, iteration: 109895
loss: 0.9720697402954102,grad_norm: 0.8628651830301708, iteration: 109896
loss: 0.9917000532150269,grad_norm: 0.9825218918841305, iteration: 109897
loss: 1.0046045780181885,grad_norm: 0.9058278888711313, iteration: 109898
loss: 1.0051311254501343,grad_norm: 0.9412131718571338, iteration: 109899
loss: 0.9586119651794434,grad_norm: 0.983098531557562, iteration: 109900
loss: 0.9847005009651184,grad_norm: 0.8678367690247579, iteration: 109901
loss: 0.9899309873580933,grad_norm: 0.9710649525836331, iteration: 109902
loss: 0.9961946606636047,grad_norm: 0.9999991355820281, iteration: 109903
loss: 0.9933460354804993,grad_norm: 0.9033210768901364, iteration: 109904
loss: 0.984655499458313,grad_norm: 0.8602306750713818, iteration: 109905
loss: 1.0142526626586914,grad_norm: 0.9999990769036438, iteration: 109906
loss: 0.9753446578979492,grad_norm: 0.9594791342762218, iteration: 109907
loss: 0.9363846182823181,grad_norm: 0.8368694007263439, iteration: 109908
loss: 1.0212376117706299,grad_norm: 0.9999990596900709, iteration: 109909
loss: 1.0048505067825317,grad_norm: 0.9999991176839611, iteration: 109910
loss: 1.0784207582473755,grad_norm: 0.9999993120917571, iteration: 109911
loss: 1.0203770399093628,grad_norm: 0.9471669589651323, iteration: 109912
loss: 0.9785715341567993,grad_norm: 0.9999992237206169, iteration: 109913
loss: 1.0224236249923706,grad_norm: 0.9999994774540626, iteration: 109914
loss: 1.006589651107788,grad_norm: 0.9999992393969628, iteration: 109915
loss: 0.9852684140205383,grad_norm: 0.9937816812160548, iteration: 109916
loss: 1.0258262157440186,grad_norm: 0.9317865031045064, iteration: 109917
loss: 0.9777706265449524,grad_norm: 0.9999991771381729, iteration: 109918
loss: 1.0236680507659912,grad_norm: 0.9028775325780802, iteration: 109919
loss: 1.000923752784729,grad_norm: 0.9999994858099345, iteration: 109920
loss: 1.016146183013916,grad_norm: 0.9999991589691241, iteration: 109921
loss: 0.964018702507019,grad_norm: 0.9999991378820151, iteration: 109922
loss: 0.9933823943138123,grad_norm: 0.9535531137384121, iteration: 109923
loss: 0.9820939302444458,grad_norm: 0.9348587174341242, iteration: 109924
loss: 0.9913255572319031,grad_norm: 0.9999993076043407, iteration: 109925
loss: 1.0001952648162842,grad_norm: 0.9999991747330654, iteration: 109926
loss: 0.9866954684257507,grad_norm: 0.9218220417318498, iteration: 109927
loss: 1.0964981317520142,grad_norm: 0.999999381034387, iteration: 109928
loss: 0.977354109287262,grad_norm: 0.9999989980535078, iteration: 109929
loss: 1.0009260177612305,grad_norm: 0.9999990554864602, iteration: 109930
loss: 1.017586350440979,grad_norm: 0.9529948482934777, iteration: 109931
loss: 0.9930681586265564,grad_norm: 0.9344747066688555, iteration: 109932
loss: 1.015777826309204,grad_norm: 0.967297074328758, iteration: 109933
loss: 1.029556393623352,grad_norm: 0.9425798984072751, iteration: 109934
loss: 1.0309898853302002,grad_norm: 0.999999234982362, iteration: 109935
loss: 1.0496567487716675,grad_norm: 0.926856810260265, iteration: 109936
loss: 1.0055229663848877,grad_norm: 0.9999990702047595, iteration: 109937
loss: 1.0364384651184082,grad_norm: 0.9999997143422327, iteration: 109938
loss: 0.9988778233528137,grad_norm: 0.999999089380773, iteration: 109939
loss: 1.0245740413665771,grad_norm: 0.9999991760913425, iteration: 109940
loss: 0.9624832272529602,grad_norm: 0.9999990992336821, iteration: 109941
loss: 0.9829956293106079,grad_norm: 0.999999246826063, iteration: 109942
loss: 0.9839715957641602,grad_norm: 0.9999991593864617, iteration: 109943
loss: 0.993971586227417,grad_norm: 0.9999991720152389, iteration: 109944
loss: 1.0001780986785889,grad_norm: 0.9999993822794367, iteration: 109945
loss: 1.0505565404891968,grad_norm: 0.9999992306453914, iteration: 109946
loss: 1.005784511566162,grad_norm: 0.8429579183867317, iteration: 109947
loss: 0.9740007519721985,grad_norm: 0.9999991099229321, iteration: 109948
loss: 1.0203344821929932,grad_norm: 0.9999997731457059, iteration: 109949
loss: 0.9945257306098938,grad_norm: 0.9999993784512579, iteration: 109950
loss: 1.2253769636154175,grad_norm: 0.9999998757852707, iteration: 109951
loss: 1.0021106004714966,grad_norm: 0.9999991638319966, iteration: 109952
loss: 1.044637680053711,grad_norm: 0.9999992077746096, iteration: 109953
loss: 0.9843952059745789,grad_norm: 0.8524407466082197, iteration: 109954
loss: 0.9881592392921448,grad_norm: 0.9458057152668854, iteration: 109955
loss: 0.9848597049713135,grad_norm: 0.9999990704543431, iteration: 109956
loss: 0.9798572063446045,grad_norm: 0.8712189517508087, iteration: 109957
loss: 1.0484592914581299,grad_norm: 0.9999992832870709, iteration: 109958
loss: 1.005939245223999,grad_norm: 0.9464875126809174, iteration: 109959
loss: 1.0354753732681274,grad_norm: 0.9999992987812804, iteration: 109960
loss: 1.0049840211868286,grad_norm: 0.911577129242153, iteration: 109961
loss: 1.049109935760498,grad_norm: 0.9743112501828165, iteration: 109962
loss: 0.9855399131774902,grad_norm: 0.9937676581042652, iteration: 109963
loss: 0.9785841107368469,grad_norm: 0.9505066974011631, iteration: 109964
loss: 0.996188759803772,grad_norm: 0.9999994945849446, iteration: 109965
loss: 1.0021216869354248,grad_norm: 0.9999992135188226, iteration: 109966
loss: 1.0199089050292969,grad_norm: 0.9999991830082018, iteration: 109967
loss: 0.9787976741790771,grad_norm: 0.9584920090551406, iteration: 109968
loss: 1.0197278261184692,grad_norm: 0.9999996897077928, iteration: 109969
loss: 1.013807773590088,grad_norm: 0.9999991599745842, iteration: 109970
loss: 0.962070882320404,grad_norm: 0.9999992501839176, iteration: 109971
loss: 1.0075334310531616,grad_norm: 0.9999989510538906, iteration: 109972
loss: 1.006538987159729,grad_norm: 0.9658808760555997, iteration: 109973
loss: 1.0308011770248413,grad_norm: 0.9999992343156268, iteration: 109974
loss: 1.019723653793335,grad_norm: 0.9999991785857568, iteration: 109975
loss: 1.013095498085022,grad_norm: 0.9999990419712895, iteration: 109976
loss: 1.0103431940078735,grad_norm: 0.9999989860385746, iteration: 109977
loss: 0.979644238948822,grad_norm: 0.9999991936152716, iteration: 109978
loss: 0.9788792729377747,grad_norm: 0.9999992841118025, iteration: 109979
loss: 0.9990801215171814,grad_norm: 0.9999989960247468, iteration: 109980
loss: 1.0226420164108276,grad_norm: 0.9021072061989123, iteration: 109981
loss: 0.9975743293762207,grad_norm: 0.9907065558025976, iteration: 109982
loss: 1.029335379600525,grad_norm: 0.9999992698535525, iteration: 109983
loss: 1.0240538120269775,grad_norm: 0.9999993908036078, iteration: 109984
loss: 1.0137618780136108,grad_norm: 0.8544639478298188, iteration: 109985
loss: 0.9798585772514343,grad_norm: 0.9999992511887817, iteration: 109986
loss: 0.9986779689788818,grad_norm: 0.9999990015837399, iteration: 109987
loss: 1.0241304636001587,grad_norm: 0.9999991441192226, iteration: 109988
loss: 0.9924293160438538,grad_norm: 0.9999989756865222, iteration: 109989
loss: 1.004374623298645,grad_norm: 0.9999991547811075, iteration: 109990
loss: 0.9630222916603088,grad_norm: 0.9999990664977924, iteration: 109991
loss: 0.9950963854789734,grad_norm: 0.9999990611894329, iteration: 109992
loss: 0.9958063960075378,grad_norm: 0.9999991218251983, iteration: 109993
loss: 1.0103123188018799,grad_norm: 0.9188388470849903, iteration: 109994
loss: 1.0139535665512085,grad_norm: 0.8731978062609794, iteration: 109995
loss: 1.0149458646774292,grad_norm: 0.9884974094277256, iteration: 109996
loss: 1.0119293928146362,grad_norm: 0.9999991514897282, iteration: 109997
loss: 1.0217020511627197,grad_norm: 0.9889010702915333, iteration: 109998
loss: 1.0325039625167847,grad_norm: 0.9999992410916028, iteration: 109999
loss: 0.9877676963806152,grad_norm: 0.9995952113415485, iteration: 110000
Evaluating at step 110000
{'val': 0.9952383302152157, 'test': 2.790476723600881}
loss: 1.002184510231018,grad_norm: 0.9041366830773172, iteration: 110001
loss: 0.9949178099632263,grad_norm: 0.9999990859180299, iteration: 110002
loss: 0.9975459575653076,grad_norm: 0.9609300165943252, iteration: 110003
loss: 1.002754807472229,grad_norm: 0.9295516658343327, iteration: 110004
loss: 0.9997106194496155,grad_norm: 0.9760663928793158, iteration: 110005
loss: 0.9958010911941528,grad_norm: 0.9999991224403928, iteration: 110006
loss: 0.9792457222938538,grad_norm: 0.9999989942952426, iteration: 110007
loss: 0.9915322065353394,grad_norm: 0.8738021260029125, iteration: 110008
loss: 0.9363664984703064,grad_norm: 0.9999990402097647, iteration: 110009
loss: 0.9853118062019348,grad_norm: 0.9999992020836539, iteration: 110010
loss: 0.9795725345611572,grad_norm: 0.8400495495266151, iteration: 110011
loss: 1.0052578449249268,grad_norm: 0.9999993596532823, iteration: 110012
loss: 1.0043439865112305,grad_norm: 0.9999990838848003, iteration: 110013
loss: 0.9758764505386353,grad_norm: 0.999999169876307, iteration: 110014
loss: 0.9898093342781067,grad_norm: 0.9462851397304193, iteration: 110015
loss: 1.0490899085998535,grad_norm: 0.9999991438564672, iteration: 110016
loss: 0.9821501970291138,grad_norm: 0.9067049472026301, iteration: 110017
loss: 1.045318365097046,grad_norm: 0.999998996815462, iteration: 110018
loss: 0.9891147613525391,grad_norm: 0.9999991130824047, iteration: 110019
loss: 0.9985762238502502,grad_norm: 0.9999992533796966, iteration: 110020
loss: 0.9878792762756348,grad_norm: 0.9999994619707221, iteration: 110021
loss: 1.0445690155029297,grad_norm: 0.9999991276709033, iteration: 110022
loss: 1.0062718391418457,grad_norm: 0.8844082066936361, iteration: 110023
loss: 1.046776294708252,grad_norm: 0.9999991477200473, iteration: 110024
loss: 1.0115480422973633,grad_norm: 0.9999989814982504, iteration: 110025
loss: 1.017215609550476,grad_norm: 0.9573327211654936, iteration: 110026
loss: 1.0673604011535645,grad_norm: 0.9999992507879062, iteration: 110027
loss: 0.9893422722816467,grad_norm: 0.9999991112925987, iteration: 110028
loss: 0.9613800644874573,grad_norm: 0.9999989728855763, iteration: 110029
loss: 1.0109843015670776,grad_norm: 0.9999990990926789, iteration: 110030
loss: 1.0012375116348267,grad_norm: 0.9470748767863114, iteration: 110031
loss: 1.016980528831482,grad_norm: 0.9896456929050368, iteration: 110032
loss: 0.9667989611625671,grad_norm: 0.9999991454722154, iteration: 110033
loss: 0.9821755290031433,grad_norm: 0.9999992789368368, iteration: 110034
loss: 1.0124094486236572,grad_norm: 0.9999990545529496, iteration: 110035
loss: 0.9842337965965271,grad_norm: 0.9919269220025453, iteration: 110036
loss: 0.9797040820121765,grad_norm: 0.9261705771334868, iteration: 110037
loss: 0.9921007752418518,grad_norm: 0.9999991046123476, iteration: 110038
loss: 1.0171992778778076,grad_norm: 0.9722908998109897, iteration: 110039
loss: 0.9993014931678772,grad_norm: 0.9999990514331158, iteration: 110040
loss: 1.0091164112091064,grad_norm: 0.8965403266485521, iteration: 110041
loss: 1.0226470232009888,grad_norm: 0.9999990800715236, iteration: 110042
loss: 1.0100916624069214,grad_norm: 0.845218885658221, iteration: 110043
loss: 0.9812794327735901,grad_norm: 0.9985681078551157, iteration: 110044
loss: 1.0149751901626587,grad_norm: 0.9071537487041919, iteration: 110045
loss: 1.0042712688446045,grad_norm: 0.940089730373542, iteration: 110046
loss: 1.0052553415298462,grad_norm: 0.9721166225406056, iteration: 110047
loss: 0.9956095814704895,grad_norm: 0.999999130168268, iteration: 110048
loss: 0.9921451210975647,grad_norm: 0.9999989789871424, iteration: 110049
loss: 0.9741501808166504,grad_norm: 0.9999991021265702, iteration: 110050
loss: 0.9923020601272583,grad_norm: 0.9664774921921903, iteration: 110051
loss: 1.0112245082855225,grad_norm: 0.9999989611868841, iteration: 110052
loss: 1.0034013986587524,grad_norm: 0.9999991197358513, iteration: 110053
loss: 1.0151418447494507,grad_norm: 0.9440576266137173, iteration: 110054
loss: 0.9593119621276855,grad_norm: 0.9404699538534819, iteration: 110055
loss: 1.00614333152771,grad_norm: 0.8633958585255271, iteration: 110056
loss: 1.0593364238739014,grad_norm: 0.9999992753950371, iteration: 110057
loss: 1.0928211212158203,grad_norm: 0.9999996850854339, iteration: 110058
loss: 1.0157642364501953,grad_norm: 0.9999992349982417, iteration: 110059
loss: 0.9859248995780945,grad_norm: 0.8764941702899769, iteration: 110060
loss: 1.0040884017944336,grad_norm: 0.9999991420310782, iteration: 110061
loss: 0.9874257445335388,grad_norm: 0.8746911773554324, iteration: 110062
loss: 1.0117301940917969,grad_norm: 0.9999990610688363, iteration: 110063
loss: 1.009032130241394,grad_norm: 0.9999991982190014, iteration: 110064
loss: 0.977464497089386,grad_norm: 0.9999990967268005, iteration: 110065
loss: 1.0088390111923218,grad_norm: 0.999999169113668, iteration: 110066
loss: 1.0331323146820068,grad_norm: 0.9999992485361007, iteration: 110067
loss: 1.0205104351043701,grad_norm: 0.9999990394989269, iteration: 110068
loss: 1.0086002349853516,grad_norm: 0.9999989884723031, iteration: 110069
loss: 0.9875695705413818,grad_norm: 0.9156493788405505, iteration: 110070
loss: 1.0099374055862427,grad_norm: 0.9999990924121497, iteration: 110071
loss: 0.9803616404533386,grad_norm: 0.999999094202031, iteration: 110072
loss: 0.9963467717170715,grad_norm: 0.9999990089679451, iteration: 110073
loss: 1.0016244649887085,grad_norm: 0.9292766196868137, iteration: 110074
loss: 0.9745029211044312,grad_norm: 0.9999991070083862, iteration: 110075
loss: 0.9811986684799194,grad_norm: 0.99999916317766, iteration: 110076
loss: 0.995699942111969,grad_norm: 0.9999991929208157, iteration: 110077
loss: 1.006367564201355,grad_norm: 0.9999991606144378, iteration: 110078
loss: 0.9932466745376587,grad_norm: 0.9999991920327541, iteration: 110079
loss: 0.977302610874176,grad_norm: 0.99471243158602, iteration: 110080
loss: 0.9909930229187012,grad_norm: 0.9999992070315321, iteration: 110081
loss: 1.0019869804382324,grad_norm: 0.9999990435489652, iteration: 110082
loss: 1.036049485206604,grad_norm: 0.976198196344589, iteration: 110083
loss: 0.989565372467041,grad_norm: 0.9999990549232528, iteration: 110084
loss: 0.9879516959190369,grad_norm: 0.8016416584724649, iteration: 110085
loss: 1.0185812711715698,grad_norm: 0.9999997801866729, iteration: 110086
loss: 1.006844401359558,grad_norm: 0.9999990664174867, iteration: 110087
loss: 1.0266674757003784,grad_norm: 0.9246614409212559, iteration: 110088
loss: 1.0171087980270386,grad_norm: 0.9999992923868785, iteration: 110089
loss: 1.096588373184204,grad_norm: 0.9999993789821878, iteration: 110090
loss: 0.9824422001838684,grad_norm: 0.9420005829727933, iteration: 110091
loss: 1.025892734527588,grad_norm: 0.9999991024902669, iteration: 110092
loss: 0.9938539862632751,grad_norm: 0.9999989439703679, iteration: 110093
loss: 1.0125607252120972,grad_norm: 0.9922240507806265, iteration: 110094
loss: 1.0309386253356934,grad_norm: 0.9999997420535915, iteration: 110095
loss: 0.9974192380905151,grad_norm: 0.9999991212644954, iteration: 110096
loss: 0.9794833064079285,grad_norm: 0.9999993093262678, iteration: 110097
loss: 0.986568033695221,grad_norm: 0.9999991853858252, iteration: 110098
loss: 1.0258913040161133,grad_norm: 0.9999992596211513, iteration: 110099
loss: 0.9532186388969421,grad_norm: 0.9999992151237753, iteration: 110100
loss: 1.00755774974823,grad_norm: 0.9999991317731977, iteration: 110101
loss: 0.982664167881012,grad_norm: 0.9999991506899154, iteration: 110102
loss: 1.1231542825698853,grad_norm: 0.9999997531477021, iteration: 110103
loss: 1.0028862953186035,grad_norm: 0.928244549509368, iteration: 110104
loss: 0.9657576680183411,grad_norm: 0.9999991471985694, iteration: 110105
loss: 0.9950425028800964,grad_norm: 0.9999991535706118, iteration: 110106
loss: 1.0066936016082764,grad_norm: 0.9999991861441442, iteration: 110107
loss: 0.9972540736198425,grad_norm: 0.9999990025913553, iteration: 110108
loss: 0.998464047908783,grad_norm: 0.9999992099073949, iteration: 110109
loss: 1.0240010023117065,grad_norm: 0.9999993095436724, iteration: 110110
loss: 1.02189302444458,grad_norm: 0.9568474484033531, iteration: 110111
loss: 1.0092413425445557,grad_norm: 0.9563184687433578, iteration: 110112
loss: 1.0042158365249634,grad_norm: 0.9999990948540812, iteration: 110113
loss: 0.9984866976737976,grad_norm: 0.9999991812005975, iteration: 110114
loss: 1.0047959089279175,grad_norm: 0.9999990936507896, iteration: 110115
loss: 0.9579706788063049,grad_norm: 0.9999992236473204, iteration: 110116
loss: 0.9518049359321594,grad_norm: 0.8721583499810284, iteration: 110117
loss: 1.0153477191925049,grad_norm: 0.9971008756755503, iteration: 110118
loss: 1.0449583530426025,grad_norm: 0.9499279710330742, iteration: 110119
loss: 0.9977871179580688,grad_norm: 0.8224510703036724, iteration: 110120
loss: 0.9826983213424683,grad_norm: 0.9999992061104118, iteration: 110121
loss: 1.024175763130188,grad_norm: 0.9999990626277718, iteration: 110122
loss: 1.0343517065048218,grad_norm: 0.999999168771182, iteration: 110123
loss: 1.0162104368209839,grad_norm: 0.9026011916131473, iteration: 110124
loss: 1.0270501375198364,grad_norm: 0.9999990659833421, iteration: 110125
loss: 1.0141444206237793,grad_norm: 0.999999112746557, iteration: 110126
loss: 0.9544072151184082,grad_norm: 0.8903971619064618, iteration: 110127
loss: 0.997090220451355,grad_norm: 0.8394496902417158, iteration: 110128
loss: 0.9914305806159973,grad_norm: 0.9999992198162034, iteration: 110129
loss: 1.0016043186187744,grad_norm: 0.99999934739483, iteration: 110130
loss: 1.0239616632461548,grad_norm: 0.9084457526909561, iteration: 110131
loss: 1.0216306447982788,grad_norm: 0.9999991384668763, iteration: 110132
loss: 1.0395766496658325,grad_norm: 0.9999996950470028, iteration: 110133
loss: 1.009312629699707,grad_norm: 0.9999991191952937, iteration: 110134
loss: 1.0003663301467896,grad_norm: 0.976984300767193, iteration: 110135
loss: 0.9809540510177612,grad_norm: 0.9999992094854858, iteration: 110136
loss: 1.0069845914840698,grad_norm: 0.9999998717645938, iteration: 110137
loss: 0.9670266509056091,grad_norm: 0.9999991779129201, iteration: 110138
loss: 1.0114789009094238,grad_norm: 0.9999991688144667, iteration: 110139
loss: 0.9973767995834351,grad_norm: 0.9631859937438699, iteration: 110140
loss: 1.0015051364898682,grad_norm: 0.9999992839311046, iteration: 110141
loss: 1.0054278373718262,grad_norm: 0.9999991286111849, iteration: 110142
loss: 0.9578155279159546,grad_norm: 0.9999990770359884, iteration: 110143
loss: 1.0297729969024658,grad_norm: 0.9999992476334486, iteration: 110144
loss: 0.981433629989624,grad_norm: 0.999999145264871, iteration: 110145
loss: 0.9972323179244995,grad_norm: 0.9825428274145656, iteration: 110146
loss: 0.9841545820236206,grad_norm: 0.9762329745195529, iteration: 110147
loss: 0.9996584057807922,grad_norm: 0.9999989709105589, iteration: 110148
loss: 1.0119614601135254,grad_norm: 0.9999991804321757, iteration: 110149
loss: 1.0110080242156982,grad_norm: 0.9999990933286421, iteration: 110150
loss: 0.984580397605896,grad_norm: 0.8750337313973191, iteration: 110151
loss: 1.0255674123764038,grad_norm: 0.9999992383708614, iteration: 110152
loss: 1.0182727575302124,grad_norm: 0.9999991445440879, iteration: 110153
loss: 1.0181330442428589,grad_norm: 0.9999991563362052, iteration: 110154
loss: 1.0347014665603638,grad_norm: 0.999999183657329, iteration: 110155
loss: 1.0176788568496704,grad_norm: 0.9999991255341587, iteration: 110156
loss: 1.0243135690689087,grad_norm: 0.9999989945723317, iteration: 110157
loss: 1.0323399305343628,grad_norm: 0.9999992766743421, iteration: 110158
loss: 1.0925769805908203,grad_norm: 0.9999996833726501, iteration: 110159
loss: 0.9751407504081726,grad_norm: 0.9822395713485755, iteration: 110160
loss: 1.0398790836334229,grad_norm: 0.9999876685304513, iteration: 110161
loss: 0.9979429244995117,grad_norm: 0.9999991910966629, iteration: 110162
loss: 1.0375983715057373,grad_norm: 0.9999994588001726, iteration: 110163
loss: 1.0125467777252197,grad_norm: 0.9999992044551294, iteration: 110164
loss: 0.9921137690544128,grad_norm: 0.9999992712060718, iteration: 110165
loss: 1.0016319751739502,grad_norm: 0.9999991864742759, iteration: 110166
loss: 1.042912483215332,grad_norm: 0.999999206457867, iteration: 110167
loss: 1.058929681777954,grad_norm: 0.9999999920658529, iteration: 110168
loss: 0.9727057814598083,grad_norm: 0.8944687537731079, iteration: 110169
loss: 0.9891682267189026,grad_norm: 0.8872499716547554, iteration: 110170
loss: 1.0411992073059082,grad_norm: 0.9480428271496376, iteration: 110171
loss: 0.9980444312095642,grad_norm: 0.9999992451744864, iteration: 110172
loss: 1.020795464515686,grad_norm: 0.8767428675813002, iteration: 110173
loss: 0.9674866795539856,grad_norm: 0.9999989766872742, iteration: 110174
loss: 0.9753287434577942,grad_norm: 0.9999991125465311, iteration: 110175
loss: 0.982063889503479,grad_norm: 0.9182439942922818, iteration: 110176
loss: 1.0352205038070679,grad_norm: 0.9999991506749186, iteration: 110177
loss: 1.0160956382751465,grad_norm: 0.9999990887611123, iteration: 110178
loss: 0.9943246245384216,grad_norm: 0.9550174082373285, iteration: 110179
loss: 0.9917109608650208,grad_norm: 0.9917903208306789, iteration: 110180
loss: 0.9861278533935547,grad_norm: 0.9999992070808534, iteration: 110181
loss: 0.9966379404067993,grad_norm: 0.9999990135900124, iteration: 110182
loss: 0.9968962669372559,grad_norm: 0.874830500864352, iteration: 110183
loss: 0.9947522282600403,grad_norm: 0.999999194472245, iteration: 110184
loss: 0.9972870349884033,grad_norm: 0.9999990399408911, iteration: 110185
loss: 0.9946978688240051,grad_norm: 0.8367634188647748, iteration: 110186
loss: 0.9849034547805786,grad_norm: 0.9999991578728448, iteration: 110187
loss: 1.0220264196395874,grad_norm: 0.999999027554744, iteration: 110188
loss: 0.9983710646629333,grad_norm: 0.999999077057477, iteration: 110189
loss: 1.0288026332855225,grad_norm: 0.9999990890493927, iteration: 110190
loss: 0.9974120855331421,grad_norm: 0.9999991603281521, iteration: 110191
loss: 1.0075323581695557,grad_norm: 0.9999992523615495, iteration: 110192
loss: 0.9819541573524475,grad_norm: 0.9999990988820809, iteration: 110193
loss: 1.0193520784378052,grad_norm: 0.999999105338534, iteration: 110194
loss: 1.0115584135055542,grad_norm: 0.9999992667716707, iteration: 110195
loss: 0.9912195801734924,grad_norm: 0.999999164077191, iteration: 110196
loss: 0.9709908962249756,grad_norm: 0.9999991833608851, iteration: 110197
loss: 0.9878543019294739,grad_norm: 0.9999990437814431, iteration: 110198
loss: 1.0816739797592163,grad_norm: 0.9999999473335744, iteration: 110199
loss: 1.0153430700302124,grad_norm: 0.9999991827437086, iteration: 110200
loss: 0.9863734245300293,grad_norm: 0.8911623374283011, iteration: 110201
loss: 0.9953534007072449,grad_norm: 0.9747300590563983, iteration: 110202
loss: 0.9894424080848694,grad_norm: 0.9677844112236432, iteration: 110203
loss: 0.9857038259506226,grad_norm: 0.9999991108199703, iteration: 110204
loss: 1.0052835941314697,grad_norm: 0.9999990888048592, iteration: 110205
loss: 0.9464324116706848,grad_norm: 0.9999992054416622, iteration: 110206
loss: 1.0328806638717651,grad_norm: 0.9999991346380556, iteration: 110207
loss: 1.0393226146697998,grad_norm: 0.9911032699840608, iteration: 110208
loss: 0.9747698307037354,grad_norm: 0.9999992396605718, iteration: 110209
loss: 0.9819275140762329,grad_norm: 0.999999281650875, iteration: 110210
loss: 0.9705945253372192,grad_norm: 0.9513129023724193, iteration: 110211
loss: 0.9861935377120972,grad_norm: 0.9999991404494958, iteration: 110212
loss: 0.9950870275497437,grad_norm: 0.9999992118303012, iteration: 110213
loss: 0.941423773765564,grad_norm: 0.9999991521816867, iteration: 110214
loss: 0.9808618426322937,grad_norm: 0.999999082310446, iteration: 110215
loss: 1.024807333946228,grad_norm: 0.9999991838162902, iteration: 110216
loss: 0.9881939888000488,grad_norm: 0.9999991082961766, iteration: 110217
loss: 1.0066372156143188,grad_norm: 0.9999993072525022, iteration: 110218
loss: 0.9912049174308777,grad_norm: 0.999999039965496, iteration: 110219
loss: 0.9978334307670593,grad_norm: 0.9999991934966884, iteration: 110220
loss: 1.0129574537277222,grad_norm: 0.999999130023814, iteration: 110221
loss: 1.0351496934890747,grad_norm: 0.9714752282651495, iteration: 110222
loss: 1.003175973892212,grad_norm: 0.9607562056625361, iteration: 110223
loss: 0.9835783839225769,grad_norm: 0.9999991576545388, iteration: 110224
loss: 0.9695510864257812,grad_norm: 0.9898954691522832, iteration: 110225
loss: 1.0303339958190918,grad_norm: 0.9635909670804229, iteration: 110226
loss: 0.9841898679733276,grad_norm: 0.9999992400737892, iteration: 110227
loss: 1.0058798789978027,grad_norm: 0.9999991009739627, iteration: 110228
loss: 0.9967861175537109,grad_norm: 0.9999991670230568, iteration: 110229
loss: 1.0112966299057007,grad_norm: 0.9999991998822904, iteration: 110230
loss: 1.0087743997573853,grad_norm: 0.9999991987440737, iteration: 110231
loss: 0.9905733466148376,grad_norm: 0.9999991582442912, iteration: 110232
loss: 0.9619452357292175,grad_norm: 0.8708232760913486, iteration: 110233
loss: 0.9919192790985107,grad_norm: 0.9999990658792286, iteration: 110234
loss: 0.994901716709137,grad_norm: 0.8596070411338346, iteration: 110235
loss: 1.001469373703003,grad_norm: 0.9833983925200916, iteration: 110236
loss: 1.0044233798980713,grad_norm: 0.9999991705466894, iteration: 110237
loss: 1.0177425146102905,grad_norm: 0.9692478245068932, iteration: 110238
loss: 1.008742094039917,grad_norm: 0.9999989540840195, iteration: 110239
loss: 0.9858802556991577,grad_norm: 0.9999991048070209, iteration: 110240
loss: 1.0108768939971924,grad_norm: 0.9128889484047668, iteration: 110241
loss: 0.998051106929779,grad_norm: 0.9999991570199775, iteration: 110242
loss: 1.008498191833496,grad_norm: 0.999999030885269, iteration: 110243
loss: 1.0007905960083008,grad_norm: 0.9999990390351656, iteration: 110244
loss: 0.9918667078018188,grad_norm: 0.9999992712995952, iteration: 110245
loss: 0.9764503240585327,grad_norm: 0.999999373980968, iteration: 110246
loss: 1.0301567316055298,grad_norm: 0.9999989663129778, iteration: 110247
loss: 0.9548522233963013,grad_norm: 0.9288364141725405, iteration: 110248
loss: 1.0324612855911255,grad_norm: 0.9999995057511257, iteration: 110249
loss: 0.9603995084762573,grad_norm: 0.9745284455129406, iteration: 110250
loss: 0.9767068028450012,grad_norm: 0.87857608604793, iteration: 110251
loss: 1.0175280570983887,grad_norm: 0.9999991681251497, iteration: 110252
loss: 0.9655569195747375,grad_norm: 0.9705114161802221, iteration: 110253
loss: 0.9715321063995361,grad_norm: 0.9450334631573851, iteration: 110254
loss: 1.0143671035766602,grad_norm: 0.8935971314388599, iteration: 110255
loss: 1.0002775192260742,grad_norm: 0.9999992439334412, iteration: 110256
loss: 0.9953570365905762,grad_norm: 0.911514686090996, iteration: 110257
loss: 1.0317176580429077,grad_norm: 0.9999991098342691, iteration: 110258
loss: 0.9994586706161499,grad_norm: 0.9999990852801397, iteration: 110259
loss: 1.0431857109069824,grad_norm: 0.9999989638289591, iteration: 110260
loss: 0.9634369611740112,grad_norm: 0.9999992136163549, iteration: 110261
loss: 1.0198185443878174,grad_norm: 0.9999990978146789, iteration: 110262
loss: 1.0062311887741089,grad_norm: 0.999999203609189, iteration: 110263
loss: 1.0040233135223389,grad_norm: 0.9999990919723397, iteration: 110264
loss: 1.0027283430099487,grad_norm: 0.9999990834225927, iteration: 110265
loss: 1.0153367519378662,grad_norm: 0.9999991497370008, iteration: 110266
loss: 0.9575433135032654,grad_norm: 0.9999991394897165, iteration: 110267
loss: 1.000075340270996,grad_norm: 0.9369497914963321, iteration: 110268
loss: 0.9695231914520264,grad_norm: 0.926716609219672, iteration: 110269
loss: 1.00509774684906,grad_norm: 0.9999991616419517, iteration: 110270
loss: 0.981644868850708,grad_norm: 0.8873251363949439, iteration: 110271
loss: 0.9931056499481201,grad_norm: 0.9829487440819645, iteration: 110272
loss: 1.010588526725769,grad_norm: 0.9591250391531215, iteration: 110273
loss: 0.976008415222168,grad_norm: 0.9999991432880194, iteration: 110274
loss: 0.9838001132011414,grad_norm: 0.9967356946590886, iteration: 110275
loss: 1.0022815465927124,grad_norm: 0.9235614494130719, iteration: 110276
loss: 0.9696247577667236,grad_norm: 0.9999991116744358, iteration: 110277
loss: 1.0211387872695923,grad_norm: 0.999999113174549, iteration: 110278
loss: 1.0026447772979736,grad_norm: 0.8675128077234553, iteration: 110279
loss: 0.9897535443305969,grad_norm: 0.8959216881540739, iteration: 110280
loss: 1.009627103805542,grad_norm: 0.9999990653452373, iteration: 110281
loss: 0.9983174800872803,grad_norm: 0.999999059275144, iteration: 110282
loss: 1.0107609033584595,grad_norm: 0.9999990575375391, iteration: 110283
loss: 0.9546175599098206,grad_norm: 0.9999992210181587, iteration: 110284
loss: 0.9864816069602966,grad_norm: 0.9999990220763035, iteration: 110285
loss: 1.0024558305740356,grad_norm: 0.9546196099363481, iteration: 110286
loss: 1.0398739576339722,grad_norm: 0.9151091265387816, iteration: 110287
loss: 0.999919056892395,grad_norm: 0.9999991352612161, iteration: 110288
loss: 1.01053786277771,grad_norm: 0.9999992623797179, iteration: 110289
loss: 0.9815303087234497,grad_norm: 0.9999992056477868, iteration: 110290
loss: 1.0392905473709106,grad_norm: 0.9999989974673743, iteration: 110291
loss: 0.9761219024658203,grad_norm: 0.9999991953557112, iteration: 110292
loss: 0.9842527508735657,grad_norm: 0.9999991711545988, iteration: 110293
loss: 0.9702134728431702,grad_norm: 0.9815889138117002, iteration: 110294
loss: 1.0128648281097412,grad_norm: 0.9707196935944451, iteration: 110295
loss: 0.9955962896347046,grad_norm: 0.861783711254947, iteration: 110296
loss: 1.0014451742172241,grad_norm: 0.9999989807362601, iteration: 110297
loss: 1.007470726966858,grad_norm: 0.9285132624747734, iteration: 110298
loss: 1.0298656225204468,grad_norm: 0.9999990622924855, iteration: 110299
loss: 1.0209258794784546,grad_norm: 0.9999991402332397, iteration: 110300
loss: 1.0365347862243652,grad_norm: 0.9999992323587521, iteration: 110301
loss: 0.9829609394073486,grad_norm: 0.9999991705336888, iteration: 110302
loss: 1.0166945457458496,grad_norm: 0.9070397133009239, iteration: 110303
loss: 1.0358891487121582,grad_norm: 0.9999990853892428, iteration: 110304
loss: 1.0005956888198853,grad_norm: 0.9999993483444379, iteration: 110305
loss: 1.0098438262939453,grad_norm: 0.9999996571411516, iteration: 110306
loss: 1.012369990348816,grad_norm: 0.9999990996328881, iteration: 110307
loss: 0.9670597910881042,grad_norm: 0.9999992435718276, iteration: 110308
loss: 1.0025047063827515,grad_norm: 0.9999991106572227, iteration: 110309
loss: 0.9804174900054932,grad_norm: 0.9997906987435893, iteration: 110310
loss: 1.0005159378051758,grad_norm: 0.99999932184585, iteration: 110311
loss: 1.0316470861434937,grad_norm: 0.9999991087732869, iteration: 110312
loss: 1.0259965658187866,grad_norm: 0.9999992026372917, iteration: 110313
loss: 0.9782282710075378,grad_norm: 0.999999080366174, iteration: 110314
loss: 0.993759036064148,grad_norm: 0.9999991406916015, iteration: 110315
loss: 0.9899288415908813,grad_norm: 0.9530410204832123, iteration: 110316
loss: 1.0074827671051025,grad_norm: 0.967477407052488, iteration: 110317
loss: 0.9747408032417297,grad_norm: 0.9999991037186005, iteration: 110318
loss: 0.9851411581039429,grad_norm: 0.9480599629625546, iteration: 110319
loss: 1.0167150497436523,grad_norm: 0.9999991345842403, iteration: 110320
loss: 1.0315959453582764,grad_norm: 0.9999992156301123, iteration: 110321
loss: 0.9987523555755615,grad_norm: 0.8382462370340727, iteration: 110322
loss: 1.0382657051086426,grad_norm: 0.9999998748430999, iteration: 110323
loss: 0.9748758673667908,grad_norm: 0.9321579041557104, iteration: 110324
loss: 0.9997403621673584,grad_norm: 0.9999990330733682, iteration: 110325
loss: 1.0086300373077393,grad_norm: 0.9999993118775862, iteration: 110326
loss: 1.004876732826233,grad_norm: 0.9605520532889965, iteration: 110327
loss: 1.0497785806655884,grad_norm: 0.9999998015045914, iteration: 110328
loss: 0.9953994750976562,grad_norm: 0.9865768849970155, iteration: 110329
loss: 0.9969772100448608,grad_norm: 0.9999990633550263, iteration: 110330
loss: 1.0008265972137451,grad_norm: 0.9999991296509679, iteration: 110331
loss: 0.9939925670623779,grad_norm: 0.9999992268771605, iteration: 110332
loss: 1.0152825117111206,grad_norm: 0.9999990946156848, iteration: 110333
loss: 1.003024935722351,grad_norm: 0.9762970942991613, iteration: 110334
loss: 0.9924942255020142,grad_norm: 0.9999991591793431, iteration: 110335
loss: 0.982615053653717,grad_norm: 0.9999991625049527, iteration: 110336
loss: 1.0002914667129517,grad_norm: 0.971665341208604, iteration: 110337
loss: 1.010468602180481,grad_norm: 0.9836295557468563, iteration: 110338
loss: 1.0050891637802124,grad_norm: 0.9738934600534939, iteration: 110339
loss: 1.0077342987060547,grad_norm: 0.9999991410573555, iteration: 110340
loss: 1.0086549520492554,grad_norm: 0.9999991579363511, iteration: 110341
loss: 1.006098747253418,grad_norm: 0.9999992278068041, iteration: 110342
loss: 1.0390673875808716,grad_norm: 0.9999990891594239, iteration: 110343
loss: 1.0094728469848633,grad_norm: 0.9999990794712199, iteration: 110344
loss: 1.0031272172927856,grad_norm: 0.999999166666829, iteration: 110345
loss: 0.9796234369277954,grad_norm: 0.99999901061646, iteration: 110346
loss: 1.0040149688720703,grad_norm: 0.9586625923131027, iteration: 110347
loss: 1.0149999856948853,grad_norm: 0.9977342488756208, iteration: 110348
loss: 0.9742218255996704,grad_norm: 0.9361980587852445, iteration: 110349
loss: 0.9743264317512512,grad_norm: 0.9348849739282314, iteration: 110350
loss: 0.9935653805732727,grad_norm: 0.9438729320211159, iteration: 110351
loss: 0.9944585561752319,grad_norm: 0.8983636390468599, iteration: 110352
loss: 0.9788269996643066,grad_norm: 0.9999991334710192, iteration: 110353
loss: 0.9918578267097473,grad_norm: 0.9999991318111019, iteration: 110354
loss: 0.9878082275390625,grad_norm: 0.9538784615992504, iteration: 110355
loss: 1.0006954669952393,grad_norm: 0.9999992663237656, iteration: 110356
loss: 0.977608323097229,grad_norm: 0.8602099222700078, iteration: 110357
loss: 1.0337929725646973,grad_norm: 0.9999991267087903, iteration: 110358
loss: 1.0183864831924438,grad_norm: 0.8900866269491932, iteration: 110359
loss: 0.9863246083259583,grad_norm: 0.8592177746271454, iteration: 110360
loss: 1.0129753351211548,grad_norm: 0.9999991257808244, iteration: 110361
loss: 1.0287754535675049,grad_norm: 0.9590640651296574, iteration: 110362
loss: 0.9860318899154663,grad_norm: 0.9999990621593869, iteration: 110363
loss: 1.0274120569229126,grad_norm: 0.9999991019880516, iteration: 110364
loss: 0.9911209344863892,grad_norm: 0.9999991939845572, iteration: 110365
loss: 0.9971104264259338,grad_norm: 0.9094593138814356, iteration: 110366
loss: 1.0065075159072876,grad_norm: 0.9999990524429554, iteration: 110367
loss: 1.000360131263733,grad_norm: 0.9999991965510598, iteration: 110368
loss: 1.0063127279281616,grad_norm: 0.9999989725562975, iteration: 110369
loss: 1.1159141063690186,grad_norm: 0.9999996856210697, iteration: 110370
loss: 1.0369867086410522,grad_norm: 0.9094762643208565, iteration: 110371
loss: 0.9785289764404297,grad_norm: 0.9999992936086562, iteration: 110372
loss: 1.0074503421783447,grad_norm: 0.9999992279088705, iteration: 110373
loss: 1.0451021194458008,grad_norm: 0.9999992725821699, iteration: 110374
loss: 1.0100041627883911,grad_norm: 0.9999990892629917, iteration: 110375
loss: 0.9834861159324646,grad_norm: 0.9999992911992017, iteration: 110376
loss: 0.9914578199386597,grad_norm: 0.9000596034437994, iteration: 110377
loss: 1.0351743698120117,grad_norm: 0.99999967166393, iteration: 110378
loss: 0.9931623339653015,grad_norm: 0.9998843788821054, iteration: 110379
loss: 1.0091756582260132,grad_norm: 0.9796219264871657, iteration: 110380
loss: 0.9915661215782166,grad_norm: 0.999999241471392, iteration: 110381
loss: 1.0241374969482422,grad_norm: 0.9999991875227318, iteration: 110382
loss: 1.0036892890930176,grad_norm: 0.9999993334338774, iteration: 110383
loss: 1.0078986883163452,grad_norm: 0.9999990594743804, iteration: 110384
loss: 0.9947537779808044,grad_norm: 0.9999991551100281, iteration: 110385
loss: 1.0093990564346313,grad_norm: 0.9525873588003922, iteration: 110386
loss: 1.0044598579406738,grad_norm: 0.870137652554985, iteration: 110387
loss: 1.0298082828521729,grad_norm: 0.9510501188731671, iteration: 110388
loss: 0.9933303594589233,grad_norm: 0.9999989994120813, iteration: 110389
loss: 1.0800580978393555,grad_norm: 0.999999506597461, iteration: 110390
loss: 1.011637806892395,grad_norm: 0.9999992225734645, iteration: 110391
loss: 1.0313827991485596,grad_norm: 0.8807719211874317, iteration: 110392
loss: 1.028692603111267,grad_norm: 0.9999991474443068, iteration: 110393
loss: 0.998046875,grad_norm: 0.9743043695935064, iteration: 110394
loss: 1.0000908374786377,grad_norm: 0.999999099395049, iteration: 110395
loss: 1.0475612878799438,grad_norm: 0.999999089296811, iteration: 110396
loss: 0.931388795375824,grad_norm: 0.9999990169923701, iteration: 110397
loss: 0.9764260649681091,grad_norm: 0.8842568890168502, iteration: 110398
loss: 1.004575252532959,grad_norm: 0.9999991790450602, iteration: 110399
loss: 1.0130271911621094,grad_norm: 0.9999992581996243, iteration: 110400
loss: 0.9671586751937866,grad_norm: 0.9999990010489559, iteration: 110401
loss: 1.0159807205200195,grad_norm: 0.9999992005040509, iteration: 110402
loss: 0.9969651103019714,grad_norm: 0.9999990396161385, iteration: 110403
loss: 1.0099120140075684,grad_norm: 0.9999992072364424, iteration: 110404
loss: 1.0154858827590942,grad_norm: 0.9999993116131508, iteration: 110405
loss: 1.0003056526184082,grad_norm: 0.9999997996279735, iteration: 110406
loss: 0.9869145750999451,grad_norm: 0.9999992910503557, iteration: 110407
loss: 1.0032222270965576,grad_norm: 0.9898722943377529, iteration: 110408
loss: 0.9916083812713623,grad_norm: 0.9999992361860345, iteration: 110409
loss: 1.0091379880905151,grad_norm: 0.9999991664614525, iteration: 110410
loss: 1.0129520893096924,grad_norm: 0.9548575219131504, iteration: 110411
loss: 1.0092616081237793,grad_norm: 0.9999992715653631, iteration: 110412
loss: 0.9671688675880432,grad_norm: 0.9999991598502265, iteration: 110413
loss: 0.9852820634841919,grad_norm: 0.9999989228438384, iteration: 110414
loss: 0.9575100541114807,grad_norm: 0.9999991471144202, iteration: 110415
loss: 1.009931206703186,grad_norm: 0.9999989704764147, iteration: 110416
loss: 1.0218160152435303,grad_norm: 0.999999174404142, iteration: 110417
loss: 0.9909844994544983,grad_norm: 0.9999996348679485, iteration: 110418
loss: 0.9907681941986084,grad_norm: 0.9999990805930857, iteration: 110419
loss: 1.028299331665039,grad_norm: 0.9999992012375046, iteration: 110420
loss: 0.9968317747116089,grad_norm: 0.9362412116585236, iteration: 110421
loss: 0.9384245276451111,grad_norm: 0.9999991452183755, iteration: 110422
loss: 0.9766464829444885,grad_norm: 0.9730691540696258, iteration: 110423
loss: 0.9959394931793213,grad_norm: 0.9999991481286916, iteration: 110424
loss: 0.9764633178710938,grad_norm: 0.8824997707192404, iteration: 110425
loss: 0.9689363241195679,grad_norm: 0.9524844311392019, iteration: 110426
loss: 1.0165401697158813,grad_norm: 0.9999994852218816, iteration: 110427
loss: 1.009021282196045,grad_norm: 0.9999992558676742, iteration: 110428
loss: 0.9946005940437317,grad_norm: 0.9999991438921284, iteration: 110429
loss: 0.9960206747055054,grad_norm: 0.9984676554590427, iteration: 110430
loss: 1.004611611366272,grad_norm: 0.9999991781397972, iteration: 110431
loss: 1.0347533226013184,grad_norm: 1.000000071106316, iteration: 110432
loss: 0.9856112599372864,grad_norm: 0.8709652039444702, iteration: 110433
loss: 1.0209312438964844,grad_norm: 0.9726629793724075, iteration: 110434
loss: 1.0252569913864136,grad_norm: 0.999999656102177, iteration: 110435
loss: 1.0299943685531616,grad_norm: 0.9999992326784654, iteration: 110436
loss: 0.9647693634033203,grad_norm: 0.9171114815001082, iteration: 110437
loss: 0.982326865196228,grad_norm: 0.9853237540086714, iteration: 110438
loss: 1.117890477180481,grad_norm: 0.9999991367066329, iteration: 110439
loss: 0.9809442758560181,grad_norm: 0.9999990582800057, iteration: 110440
loss: 1.0606956481933594,grad_norm: 0.9999993463637536, iteration: 110441
loss: 1.0184999704360962,grad_norm: 0.9999991400468812, iteration: 110442
loss: 1.015946388244629,grad_norm: 0.9999989848229066, iteration: 110443
loss: 1.022186517715454,grad_norm: 0.9241829472185098, iteration: 110444
loss: 1.0152698755264282,grad_norm: 0.9950113993568175, iteration: 110445
loss: 1.0431077480316162,grad_norm: 0.9454015802145456, iteration: 110446
loss: 0.9807602763175964,grad_norm: 0.9999991035847189, iteration: 110447
loss: 0.9829588532447815,grad_norm: 0.932216150308579, iteration: 110448
loss: 1.0019314289093018,grad_norm: 0.9990584175950743, iteration: 110449
loss: 1.0055075883865356,grad_norm: 0.9999992145050838, iteration: 110450
loss: 0.9657129049301147,grad_norm: 0.9999993067391857, iteration: 110451
loss: 1.0129880905151367,grad_norm: 0.9999990841667001, iteration: 110452
loss: 1.0888559818267822,grad_norm: 0.9999993390581167, iteration: 110453
loss: 1.0056397914886475,grad_norm: 0.9999992210809191, iteration: 110454
loss: 1.0170615911483765,grad_norm: 0.999999159941364, iteration: 110455
loss: 0.9918684363365173,grad_norm: 0.9999991716465748, iteration: 110456
loss: 0.9526083469390869,grad_norm: 0.9999991900270355, iteration: 110457
loss: 1.0121241807937622,grad_norm: 0.9317921208532177, iteration: 110458
loss: 0.9885346293449402,grad_norm: 0.9999995628368811, iteration: 110459
loss: 0.9887072443962097,grad_norm: 0.912067642409383, iteration: 110460
loss: 0.9914800524711609,grad_norm: 0.9999991342484289, iteration: 110461
loss: 0.9933408498764038,grad_norm: 0.9999991133518759, iteration: 110462
loss: 0.9937679767608643,grad_norm: 0.9737624239643281, iteration: 110463
loss: 1.0054395198822021,grad_norm: 0.9999991847992828, iteration: 110464
loss: 1.0051424503326416,grad_norm: 0.9999991999581289, iteration: 110465
loss: 0.9795919060707092,grad_norm: 0.9324543411728945, iteration: 110466
loss: 0.9961024522781372,grad_norm: 0.9999991297765256, iteration: 110467
loss: 1.0101932287216187,grad_norm: 0.9999990975252393, iteration: 110468
loss: 0.9748074412345886,grad_norm: 0.9724066142512843, iteration: 110469
loss: 1.0220786333084106,grad_norm: 0.9999992819377731, iteration: 110470
loss: 0.9543720483779907,grad_norm: 0.9999995183440098, iteration: 110471
loss: 1.0224483013153076,grad_norm: 0.999999078500373, iteration: 110472
loss: 0.9925634264945984,grad_norm: 0.9999992604934249, iteration: 110473
loss: 0.9662302136421204,grad_norm: 0.9999991365488046, iteration: 110474
loss: 1.0297892093658447,grad_norm: 0.9999992113107222, iteration: 110475
loss: 1.012916088104248,grad_norm: 0.9999992229342519, iteration: 110476
loss: 0.961745023727417,grad_norm: 0.9999991141830076, iteration: 110477
loss: 0.9803383350372314,grad_norm: 0.9999992880667288, iteration: 110478
loss: 0.9529273509979248,grad_norm: 0.9231052733234199, iteration: 110479
loss: 0.9914247393608093,grad_norm: 0.9999990273567743, iteration: 110480
loss: 1.0181852579116821,grad_norm: 0.9999991902101665, iteration: 110481
loss: 0.9918572306632996,grad_norm: 0.9999992462164435, iteration: 110482
loss: 0.9946168065071106,grad_norm: 0.9999992217871841, iteration: 110483
loss: 0.9845592975616455,grad_norm: 0.9999992877240232, iteration: 110484
loss: 1.016404151916504,grad_norm: 0.9891667683867577, iteration: 110485
loss: 1.01047945022583,grad_norm: 0.999999060347575, iteration: 110486
loss: 1.0256930589675903,grad_norm: 0.9929410337476718, iteration: 110487
loss: 1.0224463939666748,grad_norm: 0.9182009647785807, iteration: 110488
loss: 1.002902865409851,grad_norm: 0.9320634307253834, iteration: 110489
loss: 0.9956568479537964,grad_norm: 0.9999988546363223, iteration: 110490
loss: 0.9977355599403381,grad_norm: 0.8766547793687467, iteration: 110491
loss: 0.9710249304771423,grad_norm: 0.9608363040462201, iteration: 110492
loss: 1.0318245887756348,grad_norm: 0.9999993247862126, iteration: 110493
loss: 0.9943321347236633,grad_norm: 0.9999990191985907, iteration: 110494
loss: 1.0056209564208984,grad_norm: 0.9999991366165714, iteration: 110495
loss: 1.0039937496185303,grad_norm: 0.9999991494763937, iteration: 110496
loss: 1.0062133073806763,grad_norm: 0.9999990170794799, iteration: 110497
loss: 1.0003350973129272,grad_norm: 0.9999992342740235, iteration: 110498
loss: 0.992946982383728,grad_norm: 0.9673624756814051, iteration: 110499
loss: 0.9889497756958008,grad_norm: 0.8811892333549945, iteration: 110500
loss: 0.9656524062156677,grad_norm: 0.9133570376977843, iteration: 110501
loss: 1.046950101852417,grad_norm: 0.9999996350050065, iteration: 110502
loss: 1.0224530696868896,grad_norm: 0.9999991201555705, iteration: 110503
loss: 1.0124536752700806,grad_norm: 0.9820458466322091, iteration: 110504
loss: 1.0046582221984863,grad_norm: 0.9671338028955945, iteration: 110505
loss: 1.027433156967163,grad_norm: 0.9999992897790071, iteration: 110506
loss: 1.0108778476715088,grad_norm: 0.9999990001710809, iteration: 110507
loss: 0.9487784504890442,grad_norm: 0.8609702113430618, iteration: 110508
loss: 1.0144920349121094,grad_norm: 0.8673191966109379, iteration: 110509
loss: 1.0102564096450806,grad_norm: 0.993596461791348, iteration: 110510
loss: 0.9999452829360962,grad_norm: 0.8819577677693262, iteration: 110511
loss: 0.987175464630127,grad_norm: 0.9999990241869027, iteration: 110512
loss: 0.9782342314720154,grad_norm: 0.9999990646490654, iteration: 110513
loss: 1.0048705339431763,grad_norm: 0.9999993778731835, iteration: 110514
loss: 1.0090378522872925,grad_norm: 0.9999990462689923, iteration: 110515
loss: 1.002230167388916,grad_norm: 0.9999993762856239, iteration: 110516
loss: 0.9880738258361816,grad_norm: 0.9014813543167077, iteration: 110517
loss: 0.9865454435348511,grad_norm: 0.9999993109259235, iteration: 110518
loss: 0.9915399551391602,grad_norm: 0.9999990720691111, iteration: 110519
loss: 1.0149623155593872,grad_norm: 0.9999991353999861, iteration: 110520
loss: 1.0080878734588623,grad_norm: 0.9999990677920906, iteration: 110521
loss: 0.9868531227111816,grad_norm: 0.9999991524488008, iteration: 110522
loss: 1.024632453918457,grad_norm: 0.9999991962961844, iteration: 110523
loss: 1.003165364265442,grad_norm: 0.9999991061714482, iteration: 110524
loss: 0.997066080570221,grad_norm: 0.9999991385316735, iteration: 110525
loss: 1.0459058284759521,grad_norm: 0.9999991725614816, iteration: 110526
loss: 0.9783511757850647,grad_norm: 0.9999991545368814, iteration: 110527
loss: 1.0163640975952148,grad_norm: 0.890205851040456, iteration: 110528
loss: 1.0201854705810547,grad_norm: 0.9999991652264194, iteration: 110529
loss: 0.9703916311264038,grad_norm: 0.9999990041277911, iteration: 110530
loss: 1.0290570259094238,grad_norm: 0.9999990443342378, iteration: 110531
loss: 1.0045055150985718,grad_norm: 0.9506952090310405, iteration: 110532
loss: 1.0163801908493042,grad_norm: 0.9999991288994201, iteration: 110533
loss: 1.016961693763733,grad_norm: 0.9743654780618407, iteration: 110534
loss: 0.9836714863777161,grad_norm: 0.9999991390836459, iteration: 110535
loss: 1.0102308988571167,grad_norm: 0.9999991911986621, iteration: 110536
loss: 1.0380797386169434,grad_norm: 0.9244165457102028, iteration: 110537
loss: 1.0119410753250122,grad_norm: 0.9528033043868229, iteration: 110538
loss: 1.012276291847229,grad_norm: 0.9999992444812447, iteration: 110539
loss: 1.0091923475265503,grad_norm: 0.9352779384835108, iteration: 110540
loss: 0.9965773224830627,grad_norm: 0.999999277010006, iteration: 110541
loss: 0.9810765981674194,grad_norm: 0.9999990690441151, iteration: 110542
loss: 1.026340126991272,grad_norm: 0.9999992280180553, iteration: 110543
loss: 0.9528700113296509,grad_norm: 0.9999991495842091, iteration: 110544
loss: 1.0358269214630127,grad_norm: 0.9999991742440508, iteration: 110545
loss: 0.9867165088653564,grad_norm: 0.9999991482184857, iteration: 110546
loss: 0.9733647108078003,grad_norm: 0.9999990841487078, iteration: 110547
loss: 0.9770286083221436,grad_norm: 0.9245916501346296, iteration: 110548
loss: 1.0026037693023682,grad_norm: 0.99999917404325, iteration: 110549
loss: 1.0083115100860596,grad_norm: 0.8919450290951039, iteration: 110550
loss: 0.9865719676017761,grad_norm: 0.9538250204174875, iteration: 110551
loss: 1.1048078536987305,grad_norm: 0.9999997153588285, iteration: 110552
loss: 1.0703730583190918,grad_norm: 0.999999312836514, iteration: 110553
loss: 0.9505874514579773,grad_norm: 0.9999989926295708, iteration: 110554
loss: 1.0198657512664795,grad_norm: 0.9999991926368202, iteration: 110555
loss: 0.990743100643158,grad_norm: 0.9999991119710196, iteration: 110556
loss: 1.0130594968795776,grad_norm: 0.9728461567556524, iteration: 110557
loss: 1.0327409505844116,grad_norm: 0.9463210590718624, iteration: 110558
loss: 1.0023473501205444,grad_norm: 0.8809671407382023, iteration: 110559
loss: 1.0306262969970703,grad_norm: 0.9684137895121738, iteration: 110560
loss: 0.9962766766548157,grad_norm: 0.962900637181306, iteration: 110561
loss: 1.0364378690719604,grad_norm: 0.9999998277659464, iteration: 110562
loss: 1.014809012413025,grad_norm: 0.9671293863259081, iteration: 110563
loss: 1.0371507406234741,grad_norm: 0.9999993291206689, iteration: 110564
loss: 1.005322813987732,grad_norm: 0.999999083725309, iteration: 110565
loss: 1.0319195985794067,grad_norm: 0.9999990076426937, iteration: 110566
loss: 0.9714465141296387,grad_norm: 0.999999150771386, iteration: 110567
loss: 0.9831753969192505,grad_norm: 0.9999989737936902, iteration: 110568
loss: 1.0213929414749146,grad_norm: 0.9999991032966665, iteration: 110569
loss: 0.9737708568572998,grad_norm: 0.9999990193877121, iteration: 110570
loss: 0.9792017340660095,grad_norm: 0.9999990642775034, iteration: 110571
loss: 1.025827407836914,grad_norm: 0.9736170593398104, iteration: 110572
loss: 1.0036789178848267,grad_norm: 0.8839074413600975, iteration: 110573
loss: 0.9779291152954102,grad_norm: 0.9999991333051278, iteration: 110574
loss: 1.0005922317504883,grad_norm: 0.9291208514798838, iteration: 110575
loss: 0.9781540632247925,grad_norm: 0.9999991392051304, iteration: 110576
loss: 1.0383882522583008,grad_norm: 0.9999992094905358, iteration: 110577
loss: 0.9969549775123596,grad_norm: 0.9999992386524157, iteration: 110578
loss: 1.0604196786880493,grad_norm: 0.9999997882737951, iteration: 110579
loss: 1.0282628536224365,grad_norm: 0.9999991050861181, iteration: 110580
loss: 1.0340576171875,grad_norm: 0.9999993767347084, iteration: 110581
loss: 1.0137360095977783,grad_norm: 0.999999020891161, iteration: 110582
loss: 0.9775782823562622,grad_norm: 0.9999990466694537, iteration: 110583
loss: 0.9808385372161865,grad_norm: 0.9471610875871628, iteration: 110584
loss: 1.030724287033081,grad_norm: 0.9999991256499794, iteration: 110585
loss: 1.0303179025650024,grad_norm: 0.9316463422816862, iteration: 110586
loss: 1.040101408958435,grad_norm: 0.9999991772335158, iteration: 110587
loss: 1.0065592527389526,grad_norm: 0.9999991217925799, iteration: 110588
loss: 1.0049049854278564,grad_norm: 0.9604932497082148, iteration: 110589
loss: 0.994131863117218,grad_norm: 0.9569676989779557, iteration: 110590
loss: 1.0230811834335327,grad_norm: 0.9875252120941216, iteration: 110591
loss: 0.9805086851119995,grad_norm: 0.999999166362314, iteration: 110592
loss: 0.9768616557121277,grad_norm: 0.9999991483026693, iteration: 110593
loss: 0.9903745055198669,grad_norm: 0.9999994025005445, iteration: 110594
loss: 1.0170942544937134,grad_norm: 0.9919631209849753, iteration: 110595
loss: 1.031632423400879,grad_norm: 0.9999993250977282, iteration: 110596
loss: 0.9654262661933899,grad_norm: 0.9999990360169668, iteration: 110597
loss: 1.0204622745513916,grad_norm: 0.9999989652386669, iteration: 110598
loss: 0.9889321327209473,grad_norm: 0.9267469525324624, iteration: 110599
loss: 1.0141923427581787,grad_norm: 0.9999992734225169, iteration: 110600
loss: 0.9984297752380371,grad_norm: 0.9999992766626753, iteration: 110601
loss: 0.9965106248855591,grad_norm: 0.9999990685402538, iteration: 110602
loss: 1.0135756731033325,grad_norm: 0.999999078451825, iteration: 110603
loss: 0.9630797505378723,grad_norm: 0.9999991174353136, iteration: 110604
loss: 1.0146926641464233,grad_norm: 0.9999994025368134, iteration: 110605
loss: 1.0123229026794434,grad_norm: 0.9999990961886014, iteration: 110606
loss: 0.9885596036911011,grad_norm: 0.8866496804837303, iteration: 110607
loss: 0.9907706379890442,grad_norm: 0.9999989962114253, iteration: 110608
loss: 1.0240249633789062,grad_norm: 0.9999990637503804, iteration: 110609
loss: 0.9857739806175232,grad_norm: 0.9999990887360642, iteration: 110610
loss: 0.9935073256492615,grad_norm: 0.9999992421806895, iteration: 110611
loss: 1.0025818347930908,grad_norm: 0.9999989553822966, iteration: 110612
loss: 1.0262137651443481,grad_norm: 0.9999991460949327, iteration: 110613
loss: 0.9946600794792175,grad_norm: 0.9999990849414888, iteration: 110614
loss: 0.9810018539428711,grad_norm: 0.9999991273377747, iteration: 110615
loss: 1.012182593345642,grad_norm: 0.9999993291103944, iteration: 110616
loss: 1.066950798034668,grad_norm: 0.9999991551832773, iteration: 110617
loss: 0.997843325138092,grad_norm: 0.9999989677835706, iteration: 110618
loss: 1.0555129051208496,grad_norm: 0.9999990663689033, iteration: 110619
loss: 1.0147202014923096,grad_norm: 0.8740454672798976, iteration: 110620
loss: 0.9730100631713867,grad_norm: 0.9970572373044297, iteration: 110621
loss: 1.0043573379516602,grad_norm: 0.9874716491266807, iteration: 110622
loss: 1.0097368955612183,grad_norm: 0.8899679903180099, iteration: 110623
loss: 1.0394853353500366,grad_norm: 0.9999997042270372, iteration: 110624
loss: 0.988749623298645,grad_norm: 0.9999990192688004, iteration: 110625
loss: 1.0401883125305176,grad_norm: 0.9999992219133473, iteration: 110626
loss: 1.0223841667175293,grad_norm: 0.9999991755427832, iteration: 110627
loss: 1.0444847345352173,grad_norm: 0.9999991390121515, iteration: 110628
loss: 1.09779691696167,grad_norm: 0.9999991683402383, iteration: 110629
loss: 0.9776652455329895,grad_norm: 0.9999990184404706, iteration: 110630
loss: 0.9868729114532471,grad_norm: 0.8231406396920242, iteration: 110631
loss: 0.9637972712516785,grad_norm: 0.9999992876613815, iteration: 110632
loss: 0.9773347973823547,grad_norm: 0.9999990142680707, iteration: 110633
loss: 1.0167148113250732,grad_norm: 0.9252447677442952, iteration: 110634
loss: 1.0023515224456787,grad_norm: 0.9453209910667247, iteration: 110635
loss: 1.0116620063781738,grad_norm: 0.9999991535657473, iteration: 110636
loss: 1.0008255243301392,grad_norm: 0.999999144358356, iteration: 110637
loss: 0.9936818480491638,grad_norm: 0.9000273026965487, iteration: 110638
loss: 1.0076268911361694,grad_norm: 0.9999991409675372, iteration: 110639
loss: 0.9688594341278076,grad_norm: 0.9999993580337346, iteration: 110640
loss: 1.0326099395751953,grad_norm: 0.9143134141016109, iteration: 110641
loss: 0.9972730875015259,grad_norm: 0.9274932926571519, iteration: 110642
loss: 1.0261871814727783,grad_norm: 0.9543121043345624, iteration: 110643
loss: 1.0094841718673706,grad_norm: 0.9999993168143286, iteration: 110644
loss: 0.9603855609893799,grad_norm: 0.9999991877707672, iteration: 110645
loss: 0.9725083112716675,grad_norm: 0.9999992351567462, iteration: 110646
loss: 0.9862099885940552,grad_norm: 0.9999989531537053, iteration: 110647
loss: 1.0077130794525146,grad_norm: 0.9999991324358625, iteration: 110648
loss: 1.0161923170089722,grad_norm: 0.8836603215545651, iteration: 110649
loss: 0.9733057022094727,grad_norm: 0.9999990892597047, iteration: 110650
loss: 1.0292115211486816,grad_norm: 0.9999992235011078, iteration: 110651
loss: 0.9916775822639465,grad_norm: 0.9679942523391956, iteration: 110652
loss: 1.0014863014221191,grad_norm: 0.9669586201619611, iteration: 110653
loss: 0.975689709186554,grad_norm: 0.9999990475467224, iteration: 110654
loss: 1.0467909574508667,grad_norm: 0.9543105145185476, iteration: 110655
loss: 0.9835891723632812,grad_norm: 0.9999992841848772, iteration: 110656
loss: 0.9729261994361877,grad_norm: 0.9999991890261855, iteration: 110657
loss: 1.0013259649276733,grad_norm: 0.9943663085490196, iteration: 110658
loss: 0.9933319687843323,grad_norm: 0.9999989608038848, iteration: 110659
loss: 1.0037134885787964,grad_norm: 0.9297643011607304, iteration: 110660
loss: 0.9987195134162903,grad_norm: 0.9999991256578556, iteration: 110661
loss: 1.0339406728744507,grad_norm: 0.9455633291396973, iteration: 110662
loss: 1.0109941959381104,grad_norm: 0.9488315505393144, iteration: 110663
loss: 1.0497127771377563,grad_norm: 0.9999992449303161, iteration: 110664
loss: 0.9864240884780884,grad_norm: 0.9977216152957138, iteration: 110665
loss: 1.0118725299835205,grad_norm: 0.8964716033659288, iteration: 110666
loss: 1.0622596740722656,grad_norm: 0.9999993803933689, iteration: 110667
loss: 0.99495929479599,grad_norm: 0.9999991156141594, iteration: 110668
loss: 1.0158705711364746,grad_norm: 0.9999990646359133, iteration: 110669
loss: 1.054024577140808,grad_norm: 0.866334847182721, iteration: 110670
loss: 0.9431890249252319,grad_norm: 0.9999992558621789, iteration: 110671
loss: 1.0294369459152222,grad_norm: 0.9999990159576564, iteration: 110672
loss: 0.9650063514709473,grad_norm: 0.9999992650648273, iteration: 110673
loss: 1.0004403591156006,grad_norm: 0.9999989801257488, iteration: 110674
loss: 1.0108245611190796,grad_norm: 0.999999202577998, iteration: 110675
loss: 0.9741728901863098,grad_norm: 0.9999991481387157, iteration: 110676
loss: 0.9827630519866943,grad_norm: 0.9999991647491857, iteration: 110677
loss: 1.0466214418411255,grad_norm: 0.9097031460475365, iteration: 110678
loss: 1.0248299837112427,grad_norm: 0.9999995304360609, iteration: 110679
loss: 0.9595471620559692,grad_norm: 0.9999991328650205, iteration: 110680
loss: 0.9998218417167664,grad_norm: 0.9999991052477024, iteration: 110681
loss: 0.9993996620178223,grad_norm: 0.9999990544944604, iteration: 110682
loss: 1.0074902772903442,grad_norm: 0.8664422349675401, iteration: 110683
loss: 1.010419487953186,grad_norm: 0.774628758669924, iteration: 110684
loss: 1.0107395648956299,grad_norm: 0.9999993296817634, iteration: 110685
loss: 1.022299885749817,grad_norm: 0.9999992435733371, iteration: 110686
loss: 1.0116307735443115,grad_norm: 0.9753592060981511, iteration: 110687
loss: 1.0011533498764038,grad_norm: 0.9999991323497824, iteration: 110688
loss: 1.0219001770019531,grad_norm: 0.9999992384874457, iteration: 110689
loss: 0.9739514589309692,grad_norm: 0.8789043095766534, iteration: 110690
loss: 0.9661508798599243,grad_norm: 0.9999990649302175, iteration: 110691
loss: 1.014966607093811,grad_norm: 0.9577998707810639, iteration: 110692
loss: 1.0690633058547974,grad_norm: 0.9999993387044631, iteration: 110693
loss: 0.9987238645553589,grad_norm: 0.9999991655651513, iteration: 110694
loss: 1.0412452220916748,grad_norm: 0.8828271640212598, iteration: 110695
loss: 1.0038846731185913,grad_norm: 0.9696065427885047, iteration: 110696
loss: 0.9606305360794067,grad_norm: 0.8360686398109912, iteration: 110697
loss: 1.0146186351776123,grad_norm: 0.999999240887732, iteration: 110698
loss: 0.9872156381607056,grad_norm: 0.9999991771120689, iteration: 110699
loss: 0.9865619540214539,grad_norm: 0.9999993211511412, iteration: 110700
loss: 1.0260818004608154,grad_norm: 0.9999998217269088, iteration: 110701
loss: 0.9974080324172974,grad_norm: 0.9999991725913268, iteration: 110702
loss: 0.995151937007904,grad_norm: 0.9430649792700521, iteration: 110703
loss: 0.9975436925888062,grad_norm: 0.9999990503460771, iteration: 110704
loss: 0.9821017980575562,grad_norm: 0.9849345227998771, iteration: 110705
loss: 0.9822236895561218,grad_norm: 0.8145392037246975, iteration: 110706
loss: 0.9913946390151978,grad_norm: 0.9999991621583285, iteration: 110707
loss: 0.9662563800811768,grad_norm: 0.9999992257827653, iteration: 110708
loss: 1.0027334690093994,grad_norm: 0.9999991078819795, iteration: 110709
loss: 0.9959565997123718,grad_norm: 0.999999254183165, iteration: 110710
loss: 0.998112142086029,grad_norm: 0.9999989777140955, iteration: 110711
loss: 1.020422101020813,grad_norm: 0.9999997616900681, iteration: 110712
loss: 1.0291554927825928,grad_norm: 0.9999997863491249, iteration: 110713
loss: 1.0015078783035278,grad_norm: 0.9678956637001014, iteration: 110714
loss: 0.9962288737297058,grad_norm: 0.9999991264361937, iteration: 110715
loss: 0.9812337756156921,grad_norm: 0.8711658663882027, iteration: 110716
loss: 1.032516598701477,grad_norm: 0.9999996373786814, iteration: 110717
loss: 0.9985166788101196,grad_norm: 0.9999992788166064, iteration: 110718
loss: 0.9782229065895081,grad_norm: 0.9937121026125929, iteration: 110719
loss: 1.0177782773971558,grad_norm: 0.9999991529050154, iteration: 110720
loss: 0.9790869951248169,grad_norm: 0.9765805397974919, iteration: 110721
loss: 0.9967995882034302,grad_norm: 0.9999992770941504, iteration: 110722
loss: 1.00947904586792,grad_norm: 0.9999991173988235, iteration: 110723
loss: 1.082330346107483,grad_norm: 0.9999992150666562, iteration: 110724
loss: 0.9980520009994507,grad_norm: 0.9302342473414132, iteration: 110725
loss: 1.001983404159546,grad_norm: 0.9889869835945715, iteration: 110726
loss: 1.0088715553283691,grad_norm: 0.9999991896886784, iteration: 110727
loss: 1.0539262294769287,grad_norm: 0.9224986521899758, iteration: 110728
loss: 1.0127042531967163,grad_norm: 0.9999991292740582, iteration: 110729
loss: 0.9667789936065674,grad_norm: 0.999999051035146, iteration: 110730
loss: 0.9824548959732056,grad_norm: 0.9999993055006762, iteration: 110731
loss: 0.9581511616706848,grad_norm: 0.9999991980887855, iteration: 110732
loss: 0.9858644604682922,grad_norm: 0.9999991674306203, iteration: 110733
loss: 1.0288152694702148,grad_norm: 0.9999992518666794, iteration: 110734
loss: 0.963811993598938,grad_norm: 0.9999991267533214, iteration: 110735
loss: 1.0210455656051636,grad_norm: 0.9806318956050932, iteration: 110736
loss: 1.0095082521438599,grad_norm: 0.8821589443837818, iteration: 110737
loss: 1.0133496522903442,grad_norm: 0.9999990928643125, iteration: 110738
loss: 0.9464096426963806,grad_norm: 0.9999992008237402, iteration: 110739
loss: 0.9804971218109131,grad_norm: 0.9414479592899212, iteration: 110740
loss: 0.9976280927658081,grad_norm: 0.9999990603999271, iteration: 110741
loss: 0.9645915627479553,grad_norm: 0.9999990729612075, iteration: 110742
loss: 0.9975059032440186,grad_norm: 0.9999991067238937, iteration: 110743
loss: 0.9816833734512329,grad_norm: 0.9999990410368438, iteration: 110744
loss: 0.9964691400527954,grad_norm: 0.999999252885115, iteration: 110745
loss: 0.9911823868751526,grad_norm: 0.9999992317747459, iteration: 110746
loss: 1.0013779401779175,grad_norm: 0.9282453556958233, iteration: 110747
loss: 0.9701599478721619,grad_norm: 0.9999990990265174, iteration: 110748
loss: 1.0084534883499146,grad_norm: 0.9999992673820245, iteration: 110749
loss: 0.9992586970329285,grad_norm: 0.9999991208777055, iteration: 110750
loss: 1.0168429613113403,grad_norm: 0.8957754447732528, iteration: 110751
loss: 1.0294493436813354,grad_norm: 0.999999118674545, iteration: 110752
loss: 0.9704924821853638,grad_norm: 0.9999990942273338, iteration: 110753
loss: 0.9972774386405945,grad_norm: 0.9999991866072773, iteration: 110754
loss: 1.0313323736190796,grad_norm: 0.9999990824800007, iteration: 110755
loss: 0.9712097644805908,grad_norm: 0.9957385569699301, iteration: 110756
loss: 1.0807849168777466,grad_norm: 0.9944197352784546, iteration: 110757
loss: 0.9785431623458862,grad_norm: 0.9999990462433471, iteration: 110758
loss: 0.9750422239303589,grad_norm: 0.8808296715080491, iteration: 110759
loss: 1.0219255685806274,grad_norm: 0.9999992510591568, iteration: 110760
loss: 0.9827742576599121,grad_norm: 0.999999186363531, iteration: 110761
loss: 0.9963759183883667,grad_norm: 0.9820650029380658, iteration: 110762
loss: 1.0275003910064697,grad_norm: 0.9999990389306199, iteration: 110763
loss: 0.9772241711616516,grad_norm: 0.9366134531499474, iteration: 110764
loss: 1.0153064727783203,grad_norm: 0.9999989092343271, iteration: 110765
loss: 0.9679583311080933,grad_norm: 0.9999992237302697, iteration: 110766
loss: 1.0002782344818115,grad_norm: 0.9999991345634067, iteration: 110767
loss: 0.986085832118988,grad_norm: 0.9999992716226894, iteration: 110768
loss: 0.9798710346221924,grad_norm: 0.9999991209354993, iteration: 110769
loss: 1.0132163763046265,grad_norm: 0.9780619242628249, iteration: 110770
loss: 0.9773566126823425,grad_norm: 0.9894455469789549, iteration: 110771
loss: 1.0186841487884521,grad_norm: 0.9999994303931629, iteration: 110772
loss: 0.9746304154396057,grad_norm: 0.9999989518423275, iteration: 110773
loss: 0.9882737994194031,grad_norm: 0.999999266759778, iteration: 110774
loss: 0.9723212122917175,grad_norm: 0.9070435103261957, iteration: 110775
loss: 0.9739713072776794,grad_norm: 0.9827258550490403, iteration: 110776
loss: 1.0187307596206665,grad_norm: 0.9981471296740896, iteration: 110777
loss: 0.9825485944747925,grad_norm: 0.9999990394395663, iteration: 110778
loss: 0.9945909976959229,grad_norm: 0.9999990519296077, iteration: 110779
loss: 0.980822741985321,grad_norm: 0.9999991563072549, iteration: 110780
loss: 0.9884541630744934,grad_norm: 0.8852657067827804, iteration: 110781
loss: 0.9972290396690369,grad_norm: 0.9999991453900533, iteration: 110782
loss: 0.9926741123199463,grad_norm: 0.9999991323748163, iteration: 110783
loss: 1.011878490447998,grad_norm: 0.9937088267100187, iteration: 110784
loss: 0.9676210284233093,grad_norm: 0.9999990722966957, iteration: 110785
loss: 1.0323998928070068,grad_norm: 0.9065092914819088, iteration: 110786
loss: 0.9998670816421509,grad_norm: 0.9999993129380141, iteration: 110787
loss: 1.0042732954025269,grad_norm: 0.9999989483774366, iteration: 110788
loss: 1.0196664333343506,grad_norm: 0.9817790358099464, iteration: 110789
loss: 1.0343314409255981,grad_norm: 0.9999997300505583, iteration: 110790
loss: 0.9871078729629517,grad_norm: 0.9999991023473137, iteration: 110791
loss: 1.004289150238037,grad_norm: 0.9124165570389, iteration: 110792
loss: 0.9830415844917297,grad_norm: 0.9999990734718227, iteration: 110793
loss: 0.9698144793510437,grad_norm: 0.9999991124577181, iteration: 110794
loss: 0.9709279537200928,grad_norm: 0.9999992128091209, iteration: 110795
loss: 0.9865836501121521,grad_norm: 0.9999993789709236, iteration: 110796
loss: 1.000057339668274,grad_norm: 0.9999990408213965, iteration: 110797
loss: 1.0119824409484863,grad_norm: 0.999999202441997, iteration: 110798
loss: 1.038535475730896,grad_norm: 0.9910066526679999, iteration: 110799
loss: 0.9889858365058899,grad_norm: 0.9258046334378157, iteration: 110800
loss: 0.9818175435066223,grad_norm: 0.99999915857648, iteration: 110801
loss: 1.0176526308059692,grad_norm: 0.9847360233363326, iteration: 110802
loss: 0.9987348318099976,grad_norm: 0.9999991299960491, iteration: 110803
loss: 0.9995496273040771,grad_norm: 0.9999990256673069, iteration: 110804
loss: 1.0033891201019287,grad_norm: 0.9999992099728665, iteration: 110805
loss: 0.9823896288871765,grad_norm: 0.9993974747532226, iteration: 110806
loss: 0.9658036231994629,grad_norm: 0.9608570274442394, iteration: 110807
loss: 0.9949370622634888,grad_norm: 0.9999990852920035, iteration: 110808
loss: 0.9802249670028687,grad_norm: 0.8137601853344049, iteration: 110809
loss: 0.9568116068840027,grad_norm: 0.8222582967475842, iteration: 110810
loss: 1.0209356546401978,grad_norm: 0.999999102367891, iteration: 110811
loss: 1.0324733257293701,grad_norm: 0.8676113218599504, iteration: 110812
loss: 0.9866777062416077,grad_norm: 0.9999991212783136, iteration: 110813
loss: 0.9886398911476135,grad_norm: 0.9999990458931325, iteration: 110814
loss: 0.9655370712280273,grad_norm: 0.9999992742582557, iteration: 110815
loss: 1.0112932920455933,grad_norm: 0.9999990352830402, iteration: 110816
loss: 1.0184659957885742,grad_norm: 0.999999171064927, iteration: 110817
loss: 1.052549123764038,grad_norm: 0.9999992023869524, iteration: 110818
loss: 0.9800686836242676,grad_norm: 0.9999989810819989, iteration: 110819
loss: 1.0024605989456177,grad_norm: 0.999999296608124, iteration: 110820
loss: 0.9779625535011292,grad_norm: 0.8767186427807371, iteration: 110821
loss: 1.0259467363357544,grad_norm: 0.8924818044370245, iteration: 110822
loss: 0.9991637468338013,grad_norm: 0.9999992697341823, iteration: 110823
loss: 0.9806481599807739,grad_norm: 0.9999989489510238, iteration: 110824
loss: 0.9993045330047607,grad_norm: 0.999999134232179, iteration: 110825
loss: 0.9866989254951477,grad_norm: 0.9999991969611348, iteration: 110826
loss: 0.9707391262054443,grad_norm: 0.9307744692852717, iteration: 110827
loss: 0.96271812915802,grad_norm: 0.9569760714576084, iteration: 110828
loss: 1.0135396718978882,grad_norm: 0.9999992819897123, iteration: 110829
loss: 0.982061505317688,grad_norm: 0.9910256600126666, iteration: 110830
loss: 1.001814365386963,grad_norm: 0.8793840076929226, iteration: 110831
loss: 0.9536080956459045,grad_norm: 0.8727837817294523, iteration: 110832
loss: 0.9806432127952576,grad_norm: 0.9999991888823888, iteration: 110833
loss: 0.9982744455337524,grad_norm: 0.9999991174919741, iteration: 110834
loss: 1.027464509010315,grad_norm: 0.9999992164567851, iteration: 110835
loss: 0.9574474096298218,grad_norm: 0.9999991139727655, iteration: 110836
loss: 0.9468433856964111,grad_norm: 0.9999991366222812, iteration: 110837
loss: 0.9855090975761414,grad_norm: 0.9468133457893324, iteration: 110838
loss: 0.9647542834281921,grad_norm: 0.9999990925790698, iteration: 110839
loss: 0.9883682727813721,grad_norm: 0.9380471826969885, iteration: 110840
loss: 0.963171124458313,grad_norm: 0.9999992945536623, iteration: 110841
loss: 1.029897928237915,grad_norm: 0.9999991779916486, iteration: 110842
loss: 1.0324651002883911,grad_norm: 0.9999989618978586, iteration: 110843
loss: 1.0224827527999878,grad_norm: 0.9999990927324023, iteration: 110844
loss: 0.9940279722213745,grad_norm: 0.9999989901698587, iteration: 110845
loss: 1.0387779474258423,grad_norm: 0.9539893793440295, iteration: 110846
loss: 0.994248628616333,grad_norm: 0.999999299328896, iteration: 110847
loss: 1.005386233329773,grad_norm: 0.9999992372232618, iteration: 110848
loss: 0.9804480671882629,grad_norm: 0.9999992117971546, iteration: 110849
loss: 0.9907745718955994,grad_norm: 0.9999990355106244, iteration: 110850
loss: 1.0210667848587036,grad_norm: 0.9569782070114206, iteration: 110851
loss: 1.0154320001602173,grad_norm: 0.9999991535885026, iteration: 110852
loss: 0.982765793800354,grad_norm: 0.9737590497433273, iteration: 110853
loss: 0.982485830783844,grad_norm: 0.9999993522187414, iteration: 110854
loss: 0.9938538074493408,grad_norm: 0.9999990730022577, iteration: 110855
loss: 0.9679567813873291,grad_norm: 0.9999989356998708, iteration: 110856
loss: 0.9711410999298096,grad_norm: 0.9999993319805286, iteration: 110857
loss: 1.0057300329208374,grad_norm: 0.9524536917658094, iteration: 110858
loss: 0.9701019525527954,grad_norm: 0.9999991095836122, iteration: 110859
loss: 1.0304925441741943,grad_norm: 0.9999991848639646, iteration: 110860
loss: 1.0139979124069214,grad_norm: 0.9999992239128406, iteration: 110861
loss: 0.9730307459831238,grad_norm: 0.9999992605399699, iteration: 110862
loss: 1.0099431276321411,grad_norm: 0.9999990912340743, iteration: 110863
loss: 0.9990334510803223,grad_norm: 0.8795685437398042, iteration: 110864
loss: 0.9927619099617004,grad_norm: 0.9999989209708355, iteration: 110865
loss: 0.9672415256500244,grad_norm: 0.9970437529522206, iteration: 110866
loss: 0.9930922985076904,grad_norm: 0.999998981860134, iteration: 110867
loss: 1.0181132555007935,grad_norm: 0.9999990961663033, iteration: 110868
loss: 0.9888958930969238,grad_norm: 0.9828308465536092, iteration: 110869
loss: 0.9894943237304688,grad_norm: 0.99999917657483, iteration: 110870
loss: 0.97023606300354,grad_norm: 0.9999992248631252, iteration: 110871
loss: 0.9800973534584045,grad_norm: 0.9999992248255688, iteration: 110872
loss: 1.0110492706298828,grad_norm: 0.9999991099785727, iteration: 110873
loss: 0.9867048859596252,grad_norm: 0.9999992136061927, iteration: 110874
loss: 0.9911253452301025,grad_norm: 0.9999990247030831, iteration: 110875
loss: 1.0030474662780762,grad_norm: 0.9999991590441126, iteration: 110876
loss: 0.9821652173995972,grad_norm: 0.999999223641968, iteration: 110877
loss: 0.9860316514968872,grad_norm: 0.9999991944190081, iteration: 110878
loss: 0.9584194421768188,grad_norm: 0.9999990360078891, iteration: 110879
loss: 0.9708810448646545,grad_norm: 0.9096002133388827, iteration: 110880
loss: 1.0286242961883545,grad_norm: 0.9999992215014547, iteration: 110881
loss: 0.991841733455658,grad_norm: 0.9999992448223046, iteration: 110882
loss: 0.9650789499282837,grad_norm: 0.9999991997844834, iteration: 110883
loss: 1.0246996879577637,grad_norm: 0.9686639182425912, iteration: 110884
loss: 0.9880980253219604,grad_norm: 0.9999990306103052, iteration: 110885
loss: 1.0029854774475098,grad_norm: 0.9999989434104798, iteration: 110886
loss: 1.0042766332626343,grad_norm: 0.9999990786967492, iteration: 110887
loss: 1.0109002590179443,grad_norm: 0.9999999404749059, iteration: 110888
loss: 0.9839980602264404,grad_norm: 0.9889226188167491, iteration: 110889
loss: 1.0187379121780396,grad_norm: 0.9999990793413965, iteration: 110890
loss: 0.9900129437446594,grad_norm: 0.9849196347924523, iteration: 110891
loss: 0.9735790491104126,grad_norm: 0.9999991521882753, iteration: 110892
loss: 0.9432660341262817,grad_norm: 0.9999992350434378, iteration: 110893
loss: 1.0111026763916016,grad_norm: 0.8030595656253787, iteration: 110894
loss: 0.9955593943595886,grad_norm: 0.9999991786513073, iteration: 110895
loss: 1.0129119157791138,grad_norm: 0.9999991484234324, iteration: 110896
loss: 1.0269711017608643,grad_norm: 0.9197804152148634, iteration: 110897
loss: 1.0319658517837524,grad_norm: 0.9999998099438017, iteration: 110898
loss: 0.9499589800834656,grad_norm: 0.9938469811563652, iteration: 110899
loss: 1.0068907737731934,grad_norm: 0.9999992334843166, iteration: 110900
loss: 0.997427225112915,grad_norm: 0.9999992835405127, iteration: 110901
loss: 0.99486243724823,grad_norm: 0.832364538262896, iteration: 110902
loss: 0.9927298426628113,grad_norm: 0.999999161628804, iteration: 110903
loss: 1.0101635456085205,grad_norm: 0.82417014548062, iteration: 110904
loss: 0.980853259563446,grad_norm: 0.9999991276178097, iteration: 110905
loss: 0.9822260141372681,grad_norm: 0.9999990929821962, iteration: 110906
loss: 1.0296754837036133,grad_norm: 0.9999993070948372, iteration: 110907
loss: 1.0284758806228638,grad_norm: 0.999999209980162, iteration: 110908
loss: 1.0045112371444702,grad_norm: 0.9999991730015073, iteration: 110909
loss: 0.9916681051254272,grad_norm: 0.8911072032652103, iteration: 110910
loss: 1.022186279296875,grad_norm: 0.9999991433134542, iteration: 110911
loss: 1.01704740524292,grad_norm: 0.9159923325168511, iteration: 110912
loss: 1.031412124633789,grad_norm: 0.9368622972167706, iteration: 110913
loss: 1.0370317697525024,grad_norm: 0.9999992288162912, iteration: 110914
loss: 1.0131231546401978,grad_norm: 0.9999990805526455, iteration: 110915
loss: 1.0005298852920532,grad_norm: 0.9449190750774761, iteration: 110916
loss: 0.9890028834342957,grad_norm: 0.9140222903943527, iteration: 110917
loss: 0.9956942200660706,grad_norm: 0.9999992750986331, iteration: 110918
loss: 0.9972267746925354,grad_norm: 0.9999991375263196, iteration: 110919
loss: 0.9809010028839111,grad_norm: 0.999999266295129, iteration: 110920
loss: 1.0362296104431152,grad_norm: 0.9999997114692425, iteration: 110921
loss: 0.9909663200378418,grad_norm: 0.9999989723752238, iteration: 110922
loss: 0.9848032593727112,grad_norm: 0.941701636476009, iteration: 110923
loss: 1.0205600261688232,grad_norm: 0.9999991982122286, iteration: 110924
loss: 0.9887349009513855,grad_norm: 0.9999997134449407, iteration: 110925
loss: 1.0018950700759888,grad_norm: 0.9999990430332352, iteration: 110926
loss: 1.0244842767715454,grad_norm: 0.999999144170521, iteration: 110927
loss: 0.9811171889305115,grad_norm: 0.9222009218836356, iteration: 110928
loss: 1.0074849128723145,grad_norm: 0.9999996667832659, iteration: 110929
loss: 1.0251052379608154,grad_norm: 0.9999991333323861, iteration: 110930
loss: 1.0337491035461426,grad_norm: 0.9999990518843266, iteration: 110931
loss: 0.9811787009239197,grad_norm: 0.999998937541012, iteration: 110932
loss: 1.043597936630249,grad_norm: 0.9999995161081503, iteration: 110933
loss: 1.0468270778656006,grad_norm: 1.0000000360081533, iteration: 110934
loss: 1.0081418752670288,grad_norm: 0.9992052049331922, iteration: 110935
loss: 0.9738648533821106,grad_norm: 0.9999990917453139, iteration: 110936
loss: 0.9873300790786743,grad_norm: 0.9999991311138658, iteration: 110937
loss: 1.0132471323013306,grad_norm: 0.9954642452279765, iteration: 110938
loss: 0.9849828481674194,grad_norm: 0.8410400625472882, iteration: 110939
loss: 0.9886600971221924,grad_norm: 0.9964203977245735, iteration: 110940
loss: 1.0029133558273315,grad_norm: 0.9999990297210208, iteration: 110941
loss: 1.0029034614562988,grad_norm: 0.9999992441679143, iteration: 110942
loss: 0.9520331621170044,grad_norm: 0.9114468832016358, iteration: 110943
loss: 0.9797823429107666,grad_norm: 0.9969263020540222, iteration: 110944
loss: 1.014634370803833,grad_norm: 0.9999990699327812, iteration: 110945
loss: 1.0076775550842285,grad_norm: 0.9443138633636649, iteration: 110946
loss: 0.9617573618888855,grad_norm: 0.9513765801379169, iteration: 110947
loss: 1.0823578834533691,grad_norm: 0.9999991072774107, iteration: 110948
loss: 1.0021438598632812,grad_norm: 0.9930819581992354, iteration: 110949
loss: 0.9862068295478821,grad_norm: 0.8952754549036369, iteration: 110950
loss: 0.9682779908180237,grad_norm: 0.8627590207563636, iteration: 110951
loss: 1.033287763595581,grad_norm: 0.9999998092910891, iteration: 110952
loss: 0.9960570335388184,grad_norm: 0.9689485113466135, iteration: 110953
loss: 1.026635766029358,grad_norm: 0.9999997150924748, iteration: 110954
loss: 0.9894230365753174,grad_norm: 0.9999990248758895, iteration: 110955
loss: 1.0042415857315063,grad_norm: 0.9999991308466121, iteration: 110956
loss: 0.9714846014976501,grad_norm: 0.9476970713026074, iteration: 110957
loss: 1.0265212059020996,grad_norm: 0.9999994386498503, iteration: 110958
loss: 0.9628642797470093,grad_norm: 0.9999993086204563, iteration: 110959
loss: 1.015683889389038,grad_norm: 0.9999997697518923, iteration: 110960
loss: 0.9912159442901611,grad_norm: 0.9929516678875554, iteration: 110961
loss: 0.998178243637085,grad_norm: 0.9999990873947069, iteration: 110962
loss: 1.048730492591858,grad_norm: 0.9891756844737265, iteration: 110963
loss: 1.0389189720153809,grad_norm: 0.9999991473463936, iteration: 110964
loss: 1.015041708946228,grad_norm: 0.9999989285388368, iteration: 110965
loss: 1.0084714889526367,grad_norm: 0.9677473617923618, iteration: 110966
loss: 1.0340348482131958,grad_norm: 0.9999992923868944, iteration: 110967
loss: 1.0131932497024536,grad_norm: 0.9922212680427832, iteration: 110968
loss: 1.0142226219177246,grad_norm: 0.9725959997766779, iteration: 110969
loss: 0.9947289228439331,grad_norm: 0.9336690509008349, iteration: 110970
loss: 1.0097177028656006,grad_norm: 0.9999991241694951, iteration: 110971
loss: 0.9930778741836548,grad_norm: 0.9999991621787998, iteration: 110972
loss: 1.027559518814087,grad_norm: 0.9028557806931485, iteration: 110973
loss: 1.0241960287094116,grad_norm: 0.9999991531893618, iteration: 110974
loss: 0.9840123653411865,grad_norm: 0.9886694075744951, iteration: 110975
loss: 0.9716089963912964,grad_norm: 0.9999990119076289, iteration: 110976
loss: 1.0311839580535889,grad_norm: 0.9999990109974695, iteration: 110977
loss: 0.9768487215042114,grad_norm: 0.9587340246068136, iteration: 110978
loss: 0.9802091717720032,grad_norm: 0.9658720054361831, iteration: 110979
loss: 0.9874347448348999,grad_norm: 0.9999991427768286, iteration: 110980
loss: 0.9808745980262756,grad_norm: 0.999999000547302, iteration: 110981
loss: 1.0061651468276978,grad_norm: 0.9999992479050175, iteration: 110982
loss: 1.0226097106933594,grad_norm: 0.9999991508091025, iteration: 110983
loss: 0.9999485015869141,grad_norm: 0.9999991319735748, iteration: 110984
loss: 1.0082967281341553,grad_norm: 0.9999990737720795, iteration: 110985
loss: 0.9648348689079285,grad_norm: 0.9999990815316193, iteration: 110986
loss: 1.01310133934021,grad_norm: 0.9999991215150198, iteration: 110987
loss: 0.9908478260040283,grad_norm: 0.8530051235050001, iteration: 110988
loss: 1.0435060262680054,grad_norm: 0.9999991422886831, iteration: 110989
loss: 0.9950224757194519,grad_norm: 0.8878220221830068, iteration: 110990
loss: 1.0299320220947266,grad_norm: 0.999999188184483, iteration: 110991
loss: 1.0035836696624756,grad_norm: 0.9999991083460262, iteration: 110992
loss: 1.002928376197815,grad_norm: 0.9999992478263665, iteration: 110993
loss: 1.0076357126235962,grad_norm: 0.9999991787328006, iteration: 110994
loss: 1.017480731010437,grad_norm: 0.9413747285232911, iteration: 110995
loss: 0.9761703610420227,grad_norm: 0.9999991540360696, iteration: 110996
loss: 1.0206491947174072,grad_norm: 0.961906640432933, iteration: 110997
loss: 0.9752435088157654,grad_norm: 0.9999990696897835, iteration: 110998
loss: 0.9938866496086121,grad_norm: 0.9999992634042355, iteration: 110999
loss: 0.9921115636825562,grad_norm: 0.999999182959007, iteration: 111000
loss: 1.003867268562317,grad_norm: 0.9999992763002994, iteration: 111001
loss: 0.9881138801574707,grad_norm: 0.9999991181876561, iteration: 111002
loss: 0.9497902989387512,grad_norm: 0.9999990394783212, iteration: 111003
loss: 0.9968041777610779,grad_norm: 0.9930726401690801, iteration: 111004
loss: 0.9886239171028137,grad_norm: 0.9563376563925057, iteration: 111005
loss: 1.0096365213394165,grad_norm: 0.9279390267911826, iteration: 111006
loss: 0.9827474355697632,grad_norm: 0.9999990775196153, iteration: 111007
loss: 0.9794633984565735,grad_norm: 0.9999990277158156, iteration: 111008
loss: 1.0259674787521362,grad_norm: 0.9458427747562669, iteration: 111009
loss: 0.9742727875709534,grad_norm: 0.9999991889117313, iteration: 111010
loss: 1.002440094947815,grad_norm: 0.9999992448723338, iteration: 111011
loss: 1.0068925619125366,grad_norm: 0.9999993861964215, iteration: 111012
loss: 1.0971192121505737,grad_norm: 0.9999999524670128, iteration: 111013
loss: 0.9849914908409119,grad_norm: 0.9999990219992041, iteration: 111014
loss: 1.0263365507125854,grad_norm: 0.9999990902931567, iteration: 111015
loss: 1.0488117933273315,grad_norm: 0.9702436848304113, iteration: 111016
loss: 1.0190118551254272,grad_norm: 0.9999994388091517, iteration: 111017
loss: 1.0052447319030762,grad_norm: 0.8498993405592018, iteration: 111018
loss: 0.9690113663673401,grad_norm: 0.9999991064686704, iteration: 111019
loss: 0.998817503452301,grad_norm: 0.9999992438012475, iteration: 111020
loss: 0.96646648645401,grad_norm: 0.9999990420347736, iteration: 111021
loss: 1.0072283744812012,grad_norm: 0.9999991031276599, iteration: 111022
loss: 0.9798889756202698,grad_norm: 0.9999990994189449, iteration: 111023
loss: 1.0280139446258545,grad_norm: 0.9999990793287087, iteration: 111024
loss: 1.0066580772399902,grad_norm: 0.9999991434094839, iteration: 111025
loss: 1.025251865386963,grad_norm: 0.9185094773297326, iteration: 111026
loss: 1.02048659324646,grad_norm: 0.9999993148107046, iteration: 111027
loss: 1.0314487218856812,grad_norm: 0.9999991027241834, iteration: 111028
loss: 1.0326725244522095,grad_norm: 0.928930935279078, iteration: 111029
loss: 0.9936278462409973,grad_norm: 0.9999992378105343, iteration: 111030
loss: 1.0024904012680054,grad_norm: 0.9999989547375183, iteration: 111031
loss: 1.0007489919662476,grad_norm: 0.999999012527, iteration: 111032
loss: 0.9864429235458374,grad_norm: 0.9999991879022131, iteration: 111033
loss: 1.0175026655197144,grad_norm: 0.9999999162131052, iteration: 111034
loss: 1.0124411582946777,grad_norm: 0.9999991799062907, iteration: 111035
loss: 0.994276225566864,grad_norm: 0.9999991515912569, iteration: 111036
loss: 1.0362260341644287,grad_norm: 0.999999033064979, iteration: 111037
loss: 0.9881379008293152,grad_norm: 0.9999990944789536, iteration: 111038
loss: 1.096938967704773,grad_norm: 0.9999998581878232, iteration: 111039
loss: 0.9955270886421204,grad_norm: 0.8555598827090658, iteration: 111040
loss: 1.0299221277236938,grad_norm: 0.8645142678862933, iteration: 111041
loss: 0.9964113831520081,grad_norm: 0.999999019467194, iteration: 111042
loss: 1.0300183296203613,grad_norm: 0.9238806022109501, iteration: 111043
loss: 1.0023518800735474,grad_norm: 0.9999991718366505, iteration: 111044
loss: 1.02556312084198,grad_norm: 0.9999991417690433, iteration: 111045
loss: 0.9939706921577454,grad_norm: 0.9635437577202225, iteration: 111046
loss: 0.9759549498558044,grad_norm: 0.9999991988610853, iteration: 111047
loss: 1.0093313455581665,grad_norm: 0.9999989788491052, iteration: 111048
loss: 1.0016450881958008,grad_norm: 0.9999991162684623, iteration: 111049
loss: 1.0161548852920532,grad_norm: 0.9999990277079626, iteration: 111050
loss: 1.011060357093811,grad_norm: 0.9694762769742977, iteration: 111051
loss: 1.0029959678649902,grad_norm: 0.852325447932957, iteration: 111052
loss: 1.0008379220962524,grad_norm: 0.9179106316704262, iteration: 111053
loss: 1.016901969909668,grad_norm: 0.9999993845239195, iteration: 111054
loss: 0.9926726818084717,grad_norm: 0.9999991595600584, iteration: 111055
loss: 0.9834624528884888,grad_norm: 0.9999991350982267, iteration: 111056
loss: 0.9887644052505493,grad_norm: 0.9999990294986171, iteration: 111057
loss: 0.9952272176742554,grad_norm: 0.9999991049888338, iteration: 111058
loss: 1.036794662475586,grad_norm: 0.9999994473373498, iteration: 111059
loss: 0.9751015305519104,grad_norm: 0.999999074779056, iteration: 111060
loss: 1.0042226314544678,grad_norm: 0.9999991481491657, iteration: 111061
loss: 0.9571558833122253,grad_norm: 0.9999990940494989, iteration: 111062
loss: 1.0066148042678833,grad_norm: 0.942040341848162, iteration: 111063
loss: 0.9900198578834534,grad_norm: 0.9999996772899913, iteration: 111064
loss: 1.0213979482650757,grad_norm: 0.928813711644178, iteration: 111065
loss: 0.9823981523513794,grad_norm: 0.8414573950044649, iteration: 111066
loss: 0.9901264905929565,grad_norm: 0.9999991613030474, iteration: 111067
loss: 0.9706906676292419,grad_norm: 0.9999991949722835, iteration: 111068
loss: 1.0096588134765625,grad_norm: 0.9823449111746713, iteration: 111069
loss: 0.9899622201919556,grad_norm: 0.999999102632133, iteration: 111070
loss: 0.999764084815979,grad_norm: 0.9999991353363004, iteration: 111071
loss: 1.0088409185409546,grad_norm: 0.9533633454612408, iteration: 111072
loss: 1.0074512958526611,grad_norm: 0.9999990219672691, iteration: 111073
loss: 1.0344005823135376,grad_norm: 0.9999990620019621, iteration: 111074
loss: 1.0017541646957397,grad_norm: 0.999999089731784, iteration: 111075
loss: 0.9143849611282349,grad_norm: 0.9999991715390889, iteration: 111076
loss: 1.006179928779602,grad_norm: 0.9999990514701125, iteration: 111077
loss: 1.0174652338027954,grad_norm: 0.9999991801501332, iteration: 111078
loss: 1.0200179815292358,grad_norm: 0.999999253619715, iteration: 111079
loss: 1.0275018215179443,grad_norm: 0.9999991533624388, iteration: 111080
loss: 1.0419552326202393,grad_norm: 0.9999995807395398, iteration: 111081
loss: 1.0011920928955078,grad_norm: 0.8606456274056437, iteration: 111082
loss: 1.023446798324585,grad_norm: 0.9999990578913501, iteration: 111083
loss: 1.032340407371521,grad_norm: 0.9999991929705693, iteration: 111084
loss: 1.0566695928573608,grad_norm: 0.9999993783146789, iteration: 111085
loss: 1.0186667442321777,grad_norm: 0.9999990358316933, iteration: 111086
loss: 1.0268551111221313,grad_norm: 0.9999994158690433, iteration: 111087
loss: 0.9853442311286926,grad_norm: 0.9999993044579285, iteration: 111088
loss: 1.0192632675170898,grad_norm: 0.9999992124514162, iteration: 111089
loss: 0.9647383093833923,grad_norm: 0.9999994332401747, iteration: 111090
loss: 0.9594465494155884,grad_norm: 0.999999122477647, iteration: 111091
loss: 1.0193066596984863,grad_norm: 0.9999990248567515, iteration: 111092
loss: 1.001863718032837,grad_norm: 0.9806507165952957, iteration: 111093
loss: 1.0109280347824097,grad_norm: 0.9999991247146641, iteration: 111094
loss: 1.0291701555252075,grad_norm: 0.9439386500808807, iteration: 111095
loss: 0.9808105826377869,grad_norm: 0.9999990821141993, iteration: 111096
loss: 0.9478347897529602,grad_norm: 0.8853230990003957, iteration: 111097
loss: 0.9941841959953308,grad_norm: 0.9999990192917592, iteration: 111098
loss: 1.0079691410064697,grad_norm: 0.999999224315313, iteration: 111099
loss: 1.006040334701538,grad_norm: 0.9821642531573025, iteration: 111100
loss: 1.2612828016281128,grad_norm: 0.9696521036493778, iteration: 111101
loss: 1.0273478031158447,grad_norm: 0.9999992038861572, iteration: 111102
loss: 1.0221656560897827,grad_norm: 0.8390907053337859, iteration: 111103
loss: 1.0268194675445557,grad_norm: 0.9048440324675115, iteration: 111104
loss: 1.0064979791641235,grad_norm: 0.9999998113251021, iteration: 111105
loss: 1.0053255558013916,grad_norm: 0.987883039037311, iteration: 111106
loss: 1.0283894538879395,grad_norm: 0.8966613521850159, iteration: 111107
loss: 1.0159631967544556,grad_norm: 0.9999991060137249, iteration: 111108
loss: 0.9454719424247742,grad_norm: 0.9999991869337279, iteration: 111109
loss: 1.0662803649902344,grad_norm: 0.9999997333892214, iteration: 111110
loss: 1.0326957702636719,grad_norm: 0.9999991795506517, iteration: 111111
loss: 1.0154169797897339,grad_norm: 0.9859629711475119, iteration: 111112
loss: 0.9744925498962402,grad_norm: 0.8066091045881093, iteration: 111113
loss: 0.9627647399902344,grad_norm: 0.8793425060566714, iteration: 111114
loss: 0.9887146353721619,grad_norm: 0.9999995352235436, iteration: 111115
loss: 0.9929285645484924,grad_norm: 0.9999991320804873, iteration: 111116
loss: 0.9617169499397278,grad_norm: 0.9999992204421415, iteration: 111117
loss: 0.9921717047691345,grad_norm: 0.9999988998852513, iteration: 111118
loss: 0.9822970628738403,grad_norm: 0.9999991542257382, iteration: 111119
loss: 1.003149390220642,grad_norm: 0.9439726874121155, iteration: 111120
loss: 1.0218679904937744,grad_norm: 0.9999991872970777, iteration: 111121
loss: 1.0008270740509033,grad_norm: 0.9999990714162599, iteration: 111122
loss: 1.0054223537445068,grad_norm: 0.9999998587650623, iteration: 111123
loss: 0.990999698638916,grad_norm: 0.9999990736868104, iteration: 111124
loss: 1.0050156116485596,grad_norm: 0.9999993319198278, iteration: 111125
loss: 0.9893355369567871,grad_norm: 0.9999992848999584, iteration: 111126
loss: 1.0215126276016235,grad_norm: 0.9999991279517928, iteration: 111127
loss: 1.04135000705719,grad_norm: 0.9999995264036973, iteration: 111128
loss: 0.9649587869644165,grad_norm: 0.9184917159566495, iteration: 111129
loss: 1.0099432468414307,grad_norm: 0.9999990166803667, iteration: 111130
loss: 1.016728162765503,grad_norm: 0.9999992209297944, iteration: 111131
loss: 0.9867514967918396,grad_norm: 0.9040781303324155, iteration: 111132
loss: 1.0050339698791504,grad_norm: 0.9999989702616852, iteration: 111133
loss: 0.9788542985916138,grad_norm: 0.9999991270251627, iteration: 111134
loss: 0.9899166226387024,grad_norm: 0.9999990909383589, iteration: 111135
loss: 1.007055401802063,grad_norm: 0.838291293416603, iteration: 111136
loss: 0.9968227744102478,grad_norm: 0.9999993493079725, iteration: 111137
loss: 1.0104162693023682,grad_norm: 0.999999397500995, iteration: 111138
loss: 1.022666096687317,grad_norm: 0.9999990854893247, iteration: 111139
loss: 1.0255051851272583,grad_norm: 0.9489759698240725, iteration: 111140
loss: 0.9478857517242432,grad_norm: 0.9999989679480568, iteration: 111141
loss: 0.9882703423500061,grad_norm: 0.9118606909914225, iteration: 111142
loss: 1.0786083936691284,grad_norm: 0.9999991859381365, iteration: 111143
loss: 1.0423407554626465,grad_norm: 0.9999991337086276, iteration: 111144
loss: 0.988854169845581,grad_norm: 0.9999991276980132, iteration: 111145
loss: 0.9852309226989746,grad_norm: 0.8751053821892304, iteration: 111146
loss: 0.9835054278373718,grad_norm: 0.9999991110509101, iteration: 111147
loss: 1.0559611320495605,grad_norm: 0.9723724610423738, iteration: 111148
loss: 1.0370267629623413,grad_norm: 0.9999990082157808, iteration: 111149
loss: 0.9792338609695435,grad_norm: 0.9999995526896523, iteration: 111150
loss: 0.9961845278739929,grad_norm: 0.9999990323600482, iteration: 111151
loss: 0.9424871802330017,grad_norm: 0.9698753019794296, iteration: 111152
loss: 1.0276483297348022,grad_norm: 0.9999990969964964, iteration: 111153
loss: 1.0117193460464478,grad_norm: 0.9999995100837626, iteration: 111154
loss: 0.9242526888847351,grad_norm: 0.9999990966239245, iteration: 111155
loss: 1.0328375101089478,grad_norm: 0.9999991990721292, iteration: 111156
loss: 0.9761519432067871,grad_norm: 0.9999995341617379, iteration: 111157
loss: 1.0214686393737793,grad_norm: 0.9999991732712818, iteration: 111158
loss: 1.007819652557373,grad_norm: 0.9999990341634103, iteration: 111159
loss: 1.0375018119812012,grad_norm: 0.9999992907570368, iteration: 111160
loss: 0.9933453798294067,grad_norm: 0.9026338289412426, iteration: 111161
loss: 0.9910079836845398,grad_norm: 0.9999990623982074, iteration: 111162
loss: 0.9552580714225769,grad_norm: 0.9544559483314986, iteration: 111163
loss: 0.996738076210022,grad_norm: 0.9999990065859279, iteration: 111164
loss: 0.996529221534729,grad_norm: 0.9999991542928331, iteration: 111165
loss: 1.0795713663101196,grad_norm: 0.9999991013047621, iteration: 111166
loss: 1.0007745027542114,grad_norm: 0.999999231849004, iteration: 111167
loss: 1.03240168094635,grad_norm: 0.9999992113138776, iteration: 111168
loss: 0.9589604735374451,grad_norm: 0.9364088542906069, iteration: 111169
loss: 1.0974537134170532,grad_norm: 0.9999993474757276, iteration: 111170
loss: 0.9928565621376038,grad_norm: 0.9999995532716607, iteration: 111171
loss: 0.952189564704895,grad_norm: 0.9999990222751309, iteration: 111172
loss: 1.0173230171203613,grad_norm: 0.9071609539328297, iteration: 111173
loss: 0.9657207131385803,grad_norm: 0.9999989243847159, iteration: 111174
loss: 1.0197798013687134,grad_norm: 0.9999992142304661, iteration: 111175
loss: 1.0350333452224731,grad_norm: 0.9999991114187798, iteration: 111176
loss: 0.9859212040901184,grad_norm: 0.9999991834075395, iteration: 111177
loss: 1.0097384452819824,grad_norm: 0.9999990296934441, iteration: 111178
loss: 0.9747910499572754,grad_norm: 0.9999990892138226, iteration: 111179
loss: 1.0307341814041138,grad_norm: 0.8525873030516404, iteration: 111180
loss: 0.9905637502670288,grad_norm: 0.9999991443159545, iteration: 111181
loss: 1.0239816904067993,grad_norm: 0.9999990908921168, iteration: 111182
loss: 1.0225648880004883,grad_norm: 0.9459211351739774, iteration: 111183
loss: 1.0324715375900269,grad_norm: 0.9992011779396267, iteration: 111184
loss: 0.9700450897216797,grad_norm: 0.9691531865083735, iteration: 111185
loss: 1.0036089420318604,grad_norm: 0.9999990663381786, iteration: 111186
loss: 1.0294662714004517,grad_norm: 0.9999990768238067, iteration: 111187
loss: 1.020105242729187,grad_norm: 0.9999990070542621, iteration: 111188
loss: 0.973905622959137,grad_norm: 0.9487005594428412, iteration: 111189
loss: 1.0707354545593262,grad_norm: 0.999999424629967, iteration: 111190
loss: 0.9819576740264893,grad_norm: 0.9733922865463601, iteration: 111191
loss: 0.9963168501853943,grad_norm: 0.9999991839153316, iteration: 111192
loss: 0.9880926609039307,grad_norm: 0.9697473955245168, iteration: 111193
loss: 1.0000817775726318,grad_norm: 0.9999992351076004, iteration: 111194
loss: 1.039262294769287,grad_norm: 0.9999991933238785, iteration: 111195
loss: 1.0617787837982178,grad_norm: 0.99999986550548, iteration: 111196
loss: 0.9873533844947815,grad_norm: 0.9999992872507915, iteration: 111197
loss: 1.0242602825164795,grad_norm: 0.9999992363348105, iteration: 111198
loss: 0.9926981329917908,grad_norm: 0.9715558099624466, iteration: 111199
loss: 0.9710371494293213,grad_norm: 0.9999991441057723, iteration: 111200
loss: 0.9947089552879333,grad_norm: 0.9999991737080076, iteration: 111201
loss: 1.0011122226715088,grad_norm: 0.9999990279172118, iteration: 111202
loss: 0.9923422932624817,grad_norm: 0.9999992079339441, iteration: 111203
loss: 0.9813094735145569,grad_norm: 0.9999993719746855, iteration: 111204
loss: 0.9977879524230957,grad_norm: 0.9999991760261264, iteration: 111205
loss: 1.013231635093689,grad_norm: 0.9999990787558278, iteration: 111206
loss: 0.9759660363197327,grad_norm: 0.9999990419850294, iteration: 111207
loss: 1.0194201469421387,grad_norm: 0.99999917656507, iteration: 111208
loss: 1.039686918258667,grad_norm: 0.9910348527851017, iteration: 111209
loss: 1.029588222503662,grad_norm: 0.9767995217859149, iteration: 111210
loss: 0.9931109547615051,grad_norm: 0.9999990383259453, iteration: 111211
loss: 0.9598456025123596,grad_norm: 0.9999990316847768, iteration: 111212
loss: 0.9940364956855774,grad_norm: 0.9592250255635933, iteration: 111213
loss: 0.9995117783546448,grad_norm: 0.9999992720244752, iteration: 111214
loss: 1.0092670917510986,grad_norm: 0.999999134739178, iteration: 111215
loss: 1.0401231050491333,grad_norm: 0.9759526043894486, iteration: 111216
loss: 1.0220125913619995,grad_norm: 0.9999995267651361, iteration: 111217
loss: 1.0022755861282349,grad_norm: 0.9999991411498803, iteration: 111218
loss: 0.981917142868042,grad_norm: 0.9999990230042385, iteration: 111219
loss: 0.9911474585533142,grad_norm: 0.9999991669290503, iteration: 111220
loss: 0.9996404647827148,grad_norm: 0.9534229311355541, iteration: 111221
loss: 1.0117517709732056,grad_norm: 0.9999992254180571, iteration: 111222
loss: 1.0061107873916626,grad_norm: 0.9321714150132961, iteration: 111223
loss: 0.9839370846748352,grad_norm: 0.999999503384818, iteration: 111224
loss: 1.0070116519927979,grad_norm: 0.9999991977774518, iteration: 111225
loss: 0.9933029413223267,grad_norm: 0.9999991586548238, iteration: 111226
loss: 1.0246118307113647,grad_norm: 0.9999989662921701, iteration: 111227
loss: 0.9845754504203796,grad_norm: 0.999999141239059, iteration: 111228
loss: 1.0282045602798462,grad_norm: 0.9999992291615046, iteration: 111229
loss: 1.0357263088226318,grad_norm: 0.9999991699356343, iteration: 111230
loss: 1.0084105730056763,grad_norm: 0.9999993659135069, iteration: 111231
loss: 0.9747847318649292,grad_norm: 0.9999995099266239, iteration: 111232
loss: 1.0002996921539307,grad_norm: 0.9999992170510131, iteration: 111233
loss: 0.9828569293022156,grad_norm: 0.9999992167102562, iteration: 111234
loss: 1.0146158933639526,grad_norm: 0.9999991452011359, iteration: 111235
loss: 1.0173757076263428,grad_norm: 0.9999993618513209, iteration: 111236
loss: 0.981593906879425,grad_norm: 0.9271883739128386, iteration: 111237
loss: 0.9731763005256653,grad_norm: 0.9999992113721953, iteration: 111238
loss: 0.9804077744483948,grad_norm: 0.977871170883741, iteration: 111239
loss: 0.995455265045166,grad_norm: 0.9999996284593506, iteration: 111240
loss: 0.9772389531135559,grad_norm: 0.9999991175226677, iteration: 111241
loss: 1.0222598314285278,grad_norm: 0.9999991052844855, iteration: 111242
loss: 0.9873050451278687,grad_norm: 0.9999993078102879, iteration: 111243
loss: 0.9827976226806641,grad_norm: 0.9999991363393768, iteration: 111244
loss: 1.0251506567001343,grad_norm: 0.9999990526730583, iteration: 111245
loss: 1.0671526193618774,grad_norm: 0.9999994144288767, iteration: 111246
loss: 0.9787120223045349,grad_norm: 0.9999990713875448, iteration: 111247
loss: 1.0614081621170044,grad_norm: 0.9999993534926362, iteration: 111248
loss: 1.0105656385421753,grad_norm: 0.9999992355825807, iteration: 111249
loss: 1.0240800380706787,grad_norm: 0.9999992105794588, iteration: 111250
loss: 0.984161913394928,grad_norm: 0.9999990504936686, iteration: 111251
loss: 0.9834269285202026,grad_norm: 0.9656557980816824, iteration: 111252
loss: 1.0196462869644165,grad_norm: 0.9999991580712771, iteration: 111253
loss: 1.040000319480896,grad_norm: 0.999999128664701, iteration: 111254
loss: 1.0510748624801636,grad_norm: 0.9999994937860024, iteration: 111255
loss: 1.0143885612487793,grad_norm: 0.9999997698802915, iteration: 111256
loss: 1.001697063446045,grad_norm: 0.9999991199369641, iteration: 111257
loss: 1.0150812864303589,grad_norm: 0.9097400860491534, iteration: 111258
loss: 0.9783920645713806,grad_norm: 0.9999991078607756, iteration: 111259
loss: 0.997969388961792,grad_norm: 0.9999991481771023, iteration: 111260
loss: 0.9858272075653076,grad_norm: 0.9999991204081068, iteration: 111261
loss: 1.012745976448059,grad_norm: 0.9663686133508824, iteration: 111262
loss: 0.9585785865783691,grad_norm: 0.9999990735261584, iteration: 111263
loss: 0.9892669916152954,grad_norm: 0.9070511331171575, iteration: 111264
loss: 0.9918927550315857,grad_norm: 0.9999990960935884, iteration: 111265
loss: 1.0027989149093628,grad_norm: 0.9999991644844579, iteration: 111266
loss: 0.9908018112182617,grad_norm: 0.9970856259924936, iteration: 111267
loss: 1.000153660774231,grad_norm: 0.9999991958264163, iteration: 111268
loss: 0.9794460535049438,grad_norm: 0.9999992299007346, iteration: 111269
loss: 1.0276082754135132,grad_norm: 0.9999989849195526, iteration: 111270
loss: 0.980975866317749,grad_norm: 0.9643479017416491, iteration: 111271
loss: 0.9985945820808411,grad_norm: 0.999998992911777, iteration: 111272
loss: 1.0039135217666626,grad_norm: 0.9999991022973059, iteration: 111273
loss: 1.01079523563385,grad_norm: 0.868732354083999, iteration: 111274
loss: 1.0260674953460693,grad_norm: 0.9999990483234171, iteration: 111275
loss: 1.024285912513733,grad_norm: 0.9999990605162881, iteration: 111276
loss: 1.0069825649261475,grad_norm: 0.9999991220181067, iteration: 111277
loss: 0.9955063462257385,grad_norm: 0.999999720754127, iteration: 111278
loss: 0.9956185817718506,grad_norm: 0.9305688410904701, iteration: 111279
loss: 0.9875127077102661,grad_norm: 0.9999990411549663, iteration: 111280
loss: 1.0198746919631958,grad_norm: 0.9999999238678791, iteration: 111281
loss: 0.9841462969779968,grad_norm: 0.9999993781982698, iteration: 111282
loss: 0.9971863031387329,grad_norm: 0.940794889558009, iteration: 111283
loss: 0.9926366209983826,grad_norm: 0.9902473986499823, iteration: 111284
loss: 0.9936962723731995,grad_norm: 0.9930395698801211, iteration: 111285
loss: 1.0002678632736206,grad_norm: 0.9999991312810356, iteration: 111286
loss: 0.959804117679596,grad_norm: 0.9951982759859942, iteration: 111287
loss: 1.0456856489181519,grad_norm: 0.9999990863518127, iteration: 111288
loss: 0.9900196194648743,grad_norm: 0.9999991906046904, iteration: 111289
loss: 0.9556367993354797,grad_norm: 0.9591936749433441, iteration: 111290
loss: 0.9962831139564514,grad_norm: 0.9999990981293755, iteration: 111291
loss: 1.0109786987304688,grad_norm: 0.9999992936285035, iteration: 111292
loss: 0.9790787100791931,grad_norm: 0.9999991828501424, iteration: 111293
loss: 1.021185278892517,grad_norm: 0.9999991452172925, iteration: 111294
loss: 1.0036683082580566,grad_norm: 0.9999991474940866, iteration: 111295
loss: 0.9792282581329346,grad_norm: 0.8615330682241595, iteration: 111296
loss: 0.9866670370101929,grad_norm: 0.9911832508089397, iteration: 111297
loss: 0.9944819211959839,grad_norm: 0.9358905129978566, iteration: 111298
loss: 1.0266387462615967,grad_norm: 0.9999991397670162, iteration: 111299
loss: 1.0182088613510132,grad_norm: 0.9999989773759865, iteration: 111300
loss: 0.9962588548660278,grad_norm: 0.964540371040009, iteration: 111301
loss: 1.0262113809585571,grad_norm: 0.9999996418354608, iteration: 111302
loss: 1.0038659572601318,grad_norm: 0.9999991290787797, iteration: 111303
loss: 1.0330978631973267,grad_norm: 0.9999991394201135, iteration: 111304
loss: 1.0109424591064453,grad_norm: 0.9999990975380498, iteration: 111305
loss: 1.0142135620117188,grad_norm: 0.9900056019703359, iteration: 111306
loss: 0.9913176894187927,grad_norm: 0.951845861726214, iteration: 111307
loss: 0.9782205820083618,grad_norm: 0.9999990942471731, iteration: 111308
loss: 0.9780750870704651,grad_norm: 0.9634614811285821, iteration: 111309
loss: 0.9519019722938538,grad_norm: 0.9999991956069223, iteration: 111310
loss: 1.0146225690841675,grad_norm: 0.9270238996995303, iteration: 111311
loss: 0.9702064990997314,grad_norm: 0.9876197057405449, iteration: 111312
loss: 0.9824178218841553,grad_norm: 0.9999990451305728, iteration: 111313
loss: 0.9696462750434875,grad_norm: 0.9999990862670012, iteration: 111314
loss: 0.9945804476737976,grad_norm: 0.9999990939720104, iteration: 111315
loss: 1.0325796604156494,grad_norm: 0.9999992103894108, iteration: 111316
loss: 1.0318341255187988,grad_norm: 0.9999990329463765, iteration: 111317
loss: 1.0005860328674316,grad_norm: 0.9999992465686435, iteration: 111318
loss: 1.0338342189788818,grad_norm: 0.9570779174720403, iteration: 111319
loss: 1.0218048095703125,grad_norm: 0.9999993863299984, iteration: 111320
loss: 1.021152377128601,grad_norm: 0.9024599827455905, iteration: 111321
loss: 1.0073000192642212,grad_norm: 0.9999991857100438, iteration: 111322
loss: 0.9672303199768066,grad_norm: 0.8863126825462422, iteration: 111323
loss: 1.034279704093933,grad_norm: 0.9999989780909183, iteration: 111324
loss: 0.9820281863212585,grad_norm: 0.9999991283200628, iteration: 111325
loss: 0.9830995202064514,grad_norm: 0.9287288385305802, iteration: 111326
loss: 1.0147631168365479,grad_norm: 0.9999989602494651, iteration: 111327
loss: 1.039650559425354,grad_norm: 0.9872718596591517, iteration: 111328
loss: 1.0410313606262207,grad_norm: 0.9999994612642512, iteration: 111329
loss: 1.0060163736343384,grad_norm: 0.9999993869218764, iteration: 111330
loss: 1.0077953338623047,grad_norm: 0.9999991818943628, iteration: 111331
loss: 0.9966702461242676,grad_norm: 0.8669744958001099, iteration: 111332
loss: 1.0020039081573486,grad_norm: 0.999999163193437, iteration: 111333
loss: 1.0418795347213745,grad_norm: 0.9999990042368438, iteration: 111334
loss: 0.970454752445221,grad_norm: 0.8735109463792908, iteration: 111335
loss: 1.0301824808120728,grad_norm: 0.9999993484296503, iteration: 111336
loss: 1.0181035995483398,grad_norm: 0.9263905457692717, iteration: 111337
loss: 1.069642424583435,grad_norm: 0.9999992866937523, iteration: 111338
loss: 0.99305260181427,grad_norm: 0.8537293484689334, iteration: 111339
loss: 0.9600257277488708,grad_norm: 0.878123993367991, iteration: 111340
loss: 0.9997027516365051,grad_norm: 0.9564986582353562, iteration: 111341
loss: 1.2866926193237305,grad_norm: 0.9999997236388889, iteration: 111342
loss: 1.051954746246338,grad_norm: 0.9999991026650186, iteration: 111343
loss: 1.0180367231369019,grad_norm: 0.969494294658297, iteration: 111344
loss: 1.089735507965088,grad_norm: 0.9999994581355119, iteration: 111345
loss: 0.9582993388175964,grad_norm: 0.9862858162543751, iteration: 111346
loss: 1.1176581382751465,grad_norm: 0.9999997021769621, iteration: 111347
loss: 1.1499325037002563,grad_norm: 0.9999999512490068, iteration: 111348
loss: 0.9993465542793274,grad_norm: 0.9999991176432907, iteration: 111349
loss: 1.0265573263168335,grad_norm: 0.9670160252656966, iteration: 111350
loss: 1.025529384613037,grad_norm: 0.9999992010569037, iteration: 111351
loss: 0.9806680679321289,grad_norm: 0.9300137008490694, iteration: 111352
loss: 1.0330395698547363,grad_norm: 0.9999991844116801, iteration: 111353
loss: 0.9946962594985962,grad_norm: 0.9999990716674602, iteration: 111354
loss: 1.0109058618545532,grad_norm: 0.9999991546117989, iteration: 111355
loss: 0.9977645874023438,grad_norm: 0.9999990949028481, iteration: 111356
loss: 0.9687525629997253,grad_norm: 0.9964736825003145, iteration: 111357
loss: 1.0102949142456055,grad_norm: 0.9999991511609514, iteration: 111358
loss: 0.9857478141784668,grad_norm: 0.9999989185469252, iteration: 111359
loss: 0.9907795786857605,grad_norm: 0.9999991633724382, iteration: 111360
loss: 1.0085405111312866,grad_norm: 0.9999991930032879, iteration: 111361
loss: 1.059043526649475,grad_norm: 0.9999999642630062, iteration: 111362
loss: 0.985312283039093,grad_norm: 0.9658752834280053, iteration: 111363
loss: 1.023159384727478,grad_norm: 0.9999992181500781, iteration: 111364
loss: 1.0210517644882202,grad_norm: 0.9854770888965153, iteration: 111365
loss: 1.019347906112671,grad_norm: 0.8674249161173906, iteration: 111366
loss: 0.9997574687004089,grad_norm: 0.9999991999500003, iteration: 111367
loss: 1.117720603942871,grad_norm: 0.9999999549394581, iteration: 111368
loss: 0.999104380607605,grad_norm: 0.9999991271962446, iteration: 111369
loss: 0.960639238357544,grad_norm: 0.9999992318249766, iteration: 111370
loss: 1.0166776180267334,grad_norm: 0.9999991310641698, iteration: 111371
loss: 1.0339499711990356,grad_norm: 0.9928166937511843, iteration: 111372
loss: 1.0135809183120728,grad_norm: 0.9999992027762004, iteration: 111373
loss: 1.009432315826416,grad_norm: 0.999999074480519, iteration: 111374
loss: 1.1051582098007202,grad_norm: 0.9999998582365057, iteration: 111375
loss: 0.9987661838531494,grad_norm: 0.9999990285935784, iteration: 111376
loss: 1.0154086351394653,grad_norm: 0.9823096069856323, iteration: 111377
loss: 1.0004241466522217,grad_norm: 0.999999051155977, iteration: 111378
loss: 0.9829674363136292,grad_norm: 0.9999992965052119, iteration: 111379
loss: 1.0215102434158325,grad_norm: 0.9999990699442959, iteration: 111380
loss: 1.0254870653152466,grad_norm: 0.9999991725985283, iteration: 111381
loss: 1.0045554637908936,grad_norm: 0.997241140467814, iteration: 111382
loss: 1.0255752801895142,grad_norm: 0.9999990163140866, iteration: 111383
loss: 1.0113897323608398,grad_norm: 0.9999995589223686, iteration: 111384
loss: 0.9654778242111206,grad_norm: 0.9999990586236297, iteration: 111385
loss: 1.0575487613677979,grad_norm: 0.9699223261273667, iteration: 111386
loss: 0.9972267746925354,grad_norm: 0.9663933047774467, iteration: 111387
loss: 1.0302029848098755,grad_norm: 0.9999991374085958, iteration: 111388
loss: 0.9897338151931763,grad_norm: 0.8786302777870424, iteration: 111389
loss: 0.9955036640167236,grad_norm: 0.9999991368487187, iteration: 111390
loss: 0.9955404996871948,grad_norm: 0.9999991058567288, iteration: 111391
loss: 0.9996423721313477,grad_norm: 0.929452734223334, iteration: 111392
loss: 1.0179333686828613,grad_norm: 0.8993582955640207, iteration: 111393
loss: 0.998341977596283,grad_norm: 0.9999991323156231, iteration: 111394
loss: 0.9627394676208496,grad_norm: 0.9149121330274578, iteration: 111395
loss: 1.0221279859542847,grad_norm: 0.9476228563579271, iteration: 111396
loss: 1.0091875791549683,grad_norm: 0.9999991899299316, iteration: 111397
loss: 0.9876378774642944,grad_norm: 0.9999991310577279, iteration: 111398
loss: 0.9897440671920776,grad_norm: 0.9999990700722298, iteration: 111399
loss: 0.9800364971160889,grad_norm: 0.9809722312408082, iteration: 111400
loss: 0.9852827191352844,grad_norm: 0.9955209006132293, iteration: 111401
loss: 0.9955887198448181,grad_norm: 0.9470549848570777, iteration: 111402
loss: 0.975644052028656,grad_norm: 0.9999989477923527, iteration: 111403
loss: 0.9623234272003174,grad_norm: 0.9999991433958707, iteration: 111404
loss: 0.9693301916122437,grad_norm: 0.9999991489632023, iteration: 111405
loss: 0.9852399826049805,grad_norm: 0.9999992044895806, iteration: 111406
loss: 1.0035595893859863,grad_norm: 0.995366094849194, iteration: 111407
loss: 0.9924407601356506,grad_norm: 0.9999990641919518, iteration: 111408
loss: 1.0411345958709717,grad_norm: 0.9999993770580284, iteration: 111409
loss: 0.9910042881965637,grad_norm: 0.9999989672654737, iteration: 111410
loss: 0.993425190448761,grad_norm: 0.9999988629349134, iteration: 111411
loss: 0.981643795967102,grad_norm: 0.9999990592476132, iteration: 111412
loss: 1.0057132244110107,grad_norm: 0.9999990975205375, iteration: 111413
loss: 1.0020698308944702,grad_norm: 0.9999990318589262, iteration: 111414
loss: 1.0430794954299927,grad_norm: 0.9889216724805578, iteration: 111415
loss: 1.0165938138961792,grad_norm: 0.999999340365111, iteration: 111416
loss: 1.0123748779296875,grad_norm: 0.9999991401824657, iteration: 111417
loss: 0.99859219789505,grad_norm: 0.9999991767998959, iteration: 111418
loss: 0.9746049046516418,grad_norm: 0.9999990143562237, iteration: 111419
loss: 1.0062638521194458,grad_norm: 0.9999990262283517, iteration: 111420
loss: 0.9963626861572266,grad_norm: 0.9999989870942183, iteration: 111421
loss: 0.9874001741409302,grad_norm: 0.999998988421359, iteration: 111422
loss: 0.9821541905403137,grad_norm: 0.931120290653548, iteration: 111423
loss: 1.040147066116333,grad_norm: 0.9999991036580649, iteration: 111424
loss: 0.9821511507034302,grad_norm: 0.9999990446742067, iteration: 111425
loss: 0.9719647169113159,grad_norm: 0.9999991887305164, iteration: 111426
loss: 0.9894365072250366,grad_norm: 0.90798383872177, iteration: 111427
loss: 0.9923211336135864,grad_norm: 0.9939352069069072, iteration: 111428
loss: 0.9941754937171936,grad_norm: 0.9999991993261585, iteration: 111429
loss: 0.9576014280319214,grad_norm: 0.8874478762267335, iteration: 111430
loss: 0.9918432235717773,grad_norm: 0.9999992008708102, iteration: 111431
loss: 1.0288877487182617,grad_norm: 0.9999991259108598, iteration: 111432
loss: 0.968390166759491,grad_norm: 0.9999990255888842, iteration: 111433
loss: 1.0014293193817139,grad_norm: 0.9999992555455418, iteration: 111434
loss: 1.023221492767334,grad_norm: 0.9916551421593072, iteration: 111435
loss: 1.0017142295837402,grad_norm: 0.9999992684400317, iteration: 111436
loss: 1.0199534893035889,grad_norm: 0.9999990765761658, iteration: 111437
loss: 0.9632844924926758,grad_norm: 0.9999991309930174, iteration: 111438
loss: 0.9651824831962585,grad_norm: 0.922382110666418, iteration: 111439
loss: 1.0032920837402344,grad_norm: 0.9999994030806848, iteration: 111440
loss: 0.9997835755348206,grad_norm: 0.989490610711277, iteration: 111441
loss: 0.9810527563095093,grad_norm: 0.9283843312379535, iteration: 111442
loss: 0.9967345595359802,grad_norm: 0.9999989130132922, iteration: 111443
loss: 0.9636601805686951,grad_norm: 0.9999988716082576, iteration: 111444
loss: 1.0499351024627686,grad_norm: 0.9999990872694331, iteration: 111445
loss: 0.9768481254577637,grad_norm: 0.9999991640030639, iteration: 111446
loss: 1.022111415863037,grad_norm: 0.9681976020335676, iteration: 111447
loss: 1.008048176765442,grad_norm: 0.9999992935683506, iteration: 111448
loss: 0.9855726361274719,grad_norm: 0.999998988205913, iteration: 111449
loss: 1.0428920984268188,grad_norm: 0.9999991300245578, iteration: 111450
loss: 0.976702094078064,grad_norm: 0.9888303388614967, iteration: 111451
loss: 1.0127367973327637,grad_norm: 0.9999991635158177, iteration: 111452
loss: 1.0068519115447998,grad_norm: 0.9989173099955372, iteration: 111453
loss: 1.0091933012008667,grad_norm: 0.9999991413256198, iteration: 111454
loss: 1.0011428594589233,grad_norm: 0.9999991162534577, iteration: 111455
loss: 1.028769850730896,grad_norm: 0.9999993239961242, iteration: 111456
loss: 0.9933353662490845,grad_norm: 0.9342632885078063, iteration: 111457
loss: 1.0055644512176514,grad_norm: 0.9999992522358759, iteration: 111458
loss: 0.9896710515022278,grad_norm: 0.9999989930287055, iteration: 111459
loss: 1.0048818588256836,grad_norm: 0.9999993669413269, iteration: 111460
loss: 1.0247578620910645,grad_norm: 0.9999989550629748, iteration: 111461
loss: 1.0146361589431763,grad_norm: 0.9999990392602188, iteration: 111462
loss: 0.9830794334411621,grad_norm: 0.999999133614669, iteration: 111463
loss: 1.0490827560424805,grad_norm: 0.9999991671468895, iteration: 111464
loss: 1.0036978721618652,grad_norm: 0.9850836310726155, iteration: 111465
loss: 1.0020999908447266,grad_norm: 0.9380300558278112, iteration: 111466
loss: 0.9751127362251282,grad_norm: 0.9373038113222482, iteration: 111467
loss: 1.0053406953811646,grad_norm: 0.9571457041164294, iteration: 111468
loss: 1.0169312953948975,grad_norm: 0.9582543489962422, iteration: 111469
loss: 0.9686411619186401,grad_norm: 0.9466996378031538, iteration: 111470
loss: 0.9883002042770386,grad_norm: 0.9999990442637637, iteration: 111471
loss: 0.9911933541297913,grad_norm: 0.9999989809535705, iteration: 111472
loss: 0.9982404708862305,grad_norm: 0.9976255075195205, iteration: 111473
loss: 1.0156465768814087,grad_norm: 0.9630561027533582, iteration: 111474
loss: 1.0245630741119385,grad_norm: 0.9999990004384431, iteration: 111475
loss: 1.0061554908752441,grad_norm: 0.9799599605457456, iteration: 111476
loss: 0.9999842643737793,grad_norm: 0.8306036684704389, iteration: 111477
loss: 1.0022398233413696,grad_norm: 0.9484309148967442, iteration: 111478
loss: 1.0160120725631714,grad_norm: 0.9999990838498448, iteration: 111479
loss: 1.0178048610687256,grad_norm: 0.999999433703255, iteration: 111480
loss: 0.9782189130783081,grad_norm: 0.9999990784665144, iteration: 111481
loss: 0.9950818419456482,grad_norm: 0.9615427935276328, iteration: 111482
loss: 0.9952206611633301,grad_norm: 0.9329499210468243, iteration: 111483
loss: 0.9750656485557556,grad_norm: 0.8494510850696133, iteration: 111484
loss: 1.0245245695114136,grad_norm: 0.9999990318860443, iteration: 111485
loss: 1.0071675777435303,grad_norm: 0.9984220579870032, iteration: 111486
loss: 0.9987481236457825,grad_norm: 0.9978692784634328, iteration: 111487
loss: 1.0330193042755127,grad_norm: 0.9032766064540717, iteration: 111488
loss: 1.0391470193862915,grad_norm: 0.9999989985562329, iteration: 111489
loss: 0.9759160876274109,grad_norm: 0.9999990515418268, iteration: 111490
loss: 1.0228084325790405,grad_norm: 0.999999071452145, iteration: 111491
loss: 0.9621652960777283,grad_norm: 0.99536336618046, iteration: 111492
loss: 0.97780442237854,grad_norm: 0.9999992099497699, iteration: 111493
loss: 0.986711323261261,grad_norm: 0.9999992523368556, iteration: 111494
loss: 0.9803386330604553,grad_norm: 0.9747767705705689, iteration: 111495
loss: 1.011824131011963,grad_norm: 0.9999991892360063, iteration: 111496
loss: 1.0161865949630737,grad_norm: 0.9999992705709115, iteration: 111497
loss: 1.0000391006469727,grad_norm: 0.8671029344096908, iteration: 111498
loss: 0.9866204261779785,grad_norm: 0.8813914729192194, iteration: 111499
loss: 1.0048621892929077,grad_norm: 0.9999998775042905, iteration: 111500
loss: 1.0145081281661987,grad_norm: 0.9999992401121699, iteration: 111501
loss: 1.0225579738616943,grad_norm: 0.9999990722253567, iteration: 111502
loss: 1.0310786962509155,grad_norm: 0.9999991423090573, iteration: 111503
loss: 0.9711036086082458,grad_norm: 0.9920679446987202, iteration: 111504
loss: 1.025975227355957,grad_norm: 0.9999998419028396, iteration: 111505
loss: 1.1536980867385864,grad_norm: 0.999999519874836, iteration: 111506
loss: 1.0554994344711304,grad_norm: 0.999999493677416, iteration: 111507
loss: 0.9854762554168701,grad_norm: 0.9999991169251454, iteration: 111508
loss: 1.0062479972839355,grad_norm: 0.9999991047068917, iteration: 111509
loss: 1.018722653388977,grad_norm: 0.9999991251097213, iteration: 111510
loss: 1.0313361883163452,grad_norm: 0.9999991654288591, iteration: 111511
loss: 0.9969819784164429,grad_norm: 0.999999077350444, iteration: 111512
loss: 1.047261118888855,grad_norm: 0.9999993615087452, iteration: 111513
loss: 1.0321484804153442,grad_norm: 0.999999135112374, iteration: 111514
loss: 1.0254064798355103,grad_norm: 0.8029952205374499, iteration: 111515
loss: 1.021120548248291,grad_norm: 0.9485540275480342, iteration: 111516
loss: 0.9911787509918213,grad_norm: 0.9999990140559996, iteration: 111517
loss: 0.9928255081176758,grad_norm: 0.940804283038515, iteration: 111518
loss: 1.0005919933319092,grad_norm: 0.9878346348350134, iteration: 111519
loss: 1.003008246421814,grad_norm: 0.883301091044137, iteration: 111520
loss: 0.9860252141952515,grad_norm: 0.9999997327134593, iteration: 111521
loss: 1.0300673246383667,grad_norm: 0.9999992255333197, iteration: 111522
loss: 1.0083904266357422,grad_norm: 0.9265096081393475, iteration: 111523
loss: 1.009666919708252,grad_norm: 0.9999990953224169, iteration: 111524
loss: 0.9525817632675171,grad_norm: 0.9479656294166255, iteration: 111525
loss: 1.009143352508545,grad_norm: 0.9999994961254426, iteration: 111526
loss: 1.0016738176345825,grad_norm: 0.9999988343306567, iteration: 111527
loss: 1.0107755661010742,grad_norm: 0.999999090081808, iteration: 111528
loss: 1.00173819065094,grad_norm: 0.9459371410862186, iteration: 111529
loss: 1.0245803594589233,grad_norm: 0.8255222606764115, iteration: 111530
loss: 0.9989727735519409,grad_norm: 0.9778635435298554, iteration: 111531
loss: 0.9998512864112854,grad_norm: 0.969036150106119, iteration: 111532
loss: 1.0046802759170532,grad_norm: 0.999999214793348, iteration: 111533
loss: 0.9771633744239807,grad_norm: 0.9999991722910593, iteration: 111534
loss: 1.0017544031143188,grad_norm: 0.9999991272423677, iteration: 111535
loss: 0.9746690392494202,grad_norm: 0.9999990950461987, iteration: 111536
loss: 0.9997830390930176,grad_norm: 0.9999991531708877, iteration: 111537
loss: 1.0192078351974487,grad_norm: 0.9999992348775766, iteration: 111538
loss: 0.9901476502418518,grad_norm: 0.9999991348166082, iteration: 111539
loss: 1.0151540040969849,grad_norm: 0.8675870708463544, iteration: 111540
loss: 1.0209683179855347,grad_norm: 0.9310082184907709, iteration: 111541
loss: 0.9797629714012146,grad_norm: 0.9999991037670312, iteration: 111542
loss: 0.9979180693626404,grad_norm: 0.9999990061878872, iteration: 111543
loss: 1.0239038467407227,grad_norm: 0.9999992542968452, iteration: 111544
loss: 0.9945839047431946,grad_norm: 0.9999990663049723, iteration: 111545
loss: 0.9688282012939453,grad_norm: 0.9999991667564728, iteration: 111546
loss: 1.0020557641983032,grad_norm: 0.99999896377463, iteration: 111547
loss: 1.038241982460022,grad_norm: 0.9999992013501208, iteration: 111548
loss: 1.0588170289993286,grad_norm: 0.9999997754704895, iteration: 111549
loss: 1.0314688682556152,grad_norm: 0.9529329626412456, iteration: 111550
loss: 0.9866982102394104,grad_norm: 0.9999992283864653, iteration: 111551
loss: 0.9719572067260742,grad_norm: 0.9999991545518612, iteration: 111552
loss: 1.0140966176986694,grad_norm: 0.8809473884872424, iteration: 111553
loss: 1.049024224281311,grad_norm: 0.9999997027456566, iteration: 111554
loss: 1.0140949487686157,grad_norm: 0.9603281808380852, iteration: 111555
loss: 0.9682826995849609,grad_norm: 0.9921949586649643, iteration: 111556
loss: 0.9785895943641663,grad_norm: 0.9999991268510395, iteration: 111557
loss: 1.0381282567977905,grad_norm: 0.9999996602859276, iteration: 111558
loss: 1.042796015739441,grad_norm: 0.9999995119138697, iteration: 111559
loss: 1.014667272567749,grad_norm: 0.9999992223187388, iteration: 111560
loss: 1.198444128036499,grad_norm: 0.9999993104134066, iteration: 111561
loss: 1.0199955701828003,grad_norm: 0.9999991943410428, iteration: 111562
loss: 0.9872470498085022,grad_norm: 0.9981524305492068, iteration: 111563
loss: 1.002182126045227,grad_norm: 0.928151366908638, iteration: 111564
loss: 1.0029605627059937,grad_norm: 0.9999990271877639, iteration: 111565
loss: 0.9633718132972717,grad_norm: 0.9999990652423384, iteration: 111566
loss: 1.0029423236846924,grad_norm: 0.9999991721475854, iteration: 111567
loss: 1.0015712976455688,grad_norm: 0.9725838465369211, iteration: 111568
loss: 1.0836554765701294,grad_norm: 0.9999998724182935, iteration: 111569
loss: 0.9929484724998474,grad_norm: 0.9999990212290949, iteration: 111570
loss: 1.0844758749008179,grad_norm: 0.9999991823000054, iteration: 111571
loss: 1.047103762626648,grad_norm: 0.9999992132826147, iteration: 111572
loss: 1.0214545726776123,grad_norm: 0.9999994487101876, iteration: 111573
loss: 1.006150722503662,grad_norm: 0.9999991598029443, iteration: 111574
loss: 0.9851836562156677,grad_norm: 0.960663158976377, iteration: 111575
loss: 0.9846211075782776,grad_norm: 0.999999043408027, iteration: 111576
loss: 1.0116266012191772,grad_norm: 0.9999991011870218, iteration: 111577
loss: 0.9770215749740601,grad_norm: 0.9284017761891105, iteration: 111578
loss: 0.9916699528694153,grad_norm: 0.9999993919884915, iteration: 111579
loss: 1.0169563293457031,grad_norm: 0.9999992304018811, iteration: 111580
loss: 1.0214143991470337,grad_norm: 0.9999991514990252, iteration: 111581
loss: 1.0155569314956665,grad_norm: 0.9999991067252382, iteration: 111582
loss: 0.9957014322280884,grad_norm: 0.9629047904632411, iteration: 111583
loss: 0.980486273765564,grad_norm: 0.9999991533348205, iteration: 111584
loss: 0.9940353631973267,grad_norm: 0.9999991035665008, iteration: 111585
loss: 0.9719844460487366,grad_norm: 0.9999990117843434, iteration: 111586
loss: 0.9823330044746399,grad_norm: 0.9569913642209502, iteration: 111587
loss: 1.0103545188903809,grad_norm: 0.9999992080732749, iteration: 111588
loss: 0.9888269305229187,grad_norm: 0.9999991688909017, iteration: 111589
loss: 1.008692741394043,grad_norm: 0.9858800629700407, iteration: 111590
loss: 1.0002844333648682,grad_norm: 0.9999991024134941, iteration: 111591
loss: 1.00387704372406,grad_norm: 0.999999093331613, iteration: 111592
loss: 1.002732515335083,grad_norm: 0.9661038602408605, iteration: 111593
loss: 0.991190493106842,grad_norm: 0.9999992514048928, iteration: 111594
loss: 0.9901097416877747,grad_norm: 0.9999992875821421, iteration: 111595
loss: 0.9916659593582153,grad_norm: 0.9999991378401833, iteration: 111596
loss: 0.9947438836097717,grad_norm: 0.999999223856897, iteration: 111597
loss: 0.9947818517684937,grad_norm: 0.9999989032008691, iteration: 111598
loss: 0.9786034226417542,grad_norm: 0.9999992595849096, iteration: 111599
loss: 0.9842552542686462,grad_norm: 0.9968756536522952, iteration: 111600
loss: 1.0135071277618408,grad_norm: 0.9999991838329322, iteration: 111601
loss: 0.9920952320098877,grad_norm: 0.9999991774494091, iteration: 111602
loss: 0.991220235824585,grad_norm: 0.8944810097487169, iteration: 111603
loss: 1.0196870565414429,grad_norm: 0.9999990895291334, iteration: 111604
loss: 1.0149239301681519,grad_norm: 0.9999992838725913, iteration: 111605
loss: 1.0120939016342163,grad_norm: 0.9386476827587088, iteration: 111606
loss: 0.995648205280304,grad_norm: 0.9999990009929031, iteration: 111607
loss: 0.9882297515869141,grad_norm: 0.9999991982366425, iteration: 111608
loss: 0.9884577393531799,grad_norm: 0.9999991146121863, iteration: 111609
loss: 1.0256624221801758,grad_norm: 0.9999991552508396, iteration: 111610
loss: 0.9781659841537476,grad_norm: 0.9999990340285896, iteration: 111611
loss: 1.036927342414856,grad_norm: 0.9442568663119841, iteration: 111612
loss: 0.9951319694519043,grad_norm: 0.975671977373145, iteration: 111613
loss: 0.9814684391021729,grad_norm: 0.9999989753847184, iteration: 111614
loss: 0.994938850402832,grad_norm: 0.9396211986791152, iteration: 111615
loss: 1.02493417263031,grad_norm: 0.9886192733377721, iteration: 111616
loss: 1.0107274055480957,grad_norm: 0.9999996154982909, iteration: 111617
loss: 1.0011178255081177,grad_norm: 0.9999992086709782, iteration: 111618
loss: 0.9953455924987793,grad_norm: 0.9999991921334054, iteration: 111619
loss: 1.0142340660095215,grad_norm: 0.999999181795359, iteration: 111620
loss: 1.0291303396224976,grad_norm: 0.999999148426452, iteration: 111621
loss: 1.0053210258483887,grad_norm: 0.9999990348798043, iteration: 111622
loss: 1.0059847831726074,grad_norm: 0.9999992804345805, iteration: 111623
loss: 0.993916928768158,grad_norm: 0.999999054834273, iteration: 111624
loss: 1.0290422439575195,grad_norm: 0.920794334488012, iteration: 111625
loss: 0.967609167098999,grad_norm: 0.9999992598572576, iteration: 111626
loss: 1.0204399824142456,grad_norm: 0.9999992414929202, iteration: 111627
loss: 1.0076292753219604,grad_norm: 0.9998519903785557, iteration: 111628
loss: 1.0223114490509033,grad_norm: 0.9999990648215503, iteration: 111629
loss: 1.004698395729065,grad_norm: 0.9999990420201742, iteration: 111630
loss: 0.9987984895706177,grad_norm: 0.9999990151350764, iteration: 111631
loss: 0.9855982661247253,grad_norm: 0.9478575206264087, iteration: 111632
loss: 0.9933288097381592,grad_norm: 0.9999991614221354, iteration: 111633
loss: 0.9836331605911255,grad_norm: 0.9999991379706172, iteration: 111634
loss: 1.0241832733154297,grad_norm: 0.9999991883725853, iteration: 111635
loss: 1.0190180540084839,grad_norm: 0.9999992346410004, iteration: 111636
loss: 0.9956688284873962,grad_norm: 0.9999991532386701, iteration: 111637
loss: 1.0017755031585693,grad_norm: 0.8715885784870057, iteration: 111638
loss: 1.0117300748825073,grad_norm: 0.85532875248603, iteration: 111639
loss: 0.9601669907569885,grad_norm: 0.9040438008939445, iteration: 111640
loss: 1.033099889755249,grad_norm: 0.8677131568127554, iteration: 111641
loss: 0.9455225467681885,grad_norm: 0.9893794968182014, iteration: 111642
loss: 0.983476459980011,grad_norm: 0.999998953848895, iteration: 111643
loss: 0.9822544455528259,grad_norm: 0.9863975701700828, iteration: 111644
loss: 0.998207688331604,grad_norm: 0.999999125000358, iteration: 111645
loss: 1.093287706375122,grad_norm: 0.9999993483080363, iteration: 111646
loss: 1.0010172128677368,grad_norm: 0.9373662764423658, iteration: 111647
loss: 0.9959480166435242,grad_norm: 0.9914094824694336, iteration: 111648
loss: 1.0139905214309692,grad_norm: 0.9999992555936613, iteration: 111649
loss: 1.0269923210144043,grad_norm: 0.9999990622530045, iteration: 111650
loss: 1.0128051042556763,grad_norm: 0.999999144984429, iteration: 111651
loss: 1.0095528364181519,grad_norm: 0.8825461557646505, iteration: 111652
loss: 0.9856570363044739,grad_norm: 0.9999991386872337, iteration: 111653
loss: 1.007910966873169,grad_norm: 0.9999991230663622, iteration: 111654
loss: 0.9712128639221191,grad_norm: 0.9999991225808359, iteration: 111655
loss: 1.021841049194336,grad_norm: 0.9594989668665648, iteration: 111656
loss: 0.9927775859832764,grad_norm: 0.9999990598487418, iteration: 111657
loss: 0.9942631125450134,grad_norm: 0.9999992815543208, iteration: 111658
loss: 1.0380635261535645,grad_norm: 0.9999991167271429, iteration: 111659
loss: 1.0251729488372803,grad_norm: 0.9488069832315458, iteration: 111660
loss: 1.0030745267868042,grad_norm: 0.9999990244617005, iteration: 111661
loss: 0.9920647144317627,grad_norm: 0.9999990887016948, iteration: 111662
loss: 0.9959641098976135,grad_norm: 0.9999990651031696, iteration: 111663
loss: 0.9790463447570801,grad_norm: 0.9675150104903021, iteration: 111664
loss: 1.0478368997573853,grad_norm: 0.9999992357305011, iteration: 111665
loss: 0.9531379342079163,grad_norm: 0.961896184078641, iteration: 111666
loss: 1.0010873079299927,grad_norm: 0.9999992491744724, iteration: 111667
loss: 0.9770539402961731,grad_norm: 0.999999132680857, iteration: 111668
loss: 1.0361016988754272,grad_norm: 0.9999988655267534, iteration: 111669
loss: 1.0193480253219604,grad_norm: 0.9369227906501933, iteration: 111670
loss: 1.0024272203445435,grad_norm: 0.8306809533269792, iteration: 111671
loss: 1.0206661224365234,grad_norm: 0.9999991015185764, iteration: 111672
loss: 0.9961707592010498,grad_norm: 0.9999996939307724, iteration: 111673
loss: 1.0468579530715942,grad_norm: 0.9999991544570849, iteration: 111674
loss: 1.0029644966125488,grad_norm: 0.9999990953748799, iteration: 111675
loss: 0.9944263696670532,grad_norm: 0.9999991309605143, iteration: 111676
loss: 0.9599276781082153,grad_norm: 0.9554673665764751, iteration: 111677
loss: 0.9806756973266602,grad_norm: 0.9999990728168686, iteration: 111678
loss: 0.9839319586753845,grad_norm: 0.9545137155461287, iteration: 111679
loss: 1.0155776739120483,grad_norm: 0.9830824993459474, iteration: 111680
loss: 1.0175248384475708,grad_norm: 0.9999988972102468, iteration: 111681
loss: 0.9979218244552612,grad_norm: 0.999999126273357, iteration: 111682
loss: 0.9981913566589355,grad_norm: 0.9999990517835984, iteration: 111683
loss: 0.9644872546195984,grad_norm: 0.9999990269042833, iteration: 111684
loss: 1.0132118463516235,grad_norm: 0.9999991135583364, iteration: 111685
loss: 0.9990023970603943,grad_norm: 0.9815741059107287, iteration: 111686
loss: 1.0056483745574951,grad_norm: 0.9760200969863096, iteration: 111687
loss: 1.0079015493392944,grad_norm: 0.8977628139733171, iteration: 111688
loss: 1.0240623950958252,grad_norm: 0.9999992955006056, iteration: 111689
loss: 0.9800460338592529,grad_norm: 0.9999989430492927, iteration: 111690
loss: 1.0167701244354248,grad_norm: 0.9417112949987255, iteration: 111691
loss: 0.9686002731323242,grad_norm: 0.9999991621989585, iteration: 111692
loss: 0.9923518896102905,grad_norm: 0.9999990455188831, iteration: 111693
loss: 1.0040756464004517,grad_norm: 0.9999991611050913, iteration: 111694
loss: 1.0355875492095947,grad_norm: 0.9999991446671073, iteration: 111695
loss: 1.0111331939697266,grad_norm: 0.9483289189751248, iteration: 111696
loss: 1.00943124294281,grad_norm: 0.9999992555909127, iteration: 111697
loss: 0.9386257529258728,grad_norm: 0.9999991520139333, iteration: 111698
loss: 0.9974245429039001,grad_norm: 0.9999990794392328, iteration: 111699
loss: 0.9805715680122375,grad_norm: 0.999999238275312, iteration: 111700
loss: 0.9879617094993591,grad_norm: 0.9999990781056907, iteration: 111701
loss: 1.0089980363845825,grad_norm: 0.9999991333093684, iteration: 111702
loss: 1.0554667711257935,grad_norm: 0.999999601560234, iteration: 111703
loss: 1.0156830549240112,grad_norm: 0.9999999721523768, iteration: 111704
loss: 0.9864068627357483,grad_norm: 0.8414161053601824, iteration: 111705
loss: 0.9898946285247803,grad_norm: 0.9999991283356422, iteration: 111706
loss: 0.9918736815452576,grad_norm: 0.999999067506875, iteration: 111707
loss: 1.0101356506347656,grad_norm: 0.9999991070462658, iteration: 111708
loss: 0.9643424153327942,grad_norm: 0.9999991721468499, iteration: 111709
loss: 0.9850772023200989,grad_norm: 0.999998993876284, iteration: 111710
loss: 0.9848670363426208,grad_norm: 0.8675895489790749, iteration: 111711
loss: 1.0062482357025146,grad_norm: 0.993506318005714, iteration: 111712
loss: 1.0317046642303467,grad_norm: 0.999999181567058, iteration: 111713
loss: 1.020967721939087,grad_norm: 0.8732270492701862, iteration: 111714
loss: 0.9992138147354126,grad_norm: 0.987604266200838, iteration: 111715
loss: 1.0018510818481445,grad_norm: 0.9999991361832136, iteration: 111716
loss: 0.9973881840705872,grad_norm: 0.9999990571153555, iteration: 111717
loss: 1.0189785957336426,grad_norm: 0.9999990735889438, iteration: 111718
loss: 1.0530147552490234,grad_norm: 0.9999992145993286, iteration: 111719
loss: 0.9533185958862305,grad_norm: 0.9999992057514951, iteration: 111720
loss: 0.991519033908844,grad_norm: 0.8775101244582716, iteration: 111721
loss: 0.9971535205841064,grad_norm: 0.889833774912406, iteration: 111722
loss: 1.0448671579360962,grad_norm: 0.9999991783738142, iteration: 111723
loss: 0.9698743224143982,grad_norm: 0.9583343599775672, iteration: 111724
loss: 1.0812492370605469,grad_norm: 0.9999992049375757, iteration: 111725
loss: 1.017045497894287,grad_norm: 0.9427886020426197, iteration: 111726
loss: 0.9725933074951172,grad_norm: 0.9999992185124907, iteration: 111727
loss: 0.9942860007286072,grad_norm: 0.9999991638925757, iteration: 111728
loss: 1.0159415006637573,grad_norm: 0.9999991085601017, iteration: 111729
loss: 1.0179903507232666,grad_norm: 0.9999991222742772, iteration: 111730
loss: 1.030870795249939,grad_norm: 0.9999991850791725, iteration: 111731
loss: 1.0014686584472656,grad_norm: 0.9828385795876116, iteration: 111732
loss: 1.0616717338562012,grad_norm: 0.9999998635119792, iteration: 111733
loss: 1.0058940649032593,grad_norm: 0.9660505281689432, iteration: 111734
loss: 1.0145988464355469,grad_norm: 0.9455382221828337, iteration: 111735
loss: 1.0253280401229858,grad_norm: 0.9525313226676434, iteration: 111736
loss: 0.9945740103721619,grad_norm: 0.9999991065011182, iteration: 111737
loss: 1.0265952348709106,grad_norm: 0.9999993529894398, iteration: 111738
loss: 0.9683532118797302,grad_norm: 0.9999990943742461, iteration: 111739
loss: 1.0609803199768066,grad_norm: 0.9961286923654273, iteration: 111740
loss: 1.0154629945755005,grad_norm: 0.9999994921125517, iteration: 111741
loss: 0.9879317283630371,grad_norm: 0.9775697296927166, iteration: 111742
loss: 1.0437225103378296,grad_norm: 0.9999998595822411, iteration: 111743
loss: 1.0196714401245117,grad_norm: 0.9999992474237729, iteration: 111744
loss: 0.9951954483985901,grad_norm: 0.9999992086702282, iteration: 111745
loss: 0.9659281969070435,grad_norm: 0.9999992202948609, iteration: 111746
loss: 0.9983669519424438,grad_norm: 0.9400160900082445, iteration: 111747
loss: 0.999224841594696,grad_norm: 0.8756073273869905, iteration: 111748
loss: 1.023663878440857,grad_norm: 0.9895566636301193, iteration: 111749
loss: 1.0583516359329224,grad_norm: 0.9999999050203832, iteration: 111750
loss: 0.9662392139434814,grad_norm: 0.9999989492001232, iteration: 111751
loss: 1.0062270164489746,grad_norm: 0.9999992128559421, iteration: 111752
loss: 0.9965721964836121,grad_norm: 0.9616965939469483, iteration: 111753
loss: 0.9872959852218628,grad_norm: 0.9999991785130341, iteration: 111754
loss: 1.003015160560608,grad_norm: 0.9999992257685153, iteration: 111755
loss: 0.9867371916770935,grad_norm: 0.9377750418560802, iteration: 111756
loss: 1.0194684267044067,grad_norm: 0.999999313104126, iteration: 111757
loss: 0.9829617142677307,grad_norm: 0.9999989639538256, iteration: 111758
loss: 1.005895972251892,grad_norm: 0.9270668314372046, iteration: 111759
loss: 0.9796373248100281,grad_norm: 0.9999989372762693, iteration: 111760
loss: 1.038426160812378,grad_norm: 0.9999991334269743, iteration: 111761
loss: 0.9736544489860535,grad_norm: 0.9999992043513714, iteration: 111762
loss: 1.0075687170028687,grad_norm: 0.9999992520799194, iteration: 111763
loss: 0.9980676174163818,grad_norm: 0.9633989187484108, iteration: 111764
loss: 0.9540292620658875,grad_norm: 0.9999989933698495, iteration: 111765
loss: 0.9973852038383484,grad_norm: 0.9999993219086887, iteration: 111766
loss: 0.9788336753845215,grad_norm: 0.9999990767680795, iteration: 111767
loss: 1.002824306488037,grad_norm: 0.9999992569494452, iteration: 111768
loss: 0.9823599457740784,grad_norm: 0.999999047070325, iteration: 111769
loss: 0.951537013053894,grad_norm: 0.9646842207036042, iteration: 111770
loss: 0.9714128971099854,grad_norm: 0.9999991858793141, iteration: 111771
loss: 1.0543206930160522,grad_norm: 0.999999269584445, iteration: 111772
loss: 1.031312108039856,grad_norm: 0.9999991365957751, iteration: 111773
loss: 0.9778686165809631,grad_norm: 0.9999991719327699, iteration: 111774
loss: 1.0145955085754395,grad_norm: 0.9999989966331116, iteration: 111775
loss: 1.001627802848816,grad_norm: 0.966671996410289, iteration: 111776
loss: 1.0542877912521362,grad_norm: 0.8433659417531871, iteration: 111777
loss: 0.9888546466827393,grad_norm: 0.8765272819922897, iteration: 111778
loss: 0.9730560779571533,grad_norm: 0.8976950705083746, iteration: 111779
loss: 0.9417552947998047,grad_norm: 0.9999991368804313, iteration: 111780
loss: 1.0191861391067505,grad_norm: 0.9999990829617428, iteration: 111781
loss: 0.977977991104126,grad_norm: 0.9127955183014398, iteration: 111782
loss: 0.9329257011413574,grad_norm: 0.999998988689481, iteration: 111783
loss: 1.0233769416809082,grad_norm: 0.999999035222776, iteration: 111784
loss: 1.0110923051834106,grad_norm: 0.9999991140449354, iteration: 111785
loss: 0.9786864519119263,grad_norm: 0.972320890477554, iteration: 111786
loss: 0.9885995984077454,grad_norm: 0.9247886457013919, iteration: 111787
loss: 1.0177663564682007,grad_norm: 0.9999995469366935, iteration: 111788
loss: 0.9981521368026733,grad_norm: 0.8977631509268716, iteration: 111789
loss: 1.0092180967330933,grad_norm: 0.9999989926285126, iteration: 111790
loss: 1.00069260597229,grad_norm: 0.8455138982178173, iteration: 111791
loss: 1.015692114830017,grad_norm: 0.9999991932023072, iteration: 111792
loss: 1.0225396156311035,grad_norm: 0.9999998894324822, iteration: 111793
loss: 0.9632218480110168,grad_norm: 0.9999991653598626, iteration: 111794
loss: 0.978952169418335,grad_norm: 0.8553074019846181, iteration: 111795
loss: 1.0448588132858276,grad_norm: 0.9999999327989649, iteration: 111796
loss: 0.9921984076499939,grad_norm: 0.9217067933375936, iteration: 111797
loss: 1.0075023174285889,grad_norm: 0.9835061921528555, iteration: 111798
loss: 1.0392016172409058,grad_norm: 0.9995476353387731, iteration: 111799
loss: 0.9923186302185059,grad_norm: 0.9663019641846557, iteration: 111800
loss: 0.9649251103401184,grad_norm: 0.9774307889334655, iteration: 111801
loss: 0.9666877388954163,grad_norm: 0.919198885754258, iteration: 111802
loss: 0.9748677611351013,grad_norm: 0.9739624299116085, iteration: 111803
loss: 1.0097334384918213,grad_norm: 0.9999990761681452, iteration: 111804
loss: 0.9530754089355469,grad_norm: 0.9895571543980757, iteration: 111805
loss: 1.0128601789474487,grad_norm: 0.9999990947759483, iteration: 111806
loss: 1.031436800956726,grad_norm: 0.9999990038163274, iteration: 111807
loss: 1.0337425470352173,grad_norm: 0.9999990014827437, iteration: 111808
loss: 1.030083179473877,grad_norm: 0.9999992797866499, iteration: 111809
loss: 0.9732510447502136,grad_norm: 0.9999991672784131, iteration: 111810
loss: 1.0133123397827148,grad_norm: 0.9999991762127299, iteration: 111811
loss: 1.0002366304397583,grad_norm: 0.9999992148468875, iteration: 111812
loss: 1.0053012371063232,grad_norm: 0.9999990201572382, iteration: 111813
loss: 0.9957687258720398,grad_norm: 0.9999991549935446, iteration: 111814
loss: 1.0039548873901367,grad_norm: 0.9546598535740575, iteration: 111815
loss: 1.009514331817627,grad_norm: 0.9800960418537578, iteration: 111816
loss: 0.9953266978263855,grad_norm: 0.9656630340894161, iteration: 111817
loss: 1.0024975538253784,grad_norm: 0.915498760302813, iteration: 111818
loss: 1.0001580715179443,grad_norm: 0.9587723636803659, iteration: 111819
loss: 1.005366325378418,grad_norm: 0.9999993531348043, iteration: 111820
loss: 0.9625245928764343,grad_norm: 0.9999989511637403, iteration: 111821
loss: 0.9829872250556946,grad_norm: 0.999999173336811, iteration: 111822
loss: 1.0039056539535522,grad_norm: 0.9999990706574197, iteration: 111823
loss: 0.9982569217681885,grad_norm: 0.9999990941658068, iteration: 111824
loss: 1.0032581090927124,grad_norm: 0.9999992343707109, iteration: 111825
loss: 0.9608996510505676,grad_norm: 0.9999991168829983, iteration: 111826
loss: 0.983975887298584,grad_norm: 0.9310130501517577, iteration: 111827
loss: 0.988265335559845,grad_norm: 0.9999989616333883, iteration: 111828
loss: 0.9981815218925476,grad_norm: 0.8383323844483446, iteration: 111829
loss: 0.9818668365478516,grad_norm: 0.9479748618351075, iteration: 111830
loss: 0.962726354598999,grad_norm: 0.88043242925244, iteration: 111831
loss: 0.990657389163971,grad_norm: 0.9999991719124761, iteration: 111832
loss: 1.0073145627975464,grad_norm: 0.9999991328693072, iteration: 111833
loss: 0.9883601665496826,grad_norm: 0.9999991893144954, iteration: 111834
loss: 1.0054675340652466,grad_norm: 0.9999993128649726, iteration: 111835
loss: 1.0392465591430664,grad_norm: 0.9427524438848038, iteration: 111836
loss: 1.0146372318267822,grad_norm: 0.9510673085343327, iteration: 111837
loss: 1.0446096658706665,grad_norm: 0.9999994315100721, iteration: 111838
loss: 0.9645722508430481,grad_norm: 0.9999991126866764, iteration: 111839
loss: 0.9757990837097168,grad_norm: 0.9529052427202316, iteration: 111840
loss: 0.9913443326950073,grad_norm: 0.9999990379590512, iteration: 111841
loss: 1.0351524353027344,grad_norm: 0.8636617760107445, iteration: 111842
loss: 1.008117437362671,grad_norm: 0.999999247721788, iteration: 111843
loss: 0.9608747363090515,grad_norm: 0.9999991298932387, iteration: 111844
loss: 1.0019978284835815,grad_norm: 0.8969424834659978, iteration: 111845
loss: 1.0296931266784668,grad_norm: 0.9579804880322105, iteration: 111846
loss: 1.0041604042053223,grad_norm: 0.9999990644798826, iteration: 111847
loss: 0.9983300566673279,grad_norm: 0.9999995673419279, iteration: 111848
loss: 0.9574207663536072,grad_norm: 0.9222144761323893, iteration: 111849
loss: 0.9851517677307129,grad_norm: 0.9999990947499918, iteration: 111850
loss: 0.9591317176818848,grad_norm: 0.9999990329525107, iteration: 111851
loss: 1.01199209690094,grad_norm: 0.9999991459790196, iteration: 111852
loss: 0.9988101124763489,grad_norm: 0.9999990016276212, iteration: 111853
loss: 0.9748358726501465,grad_norm: 0.9999996998682761, iteration: 111854
loss: 0.9556073546409607,grad_norm: 0.9567194397807409, iteration: 111855
loss: 1.081001877784729,grad_norm: 0.999999396282968, iteration: 111856
loss: 1.0293641090393066,grad_norm: 0.9764456365024629, iteration: 111857
loss: 1.0359735488891602,grad_norm: 0.9999991295515887, iteration: 111858
loss: 0.9790830016136169,grad_norm: 0.9999991023146333, iteration: 111859
loss: 0.9884005784988403,grad_norm: 0.9942155233230071, iteration: 111860
loss: 1.028875470161438,grad_norm: 0.9999991392755795, iteration: 111861
loss: 0.986467719078064,grad_norm: 0.975125494031883, iteration: 111862
loss: 0.978665828704834,grad_norm: 0.8306396172207318, iteration: 111863
loss: 1.0195871591567993,grad_norm: 0.9714450329267035, iteration: 111864
loss: 0.9651209712028503,grad_norm: 0.9999991484017915, iteration: 111865
loss: 1.035056233406067,grad_norm: 0.9999992834976701, iteration: 111866
loss: 1.0063642263412476,grad_norm: 0.9717672804552137, iteration: 111867
loss: 1.0016142129898071,grad_norm: 0.9999992685531808, iteration: 111868
loss: 1.0177236795425415,grad_norm: 0.9999991723084923, iteration: 111869
loss: 0.9788411259651184,grad_norm: 0.9999991243445053, iteration: 111870
loss: 0.9750522971153259,grad_norm: 0.9999991622985969, iteration: 111871
loss: 0.9736148118972778,grad_norm: 0.8048707582830574, iteration: 111872
loss: 0.998620867729187,grad_norm: 0.9999991011690285, iteration: 111873
loss: 0.987781822681427,grad_norm: 0.9999991828910507, iteration: 111874
loss: 0.9911418557167053,grad_norm: 0.9999990589604393, iteration: 111875
loss: 1.0040574073791504,grad_norm: 0.905998277889422, iteration: 111876
loss: 1.0085490942001343,grad_norm: 0.9999992306666946, iteration: 111877
loss: 0.9901002645492554,grad_norm: 0.9178430505874736, iteration: 111878
loss: 1.0217046737670898,grad_norm: 0.9414777651166548, iteration: 111879
loss: 0.9752762317657471,grad_norm: 0.993295419237396, iteration: 111880
loss: 1.0319780111312866,grad_norm: 0.9999992284015856, iteration: 111881
loss: 0.948456883430481,grad_norm: 0.999999091057981, iteration: 111882
loss: 0.9733924865722656,grad_norm: 0.9999991075073857, iteration: 111883
loss: 1.013154149055481,grad_norm: 0.9895461302699653, iteration: 111884
loss: 1.0250380039215088,grad_norm: 0.9999992096268007, iteration: 111885
loss: 0.9747549891471863,grad_norm: 0.9999992215620119, iteration: 111886
loss: 1.002907633781433,grad_norm: 0.9999999066974065, iteration: 111887
loss: 1.0188571214675903,grad_norm: 0.9347132562056516, iteration: 111888
loss: 0.9380435943603516,grad_norm: 0.9999990680737902, iteration: 111889
loss: 1.0645711421966553,grad_norm: 0.9999992266802352, iteration: 111890
loss: 1.01352059841156,grad_norm: 0.8972002693645187, iteration: 111891
loss: 1.0329021215438843,grad_norm: 0.9999992448694216, iteration: 111892
loss: 1.0099096298217773,grad_norm: 0.9999994030536071, iteration: 111893
loss: 0.9904590845108032,grad_norm: 0.9999991957032228, iteration: 111894
loss: 0.9937320351600647,grad_norm: 0.999999149674409, iteration: 111895
loss: 0.9935240745544434,grad_norm: 0.9233935397528503, iteration: 111896
loss: 0.9953407049179077,grad_norm: 0.9999990310120548, iteration: 111897
loss: 0.9681611657142639,grad_norm: 0.8327851269401966, iteration: 111898
loss: 0.9662238359451294,grad_norm: 0.8741532767811012, iteration: 111899
loss: 1.0273367166519165,grad_norm: 0.8874085520862398, iteration: 111900
loss: 1.0101078748703003,grad_norm: 0.9999991300975185, iteration: 111901
loss: 1.0276401042938232,grad_norm: 0.9266620499929293, iteration: 111902
loss: 1.023926854133606,grad_norm: 0.9541723228400646, iteration: 111903
loss: 1.0483990907669067,grad_norm: 0.8988535180141963, iteration: 111904
loss: 1.0073517560958862,grad_norm: 0.9999991002088079, iteration: 111905
loss: 0.9971421957015991,grad_norm: 0.9999992432700632, iteration: 111906
loss: 1.0340415239334106,grad_norm: 0.9999992149024695, iteration: 111907
loss: 1.0033454895019531,grad_norm: 0.9692574408601498, iteration: 111908
loss: 1.021533727645874,grad_norm: 0.9999992219724932, iteration: 111909
loss: 1.0101969242095947,grad_norm: 0.8662225265499848, iteration: 111910
loss: 1.0581363439559937,grad_norm: 0.9999999278311261, iteration: 111911
loss: 1.0274263620376587,grad_norm: 0.9999993257958079, iteration: 111912
loss: 0.9740400910377502,grad_norm: 0.9999991470054009, iteration: 111913
loss: 0.9960863590240479,grad_norm: 0.9999989338838158, iteration: 111914
loss: 0.9933883547782898,grad_norm: 0.9524544692644161, iteration: 111915
loss: 0.9885016679763794,grad_norm: 0.9999990918344738, iteration: 111916
loss: 0.9980813264846802,grad_norm: 0.9999991908778809, iteration: 111917
loss: 0.9573574662208557,grad_norm: 0.9390877960212459, iteration: 111918
loss: 0.9632098078727722,grad_norm: 0.9999991301667948, iteration: 111919
loss: 0.9895422458648682,grad_norm: 0.9999990447978622, iteration: 111920
loss: 0.9964707493782043,grad_norm: 0.9203605585409497, iteration: 111921
loss: 0.9842771291732788,grad_norm: 0.9999990707348158, iteration: 111922
loss: 1.004583716392517,grad_norm: 0.9999990224423967, iteration: 111923
loss: 1.0102332830429077,grad_norm: 0.9999991749529757, iteration: 111924
loss: 0.9884076118469238,grad_norm: 0.9999990845961318, iteration: 111925
loss: 1.0433149337768555,grad_norm: 0.9985897816780849, iteration: 111926
loss: 0.9903867840766907,grad_norm: 0.9864443875428859, iteration: 111927
loss: 0.9981945157051086,grad_norm: 0.9585012880209925, iteration: 111928
loss: 1.0040644407272339,grad_norm: 0.9266773317375774, iteration: 111929
loss: 1.0027834177017212,grad_norm: 0.9999993670637626, iteration: 111930
loss: 0.9685875773429871,grad_norm: 0.999999178308747, iteration: 111931
loss: 1.0034379959106445,grad_norm: 0.9999991047414032, iteration: 111932
loss: 1.0290993452072144,grad_norm: 0.9999991188770361, iteration: 111933
loss: 0.9870160222053528,grad_norm: 0.8489801116005106, iteration: 111934
loss: 0.9740453958511353,grad_norm: 0.999999363778135, iteration: 111935
loss: 0.9965850710868835,grad_norm: 0.9999992288693843, iteration: 111936
loss: 0.9863573908805847,grad_norm: 0.9473805326470077, iteration: 111937
loss: 1.0134631395339966,grad_norm: 0.8716548987381125, iteration: 111938
loss: 1.011478304862976,grad_norm: 0.9793440567170373, iteration: 111939
loss: 0.9348275065422058,grad_norm: 0.9999992292750648, iteration: 111940
loss: 1.0209999084472656,grad_norm: 0.9729472415913842, iteration: 111941
loss: 1.0315967798233032,grad_norm: 0.9999991234532932, iteration: 111942
loss: 1.0016872882843018,grad_norm: 0.9999991418529843, iteration: 111943
loss: 1.0262573957443237,grad_norm: 0.9999996296488599, iteration: 111944
loss: 1.0132097005844116,grad_norm: 0.999999572625866, iteration: 111945
loss: 1.0183360576629639,grad_norm: 0.999999067085637, iteration: 111946
loss: 0.9934821724891663,grad_norm: 0.9999991721776725, iteration: 111947
loss: 0.9908401966094971,grad_norm: 0.9575673218227873, iteration: 111948
loss: 1.0480502843856812,grad_norm: 0.9999992219408601, iteration: 111949
loss: 1.0506573915481567,grad_norm: 0.9999990815541544, iteration: 111950
loss: 1.0295904874801636,grad_norm: 0.9999994341485934, iteration: 111951
loss: 0.9801187515258789,grad_norm: 0.943588342347503, iteration: 111952
loss: 0.9980076551437378,grad_norm: 0.9999990508294345, iteration: 111953
loss: 0.9831815361976624,grad_norm: 0.9332740883329509, iteration: 111954
loss: 1.0427182912826538,grad_norm: 0.9999993240439776, iteration: 111955
loss: 0.9806770086288452,grad_norm: 0.9999992555270447, iteration: 111956
loss: 0.9999125599861145,grad_norm: 0.8432154544885039, iteration: 111957
loss: 0.9967402815818787,grad_norm: 0.9999990516034928, iteration: 111958
loss: 0.9845380783081055,grad_norm: 0.9948497018842363, iteration: 111959
loss: 0.9915144443511963,grad_norm: 0.999999225078897, iteration: 111960
loss: 0.9685856699943542,grad_norm: 0.9999992480600228, iteration: 111961
loss: 1.0213207006454468,grad_norm: 0.9999991444888638, iteration: 111962
loss: 0.9827880263328552,grad_norm: 0.9243999667085812, iteration: 111963
loss: 1.023693323135376,grad_norm: 0.9999991359767686, iteration: 111964
loss: 0.9759054780006409,grad_norm: 0.8969807368504119, iteration: 111965
loss: 0.9849184155464172,grad_norm: 0.8866271655593386, iteration: 111966
loss: 0.9739207625389099,grad_norm: 0.9471379823485527, iteration: 111967
loss: 0.949454665184021,grad_norm: 0.9999993085920114, iteration: 111968
loss: 0.9639800190925598,grad_norm: 0.9999990109016373, iteration: 111969
loss: 0.9993103742599487,grad_norm: 0.9827264190119918, iteration: 111970
loss: 1.0125094652175903,grad_norm: 0.9455423468526103, iteration: 111971
loss: 1.03227961063385,grad_norm: 0.9999989290940313, iteration: 111972
loss: 1.0040329694747925,grad_norm: 0.9817428952640042, iteration: 111973
loss: 1.0013164281845093,grad_norm: 0.9999991673927905, iteration: 111974
loss: 0.9987881183624268,grad_norm: 0.9606394037676242, iteration: 111975
loss: 1.0383081436157227,grad_norm: 0.9999991528557519, iteration: 111976
loss: 0.9839661121368408,grad_norm: 0.9999990716309985, iteration: 111977
loss: 0.9986273050308228,grad_norm: 0.9999995416408466, iteration: 111978
loss: 0.9588379859924316,grad_norm: 0.9999991196181646, iteration: 111979
loss: 1.0185911655426025,grad_norm: 0.9999992180493907, iteration: 111980
loss: 1.0162397623062134,grad_norm: 0.9999990138902043, iteration: 111981
loss: 1.0083303451538086,grad_norm: 0.8157463284711579, iteration: 111982
loss: 0.9926825761795044,grad_norm: 0.9999991730624617, iteration: 111983
loss: 1.0438182353973389,grad_norm: 0.9999991394510218, iteration: 111984
loss: 0.9925472140312195,grad_norm: 0.9999992796758056, iteration: 111985
loss: 0.9939702749252319,grad_norm: 0.9999989921037995, iteration: 111986
loss: 1.0074748992919922,grad_norm: 0.9911689279594321, iteration: 111987
loss: 0.9963219165802002,grad_norm: 0.8439154991154284, iteration: 111988
loss: 0.9614486694335938,grad_norm: 0.9999991295793592, iteration: 111989
loss: 0.9973322153091431,grad_norm: 0.9999991338350527, iteration: 111990
loss: 0.9987306594848633,grad_norm: 0.9999990417726541, iteration: 111991
loss: 0.9668054580688477,grad_norm: 0.8558980906675384, iteration: 111992
loss: 1.0185627937316895,grad_norm: 0.999999245788687, iteration: 111993
loss: 0.9689382910728455,grad_norm: 0.9999989596474466, iteration: 111994
loss: 0.9911845326423645,grad_norm: 0.9999991371219259, iteration: 111995
loss: 1.0015846490859985,grad_norm: 0.9999990193820106, iteration: 111996
loss: 1.000231146812439,grad_norm: 0.9999990002221855, iteration: 111997
loss: 0.987392783164978,grad_norm: 0.929496022115361, iteration: 111998
loss: 0.9711113572120667,grad_norm: 0.9776629232594231, iteration: 111999
loss: 1.0198907852172852,grad_norm: 0.999999177393739, iteration: 112000
loss: 0.944494903087616,grad_norm: 0.9999991797278079, iteration: 112001
loss: 1.038685917854309,grad_norm: 0.9999992265979188, iteration: 112002
loss: 1.001001238822937,grad_norm: 0.8712829128923839, iteration: 112003
loss: 1.0171164274215698,grad_norm: 0.9158977408766832, iteration: 112004
loss: 1.002691626548767,grad_norm: 0.9342836800281545, iteration: 112005
loss: 1.0046617984771729,grad_norm: 0.9999992687972188, iteration: 112006
loss: 1.004596471786499,grad_norm: 0.991541122462685, iteration: 112007
loss: 1.0053894519805908,grad_norm: 0.8150845954150613, iteration: 112008
loss: 0.9926970601081848,grad_norm: 0.9860849139109749, iteration: 112009
loss: 1.0284851789474487,grad_norm: 0.9999991173221583, iteration: 112010
loss: 1.0314512252807617,grad_norm: 0.999998997004209, iteration: 112011
loss: 0.9718543887138367,grad_norm: 0.9999991823631125, iteration: 112012
loss: 1.0017389059066772,grad_norm: 0.9999992030427871, iteration: 112013
loss: 0.9866997599601746,grad_norm: 0.9302409689501361, iteration: 112014
loss: 0.9848005771636963,grad_norm: 0.9999990408637223, iteration: 112015
loss: 1.01140296459198,grad_norm: 0.9199967665392382, iteration: 112016
loss: 1.0090113878250122,grad_norm: 0.9999991634089632, iteration: 112017
loss: 1.0124553442001343,grad_norm: 0.999999145361433, iteration: 112018
loss: 1.0345181226730347,grad_norm: 0.9999991364818452, iteration: 112019
loss: 1.0105379819869995,grad_norm: 0.9368528201697299, iteration: 112020
loss: 0.9809207320213318,grad_norm: 0.9596284940259953, iteration: 112021
loss: 1.0635863542556763,grad_norm: 0.9999992229010781, iteration: 112022
loss: 1.0204730033874512,grad_norm: 0.9999991821154874, iteration: 112023
loss: 1.0036286115646362,grad_norm: 0.9999991969672185, iteration: 112024
loss: 0.9911491870880127,grad_norm: 0.9999994349305602, iteration: 112025
loss: 0.9992355704307556,grad_norm: 0.9076918758440494, iteration: 112026
loss: 0.9887146353721619,grad_norm: 0.9632198289334302, iteration: 112027
loss: 1.1039851903915405,grad_norm: 0.9999999037091877, iteration: 112028
loss: 1.0133559703826904,grad_norm: 0.9999990897490719, iteration: 112029
loss: 0.990075409412384,grad_norm: 0.9999993237932148, iteration: 112030
loss: 0.9921432137489319,grad_norm: 0.9999991570437132, iteration: 112031
loss: 1.0013691186904907,grad_norm: 0.9999990295979487, iteration: 112032
loss: 0.9675562977790833,grad_norm: 0.999999148236787, iteration: 112033
loss: 0.9929074645042419,grad_norm: 0.9228712468990251, iteration: 112034
loss: 0.987781822681427,grad_norm: 0.9818398995464027, iteration: 112035
loss: 0.9600192904472351,grad_norm: 0.9999991483106189, iteration: 112036
loss: 0.9708432555198669,grad_norm: 0.9692859171196322, iteration: 112037
loss: 0.9935165643692017,grad_norm: 0.9999990193604689, iteration: 112038
loss: 1.0084965229034424,grad_norm: 0.9999991405508482, iteration: 112039
loss: 0.9944305419921875,grad_norm: 0.8996571395992768, iteration: 112040
loss: 0.9906297326087952,grad_norm: 0.9999997023623028, iteration: 112041
loss: 1.0109028816223145,grad_norm: 0.9999991174222456, iteration: 112042
loss: 1.0121111869812012,grad_norm: 0.9999992369934476, iteration: 112043
loss: 1.037127137184143,grad_norm: 0.9999989317921737, iteration: 112044
loss: 0.9321867823600769,grad_norm: 0.999999083884826, iteration: 112045
loss: 0.9745489358901978,grad_norm: 0.9999990228529574, iteration: 112046
loss: 1.0078599452972412,grad_norm: 0.9999990190663789, iteration: 112047
loss: 0.9896907806396484,grad_norm: 0.87015465410371, iteration: 112048
loss: 1.0147180557250977,grad_norm: 0.9999990905342849, iteration: 112049
loss: 1.027169108390808,grad_norm: 0.9999995837608383, iteration: 112050
loss: 1.0049744844436646,grad_norm: 0.9828076900312817, iteration: 112051
loss: 0.9979993104934692,grad_norm: 0.9999989865767809, iteration: 112052
loss: 1.0025633573532104,grad_norm: 0.9999990506090234, iteration: 112053
loss: 0.9980536699295044,grad_norm: 0.9999990228519776, iteration: 112054
loss: 1.0079880952835083,grad_norm: 0.9999992068770764, iteration: 112055
loss: 0.9877586960792542,grad_norm: 0.9999990473185499, iteration: 112056
loss: 1.0254509449005127,grad_norm: 0.9999990478876261, iteration: 112057
loss: 1.0285979509353638,grad_norm: 0.9999990008134162, iteration: 112058
loss: 0.9867152571678162,grad_norm: 0.9642159741578893, iteration: 112059
loss: 0.9995729327201843,grad_norm: 0.9034742054390896, iteration: 112060
loss: 0.9752891063690186,grad_norm: 0.9999996327769692, iteration: 112061
loss: 0.9478709101676941,grad_norm: 0.999998992901605, iteration: 112062
loss: 0.9775830507278442,grad_norm: 0.9999991936552787, iteration: 112063
loss: 0.9739249348640442,grad_norm: 0.9999989527359506, iteration: 112064
loss: 0.9935639500617981,grad_norm: 0.9999992153161986, iteration: 112065
loss: 1.0056276321411133,grad_norm: 0.9999991561180458, iteration: 112066
loss: 0.9900081753730774,grad_norm: 0.930162423496611, iteration: 112067
loss: 1.0323748588562012,grad_norm: 0.8100348942705196, iteration: 112068
loss: 0.979677140712738,grad_norm: 0.9636589609785589, iteration: 112069
loss: 0.9869388341903687,grad_norm: 0.9999990734584615, iteration: 112070
loss: 0.9685835838317871,grad_norm: 0.8863314073966515, iteration: 112071
loss: 0.9767491221427917,grad_norm: 0.9216541739921272, iteration: 112072
loss: 0.9808675646781921,grad_norm: 0.99999920533347, iteration: 112073
loss: 1.0035123825073242,grad_norm: 0.9999990637477664, iteration: 112074
loss: 1.005252480506897,grad_norm: 0.9999990723810137, iteration: 112075
loss: 0.9656347632408142,grad_norm: 0.9999994315438835, iteration: 112076
loss: 1.0009218454360962,grad_norm: 0.9508216364129688, iteration: 112077
loss: 0.9597615599632263,grad_norm: 0.9999991586480509, iteration: 112078
loss: 0.9626322388648987,grad_norm: 0.9999990232115228, iteration: 112079
loss: 1.0050511360168457,grad_norm: 0.9823870251769391, iteration: 112080
loss: 1.0089141130447388,grad_norm: 0.9685251436933897, iteration: 112081
loss: 1.0129048824310303,grad_norm: 0.9341670899493101, iteration: 112082
loss: 1.002129077911377,grad_norm: 0.9999991131921584, iteration: 112083
loss: 1.0002481937408447,grad_norm: 0.9353930018972267, iteration: 112084
loss: 1.0319900512695312,grad_norm: 0.9999998506936089, iteration: 112085
loss: 0.9756757616996765,grad_norm: 0.9999991011488492, iteration: 112086
loss: 0.9722036719322205,grad_norm: 0.9999991858096042, iteration: 112087
loss: 0.9687343239784241,grad_norm: 0.9380743137799944, iteration: 112088
loss: 0.9698617458343506,grad_norm: 0.9999991480614123, iteration: 112089
loss: 0.9977859258651733,grad_norm: 0.9853465290758245, iteration: 112090
loss: 0.9794671535491943,grad_norm: 0.9021727207802103, iteration: 112091
loss: 1.0105315446853638,grad_norm: 0.9999992408195804, iteration: 112092
loss: 0.9878202080726624,grad_norm: 0.9666834344845651, iteration: 112093
loss: 1.0238444805145264,grad_norm: 0.9999993401281827, iteration: 112094
loss: 0.986263632774353,grad_norm: 0.9410537954754792, iteration: 112095
loss: 1.0094726085662842,grad_norm: 0.8952920084088386, iteration: 112096
loss: 0.9833703637123108,grad_norm: 0.9999991023805858, iteration: 112097
loss: 1.0084059238433838,grad_norm: 0.9999992964355525, iteration: 112098
loss: 0.9704172015190125,grad_norm: 0.9108632550740642, iteration: 112099
loss: 0.9923051595687866,grad_norm: 0.8798760940829319, iteration: 112100
loss: 0.9917638301849365,grad_norm: 0.820422187550696, iteration: 112101
loss: 1.0078558921813965,grad_norm: 0.9999992239783355, iteration: 112102
loss: 1.004745364189148,grad_norm: 0.9415392907197726, iteration: 112103
loss: 0.9664963483810425,grad_norm: 0.9999991291868044, iteration: 112104
loss: 0.9966209530830383,grad_norm: 0.860464621748564, iteration: 112105
loss: 0.9769750237464905,grad_norm: 0.9999991708018887, iteration: 112106
loss: 1.0046781301498413,grad_norm: 0.9377997071354044, iteration: 112107
loss: 1.005550742149353,grad_norm: 0.9999990899177267, iteration: 112108
loss: 1.0186892747879028,grad_norm: 0.9999993799592247, iteration: 112109
loss: 0.9917066693305969,grad_norm: 0.999998942204926, iteration: 112110
loss: 1.0295004844665527,grad_norm: 0.9999993255875794, iteration: 112111
loss: 0.9302550554275513,grad_norm: 0.9999993205464774, iteration: 112112
loss: 0.9653723239898682,grad_norm: 0.9999990649915008, iteration: 112113
loss: 1.0049998760223389,grad_norm: 0.9602853429803764, iteration: 112114
loss: 0.9969587922096252,grad_norm: 0.9772256045202012, iteration: 112115
loss: 0.9924095273017883,grad_norm: 0.9999990171103668, iteration: 112116
loss: 1.0224182605743408,grad_norm: 0.9999992816222898, iteration: 112117
loss: 0.9812548756599426,grad_norm: 0.9999991561044344, iteration: 112118
loss: 1.003646969795227,grad_norm: 0.9999991966094837, iteration: 112119
loss: 0.9750756621360779,grad_norm: 0.9999989947869733, iteration: 112120
loss: 0.9686849117279053,grad_norm: 0.9999988462018243, iteration: 112121
loss: 1.013490915298462,grad_norm: 0.9999991542389922, iteration: 112122
loss: 1.0124456882476807,grad_norm: 0.9999991627325718, iteration: 112123
loss: 1.0081822872161865,grad_norm: 0.9999992677584816, iteration: 112124
loss: 0.9771479368209839,grad_norm: 0.8993049931628038, iteration: 112125
loss: 0.957435667514801,grad_norm: 0.9876088499956428, iteration: 112126
loss: 0.9997450709342957,grad_norm: 0.8861129212923379, iteration: 112127
loss: 1.0217697620391846,grad_norm: 0.9999991334815047, iteration: 112128
loss: 0.9921764731407166,grad_norm: 0.9999994775246929, iteration: 112129
loss: 0.9945153594017029,grad_norm: 0.9600883166362443, iteration: 112130
loss: 1.029473066329956,grad_norm: 0.999999085500091, iteration: 112131
loss: 0.9923657774925232,grad_norm: 0.9999991586395363, iteration: 112132
loss: 0.9705530405044556,grad_norm: 0.9999992619657353, iteration: 112133
loss: 0.9911787509918213,grad_norm: 0.9999990807298005, iteration: 112134
loss: 0.9739123582839966,grad_norm: 0.9999990754893583, iteration: 112135
loss: 0.9994404911994934,grad_norm: 0.9999990548726376, iteration: 112136
loss: 1.0125679969787598,grad_norm: 0.969822051540215, iteration: 112137
loss: 0.99162358045578,grad_norm: 0.8788445431339641, iteration: 112138
loss: 0.9777654409408569,grad_norm: 0.9999994011549247, iteration: 112139
loss: 1.0018014907836914,grad_norm: 0.999999120072075, iteration: 112140
loss: 1.0050064325332642,grad_norm: 0.9323048536113141, iteration: 112141
loss: 0.9958593845367432,grad_norm: 0.999999129256567, iteration: 112142
loss: 0.9802724719047546,grad_norm: 0.9999995876840669, iteration: 112143
loss: 0.9735664129257202,grad_norm: 0.9999989757924685, iteration: 112144
loss: 1.0398017168045044,grad_norm: 0.999999227237871, iteration: 112145
loss: 1.0037226676940918,grad_norm: 0.9568278470490047, iteration: 112146
loss: 1.025967001914978,grad_norm: 0.9999991688918529, iteration: 112147
loss: 1.0078089237213135,grad_norm: 0.9999991177725532, iteration: 112148
loss: 1.0081920623779297,grad_norm: 0.9999992437450212, iteration: 112149
loss: 0.9950354695320129,grad_norm: 0.9935304673378654, iteration: 112150
loss: 0.9845277070999146,grad_norm: 0.9999990305726081, iteration: 112151
loss: 0.9810920357704163,grad_norm: 0.9495356336577024, iteration: 112152
loss: 1.0001447200775146,grad_norm: 0.9999990378354223, iteration: 112153
loss: 0.9805752038955688,grad_norm: 0.9999990490008775, iteration: 112154
loss: 0.9809523224830627,grad_norm: 0.999999210737982, iteration: 112155
loss: 1.0222609043121338,grad_norm: 0.9999991468405819, iteration: 112156
loss: 0.9925166368484497,grad_norm: 0.9579106484266465, iteration: 112157
loss: 0.984107494354248,grad_norm: 0.9999991481770868, iteration: 112158
loss: 1.0148053169250488,grad_norm: 0.9999992692763096, iteration: 112159
loss: 0.9806687831878662,grad_norm: 0.9999990237896556, iteration: 112160
loss: 0.9742883443832397,grad_norm: 0.9999991283732073, iteration: 112161
loss: 1.0114303827285767,grad_norm: 0.99999932342467, iteration: 112162
loss: 0.9426361918449402,grad_norm: 0.9999991248608873, iteration: 112163
loss: 1.040730357170105,grad_norm: 0.9999994769714724, iteration: 112164
loss: 0.9803100824356079,grad_norm: 0.9999992689693311, iteration: 112165
loss: 1.0180498361587524,grad_norm: 0.9999991049392385, iteration: 112166
loss: 1.0105751752853394,grad_norm: 0.9999990387943842, iteration: 112167
loss: 0.9780445098876953,grad_norm: 0.999999181357117, iteration: 112168
loss: 1.0007483959197998,grad_norm: 0.9999999177343232, iteration: 112169
loss: 0.991156816482544,grad_norm: 0.9572702485012563, iteration: 112170
loss: 0.9675705432891846,grad_norm: 0.9983211068846921, iteration: 112171
loss: 0.9834697246551514,grad_norm: 0.9999993181220502, iteration: 112172
loss: 0.9786261320114136,grad_norm: 0.9184892925335623, iteration: 112173
loss: 1.0153214931488037,grad_norm: 0.9722066719945693, iteration: 112174
loss: 0.9804585576057434,grad_norm: 0.9999991682372885, iteration: 112175
loss: 0.9601036310195923,grad_norm: 0.9999991582593718, iteration: 112176
loss: 1.021505355834961,grad_norm: 0.999999223718632, iteration: 112177
loss: 1.0061583518981934,grad_norm: 0.981711937831732, iteration: 112178
loss: 0.9741500020027161,grad_norm: 0.9999993628855969, iteration: 112179
loss: 1.0068085193634033,grad_norm: 0.9613240477643773, iteration: 112180
loss: 1.1010446548461914,grad_norm: 0.9999992226137072, iteration: 112181
loss: 0.9910529851913452,grad_norm: 0.8201032435089533, iteration: 112182
loss: 0.9787333607673645,grad_norm: 0.9712715230497757, iteration: 112183
loss: 1.00908625125885,grad_norm: 0.999999119924156, iteration: 112184
loss: 0.9581159353256226,grad_norm: 0.8623065112213053, iteration: 112185
loss: 1.0456901788711548,grad_norm: 0.9999997802769373, iteration: 112186
loss: 0.9716237783432007,grad_norm: 0.906119026791815, iteration: 112187
loss: 0.9939309358596802,grad_norm: 0.9999992126557605, iteration: 112188
loss: 1.026097059249878,grad_norm: 0.9999991883817975, iteration: 112189
loss: 1.0262720584869385,grad_norm: 0.9999991251837483, iteration: 112190
loss: 1.0063259601593018,grad_norm: 0.8799815779601913, iteration: 112191
loss: 1.0168304443359375,grad_norm: 0.999999013749056, iteration: 112192
loss: 0.9823517203330994,grad_norm: 0.9999990530245418, iteration: 112193
loss: 0.9851973056793213,grad_norm: 0.9999992783036198, iteration: 112194
loss: 0.9948405027389526,grad_norm: 0.8965082333371098, iteration: 112195
loss: 0.9800058603286743,grad_norm: 0.9999990175217961, iteration: 112196
loss: 0.9856500029563904,grad_norm: 0.9999989680335712, iteration: 112197
loss: 0.9917088747024536,grad_norm: 0.9705122468993055, iteration: 112198
loss: 1.0041544437408447,grad_norm: 0.9999990926683652, iteration: 112199
loss: 1.0081838369369507,grad_norm: 0.9999991635655058, iteration: 112200
loss: 0.9843565225601196,grad_norm: 0.9999992671128013, iteration: 112201
loss: 1.0070874691009521,grad_norm: 0.9325696951650831, iteration: 112202
loss: 0.9767277240753174,grad_norm: 0.9698539951594674, iteration: 112203
loss: 0.9410973787307739,grad_norm: 0.9999992924637379, iteration: 112204
loss: 1.0141769647598267,grad_norm: 0.9999990423395598, iteration: 112205
loss: 0.9673927426338196,grad_norm: 0.999999150718435, iteration: 112206
loss: 1.0256037712097168,grad_norm: 0.9999991106928141, iteration: 112207
loss: 0.9616383910179138,grad_norm: 0.9299342717628735, iteration: 112208
loss: 1.0032483339309692,grad_norm: 0.9999994140579505, iteration: 112209
loss: 1.0109471082687378,grad_norm: 0.8526566936291369, iteration: 112210
loss: 1.0037026405334473,grad_norm: 0.9999995781641529, iteration: 112211
loss: 0.9910526871681213,grad_norm: 0.9999990877886916, iteration: 112212
loss: 0.9847331643104553,grad_norm: 0.991377594356211, iteration: 112213
loss: 0.9758603572845459,grad_norm: 0.9568773626527654, iteration: 112214
loss: 0.9882019758224487,grad_norm: 0.9999992909018367, iteration: 112215
loss: 0.9777776002883911,grad_norm: 0.8770654593927368, iteration: 112216
loss: 0.9816557765007019,grad_norm: 0.9999990587233423, iteration: 112217
loss: 1.100358247756958,grad_norm: 0.9999993737627819, iteration: 112218
loss: 1.0062299966812134,grad_norm: 0.8479009748763829, iteration: 112219
loss: 0.9555631875991821,grad_norm: 0.9999992563038438, iteration: 112220
loss: 1.0264546871185303,grad_norm: 0.9999990119489974, iteration: 112221
loss: 0.9906971454620361,grad_norm: 0.9264121151911507, iteration: 112222
loss: 1.022154688835144,grad_norm: 0.9383699868810133, iteration: 112223
loss: 1.0147181749343872,grad_norm: 0.935244283417663, iteration: 112224
loss: 1.0211570262908936,grad_norm: 0.9999992257109569, iteration: 112225
loss: 0.9883769154548645,grad_norm: 0.9999991624569677, iteration: 112226
loss: 1.047081708908081,grad_norm: 0.9999993478663687, iteration: 112227
loss: 1.0459017753601074,grad_norm: 0.9999991892677063, iteration: 112228
loss: 0.9667377471923828,grad_norm: 0.9684004199353733, iteration: 112229
loss: 0.9646310806274414,grad_norm: 0.9540130189508624, iteration: 112230
loss: 0.9981431365013123,grad_norm: 0.9479624048534339, iteration: 112231
loss: 0.9844802021980286,grad_norm: 0.9761874021222106, iteration: 112232
loss: 1.0073151588439941,grad_norm: 0.9999990389180882, iteration: 112233
loss: 0.9713358879089355,grad_norm: 0.9999990672545436, iteration: 112234
loss: 0.998487114906311,grad_norm: 0.9999992239526769, iteration: 112235
loss: 0.997467577457428,grad_norm: 0.9806438324429274, iteration: 112236
loss: 1.0850868225097656,grad_norm: 0.9999992951594269, iteration: 112237
loss: 1.0252937078475952,grad_norm: 0.7519682179823769, iteration: 112238
loss: 1.0220986604690552,grad_norm: 0.9999994264214779, iteration: 112239
loss: 1.01339852809906,grad_norm: 0.9999990442836474, iteration: 112240
loss: 1.013056993484497,grad_norm: 0.999999199943186, iteration: 112241
loss: 1.0181890726089478,grad_norm: 0.9274745343515296, iteration: 112242
loss: 0.9467915296554565,grad_norm: 0.999999078307077, iteration: 112243
loss: 0.9735333323478699,grad_norm: 0.9999991248339565, iteration: 112244
loss: 0.9668775796890259,grad_norm: 0.9999991544414085, iteration: 112245
loss: 0.9846389889717102,grad_norm: 0.9999991302391916, iteration: 112246
loss: 0.9994710683822632,grad_norm: 0.9999990851881226, iteration: 112247
loss: 0.9780849814414978,grad_norm: 0.9999991017512206, iteration: 112248
loss: 1.0311084985733032,grad_norm: 0.8780561593689923, iteration: 112249
loss: 1.030499815940857,grad_norm: 0.9053586978653645, iteration: 112250
loss: 0.9801597595214844,grad_norm: 0.9999990882943679, iteration: 112251
loss: 1.0292719602584839,grad_norm: 0.9999993556817006, iteration: 112252
loss: 0.9960215091705322,grad_norm: 0.9978206987263445, iteration: 112253
loss: 1.0433727502822876,grad_norm: 0.9999991278223215, iteration: 112254
loss: 1.0131689310073853,grad_norm: 0.8561197277597864, iteration: 112255
loss: 0.9657694101333618,grad_norm: 0.9389466458276112, iteration: 112256
loss: 1.0019731521606445,grad_norm: 0.8667703810835131, iteration: 112257
loss: 0.9856120944023132,grad_norm: 0.9999991898398096, iteration: 112258
loss: 1.017462968826294,grad_norm: 0.9767631758788451, iteration: 112259
loss: 0.9878545999526978,grad_norm: 0.9198463107652874, iteration: 112260
loss: 0.9992431998252869,grad_norm: 0.892549211626017, iteration: 112261
loss: 0.9942641854286194,grad_norm: 0.9561792528974664, iteration: 112262
loss: 1.0036139488220215,grad_norm: 0.992424934339257, iteration: 112263
loss: 1.015418529510498,grad_norm: 0.972774882606455, iteration: 112264
loss: 1.04718017578125,grad_norm: 0.9717890722158656, iteration: 112265
loss: 0.9861430525779724,grad_norm: 0.9999992848375204, iteration: 112266
loss: 0.9714468717575073,grad_norm: 0.9999990916929984, iteration: 112267
loss: 0.9984563589096069,grad_norm: 0.9999989726247185, iteration: 112268
loss: 0.9746802449226379,grad_norm: 0.9636322822350533, iteration: 112269
loss: 1.0091962814331055,grad_norm: 0.8673957770459082, iteration: 112270
loss: 1.0178674459457397,grad_norm: 0.9656734137518611, iteration: 112271
loss: 1.026594877243042,grad_norm: 0.9150593418820918, iteration: 112272
loss: 0.9865320324897766,grad_norm: 0.9889394836897557, iteration: 112273
loss: 0.9950539469718933,grad_norm: 0.9967055622671875, iteration: 112274
loss: 1.0177292823791504,grad_norm: 0.9999991719073991, iteration: 112275
loss: 1.004546046257019,grad_norm: 0.9047252708057396, iteration: 112276
loss: 0.9802405834197998,grad_norm: 0.8267311380921096, iteration: 112277
loss: 1.020522117614746,grad_norm: 0.9999991532414259, iteration: 112278
loss: 1.0245810747146606,grad_norm: 0.999999080465303, iteration: 112279
loss: 1.0232640504837036,grad_norm: 0.9999991051594443, iteration: 112280
loss: 0.9926729798316956,grad_norm: 0.9439461397492253, iteration: 112281
loss: 1.0352118015289307,grad_norm: 0.9999989760142137, iteration: 112282
loss: 0.9949933290481567,grad_norm: 0.9999990026207349, iteration: 112283
loss: 1.0288429260253906,grad_norm: 0.9780496357722311, iteration: 112284
loss: 0.9810905456542969,grad_norm: 0.9999991592820398, iteration: 112285
loss: 0.9887890219688416,grad_norm: 0.9535490869799933, iteration: 112286
loss: 1.0013762712478638,grad_norm: 0.9353137302078971, iteration: 112287
loss: 1.0000122785568237,grad_norm: 0.9761755970850955, iteration: 112288
loss: 0.9838635921478271,grad_norm: 0.8625226607418802, iteration: 112289
loss: 0.9936139583587646,grad_norm: 0.9999991103737735, iteration: 112290
loss: 0.9982421398162842,grad_norm: 0.9999992871084077, iteration: 112291
loss: 1.0151358842849731,grad_norm: 0.9999992360656499, iteration: 112292
loss: 0.9756220579147339,grad_norm: 0.9094336177710484, iteration: 112293
loss: 0.98856121301651,grad_norm: 0.98326903234957, iteration: 112294
loss: 1.0205639600753784,grad_norm: 0.9999990840399924, iteration: 112295
loss: 0.9535463452339172,grad_norm: 0.9999991028044044, iteration: 112296
loss: 1.0082197189331055,grad_norm: 0.9999990560210539, iteration: 112297
loss: 1.011631727218628,grad_norm: 0.9999991046234423, iteration: 112298
loss: 0.9908910989761353,grad_norm: 0.8756726056958875, iteration: 112299
loss: 1.0059502124786377,grad_norm: 0.9999995960489242, iteration: 112300
loss: 0.997230589389801,grad_norm: 0.99999970767577, iteration: 112301
loss: 1.0224283933639526,grad_norm: 0.9403256572574895, iteration: 112302
loss: 1.031688928604126,grad_norm: 0.9999990951738316, iteration: 112303
loss: 1.0367285013198853,grad_norm: 0.999999751447438, iteration: 112304
loss: 1.0023372173309326,grad_norm: 0.9999990318963006, iteration: 112305
loss: 0.9890990853309631,grad_norm: 0.9673581490225183, iteration: 112306
loss: 0.9955095052719116,grad_norm: 0.9133935091149953, iteration: 112307
loss: 1.0188690423965454,grad_norm: 0.9999992149645364, iteration: 112308
loss: 1.0010453462600708,grad_norm: 0.9999990388943923, iteration: 112309
loss: 0.9900501370429993,grad_norm: 0.9223108155041253, iteration: 112310
loss: 1.010259747505188,grad_norm: 0.9201002206952291, iteration: 112311
loss: 1.0342265367507935,grad_norm: 0.8725814833257628, iteration: 112312
loss: 0.9961211085319519,grad_norm: 0.9601678646674907, iteration: 112313
loss: 1.010230541229248,grad_norm: 0.9999998666763853, iteration: 112314
loss: 1.0432573556900024,grad_norm: 0.9999991939108067, iteration: 112315
loss: 0.9993080496788025,grad_norm: 0.9999992446175294, iteration: 112316
loss: 0.9919059872627258,grad_norm: 0.9999991316578413, iteration: 112317
loss: 1.0129079818725586,grad_norm: 0.9999990008732752, iteration: 112318
loss: 0.9798027276992798,grad_norm: 0.8684662971831077, iteration: 112319
loss: 1.0126546621322632,grad_norm: 0.9999990456362283, iteration: 112320
loss: 1.0154352188110352,grad_norm: 0.9999992715218733, iteration: 112321
loss: 1.0178736448287964,grad_norm: 0.9999992386464166, iteration: 112322
loss: 1.010427474975586,grad_norm: 0.9999990166668367, iteration: 112323
loss: 1.0184721946716309,grad_norm: 0.9999991960320512, iteration: 112324
loss: 1.0134623050689697,grad_norm: 0.9999991312384043, iteration: 112325
loss: 1.0219569206237793,grad_norm: 0.9999991147090525, iteration: 112326
loss: 0.9794849753379822,grad_norm: 0.9999991587774653, iteration: 112327
loss: 1.0195469856262207,grad_norm: 0.9999991696770414, iteration: 112328
loss: 1.0183216333389282,grad_norm: 0.9902407093614253, iteration: 112329
loss: 0.9881678223609924,grad_norm: 0.9999990635858564, iteration: 112330
loss: 0.982393205165863,grad_norm: 0.9999991788699933, iteration: 112331
loss: 0.9926941990852356,grad_norm: 0.9308723157204257, iteration: 112332
loss: 1.049096941947937,grad_norm: 0.9999992191821451, iteration: 112333
loss: 0.9787243604660034,grad_norm: 0.9229157754972376, iteration: 112334
loss: 0.981934666633606,grad_norm: 0.9999990525662413, iteration: 112335
loss: 0.9935502409934998,grad_norm: 0.9999991740978241, iteration: 112336
loss: 1.043131709098816,grad_norm: 0.9999990153484266, iteration: 112337
loss: 0.9890340566635132,grad_norm: 0.9373076245744506, iteration: 112338
loss: 0.9933242201805115,grad_norm: 0.9999991016851697, iteration: 112339
loss: 1.0095564126968384,grad_norm: 0.9999992441369159, iteration: 112340
loss: 1.0095995664596558,grad_norm: 0.999999215993917, iteration: 112341
loss: 1.0232350826263428,grad_norm: 0.99999890938657, iteration: 112342
loss: 1.0012606382369995,grad_norm: 0.9999992922999462, iteration: 112343
loss: 1.0201619863510132,grad_norm: 0.9999989622885059, iteration: 112344
loss: 1.0438313484191895,grad_norm: 0.999999192585568, iteration: 112345
loss: 1.0312579870224,grad_norm: 0.9999991277392694, iteration: 112346
loss: 1.0085175037384033,grad_norm: 0.9999991543276446, iteration: 112347
loss: 1.0304245948791504,grad_norm: 0.9999988501369667, iteration: 112348
loss: 0.9905852675437927,grad_norm: 0.9774377807172915, iteration: 112349
loss: 1.0443958044052124,grad_norm: 0.9999990834352642, iteration: 112350
loss: 0.9950628280639648,grad_norm: 0.904277812376573, iteration: 112351
loss: 0.9879222512245178,grad_norm: 0.9999993059131137, iteration: 112352
loss: 0.9994240403175354,grad_norm: 0.9704161926767655, iteration: 112353
loss: 1.0569050312042236,grad_norm: 0.9585988420232581, iteration: 112354
loss: 0.9670766592025757,grad_norm: 0.9999990147300496, iteration: 112355
loss: 0.9634164571762085,grad_norm: 0.9999991729700655, iteration: 112356
loss: 0.9828879237174988,grad_norm: 0.9999991834750692, iteration: 112357
loss: 1.0113569498062134,grad_norm: 0.994594249699121, iteration: 112358
loss: 0.9950238466262817,grad_norm: 0.9999992464772721, iteration: 112359
loss: 0.9901334643363953,grad_norm: 0.9999989512922949, iteration: 112360
loss: 0.9916552305221558,grad_norm: 0.9999992747713863, iteration: 112361
loss: 1.0055692195892334,grad_norm: 0.9999990904796179, iteration: 112362
loss: 1.0175482034683228,grad_norm: 0.9999990137058733, iteration: 112363
loss: 1.0019937753677368,grad_norm: 0.9999990736982121, iteration: 112364
loss: 0.9919322729110718,grad_norm: 0.9403583716625966, iteration: 112365
loss: 0.9964559078216553,grad_norm: 0.976069940088197, iteration: 112366
loss: 0.9750378131866455,grad_norm: 0.9999991193932366, iteration: 112367
loss: 0.9776813983917236,grad_norm: 0.9562968705723979, iteration: 112368
loss: 1.0421193838119507,grad_norm: 0.9999991181967652, iteration: 112369
loss: 1.0109343528747559,grad_norm: 0.9948805525134581, iteration: 112370
loss: 1.0212066173553467,grad_norm: 0.9999991183428207, iteration: 112371
loss: 0.9806683659553528,grad_norm: 0.9145666073111614, iteration: 112372
loss: 0.9664312601089478,grad_norm: 0.9663942613563733, iteration: 112373
loss: 0.9663420915603638,grad_norm: 0.999999042090116, iteration: 112374
loss: 0.9780097007751465,grad_norm: 0.9999991947908519, iteration: 112375
loss: 0.9933093190193176,grad_norm: 0.9999991674468264, iteration: 112376
loss: 0.9858102202415466,grad_norm: 0.9999992724085063, iteration: 112377
loss: 0.9908881187438965,grad_norm: 0.8816904855878306, iteration: 112378
loss: 1.0319795608520508,grad_norm: 0.9999990964259141, iteration: 112379
loss: 1.0230432748794556,grad_norm: 0.9999996807143094, iteration: 112380
loss: 0.9913249611854553,grad_norm: 0.9999989658229945, iteration: 112381
loss: 1.0051894187927246,grad_norm: 0.9999990879291865, iteration: 112382
loss: 1.0152941942214966,grad_norm: 0.999999120942735, iteration: 112383
loss: 0.9878454804420471,grad_norm: 0.9999992300092367, iteration: 112384
loss: 1.0027657747268677,grad_norm: 0.9999990993239717, iteration: 112385
loss: 0.9742550253868103,grad_norm: 0.9789234016777285, iteration: 112386
loss: 1.0203168392181396,grad_norm: 0.9999992022762857, iteration: 112387
loss: 0.9788611531257629,grad_norm: 0.9999991744382116, iteration: 112388
loss: 1.001705527305603,grad_norm: 0.9099591963278393, iteration: 112389
loss: 0.9783889055252075,grad_norm: 0.9999990785856349, iteration: 112390
loss: 0.9731152653694153,grad_norm: 0.9999990597588365, iteration: 112391
loss: 0.9816269278526306,grad_norm: 0.8922417495894228, iteration: 112392
loss: 0.9835598468780518,grad_norm: 0.9999992042610969, iteration: 112393
loss: 1.0146223306655884,grad_norm: 0.8355722049888358, iteration: 112394
loss: 0.9710573554039001,grad_norm: 0.9999992458152317, iteration: 112395
loss: 1.0130493640899658,grad_norm: 0.9980044589039081, iteration: 112396
loss: 0.9575130343437195,grad_norm: 0.9999991981664653, iteration: 112397
loss: 0.9639116525650024,grad_norm: 0.9999990693222054, iteration: 112398
loss: 0.9739249348640442,grad_norm: 0.9991743181678836, iteration: 112399
loss: 0.9912686944007874,grad_norm: 0.9511699893741928, iteration: 112400
loss: 1.0284371376037598,grad_norm: 0.9935900692123707, iteration: 112401
loss: 1.0076782703399658,grad_norm: 0.9705574196299972, iteration: 112402
loss: 0.9718344807624817,grad_norm: 0.8541346896587086, iteration: 112403
loss: 1.0397831201553345,grad_norm: 0.9613136118498323, iteration: 112404
loss: 0.9916741251945496,grad_norm: 0.9848250225896804, iteration: 112405
loss: 1.0336024761199951,grad_norm: 0.999999216907908, iteration: 112406
loss: 1.0193355083465576,grad_norm: 0.9068948577366583, iteration: 112407
loss: 1.012663722038269,grad_norm: 0.9999993136424202, iteration: 112408
loss: 1.030243158340454,grad_norm: 0.999999235682227, iteration: 112409
loss: 0.9890041947364807,grad_norm: 0.9999989420662283, iteration: 112410
loss: 1.0501000881195068,grad_norm: 0.9294968817574387, iteration: 112411
loss: 1.0331859588623047,grad_norm: 0.9999991577524163, iteration: 112412
loss: 1.0419480800628662,grad_norm: 0.9999991194873582, iteration: 112413
loss: 1.0580955743789673,grad_norm: 0.9999992030245893, iteration: 112414
loss: 1.002038598060608,grad_norm: 0.763332124655806, iteration: 112415
loss: 0.9797171950340271,grad_norm: 0.9836206257633262, iteration: 112416
loss: 0.9644620418548584,grad_norm: 0.9999989525336059, iteration: 112417
loss: 1.0125751495361328,grad_norm: 0.9999992178395333, iteration: 112418
loss: 1.0056089162826538,grad_norm: 0.8334886702786131, iteration: 112419
loss: 0.9815006852149963,grad_norm: 0.9999991278919335, iteration: 112420
loss: 0.96440190076828,grad_norm: 0.9999992430651177, iteration: 112421
loss: 1.000315546989441,grad_norm: 0.9999992027262169, iteration: 112422
loss: 0.9828837513923645,grad_norm: 0.9999990814433867, iteration: 112423
loss: 1.0034844875335693,grad_norm: 0.9999990706961985, iteration: 112424
loss: 0.9977149367332458,grad_norm: 0.9788484802302707, iteration: 112425
loss: 1.026189923286438,grad_norm: 0.9999991478143121, iteration: 112426
loss: 0.9943410158157349,grad_norm: 0.9999989935428506, iteration: 112427
loss: 1.0342135429382324,grad_norm: 0.9999992864371315, iteration: 112428
loss: 0.9638151526451111,grad_norm: 0.9907265984221165, iteration: 112429
loss: 0.960109531879425,grad_norm: 0.9082080240696337, iteration: 112430
loss: 1.002027988433838,grad_norm: 0.9863871500688488, iteration: 112431
loss: 1.0370407104492188,grad_norm: 0.9999992149235996, iteration: 112432
loss: 1.00691556930542,grad_norm: 0.9906822844928206, iteration: 112433
loss: 1.0140554904937744,grad_norm: 0.9999990844310874, iteration: 112434
loss: 0.983210563659668,grad_norm: 0.8956265429517734, iteration: 112435
loss: 1.0121040344238281,grad_norm: 0.9999990940377893, iteration: 112436
loss: 1.0411409139633179,grad_norm: 0.9999998564635482, iteration: 112437
loss: 1.0326073169708252,grad_norm: 0.9613266561793994, iteration: 112438
loss: 0.9858248829841614,grad_norm: 0.9999993597252776, iteration: 112439
loss: 0.9773903489112854,grad_norm: 0.8261398551335117, iteration: 112440
loss: 1.0063953399658203,grad_norm: 0.9300850036442412, iteration: 112441
loss: 0.9489195942878723,grad_norm: 0.9595807343552812, iteration: 112442
loss: 1.0149890184402466,grad_norm: 0.9999991373420621, iteration: 112443
loss: 0.9891692996025085,grad_norm: 0.9091317048735937, iteration: 112444
loss: 1.0032612085342407,grad_norm: 0.9999990697220458, iteration: 112445
loss: 1.0222433805465698,grad_norm: 0.9092741470642494, iteration: 112446
loss: 0.9681477546691895,grad_norm: 0.9999990717958985, iteration: 112447
loss: 1.0291883945465088,grad_norm: 0.9118066076302547, iteration: 112448
loss: 0.98341304063797,grad_norm: 0.917519519074321, iteration: 112449
loss: 1.0223441123962402,grad_norm: 0.999999206726973, iteration: 112450
loss: 1.0300867557525635,grad_norm: 0.999999088661092, iteration: 112451
loss: 1.0272430181503296,grad_norm: 0.9842191815858639, iteration: 112452
loss: 0.9849801063537598,grad_norm: 0.9291397833346665, iteration: 112453
loss: 1.0237858295440674,grad_norm: 0.9982468799266851, iteration: 112454
loss: 0.9750683307647705,grad_norm: 0.9116809351629171, iteration: 112455
loss: 1.027357816696167,grad_norm: 0.9999991305997248, iteration: 112456
loss: 1.012357234954834,grad_norm: 0.9951643709340742, iteration: 112457
loss: 0.9880226850509644,grad_norm: 0.9856941921794051, iteration: 112458
loss: 0.9940495491027832,grad_norm: 0.8961506829423209, iteration: 112459
loss: 0.9854832887649536,grad_norm: 0.9219651088876124, iteration: 112460
loss: 1.0162460803985596,grad_norm: 0.9999991405690626, iteration: 112461
loss: 1.0122954845428467,grad_norm: 0.9999992068765905, iteration: 112462
loss: 1.0053486824035645,grad_norm: 0.9999992682855405, iteration: 112463
loss: 0.9988874197006226,grad_norm: 0.9999991853651642, iteration: 112464
loss: 1.0034682750701904,grad_norm: 0.8886972244836541, iteration: 112465
loss: 1.004697322845459,grad_norm: 0.9999991106661056, iteration: 112466
loss: 1.0074386596679688,grad_norm: 0.8533385077567898, iteration: 112467
loss: 1.030523657798767,grad_norm: 0.9581041748596556, iteration: 112468
loss: 1.0001389980316162,grad_norm: 0.9999990734629005, iteration: 112469
loss: 1.0084633827209473,grad_norm: 0.9999995813532754, iteration: 112470
loss: 0.9851416945457458,grad_norm: 0.9999990643531761, iteration: 112471
loss: 0.9930025935173035,grad_norm: 0.9999989862558349, iteration: 112472
loss: 0.9905131459236145,grad_norm: 0.9999992297733972, iteration: 112473
loss: 0.9636110663414001,grad_norm: 0.9058099092700169, iteration: 112474
loss: 0.9669532179832458,grad_norm: 0.9999992539124036, iteration: 112475
loss: 0.979389488697052,grad_norm: 0.8179914775237253, iteration: 112476
loss: 0.9750104546546936,grad_norm: 0.9999996956657997, iteration: 112477
loss: 1.0003385543823242,grad_norm: 0.9949651117506344, iteration: 112478
loss: 0.9726635217666626,grad_norm: 0.9999991736088568, iteration: 112479
loss: 0.9769198894500732,grad_norm: 0.9999989762017422, iteration: 112480
loss: 0.9849259853363037,grad_norm: 0.9999991540925316, iteration: 112481
loss: 1.011911392211914,grad_norm: 0.9999990760715098, iteration: 112482
loss: 0.9373008012771606,grad_norm: 0.9999992210759262, iteration: 112483
loss: 0.9682430624961853,grad_norm: 0.9999992382313881, iteration: 112484
loss: 0.9761722087860107,grad_norm: 0.9999991209776593, iteration: 112485
loss: 1.0110065937042236,grad_norm: 0.9705096825199002, iteration: 112486
loss: 1.0318583250045776,grad_norm: 0.960012372018959, iteration: 112487
loss: 1.0206644535064697,grad_norm: 0.9999992710645548, iteration: 112488
loss: 1.0207794904708862,grad_norm: 0.999999206434218, iteration: 112489
loss: 0.985651969909668,grad_norm: 0.9999991346273285, iteration: 112490
loss: 1.012181043624878,grad_norm: 0.9099114091440482, iteration: 112491
loss: 0.991303026676178,grad_norm: 0.8812887624463461, iteration: 112492
loss: 0.9994142651557922,grad_norm: 0.999999243549214, iteration: 112493
loss: 1.0225214958190918,grad_norm: 0.9999992094241754, iteration: 112494
loss: 0.9935806393623352,grad_norm: 0.9325140807664348, iteration: 112495
loss: 0.9888117909431458,grad_norm: 0.9695427104062563, iteration: 112496
loss: 0.9827406406402588,grad_norm: 0.9277118647146833, iteration: 112497
loss: 0.9993305802345276,grad_norm: 0.971198864954505, iteration: 112498
loss: 0.9841594696044922,grad_norm: 0.9999992976504998, iteration: 112499
loss: 1.0122597217559814,grad_norm: 0.9999992265678698, iteration: 112500
loss: 1.0103449821472168,grad_norm: 0.9999990454431894, iteration: 112501
loss: 0.9707760810852051,grad_norm: 0.9999990159990649, iteration: 112502
loss: 0.9966937303543091,grad_norm: 0.9999995443283144, iteration: 112503
loss: 1.0346968173980713,grad_norm: 0.9999992292830591, iteration: 112504
loss: 0.975220263004303,grad_norm: 0.9999990286328111, iteration: 112505
loss: 0.9604086875915527,grad_norm: 0.9999989677753303, iteration: 112506
loss: 0.970991849899292,grad_norm: 0.9999990550965484, iteration: 112507
loss: 1.023611307144165,grad_norm: 0.9999992238875496, iteration: 112508
loss: 1.0329700708389282,grad_norm: 0.9999991589250927, iteration: 112509
loss: 0.9908117055892944,grad_norm: 0.9999990470798392, iteration: 112510
loss: 0.9739565253257751,grad_norm: 0.9999990879770362, iteration: 112511
loss: 1.0155587196350098,grad_norm: 0.9277296610183392, iteration: 112512
loss: 0.9993110299110413,grad_norm: 0.9999991205295613, iteration: 112513
loss: 1.005681037902832,grad_norm: 0.9167725285798656, iteration: 112514
loss: 1.0137869119644165,grad_norm: 0.9999990724300316, iteration: 112515
loss: 1.02876877784729,grad_norm: 0.9836255942457456, iteration: 112516
loss: 1.0405290126800537,grad_norm: 0.8760910102062517, iteration: 112517
loss: 0.9710150361061096,grad_norm: 0.9999991541332901, iteration: 112518
loss: 0.999150812625885,grad_norm: 0.9999991336674297, iteration: 112519
loss: 1.0125755071640015,grad_norm: 0.9167392477483555, iteration: 112520
loss: 0.9676763415336609,grad_norm: 0.9999992498919796, iteration: 112521
loss: 1.0114467144012451,grad_norm: 0.9999990247277448, iteration: 112522
loss: 1.0239661931991577,grad_norm: 0.9999991094228298, iteration: 112523
loss: 1.0073612928390503,grad_norm: 0.9999990737268224, iteration: 112524
loss: 0.9735441207885742,grad_norm: 0.9145782282491751, iteration: 112525
loss: 1.0038710832595825,grad_norm: 0.9955450097389872, iteration: 112526
loss: 1.0256249904632568,grad_norm: 0.883671950801037, iteration: 112527
loss: 1.0109496116638184,grad_norm: 0.9999991777106674, iteration: 112528
loss: 0.9937990307807922,grad_norm: 0.999999076172453, iteration: 112529
loss: 1.026475429534912,grad_norm: 0.9606686750240822, iteration: 112530
loss: 1.036307454109192,grad_norm: 0.9908685589797649, iteration: 112531
loss: 1.0177216529846191,grad_norm: 0.9999992619918262, iteration: 112532
loss: 0.9872279763221741,grad_norm: 0.8398579526425555, iteration: 112533
loss: 1.0116339921951294,grad_norm: 0.9527164575509866, iteration: 112534
loss: 1.0446765422821045,grad_norm: 0.8548535696265152, iteration: 112535
loss: 1.0245987176895142,grad_norm: 0.9634565429814658, iteration: 112536
loss: 0.9985787272453308,grad_norm: 0.9192325445607985, iteration: 112537
loss: 0.988497793674469,grad_norm: 0.9769364723103869, iteration: 112538
loss: 0.9803310632705688,grad_norm: 0.9999993261161637, iteration: 112539
loss: 0.9793624877929688,grad_norm: 0.9338505007368392, iteration: 112540
loss: 0.9546316862106323,grad_norm: 0.9999992058928768, iteration: 112541
loss: 0.9995368719100952,grad_norm: 0.9999990809982916, iteration: 112542
loss: 0.9988667368888855,grad_norm: 0.9999991620645824, iteration: 112543
loss: 0.9888613224029541,grad_norm: 0.9757044299133151, iteration: 112544
loss: 0.9835310578346252,grad_norm: 0.9999991247304957, iteration: 112545
loss: 0.9771520495414734,grad_norm: 0.8918958676371196, iteration: 112546
loss: 1.0077831745147705,grad_norm: 0.9618622899263551, iteration: 112547
loss: 0.996300458908081,grad_norm: 0.9928619450570226, iteration: 112548
loss: 0.9491977095603943,grad_norm: 0.999999168740745, iteration: 112549
loss: 1.0478277206420898,grad_norm: 0.9999989691766469, iteration: 112550
loss: 0.9953699111938477,grad_norm: 0.9305992624481442, iteration: 112551
loss: 1.0334632396697998,grad_norm: 0.999999267640513, iteration: 112552
loss: 0.9776512980461121,grad_norm: 0.999999304801499, iteration: 112553
loss: 0.9866095185279846,grad_norm: 0.9999991195081038, iteration: 112554
loss: 1.029396653175354,grad_norm: 0.9999991364394527, iteration: 112555
loss: 1.1702617406845093,grad_norm: 0.9999995374036036, iteration: 112556
loss: 1.0077660083770752,grad_norm: 0.999999227402858, iteration: 112557
loss: 1.0059144496917725,grad_norm: 0.8985284179017989, iteration: 112558
loss: 1.105678915977478,grad_norm: 0.9999999152036286, iteration: 112559
loss: 1.0105795860290527,grad_norm: 0.9999990872527539, iteration: 112560
loss: 1.0029101371765137,grad_norm: 0.9999992107312233, iteration: 112561
loss: 1.0087205171585083,grad_norm: 0.9999990962110644, iteration: 112562
loss: 1.0315715074539185,grad_norm: 0.9999993350929629, iteration: 112563
loss: 0.982414960861206,grad_norm: 0.9151860957464424, iteration: 112564
loss: 0.9871432185173035,grad_norm: 0.9999991268995363, iteration: 112565
loss: 0.950585126876831,grad_norm: 0.9999990933316046, iteration: 112566
loss: 0.9901139140129089,grad_norm: 0.999999151635083, iteration: 112567
loss: 1.00015127658844,grad_norm: 0.9999990035105044, iteration: 112568
loss: 1.0156220197677612,grad_norm: 0.9999994049611449, iteration: 112569
loss: 1.0011711120605469,grad_norm: 0.8545966080849052, iteration: 112570
loss: 0.9950588941574097,grad_norm: 0.900636136014043, iteration: 112571
loss: 0.9887833595275879,grad_norm: 0.9685091864857113, iteration: 112572
loss: 0.9847965240478516,grad_norm: 0.9999990024042866, iteration: 112573
loss: 0.9716766476631165,grad_norm: 0.9999992480303215, iteration: 112574
loss: 1.0266929864883423,grad_norm: 0.9999991824295205, iteration: 112575
loss: 1.0331053733825684,grad_norm: 0.9999989852934601, iteration: 112576
loss: 1.0822877883911133,grad_norm: 0.9999993771856267, iteration: 112577
loss: 1.2260921001434326,grad_norm: 0.9999996396735853, iteration: 112578
loss: 0.9463223814964294,grad_norm: 0.9260175555359844, iteration: 112579
loss: 1.1179783344268799,grad_norm: 0.9999993926550415, iteration: 112580
loss: 1.005354642868042,grad_norm: 0.9999992562710156, iteration: 112581
loss: 0.9787827134132385,grad_norm: 0.999999286270543, iteration: 112582
loss: 0.9913275837898254,grad_norm: 0.999999120939858, iteration: 112583
loss: 0.9760023951530457,grad_norm: 0.9999990238064242, iteration: 112584
loss: 0.9874411821365356,grad_norm: 0.9999992289811148, iteration: 112585
loss: 1.121233582496643,grad_norm: 0.9999996944251855, iteration: 112586
loss: 0.9677512645721436,grad_norm: 0.969474989104877, iteration: 112587
loss: 0.9974943995475769,grad_norm: 0.9999992331175884, iteration: 112588
loss: 1.0327469110488892,grad_norm: 0.9999992456946947, iteration: 112589
loss: 0.9742262363433838,grad_norm: 0.9999990210425144, iteration: 112590
loss: 0.9792941212654114,grad_norm: 0.9999991790096143, iteration: 112591
loss: 0.9950751066207886,grad_norm: 0.891341300627919, iteration: 112592
loss: 1.0405279397964478,grad_norm: 0.9831712773353093, iteration: 112593
loss: 1.0243916511535645,grad_norm: 0.9999990806972032, iteration: 112594
loss: 0.9992552995681763,grad_norm: 0.9999991966130889, iteration: 112595
loss: 1.0219714641571045,grad_norm: 0.9999991745612719, iteration: 112596
loss: 1.014082908630371,grad_norm: 0.99999924226989, iteration: 112597
loss: 0.9767343401908875,grad_norm: 0.9999991035545165, iteration: 112598
loss: 1.0060442686080933,grad_norm: 0.9158874648554685, iteration: 112599
loss: 0.9950854182243347,grad_norm: 0.9999992301629108, iteration: 112600
loss: 1.0166951417922974,grad_norm: 0.9999992540232092, iteration: 112601
loss: 1.035552978515625,grad_norm: 0.9999991399922813, iteration: 112602
loss: 1.0223727226257324,grad_norm: 0.9999991128329163, iteration: 112603
loss: 1.0433588027954102,grad_norm: 0.9999997038420916, iteration: 112604
loss: 1.014192819595337,grad_norm: 0.9457009808843473, iteration: 112605
loss: 1.0307973623275757,grad_norm: 0.999999354478629, iteration: 112606
loss: 0.9968706369400024,grad_norm: 0.9999990837726751, iteration: 112607
loss: 1.0070756673812866,grad_norm: 0.9999989557033455, iteration: 112608
loss: 1.0113435983657837,grad_norm: 0.9330988790844146, iteration: 112609
loss: 1.0352377891540527,grad_norm: 0.9999991198614224, iteration: 112610
loss: 0.9989467859268188,grad_norm: 0.9999992720185223, iteration: 112611
loss: 1.0173951387405396,grad_norm: 0.9999992091379475, iteration: 112612
loss: 1.003005862236023,grad_norm: 0.9860368124203018, iteration: 112613
loss: 1.0032140016555786,grad_norm: 0.836412242383488, iteration: 112614
loss: 1.030613660812378,grad_norm: 0.999999039197377, iteration: 112615
loss: 0.9810187220573425,grad_norm: 0.943298312705147, iteration: 112616
loss: 0.9786182045936584,grad_norm: 0.9999991541641612, iteration: 112617
loss: 0.9852149486541748,grad_norm: 0.9509210260418508, iteration: 112618
loss: 1.0112742185592651,grad_norm: 0.9999991351261719, iteration: 112619
loss: 0.9839360117912292,grad_norm: 0.9999989769263182, iteration: 112620
loss: 1.020036220550537,grad_norm: 0.8880150296736862, iteration: 112621
loss: 0.9927991628646851,grad_norm: 0.9999990132604418, iteration: 112622
loss: 0.9768083691596985,grad_norm: 0.9999991025066544, iteration: 112623
loss: 0.9984240531921387,grad_norm: 0.9999996223009113, iteration: 112624
loss: 1.0356647968292236,grad_norm: 0.9999992440374853, iteration: 112625
loss: 1.0349678993225098,grad_norm: 0.9999993944529791, iteration: 112626
loss: 1.0322825908660889,grad_norm: 0.9811450538471845, iteration: 112627
loss: 0.9775745868682861,grad_norm: 0.999999141162649, iteration: 112628
loss: 1.003511667251587,grad_norm: 0.8968809717139185, iteration: 112629
loss: 1.0126402378082275,grad_norm: 0.915018490887346, iteration: 112630
loss: 1.0303549766540527,grad_norm: 0.9999992295515453, iteration: 112631
loss: 0.9734893441200256,grad_norm: 0.9999991017578344, iteration: 112632
loss: 0.9953048229217529,grad_norm: 0.8626997385247231, iteration: 112633
loss: 1.0282915830612183,grad_norm: 0.9265707698502909, iteration: 112634
loss: 1.0183037519454956,grad_norm: 0.9999991435637926, iteration: 112635
loss: 1.0272198915481567,grad_norm: 0.9999991491827287, iteration: 112636
loss: 0.9916099309921265,grad_norm: 0.9379423968384286, iteration: 112637
loss: 1.023909568786621,grad_norm: 0.9999993947543767, iteration: 112638
loss: 0.9785869717597961,grad_norm: 0.9178560677355316, iteration: 112639
loss: 1.0162298679351807,grad_norm: 0.9048713117465957, iteration: 112640
loss: 1.0301527976989746,grad_norm: 0.8960307212553487, iteration: 112641
loss: 1.001491904258728,grad_norm: 0.883046817953831, iteration: 112642
loss: 1.0657005310058594,grad_norm: 0.9999993741942607, iteration: 112643
loss: 0.9877889752388,grad_norm: 0.9547910694126114, iteration: 112644
loss: 1.0181478261947632,grad_norm: 0.9999991317350164, iteration: 112645
loss: 0.9845806360244751,grad_norm: 0.9999990241233593, iteration: 112646
loss: 1.0205868482589722,grad_norm: 0.999999811419503, iteration: 112647
loss: 1.0281941890716553,grad_norm: 0.9999989711593923, iteration: 112648
loss: 0.9734411239624023,grad_norm: 0.9425332708167898, iteration: 112649
loss: 0.9831128716468811,grad_norm: 0.8735881377495992, iteration: 112650
loss: 0.9705262184143066,grad_norm: 0.9999992788946694, iteration: 112651
loss: 1.031593680381775,grad_norm: 0.9699067890901447, iteration: 112652
loss: 1.0630652904510498,grad_norm: 0.9999992882993212, iteration: 112653
loss: 1.0317796468734741,grad_norm: 0.9999990119615032, iteration: 112654
loss: 1.0093015432357788,grad_norm: 0.9500368864468856, iteration: 112655
loss: 1.000341773033142,grad_norm: 0.9532704213460333, iteration: 112656
loss: 1.0121243000030518,grad_norm: 0.9999990824897224, iteration: 112657
loss: 0.9847462773323059,grad_norm: 0.8707415232980459, iteration: 112658
loss: 1.0149226188659668,grad_norm: 0.9999990578603373, iteration: 112659
loss: 0.9949666261672974,grad_norm: 0.86984017706418, iteration: 112660
loss: 1.011950135231018,grad_norm: 0.999999577407132, iteration: 112661
loss: 1.0068856477737427,grad_norm: 0.9999990409266537, iteration: 112662
loss: 1.0033897161483765,grad_norm: 0.9999990618665372, iteration: 112663
loss: 0.9991836547851562,grad_norm: 0.9060666455584182, iteration: 112664
loss: 0.9901232719421387,grad_norm: 0.999999179359458, iteration: 112665
loss: 0.9735803008079529,grad_norm: 0.9999989906817517, iteration: 112666
loss: 0.9929016828536987,grad_norm: 0.9999991736274044, iteration: 112667
loss: 1.0328298807144165,grad_norm: 0.9999991367648164, iteration: 112668
loss: 0.9852885007858276,grad_norm: 0.8010973134646064, iteration: 112669
loss: 0.9473249316215515,grad_norm: 0.8768986400639601, iteration: 112670
loss: 1.091855764389038,grad_norm: 0.9999992139204058, iteration: 112671
loss: 1.033454418182373,grad_norm: 0.9999999580092884, iteration: 112672
loss: 0.9618619084358215,grad_norm: 0.9999990482814386, iteration: 112673
loss: 0.9791666865348816,grad_norm: 0.999999002049213, iteration: 112674
loss: 0.9979745745658875,grad_norm: 0.8543901163465435, iteration: 112675
loss: 0.9772516489028931,grad_norm: 0.8992839998775866, iteration: 112676
loss: 0.9821802377700806,grad_norm: 0.9318437729153576, iteration: 112677
loss: 0.9668258428573608,grad_norm: 0.9999991233356696, iteration: 112678
loss: 1.0032508373260498,grad_norm: 0.9999990926478098, iteration: 112679
loss: 0.973370373249054,grad_norm: 0.9999990829407754, iteration: 112680
loss: 1.0195260047912598,grad_norm: 0.9999990907234807, iteration: 112681
loss: 1.0231684446334839,grad_norm: 0.999999044712507, iteration: 112682
loss: 1.0058187246322632,grad_norm: 0.9999991738825064, iteration: 112683
loss: 1.007756233215332,grad_norm: 0.9239314688113393, iteration: 112684
loss: 0.9798550605773926,grad_norm: 0.9999996201973729, iteration: 112685
loss: 0.9697709083557129,grad_norm: 0.9981105593203574, iteration: 112686
loss: 0.9932777881622314,grad_norm: 0.9254019566598418, iteration: 112687
loss: 1.0007076263427734,grad_norm: 0.9590975998058422, iteration: 112688
loss: 0.9826876521110535,grad_norm: 0.7797612750614149, iteration: 112689
loss: 0.9931784272193909,grad_norm: 0.8975698073056335, iteration: 112690
loss: 1.0319061279296875,grad_norm: 0.9999992521790664, iteration: 112691
loss: 0.9915719628334045,grad_norm: 0.9999991196366261, iteration: 112692
loss: 1.0264112949371338,grad_norm: 0.9999990597668007, iteration: 112693
loss: 1.025121808052063,grad_norm: 0.8864271412324363, iteration: 112694
loss: 0.9940053820610046,grad_norm: 0.999999021675412, iteration: 112695
loss: 0.9966435432434082,grad_norm: 0.934903780046053, iteration: 112696
loss: 0.9887428879737854,grad_norm: 0.9999991638285525, iteration: 112697
loss: 1.0044986009597778,grad_norm: 0.9999991476315417, iteration: 112698
loss: 0.9998396039009094,grad_norm: 0.9999991136591467, iteration: 112699
loss: 1.0062472820281982,grad_norm: 0.9999990759812657, iteration: 112700
loss: 1.0242265462875366,grad_norm: 0.9999991153215446, iteration: 112701
loss: 0.9658434987068176,grad_norm: 0.9999992159581665, iteration: 112702
loss: 0.9877704977989197,grad_norm: 0.8440035682220053, iteration: 112703
loss: 0.9810296297073364,grad_norm: 0.8574537422024359, iteration: 112704
loss: 1.0118364095687866,grad_norm: 0.8855516081756366, iteration: 112705
loss: 1.0117818117141724,grad_norm: 0.9999990586380502, iteration: 112706
loss: 0.9636115431785583,grad_norm: 0.8728640917679541, iteration: 112707
loss: 0.989662230014801,grad_norm: 0.9999990029332293, iteration: 112708
loss: 0.9980770349502563,grad_norm: 0.9724173170684073, iteration: 112709
loss: 0.9891935586929321,grad_norm: 0.9899952504367594, iteration: 112710
loss: 0.9853671789169312,grad_norm: 0.9288137557821338, iteration: 112711
loss: 0.9874832034111023,grad_norm: 0.9999992243467236, iteration: 112712
loss: 1.0047664642333984,grad_norm: 0.9630188849487508, iteration: 112713
loss: 0.9882645606994629,grad_norm: 0.8801845105000726, iteration: 112714
loss: 1.0010340213775635,grad_norm: 0.9999991158444768, iteration: 112715
loss: 1.0080045461654663,grad_norm: 0.9924784522224327, iteration: 112716
loss: 0.9884808659553528,grad_norm: 0.999999164739338, iteration: 112717
loss: 1.017785668373108,grad_norm: 0.9999990718838273, iteration: 112718
loss: 1.016860008239746,grad_norm: 0.9999991286643501, iteration: 112719
loss: 0.9754989743232727,grad_norm: 0.9982181597059646, iteration: 112720
loss: 0.9898073673248291,grad_norm: 0.873384226033496, iteration: 112721
loss: 0.9783696532249451,grad_norm: 0.9747291738264858, iteration: 112722
loss: 1.001333475112915,grad_norm: 0.9507745168968643, iteration: 112723
loss: 0.964074969291687,grad_norm: 0.9614679977991787, iteration: 112724
loss: 0.9941995739936829,grad_norm: 0.9519211145033857, iteration: 112725
loss: 1.0255110263824463,grad_norm: 0.9933239243790202, iteration: 112726
loss: 1.0039321184158325,grad_norm: 0.9934684761333002, iteration: 112727
loss: 0.9656185507774353,grad_norm: 0.9999990242183678, iteration: 112728
loss: 0.9939203858375549,grad_norm: 0.9999991590469774, iteration: 112729
loss: 1.0227973461151123,grad_norm: 0.9999990790622075, iteration: 112730
loss: 1.0424509048461914,grad_norm: 0.9999998473452529, iteration: 112731
loss: 1.0074201822280884,grad_norm: 0.9288179929799925, iteration: 112732
loss: 1.0033031702041626,grad_norm: 0.9999990030178416, iteration: 112733
loss: 1.0067373514175415,grad_norm: 0.9999995582285807, iteration: 112734
loss: 0.9925863742828369,grad_norm: 0.9999991874479172, iteration: 112735
loss: 1.0335179567337036,grad_norm: 0.9202253883034249, iteration: 112736
loss: 0.9749540686607361,grad_norm: 0.9999992140903939, iteration: 112737
loss: 1.0265101194381714,grad_norm: 0.9616358539468347, iteration: 112738
loss: 1.024316668510437,grad_norm: 0.9999991327580297, iteration: 112739
loss: 0.9933815598487854,grad_norm: 0.9999989986881508, iteration: 112740
loss: 0.9566579461097717,grad_norm: 0.9999990989332213, iteration: 112741
loss: 0.9900914430618286,grad_norm: 0.9025726162879825, iteration: 112742
loss: 1.0124279260635376,grad_norm: 0.9999993721086111, iteration: 112743
loss: 1.0300955772399902,grad_norm: 0.9999989788861098, iteration: 112744
loss: 0.9848209619522095,grad_norm: 0.9052702605493828, iteration: 112745
loss: 1.005295753479004,grad_norm: 0.9379773138784178, iteration: 112746
loss: 0.9808668494224548,grad_norm: 0.9999995671481642, iteration: 112747
loss: 0.9957253932952881,grad_norm: 0.9999991446887069, iteration: 112748
loss: 0.9657873511314392,grad_norm: 0.9729925555800206, iteration: 112749
loss: 1.0056638717651367,grad_norm: 0.9999991088709488, iteration: 112750
loss: 1.0062016248703003,grad_norm: 0.9999996639286057, iteration: 112751
loss: 1.030727744102478,grad_norm: 0.999999095395385, iteration: 112752
loss: 1.0091853141784668,grad_norm: 0.9999990618150542, iteration: 112753
loss: 1.0063507556915283,grad_norm: 0.9999990594568031, iteration: 112754
loss: 0.9898728728294373,grad_norm: 0.9999989837590074, iteration: 112755
loss: 1.0087858438491821,grad_norm: 0.9310765112574367, iteration: 112756
loss: 1.0443881750106812,grad_norm: 0.9999996610347591, iteration: 112757
loss: 1.0140923261642456,grad_norm: 0.9445803225266806, iteration: 112758
loss: 0.9844868183135986,grad_norm: 0.8376258806293921, iteration: 112759
loss: 1.0011446475982666,grad_norm: 0.9999990943923667, iteration: 112760
loss: 0.9668236374855042,grad_norm: 0.9918520418698461, iteration: 112761
loss: 1.0415090322494507,grad_norm: 0.9999992006683082, iteration: 112762
loss: 1.0385122299194336,grad_norm: 0.999999011690485, iteration: 112763
loss: 0.9871616959571838,grad_norm: 0.9155135748085327, iteration: 112764
loss: 1.0019261837005615,grad_norm: 0.9481178327808429, iteration: 112765
loss: 1.009644865989685,grad_norm: 0.9999990952870251, iteration: 112766
loss: 0.9700347781181335,grad_norm: 0.9414942920785989, iteration: 112767
loss: 1.0198174715042114,grad_norm: 0.999999123383142, iteration: 112768
loss: 1.0097652673721313,grad_norm: 0.9670316074130803, iteration: 112769
loss: 0.9552281498908997,grad_norm: 0.9181965778948092, iteration: 112770
loss: 0.9924766421318054,grad_norm: 0.9999997965480316, iteration: 112771
loss: 1.0007867813110352,grad_norm: 0.9999990714682502, iteration: 112772
loss: 0.9942350387573242,grad_norm: 0.9999990819602401, iteration: 112773
loss: 1.013624668121338,grad_norm: 0.9865248626375515, iteration: 112774
loss: 1.0168962478637695,grad_norm: 0.9887477093159003, iteration: 112775
loss: 0.9610725045204163,grad_norm: 0.999999053203097, iteration: 112776
loss: 0.995455265045166,grad_norm: 0.9999991167260127, iteration: 112777
loss: 0.996616780757904,grad_norm: 0.9999991382373739, iteration: 112778
loss: 0.9948676228523254,grad_norm: 0.9926591244812385, iteration: 112779
loss: 1.0200296640396118,grad_norm: 0.9999992232119254, iteration: 112780
loss: 1.0060449838638306,grad_norm: 0.9999991429509499, iteration: 112781
loss: 1.01027250289917,grad_norm: 0.8952812603898694, iteration: 112782
loss: 0.9753171801567078,grad_norm: 0.9999993810912176, iteration: 112783
loss: 0.990352213382721,grad_norm: 0.9999991060379751, iteration: 112784
loss: 1.0059524774551392,grad_norm: 0.9116289415163609, iteration: 112785
loss: 0.9816404581069946,grad_norm: 0.999999232983614, iteration: 112786
loss: 0.9925566911697388,grad_norm: 0.9999991632649227, iteration: 112787
loss: 1.005653977394104,grad_norm: 0.9934001485228443, iteration: 112788
loss: 0.9734170436859131,grad_norm: 0.9999991112225279, iteration: 112789
loss: 1.0114758014678955,grad_norm: 0.9999991813648066, iteration: 112790
loss: 1.0001599788665771,grad_norm: 0.9835645604990315, iteration: 112791
loss: 1.0105149745941162,grad_norm: 0.9999990549974509, iteration: 112792
loss: 0.9807006120681763,grad_norm: 0.9999989114094379, iteration: 112793
loss: 1.0254857540130615,grad_norm: 0.9999995142485387, iteration: 112794
loss: 1.0002843141555786,grad_norm: 0.9999990941472003, iteration: 112795
loss: 1.0129069089889526,grad_norm: 0.9999991150382448, iteration: 112796
loss: 1.009135365486145,grad_norm: 0.9999993275371992, iteration: 112797
loss: 1.0184251070022583,grad_norm: 0.9999990853693036, iteration: 112798
loss: 1.0085126161575317,grad_norm: 0.942597726124395, iteration: 112799
loss: 1.0218150615692139,grad_norm: 0.9999990573694918, iteration: 112800
loss: 0.9742804169654846,grad_norm: 0.8222892794139091, iteration: 112801
loss: 1.0335396528244019,grad_norm: 0.9140454167375035, iteration: 112802
loss: 1.1246232986450195,grad_norm: 0.9999998455561617, iteration: 112803
loss: 0.9754704833030701,grad_norm: 0.9847873845859088, iteration: 112804
loss: 0.995884895324707,grad_norm: 0.9999991543727966, iteration: 112805
loss: 1.0006194114685059,grad_norm: 0.99999934570219, iteration: 112806
loss: 0.9775605797767639,grad_norm: 0.9676841177983544, iteration: 112807
loss: 0.9816921949386597,grad_norm: 0.9999990365320306, iteration: 112808
loss: 0.992004930973053,grad_norm: 0.9999991048156921, iteration: 112809
loss: 1.0395623445510864,grad_norm: 0.9999998604309511, iteration: 112810
loss: 1.011085033416748,grad_norm: 0.9999993204603218, iteration: 112811
loss: 1.0118390321731567,grad_norm: 0.8892416464945423, iteration: 112812
loss: 1.030685544013977,grad_norm: 0.9999992812139494, iteration: 112813
loss: 0.9908300638198853,grad_norm: 0.9999990278040958, iteration: 112814
loss: 1.0033328533172607,grad_norm: 0.9555547953695928, iteration: 112815
loss: 0.9904131293296814,grad_norm: 0.9008732604196797, iteration: 112816
loss: 1.0304591655731201,grad_norm: 0.7587572931770036, iteration: 112817
loss: 0.9825337529182434,grad_norm: 0.8987939347087712, iteration: 112818
loss: 1.0145567655563354,grad_norm: 0.9830045478406857, iteration: 112819
loss: 0.9984661936759949,grad_norm: 0.9999993593016869, iteration: 112820
loss: 0.974833607673645,grad_norm: 0.8938691354574254, iteration: 112821
loss: 0.9918797612190247,grad_norm: 0.9752031771094894, iteration: 112822
loss: 0.976843535900116,grad_norm: 0.9999990150012129, iteration: 112823
loss: 1.0072171688079834,grad_norm: 0.9999993734438964, iteration: 112824
loss: 1.0095287561416626,grad_norm: 0.9999990241046554, iteration: 112825
loss: 1.0234668254852295,grad_norm: 0.9999992803197819, iteration: 112826
loss: 1.1298118829727173,grad_norm: 0.9708842011048496, iteration: 112827
loss: 0.9959508776664734,grad_norm: 0.9999991870800211, iteration: 112828
loss: 0.9979607462882996,grad_norm: 0.9800025695895968, iteration: 112829
loss: 0.9678780436515808,grad_norm: 0.9500905699363681, iteration: 112830
loss: 0.9933744668960571,grad_norm: 0.8909304667575939, iteration: 112831
loss: 1.005440592765808,grad_norm: 0.9999992073341486, iteration: 112832
loss: 0.9862350225448608,grad_norm: 0.9999998912319698, iteration: 112833
loss: 1.0054759979248047,grad_norm: 0.8726401177381314, iteration: 112834
loss: 0.991766095161438,grad_norm: 0.9999990315908059, iteration: 112835
loss: 1.0016130208969116,grad_norm: 0.9442990634392053, iteration: 112836
loss: 0.9599261283874512,grad_norm: 0.9999990324151062, iteration: 112837
loss: 1.0133482217788696,grad_norm: 0.9999991524426929, iteration: 112838
loss: 1.016079306602478,grad_norm: 0.8813118229992033, iteration: 112839
loss: 1.0339970588684082,grad_norm: 0.9999991208457262, iteration: 112840
loss: 1.0292341709136963,grad_norm: 0.9999991975185472, iteration: 112841
loss: 1.0022618770599365,grad_norm: 0.9999992337398389, iteration: 112842
loss: 0.9677115678787231,grad_norm: 0.9999991902393314, iteration: 112843
loss: 1.0018401145935059,grad_norm: 0.9154513516275845, iteration: 112844
loss: 0.9648041129112244,grad_norm: 0.9808619726428123, iteration: 112845
loss: 0.9814847707748413,grad_norm: 0.999999169190904, iteration: 112846
loss: 1.048539161682129,grad_norm: 0.9999990716266874, iteration: 112847
loss: 0.9834950566291809,grad_norm: 0.9419000321313327, iteration: 112848
loss: 1.0047070980072021,grad_norm: 0.9999990532859945, iteration: 112849
loss: 1.005312442779541,grad_norm: 0.9999992490764714, iteration: 112850
loss: 1.0094130039215088,grad_norm: 0.9999991250675065, iteration: 112851
loss: 0.9770247340202332,grad_norm: 0.9999990992045412, iteration: 112852
loss: 1.0149251222610474,grad_norm: 0.9235023348387439, iteration: 112853
loss: 1.0219242572784424,grad_norm: 0.9999990266071547, iteration: 112854
loss: 1.0426115989685059,grad_norm: 0.9999991750173738, iteration: 112855
loss: 1.0035539865493774,grad_norm: 0.9999991787883296, iteration: 112856
loss: 0.999904215335846,grad_norm: 0.9387897100312399, iteration: 112857
loss: 1.0071196556091309,grad_norm: 0.854366397282192, iteration: 112858
loss: 1.0111616849899292,grad_norm: 0.8451647344706859, iteration: 112859
loss: 1.0026522874832153,grad_norm: 0.9999992796224346, iteration: 112860
loss: 1.0136264562606812,grad_norm: 0.9999990666133369, iteration: 112861
loss: 1.0142451524734497,grad_norm: 0.9999992312146245, iteration: 112862
loss: 0.980213463306427,grad_norm: 0.9999992433006457, iteration: 112863
loss: 1.0122586488723755,grad_norm: 0.9999991291581726, iteration: 112864
loss: 1.0079900026321411,grad_norm: 0.9999992828063397, iteration: 112865
loss: 0.9914892315864563,grad_norm: 0.9869389316965699, iteration: 112866
loss: 1.0177114009857178,grad_norm: 0.9999991001865542, iteration: 112867
loss: 0.9978864192962646,grad_norm: 0.9999991726597833, iteration: 112868
loss: 1.006259560585022,grad_norm: 0.9559895840744594, iteration: 112869
loss: 0.9689205288887024,grad_norm: 0.9999991612348935, iteration: 112870
loss: 0.9930585622787476,grad_norm: 0.9999991449530352, iteration: 112871
loss: 0.9766461849212646,grad_norm: 0.9303319813495483, iteration: 112872
loss: 0.9814789891242981,grad_norm: 0.9999990416054164, iteration: 112873
loss: 0.9943838715553284,grad_norm: 0.9999990818520907, iteration: 112874
loss: 0.9823309779167175,grad_norm: 0.9463710236291817, iteration: 112875
loss: 0.9968094229698181,grad_norm: 0.9999992437790657, iteration: 112876
loss: 0.9784652590751648,grad_norm: 0.9999992099098325, iteration: 112877
loss: 0.9688618183135986,grad_norm: 0.9986546555184316, iteration: 112878
loss: 1.0042873620986938,grad_norm: 0.9250716970421874, iteration: 112879
loss: 0.9717438817024231,grad_norm: 0.999999062255405, iteration: 112880
loss: 0.9905717372894287,grad_norm: 0.8732820722563263, iteration: 112881
loss: 0.9518558382987976,grad_norm: 0.9999991630631799, iteration: 112882
loss: 0.9881037473678589,grad_norm: 0.9999991666852613, iteration: 112883
loss: 1.0012749433517456,grad_norm: 0.8544200853310951, iteration: 112884
loss: 1.0105985403060913,grad_norm: 0.9999991182206927, iteration: 112885
loss: 1.0277529954910278,grad_norm: 0.9827122759076121, iteration: 112886
loss: 1.019474744796753,grad_norm: 0.9709665228228571, iteration: 112887
loss: 1.0149145126342773,grad_norm: 0.9471644352564809, iteration: 112888
loss: 1.0100127458572388,grad_norm: 0.8475974417591499, iteration: 112889
loss: 0.9903700947761536,grad_norm: 0.9999990561564306, iteration: 112890
loss: 0.982769250869751,grad_norm: 0.9684629305196802, iteration: 112891
loss: 1.0170016288757324,grad_norm: 0.9999993049458284, iteration: 112892
loss: 0.9789130091667175,grad_norm: 0.9981998302442474, iteration: 112893
loss: 0.9880558848381042,grad_norm: 0.9999991250747825, iteration: 112894
loss: 1.0158764123916626,grad_norm: 0.8216865923690135, iteration: 112895
loss: 1.0070887804031372,grad_norm: 0.9749470667375549, iteration: 112896
loss: 1.0102267265319824,grad_norm: 0.914706651454973, iteration: 112897
loss: 1.061954379081726,grad_norm: 0.999999878726945, iteration: 112898
loss: 0.9673529863357544,grad_norm: 0.9999992378295048, iteration: 112899
loss: 0.9754329323768616,grad_norm: 0.9750721560405096, iteration: 112900
loss: 1.0090008974075317,grad_norm: 0.9235772898884085, iteration: 112901
loss: 0.9591314792633057,grad_norm: 0.9999989926618477, iteration: 112902
loss: 0.9979865550994873,grad_norm: 0.9037915400260413, iteration: 112903
loss: 1.0206797122955322,grad_norm: 0.9857972405931229, iteration: 112904
loss: 0.9501610994338989,grad_norm: 0.9999991102827868, iteration: 112905
loss: 1.0087299346923828,grad_norm: 0.9999992123649063, iteration: 112906
loss: 1.0047286748886108,grad_norm: 0.999999045278119, iteration: 112907
loss: 0.9941290020942688,grad_norm: 0.9599598500759895, iteration: 112908
loss: 1.0196497440338135,grad_norm: 0.9999990397730473, iteration: 112909
loss: 1.0526607036590576,grad_norm: 0.9999992824462358, iteration: 112910
loss: 0.9940633177757263,grad_norm: 0.858150121397947, iteration: 112911
loss: 1.026528239250183,grad_norm: 0.8408947982831102, iteration: 112912
loss: 0.971461832523346,grad_norm: 0.979655604809242, iteration: 112913
loss: 0.9813546538352966,grad_norm: 0.9999992477133337, iteration: 112914
loss: 1.0216501951217651,grad_norm: 0.9999998526840974, iteration: 112915
loss: 1.0521693229675293,grad_norm: 0.9999990959336678, iteration: 112916
loss: 0.9864659905433655,grad_norm: 0.9999991692674727, iteration: 112917
loss: 0.9958933591842651,grad_norm: 0.9999992807289397, iteration: 112918
loss: 1.0125693082809448,grad_norm: 0.999999195769365, iteration: 112919
loss: 1.0069586038589478,grad_norm: 0.999999303047639, iteration: 112920
loss: 0.9880058765411377,grad_norm: 0.9999992381083148, iteration: 112921
loss: 0.9403449296951294,grad_norm: 0.9254232730753413, iteration: 112922
loss: 0.9946506023406982,grad_norm: 0.9999990762813041, iteration: 112923
loss: 0.987849771976471,grad_norm: 0.8981272324068439, iteration: 112924
loss: 0.9606451988220215,grad_norm: 0.9999991592041817, iteration: 112925
loss: 0.9945985674858093,grad_norm: 0.8590373739391581, iteration: 112926
loss: 1.0122170448303223,grad_norm: 0.9999990424092804, iteration: 112927
loss: 0.9798921942710876,grad_norm: 0.9999991089482203, iteration: 112928
loss: 0.9917410016059875,grad_norm: 0.9999990765317341, iteration: 112929
loss: 0.98837810754776,grad_norm: 0.9730904602427415, iteration: 112930
loss: 1.0262982845306396,grad_norm: 0.9578812471704725, iteration: 112931
loss: 0.9767535328865051,grad_norm: 0.9999991450397887, iteration: 112932
loss: 0.9742092490196228,grad_norm: 0.9999991684055703, iteration: 112933
loss: 0.9657248258590698,grad_norm: 0.9999990298770427, iteration: 112934
loss: 0.9712241888046265,grad_norm: 0.9999989270195206, iteration: 112935
loss: 0.9966604709625244,grad_norm: 0.99999916687778, iteration: 112936
loss: 1.0159204006195068,grad_norm: 0.9999990871509257, iteration: 112937
loss: 1.0336556434631348,grad_norm: 0.9999991850782567, iteration: 112938
loss: 1.0003708600997925,grad_norm: 0.9999990648574054, iteration: 112939
loss: 1.0498247146606445,grad_norm: 0.9999993202812251, iteration: 112940
loss: 0.9943171739578247,grad_norm: 0.9999993400331516, iteration: 112941
loss: 1.0381863117218018,grad_norm: 0.9999992065186851, iteration: 112942
loss: 0.9904107451438904,grad_norm: 0.9999991361683207, iteration: 112943
loss: 1.0246576070785522,grad_norm: 0.9999999663449286, iteration: 112944
loss: 1.0091307163238525,grad_norm: 0.9798873522305546, iteration: 112945
loss: 1.027295470237732,grad_norm: 0.9307392416773529, iteration: 112946
loss: 1.0462816953659058,grad_norm: 0.9999990361321566, iteration: 112947
loss: 1.0174500942230225,grad_norm: 0.999999168055899, iteration: 112948
loss: 1.0055633783340454,grad_norm: 0.9689321391115463, iteration: 112949
loss: 0.9814651608467102,grad_norm: 0.9999990944323556, iteration: 112950
loss: 0.9894670248031616,grad_norm: 0.9999991277472231, iteration: 112951
loss: 0.9874855279922485,grad_norm: 0.9183133883087103, iteration: 112952
loss: 1.0151222944259644,grad_norm: 0.999999269733811, iteration: 112953
loss: 1.034134030342102,grad_norm: 0.9999997532799668, iteration: 112954
loss: 1.0080835819244385,grad_norm: 0.9815701316521203, iteration: 112955
loss: 1.0399675369262695,grad_norm: 0.999999138729068, iteration: 112956
loss: 0.9782707691192627,grad_norm: 0.9999991459645738, iteration: 112957
loss: 1.007586121559143,grad_norm: 0.9759602404338955, iteration: 112958
loss: 1.063264012336731,grad_norm: 0.9999991697820673, iteration: 112959
loss: 0.960874080657959,grad_norm: 0.8742643393918369, iteration: 112960
loss: 0.9682957530021667,grad_norm: 0.9617269032600865, iteration: 112961
loss: 0.9976502060890198,grad_norm: 0.9999990802842941, iteration: 112962
loss: 0.9916911721229553,grad_norm: 0.9230607997710079, iteration: 112963
loss: 0.9916568398475647,grad_norm: 0.9999990213688154, iteration: 112964
loss: 1.030186653137207,grad_norm: 0.9999992893333766, iteration: 112965
loss: 1.0201220512390137,grad_norm: 0.9999992253015896, iteration: 112966
loss: 1.0009880065917969,grad_norm: 0.9999992139383236, iteration: 112967
loss: 1.0406265258789062,grad_norm: 0.9999991267556585, iteration: 112968
loss: 1.019891381263733,grad_norm: 0.9592744716160676, iteration: 112969
loss: 0.9873270392417908,grad_norm: 0.9165268299503244, iteration: 112970
loss: 0.9998672008514404,grad_norm: 0.9999989890058392, iteration: 112971
loss: 1.006467580795288,grad_norm: 0.9999991084534667, iteration: 112972
loss: 1.0195589065551758,grad_norm: 0.9412132722420253, iteration: 112973
loss: 0.9693818092346191,grad_norm: 0.9740448086855301, iteration: 112974
loss: 1.0309395790100098,grad_norm: 0.9791716870982315, iteration: 112975
loss: 1.0115866661071777,grad_norm: 0.9999991777560038, iteration: 112976
loss: 1.0343233346939087,grad_norm: 0.9362292447965849, iteration: 112977
loss: 1.0054558515548706,grad_norm: 0.8718981919968669, iteration: 112978
loss: 1.0289585590362549,grad_norm: 0.99999899825872, iteration: 112979
loss: 0.9968324303627014,grad_norm: 0.9999990225000623, iteration: 112980
loss: 0.991515040397644,grad_norm: 0.9163968865076955, iteration: 112981
loss: 0.9961328506469727,grad_norm: 0.9999990740966499, iteration: 112982
loss: 1.005894660949707,grad_norm: 0.8439735950376609, iteration: 112983
loss: 0.9943481683731079,grad_norm: 0.9625883887799779, iteration: 112984
loss: 0.9898272156715393,grad_norm: 0.9999991001315025, iteration: 112985
loss: 0.9888730049133301,grad_norm: 0.9999991647449841, iteration: 112986
loss: 0.9669436812400818,grad_norm: 0.7853617013873322, iteration: 112987
loss: 0.9714200496673584,grad_norm: 0.989854822922997, iteration: 112988
loss: 1.068589687347412,grad_norm: 0.999999611008463, iteration: 112989
loss: 0.9885337352752686,grad_norm: 0.9999992592739051, iteration: 112990
loss: 1.019507646560669,grad_norm: 0.9999991254935724, iteration: 112991
loss: 0.9961569905281067,grad_norm: 0.946360526991725, iteration: 112992
loss: 1.0039281845092773,grad_norm: 0.9999992171736941, iteration: 112993
loss: 0.9835522174835205,grad_norm: 0.9584499326659619, iteration: 112994
loss: 0.9610831141471863,grad_norm: 0.968497951705764, iteration: 112995
loss: 0.996994137763977,grad_norm: 0.9999990893589615, iteration: 112996
loss: 1.0196568965911865,grad_norm: 0.9999989980616932, iteration: 112997
loss: 1.011382818222046,grad_norm: 0.999999076264949, iteration: 112998
loss: 1.002060055732727,grad_norm: 0.867113154419896, iteration: 112999
loss: 1.0293515920639038,grad_norm: 0.9999991089587253, iteration: 113000
loss: 1.0306462049484253,grad_norm: 0.9999991965980526, iteration: 113001
loss: 1.0172797441482544,grad_norm: 0.9999990982997131, iteration: 113002
loss: 0.9637740850448608,grad_norm: 0.9262093242733547, iteration: 113003
loss: 1.0216364860534668,grad_norm: 0.9999992953827859, iteration: 113004
loss: 1.0329669713974,grad_norm: 0.9343875851611577, iteration: 113005
loss: 0.9760088920593262,grad_norm: 0.9273419631268991, iteration: 113006
loss: 1.0236896276474,grad_norm: 0.9999991281991029, iteration: 113007
loss: 1.0023738145828247,grad_norm: 0.9386736598284697, iteration: 113008
loss: 0.9843700528144836,grad_norm: 0.9999991664259303, iteration: 113009
loss: 0.9835932850837708,grad_norm: 0.9714465941609797, iteration: 113010
loss: 0.9957363605499268,grad_norm: 0.9999990791839678, iteration: 113011
loss: 1.0110259056091309,grad_norm: 0.9802372362235382, iteration: 113012
loss: 0.986544132232666,grad_norm: 0.9999991455867193, iteration: 113013
loss: 1.0055491924285889,grad_norm: 0.9999992589826459, iteration: 113014
loss: 0.9838893413543701,grad_norm: 0.9999990516381959, iteration: 113015
loss: 1.010941505432129,grad_norm: 0.9302445811796479, iteration: 113016
loss: 1.0009862184524536,grad_norm: 0.972475540335416, iteration: 113017
loss: 1.039811372756958,grad_norm: 0.9890042219586871, iteration: 113018
loss: 1.0075013637542725,grad_norm: 0.9708307755060941, iteration: 113019
loss: 0.9794170260429382,grad_norm: 0.9530464580090543, iteration: 113020
loss: 1.0222628116607666,grad_norm: 0.9999991261893145, iteration: 113021
loss: 1.0056421756744385,grad_norm: 0.9746682441235167, iteration: 113022
loss: 1.0190587043762207,grad_norm: 0.9999990082117312, iteration: 113023
loss: 0.9779533743858337,grad_norm: 0.9083660114814934, iteration: 113024
loss: 1.008655309677124,grad_norm: 0.9877165546817362, iteration: 113025
loss: 1.0089377164840698,grad_norm: 0.9999992801215579, iteration: 113026
loss: 1.0085622072219849,grad_norm: 0.9136998700079589, iteration: 113027
loss: 0.9983745813369751,grad_norm: 0.9821198185731185, iteration: 113028
loss: 1.0032141208648682,grad_norm: 0.9999992330760403, iteration: 113029
loss: 1.0166279077529907,grad_norm: 0.912328443600641, iteration: 113030
loss: 0.9940263628959656,grad_norm: 0.9275613225504931, iteration: 113031
loss: 1.0291519165039062,grad_norm: 0.9999992402644307, iteration: 113032
loss: 0.9921131134033203,grad_norm: 0.9999990663553558, iteration: 113033
loss: 0.9589840769767761,grad_norm: 0.9647732904143224, iteration: 113034
loss: 1.0351437330245972,grad_norm: 0.999999777216329, iteration: 113035
loss: 1.0148351192474365,grad_norm: 0.9999990319902148, iteration: 113036
loss: 0.9684756994247437,grad_norm: 0.9047392425701857, iteration: 113037
loss: 1.0023586750030518,grad_norm: 0.999999091767442, iteration: 113038
loss: 0.9597887992858887,grad_norm: 0.9648202323978053, iteration: 113039
loss: 0.9847094416618347,grad_norm: 0.9999990980260838, iteration: 113040
loss: 1.0015124082565308,grad_norm: 0.8721972431823163, iteration: 113041
loss: 1.0869884490966797,grad_norm: 0.9999995042039318, iteration: 113042
loss: 0.9875409603118896,grad_norm: 0.9999990499791601, iteration: 113043
loss: 0.9553336501121521,grad_norm: 0.999999195463705, iteration: 113044
loss: 1.0193947553634644,grad_norm: 0.9999990229963516, iteration: 113045
loss: 0.99149090051651,grad_norm: 0.9999989386862995, iteration: 113046
loss: 1.000064492225647,grad_norm: 0.999999117591849, iteration: 113047
loss: 1.0350919961929321,grad_norm: 0.961932956666256, iteration: 113048
loss: 1.012047290802002,grad_norm: 0.9999994404287624, iteration: 113049
loss: 1.015351414680481,grad_norm: 0.9999991559740061, iteration: 113050
loss: 0.9816094040870667,grad_norm: 0.92653129280203, iteration: 113051
loss: 0.9642348885536194,grad_norm: 0.9999990870210244, iteration: 113052
loss: 1.0336310863494873,grad_norm: 0.9493341653361157, iteration: 113053
loss: 0.9988677501678467,grad_norm: 0.999999023015414, iteration: 113054
loss: 0.9961471557617188,grad_norm: 0.9229118691068242, iteration: 113055
loss: 1.1031243801116943,grad_norm: 0.9999998837618986, iteration: 113056
loss: 1.0143078565597534,grad_norm: 0.9284371271718196, iteration: 113057
loss: 1.0291553735733032,grad_norm: 0.9999996126436084, iteration: 113058
loss: 1.0054948329925537,grad_norm: 0.9016873427329818, iteration: 113059
loss: 0.9857689142227173,grad_norm: 0.9999997836364242, iteration: 113060
loss: 1.0176129341125488,grad_norm: 0.9752065219421978, iteration: 113061
loss: 1.002307415008545,grad_norm: 0.9999991215061825, iteration: 113062
loss: 1.00809907913208,grad_norm: 0.9999990651822451, iteration: 113063
loss: 1.0409306287765503,grad_norm: 0.999999038371235, iteration: 113064
loss: 1.015831708908081,grad_norm: 0.9999990802858028, iteration: 113065
loss: 1.022325038909912,grad_norm: 0.9999990212792969, iteration: 113066
loss: 0.9880513548851013,grad_norm: 0.9999991559002804, iteration: 113067
loss: 0.9981892704963684,grad_norm: 0.9999991962717734, iteration: 113068
loss: 0.9674729704856873,grad_norm: 0.9548458769541378, iteration: 113069
loss: 1.0310206413269043,grad_norm: 0.9999990642380786, iteration: 113070
loss: 1.0051413774490356,grad_norm: 0.9252820431415859, iteration: 113071
loss: 0.9870460629463196,grad_norm: 0.9999990416106715, iteration: 113072
loss: 0.9720277786254883,grad_norm: 0.9999990451342522, iteration: 113073
loss: 0.9755195379257202,grad_norm: 0.9999991134595498, iteration: 113074
loss: 0.9617450833320618,grad_norm: 0.9999993220125408, iteration: 113075
loss: 0.9786936640739441,grad_norm: 0.9999991783125273, iteration: 113076
loss: 0.9912035465240479,grad_norm: 0.9999992240150826, iteration: 113077
loss: 0.9934664964675903,grad_norm: 0.9999991432535776, iteration: 113078
loss: 0.9774882793426514,grad_norm: 0.8816807046316998, iteration: 113079
loss: 1.000663161277771,grad_norm: 0.9999991244929052, iteration: 113080
loss: 0.9812947511672974,grad_norm: 0.9999993014415036, iteration: 113081
loss: 0.994748592376709,grad_norm: 0.9999990428037595, iteration: 113082
loss: 0.9994606375694275,grad_norm: 0.9999990348098466, iteration: 113083
loss: 1.0321635007858276,grad_norm: 0.9999993584434381, iteration: 113084
loss: 0.9914520978927612,grad_norm: 0.9999990522751215, iteration: 113085
loss: 1.000341773033142,grad_norm: 0.948981269795781, iteration: 113086
loss: 0.9736778140068054,grad_norm: 0.9570278590284643, iteration: 113087
loss: 1.0303255319595337,grad_norm: 0.9197645163753835, iteration: 113088
loss: 0.9866632223129272,grad_norm: 0.9999992387858126, iteration: 113089
loss: 1.0230331420898438,grad_norm: 0.99999907716912, iteration: 113090
loss: 1.015475869178772,grad_norm: 0.9972409494450288, iteration: 113091
loss: 0.9785792231559753,grad_norm: 0.9999990763237214, iteration: 113092
loss: 0.9999183416366577,grad_norm: 0.9660475989103617, iteration: 113093
loss: 1.0155912637710571,grad_norm: 0.9999991425916053, iteration: 113094
loss: 1.000431776046753,grad_norm: 0.9999991323939404, iteration: 113095
loss: 0.9830325245857239,grad_norm: 0.9999990965649985, iteration: 113096
loss: 1.030833125114441,grad_norm: 0.9999995296169834, iteration: 113097
loss: 1.002772331237793,grad_norm: 0.990165194598623, iteration: 113098
loss: 0.9985224604606628,grad_norm: 0.9999992393088147, iteration: 113099
loss: 0.9951027035713196,grad_norm: 0.9221724838409573, iteration: 113100
loss: 1.0218936204910278,grad_norm: 0.9999991350455498, iteration: 113101
loss: 0.9829151034355164,grad_norm: 0.836758957275801, iteration: 113102
loss: 0.9657400250434875,grad_norm: 0.999998964694048, iteration: 113103
loss: 1.0144753456115723,grad_norm: 0.9589874895930749, iteration: 113104
loss: 0.9941917061805725,grad_norm: 0.9999990838106306, iteration: 113105
loss: 1.015077829360962,grad_norm: 0.9100629372794816, iteration: 113106
loss: 0.9897497296333313,grad_norm: 0.9999992753152842, iteration: 113107
loss: 0.9717435240745544,grad_norm: 0.9999991444527003, iteration: 113108
loss: 1.072178840637207,grad_norm: 0.9999991089382338, iteration: 113109
loss: 1.0056226253509521,grad_norm: 0.9999991638050492, iteration: 113110
loss: 1.002711534500122,grad_norm: 0.9999991774453264, iteration: 113111
loss: 1.0158882141113281,grad_norm: 0.9999997761885371, iteration: 113112
loss: 1.0134072303771973,grad_norm: 0.8419938799809772, iteration: 113113
loss: 0.9787779450416565,grad_norm: 0.9856740940400307, iteration: 113114
loss: 1.0045695304870605,grad_norm: 0.9440700194323953, iteration: 113115
loss: 0.9946044683456421,grad_norm: 0.9999992031262409, iteration: 113116
loss: 0.9877458214759827,grad_norm: 0.9999990376590949, iteration: 113117
loss: 0.966446578502655,grad_norm: 0.999999136931371, iteration: 113118
loss: 0.9988037347793579,grad_norm: 0.9999990854222109, iteration: 113119
loss: 0.9950973391532898,grad_norm: 0.9999996349804177, iteration: 113120
loss: 1.0382949113845825,grad_norm: 0.9279487895678376, iteration: 113121
loss: 1.0339818000793457,grad_norm: 0.9388092603131574, iteration: 113122
loss: 0.9883687496185303,grad_norm: 0.9790643377446715, iteration: 113123
loss: 0.9964432716369629,grad_norm: 0.9999990715898355, iteration: 113124
loss: 0.9644020795822144,grad_norm: 0.9908934540259067, iteration: 113125
loss: 0.9959849119186401,grad_norm: 0.9130873473683869, iteration: 113126
loss: 1.0220791101455688,grad_norm: 0.999999237269133, iteration: 113127
loss: 1.0006195306777954,grad_norm: 0.962521717043236, iteration: 113128
loss: 0.9752505421638489,grad_norm: 0.9999989243136961, iteration: 113129
loss: 1.0394266843795776,grad_norm: 0.9999991198771646, iteration: 113130
loss: 0.9963071942329407,grad_norm: 0.9999992586751199, iteration: 113131
loss: 0.9956600069999695,grad_norm: 0.979672623315287, iteration: 113132
loss: 1.0464789867401123,grad_norm: 0.9999989872894968, iteration: 113133
loss: 0.9882131218910217,grad_norm: 0.9759302031532916, iteration: 113134
loss: 1.0003364086151123,grad_norm: 0.9537807616474775, iteration: 113135
loss: 1.0383168458938599,grad_norm: 0.9999995808506197, iteration: 113136
loss: 0.9839718341827393,grad_norm: 0.9168742798366749, iteration: 113137
loss: 0.9638634920120239,grad_norm: 0.9899462942999074, iteration: 113138
loss: 1.011155366897583,grad_norm: 0.9999991666398251, iteration: 113139
loss: 0.9896109104156494,grad_norm: 0.8695093403099006, iteration: 113140
loss: 0.9981817603111267,grad_norm: 0.9999992766662842, iteration: 113141
loss: 0.9778931140899658,grad_norm: 0.9999990367589787, iteration: 113142
loss: 1.0039077997207642,grad_norm: 0.912209758803651, iteration: 113143
loss: 0.9925487637519836,grad_norm: 0.9999991110237295, iteration: 113144
loss: 0.9705148935317993,grad_norm: 0.9999992288952001, iteration: 113145
loss: 0.983982503414154,grad_norm: 0.9999990876715912, iteration: 113146
loss: 1.0116764307022095,grad_norm: 0.99999924724946, iteration: 113147
loss: 1.045348048210144,grad_norm: 0.9999991116371736, iteration: 113148
loss: 0.9643208980560303,grad_norm: 0.9999991357601329, iteration: 113149
loss: 1.033086895942688,grad_norm: 0.9999995099165285, iteration: 113150
loss: 0.9849570989608765,grad_norm: 0.9999990207047119, iteration: 113151
loss: 0.9651437401771545,grad_norm: 0.9999991439826258, iteration: 113152
loss: 1.0044668912887573,grad_norm: 0.9999991558021291, iteration: 113153
loss: 1.0413033962249756,grad_norm: 0.999999262404321, iteration: 113154
loss: 0.9660541415214539,grad_norm: 0.9500260953374705, iteration: 113155
loss: 0.9934934973716736,grad_norm: 0.9757795670591751, iteration: 113156
loss: 1.0111401081085205,grad_norm: 0.9999991226072898, iteration: 113157
loss: 0.9757416844367981,grad_norm: 0.9999990549959807, iteration: 113158
loss: 0.96300208568573,grad_norm: 0.8861054525057653, iteration: 113159
loss: 1.0174777507781982,grad_norm: 0.9999990768559418, iteration: 113160
loss: 1.0126166343688965,grad_norm: 0.813145695209839, iteration: 113161
loss: 1.0342386960983276,grad_norm: 0.9999990437529047, iteration: 113162
loss: 1.0494074821472168,grad_norm: 0.9999994608528631, iteration: 113163
loss: 1.0371501445770264,grad_norm: 0.8848016254147667, iteration: 113164
loss: 0.9992491006851196,grad_norm: 0.9999990804278175, iteration: 113165
loss: 1.0143436193466187,grad_norm: 0.8443954758331221, iteration: 113166
loss: 0.9546031951904297,grad_norm: 0.9999989039561589, iteration: 113167
loss: 1.0247957706451416,grad_norm: 0.9999991336286146, iteration: 113168
loss: 1.0138252973556519,grad_norm: 0.9393374588483588, iteration: 113169
loss: 1.0076956748962402,grad_norm: 0.9308837063345211, iteration: 113170
loss: 0.9775486588478088,grad_norm: 0.999999782324525, iteration: 113171
loss: 1.0089800357818604,grad_norm: 0.9291167686042437, iteration: 113172
loss: 1.0156482458114624,grad_norm: 0.8783886174020636, iteration: 113173
loss: 1.0187435150146484,grad_norm: 0.9999992155774816, iteration: 113174
loss: 1.0423541069030762,grad_norm: 0.9999992394813085, iteration: 113175
loss: 1.029308795928955,grad_norm: 0.9752487699955029, iteration: 113176
loss: 1.0228971242904663,grad_norm: 0.9345577659719914, iteration: 113177
loss: 1.004554271697998,grad_norm: 0.9138847900175583, iteration: 113178
loss: 0.9982036352157593,grad_norm: 0.9999990955736594, iteration: 113179
loss: 1.003657341003418,grad_norm: 0.9473629489388304, iteration: 113180
loss: 0.9879646897315979,grad_norm: 0.9472089512780718, iteration: 113181
loss: 1.0213596820831299,grad_norm: 0.9999991368051001, iteration: 113182
loss: 0.989538848400116,grad_norm: 0.9999992424047431, iteration: 113183
loss: 1.0205097198486328,grad_norm: 0.9621184008110633, iteration: 113184
loss: 0.9833459258079529,grad_norm: 0.9999989964461791, iteration: 113185
loss: 1.0028611421585083,grad_norm: 0.8551062048067933, iteration: 113186
loss: 1.019857406616211,grad_norm: 0.9999991393121942, iteration: 113187
loss: 0.9652566909790039,grad_norm: 0.9999991016880331, iteration: 113188
loss: 0.9968227744102478,grad_norm: 0.9908186754364144, iteration: 113189
loss: 0.9884865283966064,grad_norm: 0.9945720509691733, iteration: 113190
loss: 0.9959197640419006,grad_norm: 0.9999990444535041, iteration: 113191
loss: 1.0330688953399658,grad_norm: 0.9999998268629561, iteration: 113192
loss: 1.0016961097717285,grad_norm: 0.9999991979573549, iteration: 113193
loss: 0.9758779406547546,grad_norm: 0.9999989630616454, iteration: 113194
loss: 0.9762367606163025,grad_norm: 0.9391490246355747, iteration: 113195
loss: 1.0183781385421753,grad_norm: 0.9999994696862866, iteration: 113196
loss: 1.0073533058166504,grad_norm: 0.9815086106004204, iteration: 113197
loss: 1.0079203844070435,grad_norm: 0.8969409540782796, iteration: 113198
loss: 0.9709300398826599,grad_norm: 0.9999989029411238, iteration: 113199
loss: 0.961918294429779,grad_norm: 0.9999992334529654, iteration: 113200
loss: 1.0074015855789185,grad_norm: 0.942891790557355, iteration: 113201
loss: 0.9837707877159119,grad_norm: 0.9306423986387473, iteration: 113202
loss: 1.0163544416427612,grad_norm: 0.9999995162988985, iteration: 113203
loss: 0.9788479804992676,grad_norm: 0.954910873502377, iteration: 113204
loss: 0.9810421466827393,grad_norm: 0.9439993867531156, iteration: 113205
loss: 0.9863365292549133,grad_norm: 0.916511495385291, iteration: 113206
loss: 0.9919964671134949,grad_norm: 0.999999317659499, iteration: 113207
loss: 0.9731082320213318,grad_norm: 0.9999992249646946, iteration: 113208
loss: 1.0297263860702515,grad_norm: 0.9999989763495807, iteration: 113209
loss: 1.0849473476409912,grad_norm: 0.999999245683172, iteration: 113210
loss: 1.0050698518753052,grad_norm: 0.8261990038552405, iteration: 113211
loss: 0.9914825558662415,grad_norm: 0.9999996862375264, iteration: 113212
loss: 0.9753667116165161,grad_norm: 0.974606288838835, iteration: 113213
loss: 0.9958897233009338,grad_norm: 0.9999993487531489, iteration: 113214
loss: 0.992329478263855,grad_norm: 0.999999285259321, iteration: 113215
loss: 0.9711277484893799,grad_norm: 0.9999991074068469, iteration: 113216
loss: 1.0282530784606934,grad_norm: 0.9999990512137831, iteration: 113217
loss: 1.0130746364593506,grad_norm: 0.9999991991705853, iteration: 113218
loss: 0.9855012893676758,grad_norm: 0.9927384682374875, iteration: 113219
loss: 1.0105443000793457,grad_norm: 0.9291816203215933, iteration: 113220
loss: 0.9953325390815735,grad_norm: 0.999999086583165, iteration: 113221
loss: 0.9839439988136292,grad_norm: 0.9999994425574429, iteration: 113222
loss: 1.0155647993087769,grad_norm: 0.9999992000909323, iteration: 113223
loss: 1.027773380279541,grad_norm: 0.9999990232212318, iteration: 113224
loss: 0.9710772037506104,grad_norm: 0.9999992853929678, iteration: 113225
loss: 1.019882082939148,grad_norm: 0.9999991270925178, iteration: 113226
loss: 1.014926552772522,grad_norm: 0.9999991418803414, iteration: 113227
loss: 1.0149058103561401,grad_norm: 0.9999994238746234, iteration: 113228
loss: 1.0056978464126587,grad_norm: 0.999999120265073, iteration: 113229
loss: 1.0147205591201782,grad_norm: 0.9999991271726402, iteration: 113230
loss: 0.9766165614128113,grad_norm: 0.9725774780029703, iteration: 113231
loss: 1.0302248001098633,grad_norm: 0.9999989314116835, iteration: 113232
loss: 0.9749177694320679,grad_norm: 0.9999990783290011, iteration: 113233
loss: 1.0479466915130615,grad_norm: 0.8442038195164802, iteration: 113234
loss: 0.9855076670646667,grad_norm: 0.9999992589809504, iteration: 113235
loss: 1.061691164970398,grad_norm: 0.9999996682850596, iteration: 113236
loss: 1.0060598850250244,grad_norm: 0.9999992751449199, iteration: 113237
loss: 1.0171799659729004,grad_norm: 0.999999094538055, iteration: 113238
loss: 0.9994486570358276,grad_norm: 0.8574836455625628, iteration: 113239
loss: 0.9799990653991699,grad_norm: 0.9990140063965234, iteration: 113240
loss: 1.0070542097091675,grad_norm: 0.9999990167344726, iteration: 113241
loss: 0.9939072728157043,grad_norm: 0.9999991102921171, iteration: 113242
loss: 0.9967617392539978,grad_norm: 0.9999991187194682, iteration: 113243
loss: 1.010151982307434,grad_norm: 0.9662834703299267, iteration: 113244
loss: 1.0298075675964355,grad_norm: 0.9999992249484135, iteration: 113245
loss: 1.01475191116333,grad_norm: 0.9999990318502715, iteration: 113246
loss: 0.9990824460983276,grad_norm: 0.9999990256365594, iteration: 113247
loss: 1.0114574432373047,grad_norm: 0.9999991843894047, iteration: 113248
loss: 0.9704612493515015,grad_norm: 0.9999991207731058, iteration: 113249
loss: 0.9998295307159424,grad_norm: 0.9999991518806404, iteration: 113250
loss: 1.0238724946975708,grad_norm: 0.9999993030333931, iteration: 113251
loss: 1.0022131204605103,grad_norm: 0.9999991292233952, iteration: 113252
loss: 0.98317950963974,grad_norm: 0.9999989756975229, iteration: 113253
loss: 0.9958350658416748,grad_norm: 0.9879303394858001, iteration: 113254
loss: 0.9476600885391235,grad_norm: 0.9322436032432304, iteration: 113255
loss: 0.9967532157897949,grad_norm: 0.999999204608457, iteration: 113256
loss: 1.0041224956512451,grad_norm: 0.9720050214139035, iteration: 113257
loss: 1.0183359384536743,grad_norm: 0.973649556742461, iteration: 113258
loss: 0.9914726614952087,grad_norm: 0.9793326579705006, iteration: 113259
loss: 1.0378172397613525,grad_norm: 0.9999992999895552, iteration: 113260
loss: 0.9637379050254822,grad_norm: 0.9761751390430354, iteration: 113261
loss: 0.9851374626159668,grad_norm: 0.9999989628519551, iteration: 113262
loss: 1.0303924083709717,grad_norm: 0.9836496225598653, iteration: 113263
loss: 0.9732590317726135,grad_norm: 0.9999991033928463, iteration: 113264
loss: 0.9923087954521179,grad_norm: 0.9999989489576001, iteration: 113265
loss: 1.020012378692627,grad_norm: 0.9999991366149116, iteration: 113266
loss: 1.026867151260376,grad_norm: 0.9999996412903307, iteration: 113267
loss: 1.0104210376739502,grad_norm: 0.9999991034946758, iteration: 113268
loss: 1.0012829303741455,grad_norm: 0.9999990540989216, iteration: 113269
loss: 0.9588685631752014,grad_norm: 0.9999992386718325, iteration: 113270
loss: 0.9966467618942261,grad_norm: 0.9999991209693315, iteration: 113271
loss: 0.9955722689628601,grad_norm: 0.9999997288534265, iteration: 113272
loss: 1.0381476879119873,grad_norm: 0.9999991633477036, iteration: 113273
loss: 1.0079081058502197,grad_norm: 0.9999988920209433, iteration: 113274
loss: 0.9835266470909119,grad_norm: 0.9999342507344285, iteration: 113275
loss: 0.9986565709114075,grad_norm: 0.8778874133386007, iteration: 113276
loss: 0.9879370927810669,grad_norm: 0.9500581569168683, iteration: 113277
loss: 1.0120140314102173,grad_norm: 0.8262231825974208, iteration: 113278
loss: 0.9851064682006836,grad_norm: 0.9999993327245698, iteration: 113279
loss: 0.9510388970375061,grad_norm: 0.999998941948459, iteration: 113280
loss: 0.9745152592658997,grad_norm: 0.9999991920504723, iteration: 113281
loss: 1.0055601596832275,grad_norm: 0.9544439409742719, iteration: 113282
loss: 0.964168906211853,grad_norm: 0.9999990636704289, iteration: 113283
loss: 1.0088279247283936,grad_norm: 0.9475530026488022, iteration: 113284
loss: 1.004400372505188,grad_norm: 0.9999990873247699, iteration: 113285
loss: 1.023918867111206,grad_norm: 0.9999989767344967, iteration: 113286
loss: 1.0096043348312378,grad_norm: 0.9999992014002432, iteration: 113287
loss: 0.9657361507415771,grad_norm: 0.9999993926411535, iteration: 113288
loss: 1.0127146244049072,grad_norm: 0.9963906135528517, iteration: 113289
loss: 0.9885579347610474,grad_norm: 0.9999990494117027, iteration: 113290
loss: 1.0171072483062744,grad_norm: 0.9911817227175628, iteration: 113291
loss: 0.9875874519348145,grad_norm: 0.9476451933400917, iteration: 113292
loss: 1.0228171348571777,grad_norm: 0.877546624066648, iteration: 113293
loss: 1.0333147048950195,grad_norm: 0.9999991649373081, iteration: 113294
loss: 0.9986079335212708,grad_norm: 0.874101836459034, iteration: 113295
loss: 1.0010310411453247,grad_norm: 0.9999990932018995, iteration: 113296
loss: 0.9601173996925354,grad_norm: 0.9527068844378124, iteration: 113297
loss: 0.9863657355308533,grad_norm: 0.999998977482625, iteration: 113298
loss: 0.9900361895561218,grad_norm: 0.9389056332310709, iteration: 113299
loss: 0.9998968839645386,grad_norm: 0.885616429851385, iteration: 113300
loss: 0.9672041535377502,grad_norm: 0.9999991865736251, iteration: 113301
loss: 0.9879510402679443,grad_norm: 0.9999991130134326, iteration: 113302
loss: 1.0066332817077637,grad_norm: 0.999999202075527, iteration: 113303
loss: 0.9790791869163513,grad_norm: 0.9999991236840021, iteration: 113304
loss: 1.0150147676467896,grad_norm: 0.9999989741166321, iteration: 113305
loss: 1.0028456449508667,grad_norm: 0.9999990789041039, iteration: 113306
loss: 0.9947383403778076,grad_norm: 0.976288574319903, iteration: 113307
loss: 1.0161503553390503,grad_norm: 0.9999991834803234, iteration: 113308
loss: 1.0034236907958984,grad_norm: 0.9999991263831979, iteration: 113309
loss: 1.0134271383285522,grad_norm: 0.9827349132645341, iteration: 113310
loss: 0.9570959210395813,grad_norm: 0.9754763227901699, iteration: 113311
loss: 1.0021131038665771,grad_norm: 0.9999990110000873, iteration: 113312
loss: 0.9825606346130371,grad_norm: 0.8957318671571094, iteration: 113313
loss: 1.0142107009887695,grad_norm: 0.9999990872006742, iteration: 113314
loss: 1.0028914213180542,grad_norm: 0.8412574528481102, iteration: 113315
loss: 0.9942410588264465,grad_norm: 0.9999992246453757, iteration: 113316
loss: 1.0074608325958252,grad_norm: 0.9999992578050572, iteration: 113317
loss: 0.9765568375587463,grad_norm: 0.9999995191796607, iteration: 113318
loss: 0.9910783171653748,grad_norm: 0.9023118928608891, iteration: 113319
loss: 1.0555224418640137,grad_norm: 0.9999994811977344, iteration: 113320
loss: 1.0324020385742188,grad_norm: 0.9999991948422672, iteration: 113321
loss: 0.9935817718505859,grad_norm: 0.9999993721767986, iteration: 113322
loss: 1.0044857263565063,grad_norm: 0.999999155731106, iteration: 113323
loss: 0.9820821285247803,grad_norm: 0.9999989341282522, iteration: 113324
loss: 0.9948203563690186,grad_norm: 0.999999161495754, iteration: 113325
loss: 0.9886045455932617,grad_norm: 0.9999991490717479, iteration: 113326
loss: 0.9896053075790405,grad_norm: 0.9999991824031039, iteration: 113327
loss: 1.0330924987792969,grad_norm: 0.9999992804779279, iteration: 113328
loss: 1.0315680503845215,grad_norm: 0.999999289166256, iteration: 113329
loss: 0.981754720211029,grad_norm: 0.9363167967323894, iteration: 113330
loss: 1.0111558437347412,grad_norm: 0.9999993678423029, iteration: 113331
loss: 0.9902238845825195,grad_norm: 0.9999991021741439, iteration: 113332
loss: 1.0105113983154297,grad_norm: 0.999999100797353, iteration: 113333
loss: 0.9857479929924011,grad_norm: 0.9999991411951815, iteration: 113334
loss: 1.0017499923706055,grad_norm: 0.9999990720827003, iteration: 113335
loss: 1.0166081190109253,grad_norm: 0.9999992386826927, iteration: 113336
loss: 0.9776430726051331,grad_norm: 0.9121379243114923, iteration: 113337
loss: 0.9750493168830872,grad_norm: 0.9999991088499213, iteration: 113338
loss: 1.013867735862732,grad_norm: 0.9999991591293608, iteration: 113339
loss: 1.0223382711410522,grad_norm: 0.9999992245421674, iteration: 113340
loss: 0.9560118317604065,grad_norm: 0.8619095652737069, iteration: 113341
loss: 0.9970799684524536,grad_norm: 0.9999990380342619, iteration: 113342
loss: 0.9608216285705566,grad_norm: 0.9904050117334038, iteration: 113343
loss: 1.0310194492340088,grad_norm: 0.9999991214391228, iteration: 113344
loss: 1.0408426523208618,grad_norm: 0.9999995712631693, iteration: 113345
loss: 0.9377206563949585,grad_norm: 0.9781075110051487, iteration: 113346
loss: 0.964994490146637,grad_norm: 0.9806083402882666, iteration: 113347
loss: 1.038710355758667,grad_norm: 0.9999990161101625, iteration: 113348
loss: 0.985905647277832,grad_norm: 0.9999990564632912, iteration: 113349
loss: 1.0081760883331299,grad_norm: 0.9999991927088555, iteration: 113350
loss: 0.9783825874328613,grad_norm: 0.9999990770879005, iteration: 113351
loss: 1.0058891773223877,grad_norm: 0.999999199466371, iteration: 113352
loss: 1.0151115655899048,grad_norm: 0.9999991934034297, iteration: 113353
loss: 0.9884640574455261,grad_norm: 0.9381032042910661, iteration: 113354
loss: 0.9999630451202393,grad_norm: 0.9999992172644592, iteration: 113355
loss: 1.0148086547851562,grad_norm: 0.9999992140324717, iteration: 113356
loss: 0.9785304069519043,grad_norm: 0.9999990247544679, iteration: 113357
loss: 1.0361084938049316,grad_norm: 0.9999993252057083, iteration: 113358
loss: 1.0122078657150269,grad_norm: 0.99999918057541, iteration: 113359
loss: 1.0057724714279175,grad_norm: 0.9999992077383216, iteration: 113360
loss: 1.0189141035079956,grad_norm: 0.9903522347174073, iteration: 113361
loss: 1.0062364339828491,grad_norm: 0.9999991007720891, iteration: 113362
loss: 1.0349856615066528,grad_norm: 0.999999249162713, iteration: 113363
loss: 0.9742517471313477,grad_norm: 0.9930830653003383, iteration: 113364
loss: 1.004225254058838,grad_norm: 0.9999990571840063, iteration: 113365
loss: 1.0032057762145996,grad_norm: 0.9999990174711526, iteration: 113366
loss: 1.014601707458496,grad_norm: 0.9999992270998699, iteration: 113367
loss: 1.0014517307281494,grad_norm: 0.9999989584825464, iteration: 113368
loss: 1.0156571865081787,grad_norm: 0.9741134241550112, iteration: 113369
loss: 1.006118893623352,grad_norm: 0.9795176504960614, iteration: 113370
loss: 0.9962981939315796,grad_norm: 0.9999992072232482, iteration: 113371
loss: 0.9989234209060669,grad_norm: 0.9313634034658844, iteration: 113372
loss: 1.0129368305206299,grad_norm: 0.9999991666488678, iteration: 113373
loss: 0.9913492798805237,grad_norm: 0.9766872741605434, iteration: 113374
loss: 0.9647122025489807,grad_norm: 0.9828169900720152, iteration: 113375
loss: 0.9933796525001526,grad_norm: 0.9999991310846339, iteration: 113376
loss: 0.9971258044242859,grad_norm: 0.9999988930169007, iteration: 113377
loss: 1.0127640962600708,grad_norm: 0.9741528053645734, iteration: 113378
loss: 1.014330267906189,grad_norm: 0.9999992184436661, iteration: 113379
loss: 0.996229887008667,grad_norm: 0.9999991818554178, iteration: 113380
loss: 1.0277634859085083,grad_norm: 0.9999990708375294, iteration: 113381
loss: 0.9571123719215393,grad_norm: 0.8761854639242297, iteration: 113382
loss: 1.0113505125045776,grad_norm: 0.9535734516631644, iteration: 113383
loss: 1.0118805170059204,grad_norm: 0.892386582269569, iteration: 113384
loss: 1.012993335723877,grad_norm: 0.9999989325986268, iteration: 113385
loss: 0.9978823661804199,grad_norm: 0.9999993209932543, iteration: 113386
loss: 1.0215450525283813,grad_norm: 0.9570417996462263, iteration: 113387
loss: 0.9959672093391418,grad_norm: 0.999999154125393, iteration: 113388
loss: 1.1260489225387573,grad_norm: 0.9999995922692017, iteration: 113389
loss: 0.9896066784858704,grad_norm: 0.927448701553296, iteration: 113390
loss: 0.9979014992713928,grad_norm: 0.9077670242112097, iteration: 113391
loss: 0.9781972765922546,grad_norm: 0.9999991338513218, iteration: 113392
loss: 1.0119558572769165,grad_norm: 0.9619847327380772, iteration: 113393
loss: 1.0118802785873413,grad_norm: 0.9999990867113941, iteration: 113394
loss: 0.9921746850013733,grad_norm: 0.9999990672309007, iteration: 113395
loss: 1.0000801086425781,grad_norm: 0.9999991770538739, iteration: 113396
loss: 1.0248969793319702,grad_norm: 0.9999990223702913, iteration: 113397
loss: 1.0224577188491821,grad_norm: 0.9999990594875818, iteration: 113398
loss: 1.0315660238265991,grad_norm: 0.9999992673965639, iteration: 113399
loss: 1.014241099357605,grad_norm: 0.9999990930835444, iteration: 113400
loss: 0.9947147965431213,grad_norm: 0.964289815081373, iteration: 113401
loss: 1.0017917156219482,grad_norm: 0.9999991220341667, iteration: 113402
loss: 1.0131566524505615,grad_norm: 0.9999993098472036, iteration: 113403
loss: 0.9745127558708191,grad_norm: 0.9999990341322028, iteration: 113404
loss: 0.9839128851890564,grad_norm: 0.9179119899439709, iteration: 113405
loss: 1.0170612335205078,grad_norm: 0.9999990676777856, iteration: 113406
loss: 1.270822286605835,grad_norm: 0.9999992673958722, iteration: 113407
loss: 0.9828104972839355,grad_norm: 0.9999989501481467, iteration: 113408
loss: 1.0210301876068115,grad_norm: 0.9999993278388564, iteration: 113409
loss: 0.9925026893615723,grad_norm: 0.999999142205238, iteration: 113410
loss: 1.0368187427520752,grad_norm: 0.9999991559283822, iteration: 113411
loss: 1.0110015869140625,grad_norm: 0.9999992265258963, iteration: 113412
loss: 1.0132027864456177,grad_norm: 0.9999994802361947, iteration: 113413
loss: 1.012587308883667,grad_norm: 0.9999995259746117, iteration: 113414
loss: 1.03657066822052,grad_norm: 0.9999994053657543, iteration: 113415
loss: 1.008256196975708,grad_norm: 0.9969186539272946, iteration: 113416
loss: 1.0137146711349487,grad_norm: 0.9003309344826265, iteration: 113417
loss: 1.3328111171722412,grad_norm: 0.9999991684340905, iteration: 113418
loss: 1.0558634996414185,grad_norm: 0.999999296656895, iteration: 113419
loss: 1.0118327140808105,grad_norm: 0.9999992764183384, iteration: 113420
loss: 1.0225107669830322,grad_norm: 0.9714557782530318, iteration: 113421
loss: 0.953519344329834,grad_norm: 0.9999989970591441, iteration: 113422
loss: 0.9966752529144287,grad_norm: 0.9999989981094121, iteration: 113423
loss: 1.0369577407836914,grad_norm: 0.9999995593828964, iteration: 113424
loss: 0.9787822365760803,grad_norm: 0.959623690677132, iteration: 113425
loss: 1.0245012044906616,grad_norm: 0.8793091597379004, iteration: 113426
loss: 1.0092997550964355,grad_norm: 0.8573186289397403, iteration: 113427
loss: 0.995674192905426,grad_norm: 0.9546331318383225, iteration: 113428
loss: 1.0049842596054077,grad_norm: 0.8891753261388117, iteration: 113429
loss: 0.9828567504882812,grad_norm: 0.9999989326880112, iteration: 113430
loss: 1.012498378753662,grad_norm: 0.9999991004114145, iteration: 113431
loss: 1.0012106895446777,grad_norm: 0.973410098418262, iteration: 113432
loss: 0.9767055511474609,grad_norm: 0.9529496514522663, iteration: 113433
loss: 1.0081799030303955,grad_norm: 0.9999992672302506, iteration: 113434
loss: 0.9946147799491882,grad_norm: 0.9964741852057926, iteration: 113435
loss: 0.9696176648139954,grad_norm: 0.9999990084841169, iteration: 113436
loss: 1.00055992603302,grad_norm: 0.9726651187599519, iteration: 113437
loss: 1.0521620512008667,grad_norm: 0.9999990847561208, iteration: 113438
loss: 0.9951486587524414,grad_norm: 0.9999991887838325, iteration: 113439
loss: 1.0023517608642578,grad_norm: 0.9999991948827539, iteration: 113440
loss: 1.0113765001296997,grad_norm: 0.8850365256334041, iteration: 113441
loss: 1.0128657817840576,grad_norm: 0.9999993115590907, iteration: 113442
loss: 0.9984549283981323,grad_norm: 0.9999992193815045, iteration: 113443
loss: 1.0643519163131714,grad_norm: 0.9999994821454564, iteration: 113444
loss: 1.0143013000488281,grad_norm: 0.9999991957830731, iteration: 113445
loss: 0.9899517297744751,grad_norm: 0.9999991829022622, iteration: 113446
loss: 1.0257891416549683,grad_norm: 0.9500976815093956, iteration: 113447
loss: 0.9867231845855713,grad_norm: 0.9999991480541666, iteration: 113448
loss: 0.9865695238113403,grad_norm: 0.9999991030799497, iteration: 113449
loss: 1.0493766069412231,grad_norm: 0.9999993471764075, iteration: 113450
loss: 1.035857081413269,grad_norm: 0.9999992675863536, iteration: 113451
loss: 1.001941442489624,grad_norm: 0.9999991054884709, iteration: 113452
loss: 0.9874460697174072,grad_norm: 0.8201017821469007, iteration: 113453
loss: 0.9542977809906006,grad_norm: 0.9999990163755226, iteration: 113454
loss: 0.9983299970626831,grad_norm: 0.9643470934424682, iteration: 113455
loss: 0.9795932173728943,grad_norm: 0.9171327016315964, iteration: 113456
loss: 1.1053472757339478,grad_norm: 0.9999994130666664, iteration: 113457
loss: 1.0270185470581055,grad_norm: 0.9999992649728386, iteration: 113458
loss: 0.9986453652381897,grad_norm: 0.9999991608704047, iteration: 113459
loss: 0.9847047328948975,grad_norm: 0.9999991494026133, iteration: 113460
loss: 1.0038105249404907,grad_norm: 0.9999990704669046, iteration: 113461
loss: 0.9885719418525696,grad_norm: 0.9999992113285124, iteration: 113462
loss: 0.9715881943702698,grad_norm: 0.9999991171382118, iteration: 113463
loss: 1.0219866037368774,grad_norm: 0.9271307800343767, iteration: 113464
loss: 0.981181800365448,grad_norm: 0.9999990051903281, iteration: 113465
loss: 1.0086283683776855,grad_norm: 0.999998933036459, iteration: 113466
loss: 1.0560872554779053,grad_norm: 0.9999991587905979, iteration: 113467
loss: 0.9957671165466309,grad_norm: 0.9999991675568659, iteration: 113468
loss: 1.0337154865264893,grad_norm: 0.999999204055232, iteration: 113469
loss: 0.9768999218940735,grad_norm: 0.9873271094751078, iteration: 113470
loss: 1.0034457445144653,grad_norm: 0.9999988903991888, iteration: 113471
loss: 1.0016804933547974,grad_norm: 0.9999991136581128, iteration: 113472
loss: 0.9858566522598267,grad_norm: 0.9734625258328021, iteration: 113473
loss: 1.0294498205184937,grad_norm: 0.9999990428413441, iteration: 113474
loss: 1.0273188352584839,grad_norm: 0.9553988799078482, iteration: 113475
loss: 1.0056759119033813,grad_norm: 0.9999990572387716, iteration: 113476
loss: 1.0070213079452515,grad_norm: 0.9999992668160129, iteration: 113477
loss: 0.9784801006317139,grad_norm: 0.9364008922720978, iteration: 113478
loss: 0.9717214107513428,grad_norm: 0.9999991293662749, iteration: 113479
loss: 1.0034819841384888,grad_norm: 0.9999991060544706, iteration: 113480
loss: 1.0321638584136963,grad_norm: 0.9999990712588289, iteration: 113481
loss: 1.0158096551895142,grad_norm: 0.909354798410104, iteration: 113482
loss: 1.0335274934768677,grad_norm: 0.9999997915740494, iteration: 113483
loss: 0.9861295819282532,grad_norm: 0.9999992598170214, iteration: 113484
loss: 0.9695740342140198,grad_norm: 0.9294978671963399, iteration: 113485
loss: 1.0178859233856201,grad_norm: 0.9163530629561342, iteration: 113486
loss: 1.0007957220077515,grad_norm: 0.9999990360138483, iteration: 113487
loss: 0.9949933886528015,grad_norm: 0.9998224166502405, iteration: 113488
loss: 1.0276819467544556,grad_norm: 0.9045691323112242, iteration: 113489
loss: 1.0050938129425049,grad_norm: 0.9776860126418635, iteration: 113490
loss: 1.0218616724014282,grad_norm: 0.9999992450062825, iteration: 113491
loss: 0.9720165133476257,grad_norm: 0.9999991277571948, iteration: 113492
loss: 0.9901999831199646,grad_norm: 0.9999991950922381, iteration: 113493
loss: 0.9937624335289001,grad_norm: 0.999999192889534, iteration: 113494
loss: 0.9828593730926514,grad_norm: 0.9999991620095321, iteration: 113495
loss: 0.9876545071601868,grad_norm: 0.9999993315351876, iteration: 113496
loss: 1.000968337059021,grad_norm: 0.9999990799946337, iteration: 113497
loss: 0.9868834614753723,grad_norm: 0.8968401590120615, iteration: 113498
loss: 0.9976056218147278,grad_norm: 0.9999992266761399, iteration: 113499
loss: 1.0099241733551025,grad_norm: 0.9965172776788911, iteration: 113500
loss: 0.977575957775116,grad_norm: 0.9999991922144763, iteration: 113501
loss: 0.9813224673271179,grad_norm: 0.9999992336441275, iteration: 113502
loss: 1.0155936479568481,grad_norm: 0.9999989586844792, iteration: 113503
loss: 1.0001285076141357,grad_norm: 0.9673038381904213, iteration: 113504
loss: 0.950230062007904,grad_norm: 0.9999990053751724, iteration: 113505
loss: 0.9994584918022156,grad_norm: 0.9999990813549778, iteration: 113506
loss: 0.9826201796531677,grad_norm: 0.999999202488717, iteration: 113507
loss: 0.9884428977966309,grad_norm: 0.9076034904318041, iteration: 113508
loss: 0.9965581893920898,grad_norm: 0.9999991321294435, iteration: 113509
loss: 0.9790614247322083,grad_norm: 0.9999990657129276, iteration: 113510
loss: 0.9828083515167236,grad_norm: 0.9810578986595527, iteration: 113511
loss: 1.0435909032821655,grad_norm: 0.9999991348193756, iteration: 113512
loss: 0.9514977335929871,grad_norm: 0.9999991151369888, iteration: 113513
loss: 0.9573564529418945,grad_norm: 0.9212476588543466, iteration: 113514
loss: 0.9798692464828491,grad_norm: 0.9999990552745645, iteration: 113515
loss: 0.9692168235778809,grad_norm: 0.9999991394505373, iteration: 113516
loss: 1.018367052078247,grad_norm: 0.9860203411881846, iteration: 113517
loss: 1.0119446516036987,grad_norm: 0.9999991941201375, iteration: 113518
loss: 1.0977739095687866,grad_norm: 0.9999992664113406, iteration: 113519
loss: 1.0227643251419067,grad_norm: 0.9999991241843613, iteration: 113520
loss: 1.004390835762024,grad_norm: 0.9811966418681448, iteration: 113521
loss: 1.1148247718811035,grad_norm: 0.9999992014091977, iteration: 113522
loss: 0.9803371429443359,grad_norm: 0.9999990494333906, iteration: 113523
loss: 0.999798595905304,grad_norm: 0.999999102300302, iteration: 113524
loss: 0.9891151189804077,grad_norm: 0.9999991374023673, iteration: 113525
loss: 1.0378904342651367,grad_norm: 0.9999990211337941, iteration: 113526
loss: 0.9932735562324524,grad_norm: 0.9999990877415526, iteration: 113527
loss: 0.9792569875717163,grad_norm: 0.9999990419472573, iteration: 113528
loss: 1.0137399435043335,grad_norm: 0.9999991079410613, iteration: 113529
loss: 0.95596843957901,grad_norm: 0.9899654007838813, iteration: 113530
loss: 1.020537257194519,grad_norm: 0.881178144902315, iteration: 113531
loss: 0.9837054014205933,grad_norm: 0.999999144242647, iteration: 113532
loss: 0.9863503575325012,grad_norm: 0.9999991024575643, iteration: 113533
loss: 1.003082275390625,grad_norm: 0.9999991917372775, iteration: 113534
loss: 0.9973465204238892,grad_norm: 0.999999164636622, iteration: 113535
loss: 1.0043922662734985,grad_norm: 0.9999990100894388, iteration: 113536
loss: 0.9795789122581482,grad_norm: 0.8905910699025991, iteration: 113537
loss: 1.0170267820358276,grad_norm: 0.9999991010228562, iteration: 113538
loss: 0.9999411702156067,grad_norm: 0.9953976827750394, iteration: 113539
loss: 1.0277131795883179,grad_norm: 0.9999990765655641, iteration: 113540
loss: 1.0138565301895142,grad_norm: 0.9999991268219178, iteration: 113541
loss: 0.9732908606529236,grad_norm: 0.9758560871439403, iteration: 113542
loss: 1.0424485206604004,grad_norm: 0.99999925073484, iteration: 113543
loss: 1.0026134252548218,grad_norm: 0.8417160989910124, iteration: 113544
loss: 1.025399923324585,grad_norm: 0.999999167301341, iteration: 113545
loss: 0.9976781010627747,grad_norm: 0.999999206265021, iteration: 113546
loss: 1.01776921749115,grad_norm: 0.9999989673359128, iteration: 113547
loss: 0.9753759503364563,grad_norm: 0.9999992067105736, iteration: 113548
loss: 1.0060259103775024,grad_norm: 0.9999990677212341, iteration: 113549
loss: 0.9692883491516113,grad_norm: 0.9999991568416389, iteration: 113550
loss: 0.983661413192749,grad_norm: 0.9306937531469548, iteration: 113551
loss: 0.9854300022125244,grad_norm: 0.9999991402227661, iteration: 113552
loss: 0.9911014437675476,grad_norm: 0.9216185137455822, iteration: 113553
loss: 1.0196309089660645,grad_norm: 0.9977878610007184, iteration: 113554
loss: 0.9935200810432434,grad_norm: 0.9153109717278288, iteration: 113555
loss: 0.9976602792739868,grad_norm: 0.9999990927638138, iteration: 113556
loss: 1.031394124031067,grad_norm: 0.9999990778497144, iteration: 113557
loss: 1.038034439086914,grad_norm: 0.9999994373501344, iteration: 113558
loss: 0.9827802181243896,grad_norm: 0.8726197298075156, iteration: 113559
loss: 1.0061777830123901,grad_norm: 0.9999993029095741, iteration: 113560
loss: 0.9558721780776978,grad_norm: 0.9999991630194724, iteration: 113561
loss: 1.0317829847335815,grad_norm: 0.9999989026065885, iteration: 113562
loss: 0.9799904823303223,grad_norm: 0.9999989104043309, iteration: 113563
loss: 1.0113106966018677,grad_norm: 0.9999992048548123, iteration: 113564
loss: 0.9931236505508423,grad_norm: 0.9999992361923762, iteration: 113565
loss: 1.024052381515503,grad_norm: 0.8616433634945073, iteration: 113566
loss: 1.0433632135391235,grad_norm: 0.9999990933335032, iteration: 113567
loss: 0.9964309930801392,grad_norm: 0.9999992201567169, iteration: 113568
loss: 0.998414933681488,grad_norm: 0.999999113956147, iteration: 113569
loss: 1.0080393552780151,grad_norm: 0.9999990793855312, iteration: 113570
loss: 0.9982696771621704,grad_norm: 0.9786583281443272, iteration: 113571
loss: 1.0044608116149902,grad_norm: 0.9999991377648082, iteration: 113572
loss: 0.9945319294929504,grad_norm: 0.9999990718479947, iteration: 113573
loss: 0.9592553973197937,grad_norm: 0.9999990560595028, iteration: 113574
loss: 0.9974201321601868,grad_norm: 0.9999993760095174, iteration: 113575
loss: 1.0031073093414307,grad_norm: 0.999999849569034, iteration: 113576
loss: 0.995713472366333,grad_norm: 0.9999991291695517, iteration: 113577
loss: 1.0115755796432495,grad_norm: 0.8539285311183992, iteration: 113578
loss: 1.003131628036499,grad_norm: 0.9999990690091696, iteration: 113579
loss: 0.9840328097343445,grad_norm: 0.9999991205798467, iteration: 113580
loss: 1.016156792640686,grad_norm: 0.9487921450392448, iteration: 113581
loss: 1.0022388696670532,grad_norm: 0.9097943883964124, iteration: 113582
loss: 1.004737377166748,grad_norm: 0.9473994525250238, iteration: 113583
loss: 0.9615878462791443,grad_norm: 0.9950780082562487, iteration: 113584
loss: 1.0014054775238037,grad_norm: 0.9999990607157389, iteration: 113585
loss: 1.011958360671997,grad_norm: 0.9999991634471226, iteration: 113586
loss: 0.9767301082611084,grad_norm: 0.9411939420425987, iteration: 113587
loss: 0.9846406579017639,grad_norm: 0.9999991812892792, iteration: 113588
loss: 1.039547324180603,grad_norm: 0.9864795952216964, iteration: 113589
loss: 1.0819436311721802,grad_norm: 0.9999998275014148, iteration: 113590
loss: 0.9713197350502014,grad_norm: 0.9999990344561895, iteration: 113591
loss: 1.0204097032546997,grad_norm: 0.9999990604640457, iteration: 113592
loss: 0.9801472425460815,grad_norm: 0.9571967261573259, iteration: 113593
loss: 1.006280541419983,grad_norm: 0.9999991629035133, iteration: 113594
loss: 0.9916052222251892,grad_norm: 0.9999990917771986, iteration: 113595
loss: 0.9802917838096619,grad_norm: 0.9999991777054963, iteration: 113596
loss: 1.0159341096878052,grad_norm: 0.9686535387704434, iteration: 113597
loss: 0.9855409264564514,grad_norm: 0.9999991747838118, iteration: 113598
loss: 1.0045828819274902,grad_norm: 0.9415877190351447, iteration: 113599
loss: 0.9929951429367065,grad_norm: 0.8648852621335547, iteration: 113600
loss: 1.0060813426971436,grad_norm: 0.842039305945472, iteration: 113601
loss: 0.9912727475166321,grad_norm: 0.9694349784537146, iteration: 113602
loss: 1.0062487125396729,grad_norm: 0.9473317510761429, iteration: 113603
loss: 1.0132646560668945,grad_norm: 0.9999992032689623, iteration: 113604
loss: 1.0181668996810913,grad_norm: 0.9450645036193999, iteration: 113605
loss: 1.0144463777542114,grad_norm: 0.9839815263196817, iteration: 113606
loss: 0.9963716864585876,grad_norm: 0.9917260354820518, iteration: 113607
loss: 1.016501545906067,grad_norm: 0.9999990625065001, iteration: 113608
loss: 1.0139036178588867,grad_norm: 0.9741480021385371, iteration: 113609
loss: 0.9844118356704712,grad_norm: 0.9999990217762079, iteration: 113610
loss: 1.0028198957443237,grad_norm: 0.9748542057801411, iteration: 113611
loss: 0.9725785255432129,grad_norm: 0.9010532424453115, iteration: 113612
loss: 0.9623961448669434,grad_norm: 0.965677420081514, iteration: 113613
loss: 1.0141441822052002,grad_norm: 0.9999989872474119, iteration: 113614
loss: 1.004287838935852,grad_norm: 0.9999996244364576, iteration: 113615
loss: 0.9858556389808655,grad_norm: 0.9999991005695066, iteration: 113616
loss: 0.9828482866287231,grad_norm: 0.9999990619066292, iteration: 113617
loss: 0.9642369151115417,grad_norm: 0.9590604783804073, iteration: 113618
loss: 1.0597620010375977,grad_norm: 0.9999992105336057, iteration: 113619
loss: 1.0039451122283936,grad_norm: 0.9999992178202592, iteration: 113620
loss: 1.0140284299850464,grad_norm: 0.9999991725064475, iteration: 113621
loss: 0.9900029897689819,grad_norm: 0.999999163535336, iteration: 113622
loss: 0.9984496831893921,grad_norm: 0.9999990366473458, iteration: 113623
loss: 1.025183081626892,grad_norm: 0.9999989453429206, iteration: 113624
loss: 1.039915680885315,grad_norm: 0.9999992703110723, iteration: 113625
loss: 1.010079026222229,grad_norm: 0.999999265681599, iteration: 113626
loss: 1.0112158060073853,grad_norm: 0.9999990341261505, iteration: 113627
loss: 0.9977682828903198,grad_norm: 0.9999991530977186, iteration: 113628
loss: 1.0089921951293945,grad_norm: 0.8939447243955173, iteration: 113629
loss: 1.006020426750183,grad_norm: 0.998111139547315, iteration: 113630
loss: 0.9534149169921875,grad_norm: 0.9623603430839843, iteration: 113631
loss: 1.0095081329345703,grad_norm: 0.9793390387200079, iteration: 113632
loss: 1.0395281314849854,grad_norm: 0.9999994921031327, iteration: 113633
loss: 1.00589120388031,grad_norm: 0.9231009372669965, iteration: 113634
loss: 1.0210144519805908,grad_norm: 0.9957223963782408, iteration: 113635
loss: 0.9973312020301819,grad_norm: 0.9999992502821056, iteration: 113636
loss: 1.0015472173690796,grad_norm: 0.9377142918887552, iteration: 113637
loss: 0.994836688041687,grad_norm: 0.8751601423171476, iteration: 113638
loss: 0.9943787455558777,grad_norm: 0.9999989529495503, iteration: 113639
loss: 1.014146327972412,grad_norm: 0.9814271148661189, iteration: 113640
loss: 0.9713425636291504,grad_norm: 0.9999990227371393, iteration: 113641
loss: 1.0038928985595703,grad_norm: 0.9999991717640545, iteration: 113642
loss: 1.078682780265808,grad_norm: 0.999999423207574, iteration: 113643
loss: 0.9916999340057373,grad_norm: 0.9999990615717209, iteration: 113644
loss: 0.9818180203437805,grad_norm: 0.9999991637969617, iteration: 113645
loss: 0.9891510009765625,grad_norm: 0.9982004564669898, iteration: 113646
loss: 0.9671646952629089,grad_norm: 0.9999993794163047, iteration: 113647
loss: 0.9897714257240295,grad_norm: 0.9999991114973308, iteration: 113648
loss: 0.9753147959709167,grad_norm: 0.9999993529564798, iteration: 113649
loss: 0.9959157705307007,grad_norm: 0.9999990979523617, iteration: 113650
loss: 1.0426520109176636,grad_norm: 0.9999991119663643, iteration: 113651
loss: 1.0212721824645996,grad_norm: 0.9092043601669243, iteration: 113652
loss: 1.032374382019043,grad_norm: 0.966100166947529, iteration: 113653
loss: 0.9785729050636292,grad_norm: 0.8688555264560451, iteration: 113654
loss: 0.9853874444961548,grad_norm: 0.9999989471038372, iteration: 113655
loss: 1.0186727046966553,grad_norm: 0.9999992797386706, iteration: 113656
loss: 1.0717777013778687,grad_norm: 0.9999992777097949, iteration: 113657
loss: 1.0598870515823364,grad_norm: 0.9999996064076279, iteration: 113658
loss: 0.9831938147544861,grad_norm: 0.9999988900946123, iteration: 113659
loss: 1.0170823335647583,grad_norm: 0.99999909725597, iteration: 113660
loss: 0.9889519214630127,grad_norm: 0.9445557716739654, iteration: 113661
loss: 1.0029467344284058,grad_norm: 0.9269752633745348, iteration: 113662
loss: 1.0113420486450195,grad_norm: 0.9595700326095645, iteration: 113663
loss: 1.008604645729065,grad_norm: 0.9999992018636783, iteration: 113664
loss: 1.0245035886764526,grad_norm: 0.9999990563880933, iteration: 113665
loss: 1.0066499710083008,grad_norm: 0.9999991752034263, iteration: 113666
loss: 1.0277891159057617,grad_norm: 0.9999991686030472, iteration: 113667
loss: 1.0075843334197998,grad_norm: 0.9999990949989154, iteration: 113668
loss: 0.9875388741493225,grad_norm: 0.9628261748604325, iteration: 113669
loss: 1.037056565284729,grad_norm: 0.999999112896481, iteration: 113670
loss: 0.9990746378898621,grad_norm: 0.9999992689328377, iteration: 113671
loss: 0.9819293022155762,grad_norm: 0.9999994059265709, iteration: 113672
loss: 1.0682295560836792,grad_norm: 0.9999991090907905, iteration: 113673
loss: 1.011096715927124,grad_norm: 0.9999991669821835, iteration: 113674
loss: 0.9934293627738953,grad_norm: 0.7917543977785255, iteration: 113675
loss: 1.0013389587402344,grad_norm: 0.9651746221572444, iteration: 113676
loss: 1.0036566257476807,grad_norm: 0.9999991616941212, iteration: 113677
loss: 1.000744342803955,grad_norm: 0.9999990697615305, iteration: 113678
loss: 0.9690419435501099,grad_norm: 0.9999991626843784, iteration: 113679
loss: 1.0107699632644653,grad_norm: 0.9999991198970175, iteration: 113680
loss: 1.032820463180542,grad_norm: 0.9999991671840127, iteration: 113681
loss: 1.0206035375595093,grad_norm: 0.9999991197099222, iteration: 113682
loss: 0.9638976454734802,grad_norm: 0.9999991004645453, iteration: 113683
loss: 0.9620274901390076,grad_norm: 0.9773542761371091, iteration: 113684
loss: 0.988045334815979,grad_norm: 0.868042054416, iteration: 113685
loss: 0.9869880080223083,grad_norm: 0.9999988969254172, iteration: 113686
loss: 0.993560254573822,grad_norm: 0.9999995540793593, iteration: 113687
loss: 1.0295804738998413,grad_norm: 0.9999990487112018, iteration: 113688
loss: 0.9840324521064758,grad_norm: 0.9999992745791749, iteration: 113689
loss: 0.9692911505699158,grad_norm: 0.8832840807611624, iteration: 113690
loss: 1.0084257125854492,grad_norm: 0.9999998238766691, iteration: 113691
loss: 1.0401314496994019,grad_norm: 0.9999991385841185, iteration: 113692
loss: 0.9951282143592834,grad_norm: 0.9999989595548302, iteration: 113693
loss: 0.9797203540802002,grad_norm: 0.8812120578384353, iteration: 113694
loss: 1.0038421154022217,grad_norm: 0.9999990944912504, iteration: 113695
loss: 1.0024160146713257,grad_norm: 0.9332517310856583, iteration: 113696
loss: 1.0310581922531128,grad_norm: 0.9999991840119334, iteration: 113697
loss: 0.9678876399993896,grad_norm: 0.8965329292369444, iteration: 113698
loss: 0.9893012642860413,grad_norm: 0.9999990482647348, iteration: 113699
loss: 1.0005520582199097,grad_norm: 0.9999991964728308, iteration: 113700
loss: 1.0286651849746704,grad_norm: 0.9999992638191287, iteration: 113701
loss: 0.9941055774688721,grad_norm: 0.9999990359280141, iteration: 113702
loss: 1.0005590915679932,grad_norm: 0.9999993597578303, iteration: 113703
loss: 0.9854861497879028,grad_norm: 0.9118733308672536, iteration: 113704
loss: 1.0026662349700928,grad_norm: 0.9999989774672736, iteration: 113705
loss: 0.9841894507408142,grad_norm: 0.9535035445068131, iteration: 113706
loss: 1.0069035291671753,grad_norm: 0.9779171499545204, iteration: 113707
loss: 1.0064480304718018,grad_norm: 0.8839681200046123, iteration: 113708
loss: 0.9799643158912659,grad_norm: 0.9069728893176313, iteration: 113709
loss: 0.9941452741622925,grad_norm: 0.9999991648769817, iteration: 113710
loss: 1.018071174621582,grad_norm: 0.9999996186863134, iteration: 113711
loss: 1.0256311893463135,grad_norm: 0.9999993832255876, iteration: 113712
loss: 1.028017282485962,grad_norm: 0.9999993073833431, iteration: 113713
loss: 1.044832706451416,grad_norm: 0.9999991901997456, iteration: 113714
loss: 0.9846649169921875,grad_norm: 0.9999991689231833, iteration: 113715
loss: 0.9511447548866272,grad_norm: 0.977604105119527, iteration: 113716
loss: 0.9769582748413086,grad_norm: 0.9999992131742498, iteration: 113717
loss: 0.9904717206954956,grad_norm: 0.9999991931766388, iteration: 113718
loss: 0.9945971369743347,grad_norm: 0.9999991720364809, iteration: 113719
loss: 0.9952855706214905,grad_norm: 0.9999992041007367, iteration: 113720
loss: 0.9849560856819153,grad_norm: 0.9999991595692632, iteration: 113721
loss: 1.035200834274292,grad_norm: 0.9999993461573999, iteration: 113722
loss: 1.03267240524292,grad_norm: 0.9999993312953912, iteration: 113723
loss: 1.0322177410125732,grad_norm: 0.9999991609252535, iteration: 113724
loss: 1.0247870683670044,grad_norm: 0.9999990763423428, iteration: 113725
loss: 0.9771377444267273,grad_norm: 0.8930813786317885, iteration: 113726
loss: 1.0198097229003906,grad_norm: 0.9977169493375052, iteration: 113727
loss: 0.9998676180839539,grad_norm: 0.9999990599877966, iteration: 113728
loss: 1.001181960105896,grad_norm: 0.9540223758626656, iteration: 113729
loss: 1.018341064453125,grad_norm: 0.9999996902592151, iteration: 113730
loss: 0.9826185703277588,grad_norm: 0.999999271052867, iteration: 113731
loss: 0.9931537508964539,grad_norm: 0.9999992463708758, iteration: 113732
loss: 0.9889776110649109,grad_norm: 0.9999990001705885, iteration: 113733
loss: 0.9905868768692017,grad_norm: 0.9999993617957583, iteration: 113734
loss: 1.0767003297805786,grad_norm: 0.9999994168792784, iteration: 113735
loss: 1.0504387617111206,grad_norm: 0.9999993462381438, iteration: 113736
loss: 1.0343583822250366,grad_norm: 0.9854121342665038, iteration: 113737
loss: 1.0095065832138062,grad_norm: 0.9019405884457423, iteration: 113738
loss: 0.9853286743164062,grad_norm: 0.9999989416079372, iteration: 113739
loss: 1.0051140785217285,grad_norm: 0.999999127844401, iteration: 113740
loss: 0.9946508407592773,grad_norm: 0.9999991896920256, iteration: 113741
loss: 1.02020263671875,grad_norm: 0.9999991635279745, iteration: 113742
loss: 1.0364669561386108,grad_norm: 0.9999993541363523, iteration: 113743
loss: 0.9967908263206482,grad_norm: 0.999999015641848, iteration: 113744
loss: 0.9910529255867004,grad_norm: 0.9302822155777195, iteration: 113745
loss: 0.9811897277832031,grad_norm: 0.9999992900255272, iteration: 113746
loss: 0.991506814956665,grad_norm: 0.9999993374293127, iteration: 113747
loss: 1.0087357759475708,grad_norm: 0.9999995742849742, iteration: 113748
loss: 1.0177531242370605,grad_norm: 0.9525464145781146, iteration: 113749
loss: 1.0640621185302734,grad_norm: 0.9999993156635664, iteration: 113750
loss: 1.0262268781661987,grad_norm: 0.9999990204241835, iteration: 113751
loss: 1.0038316249847412,grad_norm: 0.912026892327048, iteration: 113752
loss: 0.9795544743537903,grad_norm: 0.8719947333701797, iteration: 113753
loss: 1.0145752429962158,grad_norm: 0.9999990079387732, iteration: 113754
loss: 0.9870591163635254,grad_norm: 0.9289425006174862, iteration: 113755
loss: 1.0666061639785767,grad_norm: 0.9999989885205673, iteration: 113756
loss: 0.9755233526229858,grad_norm: 0.9999991135110412, iteration: 113757
loss: 1.019749402999878,grad_norm: 0.9999991991700317, iteration: 113758
loss: 1.003677487373352,grad_norm: 0.9999991505288237, iteration: 113759
loss: 1.0054019689559937,grad_norm: 0.9522611647478787, iteration: 113760
loss: 1.0370148420333862,grad_norm: 0.9999996857968436, iteration: 113761
loss: 0.9830805063247681,grad_norm: 0.9999993200614907, iteration: 113762
loss: 0.9999269843101501,grad_norm: 0.9999991391550089, iteration: 113763
loss: 0.9755960702896118,grad_norm: 0.9888070068702117, iteration: 113764
loss: 0.9858914613723755,grad_norm: 0.981437742946709, iteration: 113765
loss: 0.9832600355148315,grad_norm: 0.9999991559655764, iteration: 113766
loss: 0.9678197503089905,grad_norm: 0.999998979592084, iteration: 113767
loss: 0.9637601971626282,grad_norm: 0.9999991115661139, iteration: 113768
loss: 0.9967756867408752,grad_norm: 0.9346929733267803, iteration: 113769
loss: 1.0276442766189575,grad_norm: 0.9999990959890477, iteration: 113770
loss: 1.0135315656661987,grad_norm: 0.9999990939007076, iteration: 113771
loss: 0.9820154309272766,grad_norm: 0.9999993909859604, iteration: 113772
loss: 0.9930930733680725,grad_norm: 0.9471537889701788, iteration: 113773
loss: 1.0386202335357666,grad_norm: 0.9999992499776286, iteration: 113774
loss: 0.9975262880325317,grad_norm: 0.9592198494898662, iteration: 113775
loss: 1.0068678855895996,grad_norm: 0.9999991997674187, iteration: 113776
loss: 1.0192973613739014,grad_norm: 0.9209197164728058, iteration: 113777
loss: 0.9640031456947327,grad_norm: 0.9999990979997695, iteration: 113778
loss: 1.0152770280838013,grad_norm: 0.9999991173001107, iteration: 113779
loss: 1.0164215564727783,grad_norm: 0.9874901141779152, iteration: 113780
loss: 1.0041743516921997,grad_norm: 0.9999993488274829, iteration: 113781
loss: 0.9928783178329468,grad_norm: 0.8987508176983665, iteration: 113782
loss: 0.9902911186218262,grad_norm: 0.999999118000119, iteration: 113783
loss: 1.0221737623214722,grad_norm: 0.9101157278749809, iteration: 113784
loss: 1.0003712177276611,grad_norm: 0.8848121641778095, iteration: 113785
loss: 1.0108184814453125,grad_norm: 0.9999991216211689, iteration: 113786
loss: 0.9767816066741943,grad_norm: 0.9999991611637903, iteration: 113787
loss: 1.0088545083999634,grad_norm: 0.9999991823442591, iteration: 113788
loss: 0.9625060558319092,grad_norm: 0.9999991704926812, iteration: 113789
loss: 0.9835523366928101,grad_norm: 0.9999990731718791, iteration: 113790
loss: 0.936874508857727,grad_norm: 0.9999990792616174, iteration: 113791
loss: 0.9875068664550781,grad_norm: 0.9670140540005268, iteration: 113792
loss: 0.996724009513855,grad_norm: 0.9088827029882627, iteration: 113793
loss: 1.0009698867797852,grad_norm: 0.99999914753169, iteration: 113794
loss: 0.9956768751144409,grad_norm: 0.9906797084237072, iteration: 113795
loss: 0.9703471660614014,grad_norm: 0.999998842228566, iteration: 113796
loss: 1.0246652364730835,grad_norm: 0.8334648281374399, iteration: 113797
loss: 1.014345407485962,grad_norm: 0.999999254271354, iteration: 113798
loss: 1.0126010179519653,grad_norm: 0.9999990156623078, iteration: 113799
loss: 1.0492830276489258,grad_norm: 0.9999995582379856, iteration: 113800
loss: 0.9959656596183777,grad_norm: 0.9864399140051318, iteration: 113801
loss: 1.0418676137924194,grad_norm: 0.9999992043887719, iteration: 113802
loss: 1.0297998189926147,grad_norm: 0.8575384515435586, iteration: 113803
loss: 0.9671475887298584,grad_norm: 0.9549459441553689, iteration: 113804
loss: 1.035641074180603,grad_norm: 0.9999996783748982, iteration: 113805
loss: 0.9866024851799011,grad_norm: 0.9827279827752967, iteration: 113806
loss: 1.0337525606155396,grad_norm: 0.9684057836267358, iteration: 113807
loss: 1.0006762742996216,grad_norm: 0.9999992246898398, iteration: 113808
loss: 1.0052690505981445,grad_norm: 0.9999991737632756, iteration: 113809
loss: 1.03242826461792,grad_norm: 0.956081280875225, iteration: 113810
loss: 0.9962461590766907,grad_norm: 0.965846173679968, iteration: 113811
loss: 0.9932169318199158,grad_norm: 0.9999992685071797, iteration: 113812
loss: 1.0422494411468506,grad_norm: 0.9740528347150663, iteration: 113813
loss: 0.993466317653656,grad_norm: 0.9999990775074398, iteration: 113814
loss: 1.0074690580368042,grad_norm: 0.9999990744464019, iteration: 113815
loss: 1.0045734643936157,grad_norm: 0.960831152372075, iteration: 113816
loss: 0.9661378860473633,grad_norm: 0.9999990570452397, iteration: 113817
loss: 1.00657057762146,grad_norm: 0.9862525726044064, iteration: 113818
loss: 0.9989349246025085,grad_norm: 0.9999990379580495, iteration: 113819
loss: 0.9786521792411804,grad_norm: 0.9662256872248509, iteration: 113820
loss: 0.9840927124023438,grad_norm: 0.9999991482674467, iteration: 113821
loss: 0.9808425307273865,grad_norm: 0.8943273771116159, iteration: 113822
loss: 0.9802742600440979,grad_norm: 0.9446861660276814, iteration: 113823
loss: 1.0138647556304932,grad_norm: 0.9999990472048839, iteration: 113824
loss: 0.9756267666816711,grad_norm: 0.9999992666358293, iteration: 113825
loss: 0.9978770613670349,grad_norm: 0.8472505136185036, iteration: 113826
loss: 1.0538378953933716,grad_norm: 0.9999995343504438, iteration: 113827
loss: 0.9876251220703125,grad_norm: 0.8845655222641978, iteration: 113828
loss: 1.020161747932434,grad_norm: 0.9999992438434291, iteration: 113829
loss: 1.0069011449813843,grad_norm: 0.9999991937867027, iteration: 113830
loss: 0.9509075880050659,grad_norm: 0.8199428286905853, iteration: 113831
loss: 1.0737497806549072,grad_norm: 0.9999992593958271, iteration: 113832
loss: 1.0118330717086792,grad_norm: 0.9999990522364172, iteration: 113833
loss: 1.0262246131896973,grad_norm: 0.9999996355779662, iteration: 113834
loss: 0.9913455843925476,grad_norm: 0.8553156335221578, iteration: 113835
loss: 1.0061548948287964,grad_norm: 0.9999991972195813, iteration: 113836
loss: 1.00108802318573,grad_norm: 0.9999992239368699, iteration: 113837
loss: 1.020310878753662,grad_norm: 0.8806046272657868, iteration: 113838
loss: 1.0133877992630005,grad_norm: 0.999999119527594, iteration: 113839
loss: 0.9682199954986572,grad_norm: 0.9799008143582038, iteration: 113840
loss: 1.0736134052276611,grad_norm: 0.9999994565514413, iteration: 113841
loss: 1.0124634504318237,grad_norm: 0.999999175186861, iteration: 113842
loss: 1.0457884073257446,grad_norm: 0.9999991258807053, iteration: 113843
loss: 1.050930380821228,grad_norm: 0.9999990614826237, iteration: 113844
loss: 0.9883766174316406,grad_norm: 0.970899067691782, iteration: 113845
loss: 0.9681259989738464,grad_norm: 0.9850615720788124, iteration: 113846
loss: 0.9874435067176819,grad_norm: 0.9999992460752128, iteration: 113847
loss: 1.0223255157470703,grad_norm: 0.9999993041476738, iteration: 113848
loss: 1.0108708143234253,grad_norm: 0.999999374840539, iteration: 113849
loss: 0.9779140949249268,grad_norm: 0.9999989681084552, iteration: 113850
loss: 1.0141441822052002,grad_norm: 0.9753418889292158, iteration: 113851
loss: 0.9674408435821533,grad_norm: 0.9999990954151939, iteration: 113852
loss: 0.9756336808204651,grad_norm: 0.9999990073649507, iteration: 113853
loss: 0.9892984628677368,grad_norm: 0.8925989975657108, iteration: 113854
loss: 1.0378185510635376,grad_norm: 0.999999131627465, iteration: 113855
loss: 1.0163249969482422,grad_norm: 0.9999991188102257, iteration: 113856
loss: 1.00325345993042,grad_norm: 0.9056057671052032, iteration: 113857
loss: 1.0080989599227905,grad_norm: 0.9762787051506531, iteration: 113858
loss: 1.0053671598434448,grad_norm: 0.9999991652636747, iteration: 113859
loss: 1.0152064561843872,grad_norm: 0.9999993154364543, iteration: 113860
loss: 0.9914659261703491,grad_norm: 0.9999992792246245, iteration: 113861
loss: 1.0151433944702148,grad_norm: 0.9999991408394602, iteration: 113862
loss: 0.9867777824401855,grad_norm: 0.9999990524735247, iteration: 113863
loss: 0.9665205478668213,grad_norm: 0.9999991134295361, iteration: 113864
loss: 0.9536262154579163,grad_norm: 0.9999991768076613, iteration: 113865
loss: 0.9830304980278015,grad_norm: 0.9144552140483949, iteration: 113866
loss: 0.9823863506317139,grad_norm: 0.9587574037932882, iteration: 113867
loss: 1.0137243270874023,grad_norm: 0.9850737338768489, iteration: 113868
loss: 1.0066128969192505,grad_norm: 0.9810386727817835, iteration: 113869
loss: 0.9603405594825745,grad_norm: 0.9999997521207513, iteration: 113870
loss: 1.0121568441390991,grad_norm: 0.9402769759649261, iteration: 113871
loss: 1.0166707038879395,grad_norm: 0.9999990231913704, iteration: 113872
loss: 1.0082639455795288,grad_norm: 0.9999989790229875, iteration: 113873
loss: 1.0253870487213135,grad_norm: 0.9790653540867668, iteration: 113874
loss: 1.0016318559646606,grad_norm: 0.9788338945603327, iteration: 113875
loss: 0.9834239482879639,grad_norm: 0.8609824269402997, iteration: 113876
loss: 0.9673844575881958,grad_norm: 0.9999989768890171, iteration: 113877
loss: 1.0029776096343994,grad_norm: 0.999999179488823, iteration: 113878
loss: 1.0005404949188232,grad_norm: 0.9999991173806051, iteration: 113879
loss: 1.0228041410446167,grad_norm: 0.9051548430848707, iteration: 113880
loss: 0.959409773349762,grad_norm: 0.8717220765407229, iteration: 113881
loss: 0.987723171710968,grad_norm: 0.9293125959188439, iteration: 113882
loss: 0.9834365844726562,grad_norm: 0.9999992234518061, iteration: 113883
loss: 1.0221319198608398,grad_norm: 0.9999991502184596, iteration: 113884
loss: 1.0109189748764038,grad_norm: 0.9999990279120664, iteration: 113885
loss: 1.0300389528274536,grad_norm: 0.9999992850151738, iteration: 113886
loss: 0.9743399620056152,grad_norm: 0.8163369956687925, iteration: 113887
loss: 0.9775105714797974,grad_norm: 0.9853991900309844, iteration: 113888
loss: 0.9674828052520752,grad_norm: 0.9999991505435278, iteration: 113889
loss: 0.9955814480781555,grad_norm: 0.9999991665301695, iteration: 113890
loss: 0.9888104796409607,grad_norm: 0.9999991235644721, iteration: 113891
loss: 0.9921714067459106,grad_norm: 0.9578925101051283, iteration: 113892
loss: 1.0216715335845947,grad_norm: 0.9629822909718202, iteration: 113893
loss: 1.005002498626709,grad_norm: 0.9072035810887622, iteration: 113894
loss: 1.0105199813842773,grad_norm: 0.9870720656306843, iteration: 113895
loss: 0.9891641139984131,grad_norm: 0.9380477301115835, iteration: 113896
loss: 1.0162218809127808,grad_norm: 0.9726164650548915, iteration: 113897
loss: 0.973400354385376,grad_norm: 0.9999988326727163, iteration: 113898
loss: 0.9992462992668152,grad_norm: 0.9999991591530851, iteration: 113899
loss: 0.9660666584968567,grad_norm: 0.99999905110784, iteration: 113900
loss: 0.9844729900360107,grad_norm: 0.976096635901427, iteration: 113901
loss: 0.9723047018051147,grad_norm: 0.999999068137393, iteration: 113902
loss: 0.9729059934616089,grad_norm: 0.9999990326644207, iteration: 113903
loss: 0.9688699245452881,grad_norm: 0.8258402342273996, iteration: 113904
loss: 1.0133341550827026,grad_norm: 0.9999991299584265, iteration: 113905
loss: 1.006833791732788,grad_norm: 0.9999990894409435, iteration: 113906
loss: 0.9869480729103088,grad_norm: 0.8883356782910001, iteration: 113907
loss: 0.9728366136550903,grad_norm: 0.964360698568086, iteration: 113908
loss: 0.9566634297370911,grad_norm: 0.9872299764319046, iteration: 113909
loss: 1.0016447305679321,grad_norm: 0.9999992059358943, iteration: 113910
loss: 1.0212756395339966,grad_norm: 0.9999989147112801, iteration: 113911
loss: 1.0274468660354614,grad_norm: 0.9999991725643944, iteration: 113912
loss: 1.0108169317245483,grad_norm: 0.8136763140303657, iteration: 113913
loss: 0.9545672535896301,grad_norm: 0.999999314965761, iteration: 113914
loss: 1.0209518671035767,grad_norm: 0.9999992728635768, iteration: 113915
loss: 0.9548899531364441,grad_norm: 0.9999991662350771, iteration: 113916
loss: 0.9988967776298523,grad_norm: 0.8409359474961058, iteration: 113917
loss: 0.9530097842216492,grad_norm: 0.9458190682881751, iteration: 113918
loss: 1.014880895614624,grad_norm: 0.9999989661524263, iteration: 113919
loss: 0.955154538154602,grad_norm: 0.9999991453172469, iteration: 113920
loss: 1.0155978202819824,grad_norm: 0.9999992449812309, iteration: 113921
loss: 0.9951854944229126,grad_norm: 0.9999991400893672, iteration: 113922
loss: 1.0159035921096802,grad_norm: 0.9999990804837641, iteration: 113923
loss: 0.9898263216018677,grad_norm: 0.9999992423412091, iteration: 113924
loss: 0.97231525182724,grad_norm: 0.9239536977099476, iteration: 113925
loss: 0.9804807305335999,grad_norm: 0.921539086965734, iteration: 113926
loss: 0.985413134098053,grad_norm: 0.9229514087926801, iteration: 113927
loss: 1.0090841054916382,grad_norm: 0.9512844393364369, iteration: 113928
loss: 1.0384643077850342,grad_norm: 0.9999993592015307, iteration: 113929
loss: 1.0396735668182373,grad_norm: 0.9999990616283422, iteration: 113930
loss: 0.9637892842292786,grad_norm: 0.9631267387312924, iteration: 113931
loss: 1.0194058418273926,grad_norm: 0.9999990891551539, iteration: 113932
loss: 1.0579509735107422,grad_norm: 0.9999990837468944, iteration: 113933
loss: 1.0112202167510986,grad_norm: 0.9350965072688662, iteration: 113934
loss: 0.9814125299453735,grad_norm: 0.8813897579185148, iteration: 113935
loss: 1.012579321861267,grad_norm: 0.9576772124154965, iteration: 113936
loss: 1.009594440460205,grad_norm: 0.9999993019935405, iteration: 113937
loss: 0.9741242527961731,grad_norm: 0.9999993282054405, iteration: 113938
loss: 1.0162129402160645,grad_norm: 0.9633095502933863, iteration: 113939
loss: 0.9924221634864807,grad_norm: 0.9999992295401153, iteration: 113940
loss: 1.0226699113845825,grad_norm: 0.9999993741940615, iteration: 113941
loss: 0.9910818934440613,grad_norm: 0.9999991216386409, iteration: 113942
loss: 1.0038506984710693,grad_norm: 0.9922810990507887, iteration: 113943
loss: 1.0114669799804688,grad_norm: 0.9616269421289648, iteration: 113944
loss: 0.9675373435020447,grad_norm: 0.9999992523330838, iteration: 113945
loss: 0.9960988163948059,grad_norm: 0.9999992428860228, iteration: 113946
loss: 1.0319424867630005,grad_norm: 0.9999991151052406, iteration: 113947
loss: 0.9849449992179871,grad_norm: 0.9022635531760322, iteration: 113948
loss: 1.003816843032837,grad_norm: 0.95884212546439, iteration: 113949
loss: 1.0039218664169312,grad_norm: 0.9396984966582048, iteration: 113950
loss: 1.0065357685089111,grad_norm: 0.9900565613402974, iteration: 113951
loss: 1.0002614259719849,grad_norm: 0.9999992376790146, iteration: 113952
loss: 0.9748857021331787,grad_norm: 0.9999991268632449, iteration: 113953
loss: 1.0176912546157837,grad_norm: 0.8586322575749074, iteration: 113954
loss: 0.9665799140930176,grad_norm: 0.9999992328790097, iteration: 113955
loss: 1.0002617835998535,grad_norm: 0.9999992538284556, iteration: 113956
loss: 0.9804539680480957,grad_norm: 0.9999992409405125, iteration: 113957
loss: 1.0015898942947388,grad_norm: 0.9999992791905808, iteration: 113958
loss: 1.0050380229949951,grad_norm: 0.9999989971420721, iteration: 113959
loss: 1.0332845449447632,grad_norm: 0.9999991178336028, iteration: 113960
loss: 1.0010641813278198,grad_norm: 0.9999995374275922, iteration: 113961
loss: 0.9937676787376404,grad_norm: 0.9999993202879932, iteration: 113962
loss: 0.9768874049186707,grad_norm: 0.9999992757446309, iteration: 113963
loss: 0.989189088344574,grad_norm: 0.9999991340826074, iteration: 113964
loss: 1.0022459030151367,grad_norm: 0.9999991484930086, iteration: 113965
loss: 1.0647939443588257,grad_norm: 0.9999992604971514, iteration: 113966
loss: 1.004894733428955,grad_norm: 0.9696018101570112, iteration: 113967
loss: 1.0013819932937622,grad_norm: 0.9723585208324305, iteration: 113968
loss: 0.9553108215332031,grad_norm: 0.999999176575403, iteration: 113969
loss: 1.00455904006958,grad_norm: 0.9999991559843456, iteration: 113970
loss: 0.9950098991394043,grad_norm: 0.9999992063530179, iteration: 113971
loss: 0.9602726697921753,grad_norm: 0.9868847883667355, iteration: 113972
loss: 0.986695408821106,grad_norm: 0.9766306063998458, iteration: 113973
loss: 1.021831750869751,grad_norm: 0.9999995387581706, iteration: 113974
loss: 1.0193169116973877,grad_norm: 0.9999991466205198, iteration: 113975
loss: 1.0127508640289307,grad_norm: 0.9020759726031811, iteration: 113976
loss: 0.9896584153175354,grad_norm: 0.9477741056618371, iteration: 113977
loss: 1.0184273719787598,grad_norm: 0.9999996588789611, iteration: 113978
loss: 1.005208969116211,grad_norm: 0.9999990597128304, iteration: 113979
loss: 1.011405348777771,grad_norm: 0.9999990769842118, iteration: 113980
loss: 1.010959506034851,grad_norm: 0.9999991670706897, iteration: 113981
loss: 0.9865526556968689,grad_norm: 0.9482210201048219, iteration: 113982
loss: 1.0037662982940674,grad_norm: 0.9163816760746563, iteration: 113983
loss: 1.0062570571899414,grad_norm: 0.95109898537007, iteration: 113984
loss: 0.993246853351593,grad_norm: 0.9999991356175587, iteration: 113985
loss: 0.9990338087081909,grad_norm: 0.9999992145909843, iteration: 113986
loss: 1.0166823863983154,grad_norm: 0.9999990261320614, iteration: 113987
loss: 1.0150314569473267,grad_norm: 0.9999994102340052, iteration: 113988
loss: 1.0121124982833862,grad_norm: 0.9197976661170644, iteration: 113989
loss: 1.0074732303619385,grad_norm: 0.9999991366052486, iteration: 113990
loss: 1.0002100467681885,grad_norm: 0.9999991841410268, iteration: 113991
loss: 0.9607545733451843,grad_norm: 0.9999991826910826, iteration: 113992
loss: 1.0184537172317505,grad_norm: 0.9020238072031055, iteration: 113993
loss: 1.0276604890823364,grad_norm: 0.9999991337866762, iteration: 113994
loss: 1.0330537557601929,grad_norm: 0.9999991625462398, iteration: 113995
loss: 1.0024492740631104,grad_norm: 0.9150470103688415, iteration: 113996
loss: 0.9731674790382385,grad_norm: 0.9999991762145706, iteration: 113997
loss: 0.9827033877372742,grad_norm: 0.9824084932278936, iteration: 113998
loss: 0.9903398752212524,grad_norm: 0.8915644999154304, iteration: 113999
loss: 1.0039570331573486,grad_norm: 0.9999991295161064, iteration: 114000
loss: 0.9809994697570801,grad_norm: 0.9514747116216259, iteration: 114001
loss: 1.0025848150253296,grad_norm: 0.9999990832104296, iteration: 114002
loss: 0.9982096552848816,grad_norm: 0.9999996134755607, iteration: 114003
loss: 0.984754741191864,grad_norm: 0.9907514455541535, iteration: 114004
loss: 0.9782226085662842,grad_norm: 0.9999994247941499, iteration: 114005
loss: 1.0149513483047485,grad_norm: 0.9999992359957759, iteration: 114006
loss: 0.9797729253768921,grad_norm: 0.9972009576463396, iteration: 114007
loss: 0.9846393465995789,grad_norm: 0.8641474458750892, iteration: 114008
loss: 0.9523577094078064,grad_norm: 0.9999992183908417, iteration: 114009
loss: 1.020257592201233,grad_norm: 0.9999990584008471, iteration: 114010
loss: 1.0375735759735107,grad_norm: 0.9999992017749654, iteration: 114011
loss: 1.0140212774276733,grad_norm: 0.8977514604516821, iteration: 114012
loss: 0.9611583352088928,grad_norm: 0.9999991474737091, iteration: 114013
loss: 0.9834534525871277,grad_norm: 0.9999989923459023, iteration: 114014
loss: 1.020464301109314,grad_norm: 0.8783149572339403, iteration: 114015
loss: 1.0028183460235596,grad_norm: 0.9693498129149707, iteration: 114016
loss: 1.0161679983139038,grad_norm: 0.9999989503341373, iteration: 114017
loss: 0.974453866481781,grad_norm: 0.9687990069195737, iteration: 114018
loss: 0.963087797164917,grad_norm: 0.9999990423484699, iteration: 114019
loss: 0.9851455688476562,grad_norm: 0.9999990309261966, iteration: 114020
loss: 1.0172029733657837,grad_norm: 0.877882595525792, iteration: 114021
loss: 0.9810197949409485,grad_norm: 0.9999991643160253, iteration: 114022
loss: 1.054354190826416,grad_norm: 0.8995220007496268, iteration: 114023
loss: 1.0173771381378174,grad_norm: 0.9999991176512544, iteration: 114024
loss: 0.9817212224006653,grad_norm: 0.9999991158623942, iteration: 114025
loss: 1.042521595954895,grad_norm: 0.9699398730550416, iteration: 114026
loss: 1.0346637964248657,grad_norm: 0.9999991509112784, iteration: 114027
loss: 0.9871664643287659,grad_norm: 0.8284113841249141, iteration: 114028
loss: 1.0129770040512085,grad_norm: 0.9649968947726477, iteration: 114029
loss: 0.9935509562492371,grad_norm: 0.9551747815872499, iteration: 114030
loss: 1.0045868158340454,grad_norm: 0.8708032703350773, iteration: 114031
loss: 0.9974750876426697,grad_norm: 0.9903112116737628, iteration: 114032
loss: 0.9698855876922607,grad_norm: 0.9465842686436656, iteration: 114033
loss: 0.9967790842056274,grad_norm: 0.9999990221715933, iteration: 114034
loss: 0.99541836977005,grad_norm: 0.8894969203188677, iteration: 114035
loss: 0.9985221028327942,grad_norm: 0.9033230602982498, iteration: 114036
loss: 1.012757658958435,grad_norm: 0.9999992369224866, iteration: 114037
loss: 0.9909216165542603,grad_norm: 0.9999989677242371, iteration: 114038
loss: 1.004036784172058,grad_norm: 0.9999990993176758, iteration: 114039
loss: 1.0235775709152222,grad_norm: 0.9999989693493716, iteration: 114040
loss: 1.0089303255081177,grad_norm: 0.9999990535313114, iteration: 114041
loss: 1.0390206575393677,grad_norm: 0.9999991668282884, iteration: 114042
loss: 1.0066239833831787,grad_norm: 0.9412092639608306, iteration: 114043
loss: 1.011739730834961,grad_norm: 0.9619624725840233, iteration: 114044
loss: 0.962716817855835,grad_norm: 0.948815839141623, iteration: 114045
loss: 0.9915355443954468,grad_norm: 0.9999992174319602, iteration: 114046
loss: 0.9950397610664368,grad_norm: 0.9999990251142318, iteration: 114047
loss: 1.0097547769546509,grad_norm: 0.8142415205552342, iteration: 114048
loss: 1.016177773475647,grad_norm: 0.9999991565683736, iteration: 114049
loss: 1.0038434267044067,grad_norm: 0.9999990539137762, iteration: 114050
loss: 1.0095094442367554,grad_norm: 0.9999991660659018, iteration: 114051
loss: 1.0046418905258179,grad_norm: 0.8841460579336243, iteration: 114052
loss: 0.9745120406150818,grad_norm: 0.9251220130468066, iteration: 114053
loss: 1.0023525953292847,grad_norm: 0.9999993250845809, iteration: 114054
loss: 1.0124845504760742,grad_norm: 0.9440013134488405, iteration: 114055
loss: 1.0225073099136353,grad_norm: 0.9963527094716533, iteration: 114056
loss: 0.9792031645774841,grad_norm: 0.9293912020457207, iteration: 114057
loss: 0.9784862399101257,grad_norm: 0.9999991572799526, iteration: 114058
loss: 0.994784414768219,grad_norm: 0.9999992029315068, iteration: 114059
loss: 1.013331651687622,grad_norm: 0.9999991223460495, iteration: 114060
loss: 1.0015604496002197,grad_norm: 0.9094021008624181, iteration: 114061
loss: 1.0013982057571411,grad_norm: 0.9999991685657966, iteration: 114062
loss: 0.996958315372467,grad_norm: 0.9999989980552356, iteration: 114063
loss: 0.9833240509033203,grad_norm: 0.822245008026412, iteration: 114064
loss: 1.0047191381454468,grad_norm: 0.9999991365367011, iteration: 114065
loss: 1.0204564332962036,grad_norm: 0.9999989943229586, iteration: 114066
loss: 0.9863339066505432,grad_norm: 0.9999992228033008, iteration: 114067
loss: 1.039874792098999,grad_norm: 0.9780391424360888, iteration: 114068
loss: 0.9624636769294739,grad_norm: 0.9999992203357753, iteration: 114069
loss: 1.0228304862976074,grad_norm: 0.986921668664875, iteration: 114070
loss: 0.9903134703636169,grad_norm: 0.9999990739558868, iteration: 114071
loss: 0.9911770224571228,grad_norm: 0.9484449056862132, iteration: 114072
loss: 0.9955912828445435,grad_norm: 0.9999992001164858, iteration: 114073
loss: 0.9820988178253174,grad_norm: 0.9999990973234463, iteration: 114074
loss: 0.9783340692520142,grad_norm: 0.95254523973449, iteration: 114075
loss: 1.0668652057647705,grad_norm: 0.9879686295746517, iteration: 114076
loss: 1.0027763843536377,grad_norm: 0.9999990461922071, iteration: 114077
loss: 1.0256390571594238,grad_norm: 0.934287298536484, iteration: 114078
loss: 1.0842927694320679,grad_norm: 0.9999995099016687, iteration: 114079
loss: 0.9839206337928772,grad_norm: 0.9999990885728669, iteration: 114080
loss: 1.0005605220794678,grad_norm: 0.9999990142337629, iteration: 114081
loss: 0.9730284810066223,grad_norm: 0.9999989553847123, iteration: 114082
loss: 0.9725726246833801,grad_norm: 0.9999992343305167, iteration: 114083
loss: 1.0069420337677002,grad_norm: 0.999999032672813, iteration: 114084
loss: 0.9892313480377197,grad_norm: 0.999998965312444, iteration: 114085
loss: 1.0244157314300537,grad_norm: 0.9999989759619832, iteration: 114086
loss: 0.98434978723526,grad_norm: 0.9999992612683829, iteration: 114087
loss: 0.9804994463920593,grad_norm: 0.9999991476043164, iteration: 114088
loss: 1.0181994438171387,grad_norm: 0.9999991338656739, iteration: 114089
loss: 0.9729426503181458,grad_norm: 0.9999991319992674, iteration: 114090
loss: 1.0016353130340576,grad_norm: 0.8811028098658464, iteration: 114091
loss: 0.9920348525047302,grad_norm: 0.9999991178971335, iteration: 114092
loss: 0.9720590710639954,grad_norm: 0.949312598554716, iteration: 114093
loss: 1.0457830429077148,grad_norm: 0.9999991324193584, iteration: 114094
loss: 0.9621057510375977,grad_norm: 0.9889668680260643, iteration: 114095
loss: 1.0133187770843506,grad_norm: 0.9269038790431164, iteration: 114096
loss: 0.9884650111198425,grad_norm: 0.9553657339485165, iteration: 114097
loss: 1.0088475942611694,grad_norm: 0.9999991167039771, iteration: 114098
loss: 1.0545731782913208,grad_norm: 0.999999525156954, iteration: 114099
loss: 1.0136915445327759,grad_norm: 0.9999991950778945, iteration: 114100
loss: 1.0131921768188477,grad_norm: 0.9343474402318825, iteration: 114101
loss: 0.9950748085975647,grad_norm: 0.9999992325566445, iteration: 114102
loss: 1.0265910625457764,grad_norm: 0.9382327130079264, iteration: 114103
loss: 0.9914785623550415,grad_norm: 0.9999991176738743, iteration: 114104
loss: 0.9662769436836243,grad_norm: 0.8888744745189249, iteration: 114105
loss: 1.010975956916809,grad_norm: 0.9196193272154889, iteration: 114106
loss: 1.0348365306854248,grad_norm: 0.9999996350507092, iteration: 114107
loss: 0.9815675616264343,grad_norm: 0.7766864926193459, iteration: 114108
loss: 1.0324411392211914,grad_norm: 0.9999992251040478, iteration: 114109
loss: 0.9767357110977173,grad_norm: 0.9999990237360393, iteration: 114110
loss: 1.0280838012695312,grad_norm: 0.9999990476495749, iteration: 114111
loss: 1.0310016870498657,grad_norm: 0.9999990601592079, iteration: 114112
loss: 1.009907841682434,grad_norm: 0.9999989553069782, iteration: 114113
loss: 1.0154004096984863,grad_norm: 0.9999989354357653, iteration: 114114
loss: 1.0017290115356445,grad_norm: 0.9999991077478707, iteration: 114115
loss: 0.9707870483398438,grad_norm: 0.9683321740849679, iteration: 114116
loss: 1.0096391439437866,grad_norm: 0.9874399185608707, iteration: 114117
loss: 1.0050610303878784,grad_norm: 0.9999991906063761, iteration: 114118
loss: 0.9724088311195374,grad_norm: 0.97552186623118, iteration: 114119
loss: 0.9907370209693909,grad_norm: 0.9999992046199356, iteration: 114120
loss: 1.0068062543869019,grad_norm: 0.9999998674211563, iteration: 114121
loss: 1.0185121297836304,grad_norm: 0.9999990508899825, iteration: 114122
loss: 1.0078338384628296,grad_norm: 0.9999991430343862, iteration: 114123
loss: 0.9708478450775146,grad_norm: 0.9999992134846193, iteration: 114124
loss: 0.9892712831497192,grad_norm: 0.999999240744813, iteration: 114125
loss: 0.9803239703178406,grad_norm: 0.9999992550407046, iteration: 114126
loss: 0.9369757771492004,grad_norm: 0.9999989980565428, iteration: 114127
loss: 1.0039795637130737,grad_norm: 0.9999991498979937, iteration: 114128
loss: 0.9720647931098938,grad_norm: 0.9999990310943316, iteration: 114129
loss: 0.9702820181846619,grad_norm: 0.9999991508190275, iteration: 114130
loss: 1.0168200731277466,grad_norm: 0.9999991767951063, iteration: 114131
loss: 1.0064688920974731,grad_norm: 0.9065792519640931, iteration: 114132
loss: 1.0096790790557861,grad_norm: 0.9999991816675918, iteration: 114133
loss: 0.9960958361625671,grad_norm: 0.8778494773584404, iteration: 114134
loss: 1.0037586688995361,grad_norm: 0.9999989535583105, iteration: 114135
loss: 0.9792141914367676,grad_norm: 0.9999990220957423, iteration: 114136
loss: 0.9660722017288208,grad_norm: 0.9999990930584534, iteration: 114137
loss: 0.9798436164855957,grad_norm: 0.9999991115617324, iteration: 114138
loss: 1.031787395477295,grad_norm: 0.9999990692361231, iteration: 114139
loss: 1.1280100345611572,grad_norm: 0.9999990618035876, iteration: 114140
loss: 1.0039669275283813,grad_norm: 0.9999991107014358, iteration: 114141
loss: 1.0282210111618042,grad_norm: 0.9999991426468671, iteration: 114142
loss: 0.9762997627258301,grad_norm: 0.9244625719269655, iteration: 114143
loss: 1.0263718366622925,grad_norm: 0.9999990227310495, iteration: 114144
loss: 0.9766265749931335,grad_norm: 0.8528754558044792, iteration: 114145
loss: 0.9759305119514465,grad_norm: 0.9999992444256279, iteration: 114146
loss: 0.990452229976654,grad_norm: 0.88808055910279, iteration: 114147
loss: 0.9712268114089966,grad_norm: 0.9999992008754641, iteration: 114148
loss: 0.9739206433296204,grad_norm: 0.9999992820765435, iteration: 114149
loss: 1.009048342704773,grad_norm: 0.9999996688764589, iteration: 114150
loss: 1.0170882940292358,grad_norm: 0.9999991134299976, iteration: 114151
loss: 0.98077392578125,grad_norm: 0.9999993314249501, iteration: 114152
loss: 1.0169123411178589,grad_norm: 0.9999992292646944, iteration: 114153
loss: 0.9919822812080383,grad_norm: 0.9999991947359851, iteration: 114154
loss: 1.0123716592788696,grad_norm: 0.999999161223628, iteration: 114155
loss: 0.9728553295135498,grad_norm: 0.9999991308055289, iteration: 114156
loss: 1.0105160474777222,grad_norm: 0.9775333624024228, iteration: 114157
loss: 0.9984642863273621,grad_norm: 0.9999990036423264, iteration: 114158
loss: 0.9851219058036804,grad_norm: 0.999998966693918, iteration: 114159
loss: 0.9960694313049316,grad_norm: 0.999999044664767, iteration: 114160
loss: 0.9630002975463867,grad_norm: 0.9516466501423969, iteration: 114161
loss: 1.022810697555542,grad_norm: 0.9999990438985132, iteration: 114162
loss: 1.013413667678833,grad_norm: 0.9999991703228505, iteration: 114163
loss: 0.999741792678833,grad_norm: 0.999999052675262, iteration: 114164
loss: 0.9696698784828186,grad_norm: 0.9999990659604232, iteration: 114165
loss: 1.029943585395813,grad_norm: 0.9999992049733025, iteration: 114166
loss: 0.9951501488685608,grad_norm: 0.9999992226423483, iteration: 114167
loss: 0.998426079750061,grad_norm: 0.9999990558585344, iteration: 114168
loss: 0.9823073148727417,grad_norm: 0.9999991345216469, iteration: 114169
loss: 1.0006976127624512,grad_norm: 0.9999991371832414, iteration: 114170
loss: 1.0105878114700317,grad_norm: 0.9999988981834856, iteration: 114171
loss: 0.974007248878479,grad_norm: 0.9651549351774102, iteration: 114172
loss: 0.9996329545974731,grad_norm: 0.999999146120729, iteration: 114173
loss: 1.0135834217071533,grad_norm: 0.9404568743034316, iteration: 114174
loss: 0.9501364827156067,grad_norm: 0.9999990399707865, iteration: 114175
loss: 1.0228163003921509,grad_norm: 0.9168859276270918, iteration: 114176
loss: 0.9568724632263184,grad_norm: 0.9999990071733759, iteration: 114177
loss: 0.9424082636833191,grad_norm: 0.9882396762964064, iteration: 114178
loss: 1.0338088274002075,grad_norm: 0.9721678551080829, iteration: 114179
loss: 1.0242183208465576,grad_norm: 0.9999990756339325, iteration: 114180
loss: 1.016119360923767,grad_norm: 0.9689275858443177, iteration: 114181
loss: 0.9738783240318298,grad_norm: 0.9999991767906794, iteration: 114182
loss: 0.9893079400062561,grad_norm: 0.9999993082559592, iteration: 114183
loss: 0.9655671119689941,grad_norm: 0.9583463751807544, iteration: 114184
loss: 0.9712902307510376,grad_norm: 0.9772217157310163, iteration: 114185
loss: 1.0221896171569824,grad_norm: 0.9999991496357805, iteration: 114186
loss: 0.9857590794563293,grad_norm: 0.8895751721516905, iteration: 114187
loss: 1.060834527015686,grad_norm: 0.9999990456856769, iteration: 114188
loss: 0.9810110330581665,grad_norm: 0.9999992399190619, iteration: 114189
loss: 1.001680850982666,grad_norm: 0.999999137412344, iteration: 114190
loss: 0.994573175907135,grad_norm: 0.9737558559572987, iteration: 114191
loss: 0.9888643026351929,grad_norm: 0.9895218610917361, iteration: 114192
loss: 1.0045281648635864,grad_norm: 0.9999991919500522, iteration: 114193
loss: 0.9258925914764404,grad_norm: 0.899523388565956, iteration: 114194
loss: 1.0136481523513794,grad_norm: 0.9999991759950572, iteration: 114195
loss: 1.0254786014556885,grad_norm: 0.9999990233878573, iteration: 114196
loss: 0.9761056303977966,grad_norm: 0.9259218138009488, iteration: 114197
loss: 1.056564450263977,grad_norm: 0.9999992158495168, iteration: 114198
loss: 1.0004795789718628,grad_norm: 0.9999991213296044, iteration: 114199
loss: 1.0020419359207153,grad_norm: 0.877439896708972, iteration: 114200
loss: 1.0141648054122925,grad_norm: 0.999999254560559, iteration: 114201
loss: 1.016005277633667,grad_norm: 0.9999992138851099, iteration: 114202
loss: 0.98108971118927,grad_norm: 0.999999125813533, iteration: 114203
loss: 1.055735468864441,grad_norm: 0.9999989966578773, iteration: 114204
loss: 0.9486954212188721,grad_norm: 0.9999990095400616, iteration: 114205
loss: 1.0458784103393555,grad_norm: 0.9999990521545692, iteration: 114206
loss: 0.9763460159301758,grad_norm: 0.9999991444584562, iteration: 114207
loss: 1.0187432765960693,grad_norm: 0.9999993511928971, iteration: 114208
loss: 0.9783834218978882,grad_norm: 0.9831942047054982, iteration: 114209
loss: 0.9838359355926514,grad_norm: 0.997483158029292, iteration: 114210
loss: 1.0397679805755615,grad_norm: 0.9999991619489561, iteration: 114211
loss: 1.0112920999526978,grad_norm: 0.999998940046261, iteration: 114212
loss: 1.0117862224578857,grad_norm: 0.9999990697795215, iteration: 114213
loss: 1.0154623985290527,grad_norm: 0.9999991719586316, iteration: 114214
loss: 1.0556966066360474,grad_norm: 0.9999991357177107, iteration: 114215
loss: 1.0094395875930786,grad_norm: 0.999999142871436, iteration: 114216
loss: 0.9925122261047363,grad_norm: 0.9577145074351943, iteration: 114217
loss: 1.0226304531097412,grad_norm: 0.8795832506223596, iteration: 114218
loss: 1.0239450931549072,grad_norm: 0.9728476948739126, iteration: 114219
loss: 1.0098811388015747,grad_norm: 0.9752299457697272, iteration: 114220
loss: 0.9468780755996704,grad_norm: 0.9587975777545082, iteration: 114221
loss: 0.9560222625732422,grad_norm: 0.9999992471633465, iteration: 114222
loss: 1.0061469078063965,grad_norm: 0.9273679991437384, iteration: 114223
loss: 1.0167914628982544,grad_norm: 0.9731068638244308, iteration: 114224
loss: 1.0016584396362305,grad_norm: 0.9992975849316341, iteration: 114225
loss: 1.0197619199752808,grad_norm: 0.9999992422661663, iteration: 114226
loss: 1.0103111267089844,grad_norm: 0.9999993128566875, iteration: 114227
loss: 0.9775262475013733,grad_norm: 0.9429713743451077, iteration: 114228
loss: 0.9761030077934265,grad_norm: 0.9040655405675622, iteration: 114229
loss: 1.0496245622634888,grad_norm: 0.8406807483210228, iteration: 114230
loss: 0.9724896550178528,grad_norm: 0.9929016643742291, iteration: 114231
loss: 1.0223742723464966,grad_norm: 0.9999990928906336, iteration: 114232
loss: 0.9966416954994202,grad_norm: 0.9999990225517292, iteration: 114233
loss: 0.987948477268219,grad_norm: 0.9594986994297262, iteration: 114234
loss: 1.0110836029052734,grad_norm: 0.9999993178127206, iteration: 114235
loss: 1.0054073333740234,grad_norm: 0.9144502036908565, iteration: 114236
loss: 0.9768169522285461,grad_norm: 0.9999991011629756, iteration: 114237
loss: 0.9804350733757019,grad_norm: 0.9999989829453169, iteration: 114238
loss: 0.9896829724311829,grad_norm: 0.9758652846091735, iteration: 114239
loss: 1.0289095640182495,grad_norm: 0.9915865312669697, iteration: 114240
loss: 1.0260705947875977,grad_norm: 0.9999991207105681, iteration: 114241
loss: 1.002478003501892,grad_norm: 0.9240912458554198, iteration: 114242
loss: 0.977594256401062,grad_norm: 0.8397983302690211, iteration: 114243
loss: 0.9571103453636169,grad_norm: 0.9977362232509376, iteration: 114244
loss: 1.0086127519607544,grad_norm: 0.9999991543151902, iteration: 114245
loss: 1.0007559061050415,grad_norm: 0.9107337441138471, iteration: 114246
loss: 0.9873464703559875,grad_norm: 0.9999992811028353, iteration: 114247
loss: 1.0017216205596924,grad_norm: 0.9999993294109982, iteration: 114248
loss: 0.9979087114334106,grad_norm: 0.9999989973514597, iteration: 114249
loss: 0.9982706904411316,grad_norm: 0.9999991602894698, iteration: 114250
loss: 0.9698331952095032,grad_norm: 0.9999990553805175, iteration: 114251
loss: 0.9550052285194397,grad_norm: 0.8802930787714075, iteration: 114252
loss: 1.0149739980697632,grad_norm: 0.9999990452798231, iteration: 114253
loss: 0.9922828078269958,grad_norm: 0.9999991507251895, iteration: 114254
loss: 1.015575647354126,grad_norm: 0.9999990658480125, iteration: 114255
loss: 1.0270448923110962,grad_norm: 0.9999991310935721, iteration: 114256
loss: 0.9994766116142273,grad_norm: 0.9999992721745262, iteration: 114257
loss: 0.9735172986984253,grad_norm: 0.9999990351593759, iteration: 114258
loss: 1.001846194267273,grad_norm: 0.9999989928055222, iteration: 114259
loss: 0.9971296191215515,grad_norm: 0.9999991358076447, iteration: 114260
loss: 1.0313713550567627,grad_norm: 0.9999990330472377, iteration: 114261
loss: 1.0569345951080322,grad_norm: 0.9999992422544458, iteration: 114262
loss: 1.0053882598876953,grad_norm: 0.9999991886483345, iteration: 114263
loss: 0.9842763543128967,grad_norm: 0.9999990401551111, iteration: 114264
loss: 0.9731777310371399,grad_norm: 0.9999991571294709, iteration: 114265
loss: 1.0230433940887451,grad_norm: 0.9999990941462071, iteration: 114266
loss: 1.045140266418457,grad_norm: 0.999999166459535, iteration: 114267
loss: 1.0052658319473267,grad_norm: 0.8904959548926424, iteration: 114268
loss: 0.9783504009246826,grad_norm: 0.9999990644353401, iteration: 114269
loss: 1.0230180025100708,grad_norm: 0.8870231618622999, iteration: 114270
loss: 0.9915509819984436,grad_norm: 0.9899563865957566, iteration: 114271
loss: 1.0320059061050415,grad_norm: 0.9999991421950508, iteration: 114272
loss: 0.9894269108772278,grad_norm: 0.9999993645508701, iteration: 114273
loss: 1.0043507814407349,grad_norm: 0.9999991009154456, iteration: 114274
loss: 0.959441065788269,grad_norm: 0.9638717120089292, iteration: 114275
loss: 1.0164707899093628,grad_norm: 0.9050981772125112, iteration: 114276
loss: 1.0039288997650146,grad_norm: 0.999999112912729, iteration: 114277
loss: 1.015045404434204,grad_norm: 0.998226280310282, iteration: 114278
loss: 1.0058072805404663,grad_norm: 0.94890985855873, iteration: 114279
loss: 0.97938072681427,grad_norm: 0.9999993922470151, iteration: 114280
loss: 1.021781086921692,grad_norm: 0.9999990863157783, iteration: 114281
loss: 0.9877581596374512,grad_norm: 0.9576706440520575, iteration: 114282
loss: 1.0264812707901,grad_norm: 0.9091324651902787, iteration: 114283
loss: 1.0159821510314941,grad_norm: 0.999998982398702, iteration: 114284
loss: 1.0232011079788208,grad_norm: 0.9999990104900525, iteration: 114285
loss: 0.9976602792739868,grad_norm: 0.9999992723641715, iteration: 114286
loss: 0.9770362973213196,grad_norm: 0.9999991737952196, iteration: 114287
loss: 0.9987603425979614,grad_norm: 0.9999991708992368, iteration: 114288
loss: 1.0079407691955566,grad_norm: 0.9999991573232102, iteration: 114289
loss: 0.976203978061676,grad_norm: 0.9999990802951051, iteration: 114290
loss: 1.0545316934585571,grad_norm: 0.9999992279574915, iteration: 114291
loss: 1.0072652101516724,grad_norm: 0.999999167688056, iteration: 114292
loss: 0.9873180985450745,grad_norm: 0.849640598163051, iteration: 114293
loss: 1.018216609954834,grad_norm: 0.9999990987286526, iteration: 114294
loss: 1.0168522596359253,grad_norm: 0.9999990393557933, iteration: 114295
loss: 1.0098676681518555,grad_norm: 0.8841135955463593, iteration: 114296
loss: 1.027025818824768,grad_norm: 0.9999999163242059, iteration: 114297
loss: 1.0255943536758423,grad_norm: 0.9997516441009799, iteration: 114298
loss: 1.0482256412506104,grad_norm: 0.9999992535089746, iteration: 114299
loss: 0.974399745464325,grad_norm: 0.9999990372624336, iteration: 114300
loss: 0.9861097931861877,grad_norm: 0.9999991401270771, iteration: 114301
loss: 0.9914360046386719,grad_norm: 0.9404242571926738, iteration: 114302
loss: 1.0233937501907349,grad_norm: 0.9999998884649874, iteration: 114303
loss: 1.0265381336212158,grad_norm: 0.9999994260882901, iteration: 114304
loss: 1.0303832292556763,grad_norm: 0.9999991796841186, iteration: 114305
loss: 1.0038594007492065,grad_norm: 0.9999990357229201, iteration: 114306
loss: 1.0091313123703003,grad_norm: 0.9999998520847694, iteration: 114307
loss: 1.00480318069458,grad_norm: 0.9999991493308313, iteration: 114308
loss: 1.0075680017471313,grad_norm: 0.9999990890598811, iteration: 114309
loss: 0.9464261531829834,grad_norm: 0.9210188499285161, iteration: 114310
loss: 0.9867992997169495,grad_norm: 0.9999991045485742, iteration: 114311
loss: 0.9977205395698547,grad_norm: 0.9530039760172141, iteration: 114312
loss: 1.0380665063858032,grad_norm: 0.9999993039915844, iteration: 114313
loss: 1.004346489906311,grad_norm: 0.9999991932265097, iteration: 114314
loss: 1.0030587911605835,grad_norm: 0.9999991332439109, iteration: 114315
loss: 0.9585800170898438,grad_norm: 0.9999991261289908, iteration: 114316
loss: 0.9929145574569702,grad_norm: 0.9689458321003707, iteration: 114317
loss: 1.0049586296081543,grad_norm: 0.9999990127312214, iteration: 114318
loss: 0.980628252029419,grad_norm: 0.9999990025008423, iteration: 114319
loss: 0.9833042621612549,grad_norm: 0.9924753311742707, iteration: 114320
loss: 1.0244860649108887,grad_norm: 0.999999178284903, iteration: 114321
loss: 1.0562855005264282,grad_norm: 0.9999993846741542, iteration: 114322
loss: 0.9999896883964539,grad_norm: 0.9382818445811643, iteration: 114323
loss: 0.9764778017997742,grad_norm: 0.9999993106555493, iteration: 114324
loss: 0.9920789003372192,grad_norm: 0.9499022769413862, iteration: 114325
loss: 1.0302380323410034,grad_norm: 0.9999989622517892, iteration: 114326
loss: 0.9966640472412109,grad_norm: 0.9999990336478973, iteration: 114327
loss: 1.0495375394821167,grad_norm: 0.8140741532761183, iteration: 114328
loss: 0.9765165448188782,grad_norm: 0.9999992267035543, iteration: 114329
loss: 1.0459281206130981,grad_norm: 0.9999992393181248, iteration: 114330
loss: 0.9780908823013306,grad_norm: 0.9999991355329, iteration: 114331
loss: 0.9617226719856262,grad_norm: 0.9999989816111928, iteration: 114332
loss: 1.0039563179016113,grad_norm: 0.9999992549870188, iteration: 114333
loss: 1.0045413970947266,grad_norm: 0.9999991183572813, iteration: 114334
loss: 0.9502894878387451,grad_norm: 0.9822909101379681, iteration: 114335
loss: 0.9902888536453247,grad_norm: 0.8562120885878395, iteration: 114336
loss: 1.0155531167984009,grad_norm: 0.9999988453386924, iteration: 114337
loss: 0.9993678331375122,grad_norm: 0.9204946282289894, iteration: 114338
loss: 1.00986647605896,grad_norm: 0.8479750071053519, iteration: 114339
loss: 1.0283896923065186,grad_norm: 0.9999998419752638, iteration: 114340
loss: 0.9645211100578308,grad_norm: 0.9914804606165984, iteration: 114341
loss: 0.9982688426971436,grad_norm: 0.9999990624295888, iteration: 114342
loss: 1.0120325088500977,grad_norm: 0.9999999679957409, iteration: 114343
loss: 1.072949767112732,grad_norm: 0.9999998047631128, iteration: 114344
loss: 0.9884902238845825,grad_norm: 0.9999989653411951, iteration: 114345
loss: 0.9760627746582031,grad_norm: 0.9999990655273587, iteration: 114346
loss: 0.9858898520469666,grad_norm: 0.9999992068819904, iteration: 114347
loss: 0.9929624199867249,grad_norm: 0.9167637246261794, iteration: 114348
loss: 0.9818262457847595,grad_norm: 0.9999992708469725, iteration: 114349
loss: 1.0467122793197632,grad_norm: 0.9999996839517253, iteration: 114350
loss: 0.9810126423835754,grad_norm: 0.9864482010936383, iteration: 114351
loss: 1.0299752950668335,grad_norm: 0.971805149838787, iteration: 114352
loss: 1.0033838748931885,grad_norm: 0.9999992245454168, iteration: 114353
loss: 1.3051484823226929,grad_norm: 0.9999998072647256, iteration: 114354
loss: 1.00438392162323,grad_norm: 0.995122069074329, iteration: 114355
loss: 1.0184324979782104,grad_norm: 0.9999998098906937, iteration: 114356
loss: 1.0188159942626953,grad_norm: 0.9025262025453009, iteration: 114357
loss: 0.9929271340370178,grad_norm: 0.9600214418357821, iteration: 114358
loss: 1.0047043561935425,grad_norm: 0.9999991964214185, iteration: 114359
loss: 1.0178190469741821,grad_norm: 0.9577828846331246, iteration: 114360
loss: 1.1466317176818848,grad_norm: 0.9999990476667301, iteration: 114361
loss: 1.1187822818756104,grad_norm: 1.0000000689141046, iteration: 114362
loss: 0.9946373701095581,grad_norm: 0.8678801662923101, iteration: 114363
loss: 1.0473434925079346,grad_norm: 0.9999990362468465, iteration: 114364
loss: 0.9779785871505737,grad_norm: 0.9999991432129317, iteration: 114365
loss: 0.9951053857803345,grad_norm: 0.9999991351750562, iteration: 114366
loss: 1.016842246055603,grad_norm: 0.9999990809424406, iteration: 114367
loss: 1.0464835166931152,grad_norm: 0.9999994685365634, iteration: 114368
loss: 1.004374623298645,grad_norm: 0.9669169469618344, iteration: 114369
loss: 1.0033477544784546,grad_norm: 0.9999993013073423, iteration: 114370
loss: 0.9768393039703369,grad_norm: 0.9999990363056714, iteration: 114371
loss: 1.1238367557525635,grad_norm: 0.9999998260526352, iteration: 114372
loss: 1.0057975053787231,grad_norm: 0.8984606307316554, iteration: 114373
loss: 1.0237153768539429,grad_norm: 0.9999993937764295, iteration: 114374
loss: 1.0816725492477417,grad_norm: 0.999999037962203, iteration: 114375
loss: 1.0666608810424805,grad_norm: 0.9999990417446969, iteration: 114376
loss: 0.969791829586029,grad_norm: 0.9852935862652938, iteration: 114377
loss: 0.9832813143730164,grad_norm: 0.9999990178606882, iteration: 114378
loss: 1.0068408250808716,grad_norm: 0.9210756634952446, iteration: 114379
loss: 1.0131192207336426,grad_norm: 0.9963768027431276, iteration: 114380
loss: 1.0017974376678467,grad_norm: 0.9999995398927579, iteration: 114381
loss: 0.983845591545105,grad_norm: 0.9999996598142741, iteration: 114382
loss: 0.9719876050949097,grad_norm: 0.8838727844169867, iteration: 114383
loss: 1.0529385805130005,grad_norm: 0.8999624324939269, iteration: 114384
loss: 1.0457851886749268,grad_norm: 0.999999589220028, iteration: 114385
loss: 1.2406752109527588,grad_norm: 0.9999994941117611, iteration: 114386
loss: 0.9952194690704346,grad_norm: 0.999999542163973, iteration: 114387
loss: 1.1505955457687378,grad_norm: 0.9999999381344093, iteration: 114388
loss: 0.9900559186935425,grad_norm: 0.9654514047999714, iteration: 114389
loss: 1.0348137617111206,grad_norm: 0.985427872119428, iteration: 114390
loss: 0.9862204790115356,grad_norm: 0.9999991736164165, iteration: 114391
loss: 0.9922757744789124,grad_norm: 0.9638334212287011, iteration: 114392
loss: 1.0797230005264282,grad_norm: 0.9999991223150658, iteration: 114393
loss: 1.002282738685608,grad_norm: 0.9999991659890752, iteration: 114394
loss: 1.0057669878005981,grad_norm: 0.971709058774888, iteration: 114395
loss: 1.0114034414291382,grad_norm: 0.8637815029816281, iteration: 114396
loss: 1.0045685768127441,grad_norm: 0.9999991662239045, iteration: 114397
loss: 1.021196722984314,grad_norm: 0.9999990464021509, iteration: 114398
loss: 1.0176177024841309,grad_norm: 0.8310070436468291, iteration: 114399
loss: 1.0194129943847656,grad_norm: 0.9999992622165995, iteration: 114400
loss: 1.0011528730392456,grad_norm: 0.9999991957954669, iteration: 114401
loss: 1.0222687721252441,grad_norm: 0.9999991296460918, iteration: 114402
loss: 1.0018882751464844,grad_norm: 0.9999991366709793, iteration: 114403
loss: 0.9721010327339172,grad_norm: 0.9594920215362078, iteration: 114404
loss: 1.0510786771774292,grad_norm: 0.9999995178953938, iteration: 114405
loss: 1.0118281841278076,grad_norm: 0.9999992042817346, iteration: 114406
loss: 0.9874392151832581,grad_norm: 0.9999991052476408, iteration: 114407
loss: 1.0131350755691528,grad_norm: 0.9999993222537655, iteration: 114408
loss: 0.9935481548309326,grad_norm: 0.9999991977925772, iteration: 114409
loss: 1.0038933753967285,grad_norm: 0.9999990534334211, iteration: 114410
loss: 1.0169224739074707,grad_norm: 0.909331095027255, iteration: 114411
loss: 1.0367431640625,grad_norm: 0.9484859739188474, iteration: 114412
loss: 1.040575385093689,grad_norm: 0.9999999684830948, iteration: 114413
loss: 1.0076215267181396,grad_norm: 0.9999992044424713, iteration: 114414
loss: 0.9866001605987549,grad_norm: 0.9999990063954861, iteration: 114415
loss: 1.094720721244812,grad_norm: 0.9999991926273272, iteration: 114416
loss: 1.0150052309036255,grad_norm: 0.9999990714937333, iteration: 114417
loss: 0.9965472221374512,grad_norm: 0.9999992280523505, iteration: 114418
loss: 0.9515315294265747,grad_norm: 0.999999353322504, iteration: 114419
loss: 1.050793170928955,grad_norm: 0.999999235462459, iteration: 114420
loss: 0.9918911457061768,grad_norm: 0.8536038882333739, iteration: 114421
loss: 0.9880868792533875,grad_norm: 0.999999054106846, iteration: 114422
loss: 0.9788634181022644,grad_norm: 0.9999990710083332, iteration: 114423
loss: 0.9880625009536743,grad_norm: 0.9999991107467741, iteration: 114424
loss: 1.0004490613937378,grad_norm: 0.8776210951797886, iteration: 114425
loss: 1.1462785005569458,grad_norm: 0.9999997009091993, iteration: 114426
loss: 0.9895387887954712,grad_norm: 0.9877936025487668, iteration: 114427
loss: 1.054242730140686,grad_norm: 0.9999990313309252, iteration: 114428
loss: 0.9646515250205994,grad_norm: 0.999999106062344, iteration: 114429
loss: 0.9967527389526367,grad_norm: 0.9999991904137097, iteration: 114430
loss: 1.0333032608032227,grad_norm: 0.9999998818609664, iteration: 114431
loss: 1.2091306447982788,grad_norm: 0.999999288530441, iteration: 114432
loss: 1.02229905128479,grad_norm: 0.96056749563979, iteration: 114433
loss: 1.0289795398712158,grad_norm: 1.0000000401316047, iteration: 114434
loss: 1.0133036375045776,grad_norm: 0.9999990466149815, iteration: 114435
loss: 1.0100071430206299,grad_norm: 0.9999991235965708, iteration: 114436
loss: 0.9739342927932739,grad_norm: 0.9852102653624315, iteration: 114437
loss: 0.9900535345077515,grad_norm: 0.9999990049425733, iteration: 114438
loss: 1.0379278659820557,grad_norm: 0.999999087338925, iteration: 114439
loss: 0.9923933148384094,grad_norm: 0.9993500785514023, iteration: 114440
loss: 1.023410677909851,grad_norm: 0.9999990488119784, iteration: 114441
loss: 1.0023279190063477,grad_norm: 0.992713010266553, iteration: 114442
loss: 0.9878499507904053,grad_norm: 0.9137658685048271, iteration: 114443
loss: 1.0090560913085938,grad_norm: 0.999999238860138, iteration: 114444
loss: 1.0066993236541748,grad_norm: 0.9358325390006775, iteration: 114445
loss: 1.023144006729126,grad_norm: 0.9650390330140591, iteration: 114446
loss: 0.9756585359573364,grad_norm: 0.9999992824195573, iteration: 114447
loss: 0.9840793013572693,grad_norm: 0.8434021358576823, iteration: 114448
loss: 1.0539977550506592,grad_norm: 0.9999998552410979, iteration: 114449
loss: 1.1021547317504883,grad_norm: 0.9999993544108315, iteration: 114450
loss: 1.002626895904541,grad_norm: 0.9999991055667448, iteration: 114451
loss: 1.0613725185394287,grad_norm: 0.999999688295221, iteration: 114452
loss: 1.0773488283157349,grad_norm: 0.9999999017997568, iteration: 114453
loss: 1.0140419006347656,grad_norm: 0.9808197668411535, iteration: 114454
loss: 1.0030498504638672,grad_norm: 0.9484731237015102, iteration: 114455
loss: 0.978651225566864,grad_norm: 0.9999991727956928, iteration: 114456
loss: 1.1332292556762695,grad_norm: 1.0000000090126504, iteration: 114457
loss: 0.9924711585044861,grad_norm: 0.8718207182984166, iteration: 114458
loss: 1.0642014741897583,grad_norm: 0.9999990996567499, iteration: 114459
loss: 1.1611952781677246,grad_norm: 0.9999999443192256, iteration: 114460
loss: 1.1599701642990112,grad_norm: 0.9999994161471085, iteration: 114461
loss: 0.9690553545951843,grad_norm: 0.9999991481550247, iteration: 114462
loss: 0.9697628021240234,grad_norm: 0.9999990413800043, iteration: 114463
loss: 1.0039048194885254,grad_norm: 0.9999991222413855, iteration: 114464
loss: 1.2195736169815063,grad_norm: 0.9999999252904174, iteration: 114465
loss: 1.1505825519561768,grad_norm: 0.999999261227704, iteration: 114466
loss: 1.0455803871154785,grad_norm: 0.9999998234566249, iteration: 114467
loss: 1.3442199230194092,grad_norm: 1.0000000065356576, iteration: 114468
loss: 1.2541155815124512,grad_norm: 0.9999994517405311, iteration: 114469
loss: 1.4082425832748413,grad_norm: 1.0000000061067866, iteration: 114470
loss: 1.39179265499115,grad_norm: 0.9999999820101033, iteration: 114471
loss: 1.0176935195922852,grad_norm: 0.999999018974147, iteration: 114472
loss: 1.3069478273391724,grad_norm: 1.0000000505489326, iteration: 114473
loss: 1.2671451568603516,grad_norm: 0.9999992934402003, iteration: 114474
loss: 1.05613112449646,grad_norm: 0.9999994986026975, iteration: 114475
loss: 0.9977151155471802,grad_norm: 0.9579192715386337, iteration: 114476
loss: 1.0077672004699707,grad_norm: 1.0000000518316332, iteration: 114477
loss: 1.0168596506118774,grad_norm: 0.9999996033277287, iteration: 114478
loss: 1.0629099607467651,grad_norm: 0.9999998099385451, iteration: 114479
loss: 1.0177414417266846,grad_norm: 0.9383647248332522, iteration: 114480
loss: 1.0381780862808228,grad_norm: 0.9553076645179674, iteration: 114481
loss: 1.0066097974777222,grad_norm: 0.9999991397073883, iteration: 114482
loss: 1.0285897254943848,grad_norm: 0.9999991087398279, iteration: 114483
loss: 1.1165584325790405,grad_norm: 0.9999996878212029, iteration: 114484
loss: 1.0609195232391357,grad_norm: 0.9999993969825461, iteration: 114485
loss: 0.959952175617218,grad_norm: 0.9527498470307968, iteration: 114486
loss: 1.0151232481002808,grad_norm: 0.9999992078113205, iteration: 114487
loss: 1.037262201309204,grad_norm: 0.999999382601854, iteration: 114488
loss: 0.9819138646125793,grad_norm: 0.9999990697614937, iteration: 114489
loss: 0.9949992895126343,grad_norm: 0.9999991055176898, iteration: 114490
loss: 1.0299251079559326,grad_norm: 0.9999992145924591, iteration: 114491
loss: 1.0171083211898804,grad_norm: 0.9999992099864642, iteration: 114492
loss: 1.0102709531784058,grad_norm: 0.999999174635613, iteration: 114493
loss: 1.0153861045837402,grad_norm: 0.999999240759928, iteration: 114494
loss: 0.9813712239265442,grad_norm: 0.994378837315109, iteration: 114495
loss: 1.0078428983688354,grad_norm: 0.9362052632868637, iteration: 114496
loss: 1.1255649328231812,grad_norm: 0.999999423838902, iteration: 114497
loss: 1.0429425239562988,grad_norm: 0.9999995655448127, iteration: 114498
loss: 1.0210131406784058,grad_norm: 0.9999993972588921, iteration: 114499
loss: 0.993462324142456,grad_norm: 0.9999991593104387, iteration: 114500
loss: 1.042067527770996,grad_norm: 0.9999993097395653, iteration: 114501
loss: 1.0190703868865967,grad_norm: 0.9999997275472877, iteration: 114502
loss: 0.9954805374145508,grad_norm: 0.9999992279367697, iteration: 114503
loss: 1.024747610092163,grad_norm: 0.999999565120649, iteration: 114504
loss: 1.0610498189926147,grad_norm: 0.9999999977310321, iteration: 114505
loss: 0.991828441619873,grad_norm: 0.9999992235995538, iteration: 114506
loss: 1.0380213260650635,grad_norm: 0.9999991184697271, iteration: 114507
loss: 1.0314970016479492,grad_norm: 0.9999994285929262, iteration: 114508
loss: 0.9778267741203308,grad_norm: 0.9999991260459952, iteration: 114509
loss: 1.0195674896240234,grad_norm: 0.9999993005306692, iteration: 114510
loss: 0.9976125955581665,grad_norm: 0.9437649966989207, iteration: 114511
loss: 0.9795878529548645,grad_norm: 0.9999992751099891, iteration: 114512
loss: 0.9937343001365662,grad_norm: 0.8894216468164075, iteration: 114513
loss: 1.0340875387191772,grad_norm: 0.9924942668613516, iteration: 114514
loss: 1.0664721727371216,grad_norm: 0.9999998218968407, iteration: 114515
loss: 1.0022085905075073,grad_norm: 0.9999990476986066, iteration: 114516
loss: 1.1026854515075684,grad_norm: 0.9999992280261936, iteration: 114517
loss: 1.021746039390564,grad_norm: 0.9999990813178105, iteration: 114518
loss: 1.0235823392868042,grad_norm: 0.999999092827464, iteration: 114519
loss: 1.0076522827148438,grad_norm: 0.9999990366900564, iteration: 114520
loss: 1.0612188577651978,grad_norm: 0.9999994779122746, iteration: 114521
loss: 0.9984368681907654,grad_norm: 0.9999993582678964, iteration: 114522
loss: 1.0619914531707764,grad_norm: 0.9999992412540961, iteration: 114523
loss: 0.9863771200180054,grad_norm: 0.9999991429345104, iteration: 114524
loss: 1.0385730266571045,grad_norm: 0.9999992460577859, iteration: 114525
loss: 1.0216035842895508,grad_norm: 0.9999989985171064, iteration: 114526
loss: 0.9880712032318115,grad_norm: 0.9999993051863153, iteration: 114527
loss: 1.0205861330032349,grad_norm: 0.9998047489958864, iteration: 114528
loss: 0.9509422779083252,grad_norm: 0.999998937592069, iteration: 114529
loss: 1.0116171836853027,grad_norm: 0.9999991935245127, iteration: 114530
loss: 1.0066821575164795,grad_norm: 0.9698320596614622, iteration: 114531
loss: 1.0340338945388794,grad_norm: 0.9999996000373338, iteration: 114532
loss: 0.9936765432357788,grad_norm: 0.9999989781839482, iteration: 114533
loss: 1.0209099054336548,grad_norm: 0.9999991128745613, iteration: 114534
loss: 1.0427970886230469,grad_norm: 0.9999991747938022, iteration: 114535
loss: 1.0098843574523926,grad_norm: 0.9999992280158734, iteration: 114536
loss: 1.0071343183517456,grad_norm: 0.9999990508986125, iteration: 114537
loss: 1.0195302963256836,grad_norm: 0.9867578510640218, iteration: 114538
loss: 1.0130980014801025,grad_norm: 0.946340075795216, iteration: 114539
loss: 1.0383504629135132,grad_norm: 0.9449989941574017, iteration: 114540
loss: 1.0406436920166016,grad_norm: 0.9999994345016079, iteration: 114541
loss: 0.9885518550872803,grad_norm: 0.9999989702926237, iteration: 114542
loss: 0.998855471611023,grad_norm: 0.9727518944363587, iteration: 114543
loss: 1.0741318464279175,grad_norm: 0.9999992784984756, iteration: 114544
loss: 1.1063957214355469,grad_norm: 0.9999996432920054, iteration: 114545
loss: 1.0125114917755127,grad_norm: 0.9999990999116766, iteration: 114546
loss: 1.0464234352111816,grad_norm: 0.9999991777296762, iteration: 114547
loss: 0.9868115782737732,grad_norm: 0.9999992876599711, iteration: 114548
loss: 1.0037368535995483,grad_norm: 0.9295975592405369, iteration: 114549
loss: 0.9527868628501892,grad_norm: 0.9412088543485464, iteration: 114550
loss: 1.0079339742660522,grad_norm: 0.9999990197757088, iteration: 114551
loss: 1.0064693689346313,grad_norm: 0.9999990709770061, iteration: 114552
loss: 1.0263539552688599,grad_norm: 0.8625841989852342, iteration: 114553
loss: 1.0133538246154785,grad_norm: 0.9999990821296373, iteration: 114554
loss: 0.946451723575592,grad_norm: 0.9160741116346357, iteration: 114555
loss: 0.9892277717590332,grad_norm: 0.9999991885046812, iteration: 114556
loss: 0.9966300129890442,grad_norm: 0.9999992814988177, iteration: 114557
loss: 0.9909751415252686,grad_norm: 0.9811282483359702, iteration: 114558
loss: 0.9942901134490967,grad_norm: 0.9999990436074601, iteration: 114559
loss: 0.9284856915473938,grad_norm: 0.999999053913964, iteration: 114560
loss: 0.9845022559165955,grad_norm: 0.9613820781958182, iteration: 114561
loss: 0.9887255430221558,grad_norm: 0.9999991848040929, iteration: 114562
loss: 1.0526968240737915,grad_norm: 0.9999998818987897, iteration: 114563
loss: 1.02085542678833,grad_norm: 0.9999991808150551, iteration: 114564
loss: 1.025598406791687,grad_norm: 0.9999992105508567, iteration: 114565
loss: 1.0029646158218384,grad_norm: 0.999999082630065, iteration: 114566
loss: 1.0112167596817017,grad_norm: 0.9999991380607491, iteration: 114567
loss: 1.0018761157989502,grad_norm: 0.9999990378059979, iteration: 114568
loss: 1.038185715675354,grad_norm: 0.99999927339647, iteration: 114569
loss: 1.0208899974822998,grad_norm: 0.95380877655957, iteration: 114570
loss: 1.0477758646011353,grad_norm: 0.9661115730894363, iteration: 114571
loss: 0.9860196113586426,grad_norm: 0.9157763458765963, iteration: 114572
loss: 0.9671103358268738,grad_norm: 0.9999991743271628, iteration: 114573
loss: 0.9938556551933289,grad_norm: 0.9999991075831197, iteration: 114574
loss: 0.9957988858222961,grad_norm: 0.9542919184783012, iteration: 114575
loss: 1.0265613794326782,grad_norm: 0.9999991492821483, iteration: 114576
loss: 1.0228607654571533,grad_norm: 0.9999992478963209, iteration: 114577
loss: 0.996442437171936,grad_norm: 0.9999990583055556, iteration: 114578
loss: 1.016890287399292,grad_norm: 0.9999996685361956, iteration: 114579
loss: 1.0041011571884155,grad_norm: 0.9999990974053207, iteration: 114580
loss: 0.9644372463226318,grad_norm: 0.999998912496048, iteration: 114581
loss: 1.0031089782714844,grad_norm: 0.9999991334760423, iteration: 114582
loss: 1.0190829038619995,grad_norm: 0.9999990899837792, iteration: 114583
loss: 0.9952446818351746,grad_norm: 0.9999991503352529, iteration: 114584
loss: 0.975261926651001,grad_norm: 0.8981183091996674, iteration: 114585
loss: 1.0014837980270386,grad_norm: 0.9999990717214348, iteration: 114586
loss: 0.9766939282417297,grad_norm: 0.9999997004941469, iteration: 114587
loss: 0.9599928259849548,grad_norm: 0.9999990376370764, iteration: 114588
loss: 1.0016523599624634,grad_norm: 0.9681707171732193, iteration: 114589
loss: 0.971170961856842,grad_norm: 0.9999990542921747, iteration: 114590
loss: 1.027734637260437,grad_norm: 0.9999991659113708, iteration: 114591
loss: 1.0044108629226685,grad_norm: 0.999999307164583, iteration: 114592
loss: 1.038491129875183,grad_norm: 0.9618188971256574, iteration: 114593
loss: 1.023171067237854,grad_norm: 0.9999990246588181, iteration: 114594
loss: 0.9756133556365967,grad_norm: 0.94786316206931, iteration: 114595
loss: 0.9606572985649109,grad_norm: 0.9999991802052101, iteration: 114596
loss: 0.9909661412239075,grad_norm: 0.9766539041331184, iteration: 114597
loss: 0.9734920263290405,grad_norm: 0.9999991905196453, iteration: 114598
loss: 0.9762977957725525,grad_norm: 0.9146579851359226, iteration: 114599
loss: 0.9749652743339539,grad_norm: 0.8574649978725426, iteration: 114600
loss: 1.0151861906051636,grad_norm: 0.9999990430436749, iteration: 114601
loss: 1.008727788925171,grad_norm: 0.9237298575804852, iteration: 114602
loss: 1.0062742233276367,grad_norm: 0.9999991173504467, iteration: 114603
loss: 1.0145195722579956,grad_norm: 0.9999990836229116, iteration: 114604
loss: 0.9880180358886719,grad_norm: 0.9999990884274569, iteration: 114605
loss: 0.9852741956710815,grad_norm: 0.9999991197948204, iteration: 114606
loss: 0.9986050724983215,grad_norm: 0.9716703433456265, iteration: 114607
loss: 1.0458393096923828,grad_norm: 0.999999147328106, iteration: 114608
loss: 1.0227553844451904,grad_norm: 0.9999990115929309, iteration: 114609
loss: 0.9696561694145203,grad_norm: 0.9999991287209168, iteration: 114610
loss: 0.9836234450340271,grad_norm: 0.9999990361209203, iteration: 114611
loss: 1.0221744775772095,grad_norm: 0.9999992070464281, iteration: 114612
loss: 0.9826971888542175,grad_norm: 0.9999991471714065, iteration: 114613
loss: 1.078721523284912,grad_norm: 0.9999990962719937, iteration: 114614
loss: 1.0021049976348877,grad_norm: 0.9721766701490796, iteration: 114615
loss: 0.9919266700744629,grad_norm: 0.999999175112796, iteration: 114616
loss: 0.9915519952774048,grad_norm: 0.9999992639002135, iteration: 114617
loss: 1.0139533281326294,grad_norm: 0.9999992143143149, iteration: 114618
loss: 0.9511733055114746,grad_norm: 0.9999991099847617, iteration: 114619
loss: 1.0313130617141724,grad_norm: 0.9999992036614732, iteration: 114620
loss: 0.9945405721664429,grad_norm: 0.9749965262992966, iteration: 114621
loss: 1.0548337697982788,grad_norm: 0.999999113961998, iteration: 114622
loss: 0.9859132170677185,grad_norm: 0.9999991862544262, iteration: 114623
loss: 1.0015169382095337,grad_norm: 0.9999991985843901, iteration: 114624
loss: 0.9903599619865417,grad_norm: 0.9464684251651476, iteration: 114625
loss: 0.99556964635849,grad_norm: 0.8016297981329557, iteration: 114626
loss: 1.0314744710922241,grad_norm: 0.9999990694385956, iteration: 114627
loss: 0.9777529835700989,grad_norm: 0.9999991051465746, iteration: 114628
loss: 0.9680427312850952,grad_norm: 0.9317302360528099, iteration: 114629
loss: 1.0128511190414429,grad_norm: 0.9055725017054249, iteration: 114630
loss: 1.0024728775024414,grad_norm: 0.9999988834782787, iteration: 114631
loss: 0.9962417483329773,grad_norm: 0.8342665587306537, iteration: 114632
loss: 0.9797511696815491,grad_norm: 0.9999990723178869, iteration: 114633
loss: 0.9880239963531494,grad_norm: 0.968257286250489, iteration: 114634
loss: 0.9859324097633362,grad_norm: 0.9797416808887883, iteration: 114635
loss: 0.9844316840171814,grad_norm: 0.9999992986623929, iteration: 114636
loss: 1.0752851963043213,grad_norm: 0.9607523763216234, iteration: 114637
loss: 1.0131821632385254,grad_norm: 0.9999992745164092, iteration: 114638
loss: 1.020469307899475,grad_norm: 0.9999992059287274, iteration: 114639
loss: 1.036405324935913,grad_norm: 0.9999995967480989, iteration: 114640
loss: 1.0103248357772827,grad_norm: 0.9999992692196495, iteration: 114641
loss: 0.9654459953308105,grad_norm: 0.999999104669603, iteration: 114642
loss: 1.0167077779769897,grad_norm: 0.9999992694057542, iteration: 114643
loss: 1.028075098991394,grad_norm: 0.9999989806593231, iteration: 114644
loss: 0.9603440761566162,grad_norm: 0.9999990095273361, iteration: 114645
loss: 1.0210180282592773,grad_norm: 0.9999990541949891, iteration: 114646
loss: 1.0153508186340332,grad_norm: 0.9999991933974554, iteration: 114647
loss: 0.9856892228126526,grad_norm: 0.9576254945561202, iteration: 114648
loss: 1.0170387029647827,grad_norm: 0.9999991360216289, iteration: 114649
loss: 0.9868379831314087,grad_norm: 0.9999991029584251, iteration: 114650
loss: 1.0273661613464355,grad_norm: 0.9999990400252521, iteration: 114651
loss: 0.987368106842041,grad_norm: 0.9999993490758083, iteration: 114652
loss: 1.0156794786453247,grad_norm: 0.999999155734028, iteration: 114653
loss: 0.9730734825134277,grad_norm: 0.9999989387134903, iteration: 114654
loss: 1.0091907978057861,grad_norm: 0.9069446251540542, iteration: 114655
loss: 1.0062743425369263,grad_norm: 0.993268371084543, iteration: 114656
loss: 0.9951745271682739,grad_norm: 0.956507104554314, iteration: 114657
loss: 0.9911895990371704,grad_norm: 0.9999992192379649, iteration: 114658
loss: 0.9888600707054138,grad_norm: 0.8549409575755869, iteration: 114659
loss: 0.9800459742546082,grad_norm: 0.9999990408825787, iteration: 114660
loss: 0.9993844628334045,grad_norm: 0.9999990857809312, iteration: 114661
loss: 1.0035264492034912,grad_norm: 0.9808661336027839, iteration: 114662
loss: 0.9810205101966858,grad_norm: 0.9999990983415521, iteration: 114663
loss: 0.981660783290863,grad_norm: 0.999999099329437, iteration: 114664
loss: 0.9951579570770264,grad_norm: 0.9999989852387838, iteration: 114665
loss: 1.016088604927063,grad_norm: 0.9999991742931496, iteration: 114666
loss: 1.0070587396621704,grad_norm: 0.9061835908710751, iteration: 114667
loss: 0.9907562136650085,grad_norm: 0.9999991354076112, iteration: 114668
loss: 0.9691466689109802,grad_norm: 0.9999990630392621, iteration: 114669
loss: 0.9928852915763855,grad_norm: 0.9999991201456018, iteration: 114670
loss: 1.0032602548599243,grad_norm: 0.9999990310552591, iteration: 114671
loss: 1.0345780849456787,grad_norm: 0.999999676478654, iteration: 114672
loss: 0.9714522957801819,grad_norm: 0.9999991624697189, iteration: 114673
loss: 0.9756583571434021,grad_norm: 0.8132712187597574, iteration: 114674
loss: 0.9865705966949463,grad_norm: 0.8355225513113091, iteration: 114675
loss: 1.0211248397827148,grad_norm: 0.9019707338494488, iteration: 114676
loss: 0.9845173954963684,grad_norm: 0.9870501129584709, iteration: 114677
loss: 0.9950831532478333,grad_norm: 0.9999990097754574, iteration: 114678
loss: 1.0021028518676758,grad_norm: 0.911497553009539, iteration: 114679
loss: 0.9937508702278137,grad_norm: 0.9340464099755137, iteration: 114680
loss: 1.0365110635757446,grad_norm: 0.9999993633571223, iteration: 114681
loss: 0.9993128776550293,grad_norm: 0.9999990810344431, iteration: 114682
loss: 0.9843206405639648,grad_norm: 0.9667252380908136, iteration: 114683
loss: 1.0077706575393677,grad_norm: 0.9759577846038877, iteration: 114684
loss: 0.9715306162834167,grad_norm: 0.9999991924290708, iteration: 114685
loss: 1.0814327001571655,grad_norm: 0.9340544310847865, iteration: 114686
loss: 1.007619857788086,grad_norm: 0.8754740561955685, iteration: 114687
loss: 1.034751534461975,grad_norm: 0.9999992532232179, iteration: 114688
loss: 1.0176284313201904,grad_norm: 0.9999991022249046, iteration: 114689
loss: 1.0001745223999023,grad_norm: 0.9999990885066206, iteration: 114690
loss: 1.003739595413208,grad_norm: 0.9999990753615096, iteration: 114691
loss: 1.0312539339065552,grad_norm: 0.999999144765684, iteration: 114692
loss: 1.0166746377944946,grad_norm: 0.99999911769241, iteration: 114693
loss: 0.9982683062553406,grad_norm: 0.9824011345544643, iteration: 114694
loss: 0.9631151556968689,grad_norm: 0.9999991931763964, iteration: 114695
loss: 0.9775710701942444,grad_norm: 0.9765032305659115, iteration: 114696
loss: 0.9965223073959351,grad_norm: 0.9614604853859723, iteration: 114697
loss: 0.9796510338783264,grad_norm: 0.8919438917239146, iteration: 114698
loss: 0.9962074756622314,grad_norm: 0.9357656121293034, iteration: 114699
loss: 1.0927923917770386,grad_norm: 0.9999991883553785, iteration: 114700
loss: 0.9999690055847168,grad_norm: 0.7894476226461669, iteration: 114701
loss: 1.1019117832183838,grad_norm: 0.9379907149152797, iteration: 114702
loss: 1.0187491178512573,grad_norm: 0.9999991066953621, iteration: 114703
loss: 0.9951880574226379,grad_norm: 0.8606502671512175, iteration: 114704
loss: 0.979693591594696,grad_norm: 0.9999991580768041, iteration: 114705
loss: 1.0015740394592285,grad_norm: 0.9999989601861217, iteration: 114706
loss: 0.9915123581886292,grad_norm: 0.8392386853890592, iteration: 114707
loss: 0.9873964190483093,grad_norm: 0.9999991788997226, iteration: 114708
loss: 0.9949222207069397,grad_norm: 0.8906645641518164, iteration: 114709
loss: 1.0002824068069458,grad_norm: 0.9356996563250282, iteration: 114710
loss: 0.9687300324440002,grad_norm: 0.9998903709933504, iteration: 114711
loss: 1.0126166343688965,grad_norm: 0.9613462409834336, iteration: 114712
loss: 1.0359126329421997,grad_norm: 0.9999996796965904, iteration: 114713
loss: 1.0029120445251465,grad_norm: 0.9518738192661572, iteration: 114714
loss: 1.0078365802764893,grad_norm: 0.8782406028193303, iteration: 114715
loss: 0.9589431285858154,grad_norm: 0.99999904900073, iteration: 114716
loss: 0.9941393733024597,grad_norm: 0.9999992497600778, iteration: 114717
loss: 1.0251457691192627,grad_norm: 0.9999993050832207, iteration: 114718
loss: 0.9925615191459656,grad_norm: 0.9999991285164985, iteration: 114719
loss: 0.9970241189002991,grad_norm: 0.9685299271507957, iteration: 114720
loss: 1.011759638786316,grad_norm: 0.9566842928243576, iteration: 114721
loss: 1.032821536064148,grad_norm: 0.9999991492984032, iteration: 114722
loss: 0.9766462445259094,grad_norm: 0.9999991088602894, iteration: 114723
loss: 1.0170655250549316,grad_norm: 0.9999991908647266, iteration: 114724
loss: 1.067131519317627,grad_norm: 0.9999993959276334, iteration: 114725
loss: 0.9779258966445923,grad_norm: 0.985574709935589, iteration: 114726
loss: 1.009661316871643,grad_norm: 0.9999990073712851, iteration: 114727
loss: 0.9957502484321594,grad_norm: 0.9668000546315554, iteration: 114728
loss: 0.9988216757774353,grad_norm: 0.9999991718253901, iteration: 114729
loss: 0.967684805393219,grad_norm: 0.9968568251735836, iteration: 114730
loss: 0.9806972742080688,grad_norm: 0.9741425643203064, iteration: 114731
loss: 1.016667366027832,grad_norm: 0.9999995039887734, iteration: 114732
loss: 0.9704645276069641,grad_norm: 0.9999992224353048, iteration: 114733
loss: 0.9775387048721313,grad_norm: 0.9999995165535371, iteration: 114734
loss: 0.9448270201683044,grad_norm: 0.9999992176981138, iteration: 114735
loss: 1.0467287302017212,grad_norm: 0.9999990813712416, iteration: 114736
loss: 0.9777628779411316,grad_norm: 0.999999194778344, iteration: 114737
loss: 1.0136340856552124,grad_norm: 0.9087974227120863, iteration: 114738
loss: 1.0105631351470947,grad_norm: 0.9323217965077761, iteration: 114739
loss: 1.0273669958114624,grad_norm: 0.9999990728829619, iteration: 114740
loss: 1.0008385181427002,grad_norm: 0.9977807574505497, iteration: 114741
loss: 1.0470762252807617,grad_norm: 0.9999991346564476, iteration: 114742
loss: 1.0092214345932007,grad_norm: 0.9999991411088827, iteration: 114743
loss: 1.0460678339004517,grad_norm: 0.9999992456441448, iteration: 114744
loss: 1.0013623237609863,grad_norm: 0.9999991776609306, iteration: 114745
loss: 0.9973726272583008,grad_norm: 0.8840895891181947, iteration: 114746
loss: 0.9695858359336853,grad_norm: 0.8956133005505008, iteration: 114747
loss: 0.9648845791816711,grad_norm: 0.9999992563240514, iteration: 114748
loss: 0.9609407782554626,grad_norm: 0.9999991618732048, iteration: 114749
loss: 0.9772911071777344,grad_norm: 0.9999992404701151, iteration: 114750
loss: 1.0161763429641724,grad_norm: 0.9999992713502532, iteration: 114751
loss: 0.976614773273468,grad_norm: 0.9498188847287125, iteration: 114752
loss: 0.9934907555580139,grad_norm: 0.9999989535176014, iteration: 114753
loss: 0.9938339591026306,grad_norm: 0.9999991494201179, iteration: 114754
loss: 1.040475845336914,grad_norm: 0.9999990812996145, iteration: 114755
loss: 0.9938834309577942,grad_norm: 0.9527830713624691, iteration: 114756
loss: 0.9776755571365356,grad_norm: 0.9999994346740395, iteration: 114757
loss: 1.0117429494857788,grad_norm: 0.9999990759936346, iteration: 114758
loss: 0.966379702091217,grad_norm: 0.999999019714341, iteration: 114759
loss: 1.03505277633667,grad_norm: 0.9999990879801348, iteration: 114760
loss: 1.016491413116455,grad_norm: 0.9999991068502405, iteration: 114761
loss: 1.0285922288894653,grad_norm: 0.9999991629831032, iteration: 114762
loss: 0.9610309600830078,grad_norm: 0.9999990716640659, iteration: 114763
loss: 1.0109589099884033,grad_norm: 0.9999990314158789, iteration: 114764
loss: 1.0410549640655518,grad_norm: 0.8594760109008441, iteration: 114765
loss: 0.9967157244682312,grad_norm: 0.9961608627057303, iteration: 114766
loss: 1.0164122581481934,grad_norm: 0.9642308503303224, iteration: 114767
loss: 1.029055118560791,grad_norm: 0.987572728302839, iteration: 114768
loss: 0.9571437835693359,grad_norm: 0.9173803924721323, iteration: 114769
loss: 1.0176249742507935,grad_norm: 0.9977991239836527, iteration: 114770
loss: 1.1572794914245605,grad_norm: 0.999999392844652, iteration: 114771
loss: 1.0112239122390747,grad_norm: 0.990798088423971, iteration: 114772
loss: 1.0003236532211304,grad_norm: 0.9999992026278127, iteration: 114773
loss: 1.0076960325241089,grad_norm: 0.9999994226398945, iteration: 114774
loss: 0.9818159937858582,grad_norm: 0.9460778034239048, iteration: 114775
loss: 0.9753698706626892,grad_norm: 0.9798882747431742, iteration: 114776
loss: 0.9960322976112366,grad_norm: 0.9999989972922879, iteration: 114777
loss: 0.995588481426239,grad_norm: 0.9229596217372367, iteration: 114778
loss: 1.0292526483535767,grad_norm: 0.994259109819326, iteration: 114779
loss: 0.9967424869537354,grad_norm: 0.9299054099981408, iteration: 114780
loss: 0.9822788834571838,grad_norm: 0.9999991482455493, iteration: 114781
loss: 1.023619294166565,grad_norm: 0.9636551704461136, iteration: 114782
loss: 1.0790194272994995,grad_norm: 0.9980093389478356, iteration: 114783
loss: 1.0366220474243164,grad_norm: 0.9999992079083103, iteration: 114784
loss: 1.0191247463226318,grad_norm: 0.959708882904475, iteration: 114785
loss: 0.9934126138687134,grad_norm: 0.9999992186718908, iteration: 114786
loss: 0.996228039264679,grad_norm: 0.9213292781801119, iteration: 114787
loss: 1.0090410709381104,grad_norm: 0.9999990240196175, iteration: 114788
loss: 1.0228424072265625,grad_norm: 0.9999991490250135, iteration: 114789
loss: 0.9833968877792358,grad_norm: 0.922383052989196, iteration: 114790
loss: 1.0283284187316895,grad_norm: 0.9917397972879926, iteration: 114791
loss: 0.9971486926078796,grad_norm: 0.9999992365993154, iteration: 114792
loss: 1.0077770948410034,grad_norm: 0.9001459888129888, iteration: 114793
loss: 1.026063084602356,grad_norm: 0.9999993873078576, iteration: 114794
loss: 0.9813622236251831,grad_norm: 0.8698124567732937, iteration: 114795
loss: 1.0006424188613892,grad_norm: 0.9999991331640045, iteration: 114796
loss: 0.9831888675689697,grad_norm: 0.9999991277145369, iteration: 114797
loss: 1.0034977197647095,grad_norm: 0.9999991611473807, iteration: 114798
loss: 0.9834921360015869,grad_norm: 0.8652532783626856, iteration: 114799
loss: 1.0091336965560913,grad_norm: 0.8138410494664648, iteration: 114800
loss: 1.040643334388733,grad_norm: 0.8486372616997409, iteration: 114801
loss: 1.017334222793579,grad_norm: 0.9999993242469136, iteration: 114802
loss: 1.0461032390594482,grad_norm: 0.9999997347835354, iteration: 114803
loss: 0.9970853924751282,grad_norm: 0.9999994754528635, iteration: 114804
loss: 1.0692086219787598,grad_norm: 0.9999993076957728, iteration: 114805
loss: 1.0058109760284424,grad_norm: 0.9123384751144596, iteration: 114806
loss: 0.9980195760726929,grad_norm: 0.9999990618966865, iteration: 114807
loss: 0.9576327800750732,grad_norm: 0.9999990171795701, iteration: 114808
loss: 0.982994794845581,grad_norm: 0.9505766532020685, iteration: 114809
loss: 1.0195540189743042,grad_norm: 0.9573167001209159, iteration: 114810
loss: 0.9917433261871338,grad_norm: 0.9999990170638897, iteration: 114811
loss: 1.069010615348816,grad_norm: 0.9999997475554901, iteration: 114812
loss: 1.0264906883239746,grad_norm: 0.9999990560075649, iteration: 114813
loss: 1.0114772319793701,grad_norm: 0.9999991774377731, iteration: 114814
loss: 1.0010005235671997,grad_norm: 0.9454970735544194, iteration: 114815
loss: 1.0140455961227417,grad_norm: 0.9999992207570195, iteration: 114816
loss: 1.008838176727295,grad_norm: 0.9999992201910322, iteration: 114817
loss: 1.005264401435852,grad_norm: 0.946232457160592, iteration: 114818
loss: 1.0256013870239258,grad_norm: 0.9999991069934345, iteration: 114819
loss: 1.0381754636764526,grad_norm: 0.9999999073764174, iteration: 114820
loss: 1.0252013206481934,grad_norm: 0.9999990691307088, iteration: 114821
loss: 1.0030803680419922,grad_norm: 0.9322929905109938, iteration: 114822
loss: 1.0200875997543335,grad_norm: 0.9999991532772885, iteration: 114823
loss: 1.030979037284851,grad_norm: 0.9999990931522611, iteration: 114824
loss: 1.011892557144165,grad_norm: 0.9999997670330844, iteration: 114825
loss: 1.0026450157165527,grad_norm: 0.9916292063357044, iteration: 114826
loss: 1.0723674297332764,grad_norm: 0.9999992351568128, iteration: 114827
loss: 0.991797149181366,grad_norm: 0.9999991817736099, iteration: 114828
loss: 1.0262444019317627,grad_norm: 0.9999994669176417, iteration: 114829
loss: 0.9838123917579651,grad_norm: 0.9999992532758425, iteration: 114830
loss: 0.9969698786735535,grad_norm: 0.9999993240301475, iteration: 114831
loss: 0.9993292689323425,grad_norm: 0.9999991101377587, iteration: 114832
loss: 0.9982078671455383,grad_norm: 0.9999991592134616, iteration: 114833
loss: 1.002750039100647,grad_norm: 0.8673016497055791, iteration: 114834
loss: 0.9826650023460388,grad_norm: 0.9999992117483321, iteration: 114835
loss: 0.9848647713661194,grad_norm: 0.96034086650565, iteration: 114836
loss: 0.9505908489227295,grad_norm: 0.9999998426005767, iteration: 114837
loss: 1.0194635391235352,grad_norm: 0.8175558049935833, iteration: 114838
loss: 1.0149810314178467,grad_norm: 0.9467993198503196, iteration: 114839
loss: 0.9984273314476013,grad_norm: 0.9999994360610744, iteration: 114840
loss: 0.9372202157974243,grad_norm: 0.9999990361358941, iteration: 114841
loss: 0.9563254714012146,grad_norm: 0.9188420612817705, iteration: 114842
loss: 1.0234498977661133,grad_norm: 0.9999992342228279, iteration: 114843
loss: 1.0065962076187134,grad_norm: 0.9999991459648448, iteration: 114844
loss: 1.010704755783081,grad_norm: 0.9934296353960697, iteration: 114845
loss: 0.9877445101737976,grad_norm: 0.9999992114957367, iteration: 114846
loss: 0.9781978726387024,grad_norm: 0.9999989977266467, iteration: 114847
loss: 0.9825068712234497,grad_norm: 0.9999991218010625, iteration: 114848
loss: 1.0195828676223755,grad_norm: 0.9999990576294916, iteration: 114849
loss: 0.9890328645706177,grad_norm: 0.999999227094823, iteration: 114850
loss: 0.9920538663864136,grad_norm: 0.9999993015015392, iteration: 114851
loss: 1.0092251300811768,grad_norm: 0.9999991518729361, iteration: 114852
loss: 0.9963231086730957,grad_norm: 0.9999992773914016, iteration: 114853
loss: 0.983421802520752,grad_norm: 0.9245005762164128, iteration: 114854
loss: 1.0182753801345825,grad_norm: 0.981972880808485, iteration: 114855
loss: 1.0206249952316284,grad_norm: 0.9999997756339223, iteration: 114856
loss: 0.9887340664863586,grad_norm: 0.9999992463262299, iteration: 114857
loss: 1.0176528692245483,grad_norm: 0.9999995309157582, iteration: 114858
loss: 1.0273607969284058,grad_norm: 0.9999989250391726, iteration: 114859
loss: 0.9948203563690186,grad_norm: 0.9999991800212992, iteration: 114860
loss: 1.022678256034851,grad_norm: 0.9999992440246551, iteration: 114861
loss: 0.9914526343345642,grad_norm: 0.9999992217617781, iteration: 114862
loss: 0.963628888130188,grad_norm: 0.9999992862041106, iteration: 114863
loss: 1.0387612581253052,grad_norm: 0.8550554596119593, iteration: 114864
loss: 0.9764167666435242,grad_norm: 0.9999988916121099, iteration: 114865
loss: 0.9703053832054138,grad_norm: 0.9999991609357434, iteration: 114866
loss: 1.0072349309921265,grad_norm: 0.9199012957247003, iteration: 114867
loss: 1.0204880237579346,grad_norm: 0.9999990890601446, iteration: 114868
loss: 0.9781320095062256,grad_norm: 0.9999989966773689, iteration: 114869
loss: 0.997033953666687,grad_norm: 0.8910057831470869, iteration: 114870
loss: 0.982894778251648,grad_norm: 0.987041217598965, iteration: 114871
loss: 1.0327517986297607,grad_norm: 0.9699944967583171, iteration: 114872
loss: 0.9647145867347717,grad_norm: 0.9107031981629831, iteration: 114873
loss: 0.9979881644248962,grad_norm: 0.8003244145003475, iteration: 114874
loss: 1.0121681690216064,grad_norm: 0.9999995922027005, iteration: 114875
loss: 0.9805781841278076,grad_norm: 0.9999991508033218, iteration: 114876
loss: 0.9557872414588928,grad_norm: 0.9999992143081063, iteration: 114877
loss: 0.9928466081619263,grad_norm: 0.9999992863576908, iteration: 114878
loss: 1.1296242475509644,grad_norm: 0.9999999085542415, iteration: 114879
loss: 0.9666334390640259,grad_norm: 0.9999991265522721, iteration: 114880
loss: 0.9661566019058228,grad_norm: 0.9999992172664652, iteration: 114881
loss: 1.0060795545578003,grad_norm: 0.9999990389507112, iteration: 114882
loss: 1.0293999910354614,grad_norm: 0.999999178402901, iteration: 114883
loss: 1.016213059425354,grad_norm: 0.9999998979098454, iteration: 114884
loss: 1.115394115447998,grad_norm: 0.9999996277563603, iteration: 114885
loss: 1.0406080484390259,grad_norm: 0.9999992264559194, iteration: 114886
loss: 1.0047717094421387,grad_norm: 0.9736307568020823, iteration: 114887
loss: 1.022141933441162,grad_norm: 0.8720237093127657, iteration: 114888
loss: 1.0078456401824951,grad_norm: 0.9999991760358989, iteration: 114889
loss: 0.9709694981575012,grad_norm: 0.9858111010019155, iteration: 114890
loss: 0.9880486130714417,grad_norm: 0.9999992217557729, iteration: 114891
loss: 1.0177699327468872,grad_norm: 0.9999990611495425, iteration: 114892
loss: 0.9791573286056519,grad_norm: 0.9227802727736457, iteration: 114893
loss: 1.026490569114685,grad_norm: 0.999999080122387, iteration: 114894
loss: 0.9989948272705078,grad_norm: 0.9299823160363688, iteration: 114895
loss: 0.9861064553260803,grad_norm: 0.9999992195632883, iteration: 114896
loss: 0.9959806799888611,grad_norm: 0.9319365620435272, iteration: 114897
loss: 0.9923486709594727,grad_norm: 0.9026047119373372, iteration: 114898
loss: 1.0115140676498413,grad_norm: 0.8661288030828083, iteration: 114899
loss: 0.9993144869804382,grad_norm: 0.9439285805441061, iteration: 114900
loss: 0.9892700910568237,grad_norm: 0.999999126398207, iteration: 114901
loss: 1.0186511278152466,grad_norm: 0.9999990826500701, iteration: 114902
loss: 0.9844887256622314,grad_norm: 0.8732243584661056, iteration: 114903
loss: 1.0089366436004639,grad_norm: 0.9999991457351696, iteration: 114904
loss: 0.9991209506988525,grad_norm: 0.9314587972266738, iteration: 114905
loss: 1.023349404335022,grad_norm: 0.9999993799877577, iteration: 114906
loss: 0.9930073618888855,grad_norm: 0.9999990493566033, iteration: 114907
loss: 0.9645072817802429,grad_norm: 0.9999991969607271, iteration: 114908
loss: 0.9912722706794739,grad_norm: 0.8576694853291899, iteration: 114909
loss: 1.0547252893447876,grad_norm: 0.9999996100001649, iteration: 114910
loss: 0.9878912568092346,grad_norm: 0.9999990632510827, iteration: 114911
loss: 1.0154378414154053,grad_norm: 0.9999991241214339, iteration: 114912
loss: 1.0562032461166382,grad_norm: 0.9999991687068998, iteration: 114913
loss: 0.974159300327301,grad_norm: 0.9999991837627551, iteration: 114914
loss: 1.0812908411026,grad_norm: 0.9999998730772747, iteration: 114915
loss: 1.0179744958877563,grad_norm: 0.9999990378267386, iteration: 114916
loss: 1.0194886922836304,grad_norm: 0.9584434304700552, iteration: 114917
loss: 1.0147114992141724,grad_norm: 0.9999991290941045, iteration: 114918
loss: 1.0009506940841675,grad_norm: 0.9999990986038557, iteration: 114919
loss: 1.026168704032898,grad_norm: 0.9999993489565401, iteration: 114920
loss: 1.0321780443191528,grad_norm: 0.9985674482595731, iteration: 114921
loss: 1.0520387887954712,grad_norm: 0.9999990229742253, iteration: 114922
loss: 0.9881662726402283,grad_norm: 0.9999992505379127, iteration: 114923
loss: 0.9961055517196655,grad_norm: 0.9570282410554515, iteration: 114924
loss: 0.9959807991981506,grad_norm: 0.9999992836563273, iteration: 114925
loss: 1.0383654832839966,grad_norm: 0.9999992866015367, iteration: 114926
loss: 1.0179097652435303,grad_norm: 0.9999990102068869, iteration: 114927
loss: 1.0118303298950195,grad_norm: 0.9999989446548005, iteration: 114928
loss: 1.0208660364151,grad_norm: 0.8748051793934064, iteration: 114929
loss: 0.986578643321991,grad_norm: 0.9999990846848498, iteration: 114930
loss: 1.0190967321395874,grad_norm: 0.9999991191887552, iteration: 114931
loss: 0.9843676090240479,grad_norm: 0.9679704674070717, iteration: 114932
loss: 1.0351368188858032,grad_norm: 0.9999990784698326, iteration: 114933
loss: 0.984309732913971,grad_norm: 0.9878831287981267, iteration: 114934
loss: 1.025981068611145,grad_norm: 0.9999991929244767, iteration: 114935
loss: 0.9949240684509277,grad_norm: 0.9919278522410134, iteration: 114936
loss: 0.9935572147369385,grad_norm: 0.892571300881018, iteration: 114937
loss: 0.9767628908157349,grad_norm: 0.9999992231218422, iteration: 114938
loss: 0.9777601361274719,grad_norm: 0.9763450100388356, iteration: 114939
loss: 0.9979735016822815,grad_norm: 0.9999990513579122, iteration: 114940
loss: 0.9942950010299683,grad_norm: 0.9598103684661964, iteration: 114941
loss: 0.9826340675354004,grad_norm: 0.999999047253896, iteration: 114942
loss: 1.0017359256744385,grad_norm: 0.9999989404867934, iteration: 114943
loss: 1.0188955068588257,grad_norm: 0.9876108083160333, iteration: 114944
loss: 0.9802435040473938,grad_norm: 0.9999991600438715, iteration: 114945
loss: 1.014492392539978,grad_norm: 0.9368853259375344, iteration: 114946
loss: 0.9487969279289246,grad_norm: 0.999999173403298, iteration: 114947
loss: 0.9876353740692139,grad_norm: 0.9206930670886446, iteration: 114948
loss: 1.001575231552124,grad_norm: 0.7827113880263207, iteration: 114949
loss: 0.9745106101036072,grad_norm: 0.9999995595339409, iteration: 114950
loss: 1.0812788009643555,grad_norm: 0.9413507520049933, iteration: 114951
loss: 1.007468581199646,grad_norm: 0.9902081119467823, iteration: 114952
loss: 0.9777418971061707,grad_norm: 0.9999989643826359, iteration: 114953
loss: 1.0064250230789185,grad_norm: 0.9913440074141934, iteration: 114954
loss: 0.9810853004455566,grad_norm: 0.9999990837720851, iteration: 114955
loss: 1.0005022287368774,grad_norm: 0.9999991799443048, iteration: 114956
loss: 1.0712577104568481,grad_norm: 0.999999031803826, iteration: 114957
loss: 1.0229320526123047,grad_norm: 0.9999992402431668, iteration: 114958
loss: 1.031853199005127,grad_norm: 0.9999992415680893, iteration: 114959
loss: 1.0259813070297241,grad_norm: 0.9999993095504502, iteration: 114960
loss: 1.020401120185852,grad_norm: 0.999999192313349, iteration: 114961
loss: 0.9988066554069519,grad_norm: 0.9999991882540609, iteration: 114962
loss: 1.0199558734893799,grad_norm: 0.9999993482533103, iteration: 114963
loss: 0.979182779788971,grad_norm: 0.9999991660010059, iteration: 114964
loss: 1.0059746503829956,grad_norm: 0.8939495581809199, iteration: 114965
loss: 0.9834807515144348,grad_norm: 0.9999989858294794, iteration: 114966
loss: 0.9553003907203674,grad_norm: 0.9999991919839426, iteration: 114967
loss: 1.010449767112732,grad_norm: 0.924193268853925, iteration: 114968
loss: 0.9943404197692871,grad_norm: 0.9999990296416101, iteration: 114969
loss: 0.9947952032089233,grad_norm: 0.9683438692361064, iteration: 114970
loss: 0.9971159100532532,grad_norm: 0.9999991632350417, iteration: 114971
loss: 0.984223484992981,grad_norm: 0.9999991077425436, iteration: 114972
loss: 0.9961868524551392,grad_norm: 0.9999990105628356, iteration: 114973
loss: 1.0333147048950195,grad_norm: 0.9999995012649427, iteration: 114974
loss: 1.0169013738632202,grad_norm: 0.9999990036393289, iteration: 114975
loss: 1.0084788799285889,grad_norm: 0.9822953672701088, iteration: 114976
loss: 1.0017528533935547,grad_norm: 0.9999991093511604, iteration: 114977
loss: 1.0204092264175415,grad_norm: 0.9999992004971225, iteration: 114978
loss: 0.9853383898735046,grad_norm: 0.9508984556454892, iteration: 114979
loss: 1.0342124700546265,grad_norm: 0.999999403161643, iteration: 114980
loss: 1.0146633386611938,grad_norm: 0.9999992434130072, iteration: 114981
loss: 1.0129023790359497,grad_norm: 0.9664716656768388, iteration: 114982
loss: 0.9849063754081726,grad_norm: 0.999999275504637, iteration: 114983
loss: 1.0009845495224,grad_norm: 0.9734133625474949, iteration: 114984
loss: 1.0016344785690308,grad_norm: 0.9999989763832638, iteration: 114985
loss: 0.98576420545578,grad_norm: 0.9623457947431119, iteration: 114986
loss: 1.0035874843597412,grad_norm: 0.920265110814181, iteration: 114987
loss: 0.9793612957000732,grad_norm: 0.9889008986705246, iteration: 114988
loss: 0.9974241852760315,grad_norm: 0.9999991213969571, iteration: 114989
loss: 1.0077972412109375,grad_norm: 0.9913674721556854, iteration: 114990
loss: 0.9741542935371399,grad_norm: 0.9999991464225174, iteration: 114991
loss: 0.9798862934112549,grad_norm: 0.9999990047747146, iteration: 114992
loss: 0.9760948419570923,grad_norm: 0.9422841433914245, iteration: 114993
loss: 0.9691435694694519,grad_norm: 0.9999992428784183, iteration: 114994
loss: 0.9862820506095886,grad_norm: 0.9999991080052135, iteration: 114995
loss: 0.9824454188346863,grad_norm: 0.9410321141991295, iteration: 114996
loss: 1.0575274229049683,grad_norm: 0.9999991726935223, iteration: 114997
loss: 1.0237866640090942,grad_norm: 0.9999989863624834, iteration: 114998
loss: 0.9753333926200867,grad_norm: 0.984646349018239, iteration: 114999
loss: 0.9792398810386658,grad_norm: 0.9999991996298274, iteration: 115000
loss: 0.9922718405723572,grad_norm: 0.9999991323486506, iteration: 115001
loss: 1.0164308547973633,grad_norm: 0.9588696850693421, iteration: 115002
loss: 0.9824406504631042,grad_norm: 0.8773997171856535, iteration: 115003
loss: 1.0008183717727661,grad_norm: 0.9999991576943612, iteration: 115004
loss: 0.9833949208259583,grad_norm: 0.8305220980574238, iteration: 115005
loss: 0.9949300289154053,grad_norm: 0.9956134897156773, iteration: 115006
loss: 1.0106712579727173,grad_norm: 0.9716426824610775, iteration: 115007
loss: 0.9952899217605591,grad_norm: 0.9648814163177215, iteration: 115008
loss: 0.9507172107696533,grad_norm: 0.999999051562436, iteration: 115009
loss: 1.0946364402770996,grad_norm: 0.9999994095305542, iteration: 115010
loss: 1.0130393505096436,grad_norm: 0.9141156335828526, iteration: 115011
loss: 1.005127191543579,grad_norm: 0.9999996808401217, iteration: 115012
loss: 1.0071542263031006,grad_norm: 0.9268394329682668, iteration: 115013
loss: 0.9767805933952332,grad_norm: 0.98382667245857, iteration: 115014
loss: 0.9594787359237671,grad_norm: 0.8704622971050783, iteration: 115015
loss: 1.025864839553833,grad_norm: 0.9999991890405737, iteration: 115016
loss: 1.0661200284957886,grad_norm: 0.9999998368672772, iteration: 115017
loss: 0.9803491234779358,grad_norm: 0.9942371940965687, iteration: 115018
loss: 0.9827356934547424,grad_norm: 0.9999990883969516, iteration: 115019
loss: 1.0059040784835815,grad_norm: 0.9999992743831524, iteration: 115020
loss: 1.0034303665161133,grad_norm: 0.9999991845410914, iteration: 115021
loss: 1.0323163270950317,grad_norm: 0.9999990418423144, iteration: 115022
loss: 1.0627975463867188,grad_norm: 0.9525621573052387, iteration: 115023
loss: 1.0098353624343872,grad_norm: 0.9999991571174583, iteration: 115024
loss: 1.0216822624206543,grad_norm: 0.9999989818415822, iteration: 115025
loss: 1.0099343061447144,grad_norm: 0.9999991327321655, iteration: 115026
loss: 1.0120943784713745,grad_norm: 0.879123308072283, iteration: 115027
loss: 0.9800792336463928,grad_norm: 0.9999989534951276, iteration: 115028
loss: 0.9974787831306458,grad_norm: 0.9999992944146138, iteration: 115029
loss: 0.9878263473510742,grad_norm: 0.8601243882094448, iteration: 115030
loss: 1.0101134777069092,grad_norm: 0.999999191799625, iteration: 115031
loss: 0.9886264204978943,grad_norm: 0.9999990925840081, iteration: 115032
loss: 0.9964473247528076,grad_norm: 0.9999991242921512, iteration: 115033
loss: 0.9924107193946838,grad_norm: 0.9999989836697896, iteration: 115034
loss: 0.9728817939758301,grad_norm: 0.9999990428574386, iteration: 115035
loss: 1.0053948163986206,grad_norm: 0.9999990571266754, iteration: 115036
loss: 1.0339077711105347,grad_norm: 0.9999992913495936, iteration: 115037
loss: 0.9988235831260681,grad_norm: 0.9999992675521057, iteration: 115038
loss: 0.9700028896331787,grad_norm: 0.9428409574405118, iteration: 115039
loss: 0.9979585409164429,grad_norm: 0.9604547974269749, iteration: 115040
loss: 1.0138037204742432,grad_norm: 0.9999994001859408, iteration: 115041
loss: 1.0011659860610962,grad_norm: 0.9999991920457866, iteration: 115042
loss: 0.9772509336471558,grad_norm: 0.9999991985679303, iteration: 115043
loss: 0.9904171228408813,grad_norm: 0.9968471993371033, iteration: 115044
loss: 1.0041780471801758,grad_norm: 0.9999990862091754, iteration: 115045
loss: 0.9623045921325684,grad_norm: 0.8413627425623734, iteration: 115046
loss: 1.0343352556228638,grad_norm: 0.9148284161883893, iteration: 115047
loss: 1.0106202363967896,grad_norm: 0.9634045464165899, iteration: 115048
loss: 1.038952112197876,grad_norm: 0.9999991557705591, iteration: 115049
loss: 0.9475157260894775,grad_norm: 0.9999992292292521, iteration: 115050
loss: 0.9907159805297852,grad_norm: 0.9631654373715993, iteration: 115051
loss: 0.9720302224159241,grad_norm: 0.9999990904343375, iteration: 115052
loss: 0.9800218939781189,grad_norm: 0.9999990223203862, iteration: 115053
loss: 0.9563393592834473,grad_norm: 0.9999992002810278, iteration: 115054
loss: 0.9771174788475037,grad_norm: 0.999999192761749, iteration: 115055
loss: 0.994560182094574,grad_norm: 0.9999991182112048, iteration: 115056
loss: 1.0112892389297485,grad_norm: 0.9688978001675497, iteration: 115057
loss: 1.0399858951568604,grad_norm: 0.9999991611089846, iteration: 115058
loss: 1.0126512050628662,grad_norm: 0.9999990496348095, iteration: 115059
loss: 0.9997779130935669,grad_norm: 0.9855744157429683, iteration: 115060
loss: 1.0108455419540405,grad_norm: 0.9999990688775192, iteration: 115061
loss: 0.9688546657562256,grad_norm: 0.9999990660583926, iteration: 115062
loss: 1.03025221824646,grad_norm: 0.9999990714728175, iteration: 115063
loss: 0.9918230772018433,grad_norm: 0.9999990025204795, iteration: 115064
loss: 1.0231748819351196,grad_norm: 0.9999989351126387, iteration: 115065
loss: 0.9640920162200928,grad_norm: 0.9999990702898235, iteration: 115066
loss: 1.0167059898376465,grad_norm: 0.8988088590768801, iteration: 115067
loss: 1.0180025100708008,grad_norm: 0.9999991811283121, iteration: 115068
loss: 0.9992625713348389,grad_norm: 0.9999990773841805, iteration: 115069
loss: 0.9988911151885986,grad_norm: 0.999999020241297, iteration: 115070
loss: 0.9743216037750244,grad_norm: 0.9999992632392434, iteration: 115071
loss: 1.0127437114715576,grad_norm: 0.8338521640756301, iteration: 115072
loss: 1.0331130027770996,grad_norm: 0.9999991546074593, iteration: 115073
loss: 1.0487011671066284,grad_norm: 0.9999992589953524, iteration: 115074
loss: 0.9864752888679504,grad_norm: 0.9875071442440879, iteration: 115075
loss: 0.9835538864135742,grad_norm: 0.9999992052452914, iteration: 115076
loss: 0.9893340468406677,grad_norm: 0.9999993255703735, iteration: 115077
loss: 1.0124638080596924,grad_norm: 0.9999991506054617, iteration: 115078
loss: 1.0003066062927246,grad_norm: 0.9999993175301791, iteration: 115079
loss: 1.0141767263412476,grad_norm: 0.9999990357830904, iteration: 115080
loss: 1.0697953701019287,grad_norm: 0.9999996050743591, iteration: 115081
loss: 1.001853346824646,grad_norm: 0.9469251509665583, iteration: 115082
loss: 0.9801715016365051,grad_norm: 0.9999992774775063, iteration: 115083
loss: 0.9847156405448914,grad_norm: 0.9661430797324647, iteration: 115084
loss: 0.9709893465042114,grad_norm: 0.9999991861954225, iteration: 115085
loss: 0.9829564690589905,grad_norm: 0.9999991850649613, iteration: 115086
loss: 1.0556002855300903,grad_norm: 0.9999991483114562, iteration: 115087
loss: 1.0758966207504272,grad_norm: 0.9648849990245608, iteration: 115088
loss: 0.9935666918754578,grad_norm: 0.9545844822360211, iteration: 115089
loss: 0.9864089488983154,grad_norm: 0.9976894335227713, iteration: 115090
loss: 1.0094882249832153,grad_norm: 0.935334338757511, iteration: 115091
loss: 1.0488213300704956,grad_norm: 0.9999990405951898, iteration: 115092
loss: 0.9909021854400635,grad_norm: 0.9999990994276429, iteration: 115093
loss: 1.0042418241500854,grad_norm: 0.9999990424193813, iteration: 115094
loss: 1.0282670259475708,grad_norm: 0.8637122186009909, iteration: 115095
loss: 1.002739667892456,grad_norm: 0.9999993316328134, iteration: 115096
loss: 0.9894425272941589,grad_norm: 0.999999185537302, iteration: 115097
loss: 0.9756017923355103,grad_norm: 0.9999991045301037, iteration: 115098
loss: 0.9724061489105225,grad_norm: 0.9999990586266737, iteration: 115099
loss: 0.9900301694869995,grad_norm: 0.9999992130894685, iteration: 115100
loss: 1.0021625757217407,grad_norm: 0.9022857089234798, iteration: 115101
loss: 0.9810082316398621,grad_norm: 0.999999064006295, iteration: 115102
loss: 0.9890738129615784,grad_norm: 0.9327106309605767, iteration: 115103
loss: 0.9779571294784546,grad_norm: 0.9999992719888231, iteration: 115104
loss: 0.9990314245223999,grad_norm: 0.983450697708375, iteration: 115105
loss: 1.0228312015533447,grad_norm: 0.999999149066068, iteration: 115106
loss: 0.9727563261985779,grad_norm: 0.9999991818785625, iteration: 115107
loss: 0.9919156432151794,grad_norm: 0.9999990875445276, iteration: 115108
loss: 1.0145033597946167,grad_norm: 0.9999993932846776, iteration: 115109
loss: 1.0281336307525635,grad_norm: 0.9999991430808886, iteration: 115110
loss: 0.9924026727676392,grad_norm: 0.9920103157896522, iteration: 115111
loss: 0.9858034253120422,grad_norm: 0.9999991787129706, iteration: 115112
loss: 1.0147401094436646,grad_norm: 0.9999991513323971, iteration: 115113
loss: 0.9942649602890015,grad_norm: 0.8146605047345691, iteration: 115114
loss: 1.0011868476867676,grad_norm: 0.9999992059426281, iteration: 115115
loss: 1.0041484832763672,grad_norm: 0.8945096362506874, iteration: 115116
loss: 1.021196961402893,grad_norm: 0.9113013376584378, iteration: 115117
loss: 1.0016705989837646,grad_norm: 0.9999993048928205, iteration: 115118
loss: 0.9893817901611328,grad_norm: 0.9999992056372365, iteration: 115119
loss: 1.0185266733169556,grad_norm: 0.9237366937242536, iteration: 115120
loss: 0.9880213737487793,grad_norm: 0.9999991090325284, iteration: 115121
loss: 0.9894670248031616,grad_norm: 0.9664613688798374, iteration: 115122
loss: 0.9855826497077942,grad_norm: 0.9999993706440311, iteration: 115123
loss: 0.9800140857696533,grad_norm: 0.9999993067152443, iteration: 115124
loss: 1.001757025718689,grad_norm: 0.9980463454970275, iteration: 115125
loss: 0.9662164449691772,grad_norm: 0.9076059996325391, iteration: 115126
loss: 1.0043584108352661,grad_norm: 0.8332822674229907, iteration: 115127
loss: 0.9912424683570862,grad_norm: 0.9999990586415889, iteration: 115128
loss: 1.0323385000228882,grad_norm: 0.9999998394356042, iteration: 115129
loss: 0.986750602722168,grad_norm: 0.9999991887211876, iteration: 115130
loss: 0.9836001396179199,grad_norm: 0.999999161138148, iteration: 115131
loss: 0.9754678010940552,grad_norm: 0.9999993365551062, iteration: 115132
loss: 0.9982607960700989,grad_norm: 0.8996661874370901, iteration: 115133
loss: 0.9795773029327393,grad_norm: 0.9933422501871946, iteration: 115134
loss: 0.940883219242096,grad_norm: 0.9999992159966914, iteration: 115135
loss: 0.9865146279335022,grad_norm: 0.9215686892130194, iteration: 115136
loss: 1.026849389076233,grad_norm: 0.9999992173366592, iteration: 115137
loss: 0.9855102300643921,grad_norm: 0.9999990045121671, iteration: 115138
loss: 1.0410926342010498,grad_norm: 0.9999991794752545, iteration: 115139
loss: 1.0467185974121094,grad_norm: 0.99999919286986, iteration: 115140
loss: 1.033149003982544,grad_norm: 0.9339442995425088, iteration: 115141
loss: 0.9797961115837097,grad_norm: 0.9793882151081424, iteration: 115142
loss: 1.0479201078414917,grad_norm: 0.9999991230747798, iteration: 115143
loss: 0.9755732417106628,grad_norm: 0.9999992273839422, iteration: 115144
loss: 0.9693689942359924,grad_norm: 0.9999990561599164, iteration: 115145
loss: 0.9657500386238098,grad_norm: 0.9999992011627168, iteration: 115146
loss: 1.003517746925354,grad_norm: 0.9999991215810158, iteration: 115147
loss: 0.9940943121910095,grad_norm: 0.9999991554882208, iteration: 115148
loss: 1.0056716203689575,grad_norm: 0.878957738293781, iteration: 115149
loss: 1.0236467123031616,grad_norm: 0.9766780310319103, iteration: 115150
loss: 0.9840414524078369,grad_norm: 0.923667009091904, iteration: 115151
loss: 1.012658953666687,grad_norm: 0.999999012231795, iteration: 115152
loss: 1.0153868198394775,grad_norm: 0.9358351948021287, iteration: 115153
loss: 1.0112216472625732,grad_norm: 0.9999990480955869, iteration: 115154
loss: 0.9752801656723022,grad_norm: 0.9999992416430005, iteration: 115155
loss: 0.9837638735771179,grad_norm: 0.9999989203465175, iteration: 115156
loss: 1.0214248895645142,grad_norm: 0.8870733897959749, iteration: 115157
loss: 1.0131196975708008,grad_norm: 0.9999990697100609, iteration: 115158
loss: 0.99947190284729,grad_norm: 0.9826158046807044, iteration: 115159
loss: 0.986414909362793,grad_norm: 0.9133965843827827, iteration: 115160
loss: 0.9579399228096008,grad_norm: 0.9999989263649648, iteration: 115161
loss: 1.0141046047210693,grad_norm: 0.9999990203756114, iteration: 115162
loss: 1.0209375619888306,grad_norm: 0.999999347595458, iteration: 115163
loss: 0.9975799322128296,grad_norm: 0.9999991291020012, iteration: 115164
loss: 1.016024112701416,grad_norm: 0.9999993087533273, iteration: 115165
loss: 0.9804190993309021,grad_norm: 0.9999989967717969, iteration: 115166
loss: 0.9752293825149536,grad_norm: 0.9999991066603492, iteration: 115167
loss: 1.0439242124557495,grad_norm: 0.9999991874553513, iteration: 115168
loss: 0.9832841157913208,grad_norm: 0.9999992078792898, iteration: 115169
loss: 0.9830062985420227,grad_norm: 0.9351678051364495, iteration: 115170
loss: 0.9315249919891357,grad_norm: 0.9999990461251387, iteration: 115171
loss: 1.0197985172271729,grad_norm: 0.9982068934223339, iteration: 115172
loss: 0.997677743434906,grad_norm: 0.9999996076939125, iteration: 115173
loss: 0.9971463680267334,grad_norm: 0.8725515974678281, iteration: 115174
loss: 0.9940474033355713,grad_norm: 0.9999992924887277, iteration: 115175
loss: 1.0242992639541626,grad_norm: 0.9999997061796034, iteration: 115176
loss: 0.9879828095436096,grad_norm: 0.7986491160988817, iteration: 115177
loss: 0.9649601578712463,grad_norm: 0.9939547954112825, iteration: 115178
loss: 0.9959619045257568,grad_norm: 0.9597935203019203, iteration: 115179
loss: 0.9998412132263184,grad_norm: 0.9199445834411802, iteration: 115180
loss: 1.0190762281417847,grad_norm: 0.9843435292281736, iteration: 115181
loss: 1.0320816040039062,grad_norm: 0.9999991991604992, iteration: 115182
loss: 1.0130165815353394,grad_norm: 0.9665380654494193, iteration: 115183
loss: 0.9804176092147827,grad_norm: 0.94889269323814, iteration: 115184
loss: 1.0156733989715576,grad_norm: 0.9999990877511928, iteration: 115185
loss: 0.9583394527435303,grad_norm: 0.9999992926882035, iteration: 115186
loss: 1.0103435516357422,grad_norm: 0.9999996539661025, iteration: 115187
loss: 1.0278880596160889,grad_norm: 0.9999990378505464, iteration: 115188
loss: 1.0075773000717163,grad_norm: 0.8258025843242213, iteration: 115189
loss: 0.9713611006736755,grad_norm: 0.9999991321910421, iteration: 115190
loss: 0.9715417623519897,grad_norm: 0.9452537501687786, iteration: 115191
loss: 0.9936584234237671,grad_norm: 0.9999991088670441, iteration: 115192
loss: 0.9893366694450378,grad_norm: 0.9999991219395757, iteration: 115193
loss: 0.9956992268562317,grad_norm: 0.9999989843128722, iteration: 115194
loss: 0.9569071531295776,grad_norm: 0.9157152183637703, iteration: 115195
loss: 1.0176273584365845,grad_norm: 0.999999160079092, iteration: 115196
loss: 0.9855977892875671,grad_norm: 0.8662371175697068, iteration: 115197
loss: 0.9929766058921814,grad_norm: 0.9999992757049627, iteration: 115198
loss: 0.9904643297195435,grad_norm: 0.9763136880996695, iteration: 115199
loss: 0.9844583868980408,grad_norm: 0.9999993653425598, iteration: 115200
loss: 1.0316362380981445,grad_norm: 0.9999990664728396, iteration: 115201
loss: 0.9936083555221558,grad_norm: 0.9999989869771395, iteration: 115202
loss: 1.0043517351150513,grad_norm: 0.9216984703791729, iteration: 115203
loss: 0.973633885383606,grad_norm: 0.9999993325300044, iteration: 115204
loss: 0.9904212355613708,grad_norm: 0.9594557055352306, iteration: 115205
loss: 1.0000194311141968,grad_norm: 0.9905901456494867, iteration: 115206
loss: 1.0019758939743042,grad_norm: 0.9138560655940164, iteration: 115207
loss: 1.0038923025131226,grad_norm: 0.9874957488360722, iteration: 115208
loss: 1.0003076791763306,grad_norm: 0.9999993377789537, iteration: 115209
loss: 1.008375644683838,grad_norm: 0.9999989823731557, iteration: 115210
loss: 1.0580425262451172,grad_norm: 0.8867312222148445, iteration: 115211
loss: 0.9877180457115173,grad_norm: 0.9401179727464887, iteration: 115212
loss: 1.0021307468414307,grad_norm: 0.9938138431950916, iteration: 115213
loss: 1.0202126502990723,grad_norm: 0.9999990689794117, iteration: 115214
loss: 0.9982134103775024,grad_norm: 0.9702198099944532, iteration: 115215
loss: 1.0043073892593384,grad_norm: 0.9303145623042226, iteration: 115216
loss: 1.0470714569091797,grad_norm: 0.9318271841733111, iteration: 115217
loss: 1.0183534622192383,grad_norm: 0.9999990689469909, iteration: 115218
loss: 1.016255259513855,grad_norm: 0.8999838435673878, iteration: 115219
loss: 0.984780490398407,grad_norm: 0.8654901022377002, iteration: 115220
loss: 0.9819138646125793,grad_norm: 0.9999991701098976, iteration: 115221
loss: 1.0131943225860596,grad_norm: 0.9999991790502682, iteration: 115222
loss: 1.0176451206207275,grad_norm: 0.9797218841510525, iteration: 115223
loss: 1.0193830728530884,grad_norm: 0.9999992722368661, iteration: 115224
loss: 0.9970938563346863,grad_norm: 0.999999142269505, iteration: 115225
loss: 0.9898139238357544,grad_norm: 0.9999997575153574, iteration: 115226
loss: 1.0114268064498901,grad_norm: 0.9999992141655137, iteration: 115227
loss: 1.033196210861206,grad_norm: 0.9999991818041262, iteration: 115228
loss: 0.9852762818336487,grad_norm: 0.9999991768755413, iteration: 115229
loss: 0.9992961883544922,grad_norm: 0.9416347013716021, iteration: 115230
loss: 1.0090522766113281,grad_norm: 0.9999991570206871, iteration: 115231
loss: 1.0305250883102417,grad_norm: 0.9999992988960259, iteration: 115232
loss: 0.9962883591651917,grad_norm: 0.9999989712386116, iteration: 115233
loss: 1.0069034099578857,grad_norm: 0.9119147748629289, iteration: 115234
loss: 0.9890349507331848,grad_norm: 0.9999991123487506, iteration: 115235
loss: 0.9931195974349976,grad_norm: 0.9704761125131844, iteration: 115236
loss: 1.0216224193572998,grad_norm: 0.9791235845196335, iteration: 115237
loss: 0.9996742606163025,grad_norm: 0.9344091277947045, iteration: 115238
loss: 0.9840250015258789,grad_norm: 0.9609838372446066, iteration: 115239
loss: 1.0020232200622559,grad_norm: 0.999999795288185, iteration: 115240
loss: 1.002456545829773,grad_norm: 0.9999992402852278, iteration: 115241
loss: 1.0293136835098267,grad_norm: 0.9999990464003126, iteration: 115242
loss: 0.9813000559806824,grad_norm: 0.9999992133545278, iteration: 115243
loss: 1.012034296989441,grad_norm: 0.9999990679970305, iteration: 115244
loss: 1.1736279726028442,grad_norm: 0.9999997286904881, iteration: 115245
loss: 0.9953248500823975,grad_norm: 0.9999992173261653, iteration: 115246
loss: 0.981634795665741,grad_norm: 0.9332780941703601, iteration: 115247
loss: 1.0045416355133057,grad_norm: 0.9999992728768093, iteration: 115248
loss: 0.9897727966308594,grad_norm: 0.9129653470984802, iteration: 115249
loss: 1.0116240978240967,grad_norm: 0.9999990256978969, iteration: 115250
loss: 0.987760066986084,grad_norm: 0.992244988018313, iteration: 115251
loss: 1.0170644521713257,grad_norm: 0.9999994253909014, iteration: 115252
loss: 1.0323998928070068,grad_norm: 0.970310061240195, iteration: 115253
loss: 0.9822728037834167,grad_norm: 0.9857353674028407, iteration: 115254
loss: 1.0165269374847412,grad_norm: 0.999999084181163, iteration: 115255
loss: 1.0505355596542358,grad_norm: 0.9999999354112084, iteration: 115256
loss: 1.0038676261901855,grad_norm: 0.9999990979739439, iteration: 115257
loss: 0.9889482259750366,grad_norm: 0.9218034291663929, iteration: 115258
loss: 1.0258398056030273,grad_norm: 0.9999993112935626, iteration: 115259
loss: 1.003821849822998,grad_norm: 0.923813071685345, iteration: 115260
loss: 1.0034583806991577,grad_norm: 0.999999631603591, iteration: 115261
loss: 0.9833554625511169,grad_norm: 0.9999989934508162, iteration: 115262
loss: 0.9825378656387329,grad_norm: 0.9999993144214423, iteration: 115263
loss: 0.9882270693778992,grad_norm: 0.9846644490679552, iteration: 115264
loss: 0.9713144898414612,grad_norm: 0.9999989825114257, iteration: 115265
loss: 0.9372701644897461,grad_norm: 0.9999992828552468, iteration: 115266
loss: 0.9877412915229797,grad_norm: 0.9223718921192176, iteration: 115267
loss: 0.9812484383583069,grad_norm: 0.9999989797480764, iteration: 115268
loss: 1.0018298625946045,grad_norm: 0.8791184471607962, iteration: 115269
loss: 0.9894632697105408,grad_norm: 0.9999992219240083, iteration: 115270
loss: 0.9927167892456055,grad_norm: 0.9999991284240344, iteration: 115271
loss: 0.9673706889152527,grad_norm: 0.9999990994539105, iteration: 115272
loss: 0.9915814399719238,grad_norm: 0.9625054649289867, iteration: 115273
loss: 1.0232568979263306,grad_norm: 0.9999993808171356, iteration: 115274
loss: 1.0197880268096924,grad_norm: 0.9899627088942828, iteration: 115275
loss: 1.0041900873184204,grad_norm: 0.9999989878539418, iteration: 115276
loss: 1.002215027809143,grad_norm: 0.8426545024538138, iteration: 115277
loss: 1.0020703077316284,grad_norm: 0.9999990838914827, iteration: 115278
loss: 0.9642181396484375,grad_norm: 0.999998970883966, iteration: 115279
loss: 1.0037965774536133,grad_norm: 0.9999993750816494, iteration: 115280
loss: 1.0021573305130005,grad_norm: 0.999999292300623, iteration: 115281
loss: 0.9840510487556458,grad_norm: 0.9999991593204984, iteration: 115282
loss: 0.9999567270278931,grad_norm: 0.9252544972447525, iteration: 115283
loss: 1.0100347995758057,grad_norm: 0.9999991982175377, iteration: 115284
loss: 0.9793404340744019,grad_norm: 0.9392036535350419, iteration: 115285
loss: 1.0043630599975586,grad_norm: 0.999999225979868, iteration: 115286
loss: 0.9676753878593445,grad_norm: 0.9999990165386233, iteration: 115287
loss: 1.017142415046692,grad_norm: 0.9801814215404023, iteration: 115288
loss: 0.9747688174247742,grad_norm: 0.9999992999381744, iteration: 115289
loss: 0.9881287813186646,grad_norm: 0.999999141658269, iteration: 115290
loss: 1.0344308614730835,grad_norm: 0.9999991466685162, iteration: 115291
loss: 1.041954755783081,grad_norm: 0.999999222321362, iteration: 115292
loss: 0.9718105792999268,grad_norm: 0.8940032820433663, iteration: 115293
loss: 1.0143866539001465,grad_norm: 0.9706307548599029, iteration: 115294
loss: 0.9876883029937744,grad_norm: 0.8659577568216879, iteration: 115295
loss: 1.013948917388916,grad_norm: 0.9692776886775472, iteration: 115296
loss: 0.9741886258125305,grad_norm: 0.9915763019956042, iteration: 115297
loss: 0.9717823266983032,grad_norm: 0.9999991233726857, iteration: 115298
loss: 1.0006929636001587,grad_norm: 0.9999991827524354, iteration: 115299
loss: 0.9804989695549011,grad_norm: 0.9805885962328608, iteration: 115300
loss: 0.9763030409812927,grad_norm: 0.9020235967071467, iteration: 115301
loss: 0.9562690258026123,grad_norm: 0.9805247408282055, iteration: 115302
loss: 1.0062142610549927,grad_norm: 0.9824370588705766, iteration: 115303
loss: 0.9825679659843445,grad_norm: 0.9999992299954981, iteration: 115304
loss: 1.0092400312423706,grad_norm: 0.9999990414064166, iteration: 115305
loss: 1.0033681392669678,grad_norm: 0.9999989147607548, iteration: 115306
loss: 0.9952816367149353,grad_norm: 0.9999991179053507, iteration: 115307
loss: 1.0084540843963623,grad_norm: 0.9999989620508993, iteration: 115308
loss: 0.995205819606781,grad_norm: 0.9554270268338535, iteration: 115309
loss: 0.9947094917297363,grad_norm: 0.9999990395179047, iteration: 115310
loss: 1.0213100910186768,grad_norm: 0.9999992907625326, iteration: 115311
loss: 1.0211886167526245,grad_norm: 0.9999992747160342, iteration: 115312
loss: 0.9867609739303589,grad_norm: 0.9414949224010786, iteration: 115313
loss: 1.0273226499557495,grad_norm: 0.9643922331313862, iteration: 115314
loss: 1.0156742334365845,grad_norm: 0.9583693022343892, iteration: 115315
loss: 0.9818133115768433,grad_norm: 0.9903953661363809, iteration: 115316
loss: 0.9947722554206848,grad_norm: 0.9999990506054506, iteration: 115317
loss: 0.9871384501457214,grad_norm: 0.999999169566337, iteration: 115318
loss: 1.0127544403076172,grad_norm: 0.9999992190231567, iteration: 115319
loss: 0.9836285710334778,grad_norm: 0.9999991849547272, iteration: 115320
loss: 1.018630027770996,grad_norm: 0.9999992346765759, iteration: 115321
loss: 0.9594333171844482,grad_norm: 0.9999992270227953, iteration: 115322
loss: 1.0134994983673096,grad_norm: 0.9624496746864192, iteration: 115323
loss: 0.9961490631103516,grad_norm: 0.9999990081865084, iteration: 115324
loss: 0.9766269326210022,grad_norm: 0.9647447599284621, iteration: 115325
loss: 0.9910635948181152,grad_norm: 0.9724862081438513, iteration: 115326
loss: 0.9615858197212219,grad_norm: 0.9999992692347723, iteration: 115327
loss: 1.0123765468597412,grad_norm: 0.99999901012312, iteration: 115328
loss: 1.0006870031356812,grad_norm: 0.999999003232277, iteration: 115329
loss: 0.978411853313446,grad_norm: 0.9999993447693062, iteration: 115330
loss: 0.9977654218673706,grad_norm: 0.9999990821068349, iteration: 115331
loss: 0.9807249307632446,grad_norm: 0.9388517058834783, iteration: 115332
loss: 1.016829252243042,grad_norm: 0.9999989008597155, iteration: 115333
loss: 1.0303478240966797,grad_norm: 0.9999992419526716, iteration: 115334
loss: 0.975114643573761,grad_norm: 0.9999991277135838, iteration: 115335
loss: 1.0023730993270874,grad_norm: 0.8975647076322487, iteration: 115336
loss: 0.970892608165741,grad_norm: 0.999998997652329, iteration: 115337
loss: 1.0370815992355347,grad_norm: 0.999999148090575, iteration: 115338
loss: 1.0279663801193237,grad_norm: 0.999999102240544, iteration: 115339
loss: 0.9650781154632568,grad_norm: 0.9948199793308943, iteration: 115340
loss: 1.005183219909668,grad_norm: 0.9999992626762392, iteration: 115341
loss: 1.0028200149536133,grad_norm: 0.9999992717769403, iteration: 115342
loss: 0.9924441576004028,grad_norm: 0.9999991951232279, iteration: 115343
loss: 0.9892186522483826,grad_norm: 0.9999992723367469, iteration: 115344
loss: 0.9556781649589539,grad_norm: 0.9999991608983204, iteration: 115345
loss: 1.0110249519348145,grad_norm: 0.9516830121384635, iteration: 115346
loss: 1.0393468141555786,grad_norm: 0.9999992555141791, iteration: 115347
loss: 0.9711659550666809,grad_norm: 0.9250307407148557, iteration: 115348
loss: 0.9700358510017395,grad_norm: 0.999999246817459, iteration: 115349
loss: 1.0022588968276978,grad_norm: 0.9929984877464244, iteration: 115350
loss: 1.0180763006210327,grad_norm: 0.9999991539711072, iteration: 115351
loss: 0.9943515062332153,grad_norm: 0.83693579103771, iteration: 115352
loss: 1.0066407918930054,grad_norm: 0.9999991254984297, iteration: 115353
loss: 0.997151255607605,grad_norm: 0.9999991747350067, iteration: 115354
loss: 1.28504478931427,grad_norm: 0.9999992527772038, iteration: 115355
loss: 1.029386281967163,grad_norm: 0.9999993395138803, iteration: 115356
loss: 1.0425063371658325,grad_norm: 0.9903593144164594, iteration: 115357
loss: 0.9839351177215576,grad_norm: 0.9491277666240814, iteration: 115358
loss: 0.9678699970245361,grad_norm: 0.9999990027742424, iteration: 115359
loss: 1.0211665630340576,grad_norm: 0.9999990842438279, iteration: 115360
loss: 1.0171891450881958,grad_norm: 0.9999989269538405, iteration: 115361
loss: 1.0122209787368774,grad_norm: 0.9999992015730763, iteration: 115362
loss: 1.0061957836151123,grad_norm: 0.999999037072296, iteration: 115363
loss: 0.9833651781082153,grad_norm: 0.9999989171392455, iteration: 115364
loss: 1.0406500101089478,grad_norm: 0.9999991154031773, iteration: 115365
loss: 0.9573557376861572,grad_norm: 0.9999991895698414, iteration: 115366
loss: 0.9926874041557312,grad_norm: 0.999998918474924, iteration: 115367
loss: 0.9603729248046875,grad_norm: 0.9999992917691484, iteration: 115368
loss: 1.033208966255188,grad_norm: 0.9999991581918533, iteration: 115369
loss: 1.006062388420105,grad_norm: 0.9706776333465044, iteration: 115370
loss: 1.0245901346206665,grad_norm: 0.9999991066149913, iteration: 115371
loss: 1.0077029466629028,grad_norm: 0.9999991355086298, iteration: 115372
loss: 1.0229514837265015,grad_norm: 0.9999992324236269, iteration: 115373
loss: 1.0130467414855957,grad_norm: 0.9999990159143146, iteration: 115374
loss: 0.9988887906074524,grad_norm: 0.9999991891735063, iteration: 115375
loss: 0.9934995770454407,grad_norm: 0.9999991558865272, iteration: 115376
loss: 0.9828447699546814,grad_norm: 0.9999991579461892, iteration: 115377
loss: 0.9901243448257446,grad_norm: 0.9999993965751777, iteration: 115378
loss: 1.0083770751953125,grad_norm: 0.9999991188634859, iteration: 115379
loss: 0.9990856647491455,grad_norm: 0.9837329696676983, iteration: 115380
loss: 0.9826745390892029,grad_norm: 0.999999259859158, iteration: 115381
loss: 1.0537755489349365,grad_norm: 0.9999993261715465, iteration: 115382
loss: 1.0015937089920044,grad_norm: 0.9999990457570421, iteration: 115383
loss: 0.9911984205245972,grad_norm: 0.9930276864346337, iteration: 115384
loss: 1.0094019174575806,grad_norm: 0.8868385203665452, iteration: 115385
loss: 0.9568990468978882,grad_norm: 0.999999065212833, iteration: 115386
loss: 1.0251067876815796,grad_norm: 0.9999991324692524, iteration: 115387
loss: 1.0053285360336304,grad_norm: 0.9999990758283087, iteration: 115388
loss: 1.0485092401504517,grad_norm: 0.9999990331301803, iteration: 115389
loss: 1.012907862663269,grad_norm: 0.9587203794810845, iteration: 115390
loss: 1.0137687921524048,grad_norm: 0.9999992714158269, iteration: 115391
loss: 1.0020183324813843,grad_norm: 0.9999991559827278, iteration: 115392
loss: 1.1255220174789429,grad_norm: 0.9999998046612549, iteration: 115393
loss: 1.0002244710922241,grad_norm: 0.8988851969878142, iteration: 115394
loss: 0.9913687705993652,grad_norm: 0.9999991903337876, iteration: 115395
loss: 0.9885661005973816,grad_norm: 0.9037115332726702, iteration: 115396
loss: 0.9907397031784058,grad_norm: 0.9897108103154483, iteration: 115397
loss: 1.0013688802719116,grad_norm: 0.9999990883785044, iteration: 115398
loss: 1.006224513053894,grad_norm: 0.9999991709193319, iteration: 115399
loss: 0.9924833178520203,grad_norm: 0.9999991972514652, iteration: 115400
loss: 1.0239660739898682,grad_norm: 0.9952199374164876, iteration: 115401
loss: 0.9826496243476868,grad_norm: 0.9999994464270319, iteration: 115402
loss: 1.030165433883667,grad_norm: 0.9953790964384475, iteration: 115403
loss: 1.000983715057373,grad_norm: 0.9968420858480125, iteration: 115404
loss: 1.2868517637252808,grad_norm: 0.9999997603714101, iteration: 115405
loss: 0.983212411403656,grad_norm: 0.994615086158389, iteration: 115406
loss: 1.0650651454925537,grad_norm: 0.9999995323220211, iteration: 115407
loss: 1.014668345451355,grad_norm: 0.9999991093437185, iteration: 115408
loss: 0.9912109375,grad_norm: 0.9999991760260113, iteration: 115409
loss: 0.9698510766029358,grad_norm: 0.9999993106069928, iteration: 115410
loss: 1.0430967807769775,grad_norm: 0.9999995625743396, iteration: 115411
loss: 0.9938520789146423,grad_norm: 0.9999990573881161, iteration: 115412
loss: 1.029312014579773,grad_norm: 0.9999992224220274, iteration: 115413
loss: 0.9799737930297852,grad_norm: 0.999999184383649, iteration: 115414
loss: 0.9990882277488708,grad_norm: 0.999999195082813, iteration: 115415
loss: 1.0058540105819702,grad_norm: 0.9999993198693787, iteration: 115416
loss: 0.9891740679740906,grad_norm: 0.960587176063025, iteration: 115417
loss: 1.0256832838058472,grad_norm: 0.999999173305305, iteration: 115418
loss: 0.9888833165168762,grad_norm: 0.9286013277630665, iteration: 115419
loss: 0.9970332980155945,grad_norm: 0.9999991079523424, iteration: 115420
loss: 0.9898868203163147,grad_norm: 0.9999992075869798, iteration: 115421
loss: 1.0157841444015503,grad_norm: 0.9962518403125966, iteration: 115422
loss: 1.0020813941955566,grad_norm: 0.9859310760244933, iteration: 115423
loss: 1.00946843624115,grad_norm: 0.8903376126870236, iteration: 115424
loss: 1.0241599082946777,grad_norm: 0.9999992094766593, iteration: 115425
loss: 0.9928975105285645,grad_norm: 0.9999991434039117, iteration: 115426
loss: 0.989568293094635,grad_norm: 0.9999991933196116, iteration: 115427
loss: 1.0001124143600464,grad_norm: 0.9999990042375482, iteration: 115428
loss: 0.9894261360168457,grad_norm: 0.9485469199571016, iteration: 115429
loss: 1.0022598505020142,grad_norm: 0.909643777989201, iteration: 115430
loss: 0.9675218462944031,grad_norm: 0.8290981632709202, iteration: 115431
loss: 1.0421459674835205,grad_norm: 0.9999993293936258, iteration: 115432
loss: 0.9975846409797668,grad_norm: 0.9999991496364625, iteration: 115433
loss: 1.0020935535430908,grad_norm: 0.9999999113495932, iteration: 115434
loss: 1.0084244012832642,grad_norm: 0.9999992061966747, iteration: 115435
loss: 1.0335968732833862,grad_norm: 0.9999990893038084, iteration: 115436
loss: 1.0150103569030762,grad_norm: 0.9945482998383829, iteration: 115437
loss: 1.0023714303970337,grad_norm: 0.911253645055372, iteration: 115438
loss: 0.9917302131652832,grad_norm: 0.9999991127300762, iteration: 115439
loss: 1.0049508810043335,grad_norm: 0.9999992107414318, iteration: 115440
loss: 1.0006941556930542,grad_norm: 0.9999991568751601, iteration: 115441
loss: 1.026464819908142,grad_norm: 0.9999991175253856, iteration: 115442
loss: 1.0021452903747559,grad_norm: 0.9443539202164917, iteration: 115443
loss: 0.9902969002723694,grad_norm: 0.9999992040524349, iteration: 115444
loss: 0.9745270609855652,grad_norm: 0.9999990716379757, iteration: 115445
loss: 0.9708954095840454,grad_norm: 0.9999989727126718, iteration: 115446
loss: 1.0114551782608032,grad_norm: 0.9999989742254614, iteration: 115447
loss: 0.9980072975158691,grad_norm: 0.9815599065137275, iteration: 115448
loss: 0.9965021014213562,grad_norm: 0.9999992193124496, iteration: 115449
loss: 1.0010628700256348,grad_norm: 0.94020034041103, iteration: 115450
loss: 0.9562295079231262,grad_norm: 0.9999990772551742, iteration: 115451
loss: 1.011269211769104,grad_norm: 0.9781348051734358, iteration: 115452
loss: 0.9801467061042786,grad_norm: 0.9999991843347273, iteration: 115453
loss: 0.9914183020591736,grad_norm: 0.999999087228898, iteration: 115454
loss: 1.0185474157333374,grad_norm: 0.9999990889168476, iteration: 115455
loss: 1.015714406967163,grad_norm: 0.9999990377579832, iteration: 115456
loss: 1.0041477680206299,grad_norm: 0.9999991685824289, iteration: 115457
loss: 1.0038059949874878,grad_norm: 0.9999990829132308, iteration: 115458
loss: 0.9837029576301575,grad_norm: 0.9999991424620922, iteration: 115459
loss: 1.0699936151504517,grad_norm: 0.9999992196281016, iteration: 115460
loss: 1.0043960809707642,grad_norm: 0.9999991083716853, iteration: 115461
loss: 0.9869157075881958,grad_norm: 0.999999084603436, iteration: 115462
loss: 1.0259515047073364,grad_norm: 0.9999992354638906, iteration: 115463
loss: 1.0007280111312866,grad_norm: 0.8561878769036988, iteration: 115464
loss: 1.0080245733261108,grad_norm: 0.9999990318313452, iteration: 115465
loss: 0.9839001297950745,grad_norm: 0.9999992393979619, iteration: 115466
loss: 1.010392427444458,grad_norm: 0.9999989961244204, iteration: 115467
loss: 0.9714141488075256,grad_norm: 0.9999991175878606, iteration: 115468
loss: 0.9828612804412842,grad_norm: 0.8580541219305173, iteration: 115469
loss: 1.0057129859924316,grad_norm: 0.9999991466103948, iteration: 115470
loss: 1.0227299928665161,grad_norm: 0.9999990542794704, iteration: 115471
loss: 1.0039987564086914,grad_norm: 0.9999990899429059, iteration: 115472
loss: 1.008001685142517,grad_norm: 0.9999991769640882, iteration: 115473
loss: 0.9961731433868408,grad_norm: 0.9261946884761637, iteration: 115474
loss: 1.0164481401443481,grad_norm: 0.9999989789254152, iteration: 115475
loss: 0.9732149839401245,grad_norm: 0.9999994303633152, iteration: 115476
loss: 0.9889000654220581,grad_norm: 0.8726408096862401, iteration: 115477
loss: 0.9701668620109558,grad_norm: 0.9421984929060615, iteration: 115478
loss: 0.9829216003417969,grad_norm: 0.9999993015425073, iteration: 115479
loss: 0.9890599250793457,grad_norm: 0.9381817768924616, iteration: 115480
loss: 1.024835228919983,grad_norm: 0.9424199829332961, iteration: 115481
loss: 1.017860770225525,grad_norm: 0.9999991384030527, iteration: 115482
loss: 0.9689133167266846,grad_norm: 0.9999991918781331, iteration: 115483
loss: 0.9950485229492188,grad_norm: 0.9999993076780943, iteration: 115484
loss: 1.0134673118591309,grad_norm: 0.9999990893157658, iteration: 115485
loss: 1.0177348852157593,grad_norm: 0.9894920845864273, iteration: 115486
loss: 1.002166986465454,grad_norm: 0.9961918460966742, iteration: 115487
loss: 0.98245769739151,grad_norm: 0.9999990980458019, iteration: 115488
loss: 1.0214264392852783,grad_norm: 0.9426444197187109, iteration: 115489
loss: 0.9824258685112,grad_norm: 0.9227377088661002, iteration: 115490
loss: 1.007614016532898,grad_norm: 0.874669223190386, iteration: 115491
loss: 0.9978892803192139,grad_norm: 0.9772693616630846, iteration: 115492
loss: 1.0082930326461792,grad_norm: 0.9285750365791782, iteration: 115493
loss: 1.0170153379440308,grad_norm: 0.9999990849820255, iteration: 115494
loss: 0.9960230588912964,grad_norm: 0.9874964994878727, iteration: 115495
loss: 1.022714614868164,grad_norm: 0.9981243632772209, iteration: 115496
loss: 0.953789472579956,grad_norm: 0.827534608905255, iteration: 115497
loss: 0.9687109589576721,grad_norm: 0.9654438602854402, iteration: 115498
loss: 1.0120474100112915,grad_norm: 0.9811904437951499, iteration: 115499
loss: 1.008226752281189,grad_norm: 0.9999992827506619, iteration: 115500
loss: 0.9998319745063782,grad_norm: 0.9999994467834706, iteration: 115501
loss: 1.000696063041687,grad_norm: 0.9999991370475826, iteration: 115502
loss: 1.0171737670898438,grad_norm: 0.9999990842553387, iteration: 115503
loss: 0.999101996421814,grad_norm: 0.9999991564898155, iteration: 115504
loss: 1.0147082805633545,grad_norm: 0.9766220450271762, iteration: 115505
loss: 1.006527304649353,grad_norm: 0.9506402836625629, iteration: 115506
loss: 0.9768187403678894,grad_norm: 0.9975959373313878, iteration: 115507
loss: 1.0326814651489258,grad_norm: 0.9999990005617748, iteration: 115508
loss: 1.0160386562347412,grad_norm: 0.9121706581185408, iteration: 115509
loss: 0.9653550386428833,grad_norm: 0.9999991907563897, iteration: 115510
loss: 1.000151515007019,grad_norm: 0.9294735878616189, iteration: 115511
loss: 0.9947564005851746,grad_norm: 0.9539512862535484, iteration: 115512
loss: 1.0027618408203125,grad_norm: 0.957139276207436, iteration: 115513
loss: 0.9912463426589966,grad_norm: 0.9999993825295136, iteration: 115514
loss: 1.000289797782898,grad_norm: 0.9999991891361518, iteration: 115515
loss: 1.0118329524993896,grad_norm: 0.9999992038259444, iteration: 115516
loss: 1.038475513458252,grad_norm: 0.99999924878868, iteration: 115517
loss: 0.9929655194282532,grad_norm: 0.8227425753184575, iteration: 115518
loss: 1.011948585510254,grad_norm: 0.9312396192921287, iteration: 115519
loss: 1.0035532712936401,grad_norm: 0.8380876515099807, iteration: 115520
loss: 1.0414215326309204,grad_norm: 0.9999991180936919, iteration: 115521
loss: 0.993399977684021,grad_norm: 0.9999990981335973, iteration: 115522
loss: 0.9971315264701843,grad_norm: 0.9999990495724402, iteration: 115523
loss: 0.9882901906967163,grad_norm: 0.9874310557129252, iteration: 115524
loss: 0.9956162571907043,grad_norm: 0.9999993152327827, iteration: 115525
loss: 1.0105229616165161,grad_norm: 0.9999991338075519, iteration: 115526
loss: 1.04351806640625,grad_norm: 0.9999993193737644, iteration: 115527
loss: 1.0131020545959473,grad_norm: 0.9661645658152425, iteration: 115528
loss: 0.9901258945465088,grad_norm: 0.9999990271762313, iteration: 115529
loss: 1.142891764640808,grad_norm: 0.9999993591655325, iteration: 115530
loss: 1.0170986652374268,grad_norm: 0.9999991407416092, iteration: 115531
loss: 1.0180232524871826,grad_norm: 0.9999989659115011, iteration: 115532
loss: 0.9959567785263062,grad_norm: 0.9460647483379745, iteration: 115533
loss: 1.0192214250564575,grad_norm: 0.9765206699138759, iteration: 115534
loss: 1.000583529472351,grad_norm: 0.9461011280539946, iteration: 115535
loss: 1.0524330139160156,grad_norm: 0.9999997738055888, iteration: 115536
loss: 0.9797353744506836,grad_norm: 0.9999990088008179, iteration: 115537
loss: 1.0700774192810059,grad_norm: 0.9999992695816939, iteration: 115538
loss: 1.0884264707565308,grad_norm: 0.9999992738094139, iteration: 115539
loss: 0.9822406768798828,grad_norm: 0.9999991812912451, iteration: 115540
loss: 0.989413857460022,grad_norm: 0.9999990335159462, iteration: 115541
loss: 0.9728501439094543,grad_norm: 0.9999991041736497, iteration: 115542
loss: 0.989637017250061,grad_norm: 0.9999991329216079, iteration: 115543
loss: 0.9746308922767639,grad_norm: 0.9999991632525969, iteration: 115544
loss: 0.9810861945152283,grad_norm: 0.9929408654517812, iteration: 115545
loss: 1.0197203159332275,grad_norm: 0.9999991077013777, iteration: 115546
loss: 0.9495490789413452,grad_norm: 0.999999207663305, iteration: 115547
loss: 1.0408674478530884,grad_norm: 0.999999662394902, iteration: 115548
loss: 1.1847643852233887,grad_norm: 0.9999995223736068, iteration: 115549
loss: 0.993938148021698,grad_norm: 0.9999991158789905, iteration: 115550
loss: 1.0258188247680664,grad_norm: 0.9999995312430273, iteration: 115551
loss: 0.9674039483070374,grad_norm: 0.999999237756459, iteration: 115552
loss: 1.0103888511657715,grad_norm: 0.9999991152909508, iteration: 115553
loss: 0.9871084094047546,grad_norm: 0.9999991009581554, iteration: 115554
loss: 1.0620415210723877,grad_norm: 0.9999992185342589, iteration: 115555
loss: 1.0012555122375488,grad_norm: 0.9999991943570843, iteration: 115556
loss: 1.0356706380844116,grad_norm: 0.9999992895418682, iteration: 115557
loss: 1.074479103088379,grad_norm: 0.9999992787761253, iteration: 115558
loss: 1.0017006397247314,grad_norm: 0.9999990875818311, iteration: 115559
loss: 1.0377795696258545,grad_norm: 0.9432650422951695, iteration: 115560
loss: 1.0053367614746094,grad_norm: 0.9999991670779574, iteration: 115561
loss: 1.0311849117279053,grad_norm: 0.9999991666802525, iteration: 115562
loss: 0.9829968214035034,grad_norm: 0.9999990961812631, iteration: 115563
loss: 0.9915168881416321,grad_norm: 0.9999990759985646, iteration: 115564
loss: 1.000325322151184,grad_norm: 0.9999992771278144, iteration: 115565
loss: 1.00877845287323,grad_norm: 0.999999535448552, iteration: 115566
loss: 0.9561529159545898,grad_norm: 0.9999991735093821, iteration: 115567
loss: 0.9905901551246643,grad_norm: 0.9275525432992406, iteration: 115568
loss: 0.9965717792510986,grad_norm: 0.9999991427809868, iteration: 115569
loss: 0.9565584659576416,grad_norm: 0.9999990825092708, iteration: 115570
loss: 0.9991011619567871,grad_norm: 0.9192071780510088, iteration: 115571
loss: 0.9942677021026611,grad_norm: 0.988979667907119, iteration: 115572
loss: 0.9976838231086731,grad_norm: 0.9963524250260118, iteration: 115573
loss: 1.064395546913147,grad_norm: 0.9999996503796712, iteration: 115574
loss: 1.012676477432251,grad_norm: 0.9058698355810055, iteration: 115575
loss: 1.0423868894577026,grad_norm: 0.9999990925706831, iteration: 115576
loss: 1.0168384313583374,grad_norm: 0.980007412758073, iteration: 115577
loss: 0.9522131085395813,grad_norm: 0.9999992979036241, iteration: 115578
loss: 0.9853472709655762,grad_norm: 0.9334601487003635, iteration: 115579
loss: 1.0073659420013428,grad_norm: 0.9999997618639774, iteration: 115580
loss: 1.0054562091827393,grad_norm: 0.9584105741563852, iteration: 115581
loss: 0.9947826266288757,grad_norm: 0.9999991260889488, iteration: 115582
loss: 0.9945396184921265,grad_norm: 0.999999118753126, iteration: 115583
loss: 1.0165296792984009,grad_norm: 0.9706284782430868, iteration: 115584
loss: 1.0295031070709229,grad_norm: 0.9999991068164538, iteration: 115585
loss: 0.991104781627655,grad_norm: 0.9352277752610465, iteration: 115586
loss: 1.026128888130188,grad_norm: 0.9999992328924567, iteration: 115587
loss: 0.9922451972961426,grad_norm: 0.9999992962448834, iteration: 115588
loss: 0.9853712916374207,grad_norm: 0.999999289712151, iteration: 115589
loss: 0.9965722560882568,grad_norm: 0.999999136635773, iteration: 115590
loss: 1.0327355861663818,grad_norm: 0.9999996177086458, iteration: 115591
loss: 1.0077714920043945,grad_norm: 0.9999991162492406, iteration: 115592
loss: 0.9899164438247681,grad_norm: 0.9999992126304458, iteration: 115593
loss: 1.0006803274154663,grad_norm: 0.9999992797879969, iteration: 115594
loss: 0.9501634240150452,grad_norm: 0.9999991882906223, iteration: 115595
loss: 1.03595769405365,grad_norm: 0.9999990131068942, iteration: 115596
loss: 1.0117173194885254,grad_norm: 0.9999989676914165, iteration: 115597
loss: 1.029036283493042,grad_norm: 0.9913364580569666, iteration: 115598
loss: 0.9818740487098694,grad_norm: 0.9778941504482078, iteration: 115599
loss: 1.098178505897522,grad_norm: 0.9999997109552434, iteration: 115600
loss: 0.9881584048271179,grad_norm: 0.9999992168849708, iteration: 115601
loss: 0.9953671097755432,grad_norm: 0.9999992562214141, iteration: 115602
loss: 1.0160584449768066,grad_norm: 0.9999990908239137, iteration: 115603
loss: 1.006873607635498,grad_norm: 0.8562303759155144, iteration: 115604
loss: 0.9997805953025818,grad_norm: 0.8719726157514756, iteration: 115605
loss: 1.0255190134048462,grad_norm: 0.9999992899520443, iteration: 115606
loss: 0.9795445799827576,grad_norm: 0.99999909181277, iteration: 115607
loss: 1.0679155588150024,grad_norm: 0.9999992078976185, iteration: 115608
loss: 1.0355805158615112,grad_norm: 0.9999990182487586, iteration: 115609
loss: 1.000044345855713,grad_norm: 0.9999990999403214, iteration: 115610
loss: 1.0717427730560303,grad_norm: 0.9999992839361881, iteration: 115611
loss: 0.9521082639694214,grad_norm: 0.99999911423441, iteration: 115612
loss: 0.9939907193183899,grad_norm: 0.999999269309494, iteration: 115613
loss: 1.0030286312103271,grad_norm: 0.9999996582145516, iteration: 115614
loss: 1.0045233964920044,grad_norm: 0.9874855426640157, iteration: 115615
loss: 1.0185688734054565,grad_norm: 0.8999603135428181, iteration: 115616
loss: 1.0070947408676147,grad_norm: 0.9999992025341029, iteration: 115617
loss: 1.000382423400879,grad_norm: 0.8557014851838469, iteration: 115618
loss: 1.021281361579895,grad_norm: 0.9999989997322947, iteration: 115619
loss: 1.0157142877578735,grad_norm: 0.9999991226568093, iteration: 115620
loss: 0.9957991242408752,grad_norm: 0.9999992373780965, iteration: 115621
loss: 0.9738284349441528,grad_norm: 0.9999989663481679, iteration: 115622
loss: 1.015889048576355,grad_norm: 0.9999996624742751, iteration: 115623
loss: 1.0098423957824707,grad_norm: 0.9999991658339062, iteration: 115624
loss: 1.0138485431671143,grad_norm: 0.9688094923647393, iteration: 115625
loss: 0.9968788623809814,grad_norm: 0.9999992605828174, iteration: 115626
loss: 1.0167911052703857,grad_norm: 0.9999997713364501, iteration: 115627
loss: 0.9809594750404358,grad_norm: 0.9999993022932859, iteration: 115628
loss: 0.9948839545249939,grad_norm: 0.926166158840466, iteration: 115629
loss: 0.9862186312675476,grad_norm: 0.8832505974031257, iteration: 115630
loss: 1.0110079050064087,grad_norm: 0.9288358268017102, iteration: 115631
loss: 0.9618346691131592,grad_norm: 0.999999029295977, iteration: 115632
loss: 1.007076382637024,grad_norm: 0.9999991029265985, iteration: 115633
loss: 0.9657238721847534,grad_norm: 0.9801398819273847, iteration: 115634
loss: 0.9734505414962769,grad_norm: 0.9999993554415614, iteration: 115635
loss: 1.0118778944015503,grad_norm: 0.9999991159135146, iteration: 115636
loss: 0.980819046497345,grad_norm: 0.9999990498307286, iteration: 115637
loss: 0.9922256469726562,grad_norm: 0.9999990778197577, iteration: 115638
loss: 1.0101431608200073,grad_norm: 0.9999992635762344, iteration: 115639
loss: 1.0247424840927124,grad_norm: 0.9999991289321803, iteration: 115640
loss: 1.022421956062317,grad_norm: 0.9999992583716879, iteration: 115641
loss: 0.9887605905532837,grad_norm: 0.9999988941606636, iteration: 115642
loss: 1.010728359222412,grad_norm: 0.9999990699592961, iteration: 115643
loss: 0.987455427646637,grad_norm: 0.9838881821330527, iteration: 115644
loss: 0.9991649389266968,grad_norm: 0.9999992860914034, iteration: 115645
loss: 1.0005146265029907,grad_norm: 0.9999992359790533, iteration: 115646
loss: 1.0262501239776611,grad_norm: 0.9999990190969399, iteration: 115647
loss: 0.9463237524032593,grad_norm: 0.9999991040899108, iteration: 115648
loss: 1.0026795864105225,grad_norm: 0.9295306388237629, iteration: 115649
loss: 1.0910879373550415,grad_norm: 0.9999989389884018, iteration: 115650
loss: 1.007206916809082,grad_norm: 0.9999994092912212, iteration: 115651
loss: 1.2638719081878662,grad_norm: 0.999999620065172, iteration: 115652
loss: 1.0345237255096436,grad_norm: 0.9999990988139967, iteration: 115653
loss: 1.003353476524353,grad_norm: 0.9568315246007519, iteration: 115654
loss: 1.0909041166305542,grad_norm: 0.9999991809385544, iteration: 115655
loss: 1.0298357009887695,grad_norm: 0.9999990838137317, iteration: 115656
loss: 1.0390610694885254,grad_norm: 0.9999994774898168, iteration: 115657
loss: 0.976632297039032,grad_norm: 0.9627703119099461, iteration: 115658
loss: 1.036601185798645,grad_norm: 0.9999991209116017, iteration: 115659
loss: 0.9836646914482117,grad_norm: 0.9999991980321918, iteration: 115660
loss: 1.032490611076355,grad_norm: 0.9999989896854817, iteration: 115661
loss: 0.9670314788818359,grad_norm: 0.9999990621806162, iteration: 115662
loss: 0.9647544622421265,grad_norm: 0.9999991633491977, iteration: 115663
loss: 0.9789493083953857,grad_norm: 0.984159240220466, iteration: 115664
loss: 1.0169634819030762,grad_norm: 0.9999990915708686, iteration: 115665
loss: 1.076468825340271,grad_norm: 0.9999998666252784, iteration: 115666
loss: 0.9948726892471313,grad_norm: 0.9999992187608432, iteration: 115667
loss: 0.9757775068283081,grad_norm: 0.999999158763523, iteration: 115668
loss: 1.0127651691436768,grad_norm: 0.9427372041175374, iteration: 115669
loss: 1.018448829650879,grad_norm: 0.9708936182328246, iteration: 115670
loss: 0.9986332058906555,grad_norm: 0.9608883234166454, iteration: 115671
loss: 1.1302863359451294,grad_norm: 0.9999999096714085, iteration: 115672
loss: 0.937328040599823,grad_norm: 0.9999990053420742, iteration: 115673
loss: 0.9699639081954956,grad_norm: 0.9999991741548633, iteration: 115674
loss: 0.9927365779876709,grad_norm: 0.9468012428446692, iteration: 115675
loss: 0.9968137741088867,grad_norm: 0.999999218207057, iteration: 115676
loss: 0.9722030162811279,grad_norm: 0.9999991528314982, iteration: 115677
loss: 1.0099459886550903,grad_norm: 0.9999991467644073, iteration: 115678
loss: 1.0320322513580322,grad_norm: 0.8745358537559635, iteration: 115679
loss: 0.9917144775390625,grad_norm: 0.9999992382170221, iteration: 115680
loss: 0.9577193856239319,grad_norm: 0.9999989340085063, iteration: 115681
loss: 1.0350303649902344,grad_norm: 1.0000000033132221, iteration: 115682
loss: 1.0151375532150269,grad_norm: 0.9999997675335311, iteration: 115683
loss: 1.0299040079116821,grad_norm: 0.890884906768211, iteration: 115684
loss: 0.9857021570205688,grad_norm: 0.9999991641755884, iteration: 115685
loss: 0.9938305616378784,grad_norm: 0.9999991349575278, iteration: 115686
loss: 0.9705321192741394,grad_norm: 0.9896721024333411, iteration: 115687
loss: 1.0097922086715698,grad_norm: 0.9999992206700008, iteration: 115688
loss: 1.0249933004379272,grad_norm: 0.9999990443108118, iteration: 115689
loss: 1.0387195348739624,grad_norm: 0.9999992251773028, iteration: 115690
loss: 1.008650541305542,grad_norm: 0.9548277711529506, iteration: 115691
loss: 0.9862061142921448,grad_norm: 0.99999924366181, iteration: 115692
loss: 1.0041413307189941,grad_norm: 0.999999087910407, iteration: 115693
loss: 1.0294561386108398,grad_norm: 0.954381130543329, iteration: 115694
loss: 0.9913898706436157,grad_norm: 0.9999991899105857, iteration: 115695
loss: 0.9821298718452454,grad_norm: 0.9999991113998719, iteration: 115696
loss: 0.9934572577476501,grad_norm: 0.9645723511419622, iteration: 115697
loss: 0.9808093309402466,grad_norm: 0.9999990827902453, iteration: 115698
loss: 0.9694710969924927,grad_norm: 0.9999991725096177, iteration: 115699
loss: 1.1016451120376587,grad_norm: 0.9999996763638924, iteration: 115700
loss: 1.0084748268127441,grad_norm: 0.9999991920153403, iteration: 115701
loss: 0.9968973398208618,grad_norm: 0.8434379576107561, iteration: 115702
loss: 0.988001823425293,grad_norm: 0.9999992563532876, iteration: 115703
loss: 0.9887208938598633,grad_norm: 0.99999907719539, iteration: 115704
loss: 1.0122750997543335,grad_norm: 0.9999998712630344, iteration: 115705
loss: 1.0135971307754517,grad_norm: 0.9999992811073106, iteration: 115706
loss: 1.017654299736023,grad_norm: 0.999999032425812, iteration: 115707
loss: 1.0082215070724487,grad_norm: 0.9360659313973219, iteration: 115708
loss: 0.9835941791534424,grad_norm: 0.9999989923844592, iteration: 115709
loss: 1.0111417770385742,grad_norm: 0.9215109986359893, iteration: 115710
loss: 1.0485162734985352,grad_norm: 0.9999991387412285, iteration: 115711
loss: 1.0073802471160889,grad_norm: 0.809602485400197, iteration: 115712
loss: 1.0053290128707886,grad_norm: 0.9325483576283405, iteration: 115713
loss: 1.0105555057525635,grad_norm: 0.9999991775084087, iteration: 115714
loss: 1.0342923402786255,grad_norm: 0.9999990739793715, iteration: 115715
loss: 0.9438844323158264,grad_norm: 0.9999992384714234, iteration: 115716
loss: 1.039247989654541,grad_norm: 0.986456712823126, iteration: 115717
loss: 1.2539092302322388,grad_norm: 0.9999997994585397, iteration: 115718
loss: 1.0060077905654907,grad_norm: 0.9999990343826956, iteration: 115719
loss: 1.0073378086090088,grad_norm: 0.9918259918726153, iteration: 115720
loss: 1.1108922958374023,grad_norm: 0.999999460380557, iteration: 115721
loss: 0.9801005721092224,grad_norm: 0.9999991329554999, iteration: 115722
loss: 1.0461410284042358,grad_norm: 0.9616183694820574, iteration: 115723
loss: 1.0084584951400757,grad_norm: 0.9424923524152627, iteration: 115724
loss: 0.9962388873100281,grad_norm: 0.9999992851536369, iteration: 115725
loss: 1.004988431930542,grad_norm: 0.8108163548332135, iteration: 115726
loss: 0.9874686598777771,grad_norm: 0.9999991450193768, iteration: 115727
loss: 1.1038960218429565,grad_norm: 0.9999997322287872, iteration: 115728
loss: 0.9793936014175415,grad_norm: 0.9999991239805079, iteration: 115729
loss: 1.0842279195785522,grad_norm: 0.9999998579130156, iteration: 115730
loss: 0.9825857877731323,grad_norm: 0.9999993373577101, iteration: 115731
loss: 0.9853976964950562,grad_norm: 0.9999991894657684, iteration: 115732
loss: 0.9424935579299927,grad_norm: 0.999999115115819, iteration: 115733
loss: 1.0422714948654175,grad_norm: 0.9999992295314006, iteration: 115734
loss: 0.9933637976646423,grad_norm: 0.8738214012667629, iteration: 115735
loss: 0.9930750131607056,grad_norm: 0.9999990871268647, iteration: 115736
loss: 1.201119303703308,grad_norm: 0.9999995322594625, iteration: 115737
loss: 1.0097064971923828,grad_norm: 0.9999991273712878, iteration: 115738
loss: 0.9629367589950562,grad_norm: 0.9999990781604896, iteration: 115739
loss: 1.0258150100708008,grad_norm: 0.9155485340687892, iteration: 115740
loss: 1.024389386177063,grad_norm: 0.9999990348194672, iteration: 115741
loss: 1.017562747001648,grad_norm: 0.9999993042452403, iteration: 115742
loss: 0.9940872192382812,grad_norm: 0.9999991595571736, iteration: 115743
loss: 1.026430606842041,grad_norm: 0.9999991586347267, iteration: 115744
loss: 0.9877645373344421,grad_norm: 0.9999992003620276, iteration: 115745
loss: 1.1615384817123413,grad_norm: 0.9999991652074288, iteration: 115746
loss: 0.9875021576881409,grad_norm: 0.9999991404380278, iteration: 115747
loss: 0.9961683750152588,grad_norm: 0.8868633477180144, iteration: 115748
loss: 1.0565693378448486,grad_norm: 0.9999991539699903, iteration: 115749
loss: 0.9718518257141113,grad_norm: 0.9607553836962928, iteration: 115750
loss: 0.9928480982780457,grad_norm: 0.9793072949887498, iteration: 115751
loss: 1.0043754577636719,grad_norm: 0.9999991740689032, iteration: 115752
loss: 1.0153497457504272,grad_norm: 0.9999991786541653, iteration: 115753
loss: 1.083844542503357,grad_norm: 0.9999993020780328, iteration: 115754
loss: 0.9891421794891357,grad_norm: 0.9440511860188269, iteration: 115755
loss: 1.0036126375198364,grad_norm: 0.9122415344614425, iteration: 115756
loss: 0.9654079079627991,grad_norm: 0.9838008792750961, iteration: 115757
loss: 1.0097633600234985,grad_norm: 0.9839616478004249, iteration: 115758
loss: 1.0319437980651855,grad_norm: 0.999999166273799, iteration: 115759
loss: 0.987937867641449,grad_norm: 0.9563338442733843, iteration: 115760
loss: 0.978033721446991,grad_norm: 0.9999988884885742, iteration: 115761
loss: 0.9855351448059082,grad_norm: 0.9130336453754047, iteration: 115762
loss: 0.9647626876831055,grad_norm: 0.9999990505211012, iteration: 115763
loss: 1.0406982898712158,grad_norm: 0.9999992759485503, iteration: 115764
loss: 1.0443607568740845,grad_norm: 0.9999989492561446, iteration: 115765
loss: 1.013514518737793,grad_norm: 0.9999989433483749, iteration: 115766
loss: 0.9582201838493347,grad_norm: 0.9999991229935971, iteration: 115767
loss: 1.0682498216629028,grad_norm: 0.9999993601499904, iteration: 115768
loss: 0.9927693009376526,grad_norm: 0.9687104202327349, iteration: 115769
loss: 1.003623127937317,grad_norm: 0.9999991275866673, iteration: 115770
loss: 1.0292600393295288,grad_norm: 0.9964861251504856, iteration: 115771
loss: 1.0308146476745605,grad_norm: 0.9459063757258404, iteration: 115772
loss: 1.0089811086654663,grad_norm: 0.9999991269052131, iteration: 115773
loss: 1.000116229057312,grad_norm: 0.9999992628499679, iteration: 115774
loss: 0.9913804531097412,grad_norm: 0.8572872617880782, iteration: 115775
loss: 0.9887943863868713,grad_norm: 0.9343887895087568, iteration: 115776
loss: 1.0128332376480103,grad_norm: 0.9160932742363465, iteration: 115777
loss: 1.0796172618865967,grad_norm: 0.9999993823406794, iteration: 115778
loss: 1.0010759830474854,grad_norm: 0.9999991931065764, iteration: 115779
loss: 1.0393010377883911,grad_norm: 0.9999990537568427, iteration: 115780
loss: 1.0137311220169067,grad_norm: 0.9999992631393803, iteration: 115781
loss: 0.9618653655052185,grad_norm: 0.9999991885201279, iteration: 115782
loss: 0.9888501763343811,grad_norm: 0.9999991609782029, iteration: 115783
loss: 1.0143812894821167,grad_norm: 0.9181838251557546, iteration: 115784
loss: 0.9890684485435486,grad_norm: 0.9718726941388109, iteration: 115785
loss: 0.9745802283287048,grad_norm: 0.9999990822675785, iteration: 115786
loss: 0.9937132000923157,grad_norm: 0.9999992624748183, iteration: 115787
loss: 0.9895883202552795,grad_norm: 0.9947984126846989, iteration: 115788
loss: 0.9686656594276428,grad_norm: 0.9999990426112274, iteration: 115789
loss: 1.024208664894104,grad_norm: 0.9999991154443381, iteration: 115790
loss: 0.9910587668418884,grad_norm: 0.9952847834483176, iteration: 115791
loss: 0.9841279983520508,grad_norm: 0.9396273635713963, iteration: 115792
loss: 0.9901292324066162,grad_norm: 0.9806697024982493, iteration: 115793
loss: 0.9922866225242615,grad_norm: 0.9999989930805033, iteration: 115794
loss: 1.0047342777252197,grad_norm: 0.9999991125296022, iteration: 115795
loss: 0.9691895246505737,grad_norm: 0.977914416438534, iteration: 115796
loss: 1.0158300399780273,grad_norm: 0.9999990548934062, iteration: 115797
loss: 0.9849819540977478,grad_norm: 0.9999991340703996, iteration: 115798
loss: 1.0134004354476929,grad_norm: 0.9377979892846604, iteration: 115799
loss: 1.0144721269607544,grad_norm: 0.7960164662403684, iteration: 115800
loss: 0.9819954633712769,grad_norm: 0.9999991104311104, iteration: 115801
loss: 1.0095431804656982,grad_norm: 0.9361325539604518, iteration: 115802
loss: 0.991531491279602,grad_norm: 0.9999991384372934, iteration: 115803
loss: 0.9769713282585144,grad_norm: 0.9999990945821331, iteration: 115804
loss: 1.0019716024398804,grad_norm: 0.8897987506206193, iteration: 115805
loss: 0.9989185333251953,grad_norm: 0.9999991601251824, iteration: 115806
loss: 1.0067150592803955,grad_norm: 0.9999991091771326, iteration: 115807
loss: 1.0149333477020264,grad_norm: 0.9999991395906657, iteration: 115808
loss: 1.0350806713104248,grad_norm: 0.9999992947800068, iteration: 115809
loss: 1.0553417205810547,grad_norm: 0.9999993452333993, iteration: 115810
loss: 0.9740437865257263,grad_norm: 0.9999993139815272, iteration: 115811
loss: 1.0177347660064697,grad_norm: 0.9602809253671775, iteration: 115812
loss: 1.0493704080581665,grad_norm: 0.9999991651421705, iteration: 115813
loss: 1.0159330368041992,grad_norm: 0.9999991587366154, iteration: 115814
loss: 1.0126925706863403,grad_norm: 0.9999991924922839, iteration: 115815
loss: 1.02064847946167,grad_norm: 0.9999991337267886, iteration: 115816
loss: 1.0084104537963867,grad_norm: 0.9999991387334316, iteration: 115817
loss: 1.0796736478805542,grad_norm: 0.9999992915262942, iteration: 115818
loss: 1.0539578199386597,grad_norm: 0.9999992757960317, iteration: 115819
loss: 1.0004514455795288,grad_norm: 0.9999991995406885, iteration: 115820
loss: 0.9995651245117188,grad_norm: 0.9999992206991833, iteration: 115821
loss: 1.030745267868042,grad_norm: 0.9999990469209304, iteration: 115822
loss: 1.0170708894729614,grad_norm: 0.9999990465704084, iteration: 115823
loss: 0.9917401075363159,grad_norm: 0.9229449404596316, iteration: 115824
loss: 0.9700520038604736,grad_norm: 0.9999990178753289, iteration: 115825
loss: 1.0534613132476807,grad_norm: 0.9999992351810818, iteration: 115826
loss: 0.9710759520530701,grad_norm: 0.9999991325101665, iteration: 115827
loss: 0.9881449937820435,grad_norm: 0.9999990059765368, iteration: 115828
loss: 0.9637379050254822,grad_norm: 0.9999993173230455, iteration: 115829
loss: 0.9967336058616638,grad_norm: 0.9999991343930292, iteration: 115830
loss: 1.1040623188018799,grad_norm: 0.9999993560003569, iteration: 115831
loss: 0.9803471565246582,grad_norm: 0.9935773122955285, iteration: 115832
loss: 1.030954122543335,grad_norm: 0.9999999005723982, iteration: 115833
loss: 1.0033589601516724,grad_norm: 0.9999991880161788, iteration: 115834
loss: 1.0263642072677612,grad_norm: 0.999999181090029, iteration: 115835
loss: 0.9756399989128113,grad_norm: 0.9574410873281954, iteration: 115836
loss: 1.011264681816101,grad_norm: 0.9999991407806027, iteration: 115837
loss: 1.0471237897872925,grad_norm: 0.9999990629930857, iteration: 115838
loss: 1.0243573188781738,grad_norm: 0.9745836078773764, iteration: 115839
loss: 0.9779779314994812,grad_norm: 0.9999992765549134, iteration: 115840
loss: 1.0173392295837402,grad_norm: 0.9999992042270259, iteration: 115841
loss: 1.034718632698059,grad_norm: 0.9999996341058526, iteration: 115842
loss: 0.9708970785140991,grad_norm: 0.9999990525273132, iteration: 115843
loss: 1.002900242805481,grad_norm: 0.9999988906058328, iteration: 115844
loss: 0.9969493746757507,grad_norm: 0.9999991016962763, iteration: 115845
loss: 1.0000405311584473,grad_norm: 0.9999992715296949, iteration: 115846
loss: 1.0081977844238281,grad_norm: 0.8075243941510176, iteration: 115847
loss: 1.0007967948913574,grad_norm: 0.9999990412649158, iteration: 115848
loss: 1.0396357774734497,grad_norm: 0.9999990425407097, iteration: 115849
loss: 1.0210769176483154,grad_norm: 0.917627188420254, iteration: 115850
loss: 1.0251049995422363,grad_norm: 0.9999991634229339, iteration: 115851
loss: 1.0368081331253052,grad_norm: 0.9999990367657089, iteration: 115852
loss: 1.0085656642913818,grad_norm: 0.9672179533037583, iteration: 115853
loss: 0.9586618542671204,grad_norm: 0.9999991098603698, iteration: 115854
loss: 1.0475637912750244,grad_norm: 0.991457980155004, iteration: 115855
loss: 1.0289585590362549,grad_norm: 0.9999993384779764, iteration: 115856
loss: 1.0123600959777832,grad_norm: 0.8633560567710377, iteration: 115857
loss: 1.0291069746017456,grad_norm: 0.9999989882976201, iteration: 115858
loss: 0.9833120703697205,grad_norm: 0.999998954249895, iteration: 115859
loss: 1.0124030113220215,grad_norm: 0.9999991174921242, iteration: 115860
loss: 1.0355212688446045,grad_norm: 0.9999991481523357, iteration: 115861
loss: 0.9969164133071899,grad_norm: 0.9249095000366587, iteration: 115862
loss: 1.0324286222457886,grad_norm: 0.9539887816491943, iteration: 115863
loss: 1.0723671913146973,grad_norm: 0.9999992531826019, iteration: 115864
loss: 1.0294324159622192,grad_norm: 0.9428710716985241, iteration: 115865
loss: 0.9425305128097534,grad_norm: 0.9944552280848091, iteration: 115866
loss: 1.0115361213684082,grad_norm: 0.9650222514774806, iteration: 115867
loss: 0.9952996373176575,grad_norm: 0.9999991160484951, iteration: 115868
loss: 1.0155352354049683,grad_norm: 0.9404030469577181, iteration: 115869
loss: 0.9729698300361633,grad_norm: 0.9999991064376793, iteration: 115870
loss: 0.9799480438232422,grad_norm: 0.9632570037865753, iteration: 115871
loss: 0.9968200922012329,grad_norm: 0.9999990887143547, iteration: 115872
loss: 1.061342477798462,grad_norm: 0.9999994374542288, iteration: 115873
loss: 1.0122150182724,grad_norm: 0.9999990900800945, iteration: 115874
loss: 1.0190614461898804,grad_norm: 0.9999994409613575, iteration: 115875
loss: 1.0444601774215698,grad_norm: 0.999999175600941, iteration: 115876
loss: 0.9980699419975281,grad_norm: 0.9999990436470098, iteration: 115877
loss: 1.016075849533081,grad_norm: 0.9999990786716468, iteration: 115878
loss: 1.0194728374481201,grad_norm: 0.9999990749209409, iteration: 115879
loss: 0.9761165380477905,grad_norm: 0.9809965385510652, iteration: 115880
loss: 1.0056686401367188,grad_norm: 0.9999992410155026, iteration: 115881
loss: 1.0017114877700806,grad_norm: 0.9999990324674976, iteration: 115882
loss: 1.0021401643753052,grad_norm: 0.9999990960495189, iteration: 115883
loss: 1.019637107849121,grad_norm: 0.9999991985197513, iteration: 115884
loss: 1.0062435865402222,grad_norm: 0.9666041199043931, iteration: 115885
loss: 0.9767942428588867,grad_norm: 0.9999990327837492, iteration: 115886
loss: 1.006094217300415,grad_norm: 0.9999992669897907, iteration: 115887
loss: 1.0735288858413696,grad_norm: 0.999999275784225, iteration: 115888
loss: 1.0173778533935547,grad_norm: 0.9999992381182351, iteration: 115889
loss: 0.9871846437454224,grad_norm: 0.9999993422586803, iteration: 115890
loss: 1.0012242794036865,grad_norm: 0.9999992155994949, iteration: 115891
loss: 1.0020617246627808,grad_norm: 0.9999990082458903, iteration: 115892
loss: 1.108217716217041,grad_norm: 0.9999991625728885, iteration: 115893
loss: 0.9948036074638367,grad_norm: 0.9040533046258645, iteration: 115894
loss: 0.9811637997627258,grad_norm: 0.9407814801496901, iteration: 115895
loss: 0.9854261875152588,grad_norm: 0.838854380568919, iteration: 115896
loss: 1.0503631830215454,grad_norm: 0.999999540448405, iteration: 115897
loss: 1.0097566843032837,grad_norm: 0.9999991065005724, iteration: 115898
loss: 1.013505458831787,grad_norm: 0.9999992314988452, iteration: 115899
loss: 1.018170952796936,grad_norm: 0.9999998409899743, iteration: 115900
loss: 0.9860177040100098,grad_norm: 0.9999992738974564, iteration: 115901
loss: 1.031164526939392,grad_norm: 0.8159721694922606, iteration: 115902
loss: 1.0183764696121216,grad_norm: 0.9999991131733581, iteration: 115903
loss: 1.027701735496521,grad_norm: 0.9301758967514782, iteration: 115904
loss: 1.00651216506958,grad_norm: 0.9999990748011024, iteration: 115905
loss: 0.9846696853637695,grad_norm: 0.999999098891842, iteration: 115906
loss: 0.9872814416885376,grad_norm: 0.8716360138230165, iteration: 115907
loss: 0.993962287902832,grad_norm: 0.9999990972274282, iteration: 115908
loss: 1.0071827173233032,grad_norm: 0.9792067738737255, iteration: 115909
loss: 1.0230779647827148,grad_norm: 0.9999991463338793, iteration: 115910
loss: 0.9847050309181213,grad_norm: 0.8929490431759213, iteration: 115911
loss: 0.9917895793914795,grad_norm: 0.9999990390447304, iteration: 115912
loss: 1.0940773487091064,grad_norm: 0.9999996297595312, iteration: 115913
loss: 1.0252244472503662,grad_norm: 0.9961012874982416, iteration: 115914
loss: 0.9819478988647461,grad_norm: 0.9541327943220861, iteration: 115915
loss: 0.9788399934768677,grad_norm: 0.9999991523845154, iteration: 115916
loss: 0.9929008483886719,grad_norm: 0.999999468906407, iteration: 115917
loss: 1.0123590230941772,grad_norm: 0.9999990601466596, iteration: 115918
loss: 0.9930717349052429,grad_norm: 0.999999304451876, iteration: 115919
loss: 0.9963157176971436,grad_norm: 0.9999991876824095, iteration: 115920
loss: 1.0178803205490112,grad_norm: 0.999999141066321, iteration: 115921
loss: 0.9790599942207336,grad_norm: 0.9835584799781375, iteration: 115922
loss: 1.0161340236663818,grad_norm: 0.9999990589106403, iteration: 115923
loss: 1.0384325981140137,grad_norm: 0.9999991239548697, iteration: 115924
loss: 0.9789246916770935,grad_norm: 0.9999991093641352, iteration: 115925
loss: 1.0091629028320312,grad_norm: 0.9048329486807377, iteration: 115926
loss: 1.015561580657959,grad_norm: 0.9999992728755223, iteration: 115927
loss: 0.9958945512771606,grad_norm: 0.9999991520336, iteration: 115928
loss: 0.9792040586471558,grad_norm: 0.8994408041798809, iteration: 115929
loss: 0.9993162751197815,grad_norm: 0.9243371372618765, iteration: 115930
loss: 1.040724515914917,grad_norm: 0.9999991281950019, iteration: 115931
loss: 1.0072544813156128,grad_norm: 0.9492063708166705, iteration: 115932
loss: 0.9576645493507385,grad_norm: 0.9999990842258893, iteration: 115933
loss: 1.045478105545044,grad_norm: 0.9999991233511706, iteration: 115934
loss: 0.9754208922386169,grad_norm: 0.9999992934660041, iteration: 115935
loss: 0.9814658761024475,grad_norm: 0.9999991343802586, iteration: 115936
loss: 1.0236560106277466,grad_norm: 0.9999992099811559, iteration: 115937
loss: 0.9797654151916504,grad_norm: 0.9999996541859099, iteration: 115938
loss: 1.0196603536605835,grad_norm: 0.9445505150217927, iteration: 115939
loss: 1.0058470964431763,grad_norm: 0.9999991973078242, iteration: 115940
loss: 0.9943521618843079,grad_norm: 0.9593944504561657, iteration: 115941
loss: 1.0316028594970703,grad_norm: 0.9999991250465822, iteration: 115942
loss: 0.9661173820495605,grad_norm: 0.9999991179760362, iteration: 115943
loss: 1.0081895589828491,grad_norm: 0.9999991026159796, iteration: 115944
loss: 1.0091804265975952,grad_norm: 0.9451276359821517, iteration: 115945
loss: 1.0099245309829712,grad_norm: 0.9999992154539087, iteration: 115946
loss: 1.023995041847229,grad_norm: 0.9999990643944869, iteration: 115947
loss: 1.0259097814559937,grad_norm: 0.9999992306750628, iteration: 115948
loss: 0.9607236981391907,grad_norm: 0.9893217805950655, iteration: 115949
loss: 1.0122743844985962,grad_norm: 0.9949968548897097, iteration: 115950
loss: 1.0010188817977905,grad_norm: 0.9999990450666464, iteration: 115951
loss: 0.9940230846405029,grad_norm: 0.9306156173427489, iteration: 115952
loss: 1.0171397924423218,grad_norm: 0.9751366897677484, iteration: 115953
loss: 0.9725227355957031,grad_norm: 0.9999990309805822, iteration: 115954
loss: 0.9899255037307739,grad_norm: 0.999999107521906, iteration: 115955
loss: 1.0222376585006714,grad_norm: 0.9999993280531371, iteration: 115956
loss: 0.9875447154045105,grad_norm: 0.96902863421172, iteration: 115957
loss: 0.9859467148780823,grad_norm: 0.8541938405865088, iteration: 115958
loss: 1.0062309503555298,grad_norm: 0.9979897295931281, iteration: 115959
loss: 0.9754394888877869,grad_norm: 0.9635867694278943, iteration: 115960
loss: 0.9872404932975769,grad_norm: 0.9999991847069134, iteration: 115961
loss: 0.9758480191230774,grad_norm: 0.9999991309742258, iteration: 115962
loss: 1.0039098262786865,grad_norm: 0.9509228577620098, iteration: 115963
loss: 1.0154516696929932,grad_norm: 0.9683517275869766, iteration: 115964
loss: 0.9739176630973816,grad_norm: 0.8764148313447014, iteration: 115965
loss: 0.9670848250389099,grad_norm: 0.9133332466906426, iteration: 115966
loss: 1.0262287855148315,grad_norm: 0.7930023578771829, iteration: 115967
loss: 1.0351834297180176,grad_norm: 0.9999991310657631, iteration: 115968
loss: 1.0444940328598022,grad_norm: 0.9999991070854873, iteration: 115969
loss: 1.0502203702926636,grad_norm: 0.9999991297690122, iteration: 115970
loss: 1.0355520248413086,grad_norm: 0.9459581490273571, iteration: 115971
loss: 0.9988844990730286,grad_norm: 0.9328625913415349, iteration: 115972
loss: 1.006156086921692,grad_norm: 0.999999274467512, iteration: 115973
loss: 0.9878637790679932,grad_norm: 0.9999992868590709, iteration: 115974
loss: 1.0024771690368652,grad_norm: 0.9144859488504385, iteration: 115975
loss: 1.0103126764297485,grad_norm: 0.9999992267572562, iteration: 115976
loss: 1.0246038436889648,grad_norm: 0.999999653251514, iteration: 115977
loss: 1.0114446878433228,grad_norm: 0.9999990959373076, iteration: 115978
loss: 0.9929825663566589,grad_norm: 0.9999990883333816, iteration: 115979
loss: 0.9782564043998718,grad_norm: 0.9999989855558511, iteration: 115980
loss: 0.9808378219604492,grad_norm: 0.9892703017051521, iteration: 115981
loss: 0.9838721752166748,grad_norm: 0.8691134945582395, iteration: 115982
loss: 0.9809390902519226,grad_norm: 0.9999990098700515, iteration: 115983
loss: 0.9952695965766907,grad_norm: 0.9961289109666088, iteration: 115984
loss: 1.0014638900756836,grad_norm: 0.9999990900174994, iteration: 115985
loss: 1.0087960958480835,grad_norm: 0.9999990687409136, iteration: 115986
loss: 1.0269719362258911,grad_norm: 0.9804039762157538, iteration: 115987
loss: 0.9966005086898804,grad_norm: 0.999999314397734, iteration: 115988
loss: 0.9859645366668701,grad_norm: 0.9185204227619649, iteration: 115989
loss: 0.995630145072937,grad_norm: 0.9999991422137038, iteration: 115990
loss: 0.9804666042327881,grad_norm: 0.999751175159965, iteration: 115991
loss: 1.0060726404190063,grad_norm: 0.999999191704299, iteration: 115992
loss: 1.0098694562911987,grad_norm: 0.9999992107733827, iteration: 115993
loss: 0.9932039976119995,grad_norm: 0.9999991546582866, iteration: 115994
loss: 0.9744577407836914,grad_norm: 0.9545307393619997, iteration: 115995
loss: 0.9934924244880676,grad_norm: 0.9999991681823195, iteration: 115996
loss: 0.9929481148719788,grad_norm: 0.9999992340519593, iteration: 115997
loss: 1.0154974460601807,grad_norm: 0.9999989647699205, iteration: 115998
loss: 1.0204787254333496,grad_norm: 0.9999991897576945, iteration: 115999
loss: 1.0083966255187988,grad_norm: 0.999999130078537, iteration: 116000
loss: 0.9880430102348328,grad_norm: 0.9999991271175057, iteration: 116001
loss: 1.0385960340499878,grad_norm: 0.9999991287599785, iteration: 116002
loss: 0.9700804948806763,grad_norm: 0.9999993597061952, iteration: 116003
loss: 0.9891206622123718,grad_norm: 0.9670772509463029, iteration: 116004
loss: 0.9918164610862732,grad_norm: 0.9999991242927597, iteration: 116005
loss: 1.0234417915344238,grad_norm: 0.9999990751366007, iteration: 116006
loss: 1.0330007076263428,grad_norm: 0.999999088285102, iteration: 116007
loss: 0.9706988334655762,grad_norm: 0.9579355846489381, iteration: 116008
loss: 0.9784470796585083,grad_norm: 0.8983868268838325, iteration: 116009
loss: 0.9947958588600159,grad_norm: 0.9999991502624724, iteration: 116010
loss: 1.0352224111557007,grad_norm: 0.9999990900137654, iteration: 116011
loss: 1.0346662998199463,grad_norm: 0.8822094102607239, iteration: 116012
loss: 1.0085200071334839,grad_norm: 0.9999990814076005, iteration: 116013
loss: 0.968802809715271,grad_norm: 0.9999991826472776, iteration: 116014
loss: 1.0047733783721924,grad_norm: 0.9999990657823689, iteration: 116015
loss: 0.9629676342010498,grad_norm: 0.9916300861605696, iteration: 116016
loss: 1.0137474536895752,grad_norm: 0.9999990698985187, iteration: 116017
loss: 0.966636061668396,grad_norm: 0.9512278204359198, iteration: 116018
loss: 0.9987173080444336,grad_norm: 0.9999990721005618, iteration: 116019
loss: 1.030502438545227,grad_norm: 0.9999990533375305, iteration: 116020
loss: 0.9875062108039856,grad_norm: 0.9999991424634905, iteration: 116021
loss: 1.011156439781189,grad_norm: 0.9999992681273315, iteration: 116022
loss: 0.9921277165412903,grad_norm: 0.9999990818446965, iteration: 116023
loss: 0.9933081269264221,grad_norm: 0.9986006481987542, iteration: 116024
loss: 0.9478243589401245,grad_norm: 0.9999990870256212, iteration: 116025
loss: 0.9986456632614136,grad_norm: 0.9999991656965462, iteration: 116026
loss: 0.9717294573783875,grad_norm: 0.8641505684737173, iteration: 116027
loss: 1.0148545503616333,grad_norm: 0.9860284926748201, iteration: 116028
loss: 1.0105544328689575,grad_norm: 0.9999990745118192, iteration: 116029
loss: 1.1989825963974,grad_norm: 0.9999994979799902, iteration: 116030
loss: 0.994512677192688,grad_norm: 0.9316538741050658, iteration: 116031
loss: 1.0235358476638794,grad_norm: 0.9895928296710941, iteration: 116032
loss: 0.961467444896698,grad_norm: 0.934206566851658, iteration: 116033
loss: 1.014621376991272,grad_norm: 0.9999992139249334, iteration: 116034
loss: 1.0293601751327515,grad_norm: 0.9999992872558734, iteration: 116035
loss: 0.9422317743301392,grad_norm: 0.9999991258683447, iteration: 116036
loss: 0.9717527031898499,grad_norm: 0.9999991328648111, iteration: 116037
loss: 0.9872515201568604,grad_norm: 0.8896760374429473, iteration: 116038
loss: 1.001693606376648,grad_norm: 0.9999990445504355, iteration: 116039
loss: 1.0168017148971558,grad_norm: 0.8924127306528133, iteration: 116040
loss: 0.9860783815383911,grad_norm: 0.8545385891695136, iteration: 116041
loss: 1.0044671297073364,grad_norm: 0.9999991802440796, iteration: 116042
loss: 0.9665600657463074,grad_norm: 0.9999992640611678, iteration: 116043
loss: 0.9766305685043335,grad_norm: 0.9912577494906809, iteration: 116044
loss: 0.9769536852836609,grad_norm: 0.999999167181599, iteration: 116045
loss: 0.9908445477485657,grad_norm: 0.8781472657885877, iteration: 116046
loss: 1.0214054584503174,grad_norm: 0.9999991915886096, iteration: 116047
loss: 1.0239202976226807,grad_norm: 0.9517670077966162, iteration: 116048
loss: 1.009653925895691,grad_norm: 0.999999191255481, iteration: 116049
loss: 0.9596512317657471,grad_norm: 0.9999991136253459, iteration: 116050
loss: 0.9715913534164429,grad_norm: 0.9999991010995668, iteration: 116051
loss: 0.9830883741378784,grad_norm: 0.9999993463416289, iteration: 116052
loss: 0.9776257872581482,grad_norm: 0.9999992480566446, iteration: 116053
loss: 0.9962102770805359,grad_norm: 0.9347452575222089, iteration: 116054
loss: 1.0102310180664062,grad_norm: 0.9999992633884097, iteration: 116055
loss: 1.0020238161087036,grad_norm: 0.9999990794468775, iteration: 116056
loss: 0.9484710693359375,grad_norm: 0.9999991766942534, iteration: 116057
loss: 1.0272682905197144,grad_norm: 0.9999990038248645, iteration: 116058
loss: 1.0148234367370605,grad_norm: 0.9999990082644677, iteration: 116059
loss: 0.9963136911392212,grad_norm: 0.9999991435224809, iteration: 116060
loss: 0.963800847530365,grad_norm: 0.9999989481426942, iteration: 116061
loss: 0.9550496935844421,grad_norm: 0.8934920935127555, iteration: 116062
loss: 0.9992554783821106,grad_norm: 0.9999989942475409, iteration: 116063
loss: 1.0104573965072632,grad_norm: 0.9999991707284751, iteration: 116064
loss: 1.0251710414886475,grad_norm: 0.999999147819418, iteration: 116065
loss: 0.9897745847702026,grad_norm: 0.9113352627701192, iteration: 116066
loss: 0.9752622246742249,grad_norm: 0.9999989992663781, iteration: 116067
loss: 1.0140436887741089,grad_norm: 0.9753609556023324, iteration: 116068
loss: 0.9819799661636353,grad_norm: 0.9897816748436513, iteration: 116069
loss: 1.0188722610473633,grad_norm: 0.9759954170685611, iteration: 116070
loss: 1.0121015310287476,grad_norm: 0.9662674258968508, iteration: 116071
loss: 1.0308667421340942,grad_norm: 0.9898336288766638, iteration: 116072
loss: 0.9987448453903198,grad_norm: 0.8457279405406712, iteration: 116073
loss: 1.0413422584533691,grad_norm: 0.9999991683964471, iteration: 116074
loss: 1.0295040607452393,grad_norm: 0.9999993273673538, iteration: 116075
loss: 0.9858546257019043,grad_norm: 0.9999993386109637, iteration: 116076
loss: 1.0049275159835815,grad_norm: 0.9999992054627119, iteration: 116077
loss: 0.9674872159957886,grad_norm: 0.9999991387109616, iteration: 116078
loss: 0.9883511066436768,grad_norm: 0.9908086643991018, iteration: 116079
loss: 1.0007964372634888,grad_norm: 0.976384340998686, iteration: 116080
loss: 0.9930185079574585,grad_norm: 0.9999988506062211, iteration: 116081
loss: 0.969744861125946,grad_norm: 0.9999991382089448, iteration: 116082
loss: 1.0497783422470093,grad_norm: 0.9999992548714088, iteration: 116083
loss: 1.035253882408142,grad_norm: 0.999998981904597, iteration: 116084
loss: 1.0073442459106445,grad_norm: 0.9272070084104542, iteration: 116085
loss: 1.026076316833496,grad_norm: 0.9999991584515979, iteration: 116086
loss: 1.0052025318145752,grad_norm: 0.9999991785095879, iteration: 116087
loss: 1.0107171535491943,grad_norm: 0.8513459385627075, iteration: 116088
loss: 0.9732120037078857,grad_norm: 0.9999990783158579, iteration: 116089
loss: 1.0361932516098022,grad_norm: 0.9999995797199823, iteration: 116090
loss: 0.9804156422615051,grad_norm: 0.9999990034214894, iteration: 116091
loss: 1.0218067169189453,grad_norm: 0.9999992986379103, iteration: 116092
loss: 0.9738389253616333,grad_norm: 0.9828540654294264, iteration: 116093
loss: 1.0195401906967163,grad_norm: 0.9999996041935054, iteration: 116094
loss: 1.0130468606948853,grad_norm: 0.9336410988219849, iteration: 116095
loss: 0.996942400932312,grad_norm: 0.9257871813360609, iteration: 116096
loss: 1.0224493741989136,grad_norm: 0.9999993049245637, iteration: 116097
loss: 1.0353721380233765,grad_norm: 0.9999990613769417, iteration: 116098
loss: 1.0008736848831177,grad_norm: 0.9999991266000552, iteration: 116099
loss: 1.0324021577835083,grad_norm: 0.951827741795149, iteration: 116100
loss: 0.9930851459503174,grad_norm: 0.9999994537322469, iteration: 116101
loss: 0.9888189435005188,grad_norm: 0.9999991399407876, iteration: 116102
loss: 1.0337573289871216,grad_norm: 0.9999991532661451, iteration: 116103
loss: 0.965901792049408,grad_norm: 0.8621245245510516, iteration: 116104
loss: 1.016208529472351,grad_norm: 0.9999990309677768, iteration: 116105
loss: 0.9870579838752747,grad_norm: 0.9999991724413623, iteration: 116106
loss: 1.019097089767456,grad_norm: 0.9320548112112417, iteration: 116107
loss: 1.0067802667617798,grad_norm: 0.919259546544362, iteration: 116108
loss: 1.003780722618103,grad_norm: 0.9999991693156671, iteration: 116109
loss: 0.9770771861076355,grad_norm: 0.8617134281421411, iteration: 116110
loss: 0.9764196276664734,grad_norm: 0.9300236185505133, iteration: 116111
loss: 0.9604964852333069,grad_norm: 0.9999991407816838, iteration: 116112
loss: 1.0091207027435303,grad_norm: 0.9999991116461886, iteration: 116113
loss: 0.9521389603614807,grad_norm: 0.999999115236353, iteration: 116114
loss: 1.0008891820907593,grad_norm: 0.999999027547208, iteration: 116115
loss: 1.0078284740447998,grad_norm: 0.9999992145261538, iteration: 116116
loss: 0.9650142192840576,grad_norm: 0.999999346279958, iteration: 116117
loss: 0.994289755821228,grad_norm: 0.9710168651082672, iteration: 116118
loss: 1.0148032903671265,grad_norm: 0.9999990745717358, iteration: 116119
loss: 0.9876582622528076,grad_norm: 0.9999991314007773, iteration: 116120
loss: 0.9782655239105225,grad_norm: 0.999999264253021, iteration: 116121
loss: 0.9913709759712219,grad_norm: 0.9999992371993721, iteration: 116122
loss: 1.0146793127059937,grad_norm: 0.9714558105314386, iteration: 116123
loss: 0.9800260066986084,grad_norm: 0.9857026344630928, iteration: 116124
loss: 1.0359653234481812,grad_norm: 0.9999993566818514, iteration: 116125
loss: 1.0043312311172485,grad_norm: 0.9999990378268893, iteration: 116126
loss: 1.0356441736221313,grad_norm: 0.924371886267729, iteration: 116127
loss: 0.989600658416748,grad_norm: 0.8340596259026922, iteration: 116128
loss: 1.0186028480529785,grad_norm: 0.9999989882785757, iteration: 116129
loss: 1.0194858312606812,grad_norm: 0.9999989077147919, iteration: 116130
loss: 0.9919700026512146,grad_norm: 0.9999990550283752, iteration: 116131
loss: 1.0032894611358643,grad_norm: 0.999999091322127, iteration: 116132
loss: 0.9896129965782166,grad_norm: 0.9999991415833565, iteration: 116133
loss: 1.0487803220748901,grad_norm: 0.9999993982665736, iteration: 116134
loss: 1.0118401050567627,grad_norm: 0.861132701627154, iteration: 116135
loss: 0.9654344320297241,grad_norm: 0.999999262425772, iteration: 116136
loss: 0.9812413454055786,grad_norm: 0.8528499145287017, iteration: 116137
loss: 0.986936628818512,grad_norm: 0.9366143312116902, iteration: 116138
loss: 0.9922518730163574,grad_norm: 0.9364284183043342, iteration: 116139
loss: 1.0083041191101074,grad_norm: 0.9999990659252428, iteration: 116140
loss: 1.042649507522583,grad_norm: 0.9728088382892796, iteration: 116141
loss: 1.0161305665969849,grad_norm: 0.9999991461699956, iteration: 116142
loss: 1.0011245012283325,grad_norm: 0.7382408089261497, iteration: 116143
loss: 0.9602412581443787,grad_norm: 0.9998789353080294, iteration: 116144
loss: 0.9839231967926025,grad_norm: 0.9999992257753824, iteration: 116145
loss: 1.0061882734298706,grad_norm: 0.9697112415382365, iteration: 116146
loss: 0.9844862222671509,grad_norm: 0.9999991950335889, iteration: 116147
loss: 0.9778617024421692,grad_norm: 0.9340013408288381, iteration: 116148
loss: 1.0764940977096558,grad_norm: 0.9999996736920215, iteration: 116149
loss: 0.983704686164856,grad_norm: 0.999999208340866, iteration: 116150
loss: 1.0415916442871094,grad_norm: 0.9999988883121785, iteration: 116151
loss: 1.006801962852478,grad_norm: 0.9616667110285829, iteration: 116152
loss: 1.0292716026306152,grad_norm: 0.9999994611614905, iteration: 116153
loss: 0.9857273697853088,grad_norm: 0.9999991265373374, iteration: 116154
loss: 0.9948803782463074,grad_norm: 0.9903757847758433, iteration: 116155
loss: 1.0178184509277344,grad_norm: 0.9999992735445777, iteration: 116156
loss: 0.9999678134918213,grad_norm: 0.9486587477837733, iteration: 116157
loss: 1.0085028409957886,grad_norm: 0.9551132955685014, iteration: 116158
loss: 1.0119320154190063,grad_norm: 0.9999989732350665, iteration: 116159
loss: 1.0101639032363892,grad_norm: 0.9999990705758451, iteration: 116160
loss: 1.0251682996749878,grad_norm: 0.9605665501848577, iteration: 116161
loss: 0.9930739402770996,grad_norm: 0.9684805898543927, iteration: 116162
loss: 1.017038106918335,grad_norm: 0.8833909331686134, iteration: 116163
loss: 0.9950221180915833,grad_norm: 0.9999991249377409, iteration: 116164
loss: 0.9946073293685913,grad_norm: 0.9927312003614868, iteration: 116165
loss: 0.9627196788787842,grad_norm: 0.999999107318791, iteration: 116166
loss: 0.9933252930641174,grad_norm: 0.9999989617282844, iteration: 116167
loss: 0.9386739730834961,grad_norm: 0.9999990825831924, iteration: 116168
loss: 0.9898723363876343,grad_norm: 0.999999157877993, iteration: 116169
loss: 0.998757541179657,grad_norm: 0.9999992133064658, iteration: 116170
loss: 0.9556965827941895,grad_norm: 0.9999990085839119, iteration: 116171
loss: 1.0073671340942383,grad_norm: 0.8959492197389853, iteration: 116172
loss: 1.0291939973831177,grad_norm: 0.9831899815486022, iteration: 116173
loss: 0.9956636428833008,grad_norm: 0.962710614943274, iteration: 116174
loss: 0.9954373240470886,grad_norm: 0.9999990985228292, iteration: 116175
loss: 1.0061769485473633,grad_norm: 0.9999992590816956, iteration: 116176
loss: 0.9806312322616577,grad_norm: 0.9999992975086897, iteration: 116177
loss: 0.984002411365509,grad_norm: 0.957118004228945, iteration: 116178
loss: 1.0074533224105835,grad_norm: 0.9999991779891575, iteration: 116179
loss: 0.9853929877281189,grad_norm: 0.8872052726512685, iteration: 116180
loss: 0.9760060906410217,grad_norm: 0.9310518884598672, iteration: 116181
loss: 1.043279767036438,grad_norm: 0.9999993192418113, iteration: 116182
loss: 1.0014005899429321,grad_norm: 0.9999992604815596, iteration: 116183
loss: 1.002846360206604,grad_norm: 0.9741655742504408, iteration: 116184
loss: 1.0251436233520508,grad_norm: 0.8920896112936347, iteration: 116185
loss: 0.9872452020645142,grad_norm: 0.9999990665184406, iteration: 116186
loss: 0.9917069673538208,grad_norm: 0.9999992444758663, iteration: 116187
loss: 1.0005645751953125,grad_norm: 0.9999990664558379, iteration: 116188
loss: 0.9787337183952332,grad_norm: 0.9999992316099112, iteration: 116189
loss: 0.9764970541000366,grad_norm: 0.999999139065233, iteration: 116190
loss: 0.9698671102523804,grad_norm: 0.9071329302547321, iteration: 116191
loss: 0.9837809801101685,grad_norm: 0.9999992317789121, iteration: 116192
loss: 1.0279535055160522,grad_norm: 0.8455167458521115, iteration: 116193
loss: 0.9590413570404053,grad_norm: 0.8643030842764566, iteration: 116194
loss: 0.9954543113708496,grad_norm: 0.9999991091557202, iteration: 116195
loss: 0.9840564131736755,grad_norm: 0.9999991519616559, iteration: 116196
loss: 1.007473349571228,grad_norm: 0.9067588831575233, iteration: 116197
loss: 0.9832422137260437,grad_norm: 0.9586591598736164, iteration: 116198
loss: 0.9774079322814941,grad_norm: 0.9899100940194291, iteration: 116199
loss: 1.0114057064056396,grad_norm: 0.9702531624672349, iteration: 116200
loss: 1.0070593357086182,grad_norm: 0.9999990216105428, iteration: 116201
loss: 0.987941324710846,grad_norm: 0.9999992545754863, iteration: 116202
loss: 0.9688676595687866,grad_norm: 0.9585488635226804, iteration: 116203
loss: 1.0204404592514038,grad_norm: 0.9999994854033126, iteration: 116204
loss: 0.9928420782089233,grad_norm: 0.9999989457245692, iteration: 116205
loss: 1.0451328754425049,grad_norm: 0.9924251222527871, iteration: 116206
loss: 1.012035846710205,grad_norm: 0.9999991589420995, iteration: 116207
loss: 1.0081782341003418,grad_norm: 0.9999991030444101, iteration: 116208
loss: 1.0095103979110718,grad_norm: 0.9979136666535879, iteration: 116209
loss: 0.9829137325286865,grad_norm: 0.9999992015285226, iteration: 116210
loss: 0.958118736743927,grad_norm: 0.9999991390143002, iteration: 116211
loss: 1.0050904750823975,grad_norm: 0.9999991522748958, iteration: 116212
loss: 0.9822319149971008,grad_norm: 0.9737432983280039, iteration: 116213
loss: 0.979664146900177,grad_norm: 0.9695588264433809, iteration: 116214
loss: 0.9899590611457825,grad_norm: 0.9188159816752914, iteration: 116215
loss: 1.0123579502105713,grad_norm: 0.9999990115388822, iteration: 116216
loss: 1.0170408487319946,grad_norm: 0.9999989593044996, iteration: 116217
loss: 0.9843800663948059,grad_norm: 0.9504600612968341, iteration: 116218
loss: 1.001877784729004,grad_norm: 0.9316568298372511, iteration: 116219
loss: 0.9921692609786987,grad_norm: 0.9999992366908874, iteration: 116220
loss: 0.9810084700584412,grad_norm: 0.9999991225165328, iteration: 116221
loss: 1.015207290649414,grad_norm: 0.9999992752137357, iteration: 116222
loss: 0.9812096953392029,grad_norm: 0.9598109907670394, iteration: 116223
loss: 1.031196117401123,grad_norm: 0.9999990490826637, iteration: 116224
loss: 1.0042251348495483,grad_norm: 0.9999991345813568, iteration: 116225
loss: 0.945037305355072,grad_norm: 0.8902600245014418, iteration: 116226
loss: 0.982746422290802,grad_norm: 0.9999990671694516, iteration: 116227
loss: 1.067696452140808,grad_norm: 0.9999991747658296, iteration: 116228
loss: 1.0370150804519653,grad_norm: 0.9999991375345644, iteration: 116229
loss: 1.01044499874115,grad_norm: 0.9963119282843892, iteration: 116230
loss: 1.008899450302124,grad_norm: 0.9999992089966713, iteration: 116231
loss: 0.9907159805297852,grad_norm: 0.9999991539111329, iteration: 116232
loss: 1.0119274854660034,grad_norm: 0.9999990908431676, iteration: 116233
loss: 1.001389741897583,grad_norm: 0.9999991701729622, iteration: 116234
loss: 0.9832751154899597,grad_norm: 0.9828113401719579, iteration: 116235
loss: 0.9670663475990295,grad_norm: 0.9999990401229701, iteration: 116236
loss: 1.0361920595169067,grad_norm: 0.9999989944180279, iteration: 116237
loss: 1.0240260362625122,grad_norm: 0.9999999572939986, iteration: 116238
loss: 0.9902547001838684,grad_norm: 0.9999991550817362, iteration: 116239
loss: 1.0353484153747559,grad_norm: 0.9999989564079247, iteration: 116240
loss: 1.0089184045791626,grad_norm: 0.9999992069477571, iteration: 116241
loss: 0.9713117480278015,grad_norm: 0.8699078126934222, iteration: 116242
loss: 1.04813814163208,grad_norm: 0.9999990392991401, iteration: 116243
loss: 1.0294348001480103,grad_norm: 0.9999991150477257, iteration: 116244
loss: 1.010025978088379,grad_norm: 0.9999990261856143, iteration: 116245
loss: 0.9964817762374878,grad_norm: 0.9999989242780949, iteration: 116246
loss: 0.9583993554115295,grad_norm: 0.8677027757693881, iteration: 116247
loss: 0.9770839214324951,grad_norm: 0.9999988641747548, iteration: 116248
loss: 1.0049973726272583,grad_norm: 0.99999904328046, iteration: 116249
loss: 1.0196595191955566,grad_norm: 0.9999991881424853, iteration: 116250
loss: 1.0006060600280762,grad_norm: 0.999999137295302, iteration: 116251
loss: 0.9913472533226013,grad_norm: 0.946242658191507, iteration: 116252
loss: 1.0234588384628296,grad_norm: 0.9225004759878525, iteration: 116253
loss: 0.9959789514541626,grad_norm: 0.8982169891320909, iteration: 116254
loss: 1.000619888305664,grad_norm: 0.9999991346596968, iteration: 116255
loss: 1.038198471069336,grad_norm: 0.9999990199754978, iteration: 116256
loss: 1.0157685279846191,grad_norm: 0.9999991165265575, iteration: 116257
loss: 1.0161242485046387,grad_norm: 0.9478925196155324, iteration: 116258
loss: 0.9855030179023743,grad_norm: 0.9999991994304029, iteration: 116259
loss: 1.0072648525238037,grad_norm: 0.9894934415273947, iteration: 116260
loss: 1.0351463556289673,grad_norm: 0.960141217554289, iteration: 116261
loss: 1.0049188137054443,grad_norm: 0.9433863983619067, iteration: 116262
loss: 0.9862641096115112,grad_norm: 0.9607709764578156, iteration: 116263
loss: 1.0223513841629028,grad_norm: 0.9999988984251179, iteration: 116264
loss: 0.9934158325195312,grad_norm: 0.9999992446198108, iteration: 116265
loss: 0.9813305139541626,grad_norm: 0.9999993366893967, iteration: 116266
loss: 0.9868636131286621,grad_norm: 0.9647101853236262, iteration: 116267
loss: 1.0067179203033447,grad_norm: 0.9347512851267459, iteration: 116268
loss: 0.9765574336051941,grad_norm: 0.9999992166863232, iteration: 116269
loss: 0.9980278015136719,grad_norm: 0.9470950674232894, iteration: 116270
loss: 1.0041342973709106,grad_norm: 0.9999992197439633, iteration: 116271
loss: 0.9844701886177063,grad_norm: 0.9999990862781316, iteration: 116272
loss: 1.009340524673462,grad_norm: 0.9999990556584631, iteration: 116273
loss: 0.9942511320114136,grad_norm: 0.999999005487412, iteration: 116274
loss: 1.0016993284225464,grad_norm: 0.9999992651549187, iteration: 116275
loss: 0.9685727953910828,grad_norm: 0.9999992063722009, iteration: 116276
loss: 0.9740447998046875,grad_norm: 0.9999989958122661, iteration: 116277
loss: 1.0001230239868164,grad_norm: 0.9999990745476743, iteration: 116278
loss: 1.0156387090682983,grad_norm: 0.9999992509006748, iteration: 116279
loss: 1.0146188735961914,grad_norm: 0.9999991684275725, iteration: 116280
loss: 1.0097875595092773,grad_norm: 0.9999993321706024, iteration: 116281
loss: 1.0209895372390747,grad_norm: 0.9999989815821116, iteration: 116282
loss: 1.0217148065567017,grad_norm: 0.9041152344829457, iteration: 116283
loss: 1.0156770944595337,grad_norm: 0.9999990070210014, iteration: 116284
loss: 1.0019497871398926,grad_norm: 0.9738135260204804, iteration: 116285
loss: 1.0161340236663818,grad_norm: 0.9999990907742647, iteration: 116286
loss: 1.000771164894104,grad_norm: 0.9431498105542376, iteration: 116287
loss: 1.0506806373596191,grad_norm: 0.9694137402777269, iteration: 116288
loss: 1.0403010845184326,grad_norm: 0.9999991513004859, iteration: 116289
loss: 1.00300931930542,grad_norm: 0.9918790490230864, iteration: 116290
loss: 0.9915964007377625,grad_norm: 0.7626878065618564, iteration: 116291
loss: 1.004936933517456,grad_norm: 0.9999990239933297, iteration: 116292
loss: 0.9954610466957092,grad_norm: 0.9999990347821344, iteration: 116293
loss: 0.9810146689414978,grad_norm: 0.9999991127578985, iteration: 116294
loss: 1.0138165950775146,grad_norm: 0.9999990913642977, iteration: 116295
loss: 1.0049324035644531,grad_norm: 0.9999992128319756, iteration: 116296
loss: 1.078857660293579,grad_norm: 0.9999998914276109, iteration: 116297
loss: 0.9797801375389099,grad_norm: 0.8582163604856577, iteration: 116298
loss: 0.9709259271621704,grad_norm: 0.999999178973674, iteration: 116299
loss: 1.0178686380386353,grad_norm: 0.9999991334005487, iteration: 116300
loss: 0.9971359372138977,grad_norm: 0.9800698485485038, iteration: 116301
loss: 1.0263718366622925,grad_norm: 0.9999992382018038, iteration: 116302
loss: 1.0096369981765747,grad_norm: 0.999999141676871, iteration: 116303
loss: 1.004529356956482,grad_norm: 0.9999993210415044, iteration: 116304
loss: 1.0139001607894897,grad_norm: 0.9999992757893923, iteration: 116305
loss: 0.995136022567749,grad_norm: 0.89688706458407, iteration: 116306
loss: 1.0238350629806519,grad_norm: 0.94690720113564, iteration: 116307
loss: 1.0047773122787476,grad_norm: 0.9999990894914534, iteration: 116308
loss: 0.987292468547821,grad_norm: 0.9999991979696535, iteration: 116309
loss: 0.9867649078369141,grad_norm: 0.999999074567295, iteration: 116310
loss: 1.0238889455795288,grad_norm: 0.9999990918868022, iteration: 116311
loss: 1.0073695182800293,grad_norm: 0.888002082768929, iteration: 116312
loss: 0.9620359539985657,grad_norm: 0.9999989109191735, iteration: 116313
loss: 0.995654284954071,grad_norm: 0.9999991059372283, iteration: 116314
loss: 1.0096193552017212,grad_norm: 0.9999994068452418, iteration: 116315
loss: 1.0051151514053345,grad_norm: 0.9871055451007479, iteration: 116316
loss: 1.0215795040130615,grad_norm: 0.9999991773736321, iteration: 116317
loss: 1.0169669389724731,grad_norm: 0.9999993846061689, iteration: 116318
loss: 0.971770167350769,grad_norm: 0.9662450876200727, iteration: 116319
loss: 0.9974290728569031,grad_norm: 0.9999990455510102, iteration: 116320
loss: 0.9854893684387207,grad_norm: 0.9999990789085693, iteration: 116321
loss: 0.980701744556427,grad_norm: 0.8830540176784933, iteration: 116322
loss: 1.0080429315567017,grad_norm: 0.999999145526437, iteration: 116323
loss: 0.9820553064346313,grad_norm: 0.9776857627872609, iteration: 116324
loss: 0.9959967732429504,grad_norm: 0.9723741876321549, iteration: 116325
loss: 0.9785173535346985,grad_norm: 0.9555261684915914, iteration: 116326
loss: 0.9907758235931396,grad_norm: 0.9999992201131603, iteration: 116327
loss: 0.9923084378242493,grad_norm: 0.9999990780641602, iteration: 116328
loss: 0.9946090579032898,grad_norm: 0.9999992155000852, iteration: 116329
loss: 0.9674319624900818,grad_norm: 0.9128480971167368, iteration: 116330
loss: 0.9856503009796143,grad_norm: 0.9490452446301001, iteration: 116331
loss: 1.006257176399231,grad_norm: 0.9999991273693581, iteration: 116332
loss: 0.974820613861084,grad_norm: 0.9999992185577316, iteration: 116333
loss: 0.9830659031867981,grad_norm: 0.9554597577225686, iteration: 116334
loss: 0.9669013619422913,grad_norm: 0.9773301559030828, iteration: 116335
loss: 0.9885269403457642,grad_norm: 0.9999990488411633, iteration: 116336
loss: 0.9866543412208557,grad_norm: 0.9487591930413083, iteration: 116337
loss: 1.0337504148483276,grad_norm: 0.9952760051783417, iteration: 116338
loss: 1.0052483081817627,grad_norm: 0.9999991584842055, iteration: 116339
loss: 1.0081791877746582,grad_norm: 0.9992494857349866, iteration: 116340
loss: 0.9939602017402649,grad_norm: 0.9826044769848631, iteration: 116341
loss: 1.0221656560897827,grad_norm: 0.9999989750557107, iteration: 116342
loss: 1.0162032842636108,grad_norm: 0.9999992178363584, iteration: 116343
loss: 0.9749712347984314,grad_norm: 0.9999991958813966, iteration: 116344
loss: 1.011004090309143,grad_norm: 0.9999990435849797, iteration: 116345
loss: 0.9728024005889893,grad_norm: 0.9999992427279643, iteration: 116346
loss: 1.0017454624176025,grad_norm: 0.9963222506920351, iteration: 116347
loss: 0.9654108285903931,grad_norm: 0.9808486595072493, iteration: 116348
loss: 1.0603187084197998,grad_norm: 0.9999997870207683, iteration: 116349
loss: 1.0015870332717896,grad_norm: 0.9999991540204284, iteration: 116350
loss: 1.0130201578140259,grad_norm: 0.9254100535512408, iteration: 116351
loss: 1.0199780464172363,grad_norm: 0.9811467300124413, iteration: 116352
loss: 1.0009188652038574,grad_norm: 0.9999991468763052, iteration: 116353
loss: 1.0258138179779053,grad_norm: 0.9999991075540406, iteration: 116354
loss: 1.0045603513717651,grad_norm: 0.8893310118006202, iteration: 116355
loss: 0.9974334836006165,grad_norm: 0.9999991342323007, iteration: 116356
loss: 1.004255771636963,grad_norm: 0.9999993157972318, iteration: 116357
loss: 1.0334115028381348,grad_norm: 0.9999991015573605, iteration: 116358
loss: 1.043204426765442,grad_norm: 0.9999995264429282, iteration: 116359
loss: 1.026233434677124,grad_norm: 0.9795775575504214, iteration: 116360
loss: 1.0310522317886353,grad_norm: 0.9782356589182882, iteration: 116361
loss: 0.9976857304573059,grad_norm: 0.9999991832899598, iteration: 116362
loss: 0.9872727394104004,grad_norm: 0.8331521428957976, iteration: 116363
loss: 1.0226274728775024,grad_norm: 0.9788950433900279, iteration: 116364
loss: 0.9931560754776001,grad_norm: 0.8728587920717205, iteration: 116365
loss: 1.0379620790481567,grad_norm: 0.897212786081279, iteration: 116366
loss: 1.0057363510131836,grad_norm: 0.9999990670253304, iteration: 116367
loss: 1.0902976989746094,grad_norm: 0.9999998919965003, iteration: 116368
loss: 0.9906983375549316,grad_norm: 0.956980427676547, iteration: 116369
loss: 1.0155727863311768,grad_norm: 0.9980264693792439, iteration: 116370
loss: 0.9771453738212585,grad_norm: 0.8404443280666322, iteration: 116371
loss: 0.984447181224823,grad_norm: 0.89188069488751, iteration: 116372
loss: 1.0352901220321655,grad_norm: 0.9999991193676304, iteration: 116373
loss: 1.0191624164581299,grad_norm: 0.9768937613086275, iteration: 116374
loss: 1.0368322134017944,grad_norm: 0.9573082090253942, iteration: 116375
loss: 0.998224139213562,grad_norm: 0.9299734851944521, iteration: 116376
loss: 1.0340242385864258,grad_norm: 0.9870544541298407, iteration: 116377
loss: 1.0337657928466797,grad_norm: 0.9493686010286032, iteration: 116378
loss: 0.9437668919563293,grad_norm: 0.999999363484782, iteration: 116379
loss: 0.9765518307685852,grad_norm: 0.9821924457348936, iteration: 116380
loss: 1.0211577415466309,grad_norm: 0.9853571834149094, iteration: 116381
loss: 0.9940433502197266,grad_norm: 0.9999991432897164, iteration: 116382
loss: 1.004430890083313,grad_norm: 0.9999991331256576, iteration: 116383
loss: 0.9982430338859558,grad_norm: 0.9999994600276997, iteration: 116384
loss: 1.0278347730636597,grad_norm: 0.9999988974865968, iteration: 116385
loss: 1.001856803894043,grad_norm: 0.9999990914026182, iteration: 116386
loss: 0.9963963627815247,grad_norm: 0.9916421858797067, iteration: 116387
loss: 1.02212655544281,grad_norm: 0.9999992294938171, iteration: 116388
loss: 1.0131571292877197,grad_norm: 0.9999998473784135, iteration: 116389
loss: 0.9949147701263428,grad_norm: 0.9862421701293407, iteration: 116390
loss: 0.979723334312439,grad_norm: 0.999999320535911, iteration: 116391
loss: 0.9979805946350098,grad_norm: 0.9999991441971735, iteration: 116392
loss: 1.0052697658538818,grad_norm: 0.9999993322380205, iteration: 116393
loss: 0.9659876227378845,grad_norm: 0.9999992986565022, iteration: 116394
loss: 0.99747234582901,grad_norm: 0.8319431779201858, iteration: 116395
loss: 0.9972281455993652,grad_norm: 0.9999992339147616, iteration: 116396
loss: 1.0123194456100464,grad_norm: 0.9668302225416674, iteration: 116397
loss: 0.9865811467170715,grad_norm: 0.9999991808399809, iteration: 116398
loss: 0.969933032989502,grad_norm: 0.9814371906013526, iteration: 116399
loss: 0.9975706338882446,grad_norm: 0.9999991433287979, iteration: 116400
loss: 1.0028126239776611,grad_norm: 0.9999997429118972, iteration: 116401
loss: 0.9681829214096069,grad_norm: 0.9138500938582481, iteration: 116402
loss: 1.015477180480957,grad_norm: 0.9999991618427008, iteration: 116403
loss: 1.049269437789917,grad_norm: 0.9890635197527112, iteration: 116404
loss: 0.9925652146339417,grad_norm: 0.9962737276304804, iteration: 116405
loss: 0.9825379848480225,grad_norm: 0.9999991345682471, iteration: 116406
loss: 1.008810043334961,grad_norm: 0.9999990680343407, iteration: 116407
loss: 1.0076241493225098,grad_norm: 0.802462303166128, iteration: 116408
loss: 0.9814649224281311,grad_norm: 0.9999990768019075, iteration: 116409
loss: 1.028913140296936,grad_norm: 0.999999502374798, iteration: 116410
loss: 1.0036290884017944,grad_norm: 0.9702408341572585, iteration: 116411
loss: 0.9879311919212341,grad_norm: 0.9999992157556866, iteration: 116412
loss: 1.0172063112258911,grad_norm: 0.9999991416866131, iteration: 116413
loss: 0.9710928797721863,grad_norm: 0.9999991840787541, iteration: 116414
loss: 0.9816834926605225,grad_norm: 0.9616950885948213, iteration: 116415
loss: 1.002231240272522,grad_norm: 0.9999991847410109, iteration: 116416
loss: 1.000754952430725,grad_norm: 0.9749216660021679, iteration: 116417
loss: 1.0159317255020142,grad_norm: 0.9174066949180334, iteration: 116418
loss: 0.9999551177024841,grad_norm: 0.9999990904554323, iteration: 116419
loss: 0.9625237584114075,grad_norm: 0.9999991771121999, iteration: 116420
loss: 0.9786903262138367,grad_norm: 0.9999991063189669, iteration: 116421
loss: 0.9744794368743896,grad_norm: 0.9999991370016513, iteration: 116422
loss: 1.001031756401062,grad_norm: 0.9999991502606789, iteration: 116423
loss: 1.048530101776123,grad_norm: 0.9768633664357949, iteration: 116424
loss: 0.9848400354385376,grad_norm: 0.9999991658828457, iteration: 116425
loss: 1.034279465675354,grad_norm: 0.9999991061009361, iteration: 116426
loss: 1.0190426111221313,grad_norm: 0.9999991122691023, iteration: 116427
loss: 1.0535916090011597,grad_norm: 0.999999415441663, iteration: 116428
loss: 1.013245940208435,grad_norm: 0.9034927549734539, iteration: 116429
loss: 1.1062544584274292,grad_norm: 0.9999997960530798, iteration: 116430
loss: 1.0063414573669434,grad_norm: 0.9999991759181726, iteration: 116431
loss: 0.9755033254623413,grad_norm: 0.9999990827989313, iteration: 116432
loss: 1.012152075767517,grad_norm: 0.9214177098757933, iteration: 116433
loss: 1.021072268486023,grad_norm: 0.9738265036705249, iteration: 116434
loss: 1.0127172470092773,grad_norm: 0.9999991298111337, iteration: 116435
loss: 1.011635661125183,grad_norm: 0.9826929221469044, iteration: 116436
loss: 0.9717673063278198,grad_norm: 0.9801141044319323, iteration: 116437
loss: 1.0812060832977295,grad_norm: 0.9999991138915907, iteration: 116438
loss: 1.0545576810836792,grad_norm: 0.9999996906760933, iteration: 116439
loss: 0.9922516345977783,grad_norm: 0.9999991574635476, iteration: 116440
loss: 1.0220166444778442,grad_norm: 0.9331989474537276, iteration: 116441
loss: 1.112216830253601,grad_norm: 0.9793560641900957, iteration: 116442
loss: 1.155338168144226,grad_norm: 0.9999992484171959, iteration: 116443
loss: 1.0211116075515747,grad_norm: 0.9999991407922336, iteration: 116444
loss: 1.0219980478286743,grad_norm: 0.9106934660124041, iteration: 116445
loss: 1.0237483978271484,grad_norm: 0.9999991025183548, iteration: 116446
loss: 1.040867567062378,grad_norm: 0.9999991610064719, iteration: 116447
loss: 1.0376091003417969,grad_norm: 0.9139592189068844, iteration: 116448
loss: 1.0222924947738647,grad_norm: 0.9999992568830494, iteration: 116449
loss: 1.0111664533615112,grad_norm: 0.9999993284214769, iteration: 116450
loss: 0.9890416860580444,grad_norm: 0.9999993067517774, iteration: 116451
loss: 0.9931620359420776,grad_norm: 0.9999990821348217, iteration: 116452
loss: 1.0275359153747559,grad_norm: 0.9999993219979674, iteration: 116453
loss: 0.9833028316497803,grad_norm: 0.9999991539259724, iteration: 116454
loss: 0.9685596227645874,grad_norm: 0.8229958215892632, iteration: 116455
loss: 0.9811692833900452,grad_norm: 0.9759032870825165, iteration: 116456
loss: 0.9736261367797852,grad_norm: 0.9999992191264914, iteration: 116457
loss: 0.9782544374465942,grad_norm: 0.9445694997351413, iteration: 116458
loss: 1.0018424987792969,grad_norm: 0.9999992275900882, iteration: 116459
loss: 0.9969443082809448,grad_norm: 0.9999991078456149, iteration: 116460
loss: 1.0343620777130127,grad_norm: 0.9999991569993608, iteration: 116461
loss: 0.9918428659439087,grad_norm: 0.9999993524914672, iteration: 116462
loss: 1.0072152614593506,grad_norm: 0.8828660059977681, iteration: 116463
loss: 1.0031983852386475,grad_norm: 0.9999992206593592, iteration: 116464
loss: 0.9879677891731262,grad_norm: 0.8630594049578489, iteration: 116465
loss: 1.0134625434875488,grad_norm: 0.9119690606564332, iteration: 116466
loss: 1.005121111869812,grad_norm: 0.9999991826728879, iteration: 116467
loss: 0.9635538458824158,grad_norm: 0.9538510700687043, iteration: 116468
loss: 0.984362781047821,grad_norm: 0.9736632098555932, iteration: 116469
loss: 1.002996563911438,grad_norm: 0.9721446666191371, iteration: 116470
loss: 0.9980794191360474,grad_norm: 0.9302292485740624, iteration: 116471
loss: 1.0236327648162842,grad_norm: 0.9999992272476119, iteration: 116472
loss: 0.9781256318092346,grad_norm: 0.9485655366179246, iteration: 116473
loss: 0.9812512993812561,grad_norm: 0.8702161022771574, iteration: 116474
loss: 1.0022448301315308,grad_norm: 0.9999991791104093, iteration: 116475
loss: 1.020016074180603,grad_norm: 0.9999991460579101, iteration: 116476
loss: 1.0191481113433838,grad_norm: 0.9999990831654304, iteration: 116477
loss: 0.9872147440910339,grad_norm: 0.9110865203784444, iteration: 116478
loss: 1.0285100936889648,grad_norm: 0.9999991588361952, iteration: 116479
loss: 0.9843357801437378,grad_norm: 0.8559485628219363, iteration: 116480
loss: 1.0153589248657227,grad_norm: 0.9999990830859244, iteration: 116481
loss: 0.9711962342262268,grad_norm: 0.9999992781597815, iteration: 116482
loss: 1.0109230279922485,grad_norm: 0.9625413309080261, iteration: 116483
loss: 1.0194976329803467,grad_norm: 0.9999992011279629, iteration: 116484
loss: 1.0635658502578735,grad_norm: 0.999999242545697, iteration: 116485
loss: 1.002531886100769,grad_norm: 0.9999992351843796, iteration: 116486
loss: 1.0876340866088867,grad_norm: 0.9999999286861389, iteration: 116487
loss: 1.0336735248565674,grad_norm: 0.9999991599342872, iteration: 116488
loss: 0.9952006340026855,grad_norm: 0.9568561927161054, iteration: 116489
loss: 1.013542890548706,grad_norm: 0.8797994587133717, iteration: 116490
loss: 0.9770318865776062,grad_norm: 0.9999991614471542, iteration: 116491
loss: 1.0150775909423828,grad_norm: 0.9999992173188336, iteration: 116492
loss: 1.0092271566390991,grad_norm: 0.9374863959966084, iteration: 116493
loss: 0.9831612706184387,grad_norm: 0.9999990922029893, iteration: 116494
loss: 1.0042399168014526,grad_norm: 0.9999991480356923, iteration: 116495
loss: 1.006784439086914,grad_norm: 0.9999990492269364, iteration: 116496
loss: 1.0337469577789307,grad_norm: 0.9999991858198235, iteration: 116497
loss: 1.0181634426116943,grad_norm: 0.9999990617965188, iteration: 116498
loss: 1.0320384502410889,grad_norm: 0.993478307691997, iteration: 116499
loss: 0.9651985764503479,grad_norm: 0.9999991809156844, iteration: 116500
loss: 0.9823006391525269,grad_norm: 0.9999992673153065, iteration: 116501
loss: 0.9771807193756104,grad_norm: 0.9999991485735021, iteration: 116502
loss: 1.0077344179153442,grad_norm: 0.99999904359805, iteration: 116503
loss: 0.9789927005767822,grad_norm: 0.9999990751628239, iteration: 116504
loss: 0.9791728258132935,grad_norm: 0.9539304785562964, iteration: 116505
loss: 1.0576547384262085,grad_norm: 0.9999991562001173, iteration: 116506
loss: 0.9777919054031372,grad_norm: 0.9999990255444801, iteration: 116507
loss: 0.9798124432563782,grad_norm: 0.999999171605181, iteration: 116508
loss: 1.0243295431137085,grad_norm: 0.9999990329572331, iteration: 116509
loss: 1.0153729915618896,grad_norm: 0.9722743307361935, iteration: 116510
loss: 0.9545549154281616,grad_norm: 0.999999141363935, iteration: 116511
loss: 0.9970145225524902,grad_norm: 0.9999990868434312, iteration: 116512
loss: 0.957080066204071,grad_norm: 0.9976778818419845, iteration: 116513
loss: 0.9779230952262878,grad_norm: 0.9999990946007101, iteration: 116514
loss: 0.9839309453964233,grad_norm: 0.9999992587241406, iteration: 116515
loss: 1.0071738958358765,grad_norm: 0.9999991264576403, iteration: 116516
loss: 0.9993692636489868,grad_norm: 0.9999991439382503, iteration: 116517
loss: 1.0097849369049072,grad_norm: 0.9999990845495546, iteration: 116518
loss: 1.005553960800171,grad_norm: 0.9999992717946313, iteration: 116519
loss: 1.0056499242782593,grad_norm: 0.9999992133670402, iteration: 116520
loss: 1.0108442306518555,grad_norm: 0.999999073615521, iteration: 116521
loss: 1.027650237083435,grad_norm: 0.8245161626879559, iteration: 116522
loss: 0.9672597646713257,grad_norm: 0.9148183048758699, iteration: 116523
loss: 1.015202522277832,grad_norm: 0.9811294102160181, iteration: 116524
loss: 0.9863689541816711,grad_norm: 0.9653193025938519, iteration: 116525
loss: 0.9902011752128601,grad_norm: 0.8877244477490572, iteration: 116526
loss: 0.9855002760887146,grad_norm: 0.8931721965256548, iteration: 116527
loss: 1.003174066543579,grad_norm: 0.972792839457349, iteration: 116528
loss: 0.9917815923690796,grad_norm: 0.9999991299165, iteration: 116529
loss: 1.0009446144104004,grad_norm: 0.9999991944098928, iteration: 116530
loss: 0.9789673089981079,grad_norm: 0.9999990728366202, iteration: 116531
loss: 1.0296523571014404,grad_norm: 0.9999991172268681, iteration: 116532
loss: 1.0007681846618652,grad_norm: 0.9999992398248265, iteration: 116533
loss: 1.0002366304397583,grad_norm: 0.9671627794501435, iteration: 116534
loss: 1.019156575202942,grad_norm: 0.9849774203913123, iteration: 116535
loss: 1.013027548789978,grad_norm: 0.8843047914804754, iteration: 116536
loss: 0.9722086191177368,grad_norm: 0.8932791711186382, iteration: 116537
loss: 1.0402634143829346,grad_norm: 0.999999108947505, iteration: 116538
loss: 1.0197142362594604,grad_norm: 0.9496636683790501, iteration: 116539
loss: 0.9900377988815308,grad_norm: 0.9651899928943181, iteration: 116540
loss: 1.0049269199371338,grad_norm: 0.9999990254870625, iteration: 116541
loss: 0.9779269099235535,grad_norm: 0.9999990065494873, iteration: 116542
loss: 1.0201750993728638,grad_norm: 0.9505678844674081, iteration: 116543
loss: 0.9543265700340271,grad_norm: 0.9999991419205717, iteration: 116544
loss: 0.9997701644897461,grad_norm: 0.937218188507199, iteration: 116545
loss: 1.0075409412384033,grad_norm: 0.9999991125436806, iteration: 116546
loss: 0.9886375069618225,grad_norm: 0.9999990817266367, iteration: 116547
loss: 1.0086164474487305,grad_norm: 0.9999991778508442, iteration: 116548
loss: 1.0062284469604492,grad_norm: 0.9999991873196175, iteration: 116549
loss: 0.9834442138671875,grad_norm: 0.9583914150954654, iteration: 116550
loss: 0.9983217716217041,grad_norm: 0.9999990692001544, iteration: 116551
loss: 1.0048413276672363,grad_norm: 0.9999990152022659, iteration: 116552
loss: 1.0258476734161377,grad_norm: 0.9999992029605559, iteration: 116553
loss: 0.9749917984008789,grad_norm: 0.8488815788038417, iteration: 116554
loss: 0.9861747622489929,grad_norm: 0.9999994388467256, iteration: 116555
loss: 1.0284178256988525,grad_norm: 0.9999989595753384, iteration: 116556
loss: 1.0221551656723022,grad_norm: 0.9837223909326822, iteration: 116557
loss: 1.0005958080291748,grad_norm: 0.9999991584369017, iteration: 116558
loss: 1.0155280828475952,grad_norm: 0.9999991226364705, iteration: 116559
loss: 1.0122243165969849,grad_norm: 0.999999079080135, iteration: 116560
loss: 1.014535665512085,grad_norm: 0.9999990934223711, iteration: 116561
loss: 0.9800162315368652,grad_norm: 0.9999991206482243, iteration: 116562
loss: 0.9686545729637146,grad_norm: 0.8821937447951222, iteration: 116563
loss: 0.9849117398262024,grad_norm: 0.9999991623345041, iteration: 116564
loss: 0.9662425518035889,grad_norm: 0.9999991296420458, iteration: 116565
loss: 0.9902012348175049,grad_norm: 0.9999992223373088, iteration: 116566
loss: 0.960843563079834,grad_norm: 0.9805231105504595, iteration: 116567
loss: 1.0070310831069946,grad_norm: 0.9999990670789777, iteration: 116568
loss: 0.9868928790092468,grad_norm: 0.9999991877247475, iteration: 116569
loss: 1.0241918563842773,grad_norm: 0.9999992915956134, iteration: 116570
loss: 0.9876919388771057,grad_norm: 0.9999991926919626, iteration: 116571
loss: 1.0206960439682007,grad_norm: 0.999999061861599, iteration: 116572
loss: 1.0181196928024292,grad_norm: 0.9999990430514548, iteration: 116573
loss: 1.0000711679458618,grad_norm: 0.9999992065649865, iteration: 116574
loss: 1.0077323913574219,grad_norm: 0.9999989710824068, iteration: 116575
loss: 1.0157500505447388,grad_norm: 0.9335332281393791, iteration: 116576
loss: 1.037014365196228,grad_norm: 0.9999998751484552, iteration: 116577
loss: 1.0057657957077026,grad_norm: 0.9999989620340026, iteration: 116578
loss: 1.0119102001190186,grad_norm: 0.9465661091584575, iteration: 116579
loss: 0.9657136797904968,grad_norm: 0.9999991356673853, iteration: 116580
loss: 0.9969657063484192,grad_norm: 0.9999990734300328, iteration: 116581
loss: 0.9951468110084534,grad_norm: 0.9999990498940965, iteration: 116582
loss: 1.0177277326583862,grad_norm: 0.9999990327511189, iteration: 116583
loss: 1.0098356008529663,grad_norm: 0.9640398862267855, iteration: 116584
loss: 0.989054262638092,grad_norm: 0.9999990368428975, iteration: 116585
loss: 0.9801149368286133,grad_norm: 0.9999991906306123, iteration: 116586
loss: 0.9579659104347229,grad_norm: 0.9650811710467425, iteration: 116587
loss: 0.9861753582954407,grad_norm: 0.9999990833303954, iteration: 116588
loss: 1.018059253692627,grad_norm: 0.999998930497413, iteration: 116589
loss: 1.0202915668487549,grad_norm: 0.9999990390500078, iteration: 116590
loss: 0.9739276766777039,grad_norm: 0.9773099338264748, iteration: 116591
loss: 1.0400277376174927,grad_norm: 0.9546114297920003, iteration: 116592
loss: 1.0083853006362915,grad_norm: 0.9083973499617056, iteration: 116593
loss: 1.0069636106491089,grad_norm: 0.9999990796598281, iteration: 116594
loss: 0.9940095543861389,grad_norm: 0.9999989812052217, iteration: 116595
loss: 1.0230607986450195,grad_norm: 0.9999989742257297, iteration: 116596
loss: 0.9911364316940308,grad_norm: 0.9767906030453466, iteration: 116597
loss: 1.0072267055511475,grad_norm: 0.9999991737144311, iteration: 116598
loss: 1.021931529045105,grad_norm: 0.8866281386103425, iteration: 116599
loss: 0.9680305123329163,grad_norm: 0.9999990740840827, iteration: 116600
loss: 0.9745833277702332,grad_norm: 0.9146210401735998, iteration: 116601
loss: 0.9944301247596741,grad_norm: 0.9999990681529709, iteration: 116602
loss: 0.9900282025337219,grad_norm: 0.9332151523841692, iteration: 116603
loss: 0.9918901920318604,grad_norm: 0.999999267167682, iteration: 116604
loss: 1.0263701677322388,grad_norm: 0.9999990962650669, iteration: 116605
loss: 0.9672600626945496,grad_norm: 0.9674691362162662, iteration: 116606
loss: 0.967606246471405,grad_norm: 0.9999993037795714, iteration: 116607
loss: 1.0143259763717651,grad_norm: 0.9999991060925184, iteration: 116608
loss: 1.0096310377120972,grad_norm: 0.9999990593241961, iteration: 116609
loss: 1.129561424255371,grad_norm: 0.9999998741822446, iteration: 116610
loss: 1.0325080156326294,grad_norm: 0.9999991553008685, iteration: 116611
loss: 1.0001623630523682,grad_norm: 0.9142194505117093, iteration: 116612
loss: 0.9996176958084106,grad_norm: 0.8863870908000332, iteration: 116613
loss: 1.013067364692688,grad_norm: 0.9999993998335196, iteration: 116614
loss: 0.9433638453483582,grad_norm: 0.9999990799519312, iteration: 116615
loss: 0.9973607659339905,grad_norm: 0.9999990819676425, iteration: 116616
loss: 0.9954749345779419,grad_norm: 0.9935754603935052, iteration: 116617
loss: 0.9649373292922974,grad_norm: 0.9999992550610567, iteration: 116618
loss: 0.983081579208374,grad_norm: 0.9999992706864712, iteration: 116619
loss: 1.003270149230957,grad_norm: 0.9902385309469528, iteration: 116620
loss: 1.0231380462646484,grad_norm: 0.9406733631518003, iteration: 116621
loss: 1.031424880027771,grad_norm: 0.9725473721949104, iteration: 116622
loss: 0.9951889514923096,grad_norm: 0.9999990554496421, iteration: 116623
loss: 1.0075832605361938,grad_norm: 0.9999991766985684, iteration: 116624
loss: 0.9820742011070251,grad_norm: 0.9999991172462669, iteration: 116625
loss: 1.0377305746078491,grad_norm: 0.9999992392553114, iteration: 116626
loss: 1.0082900524139404,grad_norm: 0.9999992075075654, iteration: 116627
loss: 1.0073082447052002,grad_norm: 0.9999989734967671, iteration: 116628
loss: 0.992146372795105,grad_norm: 0.8783362623965866, iteration: 116629
loss: 1.0133047103881836,grad_norm: 0.9938720527212133, iteration: 116630
loss: 1.023627519607544,grad_norm: 0.9999990113209343, iteration: 116631
loss: 1.0185973644256592,grad_norm: 0.9999995112818052, iteration: 116632
loss: 1.0138897895812988,grad_norm: 0.9999991822034745, iteration: 116633
loss: 1.004922866821289,grad_norm: 0.9884792183503348, iteration: 116634
loss: 0.9839448928833008,grad_norm: 0.9999991712117218, iteration: 116635
loss: 0.9717204570770264,grad_norm: 0.999999244557635, iteration: 116636
loss: 0.9936562776565552,grad_norm: 0.9999991186164263, iteration: 116637
loss: 0.9977297782897949,grad_norm: 0.9999991903543378, iteration: 116638
loss: 1.0213383436203003,grad_norm: 0.9818452651155861, iteration: 116639
loss: 0.9900723695755005,grad_norm: 0.9999991164424931, iteration: 116640
loss: 0.9788644313812256,grad_norm: 0.9999990672148771, iteration: 116641
loss: 0.9989981651306152,grad_norm: 0.9247020007192858, iteration: 116642
loss: 0.9798672199249268,grad_norm: 0.8176754135244536, iteration: 116643
loss: 1.01246976852417,grad_norm: 0.9999991043567185, iteration: 116644
loss: 1.0433685779571533,grad_norm: 0.9999991170577217, iteration: 116645
loss: 1.008897304534912,grad_norm: 0.9999991561245791, iteration: 116646
loss: 1.0169711112976074,grad_norm: 0.9999991934119569, iteration: 116647
loss: 1.0211756229400635,grad_norm: 0.9999989735408973, iteration: 116648
loss: 1.0117909908294678,grad_norm: 0.9686534603479031, iteration: 116649
loss: 1.0001322031021118,grad_norm: 0.9999990082653639, iteration: 116650
loss: 1.0124921798706055,grad_norm: 0.99999911911967, iteration: 116651
loss: 0.9896747469902039,grad_norm: 0.960968718744976, iteration: 116652
loss: 0.997735321521759,grad_norm: 0.9999990211605521, iteration: 116653
loss: 1.011695146560669,grad_norm: 0.999998999759878, iteration: 116654
loss: 1.010807752609253,grad_norm: 0.9999989845214983, iteration: 116655
loss: 0.984388530254364,grad_norm: 0.9690319685787258, iteration: 116656
loss: 1.0021111965179443,grad_norm: 0.9260843378617678, iteration: 116657
loss: 0.9761760830879211,grad_norm: 0.9999990705634598, iteration: 116658
loss: 0.9792654514312744,grad_norm: 0.9999991919430352, iteration: 116659
loss: 1.027381181716919,grad_norm: 0.9999991932203561, iteration: 116660
loss: 1.0027635097503662,grad_norm: 0.9592096860074879, iteration: 116661
loss: 0.9799485802650452,grad_norm: 0.9999991398910906, iteration: 116662
loss: 0.9917353391647339,grad_norm: 0.9999990032937581, iteration: 116663
loss: 1.006903052330017,grad_norm: 0.8607903560311919, iteration: 116664
loss: 0.9994236826896667,grad_norm: 0.9999991270331556, iteration: 116665
loss: 1.0004634857177734,grad_norm: 0.9999993804153121, iteration: 116666
loss: 0.9698861241340637,grad_norm: 0.9999992085905813, iteration: 116667
loss: 1.0090020895004272,grad_norm: 0.9999992038265243, iteration: 116668
loss: 1.0352445840835571,grad_norm: 0.9999990392588192, iteration: 116669
loss: 0.9859356880187988,grad_norm: 0.9999990952235953, iteration: 116670
loss: 1.0031412839889526,grad_norm: 0.9999989629705093, iteration: 116671
loss: 0.9883268475532532,grad_norm: 0.9999990895403406, iteration: 116672
loss: 0.9855130910873413,grad_norm: 0.9999991647287694, iteration: 116673
loss: 1.0354046821594238,grad_norm: 0.8672154264106384, iteration: 116674
loss: 0.9851453900337219,grad_norm: 0.9999990904454706, iteration: 116675
loss: 0.9529203176498413,grad_norm: 0.9999993249160433, iteration: 116676
loss: 0.9913216233253479,grad_norm: 0.9307192245913603, iteration: 116677
loss: 1.0122171640396118,grad_norm: 0.8854142178656899, iteration: 116678
loss: 0.9917441010475159,grad_norm: 0.9590028463471497, iteration: 116679
loss: 0.9646037220954895,grad_norm: 0.8962899597758558, iteration: 116680
loss: 1.005758285522461,grad_norm: 0.8775865520243444, iteration: 116681
loss: 0.9992477893829346,grad_norm: 0.8838944614906297, iteration: 116682
loss: 1.0365643501281738,grad_norm: 0.9999998048298712, iteration: 116683
loss: 0.9973248243331909,grad_norm: 0.9943300411060797, iteration: 116684
loss: 0.9757593274116516,grad_norm: 0.9999991777531815, iteration: 116685
loss: 0.9741872549057007,grad_norm: 0.9999992531683813, iteration: 116686
loss: 1.027726173400879,grad_norm: 0.9999993073524736, iteration: 116687
loss: 0.9874625205993652,grad_norm: 0.9398090154795739, iteration: 116688
loss: 1.0079834461212158,grad_norm: 0.9988714495362541, iteration: 116689
loss: 0.9846553802490234,grad_norm: 0.9421392178375136, iteration: 116690
loss: 1.0147662162780762,grad_norm: 0.926458537600357, iteration: 116691
loss: 0.978585958480835,grad_norm: 0.8874653364557169, iteration: 116692
loss: 0.9733219146728516,grad_norm: 0.9999991184893727, iteration: 116693
loss: 1.017542839050293,grad_norm: 0.9999991452292174, iteration: 116694
loss: 1.0087037086486816,grad_norm: 0.9473872079022024, iteration: 116695
loss: 0.9753064513206482,grad_norm: 0.9637253939250718, iteration: 116696
loss: 1.0017333030700684,grad_norm: 0.9747356141722752, iteration: 116697
loss: 1.009757161140442,grad_norm: 0.9999994533904668, iteration: 116698
loss: 0.9814676642417908,grad_norm: 0.8406456873690336, iteration: 116699
loss: 0.9842119812965393,grad_norm: 0.9999992442176472, iteration: 116700
loss: 0.9812549352645874,grad_norm: 0.9999992544763362, iteration: 116701
loss: 0.9931163191795349,grad_norm: 0.9634559049355494, iteration: 116702
loss: 1.0022751092910767,grad_norm: 0.9529094212002456, iteration: 116703
loss: 0.9759731292724609,grad_norm: 0.8581275621229112, iteration: 116704
loss: 0.9889322519302368,grad_norm: 0.9999990474419493, iteration: 116705
loss: 1.0109808444976807,grad_norm: 0.9999991540691775, iteration: 116706
loss: 1.0115443468093872,grad_norm: 0.9442039345696706, iteration: 116707
loss: 1.0074659585952759,grad_norm: 0.9999996707472946, iteration: 116708
loss: 1.0148824453353882,grad_norm: 0.9204075666479378, iteration: 116709
loss: 1.0445990562438965,grad_norm: 0.9774969060816515, iteration: 116710
loss: 0.9875340461730957,grad_norm: 0.9999989994666268, iteration: 116711
loss: 1.0013762712478638,grad_norm: 0.9999991975218465, iteration: 116712
loss: 1.0161246061325073,grad_norm: 0.9999992134620805, iteration: 116713
loss: 0.9589912295341492,grad_norm: 0.9999992545022839, iteration: 116714
loss: 1.009085774421692,grad_norm: 0.9999991288132802, iteration: 116715
loss: 1.0002098083496094,grad_norm: 0.9544465113008549, iteration: 116716
loss: 1.0107897520065308,grad_norm: 0.9999990698773689, iteration: 116717
loss: 1.0081684589385986,grad_norm: 0.9999997885486792, iteration: 116718
loss: 0.9751645922660828,grad_norm: 0.9357504788088244, iteration: 116719
loss: 1.0053894519805908,grad_norm: 0.8954192435300603, iteration: 116720
loss: 1.0049052238464355,grad_norm: 0.999999182899264, iteration: 116721
loss: 1.0236307382583618,grad_norm: 0.999999262456246, iteration: 116722
loss: 0.9709608554840088,grad_norm: 0.9999999023138306, iteration: 116723
loss: 0.990886390209198,grad_norm: 0.9661617623972415, iteration: 116724
loss: 0.9749808311462402,grad_norm: 0.9999991243239735, iteration: 116725
loss: 1.002017855644226,grad_norm: 0.9999990494422908, iteration: 116726
loss: 0.9669482111930847,grad_norm: 0.999998989946835, iteration: 116727
loss: 0.9779266715049744,grad_norm: 0.9999990986439682, iteration: 116728
loss: 1.0224663019180298,grad_norm: 0.9999991383687321, iteration: 116729
loss: 1.0010803937911987,grad_norm: 0.855483931660476, iteration: 116730
loss: 1.022761344909668,grad_norm: 0.999999047997628, iteration: 116731
loss: 1.0259172916412354,grad_norm: 0.9730126499132781, iteration: 116732
loss: 0.9741694927215576,grad_norm: 0.8484427939072271, iteration: 116733
loss: 0.9888597726821899,grad_norm: 0.9569835666354144, iteration: 116734
loss: 1.0010082721710205,grad_norm: 0.9999992235472216, iteration: 116735
loss: 0.9670965075492859,grad_norm: 0.9999990765203672, iteration: 116736
loss: 1.022186517715454,grad_norm: 0.9999990093474374, iteration: 116737
loss: 1.0112965106964111,grad_norm: 0.9999991274876446, iteration: 116738
loss: 1.0009690523147583,grad_norm: 0.9041329238486664, iteration: 116739
loss: 1.0078877210617065,grad_norm: 0.879347327608791, iteration: 116740
loss: 0.9794577360153198,grad_norm: 0.7903882068372486, iteration: 116741
loss: 0.9991217851638794,grad_norm: 0.9706003848868046, iteration: 116742
loss: 0.9947095513343811,grad_norm: 0.9999991581190528, iteration: 116743
loss: 0.991392195224762,grad_norm: 0.999999209811315, iteration: 116744
loss: 1.00981867313385,grad_norm: 0.9999991856899981, iteration: 116745
loss: 1.021064043045044,grad_norm: 0.9999991731673669, iteration: 116746
loss: 1.0164343118667603,grad_norm: 0.9272835648592829, iteration: 116747
loss: 1.0136780738830566,grad_norm: 0.9999990882617187, iteration: 116748
loss: 1.0028190612792969,grad_norm: 0.9999991907538357, iteration: 116749
loss: 1.0400561094284058,grad_norm: 0.9999990283700096, iteration: 116750
loss: 1.0135468244552612,grad_norm: 0.9999990965139172, iteration: 116751
loss: 1.0128562450408936,grad_norm: 0.9585985439605336, iteration: 116752
loss: 1.0073339939117432,grad_norm: 0.9658428701475871, iteration: 116753
loss: 0.9900771975517273,grad_norm: 0.9410852360012563, iteration: 116754
loss: 1.0310722589492798,grad_norm: 0.9999993393515446, iteration: 116755
loss: 1.0060622692108154,grad_norm: 0.9200414811076925, iteration: 116756
loss: 1.0162032842636108,grad_norm: 0.9622667994373143, iteration: 116757
loss: 1.0158377885818481,grad_norm: 0.9999994656774716, iteration: 116758
loss: 0.994818925857544,grad_norm: 0.999999208756443, iteration: 116759
loss: 0.9902728199958801,grad_norm: 0.9999991597554244, iteration: 116760
loss: 0.9378468990325928,grad_norm: 0.9909378252650328, iteration: 116761
loss: 0.9875198602676392,grad_norm: 0.9999990174394308, iteration: 116762
loss: 0.9904851913452148,grad_norm: 0.9999991131469735, iteration: 116763
loss: 0.988286018371582,grad_norm: 0.999999276438851, iteration: 116764
loss: 0.9686070680618286,grad_norm: 0.9999991264187392, iteration: 116765
loss: 1.002018690109253,grad_norm: 0.9077131863417219, iteration: 116766
loss: 1.0138145685195923,grad_norm: 0.9999992710841202, iteration: 116767
loss: 0.992104709148407,grad_norm: 0.9999990558940794, iteration: 116768
loss: 0.9686827659606934,grad_norm: 0.999999066570336, iteration: 116769
loss: 1.0184266567230225,grad_norm: 0.9999995803225212, iteration: 116770
loss: 0.9617341160774231,grad_norm: 0.9857815239800478, iteration: 116771
loss: 0.986051619052887,grad_norm: 0.9999991034382876, iteration: 116772
loss: 1.0270835161209106,grad_norm: 0.9999993389945595, iteration: 116773
loss: 0.9805992841720581,grad_norm: 0.9999990940977539, iteration: 116774
loss: 0.983234703540802,grad_norm: 0.9358081717563772, iteration: 116775
loss: 1.0103586912155151,grad_norm: 0.9116897426979294, iteration: 116776
loss: 0.9755504727363586,grad_norm: 0.978581124218924, iteration: 116777
loss: 1.0064083337783813,grad_norm: 0.9999990544061114, iteration: 116778
loss: 0.9978004693984985,grad_norm: 0.946279282788429, iteration: 116779
loss: 1.0364190340042114,grad_norm: 0.9999996148816123, iteration: 116780
loss: 0.9786933660507202,grad_norm: 0.8777038853198103, iteration: 116781
loss: 0.9928443431854248,grad_norm: 0.8370863320115419, iteration: 116782
loss: 0.9625968933105469,grad_norm: 0.9999993373986031, iteration: 116783
loss: 0.9976892471313477,grad_norm: 0.9999992623131078, iteration: 116784
loss: 0.9899922609329224,grad_norm: 0.9999991001705667, iteration: 116785
loss: 1.0313472747802734,grad_norm: 0.999013902201388, iteration: 116786
loss: 0.9735423922538757,grad_norm: 0.994620108787043, iteration: 116787
loss: 0.9917966723442078,grad_norm: 0.9757074329407989, iteration: 116788
loss: 1.0224497318267822,grad_norm: 0.9999993754540919, iteration: 116789
loss: 0.991993248462677,grad_norm: 0.9847928522077991, iteration: 116790
loss: 1.0112226009368896,grad_norm: 0.8896691360505902, iteration: 116791
loss: 0.9911836981773376,grad_norm: 0.9999990407335878, iteration: 116792
loss: 1.0115612745285034,grad_norm: 0.9999990937585014, iteration: 116793
loss: 1.0140734910964966,grad_norm: 0.9999992221476172, iteration: 116794
loss: 1.0018327236175537,grad_norm: 0.9999992252645292, iteration: 116795
loss: 1.0146340131759644,grad_norm: 0.8493638537734117, iteration: 116796
loss: 0.9808587431907654,grad_norm: 0.989092632864539, iteration: 116797
loss: 1.0189218521118164,grad_norm: 0.9358257864490667, iteration: 116798
loss: 1.0125812292099,grad_norm: 0.9999991753009492, iteration: 116799
loss: 0.9894392490386963,grad_norm: 0.9999989435445911, iteration: 116800
loss: 1.0248706340789795,grad_norm: 0.9999992390867148, iteration: 116801
loss: 0.9657406806945801,grad_norm: 0.999999145499366, iteration: 116802
loss: 1.0050021409988403,grad_norm: 0.9999997521462627, iteration: 116803
loss: 0.9771508574485779,grad_norm: 0.9999991752507509, iteration: 116804
loss: 1.0377006530761719,grad_norm: 0.9999996217307723, iteration: 116805
loss: 0.9866582155227661,grad_norm: 0.9914833949174703, iteration: 116806
loss: 1.0054905414581299,grad_norm: 0.8798909181285878, iteration: 116807
loss: 0.9792922735214233,grad_norm: 0.9004269727052562, iteration: 116808
loss: 1.0187488794326782,grad_norm: 0.999999064398827, iteration: 116809
loss: 0.9998548030853271,grad_norm: 0.999998956801895, iteration: 116810
loss: 0.9989279508590698,grad_norm: 0.9244019139999614, iteration: 116811
loss: 0.957599401473999,grad_norm: 0.9999990108801369, iteration: 116812
loss: 1.0440149307250977,grad_norm: 0.9999993296012327, iteration: 116813
loss: 0.9940742254257202,grad_norm: 0.944719601640274, iteration: 116814
loss: 1.0189480781555176,grad_norm: 0.9999989130208488, iteration: 116815
loss: 1.0001804828643799,grad_norm: 0.9999990700898319, iteration: 116816
loss: 0.9829416871070862,grad_norm: 0.9999996843231398, iteration: 116817
loss: 0.9722114205360413,grad_norm: 0.986178876251465, iteration: 116818
loss: 0.9991167783737183,grad_norm: 0.999998942506628, iteration: 116819
loss: 0.9971368908882141,grad_norm: 0.9999991541210794, iteration: 116820
loss: 1.0322448015213013,grad_norm: 0.7727403402311628, iteration: 116821
loss: 0.9945342540740967,grad_norm: 0.9816999978732253, iteration: 116822
loss: 1.0078198909759521,grad_norm: 0.9079064687016261, iteration: 116823
loss: 0.9724053144454956,grad_norm: 0.8448945754738221, iteration: 116824
loss: 0.979282557964325,grad_norm: 0.9999990661205835, iteration: 116825
loss: 1.0059274435043335,grad_norm: 0.9999992283068722, iteration: 116826
loss: 1.0212247371673584,grad_norm: 0.9999992051254303, iteration: 116827
loss: 0.968106746673584,grad_norm: 0.9999991111568741, iteration: 116828
loss: 0.9683049917221069,grad_norm: 0.9999990399611695, iteration: 116829
loss: 0.9991683959960938,grad_norm: 0.8822411278467865, iteration: 116830
loss: 0.9941530227661133,grad_norm: 0.982463220926094, iteration: 116831
loss: 0.9608097672462463,grad_norm: 0.9999991789862579, iteration: 116832
loss: 1.0186283588409424,grad_norm: 0.9923560516628632, iteration: 116833
loss: 1.0283564329147339,grad_norm: 0.9999992553104714, iteration: 116834
loss: 1.0211492776870728,grad_norm: 0.9999991229340464, iteration: 116835
loss: 0.9823625683784485,grad_norm: 0.9999991189526485, iteration: 116836
loss: 1.0197758674621582,grad_norm: 0.9294075034095044, iteration: 116837
loss: 1.0251466035842896,grad_norm: 0.9999993712683475, iteration: 116838
loss: 1.0008456707000732,grad_norm: 0.9999992953741101, iteration: 116839
loss: 0.9772148728370667,grad_norm: 0.9999992140586944, iteration: 116840
loss: 1.0447320938110352,grad_norm: 0.9999991606518943, iteration: 116841
loss: 1.0154716968536377,grad_norm: 0.9164731495138729, iteration: 116842
loss: 1.0319905281066895,grad_norm: 0.9999989759344159, iteration: 116843
loss: 0.9654664993286133,grad_norm: 0.9410088425752215, iteration: 116844
loss: 0.9645809531211853,grad_norm: 0.985394490874202, iteration: 116845
loss: 1.0471129417419434,grad_norm: 0.9999991130305467, iteration: 116846
loss: 0.956932008266449,grad_norm: 0.999999086650207, iteration: 116847
loss: 0.9485328197479248,grad_norm: 0.8806752817959831, iteration: 116848
loss: 1.0080947875976562,grad_norm: 0.9999991217677994, iteration: 116849
loss: 0.9998124241828918,grad_norm: 0.9999991381757781, iteration: 116850
loss: 0.9939939379692078,grad_norm: 0.8898363365316263, iteration: 116851
loss: 1.0224804878234863,grad_norm: 0.999999130197878, iteration: 116852
loss: 0.9955422282218933,grad_norm: 0.999999104610955, iteration: 116853
loss: 0.9685876369476318,grad_norm: 0.969000636830202, iteration: 116854
loss: 0.9999813437461853,grad_norm: 0.823610526484571, iteration: 116855
loss: 0.9654596447944641,grad_norm: 0.9999992808318634, iteration: 116856
loss: 1.0503982305526733,grad_norm: 0.999999211698749, iteration: 116857
loss: 0.9616133570671082,grad_norm: 0.8922159633878606, iteration: 116858
loss: 1.0122629404067993,grad_norm: 0.9999992371465246, iteration: 116859
loss: 1.0246772766113281,grad_norm: 0.9006300588282736, iteration: 116860
loss: 1.018432855606079,grad_norm: 0.9999991376606349, iteration: 116861
loss: 1.1680259704589844,grad_norm: 0.9999992521699992, iteration: 116862
loss: 0.9857040643692017,grad_norm: 0.9999991346131871, iteration: 116863
loss: 1.0075212717056274,grad_norm: 0.8584160674648335, iteration: 116864
loss: 0.9998350739479065,grad_norm: 0.9999991047342962, iteration: 116865
loss: 1.0135284662246704,grad_norm: 0.9999991401080524, iteration: 116866
loss: 1.0128384828567505,grad_norm: 0.9999991557986616, iteration: 116867
loss: 0.9989234805107117,grad_norm: 0.9999991950204229, iteration: 116868
loss: 1.0119096040725708,grad_norm: 0.9999992615965805, iteration: 116869
loss: 0.9904558062553406,grad_norm: 0.999998990033844, iteration: 116870
loss: 0.9795801639556885,grad_norm: 0.9999989788545754, iteration: 116871
loss: 1.0284035205841064,grad_norm: 0.9496054989471752, iteration: 116872
loss: 1.0358144044876099,grad_norm: 0.9999990809541007, iteration: 116873
loss: 0.9997401237487793,grad_norm: 0.9739273166715599, iteration: 116874
loss: 0.9780922532081604,grad_norm: 0.9999992284775479, iteration: 116875
loss: 0.987192690372467,grad_norm: 0.9999992106018832, iteration: 116876
loss: 0.9839879274368286,grad_norm: 0.9999990196518591, iteration: 116877
loss: 1.0233911275863647,grad_norm: 0.9999993132002992, iteration: 116878
loss: 0.9581775069236755,grad_norm: 0.9999991946995521, iteration: 116879
loss: 1.0105156898498535,grad_norm: 0.9999991712000957, iteration: 116880
loss: 0.9965566396713257,grad_norm: 0.9999990511276132, iteration: 116881
loss: 0.9969373941421509,grad_norm: 0.9999992762026285, iteration: 116882
loss: 0.9831198453903198,grad_norm: 0.9999990198100766, iteration: 116883
loss: 1.0091207027435303,grad_norm: 0.9999991222715279, iteration: 116884
loss: 0.954289972782135,grad_norm: 0.9999991660680172, iteration: 116885
loss: 1.0309042930603027,grad_norm: 0.9999991713847842, iteration: 116886
loss: 0.9712087512016296,grad_norm: 0.9999991281523956, iteration: 116887
loss: 0.9609746336936951,grad_norm: 0.9383631807115446, iteration: 116888
loss: 0.9636255502700806,grad_norm: 0.9699459228780806, iteration: 116889
loss: 0.9361237287521362,grad_norm: 0.9999992109511738, iteration: 116890
loss: 0.9770593047142029,grad_norm: 0.9477526303205953, iteration: 116891
loss: 0.9920018315315247,grad_norm: 0.9999991455109021, iteration: 116892
loss: 0.987136960029602,grad_norm: 0.9999991110886736, iteration: 116893
loss: 0.9818671345710754,grad_norm: 0.9999990979396344, iteration: 116894
loss: 0.9970177412033081,grad_norm: 0.9999991980315617, iteration: 116895
loss: 0.9809908866882324,grad_norm: 0.9269692150540734, iteration: 116896
loss: 1.011064887046814,grad_norm: 0.9999990965178984, iteration: 116897
loss: 1.0015130043029785,grad_norm: 0.9999990772712863, iteration: 116898
loss: 0.9704633951187134,grad_norm: 0.9779212089047762, iteration: 116899
loss: 1.0276625156402588,grad_norm: 0.8184503070502831, iteration: 116900
loss: 1.0188252925872803,grad_norm: 0.9894219549671462, iteration: 116901
loss: 0.9910483956336975,grad_norm: 0.9999992886833533, iteration: 116902
loss: 1.0070579051971436,grad_norm: 0.9734423159567231, iteration: 116903
loss: 1.0085915327072144,grad_norm: 0.9999990435584949, iteration: 116904
loss: 0.9992855787277222,grad_norm: 0.9381088710685539, iteration: 116905
loss: 0.9865687489509583,grad_norm: 0.9999990515048738, iteration: 116906
loss: 0.9777488708496094,grad_norm: 0.99999906882274, iteration: 116907
loss: 1.0314873456954956,grad_norm: 0.8925830367865727, iteration: 116908
loss: 1.0075381994247437,grad_norm: 0.9643302869696341, iteration: 116909
loss: 0.9841539859771729,grad_norm: 0.9999991450030816, iteration: 116910
loss: 0.963508665561676,grad_norm: 0.9471126162294673, iteration: 116911
loss: 0.9836921095848083,grad_norm: 0.9999992425453312, iteration: 116912
loss: 1.0285569429397583,grad_norm: 0.9999991616334325, iteration: 116913
loss: 1.0527479648590088,grad_norm: 0.999999519905119, iteration: 116914
loss: 1.0349674224853516,grad_norm: 0.9005694338067047, iteration: 116915
loss: 0.9827369451522827,grad_norm: 0.9999992928507823, iteration: 116916
loss: 0.9637544751167297,grad_norm: 0.9000353555749804, iteration: 116917
loss: 0.985618531703949,grad_norm: 0.9999991213743976, iteration: 116918
loss: 1.022456407546997,grad_norm: 0.9999991526431042, iteration: 116919
loss: 0.9994409680366516,grad_norm: 0.9999990665925708, iteration: 116920
loss: 0.9990516304969788,grad_norm: 0.9999991864549616, iteration: 116921
loss: 1.0412540435791016,grad_norm: 0.9999992803601114, iteration: 116922
loss: 0.99172043800354,grad_norm: 0.999999166500115, iteration: 116923
loss: 0.9960627555847168,grad_norm: 0.9999991598076649, iteration: 116924
loss: 1.0050132274627686,grad_norm: 0.9999990850671442, iteration: 116925
loss: 1.0169200897216797,grad_norm: 0.9999992549623674, iteration: 116926
loss: 0.996163010597229,grad_norm: 0.9999991916769713, iteration: 116927
loss: 1.0735821723937988,grad_norm: 0.9999993950476074, iteration: 116928
loss: 1.0103726387023926,grad_norm: 0.9999990089665531, iteration: 116929
loss: 0.9918835163116455,grad_norm: 0.9999992101152142, iteration: 116930
loss: 0.9821666479110718,grad_norm: 0.9999990222192878, iteration: 116931
loss: 1.0018690824508667,grad_norm: 0.9999991402623393, iteration: 116932
loss: 1.0106221437454224,grad_norm: 0.9999992327910305, iteration: 116933
loss: 1.003514051437378,grad_norm: 0.7982881086217313, iteration: 116934
loss: 1.048331379890442,grad_norm: 0.9999992403641795, iteration: 116935
loss: 0.9958089590072632,grad_norm: 0.9999989249014591, iteration: 116936
loss: 1.00617253780365,grad_norm: 0.9901514620420064, iteration: 116937
loss: 0.9456474781036377,grad_norm: 0.9999991650213373, iteration: 116938
loss: 1.0169882774353027,grad_norm: 0.9413698079506844, iteration: 116939
loss: 0.9569854736328125,grad_norm: 0.999999168580971, iteration: 116940
loss: 0.9804062247276306,grad_norm: 0.9999991054715461, iteration: 116941
loss: 1.0193687677383423,grad_norm: 0.9999991766659688, iteration: 116942
loss: 1.0296841859817505,grad_norm: 0.8602994357418411, iteration: 116943
loss: 0.9991458058357239,grad_norm: 0.9999991903210356, iteration: 116944
loss: 1.0316987037658691,grad_norm: 0.9999990324098504, iteration: 116945
loss: 1.0100017786026,grad_norm: 0.999999802382897, iteration: 116946
loss: 0.9920485019683838,grad_norm: 0.912576771882635, iteration: 116947
loss: 1.0483167171478271,grad_norm: 0.8904699746113407, iteration: 116948
loss: 0.9881248474121094,grad_norm: 0.9999991173887617, iteration: 116949
loss: 0.9859660267829895,grad_norm: 0.999999175300518, iteration: 116950
loss: 0.9926527142524719,grad_norm: 0.9999993295807097, iteration: 116951
loss: 1.0013163089752197,grad_norm: 0.9999990961615829, iteration: 116952
loss: 0.9881536364555359,grad_norm: 0.9999991538509313, iteration: 116953
loss: 1.0078142881393433,grad_norm: 0.9323709795349482, iteration: 116954
loss: 0.9976768493652344,grad_norm: 0.9999990591011374, iteration: 116955
loss: 1.0014941692352295,grad_norm: 0.9999991344339665, iteration: 116956
loss: 1.0185682773590088,grad_norm: 0.9999991421775284, iteration: 116957
loss: 1.0054001808166504,grad_norm: 0.9999991469355195, iteration: 116958
loss: 0.9728763103485107,grad_norm: 0.9971048137865182, iteration: 116959
loss: 0.9783532619476318,grad_norm: 0.9999989853266662, iteration: 116960
loss: 0.9949424862861633,grad_norm: 0.9999992247920313, iteration: 116961
loss: 0.9936937689781189,grad_norm: 0.9999991452587543, iteration: 116962
loss: 0.989556610584259,grad_norm: 0.9999989955570989, iteration: 116963
loss: 1.0052013397216797,grad_norm: 0.9999989865181321, iteration: 116964
loss: 0.9776294827461243,grad_norm: 0.9684081533654221, iteration: 116965
loss: 0.9829211235046387,grad_norm: 0.9999989697190425, iteration: 116966
loss: 1.0125848054885864,grad_norm: 0.9999989699262473, iteration: 116967
loss: 0.9737941026687622,grad_norm: 0.9902462145576396, iteration: 116968
loss: 1.0345196723937988,grad_norm: 0.9840676005254283, iteration: 116969
loss: 0.9841034412384033,grad_norm: 0.9288046100610107, iteration: 116970
loss: 0.9810765981674194,grad_norm: 0.9811268521741068, iteration: 116971
loss: 1.0055276155471802,grad_norm: 0.9999992153578998, iteration: 116972
loss: 1.044815182685852,grad_norm: 0.9999991096882135, iteration: 116973
loss: 0.9977715611457825,grad_norm: 0.9936120657576668, iteration: 116974
loss: 1.0311247110366821,grad_norm: 0.9999996421748689, iteration: 116975
loss: 1.0247613191604614,grad_norm: 0.9999990241490069, iteration: 116976
loss: 1.012058138847351,grad_norm: 0.9329915691783734, iteration: 116977
loss: 1.0219776630401611,grad_norm: 0.9999991223968668, iteration: 116978
loss: 0.9752823114395142,grad_norm: 0.9999991426621347, iteration: 116979
loss: 1.0095890760421753,grad_norm: 0.9999991056334508, iteration: 116980
loss: 0.9838435053825378,grad_norm: 0.999999129158485, iteration: 116981
loss: 0.9793785810470581,grad_norm: 0.9674028118584854, iteration: 116982
loss: 1.0025795698165894,grad_norm: 0.9999989333258011, iteration: 116983
loss: 1.031177282333374,grad_norm: 0.9950616888775938, iteration: 116984
loss: 1.0276362895965576,grad_norm: 0.9999990622129423, iteration: 116985
loss: 1.048404335975647,grad_norm: 0.9925773086969687, iteration: 116986
loss: 1.020703911781311,grad_norm: 0.9272208971628969, iteration: 116987
loss: 1.0034376382827759,grad_norm: 0.7824547361577462, iteration: 116988
loss: 0.9863775372505188,grad_norm: 0.990318980669491, iteration: 116989
loss: 0.9513850212097168,grad_norm: 0.963651974643561, iteration: 116990
loss: 1.0096180438995361,grad_norm: 0.9999990293869576, iteration: 116991
loss: 1.0472265481948853,grad_norm: 0.9999994983679676, iteration: 116992
loss: 0.9799125790596008,grad_norm: 0.9657781001780654, iteration: 116993
loss: 0.9985539317131042,grad_norm: 0.9999992152015692, iteration: 116994
loss: 0.985359251499176,grad_norm: 0.9999991563510571, iteration: 116995
loss: 1.0100667476654053,grad_norm: 0.999998955692776, iteration: 116996
loss: 0.9853936433792114,grad_norm: 0.9999990550305495, iteration: 116997
loss: 0.9605352282524109,grad_norm: 0.9999990697791656, iteration: 116998
loss: 0.9860506057739258,grad_norm: 0.9999991644661766, iteration: 116999
loss: 1.0405287742614746,grad_norm: 0.999999334260556, iteration: 117000
loss: 1.065864086151123,grad_norm: 0.999999597008521, iteration: 117001
loss: 1.002142071723938,grad_norm: 0.9999990110443756, iteration: 117002
loss: 0.9739937782287598,grad_norm: 0.9647103968595464, iteration: 117003
loss: 0.9850379824638367,grad_norm: 0.962484019897362, iteration: 117004
loss: 1.0093938112258911,grad_norm: 0.9141674216018001, iteration: 117005
loss: 1.0171512365341187,grad_norm: 0.9999991616175425, iteration: 117006
loss: 1.0090614557266235,grad_norm: 0.9999990047846551, iteration: 117007
loss: 1.0060861110687256,grad_norm: 0.796495308062246, iteration: 117008
loss: 1.0330002307891846,grad_norm: 0.9999993077901768, iteration: 117009
loss: 0.9704437255859375,grad_norm: 0.9999991416741113, iteration: 117010
loss: 1.0178028345108032,grad_norm: 0.9999991495846835, iteration: 117011
loss: 0.99903404712677,grad_norm: 0.9892033308026167, iteration: 117012
loss: 0.9886398315429688,grad_norm: 0.9999989683951569, iteration: 117013
loss: 0.9688583612442017,grad_norm: 0.999999097795065, iteration: 117014
loss: 1.0098649263381958,grad_norm: 0.9860315655261817, iteration: 117015
loss: 1.0245059728622437,grad_norm: 0.9999991692481258, iteration: 117016
loss: 0.9943042993545532,grad_norm: 0.8554693445280842, iteration: 117017
loss: 0.987328827381134,grad_norm: 0.9999990772168006, iteration: 117018
loss: 0.9860328435897827,grad_norm: 0.9999992987428733, iteration: 117019
loss: 0.9785609245300293,grad_norm: 0.9999992520014347, iteration: 117020
loss: 1.009772777557373,grad_norm: 0.9641326809736377, iteration: 117021
loss: 0.9979888796806335,grad_norm: 0.9999991259685923, iteration: 117022
loss: 0.9460166096687317,grad_norm: 0.999999167016622, iteration: 117023
loss: 1.0240284204483032,grad_norm: 0.9438974547892209, iteration: 117024
loss: 0.964272141456604,grad_norm: 0.9710482778004904, iteration: 117025
loss: 1.0450034141540527,grad_norm: 0.9999993044470877, iteration: 117026
loss: 0.9864542484283447,grad_norm: 0.9999990252185915, iteration: 117027
loss: 1.009106993675232,grad_norm: 0.9410240342754347, iteration: 117028
loss: 0.9826420545578003,grad_norm: 0.9999990886712425, iteration: 117029
loss: 0.9584954380989075,grad_norm: 0.9999991657981444, iteration: 117030
loss: 0.99465012550354,grad_norm: 0.9772446997387995, iteration: 117031
loss: 0.9789074659347534,grad_norm: 0.9999993319963119, iteration: 117032
loss: 1.0217260122299194,grad_norm: 0.9383197536158093, iteration: 117033
loss: 1.0341646671295166,grad_norm: 0.9965903655920094, iteration: 117034
loss: 1.0190069675445557,grad_norm: 0.9999991842433407, iteration: 117035
loss: 1.0178128480911255,grad_norm: 0.8237820177745815, iteration: 117036
loss: 1.004536747932434,grad_norm: 0.9999992750594431, iteration: 117037
loss: 0.9752869009971619,grad_norm: 0.8705703589104796, iteration: 117038
loss: 0.9966772794723511,grad_norm: 0.9999991087402053, iteration: 117039
loss: 1.0193413496017456,grad_norm: 0.8269571792430301, iteration: 117040
loss: 0.9865214228630066,grad_norm: 0.9999990796111199, iteration: 117041
loss: 1.0007115602493286,grad_norm: 0.9999991436284952, iteration: 117042
loss: 1.0722129344940186,grad_norm: 0.9999993356355797, iteration: 117043
loss: 0.9738946557044983,grad_norm: 0.9999991996878361, iteration: 117044
loss: 1.034454584121704,grad_norm: 0.9532520247154623, iteration: 117045
loss: 1.0080939531326294,grad_norm: 0.9999990557232125, iteration: 117046
loss: 0.9938855171203613,grad_norm: 0.9999992597174668, iteration: 117047
loss: 1.013881802558899,grad_norm: 0.9631519814548294, iteration: 117048
loss: 0.9998803734779358,grad_norm: 0.9999991829997541, iteration: 117049
loss: 0.9597318768501282,grad_norm: 0.9999992489034376, iteration: 117050
loss: 0.9977582693099976,grad_norm: 0.9419218355525094, iteration: 117051
loss: 1.0056273937225342,grad_norm: 0.9999992369964149, iteration: 117052
loss: 1.0087778568267822,grad_norm: 0.9999991261518025, iteration: 117053
loss: 1.0046430826187134,grad_norm: 0.9999991349714518, iteration: 117054
loss: 0.9816058874130249,grad_norm: 0.9999991817872779, iteration: 117055
loss: 1.0153015851974487,grad_norm: 0.999999171347297, iteration: 117056
loss: 1.0149719715118408,grad_norm: 0.999999120308683, iteration: 117057
loss: 1.0216737985610962,grad_norm: 0.9999991935523388, iteration: 117058
loss: 0.9734897017478943,grad_norm: 0.9831807372364304, iteration: 117059
loss: 0.9925947785377502,grad_norm: 0.9999990879102155, iteration: 117060
loss: 0.9796670079231262,grad_norm: 0.9999991652828333, iteration: 117061
loss: 1.0320078134536743,grad_norm: 0.9999991972077245, iteration: 117062
loss: 1.04669988155365,grad_norm: 0.9999992699264904, iteration: 117063
loss: 1.0065816640853882,grad_norm: 0.9999990414571884, iteration: 117064
loss: 1.006248950958252,grad_norm: 0.9999991724706, iteration: 117065
loss: 1.0330356359481812,grad_norm: 0.9999991312840169, iteration: 117066
loss: 1.0018954277038574,grad_norm: 0.9999991269765581, iteration: 117067
loss: 0.9973896145820618,grad_norm: 0.999999080345462, iteration: 117068
loss: 1.0565474033355713,grad_norm: 0.9999992449724765, iteration: 117069
loss: 0.9987056255340576,grad_norm: 0.9015427875843728, iteration: 117070
loss: 1.007184386253357,grad_norm: 0.9940951394916595, iteration: 117071
loss: 0.9902588129043579,grad_norm: 0.9999990925079686, iteration: 117072
loss: 1.004273772239685,grad_norm: 0.9999990987285408, iteration: 117073
loss: 0.9847869873046875,grad_norm: 0.9999991066154077, iteration: 117074
loss: 1.019890308380127,grad_norm: 0.9999992069982682, iteration: 117075
loss: 1.0241858959197998,grad_norm: 0.8277213863765244, iteration: 117076
loss: 0.9977717399597168,grad_norm: 0.9999990863148658, iteration: 117077
loss: 0.9827364683151245,grad_norm: 0.9999991943967862, iteration: 117078
loss: 0.9905861020088196,grad_norm: 0.9999991007834427, iteration: 117079
loss: 0.9582663774490356,grad_norm: 0.9999991447677733, iteration: 117080
loss: 1.0096948146820068,grad_norm: 0.9459012612874089, iteration: 117081
loss: 0.9971391558647156,grad_norm: 0.9637842686640059, iteration: 117082
loss: 1.1329776048660278,grad_norm: 0.9999999384991862, iteration: 117083
loss: 0.9950127005577087,grad_norm: 0.9999990687546791, iteration: 117084
loss: 1.0187791585922241,grad_norm: 0.9999992077468555, iteration: 117085
loss: 0.9876613020896912,grad_norm: 0.9999989734046543, iteration: 117086
loss: 0.9979737997055054,grad_norm: 0.999999122948699, iteration: 117087
loss: 1.015058159828186,grad_norm: 0.9528989268323486, iteration: 117088
loss: 0.9905571341514587,grad_norm: 0.9999991469710401, iteration: 117089
loss: 1.089391827583313,grad_norm: 0.9999997129812115, iteration: 117090
loss: 0.9820128083229065,grad_norm: 0.9999992322634365, iteration: 117091
loss: 1.0809316635131836,grad_norm: 0.9999995818812709, iteration: 117092
loss: 1.0107362270355225,grad_norm: 0.9999992559347064, iteration: 117093
loss: 1.014490008354187,grad_norm: 0.9999991874997155, iteration: 117094
loss: 1.0371923446655273,grad_norm: 0.9999996052484112, iteration: 117095
loss: 1.0089733600616455,grad_norm: 0.9339715655395655, iteration: 117096
loss: 0.9787555932998657,grad_norm: 0.9578159841529559, iteration: 117097
loss: 0.9788925647735596,grad_norm: 0.9999991434906631, iteration: 117098
loss: 0.9741430878639221,grad_norm: 0.9999992062843274, iteration: 117099
loss: 1.0263919830322266,grad_norm: 0.9999990487897373, iteration: 117100
loss: 0.9873079061508179,grad_norm: 0.999999058295453, iteration: 117101
loss: 0.9707144498825073,grad_norm: 0.939265330750161, iteration: 117102
loss: 1.0272719860076904,grad_norm: 0.9999990349968206, iteration: 117103
loss: 1.0029281377792358,grad_norm: 0.9999991070853086, iteration: 117104
loss: 1.0402058362960815,grad_norm: 0.999999389105955, iteration: 117105
loss: 0.9757373332977295,grad_norm: 0.999999105299604, iteration: 117106
loss: 1.0030443668365479,grad_norm: 0.9999991978344609, iteration: 117107
loss: 0.9979645609855652,grad_norm: 0.9999994099589368, iteration: 117108
loss: 0.9884331822395325,grad_norm: 0.9752411093913363, iteration: 117109
loss: 0.9873466491699219,grad_norm: 0.9836579927990211, iteration: 117110
loss: 1.0919458866119385,grad_norm: 0.9999996377420223, iteration: 117111
loss: 1.048112392425537,grad_norm: 0.9394412579044077, iteration: 117112
loss: 1.0327980518341064,grad_norm: 0.9999992845111336, iteration: 117113
loss: 1.0014865398406982,grad_norm: 0.9999989694882939, iteration: 117114
loss: 1.0072901248931885,grad_norm: 0.8561461946137098, iteration: 117115
loss: 0.9479514956474304,grad_norm: 0.9999991221820624, iteration: 117116
loss: 1.0030121803283691,grad_norm: 0.9814565909371942, iteration: 117117
loss: 0.997119665145874,grad_norm: 0.9999991307942931, iteration: 117118
loss: 1.01194167137146,grad_norm: 0.9416807445153348, iteration: 117119
loss: 1.015167236328125,grad_norm: 0.9886731185096438, iteration: 117120
loss: 1.0020947456359863,grad_norm: 0.9353151461545289, iteration: 117121
loss: 1.0204910039901733,grad_norm: 0.9999990457159718, iteration: 117122
loss: 0.9909912943840027,grad_norm: 0.9999990400107797, iteration: 117123
loss: 1.0011060237884521,grad_norm: 0.8499867481369163, iteration: 117124
loss: 0.9900854229927063,grad_norm: 0.9928094920370961, iteration: 117125
loss: 0.9823532104492188,grad_norm: 0.8845504971769684, iteration: 117126
loss: 1.0012907981872559,grad_norm: 0.9999990908025187, iteration: 117127
loss: 0.9953372478485107,grad_norm: 0.999999190411703, iteration: 117128
loss: 0.9492546916007996,grad_norm: 0.9999990838706597, iteration: 117129
loss: 0.9829851388931274,grad_norm: 0.9711394027891666, iteration: 117130
loss: 1.0160540342330933,grad_norm: 0.9721497081114691, iteration: 117131
loss: 1.0105476379394531,grad_norm: 0.9999993081853574, iteration: 117132
loss: 1.0343414545059204,grad_norm: 0.9999995817640662, iteration: 117133
loss: 0.9945803284645081,grad_norm: 0.8678302645181485, iteration: 117134
loss: 1.0504372119903564,grad_norm: 0.9947517943048894, iteration: 117135
loss: 0.9836578965187073,grad_norm: 0.9999990796536451, iteration: 117136
loss: 1.0131934881210327,grad_norm: 0.9278733046214066, iteration: 117137
loss: 0.9666595458984375,grad_norm: 0.9999988403048614, iteration: 117138
loss: 0.9841359257698059,grad_norm: 0.9881910635490369, iteration: 117139
loss: 1.0087591409683228,grad_norm: 0.9267379797318894, iteration: 117140
loss: 1.0097641944885254,grad_norm: 0.9999992617394992, iteration: 117141
loss: 0.9732364416122437,grad_norm: 0.9999991117026525, iteration: 117142
loss: 1.0192404985427856,grad_norm: 0.9999992348929545, iteration: 117143
loss: 1.0197323560714722,grad_norm: 0.9965136811805729, iteration: 117144
loss: 1.0134297609329224,grad_norm: 0.9999990810507631, iteration: 117145
loss: 1.0175446271896362,grad_norm: 0.9999991874662646, iteration: 117146
loss: 0.9883490204811096,grad_norm: 0.9304315605194203, iteration: 117147
loss: 1.0052433013916016,grad_norm: 0.9999992068876061, iteration: 117148
loss: 0.984216570854187,grad_norm: 0.8211876419943417, iteration: 117149
loss: 0.9833095073699951,grad_norm: 0.9999991019102371, iteration: 117150
loss: 0.9977489709854126,grad_norm: 0.9999990780738419, iteration: 117151
loss: 1.04087233543396,grad_norm: 0.9999992025008276, iteration: 117152
loss: 0.999498724937439,grad_norm: 0.9999991952480084, iteration: 117153
loss: 0.9944186210632324,grad_norm: 0.9849806904420016, iteration: 117154
loss: 0.9832019209861755,grad_norm: 0.9999989380312215, iteration: 117155
loss: 1.0271180868148804,grad_norm: 0.9999990147219444, iteration: 117156
loss: 1.0260072946548462,grad_norm: 0.9961718613276925, iteration: 117157
loss: 1.0212790966033936,grad_norm: 0.9999991004266024, iteration: 117158
loss: 1.0135705471038818,grad_norm: 0.8782997717256384, iteration: 117159
loss: 1.0146855115890503,grad_norm: 0.8792648243929264, iteration: 117160
loss: 1.0352121591567993,grad_norm: 0.9999997561600822, iteration: 117161
loss: 1.0203365087509155,grad_norm: 0.8563136710461826, iteration: 117162
loss: 1.0050290822982788,grad_norm: 0.9999990676146782, iteration: 117163
loss: 0.9946340918540955,grad_norm: 0.9999990731906231, iteration: 117164
loss: 1.5708812475204468,grad_norm: 0.9999999445225736, iteration: 117165
loss: 1.0013772249221802,grad_norm: 0.9999993184733804, iteration: 117166
loss: 1.00660240650177,grad_norm: 0.999999260048485, iteration: 117167
loss: 1.0288969278335571,grad_norm: 0.9386490047396798, iteration: 117168
loss: 0.9909178614616394,grad_norm: 0.9977760546401577, iteration: 117169
loss: 1.006266474723816,grad_norm: 0.9999990767380155, iteration: 117170
loss: 1.010976791381836,grad_norm: 0.9999992127278562, iteration: 117171
loss: 0.9951585531234741,grad_norm: 0.9999990630207458, iteration: 117172
loss: 0.9763481020927429,grad_norm: 0.9488401251225571, iteration: 117173
loss: 0.9966216683387756,grad_norm: 0.9999990162533355, iteration: 117174
loss: 1.011934518814087,grad_norm: 0.9999990259241098, iteration: 117175
loss: 0.9656116366386414,grad_norm: 0.9999989668655876, iteration: 117176
loss: 0.9992342591285706,grad_norm: 0.9999989978501608, iteration: 117177
loss: 1.0174131393432617,grad_norm: 0.9999990545314886, iteration: 117178
loss: 0.9953314661979675,grad_norm: 0.9502490570750427, iteration: 117179
loss: 1.0038038492202759,grad_norm: 0.9999991439980143, iteration: 117180
loss: 0.9907853007316589,grad_norm: 0.9999992584182483, iteration: 117181
loss: 1.0093384981155396,grad_norm: 0.9999992542206745, iteration: 117182
loss: 1.0103906393051147,grad_norm: 0.9999992434956411, iteration: 117183
loss: 0.9923879504203796,grad_norm: 0.9999989680716542, iteration: 117184
loss: 1.002280831336975,grad_norm: 0.8607414889967355, iteration: 117185
loss: 0.9938919544219971,grad_norm: 0.893499739650484, iteration: 117186
loss: 0.9925726056098938,grad_norm: 0.8220301744282225, iteration: 117187
loss: 0.9877642393112183,grad_norm: 0.8769443097794966, iteration: 117188
loss: 1.0069139003753662,grad_norm: 0.9999991323140072, iteration: 117189
loss: 0.9843305349349976,grad_norm: 0.9999989250818672, iteration: 117190
loss: 0.9679999947547913,grad_norm: 0.9999991428185598, iteration: 117191
loss: 1.0148077011108398,grad_norm: 0.9999993430156464, iteration: 117192
loss: 0.995686411857605,grad_norm: 0.9573228791303036, iteration: 117193
loss: 1.0168170928955078,grad_norm: 0.8794200590224878, iteration: 117194
loss: 1.0122138261795044,grad_norm: 0.8877043913115479, iteration: 117195
loss: 0.9648579955101013,grad_norm: 0.999999186308843, iteration: 117196
loss: 1.0089727640151978,grad_norm: 0.9999992920493799, iteration: 117197
loss: 1.0226545333862305,grad_norm: 0.9999990718955989, iteration: 117198
loss: 1.0034581422805786,grad_norm: 0.9999990968244491, iteration: 117199
loss: 1.0233615636825562,grad_norm: 0.9999991212242005, iteration: 117200
loss: 1.0030450820922852,grad_norm: 0.9999991010289926, iteration: 117201
loss: 0.9956231713294983,grad_norm: 0.9999989529064656, iteration: 117202
loss: 0.9631521105766296,grad_norm: 0.9999990329296822, iteration: 117203
loss: 0.994313657283783,grad_norm: 0.9999990809478636, iteration: 117204
loss: 0.9960863590240479,grad_norm: 0.9999990714977588, iteration: 117205
loss: 1.009988784790039,grad_norm: 0.9999991280675887, iteration: 117206
loss: 1.0144603252410889,grad_norm: 0.9999992283933868, iteration: 117207
loss: 1.004194736480713,grad_norm: 0.9999990582201641, iteration: 117208
loss: 0.9468119144439697,grad_norm: 0.9999992100140647, iteration: 117209
loss: 1.0824486017227173,grad_norm: 0.9999992745346832, iteration: 117210
loss: 0.9871093034744263,grad_norm: 0.9999989805449165, iteration: 117211
loss: 1.00139319896698,grad_norm: 0.745779839961496, iteration: 117212
loss: 0.9616950154304504,grad_norm: 0.9377860674628199, iteration: 117213
loss: 1.0463769435882568,grad_norm: 0.9772191876815626, iteration: 117214
loss: 0.9742852449417114,grad_norm: 0.9999992300516348, iteration: 117215
loss: 0.9733760356903076,grad_norm: 0.9999991654024866, iteration: 117216
loss: 0.9388397932052612,grad_norm: 0.9999990550022358, iteration: 117217
loss: 0.9945796728134155,grad_norm: 0.999999190750369, iteration: 117218
loss: 1.0003825426101685,grad_norm: 0.9359807933008493, iteration: 117219
loss: 1.0064072608947754,grad_norm: 0.9999992613085024, iteration: 117220
loss: 1.0141268968582153,grad_norm: 0.9999991416203319, iteration: 117221
loss: 0.9889714121818542,grad_norm: 0.9999989692754905, iteration: 117222
loss: 1.0031298398971558,grad_norm: 0.8976905423789747, iteration: 117223
loss: 1.0216180086135864,grad_norm: 0.981771777702062, iteration: 117224
loss: 0.9966490268707275,grad_norm: 0.9927792948975228, iteration: 117225
loss: 0.9862204194068909,grad_norm: 0.9999992615984565, iteration: 117226
loss: 1.03547203540802,grad_norm: 0.9999996174335699, iteration: 117227
loss: 1.0055172443389893,grad_norm: 0.9793501860722165, iteration: 117228
loss: 1.0515753030776978,grad_norm: 0.9999991064267559, iteration: 117229
loss: 0.9886287450790405,grad_norm: 0.9912277827444766, iteration: 117230
loss: 0.9920896291732788,grad_norm: 0.957216821376583, iteration: 117231
loss: 0.9734199047088623,grad_norm: 0.9328338004637549, iteration: 117232
loss: 0.984732449054718,grad_norm: 0.9999990806738697, iteration: 117233
loss: 1.0415147542953491,grad_norm: 0.9382805579036129, iteration: 117234
loss: 0.9896715879440308,grad_norm: 0.9999990500361935, iteration: 117235
loss: 0.9857233762741089,grad_norm: 0.9706732503785082, iteration: 117236
loss: 0.9843781590461731,grad_norm: 0.99999911479356, iteration: 117237
loss: 0.9646229147911072,grad_norm: 0.9999992189304241, iteration: 117238
loss: 1.0013892650604248,grad_norm: 0.9999992991917255, iteration: 117239
loss: 1.0305660963058472,grad_norm: 0.9999989584557527, iteration: 117240
loss: 1.0287891626358032,grad_norm: 0.8573557472433424, iteration: 117241
loss: 1.0032676458358765,grad_norm: 0.8970414074060954, iteration: 117242
loss: 1.0002045631408691,grad_norm: 0.9999992709870871, iteration: 117243
loss: 1.0373307466506958,grad_norm: 0.9999989831718712, iteration: 117244
loss: 0.9880647659301758,grad_norm: 0.9999990459334192, iteration: 117245
loss: 0.9892840385437012,grad_norm: 0.9999990574856552, iteration: 117246
loss: 1.0259977579116821,grad_norm: 0.9999995671471162, iteration: 117247
loss: 1.0594501495361328,grad_norm: 0.9999992443315264, iteration: 117248
loss: 0.936058759689331,grad_norm: 0.9999992089650273, iteration: 117249
loss: 1.0346455574035645,grad_norm: 0.9958430509775468, iteration: 117250
loss: 1.0300612449645996,grad_norm: 0.9999992174765596, iteration: 117251
loss: 0.9985894560813904,grad_norm: 0.9999995581340905, iteration: 117252
loss: 1.0069715976715088,grad_norm: 0.9477850674843546, iteration: 117253
loss: 1.0538873672485352,grad_norm: 0.9999992664750371, iteration: 117254
loss: 0.9892222285270691,grad_norm: 0.9843263527502374, iteration: 117255
loss: 0.9902070164680481,grad_norm: 0.9702896018177664, iteration: 117256
loss: 1.0347628593444824,grad_norm: 0.9812329433040413, iteration: 117257
loss: 0.9970089793205261,grad_norm: 0.8658177711123409, iteration: 117258
loss: 0.9928210973739624,grad_norm: 0.9999990628537878, iteration: 117259
loss: 1.0131388902664185,grad_norm: 0.999999326324899, iteration: 117260
loss: 0.978852391242981,grad_norm: 0.9999991177279437, iteration: 117261
loss: 0.9578965306282043,grad_norm: 0.9999991620631974, iteration: 117262
loss: 1.016003131866455,grad_norm: 0.9999991234944278, iteration: 117263
loss: 0.9579330086708069,grad_norm: 0.9626373938581907, iteration: 117264
loss: 0.9921379089355469,grad_norm: 0.8908331319246545, iteration: 117265
loss: 0.9829036593437195,grad_norm: 0.974060963082863, iteration: 117266
loss: 1.016356110572815,grad_norm: 0.9999991043379973, iteration: 117267
loss: 0.9964143633842468,grad_norm: 0.9456897875360782, iteration: 117268
loss: 0.9792270064353943,grad_norm: 0.9058005881837341, iteration: 117269
loss: 1.0266321897506714,grad_norm: 0.9999989932637176, iteration: 117270
loss: 0.9588026404380798,grad_norm: 0.9999990793998145, iteration: 117271
loss: 1.0100510120391846,grad_norm: 0.9999988943364211, iteration: 117272
loss: 0.9663078188896179,grad_norm: 0.9566332327715913, iteration: 117273
loss: 1.022265911102295,grad_norm: 0.9999990993962262, iteration: 117274
loss: 0.9994372129440308,grad_norm: 0.896490171169955, iteration: 117275
loss: 0.9938949942588806,grad_norm: 0.9999990201204473, iteration: 117276
loss: 1.0013335943222046,grad_norm: 0.9395874531360953, iteration: 117277
loss: 0.9765231609344482,grad_norm: 0.9999991009474254, iteration: 117278
loss: 0.9932035803794861,grad_norm: 0.9999991145502729, iteration: 117279
loss: 0.9899966716766357,grad_norm: 0.9999989299470813, iteration: 117280
loss: 1.0044211149215698,grad_norm: 0.9999990605674619, iteration: 117281
loss: 1.0275176763534546,grad_norm: 0.9677676263109464, iteration: 117282
loss: 0.9963445067405701,grad_norm: 0.8499373608070667, iteration: 117283
loss: 0.9705070853233337,grad_norm: 0.9999991617161236, iteration: 117284
loss: 1.019166350364685,grad_norm: 0.9999993709951287, iteration: 117285
loss: 1.0225613117218018,grad_norm: 0.8914137472259895, iteration: 117286
loss: 1.0443850755691528,grad_norm: 0.9999992045650422, iteration: 117287
loss: 0.9916322827339172,grad_norm: 0.9999992118139325, iteration: 117288
loss: 1.055011510848999,grad_norm: 0.9918102950074293, iteration: 117289
loss: 1.0078153610229492,grad_norm: 0.999999491816743, iteration: 117290
loss: 1.0174325704574585,grad_norm: 0.9754807280241962, iteration: 117291
loss: 0.9811283946037292,grad_norm: 0.866888197340628, iteration: 117292
loss: 0.9917017817497253,grad_norm: 0.9116069612833511, iteration: 117293
loss: 0.9598833322525024,grad_norm: 0.8653843148205694, iteration: 117294
loss: 0.9755800366401672,grad_norm: 0.933890776829746, iteration: 117295
loss: 1.045458197593689,grad_norm: 0.9999992404559545, iteration: 117296
loss: 1.0108546018600464,grad_norm: 0.9999992379111029, iteration: 117297
loss: 1.0095069408416748,grad_norm: 0.905982423152925, iteration: 117298
loss: 1.0336419343948364,grad_norm: 0.999999893935466, iteration: 117299
loss: 1.0178221464157104,grad_norm: 0.9999990594397669, iteration: 117300
loss: 0.9937685132026672,grad_norm: 0.9314483206686088, iteration: 117301
loss: 1.002596378326416,grad_norm: 0.9999990479550337, iteration: 117302
loss: 1.0085885524749756,grad_norm: 0.8623333712557221, iteration: 117303
loss: 0.9464907050132751,grad_norm: 0.8764483644585636, iteration: 117304
loss: 0.9696642160415649,grad_norm: 0.9202172436145629, iteration: 117305
loss: 0.9875223636627197,grad_norm: 0.861608122464802, iteration: 117306
loss: 1.009508490562439,grad_norm: 0.7835737690829712, iteration: 117307
loss: 0.9784037470817566,grad_norm: 0.9999991396741864, iteration: 117308
loss: 0.9970875382423401,grad_norm: 0.999999002332084, iteration: 117309
loss: 1.0726014375686646,grad_norm: 0.9999998833798386, iteration: 117310
loss: 0.9998825788497925,grad_norm: 0.9895637118483823, iteration: 117311
loss: 1.0057669878005981,grad_norm: 0.988233688034795, iteration: 117312
loss: 0.9908791184425354,grad_norm: 0.9999992663740148, iteration: 117313
loss: 1.0013331174850464,grad_norm: 0.9999990673348609, iteration: 117314
loss: 0.9783231616020203,grad_norm: 0.9999991763330217, iteration: 117315
loss: 0.9913376569747925,grad_norm: 0.9999990699252541, iteration: 117316
loss: 1.0383354425430298,grad_norm: 0.9999992367893443, iteration: 117317
loss: 1.001812219619751,grad_norm: 0.9999992104823558, iteration: 117318
loss: 1.0186489820480347,grad_norm: 0.9999990979036443, iteration: 117319
loss: 0.9739647507667542,grad_norm: 0.8284838337233318, iteration: 117320
loss: 0.9790560007095337,grad_norm: 0.9999991753542447, iteration: 117321
loss: 1.0082508325576782,grad_norm: 0.9999997917218405, iteration: 117322
loss: 0.9864271283149719,grad_norm: 0.9278132550566609, iteration: 117323
loss: 0.9722086787223816,grad_norm: 0.8877348917552033, iteration: 117324
loss: 1.0086199045181274,grad_norm: 0.9753150469035713, iteration: 117325
loss: 1.0017108917236328,grad_norm: 0.9749366055201497, iteration: 117326
loss: 1.0111442804336548,grad_norm: 0.9099432503500645, iteration: 117327
loss: 0.9837702512741089,grad_norm: 0.9999991440859843, iteration: 117328
loss: 1.027026653289795,grad_norm: 0.9999990728926714, iteration: 117329
loss: 0.9985640645027161,grad_norm: 0.9999990699251486, iteration: 117330
loss: 0.9700257778167725,grad_norm: 0.8992982204087177, iteration: 117331
loss: 1.0118027925491333,grad_norm: 0.9901032447997524, iteration: 117332
loss: 1.002187728881836,grad_norm: 0.999999150356745, iteration: 117333
loss: 1.0222697257995605,grad_norm: 0.9999992646110273, iteration: 117334
loss: 1.0516632795333862,grad_norm: 0.9999998997736055, iteration: 117335
loss: 0.990245521068573,grad_norm: 0.9999998929894602, iteration: 117336
loss: 1.0268622636795044,grad_norm: 0.9999992779880218, iteration: 117337
loss: 0.9894699454307556,grad_norm: 0.9162430165951181, iteration: 117338
loss: 1.0305230617523193,grad_norm: 0.9999998535308637, iteration: 117339
loss: 1.0336910486221313,grad_norm: 0.9999991335360342, iteration: 117340
loss: 1.008347511291504,grad_norm: 0.9965518629437793, iteration: 117341
loss: 1.0270426273345947,grad_norm: 0.9602128100523512, iteration: 117342
loss: 1.0168339014053345,grad_norm: 0.9999992405004667, iteration: 117343
loss: 1.0232692956924438,grad_norm: 0.9857641812668987, iteration: 117344
loss: 0.9630271792411804,grad_norm: 0.8903265779075017, iteration: 117345
loss: 1.0041393041610718,grad_norm: 0.9999988631666125, iteration: 117346
loss: 0.9841188788414001,grad_norm: 0.9999993297510645, iteration: 117347
loss: 0.9872085452079773,grad_norm: 0.9999990157642337, iteration: 117348
loss: 0.9840344786643982,grad_norm: 0.999999275235824, iteration: 117349
loss: 1.0380775928497314,grad_norm: 0.9999991875457402, iteration: 117350
loss: 1.082767367362976,grad_norm: 0.9999993085858123, iteration: 117351
loss: 0.9954857230186462,grad_norm: 0.9999991776526168, iteration: 117352
loss: 1.0080045461654663,grad_norm: 0.9676573186405454, iteration: 117353
loss: 0.9887115955352783,grad_norm: 0.9999991523113715, iteration: 117354
loss: 1.0283905267715454,grad_norm: 0.9999991295372718, iteration: 117355
loss: 1.0174224376678467,grad_norm: 0.9999991552551593, iteration: 117356
loss: 1.0206711292266846,grad_norm: 0.9999992570180712, iteration: 117357
loss: 1.0026087760925293,grad_norm: 0.9999998623085523, iteration: 117358
loss: 0.9948382377624512,grad_norm: 0.902237575741191, iteration: 117359
loss: 1.0204883813858032,grad_norm: 0.9999991278407552, iteration: 117360
loss: 0.9973040223121643,grad_norm: 0.9999991960970828, iteration: 117361
loss: 1.0213675498962402,grad_norm: 0.9999990126325538, iteration: 117362
loss: 0.9863491058349609,grad_norm: 0.999999183972304, iteration: 117363
loss: 0.9923274517059326,grad_norm: 0.9999990722317672, iteration: 117364
loss: 0.9688848853111267,grad_norm: 0.9999990930223527, iteration: 117365
loss: 0.9854031205177307,grad_norm: 0.8929294367545207, iteration: 117366
loss: 1.0361496210098267,grad_norm: 0.9461130152449044, iteration: 117367
loss: 1.0236972570419312,grad_norm: 0.9999990242615092, iteration: 117368
loss: 1.0905790328979492,grad_norm: 0.9999994859956332, iteration: 117369
loss: 1.0033180713653564,grad_norm: 0.9288990751221684, iteration: 117370
loss: 1.0096670389175415,grad_norm: 0.9999990017259819, iteration: 117371
loss: 1.0037943124771118,grad_norm: 0.7939965965631025, iteration: 117372
loss: 1.0793194770812988,grad_norm: 0.9999996075797665, iteration: 117373
loss: 0.9902552962303162,grad_norm: 0.9878891873809084, iteration: 117374
loss: 1.024330735206604,grad_norm: 0.9999991477141712, iteration: 117375
loss: 1.0533698797225952,grad_norm: 0.9999998383848293, iteration: 117376
loss: 1.0011104345321655,grad_norm: 0.999999241205042, iteration: 117377
loss: 1.0067098140716553,grad_norm: 0.8624587418741712, iteration: 117378
loss: 0.9697338342666626,grad_norm: 0.8729379034627276, iteration: 117379
loss: 0.9750193357467651,grad_norm: 0.9999991388565392, iteration: 117380
loss: 0.9729852080345154,grad_norm: 0.9777262738657381, iteration: 117381
loss: 0.9796890616416931,grad_norm: 0.9353030854686982, iteration: 117382
loss: 1.084114670753479,grad_norm: 0.9999992396127547, iteration: 117383
loss: 1.0775156021118164,grad_norm: 0.9999991205962061, iteration: 117384
loss: 1.0101619958877563,grad_norm: 0.9999991504920208, iteration: 117385
loss: 1.2077940702438354,grad_norm: 0.999999110491727, iteration: 117386
loss: 1.1216492652893066,grad_norm: 0.9999992471962363, iteration: 117387
loss: 1.0745311975479126,grad_norm: 0.9999994422190607, iteration: 117388
loss: 1.0113118886947632,grad_norm: 0.9999991451153148, iteration: 117389
loss: 1.036942958831787,grad_norm: 0.9999995966454736, iteration: 117390
loss: 0.9770836234092712,grad_norm: 0.9999990768890598, iteration: 117391
loss: 0.9671273231506348,grad_norm: 0.9999991289456788, iteration: 117392
loss: 1.2048290967941284,grad_norm: 0.9999998296825292, iteration: 117393
loss: 1.0088742971420288,grad_norm: 0.8914909768562612, iteration: 117394
loss: 1.0156757831573486,grad_norm: 0.9552747282260234, iteration: 117395
loss: 1.0026869773864746,grad_norm: 0.9999992102208046, iteration: 117396
loss: 1.001735806465149,grad_norm: 0.9999997260600996, iteration: 117397
loss: 0.9981029629707336,grad_norm: 0.9999989544848669, iteration: 117398
loss: 1.0697052478790283,grad_norm: 0.9999996822828049, iteration: 117399
loss: 1.0182777643203735,grad_norm: 0.9999991972186651, iteration: 117400
loss: 1.0915687084197998,grad_norm: 0.9999997491543127, iteration: 117401
loss: 1.0466876029968262,grad_norm: 0.9999993649476424, iteration: 117402
loss: 0.9743711948394775,grad_norm: 0.9999990393178478, iteration: 117403
loss: 1.0147417783737183,grad_norm: 0.9999991010016815, iteration: 117404
loss: 0.995459794998169,grad_norm: 0.9634737634698868, iteration: 117405
loss: 1.0603872537612915,grad_norm: 0.9999996107288193, iteration: 117406
loss: 0.9625070691108704,grad_norm: 0.9999989694990183, iteration: 117407
loss: 0.9983800053596497,grad_norm: 0.9999990967239665, iteration: 117408
loss: 1.0665369033813477,grad_norm: 1.0000001674064638, iteration: 117409
loss: 1.4249267578125,grad_norm: 0.9999998019496499, iteration: 117410
loss: 1.004686951637268,grad_norm: 0.9999991180624217, iteration: 117411
loss: 0.9758203625679016,grad_norm: 0.9051870567853462, iteration: 117412
loss: 1.021111011505127,grad_norm: 0.9502383285832418, iteration: 117413
loss: 1.0331867933273315,grad_norm: 0.999999227313848, iteration: 117414
loss: 1.1877639293670654,grad_norm: 0.9999997213053328, iteration: 117415
loss: 0.9983462691307068,grad_norm: 0.9815601710644697, iteration: 117416
loss: 0.9983392953872681,grad_norm: 0.9999992149094867, iteration: 117417
loss: 0.9802915453910828,grad_norm: 0.999999059834179, iteration: 117418
loss: 1.0000576972961426,grad_norm: 0.999999207392551, iteration: 117419
loss: 1.0751489400863647,grad_norm: 0.9999991540801294, iteration: 117420
loss: 1.0040217638015747,grad_norm: 0.9999991812725534, iteration: 117421
loss: 0.9909014105796814,grad_norm: 0.9999991448244093, iteration: 117422
loss: 1.0131608247756958,grad_norm: 0.9999990369519001, iteration: 117423
loss: 1.009305715560913,grad_norm: 0.9999991838750797, iteration: 117424
loss: 1.0038433074951172,grad_norm: 0.9835289680728765, iteration: 117425
loss: 1.0035802125930786,grad_norm: 0.9999990769300233, iteration: 117426
loss: 1.0203694105148315,grad_norm: 0.9808419997854422, iteration: 117427
loss: 0.987373948097229,grad_norm: 0.959844022099304, iteration: 117428
loss: 1.086370587348938,grad_norm: 0.9999997313471465, iteration: 117429
loss: 0.9936089515686035,grad_norm: 0.9439427937149888, iteration: 117430
loss: 1.0910402536392212,grad_norm: 0.9886881978700868, iteration: 117431
loss: 1.0337070226669312,grad_norm: 0.9999992910558957, iteration: 117432
loss: 1.00455641746521,grad_norm: 0.9114610472889669, iteration: 117433
loss: 0.98846834897995,grad_norm: 0.9999990251953094, iteration: 117434
loss: 1.0072834491729736,grad_norm: 0.9999991580047414, iteration: 117435
loss: 1.0029127597808838,grad_norm: 0.9999991532323624, iteration: 117436
loss: 1.0094407796859741,grad_norm: 0.9999991510369342, iteration: 117437
loss: 1.0430359840393066,grad_norm: 0.844833116699819, iteration: 117438
loss: 0.9593346118927002,grad_norm: 0.9999991561072441, iteration: 117439
loss: 1.0131285190582275,grad_norm: 0.9999991370284875, iteration: 117440
loss: 0.9899379014968872,grad_norm: 0.982415256895179, iteration: 117441
loss: 1.0180819034576416,grad_norm: 0.9999992308153256, iteration: 117442
loss: 1.0146598815917969,grad_norm: 0.9999998944719153, iteration: 117443
loss: 1.0452238321304321,grad_norm: 0.9999992005994158, iteration: 117444
loss: 1.015881061553955,grad_norm: 0.9999998510777702, iteration: 117445
loss: 0.9938598275184631,grad_norm: 0.999999142468695, iteration: 117446
loss: 1.0105535984039307,grad_norm: 0.9999993415787783, iteration: 117447
loss: 1.0060158967971802,grad_norm: 0.856213346026935, iteration: 117448
loss: 0.98371821641922,grad_norm: 0.9999992085758802, iteration: 117449
loss: 1.0351297855377197,grad_norm: 0.9999989305499949, iteration: 117450
loss: 1.0702582597732544,grad_norm: 0.999999289962979, iteration: 117451
loss: 1.084456443786621,grad_norm: 0.9999993504964365, iteration: 117452
loss: 1.0618815422058105,grad_norm: 0.9999993115616507, iteration: 117453
loss: 0.9868482351303101,grad_norm: 0.9999992331335795, iteration: 117454
loss: 1.0367927551269531,grad_norm: 0.9999990573271466, iteration: 117455
loss: 0.9997610449790955,grad_norm: 0.8746812414084166, iteration: 117456
loss: 1.0070475339889526,grad_norm: 0.9689242302335531, iteration: 117457
loss: 0.9831631183624268,grad_norm: 0.9999992267341087, iteration: 117458
loss: 0.9890186190605164,grad_norm: 0.9189539358384013, iteration: 117459
loss: 0.9882153272628784,grad_norm: 0.9999993114363691, iteration: 117460
loss: 1.0262794494628906,grad_norm: 0.9999993405587186, iteration: 117461
loss: 1.0041570663452148,grad_norm: 0.9007525886280386, iteration: 117462
loss: 1.0137999057769775,grad_norm: 0.9285921670436932, iteration: 117463
loss: 0.959122896194458,grad_norm: 0.9999990778616358, iteration: 117464
loss: 1.0783917903900146,grad_norm: 0.999999045755217, iteration: 117465
loss: 1.020310401916504,grad_norm: 0.9999991710199391, iteration: 117466
loss: 0.969507098197937,grad_norm: 0.9999992718867277, iteration: 117467
loss: 1.0557738542556763,grad_norm: 0.999999403321632, iteration: 117468
loss: 1.0330007076263428,grad_norm: 0.9999993017770276, iteration: 117469
loss: 1.0055397748947144,grad_norm: 0.999999141319934, iteration: 117470
loss: 0.9870890378952026,grad_norm: 0.9999992923718468, iteration: 117471
loss: 0.9914296269416809,grad_norm: 0.9999993318822568, iteration: 117472
loss: 1.0260010957717896,grad_norm: 0.9999990591054506, iteration: 117473
loss: 1.0274977684020996,grad_norm: 1.0000000388520902, iteration: 117474
loss: 0.9852468371391296,grad_norm: 0.999999155268535, iteration: 117475
loss: 0.9987898468971252,grad_norm: 0.9263864216593974, iteration: 117476
loss: 0.9976201057434082,grad_norm: 0.9549146721429329, iteration: 117477
loss: 0.9438556432723999,grad_norm: 0.9691937364746487, iteration: 117478
loss: 0.976036787033081,grad_norm: 0.9999992916671178, iteration: 117479
loss: 0.9563618898391724,grad_norm: 0.9572048020864371, iteration: 117480
loss: 0.9817695021629333,grad_norm: 0.9749675813736516, iteration: 117481
loss: 0.987987756729126,grad_norm: 0.9999992382724451, iteration: 117482
loss: 1.0089070796966553,grad_norm: 0.9786992928677887, iteration: 117483
loss: 0.9901800155639648,grad_norm: 0.999999062140209, iteration: 117484
loss: 1.0142122507095337,grad_norm: 0.9401290081774067, iteration: 117485
loss: 1.0199553966522217,grad_norm: 0.9999992470925736, iteration: 117486
loss: 1.0113441944122314,grad_norm: 0.9999995928789747, iteration: 117487
loss: 1.0270473957061768,grad_norm: 0.9999997478248748, iteration: 117488
loss: 0.9845283031463623,grad_norm: 0.9778729902425021, iteration: 117489
loss: 0.9945976138114929,grad_norm: 0.9999989738933875, iteration: 117490
loss: 1.0040936470031738,grad_norm: 0.9999990137933661, iteration: 117491
loss: 0.9968369007110596,grad_norm: 0.9999989839724418, iteration: 117492
loss: 0.9746782183647156,grad_norm: 0.9999990275805274, iteration: 117493
loss: 0.9814037680625916,grad_norm: 0.9999992918956903, iteration: 117494
loss: 1.0047563314437866,grad_norm: 0.9626654805251953, iteration: 117495
loss: 0.9904784560203552,grad_norm: 0.9999993316573758, iteration: 117496
loss: 1.0098539590835571,grad_norm: 0.9999998353648161, iteration: 117497
loss: 0.9968816041946411,grad_norm: 0.8619287805576165, iteration: 117498
loss: 1.0229811668395996,grad_norm: 0.9999992084862872, iteration: 117499
loss: 1.0688177347183228,grad_norm: 0.9999997253108205, iteration: 117500
loss: 1.01935613155365,grad_norm: 0.9999991494598748, iteration: 117501
loss: 1.0420745611190796,grad_norm: 0.9999991861109582, iteration: 117502
loss: 0.9940205216407776,grad_norm: 0.9999993030378257, iteration: 117503
loss: 1.0335932970046997,grad_norm: 0.9999991696816019, iteration: 117504
loss: 1.0107948780059814,grad_norm: 0.9999992111571653, iteration: 117505
loss: 0.9866341948509216,grad_norm: 0.932772533451367, iteration: 117506
loss: 1.0043492317199707,grad_norm: 0.9999991940957825, iteration: 117507
loss: 0.9616697430610657,grad_norm: 0.9999990674885737, iteration: 117508
loss: 1.0072073936462402,grad_norm: 0.9999990408929834, iteration: 117509
loss: 0.9945796728134155,grad_norm: 0.9064997689560228, iteration: 117510
loss: 1.0124764442443848,grad_norm: 0.823778858426211, iteration: 117511
loss: 1.0018205642700195,grad_norm: 0.9999990715326476, iteration: 117512
loss: 1.0022064447402954,grad_norm: 0.999999033904974, iteration: 117513
loss: 1.036724328994751,grad_norm: 0.9999990931054762, iteration: 117514
loss: 0.9518347978591919,grad_norm: 0.9759178727667999, iteration: 117515
loss: 1.0195834636688232,grad_norm: 0.9999989992515022, iteration: 117516
loss: 0.9769309759140015,grad_norm: 0.9999991836531081, iteration: 117517
loss: 1.0047107934951782,grad_norm: 0.9999990457922198, iteration: 117518
loss: 1.0271297693252563,grad_norm: 0.999999197365841, iteration: 117519
loss: 1.0087487697601318,grad_norm: 0.9999991407949541, iteration: 117520
loss: 1.0363831520080566,grad_norm: 0.9999990135584844, iteration: 117521
loss: 1.0224547386169434,grad_norm: 0.9769879368809061, iteration: 117522
loss: 0.9691155552864075,grad_norm: 0.9999991135148645, iteration: 117523
loss: 1.084855556488037,grad_norm: 0.9999990241105744, iteration: 117524
loss: 0.9747586250305176,grad_norm: 0.9999990022414907, iteration: 117525
loss: 1.05563223361969,grad_norm: 0.9999991744934622, iteration: 117526
loss: 1.016300082206726,grad_norm: 0.9999989107062449, iteration: 117527
loss: 0.9946103692054749,grad_norm: 0.8988500013771016, iteration: 117528
loss: 0.9873036742210388,grad_norm: 0.9999990673663174, iteration: 117529
loss: 1.0763459205627441,grad_norm: 0.9999997496272028, iteration: 117530
loss: 0.9928338527679443,grad_norm: 0.9999991068147067, iteration: 117531
loss: 1.012393593788147,grad_norm: 0.9999992759608219, iteration: 117532
loss: 1.0260844230651855,grad_norm: 0.9999992894584244, iteration: 117533
loss: 1.0114847421646118,grad_norm: 0.8688742930894547, iteration: 117534
loss: 0.9414458870887756,grad_norm: 0.999999227347252, iteration: 117535
loss: 1.0147053003311157,grad_norm: 0.9999990999660189, iteration: 117536
loss: 1.0269378423690796,grad_norm: 0.9999992710601261, iteration: 117537
loss: 0.9777323007583618,grad_norm: 0.9999991216992452, iteration: 117538
loss: 1.0228651762008667,grad_norm: 0.9999990003259885, iteration: 117539
loss: 1.0220999717712402,grad_norm: 0.999999141660376, iteration: 117540
loss: 1.026410698890686,grad_norm: 0.9999991186559203, iteration: 117541
loss: 1.1992948055267334,grad_norm: 0.9999999134266431, iteration: 117542
loss: 1.0246179103851318,grad_norm: 0.9999991302774568, iteration: 117543
loss: 0.9665891528129578,grad_norm: 0.9999992515427166, iteration: 117544
loss: 1.006845474243164,grad_norm: 0.999999612651615, iteration: 117545
loss: 1.0060362815856934,grad_norm: 0.9999992032765755, iteration: 117546
loss: 0.9849291443824768,grad_norm: 0.9999990415666998, iteration: 117547
loss: 0.995400071144104,grad_norm: 0.9999991586124183, iteration: 117548
loss: 0.9988059997558594,grad_norm: 0.9999989930503467, iteration: 117549
loss: 1.015241265296936,grad_norm: 0.9999991489663287, iteration: 117550
loss: 1.0540356636047363,grad_norm: 0.999999146241606, iteration: 117551
loss: 1.0250893831253052,grad_norm: 0.9999991666545319, iteration: 117552
loss: 1.0050864219665527,grad_norm: 0.9999993039558062, iteration: 117553
loss: 0.9856706857681274,grad_norm: 0.9999993072861154, iteration: 117554
loss: 1.214769721031189,grad_norm: 0.999999769036307, iteration: 117555
loss: 0.9996302127838135,grad_norm: 0.999999183553523, iteration: 117556
loss: 0.9864619970321655,grad_norm: 0.8070708489777118, iteration: 117557
loss: 0.9701951146125793,grad_norm: 0.9047587087387691, iteration: 117558
loss: 0.9514521956443787,grad_norm: 0.9999990928977219, iteration: 117559
loss: 0.9995546340942383,grad_norm: 0.9999994886758488, iteration: 117560
loss: 0.9902638792991638,grad_norm: 0.9999990451758098, iteration: 117561
loss: 0.9871484041213989,grad_norm: 0.986566714431164, iteration: 117562
loss: 1.0028865337371826,grad_norm: 0.97469018758209, iteration: 117563
loss: 0.9535767436027527,grad_norm: 0.9423583145302602, iteration: 117564
loss: 1.0752062797546387,grad_norm: 0.9999992260421887, iteration: 117565
loss: 0.9928660988807678,grad_norm: 0.9999993255660272, iteration: 117566
loss: 1.0149831771850586,grad_norm: 0.999999061680169, iteration: 117567
loss: 0.9718300700187683,grad_norm: 0.8979483658132578, iteration: 117568
loss: 1.0167564153671265,grad_norm: 0.9999992264825331, iteration: 117569
loss: 1.0166490077972412,grad_norm: 0.9999991088173156, iteration: 117570
loss: 1.0031591653823853,grad_norm: 0.9999991472164502, iteration: 117571
loss: 1.0197595357894897,grad_norm: 0.9999992132058735, iteration: 117572
loss: 0.9757477045059204,grad_norm: 0.9999989970097654, iteration: 117573
loss: 0.9883482456207275,grad_norm: 0.9999990922882488, iteration: 117574
loss: 1.0194987058639526,grad_norm: 0.9999990151686342, iteration: 117575
loss: 0.9929019212722778,grad_norm: 0.995297976444062, iteration: 117576
loss: 1.0331915616989136,grad_norm: 0.9589422418241969, iteration: 117577
loss: 0.9927467703819275,grad_norm: 0.9999992846882451, iteration: 117578
loss: 0.9829692244529724,grad_norm: 0.8894949834851041, iteration: 117579
loss: 0.9541022181510925,grad_norm: 0.9999991114836264, iteration: 117580
loss: 0.977318525314331,grad_norm: 0.9999991777274606, iteration: 117581
loss: 1.012377142906189,grad_norm: 0.9999992266765672, iteration: 117582
loss: 0.9893367290496826,grad_norm: 0.9999991567567001, iteration: 117583
loss: 1.0079277753829956,grad_norm: 0.9257211926358664, iteration: 117584
loss: 0.9850659370422363,grad_norm: 0.9999990993882496, iteration: 117585
loss: 0.9703721404075623,grad_norm: 0.9999992995630219, iteration: 117586
loss: 0.9827505946159363,grad_norm: 0.9999992741080023, iteration: 117587
loss: 1.009178876876831,grad_norm: 0.9999991906363069, iteration: 117588
loss: 0.9879100322723389,grad_norm: 0.9999990625470347, iteration: 117589
loss: 0.9779675006866455,grad_norm: 0.999999061718744, iteration: 117590
loss: 0.9774115085601807,grad_norm: 0.9896052377546344, iteration: 117591
loss: 1.0177627801895142,grad_norm: 0.9649321523331251, iteration: 117592
loss: 1.030849814414978,grad_norm: 0.9999990316744881, iteration: 117593
loss: 1.003271460533142,grad_norm: 0.9999992297440726, iteration: 117594
loss: 1.0688285827636719,grad_norm: 0.9999994579578756, iteration: 117595
loss: 1.0258140563964844,grad_norm: 0.9999990740443229, iteration: 117596
loss: 1.002601981163025,grad_norm: 0.9938063421680751, iteration: 117597
loss: 0.9594953656196594,grad_norm: 0.8941502728003099, iteration: 117598
loss: 1.015765905380249,grad_norm: 0.9222401051828543, iteration: 117599
loss: 0.9524294137954712,grad_norm: 0.9934733643070344, iteration: 117600
loss: 1.0141252279281616,grad_norm: 0.883266619326095, iteration: 117601
loss: 0.9918615221977234,grad_norm: 0.9577597282556313, iteration: 117602
loss: 0.973328173160553,grad_norm: 0.9660941188527745, iteration: 117603
loss: 1.017393708229065,grad_norm: 0.9999989563246748, iteration: 117604
loss: 1.0338585376739502,grad_norm: 0.9999990259330112, iteration: 117605
loss: 1.291213035583496,grad_norm: 0.9999998259312253, iteration: 117606
loss: 1.0294290781021118,grad_norm: 0.9349345938677545, iteration: 117607
loss: 1.002764344215393,grad_norm: 0.886705434691298, iteration: 117608
loss: 0.9629455208778381,grad_norm: 0.9999992137133121, iteration: 117609
loss: 1.0100129842758179,grad_norm: 0.8749770778512922, iteration: 117610
loss: 0.9823697805404663,grad_norm: 0.9999992018597782, iteration: 117611
loss: 1.0345624685287476,grad_norm: 0.9999996288704414, iteration: 117612
loss: 0.9937854409217834,grad_norm: 0.999998901860934, iteration: 117613
loss: 0.9907309412956238,grad_norm: 0.9793161714978694, iteration: 117614
loss: 0.9726675748825073,grad_norm: 0.9999990141821654, iteration: 117615
loss: 0.9515226483345032,grad_norm: 0.9999992213162755, iteration: 117616
loss: 1.0041773319244385,grad_norm: 0.9999991122415732, iteration: 117617
loss: 1.016506314277649,grad_norm: 0.9999991582642228, iteration: 117618
loss: 1.0043762922286987,grad_norm: 0.834613090415255, iteration: 117619
loss: 0.9643507599830627,grad_norm: 0.9999992230240229, iteration: 117620
loss: 1.0900850296020508,grad_norm: 0.9999993748327283, iteration: 117621
loss: 1.0037777423858643,grad_norm: 0.9999990966174602, iteration: 117622
loss: 0.9732615351676941,grad_norm: 0.9261147637200631, iteration: 117623
loss: 1.0195766687393188,grad_norm: 0.8576112674145991, iteration: 117624
loss: 0.9936161041259766,grad_norm: 0.8367961020222019, iteration: 117625
loss: 0.9951961040496826,grad_norm: 0.9999990956991313, iteration: 117626
loss: 1.0045498609542847,grad_norm: 0.9242424217676262, iteration: 117627
loss: 1.0305426120758057,grad_norm: 0.9832549339093408, iteration: 117628
loss: 1.011950135231018,grad_norm: 0.890633706128953, iteration: 117629
loss: 1.0051387548446655,grad_norm: 0.999999191798315, iteration: 117630
loss: 0.9846144914627075,grad_norm: 0.9217247050241902, iteration: 117631
loss: 1.0195488929748535,grad_norm: 0.9999992912285335, iteration: 117632
loss: 0.9831466674804688,grad_norm: 0.9999990529582958, iteration: 117633
loss: 1.0415674448013306,grad_norm: 0.9999992993631531, iteration: 117634
loss: 1.0390626192092896,grad_norm: 0.9999992609619873, iteration: 117635
loss: 0.9898725152015686,grad_norm: 0.9232214311283237, iteration: 117636
loss: 0.962509036064148,grad_norm: 0.9999990200460656, iteration: 117637
loss: 1.0183663368225098,grad_norm: 0.9999992390126936, iteration: 117638
loss: 1.0290688276290894,grad_norm: 0.9999989128758363, iteration: 117639
loss: 0.9894132018089294,grad_norm: 0.8926940315785383, iteration: 117640
loss: 1.0419530868530273,grad_norm: 0.9858719540240435, iteration: 117641
loss: 1.01687753200531,grad_norm: 0.9999990062315327, iteration: 117642
loss: 0.9810102581977844,grad_norm: 0.9999992479498123, iteration: 117643
loss: 1.0067696571350098,grad_norm: 0.9999991212597684, iteration: 117644
loss: 0.9700315594673157,grad_norm: 0.9999991072848837, iteration: 117645
loss: 0.9981949329376221,grad_norm: 0.9999994755838163, iteration: 117646
loss: 0.9970327019691467,grad_norm: 0.9932249033087978, iteration: 117647
loss: 0.9504966139793396,grad_norm: 0.999999143425157, iteration: 117648
loss: 1.0062847137451172,grad_norm: 0.9999990901970156, iteration: 117649
loss: 0.981680691242218,grad_norm: 0.9857084595905615, iteration: 117650
loss: 1.009630560874939,grad_norm: 0.9600012890440174, iteration: 117651
loss: 1.0171351432800293,grad_norm: 0.9554312888032631, iteration: 117652
loss: 1.0308802127838135,grad_norm: 0.9470823650019655, iteration: 117653
loss: 1.0092641115188599,grad_norm: 0.9110885112254695, iteration: 117654
loss: 0.9989745616912842,grad_norm: 0.9999990519107537, iteration: 117655
loss: 0.9722980856895447,grad_norm: 0.8457823153950478, iteration: 117656
loss: 0.9987574219703674,grad_norm: 0.9999989641318923, iteration: 117657
loss: 0.9922804832458496,grad_norm: 0.9020163016243913, iteration: 117658
loss: 0.9936199188232422,grad_norm: 0.999998922382553, iteration: 117659
loss: 0.9763249754905701,grad_norm: 0.9303282810172003, iteration: 117660
loss: 1.0344505310058594,grad_norm: 0.9999999220032338, iteration: 117661
loss: 1.0126304626464844,grad_norm: 0.9999988144334033, iteration: 117662
loss: 0.9947758913040161,grad_norm: 0.8987146697183143, iteration: 117663
loss: 0.9775320887565613,grad_norm: 0.9999992424642703, iteration: 117664
loss: 1.006609559059143,grad_norm: 0.9999992526898205, iteration: 117665
loss: 0.9856151342391968,grad_norm: 0.9999993137105817, iteration: 117666
loss: 0.9882169365882874,grad_norm: 0.999999407944984, iteration: 117667
loss: 1.019716739654541,grad_norm: 0.99999908990404, iteration: 117668
loss: 1.034712314605713,grad_norm: 0.9707444907387128, iteration: 117669
loss: 0.9950422644615173,grad_norm: 0.9999991090635778, iteration: 117670
loss: 1.0652427673339844,grad_norm: 0.905877830086537, iteration: 117671
loss: 0.9410961270332336,grad_norm: 0.999999115071715, iteration: 117672
loss: 0.969964325428009,grad_norm: 0.9999991773690271, iteration: 117673
loss: 1.011314034461975,grad_norm: 0.9071516449628549, iteration: 117674
loss: 1.0197334289550781,grad_norm: 0.999999133288659, iteration: 117675
loss: 0.9935181736946106,grad_norm: 0.9999990957769689, iteration: 117676
loss: 1.0283972024917603,grad_norm: 0.9999991287459182, iteration: 117677
loss: 1.0288140773773193,grad_norm: 0.9999999686941944, iteration: 117678
loss: 0.9655936360359192,grad_norm: 0.9999992433099967, iteration: 117679
loss: 0.9993361830711365,grad_norm: 0.999999335254426, iteration: 117680
loss: 0.9667665362358093,grad_norm: 0.9999991283756695, iteration: 117681
loss: 0.9596247673034668,grad_norm: 0.9999991191677372, iteration: 117682
loss: 1.0124112367630005,grad_norm: 0.9999995695669697, iteration: 117683
loss: 0.9677384495735168,grad_norm: 0.9999992011938034, iteration: 117684
loss: 1.0168068408966064,grad_norm: 0.9876991024779268, iteration: 117685
loss: 0.9842535257339478,grad_norm: 0.9999992205861725, iteration: 117686
loss: 0.9660726189613342,grad_norm: 0.9000072400718623, iteration: 117687
loss: 1.0021086931228638,grad_norm: 0.9999991475004578, iteration: 117688
loss: 0.9976573586463928,grad_norm: 0.9999998164403147, iteration: 117689
loss: 0.976251482963562,grad_norm: 0.999999174040741, iteration: 117690
loss: 0.9957331418991089,grad_norm: 0.9999990841318107, iteration: 117691
loss: 0.9886502623558044,grad_norm: 0.9999990354774441, iteration: 117692
loss: 0.9995926022529602,grad_norm: 0.9999993068233386, iteration: 117693
loss: 1.0354043245315552,grad_norm: 0.9999990811976857, iteration: 117694
loss: 0.9675648212432861,grad_norm: 0.9965958989383565, iteration: 117695
loss: 1.0252903699874878,grad_norm: 0.99328557140852, iteration: 117696
loss: 0.9881649017333984,grad_norm: 0.999999072271367, iteration: 117697
loss: 1.0207293033599854,grad_norm: 0.9999990752393546, iteration: 117698
loss: 0.9965950846672058,grad_norm: 0.9999998015031165, iteration: 117699
loss: 1.098044753074646,grad_norm: 0.9999992422914382, iteration: 117700
loss: 1.0051707029342651,grad_norm: 0.999999078912345, iteration: 117701
loss: 0.9995946884155273,grad_norm: 0.9394902773203864, iteration: 117702
loss: 1.0090432167053223,grad_norm: 0.8311295958857484, iteration: 117703
loss: 0.9946416020393372,grad_norm: 0.9999989818933105, iteration: 117704
loss: 1.0261249542236328,grad_norm: 0.9999992512331495, iteration: 117705
loss: 0.9824115633964539,grad_norm: 0.9461437546084582, iteration: 117706
loss: 0.9894250631332397,grad_norm: 0.9514300058311861, iteration: 117707
loss: 1.032234787940979,grad_norm: 0.9999990707098275, iteration: 117708
loss: 1.022876501083374,grad_norm: 0.9999991033989031, iteration: 117709
loss: 1.0174065828323364,grad_norm: 0.9999991872268726, iteration: 117710
loss: 0.9974896907806396,grad_norm: 0.940880519667499, iteration: 117711
loss: 1.117392659187317,grad_norm: 0.9999999525469433, iteration: 117712
loss: 1.0710930824279785,grad_norm: 0.9999993253774564, iteration: 117713
loss: 1.0047885179519653,grad_norm: 0.9999990652834483, iteration: 117714
loss: 1.0060045719146729,grad_norm: 0.9999991777379147, iteration: 117715
loss: 0.9369348883628845,grad_norm: 0.9804058316803717, iteration: 117716
loss: 0.9914424419403076,grad_norm: 0.9999991318085143, iteration: 117717
loss: 0.9964148998260498,grad_norm: 0.9330574507349481, iteration: 117718
loss: 1.002976655960083,grad_norm: 0.9999992158806479, iteration: 117719
loss: 0.9960470199584961,grad_norm: 0.9256013409197479, iteration: 117720
loss: 1.0293391942977905,grad_norm: 0.9999991926230736, iteration: 117721
loss: 1.0024635791778564,grad_norm: 0.9988572601057168, iteration: 117722
loss: 0.9958089590072632,grad_norm: 0.9999989348756073, iteration: 117723
loss: 1.0042322874069214,grad_norm: 0.949939144865096, iteration: 117724
loss: 1.034989833831787,grad_norm: 0.9999990165259313, iteration: 117725
loss: 1.003262996673584,grad_norm: 0.9999992334226324, iteration: 117726
loss: 1.0187876224517822,grad_norm: 0.9999992832031012, iteration: 117727
loss: 1.0126210451126099,grad_norm: 0.9999990707821326, iteration: 117728
loss: 1.015740156173706,grad_norm: 0.9206221189611857, iteration: 117729
loss: 1.0314587354660034,grad_norm: 0.9999992095099706, iteration: 117730
loss: 0.9654602408409119,grad_norm: 0.9999992550064464, iteration: 117731
loss: 0.9701723456382751,grad_norm: 0.9999991080419343, iteration: 117732
loss: 1.0194528102874756,grad_norm: 0.8644365980194636, iteration: 117733
loss: 1.0144410133361816,grad_norm: 0.9999990872108375, iteration: 117734
loss: 0.9770529866218567,grad_norm: 0.8497559369308967, iteration: 117735
loss: 1.0024032592773438,grad_norm: 0.9999990328476177, iteration: 117736
loss: 0.9789584279060364,grad_norm: 0.9999994958889551, iteration: 117737
loss: 0.9732893109321594,grad_norm: 0.9026613183686568, iteration: 117738
loss: 0.9803958535194397,grad_norm: 0.9999990872420447, iteration: 117739
loss: 0.9871636629104614,grad_norm: 0.9999996302954672, iteration: 117740
loss: 1.0150185823440552,grad_norm: 0.9056843238930231, iteration: 117741
loss: 1.0128620862960815,grad_norm: 0.9999993104485679, iteration: 117742
loss: 0.9810647964477539,grad_norm: 0.999999557683913, iteration: 117743
loss: 0.9694977402687073,grad_norm: 0.8698702091298923, iteration: 117744
loss: 1.0381370782852173,grad_norm: 0.9999995588858177, iteration: 117745
loss: 0.9864668846130371,grad_norm: 0.947537549187828, iteration: 117746
loss: 1.0022237300872803,grad_norm: 0.9999992321174332, iteration: 117747
loss: 0.9763312935829163,grad_norm: 0.9485669495911246, iteration: 117748
loss: 1.009595513343811,grad_norm: 0.9999993090801206, iteration: 117749
loss: 1.0469229221343994,grad_norm: 0.9999995751098893, iteration: 117750
loss: 0.9561259150505066,grad_norm: 0.9583904253443247, iteration: 117751
loss: 1.0092051029205322,grad_norm: 0.9999998037290105, iteration: 117752
loss: 1.0025163888931274,grad_norm: 0.9903896175279444, iteration: 117753
loss: 1.0726916790008545,grad_norm: 0.9999995280417514, iteration: 117754
loss: 1.0079536437988281,grad_norm: 0.9999992595093229, iteration: 117755
loss: 1.0069397687911987,grad_norm: 0.8994727267324428, iteration: 117756
loss: 0.9873855710029602,grad_norm: 0.9999992284477133, iteration: 117757
loss: 1.0166699886322021,grad_norm: 0.9999990931853882, iteration: 117758
loss: 0.9572046995162964,grad_norm: 0.9999990427424897, iteration: 117759
loss: 0.9838842749595642,grad_norm: 0.999999112471223, iteration: 117760
loss: 0.9988932013511658,grad_norm: 0.9959792735127841, iteration: 117761
loss: 0.9873854517936707,grad_norm: 0.9312181921529841, iteration: 117762
loss: 1.0053850412368774,grad_norm: 0.9999991942332148, iteration: 117763
loss: 1.0335805416107178,grad_norm: 0.9999990847557159, iteration: 117764
loss: 1.0344699621200562,grad_norm: 0.9999991327510592, iteration: 117765
loss: 0.9940500259399414,grad_norm: 0.999999103452773, iteration: 117766
loss: 0.9935139417648315,grad_norm: 0.9999991701411294, iteration: 117767
loss: 1.069964051246643,grad_norm: 0.9999991846887712, iteration: 117768
loss: 1.0235134363174438,grad_norm: 0.9999992810337878, iteration: 117769
loss: 1.0319985151290894,grad_norm: 0.9999997037903787, iteration: 117770
loss: 1.0066426992416382,grad_norm: 0.8552227936584498, iteration: 117771
loss: 1.046945571899414,grad_norm: 0.9999991330370411, iteration: 117772
loss: 0.9546403884887695,grad_norm: 0.9999992293092607, iteration: 117773
loss: 0.9678750038146973,grad_norm: 0.9999991371557313, iteration: 117774
loss: 0.9972798824310303,grad_norm: 0.9509379128783934, iteration: 117775
loss: 1.0012449026107788,grad_norm: 0.9999990966571, iteration: 117776
loss: 1.0069684982299805,grad_norm: 0.9999991393323021, iteration: 117777
loss: 1.0233814716339111,grad_norm: 0.9999991523910767, iteration: 117778
loss: 1.0363833904266357,grad_norm: 0.9999990836190414, iteration: 117779
loss: 0.9929866790771484,grad_norm: 0.9999992413352197, iteration: 117780
loss: 1.0187366008758545,grad_norm: 0.9999990773554827, iteration: 117781
loss: 0.9995747208595276,grad_norm: 0.9211548554589515, iteration: 117782
loss: 1.0006805658340454,grad_norm: 0.9999992410791987, iteration: 117783
loss: 1.0585333108901978,grad_norm: 0.999999044449488, iteration: 117784
loss: 0.9529581665992737,grad_norm: 0.9457221257115466, iteration: 117785
loss: 0.9817467927932739,grad_norm: 0.9999991635855977, iteration: 117786
loss: 0.9592612385749817,grad_norm: 0.9999992662162658, iteration: 117787
loss: 1.0200389623641968,grad_norm: 0.999999223844892, iteration: 117788
loss: 1.0508308410644531,grad_norm: 0.9999994997975207, iteration: 117789
loss: 1.0090954303741455,grad_norm: 0.999999159695512, iteration: 117790
loss: 0.977729082107544,grad_norm: 0.9999991275287595, iteration: 117791
loss: 1.0343233346939087,grad_norm: 0.9999991008875052, iteration: 117792
loss: 1.013305425643921,grad_norm: 0.9200401372519367, iteration: 117793
loss: 1.0129215717315674,grad_norm: 0.9999993656486356, iteration: 117794
loss: 0.9943028688430786,grad_norm: 0.9221131480043895, iteration: 117795
loss: 1.0635298490524292,grad_norm: 0.9999997192740377, iteration: 117796
loss: 0.992766261100769,grad_norm: 0.9999990589360505, iteration: 117797
loss: 1.0007615089416504,grad_norm: 0.9999991026552234, iteration: 117798
loss: 1.0245577096939087,grad_norm: 0.9999991496880788, iteration: 117799
loss: 0.9833709001541138,grad_norm: 0.9999991249023722, iteration: 117800
loss: 1.029415488243103,grad_norm: 0.999999101579897, iteration: 117801
loss: 1.0805069208145142,grad_norm: 0.9999992860298634, iteration: 117802
loss: 0.9864301681518555,grad_norm: 0.9999991068819616, iteration: 117803
loss: 1.013020634651184,grad_norm: 0.9999990091425688, iteration: 117804
loss: 0.989179790019989,grad_norm: 0.9999992099889957, iteration: 117805
loss: 0.9983503818511963,grad_norm: 0.9999990419913068, iteration: 117806
loss: 0.9873638153076172,grad_norm: 0.9999990836834592, iteration: 117807
loss: 1.0070077180862427,grad_norm: 0.999999069686018, iteration: 117808
loss: 1.025137186050415,grad_norm: 0.9999990650983198, iteration: 117809
loss: 1.0099656581878662,grad_norm: 0.9999989988694898, iteration: 117810
loss: 0.9849120378494263,grad_norm: 0.9999990309167672, iteration: 117811
loss: 1.0008035898208618,grad_norm: 0.9999992271631848, iteration: 117812
loss: 1.0217788219451904,grad_norm: 0.9999991757074341, iteration: 117813
loss: 1.0376169681549072,grad_norm: 0.953215312165667, iteration: 117814
loss: 1.0095908641815186,grad_norm: 0.9999992014163984, iteration: 117815
loss: 1.018631100654602,grad_norm: 0.9446940554085405, iteration: 117816
loss: 1.0189697742462158,grad_norm: 0.9999990878511666, iteration: 117817
loss: 1.0263017416000366,grad_norm: 0.8525279103013426, iteration: 117818
loss: 1.0392968654632568,grad_norm: 0.9999990579257351, iteration: 117819
loss: 0.9660974144935608,grad_norm: 0.999999114090492, iteration: 117820
loss: 1.0002790689468384,grad_norm: 0.9999992004474592, iteration: 117821
loss: 0.9945691227912903,grad_norm: 0.9999992808862445, iteration: 117822
loss: 1.0151898860931396,grad_norm: 0.9999991383727029, iteration: 117823
loss: 1.019118309020996,grad_norm: 0.9165483775061202, iteration: 117824
loss: 1.0167158842086792,grad_norm: 0.9999999476683059, iteration: 117825
loss: 1.0088744163513184,grad_norm: 0.9999992273360788, iteration: 117826
loss: 1.0103167295455933,grad_norm: 0.9999994046357004, iteration: 117827
loss: 0.9628806114196777,grad_norm: 0.9999992166218568, iteration: 117828
loss: 1.0314674377441406,grad_norm: 0.9999991361549678, iteration: 117829
loss: 0.9966974258422852,grad_norm: 0.9548521926269306, iteration: 117830
loss: 1.0148426294326782,grad_norm: 0.9537515646985405, iteration: 117831
loss: 1.028669834136963,grad_norm: 0.9999992500762538, iteration: 117832
loss: 1.0297356843948364,grad_norm: 0.9999992047204583, iteration: 117833
loss: 0.9817270040512085,grad_norm: 0.9999991725304733, iteration: 117834
loss: 0.9762846827507019,grad_norm: 0.9983504812265108, iteration: 117835
loss: 0.9999358057975769,grad_norm: 0.9766916013455689, iteration: 117836
loss: 0.998791515827179,grad_norm: 0.9999990349947165, iteration: 117837
loss: 0.9835315942764282,grad_norm: 0.880750953028552, iteration: 117838
loss: 0.9833568334579468,grad_norm: 0.990151711372127, iteration: 117839
loss: 0.9844390749931335,grad_norm: 0.9999990986593144, iteration: 117840
loss: 0.9801908731460571,grad_norm: 0.999999059729599, iteration: 117841
loss: 0.9924498796463013,grad_norm: 0.9999995187125673, iteration: 117842
loss: 1.0400391817092896,grad_norm: 0.9999995855066236, iteration: 117843
loss: 1.0259056091308594,grad_norm: 0.9999997261944509, iteration: 117844
loss: 1.0633443593978882,grad_norm: 0.9999991594189215, iteration: 117845
loss: 0.9707767963409424,grad_norm: 0.9999991924090907, iteration: 117846
loss: 0.9625366926193237,grad_norm: 0.9999992551980347, iteration: 117847
loss: 0.9946687817573547,grad_norm: 0.9999989822760562, iteration: 117848
loss: 1.0142136812210083,grad_norm: 0.9736848235385837, iteration: 117849
loss: 1.017777919769287,grad_norm: 0.8881676304051502, iteration: 117850
loss: 0.9878237843513489,grad_norm: 0.999999174030922, iteration: 117851
loss: 0.9816036224365234,grad_norm: 0.916227994194738, iteration: 117852
loss: 0.9939411282539368,grad_norm: 0.9999989078000442, iteration: 117853
loss: 0.9945561289787292,grad_norm: 0.9999990099910712, iteration: 117854
loss: 1.0394659042358398,grad_norm: 0.999999229539766, iteration: 117855
loss: 1.0080524682998657,grad_norm: 0.973266577731982, iteration: 117856
loss: 0.983967125415802,grad_norm: 0.9999991383424278, iteration: 117857
loss: 1.1008108854293823,grad_norm: 0.9999991577353232, iteration: 117858
loss: 1.0555803775787354,grad_norm: 0.9999990613787739, iteration: 117859
loss: 1.0124675035476685,grad_norm: 0.9999990683386615, iteration: 117860
loss: 0.9963852167129517,grad_norm: 0.9322672152722106, iteration: 117861
loss: 1.0170376300811768,grad_norm: 0.9987243393485429, iteration: 117862
loss: 0.9932620525360107,grad_norm: 0.8492913288914701, iteration: 117863
loss: 0.9687157273292542,grad_norm: 0.9999991039621486, iteration: 117864
loss: 0.9812997579574585,grad_norm: 0.9999990421012603, iteration: 117865
loss: 1.0033292770385742,grad_norm: 0.9999992660576904, iteration: 117866
loss: 0.9767741560935974,grad_norm: 0.9999991788151156, iteration: 117867
loss: 1.02790367603302,grad_norm: 0.9999993807174739, iteration: 117868
loss: 1.0381734371185303,grad_norm: 0.9999994555998483, iteration: 117869
loss: 0.9653899669647217,grad_norm: 0.9999992592959579, iteration: 117870
loss: 1.0518592596054077,grad_norm: 0.9999994521063924, iteration: 117871
loss: 1.0518348217010498,grad_norm: 0.9005400166911351, iteration: 117872
loss: 1.0030285120010376,grad_norm: 0.9999992596017051, iteration: 117873
loss: 1.0047833919525146,grad_norm: 0.9999991380360798, iteration: 117874
loss: 1.0436043739318848,grad_norm: 0.9999991862089574, iteration: 117875
loss: 1.028120517730713,grad_norm: 0.9365273226302213, iteration: 117876
loss: 0.9936531782150269,grad_norm: 0.999999057218667, iteration: 117877
loss: 1.0435130596160889,grad_norm: 0.9999991822506271, iteration: 117878
loss: 1.0642340183258057,grad_norm: 0.9999991439293326, iteration: 117879
loss: 1.030293583869934,grad_norm: 0.9572607495740612, iteration: 117880
loss: 0.9706147909164429,grad_norm: 0.9999992746975963, iteration: 117881
loss: 1.0198732614517212,grad_norm: 0.9999991150587917, iteration: 117882
loss: 0.9842324256896973,grad_norm: 0.9671194709911446, iteration: 117883
loss: 0.9571515321731567,grad_norm: 0.9999991112577524, iteration: 117884
loss: 0.9958239793777466,grad_norm: 0.9273487624828836, iteration: 117885
loss: 0.9950140118598938,grad_norm: 0.9999992588584794, iteration: 117886
loss: 1.011452078819275,grad_norm: 0.9999992286333663, iteration: 117887
loss: 1.0296528339385986,grad_norm: 0.9898998320397001, iteration: 117888
loss: 1.0312004089355469,grad_norm: 0.9835762636164277, iteration: 117889
loss: 1.0236701965332031,grad_norm: 0.9489292378274842, iteration: 117890
loss: 0.9886660575866699,grad_norm: 0.9999991578971701, iteration: 117891
loss: 1.0353606939315796,grad_norm: 0.999999071534946, iteration: 117892
loss: 1.0427666902542114,grad_norm: 0.9999994067738358, iteration: 117893
loss: 1.0066113471984863,grad_norm: 0.9999990620735755, iteration: 117894
loss: 0.9870709180831909,grad_norm: 0.9999990979880621, iteration: 117895
loss: 1.0314654111862183,grad_norm: 0.9713335066933035, iteration: 117896
loss: 0.9892998337745667,grad_norm: 0.8737202658580471, iteration: 117897
loss: 0.9847458600997925,grad_norm: 0.9999991506614606, iteration: 117898
loss: 0.9747881889343262,grad_norm: 0.9999990453013622, iteration: 117899
loss: 1.0071059465408325,grad_norm: 0.962209596882863, iteration: 117900
loss: 0.994299054145813,grad_norm: 0.9999992261445412, iteration: 117901
loss: 1.0218862295150757,grad_norm: 0.9999993500751329, iteration: 117902
loss: 1.0424246788024902,grad_norm: 0.9999993269225387, iteration: 117903
loss: 0.9741315841674805,grad_norm: 0.866454204450267, iteration: 117904
loss: 1.0087010860443115,grad_norm: 0.9586171292151938, iteration: 117905
loss: 0.9914188385009766,grad_norm: 0.9766747425499995, iteration: 117906
loss: 1.0450800657272339,grad_norm: 0.9344244408492156, iteration: 117907
loss: 1.038711428642273,grad_norm: 0.8870724025813651, iteration: 117908
loss: 0.9779017567634583,grad_norm: 0.9753496338257641, iteration: 117909
loss: 0.9971870183944702,grad_norm: 0.8831794707557604, iteration: 117910
loss: 1.0085116624832153,grad_norm: 0.9999997155876472, iteration: 117911
loss: 1.0157443284988403,grad_norm: 0.9999991175588118, iteration: 117912
loss: 1.0232027769088745,grad_norm: 0.9999990259107168, iteration: 117913
loss: 0.977782130241394,grad_norm: 0.971546581945244, iteration: 117914
loss: 0.9951456785202026,grad_norm: 0.999628175450987, iteration: 117915
loss: 1.026367425918579,grad_norm: 0.9999991015436287, iteration: 117916
loss: 1.031969666481018,grad_norm: 0.9999994640740825, iteration: 117917
loss: 1.0009918212890625,grad_norm: 0.999999193268265, iteration: 117918
loss: 0.9984855651855469,grad_norm: 0.9901402545565953, iteration: 117919
loss: 0.9771316647529602,grad_norm: 0.8246502004738409, iteration: 117920
loss: 0.9823301434516907,grad_norm: 0.999998957111947, iteration: 117921
loss: 0.9998410940170288,grad_norm: 0.9436867878005212, iteration: 117922
loss: 0.9800743460655212,grad_norm: 0.9999992369499424, iteration: 117923
loss: 1.00469970703125,grad_norm: 0.9999994693180008, iteration: 117924
loss: 1.0139964818954468,grad_norm: 0.999999103718236, iteration: 117925
loss: 0.9691764712333679,grad_norm: 0.9999993367522626, iteration: 117926
loss: 0.973209023475647,grad_norm: 0.9293125288473351, iteration: 117927
loss: 0.9927509427070618,grad_norm: 0.9999991763824035, iteration: 117928
loss: 0.9959030747413635,grad_norm: 0.9999991571464023, iteration: 117929
loss: 1.0585328340530396,grad_norm: 0.9999998575145994, iteration: 117930
loss: 0.9676406383514404,grad_norm: 0.9999992272790332, iteration: 117931
loss: 0.9905380010604858,grad_norm: 0.9999989716814848, iteration: 117932
loss: 1.119032382965088,grad_norm: 0.9999992358101015, iteration: 117933
loss: 1.0244643688201904,grad_norm: 0.9999998305561262, iteration: 117934
loss: 1.0376536846160889,grad_norm: 0.9391694712972084, iteration: 117935
loss: 1.0581210851669312,grad_norm: 0.9999999071536052, iteration: 117936
loss: 1.0170165300369263,grad_norm: 0.9915044830252394, iteration: 117937
loss: 1.004848837852478,grad_norm: 0.9176739490768838, iteration: 117938
loss: 1.0145690441131592,grad_norm: 0.9999990946593914, iteration: 117939
loss: 1.0289016962051392,grad_norm: 0.9999992104962769, iteration: 117940
loss: 1.0262430906295776,grad_norm: 0.9005892502751043, iteration: 117941
loss: 1.0072593688964844,grad_norm: 0.99999916942418, iteration: 117942
loss: 0.9916365146636963,grad_norm: 0.9999990787153232, iteration: 117943
loss: 0.9882557392120361,grad_norm: 0.9999992431244822, iteration: 117944
loss: 1.003572940826416,grad_norm: 0.9786995168811923, iteration: 117945
loss: 1.0203564167022705,grad_norm: 0.8576261844895624, iteration: 117946
loss: 0.9785661697387695,grad_norm: 0.9849160146949556, iteration: 117947
loss: 0.9742437601089478,grad_norm: 0.9999991477603803, iteration: 117948
loss: 1.0311082601547241,grad_norm: 0.999999032799224, iteration: 117949
loss: 1.0216715335845947,grad_norm: 0.9737712676882452, iteration: 117950
loss: 0.9905679821968079,grad_norm: 0.881296867879668, iteration: 117951
loss: 0.9823565483093262,grad_norm: 0.9999992127526253, iteration: 117952
loss: 1.0057721138000488,grad_norm: 0.9988459031986268, iteration: 117953
loss: 1.0208722352981567,grad_norm: 0.9993617186654384, iteration: 117954
loss: 1.0231951475143433,grad_norm: 0.8548001963057408, iteration: 117955
loss: 1.0368337631225586,grad_norm: 0.9999991776913008, iteration: 117956
loss: 1.0464228391647339,grad_norm: 0.9999990314139017, iteration: 117957
loss: 1.0774447917938232,grad_norm: 0.9999990359722005, iteration: 117958
loss: 1.0475441217422485,grad_norm: 0.9999997742977123, iteration: 117959
loss: 0.9959819912910461,grad_norm: 0.999999000018581, iteration: 117960
loss: 0.9753499031066895,grad_norm: 0.9807731532012993, iteration: 117961
loss: 1.0089188814163208,grad_norm: 0.993143133953139, iteration: 117962
loss: 0.9416491389274597,grad_norm: 0.9999989445532906, iteration: 117963
loss: 1.2066326141357422,grad_norm: 0.9999999026565431, iteration: 117964
loss: 1.2229018211364746,grad_norm: 0.9999995827291179, iteration: 117965
loss: 1.0008689165115356,grad_norm: 0.992971115802952, iteration: 117966
loss: 1.0186984539031982,grad_norm: 0.9999994978306406, iteration: 117967
loss: 1.0007619857788086,grad_norm: 0.9999991268430614, iteration: 117968
loss: 1.0319699048995972,grad_norm: 0.9999991890592907, iteration: 117969
loss: 1.008132815361023,grad_norm: 0.9999991270226928, iteration: 117970
loss: 0.99680495262146,grad_norm: 0.9999990823482426, iteration: 117971
loss: 0.9888007044792175,grad_norm: 0.9999993021322439, iteration: 117972
loss: 1.000465750694275,grad_norm: 0.9975604925606566, iteration: 117973
loss: 1.0271353721618652,grad_norm: 0.9522705741052134, iteration: 117974
loss: 1.1187998056411743,grad_norm: 0.9614442220205863, iteration: 117975
loss: 1.0090975761413574,grad_norm: 0.8342071267379714, iteration: 117976
loss: 1.0421497821807861,grad_norm: 0.9999994735530131, iteration: 117977
loss: 1.088426947593689,grad_norm: 0.8833412950215535, iteration: 117978
loss: 1.0631732940673828,grad_norm: 0.9999990841562177, iteration: 117979
loss: 1.0189298391342163,grad_norm: 0.9999999668682465, iteration: 117980
loss: 0.9695925712585449,grad_norm: 0.9999989858906361, iteration: 117981
loss: 1.0365959405899048,grad_norm: 0.9999992139704286, iteration: 117982
loss: 0.9911490678787231,grad_norm: 0.9999990268725777, iteration: 117983
loss: 0.9871898889541626,grad_norm: 0.999999075447999, iteration: 117984
loss: 0.9957293272018433,grad_norm: 0.9999997139429818, iteration: 117985
loss: 1.1945456266403198,grad_norm: 0.9999992407519012, iteration: 117986
loss: 1.0764265060424805,grad_norm: 0.9999999256430374, iteration: 117987
loss: 1.029065728187561,grad_norm: 0.9999991024335431, iteration: 117988
loss: 1.0234657526016235,grad_norm: 0.9999995241062805, iteration: 117989
loss: 1.0202444791793823,grad_norm: 0.9423609785289888, iteration: 117990
loss: 1.0340031385421753,grad_norm: 0.9999993647643566, iteration: 117991
loss: 1.095726728439331,grad_norm: 0.9999998615698494, iteration: 117992
loss: 1.0558576583862305,grad_norm: 0.999999247967993, iteration: 117993
loss: 1.024131178855896,grad_norm: 0.9999992080819167, iteration: 117994
loss: 1.206770658493042,grad_norm: 0.999999147950315, iteration: 117995
loss: 1.2057167291641235,grad_norm: 0.999999983334838, iteration: 117996
loss: 0.9912742376327515,grad_norm: 0.9999993030258, iteration: 117997
loss: 1.0451300144195557,grad_norm: 0.999999314874278, iteration: 117998
loss: 1.0078868865966797,grad_norm: 0.9999990593194928, iteration: 117999
loss: 1.1640021800994873,grad_norm: 0.9999993574299921, iteration: 118000
loss: 1.0128775835037231,grad_norm: 0.999999247051442, iteration: 118001
loss: 1.1404587030410767,grad_norm: 0.9999993178965555, iteration: 118002
loss: 1.0625255107879639,grad_norm: 0.9999991651300031, iteration: 118003
loss: 1.0561370849609375,grad_norm: 0.9999990932898531, iteration: 118004
loss: 1.0566452741622925,grad_norm: 0.999999554365864, iteration: 118005
loss: 1.171714186668396,grad_norm: 0.9999992662733076, iteration: 118006
loss: 1.1656126976013184,grad_norm: 0.9999999751110256, iteration: 118007
loss: 1.1142158508300781,grad_norm: 0.9999993129223144, iteration: 118008
loss: 1.1114633083343506,grad_norm: 0.999999476280486, iteration: 118009
loss: 1.0860044956207275,grad_norm: 0.9999994346148391, iteration: 118010
loss: 1.1905370950698853,grad_norm: 0.9999998663471585, iteration: 118011
loss: 1.0753254890441895,grad_norm: 0.9999997279620513, iteration: 118012
loss: 1.1170108318328857,grad_norm: 0.9999995056232086, iteration: 118013
loss: 1.230320930480957,grad_norm: 0.9999993899336855, iteration: 118014
loss: 1.020113229751587,grad_norm: 0.9999998497141975, iteration: 118015
loss: 1.223673939704895,grad_norm: 0.999999560307817, iteration: 118016
loss: 1.1276297569274902,grad_norm: 0.9999994706832321, iteration: 118017
loss: 1.0124127864837646,grad_norm: 0.9999994751500547, iteration: 118018
loss: 1.1255767345428467,grad_norm: 0.999999800420309, iteration: 118019
loss: 1.1379787921905518,grad_norm: 0.9999994786739519, iteration: 118020
loss: 1.1407413482666016,grad_norm: 0.999999776607761, iteration: 118021
loss: 1.16466224193573,grad_norm: 0.9999995535361292, iteration: 118022
loss: 1.1011580228805542,grad_norm: 0.9999992611255104, iteration: 118023
loss: 1.0479316711425781,grad_norm: 0.9999992058785961, iteration: 118024
loss: 1.1254760026931763,grad_norm: 0.999999959496894, iteration: 118025
loss: 1.1452919244766235,grad_norm: 0.9999990718797637, iteration: 118026
loss: 1.1119587421417236,grad_norm: 0.9999995420205607, iteration: 118027
loss: 1.038311243057251,grad_norm: 0.999999419512902, iteration: 118028
loss: 1.0635931491851807,grad_norm: 0.9999991909281402, iteration: 118029
loss: 1.1010982990264893,grad_norm: 0.9999992363196057, iteration: 118030
loss: 1.0974804162979126,grad_norm: 0.9999996765220721, iteration: 118031
loss: 1.1268929243087769,grad_norm: 0.9999998445028331, iteration: 118032
loss: 1.1112772226333618,grad_norm: 0.999999873018342, iteration: 118033
loss: 1.1038634777069092,grad_norm: 0.9999996416529741, iteration: 118034
loss: 1.1320946216583252,grad_norm: 0.9999999307971957, iteration: 118035
loss: 1.0318108797073364,grad_norm: 0.9999999595955843, iteration: 118036
loss: 1.1616830825805664,grad_norm: 0.9999998882626661, iteration: 118037
loss: 1.092902421951294,grad_norm: 0.9999994316284232, iteration: 118038
loss: 1.0783756971359253,grad_norm: 0.9999992958452931, iteration: 118039
loss: 1.0561387538909912,grad_norm: 0.9999994319922474, iteration: 118040
loss: 1.1184680461883545,grad_norm: 0.9999990498643114, iteration: 118041
loss: 1.117002248764038,grad_norm: 0.9999992854398588, iteration: 118042
loss: 1.0233674049377441,grad_norm: 0.9999994292775108, iteration: 118043
loss: 1.339099407196045,grad_norm: 0.9999999338747143, iteration: 118044
loss: 1.127091646194458,grad_norm: 0.9714027251576098, iteration: 118045
loss: 1.0244007110595703,grad_norm: 0.9999991517482606, iteration: 118046
loss: 1.150072693824768,grad_norm: 0.9999992664566704, iteration: 118047
loss: 1.072407603263855,grad_norm: 0.99999939475473, iteration: 118048
loss: 1.073839783668518,grad_norm: 0.9999997188969596, iteration: 118049
loss: 1.0612902641296387,grad_norm: 0.9999995500549015, iteration: 118050
loss: 1.1296440362930298,grad_norm: 0.9999998565420141, iteration: 118051
loss: 1.155025601387024,grad_norm: 0.9999998742641769, iteration: 118052
loss: 1.0915104150772095,grad_norm: 0.9999999157502416, iteration: 118053
loss: 1.0576740503311157,grad_norm: 0.9999994605274297, iteration: 118054
loss: 0.9792435765266418,grad_norm: 0.9999993757893827, iteration: 118055
loss: 1.1747206449508667,grad_norm: 0.9999999540984446, iteration: 118056
loss: 1.1910815238952637,grad_norm: 0.9999997824008671, iteration: 118057
loss: 0.9746660590171814,grad_norm: 0.9999993027576197, iteration: 118058
loss: 1.1195955276489258,grad_norm: 0.9999998015548698, iteration: 118059
loss: 1.0163977146148682,grad_norm: 0.9999991743049877, iteration: 118060
loss: 0.9890409708023071,grad_norm: 1.0000000193186642, iteration: 118061
loss: 1.0398694276809692,grad_norm: 0.999999846991279, iteration: 118062
loss: 0.989398181438446,grad_norm: 0.999999421890436, iteration: 118063
loss: 1.0542919635772705,grad_norm: 0.9999992253426834, iteration: 118064
loss: 1.008002758026123,grad_norm: 0.9999993648241694, iteration: 118065
loss: 1.115607738494873,grad_norm: 0.9999998914879873, iteration: 118066
loss: 1.184249997138977,grad_norm: 0.9999994546308676, iteration: 118067
loss: 1.0926913022994995,grad_norm: 0.9999993077074303, iteration: 118068
loss: 1.1238170862197876,grad_norm: 0.9999994334181704, iteration: 118069
loss: 0.9903237223625183,grad_norm: 0.9999992454743715, iteration: 118070
loss: 0.994632363319397,grad_norm: 0.9704589292298393, iteration: 118071
loss: 1.014117956161499,grad_norm: 0.9999990690917414, iteration: 118072
loss: 1.0238460302352905,grad_norm: 0.999999551331482, iteration: 118073
loss: 1.0539603233337402,grad_norm: 0.9999996463950107, iteration: 118074
loss: 1.092392086982727,grad_norm: 0.9999994254320521, iteration: 118075
loss: 1.0642298460006714,grad_norm: 0.9999991527400188, iteration: 118076
loss: 1.064160943031311,grad_norm: 0.9999999500200284, iteration: 118077
loss: 1.0942450761795044,grad_norm: 0.9999999187877125, iteration: 118078
loss: 1.072709083557129,grad_norm: 0.999999750192156, iteration: 118079
loss: 1.0436525344848633,grad_norm: 0.9999993750818236, iteration: 118080
loss: 1.0749626159667969,grad_norm: 1.000000092557066, iteration: 118081
loss: 1.188000202178955,grad_norm: 0.9999992615532477, iteration: 118082
loss: 1.0748707056045532,grad_norm: 0.9999999088730773, iteration: 118083
loss: 1.042783498764038,grad_norm: 0.9999992828570822, iteration: 118084
loss: 0.9645833969116211,grad_norm: 0.9097985382670788, iteration: 118085
loss: 1.093540906906128,grad_norm: 0.9999996934151624, iteration: 118086
loss: 1.0153671503067017,grad_norm: 0.9999990500246607, iteration: 118087
loss: 0.9974839091300964,grad_norm: 0.955128345034753, iteration: 118088
loss: 1.043562412261963,grad_norm: 0.9999995566554533, iteration: 118089
loss: 1.0177350044250488,grad_norm: 0.9999990433928042, iteration: 118090
loss: 1.0319925546646118,grad_norm: 0.9999992448507588, iteration: 118091
loss: 1.0151323080062866,grad_norm: 0.9999993046070683, iteration: 118092
loss: 1.0326420068740845,grad_norm: 0.9999995183457037, iteration: 118093
loss: 1.025651216506958,grad_norm: 0.9999993217749765, iteration: 118094
loss: 1.0227515697479248,grad_norm: 0.9999991466006797, iteration: 118095
loss: 1.0102006196975708,grad_norm: 0.9999991091114373, iteration: 118096
loss: 1.2381654977798462,grad_norm: 0.9999996677073432, iteration: 118097
loss: 1.000479817390442,grad_norm: 0.9782218630470109, iteration: 118098
loss: 1.0024120807647705,grad_norm: 0.9999991623227373, iteration: 118099
loss: 1.0011838674545288,grad_norm: 0.9285294163895236, iteration: 118100
loss: 1.0420329570770264,grad_norm: 0.9999995006222417, iteration: 118101
loss: 1.1139222383499146,grad_norm: 0.9999998846115221, iteration: 118102
loss: 1.0050311088562012,grad_norm: 0.9999991286411074, iteration: 118103
loss: 1.0539668798446655,grad_norm: 0.9865628727634126, iteration: 118104
loss: 1.0187382698059082,grad_norm: 0.9999992477213262, iteration: 118105
loss: 1.0851643085479736,grad_norm: 0.9999999164430232, iteration: 118106
loss: 1.066341519355774,grad_norm: 0.9999994559022931, iteration: 118107
loss: 1.024304986000061,grad_norm: 0.9999990788285474, iteration: 118108
loss: 0.9787835478782654,grad_norm: 0.917313382077785, iteration: 118109
loss: 1.0295222997665405,grad_norm: 0.9882325106226524, iteration: 118110
loss: 1.0565881729125977,grad_norm: 0.9999992751473091, iteration: 118111
loss: 1.0810014009475708,grad_norm: 0.9999991083093022, iteration: 118112
loss: 1.084316372871399,grad_norm: 0.9999993463023679, iteration: 118113
loss: 1.0189868211746216,grad_norm: 0.9999994856831115, iteration: 118114
loss: 1.1419684886932373,grad_norm: 0.9999998956274752, iteration: 118115
loss: 1.0044666528701782,grad_norm: 0.9979300995648225, iteration: 118116
loss: 0.9753548502922058,grad_norm: 0.999999391737526, iteration: 118117
loss: 1.0014269351959229,grad_norm: 0.9999991554072757, iteration: 118118
loss: 1.0191707611083984,grad_norm: 0.9999990323175063, iteration: 118119
loss: 1.0074725151062012,grad_norm: 0.9999989276483982, iteration: 118120
loss: 0.9901164770126343,grad_norm: 0.9999994039476959, iteration: 118121
loss: 1.025227665901184,grad_norm: 0.9999999996852067, iteration: 118122
loss: 1.1204992532730103,grad_norm: 0.999999226117567, iteration: 118123
loss: 1.070294976234436,grad_norm: 0.9999991354580903, iteration: 118124
loss: 0.9913196563720703,grad_norm: 0.9999991806028998, iteration: 118125
loss: 1.087589144706726,grad_norm: 0.9999998134969611, iteration: 118126
loss: 1.031380534172058,grad_norm: 0.999999312960883, iteration: 118127
loss: 0.9951549768447876,grad_norm: 0.9999991161908937, iteration: 118128
loss: 1.01925528049469,grad_norm: 0.9999993121458345, iteration: 118129
loss: 0.9391530752182007,grad_norm: 0.9984500778530766, iteration: 118130
loss: 1.1276533603668213,grad_norm: 0.9706729008104173, iteration: 118131
loss: 1.0037057399749756,grad_norm: 0.9874096503594716, iteration: 118132
loss: 1.0496385097503662,grad_norm: 0.9228267516827493, iteration: 118133
loss: 1.0211135149002075,grad_norm: 0.9999993772111604, iteration: 118134
loss: 1.027854084968567,grad_norm: 0.9999991106630374, iteration: 118135
loss: 1.1458059549331665,grad_norm: 0.9999993374451941, iteration: 118136
loss: 0.9888086915016174,grad_norm: 0.965287713045882, iteration: 118137
loss: 0.9992485642433167,grad_norm: 0.9999990405756246, iteration: 118138
loss: 1.129473090171814,grad_norm: 0.9999996309869282, iteration: 118139
loss: 0.9747312664985657,grad_norm: 0.9279903015989444, iteration: 118140
loss: 0.9891520738601685,grad_norm: 0.9999997275375063, iteration: 118141
loss: 0.9777437448501587,grad_norm: 0.9999991023869332, iteration: 118142
loss: 1.0425297021865845,grad_norm: 0.9955840018503732, iteration: 118143
loss: 1.0165863037109375,grad_norm: 0.999999782473812, iteration: 118144
loss: 1.0166347026824951,grad_norm: 0.9999996079030111, iteration: 118145
loss: 1.0177031755447388,grad_norm: 0.9999995435511418, iteration: 118146
loss: 0.9870725870132446,grad_norm: 0.9583847082679889, iteration: 118147
loss: 1.0615954399108887,grad_norm: 0.9999995437202892, iteration: 118148
loss: 1.0038599967956543,grad_norm: 0.9999989048829483, iteration: 118149
loss: 1.1372249126434326,grad_norm: 0.9999996873983635, iteration: 118150
loss: 1.001194953918457,grad_norm: 0.99999924828951, iteration: 118151
loss: 0.9744181632995605,grad_norm: 0.954248706537512, iteration: 118152
loss: 1.0515447854995728,grad_norm: 0.9999998487379721, iteration: 118153
loss: 1.0022735595703125,grad_norm: 0.9999995396502738, iteration: 118154
loss: 0.9689167141914368,grad_norm: 0.9622603098143664, iteration: 118155
loss: 1.103759527206421,grad_norm: 0.9999998276452472, iteration: 118156
loss: 1.135703444480896,grad_norm: 0.9999998001679739, iteration: 118157
loss: 1.0559337139129639,grad_norm: 0.9907510766654725, iteration: 118158
loss: 1.0075603723526,grad_norm: 0.9999996029198119, iteration: 118159
loss: 0.9934865832328796,grad_norm: 0.9999990933234654, iteration: 118160
loss: 1.0389995574951172,grad_norm: 0.99999966700626, iteration: 118161
loss: 1.0523477792739868,grad_norm: 0.999999406077146, iteration: 118162
loss: 1.1198843717575073,grad_norm: 0.9999998515913302, iteration: 118163
loss: 1.0126668214797974,grad_norm: 0.9999992673074253, iteration: 118164
loss: 1.0362260341644287,grad_norm: 0.9999990116301464, iteration: 118165
loss: 0.9864479303359985,grad_norm: 0.9999993708874214, iteration: 118166
loss: 1.0998470783233643,grad_norm: 0.999999822145785, iteration: 118167
loss: 1.0161077976226807,grad_norm: 0.9999990728533941, iteration: 118168
loss: 1.0790661573410034,grad_norm: 0.9999998259993638, iteration: 118169
loss: 1.2550853490829468,grad_norm: 0.9999998766321099, iteration: 118170
loss: 1.030857801437378,grad_norm: 0.9999998788897367, iteration: 118171
loss: 1.0309327840805054,grad_norm: 0.9999993143889812, iteration: 118172
loss: 1.0022225379943848,grad_norm: 0.9328250871877939, iteration: 118173
loss: 1.0784367322921753,grad_norm: 0.999999236518243, iteration: 118174
loss: 1.0496413707733154,grad_norm: 0.9999992489956124, iteration: 118175
loss: 0.9923848509788513,grad_norm: 0.9999990976165916, iteration: 118176
loss: 1.0131218433380127,grad_norm: 0.9999993391183203, iteration: 118177
loss: 1.0062048435211182,grad_norm: 0.9999990883764641, iteration: 118178
loss: 1.038217306137085,grad_norm: 0.9999996446731844, iteration: 118179
loss: 1.0108095407485962,grad_norm: 0.9999991260856256, iteration: 118180
loss: 1.068074107170105,grad_norm: 0.9999996473203485, iteration: 118181
loss: 0.9673770070075989,grad_norm: 0.9999991337305782, iteration: 118182
loss: 1.0540858507156372,grad_norm: 0.9999998175890806, iteration: 118183
loss: 0.9847453236579895,grad_norm: 0.9999994253511957, iteration: 118184
loss: 0.9978548288345337,grad_norm: 0.8990549949768247, iteration: 118185
loss: 1.0188891887664795,grad_norm: 0.9999991382912713, iteration: 118186
loss: 0.983642578125,grad_norm: 0.9999998323349941, iteration: 118187
loss: 1.0002648830413818,grad_norm: 0.9999990982035915, iteration: 118188
loss: 1.0279505252838135,grad_norm: 0.999999108106398, iteration: 118189
loss: 1.0698928833007812,grad_norm: 0.9999995174260657, iteration: 118190
loss: 0.9808438420295715,grad_norm: 0.9999990990006625, iteration: 118191
loss: 1.0430071353912354,grad_norm: 0.9999994097800111, iteration: 118192
loss: 1.0300196409225464,grad_norm: 0.9611865889248439, iteration: 118193
loss: 1.0155155658721924,grad_norm: 0.9999992270652222, iteration: 118194
loss: 1.0255327224731445,grad_norm: 0.9999998334919918, iteration: 118195
loss: 0.9768797755241394,grad_norm: 0.9999991492340405, iteration: 118196
loss: 1.0520992279052734,grad_norm: 0.9999993061668044, iteration: 118197
loss: 1.060470700263977,grad_norm: 0.9999999292633402, iteration: 118198
loss: 1.0097240209579468,grad_norm: 0.9747347942356829, iteration: 118199
loss: 1.0042483806610107,grad_norm: 0.9999990415876705, iteration: 118200
loss: 0.9885490536689758,grad_norm: 0.9999992162012162, iteration: 118201
loss: 1.0353928804397583,grad_norm: 0.9999992155644402, iteration: 118202
loss: 1.1333837509155273,grad_norm: 0.9999992450113827, iteration: 118203
loss: 1.0249046087265015,grad_norm: 0.9999999475885196, iteration: 118204
loss: 1.0319732427597046,grad_norm: 0.9999995613901708, iteration: 118205
loss: 1.0478962659835815,grad_norm: 0.9999990521979167, iteration: 118206
loss: 1.067378044128418,grad_norm: 0.9999993036199853, iteration: 118207
loss: 0.9690278768539429,grad_norm: 0.9999992338857137, iteration: 118208
loss: 0.997027575969696,grad_norm: 0.8731046751701543, iteration: 118209
loss: 1.000917673110962,grad_norm: 0.9523420700214884, iteration: 118210
loss: 0.9890975952148438,grad_norm: 0.9147856269467668, iteration: 118211
loss: 0.9807096123695374,grad_norm: 0.8656733445089146, iteration: 118212
loss: 0.9875952005386353,grad_norm: 0.8689999733250129, iteration: 118213
loss: 0.9964373707771301,grad_norm: 0.9999991794901656, iteration: 118214
loss: 1.0549529790878296,grad_norm: 0.9999999107571871, iteration: 118215
loss: 1.0198251008987427,grad_norm: 0.9999993688429613, iteration: 118216
loss: 0.9921818971633911,grad_norm: 0.866913405785583, iteration: 118217
loss: 1.017755150794983,grad_norm: 0.9999990276300725, iteration: 118218
loss: 1.0611363649368286,grad_norm: 0.9999997810453709, iteration: 118219
loss: 1.091658115386963,grad_norm: 0.9999997899063205, iteration: 118220
loss: 0.9825728535652161,grad_norm: 0.9295038029947648, iteration: 118221
loss: 1.1215311288833618,grad_norm: 0.9999993996453438, iteration: 118222
loss: 0.9903998374938965,grad_norm: 0.9476889503715984, iteration: 118223
loss: 1.0130627155303955,grad_norm: 0.9999992945402141, iteration: 118224
loss: 0.9852272272109985,grad_norm: 0.999999151316372, iteration: 118225
loss: 1.006609559059143,grad_norm: 0.9375845641447447, iteration: 118226
loss: 0.9885744452476501,grad_norm: 0.9999998095327914, iteration: 118227
loss: 1.0406502485275269,grad_norm: 0.9999990757772429, iteration: 118228
loss: 1.0210951566696167,grad_norm: 0.9999992978591616, iteration: 118229
loss: 0.9872556924819946,grad_norm: 0.9359459597703655, iteration: 118230
loss: 0.983156144618988,grad_norm: 0.9999990841691094, iteration: 118231
loss: 1.0013864040374756,grad_norm: 0.9999991084590273, iteration: 118232
loss: 0.9796019196510315,grad_norm: 0.9999991347872053, iteration: 118233
loss: 0.9907363057136536,grad_norm: 0.9180908760086772, iteration: 118234
loss: 0.9598106741905212,grad_norm: 0.9999991451961354, iteration: 118235
loss: 1.002872347831726,grad_norm: 0.9480651054401584, iteration: 118236
loss: 1.0009931325912476,grad_norm: 0.9999991310460464, iteration: 118237
loss: 0.9924936294555664,grad_norm: 0.9999991192937575, iteration: 118238
loss: 1.0355749130249023,grad_norm: 0.999999293251986, iteration: 118239
loss: 1.01298987865448,grad_norm: 0.9999990869456425, iteration: 118240
loss: 1.0012811422348022,grad_norm: 0.999999246982723, iteration: 118241
loss: 0.9935777187347412,grad_norm: 0.999999137728389, iteration: 118242
loss: 0.9828211665153503,grad_norm: 0.9999989862420877, iteration: 118243
loss: 0.960612416267395,grad_norm: 0.9999991267822811, iteration: 118244
loss: 0.9712448120117188,grad_norm: 0.9999991031891029, iteration: 118245
loss: 0.9849033951759338,grad_norm: 0.9999992275426569, iteration: 118246
loss: 1.0027801990509033,grad_norm: 0.9969901889708721, iteration: 118247
loss: 1.0084444284439087,grad_norm: 0.9999990426680547, iteration: 118248
loss: 0.9717866778373718,grad_norm: 0.999999265200424, iteration: 118249
loss: 1.0218849182128906,grad_norm: 0.9267160605664647, iteration: 118250
loss: 1.0029250383377075,grad_norm: 0.9999990008521735, iteration: 118251
loss: 0.994699239730835,grad_norm: 0.9662212716897907, iteration: 118252
loss: 0.98126620054245,grad_norm: 0.9999989962136993, iteration: 118253
loss: 0.9873138070106506,grad_norm: 0.9999989064370156, iteration: 118254
loss: 0.9985386729240417,grad_norm: 0.9999991063489364, iteration: 118255
loss: 1.0058422088623047,grad_norm: 0.9999990641954211, iteration: 118256
loss: 1.0130877494812012,grad_norm: 0.9999991501948264, iteration: 118257
loss: 1.0022196769714355,grad_norm: 0.9359398799929113, iteration: 118258
loss: 1.0190142393112183,grad_norm: 0.9999996915325786, iteration: 118259
loss: 0.9992957711219788,grad_norm: 0.9426074237330903, iteration: 118260
loss: 1.026274561882019,grad_norm: 0.9999996602799112, iteration: 118261
loss: 1.0333178043365479,grad_norm: 0.9999989288733756, iteration: 118262
loss: 1.0186004638671875,grad_norm: 0.9999991606282219, iteration: 118263
loss: 0.9905617833137512,grad_norm: 0.999999419521329, iteration: 118264
loss: 1.023058295249939,grad_norm: 0.9999991268796273, iteration: 118265
loss: 1.004133701324463,grad_norm: 0.9486614482549839, iteration: 118266
loss: 1.009453296661377,grad_norm: 0.999999085785708, iteration: 118267
loss: 1.0143160820007324,grad_norm: 0.7783106096043437, iteration: 118268
loss: 0.9935723543167114,grad_norm: 0.9999990653704528, iteration: 118269
loss: 1.0216516256332397,grad_norm: 0.9999989381995894, iteration: 118270
loss: 1.0118755102157593,grad_norm: 0.9999989270721582, iteration: 118271
loss: 0.9994513392448425,grad_norm: 0.9999993101062017, iteration: 118272
loss: 0.9869608283042908,grad_norm: 0.9999992038023087, iteration: 118273
loss: 0.9956580400466919,grad_norm: 0.9204669807475583, iteration: 118274
loss: 1.0195506811141968,grad_norm: 0.999999393565315, iteration: 118275
loss: 1.0173205137252808,grad_norm: 0.9062077131469464, iteration: 118276
loss: 1.0141738653182983,grad_norm: 0.9910364657089507, iteration: 118277
loss: 1.0136553049087524,grad_norm: 0.9999991506411194, iteration: 118278
loss: 1.0013573169708252,grad_norm: 0.9999990693693156, iteration: 118279
loss: 0.979636549949646,grad_norm: 0.9999992888829412, iteration: 118280
loss: 1.0405125617980957,grad_norm: 0.8911023158114953, iteration: 118281
loss: 0.99375981092453,grad_norm: 0.9999991313680696, iteration: 118282
loss: 0.9971577525138855,grad_norm: 0.9999991795783164, iteration: 118283
loss: 0.9942014813423157,grad_norm: 0.9999990433759257, iteration: 118284
loss: 1.017037272453308,grad_norm: 0.9999990866067078, iteration: 118285
loss: 0.9979439377784729,grad_norm: 0.7803925085117758, iteration: 118286
loss: 1.0027068853378296,grad_norm: 0.9999991489770168, iteration: 118287
loss: 1.0297623872756958,grad_norm: 0.999999898159188, iteration: 118288
loss: 1.0215048789978027,grad_norm: 0.9999991614976872, iteration: 118289
loss: 0.9582871794700623,grad_norm: 0.9999990423313815, iteration: 118290
loss: 1.0030949115753174,grad_norm: 0.9999990305226968, iteration: 118291
loss: 1.0414613485336304,grad_norm: 0.8653299378003261, iteration: 118292
loss: 1.0044265985488892,grad_norm: 0.9999992175265353, iteration: 118293
loss: 0.9703227281570435,grad_norm: 0.9999989333870224, iteration: 118294
loss: 0.988699734210968,grad_norm: 0.9626575904025855, iteration: 118295
loss: 1.0184310674667358,grad_norm: 0.9999993203539606, iteration: 118296
loss: 1.0058550834655762,grad_norm: 0.9252543383843764, iteration: 118297
loss: 1.0170106887817383,grad_norm: 0.9999992869964452, iteration: 118298
loss: 0.9848988056182861,grad_norm: 0.9999991090071086, iteration: 118299
loss: 0.9699681997299194,grad_norm: 0.9999989990336325, iteration: 118300
loss: 1.0164567232131958,grad_norm: 0.9999992017943397, iteration: 118301
loss: 0.9697486758232117,grad_norm: 0.9280820330141968, iteration: 118302
loss: 0.9826601147651672,grad_norm: 0.9999990708808089, iteration: 118303
loss: 1.0046507120132446,grad_norm: 0.9999991323998982, iteration: 118304
loss: 0.949751615524292,grad_norm: 0.9999990244457976, iteration: 118305
loss: 1.0405840873718262,grad_norm: 0.9999992861179378, iteration: 118306
loss: 1.0034916400909424,grad_norm: 0.9999991363507422, iteration: 118307
loss: 1.009284496307373,grad_norm: 0.9691094702461028, iteration: 118308
loss: 0.9761285185813904,grad_norm: 0.9163244159416251, iteration: 118309
loss: 0.972894549369812,grad_norm: 0.9999990406862064, iteration: 118310
loss: 1.0118502378463745,grad_norm: 0.9999991567268653, iteration: 118311
loss: 1.0194700956344604,grad_norm: 0.9999992054348701, iteration: 118312
loss: 0.971853494644165,grad_norm: 0.999999193924235, iteration: 118313
loss: 1.0455728769302368,grad_norm: 0.9999990292953248, iteration: 118314
loss: 0.9883370995521545,grad_norm: 0.9999990447737936, iteration: 118315
loss: 1.0894529819488525,grad_norm: 0.9999990729807318, iteration: 118316
loss: 1.0022715330123901,grad_norm: 0.9999990951095495, iteration: 118317
loss: 1.0255317687988281,grad_norm: 0.9931691584588362, iteration: 118318
loss: 0.9669991731643677,grad_norm: 0.999999294700026, iteration: 118319
loss: 0.9783087968826294,grad_norm: 0.9999991263878627, iteration: 118320
loss: 1.016005516052246,grad_norm: 0.9999990825745688, iteration: 118321
loss: 1.0107747316360474,grad_norm: 0.9999993280955125, iteration: 118322
loss: 0.9838393330574036,grad_norm: 0.9777027610618931, iteration: 118323
loss: 1.0133315324783325,grad_norm: 0.9999991279670868, iteration: 118324
loss: 1.0824604034423828,grad_norm: 0.9999994161958146, iteration: 118325
loss: 0.9962447285652161,grad_norm: 0.8597197470337375, iteration: 118326
loss: 1.0496727228164673,grad_norm: 0.9999995652978941, iteration: 118327
loss: 0.9976365566253662,grad_norm: 0.9999992019483662, iteration: 118328
loss: 0.9918529391288757,grad_norm: 0.9406391552177484, iteration: 118329
loss: 0.9711576104164124,grad_norm: 0.9999991854812642, iteration: 118330
loss: 1.0040174722671509,grad_norm: 0.9982414876356074, iteration: 118331
loss: 1.0140459537506104,grad_norm: 0.9799350930679556, iteration: 118332
loss: 1.0146002769470215,grad_norm: 0.9999990924787506, iteration: 118333
loss: 1.0011481046676636,grad_norm: 0.9999992460169707, iteration: 118334
loss: 0.983652651309967,grad_norm: 0.9999990306813302, iteration: 118335
loss: 0.9963170289993286,grad_norm: 0.9789226856172104, iteration: 118336
loss: 1.0316509008407593,grad_norm: 0.9999995280787094, iteration: 118337
loss: 0.9753302335739136,grad_norm: 0.9999992119649487, iteration: 118338
loss: 1.041975975036621,grad_norm: 0.9999991235263952, iteration: 118339
loss: 1.0259524583816528,grad_norm: 0.9999989169484151, iteration: 118340
loss: 0.9805306792259216,grad_norm: 0.9999992543516849, iteration: 118341
loss: 0.9675120711326599,grad_norm: 0.9705379859981008, iteration: 118342
loss: 1.0120007991790771,grad_norm: 0.8865379410610843, iteration: 118343
loss: 0.9732242226600647,grad_norm: 0.999999208685201, iteration: 118344
loss: 1.061353087425232,grad_norm: 0.9999992753802994, iteration: 118345
loss: 1.0070234537124634,grad_norm: 0.9999996853043737, iteration: 118346
loss: 1.0610507726669312,grad_norm: 0.9999991239998356, iteration: 118347
loss: 1.0372017621994019,grad_norm: 0.9999999481648809, iteration: 118348
loss: 1.0026195049285889,grad_norm: 0.8807666184277315, iteration: 118349
loss: 0.9716047048568726,grad_norm: 0.853458005893289, iteration: 118350
loss: 1.0181869268417358,grad_norm: 0.9999992512380537, iteration: 118351
loss: 0.9975135326385498,grad_norm: 0.978696170137964, iteration: 118352
loss: 1.0210506916046143,grad_norm: 0.9962021287915908, iteration: 118353
loss: 1.0196549892425537,grad_norm: 0.9909077158189885, iteration: 118354
loss: 1.0009686946868896,grad_norm: 0.9999992052112474, iteration: 118355
loss: 1.0025303363800049,grad_norm: 0.8070177358194652, iteration: 118356
loss: 0.9492254853248596,grad_norm: 0.9999992040947632, iteration: 118357
loss: 1.176520586013794,grad_norm: 0.9999991570650147, iteration: 118358
loss: 1.0041446685791016,grad_norm: 0.9520690504127286, iteration: 118359
loss: 0.9642150402069092,grad_norm: 0.9934001367662348, iteration: 118360
loss: 0.9947600364685059,grad_norm: 0.9999991037494874, iteration: 118361
loss: 0.9990442395210266,grad_norm: 0.9999992335765451, iteration: 118362
loss: 0.9903780221939087,grad_norm: 0.9795732038480016, iteration: 118363
loss: 1.04485285282135,grad_norm: 0.999999225255158, iteration: 118364
loss: 1.0591840744018555,grad_norm: 0.9419900117342984, iteration: 118365
loss: 1.0789954662322998,grad_norm: 0.9999991616086514, iteration: 118366
loss: 0.9998560547828674,grad_norm: 0.9775842661207035, iteration: 118367
loss: 0.9894534945487976,grad_norm: 0.999999065165474, iteration: 118368
loss: 1.0038377046585083,grad_norm: 0.9999990501183078, iteration: 118369
loss: 1.0102483034133911,grad_norm: 0.9999989837339226, iteration: 118370
loss: 1.0100407600402832,grad_norm: 0.9999996193461912, iteration: 118371
loss: 1.0192649364471436,grad_norm: 0.9999996831394719, iteration: 118372
loss: 1.037971019744873,grad_norm: 0.9999992437712746, iteration: 118373
loss: 0.9953287243843079,grad_norm: 0.9355858017215788, iteration: 118374
loss: 1.0834392309188843,grad_norm: 0.9999991601775003, iteration: 118375
loss: 0.9757031798362732,grad_norm: 0.9999991840529138, iteration: 118376
loss: 1.0024551153182983,grad_norm: 0.9999991454455964, iteration: 118377
loss: 0.9914559125900269,grad_norm: 0.9999990781700138, iteration: 118378
loss: 1.0197176933288574,grad_norm: 0.9999995634708444, iteration: 118379
loss: 0.9881382584571838,grad_norm: 0.9999990149284005, iteration: 118380
loss: 1.0014606714248657,grad_norm: 0.9999990984341736, iteration: 118381
loss: 1.0497914552688599,grad_norm: 0.9851448130363657, iteration: 118382
loss: 0.962465763092041,grad_norm: 0.9999992690970164, iteration: 118383
loss: 1.0057557821273804,grad_norm: 0.9999991968267576, iteration: 118384
loss: 0.9521136283874512,grad_norm: 0.9999991357779849, iteration: 118385
loss: 1.0068986415863037,grad_norm: 0.9999990477245929, iteration: 118386
loss: 1.0074191093444824,grad_norm: 0.999999041603025, iteration: 118387
loss: 1.0176531076431274,grad_norm: 0.8510771797593675, iteration: 118388
loss: 1.0182490348815918,grad_norm: 0.9999991456977729, iteration: 118389
loss: 1.0021954774856567,grad_norm: 0.9999992942803031, iteration: 118390
loss: 1.0239735841751099,grad_norm: 0.9999998879830696, iteration: 118391
loss: 1.0064977407455444,grad_norm: 0.9999991346453244, iteration: 118392
loss: 1.0106110572814941,grad_norm: 0.9999991724005178, iteration: 118393
loss: 1.0165026187896729,grad_norm: 0.9999991184935756, iteration: 118394
loss: 0.9781510233879089,grad_norm: 0.9720812587794322, iteration: 118395
loss: 0.995589017868042,grad_norm: 0.8984136086201294, iteration: 118396
loss: 0.9763959050178528,grad_norm: 0.924920335177093, iteration: 118397
loss: 1.0037227869033813,grad_norm: 0.9762189313729085, iteration: 118398
loss: 0.9803922772407532,grad_norm: 0.9999993702634837, iteration: 118399
loss: 1.017163634300232,grad_norm: 0.9999990662184434, iteration: 118400
loss: 1.017432451248169,grad_norm: 0.9949158704588141, iteration: 118401
loss: 1.032705545425415,grad_norm: 0.9999992569598001, iteration: 118402
loss: 1.0218021869659424,grad_norm: 0.9730541184594198, iteration: 118403
loss: 0.9942421317100525,grad_norm: 0.8730739048294825, iteration: 118404
loss: 1.0115360021591187,grad_norm: 0.9999991493518459, iteration: 118405
loss: 0.9859293103218079,grad_norm: 0.9999990662360708, iteration: 118406
loss: 1.0062240362167358,grad_norm: 0.9309273564865076, iteration: 118407
loss: 0.9818401336669922,grad_norm: 0.9999990273771291, iteration: 118408
loss: 0.957526445388794,grad_norm: 0.999999284337857, iteration: 118409
loss: 1.0308820009231567,grad_norm: 0.8740344911639953, iteration: 118410
loss: 1.0289626121520996,grad_norm: 0.9999988924651544, iteration: 118411
loss: 1.013567328453064,grad_norm: 0.9999992452121893, iteration: 118412
loss: 1.0242353677749634,grad_norm: 0.8440709848829253, iteration: 118413
loss: 1.000292420387268,grad_norm: 0.9993153155284062, iteration: 118414
loss: 0.9916554093360901,grad_norm: 0.9999994751730372, iteration: 118415
loss: 1.0454214811325073,grad_norm: 0.999999144733966, iteration: 118416
loss: 1.0306636095046997,grad_norm: 0.9999995916114824, iteration: 118417
loss: 1.0516517162322998,grad_norm: 0.9619739115131829, iteration: 118418
loss: 0.9995028376579285,grad_norm: 0.9999990890005398, iteration: 118419
loss: 1.040094017982483,grad_norm: 0.9999992056970946, iteration: 118420
loss: 1.0275663137435913,grad_norm: 0.999999357970489, iteration: 118421
loss: 0.9909190535545349,grad_norm: 0.879367385805962, iteration: 118422
loss: 1.023701548576355,grad_norm: 0.9630275902451017, iteration: 118423
loss: 1.0106143951416016,grad_norm: 0.8502325644050703, iteration: 118424
loss: 0.9787370562553406,grad_norm: 0.9999989838290678, iteration: 118425
loss: 1.0140929222106934,grad_norm: 0.9505366511488426, iteration: 118426
loss: 1.0076631307601929,grad_norm: 0.8585133379428812, iteration: 118427
loss: 1.0437796115875244,grad_norm: 0.9456392838782787, iteration: 118428
loss: 1.0819522142410278,grad_norm: 0.9999995179333563, iteration: 118429
loss: 0.9472591280937195,grad_norm: 0.9112041709676323, iteration: 118430
loss: 0.995739221572876,grad_norm: 0.9999990521734279, iteration: 118431
loss: 0.9833791255950928,grad_norm: 0.9505254964916096, iteration: 118432
loss: 1.008870005607605,grad_norm: 0.99999952498836, iteration: 118433
loss: 1.0146125555038452,grad_norm: 0.9999989217747295, iteration: 118434
loss: 1.0533455610275269,grad_norm: 0.9999991792232188, iteration: 118435
loss: 0.97699373960495,grad_norm: 0.9999998608766851, iteration: 118436
loss: 0.9384446740150452,grad_norm: 0.9861263802509164, iteration: 118437
loss: 1.0224535465240479,grad_norm: 0.896354534112782, iteration: 118438
loss: 1.0075194835662842,grad_norm: 0.8910166690173824, iteration: 118439
loss: 1.0678389072418213,grad_norm: 0.9999990155284427, iteration: 118440
loss: 1.0049699544906616,grad_norm: 0.999999112905636, iteration: 118441
loss: 1.0418217182159424,grad_norm: 0.8446098253824028, iteration: 118442
loss: 1.0042927265167236,grad_norm: 0.9999991781082814, iteration: 118443
loss: 0.9926654100418091,grad_norm: 0.9999994612408807, iteration: 118444
loss: 1.005066156387329,grad_norm: 0.9999989804419984, iteration: 118445
loss: 0.9845529794692993,grad_norm: 0.9999990812687117, iteration: 118446
loss: 0.9908689856529236,grad_norm: 0.9999991555465033, iteration: 118447
loss: 0.9991481304168701,grad_norm: 0.9999990909640203, iteration: 118448
loss: 0.974868655204773,grad_norm: 0.999998964026675, iteration: 118449
loss: 1.0131962299346924,grad_norm: 0.8187519810787751, iteration: 118450
loss: 1.012990117073059,grad_norm: 0.999999217619201, iteration: 118451
loss: 0.9870485663414001,grad_norm: 0.9060998095943097, iteration: 118452
loss: 1.0377061367034912,grad_norm: 0.825548641101187, iteration: 118453
loss: 1.0244717597961426,grad_norm: 0.9837551446157664, iteration: 118454
loss: 0.9626463055610657,grad_norm: 0.9999990452630264, iteration: 118455
loss: 0.97362220287323,grad_norm: 0.9999991564292919, iteration: 118456
loss: 0.9979981184005737,grad_norm: 0.9999993787215765, iteration: 118457
loss: 0.9758087992668152,grad_norm: 0.9999990354596563, iteration: 118458
loss: 0.9658451676368713,grad_norm: 0.9934736747310439, iteration: 118459
loss: 1.0275787115097046,grad_norm: 0.9999992015676638, iteration: 118460
loss: 0.9959966540336609,grad_norm: 0.9281691184833948, iteration: 118461
loss: 0.9663198590278625,grad_norm: 0.9999991695757889, iteration: 118462
loss: 1.0198839902877808,grad_norm: 0.9999989745983877, iteration: 118463
loss: 0.9692053198814392,grad_norm: 0.9424659527216133, iteration: 118464
loss: 1.0076689720153809,grad_norm: 0.999999328106028, iteration: 118465
loss: 1.0426596403121948,grad_norm: 0.9999992261505656, iteration: 118466
loss: 1.0048179626464844,grad_norm: 0.9715365702788938, iteration: 118467
loss: 1.0489736795425415,grad_norm: 0.9999992653077485, iteration: 118468
loss: 1.0124913454055786,grad_norm: 0.9999990955952419, iteration: 118469
loss: 0.9926264882087708,grad_norm: 0.9279783971986301, iteration: 118470
loss: 1.0014067888259888,grad_norm: 0.9999990259193223, iteration: 118471
loss: 0.9754915237426758,grad_norm: 0.9433605134409581, iteration: 118472
loss: 0.9968332648277283,grad_norm: 0.9999991635211268, iteration: 118473
loss: 1.0014159679412842,grad_norm: 0.9483974342489063, iteration: 118474
loss: 1.0209019184112549,grad_norm: 0.9999991455277718, iteration: 118475
loss: 1.0089623928070068,grad_norm: 0.9999993351342387, iteration: 118476
loss: 1.0147029161453247,grad_norm: 0.9362695269411728, iteration: 118477
loss: 0.9992234110832214,grad_norm: 0.9373488355036858, iteration: 118478
loss: 1.0488064289093018,grad_norm: 0.9999994727391712, iteration: 118479
loss: 1.0295422077178955,grad_norm: 0.9759279536896598, iteration: 118480
loss: 1.007233738899231,grad_norm: 0.9999991117818133, iteration: 118481
loss: 1.0045619010925293,grad_norm: 0.9999992580019654, iteration: 118482
loss: 0.9552628397941589,grad_norm: 0.9999990699185473, iteration: 118483
loss: 0.960504949092865,grad_norm: 0.9999991501269414, iteration: 118484
loss: 1.0173317193984985,grad_norm: 0.9781877398821976, iteration: 118485
loss: 0.9896707534790039,grad_norm: 0.9999991048450492, iteration: 118486
loss: 0.9974480271339417,grad_norm: 0.8880168383873086, iteration: 118487
loss: 0.9791175127029419,grad_norm: 0.9999994957054132, iteration: 118488
loss: 1.0138720273971558,grad_norm: 0.9999992163371705, iteration: 118489
loss: 0.9999932646751404,grad_norm: 0.9578213414521423, iteration: 118490
loss: 0.9890412092208862,grad_norm: 0.9999992961115922, iteration: 118491
loss: 1.0180633068084717,grad_norm: 0.9960453490755194, iteration: 118492
loss: 1.0155627727508545,grad_norm: 0.9730629782815695, iteration: 118493
loss: 1.0474724769592285,grad_norm: 0.9999991299407379, iteration: 118494
loss: 0.988556981086731,grad_norm: 0.9688246302988711, iteration: 118495
loss: 1.05826735496521,grad_norm: 0.9999994804948278, iteration: 118496
loss: 1.0034271478652954,grad_norm: 0.9798589547599784, iteration: 118497
loss: 1.011725664138794,grad_norm: 0.9471772696073628, iteration: 118498
loss: 1.0003236532211304,grad_norm: 0.9999991707498436, iteration: 118499
loss: 1.0742994546890259,grad_norm: 0.9500984441687277, iteration: 118500
loss: 1.0199432373046875,grad_norm: 0.9721715553111955, iteration: 118501
loss: 1.1384522914886475,grad_norm: 0.999999745741325, iteration: 118502
loss: 0.9719486236572266,grad_norm: 0.9999992484789905, iteration: 118503
loss: 1.0298925638198853,grad_norm: 0.9999992850128945, iteration: 118504
loss: 0.9912105798721313,grad_norm: 0.913448708363769, iteration: 118505
loss: 0.9412943124771118,grad_norm: 0.9999993036428575, iteration: 118506
loss: 1.0155935287475586,grad_norm: 0.9999991451175837, iteration: 118507
loss: 1.0027486085891724,grad_norm: 0.9218309759411883, iteration: 118508
loss: 1.0108981132507324,grad_norm: 0.9906838512102721, iteration: 118509
loss: 1.0061315298080444,grad_norm: 0.9999991042049536, iteration: 118510
loss: 0.9576288461685181,grad_norm: 0.9686040572271183, iteration: 118511
loss: 1.0153546333312988,grad_norm: 0.9999992053271195, iteration: 118512
loss: 0.9733954071998596,grad_norm: 0.9205117086099853, iteration: 118513
loss: 1.0463685989379883,grad_norm: 0.9999996738060578, iteration: 118514
loss: 1.0855001211166382,grad_norm: 0.9999991612170581, iteration: 118515
loss: 1.0017074346542358,grad_norm: 0.9716542457793355, iteration: 118516
loss: 1.0199251174926758,grad_norm: 0.999999095183543, iteration: 118517
loss: 0.9940502643585205,grad_norm: 0.9999991473646394, iteration: 118518
loss: 0.9975090026855469,grad_norm: 0.9134879566143995, iteration: 118519
loss: 1.0008647441864014,grad_norm: 0.9675772100488403, iteration: 118520
loss: 1.0215568542480469,grad_norm: 0.9785283780769041, iteration: 118521
loss: 1.037274718284607,grad_norm: 0.9999990255153408, iteration: 118522
loss: 0.9846473932266235,grad_norm: 0.9999991489626983, iteration: 118523
loss: 1.0181857347488403,grad_norm: 0.9999991983291014, iteration: 118524
loss: 1.018298864364624,grad_norm: 0.8560485677180594, iteration: 118525
loss: 1.0001444816589355,grad_norm: 0.9999993379163313, iteration: 118526
loss: 0.9722277522087097,grad_norm: 0.9785735550669575, iteration: 118527
loss: 1.0305906534194946,grad_norm: 0.9999994638566365, iteration: 118528
loss: 0.9989246129989624,grad_norm: 0.8608456375357207, iteration: 118529
loss: 1.0124945640563965,grad_norm: 0.8163099485069558, iteration: 118530
loss: 0.9894832372665405,grad_norm: 0.9999992532660127, iteration: 118531
loss: 1.0417654514312744,grad_norm: 0.9999994661685467, iteration: 118532
loss: 1.0208688974380493,grad_norm: 0.9999990984726782, iteration: 118533
loss: 1.0191528797149658,grad_norm: 0.9999991116593566, iteration: 118534
loss: 1.0432648658752441,grad_norm: 0.9999992233486449, iteration: 118535
loss: 1.034432291984558,grad_norm: 0.9999990655208556, iteration: 118536
loss: 1.0155348777770996,grad_norm: 0.9135523689658982, iteration: 118537
loss: 1.0360100269317627,grad_norm: 0.9999998316066451, iteration: 118538
loss: 1.0173227787017822,grad_norm: 0.9999990727764226, iteration: 118539
loss: 1.026804804801941,grad_norm: 0.9999989784392614, iteration: 118540
loss: 1.0192666053771973,grad_norm: 0.8323948823908124, iteration: 118541
loss: 0.9812381863594055,grad_norm: 0.9232549810351028, iteration: 118542
loss: 0.9990910887718201,grad_norm: 0.9999992884648339, iteration: 118543
loss: 1.0097566843032837,grad_norm: 0.9385974312343961, iteration: 118544
loss: 1.0062005519866943,grad_norm: 0.9999991929709019, iteration: 118545
loss: 0.9711867570877075,grad_norm: 0.9967041436666124, iteration: 118546
loss: 1.0100935697555542,grad_norm: 0.9999992407032525, iteration: 118547
loss: 1.0126267671585083,grad_norm: 0.9999998257085084, iteration: 118548
loss: 1.018815517425537,grad_norm: 0.9999992165817905, iteration: 118549
loss: 1.0149450302124023,grad_norm: 0.999998930740524, iteration: 118550
loss: 1.0013607740402222,grad_norm: 0.9999990603755214, iteration: 118551
loss: 0.9830830693244934,grad_norm: 0.9999990548989798, iteration: 118552
loss: 1.0273749828338623,grad_norm: 0.9999991127132418, iteration: 118553
loss: 0.9776611328125,grad_norm: 0.9298914482804628, iteration: 118554
loss: 1.0514583587646484,grad_norm: 0.9999992401587476, iteration: 118555
loss: 1.0269140005111694,grad_norm: 0.9999990380258492, iteration: 118556
loss: 1.0577207803726196,grad_norm: 0.9999992541031567, iteration: 118557
loss: 0.9781855940818787,grad_norm: 0.9999991660230332, iteration: 118558
loss: 0.9890046715736389,grad_norm: 0.9999990334924755, iteration: 118559
loss: 1.0070056915283203,grad_norm: 0.8737712911852766, iteration: 118560
loss: 1.0027174949645996,grad_norm: 0.9441339246069516, iteration: 118561
loss: 0.987772524356842,grad_norm: 0.9999990010802718, iteration: 118562
loss: 1.035831332206726,grad_norm: 0.9999992613663822, iteration: 118563
loss: 1.012222409248352,grad_norm: 0.9999993253378295, iteration: 118564
loss: 1.03043794631958,grad_norm: 0.8392030907425196, iteration: 118565
loss: 0.9861649870872498,grad_norm: 0.98740747510153, iteration: 118566
loss: 1.0096781253814697,grad_norm: 0.8242094823975599, iteration: 118567
loss: 1.0109522342681885,grad_norm: 0.999999168736099, iteration: 118568
loss: 1.0124728679656982,grad_norm: 0.9999990030628485, iteration: 118569
loss: 1.0390138626098633,grad_norm: 0.9763474457726734, iteration: 118570
loss: 0.9761175513267517,grad_norm: 0.9552328022640475, iteration: 118571
loss: 0.9539580941200256,grad_norm: 0.9999991647111357, iteration: 118572
loss: 0.9923834800720215,grad_norm: 0.9999989578704451, iteration: 118573
loss: 0.9922773838043213,grad_norm: 0.9734225516099743, iteration: 118574
loss: 1.0402936935424805,grad_norm: 0.9999993035260102, iteration: 118575
loss: 1.0092917680740356,grad_norm: 0.9999992793669066, iteration: 118576
loss: 0.9743356108665466,grad_norm: 0.9046023295498297, iteration: 118577
loss: 0.9951452016830444,grad_norm: 0.9999992979085354, iteration: 118578
loss: 1.0218461751937866,grad_norm: 0.9655865864172021, iteration: 118579
loss: 1.0047332048416138,grad_norm: 0.8742046374924404, iteration: 118580
loss: 1.0252833366394043,grad_norm: 0.9999992628880597, iteration: 118581
loss: 0.9987009167671204,grad_norm: 0.9664049576973771, iteration: 118582
loss: 1.1087664365768433,grad_norm: 0.9999996086513709, iteration: 118583
loss: 0.9912320971488953,grad_norm: 0.9999992505289828, iteration: 118584
loss: 1.0071640014648438,grad_norm: 0.9999991802395126, iteration: 118585
loss: 1.0056685209274292,grad_norm: 0.9999992635524209, iteration: 118586
loss: 0.995476245880127,grad_norm: 0.9927962158720626, iteration: 118587
loss: 1.0086040496826172,grad_norm: 0.8933305800407512, iteration: 118588
loss: 1.0582841634750366,grad_norm: 0.9999992011345215, iteration: 118589
loss: 1.0385335683822632,grad_norm: 0.9999990714487963, iteration: 118590
loss: 1.013014793395996,grad_norm: 0.9999991828866388, iteration: 118591
loss: 1.0154207944869995,grad_norm: 0.9999991054381016, iteration: 118592
loss: 1.010144591331482,grad_norm: 0.9999993258799257, iteration: 118593
loss: 1.0138169527053833,grad_norm: 0.9999989724822156, iteration: 118594
loss: 1.0138843059539795,grad_norm: 0.9475181775024484, iteration: 118595
loss: 0.9660822749137878,grad_norm: 0.9999992041954614, iteration: 118596
loss: 0.9943004250526428,grad_norm: 0.9104184160677776, iteration: 118597
loss: 0.998748779296875,grad_norm: 0.999999660991811, iteration: 118598
loss: 1.0244828462600708,grad_norm: 0.9999991441273218, iteration: 118599
loss: 1.0416581630706787,grad_norm: 0.9999991049321317, iteration: 118600
loss: 0.9850135445594788,grad_norm: 0.9713490319987956, iteration: 118601
loss: 1.0372024774551392,grad_norm: 0.9999990683691113, iteration: 118602
loss: 0.9708766937255859,grad_norm: 0.9999992256306526, iteration: 118603
loss: 0.9753355979919434,grad_norm: 0.9999992818897293, iteration: 118604
loss: 1.016463041305542,grad_norm: 0.9668217440737819, iteration: 118605
loss: 0.9939648509025574,grad_norm: 0.999999009013873, iteration: 118606
loss: 1.1005533933639526,grad_norm: 0.9999996623489533, iteration: 118607
loss: 1.0165361166000366,grad_norm: 0.9999991013073695, iteration: 118608
loss: 1.0567362308502197,grad_norm: 0.9999993220889649, iteration: 118609
loss: 1.0339854955673218,grad_norm: 0.999999280370529, iteration: 118610
loss: 1.0427342653274536,grad_norm: 0.99999935133971, iteration: 118611
loss: 1.0612002611160278,grad_norm: 0.9999997755608764, iteration: 118612
loss: 1.0518755912780762,grad_norm: 0.9999999416650759, iteration: 118613
loss: 0.9537955522537231,grad_norm: 0.9999990058333527, iteration: 118614
loss: 0.9807330965995789,grad_norm: 0.9489720992769703, iteration: 118615
loss: 1.0664957761764526,grad_norm: 0.9999998754551416, iteration: 118616
loss: 1.0068615674972534,grad_norm: 0.9999990636056794, iteration: 118617
loss: 0.9899547696113586,grad_norm: 0.9999989029597546, iteration: 118618
loss: 1.0247163772583008,grad_norm: 0.9999991477227541, iteration: 118619
loss: 0.967910885810852,grad_norm: 0.9999991862072387, iteration: 118620
loss: 1.1105709075927734,grad_norm: 0.999999265826719, iteration: 118621
loss: 1.0103143453598022,grad_norm: 0.9999991786979529, iteration: 118622
loss: 1.0002973079681396,grad_norm: 0.999999131042016, iteration: 118623
loss: 1.0564525127410889,grad_norm: 0.9999998884262671, iteration: 118624
loss: 1.0018186569213867,grad_norm: 0.9529798338655591, iteration: 118625
loss: 0.9862115979194641,grad_norm: 0.9999991105759635, iteration: 118626
loss: 1.0501728057861328,grad_norm: 0.9999996134809634, iteration: 118627
loss: 1.0125283002853394,grad_norm: 0.9999991948605306, iteration: 118628
loss: 1.0025933980941772,grad_norm: 0.9999993734743915, iteration: 118629
loss: 1.0398765802383423,grad_norm: 0.9999993492693242, iteration: 118630
loss: 0.9954125285148621,grad_norm: 0.8546906380962783, iteration: 118631
loss: 1.0044426918029785,grad_norm: 0.9928773367680878, iteration: 118632
loss: 1.052595615386963,grad_norm: 0.9999993694224709, iteration: 118633
loss: 1.079250693321228,grad_norm: 0.9999990289702891, iteration: 118634
loss: 1.0358818769454956,grad_norm: 0.9879897836402564, iteration: 118635
loss: 1.0013691186904907,grad_norm: 0.9999991530738294, iteration: 118636
loss: 0.9732772707939148,grad_norm: 0.9999992951436678, iteration: 118637
loss: 1.0583668947219849,grad_norm: 0.999999657713589, iteration: 118638
loss: 1.0005862712860107,grad_norm: 0.9999990743243441, iteration: 118639
loss: 1.0366252660751343,grad_norm: 0.9999995220677398, iteration: 118640
loss: 1.0159051418304443,grad_norm: 0.999999237724994, iteration: 118641
loss: 0.9722329378128052,grad_norm: 0.9999992301304312, iteration: 118642
loss: 0.9717172980308533,grad_norm: 0.9041733315040842, iteration: 118643
loss: 1.0569052696228027,grad_norm: 0.999999064214348, iteration: 118644
loss: 1.0103470087051392,grad_norm: 0.9999991223472571, iteration: 118645
loss: 1.0170527696609497,grad_norm: 0.9999995572187911, iteration: 118646
loss: 0.9814407229423523,grad_norm: 0.9999994831996846, iteration: 118647
loss: 1.0000702142715454,grad_norm: 0.9999993642608223, iteration: 118648
loss: 0.9923035502433777,grad_norm: 0.9999992581617929, iteration: 118649
loss: 0.9675261974334717,grad_norm: 0.9999990227327101, iteration: 118650
loss: 1.0025535821914673,grad_norm: 0.9999989229509634, iteration: 118651
loss: 0.9961050152778625,grad_norm: 0.9999991675956309, iteration: 118652
loss: 1.0055919885635376,grad_norm: 0.9999996388221754, iteration: 118653
loss: 0.9966524839401245,grad_norm: 0.9999996641214216, iteration: 118654
loss: 1.0583466291427612,grad_norm: 0.9999999675405425, iteration: 118655
loss: 0.9802916646003723,grad_norm: 0.9999991658398913, iteration: 118656
loss: 1.0070915222167969,grad_norm: 0.8444083235847939, iteration: 118657
loss: 1.06035315990448,grad_norm: 0.9999995322318721, iteration: 118658
loss: 1.016431212425232,grad_norm: 0.9999995769478791, iteration: 118659
loss: 1.0113029479980469,grad_norm: 0.9999992740801272, iteration: 118660
loss: 1.0337412357330322,grad_norm: 0.9999993023960606, iteration: 118661
loss: 1.0057576894760132,grad_norm: 0.9999996129574106, iteration: 118662
loss: 1.0023761987686157,grad_norm: 0.9999991201779562, iteration: 118663
loss: 1.0673575401306152,grad_norm: 0.9999995143967141, iteration: 118664
loss: 0.9859939813613892,grad_norm: 0.9999991648955396, iteration: 118665
loss: 0.9785987138748169,grad_norm: 0.9619807347321716, iteration: 118666
loss: 1.045573353767395,grad_norm: 0.9509805273764115, iteration: 118667
loss: 0.9963416457176208,grad_norm: 0.9999994091614975, iteration: 118668
loss: 0.9724136590957642,grad_norm: 0.9999992333561154, iteration: 118669
loss: 1.0003468990325928,grad_norm: 0.9261053967000353, iteration: 118670
loss: 0.9833931922912598,grad_norm: 0.8592331705656122, iteration: 118671
loss: 1.011183738708496,grad_norm: 0.9999990804169451, iteration: 118672
loss: 0.9713738560676575,grad_norm: 0.9999991059827049, iteration: 118673
loss: 1.0207337141036987,grad_norm: 0.9999991422180632, iteration: 118674
loss: 1.0365697145462036,grad_norm: 0.9999992917193904, iteration: 118675
loss: 1.0197035074234009,grad_norm: 0.9855361525369147, iteration: 118676
loss: 0.9926726818084717,grad_norm: 0.9999996315726076, iteration: 118677
loss: 0.9464358687400818,grad_norm: 0.9312931378998908, iteration: 118678
loss: 0.9746155738830566,grad_norm: 0.9999991987179608, iteration: 118679
loss: 1.0220892429351807,grad_norm: 0.9999992038330471, iteration: 118680
loss: 0.9745556116104126,grad_norm: 0.9999990638130255, iteration: 118681
loss: 1.052518367767334,grad_norm: 0.9999997138551896, iteration: 118682
loss: 0.9904149174690247,grad_norm: 0.9999990911070357, iteration: 118683
loss: 0.9859233498573303,grad_norm: 0.99999957795783, iteration: 118684
loss: 1.0645787715911865,grad_norm: 0.8947004574762283, iteration: 118685
loss: 1.0876909494400024,grad_norm: 0.9999997437352369, iteration: 118686
loss: 1.0145777463912964,grad_norm: 0.9999992068147512, iteration: 118687
loss: 0.9575566053390503,grad_norm: 0.9999990654955971, iteration: 118688
loss: 1.0914695262908936,grad_norm: 0.9999990357303862, iteration: 118689
loss: 1.0060608386993408,grad_norm: 0.9999991805929686, iteration: 118690
loss: 1.1010593175888062,grad_norm: 0.9999996353542584, iteration: 118691
loss: 0.9840694665908813,grad_norm: 0.7798561861265795, iteration: 118692
loss: 1.0286256074905396,grad_norm: 0.9999996081136724, iteration: 118693
loss: 1.0248476266860962,grad_norm: 0.9999991842309452, iteration: 118694
loss: 1.0051690340042114,grad_norm: 0.7971119642229769, iteration: 118695
loss: 0.9593026638031006,grad_norm: 0.9999991801349277, iteration: 118696
loss: 1.0544294118881226,grad_norm: 0.9999996497427716, iteration: 118697
loss: 0.9959607124328613,grad_norm: 0.9712583514848924, iteration: 118698
loss: 1.013844609260559,grad_norm: 0.9329754946476058, iteration: 118699
loss: 1.0488567352294922,grad_norm: 0.9999993727522923, iteration: 118700
loss: 0.9801179766654968,grad_norm: 0.8882826423552865, iteration: 118701
loss: 1.035946249961853,grad_norm: 0.9999996704895492, iteration: 118702
loss: 0.9742445945739746,grad_norm: 0.9202265486151568, iteration: 118703
loss: 0.9956219792366028,grad_norm: 0.8269430296360387, iteration: 118704
loss: 1.0008697509765625,grad_norm: 0.9999993057954388, iteration: 118705
loss: 1.1435648202896118,grad_norm: 0.9999997897818916, iteration: 118706
loss: 1.0028061866760254,grad_norm: 0.9999993829222728, iteration: 118707
loss: 0.9963243007659912,grad_norm: 0.9999991995148442, iteration: 118708
loss: 1.0096384286880493,grad_norm: 0.9971035094225956, iteration: 118709
loss: 1.0061689615249634,grad_norm: 0.9999993698865958, iteration: 118710
loss: 0.9949462413787842,grad_norm: 0.9999993270522434, iteration: 118711
loss: 1.0037963390350342,grad_norm: 0.9293301909548293, iteration: 118712
loss: 1.065081000328064,grad_norm: 0.9999993516136007, iteration: 118713
loss: 1.1012016534805298,grad_norm: 0.9999994670237149, iteration: 118714
loss: 0.9774672389030457,grad_norm: 0.951042267763124, iteration: 118715
loss: 1.0025696754455566,grad_norm: 0.9999993608926024, iteration: 118716
loss: 1.005232334136963,grad_norm: 0.8915653036062399, iteration: 118717
loss: 1.0190500020980835,grad_norm: 0.9999991295088255, iteration: 118718
loss: 1.0043540000915527,grad_norm: 0.9983190520611179, iteration: 118719
loss: 0.9593907594680786,grad_norm: 0.9373885613888648, iteration: 118720
loss: 0.9985672235488892,grad_norm: 0.999999043569999, iteration: 118721
loss: 1.0065819025039673,grad_norm: 0.9999992234997758, iteration: 118722
loss: 0.99314945936203,grad_norm: 0.9690326129168629, iteration: 118723
loss: 0.9966561794281006,grad_norm: 0.8880897601201255, iteration: 118724
loss: 0.977539598941803,grad_norm: 0.9999996002162241, iteration: 118725
loss: 1.028464674949646,grad_norm: 0.9999992077565482, iteration: 118726
loss: 1.0075594186782837,grad_norm: 0.994999687687787, iteration: 118727
loss: 1.0015655755996704,grad_norm: 0.9999992555441699, iteration: 118728
loss: 1.0175520181655884,grad_norm: 0.9999993075759758, iteration: 118729
loss: 1.1299363374710083,grad_norm: 0.9999999536164853, iteration: 118730
loss: 1.090046763420105,grad_norm: 0.999999341793115, iteration: 118731
loss: 1.0515711307525635,grad_norm: 0.9999992273883324, iteration: 118732
loss: 1.0482043027877808,grad_norm: 0.9999992478219266, iteration: 118733
loss: 0.9866380095481873,grad_norm: 0.9626085585498396, iteration: 118734
loss: 1.019975185394287,grad_norm: 0.9786620818081476, iteration: 118735
loss: 0.9801568388938904,grad_norm: 0.9829278641341436, iteration: 118736
loss: 1.025109052658081,grad_norm: 0.9999993262883966, iteration: 118737
loss: 1.0368330478668213,grad_norm: 0.9999997492057195, iteration: 118738
loss: 1.0778453350067139,grad_norm: 0.9999999288369933, iteration: 118739
loss: 1.0054030418395996,grad_norm: 0.9999992666737264, iteration: 118740
loss: 1.0099233388900757,grad_norm: 0.9999992834611666, iteration: 118741
loss: 1.0006016492843628,grad_norm: 0.9999996423491638, iteration: 118742
loss: 0.9999157786369324,grad_norm: 0.999999562421013, iteration: 118743
loss: 1.053594708442688,grad_norm: 0.9999997335710975, iteration: 118744
loss: 1.0983076095581055,grad_norm: 0.9999995482344656, iteration: 118745
loss: 0.9656040072441101,grad_norm: 0.9759444596018054, iteration: 118746
loss: 1.081332802772522,grad_norm: 0.9999997071169264, iteration: 118747
loss: 0.9419690370559692,grad_norm: 0.9999990919871095, iteration: 118748
loss: 1.0126632452011108,grad_norm: 0.9999994543407481, iteration: 118749
loss: 1.02073073387146,grad_norm: 0.9022882158441518, iteration: 118750
loss: 1.131875991821289,grad_norm: 0.9999992794746126, iteration: 118751
loss: 1.0225768089294434,grad_norm: 0.9999991290014658, iteration: 118752
loss: 0.970486581325531,grad_norm: 0.9738304208818191, iteration: 118753
loss: 0.9949514269828796,grad_norm: 0.9999990351430987, iteration: 118754
loss: 1.094730019569397,grad_norm: 0.9999997487561892, iteration: 118755
loss: 1.0598080158233643,grad_norm: 0.9999991063833533, iteration: 118756
loss: 1.0129467248916626,grad_norm: 0.9999991350095461, iteration: 118757
loss: 1.0153378248214722,grad_norm: 0.9999997434246134, iteration: 118758
loss: 0.9878036379814148,grad_norm: 0.9999992087953552, iteration: 118759
loss: 0.9681775569915771,grad_norm: 0.9320284063523255, iteration: 118760
loss: 0.9996814727783203,grad_norm: 0.9999991154035466, iteration: 118761
loss: 0.9659121632575989,grad_norm: 0.8173612399677596, iteration: 118762
loss: 1.0930253267288208,grad_norm: 0.9999991056029442, iteration: 118763
loss: 0.9550296068191528,grad_norm: 0.9999991953158647, iteration: 118764
loss: 1.0530892610549927,grad_norm: 0.9999998240605954, iteration: 118765
loss: 0.9714885354042053,grad_norm: 0.9999991238091174, iteration: 118766
loss: 1.0278264284133911,grad_norm: 0.9999997129053082, iteration: 118767
loss: 1.0723612308502197,grad_norm: 0.9999994836589937, iteration: 118768
loss: 1.0058881044387817,grad_norm: 0.9999992961836706, iteration: 118769
loss: 1.0399366617202759,grad_norm: 0.9999995640003966, iteration: 118770
loss: 0.96370929479599,grad_norm: 0.9999992219117647, iteration: 118771
loss: 1.022339940071106,grad_norm: 0.999999191292471, iteration: 118772
loss: 0.9682357907295227,grad_norm: 0.9999991590275557, iteration: 118773
loss: 1.0788689851760864,grad_norm: 0.9999991882297025, iteration: 118774
loss: 1.025701880455017,grad_norm: 0.8597291623475017, iteration: 118775
loss: 1.0410058498382568,grad_norm: 0.9880599199037332, iteration: 118776
loss: 1.0650027990341187,grad_norm: 0.9999713584283682, iteration: 118777
loss: 0.9493880867958069,grad_norm: 0.9999991079526005, iteration: 118778
loss: 1.0070598125457764,grad_norm: 0.9999993051722882, iteration: 118779
loss: 1.010327935218811,grad_norm: 0.9999991511922416, iteration: 118780
loss: 1.0171769857406616,grad_norm: 0.9999992332494405, iteration: 118781
loss: 1.0901480913162231,grad_norm: 0.9999996912752391, iteration: 118782
loss: 1.0264431238174438,grad_norm: 0.9999990504850432, iteration: 118783
loss: 1.3159540891647339,grad_norm: 0.9999998904301026, iteration: 118784
loss: 0.9683006405830383,grad_norm: 0.9999992384806329, iteration: 118785
loss: 1.0097206830978394,grad_norm: 0.9030448954016864, iteration: 118786
loss: 1.1061973571777344,grad_norm: 0.9999995185132338, iteration: 118787
loss: 0.9757393002510071,grad_norm: 0.999999437416011, iteration: 118788
loss: 1.0538554191589355,grad_norm: 0.9999992362536074, iteration: 118789
loss: 1.1121528148651123,grad_norm: 0.9999993760610262, iteration: 118790
loss: 1.143592357635498,grad_norm: 0.9999993586506142, iteration: 118791
loss: 1.1376575231552124,grad_norm: 0.9999999083663569, iteration: 118792
loss: 1.003905177116394,grad_norm: 0.9999998853394194, iteration: 118793
loss: 1.0212645530700684,grad_norm: 0.9999991316190564, iteration: 118794
loss: 1.0166865587234497,grad_norm: 0.999999794100976, iteration: 118795
loss: 1.0726627111434937,grad_norm: 0.9999991068798074, iteration: 118796
loss: 1.032692313194275,grad_norm: 0.9999990449252987, iteration: 118797
loss: 1.022300362586975,grad_norm: 0.9999991340925771, iteration: 118798
loss: 1.0062698125839233,grad_norm: 0.9999992608425395, iteration: 118799
loss: 0.9981465935707092,grad_norm: 0.8776382898879201, iteration: 118800
loss: 0.9935983419418335,grad_norm: 0.9999992012848509, iteration: 118801
loss: 0.995766818523407,grad_norm: 0.9998959484266227, iteration: 118802
loss: 1.0026686191558838,grad_norm: 0.9999991152426034, iteration: 118803
loss: 1.1163183450698853,grad_norm: 0.9999992759963804, iteration: 118804
loss: 0.9802659153938293,grad_norm: 0.9999996452419883, iteration: 118805
loss: 1.0325350761413574,grad_norm: 0.9999996046917945, iteration: 118806
loss: 1.031555414199829,grad_norm: 0.9999995851189418, iteration: 118807
loss: 0.9910053014755249,grad_norm: 0.946887425425508, iteration: 118808
loss: 1.0272254943847656,grad_norm: 0.9999991695417142, iteration: 118809
loss: 1.0019729137420654,grad_norm: 0.999999050979772, iteration: 118810
loss: 1.02266263961792,grad_norm: 0.9740946337293414, iteration: 118811
loss: 1.0156574249267578,grad_norm: 0.9999991274238886, iteration: 118812
loss: 0.9912281036376953,grad_norm: 0.9614560122123702, iteration: 118813
loss: 1.0217137336730957,grad_norm: 0.9999992691219242, iteration: 118814
loss: 0.9873479008674622,grad_norm: 0.9999992597305636, iteration: 118815
loss: 1.0183013677597046,grad_norm: 0.9701452026888807, iteration: 118816
loss: 1.023443579673767,grad_norm: 0.9999991829967297, iteration: 118817
loss: 1.0181528329849243,grad_norm: 0.9463176951006328, iteration: 118818
loss: 1.0065990686416626,grad_norm: 0.9999990603239323, iteration: 118819
loss: 1.0337088108062744,grad_norm: 0.9999992345754977, iteration: 118820
loss: 0.9743838310241699,grad_norm: 0.9999991836369835, iteration: 118821
loss: 0.9902678728103638,grad_norm: 0.999999017894875, iteration: 118822
loss: 0.9930989146232605,grad_norm: 0.8723717884845601, iteration: 118823
loss: 1.0049623250961304,grad_norm: 0.9999991445596116, iteration: 118824
loss: 0.9802449345588684,grad_norm: 0.9999992177009612, iteration: 118825
loss: 1.0432517528533936,grad_norm: 0.9905823737392706, iteration: 118826
loss: 0.9997681975364685,grad_norm: 0.9999990146663763, iteration: 118827
loss: 0.9626338481903076,grad_norm: 0.9231690823514385, iteration: 118828
loss: 0.9973661303520203,grad_norm: 0.9186769154113178, iteration: 118829
loss: 1.0194497108459473,grad_norm: 0.9999997330779792, iteration: 118830
loss: 1.0377007722854614,grad_norm: 0.9999990832252602, iteration: 118831
loss: 1.0110294818878174,grad_norm: 0.8930476237125264, iteration: 118832
loss: 0.9903292655944824,grad_norm: 0.9999990766418964, iteration: 118833
loss: 1.025546669960022,grad_norm: 0.9999991786524904, iteration: 118834
loss: 0.9819685816764832,grad_norm: 0.9892163802344103, iteration: 118835
loss: 1.013019323348999,grad_norm: 0.9617690688138317, iteration: 118836
loss: 0.9869676828384399,grad_norm: 0.9999992154854781, iteration: 118837
loss: 0.9727429747581482,grad_norm: 0.9999991421502789, iteration: 118838
loss: 1.0220481157302856,grad_norm: 0.9999993243055025, iteration: 118839
loss: 0.9656692743301392,grad_norm: 0.9659162373570577, iteration: 118840
loss: 1.0189796686172485,grad_norm: 0.9303716609505039, iteration: 118841
loss: 1.01699960231781,grad_norm: 0.9956894965175278, iteration: 118842
loss: 0.9465795159339905,grad_norm: 0.9999993093740791, iteration: 118843
loss: 1.0119478702545166,grad_norm: 0.9999989865174774, iteration: 118844
loss: 1.0363160371780396,grad_norm: 0.9999990902179022, iteration: 118845
loss: 1.113312005996704,grad_norm: 0.9999998861701556, iteration: 118846
loss: 0.9756594896316528,grad_norm: 0.9999991403681209, iteration: 118847
loss: 0.9960317611694336,grad_norm: 0.9999990653419228, iteration: 118848
loss: 0.9837696552276611,grad_norm: 0.8582916427585003, iteration: 118849
loss: 1.0240451097488403,grad_norm: 0.9999990752698916, iteration: 118850
loss: 1.0318818092346191,grad_norm: 0.9137403746422651, iteration: 118851
loss: 1.058244228363037,grad_norm: 0.9999992418290181, iteration: 118852
loss: 1.0107884407043457,grad_norm: 0.999999243687387, iteration: 118853
loss: 1.0091216564178467,grad_norm: 0.938259620426888, iteration: 118854
loss: 1.0193036794662476,grad_norm: 0.9999992768556321, iteration: 118855
loss: 0.983127236366272,grad_norm: 0.9999991471623174, iteration: 118856
loss: 0.9828819036483765,grad_norm: 0.9999993103684388, iteration: 118857
loss: 0.9504269957542419,grad_norm: 0.9105556188928194, iteration: 118858
loss: 1.126452088356018,grad_norm: 0.999999903964216, iteration: 118859
loss: 1.032490849494934,grad_norm: 0.9999996057465732, iteration: 118860
loss: 1.0755354166030884,grad_norm: 0.9999998134895909, iteration: 118861
loss: 0.9962272644042969,grad_norm: 0.9999990232614333, iteration: 118862
loss: 1.010369896888733,grad_norm: 0.9999991418978443, iteration: 118863
loss: 1.0416557788848877,grad_norm: 0.9999990944006782, iteration: 118864
loss: 0.9856729507446289,grad_norm: 0.8806969005907049, iteration: 118865
loss: 0.9632359743118286,grad_norm: 0.93463837861011, iteration: 118866
loss: 0.987051248550415,grad_norm: 0.9999991357268239, iteration: 118867
loss: 0.995237410068512,grad_norm: 0.9350549221409121, iteration: 118868
loss: 1.0296763181686401,grad_norm: 0.9999991008070931, iteration: 118869
loss: 0.9888663291931152,grad_norm: 0.9581272421525433, iteration: 118870
loss: 0.9480146169662476,grad_norm: 0.9999990609558164, iteration: 118871
loss: 1.04192316532135,grad_norm: 0.9999993205250114, iteration: 118872
loss: 0.9980518817901611,grad_norm: 0.9999991031055308, iteration: 118873
loss: 1.0277082920074463,grad_norm: 0.8337776549501831, iteration: 118874
loss: 1.011403203010559,grad_norm: 0.9938234973652995, iteration: 118875
loss: 1.003259301185608,grad_norm: 0.9999993436137437, iteration: 118876
loss: 1.0272682905197144,grad_norm: 0.9682736039420436, iteration: 118877
loss: 0.9847443103790283,grad_norm: 0.9370558904268815, iteration: 118878
loss: 0.9782596826553345,grad_norm: 0.9999991003032126, iteration: 118879
loss: 0.9795606136322021,grad_norm: 0.9999996181037837, iteration: 118880
loss: 1.0040278434753418,grad_norm: 0.9866308239960545, iteration: 118881
loss: 1.0438311100006104,grad_norm: 0.9999998171912455, iteration: 118882
loss: 0.9700809717178345,grad_norm: 0.950547514967829, iteration: 118883
loss: 0.9899213910102844,grad_norm: 0.8622262152513258, iteration: 118884
loss: 0.9695467352867126,grad_norm: 0.9999991911156477, iteration: 118885
loss: 0.9818331003189087,grad_norm: 0.9999992310008323, iteration: 118886
loss: 1.0251092910766602,grad_norm: 0.9999990691252041, iteration: 118887
loss: 1.000542163848877,grad_norm: 0.9999991239189263, iteration: 118888
loss: 0.9767118096351624,grad_norm: 0.9999992305647727, iteration: 118889
loss: 1.0025503635406494,grad_norm: 0.999999142886496, iteration: 118890
loss: 1.0184345245361328,grad_norm: 0.9999991111023543, iteration: 118891
loss: 1.0010520219802856,grad_norm: 0.9402797997317118, iteration: 118892
loss: 0.9783359169960022,grad_norm: 0.9999991020590832, iteration: 118893
loss: 0.9866047501564026,grad_norm: 0.9828027113086417, iteration: 118894
loss: 0.9901394844055176,grad_norm: 0.9177460659864017, iteration: 118895
loss: 0.973354160785675,grad_norm: 0.9999993577698316, iteration: 118896
loss: 1.0183056592941284,grad_norm: 0.9012570457290143, iteration: 118897
loss: 0.9674375653266907,grad_norm: 0.9999991199845559, iteration: 118898
loss: 1.0040284395217896,grad_norm: 0.99999913525932, iteration: 118899
loss: 1.0203614234924316,grad_norm: 0.7959139915222635, iteration: 118900
loss: 1.0430575609207153,grad_norm: 0.9999992301441472, iteration: 118901
loss: 0.9914795756340027,grad_norm: 0.8888010767742696, iteration: 118902
loss: 1.0051155090332031,grad_norm: 0.999999064100523, iteration: 118903
loss: 0.9903934597969055,grad_norm: 0.9999989266146548, iteration: 118904
loss: 0.991450309753418,grad_norm: 0.9999992487106169, iteration: 118905
loss: 1.0046941041946411,grad_norm: 0.9529908806088578, iteration: 118906
loss: 0.9868009686470032,grad_norm: 0.9999991002261829, iteration: 118907
loss: 1.0882010459899902,grad_norm: 0.9999992555498168, iteration: 118908
loss: 1.0131994485855103,grad_norm: 0.9999993498797435, iteration: 118909
loss: 0.9757984280586243,grad_norm: 0.9655056514608781, iteration: 118910
loss: 0.9768332242965698,grad_norm: 0.9999990731824645, iteration: 118911
loss: 1.0011392831802368,grad_norm: 0.9999995079169272, iteration: 118912
loss: 0.9941273927688599,grad_norm: 0.9999989741354581, iteration: 118913
loss: 0.9328116774559021,grad_norm: 0.9375142189845063, iteration: 118914
loss: 0.9857351779937744,grad_norm: 0.9017935907839749, iteration: 118915
loss: 1.0080063343048096,grad_norm: 0.9841215898815142, iteration: 118916
loss: 0.9869106411933899,grad_norm: 0.9999993357792145, iteration: 118917
loss: 0.9795594215393066,grad_norm: 0.9533813787051334, iteration: 118918
loss: 0.9969571232795715,grad_norm: 0.8514590689233208, iteration: 118919
loss: 1.0139938592910767,grad_norm: 0.8850839950160831, iteration: 118920
loss: 0.9880859851837158,grad_norm: 0.902468540260879, iteration: 118921
loss: 0.9862610101699829,grad_norm: 0.9999990016561286, iteration: 118922
loss: 0.9961344599723816,grad_norm: 0.999999035516718, iteration: 118923
loss: 0.9908617734909058,grad_norm: 0.9999990680906995, iteration: 118924
loss: 0.9778563380241394,grad_norm: 0.999999075447375, iteration: 118925
loss: 0.9929236769676208,grad_norm: 0.9999991097073668, iteration: 118926
loss: 0.9557832479476929,grad_norm: 0.999999150868194, iteration: 118927
loss: 0.9785086512565613,grad_norm: 0.9087812536534194, iteration: 118928
loss: 1.0309844017028809,grad_norm: 0.999999146960187, iteration: 118929
loss: 0.9539486765861511,grad_norm: 0.8932053796899251, iteration: 118930
loss: 1.045841932296753,grad_norm: 0.9999991703484563, iteration: 118931
loss: 0.9845614433288574,grad_norm: 0.9999992327475056, iteration: 118932
loss: 1.0263409614562988,grad_norm: 0.9999996630495656, iteration: 118933
loss: 0.9954900145530701,grad_norm: 0.9999990054242779, iteration: 118934
loss: 0.9448596835136414,grad_norm: 0.9999992898621348, iteration: 118935
loss: 1.0107020139694214,grad_norm: 0.9999990751526969, iteration: 118936
loss: 0.9972866773605347,grad_norm: 0.9641212327217716, iteration: 118937
loss: 1.0568256378173828,grad_norm: 0.9999989774757974, iteration: 118938
loss: 1.0037811994552612,grad_norm: 0.9999991162990415, iteration: 118939
loss: 0.9685767889022827,grad_norm: 0.9787418476364121, iteration: 118940
loss: 1.0106371641159058,grad_norm: 0.9999990835833064, iteration: 118941
loss: 0.9674509763717651,grad_norm: 0.9999990623816973, iteration: 118942
loss: 0.9852561354637146,grad_norm: 0.9330222124139127, iteration: 118943
loss: 1.0443528890609741,grad_norm: 0.9999995260071395, iteration: 118944
loss: 1.0246847867965698,grad_norm: 0.9434406294514942, iteration: 118945
loss: 1.0004243850708008,grad_norm: 0.9999990276609793, iteration: 118946
loss: 0.9692930579185486,grad_norm: 0.9999991717475482, iteration: 118947
loss: 1.0142806768417358,grad_norm: 0.8548353751685219, iteration: 118948
loss: 1.0083767175674438,grad_norm: 0.9999991513060671, iteration: 118949
loss: 1.1344850063323975,grad_norm: 1.000000016376911, iteration: 118950
loss: 1.0421615839004517,grad_norm: 0.9999992958913712, iteration: 118951
loss: 1.0134673118591309,grad_norm: 0.9999989955222106, iteration: 118952
loss: 0.9854918718338013,grad_norm: 0.9999990220807158, iteration: 118953
loss: 1.0687528848648071,grad_norm: 0.9999994589306631, iteration: 118954
loss: 1.0119739770889282,grad_norm: 0.9238921465195769, iteration: 118955
loss: 1.0068743228912354,grad_norm: 0.9999991788952349, iteration: 118956
loss: 0.9927605986595154,grad_norm: 0.9999990204232921, iteration: 118957
loss: 1.0021624565124512,grad_norm: 0.9027892195609386, iteration: 118958
loss: 1.022193431854248,grad_norm: 0.999999153967938, iteration: 118959
loss: 0.972323477268219,grad_norm: 0.9776481081872984, iteration: 118960
loss: 0.9906619787216187,grad_norm: 0.9967760839884873, iteration: 118961
loss: 1.0073387622833252,grad_norm: 0.9452877971182944, iteration: 118962
loss: 1.0078582763671875,grad_norm: 0.9999992869162423, iteration: 118963
loss: 0.9968404173851013,grad_norm: 0.9040127357881743, iteration: 118964
loss: 1.0076225996017456,grad_norm: 0.9579710145673276, iteration: 118965
loss: 1.0241285562515259,grad_norm: 0.9999996030854116, iteration: 118966
loss: 1.003570795059204,grad_norm: 1.0000000004035345, iteration: 118967
loss: 1.016044020652771,grad_norm: 0.9999990840382881, iteration: 118968
loss: 0.9961114525794983,grad_norm: 0.9999991886363783, iteration: 118969
loss: 0.9949572086334229,grad_norm: 0.913125293373083, iteration: 118970
loss: 0.9676929116249084,grad_norm: 0.9999991598518013, iteration: 118971
loss: 1.018417477607727,grad_norm: 0.9999991945541764, iteration: 118972
loss: 1.013865351676941,grad_norm: 0.9999990968841227, iteration: 118973
loss: 0.9611572623252869,grad_norm: 0.9999991353863, iteration: 118974
loss: 1.0357331037521362,grad_norm: 0.9999993035589291, iteration: 118975
loss: 1.002035140991211,grad_norm: 0.8757453198427888, iteration: 118976
loss: 1.0053311586380005,grad_norm: 0.9819757045102362, iteration: 118977
loss: 1.0029784440994263,grad_norm: 0.9539642121581308, iteration: 118978
loss: 1.0447304248809814,grad_norm: 0.9999993927095162, iteration: 118979
loss: 0.9414504766464233,grad_norm: 0.9999991037250019, iteration: 118980
loss: 1.0787956714630127,grad_norm: 0.9999997060911414, iteration: 118981
loss: 0.967105507850647,grad_norm: 0.9999989406662578, iteration: 118982
loss: 1.045813798904419,grad_norm: 0.9999991012975467, iteration: 118983
loss: 0.9806776642799377,grad_norm: 0.9959591607752668, iteration: 118984
loss: 1.000174880027771,grad_norm: 0.9299660957066499, iteration: 118985
loss: 0.9999480843544006,grad_norm: 0.9999991841105997, iteration: 118986
loss: 1.0024561882019043,grad_norm: 0.9999989809213423, iteration: 118987
loss: 0.9795348048210144,grad_norm: 0.9999992374321119, iteration: 118988
loss: 0.9858230352401733,grad_norm: 0.9999992333700007, iteration: 118989
loss: 1.0060973167419434,grad_norm: 0.999999155981846, iteration: 118990
loss: 0.9950941205024719,grad_norm: 0.9239724437587739, iteration: 118991
loss: 0.9760352969169617,grad_norm: 0.9999990318416928, iteration: 118992
loss: 1.0413874387741089,grad_norm: 0.9999991210541851, iteration: 118993
loss: 0.9846835732460022,grad_norm: 0.9937648560053503, iteration: 118994
loss: 0.9824107885360718,grad_norm: 0.8997536777527911, iteration: 118995
loss: 1.128097414970398,grad_norm: 0.9999991401014798, iteration: 118996
loss: 0.9911977052688599,grad_norm: 0.9939355238696407, iteration: 118997
loss: 0.9849076271057129,grad_norm: 0.9197012756260895, iteration: 118998
loss: 0.9813985824584961,grad_norm: 0.9999990320512924, iteration: 118999
loss: 1.01982843875885,grad_norm: 0.919670512768809, iteration: 119000
loss: 1.003080129623413,grad_norm: 0.972137501665312, iteration: 119001
loss: 1.0273643732070923,grad_norm: 0.9741233605526636, iteration: 119002
loss: 1.0429461002349854,grad_norm: 0.999999170421343, iteration: 119003
loss: 0.998310387134552,grad_norm: 0.9999992205418287, iteration: 119004
loss: 1.0326753854751587,grad_norm: 0.9999992245234396, iteration: 119005
loss: 1.0072649717330933,grad_norm: 0.9999991467350217, iteration: 119006
loss: 0.9659776091575623,grad_norm: 0.7982311107671985, iteration: 119007
loss: 0.991929292678833,grad_norm: 0.9999991798958237, iteration: 119008
loss: 1.0677493810653687,grad_norm: 0.9999993344108301, iteration: 119009
loss: 1.008225440979004,grad_norm: 0.9897445993859751, iteration: 119010
loss: 0.9908105731010437,grad_norm: 0.8565731767408828, iteration: 119011
loss: 0.9922550320625305,grad_norm: 0.9999998732327394, iteration: 119012
loss: 1.0212409496307373,grad_norm: 0.9999991185573452, iteration: 119013
loss: 0.9913896322250366,grad_norm: 0.9892120367624929, iteration: 119014
loss: 1.0002914667129517,grad_norm: 0.8657423446962439, iteration: 119015
loss: 1.0048983097076416,grad_norm: 0.9180868738253751, iteration: 119016
loss: 0.9567866921424866,grad_norm: 0.9831474374448729, iteration: 119017
loss: 0.9904892444610596,grad_norm: 0.9999991593912936, iteration: 119018
loss: 1.0093671083450317,grad_norm: 0.9667529532357872, iteration: 119019
loss: 0.9886345863342285,grad_norm: 0.9492480480180927, iteration: 119020
loss: 1.059021234512329,grad_norm: 0.9999994540041597, iteration: 119021
loss: 1.090162992477417,grad_norm: 0.999999092981248, iteration: 119022
loss: 1.0308680534362793,grad_norm: 0.9756368755958865, iteration: 119023
loss: 1.0052778720855713,grad_norm: 0.9999993516293213, iteration: 119024
loss: 1.1267263889312744,grad_norm: 0.9999996651869396, iteration: 119025
loss: 1.0064767599105835,grad_norm: 0.98263787522106, iteration: 119026
loss: 1.0057990550994873,grad_norm: 0.999999467909317, iteration: 119027
loss: 1.0163304805755615,grad_norm: 0.9999991772588643, iteration: 119028
loss: 0.9984838366508484,grad_norm: 0.9999992138489997, iteration: 119029
loss: 1.0146774053573608,grad_norm: 0.9999991082778906, iteration: 119030
loss: 1.0608960390090942,grad_norm: 0.9999992942728261, iteration: 119031
loss: 1.0103906393051147,grad_norm: 0.9999999001778066, iteration: 119032
loss: 0.9738050103187561,grad_norm: 0.9999991256379467, iteration: 119033
loss: 1.200498104095459,grad_norm: 0.9999995606107468, iteration: 119034
loss: 1.0669748783111572,grad_norm: 0.9999992001853836, iteration: 119035
loss: 1.0037457942962646,grad_norm: 0.999999125562277, iteration: 119036
loss: 0.9928651452064514,grad_norm: 0.9999992534666593, iteration: 119037
loss: 1.2805736064910889,grad_norm: 0.9999998747369822, iteration: 119038
loss: 0.9802289605140686,grad_norm: 0.9807477287329576, iteration: 119039
loss: 0.986686110496521,grad_norm: 0.9727467658628032, iteration: 119040
loss: 0.9835849404335022,grad_norm: 0.9999989854817692, iteration: 119041
loss: 0.9706044793128967,grad_norm: 0.9999991470839477, iteration: 119042
loss: 0.9987601041793823,grad_norm: 0.9595447253900405, iteration: 119043
loss: 0.9801955819129944,grad_norm: 0.9999992022005031, iteration: 119044
loss: 1.0170953273773193,grad_norm: 0.9999992008534383, iteration: 119045
loss: 1.0095399618148804,grad_norm: 0.8397590677470641, iteration: 119046
loss: 0.981738269329071,grad_norm: 0.8596837497222906, iteration: 119047
loss: 1.0088014602661133,grad_norm: 0.9999995944469482, iteration: 119048
loss: 1.0239431858062744,grad_norm: 0.886179065493423, iteration: 119049
loss: 1.0385146141052246,grad_norm: 0.9999991509787008, iteration: 119050
loss: 1.0440133810043335,grad_norm: 0.9999991665934757, iteration: 119051
loss: 1.0315901041030884,grad_norm: 0.9999992241929138, iteration: 119052
loss: 1.0238804817199707,grad_norm: 0.9999992438866385, iteration: 119053
loss: 1.0414899587631226,grad_norm: 0.9999991072901664, iteration: 119054
loss: 1.0070101022720337,grad_norm: 0.9999992153533601, iteration: 119055
loss: 0.9696409106254578,grad_norm: 0.9999992766418883, iteration: 119056
loss: 1.0058600902557373,grad_norm: 0.9999990039418938, iteration: 119057
loss: 0.9862644076347351,grad_norm: 0.9404797179977377, iteration: 119058
loss: 0.9805187582969666,grad_norm: 0.9178308742493104, iteration: 119059
loss: 1.0007386207580566,grad_norm: 0.999999006741309, iteration: 119060
loss: 0.9999735355377197,grad_norm: 0.9055116672942071, iteration: 119061
loss: 0.9866455793380737,grad_norm: 0.9999991526223527, iteration: 119062
loss: 1.0024911165237427,grad_norm: 0.9999991523392944, iteration: 119063
loss: 0.981904923915863,grad_norm: 0.9777967367406804, iteration: 119064
loss: 1.0749584436416626,grad_norm: 0.9999992406375883, iteration: 119065
loss: 1.008968710899353,grad_norm: 0.9999990809489002, iteration: 119066
loss: 1.013360857963562,grad_norm: 0.9999990625038728, iteration: 119067
loss: 0.9948891401290894,grad_norm: 0.9999990574037222, iteration: 119068
loss: 1.0849109888076782,grad_norm: 0.9999996108011785, iteration: 119069
loss: 1.0436967611312866,grad_norm: 0.9999991538504569, iteration: 119070
loss: 1.1341625452041626,grad_norm: 0.9999995844327184, iteration: 119071
loss: 0.9869048595428467,grad_norm: 0.9999990550824646, iteration: 119072
loss: 0.9913848638534546,grad_norm: 0.9075012084345242, iteration: 119073
loss: 0.9988468885421753,grad_norm: 0.8730265655654768, iteration: 119074
loss: 1.1793078184127808,grad_norm: 0.9999999477949392, iteration: 119075
loss: 1.090177059173584,grad_norm: 0.9999990876988227, iteration: 119076
loss: 0.9880478382110596,grad_norm: 0.9729028199644821, iteration: 119077
loss: 0.9978659749031067,grad_norm: 0.9626804358981648, iteration: 119078
loss: 1.1667587757110596,grad_norm: 0.999999114667302, iteration: 119079
loss: 1.0159780979156494,grad_norm: 0.9999994044556687, iteration: 119080
loss: 0.9723384976387024,grad_norm: 0.8682676111589769, iteration: 119081
loss: 1.0339561700820923,grad_norm: 0.999999111948838, iteration: 119082
loss: 1.0000067949295044,grad_norm: 0.8544955108942606, iteration: 119083
loss: 1.033416509628296,grad_norm: 0.9999989013390777, iteration: 119084
loss: 0.9941916465759277,grad_norm: 0.9999990227032874, iteration: 119085
loss: 0.9857931733131409,grad_norm: 0.9999991713884444, iteration: 119086
loss: 0.9929507970809937,grad_norm: 0.9107731659519293, iteration: 119087
loss: 0.9838171005249023,grad_norm: 0.9040516529496765, iteration: 119088
loss: 1.0270793437957764,grad_norm: 0.9999992098955793, iteration: 119089
loss: 0.999691367149353,grad_norm: 0.99999922646894, iteration: 119090
loss: 1.0384777784347534,grad_norm: 0.9999993237966481, iteration: 119091
loss: 1.0267635583877563,grad_norm: 0.9913239967846464, iteration: 119092
loss: 1.3384865522384644,grad_norm: 0.9999999227596882, iteration: 119093
loss: 1.0084171295166016,grad_norm: 0.9999991604695637, iteration: 119094
loss: 1.0107426643371582,grad_norm: 0.937595214683586, iteration: 119095
loss: 0.9906811714172363,grad_norm: 0.9999991783680658, iteration: 119096
loss: 1.018805742263794,grad_norm: 0.9999991212922209, iteration: 119097
loss: 1.0057411193847656,grad_norm: 0.9999990773329587, iteration: 119098
loss: 0.9980534911155701,grad_norm: 0.9899757436254213, iteration: 119099
loss: 1.0179446935653687,grad_norm: 0.9999991621971233, iteration: 119100
loss: 1.0023882389068604,grad_norm: 0.9999991921535133, iteration: 119101
loss: 1.0193595886230469,grad_norm: 0.9999990874562847, iteration: 119102
loss: 0.9933643341064453,grad_norm: 0.9999990700936964, iteration: 119103
loss: 1.0092819929122925,grad_norm: 0.9474507605278001, iteration: 119104
loss: 1.0011706352233887,grad_norm: 0.9611713303189425, iteration: 119105
loss: 1.019690990447998,grad_norm: 0.9999990312283997, iteration: 119106
loss: 0.9981738328933716,grad_norm: 0.9999990766823976, iteration: 119107
loss: 1.008878231048584,grad_norm: 0.9999991640211989, iteration: 119108
loss: 1.0241948366165161,grad_norm: 0.900498373100431, iteration: 119109
loss: 0.9960584044456482,grad_norm: 0.9999990804128457, iteration: 119110
loss: 0.9800218939781189,grad_norm: 0.9999990515527533, iteration: 119111
loss: 1.0107077360153198,grad_norm: 0.9450398518857814, iteration: 119112
loss: 1.0277342796325684,grad_norm: 0.9999995603302163, iteration: 119113
loss: 0.9995630383491516,grad_norm: 0.9999989676554862, iteration: 119114
loss: 0.9782916903495789,grad_norm: 0.898590473648642, iteration: 119115
loss: 1.0009238719940186,grad_norm: 0.9999990307017822, iteration: 119116
loss: 0.9920124411582947,grad_norm: 0.9586320876153998, iteration: 119117
loss: 1.0116794109344482,grad_norm: 0.9999991649564474, iteration: 119118
loss: 1.0138113498687744,grad_norm: 0.999999182650915, iteration: 119119
loss: 1.0072846412658691,grad_norm: 0.9999992093706037, iteration: 119120
loss: 1.0095617771148682,grad_norm: 0.9999993701762714, iteration: 119121
loss: 0.9615185856819153,grad_norm: 0.9233689270546012, iteration: 119122
loss: 0.9622716307640076,grad_norm: 0.9235686789103676, iteration: 119123
loss: 1.0059874057769775,grad_norm: 0.9999989957582988, iteration: 119124
loss: 0.9843385219573975,grad_norm: 0.999999229143738, iteration: 119125
loss: 0.9991486668586731,grad_norm: 0.9999991696009014, iteration: 119126
loss: 1.03938627243042,grad_norm: 0.9999991890175782, iteration: 119127
loss: 0.9780218601226807,grad_norm: 0.9999991657351696, iteration: 119128
loss: 0.9775647521018982,grad_norm: 0.9999992035313983, iteration: 119129
loss: 1.0326530933380127,grad_norm: 0.9754744279514528, iteration: 119130
loss: 0.9623265862464905,grad_norm: 0.999998955418279, iteration: 119131
loss: 0.9586264491081238,grad_norm: 0.9999990619365928, iteration: 119132
loss: 1.0798176527023315,grad_norm: 0.9999998040893772, iteration: 119133
loss: 0.9742997884750366,grad_norm: 0.9999992951195237, iteration: 119134
loss: 0.9960503578186035,grad_norm: 0.9999990158784643, iteration: 119135
loss: 0.9733184576034546,grad_norm: 0.920221032333245, iteration: 119136
loss: 0.9903908371925354,grad_norm: 0.8982990506628511, iteration: 119137
loss: 0.9939689040184021,grad_norm: 0.9999990996231235, iteration: 119138
loss: 1.017286777496338,grad_norm: 0.9315960245746692, iteration: 119139
loss: 1.0078548192977905,grad_norm: 0.9999990059145875, iteration: 119140
loss: 1.0249007940292358,grad_norm: 0.9999990653135729, iteration: 119141
loss: 1.023271083831787,grad_norm: 0.9999990282184448, iteration: 119142
loss: 0.9919371008872986,grad_norm: 0.999999066547757, iteration: 119143
loss: 0.9990413188934326,grad_norm: 0.9999990766173821, iteration: 119144
loss: 1.014297604560852,grad_norm: 0.999999043548801, iteration: 119145
loss: 0.9951342344284058,grad_norm: 0.9999991777759157, iteration: 119146
loss: 0.9881709218025208,grad_norm: 0.9356237542193563, iteration: 119147
loss: 1.0226998329162598,grad_norm: 0.9999991611911281, iteration: 119148
loss: 1.0234442949295044,grad_norm: 0.9999991975358293, iteration: 119149
loss: 1.001396894454956,grad_norm: 0.9999992349679169, iteration: 119150
loss: 0.9695136547088623,grad_norm: 0.9999992813862676, iteration: 119151
loss: 1.0863507986068726,grad_norm: 0.9999997037510868, iteration: 119152
loss: 1.0146706104278564,grad_norm: 0.9999991467047088, iteration: 119153
loss: 0.9930493831634521,grad_norm: 0.9336878137765234, iteration: 119154
loss: 1.0287123918533325,grad_norm: 0.9999990432424666, iteration: 119155
loss: 1.008590817451477,grad_norm: 0.9999991457977798, iteration: 119156
loss: 1.0271662473678589,grad_norm: 0.9999993925284127, iteration: 119157
loss: 1.075239658355713,grad_norm: 0.9999998785273185, iteration: 119158
loss: 1.010794758796692,grad_norm: 0.9788262644478889, iteration: 119159
loss: 1.0241608619689941,grad_norm: 0.9999990362825388, iteration: 119160
loss: 1.017360806465149,grad_norm: 0.9999992983126246, iteration: 119161
loss: 0.9881860017776489,grad_norm: 0.9999990726052121, iteration: 119162
loss: 1.005785584449768,grad_norm: 0.999999002696482, iteration: 119163
loss: 1.0193710327148438,grad_norm: 0.9999991306988353, iteration: 119164
loss: 1.0054285526275635,grad_norm: 0.9074861658119421, iteration: 119165
loss: 1.007095456123352,grad_norm: 0.9999993068941528, iteration: 119166
loss: 0.986051082611084,grad_norm: 0.9999991116383187, iteration: 119167
loss: 1.003636121749878,grad_norm: 0.999999217787842, iteration: 119168
loss: 0.9510623216629028,grad_norm: 0.909952920606565, iteration: 119169
loss: 0.9753259420394897,grad_norm: 0.9999992204645508, iteration: 119170
loss: 0.9722577929496765,grad_norm: 0.8565807774583649, iteration: 119171
loss: 0.9926142692565918,grad_norm: 0.9999990713130642, iteration: 119172
loss: 1.016601324081421,grad_norm: 0.9999992313754266, iteration: 119173
loss: 1.0085859298706055,grad_norm: 0.9999991795954755, iteration: 119174
loss: 1.0014623403549194,grad_norm: 0.9999990078262205, iteration: 119175
loss: 1.085900902748108,grad_norm: 0.9999998333389858, iteration: 119176
loss: 1.0463002920150757,grad_norm: 0.9999990316717933, iteration: 119177
loss: 1.0055159330368042,grad_norm: 0.9816606252427386, iteration: 119178
loss: 1.0182740688323975,grad_norm: 0.9407439346945841, iteration: 119179
loss: 0.997628390789032,grad_norm: 0.9999990182810358, iteration: 119180
loss: 0.9731796979904175,grad_norm: 0.9999990679539822, iteration: 119181
loss: 0.961641788482666,grad_norm: 0.9999991432498628, iteration: 119182
loss: 1.000834584236145,grad_norm: 0.9999991622708997, iteration: 119183
loss: 1.0141321420669556,grad_norm: 0.9203390981589803, iteration: 119184
loss: 0.9949540495872498,grad_norm: 0.9991280711936207, iteration: 119185
loss: 0.9645215272903442,grad_norm: 0.9999991281233267, iteration: 119186
loss: 1.003065824508667,grad_norm: 0.9999990975154264, iteration: 119187
loss: 0.9992693662643433,grad_norm: 0.9529184999619703, iteration: 119188
loss: 0.9843389391899109,grad_norm: 0.9999991272006976, iteration: 119189
loss: 1.0385650396347046,grad_norm: 0.9999992520136455, iteration: 119190
loss: 0.9910776615142822,grad_norm: 0.9999989226274438, iteration: 119191
loss: 0.9784337282180786,grad_norm: 0.9999998856639775, iteration: 119192
loss: 1.0423885583877563,grad_norm: 0.9999991180079889, iteration: 119193
loss: 1.0449835062026978,grad_norm: 0.9999989613035648, iteration: 119194
loss: 0.991993248462677,grad_norm: 0.9999992487260734, iteration: 119195
loss: 0.9684611558914185,grad_norm: 0.9999990301269815, iteration: 119196
loss: 1.0245929956436157,grad_norm: 0.8550804410250219, iteration: 119197
loss: 1.0243799686431885,grad_norm: 0.9191665948133998, iteration: 119198
loss: 1.1835153102874756,grad_norm: 0.9999998949239943, iteration: 119199
loss: 0.9951598048210144,grad_norm: 0.999999172326103, iteration: 119200
loss: 1.0170212984085083,grad_norm: 0.999999022565977, iteration: 119201
loss: 0.9809777736663818,grad_norm: 0.9999991466285367, iteration: 119202
loss: 1.0305893421173096,grad_norm: 0.999999183528628, iteration: 119203
loss: 0.9945738911628723,grad_norm: 0.9999991591314157, iteration: 119204
loss: 1.0190770626068115,grad_norm: 0.9999992189801371, iteration: 119205
loss: 0.9416781067848206,grad_norm: 0.9999991232123959, iteration: 119206
loss: 0.994400143623352,grad_norm: 0.9999992674156952, iteration: 119207
loss: 1.0132653713226318,grad_norm: 0.9999991836168817, iteration: 119208
loss: 1.0156548023223877,grad_norm: 0.9473579979903323, iteration: 119209
loss: 1.1369199752807617,grad_norm: 0.9999990892406725, iteration: 119210
loss: 0.9658755660057068,grad_norm: 0.9284327892357896, iteration: 119211
loss: 0.9671820998191833,grad_norm: 0.9999990949074047, iteration: 119212
loss: 0.996757447719574,grad_norm: 0.9999989429171537, iteration: 119213
loss: 1.0632363557815552,grad_norm: 0.999999176809701, iteration: 119214
loss: 0.9623044729232788,grad_norm: 0.9999991279266263, iteration: 119215
loss: 0.994569718837738,grad_norm: 0.9999991491146666, iteration: 119216
loss: 0.9808333516120911,grad_norm: 0.9999990525620696, iteration: 119217
loss: 1.0025750398635864,grad_norm: 0.9999991094075362, iteration: 119218
loss: 0.9731234908103943,grad_norm: 0.9999992550766079, iteration: 119219
loss: 1.0044244527816772,grad_norm: 0.9999989895666745, iteration: 119220
loss: 1.0038684606552124,grad_norm: 0.9999992143975729, iteration: 119221
loss: 0.9990912079811096,grad_norm: 0.9999993977928756, iteration: 119222
loss: 1.0225346088409424,grad_norm: 0.9999998419054075, iteration: 119223
loss: 1.014484167098999,grad_norm: 0.9993342664861621, iteration: 119224
loss: 1.0145280361175537,grad_norm: 0.8360399168515403, iteration: 119225
loss: 1.0400066375732422,grad_norm: 0.9999990714420499, iteration: 119226
loss: 1.0235638618469238,grad_norm: 0.9999990367531243, iteration: 119227
loss: 1.0464810132980347,grad_norm: 0.9191534472754742, iteration: 119228
loss: 1.0015974044799805,grad_norm: 0.9999991630751731, iteration: 119229
loss: 1.0456501245498657,grad_norm: 0.9999991549386753, iteration: 119230
loss: 1.0077999830245972,grad_norm: 0.9999992282356455, iteration: 119231
loss: 1.0208741426467896,grad_norm: 0.9999991805582545, iteration: 119232
loss: 1.0064959526062012,grad_norm: 0.9999991063421334, iteration: 119233
loss: 0.9982040524482727,grad_norm: 0.9999996769672007, iteration: 119234
loss: 1.0883772373199463,grad_norm: 0.9999990850320922, iteration: 119235
loss: 0.9931743144989014,grad_norm: 0.9999990731298896, iteration: 119236
loss: 0.9812764525413513,grad_norm: 0.9203842914354262, iteration: 119237
loss: 1.0043481588363647,grad_norm: 0.9920161098729623, iteration: 119238
loss: 1.0158904790878296,grad_norm: 0.9489821077204528, iteration: 119239
loss: 0.980688750743866,grad_norm: 0.9999992778937733, iteration: 119240
loss: 1.019325613975525,grad_norm: 0.9999990179569284, iteration: 119241
loss: 0.9570754170417786,grad_norm: 0.9999992859013066, iteration: 119242
loss: 1.0009397268295288,grad_norm: 0.9999991042216844, iteration: 119243
loss: 1.0108668804168701,grad_norm: 0.9975431096617114, iteration: 119244
loss: 0.9887660145759583,grad_norm: 0.9227391504135527, iteration: 119245
loss: 0.9909331798553467,grad_norm: 0.9655647920581055, iteration: 119246
loss: 1.0292335748672485,grad_norm: 0.9999990208592476, iteration: 119247
loss: 1.0151214599609375,grad_norm: 0.9999991600283761, iteration: 119248
loss: 1.005112886428833,grad_norm: 0.9340186834900707, iteration: 119249
loss: 0.9806623458862305,grad_norm: 0.8589708991816559, iteration: 119250
loss: 0.9578951597213745,grad_norm: 0.8818326445933778, iteration: 119251
loss: 1.1110105514526367,grad_norm: 0.9999993241336441, iteration: 119252
loss: 0.9904020428657532,grad_norm: 0.9999992094091947, iteration: 119253
loss: 1.014807939529419,grad_norm: 0.9999995052452109, iteration: 119254
loss: 0.9994140267372131,grad_norm: 0.9999992156436546, iteration: 119255
loss: 0.9980130195617676,grad_norm: 0.9999992754553401, iteration: 119256
loss: 1.0192371606826782,grad_norm: 0.9999991124135491, iteration: 119257
loss: 0.9719936847686768,grad_norm: 0.9999991061754875, iteration: 119258
loss: 1.0100127458572388,grad_norm: 0.8127375040206204, iteration: 119259
loss: 0.9678711891174316,grad_norm: 0.9824374672020204, iteration: 119260
loss: 0.9995926022529602,grad_norm: 0.9999993747900576, iteration: 119261
loss: 0.9898611903190613,grad_norm: 0.8661423689199127, iteration: 119262
loss: 0.9921362996101379,grad_norm: 0.9999992145839791, iteration: 119263
loss: 0.986055314540863,grad_norm: 0.9999989639607696, iteration: 119264
loss: 0.9918564558029175,grad_norm: 0.9640953842516692, iteration: 119265
loss: 1.0106415748596191,grad_norm: 0.9814642994100724, iteration: 119266
loss: 0.9797658324241638,grad_norm: 0.9976956288031611, iteration: 119267
loss: 1.040310263633728,grad_norm: 0.9999994218822907, iteration: 119268
loss: 1.0171030759811401,grad_norm: 0.9552935149296491, iteration: 119269
loss: 1.0311527252197266,grad_norm: 0.8640148650967815, iteration: 119270
loss: 0.9974302649497986,grad_norm: 0.9999990264842635, iteration: 119271
loss: 1.0146647691726685,grad_norm: 0.9999992673112312, iteration: 119272
loss: 0.9930631518363953,grad_norm: 0.9999990675944926, iteration: 119273
loss: 0.9761444926261902,grad_norm: 0.9999991469073675, iteration: 119274
loss: 1.0000007152557373,grad_norm: 0.9939854083995311, iteration: 119275
loss: 0.994109570980072,grad_norm: 0.9039293479358447, iteration: 119276
loss: 0.9408584237098694,grad_norm: 0.9999991214330778, iteration: 119277
loss: 1.0053759813308716,grad_norm: 0.912235128923274, iteration: 119278
loss: 1.008584976196289,grad_norm: 0.9999991432927422, iteration: 119279
loss: 0.9683341383934021,grad_norm: 0.8947646646479771, iteration: 119280
loss: 1.0092501640319824,grad_norm: 0.9999991285259169, iteration: 119281
loss: 1.007210612297058,grad_norm: 0.8744975014607631, iteration: 119282
loss: 0.9916118383407593,grad_norm: 0.9999991239640023, iteration: 119283
loss: 1.0354526042938232,grad_norm: 0.9163370166607313, iteration: 119284
loss: 1.0082895755767822,grad_norm: 0.958353624263069, iteration: 119285
loss: 0.9857826828956604,grad_norm: 0.9999990761364396, iteration: 119286
loss: 1.021610975265503,grad_norm: 0.9999991575783219, iteration: 119287
loss: 1.0239049196243286,grad_norm: 0.9999991330755279, iteration: 119288
loss: 1.0126326084136963,grad_norm: 0.9999991975753747, iteration: 119289
loss: 0.9791197180747986,grad_norm: 0.9057546083832312, iteration: 119290
loss: 1.0055409669876099,grad_norm: 0.999999253419592, iteration: 119291
loss: 0.997683584690094,grad_norm: 0.9999991227195325, iteration: 119292
loss: 0.9514625072479248,grad_norm: 0.9999992237987799, iteration: 119293
loss: 0.9751642942428589,grad_norm: 0.9815213746081822, iteration: 119294
loss: 0.9860824346542358,grad_norm: 0.9833646877388255, iteration: 119295
loss: 1.000490665435791,grad_norm: 0.9999990552542852, iteration: 119296
loss: 0.9880362749099731,grad_norm: 0.8373489813449807, iteration: 119297
loss: 0.9686080813407898,grad_norm: 0.9999990621176228, iteration: 119298
loss: 1.025922417640686,grad_norm: 0.9999993404067025, iteration: 119299
loss: 1.026917576789856,grad_norm: 0.9999990591692249, iteration: 119300
loss: 0.9915827512741089,grad_norm: 0.9999990925466837, iteration: 119301
loss: 1.016243815422058,grad_norm: 0.9795106688756794, iteration: 119302
loss: 0.9929159283638,grad_norm: 0.9999990868644717, iteration: 119303
loss: 0.9911910891532898,grad_norm: 0.9999991248408653, iteration: 119304
loss: 1.0063698291778564,grad_norm: 0.9248654754826031, iteration: 119305
loss: 1.011391282081604,grad_norm: 0.9999989112240555, iteration: 119306
loss: 0.9706616997718811,grad_norm: 0.8983963155686764, iteration: 119307
loss: 1.0697429180145264,grad_norm: 0.9999996205852898, iteration: 119308
loss: 0.9650923609733582,grad_norm: 0.9999991959644077, iteration: 119309
loss: 1.0010714530944824,grad_norm: 0.9485357243771647, iteration: 119310
loss: 0.9645228981971741,grad_norm: 0.9999990081470482, iteration: 119311
loss: 1.048259973526001,grad_norm: 0.9999993357475746, iteration: 119312
loss: 1.0223288536071777,grad_norm: 0.8240783052632229, iteration: 119313
loss: 1.0166685581207275,grad_norm: 0.9999992643042626, iteration: 119314
loss: 1.0099308490753174,grad_norm: 0.999999069651767, iteration: 119315
loss: 1.170503854751587,grad_norm: 0.999999476464994, iteration: 119316
loss: 1.0085105895996094,grad_norm: 0.9999992025431206, iteration: 119317
loss: 0.9867357015609741,grad_norm: 0.8357449927102396, iteration: 119318
loss: 1.0123069286346436,grad_norm: 0.9999991758989598, iteration: 119319
loss: 1.0020391941070557,grad_norm: 0.9999990040687255, iteration: 119320
loss: 0.9874204993247986,grad_norm: 0.9255571702688772, iteration: 119321
loss: 1.0260365009307861,grad_norm: 0.9999990013482316, iteration: 119322
loss: 1.0254441499710083,grad_norm: 0.9479462478868009, iteration: 119323
loss: 1.0878477096557617,grad_norm: 0.9999993539409078, iteration: 119324
loss: 1.0019216537475586,grad_norm: 0.9694634633429099, iteration: 119325
loss: 1.0351313352584839,grad_norm: 0.8994403247981247, iteration: 119326
loss: 1.0099873542785645,grad_norm: 0.9497345000130407, iteration: 119327
loss: 1.018900990486145,grad_norm: 0.9999991904813427, iteration: 119328
loss: 1.02734375,grad_norm: 0.9438221312451339, iteration: 119329
loss: 0.989384114742279,grad_norm: 0.9999991494076652, iteration: 119330
loss: 1.0019818544387817,grad_norm: 0.9999992499873047, iteration: 119331
loss: 0.996083676815033,grad_norm: 0.9999989290975534, iteration: 119332
loss: 1.0430299043655396,grad_norm: 0.9999994239232325, iteration: 119333
loss: 0.9752766489982605,grad_norm: 0.9999990886550303, iteration: 119334
loss: 1.0235636234283447,grad_norm: 0.8199005325959314, iteration: 119335
loss: 0.960928201675415,grad_norm: 0.9999991097778108, iteration: 119336
loss: 1.0037201642990112,grad_norm: 0.9998882955657505, iteration: 119337
loss: 1.0225030183792114,grad_norm: 0.9940402341924327, iteration: 119338
loss: 1.0276310443878174,grad_norm: 0.9999990743617679, iteration: 119339
loss: 0.9562374353408813,grad_norm: 0.999999126094849, iteration: 119340
loss: 1.006773591041565,grad_norm: 0.9759115913049782, iteration: 119341
loss: 1.0284132957458496,grad_norm: 0.9999991523136441, iteration: 119342
loss: 1.007991909980774,grad_norm: 0.9840543949578201, iteration: 119343
loss: 0.994540810585022,grad_norm: 0.9999991869181554, iteration: 119344
loss: 1.0312551259994507,grad_norm: 0.9999990702009941, iteration: 119345
loss: 0.9579070806503296,grad_norm: 0.9763261629550788, iteration: 119346
loss: 1.031611680984497,grad_norm: 0.9943363447562124, iteration: 119347
loss: 0.9682848453521729,grad_norm: 0.9999989228135752, iteration: 119348
loss: 1.0185761451721191,grad_norm: 0.9999993845432578, iteration: 119349
loss: 0.9937859177589417,grad_norm: 0.9999992033581362, iteration: 119350
loss: 0.9951492547988892,grad_norm: 0.9999997987974283, iteration: 119351
loss: 0.9879878163337708,grad_norm: 0.9999992293315193, iteration: 119352
loss: 1.004319190979004,grad_norm: 0.9203433239225932, iteration: 119353
loss: 1.0159358978271484,grad_norm: 0.9999992266224117, iteration: 119354
loss: 1.0246293544769287,grad_norm: 0.9999990900265187, iteration: 119355
loss: 1.0190911293029785,grad_norm: 0.9999990463114083, iteration: 119356
loss: 0.963134229183197,grad_norm: 0.965285653710879, iteration: 119357
loss: 1.0230334997177124,grad_norm: 0.9286537017671999, iteration: 119358
loss: 0.9933748245239258,grad_norm: 0.9999990268994972, iteration: 119359
loss: 1.0250747203826904,grad_norm: 0.8533414333290044, iteration: 119360
loss: 1.0215777158737183,grad_norm: 0.9761246644709471, iteration: 119361
loss: 1.0127485990524292,grad_norm: 0.9999992307807567, iteration: 119362
loss: 1.041185736656189,grad_norm: 0.9900689532656783, iteration: 119363
loss: 0.9829273819923401,grad_norm: 0.9999990935537266, iteration: 119364
loss: 1.0287177562713623,grad_norm: 0.999999059974854, iteration: 119365
loss: 0.995341956615448,grad_norm: 0.99999908304012, iteration: 119366
loss: 0.972498893737793,grad_norm: 0.9531802457852403, iteration: 119367
loss: 1.023993730545044,grad_norm: 0.999999101285163, iteration: 119368
loss: 0.9909375905990601,grad_norm: 0.9999992786188726, iteration: 119369
loss: 1.0295790433883667,grad_norm: 0.896937639465948, iteration: 119370
loss: 1.0029172897338867,grad_norm: 0.9535770115227608, iteration: 119371
loss: 1.0281654596328735,grad_norm: 0.9465392856115906, iteration: 119372
loss: 0.9902680516242981,grad_norm: 0.9999991894489206, iteration: 119373
loss: 1.0047168731689453,grad_norm: 0.8639702308668881, iteration: 119374
loss: 0.9507350325584412,grad_norm: 0.9999991355357597, iteration: 119375
loss: 0.9953163862228394,grad_norm: 0.974585834600669, iteration: 119376
loss: 0.9818492531776428,grad_norm: 0.9527207627904279, iteration: 119377
loss: 0.958594024181366,grad_norm: 0.9538546455819505, iteration: 119378
loss: 0.9807829260826111,grad_norm: 0.9218120570270693, iteration: 119379
loss: 1.025242567062378,grad_norm: 0.9720786568214655, iteration: 119380
loss: 1.0005276203155518,grad_norm: 0.9524502808372586, iteration: 119381
loss: 0.9930893182754517,grad_norm: 0.9999991762879855, iteration: 119382
loss: 1.0161234140396118,grad_norm: 0.9442215500889197, iteration: 119383
loss: 0.9999324083328247,grad_norm: 0.9798298638689117, iteration: 119384
loss: 0.9880881309509277,grad_norm: 0.8810384321128691, iteration: 119385
loss: 0.9911150336265564,grad_norm: 0.9999991607222104, iteration: 119386
loss: 0.9829553961753845,grad_norm: 0.8910703346764454, iteration: 119387
loss: 1.0581086874008179,grad_norm: 0.9999993179534471, iteration: 119388
loss: 1.073676347732544,grad_norm: 0.9999996528119521, iteration: 119389
loss: 1.0054237842559814,grad_norm: 0.9999990535634701, iteration: 119390
loss: 1.0395634174346924,grad_norm: 0.9999993910291506, iteration: 119391
loss: 0.9538676142692566,grad_norm: 0.9999992082504199, iteration: 119392
loss: 1.023019552230835,grad_norm: 0.999999162367333, iteration: 119393
loss: 1.0259650945663452,grad_norm: 0.9999991860563321, iteration: 119394
loss: 1.032991647720337,grad_norm: 0.8694746252882365, iteration: 119395
loss: 0.9877980947494507,grad_norm: 0.9553754722308405, iteration: 119396
loss: 1.0264995098114014,grad_norm: 0.9999991459319433, iteration: 119397
loss: 0.9744349122047424,grad_norm: 0.9353278954891695, iteration: 119398
loss: 1.0128791332244873,grad_norm: 0.8786786109216247, iteration: 119399
loss: 1.0101611614227295,grad_norm: 0.9999989841808338, iteration: 119400
loss: 0.9979245066642761,grad_norm: 0.999999257796287, iteration: 119401
loss: 0.9931826591491699,grad_norm: 0.9272168100616874, iteration: 119402
loss: 0.9626931548118591,grad_norm: 0.9696506733219891, iteration: 119403
loss: 1.011201024055481,grad_norm: 0.9999989872972341, iteration: 119404
loss: 1.0090651512145996,grad_norm: 0.9999993934584795, iteration: 119405
loss: 0.9970101714134216,grad_norm: 0.999999171331797, iteration: 119406
loss: 1.0012353658676147,grad_norm: 0.9999991313984047, iteration: 119407
loss: 1.000764012336731,grad_norm: 0.9999992830825024, iteration: 119408
loss: 0.977408230304718,grad_norm: 0.9999992409118362, iteration: 119409
loss: 1.0362979173660278,grad_norm: 0.9478836129342388, iteration: 119410
loss: 1.0137498378753662,grad_norm: 0.9999992959090575, iteration: 119411
loss: 0.9830726981163025,grad_norm: 0.9822427755905636, iteration: 119412
loss: 0.983111560344696,grad_norm: 0.9568232684396708, iteration: 119413
loss: 0.9813021421432495,grad_norm: 0.9999988995547495, iteration: 119414
loss: 0.9711971879005432,grad_norm: 0.9524049181221018, iteration: 119415
loss: 0.9972400665283203,grad_norm: 0.9999990575459623, iteration: 119416
loss: 1.0268925428390503,grad_norm: 0.9999992250300216, iteration: 119417
loss: 0.988182783126831,grad_norm: 0.9999991822919277, iteration: 119418
loss: 0.9769994020462036,grad_norm: 0.9999989738887607, iteration: 119419
loss: 1.0131913423538208,grad_norm: 0.9254098955341744, iteration: 119420
loss: 1.004402756690979,grad_norm: 0.9999991386494176, iteration: 119421
loss: 0.9640207290649414,grad_norm: 0.9665458461121846, iteration: 119422
loss: 0.9994269609451294,grad_norm: 0.8741388695783512, iteration: 119423
loss: 0.9931579828262329,grad_norm: 0.9910398383821659, iteration: 119424
loss: 0.9835088849067688,grad_norm: 0.9541582230495484, iteration: 119425
loss: 1.0615510940551758,grad_norm: 0.9999990659394213, iteration: 119426
loss: 0.9951835870742798,grad_norm: 0.9999990477018103, iteration: 119427
loss: 1.0349279642105103,grad_norm: 0.9999991252131839, iteration: 119428
loss: 1.0232406854629517,grad_norm: 0.9999996747908839, iteration: 119429
loss: 1.0330634117126465,grad_norm: 0.9072341125227095, iteration: 119430
loss: 0.9772520065307617,grad_norm: 0.9999989954421589, iteration: 119431
loss: 1.013231873512268,grad_norm: 0.999999117730814, iteration: 119432
loss: 0.9965621829032898,grad_norm: 0.9999991769526638, iteration: 119433
loss: 0.9794217944145203,grad_norm: 0.8916170871449216, iteration: 119434
loss: 0.987602949142456,grad_norm: 0.9439505942649772, iteration: 119435
loss: 1.0064955949783325,grad_norm: 0.9999991971041704, iteration: 119436
loss: 0.9953073263168335,grad_norm: 0.9999989794869634, iteration: 119437
loss: 1.007025122642517,grad_norm: 0.999999053815971, iteration: 119438
loss: 0.9768276214599609,grad_norm: 0.9999991833390234, iteration: 119439
loss: 0.9847740530967712,grad_norm: 0.9999991260013614, iteration: 119440
loss: 0.9382140040397644,grad_norm: 0.8901124026433758, iteration: 119441
loss: 1.0152636766433716,grad_norm: 0.9999991331790552, iteration: 119442
loss: 0.9870318174362183,grad_norm: 0.9999991496127251, iteration: 119443
loss: 1.0489870309829712,grad_norm: 0.9908358643100688, iteration: 119444
loss: 0.9810669422149658,grad_norm: 0.9999992309085416, iteration: 119445
loss: 1.0158761739730835,grad_norm: 0.9999992460289816, iteration: 119446
loss: 1.0258774757385254,grad_norm: 0.9186556246601553, iteration: 119447
loss: 1.018702745437622,grad_norm: 0.9999992186294325, iteration: 119448
loss: 1.005146861076355,grad_norm: 0.9999992664697398, iteration: 119449
loss: 0.9861060380935669,grad_norm: 0.9999991651528407, iteration: 119450
loss: 0.9844646453857422,grad_norm: 0.9999991536173315, iteration: 119451
loss: 0.9961608648300171,grad_norm: 0.99999905911382, iteration: 119452
loss: 0.989130437374115,grad_norm: 0.9999990750350242, iteration: 119453
loss: 0.9918299317359924,grad_norm: 0.9725461296466007, iteration: 119454
loss: 0.9979638457298279,grad_norm: 0.9999992531288263, iteration: 119455
loss: 1.0192339420318604,grad_norm: 0.9999992229942876, iteration: 119456
loss: 1.0325833559036255,grad_norm: 0.9214614660148741, iteration: 119457
loss: 1.0004289150238037,grad_norm: 0.8964915165311647, iteration: 119458
loss: 0.978851318359375,grad_norm: 0.8847802490646306, iteration: 119459
loss: 1.008745551109314,grad_norm: 0.9999990481478179, iteration: 119460
loss: 1.0071537494659424,grad_norm: 0.999999191699951, iteration: 119461
loss: 1.014323115348816,grad_norm: 0.8258751380927406, iteration: 119462
loss: 1.0088584423065186,grad_norm: 0.951961411464882, iteration: 119463
loss: 1.0569719076156616,grad_norm: 0.9999993125929186, iteration: 119464
loss: 1.0014430284500122,grad_norm: 0.9999991799063084, iteration: 119465
loss: 0.9610341191291809,grad_norm: 0.9256281020285679, iteration: 119466
loss: 1.0193555355072021,grad_norm: 0.99613810084023, iteration: 119467
loss: 1.0133228302001953,grad_norm: 0.9999990019925851, iteration: 119468
loss: 0.9961630702018738,grad_norm: 0.9999991279123028, iteration: 119469
loss: 0.9901515245437622,grad_norm: 0.9999991244093269, iteration: 119470
loss: 0.9802498817443848,grad_norm: 0.9999991452440383, iteration: 119471
loss: 1.0007297992706299,grad_norm: 0.9999988991580234, iteration: 119472
loss: 0.9917094707489014,grad_norm: 0.9999990918321823, iteration: 119473
loss: 1.0903981924057007,grad_norm: 0.9999996206308958, iteration: 119474
loss: 0.9906337857246399,grad_norm: 0.9999990418090071, iteration: 119475
loss: 0.9880030751228333,grad_norm: 0.8196118501421217, iteration: 119476
loss: 0.9882057309150696,grad_norm: 0.999999183478864, iteration: 119477
loss: 0.9815493822097778,grad_norm: 0.999999089763638, iteration: 119478
loss: 0.9853107929229736,grad_norm: 0.9661247908355768, iteration: 119479
loss: 1.009893536567688,grad_norm: 0.9999990007754137, iteration: 119480
loss: 0.9989855885505676,grad_norm: 0.9999991283114119, iteration: 119481
loss: 0.993053674697876,grad_norm: 0.9999991427106304, iteration: 119482
loss: 0.9966869354248047,grad_norm: 0.9770173373526645, iteration: 119483
loss: 1.0891982316970825,grad_norm: 0.9999994753588591, iteration: 119484
loss: 0.9980379939079285,grad_norm: 0.9999990063908102, iteration: 119485
loss: 0.9863888025283813,grad_norm: 0.9952359899115956, iteration: 119486
loss: 1.0238932371139526,grad_norm: 0.9999995222064304, iteration: 119487
loss: 0.983696699142456,grad_norm: 0.9999991257051267, iteration: 119488
loss: 0.976449728012085,grad_norm: 0.9629788131812936, iteration: 119489
loss: 1.0012154579162598,grad_norm: 0.9999992638307064, iteration: 119490
loss: 0.9723874926567078,grad_norm: 0.9499282358427965, iteration: 119491
loss: 1.0204799175262451,grad_norm: 0.9999991228594483, iteration: 119492
loss: 0.9889957904815674,grad_norm: 0.9999991374399969, iteration: 119493
loss: 0.9539270401000977,grad_norm: 0.9999990657065885, iteration: 119494
loss: 0.9721633195877075,grad_norm: 0.9999991688158444, iteration: 119495
loss: 1.0085440874099731,grad_norm: 0.9999990867678475, iteration: 119496
loss: 1.0987927913665771,grad_norm: 0.9999995331993891, iteration: 119497
loss: 0.9793972969055176,grad_norm: 0.9999990336971887, iteration: 119498
loss: 1.0026907920837402,grad_norm: 0.9999991574241301, iteration: 119499
loss: 0.9819162487983704,grad_norm: 0.9999990177994312, iteration: 119500
loss: 1.0303609371185303,grad_norm: 0.9999992119624117, iteration: 119501
loss: 1.0147067308425903,grad_norm: 0.9999990619181284, iteration: 119502
loss: 1.035736322402954,grad_norm: 0.9999990585111851, iteration: 119503
loss: 1.012365460395813,grad_norm: 0.99999907016823, iteration: 119504
loss: 1.0126478672027588,grad_norm: 0.9999990506694719, iteration: 119505
loss: 1.0064250230789185,grad_norm: 0.9722790989447134, iteration: 119506
loss: 1.0334267616271973,grad_norm: 0.9068919284096949, iteration: 119507
loss: 1.0039011240005493,grad_norm: 0.9999989507006687, iteration: 119508
loss: 0.9646613001823425,grad_norm: 0.9419481887532354, iteration: 119509
loss: 0.991737425327301,grad_norm: 0.9999992278491412, iteration: 119510
loss: 1.0004719495773315,grad_norm: 0.9726364275736545, iteration: 119511
loss: 1.0169932842254639,grad_norm: 0.9249931473539442, iteration: 119512
loss: 1.0030168294906616,grad_norm: 0.9036973358875692, iteration: 119513
loss: 1.0240800380706787,grad_norm: 0.9999990587483519, iteration: 119514
loss: 0.9785163402557373,grad_norm: 0.9999989852111109, iteration: 119515
loss: 1.022985816001892,grad_norm: 0.9999991128725109, iteration: 119516
loss: 1.0056195259094238,grad_norm: 0.9271410969038142, iteration: 119517
loss: 1.0179262161254883,grad_norm: 0.9999991183692949, iteration: 119518
loss: 1.0165798664093018,grad_norm: 0.9850977740942982, iteration: 119519
loss: 1.0188437700271606,grad_norm: 0.9999991161185502, iteration: 119520
loss: 1.011157751083374,grad_norm: 0.9999990347266939, iteration: 119521
loss: 1.0305076837539673,grad_norm: 0.9999991831467115, iteration: 119522
loss: 0.9831680655479431,grad_norm: 0.8980469169639237, iteration: 119523
loss: 0.9871017336845398,grad_norm: 0.9999990703125103, iteration: 119524
loss: 0.9801662564277649,grad_norm: 0.9999991825006137, iteration: 119525
loss: 1.0548633337020874,grad_norm: 0.9999992828328323, iteration: 119526
loss: 0.9854505658149719,grad_norm: 0.9999990261599229, iteration: 119527
loss: 1.047841191291809,grad_norm: 0.9999995762912958, iteration: 119528
loss: 1.0410832166671753,grad_norm: 0.8569308041384389, iteration: 119529
loss: 1.0287582874298096,grad_norm: 0.8947203328316183, iteration: 119530
loss: 1.0110805034637451,grad_norm: 0.9999991806089371, iteration: 119531
loss: 1.0102355480194092,grad_norm: 0.999999156621726, iteration: 119532
loss: 0.986370861530304,grad_norm: 0.9999991014450745, iteration: 119533
loss: 0.9989523887634277,grad_norm: 0.9999991440491408, iteration: 119534
loss: 1.0042411088943481,grad_norm: 0.9999991468728865, iteration: 119535
loss: 0.9840782284736633,grad_norm: 0.9662804363017846, iteration: 119536
loss: 1.0039517879486084,grad_norm: 0.9999996746847839, iteration: 119537
loss: 1.0032798051834106,grad_norm: 0.950074403445825, iteration: 119538
loss: 1.0139118432998657,grad_norm: 0.9999992565202966, iteration: 119539
loss: 1.0164369344711304,grad_norm: 0.9999993559820435, iteration: 119540
loss: 1.011565923690796,grad_norm: 0.829861088306999, iteration: 119541
loss: 1.0048401355743408,grad_norm: 0.9999990301295905, iteration: 119542
loss: 1.013290524482727,grad_norm: 0.999999240286333, iteration: 119543
loss: 1.0293173789978027,grad_norm: 0.9999995980913545, iteration: 119544
loss: 1.0940401554107666,grad_norm: 0.9999990376801294, iteration: 119545
loss: 1.0255125761032104,grad_norm: 0.9999992051539031, iteration: 119546
loss: 0.9961955547332764,grad_norm: 0.9999991119745448, iteration: 119547
loss: 0.9604789018630981,grad_norm: 0.9999990403475096, iteration: 119548
loss: 1.019883155822754,grad_norm: 0.9999990885926513, iteration: 119549
loss: 1.0035758018493652,grad_norm: 0.9999991840347042, iteration: 119550
loss: 0.956629753112793,grad_norm: 0.9928422344312487, iteration: 119551
loss: 1.011976957321167,grad_norm: 0.9999991690673842, iteration: 119552
loss: 0.9720907211303711,grad_norm: 0.9999992898105351, iteration: 119553
loss: 1.0203911066055298,grad_norm: 0.9999992377880894, iteration: 119554
loss: 0.970964252948761,grad_norm: 0.9999993423505222, iteration: 119555
loss: 1.003373146057129,grad_norm: 0.9999990175418103, iteration: 119556
loss: 1.0083723068237305,grad_norm: 0.9999992879530281, iteration: 119557
loss: 0.9976871609687805,grad_norm: 0.9999990840155936, iteration: 119558
loss: 0.9580625295639038,grad_norm: 0.9747651464032194, iteration: 119559
loss: 0.9818941950798035,grad_norm: 0.9805618600717319, iteration: 119560
loss: 1.0294580459594727,grad_norm: 0.9999994781387368, iteration: 119561
loss: 1.000630497932434,grad_norm: 0.9999992702076935, iteration: 119562
loss: 0.9659560322761536,grad_norm: 0.9999992456555152, iteration: 119563
loss: 0.983083963394165,grad_norm: 0.9167747253145812, iteration: 119564
loss: 1.0032451152801514,grad_norm: 0.9999991568057108, iteration: 119565
loss: 1.0265198945999146,grad_norm: 0.9431914282800421, iteration: 119566
loss: 0.9773221611976624,grad_norm: 0.9999990695653757, iteration: 119567
loss: 1.0217697620391846,grad_norm: 0.9999991206739601, iteration: 119568
loss: 1.0156471729278564,grad_norm: 0.9999990836506846, iteration: 119569
loss: 0.9927261471748352,grad_norm: 0.960306593697231, iteration: 119570
loss: 1.012046217918396,grad_norm: 0.9999992397667969, iteration: 119571
loss: 1.035836100578308,grad_norm: 0.8648739748189932, iteration: 119572
loss: 0.9777343273162842,grad_norm: 0.9999990652184062, iteration: 119573
loss: 1.0152854919433594,grad_norm: 0.9999989275719995, iteration: 119574
loss: 1.063259243965149,grad_norm: 0.9999999018862133, iteration: 119575
loss: 1.0432982444763184,grad_norm: 0.9999991013469229, iteration: 119576
loss: 0.9509185552597046,grad_norm: 0.9031546927540938, iteration: 119577
loss: 1.0111030340194702,grad_norm: 0.9770375548822499, iteration: 119578
loss: 0.9962755441665649,grad_norm: 0.999999186577231, iteration: 119579
loss: 0.9972458481788635,grad_norm: 0.9999992298729685, iteration: 119580
loss: 0.9980143904685974,grad_norm: 0.8997054327607573, iteration: 119581
loss: 1.0003951787948608,grad_norm: 0.9999990817900725, iteration: 119582
loss: 0.9855848550796509,grad_norm: 0.9999992070701479, iteration: 119583
loss: 0.9346904754638672,grad_norm: 0.999999116685727, iteration: 119584
loss: 1.0156095027923584,grad_norm: 0.9672780910519879, iteration: 119585
loss: 1.011335849761963,grad_norm: 0.9999999153642651, iteration: 119586
loss: 1.0326354503631592,grad_norm: 0.9662417807688596, iteration: 119587
loss: 1.0404738187789917,grad_norm: 0.999999307744128, iteration: 119588
loss: 0.993577241897583,grad_norm: 0.9790164042726553, iteration: 119589
loss: 1.0281084775924683,grad_norm: 0.9999993607670382, iteration: 119590
loss: 1.0188523530960083,grad_norm: 0.9999989621180263, iteration: 119591
loss: 0.9547069668769836,grad_norm: 0.9978886519607627, iteration: 119592
loss: 1.0191676616668701,grad_norm: 0.8608000885598446, iteration: 119593
loss: 1.0215529203414917,grad_norm: 0.9156835548076084, iteration: 119594
loss: 0.9751681089401245,grad_norm: 0.8919191146405268, iteration: 119595
loss: 1.0580546855926514,grad_norm: 0.9999996522602835, iteration: 119596
loss: 1.0554029941558838,grad_norm: 0.9999991745646502, iteration: 119597
loss: 0.9843989014625549,grad_norm: 0.8802879642468202, iteration: 119598
loss: 1.019111156463623,grad_norm: 0.9999992313462915, iteration: 119599
loss: 1.0006073713302612,grad_norm: 0.9754387465254236, iteration: 119600
loss: 0.988481342792511,grad_norm: 0.999998992587798, iteration: 119601
loss: 1.0305266380310059,grad_norm: 0.9999990977418388, iteration: 119602
loss: 0.9812250137329102,grad_norm: 0.9999991946020179, iteration: 119603
loss: 1.008620023727417,grad_norm: 0.9675089934756786, iteration: 119604
loss: 1.074148178100586,grad_norm: 0.9999998758083205, iteration: 119605
loss: 1.0187877416610718,grad_norm: 0.9999990179020886, iteration: 119606
loss: 0.9847608804702759,grad_norm: 0.986829127417381, iteration: 119607
loss: 0.9785223007202148,grad_norm: 0.9999991629587547, iteration: 119608
loss: 1.02263343334198,grad_norm: 0.9999991725351804, iteration: 119609
loss: 0.9694843888282776,grad_norm: 0.9999991030883499, iteration: 119610
loss: 0.9705950021743774,grad_norm: 0.9167281270746703, iteration: 119611
loss: 1.033109188079834,grad_norm: 0.9999990219984443, iteration: 119612
loss: 1.0126869678497314,grad_norm: 0.9999992327663195, iteration: 119613
loss: 1.0295382738113403,grad_norm: 1.0000000112614844, iteration: 119614
loss: 1.0131113529205322,grad_norm: 0.9999990291768327, iteration: 119615
loss: 0.9981765151023865,grad_norm: 0.9999994500720281, iteration: 119616
loss: 0.9898862838745117,grad_norm: 0.9677020084040109, iteration: 119617
loss: 0.9968193173408508,grad_norm: 0.8950888217298579, iteration: 119618
loss: 1.0324293375015259,grad_norm: 0.999999185847615, iteration: 119619
loss: 1.0183871984481812,grad_norm: 0.9999990049003216, iteration: 119620
loss: 1.0223135948181152,grad_norm: 0.9931803422367098, iteration: 119621
loss: 1.0918794870376587,grad_norm: 0.999999922532486, iteration: 119622
loss: 0.9916090965270996,grad_norm: 0.999999197003561, iteration: 119623
loss: 1.0269252061843872,grad_norm: 0.9999992656282176, iteration: 119624
loss: 0.9592938423156738,grad_norm: 0.9999990670067578, iteration: 119625
loss: 0.9839306473731995,grad_norm: 0.9999991019695962, iteration: 119626
loss: 1.006596565246582,grad_norm: 0.9222991638977349, iteration: 119627
loss: 1.0116561651229858,grad_norm: 0.9551496548582088, iteration: 119628
loss: 0.9972469806671143,grad_norm: 0.8045678198241922, iteration: 119629
loss: 1.0867867469787598,grad_norm: 0.9999993035581572, iteration: 119630
loss: 0.9957906007766724,grad_norm: 0.999999037334386, iteration: 119631
loss: 0.9957929849624634,grad_norm: 0.9999990065427007, iteration: 119632
loss: 1.015274167060852,grad_norm: 0.999999155606512, iteration: 119633
loss: 0.9815677404403687,grad_norm: 0.9888613306643182, iteration: 119634
loss: 0.9798260927200317,grad_norm: 0.9898768697203562, iteration: 119635
loss: 0.9676564335823059,grad_norm: 0.9999991033845242, iteration: 119636
loss: 0.9941442608833313,grad_norm: 0.8579199414750273, iteration: 119637
loss: 0.9897426962852478,grad_norm: 0.9999990944924477, iteration: 119638
loss: 1.0043443441390991,grad_norm: 0.999999106702031, iteration: 119639
loss: 0.9944316148757935,grad_norm: 0.9999990800103905, iteration: 119640
loss: 1.0074595212936401,grad_norm: 0.9999991854342308, iteration: 119641
loss: 1.0097383260726929,grad_norm: 0.8701993834119651, iteration: 119642
loss: 0.9670717120170593,grad_norm: 0.9909425271377874, iteration: 119643
loss: 1.0102910995483398,grad_norm: 0.9134058959863657, iteration: 119644
loss: 1.0186978578567505,grad_norm: 0.999999843730155, iteration: 119645
loss: 0.993133544921875,grad_norm: 0.9999991842785773, iteration: 119646
loss: 0.9822167158126831,grad_norm: 0.9999991856447628, iteration: 119647
loss: 1.0091943740844727,grad_norm: 0.9999992312038418, iteration: 119648
loss: 0.9996535181999207,grad_norm: 0.9999992666406533, iteration: 119649
loss: 1.0326218605041504,grad_norm: 0.9889682129187675, iteration: 119650
loss: 1.0396981239318848,grad_norm: 0.913999175447372, iteration: 119651
loss: 1.0213642120361328,grad_norm: 0.8969394863624479, iteration: 119652
loss: 0.9695965647697449,grad_norm: 0.9321536328453969, iteration: 119653
loss: 1.0364235639572144,grad_norm: 0.99999984439496, iteration: 119654
loss: 1.0130209922790527,grad_norm: 0.9999992194707337, iteration: 119655
loss: 0.9963980317115784,grad_norm: 0.9999992431780345, iteration: 119656
loss: 1.0166033506393433,grad_norm: 0.9999991644947331, iteration: 119657
loss: 1.0123889446258545,grad_norm: 0.9999992991629529, iteration: 119658
loss: 0.9959475994110107,grad_norm: 0.9999990725064996, iteration: 119659
loss: 0.9896706342697144,grad_norm: 0.9747307846547396, iteration: 119660
loss: 1.0030566453933716,grad_norm: 0.9999990853007302, iteration: 119661
loss: 1.0022896528244019,grad_norm: 0.9999992080811773, iteration: 119662
loss: 1.0008515119552612,grad_norm: 0.9999990261641899, iteration: 119663
loss: 0.9838848114013672,grad_norm: 0.9999990256619227, iteration: 119664
loss: 0.9727715253829956,grad_norm: 0.9999991039620436, iteration: 119665
loss: 0.9535883069038391,grad_norm: 0.9999990445543336, iteration: 119666
loss: 1.0174411535263062,grad_norm: 0.9999990287853149, iteration: 119667
loss: 1.0036391019821167,grad_norm: 0.9999991339362411, iteration: 119668
loss: 0.9615059494972229,grad_norm: 0.9999990776561195, iteration: 119669
loss: 0.9710423946380615,grad_norm: 0.9293536664518046, iteration: 119670
loss: 0.9696688652038574,grad_norm: 0.9999991618704668, iteration: 119671
loss: 1.045433759689331,grad_norm: 0.9629797579551729, iteration: 119672
loss: 0.9832764863967896,grad_norm: 0.9999991836210698, iteration: 119673
loss: 0.9974748492240906,grad_norm: 0.9999992865565288, iteration: 119674
loss: 0.9618110060691833,grad_norm: 0.9999991365350449, iteration: 119675
loss: 1.1001968383789062,grad_norm: 0.9999999387634292, iteration: 119676
loss: 0.9942077398300171,grad_norm: 0.9999991893753587, iteration: 119677
loss: 1.0161964893341064,grad_norm: 0.9594747454692665, iteration: 119678
loss: 0.9854264259338379,grad_norm: 0.9999989784096273, iteration: 119679
loss: 0.98040771484375,grad_norm: 0.9999989531362594, iteration: 119680
loss: 1.0034661293029785,grad_norm: 0.9999991592508553, iteration: 119681
loss: 0.9628724455833435,grad_norm: 0.8721801162314388, iteration: 119682
loss: 0.9913349747657776,grad_norm: 0.9999990369553734, iteration: 119683
loss: 1.0033539533615112,grad_norm: 0.9999991193055238, iteration: 119684
loss: 0.9582168459892273,grad_norm: 0.9999999669238232, iteration: 119685
loss: 0.995531439781189,grad_norm: 0.9999991747857033, iteration: 119686
loss: 1.028049111366272,grad_norm: 0.9550412441913269, iteration: 119687
loss: 0.9697133302688599,grad_norm: 0.9886432524493554, iteration: 119688
loss: 1.006371259689331,grad_norm: 0.9999997715118811, iteration: 119689
loss: 0.9836673140525818,grad_norm: 0.9730132462541944, iteration: 119690
loss: 0.9876656532287598,grad_norm: 0.945424091133286, iteration: 119691
loss: 0.9952729344367981,grad_norm: 0.9999997898959548, iteration: 119692
loss: 0.9969751834869385,grad_norm: 0.7804142770918719, iteration: 119693
loss: 0.9953733682632446,grad_norm: 0.9999991204908772, iteration: 119694
loss: 0.9931900501251221,grad_norm: 0.9200346064220349, iteration: 119695
loss: 1.0141271352767944,grad_norm: 0.9626746847672953, iteration: 119696
loss: 1.0773707628250122,grad_norm: 0.9999993496401521, iteration: 119697
loss: 1.0384739637374878,grad_norm: 0.9999991430836865, iteration: 119698
loss: 0.9937803149223328,grad_norm: 0.9999992852930871, iteration: 119699
loss: 0.9843867421150208,grad_norm: 0.9452713423320522, iteration: 119700
loss: 0.9591183066368103,grad_norm: 0.9999990414296285, iteration: 119701
loss: 1.0019645690917969,grad_norm: 0.804671645232938, iteration: 119702
loss: 1.0000488758087158,grad_norm: 0.9999992644075889, iteration: 119703
loss: 1.0008667707443237,grad_norm: 0.9999993571833888, iteration: 119704
loss: 1.0350401401519775,grad_norm: 0.9999989562268955, iteration: 119705
loss: 0.9958065748214722,grad_norm: 0.9832770714086457, iteration: 119706
loss: 1.0016738176345825,grad_norm: 0.9999992255957674, iteration: 119707
loss: 1.0142790079116821,grad_norm: 0.8685400457841455, iteration: 119708
loss: 0.9810466766357422,grad_norm: 0.9576513938286896, iteration: 119709
loss: 1.0050780773162842,grad_norm: 0.9999991176436839, iteration: 119710
loss: 0.9984273314476013,grad_norm: 0.8579125920409482, iteration: 119711
loss: 1.016539454460144,grad_norm: 0.9999991347687869, iteration: 119712
loss: 0.9912406206130981,grad_norm: 0.9999992016938083, iteration: 119713
loss: 0.9792240858078003,grad_norm: 0.9772259901815078, iteration: 119714
loss: 0.9865359663963318,grad_norm: 0.8862025211154725, iteration: 119715
loss: 1.0011568069458008,grad_norm: 0.9989860201926012, iteration: 119716
loss: 0.9651341438293457,grad_norm: 0.9999991618181027, iteration: 119717
loss: 1.0517263412475586,grad_norm: 0.9999997608134187, iteration: 119718
loss: 1.000268816947937,grad_norm: 0.9999990450942179, iteration: 119719
loss: 0.977627694606781,grad_norm: 0.9638948089892563, iteration: 119720
loss: 0.9809009432792664,grad_norm: 0.9999991259590061, iteration: 119721
loss: 0.9833783507347107,grad_norm: 0.9339644267161894, iteration: 119722
loss: 0.9729676842689514,grad_norm: 0.9584270752099749, iteration: 119723
loss: 0.9559667706489563,grad_norm: 0.999999081967024, iteration: 119724
loss: 0.9949197769165039,grad_norm: 0.9999997169153122, iteration: 119725
loss: 0.9797028303146362,grad_norm: 0.9609461114575358, iteration: 119726
loss: 1.0009406805038452,grad_norm: 0.9999991149910238, iteration: 119727
loss: 0.9961115121841431,grad_norm: 0.9999990736456127, iteration: 119728
loss: 0.989265501499176,grad_norm: 0.9999992542359836, iteration: 119729
loss: 0.96147620677948,grad_norm: 0.9999991170191426, iteration: 119730
loss: 1.0317976474761963,grad_norm: 0.9999991283801806, iteration: 119731
loss: 1.017109751701355,grad_norm: 0.9999991560129502, iteration: 119732
loss: 1.0240963697433472,grad_norm: 0.9999991746104844, iteration: 119733
loss: 1.022639513015747,grad_norm: 0.917143237073623, iteration: 119734
loss: 0.9937352538108826,grad_norm: 0.9999992186651668, iteration: 119735
loss: 0.9778591394424438,grad_norm: 0.9999991660048893, iteration: 119736
loss: 1.0107402801513672,grad_norm: 0.999999994126983, iteration: 119737
loss: 0.9580599665641785,grad_norm: 0.9649021183844327, iteration: 119738
loss: 0.9849998950958252,grad_norm: 0.9999992706774322, iteration: 119739
loss: 0.998950719833374,grad_norm: 0.999998930445552, iteration: 119740
loss: 0.9841412901878357,grad_norm: 0.9999991698006012, iteration: 119741
loss: 1.0515838861465454,grad_norm: 0.9899879886148136, iteration: 119742
loss: 1.0220588445663452,grad_norm: 0.9999990391683194, iteration: 119743
loss: 1.0089366436004639,grad_norm: 0.9999995798248372, iteration: 119744
loss: 0.9959251284599304,grad_norm: 0.9999990493306116, iteration: 119745
loss: 1.0233254432678223,grad_norm: 0.9765634671628027, iteration: 119746
loss: 0.9940550327301025,grad_norm: 0.810456029696081, iteration: 119747
loss: 1.002258062362671,grad_norm: 0.9240333290166247, iteration: 119748
loss: 1.0064843893051147,grad_norm: 0.9999991267944189, iteration: 119749
loss: 1.0404081344604492,grad_norm: 0.9999989952692182, iteration: 119750
loss: 1.0266534090042114,grad_norm: 0.9878312747280745, iteration: 119751
loss: 1.011273741722107,grad_norm: 0.8875545275541205, iteration: 119752
loss: 0.9928526878356934,grad_norm: 0.9999991552155177, iteration: 119753
loss: 1.0054124593734741,grad_norm: 0.9675962106869106, iteration: 119754
loss: 1.0055062770843506,grad_norm: 0.9999990858203548, iteration: 119755
loss: 0.9570288062095642,grad_norm: 0.9177793207897452, iteration: 119756
loss: 0.995158314704895,grad_norm: 0.9999991535633376, iteration: 119757
loss: 0.9836761951446533,grad_norm: 0.9999990244405847, iteration: 119758
loss: 1.0190765857696533,grad_norm: 0.9999991636948603, iteration: 119759
loss: 0.9414322972297668,grad_norm: 0.9999990068107074, iteration: 119760
loss: 1.0059813261032104,grad_norm: 0.9999994707406957, iteration: 119761
loss: 1.0185762643814087,grad_norm: 0.8407953738087064, iteration: 119762
loss: 0.986405611038208,grad_norm: 0.9999990691940736, iteration: 119763
loss: 1.0036380290985107,grad_norm: 0.9649996684280666, iteration: 119764
loss: 1.0144423246383667,grad_norm: 0.999999025412301, iteration: 119765
loss: 0.9951000213623047,grad_norm: 0.9999990035828489, iteration: 119766
loss: 0.9817070364952087,grad_norm: 0.8905238354161593, iteration: 119767
loss: 0.9841529130935669,grad_norm: 0.9503690432218321, iteration: 119768
loss: 1.0047825574874878,grad_norm: 0.7858656013610721, iteration: 119769
loss: 1.005122184753418,grad_norm: 0.9999991463923139, iteration: 119770
loss: 1.01614511013031,grad_norm: 0.9999996560778607, iteration: 119771
loss: 1.0219956636428833,grad_norm: 0.9999992940318485, iteration: 119772
loss: 0.969904899597168,grad_norm: 0.9999991727307266, iteration: 119773
loss: 1.0024330615997314,grad_norm: 0.9999989699937132, iteration: 119774
loss: 0.9792928099632263,grad_norm: 0.9999992867527491, iteration: 119775
loss: 1.018182635307312,grad_norm: 0.9334274430578282, iteration: 119776
loss: 1.024077296257019,grad_norm: 0.882056564322855, iteration: 119777
loss: 1.0217043161392212,grad_norm: 0.9999991215373217, iteration: 119778
loss: 0.9681299328804016,grad_norm: 0.9999990249115512, iteration: 119779
loss: 1.0200917720794678,grad_norm: 0.9999992385968202, iteration: 119780
loss: 1.0475237369537354,grad_norm: 0.9999991120155524, iteration: 119781
loss: 1.0292719602584839,grad_norm: 0.9999991241920705, iteration: 119782
loss: 0.9981107711791992,grad_norm: 0.99999910308901, iteration: 119783
loss: 1.0531444549560547,grad_norm: 0.9999998336616591, iteration: 119784
loss: 1.0051501989364624,grad_norm: 0.949872315505937, iteration: 119785
loss: 0.972280740737915,grad_norm: 0.9300731439692351, iteration: 119786
loss: 1.0157314538955688,grad_norm: 0.9999991691583028, iteration: 119787
loss: 1.0139225721359253,grad_norm: 0.9091885626811022, iteration: 119788
loss: 0.979506254196167,grad_norm: 0.9999990381575901, iteration: 119789
loss: 1.0014822483062744,grad_norm: 0.9147873821214291, iteration: 119790
loss: 0.964684247970581,grad_norm: 0.9999991348287671, iteration: 119791
loss: 0.9897415637969971,grad_norm: 0.9999998456420451, iteration: 119792
loss: 1.0419789552688599,grad_norm: 0.9999995455408089, iteration: 119793
loss: 1.0011465549468994,grad_norm: 0.9999990928840647, iteration: 119794
loss: 0.9874747395515442,grad_norm: 0.999999172392094, iteration: 119795
loss: 0.9942710995674133,grad_norm: 0.937766167341746, iteration: 119796
loss: 0.9546642303466797,grad_norm: 0.8946169903590107, iteration: 119797
loss: 0.9954850077629089,grad_norm: 0.9999993485053869, iteration: 119798
loss: 1.0061625242233276,grad_norm: 0.9159027805398181, iteration: 119799
loss: 1.0036615133285522,grad_norm: 0.9999991522167281, iteration: 119800
loss: 1.009839415550232,grad_norm: 0.9999993604522741, iteration: 119801
loss: 1.0325039625167847,grad_norm: 0.9999989801068564, iteration: 119802
loss: 0.9932950139045715,grad_norm: 0.9509580401866846, iteration: 119803
loss: 1.0071439743041992,grad_norm: 0.9999991104885501, iteration: 119804
loss: 1.0082443952560425,grad_norm: 0.9999991655621361, iteration: 119805
loss: 0.9980223178863525,grad_norm: 0.8813841524928496, iteration: 119806
loss: 0.9978493452072144,grad_norm: 0.9999992502893775, iteration: 119807
loss: 0.9644452333450317,grad_norm: 0.9999992593820857, iteration: 119808
loss: 1.0230649709701538,grad_norm: 0.999999203383719, iteration: 119809
loss: 1.0093475580215454,grad_norm: 0.9765150798437285, iteration: 119810
loss: 1.014869213104248,grad_norm: 0.9999990466557047, iteration: 119811
loss: 1.0375124216079712,grad_norm: 0.9999991409050637, iteration: 119812
loss: 0.9799388647079468,grad_norm: 0.925313149586459, iteration: 119813
loss: 0.979846179485321,grad_norm: 0.9999990487106728, iteration: 119814
loss: 0.959775447845459,grad_norm: 0.9999993181346664, iteration: 119815
loss: 1.0201376676559448,grad_norm: 0.9999990476183733, iteration: 119816
loss: 0.9651880860328674,grad_norm: 0.9999991776377924, iteration: 119817
loss: 0.9841627478599548,grad_norm: 0.9999991479618788, iteration: 119818
loss: 0.9909841418266296,grad_norm: 0.9100798811805944, iteration: 119819
loss: 1.008754849433899,grad_norm: 0.8490670806948685, iteration: 119820
loss: 1.0331579446792603,grad_norm: 0.9709243354754872, iteration: 119821
loss: 1.017082691192627,grad_norm: 0.954568587276011, iteration: 119822
loss: 0.9984064102172852,grad_norm: 0.9999992114064052, iteration: 119823
loss: 1.0106955766677856,grad_norm: 0.9999994648256371, iteration: 119824
loss: 1.022800326347351,grad_norm: 0.9965640496864419, iteration: 119825
loss: 0.9994432926177979,grad_norm: 0.9901654952073476, iteration: 119826
loss: 0.9717318415641785,grad_norm: 0.8582432477374317, iteration: 119827
loss: 1.0067368745803833,grad_norm: 0.9365329138671059, iteration: 119828
loss: 1.021613597869873,grad_norm: 0.9999989687380407, iteration: 119829
loss: 0.9912932515144348,grad_norm: 0.9999992036573121, iteration: 119830
loss: 0.9971609115600586,grad_norm: 0.989193590508492, iteration: 119831
loss: 0.9963170289993286,grad_norm: 0.8945391141897033, iteration: 119832
loss: 1.0043907165527344,grad_norm: 0.9999990111007442, iteration: 119833
loss: 1.005334734916687,grad_norm: 0.9999990758237282, iteration: 119834
loss: 1.0075359344482422,grad_norm: 0.99999896077214, iteration: 119835
loss: 1.0039212703704834,grad_norm: 0.9276252828817008, iteration: 119836
loss: 1.0742229223251343,grad_norm: 0.9999992181140734, iteration: 119837
loss: 0.987213134765625,grad_norm: 0.9999990515733831, iteration: 119838
loss: 0.9605611562728882,grad_norm: 0.9999992284730271, iteration: 119839
loss: 1.0145241022109985,grad_norm: 0.8004518569270369, iteration: 119840
loss: 1.0090681314468384,grad_norm: 0.9999991567724827, iteration: 119841
loss: 1.005143642425537,grad_norm: 0.9999991047144074, iteration: 119842
loss: 1.00334632396698,grad_norm: 0.9999990855941249, iteration: 119843
loss: 1.0156824588775635,grad_norm: 0.9785730380174099, iteration: 119844
loss: 1.028754711151123,grad_norm: 0.9999990282339648, iteration: 119845
loss: 1.0172839164733887,grad_norm: 0.9999991090721515, iteration: 119846
loss: 1.0339192152023315,grad_norm: 0.9663541760510574, iteration: 119847
loss: 0.9842485785484314,grad_norm: 0.9999990793356895, iteration: 119848
loss: 1.0121865272521973,grad_norm: 0.9999992498065637, iteration: 119849
loss: 1.0149414539337158,grad_norm: 0.9999994903680592, iteration: 119850
loss: 1.0017499923706055,grad_norm: 0.9999990838800954, iteration: 119851
loss: 0.9859403967857361,grad_norm: 0.9999992328438327, iteration: 119852
loss: 0.9816774725914001,grad_norm: 0.9505377735217824, iteration: 119853
loss: 0.9391763806343079,grad_norm: 0.9999990811820273, iteration: 119854
loss: 1.041404128074646,grad_norm: 0.9750837042623213, iteration: 119855
loss: 1.0310988426208496,grad_norm: 0.9999990977030954, iteration: 119856
loss: 0.9738476872444153,grad_norm: 0.8588827300158731, iteration: 119857
loss: 1.005388855934143,grad_norm: 0.9999991169038837, iteration: 119858
loss: 1.011731505393982,grad_norm: 0.9999989726450337, iteration: 119859
loss: 1.0273650884628296,grad_norm: 0.924640268949883, iteration: 119860
loss: 0.9781069755554199,grad_norm: 0.9999991579579143, iteration: 119861
loss: 1.0279014110565186,grad_norm: 0.9999990528471193, iteration: 119862
loss: 1.0201385021209717,grad_norm: 0.9999990677482244, iteration: 119863
loss: 0.9684403538703918,grad_norm: 0.9999992283691408, iteration: 119864
loss: 1.0002108812332153,grad_norm: 0.9999991077929882, iteration: 119865
loss: 1.0057356357574463,grad_norm: 0.9683714668047846, iteration: 119866
loss: 0.9947824478149414,grad_norm: 0.8505473012020383, iteration: 119867
loss: 0.9949904680252075,grad_norm: 0.9985350094198102, iteration: 119868
loss: 1.0183018445968628,grad_norm: 0.9999994008023975, iteration: 119869
loss: 0.9981640577316284,grad_norm: 0.9999991537890658, iteration: 119870
loss: 0.9872832894325256,grad_norm: 0.99999905273982, iteration: 119871
loss: 0.9920313358306885,grad_norm: 0.9970925099783118, iteration: 119872
loss: 0.985287070274353,grad_norm: 0.9999993219985656, iteration: 119873
loss: 1.0173323154449463,grad_norm: 0.7708794864127548, iteration: 119874
loss: 0.9949762225151062,grad_norm: 0.9999990563217418, iteration: 119875
loss: 1.0139797925949097,grad_norm: 0.9999990962079135, iteration: 119876
loss: 1.0153664350509644,grad_norm: 0.9853294137587535, iteration: 119877
loss: 0.9988452196121216,grad_norm: 0.973705410475157, iteration: 119878
loss: 0.9534642100334167,grad_norm: 0.9999990233004665, iteration: 119879
loss: 1.0304865837097168,grad_norm: 0.9999991151723234, iteration: 119880
loss: 0.9994531869888306,grad_norm: 0.999999366091619, iteration: 119881
loss: 0.9616166949272156,grad_norm: 0.999999106659715, iteration: 119882
loss: 1.0309959650039673,grad_norm: 0.9999993175629752, iteration: 119883
loss: 0.9889252185821533,grad_norm: 0.9196496115881803, iteration: 119884
loss: 0.9881029725074768,grad_norm: 0.9999992102128538, iteration: 119885
loss: 0.9798107743263245,grad_norm: 0.999999230456212, iteration: 119886
loss: 0.9715639352798462,grad_norm: 0.9132829225609308, iteration: 119887
loss: 1.006253719329834,grad_norm: 0.9999990733210041, iteration: 119888
loss: 1.0473796129226685,grad_norm: 0.9999994217546733, iteration: 119889
loss: 0.9864038825035095,grad_norm: 0.9730252000059093, iteration: 119890
loss: 1.02884042263031,grad_norm: 0.9999991215146602, iteration: 119891
loss: 1.0014532804489136,grad_norm: 0.9999991746244469, iteration: 119892
loss: 0.9980229735374451,grad_norm: 0.8772019549013874, iteration: 119893
loss: 1.0079911947250366,grad_norm: 0.9999991046162467, iteration: 119894
loss: 1.0247102975845337,grad_norm: 0.9999990781757175, iteration: 119895
loss: 1.0796960592269897,grad_norm: 0.9999996824666912, iteration: 119896
loss: 0.9915738701820374,grad_norm: 0.9999990538012626, iteration: 119897
loss: 1.0200145244598389,grad_norm: 0.8761799832957005, iteration: 119898
loss: 1.0185024738311768,grad_norm: 0.864633917923175, iteration: 119899
loss: 0.9703037738800049,grad_norm: 0.9999990652799444, iteration: 119900
loss: 0.9872233867645264,grad_norm: 0.9720674034188701, iteration: 119901
loss: 1.0001863241195679,grad_norm: 0.9999990230993674, iteration: 119902
loss: 1.0062106847763062,grad_norm: 0.9999990738886911, iteration: 119903
loss: 0.9989008903503418,grad_norm: 0.9999990624145366, iteration: 119904
loss: 1.0070356130599976,grad_norm: 0.9999991665552009, iteration: 119905
loss: 0.9986233115196228,grad_norm: 0.9999990962451444, iteration: 119906
loss: 0.9923927783966064,grad_norm: 0.9999996169944316, iteration: 119907
loss: 1.0286775827407837,grad_norm: 0.9999992137463258, iteration: 119908
loss: 1.0394498109817505,grad_norm: 0.9394294603169312, iteration: 119909
loss: 1.014009714126587,grad_norm: 0.999999367989627, iteration: 119910
loss: 0.9935024380683899,grad_norm: 0.8921853873072101, iteration: 119911
loss: 0.957876443862915,grad_norm: 0.9887210599862688, iteration: 119912
loss: 1.0068490505218506,grad_norm: 0.999999280343962, iteration: 119913
loss: 1.0184361934661865,grad_norm: 0.9999990458229355, iteration: 119914
loss: 1.0729868412017822,grad_norm: 0.9999999615386196, iteration: 119915
loss: 1.0285998582839966,grad_norm: 0.8291589771064631, iteration: 119916
loss: 1.0498701333999634,grad_norm: 0.9999998973104097, iteration: 119917
loss: 1.0200611352920532,grad_norm: 0.9999990573864925, iteration: 119918
loss: 0.9567258954048157,grad_norm: 0.9497113456600893, iteration: 119919
loss: 0.9861828684806824,grad_norm: 0.9999992970807988, iteration: 119920
loss: 1.015323281288147,grad_norm: 0.9999991195505528, iteration: 119921
loss: 0.9915047287940979,grad_norm: 0.8268938588093155, iteration: 119922
loss: 0.9766685962677002,grad_norm: 0.9999992070948096, iteration: 119923
loss: 0.9910928606987,grad_norm: 0.8867301003924499, iteration: 119924
loss: 1.0211012363433838,grad_norm: 0.9362041666327571, iteration: 119925
loss: 1.0047714710235596,grad_norm: 0.9536976527302315, iteration: 119926
loss: 0.9667941331863403,grad_norm: 0.999999198219884, iteration: 119927
loss: 0.9769266843795776,grad_norm: 0.9172073237122251, iteration: 119928
loss: 1.00411856174469,grad_norm: 0.9999991189125812, iteration: 119929
loss: 1.4224478006362915,grad_norm: 0.9999998345416722, iteration: 119930
loss: 1.0030100345611572,grad_norm: 0.9999996719718688, iteration: 119931
loss: 0.9990835785865784,grad_norm: 0.9999989708121895, iteration: 119932
loss: 0.9996368288993835,grad_norm: 0.999999775273004, iteration: 119933
loss: 0.9657730460166931,grad_norm: 0.9999992729318531, iteration: 119934
loss: 1.0975452661514282,grad_norm: 0.9999998636757073, iteration: 119935
loss: 0.977908194065094,grad_norm: 0.8867420285581229, iteration: 119936
loss: 0.9945583343505859,grad_norm: 0.9549054953295805, iteration: 119937
loss: 1.030071496963501,grad_norm: 0.9999991515769248, iteration: 119938
loss: 0.9811411499977112,grad_norm: 0.9999992994871216, iteration: 119939
loss: 0.9802237749099731,grad_norm: 0.9914293029441016, iteration: 119940
loss: 1.010603427886963,grad_norm: 0.999999658220687, iteration: 119941
loss: 1.0261703729629517,grad_norm: 0.9999992385390816, iteration: 119942
loss: 1.015722393989563,grad_norm: 0.892938050211513, iteration: 119943
loss: 0.9951189756393433,grad_norm: 0.965517945588939, iteration: 119944
loss: 0.9650525450706482,grad_norm: 0.9999991947453912, iteration: 119945
loss: 1.0153666734695435,grad_norm: 0.9999989401632106, iteration: 119946
loss: 0.9771760702133179,grad_norm: 0.9358073352623926, iteration: 119947
loss: 1.0142995119094849,grad_norm: 0.999999037130089, iteration: 119948
loss: 0.9946365356445312,grad_norm: 0.9999992977184414, iteration: 119949
loss: 1.0240797996520996,grad_norm: 0.9999992177425864, iteration: 119950
loss: 0.9900402426719666,grad_norm: 0.9261192125449316, iteration: 119951
loss: 1.0045932531356812,grad_norm: 0.9999992389698628, iteration: 119952
loss: 0.9994798302650452,grad_norm: 0.9999991801884661, iteration: 119953
loss: 0.9566473960876465,grad_norm: 0.9999990782568129, iteration: 119954
loss: 1.0452663898468018,grad_norm: 0.9999993156015585, iteration: 119955
loss: 1.0220341682434082,grad_norm: 0.9999995770417213, iteration: 119956
loss: 0.987887978553772,grad_norm: 0.9999990488491268, iteration: 119957
loss: 0.9923706650733948,grad_norm: 0.9999990449367472, iteration: 119958
loss: 0.9972652792930603,grad_norm: 0.9546679250647311, iteration: 119959
loss: 1.000872254371643,grad_norm: 0.9668756669720078, iteration: 119960
loss: 1.00942862033844,grad_norm: 0.9999991710754663, iteration: 119961
loss: 1.0450116395950317,grad_norm: 0.9999991929374888, iteration: 119962
loss: 0.973955512046814,grad_norm: 0.9442075506856134, iteration: 119963
loss: 0.9547503590583801,grad_norm: 0.9999992491843113, iteration: 119964
loss: 1.0235068798065186,grad_norm: 0.9999991304940763, iteration: 119965
loss: 0.9864956736564636,grad_norm: 0.9999989681597917, iteration: 119966
loss: 1.0035037994384766,grad_norm: 0.9999990584880999, iteration: 119967
loss: 0.9799185991287231,grad_norm: 0.8424998298218167, iteration: 119968
loss: 0.9679552316665649,grad_norm: 0.9999991138759953, iteration: 119969
loss: 1.0023478269577026,grad_norm: 0.8527204348135563, iteration: 119970
loss: 1.008508563041687,grad_norm: 0.9968841599751026, iteration: 119971
loss: 1.0104175806045532,grad_norm: 0.9320216117506698, iteration: 119972
loss: 0.9827978610992432,grad_norm: 0.9147771233947545, iteration: 119973
loss: 0.9931783676147461,grad_norm: 0.8511108793536477, iteration: 119974
loss: 0.9695513248443604,grad_norm: 0.9999990761297554, iteration: 119975
loss: 1.0420668125152588,grad_norm: 0.9354606119170049, iteration: 119976
loss: 0.9981144070625305,grad_norm: 0.9999990682999034, iteration: 119977
loss: 1.037804365158081,grad_norm: 0.9999993161897369, iteration: 119978
loss: 1.0102977752685547,grad_norm: 0.8397493427268408, iteration: 119979
loss: 0.9819455146789551,grad_norm: 0.9999990763522798, iteration: 119980
loss: 1.0158823728561401,grad_norm: 0.999999163653371, iteration: 119981
loss: 0.9919999241828918,grad_norm: 0.9938181824225887, iteration: 119982
loss: 1.0136040449142456,grad_norm: 0.9999990736530991, iteration: 119983
loss: 0.9970589280128479,grad_norm: 0.9815514252929193, iteration: 119984
loss: 1.0332159996032715,grad_norm: 0.9082753091781356, iteration: 119985
loss: 1.024973750114441,grad_norm: 0.9168612971769623, iteration: 119986
loss: 0.9993869066238403,grad_norm: 0.9999993323346982, iteration: 119987
loss: 0.9904427528381348,grad_norm: 0.999999096403189, iteration: 119988
loss: 1.0120052099227905,grad_norm: 0.9999991721080331, iteration: 119989
loss: 0.9936443567276001,grad_norm: 0.9999991662539115, iteration: 119990
loss: 1.0206787586212158,grad_norm: 0.9999992431786545, iteration: 119991
loss: 1.0021692514419556,grad_norm: 0.9999991929172102, iteration: 119992
loss: 1.02464759349823,grad_norm: 0.9999990987256006, iteration: 119993
loss: 1.0197570323944092,grad_norm: 0.9999989022956256, iteration: 119994
loss: 1.023585557937622,grad_norm: 0.9999991403672719, iteration: 119995
loss: 0.9804345965385437,grad_norm: 0.9999989411458488, iteration: 119996
loss: 0.9865859746932983,grad_norm: 0.8995976275032719, iteration: 119997
loss: 0.95799320936203,grad_norm: 0.9690127439997988, iteration: 119998
loss: 1.00712251663208,grad_norm: 0.9999992393681432, iteration: 119999
loss: 1.005088210105896,grad_norm: 0.9999991316593543, iteration: 120000
Evaluating at step 120000
{'val': 0.9948103986680508, 'test': 2.643660476794258}
loss: 1.0305073261260986,grad_norm: 0.9999989848177867, iteration: 120001
loss: 0.9912230372428894,grad_norm: 0.9975558317592296, iteration: 120002
loss: 0.988939106464386,grad_norm: 0.9057452770148683, iteration: 120003
loss: 0.9743354320526123,grad_norm: 0.9999991497569134, iteration: 120004
loss: 1.0181748867034912,grad_norm: 0.9999990584455702, iteration: 120005
loss: 1.0414800643920898,grad_norm: 0.9999990563684578, iteration: 120006
loss: 0.9879083037376404,grad_norm: 0.9725225487660193, iteration: 120007
loss: 1.0128391981124878,grad_norm: 0.999999084123386, iteration: 120008
loss: 1.0377297401428223,grad_norm: 0.9492578675331925, iteration: 120009
loss: 1.0156233310699463,grad_norm: 0.9999991069068753, iteration: 120010
loss: 0.9668325781822205,grad_norm: 0.9999992410889208, iteration: 120011
loss: 0.9961421489715576,grad_norm: 0.9999991288633292, iteration: 120012
loss: 0.9819307327270508,grad_norm: 0.9999991580949457, iteration: 120013
loss: 1.0111569166183472,grad_norm: 0.8867368015634195, iteration: 120014
loss: 0.9713354110717773,grad_norm: 0.9999991885540556, iteration: 120015
loss: 1.0100458860397339,grad_norm: 0.9999991814659347, iteration: 120016
loss: 0.9635184407234192,grad_norm: 0.9383772729923825, iteration: 120017
loss: 1.0138447284698486,grad_norm: 0.9999993487878471, iteration: 120018
loss: 0.9821829795837402,grad_norm: 0.9695121481971355, iteration: 120019
loss: 1.0838048458099365,grad_norm: 0.999999614407435, iteration: 120020
loss: 1.0196093320846558,grad_norm: 0.9535441777314154, iteration: 120021
loss: 0.9798825979232788,grad_norm: 0.999999095498681, iteration: 120022
loss: 0.9680283665657043,grad_norm: 0.9915794031117032, iteration: 120023
loss: 1.0565404891967773,grad_norm: 0.999999799513797, iteration: 120024
loss: 0.9720592498779297,grad_norm: 0.999999109109566, iteration: 120025
loss: 0.9910407066345215,grad_norm: 0.9999990437301631, iteration: 120026
loss: 0.9941584467887878,grad_norm: 0.9999991558977851, iteration: 120027
loss: 1.0149571895599365,grad_norm: 0.973700040516067, iteration: 120028
loss: 1.0074830055236816,grad_norm: 0.9999992066079554, iteration: 120029
loss: 0.9682217240333557,grad_norm: 0.9999990405671069, iteration: 120030
loss: 0.986164391040802,grad_norm: 0.9999991662610652, iteration: 120031
loss: 0.990410566329956,grad_norm: 0.8597455084438673, iteration: 120032
loss: 0.9650552272796631,grad_norm: 0.9999993170088872, iteration: 120033
loss: 1.081542730331421,grad_norm: 0.9999995744768306, iteration: 120034
loss: 0.9972697496414185,grad_norm: 0.8707511837970328, iteration: 120035
loss: 0.9898163676261902,grad_norm: 0.999999139451089, iteration: 120036
loss: 1.0119071006774902,grad_norm: 0.9999992292850977, iteration: 120037
loss: 0.9861671924591064,grad_norm: 0.9999990914042651, iteration: 120038
loss: 0.9778024554252625,grad_norm: 0.7215959582527406, iteration: 120039
loss: 1.0292282104492188,grad_norm: 0.9656086296072282, iteration: 120040
loss: 1.0576486587524414,grad_norm: 0.9999991963072764, iteration: 120041
loss: 1.0108400583267212,grad_norm: 0.9999990483783238, iteration: 120042
loss: 0.9699880480766296,grad_norm: 0.9999991623529091, iteration: 120043
loss: 1.018417239189148,grad_norm: 0.9999992786129878, iteration: 120044
loss: 0.9571294188499451,grad_norm: 0.9999991351439308, iteration: 120045
loss: 1.0040425062179565,grad_norm: 0.9999990679825665, iteration: 120046
loss: 1.0166058540344238,grad_norm: 0.9999991538916118, iteration: 120047
loss: 1.0248032808303833,grad_norm: 0.9999992165664383, iteration: 120048
loss: 0.9792616963386536,grad_norm: 0.9846009168160459, iteration: 120049
loss: 1.0124403238296509,grad_norm: 0.9999991664392747, iteration: 120050
loss: 1.0259394645690918,grad_norm: 0.9999991798684005, iteration: 120051
loss: 0.9793423414230347,grad_norm: 0.9999991555398948, iteration: 120052
loss: 0.9981849789619446,grad_norm: 0.999999149771244, iteration: 120053
loss: 1.0304216146469116,grad_norm: 0.9999990286789956, iteration: 120054
loss: 0.9692848920822144,grad_norm: 0.9999991118136294, iteration: 120055
loss: 1.0133213996887207,grad_norm: 0.9999990924034367, iteration: 120056
loss: 1.0317625999450684,grad_norm: 0.9999994839885191, iteration: 120057
loss: 1.039089322090149,grad_norm: 0.92962835964527, iteration: 120058
loss: 1.0215946435928345,grad_norm: 0.9631165069616924, iteration: 120059
loss: 1.0093871355056763,grad_norm: 0.9999991612573905, iteration: 120060
loss: 1.013548493385315,grad_norm: 0.9999991282529208, iteration: 120061
loss: 1.0107624530792236,grad_norm: 0.9999991464217591, iteration: 120062
loss: 0.9999276995658875,grad_norm: 0.8513892105951723, iteration: 120063
loss: 0.9850192070007324,grad_norm: 0.9999990206907935, iteration: 120064
loss: 0.9868741035461426,grad_norm: 0.9999991990022308, iteration: 120065
loss: 1.0223218202590942,grad_norm: 0.9999990166856829, iteration: 120066
loss: 1.0195454359054565,grad_norm: 0.9999991975713, iteration: 120067
loss: 1.053109049797058,grad_norm: 0.9999990462160773, iteration: 120068
loss: 0.9712982177734375,grad_norm: 0.9999992623368786, iteration: 120069
loss: 0.9988003373146057,grad_norm: 0.9999990382954894, iteration: 120070
loss: 1.0293406248092651,grad_norm: 0.9999990457593118, iteration: 120071
loss: 0.9582333564758301,grad_norm: 0.9999990600261229, iteration: 120072
loss: 0.9813929200172424,grad_norm: 0.9999991226056811, iteration: 120073
loss: 1.0120879411697388,grad_norm: 0.9999992307729947, iteration: 120074
loss: 0.9699714183807373,grad_norm: 0.9999995389649056, iteration: 120075
loss: 0.97993403673172,grad_norm: 0.9999991720345851, iteration: 120076
loss: 1.0166304111480713,grad_norm: 0.999999031755765, iteration: 120077
loss: 0.9905178546905518,grad_norm: 0.9999989477652799, iteration: 120078
loss: 0.9624906182289124,grad_norm: 0.9999991745087953, iteration: 120079
loss: 0.9950129985809326,grad_norm: 0.9607949458518857, iteration: 120080
loss: 1.0486451387405396,grad_norm: 0.9999995914084019, iteration: 120081
loss: 0.9900599122047424,grad_norm: 0.9999991101214479, iteration: 120082
loss: 1.0039751529693604,grad_norm: 0.9999991554116567, iteration: 120083
loss: 0.9760077595710754,grad_norm: 0.9602758397780967, iteration: 120084
loss: 1.000916838645935,grad_norm: 0.998802798679512, iteration: 120085
loss: 1.0818681716918945,grad_norm: 0.999999312910166, iteration: 120086
loss: 1.0175074338912964,grad_norm: 0.9999991820715605, iteration: 120087
loss: 1.0232243537902832,grad_norm: 0.9532354115516212, iteration: 120088
loss: 0.9527400732040405,grad_norm: 0.9999991447456228, iteration: 120089
loss: 1.0165427923202515,grad_norm: 0.9999991201958155, iteration: 120090
loss: 0.9517373442649841,grad_norm: 0.9999990213180135, iteration: 120091
loss: 1.0313537120819092,grad_norm: 0.9999194702614793, iteration: 120092
loss: 0.9873215556144714,grad_norm: 0.9463705521465036, iteration: 120093
loss: 0.9984274506568909,grad_norm: 0.9999990534681112, iteration: 120094
loss: 1.0144892930984497,grad_norm: 0.9999990720476057, iteration: 120095
loss: 0.9850558042526245,grad_norm: 0.9999990367324426, iteration: 120096
loss: 0.9556635618209839,grad_norm: 0.9999991862756012, iteration: 120097
loss: 0.9594714045524597,grad_norm: 0.999999138097679, iteration: 120098
loss: 1.0068410634994507,grad_norm: 0.9623142970960581, iteration: 120099
loss: 0.9546335339546204,grad_norm: 0.9999991194204008, iteration: 120100
loss: 0.9767487049102783,grad_norm: 0.9999988895270961, iteration: 120101
loss: 1.0134191513061523,grad_norm: 0.999999224231653, iteration: 120102
loss: 1.0213773250579834,grad_norm: 0.9999989992111713, iteration: 120103
loss: 0.9907684922218323,grad_norm: 0.9999999768527061, iteration: 120104
loss: 1.013633370399475,grad_norm: 0.9999991526021131, iteration: 120105
loss: 1.0134243965148926,grad_norm: 0.999999201363413, iteration: 120106
loss: 0.9646472334861755,grad_norm: 0.9999992276044375, iteration: 120107
loss: 1.000040888786316,grad_norm: 0.9999991445708413, iteration: 120108
loss: 0.9628185629844666,grad_norm: 0.9381522271187094, iteration: 120109
loss: 1.008126139640808,grad_norm: 0.9999992321733271, iteration: 120110
loss: 1.0205193758010864,grad_norm: 0.9237672334137523, iteration: 120111
loss: 0.972788393497467,grad_norm: 0.9999990523962332, iteration: 120112
loss: 1.008556842803955,grad_norm: 0.9079033199536147, iteration: 120113
loss: 1.057841420173645,grad_norm: 0.9999990787442353, iteration: 120114
loss: 0.9832571148872375,grad_norm: 0.9999991781273287, iteration: 120115
loss: 1.0452641248703003,grad_norm: 0.9999992535919058, iteration: 120116
loss: 1.0297054052352905,grad_norm: 0.9999992144144902, iteration: 120117
loss: 0.9593807458877563,grad_norm: 0.999999176965706, iteration: 120118
loss: 1.009751319885254,grad_norm: 0.9999991534384152, iteration: 120119
loss: 1.0086652040481567,grad_norm: 0.9412680087620188, iteration: 120120
loss: 1.025158405303955,grad_norm: 0.9999990449637056, iteration: 120121
loss: 1.001240849494934,grad_norm: 0.8513498145448558, iteration: 120122
loss: 0.9721114039421082,grad_norm: 0.9999989291898835, iteration: 120123
loss: 1.0112682580947876,grad_norm: 0.9338878154602405, iteration: 120124
loss: 0.9994811415672302,grad_norm: 0.9322445940401615, iteration: 120125
loss: 1.0366859436035156,grad_norm: 0.9999991883874151, iteration: 120126
loss: 0.9836167097091675,grad_norm: 0.9999990411911708, iteration: 120127
loss: 1.0267277956008911,grad_norm: 0.999999130395462, iteration: 120128
loss: 0.9484318494796753,grad_norm: 0.9855871193053752, iteration: 120129
loss: 1.0154742002487183,grad_norm: 0.9593127531773868, iteration: 120130
loss: 1.0022610425949097,grad_norm: 0.9393610215683282, iteration: 120131
loss: 1.0370324850082397,grad_norm: 0.999999180608386, iteration: 120132
loss: 0.964809238910675,grad_norm: 0.9999989729521638, iteration: 120133
loss: 0.9818724989891052,grad_norm: 0.999999293207511, iteration: 120134
loss: 1.0149108171463013,grad_norm: 0.9999991193015828, iteration: 120135
loss: 1.0192927122116089,grad_norm: 0.9999995780008172, iteration: 120136
loss: 1.0160938501358032,grad_norm: 0.964726417171964, iteration: 120137
loss: 1.0045006275177002,grad_norm: 0.9132646011966867, iteration: 120138
loss: 0.9886398315429688,grad_norm: 0.9999990895234947, iteration: 120139
loss: 0.9871293306350708,grad_norm: 0.9999990566749414, iteration: 120140
loss: 0.9751390218734741,grad_norm: 0.9999991499204417, iteration: 120141
loss: 0.9678927659988403,grad_norm: 0.9999990812512813, iteration: 120142
loss: 1.0039831399917603,grad_norm: 0.9999989853313639, iteration: 120143
loss: 0.9918091893196106,grad_norm: 0.9272202237058297, iteration: 120144
loss: 0.9977480173110962,grad_norm: 0.9999991307100031, iteration: 120145
loss: 0.991189181804657,grad_norm: 0.9999989852656778, iteration: 120146
loss: 0.9604083299636841,grad_norm: 0.9999991915903974, iteration: 120147
loss: 1.022516131401062,grad_norm: 0.8608013033871506, iteration: 120148
loss: 0.9973180294036865,grad_norm: 0.9771892486701829, iteration: 120149
loss: 1.01938796043396,grad_norm: 0.999999529553337, iteration: 120150
loss: 1.0027358531951904,grad_norm: 0.9999989034489453, iteration: 120151
loss: 1.0019371509552002,grad_norm: 0.9999991015850868, iteration: 120152
loss: 1.0004358291625977,grad_norm: 0.999999286588458, iteration: 120153
loss: 0.9865238070487976,grad_norm: 0.9478761787762658, iteration: 120154
loss: 0.9721894860267639,grad_norm: 0.9999992873818692, iteration: 120155
loss: 0.9898160099983215,grad_norm: 0.9999992879644833, iteration: 120156
loss: 0.9716603755950928,grad_norm: 0.9999992143910069, iteration: 120157
loss: 1.0192182064056396,grad_norm: 0.7668168392782325, iteration: 120158
loss: 1.0266517400741577,grad_norm: 0.9540967517411195, iteration: 120159
loss: 0.9481480717658997,grad_norm: 0.9999992232371347, iteration: 120160
loss: 1.0256285667419434,grad_norm: 0.8923614467708596, iteration: 120161
loss: 0.9983587265014648,grad_norm: 0.9999990691299382, iteration: 120162
loss: 0.9819462895393372,grad_norm: 0.9999991209129628, iteration: 120163
loss: 1.0709346532821655,grad_norm: 0.9999992740830514, iteration: 120164
loss: 1.0006544589996338,grad_norm: 0.9999992338750183, iteration: 120165
loss: 0.9731077551841736,grad_norm: 0.9999992016215521, iteration: 120166
loss: 0.9992699027061462,grad_norm: 0.9999990721856825, iteration: 120167
loss: 1.1172418594360352,grad_norm: 0.9999997511126965, iteration: 120168
loss: 1.0426827669143677,grad_norm: 0.9999999117454507, iteration: 120169
loss: 0.9753032326698303,grad_norm: 0.999999139163387, iteration: 120170
loss: 1.0016144514083862,grad_norm: 0.9999990878054024, iteration: 120171
loss: 1.0000368356704712,grad_norm: 0.9999994904747408, iteration: 120172
loss: 0.9737424254417419,grad_norm: 0.9999989965079955, iteration: 120173
loss: 0.9934486150741577,grad_norm: 0.9999991664270806, iteration: 120174
loss: 0.9863446950912476,grad_norm: 0.999999041080042, iteration: 120175
loss: 0.9961541891098022,grad_norm: 0.9325545519275655, iteration: 120176
loss: 1.0200469493865967,grad_norm: 0.9999990599506552, iteration: 120177
loss: 0.9966614246368408,grad_norm: 0.9999990845916223, iteration: 120178
loss: 0.9892479777336121,grad_norm: 0.9612117272097178, iteration: 120179
loss: 0.9682161808013916,grad_norm: 0.9783956375166986, iteration: 120180
loss: 0.9750841856002808,grad_norm: 0.999999001408808, iteration: 120181
loss: 0.9987133145332336,grad_norm: 0.9999992026002488, iteration: 120182
loss: 0.9822996258735657,grad_norm: 0.9406759080242857, iteration: 120183
loss: 0.9746987819671631,grad_norm: 0.9999990314558047, iteration: 120184
loss: 0.9517115354537964,grad_norm: 0.9748254439085224, iteration: 120185
loss: 1.0264464616775513,grad_norm: 0.9999992880596089, iteration: 120186
loss: 1.0061445236206055,grad_norm: 0.962575040647249, iteration: 120187
loss: 0.9931647181510925,grad_norm: 0.9999993389019053, iteration: 120188
loss: 1.0278074741363525,grad_norm: 0.9999991628903175, iteration: 120189
loss: 0.9972432851791382,grad_norm: 0.9999990993225454, iteration: 120190
loss: 1.0146214962005615,grad_norm: 0.9366468054204372, iteration: 120191
loss: 1.0279184579849243,grad_norm: 0.8996596481487711, iteration: 120192
loss: 0.9785348176956177,grad_norm: 0.8846872637467753, iteration: 120193
loss: 0.9622706770896912,grad_norm: 0.9999992272722502, iteration: 120194
loss: 0.9764775633811951,grad_norm: 0.9999991375760553, iteration: 120195
loss: 0.9864256381988525,grad_norm: 0.9999990613705492, iteration: 120196
loss: 0.9800194501876831,grad_norm: 0.9999990481100779, iteration: 120197
loss: 1.030035376548767,grad_norm: 0.9999991004659784, iteration: 120198
loss: 0.9531410932540894,grad_norm: 0.9803770278768457, iteration: 120199
loss: 0.9804834127426147,grad_norm: 0.9741624080280669, iteration: 120200
loss: 1.0337024927139282,grad_norm: 0.9999991654776048, iteration: 120201
loss: 0.9649839401245117,grad_norm: 0.9122851659699056, iteration: 120202
loss: 1.0706969499588013,grad_norm: 0.9999996794412874, iteration: 120203
loss: 1.0026050806045532,grad_norm: 0.8633436626178854, iteration: 120204
loss: 1.0257693529129028,grad_norm: 0.8136611418445774, iteration: 120205
loss: 1.003453016281128,grad_norm: 0.9999992769402187, iteration: 120206
loss: 0.9978640079498291,grad_norm: 0.9999996280856022, iteration: 120207
loss: 1.0051335096359253,grad_norm: 0.9999992422354047, iteration: 120208
loss: 1.0142124891281128,grad_norm: 0.9029785801639229, iteration: 120209
loss: 1.0066733360290527,grad_norm: 0.976441522147126, iteration: 120210
loss: 0.9747799038887024,grad_norm: 0.9999990047343031, iteration: 120211
loss: 1.0240410566329956,grad_norm: 0.9095098775503044, iteration: 120212
loss: 1.024081826210022,grad_norm: 0.999999188580613, iteration: 120213
loss: 0.9762622117996216,grad_norm: 0.9999992494169124, iteration: 120214
loss: 1.0145093202590942,grad_norm: 0.9999990264845562, iteration: 120215
loss: 1.0051450729370117,grad_norm: 0.9999990521224879, iteration: 120216
loss: 1.0620474815368652,grad_norm: 0.9999995543401804, iteration: 120217
loss: 0.9908133149147034,grad_norm: 0.9578523346523609, iteration: 120218
loss: 1.015062928199768,grad_norm: 0.9999991985822532, iteration: 120219
loss: 1.0003632307052612,grad_norm: 0.9999991266226138, iteration: 120220
loss: 0.9968861937522888,grad_norm: 0.9052129009199882, iteration: 120221
loss: 1.0609101057052612,grad_norm: 0.9999991456548959, iteration: 120222
loss: 1.0052882432937622,grad_norm: 0.9960676075161231, iteration: 120223
loss: 1.066711187362671,grad_norm: 0.9999991468853663, iteration: 120224
loss: 1.0324082374572754,grad_norm: 0.9999997676348171, iteration: 120225
loss: 0.9944750666618347,grad_norm: 0.9999991663197035, iteration: 120226
loss: 0.9564436078071594,grad_norm: 0.9999990613823376, iteration: 120227
loss: 1.0335465669631958,grad_norm: 0.9999991846333879, iteration: 120228
loss: 0.9597980976104736,grad_norm: 0.9820436033046821, iteration: 120229
loss: 1.0175575017929077,grad_norm: 0.999999946792873, iteration: 120230
loss: 0.9598091244697571,grad_norm: 0.9999991536483688, iteration: 120231
loss: 1.019722580909729,grad_norm: 0.9469195603081868, iteration: 120232
loss: 1.0081374645233154,grad_norm: 0.9999991209335906, iteration: 120233
loss: 1.021540641784668,grad_norm: 0.9999991629051612, iteration: 120234
loss: 1.0515694618225098,grad_norm: 0.9999991467123274, iteration: 120235
loss: 1.0445828437805176,grad_norm: 0.9999991997270806, iteration: 120236
loss: 1.0143839120864868,grad_norm: 0.9190435915929088, iteration: 120237
loss: 0.9646972417831421,grad_norm: 0.9999991428305175, iteration: 120238
loss: 1.0222694873809814,grad_norm: 0.9999992985480314, iteration: 120239
loss: 1.025581955909729,grad_norm: 0.9999992380977273, iteration: 120240
loss: 1.0196751356124878,grad_norm: 0.9999996500765734, iteration: 120241
loss: 0.9675065279006958,grad_norm: 0.9999990362022039, iteration: 120242
loss: 0.993014395236969,grad_norm: 0.9999991047496501, iteration: 120243
loss: 1.0273560285568237,grad_norm: 0.999999250589718, iteration: 120244
loss: 0.9712063670158386,grad_norm: 0.9999991283843822, iteration: 120245
loss: 0.9679213166236877,grad_norm: 0.9778678588808372, iteration: 120246
loss: 0.9978786706924438,grad_norm: 0.9435568879517654, iteration: 120247
loss: 1.0101674795150757,grad_norm: 0.8751829627224307, iteration: 120248
loss: 1.0170131921768188,grad_norm: 0.9999992203290013, iteration: 120249
loss: 0.9787521958351135,grad_norm: 0.9461127871297844, iteration: 120250
loss: 1.010216474533081,grad_norm: 0.9999993299590647, iteration: 120251
loss: 0.9896767735481262,grad_norm: 0.9314843024308489, iteration: 120252
loss: 1.0063896179199219,grad_norm: 0.999999052733198, iteration: 120253
loss: 0.9974395632743835,grad_norm: 0.8649138825500983, iteration: 120254
loss: 0.9984385967254639,grad_norm: 0.9999991610612821, iteration: 120255
loss: 1.0595728158950806,grad_norm: 0.9999995677049098, iteration: 120256
loss: 0.9926177859306335,grad_norm: 0.9997357196414398, iteration: 120257
loss: 1.0308752059936523,grad_norm: 0.9999991016975968, iteration: 120258
loss: 1.0425831079483032,grad_norm: 0.9999997460563109, iteration: 120259
loss: 1.0102132558822632,grad_norm: 0.9999989495955142, iteration: 120260
loss: 0.997279703617096,grad_norm: 0.9999991134656728, iteration: 120261
loss: 1.0151827335357666,grad_norm: 0.8459499197295308, iteration: 120262
loss: 1.0238192081451416,grad_norm: 0.999998998259639, iteration: 120263
loss: 1.003478765487671,grad_norm: 0.9999991725466989, iteration: 120264
loss: 0.960244357585907,grad_norm: 0.9978962107354952, iteration: 120265
loss: 0.9874908328056335,grad_norm: 0.9999990374349521, iteration: 120266
loss: 0.9938617944717407,grad_norm: 0.8956815667993085, iteration: 120267
loss: 1.014871597290039,grad_norm: 0.935012650203526, iteration: 120268
loss: 0.9738004207611084,grad_norm: 0.999999100030098, iteration: 120269
loss: 1.010141134262085,grad_norm: 0.9999990901725503, iteration: 120270
loss: 1.0378265380859375,grad_norm: 0.9999991465961815, iteration: 120271
loss: 0.9906353950500488,grad_norm: 0.9425365695422468, iteration: 120272
loss: 0.9895392060279846,grad_norm: 0.9957035305359219, iteration: 120273
loss: 1.0197639465332031,grad_norm: 0.9999991414753893, iteration: 120274
loss: 1.0405092239379883,grad_norm: 0.9999995762261867, iteration: 120275
loss: 1.002742886543274,grad_norm: 0.9999989773940254, iteration: 120276
loss: 0.9552374482154846,grad_norm: 0.9409583454231604, iteration: 120277
loss: 0.9770715832710266,grad_norm: 0.9999990864582219, iteration: 120278
loss: 0.9985660314559937,grad_norm: 0.9999989230758264, iteration: 120279
loss: 0.9818471074104309,grad_norm: 0.9588388744267432, iteration: 120280
loss: 0.9965230226516724,grad_norm: 0.9999999436644833, iteration: 120281
loss: 1.0091440677642822,grad_norm: 0.9253872962069633, iteration: 120282
loss: 0.9886873960494995,grad_norm: 0.9999992458616469, iteration: 120283
loss: 0.9633833765983582,grad_norm: 0.95875634105331, iteration: 120284
loss: 1.0324541330337524,grad_norm: 0.9999991583609629, iteration: 120285
loss: 1.0248697996139526,grad_norm: 0.9999995029558179, iteration: 120286
loss: 1.0034794807434082,grad_norm: 0.9999991340395634, iteration: 120287
loss: 1.050418734550476,grad_norm: 0.9999990054199509, iteration: 120288
loss: 1.0143041610717773,grad_norm: 0.9999993577866565, iteration: 120289
loss: 1.0516492128372192,grad_norm: 0.9999991078151483, iteration: 120290
loss: 0.9459102749824524,grad_norm: 0.9999989780539164, iteration: 120291
loss: 1.0420327186584473,grad_norm: 0.9999996762725342, iteration: 120292
loss: 1.023025393486023,grad_norm: 0.9999990157550318, iteration: 120293
loss: 0.9810246825218201,grad_norm: 0.9878939839279631, iteration: 120294
loss: 1.0139071941375732,grad_norm: 0.9999991603095165, iteration: 120295
loss: 1.0430189371109009,grad_norm: 0.9999996426150941, iteration: 120296
loss: 0.9652002453804016,grad_norm: 0.9150414319433614, iteration: 120297
loss: 1.000142216682434,grad_norm: 0.9793802542800918, iteration: 120298
loss: 1.0144243240356445,grad_norm: 0.9906136949495338, iteration: 120299
loss: 0.9708654284477234,grad_norm: 0.9999990378512231, iteration: 120300
loss: 0.9678390622138977,grad_norm: 0.9940040423494163, iteration: 120301
loss: 0.9960122108459473,grad_norm: 0.9999992767557361, iteration: 120302
loss: 1.010224461555481,grad_norm: 0.9999992263604153, iteration: 120303
loss: 1.007589340209961,grad_norm: 0.9692344307784148, iteration: 120304
loss: 0.9937143325805664,grad_norm: 0.9999991567550197, iteration: 120305
loss: 0.9955200552940369,grad_norm: 0.9999991215141313, iteration: 120306
loss: 1.001338005065918,grad_norm: 0.9292781098005589, iteration: 120307
loss: 0.9947124719619751,grad_norm: 0.9999991967771266, iteration: 120308
loss: 0.995897650718689,grad_norm: 0.8918823220378745, iteration: 120309
loss: 1.0036654472351074,grad_norm: 0.9999992263085873, iteration: 120310
loss: 1.0126523971557617,grad_norm: 0.9999989662853886, iteration: 120311
loss: 0.9869236350059509,grad_norm: 0.8483998732832052, iteration: 120312
loss: 1.002564549446106,grad_norm: 0.9999991005340623, iteration: 120313
loss: 1.020471215248108,grad_norm: 0.9999990165543062, iteration: 120314
loss: 1.0060008764266968,grad_norm: 0.9999990131741833, iteration: 120315
loss: 1.0288174152374268,grad_norm: 0.9999991605699425, iteration: 120316
loss: 1.0334137678146362,grad_norm: 0.9999992584115787, iteration: 120317
loss: 0.9870975017547607,grad_norm: 0.999999141828921, iteration: 120318
loss: 0.98337322473526,grad_norm: 0.9999991216498328, iteration: 120319
loss: 1.1343415975570679,grad_norm: 0.9999992868812372, iteration: 120320
loss: 1.0130794048309326,grad_norm: 0.9999992802757739, iteration: 120321
loss: 0.975355327129364,grad_norm: 0.9999992566725879, iteration: 120322
loss: 0.9917526245117188,grad_norm: 0.9999991380601274, iteration: 120323
loss: 0.9999288320541382,grad_norm: 0.9372204551153849, iteration: 120324
loss: 1.0012805461883545,grad_norm: 0.9999991397808751, iteration: 120325
loss: 0.986364483833313,grad_norm: 0.9999990707070082, iteration: 120326
loss: 1.0113779306411743,grad_norm: 0.9282049183940791, iteration: 120327
loss: 0.9814632534980774,grad_norm: 0.9115517912973012, iteration: 120328
loss: 1.0197696685791016,grad_norm: 0.9999991196927228, iteration: 120329
loss: 1.0173587799072266,grad_norm: 0.9553225318382024, iteration: 120330
loss: 1.052266240119934,grad_norm: 0.9999992267913758, iteration: 120331
loss: 1.0197380781173706,grad_norm: 0.9999991896092997, iteration: 120332
loss: 1.0077251195907593,grad_norm: 0.999999076832338, iteration: 120333
loss: 1.0032868385314941,grad_norm: 0.9999992162732354, iteration: 120334
loss: 0.9637889862060547,grad_norm: 0.8815504495313654, iteration: 120335
loss: 1.0734115839004517,grad_norm: 0.9999993618964668, iteration: 120336
loss: 0.9851173162460327,grad_norm: 0.999999066614171, iteration: 120337
loss: 0.9658505916595459,grad_norm: 0.9310532688801789, iteration: 120338
loss: 1.0318742990493774,grad_norm: 0.9999996048665292, iteration: 120339
loss: 1.005443811416626,grad_norm: 0.9999991196111861, iteration: 120340
loss: 1.0195683240890503,grad_norm: 0.9999997832737468, iteration: 120341
loss: 0.9352605938911438,grad_norm: 0.9049435412646755, iteration: 120342
loss: 0.9890295267105103,grad_norm: 0.9999990340691819, iteration: 120343
loss: 1.0000224113464355,grad_norm: 0.9999992452149393, iteration: 120344
loss: 1.0120763778686523,grad_norm: 0.9928158674546781, iteration: 120345
loss: 1.0111786127090454,grad_norm: 0.9999991396466918, iteration: 120346
loss: 1.0215420722961426,grad_norm: 0.9999990794182874, iteration: 120347
loss: 0.9907340407371521,grad_norm: 0.9999990642185769, iteration: 120348
loss: 1.010594367980957,grad_norm: 0.979907894335697, iteration: 120349
loss: 0.9864522814750671,grad_norm: 0.9999990080984471, iteration: 120350
loss: 1.0137616395950317,grad_norm: 0.9999992569191501, iteration: 120351
loss: 1.008453607559204,grad_norm: 0.9477267761811822, iteration: 120352
loss: 1.0236057043075562,grad_norm: 0.9999991497048439, iteration: 120353
loss: 1.007520079612732,grad_norm: 0.9738549104614836, iteration: 120354
loss: 1.024778962135315,grad_norm: 0.9999991573888238, iteration: 120355
loss: 1.0112581253051758,grad_norm: 0.999999485490725, iteration: 120356
loss: 1.0581051111221313,grad_norm: 0.9999995892626631, iteration: 120357
loss: 1.0153007507324219,grad_norm: 0.999999177868022, iteration: 120358
loss: 1.0187695026397705,grad_norm: 0.999999143086169, iteration: 120359
loss: 0.96265709400177,grad_norm: 0.9614140471175183, iteration: 120360
loss: 0.9993468523025513,grad_norm: 0.9999989957803911, iteration: 120361
loss: 0.96605384349823,grad_norm: 0.9999988992111523, iteration: 120362
loss: 1.0103378295898438,grad_norm: 0.9999991581216777, iteration: 120363
loss: 1.0027962923049927,grad_norm: 0.9999992022656423, iteration: 120364
loss: 0.9919926524162292,grad_norm: 0.946557669771587, iteration: 120365
loss: 1.0224575996398926,grad_norm: 0.9999991153987923, iteration: 120366
loss: 0.9960108399391174,grad_norm: 0.9270905904882921, iteration: 120367
loss: 0.9971129298210144,grad_norm: 0.9999993717467467, iteration: 120368
loss: 0.9763953685760498,grad_norm: 0.9148492604188746, iteration: 120369
loss: 0.9760758876800537,grad_norm: 0.9999991104981997, iteration: 120370
loss: 0.9948013424873352,grad_norm: 0.9999990426342109, iteration: 120371
loss: 1.0320318937301636,grad_norm: 0.9999992306107567, iteration: 120372
loss: 0.9855338931083679,grad_norm: 0.8818228739535564, iteration: 120373
loss: 0.9970445036888123,grad_norm: 0.9999991536485506, iteration: 120374
loss: 0.9957786202430725,grad_norm: 0.8676716436860765, iteration: 120375
loss: 1.0017890930175781,grad_norm: 0.9999990298716515, iteration: 120376
loss: 0.9865503907203674,grad_norm: 0.9255385523876067, iteration: 120377
loss: 0.9906126260757446,grad_norm: 0.8594660702094982, iteration: 120378
loss: 0.9581224322319031,grad_norm: 0.999999157182426, iteration: 120379
loss: 0.9564051628112793,grad_norm: 0.8991893880036946, iteration: 120380
loss: 0.9754498600959778,grad_norm: 0.9999989888027582, iteration: 120381
loss: 1.0561331510543823,grad_norm: 0.9999994262229939, iteration: 120382
loss: 1.0190935134887695,grad_norm: 0.9999990493023084, iteration: 120383
loss: 1.0000735521316528,grad_norm: 0.9999990856709796, iteration: 120384
loss: 0.9917051792144775,grad_norm: 0.9802480309124748, iteration: 120385
loss: 1.0095270872116089,grad_norm: 0.9241790963726877, iteration: 120386
loss: 0.9683730006217957,grad_norm: 0.9999991683892044, iteration: 120387
loss: 1.0186744928359985,grad_norm: 0.9601507408720226, iteration: 120388
loss: 0.96689772605896,grad_norm: 0.9999992636764061, iteration: 120389
loss: 0.9748489260673523,grad_norm: 0.9999991702994602, iteration: 120390
loss: 1.0109857320785522,grad_norm: 0.9999992430709408, iteration: 120391
loss: 1.0160322189331055,grad_norm: 0.9999991585912849, iteration: 120392
loss: 1.0079188346862793,grad_norm: 0.9710197848338966, iteration: 120393
loss: 0.9911029934883118,grad_norm: 0.8568081354341186, iteration: 120394
loss: 1.0331624746322632,grad_norm: 0.9467620159203712, iteration: 120395
loss: 1.0118356943130493,grad_norm: 0.9999990931783225, iteration: 120396
loss: 0.9821345806121826,grad_norm: 0.9999992828310349, iteration: 120397
loss: 0.9648170471191406,grad_norm: 0.9880918351180588, iteration: 120398
loss: 1.0200684070587158,grad_norm: 0.9999990614215541, iteration: 120399
loss: 1.0123594999313354,grad_norm: 0.9952048019969088, iteration: 120400
loss: 0.990523099899292,grad_norm: 0.9640821813869588, iteration: 120401
loss: 0.983651340007782,grad_norm: 0.9548379599571701, iteration: 120402
loss: 0.9989697337150574,grad_norm: 0.9657830533471409, iteration: 120403
loss: 0.9925049543380737,grad_norm: 0.9887896112871657, iteration: 120404
loss: 1.0022754669189453,grad_norm: 0.9512179899588341, iteration: 120405
loss: 1.0123752355575562,grad_norm: 0.9999990683591712, iteration: 120406
loss: 0.9448148608207703,grad_norm: 0.9999990007754139, iteration: 120407
loss: 0.9536605477333069,grad_norm: 0.9999991719467468, iteration: 120408
loss: 1.0526549816131592,grad_norm: 0.9999992712654305, iteration: 120409
loss: 0.9729894399642944,grad_norm: 0.9999990927797664, iteration: 120410
loss: 1.0101962089538574,grad_norm: 0.9999990343048061, iteration: 120411
loss: 1.0136003494262695,grad_norm: 0.9337529206342419, iteration: 120412
loss: 1.001667857170105,grad_norm: 0.9999991397870577, iteration: 120413
loss: 0.9786887764930725,grad_norm: 0.9999991466304686, iteration: 120414
loss: 0.981528103351593,grad_norm: 0.8478807128803817, iteration: 120415
loss: 1.0152332782745361,grad_norm: 0.9999991154181949, iteration: 120416
loss: 1.0121262073516846,grad_norm: 0.999999219703829, iteration: 120417
loss: 1.0089192390441895,grad_norm: 0.9238150786975936, iteration: 120418
loss: 0.9696826934814453,grad_norm: 0.9999989499230088, iteration: 120419
loss: 0.9982024431228638,grad_norm: 0.939058320351791, iteration: 120420
loss: 0.9907812476158142,grad_norm: 0.9999992599980354, iteration: 120421
loss: 1.016782522201538,grad_norm: 0.9999992547838469, iteration: 120422
loss: 1.000968337059021,grad_norm: 0.999999198222318, iteration: 120423
loss: 1.0430800914764404,grad_norm: 0.9999995204461388, iteration: 120424
loss: 1.0275394916534424,grad_norm: 0.9999990679125086, iteration: 120425
loss: 1.0045818090438843,grad_norm: 0.999999096748906, iteration: 120426
loss: 1.0019935369491577,grad_norm: 0.9999990889879962, iteration: 120427
loss: 1.008029580116272,grad_norm: 0.8719334499042766, iteration: 120428
loss: 1.0020098686218262,grad_norm: 0.784326413884914, iteration: 120429
loss: 1.006555199623108,grad_norm: 0.9999990826978481, iteration: 120430
loss: 0.9762564301490784,grad_norm: 0.9999992581446246, iteration: 120431
loss: 0.988836944103241,grad_norm: 0.8938820942141532, iteration: 120432
loss: 1.0046873092651367,grad_norm: 0.9999991653301206, iteration: 120433
loss: 0.9733920097351074,grad_norm: 0.9999994780493989, iteration: 120434
loss: 1.018821358680725,grad_norm: 0.9263264305027573, iteration: 120435
loss: 0.9743139147758484,grad_norm: 0.9999990985495163, iteration: 120436
loss: 0.9943088889122009,grad_norm: 0.9999990693190832, iteration: 120437
loss: 1.006625771522522,grad_norm: 0.9999991802908601, iteration: 120438
loss: 1.01585853099823,grad_norm: 0.9999992765742823, iteration: 120439
loss: 0.9645858407020569,grad_norm: 0.9999991270280397, iteration: 120440
loss: 0.9812286496162415,grad_norm: 0.9999991328357762, iteration: 120441
loss: 1.0116151571273804,grad_norm: 0.9999992773858437, iteration: 120442
loss: 1.009331464767456,grad_norm: 0.9999998469872684, iteration: 120443
loss: 0.9868443012237549,grad_norm: 0.9497205057171151, iteration: 120444
loss: 1.0093709230422974,grad_norm: 0.9999991510655821, iteration: 120445
loss: 0.986589252948761,grad_norm: 0.9417547333657865, iteration: 120446
loss: 1.0221582651138306,grad_norm: 0.8011537026647938, iteration: 120447
loss: 0.986927330493927,grad_norm: 0.9576989095060818, iteration: 120448
loss: 1.0072232484817505,grad_norm: 0.9999990622306139, iteration: 120449
loss: 1.001343011856079,grad_norm: 0.9412231604754233, iteration: 120450
loss: 0.994273841381073,grad_norm: 0.9999991873207236, iteration: 120451
loss: 1.0058026313781738,grad_norm: 0.9714922314731536, iteration: 120452
loss: 0.986243724822998,grad_norm: 0.9217882807898841, iteration: 120453
loss: 1.027673602104187,grad_norm: 0.9252601044089185, iteration: 120454
loss: 1.0219144821166992,grad_norm: 0.9999993060619685, iteration: 120455
loss: 1.177653431892395,grad_norm: 0.9999993267337086, iteration: 120456
loss: 0.9970108866691589,grad_norm: 0.9999991589452534, iteration: 120457
loss: 0.9980292916297913,grad_norm: 0.9999990672050977, iteration: 120458
loss: 0.9849963784217834,grad_norm: 0.9999992659078812, iteration: 120459
loss: 1.0158659219741821,grad_norm: 0.9999992890529845, iteration: 120460
loss: 0.9783437848091125,grad_norm: 0.9999990574340926, iteration: 120461
loss: 1.0175843238830566,grad_norm: 0.9999992635086521, iteration: 120462
loss: 0.9649998545646667,grad_norm: 0.9999991073891812, iteration: 120463
loss: 1.0127716064453125,grad_norm: 0.999999145220671, iteration: 120464
loss: 1.0513935089111328,grad_norm: 0.9999996827589831, iteration: 120465
loss: 1.029834508895874,grad_norm: 0.9117727285138731, iteration: 120466
loss: 0.9676533341407776,grad_norm: 0.9999989590492963, iteration: 120467
loss: 1.021355152130127,grad_norm: 0.9026030803684799, iteration: 120468
loss: 0.9977196455001831,grad_norm: 0.9999990656599353, iteration: 120469
loss: 1.0311403274536133,grad_norm: 0.9999990579885933, iteration: 120470
loss: 0.986492395401001,grad_norm: 0.8868072472155033, iteration: 120471
loss: 0.9967782497406006,grad_norm: 0.9999993912999184, iteration: 120472
loss: 1.0731793642044067,grad_norm: 0.9999996112624968, iteration: 120473
loss: 0.9841026663780212,grad_norm: 0.9999992736119919, iteration: 120474
loss: 0.9973576068878174,grad_norm: 0.999999085728808, iteration: 120475
loss: 1.0022386312484741,grad_norm: 0.999998970398231, iteration: 120476
loss: 1.0138057470321655,grad_norm: 0.9999990269354428, iteration: 120477
loss: 1.0221251249313354,grad_norm: 0.9976787986740991, iteration: 120478
loss: 1.0124905109405518,grad_norm: 0.9999993767364223, iteration: 120479
loss: 0.9579490423202515,grad_norm: 0.9999991461024653, iteration: 120480
loss: 1.0065748691558838,grad_norm: 0.9674895788680258, iteration: 120481
loss: 0.9966948628425598,grad_norm: 0.999999106147472, iteration: 120482
loss: 1.0403556823730469,grad_norm: 0.9999990881936868, iteration: 120483
loss: 0.9738427996635437,grad_norm: 0.999999124391345, iteration: 120484
loss: 1.0131232738494873,grad_norm: 0.9514019833686552, iteration: 120485
loss: 0.9871190190315247,grad_norm: 0.964392648050671, iteration: 120486
loss: 1.0308054685592651,grad_norm: 0.9999991297920751, iteration: 120487
loss: 1.0134862661361694,grad_norm: 0.9999989824494812, iteration: 120488
loss: 0.9993610382080078,grad_norm: 0.9999990736721434, iteration: 120489
loss: 1.0084481239318848,grad_norm: 0.9999991745318042, iteration: 120490
loss: 1.03653085231781,grad_norm: 0.9999996503814357, iteration: 120491
loss: 1.008144736289978,grad_norm: 0.9999992670204735, iteration: 120492
loss: 1.000479817390442,grad_norm: 0.9999991461104228, iteration: 120493
loss: 1.014062523841858,grad_norm: 0.9748972315173152, iteration: 120494
loss: 1.006259560585022,grad_norm: 0.9868155504867463, iteration: 120495
loss: 0.9791665077209473,grad_norm: 0.9999991574715859, iteration: 120496
loss: 0.9786513447761536,grad_norm: 0.8274081387772743, iteration: 120497
loss: 0.9853646755218506,grad_norm: 0.9629280940500358, iteration: 120498
loss: 0.987465500831604,grad_norm: 0.8494372132998321, iteration: 120499
loss: 0.9883191585540771,grad_norm: 0.9999990372774551, iteration: 120500
loss: 0.9940906167030334,grad_norm: 0.9999990655220836, iteration: 120501
loss: 0.9843655824661255,grad_norm: 0.999999180491806, iteration: 120502
loss: 0.9805265069007874,grad_norm: 0.9999991299477717, iteration: 120503
loss: 0.9983776807785034,grad_norm: 0.9484243229479901, iteration: 120504
loss: 1.0106635093688965,grad_norm: 0.9999993450610044, iteration: 120505
loss: 1.005750298500061,grad_norm: 0.9621168771142232, iteration: 120506
loss: 0.9777793288230896,grad_norm: 0.9999992811224716, iteration: 120507
loss: 1.0182223320007324,grad_norm: 0.9999991566806595, iteration: 120508
loss: 1.0501173734664917,grad_norm: 0.9999992406492058, iteration: 120509
loss: 0.9765612483024597,grad_norm: 0.9421421827192433, iteration: 120510
loss: 1.0607054233551025,grad_norm: 0.9999996617899575, iteration: 120511
loss: 1.0153354406356812,grad_norm: 0.9999991348045486, iteration: 120512
loss: 0.9676525592803955,grad_norm: 0.961298090784258, iteration: 120513
loss: 0.9860955476760864,grad_norm: 0.9999991318482241, iteration: 120514
loss: 1.0413103103637695,grad_norm: 0.9999993930777067, iteration: 120515
loss: 0.991724967956543,grad_norm: 0.9030214069170361, iteration: 120516
loss: 1.003895878791809,grad_norm: 0.9702322986832173, iteration: 120517
loss: 0.9928320646286011,grad_norm: 0.9999993587419704, iteration: 120518
loss: 1.0049997568130493,grad_norm: 0.9934746592173684, iteration: 120519
loss: 1.008050799369812,grad_norm: 0.9999992413938932, iteration: 120520
loss: 0.9726176261901855,grad_norm: 0.946411005098496, iteration: 120521
loss: 0.9960105419158936,grad_norm: 0.9836585227199457, iteration: 120522
loss: 1.011483073234558,grad_norm: 0.9939263665047106, iteration: 120523
loss: 1.0254846811294556,grad_norm: 0.9999994067698241, iteration: 120524
loss: 1.3680747747421265,grad_norm: 0.9999990296730138, iteration: 120525
loss: 1.0336066484451294,grad_norm: 0.9999990301606415, iteration: 120526
loss: 1.0335537195205688,grad_norm: 0.999999015886684, iteration: 120527
loss: 0.9970325231552124,grad_norm: 0.9999991703858305, iteration: 120528
loss: 1.0024601221084595,grad_norm: 0.9999991294588709, iteration: 120529
loss: 0.9841756224632263,grad_norm: 0.9999990233853503, iteration: 120530
loss: 1.089916706085205,grad_norm: 0.9999995841105466, iteration: 120531
loss: 0.9535124897956848,grad_norm: 0.9664261594536996, iteration: 120532
loss: 0.9778215289115906,grad_norm: 0.9999991147682618, iteration: 120533
loss: 1.001848578453064,grad_norm: 0.8667878940109698, iteration: 120534
loss: 0.9880185723304749,grad_norm: 0.9879655503476106, iteration: 120535
loss: 0.9761805534362793,grad_norm: 0.8607966526219348, iteration: 120536
loss: 0.9544644951820374,grad_norm: 0.9999991953566224, iteration: 120537
loss: 0.960751473903656,grad_norm: 0.8755981026188878, iteration: 120538
loss: 0.9928465485572815,grad_norm: 0.9999989809280617, iteration: 120539
loss: 1.1371909379959106,grad_norm: 0.9999998098168639, iteration: 120540
loss: 0.9997031688690186,grad_norm: 0.9596920178909902, iteration: 120541
loss: 0.9901295304298401,grad_norm: 0.8807397205564425, iteration: 120542
loss: 1.0140138864517212,grad_norm: 0.9808900131078642, iteration: 120543
loss: 1.0335415601730347,grad_norm: 0.9999990711648141, iteration: 120544
loss: 0.9590432047843933,grad_norm: 0.9999993874224494, iteration: 120545
loss: 1.0373058319091797,grad_norm: 0.9999993583879684, iteration: 120546
loss: 0.9772850871086121,grad_norm: 0.9999991630242409, iteration: 120547
loss: 0.9813637733459473,grad_norm: 0.9993654410872852, iteration: 120548
loss: 0.9727134108543396,grad_norm: 0.9999991454388001, iteration: 120549
loss: 1.0171916484832764,grad_norm: 0.9423053294020751, iteration: 120550
loss: 1.0003092288970947,grad_norm: 0.9999993626463517, iteration: 120551
loss: 0.9962599277496338,grad_norm: 0.9999990467617322, iteration: 120552
loss: 0.9983838200569153,grad_norm: 0.9999989108225578, iteration: 120553
loss: 1.0036834478378296,grad_norm: 0.9733904103129011, iteration: 120554
loss: 0.9843645095825195,grad_norm: 0.8825462613789012, iteration: 120555
loss: 0.9906965494155884,grad_norm: 0.9999991543822767, iteration: 120556
loss: 0.9954413771629333,grad_norm: 0.9999990612419094, iteration: 120557
loss: 1.0016186237335205,grad_norm: 0.9999998901055973, iteration: 120558
loss: 1.0260615348815918,grad_norm: 0.999624541886154, iteration: 120559
loss: 1.020517349243164,grad_norm: 0.9999991729133973, iteration: 120560
loss: 0.975823700428009,grad_norm: 0.9999993528810383, iteration: 120561
loss: 1.006170630455017,grad_norm: 0.9999995874353077, iteration: 120562
loss: 1.015306830406189,grad_norm: 0.9999993796450566, iteration: 120563
loss: 0.9683489203453064,grad_norm: 0.9999989601835093, iteration: 120564
loss: 1.0003812313079834,grad_norm: 0.999999250590985, iteration: 120565
loss: 1.0249426364898682,grad_norm: 0.9999990686336343, iteration: 120566
loss: 1.0048152208328247,grad_norm: 0.9295845945811773, iteration: 120567
loss: 0.9857867956161499,grad_norm: 0.9999991339699376, iteration: 120568
loss: 0.9623014330863953,grad_norm: 0.999999052529225, iteration: 120569
loss: 0.9741801619529724,grad_norm: 0.9999989736310129, iteration: 120570
loss: 0.9968832731246948,grad_norm: 0.9999991810082227, iteration: 120571
loss: 1.0084614753723145,grad_norm: 0.9736827755878258, iteration: 120572
loss: 0.9869270920753479,grad_norm: 0.914477736532401, iteration: 120573
loss: 1.000946283340454,grad_norm: 0.9999990174029791, iteration: 120574
loss: 1.0419338941574097,grad_norm: 0.9999994503330687, iteration: 120575
loss: 1.0459493398666382,grad_norm: 0.9999992891184162, iteration: 120576
loss: 1.023453950881958,grad_norm: 0.9999993785110877, iteration: 120577
loss: 1.019538164138794,grad_norm: 0.9999991263162672, iteration: 120578
loss: 1.0160853862762451,grad_norm: 0.9999992027925694, iteration: 120579
loss: 1.012274980545044,grad_norm: 0.9999991735485533, iteration: 120580
loss: 1.0145748853683472,grad_norm: 0.9466730493052574, iteration: 120581
loss: 0.994365394115448,grad_norm: 0.9999992738608736, iteration: 120582
loss: 1.0167254209518433,grad_norm: 0.9999992552869241, iteration: 120583
loss: 1.049710988998413,grad_norm: 0.9999997543169301, iteration: 120584
loss: 1.0158954858779907,grad_norm: 0.9999990574467615, iteration: 120585
loss: 1.005759596824646,grad_norm: 0.9798100795372171, iteration: 120586
loss: 0.9984363913536072,grad_norm: 0.999999687432788, iteration: 120587
loss: 1.0139051675796509,grad_norm: 0.9999989427791115, iteration: 120588
loss: 0.9862696528434753,grad_norm: 0.9999990764013077, iteration: 120589
loss: 1.0149815082550049,grad_norm: 0.9999991001676324, iteration: 120590
loss: 0.9772234559059143,grad_norm: 0.9999989897473387, iteration: 120591
loss: 0.9929158687591553,grad_norm: 0.9224352410685415, iteration: 120592
loss: 0.9801843762397766,grad_norm: 0.9999991197529015, iteration: 120593
loss: 1.0087552070617676,grad_norm: 0.9999991661686367, iteration: 120594
loss: 0.9914681315422058,grad_norm: 0.9849260921069991, iteration: 120595
loss: 0.9735307097434998,grad_norm: 0.8939649227356422, iteration: 120596
loss: 1.016109585762024,grad_norm: 0.9999991190881626, iteration: 120597
loss: 1.0151410102844238,grad_norm: 0.9360825736296411, iteration: 120598
loss: 1.003643274307251,grad_norm: 0.9999991434628386, iteration: 120599
loss: 1.007247805595398,grad_norm: 0.9999993463421446, iteration: 120600
loss: 0.9659309983253479,grad_norm: 0.9307749977258373, iteration: 120601
loss: 1.0006277561187744,grad_norm: 0.9999991995277124, iteration: 120602
loss: 0.9745941758155823,grad_norm: 0.9500137971110699, iteration: 120603
loss: 1.0056185722351074,grad_norm: 0.9999991171865342, iteration: 120604
loss: 1.0137442350387573,grad_norm: 0.9209405640702875, iteration: 120605
loss: 0.9741844534873962,grad_norm: 0.999999121621897, iteration: 120606
loss: 1.0376372337341309,grad_norm: 0.9999992045494185, iteration: 120607
loss: 1.0430430173873901,grad_norm: 0.9999992842820659, iteration: 120608
loss: 1.0114530324935913,grad_norm: 0.9999991794181496, iteration: 120609
loss: 1.0149794816970825,grad_norm: 0.9214593415797342, iteration: 120610
loss: 1.015997290611267,grad_norm: 0.9535398071784705, iteration: 120611
loss: 1.037428379058838,grad_norm: 0.9999991085869012, iteration: 120612
loss: 1.0098581314086914,grad_norm: 0.955058626018752, iteration: 120613
loss: 0.993863046169281,grad_norm: 0.8669608876292686, iteration: 120614
loss: 0.9929630160331726,grad_norm: 0.9999989604409576, iteration: 120615
loss: 1.0520997047424316,grad_norm: 0.9999990369378353, iteration: 120616
loss: 1.026728868484497,grad_norm: 0.9962805227661758, iteration: 120617
loss: 0.9997047781944275,grad_norm: 0.9999991638940928, iteration: 120618
loss: 1.0088595151901245,grad_norm: 0.9999993024324042, iteration: 120619
loss: 1.010360598564148,grad_norm: 0.999999201401861, iteration: 120620
loss: 1.019000768661499,grad_norm: 0.9999992143557244, iteration: 120621
loss: 0.9812104105949402,grad_norm: 0.9999991765947873, iteration: 120622
loss: 0.9444892406463623,grad_norm: 0.9999990552315763, iteration: 120623
loss: 0.9950273036956787,grad_norm: 0.9999992773072048, iteration: 120624
loss: 0.9623804688453674,grad_norm: 0.999998940675814, iteration: 120625
loss: 1.0303875207901,grad_norm: 0.9999991226677802, iteration: 120626
loss: 1.103468656539917,grad_norm: 0.9999989175981414, iteration: 120627
loss: 1.0041333436965942,grad_norm: 0.9462302675553347, iteration: 120628
loss: 1.0227402448654175,grad_norm: 0.9999989787388592, iteration: 120629
loss: 0.9495748281478882,grad_norm: 0.9999990696664716, iteration: 120630
loss: 1.0068318843841553,grad_norm: 0.9999991439326782, iteration: 120631
loss: 0.9630059003829956,grad_norm: 0.9999993384004049, iteration: 120632
loss: 1.0149768590927124,grad_norm: 0.9999991944984903, iteration: 120633
loss: 1.0313726663589478,grad_norm: 0.9999991935538771, iteration: 120634
loss: 0.9975696206092834,grad_norm: 0.9999990322397692, iteration: 120635
loss: 0.9833623170852661,grad_norm: 0.9999992703951232, iteration: 120636
loss: 0.9702085256576538,grad_norm: 0.912463968034158, iteration: 120637
loss: 1.0207951068878174,grad_norm: 0.9565166403654894, iteration: 120638
loss: 0.9767501354217529,grad_norm: 0.8949750561217905, iteration: 120639
loss: 0.9868132472038269,grad_norm: 0.9999991687937534, iteration: 120640
loss: 1.0121515989303589,grad_norm: 0.9655280551984098, iteration: 120641
loss: 1.0217384099960327,grad_norm: 0.9999989431841116, iteration: 120642
loss: 1.0263108015060425,grad_norm: 0.9999991119892131, iteration: 120643
loss: 1.0053430795669556,grad_norm: 0.9008969646736446, iteration: 120644
loss: 1.0131433010101318,grad_norm: 0.9999995856753633, iteration: 120645
loss: 1.049211025238037,grad_norm: 0.9999992184835427, iteration: 120646
loss: 0.9770612120628357,grad_norm: 0.890426797283784, iteration: 120647
loss: 0.9969221949577332,grad_norm: 0.999999163397434, iteration: 120648
loss: 0.9720646142959595,grad_norm: 0.9999990641764268, iteration: 120649
loss: 0.9580926299095154,grad_norm: 0.902480998039771, iteration: 120650
loss: 0.963085949420929,grad_norm: 0.9366435565985332, iteration: 120651
loss: 1.0200053453445435,grad_norm: 0.9999992433607038, iteration: 120652
loss: 0.9917011857032776,grad_norm: 0.9060098931097893, iteration: 120653
loss: 0.9705894589424133,grad_norm: 0.9999992187309441, iteration: 120654
loss: 0.9845365881919861,grad_norm: 0.9999989943815042, iteration: 120655
loss: 1.002282738685608,grad_norm: 0.9999990127642784, iteration: 120656
loss: 0.9793893694877625,grad_norm: 0.9999992117326624, iteration: 120657
loss: 1.0104048252105713,grad_norm: 0.927221878127123, iteration: 120658
loss: 1.0449323654174805,grad_norm: 0.9999997184815043, iteration: 120659
loss: 0.9959049820899963,grad_norm: 0.9999997938033378, iteration: 120660
loss: 0.9771938323974609,grad_norm: 0.999999225699459, iteration: 120661
loss: 0.9818666577339172,grad_norm: 0.8828415079589914, iteration: 120662
loss: 1.01032555103302,grad_norm: 0.9553451860607424, iteration: 120663
loss: 0.9757209420204163,grad_norm: 0.8923777528505401, iteration: 120664
loss: 1.0321248769760132,grad_norm: 0.9193754824496647, iteration: 120665
loss: 0.9752301573753357,grad_norm: 0.9007187491537523, iteration: 120666
loss: 0.9947057366371155,grad_norm: 0.9999991703691457, iteration: 120667
loss: 0.9879165887832642,grad_norm: 0.971163998678357, iteration: 120668
loss: 1.0561376810073853,grad_norm: 0.9555758896132769, iteration: 120669
loss: 0.9812386631965637,grad_norm: 0.9933936173596377, iteration: 120670
loss: 0.9963278770446777,grad_norm: 0.8546575827906757, iteration: 120671
loss: 0.9970121383666992,grad_norm: 0.9999991895532859, iteration: 120672
loss: 0.9902578592300415,grad_norm: 0.9999991465336009, iteration: 120673
loss: 1.010184645652771,grad_norm: 0.885768199688049, iteration: 120674
loss: 0.9904708862304688,grad_norm: 0.9999989921738108, iteration: 120675
loss: 1.0306092500686646,grad_norm: 0.9999994254059, iteration: 120676
loss: 0.9942406415939331,grad_norm: 0.9999991251667459, iteration: 120677
loss: 1.0415737628936768,grad_norm: 0.999999286800471, iteration: 120678
loss: 0.9651339650154114,grad_norm: 0.9999991732552563, iteration: 120679
loss: 1.0115492343902588,grad_norm: 0.9999991860493547, iteration: 120680
loss: 0.9871329665184021,grad_norm: 0.9999994010561175, iteration: 120681
loss: 0.9742857217788696,grad_norm: 0.9398797167652662, iteration: 120682
loss: 0.9822816252708435,grad_norm: 0.9999991404392191, iteration: 120683
loss: 0.9939053058624268,grad_norm: 0.9999989469522967, iteration: 120684
loss: 0.9923364520072937,grad_norm: 0.8693308458905381, iteration: 120685
loss: 1.0150662660598755,grad_norm: 0.9999991036477961, iteration: 120686
loss: 0.976497232913971,grad_norm: 0.9433939569287597, iteration: 120687
loss: 0.9667948484420776,grad_norm: 0.9662765035986987, iteration: 120688
loss: 0.9794889688491821,grad_norm: 0.999998992904689, iteration: 120689
loss: 1.0193487405776978,grad_norm: 0.9805806497719698, iteration: 120690
loss: 1.011509656906128,grad_norm: 0.9999990582102011, iteration: 120691
loss: 0.9978223443031311,grad_norm: 0.9783563152938053, iteration: 120692
loss: 0.9993806481361389,grad_norm: 0.9999990855533251, iteration: 120693
loss: 0.9692466855049133,grad_norm: 0.7887590868742149, iteration: 120694
loss: 1.0342614650726318,grad_norm: 0.9416387467764445, iteration: 120695
loss: 1.0895196199417114,grad_norm: 0.9999992409114359, iteration: 120696
loss: 1.0112156867980957,grad_norm: 0.9022831073529592, iteration: 120697
loss: 1.0515409708023071,grad_norm: 0.9999992625435365, iteration: 120698
loss: 1.0157947540283203,grad_norm: 0.9999993613322182, iteration: 120699
loss: 0.9923595190048218,grad_norm: 0.9189703210172832, iteration: 120700
loss: 1.0097804069519043,grad_norm: 0.9999992159871197, iteration: 120701
loss: 1.0031417608261108,grad_norm: 0.9999992420532974, iteration: 120702
loss: 1.0262439250946045,grad_norm: 0.9999989885748404, iteration: 120703
loss: 0.995954692363739,grad_norm: 0.9999997306655275, iteration: 120704
loss: 0.9915423393249512,grad_norm: 0.9712946223724003, iteration: 120705
loss: 0.9945070743560791,grad_norm: 0.9999990132432219, iteration: 120706
loss: 0.9915452599525452,grad_norm: 0.9999989343344042, iteration: 120707
loss: 0.9672791957855225,grad_norm: 0.9217328907380608, iteration: 120708
loss: 1.0179753303527832,grad_norm: 0.9950697649462013, iteration: 120709
loss: 0.9783451557159424,grad_norm: 0.9999991935064118, iteration: 120710
loss: 0.9826740622520447,grad_norm: 0.9412089393386842, iteration: 120711
loss: 1.0504969358444214,grad_norm: 0.9999994765411268, iteration: 120712
loss: 1.0184811353683472,grad_norm: 0.9409946408830172, iteration: 120713
loss: 1.0206867456436157,grad_norm: 0.9575831131068702, iteration: 120714
loss: 1.0526013374328613,grad_norm: 0.9999993457518381, iteration: 120715
loss: 0.9959443211555481,grad_norm: 0.9999992057779861, iteration: 120716
loss: 0.9856745600700378,grad_norm: 0.9515868668826803, iteration: 120717
loss: 1.0166401863098145,grad_norm: 0.8524106041305176, iteration: 120718
loss: 1.0097061395645142,grad_norm: 0.9999991944497851, iteration: 120719
loss: 1.0204914808273315,grad_norm: 0.9999992523222533, iteration: 120720
loss: 1.0102061033248901,grad_norm: 0.9785126969052937, iteration: 120721
loss: 1.011962890625,grad_norm: 0.9999990562591181, iteration: 120722
loss: 0.9936978220939636,grad_norm: 0.9999991382392867, iteration: 120723
loss: 0.9715836644172668,grad_norm: 0.8717617725125179, iteration: 120724
loss: 0.9786025285720825,grad_norm: 0.9999990041262461, iteration: 120725
loss: 1.0088696479797363,grad_norm: 0.9999991585393702, iteration: 120726
loss: 0.9888176321983337,grad_norm: 0.9489356210240713, iteration: 120727
loss: 0.9845951199531555,grad_norm: 0.9999990742872307, iteration: 120728
loss: 1.0093051195144653,grad_norm: 0.9999990023051871, iteration: 120729
loss: 0.9868179559707642,grad_norm: 0.9999990335541692, iteration: 120730
loss: 1.0262279510498047,grad_norm: 0.9966797372473151, iteration: 120731
loss: 1.0112545490264893,grad_norm: 0.9999990483211273, iteration: 120732
loss: 1.0142781734466553,grad_norm: 0.8951558504921113, iteration: 120733
loss: 0.9915816187858582,grad_norm: 0.9999990379555151, iteration: 120734
loss: 1.0254641771316528,grad_norm: 0.8107996739087103, iteration: 120735
loss: 1.0139073133468628,grad_norm: 0.9999991092646188, iteration: 120736
loss: 1.0238592624664307,grad_norm: 0.9999991038627702, iteration: 120737
loss: 0.9896316528320312,grad_norm: 0.9999990386559463, iteration: 120738
loss: 0.9499627351760864,grad_norm: 0.9999989845216314, iteration: 120739
loss: 0.9985016584396362,grad_norm: 0.9569538223179919, iteration: 120740
loss: 0.9636321663856506,grad_norm: 0.9156816510101041, iteration: 120741
loss: 1.0139387845993042,grad_norm: 0.8491804226452286, iteration: 120742
loss: 1.006008505821228,grad_norm: 0.9870906545482837, iteration: 120743
loss: 1.0145630836486816,grad_norm: 0.9999990724717193, iteration: 120744
loss: 1.0114082098007202,grad_norm: 0.9999991651831331, iteration: 120745
loss: 1.0236514806747437,grad_norm: 0.9999992380966858, iteration: 120746
loss: 1.0794075727462769,grad_norm: 0.9999995597957136, iteration: 120747
loss: 0.9841282367706299,grad_norm: 0.9999991034856434, iteration: 120748
loss: 0.9876633286476135,grad_norm: 0.9556177965705267, iteration: 120749
loss: 0.9687743782997131,grad_norm: 0.999999133381432, iteration: 120750
loss: 0.9914591312408447,grad_norm: 0.9882498236151036, iteration: 120751
loss: 1.0136723518371582,grad_norm: 0.9999992176431536, iteration: 120752
loss: 0.9996808171272278,grad_norm: 0.9999990637882507, iteration: 120753
loss: 0.9718062877655029,grad_norm: 0.9999990763103004, iteration: 120754
loss: 1.0048251152038574,grad_norm: 0.9999994527220306, iteration: 120755
loss: 1.0077674388885498,grad_norm: 0.9912185614738347, iteration: 120756
loss: 0.9929612874984741,grad_norm: 0.8585471296598942, iteration: 120757
loss: 0.9632565379142761,grad_norm: 0.9999990363910027, iteration: 120758
loss: 0.9952388405799866,grad_norm: 0.9510315735711545, iteration: 120759
loss: 0.966644287109375,grad_norm: 0.9999989944105885, iteration: 120760
loss: 1.0268505811691284,grad_norm: 0.9999990345738095, iteration: 120761
loss: 0.9893296360969543,grad_norm: 0.973717122223534, iteration: 120762
loss: 1.0172382593154907,grad_norm: 0.9999990009163442, iteration: 120763
loss: 0.9999338388442993,grad_norm: 0.9999992412457449, iteration: 120764
loss: 0.9924166202545166,grad_norm: 0.999999053048226, iteration: 120765
loss: 0.9932113885879517,grad_norm: 0.9999991080985974, iteration: 120766
loss: 1.020247459411621,grad_norm: 0.9999992103809867, iteration: 120767
loss: 0.9986171126365662,grad_norm: 0.9935440251824234, iteration: 120768
loss: 1.004481554031372,grad_norm: 0.8825209223012325, iteration: 120769
loss: 1.0188297033309937,grad_norm: 0.9999992067180349, iteration: 120770
loss: 0.980073094367981,grad_norm: 0.9999990878046698, iteration: 120771
loss: 0.9922541379928589,grad_norm: 0.9999991453173339, iteration: 120772
loss: 1.0050171613693237,grad_norm: 0.9468544695167359, iteration: 120773
loss: 1.0321186780929565,grad_norm: 0.9588463029691748, iteration: 120774
loss: 1.0073601007461548,grad_norm: 0.9999998660016665, iteration: 120775
loss: 1.0126326084136963,grad_norm: 0.9999992264439909, iteration: 120776
loss: 1.0017927885055542,grad_norm: 0.9999991175270034, iteration: 120777
loss: 1.0102447271347046,grad_norm: 0.9999990998864888, iteration: 120778
loss: 1.0154304504394531,grad_norm: 0.9999991995359108, iteration: 120779
loss: 0.9903163909912109,grad_norm: 0.9465323190145289, iteration: 120780
loss: 1.0245122909545898,grad_norm: 0.9328288461523098, iteration: 120781
loss: 0.9733527302742004,grad_norm: 0.9479069521214345, iteration: 120782
loss: 1.0233311653137207,grad_norm: 0.9999991516004044, iteration: 120783
loss: 1.025437355041504,grad_norm: 0.9999990450727702, iteration: 120784
loss: 0.9948683977127075,grad_norm: 0.9999991183960858, iteration: 120785
loss: 1.0068650245666504,grad_norm: 0.9070358061067547, iteration: 120786
loss: 0.98014897108078,grad_norm: 0.9420500835638361, iteration: 120787
loss: 0.9932475686073303,grad_norm: 0.9999991036623751, iteration: 120788
loss: 1.0010566711425781,grad_norm: 0.9510128519817755, iteration: 120789
loss: 0.9867910742759705,grad_norm: 0.9934960604590463, iteration: 120790
loss: 1.0516380071640015,grad_norm: 0.9999992732031145, iteration: 120791
loss: 1.000643014907837,grad_norm: 0.9999991019807994, iteration: 120792
loss: 0.976125180721283,grad_norm: 0.9999991016389436, iteration: 120793
loss: 1.0060443878173828,grad_norm: 0.9479207294104167, iteration: 120794
loss: 0.9937065839767456,grad_norm: 0.9999991424657773, iteration: 120795
loss: 1.0219342708587646,grad_norm: 0.9999991288928197, iteration: 120796
loss: 0.9790856242179871,grad_norm: 0.9999994000607045, iteration: 120797
loss: 1.0249261856079102,grad_norm: 0.9999992111541783, iteration: 120798
loss: 1.0022817850112915,grad_norm: 0.9999992192655527, iteration: 120799
loss: 1.0030821561813354,grad_norm: 0.9999989456656841, iteration: 120800
loss: 1.0056530237197876,grad_norm: 0.8303633394239432, iteration: 120801
loss: 0.997120201587677,grad_norm: 0.9884083171829373, iteration: 120802
loss: 1.0422418117523193,grad_norm: 0.9999999395541379, iteration: 120803
loss: 1.008283019065857,grad_norm: 0.9269485215272316, iteration: 120804
loss: 0.9874584078788757,grad_norm: 0.9999991719809874, iteration: 120805
loss: 0.9863809943199158,grad_norm: 0.9999991247393775, iteration: 120806
loss: 0.9936681389808655,grad_norm: 0.9623589346517619, iteration: 120807
loss: 1.0045052766799927,grad_norm: 0.9647674389690408, iteration: 120808
loss: 0.9929957985877991,grad_norm: 0.9502472217135879, iteration: 120809
loss: 1.0380163192749023,grad_norm: 0.9999992221890128, iteration: 120810
loss: 1.0267984867095947,grad_norm: 0.9243717656868057, iteration: 120811
loss: 0.9859535098075867,grad_norm: 0.9933411509442834, iteration: 120812
loss: 0.996727705001831,grad_norm: 0.9999991601803451, iteration: 120813
loss: 0.9805471301078796,grad_norm: 0.9999990772420834, iteration: 120814
loss: 1.0123512744903564,grad_norm: 0.9999992155252138, iteration: 120815
loss: 1.0015027523040771,grad_norm: 0.9999990499726634, iteration: 120816
loss: 1.0038976669311523,grad_norm: 0.9278668194799451, iteration: 120817
loss: 0.9811586141586304,grad_norm: 0.9999998739193068, iteration: 120818
loss: 0.978190541267395,grad_norm: 0.9455567800826074, iteration: 120819
loss: 0.9814096689224243,grad_norm: 0.9999993798273833, iteration: 120820
loss: 0.9960594177246094,grad_norm: 0.9999991935749535, iteration: 120821
loss: 0.9678676724433899,grad_norm: 0.9999989841354354, iteration: 120822
loss: 1.0159574747085571,grad_norm: 0.9999990717877489, iteration: 120823
loss: 1.0501238107681274,grad_norm: 0.9815905004663432, iteration: 120824
loss: 1.03599214553833,grad_norm: 0.9191712767911251, iteration: 120825
loss: 0.9977597594261169,grad_norm: 0.9558473072446031, iteration: 120826
loss: 1.0184355974197388,grad_norm: 0.7799051756718604, iteration: 120827
loss: 0.9642087817192078,grad_norm: 0.9999990572856062, iteration: 120828
loss: 0.9933662414550781,grad_norm: 0.9251334600876332, iteration: 120829
loss: 0.9927852749824524,grad_norm: 0.9999990590638496, iteration: 120830
loss: 1.0031219720840454,grad_norm: 0.8731984466769424, iteration: 120831
loss: 1.0269471406936646,grad_norm: 0.9471515875141936, iteration: 120832
loss: 0.9999901056289673,grad_norm: 0.9999989999387531, iteration: 120833
loss: 0.9907272458076477,grad_norm: 0.9999991926612853, iteration: 120834
loss: 0.9863023161888123,grad_norm: 0.9999990919238783, iteration: 120835
loss: 1.009453535079956,grad_norm: 0.9999994630403481, iteration: 120836
loss: 1.0673034191131592,grad_norm: 0.999999304620043, iteration: 120837
loss: 1.146947979927063,grad_norm: 0.9999992303884044, iteration: 120838
loss: 0.9946988224983215,grad_norm: 0.999999295382411, iteration: 120839
loss: 1.013582706451416,grad_norm: 0.9999994787715798, iteration: 120840
loss: 1.0216107368469238,grad_norm: 0.9103662665559548, iteration: 120841
loss: 1.004412055015564,grad_norm: 0.9999992652747608, iteration: 120842
loss: 0.9875486493110657,grad_norm: 0.9698927029094969, iteration: 120843
loss: 1.005548357963562,grad_norm: 0.9999992106871319, iteration: 120844
loss: 0.9968817234039307,grad_norm: 0.9431631337311082, iteration: 120845
loss: 0.9743354320526123,grad_norm: 0.866146993650572, iteration: 120846
loss: 1.008009910583496,grad_norm: 0.9999992249084368, iteration: 120847
loss: 0.9798752665519714,grad_norm: 0.9999991597202803, iteration: 120848
loss: 1.1206793785095215,grad_norm: 0.9999993190644391, iteration: 120849
loss: 1.0092103481292725,grad_norm: 0.9999993016180369, iteration: 120850
loss: 1.0064321756362915,grad_norm: 0.9753093924519118, iteration: 120851
loss: 1.012850046157837,grad_norm: 0.9999989518884919, iteration: 120852
loss: 0.9488158226013184,grad_norm: 0.9476985871880235, iteration: 120853
loss: 1.0059006214141846,grad_norm: 0.999999287407408, iteration: 120854
loss: 1.0140790939331055,grad_norm: 0.9999991878895174, iteration: 120855
loss: 0.9945507645606995,grad_norm: 0.9999990110719381, iteration: 120856
loss: 0.995267927646637,grad_norm: 0.9741309656239353, iteration: 120857
loss: 1.0518224239349365,grad_norm: 0.999999765006527, iteration: 120858
loss: 1.0154904127120972,grad_norm: 0.9999991632709423, iteration: 120859
loss: 0.9863241910934448,grad_norm: 0.9058175664087319, iteration: 120860
loss: 1.002459168434143,grad_norm: 0.9999990731667712, iteration: 120861
loss: 0.986154317855835,grad_norm: 0.9668810989807387, iteration: 120862
loss: 1.0176301002502441,grad_norm: 0.9999996910444152, iteration: 120863
loss: 0.9861530065536499,grad_norm: 0.99120295416444, iteration: 120864
loss: 0.9953141808509827,grad_norm: 0.9999992164775914, iteration: 120865
loss: 1.0036489963531494,grad_norm: 0.9999990897884491, iteration: 120866
loss: 0.9955320358276367,grad_norm: 0.9052855806678705, iteration: 120867
loss: 0.9969430565834045,grad_norm: 0.9999991173670655, iteration: 120868
loss: 0.9958217740058899,grad_norm: 0.9761289474257917, iteration: 120869
loss: 1.029308795928955,grad_norm: 0.9599893833660976, iteration: 120870
loss: 1.0493288040161133,grad_norm: 0.9999997650955507, iteration: 120871
loss: 0.9952230453491211,grad_norm: 0.9999990067174822, iteration: 120872
loss: 0.9661576151847839,grad_norm: 0.9999990040308014, iteration: 120873
loss: 0.9777629375457764,grad_norm: 0.9634263414880923, iteration: 120874
loss: 1.020272135734558,grad_norm: 0.9999991811948072, iteration: 120875
loss: 1.0091884136199951,grad_norm: 0.8523691759640304, iteration: 120876
loss: 1.0276893377304077,grad_norm: 0.8109618713873106, iteration: 120877
loss: 1.0286283493041992,grad_norm: 0.9945894707337918, iteration: 120878
loss: 0.9848839044570923,grad_norm: 0.9999989997081634, iteration: 120879
loss: 1.0042195320129395,grad_norm: 0.9757086314170847, iteration: 120880
loss: 1.0126323699951172,grad_norm: 0.9761327069962014, iteration: 120881
loss: 1.0149544477462769,grad_norm: 0.9999991517060441, iteration: 120882
loss: 1.0087991952896118,grad_norm: 0.9999995061665584, iteration: 120883
loss: 0.9572000503540039,grad_norm: 0.9999991964518499, iteration: 120884
loss: 1.0124266147613525,grad_norm: 0.9999991754687356, iteration: 120885
loss: 1.0529450178146362,grad_norm: 0.9979860783116421, iteration: 120886
loss: 1.0497745275497437,grad_norm: 0.9999994024081232, iteration: 120887
loss: 0.9991832971572876,grad_norm: 0.9168217180500391, iteration: 120888
loss: 1.0907613039016724,grad_norm: 0.9999996097264187, iteration: 120889
loss: 0.9942980408668518,grad_norm: 0.9999992071199765, iteration: 120890
loss: 1.014143705368042,grad_norm: 0.9981675791099118, iteration: 120891
loss: 0.9972662329673767,grad_norm: 0.9043685222935285, iteration: 120892
loss: 1.0191415548324585,grad_norm: 0.9999992413674288, iteration: 120893
loss: 1.0226306915283203,grad_norm: 0.9999989939655276, iteration: 120894
loss: 0.9668534398078918,grad_norm: 0.9644513187947159, iteration: 120895
loss: 1.1111629009246826,grad_norm: 0.9999990583945171, iteration: 120896
loss: 1.0273879766464233,grad_norm: 0.9999990813006533, iteration: 120897
loss: 0.9983245730400085,grad_norm: 0.9999991359318949, iteration: 120898
loss: 1.0331729650497437,grad_norm: 0.999999149284241, iteration: 120899
loss: 0.9657014012336731,grad_norm: 0.9999990285740641, iteration: 120900
loss: 1.0218172073364258,grad_norm: 0.9576180321216801, iteration: 120901
loss: 0.9740160703659058,grad_norm: 0.999999031109682, iteration: 120902
loss: 1.0158839225769043,grad_norm: 0.9999993568659488, iteration: 120903
loss: 1.0160422325134277,grad_norm: 0.8416375193165548, iteration: 120904
loss: 1.0320719480514526,grad_norm: 0.9282394551312003, iteration: 120905
loss: 1.0187060832977295,grad_norm: 0.999999475188042, iteration: 120906
loss: 1.0293948650360107,grad_norm: 0.9330990492546989, iteration: 120907
loss: 0.969870924949646,grad_norm: 0.9999990420954948, iteration: 120908
loss: 1.011534333229065,grad_norm: 0.9337355863032629, iteration: 120909
loss: 0.9885595440864563,grad_norm: 0.8238628461419958, iteration: 120910
loss: 0.968855619430542,grad_norm: 0.9308533546334704, iteration: 120911
loss: 0.9811654686927795,grad_norm: 0.9999991918758568, iteration: 120912
loss: 1.0296576023101807,grad_norm: 0.9999991862439388, iteration: 120913
loss: 0.9780452251434326,grad_norm: 0.9999990321865916, iteration: 120914
loss: 1.0320632457733154,grad_norm: 0.9999992022420525, iteration: 120915
loss: 1.0412659645080566,grad_norm: 0.9604375995267881, iteration: 120916
loss: 1.0009725093841553,grad_norm: 0.9999990852477949, iteration: 120917
loss: 0.98736971616745,grad_norm: 0.9999992604362589, iteration: 120918
loss: 1.022719383239746,grad_norm: 0.9999991962440439, iteration: 120919
loss: 1.0228906869888306,grad_norm: 0.9999991096773171, iteration: 120920
loss: 0.9749621748924255,grad_norm: 0.9999989940166316, iteration: 120921
loss: 1.0155272483825684,grad_norm: 0.9999989467256766, iteration: 120922
loss: 0.9992446899414062,grad_norm: 0.9999992408131593, iteration: 120923
loss: 0.9936752319335938,grad_norm: 0.9999995365697265, iteration: 120924
loss: 0.975217342376709,grad_norm: 0.9999991504881611, iteration: 120925
loss: 1.018877625465393,grad_norm: 0.9999991745691735, iteration: 120926
loss: 0.997126579284668,grad_norm: 0.9891793670381296, iteration: 120927
loss: 0.9804046750068665,grad_norm: 0.9523556637475048, iteration: 120928
loss: 1.0127071142196655,grad_norm: 0.9999992614409299, iteration: 120929
loss: 0.9915783405303955,grad_norm: 0.9999992259856629, iteration: 120930
loss: 1.000165343284607,grad_norm: 0.9999991379301437, iteration: 120931
loss: 1.0133088827133179,grad_norm: 0.9781302188876203, iteration: 120932
loss: 1.0080894231796265,grad_norm: 0.997619814407319, iteration: 120933
loss: 1.0072815418243408,grad_norm: 0.8949776867396749, iteration: 120934
loss: 0.9845177531242371,grad_norm: 0.9455969682960417, iteration: 120935
loss: 1.1367576122283936,grad_norm: 0.9999998698026538, iteration: 120936
loss: 0.9771518707275391,grad_norm: 0.8250029030931451, iteration: 120937
loss: 1.0181246995925903,grad_norm: 0.8875074774930132, iteration: 120938
loss: 1.0342583656311035,grad_norm: 0.9999991052496079, iteration: 120939
loss: 1.0042051076889038,grad_norm: 0.999999156691089, iteration: 120940
loss: 0.9459639191627502,grad_norm: 0.9235922545232035, iteration: 120941
loss: 1.0529749393463135,grad_norm: 0.9999995630296219, iteration: 120942
loss: 1.0180048942565918,grad_norm: 0.9999990677173995, iteration: 120943
loss: 1.125665545463562,grad_norm: 0.9999992248967995, iteration: 120944
loss: 1.0093882083892822,grad_norm: 0.9999991357716429, iteration: 120945
loss: 0.9775488376617432,grad_norm: 0.9999991622055792, iteration: 120946
loss: 1.023942470550537,grad_norm: 0.916232054191023, iteration: 120947
loss: 1.0393061637878418,grad_norm: 0.9999989874062089, iteration: 120948
loss: 0.9744700193405151,grad_norm: 0.9999990247949961, iteration: 120949
loss: 0.9996393918991089,grad_norm: 0.9999993335303707, iteration: 120950
loss: 1.0430370569229126,grad_norm: 0.9999993090170561, iteration: 120951
loss: 1.0259407758712769,grad_norm: 0.9999991697284216, iteration: 120952
loss: 1.011408805847168,grad_norm: 0.9999991792694044, iteration: 120953
loss: 0.969002366065979,grad_norm: 0.999999053392859, iteration: 120954
loss: 1.0022728443145752,grad_norm: 0.9555565195294766, iteration: 120955
loss: 0.9911104440689087,grad_norm: 0.9999991114324788, iteration: 120956
loss: 0.9854903817176819,grad_norm: 0.9999990479654853, iteration: 120957
loss: 0.9839633107185364,grad_norm: 0.8791138235420926, iteration: 120958
loss: 1.2150410413742065,grad_norm: 0.9999997668502221, iteration: 120959
loss: 1.0062707662582397,grad_norm: 0.9999992854873276, iteration: 120960
loss: 1.0150249004364014,grad_norm: 0.9999991316462783, iteration: 120961
loss: 1.0036587715148926,grad_norm: 0.9589116864793166, iteration: 120962
loss: 0.9742620587348938,grad_norm: 0.9999991241762596, iteration: 120963
loss: 1.0171360969543457,grad_norm: 0.9999990937650155, iteration: 120964
loss: 0.9964219927787781,grad_norm: 0.9999989669823457, iteration: 120965
loss: 0.9640963673591614,grad_norm: 0.9999991186191596, iteration: 120966
loss: 0.9951430559158325,grad_norm: 0.9999995362518227, iteration: 120967
loss: 0.9971903562545776,grad_norm: 0.9999991101465061, iteration: 120968
loss: 1.0021470785140991,grad_norm: 0.9999995646795495, iteration: 120969
loss: 1.0443649291992188,grad_norm: 0.9999993174745196, iteration: 120970
loss: 0.9667201638221741,grad_norm: 0.9999992828531992, iteration: 120971
loss: 1.0163291692733765,grad_norm: 0.9999990504825478, iteration: 120972
loss: 1.0126101970672607,grad_norm: 0.9318871690195029, iteration: 120973
loss: 0.9995373487472534,grad_norm: 0.9999990963765196, iteration: 120974
loss: 1.0047011375427246,grad_norm: 0.9999995562071394, iteration: 120975
loss: 0.9891404509544373,grad_norm: 0.9999991734127215, iteration: 120976
loss: 1.0192394256591797,grad_norm: 0.9992068156963344, iteration: 120977
loss: 0.9925806522369385,grad_norm: 0.9999992732663446, iteration: 120978
loss: 1.0991020202636719,grad_norm: 0.9999993865613158, iteration: 120979
loss: 1.017511010169983,grad_norm: 0.9578300003174225, iteration: 120980
loss: 1.0163209438323975,grad_norm: 0.9332969105198804, iteration: 120981
loss: 1.0289748907089233,grad_norm: 0.9999991392221785, iteration: 120982
loss: 1.0227324962615967,grad_norm: 0.9287106730148139, iteration: 120983
loss: 1.0289207696914673,grad_norm: 0.9999990248325191, iteration: 120984
loss: 1.000612735748291,grad_norm: 0.999999068229388, iteration: 120985
loss: 0.9958962202072144,grad_norm: 0.999999078929395, iteration: 120986
loss: 0.973641574382782,grad_norm: 0.9999990509666691, iteration: 120987
loss: 1.0546208620071411,grad_norm: 0.999999205482692, iteration: 120988
loss: 1.0195560455322266,grad_norm: 0.9999994787865257, iteration: 120989
loss: 1.0162811279296875,grad_norm: 0.9188995176577149, iteration: 120990
loss: 1.0143709182739258,grad_norm: 0.9999990149945116, iteration: 120991
loss: 0.9949678182601929,grad_norm: 0.9999991747914044, iteration: 120992
loss: 1.0191230773925781,grad_norm: 0.9274636644554711, iteration: 120993
loss: 1.0226198434829712,grad_norm: 0.9999992005977386, iteration: 120994
loss: 0.9867468476295471,grad_norm: 0.9999988825915914, iteration: 120995
loss: 0.9890558123588562,grad_norm: 0.999999236369528, iteration: 120996
loss: 0.996899425983429,grad_norm: 0.9999991082326108, iteration: 120997
loss: 1.010604977607727,grad_norm: 0.9999988095004468, iteration: 120998
loss: 1.0386784076690674,grad_norm: 0.9533969981542629, iteration: 120999
loss: 1.0075960159301758,grad_norm: 0.9985359929066232, iteration: 121000
loss: 0.9936230778694153,grad_norm: 0.980998205518313, iteration: 121001
loss: 0.9905966520309448,grad_norm: 0.9699814451457109, iteration: 121002
loss: 1.0067639350891113,grad_norm: 0.99999924021448, iteration: 121003
loss: 1.0076825618743896,grad_norm: 0.9480510753438325, iteration: 121004
loss: 1.039794921875,grad_norm: 0.9447676373657095, iteration: 121005
loss: 1.0154472589492798,grad_norm: 0.9999989155791089, iteration: 121006
loss: 0.984580934047699,grad_norm: 0.9271215425684236, iteration: 121007
loss: 0.992518424987793,grad_norm: 0.9999992025636291, iteration: 121008
loss: 1.0067800283432007,grad_norm: 0.9999991912758203, iteration: 121009
loss: 0.9688554406166077,grad_norm: 0.9999991471022475, iteration: 121010
loss: 1.0761698484420776,grad_norm: 0.9999995524991552, iteration: 121011
loss: 1.0314112901687622,grad_norm: 0.9338094699475066, iteration: 121012
loss: 1.0010771751403809,grad_norm: 0.911857532709645, iteration: 121013
loss: 1.0016709566116333,grad_norm: 0.9999990484653223, iteration: 121014
loss: 1.0146358013153076,grad_norm: 0.9999990600090424, iteration: 121015
loss: 1.0069929361343384,grad_norm: 0.9857615041218349, iteration: 121016
loss: 1.0070236921310425,grad_norm: 0.9793298760553353, iteration: 121017
loss: 0.9857028722763062,grad_norm: 0.9999992629093641, iteration: 121018
loss: 1.0104267597198486,grad_norm: 0.8145913431689006, iteration: 121019
loss: 1.0081490278244019,grad_norm: 0.9902730064781528, iteration: 121020
loss: 0.9914271235466003,grad_norm: 0.999999490582895, iteration: 121021
loss: 1.0143349170684814,grad_norm: 0.8872137224399331, iteration: 121022
loss: 1.0280941724777222,grad_norm: 0.9999992041990219, iteration: 121023
loss: 0.9803987145423889,grad_norm: 0.84360474801035, iteration: 121024
loss: 0.9869938492774963,grad_norm: 0.8747897557518384, iteration: 121025
loss: 1.003239393234253,grad_norm: 0.9999990633022203, iteration: 121026
loss: 1.0240452289581299,grad_norm: 0.9950193030891962, iteration: 121027
loss: 1.0146491527557373,grad_norm: 0.9999991882972512, iteration: 121028
loss: 1.0380079746246338,grad_norm: 0.9895988303564692, iteration: 121029
loss: 1.0387136936187744,grad_norm: 0.9492114429734757, iteration: 121030
loss: 1.009990930557251,grad_norm: 0.9999992516472281, iteration: 121031
loss: 0.9833871126174927,grad_norm: 0.9999991251694859, iteration: 121032
loss: 1.184076189994812,grad_norm: 0.9999998674989458, iteration: 121033
loss: 1.0279476642608643,grad_norm: 0.9999991717562289, iteration: 121034
loss: 0.9974128007888794,grad_norm: 0.9999991093088193, iteration: 121035
loss: 1.0044649839401245,grad_norm: 0.9991615071281202, iteration: 121036
loss: 1.008245825767517,grad_norm: 0.9999990832630289, iteration: 121037
loss: 0.9713367223739624,grad_norm: 0.9999992056406689, iteration: 121038
loss: 1.042399525642395,grad_norm: 0.9999990308638639, iteration: 121039
loss: 1.020897388458252,grad_norm: 0.9367896599680656, iteration: 121040
loss: 0.98995441198349,grad_norm: 0.9999990303323172, iteration: 121041
loss: 1.0099748373031616,grad_norm: 0.9999993361476116, iteration: 121042
loss: 0.9987226724624634,grad_norm: 0.9999990896829171, iteration: 121043
loss: 1.0160784721374512,grad_norm: 0.9578002458730597, iteration: 121044
loss: 1.0253785848617554,grad_norm: 0.9999991152243057, iteration: 121045
loss: 1.0333771705627441,grad_norm: 0.9999993118227278, iteration: 121046
loss: 1.0086928606033325,grad_norm: 0.9998989560301491, iteration: 121047
loss: 0.9828528165817261,grad_norm: 0.9999990857447221, iteration: 121048
loss: 1.007217526435852,grad_norm: 0.8306589156845189, iteration: 121049
loss: 1.0144507884979248,grad_norm: 0.9999991199456837, iteration: 121050
loss: 0.9978746771812439,grad_norm: 0.9999992004315649, iteration: 121051
loss: 0.997711181640625,grad_norm: 0.9196387338369979, iteration: 121052
loss: 0.9977266192436218,grad_norm: 0.9999991792963376, iteration: 121053
loss: 1.0222700834274292,grad_norm: 0.8896295000041065, iteration: 121054
loss: 0.9657000303268433,grad_norm: 0.9999990261089134, iteration: 121055
loss: 1.000656247138977,grad_norm: 0.9999992026893393, iteration: 121056
loss: 0.991973340511322,grad_norm: 0.9999991418434795, iteration: 121057
loss: 1.0392662286758423,grad_norm: 0.9999992317313772, iteration: 121058
loss: 1.010514736175537,grad_norm: 0.9999992542035018, iteration: 121059
loss: 1.0034441947937012,grad_norm: 0.9999992051026222, iteration: 121060
loss: 1.022870421409607,grad_norm: 0.9999990016982612, iteration: 121061
loss: 1.0243604183197021,grad_norm: 0.9999990322998745, iteration: 121062
loss: 0.9803821444511414,grad_norm: 0.9999990725045179, iteration: 121063
loss: 1.0059059858322144,grad_norm: 0.9999991346104389, iteration: 121064
loss: 1.0022523403167725,grad_norm: 0.9999991114864566, iteration: 121065
loss: 0.965733528137207,grad_norm: 0.9999991177571929, iteration: 121066
loss: 1.0176142454147339,grad_norm: 0.999999038844133, iteration: 121067
loss: 0.9915366768836975,grad_norm: 0.9999990843388911, iteration: 121068
loss: 0.992432713508606,grad_norm: 0.9999991676591851, iteration: 121069
loss: 1.0267363786697388,grad_norm: 0.9003199467548242, iteration: 121070
loss: 1.016955852508545,grad_norm: 0.9999993103719129, iteration: 121071
loss: 0.9942264556884766,grad_norm: 0.9999992598659851, iteration: 121072
loss: 0.9929705262184143,grad_norm: 0.9859331924077165, iteration: 121073
loss: 1.0323944091796875,grad_norm: 0.9999990901804433, iteration: 121074
loss: 1.021929144859314,grad_norm: 0.9656045580821273, iteration: 121075
loss: 0.9881008267402649,grad_norm: 0.9999991856712023, iteration: 121076
loss: 1.000951886177063,grad_norm: 0.9999991341402502, iteration: 121077
loss: 1.0314769744873047,grad_norm: 0.8897234923298409, iteration: 121078
loss: 1.0381875038146973,grad_norm: 0.9999990598445155, iteration: 121079
loss: 0.994303286075592,grad_norm: 0.9115862465784184, iteration: 121080
loss: 1.0222983360290527,grad_norm: 0.999999167090586, iteration: 121081
loss: 0.9715445637702942,grad_norm: 0.9999991720303854, iteration: 121082
loss: 0.9739112257957458,grad_norm: 0.9335560406542522, iteration: 121083
loss: 0.9857743382453918,grad_norm: 0.9228793818784625, iteration: 121084
loss: 1.0162572860717773,grad_norm: 0.999999173357912, iteration: 121085
loss: 0.9732735753059387,grad_norm: 0.9999991771580868, iteration: 121086
loss: 1.0029457807540894,grad_norm: 0.9999991299075272, iteration: 121087
loss: 1.0319633483886719,grad_norm: 0.8661643347744888, iteration: 121088
loss: 0.9918232560157776,grad_norm: 0.9999991169599045, iteration: 121089
loss: 0.9921661019325256,grad_norm: 0.9999992839203984, iteration: 121090
loss: 1.010933756828308,grad_norm: 0.9778926302937906, iteration: 121091
loss: 0.9868349432945251,grad_norm: 0.999999285417222, iteration: 121092
loss: 1.0043072700500488,grad_norm: 0.9780860018949142, iteration: 121093
loss: 0.9634393453598022,grad_norm: 0.9999993002106606, iteration: 121094
loss: 1.0177124738693237,grad_norm: 0.9999992971178023, iteration: 121095
loss: 0.9728261232376099,grad_norm: 0.9999990627086918, iteration: 121096
loss: 1.0219523906707764,grad_norm: 0.909027057454796, iteration: 121097
loss: 0.9819961786270142,grad_norm: 0.9064142733964016, iteration: 121098
loss: 1.0075340270996094,grad_norm: 0.9999991281397955, iteration: 121099
loss: 0.9945595860481262,grad_norm: 0.9999990532626706, iteration: 121100
loss: 1.022821307182312,grad_norm: 0.9999990825610007, iteration: 121101
loss: 1.0093995332717896,grad_norm: 0.9254270942732087, iteration: 121102
loss: 1.001343846321106,grad_norm: 0.999999155028539, iteration: 121103
loss: 1.0058913230895996,grad_norm: 0.9999992081333795, iteration: 121104
loss: 1.0372717380523682,grad_norm: 0.9999990020953653, iteration: 121105
loss: 0.9819412231445312,grad_norm: 0.9999990324797747, iteration: 121106
loss: 1.0480271577835083,grad_norm: 0.785783326514339, iteration: 121107
loss: 1.0103754997253418,grad_norm: 0.9999990712955128, iteration: 121108
loss: 1.0103482007980347,grad_norm: 0.8463474300125029, iteration: 121109
loss: 1.0087594985961914,grad_norm: 0.9999989950437933, iteration: 121110
loss: 0.9970775842666626,grad_norm: 0.9999990601263168, iteration: 121111
loss: 1.093472957611084,grad_norm: 0.9999991926844205, iteration: 121112
loss: 0.9774308800697327,grad_norm: 0.9611609519684327, iteration: 121113
loss: 0.9774922728538513,grad_norm: 0.9999991952678283, iteration: 121114
loss: 1.0331963300704956,grad_norm: 0.8184304560669432, iteration: 121115
loss: 0.9672356843948364,grad_norm: 0.9999990529364032, iteration: 121116
loss: 0.9894914627075195,grad_norm: 0.9999990883003764, iteration: 121117
loss: 1.0210832357406616,grad_norm: 0.966066489151428, iteration: 121118
loss: 0.9978533983230591,grad_norm: 0.9883536173207347, iteration: 121119
loss: 0.9893051981925964,grad_norm: 0.9018109994435384, iteration: 121120
loss: 1.0024465322494507,grad_norm: 0.9859940725975853, iteration: 121121
loss: 1.0056544542312622,grad_norm: 0.9526897588843762, iteration: 121122
loss: 0.9850795865058899,grad_norm: 0.9999993567653117, iteration: 121123
loss: 1.0028042793273926,grad_norm: 0.9976770563363814, iteration: 121124
loss: 1.0177407264709473,grad_norm: 0.9999992127403404, iteration: 121125
loss: 1.0236185789108276,grad_norm: 0.9999990384081097, iteration: 121126
loss: 1.0104695558547974,grad_norm: 0.9999991664979841, iteration: 121127
loss: 1.0053352117538452,grad_norm: 0.9762591478609721, iteration: 121128
loss: 0.9696156978607178,grad_norm: 0.8917768385023283, iteration: 121129
loss: 0.9974250197410583,grad_norm: 0.8768185182743685, iteration: 121130
loss: 1.0338778495788574,grad_norm: 0.9999992079439742, iteration: 121131
loss: 1.0240410566329956,grad_norm: 0.9885206596813882, iteration: 121132
loss: 0.9974858164787292,grad_norm: 0.9528791519643103, iteration: 121133
loss: 1.0218303203582764,grad_norm: 0.999998992659153, iteration: 121134
loss: 0.9779009222984314,grad_norm: 0.9981292080345797, iteration: 121135
loss: 1.0030792951583862,grad_norm: 0.9999990419873445, iteration: 121136
loss: 0.9750825762748718,grad_norm: 0.9999989770791193, iteration: 121137
loss: 1.0060566663742065,grad_norm: 0.8715910917096253, iteration: 121138
loss: 0.9821024537086487,grad_norm: 0.9999991744506745, iteration: 121139
loss: 1.027815580368042,grad_norm: 0.8028101187094945, iteration: 121140
loss: 0.9977692365646362,grad_norm: 0.9952129647729782, iteration: 121141
loss: 1.0145630836486816,grad_norm: 0.9761510225768185, iteration: 121142
loss: 1.0168808698654175,grad_norm: 0.999998953925368, iteration: 121143
loss: 0.9670429229736328,grad_norm: 0.9999992134295953, iteration: 121144
loss: 0.9690993428230286,grad_norm: 0.9999991340110979, iteration: 121145
loss: 0.9966285228729248,grad_norm: 0.9999992201560696, iteration: 121146
loss: 1.0155781507492065,grad_norm: 0.9999993194825103, iteration: 121147
loss: 1.0293601751327515,grad_norm: 0.8781191368006688, iteration: 121148
loss: 1.0373331308364868,grad_norm: 0.9999991671969245, iteration: 121149
loss: 1.0125114917755127,grad_norm: 0.9452274669260942, iteration: 121150
loss: 1.0340323448181152,grad_norm: 0.9999991378981767, iteration: 121151
loss: 1.0047123432159424,grad_norm: 0.9831350785155796, iteration: 121152
loss: 0.9890649318695068,grad_norm: 0.9999994138855771, iteration: 121153
loss: 1.0338134765625,grad_norm: 0.9999990576193044, iteration: 121154
loss: 1.005222201347351,grad_norm: 0.8307308695642267, iteration: 121155
loss: 0.9901925325393677,grad_norm: 0.9568774951940507, iteration: 121156
loss: 0.9590996503829956,grad_norm: 0.9797036157323915, iteration: 121157
loss: 0.982519805431366,grad_norm: 0.9999993616172412, iteration: 121158
loss: 0.9879978895187378,grad_norm: 0.9395788029758242, iteration: 121159
loss: 1.0253957509994507,grad_norm: 0.9027263491236809, iteration: 121160
loss: 0.9929488897323608,grad_norm: 0.9999990929746019, iteration: 121161
loss: 0.9864208102226257,grad_norm: 0.999999209336945, iteration: 121162
loss: 1.0256178379058838,grad_norm: 0.999999070026116, iteration: 121163
loss: 1.0044773817062378,grad_norm: 0.9999989946338989, iteration: 121164
loss: 1.0080385208129883,grad_norm: 0.7821005689858086, iteration: 121165
loss: 0.9899809956550598,grad_norm: 0.9069999622142766, iteration: 121166
loss: 1.0044528245925903,grad_norm: 0.9555605395517767, iteration: 121167
loss: 1.0251073837280273,grad_norm: 0.9999994481013894, iteration: 121168
loss: 0.9620859622955322,grad_norm: 0.9999990147443457, iteration: 121169
loss: 0.9843055009841919,grad_norm: 0.9999990856287512, iteration: 121170
loss: 0.9446108341217041,grad_norm: 0.9414157682633928, iteration: 121171
loss: 1.0191906690597534,grad_norm: 0.9999992222577259, iteration: 121172
loss: 1.0581507682800293,grad_norm: 0.9993494891720326, iteration: 121173
loss: 1.0174118280410767,grad_norm: 0.9999992108923784, iteration: 121174
loss: 1.0337731838226318,grad_norm: 0.9622827524339759, iteration: 121175
loss: 1.0195690393447876,grad_norm: 0.9999991932774128, iteration: 121176
loss: 0.9829950332641602,grad_norm: 0.9999992450475095, iteration: 121177
loss: 1.0872124433517456,grad_norm: 0.9999990787515495, iteration: 121178
loss: 1.0136107206344604,grad_norm: 0.8702494991472233, iteration: 121179
loss: 1.0174320936203003,grad_norm: 0.8771896591235963, iteration: 121180
loss: 1.01568603515625,grad_norm: 0.999999207008678, iteration: 121181
loss: 1.0000778436660767,grad_norm: 0.999999023234177, iteration: 121182
loss: 1.0063329935073853,grad_norm: 0.894570431250035, iteration: 121183
loss: 0.9954071044921875,grad_norm: 0.9999993582135016, iteration: 121184
loss: 0.9545887112617493,grad_norm: 0.9999993085709189, iteration: 121185
loss: 0.982816755771637,grad_norm: 0.9453086454203725, iteration: 121186
loss: 1.176621675491333,grad_norm: 0.9999992080148844, iteration: 121187
loss: 1.0384477376937866,grad_norm: 0.8319350585650257, iteration: 121188
loss: 1.0108438730239868,grad_norm: 0.999999344498563, iteration: 121189
loss: 1.0252470970153809,grad_norm: 0.9999991793967525, iteration: 121190
loss: 0.9965941309928894,grad_norm: 0.9182996135988303, iteration: 121191
loss: 1.0115301609039307,grad_norm: 0.998535518522983, iteration: 121192
loss: 0.9898166656494141,grad_norm: 0.9930368774564158, iteration: 121193
loss: 1.002571940422058,grad_norm: 0.9295271900250759, iteration: 121194
loss: 1.0096876621246338,grad_norm: 0.9999991245200034, iteration: 121195
loss: 0.9892076253890991,grad_norm: 0.9324807387704828, iteration: 121196
loss: 1.0420998334884644,grad_norm: 0.9999995766011821, iteration: 121197
loss: 0.9865413308143616,grad_norm: 0.9999993109898914, iteration: 121198
loss: 0.9881439805030823,grad_norm: 0.9537193721274424, iteration: 121199
loss: 0.9721046686172485,grad_norm: 0.9891476589224196, iteration: 121200
loss: 0.9931498169898987,grad_norm: 0.999999237171084, iteration: 121201
loss: 1.0419502258300781,grad_norm: 0.9999994423973637, iteration: 121202
loss: 0.9919920563697815,grad_norm: 0.9728507156600281, iteration: 121203
loss: 1.005902886390686,grad_norm: 0.9999990853334185, iteration: 121204
loss: 0.9897148609161377,grad_norm: 0.9999991074158745, iteration: 121205
loss: 1.010256290435791,grad_norm: 0.9085948369057752, iteration: 121206
loss: 1.009207844734192,grad_norm: 0.9714637909176864, iteration: 121207
loss: 1.0007495880126953,grad_norm: 0.9647803010486302, iteration: 121208
loss: 0.9748039841651917,grad_norm: 0.83004104526346, iteration: 121209
loss: 0.9578081965446472,grad_norm: 0.9999989945831167, iteration: 121210
loss: 0.9621755480766296,grad_norm: 0.999999169968487, iteration: 121211
loss: 1.044516682624817,grad_norm: 0.9999991830131799, iteration: 121212
loss: 1.001373052597046,grad_norm: 0.9999991363660298, iteration: 121213
loss: 0.9995526075363159,grad_norm: 0.9313856402906521, iteration: 121214
loss: 0.9892823696136475,grad_norm: 0.9999991357188753, iteration: 121215
loss: 0.9881982207298279,grad_norm: 0.9999991667487513, iteration: 121216
loss: 0.966796338558197,grad_norm: 0.9999990316332307, iteration: 121217
loss: 1.0169081687927246,grad_norm: 0.9999989895814083, iteration: 121218
loss: 0.9779553413391113,grad_norm: 0.9999990424656803, iteration: 121219
loss: 0.9996396899223328,grad_norm: 0.8634118182937544, iteration: 121220
loss: 1.0254050493240356,grad_norm: 0.9999991218353977, iteration: 121221
loss: 1.038184642791748,grad_norm: 0.9934638505603244, iteration: 121222
loss: 0.9728387594223022,grad_norm: 0.9378575619769653, iteration: 121223
loss: 0.9889777898788452,grad_norm: 0.9999991273839132, iteration: 121224
loss: 1.003524899482727,grad_norm: 0.999999109875123, iteration: 121225
loss: 1.0012123584747314,grad_norm: 0.999999051943546, iteration: 121226
loss: 1.0071581602096558,grad_norm: 0.9999990847696443, iteration: 121227
loss: 0.9912020564079285,grad_norm: 0.831529755891136, iteration: 121228
loss: 0.9882994890213013,grad_norm: 0.992918999624151, iteration: 121229
loss: 0.9997960925102234,grad_norm: 0.9999991907214124, iteration: 121230
loss: 0.9758052229881287,grad_norm: 0.9999991411319532, iteration: 121231
loss: 0.9952631592750549,grad_norm: 0.9999989042398878, iteration: 121232
loss: 1.0000189542770386,grad_norm: 0.9415969482248154, iteration: 121233
loss: 0.9897058606147766,grad_norm: 0.9999990264241849, iteration: 121234
loss: 0.9715477228164673,grad_norm: 0.9999990844014464, iteration: 121235
loss: 0.9684743881225586,grad_norm: 0.9999990843901687, iteration: 121236
loss: 0.9635009765625,grad_norm: 0.826856723809622, iteration: 121237
loss: 0.9777514338493347,grad_norm: 0.8574152554730194, iteration: 121238
loss: 1.0215318202972412,grad_norm: 0.9999990538152568, iteration: 121239
loss: 0.9976419806480408,grad_norm: 0.9901585206665875, iteration: 121240
loss: 1.0127640962600708,grad_norm: 0.9999990824719179, iteration: 121241
loss: 0.9985015392303467,grad_norm: 0.9999991865357538, iteration: 121242
loss: 0.9999602437019348,grad_norm: 0.9999992187928447, iteration: 121243
loss: 1.0072565078735352,grad_norm: 0.9999991885675191, iteration: 121244
loss: 0.9845687747001648,grad_norm: 0.883392696690487, iteration: 121245
loss: 0.9954548478126526,grad_norm: 0.894144652796328, iteration: 121246
loss: 0.9889406561851501,grad_norm: 0.9460787894628045, iteration: 121247
loss: 0.9644684791564941,grad_norm: 0.9999991132313949, iteration: 121248
loss: 0.9947450757026672,grad_norm: 0.942947177404699, iteration: 121249
loss: 0.9855493903160095,grad_norm: 0.999999237864544, iteration: 121250
loss: 1.0081816911697388,grad_norm: 0.9999992169526214, iteration: 121251
loss: 1.009116530418396,grad_norm: 0.9091585056139521, iteration: 121252
loss: 0.9904289841651917,grad_norm: 0.9999991612603911, iteration: 121253
loss: 0.9784808158874512,grad_norm: 0.8944922902699121, iteration: 121254
loss: 1.0039831399917603,grad_norm: 0.967338885602056, iteration: 121255
loss: 0.9998390078544617,grad_norm: 0.9999990624665847, iteration: 121256
loss: 1.0157853364944458,grad_norm: 0.910283327306998, iteration: 121257
loss: 1.010886311531067,grad_norm: 0.900294171969246, iteration: 121258
loss: 0.9744489789009094,grad_norm: 0.9999992326790662, iteration: 121259
loss: 1.0058039426803589,grad_norm: 0.9999991517177519, iteration: 121260
loss: 1.0480982065200806,grad_norm: 0.9999999777667349, iteration: 121261
loss: 1.000980257987976,grad_norm: 0.9999991132495256, iteration: 121262
loss: 0.9715965986251831,grad_norm: 0.9957272097067661, iteration: 121263
loss: 1.003016710281372,grad_norm: 0.9999990869316979, iteration: 121264
loss: 1.0034024715423584,grad_norm: 0.9939965406864056, iteration: 121265
loss: 1.0462162494659424,grad_norm: 0.9999991524400866, iteration: 121266
loss: 1.0288207530975342,grad_norm: 0.9999992218631275, iteration: 121267
loss: 1.0054610967636108,grad_norm: 0.9999990007459716, iteration: 121268
loss: 0.9907960891723633,grad_norm: 0.9574770733719744, iteration: 121269
loss: 1.0197938680648804,grad_norm: 0.9306526307080708, iteration: 121270
loss: 0.9851240515708923,grad_norm: 0.9730044433583969, iteration: 121271
loss: 0.9797173738479614,grad_norm: 0.9999991068812505, iteration: 121272
loss: 0.9754939079284668,grad_norm: 0.9423680706208761, iteration: 121273
loss: 1.0060288906097412,grad_norm: 0.999999138396746, iteration: 121274
loss: 1.0097627639770508,grad_norm: 0.9247196899096892, iteration: 121275
loss: 0.9686893820762634,grad_norm: 0.9999991223889813, iteration: 121276
loss: 0.9855825901031494,grad_norm: 0.9999991256151457, iteration: 121277
loss: 1.0157586336135864,grad_norm: 0.9767495069742272, iteration: 121278
loss: 0.9528077840805054,grad_norm: 0.9999992416815996, iteration: 121279
loss: 0.9990769028663635,grad_norm: 0.9999989728472457, iteration: 121280
loss: 1.0025956630706787,grad_norm: 0.9999994024373395, iteration: 121281
loss: 1.0194703340530396,grad_norm: 0.9661878052372083, iteration: 121282
loss: 1.0071675777435303,grad_norm: 0.9620609831649966, iteration: 121283
loss: 1.0104470252990723,grad_norm: 0.9952055359117414, iteration: 121284
loss: 0.9718614220619202,grad_norm: 0.9999992331869503, iteration: 121285
loss: 1.0121887922286987,grad_norm: 0.8804634443091305, iteration: 121286
loss: 1.0158300399780273,grad_norm: 0.9999990728807989, iteration: 121287
loss: 0.9432176351547241,grad_norm: 0.9999990003736362, iteration: 121288
loss: 0.9961552619934082,grad_norm: 0.9999992365459501, iteration: 121289
loss: 0.9793018102645874,grad_norm: 0.9999991207227606, iteration: 121290
loss: 1.0103787183761597,grad_norm: 0.9999991688585105, iteration: 121291
loss: 0.9719858765602112,grad_norm: 0.9999991319797769, iteration: 121292
loss: 0.9701237082481384,grad_norm: 0.9999990635811505, iteration: 121293
loss: 0.9968289732933044,grad_norm: 0.9999991378478443, iteration: 121294
loss: 0.9955175518989563,grad_norm: 0.9999991061650191, iteration: 121295
loss: 0.9688461422920227,grad_norm: 0.9999991294748664, iteration: 121296
loss: 1.002907395362854,grad_norm: 0.9459102013849946, iteration: 121297
loss: 1.0005104541778564,grad_norm: 0.999999102442051, iteration: 121298
loss: 1.017126441001892,grad_norm: 0.9999990904323384, iteration: 121299
loss: 1.0004286766052246,grad_norm: 0.999999133945555, iteration: 121300
loss: 1.030781626701355,grad_norm: 0.9999991355164712, iteration: 121301
loss: 1.0280152559280396,grad_norm: 0.999999052229213, iteration: 121302
loss: 1.0140000581741333,grad_norm: 0.9314095838232112, iteration: 121303
loss: 0.9705371260643005,grad_norm: 0.9999992128548592, iteration: 121304
loss: 1.0208334922790527,grad_norm: 0.9999993944826566, iteration: 121305
loss: 1.0197285413742065,grad_norm: 0.9999991278293736, iteration: 121306
loss: 1.0167087316513062,grad_norm: 0.8098570579326525, iteration: 121307
loss: 0.965509831905365,grad_norm: 0.8846156430868596, iteration: 121308
loss: 0.9903056621551514,grad_norm: 0.9999992417689044, iteration: 121309
loss: 1.0206704139709473,grad_norm: 0.9999991811324337, iteration: 121310
loss: 0.9984656572341919,grad_norm: 0.9999992358032249, iteration: 121311
loss: 0.9969604015350342,grad_norm: 0.9999991062082669, iteration: 121312
loss: 1.0186718702316284,grad_norm: 0.9119017204763755, iteration: 121313
loss: 0.9905701279640198,grad_norm: 0.9999990469773284, iteration: 121314
loss: 0.9796883463859558,grad_norm: 0.9999990828185256, iteration: 121315
loss: 0.9858956336975098,grad_norm: 0.9999992189472143, iteration: 121316
loss: 1.0082494020462036,grad_norm: 0.9999990953376086, iteration: 121317
loss: 1.0202993154525757,grad_norm: 0.999999138584791, iteration: 121318
loss: 1.0250036716461182,grad_norm: 0.8139927625971909, iteration: 121319
loss: 1.1218658685684204,grad_norm: 0.9999998797984799, iteration: 121320
loss: 0.9925781488418579,grad_norm: 0.9999992482603347, iteration: 121321
loss: 1.0368252992630005,grad_norm: 0.9999993401912042, iteration: 121322
loss: 1.0185405015945435,grad_norm: 0.8020964699181478, iteration: 121323
loss: 1.0118017196655273,grad_norm: 0.9999992536515437, iteration: 121324
loss: 1.004906177520752,grad_norm: 0.9999991356442693, iteration: 121325
loss: 1.0109102725982666,grad_norm: 0.9999991236297517, iteration: 121326
loss: 0.9647592902183533,grad_norm: 0.9716849638197356, iteration: 121327
loss: 1.0114147663116455,grad_norm: 0.9999990048620262, iteration: 121328
loss: 1.029988169670105,grad_norm: 0.9862898610736606, iteration: 121329
loss: 1.03529953956604,grad_norm: 0.9999991813155865, iteration: 121330
loss: 0.9615693688392639,grad_norm: 0.9999990951237475, iteration: 121331
loss: 0.9550800323486328,grad_norm: 0.9999990292887974, iteration: 121332
loss: 1.0028172731399536,grad_norm: 0.9430623345423476, iteration: 121333
loss: 1.0033177137374878,grad_norm: 0.9557876659637903, iteration: 121334
loss: 1.042840600013733,grad_norm: 0.9547496357318084, iteration: 121335
loss: 0.9814178347587585,grad_norm: 0.9999992623454651, iteration: 121336
loss: 1.0035325288772583,grad_norm: 0.9999991762879604, iteration: 121337
loss: 0.975623607635498,grad_norm: 0.9202088782165198, iteration: 121338
loss: 1.0210320949554443,grad_norm: 0.99999907378052, iteration: 121339
loss: 1.0305135250091553,grad_norm: 0.9999991456630244, iteration: 121340
loss: 1.004302740097046,grad_norm: 0.9999991248203114, iteration: 121341
loss: 1.0111106634140015,grad_norm: 0.9999991002550809, iteration: 121342
loss: 1.00275719165802,grad_norm: 0.9950501988831818, iteration: 121343
loss: 0.999298095703125,grad_norm: 0.9999992718221244, iteration: 121344
loss: 0.957629919052124,grad_norm: 0.9764217933629054, iteration: 121345
loss: 1.0074150562286377,grad_norm: 0.945897801970219, iteration: 121346
loss: 1.0433108806610107,grad_norm: 0.999999047528477, iteration: 121347
loss: 1.0152636766433716,grad_norm: 0.9999994802089601, iteration: 121348
loss: 1.004593849182129,grad_norm: 0.9999992918748549, iteration: 121349
loss: 0.9750956892967224,grad_norm: 0.9746493290992156, iteration: 121350
loss: 1.02262544631958,grad_norm: 0.999999210131302, iteration: 121351
loss: 1.0006190538406372,grad_norm: 0.9999991251450364, iteration: 121352
loss: 1.0254524946212769,grad_norm: 0.9539224161473114, iteration: 121353
loss: 1.007449984550476,grad_norm: 0.9999991896066885, iteration: 121354
loss: 1.0000206232070923,grad_norm: 0.9281404635677585, iteration: 121355
loss: 1.0084739923477173,grad_norm: 0.9999991704954947, iteration: 121356
loss: 0.9985542893409729,grad_norm: 0.9065273810654949, iteration: 121357
loss: 0.9839536547660828,grad_norm: 0.9115643705206136, iteration: 121358
loss: 0.977924108505249,grad_norm: 0.9999990737169676, iteration: 121359
loss: 0.9959409832954407,grad_norm: 0.9999991584672269, iteration: 121360
loss: 1.012587308883667,grad_norm: 0.9753967039460678, iteration: 121361
loss: 1.0089402198791504,grad_norm: 0.9999991059582566, iteration: 121362
loss: 0.9803519248962402,grad_norm: 0.9999993166191466, iteration: 121363
loss: 0.9849987030029297,grad_norm: 0.9999993335482468, iteration: 121364
loss: 1.0165871381759644,grad_norm: 0.9999990027677174, iteration: 121365
loss: 1.014907956123352,grad_norm: 0.9179681088410145, iteration: 121366
loss: 1.0262198448181152,grad_norm: 0.999999348865924, iteration: 121367
loss: 0.9968001842498779,grad_norm: 0.9999991342568529, iteration: 121368
loss: 1.0360966920852661,grad_norm: 0.9999995057666196, iteration: 121369
loss: 0.9978954195976257,grad_norm: 0.9999990330483455, iteration: 121370
loss: 0.9946353435516357,grad_norm: 0.953727377821587, iteration: 121371
loss: 0.9730075001716614,grad_norm: 0.959505816588519, iteration: 121372
loss: 0.9894961714744568,grad_norm: 0.9999990351662053, iteration: 121373
loss: 1.026400089263916,grad_norm: 0.999999078049132, iteration: 121374
loss: 1.048397421836853,grad_norm: 0.9999993219178158, iteration: 121375
loss: 0.9851647019386292,grad_norm: 0.9999991296975025, iteration: 121376
loss: 0.9952730536460876,grad_norm: 0.9999991291362088, iteration: 121377
loss: 1.0164750814437866,grad_norm: 0.9999996660133431, iteration: 121378
loss: 1.006208062171936,grad_norm: 0.9241610364757264, iteration: 121379
loss: 0.9929039478302002,grad_norm: 0.9822235291170633, iteration: 121380
loss: 0.9674387574195862,grad_norm: 0.9756726387563597, iteration: 121381
loss: 0.9991426467895508,grad_norm: 0.9999991867023595, iteration: 121382
loss: 0.998071551322937,grad_norm: 0.9999992626810368, iteration: 121383
loss: 0.9902653098106384,grad_norm: 0.9999989892919535, iteration: 121384
loss: 1.000136375427246,grad_norm: 0.9999990255623634, iteration: 121385
loss: 1.054909348487854,grad_norm: 0.9999992929909954, iteration: 121386
loss: 1.0236585140228271,grad_norm: 0.9524102962353463, iteration: 121387
loss: 1.0059572458267212,grad_norm: 0.9999990783343253, iteration: 121388
loss: 0.9857608079910278,grad_norm: 0.9999990540712608, iteration: 121389
loss: 1.0014820098876953,grad_norm: 0.9999991708206643, iteration: 121390
loss: 1.0553163290023804,grad_norm: 0.9999991142215806, iteration: 121391
loss: 0.9920812249183655,grad_norm: 0.9185831010839651, iteration: 121392
loss: 1.0180503129959106,grad_norm: 0.9034716527659284, iteration: 121393
loss: 0.9860108494758606,grad_norm: 0.9531975817126083, iteration: 121394
loss: 0.9963800311088562,grad_norm: 0.935945114408424, iteration: 121395
loss: 1.0332266092300415,grad_norm: 0.999999157870907, iteration: 121396
loss: 0.9766677021980286,grad_norm: 0.9936035778447967, iteration: 121397
loss: 0.9926061630249023,grad_norm: 0.999999122972209, iteration: 121398
loss: 1.013696312904358,grad_norm: 0.9999991522188745, iteration: 121399
loss: 1.0063726902008057,grad_norm: 0.9987859164102973, iteration: 121400
loss: 0.9650650024414062,grad_norm: 0.9999989921246741, iteration: 121401
loss: 0.9978747963905334,grad_norm: 0.999999200493883, iteration: 121402
loss: 0.9983356595039368,grad_norm: 0.999998968683002, iteration: 121403
loss: 1.0114485025405884,grad_norm: 0.9999991156538388, iteration: 121404
loss: 1.0193153619766235,grad_norm: 0.9999997550518673, iteration: 121405
loss: 0.981138288974762,grad_norm: 0.9610641615504238, iteration: 121406
loss: 0.9964550137519836,grad_norm: 0.9027956632553794, iteration: 121407
loss: 0.9921833276748657,grad_norm: 0.9194344351765092, iteration: 121408
loss: 1.0812525749206543,grad_norm: 0.9999992947774049, iteration: 121409
loss: 1.0048935413360596,grad_norm: 0.9233785893678548, iteration: 121410
loss: 1.0084338188171387,grad_norm: 0.8888058652105942, iteration: 121411
loss: 1.0390397310256958,grad_norm: 0.9999989800697059, iteration: 121412
loss: 0.9936697483062744,grad_norm: 0.9999990745707373, iteration: 121413
loss: 0.9743356108665466,grad_norm: 0.9982942063786138, iteration: 121414
loss: 1.07184898853302,grad_norm: 0.9999991825517455, iteration: 121415
loss: 1.0098083019256592,grad_norm: 0.9999992584547334, iteration: 121416
loss: 1.0008320808410645,grad_norm: 0.9999990028154148, iteration: 121417
loss: 0.9978071451187134,grad_norm: 0.8370100788377723, iteration: 121418
loss: 1.000461459159851,grad_norm: 0.9903726204585536, iteration: 121419
loss: 1.0209780931472778,grad_norm: 0.9999991922690866, iteration: 121420
loss: 1.0024559497833252,grad_norm: 0.999999157529355, iteration: 121421
loss: 0.9842279553413391,grad_norm: 0.9737414315345498, iteration: 121422
loss: 0.9889660477638245,grad_norm: 0.9092301149753533, iteration: 121423
loss: 0.9842492341995239,grad_norm: 0.834222155649931, iteration: 121424
loss: 1.2672295570373535,grad_norm: 0.9999997264627403, iteration: 121425
loss: 1.0840307474136353,grad_norm: 0.9999994043996309, iteration: 121426
loss: 0.9693431258201599,grad_norm: 0.9648513510960157, iteration: 121427
loss: 1.0018784999847412,grad_norm: 0.9862031470080114, iteration: 121428
loss: 1.0218991041183472,grad_norm: 0.999999905258526, iteration: 121429
loss: 1.005331039428711,grad_norm: 0.9999991388872942, iteration: 121430
loss: 0.9981803297996521,grad_norm: 0.9311778112115634, iteration: 121431
loss: 0.9858382940292358,grad_norm: 0.9756313755810467, iteration: 121432
loss: 1.0113650560379028,grad_norm: 0.9999989880690883, iteration: 121433
loss: 1.0503522157669067,grad_norm: 0.9999998236015704, iteration: 121434
loss: 0.9807613492012024,grad_norm: 0.9999990315852588, iteration: 121435
loss: 1.0266364812850952,grad_norm: 0.9999990527504934, iteration: 121436
loss: 1.016185998916626,grad_norm: 0.8990519049119556, iteration: 121437
loss: 0.9816365838050842,grad_norm: 0.9999990471557253, iteration: 121438
loss: 1.0204750299453735,grad_norm: 0.9999991397554969, iteration: 121439
loss: 0.9765658974647522,grad_norm: 0.9999991101152123, iteration: 121440
loss: 1.0086979866027832,grad_norm: 0.8560274403338028, iteration: 121441
loss: 1.0138663053512573,grad_norm: 0.9999991005560521, iteration: 121442
loss: 0.9802957773208618,grad_norm: 0.9999989731159089, iteration: 121443
loss: 0.9929267764091492,grad_norm: 0.7593479499909482, iteration: 121444
loss: 0.9661219120025635,grad_norm: 0.8414245012093695, iteration: 121445
loss: 0.9996531009674072,grad_norm: 0.9999989732160351, iteration: 121446
loss: 0.9930044412612915,grad_norm: 0.9999989869619179, iteration: 121447
loss: 1.0216290950775146,grad_norm: 0.999999241994251, iteration: 121448
loss: 0.9952102303504944,grad_norm: 0.9999989772603841, iteration: 121449
loss: 1.0208243131637573,grad_norm: 0.9999992628589507, iteration: 121450
loss: 1.008575439453125,grad_norm: 0.766251591702456, iteration: 121451
loss: 1.0170701742172241,grad_norm: 0.9999991409499183, iteration: 121452
loss: 1.0185364484786987,grad_norm: 0.9999989030179718, iteration: 121453
loss: 1.0317405462265015,grad_norm: 0.9672500769837423, iteration: 121454
loss: 1.0405770540237427,grad_norm: 0.9999991254719224, iteration: 121455
loss: 1.0156805515289307,grad_norm: 0.9099666717356147, iteration: 121456
loss: 1.0186325311660767,grad_norm: 0.9999990915630843, iteration: 121457
loss: 0.9800553321838379,grad_norm: 0.9621401217425204, iteration: 121458
loss: 0.9682048559188843,grad_norm: 0.9999993487062413, iteration: 121459
loss: 1.0018539428710938,grad_norm: 0.9999992925303094, iteration: 121460
loss: 0.9987596273422241,grad_norm: 0.9999992708895534, iteration: 121461
loss: 1.0064749717712402,grad_norm: 0.9983412427229016, iteration: 121462
loss: 1.0041874647140503,grad_norm: 0.9235166318617768, iteration: 121463
loss: 1.0175541639328003,grad_norm: 0.9999991538183652, iteration: 121464
loss: 1.0426983833312988,grad_norm: 0.9999993778568756, iteration: 121465
loss: 0.9716894626617432,grad_norm: 0.9467390363670993, iteration: 121466
loss: 1.0149081945419312,grad_norm: 0.9385348875946923, iteration: 121467
loss: 0.9687929153442383,grad_norm: 0.8762165566890169, iteration: 121468
loss: 0.97630774974823,grad_norm: 0.9999991360944007, iteration: 121469
loss: 1.0085575580596924,grad_norm: 0.8188714580839728, iteration: 121470
loss: 0.9919549226760864,grad_norm: 0.999999959325051, iteration: 121471
loss: 0.9975287318229675,grad_norm: 0.9999989879993987, iteration: 121472
loss: 0.9825525879859924,grad_norm: 0.9999991891578082, iteration: 121473
loss: 0.9686418771743774,grad_norm: 0.9999990997313262, iteration: 121474
loss: 0.9947322010993958,grad_norm: 0.9999990889027646, iteration: 121475
loss: 1.0098811388015747,grad_norm: 0.9265207993004254, iteration: 121476
loss: 1.0186834335327148,grad_norm: 0.974122756275313, iteration: 121477
loss: 0.9999412894248962,grad_norm: 0.9999990805695985, iteration: 121478
loss: 1.0113372802734375,grad_norm: 0.9999991610541171, iteration: 121479
loss: 1.0312821865081787,grad_norm: 0.8905799489112315, iteration: 121480
loss: 1.0022258758544922,grad_norm: 0.9999991644153409, iteration: 121481
loss: 0.9855095148086548,grad_norm: 0.9527411469030274, iteration: 121482
loss: 0.9785506725311279,grad_norm: 0.9999990853925138, iteration: 121483
loss: 0.9971986413002014,grad_norm: 0.9969791877470863, iteration: 121484
loss: 0.9899298548698425,grad_norm: 0.9999991081124713, iteration: 121485
loss: 1.0126787424087524,grad_norm: 0.9999990745564917, iteration: 121486
loss: 0.9630898833274841,grad_norm: 0.9999991845985015, iteration: 121487
loss: 0.9915211796760559,grad_norm: 0.9999990583270983, iteration: 121488
loss: 0.9777464270591736,grad_norm: 0.952040341222428, iteration: 121489
loss: 1.0271358489990234,grad_norm: 0.99999917647479, iteration: 121490
loss: 1.0349911451339722,grad_norm: 0.9999998773872829, iteration: 121491
loss: 0.9825612902641296,grad_norm: 0.9999990792338739, iteration: 121492
loss: 0.9683592319488525,grad_norm: 0.9267615494823243, iteration: 121493
loss: 0.986805260181427,grad_norm: 0.999999143461766, iteration: 121494
loss: 0.944206178188324,grad_norm: 0.9617813807097482, iteration: 121495
loss: 1.0119378566741943,grad_norm: 0.9419847715551408, iteration: 121496
loss: 1.0039390325546265,grad_norm: 0.9999991022578899, iteration: 121497
loss: 0.9991569519042969,grad_norm: 0.9999991336549832, iteration: 121498
loss: 1.0002468824386597,grad_norm: 0.9999991124399469, iteration: 121499
loss: 1.046971082687378,grad_norm: 0.9999992136785383, iteration: 121500
loss: 1.0161644220352173,grad_norm: 0.999999255936292, iteration: 121501
loss: 0.9685169458389282,grad_norm: 0.999999171012909, iteration: 121502
loss: 1.0274394750595093,grad_norm: 0.9999992676250156, iteration: 121503
loss: 1.0107192993164062,grad_norm: 0.9999991230212648, iteration: 121504
loss: 1.0024014711380005,grad_norm: 0.9999991867244125, iteration: 121505
loss: 0.985598087310791,grad_norm: 0.9999990277452038, iteration: 121506
loss: 1.0075546503067017,grad_norm: 0.9958380795582431, iteration: 121507
loss: 1.0416477918624878,grad_norm: 0.9999992677031794, iteration: 121508
loss: 0.9976751208305359,grad_norm: 0.8963548262340213, iteration: 121509
loss: 1.0018423795700073,grad_norm: 0.9999997369735861, iteration: 121510
loss: 1.0506631135940552,grad_norm: 0.9999991218728979, iteration: 121511
loss: 1.0447217226028442,grad_norm: 0.9999989799233765, iteration: 121512
loss: 1.005834937095642,grad_norm: 0.9686451457911417, iteration: 121513
loss: 1.1532038450241089,grad_norm: 0.9999992168824544, iteration: 121514
loss: 0.9999447464942932,grad_norm: 0.9999990691848577, iteration: 121515
loss: 0.9825853705406189,grad_norm: 0.9999992043669643, iteration: 121516
loss: 1.0221432447433472,grad_norm: 0.9655132664667007, iteration: 121517
loss: 0.9944929480552673,grad_norm: 0.9999993696772851, iteration: 121518
loss: 0.9801920056343079,grad_norm: 0.9999990640767297, iteration: 121519
loss: 1.040575385093689,grad_norm: 0.999999219785488, iteration: 121520
loss: 1.0084069967269897,grad_norm: 0.9723497520211302, iteration: 121521
loss: 1.0241460800170898,grad_norm: 0.9234888466524566, iteration: 121522
loss: 1.016994595527649,grad_norm: 0.9999991255310532, iteration: 121523
loss: 1.0689314603805542,grad_norm: 0.9999997021930829, iteration: 121524
loss: 1.003310203552246,grad_norm: 0.9999991630571669, iteration: 121525
loss: 0.9675531387329102,grad_norm: 0.9999991873026198, iteration: 121526
loss: 1.008437156677246,grad_norm: 0.9999989829005642, iteration: 121527
loss: 0.9653512835502625,grad_norm: 0.9999991633715974, iteration: 121528
loss: 0.9731351137161255,grad_norm: 0.9999989810053032, iteration: 121529
loss: 0.9875149130821228,grad_norm: 0.9494996498977332, iteration: 121530
loss: 1.007030725479126,grad_norm: 0.999999756568858, iteration: 121531
loss: 1.026014804840088,grad_norm: 0.999999023034724, iteration: 121532
loss: 1.0131925344467163,grad_norm: 0.9999991454215404, iteration: 121533
loss: 1.0248392820358276,grad_norm: 0.8353759904977276, iteration: 121534
loss: 0.9887658953666687,grad_norm: 0.9466165579292536, iteration: 121535
loss: 1.0221905708312988,grad_norm: 0.9999989226882614, iteration: 121536
loss: 1.0112268924713135,grad_norm: 0.9999992074655255, iteration: 121537
loss: 1.0003818273544312,grad_norm: 0.9748996540932381, iteration: 121538
loss: 1.0443068742752075,grad_norm: 0.9999991655061148, iteration: 121539
loss: 0.9924768209457397,grad_norm: 0.9088268720623598, iteration: 121540
loss: 0.9606531262397766,grad_norm: 0.9999991070831459, iteration: 121541
loss: 1.0000613927841187,grad_norm: 0.9999991391527212, iteration: 121542
loss: 0.9740495085716248,grad_norm: 0.9999989740211379, iteration: 121543
loss: 1.0225186347961426,grad_norm: 0.9999992972165604, iteration: 121544
loss: 0.9976074695587158,grad_norm: 0.9076422243666148, iteration: 121545
loss: 0.9860684871673584,grad_norm: 0.9999991905711683, iteration: 121546
loss: 1.0061193704605103,grad_norm: 0.9999992735592381, iteration: 121547
loss: 0.9693692922592163,grad_norm: 0.9999990323833303, iteration: 121548
loss: 1.0321309566497803,grad_norm: 0.9990085315520822, iteration: 121549
loss: 1.0056549310684204,grad_norm: 0.999999517813896, iteration: 121550
loss: 0.982201337814331,grad_norm: 0.9999992241420526, iteration: 121551
loss: 1.0076230764389038,grad_norm: 0.9999991631666441, iteration: 121552
loss: 0.9930939078330994,grad_norm: 0.9999990767030048, iteration: 121553
loss: 0.9883179068565369,grad_norm: 0.8778261727950398, iteration: 121554
loss: 1.0147877931594849,grad_norm: 0.9699881437199377, iteration: 121555
loss: 1.0214570760726929,grad_norm: 0.9999990161384493, iteration: 121556
loss: 0.9530882835388184,grad_norm: 0.9980615879087785, iteration: 121557
loss: 1.029430627822876,grad_norm: 0.9999992251203775, iteration: 121558
loss: 1.0053706169128418,grad_norm: 0.9999993002626641, iteration: 121559
loss: 1.0173803567886353,grad_norm: 0.999999181454139, iteration: 121560
loss: 0.9731114506721497,grad_norm: 0.9999991304659529, iteration: 121561
loss: 0.9802464842796326,grad_norm: 0.9218610257596437, iteration: 121562
loss: 1.0314158201217651,grad_norm: 0.9904161941171085, iteration: 121563
loss: 0.9925332069396973,grad_norm: 0.9999992376226188, iteration: 121564
loss: 1.012616515159607,grad_norm: 0.9563724670693157, iteration: 121565
loss: 1.0064539909362793,grad_norm: 0.9999990003616729, iteration: 121566
loss: 1.001509428024292,grad_norm: 0.999998956716729, iteration: 121567
loss: 1.0274105072021484,grad_norm: 0.9999998244247938, iteration: 121568
loss: 1.0352051258087158,grad_norm: 0.9999990872956365, iteration: 121569
loss: 0.9783793091773987,grad_norm: 0.9999990568531248, iteration: 121570
loss: 0.9849610328674316,grad_norm: 0.999999157677203, iteration: 121571
loss: 0.9810767769813538,grad_norm: 0.999444463946147, iteration: 121572
loss: 0.9634021520614624,grad_norm: 0.9999990807183096, iteration: 121573
loss: 0.999692440032959,grad_norm: 0.8990742947446813, iteration: 121574
loss: 1.013708472251892,grad_norm: 0.9999990044399577, iteration: 121575
loss: 1.0440279245376587,grad_norm: 0.8287428607655248, iteration: 121576
loss: 1.0148613452911377,grad_norm: 0.9688220642428498, iteration: 121577
loss: 0.9883800745010376,grad_norm: 0.9999996057038902, iteration: 121578
loss: 0.955933690071106,grad_norm: 0.9999991007134306, iteration: 121579
loss: 0.9749184846878052,grad_norm: 0.9938055654454127, iteration: 121580
loss: 1.0048190355300903,grad_norm: 0.9614958150661043, iteration: 121581
loss: 0.9527485370635986,grad_norm: 0.9999992103348285, iteration: 121582
loss: 0.9856372475624084,grad_norm: 0.999999332163928, iteration: 121583
loss: 1.0058672428131104,grad_norm: 0.8815214622646746, iteration: 121584
loss: 0.9996228218078613,grad_norm: 0.9511872745111801, iteration: 121585
loss: 1.0187433958053589,grad_norm: 0.963801390145814, iteration: 121586
loss: 1.024756908416748,grad_norm: 0.99999972023556, iteration: 121587
loss: 0.9314129948616028,grad_norm: 0.9959988940465794, iteration: 121588
loss: 1.0212681293487549,grad_norm: 0.9999990835930873, iteration: 121589
loss: 1.0091960430145264,grad_norm: 0.9800933901775658, iteration: 121590
loss: 0.9824865460395813,grad_norm: 0.9675479335863952, iteration: 121591
loss: 1.013602375984192,grad_norm: 0.9999991266606869, iteration: 121592
loss: 0.9829781651496887,grad_norm: 0.9695448560818383, iteration: 121593
loss: 0.9740368127822876,grad_norm: 0.9800090571838879, iteration: 121594
loss: 1.0062901973724365,grad_norm: 0.9836336028863076, iteration: 121595
loss: 0.9873327016830444,grad_norm: 0.9999993844319796, iteration: 121596
loss: 0.9701613187789917,grad_norm: 0.9999989436775013, iteration: 121597
loss: 0.9894612431526184,grad_norm: 0.7826899589207565, iteration: 121598
loss: 1.0124977827072144,grad_norm: 0.9999991499007621, iteration: 121599
loss: 0.9953568577766418,grad_norm: 0.8690216173173863, iteration: 121600
loss: 1.007468581199646,grad_norm: 0.8952659203199546, iteration: 121601
loss: 0.9825798273086548,grad_norm: 0.9999992426907854, iteration: 121602
loss: 1.023025393486023,grad_norm: 0.9043109479950839, iteration: 121603
loss: 1.00900399684906,grad_norm: 0.9585716846644661, iteration: 121604
loss: 0.9528290629386902,grad_norm: 0.9999991261810097, iteration: 121605
loss: 1.0503894090652466,grad_norm: 0.9999992206056005, iteration: 121606
loss: 1.0251182317733765,grad_norm: 0.9999990851645094, iteration: 121607
loss: 1.0059926509857178,grad_norm: 0.9379784895612006, iteration: 121608
loss: 1.0318090915679932,grad_norm: 0.9999998856860399, iteration: 121609
loss: 1.0083147287368774,grad_norm: 0.9999999914444381, iteration: 121610
loss: 1.0207006931304932,grad_norm: 0.9999991184488355, iteration: 121611
loss: 1.0467146635055542,grad_norm: 0.9999991743375616, iteration: 121612
loss: 0.9911160469055176,grad_norm: 0.99999914582892, iteration: 121613
loss: 0.9903426170349121,grad_norm: 0.9999992526318735, iteration: 121614
loss: 1.031441569328308,grad_norm: 0.9999995731437198, iteration: 121615
loss: 1.0226491689682007,grad_norm: 0.9999992872800436, iteration: 121616
loss: 1.0046939849853516,grad_norm: 0.8441266488300636, iteration: 121617
loss: 1.011410117149353,grad_norm: 0.9999991592768818, iteration: 121618
loss: 1.021538257598877,grad_norm: 0.9687314101758316, iteration: 121619
loss: 1.026782751083374,grad_norm: 0.9999992552498691, iteration: 121620
loss: 1.0010671615600586,grad_norm: 0.9999993862325615, iteration: 121621
loss: 1.0006023645401,grad_norm: 0.999999129328019, iteration: 121622
loss: 1.0052112340927124,grad_norm: 0.9999990899960889, iteration: 121623
loss: 0.9952645301818848,grad_norm: 0.9999993024570822, iteration: 121624
loss: 0.9560973048210144,grad_norm: 0.9999991706373509, iteration: 121625
loss: 0.9966464042663574,grad_norm: 0.9999991807374985, iteration: 121626
loss: 1.0270354747772217,grad_norm: 0.9999992399140029, iteration: 121627
loss: 1.039851427078247,grad_norm: 0.9999990353893135, iteration: 121628
loss: 0.9958224296569824,grad_norm: 0.999999053073227, iteration: 121629
loss: 1.0292755365371704,grad_norm: 0.9999989625221033, iteration: 121630
loss: 1.0015286207199097,grad_norm: 0.9999991187645483, iteration: 121631
loss: 0.9897329211235046,grad_norm: 0.9968683199502841, iteration: 121632
loss: 1.0067249536514282,grad_norm: 0.9999990246854658, iteration: 121633
loss: 0.9996393918991089,grad_norm: 0.9999990990220163, iteration: 121634
loss: 1.0278129577636719,grad_norm: 0.9999991608233502, iteration: 121635
loss: 1.0258959531784058,grad_norm: 0.9999991048050598, iteration: 121636
loss: 0.9697798490524292,grad_norm: 0.9911296162732643, iteration: 121637
loss: 1.0058205127716064,grad_norm: 0.9481391014759926, iteration: 121638
loss: 0.9936445355415344,grad_norm: 0.9999992282758565, iteration: 121639
loss: 1.0207343101501465,grad_norm: 0.8894029001974115, iteration: 121640
loss: 0.9859286546707153,grad_norm: 0.9999991928004305, iteration: 121641
loss: 1.0021382570266724,grad_norm: 0.9150835500923804, iteration: 121642
loss: 1.021835207939148,grad_norm: 0.9469562388249481, iteration: 121643
loss: 0.9806869029998779,grad_norm: 0.9365574685737058, iteration: 121644
loss: 0.994307279586792,grad_norm: 0.9999990618478325, iteration: 121645
loss: 0.9874703288078308,grad_norm: 0.9999991399618708, iteration: 121646
loss: 1.0505774021148682,grad_norm: 0.9999993277978012, iteration: 121647
loss: 1.0274080038070679,grad_norm: 0.9999992604584614, iteration: 121648
loss: 0.9854110479354858,grad_norm: 0.9999991217953811, iteration: 121649
loss: 0.9894042611122131,grad_norm: 0.8981197894503458, iteration: 121650
loss: 0.9858488440513611,grad_norm: 0.9999991549072215, iteration: 121651
loss: 1.019256830215454,grad_norm: 0.9907845586070262, iteration: 121652
loss: 1.0046387910842896,grad_norm: 0.9999991937124963, iteration: 121653
loss: 0.9882833361625671,grad_norm: 0.8649896083360847, iteration: 121654
loss: 0.9937106966972351,grad_norm: 0.9999992639814524, iteration: 121655
loss: 1.0052337646484375,grad_norm: 0.9999990607392931, iteration: 121656
loss: 1.024099588394165,grad_norm: 0.9999990974125477, iteration: 121657
loss: 0.9934850931167603,grad_norm: 0.999999111822424, iteration: 121658
loss: 1.000687837600708,grad_norm: 0.9999992066096549, iteration: 121659
loss: 0.973503828048706,grad_norm: 0.9814956087128256, iteration: 121660
loss: 0.9616413116455078,grad_norm: 0.9999991350313403, iteration: 121661
loss: 1.010514259338379,grad_norm: 0.9740364992407478, iteration: 121662
loss: 0.9959241151809692,grad_norm: 0.9317143380273771, iteration: 121663
loss: 1.0226716995239258,grad_norm: 0.9999991908985446, iteration: 121664
loss: 1.0041447877883911,grad_norm: 0.8770896970717778, iteration: 121665
loss: 0.9740858674049377,grad_norm: 0.999998968046555, iteration: 121666
loss: 0.953129768371582,grad_norm: 0.9999991580081533, iteration: 121667
loss: 0.9920652508735657,grad_norm: 0.9999991525700335, iteration: 121668
loss: 1.0174287557601929,grad_norm: 0.9999988811001904, iteration: 121669
loss: 1.047141432762146,grad_norm: 0.9999999831459973, iteration: 121670
loss: 1.0423083305358887,grad_norm: 0.9999992023774349, iteration: 121671
loss: 0.9981896281242371,grad_norm: 0.9999990322898651, iteration: 121672
loss: 1.0182061195373535,grad_norm: 0.876929601205933, iteration: 121673
loss: 1.025790810585022,grad_norm: 0.9999992210961414, iteration: 121674
loss: 1.0075379610061646,grad_norm: 0.9999990846281691, iteration: 121675
loss: 1.00257408618927,grad_norm: 0.9999992344334825, iteration: 121676
loss: 0.9897572994232178,grad_norm: 0.9999991812292943, iteration: 121677
loss: 1.0236270427703857,grad_norm: 0.9999990371539402, iteration: 121678
loss: 1.0053105354309082,grad_norm: 0.9999991227642688, iteration: 121679
loss: 0.98038250207901,grad_norm: 0.9999994067593134, iteration: 121680
loss: 1.0412102937698364,grad_norm: 0.9711955258918429, iteration: 121681
loss: 1.0451844930648804,grad_norm: 0.9237421690680395, iteration: 121682
loss: 0.9821601510047913,grad_norm: 0.9999989740980968, iteration: 121683
loss: 1.0119880437850952,grad_norm: 0.889189270755525, iteration: 121684
loss: 1.0234538316726685,grad_norm: 0.9999989789803908, iteration: 121685
loss: 1.0038836002349854,grad_norm: 0.9145791161995096, iteration: 121686
loss: 1.0035699605941772,grad_norm: 0.9999989883778877, iteration: 121687
loss: 1.0133640766143799,grad_norm: 0.9999992922869029, iteration: 121688
loss: 0.9831529855728149,grad_norm: 0.897618111831014, iteration: 121689
loss: 0.9837877750396729,grad_norm: 0.9999990788622639, iteration: 121690
loss: 0.9970072507858276,grad_norm: 0.9999990953143952, iteration: 121691
loss: 1.020410418510437,grad_norm: 0.9364946707768229, iteration: 121692
loss: 0.9580572843551636,grad_norm: 0.9999989933109675, iteration: 121693
loss: 1.0013679265975952,grad_norm: 0.9999996060367402, iteration: 121694
loss: 0.9628133177757263,grad_norm: 0.9999990843035326, iteration: 121695
loss: 0.9884697794914246,grad_norm: 0.9922785844881978, iteration: 121696
loss: 1.0492404699325562,grad_norm: 0.9999993847476695, iteration: 121697
loss: 0.9983415007591248,grad_norm: 0.9999990997966336, iteration: 121698
loss: 1.0253573656082153,grad_norm: 0.9999992468128696, iteration: 121699
loss: 0.9621419906616211,grad_norm: 0.8178809355829411, iteration: 121700
loss: 0.9960247874259949,grad_norm: 0.8877716837481714, iteration: 121701
loss: 1.0258965492248535,grad_norm: 0.9999991374319737, iteration: 121702
loss: 1.0193994045257568,grad_norm: 0.9999990763377654, iteration: 121703
loss: 1.0355266332626343,grad_norm: 0.9999992523180559, iteration: 121704
loss: 0.9979524612426758,grad_norm: 0.9999991586981722, iteration: 121705
loss: 1.0010846853256226,grad_norm: 0.9999991868288669, iteration: 121706
loss: 1.0111651420593262,grad_norm: 0.9999991222254545, iteration: 121707
loss: 0.9858793616294861,grad_norm: 0.9999990675929259, iteration: 121708
loss: 1.017798662185669,grad_norm: 0.9929662955139817, iteration: 121709
loss: 0.9922415018081665,grad_norm: 0.9999989779448304, iteration: 121710
loss: 0.9773497581481934,grad_norm: 0.9999990818773352, iteration: 121711
loss: 0.9984953999519348,grad_norm: 0.9246535671986141, iteration: 121712
loss: 1.0160319805145264,grad_norm: 0.9013948904556456, iteration: 121713
loss: 0.9773873090744019,grad_norm: 0.8848418986514002, iteration: 121714
loss: 0.9848703145980835,grad_norm: 0.9232176943114028, iteration: 121715
loss: 0.9908426403999329,grad_norm: 0.9999992224260145, iteration: 121716
loss: 1.0195709466934204,grad_norm: 0.961267527553239, iteration: 121717
loss: 0.9661447405815125,grad_norm: 0.9999990456551375, iteration: 121718
loss: 1.0207281112670898,grad_norm: 0.8414207362683235, iteration: 121719
loss: 0.992425799369812,grad_norm: 0.9999996819265871, iteration: 121720
loss: 1.006054162979126,grad_norm: 0.9999991258375163, iteration: 121721
loss: 0.9819821715354919,grad_norm: 0.9999991327058309, iteration: 121722
loss: 1.0161367654800415,grad_norm: 0.8953278870767737, iteration: 121723
loss: 1.0100417137145996,grad_norm: 0.9999990361077314, iteration: 121724
loss: 1.0067870616912842,grad_norm: 0.9055488201809915, iteration: 121725
loss: 0.9747664332389832,grad_norm: 0.903718315974634, iteration: 121726
loss: 1.0184218883514404,grad_norm: 0.999999179943681, iteration: 121727
loss: 0.9764800071716309,grad_norm: 0.9186826985017913, iteration: 121728
loss: 1.002711296081543,grad_norm: 0.9999992069041613, iteration: 121729
loss: 0.9891524314880371,grad_norm: 0.9999990664025237, iteration: 121730
loss: 1.0078500509262085,grad_norm: 0.9999992786934325, iteration: 121731
loss: 0.9971461296081543,grad_norm: 0.9686303422522159, iteration: 121732
loss: 0.9778028130531311,grad_norm: 0.999999201415105, iteration: 121733
loss: 1.003385066986084,grad_norm: 0.9999991382075855, iteration: 121734
loss: 1.0038942098617554,grad_norm: 0.9555724427634511, iteration: 121735
loss: 0.9891140460968018,grad_norm: 0.9999995274723804, iteration: 121736
loss: 1.0440369844436646,grad_norm: 0.997833421324468, iteration: 121737
loss: 0.9702938795089722,grad_norm: 0.9999993203417608, iteration: 121738
loss: 0.9584972858428955,grad_norm: 0.8773474406568647, iteration: 121739
loss: 1.0060389041900635,grad_norm: 0.9999990428916967, iteration: 121740
loss: 0.9949473738670349,grad_norm: 0.9282519957813463, iteration: 121741
loss: 1.017563819885254,grad_norm: 0.9999990501996006, iteration: 121742
loss: 0.9769912362098694,grad_norm: 0.9999992024248612, iteration: 121743
loss: 0.9518052935600281,grad_norm: 0.9999990401499025, iteration: 121744
loss: 0.9747674465179443,grad_norm: 0.9999990089842559, iteration: 121745
loss: 1.024420976638794,grad_norm: 0.95422728976142, iteration: 121746
loss: 1.003951907157898,grad_norm: 0.965443221012595, iteration: 121747
loss: 1.000736117362976,grad_norm: 0.9679650391152946, iteration: 121748
loss: 1.0251898765563965,grad_norm: 0.9999990826241628, iteration: 121749
loss: 0.9868019819259644,grad_norm: 0.9999992094010995, iteration: 121750
loss: 0.9876007437705994,grad_norm: 0.9999991674586042, iteration: 121751
loss: 1.007432460784912,grad_norm: 0.9999992453529885, iteration: 121752
loss: 0.9969698786735535,grad_norm: 0.999999172044812, iteration: 121753
loss: 0.9962468147277832,grad_norm: 0.9999991425371619, iteration: 121754
loss: 0.9984513521194458,grad_norm: 0.9999989815014949, iteration: 121755
loss: 0.9917492866516113,grad_norm: 0.9099260244273181, iteration: 121756
loss: 1.0149672031402588,grad_norm: 0.9999991371095593, iteration: 121757
loss: 0.9577139616012573,grad_norm: 0.9999990626886306, iteration: 121758
loss: 0.9910091161727905,grad_norm: 0.9472056012388604, iteration: 121759
loss: 0.9832356572151184,grad_norm: 0.9999991085387034, iteration: 121760
loss: 0.9518925547599792,grad_norm: 0.9841518913665078, iteration: 121761
loss: 0.9888935089111328,grad_norm: 0.9981498323247521, iteration: 121762
loss: 0.963910698890686,grad_norm: 0.9999991450128228, iteration: 121763
loss: 0.9738239049911499,grad_norm: 0.9227471514600791, iteration: 121764
loss: 1.059160590171814,grad_norm: 0.9999991736585025, iteration: 121765
loss: 1.0106652975082397,grad_norm: 0.9999992283159757, iteration: 121766
loss: 1.0397851467132568,grad_norm: 0.9273835521330732, iteration: 121767
loss: 0.9793039560317993,grad_norm: 0.9488572921962937, iteration: 121768
loss: 1.0130269527435303,grad_norm: 0.999999208542879, iteration: 121769
loss: 1.012605905532837,grad_norm: 0.9020679599314443, iteration: 121770
loss: 1.0280781984329224,grad_norm: 0.9927004787485786, iteration: 121771
loss: 1.0093170404434204,grad_norm: 0.9999989730181669, iteration: 121772
loss: 1.0054458379745483,grad_norm: 0.9999991322521202, iteration: 121773
loss: 1.0168968439102173,grad_norm: 0.9549097048092988, iteration: 121774
loss: 0.9892347455024719,grad_norm: 0.9999991590282657, iteration: 121775
loss: 0.9819614291191101,grad_norm: 0.9666796340919902, iteration: 121776
loss: 0.9952158331871033,grad_norm: 0.9999990764900294, iteration: 121777
loss: 0.9979894161224365,grad_norm: 0.8706774617306665, iteration: 121778
loss: 1.0270509719848633,grad_norm: 0.9647930985147618, iteration: 121779
loss: 1.0186866521835327,grad_norm: 0.9215091556690232, iteration: 121780
loss: 1.0040380954742432,grad_norm: 0.9999991000296311, iteration: 121781
loss: 0.9869410395622253,grad_norm: 0.9999991306821563, iteration: 121782
loss: 0.9912789463996887,grad_norm: 0.999999008014129, iteration: 121783
loss: 1.016518235206604,grad_norm: 0.9999990834285692, iteration: 121784
loss: 0.9773733019828796,grad_norm: 0.8721013848894233, iteration: 121785
loss: 0.9814943671226501,grad_norm: 0.9999992125865285, iteration: 121786
loss: 0.997499406337738,grad_norm: 0.9523674958297261, iteration: 121787
loss: 1.0296978950500488,grad_norm: 0.9999991487202875, iteration: 121788
loss: 0.9715973138809204,grad_norm: 0.9578800263117069, iteration: 121789
loss: 0.9717156291007996,grad_norm: 0.9626329590322111, iteration: 121790
loss: 0.9842864871025085,grad_norm: 0.9999990927827719, iteration: 121791
loss: 0.9685275554656982,grad_norm: 0.9999990940286252, iteration: 121792
loss: 0.9781784415245056,grad_norm: 0.8921352214071041, iteration: 121793
loss: 1.0244699716567993,grad_norm: 0.9999993264719371, iteration: 121794
loss: 1.0110918283462524,grad_norm: 0.9999990795078013, iteration: 121795
loss: 0.9902195930480957,grad_norm: 0.9999991640531298, iteration: 121796
loss: 0.9728385806083679,grad_norm: 0.9794728289400129, iteration: 121797
loss: 1.0108736753463745,grad_norm: 0.992612636614972, iteration: 121798
loss: 1.0330699682235718,grad_norm: 0.9508274694054114, iteration: 121799
loss: 0.9615097641944885,grad_norm: 0.9508297325004151, iteration: 121800
loss: 0.9972883462905884,grad_norm: 0.9999990708093377, iteration: 121801
loss: 0.9619112610816956,grad_norm: 0.916755672444166, iteration: 121802
loss: 1.051011085510254,grad_norm: 0.999999188211268, iteration: 121803
loss: 1.0182024240493774,grad_norm: 0.9452867230246819, iteration: 121804
loss: 1.0120900869369507,grad_norm: 0.9299396073606194, iteration: 121805
loss: 1.0138130187988281,grad_norm: 0.999999107154401, iteration: 121806
loss: 0.949222207069397,grad_norm: 0.9864969277754642, iteration: 121807
loss: 1.0086901187896729,grad_norm: 0.9999990011264019, iteration: 121808
loss: 1.0132899284362793,grad_norm: 0.9999992156381933, iteration: 121809
loss: 0.958496630191803,grad_norm: 0.8984175197689276, iteration: 121810
loss: 1.0283583402633667,grad_norm: 0.9999991549967886, iteration: 121811
loss: 0.986279308795929,grad_norm: 0.999999094652636, iteration: 121812
loss: 0.9985219836235046,grad_norm: 0.844666895253515, iteration: 121813
loss: 0.936778724193573,grad_norm: 0.9999992285375809, iteration: 121814
loss: 1.0174567699432373,grad_norm: 0.9999992424643744, iteration: 121815
loss: 1.0061835050582886,grad_norm: 0.999999041454803, iteration: 121816
loss: 1.0069856643676758,grad_norm: 0.9999992895145194, iteration: 121817
loss: 1.0263389348983765,grad_norm: 0.9317576644094093, iteration: 121818
loss: 1.02659010887146,grad_norm: 0.9999998303622052, iteration: 121819
loss: 1.0014582872390747,grad_norm: 0.9131791366661313, iteration: 121820
loss: 0.9997056126594543,grad_norm: 0.9999991611088115, iteration: 121821
loss: 0.9850564002990723,grad_norm: 0.9999991215647679, iteration: 121822
loss: 0.9920165538787842,grad_norm: 0.9999992404046623, iteration: 121823
loss: 1.0211862325668335,grad_norm: 0.9999998668252034, iteration: 121824
loss: 0.9893591403961182,grad_norm: 0.9999991341514483, iteration: 121825
loss: 1.0056891441345215,grad_norm: 0.9130664388055831, iteration: 121826
loss: 0.9878919124603271,grad_norm: 0.9999990482997105, iteration: 121827
loss: 1.0379692316055298,grad_norm: 0.9999991879576455, iteration: 121828
loss: 0.9864375591278076,grad_norm: 0.8763437280330932, iteration: 121829
loss: 0.9745346903800964,grad_norm: 0.9162006208393298, iteration: 121830
loss: 1.0048677921295166,grad_norm: 0.9999990605511698, iteration: 121831
loss: 1.0104905366897583,grad_norm: 0.9999990400241239, iteration: 121832
loss: 1.032800555229187,grad_norm: 0.9999990652535796, iteration: 121833
loss: 0.9859973192214966,grad_norm: 0.8143711547758474, iteration: 121834
loss: 1.0187305212020874,grad_norm: 0.9999991845094627, iteration: 121835
loss: 0.9903011918067932,grad_norm: 0.9509517537689274, iteration: 121836
loss: 1.0059984922409058,grad_norm: 0.9494279788787339, iteration: 121837
loss: 1.0326212644577026,grad_norm: 0.9999991253297761, iteration: 121838
loss: 0.9690098762512207,grad_norm: 0.8879718972543014, iteration: 121839
loss: 0.9741871953010559,grad_norm: 0.9999989838295718, iteration: 121840
loss: 1.0051066875457764,grad_norm: 0.999999005938896, iteration: 121841
loss: 0.9681302905082703,grad_norm: 0.9999992098495528, iteration: 121842
loss: 1.0013344287872314,grad_norm: 0.9999990058152454, iteration: 121843
loss: 0.9888012409210205,grad_norm: 0.9339529433125416, iteration: 121844
loss: 0.9947535395622253,grad_norm: 0.9021214385108841, iteration: 121845
loss: 0.9822883605957031,grad_norm: 0.9337622628436684, iteration: 121846
loss: 0.9947445392608643,grad_norm: 0.9056191174519909, iteration: 121847
loss: 0.9810715913772583,grad_norm: 0.9670706067086587, iteration: 121848
loss: 0.9525939226150513,grad_norm: 0.9658870964796985, iteration: 121849
loss: 1.0275226831436157,grad_norm: 0.8410724546486865, iteration: 121850
loss: 0.9683641195297241,grad_norm: 0.9536208089948328, iteration: 121851
loss: 1.0032060146331787,grad_norm: 0.8924189031187383, iteration: 121852
loss: 1.010863184928894,grad_norm: 0.9999989879620969, iteration: 121853
loss: 1.0049034357070923,grad_norm: 0.999998968757331, iteration: 121854
loss: 1.0403623580932617,grad_norm: 0.9999992643561848, iteration: 121855
loss: 0.9878947138786316,grad_norm: 0.9819116946544008, iteration: 121856
loss: 1.0059640407562256,grad_norm: 0.9999990754335445, iteration: 121857
loss: 1.0041372776031494,grad_norm: 0.9079089445592211, iteration: 121858
loss: 0.9937950372695923,grad_norm: 0.9999993321687132, iteration: 121859
loss: 1.0007604360580444,grad_norm: 0.9999992036809949, iteration: 121860
loss: 0.9996145367622375,grad_norm: 0.974540759683419, iteration: 121861
loss: 0.9867701530456543,grad_norm: 0.9999991950611671, iteration: 121862
loss: 1.0147064924240112,grad_norm: 0.9999991078281598, iteration: 121863
loss: 0.9673280715942383,grad_norm: 0.9999991521618009, iteration: 121864
loss: 0.9642623066902161,grad_norm: 0.9999992116754546, iteration: 121865
loss: 0.9830811619758606,grad_norm: 0.7725843491382376, iteration: 121866
loss: 1.0356669425964355,grad_norm: 0.9999993504315026, iteration: 121867
loss: 0.9986199140548706,grad_norm: 0.9999992016234238, iteration: 121868
loss: 0.9896026849746704,grad_norm: 0.9999992045097049, iteration: 121869
loss: 0.9944209456443787,grad_norm: 0.9999991611393079, iteration: 121870
loss: 0.9876224994659424,grad_norm: 0.9999990472574537, iteration: 121871
loss: 1.0043277740478516,grad_norm: 0.9435188673974438, iteration: 121872
loss: 0.9543116688728333,grad_norm: 0.9999990154801963, iteration: 121873
loss: 1.0098485946655273,grad_norm: 0.9999990061114212, iteration: 121874
loss: 1.0004061460494995,grad_norm: 0.9956414864040537, iteration: 121875
loss: 1.0169914960861206,grad_norm: 0.9956473318151096, iteration: 121876
loss: 1.0142526626586914,grad_norm: 0.9999991139477208, iteration: 121877
loss: 0.9788671731948853,grad_norm: 0.9999991757795421, iteration: 121878
loss: 0.9805494546890259,grad_norm: 0.9014403520118528, iteration: 121879
loss: 0.9996531009674072,grad_norm: 0.8566938785123565, iteration: 121880
loss: 1.024532675743103,grad_norm: 0.9999992521712587, iteration: 121881
loss: 1.005329966545105,grad_norm: 0.9999989037164588, iteration: 121882
loss: 1.001794695854187,grad_norm: 0.999999090528917, iteration: 121883
loss: 0.974395751953125,grad_norm: 0.9768155041376544, iteration: 121884
loss: 1.0206369161605835,grad_norm: 0.9999991583755858, iteration: 121885
loss: 1.0095044374465942,grad_norm: 0.9999989748698344, iteration: 121886
loss: 1.035697340965271,grad_norm: 0.9999992774911691, iteration: 121887
loss: 1.0232915878295898,grad_norm: 0.9999991209302564, iteration: 121888
loss: 0.9801561236381531,grad_norm: 0.9999991072940232, iteration: 121889
loss: 0.9825245141983032,grad_norm: 0.8841652721883205, iteration: 121890
loss: 0.9637032151222229,grad_norm: 0.999999092015152, iteration: 121891
loss: 1.0135880708694458,grad_norm: 0.999999041502552, iteration: 121892
loss: 0.9719200730323792,grad_norm: 0.877371218451252, iteration: 121893
loss: 0.960867166519165,grad_norm: 0.9999992581572097, iteration: 121894
loss: 0.9652460813522339,grad_norm: 0.9999990684553421, iteration: 121895
loss: 0.9786059856414795,grad_norm: 0.9999992299199411, iteration: 121896
loss: 0.9697048664093018,grad_norm: 0.7935222534115471, iteration: 121897
loss: 1.0515059232711792,grad_norm: 0.9999990424162472, iteration: 121898
loss: 0.9834860563278198,grad_norm: 0.9999989826273216, iteration: 121899
loss: 1.0082063674926758,grad_norm: 0.8934059249489469, iteration: 121900
loss: 0.9806484580039978,grad_norm: 0.9999989789741006, iteration: 121901
loss: 1.0073198080062866,grad_norm: 0.9999990413880573, iteration: 121902
loss: 1.0174365043640137,grad_norm: 0.9999990669322317, iteration: 121903
loss: 0.99993896484375,grad_norm: 0.99999898605322, iteration: 121904
loss: 1.0231257677078247,grad_norm: 0.9070247644857027, iteration: 121905
loss: 0.9673327803611755,grad_norm: 0.9969951331918185, iteration: 121906
loss: 0.9820361137390137,grad_norm: 0.9999990698871933, iteration: 121907
loss: 0.9886260628700256,grad_norm: 0.9746779197034587, iteration: 121908
loss: 1.0324052572250366,grad_norm: 0.9999991467623762, iteration: 121909
loss: 1.0516307353973389,grad_norm: 0.9810773533942833, iteration: 121910
loss: 0.9740830063819885,grad_norm: 0.999999079124199, iteration: 121911
loss: 0.968743622303009,grad_norm: 0.9999991281907945, iteration: 121912
loss: 1.041656255722046,grad_norm: 0.9337422310785138, iteration: 121913
loss: 0.9813920855522156,grad_norm: 0.9999991266745528, iteration: 121914
loss: 0.9946226477622986,grad_norm: 0.9999991254991122, iteration: 121915
loss: 0.9848409295082092,grad_norm: 0.9999991615391038, iteration: 121916
loss: 1.016704797744751,grad_norm: 0.9999990404250914, iteration: 121917
loss: 1.032798171043396,grad_norm: 0.9999990183735359, iteration: 121918
loss: 1.0257503986358643,grad_norm: 0.9999991263324621, iteration: 121919
loss: 0.986116349697113,grad_norm: 0.9999990821367245, iteration: 121920
loss: 1.0230872631072998,grad_norm: 0.9999991100622595, iteration: 121921
loss: 1.0220117568969727,grad_norm: 0.9303731482019831, iteration: 121922
loss: 1.004624843597412,grad_norm: 0.9525090832432938, iteration: 121923
loss: 1.0141793489456177,grad_norm: 0.9999991066215352, iteration: 121924
loss: 0.98048335313797,grad_norm: 0.9999990794055078, iteration: 121925
loss: 0.9543195962905884,grad_norm: 0.9999991699058137, iteration: 121926
loss: 0.977425217628479,grad_norm: 0.8900313594424317, iteration: 121927
loss: 1.0282566547393799,grad_norm: 0.9999989659572116, iteration: 121928
loss: 0.9868953824043274,grad_norm: 0.9605787327081077, iteration: 121929
loss: 0.9952833652496338,grad_norm: 0.9887215883785995, iteration: 121930
loss: 0.9770098924636841,grad_norm: 0.8551551510535881, iteration: 121931
loss: 1.0938589572906494,grad_norm: 0.9999998844510561, iteration: 121932
loss: 1.0249334573745728,grad_norm: 0.9999992590523177, iteration: 121933
loss: 0.9910962581634521,grad_norm: 0.9954935861486223, iteration: 121934
loss: 1.000792145729065,grad_norm: 0.9050508659969528, iteration: 121935
loss: 0.9655704498291016,grad_norm: 0.9615955126601665, iteration: 121936
loss: 0.9837471842765808,grad_norm: 0.9999990146604419, iteration: 121937
loss: 0.9785178303718567,grad_norm: 0.9999991384905951, iteration: 121938
loss: 0.968761146068573,grad_norm: 0.9314604116046169, iteration: 121939
loss: 1.0110626220703125,grad_norm: 0.9999991934592684, iteration: 121940
loss: 0.965273380279541,grad_norm: 0.9760562496621792, iteration: 121941
loss: 1.0361014604568481,grad_norm: 0.9999990384837179, iteration: 121942
loss: 0.9993667602539062,grad_norm: 0.8928701030019908, iteration: 121943
loss: 1.0137813091278076,grad_norm: 0.9903466080198018, iteration: 121944
loss: 0.9379433989524841,grad_norm: 0.9999990448324427, iteration: 121945
loss: 0.9452149868011475,grad_norm: 0.9425802538682444, iteration: 121946
loss: 1.01016104221344,grad_norm: 0.9999991708459834, iteration: 121947
loss: 1.0163331031799316,grad_norm: 0.9056569678243005, iteration: 121948
loss: 1.0521761178970337,grad_norm: 0.9999991862794793, iteration: 121949
loss: 1.020799994468689,grad_norm: 0.9481934343829174, iteration: 121950
loss: 1.0280448198318481,grad_norm: 0.9111463437910292, iteration: 121951
loss: 0.9931321144104004,grad_norm: 0.9725550399269723, iteration: 121952
loss: 1.0593063831329346,grad_norm: 0.9999992122950199, iteration: 121953
loss: 0.9783560633659363,grad_norm: 0.9999992758199956, iteration: 121954
loss: 0.9971743226051331,grad_norm: 0.9999991093756544, iteration: 121955
loss: 0.9820929765701294,grad_norm: 0.9641010228950236, iteration: 121956
loss: 0.9783605337142944,grad_norm: 0.9999989450354142, iteration: 121957
loss: 1.0197463035583496,grad_norm: 0.9012104650303557, iteration: 121958
loss: 0.9955828785896301,grad_norm: 0.9154987121322786, iteration: 121959
loss: 0.9921596646308899,grad_norm: 0.9389897292747762, iteration: 121960
loss: 0.99735426902771,grad_norm: 0.9999991001798322, iteration: 121961
loss: 1.017518162727356,grad_norm: 0.9999991999696906, iteration: 121962
loss: 0.9780608415603638,grad_norm: 0.8443742238434478, iteration: 121963
loss: 0.9990186095237732,grad_norm: 0.9808696703312285, iteration: 121964
loss: 1.0081650018692017,grad_norm: 0.9999989405657489, iteration: 121965
loss: 0.9681771397590637,grad_norm: 0.9618884640021614, iteration: 121966
loss: 1.0155106782913208,grad_norm: 0.9925489488582462, iteration: 121967
loss: 1.0160869359970093,grad_norm: 0.8764735484594832, iteration: 121968
loss: 1.0093785524368286,grad_norm: 0.9651076272676475, iteration: 121969
loss: 1.0157949924468994,grad_norm: 0.9999991388443884, iteration: 121970
loss: 1.0172609090805054,grad_norm: 0.9924617137344741, iteration: 121971
loss: 0.9628592729568481,grad_norm: 0.9794913077445648, iteration: 121972
loss: 0.9950827956199646,grad_norm: 0.8035179842092518, iteration: 121973
loss: 0.9729641079902649,grad_norm: 0.9999990340996665, iteration: 121974
loss: 0.9928037524223328,grad_norm: 0.8571147675547571, iteration: 121975
loss: 1.0008667707443237,grad_norm: 0.9999990744723929, iteration: 121976
loss: 0.9949520826339722,grad_norm: 0.9999991880324351, iteration: 121977
loss: 1.011766791343689,grad_norm: 0.9590093959310365, iteration: 121978
loss: 0.986648678779602,grad_norm: 0.9999990546595559, iteration: 121979
loss: 0.9610320329666138,grad_norm: 0.9999992332095887, iteration: 121980
loss: 1.0697702169418335,grad_norm: 0.9999998752187854, iteration: 121981
loss: 1.040743350982666,grad_norm: 0.9999993430466891, iteration: 121982
loss: 0.9743041396141052,grad_norm: 0.9999990639498504, iteration: 121983
loss: 0.9893823266029358,grad_norm: 0.979366119387159, iteration: 121984
loss: 1.0133305788040161,grad_norm: 0.999999293055594, iteration: 121985
loss: 1.026030421257019,grad_norm: 0.870563536973337, iteration: 121986
loss: 1.086459994316101,grad_norm: 0.9792843164664674, iteration: 121987
loss: 1.0273741483688354,grad_norm: 0.9999989703469161, iteration: 121988
loss: 1.016904354095459,grad_norm: 0.9999991187729886, iteration: 121989
loss: 0.9987050294876099,grad_norm: 0.9945162889932337, iteration: 121990
loss: 1.0612235069274902,grad_norm: 0.9999996861482825, iteration: 121991
loss: 1.0526641607284546,grad_norm: 0.980836972695225, iteration: 121992
loss: 1.2656652927398682,grad_norm: 0.9999995970706472, iteration: 121993
loss: 1.0281834602355957,grad_norm: 0.9999990587698744, iteration: 121994
loss: 0.9883615970611572,grad_norm: 0.9999996889276912, iteration: 121995
loss: 1.017500400543213,grad_norm: 0.9905207461225478, iteration: 121996
loss: 0.9962650537490845,grad_norm: 0.9999990815756683, iteration: 121997
loss: 1.0134111642837524,grad_norm: 0.9999990011468589, iteration: 121998
loss: 1.0254467725753784,grad_norm: 0.9999991344731746, iteration: 121999
loss: 0.9954703450202942,grad_norm: 0.9999993441290873, iteration: 122000
loss: 1.015026569366455,grad_norm: 0.9934379668955197, iteration: 122001
loss: 0.9817295074462891,grad_norm: 0.926609577883527, iteration: 122002
loss: 1.0118913650512695,grad_norm: 0.999999122065424, iteration: 122003
loss: 0.9695013165473938,grad_norm: 0.9186068791459087, iteration: 122004
loss: 0.9757224321365356,grad_norm: 0.9999990989242392, iteration: 122005
loss: 1.0459387302398682,grad_norm: 0.9999992466731111, iteration: 122006
loss: 0.9962323307991028,grad_norm: 0.9999989941202866, iteration: 122007
loss: 1.0066962242126465,grad_norm: 0.9999990589028336, iteration: 122008
loss: 0.9919982552528381,grad_norm: 0.9199068587395333, iteration: 122009
loss: 1.009311556816101,grad_norm: 0.9999997933223828, iteration: 122010
loss: 0.9901302456855774,grad_norm: 0.9999992072012057, iteration: 122011
loss: 0.9846066832542419,grad_norm: 0.9648146402000435, iteration: 122012
loss: 1.004319190979004,grad_norm: 0.9273735420669256, iteration: 122013
loss: 1.0217607021331787,grad_norm: 0.8736907585801882, iteration: 122014
loss: 0.9902998805046082,grad_norm: 0.995447346926726, iteration: 122015
loss: 0.9659852981567383,grad_norm: 0.893993602208187, iteration: 122016
loss: 0.9928627014160156,grad_norm: 0.9783703238626962, iteration: 122017
loss: 0.9832276105880737,grad_norm: 0.9999991919907784, iteration: 122018
loss: 1.0147931575775146,grad_norm: 0.9999990688313565, iteration: 122019
loss: 1.0300419330596924,grad_norm: 0.9999990285412226, iteration: 122020
loss: 0.9819821715354919,grad_norm: 0.995473641422489, iteration: 122021
loss: 0.9835348129272461,grad_norm: 0.9999991976155667, iteration: 122022
loss: 0.9677962064743042,grad_norm: 0.8742539508302745, iteration: 122023
loss: 1.0143765211105347,grad_norm: 0.9999990690598876, iteration: 122024
loss: 0.9839398860931396,grad_norm: 0.9999991668099183, iteration: 122025
loss: 1.0088870525360107,grad_norm: 0.8934158242701925, iteration: 122026
loss: 1.0254325866699219,grad_norm: 0.9999991650239263, iteration: 122027
loss: 0.9968153834342957,grad_norm: 0.9866784560337306, iteration: 122028
loss: 0.9712622761726379,grad_norm: 0.8535578071921877, iteration: 122029
loss: 0.9837131500244141,grad_norm: 0.9021995536467313, iteration: 122030
loss: 0.9847576022148132,grad_norm: 0.9999992035635663, iteration: 122031
loss: 1.0028281211853027,grad_norm: 0.9999992332740708, iteration: 122032
loss: 0.9877249002456665,grad_norm: 0.9070296345039818, iteration: 122033
loss: 0.9956573247909546,grad_norm: 0.9405775527547365, iteration: 122034
loss: 1.0288596153259277,grad_norm: 0.9999989626764809, iteration: 122035
loss: 0.9947440028190613,grad_norm: 0.9999990639875292, iteration: 122036
loss: 1.0246661901474,grad_norm: 0.9589009804527393, iteration: 122037
loss: 0.9969384670257568,grad_norm: 0.8396193543305567, iteration: 122038
loss: 0.9841335415840149,grad_norm: 0.999999087882025, iteration: 122039
loss: 1.0041437149047852,grad_norm: 0.955485701844385, iteration: 122040
loss: 0.9938292503356934,grad_norm: 0.9999990861894578, iteration: 122041
loss: 0.9824399352073669,grad_norm: 0.9080804678612123, iteration: 122042
loss: 1.032514214515686,grad_norm: 0.9999993118085821, iteration: 122043
loss: 1.0101462602615356,grad_norm: 0.9999991085797968, iteration: 122044
loss: 0.9977278709411621,grad_norm: 0.9999989844145388, iteration: 122045
loss: 0.9973505139350891,grad_norm: 0.9999992500968878, iteration: 122046
loss: 1.0040847063064575,grad_norm: 0.9316518477903658, iteration: 122047
loss: 1.0270109176635742,grad_norm: 0.9999990283946736, iteration: 122048
loss: 1.0069066286087036,grad_norm: 0.8930200299500283, iteration: 122049
loss: 0.9811830520629883,grad_norm: 0.9629985709225725, iteration: 122050
loss: 1.0124863386154175,grad_norm: 0.8764383743235595, iteration: 122051
loss: 1.0118318796157837,grad_norm: 0.9999992045396419, iteration: 122052
loss: 0.9862135648727417,grad_norm: 0.8906880535760037, iteration: 122053
loss: 1.007394790649414,grad_norm: 0.9999997984340849, iteration: 122054
loss: 0.9836037158966064,grad_norm: 0.9999991297531916, iteration: 122055
loss: 0.983684778213501,grad_norm: 0.9999991799964006, iteration: 122056
loss: 1.031746506690979,grad_norm: 0.999999203070721, iteration: 122057
loss: 1.0420171022415161,grad_norm: 0.9999996452838312, iteration: 122058
loss: 0.9757535457611084,grad_norm: 0.9678899904732918, iteration: 122059
loss: 1.0113096237182617,grad_norm: 0.9999993382604844, iteration: 122060
loss: 0.9886630177497864,grad_norm: 0.9999993147770826, iteration: 122061
loss: 1.0313280820846558,grad_norm: 0.9999989705366907, iteration: 122062
loss: 0.990727424621582,grad_norm: 0.9917102820643895, iteration: 122063
loss: 1.0440136194229126,grad_norm: 0.9278411568594674, iteration: 122064
loss: 1.007537841796875,grad_norm: 0.999999188393897, iteration: 122065
loss: 1.0241026878356934,grad_norm: 0.9999991823259742, iteration: 122066
loss: 1.0029045343399048,grad_norm: 0.9570508785329938, iteration: 122067
loss: 0.9928113222122192,grad_norm: 0.999999045109378, iteration: 122068
loss: 1.0119962692260742,grad_norm: 0.8352112523641215, iteration: 122069
loss: 1.0030572414398193,grad_norm: 0.9999990813542864, iteration: 122070
loss: 1.0119662284851074,grad_norm: 0.9999991432008865, iteration: 122071
loss: 0.9638261795043945,grad_norm: 0.9982109169109322, iteration: 122072
loss: 1.0310335159301758,grad_norm: 0.9999990017676573, iteration: 122073
loss: 1.0128997564315796,grad_norm: 0.9538326367139717, iteration: 122074
loss: 1.0214213132858276,grad_norm: 0.9999990728426654, iteration: 122075
loss: 0.9905425906181335,grad_norm: 0.9999992778211123, iteration: 122076
loss: 1.0054742097854614,grad_norm: 0.950005101484179, iteration: 122077
loss: 0.9595192670822144,grad_norm: 0.9999991761492557, iteration: 122078
loss: 0.9934395551681519,grad_norm: 0.9999991723878019, iteration: 122079
loss: 1.0006937980651855,grad_norm: 0.9475126051446069, iteration: 122080
loss: 0.9804298281669617,grad_norm: 0.8737843460745673, iteration: 122081
loss: 0.9919979572296143,grad_norm: 0.9999990564386366, iteration: 122082
loss: 0.9919903874397278,grad_norm: 0.9999991539550783, iteration: 122083
loss: 1.0143547058105469,grad_norm: 0.8848666035084554, iteration: 122084
loss: 1.0205435752868652,grad_norm: 0.8919122613740157, iteration: 122085
loss: 1.026472568511963,grad_norm: 0.9250118303186963, iteration: 122086
loss: 0.9965584874153137,grad_norm: 0.9999990881130912, iteration: 122087
loss: 0.9828060269355774,grad_norm: 0.9540575289148909, iteration: 122088
loss: 0.9731617569923401,grad_norm: 0.9999992038093393, iteration: 122089
loss: 1.0027503967285156,grad_norm: 0.8949695554262779, iteration: 122090
loss: 0.9896705150604248,grad_norm: 0.9999992879706452, iteration: 122091
loss: 1.0189012289047241,grad_norm: 0.9999991003425073, iteration: 122092
loss: 0.9786848425865173,grad_norm: 0.9466064097504284, iteration: 122093
loss: 0.9731562733650208,grad_norm: 0.9325886324692076, iteration: 122094
loss: 1.0342068672180176,grad_norm: 0.959016165366648, iteration: 122095
loss: 0.9636639356613159,grad_norm: 0.9999989911149775, iteration: 122096
loss: 0.9900660514831543,grad_norm: 0.8536012231328443, iteration: 122097
loss: 1.0290063619613647,grad_norm: 0.9999995617552738, iteration: 122098
loss: 1.0475248098373413,grad_norm: 0.9999993305869024, iteration: 122099
loss: 0.9670863151550293,grad_norm: 0.952802949197285, iteration: 122100
loss: 1.0225926637649536,grad_norm: 0.9999996709252819, iteration: 122101
loss: 1.0458720922470093,grad_norm: 0.8515906057789825, iteration: 122102
loss: 1.0190393924713135,grad_norm: 0.9999991379374511, iteration: 122103
loss: 1.0099761486053467,grad_norm: 0.9999991482839791, iteration: 122104
loss: 0.9885714650154114,grad_norm: 0.9157585200229825, iteration: 122105
loss: 1.042420744895935,grad_norm: 0.9999994924539137, iteration: 122106
loss: 0.9756554365158081,grad_norm: 0.8812108625578872, iteration: 122107
loss: 0.9840441346168518,grad_norm: 0.7689449550228363, iteration: 122108
loss: 1.02347993850708,grad_norm: 0.9999990050469884, iteration: 122109
loss: 1.0044219493865967,grad_norm: 0.8801326924703252, iteration: 122110
loss: 0.9994199275970459,grad_norm: 0.9999990715142837, iteration: 122111
loss: 1.0179353952407837,grad_norm: 0.9999990604345138, iteration: 122112
loss: 1.1004526615142822,grad_norm: 0.9999998195302956, iteration: 122113
loss: 1.0078929662704468,grad_norm: 0.9285834438451137, iteration: 122114
loss: 1.0150607824325562,grad_norm: 0.9999990119544111, iteration: 122115
loss: 1.0261934995651245,grad_norm: 0.9999992954078767, iteration: 122116
loss: 0.9786168932914734,grad_norm: 0.9999992494103933, iteration: 122117
loss: 1.003995656967163,grad_norm: 0.999999320907009, iteration: 122118
loss: 1.010061264038086,grad_norm: 0.9999990883218096, iteration: 122119
loss: 0.988950252532959,grad_norm: 0.9999992712913021, iteration: 122120
loss: 0.9991323351860046,grad_norm: 0.986734357695805, iteration: 122121
loss: 1.0132390260696411,grad_norm: 0.992484744393169, iteration: 122122
loss: 1.1039327383041382,grad_norm: 0.999999578240583, iteration: 122123
loss: 0.988483726978302,grad_norm: 0.9256384156836996, iteration: 122124
loss: 1.0038765668869019,grad_norm: 0.9999991348595129, iteration: 122125
loss: 1.0012658834457397,grad_norm: 0.9999989509252772, iteration: 122126
loss: 1.0095908641815186,grad_norm: 0.9025403409636686, iteration: 122127
loss: 0.9963551163673401,grad_norm: 0.9202483580793817, iteration: 122128
loss: 0.99543696641922,grad_norm: 0.9999991110436991, iteration: 122129
loss: 1.0085556507110596,grad_norm: 0.9999991774725687, iteration: 122130
loss: 0.9801167249679565,grad_norm: 0.9999990542601599, iteration: 122131
loss: 0.9799419641494751,grad_norm: 0.999999103828729, iteration: 122132
loss: 1.02009117603302,grad_norm: 0.8952693326258453, iteration: 122133
loss: 0.9652422666549683,grad_norm: 0.9121533864021694, iteration: 122134
loss: 0.9343434572219849,grad_norm: 0.9999992391544907, iteration: 122135
loss: 1.0047807693481445,grad_norm: 0.9999991259662265, iteration: 122136
loss: 1.0741280317306519,grad_norm: 0.9999998406870598, iteration: 122137
loss: 1.0110913515090942,grad_norm: 0.9999992680811609, iteration: 122138
loss: 1.0147868394851685,grad_norm: 0.9737820824983763, iteration: 122139
loss: 0.9896271824836731,grad_norm: 0.9800861955859852, iteration: 122140
loss: 0.970039963722229,grad_norm: 0.9999993636114821, iteration: 122141
loss: 1.000287652015686,grad_norm: 0.9950263793665396, iteration: 122142
loss: 1.0591431856155396,grad_norm: 0.9999991788117338, iteration: 122143
loss: 0.9831075668334961,grad_norm: 0.9999990904048637, iteration: 122144
loss: 1.0223596096038818,grad_norm: 0.9999990241594325, iteration: 122145
loss: 1.0228520631790161,grad_norm: 0.999999190114548, iteration: 122146
loss: 1.0003634691238403,grad_norm: 0.9999992063755483, iteration: 122147
loss: 0.9913660883903503,grad_norm: 0.9999991523157855, iteration: 122148
loss: 0.9753673672676086,grad_norm: 0.9999992062409471, iteration: 122149
loss: 1.0034046173095703,grad_norm: 0.9174257150901299, iteration: 122150
loss: 0.9555651545524597,grad_norm: 0.9999991310932422, iteration: 122151
loss: 0.9828746914863586,grad_norm: 0.9851009258802942, iteration: 122152
loss: 0.9949793815612793,grad_norm: 0.93909176357715, iteration: 122153
loss: 1.014920711517334,grad_norm: 0.9999993392300558, iteration: 122154
loss: 0.9924209713935852,grad_norm: 0.9999988458752537, iteration: 122155
loss: 0.9955844283103943,grad_norm: 0.9901393463140709, iteration: 122156
loss: 1.0476714372634888,grad_norm: 0.9999997501228133, iteration: 122157
loss: 1.0312340259552002,grad_norm: 0.9501664504187676, iteration: 122158
loss: 1.0229724645614624,grad_norm: 0.9999991529231288, iteration: 122159
loss: 1.034612774848938,grad_norm: 0.9999990948222076, iteration: 122160
loss: 0.9982292652130127,grad_norm: 0.9042234000042402, iteration: 122161
loss: 1.0197112560272217,grad_norm: 0.9999991488782729, iteration: 122162
loss: 1.0413821935653687,grad_norm: 0.9734988998653293, iteration: 122163
loss: 0.9993169903755188,grad_norm: 0.9999991808042322, iteration: 122164
loss: 0.9922647476196289,grad_norm: 0.8916275267132838, iteration: 122165
loss: 1.0036206245422363,grad_norm: 0.9999990570390054, iteration: 122166
loss: 1.0268863439559937,grad_norm: 0.9999990509515835, iteration: 122167
loss: 1.0034691095352173,grad_norm: 0.9999991985192179, iteration: 122168
loss: 0.9910371899604797,grad_norm: 0.9999992929443297, iteration: 122169
loss: 0.9963027834892273,grad_norm: 0.9381523316589103, iteration: 122170
loss: 1.0475246906280518,grad_norm: 0.9999992223883234, iteration: 122171
loss: 1.0280847549438477,grad_norm: 0.8897683147121562, iteration: 122172
loss: 1.0073155164718628,grad_norm: 0.9515052648384864, iteration: 122173
loss: 0.9747243523597717,grad_norm: 0.9402098927505055, iteration: 122174
loss: 1.0369499921798706,grad_norm: 0.9999992318244023, iteration: 122175
loss: 0.9783225655555725,grad_norm: 0.9999991889097883, iteration: 122176
loss: 1.0077037811279297,grad_norm: 0.9999991859987543, iteration: 122177
loss: 1.0333460569381714,grad_norm: 0.9999991180645862, iteration: 122178
loss: 1.028059959411621,grad_norm: 0.9999994051185808, iteration: 122179
loss: 0.9851891994476318,grad_norm: 0.9999991415323457, iteration: 122180
loss: 1.0082132816314697,grad_norm: 0.999999138629048, iteration: 122181
loss: 1.0033347606658936,grad_norm: 0.8950859225077007, iteration: 122182
loss: 0.9957966804504395,grad_norm: 0.9204967455649928, iteration: 122183
loss: 1.0246793031692505,grad_norm: 0.9999998399927807, iteration: 122184
loss: 0.9788358807563782,grad_norm: 0.9999990850494532, iteration: 122185
loss: 1.0789998769760132,grad_norm: 0.9999992146115111, iteration: 122186
loss: 0.9935752749443054,grad_norm: 0.9999991569796347, iteration: 122187
loss: 0.9490424394607544,grad_norm: 0.9999990965911685, iteration: 122188
loss: 0.9935939311981201,grad_norm: 0.9999992348749903, iteration: 122189
loss: 0.9728331565856934,grad_norm: 0.9999992574231193, iteration: 122190
loss: 1.0009822845458984,grad_norm: 0.9774984891195234, iteration: 122191
loss: 1.0026570558547974,grad_norm: 0.958038454880435, iteration: 122192
loss: 0.9959570169448853,grad_norm: 0.9999991077057151, iteration: 122193
loss: 1.0177286863327026,grad_norm: 0.999999267585425, iteration: 122194
loss: 1.0085406303405762,grad_norm: 0.9199951889720516, iteration: 122195
loss: 1.0250121355056763,grad_norm: 0.887724313464643, iteration: 122196
loss: 1.0082358121871948,grad_norm: 0.8919236566657255, iteration: 122197
loss: 0.9952266812324524,grad_norm: 0.8635884320023893, iteration: 122198
loss: 0.965725839138031,grad_norm: 0.9909598342126325, iteration: 122199
loss: 1.004404067993164,grad_norm: 0.9999992876899788, iteration: 122200
loss: 1.0213501453399658,grad_norm: 0.9866238219865204, iteration: 122201
loss: 0.986226499080658,grad_norm: 0.9992151137295798, iteration: 122202
loss: 1.034511685371399,grad_norm: 0.9733330646909009, iteration: 122203
loss: 0.9625132083892822,grad_norm: 0.9999991436726124, iteration: 122204
loss: 1.0392169952392578,grad_norm: 0.9999990379127185, iteration: 122205
loss: 0.9965536594390869,grad_norm: 0.9069124889613362, iteration: 122206
loss: 0.9955741763114929,grad_norm: 0.9999991386515297, iteration: 122207
loss: 0.9768217206001282,grad_norm: 0.9999991000487573, iteration: 122208
loss: 1.0207325220108032,grad_norm: 0.9999991804727687, iteration: 122209
loss: 1.007003903388977,grad_norm: 0.9999991169085387, iteration: 122210
loss: 1.026563286781311,grad_norm: 0.874850319468458, iteration: 122211
loss: 1.0123844146728516,grad_norm: 0.9999993835518385, iteration: 122212
loss: 0.9793723821640015,grad_norm: 0.9683611304433317, iteration: 122213
loss: 0.9923741221427917,grad_norm: 0.9999991490396422, iteration: 122214
loss: 1.010556936264038,grad_norm: 0.9969979336122448, iteration: 122215
loss: 0.997582733631134,grad_norm: 0.9467795784697983, iteration: 122216
loss: 0.985583484172821,grad_norm: 0.9999991232875063, iteration: 122217
loss: 0.9895450472831726,grad_norm: 0.9886718054450964, iteration: 122218
loss: 0.9745094776153564,grad_norm: 0.9999989575638195, iteration: 122219
loss: 1.0127651691436768,grad_norm: 0.9396273246571546, iteration: 122220
loss: 0.9952983856201172,grad_norm: 0.9725030643053003, iteration: 122221
loss: 0.9891619086265564,grad_norm: 0.999999103270579, iteration: 122222
loss: 0.975261926651001,grad_norm: 0.9999990617976635, iteration: 122223
loss: 0.9704679846763611,grad_norm: 0.9568960534837629, iteration: 122224
loss: 0.9814601540565491,grad_norm: 0.9999992927910227, iteration: 122225
loss: 0.9981415867805481,grad_norm: 0.901873162663571, iteration: 122226
loss: 1.0078774690628052,grad_norm: 0.9400253511197146, iteration: 122227
loss: 0.998176634311676,grad_norm: 0.9063595446632694, iteration: 122228
loss: 1.0050419569015503,grad_norm: 0.9999993342928096, iteration: 122229
loss: 1.0179800987243652,grad_norm: 0.9006981904587404, iteration: 122230
loss: 0.9873838424682617,grad_norm: 0.9999990412021466, iteration: 122231
loss: 0.99445641040802,grad_norm: 0.910986737343879, iteration: 122232
loss: 1.0196356773376465,grad_norm: 0.9317799977390808, iteration: 122233
loss: 1.0156898498535156,grad_norm: 0.9999992686026662, iteration: 122234
loss: 0.9844799637794495,grad_norm: 0.9961287998860325, iteration: 122235
loss: 1.0062527656555176,grad_norm: 0.9739388812247268, iteration: 122236
loss: 0.9608041048049927,grad_norm: 0.9988436218265021, iteration: 122237
loss: 0.9915666580200195,grad_norm: 0.9999991430109947, iteration: 122238
loss: 0.9745053052902222,grad_norm: 0.9999989954941586, iteration: 122239
loss: 0.9927019476890564,grad_norm: 0.9999992573194546, iteration: 122240
loss: 0.9787209033966064,grad_norm: 0.9999992686727511, iteration: 122241
loss: 0.9884727597236633,grad_norm: 0.9584693993529851, iteration: 122242
loss: 0.927738606929779,grad_norm: 0.9999992140626854, iteration: 122243
loss: 1.008653163909912,grad_norm: 0.931153062114364, iteration: 122244
loss: 1.009739875793457,grad_norm: 0.9999991151889981, iteration: 122245
loss: 1.0171629190444946,grad_norm: 0.9904513400666827, iteration: 122246
loss: 1.0113482475280762,grad_norm: 0.9999992433290632, iteration: 122247
loss: 1.0018190145492554,grad_norm: 0.9999991485639976, iteration: 122248
loss: 1.0533097982406616,grad_norm: 0.9999992592106167, iteration: 122249
loss: 1.0218181610107422,grad_norm: 0.9999991933777304, iteration: 122250
loss: 0.9868033528327942,grad_norm: 0.9999990602381016, iteration: 122251
loss: 1.1188217401504517,grad_norm: 0.9999992654481045, iteration: 122252
loss: 0.9909965991973877,grad_norm: 0.9999991564942627, iteration: 122253
loss: 0.9855005741119385,grad_norm: 0.9999991099704018, iteration: 122254
loss: 0.9578177332878113,grad_norm: 0.9416429243470631, iteration: 122255
loss: 1.0314444303512573,grad_norm: 0.9999991929124101, iteration: 122256
loss: 0.9869844913482666,grad_norm: 0.9999990123110184, iteration: 122257
loss: 1.0199542045593262,grad_norm: 0.9999993394141751, iteration: 122258
loss: 1.033308506011963,grad_norm: 0.9969812930960855, iteration: 122259
loss: 1.01963210105896,grad_norm: 0.9999991929117642, iteration: 122260
loss: 1.010942816734314,grad_norm: 0.9999991107533553, iteration: 122261
loss: 1.0108004808425903,grad_norm: 0.9999992725019488, iteration: 122262
loss: 0.9588912725448608,grad_norm: 0.9999991336774037, iteration: 122263
loss: 1.006124496459961,grad_norm: 0.9999993381417936, iteration: 122264
loss: 1.078492522239685,grad_norm: 0.9999991259075702, iteration: 122265
loss: 1.02399480342865,grad_norm: 0.9999991500114863, iteration: 122266
loss: 1.0166538953781128,grad_norm: 0.999999031685616, iteration: 122267
loss: 0.9972219467163086,grad_norm: 0.9985384627598858, iteration: 122268
loss: 0.988523542881012,grad_norm: 0.9999991128862944, iteration: 122269
loss: 0.9625773429870605,grad_norm: 0.9999992023368324, iteration: 122270
loss: 0.9778000116348267,grad_norm: 0.99999915485536, iteration: 122271
loss: 1.019071102142334,grad_norm: 0.9999992322845431, iteration: 122272
loss: 0.9853230714797974,grad_norm: 0.9999990837245933, iteration: 122273
loss: 0.9838590621948242,grad_norm: 0.9999990133613692, iteration: 122274
loss: 1.0107067823410034,grad_norm: 0.9999991871180285, iteration: 122275
loss: 1.0036605596542358,grad_norm: 0.9999990579063703, iteration: 122276
loss: 1.0028482675552368,grad_norm: 0.7436871482056492, iteration: 122277
loss: 0.984171450138092,grad_norm: 0.9999990504281705, iteration: 122278
loss: 0.9740598797798157,grad_norm: 0.9999992837093971, iteration: 122279
loss: 1.0241471529006958,grad_norm: 0.9660994239781993, iteration: 122280
loss: 1.0086908340454102,grad_norm: 0.9999992941827276, iteration: 122281
loss: 1.0838654041290283,grad_norm: 0.9999993412527085, iteration: 122282
loss: 1.0131208896636963,grad_norm: 0.9999990149882935, iteration: 122283
loss: 1.0018224716186523,grad_norm: 0.9999990513678402, iteration: 122284
loss: 1.0337456464767456,grad_norm: 0.9999991786780354, iteration: 122285
loss: 1.0181050300598145,grad_norm: 0.9999990927177698, iteration: 122286
loss: 0.9914942383766174,grad_norm: 0.9999991071497328, iteration: 122287
loss: 0.9861279129981995,grad_norm: 0.9999990814115532, iteration: 122288
loss: 1.0174548625946045,grad_norm: 0.93927769467145, iteration: 122289
loss: 0.9852406978607178,grad_norm: 0.999998993979741, iteration: 122290
loss: 1.0245697498321533,grad_norm: 0.9999993152129699, iteration: 122291
loss: 1.014684796333313,grad_norm: 0.9999991243010945, iteration: 122292
loss: 0.9923356771469116,grad_norm: 0.9075868539049972, iteration: 122293
loss: 1.0144836902618408,grad_norm: 0.9956740207550162, iteration: 122294
loss: 1.0207006931304932,grad_norm: 0.9999991414078807, iteration: 122295
loss: 0.999828040599823,grad_norm: 0.858115161254695, iteration: 122296
loss: 1.067467212677002,grad_norm: 0.9999997382950597, iteration: 122297
loss: 1.0107662677764893,grad_norm: 0.9999992229591027, iteration: 122298
loss: 0.9981483817100525,grad_norm: 0.9999989962862073, iteration: 122299
loss: 1.0068089962005615,grad_norm: 0.999999360749282, iteration: 122300
loss: 1.0402127504348755,grad_norm: 0.9999992643432242, iteration: 122301
loss: 1.0041217803955078,grad_norm: 0.8766525723454238, iteration: 122302
loss: 0.9974750876426697,grad_norm: 0.9854756968845394, iteration: 122303
loss: 0.9902898669242859,grad_norm: 0.9999991378188765, iteration: 122304
loss: 1.0313600301742554,grad_norm: 0.999999087829022, iteration: 122305
loss: 0.9937515258789062,grad_norm: 0.9459073088297385, iteration: 122306
loss: 0.977500319480896,grad_norm: 0.9050480221841785, iteration: 122307
loss: 0.9953312277793884,grad_norm: 0.8838761482150308, iteration: 122308
loss: 1.0471696853637695,grad_norm: 0.9999993760147428, iteration: 122309
loss: 1.051287055015564,grad_norm: 0.9999997319352374, iteration: 122310
loss: 0.9868699908256531,grad_norm: 0.9999993062829274, iteration: 122311
loss: 1.003436803817749,grad_norm: 0.999999948131605, iteration: 122312
loss: 1.0130478143692017,grad_norm: 0.9940356372681604, iteration: 122313
loss: 1.0206642150878906,grad_norm: 0.9999990190042465, iteration: 122314
loss: 1.0114973783493042,grad_norm: 0.9999992590694391, iteration: 122315
loss: 0.9690083265304565,grad_norm: 0.9999991282451647, iteration: 122316
loss: 0.9899518489837646,grad_norm: 0.9791034653573827, iteration: 122317
loss: 1.0379743576049805,grad_norm: 0.9999991532702991, iteration: 122318
loss: 1.0196568965911865,grad_norm: 0.9999993168970074, iteration: 122319
loss: 0.9990079998970032,grad_norm: 0.9999992478419459, iteration: 122320
loss: 1.0162051916122437,grad_norm: 0.9999994413810498, iteration: 122321
loss: 1.013362169265747,grad_norm: 0.9999991807430826, iteration: 122322
loss: 0.9926403760910034,grad_norm: 0.9615329651747367, iteration: 122323
loss: 0.9660933613777161,grad_norm: 0.9999990139087119, iteration: 122324
loss: 0.9980464577674866,grad_norm: 0.9999992298171815, iteration: 122325
loss: 0.9768403172492981,grad_norm: 0.9816314470787079, iteration: 122326
loss: 1.2183021306991577,grad_norm: 0.9999993643197262, iteration: 122327
loss: 0.9854245185852051,grad_norm: 0.8792233924453454, iteration: 122328
loss: 0.9803192615509033,grad_norm: 0.9143397374421919, iteration: 122329
loss: 0.9760027527809143,grad_norm: 0.9999991440741165, iteration: 122330
loss: 0.98484867811203,grad_norm: 0.9999990325091792, iteration: 122331
loss: 0.9974167346954346,grad_norm: 0.9508380276808521, iteration: 122332
loss: 1.0379939079284668,grad_norm: 0.9999994043442128, iteration: 122333
loss: 0.9902181029319763,grad_norm: 0.9999990291317259, iteration: 122334
loss: 0.9928315281867981,grad_norm: 0.9999998610239333, iteration: 122335
loss: 0.9912500977516174,grad_norm: 0.9999990989969095, iteration: 122336
loss: 1.030013084411621,grad_norm: 0.9999991612330593, iteration: 122337
loss: 1.0041905641555786,grad_norm: 0.9999991655215679, iteration: 122338
loss: 0.9848518967628479,grad_norm: 0.9740919396417497, iteration: 122339
loss: 0.94815593957901,grad_norm: 0.9999990517747748, iteration: 122340
loss: 1.027335286140442,grad_norm: 0.999999072661798, iteration: 122341
loss: 1.0249937772750854,grad_norm: 0.8699006914391404, iteration: 122342
loss: 1.005138635635376,grad_norm: 0.9999993280864753, iteration: 122343
loss: 0.9940847754478455,grad_norm: 0.9999989653212475, iteration: 122344
loss: 1.0105775594711304,grad_norm: 0.8856515738129842, iteration: 122345
loss: 0.9793402552604675,grad_norm: 0.9999990716970829, iteration: 122346
loss: 1.0201011896133423,grad_norm: 0.8924396767265219, iteration: 122347
loss: 1.0159565210342407,grad_norm: 0.813053517247517, iteration: 122348
loss: 1.0120909214019775,grad_norm: 0.9999992278688282, iteration: 122349
loss: 1.043933629989624,grad_norm: 0.9999991407642534, iteration: 122350
loss: 0.9515860676765442,grad_norm: 0.999999233982119, iteration: 122351
loss: 0.9857354760169983,grad_norm: 0.9150174167033012, iteration: 122352
loss: 1.031542181968689,grad_norm: 0.9999990876787167, iteration: 122353
loss: 0.9730145335197449,grad_norm: 0.9999991507140562, iteration: 122354
loss: 0.9895067811012268,grad_norm: 0.9999992075841513, iteration: 122355
loss: 1.0103849172592163,grad_norm: 0.9408759509305452, iteration: 122356
loss: 0.9916363954544067,grad_norm: 0.9047413046647922, iteration: 122357
loss: 1.0511133670806885,grad_norm: 0.9999991473971701, iteration: 122358
loss: 1.0156103372573853,grad_norm: 0.931726355259435, iteration: 122359
loss: 1.0138556957244873,grad_norm: 0.9999991167184656, iteration: 122360
loss: 0.9614186882972717,grad_norm: 0.8991711966026184, iteration: 122361
loss: 0.9941542744636536,grad_norm: 0.9572128295090266, iteration: 122362
loss: 0.9450775980949402,grad_norm: 0.9999991647541964, iteration: 122363
loss: 1.0315438508987427,grad_norm: 0.9133655094557048, iteration: 122364
loss: 1.0045117139816284,grad_norm: 0.9999992244837438, iteration: 122365
loss: 1.0050220489501953,grad_norm: 0.9593975773671418, iteration: 122366
loss: 1.0155973434448242,grad_norm: 0.9999990011885805, iteration: 122367
loss: 0.9815055131912231,grad_norm: 0.9328316277879334, iteration: 122368
loss: 0.9762757420539856,grad_norm: 0.9999991125753178, iteration: 122369
loss: 0.9576448798179626,grad_norm: 0.9999991294030994, iteration: 122370
loss: 1.017484426498413,grad_norm: 0.9715984214418506, iteration: 122371
loss: 1.0243784189224243,grad_norm: 0.9371731906008245, iteration: 122372
loss: 1.0022519826889038,grad_norm: 0.9998019899174371, iteration: 122373
loss: 0.9961307644844055,grad_norm: 0.9832236695321065, iteration: 122374
loss: 0.9814680814743042,grad_norm: 0.9999994408244688, iteration: 122375
loss: 0.9978916049003601,grad_norm: 0.9999989225964518, iteration: 122376
loss: 0.9889465570449829,grad_norm: 0.9167673217298472, iteration: 122377
loss: 1.0072826147079468,grad_norm: 0.999999198616143, iteration: 122378
loss: 0.9870513677597046,grad_norm: 0.8724817468810911, iteration: 122379
loss: 1.01580011844635,grad_norm: 0.999999148124358, iteration: 122380
loss: 1.0251230001449585,grad_norm: 0.999999178548875, iteration: 122381
loss: 0.9603045582771301,grad_norm: 0.9917758771047599, iteration: 122382
loss: 0.9687861204147339,grad_norm: 0.9729894723385536, iteration: 122383
loss: 1.0018481016159058,grad_norm: 0.9434388646077051, iteration: 122384
loss: 0.9858742356300354,grad_norm: 0.9999993475616288, iteration: 122385
loss: 0.9666748046875,grad_norm: 0.8993081113171026, iteration: 122386
loss: 0.9654088020324707,grad_norm: 0.9999990227038662, iteration: 122387
loss: 0.9802297353744507,grad_norm: 0.8115854459134672, iteration: 122388
loss: 1.019271969795227,grad_norm: 0.9999993581254129, iteration: 122389
loss: 1.0649625062942505,grad_norm: 0.9999997743927648, iteration: 122390
loss: 1.0016340017318726,grad_norm: 0.9200652708004915, iteration: 122391
loss: 1.0077905654907227,grad_norm: 0.9999990412464353, iteration: 122392
loss: 0.9874796867370605,grad_norm: 0.8827723426078734, iteration: 122393
loss: 0.9851129651069641,grad_norm: 0.9977672446423329, iteration: 122394
loss: 0.9621537923812866,grad_norm: 0.9999990132083302, iteration: 122395
loss: 0.9781786799430847,grad_norm: 0.999999053301887, iteration: 122396
loss: 1.055590033531189,grad_norm: 0.9999993623934385, iteration: 122397
loss: 1.0205764770507812,grad_norm: 0.9999990673156046, iteration: 122398
loss: 1.0092930793762207,grad_norm: 0.9999993082986045, iteration: 122399
loss: 1.001502513885498,grad_norm: 0.8428980089440399, iteration: 122400
loss: 0.9954267740249634,grad_norm: 0.999999077951097, iteration: 122401
loss: 0.9799454212188721,grad_norm: 0.9040924216441258, iteration: 122402
loss: 1.0037481784820557,grad_norm: 0.9999998363110055, iteration: 122403
loss: 1.0032986402511597,grad_norm: 0.999999217804275, iteration: 122404
loss: 1.0088385343551636,grad_norm: 0.9999992217211746, iteration: 122405
loss: 0.9626618027687073,grad_norm: 0.9999991155634277, iteration: 122406
loss: 0.9908180236816406,grad_norm: 0.9418678790251036, iteration: 122407
loss: 1.0039639472961426,grad_norm: 0.8515616275940732, iteration: 122408
loss: 0.9856479167938232,grad_norm: 0.8838382710641978, iteration: 122409
loss: 0.9732692837715149,grad_norm: 0.9999991352326938, iteration: 122410
loss: 1.055206060409546,grad_norm: 0.9999993564831079, iteration: 122411
loss: 1.0114984512329102,grad_norm: 0.9999989983906282, iteration: 122412
loss: 1.048814296722412,grad_norm: 0.9999993579115948, iteration: 122413
loss: 0.9733340740203857,grad_norm: 0.9245335836775823, iteration: 122414
loss: 0.9829248785972595,grad_norm: 0.9999990864524003, iteration: 122415
loss: 1.0054622888565063,grad_norm: 0.9410863214351993, iteration: 122416
loss: 1.0019704103469849,grad_norm: 0.9999991580423317, iteration: 122417
loss: 1.008718729019165,grad_norm: 0.9999993118716174, iteration: 122418
loss: 0.9835834503173828,grad_norm: 0.9999992024181338, iteration: 122419
loss: 0.9752246737480164,grad_norm: 0.999999330129657, iteration: 122420
loss: 1.0603179931640625,grad_norm: 0.9999994816386752, iteration: 122421
loss: 0.9812650680541992,grad_norm: 0.9999990732694877, iteration: 122422
loss: 0.9908599853515625,grad_norm: 0.9999990815818447, iteration: 122423
loss: 1.0032379627227783,grad_norm: 0.9706747487298453, iteration: 122424
loss: 0.9939274787902832,grad_norm: 0.9999991416655344, iteration: 122425
loss: 1.0120692253112793,grad_norm: 0.999999110142968, iteration: 122426
loss: 0.9837560057640076,grad_norm: 0.9938594110139707, iteration: 122427
loss: 0.9876677989959717,grad_norm: 0.9999990584569264, iteration: 122428
loss: 0.9799023270606995,grad_norm: 0.9676466888653056, iteration: 122429
loss: 0.987155556678772,grad_norm: 0.8860858730397381, iteration: 122430
loss: 1.006365418434143,grad_norm: 0.9999991265888486, iteration: 122431
loss: 0.9723398089408875,grad_norm: 0.9484671846873898, iteration: 122432
loss: 1.0136947631835938,grad_norm: 0.9999990328963838, iteration: 122433
loss: 1.0044939517974854,grad_norm: 0.999999053887929, iteration: 122434
loss: 0.9945517778396606,grad_norm: 0.9999991859336643, iteration: 122435
loss: 0.9997532367706299,grad_norm: 0.9999990396569474, iteration: 122436
loss: 0.9959859848022461,grad_norm: 0.929608222404756, iteration: 122437
loss: 1.039720892906189,grad_norm: 0.9999995359910739, iteration: 122438
loss: 0.986537516117096,grad_norm: 0.9999990906817418, iteration: 122439
loss: 1.0435404777526855,grad_norm: 0.9999991132760426, iteration: 122440
loss: 0.9805890917778015,grad_norm: 0.997716815760572, iteration: 122441
loss: 0.9877638220787048,grad_norm: 0.9999991713709819, iteration: 122442
loss: 0.9964345097541809,grad_norm: 0.9999992075076849, iteration: 122443
loss: 0.9988531470298767,grad_norm: 0.9532322096657906, iteration: 122444
loss: 1.0039376020431519,grad_norm: 0.9999989665610824, iteration: 122445
loss: 0.9848960041999817,grad_norm: 0.9293295090580503, iteration: 122446
loss: 1.0136905908584595,grad_norm: 0.9758071750722297, iteration: 122447
loss: 1.0005520582199097,grad_norm: 0.9999992320869925, iteration: 122448
loss: 0.994906485080719,grad_norm: 0.9999989457904996, iteration: 122449
loss: 0.979092538356781,grad_norm: 0.9999990807052961, iteration: 122450
loss: 0.9777629375457764,grad_norm: 0.9538127591622166, iteration: 122451
loss: 1.0310900211334229,grad_norm: 0.9999990902328117, iteration: 122452
loss: 0.9734499454498291,grad_norm: 0.9999992776756106, iteration: 122453
loss: 1.033750295639038,grad_norm: 0.9999991243463625, iteration: 122454
loss: 0.9808841943740845,grad_norm: 0.9999990763144537, iteration: 122455
loss: 1.0185627937316895,grad_norm: 0.9999991989924742, iteration: 122456
loss: 0.9932284355163574,grad_norm: 0.9240496793406974, iteration: 122457
loss: 1.1244075298309326,grad_norm: 0.9999996758010767, iteration: 122458
loss: 1.0038974285125732,grad_norm: 0.9999992462231326, iteration: 122459
loss: 0.9832467436790466,grad_norm: 0.999999246357498, iteration: 122460
loss: 1.0007758140563965,grad_norm: 0.9999995442697479, iteration: 122461
loss: 1.0202735662460327,grad_norm: 0.9999992944859489, iteration: 122462
loss: 1.0174639225006104,grad_norm: 0.9806163602395002, iteration: 122463
loss: 1.0337806940078735,grad_norm: 0.9961315308752594, iteration: 122464
loss: 0.998961865901947,grad_norm: 0.9532598456870608, iteration: 122465
loss: 0.9705029726028442,grad_norm: 0.9999998779772149, iteration: 122466
loss: 1.0229449272155762,grad_norm: 0.9873495347781326, iteration: 122467
loss: 1.0164715051651,grad_norm: 0.9999991350204992, iteration: 122468
loss: 1.0245567560195923,grad_norm: 0.9999991175602755, iteration: 122469
loss: 1.0161727666854858,grad_norm: 0.9107505132282456, iteration: 122470
loss: 0.98909592628479,grad_norm: 0.9999991386048042, iteration: 122471
loss: 1.0197697877883911,grad_norm: 0.9666388128584122, iteration: 122472
loss: 1.018815040588379,grad_norm: 0.9999991929003642, iteration: 122473
loss: 0.9963623881340027,grad_norm: 0.9999991955982417, iteration: 122474
loss: 1.0483516454696655,grad_norm: 0.907885090024287, iteration: 122475
loss: 1.039530873298645,grad_norm: 0.9640072233819749, iteration: 122476
loss: 1.0102943181991577,grad_norm: 0.9999990630544493, iteration: 122477
loss: 0.9756929874420166,grad_norm: 0.9329902877291645, iteration: 122478
loss: 1.0575954914093018,grad_norm: 0.999999229715492, iteration: 122479
loss: 1.0040111541748047,grad_norm: 0.9999992145321094, iteration: 122480
loss: 1.0487909317016602,grad_norm: 0.9999998190002038, iteration: 122481
loss: 0.9392634630203247,grad_norm: 0.999999219784488, iteration: 122482
loss: 0.9850121140480042,grad_norm: 0.9999990394410998, iteration: 122483
loss: 0.9991323351860046,grad_norm: 0.9999991719273488, iteration: 122484
loss: 1.0026507377624512,grad_norm: 0.9999990667395174, iteration: 122485
loss: 1.0135831832885742,grad_norm: 0.8365608541677417, iteration: 122486
loss: 1.017747163772583,grad_norm: 0.9999991551097487, iteration: 122487
loss: 1.063199520111084,grad_norm: 0.9999998736036838, iteration: 122488
loss: 0.9790574908256531,grad_norm: 0.999998882152897, iteration: 122489
loss: 1.0608081817626953,grad_norm: 0.9999997728364706, iteration: 122490
loss: 0.9954859018325806,grad_norm: 0.9000703100539372, iteration: 122491
loss: 1.0242635011672974,grad_norm: 0.9999990498235412, iteration: 122492
loss: 0.990578293800354,grad_norm: 0.9679090363586965, iteration: 122493
loss: 1.0189778804779053,grad_norm: 0.9573128666535937, iteration: 122494
loss: 0.9831799864768982,grad_norm: 0.9999993190286558, iteration: 122495
loss: 0.994781494140625,grad_norm: 0.9999992664976238, iteration: 122496
loss: 1.0391613245010376,grad_norm: 0.9999999182134193, iteration: 122497
loss: 1.0604573488235474,grad_norm: 0.9594851635652688, iteration: 122498
loss: 1.000031590461731,grad_norm: 0.9924105904190372, iteration: 122499
loss: 1.0481947660446167,grad_norm: 0.9999994542464824, iteration: 122500
loss: 0.9843010902404785,grad_norm: 0.9999992623044966, iteration: 122501
loss: 1.1578437089920044,grad_norm: 0.9999992040903479, iteration: 122502
loss: 0.9902446866035461,grad_norm: 0.7864802099378034, iteration: 122503
loss: 1.0029032230377197,grad_norm: 0.9999993832016434, iteration: 122504
loss: 0.9814178347587585,grad_norm: 0.9617016427827076, iteration: 122505
loss: 1.1777012348175049,grad_norm: 1.000000000622242, iteration: 122506
loss: 1.0148956775665283,grad_norm: 0.9999992296381631, iteration: 122507
loss: 1.0766464471817017,grad_norm: 0.9999998579519407, iteration: 122508
loss: 1.0097397565841675,grad_norm: 0.9999992820127518, iteration: 122509
loss: 1.0348889827728271,grad_norm: 0.9999992984792637, iteration: 122510
loss: 1.1707452535629272,grad_norm: 0.9999993942690045, iteration: 122511
loss: 1.012776255607605,grad_norm: 0.999999032827468, iteration: 122512
loss: 1.2230224609375,grad_norm: 0.9999999572232416, iteration: 122513
loss: 1.0883662700653076,grad_norm: 0.9999998183500168, iteration: 122514
loss: 1.380827784538269,grad_norm: 0.9999999743726813, iteration: 122515
loss: 1.7915217876434326,grad_norm: 0.9999999811099746, iteration: 122516
loss: 1.2228567600250244,grad_norm: 0.9999992817787255, iteration: 122517
loss: 1.5256140232086182,grad_norm: 0.9999996617352304, iteration: 122518
loss: 1.0641162395477295,grad_norm: 0.9999992691955837, iteration: 122519
loss: 1.2857074737548828,grad_norm: 0.9999998227961248, iteration: 122520
loss: 1.84030020236969,grad_norm: 0.9999998597293422, iteration: 122521
loss: 1.1775466203689575,grad_norm: 0.9999998364015152, iteration: 122522
loss: 1.362945318222046,grad_norm: 0.9999998237019532, iteration: 122523
loss: 1.032905101776123,grad_norm: 0.9999999354876128, iteration: 122524
loss: 1.39678955078125,grad_norm: 0.9999996365065156, iteration: 122525
loss: 1.1111083030700684,grad_norm: 0.9999993925506651, iteration: 122526
loss: 1.3546733856201172,grad_norm: 0.9999998480527001, iteration: 122527
loss: 1.0956579446792603,grad_norm: 0.9999996108123195, iteration: 122528
loss: 1.5732429027557373,grad_norm: 0.9999997962525895, iteration: 122529
loss: 1.2830140590667725,grad_norm: 0.9999999045659308, iteration: 122530
loss: 1.0622612237930298,grad_norm: 0.9999994893258574, iteration: 122531
loss: 1.202052116394043,grad_norm: 0.9999998451679859, iteration: 122532
loss: 1.2372660636901855,grad_norm: 0.9999998694053186, iteration: 122533
loss: 1.1082327365875244,grad_norm: 0.9999995300472804, iteration: 122534
loss: 1.2055178880691528,grad_norm: 0.9999998658934344, iteration: 122535
loss: 1.1591330766677856,grad_norm: 0.999999954326364, iteration: 122536
loss: 1.425137996673584,grad_norm: 0.9999999363734773, iteration: 122537
loss: 1.1077052354812622,grad_norm: 0.9999997586094771, iteration: 122538
loss: 1.1377251148223877,grad_norm: 0.9999998385484185, iteration: 122539
loss: 1.4595087766647339,grad_norm: 0.9999998859789528, iteration: 122540
loss: 1.1197789907455444,grad_norm: 0.999999187669174, iteration: 122541
loss: 1.2472009658813477,grad_norm: 0.9999998650352268, iteration: 122542
loss: 1.4472029209136963,grad_norm: 0.9999999753100238, iteration: 122543
loss: 1.1262154579162598,grad_norm: 0.9999998546020212, iteration: 122544
loss: 1.1960010528564453,grad_norm: 0.9999999205101938, iteration: 122545
loss: 1.0592237710952759,grad_norm: 0.9999999462197257, iteration: 122546
loss: 1.4790785312652588,grad_norm: 1.0000000441865704, iteration: 122547
loss: 1.211223840713501,grad_norm: 0.9999999404117244, iteration: 122548
loss: 1.1907240152359009,grad_norm: 1.0000000672507414, iteration: 122549
loss: 1.2000524997711182,grad_norm: 0.9999998419495512, iteration: 122550
loss: 1.5037143230438232,grad_norm: 0.9999999236529679, iteration: 122551
loss: 1.1664363145828247,grad_norm: 0.9999997236472133, iteration: 122552
loss: 1.3841814994812012,grad_norm: 0.9999999167953596, iteration: 122553
loss: 1.2757405042648315,grad_norm: 0.9999999562082068, iteration: 122554
loss: 1.4112050533294678,grad_norm: 0.9999999917190912, iteration: 122555
loss: 1.2987900972366333,grad_norm: 0.9999998237259906, iteration: 122556
loss: 1.4680911302566528,grad_norm: 0.9999999136120575, iteration: 122557
loss: 1.624147653579712,grad_norm: 1.0000000235878832, iteration: 122558
loss: 1.2289224863052368,grad_norm: 0.9999999253236147, iteration: 122559
loss: 1.259550929069519,grad_norm: 0.9999999953086496, iteration: 122560
loss: 1.2828842401504517,grad_norm: 0.9999998942319226, iteration: 122561
loss: 1.6735265254974365,grad_norm: 0.9999999878111445, iteration: 122562
loss: 1.5421775579452515,grad_norm: 0.9999999665019218, iteration: 122563
loss: 1.1231430768966675,grad_norm: 0.999999559694577, iteration: 122564
loss: 1.6776210069656372,grad_norm: 0.9999998707849054, iteration: 122565
loss: 1.394344687461853,grad_norm: 0.9999999022829771, iteration: 122566
loss: 1.5055686235427856,grad_norm: 1.0000000098854605, iteration: 122567
loss: 1.5628571510314941,grad_norm: 0.9999999427785368, iteration: 122568
loss: 1.37936270236969,grad_norm: 1.0000000284584236, iteration: 122569
loss: 1.4929202795028687,grad_norm: 0.9999999249075358, iteration: 122570
loss: 1.420640230178833,grad_norm: 0.9999999306451279, iteration: 122571
loss: 1.7487232685089111,grad_norm: 0.9999998319154912, iteration: 122572
loss: 1.4073923826217651,grad_norm: 0.9999998579687417, iteration: 122573
loss: 1.4009654521942139,grad_norm: 0.9999998894867616, iteration: 122574
loss: 1.2932069301605225,grad_norm: 0.9999999236011262, iteration: 122575
loss: 1.7766832113265991,grad_norm: 0.9999998719755794, iteration: 122576
loss: 1.3979960680007935,grad_norm: 0.9999999247638299, iteration: 122577
loss: 1.3798537254333496,grad_norm: 0.9999998063580644, iteration: 122578
loss: 1.3265244960784912,grad_norm: 0.9999999844195204, iteration: 122579
loss: 1.5231146812438965,grad_norm: 0.99999995372241, iteration: 122580
loss: 1.6583400964736938,grad_norm: 0.9999998683371332, iteration: 122581
loss: 1.5432850122451782,grad_norm: 1.0000000461197238, iteration: 122582
loss: 2.2019989490509033,grad_norm: 1.0000000611136335, iteration: 122583
loss: 1.7428802251815796,grad_norm: 1.0000000796361521, iteration: 122584
loss: 1.9740217924118042,grad_norm: 0.999999968375671, iteration: 122585
loss: 1.6937205791473389,grad_norm: 0.9999999613174624, iteration: 122586
loss: 1.8021756410598755,grad_norm: 1.0000000287684503, iteration: 122587
loss: 1.7744272947311401,grad_norm: 0.9999999678423229, iteration: 122588
loss: 1.5817155838012695,grad_norm: 0.9999999810865898, iteration: 122589
loss: 2.089111566543579,grad_norm: 0.9999999280231854, iteration: 122590
loss: 1.6805399656295776,grad_norm: 0.9999999429481554, iteration: 122591
loss: 1.9207793474197388,grad_norm: 0.999999687813197, iteration: 122592
loss: 1.8095550537109375,grad_norm: 0.9999998137889996, iteration: 122593
loss: 1.6671040058135986,grad_norm: 0.9999998381102106, iteration: 122594
loss: 1.6263067722320557,grad_norm: 0.9999998892720505, iteration: 122595
loss: 1.5047286748886108,grad_norm: 1.0000000183877251, iteration: 122596
loss: 1.4204379320144653,grad_norm: 0.9999998701397006, iteration: 122597
loss: 1.6323702335357666,grad_norm: 0.9999999570046745, iteration: 122598
loss: 1.3702372312545776,grad_norm: 0.9999999040255894, iteration: 122599
loss: 1.44613778591156,grad_norm: 0.9999999176262355, iteration: 122600
loss: 1.4031344652175903,grad_norm: 0.9999999799265978, iteration: 122601
loss: 1.4095730781555176,grad_norm: 0.9999998785816689, iteration: 122602
loss: 1.2897218465805054,grad_norm: 0.999999861020846, iteration: 122603
loss: 1.3217754364013672,grad_norm: 0.9999999127300061, iteration: 122604
loss: 1.4989413022994995,grad_norm: 0.9999998910017096, iteration: 122605
loss: 1.1863844394683838,grad_norm: 0.9999998456683518, iteration: 122606
loss: 1.2172931432724,grad_norm: 0.9999995965737304, iteration: 122607
loss: 1.2727338075637817,grad_norm: 0.99999998176372, iteration: 122608
loss: 1.37186598777771,grad_norm: 0.9999999382084953, iteration: 122609
loss: 1.2233211994171143,grad_norm: 0.9999994103241027, iteration: 122610
loss: 1.569784164428711,grad_norm: 0.9999998141007221, iteration: 122611
loss: 1.1925495862960815,grad_norm: 0.9999998584921707, iteration: 122612
loss: 1.1429252624511719,grad_norm: 0.9999998897373223, iteration: 122613
loss: 1.2099330425262451,grad_norm: 0.9999997316880005, iteration: 122614
loss: 0.9923451542854309,grad_norm: 0.9999994962179835, iteration: 122615
loss: 1.2603827714920044,grad_norm: 0.9999997970316896, iteration: 122616
loss: 1.1230298280715942,grad_norm: 0.9999996570038023, iteration: 122617
loss: 1.1601507663726807,grad_norm: 0.99999979536772, iteration: 122618
loss: 1.1871083974838257,grad_norm: 0.9999999095311639, iteration: 122619
loss: 1.248779535293579,grad_norm: 0.99999986341461, iteration: 122620
loss: 1.3935173749923706,grad_norm: 0.9999999674102442, iteration: 122621
loss: 1.303430199623108,grad_norm: 0.9999994903103483, iteration: 122622
loss: 1.161646842956543,grad_norm: 0.9999996885563402, iteration: 122623
loss: 1.6589953899383545,grad_norm: 0.9999998581434253, iteration: 122624
loss: 1.328595519065857,grad_norm: 0.999999909655849, iteration: 122625
loss: 1.1039206981658936,grad_norm: 0.9999997862522079, iteration: 122626
loss: 1.0870214700698853,grad_norm: 0.9999999539369282, iteration: 122627
loss: 1.072465419769287,grad_norm: 0.9999993046409292, iteration: 122628
loss: 1.1164153814315796,grad_norm: 0.9999995609029955, iteration: 122629
loss: 1.2689052820205688,grad_norm: 0.9999997989290559, iteration: 122630
loss: 1.2461133003234863,grad_norm: 0.999999870242184, iteration: 122631
loss: 1.2423186302185059,grad_norm: 0.9999999215831749, iteration: 122632
loss: 1.1977944374084473,grad_norm: 0.9999997230505989, iteration: 122633
loss: 1.0752263069152832,grad_norm: 0.999999476034966, iteration: 122634
loss: 1.0685092210769653,grad_norm: 0.9999997114022907, iteration: 122635
loss: 1.142654538154602,grad_norm: 1.0000000082079836, iteration: 122636
loss: 1.18707275390625,grad_norm: 0.9999997826846297, iteration: 122637
loss: 1.093785047531128,grad_norm: 0.9999998085560041, iteration: 122638
loss: 1.2490506172180176,grad_norm: 0.9999997255445757, iteration: 122639
loss: 1.3731964826583862,grad_norm: 1.0000000581098167, iteration: 122640
loss: 1.1906569004058838,grad_norm: 0.9999998751264928, iteration: 122641
loss: 1.0179756879806519,grad_norm: 0.9155653430917352, iteration: 122642
loss: 1.2154810428619385,grad_norm: 0.9999998736523166, iteration: 122643
loss: 1.0547012090682983,grad_norm: 0.9999996540788291, iteration: 122644
loss: 1.0627256631851196,grad_norm: 0.9303717058241929, iteration: 122645
loss: 1.0493029356002808,grad_norm: 0.9999998727826228, iteration: 122646
loss: 1.0276143550872803,grad_norm: 0.9999998529397991, iteration: 122647
loss: 0.9855385422706604,grad_norm: 0.9999997596871685, iteration: 122648
loss: 1.036295771598816,grad_norm: 0.9999993253037218, iteration: 122649
loss: 1.1067397594451904,grad_norm: 0.999999977455858, iteration: 122650
loss: 1.1402531862258911,grad_norm: 0.9999997285579343, iteration: 122651
loss: 1.0068986415863037,grad_norm: 0.9999997729899147, iteration: 122652
loss: 1.0052639245986938,grad_norm: 0.9999995391925468, iteration: 122653
loss: 0.9869543313980103,grad_norm: 0.9346262496647529, iteration: 122654
loss: 1.2056052684783936,grad_norm: 1.0000000969893796, iteration: 122655
loss: 1.0287529230117798,grad_norm: 0.9999991089301186, iteration: 122656
loss: 0.9812920689582825,grad_norm: 0.9999990996360636, iteration: 122657
loss: 1.0216788053512573,grad_norm: 0.9999997263899526, iteration: 122658
loss: 1.020566463470459,grad_norm: 0.9999991168737199, iteration: 122659
loss: 1.0281800031661987,grad_norm: 0.9999993144660321, iteration: 122660
loss: 1.001298189163208,grad_norm: 0.9999993264884446, iteration: 122661
loss: 1.1773407459259033,grad_norm: 0.9999999898584507, iteration: 122662
loss: 1.0649374723434448,grad_norm: 0.9999991722301739, iteration: 122663
loss: 1.1070280075073242,grad_norm: 0.9999996732148659, iteration: 122664
loss: 1.0228990316390991,grad_norm: 0.9999997178027852, iteration: 122665
loss: 1.0435036420822144,grad_norm: 0.9999995710920627, iteration: 122666
loss: 1.0423715114593506,grad_norm: 0.9999998947376352, iteration: 122667
loss: 1.2061126232147217,grad_norm: 0.9999999982710626, iteration: 122668
loss: 0.9762405157089233,grad_norm: 0.9787048628001697, iteration: 122669
loss: 0.9990518689155579,grad_norm: 0.9999992141318177, iteration: 122670
loss: 1.0184053182601929,grad_norm: 0.9999994177036935, iteration: 122671
loss: 1.2774235010147095,grad_norm: 0.9999999388730575, iteration: 122672
loss: 1.0022286176681519,grad_norm: 0.9999994639778736, iteration: 122673
loss: 1.0492963790893555,grad_norm: 0.9999992476293132, iteration: 122674
loss: 1.0822765827178955,grad_norm: 0.9999994064462195, iteration: 122675
loss: 1.1084370613098145,grad_norm: 0.9999998648436002, iteration: 122676
loss: 1.0855803489685059,grad_norm: 0.9999998300158857, iteration: 122677
loss: 0.9651706218719482,grad_norm: 0.9999993742619125, iteration: 122678
loss: 1.007405400276184,grad_norm: 0.9999994734378276, iteration: 122679
loss: 0.999800443649292,grad_norm: 0.9999992306607581, iteration: 122680
loss: 0.9920521974563599,grad_norm: 0.999999217242586, iteration: 122681
loss: 1.0422886610031128,grad_norm: 0.999999688583187, iteration: 122682
loss: 1.0509620904922485,grad_norm: 0.9999995769094043, iteration: 122683
loss: 1.025904893875122,grad_norm: 0.999999339703479, iteration: 122684
loss: 1.006811499595642,grad_norm: 0.9999992810660516, iteration: 122685
loss: 1.1303945779800415,grad_norm: 0.9999997641989486, iteration: 122686
loss: 1.117876648902893,grad_norm: 0.9999992201138427, iteration: 122687
loss: 0.9950418472290039,grad_norm: 0.9999992307392707, iteration: 122688
loss: 1.1312891244888306,grad_norm: 0.9999991722657078, iteration: 122689
loss: 1.0500515699386597,grad_norm: 0.9999997080358587, iteration: 122690
loss: 1.171357274055481,grad_norm: 0.9999998136987759, iteration: 122691
loss: 1.0485732555389404,grad_norm: 0.9999996860395834, iteration: 122692
loss: 1.0626064538955688,grad_norm: 0.9999991829252536, iteration: 122693
loss: 1.017587661743164,grad_norm: 0.9200249581193367, iteration: 122694
loss: 1.1635342836380005,grad_norm: 0.9999997865138167, iteration: 122695
loss: 1.0173707008361816,grad_norm: 0.9999994613053809, iteration: 122696
loss: 1.0263205766677856,grad_norm: 0.9999997998460675, iteration: 122697
loss: 1.2138959169387817,grad_norm: 0.9999994703710968, iteration: 122698
loss: 1.0443110466003418,grad_norm: 0.9999999924797299, iteration: 122699
loss: 0.9832686185836792,grad_norm: 0.999999200979527, iteration: 122700
loss: 0.9904546141624451,grad_norm: 0.9999992662240544, iteration: 122701
loss: 1.1393353939056396,grad_norm: 0.9999999043267775, iteration: 122702
loss: 1.0476843118667603,grad_norm: 0.9999997761838603, iteration: 122703
loss: 1.0993024110794067,grad_norm: 0.9999998810094903, iteration: 122704
loss: 1.0120757818222046,grad_norm: 0.9434913866666973, iteration: 122705
loss: 1.0926610231399536,grad_norm: 0.9999993518595667, iteration: 122706
loss: 1.0245689153671265,grad_norm: 0.9999991056088403, iteration: 122707
loss: 1.1492716073989868,grad_norm: 0.9999992817703898, iteration: 122708
loss: 1.0768229961395264,grad_norm: 0.9999998579475781, iteration: 122709
loss: 1.0541542768478394,grad_norm: 0.9999998885334883, iteration: 122710
loss: 0.9929182529449463,grad_norm: 0.9999992406604478, iteration: 122711
loss: 0.9659629464149475,grad_norm: 0.9999990964032894, iteration: 122712
loss: 1.0414729118347168,grad_norm: 0.9999998926461272, iteration: 122713
loss: 1.150657057762146,grad_norm: 0.9999999183848342, iteration: 122714
loss: 1.0688585042953491,grad_norm: 0.9999997343318491, iteration: 122715
loss: 0.9867130517959595,grad_norm: 0.999999256655803, iteration: 122716
loss: 1.0015524625778198,grad_norm: 0.9999989964714697, iteration: 122717
loss: 1.163603663444519,grad_norm: 0.9999998280702664, iteration: 122718
loss: 0.9824110865592957,grad_norm: 0.9999991747538325, iteration: 122719
loss: 0.9661471843719482,grad_norm: 0.9999992924207012, iteration: 122720
loss: 1.0145137310028076,grad_norm: 0.999999258289474, iteration: 122721
loss: 0.9796121716499329,grad_norm: 0.999999224842516, iteration: 122722
loss: 1.0019283294677734,grad_norm: 0.9999993642716071, iteration: 122723
loss: 1.0314496755599976,grad_norm: 0.9999992959027848, iteration: 122724
loss: 1.130824089050293,grad_norm: 0.9999994662218802, iteration: 122725
loss: 1.0380085706710815,grad_norm: 0.9999991993948489, iteration: 122726
loss: 1.1697932481765747,grad_norm: 0.9999999576978581, iteration: 122727
loss: 0.9849165081977844,grad_norm: 0.9206513873672378, iteration: 122728
loss: 1.0556166172027588,grad_norm: 0.9999999024330954, iteration: 122729
loss: 1.0120521783828735,grad_norm: 0.999999500744571, iteration: 122730
loss: 1.0791739225387573,grad_norm: 0.9999992007106294, iteration: 122731
loss: 1.0224981307983398,grad_norm: 0.9999994238474342, iteration: 122732
loss: 1.0356439352035522,grad_norm: 0.9999995820595895, iteration: 122733
loss: 0.9688964486122131,grad_norm: 0.9999992361686358, iteration: 122734
loss: 1.080505609512329,grad_norm: 0.9999993558416297, iteration: 122735
loss: 1.0195738077163696,grad_norm: 0.9254152319488449, iteration: 122736
loss: 0.989711582660675,grad_norm: 0.9999990566463608, iteration: 122737
loss: 1.127640724182129,grad_norm: 0.9999993122409684, iteration: 122738
loss: 1.0162245035171509,grad_norm: 0.999999408738231, iteration: 122739
loss: 0.9995049238204956,grad_norm: 0.999999156906767, iteration: 122740
loss: 1.2081472873687744,grad_norm: 0.9999996221138501, iteration: 122741
loss: 1.0087875127792358,grad_norm: 0.9999994237580355, iteration: 122742
loss: 1.0771199464797974,grad_norm: 0.9999992752794766, iteration: 122743
loss: 1.0196163654327393,grad_norm: 0.9999991443539995, iteration: 122744
loss: 1.0773948431015015,grad_norm: 0.999999713161662, iteration: 122745
loss: 1.0417894124984741,grad_norm: 0.9999992008888752, iteration: 122746
loss: 1.1031324863433838,grad_norm: 0.9999998690477733, iteration: 122747
loss: 1.0160486698150635,grad_norm: 0.8652342099716624, iteration: 122748
loss: 1.0373516082763672,grad_norm: 0.999999205502795, iteration: 122749
loss: 1.0704561471939087,grad_norm: 0.9999990297254266, iteration: 122750
loss: 1.0432718992233276,grad_norm: 0.9999997514662199, iteration: 122751
loss: 1.250301718711853,grad_norm: 0.9999999300576516, iteration: 122752
loss: 1.1642245054244995,grad_norm: 1.0000000291918005, iteration: 122753
loss: 0.9798930883407593,grad_norm: 0.9972360828256549, iteration: 122754
loss: 1.0288172960281372,grad_norm: 0.999999097750517, iteration: 122755
loss: 1.0032576322555542,grad_norm: 0.9659285076407514, iteration: 122756
loss: 1.0463480949401855,grad_norm: 0.9999992655434417, iteration: 122757
loss: 1.025534749031067,grad_norm: 0.9999991012285793, iteration: 122758
loss: 1.0074303150177002,grad_norm: 0.999999152451598, iteration: 122759
loss: 1.089504599571228,grad_norm: 0.9999993508191977, iteration: 122760
loss: 1.2835265398025513,grad_norm: 0.9999994663048297, iteration: 122761
loss: 1.009401798248291,grad_norm: 0.9999993419144135, iteration: 122762
loss: 1.1065733432769775,grad_norm: 0.9999997026799581, iteration: 122763
loss: 1.252078890800476,grad_norm: 0.9999997745967799, iteration: 122764
loss: 0.9832168221473694,grad_norm: 0.9999990940726717, iteration: 122765
loss: 1.0834991931915283,grad_norm: 0.9999992861819647, iteration: 122766
loss: 0.9967359900474548,grad_norm: 0.9999993576719756, iteration: 122767
loss: 0.9952675104141235,grad_norm: 0.9999990562556632, iteration: 122768
loss: 1.0700013637542725,grad_norm: 0.9999991829972529, iteration: 122769
loss: 1.1219007968902588,grad_norm: 0.9999992363026271, iteration: 122770
loss: 1.2852269411087036,grad_norm: 0.9999998410679278, iteration: 122771
loss: 1.003159523010254,grad_norm: 0.9173393291753142, iteration: 122772
loss: 1.053135633468628,grad_norm: 0.9999996134640111, iteration: 122773
loss: 1.028674840927124,grad_norm: 0.9999991879267693, iteration: 122774
loss: 1.0773972272872925,grad_norm: 0.9999991897581693, iteration: 122775
loss: 1.0294193029403687,grad_norm: 0.9999990912787055, iteration: 122776
loss: 1.2309658527374268,grad_norm: 0.9999999028633539, iteration: 122777
loss: 1.0204671621322632,grad_norm: 0.9999990916835911, iteration: 122778
loss: 1.0141541957855225,grad_norm: 0.999999194369951, iteration: 122779
loss: 1.0117615461349487,grad_norm: 0.9828090304706104, iteration: 122780
loss: 1.0268844366073608,grad_norm: 0.9999990613814623, iteration: 122781
loss: 1.1089215278625488,grad_norm: 0.9999995520938979, iteration: 122782
loss: 1.0066157579421997,grad_norm: 0.9999990554717162, iteration: 122783
loss: 1.0525745153427124,grad_norm: 0.902448724360528, iteration: 122784
loss: 1.025675654411316,grad_norm: 0.8957655675342767, iteration: 122785
loss: 1.0183912515640259,grad_norm: 0.9389743276694325, iteration: 122786
loss: 1.0470492839813232,grad_norm: 0.9999992126166369, iteration: 122787
loss: 1.1483941078186035,grad_norm: 0.999999589219731, iteration: 122788
loss: 1.0170820951461792,grad_norm: 0.9999992039971181, iteration: 122789
loss: 1.048741102218628,grad_norm: 0.9999991396931647, iteration: 122790
loss: 1.1368398666381836,grad_norm: 0.9999991555743565, iteration: 122791
loss: 1.0246435403823853,grad_norm: 0.999999043357873, iteration: 122792
loss: 1.0211610794067383,grad_norm: 0.999999138101972, iteration: 122793
loss: 1.004172444343567,grad_norm: 0.9999992034340779, iteration: 122794
loss: 1.0350509881973267,grad_norm: 0.9999995941741868, iteration: 122795
loss: 1.1004005670547485,grad_norm: 0.9999992404106863, iteration: 122796
loss: 1.0251327753067017,grad_norm: 0.9999990915300463, iteration: 122797
loss: 1.015221357345581,grad_norm: 0.9999996259156451, iteration: 122798
loss: 0.973006010055542,grad_norm: 0.9999992952397605, iteration: 122799
loss: 1.0234452486038208,grad_norm: 0.9999998288253745, iteration: 122800
loss: 1.3710546493530273,grad_norm: 0.9999994262005021, iteration: 122801
loss: 1.0480726957321167,grad_norm: 0.9999999376369297, iteration: 122802
loss: 1.0219768285751343,grad_norm: 0.9999991456241534, iteration: 122803
loss: 1.0437378883361816,grad_norm: 0.9999995660495455, iteration: 122804
loss: 1.091848611831665,grad_norm: 0.9999998872558535, iteration: 122805
loss: 1.0040947198867798,grad_norm: 0.9999991325586735, iteration: 122806
loss: 0.9884718060493469,grad_norm: 0.999999301604649, iteration: 122807
loss: 1.0122168064117432,grad_norm: 0.9999991737309366, iteration: 122808
loss: 0.9944730997085571,grad_norm: 0.999999255158588, iteration: 122809
loss: 0.9862294793128967,grad_norm: 0.9999990875768547, iteration: 122810
loss: 1.0295360088348389,grad_norm: 0.9999998040865521, iteration: 122811
loss: 1.0987907648086548,grad_norm: 0.9999999465910054, iteration: 122812
loss: 1.0458639860153198,grad_norm: 0.9999999365641581, iteration: 122813
loss: 1.0072978734970093,grad_norm: 0.9999990432219554, iteration: 122814
loss: 0.9570273756980896,grad_norm: 0.9624535990230924, iteration: 122815
loss: 0.9650177955627441,grad_norm: 0.911245686278182, iteration: 122816
loss: 1.024316430091858,grad_norm: 0.8754560636440613, iteration: 122817
loss: 1.0250905752182007,grad_norm: 0.9999997072757825, iteration: 122818
loss: 0.9940778017044067,grad_norm: 0.9390206955685885, iteration: 122819
loss: 1.0050450563430786,grad_norm: 0.8805094104875444, iteration: 122820
loss: 0.996455729007721,grad_norm: 0.952871832791888, iteration: 122821
loss: 0.9903864860534668,grad_norm: 0.9999992785182945, iteration: 122822
loss: 1.0242305994033813,grad_norm: 0.9999992310813554, iteration: 122823
loss: 1.002801775932312,grad_norm: 0.9999991692092883, iteration: 122824
loss: 0.9924004077911377,grad_norm: 0.999999199370773, iteration: 122825
loss: 1.016000509262085,grad_norm: 0.9999994661860718, iteration: 122826
loss: 1.074611783027649,grad_norm: 0.9999991060355821, iteration: 122827
loss: 1.029874563217163,grad_norm: 0.9878325035301377, iteration: 122828
loss: 1.0827611684799194,grad_norm: 0.9999997119845626, iteration: 122829
loss: 1.0347962379455566,grad_norm: 0.9999991224602848, iteration: 122830
loss: 1.0115021467208862,grad_norm: 0.99999901923402, iteration: 122831
loss: 0.993061363697052,grad_norm: 0.9445478621942408, iteration: 122832
loss: 0.9853686690330505,grad_norm: 0.999998940057765, iteration: 122833
loss: 0.9795939922332764,grad_norm: 0.9999998718193086, iteration: 122834
loss: 1.0331679582595825,grad_norm: 0.999999357536291, iteration: 122835
loss: 0.989932656288147,grad_norm: 0.9232841316132008, iteration: 122836
loss: 1.025884985923767,grad_norm: 0.9003348442142315, iteration: 122837
loss: 1.0379228591918945,grad_norm: 0.9999993355077158, iteration: 122838
loss: 1.026107907295227,grad_norm: 0.9999992089005003, iteration: 122839
loss: 0.9780113101005554,grad_norm: 0.9999991473269592, iteration: 122840
loss: 0.9599683880805969,grad_norm: 0.9999990644752296, iteration: 122841
loss: 1.077327847480774,grad_norm: 0.9999991910946803, iteration: 122842
loss: 1.0169479846954346,grad_norm: 0.9999991528513287, iteration: 122843
loss: 1.0083906650543213,grad_norm: 0.9999992397145624, iteration: 122844
loss: 1.0525314807891846,grad_norm: 0.9999992271262724, iteration: 122845
loss: 1.0048538446426392,grad_norm: 0.999999085785554, iteration: 122846
loss: 1.0229871273040771,grad_norm: 0.9999991932798769, iteration: 122847
loss: 0.9832361936569214,grad_norm: 0.9168048318799034, iteration: 122848
loss: 0.9657213091850281,grad_norm: 0.9999991216640904, iteration: 122849
loss: 1.0366228818893433,grad_norm: 0.999999854728724, iteration: 122850
loss: 0.9921879768371582,grad_norm: 0.9999991129680694, iteration: 122851
loss: 1.0238772630691528,grad_norm: 0.9328604766205392, iteration: 122852
loss: 1.3355799913406372,grad_norm: 0.9999993229970198, iteration: 122853
loss: 1.0813971757888794,grad_norm: 0.9999992006756951, iteration: 122854
loss: 1.039628505706787,grad_norm: 0.9999990794414156, iteration: 122855
loss: 1.0800423622131348,grad_norm: 0.9999998182888891, iteration: 122856
loss: 0.979059636592865,grad_norm: 0.9999991302222103, iteration: 122857
loss: 0.9924411177635193,grad_norm: 0.8911050703871769, iteration: 122858
loss: 1.1064969301223755,grad_norm: 0.9999998779550892, iteration: 122859
loss: 1.0256447792053223,grad_norm: 0.9303496488570722, iteration: 122860
loss: 1.011541724205017,grad_norm: 0.9999992156330039, iteration: 122861
loss: 1.0197120904922485,grad_norm: 0.8977967232019908, iteration: 122862
loss: 1.003478765487671,grad_norm: 0.9999994099478754, iteration: 122863
loss: 1.0940136909484863,grad_norm: 0.9999990004581694, iteration: 122864
loss: 0.9930754899978638,grad_norm: 0.9541333236983415, iteration: 122865
loss: 1.022957444190979,grad_norm: 0.9999992030708154, iteration: 122866
loss: 1.0186667442321777,grad_norm: 0.9999988731414283, iteration: 122867
loss: 1.0357917547225952,grad_norm: 0.9999991163966688, iteration: 122868
loss: 1.0833035707473755,grad_norm: 1.000000029388765, iteration: 122869
loss: 1.0182825326919556,grad_norm: 0.9999989945561697, iteration: 122870
loss: 0.9897336363792419,grad_norm: 0.9999991681767562, iteration: 122871
loss: 1.047423243522644,grad_norm: 0.9999999507153388, iteration: 122872
loss: 1.0643024444580078,grad_norm: 0.9999994462514123, iteration: 122873
loss: 0.9836328625679016,grad_norm: 0.9718413999136931, iteration: 122874
loss: 0.966126561164856,grad_norm: 0.9999990539196769, iteration: 122875
loss: 1.0466880798339844,grad_norm: 0.9999991701427382, iteration: 122876
loss: 1.0125705003738403,grad_norm: 0.9999989308022396, iteration: 122877
loss: 1.0497846603393555,grad_norm: 0.9999991989296995, iteration: 122878
loss: 1.036060094833374,grad_norm: 0.9999996924823863, iteration: 122879
loss: 1.0165053606033325,grad_norm: 0.9378998218365732, iteration: 122880
loss: 1.0606218576431274,grad_norm: 0.9999992209000528, iteration: 122881
loss: 1.010424017906189,grad_norm: 0.9581865792326623, iteration: 122882
loss: 1.0039304494857788,grad_norm: 0.9999995050615783, iteration: 122883
loss: 1.0909814834594727,grad_norm: 0.9999998714455046, iteration: 122884
loss: 1.0227673053741455,grad_norm: 0.9890226692984113, iteration: 122885
loss: 1.0171396732330322,grad_norm: 0.9999995019489135, iteration: 122886
loss: 1.4177136421203613,grad_norm: 0.9999994087220863, iteration: 122887
loss: 0.9547004699707031,grad_norm: 0.8401965842819296, iteration: 122888
loss: 1.0361392498016357,grad_norm: 0.9999992606456585, iteration: 122889
loss: 1.01980459690094,grad_norm: 0.9999991998845781, iteration: 122890
loss: 0.995037853717804,grad_norm: 0.9999992560924308, iteration: 122891
loss: 1.0461817979812622,grad_norm: 0.9999993146045769, iteration: 122892
loss: 1.0347248315811157,grad_norm: 0.9999990865595083, iteration: 122893
loss: 1.1174520254135132,grad_norm: 0.999999564040349, iteration: 122894
loss: 1.017893671989441,grad_norm: 0.9999990639288493, iteration: 122895
loss: 1.1229387521743774,grad_norm: 0.9999992005647075, iteration: 122896
loss: 0.9757370948791504,grad_norm: 0.9999990997150712, iteration: 122897
loss: 1.0371448993682861,grad_norm: 0.9999992919993006, iteration: 122898
loss: 1.0453635454177856,grad_norm: 0.9999994624732337, iteration: 122899
loss: 1.1100331544876099,grad_norm: 0.9999995638649575, iteration: 122900
loss: 1.113111972808838,grad_norm: 0.999999538688194, iteration: 122901
loss: 1.0451761484146118,grad_norm: 0.9999996813589862, iteration: 122902
loss: 0.9548481702804565,grad_norm: 0.9331378913467603, iteration: 122903
loss: 1.1012294292449951,grad_norm: 0.9999992219655972, iteration: 122904
loss: 1.073087453842163,grad_norm: 0.9999992641057931, iteration: 122905
loss: 1.0103336572647095,grad_norm: 0.9229311644676944, iteration: 122906
loss: 1.0025020837783813,grad_norm: 0.9999993396894004, iteration: 122907
loss: 0.9961864352226257,grad_norm: 0.999999064584329, iteration: 122908
loss: 1.026615023612976,grad_norm: 0.9999992622814943, iteration: 122909
loss: 0.9660065770149231,grad_norm: 0.9999989721539784, iteration: 122910
loss: 1.04051673412323,grad_norm: 1.0000000102575415, iteration: 122911
loss: 0.9978163838386536,grad_norm: 0.9999992005982788, iteration: 122912
loss: 1.0614197254180908,grad_norm: 0.9999991834065249, iteration: 122913
loss: 1.0002919435501099,grad_norm: 0.9999990701964119, iteration: 122914
loss: 0.9973852634429932,grad_norm: 0.9999991572346588, iteration: 122915
loss: 1.0727958679199219,grad_norm: 0.9999994588322328, iteration: 122916
loss: 1.0000966787338257,grad_norm: 0.9999995949109366, iteration: 122917
loss: 1.0153117179870605,grad_norm: 0.9999992271465881, iteration: 122918
loss: 1.0834596157073975,grad_norm: 0.9999992372233165, iteration: 122919
loss: 1.0021569728851318,grad_norm: 0.9999998644546592, iteration: 122920
loss: 1.0171711444854736,grad_norm: 0.9999998132702287, iteration: 122921
loss: 1.0463979244232178,grad_norm: 0.9999992851836859, iteration: 122922
loss: 1.0097001791000366,grad_norm: 0.9999991784744542, iteration: 122923
loss: 1.0074900388717651,grad_norm: 0.8592186592719662, iteration: 122924
loss: 0.9844807982444763,grad_norm: 0.9999992032579215, iteration: 122925
loss: 0.9937829375267029,grad_norm: 0.9853647270663126, iteration: 122926
loss: 1.0041413307189941,grad_norm: 0.9999991029835666, iteration: 122927
loss: 1.0281455516815186,grad_norm: 0.999999297320189, iteration: 122928
loss: 0.9924569129943848,grad_norm: 0.999999103620392, iteration: 122929
loss: 1.0466299057006836,grad_norm: 0.9999993905683775, iteration: 122930
loss: 1.032004714012146,grad_norm: 0.8680682989840319, iteration: 122931
loss: 0.976410448551178,grad_norm: 0.9999994294513842, iteration: 122932
loss: 0.9720293283462524,grad_norm: 0.9044110648942699, iteration: 122933
loss: 1.0043703317642212,grad_norm: 0.9999991637433399, iteration: 122934
loss: 1.083675742149353,grad_norm: 0.9008663390819228, iteration: 122935
loss: 0.9951051473617554,grad_norm: 0.9999992323087106, iteration: 122936
loss: 1.0142918825149536,grad_norm: 0.8905208113979416, iteration: 122937
loss: 0.9791258573532104,grad_norm: 0.980072234334755, iteration: 122938
loss: 1.0060415267944336,grad_norm: 0.9999995011147765, iteration: 122939
loss: 0.9710015058517456,grad_norm: 0.9816269049429898, iteration: 122940
loss: 1.0192632675170898,grad_norm: 0.9999994035446804, iteration: 122941
loss: 1.014861822128296,grad_norm: 0.9759043851151635, iteration: 122942
loss: 0.9924271106719971,grad_norm: 0.9999991869191498, iteration: 122943
loss: 0.985387921333313,grad_norm: 0.9190926756014776, iteration: 122944
loss: 0.9645735025405884,grad_norm: 0.9999991122898795, iteration: 122945
loss: 1.0003297328948975,grad_norm: 0.9999991520714828, iteration: 122946
loss: 1.0008504390716553,grad_norm: 0.9999992492910603, iteration: 122947
loss: 1.079210877418518,grad_norm: 0.9999991958469062, iteration: 122948
loss: 0.9886668920516968,grad_norm: 0.9710138083606371, iteration: 122949
loss: 0.9506310224533081,grad_norm: 0.9937832694290238, iteration: 122950
loss: 1.0281481742858887,grad_norm: 0.9999991420275159, iteration: 122951
loss: 1.0153752565383911,grad_norm: 0.9999992370241403, iteration: 122952
loss: 1.0050138235092163,grad_norm: 0.8494360453727454, iteration: 122953
loss: 0.9879418015480042,grad_norm: 0.9999991217655144, iteration: 122954
loss: 1.0378450155258179,grad_norm: 0.9999994097966115, iteration: 122955
loss: 0.9831972122192383,grad_norm: 0.9551595333662558, iteration: 122956
loss: 0.9814072847366333,grad_norm: 0.8741625589445274, iteration: 122957
loss: 1.0330982208251953,grad_norm: 0.9999990745434439, iteration: 122958
loss: 0.971306324005127,grad_norm: 0.9999990655427243, iteration: 122959
loss: 0.9824747443199158,grad_norm: 0.9748973571040463, iteration: 122960
loss: 1.0075247287750244,grad_norm: 0.9999990280729022, iteration: 122961
loss: 1.0154343843460083,grad_norm: 0.99999924836084, iteration: 122962
loss: 0.985270082950592,grad_norm: 0.9999993099334115, iteration: 122963
loss: 0.9779766798019409,grad_norm: 0.9999989801878375, iteration: 122964
loss: 1.0135493278503418,grad_norm: 0.9999996939111622, iteration: 122965
loss: 0.9867583513259888,grad_norm: 0.9999992468695357, iteration: 122966
loss: 1.075073003768921,grad_norm: 0.9999996741781761, iteration: 122967
loss: 1.0621274709701538,grad_norm: 0.9999999243414621, iteration: 122968
loss: 0.9951226711273193,grad_norm: 0.9999996664425403, iteration: 122969
loss: 1.025593876838684,grad_norm: 0.9999993143630242, iteration: 122970
loss: 0.9962604641914368,grad_norm: 0.9999991082454681, iteration: 122971
loss: 1.0280122756958008,grad_norm: 0.9345951367304977, iteration: 122972
loss: 0.9863332509994507,grad_norm: 0.999999054447234, iteration: 122973
loss: 1.0403966903686523,grad_norm: 0.9999995350791792, iteration: 122974
loss: 1.031917929649353,grad_norm: 0.9999991523045131, iteration: 122975
loss: 1.0321526527404785,grad_norm: 0.9999993239393375, iteration: 122976
loss: 1.0091116428375244,grad_norm: 0.9999999817349664, iteration: 122977
loss: 1.0002031326293945,grad_norm: 0.8878629632409135, iteration: 122978
loss: 1.0004180669784546,grad_norm: 0.8609919714627334, iteration: 122979
loss: 1.076456904411316,grad_norm: 0.9999991542639982, iteration: 122980
loss: 1.14824640750885,grad_norm: 0.999999508972901, iteration: 122981
loss: 1.0527702569961548,grad_norm: 0.9999992490928469, iteration: 122982
loss: 1.0198817253112793,grad_norm: 0.9999990850334938, iteration: 122983
loss: 1.105299711227417,grad_norm: 0.999999707791144, iteration: 122984
loss: 0.9762508273124695,grad_norm: 0.9999991423533244, iteration: 122985
loss: 1.0148077011108398,grad_norm: 0.9999991541614695, iteration: 122986
loss: 1.0063146352767944,grad_norm: 0.934782642952264, iteration: 122987
loss: 0.9819828867912292,grad_norm: 0.9999990703286673, iteration: 122988
loss: 1.037676453590393,grad_norm: 0.9999992785016893, iteration: 122989
loss: 1.022944688796997,grad_norm: 0.8944934443128534, iteration: 122990
loss: 1.1072195768356323,grad_norm: 0.9999993133634594, iteration: 122991
loss: 1.0689533948898315,grad_norm: 0.999999925441357, iteration: 122992
loss: 1.1052801609039307,grad_norm: 0.999999396049247, iteration: 122993
loss: 1.0435686111450195,grad_norm: 0.9999998837802933, iteration: 122994
loss: 0.9867476224899292,grad_norm: 0.9999992079122934, iteration: 122995
loss: 0.9556959271430969,grad_norm: 0.9999991863819243, iteration: 122996
loss: 1.0650626420974731,grad_norm: 0.9999994379711523, iteration: 122997
loss: 1.118934988975525,grad_norm: 0.9999997087529423, iteration: 122998
loss: 0.9919105172157288,grad_norm: 0.9688016152588269, iteration: 122999
loss: 1.1107187271118164,grad_norm: 0.9999999013054311, iteration: 123000
loss: 1.0377475023269653,grad_norm: 0.9999991471425115, iteration: 123001
loss: 1.0819826126098633,grad_norm: 0.9999996860977268, iteration: 123002
loss: 0.9937384724617004,grad_norm: 0.9999992888181962, iteration: 123003
loss: 1.0102767944335938,grad_norm: 0.9999992243737613, iteration: 123004
loss: 1.0567063093185425,grad_norm: 0.9999994539917789, iteration: 123005
loss: 1.1564146280288696,grad_norm: 0.9999992365240167, iteration: 123006
loss: 1.1000255346298218,grad_norm: 0.9999995519510387, iteration: 123007
loss: 0.9895579218864441,grad_norm: 0.9999991868533655, iteration: 123008
loss: 1.0091474056243896,grad_norm: 0.9999991380076583, iteration: 123009
loss: 1.0053800344467163,grad_norm: 0.9999993804720372, iteration: 123010
loss: 1.0408638715744019,grad_norm: 0.999999829221633, iteration: 123011
loss: 0.9879752397537231,grad_norm: 0.9744191437722525, iteration: 123012
loss: 1.0111433267593384,grad_norm: 0.999999058111158, iteration: 123013
loss: 1.0297197103500366,grad_norm: 0.9999995043709599, iteration: 123014
loss: 0.9683697819709778,grad_norm: 0.9999990569170915, iteration: 123015
loss: 0.9549606442451477,grad_norm: 0.9999990829508019, iteration: 123016
loss: 1.0084781646728516,grad_norm: 0.9999991817694727, iteration: 123017
loss: 0.996117889881134,grad_norm: 0.999999145798254, iteration: 123018
loss: 0.9993894696235657,grad_norm: 0.9999994053000775, iteration: 123019
loss: 1.0054523944854736,grad_norm: 0.9999992374166939, iteration: 123020
loss: 1.0046297311782837,grad_norm: 0.9999998139841918, iteration: 123021
loss: 1.0037013292312622,grad_norm: 0.9999991431528146, iteration: 123022
loss: 0.9996211528778076,grad_norm: 0.9999991945517546, iteration: 123023
loss: 0.9998797178268433,grad_norm: 0.9505915551480035, iteration: 123024
loss: 1.0352782011032104,grad_norm: 0.9999991079301606, iteration: 123025
loss: 0.9647558927536011,grad_norm: 0.9623120319971635, iteration: 123026
loss: 0.9632782936096191,grad_norm: 0.9999989856101481, iteration: 123027
loss: 1.0114909410476685,grad_norm: 0.9999995611220143, iteration: 123028
loss: 1.0366666316986084,grad_norm: 0.9999993182821034, iteration: 123029
loss: 0.9961276650428772,grad_norm: 0.9708508546265892, iteration: 123030
loss: 0.97361820936203,grad_norm: 0.9999990140494873, iteration: 123031
loss: 1.0151474475860596,grad_norm: 0.9999991099696057, iteration: 123032
loss: 1.0844677686691284,grad_norm: 0.9999998074738401, iteration: 123033
loss: 1.023391842842102,grad_norm: 0.9999989026440138, iteration: 123034
loss: 0.9904372096061707,grad_norm: 0.9999990804018611, iteration: 123035
loss: 1.0500216484069824,grad_norm: 0.9999992876910825, iteration: 123036
loss: 1.0988157987594604,grad_norm: 0.9999991948818068, iteration: 123037
loss: 1.0079286098480225,grad_norm: 0.9999993381083165, iteration: 123038
loss: 1.0213385820388794,grad_norm: 0.9999991012775433, iteration: 123039
loss: 0.9885880351066589,grad_norm: 0.9999992406811044, iteration: 123040
loss: 1.0450575351715088,grad_norm: 0.9999992191470152, iteration: 123041
loss: 0.9895700216293335,grad_norm: 0.8904980936004617, iteration: 123042
loss: 0.999785840511322,grad_norm: 0.9301806587348636, iteration: 123043
loss: 1.011592984199524,grad_norm: 0.9999990040274693, iteration: 123044
loss: 1.0413349866867065,grad_norm: 0.999999097006015, iteration: 123045
loss: 1.0205566883087158,grad_norm: 0.9172675199125381, iteration: 123046
loss: 1.0054744482040405,grad_norm: 0.9963574448764103, iteration: 123047
loss: 0.9702154397964478,grad_norm: 0.8618659652641311, iteration: 123048
loss: 1.0429737567901611,grad_norm: 0.9999990613788575, iteration: 123049
loss: 0.9803646206855774,grad_norm: 0.9999991310214502, iteration: 123050
loss: 1.0240339040756226,grad_norm: 0.9999991422414274, iteration: 123051
loss: 0.9791993498802185,grad_norm: 0.8113946465960055, iteration: 123052
loss: 0.9911728501319885,grad_norm: 0.9259733455442886, iteration: 123053
loss: 1.0028619766235352,grad_norm: 0.999999028539808, iteration: 123054
loss: 1.0097594261169434,grad_norm: 0.9999991356039032, iteration: 123055
loss: 1.1356086730957031,grad_norm: 0.9999990920267069, iteration: 123056
loss: 0.9875138401985168,grad_norm: 0.9999990645396902, iteration: 123057
loss: 1.0279269218444824,grad_norm: 0.9999998730488207, iteration: 123058
loss: 0.9785474538803101,grad_norm: 0.9999991275384423, iteration: 123059
loss: 1.0007134675979614,grad_norm: 0.9317296589679596, iteration: 123060
loss: 1.0636544227600098,grad_norm: 0.9999994231239516, iteration: 123061
loss: 0.9798628091812134,grad_norm: 0.9999991536449496, iteration: 123062
loss: 1.0108007192611694,grad_norm: 0.9999992599828919, iteration: 123063
loss: 0.9764670133590698,grad_norm: 0.9454761671462434, iteration: 123064
loss: 1.0046368837356567,grad_norm: 0.9999992242195046, iteration: 123065
loss: 0.993908703327179,grad_norm: 0.9102160493800763, iteration: 123066
loss: 1.0077581405639648,grad_norm: 0.9572865329526573, iteration: 123067
loss: 0.984063982963562,grad_norm: 0.966843101783507, iteration: 123068
loss: 1.0248593091964722,grad_norm: 0.9999994672074999, iteration: 123069
loss: 0.9796023964881897,grad_norm: 0.9999991698749672, iteration: 123070
loss: 1.021061658859253,grad_norm: 0.9999991932965246, iteration: 123071
loss: 1.0357731580734253,grad_norm: 0.9999991337296101, iteration: 123072
loss: 0.9906506538391113,grad_norm: 0.9999989067676981, iteration: 123073
loss: 0.9854565858840942,grad_norm: 0.9099706836652554, iteration: 123074
loss: 0.9882111549377441,grad_norm: 0.9999990236309731, iteration: 123075
loss: 0.9904510974884033,grad_norm: 0.9999992272440541, iteration: 123076
loss: 0.9991235136985779,grad_norm: 0.9999990768278354, iteration: 123077
loss: 0.9891623854637146,grad_norm: 0.9999990781760557, iteration: 123078
loss: 1.0366679430007935,grad_norm: 0.999999196401208, iteration: 123079
loss: 0.9882748126983643,grad_norm: 0.9126404457288028, iteration: 123080
loss: 1.0001945495605469,grad_norm: 0.9999993278884166, iteration: 123081
loss: 0.9875571727752686,grad_norm: 0.9999990100930092, iteration: 123082
loss: 1.0305453538894653,grad_norm: 0.99999913814938, iteration: 123083
loss: 1.0281198024749756,grad_norm: 0.9999991364538013, iteration: 123084
loss: 1.0260939598083496,grad_norm: 0.9999989966756251, iteration: 123085
loss: 1.0227763652801514,grad_norm: 0.9999990573271841, iteration: 123086
loss: 0.9679230451583862,grad_norm: 0.8087947562281178, iteration: 123087
loss: 1.0075485706329346,grad_norm: 0.9670107724002804, iteration: 123088
loss: 0.96630859375,grad_norm: 0.9987750070157518, iteration: 123089
loss: 0.9741199016571045,grad_norm: 0.999999800907973, iteration: 123090
loss: 1.0016944408416748,grad_norm: 0.9999990109701343, iteration: 123091
loss: 1.0448400974273682,grad_norm: 0.9999990568001812, iteration: 123092
loss: 0.9716411232948303,grad_norm: 0.9999990375913843, iteration: 123093
loss: 1.0142995119094849,grad_norm: 0.9999990248714749, iteration: 123094
loss: 1.0064529180526733,grad_norm: 0.9999991404294013, iteration: 123095
loss: 0.9803482890129089,grad_norm: 0.8057979792270273, iteration: 123096
loss: 1.0312732458114624,grad_norm: 0.999999120206651, iteration: 123097
loss: 0.995638906955719,grad_norm: 0.8808055902471734, iteration: 123098
loss: 1.0230220556259155,grad_norm: 0.999999315431294, iteration: 123099
loss: 0.9991889595985413,grad_norm: 0.9885557957928482, iteration: 123100
loss: 1.0370513200759888,grad_norm: 0.9999992814926569, iteration: 123101
loss: 1.0147788524627686,grad_norm: 0.9999991082375461, iteration: 123102
loss: 1.0501106977462769,grad_norm: 0.9999993142029467, iteration: 123103
loss: 1.0083292722702026,grad_norm: 0.9999688818914125, iteration: 123104
loss: 0.9901438355445862,grad_norm: 0.9305278817017656, iteration: 123105
loss: 0.9719395637512207,grad_norm: 0.9623352570516579, iteration: 123106
loss: 0.9978431463241577,grad_norm: 0.9999991727375348, iteration: 123107
loss: 1.0256266593933105,grad_norm: 0.999999710822771, iteration: 123108
loss: 0.9639477133750916,grad_norm: 0.9781221297887853, iteration: 123109
loss: 1.109130620956421,grad_norm: 0.9999998775805086, iteration: 123110
loss: 0.9947313070297241,grad_norm: 0.9999989818471843, iteration: 123111
loss: 1.0179128646850586,grad_norm: 0.9597146833804052, iteration: 123112
loss: 0.9556641578674316,grad_norm: 0.8857026431229871, iteration: 123113
loss: 0.9957365393638611,grad_norm: 0.999999262484627, iteration: 123114
loss: 0.9895917177200317,grad_norm: 0.9999992077370032, iteration: 123115
loss: 1.030508279800415,grad_norm: 0.9019559021824526, iteration: 123116
loss: 1.0503922700881958,grad_norm: 0.9999996318933869, iteration: 123117
loss: 1.0231091976165771,grad_norm: 0.9999990679842218, iteration: 123118
loss: 1.036121129989624,grad_norm: 0.9670907394906441, iteration: 123119
loss: 1.0080076456069946,grad_norm: 0.9999989448687329, iteration: 123120
loss: 1.0497246980667114,grad_norm: 0.9999989953499256, iteration: 123121
loss: 0.9916765689849854,grad_norm: 0.9999991538684512, iteration: 123122
loss: 1.096015214920044,grad_norm: 0.9816291661218408, iteration: 123123
loss: 0.9980826377868652,grad_norm: 0.9980068111269906, iteration: 123124
loss: 0.9920791387557983,grad_norm: 0.9931822904097866, iteration: 123125
loss: 1.0185368061065674,grad_norm: 0.9880904342115007, iteration: 123126
loss: 0.9918227791786194,grad_norm: 0.9999990823600123, iteration: 123127
loss: 0.9951745867729187,grad_norm: 0.9999991315731974, iteration: 123128
loss: 0.9868947863578796,grad_norm: 0.999999005332447, iteration: 123129
loss: 1.0298467874526978,grad_norm: 0.999999193694857, iteration: 123130
loss: 0.9877098798751831,grad_norm: 0.9793735146749915, iteration: 123131
loss: 1.0347900390625,grad_norm: 0.9999991706044957, iteration: 123132
loss: 1.022592544555664,grad_norm: 0.9008042655656081, iteration: 123133
loss: 1.0157908201217651,grad_norm: 0.9999996806481827, iteration: 123134
loss: 1.0600730180740356,grad_norm: 0.9999998308736443, iteration: 123135
loss: 1.0059469938278198,grad_norm: 0.9999990651801179, iteration: 123136
loss: 0.9766049385070801,grad_norm: 0.9999990239822664, iteration: 123137
loss: 1.0227454900741577,grad_norm: 0.9999990352193168, iteration: 123138
loss: 0.9861198663711548,grad_norm: 0.9999990932970205, iteration: 123139
loss: 1.027047038078308,grad_norm: 0.8594154310034098, iteration: 123140
loss: 1.0039252042770386,grad_norm: 0.9999994012979784, iteration: 123141
loss: 1.0116323232650757,grad_norm: 0.9999991062475266, iteration: 123142
loss: 1.002710223197937,grad_norm: 0.9999993530886876, iteration: 123143
loss: 0.9955172538757324,grad_norm: 0.9999990653663555, iteration: 123144
loss: 0.9925397038459778,grad_norm: 0.9999991416883222, iteration: 123145
loss: 1.0030826330184937,grad_norm: 0.9999991201339768, iteration: 123146
loss: 1.0332303047180176,grad_norm: 0.8870396760794091, iteration: 123147
loss: 0.9440737962722778,grad_norm: 0.9999991829775026, iteration: 123148
loss: 0.9792516827583313,grad_norm: 0.9999991020107314, iteration: 123149
loss: 0.9828935265541077,grad_norm: 0.9999991738352587, iteration: 123150
loss: 1.0092260837554932,grad_norm: 0.979624078797081, iteration: 123151
loss: 0.9588369131088257,grad_norm: 0.9999991405347116, iteration: 123152
loss: 0.992424488067627,grad_norm: 0.9617126941807289, iteration: 123153
loss: 0.9996724724769592,grad_norm: 0.9999992716141755, iteration: 123154
loss: 1.0323477983474731,grad_norm: 0.9999993312128499, iteration: 123155
loss: 0.9908897876739502,grad_norm: 0.9999990424763645, iteration: 123156
loss: 1.0138622522354126,grad_norm: 0.9999990163481598, iteration: 123157
loss: 1.0337562561035156,grad_norm: 0.9999995235908448, iteration: 123158
loss: 1.0192204713821411,grad_norm: 0.9999990250784586, iteration: 123159
loss: 0.9779884815216064,grad_norm: 0.9201365533087575, iteration: 123160
loss: 1.084355115890503,grad_norm: 0.9999994943844628, iteration: 123161
loss: 0.983985960483551,grad_norm: 0.9999989646959876, iteration: 123162
loss: 0.9807071089744568,grad_norm: 0.9999991585188358, iteration: 123163
loss: 1.0410871505737305,grad_norm: 0.9293163266187827, iteration: 123164
loss: 1.0006613731384277,grad_norm: 0.983220816744655, iteration: 123165
loss: 1.0034184455871582,grad_norm: 0.9587822945177666, iteration: 123166
loss: 0.9673808813095093,grad_norm: 0.999999185973354, iteration: 123167
loss: 1.0068267583847046,grad_norm: 0.9999991577652989, iteration: 123168
loss: 0.9531304240226746,grad_norm: 0.967552351755208, iteration: 123169
loss: 0.9945959448814392,grad_norm: 0.9999991879994374, iteration: 123170
loss: 0.9991697669029236,grad_norm: 0.999999206819897, iteration: 123171
loss: 0.9743403792381287,grad_norm: 0.9999990251857261, iteration: 123172
loss: 1.000217080116272,grad_norm: 0.9640780989812587, iteration: 123173
loss: 0.9802658557891846,grad_norm: 0.9999991942071723, iteration: 123174
loss: 1.0011547803878784,grad_norm: 0.9999992374857786, iteration: 123175
loss: 0.9968851804733276,grad_norm: 0.8524839879130179, iteration: 123176
loss: 1.038216471672058,grad_norm: 0.9999990154314893, iteration: 123177
loss: 1.1111360788345337,grad_norm: 0.9999998639262653, iteration: 123178
loss: 1.019901990890503,grad_norm: 0.9631647677215641, iteration: 123179
loss: 0.9627769589424133,grad_norm: 0.9626699254389234, iteration: 123180
loss: 1.0137295722961426,grad_norm: 0.9999990024954071, iteration: 123181
loss: 0.9661232829093933,grad_norm: 0.9999992129213794, iteration: 123182
loss: 0.99592524766922,grad_norm: 0.9999990888665111, iteration: 123183
loss: 0.9849740266799927,grad_norm: 0.9999990537534503, iteration: 123184
loss: 0.9813740849494934,grad_norm: 0.999999171919976, iteration: 123185
loss: 0.9665483832359314,grad_norm: 0.9999992046262712, iteration: 123186
loss: 1.0131056308746338,grad_norm: 0.9999991917078153, iteration: 123187
loss: 1.0337436199188232,grad_norm: 0.9999991153913708, iteration: 123188
loss: 1.009670615196228,grad_norm: 0.9999990678653007, iteration: 123189
loss: 1.0192979574203491,grad_norm: 0.9999991133106753, iteration: 123190
loss: 0.9688513278961182,grad_norm: 0.99999919463079, iteration: 123191
loss: 0.9911538362503052,grad_norm: 0.999999206753236, iteration: 123192
loss: 1.015466570854187,grad_norm: 0.9999991507682242, iteration: 123193
loss: 1.00823175907135,grad_norm: 0.9999991671325019, iteration: 123194
loss: 1.0443313121795654,grad_norm: 0.9999991738404079, iteration: 123195
loss: 1.0699878931045532,grad_norm: 0.9999995032556062, iteration: 123196
loss: 1.0364285707473755,grad_norm: 0.9999989987122884, iteration: 123197
loss: 1.023467779159546,grad_norm: 0.9999991836199853, iteration: 123198
loss: 0.9563390612602234,grad_norm: 0.9796655667082428, iteration: 123199
loss: 1.0233560800552368,grad_norm: 0.9999990644725577, iteration: 123200
loss: 1.0161256790161133,grad_norm: 0.9999991871535722, iteration: 123201
loss: 0.9998336434364319,grad_norm: 0.9999991525874706, iteration: 123202
loss: 1.028782606124878,grad_norm: 0.9999991875253728, iteration: 123203
loss: 0.9770233631134033,grad_norm: 0.9999991668568436, iteration: 123204
loss: 0.9775050282478333,grad_norm: 0.9297295891778522, iteration: 123205
loss: 1.0345820188522339,grad_norm: 0.9999997324866305, iteration: 123206
loss: 0.9978614449501038,grad_norm: 0.9999990739457947, iteration: 123207
loss: 0.9920206069946289,grad_norm: 0.9999990256810407, iteration: 123208
loss: 1.032436728477478,grad_norm: 0.9999991992993822, iteration: 123209
loss: 0.9886696338653564,grad_norm: 0.9999998621153058, iteration: 123210
loss: 1.0433183908462524,grad_norm: 0.9373945449668718, iteration: 123211
loss: 0.9793341159820557,grad_norm: 0.9999998715126905, iteration: 123212
loss: 1.0147961378097534,grad_norm: 0.9799245292736367, iteration: 123213
loss: 1.0264238119125366,grad_norm: 0.9999998825252487, iteration: 123214
loss: 1.016918420791626,grad_norm: 0.9979797432285699, iteration: 123215
loss: 0.9880679249763489,grad_norm: 0.9999991723179283, iteration: 123216
loss: 1.0216137170791626,grad_norm: 0.999999081366747, iteration: 123217
loss: 1.0394737720489502,grad_norm: 0.9999996994661831, iteration: 123218
loss: 0.9974390268325806,grad_norm: 0.9075943861939704, iteration: 123219
loss: 0.9682496190071106,grad_norm: 0.9999991127174848, iteration: 123220
loss: 1.0014442205429077,grad_norm: 0.9392260196147125, iteration: 123221
loss: 0.9913239479064941,grad_norm: 0.8760132415906924, iteration: 123222
loss: 0.9842720627784729,grad_norm: 0.9999991633447959, iteration: 123223
loss: 0.9699046611785889,grad_norm: 0.9999991744732128, iteration: 123224
loss: 1.0045400857925415,grad_norm: 0.9999990297790696, iteration: 123225
loss: 1.0013068914413452,grad_norm: 0.9999990380074664, iteration: 123226
loss: 1.0514463186264038,grad_norm: 0.9734627352757713, iteration: 123227
loss: 0.9711580872535706,grad_norm: 0.9789047206590863, iteration: 123228
loss: 1.0349254608154297,grad_norm: 0.999999147621923, iteration: 123229
loss: 1.017912745475769,grad_norm: 0.9999990676643588, iteration: 123230
loss: 0.9845883250236511,grad_norm: 0.9999989715451479, iteration: 123231
loss: 0.9885154962539673,grad_norm: 0.9523261080011255, iteration: 123232
loss: 0.953496515750885,grad_norm: 0.8725015689090452, iteration: 123233
loss: 0.9952425360679626,grad_norm: 0.9984314899366405, iteration: 123234
loss: 1.1632026433944702,grad_norm: 0.9999992918714585, iteration: 123235
loss: 0.9477333426475525,grad_norm: 0.9221453539933505, iteration: 123236
loss: 0.9896796345710754,grad_norm: 0.9999989449858613, iteration: 123237
loss: 1.017456293106079,grad_norm: 0.9999992508581927, iteration: 123238
loss: 0.9819681644439697,grad_norm: 0.9999991491713268, iteration: 123239
loss: 0.9908891916275024,grad_norm: 0.9999991891440039, iteration: 123240
loss: 1.0082874298095703,grad_norm: 0.9855491519221548, iteration: 123241
loss: 1.0013972520828247,grad_norm: 0.9079928118011561, iteration: 123242
loss: 1.0143110752105713,grad_norm: 0.9999991873303752, iteration: 123243
loss: 0.9994863271713257,grad_norm: 0.9999990799984688, iteration: 123244
loss: 1.0547178983688354,grad_norm: 0.9999992062370429, iteration: 123245
loss: 1.003961443901062,grad_norm: 0.999999125270856, iteration: 123246
loss: 1.001315712928772,grad_norm: 0.9999991263221756, iteration: 123247
loss: 1.0552544593811035,grad_norm: 0.9999991509419532, iteration: 123248
loss: 0.9931361079216003,grad_norm: 0.986958712384888, iteration: 123249
loss: 0.9809711575508118,grad_norm: 0.9999990719948618, iteration: 123250
loss: 0.9865341782569885,grad_norm: 0.8993487829727759, iteration: 123251
loss: 0.9981265664100647,grad_norm: 0.90235880128071, iteration: 123252
loss: 1.0134507417678833,grad_norm: 0.9999990976798385, iteration: 123253
loss: 1.018998146057129,grad_norm: 0.9913990757615941, iteration: 123254
loss: 1.0163606405258179,grad_norm: 0.999999144794915, iteration: 123255
loss: 1.0002985000610352,grad_norm: 0.9314191076328777, iteration: 123256
loss: 0.9968074560165405,grad_norm: 0.9999990731990648, iteration: 123257
loss: 1.044273018836975,grad_norm: 0.999999032991097, iteration: 123258
loss: 0.9894881844520569,grad_norm: 0.999999122967101, iteration: 123259
loss: 1.0027638673782349,grad_norm: 0.9851479842669252, iteration: 123260
loss: 1.0330615043640137,grad_norm: 0.9999991375984231, iteration: 123261
loss: 1.0370053052902222,grad_norm: 0.9229436486070954, iteration: 123262
loss: 0.9855648875236511,grad_norm: 0.9530016245919002, iteration: 123263
loss: 1.0498310327529907,grad_norm: 0.9999991530702301, iteration: 123264
loss: 1.0793665647506714,grad_norm: 0.9999992492758055, iteration: 123265
loss: 1.0799012184143066,grad_norm: 0.9999992481295087, iteration: 123266
loss: 1.0343248844146729,grad_norm: 0.9354772313535513, iteration: 123267
loss: 0.9822807908058167,grad_norm: 0.856686290962896, iteration: 123268
loss: 0.9997397661209106,grad_norm: 0.9999990718690981, iteration: 123269
loss: 1.0684703588485718,grad_norm: 0.9999992027547885, iteration: 123270
loss: 1.002376675605774,grad_norm: 0.9999992403200373, iteration: 123271
loss: 0.9711140394210815,grad_norm: 0.9999990422765259, iteration: 123272
loss: 1.0362541675567627,grad_norm: 0.9999994230464277, iteration: 123273
loss: 0.9789264798164368,grad_norm: 0.9822455861592745, iteration: 123274
loss: 1.081731915473938,grad_norm: 0.9999991605715084, iteration: 123275
loss: 1.047285795211792,grad_norm: 0.9961269738324596, iteration: 123276
loss: 1.0149426460266113,grad_norm: 0.9999991168604124, iteration: 123277
loss: 0.975874662399292,grad_norm: 0.8668639640434656, iteration: 123278
loss: 1.1027637720108032,grad_norm: 0.9999994622396124, iteration: 123279
loss: 1.0571048259735107,grad_norm: 0.9999996169520079, iteration: 123280
loss: 0.992624044418335,grad_norm: 0.8319935161838375, iteration: 123281
loss: 1.0514568090438843,grad_norm: 0.9999992677654957, iteration: 123282
loss: 1.0204200744628906,grad_norm: 0.9988557929082128, iteration: 123283
loss: 0.982067883014679,grad_norm: 0.9999990144357609, iteration: 123284
loss: 1.0278370380401611,grad_norm: 0.9433103030893712, iteration: 123285
loss: 1.1020605564117432,grad_norm: 0.9999997467188919, iteration: 123286
loss: 1.0556288957595825,grad_norm: 0.9999992933810524, iteration: 123287
loss: 1.0124847888946533,grad_norm: 0.9999990162415536, iteration: 123288
loss: 0.9763921499252319,grad_norm: 0.9715023793366859, iteration: 123289
loss: 0.9703280925750732,grad_norm: 0.999999040745489, iteration: 123290
loss: 0.999578595161438,grad_norm: 0.9514369742321475, iteration: 123291
loss: 1.0048412084579468,grad_norm: 0.9653203790088921, iteration: 123292
loss: 0.9918551445007324,grad_norm: 0.931739867980734, iteration: 123293
loss: 1.0398006439208984,grad_norm: 0.9999993799976141, iteration: 123294
loss: 1.0983487367630005,grad_norm: 0.9999993070032053, iteration: 123295
loss: 1.0166882276535034,grad_norm: 0.9883870810311747, iteration: 123296
loss: 1.0037858486175537,grad_norm: 0.8991627535433108, iteration: 123297
loss: 1.0735572576522827,grad_norm: 0.9999990764122648, iteration: 123298
loss: 1.0078152418136597,grad_norm: 0.9999990220094271, iteration: 123299
loss: 1.0466433763504028,grad_norm: 0.9999990744398739, iteration: 123300
loss: 1.1102793216705322,grad_norm: 0.9999992166368401, iteration: 123301
loss: 1.003331184387207,grad_norm: 0.999999184813816, iteration: 123302
loss: 1.0342321395874023,grad_norm: 0.9999992906667854, iteration: 123303
loss: 0.9858630895614624,grad_norm: 0.9999993302888225, iteration: 123304
loss: 1.0209156274795532,grad_norm: 0.9999990053331327, iteration: 123305
loss: 1.0190080404281616,grad_norm: 0.9999991860281772, iteration: 123306
loss: 1.0194950103759766,grad_norm: 0.906805683070959, iteration: 123307
loss: 1.0125253200531006,grad_norm: 0.8674583106640902, iteration: 123308
loss: 1.0092682838439941,grad_norm: 0.9999991818487404, iteration: 123309
loss: 1.0075430870056152,grad_norm: 0.9704058432833387, iteration: 123310
loss: 1.0240488052368164,grad_norm: 0.9314042791643306, iteration: 123311
loss: 0.9852099418640137,grad_norm: 0.9593693650418554, iteration: 123312
loss: 0.9947877526283264,grad_norm: 0.9823494190110819, iteration: 123313
loss: 1.1088716983795166,grad_norm: 0.9999996041637708, iteration: 123314
loss: 1.0479331016540527,grad_norm: 0.9999991872369314, iteration: 123315
loss: 0.9816604852676392,grad_norm: 0.999999155938936, iteration: 123316
loss: 0.9820725917816162,grad_norm: 0.9999991501569633, iteration: 123317
loss: 0.9672542214393616,grad_norm: 0.967580511666998, iteration: 123318
loss: 0.9687350988388062,grad_norm: 0.9999991435317029, iteration: 123319
loss: 1.0045050382614136,grad_norm: 0.9999989934273693, iteration: 123320
loss: 1.0416070222854614,grad_norm: 0.9999998841829578, iteration: 123321
loss: 0.9930015802383423,grad_norm: 0.935354633965978, iteration: 123322
loss: 1.022406816482544,grad_norm: 0.9999997461375999, iteration: 123323
loss: 0.9947224259376526,grad_norm: 0.9148591567422741, iteration: 123324
loss: 0.9911029934883118,grad_norm: 0.9999990586261246, iteration: 123325
loss: 0.9833606481552124,grad_norm: 0.9999999462173306, iteration: 123326
loss: 0.9930390119552612,grad_norm: 0.9999994513711503, iteration: 123327
loss: 1.0129334926605225,grad_norm: 0.999999168448291, iteration: 123328
loss: 0.9975554943084717,grad_norm: 0.9999992888799989, iteration: 123329
loss: 1.015141248703003,grad_norm: 0.999999278138513, iteration: 123330
loss: 0.9947507977485657,grad_norm: 0.9999991325686483, iteration: 123331
loss: 0.9853346347808838,grad_norm: 0.943513559181336, iteration: 123332
loss: 1.0158833265304565,grad_norm: 0.9999991280748526, iteration: 123333
loss: 1.0051268339157104,grad_norm: 0.8585730004058948, iteration: 123334
loss: 0.960437536239624,grad_norm: 0.9999992243030502, iteration: 123335
loss: 1.0080558061599731,grad_norm: 0.999999225496614, iteration: 123336
loss: 1.0208810567855835,grad_norm: 0.9999998607803787, iteration: 123337
loss: 1.0025254487991333,grad_norm: 0.9999993375638802, iteration: 123338
loss: 0.9786279201507568,grad_norm: 0.9999991910247904, iteration: 123339
loss: 1.0385596752166748,grad_norm: 0.9999991217475075, iteration: 123340
loss: 1.0036003589630127,grad_norm: 0.9999995542629575, iteration: 123341
loss: 0.9957041144371033,grad_norm: 0.999999150003045, iteration: 123342
loss: 0.9689548015594482,grad_norm: 0.999998989638044, iteration: 123343
loss: 1.0232701301574707,grad_norm: 0.9833890947729556, iteration: 123344
loss: 0.9789034724235535,grad_norm: 0.8875737255962737, iteration: 123345
loss: 1.0100970268249512,grad_norm: 0.999999842971532, iteration: 123346
loss: 1.0056138038635254,grad_norm: 0.9679956862909525, iteration: 123347
loss: 1.0248302221298218,grad_norm: 0.9999991616711974, iteration: 123348
loss: 1.0338093042373657,grad_norm: 0.9999998569502437, iteration: 123349
loss: 0.9638524055480957,grad_norm: 0.8668951402408717, iteration: 123350
loss: 1.01368248462677,grad_norm: 0.9125240250134955, iteration: 123351
loss: 1.0032068490982056,grad_norm: 0.9999991990048761, iteration: 123352
loss: 1.0642368793487549,grad_norm: 0.9999990896765, iteration: 123353
loss: 1.0323328971862793,grad_norm: 0.9999997176247867, iteration: 123354
loss: 1.089340329170227,grad_norm: 0.9999996640117366, iteration: 123355
loss: 0.9752280712127686,grad_norm: 0.9999990657965896, iteration: 123356
loss: 1.0054962635040283,grad_norm: 0.9712259034740987, iteration: 123357
loss: 1.1495692729949951,grad_norm: 0.9999995408773736, iteration: 123358
loss: 1.0321710109710693,grad_norm: 0.9999994390882306, iteration: 123359
loss: 0.9634569883346558,grad_norm: 0.9999989985062377, iteration: 123360
loss: 1.0183444023132324,grad_norm: 0.9823728677230948, iteration: 123361
loss: 1.014009952545166,grad_norm: 0.9454836566853866, iteration: 123362
loss: 1.017729640007019,grad_norm: 0.9999990821044821, iteration: 123363
loss: 0.9872996211051941,grad_norm: 0.9999992273862448, iteration: 123364
loss: 0.9565286040306091,grad_norm: 0.9999990726431534, iteration: 123365
loss: 0.9636870622634888,grad_norm: 0.9999993616509404, iteration: 123366
loss: 1.001911997795105,grad_norm: 0.9999990108240376, iteration: 123367
loss: 0.9975374937057495,grad_norm: 0.9999989910994821, iteration: 123368
loss: 1.0264129638671875,grad_norm: 0.9719991408065314, iteration: 123369
loss: 0.9855793118476868,grad_norm: 0.9999991301301109, iteration: 123370
loss: 1.0360127687454224,grad_norm: 0.999999226101234, iteration: 123371
loss: 1.0346680879592896,grad_norm: 0.9999990466672651, iteration: 123372
loss: 0.9834496974945068,grad_norm: 0.9999992722758367, iteration: 123373
loss: 0.9876501560211182,grad_norm: 0.9999990844577995, iteration: 123374
loss: 0.9907756447792053,grad_norm: 0.9822825710527839, iteration: 123375
loss: 1.053237795829773,grad_norm: 0.9999989190122962, iteration: 123376
loss: 1.023861289024353,grad_norm: 0.9999991348067021, iteration: 123377
loss: 1.027620553970337,grad_norm: 0.8850318655549883, iteration: 123378
loss: 0.9978997707366943,grad_norm: 0.9999991900845625, iteration: 123379
loss: 1.007176160812378,grad_norm: 0.99999932310579, iteration: 123380
loss: 0.9908704161643982,grad_norm: 0.929659859989204, iteration: 123381
loss: 1.0459918975830078,grad_norm: 0.999999180099873, iteration: 123382
loss: 1.0048778057098389,grad_norm: 0.999999086144842, iteration: 123383
loss: 0.9896054267883301,grad_norm: 0.9999989820858846, iteration: 123384
loss: 0.9668177962303162,grad_norm: 0.9473356353950101, iteration: 123385
loss: 0.9483067989349365,grad_norm: 0.9999990463454328, iteration: 123386
loss: 1.011513352394104,grad_norm: 0.967768258560715, iteration: 123387
loss: 1.0619723796844482,grad_norm: 0.9999991797716715, iteration: 123388
loss: 1.072394847869873,grad_norm: 0.9999991086502867, iteration: 123389
loss: 1.0281990766525269,grad_norm: 0.9999992070355522, iteration: 123390
loss: 0.9798762202262878,grad_norm: 0.9999993104299169, iteration: 123391
loss: 0.976682722568512,grad_norm: 0.9999991269516398, iteration: 123392
loss: 1.0148895978927612,grad_norm: 0.9999991434446224, iteration: 123393
loss: 0.9697681069374084,grad_norm: 0.9885463028614065, iteration: 123394
loss: 0.9873087406158447,grad_norm: 0.9999991289015842, iteration: 123395
loss: 0.9895861148834229,grad_norm: 0.9171197276920576, iteration: 123396
loss: 1.0655068159103394,grad_norm: 0.9999997588175802, iteration: 123397
loss: 1.0086663961410522,grad_norm: 0.8795751494439158, iteration: 123398
loss: 1.0097250938415527,grad_norm: 0.9999992476724779, iteration: 123399
loss: 0.9709326028823853,grad_norm: 0.999999032662692, iteration: 123400
loss: 1.032865285873413,grad_norm: 0.9999991041841336, iteration: 123401
loss: 1.0363361835479736,grad_norm: 0.999999572071612, iteration: 123402
loss: 0.9755340218544006,grad_norm: 0.9132594675765743, iteration: 123403
loss: 0.982614278793335,grad_norm: 0.9999991026416707, iteration: 123404
loss: 1.0492409467697144,grad_norm: 0.9999993564916363, iteration: 123405
loss: 0.9955475330352783,grad_norm: 0.999999047460524, iteration: 123406
loss: 1.022134780883789,grad_norm: 0.9999992458186783, iteration: 123407
loss: 0.9971065521240234,grad_norm: 0.9994808634752381, iteration: 123408
loss: 1.0023473501205444,grad_norm: 0.7598745148696908, iteration: 123409
loss: 1.0025256872177124,grad_norm: 0.9999990987709302, iteration: 123410
loss: 0.9802827835083008,grad_norm: 0.9999989871144151, iteration: 123411
loss: 1.102009654045105,grad_norm: 0.9999998077248718, iteration: 123412
loss: 1.0858409404754639,grad_norm: 0.9999994267236515, iteration: 123413
loss: 0.9820569157600403,grad_norm: 0.9999992162650817, iteration: 123414
loss: 1.003677248954773,grad_norm: 0.9999992889588137, iteration: 123415
loss: 0.9944412708282471,grad_norm: 0.999999149534319, iteration: 123416
loss: 0.9847345948219299,grad_norm: 0.9820323433505188, iteration: 123417
loss: 1.0008533000946045,grad_norm: 0.9999992220830207, iteration: 123418
loss: 1.0200923681259155,grad_norm: 0.925396685960709, iteration: 123419
loss: 1.0555866956710815,grad_norm: 0.9999992358878211, iteration: 123420
loss: 1.0547804832458496,grad_norm: 0.9455335591207786, iteration: 123421
loss: 0.9887077808380127,grad_norm: 0.9999992205951463, iteration: 123422
loss: 0.9865974187850952,grad_norm: 0.9999990519453315, iteration: 123423
loss: 0.9707227945327759,grad_norm: 0.8972373401005249, iteration: 123424
loss: 1.0684765577316284,grad_norm: 0.9999989842716381, iteration: 123425
loss: 1.0299090147018433,grad_norm: 0.9999990761872007, iteration: 123426
loss: 1.014800786972046,grad_norm: 0.8843491845162217, iteration: 123427
loss: 0.979687511920929,grad_norm: 0.9999997001392057, iteration: 123428
loss: 0.9824771285057068,grad_norm: 0.9999991750807203, iteration: 123429
loss: 0.9898951053619385,grad_norm: 0.9871374383670466, iteration: 123430
loss: 1.0476845502853394,grad_norm: 0.999999964916525, iteration: 123431
loss: 1.0142711400985718,grad_norm: 0.9999990953804273, iteration: 123432
loss: 1.0193713903427124,grad_norm: 0.9611117642963126, iteration: 123433
loss: 0.9846645593643188,grad_norm: 0.9999991317211738, iteration: 123434
loss: 1.0853317975997925,grad_norm: 0.9999991772324426, iteration: 123435
loss: 0.9842928647994995,grad_norm: 0.9788962106136253, iteration: 123436
loss: 1.0240685939788818,grad_norm: 0.916795437510947, iteration: 123437
loss: 0.9989947080612183,grad_norm: 0.9999991432802765, iteration: 123438
loss: 0.9674382209777832,grad_norm: 0.9999992629793362, iteration: 123439
loss: 0.9960086345672607,grad_norm: 0.9999992799227475, iteration: 123440
loss: 1.044157862663269,grad_norm: 0.9999991382541993, iteration: 123441
loss: 1.0483392477035522,grad_norm: 0.9999991028887919, iteration: 123442
loss: 1.0290641784667969,grad_norm: 0.9098622455881037, iteration: 123443
loss: 1.0076180696487427,grad_norm: 0.9999991871405827, iteration: 123444
loss: 1.017374873161316,grad_norm: 0.9999993045790975, iteration: 123445
loss: 0.9941056370735168,grad_norm: 0.8525103415834181, iteration: 123446
loss: 1.003299593925476,grad_norm: 0.9817907736218104, iteration: 123447
loss: 0.9922326803207397,grad_norm: 0.9999992238108784, iteration: 123448
loss: 1.0163094997406006,grad_norm: 0.9999993788017298, iteration: 123449
loss: 1.0076266527175903,grad_norm: 0.9268438838518374, iteration: 123450
loss: 0.984731912612915,grad_norm: 0.9999991353045014, iteration: 123451
loss: 0.969623863697052,grad_norm: 0.8979276731412008, iteration: 123452
loss: 0.9951347708702087,grad_norm: 0.9999989953685822, iteration: 123453
loss: 1.0089510679244995,grad_norm: 0.9999996460727317, iteration: 123454
loss: 1.021842360496521,grad_norm: 0.9999993273988964, iteration: 123455
loss: 1.0202962160110474,grad_norm: 0.943833323200654, iteration: 123456
loss: 1.0735443830490112,grad_norm: 0.967582285860471, iteration: 123457
loss: 0.9643511772155762,grad_norm: 0.9999992146170411, iteration: 123458
loss: 0.9933907985687256,grad_norm: 0.926504993354586, iteration: 123459
loss: 1.2180808782577515,grad_norm: 0.9999996617288328, iteration: 123460
loss: 0.9857010841369629,grad_norm: 0.9804256846266708, iteration: 123461
loss: 1.0090261697769165,grad_norm: 0.9676625917617637, iteration: 123462
loss: 1.0257540941238403,grad_norm: 0.9999991159764812, iteration: 123463
loss: 0.9867598414421082,grad_norm: 0.9415901627103044, iteration: 123464
loss: 0.9920483827590942,grad_norm: 0.9999996496556769, iteration: 123465
loss: 0.9944305419921875,grad_norm: 0.952752167747435, iteration: 123466
loss: 0.9727407693862915,grad_norm: 0.9999993320647385, iteration: 123467
loss: 0.9761722087860107,grad_norm: 0.999999148845068, iteration: 123468
loss: 0.9900022149085999,grad_norm: 0.8920459722491496, iteration: 123469
loss: 1.0345956087112427,grad_norm: 0.9999991311057297, iteration: 123470
loss: 0.9854099154472351,grad_norm: 0.8938310442061437, iteration: 123471
loss: 0.9650114178657532,grad_norm: 0.9999992411058727, iteration: 123472
loss: 0.9970847368240356,grad_norm: 0.966188995635465, iteration: 123473
loss: 1.0344072580337524,grad_norm: 0.9999992224048169, iteration: 123474
loss: 0.9935681819915771,grad_norm: 0.9426586497011196, iteration: 123475
loss: 1.0013142824172974,grad_norm: 0.9999990905659637, iteration: 123476
loss: 1.0253647565841675,grad_norm: 0.9999989825781743, iteration: 123477
loss: 0.9722990393638611,grad_norm: 0.9999993308858927, iteration: 123478
loss: 0.9566871523857117,grad_norm: 0.9999990459577621, iteration: 123479
loss: 0.9834165573120117,grad_norm: 0.9999992943541033, iteration: 123480
loss: 0.9967241287231445,grad_norm: 0.9999990221457917, iteration: 123481
loss: 1.0816899538040161,grad_norm: 0.9999991581219247, iteration: 123482
loss: 0.9831662774085999,grad_norm: 0.9999989645267084, iteration: 123483
loss: 0.9885772466659546,grad_norm: 0.9753525568143804, iteration: 123484
loss: 1.012102484703064,grad_norm: 0.99829676451379, iteration: 123485
loss: 1.023730993270874,grad_norm: 0.878959929180658, iteration: 123486
loss: 0.9907932877540588,grad_norm: 0.9999992025975878, iteration: 123487
loss: 1.0287256240844727,grad_norm: 0.8909971719547791, iteration: 123488
loss: 1.0319772958755493,grad_norm: 0.9999993533592431, iteration: 123489
loss: 0.9871059656143188,grad_norm: 0.999999248668631, iteration: 123490
loss: 1.0139930248260498,grad_norm: 0.9999990791946182, iteration: 123491
loss: 1.0847601890563965,grad_norm: 0.9999999172457216, iteration: 123492
loss: 1.0015424489974976,grad_norm: 0.9999992376229041, iteration: 123493
loss: 0.9976692199707031,grad_norm: 0.999999138698786, iteration: 123494
loss: 1.0658488273620605,grad_norm: 0.9999990925670151, iteration: 123495
loss: 0.9956475496292114,grad_norm: 0.9999989007524581, iteration: 123496
loss: 1.0164278745651245,grad_norm: 0.9451517508740457, iteration: 123497
loss: 0.993866503238678,grad_norm: 0.8810888906164569, iteration: 123498
loss: 0.9995821118354797,grad_norm: 0.99999920789386, iteration: 123499
loss: 1.02228844165802,grad_norm: 0.9999991908801287, iteration: 123500
loss: 1.0092512369155884,grad_norm: 0.9999997865527703, iteration: 123501
loss: 1.0413047075271606,grad_norm: 0.9999996238408847, iteration: 123502
loss: 0.978732705116272,grad_norm: 0.999999232877579, iteration: 123503
loss: 0.9724835753440857,grad_norm: 0.9999991631575874, iteration: 123504
loss: 1.013190746307373,grad_norm: 0.9999990937981204, iteration: 123505
loss: 1.036814570426941,grad_norm: 0.9999992105423747, iteration: 123506
loss: 0.9802560806274414,grad_norm: 0.9999991638078513, iteration: 123507
loss: 1.0048061609268188,grad_norm: 0.9999993893077397, iteration: 123508
loss: 1.0197781324386597,grad_norm: 0.9772658233189309, iteration: 123509
loss: 1.001970648765564,grad_norm: 0.9999989949918949, iteration: 123510
loss: 0.9485262632369995,grad_norm: 0.9999990150716436, iteration: 123511
loss: 1.0191880464553833,grad_norm: 0.9999997853831897, iteration: 123512
loss: 1.0309516191482544,grad_norm: 0.8817106574309105, iteration: 123513
loss: 0.9940266013145447,grad_norm: 0.9999991038103838, iteration: 123514
loss: 1.0503123998641968,grad_norm: 0.9442492439155799, iteration: 123515
loss: 0.9946218132972717,grad_norm: 0.9586345263634898, iteration: 123516
loss: 1.0120906829833984,grad_norm: 0.9999989993915284, iteration: 123517
loss: 0.975043535232544,grad_norm: 0.9924530742594148, iteration: 123518
loss: 1.0165473222732544,grad_norm: 0.9999993799744934, iteration: 123519
loss: 0.9986401796340942,grad_norm: 0.9999989689721401, iteration: 123520
loss: 1.1245003938674927,grad_norm: 0.9999995209393585, iteration: 123521
loss: 0.985509991645813,grad_norm: 0.9676076464252554, iteration: 123522
loss: 0.9986335635185242,grad_norm: 0.9999991276831497, iteration: 123523
loss: 0.9702004194259644,grad_norm: 0.983347892681395, iteration: 123524
loss: 0.9634684920310974,grad_norm: 0.9999990888487259, iteration: 123525
loss: 1.0085787773132324,grad_norm: 0.9999997234946457, iteration: 123526
loss: 1.0692579746246338,grad_norm: 0.9999993145023047, iteration: 123527
loss: 1.0083204507827759,grad_norm: 0.9999992988111253, iteration: 123528
loss: 1.0050902366638184,grad_norm: 0.9999991870795634, iteration: 123529
loss: 0.9710776805877686,grad_norm: 0.9999991279225108, iteration: 123530
loss: 0.9967864751815796,grad_norm: 0.9999992287557337, iteration: 123531
loss: 1.0114428997039795,grad_norm: 0.943059404901975, iteration: 123532
loss: 0.9830522537231445,grad_norm: 0.9805919752528084, iteration: 123533
loss: 1.1095514297485352,grad_norm: 0.9999995946165243, iteration: 123534
loss: 0.9819062948226929,grad_norm: 0.9618390753623091, iteration: 123535
loss: 1.003623604774475,grad_norm: 0.9165314917969201, iteration: 123536
loss: 0.9809455275535583,grad_norm: 0.8779637916209193, iteration: 123537
loss: 0.9915639162063599,grad_norm: 0.9999990918410756, iteration: 123538
loss: 1.0373443365097046,grad_norm: 0.9296689264356762, iteration: 123539
loss: 0.998593270778656,grad_norm: 0.9882126798537115, iteration: 123540
loss: 1.0009691715240479,grad_norm: 0.9409484577884784, iteration: 123541
loss: 1.0584766864776611,grad_norm: 0.9999997648583885, iteration: 123542
loss: 1.050345778465271,grad_norm: 0.9999992617395416, iteration: 123543
loss: 0.9696516990661621,grad_norm: 0.9866019071364482, iteration: 123544
loss: 1.1115820407867432,grad_norm: 0.9655312612049389, iteration: 123545
loss: 1.0220420360565186,grad_norm: 0.999999685675105, iteration: 123546
loss: 0.9917691349983215,grad_norm: 0.9999992313066074, iteration: 123547
loss: 1.001931071281433,grad_norm: 0.9022419483160087, iteration: 123548
loss: 1.0151890516281128,grad_norm: 0.9999991749438114, iteration: 123549
loss: 0.9891213178634644,grad_norm: 0.9999990251861699, iteration: 123550
loss: 1.0181801319122314,grad_norm: 0.9999995195916808, iteration: 123551
loss: 1.0112491846084595,grad_norm: 0.8922899260961795, iteration: 123552
loss: 1.0081567764282227,grad_norm: 0.9235267991351367, iteration: 123553
loss: 0.9566999673843384,grad_norm: 0.9508914577191311, iteration: 123554
loss: 0.996653139591217,grad_norm: 0.8217807144917938, iteration: 123555
loss: 1.009735345840454,grad_norm: 0.9999992110794123, iteration: 123556
loss: 1.0113064050674438,grad_norm: 0.9999990179565843, iteration: 123557
loss: 1.0037775039672852,grad_norm: 0.999999149366851, iteration: 123558
loss: 0.9805024862289429,grad_norm: 0.9999990994872919, iteration: 123559
loss: 0.9919930100440979,grad_norm: 0.8664127743051304, iteration: 123560
loss: 1.0112454891204834,grad_norm: 0.9999992909317814, iteration: 123561
loss: 1.0934420824050903,grad_norm: 0.9999992344951932, iteration: 123562
loss: 1.0083019733428955,grad_norm: 0.9999992064158126, iteration: 123563
loss: 0.9980350136756897,grad_norm: 0.9999997084284237, iteration: 123564
loss: 0.9633673429489136,grad_norm: 0.8748479325404915, iteration: 123565
loss: 0.9953988790512085,grad_norm: 0.9999997021518462, iteration: 123566
loss: 0.9907978177070618,grad_norm: 0.9885268484061809, iteration: 123567
loss: 0.9765199422836304,grad_norm: 0.9999991203023201, iteration: 123568
loss: 0.9956385493278503,grad_norm: 0.9999989559071988, iteration: 123569
loss: 0.9348700046539307,grad_norm: 0.97844175676526, iteration: 123570
loss: 1.011676549911499,grad_norm: 0.9999992200667487, iteration: 123571
loss: 1.0043952465057373,grad_norm: 0.999999000650819, iteration: 123572
loss: 0.9794521927833557,grad_norm: 0.9999993585284825, iteration: 123573
loss: 0.9940314292907715,grad_norm: 0.999999392589685, iteration: 123574
loss: 0.9627862572669983,grad_norm: 0.888712559571667, iteration: 123575
loss: 1.0145277976989746,grad_norm: 0.9999996283483276, iteration: 123576
loss: 0.9941217303276062,grad_norm: 0.999999187360333, iteration: 123577
loss: 1.0275317430496216,grad_norm: 0.9693422777863214, iteration: 123578
loss: 0.9956775903701782,grad_norm: 0.9999991683495971, iteration: 123579
loss: 1.0176208019256592,grad_norm: 0.9999994740031335, iteration: 123580
loss: 0.9514946341514587,grad_norm: 0.9999990367427185, iteration: 123581
loss: 0.9718661904335022,grad_norm: 0.9999997010748385, iteration: 123582
loss: 1.0247200727462769,grad_norm: 0.9999992273807324, iteration: 123583
loss: 1.0019571781158447,grad_norm: 0.9167478315886602, iteration: 123584
loss: 1.0146781206130981,grad_norm: 0.9516813329429314, iteration: 123585
loss: 1.0234498977661133,grad_norm: 0.9999989853902423, iteration: 123586
loss: 1.0299493074417114,grad_norm: 0.9999991134225771, iteration: 123587
loss: 0.9776114225387573,grad_norm: 0.8604063097704092, iteration: 123588
loss: 1.0122047662734985,grad_norm: 0.999999253317842, iteration: 123589
loss: 0.9886502623558044,grad_norm: 0.9999989772469239, iteration: 123590
loss: 1.010740876197815,grad_norm: 0.9999995662891149, iteration: 123591
loss: 1.0223156213760376,grad_norm: 0.9999991885962195, iteration: 123592
loss: 0.995789647102356,grad_norm: 0.9999990819242621, iteration: 123593
loss: 1.0119329690933228,grad_norm: 0.9999991041100746, iteration: 123594
loss: 0.9871819615364075,grad_norm: 0.8572363061929433, iteration: 123595
loss: 0.9731599688529968,grad_norm: 0.8766072672928649, iteration: 123596
loss: 1.0080010890960693,grad_norm: 0.9999989518647062, iteration: 123597
loss: 1.0450867414474487,grad_norm: 0.9999993026443065, iteration: 123598
loss: 0.9808029532432556,grad_norm: 0.9322687197450876, iteration: 123599
loss: 0.982864260673523,grad_norm: 0.9999992260876924, iteration: 123600
loss: 1.009952187538147,grad_norm: 0.9993408814077077, iteration: 123601
loss: 0.9660120010375977,grad_norm: 0.9999991403550421, iteration: 123602
loss: 0.980240523815155,grad_norm: 0.999999061503013, iteration: 123603
loss: 0.9912745952606201,grad_norm: 0.8944215451737281, iteration: 123604
loss: 0.9906389117240906,grad_norm: 0.9999992349661286, iteration: 123605
loss: 1.0466876029968262,grad_norm: 0.9999991207635853, iteration: 123606
loss: 1.059216856956482,grad_norm: 0.999999661311954, iteration: 123607
loss: 1.0353724956512451,grad_norm: 0.9999993733754278, iteration: 123608
loss: 1.0090817213058472,grad_norm: 0.9999990998694434, iteration: 123609
loss: 0.996327817440033,grad_norm: 0.965387964785856, iteration: 123610
loss: 0.9902222752571106,grad_norm: 0.9149225307242027, iteration: 123611
loss: 0.9828063249588013,grad_norm: 0.9722132520709045, iteration: 123612
loss: 0.98646479845047,grad_norm: 0.9999992726488384, iteration: 123613
loss: 1.0372453927993774,grad_norm: 0.9999991556917468, iteration: 123614
loss: 1.0145591497421265,grad_norm: 0.9859384418587838, iteration: 123615
loss: 0.9854575395584106,grad_norm: 0.9999992214831538, iteration: 123616
loss: 0.9728478789329529,grad_norm: 0.9467376490086253, iteration: 123617
loss: 1.0415055751800537,grad_norm: 0.9999993640426972, iteration: 123618
loss: 0.9643705487251282,grad_norm: 0.999999027652732, iteration: 123619
loss: 1.0242916345596313,grad_norm: 0.975028829295905, iteration: 123620
loss: 1.0002574920654297,grad_norm: 0.9999991013835077, iteration: 123621
loss: 0.9650245308876038,grad_norm: 0.9999991421300154, iteration: 123622
loss: 0.9900025725364685,grad_norm: 0.99999932157715, iteration: 123623
loss: 0.9870281219482422,grad_norm: 0.8860653278480753, iteration: 123624
loss: 0.9871636629104614,grad_norm: 0.999999013040083, iteration: 123625
loss: 0.9938727617263794,grad_norm: 0.9999990657799149, iteration: 123626
loss: 1.0101429224014282,grad_norm: 0.9365067658067626, iteration: 123627
loss: 0.9962215423583984,grad_norm: 0.9999991339202757, iteration: 123628
loss: 0.9742305278778076,grad_norm: 0.8683520474285207, iteration: 123629
loss: 0.9705420136451721,grad_norm: 0.999999091210832, iteration: 123630
loss: 0.9506882429122925,grad_norm: 0.9999992393158749, iteration: 123631
loss: 0.9758195877075195,grad_norm: 0.9999989596399858, iteration: 123632
loss: 1.21684992313385,grad_norm: 0.9999995684592453, iteration: 123633
loss: 0.9906847476959229,grad_norm: 0.9999991014785107, iteration: 123634
loss: 1.0078197717666626,grad_norm: 0.9302549035255991, iteration: 123635
loss: 0.9551162719726562,grad_norm: 0.9999990081294443, iteration: 123636
loss: 1.004827618598938,grad_norm: 0.9599205054857411, iteration: 123637
loss: 1.0025988817214966,grad_norm: 0.9999991368623913, iteration: 123638
loss: 0.984771728515625,grad_norm: 0.9711509464385749, iteration: 123639
loss: 0.9694015979766846,grad_norm: 0.9999991395297352, iteration: 123640
loss: 1.01572847366333,grad_norm: 0.9999991864621324, iteration: 123641
loss: 1.1388052701950073,grad_norm: 0.9999994271435462, iteration: 123642
loss: 1.0495625734329224,grad_norm: 0.9999989646220618, iteration: 123643
loss: 0.9608376026153564,grad_norm: 0.9999991181827673, iteration: 123644
loss: 0.9728821516036987,grad_norm: 0.9030571624732315, iteration: 123645
loss: 1.032769799232483,grad_norm: 0.9529198025458423, iteration: 123646
loss: 0.9769842624664307,grad_norm: 0.9999993417063543, iteration: 123647
loss: 0.9695101976394653,grad_norm: 0.8153067128040462, iteration: 123648
loss: 0.9937039017677307,grad_norm: 0.948962795429238, iteration: 123649
loss: 0.996864914894104,grad_norm: 0.9999997613849027, iteration: 123650
loss: 0.9796543121337891,grad_norm: 0.9999990899752754, iteration: 123651
loss: 0.9408153295516968,grad_norm: 0.9999991484372469, iteration: 123652
loss: 0.9961645007133484,grad_norm: 0.999999109959426, iteration: 123653
loss: 0.9699686765670776,grad_norm: 0.9999993244427956, iteration: 123654
loss: 1.022296667098999,grad_norm: 0.9748496372980346, iteration: 123655
loss: 1.0126867294311523,grad_norm: 0.967114488691643, iteration: 123656
loss: 0.9466116428375244,grad_norm: 0.9999991813971593, iteration: 123657
loss: 1.0933243036270142,grad_norm: 0.9999992025176413, iteration: 123658
loss: 0.9898993372917175,grad_norm: 0.9999991363898351, iteration: 123659
loss: 1.0188857316970825,grad_norm: 0.999999081836173, iteration: 123660
loss: 0.9694973230361938,grad_norm: 0.9999991124510815, iteration: 123661
loss: 1.0137231349945068,grad_norm: 0.8738690501050341, iteration: 123662
loss: 1.0032464265823364,grad_norm: 0.8151406603979736, iteration: 123663
loss: 1.0016148090362549,grad_norm: 0.9999991957084767, iteration: 123664
loss: 0.9934958219528198,grad_norm: 0.9370872850746579, iteration: 123665
loss: 0.9842095971107483,grad_norm: 0.9999991498613493, iteration: 123666
loss: 1.0239585638046265,grad_norm: 0.9999990230530488, iteration: 123667
loss: 1.0020859241485596,grad_norm: 0.9999991644035231, iteration: 123668
loss: 0.9838430285453796,grad_norm: 0.9999991887811983, iteration: 123669
loss: 0.9783942699432373,grad_norm: 0.9999994302287877, iteration: 123670
loss: 1.0176489353179932,grad_norm: 0.8329661238788265, iteration: 123671
loss: 1.004287838935852,grad_norm: 0.9999991767339974, iteration: 123672
loss: 1.0192570686340332,grad_norm: 0.9334502317667492, iteration: 123673
loss: 1.025355577468872,grad_norm: 0.8890479221443903, iteration: 123674
loss: 0.9891674518585205,grad_norm: 0.9999990454264043, iteration: 123675
loss: 0.9782378673553467,grad_norm: 0.9999991472675488, iteration: 123676
loss: 0.9704817533493042,grad_norm: 0.999999041890893, iteration: 123677
loss: 0.9633813500404358,grad_norm: 0.9997774267966045, iteration: 123678
loss: 0.9839445352554321,grad_norm: 0.9999992944217603, iteration: 123679
loss: 1.0166488885879517,grad_norm: 0.9999990609867758, iteration: 123680
loss: 1.0352832078933716,grad_norm: 0.9999990826752633, iteration: 123681
loss: 1.0000280141830444,grad_norm: 0.9999994491281984, iteration: 123682
loss: 1.0057494640350342,grad_norm: 0.8909901746816553, iteration: 123683
loss: 0.9702525734901428,grad_norm: 0.9999990745758142, iteration: 123684
loss: 1.0076427459716797,grad_norm: 0.9999990583838354, iteration: 123685
loss: 0.9881060123443604,grad_norm: 0.9153827366547512, iteration: 123686
loss: 1.025643229484558,grad_norm: 0.9999990043751095, iteration: 123687
loss: 0.9731993079185486,grad_norm: 0.8788604125816463, iteration: 123688
loss: 0.9599026441574097,grad_norm: 0.99999934530539, iteration: 123689
loss: 1.032748818397522,grad_norm: 0.9999999273233854, iteration: 123690
loss: 1.002199411392212,grad_norm: 0.9999992013562823, iteration: 123691
loss: 0.9725660085678101,grad_norm: 0.9129089070445394, iteration: 123692
loss: 0.998401403427124,grad_norm: 0.9999989586098522, iteration: 123693
loss: 1.0137596130371094,grad_norm: 0.9999991282230656, iteration: 123694
loss: 0.969690203666687,grad_norm: 0.9718359840018994, iteration: 123695
loss: 1.0164304971694946,grad_norm: 0.9886629771423465, iteration: 123696
loss: 1.0174411535263062,grad_norm: 0.9861078955150608, iteration: 123697
loss: 0.9938071966171265,grad_norm: 0.9999990651075069, iteration: 123698
loss: 0.9781494140625,grad_norm: 0.9846663848523626, iteration: 123699
loss: 1.0002901554107666,grad_norm: 0.975483922789892, iteration: 123700
loss: 1.0107423067092896,grad_norm: 0.9930661709938411, iteration: 123701
loss: 1.022024154663086,grad_norm: 0.9826699368237036, iteration: 123702
loss: 0.9899775981903076,grad_norm: 0.9999992797583123, iteration: 123703
loss: 1.016157627105713,grad_norm: 0.9999996985702286, iteration: 123704
loss: 1.0083799362182617,grad_norm: 0.9999990298372742, iteration: 123705
loss: 1.0234493017196655,grad_norm: 0.9999990286104649, iteration: 123706
loss: 0.9398037195205688,grad_norm: 0.9746567630108314, iteration: 123707
loss: 1.0069022178649902,grad_norm: 0.9262318224958022, iteration: 123708
loss: 0.9923763275146484,grad_norm: 0.8428489241502007, iteration: 123709
loss: 1.0249476432800293,grad_norm: 0.9999998252501502, iteration: 123710
loss: 1.000036358833313,grad_norm: 0.9999991479664208, iteration: 123711
loss: 0.9947761297225952,grad_norm: 0.9168499768535472, iteration: 123712
loss: 0.9916676878929138,grad_norm: 0.8680818925428881, iteration: 123713
loss: 0.9540262222290039,grad_norm: 0.999999181714293, iteration: 123714
loss: 1.0007011890411377,grad_norm: 0.9999992528554386, iteration: 123715
loss: 0.9868295788764954,grad_norm: 0.9999990835293551, iteration: 123716
loss: 1.0163060426712036,grad_norm: 0.9999990765567426, iteration: 123717
loss: 1.0479462146759033,grad_norm: 0.9999996138104407, iteration: 123718
loss: 1.0244947671890259,grad_norm: 0.999999143675997, iteration: 123719
loss: 0.9942626953125,grad_norm: 0.992881093013121, iteration: 123720
loss: 0.9955483675003052,grad_norm: 0.9999991595360875, iteration: 123721
loss: 0.9900469779968262,grad_norm: 0.9999991481046081, iteration: 123722
loss: 1.0049506425857544,grad_norm: 0.9999992231349883, iteration: 123723
loss: 1.0178792476654053,grad_norm: 0.9999989843275668, iteration: 123724
loss: 1.0074113607406616,grad_norm: 0.9999992619367293, iteration: 123725
loss: 0.9728820323944092,grad_norm: 0.990873138417035, iteration: 123726
loss: 1.0091513395309448,grad_norm: 0.8900862041943982, iteration: 123727
loss: 0.9851646423339844,grad_norm: 0.9999993165260015, iteration: 123728
loss: 0.9949026703834534,grad_norm: 0.9999991702321948, iteration: 123729
loss: 1.0183571577072144,grad_norm: 0.9999989764896832, iteration: 123730
loss: 1.0600926876068115,grad_norm: 0.9999992571266091, iteration: 123731
loss: 1.0219911336898804,grad_norm: 0.9342536874791093, iteration: 123732
loss: 1.0203189849853516,grad_norm: 0.999999358477114, iteration: 123733
loss: 1.0365195274353027,grad_norm: 0.9890505015072913, iteration: 123734
loss: 1.0260961055755615,grad_norm: 0.9472467758081906, iteration: 123735
loss: 1.0255242586135864,grad_norm: 0.9999990652793316, iteration: 123736
loss: 1.013772964477539,grad_norm: 0.9999990263755378, iteration: 123737
loss: 0.9846424460411072,grad_norm: 0.9714222789734185, iteration: 123738
loss: 0.9952688813209534,grad_norm: 0.9726062989399367, iteration: 123739
loss: 1.231388807296753,grad_norm: 0.9999996930240773, iteration: 123740
loss: 0.9711605310440063,grad_norm: 0.8265761122389611, iteration: 123741
loss: 1.0245587825775146,grad_norm: 0.9999993068505193, iteration: 123742
loss: 1.0253068208694458,grad_norm: 0.9999989662855227, iteration: 123743
loss: 1.0400081872940063,grad_norm: 0.9999989432261607, iteration: 123744
loss: 0.9924225211143494,grad_norm: 0.9999991906033523, iteration: 123745
loss: 0.9747417569160461,grad_norm: 0.99999914254892, iteration: 123746
loss: 1.1656502485275269,grad_norm: 0.9999995522613953, iteration: 123747
loss: 1.0223442316055298,grad_norm: 0.9999991518953151, iteration: 123748
loss: 1.0177826881408691,grad_norm: 0.9999990359576194, iteration: 123749
loss: 0.9719564318656921,grad_norm: 0.9220725722055388, iteration: 123750
loss: 1.0360076427459717,grad_norm: 0.9999992698939184, iteration: 123751
loss: 1.019662857055664,grad_norm: 0.9999990015264123, iteration: 123752
loss: 0.9978330731391907,grad_norm: 0.9999991041266953, iteration: 123753
loss: 0.9974609613418579,grad_norm: 0.9999991104404555, iteration: 123754
loss: 0.969256579875946,grad_norm: 0.9999991092528148, iteration: 123755
loss: 0.9690302014350891,grad_norm: 0.9999991107149762, iteration: 123756
loss: 0.9843550324440002,grad_norm: 0.999999020423905, iteration: 123757
loss: 1.0145224332809448,grad_norm: 0.9999996516619268, iteration: 123758
loss: 0.9680626392364502,grad_norm: 0.9999992376625364, iteration: 123759
loss: 0.9598840475082397,grad_norm: 0.9999991592259481, iteration: 123760
loss: 1.0722179412841797,grad_norm: 0.9219822550464023, iteration: 123761
loss: 1.037642478942871,grad_norm: 0.9999994983819611, iteration: 123762
loss: 1.0083603858947754,grad_norm: 0.9999991431453414, iteration: 123763
loss: 1.006218433380127,grad_norm: 0.9999990850648169, iteration: 123764
loss: 0.9925670027732849,grad_norm: 0.9999993519988687, iteration: 123765
loss: 0.9788773655891418,grad_norm: 0.9999990935343309, iteration: 123766
loss: 1.007908582687378,grad_norm: 0.8621930667878532, iteration: 123767
loss: 1.0290043354034424,grad_norm: 0.9999990531252578, iteration: 123768
loss: 1.012046217918396,grad_norm: 0.8739409141586674, iteration: 123769
loss: 1.0310986042022705,grad_norm: 0.8607919839152907, iteration: 123770
loss: 0.9858890771865845,grad_norm: 0.9734988338347176, iteration: 123771
loss: 0.9705600738525391,grad_norm: 0.9999993035548195, iteration: 123772
loss: 1.0639793872833252,grad_norm: 0.9999998466213759, iteration: 123773
loss: 0.9834357500076294,grad_norm: 0.9999991867411535, iteration: 123774
loss: 0.960675060749054,grad_norm: 0.9891745611032342, iteration: 123775
loss: 1.024103045463562,grad_norm: 0.9999991243003004, iteration: 123776
loss: 1.002779245376587,grad_norm: 0.9999991090218356, iteration: 123777
loss: 1.0020596981048584,grad_norm: 0.9999990925983735, iteration: 123778
loss: 1.0147953033447266,grad_norm: 0.9935020197534694, iteration: 123779
loss: 1.030085802078247,grad_norm: 0.9999992949653814, iteration: 123780
loss: 1.003252387046814,grad_norm: 0.9999991579379701, iteration: 123781
loss: 1.0084680318832397,grad_norm: 0.9999989544660582, iteration: 123782
loss: 1.1039307117462158,grad_norm: 0.9999991788337835, iteration: 123783
loss: 1.0099873542785645,grad_norm: 0.9999999793590941, iteration: 123784
loss: 0.932978093624115,grad_norm: 0.9999989001477367, iteration: 123785
loss: 0.9771069884300232,grad_norm: 0.9738930627035611, iteration: 123786
loss: 0.9654579162597656,grad_norm: 0.9999990728720549, iteration: 123787
loss: 0.9951560497283936,grad_norm: 0.9558130500789873, iteration: 123788
loss: 0.9666279554367065,grad_norm: 0.8833688286884284, iteration: 123789
loss: 1.0250427722930908,grad_norm: 0.8946069903010487, iteration: 123790
loss: 1.022513747215271,grad_norm: 0.9999992538704164, iteration: 123791
loss: 1.211846113204956,grad_norm: 0.9999999184039923, iteration: 123792
loss: 0.9752045273780823,grad_norm: 0.858628908767904, iteration: 123793
loss: 1.0192211866378784,grad_norm: 0.8028529502499357, iteration: 123794
loss: 0.9753050208091736,grad_norm: 0.9999991501571734, iteration: 123795
loss: 0.9880436658859253,grad_norm: 0.8486219603813193, iteration: 123796
loss: 0.9999300837516785,grad_norm: 0.9999991121566499, iteration: 123797
loss: 0.9827508926391602,grad_norm: 0.9999991884467684, iteration: 123798
loss: 1.1181172132492065,grad_norm: 0.9999996584154031, iteration: 123799
loss: 0.978352963924408,grad_norm: 0.9999991871921339, iteration: 123800
loss: 1.1656569242477417,grad_norm: 0.9999990237692563, iteration: 123801
loss: 1.0282827615737915,grad_norm: 0.9999991282594864, iteration: 123802
loss: 0.9602102637290955,grad_norm: 0.9999992167878151, iteration: 123803
loss: 0.9558038711547852,grad_norm: 0.9999991522410973, iteration: 123804
loss: 1.0151183605194092,grad_norm: 0.9999990437876045, iteration: 123805
loss: 1.0482792854309082,grad_norm: 0.999999219517155, iteration: 123806
loss: 0.9938597679138184,grad_norm: 0.9999992195395727, iteration: 123807
loss: 0.9761415123939514,grad_norm: 0.9731575785710961, iteration: 123808
loss: 1.0194522142410278,grad_norm: 0.9999992963247208, iteration: 123809
loss: 1.0231716632843018,grad_norm: 0.9999991853544301, iteration: 123810
loss: 1.0483412742614746,grad_norm: 0.9999992370522739, iteration: 123811
loss: 0.9646060466766357,grad_norm: 0.9382717989996415, iteration: 123812
loss: 0.9985833764076233,grad_norm: 0.9876526983285929, iteration: 123813
loss: 0.9670307636260986,grad_norm: 0.9999989152525287, iteration: 123814
loss: 0.9917558431625366,grad_norm: 0.9254555823980838, iteration: 123815
loss: 1.1371628046035767,grad_norm: 0.9999998761937, iteration: 123816
loss: 1.0061861276626587,grad_norm: 0.9742038755577273, iteration: 123817
loss: 1.047363519668579,grad_norm: 0.9999991551511825, iteration: 123818
loss: 0.9953061938285828,grad_norm: 0.8022544693783097, iteration: 123819
loss: 1.0084208250045776,grad_norm: 0.9999991484421431, iteration: 123820
loss: 0.9882904291152954,grad_norm: 0.9999991212813469, iteration: 123821
loss: 0.9598031640052795,grad_norm: 0.8881334355902704, iteration: 123822
loss: 1.0088000297546387,grad_norm: 0.9999991842840763, iteration: 123823
loss: 0.9791706204414368,grad_norm: 0.9124367296014705, iteration: 123824
loss: 0.9985566735267639,grad_norm: 0.9430990666847621, iteration: 123825
loss: 0.9722063541412354,grad_norm: 0.9999990520284332, iteration: 123826
loss: 1.023891806602478,grad_norm: 0.9435268566060672, iteration: 123827
loss: 0.9466827511787415,grad_norm: 0.999999073320919, iteration: 123828
loss: 1.0903699398040771,grad_norm: 0.9999996302249939, iteration: 123829
loss: 0.9862752556800842,grad_norm: 0.9999991563045273, iteration: 123830
loss: 1.0558820962905884,grad_norm: 0.9999996882866206, iteration: 123831
loss: 0.9814534187316895,grad_norm: 0.9999991244314633, iteration: 123832
loss: 1.0060577392578125,grad_norm: 0.9999990338950409, iteration: 123833
loss: 1.020997405052185,grad_norm: 0.9999990733419917, iteration: 123834
loss: 1.0152490139007568,grad_norm: 0.9999991410784105, iteration: 123835
loss: 1.0188781023025513,grad_norm: 0.9593723722370721, iteration: 123836
loss: 0.9756182432174683,grad_norm: 0.9999991238933378, iteration: 123837
loss: 1.1619209051132202,grad_norm: 0.9999999348801399, iteration: 123838
loss: 1.0138969421386719,grad_norm: 0.8654066520269289, iteration: 123839
loss: 0.9910489320755005,grad_norm: 0.9999990412630025, iteration: 123840
loss: 0.9824825525283813,grad_norm: 0.9800571252392087, iteration: 123841
loss: 1.0261859893798828,grad_norm: 0.9999992546641954, iteration: 123842
loss: 0.9783297181129456,grad_norm: 0.9999989876599663, iteration: 123843
loss: 1.0045086145401,grad_norm: 0.994954280519164, iteration: 123844
loss: 1.099916934967041,grad_norm: 0.9999996963880899, iteration: 123845
loss: 1.0256822109222412,grad_norm: 0.9999991062017429, iteration: 123846
loss: 1.000296950340271,grad_norm: 0.854206204009484, iteration: 123847
loss: 0.9930293560028076,grad_norm: 0.9999991236748873, iteration: 123848
loss: 0.9936869740486145,grad_norm: 0.9999990353468894, iteration: 123849
loss: 1.0831037759780884,grad_norm: 0.9999997336323727, iteration: 123850
loss: 0.9669901728630066,grad_norm: 0.9715167648332159, iteration: 123851
loss: 0.9871577024459839,grad_norm: 0.9776497596312236, iteration: 123852
loss: 1.0020463466644287,grad_norm: 0.8997441508887681, iteration: 123853
loss: 1.0331212282180786,grad_norm: 0.9999991959639184, iteration: 123854
loss: 0.9778390526771545,grad_norm: 0.8721291209880107, iteration: 123855
loss: 0.9799064993858337,grad_norm: 0.9262011510365972, iteration: 123856
loss: 1.0983542203903198,grad_norm: 0.9999991128639585, iteration: 123857
loss: 0.9910576343536377,grad_norm: 0.9999991280579791, iteration: 123858
loss: 1.041374921798706,grad_norm: 0.9999998943599225, iteration: 123859
loss: 0.9524503350257874,grad_norm: 0.9999989953792702, iteration: 123860
loss: 0.9834325313568115,grad_norm: 0.9999989925337112, iteration: 123861
loss: 0.9775887131690979,grad_norm: 0.9999992008340841, iteration: 123862
loss: 1.0166475772857666,grad_norm: 0.9999988999912435, iteration: 123863
loss: 0.9849640727043152,grad_norm: 0.9913765850803187, iteration: 123864
loss: 0.995722234249115,grad_norm: 0.9999993173047727, iteration: 123865
loss: 1.1028245687484741,grad_norm: 0.9432666502371305, iteration: 123866
loss: 1.1174495220184326,grad_norm: 0.9999993321656185, iteration: 123867
loss: 1.024059772491455,grad_norm: 0.9999990708584799, iteration: 123868
loss: 0.970776379108429,grad_norm: 0.905748160289816, iteration: 123869
loss: 0.9951964020729065,grad_norm: 0.999999206750809, iteration: 123870
loss: 1.0201767683029175,grad_norm: 0.9999991374819516, iteration: 123871
loss: 0.98652184009552,grad_norm: 0.8988606668778697, iteration: 123872
loss: 0.9854355454444885,grad_norm: 0.9092868613297105, iteration: 123873
loss: 1.0182594060897827,grad_norm: 0.9999990914831508, iteration: 123874
loss: 1.039970874786377,grad_norm: 0.9999994956152006, iteration: 123875
loss: 1.1245901584625244,grad_norm: 0.9670582912346471, iteration: 123876
loss: 1.0293185710906982,grad_norm: 0.9999993555773824, iteration: 123877
loss: 1.0209928750991821,grad_norm: 0.9999990402629014, iteration: 123878
loss: 1.1228467226028442,grad_norm: 0.999999103255356, iteration: 123879
loss: 1.0091043710708618,grad_norm: 0.8693633024249675, iteration: 123880
loss: 0.9968447089195251,grad_norm: 0.9950668031415206, iteration: 123881
loss: 0.9796846508979797,grad_norm: 0.9999991308034281, iteration: 123882
loss: 0.9572059512138367,grad_norm: 0.9999991391483796, iteration: 123883
loss: 0.9713650941848755,grad_norm: 0.9999991449954388, iteration: 123884
loss: 0.9845243692398071,grad_norm: 0.9999995294045965, iteration: 123885
loss: 1.039863109588623,grad_norm: 0.9999991966527292, iteration: 123886
loss: 0.9562379717826843,grad_norm: 0.908796385300048, iteration: 123887
loss: 1.0109131336212158,grad_norm: 0.9999992193578003, iteration: 123888
loss: 1.0030872821807861,grad_norm: 0.9999991605556214, iteration: 123889
loss: 1.0104548931121826,grad_norm: 0.9999991239795265, iteration: 123890
loss: 1.0247044563293457,grad_norm: 0.9111828190899663, iteration: 123891
loss: 0.9655095934867859,grad_norm: 0.8813800961023303, iteration: 123892
loss: 1.0069574117660522,grad_norm: 0.999999084889423, iteration: 123893
loss: 1.0062623023986816,grad_norm: 0.9713415299586288, iteration: 123894
loss: 1.0026615858078003,grad_norm: 0.9999991041618295, iteration: 123895
loss: 0.9947371482849121,grad_norm: 0.9999990485894598, iteration: 123896
loss: 1.003749132156372,grad_norm: 0.9999992818122595, iteration: 123897
loss: 0.9886085391044617,grad_norm: 0.9999992201630091, iteration: 123898
loss: 1.017195224761963,grad_norm: 0.9557833164625211, iteration: 123899
loss: 1.036199927330017,grad_norm: 0.9999992091796766, iteration: 123900
loss: 0.9881309270858765,grad_norm: 0.9999990701876429, iteration: 123901
loss: 0.9747040867805481,grad_norm: 0.9999990172341623, iteration: 123902
loss: 0.9669306874275208,grad_norm: 0.8756332549611864, iteration: 123903
loss: 1.0010275840759277,grad_norm: 0.9999990721436516, iteration: 123904
loss: 1.012951374053955,grad_norm: 0.9803138518732316, iteration: 123905
loss: 1.0416756868362427,grad_norm: 0.9037589861616789, iteration: 123906
loss: 0.9881412386894226,grad_norm: 0.9999992778607163, iteration: 123907
loss: 1.0355315208435059,grad_norm: 0.9999991035999198, iteration: 123908
loss: 1.0039803981781006,grad_norm: 0.9864720999691908, iteration: 123909
loss: 1.0141503810882568,grad_norm: 0.9538752534065361, iteration: 123910
loss: 0.9738072156906128,grad_norm: 0.9655950724253343, iteration: 123911
loss: 1.019553303718567,grad_norm: 0.9999991173439262, iteration: 123912
loss: 1.010265588760376,grad_norm: 0.9328337484915467, iteration: 123913
loss: 0.9758476614952087,grad_norm: 0.9999991309612992, iteration: 123914
loss: 0.9852840900421143,grad_norm: 0.9801849560267349, iteration: 123915
loss: 0.9825414419174194,grad_norm: 0.9999997347979771, iteration: 123916
loss: 0.992521345615387,grad_norm: 0.9999992166541608, iteration: 123917
loss: 0.9915591478347778,grad_norm: 0.9999989947990677, iteration: 123918
loss: 1.0212596654891968,grad_norm: 0.8338948139367204, iteration: 123919
loss: 0.9883167743682861,grad_norm: 0.9999989902164473, iteration: 123920
loss: 0.9780976176261902,grad_norm: 0.9893524893882567, iteration: 123921
loss: 0.9442205429077148,grad_norm: 0.9999991215722218, iteration: 123922
loss: 0.983449399471283,grad_norm: 0.9999991613719676, iteration: 123923
loss: 1.0109699964523315,grad_norm: 0.999999166338802, iteration: 123924
loss: 0.9634902477264404,grad_norm: 0.999999105380909, iteration: 123925
loss: 0.9811410307884216,grad_norm: 0.9649431471631768, iteration: 123926
loss: 1.002036213874817,grad_norm: 0.9999990799978631, iteration: 123927
loss: 0.976315438747406,grad_norm: 0.9682997185233044, iteration: 123928
loss: 0.9629542231559753,grad_norm: 0.9999990554357431, iteration: 123929
loss: 1.0040000677108765,grad_norm: 0.9922868690490896, iteration: 123930
loss: 1.0095272064208984,grad_norm: 0.9999991208802117, iteration: 123931
loss: 1.0182499885559082,grad_norm: 0.9517770764485245, iteration: 123932
loss: 1.0731533765792847,grad_norm: 0.9999998345631361, iteration: 123933
loss: 0.9975879788398743,grad_norm: 0.9701828651260697, iteration: 123934
loss: 0.9972970485687256,grad_norm: 0.9893149429746602, iteration: 123935
loss: 1.0044922828674316,grad_norm: 0.9999992301663638, iteration: 123936
loss: 0.9840956926345825,grad_norm: 0.9999992722835148, iteration: 123937
loss: 1.0163531303405762,grad_norm: 0.9999990368523847, iteration: 123938
loss: 1.0046852827072144,grad_norm: 0.9766784653894885, iteration: 123939
loss: 0.9924646615982056,grad_norm: 0.9999990922365253, iteration: 123940
loss: 0.9720773696899414,grad_norm: 0.9999991450199229, iteration: 123941
loss: 1.0044299364089966,grad_norm: 0.878633750299027, iteration: 123942
loss: 1.0030035972595215,grad_norm: 0.974480630282861, iteration: 123943
loss: 1.0265212059020996,grad_norm: 0.999999110386084, iteration: 123944
loss: 1.0089741945266724,grad_norm: 0.9936083857243286, iteration: 123945
loss: 0.9848711490631104,grad_norm: 0.9288228483304504, iteration: 123946
loss: 1.017228126525879,grad_norm: 0.8514622797760202, iteration: 123947
loss: 1.0176022052764893,grad_norm: 0.9686361696870774, iteration: 123948
loss: 0.9618492722511292,grad_norm: 0.9719311156798812, iteration: 123949
loss: 1.0052553415298462,grad_norm: 0.9577353486974429, iteration: 123950
loss: 0.9997690320014954,grad_norm: 0.9999993408551385, iteration: 123951
loss: 0.9993383288383484,grad_norm: 0.9705228012651517, iteration: 123952
loss: 0.9783507585525513,grad_norm: 0.9999995233616815, iteration: 123953
loss: 1.042249083518982,grad_norm: 0.9999991999964783, iteration: 123954
loss: 0.9973179697990417,grad_norm: 0.9999990558903474, iteration: 123955
loss: 1.0100862979888916,grad_norm: 0.8609804911411121, iteration: 123956
loss: 0.9899961948394775,grad_norm: 0.999999154496826, iteration: 123957
loss: 0.9622478485107422,grad_norm: 0.9999991742961342, iteration: 123958
loss: 0.9972379803657532,grad_norm: 0.999999181851824, iteration: 123959
loss: 1.010211706161499,grad_norm: 0.9999991874697833, iteration: 123960
loss: 1.0953620672225952,grad_norm: 0.9999989854353296, iteration: 123961
loss: 1.02167809009552,grad_norm: 0.9999992422395565, iteration: 123962
loss: 1.0036696195602417,grad_norm: 0.9995855101394514, iteration: 123963
loss: 1.0102524757385254,grad_norm: 0.9618260378425927, iteration: 123964
loss: 1.0044416189193726,grad_norm: 0.9999991994846714, iteration: 123965
loss: 1.012974500656128,grad_norm: 0.9967284268082193, iteration: 123966
loss: 0.9912159442901611,grad_norm: 0.9999990032782837, iteration: 123967
loss: 0.993632972240448,grad_norm: 0.9999992822268668, iteration: 123968
loss: 1.019076943397522,grad_norm: 0.8566072315097052, iteration: 123969
loss: 0.9957599639892578,grad_norm: 0.9695740501073341, iteration: 123970
loss: 1.0036311149597168,grad_norm: 0.9581015117922754, iteration: 123971
loss: 1.0471999645233154,grad_norm: 0.9999994893860685, iteration: 123972
loss: 1.0037546157836914,grad_norm: 0.9999991028075089, iteration: 123973
loss: 1.0065727233886719,grad_norm: 0.9999989875228152, iteration: 123974
loss: 1.015863060951233,grad_norm: 0.9999990311792079, iteration: 123975
loss: 0.9710010886192322,grad_norm: 0.9999992996239444, iteration: 123976
loss: 0.9858141541481018,grad_norm: 0.9569269425154574, iteration: 123977
loss: 1.0137275457382202,grad_norm: 0.9820121804252584, iteration: 123978
loss: 0.9721006155014038,grad_norm: 0.9999992792500323, iteration: 123979
loss: 0.9911920428276062,grad_norm: 0.9999990528349544, iteration: 123980
loss: 0.9651317596435547,grad_norm: 0.9830894306519656, iteration: 123981
loss: 0.9938324093818665,grad_norm: 0.9999991019196961, iteration: 123982
loss: 0.9721840620040894,grad_norm: 0.9252727354570018, iteration: 123983
loss: 1.105971097946167,grad_norm: 0.9999991970818288, iteration: 123984
loss: 1.0369644165039062,grad_norm: 0.9999991966435567, iteration: 123985
loss: 0.984006941318512,grad_norm: 0.9440633521817374, iteration: 123986
loss: 1.004080891609192,grad_norm: 0.9754486203021911, iteration: 123987
loss: 1.0166199207305908,grad_norm: 0.9999990466603498, iteration: 123988
loss: 0.9960845112800598,grad_norm: 0.9014501739188266, iteration: 123989
loss: 1.0042883157730103,grad_norm: 0.9999993416890627, iteration: 123990
loss: 0.9991527795791626,grad_norm: 0.968892883132532, iteration: 123991
loss: 1.0197973251342773,grad_norm: 0.9999992901522113, iteration: 123992
loss: 0.9864956140518188,grad_norm: 0.9999992138699431, iteration: 123993
loss: 1.0118372440338135,grad_norm: 0.9267378485731508, iteration: 123994
loss: 1.0113370418548584,grad_norm: 0.9999992051836714, iteration: 123995
loss: 0.9715493321418762,grad_norm: 0.9999990324199683, iteration: 123996
loss: 0.9937225580215454,grad_norm: 0.9656962352471838, iteration: 123997
loss: 1.000311017036438,grad_norm: 0.9660875843787514, iteration: 123998
loss: 0.9982720017433167,grad_norm: 0.9999992964958955, iteration: 123999
loss: 1.0031862258911133,grad_norm: 0.9999997295479058, iteration: 124000
loss: 1.0029674768447876,grad_norm: 0.974875915032751, iteration: 124001
loss: 0.9377491474151611,grad_norm: 0.9999990987100225, iteration: 124002
loss: 1.013203740119934,grad_norm: 0.9999990523850114, iteration: 124003
loss: 0.9972192645072937,grad_norm: 0.9999991918542074, iteration: 124004
loss: 1.0458166599273682,grad_norm: 0.9999995595262547, iteration: 124005
loss: 0.9478397965431213,grad_norm: 0.9241265081854466, iteration: 124006
loss: 1.0355621576309204,grad_norm: 0.9999991443195263, iteration: 124007
loss: 1.030716896057129,grad_norm: 0.9999991923908296, iteration: 124008
loss: 1.0101404190063477,grad_norm: 0.9750097299958813, iteration: 124009
loss: 1.0339802503585815,grad_norm: 0.9999993695959867, iteration: 124010
loss: 0.9875727891921997,grad_norm: 0.999999133605888, iteration: 124011
loss: 0.9495891332626343,grad_norm: 0.9999991144979441, iteration: 124012
loss: 1.0173211097717285,grad_norm: 0.9999992690378635, iteration: 124013
loss: 0.9874535202980042,grad_norm: 0.9999991068307098, iteration: 124014
loss: 1.0113767385482788,grad_norm: 0.9999990639482541, iteration: 124015
loss: 1.0479893684387207,grad_norm: 0.9999991709905413, iteration: 124016
loss: 1.0039095878601074,grad_norm: 0.9999990228668953, iteration: 124017
loss: 1.0275747776031494,grad_norm: 0.9078608953066232, iteration: 124018
loss: 1.00007164478302,grad_norm: 0.9999989354321271, iteration: 124019
loss: 0.9921560883522034,grad_norm: 0.975001217168375, iteration: 124020
loss: 1.0132973194122314,grad_norm: 0.9034307464181119, iteration: 124021
loss: 0.9779758453369141,grad_norm: 0.9999990675946613, iteration: 124022
loss: 0.9847844839096069,grad_norm: 0.9999990213086815, iteration: 124023
loss: 0.9669088125228882,grad_norm: 0.9842714255127419, iteration: 124024
loss: 1.0054395198822021,grad_norm: 0.8364839500433352, iteration: 124025
loss: 1.0525903701782227,grad_norm: 0.9999991341048603, iteration: 124026
loss: 1.0178583860397339,grad_norm: 0.9999990183726456, iteration: 124027
loss: 1.006990909576416,grad_norm: 0.8965757698531945, iteration: 124028
loss: 1.006324291229248,grad_norm: 0.9999995722899504, iteration: 124029
loss: 0.9906084537506104,grad_norm: 0.9695196694970831, iteration: 124030
loss: 0.9576112031936646,grad_norm: 0.999999128799268, iteration: 124031
loss: 1.028172254562378,grad_norm: 0.9999995395064613, iteration: 124032
loss: 1.0237785577774048,grad_norm: 0.9999989436625819, iteration: 124033
loss: 1.0106149911880493,grad_norm: 0.9999991073144917, iteration: 124034
loss: 0.9724165201187134,grad_norm: 0.947540656562594, iteration: 124035
loss: 1.010994791984558,grad_norm: 0.9528427217438473, iteration: 124036
loss: 1.0450353622436523,grad_norm: 0.9999992810310715, iteration: 124037
loss: 0.9823320508003235,grad_norm: 0.9999989970225251, iteration: 124038
loss: 1.0132746696472168,grad_norm: 0.999074653788356, iteration: 124039
loss: 0.9766840934753418,grad_norm: 0.8814612524606322, iteration: 124040
loss: 1.0035536289215088,grad_norm: 0.9999990225537932, iteration: 124041
loss: 1.0356770753860474,grad_norm: 0.9999992059957489, iteration: 124042
loss: 1.0505539178848267,grad_norm: 0.9999999321779115, iteration: 124043
loss: 0.9885212182998657,grad_norm: 0.9958867318464664, iteration: 124044
loss: 1.0099824666976929,grad_norm: 0.9656538283591155, iteration: 124045
loss: 0.99960857629776,grad_norm: 0.9001402170157852, iteration: 124046
loss: 1.006402611732483,grad_norm: 0.9822585691799134, iteration: 124047
loss: 1.0087289810180664,grad_norm: 0.9999990452239025, iteration: 124048
loss: 0.9760751724243164,grad_norm: 0.7610816455296338, iteration: 124049
loss: 1.0222145318984985,grad_norm: 0.9999990933813425, iteration: 124050
loss: 1.011595368385315,grad_norm: 0.9999995072133491, iteration: 124051
loss: 1.0092722177505493,grad_norm: 0.9999989813430341, iteration: 124052
loss: 1.029258131980896,grad_norm: 0.9999990603808188, iteration: 124053
loss: 1.0181353092193604,grad_norm: 0.99999916951816, iteration: 124054
loss: 1.0329381227493286,grad_norm: 0.9999992432792303, iteration: 124055
loss: 0.9990425705909729,grad_norm: 0.999999064622216, iteration: 124056
loss: 1.017674207687378,grad_norm: 0.9608170154325277, iteration: 124057
loss: 1.0043652057647705,grad_norm: 0.9999990379285981, iteration: 124058
loss: 1.0195890665054321,grad_norm: 0.9999990651675139, iteration: 124059
loss: 0.9658591151237488,grad_norm: 0.9527610670088573, iteration: 124060
loss: 1.011920690536499,grad_norm: 0.9999991648630198, iteration: 124061
loss: 1.0031198263168335,grad_norm: 0.9999992922258024, iteration: 124062
loss: 1.011790156364441,grad_norm: 0.9999991305312523, iteration: 124063
loss: 0.9746181964874268,grad_norm: 0.8729589871928223, iteration: 124064
loss: 1.0103188753128052,grad_norm: 0.9999989923610424, iteration: 124065
loss: 0.9920241236686707,grad_norm: 0.9999991768812126, iteration: 124066
loss: 1.0134342908859253,grad_norm: 0.9999990908519709, iteration: 124067
loss: 0.9873864650726318,grad_norm: 0.9821223850776764, iteration: 124068
loss: 1.014915943145752,grad_norm: 0.9999992062952803, iteration: 124069
loss: 1.0107372999191284,grad_norm: 0.9590100918167139, iteration: 124070
loss: 0.9903117418289185,grad_norm: 0.9999991106782448, iteration: 124071
loss: 1.004928469657898,grad_norm: 0.8323006585853879, iteration: 124072
loss: 1.0080944299697876,grad_norm: 0.9999991760236451, iteration: 124073
loss: 0.995564877986908,grad_norm: 0.9999991754508565, iteration: 124074
loss: 0.972998321056366,grad_norm: 0.8571358714203365, iteration: 124075
loss: 1.0123130083084106,grad_norm: 0.9999991091122155, iteration: 124076
loss: 0.9871885180473328,grad_norm: 0.8353085836808147, iteration: 124077
loss: 1.049739956855774,grad_norm: 0.9999990305366825, iteration: 124078
loss: 0.9728305339813232,grad_norm: 0.9999992751064727, iteration: 124079
loss: 1.0098756551742554,grad_norm: 0.8475956476861846, iteration: 124080
loss: 1.0200039148330688,grad_norm: 0.9769960404321396, iteration: 124081
loss: 1.0938701629638672,grad_norm: 0.9999992386122211, iteration: 124082
loss: 1.0132911205291748,grad_norm: 0.9999996067348266, iteration: 124083
loss: 0.9855306148529053,grad_norm: 0.9748723112517742, iteration: 124084
loss: 1.0057802200317383,grad_norm: 0.9880937397485804, iteration: 124085
loss: 0.9815558195114136,grad_norm: 0.9630922847474115, iteration: 124086
loss: 1.0124433040618896,grad_norm: 0.9220214099592532, iteration: 124087
loss: 1.0654692649841309,grad_norm: 0.9999991534925875, iteration: 124088
loss: 0.9761533141136169,grad_norm: 0.9999990207353252, iteration: 124089
loss: 1.0478847026824951,grad_norm: 0.9999991846183808, iteration: 124090
loss: 1.0152976512908936,grad_norm: 0.8570000441240372, iteration: 124091
loss: 0.99593585729599,grad_norm: 0.997386515173703, iteration: 124092
loss: 0.9999304413795471,grad_norm: 0.9170350026216258, iteration: 124093
loss: 0.99959397315979,grad_norm: 0.9999993307047523, iteration: 124094
loss: 1.0632648468017578,grad_norm: 0.9999999998628069, iteration: 124095
loss: 1.0105100870132446,grad_norm: 0.9999991682405048, iteration: 124096
loss: 0.9777030348777771,grad_norm: 0.9999990717511181, iteration: 124097
loss: 1.0102348327636719,grad_norm: 0.9999993607125779, iteration: 124098
loss: 1.0239496231079102,grad_norm: 0.9999991587664554, iteration: 124099
loss: 1.0121313333511353,grad_norm: 0.9999990407306958, iteration: 124100
loss: 1.0152674913406372,grad_norm: 0.867678871462052, iteration: 124101
loss: 1.0004231929779053,grad_norm: 0.7921770726801268, iteration: 124102
loss: 0.9860402941703796,grad_norm: 0.9999990740703075, iteration: 124103
loss: 0.9957706928253174,grad_norm: 0.9973348879456244, iteration: 124104
loss: 1.0050355195999146,grad_norm: 0.9573162870957377, iteration: 124105
loss: 0.9905750155448914,grad_norm: 0.9999992564073606, iteration: 124106
loss: 0.9795008897781372,grad_norm: 0.9328242206585687, iteration: 124107
loss: 1.0308923721313477,grad_norm: 0.9999997060481569, iteration: 124108
loss: 0.9952648878097534,grad_norm: 0.9999991116436521, iteration: 124109
loss: 1.022011637687683,grad_norm: 0.941114421383953, iteration: 124110
loss: 0.9937653541564941,grad_norm: 0.9999989997167341, iteration: 124111
loss: 1.0124075412750244,grad_norm: 0.9999991307002134, iteration: 124112
loss: 1.0206940174102783,grad_norm: 0.9999991711115015, iteration: 124113
loss: 1.0551846027374268,grad_norm: 0.9999993706278273, iteration: 124114
loss: 1.170304775238037,grad_norm: 0.9999998682420362, iteration: 124115
loss: 1.0025181770324707,grad_norm: 0.8638013224961975, iteration: 124116
loss: 0.9568780660629272,grad_norm: 0.9999990520747452, iteration: 124117
loss: 0.9766057729721069,grad_norm: 0.9117840036007375, iteration: 124118
loss: 1.003472924232483,grad_norm: 0.9999989385042292, iteration: 124119
loss: 0.9997693300247192,grad_norm: 0.9560138892271792, iteration: 124120
loss: 1.0208133459091187,grad_norm: 0.999999121100477, iteration: 124121
loss: 1.0087639093399048,grad_norm: 0.9339560048416308, iteration: 124122
loss: 0.9936472773551941,grad_norm: 0.9999991593154107, iteration: 124123
loss: 0.9989370107650757,grad_norm: 0.9999992664666851, iteration: 124124
loss: 0.9921549558639526,grad_norm: 0.9999992079297979, iteration: 124125
loss: 1.011880874633789,grad_norm: 0.9999989959359994, iteration: 124126
loss: 0.9902173280715942,grad_norm: 0.9628006781835715, iteration: 124127
loss: 0.9900439381599426,grad_norm: 0.8935536817642696, iteration: 124128
loss: 0.9872292876243591,grad_norm: 0.9999990470815969, iteration: 124129
loss: 1.0055663585662842,grad_norm: 0.9973586932986346, iteration: 124130
loss: 1.0153084993362427,grad_norm: 0.8531456559992935, iteration: 124131
loss: 0.9992960095405579,grad_norm: 0.9129016410817099, iteration: 124132
loss: 0.9833451509475708,grad_norm: 0.9999991945088036, iteration: 124133
loss: 1.0052889585494995,grad_norm: 0.9999989648885341, iteration: 124134
loss: 0.984466552734375,grad_norm: 0.9999992645130511, iteration: 124135
loss: 0.9961826801300049,grad_norm: 0.9999992099008106, iteration: 124136
loss: 0.9766583442687988,grad_norm: 0.9999992011185665, iteration: 124137
loss: 1.0233601331710815,grad_norm: 0.8458941925344566, iteration: 124138
loss: 0.9631322622299194,grad_norm: 0.9243054117175793, iteration: 124139
loss: 0.9799994826316833,grad_norm: 0.9732623359794587, iteration: 124140
loss: 0.961966335773468,grad_norm: 0.9999990300412889, iteration: 124141
loss: 0.9860458970069885,grad_norm: 0.9999992703440438, iteration: 124142
loss: 1.0066334009170532,grad_norm: 0.9999991257594254, iteration: 124143
loss: 0.9730761051177979,grad_norm: 0.9332022567102062, iteration: 124144
loss: 0.9952106475830078,grad_norm: 0.8952916292323805, iteration: 124145
loss: 1.005502700805664,grad_norm: 0.9999991315481523, iteration: 124146
loss: 0.9765047430992126,grad_norm: 0.9999990901484537, iteration: 124147
loss: 1.0274858474731445,grad_norm: 0.9999997996173705, iteration: 124148
loss: 1.0093562602996826,grad_norm: 0.9999991957216262, iteration: 124149
loss: 0.9893419146537781,grad_norm: 0.9999990659692066, iteration: 124150
loss: 0.9888243079185486,grad_norm: 0.9387769716418349, iteration: 124151
loss: 1.0346758365631104,grad_norm: 0.9999990813350076, iteration: 124152
loss: 0.9866745471954346,grad_norm: 0.9999992132465136, iteration: 124153
loss: 1.0343966484069824,grad_norm: 0.9999990478073075, iteration: 124154
loss: 1.0296648740768433,grad_norm: 0.9655007438836143, iteration: 124155
loss: 0.9721484780311584,grad_norm: 0.9999993313511908, iteration: 124156
loss: 0.9560990929603577,grad_norm: 0.9512797776895561, iteration: 124157
loss: 0.9929631352424622,grad_norm: 0.9999991905140375, iteration: 124158
loss: 1.001979112625122,grad_norm: 0.955340521455268, iteration: 124159
loss: 0.9980483651161194,grad_norm: 0.9839328354735647, iteration: 124160
loss: 1.0032052993774414,grad_norm: 0.894141229471051, iteration: 124161
loss: 0.9930355548858643,grad_norm: 0.9999991179580707, iteration: 124162
loss: 0.9833171963691711,grad_norm: 0.9999991975592639, iteration: 124163
loss: 1.0747559070587158,grad_norm: 0.9751006375495409, iteration: 124164
loss: 1.0345841646194458,grad_norm: 0.9999996159097795, iteration: 124165
loss: 1.0267908573150635,grad_norm: 0.9999990804958174, iteration: 124166
loss: 1.002732276916504,grad_norm: 0.9999991318304292, iteration: 124167
loss: 0.9503847360610962,grad_norm: 0.9999991327085389, iteration: 124168
loss: 1.0332820415496826,grad_norm: 0.9104350065748466, iteration: 124169
loss: 1.0274077653884888,grad_norm: 0.9036892132577031, iteration: 124170
loss: 0.9919314980506897,grad_norm: 0.999999206602135, iteration: 124171
loss: 1.0628236532211304,grad_norm: 0.9999990635108524, iteration: 124172
loss: 0.9919374585151672,grad_norm: 0.8703270197452929, iteration: 124173
loss: 1.0017896890640259,grad_norm: 0.9999992721728324, iteration: 124174
loss: 1.0766875743865967,grad_norm: 0.999999496811031, iteration: 124175
loss: 0.9937276840209961,grad_norm: 0.999999207776841, iteration: 124176
loss: 1.0506794452667236,grad_norm: 0.9999995759747334, iteration: 124177
loss: 0.9675235748291016,grad_norm: 0.951870830362747, iteration: 124178
loss: 1.0166538953781128,grad_norm: 0.9999991279730637, iteration: 124179
loss: 0.9927560687065125,grad_norm: 0.9887269421627886, iteration: 124180
loss: 1.0321991443634033,grad_norm: 0.9999995824943233, iteration: 124181
loss: 1.0224955081939697,grad_norm: 0.9999990711350772, iteration: 124182
loss: 0.9803807139396667,grad_norm: 0.9760743465313864, iteration: 124183
loss: 1.0646682977676392,grad_norm: 0.9999992705724284, iteration: 124184
loss: 1.0080455541610718,grad_norm: 0.9999990054723025, iteration: 124185
loss: 1.2044709920883179,grad_norm: 0.9999994091992505, iteration: 124186
loss: 1.1029837131500244,grad_norm: 0.9999993900851009, iteration: 124187
loss: 0.994590163230896,grad_norm: 0.9238619654099904, iteration: 124188
loss: 0.9867448806762695,grad_norm: 0.9999992787046916, iteration: 124189
loss: 0.9796806573867798,grad_norm: 0.9999991392324603, iteration: 124190
loss: 1.0105071067810059,grad_norm: 0.9999990097005615, iteration: 124191
loss: 1.094150185585022,grad_norm: 0.9999991013858992, iteration: 124192
loss: 0.9995149374008179,grad_norm: 0.9830279099151871, iteration: 124193
loss: 1.0172182321548462,grad_norm: 0.9999991100594966, iteration: 124194
loss: 0.9761008620262146,grad_norm: 0.9999990415527962, iteration: 124195
loss: 1.0085119009017944,grad_norm: 0.9558972799899631, iteration: 124196
loss: 1.013332486152649,grad_norm: 0.9915613997791026, iteration: 124197
loss: 1.0001215934753418,grad_norm: 0.8132860253059837, iteration: 124198
loss: 0.9652703404426575,grad_norm: 0.8771044535213294, iteration: 124199
loss: 1.0034875869750977,grad_norm: 0.9999991772056006, iteration: 124200
loss: 1.0191766023635864,grad_norm: 0.9915628348015182, iteration: 124201
loss: 0.984162449836731,grad_norm: 0.9999998616621643, iteration: 124202
loss: 1.027017593383789,grad_norm: 0.9999991997832779, iteration: 124203
loss: 1.0528738498687744,grad_norm: 0.9999999390497342, iteration: 124204
loss: 1.0176512002944946,grad_norm: 0.9999992502064249, iteration: 124205
loss: 1.0839638710021973,grad_norm: 0.9999998810192817, iteration: 124206
loss: 1.0614392757415771,grad_norm: 0.9999990171986851, iteration: 124207
loss: 0.9632286429405212,grad_norm: 0.9760294958490124, iteration: 124208
loss: 1.0243406295776367,grad_norm: 0.999999021303844, iteration: 124209
loss: 1.0640695095062256,grad_norm: 0.9999996992724084, iteration: 124210
loss: 0.9682667851448059,grad_norm: 0.9999990083138448, iteration: 124211
loss: 0.9952098727226257,grad_norm: 0.9999991250193699, iteration: 124212
loss: 1.008473515510559,grad_norm: 0.9326283891862986, iteration: 124213
loss: 1.01164710521698,grad_norm: 0.9808798473982974, iteration: 124214
loss: 1.0109658241271973,grad_norm: 0.9593664018688467, iteration: 124215
loss: 1.0442888736724854,grad_norm: 0.9223053296469861, iteration: 124216
loss: 1.0018680095672607,grad_norm: 0.9820034161904253, iteration: 124217
loss: 0.9617888331413269,grad_norm: 0.907468153253427, iteration: 124218
loss: 1.0211994647979736,grad_norm: 0.8508875830608598, iteration: 124219
loss: 0.9920427203178406,grad_norm: 0.9973612639641444, iteration: 124220
loss: 0.9924250245094299,grad_norm: 0.9003506605688637, iteration: 124221
loss: 0.9654906988143921,grad_norm: 0.8684489597036305, iteration: 124222
loss: 1.0049703121185303,grad_norm: 0.9999998890899144, iteration: 124223
loss: 1.0165470838546753,grad_norm: 0.9999992235506406, iteration: 124224
loss: 1.0173319578170776,grad_norm: 0.9999989963828976, iteration: 124225
loss: 1.0062955617904663,grad_norm: 0.8982760004110926, iteration: 124226
loss: 1.032101035118103,grad_norm: 0.999999253885907, iteration: 124227
loss: 1.006917119026184,grad_norm: 0.9999992219121471, iteration: 124228
loss: 1.0275801420211792,grad_norm: 0.9999991785117232, iteration: 124229
loss: 0.9801012873649597,grad_norm: 0.9309992626720462, iteration: 124230
loss: 1.0041991472244263,grad_norm: 0.9999991738242686, iteration: 124231
loss: 1.0085725784301758,grad_norm: 0.9999990802027776, iteration: 124232
loss: 1.0144094228744507,grad_norm: 0.9999998871623202, iteration: 124233
loss: 1.0030791759490967,grad_norm: 0.999999245776442, iteration: 124234
loss: 0.9885144233703613,grad_norm: 0.9999990886334684, iteration: 124235
loss: 0.9802335500717163,grad_norm: 0.9027676571809145, iteration: 124236
loss: 0.9970996975898743,grad_norm: 0.9999991269517897, iteration: 124237
loss: 1.0109943151474,grad_norm: 0.9999991190019478, iteration: 124238
loss: 1.0496152639389038,grad_norm: 0.9999999018466925, iteration: 124239
loss: 0.999381422996521,grad_norm: 0.9999992476057514, iteration: 124240
loss: 0.9776092171669006,grad_norm: 0.9126574149906239, iteration: 124241
loss: 1.0131540298461914,grad_norm: 0.8319334836441657, iteration: 124242
loss: 1.1633751392364502,grad_norm: 0.9999995587610008, iteration: 124243
loss: 0.9866058826446533,grad_norm: 0.9999990200340918, iteration: 124244
loss: 0.9972927570343018,grad_norm: 0.9490903856431737, iteration: 124245
loss: 0.9876360297203064,grad_norm: 0.9999990316732879, iteration: 124246
loss: 1.0083487033843994,grad_norm: 0.9999992556774632, iteration: 124247
loss: 0.9907838702201843,grad_norm: 0.7929636498432977, iteration: 124248
loss: 1.0282235145568848,grad_norm: 0.9239180764165832, iteration: 124249
loss: 1.0215504169464111,grad_norm: 0.9999992531408061, iteration: 124250
loss: 1.0059787034988403,grad_norm: 0.9999992439664771, iteration: 124251
loss: 0.9876338839530945,grad_norm: 0.9999991462204484, iteration: 124252
loss: 0.9835807681083679,grad_norm: 0.9999992208128891, iteration: 124253
loss: 1.0272226333618164,grad_norm: 0.8563380410545711, iteration: 124254
loss: 1.020275592803955,grad_norm: 0.9720439167981482, iteration: 124255
loss: 1.0222837924957275,grad_norm: 1.0000000351579739, iteration: 124256
loss: 1.0045032501220703,grad_norm: 0.8594034350526415, iteration: 124257
loss: 1.005983591079712,grad_norm: 0.9550236123405161, iteration: 124258
loss: 1.0069962739944458,grad_norm: 0.9999992249684568, iteration: 124259
loss: 0.9836533069610596,grad_norm: 0.9999993677012391, iteration: 124260
loss: 0.9872998595237732,grad_norm: 0.9132793454441193, iteration: 124261
loss: 0.9713438153266907,grad_norm: 0.9858274615353311, iteration: 124262
loss: 0.947340190410614,grad_norm: 0.9999991173996318, iteration: 124263
loss: 0.9734042882919312,grad_norm: 0.9999989151660846, iteration: 124264
loss: 0.9631316661834717,grad_norm: 0.9999989828993444, iteration: 124265
loss: 0.980492889881134,grad_norm: 0.9999991204765576, iteration: 124266
loss: 1.0283015966415405,grad_norm: 0.9999991449848308, iteration: 124267
loss: 0.975061297416687,grad_norm: 0.9489452761457885, iteration: 124268
loss: 1.011538028717041,grad_norm: 0.9999991021305927, iteration: 124269
loss: 1.0291301012039185,grad_norm: 0.8228222126753842, iteration: 124270
loss: 0.980555534362793,grad_norm: 0.9999992218882141, iteration: 124271
loss: 1.0051069259643555,grad_norm: 0.8716631198918067, iteration: 124272
loss: 1.0141152143478394,grad_norm: 0.9041496480129236, iteration: 124273
loss: 1.0078442096710205,grad_norm: 0.9999990946437849, iteration: 124274
loss: 1.0183825492858887,grad_norm: 0.9315297217564986, iteration: 124275
loss: 1.0302993059158325,grad_norm: 0.9999991506890309, iteration: 124276
loss: 1.0329066514968872,grad_norm: 0.9999989839353486, iteration: 124277
loss: 1.0073378086090088,grad_norm: 0.9999991466339383, iteration: 124278
loss: 0.9959650039672852,grad_norm: 0.8859431737864076, iteration: 124279
loss: 0.9707146883010864,grad_norm: 0.9999990934860337, iteration: 124280
loss: 0.991072416305542,grad_norm: 0.9999990266175237, iteration: 124281
loss: 0.9464809894561768,grad_norm: 0.9934639598058201, iteration: 124282
loss: 0.995425283908844,grad_norm: 0.9999990177715217, iteration: 124283
loss: 1.0024174451828003,grad_norm: 0.9999991927574177, iteration: 124284
loss: 0.9766543507575989,grad_norm: 0.9999990577651247, iteration: 124285
loss: 0.9784599542617798,grad_norm: 0.9999992429478796, iteration: 124286
loss: 1.0174369812011719,grad_norm: 0.9999997795252417, iteration: 124287
loss: 0.9957087635993958,grad_norm: 0.871870029353657, iteration: 124288
loss: 1.0043983459472656,grad_norm: 0.9999991088260362, iteration: 124289
loss: 0.956968367099762,grad_norm: 0.8933574157948413, iteration: 124290
loss: 0.9666008949279785,grad_norm: 0.9999991690349141, iteration: 124291
loss: 1.009170413017273,grad_norm: 0.9610233476861804, iteration: 124292
loss: 0.9950000047683716,grad_norm: 0.9725242670263838, iteration: 124293
loss: 1.0114965438842773,grad_norm: 0.9999990485201502, iteration: 124294
loss: 0.9860227108001709,grad_norm: 0.9999989614050918, iteration: 124295
loss: 1.0049970149993896,grad_norm: 0.9999991374812873, iteration: 124296
loss: 0.977851390838623,grad_norm: 0.9999990851398604, iteration: 124297
loss: 1.015156626701355,grad_norm: 0.9999992699082373, iteration: 124298
loss: 1.0258557796478271,grad_norm: 0.9999990294776991, iteration: 124299
loss: 0.9891688823699951,grad_norm: 0.9999989485365314, iteration: 124300
loss: 0.9883723258972168,grad_norm: 0.999999111955793, iteration: 124301
loss: 0.9928146004676819,grad_norm: 0.9999992614537164, iteration: 124302
loss: 0.9843799471855164,grad_norm: 0.9076000911167242, iteration: 124303
loss: 1.0144546031951904,grad_norm: 0.9281430198091242, iteration: 124304
loss: 0.9938068389892578,grad_norm: 0.9999993161144313, iteration: 124305
loss: 1.038336157798767,grad_norm: 0.9465775724711275, iteration: 124306
loss: 0.9679814577102661,grad_norm: 0.9999990878659027, iteration: 124307
loss: 1.0125404596328735,grad_norm: 0.9999992382721032, iteration: 124308
loss: 0.9778019785881042,grad_norm: 0.9999990313722614, iteration: 124309
loss: 1.0032920837402344,grad_norm: 0.8622749702032225, iteration: 124310
loss: 0.9536699652671814,grad_norm: 0.9999991037499184, iteration: 124311
loss: 0.9639328718185425,grad_norm: 0.9999991704232299, iteration: 124312
loss: 1.0091021060943604,grad_norm: 0.9893669665665837, iteration: 124313
loss: 1.0056170225143433,grad_norm: 0.9999990664900148, iteration: 124314
loss: 1.0487384796142578,grad_norm: 0.9787558746060201, iteration: 124315
loss: 1.0027778148651123,grad_norm: 0.9047230757438769, iteration: 124316
loss: 0.9956437945365906,grad_norm: 0.9999989633531393, iteration: 124317
loss: 0.9594792127609253,grad_norm: 0.9999992211337156, iteration: 124318
loss: 1.0222883224487305,grad_norm: 0.999999103917758, iteration: 124319
loss: 1.0445494651794434,grad_norm: 0.9999996771055515, iteration: 124320
loss: 1.0075323581695557,grad_norm: 0.9149377790698282, iteration: 124321
loss: 0.994840145111084,grad_norm: 0.9046218041050651, iteration: 124322
loss: 0.9719648361206055,grad_norm: 0.8934869317803098, iteration: 124323
loss: 0.9756017327308655,grad_norm: 0.9999990735787158, iteration: 124324
loss: 0.9956052899360657,grad_norm: 0.9999996322307553, iteration: 124325
loss: 0.9701142907142639,grad_norm: 0.8531997100825904, iteration: 124326
loss: 0.9764374494552612,grad_norm: 0.9999992329667956, iteration: 124327
loss: 1.0091731548309326,grad_norm: 0.9999991946462518, iteration: 124328
loss: 1.0920636653900146,grad_norm: 0.9999999823703463, iteration: 124329
loss: 0.990268886089325,grad_norm: 0.8879941961320018, iteration: 124330
loss: 0.9853478670120239,grad_norm: 0.9999989946128611, iteration: 124331
loss: 0.9924975037574768,grad_norm: 0.9999991762236813, iteration: 124332
loss: 0.9898150563240051,grad_norm: 0.9969771744413494, iteration: 124333
loss: 0.9629044532775879,grad_norm: 0.999772590046371, iteration: 124334
loss: 1.0006252527236938,grad_norm: 0.9810403511484104, iteration: 124335
loss: 1.0117884874343872,grad_norm: 0.8734470485152518, iteration: 124336
loss: 1.0124353170394897,grad_norm: 0.9999989554599723, iteration: 124337
loss: 1.0145506858825684,grad_norm: 0.9999990638228294, iteration: 124338
loss: 0.9895156025886536,grad_norm: 0.9999991269506868, iteration: 124339
loss: 1.0025553703308105,grad_norm: 0.999999220948619, iteration: 124340
loss: 0.9916739463806152,grad_norm: 0.9397473977718837, iteration: 124341
loss: 1.001470685005188,grad_norm: 0.9999992409189625, iteration: 124342
loss: 0.9889390468597412,grad_norm: 0.9849643019239853, iteration: 124343
loss: 1.0130856037139893,grad_norm: 0.9876448137578239, iteration: 124344
loss: 1.0314229726791382,grad_norm: 0.9842928285381707, iteration: 124345
loss: 0.9956949949264526,grad_norm: 0.9999991184122841, iteration: 124346
loss: 0.9649369716644287,grad_norm: 0.9999991956906391, iteration: 124347
loss: 1.023254156112671,grad_norm: 0.8737216449048445, iteration: 124348
loss: 0.9459673166275024,grad_norm: 0.9999991334359501, iteration: 124349
loss: 1.020393967628479,grad_norm: 0.9844064012397655, iteration: 124350
loss: 0.9614042043685913,grad_norm: 0.826167321468135, iteration: 124351
loss: 1.004201889038086,grad_norm: 0.9999991790019678, iteration: 124352
loss: 0.978665828704834,grad_norm: 0.9999990197426166, iteration: 124353
loss: 0.982153594493866,grad_norm: 0.9999991547582322, iteration: 124354
loss: 1.0114238262176514,grad_norm: 0.9420705563710756, iteration: 124355
loss: 0.9506061673164368,grad_norm: 0.9999991804022796, iteration: 124356
loss: 0.9689789414405823,grad_norm: 0.9999990987054084, iteration: 124357
loss: 1.012622356414795,grad_norm: 0.9999991211539995, iteration: 124358
loss: 0.9906582236289978,grad_norm: 0.9999999806582238, iteration: 124359
loss: 1.0126240253448486,grad_norm: 0.9999991916266101, iteration: 124360
loss: 1.0026640892028809,grad_norm: 0.9999993770858836, iteration: 124361
loss: 1.0484179258346558,grad_norm: 0.8888434707252484, iteration: 124362
loss: 1.016379714012146,grad_norm: 0.999999020398085, iteration: 124363
loss: 1.003129482269287,grad_norm: 0.8154466008379441, iteration: 124364
loss: 0.9882913827896118,grad_norm: 0.9434912312571829, iteration: 124365
loss: 1.0067713260650635,grad_norm: 0.9999992121493205, iteration: 124366
loss: 1.029997706413269,grad_norm: 0.9999990691070206, iteration: 124367
loss: 1.0288630723953247,grad_norm: 0.9999991887875456, iteration: 124368
loss: 1.031813621520996,grad_norm: 0.8171945391226252, iteration: 124369
loss: 0.998930037021637,grad_norm: 0.8764677952462734, iteration: 124370
loss: 0.9837431311607361,grad_norm: 0.9999992797809978, iteration: 124371
loss: 0.952687680721283,grad_norm: 0.9019492832943545, iteration: 124372
loss: 1.0233595371246338,grad_norm: 0.999998952224494, iteration: 124373
loss: 1.034334659576416,grad_norm: 0.9538364572739672, iteration: 124374
loss: 1.019038200378418,grad_norm: 0.9999990391668997, iteration: 124375
loss: 1.0259259939193726,grad_norm: 0.9999990977521163, iteration: 124376
loss: 0.9957239627838135,grad_norm: 0.9999991559724021, iteration: 124377
loss: 1.0729421377182007,grad_norm: 0.9999990935796865, iteration: 124378
loss: 1.0382909774780273,grad_norm: 0.9385482184226667, iteration: 124379
loss: 0.993836522102356,grad_norm: 0.9999992256271439, iteration: 124380
loss: 1.021594762802124,grad_norm: 0.8661822276054051, iteration: 124381
loss: 1.0199201107025146,grad_norm: 0.9999992157290232, iteration: 124382
loss: 1.0034611225128174,grad_norm: 0.9999991747673587, iteration: 124383
loss: 1.0047343969345093,grad_norm: 0.9999990265990406, iteration: 124384
loss: 0.9763027429580688,grad_norm: 0.9999991093524826, iteration: 124385
loss: 1.0513277053833008,grad_norm: 0.9999991227131313, iteration: 124386
loss: 1.0223076343536377,grad_norm: 0.999999295994131, iteration: 124387
loss: 0.9978212118148804,grad_norm: 0.9999990737855142, iteration: 124388
loss: 1.0070375204086304,grad_norm: 0.9600206285355407, iteration: 124389
loss: 0.9888842701911926,grad_norm: 0.9999991883977016, iteration: 124390
loss: 1.0035319328308105,grad_norm: 0.9999990731911494, iteration: 124391
loss: 0.9600019454956055,grad_norm: 0.9999991467073607, iteration: 124392
loss: 0.9727821350097656,grad_norm: 0.9693738844422742, iteration: 124393
loss: 1.015123724937439,grad_norm: 0.9832040979019798, iteration: 124394
loss: 1.0319079160690308,grad_norm: 0.9999991158761521, iteration: 124395
loss: 1.0062493085861206,grad_norm: 0.999999179813634, iteration: 124396
loss: 1.0272438526153564,grad_norm: 0.9999992566095087, iteration: 124397
loss: 0.999922513961792,grad_norm: 0.988337885813449, iteration: 124398
loss: 1.0266468524932861,grad_norm: 0.946647465448396, iteration: 124399
loss: 0.9975236654281616,grad_norm: 0.9999991730399402, iteration: 124400
loss: 0.970953643321991,grad_norm: 0.9999992988025721, iteration: 124401
loss: 0.9661913514137268,grad_norm: 0.94524807553083, iteration: 124402
loss: 0.9833990931510925,grad_norm: 0.8983472860188081, iteration: 124403
loss: 0.9718647003173828,grad_norm: 0.8800530168284295, iteration: 124404
loss: 1.0198761224746704,grad_norm: 0.9323458212458041, iteration: 124405
loss: 0.9879979491233826,grad_norm: 0.9999990816202491, iteration: 124406
loss: 1.0345059633255005,grad_norm: 0.9999990659516768, iteration: 124407
loss: 1.0069884061813354,grad_norm: 0.9765723328338192, iteration: 124408
loss: 0.9739847183227539,grad_norm: 0.9999992128663262, iteration: 124409
loss: 0.9597867131233215,grad_norm: 0.9999991112304095, iteration: 124410
loss: 1.0418062210083008,grad_norm: 0.9999991741584509, iteration: 124411
loss: 1.003085732460022,grad_norm: 0.9528162187056469, iteration: 124412
loss: 0.9553869366645813,grad_norm: 0.9091884681538599, iteration: 124413
loss: 0.953058123588562,grad_norm: 0.9999991162489172, iteration: 124414
loss: 1.0134166479110718,grad_norm: 0.834594079565076, iteration: 124415
loss: 0.9841322898864746,grad_norm: 0.9999991688767252, iteration: 124416
loss: 0.999420166015625,grad_norm: 0.9793427390706313, iteration: 124417
loss: 0.9859371185302734,grad_norm: 0.8485951026698154, iteration: 124418
loss: 1.0011653900146484,grad_norm: 0.9999989985082849, iteration: 124419
loss: 0.9804214835166931,grad_norm: 0.9999990677285286, iteration: 124420
loss: 0.977270781993866,grad_norm: 0.9999992471096683, iteration: 124421
loss: 1.0047712326049805,grad_norm: 0.9999990611432613, iteration: 124422
loss: 0.9886948466300964,grad_norm: 0.9999992104577247, iteration: 124423
loss: 1.014893651008606,grad_norm: 0.9999992699310126, iteration: 124424
loss: 0.9836273193359375,grad_norm: 0.9999990541650032, iteration: 124425
loss: 0.9814891815185547,grad_norm: 0.9999992807253459, iteration: 124426
loss: 0.9965478777885437,grad_norm: 0.8923535474000718, iteration: 124427
loss: 0.9855086803436279,grad_norm: 0.97078231785406, iteration: 124428
loss: 1.02977454662323,grad_norm: 0.9999990988785232, iteration: 124429
loss: 1.0049082040786743,grad_norm: 0.9999991829134366, iteration: 124430
loss: 1.0090986490249634,grad_norm: 0.9883661226963234, iteration: 124431
loss: 1.0470013618469238,grad_norm: 0.9953437081418531, iteration: 124432
loss: 1.0165214538574219,grad_norm: 0.9796788391798609, iteration: 124433
loss: 0.961237370967865,grad_norm: 0.8687008338835454, iteration: 124434
loss: 0.9990103244781494,grad_norm: 0.8878750149108903, iteration: 124435
loss: 1.0085123777389526,grad_norm: 0.9999992479780032, iteration: 124436
loss: 0.9624359011650085,grad_norm: 0.9999990727095138, iteration: 124437
loss: 1.0474025011062622,grad_norm: 0.9999991437813434, iteration: 124438
loss: 0.9869728684425354,grad_norm: 0.8938841669565912, iteration: 124439
loss: 1.0113955736160278,grad_norm: 0.9999991322282002, iteration: 124440
loss: 0.985914409160614,grad_norm: 0.9999990958796141, iteration: 124441
loss: 0.9857426285743713,grad_norm: 0.9999990455858347, iteration: 124442
loss: 0.9890022873878479,grad_norm: 0.9999991547528267, iteration: 124443
loss: 1.0361346006393433,grad_norm: 0.9999991124542543, iteration: 124444
loss: 0.9957497119903564,grad_norm: 0.999999159926179, iteration: 124445
loss: 0.9930917620658875,grad_norm: 0.9999989310769769, iteration: 124446
loss: 1.004339575767517,grad_norm: 0.9759843461521855, iteration: 124447
loss: 0.9779046773910522,grad_norm: 0.9146223551528985, iteration: 124448
loss: 1.019952416419983,grad_norm: 0.9999993941534927, iteration: 124449
loss: 0.9524592757225037,grad_norm: 0.9999990962688128, iteration: 124450
loss: 1.0015792846679688,grad_norm: 0.9680424646813376, iteration: 124451
loss: 1.0033657550811768,grad_norm: 0.9999992486886824, iteration: 124452
loss: 0.9874778985977173,grad_norm: 0.9906539146223727, iteration: 124453
loss: 0.9860336780548096,grad_norm: 0.9999990372653899, iteration: 124454
loss: 1.0335450172424316,grad_norm: 0.9999992786676348, iteration: 124455
loss: 0.9994374513626099,grad_norm: 0.9999989038801907, iteration: 124456
loss: 1.0214183330535889,grad_norm: 0.9999991361333892, iteration: 124457
loss: 0.9735782146453857,grad_norm: 0.9999991655065055, iteration: 124458
loss: 1.0387438535690308,grad_norm: 0.9999989820934813, iteration: 124459
loss: 0.9802334308624268,grad_norm: 0.9288154719868111, iteration: 124460
loss: 0.9912750720977783,grad_norm: 0.9999989577546643, iteration: 124461
loss: 1.005265712738037,grad_norm: 0.9999990853579842, iteration: 124462
loss: 0.9925724864006042,grad_norm: 0.9999990283066512, iteration: 124463
loss: 1.0193510055541992,grad_norm: 0.9999993082332885, iteration: 124464
loss: 1.0089775323867798,grad_norm: 0.938389859548626, iteration: 124465
loss: 1.0003756284713745,grad_norm: 0.99999919757199, iteration: 124466
loss: 0.9875875115394592,grad_norm: 0.9999991092304861, iteration: 124467
loss: 0.917980968952179,grad_norm: 0.8603154103020546, iteration: 124468
loss: 0.9804612994194031,grad_norm: 0.9999990839437322, iteration: 124469
loss: 0.9933452010154724,grad_norm: 0.966096514304457, iteration: 124470
loss: 1.011647343635559,grad_norm: 0.9999990783095226, iteration: 124471
loss: 0.9653618335723877,grad_norm: 0.9713727623956987, iteration: 124472
loss: 1.0122859477996826,grad_norm: 0.9999991248968599, iteration: 124473
loss: 1.0081651210784912,grad_norm: 0.9999990611486572, iteration: 124474
loss: 1.0059844255447388,grad_norm: 0.9999990632084732, iteration: 124475
loss: 1.0281788110733032,grad_norm: 0.9404552571572196, iteration: 124476
loss: 0.9797496795654297,grad_norm: 0.8982009560964922, iteration: 124477
loss: 0.9874047636985779,grad_norm: 0.9166119643113334, iteration: 124478
loss: 0.9970065951347351,grad_norm: 0.9999992192139441, iteration: 124479
loss: 1.0135031938552856,grad_norm: 0.9999990768497496, iteration: 124480
loss: 1.0167782306671143,grad_norm: 0.9300818923285784, iteration: 124481
loss: 0.9852944612503052,grad_norm: 0.9857843957045486, iteration: 124482
loss: 1.0122177600860596,grad_norm: 0.999999385452675, iteration: 124483
loss: 0.9511198997497559,grad_norm: 0.9999991094449424, iteration: 124484
loss: 0.9767410159111023,grad_norm: 0.9999991475389578, iteration: 124485
loss: 1.0478596687316895,grad_norm: 0.9999992100859386, iteration: 124486
loss: 0.9917367696762085,grad_norm: 0.9999991583861662, iteration: 124487
loss: 0.9932042360305786,grad_norm: 0.8313782490280494, iteration: 124488
loss: 0.9773478507995605,grad_norm: 0.8526893655952428, iteration: 124489
loss: 0.9920096397399902,grad_norm: 0.8942414574651856, iteration: 124490
loss: 1.0043468475341797,grad_norm: 0.9999992794627719, iteration: 124491
loss: 0.9656918048858643,grad_norm: 0.9999990829591155, iteration: 124492
loss: 1.0174446105957031,grad_norm: 0.9999992281835633, iteration: 124493
loss: 0.9796295762062073,grad_norm: 0.9983360849258387, iteration: 124494
loss: 0.9924624562263489,grad_norm: 0.8045804135573456, iteration: 124495
loss: 0.9796139001846313,grad_norm: 0.9999990624790315, iteration: 124496
loss: 0.9940086603164673,grad_norm: 0.9915483170231527, iteration: 124497
loss: 0.9676649570465088,grad_norm: 0.9999990861916166, iteration: 124498
loss: 0.9911467432975769,grad_norm: 0.999999019169152, iteration: 124499
loss: 0.9752023816108704,grad_norm: 0.8810927038788896, iteration: 124500
loss: 0.9889659285545349,grad_norm: 0.9999990255184631, iteration: 124501
loss: 1.0286797285079956,grad_norm: 0.8838559257777346, iteration: 124502
loss: 0.9400724768638611,grad_norm: 0.9999989799131461, iteration: 124503
loss: 1.019544005393982,grad_norm: 0.954235325165362, iteration: 124504
loss: 0.9925239682197571,grad_norm: 0.9200695852225916, iteration: 124505
loss: 0.9856814742088318,grad_norm: 0.8285728590419736, iteration: 124506
loss: 1.0211615562438965,grad_norm: 0.9999991109383171, iteration: 124507
loss: 1.0226038694381714,grad_norm: 0.9727243311413641, iteration: 124508
loss: 0.9315863251686096,grad_norm: 0.9999990153035222, iteration: 124509
loss: 1.0380325317382812,grad_norm: 0.9999989135958812, iteration: 124510
loss: 0.9886093139648438,grad_norm: 0.9913502743871776, iteration: 124511
loss: 1.0010430812835693,grad_norm: 0.9999990480389196, iteration: 124512
loss: 0.9587863087654114,grad_norm: 0.999999168527382, iteration: 124513
loss: 0.9957576990127563,grad_norm: 0.9999991546391304, iteration: 124514
loss: 1.0211883783340454,grad_norm: 0.9999990612846908, iteration: 124515
loss: 0.9788843989372253,grad_norm: 0.9999991418963333, iteration: 124516
loss: 0.9744580388069153,grad_norm: 0.9999990749308266, iteration: 124517
loss: 1.006215214729309,grad_norm: 0.9999992208077522, iteration: 124518
loss: 1.014339804649353,grad_norm: 0.999999411247813, iteration: 124519
loss: 1.0032151937484741,grad_norm: 0.9409052100855841, iteration: 124520
loss: 0.9753363728523254,grad_norm: 0.999999051855945, iteration: 124521
loss: 0.9807915091514587,grad_norm: 0.9296211925599676, iteration: 124522
loss: 1.0739123821258545,grad_norm: 0.9999997582046481, iteration: 124523
loss: 1.0225708484649658,grad_norm: 0.9999991833588583, iteration: 124524
loss: 0.9988596439361572,grad_norm: 0.999999121109201, iteration: 124525
loss: 0.9564680457115173,grad_norm: 0.9475605788445535, iteration: 124526
loss: 1.0134930610656738,grad_norm: 0.9999989991985133, iteration: 124527
loss: 0.9899458289146423,grad_norm: 0.999999221001478, iteration: 124528
loss: 1.0157551765441895,grad_norm: 0.9999992845165313, iteration: 124529
loss: 1.0523890256881714,grad_norm: 0.9999992354833442, iteration: 124530
loss: 1.00108802318573,grad_norm: 0.8888046419958129, iteration: 124531
loss: 1.0278563499450684,grad_norm: 0.9999992601105604, iteration: 124532
loss: 1.0048730373382568,grad_norm: 0.9999991428349974, iteration: 124533
loss: 1.0029246807098389,grad_norm: 0.9999991728957878, iteration: 124534
loss: 1.0085055828094482,grad_norm: 0.9999992487105275, iteration: 124535
loss: 0.9970080256462097,grad_norm: 0.9999991481450284, iteration: 124536
loss: 0.9674686193466187,grad_norm: 0.8959278028114787, iteration: 124537
loss: 0.9968327879905701,grad_norm: 0.9999992888765633, iteration: 124538
loss: 1.0189696550369263,grad_norm: 0.9608765220208981, iteration: 124539
loss: 0.9878333806991577,grad_norm: 0.9999990678914377, iteration: 124540
loss: 1.0427170991897583,grad_norm: 0.9999992784466348, iteration: 124541
loss: 1.0297636985778809,grad_norm: 0.9478378205613821, iteration: 124542
loss: 1.0257539749145508,grad_norm: 0.7674490243169765, iteration: 124543
loss: 1.0014547109603882,grad_norm: 0.9999991623628418, iteration: 124544
loss: 0.9923616647720337,grad_norm: 0.999999035754702, iteration: 124545
loss: 0.9863319396972656,grad_norm: 0.9999989634299209, iteration: 124546
loss: 0.9785714745521545,grad_norm: 0.9355678359784668, iteration: 124547
loss: 0.984104573726654,grad_norm: 0.9999990495615707, iteration: 124548
loss: 0.9963804483413696,grad_norm: 0.9999990857587439, iteration: 124549
loss: 0.970309317111969,grad_norm: 0.9999991773052431, iteration: 124550
loss: 1.0156354904174805,grad_norm: 0.9999993974596609, iteration: 124551
loss: 0.9744960069656372,grad_norm: 0.9544642897621606, iteration: 124552
loss: 0.9863516688346863,grad_norm: 0.9999992760211032, iteration: 124553
loss: 1.0249748229980469,grad_norm: 0.9945656142278465, iteration: 124554
loss: 1.0118138790130615,grad_norm: 0.9999992500388298, iteration: 124555
loss: 0.9879122376441956,grad_norm: 0.9999991763059956, iteration: 124556
loss: 0.9936180114746094,grad_norm: 0.9999992036860313, iteration: 124557
loss: 0.9962274432182312,grad_norm: 0.9999996696762599, iteration: 124558
loss: 0.9907782673835754,grad_norm: 0.9999991668115153, iteration: 124559
loss: 0.9981555938720703,grad_norm: 0.9999991090349738, iteration: 124560
loss: 1.0158510208129883,grad_norm: 0.9999991730512829, iteration: 124561
loss: 0.9894065260887146,grad_norm: 0.9999993368891907, iteration: 124562
loss: 0.9694625735282898,grad_norm: 0.9999990192310106, iteration: 124563
loss: 1.0408012866973877,grad_norm: 0.9999992251236067, iteration: 124564
loss: 1.0028916597366333,grad_norm: 0.9999992132197536, iteration: 124565
loss: 1.000614047050476,grad_norm: 0.9243128745025786, iteration: 124566
loss: 0.9883984923362732,grad_norm: 0.9459835758063826, iteration: 124567
loss: 1.0109586715698242,grad_norm: 0.9999997044179799, iteration: 124568
loss: 1.0281740427017212,grad_norm: 0.9999991448533646, iteration: 124569
loss: 1.0037792921066284,grad_norm: 0.9795517005153538, iteration: 124570
loss: 1.021581768989563,grad_norm: 0.9396740109681699, iteration: 124571
loss: 0.9929710030555725,grad_norm: 0.8994126737091553, iteration: 124572
loss: 1.0227296352386475,grad_norm: 0.9999991395430159, iteration: 124573
loss: 1.0277267694473267,grad_norm: 0.9999990348507831, iteration: 124574
loss: 0.9961910843849182,grad_norm: 0.9999990968889818, iteration: 124575
loss: 1.0230836868286133,grad_norm: 0.8343239287835024, iteration: 124576
loss: 0.9819647669792175,grad_norm: 0.9999993037250979, iteration: 124577
loss: 0.9918884038925171,grad_norm: 0.999999187155087, iteration: 124578
loss: 1.0032216310501099,grad_norm: 0.9999991216764668, iteration: 124579
loss: 0.9593772888183594,grad_norm: 0.8736491569523456, iteration: 124580
loss: 0.9976161122322083,grad_norm: 0.895288111597431, iteration: 124581
loss: 1.026695728302002,grad_norm: 0.9999991705738008, iteration: 124582
loss: 1.0010159015655518,grad_norm: 0.9999991564392932, iteration: 124583
loss: 0.9782032370567322,grad_norm: 0.9999991476854144, iteration: 124584
loss: 1.0049469470977783,grad_norm: 0.9999992882581432, iteration: 124585
loss: 0.9787494540214539,grad_norm: 0.9999994097415831, iteration: 124586
loss: 1.0370872020721436,grad_norm: 0.9999992271157129, iteration: 124587
loss: 0.9913890957832336,grad_norm: 0.9999991850144226, iteration: 124588
loss: 1.0533249378204346,grad_norm: 0.9690964355384587, iteration: 124589
loss: 1.0079864263534546,grad_norm: 0.9999990710765378, iteration: 124590
loss: 1.0217161178588867,grad_norm: 0.9999992384142009, iteration: 124591
loss: 1.0158766508102417,grad_norm: 0.9999993500032874, iteration: 124592
loss: 1.014477014541626,grad_norm: 0.999999625626462, iteration: 124593
loss: 0.9700312614440918,grad_norm: 0.8413194850510727, iteration: 124594
loss: 0.9733155965805054,grad_norm: 0.9999990752368386, iteration: 124595
loss: 0.9776764512062073,grad_norm: 0.8994872753546139, iteration: 124596
loss: 0.9976742267608643,grad_norm: 0.99999652368279, iteration: 124597
loss: 1.0064102411270142,grad_norm: 0.9999990961025783, iteration: 124598
loss: 1.034614086151123,grad_norm: 0.9365205845398666, iteration: 124599
loss: 1.0209681987762451,grad_norm: 0.99999911638256, iteration: 124600
loss: 1.024641752243042,grad_norm: 0.9999990383561279, iteration: 124601
loss: 0.9987860918045044,grad_norm: 0.908253843760778, iteration: 124602
loss: 1.0105624198913574,grad_norm: 0.9999991694433528, iteration: 124603
loss: 1.0164357423782349,grad_norm: 0.9999991602627349, iteration: 124604
loss: 1.0162746906280518,grad_norm: 0.9999990025597212, iteration: 124605
loss: 1.0175375938415527,grad_norm: 0.9999989585042892, iteration: 124606
loss: 1.0138964653015137,grad_norm: 0.9631234260832623, iteration: 124607
loss: 1.0221611261367798,grad_norm: 0.9208510525722566, iteration: 124608
loss: 0.9840522408485413,grad_norm: 0.9999992662539371, iteration: 124609
loss: 1.0451455116271973,grad_norm: 0.9999995111057238, iteration: 124610
loss: 0.9860863089561462,grad_norm: 0.9999990765237078, iteration: 124611
loss: 0.9639405012130737,grad_norm: 0.8919107076133045, iteration: 124612
loss: 1.0468534231185913,grad_norm: 0.9999990495848756, iteration: 124613
loss: 0.9781181812286377,grad_norm: 0.9999990330196795, iteration: 124614
loss: 1.0145213603973389,grad_norm: 0.9267608234122362, iteration: 124615
loss: 1.0116791725158691,grad_norm: 0.8955108430980351, iteration: 124616
loss: 1.008611798286438,grad_norm: 0.9999991487606027, iteration: 124617
loss: 0.9804340600967407,grad_norm: 0.9999990913890995, iteration: 124618
loss: 1.0060349702835083,grad_norm: 0.9999989450530647, iteration: 124619
loss: 1.0301302671432495,grad_norm: 0.9999990223757405, iteration: 124620
loss: 0.9970934987068176,grad_norm: 0.9774042855276849, iteration: 124621
loss: 0.9921757578849792,grad_norm: 0.9922460272354925, iteration: 124622
loss: 1.0341739654541016,grad_norm: 0.9713207745966101, iteration: 124623
loss: 1.004074215888977,grad_norm: 0.999998987173251, iteration: 124624
loss: 1.0168858766555786,grad_norm: 0.9999991465663071, iteration: 124625
loss: 1.0069071054458618,grad_norm: 0.9369695979287491, iteration: 124626
loss: 1.0053656101226807,grad_norm: 0.9999990398387438, iteration: 124627
loss: 1.0004631280899048,grad_norm: 0.999999202643725, iteration: 124628
loss: 0.9622761607170105,grad_norm: 0.9061466455931246, iteration: 124629
loss: 0.991075336933136,grad_norm: 0.9999989655964877, iteration: 124630
loss: 0.987342894077301,grad_norm: 0.868392699643645, iteration: 124631
loss: 1.006145715713501,grad_norm: 0.9923624322835624, iteration: 124632
loss: 0.9617056846618652,grad_norm: 0.9999990652220211, iteration: 124633
loss: 1.0763071775436401,grad_norm: 0.999999122612499, iteration: 124634
loss: 0.9874381422996521,grad_norm: 0.9999990098104744, iteration: 124635
loss: 1.0028839111328125,grad_norm: 0.8586187973020052, iteration: 124636
loss: 1.0025655031204224,grad_norm: 0.9863021194297089, iteration: 124637
loss: 0.9682390689849854,grad_norm: 0.9999991763879116, iteration: 124638
loss: 1.0029445886611938,grad_norm: 0.9999993245477797, iteration: 124639
loss: 0.9903526902198792,grad_norm: 0.9999991746837008, iteration: 124640
loss: 1.0443835258483887,grad_norm: 0.9999991406755079, iteration: 124641
loss: 1.0103126764297485,grad_norm: 0.9311462187289782, iteration: 124642
loss: 1.0007973909378052,grad_norm: 0.9999989880111764, iteration: 124643
loss: 1.0459057092666626,grad_norm: 0.9879454348862063, iteration: 124644
loss: 1.015763521194458,grad_norm: 0.9999992417465886, iteration: 124645
loss: 0.9676114916801453,grad_norm: 0.9999989961523605, iteration: 124646
loss: 1.0247769355773926,grad_norm: 0.9241753029512512, iteration: 124647
loss: 1.0562280416488647,grad_norm: 0.9999997718185666, iteration: 124648
loss: 1.0498740673065186,grad_norm: 0.9999991639845188, iteration: 124649
loss: 1.0108307600021362,grad_norm: 0.9181576972333182, iteration: 124650
loss: 1.008878469467163,grad_norm: 0.9999992334515005, iteration: 124651
loss: 0.9578527212142944,grad_norm: 0.9999991159562387, iteration: 124652
loss: 1.0048339366912842,grad_norm: 0.9999991147864702, iteration: 124653
loss: 1.0136528015136719,grad_norm: 0.9979386191112706, iteration: 124654
loss: 1.0181597471237183,grad_norm: 0.9831674049190068, iteration: 124655
loss: 1.0374870300292969,grad_norm: 0.9999992642178109, iteration: 124656
loss: 0.99104905128479,grad_norm: 0.9999991925757419, iteration: 124657
loss: 1.0353662967681885,grad_norm: 0.9999990605033579, iteration: 124658
loss: 0.9738183617591858,grad_norm: 0.9999989576833809, iteration: 124659
loss: 0.9959062337875366,grad_norm: 0.9516138819962355, iteration: 124660
loss: 0.9952043890953064,grad_norm: 0.987583436456135, iteration: 124661
loss: 1.02735435962677,grad_norm: 0.9999991162500319, iteration: 124662
loss: 0.9724447727203369,grad_norm: 0.9439304358857413, iteration: 124663
loss: 1.0028918981552124,grad_norm: 0.9999991300627671, iteration: 124664
loss: 0.9808152914047241,grad_norm: 0.947645037040179, iteration: 124665
loss: 0.97221440076828,grad_norm: 0.9999990809214602, iteration: 124666
loss: 1.0019150972366333,grad_norm: 0.9468319624101659, iteration: 124667
loss: 0.989960789680481,grad_norm: 0.981493551290052, iteration: 124668
loss: 0.9837116003036499,grad_norm: 0.9417685458185738, iteration: 124669
loss: 1.0121878385543823,grad_norm: 0.9999990964709051, iteration: 124670
loss: 0.9735188484191895,grad_norm: 0.9741909209706017, iteration: 124671
loss: 0.9912159442901611,grad_norm: 0.8743343080603784, iteration: 124672
loss: 0.997758150100708,grad_norm: 0.9999991502252011, iteration: 124673
loss: 0.9747638702392578,grad_norm: 0.9999989627835609, iteration: 124674
loss: 1.0175597667694092,grad_norm: 0.999999021560669, iteration: 124675
loss: 0.9805346727371216,grad_norm: 0.9999990808508541, iteration: 124676
loss: 0.9802795052528381,grad_norm: 0.9373195161779638, iteration: 124677
loss: 0.982513427734375,grad_norm: 0.999999184318328, iteration: 124678
loss: 1.0335829257965088,grad_norm: 0.9629924857810703, iteration: 124679
loss: 1.0011316537857056,grad_norm: 0.999998932715196, iteration: 124680
loss: 0.9675418734550476,grad_norm: 0.9999992237515112, iteration: 124681
loss: 0.9971454739570618,grad_norm: 0.9432168885260797, iteration: 124682
loss: 1.1180399656295776,grad_norm: 0.9999990950277007, iteration: 124683
loss: 0.9911854863166809,grad_norm: 0.999999135541043, iteration: 124684
loss: 0.9825872778892517,grad_norm: 0.8177134882166367, iteration: 124685
loss: 0.9702040553092957,grad_norm: 0.9999991897180782, iteration: 124686
loss: 0.9709805846214294,grad_norm: 0.9999991046735484, iteration: 124687
loss: 0.9826458096504211,grad_norm: 0.9208756669580432, iteration: 124688
loss: 1.034112572669983,grad_norm: 0.9999993721347532, iteration: 124689
loss: 0.9564578533172607,grad_norm: 0.9999992370978765, iteration: 124690
loss: 1.0011214017868042,grad_norm: 0.9999991848542739, iteration: 124691
loss: 1.0246250629425049,grad_norm: 0.9746184773862309, iteration: 124692
loss: 1.0096435546875,grad_norm: 0.9999991837881367, iteration: 124693
loss: 0.977355420589447,grad_norm: 0.9999990718886137, iteration: 124694
loss: 1.0048902034759521,grad_norm: 0.9999991559142325, iteration: 124695
loss: 1.0080419778823853,grad_norm: 0.999998974575764, iteration: 124696
loss: 0.9531741738319397,grad_norm: 0.9999991683127702, iteration: 124697
loss: 1.02370285987854,grad_norm: 0.8932732808024358, iteration: 124698
loss: 0.9982473850250244,grad_norm: 0.9244720667217345, iteration: 124699
loss: 1.0339654684066772,grad_norm: 0.9999990908350205, iteration: 124700
loss: 0.9791641235351562,grad_norm: 0.9999990643385394, iteration: 124701
loss: 0.9719403982162476,grad_norm: 0.999999044307747, iteration: 124702
loss: 0.9963057637214661,grad_norm: 0.9409349977576954, iteration: 124703
loss: 1.0028656721115112,grad_norm: 0.9999990496399415, iteration: 124704
loss: 0.9950615763664246,grad_norm: 0.9529464860660728, iteration: 124705
loss: 1.0125930309295654,grad_norm: 0.8933247679513305, iteration: 124706
loss: 0.9322238564491272,grad_norm: 0.9750605430364381, iteration: 124707
loss: 1.0401555299758911,grad_norm: 0.9999989869453585, iteration: 124708
loss: 1.0102802515029907,grad_norm: 0.9999990973403743, iteration: 124709
loss: 0.995855450630188,grad_norm: 0.889187177249975, iteration: 124710
loss: 0.9644180536270142,grad_norm: 0.9474552577997747, iteration: 124711
loss: 1.0394002199172974,grad_norm: 0.9999995236861, iteration: 124712
loss: 0.9956462979316711,grad_norm: 0.9999990727927562, iteration: 124713
loss: 0.9803867936134338,grad_norm: 0.9227771076146002, iteration: 124714
loss: 1.0351876020431519,grad_norm: 0.8782531361059711, iteration: 124715
loss: 1.028613567352295,grad_norm: 0.8244456445684727, iteration: 124716
loss: 1.0098942518234253,grad_norm: 0.9790433874368357, iteration: 124717
loss: 0.98894202709198,grad_norm: 0.9971047539171072, iteration: 124718
loss: 1.035295009613037,grad_norm: 0.9999991145581123, iteration: 124719
loss: 1.0137096643447876,grad_norm: 0.9999992261131544, iteration: 124720
loss: 1.0106170177459717,grad_norm: 0.9999990914991485, iteration: 124721
loss: 1.009387493133545,grad_norm: 0.9936505532673281, iteration: 124722
loss: 1.0092853307724,grad_norm: 0.8973544581288951, iteration: 124723
loss: 0.9680535197257996,grad_norm: 0.9999989470160394, iteration: 124724
loss: 1.0370008945465088,grad_norm: 0.9999991497821686, iteration: 124725
loss: 1.00864577293396,grad_norm: 0.9999990062168457, iteration: 124726
loss: 0.9801828861236572,grad_norm: 0.9999992107940927, iteration: 124727
loss: 1.0030573606491089,grad_norm: 0.8316563402522007, iteration: 124728
loss: 0.9947125315666199,grad_norm: 0.999999148590008, iteration: 124729
loss: 1.0027120113372803,grad_norm: 0.9999991739064523, iteration: 124730
loss: 1.0020198822021484,grad_norm: 0.9999990384440269, iteration: 124731
loss: 0.998107373714447,grad_norm: 0.9256540203515974, iteration: 124732
loss: 0.9888087511062622,grad_norm: 0.9026958531611078, iteration: 124733
loss: 0.9992177486419678,grad_norm: 0.9999990733840591, iteration: 124734
loss: 0.983707070350647,grad_norm: 0.9999993528409328, iteration: 124735
loss: 0.9681289792060852,grad_norm: 0.9999991759828125, iteration: 124736
loss: 1.0185410976409912,grad_norm: 0.9949469495886959, iteration: 124737
loss: 1.0391470193862915,grad_norm: 0.9528475409200118, iteration: 124738
loss: 0.9704440832138062,grad_norm: 0.9988260705207119, iteration: 124739
loss: 1.0196750164031982,grad_norm: 0.9999989033327543, iteration: 124740
loss: 0.9963718056678772,grad_norm: 0.9890205524246656, iteration: 124741
loss: 0.9943636059761047,grad_norm: 0.99999922800898, iteration: 124742
loss: 0.9713714122772217,grad_norm: 0.9999991397641265, iteration: 124743
loss: 0.988893985748291,grad_norm: 0.9999991356203649, iteration: 124744
loss: 1.0024021863937378,grad_norm: 0.9332560936294383, iteration: 124745
loss: 0.9728960990905762,grad_norm: 0.9079197061338865, iteration: 124746
loss: 1.0348607301712036,grad_norm: 0.9999991167534745, iteration: 124747
loss: 0.9967653155326843,grad_norm: 0.9171778919302501, iteration: 124748
loss: 1.013343334197998,grad_norm: 0.9906554926132775, iteration: 124749
loss: 0.9820970296859741,grad_norm: 0.8868387576706369, iteration: 124750
loss: 1.0463553667068481,grad_norm: 0.9999995071030733, iteration: 124751
loss: 0.9974387884140015,grad_norm: 0.9999992384967699, iteration: 124752
loss: 1.0139827728271484,grad_norm: 0.9999993627355763, iteration: 124753
loss: 0.987834632396698,grad_norm: 0.9999991036886343, iteration: 124754
loss: 0.9758048057556152,grad_norm: 0.999999112904922, iteration: 124755
loss: 1.0094082355499268,grad_norm: 0.9445250547212635, iteration: 124756
loss: 1.0224486589431763,grad_norm: 0.999999043855004, iteration: 124757
loss: 0.9816147089004517,grad_norm: 0.9999991464707049, iteration: 124758
loss: 1.0119959115982056,grad_norm: 0.9999992484929198, iteration: 124759
loss: 1.0059213638305664,grad_norm: 0.9999990412684122, iteration: 124760
loss: 0.9888960719108582,grad_norm: 0.9999994139995256, iteration: 124761
loss: 1.027283787727356,grad_norm: 0.9772525766246811, iteration: 124762
loss: 1.0181987285614014,grad_norm: 0.9999992361738986, iteration: 124763
loss: 0.9738937616348267,grad_norm: 0.9999992245681661, iteration: 124764
loss: 0.9764760732650757,grad_norm: 0.9999989853546186, iteration: 124765
loss: 1.0084409713745117,grad_norm: 0.9125948884562042, iteration: 124766
loss: 0.9952392578125,grad_norm: 0.9545488441965884, iteration: 124767
loss: 1.0740646123886108,grad_norm: 0.9999992911025849, iteration: 124768
loss: 0.977046012878418,grad_norm: 0.9061466383534075, iteration: 124769
loss: 0.9874390959739685,grad_norm: 0.9999991977915836, iteration: 124770
loss: 0.9706752300262451,grad_norm: 0.9971083182453774, iteration: 124771
loss: 0.9838197231292725,grad_norm: 0.9999991688968486, iteration: 124772
loss: 0.9950064420700073,grad_norm: 0.9999992610859318, iteration: 124773
loss: 0.9875164031982422,grad_norm: 0.9999990880310856, iteration: 124774
loss: 1.0089610815048218,grad_norm: 0.9999997984829201, iteration: 124775
loss: 1.0217472314834595,grad_norm: 0.9999990898770806, iteration: 124776
loss: 0.9823163151741028,grad_norm: 0.9705292559965716, iteration: 124777
loss: 1.0419981479644775,grad_norm: 0.999999088584591, iteration: 124778
loss: 0.9744031429290771,grad_norm: 0.9999992115033199, iteration: 124779
loss: 0.9638077616691589,grad_norm: 0.999999261751552, iteration: 124780
loss: 1.0197430849075317,grad_norm: 0.9999996454284968, iteration: 124781
loss: 0.9787919521331787,grad_norm: 0.945037595476067, iteration: 124782
loss: 0.9784200191497803,grad_norm: 0.9999991441835298, iteration: 124783
loss: 1.0062267780303955,grad_norm: 0.958836660391782, iteration: 124784
loss: 1.0318012237548828,grad_norm: 0.9999990774636074, iteration: 124785
loss: 1.0302250385284424,grad_norm: 0.999999482802134, iteration: 124786
loss: 0.996684730052948,grad_norm: 0.999999123410571, iteration: 124787
loss: 0.996242105960846,grad_norm: 0.9772037032247416, iteration: 124788
loss: 0.9870583415031433,grad_norm: 0.9461452433307997, iteration: 124789
loss: 1.0123443603515625,grad_norm: 0.9999994723269666, iteration: 124790
loss: 0.9973487257957458,grad_norm: 0.9999992025497249, iteration: 124791
loss: 0.9957823157310486,grad_norm: 0.9060796987865536, iteration: 124792
loss: 0.9675869345664978,grad_norm: 0.9043093284447381, iteration: 124793
loss: 0.9719788432121277,grad_norm: 0.9999992142989808, iteration: 124794
loss: 0.9897958636283875,grad_norm: 0.9501141835841311, iteration: 124795
loss: 0.9907202124595642,grad_norm: 0.9278528248251046, iteration: 124796
loss: 1.0147759914398193,grad_norm: 0.9070736358487412, iteration: 124797
loss: 0.9969538450241089,grad_norm: 0.9999991777415672, iteration: 124798
loss: 1.0055232048034668,grad_norm: 0.9999992388810788, iteration: 124799
loss: 1.0069607496261597,grad_norm: 0.9999990864698248, iteration: 124800
loss: 1.0084582567214966,grad_norm: 0.9999991540832476, iteration: 124801
loss: 1.0221096277236938,grad_norm: 0.8658246037822389, iteration: 124802
loss: 0.9944072365760803,grad_norm: 0.9999992431654265, iteration: 124803
loss: 0.9897194504737854,grad_norm: 0.8493258412550173, iteration: 124804
loss: 0.9680430889129639,grad_norm: 0.9999991247440626, iteration: 124805
loss: 0.981429398059845,grad_norm: 0.9418898546496375, iteration: 124806
loss: 1.0217467546463013,grad_norm: 0.9497616683808148, iteration: 124807
loss: 1.04214346408844,grad_norm: 0.9999991112146864, iteration: 124808
loss: 1.0338950157165527,grad_norm: 0.9999991415061024, iteration: 124809
loss: 0.9758662581443787,grad_norm: 0.9999992986582138, iteration: 124810
loss: 1.0178804397583008,grad_norm: 0.9999991342042702, iteration: 124811
loss: 1.025924563407898,grad_norm: 0.9999991922787463, iteration: 124812
loss: 1.0607972145080566,grad_norm: 0.9999990768057188, iteration: 124813
loss: 1.0913951396942139,grad_norm: 0.9999997230580906, iteration: 124814
loss: 0.9701564311981201,grad_norm: 0.9669053881200177, iteration: 124815
loss: 1.005557656288147,grad_norm: 0.922340243827089, iteration: 124816
loss: 1.0007917881011963,grad_norm: 0.9999990328892361, iteration: 124817
loss: 0.9931990504264832,grad_norm: 0.9999996175649831, iteration: 124818
loss: 1.0372284650802612,grad_norm: 0.9999991308260886, iteration: 124819
loss: 1.0040544271469116,grad_norm: 0.9999991625926066, iteration: 124820
loss: 0.9955764412879944,grad_norm: 0.9953543732663909, iteration: 124821
loss: 1.002211093902588,grad_norm: 0.8754829526444009, iteration: 124822
loss: 0.997671902179718,grad_norm: 0.9999991723279587, iteration: 124823
loss: 1.0249159336090088,grad_norm: 0.9999990126002266, iteration: 124824
loss: 0.9812761545181274,grad_norm: 0.9710037291430698, iteration: 124825
loss: 0.9537619352340698,grad_norm: 0.926148051258424, iteration: 124826
loss: 1.0272343158721924,grad_norm: 0.9787519246178721, iteration: 124827
loss: 0.990267276763916,grad_norm: 0.9641233112334406, iteration: 124828
loss: 0.9936017394065857,grad_norm: 0.9999992024787261, iteration: 124829
loss: 1.0115596055984497,grad_norm: 0.9836544407743819, iteration: 124830
loss: 1.001381278038025,grad_norm: 0.8575905570097829, iteration: 124831
loss: 0.9946987628936768,grad_norm: 0.9877713888313551, iteration: 124832
loss: 0.967781126499176,grad_norm: 0.9701725146853122, iteration: 124833
loss: 1.0187574625015259,grad_norm: 0.8961343117111606, iteration: 124834
loss: 1.0179080963134766,grad_norm: 0.9999992069839494, iteration: 124835
loss: 0.9814034104347229,grad_norm: 0.9999991541631902, iteration: 124836
loss: 0.982926607131958,grad_norm: 0.93299611112954, iteration: 124837
loss: 1.0001304149627686,grad_norm: 0.9999991498157638, iteration: 124838
loss: 0.9795873165130615,grad_norm: 0.9999992509445301, iteration: 124839
loss: 0.9953165054321289,grad_norm: 0.9285607816372319, iteration: 124840
loss: 0.9898689985275269,grad_norm: 0.9999991959318733, iteration: 124841
loss: 0.9890945553779602,grad_norm: 0.9999990710991914, iteration: 124842
loss: 1.0018640756607056,grad_norm: 0.9187848868190781, iteration: 124843
loss: 1.0241998434066772,grad_norm: 0.999999179699734, iteration: 124844
loss: 0.97465980052948,grad_norm: 0.9680223653950494, iteration: 124845
loss: 0.9901227355003357,grad_norm: 0.9509163604461112, iteration: 124846
loss: 1.0116081237792969,grad_norm: 0.9999990622596672, iteration: 124847
loss: 1.011192798614502,grad_norm: 0.8886062049552625, iteration: 124848
loss: 0.9890853762626648,grad_norm: 0.8350160954879594, iteration: 124849
loss: 0.9906172752380371,grad_norm: 0.9999990304646929, iteration: 124850
loss: 0.986799955368042,grad_norm: 0.9200997749265495, iteration: 124851
loss: 0.9959973096847534,grad_norm: 0.9999990087304317, iteration: 124852
loss: 1.004830002784729,grad_norm: 0.7899219837277429, iteration: 124853
loss: 0.9762386679649353,grad_norm: 0.8875569932330084, iteration: 124854
loss: 1.0006544589996338,grad_norm: 0.9999990983063884, iteration: 124855
loss: 1.00241219997406,grad_norm: 0.9850326781003501, iteration: 124856
loss: 0.9653385281562805,grad_norm: 0.999999072324862, iteration: 124857
loss: 1.0062251091003418,grad_norm: 0.999999161545891, iteration: 124858
loss: 1.000930905342102,grad_norm: 0.9999992375231315, iteration: 124859
loss: 1.0259294509887695,grad_norm: 0.8544040964684303, iteration: 124860
loss: 0.9609348177909851,grad_norm: 0.9728268792827997, iteration: 124861
loss: 1.0544129610061646,grad_norm: 0.9987978836542741, iteration: 124862
loss: 0.983132004737854,grad_norm: 0.9676106271692363, iteration: 124863
loss: 1.0426342487335205,grad_norm: 0.9856131424230627, iteration: 124864
loss: 0.9851484894752502,grad_norm: 0.9999990412816766, iteration: 124865
loss: 1.011587142944336,grad_norm: 0.8808121615828249, iteration: 124866
loss: 0.9998771548271179,grad_norm: 0.9999993516489566, iteration: 124867
loss: 0.9947057366371155,grad_norm: 0.9999992362873691, iteration: 124868
loss: 1.006550669670105,grad_norm: 0.9999989763843122, iteration: 124869
loss: 0.9839188456535339,grad_norm: 0.9999990875744019, iteration: 124870
loss: 1.015488624572754,grad_norm: 0.9999992056767609, iteration: 124871
loss: 0.9978772401809692,grad_norm: 0.9999992741608881, iteration: 124872
loss: 1.0162043571472168,grad_norm: 0.9999992517861024, iteration: 124873
loss: 0.9902487993240356,grad_norm: 0.9999990020097519, iteration: 124874
loss: 0.9960014820098877,grad_norm: 0.999998937656682, iteration: 124875
loss: 0.9646342396736145,grad_norm: 0.8943447522143285, iteration: 124876
loss: 0.9734373092651367,grad_norm: 0.9985771785486681, iteration: 124877
loss: 1.2245293855667114,grad_norm: 0.9999994429053835, iteration: 124878
loss: 0.9975553750991821,grad_norm: 0.8764064318199358, iteration: 124879
loss: 0.9835674166679382,grad_norm: 0.9999991752767726, iteration: 124880
loss: 0.9846730828285217,grad_norm: 0.9999992719627298, iteration: 124881
loss: 0.986716628074646,grad_norm: 0.9332125707396965, iteration: 124882
loss: 0.9718033075332642,grad_norm: 0.9999991134329946, iteration: 124883
loss: 1.0355467796325684,grad_norm: 0.9999995812841147, iteration: 124884
loss: 1.0183449983596802,grad_norm: 0.9999992230069059, iteration: 124885
loss: 1.0046637058258057,grad_norm: 0.9999992988484636, iteration: 124886
loss: 1.0035117864608765,grad_norm: 0.9999991213499569, iteration: 124887
loss: 1.0107662677764893,grad_norm: 0.9999992020879592, iteration: 124888
loss: 1.0613685846328735,grad_norm: 0.999999396699576, iteration: 124889
loss: 1.0002192258834839,grad_norm: 0.8274868373493335, iteration: 124890
loss: 1.0451982021331787,grad_norm: 0.8629278042968928, iteration: 124891
loss: 0.9941885471343994,grad_norm: 0.9999989904627836, iteration: 124892
loss: 0.993781328201294,grad_norm: 0.9999989707380516, iteration: 124893
loss: 0.9684153199195862,grad_norm: 0.9999992102219382, iteration: 124894
loss: 0.980569064617157,grad_norm: 0.9999998661783215, iteration: 124895
loss: 1.014727234840393,grad_norm: 0.9102733720352649, iteration: 124896
loss: 1.0626685619354248,grad_norm: 0.9999993537766765, iteration: 124897
loss: 1.0081866979599,grad_norm: 0.9999990905294749, iteration: 124898
loss: 0.991682231426239,grad_norm: 0.999999385854549, iteration: 124899
loss: 0.9875308871269226,grad_norm: 0.9129082351678769, iteration: 124900
loss: 1.0126084089279175,grad_norm: 0.9999990198148667, iteration: 124901
loss: 0.9905195236206055,grad_norm: 0.9999991356022807, iteration: 124902
loss: 0.989439070224762,grad_norm: 0.9259495979036074, iteration: 124903
loss: 0.976559579372406,grad_norm: 0.9824123419083399, iteration: 124904
loss: 0.995259165763855,grad_norm: 0.9999992247363606, iteration: 124905
loss: 1.0047539472579956,grad_norm: 0.9999990139290919, iteration: 124906
loss: 0.9501408338546753,grad_norm: 0.999999120513244, iteration: 124907
loss: 1.0096189975738525,grad_norm: 0.9999992009460805, iteration: 124908
loss: 0.9767501950263977,grad_norm: 0.9999990737434253, iteration: 124909
loss: 1.0169775485992432,grad_norm: 0.9507490547301729, iteration: 124910
loss: 1.0226973295211792,grad_norm: 0.9999991654773989, iteration: 124911
loss: 1.012995958328247,grad_norm: 0.8615387911527531, iteration: 124912
loss: 1.0057737827301025,grad_norm: 0.9999992255768073, iteration: 124913
loss: 0.9744930863380432,grad_norm: 0.9999991696583286, iteration: 124914
loss: 1.0139456987380981,grad_norm: 0.8733630253777752, iteration: 124915
loss: 1.086434006690979,grad_norm: 0.9999998352742614, iteration: 124916
loss: 0.973696768283844,grad_norm: 0.999999228210466, iteration: 124917
loss: 1.0166747570037842,grad_norm: 0.9999991508767517, iteration: 124918
loss: 0.9711428284645081,grad_norm: 0.9999991242264753, iteration: 124919
loss: 0.9631355404853821,grad_norm: 0.9999998020819251, iteration: 124920
loss: 1.0043456554412842,grad_norm: 0.9999992311661939, iteration: 124921
loss: 0.994667112827301,grad_norm: 0.99999925847444, iteration: 124922
loss: 1.0009924173355103,grad_norm: 0.9027892261292867, iteration: 124923
loss: 1.0897555351257324,grad_norm: 0.999999080745879, iteration: 124924
loss: 0.9883838891983032,grad_norm: 0.9999989641829117, iteration: 124925
loss: 0.9916782975196838,grad_norm: 0.9999991395338242, iteration: 124926
loss: 0.9869851469993591,grad_norm: 0.8904626614144181, iteration: 124927
loss: 1.0301862955093384,grad_norm: 0.999999235577269, iteration: 124928
loss: 0.9855234026908875,grad_norm: 0.9999992497259671, iteration: 124929
loss: 0.9974892139434814,grad_norm: 0.9999991103254495, iteration: 124930
loss: 1.007813572883606,grad_norm: 0.9375355810244115, iteration: 124931
loss: 0.9786474704742432,grad_norm: 0.9999989867075006, iteration: 124932
loss: 0.9825018644332886,grad_norm: 0.9999991810211218, iteration: 124933
loss: 1.0027811527252197,grad_norm: 0.9455311466399625, iteration: 124934
loss: 0.9388893246650696,grad_norm: 0.9999991525767393, iteration: 124935
loss: 1.0289427042007446,grad_norm: 0.9999992162771852, iteration: 124936
loss: 0.9963468909263611,grad_norm: 0.9999990369938361, iteration: 124937
loss: 1.057313323020935,grad_norm: 0.9999990916970151, iteration: 124938
loss: 0.9957188367843628,grad_norm: 0.9330066885955619, iteration: 124939
loss: 1.0007448196411133,grad_norm: 0.999999133894722, iteration: 124940
loss: 0.9738661646842957,grad_norm: 0.9999991906561017, iteration: 124941
loss: 0.9874426126480103,grad_norm: 0.9520341781218419, iteration: 124942
loss: 1.0064036846160889,grad_norm: 0.8952671456055648, iteration: 124943
loss: 0.9996099472045898,grad_norm: 0.9999992030017834, iteration: 124944
loss: 0.9859992861747742,grad_norm: 0.8765699657294188, iteration: 124945
loss: 1.0238703489303589,grad_norm: 0.9999994278176596, iteration: 124946
loss: 1.0412607192993164,grad_norm: 0.9999992658585298, iteration: 124947
loss: 0.9986271858215332,grad_norm: 0.9956911891216901, iteration: 124948
loss: 0.9710306525230408,grad_norm: 0.9999990940434413, iteration: 124949
loss: 0.960534930229187,grad_norm: 0.9999991645614529, iteration: 124950
loss: 1.0005003213882446,grad_norm: 0.9847014483469252, iteration: 124951
loss: 0.9964112639427185,grad_norm: 0.9999992720100883, iteration: 124952
loss: 1.0187840461730957,grad_norm: 0.9999991681429004, iteration: 124953
loss: 0.9745869636535645,grad_norm: 0.9852241765668482, iteration: 124954
loss: 0.9892128705978394,grad_norm: 0.9777907113935609, iteration: 124955
loss: 0.9892522692680359,grad_norm: 0.9999991229022116, iteration: 124956
loss: 1.0273369550704956,grad_norm: 0.9999992681169829, iteration: 124957
loss: 0.9892962574958801,grad_norm: 0.9999991133814365, iteration: 124958
loss: 1.0253769159317017,grad_norm: 0.9876693235291653, iteration: 124959
loss: 1.0065747499465942,grad_norm: 0.9877202972479068, iteration: 124960
loss: 0.9907759428024292,grad_norm: 0.9999997503323954, iteration: 124961
loss: 0.9698391556739807,grad_norm: 0.999999246111204, iteration: 124962
loss: 0.969526469707489,grad_norm: 0.9999990275194512, iteration: 124963
loss: 0.9938063025474548,grad_norm: 0.9999992492707671, iteration: 124964
loss: 1.018815040588379,grad_norm: 0.999999352856452, iteration: 124965
loss: 1.0140471458435059,grad_norm: 0.9999997363309074, iteration: 124966
loss: 1.0766528844833374,grad_norm: 0.9999998003629057, iteration: 124967
loss: 0.9832534790039062,grad_norm: 0.9971562738758982, iteration: 124968
loss: 1.0215694904327393,grad_norm: 0.9999991810120319, iteration: 124969
loss: 0.9738276600837708,grad_norm: 0.999999127322932, iteration: 124970
loss: 1.0186727046966553,grad_norm: 0.9999992772854092, iteration: 124971
loss: 1.0427151918411255,grad_norm: 0.918254660795337, iteration: 124972
loss: 0.972491443157196,grad_norm: 0.9999989728797207, iteration: 124973
loss: 0.960701584815979,grad_norm: 0.9999991188670495, iteration: 124974
loss: 1.018771767616272,grad_norm: 0.9999991012362152, iteration: 124975
loss: 0.9912900328636169,grad_norm: 0.9999991898875326, iteration: 124976
loss: 0.9675469398498535,grad_norm: 0.8312070240043266, iteration: 124977
loss: 1.015718698501587,grad_norm: 0.9999992217582678, iteration: 124978
loss: 1.0244781970977783,grad_norm: 0.9999994558296849, iteration: 124979
loss: 0.9741675853729248,grad_norm: 0.9999991395147002, iteration: 124980
loss: 1.0861718654632568,grad_norm: 0.9999993879812891, iteration: 124981
loss: 1.0394920110702515,grad_norm: 0.9999995488407069, iteration: 124982
loss: 1.0390208959579468,grad_norm: 1.0000000096318002, iteration: 124983
loss: 0.9792900085449219,grad_norm: 0.9518262904480144, iteration: 124984
loss: 1.0130468606948853,grad_norm: 0.9999991868536124, iteration: 124985
loss: 0.9703613519668579,grad_norm: 0.9999989812496329, iteration: 124986
loss: 0.9806177020072937,grad_norm: 0.9999989053530421, iteration: 124987
loss: 0.9966939091682434,grad_norm: 0.9999990549803509, iteration: 124988
loss: 1.0163558721542358,grad_norm: 0.9999992222833647, iteration: 124989
loss: 1.0121158361434937,grad_norm: 0.9999990823389814, iteration: 124990
loss: 0.9973071813583374,grad_norm: 0.9999991403655701, iteration: 124991
loss: 0.9844682216644287,grad_norm: 0.9999990086343505, iteration: 124992
loss: 1.0029942989349365,grad_norm: 0.9999989974713204, iteration: 124993
loss: 1.0019279718399048,grad_norm: 0.999999023730625, iteration: 124994
loss: 0.9953280687332153,grad_norm: 0.9999991265651589, iteration: 124995
loss: 1.0182946920394897,grad_norm: 0.9999990935855073, iteration: 124996
loss: 1.0198723077774048,grad_norm: 0.9999992895924895, iteration: 124997
loss: 0.961783766746521,grad_norm: 0.8735321634663031, iteration: 124998
loss: 1.0596047639846802,grad_norm: 0.9999996395395057, iteration: 124999
loss: 1.0039863586425781,grad_norm: 0.9999994517764069, iteration: 125000
loss: 0.9886954426765442,grad_norm: 0.8156372065318209, iteration: 125001
loss: 1.0104671716690063,grad_norm: 0.9999991238029436, iteration: 125002
loss: 0.9996665716171265,grad_norm: 0.9999993147922505, iteration: 125003
loss: 0.9666094779968262,grad_norm: 0.9886353869194857, iteration: 125004
loss: 0.9859700202941895,grad_norm: 0.9999991657689767, iteration: 125005
loss: 1.0035791397094727,grad_norm: 0.9999990878559797, iteration: 125006
loss: 1.0185073614120483,grad_norm: 0.9999989144019914, iteration: 125007
loss: 1.0342285633087158,grad_norm: 0.9636111557353492, iteration: 125008
loss: 0.9912347197532654,grad_norm: 0.9845610119504619, iteration: 125009
loss: 0.9953076839447021,grad_norm: 0.9790347518790474, iteration: 125010
loss: 1.052839994430542,grad_norm: 1.0000000086407528, iteration: 125011
loss: 1.013893961906433,grad_norm: 0.9999991560463513, iteration: 125012
loss: 0.9976162910461426,grad_norm: 0.9999991599414415, iteration: 125013
loss: 1.0104296207427979,grad_norm: 0.9999991877934326, iteration: 125014
loss: 0.9947205781936646,grad_norm: 0.999999124428409, iteration: 125015
loss: 1.1201157569885254,grad_norm: 0.9664000284275913, iteration: 125016
loss: 1.0116143226623535,grad_norm: 0.9814391688628682, iteration: 125017
loss: 0.9851657152175903,grad_norm: 0.9999990084137773, iteration: 125018
loss: 1.006617546081543,grad_norm: 0.9454836909378787, iteration: 125019
loss: 0.982231855392456,grad_norm: 0.9999991919490642, iteration: 125020
loss: 1.0250194072723389,grad_norm: 0.9999991824320381, iteration: 125021
loss: 0.9797514081001282,grad_norm: 0.9736424973012645, iteration: 125022
loss: 1.0340280532836914,grad_norm: 0.9999996688382087, iteration: 125023
loss: 0.9843679070472717,grad_norm: 0.9261608360028747, iteration: 125024
loss: 1.0294651985168457,grad_norm: 0.9999991659661699, iteration: 125025
loss: 0.9989591836929321,grad_norm: 0.999999132818555, iteration: 125026
loss: 1.0176217555999756,grad_norm: 0.9246938396080132, iteration: 125027
loss: 0.9920109510421753,grad_norm: 0.9999994394436368, iteration: 125028
loss: 1.0274606943130493,grad_norm: 0.866546709185068, iteration: 125029
loss: 0.9896344542503357,grad_norm: 0.9999993430553619, iteration: 125030
loss: 0.9764752388000488,grad_norm: 0.999999392977553, iteration: 125031
loss: 0.9970547556877136,grad_norm: 0.9999990523140387, iteration: 125032
loss: 1.0249030590057373,grad_norm: 0.99999905044611, iteration: 125033
loss: 0.9927441477775574,grad_norm: 0.9999992414130383, iteration: 125034
loss: 1.0046378374099731,grad_norm: 0.9999991970657625, iteration: 125035
loss: 0.9683460593223572,grad_norm: 0.999999062630826, iteration: 125036
loss: 0.9840950965881348,grad_norm: 0.9999990068121715, iteration: 125037
loss: 0.987395703792572,grad_norm: 0.9999997908138732, iteration: 125038
loss: 0.9863083958625793,grad_norm: 0.9999992318186541, iteration: 125039
loss: 0.9881874322891235,grad_norm: 0.9999990097976693, iteration: 125040
loss: 0.9929240942001343,grad_norm: 0.9236466434875179, iteration: 125041
loss: 0.9778859615325928,grad_norm: 0.8456246037075853, iteration: 125042
loss: 1.148443579673767,grad_norm: 0.999999929033032, iteration: 125043
loss: 1.0358850955963135,grad_norm: 0.99999922439083, iteration: 125044
loss: 1.0083774328231812,grad_norm: 0.9999989476337955, iteration: 125045
loss: 1.0234256982803345,grad_norm: 0.999999251543636, iteration: 125046
loss: 1.0375049114227295,grad_norm: 0.9088013241013125, iteration: 125047
loss: 1.0034880638122559,grad_norm: 0.9999989203804801, iteration: 125048
loss: 1.0203742980957031,grad_norm: 0.9999992638273268, iteration: 125049
loss: 1.0000654458999634,grad_norm: 0.8018221485042745, iteration: 125050
loss: 1.0207628011703491,grad_norm: 0.9999993495517034, iteration: 125051
loss: 1.0464221239089966,grad_norm: 0.8728965255571688, iteration: 125052
loss: 0.9890926480293274,grad_norm: 0.9999992071329797, iteration: 125053
loss: 1.1085636615753174,grad_norm: 0.99999937252448, iteration: 125054
loss: 0.9909317493438721,grad_norm: 0.9568877831915562, iteration: 125055
loss: 1.012218952178955,grad_norm: 0.9763035768633198, iteration: 125056
loss: 1.024507761001587,grad_norm: 0.971004091493336, iteration: 125057
loss: 1.0028589963912964,grad_norm: 0.999999147362023, iteration: 125058
loss: 0.9808456897735596,grad_norm: 0.8377516089529207, iteration: 125059
loss: 1.0322526693344116,grad_norm: 0.9999993187414252, iteration: 125060
loss: 1.0013471841812134,grad_norm: 0.9999992707426689, iteration: 125061
loss: 0.9889106750488281,grad_norm: 0.9209993496014396, iteration: 125062
loss: 1.1054518222808838,grad_norm: 0.9999992495280393, iteration: 125063
loss: 1.0079126358032227,grad_norm: 0.999999547202986, iteration: 125064
loss: 1.0268961191177368,grad_norm: 0.9999992454191194, iteration: 125065
loss: 1.0755788087844849,grad_norm: 0.9999992893969848, iteration: 125066
loss: 0.9979255795478821,grad_norm: 0.9924730659014012, iteration: 125067
loss: 1.0249297618865967,grad_norm: 0.9227930839379299, iteration: 125068
loss: 0.9982861876487732,grad_norm: 0.8533981405312532, iteration: 125069
loss: 0.9908965826034546,grad_norm: 0.9869311831160051, iteration: 125070
loss: 1.0073848962783813,grad_norm: 0.9999991515957466, iteration: 125071
loss: 1.0400607585906982,grad_norm: 0.9999991238544622, iteration: 125072
loss: 0.991049587726593,grad_norm: 0.9527727222844924, iteration: 125073
loss: 1.053497076034546,grad_norm: 0.9999990832339202, iteration: 125074
loss: 1.0169540643692017,grad_norm: 0.8910920602090802, iteration: 125075
loss: 1.0417287349700928,grad_norm: 0.9999990230851943, iteration: 125076
loss: 1.0063756704330444,grad_norm: 0.9999991268063176, iteration: 125077
loss: 1.1264845132827759,grad_norm: 0.9999993616079077, iteration: 125078
loss: 0.9806905388832092,grad_norm: 0.942413177248716, iteration: 125079
loss: 0.9990917444229126,grad_norm: 0.9999991775625207, iteration: 125080
loss: 0.9936281442642212,grad_norm: 0.9999990377623122, iteration: 125081
loss: 1.018957257270813,grad_norm: 0.9999991824330565, iteration: 125082
loss: 1.0048772096633911,grad_norm: 0.9807025583671982, iteration: 125083
loss: 0.9906678795814514,grad_norm: 0.9999992312827948, iteration: 125084
loss: 1.0215436220169067,grad_norm: 0.9823575960485703, iteration: 125085
loss: 0.9814090728759766,grad_norm: 0.9749507132688483, iteration: 125086
loss: 1.0003353357315063,grad_norm: 0.8940425181660193, iteration: 125087
loss: 0.9940837025642395,grad_norm: 0.9603721847462223, iteration: 125088
loss: 1.018032431602478,grad_norm: 0.9999992218786097, iteration: 125089
loss: 1.1479933261871338,grad_norm: 0.9999997506814391, iteration: 125090
loss: 1.018034815788269,grad_norm: 0.9999991369825585, iteration: 125091
loss: 1.0048763751983643,grad_norm: 0.9999996304582004, iteration: 125092
loss: 0.9561309218406677,grad_norm: 0.9999991465222275, iteration: 125093
loss: 0.9748533368110657,grad_norm: 0.9999990827585425, iteration: 125094
loss: 1.005279302597046,grad_norm: 0.9999990478540451, iteration: 125095
loss: 0.9927185773849487,grad_norm: 0.9999992502523196, iteration: 125096
loss: 0.9655042886734009,grad_norm: 0.9999992017644925, iteration: 125097
loss: 1.0046896934509277,grad_norm: 0.9999991127041785, iteration: 125098
loss: 0.9986189603805542,grad_norm: 0.9999991386896969, iteration: 125099
loss: 0.999226450920105,grad_norm: 0.999999630811857, iteration: 125100
loss: 1.031472086906433,grad_norm: 0.9999990089866662, iteration: 125101
loss: 1.003037929534912,grad_norm: 0.9999991883319607, iteration: 125102
loss: 0.996690034866333,grad_norm: 0.999999183279494, iteration: 125103
loss: 0.9944164752960205,grad_norm: 0.9999989792255664, iteration: 125104
loss: 0.9809996485710144,grad_norm: 0.9186033364461252, iteration: 125105
loss: 1.0263174772262573,grad_norm: 0.9999993253070748, iteration: 125106
loss: 1.0137218236923218,grad_norm: 0.9999990402822803, iteration: 125107
loss: 0.9816612005233765,grad_norm: 0.9999991930550665, iteration: 125108
loss: 1.0064215660095215,grad_norm: 0.9999991516572021, iteration: 125109
loss: 1.0332614183425903,grad_norm: 0.9999991960946881, iteration: 125110
loss: 0.9911973476409912,grad_norm: 0.9117279904766457, iteration: 125111
loss: 1.0003530979156494,grad_norm: 0.8741747477764514, iteration: 125112
loss: 1.0138194561004639,grad_norm: 0.9214143566695365, iteration: 125113
loss: 0.990873396396637,grad_norm: 0.9999990284648569, iteration: 125114
loss: 0.9920825958251953,grad_norm: 0.8605628751119874, iteration: 125115
loss: 0.9871512055397034,grad_norm: 0.9999993295692234, iteration: 125116
loss: 1.011765718460083,grad_norm: 0.9999992188813236, iteration: 125117
loss: 1.0018088817596436,grad_norm: 0.9667292270219626, iteration: 125118
loss: 1.0065034627914429,grad_norm: 0.9999990619323779, iteration: 125119
loss: 1.00703763961792,grad_norm: 0.9999995959665859, iteration: 125120
loss: 0.9997812509536743,grad_norm: 0.9999989817518276, iteration: 125121
loss: 1.0054905414581299,grad_norm: 0.9803970062143991, iteration: 125122
loss: 0.9901314973831177,grad_norm: 0.9999990576411524, iteration: 125123
loss: 0.9676618576049805,grad_norm: 0.9999989954906215, iteration: 125124
loss: 0.982350766658783,grad_norm: 0.9999991839074017, iteration: 125125
loss: 1.1519458293914795,grad_norm: 0.9999997544976739, iteration: 125126
loss: 0.986933708190918,grad_norm: 0.9999989182563012, iteration: 125127
loss: 1.038553237915039,grad_norm: 0.9999993164184121, iteration: 125128
loss: 0.9791075587272644,grad_norm: 0.9059980546649667, iteration: 125129
loss: 0.9779446125030518,grad_norm: 0.9999992022499911, iteration: 125130
loss: 0.9887184500694275,grad_norm: 0.8251378066033257, iteration: 125131
loss: 0.9974589943885803,grad_norm: 0.9999991817237639, iteration: 125132
loss: 1.0309969186782837,grad_norm: 0.999999406065967, iteration: 125133
loss: 0.977555513381958,grad_norm: 0.9554075196892338, iteration: 125134
loss: 0.9802588224411011,grad_norm: 0.9999990689975686, iteration: 125135
loss: 0.9768540263175964,grad_norm: 0.9999990039227303, iteration: 125136
loss: 1.060619592666626,grad_norm: 0.9999992846802904, iteration: 125137
loss: 0.978617787361145,grad_norm: 0.9999990359113682, iteration: 125138
loss: 1.0083225965499878,grad_norm: 0.9710443602935891, iteration: 125139
loss: 1.001680612564087,grad_norm: 0.9999992046231976, iteration: 125140
loss: 0.9710387587547302,grad_norm: 0.9999989450581402, iteration: 125141
loss: 1.0175268650054932,grad_norm: 0.9144399134475567, iteration: 125142
loss: 1.0055840015411377,grad_norm: 0.999999121591484, iteration: 125143
loss: 0.9814883470535278,grad_norm: 0.8778896746364024, iteration: 125144
loss: 1.0282518863677979,grad_norm: 0.9999991713385227, iteration: 125145
loss: 0.9896106123924255,grad_norm: 0.9999993351118122, iteration: 125146
loss: 0.9879846572875977,grad_norm: 0.9550578790295423, iteration: 125147
loss: 1.0236060619354248,grad_norm: 0.9999992408934198, iteration: 125148
loss: 1.0034061670303345,grad_norm: 0.8836720721310727, iteration: 125149
loss: 1.0203588008880615,grad_norm: 0.9695440476532482, iteration: 125150
loss: 0.9734851717948914,grad_norm: 0.999999202005409, iteration: 125151
loss: 0.9953563809394836,grad_norm: 0.9999990581010609, iteration: 125152
loss: 1.025492787361145,grad_norm: 0.999999584257104, iteration: 125153
loss: 1.042696237564087,grad_norm: 0.9999996599988992, iteration: 125154
loss: 0.9978532791137695,grad_norm: 0.9999996743684008, iteration: 125155
loss: 0.9947516918182373,grad_norm: 0.9126270403417268, iteration: 125156
loss: 1.0112160444259644,grad_norm: 0.9999992199150798, iteration: 125157
loss: 1.0233421325683594,grad_norm: 0.9999991185955593, iteration: 125158
loss: 0.9655022025108337,grad_norm: 0.9430024214868218, iteration: 125159
loss: 0.9987456202507019,grad_norm: 0.8116511157101896, iteration: 125160
loss: 0.9869584441184998,grad_norm: 0.9410035766963899, iteration: 125161
loss: 1.0327355861663818,grad_norm: 0.9999990869243754, iteration: 125162
loss: 1.0359724760055542,grad_norm: 0.9999991174284819, iteration: 125163
loss: 1.1249884366989136,grad_norm: 0.9999993096409925, iteration: 125164
loss: 0.9870067238807678,grad_norm: 0.99999903245466, iteration: 125165
loss: 0.9961994886398315,grad_norm: 0.9999990892142098, iteration: 125166
loss: 1.0026867389678955,grad_norm: 0.9999990584268819, iteration: 125167
loss: 0.9958560466766357,grad_norm: 0.9999992229619956, iteration: 125168
loss: 1.020957589149475,grad_norm: 0.9999998036537556, iteration: 125169
loss: 0.9787775874137878,grad_norm: 0.9375524280877886, iteration: 125170
loss: 1.0004005432128906,grad_norm: 0.9999992518485633, iteration: 125171
loss: 1.0033555030822754,grad_norm: 0.9999991060714677, iteration: 125172
loss: 0.9926997423171997,grad_norm: 0.9862540034239854, iteration: 125173
loss: 1.012924075126648,grad_norm: 0.9999990478542302, iteration: 125174
loss: 0.9744288325309753,grad_norm: 0.999999132512196, iteration: 125175
loss: 1.0312812328338623,grad_norm: 0.9999996610067784, iteration: 125176
loss: 1.0424964427947998,grad_norm: 0.9999992459549403, iteration: 125177
loss: 1.029228925704956,grad_norm: 0.9999992813597791, iteration: 125178
loss: 1.0297586917877197,grad_norm: 0.9999991363507371, iteration: 125179
loss: 0.9856379628181458,grad_norm: 0.9999989977969413, iteration: 125180
loss: 1.0112394094467163,grad_norm: 0.9917110897350135, iteration: 125181
loss: 1.011780858039856,grad_norm: 0.9087254752454321, iteration: 125182
loss: 0.9914964437484741,grad_norm: 0.9999990100684808, iteration: 125183
loss: 1.0010052919387817,grad_norm: 0.9445427227752046, iteration: 125184
loss: 0.9843562245368958,grad_norm: 0.8806448184172311, iteration: 125185
loss: 0.9808663725852966,grad_norm: 0.9999990536187716, iteration: 125186
loss: 1.0016108751296997,grad_norm: 0.9526925319689037, iteration: 125187
loss: 0.9938633441925049,grad_norm: 0.9999991829339512, iteration: 125188
loss: 0.9844285845756531,grad_norm: 0.9999990676413677, iteration: 125189
loss: 1.0112824440002441,grad_norm: 0.9999991478453325, iteration: 125190
loss: 1.0148789882659912,grad_norm: 0.9999992171220999, iteration: 125191
loss: 1.0096970796585083,grad_norm: 0.9350735591158748, iteration: 125192
loss: 1.0029659271240234,grad_norm: 0.8983786950754657, iteration: 125193
loss: 1.0104162693023682,grad_norm: 0.9999990466476539, iteration: 125194
loss: 0.9696346521377563,grad_norm: 0.9999990008955849, iteration: 125195
loss: 0.9993346929550171,grad_norm: 0.930447576429117, iteration: 125196
loss: 1.0875591039657593,grad_norm: 0.9999995672864074, iteration: 125197
loss: 0.9879642724990845,grad_norm: 0.9999991375455235, iteration: 125198
loss: 0.9582007527351379,grad_norm: 0.9999991792607609, iteration: 125199
loss: 1.0826517343521118,grad_norm: 0.9999993741445443, iteration: 125200
loss: 0.950286865234375,grad_norm: 0.8455960103824313, iteration: 125201
loss: 1.0540010929107666,grad_norm: 0.9999991372113953, iteration: 125202
loss: 1.0279566049575806,grad_norm: 0.9734837508174904, iteration: 125203
loss: 1.0605347156524658,grad_norm: 0.9999994836562552, iteration: 125204
loss: 1.0095607042312622,grad_norm: 0.9792099882259734, iteration: 125205
loss: 0.9937928915023804,grad_norm: 0.8980121100314326, iteration: 125206
loss: 0.9967910051345825,grad_norm: 0.9999991906548515, iteration: 125207
loss: 1.001462697982788,grad_norm: 0.8666066551974017, iteration: 125208
loss: 1.0036746263504028,grad_norm: 0.9999991964292545, iteration: 125209
loss: 1.0041296482086182,grad_norm: 0.9999992215161987, iteration: 125210
loss: 1.0125905275344849,grad_norm: 0.9572016846424463, iteration: 125211
loss: 1.009924292564392,grad_norm: 0.9499977521766853, iteration: 125212
loss: 1.040952444076538,grad_norm: 0.9998551384031453, iteration: 125213
loss: 0.974807620048523,grad_norm: 0.9999991757202201, iteration: 125214
loss: 1.0026401281356812,grad_norm: 0.9999991401849893, iteration: 125215
loss: 0.9994096159934998,grad_norm: 0.8782281729418591, iteration: 125216
loss: 0.9396271705627441,grad_norm: 0.8948240892314487, iteration: 125217
loss: 1.0105394124984741,grad_norm: 0.9999991552095533, iteration: 125218
loss: 0.9861823320388794,grad_norm: 0.9914517842568417, iteration: 125219
loss: 1.0085948705673218,grad_norm: 0.8791244264968229, iteration: 125220
loss: 1.0088953971862793,grad_norm: 0.9999989036804653, iteration: 125221
loss: 0.9689383506774902,grad_norm: 0.8640629378687539, iteration: 125222
loss: 0.9676880836486816,grad_norm: 0.9999989764203688, iteration: 125223
loss: 0.9773002862930298,grad_norm: 0.934822278110843, iteration: 125224
loss: 0.983680248260498,grad_norm: 0.9999992235770261, iteration: 125225
loss: 1.0062363147735596,grad_norm: 0.9999990880302714, iteration: 125226
loss: 0.9936746954917908,grad_norm: 0.9788775973435727, iteration: 125227
loss: 1.0145440101623535,grad_norm: 0.9999989187061198, iteration: 125228
loss: 1.0326186418533325,grad_norm: 0.8528963331034011, iteration: 125229
loss: 0.9968359470367432,grad_norm: 0.9999992117165228, iteration: 125230
loss: 1.0079246759414673,grad_norm: 0.9999990349045621, iteration: 125231
loss: 1.0081260204315186,grad_norm: 0.9999990606086411, iteration: 125232
loss: 0.977225661277771,grad_norm: 0.9999990819442005, iteration: 125233
loss: 1.0105628967285156,grad_norm: 0.9999990616833694, iteration: 125234
loss: 0.967673659324646,grad_norm: 0.9657636494742107, iteration: 125235
loss: 0.9798552393913269,grad_norm: 0.9999990954670791, iteration: 125236
loss: 1.0004916191101074,grad_norm: 0.943486802562681, iteration: 125237
loss: 0.9811701774597168,grad_norm: 0.9570534362141162, iteration: 125238
loss: 0.9343464374542236,grad_norm: 0.9930318322925525, iteration: 125239
loss: 1.0186294317245483,grad_norm: 0.99999918295719, iteration: 125240
loss: 0.9864206314086914,grad_norm: 0.9999991409826048, iteration: 125241
loss: 1.0038957595825195,grad_norm: 0.9999991255047412, iteration: 125242
loss: 0.9892318248748779,grad_norm: 0.99999924539545, iteration: 125243
loss: 1.0018582344055176,grad_norm: 0.9914551374739824, iteration: 125244
loss: 1.039514422416687,grad_norm: 0.9999989864033201, iteration: 125245
loss: 1.0253779888153076,grad_norm: 0.9999992263429541, iteration: 125246
loss: 0.9766294360160828,grad_norm: 0.9957221613985209, iteration: 125247
loss: 0.9811148643493652,grad_norm: 0.999999146879182, iteration: 125248
loss: 1.0053426027297974,grad_norm: 0.9999992398979807, iteration: 125249
loss: 0.9924863576889038,grad_norm: 0.999999585454728, iteration: 125250
loss: 1.0341987609863281,grad_norm: 0.9999992734770337, iteration: 125251
loss: 1.0085961818695068,grad_norm: 0.8655264100278559, iteration: 125252
loss: 0.9996210336685181,grad_norm: 0.9937267907183447, iteration: 125253
loss: 0.9673939943313599,grad_norm: 0.8978531034374435, iteration: 125254
loss: 0.9828477501869202,grad_norm: 0.9999989938968321, iteration: 125255
loss: 0.9721605777740479,grad_norm: 0.9474253615237367, iteration: 125256
loss: 0.9917124509811401,grad_norm: 0.9999991885945565, iteration: 125257
loss: 0.9997876882553101,grad_norm: 0.9999991055485586, iteration: 125258
loss: 1.019653081893921,grad_norm: 0.9999991833473357, iteration: 125259
loss: 0.9744134545326233,grad_norm: 0.9824854051813725, iteration: 125260
loss: 0.9662233591079712,grad_norm: 0.9999990396635735, iteration: 125261
loss: 1.0283125638961792,grad_norm: 0.9999990496667859, iteration: 125262
loss: 1.0062450170516968,grad_norm: 0.9932612951092189, iteration: 125263
loss: 1.0062617063522339,grad_norm: 0.9343965394352819, iteration: 125264
loss: 1.0013628005981445,grad_norm: 0.9434975538170876, iteration: 125265
loss: 0.9830763339996338,grad_norm: 0.9999993645240643, iteration: 125266
loss: 0.97445148229599,grad_norm: 0.9999990281467511, iteration: 125267
loss: 1.0411951541900635,grad_norm: 0.9892125325186734, iteration: 125268
loss: 1.0458344221115112,grad_norm: 0.9412038730905654, iteration: 125269
loss: 0.986718475818634,grad_norm: 0.9761556951698852, iteration: 125270
loss: 1.0077173709869385,grad_norm: 0.9390251123695387, iteration: 125271
loss: 0.9993918538093567,grad_norm: 0.9388551139960034, iteration: 125272
loss: 0.9797987937927246,grad_norm: 0.9999993052435098, iteration: 125273
loss: 1.0177605152130127,grad_norm: 0.9999999194969792, iteration: 125274
loss: 0.9858476519584656,grad_norm: 0.9999991558555972, iteration: 125275
loss: 0.9970166087150574,grad_norm: 0.99999920000301, iteration: 125276
loss: 0.9940124750137329,grad_norm: 0.999999229073984, iteration: 125277
loss: 1.0143921375274658,grad_norm: 0.9999990971633091, iteration: 125278
loss: 1.0018960237503052,grad_norm: 0.9754690801180478, iteration: 125279
loss: 0.9649844765663147,grad_norm: 0.9495064660389884, iteration: 125280
loss: 0.9897127747535706,grad_norm: 0.8665904498081597, iteration: 125281
loss: 0.9999717473983765,grad_norm: 0.9579914793362638, iteration: 125282
loss: 0.9939717054367065,grad_norm: 0.9999990722121894, iteration: 125283
loss: 1.0223389863967896,grad_norm: 0.9999992062731193, iteration: 125284
loss: 0.9851031303405762,grad_norm: 0.9832883519074112, iteration: 125285
loss: 1.0261273384094238,grad_norm: 0.9999991383227244, iteration: 125286
loss: 1.0025266408920288,grad_norm: 0.9999991325523111, iteration: 125287
loss: 1.0070778131484985,grad_norm: 0.9999990732276943, iteration: 125288
loss: 0.9645373225212097,grad_norm: 0.9999990601449908, iteration: 125289
loss: 1.0245981216430664,grad_norm: 0.9999989764065237, iteration: 125290
loss: 1.0058244466781616,grad_norm: 0.9999997481388843, iteration: 125291
loss: 0.9884068965911865,grad_norm: 0.9999993000798967, iteration: 125292
loss: 1.0203627347946167,grad_norm: 0.9999992011084472, iteration: 125293
loss: 0.9929069876670837,grad_norm: 0.9999992347823334, iteration: 125294
loss: 1.017369270324707,grad_norm: 0.946474414755214, iteration: 125295
loss: 0.9991260766983032,grad_norm: 0.9999993011925933, iteration: 125296
loss: 0.9946384429931641,grad_norm: 0.9393894417034023, iteration: 125297
loss: 1.0018208026885986,grad_norm: 0.999999190968371, iteration: 125298
loss: 1.0161018371582031,grad_norm: 0.9999991044729116, iteration: 125299
loss: 0.9723080992698669,grad_norm: 0.9938495012184676, iteration: 125300
loss: 1.0353057384490967,grad_norm: 0.9999990959397845, iteration: 125301
loss: 0.9937783479690552,grad_norm: 0.9999992653683637, iteration: 125302
loss: 1.0007659196853638,grad_norm: 0.8024404437235542, iteration: 125303
loss: 0.9910846948623657,grad_norm: 0.9402913106550593, iteration: 125304
loss: 0.9815115928649902,grad_norm: 0.8932371089469814, iteration: 125305
loss: 0.9829323887825012,grad_norm: 0.9999990025364672, iteration: 125306
loss: 0.9594348669052124,grad_norm: 0.9999990863262568, iteration: 125307
loss: 1.007219672203064,grad_norm: 0.9560954162752681, iteration: 125308
loss: 0.996501624584198,grad_norm: 0.9999991123084097, iteration: 125309
loss: 1.0127630233764648,grad_norm: 0.9202062599744236, iteration: 125310
loss: 0.9957222938537598,grad_norm: 0.7315839214094295, iteration: 125311
loss: 0.999846339225769,grad_norm: 0.9999990308917596, iteration: 125312
loss: 1.0193637609481812,grad_norm: 0.9505566548631802, iteration: 125313
loss: 0.9803702235221863,grad_norm: 0.9999992254518657, iteration: 125314
loss: 0.998615562915802,grad_norm: 0.9160871212812925, iteration: 125315
loss: 0.9767378568649292,grad_norm: 0.999999328644713, iteration: 125316
loss: 0.9749271869659424,grad_norm: 0.9742438443512758, iteration: 125317
loss: 0.9710410237312317,grad_norm: 0.954225966252142, iteration: 125318
loss: 1.0074762105941772,grad_norm: 0.8786017128076569, iteration: 125319
loss: 0.9737786054611206,grad_norm: 0.9999991526666807, iteration: 125320
loss: 1.009955883026123,grad_norm: 0.9999997322303235, iteration: 125321
loss: 0.9521609544754028,grad_norm: 0.9661378120848482, iteration: 125322
loss: 0.9772467017173767,grad_norm: 0.999999609226156, iteration: 125323
loss: 1.0323988199234009,grad_norm: 0.9814731821762828, iteration: 125324
loss: 1.020999550819397,grad_norm: 0.9999992056065878, iteration: 125325
loss: 1.0171207189559937,grad_norm: 0.947177152150574, iteration: 125326
loss: 1.0266003608703613,grad_norm: 0.9055380612999804, iteration: 125327
loss: 0.9967003464698792,grad_norm: 0.9999989875851639, iteration: 125328
loss: 0.9943034052848816,grad_norm: 0.9999990914043966, iteration: 125329
loss: 0.9851028919219971,grad_norm: 0.999999191788881, iteration: 125330
loss: 1.0035347938537598,grad_norm: 0.9999989584047199, iteration: 125331
loss: 1.0536110401153564,grad_norm: 0.9999990884281288, iteration: 125332
loss: 1.0284526348114014,grad_norm: 0.9999995064657139, iteration: 125333
loss: 1.0288219451904297,grad_norm: 0.9999994238737255, iteration: 125334
loss: 1.0199271440505981,grad_norm: 0.9999990914593896, iteration: 125335
loss: 1.0204362869262695,grad_norm: 0.9999992568779148, iteration: 125336
loss: 0.9951722621917725,grad_norm: 0.9999992397266907, iteration: 125337
loss: 1.0460615158081055,grad_norm: 0.9999995312914726, iteration: 125338
loss: 1.0052297115325928,grad_norm: 0.9758684633892676, iteration: 125339
loss: 1.0038135051727295,grad_norm: 0.9999994505026789, iteration: 125340
loss: 0.967732846736908,grad_norm: 0.9999991479910815, iteration: 125341
loss: 1.0241550207138062,grad_norm: 0.9999989994530233, iteration: 125342
loss: 1.000728726387024,grad_norm: 0.9999991552243613, iteration: 125343
loss: 0.9895161986351013,grad_norm: 0.9880000677792197, iteration: 125344
loss: 1.0159521102905273,grad_norm: 0.8956581568386704, iteration: 125345
loss: 0.9912988543510437,grad_norm: 0.8734437350823903, iteration: 125346
loss: 0.986293613910675,grad_norm: 0.9999991204909301, iteration: 125347
loss: 0.9793028831481934,grad_norm: 0.928639461991427, iteration: 125348
loss: 1.009957194328308,grad_norm: 0.9999995621361522, iteration: 125349
loss: 1.014919638633728,grad_norm: 0.9999991471734377, iteration: 125350
loss: 0.9927789568901062,grad_norm: 0.9999991735507485, iteration: 125351
loss: 0.9963604807853699,grad_norm: 0.9999992166594064, iteration: 125352
loss: 1.0067733526229858,grad_norm: 0.999999120112007, iteration: 125353
loss: 1.02786123752594,grad_norm: 0.9999992454729774, iteration: 125354
loss: 0.9631230235099792,grad_norm: 0.8892109712792664, iteration: 125355
loss: 1.0598751306533813,grad_norm: 0.9999996512465711, iteration: 125356
loss: 0.9879288673400879,grad_norm: 0.9999990667135386, iteration: 125357
loss: 0.9923568964004517,grad_norm: 0.9999991076275534, iteration: 125358
loss: 1.0638161897659302,grad_norm: 0.9999995942719602, iteration: 125359
loss: 0.9999915957450867,grad_norm: 0.9407885684883994, iteration: 125360
loss: 0.9899965524673462,grad_norm: 0.9999992617282061, iteration: 125361
loss: 1.0211982727050781,grad_norm: 0.999999217357562, iteration: 125362
loss: 1.003143072128296,grad_norm: 0.9637759932298778, iteration: 125363
loss: 0.9709354639053345,grad_norm: 0.9538824519127098, iteration: 125364
loss: 1.007863998413086,grad_norm: 0.9999992660223602, iteration: 125365
loss: 1.073566198348999,grad_norm: 0.9999993373183264, iteration: 125366
loss: 0.9626166224479675,grad_norm: 0.957146627661957, iteration: 125367
loss: 1.0182154178619385,grad_norm: 0.9412801038157431, iteration: 125368
loss: 1.0335017442703247,grad_norm: 0.9999992474527775, iteration: 125369
loss: 1.0260688066482544,grad_norm: 0.9999992648466737, iteration: 125370
loss: 0.9574617743492126,grad_norm: 0.999999316177047, iteration: 125371
loss: 1.0154671669006348,grad_norm: 0.9999990118014697, iteration: 125372
loss: 0.9927231669425964,grad_norm: 0.9117745208552139, iteration: 125373
loss: 1.0409964323043823,grad_norm: 0.9999991807385938, iteration: 125374
loss: 1.0207767486572266,grad_norm: 0.9999992048310636, iteration: 125375
loss: 1.0107213258743286,grad_norm: 0.9999990792700544, iteration: 125376
loss: 1.043744444847107,grad_norm: 0.9999992071540913, iteration: 125377
loss: 0.9586725831031799,grad_norm: 0.9999992726956446, iteration: 125378
loss: 0.9543358087539673,grad_norm: 0.9999989620671763, iteration: 125379
loss: 1.033575177192688,grad_norm: 0.9999990303418059, iteration: 125380
loss: 1.0121597051620483,grad_norm: 0.9999997451429254, iteration: 125381
loss: 0.9783870577812195,grad_norm: 0.9999990962082567, iteration: 125382
loss: 1.0253103971481323,grad_norm: 0.9999991310735481, iteration: 125383
loss: 1.0044928789138794,grad_norm: 0.915660522069572, iteration: 125384
loss: 1.0011719465255737,grad_norm: 0.9999991701867585, iteration: 125385
loss: 1.0291426181793213,grad_norm: 0.8428930951116741, iteration: 125386
loss: 0.9864362478256226,grad_norm: 0.9512721522322617, iteration: 125387
loss: 1.0081285238265991,grad_norm: 0.9603124237043664, iteration: 125388
loss: 1.0391993522644043,grad_norm: 0.9999989777180891, iteration: 125389
loss: 1.0086207389831543,grad_norm: 0.9999991312816454, iteration: 125390
loss: 1.006210207939148,grad_norm: 0.9999997172629952, iteration: 125391
loss: 0.9768847823143005,grad_norm: 0.9670376811766938, iteration: 125392
loss: 0.9938931465148926,grad_norm: 0.9999995100208021, iteration: 125393
loss: 0.9905509948730469,grad_norm: 0.9999989866168071, iteration: 125394
loss: 1.0534580945968628,grad_norm: 0.9999990073464978, iteration: 125395
loss: 1.0138667821884155,grad_norm: 0.9749876934707723, iteration: 125396
loss: 1.0170996189117432,grad_norm: 0.9703031530849306, iteration: 125397
loss: 0.9865913987159729,grad_norm: 0.9999991359862312, iteration: 125398
loss: 0.9825023412704468,grad_norm: 0.999999202029869, iteration: 125399
loss: 0.9959617257118225,grad_norm: 0.8092007618633542, iteration: 125400
loss: 0.9790249466896057,grad_norm: 0.9999990491231923, iteration: 125401
loss: 0.9965732097625732,grad_norm: 0.9999992579163401, iteration: 125402
loss: 1.0033231973648071,grad_norm: 0.9999990920538153, iteration: 125403
loss: 1.00472092628479,grad_norm: 0.999999021830829, iteration: 125404
loss: 0.9765657186508179,grad_norm: 0.9999990458858862, iteration: 125405
loss: 0.9800587296485901,grad_norm: 0.9999990769177021, iteration: 125406
loss: 1.0192393064498901,grad_norm: 0.936665229258468, iteration: 125407
loss: 1.0635643005371094,grad_norm: 0.9503694536098812, iteration: 125408
loss: 1.0187673568725586,grad_norm: 0.9999993092053162, iteration: 125409
loss: 1.0435703992843628,grad_norm: 0.9999991811977864, iteration: 125410
loss: 1.0138202905654907,grad_norm: 0.9746385960435476, iteration: 125411
loss: 0.9851002097129822,grad_norm: 0.9178438592720571, iteration: 125412
loss: 1.0336180925369263,grad_norm: 0.9635720106420044, iteration: 125413
loss: 0.9696189165115356,grad_norm: 0.9131548127221828, iteration: 125414
loss: 1.0241667032241821,grad_norm: 0.9999990596238706, iteration: 125415
loss: 1.0379955768585205,grad_norm: 0.99811510533562, iteration: 125416
loss: 0.9782965779304504,grad_norm: 0.9999991464175009, iteration: 125417
loss: 0.9876394271850586,grad_norm: 0.9994328827697474, iteration: 125418
loss: 1.0346020460128784,grad_norm: 0.9772178727369961, iteration: 125419
loss: 1.017012119293213,grad_norm: 0.9999990632370129, iteration: 125420
loss: 0.9668024778366089,grad_norm: 0.9565608168955949, iteration: 125421
loss: 0.9889234304428101,grad_norm: 0.9590605805873593, iteration: 125422
loss: 1.021594524383545,grad_norm: 0.9999993942075647, iteration: 125423
loss: 1.024369239807129,grad_norm: 0.99999897475955, iteration: 125424
loss: 1.0608367919921875,grad_norm: 0.9999993193143877, iteration: 125425
loss: 0.9623036980628967,grad_norm: 0.9999992057113553, iteration: 125426
loss: 0.9951797723770142,grad_norm: 0.9352252123445326, iteration: 125427
loss: 0.9830284118652344,grad_norm: 0.9317360418117497, iteration: 125428
loss: 0.9739132523536682,grad_norm: 0.9999991825990041, iteration: 125429
loss: 0.9606208205223083,grad_norm: 0.9999991957495941, iteration: 125430
loss: 1.0168166160583496,grad_norm: 0.999999195622132, iteration: 125431
loss: 0.9589267373085022,grad_norm: 0.9999990840308335, iteration: 125432
loss: 1.0347625017166138,grad_norm: 0.9999991882910031, iteration: 125433
loss: 1.0430445671081543,grad_norm: 0.921097431947496, iteration: 125434
loss: 0.9900760650634766,grad_norm: 0.9999990567408956, iteration: 125435
loss: 1.0649083852767944,grad_norm: 0.9999990973063393, iteration: 125436
loss: 1.0146182775497437,grad_norm: 0.9733323885780817, iteration: 125437
loss: 0.99369215965271,grad_norm: 0.9999994638074703, iteration: 125438
loss: 1.057633876800537,grad_norm: 0.9999994316022444, iteration: 125439
loss: 0.9895439743995667,grad_norm: 0.9999990533369184, iteration: 125440
loss: 0.9407405853271484,grad_norm: 0.9999990127820405, iteration: 125441
loss: 0.990553617477417,grad_norm: 0.999999306026638, iteration: 125442
loss: 1.009352207183838,grad_norm: 0.9999991574449795, iteration: 125443
loss: 1.0198094844818115,grad_norm: 0.9999991970200401, iteration: 125444
loss: 0.9830565452575684,grad_norm: 0.9871523683160304, iteration: 125445
loss: 1.0117995738983154,grad_norm: 0.9999989722980607, iteration: 125446
loss: 1.0232495069503784,grad_norm: 0.9999992603361129, iteration: 125447
loss: 1.0047749280929565,grad_norm: 0.8422583659075561, iteration: 125448
loss: 1.0318288803100586,grad_norm: 0.9999992044427812, iteration: 125449
loss: 0.9609314203262329,grad_norm: 0.9999990595415036, iteration: 125450
loss: 0.9940162301063538,grad_norm: 0.9999991310672464, iteration: 125451
loss: 0.9807977080345154,grad_norm: 0.9892254348269542, iteration: 125452
loss: 1.008008360862732,grad_norm: 0.999999107949075, iteration: 125453
loss: 0.9779802560806274,grad_norm: 0.9611072373720344, iteration: 125454
loss: 1.0109076499938965,grad_norm: 0.9613067425676111, iteration: 125455
loss: 0.9675682187080383,grad_norm: 0.9999991001968569, iteration: 125456
loss: 0.9958089590072632,grad_norm: 0.9623341035137032, iteration: 125457
loss: 0.9858976006507874,grad_norm: 0.9943974917070904, iteration: 125458
loss: 0.9918532371520996,grad_norm: 0.7603880848169255, iteration: 125459
loss: 1.0547022819519043,grad_norm: 0.9999990967907559, iteration: 125460
loss: 0.9864091873168945,grad_norm: 0.9999990492563546, iteration: 125461
loss: 1.0239874124526978,grad_norm: 0.9699309112552243, iteration: 125462
loss: 1.027209758758545,grad_norm: 0.9999992770437203, iteration: 125463
loss: 1.0150851011276245,grad_norm: 0.9999991508973254, iteration: 125464
loss: 0.9811229705810547,grad_norm: 0.9999990753187893, iteration: 125465
loss: 1.0545804500579834,grad_norm: 0.9999996870192186, iteration: 125466
loss: 1.002964735031128,grad_norm: 0.9999990577071888, iteration: 125467
loss: 1.015566349029541,grad_norm: 0.9999994933420885, iteration: 125468
loss: 0.9829086065292358,grad_norm: 0.9999991318900227, iteration: 125469
loss: 0.9747974276542664,grad_norm: 0.9999991428346647, iteration: 125470
loss: 0.9860396385192871,grad_norm: 0.9631613576305679, iteration: 125471
loss: 1.0998278856277466,grad_norm: 0.9999995313322991, iteration: 125472
loss: 1.0447263717651367,grad_norm: 0.9999996148618248, iteration: 125473
loss: 1.0367563962936401,grad_norm: 0.9999993218462057, iteration: 125474
loss: 1.0187840461730957,grad_norm: 0.9999991775736495, iteration: 125475
loss: 1.0368605852127075,grad_norm: 0.9999990845811002, iteration: 125476
loss: 1.0296154022216797,grad_norm: 0.9999990026578066, iteration: 125477
loss: 1.0031570196151733,grad_norm: 0.9684937906901704, iteration: 125478
loss: 1.029115915298462,grad_norm: 0.9520800393193433, iteration: 125479
loss: 0.9736743569374084,grad_norm: 0.999999225876834, iteration: 125480
loss: 1.0022242069244385,grad_norm: 0.9774777219430475, iteration: 125481
loss: 1.0404634475708008,grad_norm: 0.9557393122989712, iteration: 125482
loss: 0.9897382855415344,grad_norm: 0.9999993913560287, iteration: 125483
loss: 1.0113842487335205,grad_norm: 0.9700010516811035, iteration: 125484
loss: 0.9781782627105713,grad_norm: 0.9999991598817908, iteration: 125485
loss: 0.947213351726532,grad_norm: 0.9999992627631914, iteration: 125486
loss: 1.005994439125061,grad_norm: 0.9740769586395054, iteration: 125487
loss: 1.0137094259262085,grad_norm: 0.9999991756530044, iteration: 125488
loss: 0.9623172879219055,grad_norm: 0.9999991624675959, iteration: 125489
loss: 1.0253773927688599,grad_norm: 0.9999993641418182, iteration: 125490
loss: 0.9938027858734131,grad_norm: 0.9150103063499114, iteration: 125491
loss: 1.027777910232544,grad_norm: 0.8965536174025054, iteration: 125492
loss: 1.0569018125534058,grad_norm: 0.9999993100774036, iteration: 125493
loss: 0.994375467300415,grad_norm: 0.9561764302677442, iteration: 125494
loss: 1.0208772420883179,grad_norm: 0.9999993800794488, iteration: 125495
loss: 0.9918690919876099,grad_norm: 0.9999992762922844, iteration: 125496
loss: 1.0357576608657837,grad_norm: 0.9999989227920797, iteration: 125497
loss: 0.9884006381034851,grad_norm: 0.8980733409617664, iteration: 125498
loss: 1.020318627357483,grad_norm: 0.8754664648842589, iteration: 125499
loss: 1.031507968902588,grad_norm: 0.9999993150676465, iteration: 125500
loss: 1.0231990814208984,grad_norm: 0.9999991950185247, iteration: 125501
loss: 1.0110688209533691,grad_norm: 0.8660753883542014, iteration: 125502
loss: 0.9987004399299622,grad_norm: 0.9441817512133355, iteration: 125503
loss: 0.9813599586486816,grad_norm: 0.9712541201084708, iteration: 125504
loss: 1.0267575979232788,grad_norm: 0.9999998709451802, iteration: 125505
loss: 0.9978020191192627,grad_norm: 0.8657761307322741, iteration: 125506
loss: 0.9846187829971313,grad_norm: 0.8655836270694742, iteration: 125507
loss: 1.0267903804779053,grad_norm: 0.9571489813266566, iteration: 125508
loss: 1.0300216674804688,grad_norm: 0.999999124857458, iteration: 125509
loss: 1.0253446102142334,grad_norm: 0.9999998111325619, iteration: 125510
loss: 1.022635579109192,grad_norm: 0.9086841330180885, iteration: 125511
loss: 1.0069823265075684,grad_norm: 0.9999993779527303, iteration: 125512
loss: 1.0162383317947388,grad_norm: 0.9999991157787997, iteration: 125513
loss: 1.0484532117843628,grad_norm: 0.9999995555432096, iteration: 125514
loss: 0.9958410859107971,grad_norm: 0.9930684295990762, iteration: 125515
loss: 0.9949085712432861,grad_norm: 0.8491118627539662, iteration: 125516
loss: 0.9990261793136597,grad_norm: 0.8902499875440583, iteration: 125517
loss: 0.9921762347221375,grad_norm: 0.9655985530599485, iteration: 125518
loss: 1.0042226314544678,grad_norm: 0.9265098552195284, iteration: 125519
loss: 1.0153601169586182,grad_norm: 0.9476119353372936, iteration: 125520
loss: 1.0145530700683594,grad_norm: 0.9999995912843435, iteration: 125521
loss: 0.9430081844329834,grad_norm: 0.9999990623628421, iteration: 125522
loss: 0.9934399127960205,grad_norm: 0.9999992458575471, iteration: 125523
loss: 0.9873349070549011,grad_norm: 0.9999991444191221, iteration: 125524
loss: 1.012223243713379,grad_norm: 0.9348289251131109, iteration: 125525
loss: 0.9814846515655518,grad_norm: 0.873754518294423, iteration: 125526
loss: 1.0158787965774536,grad_norm: 0.9999993062992679, iteration: 125527
loss: 0.9619907736778259,grad_norm: 0.9999993170710352, iteration: 125528
loss: 0.9781205654144287,grad_norm: 0.9524014367969741, iteration: 125529
loss: 0.9779148101806641,grad_norm: 0.9676417517118683, iteration: 125530
loss: 0.9891655445098877,grad_norm: 0.9355977582892493, iteration: 125531
loss: 0.992219090461731,grad_norm: 0.9060898453008069, iteration: 125532
loss: 0.9921135902404785,grad_norm: 0.968073260739787, iteration: 125533
loss: 0.9909286499023438,grad_norm: 0.9330324329962423, iteration: 125534
loss: 0.9888676404953003,grad_norm: 0.9292979703685895, iteration: 125535
loss: 1.123419165611267,grad_norm: 0.9999998804429148, iteration: 125536
loss: 1.0013668537139893,grad_norm: 0.9999990665300449, iteration: 125537
loss: 1.0185490846633911,grad_norm: 0.9999990902550108, iteration: 125538
loss: 0.9619576930999756,grad_norm: 0.9999990973137439, iteration: 125539
loss: 0.9949024319648743,grad_norm: 0.9999990849761846, iteration: 125540
loss: 1.01529061794281,grad_norm: 0.9669155297107347, iteration: 125541
loss: 1.0299479961395264,grad_norm: 0.9113677136140612, iteration: 125542
loss: 1.001958966255188,grad_norm: 0.999999108087531, iteration: 125543
loss: 1.0255300998687744,grad_norm: 0.9999992585070199, iteration: 125544
loss: 1.0044914484024048,grad_norm: 0.8199369471609605, iteration: 125545
loss: 1.0121701955795288,grad_norm: 0.9075929758460921, iteration: 125546
loss: 1.0391963720321655,grad_norm: 0.9999991279402027, iteration: 125547
loss: 1.0552303791046143,grad_norm: 0.9999996475236909, iteration: 125548
loss: 0.962408185005188,grad_norm: 0.9459024941280908, iteration: 125549
loss: 0.9702896475791931,grad_norm: 0.9999990139432988, iteration: 125550
loss: 0.9950003027915955,grad_norm: 0.9755977341637969, iteration: 125551
loss: 1.012067437171936,grad_norm: 0.9999998035367725, iteration: 125552
loss: 0.9957007765769958,grad_norm: 0.9610806562867661, iteration: 125553
loss: 0.9879664778709412,grad_norm: 0.9999991355949477, iteration: 125554
loss: 0.9935471415519714,grad_norm: 0.9999998007714768, iteration: 125555
loss: 1.1026777029037476,grad_norm: 0.9999998464648719, iteration: 125556
loss: 1.0110206604003906,grad_norm: 0.9920609864999145, iteration: 125557
loss: 1.0120967626571655,grad_norm: 0.9999990997140328, iteration: 125558
loss: 0.9930459260940552,grad_norm: 0.9158909493912598, iteration: 125559
loss: 0.9737024307250977,grad_norm: 0.9999992722642707, iteration: 125560
loss: 1.01836097240448,grad_norm: 0.9999990422053562, iteration: 125561
loss: 0.9977019429206848,grad_norm: 0.9999990998398604, iteration: 125562
loss: 1.0633713006973267,grad_norm: 0.9999994093115363, iteration: 125563
loss: 1.0014389753341675,grad_norm: 0.9437235817522229, iteration: 125564
loss: 0.9961620569229126,grad_norm: 0.9999992040361031, iteration: 125565
loss: 1.015575647354126,grad_norm: 0.9999992042280994, iteration: 125566
loss: 0.9812394380569458,grad_norm: 0.9999993107004198, iteration: 125567
loss: 1.0043346881866455,grad_norm: 0.9999991860296884, iteration: 125568
loss: 0.9945771098136902,grad_norm: 0.9999991531469413, iteration: 125569
loss: 1.0064643621444702,grad_norm: 0.9999992270510538, iteration: 125570
loss: 0.9699692130088806,grad_norm: 0.9999990953866286, iteration: 125571
loss: 1.0412356853485107,grad_norm: 0.9999998323279515, iteration: 125572
loss: 1.0034407377243042,grad_norm: 0.999998994753916, iteration: 125573
loss: 1.0065666437149048,grad_norm: 0.9999989952405578, iteration: 125574
loss: 0.9678733348846436,grad_norm: 0.9943192291267395, iteration: 125575
loss: 0.9899454712867737,grad_norm: 0.9999991389448605, iteration: 125576
loss: 0.970487117767334,grad_norm: 0.9999989798098335, iteration: 125577
loss: 0.9568268656730652,grad_norm: 0.9999996733579503, iteration: 125578
loss: 1.1540597677230835,grad_norm: 0.9999993670060486, iteration: 125579
loss: 1.0025395154953003,grad_norm: 0.9669983226132831, iteration: 125580
loss: 1.0856271982192993,grad_norm: 0.9999992189560069, iteration: 125581
loss: 1.0111397504806519,grad_norm: 0.9897754453703768, iteration: 125582
loss: 0.962613046169281,grad_norm: 0.9999990947799268, iteration: 125583
loss: 0.9801107048988342,grad_norm: 0.9999993342808735, iteration: 125584
loss: 0.9953119158744812,grad_norm: 0.8476036759255752, iteration: 125585
loss: 0.9997500777244568,grad_norm: 0.9999990811880801, iteration: 125586
loss: 1.02742338180542,grad_norm: 0.9999990896724736, iteration: 125587
loss: 1.018904685974121,grad_norm: 0.9065565794569502, iteration: 125588
loss: 1.0455902814865112,grad_norm: 0.99999984551338, iteration: 125589
loss: 0.9712297320365906,grad_norm: 0.9337458072883823, iteration: 125590
loss: 1.0020825862884521,grad_norm: 0.999999104512024, iteration: 125591
loss: 0.9911333918571472,grad_norm: 0.9999990245962977, iteration: 125592
loss: 1.0359359979629517,grad_norm: 0.9500972063570993, iteration: 125593
loss: 0.9787788987159729,grad_norm: 0.9999991156563423, iteration: 125594
loss: 0.9762764573097229,grad_norm: 0.9999990021127535, iteration: 125595
loss: 0.9981510043144226,grad_norm: 0.993265675407287, iteration: 125596
loss: 1.0347241163253784,grad_norm: 0.9999991047987118, iteration: 125597
loss: 1.059838056564331,grad_norm: 0.9999990869728499, iteration: 125598
loss: 0.9705225229263306,grad_norm: 0.9999991223473493, iteration: 125599
loss: 0.987774670124054,grad_norm: 0.9999993033072538, iteration: 125600
loss: 0.9729909300804138,grad_norm: 0.9999992199937415, iteration: 125601
loss: 1.0011811256408691,grad_norm: 0.9956575565127002, iteration: 125602
loss: 1.0317974090576172,grad_norm: 0.99999909709549, iteration: 125603
loss: 1.0216097831726074,grad_norm: 0.9999992599153366, iteration: 125604
loss: 1.0074647665023804,grad_norm: 0.9999991749194128, iteration: 125605
loss: 0.9958606362342834,grad_norm: 0.9999990223874511, iteration: 125606
loss: 0.9998215436935425,grad_norm: 0.9842547819464579, iteration: 125607
loss: 0.991223931312561,grad_norm: 0.9961763475454712, iteration: 125608
loss: 0.9779733419418335,grad_norm: 0.884983625428865, iteration: 125609
loss: 1.00262451171875,grad_norm: 0.7657833142198034, iteration: 125610
loss: 0.9825536012649536,grad_norm: 0.9791702310140048, iteration: 125611
loss: 0.9622142314910889,grad_norm: 0.8620485220340223, iteration: 125612
loss: 0.9835581183433533,grad_norm: 0.999999143939086, iteration: 125613
loss: 1.0170718431472778,grad_norm: 0.9999990788592285, iteration: 125614
loss: 1.0120792388916016,grad_norm: 0.99999906275293, iteration: 125615
loss: 1.0113626718521118,grad_norm: 0.8655803300154733, iteration: 125616
loss: 1.0530030727386475,grad_norm: 0.9999993579681532, iteration: 125617
loss: 1.0092350244522095,grad_norm: 0.8637146582769069, iteration: 125618
loss: 1.0249807834625244,grad_norm: 0.999999143044355, iteration: 125619
loss: 1.0087544918060303,grad_norm: 0.9999989966918135, iteration: 125620
loss: 0.9974539875984192,grad_norm: 0.9683390643721325, iteration: 125621
loss: 0.9716759324073792,grad_norm: 0.9716587941348123, iteration: 125622
loss: 0.9932563900947571,grad_norm: 0.9108224083615987, iteration: 125623
loss: 1.0076528787612915,grad_norm: 0.9999991907118407, iteration: 125624
loss: 0.9931154847145081,grad_norm: 0.9999989567681148, iteration: 125625
loss: 0.9893510341644287,grad_norm: 0.9999998337984826, iteration: 125626
loss: 1.0241894721984863,grad_norm: 0.9999988855816576, iteration: 125627
loss: 0.9760425090789795,grad_norm: 0.9358242586092269, iteration: 125628
loss: 0.9563729763031006,grad_norm: 0.899478047287565, iteration: 125629
loss: 0.9880058169364929,grad_norm: 0.9003689899034786, iteration: 125630
loss: 1.0201337337493896,grad_norm: 0.9999999149111429, iteration: 125631
loss: 1.0269737243652344,grad_norm: 0.9999990896629665, iteration: 125632
loss: 0.987504780292511,grad_norm: 0.9999990860865852, iteration: 125633
loss: 0.985198438167572,grad_norm: 0.9999990760345822, iteration: 125634
loss: 0.9852898120880127,grad_norm: 0.9999994757687076, iteration: 125635
loss: 1.0038175582885742,grad_norm: 0.9999990619897458, iteration: 125636
loss: 1.0342285633087158,grad_norm: 0.861760174833147, iteration: 125637
loss: 1.018917202949524,grad_norm: 0.999999165243868, iteration: 125638
loss: 1.033821940422058,grad_norm: 0.9653133083759381, iteration: 125639
loss: 0.9764126539230347,grad_norm: 0.9599002160930603, iteration: 125640
loss: 0.9657727479934692,grad_norm: 0.9999990886229037, iteration: 125641
loss: 0.9920884966850281,grad_norm: 0.9684517804392038, iteration: 125642
loss: 0.992156445980072,grad_norm: 0.9213337962574387, iteration: 125643
loss: 1.0072945356369019,grad_norm: 0.9999993308304569, iteration: 125644
loss: 0.9911498427391052,grad_norm: 0.9999991619274649, iteration: 125645
loss: 0.9995157718658447,grad_norm: 0.8995837720984053, iteration: 125646
loss: 0.999464213848114,grad_norm: 0.9876108229252616, iteration: 125647
loss: 1.0070265531539917,grad_norm: 0.9999990261289007, iteration: 125648
loss: 0.9907448887825012,grad_norm: 0.9540627573123187, iteration: 125649
loss: 1.024481177330017,grad_norm: 0.999999129156013, iteration: 125650
loss: 1.0183632373809814,grad_norm: 0.9999994263191709, iteration: 125651
loss: 0.9797883033752441,grad_norm: 0.9999991032986768, iteration: 125652
loss: 1.007595181465149,grad_norm: 0.9999991426844476, iteration: 125653
loss: 0.9856879115104675,grad_norm: 0.9999991323005121, iteration: 125654
loss: 1.0233782529830933,grad_norm: 0.9999999118283736, iteration: 125655
loss: 0.9867680072784424,grad_norm: 0.9421387978176213, iteration: 125656
loss: 1.0381909608840942,grad_norm: 0.999999309306737, iteration: 125657
loss: 0.9970098733901978,grad_norm: 0.9476798057357282, iteration: 125658
loss: 0.960460364818573,grad_norm: 0.9999990209427947, iteration: 125659
loss: 1.0844653844833374,grad_norm: 0.9999994280938733, iteration: 125660
loss: 1.094064474105835,grad_norm: 0.9999999833119302, iteration: 125661
loss: 0.9673624038696289,grad_norm: 0.9871146996897171, iteration: 125662
loss: 1.0149221420288086,grad_norm: 0.9999990341284872, iteration: 125663
loss: 0.9931439161300659,grad_norm: 0.8871025472156014, iteration: 125664
loss: 1.0231221914291382,grad_norm: 0.9999998260302394, iteration: 125665
loss: 1.0069994926452637,grad_norm: 0.9999993400821224, iteration: 125666
loss: 0.9709495902061462,grad_norm: 0.9999990415468545, iteration: 125667
loss: 0.9968118667602539,grad_norm: 0.9719478165057427, iteration: 125668
loss: 0.9926753044128418,grad_norm: 0.9999990992539856, iteration: 125669
loss: 1.0025384426116943,grad_norm: 0.9999991001464036, iteration: 125670
loss: 0.9887624382972717,grad_norm: 0.9999990613608475, iteration: 125671
loss: 1.0403939485549927,grad_norm: 0.9999991587781969, iteration: 125672
loss: 1.014022707939148,grad_norm: 0.9999996329479544, iteration: 125673
loss: 0.9836400151252747,grad_norm: 0.9999991432650908, iteration: 125674
loss: 1.0228301286697388,grad_norm: 0.9394003816927894, iteration: 125675
loss: 1.053905963897705,grad_norm: 0.9999995430269512, iteration: 125676
loss: 0.9788420796394348,grad_norm: 0.9776278013624281, iteration: 125677
loss: 1.0138351917266846,grad_norm: 0.9999991326509537, iteration: 125678
loss: 1.0116504430770874,grad_norm: 0.9908987282480807, iteration: 125679
loss: 1.019299030303955,grad_norm: 0.9999990216128027, iteration: 125680
loss: 1.031382441520691,grad_norm: 0.9999993315390396, iteration: 125681
loss: 0.9959917068481445,grad_norm: 0.9268661094834991, iteration: 125682
loss: 1.000942587852478,grad_norm: 0.999999345465849, iteration: 125683
loss: 1.0017695426940918,grad_norm: 0.9685624923295351, iteration: 125684
loss: 0.9719642996788025,grad_norm: 0.9999991014176199, iteration: 125685
loss: 0.9853389859199524,grad_norm: 0.859820342527498, iteration: 125686
loss: 0.9949464201927185,grad_norm: 0.9999989070621614, iteration: 125687
loss: 1.0203207731246948,grad_norm: 0.99999910627127, iteration: 125688
loss: 0.9919533133506775,grad_norm: 0.9877370192380165, iteration: 125689
loss: 0.9944371581077576,grad_norm: 0.9999991145081943, iteration: 125690
loss: 0.9970171451568604,grad_norm: 0.8794946867793211, iteration: 125691
loss: 1.00698983669281,grad_norm: 0.9999998400423532, iteration: 125692
loss: 0.9715041518211365,grad_norm: 0.9999989784503444, iteration: 125693
loss: 0.9782147407531738,grad_norm: 0.9089849292361893, iteration: 125694
loss: 1.0085049867630005,grad_norm: 0.9999991609809831, iteration: 125695
loss: 0.9984478950500488,grad_norm: 0.9999991051743136, iteration: 125696
loss: 1.0089572668075562,grad_norm: 0.9537827970849763, iteration: 125697
loss: 0.9994522333145142,grad_norm: 0.9999991483614311, iteration: 125698
loss: 0.9948691129684448,grad_norm: 0.8667854142444703, iteration: 125699
loss: 1.0418143272399902,grad_norm: 0.9999992086354248, iteration: 125700
loss: 1.0120773315429688,grad_norm: 0.8259622662261622, iteration: 125701
loss: 1.022704839706421,grad_norm: 0.9697167356512049, iteration: 125702
loss: 0.974116861820221,grad_norm: 0.9037464847373234, iteration: 125703
loss: 0.9906049370765686,grad_norm: 0.9918909297586787, iteration: 125704
loss: 1.0107145309448242,grad_norm: 0.9999992111740371, iteration: 125705
loss: 1.0011317729949951,grad_norm: 0.8499767720715165, iteration: 125706
loss: 1.0007342100143433,grad_norm: 0.9999992268281013, iteration: 125707
loss: 0.9775826930999756,grad_norm: 0.9999991602078291, iteration: 125708
loss: 0.9928446412086487,grad_norm: 0.9999990527236576, iteration: 125709
loss: 0.9954074025154114,grad_norm: 0.9477371510100845, iteration: 125710
loss: 1.0346328020095825,grad_norm: 0.9999990478169468, iteration: 125711
loss: 0.9865942001342773,grad_norm: 0.9999992257776601, iteration: 125712
loss: 0.9824718832969666,grad_norm: 0.8944503092341234, iteration: 125713
loss: 1.004558801651001,grad_norm: 0.9907115425342895, iteration: 125714
loss: 1.015438199043274,grad_norm: 0.9999990417419526, iteration: 125715
loss: 0.967027485370636,grad_norm: 0.8424237164143659, iteration: 125716
loss: 1.0041534900665283,grad_norm: 0.9999993173598927, iteration: 125717
loss: 0.9905923008918762,grad_norm: 0.9838376882429749, iteration: 125718
loss: 1.0233454704284668,grad_norm: 0.999999774049325, iteration: 125719
loss: 1.0228052139282227,grad_norm: 0.9999990374825519, iteration: 125720
loss: 1.0069936513900757,grad_norm: 0.9025080211880598, iteration: 125721
loss: 1.0037975311279297,grad_norm: 0.9999997378721999, iteration: 125722
loss: 1.0162080526351929,grad_norm: 0.9999998344770549, iteration: 125723
loss: 1.0078504085540771,grad_norm: 0.9691107263437839, iteration: 125724
loss: 0.9944731593132019,grad_norm: 0.9999991249463106, iteration: 125725
loss: 0.9974341988563538,grad_norm: 0.9999989955878931, iteration: 125726
loss: 0.9850332140922546,grad_norm: 0.9999990491415829, iteration: 125727
loss: 1.0330524444580078,grad_norm: 0.8790349167725507, iteration: 125728
loss: 0.9654892086982727,grad_norm: 0.9583919635632092, iteration: 125729
loss: 0.9781795144081116,grad_norm: 0.9999991703880587, iteration: 125730
loss: 0.9727484583854675,grad_norm: 0.947377124452127, iteration: 125731
loss: 1.0086760520935059,grad_norm: 0.9999990808981581, iteration: 125732
loss: 0.9883612990379333,grad_norm: 0.9946382482636611, iteration: 125733
loss: 1.0055087804794312,grad_norm: 0.9999991202677235, iteration: 125734
loss: 1.0099384784698486,grad_norm: 0.7547479641921867, iteration: 125735
loss: 1.0005955696105957,grad_norm: 0.9999995299243726, iteration: 125736
loss: 1.0121883153915405,grad_norm: 0.9710365511266028, iteration: 125737
loss: 0.9858468770980835,grad_norm: 0.9999990246600166, iteration: 125738
loss: 0.9971121549606323,grad_norm: 0.9999990287368644, iteration: 125739
loss: 1.0139631032943726,grad_norm: 0.9999990919640945, iteration: 125740
loss: 0.9910234808921814,grad_norm: 0.999999207622553, iteration: 125741
loss: 0.9667810797691345,grad_norm: 0.996096542356202, iteration: 125742
loss: 1.0428338050842285,grad_norm: 0.9999992853606428, iteration: 125743
loss: 0.9987431168556213,grad_norm: 0.9999991120581095, iteration: 125744
loss: 1.0510550737380981,grad_norm: 0.9999993171536746, iteration: 125745
loss: 1.0259580612182617,grad_norm: 0.9999990650120414, iteration: 125746
loss: 0.9889876842498779,grad_norm: 0.9999991314415434, iteration: 125747
loss: 1.0013900995254517,grad_norm: 0.9999991096785863, iteration: 125748
loss: 0.9814164638519287,grad_norm: 0.9999991174595945, iteration: 125749
loss: 1.0233302116394043,grad_norm: 0.9999990910073898, iteration: 125750
loss: 1.0009710788726807,grad_norm: 0.9677677719121757, iteration: 125751
loss: 1.0181946754455566,grad_norm: 0.9999991947601401, iteration: 125752
loss: 1.0146832466125488,grad_norm: 0.9999991143144624, iteration: 125753
loss: 0.9905064105987549,grad_norm: 0.9999999184639079, iteration: 125754
loss: 1.0474880933761597,grad_norm: 0.9999991158572482, iteration: 125755
loss: 1.0147795677185059,grad_norm: 0.9999990004590432, iteration: 125756
loss: 0.9711312651634216,grad_norm: 0.9999992173948993, iteration: 125757
loss: 1.0228776931762695,grad_norm: 0.9999991553801495, iteration: 125758
loss: 0.9703704118728638,grad_norm: 0.9999990121005574, iteration: 125759
loss: 1.0277529954910278,grad_norm: 0.9999993515785359, iteration: 125760
loss: 0.9985725283622742,grad_norm: 0.9514739353385457, iteration: 125761
loss: 0.9913551211357117,grad_norm: 0.9999991642270671, iteration: 125762
loss: 0.966588020324707,grad_norm: 0.9999990289979289, iteration: 125763
loss: 0.9941869974136353,grad_norm: 0.9999992149667145, iteration: 125764
loss: 1.0056331157684326,grad_norm: 0.9999992823213655, iteration: 125765
loss: 1.0020467042922974,grad_norm: 0.7849272255968102, iteration: 125766
loss: 0.9982155561447144,grad_norm: 0.9999991559531305, iteration: 125767
loss: 1.0185503959655762,grad_norm: 0.9999993192175313, iteration: 125768
loss: 1.0055502653121948,grad_norm: 0.9999990803171354, iteration: 125769
loss: 1.022678017616272,grad_norm: 0.9999990653822999, iteration: 125770
loss: 1.009115219116211,grad_norm: 0.9999992315203361, iteration: 125771
loss: 0.9728400707244873,grad_norm: 0.9476961067984128, iteration: 125772
loss: 0.9997599720954895,grad_norm: 0.8957235152326422, iteration: 125773
loss: 1.0365368127822876,grad_norm: 0.9999997249545133, iteration: 125774
loss: 0.9943486452102661,grad_norm: 0.9311048740901446, iteration: 125775
loss: 1.019190788269043,grad_norm: 0.9999991049020447, iteration: 125776
loss: 0.9852944016456604,grad_norm: 0.926189408541559, iteration: 125777
loss: 1.0419855117797852,grad_norm: 0.8525786372855236, iteration: 125778
loss: 0.9542545080184937,grad_norm: 0.9999993875483459, iteration: 125779
loss: 1.0137475728988647,grad_norm: 0.9999992017302363, iteration: 125780
loss: 0.967637836933136,grad_norm: 0.999999518490958, iteration: 125781
loss: 1.0479811429977417,grad_norm: 0.9999989981217227, iteration: 125782
loss: 0.9925869107246399,grad_norm: 0.9999991287133132, iteration: 125783
loss: 0.9767636060714722,grad_norm: 0.9012517351757293, iteration: 125784
loss: 1.01113760471344,grad_norm: 0.9999991195879764, iteration: 125785
loss: 0.9892593622207642,grad_norm: 0.9999991378676765, iteration: 125786
loss: 0.9883094429969788,grad_norm: 0.8766462563134146, iteration: 125787
loss: 1.0294559001922607,grad_norm: 0.9999992559823432, iteration: 125788
loss: 0.9819602370262146,grad_norm: 0.9483108294612479, iteration: 125789
loss: 1.0249539613723755,grad_norm: 0.9999992030327526, iteration: 125790
loss: 1.0018385648727417,grad_norm: 0.9999991061906819, iteration: 125791
loss: 0.9816172122955322,grad_norm: 0.9999991405151851, iteration: 125792
loss: 1.0253055095672607,grad_norm: 0.9999990769802434, iteration: 125793
loss: 1.042420744895935,grad_norm: 0.9999992481003632, iteration: 125794
loss: 1.0837390422821045,grad_norm: 0.9999991439677921, iteration: 125795
loss: 1.0071868896484375,grad_norm: 0.9999989843495548, iteration: 125796
loss: 1.0379375219345093,grad_norm: 0.9232385126566083, iteration: 125797
loss: 0.9997169971466064,grad_norm: 0.999999147967447, iteration: 125798
loss: 1.116769790649414,grad_norm: 0.9999994928138473, iteration: 125799
loss: 0.9980151653289795,grad_norm: 0.9999989134159919, iteration: 125800
loss: 1.0408084392547607,grad_norm: 0.999999319364838, iteration: 125801
loss: 1.0166898965835571,grad_norm: 0.9102020024818628, iteration: 125802
loss: 1.033540964126587,grad_norm: 0.982914795133699, iteration: 125803
loss: 0.9952574968338013,grad_norm: 0.9999991337131587, iteration: 125804
loss: 0.9782922267913818,grad_norm: 0.9808842023712043, iteration: 125805
loss: 1.0094072818756104,grad_norm: 0.9541886576958106, iteration: 125806
loss: 1.0130681991577148,grad_norm: 0.9416525578011556, iteration: 125807
loss: 1.0410361289978027,grad_norm: 0.9999996227573769, iteration: 125808
loss: 0.969439685344696,grad_norm: 0.8320774564631804, iteration: 125809
loss: 1.0244368314743042,grad_norm: 0.970010365549939, iteration: 125810
loss: 1.0311225652694702,grad_norm: 0.9999990343998304, iteration: 125811
loss: 0.9895907640457153,grad_norm: 0.9503507027393525, iteration: 125812
loss: 1.0044528245925903,grad_norm: 0.9961189910398012, iteration: 125813
loss: 1.012519359588623,grad_norm: 0.9999992556941556, iteration: 125814
loss: 1.0317530632019043,grad_norm: 0.9364876007430778, iteration: 125815
loss: 1.0135462284088135,grad_norm: 0.9999991986563447, iteration: 125816
loss: 0.9846218824386597,grad_norm: 0.9999996698497794, iteration: 125817
loss: 1.0092048645019531,grad_norm: 0.9999991985453126, iteration: 125818
loss: 0.9955829977989197,grad_norm: 0.9999992262195868, iteration: 125819
loss: 1.0110281705856323,grad_norm: 0.9999991091290784, iteration: 125820
loss: 1.015588402748108,grad_norm: 0.9999991463774237, iteration: 125821
loss: 1.0191125869750977,grad_norm: 0.9204684049019007, iteration: 125822
loss: 0.9706867933273315,grad_norm: 0.9454390118075577, iteration: 125823
loss: 0.9939428567886353,grad_norm: 0.8524321803232825, iteration: 125824
loss: 1.020652413368225,grad_norm: 0.9999992389017163, iteration: 125825
loss: 1.005277395248413,grad_norm: 0.9542708234921123, iteration: 125826
loss: 1.0041080713272095,grad_norm: 0.9999990302654261, iteration: 125827
loss: 0.9730749130249023,grad_norm: 0.9174983205778593, iteration: 125828
loss: 0.9982866644859314,grad_norm: 0.9109333118211944, iteration: 125829
loss: 0.9932886362075806,grad_norm: 0.9999990645120259, iteration: 125830
loss: 1.0073778629302979,grad_norm: 0.9999994511471785, iteration: 125831
loss: 1.03187894821167,grad_norm: 0.9474972268461939, iteration: 125832
loss: 0.9937722086906433,grad_norm: 0.9999991910327498, iteration: 125833
loss: 0.9713765382766724,grad_norm: 0.9999990378626799, iteration: 125834
loss: 0.9961950182914734,grad_norm: 0.9999990125416259, iteration: 125835
loss: 0.9939008355140686,grad_norm: 0.999999307744442, iteration: 125836
loss: 0.9893249273300171,grad_norm: 0.9999990206693443, iteration: 125837
loss: 0.9793682098388672,grad_norm: 0.9999990230069561, iteration: 125838
loss: 1.0349191427230835,grad_norm: 0.9999993196981456, iteration: 125839
loss: 0.9942153096199036,grad_norm: 0.9999988630499372, iteration: 125840
loss: 1.023034691810608,grad_norm: 0.9999991210363536, iteration: 125841
loss: 1.0135048627853394,grad_norm: 0.9880647584143774, iteration: 125842
loss: 0.9906523823738098,grad_norm: 0.9710705338340005, iteration: 125843
loss: 1.0377782583236694,grad_norm: 0.9999998758093591, iteration: 125844
loss: 0.9907070398330688,grad_norm: 0.9999992131190172, iteration: 125845
loss: 0.9732576608657837,grad_norm: 0.9999990768158815, iteration: 125846
loss: 1.0072485208511353,grad_norm: 0.9999991816328507, iteration: 125847
loss: 1.00579035282135,grad_norm: 0.9892099152205358, iteration: 125848
loss: 0.9935892224311829,grad_norm: 0.9999991881300249, iteration: 125849
loss: 0.9889547228813171,grad_norm: 0.9896204983888336, iteration: 125850
loss: 1.0066128969192505,grad_norm: 0.9608481491491211, iteration: 125851
loss: 1.0004652738571167,grad_norm: 0.8487112405941574, iteration: 125852
loss: 1.0325361490249634,grad_norm: 0.8914059870553143, iteration: 125853
loss: 0.9777774810791016,grad_norm: 0.9999997092361249, iteration: 125854
loss: 1.013556957244873,grad_norm: 0.9544733889084798, iteration: 125855
loss: 1.0116807222366333,grad_norm: 0.9999990304898546, iteration: 125856
loss: 0.9718019366264343,grad_norm: 0.9999992550902771, iteration: 125857
loss: 0.983376145362854,grad_norm: 0.9675246235147116, iteration: 125858
loss: 0.9514919519424438,grad_norm: 0.87164676940108, iteration: 125859
loss: 1.0529927015304565,grad_norm: 0.9999999614540305, iteration: 125860
loss: 1.0378308296203613,grad_norm: 0.9999993010413427, iteration: 125861
loss: 1.0232062339782715,grad_norm: 0.999999722881529, iteration: 125862
loss: 0.959976851940155,grad_norm: 0.9926136712868664, iteration: 125863
loss: 1.0051406621932983,grad_norm: 0.9999992843596601, iteration: 125864
loss: 0.9660829901695251,grad_norm: 0.9999989869739369, iteration: 125865
loss: 1.0090773105621338,grad_norm: 0.9999992234679385, iteration: 125866
loss: 1.0689822435379028,grad_norm: 0.9999991770342456, iteration: 125867
loss: 0.9563970565795898,grad_norm: 0.9696124142611403, iteration: 125868
loss: 1.0240882635116577,grad_norm: 0.9999990144654917, iteration: 125869
loss: 0.9968245029449463,grad_norm: 0.9999995003264666, iteration: 125870
loss: 1.0162352323532104,grad_norm: 0.9999991625436936, iteration: 125871
loss: 1.000947117805481,grad_norm: 0.915932731092044, iteration: 125872
loss: 0.9659099578857422,grad_norm: 0.958084028673507, iteration: 125873
loss: 1.0524009466171265,grad_norm: 0.9999998839268751, iteration: 125874
loss: 0.9908960461616516,grad_norm: 0.9999998119389971, iteration: 125875
loss: 1.0088202953338623,grad_norm: 0.9999991370420928, iteration: 125876
loss: 1.0380797386169434,grad_norm: 0.9999993820668098, iteration: 125877
loss: 1.0045043230056763,grad_norm: 0.9579596508560526, iteration: 125878
loss: 0.9848365187644958,grad_norm: 0.9999992041240031, iteration: 125879
loss: 1.0715235471725464,grad_norm: 0.9999993260642623, iteration: 125880
loss: 0.9981865882873535,grad_norm: 0.8419670612320153, iteration: 125881
loss: 1.024166226387024,grad_norm: 0.9999990155025448, iteration: 125882
loss: 1.0049127340316772,grad_norm: 0.9644178665324437, iteration: 125883
loss: 0.9870306849479675,grad_norm: 0.9268517045907633, iteration: 125884
loss: 1.012854814529419,grad_norm: 0.9844324105708387, iteration: 125885
loss: 0.988486111164093,grad_norm: 0.9999992129769756, iteration: 125886
loss: 1.0530427694320679,grad_norm: 0.9999993008680138, iteration: 125887
loss: 1.0183175802230835,grad_norm: 0.9999991873281692, iteration: 125888
loss: 1.0285987854003906,grad_norm: 0.999999131000393, iteration: 125889
loss: 0.9650285840034485,grad_norm: 0.9999989823218677, iteration: 125890
loss: 1.0020538568496704,grad_norm: 0.9250637078698358, iteration: 125891
loss: 1.0223448276519775,grad_norm: 0.9999195915741984, iteration: 125892
loss: 1.0050963163375854,grad_norm: 0.9999992613244906, iteration: 125893
loss: 1.0081210136413574,grad_norm: 0.9340349794507288, iteration: 125894
loss: 0.9932534098625183,grad_norm: 0.9999989511091084, iteration: 125895
loss: 1.0125890970230103,grad_norm: 0.9007208038361876, iteration: 125896
loss: 1.0358833074569702,grad_norm: 0.9999992559488181, iteration: 125897
loss: 0.9863954186439514,grad_norm: 0.9999991317408597, iteration: 125898
loss: 1.008886456489563,grad_norm: 0.9999990232715518, iteration: 125899
loss: 0.9794015288352966,grad_norm: 0.9999993343874709, iteration: 125900
loss: 0.993768036365509,grad_norm: 0.9304443997181866, iteration: 125901
loss: 1.0004299879074097,grad_norm: 0.9999990269647739, iteration: 125902
loss: 0.9911943078041077,grad_norm: 0.9641193461665132, iteration: 125903
loss: 0.9511893391609192,grad_norm: 0.9999990617193919, iteration: 125904
loss: 0.9803271889686584,grad_norm: 0.9999989870814671, iteration: 125905
loss: 1.0382599830627441,grad_norm: 0.9999992687429745, iteration: 125906
loss: 0.9955419301986694,grad_norm: 0.8878362793560142, iteration: 125907
loss: 1.0311475992202759,grad_norm: 0.9999990348613296, iteration: 125908
loss: 1.0057144165039062,grad_norm: 0.9999990473420243, iteration: 125909
loss: 0.9961518049240112,grad_norm: 0.9965164689053778, iteration: 125910
loss: 1.0109760761260986,grad_norm: 0.9999991146305679, iteration: 125911
loss: 0.9784849286079407,grad_norm: 0.9999990475904376, iteration: 125912
loss: 0.9640532732009888,grad_norm: 0.9888574070063663, iteration: 125913
loss: 0.9973401427268982,grad_norm: 0.9999991401487812, iteration: 125914
loss: 1.0016367435455322,grad_norm: 0.9405522697018303, iteration: 125915
loss: 0.9960537552833557,grad_norm: 0.9999990409534114, iteration: 125916
loss: 0.975409209728241,grad_norm: 0.9427456521734606, iteration: 125917
loss: 0.9728809595108032,grad_norm: 0.9891971149357008, iteration: 125918
loss: 1.0638768672943115,grad_norm: 0.9999991204049252, iteration: 125919
loss: 0.9404789209365845,grad_norm: 0.9999991910566745, iteration: 125920
loss: 1.0161128044128418,grad_norm: 0.9999991356270478, iteration: 125921
loss: 1.0176732540130615,grad_norm: 0.9665596615293196, iteration: 125922
loss: 1.0078750848770142,grad_norm: 0.9712068588640476, iteration: 125923
loss: 1.005868673324585,grad_norm: 0.9999990934096404, iteration: 125924
loss: 1.0343736410140991,grad_norm: 0.9360453294410149, iteration: 125925
loss: 1.013587236404419,grad_norm: 0.9999990882517326, iteration: 125926
loss: 1.0265560150146484,grad_norm: 0.9104712913491302, iteration: 125927
loss: 1.0261143445968628,grad_norm: 0.9999991205499865, iteration: 125928
loss: 1.0011284351348877,grad_norm: 0.9846271295097797, iteration: 125929
loss: 1.0058517456054688,grad_norm: 0.8404163753008673, iteration: 125930
loss: 0.9835615754127502,grad_norm: 0.8975541412344282, iteration: 125931
loss: 1.0131059885025024,grad_norm: 0.9783285162522737, iteration: 125932
loss: 1.018966555595398,grad_norm: 0.9999993851924222, iteration: 125933
loss: 0.9942491054534912,grad_norm: 0.9999991729974387, iteration: 125934
loss: 0.9984709620475769,grad_norm: 0.9999991395121886, iteration: 125935
loss: 1.0801160335540771,grad_norm: 0.9999991817205275, iteration: 125936
loss: 0.9945434927940369,grad_norm: 0.9999992106470785, iteration: 125937
loss: 1.0286272764205933,grad_norm: 0.9999990947078486, iteration: 125938
loss: 1.042794108390808,grad_norm: 0.9999990151947341, iteration: 125939
loss: 0.9677379131317139,grad_norm: 0.9593469440614734, iteration: 125940
loss: 1.0260300636291504,grad_norm: 0.999999162661249, iteration: 125941
loss: 0.9487653970718384,grad_norm: 0.9999992465422202, iteration: 125942
loss: 0.9980893731117249,grad_norm: 0.9999990206054232, iteration: 125943
loss: 0.9875491261482239,grad_norm: 0.9999990354488655, iteration: 125944
loss: 1.0071572065353394,grad_norm: 0.8251701201027931, iteration: 125945
loss: 0.9600322842597961,grad_norm: 0.9999992729844331, iteration: 125946
loss: 0.9816266894340515,grad_norm: 0.9999991505852809, iteration: 125947
loss: 0.9643757939338684,grad_norm: 0.9277052987802743, iteration: 125948
loss: 0.9910827875137329,grad_norm: 0.9999990853681666, iteration: 125949
loss: 1.0150370597839355,grad_norm: 0.999998972457812, iteration: 125950
loss: 0.9907337427139282,grad_norm: 0.9999989937314162, iteration: 125951
loss: 1.0240819454193115,grad_norm: 0.9999991796971371, iteration: 125952
loss: 1.0286649465560913,grad_norm: 0.9999994101850129, iteration: 125953
loss: 1.1327922344207764,grad_norm: 0.9999993007985302, iteration: 125954
loss: 1.007720708847046,grad_norm: 0.9999991420033106, iteration: 125955
loss: 1.049124836921692,grad_norm: 0.9999993543629134, iteration: 125956
loss: 0.9906920790672302,grad_norm: 0.9550476957946065, iteration: 125957
loss: 1.132501482963562,grad_norm: 0.9999998261170865, iteration: 125958
loss: 1.014261245727539,grad_norm: 0.9999991566837398, iteration: 125959
loss: 0.9869582056999207,grad_norm: 0.9999991955691155, iteration: 125960
loss: 0.9583802819252014,grad_norm: 0.999999298399809, iteration: 125961
loss: 1.0253777503967285,grad_norm: 0.9999994306285815, iteration: 125962
loss: 1.0558834075927734,grad_norm: 0.999999360610566, iteration: 125963
loss: 0.9594188928604126,grad_norm: 0.999999443488709, iteration: 125964
loss: 0.9976289868354797,grad_norm: 0.9347065628284017, iteration: 125965
loss: 1.003585696220398,grad_norm: 0.9999991756126495, iteration: 125966
loss: 0.9985690116882324,grad_norm: 0.9999991266271816, iteration: 125967
loss: 0.9952691793441772,grad_norm: 0.9892370141426932, iteration: 125968
loss: 1.0014398097991943,grad_norm: 0.9412354209411817, iteration: 125969
loss: 1.0234177112579346,grad_norm: 0.9821394591567539, iteration: 125970
loss: 1.028341293334961,grad_norm: 0.8970367895817483, iteration: 125971
loss: 1.0260281562805176,grad_norm: 0.9901187216662998, iteration: 125972
loss: 1.0015629529953003,grad_norm: 0.9438661164597226, iteration: 125973
loss: 0.9877074956893921,grad_norm: 0.9999992188510087, iteration: 125974
loss: 1.0085805654525757,grad_norm: 0.9099261606286381, iteration: 125975
loss: 1.039719581604004,grad_norm: 0.9999994657555364, iteration: 125976
loss: 1.0058422088623047,grad_norm: 0.9966863916863794, iteration: 125977
loss: 0.9909759759902954,grad_norm: 0.9999994989304422, iteration: 125978
loss: 0.9955867528915405,grad_norm: 0.9999990884012722, iteration: 125979
loss: 1.0019402503967285,grad_norm: 0.9999991269470931, iteration: 125980
loss: 0.986127495765686,grad_norm: 0.855865147404746, iteration: 125981
loss: 0.9883065819740295,grad_norm: 0.9999989402456453, iteration: 125982
loss: 0.994251012802124,grad_norm: 0.9999991493853375, iteration: 125983
loss: 0.9910751581192017,grad_norm: 0.9999991968240721, iteration: 125984
loss: 0.9982693791389465,grad_norm: 0.9645702534206795, iteration: 125985
loss: 1.019061803817749,grad_norm: 0.8576292405976883, iteration: 125986
loss: 1.012962818145752,grad_norm: 0.7617597156645572, iteration: 125987
loss: 0.9901975989341736,grad_norm: 0.999999092861914, iteration: 125988
loss: 0.9621782302856445,grad_norm: 0.9999992443656399, iteration: 125989
loss: 1.0099642276763916,grad_norm: 0.9999991771259803, iteration: 125990
loss: 1.0357340574264526,grad_norm: 0.9494052083265406, iteration: 125991
loss: 1.001604437828064,grad_norm: 0.9999991500197225, iteration: 125992
loss: 0.9976761341094971,grad_norm: 0.9816353471377893, iteration: 125993
loss: 1.0168750286102295,grad_norm: 0.8998781253840913, iteration: 125994
loss: 1.0144339799880981,grad_norm: 0.9378317962932727, iteration: 125995
loss: 1.0142110586166382,grad_norm: 0.9999993691933116, iteration: 125996
loss: 0.9763755202293396,grad_norm: 0.999999001648988, iteration: 125997
loss: 1.011862874031067,grad_norm: 0.9125907621836383, iteration: 125998
loss: 0.9954450130462646,grad_norm: 0.9999990805610588, iteration: 125999
loss: 1.0112329721450806,grad_norm: 0.8971747321534067, iteration: 126000
loss: 0.9812443256378174,grad_norm: 0.9999990593855298, iteration: 126001
loss: 1.0113658905029297,grad_norm: 0.9999993165129256, iteration: 126002
loss: 0.9783086180686951,grad_norm: 0.999999065272638, iteration: 126003
loss: 1.0194607973098755,grad_norm: 0.9999995517902163, iteration: 126004
loss: 1.0007168054580688,grad_norm: 0.9077360572167844, iteration: 126005
loss: 0.999131441116333,grad_norm: 0.9178479825554805, iteration: 126006
loss: 1.0211602449417114,grad_norm: 0.9999992095313149, iteration: 126007
loss: 1.0510170459747314,grad_norm: 0.9450248531092116, iteration: 126008
loss: 1.0404207706451416,grad_norm: 0.9917854930828331, iteration: 126009
loss: 0.976620078086853,grad_norm: 0.9999990646134612, iteration: 126010
loss: 1.070219874382019,grad_norm: 0.9999999094546855, iteration: 126011
loss: 0.9624608159065247,grad_norm: 0.9999991277178372, iteration: 126012
loss: 0.9958642721176147,grad_norm: 0.9999991046959494, iteration: 126013
loss: 0.9784531593322754,grad_norm: 0.9999990180214118, iteration: 126014
loss: 1.0226761102676392,grad_norm: 0.9473077448630302, iteration: 126015
loss: 0.97528475522995,grad_norm: 0.9999991774181455, iteration: 126016
loss: 1.0306826829910278,grad_norm: 0.9999992330203662, iteration: 126017
loss: 0.9935934543609619,grad_norm: 0.9344801981091773, iteration: 126018
loss: 1.0115596055984497,grad_norm: 0.9999992239833199, iteration: 126019
loss: 0.9994067549705505,grad_norm: 0.7965464880169991, iteration: 126020
loss: 0.9979053139686584,grad_norm: 0.8984500334276888, iteration: 126021
loss: 1.0303324460983276,grad_norm: 0.9328661185459299, iteration: 126022
loss: 0.9878849387168884,grad_norm: 0.9951825116756015, iteration: 126023
loss: 1.005208134651184,grad_norm: 0.9999991644343831, iteration: 126024
loss: 0.9612356424331665,grad_norm: 0.999999137837991, iteration: 126025
loss: 0.9823316335678101,grad_norm: 0.9999990732973385, iteration: 126026
loss: 0.9807549118995667,grad_norm: 0.9999991600668634, iteration: 126027
loss: 1.0104025602340698,grad_norm: 0.999999153105462, iteration: 126028
loss: 0.9686208963394165,grad_norm: 0.9880675482047137, iteration: 126029
loss: 0.9959701299667358,grad_norm: 0.8605690533271223, iteration: 126030
loss: 0.986879825592041,grad_norm: 0.9835023274671564, iteration: 126031
loss: 0.995880663394928,grad_norm: 0.9999991424219047, iteration: 126032
loss: 0.9770027995109558,grad_norm: 0.9859177056132313, iteration: 126033
loss: 0.9990686178207397,grad_norm: 0.9999992061092289, iteration: 126034
loss: 1.0220508575439453,grad_norm: 0.9999991689225254, iteration: 126035
loss: 0.9838529825210571,grad_norm: 0.9999993117930357, iteration: 126036
loss: 1.00371253490448,grad_norm: 0.9999990706753987, iteration: 126037
loss: 1.0286269187927246,grad_norm: 0.928847203301699, iteration: 126038
loss: 0.9534948468208313,grad_norm: 0.955938408595494, iteration: 126039
loss: 1.0393421649932861,grad_norm: 0.9999990043391787, iteration: 126040
loss: 1.012097716331482,grad_norm: 0.9999995598117681, iteration: 126041
loss: 0.9857531785964966,grad_norm: 0.9999991680994186, iteration: 126042
loss: 0.9528883099555969,grad_norm: 0.9454260148970948, iteration: 126043
loss: 0.9759621620178223,grad_norm: 0.9999992308318528, iteration: 126044
loss: 1.0047892332077026,grad_norm: 0.97960430359231, iteration: 126045
loss: 0.9885809421539307,grad_norm: 0.9999994787017333, iteration: 126046
loss: 1.0424026250839233,grad_norm: 0.9999991623423047, iteration: 126047
loss: 0.98177570104599,grad_norm: 0.9150107220601416, iteration: 126048
loss: 0.987501859664917,grad_norm: 0.9999990795634635, iteration: 126049
loss: 0.940031886100769,grad_norm: 0.9999991038280268, iteration: 126050
loss: 0.9699724316596985,grad_norm: 0.9999991221310975, iteration: 126051
loss: 0.9590056538581848,grad_norm: 0.9762866139287484, iteration: 126052
loss: 1.0081866979599,grad_norm: 0.8742048352772136, iteration: 126053
loss: 1.0011372566223145,grad_norm: 0.9999990386079856, iteration: 126054
loss: 1.0135869979858398,grad_norm: 0.9999988991489802, iteration: 126055
loss: 1.0323654413223267,grad_norm: 0.8942167248862835, iteration: 126056
loss: 1.0219818353652954,grad_norm: 0.9999991722596584, iteration: 126057
loss: 1.029390573501587,grad_norm: 0.9999999634743802, iteration: 126058
loss: 0.976270318031311,grad_norm: 0.9999993742469512, iteration: 126059
loss: 0.990781307220459,grad_norm: 0.9999991728667591, iteration: 126060
loss: 1.0041106939315796,grad_norm: 0.9999991369149217, iteration: 126061
loss: 0.9923250079154968,grad_norm: 0.9229516846905385, iteration: 126062
loss: 1.0033321380615234,grad_norm: 0.9999991282000547, iteration: 126063
loss: 1.0327905416488647,grad_norm: 0.9631425839761295, iteration: 126064
loss: 0.9925851225852966,grad_norm: 0.8293743161978349, iteration: 126065
loss: 1.027492880821228,grad_norm: 0.9999991570196737, iteration: 126066
loss: 1.0006667375564575,grad_norm: 0.8650111800828924, iteration: 126067
loss: 0.9938383102416992,grad_norm: 0.9999991947178695, iteration: 126068
loss: 1.0195553302764893,grad_norm: 0.9999992628830884, iteration: 126069
loss: 1.0237455368041992,grad_norm: 0.9284464644454345, iteration: 126070
loss: 1.0414670705795288,grad_norm: 0.9999990196399163, iteration: 126071
loss: 1.0280207395553589,grad_norm: 0.9999992515081771, iteration: 126072
loss: 0.9932299852371216,grad_norm: 0.9999990841316295, iteration: 126073
loss: 0.9784655570983887,grad_norm: 0.992305616505254, iteration: 126074
loss: 1.000430941581726,grad_norm: 0.9999990679823939, iteration: 126075
loss: 0.9915168881416321,grad_norm: 0.9999992338118574, iteration: 126076
loss: 1.02455472946167,grad_norm: 0.9999990737073647, iteration: 126077
loss: 1.0027732849121094,grad_norm: 0.9500586009856757, iteration: 126078
loss: 1.001177191734314,grad_norm: 0.9415819967165456, iteration: 126079
loss: 1.004662275314331,grad_norm: 0.9246845385436998, iteration: 126080
loss: 0.9660884737968445,grad_norm: 0.9999990482264625, iteration: 126081
loss: 1.158264398574829,grad_norm: 0.9999994269003416, iteration: 126082
loss: 1.1039632558822632,grad_norm: 0.999998997243184, iteration: 126083
loss: 0.9907333254814148,grad_norm: 0.973211579344271, iteration: 126084
loss: 1.0100138187408447,grad_norm: 0.9596634781516911, iteration: 126085
loss: 0.9903264045715332,grad_norm: 0.9494099212487345, iteration: 126086
loss: 0.9835680723190308,grad_norm: 0.9999991618801035, iteration: 126087
loss: 1.009189486503601,grad_norm: 0.9501169485875886, iteration: 126088
loss: 1.0244015455245972,grad_norm: 0.9999993144747298, iteration: 126089
loss: 0.9910988211631775,grad_norm: 0.9999990967356797, iteration: 126090
loss: 0.9873355031013489,grad_norm: 0.9999990693047169, iteration: 126091
loss: 0.9832419753074646,grad_norm: 0.9999990497010091, iteration: 126092
loss: 0.9737078547477722,grad_norm: 0.9999990947071365, iteration: 126093
loss: 1.0054396390914917,grad_norm: 0.999999082488416, iteration: 126094
loss: 0.979684054851532,grad_norm: 0.9999991417284315, iteration: 126095
loss: 1.0154552459716797,grad_norm: 0.9999991818046939, iteration: 126096
loss: 0.9640867710113525,grad_norm: 0.9318922821132679, iteration: 126097
loss: 1.0100504159927368,grad_norm: 0.894223368455148, iteration: 126098
loss: 0.9825937747955322,grad_norm: 0.9999991832104793, iteration: 126099
loss: 1.0088502168655396,grad_norm: 0.9999993657194587, iteration: 126100
loss: 0.9699447751045227,grad_norm: 0.8602569080134413, iteration: 126101
loss: 0.990327775478363,grad_norm: 0.9999991263952621, iteration: 126102
loss: 0.9760459065437317,grad_norm: 0.9999991352816088, iteration: 126103
loss: 1.0282642841339111,grad_norm: 0.9327467364064537, iteration: 126104
loss: 1.0137925148010254,grad_norm: 0.9999991623955637, iteration: 126105
loss: 1.00464928150177,grad_norm: 0.9999991024070884, iteration: 126106
loss: 1.001988410949707,grad_norm: 0.999999237113672, iteration: 126107
loss: 0.9922102093696594,grad_norm: 0.8752648645625809, iteration: 126108
loss: 0.9868996143341064,grad_norm: 0.9999990001294846, iteration: 126109
loss: 1.0059483051300049,grad_norm: 0.999999210414412, iteration: 126110
loss: 1.016895055770874,grad_norm: 0.9491973612853201, iteration: 126111
loss: 1.015786051750183,grad_norm: 0.9999992059007622, iteration: 126112
loss: 1.0153391361236572,grad_norm: 0.985567779568198, iteration: 126113
loss: 0.9629167318344116,grad_norm: 0.9999990918094236, iteration: 126114
loss: 1.0373268127441406,grad_norm: 0.9999992064555335, iteration: 126115
loss: 1.1190991401672363,grad_norm: 0.9999993846307489, iteration: 126116
loss: 1.0076377391815186,grad_norm: 0.99999926054478, iteration: 126117
loss: 0.9965896010398865,grad_norm: 0.9999990143121136, iteration: 126118
loss: 0.9852126836776733,grad_norm: 0.999999139250982, iteration: 126119
loss: 1.008249044418335,grad_norm: 0.9999990956790712, iteration: 126120
loss: 1.009580135345459,grad_norm: 0.907019460957906, iteration: 126121
loss: 1.02458655834198,grad_norm: 0.8898756686388912, iteration: 126122
loss: 1.0110169649124146,grad_norm: 0.9999990388718089, iteration: 126123
loss: 0.9761354923248291,grad_norm: 0.9570721335865892, iteration: 126124
loss: 0.9931311011314392,grad_norm: 0.9999991758028528, iteration: 126125
loss: 0.9755769968032837,grad_norm: 0.9999990663842008, iteration: 126126
loss: 0.9859592914581299,grad_norm: 0.9999990044575288, iteration: 126127
loss: 0.9944005608558655,grad_norm: 0.9594086793210239, iteration: 126128
loss: 0.9848865270614624,grad_norm: 0.9999990771510651, iteration: 126129
loss: 1.0060533285140991,grad_norm: 0.9011542431454098, iteration: 126130
loss: 0.9771935343742371,grad_norm: 0.9347558016700269, iteration: 126131
loss: 0.9615460634231567,grad_norm: 0.9999990634313466, iteration: 126132
loss: 0.9916067719459534,grad_norm: 0.9715168828988358, iteration: 126133
loss: 0.9795329570770264,grad_norm: 0.8556308378116878, iteration: 126134
loss: 1.0005167722702026,grad_norm: 0.9999990081783723, iteration: 126135
loss: 0.9922100305557251,grad_norm: 0.9999992470754752, iteration: 126136
loss: 1.0965968370437622,grad_norm: 0.9999995132196966, iteration: 126137
loss: 1.0057069063186646,grad_norm: 0.9999991215639906, iteration: 126138
loss: 1.0424741506576538,grad_norm: 0.9999994693996177, iteration: 126139
loss: 1.018059492111206,grad_norm: 0.9551201154694461, iteration: 126140
loss: 0.9735375046730042,grad_norm: 0.9999991677084562, iteration: 126141
loss: 1.0164105892181396,grad_norm: 0.9999989579240891, iteration: 126142
loss: 0.9938393235206604,grad_norm: 0.9999991753547997, iteration: 126143
loss: 1.0162935256958008,grad_norm: 0.9866922153502543, iteration: 126144
loss: 0.9712387323379517,grad_norm: 0.9929251881768664, iteration: 126145
loss: 1.0149884223937988,grad_norm: 0.9999992474331891, iteration: 126146
loss: 1.043565273284912,grad_norm: 0.9999992291157411, iteration: 126147
loss: 1.0138099193572998,grad_norm: 0.9999990747893883, iteration: 126148
loss: 0.9886921048164368,grad_norm: 0.9739330145639649, iteration: 126149
loss: 0.9779050946235657,grad_norm: 0.9999992157497832, iteration: 126150
loss: 0.9853882193565369,grad_norm: 0.9999989821347375, iteration: 126151
loss: 1.010549783706665,grad_norm: 0.991200551763408, iteration: 126152
loss: 0.988371729850769,grad_norm: 0.9999992208389272, iteration: 126153
loss: 0.9878581762313843,grad_norm: 0.9999991571533751, iteration: 126154
loss: 1.0024150609970093,grad_norm: 0.9999991764607046, iteration: 126155
loss: 0.9771735668182373,grad_norm: 0.999999066401807, iteration: 126156
loss: 0.9775909781455994,grad_norm: 0.9415382264717536, iteration: 126157
loss: 0.9785358905792236,grad_norm: 0.9999989305978968, iteration: 126158
loss: 1.0224392414093018,grad_norm: 0.8566639295765628, iteration: 126159
loss: 0.9904757738113403,grad_norm: 0.9999991633449932, iteration: 126160
loss: 1.0171698331832886,grad_norm: 0.9999992467769968, iteration: 126161
loss: 0.9981871843338013,grad_norm: 0.999998907972272, iteration: 126162
loss: 1.0069844722747803,grad_norm: 0.9999991774479488, iteration: 126163
loss: 0.9615240097045898,grad_norm: 0.936148620219965, iteration: 126164
loss: 0.9949171543121338,grad_norm: 0.9999991952997154, iteration: 126165
loss: 1.0016357898712158,grad_norm: 0.9999989942616431, iteration: 126166
loss: 0.9980497360229492,grad_norm: 0.9098271776993623, iteration: 126167
loss: 1.0167417526245117,grad_norm: 0.9999991193362665, iteration: 126168
loss: 1.0460350513458252,grad_norm: 0.9999992505232087, iteration: 126169
loss: 0.9887128472328186,grad_norm: 0.9801320008030319, iteration: 126170
loss: 1.0193971395492554,grad_norm: 0.9999989290089254, iteration: 126171
loss: 1.035269021987915,grad_norm: 0.9999991954407434, iteration: 126172
loss: 1.0007954835891724,grad_norm: 0.999999049977223, iteration: 126173
loss: 0.9790123105049133,grad_norm: 0.999999149718779, iteration: 126174
loss: 1.00924551486969,grad_norm: 0.9999990618315867, iteration: 126175
loss: 1.0343531370162964,grad_norm: 0.9441983745856081, iteration: 126176
loss: 1.0088192224502563,grad_norm: 0.9688630741125334, iteration: 126177
loss: 1.0096101760864258,grad_norm: 0.9999990493738972, iteration: 126178
loss: 0.9807895421981812,grad_norm: 0.9999991474458266, iteration: 126179
loss: 1.0052624940872192,grad_norm: 0.8012477603812429, iteration: 126180
loss: 0.9767213463783264,grad_norm: 0.9999991092805317, iteration: 126181
loss: 0.9751285910606384,grad_norm: 0.9999990919748334, iteration: 126182
loss: 1.0112338066101074,grad_norm: 0.8790022982615597, iteration: 126183
loss: 1.0300017595291138,grad_norm: 0.9999989841101172, iteration: 126184
loss: 1.0150750875473022,grad_norm: 0.9999996185035844, iteration: 126185
loss: 0.9928916692733765,grad_norm: 0.9999993363228811, iteration: 126186
loss: 0.9709206223487854,grad_norm: 0.9884912183048242, iteration: 126187
loss: 0.9976759552955627,grad_norm: 0.9999991393220873, iteration: 126188
loss: 1.0020817518234253,grad_norm: 0.9999997749146161, iteration: 126189
loss: 1.0255718231201172,grad_norm: 0.9790096272977395, iteration: 126190
loss: 1.0143847465515137,grad_norm: 0.9977660756658538, iteration: 126191
loss: 1.0284926891326904,grad_norm: 0.9999991158658028, iteration: 126192
loss: 0.9381099343299866,grad_norm: 0.9934418710542857, iteration: 126193
loss: 0.993918776512146,grad_norm: 0.8522416236322109, iteration: 126194
loss: 0.9982444643974304,grad_norm: 0.9632991870753838, iteration: 126195
loss: 1.0110009908676147,grad_norm: 0.942632788208108, iteration: 126196
loss: 0.960677981376648,grad_norm: 0.999999076115748, iteration: 126197
loss: 1.1361026763916016,grad_norm: 0.9999997464431734, iteration: 126198
loss: 0.9933618307113647,grad_norm: 0.999999163465189, iteration: 126199
loss: 1.000450849533081,grad_norm: 0.9999991516831572, iteration: 126200
loss: 1.0035558938980103,grad_norm: 0.8758806508317643, iteration: 126201
loss: 1.0167088508605957,grad_norm: 0.9424995270738806, iteration: 126202
loss: 0.9982476234436035,grad_norm: 0.9753494895237083, iteration: 126203
loss: 0.9442710280418396,grad_norm: 0.9999992366865844, iteration: 126204
loss: 1.0082619190216064,grad_norm: 0.9999988689049171, iteration: 126205
loss: 0.968698263168335,grad_norm: 0.999999016634189, iteration: 126206
loss: 1.0058358907699585,grad_norm: 0.9999990746326584, iteration: 126207
loss: 1.0078374147415161,grad_norm: 0.898875207194458, iteration: 126208
loss: 0.9856101274490356,grad_norm: 0.9999991337622395, iteration: 126209
loss: 1.0181584358215332,grad_norm: 0.8683855905297021, iteration: 126210
loss: 0.9793829321861267,grad_norm: 0.9999990744801158, iteration: 126211
loss: 1.0140066146850586,grad_norm: 0.999999165794965, iteration: 126212
loss: 1.011800765991211,grad_norm: 0.9999991219173755, iteration: 126213
loss: 1.0064655542373657,grad_norm: 0.7432553719058517, iteration: 126214
loss: 1.0230765342712402,grad_norm: 0.9999997417737617, iteration: 126215
loss: 1.0057812929153442,grad_norm: 0.9999992514183579, iteration: 126216
loss: 1.0305486917495728,grad_norm: 0.985885143876986, iteration: 126217
loss: 0.9828252196311951,grad_norm: 0.9999989565705105, iteration: 126218
loss: 1.0254777669906616,grad_norm: 0.9999991701187844, iteration: 126219
loss: 0.9968692660331726,grad_norm: 0.9396690905997933, iteration: 126220
loss: 1.0239298343658447,grad_norm: 0.9999991898065119, iteration: 126221
loss: 1.0205403566360474,grad_norm: 0.9999991029150811, iteration: 126222
loss: 0.9788542985916138,grad_norm: 0.8593487250208178, iteration: 126223
loss: 1.012497901916504,grad_norm: 0.9423922278744941, iteration: 126224
loss: 1.0197558403015137,grad_norm: 0.9999990100422862, iteration: 126225
loss: 1.0057950019836426,grad_norm: 0.9999990971287434, iteration: 126226
loss: 1.0250169038772583,grad_norm: 0.9999991030195129, iteration: 126227
loss: 1.0101608037948608,grad_norm: 0.9999992814326366, iteration: 126228
loss: 1.033257007598877,grad_norm: 0.9999991856722641, iteration: 126229
loss: 1.0229543447494507,grad_norm: 0.9999994612968053, iteration: 126230
loss: 1.0401545763015747,grad_norm: 0.9999993185241145, iteration: 126231
loss: 0.9931885600090027,grad_norm: 0.9530007220585461, iteration: 126232
loss: 1.0094852447509766,grad_norm: 0.9999991206512602, iteration: 126233
loss: 0.9986462593078613,grad_norm: 0.9279619719533442, iteration: 126234
loss: 0.9954820871353149,grad_norm: 0.9539874838073169, iteration: 126235
loss: 1.0276824235916138,grad_norm: 0.9999992222790376, iteration: 126236
loss: 1.154131293296814,grad_norm: 0.9999999674525819, iteration: 126237
loss: 1.0208858251571655,grad_norm: 0.9945000285201607, iteration: 126238
loss: 1.0074131488800049,grad_norm: 0.9999989148985821, iteration: 126239
loss: 1.050576090812683,grad_norm: 0.9999991579063403, iteration: 126240
loss: 0.990949809551239,grad_norm: 0.935258947276463, iteration: 126241
loss: 0.9945873022079468,grad_norm: 0.9999990518315146, iteration: 126242
loss: 0.9860680103302002,grad_norm: 0.9999989985355673, iteration: 126243
loss: 0.9995183944702148,grad_norm: 0.8204313216452818, iteration: 126244
loss: 0.9855493307113647,grad_norm: 0.9841346785849538, iteration: 126245
loss: 1.0028716325759888,grad_norm: 0.9999991712266193, iteration: 126246
loss: 0.9903132319450378,grad_norm: 0.9007711234453525, iteration: 126247
loss: 1.0004534721374512,grad_norm: 0.9913142153239904, iteration: 126248
loss: 1.0191456079483032,grad_norm: 0.9999992058146916, iteration: 126249
loss: 0.9894788265228271,grad_norm: 0.8565883360746884, iteration: 126250
loss: 0.9890839457511902,grad_norm: 0.9957724685644674, iteration: 126251
loss: 1.0439356565475464,grad_norm: 0.9999990505231074, iteration: 126252
loss: 1.0063097476959229,grad_norm: 0.9948557336479457, iteration: 126253
loss: 1.0167795419692993,grad_norm: 0.9999991173237027, iteration: 126254
loss: 0.9889426231384277,grad_norm: 0.9999991387166913, iteration: 126255
loss: 1.0024055242538452,grad_norm: 0.99999905717905, iteration: 126256
loss: 0.993497371673584,grad_norm: 0.9999989721403639, iteration: 126257
loss: 0.99852055311203,grad_norm: 0.9999992180695159, iteration: 126258
loss: 0.990401029586792,grad_norm: 0.9125153478450244, iteration: 126259
loss: 0.9853850603103638,grad_norm: 0.9999991036465582, iteration: 126260
loss: 0.97145676612854,grad_norm: 0.986867581549698, iteration: 126261
loss: 1.0009822845458984,grad_norm: 0.9999989727886149, iteration: 126262
loss: 1.0300662517547607,grad_norm: 0.99999910948253, iteration: 126263
loss: 0.9995583295822144,grad_norm: 0.9999991087932487, iteration: 126264
loss: 0.9799293875694275,grad_norm: 0.9670454586607025, iteration: 126265
loss: 0.9949178695678711,grad_norm: 0.7996539773430451, iteration: 126266
loss: 0.988542914390564,grad_norm: 0.9999990384233813, iteration: 126267
loss: 1.001579999923706,grad_norm: 0.9559178260506662, iteration: 126268
loss: 1.0146654844284058,grad_norm: 0.9999990096470851, iteration: 126269
loss: 0.9465413689613342,grad_norm: 0.9837566782630122, iteration: 126270
loss: 1.0006166696548462,grad_norm: 0.9999990187518739, iteration: 126271
loss: 0.9679001569747925,grad_norm: 0.9999991445012102, iteration: 126272
loss: 0.9969378113746643,grad_norm: 0.9999992009677433, iteration: 126273
loss: 1.0037391185760498,grad_norm: 0.9504011790077295, iteration: 126274
loss: 0.9821231961250305,grad_norm: 0.9999990992002603, iteration: 126275
loss: 1.016777515411377,grad_norm: 0.8687688215794674, iteration: 126276
loss: 0.9727964401245117,grad_norm: 0.9999990537231024, iteration: 126277
loss: 0.9953837394714355,grad_norm: 0.9277570853090162, iteration: 126278
loss: 0.980332612991333,grad_norm: 0.9253182898391481, iteration: 126279
loss: 0.9920473098754883,grad_norm: 0.9692105073437424, iteration: 126280
loss: 1.01162850856781,grad_norm: 0.9055877406151145, iteration: 126281
loss: 1.00029718875885,grad_norm: 0.8996782684737886, iteration: 126282
loss: 1.0193904638290405,grad_norm: 0.9999994461241515, iteration: 126283
loss: 0.9745087623596191,grad_norm: 0.883912961299914, iteration: 126284
loss: 0.9799134731292725,grad_norm: 0.9691140806378494, iteration: 126285
loss: 0.9943034648895264,grad_norm: 0.9999990429727952, iteration: 126286
loss: 1.0547016859054565,grad_norm: 0.9625074614525427, iteration: 126287
loss: 0.9711886048316956,grad_norm: 0.9999989371204662, iteration: 126288
loss: 1.030328392982483,grad_norm: 0.9999990930670023, iteration: 126289
loss: 1.0003405809402466,grad_norm: 0.9999991565230729, iteration: 126290
loss: 0.9292848706245422,grad_norm: 0.9999991245907094, iteration: 126291
loss: 0.9903693795204163,grad_norm: 0.9999990864866872, iteration: 126292
loss: 1.0161408185958862,grad_norm: 0.9517916542862914, iteration: 126293
loss: 1.0126155614852905,grad_norm: 0.8096054154237036, iteration: 126294
loss: 0.9864124655723572,grad_norm: 0.8757071989918462, iteration: 126295
loss: 1.0193456411361694,grad_norm: 0.9999992447222176, iteration: 126296
loss: 1.0406843423843384,grad_norm: 0.9999991710891067, iteration: 126297
loss: 0.9993383884429932,grad_norm: 0.8681382597391013, iteration: 126298
loss: 0.9594601392745972,grad_norm: 0.9754824305527368, iteration: 126299
loss: 1.12616765499115,grad_norm: 0.9999996299908213, iteration: 126300
loss: 0.9898475408554077,grad_norm: 0.9999990772548639, iteration: 126301
loss: 1.0069609880447388,grad_norm: 0.9999992343790347, iteration: 126302
loss: 0.9625099301338196,grad_norm: 0.9999992668118082, iteration: 126303
loss: 0.9754136800765991,grad_norm: 0.9999991568654484, iteration: 126304
loss: 1.005448579788208,grad_norm: 0.9496415191836064, iteration: 126305
loss: 1.0017062425613403,grad_norm: 0.9264078042265582, iteration: 126306
loss: 0.9945854544639587,grad_norm: 0.9348737053908611, iteration: 126307
loss: 1.015426754951477,grad_norm: 0.9999991131162799, iteration: 126308
loss: 1.013088345527649,grad_norm: 0.9152845709294056, iteration: 126309
loss: 1.0192433595657349,grad_norm: 0.908935492632285, iteration: 126310
loss: 1.015297532081604,grad_norm: 0.9999990365706357, iteration: 126311
loss: 1.014214038848877,grad_norm: 0.9647462623869909, iteration: 126312
loss: 1.0276052951812744,grad_norm: 0.9999990834164832, iteration: 126313
loss: 0.977219820022583,grad_norm: 0.9516036772636746, iteration: 126314
loss: 0.9959786534309387,grad_norm: 0.9999988898748364, iteration: 126315
loss: 0.9927406907081604,grad_norm: 0.9999990916017726, iteration: 126316
loss: 0.9565671682357788,grad_norm: 0.9999990663304934, iteration: 126317
loss: 1.0019946098327637,grad_norm: 0.9740520781345731, iteration: 126318
loss: 1.000895619392395,grad_norm: 0.9999991867289644, iteration: 126319
loss: 0.9986642599105835,grad_norm: 0.9289128768989073, iteration: 126320
loss: 1.0024806261062622,grad_norm: 0.913060297938459, iteration: 126321
loss: 1.0332645177841187,grad_norm: 0.9842826258754197, iteration: 126322
loss: 0.9758694171905518,grad_norm: 0.8745125297879217, iteration: 126323
loss: 1.024337649345398,grad_norm: 0.9999991767675325, iteration: 126324
loss: 1.028857707977295,grad_norm: 0.9999990670993804, iteration: 126325
loss: 0.993383526802063,grad_norm: 0.9999989517797713, iteration: 126326
loss: 0.9638882279396057,grad_norm: 0.999999089069811, iteration: 126327
loss: 0.9789097905158997,grad_norm: 0.9999992009159063, iteration: 126328
loss: 1.1432805061340332,grad_norm: 0.9999996236461827, iteration: 126329
loss: 1.0095834732055664,grad_norm: 0.8378095398744714, iteration: 126330
loss: 0.9640344977378845,grad_norm: 0.9714902535200226, iteration: 126331
loss: 0.9976577758789062,grad_norm: 0.8766596342739413, iteration: 126332
loss: 0.9795127511024475,grad_norm: 0.9999991980515466, iteration: 126333
loss: 1.0033138990402222,grad_norm: 0.9999991241168629, iteration: 126334
loss: 0.9646899700164795,grad_norm: 0.9580229309083367, iteration: 126335
loss: 0.990018904209137,grad_norm: 0.9999991721825157, iteration: 126336
loss: 0.966538667678833,grad_norm: 0.9616299797319371, iteration: 126337
loss: 1.0112749338150024,grad_norm: 0.9999990711648007, iteration: 126338
loss: 1.0072089433670044,grad_norm: 0.9999991791005248, iteration: 126339
loss: 0.9550575017929077,grad_norm: 0.9157750630289712, iteration: 126340
loss: 1.0069926977157593,grad_norm: 0.9999991441201694, iteration: 126341
loss: 1.0185506343841553,grad_norm: 0.9999990204487084, iteration: 126342
loss: 0.9858831763267517,grad_norm: 0.9999990882613865, iteration: 126343
loss: 1.0210349559783936,grad_norm: 0.9999991495543079, iteration: 126344
loss: 1.018330454826355,grad_norm: 0.9999989961714978, iteration: 126345
loss: 1.0121009349822998,grad_norm: 0.9999991205579059, iteration: 126346
loss: 0.9702389240264893,grad_norm: 0.9947885801205236, iteration: 126347
loss: 1.0281611680984497,grad_norm: 0.9999997288611612, iteration: 126348
loss: 0.9932050704956055,grad_norm: 0.9999991863418287, iteration: 126349
loss: 0.974575400352478,grad_norm: 0.9999990606293048, iteration: 126350
loss: 1.0167292356491089,grad_norm: 0.9999998889652675, iteration: 126351
loss: 0.9847841262817383,grad_norm: 0.9926982367720303, iteration: 126352
loss: 0.9774746298789978,grad_norm: 0.9737097667875327, iteration: 126353
loss: 1.0118423700332642,grad_norm: 0.999999131066034, iteration: 126354
loss: 1.0055902004241943,grad_norm: 0.9680870918479992, iteration: 126355
loss: 1.0062918663024902,grad_norm: 0.9999991412599452, iteration: 126356
loss: 0.9658834934234619,grad_norm: 0.9913508303765645, iteration: 126357
loss: 1.029779076576233,grad_norm: 0.8924914905084702, iteration: 126358
loss: 0.993134617805481,grad_norm: 0.9999990098353257, iteration: 126359
loss: 0.9602212309837341,grad_norm: 0.9999992365294301, iteration: 126360
loss: 0.9875062704086304,grad_norm: 0.9999991120782599, iteration: 126361
loss: 0.9765418767929077,grad_norm: 0.8792906595234076, iteration: 126362
loss: 1.008404016494751,grad_norm: 0.8874302965740509, iteration: 126363
loss: 0.9986371397972107,grad_norm: 0.8523673406246896, iteration: 126364
loss: 1.0202797651290894,grad_norm: 0.9694608239242314, iteration: 126365
loss: 1.0693111419677734,grad_norm: 0.9524206574838859, iteration: 126366
loss: 0.9772632718086243,grad_norm: 0.9999990379500485, iteration: 126367
loss: 1.0072191953659058,grad_norm: 0.9857258336077352, iteration: 126368
loss: 1.022392749786377,grad_norm: 0.9999990733333036, iteration: 126369
loss: 0.9613472819328308,grad_norm: 0.9999991646508998, iteration: 126370
loss: 0.9422103762626648,grad_norm: 0.9999991815135594, iteration: 126371
loss: 0.9636070132255554,grad_norm: 0.9999990393259861, iteration: 126372
loss: 0.994206428527832,grad_norm: 0.897687426069134, iteration: 126373
loss: 0.9853405952453613,grad_norm: 0.8940221028841695, iteration: 126374
loss: 0.9672591686248779,grad_norm: 0.99999932379639, iteration: 126375
loss: 0.9959580898284912,grad_norm: 0.9082991935001259, iteration: 126376
loss: 1.0231915712356567,grad_norm: 0.9999990748891587, iteration: 126377
loss: 0.9835357666015625,grad_norm: 0.9999991420403689, iteration: 126378
loss: 0.9834107160568237,grad_norm: 0.9999992063607159, iteration: 126379
loss: 1.0743921995162964,grad_norm: 0.9999990796819258, iteration: 126380
loss: 1.041847825050354,grad_norm: 0.9999991215915539, iteration: 126381
loss: 1.0082874298095703,grad_norm: 0.9311087185435516, iteration: 126382
loss: 1.0389316082000732,grad_norm: 0.9999996355768065, iteration: 126383
loss: 0.9803069829940796,grad_norm: 0.9711084871023002, iteration: 126384
loss: 0.9580868482589722,grad_norm: 0.9999991904951356, iteration: 126385
loss: 1.0052722692489624,grad_norm: 0.9999994006062525, iteration: 126386
loss: 1.0241143703460693,grad_norm: 0.995946797365085, iteration: 126387
loss: 1.0112584829330444,grad_norm: 0.9999991699760006, iteration: 126388
loss: 0.9840909242630005,grad_norm: 0.9270644303802482, iteration: 126389
loss: 0.9614257216453552,grad_norm: 0.999999021640543, iteration: 126390
loss: 1.0275989770889282,grad_norm: 0.9999992085563647, iteration: 126391
loss: 0.9724125862121582,grad_norm: 0.9999990887639224, iteration: 126392
loss: 0.9902723431587219,grad_norm: 0.9999991021509171, iteration: 126393
loss: 0.9743706583976746,grad_norm: 0.9868046200479692, iteration: 126394
loss: 0.9875112771987915,grad_norm: 0.9358330652252392, iteration: 126395
loss: 0.9762341976165771,grad_norm: 0.9862019036439063, iteration: 126396
loss: 0.9616212844848633,grad_norm: 0.9999994004043756, iteration: 126397
loss: 1.0156409740447998,grad_norm: 0.9469978027652054, iteration: 126398
loss: 0.9920254349708557,grad_norm: 0.9999989499464323, iteration: 126399
loss: 1.036446452140808,grad_norm: 0.9999992174031551, iteration: 126400
loss: 0.9890808463096619,grad_norm: 0.9526229132190714, iteration: 126401
loss: 1.0148779153823853,grad_norm: 0.9999991603580762, iteration: 126402
loss: 0.9927579164505005,grad_norm: 0.8383259845425972, iteration: 126403
loss: 1.0299454927444458,grad_norm: 0.999999886558027, iteration: 126404
loss: 1.0149332284927368,grad_norm: 0.9264526837629409, iteration: 126405
loss: 1.0491317510604858,grad_norm: 0.9999992147478538, iteration: 126406
loss: 0.9981432557106018,grad_norm: 0.9999992378639896, iteration: 126407
loss: 0.9948667883872986,grad_norm: 0.9999990527600779, iteration: 126408
loss: 0.9860801100730896,grad_norm: 0.9999990471013367, iteration: 126409
loss: 1.0138142108917236,grad_norm: 0.9538363573377825, iteration: 126410
loss: 0.9723968505859375,grad_norm: 0.9999989313053133, iteration: 126411
loss: 1.0305006504058838,grad_norm: 0.9999991186699169, iteration: 126412
loss: 1.023653507232666,grad_norm: 0.8557835276356107, iteration: 126413
loss: 0.9889105558395386,grad_norm: 0.9999991933625683, iteration: 126414
loss: 1.030354380607605,grad_norm: 0.9999992790340603, iteration: 126415
loss: 0.9929111003875732,grad_norm: 0.9999991098817141, iteration: 126416
loss: 1.0206875801086426,grad_norm: 0.9999991652369113, iteration: 126417
loss: 1.0488260984420776,grad_norm: 0.9999997619995925, iteration: 126418
loss: 1.0360591411590576,grad_norm: 0.9999993515234574, iteration: 126419
loss: 1.0075008869171143,grad_norm: 0.9999991176684688, iteration: 126420
loss: 0.9989824891090393,grad_norm: 0.9999991382500358, iteration: 126421
loss: 0.9807971119880676,grad_norm: 0.9999990848416952, iteration: 126422
loss: 1.0481113195419312,grad_norm: 0.9999996923760637, iteration: 126423
loss: 1.000891923904419,grad_norm: 0.9660424118050372, iteration: 126424
loss: 1.0154722929000854,grad_norm: 0.999998998499336, iteration: 126425
loss: 1.0372538566589355,grad_norm: 0.9999991176512687, iteration: 126426
loss: 1.0739973783493042,grad_norm: 1.0000000023559208, iteration: 126427
loss: 1.0087286233901978,grad_norm: 0.9939252046907376, iteration: 126428
loss: 1.0216602087020874,grad_norm: 0.9999994062450047, iteration: 126429
loss: 1.0010426044464111,grad_norm: 0.9364842229081254, iteration: 126430
loss: 1.0443298816680908,grad_norm: 0.9999991625167066, iteration: 126431
loss: 0.9967057704925537,grad_norm: 0.9999990941270667, iteration: 126432
loss: 1.020125150680542,grad_norm: 0.963540694067636, iteration: 126433
loss: 1.05636727809906,grad_norm: 0.999998997982821, iteration: 126434
loss: 1.0842182636260986,grad_norm: 0.9999991145603861, iteration: 126435
loss: 0.971768319606781,grad_norm: 0.9999992935054527, iteration: 126436
loss: 1.0066245794296265,grad_norm: 0.8873350839519448, iteration: 126437
loss: 0.9921718239784241,grad_norm: 0.9999990179205923, iteration: 126438
loss: 0.9968575835227966,grad_norm: 0.9999992599734938, iteration: 126439
loss: 1.2513397932052612,grad_norm: 0.9999999269870306, iteration: 126440
loss: 1.2452914714813232,grad_norm: 0.999999544707029, iteration: 126441
loss: 1.0178720951080322,grad_norm: 0.9042081318170563, iteration: 126442
loss: 1.1937764883041382,grad_norm: 0.9999998106530562, iteration: 126443
loss: 1.0090012550354004,grad_norm: 0.9999990707537233, iteration: 126444
loss: 1.000789999961853,grad_norm: 0.9999999125757327, iteration: 126445
loss: 1.077882170677185,grad_norm: 0.9999995241745109, iteration: 126446
loss: 1.10873281955719,grad_norm: 0.9999991802836126, iteration: 126447
loss: 1.0244232416152954,grad_norm: 0.9999992023720635, iteration: 126448
loss: 1.0131560564041138,grad_norm: 0.9999991180522035, iteration: 126449
loss: 1.0588072538375854,grad_norm: 0.9999991198094378, iteration: 126450
loss: 1.0035181045532227,grad_norm: 0.9999994082447368, iteration: 126451
loss: 1.03985595703125,grad_norm: 0.9999996899157755, iteration: 126452
loss: 0.981536865234375,grad_norm: 0.9857027433612808, iteration: 126453
loss: 0.9647501111030579,grad_norm: 0.9999991465901941, iteration: 126454
loss: 1.0228114128112793,grad_norm: 0.9999991206919701, iteration: 126455
loss: 1.0162254571914673,grad_norm: 0.9999994841710934, iteration: 126456
loss: 0.9986269474029541,grad_norm: 0.9999990270901352, iteration: 126457
loss: 0.9987297654151917,grad_norm: 0.9999992933552675, iteration: 126458
loss: 0.990064799785614,grad_norm: 0.9999992246092604, iteration: 126459
loss: 1.0137717723846436,grad_norm: 0.9999992727708606, iteration: 126460
loss: 1.0809924602508545,grad_norm: 0.999999234336659, iteration: 126461
loss: 1.1220951080322266,grad_norm: 0.9999991438122897, iteration: 126462
loss: 1.0250948667526245,grad_norm: 0.9999992806355045, iteration: 126463
loss: 1.0133981704711914,grad_norm: 0.9999991236549542, iteration: 126464
loss: 0.9893553256988525,grad_norm: 0.9707931763946446, iteration: 126465
loss: 0.9987360239028931,grad_norm: 0.9646521181579806, iteration: 126466
loss: 0.9851381182670593,grad_norm: 0.9381783344080827, iteration: 126467
loss: 0.9858351349830627,grad_norm: 0.9550722889447486, iteration: 126468
loss: 0.9980689883232117,grad_norm: 0.9899138821169727, iteration: 126469
loss: 0.9876741170883179,grad_norm: 0.9999990752133835, iteration: 126470
loss: 0.9680358171463013,grad_norm: 0.9611959337557385, iteration: 126471
loss: 1.0241211652755737,grad_norm: 0.9999991534242587, iteration: 126472
loss: 0.9989855885505676,grad_norm: 0.9999995138904701, iteration: 126473
loss: 1.0581482648849487,grad_norm: 0.9999995438518364, iteration: 126474
loss: 1.0386615991592407,grad_norm: 0.9036767319829683, iteration: 126475
loss: 0.9767217636108398,grad_norm: 0.999999179478375, iteration: 126476
loss: 0.9885189533233643,grad_norm: 0.9999989348618714, iteration: 126477
loss: 1.0124672651290894,grad_norm: 0.9999990456709703, iteration: 126478
loss: 1.0149714946746826,grad_norm: 0.9004197624877017, iteration: 126479
loss: 1.000271201133728,grad_norm: 0.9986054021267491, iteration: 126480
loss: 0.9956556558609009,grad_norm: 0.9999991163245736, iteration: 126481
loss: 0.9702978730201721,grad_norm: 0.9999991056560852, iteration: 126482
loss: 0.9906767010688782,grad_norm: 0.9999991701270521, iteration: 126483
loss: 0.9859628677368164,grad_norm: 0.9999991045118829, iteration: 126484
loss: 1.0113569498062134,grad_norm: 0.9999992021645997, iteration: 126485
loss: 1.0038398504257202,grad_norm: 0.8671661182148201, iteration: 126486
loss: 1.021518349647522,grad_norm: 0.9759086529821208, iteration: 126487
loss: 1.0079940557479858,grad_norm: 0.9338188175497805, iteration: 126488
loss: 0.9899914860725403,grad_norm: 0.9999988937086938, iteration: 126489
loss: 0.9878008365631104,grad_norm: 0.9999990840849194, iteration: 126490
loss: 1.013197422027588,grad_norm: 0.9999990565922345, iteration: 126491
loss: 1.0453240871429443,grad_norm: 0.9004050554698508, iteration: 126492
loss: 0.9845020771026611,grad_norm: 0.9999991442121365, iteration: 126493
loss: 1.0114444494247437,grad_norm: 0.9999991834997686, iteration: 126494
loss: 1.002737045288086,grad_norm: 0.9584330784925669, iteration: 126495
loss: 1.0183944702148438,grad_norm: 0.9999992815645381, iteration: 126496
loss: 1.0141969919204712,grad_norm: 0.9349923878022016, iteration: 126497
loss: 1.0026551485061646,grad_norm: 0.8696792336984288, iteration: 126498
loss: 0.9971255660057068,grad_norm: 0.944776480268547, iteration: 126499
loss: 1.0284467935562134,grad_norm: 0.9999991377474695, iteration: 126500
loss: 1.0093411207199097,grad_norm: 0.9056753365652255, iteration: 126501
loss: 0.9621335864067078,grad_norm: 0.9305858523850308, iteration: 126502
loss: 1.0217822790145874,grad_norm: 0.9999992210269882, iteration: 126503
loss: 0.9831753969192505,grad_norm: 0.936234563170028, iteration: 126504
loss: 0.9670682549476624,grad_norm: 0.9999990252500841, iteration: 126505
loss: 1.0190553665161133,grad_norm: 0.9552169657602422, iteration: 126506
loss: 0.9748660326004028,grad_norm: 0.999999031496171, iteration: 126507
loss: 1.0908234119415283,grad_norm: 0.9999992793743995, iteration: 126508
loss: 0.9914201498031616,grad_norm: 0.9912242876425712, iteration: 126509
loss: 1.0130695104599,grad_norm: 0.999999521740985, iteration: 126510
loss: 0.9899768829345703,grad_norm: 0.9999991503762322, iteration: 126511
loss: 1.021708369255066,grad_norm: 0.9776486436024089, iteration: 126512
loss: 1.000755786895752,grad_norm: 0.9999991723543309, iteration: 126513
loss: 1.0177114009857178,grad_norm: 0.913459690453391, iteration: 126514
loss: 1.0297328233718872,grad_norm: 0.9999994225713188, iteration: 126515
loss: 0.9964932799339294,grad_norm: 0.9483507350812732, iteration: 126516
loss: 1.0118235349655151,grad_norm: 0.9999990565819978, iteration: 126517
loss: 0.9939674139022827,grad_norm: 0.9999991795633258, iteration: 126518
loss: 0.9973609447479248,grad_norm: 0.9895309371927415, iteration: 126519
loss: 1.0410736799240112,grad_norm: 0.9999990143897173, iteration: 126520
loss: 1.0111428499221802,grad_norm: 0.8733389118147551, iteration: 126521
loss: 0.9846439361572266,grad_norm: 0.9999991654043516, iteration: 126522
loss: 1.0251245498657227,grad_norm: 0.9999992475396262, iteration: 126523
loss: 1.0249545574188232,grad_norm: 0.9999991693308955, iteration: 126524
loss: 0.9952111840248108,grad_norm: 0.9253778365306282, iteration: 126525
loss: 0.9942396283149719,grad_norm: 0.9920968420876665, iteration: 126526
loss: 0.9815885424613953,grad_norm: 0.9943324343636467, iteration: 126527
loss: 0.9933101534843445,grad_norm: 0.9121569862768937, iteration: 126528
loss: 0.9724175930023193,grad_norm: 0.9808641486147803, iteration: 126529
loss: 0.9734971523284912,grad_norm: 0.9999992201990658, iteration: 126530
loss: 0.9615015983581543,grad_norm: 0.8315662245970336, iteration: 126531
loss: 1.0304678678512573,grad_norm: 0.9999992577674122, iteration: 126532
loss: 1.010254144668579,grad_norm: 0.999999174601457, iteration: 126533
loss: 0.987847089767456,grad_norm: 0.999999198318196, iteration: 126534
loss: 1.0055958032608032,grad_norm: 0.9999991229135639, iteration: 126535
loss: 0.9903389811515808,grad_norm: 0.9999990909154628, iteration: 126536
loss: 0.9564273357391357,grad_norm: 0.8379496943318783, iteration: 126537
loss: 1.0084013938903809,grad_norm: 0.9999990846977765, iteration: 126538
loss: 1.0301096439361572,grad_norm: 0.9999997943914564, iteration: 126539
loss: 0.9671171307563782,grad_norm: 0.8984444451903255, iteration: 126540
loss: 0.985683798789978,grad_norm: 0.9508036273785195, iteration: 126541
loss: 0.9968269467353821,grad_norm: 0.9999989978466205, iteration: 126542
loss: 1.0028516054153442,grad_norm: 0.9823435585962464, iteration: 126543
loss: 0.977371096611023,grad_norm: 0.9069076784843786, iteration: 126544
loss: 1.0291996002197266,grad_norm: 0.999999137360363, iteration: 126545
loss: 1.0074938535690308,grad_norm: 0.9492106672531949, iteration: 126546
loss: 0.9850402474403381,grad_norm: 0.8925039944492152, iteration: 126547
loss: 1.0036109685897827,grad_norm: 0.9999990813809275, iteration: 126548
loss: 0.9642966985702515,grad_norm: 0.8974090607334917, iteration: 126549
loss: 0.9614083170890808,grad_norm: 0.9999990041077051, iteration: 126550
loss: 1.0050345659255981,grad_norm: 0.9999991647489717, iteration: 126551
loss: 1.0138882398605347,grad_norm: 0.9999991566794952, iteration: 126552
loss: 0.983767032623291,grad_norm: 0.7337650079813294, iteration: 126553
loss: 0.965084433555603,grad_norm: 0.9999990436048605, iteration: 126554
loss: 1.0181639194488525,grad_norm: 0.9246213147714647, iteration: 126555
loss: 1.0389219522476196,grad_norm: 0.9999994631997052, iteration: 126556
loss: 0.9931181073188782,grad_norm: 0.9212389690250343, iteration: 126557
loss: 1.0410600900650024,grad_norm: 0.9999997873312825, iteration: 126558
loss: 0.9637247323989868,grad_norm: 0.9848811507476855, iteration: 126559
loss: 0.9772638082504272,grad_norm: 0.9999990405428018, iteration: 126560
loss: 1.0181937217712402,grad_norm: 0.9999990450066679, iteration: 126561
loss: 0.9792917370796204,grad_norm: 0.9999990093279612, iteration: 126562
loss: 1.0088469982147217,grad_norm: 0.9999992819560988, iteration: 126563
loss: 1.0164198875427246,grad_norm: 0.8961149507536189, iteration: 126564
loss: 0.9925061464309692,grad_norm: 0.8577522840473104, iteration: 126565
loss: 0.9994664192199707,grad_norm: 0.9999992422321345, iteration: 126566
loss: 1.0048937797546387,grad_norm: 0.999999087887116, iteration: 126567
loss: 1.0091084241867065,grad_norm: 0.8057955279755972, iteration: 126568
loss: 1.0113887786865234,grad_norm: 0.9999990699901323, iteration: 126569
loss: 0.9887042045593262,grad_norm: 0.8971313293678579, iteration: 126570
loss: 0.9924171566963196,grad_norm: 0.9999992614242093, iteration: 126571
loss: 0.9913427233695984,grad_norm: 0.9999991103984561, iteration: 126572
loss: 0.9760829210281372,grad_norm: 0.9999989690773492, iteration: 126573
loss: 1.0184016227722168,grad_norm: 0.9999992156280789, iteration: 126574
loss: 0.9938003420829773,grad_norm: 0.9972633779987417, iteration: 126575
loss: 0.9834579229354858,grad_norm: 0.9450649002086091, iteration: 126576
loss: 1.0369672775268555,grad_norm: 0.9999990721222901, iteration: 126577
loss: 1.0207796096801758,grad_norm: 0.9981622907806766, iteration: 126578
loss: 0.9960696697235107,grad_norm: 0.999999593894711, iteration: 126579
loss: 0.9933999180793762,grad_norm: 0.9999989811228542, iteration: 126580
loss: 1.0002353191375732,grad_norm: 0.9740378447804431, iteration: 126581
loss: 0.9940343499183655,grad_norm: 0.8446795088839956, iteration: 126582
loss: 0.9953091740608215,grad_norm: 0.9615883759109483, iteration: 126583
loss: 1.0152450799942017,grad_norm: 0.9848187472133717, iteration: 126584
loss: 1.034193754196167,grad_norm: 0.9313240029571029, iteration: 126585
loss: 1.0128588676452637,grad_norm: 0.9999991613382435, iteration: 126586
loss: 0.9866083264350891,grad_norm: 0.9999992705557965, iteration: 126587
loss: 0.9985783100128174,grad_norm: 0.9999992236867835, iteration: 126588
loss: 1.0028162002563477,grad_norm: 0.9999990734709615, iteration: 126589
loss: 0.978482186794281,grad_norm: 0.9473270861453184, iteration: 126590
loss: 0.9745620489120483,grad_norm: 0.9299609845239378, iteration: 126591
loss: 0.9995852112770081,grad_norm: 0.9999991437348356, iteration: 126592
loss: 0.9820352792739868,grad_norm: 0.8729082449053152, iteration: 126593
loss: 1.0012686252593994,grad_norm: 0.9094907912534191, iteration: 126594
loss: 0.9997707605361938,grad_norm: 0.9999995065522994, iteration: 126595
loss: 0.9819759130477905,grad_norm: 0.9610160772513918, iteration: 126596
loss: 1.0229582786560059,grad_norm: 0.9662067884119095, iteration: 126597
loss: 0.9797438979148865,grad_norm: 0.9875222935726636, iteration: 126598
loss: 1.0084669589996338,grad_norm: 0.999999128104276, iteration: 126599
loss: 0.9948023557662964,grad_norm: 0.9999991596627684, iteration: 126600
loss: 1.018052339553833,grad_norm: 0.9999991076396093, iteration: 126601
loss: 1.0341583490371704,grad_norm: 0.9999990495075254, iteration: 126602
loss: 1.0057764053344727,grad_norm: 0.9999990794151877, iteration: 126603
loss: 0.9732386469841003,grad_norm: 0.9999991132654933, iteration: 126604
loss: 1.0236985683441162,grad_norm: 0.999999201760504, iteration: 126605
loss: 1.0185291767120361,grad_norm: 0.9999991139163599, iteration: 126606
loss: 0.9940945506095886,grad_norm: 0.8992155182686611, iteration: 126607
loss: 0.978816032409668,grad_norm: 0.9999992811003635, iteration: 126608
loss: 0.9885828495025635,grad_norm: 0.9301879142686045, iteration: 126609
loss: 0.9854896664619446,grad_norm: 0.9818308640249348, iteration: 126610
loss: 0.9996253848075867,grad_norm: 0.9999991760255186, iteration: 126611
loss: 1.0047216415405273,grad_norm: 0.9190410731149499, iteration: 126612
loss: 0.9969430565834045,grad_norm: 0.9999991626121062, iteration: 126613
loss: 1.0517081022262573,grad_norm: 0.8956186117171545, iteration: 126614
loss: 1.0334641933441162,grad_norm: 0.974399059183064, iteration: 126615
loss: 1.0198471546173096,grad_norm: 0.9999991837554169, iteration: 126616
loss: 0.9816991090774536,grad_norm: 0.9999992214046606, iteration: 126617
loss: 0.9870914816856384,grad_norm: 0.9787981988762288, iteration: 126618
loss: 0.9570128321647644,grad_norm: 0.9700141463130626, iteration: 126619
loss: 1.006388783454895,grad_norm: 0.9999990744586791, iteration: 126620
loss: 0.9850160479545593,grad_norm: 0.9236006743159558, iteration: 126621
loss: 1.01514732837677,grad_norm: 0.8825499275242182, iteration: 126622
loss: 1.0090564489364624,grad_norm: 0.9876688131774427, iteration: 126623
loss: 1.0341339111328125,grad_norm: 0.8451230150873269, iteration: 126624
loss: 0.9794899225234985,grad_norm: 0.9999990759157974, iteration: 126625
loss: 0.9988548755645752,grad_norm: 0.9267608767982549, iteration: 126626
loss: 0.9818111062049866,grad_norm: 0.910960243269863, iteration: 126627
loss: 1.0141627788543701,grad_norm: 0.8573982301987358, iteration: 126628
loss: 0.984529972076416,grad_norm: 0.7898500826140961, iteration: 126629
loss: 0.9703105688095093,grad_norm: 0.9743119727393931, iteration: 126630
loss: 1.049286127090454,grad_norm: 0.9999995831229639, iteration: 126631
loss: 0.9693857431411743,grad_norm: 0.9282891799012931, iteration: 126632
loss: 0.9989801049232483,grad_norm: 0.9999992283660747, iteration: 126633
loss: 0.9771293997764587,grad_norm: 0.9999990160820095, iteration: 126634
loss: 0.9787483215332031,grad_norm: 0.9201505632970941, iteration: 126635
loss: 0.97562575340271,grad_norm: 0.9286883929015275, iteration: 126636
loss: 1.0127345323562622,grad_norm: 0.9999991106060278, iteration: 126637
loss: 1.007705807685852,grad_norm: 0.8989689683038674, iteration: 126638
loss: 0.9668256044387817,grad_norm: 0.9535947904040297, iteration: 126639
loss: 1.0030620098114014,grad_norm: 0.9848404925966746, iteration: 126640
loss: 0.978117823600769,grad_norm: 0.9999990020283175, iteration: 126641
loss: 1.019629955291748,grad_norm: 0.9583110535433661, iteration: 126642
loss: 0.9831846356391907,grad_norm: 0.9999992279534125, iteration: 126643
loss: 1.0363144874572754,grad_norm: 0.9999991608526629, iteration: 126644
loss: 1.0059123039245605,grad_norm: 0.9999992137882808, iteration: 126645
loss: 0.9993960857391357,grad_norm: 0.9783150257100267, iteration: 126646
loss: 0.9789186716079712,grad_norm: 0.982623732835367, iteration: 126647
loss: 1.0054287910461426,grad_norm: 0.9999990107269346, iteration: 126648
loss: 0.9933027029037476,grad_norm: 0.9999994229347152, iteration: 126649
loss: 0.9670932292938232,grad_norm: 0.9328934723890281, iteration: 126650
loss: 0.9838711619377136,grad_norm: 0.9999990839981967, iteration: 126651
loss: 0.9908467531204224,grad_norm: 0.9999990579293861, iteration: 126652
loss: 0.9782494306564331,grad_norm: 0.9589908154715082, iteration: 126653
loss: 0.9959562420845032,grad_norm: 0.999999239129522, iteration: 126654
loss: 0.9891632199287415,grad_norm: 0.9999990715153657, iteration: 126655
loss: 0.9256450533866882,grad_norm: 0.9999992432186834, iteration: 126656
loss: 0.9901090264320374,grad_norm: 0.9999990990132219, iteration: 126657
loss: 1.0045002698898315,grad_norm: 0.9999992270521141, iteration: 126658
loss: 0.9994203448295593,grad_norm: 0.9865644613858738, iteration: 126659
loss: 1.0016601085662842,grad_norm: 0.9999992766983045, iteration: 126660
loss: 1.0001996755599976,grad_norm: 0.9999991511913264, iteration: 126661
loss: 0.9717339277267456,grad_norm: 0.9999990715260718, iteration: 126662
loss: 1.0037686824798584,grad_norm: 0.9176726416354052, iteration: 126663
loss: 1.0032488107681274,grad_norm: 0.9428324250374815, iteration: 126664
loss: 0.9502280950546265,grad_norm: 0.8409865357568046, iteration: 126665
loss: 1.0032888650894165,grad_norm: 0.9189843913539167, iteration: 126666
loss: 1.0621806383132935,grad_norm: 0.9999991935349557, iteration: 126667
loss: 1.0158205032348633,grad_norm: 0.9999990586406573, iteration: 126668
loss: 0.9836029410362244,grad_norm: 0.9999990391970559, iteration: 126669
loss: 0.9903858304023743,grad_norm: 0.9999989776437993, iteration: 126670
loss: 0.9661256074905396,grad_norm: 0.947539347174986, iteration: 126671
loss: 1.0040043592453003,grad_norm: 0.9999998894845632, iteration: 126672
loss: 1.0197092294692993,grad_norm: 0.9999991896560992, iteration: 126673
loss: 1.0360724925994873,grad_norm: 0.9999991130563136, iteration: 126674
loss: 1.0422412157058716,grad_norm: 0.9999990841160323, iteration: 126675
loss: 0.9924036860466003,grad_norm: 0.9999991491374159, iteration: 126676
loss: 0.9929174184799194,grad_norm: 0.9999991477055501, iteration: 126677
loss: 0.9833644032478333,grad_norm: 0.9999991503187229, iteration: 126678
loss: 1.0024994611740112,grad_norm: 0.9772168916202602, iteration: 126679
loss: 0.9615316390991211,grad_norm: 0.9999992255765715, iteration: 126680
loss: 0.9971003532409668,grad_norm: 0.9778707219385968, iteration: 126681
loss: 1.0058798789978027,grad_norm: 0.9754881913802247, iteration: 126682
loss: 1.0395779609680176,grad_norm: 0.9999994736534425, iteration: 126683
loss: 1.0099048614501953,grad_norm: 0.9999991693377215, iteration: 126684
loss: 0.9547978639602661,grad_norm: 0.9999991862898884, iteration: 126685
loss: 0.9651223421096802,grad_norm: 0.9999991949223267, iteration: 126686
loss: 1.0295876264572144,grad_norm: 0.940250224069358, iteration: 126687
loss: 0.9861167669296265,grad_norm: 0.9999990021952881, iteration: 126688
loss: 1.0274959802627563,grad_norm: 0.9999999052789095, iteration: 126689
loss: 0.9630218148231506,grad_norm: 0.9999990846255767, iteration: 126690
loss: 1.0794285535812378,grad_norm: 0.9999994144129661, iteration: 126691
loss: 1.0022103786468506,grad_norm: 0.9999988931007706, iteration: 126692
loss: 0.9994091391563416,grad_norm: 0.9999991464041661, iteration: 126693
loss: 1.0206490755081177,grad_norm: 0.8951415086294494, iteration: 126694
loss: 1.02444326877594,grad_norm: 0.9220000149996539, iteration: 126695
loss: 0.9617616534233093,grad_norm: 0.9915380975008629, iteration: 126696
loss: 0.9884771108627319,grad_norm: 0.9999991343167343, iteration: 126697
loss: 0.9463422894477844,grad_norm: 0.9147527234881901, iteration: 126698
loss: 0.9577012062072754,grad_norm: 0.9716197534977525, iteration: 126699
loss: 0.9997552037239075,grad_norm: 0.9999990286126964, iteration: 126700
loss: 0.9764993786811829,grad_norm: 0.9999990030619932, iteration: 126701
loss: 0.9666142463684082,grad_norm: 0.9117245438285108, iteration: 126702
loss: 0.96841961145401,grad_norm: 0.9999989905805186, iteration: 126703
loss: 0.9912275671958923,grad_norm: 0.9999991973347135, iteration: 126704
loss: 1.0060226917266846,grad_norm: 0.7927473089264017, iteration: 126705
loss: 0.974191427230835,grad_norm: 0.9999990799481142, iteration: 126706
loss: 0.9718030095100403,grad_norm: 0.7923186811804677, iteration: 126707
loss: 1.0092402696609497,grad_norm: 0.9016883387177484, iteration: 126708
loss: 1.0140641927719116,grad_norm: 0.8219347346194019, iteration: 126709
loss: 0.99830561876297,grad_norm: 0.9978821987041177, iteration: 126710
loss: 1.0002318620681763,grad_norm: 0.9531551733082503, iteration: 126711
loss: 1.000187873840332,grad_norm: 0.9999990986342338, iteration: 126712
loss: 1.0192604064941406,grad_norm: 0.9999991087106309, iteration: 126713
loss: 1.0459275245666504,grad_norm: 0.9999989555709983, iteration: 126714
loss: 0.954990804195404,grad_norm: 0.9999990716085295, iteration: 126715
loss: 0.9702079892158508,grad_norm: 0.9999990275570997, iteration: 126716
loss: 0.9943848252296448,grad_norm: 0.8281280317713726, iteration: 126717
loss: 1.0026131868362427,grad_norm: 0.9002014853131471, iteration: 126718
loss: 0.9921440482139587,grad_norm: 0.9999991716607096, iteration: 126719
loss: 1.0576553344726562,grad_norm: 0.9857522413262721, iteration: 126720
loss: 0.9956575632095337,grad_norm: 0.9999989974736228, iteration: 126721
loss: 1.0397486686706543,grad_norm: 0.9618639838928275, iteration: 126722
loss: 1.01842200756073,grad_norm: 0.9103856169165282, iteration: 126723
loss: 0.9835023880004883,grad_norm: 0.7983714295100389, iteration: 126724
loss: 0.996881365776062,grad_norm: 0.9999990586163809, iteration: 126725
loss: 0.9597887992858887,grad_norm: 0.9451849281018851, iteration: 126726
loss: 1.0238144397735596,grad_norm: 0.9999991265624159, iteration: 126727
loss: 1.0207505226135254,grad_norm: 0.999999093580953, iteration: 126728
loss: 1.080828309059143,grad_norm: 0.999999658875287, iteration: 126729
loss: 1.0545624494552612,grad_norm: 0.9999990774619691, iteration: 126730
loss: 1.024754285812378,grad_norm: 0.8236534074241008, iteration: 126731
loss: 0.9993735551834106,grad_norm: 0.9999991341843819, iteration: 126732
loss: 0.9968040585517883,grad_norm: 0.9999991194881813, iteration: 126733
loss: 1.0278903245925903,grad_norm: 0.9999990550222668, iteration: 126734
loss: 0.9922411441802979,grad_norm: 0.9999992130483598, iteration: 126735
loss: 1.0125459432601929,grad_norm: 0.8718919286047094, iteration: 126736
loss: 1.009484052658081,grad_norm: 0.8960365560477643, iteration: 126737
loss: 0.9693418145179749,grad_norm: 0.8907084203417889, iteration: 126738
loss: 1.012831449508667,grad_norm: 0.9303003770080986, iteration: 126739
loss: 1.0324918031692505,grad_norm: 0.9999991679448432, iteration: 126740
loss: 1.0260599851608276,grad_norm: 0.999999239081404, iteration: 126741
loss: 0.9891894459724426,grad_norm: 0.9954379487232615, iteration: 126742
loss: 0.9646748900413513,grad_norm: 0.8804524849493394, iteration: 126743
loss: 1.04487943649292,grad_norm: 0.9999994432759092, iteration: 126744
loss: 0.9979223012924194,grad_norm: 0.9033549524688997, iteration: 126745
loss: 1.0211325883865356,grad_norm: 0.9999992502180319, iteration: 126746
loss: 0.9717631340026855,grad_norm: 0.9999991497036305, iteration: 126747
loss: 0.9897466897964478,grad_norm: 0.9986579284364557, iteration: 126748
loss: 0.9974257349967957,grad_norm: 0.8820978891172147, iteration: 126749
loss: 1.0015779733657837,grad_norm: 0.9815653940659002, iteration: 126750
loss: 0.9942609667778015,grad_norm: 0.8994665989985955, iteration: 126751
loss: 0.9817367196083069,grad_norm: 0.9745107086464596, iteration: 126752
loss: 1.0128954648971558,grad_norm: 0.9999991853022152, iteration: 126753
loss: 1.008471965789795,grad_norm: 0.9999992249664923, iteration: 126754
loss: 1.0280011892318726,grad_norm: 0.9902873413761906, iteration: 126755
loss: 1.018581748008728,grad_norm: 0.9999992258336239, iteration: 126756
loss: 1.0273404121398926,grad_norm: 0.9999991518171432, iteration: 126757
loss: 0.9864749908447266,grad_norm: 0.9999991195084268, iteration: 126758
loss: 0.9937780499458313,grad_norm: 0.9999991771921559, iteration: 126759
loss: 1.0039973258972168,grad_norm: 0.999999087191958, iteration: 126760
loss: 0.9890629053115845,grad_norm: 0.9999989990676516, iteration: 126761
loss: 1.013536810874939,grad_norm: 0.9665404768243805, iteration: 126762
loss: 0.970391035079956,grad_norm: 0.9999990550824751, iteration: 126763
loss: 1.0397192239761353,grad_norm: 0.9999990795553498, iteration: 126764
loss: 0.9695836901664734,grad_norm: 0.9999990873423773, iteration: 126765
loss: 1.0038315057754517,grad_norm: 0.9999990185939792, iteration: 126766
loss: 1.0036958456039429,grad_norm: 0.9999991032987391, iteration: 126767
loss: 1.0035996437072754,grad_norm: 0.9999990927324934, iteration: 126768
loss: 0.9806975722312927,grad_norm: 0.8825704546479388, iteration: 126769
loss: 1.0169050693511963,grad_norm: 0.992659241870893, iteration: 126770
loss: 0.9947872161865234,grad_norm: 0.9999989707187049, iteration: 126771
loss: 0.9890115261077881,grad_norm: 0.9999991404559347, iteration: 126772
loss: 0.9744893908500671,grad_norm: 0.9999991830526339, iteration: 126773
loss: 1.0085890293121338,grad_norm: 0.8763435526299902, iteration: 126774
loss: 1.124239206314087,grad_norm: 0.9999999661742742, iteration: 126775
loss: 0.9958176612854004,grad_norm: 0.9999990713212127, iteration: 126776
loss: 1.0007011890411377,grad_norm: 0.9494650965410476, iteration: 126777
loss: 0.979987621307373,grad_norm: 0.9913425868364116, iteration: 126778
loss: 1.004929542541504,grad_norm: 0.9368887117902919, iteration: 126779
loss: 1.0011926889419556,grad_norm: 0.9999992207701849, iteration: 126780
loss: 1.0351336002349854,grad_norm: 0.843054095374454, iteration: 126781
loss: 0.99002605676651,grad_norm: 0.9999991184602423, iteration: 126782
loss: 1.0049959421157837,grad_norm: 0.9525954848020086, iteration: 126783
loss: 0.9927176237106323,grad_norm: 0.9999990974989945, iteration: 126784
loss: 1.040840744972229,grad_norm: 0.9999992885737518, iteration: 126785
loss: 1.0361695289611816,grad_norm: 0.8656223074305983, iteration: 126786
loss: 0.9926086068153381,grad_norm: 0.9999991519569444, iteration: 126787
loss: 1.0032614469528198,grad_norm: 0.9944953160941589, iteration: 126788
loss: 1.0185497999191284,grad_norm: 0.9999989330508454, iteration: 126789
loss: 1.013949990272522,grad_norm: 0.9748403144747461, iteration: 126790
loss: 1.0032484531402588,grad_norm: 0.9999991770262411, iteration: 126791
loss: 0.9752011299133301,grad_norm: 0.9999990949497884, iteration: 126792
loss: 0.9850799441337585,grad_norm: 0.9331765915252297, iteration: 126793
loss: 0.9679197669029236,grad_norm: 0.9938114368158589, iteration: 126794
loss: 0.9560888409614563,grad_norm: 0.9999991226542538, iteration: 126795
loss: 1.0137203931808472,grad_norm: 0.9999995861403138, iteration: 126796
loss: 1.0075621604919434,grad_norm: 0.9999991581595857, iteration: 126797
loss: 0.9721776843070984,grad_norm: 0.9999989941047001, iteration: 126798
loss: 0.9538758993148804,grad_norm: 0.9861169012802374, iteration: 126799
loss: 0.9778481721878052,grad_norm: 0.9999990757420443, iteration: 126800
loss: 1.0319744348526,grad_norm: 0.9999992493118953, iteration: 126801
loss: 0.9987049102783203,grad_norm: 0.8812014556168232, iteration: 126802
loss: 1.012912392616272,grad_norm: 0.8025438654391392, iteration: 126803
loss: 1.0429633855819702,grad_norm: 0.9999991931004049, iteration: 126804
loss: 1.0472701787948608,grad_norm: 0.999999621446425, iteration: 126805
loss: 0.9916965961456299,grad_norm: 0.8604260064328517, iteration: 126806
loss: 0.9596081972122192,grad_norm: 0.9790103008356718, iteration: 126807
loss: 0.993809163570404,grad_norm: 0.9354178092507616, iteration: 126808
loss: 1.0031472444534302,grad_norm: 0.9876422331907752, iteration: 126809
loss: 0.9616673588752747,grad_norm: 0.9218747706265668, iteration: 126810
loss: 1.025713324546814,grad_norm: 0.9768478503098181, iteration: 126811
loss: 1.0720775127410889,grad_norm: 0.9999993967079374, iteration: 126812
loss: 1.0122445821762085,grad_norm: 0.9648371328663641, iteration: 126813
loss: 0.97988361120224,grad_norm: 0.9999991023589222, iteration: 126814
loss: 0.997439444065094,grad_norm: 0.9999992477519484, iteration: 126815
loss: 0.9781492352485657,grad_norm: 0.999999150308222, iteration: 126816
loss: 1.0068978071212769,grad_norm: 0.9999990166192801, iteration: 126817
loss: 0.9547204971313477,grad_norm: 0.99999896003533, iteration: 126818
loss: 1.0024025440216064,grad_norm: 0.945255345346062, iteration: 126819
loss: 1.0344724655151367,grad_norm: 0.9999992543544882, iteration: 126820
loss: 1.002211332321167,grad_norm: 0.9980805959759246, iteration: 126821
loss: 1.0285369157791138,grad_norm: 0.9406637514957658, iteration: 126822
loss: 0.9930099844932556,grad_norm: 0.8459608404937105, iteration: 126823
loss: 0.951155424118042,grad_norm: 0.9999992391794907, iteration: 126824
loss: 1.0034884214401245,grad_norm: 0.9999990852860303, iteration: 126825
loss: 0.9899929761886597,grad_norm: 0.9710787746227508, iteration: 126826
loss: 1.0010054111480713,grad_norm: 0.9999993093624493, iteration: 126827
loss: 1.039614200592041,grad_norm: 0.9999989975940636, iteration: 126828
loss: 0.9828642010688782,grad_norm: 0.9683722721675968, iteration: 126829
loss: 0.973996639251709,grad_norm: 0.9999992128042275, iteration: 126830
loss: 1.0186725854873657,grad_norm: 0.9999992009841673, iteration: 126831
loss: 0.9714926481246948,grad_norm: 0.9999993056722908, iteration: 126832
loss: 0.974176824092865,grad_norm: 0.9999991903180475, iteration: 126833
loss: 0.9771443009376526,grad_norm: 0.9999991864303553, iteration: 126834
loss: 1.0233228206634521,grad_norm: 0.9999991103766483, iteration: 126835
loss: 0.995840847492218,grad_norm: 0.9999991402041332, iteration: 126836
loss: 0.9753773212432861,grad_norm: 0.9558221752043596, iteration: 126837
loss: 1.000683069229126,grad_norm: 0.9999990591098618, iteration: 126838
loss: 0.9992018342018127,grad_norm: 0.9483413826662382, iteration: 126839
loss: 0.9714795351028442,grad_norm: 0.8607470025696308, iteration: 126840
loss: 0.9992833733558655,grad_norm: 0.9357339098142696, iteration: 126841
loss: 0.992476224899292,grad_norm: 0.9345017890008569, iteration: 126842
loss: 0.9863755702972412,grad_norm: 0.999999048703064, iteration: 126843
loss: 0.9596149921417236,grad_norm: 0.9999991626785624, iteration: 126844
loss: 0.9837712049484253,grad_norm: 0.9925253685598414, iteration: 126845
loss: 0.9881971478462219,grad_norm: 0.8658164107720603, iteration: 126846
loss: 1.0085430145263672,grad_norm: 0.9999992797082646, iteration: 126847
loss: 1.0102213621139526,grad_norm: 0.9999991822826014, iteration: 126848
loss: 1.009361982345581,grad_norm: 0.8442506051552953, iteration: 126849
loss: 0.9980093240737915,grad_norm: 0.9999992418422491, iteration: 126850
loss: 1.0245822668075562,grad_norm: 0.9914948250828362, iteration: 126851
loss: 1.0018428564071655,grad_norm: 0.965787009155679, iteration: 126852
loss: 0.9946921467781067,grad_norm: 0.9999990383857454, iteration: 126853
loss: 0.9874460697174072,grad_norm: 0.9999991606330867, iteration: 126854
loss: 0.9720793962478638,grad_norm: 0.9999993319300109, iteration: 126855
loss: 1.009668231010437,grad_norm: 0.9999990975188555, iteration: 126856
loss: 1.022866129875183,grad_norm: 0.9999991710422353, iteration: 126857
loss: 0.9760687947273254,grad_norm: 0.999999300052277, iteration: 126858
loss: 0.9745469689369202,grad_norm: 0.9999993643594496, iteration: 126859
loss: 0.9866737127304077,grad_norm: 0.9455519551586917, iteration: 126860
loss: 1.0191127061843872,grad_norm: 0.9170492154357776, iteration: 126861
loss: 1.0079292058944702,grad_norm: 0.9999991617432027, iteration: 126862
loss: 1.0053242444992065,grad_norm: 0.9672597988054797, iteration: 126863
loss: 0.9842870831489563,grad_norm: 0.9603163558029107, iteration: 126864
loss: 1.0076910257339478,grad_norm: 0.9999991046659884, iteration: 126865
loss: 1.0124907493591309,grad_norm: 0.9999989843159104, iteration: 126866
loss: 0.9916075468063354,grad_norm: 0.9999991611269828, iteration: 126867
loss: 1.0145074129104614,grad_norm: 0.8741438407011441, iteration: 126868
loss: 1.0208605527877808,grad_norm: 0.9999991183417305, iteration: 126869
loss: 1.0014679431915283,grad_norm: 0.9999992119536214, iteration: 126870
loss: 0.9944272637367249,grad_norm: 0.8048825331647976, iteration: 126871
loss: 0.971103847026825,grad_norm: 0.9999991978991524, iteration: 126872
loss: 0.9589186310768127,grad_norm: 0.9999989451922399, iteration: 126873
loss: 0.9636884331703186,grad_norm: 0.9324409374171252, iteration: 126874
loss: 0.9936437010765076,grad_norm: 0.9999990582480062, iteration: 126875
loss: 1.0331183671951294,grad_norm: 0.9855375900990566, iteration: 126876
loss: 1.0334292650222778,grad_norm: 0.9454197732951835, iteration: 126877
loss: 1.0018243789672852,grad_norm: 0.9999991624812065, iteration: 126878
loss: 1.0217961072921753,grad_norm: 0.9720487426550198, iteration: 126879
loss: 0.9990832209587097,grad_norm: 0.9426417151065358, iteration: 126880
loss: 1.0389198064804077,grad_norm: 0.9999991364165879, iteration: 126881
loss: 0.9773632287979126,grad_norm: 0.983613679438928, iteration: 126882
loss: 1.0228099822998047,grad_norm: 0.9999992012980058, iteration: 126883
loss: 1.008730411529541,grad_norm: 0.9959024346157426, iteration: 126884
loss: 1.003008484840393,grad_norm: 0.9999993039526105, iteration: 126885
loss: 1.0115513801574707,grad_norm: 0.952082227784235, iteration: 126886
loss: 1.0437499284744263,grad_norm: 0.9999992250280791, iteration: 126887
loss: 1.0321683883666992,grad_norm: 0.9930920994659871, iteration: 126888
loss: 0.9634582996368408,grad_norm: 0.9999991464524182, iteration: 126889
loss: 0.9842641353607178,grad_norm: 0.9659652457066653, iteration: 126890
loss: 1.0088937282562256,grad_norm: 0.9999991524969695, iteration: 126891
loss: 1.0229791402816772,grad_norm: 0.9999992408389208, iteration: 126892
loss: 1.0004359483718872,grad_norm: 0.9999990376101715, iteration: 126893
loss: 1.0121123790740967,grad_norm: 0.9999992449511822, iteration: 126894
loss: 0.9960618019104004,grad_norm: 0.8471821044973048, iteration: 126895
loss: 0.9987163543701172,grad_norm: 0.9999990075501882, iteration: 126896
loss: 0.9945812225341797,grad_norm: 0.9999991296200246, iteration: 126897
loss: 0.999719500541687,grad_norm: 0.9999992192693845, iteration: 126898
loss: 0.9840070009231567,grad_norm: 0.9528746690487107, iteration: 126899
loss: 1.0301856994628906,grad_norm: 0.9999991238097841, iteration: 126900
loss: 1.019635796546936,grad_norm: 0.9515185991365422, iteration: 126901
loss: 1.037407398223877,grad_norm: 0.9999991159250525, iteration: 126902
loss: 1.006572961807251,grad_norm: 0.9999990862133145, iteration: 126903
loss: 1.0174083709716797,grad_norm: 0.9999992021850705, iteration: 126904
loss: 0.9934465885162354,grad_norm: 0.9999991709734275, iteration: 126905
loss: 1.0329217910766602,grad_norm: 0.999999055715503, iteration: 126906
loss: 0.9986263513565063,grad_norm: 0.9999990825097883, iteration: 126907
loss: 1.0120587348937988,grad_norm: 0.9999990844974327, iteration: 126908
loss: 0.971102237701416,grad_norm: 0.9999991607614738, iteration: 126909
loss: 1.0060642957687378,grad_norm: 0.8856483858886832, iteration: 126910
loss: 1.004575490951538,grad_norm: 0.8688780314937623, iteration: 126911
loss: 1.0055443048477173,grad_norm: 0.9999989929141628, iteration: 126912
loss: 1.025952935218811,grad_norm: 0.9999992585793441, iteration: 126913
loss: 1.0184853076934814,grad_norm: 0.9999990550056682, iteration: 126914
loss: 0.9758753180503845,grad_norm: 0.9303072489511928, iteration: 126915
loss: 1.0257221460342407,grad_norm: 0.9999993091717571, iteration: 126916
loss: 0.9913084506988525,grad_norm: 0.9999990658133316, iteration: 126917
loss: 0.9706827402114868,grad_norm: 0.9999991165646043, iteration: 126918
loss: 0.9741050004959106,grad_norm: 0.9999990682376486, iteration: 126919
loss: 1.0179287195205688,grad_norm: 0.9999992362413904, iteration: 126920
loss: 1.0391331911087036,grad_norm: 0.9999994069700276, iteration: 126921
loss: 0.9758777022361755,grad_norm: 0.9999990815700922, iteration: 126922
loss: 0.9773749709129333,grad_norm: 0.9999990365202962, iteration: 126923
loss: 1.0024747848510742,grad_norm: 0.9999992891438508, iteration: 126924
loss: 0.9886307716369629,grad_norm: 0.9999992783357596, iteration: 126925
loss: 1.0038481950759888,grad_norm: 0.8889818863933797, iteration: 126926
loss: 1.0222501754760742,grad_norm: 0.9999992177933285, iteration: 126927
loss: 0.9712150692939758,grad_norm: 0.8973999653773765, iteration: 126928
loss: 0.9891618490219116,grad_norm: 0.8862644321641157, iteration: 126929
loss: 0.9702184200286865,grad_norm: 0.9999991538603643, iteration: 126930
loss: 0.996269702911377,grad_norm: 0.9481067255147599, iteration: 126931
loss: 1.1359786987304688,grad_norm: 0.9999993807382014, iteration: 126932
loss: 0.9721628427505493,grad_norm: 0.9999991104572321, iteration: 126933
loss: 1.0233983993530273,grad_norm: 0.9710082735983437, iteration: 126934
loss: 1.0187830924987793,grad_norm: 0.995328161569448, iteration: 126935
loss: 0.9970762133598328,grad_norm: 0.9879932487011445, iteration: 126936
loss: 1.0041656494140625,grad_norm: 0.9999989921958088, iteration: 126937
loss: 0.9882820248603821,grad_norm: 0.9999991776057839, iteration: 126938
loss: 1.0032942295074463,grad_norm: 0.8894675955929883, iteration: 126939
loss: 1.0247262716293335,grad_norm: 0.9999998010389578, iteration: 126940
loss: 1.0148203372955322,grad_norm: 0.9802063048021128, iteration: 126941
loss: 0.9815059900283813,grad_norm: 0.9999990340757069, iteration: 126942
loss: 0.9984838962554932,grad_norm: 0.9999991064442867, iteration: 126943
loss: 0.9989429116249084,grad_norm: 0.9999990667353662, iteration: 126944
loss: 1.0151716470718384,grad_norm: 0.9999990133820812, iteration: 126945
loss: 1.0362015962600708,grad_norm: 0.9999992399122617, iteration: 126946
loss: 0.9707000851631165,grad_norm: 0.8793062232291384, iteration: 126947
loss: 1.010520100593567,grad_norm: 0.9727719914821844, iteration: 126948
loss: 0.9913910031318665,grad_norm: 0.9999989855074294, iteration: 126949
loss: 1.031436562538147,grad_norm: 0.8379270837034227, iteration: 126950
loss: 0.9883601069450378,grad_norm: 0.9999990503055244, iteration: 126951
loss: 1.0169559717178345,grad_norm: 0.8713744355348884, iteration: 126952
loss: 0.9684985876083374,grad_norm: 0.9717996543457788, iteration: 126953
loss: 0.9656922817230225,grad_norm: 0.999999264590827, iteration: 126954
loss: 0.9927187561988831,grad_norm: 0.9999990039401861, iteration: 126955
loss: 0.9540367126464844,grad_norm: 0.9672250981118657, iteration: 126956
loss: 0.9826934933662415,grad_norm: 0.9999991035036246, iteration: 126957
loss: 1.0014150142669678,grad_norm: 0.9999991461685829, iteration: 126958
loss: 1.0086207389831543,grad_norm: 0.9400230864950029, iteration: 126959
loss: 1.0269227027893066,grad_norm: 0.9999991361800963, iteration: 126960
loss: 0.9739536643028259,grad_norm: 0.9999989969797739, iteration: 126961
loss: 1.0402510166168213,grad_norm: 0.9420920366132461, iteration: 126962
loss: 1.0094815492630005,grad_norm: 0.9390758525734275, iteration: 126963
loss: 1.0348117351531982,grad_norm: 0.9999991737342935, iteration: 126964
loss: 1.0030921697616577,grad_norm: 0.9709758643525223, iteration: 126965
loss: 1.0284093618392944,grad_norm: 0.8939286921905496, iteration: 126966
loss: 0.9705076813697815,grad_norm: 0.9999990833872675, iteration: 126967
loss: 0.9905108213424683,grad_norm: 0.999999265564499, iteration: 126968
loss: 1.0156359672546387,grad_norm: 0.896645372286238, iteration: 126969
loss: 1.039682149887085,grad_norm: 0.9999990572216646, iteration: 126970
loss: 0.9947428107261658,grad_norm: 0.9753863497683884, iteration: 126971
loss: 0.9803023934364319,grad_norm: 0.928849555647974, iteration: 126972
loss: 0.9640935659408569,grad_norm: 0.929744330671115, iteration: 126973
loss: 1.020040512084961,grad_norm: 0.9999993413578341, iteration: 126974
loss: 1.0150266885757446,grad_norm: 0.9999989753443177, iteration: 126975
loss: 1.0048291683197021,grad_norm: 0.9742561360648339, iteration: 126976
loss: 0.9801357984542847,grad_norm: 0.9999991155079831, iteration: 126977
loss: 1.012954831123352,grad_norm: 0.9999989777832422, iteration: 126978
loss: 1.0380444526672363,grad_norm: 0.9999992330955734, iteration: 126979
loss: 1.0342174768447876,grad_norm: 0.9999991486590676, iteration: 126980
loss: 1.0170818567276,grad_norm: 0.9999989709908518, iteration: 126981
loss: 1.0199992656707764,grad_norm: 0.8864623790853365, iteration: 126982
loss: 0.9702745676040649,grad_norm: 0.9828729927955763, iteration: 126983
loss: 0.977986216545105,grad_norm: 0.9999991928679787, iteration: 126984
loss: 0.979827880859375,grad_norm: 0.9999990889405451, iteration: 126985
loss: 1.0275897979736328,grad_norm: 0.9233344366644124, iteration: 126986
loss: 1.0257487297058105,grad_norm: 0.9999990202644392, iteration: 126987
loss: 0.9930351376533508,grad_norm: 0.987086960600881, iteration: 126988
loss: 0.9773576259613037,grad_norm: 0.950955621095465, iteration: 126989
loss: 1.0381691455841064,grad_norm: 0.9195610614850879, iteration: 126990
loss: 0.9922406673431396,grad_norm: 0.9154623331340206, iteration: 126991
loss: 0.983497142791748,grad_norm: 0.9999991366169317, iteration: 126992
loss: 0.9704920649528503,grad_norm: 0.9999989928342267, iteration: 126993
loss: 1.0264090299606323,grad_norm: 0.9999992224293872, iteration: 126994
loss: 0.9872526526451111,grad_norm: 0.9191777464949894, iteration: 126995
loss: 0.973882257938385,grad_norm: 0.9868298726081304, iteration: 126996
loss: 1.0328102111816406,grad_norm: 0.9999997664897642, iteration: 126997
loss: 1.0141105651855469,grad_norm: 0.9365022491441062, iteration: 126998
loss: 0.995096743106842,grad_norm: 0.9955206011698605, iteration: 126999
loss: 1.0232247114181519,grad_norm: 0.9999990328094686, iteration: 127000
loss: 0.9930728673934937,grad_norm: 0.9999991839865298, iteration: 127001
loss: 1.0067046880722046,grad_norm: 0.9602085168377693, iteration: 127002
loss: 1.0185410976409912,grad_norm: 0.9704396526792146, iteration: 127003
loss: 1.00750732421875,grad_norm: 0.9999991274687705, iteration: 127004
loss: 0.9621095061302185,grad_norm: 0.9999990052395454, iteration: 127005
loss: 0.9940592050552368,grad_norm: 0.9999990364314625, iteration: 127006
loss: 0.9865387678146362,grad_norm: 0.958895429910025, iteration: 127007
loss: 1.021439552307129,grad_norm: 0.9026679924331679, iteration: 127008
loss: 1.0249085426330566,grad_norm: 0.9999991562656926, iteration: 127009
loss: 1.0132060050964355,grad_norm: 0.999999256106668, iteration: 127010
loss: 0.9794017672538757,grad_norm: 0.9923623541725848, iteration: 127011
loss: 0.9923413395881653,grad_norm: 0.9996920645127189, iteration: 127012
loss: 0.9968107342720032,grad_norm: 0.9380176175678054, iteration: 127013
loss: 0.9999465942382812,grad_norm: 0.9999990731510137, iteration: 127014
loss: 0.9597710371017456,grad_norm: 0.9999992786118183, iteration: 127015
loss: 1.0118228197097778,grad_norm: 0.8519311131874717, iteration: 127016
loss: 0.9834728837013245,grad_norm: 0.9999990933826016, iteration: 127017
loss: 0.9881924986839294,grad_norm: 0.913412720355711, iteration: 127018
loss: 0.9990300536155701,grad_norm: 0.9999990999488814, iteration: 127019
loss: 1.0196938514709473,grad_norm: 0.962307172520702, iteration: 127020
loss: 1.02065110206604,grad_norm: 0.9822594783953439, iteration: 127021
loss: 1.026691198348999,grad_norm: 0.9999991719707847, iteration: 127022
loss: 0.9720988273620605,grad_norm: 0.9025237192114836, iteration: 127023
loss: 0.9961534142494202,grad_norm: 0.9999990060964351, iteration: 127024
loss: 1.0632439851760864,grad_norm: 0.9999996741969609, iteration: 127025
loss: 0.9885233640670776,grad_norm: 0.9999991075878141, iteration: 127026
loss: 0.9945887923240662,grad_norm: 0.895563554464864, iteration: 127027
loss: 0.981722354888916,grad_norm: 0.9999992456725905, iteration: 127028
loss: 1.0161280632019043,grad_norm: 0.9601703614518441, iteration: 127029
loss: 1.0104506015777588,grad_norm: 0.9239699013127346, iteration: 127030
loss: 0.9885743260383606,grad_norm: 0.9999989933558528, iteration: 127031
loss: 0.9976485967636108,grad_norm: 0.9071029022663315, iteration: 127032
loss: 0.9599886536598206,grad_norm: 0.9749166823756622, iteration: 127033
loss: 0.9665767550468445,grad_norm: 0.9999991859640901, iteration: 127034
loss: 1.0368914604187012,grad_norm: 0.8932297250651431, iteration: 127035
loss: 1.001208782196045,grad_norm: 0.9999991861110463, iteration: 127036
loss: 0.9954071044921875,grad_norm: 0.9999991639996854, iteration: 127037
loss: 1.0057759284973145,grad_norm: 0.8928990323424789, iteration: 127038
loss: 0.9775800108909607,grad_norm: 0.9999991409002282, iteration: 127039
loss: 0.9679784774780273,grad_norm: 0.9660772414941949, iteration: 127040
loss: 1.0299427509307861,grad_norm: 0.8761875036411139, iteration: 127041
loss: 0.9771736264228821,grad_norm: 0.9786381969290592, iteration: 127042
loss: 1.0058372020721436,grad_norm: 0.966745785822468, iteration: 127043
loss: 0.9808799028396606,grad_norm: 0.9999990421682422, iteration: 127044
loss: 1.0012954473495483,grad_norm: 0.9999991133066019, iteration: 127045
loss: 0.9643058776855469,grad_norm: 0.9090345768310766, iteration: 127046
loss: 1.0129321813583374,grad_norm: 0.8301929826077312, iteration: 127047
loss: 1.0105606317520142,grad_norm: 0.8885361878032677, iteration: 127048
loss: 1.0026707649230957,grad_norm: 0.9850079663365423, iteration: 127049
loss: 0.9648412466049194,grad_norm: 0.9999992839719998, iteration: 127050
loss: 0.9868236780166626,grad_norm: 0.9754968103557347, iteration: 127051
loss: 0.9881105422973633,grad_norm: 0.9999991825005805, iteration: 127052
loss: 1.0037566423416138,grad_norm: 0.9999989894668541, iteration: 127053
loss: 1.0246291160583496,grad_norm: 0.9440381966285318, iteration: 127054
loss: 0.9998314380645752,grad_norm: 0.9999992635198838, iteration: 127055
loss: 1.0047203302383423,grad_norm: 0.9999991316563722, iteration: 127056
loss: 0.9781298637390137,grad_norm: 0.9984766976852493, iteration: 127057
loss: 1.0000104904174805,grad_norm: 0.9999990680756521, iteration: 127058
loss: 1.0062823295593262,grad_norm: 0.9039720218762467, iteration: 127059
loss: 1.0111372470855713,grad_norm: 0.932485030383793, iteration: 127060
loss: 1.002282977104187,grad_norm: 0.9999992774237365, iteration: 127061
loss: 0.9890334010124207,grad_norm: 0.8945168278728702, iteration: 127062
loss: 1.0106017589569092,grad_norm: 0.9999989887728322, iteration: 127063
loss: 1.0177648067474365,grad_norm: 0.9488921635026826, iteration: 127064
loss: 1.007325530052185,grad_norm: 0.9999988878909934, iteration: 127065
loss: 1.0044565200805664,grad_norm: 0.9347794317703297, iteration: 127066
loss: 0.9903948307037354,grad_norm: 0.9620275635881367, iteration: 127067
loss: 1.0073152780532837,grad_norm: 0.9795763152003939, iteration: 127068
loss: 1.0026803016662598,grad_norm: 0.9999990813515146, iteration: 127069
loss: 1.0182902812957764,grad_norm: 0.9999991939890254, iteration: 127070
loss: 0.9934766292572021,grad_norm: 0.999999186504918, iteration: 127071
loss: 0.9727336168289185,grad_norm: 0.9120451322169508, iteration: 127072
loss: 0.9954386353492737,grad_norm: 0.9999991559647073, iteration: 127073
loss: 0.9804463982582092,grad_norm: 0.9443238473649888, iteration: 127074
loss: 0.9705191850662231,grad_norm: 0.999999253209913, iteration: 127075
loss: 0.9965876340866089,grad_norm: 0.9999992162701753, iteration: 127076
loss: 1.0413243770599365,grad_norm: 0.9999991837223053, iteration: 127077
loss: 0.9905806183815002,grad_norm: 0.9999991579137881, iteration: 127078
loss: 0.9905255436897278,grad_norm: 0.9621807962462379, iteration: 127079
loss: 0.9915745854377747,grad_norm: 0.983859068395132, iteration: 127080
loss: 1.015118956565857,grad_norm: 0.9110986230275091, iteration: 127081
loss: 0.9835367798805237,grad_norm: 0.9999991580612574, iteration: 127082
loss: 0.977902889251709,grad_norm: 0.9999992063422185, iteration: 127083
loss: 0.9923381805419922,grad_norm: 0.8874341572146004, iteration: 127084
loss: 1.0069825649261475,grad_norm: 0.8857212397006483, iteration: 127085
loss: 0.9889747500419617,grad_norm: 0.9867207524902483, iteration: 127086
loss: 1.029018759727478,grad_norm: 0.9961818129551897, iteration: 127087
loss: 1.0285882949829102,grad_norm: 0.9999989761656938, iteration: 127088
loss: 0.9734440445899963,grad_norm: 0.9236553103411037, iteration: 127089
loss: 0.9959230422973633,grad_norm: 0.9226070957484777, iteration: 127090
loss: 0.9419105052947998,grad_norm: 0.9999992362929635, iteration: 127091
loss: 0.9948533177375793,grad_norm: 0.8695160296492165, iteration: 127092
loss: 1.0144214630126953,grad_norm: 0.89809690405035, iteration: 127093
loss: 1.007721185684204,grad_norm: 0.9397941035897239, iteration: 127094
loss: 0.9675458669662476,grad_norm: 0.8859938228514919, iteration: 127095
loss: 0.9874728918075562,grad_norm: 0.9967718863170835, iteration: 127096
loss: 0.9803543090820312,grad_norm: 0.975254385342568, iteration: 127097
loss: 1.0077563524246216,grad_norm: 0.9999990303760024, iteration: 127098
loss: 1.023281216621399,grad_norm: 0.9999990690313499, iteration: 127099
loss: 0.9759820699691772,grad_norm: 0.9999991092787114, iteration: 127100
loss: 0.9624754190444946,grad_norm: 0.9254950719610324, iteration: 127101
loss: 1.0493053197860718,grad_norm: 0.9999996599535085, iteration: 127102
loss: 0.9745410084724426,grad_norm: 0.8748991975456444, iteration: 127103
loss: 1.0057313442230225,grad_norm: 0.9856035088933142, iteration: 127104
loss: 0.9873003363609314,grad_norm: 0.8182004001673713, iteration: 127105
loss: 0.9970046877861023,grad_norm: 0.9651182817418095, iteration: 127106
loss: 1.0023189783096313,grad_norm: 0.9999991954726039, iteration: 127107
loss: 1.0175484418869019,grad_norm: 0.9999991155581518, iteration: 127108
loss: 0.9673564434051514,grad_norm: 0.9999991637093808, iteration: 127109
loss: 1.0039271116256714,grad_norm: 0.9940682632450912, iteration: 127110
loss: 0.9589831829071045,grad_norm: 0.9194621053821768, iteration: 127111
loss: 0.9965286254882812,grad_norm: 0.999999202913131, iteration: 127112
loss: 0.9869256615638733,grad_norm: 0.9999991494144707, iteration: 127113
loss: 0.9800230860710144,grad_norm: 0.9999992218847794, iteration: 127114
loss: 0.9933754801750183,grad_norm: 0.9999992367980858, iteration: 127115
loss: 1.0374683141708374,grad_norm: 0.9999992416507697, iteration: 127116
loss: 1.020253300666809,grad_norm: 0.8904456197047402, iteration: 127117
loss: 1.0074900388717651,grad_norm: 0.999999130345139, iteration: 127118
loss: 1.0151653289794922,grad_norm: 0.9999991162548937, iteration: 127119
loss: 0.9621895551681519,grad_norm: 0.8674763116902476, iteration: 127120
loss: 1.0280489921569824,grad_norm: 0.9999990097073382, iteration: 127121
loss: 1.0390491485595703,grad_norm: 0.879861206803769, iteration: 127122
loss: 0.9970910549163818,grad_norm: 0.8666455201882106, iteration: 127123
loss: 1.0152089595794678,grad_norm: 0.9999992475084725, iteration: 127124
loss: 1.0090404748916626,grad_norm: 0.9999997551881599, iteration: 127125
loss: 1.012724757194519,grad_norm: 0.9999993099792456, iteration: 127126
loss: 0.9978209137916565,grad_norm: 0.7858040023615318, iteration: 127127
loss: 1.0047295093536377,grad_norm: 0.9999993262794352, iteration: 127128
loss: 1.0125515460968018,grad_norm: 0.9333591738080284, iteration: 127129
loss: 0.9900195598602295,grad_norm: 0.9999990383314376, iteration: 127130
loss: 0.9995770454406738,grad_norm: 0.9999992057499492, iteration: 127131
loss: 1.006455898284912,grad_norm: 0.9999995130098666, iteration: 127132
loss: 0.9859777688980103,grad_norm: 0.999998990524911, iteration: 127133
loss: 1.0107698440551758,grad_norm: 0.9999991069767952, iteration: 127134
loss: 1.0306590795516968,grad_norm: 0.9321356676032283, iteration: 127135
loss: 1.0260803699493408,grad_norm: 0.9999990257845194, iteration: 127136
loss: 1.0078997611999512,grad_norm: 0.9797135489327151, iteration: 127137
loss: 1.076341152191162,grad_norm: 0.9999990871014817, iteration: 127138
loss: 0.9870354533195496,grad_norm: 0.99999899675925, iteration: 127139
loss: 1.0005438327789307,grad_norm: 0.9999990654753124, iteration: 127140
loss: 0.9739089012145996,grad_norm: 0.999999220886239, iteration: 127141
loss: 1.0081769227981567,grad_norm: 0.9737505485789496, iteration: 127142
loss: 1.014729380607605,grad_norm: 0.903726618954764, iteration: 127143
loss: 0.9714509844779968,grad_norm: 0.9613508915112401, iteration: 127144
loss: 1.0087593793869019,grad_norm: 0.9999991237855475, iteration: 127145
loss: 1.023431420326233,grad_norm: 0.9752640938330365, iteration: 127146
loss: 1.039716124534607,grad_norm: 0.9999991798644484, iteration: 127147
loss: 0.9624494314193726,grad_norm: 0.9721976646725564, iteration: 127148
loss: 0.9793551564216614,grad_norm: 0.9839087984014135, iteration: 127149
loss: 0.937278687953949,grad_norm: 0.9374683060044076, iteration: 127150
loss: 0.9550381302833557,grad_norm: 0.9999992461063929, iteration: 127151
loss: 1.0276039838790894,grad_norm: 0.9999990038410286, iteration: 127152
loss: 1.004859209060669,grad_norm: 0.9999990817701062, iteration: 127153
loss: 0.982387125492096,grad_norm: 0.9999992125539275, iteration: 127154
loss: 0.9764438271522522,grad_norm: 0.9999990725156715, iteration: 127155
loss: 1.0145456790924072,grad_norm: 0.9999990675734829, iteration: 127156
loss: 0.9907280802726746,grad_norm: 0.9597924611124596, iteration: 127157
loss: 1.0042743682861328,grad_norm: 0.9751869898725773, iteration: 127158
loss: 0.9721971750259399,grad_norm: 0.9819678051788957, iteration: 127159
loss: 0.9444352388381958,grad_norm: 0.9999991448948834, iteration: 127160
loss: 1.0326080322265625,grad_norm: 0.9215737507613777, iteration: 127161
loss: 0.9950495958328247,grad_norm: 0.96081419024912, iteration: 127162
loss: 0.9646629095077515,grad_norm: 0.9999992337527672, iteration: 127163
loss: 1.0107002258300781,grad_norm: 0.7658243498365381, iteration: 127164
loss: 0.9684154391288757,grad_norm: 0.9357855249660938, iteration: 127165
loss: 1.0207040309906006,grad_norm: 0.9954337635237662, iteration: 127166
loss: 1.0769526958465576,grad_norm: 0.9999990842857253, iteration: 127167
loss: 1.006987452507019,grad_norm: 0.9999991861337503, iteration: 127168
loss: 0.970054566860199,grad_norm: 0.9999991443942092, iteration: 127169
loss: 0.9585196375846863,grad_norm: 0.9208112730947525, iteration: 127170
loss: 1.0282585620880127,grad_norm: 0.9999992072626602, iteration: 127171
loss: 0.986454963684082,grad_norm: 0.9671298297393757, iteration: 127172
loss: 0.9881547689437866,grad_norm: 0.9999991476426352, iteration: 127173
loss: 0.9982020258903503,grad_norm: 0.9999990498983343, iteration: 127174
loss: 1.0533447265625,grad_norm: 0.9999992185674078, iteration: 127175
loss: 0.9736434817314148,grad_norm: 0.9751639598754577, iteration: 127176
loss: 1.0003604888916016,grad_norm: 0.8910858443504778, iteration: 127177
loss: 1.015439748764038,grad_norm: 0.9999992445749352, iteration: 127178
loss: 0.9749135375022888,grad_norm: 0.9999990093884373, iteration: 127179
loss: 0.9997714161872864,grad_norm: 0.9999991859354316, iteration: 127180
loss: 0.981518030166626,grad_norm: 0.9999992274160896, iteration: 127181
loss: 0.979033350944519,grad_norm: 0.9999991732448473, iteration: 127182
loss: 0.99431973695755,grad_norm: 0.9999992911059798, iteration: 127183
loss: 1.0392067432403564,grad_norm: 0.9999995140565062, iteration: 127184
loss: 1.0006425380706787,grad_norm: 0.9934403222540534, iteration: 127185
loss: 0.9929265975952148,grad_norm: 0.9784849045269434, iteration: 127186
loss: 1.008429765701294,grad_norm: 0.9999991227661829, iteration: 127187
loss: 0.993719756603241,grad_norm: 0.9999990943004198, iteration: 127188
loss: 0.9996694922447205,grad_norm: 0.9999992614239664, iteration: 127189
loss: 1.0047606229782104,grad_norm: 0.913992934310084, iteration: 127190
loss: 1.0085103511810303,grad_norm: 0.9067454986071642, iteration: 127191
loss: 1.0015771389007568,grad_norm: 0.972405977914257, iteration: 127192
loss: 0.989406943321228,grad_norm: 0.9999993292818163, iteration: 127193
loss: 1.1628285646438599,grad_norm: 0.9999999829652751, iteration: 127194
loss: 0.9854956269264221,grad_norm: 0.9999991209435019, iteration: 127195
loss: 1.0149040222167969,grad_norm: 0.9917110405178363, iteration: 127196
loss: 0.9857727289199829,grad_norm: 0.9999990074324533, iteration: 127197
loss: 0.9848204255104065,grad_norm: 0.9801056146201585, iteration: 127198
loss: 0.9707211256027222,grad_norm: 0.9999990582790218, iteration: 127199
loss: 1.0024492740631104,grad_norm: 0.8731200704551133, iteration: 127200
loss: 1.0335242748260498,grad_norm: 0.9712056161640248, iteration: 127201
loss: 0.9752177596092224,grad_norm: 0.9731400386295477, iteration: 127202
loss: 1.0263738632202148,grad_norm: 0.999998991877539, iteration: 127203
loss: 0.9865362048149109,grad_norm: 0.9999994553108598, iteration: 127204
loss: 0.9799737930297852,grad_norm: 0.8901841202437414, iteration: 127205
loss: 0.9803746938705444,grad_norm: 0.9754433934266379, iteration: 127206
loss: 0.9514800906181335,grad_norm: 0.8842637950277507, iteration: 127207
loss: 0.9878583550453186,grad_norm: 0.9986638027535129, iteration: 127208
loss: 0.981080174446106,grad_norm: 0.9999991928864177, iteration: 127209
loss: 0.9973593354225159,grad_norm: 0.9999991624519933, iteration: 127210
loss: 1.0356040000915527,grad_norm: 0.999999313416616, iteration: 127211
loss: 0.9996948838233948,grad_norm: 0.9139540638308016, iteration: 127212
loss: 0.9756256937980652,grad_norm: 0.8955306580461242, iteration: 127213
loss: 1.026789665222168,grad_norm: 0.9999991611759216, iteration: 127214
loss: 1.0263088941574097,grad_norm: 0.9908260005871734, iteration: 127215
loss: 1.0108017921447754,grad_norm: 0.9999991048817758, iteration: 127216
loss: 0.9799832105636597,grad_norm: 0.918886935071109, iteration: 127217
loss: 1.0216164588928223,grad_norm: 0.9999990780274443, iteration: 127218
loss: 0.9929900765419006,grad_norm: 0.8563151169356631, iteration: 127219
loss: 1.0293478965759277,grad_norm: 0.9766066279003047, iteration: 127220
loss: 1.0044286251068115,grad_norm: 0.9803107515622325, iteration: 127221
loss: 1.011714220046997,grad_norm: 0.9999997953299105, iteration: 127222
loss: 0.9964737892150879,grad_norm: 0.9565421210520478, iteration: 127223
loss: 1.059694528579712,grad_norm: 0.9999995612401376, iteration: 127224
loss: 0.9903374314308167,grad_norm: 0.8698307412263738, iteration: 127225
loss: 0.996039092540741,grad_norm: 0.9999991270014996, iteration: 127226
loss: 1.021092176437378,grad_norm: 0.9999991739052071, iteration: 127227
loss: 0.9848797917366028,grad_norm: 0.9337652655061234, iteration: 127228
loss: 1.0227338075637817,grad_norm: 0.9999992439144118, iteration: 127229
loss: 1.0442081689834595,grad_norm: 0.9288106844141968, iteration: 127230
loss: 1.0154024362564087,grad_norm: 0.9999990366556347, iteration: 127231
loss: 1.0195378065109253,grad_norm: 0.9999991269478921, iteration: 127232
loss: 1.0511072874069214,grad_norm: 0.9999989891524567, iteration: 127233
loss: 1.036057472229004,grad_norm: 0.9999989121799634, iteration: 127234
loss: 1.0059503316879272,grad_norm: 0.999999028113944, iteration: 127235
loss: 1.0230860710144043,grad_norm: 0.999998949068143, iteration: 127236
loss: 0.9976921677589417,grad_norm: 0.9999989419059826, iteration: 127237
loss: 1.0080832242965698,grad_norm: 0.8934654290589834, iteration: 127238
loss: 1.003840446472168,grad_norm: 0.9999990652106691, iteration: 127239
loss: 1.0003652572631836,grad_norm: 0.9999991049195318, iteration: 127240
loss: 0.9898970127105713,grad_norm: 0.9999991182583597, iteration: 127241
loss: 0.9769436717033386,grad_norm: 0.9999991492754517, iteration: 127242
loss: 0.9919261336326599,grad_norm: 0.9999992386683386, iteration: 127243
loss: 0.9982975721359253,grad_norm: 0.9999991120986595, iteration: 127244
loss: 1.019477367401123,grad_norm: 0.9999990184772022, iteration: 127245
loss: 1.020280122756958,grad_norm: 0.999999009358531, iteration: 127246
loss: 0.9981421828269958,grad_norm: 0.9999991294597932, iteration: 127247
loss: 1.0182521343231201,grad_norm: 0.8562726585729825, iteration: 127248
loss: 1.0400148630142212,grad_norm: 0.9999991872796086, iteration: 127249
loss: 0.9934870004653931,grad_norm: 0.9999990253658267, iteration: 127250
loss: 0.9910944700241089,grad_norm: 0.8773612788082623, iteration: 127251
loss: 0.9871172308921814,grad_norm: 0.9999991242836126, iteration: 127252
loss: 1.0199846029281616,grad_norm: 0.999998962503463, iteration: 127253
loss: 1.0025413036346436,grad_norm: 0.9823067186540456, iteration: 127254
loss: 1.0000945329666138,grad_norm: 0.9999991071331085, iteration: 127255
loss: 0.9948570132255554,grad_norm: 0.9999990923456598, iteration: 127256
loss: 0.9790188074111938,grad_norm: 0.9999991285524389, iteration: 127257
loss: 0.9991458654403687,grad_norm: 0.9999989617813717, iteration: 127258
loss: 0.991407036781311,grad_norm: 0.887222053550192, iteration: 127259
loss: 0.9863011240959167,grad_norm: 0.9164933698512084, iteration: 127260
loss: 1.0116264820098877,grad_norm: 0.9151841603907548, iteration: 127261
loss: 0.9889255166053772,grad_norm: 0.980231069804321, iteration: 127262
loss: 0.9636168479919434,grad_norm: 0.885421155491549, iteration: 127263
loss: 0.9944448471069336,grad_norm: 0.9970450444788074, iteration: 127264
loss: 0.984036386013031,grad_norm: 0.9495125754068618, iteration: 127265
loss: 0.9906126856803894,grad_norm: 0.9999991284775676, iteration: 127266
loss: 0.9723750948905945,grad_norm: 0.9999989507303838, iteration: 127267
loss: 1.0162936449050903,grad_norm: 0.9999991408112446, iteration: 127268
loss: 0.9668879508972168,grad_norm: 0.9999990940187098, iteration: 127269
loss: 1.0207022428512573,grad_norm: 0.9185646298383668, iteration: 127270
loss: 1.0236526727676392,grad_norm: 0.9999993017908713, iteration: 127271
loss: 0.9959143996238708,grad_norm: 0.9999994883961268, iteration: 127272
loss: 1.0383614301681519,grad_norm: 0.999999814451094, iteration: 127273
loss: 0.9993341565132141,grad_norm: 0.9999990659830453, iteration: 127274
loss: 0.9746176600456238,grad_norm: 0.9999991896068385, iteration: 127275
loss: 1.0401992797851562,grad_norm: 0.8805451116616748, iteration: 127276
loss: 0.9873684048652649,grad_norm: 0.9999991099079159, iteration: 127277
loss: 1.031139850616455,grad_norm: 0.9999992407994754, iteration: 127278
loss: 0.9916048645973206,grad_norm: 0.9999991697163562, iteration: 127279
loss: 0.9988604784011841,grad_norm: 0.9999991390287245, iteration: 127280
loss: 1.0182037353515625,grad_norm: 0.9681850053960103, iteration: 127281
loss: 0.9976421594619751,grad_norm: 0.9999991794479985, iteration: 127282
loss: 1.003001093864441,grad_norm: 0.9797645633231737, iteration: 127283
loss: 1.021012306213379,grad_norm: 0.8793067071273544, iteration: 127284
loss: 1.0153242349624634,grad_norm: 0.9336713690205503, iteration: 127285
loss: 0.9927219748497009,grad_norm: 0.9999991766975601, iteration: 127286
loss: 0.9668353796005249,grad_norm: 0.9999991793337615, iteration: 127287
loss: 0.9631948471069336,grad_norm: 0.8964180575578008, iteration: 127288
loss: 1.0114611387252808,grad_norm: 0.9999991177528404, iteration: 127289
loss: 0.9978384971618652,grad_norm: 0.9999992890738226, iteration: 127290
loss: 0.9826130867004395,grad_norm: 0.9999993922326011, iteration: 127291
loss: 1.0014928579330444,grad_norm: 0.9436358568564572, iteration: 127292
loss: 0.9963237047195435,grad_norm: 0.9999991287287023, iteration: 127293
loss: 0.9858835339546204,grad_norm: 0.9999990422766588, iteration: 127294
loss: 0.9784523844718933,grad_norm: 0.9999991296226893, iteration: 127295
loss: 1.01158607006073,grad_norm: 0.9999994495950965, iteration: 127296
loss: 0.9987159371376038,grad_norm: 0.9999990588113207, iteration: 127297
loss: 0.9862921237945557,grad_norm: 0.9664917269124386, iteration: 127298
loss: 1.00737464427948,grad_norm: 0.999999122024394, iteration: 127299
loss: 1.0179822444915771,grad_norm: 0.8694670845579497, iteration: 127300
loss: 0.9949038028717041,grad_norm: 0.9999992080719393, iteration: 127301
loss: 0.9727886915206909,grad_norm: 0.806460465969869, iteration: 127302
loss: 1.0157806873321533,grad_norm: 0.935866421928281, iteration: 127303
loss: 1.0040124654769897,grad_norm: 0.99999910465757, iteration: 127304
loss: 0.9836815595626831,grad_norm: 0.820633996524607, iteration: 127305
loss: 1.0074760913848877,grad_norm: 0.9999991738465461, iteration: 127306
loss: 1.0105280876159668,grad_norm: 0.9522938051968296, iteration: 127307
loss: 1.0203773975372314,grad_norm: 0.9999993433316099, iteration: 127308
loss: 1.0017346143722534,grad_norm: 0.9999991413167916, iteration: 127309
loss: 1.017511248588562,grad_norm: 0.9999992971372915, iteration: 127310
loss: 0.9717572927474976,grad_norm: 0.9999994571012563, iteration: 127311
loss: 1.0430623292922974,grad_norm: 0.9999989855150162, iteration: 127312
loss: 1.0258394479751587,grad_norm: 0.9999993180796342, iteration: 127313
loss: 1.003308892250061,grad_norm: 0.918727716315688, iteration: 127314
loss: 0.9735538363456726,grad_norm: 0.9999990046494521, iteration: 127315
loss: 0.995969831943512,grad_norm: 0.9999991161531593, iteration: 127316
loss: 1.0152941942214966,grad_norm: 0.9999990895478253, iteration: 127317
loss: 1.0085264444351196,grad_norm: 0.9905279309686543, iteration: 127318
loss: 1.0110783576965332,grad_norm: 0.9999991031362381, iteration: 127319
loss: 0.9966269135475159,grad_norm: 0.9227240367279019, iteration: 127320
loss: 1.024419903755188,grad_norm: 0.9860456174850564, iteration: 127321
loss: 1.0040816068649292,grad_norm: 0.876399165918664, iteration: 127322
loss: 1.0518615245819092,grad_norm: 0.9264144405320237, iteration: 127323
loss: 0.9793951511383057,grad_norm: 0.9451151386107166, iteration: 127324
loss: 1.0103323459625244,grad_norm: 0.9864217762559113, iteration: 127325
loss: 0.9827268719673157,grad_norm: 0.9999990076233507, iteration: 127326
loss: 0.9726184010505676,grad_norm: 0.9999990780466805, iteration: 127327
loss: 0.9730289578437805,grad_norm: 0.8047044295923185, iteration: 127328
loss: 1.0194586515426636,grad_norm: 0.9999991189749009, iteration: 127329
loss: 0.9955446124076843,grad_norm: 0.9999990066622759, iteration: 127330
loss: 0.9891456961631775,grad_norm: 0.9999990462877285, iteration: 127331
loss: 0.9570245742797852,grad_norm: 0.9999991912963822, iteration: 127332
loss: 0.987038791179657,grad_norm: 0.9855770535776108, iteration: 127333
loss: 1.004077434539795,grad_norm: 0.9776362529132606, iteration: 127334
loss: 0.9833694696426392,grad_norm: 0.9215712343760166, iteration: 127335
loss: 0.9905377626419067,grad_norm: 0.9999991567619503, iteration: 127336
loss: 1.055920124053955,grad_norm: 0.9976424608538765, iteration: 127337
loss: 0.955178439617157,grad_norm: 0.9999991399785376, iteration: 127338
loss: 0.9768544435501099,grad_norm: 0.9418278547531981, iteration: 127339
loss: 0.9523313641548157,grad_norm: 0.9999989666516557, iteration: 127340
loss: 0.9750299453735352,grad_norm: 0.9124045268457659, iteration: 127341
loss: 1.0424060821533203,grad_norm: 0.999999169556858, iteration: 127342
loss: 1.0078033208847046,grad_norm: 0.9999992796340824, iteration: 127343
loss: 0.9901601672172546,grad_norm: 0.9999990811524954, iteration: 127344
loss: 1.0211979150772095,grad_norm: 0.9999991028341465, iteration: 127345
loss: 0.9409955143928528,grad_norm: 0.8320349833119834, iteration: 127346
loss: 0.9753475785255432,grad_norm: 0.9999990539968076, iteration: 127347
loss: 1.0037236213684082,grad_norm: 0.999999193470715, iteration: 127348
loss: 1.010011911392212,grad_norm: 0.9999990639378493, iteration: 127349
loss: 1.0200401544570923,grad_norm: 0.9999991718573253, iteration: 127350
loss: 1.0073696374893188,grad_norm: 0.9675317715582279, iteration: 127351
loss: 0.9941090941429138,grad_norm: 0.9726697099356947, iteration: 127352
loss: 0.9921886324882507,grad_norm: 0.9999990796604938, iteration: 127353
loss: 0.9933106303215027,grad_norm: 0.9999990535572535, iteration: 127354
loss: 1.000859260559082,grad_norm: 0.9397453582893811, iteration: 127355
loss: 1.0360411405563354,grad_norm: 0.9999990201030514, iteration: 127356
loss: 1.0050181150436401,grad_norm: 0.8785900263061839, iteration: 127357
loss: 0.9714629650115967,grad_norm: 0.9999991973517183, iteration: 127358
loss: 0.985601007938385,grad_norm: 0.9820627472181419, iteration: 127359
loss: 0.9427613019943237,grad_norm: 0.9999990394814531, iteration: 127360
loss: 1.020828366279602,grad_norm: 0.999999081681254, iteration: 127361
loss: 1.0228726863861084,grad_norm: 0.9999991503440268, iteration: 127362
loss: 1.0022464990615845,grad_norm: 0.9999990371796605, iteration: 127363
loss: 0.9769070744514465,grad_norm: 0.8979916538773874, iteration: 127364
loss: 0.9920677542686462,grad_norm: 0.9999992348599144, iteration: 127365
loss: 0.9992647767066956,grad_norm: 0.9999996458308062, iteration: 127366
loss: 1.020442008972168,grad_norm: 0.9999995635630345, iteration: 127367
loss: 0.9669908881187439,grad_norm: 0.8639135785550682, iteration: 127368
loss: 0.9755800366401672,grad_norm: 0.9999998944496788, iteration: 127369
loss: 1.0041905641555786,grad_norm: 0.922543677189609, iteration: 127370
loss: 0.9498190879821777,grad_norm: 0.8762111407420832, iteration: 127371
loss: 0.9902317523956299,grad_norm: 0.9999992411856383, iteration: 127372
loss: 1.0102523565292358,grad_norm: 0.9999989261869913, iteration: 127373
loss: 0.955666720867157,grad_norm: 0.9999898777474785, iteration: 127374
loss: 1.0171751976013184,grad_norm: 0.9999992407253485, iteration: 127375
loss: 0.9698213338851929,grad_norm: 0.9999990925588985, iteration: 127376
loss: 1.024826169013977,grad_norm: 0.9999990506782453, iteration: 127377
loss: 0.9850437045097351,grad_norm: 0.9999991187978582, iteration: 127378
loss: 1.0138424634933472,grad_norm: 0.9999995287937514, iteration: 127379
loss: 1.056844711303711,grad_norm: 0.9999989709879387, iteration: 127380
loss: 1.0163602828979492,grad_norm: 0.9999990441392964, iteration: 127381
loss: 0.9755133390426636,grad_norm: 0.9919244029722325, iteration: 127382
loss: 0.9675634503364563,grad_norm: 0.9999991579601074, iteration: 127383
loss: 0.9561261534690857,grad_norm: 0.9626499830649367, iteration: 127384
loss: 0.98161381483078,grad_norm: 0.999999167967379, iteration: 127385
loss: 1.0272586345672607,grad_norm: 0.9999993478620285, iteration: 127386
loss: 1.052284836769104,grad_norm: 0.9999997023924143, iteration: 127387
loss: 0.9741155505180359,grad_norm: 0.9999991607506716, iteration: 127388
loss: 1.0016953945159912,grad_norm: 0.9999990343387628, iteration: 127389
loss: 0.9887024164199829,grad_norm: 0.977902942326208, iteration: 127390
loss: 1.0116180181503296,grad_norm: 0.9999994877913385, iteration: 127391
loss: 1.0253336429595947,grad_norm: 0.9618652345963057, iteration: 127392
loss: 1.0137263536453247,grad_norm: 0.9999991165534442, iteration: 127393
loss: 1.021572232246399,grad_norm: 0.9343277442434009, iteration: 127394
loss: 0.9919931888580322,grad_norm: 0.9999991230355992, iteration: 127395
loss: 0.9620450735092163,grad_norm: 0.9999991314736845, iteration: 127396
loss: 0.9988800287246704,grad_norm: 0.9999991186480662, iteration: 127397
loss: 0.9859036803245544,grad_norm: 0.9537349156703511, iteration: 127398
loss: 1.0079158544540405,grad_norm: 0.8910958306813077, iteration: 127399
loss: 1.0346828699111938,grad_norm: 0.9999994196429216, iteration: 127400
loss: 0.9948146939277649,grad_norm: 0.9999992286613448, iteration: 127401
loss: 1.0149773359298706,grad_norm: 0.9999990225421396, iteration: 127402
loss: 0.9733167290687561,grad_norm: 0.9999991445399644, iteration: 127403
loss: 0.9924784302711487,grad_norm: 0.9999990804185394, iteration: 127404
loss: 0.9855453372001648,grad_norm: 0.9490354426711178, iteration: 127405
loss: 0.9812100529670715,grad_norm: 0.906814069236556, iteration: 127406
loss: 1.0074647665023804,grad_norm: 0.9999990326718281, iteration: 127407
loss: 1.0193605422973633,grad_norm: 0.9999992837639349, iteration: 127408
loss: 0.9998008012771606,grad_norm: 0.9999990505016177, iteration: 127409
loss: 1.0057506561279297,grad_norm: 0.8741815930876861, iteration: 127410
loss: 0.9870822429656982,grad_norm: 0.9999990140899834, iteration: 127411
loss: 1.0081654787063599,grad_norm: 0.9999991378648913, iteration: 127412
loss: 1.0085556507110596,grad_norm: 0.8805831162207444, iteration: 127413
loss: 1.0135706663131714,grad_norm: 0.9999991844848662, iteration: 127414
loss: 1.0092124938964844,grad_norm: 0.9999991521982522, iteration: 127415
loss: 0.922683835029602,grad_norm: 0.9999992964295028, iteration: 127416
loss: 0.9796551465988159,grad_norm: 0.9709530378343014, iteration: 127417
loss: 0.9980307221412659,grad_norm: 0.9176389716750216, iteration: 127418
loss: 0.9955137372016907,grad_norm: 0.9999991170625788, iteration: 127419
loss: 0.9961404800415039,grad_norm: 0.8914089756522453, iteration: 127420
loss: 1.0053484439849854,grad_norm: 0.999999320109929, iteration: 127421
loss: 1.0330408811569214,grad_norm: 0.9999992945864354, iteration: 127422
loss: 1.0028064250946045,grad_norm: 0.999999013352362, iteration: 127423
loss: 1.0284979343414307,grad_norm: 0.9999989047725295, iteration: 127424
loss: 1.0051103830337524,grad_norm: 0.9180087690222875, iteration: 127425
loss: 0.9885439872741699,grad_norm: 0.8776368371194194, iteration: 127426
loss: 1.0057114362716675,grad_norm: 0.8892358447682134, iteration: 127427
loss: 1.0145061016082764,grad_norm: 0.9999990993448377, iteration: 127428
loss: 1.0064606666564941,grad_norm: 0.999999098441925, iteration: 127429
loss: 1.0152003765106201,grad_norm: 0.999999010535145, iteration: 127430
loss: 0.9757015109062195,grad_norm: 0.9999991349873427, iteration: 127431
loss: 1.004217267036438,grad_norm: 0.9999990820707579, iteration: 127432
loss: 1.0141525268554688,grad_norm: 0.9098182338876275, iteration: 127433
loss: 0.986603856086731,grad_norm: 0.9509028137385904, iteration: 127434
loss: 0.9654262065887451,grad_norm: 0.9999990826617484, iteration: 127435
loss: 1.0136791467666626,grad_norm: 0.99999909357935, iteration: 127436
loss: 1.0002449750900269,grad_norm: 0.919661618140276, iteration: 127437
loss: 0.9998490214347839,grad_norm: 0.9999990832608532, iteration: 127438
loss: 0.9711613655090332,grad_norm: 0.9999990540988186, iteration: 127439
loss: 1.0116521120071411,grad_norm: 0.999999129297182, iteration: 127440
loss: 0.965670645236969,grad_norm: 0.9999991405964523, iteration: 127441
loss: 1.0059618949890137,grad_norm: 0.9393829105597123, iteration: 127442
loss: 1.016038179397583,grad_norm: 0.9999990883651217, iteration: 127443
loss: 1.0101326704025269,grad_norm: 0.9999991358404847, iteration: 127444
loss: 1.0152934789657593,grad_norm: 0.92547558263601, iteration: 127445
loss: 0.9700166583061218,grad_norm: 0.9999989233401021, iteration: 127446
loss: 1.0153024196624756,grad_norm: 0.9128543136915344, iteration: 127447
loss: 0.9967805743217468,grad_norm: 0.9756169852820312, iteration: 127448
loss: 1.0272372961044312,grad_norm: 0.9999990708074354, iteration: 127449
loss: 1.0331194400787354,grad_norm: 0.999999188263496, iteration: 127450
loss: 0.9921174645423889,grad_norm: 0.9999991928511375, iteration: 127451
loss: 0.9981713891029358,grad_norm: 0.9999991263253301, iteration: 127452
loss: 1.0185009241104126,grad_norm: 0.9092725256371573, iteration: 127453
loss: 1.0048296451568604,grad_norm: 0.9442201736114139, iteration: 127454
loss: 1.011885166168213,grad_norm: 0.9999999007277082, iteration: 127455
loss: 1.0431114435195923,grad_norm: 0.9892473310538843, iteration: 127456
loss: 0.9814466238021851,grad_norm: 0.9999990444576375, iteration: 127457
loss: 1.0207903385162354,grad_norm: 0.9999989695984457, iteration: 127458
loss: 1.0376758575439453,grad_norm: 0.9999991265518492, iteration: 127459
loss: 0.9576530456542969,grad_norm: 0.9999991716940334, iteration: 127460
loss: 1.0188556909561157,grad_norm: 0.9999990760367242, iteration: 127461
loss: 1.0261552333831787,grad_norm: 0.9357183709143414, iteration: 127462
loss: 1.0197192430496216,grad_norm: 0.999999190423142, iteration: 127463
loss: 0.9781067371368408,grad_norm: 0.9878582934386406, iteration: 127464
loss: 0.9952744245529175,grad_norm: 0.9610041930363137, iteration: 127465
loss: 1.0076215267181396,grad_norm: 0.9999991169569898, iteration: 127466
loss: 1.0317944288253784,grad_norm: 0.9079271532030162, iteration: 127467
loss: 1.0076905488967896,grad_norm: 0.9999993804262604, iteration: 127468
loss: 1.0163030624389648,grad_norm: 0.9418546226366199, iteration: 127469
loss: 0.9633749127388,grad_norm: 0.9999990881191352, iteration: 127470
loss: 1.0169398784637451,grad_norm: 0.8344264427752498, iteration: 127471
loss: 1.010847568511963,grad_norm: 0.9999993589919529, iteration: 127472
loss: 0.9821270704269409,grad_norm: 0.9913654167408935, iteration: 127473
loss: 1.01529061794281,grad_norm: 0.9999992723992523, iteration: 127474
loss: 0.9840777516365051,grad_norm: 0.9999991167029183, iteration: 127475
loss: 1.0149428844451904,grad_norm: 0.9525362616348445, iteration: 127476
loss: 1.0208966732025146,grad_norm: 0.9999990144041182, iteration: 127477
loss: 0.9675402045249939,grad_norm: 0.9999991688317631, iteration: 127478
loss: 1.0090810060501099,grad_norm: 0.9999991619962221, iteration: 127479
loss: 0.9885003566741943,grad_norm: 0.9245018239030801, iteration: 127480
loss: 0.9827526807785034,grad_norm: 0.9992882246503482, iteration: 127481
loss: 1.0030924081802368,grad_norm: 0.9999992208357487, iteration: 127482
loss: 0.995968222618103,grad_norm: 0.9742288747118021, iteration: 127483
loss: 0.9956477880477905,grad_norm: 0.999999021144377, iteration: 127484
loss: 0.9946237206459045,grad_norm: 0.9896791604189485, iteration: 127485
loss: 1.0030275583267212,grad_norm: 0.9999991535859443, iteration: 127486
loss: 0.9601194858551025,grad_norm: 0.9999992017188006, iteration: 127487
loss: 1.0061646699905396,grad_norm: 0.9673922643925954, iteration: 127488
loss: 1.0590543746948242,grad_norm: 0.9999991903531972, iteration: 127489
loss: 0.9960163831710815,grad_norm: 0.9547532428127737, iteration: 127490
loss: 0.9702805876731873,grad_norm: 0.9146347130687998, iteration: 127491
loss: 0.9919014573097229,grad_norm: 0.9302140005182022, iteration: 127492
loss: 1.048591136932373,grad_norm: 0.9999991810645099, iteration: 127493
loss: 0.992911159992218,grad_norm: 0.9831871300684792, iteration: 127494
loss: 0.9844576716423035,grad_norm: 0.9745207399504074, iteration: 127495
loss: 0.9957517981529236,grad_norm: 0.9440316958375659, iteration: 127496
loss: 1.0107464790344238,grad_norm: 0.9999990894411513, iteration: 127497
loss: 0.9959017634391785,grad_norm: 0.9999991442630944, iteration: 127498
loss: 0.9764305949211121,grad_norm: 0.8661175257095585, iteration: 127499
loss: 0.9915246367454529,grad_norm: 0.9999991931280031, iteration: 127500
loss: 0.9867808222770691,grad_norm: 0.982384454001071, iteration: 127501
loss: 0.968893826007843,grad_norm: 0.9999992222611384, iteration: 127502
loss: 1.0205705165863037,grad_norm: 0.9999991846561163, iteration: 127503
loss: 1.025610089302063,grad_norm: 0.952119219560215, iteration: 127504
loss: 0.9981592297554016,grad_norm: 0.895139326350012, iteration: 127505
loss: 1.0246793031692505,grad_norm: 0.9970214339531804, iteration: 127506
loss: 1.0335466861724854,grad_norm: 0.9999992244681293, iteration: 127507
loss: 1.0009578466415405,grad_norm: 0.9691335834739095, iteration: 127508
loss: 0.9952834844589233,grad_norm: 0.9999990317642073, iteration: 127509
loss: 1.1828230619430542,grad_norm: 0.9999994682257323, iteration: 127510
loss: 1.0315039157867432,grad_norm: 0.9952659070253053, iteration: 127511
loss: 1.0094062089920044,grad_norm: 0.9999992143677784, iteration: 127512
loss: 0.9991691708564758,grad_norm: 0.9863371676864012, iteration: 127513
loss: 1.0549477338790894,grad_norm: 0.9103519887501147, iteration: 127514
loss: 0.9854337573051453,grad_norm: 0.9999989435964639, iteration: 127515
loss: 0.9855532050132751,grad_norm: 0.9766972378216126, iteration: 127516
loss: 0.9666303396224976,grad_norm: 0.9999992345364562, iteration: 127517
loss: 1.001039743423462,grad_norm: 0.9999990699575306, iteration: 127518
loss: 1.0115017890930176,grad_norm: 0.9912711574988122, iteration: 127519
loss: 1.0237908363342285,grad_norm: 0.9999992341632118, iteration: 127520
loss: 0.918074369430542,grad_norm: 0.9773052715483264, iteration: 127521
loss: 0.9765522480010986,grad_norm: 0.8044077904125072, iteration: 127522
loss: 1.0154163837432861,grad_norm: 0.9999991520362683, iteration: 127523
loss: 0.9958392977714539,grad_norm: 0.9341208110860583, iteration: 127524
loss: 0.98773592710495,grad_norm: 0.9999992335649599, iteration: 127525
loss: 1.021218180656433,grad_norm: 0.9999990779810654, iteration: 127526
loss: 0.9950045943260193,grad_norm: 0.9999991335917514, iteration: 127527
loss: 1.0020372867584229,grad_norm: 0.9999991619787643, iteration: 127528
loss: 0.9951006174087524,grad_norm: 0.9278050626801607, iteration: 127529
loss: 1.0102213621139526,grad_norm: 0.9999992300596189, iteration: 127530
loss: 1.023421049118042,grad_norm: 0.9999990643843142, iteration: 127531
loss: 0.991213858127594,grad_norm: 0.9929088818919652, iteration: 127532
loss: 1.0251260995864868,grad_norm: 0.9999990878616337, iteration: 127533
loss: 1.0201131105422974,grad_norm: 0.9160974654954235, iteration: 127534
loss: 1.0254403352737427,grad_norm: 0.9999990976490151, iteration: 127535
loss: 0.9980857968330383,grad_norm: 0.9999990971190175, iteration: 127536
loss: 0.9998006224632263,grad_norm: 0.9999992308990353, iteration: 127537
loss: 1.030402421951294,grad_norm: 0.8431789417951716, iteration: 127538
loss: 0.973688006401062,grad_norm: 0.9999991803673939, iteration: 127539
loss: 1.0003678798675537,grad_norm: 0.9999990125896608, iteration: 127540
loss: 0.9950320720672607,grad_norm: 0.9431384557472475, iteration: 127541
loss: 0.9822032451629639,grad_norm: 0.9993402383699992, iteration: 127542
loss: 0.9734850525856018,grad_norm: 0.99999898332244, iteration: 127543
loss: 1.023438572883606,grad_norm: 0.9992959674882569, iteration: 127544
loss: 0.9638828039169312,grad_norm: 0.9999992112265592, iteration: 127545
loss: 1.01106595993042,grad_norm: 0.9447137357675424, iteration: 127546
loss: 1.0212339162826538,grad_norm: 0.9999990558686939, iteration: 127547
loss: 1.0070704221725464,grad_norm: 0.9999991973867954, iteration: 127548
loss: 0.9572672843933105,grad_norm: 0.9999989959751495, iteration: 127549
loss: 1.0111020803451538,grad_norm: 0.8859888630195186, iteration: 127550
loss: 1.0091485977172852,grad_norm: 0.9999991942906162, iteration: 127551
loss: 1.006898283958435,grad_norm: 0.9999992516667047, iteration: 127552
loss: 1.0031983852386475,grad_norm: 0.9999991424935034, iteration: 127553
loss: 0.995140790939331,grad_norm: 0.9999990172921611, iteration: 127554
loss: 1.00627863407135,grad_norm: 0.9051110128260756, iteration: 127555
loss: 0.9892303347587585,grad_norm: 0.9250723944554137, iteration: 127556
loss: 1.0209943056106567,grad_norm: 0.9999990756342332, iteration: 127557
loss: 0.9656186699867249,grad_norm: 0.8728694157435994, iteration: 127558
loss: 1.027714490890503,grad_norm: 0.7387649005467604, iteration: 127559
loss: 1.0287256240844727,grad_norm: 0.9999992192583154, iteration: 127560
loss: 1.026496410369873,grad_norm: 0.9707601105812123, iteration: 127561
loss: 0.9903963208198547,grad_norm: 0.9999991783337088, iteration: 127562
loss: 1.0092926025390625,grad_norm: 0.9999991462517467, iteration: 127563
loss: 1.0006046295166016,grad_norm: 0.9999991248599309, iteration: 127564
loss: 0.9934952855110168,grad_norm: 0.9999990481783673, iteration: 127565
loss: 1.0365469455718994,grad_norm: 0.9999992108237924, iteration: 127566
loss: 1.053996205329895,grad_norm: 0.8809047383650374, iteration: 127567
loss: 1.0015172958374023,grad_norm: 0.999999201272145, iteration: 127568
loss: 0.9950684905052185,grad_norm: 0.9999992069747969, iteration: 127569
loss: 1.007236123085022,grad_norm: 0.9999992411890499, iteration: 127570
loss: 1.0042698383331299,grad_norm: 0.9177426144171763, iteration: 127571
loss: 1.0146880149841309,grad_norm: 0.8882470252635503, iteration: 127572
loss: 1.0065745115280151,grad_norm: 0.9999991589366589, iteration: 127573
loss: 1.017017126083374,grad_norm: 0.9999991163455977, iteration: 127574
loss: 0.992057740688324,grad_norm: 0.988014180090733, iteration: 127575
loss: 0.9928858280181885,grad_norm: 0.9555496374537795, iteration: 127576
loss: 0.9698197245597839,grad_norm: 0.9999992224118293, iteration: 127577
loss: 1.0201044082641602,grad_norm: 0.999999220423916, iteration: 127578
loss: 1.0012472867965698,grad_norm: 0.9999990508281291, iteration: 127579
loss: 1.002794623374939,grad_norm: 0.999999174923253, iteration: 127580
loss: 1.000596046447754,grad_norm: 0.9739252425887391, iteration: 127581
loss: 1.0166873931884766,grad_norm: 0.9999992347668516, iteration: 127582
loss: 0.9850465655326843,grad_norm: 0.919981787352753, iteration: 127583
loss: 0.9886736869812012,grad_norm: 0.9342081574926772, iteration: 127584
loss: 1.000674843788147,grad_norm: 0.9999991414649186, iteration: 127585
loss: 0.9749832153320312,grad_norm: 0.9999991384849409, iteration: 127586
loss: 1.024709701538086,grad_norm: 0.9754894528510553, iteration: 127587
loss: 1.0122592449188232,grad_norm: 0.999999106128477, iteration: 127588
loss: 0.9700928330421448,grad_norm: 0.9999992084578525, iteration: 127589
loss: 0.9738185405731201,grad_norm: 0.9999991378705687, iteration: 127590
loss: 1.025860071182251,grad_norm: 0.999999006613753, iteration: 127591
loss: 1.0171265602111816,grad_norm: 0.8932127871589938, iteration: 127592
loss: 0.9691975116729736,grad_norm: 0.9832652769636459, iteration: 127593
loss: 1.0049656629562378,grad_norm: 0.9999991134872209, iteration: 127594
loss: 0.9716454148292542,grad_norm: 0.9235655443848654, iteration: 127595
loss: 1.0295474529266357,grad_norm: 0.9999989867714613, iteration: 127596
loss: 0.9944466352462769,grad_norm: 0.9999990715892516, iteration: 127597
loss: 1.0084929466247559,grad_norm: 0.9999997078067262, iteration: 127598
loss: 1.0188407897949219,grad_norm: 0.9999989746642353, iteration: 127599
loss: 0.959619402885437,grad_norm: 0.9942924068662519, iteration: 127600
loss: 0.968313455581665,grad_norm: 0.8149722763132354, iteration: 127601
loss: 1.0051262378692627,grad_norm: 0.9999989906862318, iteration: 127602
loss: 0.9772722125053406,grad_norm: 0.999999102030841, iteration: 127603
loss: 1.032259464263916,grad_norm: 0.9999990209810199, iteration: 127604
loss: 1.002848744392395,grad_norm: 0.9122381176107021, iteration: 127605
loss: 0.9696399569511414,grad_norm: 0.9999991769707366, iteration: 127606
loss: 0.994983971118927,grad_norm: 0.9999990526759103, iteration: 127607
loss: 1.0240492820739746,grad_norm: 0.9999992270439718, iteration: 127608
loss: 0.9853253960609436,grad_norm: 0.908423858972496, iteration: 127609
loss: 1.0146082639694214,grad_norm: 0.9999992235769832, iteration: 127610
loss: 1.0392272472381592,grad_norm: 0.99999922406088, iteration: 127611
loss: 1.0132747888565063,grad_norm: 0.9193354537075591, iteration: 127612
loss: 1.0092706680297852,grad_norm: 0.999999132422015, iteration: 127613
loss: 0.987955629825592,grad_norm: 0.9999995844867209, iteration: 127614
loss: 0.9913375973701477,grad_norm: 0.9999991488134822, iteration: 127615
loss: 1.0025817155838013,grad_norm: 0.9999992185815361, iteration: 127616
loss: 0.9809262156486511,grad_norm: 0.9999991257769796, iteration: 127617
loss: 1.0605063438415527,grad_norm: 0.9999991806504908, iteration: 127618
loss: 1.024403691291809,grad_norm: 0.999999003490932, iteration: 127619
loss: 0.9881500005722046,grad_norm: 0.9999990119085798, iteration: 127620
loss: 0.990666925907135,grad_norm: 0.9999992307772447, iteration: 127621
loss: 0.994499921798706,grad_norm: 0.8609643014361029, iteration: 127622
loss: 1.0088530778884888,grad_norm: 0.9681775004405493, iteration: 127623
loss: 1.0011361837387085,grad_norm: 0.9172834896923605, iteration: 127624
loss: 1.0469123125076294,grad_norm: 0.9999990581753586, iteration: 127625
loss: 0.9854338765144348,grad_norm: 0.8389477148016642, iteration: 127626
loss: 0.9844987392425537,grad_norm: 0.9999990331530472, iteration: 127627
loss: 0.9863247871398926,grad_norm: 0.9999992037833664, iteration: 127628
loss: 1.0869265794754028,grad_norm: 0.9999989830799247, iteration: 127629
loss: 0.9979702234268188,grad_norm: 0.9999991291671008, iteration: 127630
loss: 1.0325334072113037,grad_norm: 0.9558626255895633, iteration: 127631
loss: 1.0062942504882812,grad_norm: 0.9999991202924098, iteration: 127632
loss: 1.0422030687332153,grad_norm: 0.9999992281573753, iteration: 127633
loss: 0.9953319430351257,grad_norm: 0.9999992053577074, iteration: 127634
loss: 0.9821080565452576,grad_norm: 0.9999991840108183, iteration: 127635
loss: 1.0138356685638428,grad_norm: 0.9940791546433555, iteration: 127636
loss: 0.9939307570457458,grad_norm: 0.9999991979540428, iteration: 127637
loss: 1.0050948858261108,grad_norm: 0.9648798957922841, iteration: 127638
loss: 1.0115138292312622,grad_norm: 0.9999993005398403, iteration: 127639
loss: 0.9929575324058533,grad_norm: 0.9546318381210818, iteration: 127640
loss: 1.0106699466705322,grad_norm: 0.9999989898793791, iteration: 127641
loss: 0.9862192273139954,grad_norm: 0.9999991492262491, iteration: 127642
loss: 1.0296677350997925,grad_norm: 0.9999990313767599, iteration: 127643
loss: 1.1049749851226807,grad_norm: 0.9999991308147118, iteration: 127644
loss: 1.0551912784576416,grad_norm: 0.9999994805376283, iteration: 127645
loss: 1.0069879293441772,grad_norm: 0.9703849908884444, iteration: 127646
loss: 0.9634948968887329,grad_norm: 0.999999001404102, iteration: 127647
loss: 1.0102322101593018,grad_norm: 0.9999991612872942, iteration: 127648
loss: 1.0198923349380493,grad_norm: 0.9999991449059987, iteration: 127649
loss: 0.9696207642555237,grad_norm: 0.9792344906487643, iteration: 127650
loss: 1.0476415157318115,grad_norm: 0.9051695230956518, iteration: 127651
loss: 0.9839069843292236,grad_norm: 0.9999992846265338, iteration: 127652
loss: 0.9932920336723328,grad_norm: 0.9999991812224085, iteration: 127653
loss: 1.0533941984176636,grad_norm: 0.9999997688868173, iteration: 127654
loss: 1.016932487487793,grad_norm: 0.9999991906661295, iteration: 127655
loss: 1.0244547128677368,grad_norm: 0.9999992707285514, iteration: 127656
loss: 0.9931524991989136,grad_norm: 0.9999991765134568, iteration: 127657
loss: 0.9801367521286011,grad_norm: 0.9999990120653853, iteration: 127658
loss: 0.9860910773277283,grad_norm: 0.9762939236246878, iteration: 127659
loss: 0.9832224249839783,grad_norm: 0.8898787867181951, iteration: 127660
loss: 0.995120644569397,grad_norm: 0.9800643314269076, iteration: 127661
loss: 1.0728347301483154,grad_norm: 0.9999990989870061, iteration: 127662
loss: 0.9511333703994751,grad_norm: 0.9999990073212227, iteration: 127663
loss: 0.9884267449378967,grad_norm: 0.9047952111248904, iteration: 127664
loss: 1.0199097394943237,grad_norm: 0.9999991727176938, iteration: 127665
loss: 0.9764049649238586,grad_norm: 0.9389820083494588, iteration: 127666
loss: 0.9659637212753296,grad_norm: 0.8411996616137021, iteration: 127667
loss: 0.9712561964988708,grad_norm: 0.9479998023407615, iteration: 127668
loss: 0.9955675601959229,grad_norm: 0.9168671658407732, iteration: 127669
loss: 1.0131100416183472,grad_norm: 0.9921661932629371, iteration: 127670
loss: 0.9986974596977234,grad_norm: 0.999999266536028, iteration: 127671
loss: 0.9884587526321411,grad_norm: 0.9999991926538314, iteration: 127672
loss: 0.9773792624473572,grad_norm: 0.9999991421975506, iteration: 127673
loss: 0.9828062653541565,grad_norm: 0.9999991619268744, iteration: 127674
loss: 1.0037654638290405,grad_norm: 0.999998899111075, iteration: 127675
loss: 0.9818856120109558,grad_norm: 0.9999991833388374, iteration: 127676
loss: 1.014511227607727,grad_norm: 0.9999990314291313, iteration: 127677
loss: 0.9439495801925659,grad_norm: 0.9999991866804937, iteration: 127678
loss: 0.9915640354156494,grad_norm: 0.9999991424389166, iteration: 127679
loss: 1.0912379026412964,grad_norm: 0.9999998240994383, iteration: 127680
loss: 0.9896069765090942,grad_norm: 0.992348243240194, iteration: 127681
loss: 0.9529216885566711,grad_norm: 0.9601265635256566, iteration: 127682
loss: 1.017903208732605,grad_norm: 0.9999991993243093, iteration: 127683
loss: 1.0055558681488037,grad_norm: 0.999998981173075, iteration: 127684
loss: 1.1686656475067139,grad_norm: 0.9998426385661433, iteration: 127685
loss: 0.9842817783355713,grad_norm: 0.9205857961881457, iteration: 127686
loss: 1.024003267288208,grad_norm: 0.9999991983014145, iteration: 127687
loss: 0.9858443140983582,grad_norm: 0.9999992126065099, iteration: 127688
loss: 0.9874652624130249,grad_norm: 0.9999989608468031, iteration: 127689
loss: 0.9982020854949951,grad_norm: 0.9936810904894575, iteration: 127690
loss: 1.000899314880371,grad_norm: 0.9237134094842581, iteration: 127691
loss: 1.0083292722702026,grad_norm: 0.9999990456462056, iteration: 127692
loss: 0.9817620515823364,grad_norm: 0.8232970959440261, iteration: 127693
loss: 0.954669713973999,grad_norm: 0.9999990872868586, iteration: 127694
loss: 1.005447268486023,grad_norm: 0.9999989754433245, iteration: 127695
loss: 1.0071333646774292,grad_norm: 0.95979920717302, iteration: 127696
loss: 1.0170164108276367,grad_norm: 0.9999991245274229, iteration: 127697
loss: 1.0195523500442505,grad_norm: 0.9999990086773148, iteration: 127698
loss: 0.9827983379364014,grad_norm: 0.9544448210710678, iteration: 127699
loss: 0.9949606657028198,grad_norm: 0.838130810780063, iteration: 127700
loss: 1.0162948369979858,grad_norm: 0.9291076244475113, iteration: 127701
loss: 0.9997293949127197,grad_norm: 0.9999991778160241, iteration: 127702
loss: 0.9917479753494263,grad_norm: 0.9999991544692297, iteration: 127703
loss: 1.0225074291229248,grad_norm: 0.9999990518011671, iteration: 127704
loss: 0.988015353679657,grad_norm: 0.9999991578972242, iteration: 127705
loss: 0.9742578864097595,grad_norm: 0.999999093386828, iteration: 127706
loss: 0.9736044406890869,grad_norm: 0.936895958651987, iteration: 127707
loss: 1.0015872716903687,grad_norm: 0.9080130089883016, iteration: 127708
loss: 1.0185645818710327,grad_norm: 0.9999991360302082, iteration: 127709
loss: 1.003632664680481,grad_norm: 0.9999991988400823, iteration: 127710
loss: 0.9636169075965881,grad_norm: 0.9999990954811409, iteration: 127711
loss: 1.0357016324996948,grad_norm: 0.9999991068626287, iteration: 127712
loss: 0.9890627264976501,grad_norm: 0.9999990068393162, iteration: 127713
loss: 0.9953529238700867,grad_norm: 0.999938410130676, iteration: 127714
loss: 1.0029650926589966,grad_norm: 0.9814489137488598, iteration: 127715
loss: 1.0111182928085327,grad_norm: 0.9305217538039238, iteration: 127716
loss: 0.9996856451034546,grad_norm: 0.9999992146360877, iteration: 127717
loss: 1.0109647512435913,grad_norm: 0.9999991375153003, iteration: 127718
loss: 0.9866639971733093,grad_norm: 0.9999990994984324, iteration: 127719
loss: 1.0031901597976685,grad_norm: 0.9999990803880374, iteration: 127720
loss: 0.997611939907074,grad_norm: 0.9999991176919626, iteration: 127721
loss: 0.9744248986244202,grad_norm: 0.9999990964419709, iteration: 127722
loss: 1.0020861625671387,grad_norm: 0.9999990771005608, iteration: 127723
loss: 1.0221728086471558,grad_norm: 0.9999991199411554, iteration: 127724
loss: 1.0120338201522827,grad_norm: 0.9217205719638433, iteration: 127725
loss: 0.9702965617179871,grad_norm: 0.9999990677755645, iteration: 127726
loss: 1.0303244590759277,grad_norm: 0.9999993199199895, iteration: 127727
loss: 1.0201478004455566,grad_norm: 0.9865494642238458, iteration: 127728
loss: 0.981802225112915,grad_norm: 0.9999991666232104, iteration: 127729
loss: 0.9993122816085815,grad_norm: 0.9358272341872613, iteration: 127730
loss: 0.9909860491752625,grad_norm: 0.9999992841027154, iteration: 127731
loss: 0.9961032271385193,grad_norm: 0.9768190691466262, iteration: 127732
loss: 1.0286900997161865,grad_norm: 0.9999994040284352, iteration: 127733
loss: 1.052679181098938,grad_norm: 0.9999992628252978, iteration: 127734
loss: 1.0088231563568115,grad_norm: 0.987236729830979, iteration: 127735
loss: 0.9913969039916992,grad_norm: 0.9999996582860239, iteration: 127736
loss: 0.9635298848152161,grad_norm: 0.9999991525894041, iteration: 127737
loss: 1.0088493824005127,grad_norm: 0.99999900546489, iteration: 127738
loss: 0.9733846783638,grad_norm: 0.9999990768915444, iteration: 127739
loss: 0.9844731092453003,grad_norm: 0.999999198043472, iteration: 127740
loss: 1.0594947338104248,grad_norm: 0.9999992067808391, iteration: 127741
loss: 0.9831339120864868,grad_norm: 0.8804503611573145, iteration: 127742
loss: 0.9826303720474243,grad_norm: 0.9921414624439299, iteration: 127743
loss: 1.008023977279663,grad_norm: 0.9999991244828998, iteration: 127744
loss: 0.9782468676567078,grad_norm: 0.9872702966895497, iteration: 127745
loss: 1.0250509977340698,grad_norm: 0.9999990898189642, iteration: 127746
loss: 1.0092650651931763,grad_norm: 0.999999731704479, iteration: 127747
loss: 0.9797810912132263,grad_norm: 0.9999990560768918, iteration: 127748
loss: 0.9988053441047668,grad_norm: 0.9999990421949443, iteration: 127749
loss: 0.9729297757148743,grad_norm: 0.9999992366477669, iteration: 127750
loss: 0.9920992255210876,grad_norm: 0.9999992461054512, iteration: 127751
loss: 0.99582439661026,grad_norm: 0.9999990888271183, iteration: 127752
loss: 0.9681147336959839,grad_norm: 0.9459505588164114, iteration: 127753
loss: 1.0063416957855225,grad_norm: 0.9999990666463396, iteration: 127754
loss: 0.9705813527107239,grad_norm: 0.9293297077719462, iteration: 127755
loss: 0.9973666667938232,grad_norm: 0.9999991179597213, iteration: 127756
loss: 1.0295416116714478,grad_norm: 0.9999997217514744, iteration: 127757
loss: 1.0171374082565308,grad_norm: 0.9999991170236607, iteration: 127758
loss: 1.023818016052246,grad_norm: 0.9999990408941171, iteration: 127759
loss: 1.0131598711013794,grad_norm: 0.9999990448833485, iteration: 127760
loss: 1.001325011253357,grad_norm: 0.7478431004848252, iteration: 127761
loss: 1.016432762145996,grad_norm: 0.9999991306220681, iteration: 127762
loss: 0.933335542678833,grad_norm: 0.9047802062959455, iteration: 127763
loss: 0.9947848916053772,grad_norm: 0.9999990265277707, iteration: 127764
loss: 1.0350770950317383,grad_norm: 0.9999993863160114, iteration: 127765
loss: 1.012848973274231,grad_norm: 0.9999990099903949, iteration: 127766
loss: 1.0080177783966064,grad_norm: 0.9840777748059891, iteration: 127767
loss: 0.9747121930122375,grad_norm: 0.9999990478850195, iteration: 127768
loss: 1.0061662197113037,grad_norm: 0.999999016994459, iteration: 127769
loss: 1.0099228620529175,grad_norm: 0.9999996198921142, iteration: 127770
loss: 1.0103888511657715,grad_norm: 0.9999990844457954, iteration: 127771
loss: 1.0116173028945923,grad_norm: 0.9583679665416442, iteration: 127772
loss: 1.0075496435165405,grad_norm: 0.9133512291054738, iteration: 127773
loss: 0.9839280247688293,grad_norm: 0.9999991863846087, iteration: 127774
loss: 1.0220338106155396,grad_norm: 0.999999267581049, iteration: 127775
loss: 1.0286712646484375,grad_norm: 0.999999300805266, iteration: 127776
loss: 0.9827411770820618,grad_norm: 0.908944796404506, iteration: 127777
loss: 1.0718801021575928,grad_norm: 0.9999997154833993, iteration: 127778
loss: 1.0867615938186646,grad_norm: 0.9999997807007338, iteration: 127779
loss: 0.9816656112670898,grad_norm: 0.9999991246544312, iteration: 127780
loss: 1.0059034824371338,grad_norm: 0.9999991494256288, iteration: 127781
loss: 0.9725135564804077,grad_norm: 0.9493085489516534, iteration: 127782
loss: 0.986049473285675,grad_norm: 0.9999989411032194, iteration: 127783
loss: 0.9897566437721252,grad_norm: 0.9832978884901534, iteration: 127784
loss: 1.0724782943725586,grad_norm: 0.9999990228074418, iteration: 127785
loss: 1.0266354084014893,grad_norm: 0.9395050476650942, iteration: 127786
loss: 1.027216911315918,grad_norm: 0.9999992711328245, iteration: 127787
loss: 0.984696626663208,grad_norm: 0.9999991476385991, iteration: 127788
loss: 1.0275869369506836,grad_norm: 0.9999990766375284, iteration: 127789
loss: 0.9506757855415344,grad_norm: 0.9395636285994278, iteration: 127790
loss: 0.9784824252128601,grad_norm: 0.9999990791979843, iteration: 127791
loss: 1.0126620531082153,grad_norm: 0.999999331280545, iteration: 127792
loss: 0.9748521447181702,grad_norm: 0.9999991900455126, iteration: 127793
loss: 1.0018770694732666,grad_norm: 0.8865173251202317, iteration: 127794
loss: 0.9971595406532288,grad_norm: 0.9999990308794281, iteration: 127795
loss: 1.0136548280715942,grad_norm: 0.9999989440589955, iteration: 127796
loss: 1.0756738185882568,grad_norm: 0.9999993864249368, iteration: 127797
loss: 1.024802803993225,grad_norm: 0.8748188942185963, iteration: 127798
loss: 1.0111373662948608,grad_norm: 0.977154063180608, iteration: 127799
loss: 1.0416052341461182,grad_norm: 0.9999994877987847, iteration: 127800
loss: 1.0040159225463867,grad_norm: 0.999999113141601, iteration: 127801
loss: 1.0063632726669312,grad_norm: 0.9197199005578119, iteration: 127802
loss: 0.9919298887252808,grad_norm: 0.939098135295975, iteration: 127803
loss: 1.015722632408142,grad_norm: 0.857143508297365, iteration: 127804
loss: 0.9721696972846985,grad_norm: 0.9999989988665094, iteration: 127805
loss: 1.019741415977478,grad_norm: 0.9322155206846278, iteration: 127806
loss: 0.9981995224952698,grad_norm: 0.9236614052360205, iteration: 127807
loss: 1.0094006061553955,grad_norm: 0.9999991107680964, iteration: 127808
loss: 0.9897358417510986,grad_norm: 0.873013414813711, iteration: 127809
loss: 1.0096553564071655,grad_norm: 0.9999992486585454, iteration: 127810
loss: 1.0079445838928223,grad_norm: 0.8940202399424403, iteration: 127811
loss: 0.9792121648788452,grad_norm: 0.8858299633634444, iteration: 127812
loss: 0.9987065196037292,grad_norm: 0.9999994822492517, iteration: 127813
loss: 1.0401479005813599,grad_norm: 0.9999991922150793, iteration: 127814
loss: 1.0284196138381958,grad_norm: 0.8711979748851459, iteration: 127815
loss: 1.0120422840118408,grad_norm: 0.9999989708376349, iteration: 127816
loss: 1.035754680633545,grad_norm: 0.9999998728458693, iteration: 127817
loss: 1.0008738040924072,grad_norm: 0.8072505812457657, iteration: 127818
loss: 0.9931524395942688,grad_norm: 0.9847775913396728, iteration: 127819
loss: 1.014340877532959,grad_norm: 0.9999990626995472, iteration: 127820
loss: 1.0220779180526733,grad_norm: 0.999999166602006, iteration: 127821
loss: 1.0081603527069092,grad_norm: 0.9999993115331394, iteration: 127822
loss: 1.0082734823226929,grad_norm: 0.9999991519363127, iteration: 127823
loss: 0.9968698620796204,grad_norm: 0.9912226696982965, iteration: 127824
loss: 1.0694066286087036,grad_norm: 0.9764819953849148, iteration: 127825
loss: 0.9719728827476501,grad_norm: 0.9132525902081399, iteration: 127826
loss: 1.042947769165039,grad_norm: 0.9999992211120843, iteration: 127827
loss: 1.000866413116455,grad_norm: 0.9991908831678723, iteration: 127828
loss: 1.0108041763305664,grad_norm: 0.9999991443421169, iteration: 127829
loss: 0.9932852387428284,grad_norm: 0.9999991746308854, iteration: 127830
loss: 0.9812148809432983,grad_norm: 0.9733318500290814, iteration: 127831
loss: 1.0178425312042236,grad_norm: 0.9999990093334924, iteration: 127832
loss: 1.0024913549423218,grad_norm: 0.8937368289061816, iteration: 127833
loss: 1.0071920156478882,grad_norm: 0.8514571704141906, iteration: 127834
loss: 1.0747283697128296,grad_norm: 0.999999046106757, iteration: 127835
loss: 1.0057601928710938,grad_norm: 0.9999990059175163, iteration: 127836
loss: 1.0165910720825195,grad_norm: 0.9999991006770107, iteration: 127837
loss: 0.9786720275878906,grad_norm: 0.8789919072372928, iteration: 127838
loss: 1.0189599990844727,grad_norm: 0.9999995930728227, iteration: 127839
loss: 0.983859658241272,grad_norm: 0.9638404585425162, iteration: 127840
loss: 1.0374420881271362,grad_norm: 0.8590837093775552, iteration: 127841
loss: 0.963646411895752,grad_norm: 0.9999991464196688, iteration: 127842
loss: 0.9982351064682007,grad_norm: 0.9999991353789557, iteration: 127843
loss: 0.9741498827934265,grad_norm: 0.999999133776025, iteration: 127844
loss: 1.0067640542984009,grad_norm: 0.8966251311778761, iteration: 127845
loss: 1.0156327486038208,grad_norm: 0.9999991399235545, iteration: 127846
loss: 1.0136436223983765,grad_norm: 0.9999991188933843, iteration: 127847
loss: 0.9960585236549377,grad_norm: 0.9999992736291043, iteration: 127848
loss: 1.0172247886657715,grad_norm: 0.979242082613998, iteration: 127849
loss: 0.9858933687210083,grad_norm: 0.9999990965488781, iteration: 127850
loss: 0.9782660007476807,grad_norm: 0.9921984962477084, iteration: 127851
loss: 1.041458010673523,grad_norm: 0.8992763390507267, iteration: 127852
loss: 0.9911169409751892,grad_norm: 0.92396798003321, iteration: 127853
loss: 0.9883707761764526,grad_norm: 0.9999996988833724, iteration: 127854
loss: 0.9646543264389038,grad_norm: 0.9999989764495887, iteration: 127855
loss: 0.9969254732131958,grad_norm: 0.9999992169893718, iteration: 127856
loss: 0.9805105328559875,grad_norm: 0.892572428873217, iteration: 127857
loss: 1.0247477293014526,grad_norm: 0.9999991538267194, iteration: 127858
loss: 0.9711114764213562,grad_norm: 0.9529641250751177, iteration: 127859
loss: 0.9864965677261353,grad_norm: 0.899367146252553, iteration: 127860
loss: 0.9828817248344421,grad_norm: 0.9999990782993216, iteration: 127861
loss: 1.0088704824447632,grad_norm: 0.8984014412981176, iteration: 127862
loss: 0.9759188890457153,grad_norm: 0.9855292953146999, iteration: 127863
loss: 0.9563074707984924,grad_norm: 0.9900090650258699, iteration: 127864
loss: 0.9891196489334106,grad_norm: 0.9600713258645053, iteration: 127865
loss: 1.01788330078125,grad_norm: 0.9999999017189999, iteration: 127866
loss: 1.0246772766113281,grad_norm: 0.9999995892373555, iteration: 127867
loss: 1.044873595237732,grad_norm: 0.9999991164048895, iteration: 127868
loss: 1.0049641132354736,grad_norm: 0.9681302179224144, iteration: 127869
loss: 0.9901465177536011,grad_norm: 0.9999990166272017, iteration: 127870
loss: 1.013122797012329,grad_norm: 0.9999991672717, iteration: 127871
loss: 1.0083341598510742,grad_norm: 0.9999990967986013, iteration: 127872
loss: 1.0076956748962402,grad_norm: 0.8551574937219378, iteration: 127873
loss: 0.981410801410675,grad_norm: 0.9060203867117828, iteration: 127874
loss: 1.022600531578064,grad_norm: 0.8770134817195055, iteration: 127875
loss: 1.009216547012329,grad_norm: 0.9087614059672477, iteration: 127876
loss: 1.0005918741226196,grad_norm: 0.9999992281698957, iteration: 127877
loss: 0.9982802271842957,grad_norm: 0.9912761548142993, iteration: 127878
loss: 0.9966175556182861,grad_norm: 0.9999991558511172, iteration: 127879
loss: 0.9971510767936707,grad_norm: 0.9999991916542565, iteration: 127880
loss: 0.9947546720504761,grad_norm: 0.9999991551321403, iteration: 127881
loss: 1.0201961994171143,grad_norm: 0.9744113918132128, iteration: 127882
loss: 0.9907405376434326,grad_norm: 0.9152105318756621, iteration: 127883
loss: 1.0205498933792114,grad_norm: 0.9999991264829515, iteration: 127884
loss: 1.0142549276351929,grad_norm: 0.9999990350250751, iteration: 127885
loss: 0.9988370537757874,grad_norm: 0.8748577765640244, iteration: 127886
loss: 1.0263042449951172,grad_norm: 0.9358332923360995, iteration: 127887
loss: 0.9985030293464661,grad_norm: 0.9999989992171852, iteration: 127888
loss: 1.0190097093582153,grad_norm: 0.9999991028714121, iteration: 127889
loss: 0.9787558317184448,grad_norm: 0.8852875140996804, iteration: 127890
loss: 0.9814561605453491,grad_norm: 0.9999991108989066, iteration: 127891
loss: 1.03180992603302,grad_norm: 0.9999991399244446, iteration: 127892
loss: 0.9872715473175049,grad_norm: 0.9274660505112374, iteration: 127893
loss: 0.9987349510192871,grad_norm: 0.9999990427740696, iteration: 127894
loss: 0.9853008389472961,grad_norm: 0.9999991656485273, iteration: 127895
loss: 0.9736959934234619,grad_norm: 0.9958986361140632, iteration: 127896
loss: 0.9827247262001038,grad_norm: 0.9999990903643134, iteration: 127897
loss: 1.0503805875778198,grad_norm: 0.9999991261763429, iteration: 127898
loss: 0.9904769062995911,grad_norm: 0.9999991401247569, iteration: 127899
loss: 1.0035388469696045,grad_norm: 0.8755716979073038, iteration: 127900
loss: 1.0311181545257568,grad_norm: 0.9999990875989573, iteration: 127901
loss: 0.9833390712738037,grad_norm: 0.9999991603033783, iteration: 127902
loss: 0.9819366931915283,grad_norm: 0.9419525459770055, iteration: 127903
loss: 1.0276474952697754,grad_norm: 0.9999990347220999, iteration: 127904
loss: 0.9973922967910767,grad_norm: 0.9999990419732304, iteration: 127905
loss: 0.9977784752845764,grad_norm: 0.9200074189112968, iteration: 127906
loss: 0.9592096209526062,grad_norm: 0.9999991875709558, iteration: 127907
loss: 1.002345323562622,grad_norm: 0.9998886392954056, iteration: 127908
loss: 1.0132427215576172,grad_norm: 0.9999991277855003, iteration: 127909
loss: 0.995336651802063,grad_norm: 0.925011270648413, iteration: 127910
loss: 1.0002415180206299,grad_norm: 0.9999993998814805, iteration: 127911
loss: 0.9976688027381897,grad_norm: 0.9916305336031861, iteration: 127912
loss: 0.9733772873878479,grad_norm: 0.9344835141825419, iteration: 127913
loss: 0.999765157699585,grad_norm: 0.9999989672975347, iteration: 127914
loss: 0.9657890796661377,grad_norm: 0.8938320114283613, iteration: 127915
loss: 1.0087999105453491,grad_norm: 0.9467806471334644, iteration: 127916
loss: 0.9859960079193115,grad_norm: 0.9999992207425441, iteration: 127917
loss: 1.0035874843597412,grad_norm: 0.9999992082699084, iteration: 127918
loss: 0.9924439787864685,grad_norm: 0.9999992800924276, iteration: 127919
loss: 0.9811820983886719,grad_norm: 0.9545162422766464, iteration: 127920
loss: 0.9838120937347412,grad_norm: 0.9277411629821952, iteration: 127921
loss: 1.012006402015686,grad_norm: 0.9106425933988951, iteration: 127922
loss: 1.0145877599716187,grad_norm: 0.9999994064222048, iteration: 127923
loss: 0.9817407727241516,grad_norm: 0.921191431262552, iteration: 127924
loss: 0.9823892116546631,grad_norm: 0.9999990145162895, iteration: 127925
loss: 1.0264500379562378,grad_norm: 0.9999993049042101, iteration: 127926
loss: 0.9816820025444031,grad_norm: 0.95233568154609, iteration: 127927
loss: 0.9990237951278687,grad_norm: 0.9999991856690549, iteration: 127928
loss: 1.0205143690109253,grad_norm: 0.9999992296372581, iteration: 127929
loss: 0.9754234552383423,grad_norm: 0.9999990853065568, iteration: 127930
loss: 0.9812929034233093,grad_norm: 0.9531166159065447, iteration: 127931
loss: 0.9782295227050781,grad_norm: 0.9873489514052358, iteration: 127932
loss: 1.0377329587936401,grad_norm: 0.9999991913480072, iteration: 127933
loss: 0.990071177482605,grad_norm: 0.8590268798430447, iteration: 127934
loss: 0.9846042394638062,grad_norm: 0.9999990868830618, iteration: 127935
loss: 0.9957306385040283,grad_norm: 0.9999990338452259, iteration: 127936
loss: 0.9720889925956726,grad_norm: 0.9999993551603537, iteration: 127937
loss: 0.973396897315979,grad_norm: 0.9421817923235101, iteration: 127938
loss: 0.9999824166297913,grad_norm: 0.9999991632322202, iteration: 127939
loss: 1.009423017501831,grad_norm: 0.9879244137740233, iteration: 127940
loss: 1.0218287706375122,grad_norm: 0.9654388454195224, iteration: 127941
loss: 0.9924540519714355,grad_norm: 0.9999990376000759, iteration: 127942
loss: 0.9583944082260132,grad_norm: 0.9999990776368439, iteration: 127943
loss: 0.9607241749763489,grad_norm: 0.9999991693450819, iteration: 127944
loss: 0.9940652847290039,grad_norm: 0.9285771281961404, iteration: 127945
loss: 0.9977818727493286,grad_norm: 0.999999177832589, iteration: 127946
loss: 1.0372824668884277,grad_norm: 0.9999992949004803, iteration: 127947
loss: 0.9882267117500305,grad_norm: 0.9999992292410136, iteration: 127948
loss: 1.0153981447219849,grad_norm: 0.9999992972277412, iteration: 127949
loss: 1.0136264562606812,grad_norm: 0.9001309986549334, iteration: 127950
loss: 1.023582935333252,grad_norm: 0.9999998322086389, iteration: 127951
loss: 1.0138907432556152,grad_norm: 0.9999990739367777, iteration: 127952
loss: 0.9914255738258362,grad_norm: 0.9999991370780016, iteration: 127953
loss: 1.0375248193740845,grad_norm: 0.9999989142048783, iteration: 127954
loss: 1.0292555093765259,grad_norm: 0.9999990560755052, iteration: 127955
loss: 1.020442008972168,grad_norm: 0.9956890044129019, iteration: 127956
loss: 0.983858585357666,grad_norm: 0.9999993487682995, iteration: 127957
loss: 1.0910313129425049,grad_norm: 0.9787502658480415, iteration: 127958
loss: 0.9833648204803467,grad_norm: 0.9999991869453614, iteration: 127959
loss: 0.9637093544006348,grad_norm: 0.9999992441704793, iteration: 127960
loss: 1.0089646577835083,grad_norm: 0.9583063590588359, iteration: 127961
loss: 0.9928207993507385,grad_norm: 0.9170914344305234, iteration: 127962
loss: 0.9883303642272949,grad_norm: 0.999999063135287, iteration: 127963
loss: 1.0118014812469482,grad_norm: 0.9999991252796242, iteration: 127964
loss: 0.9966840147972107,grad_norm: 0.9999989932301651, iteration: 127965
loss: 1.011143445968628,grad_norm: 0.9999991543396112, iteration: 127966
loss: 0.9992469549179077,grad_norm: 0.9999992063699684, iteration: 127967
loss: 1.036329984664917,grad_norm: 0.9999990214516907, iteration: 127968
loss: 0.9992116093635559,grad_norm: 0.8710027542284237, iteration: 127969
loss: 0.9959750771522522,grad_norm: 0.9999991289530048, iteration: 127970
loss: 0.9948763847351074,grad_norm: 0.922859107897277, iteration: 127971
loss: 1.030004620552063,grad_norm: 0.9999992133647124, iteration: 127972
loss: 0.9907876253128052,grad_norm: 0.8221058542592381, iteration: 127973
loss: 1.0159074068069458,grad_norm: 0.8482577755444315, iteration: 127974
loss: 0.9984789490699768,grad_norm: 0.9999990698359715, iteration: 127975
loss: 1.0141587257385254,grad_norm: 0.9999990492246359, iteration: 127976
loss: 1.0100346803665161,grad_norm: 0.9999989906816691, iteration: 127977
loss: 1.0261563062667847,grad_norm: 0.9999991529021134, iteration: 127978
loss: 0.9758468866348267,grad_norm: 0.8183124434288459, iteration: 127979
loss: 0.9911640882492065,grad_norm: 0.8999145719640609, iteration: 127980
loss: 0.9963862299919128,grad_norm: 0.9999990640463196, iteration: 127981
loss: 0.987435519695282,grad_norm: 0.9999990941965766, iteration: 127982
loss: 1.0210847854614258,grad_norm: 0.999999161375592, iteration: 127983
loss: 0.9772459864616394,grad_norm: 0.9999991052697436, iteration: 127984
loss: 1.0178035497665405,grad_norm: 0.9999990249318808, iteration: 127985
loss: 0.9989904761314392,grad_norm: 0.9474604715856184, iteration: 127986
loss: 0.9923912286758423,grad_norm: 0.834455986343858, iteration: 127987
loss: 1.0095347166061401,grad_norm: 0.9999991111232692, iteration: 127988
loss: 1.0130672454833984,grad_norm: 0.9999991175053994, iteration: 127989
loss: 0.9664309024810791,grad_norm: 0.9002040036874452, iteration: 127990
loss: 1.0323916673660278,grad_norm: 0.9934651865474932, iteration: 127991
loss: 0.9778314828872681,grad_norm: 0.9999989812377034, iteration: 127992
loss: 0.9919345378875732,grad_norm: 0.9999991266573731, iteration: 127993
loss: 1.0218596458435059,grad_norm: 0.9235373975651567, iteration: 127994
loss: 1.0171852111816406,grad_norm: 0.9999991547314425, iteration: 127995
loss: 0.99643474817276,grad_norm: 0.8906842836914015, iteration: 127996
loss: 0.9958450198173523,grad_norm: 0.9999992739267575, iteration: 127997
loss: 0.9921349883079529,grad_norm: 0.9956465886288296, iteration: 127998
loss: 1.008413314819336,grad_norm: 0.9999991466995718, iteration: 127999
loss: 0.9812630414962769,grad_norm: 0.9999992255792566, iteration: 128000
loss: 1.0015496015548706,grad_norm: 0.9999992824532912, iteration: 128001
loss: 0.9710150957107544,grad_norm: 0.9999990819669679, iteration: 128002
loss: 1.0050255060195923,grad_norm: 0.9341308480242292, iteration: 128003
loss: 0.9929622411727905,grad_norm: 0.9404965897311459, iteration: 128004
loss: 0.9659493565559387,grad_norm: 0.9999991470774292, iteration: 128005
loss: 1.0172804594039917,grad_norm: 0.9999992291864614, iteration: 128006
loss: 0.9991408586502075,grad_norm: 0.8724566224883937, iteration: 128007
loss: 0.979520857334137,grad_norm: 0.9152500514156439, iteration: 128008
loss: 1.0396374464035034,grad_norm: 0.9814297076088256, iteration: 128009
loss: 0.9767686724662781,grad_norm: 0.9999998524128831, iteration: 128010
loss: 1.0288660526275635,grad_norm: 0.999999180470866, iteration: 128011
loss: 0.9946943521499634,grad_norm: 0.9999991001085254, iteration: 128012
loss: 1.0069856643676758,grad_norm: 0.9999991801900504, iteration: 128013
loss: 0.9764640927314758,grad_norm: 0.9969461672656748, iteration: 128014
loss: 0.9900025129318237,grad_norm: 0.9999994529758504, iteration: 128015
loss: 1.034006953239441,grad_norm: 0.9999989726063704, iteration: 128016
loss: 1.018250584602356,grad_norm: 0.9271436125920054, iteration: 128017
loss: 1.0113136768341064,grad_norm: 0.9274327839919841, iteration: 128018
loss: 1.011755347251892,grad_norm: 0.9999991660920177, iteration: 128019
loss: 0.9899063110351562,grad_norm: 0.9999994344663181, iteration: 128020
loss: 0.9776526093482971,grad_norm: 0.8751988835193649, iteration: 128021
loss: 0.9644882678985596,grad_norm: 0.999999193244679, iteration: 128022
loss: 1.0059159994125366,grad_norm: 0.9782008964037395, iteration: 128023
loss: 1.0097960233688354,grad_norm: 0.9232561770868757, iteration: 128024
loss: 0.9922852516174316,grad_norm: 0.9592216655771502, iteration: 128025
loss: 0.9659644365310669,grad_norm: 0.9999991437535679, iteration: 128026
loss: 1.0252506732940674,grad_norm: 0.9999991620184718, iteration: 128027
loss: 0.9732337594032288,grad_norm: 0.9569776936493568, iteration: 128028
loss: 1.0198822021484375,grad_norm: 0.9999991674253997, iteration: 128029
loss: 0.9554176330566406,grad_norm: 0.9999991163781282, iteration: 128030
loss: 1.0100072622299194,grad_norm: 0.9999990877260668, iteration: 128031
loss: 1.0106475353240967,grad_norm: 0.9793251487193172, iteration: 128032
loss: 0.9780770540237427,grad_norm: 0.9999990421491157, iteration: 128033
loss: 1.0085134506225586,grad_norm: 0.7910956294519114, iteration: 128034
loss: 1.020900011062622,grad_norm: 0.9999990724206639, iteration: 128035
loss: 1.0165133476257324,grad_norm: 0.9999992210403817, iteration: 128036
loss: 0.9976323246955872,grad_norm: 0.9999989943170893, iteration: 128037
loss: 1.0138163566589355,grad_norm: 0.9999991287352153, iteration: 128038
loss: 0.9860261082649231,grad_norm: 0.999999189899425, iteration: 128039
loss: 0.9838204979896545,grad_norm: 0.9438666534137808, iteration: 128040
loss: 0.9904268383979797,grad_norm: 0.9999992133066551, iteration: 128041
loss: 1.026750087738037,grad_norm: 0.954275716588673, iteration: 128042
loss: 0.9723798632621765,grad_norm: 0.9999990041620516, iteration: 128043
loss: 1.0336382389068604,grad_norm: 0.9999990935658836, iteration: 128044
loss: 1.0448163747787476,grad_norm: 0.9493496403382157, iteration: 128045
loss: 1.0269043445587158,grad_norm: 0.9999993606849151, iteration: 128046
loss: 1.0210540294647217,grad_norm: 0.999998932834212, iteration: 128047
loss: 1.005599021911621,grad_norm: 0.9999991085252725, iteration: 128048
loss: 1.0096712112426758,grad_norm: 0.9999990780385873, iteration: 128049
loss: 0.9927464723587036,grad_norm: 0.9536870590955977, iteration: 128050
loss: 0.9665501713752747,grad_norm: 0.9999991336783696, iteration: 128051
loss: 0.9495227932929993,grad_norm: 0.9999990412311252, iteration: 128052
loss: 1.0118615627288818,grad_norm: 0.9999991137154909, iteration: 128053
loss: 1.0080338716506958,grad_norm: 0.9999993703470228, iteration: 128054
loss: 0.9870201349258423,grad_norm: 0.8982640292049021, iteration: 128055
loss: 0.9766262769699097,grad_norm: 0.9801827409221061, iteration: 128056
loss: 0.9995608925819397,grad_norm: 0.9726776827268624, iteration: 128057
loss: 0.9756752252578735,grad_norm: 0.9999991212268816, iteration: 128058
loss: 0.9781612753868103,grad_norm: 0.9999992423710532, iteration: 128059
loss: 1.0198163986206055,grad_norm: 0.9999990615509455, iteration: 128060
loss: 1.0040373802185059,grad_norm: 0.9999989784637522, iteration: 128061
loss: 1.0778694152832031,grad_norm: 0.9999994039116327, iteration: 128062
loss: 1.0090889930725098,grad_norm: 0.9608681200661383, iteration: 128063
loss: 1.009573221206665,grad_norm: 0.9999991064486056, iteration: 128064
loss: 1.0261805057525635,grad_norm: 0.9999992140763531, iteration: 128065
loss: 1.054004430770874,grad_norm: 0.9999997097239672, iteration: 128066
loss: 1.0317600965499878,grad_norm: 0.999999288274742, iteration: 128067
loss: 1.0120574235916138,grad_norm: 0.9999989665102191, iteration: 128068
loss: 1.0103658437728882,grad_norm: 0.9999991224698496, iteration: 128069
loss: 1.0069812536239624,grad_norm: 0.9999993180573943, iteration: 128070
loss: 1.0323983430862427,grad_norm: 0.8750775079588823, iteration: 128071
loss: 1.027053952217102,grad_norm: 0.9791383291868235, iteration: 128072
loss: 1.052040457725525,grad_norm: 0.9999991563694166, iteration: 128073
loss: 0.9928390383720398,grad_norm: 0.8947472185087457, iteration: 128074
loss: 1.0218846797943115,grad_norm: 0.9999989713398613, iteration: 128075
loss: 1.0348294973373413,grad_norm: 1.0000000099525161, iteration: 128076
loss: 1.014070987701416,grad_norm: 0.9999990988739358, iteration: 128077
loss: 1.0133683681488037,grad_norm: 0.9575960582555458, iteration: 128078
loss: 1.0200998783111572,grad_norm: 0.9999995628490639, iteration: 128079
loss: 1.0490145683288574,grad_norm: 0.9999996602011132, iteration: 128080
loss: 1.009641170501709,grad_norm: 0.9999991400487248, iteration: 128081
loss: 1.005894422531128,grad_norm: 0.8554853510330724, iteration: 128082
loss: 1.0406988859176636,grad_norm: 0.9999991650100585, iteration: 128083
loss: 0.9908746480941772,grad_norm: 0.9999989638150207, iteration: 128084
loss: 0.9958041906356812,grad_norm: 0.9999990756033318, iteration: 128085
loss: 0.9988919496536255,grad_norm: 0.9739294894509539, iteration: 128086
loss: 1.0001369714736938,grad_norm: 0.9999991621716066, iteration: 128087
loss: 1.011717438697815,grad_norm: 0.9999988467994598, iteration: 128088
loss: 1.0031503438949585,grad_norm: 0.9999992238304213, iteration: 128089
loss: 1.068381667137146,grad_norm: 0.9999992028339862, iteration: 128090
loss: 1.0178231000900269,grad_norm: 0.9547643334454031, iteration: 128091
loss: 0.9964948296546936,grad_norm: 0.9999991025847978, iteration: 128092
loss: 0.9849869012832642,grad_norm: 0.9999991117331177, iteration: 128093
loss: 0.973495602607727,grad_norm: 0.9999992914102072, iteration: 128094
loss: 0.9958240389823914,grad_norm: 0.9291390879854151, iteration: 128095
loss: 1.0115193128585815,grad_norm: 0.9174276529370026, iteration: 128096
loss: 1.0740256309509277,grad_norm: 0.9999992167014905, iteration: 128097
loss: 1.0583232641220093,grad_norm: 0.9999991077593254, iteration: 128098
loss: 1.0223697423934937,grad_norm: 0.9999990949781404, iteration: 128099
loss: 0.9945130944252014,grad_norm: 0.9999991136340389, iteration: 128100
loss: 1.0133845806121826,grad_norm: 0.9999993593049625, iteration: 128101
loss: 1.0560013055801392,grad_norm: 0.9999991329208037, iteration: 128102
loss: 1.0283417701721191,grad_norm: 0.9999991633710171, iteration: 128103
loss: 0.975750744342804,grad_norm: 0.9999991233686781, iteration: 128104
loss: 0.9887341856956482,grad_norm: 0.9999991234891886, iteration: 128105
loss: 1.0303148031234741,grad_norm: 0.9999991710399979, iteration: 128106
loss: 1.0096501111984253,grad_norm: 0.999999115578939, iteration: 128107
loss: 1.0189474821090698,grad_norm: 0.9999998416518593, iteration: 128108
loss: 1.045607089996338,grad_norm: 1.0000000189379346, iteration: 128109
loss: 0.9648653268814087,grad_norm: 0.9999992927279825, iteration: 128110
loss: 0.9862837195396423,grad_norm: 0.8866395847182548, iteration: 128111
loss: 1.0541772842407227,grad_norm: 0.9999992877085524, iteration: 128112
loss: 1.0248936414718628,grad_norm: 0.9999990880068238, iteration: 128113
loss: 1.0548332929611206,grad_norm: 0.9999998577679464, iteration: 128114
loss: 0.988510251045227,grad_norm: 0.9999994076394866, iteration: 128115
loss: 0.9900404214859009,grad_norm: 0.9999990491543697, iteration: 128116
loss: 1.0051913261413574,grad_norm: 0.9999993372973247, iteration: 128117
loss: 1.105602502822876,grad_norm: 0.9512799770447867, iteration: 128118
loss: 1.018043875694275,grad_norm: 0.9999995445170604, iteration: 128119
loss: 1.026085376739502,grad_norm: 0.9999992057077496, iteration: 128120
loss: 1.077412724494934,grad_norm: 0.9999992335782691, iteration: 128121
loss: 1.1389296054840088,grad_norm: 1.0000000758018397, iteration: 128122
loss: 1.0621020793914795,grad_norm: 0.9999992757373148, iteration: 128123
loss: 1.0787689685821533,grad_norm: 0.9999992577263785, iteration: 128124
loss: 1.2630468606948853,grad_norm: 0.9999994835704358, iteration: 128125
loss: 1.0004444122314453,grad_norm: 0.9999990757988352, iteration: 128126
loss: 1.0828356742858887,grad_norm: 0.9999992129245109, iteration: 128127
loss: 1.0088911056518555,grad_norm: 0.999999255515043, iteration: 128128
loss: 1.024635672569275,grad_norm: 0.9999992113293401, iteration: 128129
loss: 0.9882917404174805,grad_norm: 0.9999994786915009, iteration: 128130
loss: 0.9935930371284485,grad_norm: 0.9999991312622323, iteration: 128131
loss: 0.9562960863113403,grad_norm: 0.9999991180851425, iteration: 128132
loss: 1.0508191585540771,grad_norm: 0.9999998893228583, iteration: 128133
loss: 1.1989535093307495,grad_norm: 0.9999997876954207, iteration: 128134
loss: 1.046281099319458,grad_norm: 0.9999998324526733, iteration: 128135
loss: 1.0526564121246338,grad_norm: 0.9999992054803762, iteration: 128136
loss: 1.013663649559021,grad_norm: 0.999999290029085, iteration: 128137
loss: 1.1764988899230957,grad_norm: 0.9999992284410648, iteration: 128138
loss: 1.010274052619934,grad_norm: 0.979023077782952, iteration: 128139
loss: 1.0098276138305664,grad_norm: 0.9999995074879, iteration: 128140
loss: 1.0101486444473267,grad_norm: 0.9999992191894527, iteration: 128141
loss: 1.1926136016845703,grad_norm: 0.9999998915107899, iteration: 128142
loss: 1.0373852252960205,grad_norm: 0.9999989488924124, iteration: 128143
loss: 1.0202572345733643,grad_norm: 0.9999994918629364, iteration: 128144
loss: 1.0850112438201904,grad_norm: 0.9999998644019807, iteration: 128145
loss: 1.5377286672592163,grad_norm: 0.999999885869603, iteration: 128146
loss: 1.0545094013214111,grad_norm: 0.9999990086387947, iteration: 128147
loss: 1.2045717239379883,grad_norm: 0.9999998940283952, iteration: 128148
loss: 1.0074410438537598,grad_norm: 0.983251635198938, iteration: 128149
loss: 1.0111490488052368,grad_norm: 0.9007112471501282, iteration: 128150
loss: 1.0303847789764404,grad_norm: 0.9999992003465452, iteration: 128151
loss: 1.0007542371749878,grad_norm: 0.9280252584556345, iteration: 128152
loss: 0.9885231256484985,grad_norm: 0.92361168340743, iteration: 128153
loss: 1.0094726085662842,grad_norm: 0.9999991194483042, iteration: 128154
loss: 1.0749694108963013,grad_norm: 0.9999994778026857, iteration: 128155
loss: 1.0138112306594849,grad_norm: 0.999999395391992, iteration: 128156
loss: 0.9832167625427246,grad_norm: 0.9999991920720507, iteration: 128157
loss: 1.0235097408294678,grad_norm: 0.9999992507426103, iteration: 128158
loss: 0.9907214045524597,grad_norm: 0.960595980732552, iteration: 128159
loss: 1.0180085897445679,grad_norm: 0.9999993894092243, iteration: 128160
loss: 1.0699174404144287,grad_norm: 0.9999997754553681, iteration: 128161
loss: 1.0102235078811646,grad_norm: 0.9761134860062602, iteration: 128162
loss: 1.0236496925354004,grad_norm: 0.9999992162357735, iteration: 128163
loss: 1.0450228452682495,grad_norm: 0.9999990809221196, iteration: 128164
loss: 0.9915737509727478,grad_norm: 0.9999991465372595, iteration: 128165
loss: 1.0529578924179077,grad_norm: 0.9999993572402687, iteration: 128166
loss: 0.9825783967971802,grad_norm: 0.9999992649953333, iteration: 128167
loss: 1.0000959634780884,grad_norm: 0.999999304610899, iteration: 128168
loss: 0.9859557151794434,grad_norm: 0.9999990678278218, iteration: 128169
loss: 1.0424367189407349,grad_norm: 0.9999996345748434, iteration: 128170
loss: 1.0798534154891968,grad_norm: 0.9999990529253442, iteration: 128171
loss: 0.9913071393966675,grad_norm: 0.9999990193763356, iteration: 128172
loss: 1.3155897855758667,grad_norm: 0.9999995835739734, iteration: 128173
loss: 0.9812827110290527,grad_norm: 0.9999998570301261, iteration: 128174
loss: 0.9967983961105347,grad_norm: 0.9999996291676958, iteration: 128175
loss: 0.9865675568580627,grad_norm: 0.9167795605073982, iteration: 128176
loss: 1.0933607816696167,grad_norm: 0.9999999366142792, iteration: 128177
loss: 1.028541922569275,grad_norm: 0.9999991183474074, iteration: 128178
loss: 0.9883155226707458,grad_norm: 0.9363711493545743, iteration: 128179
loss: 1.0069246292114258,grad_norm: 0.9999990067215769, iteration: 128180
loss: 1.028359293937683,grad_norm: 0.9799005819702483, iteration: 128181
loss: 0.980357825756073,grad_norm: 0.9999991580409061, iteration: 128182
loss: 1.0933457612991333,grad_norm: 0.9999996583844165, iteration: 128183
loss: 1.0312756299972534,grad_norm: 0.9999995069245811, iteration: 128184
loss: 1.0250331163406372,grad_norm: 0.9999992618858055, iteration: 128185
loss: 0.9768242239952087,grad_norm: 0.9999990912786171, iteration: 128186
loss: 1.0046682357788086,grad_norm: 0.9617956599946195, iteration: 128187
loss: 0.9977288246154785,grad_norm: 0.9999992800680032, iteration: 128188
loss: 0.9504124522209167,grad_norm: 0.9350269178226543, iteration: 128189
loss: 1.0520515441894531,grad_norm: 0.9999997344057638, iteration: 128190
loss: 1.0627949237823486,grad_norm: 0.9999998092290346, iteration: 128191
loss: 1.0011980533599854,grad_norm: 0.9874854555615944, iteration: 128192
loss: 1.0397560596466064,grad_norm: 0.9999990493447176, iteration: 128193
loss: 1.0171966552734375,grad_norm: 0.8722113661497671, iteration: 128194
loss: 0.991902232170105,grad_norm: 0.9999998027608242, iteration: 128195
loss: 1.0050026178359985,grad_norm: 0.8971090205852716, iteration: 128196
loss: 0.9748455286026001,grad_norm: 0.9911827627327378, iteration: 128197
loss: 1.0353275537490845,grad_norm: 0.9999991539573981, iteration: 128198
loss: 1.0014190673828125,grad_norm: 0.9999990529181038, iteration: 128199
loss: 1.011221170425415,grad_norm: 0.9603998857538898, iteration: 128200
loss: 1.0416311025619507,grad_norm: 0.99999915481735, iteration: 128201
loss: 0.992241382598877,grad_norm: 0.9999991032682126, iteration: 128202
loss: 1.042062759399414,grad_norm: 0.999999199779728, iteration: 128203
loss: 0.9751467704772949,grad_norm: 0.9999992546408734, iteration: 128204
loss: 0.9823172092437744,grad_norm: 0.9999990801692731, iteration: 128205
loss: 1.2067216634750366,grad_norm: 0.9999998247002827, iteration: 128206
loss: 1.0120131969451904,grad_norm: 0.952266657286058, iteration: 128207
loss: 1.003129005432129,grad_norm: 0.908480537052996, iteration: 128208
loss: 0.9922035336494446,grad_norm: 0.9999990400748101, iteration: 128209
loss: 1.0701550245285034,grad_norm: 0.9999990782733137, iteration: 128210
loss: 1.333266019821167,grad_norm: 0.999999783065303, iteration: 128211
loss: 1.0064833164215088,grad_norm: 0.9999991471423304, iteration: 128212
loss: 0.9769344925880432,grad_norm: 0.9999991002990887, iteration: 128213
loss: 1.027158260345459,grad_norm: 0.9999992705413161, iteration: 128214
loss: 1.0066167116165161,grad_norm: 0.9999999391919498, iteration: 128215
loss: 0.9743707776069641,grad_norm: 0.9868985447249896, iteration: 128216
loss: 0.992480456829071,grad_norm: 0.8889178966446338, iteration: 128217
loss: 1.0161882638931274,grad_norm: 0.9196383027824031, iteration: 128218
loss: 1.0707823038101196,grad_norm: 0.9999999856936984, iteration: 128219
loss: 0.9873998165130615,grad_norm: 0.9999991154762781, iteration: 128220
loss: 1.013307809829712,grad_norm: 0.9999991869088188, iteration: 128221
loss: 0.9766761660575867,grad_norm: 0.999998960781745, iteration: 128222
loss: 1.0317150354385376,grad_norm: 0.9999990721873251, iteration: 128223
loss: 1.0773195028305054,grad_norm: 0.9999995448386302, iteration: 128224
loss: 1.0157653093338013,grad_norm: 0.999999218205093, iteration: 128225
loss: 0.9950163960456848,grad_norm: 0.8368432803936527, iteration: 128226
loss: 0.966730535030365,grad_norm: 0.92531131594345, iteration: 128227
loss: 1.0040749311447144,grad_norm: 0.9999992609194205, iteration: 128228
loss: 0.982294499874115,grad_norm: 0.9999990265438478, iteration: 128229
loss: 0.9834645986557007,grad_norm: 0.999998971253947, iteration: 128230
loss: 1.0159127712249756,grad_norm: 0.8116537896633332, iteration: 128231
loss: 0.9996819496154785,grad_norm: 0.9999989968284057, iteration: 128232
loss: 0.9940657615661621,grad_norm: 0.9227714053815056, iteration: 128233
loss: 1.0169843435287476,grad_norm: 0.9999992568177593, iteration: 128234
loss: 1.0037482976913452,grad_norm: 0.999999151245038, iteration: 128235
loss: 0.9794452786445618,grad_norm: 0.99999912446186, iteration: 128236
loss: 1.0495049953460693,grad_norm: 0.9999990539182924, iteration: 128237
loss: 1.0167012214660645,grad_norm: 0.9603217339296838, iteration: 128238
loss: 0.9925653338432312,grad_norm: 0.9999990011622444, iteration: 128239
loss: 0.9896910786628723,grad_norm: 0.9925710032167022, iteration: 128240
loss: 0.9675248861312866,grad_norm: 0.9999991782600813, iteration: 128241
loss: 0.9722336530685425,grad_norm: 0.9999990534713445, iteration: 128242
loss: 0.9632439017295837,grad_norm: 0.9495967715777274, iteration: 128243
loss: 0.9292242527008057,grad_norm: 0.9999993338751233, iteration: 128244
loss: 1.0009649991989136,grad_norm: 0.9999990871359146, iteration: 128245
loss: 0.990470826625824,grad_norm: 0.9999991624761886, iteration: 128246
loss: 0.9798464179039001,grad_norm: 0.9999991139639439, iteration: 128247
loss: 1.0006804466247559,grad_norm: 0.9999991836110498, iteration: 128248
loss: 1.005824089050293,grad_norm: 0.999999200193047, iteration: 128249
loss: 0.9818161129951477,grad_norm: 0.9999991815956915, iteration: 128250
loss: 1.0027222633361816,grad_norm: 0.9999991620765457, iteration: 128251
loss: 1.0174381732940674,grad_norm: 0.9999990339242076, iteration: 128252
loss: 1.0168063640594482,grad_norm: 0.9999991881456651, iteration: 128253
loss: 1.0383645296096802,grad_norm: 0.9999993177469038, iteration: 128254
loss: 1.0072463750839233,grad_norm: 0.9999990841990026, iteration: 128255
loss: 1.0126546621322632,grad_norm: 0.9999991077225864, iteration: 128256
loss: 0.9818387031555176,grad_norm: 0.9999992617830666, iteration: 128257
loss: 0.9769704341888428,grad_norm: 0.9999991232126771, iteration: 128258
loss: 0.9950792789459229,grad_norm: 0.9999990254815225, iteration: 128259
loss: 1.008078694343567,grad_norm: 0.9999991393565252, iteration: 128260
loss: 0.9868248105049133,grad_norm: 0.9999990325281973, iteration: 128261
loss: 0.980918824672699,grad_norm: 0.9096821902977499, iteration: 128262
loss: 0.9942302703857422,grad_norm: 0.9999991650660109, iteration: 128263
loss: 0.993502676486969,grad_norm: 0.9999992679973995, iteration: 128264
loss: 1.0410549640655518,grad_norm: 0.9999993651619719, iteration: 128265
loss: 1.0024960041046143,grad_norm: 0.9999994940562263, iteration: 128266
loss: 0.9820435643196106,grad_norm: 0.999999009881449, iteration: 128267
loss: 0.9960336685180664,grad_norm: 0.9999991467484732, iteration: 128268
loss: 1.005170226097107,grad_norm: 0.9999991154198481, iteration: 128269
loss: 1.004140019416809,grad_norm: 0.999999263227401, iteration: 128270
loss: 0.9632864594459534,grad_norm: 0.9999993355210062, iteration: 128271
loss: 0.9938592910766602,grad_norm: 0.9999991925364883, iteration: 128272
loss: 1.0236992835998535,grad_norm: 0.9999991038009451, iteration: 128273
loss: 0.981273889541626,grad_norm: 0.9999991399592498, iteration: 128274
loss: 1.0726312398910522,grad_norm: 0.9999993018888055, iteration: 128275
loss: 1.044560432434082,grad_norm: 0.9999992142372115, iteration: 128276
loss: 0.9833844304084778,grad_norm: 0.8474928752562318, iteration: 128277
loss: 1.0127580165863037,grad_norm: 0.9927850316242288, iteration: 128278
loss: 0.9856892228126526,grad_norm: 0.865659229556369, iteration: 128279
loss: 1.048902988433838,grad_norm: 0.9408721809930537, iteration: 128280
loss: 0.9724084734916687,grad_norm: 0.8637316141730612, iteration: 128281
loss: 1.0179646015167236,grad_norm: 0.9999991154052823, iteration: 128282
loss: 0.9862350225448608,grad_norm: 0.908089026267649, iteration: 128283
loss: 1.0319476127624512,grad_norm: 0.9567705529554885, iteration: 128284
loss: 1.0041613578796387,grad_norm: 0.9918432563249641, iteration: 128285
loss: 1.0238559246063232,grad_norm: 0.9999991752903515, iteration: 128286
loss: 0.97801673412323,grad_norm: 0.9999992255247744, iteration: 128287
loss: 1.0022464990615845,grad_norm: 0.9323518611392058, iteration: 128288
loss: 1.0502405166625977,grad_norm: 0.9999995702005836, iteration: 128289
loss: 1.0084419250488281,grad_norm: 0.9678314419997397, iteration: 128290
loss: 1.0004782676696777,grad_norm: 0.9999991831897531, iteration: 128291
loss: 0.9944117665290833,grad_norm: 0.9433743630311212, iteration: 128292
loss: 1.0245532989501953,grad_norm: 0.9999991769617584, iteration: 128293
loss: 1.0378713607788086,grad_norm: 0.9999997976657669, iteration: 128294
loss: 0.9947493076324463,grad_norm: 0.9999991920585042, iteration: 128295
loss: 1.0127373933792114,grad_norm: 0.8830736988478206, iteration: 128296
loss: 0.9880308508872986,grad_norm: 0.9864207007398711, iteration: 128297
loss: 0.9947555065155029,grad_norm: 0.9999990750951145, iteration: 128298
loss: 1.0141578912734985,grad_norm: 0.9249532630367441, iteration: 128299
loss: 1.0140031576156616,grad_norm: 0.9207774502684822, iteration: 128300
loss: 1.0352882146835327,grad_norm: 0.9999995408563738, iteration: 128301
loss: 0.955031156539917,grad_norm: 0.9642124607759694, iteration: 128302
loss: 0.9831385612487793,grad_norm: 0.999999181355867, iteration: 128303
loss: 0.9821556210517883,grad_norm: 0.99999907895877, iteration: 128304
loss: 0.9936046004295349,grad_norm: 0.8612949250917197, iteration: 128305
loss: 1.0189709663391113,grad_norm: 0.9015202634848515, iteration: 128306
loss: 0.9846410155296326,grad_norm: 0.9999989485306527, iteration: 128307
loss: 0.9950657486915588,grad_norm: 0.9201062005057414, iteration: 128308
loss: 0.9819462299346924,grad_norm: 0.9999992848894567, iteration: 128309
loss: 1.0175553560256958,grad_norm: 0.9999990805652555, iteration: 128310
loss: 0.9834346771240234,grad_norm: 0.9021585773901908, iteration: 128311
loss: 1.0007729530334473,grad_norm: 0.9999992084964839, iteration: 128312
loss: 0.99225914478302,grad_norm: 0.9999991868208499, iteration: 128313
loss: 0.9864808917045593,grad_norm: 0.9944970686188364, iteration: 128314
loss: 1.033896803855896,grad_norm: 0.9879344538070923, iteration: 128315
loss: 0.9932174682617188,grad_norm: 0.9999990950904936, iteration: 128316
loss: 0.9726328253746033,grad_norm: 0.9999994199231776, iteration: 128317
loss: 0.983957827091217,grad_norm: 0.9926349375976666, iteration: 128318
loss: 1.005038857460022,grad_norm: 0.9999991981693669, iteration: 128319
loss: 1.038699746131897,grad_norm: 0.9515813012453557, iteration: 128320
loss: 0.9866113066673279,grad_norm: 0.9721192849013554, iteration: 128321
loss: 0.9870527982711792,grad_norm: 0.9999990329694899, iteration: 128322
loss: 1.0124938488006592,grad_norm: 0.9264020094148343, iteration: 128323
loss: 1.0121835470199585,grad_norm: 0.9999991970035846, iteration: 128324
loss: 1.0091638565063477,grad_norm: 0.9918326948606714, iteration: 128325
loss: 0.9652960896492004,grad_norm: 0.9999995733836943, iteration: 128326
loss: 1.006587266921997,grad_norm: 0.9999991062551514, iteration: 128327
loss: 1.000351071357727,grad_norm: 0.8909476058849033, iteration: 128328
loss: 0.9958069324493408,grad_norm: 0.922539802158, iteration: 128329
loss: 1.043340802192688,grad_norm: 0.999999440017063, iteration: 128330
loss: 1.0049911737442017,grad_norm: 0.9999992437513583, iteration: 128331
loss: 0.9832773208618164,grad_norm: 0.982552362279253, iteration: 128332
loss: 1.0365862846374512,grad_norm: 0.9999991512256347, iteration: 128333
loss: 1.007846474647522,grad_norm: 0.9999990694215655, iteration: 128334
loss: 0.9750319719314575,grad_norm: 0.9150427007044715, iteration: 128335
loss: 1.0308505296707153,grad_norm: 0.9999990968483169, iteration: 128336
loss: 1.0142439603805542,grad_norm: 0.9950234256467344, iteration: 128337
loss: 0.9712004065513611,grad_norm: 0.9999991292883651, iteration: 128338
loss: 1.0012331008911133,grad_norm: 0.9999990772731611, iteration: 128339
loss: 1.0220905542373657,grad_norm: 0.9025175023266819, iteration: 128340
loss: 1.0004961490631104,grad_norm: 0.9999998843559001, iteration: 128341
loss: 0.9983844757080078,grad_norm: 0.9516816749218957, iteration: 128342
loss: 1.0543906688690186,grad_norm: 0.9999995730829455, iteration: 128343
loss: 0.9802547693252563,grad_norm: 0.999999169191376, iteration: 128344
loss: 0.9871875047683716,grad_norm: 0.9987374139388229, iteration: 128345
loss: 0.997390627861023,grad_norm: 0.9999991935444963, iteration: 128346
loss: 1.015999674797058,grad_norm: 0.8460210617430011, iteration: 128347
loss: 1.0452827215194702,grad_norm: 0.999999077503919, iteration: 128348
loss: 1.037768006324768,grad_norm: 0.8732673524248522, iteration: 128349
loss: 1.1696897745132446,grad_norm: 0.9999991559789163, iteration: 128350
loss: 0.98851078748703,grad_norm: 0.9582585496754229, iteration: 128351
loss: 1.067384123802185,grad_norm: 0.9999991368328014, iteration: 128352
loss: 0.9870210886001587,grad_norm: 0.987164778764797, iteration: 128353
loss: 0.972589910030365,grad_norm: 0.9731648717634869, iteration: 128354
loss: 1.0855377912521362,grad_norm: 0.9999996456555096, iteration: 128355
loss: 0.9841805100440979,grad_norm: 0.9999992323759082, iteration: 128356
loss: 1.0095982551574707,grad_norm: 0.9855851007151196, iteration: 128357
loss: 1.0112330913543701,grad_norm: 0.9774792392976903, iteration: 128358
loss: 1.0229425430297852,grad_norm: 0.9999990995532101, iteration: 128359
loss: 0.9608795046806335,grad_norm: 0.9999990638634607, iteration: 128360
loss: 1.0186021327972412,grad_norm: 0.9999990515127692, iteration: 128361
loss: 1.033461332321167,grad_norm: 0.9999991937769037, iteration: 128362
loss: 0.9297223687171936,grad_norm: 0.9768698888701225, iteration: 128363
loss: 0.976578414440155,grad_norm: 0.9509386473134315, iteration: 128364
loss: 0.9974459409713745,grad_norm: 0.9999990461146643, iteration: 128365
loss: 1.04110586643219,grad_norm: 0.9999990042166343, iteration: 128366
loss: 1.035671353340149,grad_norm: 0.9999991092185324, iteration: 128367
loss: 1.0136289596557617,grad_norm: 0.9999991131762944, iteration: 128368
loss: 0.9807302355766296,grad_norm: 0.9554142376873871, iteration: 128369
loss: 1.0281161069869995,grad_norm: 0.999999805240174, iteration: 128370
loss: 1.0256832838058472,grad_norm: 0.9999989913895706, iteration: 128371
loss: 0.9714154005050659,grad_norm: 0.9999993354601714, iteration: 128372
loss: 1.015329122543335,grad_norm: 0.9999990423402404, iteration: 128373
loss: 0.9829054474830627,grad_norm: 0.9908674354975142, iteration: 128374
loss: 0.9896152019500732,grad_norm: 0.999999094412863, iteration: 128375
loss: 1.0246233940124512,grad_norm: 0.9999992475283792, iteration: 128376
loss: 0.9778481125831604,grad_norm: 0.937806203629319, iteration: 128377
loss: 1.0063554048538208,grad_norm: 0.779165808932631, iteration: 128378
loss: 0.985447108745575,grad_norm: 0.9999990825354235, iteration: 128379
loss: 1.0216107368469238,grad_norm: 0.9999997140204807, iteration: 128380
loss: 1.0098263025283813,grad_norm: 0.9999990043630211, iteration: 128381
loss: 1.0028717517852783,grad_norm: 0.8850484220813346, iteration: 128382
loss: 0.9552940130233765,grad_norm: 0.8732177830491534, iteration: 128383
loss: 1.0295212268829346,grad_norm: 0.9815670545339635, iteration: 128384
loss: 1.0053800344467163,grad_norm: 0.9999991532467434, iteration: 128385
loss: 0.9743028879165649,grad_norm: 0.9395308676544606, iteration: 128386
loss: 1.0195072889328003,grad_norm: 0.9718893084646323, iteration: 128387
loss: 1.0110188722610474,grad_norm: 0.9999989948904869, iteration: 128388
loss: 1.0073758363723755,grad_norm: 0.8353386452125044, iteration: 128389
loss: 1.0093127489089966,grad_norm: 0.9335857041430224, iteration: 128390
loss: 0.9673399925231934,grad_norm: 0.8847172214477677, iteration: 128391
loss: 1.142939567565918,grad_norm: 0.9999992018818582, iteration: 128392
loss: 0.9931319355964661,grad_norm: 0.8882169615365584, iteration: 128393
loss: 1.0208126306533813,grad_norm: 0.8762102647796698, iteration: 128394
loss: 0.9807313680648804,grad_norm: 0.9435060109794767, iteration: 128395
loss: 1.141508936882019,grad_norm: 0.9999998462136479, iteration: 128396
loss: 0.9574389457702637,grad_norm: 0.9999990502591288, iteration: 128397
loss: 1.082980990409851,grad_norm: 0.9614724483368415, iteration: 128398
loss: 0.9768307209014893,grad_norm: 0.9999991400792403, iteration: 128399
loss: 1.0572757720947266,grad_norm: 0.999999218377184, iteration: 128400
loss: 1.0031111240386963,grad_norm: 0.9999991449844503, iteration: 128401
loss: 1.0230662822723389,grad_norm: 0.9999995796779302, iteration: 128402
loss: 1.0126855373382568,grad_norm: 0.9999990703265836, iteration: 128403
loss: 0.983396589756012,grad_norm: 0.9982083308444339, iteration: 128404
loss: 0.9857708811759949,grad_norm: 0.9999991846116868, iteration: 128405
loss: 0.9819284081459045,grad_norm: 0.9035026881505306, iteration: 128406
loss: 1.028972864151001,grad_norm: 0.9862858658682868, iteration: 128407
loss: 1.0191981792449951,grad_norm: 0.9435509901007448, iteration: 128408
loss: 1.0235987901687622,grad_norm: 0.9999991230476749, iteration: 128409
loss: 1.0050793886184692,grad_norm: 0.9999997185948896, iteration: 128410
loss: 0.9661890864372253,grad_norm: 0.9999990350486113, iteration: 128411
loss: 1.0080562829971313,grad_norm: 0.9999991107961884, iteration: 128412
loss: 0.9861448407173157,grad_norm: 0.9999990715433328, iteration: 128413
loss: 1.0072035789489746,grad_norm: 0.9999992588229761, iteration: 128414
loss: 0.9596655964851379,grad_norm: 0.9089377397276056, iteration: 128415
loss: 0.9942097663879395,grad_norm: 0.9999991080909845, iteration: 128416
loss: 1.004343867301941,grad_norm: 0.999999260061679, iteration: 128417
loss: 0.9870186448097229,grad_norm: 0.9999992172096979, iteration: 128418
loss: 0.972402811050415,grad_norm: 0.9999990391956921, iteration: 128419
loss: 0.9842309951782227,grad_norm: 0.9925173767484063, iteration: 128420
loss: 0.9883231520652771,grad_norm: 0.9999990471195151, iteration: 128421
loss: 1.0148167610168457,grad_norm: 0.9999992846321775, iteration: 128422
loss: 1.0000112056732178,grad_norm: 0.8406822357023962, iteration: 128423
loss: 1.0658749341964722,grad_norm: 0.999999415304282, iteration: 128424
loss: 0.9856694936752319,grad_norm: 0.9999991702102328, iteration: 128425
loss: 1.186363935470581,grad_norm: 0.999999837178755, iteration: 128426
loss: 1.0212969779968262,grad_norm: 0.9999991180378884, iteration: 128427
loss: 1.0125722885131836,grad_norm: 0.9300753802943057, iteration: 128428
loss: 1.0135929584503174,grad_norm: 0.9999991175292948, iteration: 128429
loss: 0.9933059215545654,grad_norm: 0.9999992920153778, iteration: 128430
loss: 1.0212383270263672,grad_norm: 0.9840837415950807, iteration: 128431
loss: 1.0200779438018799,grad_norm: 0.9999991343053963, iteration: 128432
loss: 1.0155235528945923,grad_norm: 0.9999992713558825, iteration: 128433
loss: 0.9679134488105774,grad_norm: 0.9289692494721189, iteration: 128434
loss: 1.0662682056427002,grad_norm: 0.9999990880329028, iteration: 128435
loss: 1.0438730716705322,grad_norm: 0.9999990832544793, iteration: 128436
loss: 0.9705498218536377,grad_norm: 0.9726129156422331, iteration: 128437
loss: 1.0024700164794922,grad_norm: 0.9999994217864437, iteration: 128438
loss: 1.0223792791366577,grad_norm: 0.9999991059427923, iteration: 128439
loss: 1.0200657844543457,grad_norm: 0.9796080635026508, iteration: 128440
loss: 1.0412001609802246,grad_norm: 0.9999995321619938, iteration: 128441
loss: 0.9818358421325684,grad_norm: 0.9999989893066756, iteration: 128442
loss: 0.992706835269928,grad_norm: 0.9999992542055617, iteration: 128443
loss: 1.007382869720459,grad_norm: 0.9999993998265465, iteration: 128444
loss: 0.9917465448379517,grad_norm: 0.9940372218328414, iteration: 128445
loss: 1.0653783082962036,grad_norm: 0.9999991672862902, iteration: 128446
loss: 1.0115869045257568,grad_norm: 0.9999990248693608, iteration: 128447
loss: 0.9993528127670288,grad_norm: 0.999999309464718, iteration: 128448
loss: 0.9815152883529663,grad_norm: 0.8495639559171119, iteration: 128449
loss: 1.0086420774459839,grad_norm: 0.9374266604442283, iteration: 128450
loss: 0.9589729905128479,grad_norm: 0.9999990960187162, iteration: 128451
loss: 0.9932966828346252,grad_norm: 0.9999992489331676, iteration: 128452
loss: 0.9426541924476624,grad_norm: 0.9999992427992681, iteration: 128453
loss: 1.0519282817840576,grad_norm: 0.9999993434587842, iteration: 128454
loss: 0.9973277449607849,grad_norm: 0.9999990843453862, iteration: 128455
loss: 0.9944679737091064,grad_norm: 0.90242195467947, iteration: 128456
loss: 0.9904366731643677,grad_norm: 0.9649649665813612, iteration: 128457
loss: 1.0142735242843628,grad_norm: 0.9999992055575289, iteration: 128458
loss: 1.0315577983856201,grad_norm: 0.9999994298901572, iteration: 128459
loss: 0.9759977459907532,grad_norm: 0.9999991627365982, iteration: 128460
loss: 0.9720156192779541,grad_norm: 0.981058430882133, iteration: 128461
loss: 0.9750174283981323,grad_norm: 0.999999090032133, iteration: 128462
loss: 1.021378993988037,grad_norm: 0.9999991990253729, iteration: 128463
loss: 1.0540217161178589,grad_norm: 0.9999996727132384, iteration: 128464
loss: 0.9687473773956299,grad_norm: 0.9842629233262932, iteration: 128465
loss: 1.0280903577804565,grad_norm: 0.9921641068423935, iteration: 128466
loss: 1.00519859790802,grad_norm: 0.9999991678366577, iteration: 128467
loss: 1.036880373954773,grad_norm: 0.885055938155715, iteration: 128468
loss: 1.0144163370132446,grad_norm: 0.9999992348208965, iteration: 128469
loss: 1.0109821557998657,grad_norm: 0.9217632272296304, iteration: 128470
loss: 0.9978863000869751,grad_norm: 0.9999990834153181, iteration: 128471
loss: 1.0337245464324951,grad_norm: 0.9999989379919694, iteration: 128472
loss: 0.9860981106758118,grad_norm: 0.9336442975842959, iteration: 128473
loss: 1.045357346534729,grad_norm: 0.9999992389192951, iteration: 128474
loss: 0.9887233972549438,grad_norm: 0.9999991755869762, iteration: 128475
loss: 0.9655263423919678,grad_norm: 0.999999017026767, iteration: 128476
loss: 1.0074750185012817,grad_norm: 0.9999993635806484, iteration: 128477
loss: 1.0381348133087158,grad_norm: 0.9999996722287093, iteration: 128478
loss: 0.992409884929657,grad_norm: 0.9996544156457486, iteration: 128479
loss: 0.9887945652008057,grad_norm: 0.999999556848287, iteration: 128480
loss: 0.980756402015686,grad_norm: 0.9999990601953103, iteration: 128481
loss: 1.0001481771469116,grad_norm: 0.9999992176388091, iteration: 128482
loss: 0.9795955419540405,grad_norm: 0.9999990518287929, iteration: 128483
loss: 0.9700241088867188,grad_norm: 0.9141775619549901, iteration: 128484
loss: 0.9867222905158997,grad_norm: 0.9424124311523361, iteration: 128485
loss: 1.0257151126861572,grad_norm: 0.999999436673859, iteration: 128486
loss: 0.9730259776115417,grad_norm: 0.869816588840733, iteration: 128487
loss: 1.0010313987731934,grad_norm: 0.9999992645407709, iteration: 128488
loss: 1.0098845958709717,grad_norm: 0.9239979900805204, iteration: 128489
loss: 1.0160530805587769,grad_norm: 0.9999991737256196, iteration: 128490
loss: 0.9909399747848511,grad_norm: 0.9999990504607701, iteration: 128491
loss: 1.0295212268829346,grad_norm: 0.9999998775792729, iteration: 128492
loss: 1.02592134475708,grad_norm: 0.9999992110046949, iteration: 128493
loss: 0.9887552261352539,grad_norm: 0.999999258699445, iteration: 128494
loss: 1.0283390283584595,grad_norm: 0.9724145699361147, iteration: 128495
loss: 1.000388264656067,grad_norm: 0.9514679497084757, iteration: 128496
loss: 0.9732801914215088,grad_norm: 0.9999991382051276, iteration: 128497
loss: 0.9956984519958496,grad_norm: 0.926131921317272, iteration: 128498
loss: 0.9968070387840271,grad_norm: 0.9999991027704372, iteration: 128499
loss: 0.9727368354797363,grad_norm: 0.9999990217360103, iteration: 128500
loss: 1.015798807144165,grad_norm: 0.9999990471055001, iteration: 128501
loss: 0.9655691385269165,grad_norm: 0.9999990332610234, iteration: 128502
loss: 1.0373493432998657,grad_norm: 0.9999992171067302, iteration: 128503
loss: 1.0235813856124878,grad_norm: 0.9999990317937468, iteration: 128504
loss: 0.995284914970398,grad_norm: 0.9999989609237211, iteration: 128505
loss: 1.0810920000076294,grad_norm: 0.9999992014564058, iteration: 128506
loss: 1.000970721244812,grad_norm: 0.9999992607968248, iteration: 128507
loss: 0.9588291645050049,grad_norm: 0.9390755562400923, iteration: 128508
loss: 1.0154321193695068,grad_norm: 0.9999990873076791, iteration: 128509
loss: 0.9888213276863098,grad_norm: 0.9713380006359147, iteration: 128510
loss: 1.0013571977615356,grad_norm: 0.9999990427948832, iteration: 128511
loss: 0.9926410913467407,grad_norm: 0.8787105326147774, iteration: 128512
loss: 1.0125670433044434,grad_norm: 0.999999260052949, iteration: 128513
loss: 0.9790596961975098,grad_norm: 0.9999995287587209, iteration: 128514
loss: 0.9666765928268433,grad_norm: 0.9999992018430943, iteration: 128515
loss: 0.9793375134468079,grad_norm: 0.9923236698471274, iteration: 128516
loss: 1.0119428634643555,grad_norm: 0.7890501156537759, iteration: 128517
loss: 0.999122679233551,grad_norm: 0.9737631651570081, iteration: 128518
loss: 0.9764444231987,grad_norm: 0.9999990558908359, iteration: 128519
loss: 1.0041900873184204,grad_norm: 0.9999991455534324, iteration: 128520
loss: 0.99897700548172,grad_norm: 0.9999990346304815, iteration: 128521
loss: 1.029492735862732,grad_norm: 0.944209194228364, iteration: 128522
loss: 0.9973805546760559,grad_norm: 0.9999992639161355, iteration: 128523
loss: 1.0101571083068848,grad_norm: 0.999999120951827, iteration: 128524
loss: 1.0036978721618652,grad_norm: 0.9603527726433476, iteration: 128525
loss: 1.0332144498825073,grad_norm: 0.8598672687328703, iteration: 128526
loss: 1.0279852151870728,grad_norm: 0.9999996278862165, iteration: 128527
loss: 0.9851090908050537,grad_norm: 0.9999991259021193, iteration: 128528
loss: 0.992818295955658,grad_norm: 0.9884997255846095, iteration: 128529
loss: 0.9923744201660156,grad_norm: 0.9999992816970179, iteration: 128530
loss: 1.0221906900405884,grad_norm: 0.9436606131492595, iteration: 128531
loss: 1.0552650690078735,grad_norm: 0.906595879937335, iteration: 128532
loss: 1.018620252609253,grad_norm: 0.9947998038077702, iteration: 128533
loss: 1.0216586589813232,grad_norm: 0.9915754567790157, iteration: 128534
loss: 0.9869510531425476,grad_norm: 0.9546497734453097, iteration: 128535
loss: 1.0247344970703125,grad_norm: 0.999999102860676, iteration: 128536
loss: 1.069166660308838,grad_norm: 0.9999992487343599, iteration: 128537
loss: 1.009970784187317,grad_norm: 0.9999992434429686, iteration: 128538
loss: 0.9796507954597473,grad_norm: 0.9999990932957223, iteration: 128539
loss: 1.0325355529785156,grad_norm: 0.999999628318087, iteration: 128540
loss: 0.9865175485610962,grad_norm: 0.9999990937234746, iteration: 128541
loss: 1.001689076423645,grad_norm: 0.8906210215997303, iteration: 128542
loss: 0.943467378616333,grad_norm: 0.9999991357067955, iteration: 128543
loss: 1.0500900745391846,grad_norm: 0.9999991581441694, iteration: 128544
loss: 0.9805238842964172,grad_norm: 0.9999990835997182, iteration: 128545
loss: 0.9926608204841614,grad_norm: 0.9999992053211122, iteration: 128546
loss: 1.0179507732391357,grad_norm: 0.999999524300277, iteration: 128547
loss: 0.9692368507385254,grad_norm: 0.9999990619522962, iteration: 128548
loss: 1.0298231840133667,grad_norm: 0.9999991414502029, iteration: 128549
loss: 0.9691858887672424,grad_norm: 0.9268704885621675, iteration: 128550
loss: 1.007060170173645,grad_norm: 0.999999823436588, iteration: 128551
loss: 1.0393272638320923,grad_norm: 0.8704154352894142, iteration: 128552
loss: 1.0217620134353638,grad_norm: 0.9999991582051708, iteration: 128553
loss: 0.9960530996322632,grad_norm: 0.9999992651528948, iteration: 128554
loss: 0.9678487181663513,grad_norm: 0.9999991457128292, iteration: 128555
loss: 0.9955034852027893,grad_norm: 0.93683651503487, iteration: 128556
loss: 0.9978297352790833,grad_norm: 0.9999991897075937, iteration: 128557
loss: 1.0228930711746216,grad_norm: 0.8178553479612812, iteration: 128558
loss: 1.014267086982727,grad_norm: 0.9999990621378941, iteration: 128559
loss: 1.0129457712173462,grad_norm: 0.923851664618762, iteration: 128560
loss: 0.9996792674064636,grad_norm: 0.9179425476788432, iteration: 128561
loss: 1.0031390190124512,grad_norm: 0.9999991236779056, iteration: 128562
loss: 1.0627856254577637,grad_norm: 0.999999305776896, iteration: 128563
loss: 0.9672895669937134,grad_norm: 0.8945784818365713, iteration: 128564
loss: 1.0000240802764893,grad_norm: 0.999999137920993, iteration: 128565
loss: 1.1441972255706787,grad_norm: 0.9999999248734666, iteration: 128566
loss: 0.9881292581558228,grad_norm: 0.9999989816227571, iteration: 128567
loss: 1.0176048278808594,grad_norm: 0.9999991382584339, iteration: 128568
loss: 1.018760085105896,grad_norm: 0.9999999614953741, iteration: 128569
loss: 1.0274012088775635,grad_norm: 0.9800111043226571, iteration: 128570
loss: 0.9778818488121033,grad_norm: 0.9999991715843816, iteration: 128571
loss: 1.011717438697815,grad_norm: 0.9652543210192919, iteration: 128572
loss: 1.0086870193481445,grad_norm: 0.9999991606663953, iteration: 128573
loss: 0.9758407473564148,grad_norm: 0.9999991223179954, iteration: 128574
loss: 1.0005336999893188,grad_norm: 0.9999992242174048, iteration: 128575
loss: 1.0287519693374634,grad_norm: 0.9999990914425156, iteration: 128576
loss: 0.9907494187355042,grad_norm: 0.9623547580997058, iteration: 128577
loss: 1.0463472604751587,grad_norm: 0.9999999788311338, iteration: 128578
loss: 1.136278510093689,grad_norm: 0.9999990866394016, iteration: 128579
loss: 0.991631031036377,grad_norm: 0.9999990595220366, iteration: 128580
loss: 0.9889662861824036,grad_norm: 0.9999992184373627, iteration: 128581
loss: 1.0332224369049072,grad_norm: 0.8467174327296276, iteration: 128582
loss: 0.9967222809791565,grad_norm: 0.9999991290763072, iteration: 128583
loss: 1.0188895463943481,grad_norm: 0.9931522451711066, iteration: 128584
loss: 0.996025562286377,grad_norm: 0.9999990864369364, iteration: 128585
loss: 1.0118097066879272,grad_norm: 0.9999995690757316, iteration: 128586
loss: 1.0258325338363647,grad_norm: 0.999999698281549, iteration: 128587
loss: 1.016179084777832,grad_norm: 0.9999989425372048, iteration: 128588
loss: 1.0230822563171387,grad_norm: 0.9999991364636744, iteration: 128589
loss: 1.0512958765029907,grad_norm: 0.9999994149384404, iteration: 128590
loss: 1.0616976022720337,grad_norm: 0.9999994394309146, iteration: 128591
loss: 0.9966967105865479,grad_norm: 0.9702597750954208, iteration: 128592
loss: 0.9698584079742432,grad_norm: 0.999999103825356, iteration: 128593
loss: 1.0518839359283447,grad_norm: 0.9999993860840938, iteration: 128594
loss: 1.0628162622451782,grad_norm: 0.9999997224105147, iteration: 128595
loss: 1.0062551498413086,grad_norm: 0.9999990564732908, iteration: 128596
loss: 1.0186632871627808,grad_norm: 0.9999991688064293, iteration: 128597
loss: 0.9782778024673462,grad_norm: 0.9999991754563068, iteration: 128598
loss: 1.0346590280532837,grad_norm: 0.9999993472632762, iteration: 128599
loss: 1.0095508098602295,grad_norm: 0.9999992622328927, iteration: 128600
loss: 1.0155202150344849,grad_norm: 0.9350976363147077, iteration: 128601
loss: 1.0265631675720215,grad_norm: 0.9999991267791377, iteration: 128602
loss: 1.0053255558013916,grad_norm: 0.9678251127982056, iteration: 128603
loss: 1.0115457773208618,grad_norm: 0.9999992127131342, iteration: 128604
loss: 1.025418996810913,grad_norm: 0.9045333165204713, iteration: 128605
loss: 1.0012861490249634,grad_norm: 0.999999161994731, iteration: 128606
loss: 1.350730061531067,grad_norm: 0.999999611219623, iteration: 128607
loss: 1.0428627729415894,grad_norm: 0.9999993702473323, iteration: 128608
loss: 0.995600700378418,grad_norm: 0.9343188400510897, iteration: 128609
loss: 1.0164895057678223,grad_norm: 0.9999991015415522, iteration: 128610
loss: 0.9895566701889038,grad_norm: 0.9999989474703752, iteration: 128611
loss: 0.9658005833625793,grad_norm: 0.9999992768142791, iteration: 128612
loss: 1.0268808603286743,grad_norm: 0.9999991643552466, iteration: 128613
loss: 0.9914140105247498,grad_norm: 0.9999989671751943, iteration: 128614
loss: 1.016975998878479,grad_norm: 0.9999992610861546, iteration: 128615
loss: 1.0426945686340332,grad_norm: 0.9999991395238351, iteration: 128616
loss: 0.9914857745170593,grad_norm: 0.9999997016767148, iteration: 128617
loss: 1.0190367698669434,grad_norm: 0.9641312735796254, iteration: 128618
loss: 1.0327918529510498,grad_norm: 0.9999991246350244, iteration: 128619
loss: 1.0738880634307861,grad_norm: 0.9999992771283692, iteration: 128620
loss: 1.0535510778427124,grad_norm: 0.9999992315977108, iteration: 128621
loss: 1.026879072189331,grad_norm: 0.9800392788338561, iteration: 128622
loss: 1.040939211845398,grad_norm: 0.9999997621451685, iteration: 128623
loss: 1.000715732574463,grad_norm: 0.9999992045008753, iteration: 128624
loss: 1.0288240909576416,grad_norm: 0.9999998633614141, iteration: 128625
loss: 1.0265474319458008,grad_norm: 0.9999991622168835, iteration: 128626
loss: 0.994013786315918,grad_norm: 0.9999991129025558, iteration: 128627
loss: 0.9968259930610657,grad_norm: 0.9999998751437259, iteration: 128628
loss: 0.984555721282959,grad_norm: 0.9913801970993645, iteration: 128629
loss: 1.0673952102661133,grad_norm: 0.9999989866315097, iteration: 128630
loss: 0.9848915934562683,grad_norm: 0.9999991734017092, iteration: 128631
loss: 1.028550624847412,grad_norm: 0.9999991162807191, iteration: 128632
loss: 1.01252281665802,grad_norm: 0.9526036488004299, iteration: 128633
loss: 1.0146766901016235,grad_norm: 0.9999991383338143, iteration: 128634
loss: 1.0246798992156982,grad_norm: 0.9999992987081844, iteration: 128635
loss: 1.0024065971374512,grad_norm: 0.996056054915411, iteration: 128636
loss: 0.975654661655426,grad_norm: 0.9999991631049843, iteration: 128637
loss: 1.0111033916473389,grad_norm: 0.9773542110535228, iteration: 128638
loss: 1.0183396339416504,grad_norm: 0.9999995514014065, iteration: 128639
loss: 1.0070996284484863,grad_norm: 0.9999993445762717, iteration: 128640
loss: 1.0249122381210327,grad_norm: 0.9999997233874901, iteration: 128641
loss: 0.9969584345817566,grad_norm: 0.9999993535186168, iteration: 128642
loss: 0.9681375622749329,grad_norm: 0.960929366356556, iteration: 128643
loss: 1.0183460712432861,grad_norm: 0.9999992735662268, iteration: 128644
loss: 0.9988335967063904,grad_norm: 0.8782868477433868, iteration: 128645
loss: 0.9892560243606567,grad_norm: 0.9648071938168791, iteration: 128646
loss: 1.0034419298171997,grad_norm: 0.9768870525800664, iteration: 128647
loss: 0.9706960320472717,grad_norm: 0.9999994284287921, iteration: 128648
loss: 1.0101970434188843,grad_norm: 0.9999991998950944, iteration: 128649
loss: 0.9985899329185486,grad_norm: 0.9903923325477604, iteration: 128650
loss: 0.9914243817329407,grad_norm: 0.9974592046074803, iteration: 128651
loss: 1.016271948814392,grad_norm: 0.9999991570449838, iteration: 128652
loss: 1.0008912086486816,grad_norm: 0.9999992422753738, iteration: 128653
loss: 0.9685319662094116,grad_norm: 0.999999120792647, iteration: 128654
loss: 1.0168402194976807,grad_norm: 0.9158936590489022, iteration: 128655
loss: 0.9908366203308105,grad_norm: 0.999998945994858, iteration: 128656
loss: 1.013532280921936,grad_norm: 0.9999992134968981, iteration: 128657
loss: 0.9912394285202026,grad_norm: 0.9442314057387639, iteration: 128658
loss: 0.9842805862426758,grad_norm: 0.9999994326198043, iteration: 128659
loss: 0.9969475269317627,grad_norm: 0.9999991767585796, iteration: 128660
loss: 1.000539779663086,grad_norm: 0.9999993262662257, iteration: 128661
loss: 0.9768054485321045,grad_norm: 0.9999991030082614, iteration: 128662
loss: 1.0763360261917114,grad_norm: 0.9999991381425443, iteration: 128663
loss: 0.9924994111061096,grad_norm: 0.9999989677780271, iteration: 128664
loss: 0.968707799911499,grad_norm: 0.9999990199519826, iteration: 128665
loss: 1.017621636390686,grad_norm: 0.9999991483396976, iteration: 128666
loss: 1.0267547369003296,grad_norm: 0.9999990039909845, iteration: 128667
loss: 1.0164958238601685,grad_norm: 0.9999995310288542, iteration: 128668
loss: 1.0230497121810913,grad_norm: 0.9446333453378722, iteration: 128669
loss: 1.0340570211410522,grad_norm: 0.8806364215890088, iteration: 128670
loss: 1.0101795196533203,grad_norm: 0.9471997036607877, iteration: 128671
loss: 0.9987772107124329,grad_norm: 0.9999989132142255, iteration: 128672
loss: 1.0179855823516846,grad_norm: 0.9999998132644246, iteration: 128673
loss: 1.0711216926574707,grad_norm: 0.999999812966454, iteration: 128674
loss: 1.0307074785232544,grad_norm: 0.9999990543634104, iteration: 128675
loss: 0.9608651995658875,grad_norm: 0.9863448897488706, iteration: 128676
loss: 1.0188871622085571,grad_norm: 0.99999926938597, iteration: 128677
loss: 1.0130528211593628,grad_norm: 0.9999991316537707, iteration: 128678
loss: 1.0150996446609497,grad_norm: 0.999999697265785, iteration: 128679
loss: 1.0267937183380127,grad_norm: 0.9999991739194299, iteration: 128680
loss: 0.9544028639793396,grad_norm: 0.9999989229579364, iteration: 128681
loss: 0.9994772672653198,grad_norm: 0.9999999296874557, iteration: 128682
loss: 1.151721715927124,grad_norm: 0.9999993199442863, iteration: 128683
loss: 1.0240765810012817,grad_norm: 0.9999990895195748, iteration: 128684
loss: 1.001819372177124,grad_norm: 0.9999991219280366, iteration: 128685
loss: 1.0057283639907837,grad_norm: 0.9999990168586577, iteration: 128686
loss: 1.0382440090179443,grad_norm: 0.9213902135356715, iteration: 128687
loss: 1.0325745344161987,grad_norm: 0.9148692704060394, iteration: 128688
loss: 1.0231666564941406,grad_norm: 0.9999990639530122, iteration: 128689
loss: 1.002993106842041,grad_norm: 0.999999676338899, iteration: 128690
loss: 1.0188432931900024,grad_norm: 0.9193971278261496, iteration: 128691
loss: 0.9897026419639587,grad_norm: 0.9443585880998838, iteration: 128692
loss: 0.9827109575271606,grad_norm: 0.9999992531611298, iteration: 128693
loss: 1.0030734539031982,grad_norm: 0.9999993278117889, iteration: 128694
loss: 0.9895050525665283,grad_norm: 0.8926682004712831, iteration: 128695
loss: 1.0482407808303833,grad_norm: 0.9999991314257701, iteration: 128696
loss: 1.0851294994354248,grad_norm: 0.9999998706022177, iteration: 128697
loss: 0.9962884187698364,grad_norm: 0.927745150421189, iteration: 128698
loss: 0.9738922119140625,grad_norm: 0.9777150031393934, iteration: 128699
loss: 0.996061384677887,grad_norm: 0.9999993500887207, iteration: 128700
loss: 1.00088632106781,grad_norm: 0.999999171913225, iteration: 128701
loss: 0.9883089065551758,grad_norm: 0.9999990138829249, iteration: 128702
loss: 0.9979327321052551,grad_norm: 0.9999991233872463, iteration: 128703
loss: 1.0009262561798096,grad_norm: 0.9764085467050548, iteration: 128704
loss: 0.9596816301345825,grad_norm: 0.9062001626191637, iteration: 128705
loss: 1.1190811395645142,grad_norm: 0.9999998026235324, iteration: 128706
loss: 1.0026946067810059,grad_norm: 0.8842823540776464, iteration: 128707
loss: 1.0268638134002686,grad_norm: 0.9999990626322305, iteration: 128708
loss: 0.9918017387390137,grad_norm: 0.9999991911795202, iteration: 128709
loss: 1.0002703666687012,grad_norm: 0.999999409752975, iteration: 128710
loss: 1.0698193311691284,grad_norm: 0.978465166018779, iteration: 128711
loss: 1.0265967845916748,grad_norm: 0.9250080426368495, iteration: 128712
loss: 0.9926642775535583,grad_norm: 0.999999183383199, iteration: 128713
loss: 1.0288594961166382,grad_norm: 0.9999991819675219, iteration: 128714
loss: 1.0154982805252075,grad_norm: 0.9999991994857452, iteration: 128715
loss: 0.9602951407432556,grad_norm: 0.9619206639435219, iteration: 128716
loss: 0.9988862872123718,grad_norm: 0.9999991028778354, iteration: 128717
loss: 0.9949893355369568,grad_norm: 0.9214055007889634, iteration: 128718
loss: 1.0021170377731323,grad_norm: 0.999999304866086, iteration: 128719
loss: 0.9776082634925842,grad_norm: 0.9999990031354629, iteration: 128720
loss: 0.9922425746917725,grad_norm: 0.9999991243999423, iteration: 128721
loss: 1.1181421279907227,grad_norm: 0.9999994703884011, iteration: 128722
loss: 1.2013534307479858,grad_norm: 0.9999998682168987, iteration: 128723
loss: 1.0208003520965576,grad_norm: 0.9484902978443652, iteration: 128724
loss: 1.0063220262527466,grad_norm: 0.9327084698884179, iteration: 128725
loss: 1.0155531167984009,grad_norm: 0.9999991228779617, iteration: 128726
loss: 1.004836916923523,grad_norm: 0.9999991434567655, iteration: 128727
loss: 0.9970570206642151,grad_norm: 0.999999018923676, iteration: 128728
loss: 1.0552796125411987,grad_norm: 0.999999337521959, iteration: 128729
loss: 1.0510153770446777,grad_norm: 0.9999990196729318, iteration: 128730
loss: 1.0129305124282837,grad_norm: 0.999999639708064, iteration: 128731
loss: 1.005863070487976,grad_norm: 0.9999991115703386, iteration: 128732
loss: 1.0643965005874634,grad_norm: 0.970619949322416, iteration: 128733
loss: 1.0212591886520386,grad_norm: 0.9999990685762821, iteration: 128734
loss: 1.0165013074874878,grad_norm: 0.9999991913085629, iteration: 128735
loss: 1.0046294927597046,grad_norm: 0.9999991632475717, iteration: 128736
loss: 1.0326862335205078,grad_norm: 0.9999994936920745, iteration: 128737
loss: 0.9913959503173828,grad_norm: 0.9999992007552075, iteration: 128738
loss: 1.0329400300979614,grad_norm: 0.9999992248100038, iteration: 128739
loss: 0.9939069151878357,grad_norm: 0.9541534994286112, iteration: 128740
loss: 0.9936239123344421,grad_norm: 0.999999130628137, iteration: 128741
loss: 1.0351964235305786,grad_norm: 0.9999991678123429, iteration: 128742
loss: 0.9794425368309021,grad_norm: 0.9999989902014453, iteration: 128743
loss: 1.0042798519134521,grad_norm: 0.999999615732738, iteration: 128744
loss: 1.0019062757492065,grad_norm: 0.999999222648971, iteration: 128745
loss: 0.9898443818092346,grad_norm: 0.9999990811483649, iteration: 128746
loss: 0.9825187921524048,grad_norm: 0.8269184266619519, iteration: 128747
loss: 1.0263471603393555,grad_norm: 0.8528140764502926, iteration: 128748
loss: 1.0564905405044556,grad_norm: 0.9999992482953333, iteration: 128749
loss: 1.0346678495407104,grad_norm: 0.9999993437181575, iteration: 128750
loss: 1.0169355869293213,grad_norm: 0.9891888610650592, iteration: 128751
loss: 0.9882810711860657,grad_norm: 0.9999991667593532, iteration: 128752
loss: 0.9986587166786194,grad_norm: 0.9961682600539485, iteration: 128753
loss: 1.0153313875198364,grad_norm: 0.9999993148626779, iteration: 128754
loss: 1.060805082321167,grad_norm: 0.9999991378813147, iteration: 128755
loss: 1.0037199258804321,grad_norm: 0.9999990606581745, iteration: 128756
loss: 1.0015956163406372,grad_norm: 0.9999989085773099, iteration: 128757
loss: 0.9980204105377197,grad_norm: 0.9999990834165093, iteration: 128758
loss: 0.9818590879440308,grad_norm: 0.9999990035328755, iteration: 128759
loss: 1.0301088094711304,grad_norm: 0.9999991983048058, iteration: 128760
loss: 0.9982609152793884,grad_norm: 0.9999991656316511, iteration: 128761
loss: 0.9771187901496887,grad_norm: 0.9999991979484868, iteration: 128762
loss: 1.003860592842102,grad_norm: 0.9999989608102884, iteration: 128763
loss: 0.9924123287200928,grad_norm: 0.9167909743430183, iteration: 128764
loss: 1.037236213684082,grad_norm: 0.9524220317829916, iteration: 128765
loss: 1.0022870302200317,grad_norm: 0.9822048473653194, iteration: 128766
loss: 1.0019538402557373,grad_norm: 0.9999990786626299, iteration: 128767
loss: 0.9592081308364868,grad_norm: 0.9999991241347688, iteration: 128768
loss: 1.0087429285049438,grad_norm: 0.9999990010295057, iteration: 128769
loss: 0.9790580868721008,grad_norm: 0.9999991690005233, iteration: 128770
loss: 0.9910835027694702,grad_norm: 0.9999991695003997, iteration: 128771
loss: 0.9713875651359558,grad_norm: 0.9832051847856247, iteration: 128772
loss: 1.0500812530517578,grad_norm: 0.999999353210758, iteration: 128773
loss: 0.9181385040283203,grad_norm: 0.9999992173363939, iteration: 128774
loss: 1.0026925802230835,grad_norm: 0.9999996106404925, iteration: 128775
loss: 0.9996660351753235,grad_norm: 0.8372215424681889, iteration: 128776
loss: 1.0188415050506592,grad_norm: 0.814943798869372, iteration: 128777
loss: 0.9728781580924988,grad_norm: 0.8656760400261995, iteration: 128778
loss: 1.0171246528625488,grad_norm: 0.9999992401267439, iteration: 128779
loss: 1.0170577764511108,grad_norm: 0.9999992392805878, iteration: 128780
loss: 0.9946849942207336,grad_norm: 0.9999992656408492, iteration: 128781
loss: 1.0393321514129639,grad_norm: 0.9999996372996033, iteration: 128782
loss: 1.0213192701339722,grad_norm: 0.9999990919593883, iteration: 128783
loss: 0.9857422113418579,grad_norm: 0.9999993404293948, iteration: 128784
loss: 0.9637929797172546,grad_norm: 0.9999989903119353, iteration: 128785
loss: 1.0096487998962402,grad_norm: 0.9957662179982718, iteration: 128786
loss: 1.0285742282867432,grad_norm: 0.9999991065059718, iteration: 128787
loss: 1.0256544351577759,grad_norm: 0.9904843114946559, iteration: 128788
loss: 0.9856321811676025,grad_norm: 0.9999991479915342, iteration: 128789
loss: 1.0084651708602905,grad_norm: 0.9685306489283297, iteration: 128790
loss: 1.023202657699585,grad_norm: 0.9999991358258057, iteration: 128791
loss: 1.0109423398971558,grad_norm: 0.9999991255320437, iteration: 128792
loss: 0.9589641094207764,grad_norm: 0.9999990413400683, iteration: 128793
loss: 0.9916768074035645,grad_norm: 0.999999291195152, iteration: 128794
loss: 1.0136953592300415,grad_norm: 0.9999991363452736, iteration: 128795
loss: 1.026764154434204,grad_norm: 0.9999993465596472, iteration: 128796
loss: 1.024915099143982,grad_norm: 0.9999991357474772, iteration: 128797
loss: 1.0220354795455933,grad_norm: 0.9999990821474904, iteration: 128798
loss: 1.0299972295761108,grad_norm: 0.9660558447067145, iteration: 128799
loss: 1.0041612386703491,grad_norm: 0.9999994599102427, iteration: 128800
loss: 0.9932817220687866,grad_norm: 0.9999990924979121, iteration: 128801
loss: 0.9946127533912659,grad_norm: 0.999999220871942, iteration: 128802
loss: 0.9850530028343201,grad_norm: 0.9242800825300579, iteration: 128803
loss: 0.9645523428916931,grad_norm: 0.9985693275600169, iteration: 128804
loss: 1.0182982683181763,grad_norm: 0.9380986698705338, iteration: 128805
loss: 1.0162041187286377,grad_norm: 0.9537376938532653, iteration: 128806
loss: 1.0177757740020752,grad_norm: 0.9136197809273465, iteration: 128807
loss: 0.9610404372215271,grad_norm: 0.9651730891908175, iteration: 128808
loss: 1.0350219011306763,grad_norm: 0.9755246763433897, iteration: 128809
loss: 1.0612425804138184,grad_norm: 0.9999997415190989, iteration: 128810
loss: 1.0357682704925537,grad_norm: 0.999999281900416, iteration: 128811
loss: 0.9760320782661438,grad_norm: 0.9677087752433619, iteration: 128812
loss: 1.0215204954147339,grad_norm: 0.9999991079264237, iteration: 128813
loss: 0.9486463665962219,grad_norm: 0.9999991507793625, iteration: 128814
loss: 1.1369794607162476,grad_norm: 0.9999992799833515, iteration: 128815
loss: 1.1341785192489624,grad_norm: 0.9999990847626895, iteration: 128816
loss: 0.9919629693031311,grad_norm: 0.999999035392317, iteration: 128817
loss: 0.9662917852401733,grad_norm: 0.9999992043294671, iteration: 128818
loss: 0.9895464777946472,grad_norm: 0.9999991359503292, iteration: 128819
loss: 0.997563898563385,grad_norm: 0.9999991310932252, iteration: 128820
loss: 1.0533922910690308,grad_norm: 0.999999215299553, iteration: 128821
loss: 1.055027723312378,grad_norm: 0.9840259212673721, iteration: 128822
loss: 1.0871093273162842,grad_norm: 0.9999991750656356, iteration: 128823
loss: 0.9986986517906189,grad_norm: 0.9999991610555771, iteration: 128824
loss: 0.9758633375167847,grad_norm: 0.9259469970545271, iteration: 128825
loss: 1.026018738746643,grad_norm: 0.9999999088391058, iteration: 128826
loss: 0.9653416275978088,grad_norm: 0.9999991412648421, iteration: 128827
loss: 0.9884551763534546,grad_norm: 0.9999991005732048, iteration: 128828
loss: 1.107454776763916,grad_norm: 0.999999272213541, iteration: 128829
loss: 0.992407500743866,grad_norm: 0.9984163566302912, iteration: 128830
loss: 1.022891879081726,grad_norm: 0.9358289324321851, iteration: 128831
loss: 1.0125789642333984,grad_norm: 0.9934409959231987, iteration: 128832
loss: 0.9925645589828491,grad_norm: 0.9999994188440976, iteration: 128833
loss: 1.0250096321105957,grad_norm: 0.9999990617120909, iteration: 128834
loss: 0.9816464781761169,grad_norm: 0.9999990733649908, iteration: 128835
loss: 1.008095622062683,grad_norm: 0.9316756300411363, iteration: 128836
loss: 1.0173555612564087,grad_norm: 0.7800030302426086, iteration: 128837
loss: 1.2063804864883423,grad_norm: 0.999999250628448, iteration: 128838
loss: 0.991248607635498,grad_norm: 0.9218349021365104, iteration: 128839
loss: 1.0194697380065918,grad_norm: 0.9166525130432966, iteration: 128840
loss: 0.9939108490943909,grad_norm: 0.9999990702323269, iteration: 128841
loss: 1.0643144845962524,grad_norm: 1.0000000101121465, iteration: 128842
loss: 0.9939287900924683,grad_norm: 0.999999175879477, iteration: 128843
loss: 1.011946201324463,grad_norm: 0.9999991285007672, iteration: 128844
loss: 1.1092408895492554,grad_norm: 0.9999998396037018, iteration: 128845
loss: 1.028542399406433,grad_norm: 0.9553494432426896, iteration: 128846
loss: 0.9819920659065247,grad_norm: 0.9999991198030129, iteration: 128847
loss: 1.0428649187088013,grad_norm: 0.9999990390766088, iteration: 128848
loss: 1.0105406045913696,grad_norm: 0.9999989367420914, iteration: 128849
loss: 0.9908328652381897,grad_norm: 0.9999991718028378, iteration: 128850
loss: 0.9864968657493591,grad_norm: 0.9230283624513372, iteration: 128851
loss: 1.0966126918792725,grad_norm: 0.999999872789415, iteration: 128852
loss: 0.9966059327125549,grad_norm: 0.9999991126118515, iteration: 128853
loss: 1.0375453233718872,grad_norm: 0.9999994044336417, iteration: 128854
loss: 1.0056440830230713,grad_norm: 0.9983716927094691, iteration: 128855
loss: 1.021647334098816,grad_norm: 0.9966348467413184, iteration: 128856
loss: 1.0201005935668945,grad_norm: 0.9999989875918093, iteration: 128857
loss: 1.0984917879104614,grad_norm: 0.9999992666601664, iteration: 128858
loss: 1.0030643939971924,grad_norm: 0.999999187764565, iteration: 128859
loss: 1.0231914520263672,grad_norm: 0.9905455194091469, iteration: 128860
loss: 1.1653389930725098,grad_norm: 0.9999992095447071, iteration: 128861
loss: 1.063593864440918,grad_norm: 0.999999788806241, iteration: 128862
loss: 0.9531254172325134,grad_norm: 0.8902989513813979, iteration: 128863
loss: 1.0856437683105469,grad_norm: 0.9935984812209692, iteration: 128864
loss: 1.2098993062973022,grad_norm: 0.9999992210631733, iteration: 128865
loss: 1.0763905048370361,grad_norm: 1.0000000453310454, iteration: 128866
loss: 1.016153335571289,grad_norm: 0.9999989923671269, iteration: 128867
loss: 1.003851294517517,grad_norm: 0.8891220449900018, iteration: 128868
loss: 1.0066455602645874,grad_norm: 0.9999990902032323, iteration: 128869
loss: 1.0070031881332397,grad_norm: 0.9999991817400083, iteration: 128870
loss: 1.0009067058563232,grad_norm: 0.999999092352901, iteration: 128871
loss: 1.2111332416534424,grad_norm: 0.999999425726566, iteration: 128872
loss: 1.0212334394454956,grad_norm: 0.9999990422103684, iteration: 128873
loss: 1.0266178846359253,grad_norm: 0.9999989451674796, iteration: 128874
loss: 1.0164622068405151,grad_norm: 0.9999990017546619, iteration: 128875
loss: 0.9844695329666138,grad_norm: 0.9999992335737449, iteration: 128876
loss: 1.0369043350219727,grad_norm: 0.9999995214821452, iteration: 128877
loss: 0.995514452457428,grad_norm: 0.9999991363543497, iteration: 128878
loss: 0.9851450324058533,grad_norm: 0.9999991205572225, iteration: 128879
loss: 0.9882420301437378,grad_norm: 0.9008270049776856, iteration: 128880
loss: 0.9675164222717285,grad_norm: 0.9999997726177956, iteration: 128881
loss: 1.0117230415344238,grad_norm: 0.9999992258969754, iteration: 128882
loss: 0.9833315014839172,grad_norm: 0.9905036469223529, iteration: 128883
loss: 1.01760995388031,grad_norm: 0.9999991605055372, iteration: 128884
loss: 1.0016885995864868,grad_norm: 0.9999992088543448, iteration: 128885
loss: 0.9829227924346924,grad_norm: 0.7504095213165791, iteration: 128886
loss: 1.0315264463424683,grad_norm: 0.9999992372849447, iteration: 128887
loss: 1.1241952180862427,grad_norm: 0.9999998391906972, iteration: 128888
loss: 1.0273642539978027,grad_norm: 0.9999996658622902, iteration: 128889
loss: 1.030268907546997,grad_norm: 0.9999995789033074, iteration: 128890
loss: 1.006790041923523,grad_norm: 0.9999990946699913, iteration: 128891
loss: 1.0700894594192505,grad_norm: 0.9999997049031298, iteration: 128892
loss: 0.9966558814048767,grad_norm: 0.9999992345586467, iteration: 128893
loss: 1.0118595361709595,grad_norm: 0.9999990735354283, iteration: 128894
loss: 1.0423641204833984,grad_norm: 0.9999991192108841, iteration: 128895
loss: 1.0038704872131348,grad_norm: 0.9999991602552208, iteration: 128896
loss: 1.0119030475616455,grad_norm: 0.9999991578290193, iteration: 128897
loss: 0.9929038882255554,grad_norm: 0.9999991716451708, iteration: 128898
loss: 1.0289812088012695,grad_norm: 0.9463426832980824, iteration: 128899
loss: 1.025282859802246,grad_norm: 0.8109482007282495, iteration: 128900
loss: 1.0077035427093506,grad_norm: 0.9999990550977003, iteration: 128901
loss: 1.0271241664886475,grad_norm: 0.8500473223736681, iteration: 128902
loss: 0.9853811860084534,grad_norm: 0.9999993078888725, iteration: 128903
loss: 1.0430908203125,grad_norm: 0.995480103765895, iteration: 128904
loss: 0.9709177017211914,grad_norm: 0.9999992373946, iteration: 128905
loss: 1.014627456665039,grad_norm: 0.9999992684294763, iteration: 128906
loss: 0.9945530891418457,grad_norm: 0.9882801667118709, iteration: 128907
loss: 1.0007672309875488,grad_norm: 0.9999994505958851, iteration: 128908
loss: 0.9666643142700195,grad_norm: 0.897098809446021, iteration: 128909
loss: 0.9806551337242126,grad_norm: 0.9999990579900313, iteration: 128910
loss: 1.0169167518615723,grad_norm: 0.9354166150132568, iteration: 128911
loss: 1.0190917253494263,grad_norm: 0.9881529291238829, iteration: 128912
loss: 0.9848420023918152,grad_norm: 0.9999989035191151, iteration: 128913
loss: 0.9899580478668213,grad_norm: 0.9999991132358116, iteration: 128914
loss: 0.9758580327033997,grad_norm: 0.9999991624915829, iteration: 128915
loss: 0.9917898774147034,grad_norm: 0.9999991211468385, iteration: 128916
loss: 0.9881532788276672,grad_norm: 0.9439114676351201, iteration: 128917
loss: 0.9722669720649719,grad_norm: 0.9708771312196532, iteration: 128918
loss: 1.012188196182251,grad_norm: 0.999999185959582, iteration: 128919
loss: 1.0395030975341797,grad_norm: 0.9999992498659763, iteration: 128920
loss: 0.9658920168876648,grad_norm: 0.9999992007837639, iteration: 128921
loss: 1.0103774070739746,grad_norm: 0.9999992639391637, iteration: 128922
loss: 1.0486875772476196,grad_norm: 0.9999994461257782, iteration: 128923
loss: 0.9864557981491089,grad_norm: 0.9547383613792396, iteration: 128924
loss: 1.0166093111038208,grad_norm: 0.999999024705635, iteration: 128925
loss: 1.0484702587127686,grad_norm: 0.9999993137610056, iteration: 128926
loss: 0.9607962369918823,grad_norm: 0.9922531281546867, iteration: 128927
loss: 0.9817708134651184,grad_norm: 0.8757532559172377, iteration: 128928
loss: 1.0789791345596313,grad_norm: 0.9999992777393344, iteration: 128929
loss: 1.0456897020339966,grad_norm: 0.9999990769676341, iteration: 128930
loss: 0.9826843738555908,grad_norm: 0.9999992574710018, iteration: 128931
loss: 1.0331393480300903,grad_norm: 0.9999991213130406, iteration: 128932
loss: 0.9581146240234375,grad_norm: 0.8796173919549304, iteration: 128933
loss: 0.9719487428665161,grad_norm: 0.9999990705294636, iteration: 128934
loss: 1.0051202774047852,grad_norm: 0.9999990378053335, iteration: 128935
loss: 0.9874258637428284,grad_norm: 0.9999991167566166, iteration: 128936
loss: 0.9980834722518921,grad_norm: 0.9999992498445636, iteration: 128937
loss: 1.0140613317489624,grad_norm: 0.9779581326065397, iteration: 128938
loss: 1.0284721851348877,grad_norm: 0.9999992717508245, iteration: 128939
loss: 1.0156935453414917,grad_norm: 0.9999991608716069, iteration: 128940
loss: 0.9761514663696289,grad_norm: 0.9421471437905754, iteration: 128941
loss: 1.015132188796997,grad_norm: 0.9999991791155441, iteration: 128942
loss: 1.0253690481185913,grad_norm: 0.9999995041784203, iteration: 128943
loss: 1.005113124847412,grad_norm: 0.9999990508431895, iteration: 128944
loss: 1.0298686027526855,grad_norm: 0.9999991650114631, iteration: 128945
loss: 0.9866203665733337,grad_norm: 0.9973528280402164, iteration: 128946
loss: 0.9936808347702026,grad_norm: 0.9999991661202987, iteration: 128947
loss: 1.0030224323272705,grad_norm: 0.9999991797476387, iteration: 128948
loss: 0.992892324924469,grad_norm: 0.9776730929740719, iteration: 128949
loss: 0.9851788878440857,grad_norm: 0.999999083814413, iteration: 128950
loss: 0.9819719791412354,grad_norm: 0.9999989804453346, iteration: 128951
loss: 1.0036038160324097,grad_norm: 0.8923996240362126, iteration: 128952
loss: 0.9706064462661743,grad_norm: 0.9641845114610132, iteration: 128953
loss: 0.9892361760139465,grad_norm: 0.9999991987857255, iteration: 128954
loss: 1.0460740327835083,grad_norm: 0.9999993829005095, iteration: 128955
loss: 0.9758116602897644,grad_norm: 0.9999991167321834, iteration: 128956
loss: 1.0232698917388916,grad_norm: 0.9999991902078448, iteration: 128957
loss: 0.9891812205314636,grad_norm: 0.9999991891894431, iteration: 128958
loss: 0.9676172733306885,grad_norm: 0.9999992190016829, iteration: 128959
loss: 0.981199324131012,grad_norm: 0.9277246922234724, iteration: 128960
loss: 0.9883853197097778,grad_norm: 0.9330902380523094, iteration: 128961
loss: 0.970689594745636,grad_norm: 0.9999991595624118, iteration: 128962
loss: 0.984748363494873,grad_norm: 0.9999990084789329, iteration: 128963
loss: 0.9745091795921326,grad_norm: 0.8594842447556824, iteration: 128964
loss: 0.9833301305770874,grad_norm: 0.999999176965652, iteration: 128965
loss: 1.0004775524139404,grad_norm: 0.9796978597894545, iteration: 128966
loss: 1.0285621881484985,grad_norm: 0.9999992021378182, iteration: 128967
loss: 0.9691712260246277,grad_norm: 0.9092839563302257, iteration: 128968
loss: 0.9834273457527161,grad_norm: 0.9292876585239191, iteration: 128969
loss: 1.0342551469802856,grad_norm: 0.9905422798600049, iteration: 128970
loss: 1.0174068212509155,grad_norm: 0.9855015650682328, iteration: 128971
loss: 0.9994016289710999,grad_norm: 0.999999162892855, iteration: 128972
loss: 0.9937434196472168,grad_norm: 0.8738427029583301, iteration: 128973
loss: 0.9867232441902161,grad_norm: 0.9999989753952533, iteration: 128974
loss: 1.0034605264663696,grad_norm: 0.8506133861633813, iteration: 128975
loss: 1.048410177230835,grad_norm: 0.9999995733348572, iteration: 128976
loss: 1.0027226209640503,grad_norm: 0.9999990445927323, iteration: 128977
loss: 1.0366564989089966,grad_norm: 0.9999991368335069, iteration: 128978
loss: 0.9700026512145996,grad_norm: 0.9999990713950689, iteration: 128979
loss: 0.9862259030342102,grad_norm: 0.9613605120190417, iteration: 128980
loss: 1.0104395151138306,grad_norm: 0.9999991559793948, iteration: 128981
loss: 0.9608156681060791,grad_norm: 0.999999054785246, iteration: 128982
loss: 0.9776609539985657,grad_norm: 0.9899637769499157, iteration: 128983
loss: 1.009940266609192,grad_norm: 0.9551922909703005, iteration: 128984
loss: 1.0134645700454712,grad_norm: 0.9999995597338489, iteration: 128985
loss: 0.9823969006538391,grad_norm: 0.9999990068449515, iteration: 128986
loss: 0.9869969487190247,grad_norm: 0.9999991912750076, iteration: 128987
loss: 0.9824097156524658,grad_norm: 0.9999988963739275, iteration: 128988
loss: 1.0046985149383545,grad_norm: 0.815874402747218, iteration: 128989
loss: 1.0105561017990112,grad_norm: 0.9917045798279632, iteration: 128990
loss: 0.9724982380867004,grad_norm: 0.8475266801879952, iteration: 128991
loss: 1.0074318647384644,grad_norm: 0.9999990047428334, iteration: 128992
loss: 1.0127499103546143,grad_norm: 0.999999169110185, iteration: 128993
loss: 1.0240508317947388,grad_norm: 0.9748912331373695, iteration: 128994
loss: 0.9836682081222534,grad_norm: 0.9999992433579242, iteration: 128995
loss: 1.0089470148086548,grad_norm: 0.9795064044071478, iteration: 128996
loss: 0.9974076747894287,grad_norm: 0.9791632506614807, iteration: 128997
loss: 1.01643967628479,grad_norm: 0.9999992454699909, iteration: 128998
loss: 0.9706908464431763,grad_norm: 0.9999990379317696, iteration: 128999
loss: 0.9928564429283142,grad_norm: 0.8689907382762508, iteration: 129000
loss: 0.9807254076004028,grad_norm: 0.9786787986254973, iteration: 129001
loss: 1.0112348794937134,grad_norm: 0.9999992235691614, iteration: 129002
loss: 1.0276515483856201,grad_norm: 0.9999991821931122, iteration: 129003
loss: 1.0175210237503052,grad_norm: 0.9999992711046315, iteration: 129004
loss: 1.0285223722457886,grad_norm: 0.9999991488546702, iteration: 129005
loss: 1.0121756792068481,grad_norm: 0.9463680399661744, iteration: 129006
loss: 1.0270155668258667,grad_norm: 0.9269514920305325, iteration: 129007
loss: 0.994766891002655,grad_norm: 0.8752124392058208, iteration: 129008
loss: 1.003300428390503,grad_norm: 0.999999384224913, iteration: 129009
loss: 1.0161031484603882,grad_norm: 0.9999991028941717, iteration: 129010
loss: 0.9962787628173828,grad_norm: 0.99999913917703, iteration: 129011
loss: 1.0146089792251587,grad_norm: 0.8552141323833766, iteration: 129012
loss: 1.0204858779907227,grad_norm: 0.9926275770512673, iteration: 129013
loss: 1.0186036825180054,grad_norm: 0.9999993126643084, iteration: 129014
loss: 1.033549189567566,grad_norm: 0.8898972391132586, iteration: 129015
loss: 1.0122109651565552,grad_norm: 0.9999991539828316, iteration: 129016
loss: 1.0226829051971436,grad_norm: 0.9999991812814196, iteration: 129017
loss: 1.02874755859375,grad_norm: 0.9999989393635015, iteration: 129018
loss: 0.9952395558357239,grad_norm: 0.8882166770307756, iteration: 129019
loss: 1.0119224786758423,grad_norm: 0.9999990896609106, iteration: 129020
loss: 0.9969539642333984,grad_norm: 0.9999991548329122, iteration: 129021
loss: 0.9646472930908203,grad_norm: 0.9722218481940725, iteration: 129022
loss: 0.9825645089149475,grad_norm: 0.8174055705766639, iteration: 129023
loss: 1.0180423259735107,grad_norm: 0.987080243627019, iteration: 129024
loss: 0.995526909828186,grad_norm: 0.9999990770906533, iteration: 129025
loss: 1.0008474588394165,grad_norm: 0.9999990870716569, iteration: 129026
loss: 1.0204476118087769,grad_norm: 0.9999993042808477, iteration: 129027
loss: 1.0459264516830444,grad_norm: 0.9999990569672469, iteration: 129028
loss: 0.9943333268165588,grad_norm: 0.9679026364168063, iteration: 129029
loss: 1.0063660144805908,grad_norm: 0.9999991234127272, iteration: 129030
loss: 1.0015188455581665,grad_norm: 0.8644044191098581, iteration: 129031
loss: 1.0132979154586792,grad_norm: 0.9071274241214051, iteration: 129032
loss: 0.9835982322692871,grad_norm: 0.9999990856223095, iteration: 129033
loss: 1.0025421380996704,grad_norm: 0.8547701236697198, iteration: 129034
loss: 1.007413387298584,grad_norm: 0.9999990683549134, iteration: 129035
loss: 1.035681128501892,grad_norm: 0.9999991876203295, iteration: 129036
loss: 0.9960582852363586,grad_norm: 0.9999990529600614, iteration: 129037
loss: 1.0246169567108154,grad_norm: 0.9999991756483303, iteration: 129038
loss: 1.0180612802505493,grad_norm: 0.9999992111229045, iteration: 129039
loss: 1.0237048864364624,grad_norm: 0.9876188879403385, iteration: 129040
loss: 0.9900917410850525,grad_norm: 0.9723782119054405, iteration: 129041
loss: 1.0581191778182983,grad_norm: 0.9999992434562304, iteration: 129042
loss: 1.0054410696029663,grad_norm: 0.9999990722237088, iteration: 129043
loss: 0.9900256991386414,grad_norm: 0.9999990629709486, iteration: 129044
loss: 1.0446891784667969,grad_norm: 0.9999992108723306, iteration: 129045
loss: 1.0063037872314453,grad_norm: 0.9517754843571377, iteration: 129046
loss: 1.0022318363189697,grad_norm: 0.9999990678619662, iteration: 129047
loss: 1.0187731981277466,grad_norm: 0.9999990772348728, iteration: 129048
loss: 1.0038338899612427,grad_norm: 0.9999991516262409, iteration: 129049
loss: 0.9796720743179321,grad_norm: 0.9999991106035069, iteration: 129050
loss: 0.9840027689933777,grad_norm: 0.9573798897768558, iteration: 129051
loss: 0.9781648516654968,grad_norm: 0.9999990070839652, iteration: 129052
loss: 0.9959737658500671,grad_norm: 0.9999991964008824, iteration: 129053
loss: 0.992442786693573,grad_norm: 0.9999991315860145, iteration: 129054
loss: 0.9614023566246033,grad_norm: 0.9999992900501077, iteration: 129055
loss: 0.9784350991249084,grad_norm: 0.9999991359873576, iteration: 129056
loss: 0.9964848756790161,grad_norm: 0.9999992145917734, iteration: 129057
loss: 1.0053620338439941,grad_norm: 0.9999991070455485, iteration: 129058
loss: 0.9837360978126526,grad_norm: 0.9999990607326285, iteration: 129059
loss: 1.010811686515808,grad_norm: 0.9999991285168697, iteration: 129060
loss: 0.9756684899330139,grad_norm: 0.9999992490460121, iteration: 129061
loss: 0.995491087436676,grad_norm: 0.9999990473401613, iteration: 129062
loss: 1.0117766857147217,grad_norm: 0.8521876161104304, iteration: 129063
loss: 0.996300995349884,grad_norm: 0.9999990519397757, iteration: 129064
loss: 0.9730669260025024,grad_norm: 0.9004696658466946, iteration: 129065
loss: 0.9838029742240906,grad_norm: 0.999999010863956, iteration: 129066
loss: 1.018979787826538,grad_norm: 0.8727078278998419, iteration: 129067
loss: 0.9790230393409729,grad_norm: 0.9999990500555267, iteration: 129068
loss: 0.9911452531814575,grad_norm: 0.9999991533672228, iteration: 129069
loss: 1.0360771417617798,grad_norm: 0.9996530110409454, iteration: 129070
loss: 1.0118509531021118,grad_norm: 0.9856910087078201, iteration: 129071
loss: 1.0042762756347656,grad_norm: 0.9999990299891839, iteration: 129072
loss: 0.987093448638916,grad_norm: 0.9999991140643996, iteration: 129073
loss: 1.0106017589569092,grad_norm: 0.8579556268814194, iteration: 129074
loss: 1.0153356790542603,grad_norm: 0.9999998206790677, iteration: 129075
loss: 0.9843338131904602,grad_norm: 0.9537730901286579, iteration: 129076
loss: 0.983945369720459,grad_norm: 0.9999991434418724, iteration: 129077
loss: 1.0303585529327393,grad_norm: 0.9999991637391336, iteration: 129078
loss: 0.9738627672195435,grad_norm: 0.999999194773041, iteration: 129079
loss: 1.0034458637237549,grad_norm: 0.9999991574969633, iteration: 129080
loss: 1.0029969215393066,grad_norm: 0.9999989464527824, iteration: 129081
loss: 0.9694300889968872,grad_norm: 0.9999994740971183, iteration: 129082
loss: 1.0038831233978271,grad_norm: 0.9999991078558186, iteration: 129083
loss: 1.0041745901107788,grad_norm: 0.9868615072971858, iteration: 129084
loss: 1.0314213037490845,grad_norm: 0.9999990605353025, iteration: 129085
loss: 1.011813759803772,grad_norm: 0.9633822733474876, iteration: 129086
loss: 1.005303144454956,grad_norm: 0.9502004764028713, iteration: 129087
loss: 1.0050222873687744,grad_norm: 0.9999992378505016, iteration: 129088
loss: 0.9873053431510925,grad_norm: 0.9848427975761074, iteration: 129089
loss: 0.9849293231964111,grad_norm: 0.9048988829963949, iteration: 129090
loss: 1.028875708580017,grad_norm: 0.9999991314879072, iteration: 129091
loss: 1.0336148738861084,grad_norm: 0.9739836458051578, iteration: 129092
loss: 1.008254885673523,grad_norm: 0.9999993034835623, iteration: 129093
loss: 1.0346168279647827,grad_norm: 0.999999041767672, iteration: 129094
loss: 1.0567967891693115,grad_norm: 0.9999991853909205, iteration: 129095
loss: 0.9631562829017639,grad_norm: 0.9918946486372635, iteration: 129096
loss: 0.9944519400596619,grad_norm: 0.9510019277781836, iteration: 129097
loss: 1.0173282623291016,grad_norm: 0.970250286491242, iteration: 129098
loss: 0.9948710799217224,grad_norm: 0.9999991648049513, iteration: 129099
loss: 1.0084137916564941,grad_norm: 0.9372851210305851, iteration: 129100
loss: 1.001020908355713,grad_norm: 0.9999992554398154, iteration: 129101
loss: 1.010182499885559,grad_norm: 0.9799863306733233, iteration: 129102
loss: 0.9920693635940552,grad_norm: 0.9999989643970949, iteration: 129103
loss: 0.9979275465011597,grad_norm: 0.9999990374302599, iteration: 129104
loss: 0.9880866408348083,grad_norm: 0.9442821184875279, iteration: 129105
loss: 0.9893190264701843,grad_norm: 0.9999992105401265, iteration: 129106
loss: 1.0319942235946655,grad_norm: 0.9999990000738607, iteration: 129107
loss: 0.9834365248680115,grad_norm: 0.8830110090348646, iteration: 129108
loss: 0.9816616773605347,grad_norm: 0.9999990870968786, iteration: 129109
loss: 1.0262808799743652,grad_norm: 0.9843495890867608, iteration: 129110
loss: 0.9998201131820679,grad_norm: 0.8696715511547886, iteration: 129111
loss: 0.9714134335517883,grad_norm: 0.999999184662398, iteration: 129112
loss: 1.0047192573547363,grad_norm: 0.9999990458616077, iteration: 129113
loss: 1.0026803016662598,grad_norm: 0.9311756965305752, iteration: 129114
loss: 0.9963396191596985,grad_norm: 0.9242779288052136, iteration: 129115
loss: 1.0187441110610962,grad_norm: 0.9999992276782725, iteration: 129116
loss: 1.0248404741287231,grad_norm: 0.9999992794855951, iteration: 129117
loss: 0.9975630640983582,grad_norm: 0.9999991375834596, iteration: 129118
loss: 0.9877581596374512,grad_norm: 0.9079588330709362, iteration: 129119
loss: 1.001554012298584,grad_norm: 0.9999989965551179, iteration: 129120
loss: 0.9377567172050476,grad_norm: 0.929833275228168, iteration: 129121
loss: 1.006919264793396,grad_norm: 0.9048149365246593, iteration: 129122
loss: 0.9989992380142212,grad_norm: 0.9999991218201445, iteration: 129123
loss: 0.9885280132293701,grad_norm: 0.9065529756459534, iteration: 129124
loss: 0.9864858388900757,grad_norm: 0.9999989777605544, iteration: 129125
loss: 0.9652340412139893,grad_norm: 0.9672844032647282, iteration: 129126
loss: 0.9551125168800354,grad_norm: 0.9639717960705027, iteration: 129127
loss: 0.9783597588539124,grad_norm: 0.9404111165304664, iteration: 129128
loss: 1.0042363405227661,grad_norm: 0.9771624025640158, iteration: 129129
loss: 1.032172679901123,grad_norm: 0.9999990959260916, iteration: 129130
loss: 0.9929929375648499,grad_norm: 0.9999992492530883, iteration: 129131
loss: 0.9778754115104675,grad_norm: 0.9999991706694155, iteration: 129132
loss: 0.9532891511917114,grad_norm: 0.8898206350530756, iteration: 129133
loss: 1.0057998895645142,grad_norm: 0.9921414882167477, iteration: 129134
loss: 1.023535132408142,grad_norm: 0.9718380857329098, iteration: 129135
loss: 1.0134690999984741,grad_norm: 0.9570405158456244, iteration: 129136
loss: 1.0356206893920898,grad_norm: 0.9999997676986191, iteration: 129137
loss: 1.0413315296173096,grad_norm: 0.9999990754055752, iteration: 129138
loss: 1.0033776760101318,grad_norm: 0.9999990112807222, iteration: 129139
loss: 0.9891803860664368,grad_norm: 0.9999990826286468, iteration: 129140
loss: 1.0700973272323608,grad_norm: 0.9999992929702479, iteration: 129141
loss: 1.0040048360824585,grad_norm: 0.9999990743128525, iteration: 129142
loss: 1.0213226079940796,grad_norm: 0.9999990966880605, iteration: 129143
loss: 0.9849034547805786,grad_norm: 0.9696589703789862, iteration: 129144
loss: 0.9963065385818481,grad_norm: 0.8455497275101486, iteration: 129145
loss: 1.000802993774414,grad_norm: 0.9999993098365585, iteration: 129146
loss: 1.0096803903579712,grad_norm: 0.9999989511082752, iteration: 129147
loss: 0.9850700497627258,grad_norm: 0.9134981380226729, iteration: 129148
loss: 0.9936732053756714,grad_norm: 0.9999992017122556, iteration: 129149
loss: 0.9209474921226501,grad_norm: 0.9392595284185229, iteration: 129150
loss: 0.9974128007888794,grad_norm: 0.9999988994860127, iteration: 129151
loss: 0.9949629902839661,grad_norm: 0.9999990944234648, iteration: 129152
loss: 1.0035364627838135,grad_norm: 0.9999990329105364, iteration: 129153
loss: 1.0149610042572021,grad_norm: 0.999999490663228, iteration: 129154
loss: 1.0147581100463867,grad_norm: 0.9737461949701438, iteration: 129155
loss: 0.9967896342277527,grad_norm: 0.9999991448520188, iteration: 129156
loss: 0.9719046950340271,grad_norm: 0.8477553790000004, iteration: 129157
loss: 0.9826900362968445,grad_norm: 0.9807070626034892, iteration: 129158
loss: 1.0470634698867798,grad_norm: 0.9999991601114275, iteration: 129159
loss: 1.0044857263565063,grad_norm: 0.9999991861482155, iteration: 129160
loss: 0.973945140838623,grad_norm: 0.9999991146243379, iteration: 129161
loss: 1.010974645614624,grad_norm: 0.9999990682768133, iteration: 129162
loss: 1.0609265565872192,grad_norm: 0.999999682615823, iteration: 129163
loss: 1.0124318599700928,grad_norm: 0.9999990700394831, iteration: 129164
loss: 1.0314877033233643,grad_norm: 0.9999991104344793, iteration: 129165
loss: 0.9681056141853333,grad_norm: 0.8729622789689226, iteration: 129166
loss: 1.0098434686660767,grad_norm: 0.9999991771311347, iteration: 129167
loss: 0.9916917085647583,grad_norm: 0.9999992034728412, iteration: 129168
loss: 1.0122673511505127,grad_norm: 0.9999992279400021, iteration: 129169
loss: 0.992893636226654,grad_norm: 0.893363396232897, iteration: 129170
loss: 0.9674697518348694,grad_norm: 0.9922036872932789, iteration: 129171
loss: 0.9887487292289734,grad_norm: 0.999999223574178, iteration: 129172
loss: 0.9726794958114624,grad_norm: 0.9999991714946405, iteration: 129173
loss: 1.0152734518051147,grad_norm: 0.9999991218717531, iteration: 129174
loss: 0.997711181640625,grad_norm: 0.8952577396482045, iteration: 129175
loss: 0.9811784029006958,grad_norm: 0.9999991225926035, iteration: 129176
loss: 0.9650119543075562,grad_norm: 0.9999993118134222, iteration: 129177
loss: 0.983978271484375,grad_norm: 0.924281982407048, iteration: 129178
loss: 0.9923297166824341,grad_norm: 0.9326881468974427, iteration: 129179
loss: 0.9881885051727295,grad_norm: 0.999999155486105, iteration: 129180
loss: 1.0190616846084595,grad_norm: 0.999999254155379, iteration: 129181
loss: 1.0015043020248413,grad_norm: 0.9579284499865849, iteration: 129182
loss: 0.9964246153831482,grad_norm: 0.8592534274937331, iteration: 129183
loss: 0.9906221628189087,grad_norm: 0.9999988819457721, iteration: 129184
loss: 0.9695422053337097,grad_norm: 0.9644644112495058, iteration: 129185
loss: 0.9564995765686035,grad_norm: 0.9999991039846621, iteration: 129186
loss: 0.9794316291809082,grad_norm: 0.8995246566393729, iteration: 129187
loss: 0.9757024049758911,grad_norm: 0.9999991494532294, iteration: 129188
loss: 0.9994841814041138,grad_norm: 0.9999991295287995, iteration: 129189
loss: 0.9900233745574951,grad_norm: 0.8998093625387429, iteration: 129190
loss: 1.00101637840271,grad_norm: 0.9327926795601716, iteration: 129191
loss: 1.0010381937026978,grad_norm: 0.9999991253594499, iteration: 129192
loss: 0.9832783937454224,grad_norm: 0.9999990770192172, iteration: 129193
loss: 1.0207988023757935,grad_norm: 0.9999991545969642, iteration: 129194
loss: 0.9981194138526917,grad_norm: 0.877129805509443, iteration: 129195
loss: 0.9849021434783936,grad_norm: 0.9999992137238071, iteration: 129196
loss: 1.0014094114303589,grad_norm: 0.9135954691481835, iteration: 129197
loss: 1.0111795663833618,grad_norm: 0.8046457325932931, iteration: 129198
loss: 1.0192927122116089,grad_norm: 0.9646823518726476, iteration: 129199
loss: 0.9776806831359863,grad_norm: 0.8072519135863901, iteration: 129200
loss: 1.0323677062988281,grad_norm: 0.99999919151465, iteration: 129201
loss: 0.9968647956848145,grad_norm: 0.9123949000276381, iteration: 129202
loss: 0.9728979468345642,grad_norm: 0.9999991239250079, iteration: 129203
loss: 1.016947627067566,grad_norm: 0.9967304259599818, iteration: 129204
loss: 1.0028235912322998,grad_norm: 0.9999990591418483, iteration: 129205
loss: 1.073153018951416,grad_norm: 0.9999995182399857, iteration: 129206
loss: 1.000802755355835,grad_norm: 0.8929708116117541, iteration: 129207
loss: 1.029934048652649,grad_norm: 0.9736449708144241, iteration: 129208
loss: 1.0051870346069336,grad_norm: 0.957213123844272, iteration: 129209
loss: 1.1429078578948975,grad_norm: 0.9999998093409688, iteration: 129210
loss: 1.0037254095077515,grad_norm: 0.9999992388653302, iteration: 129211
loss: 1.0341722965240479,grad_norm: 0.9999992710582659, iteration: 129212
loss: 1.0341249704360962,grad_norm: 0.9999998308782962, iteration: 129213
loss: 0.998701274394989,grad_norm: 0.9435864956559058, iteration: 129214
loss: 0.9857802391052246,grad_norm: 0.8680294023844838, iteration: 129215
loss: 0.9869108200073242,grad_norm: 0.898030453136885, iteration: 129216
loss: 1.0184999704360962,grad_norm: 0.9251333002702037, iteration: 129217
loss: 0.9843404293060303,grad_norm: 0.9725891421927613, iteration: 129218
loss: 1.0149357318878174,grad_norm: 0.9999992192122742, iteration: 129219
loss: 1.0175076723098755,grad_norm: 0.999999048912926, iteration: 129220
loss: 1.0303905010223389,grad_norm: 0.9999991786490667, iteration: 129221
loss: 0.9719070196151733,grad_norm: 0.9958376221434087, iteration: 129222
loss: 1.0359835624694824,grad_norm: 0.9999990366154676, iteration: 129223
loss: 1.0328913927078247,grad_norm: 0.9999993182156109, iteration: 129224
loss: 1.066140055656433,grad_norm: 0.9999997785837504, iteration: 129225
loss: 1.0131256580352783,grad_norm: 0.9999990818567153, iteration: 129226
loss: 1.0258527994155884,grad_norm: 0.9513145869905825, iteration: 129227
loss: 0.989676296710968,grad_norm: 0.8496555792436504, iteration: 129228
loss: 0.9989944100379944,grad_norm: 0.9999991617465637, iteration: 129229
loss: 1.0197445154190063,grad_norm: 0.981307491177398, iteration: 129230
loss: 0.9355319738388062,grad_norm: 0.8370094676438681, iteration: 129231
loss: 1.028555154800415,grad_norm: 0.8500624515325473, iteration: 129232
loss: 0.957242488861084,grad_norm: 0.973292721425727, iteration: 129233
loss: 0.9830538630485535,grad_norm: 0.9999990921624011, iteration: 129234
loss: 1.0053200721740723,grad_norm: 0.9999998400051423, iteration: 129235
loss: 0.9964085221290588,grad_norm: 0.9999991492081383, iteration: 129236
loss: 1.0329070091247559,grad_norm: 0.9999997854417101, iteration: 129237
loss: 0.9976233839988708,grad_norm: 0.9934942398818363, iteration: 129238
loss: 0.9943318963050842,grad_norm: 0.9999991728171264, iteration: 129239
loss: 1.0213583707809448,grad_norm: 0.9999990384478367, iteration: 129240
loss: 0.9920976758003235,grad_norm: 0.9652785855948428, iteration: 129241
loss: 0.9661464691162109,grad_norm: 0.9999989827071536, iteration: 129242
loss: 1.0000197887420654,grad_norm: 0.9999990686230238, iteration: 129243
loss: 0.9697093963623047,grad_norm: 0.9999991881809079, iteration: 129244
loss: 1.0095516443252563,grad_norm: 0.9788182162050896, iteration: 129245
loss: 1.0048884153366089,grad_norm: 0.8616326952711657, iteration: 129246
loss: 0.992108941078186,grad_norm: 0.9611525220262613, iteration: 129247
loss: 1.0204626321792603,grad_norm: 0.9999990850317707, iteration: 129248
loss: 1.03685462474823,grad_norm: 0.9999995850197194, iteration: 129249
loss: 1.0138673782348633,grad_norm: 0.9999990817254876, iteration: 129250
loss: 1.0261317491531372,grad_norm: 0.9999990361834383, iteration: 129251
loss: 1.009475588798523,grad_norm: 0.9999991954791169, iteration: 129252
loss: 0.9826244711875916,grad_norm: 0.9999993223267765, iteration: 129253
loss: 1.003839135169983,grad_norm: 0.9999991419031935, iteration: 129254
loss: 0.9859543442726135,grad_norm: 0.9527801421436793, iteration: 129255
loss: 0.9813968539237976,grad_norm: 0.9838183258452812, iteration: 129256
loss: 1.041096806526184,grad_norm: 0.9999989543178487, iteration: 129257
loss: 1.0470317602157593,grad_norm: 0.9999991704393287, iteration: 129258
loss: 0.9585676193237305,grad_norm: 0.9999991287140826, iteration: 129259
loss: 1.0225712060928345,grad_norm: 0.9999991002601335, iteration: 129260
loss: 1.0182687044143677,grad_norm: 0.9846554951080212, iteration: 129261
loss: 1.0046477317810059,grad_norm: 0.9322378179869144, iteration: 129262
loss: 1.020554542541504,grad_norm: 0.999999220402422, iteration: 129263
loss: 1.001074194908142,grad_norm: 0.999999203727653, iteration: 129264
loss: 1.0216538906097412,grad_norm: 0.8351844917087095, iteration: 129265
loss: 1.004074215888977,grad_norm: 0.9064812840820081, iteration: 129266
loss: 0.9914228916168213,grad_norm: 0.9999991528410822, iteration: 129267
loss: 0.9873040318489075,grad_norm: 0.999998991505506, iteration: 129268
loss: 1.0127464532852173,grad_norm: 0.9999999052417495, iteration: 129269
loss: 0.9851387143135071,grad_norm: 0.9999990652562161, iteration: 129270
loss: 1.0002005100250244,grad_norm: 0.9999991231100968, iteration: 129271
loss: 0.9713789820671082,grad_norm: 0.9999989818512266, iteration: 129272
loss: 0.995477557182312,grad_norm: 0.9315014683787126, iteration: 129273
loss: 1.0126785039901733,grad_norm: 0.9999991178832962, iteration: 129274
loss: 0.9885455965995789,grad_norm: 0.9173301254990839, iteration: 129275
loss: 1.0070632696151733,grad_norm: 0.9999991105756971, iteration: 129276
loss: 1.0102745294570923,grad_norm: 0.9999990754522897, iteration: 129277
loss: 0.9826643466949463,grad_norm: 0.8438889856281243, iteration: 129278
loss: 0.9667690992355347,grad_norm: 0.9120669217659841, iteration: 129279
loss: 0.9848469495773315,grad_norm: 0.9999994607890494, iteration: 129280
loss: 0.9968901872634888,grad_norm: 0.9508089533874717, iteration: 129281
loss: 0.9831735491752625,grad_norm: 0.9613836477056174, iteration: 129282
loss: 0.9729713201522827,grad_norm: 0.8073071974091829, iteration: 129283
loss: 0.9993205070495605,grad_norm: 0.9323851115665023, iteration: 129284
loss: 1.018325924873352,grad_norm: 0.7832058940218568, iteration: 129285
loss: 0.9969659447669983,grad_norm: 0.9999993297974481, iteration: 129286
loss: 1.018479585647583,grad_norm: 0.9248779936450723, iteration: 129287
loss: 1.0046919584274292,grad_norm: 0.9609802932425167, iteration: 129288
loss: 1.0353703498840332,grad_norm: 0.9999990508985389, iteration: 129289
loss: 1.0111595392227173,grad_norm: 0.9865838376092195, iteration: 129290
loss: 0.9947826266288757,grad_norm: 0.9622637693681735, iteration: 129291
loss: 1.006020188331604,grad_norm: 0.9382634174029337, iteration: 129292
loss: 0.9871284365653992,grad_norm: 0.9756349160834253, iteration: 129293
loss: 1.0169557332992554,grad_norm: 0.9999990063672273, iteration: 129294
loss: 1.0180803537368774,grad_norm: 0.9727932830801763, iteration: 129295
loss: 1.0024864673614502,grad_norm: 0.9999990798020956, iteration: 129296
loss: 0.9862967133522034,grad_norm: 0.8042152295317466, iteration: 129297
loss: 0.9963343739509583,grad_norm: 0.9423989853830105, iteration: 129298
loss: 1.0297751426696777,grad_norm: 0.9999989940455883, iteration: 129299
loss: 1.0128270387649536,grad_norm: 0.9999991648141214, iteration: 129300
loss: 1.0418920516967773,grad_norm: 0.9999991629044125, iteration: 129301
loss: 1.0478911399841309,grad_norm: 0.9999991104347138, iteration: 129302
loss: 1.0167171955108643,grad_norm: 0.908383249387746, iteration: 129303
loss: 1.0013184547424316,grad_norm: 0.999998994094251, iteration: 129304
loss: 1.019499659538269,grad_norm: 0.9999999342347586, iteration: 129305
loss: 0.9972577095031738,grad_norm: 0.9690225437242678, iteration: 129306
loss: 1.0419976711273193,grad_norm: 0.9999993400753938, iteration: 129307
loss: 0.9653376936912537,grad_norm: 0.999982152452477, iteration: 129308
loss: 0.9971315860748291,grad_norm: 0.9999991990269107, iteration: 129309
loss: 1.0068902969360352,grad_norm: 0.9999991396804349, iteration: 129310
loss: 1.0975346565246582,grad_norm: 0.9999993021088831, iteration: 129311
loss: 1.0097277164459229,grad_norm: 0.9999990749031334, iteration: 129312
loss: 0.9947336316108704,grad_norm: 0.9999991116884595, iteration: 129313
loss: 1.014886498451233,grad_norm: 0.8986789493104249, iteration: 129314
loss: 1.0390514135360718,grad_norm: 0.9914189727847625, iteration: 129315
loss: 1.0036852359771729,grad_norm: 0.9620663242402087, iteration: 129316
loss: 0.9568698406219482,grad_norm: 0.9725000451257303, iteration: 129317
loss: 1.014426589012146,grad_norm: 0.9966027624329145, iteration: 129318
loss: 1.0401557683944702,grad_norm: 0.9999992336472988, iteration: 129319
loss: 0.9625513553619385,grad_norm: 0.999999292147864, iteration: 129320
loss: 1.026660680770874,grad_norm: 0.9628542498841896, iteration: 129321
loss: 0.968716561794281,grad_norm: 0.9887249438757669, iteration: 129322
loss: 0.9791854023933411,grad_norm: 0.9999992413713371, iteration: 129323
loss: 1.0445436239242554,grad_norm: 0.9999991098363595, iteration: 129324
loss: 1.0017743110656738,grad_norm: 0.9371269478011128, iteration: 129325
loss: 1.0385593175888062,grad_norm: 0.9063080237500015, iteration: 129326
loss: 1.020432949066162,grad_norm: 0.8643016435573176, iteration: 129327
loss: 1.026382327079773,grad_norm: 0.9667861893085395, iteration: 129328
loss: 0.9628589153289795,grad_norm: 0.999999168560514, iteration: 129329
loss: 1.0120362043380737,grad_norm: 0.9999998165224231, iteration: 129330
loss: 1.0474704504013062,grad_norm: 0.999999530562481, iteration: 129331
loss: 1.0295766592025757,grad_norm: 0.9999992230021685, iteration: 129332
loss: 1.029245376586914,grad_norm: 0.9999990786925365, iteration: 129333
loss: 1.015777587890625,grad_norm: 0.8802747540051522, iteration: 129334
loss: 0.9929504990577698,grad_norm: 0.99999908567167, iteration: 129335
loss: 0.9904524683952332,grad_norm: 0.962559738392355, iteration: 129336
loss: 0.9630120992660522,grad_norm: 0.9999991620492782, iteration: 129337
loss: 1.009877324104309,grad_norm: 0.9999989881909898, iteration: 129338
loss: 1.0130674839019775,grad_norm: 0.9999988992755531, iteration: 129339
loss: 0.9977099895477295,grad_norm: 0.9999992579162342, iteration: 129340
loss: 0.9918679594993591,grad_norm: 0.9999991190963493, iteration: 129341
loss: 0.9995300769805908,grad_norm: 0.9999991361396681, iteration: 129342
loss: 0.9759690761566162,grad_norm: 0.9545934062005313, iteration: 129343
loss: 1.0340824127197266,grad_norm: 0.9734811412590335, iteration: 129344
loss: 1.0296450853347778,grad_norm: 0.9999989879922105, iteration: 129345
loss: 1.0057786703109741,grad_norm: 0.9999992357193367, iteration: 129346
loss: 1.0239700078964233,grad_norm: 0.9999993144036715, iteration: 129347
loss: 1.0298627614974976,grad_norm: 0.999998844701277, iteration: 129348
loss: 1.0218138694763184,grad_norm: 0.963224602083626, iteration: 129349
loss: 1.0027068853378296,grad_norm: 0.9999990225357278, iteration: 129350
loss: 0.9557563066482544,grad_norm: 0.9999990933799326, iteration: 129351
loss: 0.9700223803520203,grad_norm: 0.9999990596377615, iteration: 129352
loss: 1.0365241765975952,grad_norm: 0.9999989845087269, iteration: 129353
loss: 1.0475331544876099,grad_norm: 0.9999994217819304, iteration: 129354
loss: 1.0546034574508667,grad_norm: 0.9999991922898782, iteration: 129355
loss: 1.081553339958191,grad_norm: 0.9999990826478015, iteration: 129356
loss: 0.9894487261772156,grad_norm: 0.9999991541250317, iteration: 129357
loss: 0.9818779826164246,grad_norm: 0.9999990671830439, iteration: 129358
loss: 1.0010809898376465,grad_norm: 0.9999991362669483, iteration: 129359
loss: 0.9960241913795471,grad_norm: 0.9062055880642992, iteration: 129360
loss: 0.9578427076339722,grad_norm: 0.9999991331906901, iteration: 129361
loss: 0.9997376799583435,grad_norm: 0.9569453174174232, iteration: 129362
loss: 0.9905731081962585,grad_norm: 0.911886423682566, iteration: 129363
loss: 1.0085499286651611,grad_norm: 0.9863992925221698, iteration: 129364
loss: 1.004441499710083,grad_norm: 0.9999993997434473, iteration: 129365
loss: 1.008945107460022,grad_norm: 0.9737388691427203, iteration: 129366
loss: 0.9945080280303955,grad_norm: 0.9999996858402547, iteration: 129367
loss: 1.0092109441757202,grad_norm: 0.987862523148698, iteration: 129368
loss: 1.0059759616851807,grad_norm: 0.9603292782608119, iteration: 129369
loss: 1.0126686096191406,grad_norm: 0.9999990775231774, iteration: 129370
loss: 1.030709147453308,grad_norm: 0.9999992030973862, iteration: 129371
loss: 1.0112898349761963,grad_norm: 0.9999990161787243, iteration: 129372
loss: 0.9854092001914978,grad_norm: 0.9061726168575533, iteration: 129373
loss: 1.024466872215271,grad_norm: 0.9480606411096094, iteration: 129374
loss: 0.957521378993988,grad_norm: 0.999999249986366, iteration: 129375
loss: 1.0090560913085938,grad_norm: 0.9999991639034415, iteration: 129376
loss: 1.0191038846969604,grad_norm: 0.957802179888356, iteration: 129377
loss: 1.0250259637832642,grad_norm: 0.9999991787170973, iteration: 129378
loss: 1.0196675062179565,grad_norm: 0.9999993368312156, iteration: 129379
loss: 0.979896068572998,grad_norm: 0.9999991693658252, iteration: 129380
loss: 1.0138006210327148,grad_norm: 0.8497528892521928, iteration: 129381
loss: 1.026686429977417,grad_norm: 0.9999989986352171, iteration: 129382
loss: 0.9977324604988098,grad_norm: 0.8929221438036604, iteration: 129383
loss: 0.9916091561317444,grad_norm: 0.9999991918718787, iteration: 129384
loss: 1.0014256238937378,grad_norm: 0.9999989922175991, iteration: 129385
loss: 1.0204871892929077,grad_norm: 0.9618545096272342, iteration: 129386
loss: 1.0266716480255127,grad_norm: 0.9999996142876575, iteration: 129387
loss: 0.9903431534767151,grad_norm: 0.8394131766495851, iteration: 129388
loss: 0.9965224266052246,grad_norm: 0.9999991260587443, iteration: 129389
loss: 0.9695298075675964,grad_norm: 0.9999990718254562, iteration: 129390
loss: 1.062461256980896,grad_norm: 0.9999991833634204, iteration: 129391
loss: 0.9699745774269104,grad_norm: 0.9662336550304783, iteration: 129392
loss: 1.0063941478729248,grad_norm: 0.999999100371708, iteration: 129393
loss: 0.975023090839386,grad_norm: 0.9202611045009329, iteration: 129394
loss: 1.019730567932129,grad_norm: 0.9201354959639162, iteration: 129395
loss: 1.2214957475662231,grad_norm: 0.999999773841589, iteration: 129396
loss: 1.0274395942687988,grad_norm: 0.9835322817652059, iteration: 129397
loss: 1.035744309425354,grad_norm: 0.9485013413747296, iteration: 129398
loss: 0.98548823595047,grad_norm: 0.9999992699775916, iteration: 129399
loss: 0.985388457775116,grad_norm: 0.9999991756738108, iteration: 129400
loss: 1.0294369459152222,grad_norm: 0.9999991083356992, iteration: 129401
loss: 0.9603592753410339,grad_norm: 0.9999990578694736, iteration: 129402
loss: 0.990003228187561,grad_norm: 0.9999998512546866, iteration: 129403
loss: 1.0489195585250854,grad_norm: 0.999999456817036, iteration: 129404
loss: 1.0086781978607178,grad_norm: 0.9999988436929039, iteration: 129405
loss: 0.9934042692184448,grad_norm: 0.9999991355324039, iteration: 129406
loss: 1.0124174356460571,grad_norm: 0.9156125081805794, iteration: 129407
loss: 1.0174773931503296,grad_norm: 0.9999990371017285, iteration: 129408
loss: 1.009238839149475,grad_norm: 0.980170459967703, iteration: 129409
loss: 1.0020267963409424,grad_norm: 0.9999991036509944, iteration: 129410
loss: 0.984917402267456,grad_norm: 0.9999990950853425, iteration: 129411
loss: 1.0121560096740723,grad_norm: 0.9999992209901251, iteration: 129412
loss: 1.0130037069320679,grad_norm: 0.9999991879772892, iteration: 129413
loss: 0.9678109288215637,grad_norm: 0.9999991819480446, iteration: 129414
loss: 1.0264407396316528,grad_norm: 0.9999989582924805, iteration: 129415
loss: 0.9808324575424194,grad_norm: 0.9999993411635706, iteration: 129416
loss: 1.0121426582336426,grad_norm: 0.951573956553506, iteration: 129417
loss: 1.0150408744812012,grad_norm: 0.9712880006214463, iteration: 129418
loss: 0.9732182025909424,grad_norm: 0.999999080808114, iteration: 129419
loss: 0.9899490475654602,grad_norm: 0.9018167780503012, iteration: 129420
loss: 0.9873523712158203,grad_norm: 0.9729048984615499, iteration: 129421
loss: 1.0108020305633545,grad_norm: 0.9567367114036094, iteration: 129422
loss: 1.0792346000671387,grad_norm: 0.999999193705322, iteration: 129423
loss: 1.0109373331069946,grad_norm: 0.9999990030096422, iteration: 129424
loss: 1.0218455791473389,grad_norm: 0.9999990243545641, iteration: 129425
loss: 1.019325613975525,grad_norm: 0.9999990511070718, iteration: 129426
loss: 1.017624020576477,grad_norm: 0.9999990433918685, iteration: 129427
loss: 1.0102367401123047,grad_norm: 0.8876499238012808, iteration: 129428
loss: 1.0173139572143555,grad_norm: 0.9999991627196245, iteration: 129429
loss: 1.0303419828414917,grad_norm: 0.9999992367577767, iteration: 129430
loss: 1.011855959892273,grad_norm: 0.9999990523525191, iteration: 129431
loss: 0.9902210235595703,grad_norm: 0.8888750151859922, iteration: 129432
loss: 1.011129379272461,grad_norm: 0.9882492614630676, iteration: 129433
loss: 1.0214680433273315,grad_norm: 0.9999992627130821, iteration: 129434
loss: 1.0113357305526733,grad_norm: 0.9999992159151899, iteration: 129435
loss: 1.0087507963180542,grad_norm: 0.9999990165057858, iteration: 129436
loss: 0.9814099669456482,grad_norm: 0.999999124094431, iteration: 129437
loss: 1.0079957246780396,grad_norm: 0.9999996413107369, iteration: 129438
loss: 0.9990562796592712,grad_norm: 0.999999105793746, iteration: 129439
loss: 1.015019416809082,grad_norm: 0.9666750819647849, iteration: 129440
loss: 1.0146484375,grad_norm: 0.9999989821678551, iteration: 129441
loss: 1.0035929679870605,grad_norm: 0.9999992607082298, iteration: 129442
loss: 0.9995980262756348,grad_norm: 0.999999090922417, iteration: 129443
loss: 0.971829891204834,grad_norm: 0.9999991550122486, iteration: 129444
loss: 0.9945899844169617,grad_norm: 0.9435950173962828, iteration: 129445
loss: 0.9536104798316956,grad_norm: 0.9091198632392067, iteration: 129446
loss: 1.0498756170272827,grad_norm: 0.8783073612287577, iteration: 129447
loss: 0.9819971323013306,grad_norm: 0.9999990276113679, iteration: 129448
loss: 1.0190967321395874,grad_norm: 0.9999990631252312, iteration: 129449
loss: 0.9997684359550476,grad_norm: 0.9999990999109181, iteration: 129450
loss: 0.9813218712806702,grad_norm: 0.9999991917903889, iteration: 129451
loss: 1.0506538152694702,grad_norm: 0.999999185699347, iteration: 129452
loss: 0.9998983144760132,grad_norm: 0.999999059157534, iteration: 129453
loss: 0.957558810710907,grad_norm: 0.9999990265372639, iteration: 129454
loss: 0.991991400718689,grad_norm: 0.9180397154671678, iteration: 129455
loss: 0.9784437417984009,grad_norm: 0.874091850591378, iteration: 129456
loss: 0.9821417927742004,grad_norm: 0.9999997247472031, iteration: 129457
loss: 0.9509789347648621,grad_norm: 0.9562361066672307, iteration: 129458
loss: 1.0034868717193604,grad_norm: 0.8794491878809078, iteration: 129459
loss: 0.9766350984573364,grad_norm: 0.9999992216349294, iteration: 129460
loss: 1.0142992734909058,grad_norm: 0.9632475907437483, iteration: 129461
loss: 1.0124763250350952,grad_norm: 0.9999993020438962, iteration: 129462
loss: 0.9996405839920044,grad_norm: 0.9999991714150632, iteration: 129463
loss: 1.0378698110580444,grad_norm: 0.9594711498316549, iteration: 129464
loss: 1.0036426782608032,grad_norm: 0.8176434880812725, iteration: 129465
loss: 0.9928380250930786,grad_norm: 0.9999990384103986, iteration: 129466
loss: 0.9948107600212097,grad_norm: 0.9999990887277733, iteration: 129467
loss: 0.9686241745948792,grad_norm: 0.9999991595381874, iteration: 129468
loss: 1.0345582962036133,grad_norm: 0.9999998781100646, iteration: 129469
loss: 1.0225175619125366,grad_norm: 0.9711194484634192, iteration: 129470
loss: 0.9668298959732056,grad_norm: 0.8898683031359249, iteration: 129471
loss: 1.012800693511963,grad_norm: 0.9038482799699719, iteration: 129472
loss: 1.021427869796753,grad_norm: 0.8845444277700395, iteration: 129473
loss: 0.9563326835632324,grad_norm: 0.99999921057995, iteration: 129474
loss: 0.9920136332511902,grad_norm: 0.861857688853775, iteration: 129475
loss: 1.0260258913040161,grad_norm: 0.9999996201825379, iteration: 129476
loss: 1.0135148763656616,grad_norm: 0.9718226363810982, iteration: 129477
loss: 1.0057966709136963,grad_norm: 0.9999991034630683, iteration: 129478
loss: 0.9922330379486084,grad_norm: 0.9675797705926557, iteration: 129479
loss: 1.0050525665283203,grad_norm: 0.9999992269389306, iteration: 129480
loss: 1.0161523818969727,grad_norm: 0.967784685976132, iteration: 129481
loss: 1.05287766456604,grad_norm: 0.9999990198847855, iteration: 129482
loss: 1.021457552909851,grad_norm: 0.999999056105165, iteration: 129483
loss: 1.0062965154647827,grad_norm: 0.9380252059883554, iteration: 129484
loss: 1.0070692300796509,grad_norm: 0.9999991020553592, iteration: 129485
loss: 1.0162681341171265,grad_norm: 0.7796438616316211, iteration: 129486
loss: 0.9873659610748291,grad_norm: 0.954100155774541, iteration: 129487
loss: 1.0268733501434326,grad_norm: 0.9999990465460236, iteration: 129488
loss: 1.01198148727417,grad_norm: 0.9999991768093812, iteration: 129489
loss: 1.0279299020767212,grad_norm: 0.9999994057135331, iteration: 129490
loss: 1.0388144254684448,grad_norm: 0.9999992317279639, iteration: 129491
loss: 0.9964438676834106,grad_norm: 0.9848764638816495, iteration: 129492
loss: 0.9929478168487549,grad_norm: 0.9163026677086507, iteration: 129493
loss: 1.0084211826324463,grad_norm: 0.999999152216944, iteration: 129494
loss: 1.002670407295227,grad_norm: 0.9999989952645694, iteration: 129495
loss: 1.011142373085022,grad_norm: 0.9568691375731759, iteration: 129496
loss: 0.9979923963546753,grad_norm: 0.9532142347816156, iteration: 129497
loss: 0.9674620032310486,grad_norm: 0.9999991150101646, iteration: 129498
loss: 1.0098778009414673,grad_norm: 0.9999990964690653, iteration: 129499
loss: 1.0021201372146606,grad_norm: 0.9999990338058523, iteration: 129500
loss: 1.0222002267837524,grad_norm: 0.9986777985675297, iteration: 129501
loss: 0.9906651973724365,grad_norm: 0.9433336943701365, iteration: 129502
loss: 1.0042579174041748,grad_norm: 0.9340811520040365, iteration: 129503
loss: 0.9938237071037292,grad_norm: 0.9673937399405336, iteration: 129504
loss: 1.0197490453720093,grad_norm: 0.9090171137080818, iteration: 129505
loss: 1.0172395706176758,grad_norm: 0.9999990854507387, iteration: 129506
loss: 1.017106533050537,grad_norm: 0.946904516549323, iteration: 129507
loss: 1.0054861307144165,grad_norm: 0.9999992060007683, iteration: 129508
loss: 1.0291224718093872,grad_norm: 0.9999994074150699, iteration: 129509
loss: 1.0777928829193115,grad_norm: 0.9999997208847492, iteration: 129510
loss: 0.994468629360199,grad_norm: 0.9999991074206007, iteration: 129511
loss: 0.9940952658653259,grad_norm: 0.9878010194636517, iteration: 129512
loss: 1.007212519645691,grad_norm: 0.9169451641074143, iteration: 129513
loss: 1.0302823781967163,grad_norm: 0.9999994010899793, iteration: 129514
loss: 1.015610694885254,grad_norm: 0.9999991951175787, iteration: 129515
loss: 0.994778573513031,grad_norm: 0.8925515965718744, iteration: 129516
loss: 1.013540506362915,grad_norm: 0.9899938143312556, iteration: 129517
loss: 0.9640305638313293,grad_norm: 0.9946628263139007, iteration: 129518
loss: 1.012437343597412,grad_norm: 0.9999992010346747, iteration: 129519
loss: 1.0356539487838745,grad_norm: 0.9743462295375962, iteration: 129520
loss: 1.0360876321792603,grad_norm: 0.9521189105034874, iteration: 129521
loss: 1.01124107837677,grad_norm: 0.9321216342055882, iteration: 129522
loss: 1.0102355480194092,grad_norm: 0.9731174237343482, iteration: 129523
loss: 0.9766115546226501,grad_norm: 0.9999990249706914, iteration: 129524
loss: 1.0476813316345215,grad_norm: 0.9866925226793525, iteration: 129525
loss: 1.0025328397750854,grad_norm: 0.9999990476222947, iteration: 129526
loss: 1.016133427619934,grad_norm: 0.9999991953056441, iteration: 129527
loss: 1.009521484375,grad_norm: 0.999999133998286, iteration: 129528
loss: 1.0139824151992798,grad_norm: 0.8773035082705811, iteration: 129529
loss: 0.9979485273361206,grad_norm: 0.9999991660929057, iteration: 129530
loss: 0.9907083511352539,grad_norm: 0.9999992438758988, iteration: 129531
loss: 0.993691623210907,grad_norm: 0.9999993050412649, iteration: 129532
loss: 0.996273398399353,grad_norm: 0.9196863578281296, iteration: 129533
loss: 1.03508460521698,grad_norm: 0.9569064390744804, iteration: 129534
loss: 1.0056922435760498,grad_norm: 0.9784678884376725, iteration: 129535
loss: 1.0192251205444336,grad_norm: 0.9999992004182441, iteration: 129536
loss: 0.9643175005912781,grad_norm: 0.9533795515142628, iteration: 129537
loss: 0.9915950298309326,grad_norm: 0.9999989859538262, iteration: 129538
loss: 1.011608362197876,grad_norm: 0.9999991795186692, iteration: 129539
loss: 1.0048564672470093,grad_norm: 0.9999989973792981, iteration: 129540
loss: 0.9976780414581299,grad_norm: 0.9847302279590283, iteration: 129541
loss: 0.9719863533973694,grad_norm: 0.9456228923663093, iteration: 129542
loss: 0.9776716828346252,grad_norm: 0.9999992971096806, iteration: 129543
loss: 1.0100834369659424,grad_norm: 0.9999991260186002, iteration: 129544
loss: 1.0147053003311157,grad_norm: 0.9999990519108104, iteration: 129545
loss: 1.009252905845642,grad_norm: 0.9999993076142599, iteration: 129546
loss: 0.9772734045982361,grad_norm: 0.9999991591481951, iteration: 129547
loss: 1.0050638914108276,grad_norm: 0.9998994942070951, iteration: 129548
loss: 1.0154222249984741,grad_norm: 0.9524976508298613, iteration: 129549
loss: 1.010434627532959,grad_norm: 0.9120515512227684, iteration: 129550
loss: 0.9938077330589294,grad_norm: 0.9850100464841682, iteration: 129551
loss: 0.9822378754615784,grad_norm: 0.9999990975858538, iteration: 129552
loss: 0.9554934501647949,grad_norm: 0.99999922365298, iteration: 129553
loss: 0.991587221622467,grad_norm: 0.9816295839339063, iteration: 129554
loss: 0.9608737826347351,grad_norm: 0.9999993623709248, iteration: 129555
loss: 0.9662814736366272,grad_norm: 0.9999990573794333, iteration: 129556
loss: 1.0002480745315552,grad_norm: 0.9999991061981588, iteration: 129557
loss: 1.1043078899383545,grad_norm: 0.9999995241261734, iteration: 129558
loss: 0.970229983329773,grad_norm: 0.9283714673377379, iteration: 129559
loss: 1.008141040802002,grad_norm: 0.9999990065094589, iteration: 129560
loss: 0.9977157115936279,grad_norm: 0.9999992087033314, iteration: 129561
loss: 0.9816995859146118,grad_norm: 0.892496888270897, iteration: 129562
loss: 0.9726345539093018,grad_norm: 0.9395100132716399, iteration: 129563
loss: 1.0901901721954346,grad_norm: 0.9954363837251838, iteration: 129564
loss: 1.081191897392273,grad_norm: 0.9999996166223281, iteration: 129565
loss: 0.990547776222229,grad_norm: 0.8557721551141497, iteration: 129566
loss: 1.0296658277511597,grad_norm: 0.9682533929117614, iteration: 129567
loss: 1.0157363414764404,grad_norm: 0.9999991557719794, iteration: 129568
loss: 0.9957218170166016,grad_norm: 0.9999991386174796, iteration: 129569
loss: 1.0518271923065186,grad_norm: 0.9999990882449492, iteration: 129570
loss: 0.9945025444030762,grad_norm: 0.9999992253308484, iteration: 129571
loss: 0.961608350276947,grad_norm: 0.9999992676388505, iteration: 129572
loss: 1.0107604265213013,grad_norm: 0.8783953132494451, iteration: 129573
loss: 1.0201396942138672,grad_norm: 0.9999992086576278, iteration: 129574
loss: 1.0006002187728882,grad_norm: 0.9999991184930301, iteration: 129575
loss: 0.9600071907043457,grad_norm: 0.9999992571571793, iteration: 129576
loss: 0.9915842413902283,grad_norm: 0.9999990890700708, iteration: 129577
loss: 1.002734899520874,grad_norm: 0.9682942924086361, iteration: 129578
loss: 1.0051252841949463,grad_norm: 0.9808695196373239, iteration: 129579
loss: 1.0259220600128174,grad_norm: 0.9999990996235968, iteration: 129580
loss: 1.0425615310668945,grad_norm: 0.999999053563534, iteration: 129581
loss: 0.9914931654930115,grad_norm: 0.7993657310881966, iteration: 129582
loss: 0.9804118275642395,grad_norm: 0.9999989382851872, iteration: 129583
loss: 0.9893683791160583,grad_norm: 0.9264472315997809, iteration: 129584
loss: 0.9950857758522034,grad_norm: 0.9632638899071772, iteration: 129585
loss: 1.0461902618408203,grad_norm: 0.9999998401043257, iteration: 129586
loss: 1.0402802228927612,grad_norm: 0.9999989551790138, iteration: 129587
loss: 1.0446916818618774,grad_norm: 0.9999991292433241, iteration: 129588
loss: 0.9903631210327148,grad_norm: 0.9999991645121719, iteration: 129589
loss: 0.981112003326416,grad_norm: 0.9999991343278744, iteration: 129590
loss: 0.9945748448371887,grad_norm: 0.9999991400821628, iteration: 129591
loss: 1.023206353187561,grad_norm: 0.9005797401034873, iteration: 129592
loss: 0.9929118752479553,grad_norm: 0.9999991355008936, iteration: 129593
loss: 1.0028815269470215,grad_norm: 0.9999989076871428, iteration: 129594
loss: 1.0305259227752686,grad_norm: 0.9999992685499671, iteration: 129595
loss: 0.9557963013648987,grad_norm: 0.9999991154184974, iteration: 129596
loss: 1.0011532306671143,grad_norm: 0.980510021974039, iteration: 129597
loss: 1.0153857469558716,grad_norm: 0.9999992167858707, iteration: 129598
loss: 0.9313780069351196,grad_norm: 0.9999990615137094, iteration: 129599
loss: 1.0108418464660645,grad_norm: 0.9999989611416228, iteration: 129600
loss: 1.0298959016799927,grad_norm: 0.9769054429122803, iteration: 129601
loss: 1.0254520177841187,grad_norm: 0.9999990331955424, iteration: 129602
loss: 1.0059309005737305,grad_norm: 0.9999990341230247, iteration: 129603
loss: 1.1063528060913086,grad_norm: 0.9999996413780579, iteration: 129604
loss: 0.966062068939209,grad_norm: 0.9999992278942863, iteration: 129605
loss: 1.0046710968017578,grad_norm: 0.9872221193626084, iteration: 129606
loss: 1.0209801197052002,grad_norm: 0.9999993359815738, iteration: 129607
loss: 1.0662243366241455,grad_norm: 0.9999992418787099, iteration: 129608
loss: 1.0115673542022705,grad_norm: 0.9999999819354749, iteration: 129609
loss: 1.0040303468704224,grad_norm: 0.9751362367579145, iteration: 129610
loss: 0.9857838749885559,grad_norm: 0.9684610987435682, iteration: 129611
loss: 1.007976770401001,grad_norm: 0.9999991964253345, iteration: 129612
loss: 0.9740756154060364,grad_norm: 0.9999992283003132, iteration: 129613
loss: 0.9915146827697754,grad_norm: 0.9762161354235541, iteration: 129614
loss: 1.0099108219146729,grad_norm: 0.9999992889409975, iteration: 129615
loss: 1.0687860250473022,grad_norm: 0.9798409376885792, iteration: 129616
loss: 0.9844316840171814,grad_norm: 0.9999992104514319, iteration: 129617
loss: 1.0015629529953003,grad_norm: 0.9999990233934096, iteration: 129618
loss: 1.009371280670166,grad_norm: 0.9066432798536682, iteration: 129619
loss: 0.9987491965293884,grad_norm: 0.8908824993185445, iteration: 129620
loss: 0.9762489795684814,grad_norm: 0.9999990122948985, iteration: 129621
loss: 1.0221867561340332,grad_norm: 0.9325095531090968, iteration: 129622
loss: 0.9918367266654968,grad_norm: 0.8739581907145184, iteration: 129623
loss: 1.0083715915679932,grad_norm: 0.9066218012746492, iteration: 129624
loss: 1.0253115892410278,grad_norm: 0.8748376800732149, iteration: 129625
loss: 0.9794530868530273,grad_norm: 0.9999991920708039, iteration: 129626
loss: 1.025113821029663,grad_norm: 0.9528217960740583, iteration: 129627
loss: 0.9635626673698425,grad_norm: 0.9999990728261537, iteration: 129628
loss: 0.9915808439254761,grad_norm: 0.9999990293403554, iteration: 129629
loss: 0.9901726245880127,grad_norm: 0.9999989313661526, iteration: 129630
loss: 0.9996541738510132,grad_norm: 0.9958505827351808, iteration: 129631
loss: 1.0144715309143066,grad_norm: 0.903852239295378, iteration: 129632
loss: 1.008968710899353,grad_norm: 0.9999989673233572, iteration: 129633
loss: 1.0584162473678589,grad_norm: 0.9999990973928007, iteration: 129634
loss: 0.9782963395118713,grad_norm: 0.9999990596064096, iteration: 129635
loss: 0.9899541735649109,grad_norm: 0.9679256877305984, iteration: 129636
loss: 1.0370525121688843,grad_norm: 0.9999991180457813, iteration: 129637
loss: 1.0008063316345215,grad_norm: 0.8216092993171203, iteration: 129638
loss: 0.9534909129142761,grad_norm: 0.999999072048053, iteration: 129639
loss: 1.0061293840408325,grad_norm: 0.9999989762641365, iteration: 129640
loss: 1.0020710229873657,grad_norm: 0.9999991003640827, iteration: 129641
loss: 1.0188111066818237,grad_norm: 0.9999998714164618, iteration: 129642
loss: 0.9674471616744995,grad_norm: 0.8793151594567422, iteration: 129643
loss: 1.006490707397461,grad_norm: 0.9999990626360452, iteration: 129644
loss: 1.0206252336502075,grad_norm: 0.9999991465240792, iteration: 129645
loss: 0.9631296992301941,grad_norm: 0.9343957349255143, iteration: 129646
loss: 0.9955769181251526,grad_norm: 0.9999991532828812, iteration: 129647
loss: 1.0178207159042358,grad_norm: 0.9697743418338352, iteration: 129648
loss: 0.9956281185150146,grad_norm: 0.999999212990845, iteration: 129649
loss: 0.9911713600158691,grad_norm: 0.9999996761779386, iteration: 129650
loss: 1.0056544542312622,grad_norm: 0.9999992216221552, iteration: 129651
loss: 0.98126220703125,grad_norm: 0.9999990871975499, iteration: 129652
loss: 1.0298041105270386,grad_norm: 0.9502883965408209, iteration: 129653
loss: 0.9979103803634644,grad_norm: 0.9815090375927841, iteration: 129654
loss: 0.9716604948043823,grad_norm: 0.8914303165799548, iteration: 129655
loss: 1.0133998394012451,grad_norm: 0.8635674583285916, iteration: 129656
loss: 1.0261636972427368,grad_norm: 0.9999990949070454, iteration: 129657
loss: 1.007914423942566,grad_norm: 0.8707866396351597, iteration: 129658
loss: 0.9679528474807739,grad_norm: 0.9740985044727645, iteration: 129659
loss: 0.9876469969749451,grad_norm: 0.9999992091379182, iteration: 129660
loss: 1.0080833435058594,grad_norm: 0.999999068825749, iteration: 129661
loss: 1.0284982919692993,grad_norm: 0.9740599594327528, iteration: 129662
loss: 0.9882220029830933,grad_norm: 0.9553767948778096, iteration: 129663
loss: 0.9930562376976013,grad_norm: 0.9999990249227675, iteration: 129664
loss: 1.005523920059204,grad_norm: 0.9999989140876141, iteration: 129665
loss: 0.9986197352409363,grad_norm: 0.9999991608581728, iteration: 129666
loss: 0.9918600916862488,grad_norm: 0.9999989163294059, iteration: 129667
loss: 1.019952654838562,grad_norm: 0.9999990716552667, iteration: 129668
loss: 1.0356143712997437,grad_norm: 0.9999991715707427, iteration: 129669
loss: 1.0069596767425537,grad_norm: 0.9999989043841755, iteration: 129670
loss: 1.0146511793136597,grad_norm: 0.9271149776826597, iteration: 129671
loss: 1.0017037391662598,grad_norm: 0.9999991480855795, iteration: 129672
loss: 0.9695680141448975,grad_norm: 0.9999991618843357, iteration: 129673
loss: 0.9910678267478943,grad_norm: 0.9999993435916086, iteration: 129674
loss: 1.0108011960983276,grad_norm: 0.9999990821738536, iteration: 129675
loss: 0.9940478801727295,grad_norm: 0.9999992967250029, iteration: 129676
loss: 0.9816210269927979,grad_norm: 0.9999990381899955, iteration: 129677
loss: 1.0049148797988892,grad_norm: 0.9999991432109158, iteration: 129678
loss: 1.023036241531372,grad_norm: 0.989658438435276, iteration: 129679
loss: 0.9861154556274414,grad_norm: 0.9999992034126427, iteration: 129680
loss: 1.0251156091690063,grad_norm: 0.9999991157889144, iteration: 129681
loss: 0.9664487838745117,grad_norm: 0.9999991276661728, iteration: 129682
loss: 1.0002222061157227,grad_norm: 0.9999991548276961, iteration: 129683
loss: 0.9865992665290833,grad_norm: 0.9905687485273189, iteration: 129684
loss: 0.9643589854240417,grad_norm: 0.8546416193694611, iteration: 129685
loss: 0.9884329438209534,grad_norm: 0.9999990172097186, iteration: 129686
loss: 1.014965534210205,grad_norm: 0.9437202450495367, iteration: 129687
loss: 0.9858425855636597,grad_norm: 0.9999990827666447, iteration: 129688
loss: 1.0080878734588623,grad_norm: 0.9322879474496368, iteration: 129689
loss: 0.997050940990448,grad_norm: 0.9962686033327688, iteration: 129690
loss: 1.0647780895233154,grad_norm: 0.9999990658864071, iteration: 129691
loss: 1.0143241882324219,grad_norm: 0.9999989598887212, iteration: 129692
loss: 1.1078753471374512,grad_norm: 1.000000030114795, iteration: 129693
loss: 1.0125058889389038,grad_norm: 0.9311891005286516, iteration: 129694
loss: 0.9840883016586304,grad_norm: 0.9999991462932212, iteration: 129695
loss: 0.9800512790679932,grad_norm: 0.9999991758946708, iteration: 129696
loss: 1.0271166563034058,grad_norm: 0.9999992038767976, iteration: 129697
loss: 0.9831980466842651,grad_norm: 0.9808155018173305, iteration: 129698
loss: 1.0273770093917847,grad_norm: 0.9999996561974803, iteration: 129699
loss: 0.9369922280311584,grad_norm: 0.9999991257300882, iteration: 129700
loss: 0.9973081350326538,grad_norm: 0.9999990588355859, iteration: 129701
loss: 0.9989711046218872,grad_norm: 0.897642358656311, iteration: 129702
loss: 1.0023155212402344,grad_norm: 0.9035015389051033, iteration: 129703
loss: 0.9759087562561035,grad_norm: 0.9474583886920718, iteration: 129704
loss: 0.9647929072380066,grad_norm: 0.9999990596938247, iteration: 129705
loss: 1.0297133922576904,grad_norm: 0.9323707088272161, iteration: 129706
loss: 1.0308223962783813,grad_norm: 0.9999992146637222, iteration: 129707
loss: 0.9913753867149353,grad_norm: 0.970628202942326, iteration: 129708
loss: 0.9925024509429932,grad_norm: 0.9999988726229442, iteration: 129709
loss: 0.9887789487838745,grad_norm: 0.9999993584559971, iteration: 129710
loss: 1.015298843383789,grad_norm: 0.891489800816873, iteration: 129711
loss: 1.0181740522384644,grad_norm: 0.9746362263814865, iteration: 129712
loss: 0.9450618624687195,grad_norm: 0.999999075394852, iteration: 129713
loss: 1.0087283849716187,grad_norm: 0.9999990303605403, iteration: 129714
loss: 0.9761713147163391,grad_norm: 0.9999991914798428, iteration: 129715
loss: 1.038461446762085,grad_norm: 0.9999991232427619, iteration: 129716
loss: 1.0220056772232056,grad_norm: 0.9999992151179246, iteration: 129717
loss: 0.9701379537582397,grad_norm: 0.9608350275543341, iteration: 129718
loss: 1.0082019567489624,grad_norm: 0.9869721398363661, iteration: 129719
loss: 1.009444236755371,grad_norm: 0.9999991944367929, iteration: 129720
loss: 0.992224395275116,grad_norm: 0.9557896301530369, iteration: 129721
loss: 1.0018317699432373,grad_norm: 0.999999057287132, iteration: 129722
loss: 1.0091079473495483,grad_norm: 0.9999991264073018, iteration: 129723
loss: 0.998160719871521,grad_norm: 0.9753169085981455, iteration: 129724
loss: 0.9290266036987305,grad_norm: 0.9732462758186542, iteration: 129725
loss: 0.9708520770072937,grad_norm: 0.95658966795118, iteration: 129726
loss: 1.0295219421386719,grad_norm: 0.9999991715441303, iteration: 129727
loss: 0.9967196583747864,grad_norm: 0.9104456479266653, iteration: 129728
loss: 1.027091145515442,grad_norm: 0.9999993123295121, iteration: 129729
loss: 0.990501344203949,grad_norm: 0.9999991129353537, iteration: 129730
loss: 0.9514410495758057,grad_norm: 0.9818483467934598, iteration: 129731
loss: 1.0258612632751465,grad_norm: 0.9999991222455633, iteration: 129732
loss: 1.0090408325195312,grad_norm: 0.8853680260098356, iteration: 129733
loss: 0.9941253662109375,grad_norm: 0.9999992694579413, iteration: 129734
loss: 0.9819589257240295,grad_norm: 0.9028804661162606, iteration: 129735
loss: 1.0181125402450562,grad_norm: 0.9999993274708331, iteration: 129736
loss: 0.9629981517791748,grad_norm: 0.9915599060126978, iteration: 129737
loss: 0.9995958209037781,grad_norm: 0.9274770234584683, iteration: 129738
loss: 1.0171842575073242,grad_norm: 0.9999991523571427, iteration: 129739
loss: 0.9890053272247314,grad_norm: 0.9999991106466819, iteration: 129740
loss: 0.9861744046211243,grad_norm: 0.999999194189795, iteration: 129741
loss: 0.9862669706344604,grad_norm: 0.9944643408633789, iteration: 129742
loss: 0.9842416048049927,grad_norm: 0.9999991999939668, iteration: 129743
loss: 1.0026499032974243,grad_norm: 0.9456379217191773, iteration: 129744
loss: 0.9981144070625305,grad_norm: 0.9999990454813128, iteration: 129745
loss: 1.0163780450820923,grad_norm: 0.9505308235849204, iteration: 129746
loss: 1.0265018939971924,grad_norm: 0.9999996702889472, iteration: 129747
loss: 0.9937131404876709,grad_norm: 0.9369027663973589, iteration: 129748
loss: 1.0146602392196655,grad_norm: 0.9999991458163572, iteration: 129749
loss: 0.9988011121749878,grad_norm: 0.9999992418819841, iteration: 129750
loss: 1.013357162475586,grad_norm: 0.9999993775247988, iteration: 129751
loss: 0.9872847199440002,grad_norm: 0.9999992933291908, iteration: 129752
loss: 1.0136823654174805,grad_norm: 0.9999991746297121, iteration: 129753
loss: 0.9890518188476562,grad_norm: 0.9999992995814114, iteration: 129754
loss: 0.9904376268386841,grad_norm: 0.9003155971188577, iteration: 129755
loss: 0.9884714484214783,grad_norm: 0.9999992766077094, iteration: 129756
loss: 1.0339970588684082,grad_norm: 0.9999992779942852, iteration: 129757
loss: 1.0173338651657104,grad_norm: 0.999999200804837, iteration: 129758
loss: 0.9478986859321594,grad_norm: 0.9999999304121142, iteration: 129759
loss: 1.0065727233886719,grad_norm: 0.999999116249524, iteration: 129760
loss: 1.0052963495254517,grad_norm: 0.9999992939030108, iteration: 129761
loss: 1.0068824291229248,grad_norm: 0.9430200438956239, iteration: 129762
loss: 1.0645973682403564,grad_norm: 0.9999991077316088, iteration: 129763
loss: 1.0063072443008423,grad_norm: 0.95701214754504, iteration: 129764
loss: 0.9947989583015442,grad_norm: 0.8998030180556986, iteration: 129765
loss: 1.0165526866912842,grad_norm: 0.99999924513969, iteration: 129766
loss: 0.9612245559692383,grad_norm: 0.9300718700554388, iteration: 129767
loss: 0.991526186466217,grad_norm: 0.950976421205799, iteration: 129768
loss: 0.9794699549674988,grad_norm: 0.9999992000839361, iteration: 129769
loss: 0.9652448296546936,grad_norm: 0.9622917039123695, iteration: 129770
loss: 0.9466463327407837,grad_norm: 0.9956011122965994, iteration: 129771
loss: 1.0383752584457397,grad_norm: 0.9999993131528938, iteration: 129772
loss: 0.981921374797821,grad_norm: 0.9900467138136635, iteration: 129773
loss: 1.0211557149887085,grad_norm: 0.9999991813668923, iteration: 129774
loss: 0.9943528771400452,grad_norm: 0.9999991442093323, iteration: 129775
loss: 1.0100563764572144,grad_norm: 0.8927192347561325, iteration: 129776
loss: 1.0035505294799805,grad_norm: 0.9999992183390174, iteration: 129777
loss: 0.9981845617294312,grad_norm: 0.9999990694924813, iteration: 129778
loss: 0.9865708947181702,grad_norm: 0.9849606547392474, iteration: 129779
loss: 1.0257114171981812,grad_norm: 0.999999852932413, iteration: 129780
loss: 0.9989742636680603,grad_norm: 0.8748409383831276, iteration: 129781
loss: 0.9815965294837952,grad_norm: 0.9999991803450198, iteration: 129782
loss: 0.9628375172615051,grad_norm: 0.9999991526582528, iteration: 129783
loss: 0.9991395473480225,grad_norm: 0.9999992676312196, iteration: 129784
loss: 1.0115797519683838,grad_norm: 0.8110055575511791, iteration: 129785
loss: 0.9797772765159607,grad_norm: 0.930129847090776, iteration: 129786
loss: 0.9794690012931824,grad_norm: 0.9972519436549119, iteration: 129787
loss: 0.9964755177497864,grad_norm: 0.976489082801567, iteration: 129788
loss: 0.9676413536071777,grad_norm: 0.9999990842832054, iteration: 129789
loss: 0.9832522869110107,grad_norm: 0.9039489941575434, iteration: 129790
loss: 0.9954059720039368,grad_norm: 0.9393616374365196, iteration: 129791
loss: 0.9769532680511475,grad_norm: 0.9990613563930691, iteration: 129792
loss: 0.9922407865524292,grad_norm: 0.9999990783266804, iteration: 129793
loss: 1.0095447301864624,grad_norm: 0.9999990702492039, iteration: 129794
loss: 1.048803687095642,grad_norm: 0.9999991321653214, iteration: 129795
loss: 0.9843761920928955,grad_norm: 0.9515532182949178, iteration: 129796
loss: 0.9997071623802185,grad_norm: 0.9999992595262708, iteration: 129797
loss: 0.9874064326286316,grad_norm: 0.8942259619763715, iteration: 129798
loss: 0.9942365884780884,grad_norm: 0.9999992408446763, iteration: 129799
loss: 1.025063157081604,grad_norm: 0.9999989962323181, iteration: 129800
loss: 0.9747987985610962,grad_norm: 0.9522353820655077, iteration: 129801
loss: 0.9756338000297546,grad_norm: 0.9999992207316547, iteration: 129802
loss: 1.0438282489776611,grad_norm: 0.9999997716041995, iteration: 129803
loss: 0.9772677421569824,grad_norm: 0.9999989046757274, iteration: 129804
loss: 0.9906560778617859,grad_norm: 0.7685848950606753, iteration: 129805
loss: 0.9972801804542542,grad_norm: 0.9999992826864142, iteration: 129806
loss: 0.9891501665115356,grad_norm: 0.8915418599827833, iteration: 129807
loss: 0.9953046441078186,grad_norm: 0.9769411803814678, iteration: 129808
loss: 0.9954680800437927,grad_norm: 0.9999992191992904, iteration: 129809
loss: 0.9956756830215454,grad_norm: 0.9744432966461231, iteration: 129810
loss: 0.9931208491325378,grad_norm: 0.999999334512896, iteration: 129811
loss: 0.9822863340377808,grad_norm: 0.9497216658976891, iteration: 129812
loss: 1.0091606378555298,grad_norm: 0.9999991744020836, iteration: 129813
loss: 1.013426423072815,grad_norm: 0.9999992459276095, iteration: 129814
loss: 1.0398058891296387,grad_norm: 0.9999992191764009, iteration: 129815
loss: 1.050042748451233,grad_norm: 0.9999989370303258, iteration: 129816
loss: 1.0166600942611694,grad_norm: 0.9999991084102156, iteration: 129817
loss: 1.0041199922561646,grad_norm: 0.9999992062539238, iteration: 129818
loss: 1.0167651176452637,grad_norm: 0.9999990048846321, iteration: 129819
loss: 1.003040075302124,grad_norm: 0.9460022967790923, iteration: 129820
loss: 0.9581135511398315,grad_norm: 0.9771502453663771, iteration: 129821
loss: 1.0109708309173584,grad_norm: 0.9999991073224762, iteration: 129822
loss: 1.0678693056106567,grad_norm: 0.999999031357431, iteration: 129823
loss: 1.006723403930664,grad_norm: 0.9999992739763193, iteration: 129824
loss: 0.9809229373931885,grad_norm: 0.989498214427052, iteration: 129825
loss: 0.9800466895103455,grad_norm: 0.9999992434397034, iteration: 129826
loss: 1.0140833854675293,grad_norm: 0.9622744708946569, iteration: 129827
loss: 1.0327051877975464,grad_norm: 0.9999991971372008, iteration: 129828
loss: 0.9921613335609436,grad_norm: 0.9999990151216274, iteration: 129829
loss: 1.0101749897003174,grad_norm: 0.9999990850733382, iteration: 129830
loss: 0.9887979626655579,grad_norm: 0.999999083926834, iteration: 129831
loss: 1.0043730735778809,grad_norm: 0.9999992585483206, iteration: 129832
loss: 0.9988798499107361,grad_norm: 0.8474243951839959, iteration: 129833
loss: 0.9905535578727722,grad_norm: 0.950238517361378, iteration: 129834
loss: 1.013717532157898,grad_norm: 0.9999991347943074, iteration: 129835
loss: 1.027726411819458,grad_norm: 0.9999990439751816, iteration: 129836
loss: 0.9790710210800171,grad_norm: 0.9654080129738901, iteration: 129837
loss: 1.0049548149108887,grad_norm: 0.9999990487089729, iteration: 129838
loss: 1.0017974376678467,grad_norm: 0.9999991850530302, iteration: 129839
loss: 1.0000919103622437,grad_norm: 0.9694082328304865, iteration: 129840
loss: 0.9964589476585388,grad_norm: 0.9999992132746259, iteration: 129841
loss: 1.0054386854171753,grad_norm: 0.99999921760838, iteration: 129842
loss: 0.9872135519981384,grad_norm: 0.9263019387300473, iteration: 129843
loss: 0.9522491097450256,grad_norm: 0.8552134174999065, iteration: 129844
loss: 0.9615721106529236,grad_norm: 0.9686179648648265, iteration: 129845
loss: 0.9590660333633423,grad_norm: 0.9288664447883277, iteration: 129846
loss: 0.9922992587089539,grad_norm: 0.9298522125612353, iteration: 129847
loss: 1.0177514553070068,grad_norm: 0.9999991534758992, iteration: 129848
loss: 0.9842547178268433,grad_norm: 0.9490315185784746, iteration: 129849
loss: 0.9716805219650269,grad_norm: 0.9999995902160455, iteration: 129850
loss: 1.0270923376083374,grad_norm: 0.9999990944269425, iteration: 129851
loss: 1.0102733373641968,grad_norm: 0.999999109572129, iteration: 129852
loss: 1.0515235662460327,grad_norm: 0.9999992210556569, iteration: 129853
loss: 0.9902666807174683,grad_norm: 0.9999992396720793, iteration: 129854
loss: 1.004305124282837,grad_norm: 0.9999991319791044, iteration: 129855
loss: 0.973958432674408,grad_norm: 0.9999991998484311, iteration: 129856
loss: 1.0684568881988525,grad_norm: 0.9999998622886546, iteration: 129857
loss: 1.0144081115722656,grad_norm: 0.93567891148512, iteration: 129858
loss: 1.0013900995254517,grad_norm: 0.9999990909420774, iteration: 129859
loss: 0.9988707304000854,grad_norm: 0.9999991086597396, iteration: 129860
loss: 1.0102710723876953,grad_norm: 0.9999991850972965, iteration: 129861
loss: 0.9847850203514099,grad_norm: 0.9783793295884756, iteration: 129862
loss: 0.9534252285957336,grad_norm: 0.9999993742858667, iteration: 129863
loss: 1.0077308416366577,grad_norm: 0.9999991707441838, iteration: 129864
loss: 1.0081254243850708,grad_norm: 0.9999990087012685, iteration: 129865
loss: 0.9761881232261658,grad_norm: 0.9738831884297151, iteration: 129866
loss: 1.0041368007659912,grad_norm: 0.9636826054098724, iteration: 129867
loss: 0.9672062397003174,grad_norm: 0.9999991581803332, iteration: 129868
loss: 0.9959761500358582,grad_norm: 0.9999991388071057, iteration: 129869
loss: 1.0404021739959717,grad_norm: 0.9999997377472832, iteration: 129870
loss: 0.9575950503349304,grad_norm: 0.9824287625965135, iteration: 129871
loss: 1.0181587934494019,grad_norm: 0.9736993243702639, iteration: 129872
loss: 0.9650024771690369,grad_norm: 0.9999991760618195, iteration: 129873
loss: 1.002415418624878,grad_norm: 0.9999991044014392, iteration: 129874
loss: 1.0089452266693115,grad_norm: 0.999999137678088, iteration: 129875
loss: 1.0035072565078735,grad_norm: 0.940492682281875, iteration: 129876
loss: 1.0064185857772827,grad_norm: 0.9918630495212687, iteration: 129877
loss: 1.0284837484359741,grad_norm: 0.9999991418142353, iteration: 129878
loss: 0.9641203284263611,grad_norm: 0.9925036621171881, iteration: 129879
loss: 0.9916692972183228,grad_norm: 0.9999990189962532, iteration: 129880
loss: 0.969789445400238,grad_norm: 0.9621021508668184, iteration: 129881
loss: 1.010553240776062,grad_norm: 0.9999990818195715, iteration: 129882
loss: 0.9577832818031311,grad_norm: 0.9406721926156566, iteration: 129883
loss: 1.0324680805206299,grad_norm: 0.9999992779296932, iteration: 129884
loss: 0.991463303565979,grad_norm: 0.999999081805433, iteration: 129885
loss: 1.0304334163665771,grad_norm: 0.9590452502621776, iteration: 129886
loss: 0.97454833984375,grad_norm: 0.9999989729487169, iteration: 129887
loss: 0.99576336145401,grad_norm: 0.9999992552307184, iteration: 129888
loss: 1.0084517002105713,grad_norm: 0.9300314135911958, iteration: 129889
loss: 1.0053437948226929,grad_norm: 0.9999989484554681, iteration: 129890
loss: 0.9643892049789429,grad_norm: 0.9999990226051609, iteration: 129891
loss: 1.0249731540679932,grad_norm: 0.8540816409534925, iteration: 129892
loss: 1.008908748626709,grad_norm: 0.9999990821365858, iteration: 129893
loss: 0.995328426361084,grad_norm: 0.9999988982962449, iteration: 129894
loss: 0.9380179643630981,grad_norm: 0.9999990115987154, iteration: 129895
loss: 1.0213299989700317,grad_norm: 0.8058618271438308, iteration: 129896
loss: 1.001134991645813,grad_norm: 0.9522975083660373, iteration: 129897
loss: 1.007432460784912,grad_norm: 0.9607068442973803, iteration: 129898
loss: 1.05517578125,grad_norm: 0.9999990939314616, iteration: 129899
loss: 0.9751018285751343,grad_norm: 0.9395627946779221, iteration: 129900
loss: 1.0153728723526,grad_norm: 0.9999992813133409, iteration: 129901
loss: 0.9622975587844849,grad_norm: 0.9036652944702671, iteration: 129902
loss: 0.9729372262954712,grad_norm: 0.8631720451409786, iteration: 129903
loss: 1.0117709636688232,grad_norm: 0.999999355173649, iteration: 129904
loss: 1.030632734298706,grad_norm: 0.9345229976035101, iteration: 129905
loss: 0.9559317827224731,grad_norm: 0.9984043450248763, iteration: 129906
loss: 0.9803096652030945,grad_norm: 0.9999991202992232, iteration: 129907
loss: 0.9787986278533936,grad_norm: 0.9999990532775611, iteration: 129908
loss: 1.0304378271102905,grad_norm: 0.9999991606548302, iteration: 129909
loss: 1.045164942741394,grad_norm: 0.9525808181152556, iteration: 129910
loss: 1.0516703128814697,grad_norm: 0.9999990325126179, iteration: 129911
loss: 0.9680171012878418,grad_norm: 0.9497237800098098, iteration: 129912
loss: 0.9791282415390015,grad_norm: 0.9999991501779021, iteration: 129913
loss: 1.0424166917800903,grad_norm: 0.9999991678373065, iteration: 129914
loss: 0.9988385438919067,grad_norm: 0.9999991937778746, iteration: 129915
loss: 0.9801835417747498,grad_norm: 0.9999990571868401, iteration: 129916
loss: 1.0056275129318237,grad_norm: 0.9999990693743823, iteration: 129917
loss: 0.9925982356071472,grad_norm: 0.9587411112267326, iteration: 129918
loss: 1.0055781602859497,grad_norm: 0.9261071231874205, iteration: 129919
loss: 1.0121291875839233,grad_norm: 0.999998989737229, iteration: 129920
loss: 0.9700080752372742,grad_norm: 0.8873668530866796, iteration: 129921
loss: 1.0175946950912476,grad_norm: 0.9999991057147326, iteration: 129922
loss: 1.0234158039093018,grad_norm: 0.9999992213335958, iteration: 129923
loss: 1.0440843105316162,grad_norm: 0.8416762517279545, iteration: 129924
loss: 1.0099592208862305,grad_norm: 0.9999992024401155, iteration: 129925
loss: 0.9880570769309998,grad_norm: 0.9999990827363714, iteration: 129926
loss: 0.9981304407119751,grad_norm: 0.9999991944785303, iteration: 129927
loss: 1.0052400827407837,grad_norm: 0.9999992086623299, iteration: 129928
loss: 1.0108864307403564,grad_norm: 0.9999992088749181, iteration: 129929
loss: 1.0223978757858276,grad_norm: 0.9999989585299665, iteration: 129930
loss: 1.0124144554138184,grad_norm: 0.970240761453163, iteration: 129931
loss: 0.9967957139015198,grad_norm: 0.8710363449161351, iteration: 129932
loss: 1.0387943983078003,grad_norm: 0.9999991411887129, iteration: 129933
loss: 1.005916953086853,grad_norm: 0.9462441119321167, iteration: 129934
loss: 1.0237834453582764,grad_norm: 0.9999991396779463, iteration: 129935
loss: 0.9746532440185547,grad_norm: 0.9712520726732989, iteration: 129936
loss: 0.9867436289787292,grad_norm: 0.9740288690371561, iteration: 129937
loss: 0.9783375263214111,grad_norm: 0.9999991161869268, iteration: 129938
loss: 0.9947884678840637,grad_norm: 0.9999992630718795, iteration: 129939
loss: 1.041752576828003,grad_norm: 0.999999080467506, iteration: 129940
loss: 0.9750379323959351,grad_norm: 0.8934474360704793, iteration: 129941
loss: 0.9922325611114502,grad_norm: 0.8995925842051825, iteration: 129942
loss: 0.9984056353569031,grad_norm: 0.9999990603087177, iteration: 129943
loss: 0.9887707829475403,grad_norm: 0.9368292807775289, iteration: 129944
loss: 1.010359764099121,grad_norm: 0.9909277025011383, iteration: 129945
loss: 1.0307490825653076,grad_norm: 0.9999991774144327, iteration: 129946
loss: 1.0038601160049438,grad_norm: 0.9999989926706233, iteration: 129947
loss: 0.9668639898300171,grad_norm: 0.9290731204968085, iteration: 129948
loss: 0.9781625866889954,grad_norm: 0.8669618528953805, iteration: 129949
loss: 0.9817054271697998,grad_norm: 0.9999991837902382, iteration: 129950
loss: 1.0144652128219604,grad_norm: 0.9999990657318865, iteration: 129951
loss: 1.015165090560913,grad_norm: 0.9999990997961912, iteration: 129952
loss: 1.0060315132141113,grad_norm: 0.9799947985550894, iteration: 129953
loss: 1.0288708209991455,grad_norm: 0.9999990399288164, iteration: 129954
loss: 0.9919236898422241,grad_norm: 0.8575035694620989, iteration: 129955
loss: 1.0040459632873535,grad_norm: 0.9557825360061812, iteration: 129956
loss: 0.9749509692192078,grad_norm: 0.9999990818153852, iteration: 129957
loss: 1.0422974824905396,grad_norm: 0.9999991154816638, iteration: 129958
loss: 1.0099040269851685,grad_norm: 0.9999990694222733, iteration: 129959
loss: 1.0030765533447266,grad_norm: 0.9808418328169569, iteration: 129960
loss: 1.0051937103271484,grad_norm: 0.9580195127008241, iteration: 129961
loss: 0.9979380965232849,grad_norm: 0.9999989852445543, iteration: 129962
loss: 0.990153968334198,grad_norm: 0.9887610103134401, iteration: 129963
loss: 0.9843510985374451,grad_norm: 0.9999990058736745, iteration: 129964
loss: 0.9760571122169495,grad_norm: 0.8720097538179847, iteration: 129965
loss: 0.9639837741851807,grad_norm: 0.9999991476442945, iteration: 129966
loss: 1.0102410316467285,grad_norm: 0.9999990030385477, iteration: 129967
loss: 0.9921743869781494,grad_norm: 0.8977027961104681, iteration: 129968
loss: 1.0018179416656494,grad_norm: 0.999999284066542, iteration: 129969
loss: 0.9948018193244934,grad_norm: 0.999999261133556, iteration: 129970
loss: 0.9908702373504639,grad_norm: 0.9999990423399233, iteration: 129971
loss: 1.0039026737213135,grad_norm: 0.9999991005497101, iteration: 129972
loss: 1.042367935180664,grad_norm: 0.9253978495271886, iteration: 129973
loss: 1.029707908630371,grad_norm: 0.9999992739243732, iteration: 129974
loss: 0.9642846584320068,grad_norm: 0.9999990606555719, iteration: 129975
loss: 0.9787470102310181,grad_norm: 0.9999992491680135, iteration: 129976
loss: 0.9804134368896484,grad_norm: 0.9347643473473184, iteration: 129977
loss: 0.9536819458007812,grad_norm: 0.999999086443325, iteration: 129978
loss: 0.9717039465904236,grad_norm: 0.943237822191435, iteration: 129979
loss: 0.996156632900238,grad_norm: 0.9999990531741849, iteration: 129980
loss: 1.0001028776168823,grad_norm: 0.94283355593833, iteration: 129981
loss: 1.0017824172973633,grad_norm: 0.9840411487045396, iteration: 129982
loss: 1.0334621667861938,grad_norm: 0.9999999226490758, iteration: 129983
loss: 1.0052398443222046,grad_norm: 0.9999991099138059, iteration: 129984
loss: 0.9925679564476013,grad_norm: 0.9999992984420855, iteration: 129985
loss: 1.0054553747177124,grad_norm: 0.99999954438094, iteration: 129986
loss: 0.9888394474983215,grad_norm: 0.999998995740198, iteration: 129987
loss: 0.9879279136657715,grad_norm: 0.9193489547055573, iteration: 129988
loss: 1.0227211713790894,grad_norm: 0.9999990357859481, iteration: 129989
loss: 0.9880174398422241,grad_norm: 0.999999090263907, iteration: 129990
loss: 1.016173243522644,grad_norm: 0.999999377066187, iteration: 129991
loss: 0.9900475144386292,grad_norm: 0.9999990654470607, iteration: 129992
loss: 1.0496877431869507,grad_norm: 0.9999994737418948, iteration: 129993
loss: 0.9617388248443604,grad_norm: 0.983498622876043, iteration: 129994
loss: 1.0327694416046143,grad_norm: 0.9999989652851885, iteration: 129995
loss: 0.9931043386459351,grad_norm: 0.9999993040607221, iteration: 129996
loss: 1.0102399587631226,grad_norm: 0.9354309735504929, iteration: 129997
loss: 1.0396767854690552,grad_norm: 0.9450547048351939, iteration: 129998
loss: 1.006213903427124,grad_norm: 0.953273847198696, iteration: 129999
loss: 1.019471526145935,grad_norm: 0.9835477305780139, iteration: 130000
Evaluating at step 130000
{'val': 0.9958135895431042, 'test': 2.5910643830466955}
loss: 0.9830734729766846,grad_norm: 0.887236586718956, iteration: 130001
loss: 1.010490894317627,grad_norm: 0.9715810340496621, iteration: 130002
loss: 0.9405674934387207,grad_norm: 0.9999991550162118, iteration: 130003
loss: 1.0197362899780273,grad_norm: 0.8672997903270534, iteration: 130004
loss: 1.005958080291748,grad_norm: 0.9999990061963401, iteration: 130005
loss: 0.9587833881378174,grad_norm: 0.9747788713639055, iteration: 130006
loss: 1.1096010208129883,grad_norm: 0.9999998558580979, iteration: 130007
loss: 1.015230655670166,grad_norm: 0.9999990868261229, iteration: 130008
loss: 1.0098415613174438,grad_norm: 0.9999994836745639, iteration: 130009
loss: 1.029895544052124,grad_norm: 0.9999991110508604, iteration: 130010
loss: 0.9764224886894226,grad_norm: 0.9999992556133331, iteration: 130011
loss: 1.016410231590271,grad_norm: 0.999999228377454, iteration: 130012
loss: 0.9955635666847229,grad_norm: 0.9999991189824956, iteration: 130013
loss: 1.1018832921981812,grad_norm: 0.9999999781848401, iteration: 130014
loss: 0.983782172203064,grad_norm: 0.826793260167718, iteration: 130015
loss: 0.9767925143241882,grad_norm: 0.9999993066147248, iteration: 130016
loss: 0.9980921745300293,grad_norm: 0.9728876770424366, iteration: 130017
loss: 0.9901623725891113,grad_norm: 0.999999125094855, iteration: 130018
loss: 0.9820870161056519,grad_norm: 0.9412327334750914, iteration: 130019
loss: 1.0345330238342285,grad_norm: 0.9999991623904312, iteration: 130020
loss: 0.9856933355331421,grad_norm: 0.8967044782136575, iteration: 130021
loss: 0.9943723082542419,grad_norm: 0.9999992988690346, iteration: 130022
loss: 1.0124353170394897,grad_norm: 0.9999996033535572, iteration: 130023
loss: 1.0268815755844116,grad_norm: 0.9999990724693115, iteration: 130024
loss: 0.995414674282074,grad_norm: 0.9999990795435184, iteration: 130025
loss: 0.9875757098197937,grad_norm: 0.8188559186402122, iteration: 130026
loss: 1.0297974348068237,grad_norm: 0.9999992865192943, iteration: 130027
loss: 1.0398705005645752,grad_norm: 0.8961692624733806, iteration: 130028
loss: 1.0202882289886475,grad_norm: 0.9999992143351558, iteration: 130029
loss: 1.0273866653442383,grad_norm: 0.9999991431918409, iteration: 130030
loss: 1.024046778678894,grad_norm: 0.9999990824458601, iteration: 130031
loss: 1.0027856826782227,grad_norm: 0.9999990967145949, iteration: 130032
loss: 1.0249271392822266,grad_norm: 0.9999992135462373, iteration: 130033
loss: 1.052207589149475,grad_norm: 0.9999991578634324, iteration: 130034
loss: 1.019436240196228,grad_norm: 0.9999990562662375, iteration: 130035
loss: 0.9968996644020081,grad_norm: 0.8941156292289814, iteration: 130036
loss: 1.00386381149292,grad_norm: 0.9999992381519521, iteration: 130037
loss: 0.9792786836624146,grad_norm: 0.9999991885496564, iteration: 130038
loss: 0.9925290942192078,grad_norm: 0.9999992349106851, iteration: 130039
loss: 1.0194560289382935,grad_norm: 0.9999991304779278, iteration: 130040
loss: 0.9755143523216248,grad_norm: 0.9872491452678474, iteration: 130041
loss: 0.9982524514198303,grad_norm: 0.889584349584181, iteration: 130042
loss: 1.012122392654419,grad_norm: 0.9999992909155518, iteration: 130043
loss: 0.9840034246444702,grad_norm: 0.9999991052098846, iteration: 130044
loss: 0.987882137298584,grad_norm: 0.9999991319944839, iteration: 130045
loss: 0.9881159663200378,grad_norm: 0.9883016466978397, iteration: 130046
loss: 1.0125479698181152,grad_norm: 0.9999991958192257, iteration: 130047
loss: 1.037830114364624,grad_norm: 0.9957299367032602, iteration: 130048
loss: 0.9753447771072388,grad_norm: 0.8577698331154944, iteration: 130049
loss: 1.0170515775680542,grad_norm: 0.8885881712889807, iteration: 130050
loss: 1.0315589904785156,grad_norm: 0.9992663022547444, iteration: 130051
loss: 0.9806476831436157,grad_norm: 0.9999989737696491, iteration: 130052
loss: 0.9567124247550964,grad_norm: 0.9999990236287817, iteration: 130053
loss: 1.0277639627456665,grad_norm: 0.9969976214956968, iteration: 130054
loss: 0.9888735413551331,grad_norm: 0.9680821101264635, iteration: 130055
loss: 0.9805249571800232,grad_norm: 0.9999993258524866, iteration: 130056
loss: 0.9771758913993835,grad_norm: 0.9656783457059664, iteration: 130057
loss: 0.9864206314086914,grad_norm: 0.8450599931174848, iteration: 130058
loss: 0.9882866740226746,grad_norm: 0.9999993071115394, iteration: 130059
loss: 1.009955883026123,grad_norm: 0.9999989658239515, iteration: 130060
loss: 1.0093344449996948,grad_norm: 0.9849665132383536, iteration: 130061
loss: 1.0447540283203125,grad_norm: 0.9999997975132635, iteration: 130062
loss: 0.9889249205589294,grad_norm: 0.9999992079306466, iteration: 130063
loss: 0.9808262586593628,grad_norm: 0.9999991473279792, iteration: 130064
loss: 0.9815126657485962,grad_norm: 0.9999993720584833, iteration: 130065
loss: 0.9921653270721436,grad_norm: 0.9314256372564024, iteration: 130066
loss: 1.0352197885513306,grad_norm: 0.9999989967329252, iteration: 130067
loss: 0.9593328833580017,grad_norm: 0.9413280793992284, iteration: 130068
loss: 0.9617446660995483,grad_norm: 0.9999992192925604, iteration: 130069
loss: 1.0034860372543335,grad_norm: 0.8713036985312801, iteration: 130070
loss: 1.0035924911499023,grad_norm: 0.9999990818061818, iteration: 130071
loss: 1.0250204801559448,grad_norm: 0.9999990483800598, iteration: 130072
loss: 1.0049068927764893,grad_norm: 0.9999990722531681, iteration: 130073
loss: 1.0336568355560303,grad_norm: 0.999999122335105, iteration: 130074
loss: 0.9919964075088501,grad_norm: 0.8250513280261893, iteration: 130075
loss: 0.9489478468894958,grad_norm: 0.8643084238482641, iteration: 130076
loss: 1.0198153257369995,grad_norm: 0.9999990491345646, iteration: 130077
loss: 0.9660778045654297,grad_norm: 0.883278208063946, iteration: 130078
loss: 0.9927005767822266,grad_norm: 0.893011552972499, iteration: 130079
loss: 1.0032356977462769,grad_norm: 0.9184415214229953, iteration: 130080
loss: 1.0006698369979858,grad_norm: 0.9999992207187024, iteration: 130081
loss: 0.9572950601577759,grad_norm: 0.9999991804183053, iteration: 130082
loss: 1.0037667751312256,grad_norm: 0.9999989088315303, iteration: 130083
loss: 0.9880774617195129,grad_norm: 0.923932245244285, iteration: 130084
loss: 0.9996173977851868,grad_norm: 0.9129292513398743, iteration: 130085
loss: 0.9988772869110107,grad_norm: 0.9254772881485455, iteration: 130086
loss: 0.9492672681808472,grad_norm: 0.9812577985976764, iteration: 130087
loss: 1.022254228591919,grad_norm: 0.8388732119223586, iteration: 130088
loss: 1.0220742225646973,grad_norm: 0.9999990927241704, iteration: 130089
loss: 0.9786792397499084,grad_norm: 0.8642587680855205, iteration: 130090
loss: 0.996374249458313,grad_norm: 0.9588917982479679, iteration: 130091
loss: 0.9884629845619202,grad_norm: 0.9560656790866039, iteration: 130092
loss: 0.9833921790122986,grad_norm: 0.9766200520678144, iteration: 130093
loss: 1.0041576623916626,grad_norm: 0.9242753199619493, iteration: 130094
loss: 0.9890567064285278,grad_norm: 0.9999991426145444, iteration: 130095
loss: 1.0102087259292603,grad_norm: 0.9029218091495219, iteration: 130096
loss: 1.01125168800354,grad_norm: 0.893247439724292, iteration: 130097
loss: 1.0195846557617188,grad_norm: 0.999999047383885, iteration: 130098
loss: 1.005242109298706,grad_norm: 0.999999265997865, iteration: 130099
loss: 0.9706102609634399,grad_norm: 0.9999992537362303, iteration: 130100
loss: 1.013506293296814,grad_norm: 0.9999990047674232, iteration: 130101
loss: 1.0330851078033447,grad_norm: 0.999999177407095, iteration: 130102
loss: 0.9588104486465454,grad_norm: 0.8714736002860141, iteration: 130103
loss: 0.9808807373046875,grad_norm: 0.8523741968650519, iteration: 130104
loss: 1.0080870389938354,grad_norm: 0.8076475593408636, iteration: 130105
loss: 1.0168976783752441,grad_norm: 0.9999992397507915, iteration: 130106
loss: 0.9935540556907654,grad_norm: 0.9999992530332789, iteration: 130107
loss: 0.986318826675415,grad_norm: 0.9262574024422724, iteration: 130108
loss: 1.0187076330184937,grad_norm: 0.999999082177296, iteration: 130109
loss: 0.9999296069145203,grad_norm: 0.9999993084810728, iteration: 130110
loss: 1.0229439735412598,grad_norm: 0.9999991100089304, iteration: 130111
loss: 1.0229241847991943,grad_norm: 0.9986768784676819, iteration: 130112
loss: 1.0168540477752686,grad_norm: 0.9999990190603966, iteration: 130113
loss: 0.9776419997215271,grad_norm: 0.943400246543282, iteration: 130114
loss: 0.9976329803466797,grad_norm: 0.999999014843581, iteration: 130115
loss: 0.9778465032577515,grad_norm: 0.999999070246115, iteration: 130116
loss: 0.9975852370262146,grad_norm: 0.9533506672759424, iteration: 130117
loss: 1.0210188627243042,grad_norm: 0.9999990644197438, iteration: 130118
loss: 1.008815884590149,grad_norm: 0.9999992102067294, iteration: 130119
loss: 0.9604566097259521,grad_norm: 0.9999990031329615, iteration: 130120
loss: 0.999262273311615,grad_norm: 0.9999990888884693, iteration: 130121
loss: 0.9945502877235413,grad_norm: 0.9999991759766828, iteration: 130122
loss: 1.0168274641036987,grad_norm: 0.9746197571605378, iteration: 130123
loss: 0.9660318493843079,grad_norm: 0.9999990842856872, iteration: 130124
loss: 1.038700819015503,grad_norm: 0.9999989446989216, iteration: 130125
loss: 1.01212739944458,grad_norm: 0.9999995040960704, iteration: 130126
loss: 1.0046054124832153,grad_norm: 0.8336214890429656, iteration: 130127
loss: 1.0093942880630493,grad_norm: 0.9999990821198368, iteration: 130128
loss: 1.0114130973815918,grad_norm: 0.9999989980096657, iteration: 130129
loss: 0.9964956641197205,grad_norm: 0.9999991922475471, iteration: 130130
loss: 0.983109712600708,grad_norm: 0.9999991121662246, iteration: 130131
loss: 0.9894253611564636,grad_norm: 0.9999990836761307, iteration: 130132
loss: 0.9959437251091003,grad_norm: 0.9999992087326445, iteration: 130133
loss: 0.9880746603012085,grad_norm: 0.8663380628581447, iteration: 130134
loss: 1.0184005498886108,grad_norm: 0.9785816713540585, iteration: 130135
loss: 0.9863179922103882,grad_norm: 0.9999992127960405, iteration: 130136
loss: 0.9995370507240295,grad_norm: 0.988177369905033, iteration: 130137
loss: 1.0221099853515625,grad_norm: 0.9999992272655507, iteration: 130138
loss: 0.9916458129882812,grad_norm: 0.9999991707627807, iteration: 130139
loss: 0.9600871205329895,grad_norm: 0.9999990885448967, iteration: 130140
loss: 1.0085713863372803,grad_norm: 0.9762553287547592, iteration: 130141
loss: 0.9945011734962463,grad_norm: 0.9999991896582593, iteration: 130142
loss: 1.0216078758239746,grad_norm: 0.9999990289770452, iteration: 130143
loss: 0.992759108543396,grad_norm: 0.9720616052181236, iteration: 130144
loss: 0.9974421858787537,grad_norm: 0.9999990126486584, iteration: 130145
loss: 1.0047740936279297,grad_norm: 0.9677999356234798, iteration: 130146
loss: 0.9808704853057861,grad_norm: 0.9999992322633011, iteration: 130147
loss: 1.0266242027282715,grad_norm: 0.9999997336677755, iteration: 130148
loss: 0.9544256329536438,grad_norm: 0.9999990095401433, iteration: 130149
loss: 1.0037888288497925,grad_norm: 0.99999918443795, iteration: 130150
loss: 0.9686112403869629,grad_norm: 0.9734365741162943, iteration: 130151
loss: 1.007534384727478,grad_norm: 0.9999991152621555, iteration: 130152
loss: 1.0137377977371216,grad_norm: 0.8655781398966317, iteration: 130153
loss: 1.0214349031448364,grad_norm: 0.9999991348681575, iteration: 130154
loss: 0.9837662577629089,grad_norm: 0.9884831599630359, iteration: 130155
loss: 0.9901188611984253,grad_norm: 0.9999991393046063, iteration: 130156
loss: 0.9740753173828125,grad_norm: 0.9999989967197392, iteration: 130157
loss: 0.9606565833091736,grad_norm: 0.9999992674600083, iteration: 130158
loss: 0.9811407923698425,grad_norm: 0.9908582448143393, iteration: 130159
loss: 0.9952692985534668,grad_norm: 0.9999990321819647, iteration: 130160
loss: 0.9631161689758301,grad_norm: 0.9999989417109332, iteration: 130161
loss: 0.9946538209915161,grad_norm: 0.7812556182079652, iteration: 130162
loss: 0.9978759288787842,grad_norm: 0.9999990466214435, iteration: 130163
loss: 0.9961636066436768,grad_norm: 0.8548323768926904, iteration: 130164
loss: 0.9935691356658936,grad_norm: 0.9747469173217989, iteration: 130165
loss: 0.985053539276123,grad_norm: 0.8736442832915843, iteration: 130166
loss: 0.977688193321228,grad_norm: 0.8712055697681649, iteration: 130167
loss: 1.0239719152450562,grad_norm: 0.8854335553440229, iteration: 130168
loss: 1.0232479572296143,grad_norm: 0.8518171941450037, iteration: 130169
loss: 1.038011074066162,grad_norm: 0.8972987719019331, iteration: 130170
loss: 0.9902786612510681,grad_norm: 0.8727798899654756, iteration: 130171
loss: 1.0197254419326782,grad_norm: 0.9999990009751442, iteration: 130172
loss: 1.016937017440796,grad_norm: 0.9999996570203721, iteration: 130173
loss: 1.0229130983352661,grad_norm: 0.9999992479152497, iteration: 130174
loss: 0.9780444502830505,grad_norm: 0.760436918041762, iteration: 130175
loss: 0.9844998121261597,grad_norm: 0.9999990943199464, iteration: 130176
loss: 1.0069477558135986,grad_norm: 0.9454204821487662, iteration: 130177
loss: 0.9929935336112976,grad_norm: 0.9999991219023393, iteration: 130178
loss: 0.9468416571617126,grad_norm: 0.9999990961857567, iteration: 130179
loss: 1.037889838218689,grad_norm: 0.9999991087990661, iteration: 130180
loss: 0.9770277738571167,grad_norm: 0.999999129284618, iteration: 130181
loss: 1.0154316425323486,grad_norm: 0.999999628585122, iteration: 130182
loss: 0.9976010918617249,grad_norm: 0.9999991877150756, iteration: 130183
loss: 1.0182037353515625,grad_norm: 0.9999990037150686, iteration: 130184
loss: 0.9831407070159912,grad_norm: 0.9999997029308036, iteration: 130185
loss: 1.0041279792785645,grad_norm: 0.9597489985691038, iteration: 130186
loss: 0.9721260666847229,grad_norm: 0.9999991053573342, iteration: 130187
loss: 0.9781436324119568,grad_norm: 0.9991618126474773, iteration: 130188
loss: 1.0075256824493408,grad_norm: 0.9999991441744573, iteration: 130189
loss: 0.9935397505760193,grad_norm: 0.9641375713387624, iteration: 130190
loss: 0.987881064414978,grad_norm: 0.9999992814096567, iteration: 130191
loss: 1.0003281831741333,grad_norm: 0.8944590814900848, iteration: 130192
loss: 0.9926117062568665,grad_norm: 0.9516179785649699, iteration: 130193
loss: 0.9805828332901001,grad_norm: 0.9999991201193685, iteration: 130194
loss: 1.032220721244812,grad_norm: 0.9999992712818516, iteration: 130195
loss: 1.0155373811721802,grad_norm: 0.9999991142813959, iteration: 130196
loss: 0.9909979701042175,grad_norm: 0.9999994964716266, iteration: 130197
loss: 0.9915762543678284,grad_norm: 0.9999993335344124, iteration: 130198
loss: 1.0244081020355225,grad_norm: 0.9999992385823384, iteration: 130199
loss: 1.0002355575561523,grad_norm: 0.9347254577630623, iteration: 130200
loss: 1.0429683923721313,grad_norm: 0.9999991359531776, iteration: 130201
loss: 1.0300700664520264,grad_norm: 0.9999991158489414, iteration: 130202
loss: 0.9872276782989502,grad_norm: 0.9999991017677495, iteration: 130203
loss: 0.9732781052589417,grad_norm: 0.8959226561200766, iteration: 130204
loss: 0.99326092004776,grad_norm: 0.9586713854462058, iteration: 130205
loss: 1.0565130710601807,grad_norm: 0.9999992280040255, iteration: 130206
loss: 1.021801471710205,grad_norm: 0.9326283601983241, iteration: 130207
loss: 1.0297720432281494,grad_norm: 0.9999995976067345, iteration: 130208
loss: 0.975649356842041,grad_norm: 0.9026665336158584, iteration: 130209
loss: 1.0038942098617554,grad_norm: 0.9999991422684515, iteration: 130210
loss: 0.9950499534606934,grad_norm: 0.9999990965114873, iteration: 130211
loss: 0.9765666723251343,grad_norm: 0.9362639022558149, iteration: 130212
loss: 0.9820338487625122,grad_norm: 0.9999998320861342, iteration: 130213
loss: 1.0045162439346313,grad_norm: 0.9999991012386977, iteration: 130214
loss: 1.022957444190979,grad_norm: 0.9999991352492557, iteration: 130215
loss: 1.0349692106246948,grad_norm: 0.9999991556199762, iteration: 130216
loss: 0.9963705539703369,grad_norm: 0.9481851442510331, iteration: 130217
loss: 0.9912047386169434,grad_norm: 0.9111808274477122, iteration: 130218
loss: 0.9741260409355164,grad_norm: 0.9999991814359062, iteration: 130219
loss: 1.0454597473144531,grad_norm: 0.9999990608245342, iteration: 130220
loss: 0.9840165972709656,grad_norm: 0.9999993311657747, iteration: 130221
loss: 0.9823732972145081,grad_norm: 0.8823904779358324, iteration: 130222
loss: 0.9676958322525024,grad_norm: 0.9999990765701818, iteration: 130223
loss: 0.9642568230628967,grad_norm: 0.9999990569750238, iteration: 130224
loss: 0.9708106517791748,grad_norm: 0.999999072173626, iteration: 130225
loss: 1.0054625272750854,grad_norm: 0.9999989847610978, iteration: 130226
loss: 0.9704064726829529,grad_norm: 0.9999992088094884, iteration: 130227
loss: 1.0060043334960938,grad_norm: 0.8932627128585618, iteration: 130228
loss: 1.0154441595077515,grad_norm: 0.9999991151255855, iteration: 130229
loss: 0.9780281782150269,grad_norm: 0.9787564471860923, iteration: 130230
loss: 0.9922957420349121,grad_norm: 0.994484235232857, iteration: 130231
loss: 0.977242112159729,grad_norm: 0.9879349081595991, iteration: 130232
loss: 1.0047801733016968,grad_norm: 0.9999991099635847, iteration: 130233
loss: 1.102770447731018,grad_norm: 0.9999996639062605, iteration: 130234
loss: 0.9848536849021912,grad_norm: 0.9658232593668068, iteration: 130235
loss: 0.9820916056632996,grad_norm: 0.9205507251513939, iteration: 130236
loss: 0.9805729985237122,grad_norm: 0.9999990625439596, iteration: 130237
loss: 0.9860748052597046,grad_norm: 0.8563332295468296, iteration: 130238
loss: 0.9979370832443237,grad_norm: 0.90255859896112, iteration: 130239
loss: 0.9782916903495789,grad_norm: 0.9999991067033452, iteration: 130240
loss: 1.0092473030090332,grad_norm: 0.9999996975406862, iteration: 130241
loss: 0.9751077890396118,grad_norm: 0.9999993612340401, iteration: 130242
loss: 1.017246127128601,grad_norm: 0.9999991937232063, iteration: 130243
loss: 1.0116658210754395,grad_norm: 0.99999918308394, iteration: 130244
loss: 1.0041171312332153,grad_norm: 0.9999991655554912, iteration: 130245
loss: 0.9901204109191895,grad_norm: 0.9999990791871296, iteration: 130246
loss: 1.0143331289291382,grad_norm: 0.999998930339885, iteration: 130247
loss: 1.000087857246399,grad_norm: 0.9999992477438537, iteration: 130248
loss: 1.1185622215270996,grad_norm: 0.9999997373876227, iteration: 130249
loss: 1.009482979774475,grad_norm: 0.9999991815854558, iteration: 130250
loss: 0.9768730401992798,grad_norm: 0.9999989351966753, iteration: 130251
loss: 0.9678500294685364,grad_norm: 0.9853953575396143, iteration: 130252
loss: 1.0076978206634521,grad_norm: 0.9999992349269491, iteration: 130253
loss: 1.0857411623001099,grad_norm: 0.999999851541323, iteration: 130254
loss: 1.0243626832962036,grad_norm: 0.9225741471712062, iteration: 130255
loss: 0.9928586483001709,grad_norm: 0.9999991825617164, iteration: 130256
loss: 1.0087860822677612,grad_norm: 0.9999989634859114, iteration: 130257
loss: 0.9946942925453186,grad_norm: 0.9406164933368637, iteration: 130258
loss: 1.0038946866989136,grad_norm: 0.8771092965845647, iteration: 130259
loss: 0.9815807342529297,grad_norm: 0.983151485870782, iteration: 130260
loss: 1.0303221940994263,grad_norm: 0.9999997466217327, iteration: 130261
loss: 1.006879448890686,grad_norm: 0.9999991412224484, iteration: 130262
loss: 0.9943299889564514,grad_norm: 0.9199633935857654, iteration: 130263
loss: 1.0046309232711792,grad_norm: 0.9999992372666623, iteration: 130264
loss: 1.0604054927825928,grad_norm: 0.999999888801585, iteration: 130265
loss: 1.0136839151382446,grad_norm: 0.9999993640139201, iteration: 130266
loss: 0.9922062158584595,grad_norm: 0.9999992547694753, iteration: 130267
loss: 1.0061334371566772,grad_norm: 0.9396764060554432, iteration: 130268
loss: 1.0815539360046387,grad_norm: 0.9999994091382993, iteration: 130269
loss: 0.9825204014778137,grad_norm: 0.8616890756224799, iteration: 130270
loss: 1.010754108428955,grad_norm: 0.9999992810867655, iteration: 130271
loss: 1.0062185525894165,grad_norm: 0.9201506099328394, iteration: 130272
loss: 1.0065478086471558,grad_norm: 0.9999991450870618, iteration: 130273
loss: 0.9788343906402588,grad_norm: 0.971636349992217, iteration: 130274
loss: 1.0324366092681885,grad_norm: 0.9999991111726523, iteration: 130275
loss: 0.9989936947822571,grad_norm: 0.9999992243091892, iteration: 130276
loss: 1.0089383125305176,grad_norm: 0.9999995202204525, iteration: 130277
loss: 1.025274395942688,grad_norm: 0.9999991433166077, iteration: 130278
loss: 1.081461787223816,grad_norm: 0.9999991633839028, iteration: 130279
loss: 1.0279902219772339,grad_norm: 0.9999990290504552, iteration: 130280
loss: 0.9812502861022949,grad_norm: 0.8880946141564675, iteration: 130281
loss: 0.9935063719749451,grad_norm: 0.9999992342239516, iteration: 130282
loss: 1.0188473463058472,grad_norm: 0.9999999096715048, iteration: 130283
loss: 0.9965358972549438,grad_norm: 0.9708660005450793, iteration: 130284
loss: 0.991298496723175,grad_norm: 0.8994496421954867, iteration: 130285
loss: 1.0160279273986816,grad_norm: 0.9999994077393518, iteration: 130286
loss: 0.9617588520050049,grad_norm: 0.883673975800737, iteration: 130287
loss: 1.005389928817749,grad_norm: 0.8511978633399271, iteration: 130288
loss: 1.009610891342163,grad_norm: 0.9999990759990135, iteration: 130289
loss: 1.0595110654830933,grad_norm: 0.9999993740098858, iteration: 130290
loss: 1.001254677772522,grad_norm: 0.999999043012148, iteration: 130291
loss: 1.0242877006530762,grad_norm: 0.9999996730207554, iteration: 130292
loss: 0.9922565221786499,grad_norm: 0.9625248465759708, iteration: 130293
loss: 0.9725245833396912,grad_norm: 0.9560225711010399, iteration: 130294
loss: 0.9562849998474121,grad_norm: 0.9999993007765138, iteration: 130295
loss: 0.9899457693099976,grad_norm: 0.9412515557167526, iteration: 130296
loss: 0.9698194861412048,grad_norm: 0.9999989767271757, iteration: 130297
loss: 1.0306061506271362,grad_norm: 0.9999993403725396, iteration: 130298
loss: 0.9854745268821716,grad_norm: 0.9999989719736594, iteration: 130299
loss: 1.0067293643951416,grad_norm: 0.9999990183503018, iteration: 130300
loss: 1.055039644241333,grad_norm: 0.9999991605570903, iteration: 130301
loss: 1.029039740562439,grad_norm: 0.9999991367432829, iteration: 130302
loss: 0.9827510714530945,grad_norm: 0.949958161462012, iteration: 130303
loss: 1.0217593908309937,grad_norm: 0.9999991713794862, iteration: 130304
loss: 1.0179959535598755,grad_norm: 0.9999992439505675, iteration: 130305
loss: 0.9720835089683533,grad_norm: 0.9999991530421435, iteration: 130306
loss: 1.0029339790344238,grad_norm: 0.9174145513192804, iteration: 130307
loss: 1.0030275583267212,grad_norm: 0.9999992899656364, iteration: 130308
loss: 1.0587893724441528,grad_norm: 0.999999086673722, iteration: 130309
loss: 0.9781871438026428,grad_norm: 0.9999989160846452, iteration: 130310
loss: 1.0559325218200684,grad_norm: 0.9999992068085245, iteration: 130311
loss: 0.9958540797233582,grad_norm: 0.9178491707828279, iteration: 130312
loss: 1.0273548364639282,grad_norm: 0.9999990277595961, iteration: 130313
loss: 1.0085300207138062,grad_norm: 0.9999994036582548, iteration: 130314
loss: 0.9951850175857544,grad_norm: 0.9999992336322296, iteration: 130315
loss: 1.1447103023529053,grad_norm: 0.9999995764513279, iteration: 130316
loss: 1.0079336166381836,grad_norm: 0.985636307392228, iteration: 130317
loss: 0.9379040598869324,grad_norm: 0.9505905435172438, iteration: 130318
loss: 1.0094327926635742,grad_norm: 0.9999990125111075, iteration: 130319
loss: 1.0111480951309204,grad_norm: 0.9999991637817467, iteration: 130320
loss: 1.1157267093658447,grad_norm: 0.9999998193189679, iteration: 130321
loss: 1.0196229219436646,grad_norm: 0.8155102416385076, iteration: 130322
loss: 0.9842921495437622,grad_norm: 0.9336021045978521, iteration: 130323
loss: 1.012913703918457,grad_norm: 0.9352499639167471, iteration: 130324
loss: 0.9917360544204712,grad_norm: 0.8810663510355581, iteration: 130325
loss: 0.990077793598175,grad_norm: 0.9999991083012346, iteration: 130326
loss: 1.0176198482513428,grad_norm: 0.9999990761527977, iteration: 130327
loss: 1.0169923305511475,grad_norm: 0.9999990637462275, iteration: 130328
loss: 1.0196480751037598,grad_norm: 0.9999992552230429, iteration: 130329
loss: 0.986352264881134,grad_norm: 0.9232983205810796, iteration: 130330
loss: 1.0239545106887817,grad_norm: 0.9999992044230346, iteration: 130331
loss: 0.9906261563301086,grad_norm: 0.9405833712552046, iteration: 130332
loss: 0.9981176257133484,grad_norm: 0.9999991576280856, iteration: 130333
loss: 0.9651380777359009,grad_norm: 0.9688947405826883, iteration: 130334
loss: 1.0518945455551147,grad_norm: 0.9999995216860332, iteration: 130335
loss: 1.0071406364440918,grad_norm: 0.999999015704957, iteration: 130336
loss: 0.9961416721343994,grad_norm: 0.9999991451554194, iteration: 130337
loss: 1.0015625953674316,grad_norm: 0.9527804370463723, iteration: 130338
loss: 1.0087358951568604,grad_norm: 0.9999990608481532, iteration: 130339
loss: 1.0346580743789673,grad_norm: 0.8029398219346242, iteration: 130340
loss: 1.0137262344360352,grad_norm: 0.999999170968774, iteration: 130341
loss: 1.0139826536178589,grad_norm: 0.9999991576393052, iteration: 130342
loss: 1.0264687538146973,grad_norm: 0.9999990613719463, iteration: 130343
loss: 1.0202960968017578,grad_norm: 0.9391010812708517, iteration: 130344
loss: 0.9843708276748657,grad_norm: 0.9999991246064889, iteration: 130345
loss: 1.0123918056488037,grad_norm: 0.9999992185740582, iteration: 130346
loss: 1.0158361196517944,grad_norm: 0.9999997542587195, iteration: 130347
loss: 1.0132797956466675,grad_norm: 0.9766840918084075, iteration: 130348
loss: 1.0207401514053345,grad_norm: 0.9999993550994098, iteration: 130349
loss: 1.0094928741455078,grad_norm: 0.9999993058504494, iteration: 130350
loss: 1.1035206317901611,grad_norm: 0.999999306110783, iteration: 130351
loss: 0.9939560890197754,grad_norm: 0.999999022475525, iteration: 130352
loss: 0.9852943420410156,grad_norm: 0.999999153950979, iteration: 130353
loss: 1.0071018934249878,grad_norm: 0.9995138996978971, iteration: 130354
loss: 0.9893209338188171,grad_norm: 0.8715743048586799, iteration: 130355
loss: 0.9784262180328369,grad_norm: 0.9999991214638625, iteration: 130356
loss: 1.0030293464660645,grad_norm: 0.999999072191031, iteration: 130357
loss: 0.9595818519592285,grad_norm: 0.9545544687338218, iteration: 130358
loss: 0.992892324924469,grad_norm: 0.9439313096470695, iteration: 130359
loss: 1.0346605777740479,grad_norm: 0.9999991716261107, iteration: 130360
loss: 0.9528594017028809,grad_norm: 0.9999990426147142, iteration: 130361
loss: 1.0109400749206543,grad_norm: 0.8095637521014961, iteration: 130362
loss: 0.9762253165245056,grad_norm: 0.9830926301763183, iteration: 130363
loss: 1.0042476654052734,grad_norm: 0.9320462262110567, iteration: 130364
loss: 1.0375841856002808,grad_norm: 0.999999238553093, iteration: 130365
loss: 0.9994205236434937,grad_norm: 0.9999989980382605, iteration: 130366
loss: 1.0020792484283447,grad_norm: 0.9999989346981127, iteration: 130367
loss: 1.003075361251831,grad_norm: 0.9999991692561694, iteration: 130368
loss: 0.9881007075309753,grad_norm: 0.9435093641614217, iteration: 130369
loss: 1.0107812881469727,grad_norm: 0.9999991287959134, iteration: 130370
loss: 1.0133353471755981,grad_norm: 0.9137791899583889, iteration: 130371
loss: 1.03672456741333,grad_norm: 0.9381638490074143, iteration: 130372
loss: 0.9952329993247986,grad_norm: 0.9999991571008752, iteration: 130373
loss: 0.9891061186790466,grad_norm: 0.9999991345360042, iteration: 130374
loss: 1.0406320095062256,grad_norm: 0.9999990833691222, iteration: 130375
loss: 0.9904255867004395,grad_norm: 0.9629873488423507, iteration: 130376
loss: 0.9987221956253052,grad_norm: 0.9999992231246594, iteration: 130377
loss: 1.0081830024719238,grad_norm: 0.9999993103394207, iteration: 130378
loss: 0.9948284029960632,grad_norm: 0.9318428272928047, iteration: 130379
loss: 1.0497184991836548,grad_norm: 0.9999994654927516, iteration: 130380
loss: 0.9943904876708984,grad_norm: 0.9999992780070595, iteration: 130381
loss: 0.9844874739646912,grad_norm: 0.9999990321910756, iteration: 130382
loss: 0.9592069983482361,grad_norm: 0.9600145728061719, iteration: 130383
loss: 1.0202059745788574,grad_norm: 0.9999990958612358, iteration: 130384
loss: 1.030357003211975,grad_norm: 0.9999989741877182, iteration: 130385
loss: 0.9851871132850647,grad_norm: 0.9408875037936794, iteration: 130386
loss: 0.9924492239952087,grad_norm: 0.9999991406896602, iteration: 130387
loss: 1.0042600631713867,grad_norm: 0.7719953707507332, iteration: 130388
loss: 0.9253670573234558,grad_norm: 0.9999991051718179, iteration: 130389
loss: 1.0166808366775513,grad_norm: 0.9999991783364809, iteration: 130390
loss: 0.9948113560676575,grad_norm: 0.9697149186322364, iteration: 130391
loss: 1.001662254333496,grad_norm: 0.925659136175404, iteration: 130392
loss: 0.9998154640197754,grad_norm: 0.9683845334463456, iteration: 130393
loss: 0.9743641018867493,grad_norm: 0.9999990776751363, iteration: 130394
loss: 1.004821538925171,grad_norm: 0.9170851442299006, iteration: 130395
loss: 1.0013136863708496,grad_norm: 0.9999999464430968, iteration: 130396
loss: 1.0007065534591675,grad_norm: 0.9563396218769937, iteration: 130397
loss: 1.0017956495285034,grad_norm: 0.9237368456431385, iteration: 130398
loss: 1.0140966176986694,grad_norm: 0.999999056821413, iteration: 130399
loss: 1.0034419298171997,grad_norm: 0.9999990463167788, iteration: 130400
loss: 0.9664320945739746,grad_norm: 0.8713665828573582, iteration: 130401
loss: 0.9964259266853333,grad_norm: 0.99999908180162, iteration: 130402
loss: 1.0420807600021362,grad_norm: 0.9999992780519463, iteration: 130403
loss: 0.9757863879203796,grad_norm: 0.9750980886929592, iteration: 130404
loss: 1.0023168325424194,grad_norm: 0.9999992192969966, iteration: 130405
loss: 1.0205634832382202,grad_norm: 0.9999993031097159, iteration: 130406
loss: 0.9805569648742676,grad_norm: 0.9547854640137008, iteration: 130407
loss: 1.0170994997024536,grad_norm: 0.8898316339840916, iteration: 130408
loss: 1.0058072805404663,grad_norm: 0.9047734325262053, iteration: 130409
loss: 0.9497321844100952,grad_norm: 0.9999989700520959, iteration: 130410
loss: 1.006691336631775,grad_norm: 0.999999404130382, iteration: 130411
loss: 1.0008598566055298,grad_norm: 0.828646998762668, iteration: 130412
loss: 1.0087871551513672,grad_norm: 0.9999991724685788, iteration: 130413
loss: 1.0054770708084106,grad_norm: 0.9999993007065116, iteration: 130414
loss: 0.9746850728988647,grad_norm: 0.8405357725827723, iteration: 130415
loss: 0.9797461628913879,grad_norm: 0.999999169390203, iteration: 130416
loss: 0.9824123978614807,grad_norm: 0.9999993364711517, iteration: 130417
loss: 0.9720022678375244,grad_norm: 0.9665532636200418, iteration: 130418
loss: 1.027001142501831,grad_norm: 0.999999131552044, iteration: 130419
loss: 0.9996536374092102,grad_norm: 0.9999990758612476, iteration: 130420
loss: 1.01487398147583,grad_norm: 0.9056929396105522, iteration: 130421
loss: 1.007871389389038,grad_norm: 0.9999992625869355, iteration: 130422
loss: 0.9896032214164734,grad_norm: 0.9384533711759679, iteration: 130423
loss: 0.9759838581085205,grad_norm: 0.9999989375245012, iteration: 130424
loss: 1.0018861293792725,grad_norm: 0.9888555807505012, iteration: 130425
loss: 1.0032436847686768,grad_norm: 0.9999992330496534, iteration: 130426
loss: 1.0056229829788208,grad_norm: 0.9999990782792866, iteration: 130427
loss: 1.0122768878936768,grad_norm: 0.9999990761663943, iteration: 130428
loss: 1.027937889099121,grad_norm: 0.9999992594299388, iteration: 130429
loss: 1.035019040107727,grad_norm: 0.9999990780774491, iteration: 130430
loss: 1.0176740884780884,grad_norm: 0.9999991138825152, iteration: 130431
loss: 0.9772751927375793,grad_norm: 0.9999991184739708, iteration: 130432
loss: 0.9695056676864624,grad_norm: 0.9999989477702106, iteration: 130433
loss: 0.9937718510627747,grad_norm: 0.9999991465982511, iteration: 130434
loss: 1.0325552225112915,grad_norm: 0.9999994093578127, iteration: 130435
loss: 0.9971075654029846,grad_norm: 0.9999990591494535, iteration: 130436
loss: 0.9826796650886536,grad_norm: 0.9999989781146071, iteration: 130437
loss: 0.9968443512916565,grad_norm: 0.9713760638262227, iteration: 130438
loss: 1.021217703819275,grad_norm: 0.9999992075961491, iteration: 130439
loss: 0.9958922266960144,grad_norm: 0.9999991733279128, iteration: 130440
loss: 1.031917691230774,grad_norm: 0.9999996088931387, iteration: 130441
loss: 1.0169893503189087,grad_norm: 0.9999991494730992, iteration: 130442
loss: 0.9773542284965515,grad_norm: 0.9999990119648496, iteration: 130443
loss: 0.9869204759597778,grad_norm: 0.9149834458767584, iteration: 130444
loss: 0.9837586879730225,grad_norm: 0.999999027211663, iteration: 130445
loss: 1.0252445936203003,grad_norm: 0.9999992812800736, iteration: 130446
loss: 1.0162702798843384,grad_norm: 0.9999990650030283, iteration: 130447
loss: 0.9649264812469482,grad_norm: 0.9999991230328464, iteration: 130448
loss: 0.9629419445991516,grad_norm: 0.9999992977868307, iteration: 130449
loss: 0.9567934274673462,grad_norm: 0.9999989858047402, iteration: 130450
loss: 1.019328236579895,grad_norm: 0.9999988583355858, iteration: 130451
loss: 1.0133123397827148,grad_norm: 0.9999991224162417, iteration: 130452
loss: 0.9816702008247375,grad_norm: 0.8535615803622184, iteration: 130453
loss: 1.0498672723770142,grad_norm: 0.9999991058119713, iteration: 130454
loss: 1.0039100646972656,grad_norm: 0.9022418557089215, iteration: 130455
loss: 0.9799321293830872,grad_norm: 0.8976527981910738, iteration: 130456
loss: 1.0159786939620972,grad_norm: 0.9999992176696111, iteration: 130457
loss: 0.9988455772399902,grad_norm: 0.9672033100363204, iteration: 130458
loss: 1.0774834156036377,grad_norm: 0.9999996366221908, iteration: 130459
loss: 0.9591641426086426,grad_norm: 0.832115317630529, iteration: 130460
loss: 0.9954971075057983,grad_norm: 0.9806860998360198, iteration: 130461
loss: 1.0021456480026245,grad_norm: 0.9999990660971008, iteration: 130462
loss: 1.0231719017028809,grad_norm: 0.9862110671031493, iteration: 130463
loss: 0.9989604353904724,grad_norm: 0.9182357777494751, iteration: 130464
loss: 0.9911178350448608,grad_norm: 0.999999397686343, iteration: 130465
loss: 1.020756721496582,grad_norm: 0.9999992781044854, iteration: 130466
loss: 0.986112117767334,grad_norm: 0.9999990192462017, iteration: 130467
loss: 0.9777857065200806,grad_norm: 0.9999996113368229, iteration: 130468
loss: 1.008847713470459,grad_norm: 0.9999993337029228, iteration: 130469
loss: 0.9958021640777588,grad_norm: 0.9999991366310995, iteration: 130470
loss: 0.9773961305618286,grad_norm: 0.8930201622884949, iteration: 130471
loss: 1.0092217922210693,grad_norm: 0.9999992006833156, iteration: 130472
loss: 1.0420520305633545,grad_norm: 0.9999991796064661, iteration: 130473
loss: 1.0182061195373535,grad_norm: 0.9104943317499634, iteration: 130474
loss: 1.0187225341796875,grad_norm: 0.9999991707491886, iteration: 130475
loss: 1.0105611085891724,grad_norm: 0.999998979009275, iteration: 130476
loss: 0.9840279221534729,grad_norm: 0.9999992213399612, iteration: 130477
loss: 1.0126287937164307,grad_norm: 0.9999991789703848, iteration: 130478
loss: 1.0078095197677612,grad_norm: 0.999999143547406, iteration: 130479
loss: 1.108989953994751,grad_norm: 0.9999991165062169, iteration: 130480
loss: 0.9852064847946167,grad_norm: 0.9999992150646081, iteration: 130481
loss: 1.0019928216934204,grad_norm: 0.9999991408770661, iteration: 130482
loss: 1.0069694519042969,grad_norm: 0.9999991354384293, iteration: 130483
loss: 1.040550708770752,grad_norm: 0.9999991882776411, iteration: 130484
loss: 0.963642954826355,grad_norm: 0.99999899053987, iteration: 130485
loss: 1.0044004917144775,grad_norm: 0.9111878404348435, iteration: 130486
loss: 1.0166093111038208,grad_norm: 0.9999991024397453, iteration: 130487
loss: 1.0297982692718506,grad_norm: 0.7854955478919221, iteration: 130488
loss: 1.0136624574661255,grad_norm: 0.999999175145394, iteration: 130489
loss: 0.9842230081558228,grad_norm: 0.9786777738017813, iteration: 130490
loss: 1.0103859901428223,grad_norm: 0.9547164093596479, iteration: 130491
loss: 0.9878654479980469,grad_norm: 0.9999992054639268, iteration: 130492
loss: 0.986238420009613,grad_norm: 0.9999990244218959, iteration: 130493
loss: 1.0051404237747192,grad_norm: 0.9999992757105378, iteration: 130494
loss: 0.9679418802261353,grad_norm: 0.9757790532559543, iteration: 130495
loss: 0.9931284785270691,grad_norm: 0.9999991477333097, iteration: 130496
loss: 1.0104695558547974,grad_norm: 0.9999998259837116, iteration: 130497
loss: 0.9962738752365112,grad_norm: 0.9999992694059584, iteration: 130498
loss: 1.0026249885559082,grad_norm: 0.8964383592080118, iteration: 130499
loss: 1.0153180360794067,grad_norm: 0.9422054280389994, iteration: 130500
loss: 0.9817559719085693,grad_norm: 0.9999994117915584, iteration: 130501
loss: 1.0090973377227783,grad_norm: 0.9999991947021956, iteration: 130502
loss: 1.012750267982483,grad_norm: 0.801062692383321, iteration: 130503
loss: 1.0141692161560059,grad_norm: 0.9999990716393984, iteration: 130504
loss: 1.0071182250976562,grad_norm: 0.9999989715039928, iteration: 130505
loss: 1.0121108293533325,grad_norm: 0.9999996231766844, iteration: 130506
loss: 1.029482364654541,grad_norm: 0.9999992031057005, iteration: 130507
loss: 0.9739633798599243,grad_norm: 0.9999990308163131, iteration: 130508
loss: 1.0004487037658691,grad_norm: 0.9999990683213573, iteration: 130509
loss: 1.021373987197876,grad_norm: 0.9999992851558248, iteration: 130510
loss: 1.0003960132598877,grad_norm: 0.9244039801354865, iteration: 130511
loss: 0.9843291640281677,grad_norm: 0.9999990458018316, iteration: 130512
loss: 0.9972560405731201,grad_norm: 0.9999990806412562, iteration: 130513
loss: 0.9841563701629639,grad_norm: 0.8708726240395069, iteration: 130514
loss: 1.0127344131469727,grad_norm: 0.9734467219567455, iteration: 130515
loss: 1.0130953788757324,grad_norm: 0.9999992057115302, iteration: 130516
loss: 1.0067836046218872,grad_norm: 0.8812263377471521, iteration: 130517
loss: 0.9889983534812927,grad_norm: 0.8807848896967642, iteration: 130518
loss: 0.9659777879714966,grad_norm: 0.9371771335861626, iteration: 130519
loss: 1.0038127899169922,grad_norm: 0.9235539188933114, iteration: 130520
loss: 1.0504329204559326,grad_norm: 0.9999990643377437, iteration: 130521
loss: 1.002516269683838,grad_norm: 0.9999990767047913, iteration: 130522
loss: 1.0076148509979248,grad_norm: 0.9999991236149175, iteration: 130523
loss: 0.9895181655883789,grad_norm: 0.9999992101359549, iteration: 130524
loss: 0.9935013055801392,grad_norm: 0.9999991989935014, iteration: 130525
loss: 1.0259751081466675,grad_norm: 0.9806066681731515, iteration: 130526
loss: 0.9870212078094482,grad_norm: 0.9999992498495925, iteration: 130527
loss: 0.9857565760612488,grad_norm: 0.999999044149391, iteration: 130528
loss: 0.9990553855895996,grad_norm: 0.9999990944892128, iteration: 130529
loss: 0.994161069393158,grad_norm: 0.999999217478598, iteration: 130530
loss: 1.017311692237854,grad_norm: 0.9999991233923876, iteration: 130531
loss: 1.0205168724060059,grad_norm: 0.999999205593881, iteration: 130532
loss: 1.0428217649459839,grad_norm: 0.9999992992971898, iteration: 130533
loss: 0.9719380140304565,grad_norm: 0.9999989749826494, iteration: 130534
loss: 1.0169321298599243,grad_norm: 0.9999992110509255, iteration: 130535
loss: 0.9727082252502441,grad_norm: 0.9999990306208603, iteration: 130536
loss: 0.9841634631156921,grad_norm: 0.8989609717686122, iteration: 130537
loss: 0.9878143072128296,grad_norm: 0.9999992069368416, iteration: 130538
loss: 0.9844778180122375,grad_norm: 0.9999990448216911, iteration: 130539
loss: 0.9740899801254272,grad_norm: 0.9999992719674553, iteration: 130540
loss: 0.9769182801246643,grad_norm: 0.9999989945924855, iteration: 130541
loss: 1.055757761001587,grad_norm: 0.9999995305741549, iteration: 130542
loss: 1.0147764682769775,grad_norm: 0.9226641565930553, iteration: 130543
loss: 0.9840537309646606,grad_norm: 0.999999216470302, iteration: 130544
loss: 0.9764587879180908,grad_norm: 0.9999990869006827, iteration: 130545
loss: 1.0062094926834106,grad_norm: 0.9999990584220286, iteration: 130546
loss: 0.9923975467681885,grad_norm: 0.9999990134072803, iteration: 130547
loss: 0.9512438178062439,grad_norm: 0.984301929766316, iteration: 130548
loss: 0.9890117645263672,grad_norm: 0.9999993706479174, iteration: 130549
loss: 1.0318001508712769,grad_norm: 0.9664228061861335, iteration: 130550
loss: 1.0175057649612427,grad_norm: 0.9659347596522201, iteration: 130551
loss: 0.9925794005393982,grad_norm: 0.9999990977896437, iteration: 130552
loss: 0.9847241044044495,grad_norm: 0.9583186218799085, iteration: 130553
loss: 1.012747883796692,grad_norm: 0.999999150438465, iteration: 130554
loss: 1.003360390663147,grad_norm: 0.9999990489324903, iteration: 130555
loss: 1.0122720003128052,grad_norm: 0.9540330151666411, iteration: 130556
loss: 0.9636844992637634,grad_norm: 0.9866146018028016, iteration: 130557
loss: 1.014801263809204,grad_norm: 0.9999990694265039, iteration: 130558
loss: 0.9754065871238708,grad_norm: 0.9999991199223975, iteration: 130559
loss: 0.9861100316047668,grad_norm: 0.9216712795552701, iteration: 130560
loss: 0.9806637167930603,grad_norm: 0.9999990590036016, iteration: 130561
loss: 0.9896636009216309,grad_norm: 0.8783386367721173, iteration: 130562
loss: 0.9850964546203613,grad_norm: 0.976772041486466, iteration: 130563
loss: 0.9935817122459412,grad_norm: 0.902967869612199, iteration: 130564
loss: 0.9703854322433472,grad_norm: 0.9999994492202535, iteration: 130565
loss: 0.9714808464050293,grad_norm: 0.9999990644474972, iteration: 130566
loss: 0.9901941418647766,grad_norm: 0.999999163739885, iteration: 130567
loss: 0.9904811382293701,grad_norm: 0.9999990653752621, iteration: 130568
loss: 0.9949858784675598,grad_norm: 0.9596588015011087, iteration: 130569
loss: 1.0052738189697266,grad_norm: 0.8878644537156748, iteration: 130570
loss: 1.0130451917648315,grad_norm: 0.9999989490990046, iteration: 130571
loss: 0.9867138862609863,grad_norm: 0.8808582270209475, iteration: 130572
loss: 1.0059022903442383,grad_norm: 0.9999993497841729, iteration: 130573
loss: 1.020480990409851,grad_norm: 0.9999991628055261, iteration: 130574
loss: 1.015759825706482,grad_norm: 0.9052496985756981, iteration: 130575
loss: 0.9957275390625,grad_norm: 0.9698249398495061, iteration: 130576
loss: 0.9591120481491089,grad_norm: 0.9676410392230653, iteration: 130577
loss: 1.04123055934906,grad_norm: 0.9999991667428928, iteration: 130578
loss: 0.9984754920005798,grad_norm: 0.9999994501674887, iteration: 130579
loss: 1.1459441184997559,grad_norm: 0.999999344414423, iteration: 130580
loss: 1.0321556329727173,grad_norm: 0.9999992124469034, iteration: 130581
loss: 1.0017374753952026,grad_norm: 0.9999992115881032, iteration: 130582
loss: 0.9739709496498108,grad_norm: 0.99681882840147, iteration: 130583
loss: 0.9729982018470764,grad_norm: 0.8621953402471887, iteration: 130584
loss: 1.0300335884094238,grad_norm: 0.999999001539938, iteration: 130585
loss: 0.9875422716140747,grad_norm: 0.9999990761532633, iteration: 130586
loss: 1.0098763704299927,grad_norm: 0.9999989919430012, iteration: 130587
loss: 0.9975836873054504,grad_norm: 0.9999993821651743, iteration: 130588
loss: 1.0108698606491089,grad_norm: 0.9999990368347775, iteration: 130589
loss: 0.9763414859771729,grad_norm: 0.9999991293114211, iteration: 130590
loss: 1.0332247018814087,grad_norm: 0.999999124555106, iteration: 130591
loss: 1.0086147785186768,grad_norm: 0.9999992605064141, iteration: 130592
loss: 1.0229191780090332,grad_norm: 0.9999990200248994, iteration: 130593
loss: 0.9884410500526428,grad_norm: 0.920919879516337, iteration: 130594
loss: 0.9772826433181763,grad_norm: 0.9999991972706072, iteration: 130595
loss: 0.9767069220542908,grad_norm: 0.9211149854228766, iteration: 130596
loss: 1.0370903015136719,grad_norm: 0.9999993131767747, iteration: 130597
loss: 1.0109196901321411,grad_norm: 0.922055315254974, iteration: 130598
loss: 1.028343677520752,grad_norm: 0.9999990286679775, iteration: 130599
loss: 1.10107421875,grad_norm: 0.9999999974310881, iteration: 130600
loss: 0.9352242350578308,grad_norm: 0.8833666020908183, iteration: 130601
loss: 0.9697888493537903,grad_norm: 0.9999990923021114, iteration: 130602
loss: 1.0663617849349976,grad_norm: 0.9999994122210133, iteration: 130603
loss: 0.9966106414794922,grad_norm: 0.9999992021207449, iteration: 130604
loss: 1.0021796226501465,grad_norm: 0.9999992219918573, iteration: 130605
loss: 1.0536493062973022,grad_norm: 0.9999994191035532, iteration: 130606
loss: 1.0424479246139526,grad_norm: 0.9999994904423039, iteration: 130607
loss: 0.9983088374137878,grad_norm: 0.8821860492016182, iteration: 130608
loss: 1.0137156248092651,grad_norm: 0.9999989496792225, iteration: 130609
loss: 1.0450421571731567,grad_norm: 0.9414805741761944, iteration: 130610
loss: 1.0914226770401,grad_norm: 0.9999992342957612, iteration: 130611
loss: 0.9864420890808105,grad_norm: 0.9999997084546113, iteration: 130612
loss: 0.9859530925750732,grad_norm: 0.8574030945070715, iteration: 130613
loss: 1.042248249053955,grad_norm: 0.9999989699868115, iteration: 130614
loss: 1.0221017599105835,grad_norm: 0.999999635032883, iteration: 130615
loss: 1.0652308464050293,grad_norm: 0.9999996532451517, iteration: 130616
loss: 1.0035521984100342,grad_norm: 0.9192999529182432, iteration: 130617
loss: 0.9829803109169006,grad_norm: 0.9884029881179246, iteration: 130618
loss: 1.0161678791046143,grad_norm: 0.9999999063486911, iteration: 130619
loss: 0.9990187287330627,grad_norm: 0.9999991526276044, iteration: 130620
loss: 1.017501950263977,grad_norm: 0.9999991419423747, iteration: 130621
loss: 0.9921013712882996,grad_norm: 0.999999140820302, iteration: 130622
loss: 0.9806683659553528,grad_norm: 0.9999991385799523, iteration: 130623
loss: 0.9615391492843628,grad_norm: 0.9999991622118187, iteration: 130624
loss: 0.9826100468635559,grad_norm: 0.9999991048143714, iteration: 130625
loss: 0.9863408803939819,grad_norm: 0.9183370356324794, iteration: 130626
loss: 0.9741164445877075,grad_norm: 0.9999990550162506, iteration: 130627
loss: 1.0244472026824951,grad_norm: 0.9999992335572916, iteration: 130628
loss: 1.031604528427124,grad_norm: 0.9999991756884649, iteration: 130629
loss: 1.0095794200897217,grad_norm: 0.9999991900346625, iteration: 130630
loss: 0.9710975885391235,grad_norm: 0.9999991698019622, iteration: 130631
loss: 0.9911999106407166,grad_norm: 0.9999991208893002, iteration: 130632
loss: 1.0035573244094849,grad_norm: 0.9999993276796164, iteration: 130633
loss: 1.0015068054199219,grad_norm: 0.9999991806078493, iteration: 130634
loss: 0.957051694393158,grad_norm: 0.9999992002184398, iteration: 130635
loss: 0.9741772413253784,grad_norm: 0.7814904850680956, iteration: 130636
loss: 0.9941840767860413,grad_norm: 0.9999992839803318, iteration: 130637
loss: 0.9809903502464294,grad_norm: 0.9999991178234308, iteration: 130638
loss: 1.0346704721450806,grad_norm: 0.9653285367028426, iteration: 130639
loss: 0.9936913251876831,grad_norm: 0.9999993112461967, iteration: 130640
loss: 1.0364447832107544,grad_norm: 0.9500720792800966, iteration: 130641
loss: 1.019341230392456,grad_norm: 0.8719579992754667, iteration: 130642
loss: 1.0041241645812988,grad_norm: 0.9932651066329786, iteration: 130643
loss: 0.993610143661499,grad_norm: 0.999999128019008, iteration: 130644
loss: 1.0121115446090698,grad_norm: 0.8782183646383356, iteration: 130645
loss: 0.9908835291862488,grad_norm: 0.9999998369176837, iteration: 130646
loss: 0.9764410257339478,grad_norm: 0.9999993278641874, iteration: 130647
loss: 0.9886205792427063,grad_norm: 0.9999998323567569, iteration: 130648
loss: 1.007623553276062,grad_norm: 0.9999991273778759, iteration: 130649
loss: 1.0433285236358643,grad_norm: 0.9999993565940175, iteration: 130650
loss: 1.0112122297286987,grad_norm: 0.9999990388407675, iteration: 130651
loss: 0.9856138825416565,grad_norm: 0.972965438771968, iteration: 130652
loss: 0.9872310161590576,grad_norm: 0.9999989953386829, iteration: 130653
loss: 0.9866368174552917,grad_norm: 0.9999990961990771, iteration: 130654
loss: 1.0268880128860474,grad_norm: 0.9999992451790737, iteration: 130655
loss: 1.0125951766967773,grad_norm: 0.9253462199252539, iteration: 130656
loss: 0.9892871379852295,grad_norm: 0.9752761597850444, iteration: 130657
loss: 1.01031494140625,grad_norm: 0.9847690902872251, iteration: 130658
loss: 0.9640923142433167,grad_norm: 0.9999990117282825, iteration: 130659
loss: 0.9794055819511414,grad_norm: 0.999999061122748, iteration: 130660
loss: 1.0181879997253418,grad_norm: 0.9267165505550538, iteration: 130661
loss: 0.9872878789901733,grad_norm: 0.9999991687458374, iteration: 130662
loss: 0.9459554553031921,grad_norm: 0.9999990307834158, iteration: 130663
loss: 1.0296605825424194,grad_norm: 0.9999992111250761, iteration: 130664
loss: 1.0010169744491577,grad_norm: 0.9595901090108403, iteration: 130665
loss: 1.0033748149871826,grad_norm: 0.9999991605632436, iteration: 130666
loss: 0.9939316511154175,grad_norm: 0.8933799475343188, iteration: 130667
loss: 0.9840783476829529,grad_norm: 0.9999993463851522, iteration: 130668
loss: 0.9974091649055481,grad_norm: 0.999999086445505, iteration: 130669
loss: 1.0406451225280762,grad_norm: 0.9490764426003145, iteration: 130670
loss: 0.9919512867927551,grad_norm: 0.9999990146309399, iteration: 130671
loss: 1.0303181409835815,grad_norm: 0.9266205718293758, iteration: 130672
loss: 0.9982717633247375,grad_norm: 0.999999096718471, iteration: 130673
loss: 0.9632626175880432,grad_norm: 0.9337649870038018, iteration: 130674
loss: 1.0030517578125,grad_norm: 0.9700148934304069, iteration: 130675
loss: 1.033970832824707,grad_norm: 0.9999990239050249, iteration: 130676
loss: 1.0375947952270508,grad_norm: 0.9999992910623814, iteration: 130677
loss: 0.9844340085983276,grad_norm: 0.9999991074044725, iteration: 130678
loss: 1.0081632137298584,grad_norm: 0.8272236082971365, iteration: 130679
loss: 0.9980648756027222,grad_norm: 0.9999990715038171, iteration: 130680
loss: 1.0848087072372437,grad_norm: 0.9999995353163982, iteration: 130681
loss: 0.9833866953849792,grad_norm: 0.9999990274084875, iteration: 130682
loss: 1.0064095258712769,grad_norm: 0.9999994549927427, iteration: 130683
loss: 0.9777399897575378,grad_norm: 0.9270932728527237, iteration: 130684
loss: 0.9628079533576965,grad_norm: 0.9999991465091425, iteration: 130685
loss: 0.9893231391906738,grad_norm: 0.9999992206544317, iteration: 130686
loss: 0.9711886644363403,grad_norm: 0.999999113460808, iteration: 130687
loss: 1.0256272554397583,grad_norm: 0.9999991224153736, iteration: 130688
loss: 0.9954475164413452,grad_norm: 0.9999989482273582, iteration: 130689
loss: 0.9981098771095276,grad_norm: 0.9810371929309554, iteration: 130690
loss: 0.9901208877563477,grad_norm: 0.9999990456128153, iteration: 130691
loss: 1.0190463066101074,grad_norm: 0.999999021591918, iteration: 130692
loss: 1.0092966556549072,grad_norm: 0.9999990651795249, iteration: 130693
loss: 0.998521625995636,grad_norm: 0.9437737024800421, iteration: 130694
loss: 0.9509122967720032,grad_norm: 0.9999991154107349, iteration: 130695
loss: 0.9472223520278931,grad_norm: 0.999999146224135, iteration: 130696
loss: 1.0003892183303833,grad_norm: 0.9691860733303848, iteration: 130697
loss: 0.9905217885971069,grad_norm: 0.9990307596556723, iteration: 130698
loss: 0.9916912913322449,grad_norm: 0.9136157643331945, iteration: 130699
loss: 0.9770585894584656,grad_norm: 0.9261517788594299, iteration: 130700
loss: 0.964817225933075,grad_norm: 0.9301749023667482, iteration: 130701
loss: 1.0369352102279663,grad_norm: 0.9999996355612712, iteration: 130702
loss: 0.9941054582595825,grad_norm: 0.9490973123375397, iteration: 130703
loss: 1.003706693649292,grad_norm: 0.9677360985169708, iteration: 130704
loss: 0.9920732975006104,grad_norm: 0.9999995456294642, iteration: 130705
loss: 1.0143187046051025,grad_norm: 0.9999990570113976, iteration: 130706
loss: 1.0236583948135376,grad_norm: 0.9616917886209242, iteration: 130707
loss: 1.0657747983932495,grad_norm: 0.999999078624089, iteration: 130708
loss: 1.0206401348114014,grad_norm: 0.999999183213421, iteration: 130709
loss: 0.9555966854095459,grad_norm: 0.9999991936786783, iteration: 130710
loss: 1.0131046772003174,grad_norm: 0.9999998261777848, iteration: 130711
loss: 0.9843085408210754,grad_norm: 0.9496562449533257, iteration: 130712
loss: 1.0142782926559448,grad_norm: 0.9999992070140933, iteration: 130713
loss: 0.972137987613678,grad_norm: 0.974341058000371, iteration: 130714
loss: 1.0182194709777832,grad_norm: 0.9999991574007213, iteration: 130715
loss: 0.9880868196487427,grad_norm: 0.9065527640146999, iteration: 130716
loss: 0.9927178621292114,grad_norm: 0.9999990283190369, iteration: 130717
loss: 0.979834258556366,grad_norm: 0.9999991418003951, iteration: 130718
loss: 0.9683996438980103,grad_norm: 0.9999993357543533, iteration: 130719
loss: 0.9882943034172058,grad_norm: 0.9999991678098213, iteration: 130720
loss: 0.989284098148346,grad_norm: 0.9778886512663425, iteration: 130721
loss: 1.0026215314865112,grad_norm: 0.9382199676988301, iteration: 130722
loss: 1.0043561458587646,grad_norm: 0.8983987199928365, iteration: 130723
loss: 0.975616991519928,grad_norm: 0.999999058932232, iteration: 130724
loss: 1.008326530456543,grad_norm: 0.9999991494068968, iteration: 130725
loss: 0.9988629221916199,grad_norm: 0.9877185270013293, iteration: 130726
loss: 1.0063250064849854,grad_norm: 0.999999840816036, iteration: 130727
loss: 1.0256016254425049,grad_norm: 0.9999992208948362, iteration: 130728
loss: 1.0093072652816772,grad_norm: 0.9999991040782572, iteration: 130729
loss: 0.9910977482795715,grad_norm: 0.9521262416332681, iteration: 130730
loss: 0.9931977987289429,grad_norm: 0.9999991356473077, iteration: 130731
loss: 1.0051872730255127,grad_norm: 0.9999990878339713, iteration: 130732
loss: 0.9861573576927185,grad_norm: 0.9080226261275035, iteration: 130733
loss: 1.001704454421997,grad_norm: 0.8901126619639764, iteration: 130734
loss: 0.9786573648452759,grad_norm: 0.999999181255515, iteration: 130735
loss: 0.99483323097229,grad_norm: 0.9756704108731429, iteration: 130736
loss: 1.0024604797363281,grad_norm: 0.9252465163092672, iteration: 130737
loss: 1.011794090270996,grad_norm: 0.9999991606971276, iteration: 130738
loss: 1.0245639085769653,grad_norm: 0.9987345164507455, iteration: 130739
loss: 0.9784213900566101,grad_norm: 0.9999992470390707, iteration: 130740
loss: 0.9907545447349548,grad_norm: 0.9999992509170701, iteration: 130741
loss: 0.9769541025161743,grad_norm: 0.9999993193925434, iteration: 130742
loss: 1.0639152526855469,grad_norm: 0.999999058507285, iteration: 130743
loss: 0.9972086548805237,grad_norm: 0.9999991407134051, iteration: 130744
loss: 0.9880263805389404,grad_norm: 0.8143851920958982, iteration: 130745
loss: 1.030438780784607,grad_norm: 0.966429186210159, iteration: 130746
loss: 1.0035419464111328,grad_norm: 0.9999991380121852, iteration: 130747
loss: 1.023032546043396,grad_norm: 0.9999997456039699, iteration: 130748
loss: 1.0133066177368164,grad_norm: 0.9999991862795963, iteration: 130749
loss: 0.9741248488426208,grad_norm: 0.9999992706991284, iteration: 130750
loss: 1.0309926271438599,grad_norm: 0.9805695503624459, iteration: 130751
loss: 0.9835005402565002,grad_norm: 0.9999991302735334, iteration: 130752
loss: 0.9857431054115295,grad_norm: 0.9999991866437105, iteration: 130753
loss: 1.0172340869903564,grad_norm: 0.9999991568181793, iteration: 130754
loss: 1.037069320678711,grad_norm: 0.999998981919945, iteration: 130755
loss: 0.990032970905304,grad_norm: 0.957557899760887, iteration: 130756
loss: 0.9560905694961548,grad_norm: 0.9999992251720026, iteration: 130757
loss: 1.0088908672332764,grad_norm: 0.9999992658519968, iteration: 130758
loss: 1.0054185390472412,grad_norm: 0.9900927742411867, iteration: 130759
loss: 0.9871724843978882,grad_norm: 0.8709524040577963, iteration: 130760
loss: 1.0006122589111328,grad_norm: 0.8969137539004052, iteration: 130761
loss: 1.0004037618637085,grad_norm: 0.9999990610830505, iteration: 130762
loss: 0.957037091255188,grad_norm: 0.8910738024911482, iteration: 130763
loss: 0.9920297265052795,grad_norm: 0.9999990998586138, iteration: 130764
loss: 1.0169514417648315,grad_norm: 0.9992108389150294, iteration: 130765
loss: 1.0177228450775146,grad_norm: 0.9999991954470645, iteration: 130766
loss: 1.0172975063323975,grad_norm: 0.999999131144581, iteration: 130767
loss: 1.0160900354385376,grad_norm: 0.9999991052294438, iteration: 130768
loss: 0.994941234588623,grad_norm: 0.999999361938865, iteration: 130769
loss: 1.018152117729187,grad_norm: 0.9308253377118028, iteration: 130770
loss: 1.0163956880569458,grad_norm: 0.9999991357977477, iteration: 130771
loss: 1.0385987758636475,grad_norm: 0.9763360027409221, iteration: 130772
loss: 1.034718632698059,grad_norm: 0.9999990682780406, iteration: 130773
loss: 1.0354804992675781,grad_norm: 0.9999992205475451, iteration: 130774
loss: 1.016605019569397,grad_norm: 0.9581330419874445, iteration: 130775
loss: 1.0046814680099487,grad_norm: 0.8823967913865244, iteration: 130776
loss: 1.0118720531463623,grad_norm: 0.9577695926655143, iteration: 130777
loss: 0.9977013468742371,grad_norm: 0.9999992332991514, iteration: 130778
loss: 1.018702745437622,grad_norm: 0.9999990891620233, iteration: 130779
loss: 0.9881715774536133,grad_norm: 0.9194838791882093, iteration: 130780
loss: 1.0514496564865112,grad_norm: 0.9999990601549351, iteration: 130781
loss: 0.9977763891220093,grad_norm: 0.9790612264806212, iteration: 130782
loss: 1.1518828868865967,grad_norm: 0.9999995640587529, iteration: 130783
loss: 0.9589976668357849,grad_norm: 0.9999991411204855, iteration: 130784
loss: 1.022873878479004,grad_norm: 0.9999990798082906, iteration: 130785
loss: 1.0030341148376465,grad_norm: 0.9999991306669526, iteration: 130786
loss: 1.0001813173294067,grad_norm: 0.8858031240191688, iteration: 130787
loss: 1.0076189041137695,grad_norm: 0.9999997036578585, iteration: 130788
loss: 1.002909779548645,grad_norm: 0.9564424303936893, iteration: 130789
loss: 1.007089614868164,grad_norm: 0.9999990537438119, iteration: 130790
loss: 1.0058891773223877,grad_norm: 0.833388601947709, iteration: 130791
loss: 0.9957191944122314,grad_norm: 0.9939019249104579, iteration: 130792
loss: 1.0195152759552002,grad_norm: 0.9999990747596668, iteration: 130793
loss: 0.9924048185348511,grad_norm: 0.9207491818903375, iteration: 130794
loss: 1.0027192831039429,grad_norm: 0.9999989853955134, iteration: 130795
loss: 1.0327014923095703,grad_norm: 0.9999989868445981, iteration: 130796
loss: 0.9885541200637817,grad_norm: 0.9401208900638917, iteration: 130797
loss: 1.0539860725402832,grad_norm: 0.9999997894160297, iteration: 130798
loss: 1.0532413721084595,grad_norm: 0.9999993232514681, iteration: 130799
loss: 0.9799864888191223,grad_norm: 0.9999992028975554, iteration: 130800
loss: 1.0163321495056152,grad_norm: 0.9768646175161804, iteration: 130801
loss: 1.0081688165664673,grad_norm: 0.9999991434292516, iteration: 130802
loss: 1.0340327024459839,grad_norm: 0.9999990618394465, iteration: 130803
loss: 0.9965443015098572,grad_norm: 0.9999991625473804, iteration: 130804
loss: 1.003331184387207,grad_norm: 0.7860611057866211, iteration: 130805
loss: 1.0124461650848389,grad_norm: 0.9999995535660154, iteration: 130806
loss: 0.9973322153091431,grad_norm: 0.9999990858901645, iteration: 130807
loss: 1.0127636194229126,grad_norm: 0.9999997615451194, iteration: 130808
loss: 0.9961995482444763,grad_norm: 0.9999990738768456, iteration: 130809
loss: 1.0298010110855103,grad_norm: 0.999999162437656, iteration: 130810
loss: 1.0465484857559204,grad_norm: 0.9999991010285515, iteration: 130811
loss: 1.0314427614212036,grad_norm: 0.9999996354437257, iteration: 130812
loss: 0.9868630170822144,grad_norm: 0.9999991573240284, iteration: 130813
loss: 1.00696861743927,grad_norm: 0.9999991649024791, iteration: 130814
loss: 1.0390779972076416,grad_norm: 0.9999994199480707, iteration: 130815
loss: 1.0200608968734741,grad_norm: 0.9999995447224476, iteration: 130816
loss: 1.0426039695739746,grad_norm: 0.9999991389410942, iteration: 130817
loss: 0.9818605780601501,grad_norm: 0.9999991531000845, iteration: 130818
loss: 0.9933180212974548,grad_norm: 0.9904928841707954, iteration: 130819
loss: 1.0074074268341064,grad_norm: 0.9999992213657551, iteration: 130820
loss: 0.9801138639450073,grad_norm: 0.9999991157016157, iteration: 130821
loss: 0.9644970297813416,grad_norm: 0.9075355124273777, iteration: 130822
loss: 0.9973960518836975,grad_norm: 0.9991501820519112, iteration: 130823
loss: 1.0403157472610474,grad_norm: 0.9746457566667553, iteration: 130824
loss: 1.001833438873291,grad_norm: 0.9716638519728255, iteration: 130825
loss: 0.9995567798614502,grad_norm: 0.8791189167815957, iteration: 130826
loss: 1.0381848812103271,grad_norm: 0.9999991282147674, iteration: 130827
loss: 0.9582453370094299,grad_norm: 0.9441889493657531, iteration: 130828
loss: 1.0232616662979126,grad_norm: 0.9971968540531774, iteration: 130829
loss: 0.9832900762557983,grad_norm: 0.9999990728882567, iteration: 130830
loss: 0.9742464423179626,grad_norm: 0.9999991024453965, iteration: 130831
loss: 1.0120465755462646,grad_norm: 0.9999991238828365, iteration: 130832
loss: 0.9891766905784607,grad_norm: 0.8589205585626685, iteration: 130833
loss: 0.9585191607475281,grad_norm: 0.872481241067645, iteration: 130834
loss: 0.9944060444831848,grad_norm: 0.9784747234134111, iteration: 130835
loss: 1.0288711786270142,grad_norm: 0.9999990237879814, iteration: 130836
loss: 0.9907777309417725,grad_norm: 0.999998999121934, iteration: 130837
loss: 0.9875351786613464,grad_norm: 0.8725351822739924, iteration: 130838
loss: 1.0343900918960571,grad_norm: 0.9924835672007007, iteration: 130839
loss: 1.0405124425888062,grad_norm: 0.9999991429550226, iteration: 130840
loss: 0.9771615266799927,grad_norm: 0.9999990060073941, iteration: 130841
loss: 0.9988004565238953,grad_norm: 0.9999991581500166, iteration: 130842
loss: 0.9973534941673279,grad_norm: 0.9999992243190865, iteration: 130843
loss: 0.9802163243293762,grad_norm: 0.9999992801465263, iteration: 130844
loss: 1.0187389850616455,grad_norm: 0.9153222685075547, iteration: 130845
loss: 1.0158697366714478,grad_norm: 0.9999990717208732, iteration: 130846
loss: 0.9966040253639221,grad_norm: 0.9265136160453232, iteration: 130847
loss: 1.0003098249435425,grad_norm: 0.9999989796035067, iteration: 130848
loss: 0.9937874674797058,grad_norm: 0.999999151465983, iteration: 130849
loss: 1.0058393478393555,grad_norm: 0.9999990422037324, iteration: 130850
loss: 1.0050179958343506,grad_norm: 0.9861793951006649, iteration: 130851
loss: 0.9986894726753235,grad_norm: 0.9318275087890179, iteration: 130852
loss: 0.9768598079681396,grad_norm: 0.9427313042314875, iteration: 130853
loss: 1.0060210227966309,grad_norm: 0.9999990603118418, iteration: 130854
loss: 0.9644548892974854,grad_norm: 0.9040174845113009, iteration: 130855
loss: 1.0103623867034912,grad_norm: 0.999999125535934, iteration: 130856
loss: 1.0230348110198975,grad_norm: 0.999999136061252, iteration: 130857
loss: 0.9779025912284851,grad_norm: 0.9999991388308173, iteration: 130858
loss: 1.0018752813339233,grad_norm: 0.999999016270857, iteration: 130859
loss: 0.9735643267631531,grad_norm: 0.9999992454304899, iteration: 130860
loss: 1.0355581045150757,grad_norm: 0.9999991087033889, iteration: 130861
loss: 0.9778915047645569,grad_norm: 0.9999991360995517, iteration: 130862
loss: 1.006330966949463,grad_norm: 0.9437160085826052, iteration: 130863
loss: 1.0023645162582397,grad_norm: 0.9999990955917302, iteration: 130864
loss: 0.9973545074462891,grad_norm: 0.8406495593721204, iteration: 130865
loss: 1.0032447576522827,grad_norm: 0.9930898772376723, iteration: 130866
loss: 0.9924855828285217,grad_norm: 0.9999995483800471, iteration: 130867
loss: 0.9950612783432007,grad_norm: 0.9230102255495649, iteration: 130868
loss: 0.9694299697875977,grad_norm: 0.9999990140240737, iteration: 130869
loss: 1.0146816968917847,grad_norm: 0.887318001924034, iteration: 130870
loss: 0.9659193158149719,grad_norm: 0.8761763030972743, iteration: 130871
loss: 1.0100795030593872,grad_norm: 0.9999992191824838, iteration: 130872
loss: 0.9991787075996399,grad_norm: 0.9999992268789671, iteration: 130873
loss: 1.0065174102783203,grad_norm: 0.9999992797699786, iteration: 130874
loss: 0.9746106863021851,grad_norm: 0.9999990898359409, iteration: 130875
loss: 0.998828113079071,grad_norm: 0.9999988990566184, iteration: 130876
loss: 0.9848606586456299,grad_norm: 0.9999990783710856, iteration: 130877
loss: 0.9566336870193481,grad_norm: 0.99999908478087, iteration: 130878
loss: 0.9778414368629456,grad_norm: 0.9999992212703074, iteration: 130879
loss: 0.9944761991500854,grad_norm: 0.9999990573882369, iteration: 130880
loss: 1.0406761169433594,grad_norm: 0.8953225134128598, iteration: 130881
loss: 0.9814846515655518,grad_norm: 0.9999990268541854, iteration: 130882
loss: 1.0330123901367188,grad_norm: 0.9665572715045713, iteration: 130883
loss: 0.9843938946723938,grad_norm: 0.9999991758973006, iteration: 130884
loss: 0.9965521097183228,grad_norm: 0.8869538410028545, iteration: 130885
loss: 0.9918021559715271,grad_norm: 0.9808602568925882, iteration: 130886
loss: 0.9542391896247864,grad_norm: 0.9999990188678627, iteration: 130887
loss: 1.0323983430862427,grad_norm: 0.9999991495593478, iteration: 130888
loss: 1.0287187099456787,grad_norm: 0.9999991701230843, iteration: 130889
loss: 0.9999489188194275,grad_norm: 0.9999989965862566, iteration: 130890
loss: 1.008213758468628,grad_norm: 0.8988343808357371, iteration: 130891
loss: 0.9992724061012268,grad_norm: 0.9999990254884067, iteration: 130892
loss: 0.9858682155609131,grad_norm: 0.9697167640265173, iteration: 130893
loss: 1.0135663747787476,grad_norm: 0.9658214291170006, iteration: 130894
loss: 1.0229675769805908,grad_norm: 0.9999991086832195, iteration: 130895
loss: 0.9527279138565063,grad_norm: 0.950968342453207, iteration: 130896
loss: 0.9827600121498108,grad_norm: 0.9999992235384985, iteration: 130897
loss: 0.9631108045578003,grad_norm: 0.9999989789121655, iteration: 130898
loss: 0.9930859208106995,grad_norm: 0.9446161839125817, iteration: 130899
loss: 1.0354853868484497,grad_norm: 0.9999990725681676, iteration: 130900
loss: 1.0175999402999878,grad_norm: 0.9260429054785985, iteration: 130901
loss: 0.9869696497917175,grad_norm: 0.7406427307941728, iteration: 130902
loss: 1.0177347660064697,grad_norm: 0.9068543071254468, iteration: 130903
loss: 1.0061246156692505,grad_norm: 0.9999991716049511, iteration: 130904
loss: 0.9510436058044434,grad_norm: 0.8253726208281321, iteration: 130905
loss: 1.0006543397903442,grad_norm: 0.9999989879287642, iteration: 130906
loss: 0.9847634434700012,grad_norm: 0.9285058486799205, iteration: 130907
loss: 0.9731417298316956,grad_norm: 0.9999991216357519, iteration: 130908
loss: 1.0055515766143799,grad_norm: 0.9840588463341394, iteration: 130909
loss: 1.0226179361343384,grad_norm: 0.950487190262506, iteration: 130910
loss: 0.9730719923973083,grad_norm: 0.9534277867021218, iteration: 130911
loss: 1.0029469728469849,grad_norm: 0.9999990984530558, iteration: 130912
loss: 1.0156605243682861,grad_norm: 0.9999991795969297, iteration: 130913
loss: 0.9722566604614258,grad_norm: 0.9999990462953772, iteration: 130914
loss: 1.0170729160308838,grad_norm: 0.9781513316654032, iteration: 130915
loss: 0.9985378980636597,grad_norm: 0.8783452592096839, iteration: 130916
loss: 0.9996961355209351,grad_norm: 0.908122507813015, iteration: 130917
loss: 0.9678884744644165,grad_norm: 0.9283389052424375, iteration: 130918
loss: 1.0014286041259766,grad_norm: 0.9637418900333478, iteration: 130919
loss: 1.0236254930496216,grad_norm: 0.9999990183235652, iteration: 130920
loss: 0.9844350814819336,grad_norm: 0.986116767416411, iteration: 130921
loss: 0.9997219443321228,grad_norm: 0.9810377579249697, iteration: 130922
loss: 0.9548578858375549,grad_norm: 0.9797998205889104, iteration: 130923
loss: 0.9860396385192871,grad_norm: 0.9182513957161187, iteration: 130924
loss: 0.9772258996963501,grad_norm: 0.8578490842264723, iteration: 130925
loss: 1.0199726819992065,grad_norm: 0.9999990971094989, iteration: 130926
loss: 1.0088595151901245,grad_norm: 0.982579972379257, iteration: 130927
loss: 1.018647313117981,grad_norm: 0.9999990125671254, iteration: 130928
loss: 1.007637858390808,grad_norm: 0.9999991476660205, iteration: 130929
loss: 0.9994362592697144,grad_norm: 0.9999992228334514, iteration: 130930
loss: 0.9941649436950684,grad_norm: 0.8671577833512374, iteration: 130931
loss: 1.0042893886566162,grad_norm: 0.9447755613840608, iteration: 130932
loss: 1.0290371179580688,grad_norm: 0.9999991639179123, iteration: 130933
loss: 1.0049712657928467,grad_norm: 0.9696912797894323, iteration: 130934
loss: 1.0174494981765747,grad_norm: 0.9999989515303256, iteration: 130935
loss: 0.9759621620178223,grad_norm: 0.944495826652999, iteration: 130936
loss: 1.023676872253418,grad_norm: 0.9385056503222206, iteration: 130937
loss: 1.006905198097229,grad_norm: 0.9999989693983459, iteration: 130938
loss: 1.0149376392364502,grad_norm: 0.9999992944526265, iteration: 130939
loss: 0.9667356610298157,grad_norm: 0.9999989941314084, iteration: 130940
loss: 1.001253604888916,grad_norm: 0.9999991048613773, iteration: 130941
loss: 1.0363999605178833,grad_norm: 0.9999998480378777, iteration: 130942
loss: 1.0294808149337769,grad_norm: 0.9999990931638727, iteration: 130943
loss: 1.0120669603347778,grad_norm: 0.9999994002555016, iteration: 130944
loss: 1.0396182537078857,grad_norm: 0.9999996284052348, iteration: 130945
loss: 1.0279498100280762,grad_norm: 0.9999992573740315, iteration: 130946
loss: 1.0152990818023682,grad_norm: 0.999999034510257, iteration: 130947
loss: 0.9565461874008179,grad_norm: 0.9999993468379791, iteration: 130948
loss: 1.0139588117599487,grad_norm: 0.9999991688243798, iteration: 130949
loss: 0.9705970287322998,grad_norm: 0.999999078978837, iteration: 130950
loss: 0.9955886006355286,grad_norm: 0.9148053281680095, iteration: 130951
loss: 1.034204125404358,grad_norm: 0.9999990412048841, iteration: 130952
loss: 1.0130678415298462,grad_norm: 0.9203328575181878, iteration: 130953
loss: 1.0334761142730713,grad_norm: 0.9859297101867128, iteration: 130954
loss: 1.0233443975448608,grad_norm: 0.9789231438472182, iteration: 130955
loss: 1.0030056238174438,grad_norm: 0.9999992221531342, iteration: 130956
loss: 0.9744774699211121,grad_norm: 0.9160767539800859, iteration: 130957
loss: 1.0013507604599,grad_norm: 0.9358500865280163, iteration: 130958
loss: 1.0054177045822144,grad_norm: 0.9999989204277807, iteration: 130959
loss: 1.016361117362976,grad_norm: 0.9767523625416931, iteration: 130960
loss: 0.9632151126861572,grad_norm: 0.9487937210730494, iteration: 130961
loss: 0.993636429309845,grad_norm: 0.9999990970149981, iteration: 130962
loss: 1.012912631034851,grad_norm: 0.9999991235497112, iteration: 130963
loss: 1.0035170316696167,grad_norm: 0.999999048428245, iteration: 130964
loss: 0.9691417217254639,grad_norm: 0.999999159952499, iteration: 130965
loss: 1.0176377296447754,grad_norm: 0.9999991464914433, iteration: 130966
loss: 1.0172703266143799,grad_norm: 0.999999324704481, iteration: 130967
loss: 0.9927085638046265,grad_norm: 0.9999991335632066, iteration: 130968
loss: 0.9702699184417725,grad_norm: 0.9999993180455315, iteration: 130969
loss: 1.022173523902893,grad_norm: 0.9999991511678887, iteration: 130970
loss: 1.0263255834579468,grad_norm: 0.9410231928157571, iteration: 130971
loss: 0.992428719997406,grad_norm: 0.9999990018534781, iteration: 130972
loss: 0.9783841371536255,grad_norm: 0.9999991777753428, iteration: 130973
loss: 0.9750130772590637,grad_norm: 0.9999989902755784, iteration: 130974
loss: 1.0118293762207031,grad_norm: 0.9999991404151156, iteration: 130975
loss: 1.0355480909347534,grad_norm: 0.9999991904237866, iteration: 130976
loss: 1.0191808938980103,grad_norm: 0.9999990139492416, iteration: 130977
loss: 0.9935851693153381,grad_norm: 0.9999990350100899, iteration: 130978
loss: 0.9684520363807678,grad_norm: 0.9998813959981288, iteration: 130979
loss: 0.9610884189605713,grad_norm: 0.9999992545464009, iteration: 130980
loss: 0.9946768879890442,grad_norm: 0.9683670777196532, iteration: 130981
loss: 0.9941926598548889,grad_norm: 0.9999991929116114, iteration: 130982
loss: 0.9606879949569702,grad_norm: 0.9999991427107543, iteration: 130983
loss: 1.0301551818847656,grad_norm: 0.9999993509882532, iteration: 130984
loss: 0.990044116973877,grad_norm: 0.9999990417007918, iteration: 130985
loss: 0.9783507585525513,grad_norm: 0.9999989799033356, iteration: 130986
loss: 1.0392218828201294,grad_norm: 0.9587817742468581, iteration: 130987
loss: 1.0149407386779785,grad_norm: 0.9999991950264971, iteration: 130988
loss: 1.0070459842681885,grad_norm: 0.9999992922458012, iteration: 130989
loss: 1.012557864189148,grad_norm: 0.9999990831950103, iteration: 130990
loss: 0.9783518314361572,grad_norm: 0.9999992146384415, iteration: 130991
loss: 0.9757853150367737,grad_norm: 0.9999992210131149, iteration: 130992
loss: 0.9598615765571594,grad_norm: 0.9604581407204776, iteration: 130993
loss: 1.0364938974380493,grad_norm: 0.959885409680658, iteration: 130994
loss: 1.0029300451278687,grad_norm: 0.9313606303970238, iteration: 130995
loss: 0.9976359009742737,grad_norm: 0.9999992244753665, iteration: 130996
loss: 0.9807101488113403,grad_norm: 0.931337343944097, iteration: 130997
loss: 0.9760228991508484,grad_norm: 0.9999990880152606, iteration: 130998
loss: 1.00260329246521,grad_norm: 0.9999991188091305, iteration: 130999
loss: 1.0200809240341187,grad_norm: 0.999999048450586, iteration: 131000
loss: 0.9573173522949219,grad_norm: 0.9959245629806074, iteration: 131001
loss: 0.9911972880363464,grad_norm: 0.99999909079931, iteration: 131002
loss: 0.9841152429580688,grad_norm: 0.9999991464078526, iteration: 131003
loss: 0.9772342443466187,grad_norm: 0.9455396726185509, iteration: 131004
loss: 1.004744052886963,grad_norm: 0.9458054512972313, iteration: 131005
loss: 1.017738938331604,grad_norm: 0.916407937356533, iteration: 131006
loss: 0.9708777666091919,grad_norm: 0.8560525960942471, iteration: 131007
loss: 0.9774325489997864,grad_norm: 0.9999992773907391, iteration: 131008
loss: 0.9944924712181091,grad_norm: 0.9999991546048324, iteration: 131009
loss: 1.00484037399292,grad_norm: 0.9056200203726136, iteration: 131010
loss: 0.9969189763069153,grad_norm: 0.9334457604996135, iteration: 131011
loss: 0.9789958596229553,grad_norm: 0.9999990715660271, iteration: 131012
loss: 0.9944697618484497,grad_norm: 0.9999994740949495, iteration: 131013
loss: 1.0098251104354858,grad_norm: 0.9999990923855792, iteration: 131014
loss: 0.9857473373413086,grad_norm: 0.9999994679634511, iteration: 131015
loss: 1.0130270719528198,grad_norm: 0.9999991354300767, iteration: 131016
loss: 0.9796198606491089,grad_norm: 0.9999990725807494, iteration: 131017
loss: 0.9974493980407715,grad_norm: 0.9358452015653056, iteration: 131018
loss: 0.9835704565048218,grad_norm: 0.9999991592227635, iteration: 131019
loss: 0.983525812625885,grad_norm: 0.9537788909371164, iteration: 131020
loss: 1.002373218536377,grad_norm: 0.982639371963249, iteration: 131021
loss: 1.0089739561080933,grad_norm: 0.999999288591413, iteration: 131022
loss: 1.0209589004516602,grad_norm: 0.8988779566296362, iteration: 131023
loss: 1.0006626844406128,grad_norm: 0.9530810256890634, iteration: 131024
loss: 0.9949500560760498,grad_norm: 0.8715730619933909, iteration: 131025
loss: 1.0210165977478027,grad_norm: 0.9333611386435146, iteration: 131026
loss: 1.0094046592712402,grad_norm: 0.9999996723088569, iteration: 131027
loss: 0.9993773698806763,grad_norm: 0.9999991696985796, iteration: 131028
loss: 1.0220037698745728,grad_norm: 0.9999991352721374, iteration: 131029
loss: 1.0428539514541626,grad_norm: 0.9999992213549099, iteration: 131030
loss: 0.9847951531410217,grad_norm: 0.9999990636522036, iteration: 131031
loss: 1.0339559316635132,grad_norm: 0.890106141898923, iteration: 131032
loss: 1.0672495365142822,grad_norm: 0.999999647823227, iteration: 131033
loss: 0.9937940835952759,grad_norm: 0.8521848943944478, iteration: 131034
loss: 0.9841367602348328,grad_norm: 0.9999992517268069, iteration: 131035
loss: 0.9973387718200684,grad_norm: 0.9999993275135257, iteration: 131036
loss: 0.9752090573310852,grad_norm: 0.8568598720788763, iteration: 131037
loss: 1.0099623203277588,grad_norm: 0.8610505152894968, iteration: 131038
loss: 1.0105806589126587,grad_norm: 0.9999990757035901, iteration: 131039
loss: 0.9893400073051453,grad_norm: 0.9999992137020044, iteration: 131040
loss: 1.001163363456726,grad_norm: 0.9151575803020494, iteration: 131041
loss: 0.9787425994873047,grad_norm: 0.9999990829385905, iteration: 131042
loss: 1.0109031200408936,grad_norm: 0.9830057550644077, iteration: 131043
loss: 1.0231289863586426,grad_norm: 0.9413344735760536, iteration: 131044
loss: 1.051779866218567,grad_norm: 0.9999991995974795, iteration: 131045
loss: 0.9858073592185974,grad_norm: 0.950816562124323, iteration: 131046
loss: 1.0042093992233276,grad_norm: 0.9999992267103749, iteration: 131047
loss: 0.9844534993171692,grad_norm: 0.9999990842246814, iteration: 131048
loss: 1.0320191383361816,grad_norm: 0.9999991596509605, iteration: 131049
loss: 0.9850159287452698,grad_norm: 0.991964119856551, iteration: 131050
loss: 1.0193240642547607,grad_norm: 0.9512098114321755, iteration: 131051
loss: 1.017561435699463,grad_norm: 0.9999991038777317, iteration: 131052
loss: 1.0231566429138184,grad_norm: 0.999999114220684, iteration: 131053
loss: 1.0049278736114502,grad_norm: 0.9473596199254308, iteration: 131054
loss: 1.0279908180236816,grad_norm: 0.861839001858348, iteration: 131055
loss: 0.9964800477027893,grad_norm: 0.99999909250514, iteration: 131056
loss: 1.023244857788086,grad_norm: 0.9999990954839739, iteration: 131057
loss: 1.0233136415481567,grad_norm: 0.999999103383711, iteration: 131058
loss: 0.9685692191123962,grad_norm: 0.9361001220473577, iteration: 131059
loss: 0.9893221855163574,grad_norm: 0.9999994031442128, iteration: 131060
loss: 0.9963153600692749,grad_norm: 0.9013573929186779, iteration: 131061
loss: 1.040256142616272,grad_norm: 0.9999992437525326, iteration: 131062
loss: 1.0209813117980957,grad_norm: 0.9999993176878164, iteration: 131063
loss: 1.0006873607635498,grad_norm: 0.9964267565465165, iteration: 131064
loss: 0.9635658860206604,grad_norm: 0.9299042479029969, iteration: 131065
loss: 1.004468560218811,grad_norm: 0.9429332590319772, iteration: 131066
loss: 1.0595543384552002,grad_norm: 0.9999993086326271, iteration: 131067
loss: 1.0188957452774048,grad_norm: 0.8908069315358936, iteration: 131068
loss: 1.0159509181976318,grad_norm: 0.9999990041644551, iteration: 131069
loss: 1.011825680732727,grad_norm: 0.8631205558388262, iteration: 131070
loss: 0.985255241394043,grad_norm: 0.9146460505333001, iteration: 131071
loss: 0.9735705256462097,grad_norm: 0.999999256156539, iteration: 131072
loss: 0.9665027856826782,grad_norm: 0.9715252061143608, iteration: 131073
loss: 1.0067253112792969,grad_norm: 0.9982182777306075, iteration: 131074
loss: 1.0035048723220825,grad_norm: 0.926094759282439, iteration: 131075
loss: 1.0502970218658447,grad_norm: 0.999999250061636, iteration: 131076
loss: 1.000992774963379,grad_norm: 0.9999991878500353, iteration: 131077
loss: 1.0049713850021362,grad_norm: 0.9999990959962272, iteration: 131078
loss: 1.014967441558838,grad_norm: 0.9999991896914695, iteration: 131079
loss: 0.9751467704772949,grad_norm: 0.9999990036909742, iteration: 131080
loss: 1.019744634628296,grad_norm: 0.9999991579324173, iteration: 131081
loss: 0.9996856451034546,grad_norm: 0.9999991787659172, iteration: 131082
loss: 1.0104495286941528,grad_norm: 0.9999995141242551, iteration: 131083
loss: 1.0063384771347046,grad_norm: 0.9999998121898156, iteration: 131084
loss: 1.006624460220337,grad_norm: 0.9999991201887468, iteration: 131085
loss: 1.0537973642349243,grad_norm: 0.9999995813410885, iteration: 131086
loss: 1.0034065246582031,grad_norm: 0.9999992393393883, iteration: 131087
loss: 1.0312665700912476,grad_norm: 0.999999503831117, iteration: 131088
loss: 0.9695568084716797,grad_norm: 0.9999991307963638, iteration: 131089
loss: 0.9904888868331909,grad_norm: 0.9999991177289229, iteration: 131090
loss: 0.9724721312522888,grad_norm: 0.9999993132871576, iteration: 131091
loss: 1.000968337059021,grad_norm: 0.9465488239193265, iteration: 131092
loss: 1.0070031881332397,grad_norm: 0.999999222486671, iteration: 131093
loss: 1.0681854486465454,grad_norm: 0.9917055753008143, iteration: 131094
loss: 1.0389913320541382,grad_norm: 0.9608298762488561, iteration: 131095
loss: 1.0057541131973267,grad_norm: 0.9092443549228967, iteration: 131096
loss: 0.9993651509284973,grad_norm: 0.9999991951029142, iteration: 131097
loss: 1.0266227722167969,grad_norm: 0.8758644395693114, iteration: 131098
loss: 1.012769103050232,grad_norm: 0.971455634120696, iteration: 131099
loss: 1.016196370124817,grad_norm: 0.999999021117509, iteration: 131100
loss: 1.0165338516235352,grad_norm: 0.999999321232537, iteration: 131101
loss: 1.0648785829544067,grad_norm: 0.9718595210436718, iteration: 131102
loss: 1.0337175130844116,grad_norm: 0.8674152642165186, iteration: 131103
loss: 0.9975511431694031,grad_norm: 0.9999996315591837, iteration: 131104
loss: 1.0283474922180176,grad_norm: 0.9487192844018931, iteration: 131105
loss: 1.0156816244125366,grad_norm: 0.979979101039647, iteration: 131106
loss: 1.0245593786239624,grad_norm: 0.999999170001559, iteration: 131107
loss: 1.017571210861206,grad_norm: 0.9999990120217559, iteration: 131108
loss: 0.9699158668518066,grad_norm: 0.9999991748867947, iteration: 131109
loss: 1.0649975538253784,grad_norm: 0.903620237266716, iteration: 131110
loss: 0.9928833246231079,grad_norm: 0.9999991339750711, iteration: 131111
loss: 0.9817102551460266,grad_norm: 0.9999990269282867, iteration: 131112
loss: 1.0397889614105225,grad_norm: 0.9999991790937608, iteration: 131113
loss: 0.9814598560333252,grad_norm: 0.957258197701155, iteration: 131114
loss: 0.9927799105644226,grad_norm: 0.9217933845846525, iteration: 131115
loss: 0.9918172955513,grad_norm: 0.9999989788114811, iteration: 131116
loss: 1.018390417098999,grad_norm: 0.9999993819174836, iteration: 131117
loss: 0.9702785015106201,grad_norm: 0.955164290588138, iteration: 131118
loss: 0.9842552542686462,grad_norm: 0.9001157167989491, iteration: 131119
loss: 1.083808183670044,grad_norm: 0.9999997637324928, iteration: 131120
loss: 1.0779460668563843,grad_norm: 0.9999992846055892, iteration: 131121
loss: 0.9879831075668335,grad_norm: 0.9675565724923726, iteration: 131122
loss: 1.0280905961990356,grad_norm: 0.8990841165192791, iteration: 131123
loss: 1.0146512985229492,grad_norm: 0.9999990504780528, iteration: 131124
loss: 1.0081934928894043,grad_norm: 0.9438659931581549, iteration: 131125
loss: 0.976675271987915,grad_norm: 0.9999991072406063, iteration: 131126
loss: 0.9987989068031311,grad_norm: 0.817194712296894, iteration: 131127
loss: 1.003915548324585,grad_norm: 0.9999991394834347, iteration: 131128
loss: 0.9881106019020081,grad_norm: 0.9999992216099269, iteration: 131129
loss: 0.9664730429649353,grad_norm: 0.9999989962180673, iteration: 131130
loss: 0.9920513033866882,grad_norm: 0.9144512923244058, iteration: 131131
loss: 0.9805909991264343,grad_norm: 0.9999991914254315, iteration: 131132
loss: 0.9587978720664978,grad_norm: 0.9999990907912641, iteration: 131133
loss: 1.0017006397247314,grad_norm: 0.9423099314415377, iteration: 131134
loss: 0.9843813180923462,grad_norm: 0.8523900193569381, iteration: 131135
loss: 1.0765841007232666,grad_norm: 0.9999991251532562, iteration: 131136
loss: 1.0173208713531494,grad_norm: 0.9900799920930808, iteration: 131137
loss: 0.9758247137069702,grad_norm: 0.9543781744648765, iteration: 131138
loss: 1.0074511766433716,grad_norm: 0.9999995254515422, iteration: 131139
loss: 1.0167149305343628,grad_norm: 0.9999992431735067, iteration: 131140
loss: 1.0199015140533447,grad_norm: 0.9999993648014143, iteration: 131141
loss: 0.9982787370681763,grad_norm: 0.9714820499403369, iteration: 131142
loss: 1.0151917934417725,grad_norm: 0.9999990303058262, iteration: 131143
loss: 0.9806105494499207,grad_norm: 0.9062920168902704, iteration: 131144
loss: 0.9645569920539856,grad_norm: 0.9086128409744285, iteration: 131145
loss: 0.9897537231445312,grad_norm: 0.8989058145835293, iteration: 131146
loss: 0.9688311815261841,grad_norm: 0.9999992098522527, iteration: 131147
loss: 1.0974757671356201,grad_norm: 0.9999998361938907, iteration: 131148
loss: 1.0007787942886353,grad_norm: 0.9619197672511266, iteration: 131149
loss: 1.0103724002838135,grad_norm: 0.9999991506090538, iteration: 131150
loss: 1.007149577140808,grad_norm: 0.999999178109094, iteration: 131151
loss: 1.024204134941101,grad_norm: 0.9999990727020738, iteration: 131152
loss: 0.9636437296867371,grad_norm: 0.8560403889051824, iteration: 131153
loss: 1.009995460510254,grad_norm: 0.8547208419984971, iteration: 131154
loss: 0.9843693375587463,grad_norm: 0.9649509475392893, iteration: 131155
loss: 1.0186494588851929,grad_norm: 0.9999989125537655, iteration: 131156
loss: 0.997697651386261,grad_norm: 0.9759709489105342, iteration: 131157
loss: 1.0129668712615967,grad_norm: 0.999999215499378, iteration: 131158
loss: 0.9733113646507263,grad_norm: 0.9999991235741538, iteration: 131159
loss: 1.0101903676986694,grad_norm: 0.9421559810889022, iteration: 131160
loss: 1.00873601436615,grad_norm: 0.9999989757176442, iteration: 131161
loss: 0.9877549409866333,grad_norm: 0.9999997841866533, iteration: 131162
loss: 1.051330804824829,grad_norm: 0.9999996718978326, iteration: 131163
loss: 0.9892552495002747,grad_norm: 0.9999991117409835, iteration: 131164
loss: 1.020039439201355,grad_norm: 0.9100394829901609, iteration: 131165
loss: 1.028902292251587,grad_norm: 0.9999993245887724, iteration: 131166
loss: 0.9795675873756409,grad_norm: 0.9999990755361634, iteration: 131167
loss: 1.0273048877716064,grad_norm: 0.9999991300864135, iteration: 131168
loss: 1.007821798324585,grad_norm: 0.9999992080527488, iteration: 131169
loss: 1.0728169679641724,grad_norm: 0.9999995188638464, iteration: 131170
loss: 0.983731210231781,grad_norm: 0.999998996144797, iteration: 131171
loss: 0.985794186592102,grad_norm: 0.9999993225912397, iteration: 131172
loss: 1.0187292098999023,grad_norm: 0.9999999013605693, iteration: 131173
loss: 0.9874675869941711,grad_norm: 0.9999990013679857, iteration: 131174
loss: 1.0268296003341675,grad_norm: 0.999999463759919, iteration: 131175
loss: 1.001039743423462,grad_norm: 0.9999989759379588, iteration: 131176
loss: 0.9877055883407593,grad_norm: 0.9999991550422804, iteration: 131177
loss: 1.0204801559448242,grad_norm: 0.9999989956905393, iteration: 131178
loss: 1.0225383043289185,grad_norm: 0.9999989783624701, iteration: 131179
loss: 0.9973095059394836,grad_norm: 0.9871133836320816, iteration: 131180
loss: 1.1161925792694092,grad_norm: 0.9999994995285502, iteration: 131181
loss: 0.9750608801841736,grad_norm: 0.9999997528078033, iteration: 131182
loss: 0.9863624572753906,grad_norm: 0.8123526838333542, iteration: 131183
loss: 1.0000463724136353,grad_norm: 0.9999991268760803, iteration: 131184
loss: 1.0514116287231445,grad_norm: 0.9999990588174296, iteration: 131185
loss: 1.0095136165618896,grad_norm: 0.9999991621406348, iteration: 131186
loss: 1.019124150276184,grad_norm: 0.9513697879952272, iteration: 131187
loss: 1.010202407836914,grad_norm: 0.9999991099979235, iteration: 131188
loss: 1.0390475988388062,grad_norm: 0.9661696028875258, iteration: 131189
loss: 0.9970375895500183,grad_norm: 0.9999991010542921, iteration: 131190
loss: 0.9755706787109375,grad_norm: 0.9620523821247675, iteration: 131191
loss: 0.9973106384277344,grad_norm: 0.9999990129372036, iteration: 131192
loss: 1.022121787071228,grad_norm: 0.9999991415781578, iteration: 131193
loss: 0.9774054884910583,grad_norm: 0.9999990316568896, iteration: 131194
loss: 1.0347954034805298,grad_norm: 0.9999992184385417, iteration: 131195
loss: 0.9741382002830505,grad_norm: 0.9999991647766088, iteration: 131196
loss: 1.0034208297729492,grad_norm: 0.9999992106177791, iteration: 131197
loss: 0.9773959517478943,grad_norm: 0.9985488757169542, iteration: 131198
loss: 1.0185880661010742,grad_norm: 0.9999991694818161, iteration: 131199
loss: 1.0402055978775024,grad_norm: 0.9999992279896779, iteration: 131200
loss: 0.9856116771697998,grad_norm: 0.9999990932704315, iteration: 131201
loss: 1.0047866106033325,grad_norm: 0.9347011637396496, iteration: 131202
loss: 1.0022103786468506,grad_norm: 0.9999994265249365, iteration: 131203
loss: 0.9591032266616821,grad_norm: 0.9999991132289254, iteration: 131204
loss: 0.9990285038948059,grad_norm: 0.9232525950496084, iteration: 131205
loss: 1.0190600156784058,grad_norm: 0.999999139707966, iteration: 131206
loss: 0.9822157025337219,grad_norm: 0.8784652829300924, iteration: 131207
loss: 0.9871655106544495,grad_norm: 0.9999991713143211, iteration: 131208
loss: 1.0003618001937866,grad_norm: 0.9999990555890855, iteration: 131209
loss: 1.0058495998382568,grad_norm: 0.7923663680290496, iteration: 131210
loss: 1.0352343320846558,grad_norm: 0.9999992427875741, iteration: 131211
loss: 1.0135256052017212,grad_norm: 0.955561639519308, iteration: 131212
loss: 1.0567636489868164,grad_norm: 0.9999990237928243, iteration: 131213
loss: 1.0159111022949219,grad_norm: 0.9761770327866651, iteration: 131214
loss: 1.0131354331970215,grad_norm: 0.919978671627617, iteration: 131215
loss: 0.9888457655906677,grad_norm: 0.999998995765947, iteration: 131216
loss: 0.980882465839386,grad_norm: 0.9999991526145173, iteration: 131217
loss: 1.0066074132919312,grad_norm: 0.8270855397914125, iteration: 131218
loss: 1.0248794555664062,grad_norm: 0.9999990018331212, iteration: 131219
loss: 0.9979951977729797,grad_norm: 0.9418383582786292, iteration: 131220
loss: 1.0203465223312378,grad_norm: 0.999999055243163, iteration: 131221
loss: 0.996324896812439,grad_norm: 0.9999992029658171, iteration: 131222
loss: 1.0316256284713745,grad_norm: 0.9999999009581811, iteration: 131223
loss: 0.9965649247169495,grad_norm: 0.9656132059813878, iteration: 131224
loss: 0.9608955979347229,grad_norm: 0.9999990427299169, iteration: 131225
loss: 0.99461430311203,grad_norm: 0.9764337550195075, iteration: 131226
loss: 0.9984390139579773,grad_norm: 0.9999990343923014, iteration: 131227
loss: 1.0102578401565552,grad_norm: 0.9999990908346982, iteration: 131228
loss: 0.987282931804657,grad_norm: 0.9083787399396827, iteration: 131229
loss: 0.9782732129096985,grad_norm: 0.9651026815350907, iteration: 131230
loss: 1.0219275951385498,grad_norm: 0.9999994831772885, iteration: 131231
loss: 0.9933335781097412,grad_norm: 0.9999990536056481, iteration: 131232
loss: 0.9809951782226562,grad_norm: 0.9071950919533783, iteration: 131233
loss: 0.9611583352088928,grad_norm: 0.9999990416686294, iteration: 131234
loss: 1.010998249053955,grad_norm: 0.8520563110941275, iteration: 131235
loss: 1.044952630996704,grad_norm: 0.9999994736233948, iteration: 131236
loss: 1.0477745532989502,grad_norm: 0.9999995306890257, iteration: 131237
loss: 0.949845552444458,grad_norm: 0.9999990076178589, iteration: 131238
loss: 1.0133453607559204,grad_norm: 0.9999990772176344, iteration: 131239
loss: 0.9924754500389099,grad_norm: 0.9999991696372508, iteration: 131240
loss: 0.9997962713241577,grad_norm: 0.9319039882391, iteration: 131241
loss: 1.0261684656143188,grad_norm: 0.9999990928539118, iteration: 131242
loss: 0.9999005198478699,grad_norm: 0.9610475897074219, iteration: 131243
loss: 0.9911685585975647,grad_norm: 0.9015760267168259, iteration: 131244
loss: 0.9914348125457764,grad_norm: 0.9999991777318263, iteration: 131245
loss: 0.9906530976295471,grad_norm: 0.9999990549046331, iteration: 131246
loss: 0.9762430787086487,grad_norm: 0.9999991182486992, iteration: 131247
loss: 1.0254839658737183,grad_norm: 0.9281136253003236, iteration: 131248
loss: 0.9662211537361145,grad_norm: 0.9999990256843524, iteration: 131249
loss: 1.008703351020813,grad_norm: 0.9050245180226132, iteration: 131250
loss: 0.9897852540016174,grad_norm: 0.9296896971302204, iteration: 131251
loss: 1.1036475896835327,grad_norm: 0.9999993409243989, iteration: 131252
loss: 0.9477182626724243,grad_norm: 0.9999989787637369, iteration: 131253
loss: 0.9988847374916077,grad_norm: 0.9999990764683919, iteration: 131254
loss: 1.010670781135559,grad_norm: 0.9999991544977564, iteration: 131255
loss: 1.0013285875320435,grad_norm: 0.9999991028500523, iteration: 131256
loss: 1.0146279335021973,grad_norm: 0.9999991734928774, iteration: 131257
loss: 0.9961293935775757,grad_norm: 0.9999990939488681, iteration: 131258
loss: 1.0120636224746704,grad_norm: 0.9951141159849901, iteration: 131259
loss: 0.9908086061477661,grad_norm: 0.9999990673339139, iteration: 131260
loss: 0.9743507504463196,grad_norm: 0.8557112019390134, iteration: 131261
loss: 1.021936058998108,grad_norm: 0.9999990460012467, iteration: 131262
loss: 1.1462644338607788,grad_norm: 0.9999991872205802, iteration: 131263
loss: 0.9860356450080872,grad_norm: 0.9999994568022313, iteration: 131264
loss: 1.0334389209747314,grad_norm: 0.9999992285286647, iteration: 131265
loss: 0.9878891706466675,grad_norm: 0.8963029954687983, iteration: 131266
loss: 1.0810167789459229,grad_norm: 0.948674540427434, iteration: 131267
loss: 1.0290827751159668,grad_norm: 0.9999997160762262, iteration: 131268
loss: 0.988339900970459,grad_norm: 0.8865297756549256, iteration: 131269
loss: 0.9918681979179382,grad_norm: 0.9999990326673726, iteration: 131270
loss: 1.0145857334136963,grad_norm: 0.9527275300396681, iteration: 131271
loss: 0.9869775772094727,grad_norm: 0.9934561892179052, iteration: 131272
loss: 1.0091791152954102,grad_norm: 0.9242046598110504, iteration: 131273
loss: 0.9617714285850525,grad_norm: 0.9485925118787952, iteration: 131274
loss: 1.0089205503463745,grad_norm: 0.9763050833882332, iteration: 131275
loss: 0.9848982095718384,grad_norm: 0.9999992336876586, iteration: 131276
loss: 0.9960321187973022,grad_norm: 0.9586159094255959, iteration: 131277
loss: 1.0224171876907349,grad_norm: 0.8336863741412128, iteration: 131278
loss: 0.9727828502655029,grad_norm: 0.9999994868386021, iteration: 131279
loss: 1.0937446355819702,grad_norm: 0.9999998535041947, iteration: 131280
loss: 1.0177452564239502,grad_norm: 0.9491172563420345, iteration: 131281
loss: 0.9395126104354858,grad_norm: 0.9999991503266294, iteration: 131282
loss: 1.0060054063796997,grad_norm: 0.8841634960984122, iteration: 131283
loss: 1.0100144147872925,grad_norm: 0.9999990133559864, iteration: 131284
loss: 0.9850568175315857,grad_norm: 0.9010279285566111, iteration: 131285
loss: 1.027889370918274,grad_norm: 0.9999998571344124, iteration: 131286
loss: 0.9562825560569763,grad_norm: 0.999999164468607, iteration: 131287
loss: 1.004415512084961,grad_norm: 0.8551124542505469, iteration: 131288
loss: 1.0235264301300049,grad_norm: 0.9999992274487371, iteration: 131289
loss: 1.0036035776138306,grad_norm: 0.9809753447434417, iteration: 131290
loss: 1.011291742324829,grad_norm: 0.9999989891427269, iteration: 131291
loss: 1.0102797746658325,grad_norm: 0.9999991666902835, iteration: 131292
loss: 0.9855830073356628,grad_norm: 0.999999150901586, iteration: 131293
loss: 1.0100922584533691,grad_norm: 0.9728463736277102, iteration: 131294
loss: 1.0025547742843628,grad_norm: 0.9224903624719455, iteration: 131295
loss: 0.9977449774742126,grad_norm: 0.9080219059081903, iteration: 131296
loss: 1.0041625499725342,grad_norm: 0.9999992327554535, iteration: 131297
loss: 0.9992145299911499,grad_norm: 0.9999990712870582, iteration: 131298
loss: 0.9757817983627319,grad_norm: 0.9999990946764749, iteration: 131299
loss: 0.9753638505935669,grad_norm: 0.9485410953719821, iteration: 131300
loss: 0.9864278435707092,grad_norm: 0.8678207905869727, iteration: 131301
loss: 0.9786787033081055,grad_norm: 0.9999991390886673, iteration: 131302
loss: 1.0012195110321045,grad_norm: 0.9999995349570254, iteration: 131303
loss: 1.0502721071243286,grad_norm: 0.9999993744510922, iteration: 131304
loss: 1.037030577659607,grad_norm: 0.9999995075804933, iteration: 131305
loss: 1.0042105913162231,grad_norm: 0.9999992131982376, iteration: 131306
loss: 1.012392282485962,grad_norm: 0.9999991760471049, iteration: 131307
loss: 0.9792389273643494,grad_norm: 0.862915393015092, iteration: 131308
loss: 1.0221130847930908,grad_norm: 0.9404119009469715, iteration: 131309
loss: 1.0175338983535767,grad_norm: 0.999998983270729, iteration: 131310
loss: 0.9955965876579285,grad_norm: 0.9999990713851254, iteration: 131311
loss: 1.0166133642196655,grad_norm: 0.9999991294969867, iteration: 131312
loss: 1.0000864267349243,grad_norm: 0.995299135744958, iteration: 131313
loss: 0.9880819916725159,grad_norm: 0.8695541656724749, iteration: 131314
loss: 1.0338128805160522,grad_norm: 0.9770565903147153, iteration: 131315
loss: 1.0162771940231323,grad_norm: 0.9123064764016794, iteration: 131316
loss: 1.0299227237701416,grad_norm: 0.9999991591563762, iteration: 131317
loss: 0.975464940071106,grad_norm: 0.849108000547302, iteration: 131318
loss: 1.0221213102340698,grad_norm: 0.9999992147162688, iteration: 131319
loss: 1.0158867835998535,grad_norm: 0.8901090057782065, iteration: 131320
loss: 0.980607271194458,grad_norm: 0.9288594665353499, iteration: 131321
loss: 1.0072345733642578,grad_norm: 0.9457971929828453, iteration: 131322
loss: 1.0026417970657349,grad_norm: 0.9211804455165475, iteration: 131323
loss: 1.0359416007995605,grad_norm: 0.9496572632012341, iteration: 131324
loss: 0.9976850748062134,grad_norm: 0.9999991661845617, iteration: 131325
loss: 1.0096219778060913,grad_norm: 0.9999991916458472, iteration: 131326
loss: 1.053627371788025,grad_norm: 0.9999991162292754, iteration: 131327
loss: 1.0235520601272583,grad_norm: 0.9999990355808042, iteration: 131328
loss: 1.0012415647506714,grad_norm: 0.9999991507894919, iteration: 131329
loss: 1.016963243484497,grad_norm: 0.9999991457038132, iteration: 131330
loss: 1.0063390731811523,grad_norm: 0.8988963560623383, iteration: 131331
loss: 1.0240898132324219,grad_norm: 0.9999990883086769, iteration: 131332
loss: 0.9758351445198059,grad_norm: 0.9999990653764664, iteration: 131333
loss: 1.0106823444366455,grad_norm: 0.9999991847983958, iteration: 131334
loss: 0.951132595539093,grad_norm: 0.9374575142926804, iteration: 131335
loss: 1.0130783319473267,grad_norm: 0.8183920903125093, iteration: 131336
loss: 0.9990131258964539,grad_norm: 0.9315149452983635, iteration: 131337
loss: 0.9889254570007324,grad_norm: 0.9999991369256879, iteration: 131338
loss: 0.9706517457962036,grad_norm: 0.9999990742684907, iteration: 131339
loss: 1.002288579940796,grad_norm: 0.9619517326523587, iteration: 131340
loss: 1.0096368789672852,grad_norm: 0.9711362393329834, iteration: 131341
loss: 0.991129457950592,grad_norm: 0.9590074315528101, iteration: 131342
loss: 0.9670207500457764,grad_norm: 0.9389117805471097, iteration: 131343
loss: 1.0350103378295898,grad_norm: 0.9999990637297751, iteration: 131344
loss: 0.9885094165802002,grad_norm: 0.9999991385654803, iteration: 131345
loss: 1.0336414575576782,grad_norm: 0.9999991727156273, iteration: 131346
loss: 1.0001698732376099,grad_norm: 0.9999991837859943, iteration: 131347
loss: 0.998141884803772,grad_norm: 0.9093845266311357, iteration: 131348
loss: 0.9800839424133301,grad_norm: 0.9999992005703154, iteration: 131349
loss: 1.0044342279434204,grad_norm: 0.9999991751318671, iteration: 131350
loss: 0.9739037156105042,grad_norm: 0.9999990700703396, iteration: 131351
loss: 0.975166916847229,grad_norm: 0.9999992435289858, iteration: 131352
loss: 0.9776586294174194,grad_norm: 0.999998942504509, iteration: 131353
loss: 0.9972257614135742,grad_norm: 0.975446221245131, iteration: 131354
loss: 0.971072793006897,grad_norm: 0.9999991109015659, iteration: 131355
loss: 1.0213890075683594,grad_norm: 0.9748548188769661, iteration: 131356
loss: 0.9814560413360596,grad_norm: 0.9999991539407831, iteration: 131357
loss: 1.0201337337493896,grad_norm: 0.9999989532842793, iteration: 131358
loss: 1.0044944286346436,grad_norm: 0.8500354717769456, iteration: 131359
loss: 1.014233946800232,grad_norm: 0.9999993094807073, iteration: 131360
loss: 0.9795097708702087,grad_norm: 0.9999992597241631, iteration: 131361
loss: 1.074779987335205,grad_norm: 0.9999991532020689, iteration: 131362
loss: 1.020463466644287,grad_norm: 0.9999993007970743, iteration: 131363
loss: 0.9949672818183899,grad_norm: 0.9977602385269249, iteration: 131364
loss: 1.0175801515579224,grad_norm: 0.9375254892442852, iteration: 131365
loss: 1.034509301185608,grad_norm: 0.9999990716627741, iteration: 131366
loss: 0.9985980987548828,grad_norm: 0.9999993297846105, iteration: 131367
loss: 1.034464955329895,grad_norm: 0.9999990547662507, iteration: 131368
loss: 1.0400723218917847,grad_norm: 0.9999992805933415, iteration: 131369
loss: 0.9902341961860657,grad_norm: 0.9471707663789315, iteration: 131370
loss: 0.9825397729873657,grad_norm: 0.9999991667935215, iteration: 131371
loss: 0.9545811414718628,grad_norm: 0.9999991377971789, iteration: 131372
loss: 1.0253421068191528,grad_norm: 0.9999991750831437, iteration: 131373
loss: 1.1344842910766602,grad_norm: 0.9999997669504969, iteration: 131374
loss: 0.9737405776977539,grad_norm: 0.9293158569795833, iteration: 131375
loss: 0.979342520236969,grad_norm: 0.9999991343742197, iteration: 131376
loss: 0.9640166759490967,grad_norm: 0.9999991148810624, iteration: 131377
loss: 0.9893317222595215,grad_norm: 0.9951375220435297, iteration: 131378
loss: 1.0188714265823364,grad_norm: 0.9999990927784957, iteration: 131379
loss: 0.9876630306243896,grad_norm: 0.8499268450813304, iteration: 131380
loss: 0.9986770749092102,grad_norm: 0.954390799593445, iteration: 131381
loss: 0.986764132976532,grad_norm: 0.9999990905453345, iteration: 131382
loss: 0.9702343940734863,grad_norm: 0.9256067631221696, iteration: 131383
loss: 1.0135585069656372,grad_norm: 0.9999989458951968, iteration: 131384
loss: 1.0146023035049438,grad_norm: 0.9617432251467688, iteration: 131385
loss: 1.0272643566131592,grad_norm: 0.9999990927927629, iteration: 131386
loss: 1.035815715789795,grad_norm: 0.9999993299361317, iteration: 131387
loss: 0.9749985337257385,grad_norm: 0.9999990745541691, iteration: 131388
loss: 0.9932392835617065,grad_norm: 0.9999991037262782, iteration: 131389
loss: 0.9976032972335815,grad_norm: 0.9999999216586452, iteration: 131390
loss: 1.0044835805892944,grad_norm: 0.9999991550418719, iteration: 131391
loss: 0.9950335621833801,grad_norm: 0.9999990417050836, iteration: 131392
loss: 1.0311952829360962,grad_norm: 0.9999991989421343, iteration: 131393
loss: 1.018629789352417,grad_norm: 0.9999993190446697, iteration: 131394
loss: 0.9702917337417603,grad_norm: 0.9999992067631356, iteration: 131395
loss: 1.0291138887405396,grad_norm: 0.9999998112795596, iteration: 131396
loss: 1.0049500465393066,grad_norm: 0.9999992521709583, iteration: 131397
loss: 1.0015989542007446,grad_norm: 0.9555197774289786, iteration: 131398
loss: 0.9973484873771667,grad_norm: 0.9999990925728551, iteration: 131399
loss: 1.0225635766983032,grad_norm: 0.9999992133477958, iteration: 131400
loss: 1.0114628076553345,grad_norm: 0.9724141043862107, iteration: 131401
loss: 1.0289967060089111,grad_norm: 0.9999991670408155, iteration: 131402
loss: 0.9995179176330566,grad_norm: 0.9999990103466783, iteration: 131403
loss: 0.9678666591644287,grad_norm: 0.9999991360618478, iteration: 131404
loss: 1.0315849781036377,grad_norm: 0.9999996690357094, iteration: 131405
loss: 0.9870920777320862,grad_norm: 0.999999113607245, iteration: 131406
loss: 1.0039504766464233,grad_norm: 0.9999991748367012, iteration: 131407
loss: 1.0048110485076904,grad_norm: 0.9999990796835253, iteration: 131408
loss: 1.0787864923477173,grad_norm: 0.9999998718773462, iteration: 131409
loss: 0.9661197066307068,grad_norm: 0.9770165959488909, iteration: 131410
loss: 1.0071539878845215,grad_norm: 0.9999992320639761, iteration: 131411
loss: 1.046098232269287,grad_norm: 0.9999991835366746, iteration: 131412
loss: 1.013545274734497,grad_norm: 0.9999989798799223, iteration: 131413
loss: 0.9637281894683838,grad_norm: 0.9176514496382459, iteration: 131414
loss: 0.9931540489196777,grad_norm: 0.9999996584482078, iteration: 131415
loss: 1.0562570095062256,grad_norm: 0.9999992804734578, iteration: 131416
loss: 1.0268346071243286,grad_norm: 0.9999991337883918, iteration: 131417
loss: 1.010965347290039,grad_norm: 0.9675188203994655, iteration: 131418
loss: 1.0023770332336426,grad_norm: 0.8774587832895194, iteration: 131419
loss: 1.0020359754562378,grad_norm: 0.9999991303928403, iteration: 131420
loss: 0.98004150390625,grad_norm: 0.9746554340287723, iteration: 131421
loss: 1.1115589141845703,grad_norm: 0.999999630989684, iteration: 131422
loss: 1.0258294343948364,grad_norm: 0.9999991382019034, iteration: 131423
loss: 1.0042973756790161,grad_norm: 0.9999989987357345, iteration: 131424
loss: 0.9994068741798401,grad_norm: 0.9999997801238529, iteration: 131425
loss: 1.0144184827804565,grad_norm: 0.9999991149885421, iteration: 131426
loss: 1.0446600914001465,grad_norm: 0.9492697734451992, iteration: 131427
loss: 1.0129047632217407,grad_norm: 0.9999991029757253, iteration: 131428
loss: 0.9901981353759766,grad_norm: 0.8334110453061766, iteration: 131429
loss: 1.0165168046951294,grad_norm: 0.9330542982730801, iteration: 131430
loss: 1.0002045631408691,grad_norm: 0.9999991781178177, iteration: 131431
loss: 0.9840229153633118,grad_norm: 0.9529813891077424, iteration: 131432
loss: 1.0478094816207886,grad_norm: 0.9999990988725068, iteration: 131433
loss: 1.0008149147033691,grad_norm: 0.9146656186209945, iteration: 131434
loss: 0.9668019413948059,grad_norm: 0.9999991533449673, iteration: 131435
loss: 1.007934808731079,grad_norm: 0.9999990184650843, iteration: 131436
loss: 0.995783269405365,grad_norm: 0.9999990418554682, iteration: 131437
loss: 1.0064936876296997,grad_norm: 0.9999991215166136, iteration: 131438
loss: 0.9924736022949219,grad_norm: 0.9999991218079972, iteration: 131439
loss: 0.9931724071502686,grad_norm: 0.9999991137945762, iteration: 131440
loss: 1.014330267906189,grad_norm: 0.999999090606686, iteration: 131441
loss: 1.020301103591919,grad_norm: 0.999999184095629, iteration: 131442
loss: 1.0649086236953735,grad_norm: 0.9999993150271933, iteration: 131443
loss: 1.023959755897522,grad_norm: 0.9999991274727263, iteration: 131444
loss: 1.0151174068450928,grad_norm: 0.9999990992523743, iteration: 131445
loss: 1.0153404474258423,grad_norm: 0.9999992296231497, iteration: 131446
loss: 0.9855013489723206,grad_norm: 0.9809097864584284, iteration: 131447
loss: 0.9915730357170105,grad_norm: 0.8758751432495865, iteration: 131448
loss: 1.0304856300354004,grad_norm: 0.9999991343314129, iteration: 131449
loss: 1.006911039352417,grad_norm: 0.9992727636553154, iteration: 131450
loss: 1.0202282667160034,grad_norm: 0.999998991342805, iteration: 131451
loss: 1.0023677349090576,grad_norm: 0.9999990851678914, iteration: 131452
loss: 1.0510456562042236,grad_norm: 0.999999023380549, iteration: 131453
loss: 0.9984447956085205,grad_norm: 0.9982596384745579, iteration: 131454
loss: 1.002726674079895,grad_norm: 0.9416904928967954, iteration: 131455
loss: 0.9930748343467712,grad_norm: 0.9999993369603405, iteration: 131456
loss: 0.9752772450447083,grad_norm: 0.9999991715642425, iteration: 131457
loss: 1.0126473903656006,grad_norm: 0.9843824619526933, iteration: 131458
loss: 1.0075494050979614,grad_norm: 0.9999991100471198, iteration: 131459
loss: 0.9969696998596191,grad_norm: 0.9999990433660131, iteration: 131460
loss: 0.9958818554878235,grad_norm: 0.9999990133677588, iteration: 131461
loss: 1.0356649160385132,grad_norm: 0.9138988938110082, iteration: 131462
loss: 1.0040708780288696,grad_norm: 0.9999989758940874, iteration: 131463
loss: 1.0092493295669556,grad_norm: 0.9999990999010598, iteration: 131464
loss: 1.017762541770935,grad_norm: 0.999999103190617, iteration: 131465
loss: 0.9845837354660034,grad_norm: 0.9029736934040123, iteration: 131466
loss: 1.0000934600830078,grad_norm: 0.999999040227766, iteration: 131467
loss: 1.0020169019699097,grad_norm: 0.9999998223466037, iteration: 131468
loss: 1.005125641822815,grad_norm: 0.9008860145452111, iteration: 131469
loss: 1.0033267736434937,grad_norm: 0.9999990984706252, iteration: 131470
loss: 0.9871468544006348,grad_norm: 0.8914328917677976, iteration: 131471
loss: 1.0322054624557495,grad_norm: 0.9999993211133668, iteration: 131472
loss: 1.0055692195892334,grad_norm: 0.9999992168208276, iteration: 131473
loss: 1.0176405906677246,grad_norm: 0.9999989653594873, iteration: 131474
loss: 1.018505334854126,grad_norm: 0.9999990200062254, iteration: 131475
loss: 1.0183959007263184,grad_norm: 0.9999990284147107, iteration: 131476
loss: 0.9978660941123962,grad_norm: 0.9504726802366185, iteration: 131477
loss: 1.0073565244674683,grad_norm: 0.9561451575110325, iteration: 131478
loss: 1.001035451889038,grad_norm: 0.9507709384481685, iteration: 131479
loss: 1.0038522481918335,grad_norm: 0.96578760237512, iteration: 131480
loss: 1.0222705602645874,grad_norm: 0.99999912061706, iteration: 131481
loss: 1.008196234703064,grad_norm: 0.9999991365610894, iteration: 131482
loss: 0.999903678894043,grad_norm: 0.9999991319578453, iteration: 131483
loss: 0.9900718927383423,grad_norm: 0.9999991743590321, iteration: 131484
loss: 0.9978516101837158,grad_norm: 0.99999926831172, iteration: 131485
loss: 1.027794599533081,grad_norm: 0.999999120733721, iteration: 131486
loss: 1.03759765625,grad_norm: 0.8890333134541545, iteration: 131487
loss: 1.085744857788086,grad_norm: 0.9999996631972304, iteration: 131488
loss: 0.987905740737915,grad_norm: 0.9530015289945558, iteration: 131489
loss: 1.0322775840759277,grad_norm: 0.9905646732298942, iteration: 131490
loss: 1.0079914331436157,grad_norm: 0.9860561937521684, iteration: 131491
loss: 0.9518365263938904,grad_norm: 0.9999992241630835, iteration: 131492
loss: 1.0256543159484863,grad_norm: 0.821728070286006, iteration: 131493
loss: 1.0175930261611938,grad_norm: 0.9999991699950155, iteration: 131494
loss: 1.0232737064361572,grad_norm: 0.9555599354268147, iteration: 131495
loss: 1.0371421575546265,grad_norm: 0.9999991110302093, iteration: 131496
loss: 0.9891385436058044,grad_norm: 0.9592089112830835, iteration: 131497
loss: 0.9489814639091492,grad_norm: 0.9999991592401947, iteration: 131498
loss: 1.0049769878387451,grad_norm: 0.9999992281041215, iteration: 131499
loss: 1.0128519535064697,grad_norm: 0.999999080088679, iteration: 131500
loss: 1.0178519487380981,grad_norm: 0.9999991569721922, iteration: 131501
loss: 1.0167614221572876,grad_norm: 0.9999992090832808, iteration: 131502
loss: 1.0155302286148071,grad_norm: 0.9233528293701257, iteration: 131503
loss: 0.9857103228569031,grad_norm: 0.9278512057001428, iteration: 131504
loss: 0.9950212240219116,grad_norm: 0.9999989429832907, iteration: 131505
loss: 1.0144144296646118,grad_norm: 0.9999989389230562, iteration: 131506
loss: 0.9790008664131165,grad_norm: 0.999999713422388, iteration: 131507
loss: 1.0224956274032593,grad_norm: 0.9999991695205134, iteration: 131508
loss: 0.984731137752533,grad_norm: 0.9999991447737896, iteration: 131509
loss: 0.983228862285614,grad_norm: 0.8541398490769889, iteration: 131510
loss: 1.007872462272644,grad_norm: 0.9437511005982289, iteration: 131511
loss: 1.013688564300537,grad_norm: 0.999999283403029, iteration: 131512
loss: 0.9844592809677124,grad_norm: 0.999999135027047, iteration: 131513
loss: 1.0114552974700928,grad_norm: 0.9999989817132627, iteration: 131514
loss: 1.0436005592346191,grad_norm: 0.9999990817963766, iteration: 131515
loss: 1.0255934000015259,grad_norm: 0.9797984966521855, iteration: 131516
loss: 1.024337887763977,grad_norm: 0.9999990440292048, iteration: 131517
loss: 1.0031936168670654,grad_norm: 0.9481927079960679, iteration: 131518
loss: 0.9919450879096985,grad_norm: 0.9999992713946146, iteration: 131519
loss: 0.9773159027099609,grad_norm: 0.9999992688779101, iteration: 131520
loss: 1.023065209388733,grad_norm: 0.999999785120349, iteration: 131521
loss: 0.9589056968688965,grad_norm: 0.9999991124785698, iteration: 131522
loss: 1.0002878904342651,grad_norm: 0.9999991248540684, iteration: 131523
loss: 0.9880026578903198,grad_norm: 0.9999992667768126, iteration: 131524
loss: 0.9998234510421753,grad_norm: 0.9999990072638026, iteration: 131525
loss: 0.9788618683815002,grad_norm: 0.9999991846290354, iteration: 131526
loss: 0.9899567365646362,grad_norm: 0.939671230274609, iteration: 131527
loss: 0.9733240008354187,grad_norm: 0.99517563121426, iteration: 131528
loss: 1.005756139755249,grad_norm: 0.9999992044121078, iteration: 131529
loss: 0.9711142778396606,grad_norm: 0.999999111026682, iteration: 131530
loss: 1.0318022966384888,grad_norm: 0.999999137921988, iteration: 131531
loss: 1.0293493270874023,grad_norm: 0.999999237235476, iteration: 131532
loss: 0.978786051273346,grad_norm: 0.9167170145555064, iteration: 131533
loss: 0.9930763840675354,grad_norm: 0.9316168414992604, iteration: 131534
loss: 1.009427547454834,grad_norm: 0.9532856428575985, iteration: 131535
loss: 0.989082932472229,grad_norm: 0.9999990048706808, iteration: 131536
loss: 1.00113046169281,grad_norm: 0.9999992593066573, iteration: 131537
loss: 1.0322120189666748,grad_norm: 0.9999991702314216, iteration: 131538
loss: 0.997395396232605,grad_norm: 0.9999990028084923, iteration: 131539
loss: 0.9819154143333435,grad_norm: 0.8852786225954987, iteration: 131540
loss: 0.9932557940483093,grad_norm: 0.9999989888886365, iteration: 131541
loss: 1.0230069160461426,grad_norm: 0.999999163158002, iteration: 131542
loss: 0.991377055644989,grad_norm: 0.969979394858163, iteration: 131543
loss: 1.0307713747024536,grad_norm: 0.9968833027166052, iteration: 131544
loss: 1.0326807498931885,grad_norm: 0.9999990616739692, iteration: 131545
loss: 0.9987077713012695,grad_norm: 0.9999991567075098, iteration: 131546
loss: 1.0146293640136719,grad_norm: 0.8693223299709705, iteration: 131547
loss: 0.9764321446418762,grad_norm: 0.9999991852978736, iteration: 131548
loss: 0.9967837333679199,grad_norm: 0.964448952768486, iteration: 131549
loss: 0.9965903759002686,grad_norm: 0.9999991776475932, iteration: 131550
loss: 1.0124993324279785,grad_norm: 0.9999990725661498, iteration: 131551
loss: 0.9886137247085571,grad_norm: 0.9999991100218673, iteration: 131552
loss: 0.9722933173179626,grad_norm: 0.9999992271765877, iteration: 131553
loss: 1.0111509561538696,grad_norm: 0.9999990753459461, iteration: 131554
loss: 0.9713320732116699,grad_norm: 0.9999990288772705, iteration: 131555
loss: 1.0203757286071777,grad_norm: 0.8477533147262194, iteration: 131556
loss: 0.9884620904922485,grad_norm: 0.9999991849781732, iteration: 131557
loss: 1.0015149116516113,grad_norm: 0.8439399359697379, iteration: 131558
loss: 1.0208611488342285,grad_norm: 0.9999993680023669, iteration: 131559
loss: 1.0002021789550781,grad_norm: 0.9999991394209266, iteration: 131560
loss: 1.0185309648513794,grad_norm: 0.9001485282621906, iteration: 131561
loss: 0.9876235127449036,grad_norm: 0.999999016326147, iteration: 131562
loss: 1.0259768962860107,grad_norm: 0.970542224616401, iteration: 131563
loss: 0.9616648554801941,grad_norm: 0.9999992449987051, iteration: 131564
loss: 0.9528510570526123,grad_norm: 0.8519652941570647, iteration: 131565
loss: 0.9877194762229919,grad_norm: 0.9999991428630682, iteration: 131566
loss: 0.9921531081199646,grad_norm: 0.9060580761987596, iteration: 131567
loss: 1.080603837966919,grad_norm: 0.9999998880282351, iteration: 131568
loss: 0.9791964888572693,grad_norm: 0.9665888484883169, iteration: 131569
loss: 1.0275925397872925,grad_norm: 0.9478648302590831, iteration: 131570
loss: 0.984430730342865,grad_norm: 0.9741232890299579, iteration: 131571
loss: 0.9971968531608582,grad_norm: 0.999999034381503, iteration: 131572
loss: 1.060610294342041,grad_norm: 0.9999993658962272, iteration: 131573
loss: 0.9857538342475891,grad_norm: 0.9886046007611216, iteration: 131574
loss: 0.9903516173362732,grad_norm: 0.9136327017587149, iteration: 131575
loss: 1.0214204788208008,grad_norm: 0.999999473447022, iteration: 131576
loss: 1.034387230873108,grad_norm: 0.9999991124652611, iteration: 131577
loss: 1.0237387418746948,grad_norm: 0.9999989220125912, iteration: 131578
loss: 0.991523802280426,grad_norm: 0.9999992993090046, iteration: 131579
loss: 0.9701165556907654,grad_norm: 0.9999990833514093, iteration: 131580
loss: 1.0416538715362549,grad_norm: 0.9999991284924966, iteration: 131581
loss: 1.0794425010681152,grad_norm: 0.9999996837435887, iteration: 131582
loss: 0.9944342374801636,grad_norm: 0.9694441731285427, iteration: 131583
loss: 0.9709379076957703,grad_norm: 0.9438101242627438, iteration: 131584
loss: 0.999315619468689,grad_norm: 0.9999990308953652, iteration: 131585
loss: 0.9857600331306458,grad_norm: 0.9301251119746686, iteration: 131586
loss: 1.0277036428451538,grad_norm: 0.9863133038953555, iteration: 131587
loss: 1.0099323987960815,grad_norm: 0.9999990779587037, iteration: 131588
loss: 0.9748116135597229,grad_norm: 0.8752553237139256, iteration: 131589
loss: 1.0352729558944702,grad_norm: 0.9406191908802597, iteration: 131590
loss: 1.0158382654190063,grad_norm: 0.9958743232658663, iteration: 131591
loss: 1.0142159461975098,grad_norm: 0.9999993042725825, iteration: 131592
loss: 1.000852108001709,grad_norm: 0.9999991961667016, iteration: 131593
loss: 1.0288498401641846,grad_norm: 0.9999990965192829, iteration: 131594
loss: 0.9713403582572937,grad_norm: 0.9999991638800109, iteration: 131595
loss: 1.0145211219787598,grad_norm: 0.8292854793213738, iteration: 131596
loss: 1.0510222911834717,grad_norm: 0.9999999297335501, iteration: 131597
loss: 0.9732375741004944,grad_norm: 0.9999992380157554, iteration: 131598
loss: 0.9983169436454773,grad_norm: 0.9809241955962609, iteration: 131599
loss: 0.9937595129013062,grad_norm: 0.9999992912296812, iteration: 131600
loss: 1.00741708278656,grad_norm: 0.9999993164387643, iteration: 131601
loss: 1.0141881704330444,grad_norm: 0.9999993611454391, iteration: 131602
loss: 0.9429215788841248,grad_norm: 0.9999989417581804, iteration: 131603
loss: 0.9917711615562439,grad_norm: 0.9999990375493413, iteration: 131604
loss: 0.9988166093826294,grad_norm: 0.999999171894782, iteration: 131605
loss: 1.0070890188217163,grad_norm: 0.999999225002373, iteration: 131606
loss: 1.0372200012207031,grad_norm: 0.9999989288253057, iteration: 131607
loss: 0.9931232929229736,grad_norm: 0.9999991201957203, iteration: 131608
loss: 0.9953830242156982,grad_norm: 0.9999991567681276, iteration: 131609
loss: 0.9832592606544495,grad_norm: 0.9243270256896654, iteration: 131610
loss: 1.001839518547058,grad_norm: 0.9999991304569474, iteration: 131611
loss: 1.008652687072754,grad_norm: 0.9671549506858561, iteration: 131612
loss: 1.010500192642212,grad_norm: 0.9981227696048144, iteration: 131613
loss: 0.9679762721061707,grad_norm: 0.9640800803525326, iteration: 131614
loss: 0.9965625405311584,grad_norm: 0.9455735204068978, iteration: 131615
loss: 0.9898261427879333,grad_norm: 0.9999991090907602, iteration: 131616
loss: 1.0478427410125732,grad_norm: 0.9999997668291843, iteration: 131617
loss: 1.0093655586242676,grad_norm: 0.9999991414759012, iteration: 131618
loss: 0.9359749555587769,grad_norm: 0.9153323249596496, iteration: 131619
loss: 0.9708420038223267,grad_norm: 0.9999990972609151, iteration: 131620
loss: 0.9510465264320374,grad_norm: 0.9312154693178829, iteration: 131621
loss: 1.0342553853988647,grad_norm: 0.999998996672641, iteration: 131622
loss: 0.99066561460495,grad_norm: 0.999999204498947, iteration: 131623
loss: 0.9818951487541199,grad_norm: 0.9520989672259812, iteration: 131624
loss: 0.9987319111824036,grad_norm: 0.9198185538757063, iteration: 131625
loss: 0.9681597352027893,grad_norm: 0.9999991691569743, iteration: 131626
loss: 1.0020177364349365,grad_norm: 0.8893815552599058, iteration: 131627
loss: 0.9963890314102173,grad_norm: 0.9999991729377121, iteration: 131628
loss: 0.9863483309745789,grad_norm: 0.999999052161905, iteration: 131629
loss: 1.0041706562042236,grad_norm: 0.9999991477027631, iteration: 131630
loss: 0.9979647397994995,grad_norm: 0.9340484573250646, iteration: 131631
loss: 0.9830275774002075,grad_norm: 0.9999992386137779, iteration: 131632
loss: 1.009901762008667,grad_norm: 0.8961463749233495, iteration: 131633
loss: 1.015014410018921,grad_norm: 0.8612272817521497, iteration: 131634
loss: 0.995796799659729,grad_norm: 0.9999991922845223, iteration: 131635
loss: 1.0114266872406006,grad_norm: 0.9001838704388161, iteration: 131636
loss: 1.0015132427215576,grad_norm: 0.9999990463056097, iteration: 131637
loss: 0.9920809864997864,grad_norm: 0.9999990514893291, iteration: 131638
loss: 1.0056989192962646,grad_norm: 0.9999991592935263, iteration: 131639
loss: 0.9967496991157532,grad_norm: 0.9999991855840591, iteration: 131640
loss: 0.9813307523727417,grad_norm: 0.9856810571732461, iteration: 131641
loss: 1.0010204315185547,grad_norm: 0.9212520531675633, iteration: 131642
loss: 0.9637762308120728,grad_norm: 0.9999994006413171, iteration: 131643
loss: 0.9854008555412292,grad_norm: 0.999998950450501, iteration: 131644
loss: 1.022261142730713,grad_norm: 0.8130355436500228, iteration: 131645
loss: 1.0108648538589478,grad_norm: 0.9999991915689058, iteration: 131646
loss: 0.9806331992149353,grad_norm: 0.9999990516387426, iteration: 131647
loss: 0.993065595626831,grad_norm: 0.9999995973056887, iteration: 131648
loss: 0.9687251448631287,grad_norm: 0.968337549850927, iteration: 131649
loss: 0.9433901906013489,grad_norm: 0.9999992526503825, iteration: 131650
loss: 0.9999508857727051,grad_norm: 0.9613800728022224, iteration: 131651
loss: 1.0035332441329956,grad_norm: 0.9999991692767407, iteration: 131652
loss: 0.9614347815513611,grad_norm: 0.9999990272223318, iteration: 131653
loss: 1.020565390586853,grad_norm: 0.9999991879321134, iteration: 131654
loss: 0.9786338210105896,grad_norm: 0.9999991361929826, iteration: 131655
loss: 0.9714612364768982,grad_norm: 0.9914585942113815, iteration: 131656
loss: 0.9921296238899231,grad_norm: 0.9692171978867954, iteration: 131657
loss: 1.0164084434509277,grad_norm: 0.9900158027857794, iteration: 131658
loss: 0.9721182584762573,grad_norm: 0.959511506488988, iteration: 131659
loss: 1.0251578092575073,grad_norm: 0.999999111056878, iteration: 131660
loss: 0.9406915307044983,grad_norm: 0.9809666891523979, iteration: 131661
loss: 0.9887098073959351,grad_norm: 0.8970654062598674, iteration: 131662
loss: 0.9926006197929382,grad_norm: 0.9999991020369463, iteration: 131663
loss: 1.0049134492874146,grad_norm: 0.9999991204283609, iteration: 131664
loss: 1.0651707649230957,grad_norm: 0.9999998406055733, iteration: 131665
loss: 0.9912965297698975,grad_norm: 0.9999989925320372, iteration: 131666
loss: 1.013316035270691,grad_norm: 0.9999990638827048, iteration: 131667
loss: 1.002250075340271,grad_norm: 0.9999989964094181, iteration: 131668
loss: 1.016067624092102,grad_norm: 0.999999127527467, iteration: 131669
loss: 1.010986328125,grad_norm: 0.9999992261626214, iteration: 131670
loss: 1.0005865097045898,grad_norm: 0.9999992291817447, iteration: 131671
loss: 1.021423578262329,grad_norm: 0.9999992804862127, iteration: 131672
loss: 0.9970764517784119,grad_norm: 0.9279930703971146, iteration: 131673
loss: 1.0574606657028198,grad_norm: 0.9999990073203744, iteration: 131674
loss: 1.0193904638290405,grad_norm: 0.9999991187934173, iteration: 131675
loss: 1.030707597732544,grad_norm: 0.9999999786350141, iteration: 131676
loss: 0.9783990383148193,grad_norm: 0.9999990840941068, iteration: 131677
loss: 1.022801399230957,grad_norm: 0.9999993797561721, iteration: 131678
loss: 1.009088158607483,grad_norm: 0.9999990886890538, iteration: 131679
loss: 1.0026490688323975,grad_norm: 0.9999990596915966, iteration: 131680
loss: 0.9646794199943542,grad_norm: 0.9999989734196059, iteration: 131681
loss: 0.9923004508018494,grad_norm: 0.9999990118613087, iteration: 131682
loss: 1.0165680646896362,grad_norm: 0.9999991133080844, iteration: 131683
loss: 0.9890850186347961,grad_norm: 0.9999989642247569, iteration: 131684
loss: 1.013927698135376,grad_norm: 0.9999990936572407, iteration: 131685
loss: 0.9768543839454651,grad_norm: 0.9999990447240591, iteration: 131686
loss: 0.9814156889915466,grad_norm: 0.9399844189644664, iteration: 131687
loss: 1.023329257965088,grad_norm: 0.9283702075509538, iteration: 131688
loss: 1.0231817960739136,grad_norm: 0.942298580644959, iteration: 131689
loss: 0.9572278261184692,grad_norm: 0.9999990434355169, iteration: 131690
loss: 0.9867686033248901,grad_norm: 0.9999991166554218, iteration: 131691
loss: 1.0477672815322876,grad_norm: 0.9999993563160184, iteration: 131692
loss: 0.9805921316146851,grad_norm: 0.8891808012959602, iteration: 131693
loss: 0.9812782406806946,grad_norm: 0.8976351031036093, iteration: 131694
loss: 0.9964478015899658,grad_norm: 0.9827231735098315, iteration: 131695
loss: 0.9935598373413086,grad_norm: 0.9999991936026676, iteration: 131696
loss: 1.0080745220184326,grad_norm: 0.9635464997737688, iteration: 131697
loss: 0.9629243016242981,grad_norm: 0.9460328217586045, iteration: 131698
loss: 0.9545289874076843,grad_norm: 0.959760038747216, iteration: 131699
loss: 1.0058488845825195,grad_norm: 0.9999990239941645, iteration: 131700
loss: 1.0282009840011597,grad_norm: 0.9999992212093605, iteration: 131701
loss: 0.983708918094635,grad_norm: 0.9999992017175151, iteration: 131702
loss: 1.0078368186950684,grad_norm: 0.9471449016232776, iteration: 131703
loss: 1.010411262512207,grad_norm: 0.9384034457712278, iteration: 131704
loss: 1.0024347305297852,grad_norm: 0.9999991291595959, iteration: 131705
loss: 1.050041675567627,grad_norm: 0.9601779197138189, iteration: 131706
loss: 1.0146909952163696,grad_norm: 0.9999991853598745, iteration: 131707
loss: 1.0168483257293701,grad_norm: 0.8644629589007501, iteration: 131708
loss: 1.2269669771194458,grad_norm: 0.9999997528689295, iteration: 131709
loss: 1.007441759109497,grad_norm: 0.9999989800540791, iteration: 131710
loss: 1.003450870513916,grad_norm: 0.999999278691195, iteration: 131711
loss: 1.000895380973816,grad_norm: 0.9999990839801113, iteration: 131712
loss: 0.9570329785346985,grad_norm: 0.9999990704541357, iteration: 131713
loss: 1.0054956674575806,grad_norm: 0.990586520884213, iteration: 131714
loss: 1.0469573736190796,grad_norm: 0.999999167226414, iteration: 131715
loss: 1.0045547485351562,grad_norm: 0.8824041854348119, iteration: 131716
loss: 1.0190798044204712,grad_norm: 0.896336772935101, iteration: 131717
loss: 1.0178978443145752,grad_norm: 0.982042808497123, iteration: 131718
loss: 0.9851089715957642,grad_norm: 0.999999012669155, iteration: 131719
loss: 1.0082310438156128,grad_norm: 0.9999990424835175, iteration: 131720
loss: 1.0066572427749634,grad_norm: 0.9618122531773212, iteration: 131721
loss: 0.9961647987365723,grad_norm: 0.9999991666852281, iteration: 131722
loss: 1.0333799123764038,grad_norm: 0.9222343021332068, iteration: 131723
loss: 1.0103565454483032,grad_norm: 0.9999992711714103, iteration: 131724
loss: 0.9700389504432678,grad_norm: 0.9999992806282231, iteration: 131725
loss: 0.9875450730323792,grad_norm: 0.7944373032415191, iteration: 131726
loss: 0.992890477180481,grad_norm: 0.9302766168103545, iteration: 131727
loss: 0.9863333106040955,grad_norm: 0.9999992201302188, iteration: 131728
loss: 0.9588273167610168,grad_norm: 0.9932290034034725, iteration: 131729
loss: 0.9754139184951782,grad_norm: 0.8506939573522908, iteration: 131730
loss: 1.0241845846176147,grad_norm: 0.8789156050521443, iteration: 131731
loss: 0.9810947179794312,grad_norm: 0.9341545775034835, iteration: 131732
loss: 1.009150743484497,grad_norm: 0.9999996027910076, iteration: 131733
loss: 0.9893767833709717,grad_norm: 0.9999991983888727, iteration: 131734
loss: 0.9948803782463074,grad_norm: 0.9999990900645553, iteration: 131735
loss: 1.0162715911865234,grad_norm: 0.9967379071671182, iteration: 131736
loss: 0.9832781553268433,grad_norm: 0.9943073984949929, iteration: 131737
loss: 0.9862843751907349,grad_norm: 0.9999991187752693, iteration: 131738
loss: 1.0379478931427002,grad_norm: 0.999998921408101, iteration: 131739
loss: 0.9898676872253418,grad_norm: 0.9830121989848142, iteration: 131740
loss: 1.001587986946106,grad_norm: 0.9567250037505024, iteration: 131741
loss: 1.0255182981491089,grad_norm: 0.999999202586709, iteration: 131742
loss: 1.0392509698867798,grad_norm: 0.9999991779382003, iteration: 131743
loss: 0.9815714955329895,grad_norm: 0.8698170450369225, iteration: 131744
loss: 0.971889317035675,grad_norm: 0.9999990587916958, iteration: 131745
loss: 1.0165789127349854,grad_norm: 0.9999991069991516, iteration: 131746
loss: 1.002970814704895,grad_norm: 0.9999992727477236, iteration: 131747
loss: 1.0210713148117065,grad_norm: 0.9999990149500508, iteration: 131748
loss: 0.9892878532409668,grad_norm: 0.8886041075408782, iteration: 131749
loss: 0.9930126667022705,grad_norm: 0.9999991624140887, iteration: 131750
loss: 1.027280330657959,grad_norm: 0.8770455995414396, iteration: 131751
loss: 1.0005195140838623,grad_norm: 0.9999989922030024, iteration: 131752
loss: 1.0250022411346436,grad_norm: 0.999999110434011, iteration: 131753
loss: 0.9939700961112976,grad_norm: 0.9480390885081649, iteration: 131754
loss: 0.9740723371505737,grad_norm: 0.9999991959301361, iteration: 131755
loss: 1.0203003883361816,grad_norm: 0.9818022594016493, iteration: 131756
loss: 1.0135655403137207,grad_norm: 0.999999164513665, iteration: 131757
loss: 1.0037872791290283,grad_norm: 0.9999990556910106, iteration: 131758
loss: 0.9684827923774719,grad_norm: 0.9551507602965235, iteration: 131759
loss: 0.981855034828186,grad_norm: 0.9999991274741236, iteration: 131760
loss: 1.019547700881958,grad_norm: 0.9999992670861853, iteration: 131761
loss: 1.0209710597991943,grad_norm: 0.9999992508312001, iteration: 131762
loss: 0.9373501539230347,grad_norm: 0.9999991332918065, iteration: 131763
loss: 1.0125759840011597,grad_norm: 0.9695208309064812, iteration: 131764
loss: 1.0218746662139893,grad_norm: 0.8611102921166437, iteration: 131765
loss: 0.9806181192398071,grad_norm: 0.9999990643457185, iteration: 131766
loss: 1.0057793855667114,grad_norm: 0.9176535765471809, iteration: 131767
loss: 1.0241233110427856,grad_norm: 0.9999990082506227, iteration: 131768
loss: 0.9867417216300964,grad_norm: 0.9753975454907027, iteration: 131769
loss: 1.0833804607391357,grad_norm: 0.9999991052992548, iteration: 131770
loss: 0.9883054494857788,grad_norm: 0.9774436262906083, iteration: 131771
loss: 1.037192463874817,grad_norm: 0.9999990452310007, iteration: 131772
loss: 0.9926717877388,grad_norm: 0.9270894410757891, iteration: 131773
loss: 1.0080710649490356,grad_norm: 0.9999990636520716, iteration: 131774
loss: 0.9916754961013794,grad_norm: 0.93273167178341, iteration: 131775
loss: 1.0245691537857056,grad_norm: 0.8920259963552908, iteration: 131776
loss: 1.0412098169326782,grad_norm: 0.9999992101278831, iteration: 131777
loss: 1.0161885023117065,grad_norm: 0.9734305567147709, iteration: 131778
loss: 0.9957761764526367,grad_norm: 0.9803169620444112, iteration: 131779
loss: 0.9818015694618225,grad_norm: 0.9999992061358953, iteration: 131780
loss: 1.010001540184021,grad_norm: 0.9999991053186186, iteration: 131781
loss: 1.0284277200698853,grad_norm: 0.9767685925831944, iteration: 131782
loss: 1.0039652585983276,grad_norm: 0.9999999168393658, iteration: 131783
loss: 1.0363657474517822,grad_norm: 0.9999991610670528, iteration: 131784
loss: 0.9836704730987549,grad_norm: 0.9999989673236845, iteration: 131785
loss: 0.977672278881073,grad_norm: 0.9999992153665671, iteration: 131786
loss: 1.0493437051773071,grad_norm: 0.9999993795257804, iteration: 131787
loss: 0.9738976359367371,grad_norm: 0.9999992453436332, iteration: 131788
loss: 0.9415377974510193,grad_norm: 0.9999990646858281, iteration: 131789
loss: 1.0359183549880981,grad_norm: 0.9999992147690234, iteration: 131790
loss: 0.9864460825920105,grad_norm: 0.8696322132897494, iteration: 131791
loss: 1.0189954042434692,grad_norm: 0.9999991530663661, iteration: 131792
loss: 1.011515498161316,grad_norm: 0.9999991793516969, iteration: 131793
loss: 0.982221245765686,grad_norm: 0.8749762802392292, iteration: 131794
loss: 1.0118566751480103,grad_norm: 0.9999991143409172, iteration: 131795
loss: 0.9650676846504211,grad_norm: 0.9999990674571282, iteration: 131796
loss: 0.9877481460571289,grad_norm: 0.9946157891997884, iteration: 131797
loss: 1.0044748783111572,grad_norm: 0.9999992369073658, iteration: 131798
loss: 1.0222233533859253,grad_norm: 0.9999991311440724, iteration: 131799
loss: 1.0267231464385986,grad_norm: 0.9999992128684433, iteration: 131800
loss: 1.0454542636871338,grad_norm: 0.9999991298489194, iteration: 131801
loss: 0.9823614954948425,grad_norm: 0.9392274400662324, iteration: 131802
loss: 1.0120928287506104,grad_norm: 0.9999991453116428, iteration: 131803
loss: 0.9771793484687805,grad_norm: 0.9999991892867686, iteration: 131804
loss: 0.9846059679985046,grad_norm: 0.9999993312212401, iteration: 131805
loss: 1.0067754983901978,grad_norm: 0.9999990790846985, iteration: 131806
loss: 1.0294469594955444,grad_norm: 0.9999996883030025, iteration: 131807
loss: 1.0226755142211914,grad_norm: 0.868909741197021, iteration: 131808
loss: 1.00318443775177,grad_norm: 0.9839203109107951, iteration: 131809
loss: 1.0073597431182861,grad_norm: 0.9999990847165816, iteration: 131810
loss: 1.0224370956420898,grad_norm: 0.9069915842663571, iteration: 131811
loss: 1.0336437225341797,grad_norm: 0.9999991796156895, iteration: 131812
loss: 1.0068222284317017,grad_norm: 0.9761684180550687, iteration: 131813
loss: 0.9773810505867004,grad_norm: 0.9999991239409736, iteration: 131814
loss: 0.9974730014801025,grad_norm: 0.998688798340577, iteration: 131815
loss: 0.9614319801330566,grad_norm: 0.9999991557287888, iteration: 131816
loss: 0.998925507068634,grad_norm: 0.9999991318917006, iteration: 131817
loss: 0.9766097664833069,grad_norm: 0.9999992370179507, iteration: 131818
loss: 0.9830204248428345,grad_norm: 0.9999989694180561, iteration: 131819
loss: 0.9843969345092773,grad_norm: 0.9999991531018518, iteration: 131820
loss: 1.0207295417785645,grad_norm: 0.9999992254463491, iteration: 131821
loss: 0.983288049697876,grad_norm: 0.999999830406485, iteration: 131822
loss: 0.996951162815094,grad_norm: 0.999999208758185, iteration: 131823
loss: 1.0059434175491333,grad_norm: 0.8996727660106105, iteration: 131824
loss: 1.0509058237075806,grad_norm: 0.9999998974938181, iteration: 131825
loss: 1.0252642631530762,grad_norm: 0.9999990972688624, iteration: 131826
loss: 1.0045245885849,grad_norm: 1.0000000629090455, iteration: 131827
loss: 0.9822568893432617,grad_norm: 0.9999988858452451, iteration: 131828
loss: 1.0933301448822021,grad_norm: 0.9999997089634965, iteration: 131829
loss: 1.0059367418289185,grad_norm: 0.9999990511231547, iteration: 131830
loss: 0.9969978928565979,grad_norm: 0.999999027837072, iteration: 131831
loss: 1.0167447328567505,grad_norm: 0.9363710883467603, iteration: 131832
loss: 1.019709825515747,grad_norm: 0.999999172733348, iteration: 131833
loss: 0.9925934076309204,grad_norm: 0.9426830824693078, iteration: 131834
loss: 0.9952008128166199,grad_norm: 0.9999991972528103, iteration: 131835
loss: 1.0113548040390015,grad_norm: 0.9999991765503278, iteration: 131836
loss: 0.9900498986244202,grad_norm: 0.985531252074199, iteration: 131837
loss: 0.9542472958564758,grad_norm: 0.9999990724450804, iteration: 131838
loss: 0.9930199980735779,grad_norm: 0.9999994706734305, iteration: 131839
loss: 1.0342910289764404,grad_norm: 0.9999989470185325, iteration: 131840
loss: 1.0070831775665283,grad_norm: 0.999999067838634, iteration: 131841
loss: 0.9801658987998962,grad_norm: 0.9999991142332644, iteration: 131842
loss: 1.0055360794067383,grad_norm: 0.999999151750927, iteration: 131843
loss: 1.0106106996536255,grad_norm: 0.9999997902250246, iteration: 131844
loss: 1.0040638446807861,grad_norm: 0.9054899336548183, iteration: 131845
loss: 1.0164952278137207,grad_norm: 0.9999991039467045, iteration: 131846
loss: 0.9696276783943176,grad_norm: 0.999999077537429, iteration: 131847
loss: 1.091646671295166,grad_norm: 0.9999992134119782, iteration: 131848
loss: 0.9964807033538818,grad_norm: 0.999999285086679, iteration: 131849
loss: 0.9931091070175171,grad_norm: 0.999999111067298, iteration: 131850
loss: 1.0185246467590332,grad_norm: 0.9999991378417916, iteration: 131851
loss: 1.0517339706420898,grad_norm: 0.99999988547007, iteration: 131852
loss: 0.9812230467796326,grad_norm: 0.9999991090465414, iteration: 131853
loss: 1.0019699335098267,grad_norm: 0.8682629667153443, iteration: 131854
loss: 1.006721019744873,grad_norm: 0.9999991384405016, iteration: 131855
loss: 1.0720165967941284,grad_norm: 0.8526788753837309, iteration: 131856
loss: 1.0163429975509644,grad_norm: 0.9177883644843596, iteration: 131857
loss: 1.010796070098877,grad_norm: 0.9999991671172951, iteration: 131858
loss: 1.0166685581207275,grad_norm: 0.9436635730166814, iteration: 131859
loss: 1.0322582721710205,grad_norm: 0.9999991200125997, iteration: 131860
loss: 1.059249997138977,grad_norm: 0.9999989658869911, iteration: 131861
loss: 0.9744129776954651,grad_norm: 0.9999990699092133, iteration: 131862
loss: 0.99656742811203,grad_norm: 0.9999994131837031, iteration: 131863
loss: 1.015570044517517,grad_norm: 0.8514808738028147, iteration: 131864
loss: 1.0174646377563477,grad_norm: 0.9999990986816472, iteration: 131865
loss: 0.9644027948379517,grad_norm: 0.9999989617590481, iteration: 131866
loss: 0.9897913336753845,grad_norm: 0.9999991061098491, iteration: 131867
loss: 0.9944915771484375,grad_norm: 0.9161735403381284, iteration: 131868
loss: 0.9943138360977173,grad_norm: 0.9999990780689811, iteration: 131869
loss: 0.9880346059799194,grad_norm: 0.878681938437999, iteration: 131870
loss: 0.9772236943244934,grad_norm: 0.8956735388228716, iteration: 131871
loss: 0.9851294755935669,grad_norm: 0.999998947574345, iteration: 131872
loss: 0.990501880645752,grad_norm: 0.9999992262128453, iteration: 131873
loss: 1.0158791542053223,grad_norm: 0.9999992839926881, iteration: 131874
loss: 1.0053021907806396,grad_norm: 0.999999156023717, iteration: 131875
loss: 0.9995976090431213,grad_norm: 0.9848400907094341, iteration: 131876
loss: 1.022964596748352,grad_norm: 0.9999993479967719, iteration: 131877
loss: 1.0314573049545288,grad_norm: 0.9999989984929953, iteration: 131878
loss: 0.9932718873023987,grad_norm: 0.9927944745536584, iteration: 131879
loss: 0.989009439945221,grad_norm: 0.89140711944117, iteration: 131880
loss: 1.0092161893844604,grad_norm: 0.9999993154709408, iteration: 131881
loss: 1.0015536546707153,grad_norm: 0.8701203546146141, iteration: 131882
loss: 1.0162204504013062,grad_norm: 0.9999990965609704, iteration: 131883
loss: 0.9718141555786133,grad_norm: 0.9999997326687933, iteration: 131884
loss: 0.970483124256134,grad_norm: 0.9614066839958377, iteration: 131885
loss: 0.9690959453582764,grad_norm: 0.8737984778379662, iteration: 131886
loss: 1.0193758010864258,grad_norm: 0.9140621577181168, iteration: 131887
loss: 1.0108323097229004,grad_norm: 0.9999994583707642, iteration: 131888
loss: 1.003077745437622,grad_norm: 0.9999991514539278, iteration: 131889
loss: 1.0124081373214722,grad_norm: 0.9999990949645862, iteration: 131890
loss: 0.9750518798828125,grad_norm: 0.9470276034311699, iteration: 131891
loss: 1.026187539100647,grad_norm: 0.9999991909744557, iteration: 131892
loss: 1.0181328058242798,grad_norm: 0.9999995174933344, iteration: 131893
loss: 1.0351758003234863,grad_norm: 0.9073260616141937, iteration: 131894
loss: 1.0032989978790283,grad_norm: 0.9999992504159402, iteration: 131895
loss: 1.014125943183899,grad_norm: 0.9999991587560869, iteration: 131896
loss: 0.9896788597106934,grad_norm: 0.9829210323442713, iteration: 131897
loss: 0.9977514743804932,grad_norm: 0.9999997563375578, iteration: 131898
loss: 0.9982050657272339,grad_norm: 0.9999997218514044, iteration: 131899
loss: 1.007779836654663,grad_norm: 0.8854280874320558, iteration: 131900
loss: 1.0050965547561646,grad_norm: 0.9999989044344056, iteration: 131901
loss: 0.9611696600914001,grad_norm: 0.9999992038061515, iteration: 131902
loss: 0.9885964393615723,grad_norm: 0.9999991335386468, iteration: 131903
loss: 1.0259411334991455,grad_norm: 0.9675864335915314, iteration: 131904
loss: 0.9921284914016724,grad_norm: 0.9999991574330831, iteration: 131905
loss: 1.0286239385604858,grad_norm: 0.925006668855656, iteration: 131906
loss: 0.9870105981826782,grad_norm: 0.999999291040246, iteration: 131907
loss: 0.9775503277778625,grad_norm: 0.9059569394414109, iteration: 131908
loss: 1.044588565826416,grad_norm: 0.9999992117700411, iteration: 131909
loss: 0.9897729754447937,grad_norm: 0.9999994176386744, iteration: 131910
loss: 1.0072060823440552,grad_norm: 0.9874731974691228, iteration: 131911
loss: 1.0071120262145996,grad_norm: 0.9999990084109839, iteration: 131912
loss: 1.0418548583984375,grad_norm: 0.9999991008946407, iteration: 131913
loss: 0.9544094800949097,grad_norm: 0.9363325750993432, iteration: 131914
loss: 1.0150470733642578,grad_norm: 0.9077726355458525, iteration: 131915
loss: 1.0311744213104248,grad_norm: 0.9999991210760124, iteration: 131916
loss: 0.9349358677864075,grad_norm: 0.9999991126801202, iteration: 131917
loss: 0.9951950311660767,grad_norm: 0.9668072070988663, iteration: 131918
loss: 1.001434326171875,grad_norm: 0.9999990976916159, iteration: 131919
loss: 0.9689878225326538,grad_norm: 0.9999992513003998, iteration: 131920
loss: 1.007238745689392,grad_norm: 0.9999991807079032, iteration: 131921
loss: 0.996612548828125,grad_norm: 0.9572306481623353, iteration: 131922
loss: 1.0176830291748047,grad_norm: 0.9999991714457578, iteration: 131923
loss: 0.9851750731468201,grad_norm: 0.999999010405838, iteration: 131924
loss: 0.9783031940460205,grad_norm: 0.9999991825042144, iteration: 131925
loss: 0.9725300669670105,grad_norm: 0.9999992788222347, iteration: 131926
loss: 0.9776797294616699,grad_norm: 0.9999992554863228, iteration: 131927
loss: 1.0216288566589355,grad_norm: 0.9557644951031269, iteration: 131928
loss: 0.9607877135276794,grad_norm: 0.9999992947001735, iteration: 131929
loss: 0.998313307762146,grad_norm: 0.9999991600885482, iteration: 131930
loss: 1.0012223720550537,grad_norm: 0.9999992265695344, iteration: 131931
loss: 0.9738466143608093,grad_norm: 0.9999991689643903, iteration: 131932
loss: 0.9771162271499634,grad_norm: 0.9999991019678672, iteration: 131933
loss: 0.9943426847457886,grad_norm: 0.9999990037609456, iteration: 131934
loss: 1.0157194137573242,grad_norm: 0.9999991066648342, iteration: 131935
loss: 0.9872094392776489,grad_norm: 0.9184851050000181, iteration: 131936
loss: 1.0047683715820312,grad_norm: 0.8061214959963648, iteration: 131937
loss: 0.9962390065193176,grad_norm: 0.9072373503140048, iteration: 131938
loss: 0.98778235912323,grad_norm: 0.9826399632927854, iteration: 131939
loss: 1.0109655857086182,grad_norm: 0.9999992010683086, iteration: 131940
loss: 1.012598991394043,grad_norm: 0.8149305851576274, iteration: 131941
loss: 1.0005254745483398,grad_norm: 0.9999990861876884, iteration: 131942
loss: 0.952601432800293,grad_norm: 0.9302898874566433, iteration: 131943
loss: 1.0335546731948853,grad_norm: 0.9608324548227257, iteration: 131944
loss: 0.9992629885673523,grad_norm: 0.9999990402067593, iteration: 131945
loss: 1.0018612146377563,grad_norm: 0.9999991597300272, iteration: 131946
loss: 0.9867409467697144,grad_norm: 0.8889088279720828, iteration: 131947
loss: 1.010083794593811,grad_norm: 0.9431615965748592, iteration: 131948
loss: 1.0061984062194824,grad_norm: 0.9196238335042342, iteration: 131949
loss: 1.0031832456588745,grad_norm: 0.9999990142195109, iteration: 131950
loss: 1.0337879657745361,grad_norm: 0.9171273176065946, iteration: 131951
loss: 0.9798352718353271,grad_norm: 0.9999990926250951, iteration: 131952
loss: 1.0063509941101074,grad_norm: 0.8870735263300719, iteration: 131953
loss: 1.039866328239441,grad_norm: 0.9999991792362867, iteration: 131954
loss: 1.0342837572097778,grad_norm: 0.9999992492118563, iteration: 131955
loss: 0.9896771311759949,grad_norm: 0.9999993594699947, iteration: 131956
loss: 0.9593889117240906,grad_norm: 0.9999991895376242, iteration: 131957
loss: 1.000139832496643,grad_norm: 0.9999991846736601, iteration: 131958
loss: 1.0422825813293457,grad_norm: 0.9999991876867346, iteration: 131959
loss: 0.9906444549560547,grad_norm: 0.9999990345965247, iteration: 131960
loss: 0.9991613030433655,grad_norm: 0.9999990250536893, iteration: 131961
loss: 0.9997332096099854,grad_norm: 0.9999990678888547, iteration: 131962
loss: 1.0196728706359863,grad_norm: 0.9675695268650485, iteration: 131963
loss: 1.0143940448760986,grad_norm: 0.9999990978340665, iteration: 131964
loss: 0.9964942336082458,grad_norm: 0.963042663182824, iteration: 131965
loss: 0.9547882080078125,grad_norm: 0.9999992041818719, iteration: 131966
loss: 0.9462307095527649,grad_norm: 0.9812097297059632, iteration: 131967
loss: 0.9967309832572937,grad_norm: 0.9621234161852081, iteration: 131968
loss: 0.9916632175445557,grad_norm: 0.9433592012964274, iteration: 131969
loss: 1.025736927986145,grad_norm: 0.9999991872402675, iteration: 131970
loss: 0.9894164204597473,grad_norm: 0.9999990016436603, iteration: 131971
loss: 0.9716088175773621,grad_norm: 0.8890780140866752, iteration: 131972
loss: 1.001911997795105,grad_norm: 0.9881109730867488, iteration: 131973
loss: 1.0024231672286987,grad_norm: 0.9213758332130259, iteration: 131974
loss: 1.0072122812271118,grad_norm: 0.9999991679655769, iteration: 131975
loss: 1.016148328781128,grad_norm: 0.9999992437293159, iteration: 131976
loss: 1.0213340520858765,grad_norm: 0.9240922579813884, iteration: 131977
loss: 1.043768048286438,grad_norm: 0.9999991742519067, iteration: 131978
loss: 1.0042133331298828,grad_norm: 0.9999992239177242, iteration: 131979
loss: 1.0174529552459717,grad_norm: 0.9999992625076883, iteration: 131980
loss: 0.9982808828353882,grad_norm: 0.8827616146876801, iteration: 131981
loss: 1.0228362083435059,grad_norm: 0.9999997961581186, iteration: 131982
loss: 0.9977896213531494,grad_norm: 0.9999989906121768, iteration: 131983
loss: 0.9457360506057739,grad_norm: 0.9999992652549393, iteration: 131984
loss: 0.9858476519584656,grad_norm: 0.9595238132052728, iteration: 131985
loss: 1.0007566213607788,grad_norm: 0.9470703694376406, iteration: 131986
loss: 0.997511088848114,grad_norm: 0.9999995148211773, iteration: 131987
loss: 0.9895180463790894,grad_norm: 0.9999991368075827, iteration: 131988
loss: 1.0058780908584595,grad_norm: 0.9999991399850044, iteration: 131989
loss: 1.0318800210952759,grad_norm: 0.9999991159854149, iteration: 131990
loss: 1.002670407295227,grad_norm: 0.999999297110644, iteration: 131991
loss: 1.002590537071228,grad_norm: 0.9999990858894086, iteration: 131992
loss: 0.9703040719032288,grad_norm: 0.9999990293410123, iteration: 131993
loss: 1.0084573030471802,grad_norm: 0.8885349811666754, iteration: 131994
loss: 1.0253286361694336,grad_norm: 0.9999990925861193, iteration: 131995
loss: 0.9840877056121826,grad_norm: 0.9999991883425731, iteration: 131996
loss: 0.9498114585876465,grad_norm: 0.8621226073746809, iteration: 131997
loss: 1.0058742761611938,grad_norm: 0.8586006929631083, iteration: 131998
loss: 0.9838302731513977,grad_norm: 0.9897400218855832, iteration: 131999
loss: 1.0020973682403564,grad_norm: 0.9999990638740757, iteration: 132000
loss: 0.9913614392280579,grad_norm: 0.8565656365634251, iteration: 132001
loss: 1.0059224367141724,grad_norm: 0.9773210705365619, iteration: 132002
loss: 1.0299569368362427,grad_norm: 0.9352370309871837, iteration: 132003
loss: 0.9921198487281799,grad_norm: 0.9787624843477605, iteration: 132004
loss: 0.9964873194694519,grad_norm: 0.9972301607905343, iteration: 132005
loss: 1.0568907260894775,grad_norm: 0.9596787583213389, iteration: 132006
loss: 0.9732993245124817,grad_norm: 0.9999990727234033, iteration: 132007
loss: 0.9620320200920105,grad_norm: 0.8947850817360659, iteration: 132008
loss: 0.9814534187316895,grad_norm: 0.9999999173358992, iteration: 132009
loss: 1.0120761394500732,grad_norm: 0.9999993029580259, iteration: 132010
loss: 0.9922319054603577,grad_norm: 0.9999990252324863, iteration: 132011
loss: 1.0088810920715332,grad_norm: 0.9074914324857899, iteration: 132012
loss: 1.030461311340332,grad_norm: 0.9999991351068143, iteration: 132013
loss: 0.9810411334037781,grad_norm: 0.9999991692493189, iteration: 132014
loss: 1.0173968076705933,grad_norm: 0.9999992447639175, iteration: 132015
loss: 1.0139144659042358,grad_norm: 0.9832037831312314, iteration: 132016
loss: 0.9940564036369324,grad_norm: 0.8574599614297548, iteration: 132017
loss: 1.0177700519561768,grad_norm: 0.9999991986239409, iteration: 132018
loss: 1.0063220262527466,grad_norm: 0.9612211500986849, iteration: 132019
loss: 1.0034457445144653,grad_norm: 0.8685468319666073, iteration: 132020
loss: 0.9860708713531494,grad_norm: 0.9999990734486196, iteration: 132021
loss: 0.9537436962127686,grad_norm: 0.9999990946015331, iteration: 132022
loss: 0.9765877723693848,grad_norm: 0.9999991833785508, iteration: 132023
loss: 1.0037763118743896,grad_norm: 0.947895348330531, iteration: 132024
loss: 1.0119956731796265,grad_norm: 0.9999990353625703, iteration: 132025
loss: 0.994768500328064,grad_norm: 0.9442358002107558, iteration: 132026
loss: 0.9597032070159912,grad_norm: 0.8539713379168155, iteration: 132027
loss: 1.0174881219863892,grad_norm: 0.8960285007488622, iteration: 132028
loss: 1.000423789024353,grad_norm: 0.9999992051152401, iteration: 132029
loss: 0.9901670813560486,grad_norm: 0.9463718313562066, iteration: 132030
loss: 0.988932728767395,grad_norm: 0.9999991019693805, iteration: 132031
loss: 0.9877631664276123,grad_norm: 0.9589223633256003, iteration: 132032
loss: 1.0030689239501953,grad_norm: 0.8739866466629378, iteration: 132033
loss: 0.9682958722114563,grad_norm: 0.9204840940590402, iteration: 132034
loss: 0.99186110496521,grad_norm: 0.999999095378521, iteration: 132035
loss: 1.0346524715423584,grad_norm: 0.9999991157055758, iteration: 132036
loss: 1.0466703176498413,grad_norm: 0.9999991402040296, iteration: 132037
loss: 0.9869663715362549,grad_norm: 0.9999991714303851, iteration: 132038
loss: 1.0420445203781128,grad_norm: 0.9999993965917279, iteration: 132039
loss: 0.9856845736503601,grad_norm: 0.9999991240247637, iteration: 132040
loss: 0.9799244999885559,grad_norm: 0.9358076540236088, iteration: 132041
loss: 1.004947304725647,grad_norm: 0.9999992281417577, iteration: 132042
loss: 0.9751675128936768,grad_norm: 0.8881207852168217, iteration: 132043
loss: 0.95381760597229,grad_norm: 0.9999989791755525, iteration: 132044
loss: 1.010135531425476,grad_norm: 0.9999990627328582, iteration: 132045
loss: 0.9770593047142029,grad_norm: 0.9999991279688519, iteration: 132046
loss: 0.9994720816612244,grad_norm: 0.9647094031807982, iteration: 132047
loss: 1.0259687900543213,grad_norm: 0.9999990152418196, iteration: 132048
loss: 0.9947941899299622,grad_norm: 0.9999991453496103, iteration: 132049
loss: 1.0121846199035645,grad_norm: 0.9219213096067027, iteration: 132050
loss: 1.037920355796814,grad_norm: 0.9999991106423302, iteration: 132051
loss: 0.9806391596794128,grad_norm: 0.99999909686416, iteration: 132052
loss: 0.9585500359535217,grad_norm: 0.9372740822449237, iteration: 132053
loss: 0.9913889765739441,grad_norm: 0.9372227621493471, iteration: 132054
loss: 0.9899749755859375,grad_norm: 0.9856138200013318, iteration: 132055
loss: 0.9990309476852417,grad_norm: 0.999999167473697, iteration: 132056
loss: 0.986579954624176,grad_norm: 0.9051839406660052, iteration: 132057
loss: 0.9716095924377441,grad_norm: 0.9597364497552728, iteration: 132058
loss: 1.0318260192871094,grad_norm: 0.9960144233155626, iteration: 132059
loss: 0.9752107262611389,grad_norm: 0.9203997689221386, iteration: 132060
loss: 1.0028817653656006,grad_norm: 0.9999990835719187, iteration: 132061
loss: 0.9978702068328857,grad_norm: 0.9084901311041802, iteration: 132062
loss: 1.0189461708068848,grad_norm: 0.9999993242832133, iteration: 132063
loss: 1.0361697673797607,grad_norm: 0.9999992814360806, iteration: 132064
loss: 1.0247963666915894,grad_norm: 0.999999118843273, iteration: 132065
loss: 0.9777160882949829,grad_norm: 0.999999157747935, iteration: 132066
loss: 1.0513124465942383,grad_norm: 0.9999997725723755, iteration: 132067
loss: 0.9756962656974792,grad_norm: 0.9999988773316945, iteration: 132068
loss: 1.012216329574585,grad_norm: 0.999999188501991, iteration: 132069
loss: 1.0009090900421143,grad_norm: 0.9693652527285835, iteration: 132070
loss: 1.0025417804718018,grad_norm: 0.9280385493145807, iteration: 132071
loss: 1.0003440380096436,grad_norm: 0.9999990331552656, iteration: 132072
loss: 1.005265474319458,grad_norm: 0.9200967927920491, iteration: 132073
loss: 1.0182178020477295,grad_norm: 0.9912479091253185, iteration: 132074
loss: 0.995521068572998,grad_norm: 0.9999991886525821, iteration: 132075
loss: 0.9808784127235413,grad_norm: 0.9413207992185791, iteration: 132076
loss: 0.9729517698287964,grad_norm: 0.9871948402703244, iteration: 132077
loss: 0.9954304099082947,grad_norm: 0.999999044100778, iteration: 132078
loss: 1.0410106182098389,grad_norm: 0.9999994122686828, iteration: 132079
loss: 0.9783356189727783,grad_norm: 0.999999340431767, iteration: 132080
loss: 1.0011900663375854,grad_norm: 0.9904666608317145, iteration: 132081
loss: 0.9815024733543396,grad_norm: 0.9999990326023698, iteration: 132082
loss: 0.995426595211029,grad_norm: 0.9626553562981746, iteration: 132083
loss: 1.0237939357757568,grad_norm: 0.9999988957676564, iteration: 132084
loss: 1.0218952894210815,grad_norm: 0.999999124752384, iteration: 132085
loss: 0.9824994802474976,grad_norm: 0.9999993196004633, iteration: 132086
loss: 1.0304484367370605,grad_norm: 0.8893212748615065, iteration: 132087
loss: 1.0023616552352905,grad_norm: 0.9171721309747566, iteration: 132088
loss: 0.9825156927108765,grad_norm: 0.9999991804198501, iteration: 132089
loss: 0.9722687005996704,grad_norm: 0.9999992405471434, iteration: 132090
loss: 1.0100984573364258,grad_norm: 0.9626870490417279, iteration: 132091
loss: 0.9564377665519714,grad_norm: 0.974560438066571, iteration: 132092
loss: 1.0000642538070679,grad_norm: 0.9999992412493116, iteration: 132093
loss: 0.9886080622673035,grad_norm: 0.9999990722408074, iteration: 132094
loss: 1.0300158262252808,grad_norm: 0.9999998559089077, iteration: 132095
loss: 1.014033317565918,grad_norm: 0.9999990948575038, iteration: 132096
loss: 1.0038889646530151,grad_norm: 0.8789769157276401, iteration: 132097
loss: 1.0037968158721924,grad_norm: 0.9581257291033871, iteration: 132098
loss: 0.9984762668609619,grad_norm: 0.999999123607411, iteration: 132099
loss: 0.9962319135665894,grad_norm: 0.9661830039348221, iteration: 132100
loss: 1.0018993616104126,grad_norm: 0.999999052916437, iteration: 132101
loss: 0.9644593596458435,grad_norm: 0.99999900844256, iteration: 132102
loss: 0.9668323993682861,grad_norm: 0.9728619979189886, iteration: 132103
loss: 1.0089551210403442,grad_norm: 0.9874929922519845, iteration: 132104
loss: 1.0163044929504395,grad_norm: 0.9999992238113172, iteration: 132105
loss: 1.0252050161361694,grad_norm: 0.9999996474289096, iteration: 132106
loss: 1.0008126497268677,grad_norm: 0.9999990407570648, iteration: 132107
loss: 1.0200562477111816,grad_norm: 0.9999990993882869, iteration: 132108
loss: 0.98246830701828,grad_norm: 0.9999991861992478, iteration: 132109
loss: 1.0136055946350098,grad_norm: 0.9999992130371761, iteration: 132110
loss: 0.9762299656867981,grad_norm: 0.8096416408894287, iteration: 132111
loss: 0.9950079917907715,grad_norm: 0.9999989604291379, iteration: 132112
loss: 1.0372154712677002,grad_norm: 0.9513902593588675, iteration: 132113
loss: 1.0229514837265015,grad_norm: 0.9999999608637367, iteration: 132114
loss: 1.0225234031677246,grad_norm: 0.9999990327667322, iteration: 132115
loss: 0.9762711524963379,grad_norm: 0.9999990900492883, iteration: 132116
loss: 1.0099316835403442,grad_norm: 0.9999989778953178, iteration: 132117
loss: 1.0072225332260132,grad_norm: 0.8282031102278098, iteration: 132118
loss: 1.005084753036499,grad_norm: 0.9999994097645089, iteration: 132119
loss: 0.9936484694480896,grad_norm: 0.9909066137535995, iteration: 132120
loss: 0.9880186915397644,grad_norm: 0.9018105412314592, iteration: 132121
loss: 0.994143009185791,grad_norm: 0.8591519659511704, iteration: 132122
loss: 1.0325464010238647,grad_norm: 0.999999639681254, iteration: 132123
loss: 1.0086674690246582,grad_norm: 0.990235152624189, iteration: 132124
loss: 0.9693835973739624,grad_norm: 0.9999992143551066, iteration: 132125
loss: 1.0283679962158203,grad_norm: 0.9254216929435153, iteration: 132126
loss: 0.9807441830635071,grad_norm: 0.997274519006054, iteration: 132127
loss: 0.974664032459259,grad_norm: 0.827607257875124, iteration: 132128
loss: 1.002166748046875,grad_norm: 0.9404861417385741, iteration: 132129
loss: 0.9991886615753174,grad_norm: 0.9999991986569913, iteration: 132130
loss: 0.9894232749938965,grad_norm: 0.9999991255408699, iteration: 132131
loss: 0.9607056379318237,grad_norm: 0.9049539106067715, iteration: 132132
loss: 1.071481466293335,grad_norm: 1.0000000786034564, iteration: 132133
loss: 1.0176072120666504,grad_norm: 0.9999991698653926, iteration: 132134
loss: 0.9808999300003052,grad_norm: 0.9999992842477889, iteration: 132135
loss: 1.035301923751831,grad_norm: 0.9999992585181604, iteration: 132136
loss: 1.0124001502990723,grad_norm: 0.9999991551105385, iteration: 132137
loss: 0.9914485812187195,grad_norm: 0.9999992709004121, iteration: 132138
loss: 0.9926045536994934,grad_norm: 0.9715376165347673, iteration: 132139
loss: 0.9758032560348511,grad_norm: 0.9999990675955557, iteration: 132140
loss: 0.9808497428894043,grad_norm: 0.9999988656820764, iteration: 132141
loss: 0.9761952757835388,grad_norm: 0.9999990580834238, iteration: 132142
loss: 0.9745985865592957,grad_norm: 0.999999100935887, iteration: 132143
loss: 0.9986630082130432,grad_norm: 0.9885114009935027, iteration: 132144
loss: 0.9761416912078857,grad_norm: 0.9999991157964241, iteration: 132145
loss: 1.0223959684371948,grad_norm: 0.9409622613764125, iteration: 132146
loss: 0.9992839694023132,grad_norm: 0.9999997799286143, iteration: 132147
loss: 0.9937571883201599,grad_norm: 0.9999991343991174, iteration: 132148
loss: 1.02701735496521,grad_norm: 0.9999992293666315, iteration: 132149
loss: 0.968123733997345,grad_norm: 0.9999989566242773, iteration: 132150
loss: 0.9917791485786438,grad_norm: 0.9999992905250105, iteration: 132151
loss: 1.0067681074142456,grad_norm: 0.9999990199613678, iteration: 132152
loss: 1.0004292726516724,grad_norm: 0.8645292486593794, iteration: 132153
loss: 0.9727655649185181,grad_norm: 0.999999174788756, iteration: 132154
loss: 1.0002851486206055,grad_norm: 0.8203445925743861, iteration: 132155
loss: 1.023342490196228,grad_norm: 0.9999992069847433, iteration: 132156
loss: 0.9884312152862549,grad_norm: 0.996370470831972, iteration: 132157
loss: 1.0038540363311768,grad_norm: 0.9999992184125186, iteration: 132158
loss: 0.9530916213989258,grad_norm: 0.9868615899366024, iteration: 132159
loss: 1.0171895027160645,grad_norm: 0.9999991479790468, iteration: 132160
loss: 0.9718275666236877,grad_norm: 0.999999092264132, iteration: 132161
loss: 1.0079020261764526,grad_norm: 0.9999993353114544, iteration: 132162
loss: 0.9910244941711426,grad_norm: 0.9819919149816495, iteration: 132163
loss: 1.0052376985549927,grad_norm: 0.9999990704818887, iteration: 132164
loss: 1.0140714645385742,grad_norm: 0.9900381649399711, iteration: 132165
loss: 1.0323331356048584,grad_norm: 0.8450272592558373, iteration: 132166
loss: 1.0110634565353394,grad_norm: 0.9999990177344721, iteration: 132167
loss: 0.9738427996635437,grad_norm: 0.9999990430894812, iteration: 132168
loss: 0.9653462767601013,grad_norm: 0.9999992289784941, iteration: 132169
loss: 1.010715126991272,grad_norm: 0.9999991784884443, iteration: 132170
loss: 0.9877319931983948,grad_norm: 0.9999991175918462, iteration: 132171
loss: 0.9969146251678467,grad_norm: 0.9526963029635171, iteration: 132172
loss: 1.0435304641723633,grad_norm: 0.9999991306087356, iteration: 132173
loss: 0.9991110563278198,grad_norm: 0.999999228892045, iteration: 132174
loss: 0.9800868034362793,grad_norm: 0.9509853205462981, iteration: 132175
loss: 1.014655351638794,grad_norm: 0.9999991371194252, iteration: 132176
loss: 1.0514346361160278,grad_norm: 0.9999996603664014, iteration: 132177
loss: 1.037483811378479,grad_norm: 0.9999999493453983, iteration: 132178
loss: 1.0073260068893433,grad_norm: 0.9575931266819242, iteration: 132179
loss: 1.0230398178100586,grad_norm: 0.8610639435910202, iteration: 132180
loss: 0.9711849689483643,grad_norm: 0.9999989999255093, iteration: 132181
loss: 1.0359619855880737,grad_norm: 0.9260891570548093, iteration: 132182
loss: 1.0138564109802246,grad_norm: 0.9999996649485352, iteration: 132183
loss: 1.027269721031189,grad_norm: 1.0000000228368346, iteration: 132184
loss: 1.000535011291504,grad_norm: 0.9999993102942427, iteration: 132185
loss: 0.9973127245903015,grad_norm: 0.9999991683287996, iteration: 132186
loss: 1.0256637334823608,grad_norm: 0.999999015101908, iteration: 132187
loss: 0.9835529327392578,grad_norm: 0.999999163915807, iteration: 132188
loss: 0.9949049949645996,grad_norm: 0.9739084716172671, iteration: 132189
loss: 1.0347168445587158,grad_norm: 0.8072740687030374, iteration: 132190
loss: 0.9968971014022827,grad_norm: 0.9255614965464813, iteration: 132191
loss: 1.0022292137145996,grad_norm: 0.9999991747040441, iteration: 132192
loss: 1.0107885599136353,grad_norm: 0.89350738310923, iteration: 132193
loss: 1.0227489471435547,grad_norm: 0.9999990356934214, iteration: 132194
loss: 0.9923369884490967,grad_norm: 0.9656745620090722, iteration: 132195
loss: 1.0099432468414307,grad_norm: 0.999999035788984, iteration: 132196
loss: 0.9919177293777466,grad_norm: 0.999999168459913, iteration: 132197
loss: 1.027657389640808,grad_norm: 0.9165101373374794, iteration: 132198
loss: 1.017467737197876,grad_norm: 0.9999990995533952, iteration: 132199
loss: 1.012898325920105,grad_norm: 0.9999990473876866, iteration: 132200
loss: 1.0383172035217285,grad_norm: 0.99999916775853, iteration: 132201
loss: 0.9852400422096252,grad_norm: 0.9999991210239815, iteration: 132202
loss: 0.9669668674468994,grad_norm: 0.9909914336845017, iteration: 132203
loss: 1.0278321504592896,grad_norm: 0.9999990607865985, iteration: 132204
loss: 1.0078315734863281,grad_norm: 0.9999995276274747, iteration: 132205
loss: 0.9905381202697754,grad_norm: 0.9999990427276912, iteration: 132206
loss: 1.0028616189956665,grad_norm: 0.9999990153252997, iteration: 132207
loss: 1.0107454061508179,grad_norm: 0.9999990283951649, iteration: 132208
loss: 1.0263960361480713,grad_norm: 0.9999991632955173, iteration: 132209
loss: 1.0230425596237183,grad_norm: 0.9270873298770512, iteration: 132210
loss: 1.0217176675796509,grad_norm: 0.9999990813995415, iteration: 132211
loss: 0.988610029220581,grad_norm: 0.9218885727938123, iteration: 132212
loss: 1.0202540159225464,grad_norm: 0.999999089629326, iteration: 132213
loss: 0.9690977334976196,grad_norm: 0.9999990146632551, iteration: 132214
loss: 1.0308003425598145,grad_norm: 0.9999991738318426, iteration: 132215
loss: 1.0220704078674316,grad_norm: 0.9999991504179435, iteration: 132216
loss: 0.9620697498321533,grad_norm: 0.9999992367105119, iteration: 132217
loss: 0.9308578372001648,grad_norm: 0.9999991639823365, iteration: 132218
loss: 1.0571125745773315,grad_norm: 0.9999990943012489, iteration: 132219
loss: 0.9469509124755859,grad_norm: 0.8989301435110088, iteration: 132220
loss: 0.9898035526275635,grad_norm: 0.9999990438356358, iteration: 132221
loss: 1.002261757850647,grad_norm: 0.9999990670277038, iteration: 132222
loss: 0.9645465612411499,grad_norm: 0.9907028386082156, iteration: 132223
loss: 0.9972807765007019,grad_norm: 0.9999991842223913, iteration: 132224
loss: 0.9978707432746887,grad_norm: 0.8749528139998664, iteration: 132225
loss: 1.0080385208129883,grad_norm: 0.9999990220584617, iteration: 132226
loss: 1.0344574451446533,grad_norm: 0.9265739340698088, iteration: 132227
loss: 1.0406389236450195,grad_norm: 0.9999992140478544, iteration: 132228
loss: 0.9565651416778564,grad_norm: 0.9953659695193195, iteration: 132229
loss: 1.0137041807174683,grad_norm: 0.9999992702181889, iteration: 132230
loss: 1.032536268234253,grad_norm: 0.9613174076770448, iteration: 132231
loss: 0.9937163591384888,grad_norm: 0.9999991803337445, iteration: 132232
loss: 1.0066531896591187,grad_norm: 0.852914239489833, iteration: 132233
loss: 0.9632855653762817,grad_norm: 0.9999992074623287, iteration: 132234
loss: 1.017264723777771,grad_norm: 0.999999087940788, iteration: 132235
loss: 1.0366430282592773,grad_norm: 0.9768423601916202, iteration: 132236
loss: 0.9769049286842346,grad_norm: 0.8653111854880868, iteration: 132237
loss: 0.9939067959785461,grad_norm: 0.9999990879816951, iteration: 132238
loss: 0.9570231437683105,grad_norm: 0.9999990548996798, iteration: 132239
loss: 1.004929780960083,grad_norm: 0.9999994050705364, iteration: 132240
loss: 1.028692603111267,grad_norm: 0.9999991074386625, iteration: 132241
loss: 1.0077531337738037,grad_norm: 0.9751184121988552, iteration: 132242
loss: 1.0449740886688232,grad_norm: 0.9999991994767344, iteration: 132243
loss: 1.0964759588241577,grad_norm: 0.999999072301421, iteration: 132244
loss: 1.089076280593872,grad_norm: 0.9999991969631159, iteration: 132245
loss: 1.0152722597122192,grad_norm: 0.9419218003428489, iteration: 132246
loss: 0.9690784811973572,grad_norm: 0.9999991926474461, iteration: 132247
loss: 0.9989131093025208,grad_norm: 0.9999996159924547, iteration: 132248
loss: 0.9947662949562073,grad_norm: 0.9731541260489239, iteration: 132249
loss: 1.028499722480774,grad_norm: 0.9999990711994163, iteration: 132250
loss: 0.9845995306968689,grad_norm: 0.9999990099246344, iteration: 132251
loss: 1.0149511098861694,grad_norm: 0.9999991324636938, iteration: 132252
loss: 0.9786226153373718,grad_norm: 0.913583684580159, iteration: 132253
loss: 0.9632627367973328,grad_norm: 0.9999991221001248, iteration: 132254
loss: 0.9658578038215637,grad_norm: 0.9181434773216187, iteration: 132255
loss: 0.9898611307144165,grad_norm: 0.9999989961986879, iteration: 132256
loss: 0.9867576956748962,grad_norm: 0.9999989949088007, iteration: 132257
loss: 0.9989290237426758,grad_norm: 0.9999989623883467, iteration: 132258
loss: 0.9647967219352722,grad_norm: 0.9999991151514999, iteration: 132259
loss: 1.032947301864624,grad_norm: 0.9999990110291701, iteration: 132260
loss: 0.9897692799568176,grad_norm: 0.9999992410428864, iteration: 132261
loss: 0.9981095790863037,grad_norm: 0.9999990265169878, iteration: 132262
loss: 1.0003000497817993,grad_norm: 0.9999992170685889, iteration: 132263
loss: 0.9271332621574402,grad_norm: 0.9999989453206658, iteration: 132264
loss: 0.9902717471122742,grad_norm: 0.9999991189764649, iteration: 132265
loss: 1.004958987236023,grad_norm: 0.9999991889209359, iteration: 132266
loss: 0.9795876741409302,grad_norm: 0.9999990045118512, iteration: 132267
loss: 1.0071831941604614,grad_norm: 0.8675040425433884, iteration: 132268
loss: 1.0104879140853882,grad_norm: 0.8664102144474867, iteration: 132269
loss: 1.0032857656478882,grad_norm: 0.9999991009068525, iteration: 132270
loss: 0.997389018535614,grad_norm: 0.9999991521745694, iteration: 132271
loss: 0.9897053241729736,grad_norm: 0.9402148496051269, iteration: 132272
loss: 0.9790701270103455,grad_norm: 0.9999990660067742, iteration: 132273
loss: 1.0002326965332031,grad_norm: 0.9280959856699833, iteration: 132274
loss: 1.0283523797988892,grad_norm: 0.8268149720476556, iteration: 132275
loss: 0.9682360291481018,grad_norm: 0.9999991728536402, iteration: 132276
loss: 1.0211780071258545,grad_norm: 0.9999992128976234, iteration: 132277
loss: 0.9846615195274353,grad_norm: 0.9121921026161733, iteration: 132278
loss: 1.007569432258606,grad_norm: 0.9999990289188784, iteration: 132279
loss: 1.0031406879425049,grad_norm: 0.961634012606424, iteration: 132280
loss: 1.0178416967391968,grad_norm: 0.9999992825102655, iteration: 132281
loss: 1.0040223598480225,grad_norm: 0.9999992888866471, iteration: 132282
loss: 1.0192285776138306,grad_norm: 0.869045972222892, iteration: 132283
loss: 0.9972219467163086,grad_norm: 0.9448876784789935, iteration: 132284
loss: 0.9925036430358887,grad_norm: 0.9999990653783196, iteration: 132285
loss: 0.9623830318450928,grad_norm: 0.8902487366381447, iteration: 132286
loss: 1.012257695198059,grad_norm: 0.8488394567833347, iteration: 132287
loss: 0.99089115858078,grad_norm: 0.9999990809350205, iteration: 132288
loss: 1.006354570388794,grad_norm: 0.9999990983904077, iteration: 132289
loss: 0.9974908828735352,grad_norm: 0.9981541786575407, iteration: 132290
loss: 1.0244190692901611,grad_norm: 0.9999991993150706, iteration: 132291
loss: 0.989132285118103,grad_norm: 0.9840312235941195, iteration: 132292
loss: 1.023943305015564,grad_norm: 0.9999991959405726, iteration: 132293
loss: 0.9908719658851624,grad_norm: 0.9999990715085455, iteration: 132294
loss: 0.9681715369224548,grad_norm: 0.9999991580910285, iteration: 132295
loss: 0.9711880683898926,grad_norm: 0.9999990900155556, iteration: 132296
loss: 1.0416642427444458,grad_norm: 0.9999991635808474, iteration: 132297
loss: 0.9419106245040894,grad_norm: 0.9999991213623657, iteration: 132298
loss: 0.9941657781600952,grad_norm: 0.9999989254955532, iteration: 132299
loss: 0.9685045480728149,grad_norm: 0.9204631337181336, iteration: 132300
loss: 0.9570444822311401,grad_norm: 0.9999992316450562, iteration: 132301
loss: 0.9901567101478577,grad_norm: 0.8859852922112953, iteration: 132302
loss: 0.9919261932373047,grad_norm: 0.9999990297473713, iteration: 132303
loss: 0.9838707447052002,grad_norm: 0.9999992720682619, iteration: 132304
loss: 1.0244661569595337,grad_norm: 0.9985751197795536, iteration: 132305
loss: 0.9909647107124329,grad_norm: 0.9853768352179048, iteration: 132306
loss: 0.9949973225593567,grad_norm: 0.9999991136694408, iteration: 132307
loss: 1.0063652992248535,grad_norm: 0.9999991571378382, iteration: 132308
loss: 0.9991173148155212,grad_norm: 0.9999998121667418, iteration: 132309
loss: 0.9970626831054688,grad_norm: 0.9348419589319008, iteration: 132310
loss: 1.0362552404403687,grad_norm: 0.9999992735807401, iteration: 132311
loss: 1.0272403955459595,grad_norm: 0.9999991532576104, iteration: 132312
loss: 0.9811691641807556,grad_norm: 0.9999991513040476, iteration: 132313
loss: 0.9812536835670471,grad_norm: 0.8875107949482329, iteration: 132314
loss: 1.0887020826339722,grad_norm: 0.9999991296249282, iteration: 132315
loss: 1.0343016386032104,grad_norm: 0.9820964732024671, iteration: 132316
loss: 1.0056883096694946,grad_norm: 0.9999991006688785, iteration: 132317
loss: 1.0148980617523193,grad_norm: 0.9999991766134039, iteration: 132318
loss: 1.022558331489563,grad_norm: 0.9999991120095524, iteration: 132319
loss: 0.9957875609397888,grad_norm: 0.9999991764045757, iteration: 132320
loss: 0.9956294298171997,grad_norm: 0.9999993323196487, iteration: 132321
loss: 0.9855315685272217,grad_norm: 0.9867816854187845, iteration: 132322
loss: 1.0044951438903809,grad_norm: 0.9999990721255605, iteration: 132323
loss: 1.0237656831741333,grad_norm: 0.9825144667152511, iteration: 132324
loss: 1.0353829860687256,grad_norm: 0.9999994802173982, iteration: 132325
loss: 1.0104666948318481,grad_norm: 0.9438523320957369, iteration: 132326
loss: 0.976140558719635,grad_norm: 0.9596357600818436, iteration: 132327
loss: 1.0040100812911987,grad_norm: 0.9631643058189443, iteration: 132328
loss: 0.9200648069381714,grad_norm: 0.999999124684151, iteration: 132329
loss: 0.9837483167648315,grad_norm: 0.9999991156941688, iteration: 132330
loss: 1.0325127840042114,grad_norm: 0.9999991267780238, iteration: 132331
loss: 0.9975752234458923,grad_norm: 0.999999007007177, iteration: 132332
loss: 0.9755734801292419,grad_norm: 0.9999990914211664, iteration: 132333
loss: 1.0312672853469849,grad_norm: 0.9999991410232862, iteration: 132334
loss: 1.0260529518127441,grad_norm: 0.9999991474696563, iteration: 132335
loss: 0.9942694306373596,grad_norm: 0.9999990915038215, iteration: 132336
loss: 1.0197986364364624,grad_norm: 0.9999991084540983, iteration: 132337
loss: 0.9686065912246704,grad_norm: 0.9999993890552186, iteration: 132338
loss: 0.9643117189407349,grad_norm: 0.9427730602648038, iteration: 132339
loss: 1.026526689529419,grad_norm: 0.8318509321506159, iteration: 132340
loss: 1.0084691047668457,grad_norm: 0.9999992387944451, iteration: 132341
loss: 1.008596658706665,grad_norm: 0.999999255043458, iteration: 132342
loss: 1.0025830268859863,grad_norm: 0.9999991392792554, iteration: 132343
loss: 1.0309772491455078,grad_norm: 0.8717295248242226, iteration: 132344
loss: 1.0197786092758179,grad_norm: 0.9999992685403477, iteration: 132345
loss: 1.020959734916687,grad_norm: 0.9999993479520216, iteration: 132346
loss: 1.0223346948623657,grad_norm: 0.9999990389656301, iteration: 132347
loss: 0.9986406564712524,grad_norm: 0.9606852844444481, iteration: 132348
loss: 0.9998899102210999,grad_norm: 0.9999989963088669, iteration: 132349
loss: 1.0128278732299805,grad_norm: 0.9999989601716568, iteration: 132350
loss: 1.006856083869934,grad_norm: 0.9999992056860039, iteration: 132351
loss: 0.9926044344902039,grad_norm: 0.9999990393280619, iteration: 132352
loss: 0.9894105792045593,grad_norm: 0.9999991890945419, iteration: 132353
loss: 1.0107942819595337,grad_norm: 0.9999990064661429, iteration: 132354
loss: 0.9910917282104492,grad_norm: 0.9999991545704835, iteration: 132355
loss: 0.9714778065681458,grad_norm: 0.9999991697828416, iteration: 132356
loss: 1.001511812210083,grad_norm: 0.9999993467058209, iteration: 132357
loss: 0.9733069539070129,grad_norm: 0.9993252733871089, iteration: 132358
loss: 1.0059115886688232,grad_norm: 0.9999989385241758, iteration: 132359
loss: 1.0010305643081665,grad_norm: 0.879259884218216, iteration: 132360
loss: 0.9995876550674438,grad_norm: 0.968454402010515, iteration: 132361
loss: 0.9958614110946655,grad_norm: 0.9999988958268882, iteration: 132362
loss: 0.9568438529968262,grad_norm: 0.9409340658472701, iteration: 132363
loss: 1.0210145711898804,grad_norm: 0.9999990787743507, iteration: 132364
loss: 0.9961800575256348,grad_norm: 0.9411748711107908, iteration: 132365
loss: 1.0127593278884888,grad_norm: 0.999999208525063, iteration: 132366
loss: 0.976628839969635,grad_norm: 0.9374349383703724, iteration: 132367
loss: 0.9646663665771484,grad_norm: 0.9450183849360446, iteration: 132368
loss: 0.9851047992706299,grad_norm: 0.999999071791968, iteration: 132369
loss: 0.9946733713150024,grad_norm: 0.9999991445702678, iteration: 132370
loss: 1.0473130941390991,grad_norm: 0.9999990662979664, iteration: 132371
loss: 1.0050811767578125,grad_norm: 0.9288600911139178, iteration: 132372
loss: 1.015331745147705,grad_norm: 0.9999992300268998, iteration: 132373
loss: 1.0030885934829712,grad_norm: 0.9999990966831274, iteration: 132374
loss: 0.9688124656677246,grad_norm: 0.9999990292870676, iteration: 132375
loss: 0.9995288252830505,grad_norm: 0.999998971362086, iteration: 132376
loss: 1.0654792785644531,grad_norm: 0.9999992102743323, iteration: 132377
loss: 1.0287301540374756,grad_norm: 0.9566813483867257, iteration: 132378
loss: 1.035620927810669,grad_norm: 0.9369002450365762, iteration: 132379
loss: 1.007001280784607,grad_norm: 0.999999052085182, iteration: 132380
loss: 1.0068175792694092,grad_norm: 0.9999990792028408, iteration: 132381
loss: 1.0057954788208008,grad_norm: 0.9325984514442284, iteration: 132382
loss: 1.0060960054397583,grad_norm: 0.9447815767270892, iteration: 132383
loss: 0.9513617157936096,grad_norm: 0.9999989779829901, iteration: 132384
loss: 1.0081369876861572,grad_norm: 0.9999989475159279, iteration: 132385
loss: 1.0075736045837402,grad_norm: 0.9999992025584417, iteration: 132386
loss: 1.0240905284881592,grad_norm: 0.9999993469997246, iteration: 132387
loss: 1.029729962348938,grad_norm: 0.9731433832631613, iteration: 132388
loss: 0.9840097427368164,grad_norm: 0.999999156764226, iteration: 132389
loss: 0.9995505809783936,grad_norm: 0.9072981166737475, iteration: 132390
loss: 0.9657133221626282,grad_norm: 0.9777934878750969, iteration: 132391
loss: 0.9850237369537354,grad_norm: 0.9999990549272805, iteration: 132392
loss: 0.9846641421318054,grad_norm: 0.9462781770926438, iteration: 132393
loss: 0.9745253324508667,grad_norm: 0.9375820093219303, iteration: 132394
loss: 0.9783506393432617,grad_norm: 0.9999990249278479, iteration: 132395
loss: 1.0108262300491333,grad_norm: 0.9999991593387594, iteration: 132396
loss: 0.9623157978057861,grad_norm: 0.9999992223812881, iteration: 132397
loss: 1.0303717851638794,grad_norm: 0.9772440727608239, iteration: 132398
loss: 1.043404459953308,grad_norm: 0.9999990739762874, iteration: 132399
loss: 0.9868366122245789,grad_norm: 0.8894921726525087, iteration: 132400
loss: 1.0100497007369995,grad_norm: 0.9999990314465294, iteration: 132401
loss: 0.9739929437637329,grad_norm: 0.9999990388094261, iteration: 132402
loss: 0.9780337810516357,grad_norm: 0.9899320855523317, iteration: 132403
loss: 1.0002732276916504,grad_norm: 0.9999990321223606, iteration: 132404
loss: 1.041001796722412,grad_norm: 0.9457673440709065, iteration: 132405
loss: 1.0119726657867432,grad_norm: 0.9999990304676067, iteration: 132406
loss: 0.9799871444702148,grad_norm: 0.9481544755872086, iteration: 132407
loss: 1.007775902748108,grad_norm: 0.9999991781693957, iteration: 132408
loss: 0.9812562465667725,grad_norm: 0.9999989936039612, iteration: 132409
loss: 0.966931164264679,grad_norm: 0.9999994369651227, iteration: 132410
loss: 0.9866666197776794,grad_norm: 0.9999992857718153, iteration: 132411
loss: 1.0001702308654785,grad_norm: 0.9999991286236631, iteration: 132412
loss: 1.037246584892273,grad_norm: 0.9999991307386128, iteration: 132413
loss: 0.9735114574432373,grad_norm: 0.987346945889636, iteration: 132414
loss: 1.0233854055404663,grad_norm: 0.9999992721920081, iteration: 132415
loss: 0.976227343082428,grad_norm: 0.9760907732339891, iteration: 132416
loss: 1.0250240564346313,grad_norm: 0.9987426911731042, iteration: 132417
loss: 0.9988358020782471,grad_norm: 0.9608947404202643, iteration: 132418
loss: 1.0990217924118042,grad_norm: 0.9738954102798681, iteration: 132419
loss: 1.0254079103469849,grad_norm: 0.9999991938233956, iteration: 132420
loss: 1.0322513580322266,grad_norm: 0.9465505977596584, iteration: 132421
loss: 0.9983125329017639,grad_norm: 0.9999990375241482, iteration: 132422
loss: 1.0158122777938843,grad_norm: 0.999999726420147, iteration: 132423
loss: 0.9952805638313293,grad_norm: 0.9370446091037546, iteration: 132424
loss: 1.034088134765625,grad_norm: 0.9407793277291849, iteration: 132425
loss: 0.9802076816558838,grad_norm: 0.9999990960054018, iteration: 132426
loss: 0.9889271259307861,grad_norm: 0.9485748175307985, iteration: 132427
loss: 0.9863762855529785,grad_norm: 0.9999993077201454, iteration: 132428
loss: 0.9844112396240234,grad_norm: 0.8797743227851582, iteration: 132429
loss: 0.9857264757156372,grad_norm: 0.8510838545980245, iteration: 132430
loss: 1.008611798286438,grad_norm: 0.9999992371610505, iteration: 132431
loss: 1.0010031461715698,grad_norm: 0.9999997488289938, iteration: 132432
loss: 1.0257022380828857,grad_norm: 0.9999992129235963, iteration: 132433
loss: 0.9862231612205505,grad_norm: 0.9417252856431519, iteration: 132434
loss: 1.0152868032455444,grad_norm: 0.8532130368649672, iteration: 132435
loss: 0.9688869714736938,grad_norm: 0.9637233803624399, iteration: 132436
loss: 0.9890472292900085,grad_norm: 0.9999991947192264, iteration: 132437
loss: 1.0142722129821777,grad_norm: 0.999999135146325, iteration: 132438
loss: 1.0319920778274536,grad_norm: 0.9331143459182011, iteration: 132439
loss: 0.9720040559768677,grad_norm: 0.999999075991979, iteration: 132440
loss: 0.9947761297225952,grad_norm: 0.8614666022989703, iteration: 132441
loss: 0.9818751215934753,grad_norm: 0.9626429326772286, iteration: 132442
loss: 1.0191107988357544,grad_norm: 0.9999991141532013, iteration: 132443
loss: 0.9761117100715637,grad_norm: 0.9999991955899777, iteration: 132444
loss: 0.9758927226066589,grad_norm: 0.8739898174284628, iteration: 132445
loss: 1.0060380697250366,grad_norm: 0.9999992823547251, iteration: 132446
loss: 1.028407096862793,grad_norm: 0.9999990779473338, iteration: 132447
loss: 1.0028427839279175,grad_norm: 0.9999990562878256, iteration: 132448
loss: 1.0019781589508057,grad_norm: 0.9486039825863807, iteration: 132449
loss: 0.9820969700813293,grad_norm: 0.7986429527294908, iteration: 132450
loss: 0.9924660921096802,grad_norm: 0.9999991170734571, iteration: 132451
loss: 1.0051594972610474,grad_norm: 0.9999991432056441, iteration: 132452
loss: 0.9911016821861267,grad_norm: 0.999998916235575, iteration: 132453
loss: 1.0026555061340332,grad_norm: 0.9369660866002844, iteration: 132454
loss: 0.9957654476165771,grad_norm: 0.90039056769519, iteration: 132455
loss: 0.9931929707527161,grad_norm: 0.9262305032584789, iteration: 132456
loss: 0.9716306924819946,grad_norm: 0.9999990616503388, iteration: 132457
loss: 0.9837389588356018,grad_norm: 0.9999991123502381, iteration: 132458
loss: 1.0109509229660034,grad_norm: 0.963052603752093, iteration: 132459
loss: 0.9786787033081055,grad_norm: 0.9412102947103742, iteration: 132460
loss: 0.986926794052124,grad_norm: 0.9999989226274478, iteration: 132461
loss: 0.9835418462753296,grad_norm: 0.9999991174728641, iteration: 132462
loss: 1.022286057472229,grad_norm: 0.9860886170088161, iteration: 132463
loss: 1.0556381940841675,grad_norm: 0.9867706039408083, iteration: 132464
loss: 1.0130287408828735,grad_norm: 0.9554399783363363, iteration: 132465
loss: 1.0271776914596558,grad_norm: 0.9999991267444527, iteration: 132466
loss: 1.010632038116455,grad_norm: 0.963632782966202, iteration: 132467
loss: 1.0197975635528564,grad_norm: 0.999999111233373, iteration: 132468
loss: 1.0013220310211182,grad_norm: 0.9999992580921728, iteration: 132469
loss: 0.9886670708656311,grad_norm: 0.9999991416432176, iteration: 132470
loss: 1.0059075355529785,grad_norm: 0.8579840423828033, iteration: 132471
loss: 1.0385875701904297,grad_norm: 0.9999991672460172, iteration: 132472
loss: 1.0106306076049805,grad_norm: 0.8280507833849924, iteration: 132473
loss: 0.9962465167045593,grad_norm: 0.9096102776440613, iteration: 132474
loss: 1.029523491859436,grad_norm: 0.9564150740662413, iteration: 132475
loss: 1.0265648365020752,grad_norm: 0.9999990193575293, iteration: 132476
loss: 1.0166921615600586,grad_norm: 0.9853742106515273, iteration: 132477
loss: 1.027044415473938,grad_norm: 0.9999992831893141, iteration: 132478
loss: 1.0878760814666748,grad_norm: 0.9999993267502, iteration: 132479
loss: 0.9639299511909485,grad_norm: 0.9056032190019211, iteration: 132480
loss: 0.9938769340515137,grad_norm: 0.8409311015154342, iteration: 132481
loss: 1.0091593265533447,grad_norm: 0.9999991941720686, iteration: 132482
loss: 1.0425130128860474,grad_norm: 0.9999991871299881, iteration: 132483
loss: 0.9875703454017639,grad_norm: 0.9999991102766244, iteration: 132484
loss: 1.0097789764404297,grad_norm: 0.9999991522787991, iteration: 132485
loss: 1.0073097944259644,grad_norm: 0.9999993650863841, iteration: 132486
loss: 0.9746325016021729,grad_norm: 0.9999990094335945, iteration: 132487
loss: 1.0003366470336914,grad_norm: 0.9999991075609329, iteration: 132488
loss: 0.9724913835525513,grad_norm: 0.8506695684898755, iteration: 132489
loss: 1.0725332498550415,grad_norm: 0.9999994539552846, iteration: 132490
loss: 1.0074104070663452,grad_norm: 0.999999049726989, iteration: 132491
loss: 0.9860708117485046,grad_norm: 0.9999991416124142, iteration: 132492
loss: 0.975981593132019,grad_norm: 0.9614015145350047, iteration: 132493
loss: 0.994415819644928,grad_norm: 0.9423626885973309, iteration: 132494
loss: 0.9975862503051758,grad_norm: 0.8460138884007306, iteration: 132495
loss: 1.0154051780700684,grad_norm: 0.9999991864334968, iteration: 132496
loss: 1.0072951316833496,grad_norm: 0.8555067941711813, iteration: 132497
loss: 1.00858736038208,grad_norm: 0.9280024533275852, iteration: 132498
loss: 0.9687331318855286,grad_norm: 0.9999991525574989, iteration: 132499
loss: 0.9555290341377258,grad_norm: 0.9999992372429651, iteration: 132500
loss: 1.0238202810287476,grad_norm: 0.9453226154156287, iteration: 132501
loss: 1.0303928852081299,grad_norm: 0.9999991368870091, iteration: 132502
loss: 0.9574321508407593,grad_norm: 0.9799318354989994, iteration: 132503
loss: 0.979417622089386,grad_norm: 0.9999991830764332, iteration: 132504
loss: 0.9839906096458435,grad_norm: 0.825734282368354, iteration: 132505
loss: 1.0001314878463745,grad_norm: 0.9999993403025602, iteration: 132506
loss: 0.9981862902641296,grad_norm: 0.9999996553172545, iteration: 132507
loss: 1.0221161842346191,grad_norm: 0.9999992663481332, iteration: 132508
loss: 0.9853261113166809,grad_norm: 0.9999990881065172, iteration: 132509
loss: 1.0108044147491455,grad_norm: 0.880623779873527, iteration: 132510
loss: 1.0077643394470215,grad_norm: 0.9999991946635196, iteration: 132511
loss: 0.9706863760948181,grad_norm: 0.9999990354938152, iteration: 132512
loss: 0.99383944272995,grad_norm: 0.9510177161475648, iteration: 132513
loss: 0.9704328179359436,grad_norm: 0.9999994800098349, iteration: 132514
loss: 1.120398759841919,grad_norm: 0.9999995505381369, iteration: 132515
loss: 1.0039623975753784,grad_norm: 0.9429356871522975, iteration: 132516
loss: 1.0116944313049316,grad_norm: 0.9450250664055321, iteration: 132517
loss: 0.9521782398223877,grad_norm: 0.9838453483113976, iteration: 132518
loss: 0.9770067930221558,grad_norm: 0.8830907419113467, iteration: 132519
loss: 1.0215071439743042,grad_norm: 0.941707833724099, iteration: 132520
loss: 1.0319610834121704,grad_norm: 0.9626648692689224, iteration: 132521
loss: 1.0408188104629517,grad_norm: 0.9999992691135644, iteration: 132522
loss: 0.9977115988731384,grad_norm: 0.8319513432258864, iteration: 132523
loss: 1.0031782388687134,grad_norm: 0.9999993074475192, iteration: 132524
loss: 0.9986567497253418,grad_norm: 0.9765411728778626, iteration: 132525
loss: 0.9972999095916748,grad_norm: 0.9999992026476415, iteration: 132526
loss: 1.0369465351104736,grad_norm: 0.9999991080252725, iteration: 132527
loss: 1.0062357187271118,grad_norm: 0.999998928869649, iteration: 132528
loss: 1.0279796123504639,grad_norm: 0.9541050416185174, iteration: 132529
loss: 0.9830663800239563,grad_norm: 0.9216708459335958, iteration: 132530
loss: 1.009726643562317,grad_norm: 0.9999991898137797, iteration: 132531
loss: 1.0111989974975586,grad_norm: 0.9999991269083464, iteration: 132532
loss: 1.0230392217636108,grad_norm: 0.9478057008658894, iteration: 132533
loss: 1.00655198097229,grad_norm: 0.9551276926411996, iteration: 132534
loss: 1.0165992975234985,grad_norm: 0.9889119840091003, iteration: 132535
loss: 1.001818060874939,grad_norm: 0.9578908612369516, iteration: 132536
loss: 0.9816140532493591,grad_norm: 0.8120718496373848, iteration: 132537
loss: 0.9953079223632812,grad_norm: 0.9873620572497072, iteration: 132538
loss: 1.023672103881836,grad_norm: 0.9297887744141297, iteration: 132539
loss: 0.9729938507080078,grad_norm: 0.9999993248943876, iteration: 132540
loss: 0.9690384864807129,grad_norm: 0.9999989443976063, iteration: 132541
loss: 1.0356260538101196,grad_norm: 0.9999990790282933, iteration: 132542
loss: 1.1822551488876343,grad_norm: 0.999999256117098, iteration: 132543
loss: 1.0361775159835815,grad_norm: 0.9999992496977506, iteration: 132544
loss: 0.9811351299285889,grad_norm: 0.9309170644435478, iteration: 132545
loss: 1.002388834953308,grad_norm: 0.9646441481847499, iteration: 132546
loss: 0.9948510527610779,grad_norm: 0.9999996787329835, iteration: 132547
loss: 0.9935601949691772,grad_norm: 0.9999990972921873, iteration: 132548
loss: 0.9717867970466614,grad_norm: 0.99999906905276, iteration: 132549
loss: 0.9847016930580139,grad_norm: 0.97522655743244, iteration: 132550
loss: 1.0266706943511963,grad_norm: 0.9999990826498348, iteration: 132551
loss: 0.9656418561935425,grad_norm: 0.7681147744261966, iteration: 132552
loss: 1.0154659748077393,grad_norm: 0.9999991358111036, iteration: 132553
loss: 1.0310652256011963,grad_norm: 0.9214810383687208, iteration: 132554
loss: 1.0044175386428833,grad_norm: 0.9561336070669606, iteration: 132555
loss: 1.0089025497436523,grad_norm: 0.9999991375465612, iteration: 132556
loss: 1.016619086265564,grad_norm: 0.9999990661617714, iteration: 132557
loss: 0.9667941927909851,grad_norm: 0.999999105065599, iteration: 132558
loss: 0.973423182964325,grad_norm: 0.8793752168296252, iteration: 132559
loss: 1.0059417486190796,grad_norm: 0.8443209396201429, iteration: 132560
loss: 0.9620482921600342,grad_norm: 0.9999989996341809, iteration: 132561
loss: 0.9765619039535522,grad_norm: 0.9999992351363544, iteration: 132562
loss: 0.9938258528709412,grad_norm: 0.9125976497450197, iteration: 132563
loss: 1.0251330137252808,grad_norm: 0.993026267618214, iteration: 132564
loss: 1.0174603462219238,grad_norm: 0.9999991468678824, iteration: 132565
loss: 0.9994953870773315,grad_norm: 0.885009996558698, iteration: 132566
loss: 0.9942467212677002,grad_norm: 0.9318151270450757, iteration: 132567
loss: 1.0227280855178833,grad_norm: 0.9999991045885384, iteration: 132568
loss: 1.0192327499389648,grad_norm: 0.9999992946166917, iteration: 132569
loss: 1.0634307861328125,grad_norm: 0.9999997082786356, iteration: 132570
loss: 1.0027211904525757,grad_norm: 0.9815842476381201, iteration: 132571
loss: 1.0037308931350708,grad_norm: 0.9222569308962365, iteration: 132572
loss: 1.0127813816070557,grad_norm: 0.9999992109639629, iteration: 132573
loss: 0.983839213848114,grad_norm: 0.9999991384275982, iteration: 132574
loss: 0.9868932962417603,grad_norm: 0.9039867294145596, iteration: 132575
loss: 0.9788153171539307,grad_norm: 0.9999990895146076, iteration: 132576
loss: 0.9740781784057617,grad_norm: 0.8923902515835355, iteration: 132577
loss: 1.0127238035202026,grad_norm: 0.9743960368755896, iteration: 132578
loss: 0.9970446228981018,grad_norm: 0.9999989126273177, iteration: 132579
loss: 1.0079371929168701,grad_norm: 0.9887557188983377, iteration: 132580
loss: 1.0519100427627563,grad_norm: 0.999999230582316, iteration: 132581
loss: 0.9816604852676392,grad_norm: 0.9999991058006918, iteration: 132582
loss: 1.0138906240463257,grad_norm: 0.9999991138551299, iteration: 132583
loss: 1.0130054950714111,grad_norm: 0.9999989148884686, iteration: 132584
loss: 0.9871762990951538,grad_norm: 0.9999991550804669, iteration: 132585
loss: 1.0091263055801392,grad_norm: 0.9317091959670462, iteration: 132586
loss: 0.9884158968925476,grad_norm: 0.8950824597463339, iteration: 132587
loss: 1.0896668434143066,grad_norm: 0.915177529136612, iteration: 132588
loss: 1.0061415433883667,grad_norm: 0.9999991897878221, iteration: 132589
loss: 0.989290714263916,grad_norm: 0.9999992466616311, iteration: 132590
loss: 0.978484034538269,grad_norm: 0.9774853795620511, iteration: 132591
loss: 0.9903267621994019,grad_norm: 0.9906503292841751, iteration: 132592
loss: 1.0018360614776611,grad_norm: 0.9999991387895457, iteration: 132593
loss: 1.0153182744979858,grad_norm: 0.824204965562079, iteration: 132594
loss: 0.9835230708122253,grad_norm: 0.9886319218008288, iteration: 132595
loss: 1.0030096769332886,grad_norm: 0.999999081933874, iteration: 132596
loss: 0.9972796440124512,grad_norm: 0.9999990278583611, iteration: 132597
loss: 0.9995117783546448,grad_norm: 0.9999990667308868, iteration: 132598
loss: 1.0120408535003662,grad_norm: 0.9999991306564133, iteration: 132599
loss: 1.050339698791504,grad_norm: 0.9999992424622752, iteration: 132600
loss: 0.9838372468948364,grad_norm: 0.9907472869667695, iteration: 132601
loss: 0.9961063861846924,grad_norm: 0.9999990627345424, iteration: 132602
loss: 1.0156829357147217,grad_norm: 0.9216042540283471, iteration: 132603
loss: 1.0160291194915771,grad_norm: 0.9999991807250512, iteration: 132604
loss: 0.9884414076805115,grad_norm: 0.8622157499341732, iteration: 132605
loss: 1.0122175216674805,grad_norm: 0.9999989574162933, iteration: 132606
loss: 1.018536925315857,grad_norm: 0.9999992049952047, iteration: 132607
loss: 0.980604887008667,grad_norm: 0.9999992113790667, iteration: 132608
loss: 0.9935068488121033,grad_norm: 0.9495362809520489, iteration: 132609
loss: 1.0575144290924072,grad_norm: 0.9999991921212636, iteration: 132610
loss: 0.9742693901062012,grad_norm: 0.983486868062606, iteration: 132611
loss: 1.0513536930084229,grad_norm: 0.9999992180053778, iteration: 132612
loss: 1.0000439882278442,grad_norm: 0.8679527776242432, iteration: 132613
loss: 1.0820307731628418,grad_norm: 0.9999992327472516, iteration: 132614
loss: 0.9679580926895142,grad_norm: 0.9420625577455883, iteration: 132615
loss: 0.9944608807563782,grad_norm: 0.9525150809866788, iteration: 132616
loss: 1.0003818273544312,grad_norm: 0.9999989995533015, iteration: 132617
loss: 0.9989566206932068,grad_norm: 0.8739886540156652, iteration: 132618
loss: 1.0109232664108276,grad_norm: 0.9707711558473456, iteration: 132619
loss: 1.0182679891586304,grad_norm: 0.9999995712997283, iteration: 132620
loss: 0.985585629940033,grad_norm: 0.8475764341188263, iteration: 132621
loss: 0.9965745210647583,grad_norm: 0.9999990178867794, iteration: 132622
loss: 1.0116767883300781,grad_norm: 0.9999992469836436, iteration: 132623
loss: 0.9911447167396545,grad_norm: 0.999999118106528, iteration: 132624
loss: 0.9986163377761841,grad_norm: 0.9999990615135106, iteration: 132625
loss: 0.9815767407417297,grad_norm: 0.9242518183205519, iteration: 132626
loss: 0.9744446277618408,grad_norm: 0.9999990452692857, iteration: 132627
loss: 0.9934672713279724,grad_norm: 0.9999989305824274, iteration: 132628
loss: 1.0135483741760254,grad_norm: 0.9999991285233765, iteration: 132629
loss: 1.0179089307785034,grad_norm: 0.9999990379446871, iteration: 132630
loss: 0.9655959606170654,grad_norm: 0.9999990430348876, iteration: 132631
loss: 0.9827783107757568,grad_norm: 0.9999999552218634, iteration: 132632
loss: 0.9898075461387634,grad_norm: 0.9999992146266279, iteration: 132633
loss: 0.9702281355857849,grad_norm: 0.948188145490134, iteration: 132634
loss: 1.0185966491699219,grad_norm: 0.999999013248149, iteration: 132635
loss: 1.0301679372787476,grad_norm: 0.9571188917416178, iteration: 132636
loss: 0.9868733286857605,grad_norm: 0.9859281351666628, iteration: 132637
loss: 1.0069414377212524,grad_norm: 0.9999991718837125, iteration: 132638
loss: 0.9549168944358826,grad_norm: 0.9873216062414742, iteration: 132639
loss: 1.0149286985397339,grad_norm: 0.9999990139867708, iteration: 132640
loss: 1.0219701528549194,grad_norm: 0.9141406561548144, iteration: 132641
loss: 0.9694782495498657,grad_norm: 0.9999992903219559, iteration: 132642
loss: 1.005691409111023,grad_norm: 0.9999990808994793, iteration: 132643
loss: 1.0036360025405884,grad_norm: 0.9999991359465982, iteration: 132644
loss: 0.9634609818458557,grad_norm: 0.9057620537329514, iteration: 132645
loss: 1.02707040309906,grad_norm: 0.9999990263910987, iteration: 132646
loss: 1.0162540674209595,grad_norm: 0.9999991303049129, iteration: 132647
loss: 0.972566545009613,grad_norm: 0.9999992419851702, iteration: 132648
loss: 1.00093412399292,grad_norm: 0.9999992105008048, iteration: 132649
loss: 0.9874267578125,grad_norm: 0.9244864767109275, iteration: 132650
loss: 0.9874285459518433,grad_norm: 0.8194442503194659, iteration: 132651
loss: 0.9980763792991638,grad_norm: 0.9564218344073468, iteration: 132652
loss: 0.9952216744422913,grad_norm: 0.9999991725194205, iteration: 132653
loss: 0.9839099645614624,grad_norm: 0.9999990883791671, iteration: 132654
loss: 0.9866006374359131,grad_norm: 0.9999991932990736, iteration: 132655
loss: 1.0290104150772095,grad_norm: 0.9999990432943865, iteration: 132656
loss: 1.0009444952011108,grad_norm: 0.9999991817596348, iteration: 132657
loss: 1.0494706630706787,grad_norm: 0.9999993172247015, iteration: 132658
loss: 1.0089497566223145,grad_norm: 0.9999990362943281, iteration: 132659
loss: 1.0037841796875,grad_norm: 0.9999993208850574, iteration: 132660
loss: 1.0425914525985718,grad_norm: 0.9999991074105136, iteration: 132661
loss: 0.9822748303413391,grad_norm: 0.9850482803147907, iteration: 132662
loss: 1.003180980682373,grad_norm: 0.9999992352153997, iteration: 132663
loss: 1.0734620094299316,grad_norm: 0.9999991645720991, iteration: 132664
loss: 1.0568259954452515,grad_norm: 0.9999991531972662, iteration: 132665
loss: 1.001442551612854,grad_norm: 0.9734383605023001, iteration: 132666
loss: 0.9694149494171143,grad_norm: 0.9999990793264556, iteration: 132667
loss: 1.0062153339385986,grad_norm: 0.9999990987775836, iteration: 132668
loss: 1.1019046306610107,grad_norm: 0.9999995847538375, iteration: 132669
loss: 0.9935512542724609,grad_norm: 0.9999989794897217, iteration: 132670
loss: 0.9932108521461487,grad_norm: 0.9999990096038103, iteration: 132671
loss: 0.9624620079994202,grad_norm: 0.9999990059571277, iteration: 132672
loss: 0.9827603101730347,grad_norm: 0.9999991172531716, iteration: 132673
loss: 0.9838556051254272,grad_norm: 0.9822077541252051, iteration: 132674
loss: 1.0151023864746094,grad_norm: 0.9015572623294498, iteration: 132675
loss: 1.0177077054977417,grad_norm: 0.7684811603315106, iteration: 132676
loss: 1.028104305267334,grad_norm: 0.9868467409593049, iteration: 132677
loss: 0.981836199760437,grad_norm: 0.855926787589673, iteration: 132678
loss: 0.9807332754135132,grad_norm: 0.9999991726971929, iteration: 132679
loss: 0.9970407485961914,grad_norm: 0.9999990782962827, iteration: 132680
loss: 1.01986825466156,grad_norm: 0.9386947468274889, iteration: 132681
loss: 0.9893138408660889,grad_norm: 0.9464179480097438, iteration: 132682
loss: 0.9766823649406433,grad_norm: 0.9999992874404734, iteration: 132683
loss: 1.040349006652832,grad_norm: 0.9999989393483509, iteration: 132684
loss: 1.0145485401153564,grad_norm: 0.9999991076943012, iteration: 132685
loss: 1.0251284837722778,grad_norm: 0.9999999009146023, iteration: 132686
loss: 0.948335587978363,grad_norm: 0.9669444385025815, iteration: 132687
loss: 0.9872767925262451,grad_norm: 0.9999991573934347, iteration: 132688
loss: 1.0302926301956177,grad_norm: 0.9956210296781687, iteration: 132689
loss: 0.981739342212677,grad_norm: 0.8817303227148716, iteration: 132690
loss: 1.0016437768936157,grad_norm: 0.9999991770448684, iteration: 132691
loss: 1.0041574239730835,grad_norm: 0.9792230089723021, iteration: 132692
loss: 0.9815759658813477,grad_norm: 0.9988429477375638, iteration: 132693
loss: 1.0035709142684937,grad_norm: 0.9248258255963514, iteration: 132694
loss: 0.9962642192840576,grad_norm: 0.9999990267961926, iteration: 132695
loss: 1.0380878448486328,grad_norm: 0.8908472838058195, iteration: 132696
loss: 0.9773855209350586,grad_norm: 0.999999212158543, iteration: 132697
loss: 1.00773024559021,grad_norm: 0.9999990663428029, iteration: 132698
loss: 1.0302287340164185,grad_norm: 0.9728349256048816, iteration: 132699
loss: 0.952991247177124,grad_norm: 0.9999991881639214, iteration: 132700
loss: 1.019411325454712,grad_norm: 0.9453956798157404, iteration: 132701
loss: 1.0082803964614868,grad_norm: 0.9999992048141324, iteration: 132702
loss: 1.0130153894424438,grad_norm: 0.9999993178506295, iteration: 132703
loss: 0.9879463315010071,grad_norm: 0.9999992740431431, iteration: 132704
loss: 1.0148210525512695,grad_norm: 0.9796387810807754, iteration: 132705
loss: 1.00349760055542,grad_norm: 0.88431752077022, iteration: 132706
loss: 1.020909309387207,grad_norm: 0.8867170842079382, iteration: 132707
loss: 1.0210014581680298,grad_norm: 0.9957347147308133, iteration: 132708
loss: 1.0018205642700195,grad_norm: 0.9471299352362381, iteration: 132709
loss: 0.9892850518226624,grad_norm: 0.9783832957297589, iteration: 132710
loss: 1.0035299062728882,grad_norm: 0.9999991384688938, iteration: 132711
loss: 0.9719295501708984,grad_norm: 0.9999991631760985, iteration: 132712
loss: 0.9665321707725525,grad_norm: 0.9999990518234411, iteration: 132713
loss: 0.9894691109657288,grad_norm: 0.9917057408515297, iteration: 132714
loss: 0.9742799997329712,grad_norm: 0.9603376915811981, iteration: 132715
loss: 1.0748333930969238,grad_norm: 0.9999991706568955, iteration: 132716
loss: 1.0224145650863647,grad_norm: 0.999999303112471, iteration: 132717
loss: 1.0095806121826172,grad_norm: 0.9999992813950641, iteration: 132718
loss: 0.9613807797431946,grad_norm: 0.9999992140306959, iteration: 132719
loss: 0.9840648174285889,grad_norm: 0.9822053061548058, iteration: 132720
loss: 1.0151150226593018,grad_norm: 0.9870905254725834, iteration: 132721
loss: 1.0455782413482666,grad_norm: 0.9567003762808867, iteration: 132722
loss: 0.9529752135276794,grad_norm: 0.9189804535813371, iteration: 132723
loss: 1.0046477317810059,grad_norm: 0.8990469185624509, iteration: 132724
loss: 0.9737526774406433,grad_norm: 0.9999991182602498, iteration: 132725
loss: 0.9743700623512268,grad_norm: 0.9999990739437612, iteration: 132726
loss: 0.9851577877998352,grad_norm: 0.9999992488150072, iteration: 132727
loss: 0.9599292874336243,grad_norm: 0.9999990428380551, iteration: 132728
loss: 0.9812372326850891,grad_norm: 0.9999992917203444, iteration: 132729
loss: 1.0014941692352295,grad_norm: 0.8559757155892047, iteration: 132730
loss: 0.9873591065406799,grad_norm: 0.9999991005117864, iteration: 132731
loss: 0.9736385345458984,grad_norm: 0.9999992547405758, iteration: 132732
loss: 1.0062888860702515,grad_norm: 0.8717931858239849, iteration: 132733
loss: 0.9763163924217224,grad_norm: 0.9639710675371714, iteration: 132734
loss: 0.9875906109809875,grad_norm: 0.9999989674583093, iteration: 132735
loss: 1.0146777629852295,grad_norm: 0.9999990865385014, iteration: 132736
loss: 0.9715211391448975,grad_norm: 0.9999990849252208, iteration: 132737
loss: 0.9702621698379517,grad_norm: 0.999999152652017, iteration: 132738
loss: 1.0209641456604004,grad_norm: 0.9403324011562947, iteration: 132739
loss: 1.0354905128479004,grad_norm: 0.9999990823775694, iteration: 132740
loss: 0.983671247959137,grad_norm: 0.9999993424223662, iteration: 132741
loss: 0.996247410774231,grad_norm: 0.9999992454102549, iteration: 132742
loss: 1.0113255977630615,grad_norm: 0.9999992596771734, iteration: 132743
loss: 1.0125789642333984,grad_norm: 0.9882546491667273, iteration: 132744
loss: 0.9890884757041931,grad_norm: 0.9999990613023733, iteration: 132745
loss: 1.0025876760482788,grad_norm: 0.9999991186956482, iteration: 132746
loss: 1.0276015996932983,grad_norm: 0.9999992311275501, iteration: 132747
loss: 1.0279169082641602,grad_norm: 0.9405462343798272, iteration: 132748
loss: 0.999686062335968,grad_norm: 0.9999988908588724, iteration: 132749
loss: 1.0153999328613281,grad_norm: 0.9384098581784817, iteration: 132750
loss: 1.0021171569824219,grad_norm: 0.995947198191559, iteration: 132751
loss: 1.0058335065841675,grad_norm: 0.9999993536860001, iteration: 132752
loss: 0.9967982769012451,grad_norm: 0.9881938746362835, iteration: 132753
loss: 0.9855285286903381,grad_norm: 0.9999990925836848, iteration: 132754
loss: 1.0106267929077148,grad_norm: 0.9999993147166643, iteration: 132755
loss: 1.0104016065597534,grad_norm: 0.9704461299831441, iteration: 132756
loss: 1.0176376104354858,grad_norm: 0.989421872470479, iteration: 132757
loss: 0.9963271617889404,grad_norm: 0.999999162583175, iteration: 132758
loss: 1.0656956434249878,grad_norm: 0.999999821659889, iteration: 132759
loss: 0.9726373553276062,grad_norm: 0.9755926090012624, iteration: 132760
loss: 0.9702972769737244,grad_norm: 0.9999991891393677, iteration: 132761
loss: 1.0000466108322144,grad_norm: 0.9295445987598199, iteration: 132762
loss: 0.9894145131111145,grad_norm: 0.9948060749356097, iteration: 132763
loss: 1.0071594715118408,grad_norm: 0.9999991811351763, iteration: 132764
loss: 1.001157522201538,grad_norm: 0.8285196890353538, iteration: 132765
loss: 0.9727349281311035,grad_norm: 0.9999993715263781, iteration: 132766
loss: 1.0043145418167114,grad_norm: 0.9999990460301429, iteration: 132767
loss: 1.0118186473846436,grad_norm: 0.9999998344531024, iteration: 132768
loss: 0.999999463558197,grad_norm: 0.9347103226965553, iteration: 132769
loss: 1.093846321105957,grad_norm: 0.9999991625941103, iteration: 132770
loss: 1.0371626615524292,grad_norm: 0.9999991643302935, iteration: 132771
loss: 1.0087032318115234,grad_norm: 0.9999991557297992, iteration: 132772
loss: 0.9685452580451965,grad_norm: 0.9999991268017246, iteration: 132773
loss: 0.9951126575469971,grad_norm: 0.999999023148003, iteration: 132774
loss: 1.0068790912628174,grad_norm: 0.7970231515381434, iteration: 132775
loss: 0.9612036943435669,grad_norm: 0.9780427920644076, iteration: 132776
loss: 0.9883559942245483,grad_norm: 0.9497244734363504, iteration: 132777
loss: 0.9825268983840942,grad_norm: 0.9999992138091343, iteration: 132778
loss: 0.9766889214515686,grad_norm: 0.9999990919053918, iteration: 132779
loss: 1.0341730117797852,grad_norm: 0.9179518855072134, iteration: 132780
loss: 1.0234451293945312,grad_norm: 0.9999993671649456, iteration: 132781
loss: 1.0258762836456299,grad_norm: 0.9999990613352893, iteration: 132782
loss: 1.0216081142425537,grad_norm: 0.9176797609552151, iteration: 132783
loss: 1.010077714920044,grad_norm: 0.9106501999991771, iteration: 132784
loss: 1.0006693601608276,grad_norm: 0.9999991770808618, iteration: 132785
loss: 0.9736201167106628,grad_norm: 0.9999990803096801, iteration: 132786
loss: 0.9630118608474731,grad_norm: 0.9999992915624619, iteration: 132787
loss: 1.0261030197143555,grad_norm: 0.8835483101316528, iteration: 132788
loss: 1.0050464868545532,grad_norm: 0.9999993584751501, iteration: 132789
loss: 0.9627626538276672,grad_norm: 0.9999990548893225, iteration: 132790
loss: 0.9915400147438049,grad_norm: 0.9802804136347774, iteration: 132791
loss: 1.0176066160202026,grad_norm: 0.9999995159887353, iteration: 132792
loss: 1.0271202325820923,grad_norm: 0.9999993098905268, iteration: 132793
loss: 1.0185279846191406,grad_norm: 0.999999459426643, iteration: 132794
loss: 0.9955489039421082,grad_norm: 0.9999990684779914, iteration: 132795
loss: 1.0056959390640259,grad_norm: 0.9999991602902192, iteration: 132796
loss: 1.0033622980117798,grad_norm: 0.9390372491586241, iteration: 132797
loss: 0.9941192865371704,grad_norm: 0.8914356100308856, iteration: 132798
loss: 0.9971454739570618,grad_norm: 0.9999991089385525, iteration: 132799
loss: 1.0103527307510376,grad_norm: 0.9999990651790333, iteration: 132800
loss: 1.0046218633651733,grad_norm: 0.8513432989215709, iteration: 132801
loss: 0.9704558849334717,grad_norm: 0.9999991396837473, iteration: 132802
loss: 1.055298089981079,grad_norm: 0.9999990725836321, iteration: 132803
loss: 1.0462521314620972,grad_norm: 0.9999991828251199, iteration: 132804
loss: 0.98619145154953,grad_norm: 0.999999087787097, iteration: 132805
loss: 0.9913122653961182,grad_norm: 0.9999992379488525, iteration: 132806
loss: 0.9615623950958252,grad_norm: 0.9593414298548786, iteration: 132807
loss: 1.0919705629348755,grad_norm: 0.9999994157477412, iteration: 132808
loss: 1.0270049571990967,grad_norm: 0.9999989610056995, iteration: 132809
loss: 1.0128192901611328,grad_norm: 0.991907110566145, iteration: 132810
loss: 1.0102766752243042,grad_norm: 0.9486677901304953, iteration: 132811
loss: 0.9605687856674194,grad_norm: 0.8995218105260537, iteration: 132812
loss: 0.9835891723632812,grad_norm: 0.9999990947983676, iteration: 132813
loss: 1.0314984321594238,grad_norm: 0.9999991546901917, iteration: 132814
loss: 1.0581231117248535,grad_norm: 0.9324593216604949, iteration: 132815
loss: 0.9774162173271179,grad_norm: 0.9588985426369889, iteration: 132816
loss: 1.045577049255371,grad_norm: 0.9999993187700037, iteration: 132817
loss: 1.0227032899856567,grad_norm: 0.9999992263575811, iteration: 132818
loss: 0.9960047006607056,grad_norm: 0.9999990289984131, iteration: 132819
loss: 1.0731415748596191,grad_norm: 0.9999994448523487, iteration: 132820
loss: 0.95128333568573,grad_norm: 0.9999992446145601, iteration: 132821
loss: 0.961578369140625,grad_norm: 0.9613468506667442, iteration: 132822
loss: 1.033859372138977,grad_norm: 0.9795902912663672, iteration: 132823
loss: 0.9428468346595764,grad_norm: 0.9999990937665784, iteration: 132824
loss: 0.9959481954574585,grad_norm: 0.9999991387656934, iteration: 132825
loss: 0.9640833735466003,grad_norm: 0.9044035478604806, iteration: 132826
loss: 0.9882134199142456,grad_norm: 0.9810217432215333, iteration: 132827
loss: 1.023764729499817,grad_norm: 0.9164041490223735, iteration: 132828
loss: 1.0146207809448242,grad_norm: 0.9731311351108393, iteration: 132829
loss: 0.9912587404251099,grad_norm: 0.9928945129134001, iteration: 132830
loss: 0.9990665912628174,grad_norm: 0.9999991060687571, iteration: 132831
loss: 0.9910595417022705,grad_norm: 0.9999991737788835, iteration: 132832
loss: 0.995958685874939,grad_norm: 0.9999991655675244, iteration: 132833
loss: 0.981159508228302,grad_norm: 0.9999990434316197, iteration: 132834
loss: 1.0159170627593994,grad_norm: 0.9922456312597041, iteration: 132835
loss: 0.9330562353134155,grad_norm: 0.9182753436703023, iteration: 132836
loss: 0.9876270890235901,grad_norm: 0.9999993190650419, iteration: 132837
loss: 1.003799319267273,grad_norm: 0.9999990369568248, iteration: 132838
loss: 0.9633257985115051,grad_norm: 0.9999992052491129, iteration: 132839
loss: 1.015856385231018,grad_norm: 0.9999993751446522, iteration: 132840
loss: 1.018721103668213,grad_norm: 0.9999995246486476, iteration: 132841
loss: 1.0179072618484497,grad_norm: 0.9999991755321903, iteration: 132842
loss: 0.9902739524841309,grad_norm: 0.9973442695916411, iteration: 132843
loss: 1.1087692975997925,grad_norm: 0.9999995487621527, iteration: 132844
loss: 0.9972131252288818,grad_norm: 0.9999991185078188, iteration: 132845
loss: 0.9702005386352539,grad_norm: 0.9999990826924147, iteration: 132846
loss: 0.9953234195709229,grad_norm: 0.8936594293922326, iteration: 132847
loss: 1.0084303617477417,grad_norm: 0.9999989850799043, iteration: 132848
loss: 0.9952694177627563,grad_norm: 0.9036979867822095, iteration: 132849
loss: 1.0080654621124268,grad_norm: 0.9724382492265111, iteration: 132850
loss: 0.9754979014396667,grad_norm: 0.9999989961501472, iteration: 132851
loss: 0.9795028567314148,grad_norm: 0.9999991098268918, iteration: 132852
loss: 0.9935798048973083,grad_norm: 0.9977785929740971, iteration: 132853
loss: 0.9726207852363586,grad_norm: 0.9999992593754333, iteration: 132854
loss: 0.9872819781303406,grad_norm: 0.9397528339194836, iteration: 132855
loss: 1.009613275527954,grad_norm: 0.9517508406626429, iteration: 132856
loss: 1.0146915912628174,grad_norm: 0.9999991816407189, iteration: 132857
loss: 1.0002155303955078,grad_norm: 0.9999992253761942, iteration: 132858
loss: 0.9648810625076294,grad_norm: 0.9609374904761351, iteration: 132859
loss: 1.0170092582702637,grad_norm: 0.9999991804429001, iteration: 132860
loss: 1.0205693244934082,grad_norm: 0.9999990808603448, iteration: 132861
loss: 1.00935959815979,grad_norm: 0.9323654821921896, iteration: 132862
loss: 0.9885526299476624,grad_norm: 0.8836823716836998, iteration: 132863
loss: 1.0098034143447876,grad_norm: 0.9999990102944518, iteration: 132864
loss: 0.970359206199646,grad_norm: 0.8584824618943656, iteration: 132865
loss: 0.98987877368927,grad_norm: 0.9999991384550841, iteration: 132866
loss: 1.0132763385772705,grad_norm: 0.9212879590548915, iteration: 132867
loss: 1.0131607055664062,grad_norm: 0.9886280834324483, iteration: 132868
loss: 0.9918168783187866,grad_norm: 0.9999993519034164, iteration: 132869
loss: 0.9782719612121582,grad_norm: 0.9999990052484098, iteration: 132870
loss: 1.0178908109664917,grad_norm: 0.9999995852974032, iteration: 132871
loss: 1.0083656311035156,grad_norm: 0.9999991371301821, iteration: 132872
loss: 0.9979137182235718,grad_norm: 0.9999991776850389, iteration: 132873
loss: 1.0118458271026611,grad_norm: 0.9999991661774925, iteration: 132874
loss: 1.0283455848693848,grad_norm: 0.9281601299997612, iteration: 132875
loss: 1.0168979167938232,grad_norm: 0.9999997714937268, iteration: 132876
loss: 1.022105097770691,grad_norm: 0.9912956018097491, iteration: 132877
loss: 1.0203986167907715,grad_norm: 0.9160645885996234, iteration: 132878
loss: 0.9997740983963013,grad_norm: 0.8985945550267957, iteration: 132879
loss: 0.9500164985656738,grad_norm: 0.9999991827593052, iteration: 132880
loss: 0.9895477890968323,grad_norm: 0.9999990483208564, iteration: 132881
loss: 0.9896818995475769,grad_norm: 0.9305154223836645, iteration: 132882
loss: 0.9927695393562317,grad_norm: 0.9999991364094153, iteration: 132883
loss: 0.9629826545715332,grad_norm: 0.945029516401068, iteration: 132884
loss: 1.0385048389434814,grad_norm: 0.9427720498716362, iteration: 132885
loss: 1.0273562669754028,grad_norm: 0.9999990803503035, iteration: 132886
loss: 1.0000386238098145,grad_norm: 0.9999989820889501, iteration: 132887
loss: 1.0089267492294312,grad_norm: 0.9738513780292095, iteration: 132888
loss: 1.0065525770187378,grad_norm: 0.9999994536765099, iteration: 132889
loss: 0.9950942397117615,grad_norm: 0.8568850995843826, iteration: 132890
loss: 0.9960727691650391,grad_norm: 0.999999227615022, iteration: 132891
loss: 0.9774945378303528,grad_norm: 0.9602641649838349, iteration: 132892
loss: 1.0002388954162598,grad_norm: 0.9999992233079358, iteration: 132893
loss: 1.029177188873291,grad_norm: 0.9999991029318578, iteration: 132894
loss: 1.0091986656188965,grad_norm: 0.9999992664273303, iteration: 132895
loss: 0.9955067038536072,grad_norm: 0.9163710167378565, iteration: 132896
loss: 1.0162336826324463,grad_norm: 0.9999991900228375, iteration: 132897
loss: 0.981758177280426,grad_norm: 0.9280642192848063, iteration: 132898
loss: 0.9808785319328308,grad_norm: 0.9999991558376746, iteration: 132899
loss: 0.9608959555625916,grad_norm: 0.8461300532157646, iteration: 132900
loss: 0.9617050886154175,grad_norm: 0.9999990921094364, iteration: 132901
loss: 1.024084210395813,grad_norm: 0.9999988935780106, iteration: 132902
loss: 0.9727436900138855,grad_norm: 0.9999992463284934, iteration: 132903
loss: 1.0153874158859253,grad_norm: 0.9999990911183165, iteration: 132904
loss: 0.988172709941864,grad_norm: 0.9015035611845582, iteration: 132905
loss: 0.9849437475204468,grad_norm: 0.9242217838702104, iteration: 132906
loss: 1.0117604732513428,grad_norm: 0.9999993621894007, iteration: 132907
loss: 1.0200668573379517,grad_norm: 0.9419414425754862, iteration: 132908
loss: 1.0439566373825073,grad_norm: 0.999999207684307, iteration: 132909
loss: 1.0416146516799927,grad_norm: 0.9361332207147185, iteration: 132910
loss: 1.0309475660324097,grad_norm: 0.9017083526382663, iteration: 132911
loss: 1.0277365446090698,grad_norm: 0.9102441364738174, iteration: 132912
loss: 1.010532259941101,grad_norm: 0.9999988482055138, iteration: 132913
loss: 0.9875941872596741,grad_norm: 0.9206494684275291, iteration: 132914
loss: 0.9903410077095032,grad_norm: 0.8439553586625733, iteration: 132915
loss: 1.0217957496643066,grad_norm: 0.9971583301401553, iteration: 132916
loss: 1.0442216396331787,grad_norm: 0.9999991860115258, iteration: 132917
loss: 0.9865258932113647,grad_norm: 0.8300235119652516, iteration: 132918
loss: 0.9897469282150269,grad_norm: 0.9999991149231976, iteration: 132919
loss: 1.0166659355163574,grad_norm: 0.9999990330527294, iteration: 132920
loss: 1.0062137842178345,grad_norm: 0.981245627660064, iteration: 132921
loss: 0.9751771688461304,grad_norm: 0.9999990720596216, iteration: 132922
loss: 1.0309666395187378,grad_norm: 0.9401749882164075, iteration: 132923
loss: 0.9883749485015869,grad_norm: 0.999999038562536, iteration: 132924
loss: 1.0178886651992798,grad_norm: 0.9999990338072301, iteration: 132925
loss: 0.9938588738441467,grad_norm: 0.9999990602408333, iteration: 132926
loss: 1.0202168226242065,grad_norm: 0.8236765020453883, iteration: 132927
loss: 0.9905728101730347,grad_norm: 0.9999990915304799, iteration: 132928
loss: 0.9730784893035889,grad_norm: 0.9999991939604577, iteration: 132929
loss: 0.9802951812744141,grad_norm: 0.9999991743581125, iteration: 132930
loss: 0.9594466090202332,grad_norm: 0.9960743738282697, iteration: 132931
loss: 1.020146369934082,grad_norm: 0.9605201016400536, iteration: 132932
loss: 0.9935089349746704,grad_norm: 0.9969218404872577, iteration: 132933
loss: 0.9846820831298828,grad_norm: 0.9752678182853449, iteration: 132934
loss: 0.9394927620887756,grad_norm: 0.9781534543005308, iteration: 132935
loss: 0.9584435820579529,grad_norm: 0.9999992383722932, iteration: 132936
loss: 0.9764467477798462,grad_norm: 0.9069030646555992, iteration: 132937
loss: 0.9948429465293884,grad_norm: 0.9999992929680435, iteration: 132938
loss: 1.0121479034423828,grad_norm: 0.9999991284678909, iteration: 132939
loss: 0.9975143671035767,grad_norm: 0.9999991359565132, iteration: 132940
loss: 0.9935938715934753,grad_norm: 0.9999991133043308, iteration: 132941
loss: 1.0727009773254395,grad_norm: 0.9999997767040288, iteration: 132942
loss: 1.0202546119689941,grad_norm: 0.9746513772421559, iteration: 132943
loss: 0.974758505821228,grad_norm: 0.999999229800423, iteration: 132944
loss: 0.9864152073860168,grad_norm: 0.989105575335711, iteration: 132945
loss: 1.0073845386505127,grad_norm: 0.9999991728045714, iteration: 132946
loss: 1.2476786375045776,grad_norm: 0.9999998109914874, iteration: 132947
loss: 1.0185319185256958,grad_norm: 0.9999994457489283, iteration: 132948
loss: 1.005915880203247,grad_norm: 0.9106423408268068, iteration: 132949
loss: 1.02085280418396,grad_norm: 0.9999991704252431, iteration: 132950
loss: 0.9557802677154541,grad_norm: 0.9999990584589699, iteration: 132951
loss: 1.0090744495391846,grad_norm: 0.9999992127156403, iteration: 132952
loss: 1.0217654705047607,grad_norm: 0.9999991157923372, iteration: 132953
loss: 0.9571139812469482,grad_norm: 0.9749277033027509, iteration: 132954
loss: 1.0366014242172241,grad_norm: 0.9999991100909891, iteration: 132955
loss: 1.0038864612579346,grad_norm: 0.999999172058633, iteration: 132956
loss: 1.024619460105896,grad_norm: 0.8361823506726496, iteration: 132957
loss: 1.0094541311264038,grad_norm: 0.9491438687081933, iteration: 132958
loss: 1.0255440473556519,grad_norm: 0.999998875074703, iteration: 132959
loss: 0.9991089701652527,grad_norm: 0.9999991077586042, iteration: 132960
loss: 1.0199850797653198,grad_norm: 0.9400856143553005, iteration: 132961
loss: 1.026698112487793,grad_norm: 0.9710684323224562, iteration: 132962
loss: 1.0147361755371094,grad_norm: 0.9999991512827698, iteration: 132963
loss: 0.9895183444023132,grad_norm: 0.9999992415857032, iteration: 132964
loss: 1.1493678092956543,grad_norm: 0.99999931016776, iteration: 132965
loss: 1.0383236408233643,grad_norm: 0.9041850233220169, iteration: 132966
loss: 1.0552139282226562,grad_norm: 0.999999138236146, iteration: 132967
loss: 0.9746469855308533,grad_norm: 0.9999990241275616, iteration: 132968
loss: 1.0004370212554932,grad_norm: 0.9999990704509738, iteration: 132969
loss: 1.0471967458724976,grad_norm: 0.9783274380624363, iteration: 132970
loss: 1.0073949098587036,grad_norm: 0.9686010628063886, iteration: 132971
loss: 1.0083931684494019,grad_norm: 0.8947980463685498, iteration: 132972
loss: 1.0175738334655762,grad_norm: 0.9999992895183902, iteration: 132973
loss: 0.9865307807922363,grad_norm: 0.8293170508814149, iteration: 132974
loss: 0.9973574876785278,grad_norm: 0.9763255731214711, iteration: 132975
loss: 1.0242522954940796,grad_norm: 0.9999991315464715, iteration: 132976
loss: 1.0115982294082642,grad_norm: 0.9999995482592949, iteration: 132977
loss: 0.982161283493042,grad_norm: 0.8155959532653073, iteration: 132978
loss: 0.952986478805542,grad_norm: 0.9999997899992146, iteration: 132979
loss: 0.9899621605873108,grad_norm: 0.9999990408583564, iteration: 132980
loss: 0.9785025119781494,grad_norm: 0.9638834925391767, iteration: 132981
loss: 1.0876919031143188,grad_norm: 0.9999992170366941, iteration: 132982
loss: 1.0050878524780273,grad_norm: 0.9999991211043971, iteration: 132983
loss: 0.9885684251785278,grad_norm: 0.8650540666589047, iteration: 132984
loss: 1.0213391780853271,grad_norm: 0.9376544886940079, iteration: 132985
loss: 1.0104551315307617,grad_norm: 0.9895599971072702, iteration: 132986
loss: 0.9864873290061951,grad_norm: 0.9999990934254869, iteration: 132987
loss: 0.9808632731437683,grad_norm: 0.9999991060152196, iteration: 132988
loss: 1.0205167531967163,grad_norm: 0.9999993096829589, iteration: 132989
loss: 0.9618399739265442,grad_norm: 0.9595823011696496, iteration: 132990
loss: 0.9953994750976562,grad_norm: 0.9542089909175614, iteration: 132991
loss: 0.9626071453094482,grad_norm: 0.8281684430345918, iteration: 132992
loss: 1.0075117349624634,grad_norm: 0.9999991518330235, iteration: 132993
loss: 1.0021756887435913,grad_norm: 0.9999990717557404, iteration: 132994
loss: 0.9978275299072266,grad_norm: 0.9177605568401213, iteration: 132995
loss: 1.0528208017349243,grad_norm: 0.9999991345246406, iteration: 132996
loss: 0.9702264666557312,grad_norm: 0.9829909113182632, iteration: 132997
loss: 0.9948912858963013,grad_norm: 0.9999991367345679, iteration: 132998
loss: 1.0451040267944336,grad_norm: 0.9999992146062939, iteration: 132999
loss: 1.0095858573913574,grad_norm: 0.9999990444443284, iteration: 133000
loss: 0.9802617430686951,grad_norm: 0.9702217306796653, iteration: 133001
loss: 0.9929339289665222,grad_norm: 0.9999992043440207, iteration: 133002
loss: 0.9966568946838379,grad_norm: 0.9999991280406364, iteration: 133003
loss: 1.0038013458251953,grad_norm: 0.9909611857026106, iteration: 133004
loss: 1.012312412261963,grad_norm: 0.9177237375871801, iteration: 133005
loss: 0.9615432620048523,grad_norm: 0.9805557301047585, iteration: 133006
loss: 1.0275894403457642,grad_norm: 0.9999991312308637, iteration: 133007
loss: 1.005226731300354,grad_norm: 0.999999136473053, iteration: 133008
loss: 0.9434443116188049,grad_norm: 0.9999991760625704, iteration: 133009
loss: 0.9878489375114441,grad_norm: 0.9999989327116301, iteration: 133010
loss: 1.0105055570602417,grad_norm: 0.999998979100736, iteration: 133011
loss: 0.997218132019043,grad_norm: 0.9999991284925914, iteration: 133012
loss: 0.9746028780937195,grad_norm: 0.9999992100883178, iteration: 133013
loss: 1.0066276788711548,grad_norm: 0.9474255266943084, iteration: 133014
loss: 0.9894095659255981,grad_norm: 0.937210635232075, iteration: 133015
loss: 0.9983154535293579,grad_norm: 0.9251759280803128, iteration: 133016
loss: 1.0000478029251099,grad_norm: 0.9999990503574379, iteration: 133017
loss: 0.9914085268974304,grad_norm: 0.9999992114613943, iteration: 133018
loss: 1.0473146438598633,grad_norm: 0.9999995810910489, iteration: 133019
loss: 1.0079219341278076,grad_norm: 0.9999992032886379, iteration: 133020
loss: 0.9808841347694397,grad_norm: 0.961918001357764, iteration: 133021
loss: 1.0304073095321655,grad_norm: 0.9999990898606329, iteration: 133022
loss: 1.002637267112732,grad_norm: 0.999999130351099, iteration: 133023
loss: 0.9567314982414246,grad_norm: 0.9999990465090421, iteration: 133024
loss: 0.9690734148025513,grad_norm: 0.9999994533518327, iteration: 133025
loss: 1.0176888704299927,grad_norm: 0.9999993575216868, iteration: 133026
loss: 1.0021387338638306,grad_norm: 0.9999991788794481, iteration: 133027
loss: 1.0274752378463745,grad_norm: 0.8559413465492907, iteration: 133028
loss: 1.0263957977294922,grad_norm: 0.9999991565783908, iteration: 133029
loss: 0.9817991256713867,grad_norm: 0.9999992379897142, iteration: 133030
loss: 0.9692928791046143,grad_norm: 0.9999990682070011, iteration: 133031
loss: 0.9668383002281189,grad_norm: 0.9999990607491746, iteration: 133032
loss: 0.9829716682434082,grad_norm: 0.9325411878228627, iteration: 133033
loss: 1.0166840553283691,grad_norm: 0.942973957671612, iteration: 133034
loss: 1.0131378173828125,grad_norm: 0.9999992295400346, iteration: 133035
loss: 0.9308494925498962,grad_norm: 0.9999990763304153, iteration: 133036
loss: 0.9851365089416504,grad_norm: 0.999999188326288, iteration: 133037
loss: 1.004638671875,grad_norm: 0.9999991238123257, iteration: 133038
loss: 0.9690370559692383,grad_norm: 0.9999990292733133, iteration: 133039
loss: 0.9999601244926453,grad_norm: 0.9999991035840107, iteration: 133040
loss: 0.9731370806694031,grad_norm: 0.9999990873810967, iteration: 133041
loss: 0.9779146909713745,grad_norm: 0.954303807624346, iteration: 133042
loss: 1.0162525177001953,grad_norm: 0.9999992036658488, iteration: 133043
loss: 1.01844322681427,grad_norm: 0.9999991467730608, iteration: 133044
loss: 1.0210891962051392,grad_norm: 0.999999481854499, iteration: 133045
loss: 0.9729700088500977,grad_norm: 0.9999991574215287, iteration: 133046
loss: 0.990110456943512,grad_norm: 0.9999990911772465, iteration: 133047
loss: 0.9876183271408081,grad_norm: 0.9562855779038911, iteration: 133048
loss: 0.9825466275215149,grad_norm: 0.9936765385159897, iteration: 133049
loss: 0.9871757626533508,grad_norm: 0.9999989921417379, iteration: 133050
loss: 1.0061367750167847,grad_norm: 0.9999991656169266, iteration: 133051
loss: 0.9815672039985657,grad_norm: 0.9034050446008572, iteration: 133052
loss: 1.0025968551635742,grad_norm: 0.9999990948332483, iteration: 133053
loss: 1.0165594816207886,grad_norm: 0.9124953377956876, iteration: 133054
loss: 1.1213005781173706,grad_norm: 0.9999995583386279, iteration: 133055
loss: 1.043466329574585,grad_norm: 0.9999991037637613, iteration: 133056
loss: 0.9726739525794983,grad_norm: 0.999998992771196, iteration: 133057
loss: 1.0215548276901245,grad_norm: 0.9999989090295451, iteration: 133058
loss: 0.982852041721344,grad_norm: 0.9999991451188438, iteration: 133059
loss: 0.9768152236938477,grad_norm: 0.9999990430064057, iteration: 133060
loss: 1.0212093591690063,grad_norm: 0.9814794543998457, iteration: 133061
loss: 0.9742190837860107,grad_norm: 0.9999991149478658, iteration: 133062
loss: 0.9609001874923706,grad_norm: 0.9668206994102445, iteration: 133063
loss: 0.999569833278656,grad_norm: 0.9999991325770794, iteration: 133064
loss: 0.994045078754425,grad_norm: 0.9920057074342512, iteration: 133065
loss: 0.9950044751167297,grad_norm: 0.7973384809642411, iteration: 133066
loss: 1.015388011932373,grad_norm: 0.9806812129913526, iteration: 133067
loss: 0.9908434748649597,grad_norm: 0.9999991801728761, iteration: 133068
loss: 0.95768803358078,grad_norm: 0.9999992771865516, iteration: 133069
loss: 0.9571729302406311,grad_norm: 0.9999992082666481, iteration: 133070
loss: 0.9857332706451416,grad_norm: 0.9999991207351927, iteration: 133071
loss: 0.9949621558189392,grad_norm: 0.9999992815779967, iteration: 133072
loss: 1.0071145296096802,grad_norm: 0.9606308603949104, iteration: 133073
loss: 1.0432419776916504,grad_norm: 0.9999989843045591, iteration: 133074
loss: 0.9898210167884827,grad_norm: 0.9999991176552927, iteration: 133075
loss: 1.0399184226989746,grad_norm: 0.9999992984502288, iteration: 133076
loss: 0.9938140511512756,grad_norm: 0.8604633906214124, iteration: 133077
loss: 1.0278308391571045,grad_norm: 0.9999990366750775, iteration: 133078
loss: 0.9984571933746338,grad_norm: 0.9827048023922546, iteration: 133079
loss: 1.0398958921432495,grad_norm: 0.9999992401794, iteration: 133080
loss: 0.9965341091156006,grad_norm: 0.9999992564917926, iteration: 133081
loss: 0.9792475700378418,grad_norm: 0.990596329245097, iteration: 133082
loss: 0.9942879676818848,grad_norm: 0.9658634772496238, iteration: 133083
loss: 0.9867763519287109,grad_norm: 0.9963934841855742, iteration: 133084
loss: 0.9709001779556274,grad_norm: 0.9999991941920193, iteration: 133085
loss: 0.9976904988288879,grad_norm: 0.9999989669254652, iteration: 133086
loss: 0.9666408896446228,grad_norm: 0.9999991141301258, iteration: 133087
loss: 0.9519633650779724,grad_norm: 0.9999991170315908, iteration: 133088
loss: 1.0076375007629395,grad_norm: 0.9999990425871355, iteration: 133089
loss: 1.0287277698516846,grad_norm: 0.8879708936438092, iteration: 133090
loss: 0.984773576259613,grad_norm: 0.9396070774826693, iteration: 133091
loss: 1.0206276178359985,grad_norm: 0.9999989246371214, iteration: 133092
loss: 0.9626701474189758,grad_norm: 0.9999992068622463, iteration: 133093
loss: 1.036108374595642,grad_norm: 0.9728846464384004, iteration: 133094
loss: 0.9677025079727173,grad_norm: 0.9999991435453769, iteration: 133095
loss: 1.0102262496948242,grad_norm: 0.9999991677970401, iteration: 133096
loss: 0.979363739490509,grad_norm: 0.999999112314691, iteration: 133097
loss: 0.9795044660568237,grad_norm: 0.9999990709315466, iteration: 133098
loss: 1.012437105178833,grad_norm: 0.9999991369521192, iteration: 133099
loss: 0.996519148349762,grad_norm: 0.9999992292289108, iteration: 133100
loss: 0.9816405177116394,grad_norm: 0.9267663681008621, iteration: 133101
loss: 1.003591775894165,grad_norm: 0.9999991198284678, iteration: 133102
loss: 1.064517617225647,grad_norm: 0.9971724746716307, iteration: 133103
loss: 1.0316541194915771,grad_norm: 0.9999989474943689, iteration: 133104
loss: 1.010115623474121,grad_norm: 0.9632996027999137, iteration: 133105
loss: 1.012386679649353,grad_norm: 0.999999152088567, iteration: 133106
loss: 0.995743453502655,grad_norm: 0.9999991210626831, iteration: 133107
loss: 0.9958866238594055,grad_norm: 0.999999220042595, iteration: 133108
loss: 1.0024504661560059,grad_norm: 0.9999992558429606, iteration: 133109
loss: 1.0187840461730957,grad_norm: 0.9999989019455923, iteration: 133110
loss: 0.9631397724151611,grad_norm: 0.9999990935713258, iteration: 133111
loss: 1.0077168941497803,grad_norm: 0.9999991130416864, iteration: 133112
loss: 0.9853681325912476,grad_norm: 0.9618736176447885, iteration: 133113
loss: 1.0004925727844238,grad_norm: 0.9999989631020904, iteration: 133114
loss: 0.9962091445922852,grad_norm: 0.9743152350446839, iteration: 133115
loss: 1.0301036834716797,grad_norm: 0.9999992417122011, iteration: 133116
loss: 1.0148102045059204,grad_norm: 0.9999990395395459, iteration: 133117
loss: 0.9863477945327759,grad_norm: 0.7904508709711445, iteration: 133118
loss: 1.0151904821395874,grad_norm: 0.8392417670583024, iteration: 133119
loss: 1.032792091369629,grad_norm: 0.9999990272903332, iteration: 133120
loss: 1.0014134645462036,grad_norm: 0.9999993090924306, iteration: 133121
loss: 1.0181547403335571,grad_norm: 0.8855355911969857, iteration: 133122
loss: 0.9972437620162964,grad_norm: 0.9999992282992867, iteration: 133123
loss: 1.0202378034591675,grad_norm: 0.9999991854990538, iteration: 133124
loss: 0.9847134947776794,grad_norm: 0.9999991345465994, iteration: 133125
loss: 0.9594932794570923,grad_norm: 0.9999991618973647, iteration: 133126
loss: 0.9982835650444031,grad_norm: 1.0000000215904186, iteration: 133127
loss: 0.9984374046325684,grad_norm: 0.9999990175713648, iteration: 133128
loss: 1.0071293115615845,grad_norm: 0.9999992469105311, iteration: 133129
loss: 1.013903260231018,grad_norm: 0.8851065688303137, iteration: 133130
loss: 1.0869659185409546,grad_norm: 0.999999206511467, iteration: 133131
loss: 1.009264588356018,grad_norm: 0.9999991903867188, iteration: 133132
loss: 1.019709587097168,grad_norm: 0.9079563064152526, iteration: 133133
loss: 1.0309054851531982,grad_norm: 0.9175859809560484, iteration: 133134
loss: 1.0206671953201294,grad_norm: 0.999999216681429, iteration: 133135
loss: 1.03557288646698,grad_norm: 0.9999991717908856, iteration: 133136
loss: 0.9941688776016235,grad_norm: 0.9330833841062204, iteration: 133137
loss: 1.0120700597763062,grad_norm: 0.9999992345395341, iteration: 133138
loss: 1.0310137271881104,grad_norm: 0.9414694276909032, iteration: 133139
loss: 0.9828980565071106,grad_norm: 0.9771086270872336, iteration: 133140
loss: 0.9672673344612122,grad_norm: 0.9999990699375393, iteration: 133141
loss: 1.0140631198883057,grad_norm: 0.9999991891670251, iteration: 133142
loss: 1.0075641870498657,grad_norm: 0.9999993428377768, iteration: 133143
loss: 1.0026934146881104,grad_norm: 0.9412210872346383, iteration: 133144
loss: 1.0004034042358398,grad_norm: 0.9999993040631602, iteration: 133145
loss: 1.014190673828125,grad_norm: 0.9999990254717203, iteration: 133146
loss: 0.9893649816513062,grad_norm: 0.9999995821708847, iteration: 133147
loss: 0.9990132451057434,grad_norm: 0.9999990000778172, iteration: 133148
loss: 1.020596981048584,grad_norm: 0.9999992069349489, iteration: 133149
loss: 1.0241137742996216,grad_norm: 0.9999991307920234, iteration: 133150
loss: 1.0013858079910278,grad_norm: 0.9999991163170837, iteration: 133151
loss: 0.9958285689353943,grad_norm: 0.9999994129918989, iteration: 133152
loss: 1.002670407295227,grad_norm: 0.9999992643004707, iteration: 133153
loss: 1.019890546798706,grad_norm: 0.9999989570409586, iteration: 133154
loss: 1.0498988628387451,grad_norm: 1.0000000463808965, iteration: 133155
loss: 0.9783972501754761,grad_norm: 0.9999991983585881, iteration: 133156
loss: 1.0471733808517456,grad_norm: 0.999999040700066, iteration: 133157
loss: 0.9732905626296997,grad_norm: 0.9999996816937361, iteration: 133158
loss: 1.0011333227157593,grad_norm: 0.9999990415749707, iteration: 133159
loss: 1.0442613363265991,grad_norm: 0.9984511930307376, iteration: 133160
loss: 1.0026633739471436,grad_norm: 0.9999995148762986, iteration: 133161
loss: 1.0134766101837158,grad_norm: 0.9999990772533185, iteration: 133162
loss: 0.9997848868370056,grad_norm: 0.9999997888058468, iteration: 133163
loss: 1.0032070875167847,grad_norm: 0.8941847218766668, iteration: 133164
loss: 1.147841453552246,grad_norm: 0.99999988109459, iteration: 133165
loss: 1.0742089748382568,grad_norm: 0.9999994013316627, iteration: 133166
loss: 0.9928767681121826,grad_norm: 0.9176700295875813, iteration: 133167
loss: 0.9918043613433838,grad_norm: 0.9999991866595146, iteration: 133168
loss: 1.0434492826461792,grad_norm: 0.9999991488254418, iteration: 133169
loss: 1.0118768215179443,grad_norm: 0.9999999794507584, iteration: 133170
loss: 0.9913464784622192,grad_norm: 0.9408085020219612, iteration: 133171
loss: 1.0052481889724731,grad_norm: 0.9999990439904733, iteration: 133172
loss: 1.0228615999221802,grad_norm: 0.9999990669847723, iteration: 133173
loss: 0.9839122891426086,grad_norm: 0.9999991003017894, iteration: 133174
loss: 1.1066217422485352,grad_norm: 0.9999999806600737, iteration: 133175
loss: 1.0916757583618164,grad_norm: 0.9999991283888329, iteration: 133176
loss: 1.0147079229354858,grad_norm: 0.9999993154558788, iteration: 133177
loss: 1.0918577909469604,grad_norm: 0.9768039013619546, iteration: 133178
loss: 1.0047088861465454,grad_norm: 0.9999997284361344, iteration: 133179
loss: 1.1505202054977417,grad_norm: 0.9999999624685835, iteration: 133180
loss: 1.1062158346176147,grad_norm: 0.9999993149379504, iteration: 133181
loss: 0.9827477335929871,grad_norm: 0.8108243230850918, iteration: 133182
loss: 1.104004979133606,grad_norm: 0.9999995984062723, iteration: 133183
loss: 0.9936046004295349,grad_norm: 0.9999998466603113, iteration: 133184
loss: 1.0065076351165771,grad_norm: 0.9999989715433106, iteration: 133185
loss: 1.0040444135665894,grad_norm: 0.9999993052536007, iteration: 133186
loss: 1.0374128818511963,grad_norm: 0.9999992816287842, iteration: 133187
loss: 1.1902382373809814,grad_norm: 0.9999992961041154, iteration: 133188
loss: 1.0348427295684814,grad_norm: 0.999999272705212, iteration: 133189
loss: 1.03214430809021,grad_norm: 0.9999999849804936, iteration: 133190
loss: 0.9843616485595703,grad_norm: 0.8767874902000057, iteration: 133191
loss: 0.9842275381088257,grad_norm: 0.9462114294437457, iteration: 133192
loss: 0.9887723326683044,grad_norm: 0.9999990504033921, iteration: 133193
loss: 0.9736024737358093,grad_norm: 0.917137737189176, iteration: 133194
loss: 0.9918337464332581,grad_norm: 0.9999991857276836, iteration: 133195
loss: 1.0069066286087036,grad_norm: 0.9999992940834851, iteration: 133196
loss: 0.9832877516746521,grad_norm: 0.9714927166552296, iteration: 133197
loss: 0.9966539740562439,grad_norm: 0.8891713356765888, iteration: 133198
loss: 1.0143544673919678,grad_norm: 0.9094818592618424, iteration: 133199
loss: 1.0219422578811646,grad_norm: 0.9792483185907102, iteration: 133200
loss: 1.007791519165039,grad_norm: 0.9999989289272291, iteration: 133201
loss: 1.0255610942840576,grad_norm: 0.9999991780226725, iteration: 133202
loss: 0.9686250686645508,grad_norm: 0.9999998319078007, iteration: 133203
loss: 0.9662220478057861,grad_norm: 0.9999990822061107, iteration: 133204
loss: 1.0030080080032349,grad_norm: 0.933114032220302, iteration: 133205
loss: 1.0363348722457886,grad_norm: 0.9906398927492103, iteration: 133206
loss: 0.97772616147995,grad_norm: 0.9999990004552133, iteration: 133207
loss: 1.0002388954162598,grad_norm: 0.9999991993681786, iteration: 133208
loss: 1.0307092666625977,grad_norm: 0.9999991334348859, iteration: 133209
loss: 1.0499515533447266,grad_norm: 0.9999998450314915, iteration: 133210
loss: 0.997882068157196,grad_norm: 0.9229588020084711, iteration: 133211
loss: 0.9773162007331848,grad_norm: 0.9025698864278777, iteration: 133212
loss: 1.0489078760147095,grad_norm: 0.9999994195431899, iteration: 133213
loss: 1.0063672065734863,grad_norm: 0.9296862597505458, iteration: 133214
loss: 1.020469307899475,grad_norm: 0.9999996416073816, iteration: 133215
loss: 0.9759854674339294,grad_norm: 0.9999991995900755, iteration: 133216
loss: 0.9747485518455505,grad_norm: 0.8542838308933206, iteration: 133217
loss: 0.9666497111320496,grad_norm: 0.9999993150911113, iteration: 133218
loss: 1.004853367805481,grad_norm: 0.9999990352410585, iteration: 133219
loss: 0.9589828252792358,grad_norm: 0.9999991370915142, iteration: 133220
loss: 0.9591610431671143,grad_norm: 0.9999990866557882, iteration: 133221
loss: 1.0528959035873413,grad_norm: 0.9999993356161452, iteration: 133222
loss: 1.0049740076065063,grad_norm: 0.9999992852403196, iteration: 133223
loss: 1.0190753936767578,grad_norm: 0.9999988444670442, iteration: 133224
loss: 1.1379725933074951,grad_norm: 0.9999996413943214, iteration: 133225
loss: 0.9859099388122559,grad_norm: 0.9999990400219261, iteration: 133226
loss: 1.0118427276611328,grad_norm: 0.9999991228098962, iteration: 133227
loss: 1.004865050315857,grad_norm: 0.9999993700850802, iteration: 133228
loss: 0.9509220123291016,grad_norm: 0.9999990080474392, iteration: 133229
loss: 1.0115163326263428,grad_norm: 0.9999991401463957, iteration: 133230
loss: 1.0126525163650513,grad_norm: 0.9501996851585186, iteration: 133231
loss: 1.0008970499038696,grad_norm: 0.9999990432078816, iteration: 133232
loss: 1.0019826889038086,grad_norm: 0.9999995608045251, iteration: 133233
loss: 0.9921246767044067,grad_norm: 0.9999990969927803, iteration: 133234
loss: 1.0035656690597534,grad_norm: 0.888952635732973, iteration: 133235
loss: 1.0025177001953125,grad_norm: 0.988297007626027, iteration: 133236
loss: 1.0081390142440796,grad_norm: 0.9999996125210445, iteration: 133237
loss: 0.9809187054634094,grad_norm: 0.8869192879666318, iteration: 133238
loss: 1.1105785369873047,grad_norm: 0.9999996481112146, iteration: 133239
loss: 1.0550628900527954,grad_norm: 0.9999991338235884, iteration: 133240
loss: 1.000261902809143,grad_norm: 0.8818185357122819, iteration: 133241
loss: 0.9954137206077576,grad_norm: 0.8276170082824642, iteration: 133242
loss: 1.0051417350769043,grad_norm: 0.8414397238689535, iteration: 133243
loss: 0.9782175421714783,grad_norm: 0.9999991330394667, iteration: 133244
loss: 1.0108025074005127,grad_norm: 0.9999990837703323, iteration: 133245
loss: 0.9881200194358826,grad_norm: 0.8569674861944878, iteration: 133246
loss: 1.090579867362976,grad_norm: 0.9999994688726322, iteration: 133247
loss: 1.0119670629501343,grad_norm: 0.9907684451509581, iteration: 133248
loss: 0.9817678332328796,grad_norm: 0.9999990883402552, iteration: 133249
loss: 1.1747788190841675,grad_norm: 0.9999991232392256, iteration: 133250
loss: 1.0278816223144531,grad_norm: 0.9999995492676595, iteration: 133251
loss: 1.00225031375885,grad_norm: 0.9999992151255386, iteration: 133252
loss: 1.0170234441757202,grad_norm: 0.9999992064001788, iteration: 133253
loss: 1.057932734489441,grad_norm: 0.9999994859910069, iteration: 133254
loss: 1.1452357769012451,grad_norm: 0.9999993817791025, iteration: 133255
loss: 0.9913406372070312,grad_norm: 0.9999998902203414, iteration: 133256
loss: 1.0571881532669067,grad_norm: 0.9999999618477233, iteration: 133257
loss: 1.0162464380264282,grad_norm: 0.9999991131748445, iteration: 133258
loss: 1.0152562856674194,grad_norm: 0.9999991948542705, iteration: 133259
loss: 1.1639498472213745,grad_norm: 0.999999151712626, iteration: 133260
loss: 1.060713768005371,grad_norm: 0.9999998082005085, iteration: 133261
loss: 0.9830883145332336,grad_norm: 0.9999191297362332, iteration: 133262
loss: 0.9848785400390625,grad_norm: 0.9999991651176078, iteration: 133263
loss: 1.0578268766403198,grad_norm: 0.9999998786878797, iteration: 133264
loss: 1.0233920812606812,grad_norm: 0.9999991010309409, iteration: 133265
loss: 1.003433346748352,grad_norm: 0.9162053715993433, iteration: 133266
loss: 1.0777755975723267,grad_norm: 0.9999997310074676, iteration: 133267
loss: 1.0402675867080688,grad_norm: 0.9999990419109758, iteration: 133268
loss: 1.0007703304290771,grad_norm: 0.9999992312407077, iteration: 133269
loss: 0.9838390350341797,grad_norm: 0.99999916533846, iteration: 133270
loss: 0.9947373270988464,grad_norm: 0.9999989970532913, iteration: 133271
loss: 1.0305458307266235,grad_norm: 0.9999994114302618, iteration: 133272
loss: 0.9935203790664673,grad_norm: 0.9999993641129546, iteration: 133273
loss: 1.0017997026443481,grad_norm: 0.9672710295558606, iteration: 133274
loss: 0.9745604395866394,grad_norm: 0.9999992943230263, iteration: 133275
loss: 1.0772007703781128,grad_norm: 0.9999993839359618, iteration: 133276
loss: 1.2614140510559082,grad_norm: 0.999999626508073, iteration: 133277
loss: 1.3293743133544922,grad_norm: 0.9999999526880072, iteration: 133278
loss: 1.0909196138381958,grad_norm: 0.9999992983980388, iteration: 133279
loss: 1.1283366680145264,grad_norm: 0.9999998417650542, iteration: 133280
loss: 1.0259219408035278,grad_norm: 0.9400670139698898, iteration: 133281
loss: 1.0057885646820068,grad_norm: 0.9999991831464149, iteration: 133282
loss: 1.0613601207733154,grad_norm: 0.9999993719862044, iteration: 133283
loss: 1.0065606832504272,grad_norm: 0.999999262690343, iteration: 133284
loss: 1.154922604560852,grad_norm: 0.9999992182745172, iteration: 133285
loss: 0.9851232767105103,grad_norm: 0.9999991403775863, iteration: 133286
loss: 1.0183802843093872,grad_norm: 0.9999991558342224, iteration: 133287
loss: 1.0031503438949585,grad_norm: 0.9999993596870114, iteration: 133288
loss: 1.019188404083252,grad_norm: 0.999999247172375, iteration: 133289
loss: 1.0148956775665283,grad_norm: 0.9999998616489854, iteration: 133290
loss: 1.0041816234588623,grad_norm: 0.9802390183951071, iteration: 133291
loss: 1.0360188484191895,grad_norm: 0.9999995206900121, iteration: 133292
loss: 0.9797017574310303,grad_norm: 0.9863662076198628, iteration: 133293
loss: 1.007259726524353,grad_norm: 0.9263707998682676, iteration: 133294
loss: 0.9706578254699707,grad_norm: 0.9999990985560372, iteration: 133295
loss: 1.0164496898651123,grad_norm: 0.9999990959503052, iteration: 133296
loss: 0.9967146515846252,grad_norm: 0.9999994819135823, iteration: 133297
loss: 0.9955536127090454,grad_norm: 0.9999992282310025, iteration: 133298
loss: 1.0074182748794556,grad_norm: 0.9999992076593519, iteration: 133299
loss: 1.0094109773635864,grad_norm: 0.9170624016434982, iteration: 133300
loss: 0.9888589382171631,grad_norm: 0.9655674922428427, iteration: 133301
loss: 1.0132083892822266,grad_norm: 0.9999991807331894, iteration: 133302
loss: 0.9854618906974792,grad_norm: 0.999999214640565, iteration: 133303
loss: 1.027917742729187,grad_norm: 0.891581767530465, iteration: 133304
loss: 0.9683523178100586,grad_norm: 0.99999979173331, iteration: 133305
loss: 1.0216137170791626,grad_norm: 0.9999991268091073, iteration: 133306
loss: 0.9961367845535278,grad_norm: 0.9999992449182022, iteration: 133307
loss: 1.0228192806243896,grad_norm: 0.9999992075356218, iteration: 133308
loss: 0.9945981502532959,grad_norm: 0.9933531458404783, iteration: 133309
loss: 1.0198932886123657,grad_norm: 0.9999996341324807, iteration: 133310
loss: 0.9355446696281433,grad_norm: 0.9999991042632473, iteration: 133311
loss: 0.9886460304260254,grad_norm: 0.9999990814401413, iteration: 133312
loss: 0.9946673512458801,grad_norm: 0.8791064864854488, iteration: 133313
loss: 0.9973558187484741,grad_norm: 0.9999992228431772, iteration: 133314
loss: 1.01299250125885,grad_norm: 0.9745663658797145, iteration: 133315
loss: 0.9972084164619446,grad_norm: 0.9999990414765361, iteration: 133316
loss: 0.9861372113227844,grad_norm: 0.999998986278664, iteration: 133317
loss: 1.1185120344161987,grad_norm: 0.9999998707588238, iteration: 133318
loss: 0.9797539710998535,grad_norm: 0.9246876934585797, iteration: 133319
loss: 0.9623759984970093,grad_norm: 0.9999992206328718, iteration: 133320
loss: 0.9914132356643677,grad_norm: 0.9952932224763315, iteration: 133321
loss: 1.0184000730514526,grad_norm: 0.9824037383857086, iteration: 133322
loss: 0.9818483591079712,grad_norm: 0.9956752036492419, iteration: 133323
loss: 1.0998387336730957,grad_norm: 0.9999991796752453, iteration: 133324
loss: 0.9960969686508179,grad_norm: 0.9999990406652557, iteration: 133325
loss: 0.9927956461906433,grad_norm: 0.8678208671655461, iteration: 133326
loss: 1.0162630081176758,grad_norm: 0.9405907718822117, iteration: 133327
loss: 1.0126286745071411,grad_norm: 0.9999992939218018, iteration: 133328
loss: 0.9878292083740234,grad_norm: 0.9999990088024396, iteration: 133329
loss: 0.9834784865379333,grad_norm: 0.9999992705572379, iteration: 133330
loss: 1.0013113021850586,grad_norm: 0.999999128110834, iteration: 133331
loss: 0.9920409321784973,grad_norm: 0.9851425537577183, iteration: 133332
loss: 1.0013424158096313,grad_norm: 0.9999990685934461, iteration: 133333
loss: 0.9946494698524475,grad_norm: 0.8082707168707381, iteration: 133334
loss: 1.006670355796814,grad_norm: 0.9999992280650973, iteration: 133335
loss: 1.0277810096740723,grad_norm: 0.9998640232690578, iteration: 133336
loss: 0.9944338202476501,grad_norm: 0.999999122329472, iteration: 133337
loss: 0.987795889377594,grad_norm: 0.9999992262099835, iteration: 133338
loss: 1.0066198110580444,grad_norm: 0.9999993105374578, iteration: 133339
loss: 1.0169005393981934,grad_norm: 0.9075467492404008, iteration: 133340
loss: 0.9773492813110352,grad_norm: 0.9380660226104036, iteration: 133341
loss: 1.0067858695983887,grad_norm: 0.9999999086164536, iteration: 133342
loss: 0.9960231781005859,grad_norm: 0.8886188636613228, iteration: 133343
loss: 1.0544277429580688,grad_norm: 0.9696298228603216, iteration: 133344
loss: 0.9946757555007935,grad_norm: 0.95055250831175, iteration: 133345
loss: 1.02817964553833,grad_norm: 0.9999992064296601, iteration: 133346
loss: 1.010940432548523,grad_norm: 0.874618377398811, iteration: 133347
loss: 1.0179656744003296,grad_norm: 0.9999990865333177, iteration: 133348
loss: 0.9888894557952881,grad_norm: 0.9999990560413855, iteration: 133349
loss: 1.019235610961914,grad_norm: 0.9999993411691233, iteration: 133350
loss: 1.0061417818069458,grad_norm: 0.9999992752729046, iteration: 133351
loss: 0.9838902354240417,grad_norm: 0.8499352025097421, iteration: 133352
loss: 1.0177146196365356,grad_norm: 0.8843611075035218, iteration: 133353
loss: 0.9734604358673096,grad_norm: 0.8943190880232761, iteration: 133354
loss: 0.9909480810165405,grad_norm: 0.8984576754294525, iteration: 133355
loss: 0.9929296970367432,grad_norm: 0.9999989756197224, iteration: 133356
loss: 0.992530882358551,grad_norm: 0.9999991152267064, iteration: 133357
loss: 1.0089079141616821,grad_norm: 0.9999990186451401, iteration: 133358
loss: 0.9934473037719727,grad_norm: 0.9127000503194997, iteration: 133359
loss: 1.0292026996612549,grad_norm: 0.9296023811950317, iteration: 133360
loss: 1.1394727230072021,grad_norm: 0.9999991586890813, iteration: 133361
loss: 1.0054621696472168,grad_norm: 0.9999990455730161, iteration: 133362
loss: 0.9822646975517273,grad_norm: 0.9285117638941209, iteration: 133363
loss: 0.9829918146133423,grad_norm: 0.9999992421972597, iteration: 133364
loss: 1.0434011220932007,grad_norm: 0.9999993034204638, iteration: 133365
loss: 0.9595317840576172,grad_norm: 0.9999992961842519, iteration: 133366
loss: 1.0155006647109985,grad_norm: 0.824732140657173, iteration: 133367
loss: 0.9823428392410278,grad_norm: 0.9999989664045964, iteration: 133368
loss: 0.9891128540039062,grad_norm: 0.9837102093214094, iteration: 133369
loss: 1.12245774269104,grad_norm: 0.9999992624719246, iteration: 133370
loss: 1.0908993482589722,grad_norm: 0.9999994577298467, iteration: 133371
loss: 0.9957308769226074,grad_norm: 0.9999992199087618, iteration: 133372
loss: 0.990536093711853,grad_norm: 0.9999992597821107, iteration: 133373
loss: 0.9759250283241272,grad_norm: 0.9999990757251698, iteration: 133374
loss: 1.077144980430603,grad_norm: 0.916197830857521, iteration: 133375
loss: 0.9961498379707336,grad_norm: 0.9835239093476322, iteration: 133376
loss: 1.0418280363082886,grad_norm: 0.9999990876440756, iteration: 133377
loss: 1.0275707244873047,grad_norm: 0.9960598344493385, iteration: 133378
loss: 1.0000603199005127,grad_norm: 0.9368728353077674, iteration: 133379
loss: 0.9764318466186523,grad_norm: 0.8991345367560488, iteration: 133380
loss: 1.0119210481643677,grad_norm: 0.9218070826355208, iteration: 133381
loss: 1.000183343887329,grad_norm: 0.9999990196825997, iteration: 133382
loss: 1.003549575805664,grad_norm: 0.9638741498213177, iteration: 133383
loss: 1.0202311277389526,grad_norm: 0.9999991661982344, iteration: 133384
loss: 1.0248662233352661,grad_norm: 0.999999092595717, iteration: 133385
loss: 1.0051881074905396,grad_norm: 0.9999992758765116, iteration: 133386
loss: 0.9883054494857788,grad_norm: 0.9999992156682856, iteration: 133387
loss: 0.9785707592964172,grad_norm: 0.9999992990327446, iteration: 133388
loss: 1.0109854936599731,grad_norm: 0.995993703337724, iteration: 133389
loss: 0.984394371509552,grad_norm: 0.9396595609269058, iteration: 133390
loss: 0.9700794816017151,grad_norm: 0.9999992111718774, iteration: 133391
loss: 1.0076566934585571,grad_norm: 0.9999992203772805, iteration: 133392
loss: 0.9851593375205994,grad_norm: 0.9999989787877435, iteration: 133393
loss: 1.0473641157150269,grad_norm: 0.9999997541039993, iteration: 133394
loss: 0.9850471615791321,grad_norm: 0.9999991054983515, iteration: 133395
loss: 1.0008689165115356,grad_norm: 0.9017537033499466, iteration: 133396
loss: 0.9868223071098328,grad_norm: 0.9999991830480197, iteration: 133397
loss: 1.0012651681900024,grad_norm: 0.9999992944999478, iteration: 133398
loss: 1.05156409740448,grad_norm: 0.9999993609882262, iteration: 133399
loss: 0.9954565763473511,grad_norm: 0.9792418055444317, iteration: 133400
loss: 0.9984836578369141,grad_norm: 0.9380603225526499, iteration: 133401
loss: 1.0349255800247192,grad_norm: 0.9999991990186252, iteration: 133402
loss: 0.9822853803634644,grad_norm: 0.9999991702600394, iteration: 133403
loss: 0.9974610805511475,grad_norm: 0.9999991503733605, iteration: 133404
loss: 0.9863951206207275,grad_norm: 0.9999991469844701, iteration: 133405
loss: 0.9885247945785522,grad_norm: 0.9999990779143761, iteration: 133406
loss: 1.0425001382827759,grad_norm: 0.9999996580987867, iteration: 133407
loss: 0.9763275384902954,grad_norm: 0.9850331653934926, iteration: 133408
loss: 1.009650468826294,grad_norm: 0.9999992790947889, iteration: 133409
loss: 1.0025802850723267,grad_norm: 0.9154648613622249, iteration: 133410
loss: 1.084987998008728,grad_norm: 0.9999998934026848, iteration: 133411
loss: 0.9797555804252625,grad_norm: 0.9999994957384224, iteration: 133412
loss: 1.0043094158172607,grad_norm: 0.9782434984845302, iteration: 133413
loss: 1.044000506401062,grad_norm: 0.9877195138186161, iteration: 133414
loss: 0.9840576648712158,grad_norm: 0.9077896029694534, iteration: 133415
loss: 1.0183489322662354,grad_norm: 0.9999991658749935, iteration: 133416
loss: 1.000199794769287,grad_norm: 0.9999992348775257, iteration: 133417
loss: 0.981638491153717,grad_norm: 0.9884912847504269, iteration: 133418
loss: 1.0137463808059692,grad_norm: 0.9999992287038655, iteration: 133419
loss: 1.0633354187011719,grad_norm: 0.9999998255425225, iteration: 133420
loss: 0.9959967136383057,grad_norm: 0.9999990644064226, iteration: 133421
loss: 1.0015630722045898,grad_norm: 0.9999989490288821, iteration: 133422
loss: 1.0443345308303833,grad_norm: 0.9999990135446774, iteration: 133423
loss: 0.9974899291992188,grad_norm: 0.999999917741906, iteration: 133424
loss: 0.9864898324012756,grad_norm: 0.9999990726175852, iteration: 133425
loss: 1.0417211055755615,grad_norm: 0.9999997258394469, iteration: 133426
loss: 0.9976267218589783,grad_norm: 0.99935669655279, iteration: 133427
loss: 0.9977236390113831,grad_norm: 0.9851794500759882, iteration: 133428
loss: 1.0110712051391602,grad_norm: 0.9999990076892209, iteration: 133429
loss: 1.0062636137008667,grad_norm: 0.9999989218978032, iteration: 133430
loss: 1.021036148071289,grad_norm: 0.9999993626419857, iteration: 133431
loss: 1.009403944015503,grad_norm: 0.8445999343416376, iteration: 133432
loss: 1.033202052116394,grad_norm: 0.9999991115569714, iteration: 133433
loss: 1.0266228914260864,grad_norm: 0.9999990743772761, iteration: 133434
loss: 1.0466924905776978,grad_norm: 0.9999998380071656, iteration: 133435
loss: 1.056755781173706,grad_norm: 0.9999994043644024, iteration: 133436
loss: 1.0819791555404663,grad_norm: 0.9999999275097262, iteration: 133437
loss: 0.977103590965271,grad_norm: 0.9999992216951026, iteration: 133438
loss: 1.0012019872665405,grad_norm: 0.9999992118071183, iteration: 133439
loss: 1.0131899118423462,grad_norm: 0.9999990594990534, iteration: 133440
loss: 1.018350601196289,grad_norm: 0.9881310751417053, iteration: 133441
loss: 1.0045918226242065,grad_norm: 0.9999991513017423, iteration: 133442
loss: 1.038657546043396,grad_norm: 0.9755551760123388, iteration: 133443
loss: 1.0264990329742432,grad_norm: 0.9999991462751723, iteration: 133444
loss: 1.01662278175354,grad_norm: 0.9999989656849467, iteration: 133445
loss: 1.0148667097091675,grad_norm: 0.8481484526928722, iteration: 133446
loss: 1.0162510871887207,grad_norm: 0.7481513801294382, iteration: 133447
loss: 0.9753654599189758,grad_norm: 0.9999992371840463, iteration: 133448
loss: 1.038773536682129,grad_norm: 0.9999996400233511, iteration: 133449
loss: 0.9897259473800659,grad_norm: 0.9999990420769033, iteration: 133450
loss: 1.015095829963684,grad_norm: 0.9603847579340155, iteration: 133451
loss: 0.9835317730903625,grad_norm: 0.9999991622098027, iteration: 133452
loss: 0.9928674101829529,grad_norm: 0.9359867506864058, iteration: 133453
loss: 0.971688985824585,grad_norm: 0.9999993443163165, iteration: 133454
loss: 0.9599302411079407,grad_norm: 0.9999990518067522, iteration: 133455
loss: 0.9753219485282898,grad_norm: 0.9185686806037472, iteration: 133456
loss: 1.0075557231903076,grad_norm: 0.9999990322393427, iteration: 133457
loss: 0.9842031002044678,grad_norm: 0.9956795705291386, iteration: 133458
loss: 0.9985853433609009,grad_norm: 0.9999990358088531, iteration: 133459
loss: 1.026538372039795,grad_norm: 0.9999995031932867, iteration: 133460
loss: 0.9697505235671997,grad_norm: 0.9999992559746719, iteration: 133461
loss: 0.9696788191795349,grad_norm: 0.9999991101446187, iteration: 133462
loss: 1.019215703010559,grad_norm: 0.9999992501421037, iteration: 133463
loss: 0.97553950548172,grad_norm: 0.9999990932091102, iteration: 133464
loss: 0.9752799272537231,grad_norm: 0.9999990067580269, iteration: 133465
loss: 1.0123862028121948,grad_norm: 0.9999990473931034, iteration: 133466
loss: 0.9968180060386658,grad_norm: 0.8934237837514103, iteration: 133467
loss: 1.0019327402114868,grad_norm: 0.8563639867562896, iteration: 133468
loss: 1.0018324851989746,grad_norm: 0.9999990888330025, iteration: 133469
loss: 1.0109974145889282,grad_norm: 0.9999989931186465, iteration: 133470
loss: 1.0607093572616577,grad_norm: 0.99999963219515, iteration: 133471
loss: 1.0224316120147705,grad_norm: 0.8796937983056735, iteration: 133472
loss: 0.9925731420516968,grad_norm: 0.9698169980640902, iteration: 133473
loss: 0.9650543332099915,grad_norm: 0.9459516770651484, iteration: 133474
loss: 1.0231592655181885,grad_norm: 0.9999991259563665, iteration: 133475
loss: 1.0216368436813354,grad_norm: 0.9493735353877347, iteration: 133476
loss: 1.0251283645629883,grad_norm: 0.9999989723657727, iteration: 133477
loss: 1.0135468244552612,grad_norm: 0.9309962020865867, iteration: 133478
loss: 0.9923585653305054,grad_norm: 0.9999993119918181, iteration: 133479
loss: 1.0667765140533447,grad_norm: 0.9999993018738135, iteration: 133480
loss: 0.9989991188049316,grad_norm: 0.9999991663604371, iteration: 133481
loss: 0.9909306764602661,grad_norm: 0.9999992038722104, iteration: 133482
loss: 1.054855227470398,grad_norm: 0.999999878656398, iteration: 133483
loss: 0.9969724416732788,grad_norm: 0.9442316753600092, iteration: 133484
loss: 0.9912375211715698,grad_norm: 0.9094151669738354, iteration: 133485
loss: 1.0055865049362183,grad_norm: 0.8451473343374031, iteration: 133486
loss: 0.9904636740684509,grad_norm: 0.9999989982440277, iteration: 133487
loss: 0.9736837148666382,grad_norm: 0.9167342666748552, iteration: 133488
loss: 0.9804885387420654,grad_norm: 0.9999990609047642, iteration: 133489
loss: 0.9972280859947205,grad_norm: 0.9999992067333184, iteration: 133490
loss: 0.9845362305641174,grad_norm: 0.9999991295999502, iteration: 133491
loss: 1.1001790761947632,grad_norm: 0.9999997619616822, iteration: 133492
loss: 1.0459831953048706,grad_norm: 0.9999994998988717, iteration: 133493
loss: 1.059165120124817,grad_norm: 0.9676685689871339, iteration: 133494
loss: 1.0189720392227173,grad_norm: 0.9999991546360515, iteration: 133495
loss: 0.9850760698318481,grad_norm: 0.999999329097893, iteration: 133496
loss: 0.974194347858429,grad_norm: 0.9999990970142463, iteration: 133497
loss: 1.0359537601470947,grad_norm: 0.9835818118886099, iteration: 133498
loss: 1.0053595304489136,grad_norm: 0.939832480254798, iteration: 133499
loss: 1.0146963596343994,grad_norm: 0.99999909179409, iteration: 133500
loss: 1.0022921562194824,grad_norm: 0.9999990234937399, iteration: 133501
loss: 0.987721860408783,grad_norm: 0.9999992296002213, iteration: 133502
loss: 1.0308687686920166,grad_norm: 0.9999990606347333, iteration: 133503
loss: 0.9770408868789673,grad_norm: 0.9810883147260588, iteration: 133504
loss: 1.0401482582092285,grad_norm: 0.99999925830351, iteration: 133505
loss: 1.0252749919891357,grad_norm: 0.9999994513759352, iteration: 133506
loss: 1.0834906101226807,grad_norm: 0.9999998471584147, iteration: 133507
loss: 1.1507971286773682,grad_norm: 0.9999994026945933, iteration: 133508
loss: 0.9761410355567932,grad_norm: 0.9414394125529149, iteration: 133509
loss: 0.9790129065513611,grad_norm: 0.9999993522156925, iteration: 133510
loss: 1.0059114694595337,grad_norm: 0.999999207903257, iteration: 133511
loss: 1.0133137702941895,grad_norm: 0.9999991823401624, iteration: 133512
loss: 1.0435190200805664,grad_norm: 0.9999991257267846, iteration: 133513
loss: 1.032400131225586,grad_norm: 0.9999991587731635, iteration: 133514
loss: 0.9721271395683289,grad_norm: 0.9579992424326891, iteration: 133515
loss: 0.9922763705253601,grad_norm: 0.9999990556981733, iteration: 133516
loss: 0.9911242723464966,grad_norm: 0.9999990979004963, iteration: 133517
loss: 1.0092276334762573,grad_norm: 0.9999991428961166, iteration: 133518
loss: 1.0569325685501099,grad_norm: 0.9999991336713621, iteration: 133519
loss: 1.026768445968628,grad_norm: 0.9999991442067082, iteration: 133520
loss: 0.9887891411781311,grad_norm: 0.9999994757195825, iteration: 133521
loss: 0.9689836502075195,grad_norm: 0.9999990173736126, iteration: 133522
loss: 0.9806315898895264,grad_norm: 0.9999990628342122, iteration: 133523
loss: 0.9753109216690063,grad_norm: 0.9999992514945198, iteration: 133524
loss: 1.0098634958267212,grad_norm: 0.8936677683038197, iteration: 133525
loss: 1.0620465278625488,grad_norm: 0.9999996259148575, iteration: 133526
loss: 1.0293669700622559,grad_norm: 0.9568137051869878, iteration: 133527
loss: 1.0732280015945435,grad_norm: 0.999999147909214, iteration: 133528
loss: 1.1174342632293701,grad_norm: 0.9999998650922342, iteration: 133529
loss: 1.0303071737289429,grad_norm: 0.9865096360513088, iteration: 133530
loss: 0.9458001852035522,grad_norm: 0.99999937961449, iteration: 133531
loss: 1.0152318477630615,grad_norm: 0.9999992118115858, iteration: 133532
loss: 0.9852876663208008,grad_norm: 0.9046104086274628, iteration: 133533
loss: 1.0450292825698853,grad_norm: 0.9999993225397666, iteration: 133534
loss: 1.0226794481277466,grad_norm: 0.9999992302168683, iteration: 133535
loss: 0.9725131392478943,grad_norm: 0.9999989455987627, iteration: 133536
loss: 0.9872623085975647,grad_norm: 0.9999992386102124, iteration: 133537
loss: 1.0127084255218506,grad_norm: 0.9999990797345448, iteration: 133538
loss: 1.0434455871582031,grad_norm: 0.9999993758259129, iteration: 133539
loss: 1.0209101438522339,grad_norm: 0.9999990406134791, iteration: 133540
loss: 0.9762422442436218,grad_norm: 0.8665637166963045, iteration: 133541
loss: 0.9894769787788391,grad_norm: 0.9999992668194537, iteration: 133542
loss: 1.0288991928100586,grad_norm: 0.9023245188142276, iteration: 133543
loss: 1.0151774883270264,grad_norm: 0.9999990379563517, iteration: 133544
loss: 0.991877019405365,grad_norm: 0.9999990687407132, iteration: 133545
loss: 0.9985705018043518,grad_norm: 0.9110882869145697, iteration: 133546
loss: 0.9997576475143433,grad_norm: 0.9999989999556572, iteration: 133547
loss: 1.076550006866455,grad_norm: 0.9999991281625152, iteration: 133548
loss: 0.9786438345909119,grad_norm: 0.999999062140214, iteration: 133549
loss: 0.9904129505157471,grad_norm: 0.9246444701907109, iteration: 133550
loss: 0.9837784171104431,grad_norm: 0.9999993019501359, iteration: 133551
loss: 0.9979473352432251,grad_norm: 0.8405763527618695, iteration: 133552
loss: 1.0099802017211914,grad_norm: 0.9999990550207287, iteration: 133553
loss: 1.015767216682434,grad_norm: 0.9999990476566246, iteration: 133554
loss: 1.0155479907989502,grad_norm: 0.8488286608648307, iteration: 133555
loss: 1.0307607650756836,grad_norm: 0.9999992356605457, iteration: 133556
loss: 0.9897563457489014,grad_norm: 0.9481643916785994, iteration: 133557
loss: 1.002159595489502,grad_norm: 0.9999990732801737, iteration: 133558
loss: 1.0097564458847046,grad_norm: 0.999999036381903, iteration: 133559
loss: 0.9649458527565002,grad_norm: 0.9999990240452187, iteration: 133560
loss: 1.004401445388794,grad_norm: 0.9999989403601184, iteration: 133561
loss: 0.9611311554908752,grad_norm: 0.9999991199969731, iteration: 133562
loss: 0.9759489297866821,grad_norm: 0.9999989444381285, iteration: 133563
loss: 1.0019373893737793,grad_norm: 0.9072843319965945, iteration: 133564
loss: 0.9642692804336548,grad_norm: 0.9999990684698932, iteration: 133565
loss: 0.9936396479606628,grad_norm: 0.9999994788976209, iteration: 133566
loss: 1.0091798305511475,grad_norm: 0.9715513713990173, iteration: 133567
loss: 0.9852390289306641,grad_norm: 0.9999990560803104, iteration: 133568
loss: 1.031980276107788,grad_norm: 0.9999990906426027, iteration: 133569
loss: 0.9974361062049866,grad_norm: 0.999999073583786, iteration: 133570
loss: 0.9790396094322205,grad_norm: 0.8736396975589708, iteration: 133571
loss: 1.0068047046661377,grad_norm: 0.999999071028755, iteration: 133572
loss: 1.0223382711410522,grad_norm: 0.9999988580272648, iteration: 133573
loss: 1.0176068544387817,grad_norm: 0.9999991291309627, iteration: 133574
loss: 0.9966011047363281,grad_norm: 0.9802204464248997, iteration: 133575
loss: 1.0034836530685425,grad_norm: 0.9188560811939883, iteration: 133576
loss: 1.0025616884231567,grad_norm: 0.9820465407277386, iteration: 133577
loss: 1.024248480796814,grad_norm: 0.9999990504567177, iteration: 133578
loss: 1.0102711915969849,grad_norm: 0.9999990524051405, iteration: 133579
loss: 1.0288063287734985,grad_norm: 0.9999990582936237, iteration: 133580
loss: 0.9826784729957581,grad_norm: 0.9999991917104801, iteration: 133581
loss: 1.0432027578353882,grad_norm: 0.9524476747680937, iteration: 133582
loss: 1.0256977081298828,grad_norm: 0.9999991156694651, iteration: 133583
loss: 1.0382647514343262,grad_norm: 0.9999995171983179, iteration: 133584
loss: 0.9918660521507263,grad_norm: 0.9999989705833564, iteration: 133585
loss: 0.9809731245040894,grad_norm: 0.9999991664909867, iteration: 133586
loss: 1.0107619762420654,grad_norm: 0.941998517634303, iteration: 133587
loss: 1.0072877407073975,grad_norm: 0.8592498123842036, iteration: 133588
loss: 1.028794288635254,grad_norm: 0.9793239084225287, iteration: 133589
loss: 0.9919139742851257,grad_norm: 0.9999990730254769, iteration: 133590
loss: 1.005737543106079,grad_norm: 0.9999990314944696, iteration: 133591
loss: 1.0092109441757202,grad_norm: 0.9999990461683105, iteration: 133592
loss: 0.9899671673774719,grad_norm: 0.9068632916485228, iteration: 133593
loss: 1.0048227310180664,grad_norm: 0.9999991157044693, iteration: 133594
loss: 1.0444549322128296,grad_norm: 0.9999999416908043, iteration: 133595
loss: 1.0034148693084717,grad_norm: 0.9999991319223099, iteration: 133596
loss: 1.051132321357727,grad_norm: 0.9999994098028954, iteration: 133597
loss: 1.0338448286056519,grad_norm: 0.8882632160802544, iteration: 133598
loss: 1.0144102573394775,grad_norm: 0.9999990362325036, iteration: 133599
loss: 1.0122849941253662,grad_norm: 0.9999992004014029, iteration: 133600
loss: 1.0246622562408447,grad_norm: 0.9999993432703737, iteration: 133601
loss: 0.9935129880905151,grad_norm: 0.833061984256791, iteration: 133602
loss: 1.0658351182937622,grad_norm: 0.9908564243602741, iteration: 133603
loss: 0.9818437695503235,grad_norm: 0.973809311884433, iteration: 133604
loss: 0.9708849787712097,grad_norm: 0.9594837718941489, iteration: 133605
loss: 1.0148338079452515,grad_norm: 0.999999189491805, iteration: 133606
loss: 1.0536402463912964,grad_norm: 0.999999484984395, iteration: 133607
loss: 1.1089805364608765,grad_norm: 0.9999991525829032, iteration: 133608
loss: 1.0262479782104492,grad_norm: 0.9952528240005202, iteration: 133609
loss: 0.9755308032035828,grad_norm: 0.9999989758548566, iteration: 133610
loss: 0.980981707572937,grad_norm: 0.9999991324386472, iteration: 133611
loss: 0.9547907710075378,grad_norm: 0.9999991577028772, iteration: 133612
loss: 1.0110727548599243,grad_norm: 0.9520544216391084, iteration: 133613
loss: 0.972659707069397,grad_norm: 0.9999989663010871, iteration: 133614
loss: 0.998201847076416,grad_norm: 0.9477342744257747, iteration: 133615
loss: 1.0310840606689453,grad_norm: 0.9987679456031076, iteration: 133616
loss: 1.0039077997207642,grad_norm: 0.9999993626101272, iteration: 133617
loss: 0.9966858625411987,grad_norm: 0.9999990685889554, iteration: 133618
loss: 1.0193946361541748,grad_norm: 0.9999991387243372, iteration: 133619
loss: 1.0496158599853516,grad_norm: 0.9999991672425517, iteration: 133620
loss: 0.9926705360412598,grad_norm: 0.9999989837440937, iteration: 133621
loss: 1.0116314888000488,grad_norm: 0.9152947701952732, iteration: 133622
loss: 1.0064343214035034,grad_norm: 0.9999990464986813, iteration: 133623
loss: 1.0070316791534424,grad_norm: 0.9999990207990104, iteration: 133624
loss: 0.954539954662323,grad_norm: 0.9999992953518084, iteration: 133625
loss: 1.0272459983825684,grad_norm: 0.9999992085979581, iteration: 133626
loss: 0.9916210770606995,grad_norm: 0.9296925902725387, iteration: 133627
loss: 1.0032035112380981,grad_norm: 0.9631472346802818, iteration: 133628
loss: 0.9786262512207031,grad_norm: 0.9999992775431881, iteration: 133629
loss: 1.0836387872695923,grad_norm: 1.0000000052335496, iteration: 133630
loss: 1.0034452676773071,grad_norm: 0.9276275030420805, iteration: 133631
loss: 1.0225059986114502,grad_norm: 0.9854734576577868, iteration: 133632
loss: 1.026409387588501,grad_norm: 0.9999990365560275, iteration: 133633
loss: 1.001848578453064,grad_norm: 0.9969683708236451, iteration: 133634
loss: 0.9985543489456177,grad_norm: 0.9999991586633323, iteration: 133635
loss: 1.1963913440704346,grad_norm: 0.9999995318981473, iteration: 133636
loss: 1.1063728332519531,grad_norm: 0.9999998792174164, iteration: 133637
loss: 1.0207878351211548,grad_norm: 0.7797921383544567, iteration: 133638
loss: 1.1564253568649292,grad_norm: 0.9999996064440922, iteration: 133639
loss: 0.9392969012260437,grad_norm: 0.9999990710589649, iteration: 133640
loss: 1.0012553930282593,grad_norm: 0.8797353843762978, iteration: 133641
loss: 0.9713826775550842,grad_norm: 0.9999991600634607, iteration: 133642
loss: 1.1132179498672485,grad_norm: 0.9999995512123747, iteration: 133643
loss: 1.1325396299362183,grad_norm: 0.999999086129815, iteration: 133644
loss: 1.096443772315979,grad_norm: 0.9313713786383218, iteration: 133645
loss: 0.9412561058998108,grad_norm: 0.9999991970143347, iteration: 133646
loss: 1.2444034814834595,grad_norm: 0.9999995283993227, iteration: 133647
loss: 0.9581298232078552,grad_norm: 0.9999992603011213, iteration: 133648
loss: 1.1277395486831665,grad_norm: 0.9999997951233441, iteration: 133649
loss: 0.9956763982772827,grad_norm: 0.99999944221658, iteration: 133650
loss: 0.9856559634208679,grad_norm: 0.9999991996050263, iteration: 133651
loss: 1.0938175916671753,grad_norm: 0.999999475708665, iteration: 133652
loss: 0.9671692848205566,grad_norm: 0.9625017508458859, iteration: 133653
loss: 1.0497328042984009,grad_norm: 0.9999991269987243, iteration: 133654
loss: 1.0761208534240723,grad_norm: 0.9999999040333724, iteration: 133655
loss: 1.0074586868286133,grad_norm: 0.9129299261560495, iteration: 133656
loss: 0.9828602075576782,grad_norm: 0.9999992732518153, iteration: 133657
loss: 0.9986026883125305,grad_norm: 0.872316514240622, iteration: 133658
loss: 0.9722828269004822,grad_norm: 0.9999991528827258, iteration: 133659
loss: 1.0187785625457764,grad_norm: 0.9999990437198645, iteration: 133660
loss: 1.0161573886871338,grad_norm: 0.9999991519750705, iteration: 133661
loss: 1.0233595371246338,grad_norm: 0.9999989993844782, iteration: 133662
loss: 0.9778234958648682,grad_norm: 0.9999991688369165, iteration: 133663
loss: 1.097760796546936,grad_norm: 0.999999538433119, iteration: 133664
loss: 1.0365216732025146,grad_norm: 0.9999992335245901, iteration: 133665
loss: 1.0119037628173828,grad_norm: 0.999999103158612, iteration: 133666
loss: 0.959313690662384,grad_norm: 0.9999990960256985, iteration: 133667
loss: 0.9863224625587463,grad_norm: 0.999999237543313, iteration: 133668
loss: 1.0204497575759888,grad_norm: 0.9999991422085783, iteration: 133669
loss: 1.0958378314971924,grad_norm: 0.9999997571588298, iteration: 133670
loss: 1.088906168937683,grad_norm: 0.9999992118340593, iteration: 133671
loss: 0.9784877300262451,grad_norm: 0.99999912703063, iteration: 133672
loss: 0.9409821033477783,grad_norm: 0.9393495369974516, iteration: 133673
loss: 1.0219261646270752,grad_norm: 0.9350401347921571, iteration: 133674
loss: 0.9911924600601196,grad_norm: 0.9557152799416558, iteration: 133675
loss: 1.0614315271377563,grad_norm: 0.9999997316890068, iteration: 133676
loss: 1.0058810710906982,grad_norm: 0.9470054548236091, iteration: 133677
loss: 0.9636012315750122,grad_norm: 0.9298126807701048, iteration: 133678
loss: 1.0082581043243408,grad_norm: 0.8900685844420225, iteration: 133679
loss: 1.0235720872879028,grad_norm: 0.9999999441250293, iteration: 133680
loss: 0.9890444278717041,grad_norm: 0.9999992018752433, iteration: 133681
loss: 0.9790741205215454,grad_norm: 0.9999991450125131, iteration: 133682
loss: 1.0119678974151611,grad_norm: 0.999999220902759, iteration: 133683
loss: 1.0072659254074097,grad_norm: 0.9999990472455171, iteration: 133684
loss: 1.04893159866333,grad_norm: 0.9999996968556853, iteration: 133685
loss: 0.984817624092102,grad_norm: 0.9523327549855543, iteration: 133686
loss: 1.0063331127166748,grad_norm: 0.953927500446663, iteration: 133687
loss: 0.9735181927680969,grad_norm: 0.9999991567804488, iteration: 133688
loss: 1.0275547504425049,grad_norm: 0.9999991810254919, iteration: 133689
loss: 1.0266207456588745,grad_norm: 0.9999992711980974, iteration: 133690
loss: 1.00188148021698,grad_norm: 0.9999990863550293, iteration: 133691
loss: 1.1944221258163452,grad_norm: 0.9999998014628072, iteration: 133692
loss: 0.9954272508621216,grad_norm: 0.9999990775806962, iteration: 133693
loss: 1.0185061693191528,grad_norm: 0.9999991301538369, iteration: 133694
loss: 0.9915098547935486,grad_norm: 0.9999991452400906, iteration: 133695
loss: 1.0342203378677368,grad_norm: 0.999999388053952, iteration: 133696
loss: 1.073320984840393,grad_norm: 0.9999992990904565, iteration: 133697
loss: 1.0267775058746338,grad_norm: 0.9973321148129659, iteration: 133698
loss: 0.9967878460884094,grad_norm: 0.9999992292558503, iteration: 133699
loss: 1.0281953811645508,grad_norm: 0.9878051412538197, iteration: 133700
loss: 0.986691951751709,grad_norm: 0.9999992824158752, iteration: 133701
loss: 1.050890326499939,grad_norm: 0.9999992087257352, iteration: 133702
loss: 0.977043867111206,grad_norm: 0.9299169838938355, iteration: 133703
loss: 0.9944782853126526,grad_norm: 0.9577024828429577, iteration: 133704
loss: 0.981158435344696,grad_norm: 0.9999991360973373, iteration: 133705
loss: 1.002487063407898,grad_norm: 0.999999189386901, iteration: 133706
loss: 1.0170196294784546,grad_norm: 0.9999991783048029, iteration: 133707
loss: 1.0758451223373413,grad_norm: 0.877655315353672, iteration: 133708
loss: 1.0094153881072998,grad_norm: 0.9342813666504154, iteration: 133709
loss: 1.0063892602920532,grad_norm: 0.9476861848264441, iteration: 133710
loss: 0.9995805621147156,grad_norm: 0.9999990472631608, iteration: 133711
loss: 1.0105535984039307,grad_norm: 0.9999991817155264, iteration: 133712
loss: 1.0199650526046753,grad_norm: 0.9659041015973907, iteration: 133713
loss: 1.1464354991912842,grad_norm: 0.999999299767366, iteration: 133714
loss: 0.9895077347755432,grad_norm: 0.9999995856979678, iteration: 133715
loss: 1.0071967840194702,grad_norm: 0.9999994094452767, iteration: 133716
loss: 0.9679238200187683,grad_norm: 0.78463320698969, iteration: 133717
loss: 1.0197383165359497,grad_norm: 0.9863095899420479, iteration: 133718
loss: 1.0584813356399536,grad_norm: 0.9999991751101813, iteration: 133719
loss: 0.999439001083374,grad_norm: 0.9999991783422807, iteration: 133720
loss: 0.9762043952941895,grad_norm: 0.9999993085529787, iteration: 133721
loss: 1.0336426496505737,grad_norm: 0.9110915007420134, iteration: 133722
loss: 0.9795051217079163,grad_norm: 0.9999991631039234, iteration: 133723
loss: 0.9662753939628601,grad_norm: 0.9999991910568463, iteration: 133724
loss: 1.004850149154663,grad_norm: 0.8827314400610721, iteration: 133725
loss: 0.9755764603614807,grad_norm: 0.9664478575504706, iteration: 133726
loss: 0.9773204922676086,grad_norm: 0.9611092477159103, iteration: 133727
loss: 1.0906978845596313,grad_norm: 0.9999990777012442, iteration: 133728
loss: 1.0137226581573486,grad_norm: 0.9999990678802836, iteration: 133729
loss: 1.0252385139465332,grad_norm: 0.9999993704646518, iteration: 133730
loss: 1.0006228685379028,grad_norm: 0.9999991518231207, iteration: 133731
loss: 1.0230703353881836,grad_norm: 0.8503486725856898, iteration: 133732
loss: 1.0862356424331665,grad_norm: 0.9999994817682193, iteration: 133733
loss: 1.0138065814971924,grad_norm: 0.9999991619971665, iteration: 133734
loss: 1.0127900838851929,grad_norm: 0.905832862726088, iteration: 133735
loss: 0.99562007188797,grad_norm: 0.9355360899983123, iteration: 133736
loss: 0.9780210256576538,grad_norm: 0.9999992998825641, iteration: 133737
loss: 0.9891299605369568,grad_norm: 0.9999989937129187, iteration: 133738
loss: 1.011735439300537,grad_norm: 0.9999989708527016, iteration: 133739
loss: 1.0080912113189697,grad_norm: 0.9999991861231776, iteration: 133740
loss: 0.9670039415359497,grad_norm: 0.9999992036420267, iteration: 133741
loss: 1.0044612884521484,grad_norm: 0.9999991256285683, iteration: 133742
loss: 1.0034888982772827,grad_norm: 0.9999991633129675, iteration: 133743
loss: 1.0571513175964355,grad_norm: 0.9999998514635342, iteration: 133744
loss: 1.0734221935272217,grad_norm: 0.9999990215837914, iteration: 133745
loss: 0.9774407744407654,grad_norm: 0.9606372054605201, iteration: 133746
loss: 1.0075459480285645,grad_norm: 0.9999993994275658, iteration: 133747
loss: 0.9974091649055481,grad_norm: 0.9389923219461807, iteration: 133748
loss: 0.9967157244682312,grad_norm: 0.9779784248426944, iteration: 133749
loss: 1.0099678039550781,grad_norm: 0.902812876615204, iteration: 133750
loss: 1.0236876010894775,grad_norm: 0.9999993003550167, iteration: 133751
loss: 1.002784252166748,grad_norm: 0.9529877331997564, iteration: 133752
loss: 0.972412109375,grad_norm: 0.996179387372357, iteration: 133753
loss: 1.2057780027389526,grad_norm: 0.9999999548007986, iteration: 133754
loss: 1.0397238731384277,grad_norm: 0.995427808676276, iteration: 133755
loss: 1.0111949443817139,grad_norm: 0.9999998335784074, iteration: 133756
loss: 0.988541841506958,grad_norm: 0.9999990768053938, iteration: 133757
loss: 1.0628007650375366,grad_norm: 0.9999998743949473, iteration: 133758
loss: 1.0000982284545898,grad_norm: 0.8889628434850475, iteration: 133759
loss: 1.0063179731369019,grad_norm: 0.9999992293345071, iteration: 133760
loss: 1.0194991827011108,grad_norm: 0.999999008542794, iteration: 133761
loss: 1.0299036502838135,grad_norm: 0.9999991447303219, iteration: 133762
loss: 1.1338592767715454,grad_norm: 0.9999999195751529, iteration: 133763
loss: 1.0548676252365112,grad_norm: 0.999999203041697, iteration: 133764
loss: 0.9950504302978516,grad_norm: 0.9999990338868538, iteration: 133765
loss: 1.1733876466751099,grad_norm: 0.9999998427430198, iteration: 133766
loss: 1.0766855478286743,grad_norm: 0.9999999024132462, iteration: 133767
loss: 1.0142344236373901,grad_norm: 0.9999993817773589, iteration: 133768
loss: 1.052330732345581,grad_norm: 0.9999991244430056, iteration: 133769
loss: 1.0106120109558105,grad_norm: 0.9763421916361221, iteration: 133770
loss: 0.9862754940986633,grad_norm: 0.9882868964957535, iteration: 133771
loss: 0.9888968467712402,grad_norm: 0.9999995030796714, iteration: 133772
loss: 1.0116022825241089,grad_norm: 0.9308139049040124, iteration: 133773
loss: 0.9892866611480713,grad_norm: 0.9214790292249759, iteration: 133774
loss: 1.057207465171814,grad_norm: 0.9999996232638609, iteration: 133775
loss: 1.083448886871338,grad_norm: 0.9999992346678928, iteration: 133776
loss: 0.9658984541893005,grad_norm: 0.9999989643259843, iteration: 133777
loss: 1.0050673484802246,grad_norm: 0.9685962810750351, iteration: 133778
loss: 0.9904621839523315,grad_norm: 0.9999990589013102, iteration: 133779
loss: 0.9848319292068481,grad_norm: 0.9999991860246733, iteration: 133780
loss: 1.0367190837860107,grad_norm: 0.9999996548367346, iteration: 133781
loss: 0.9913265705108643,grad_norm: 0.952495270073635, iteration: 133782
loss: 0.9599961042404175,grad_norm: 0.9482152058205158, iteration: 133783
loss: 0.9685255885124207,grad_norm: 0.8430953595219215, iteration: 133784
loss: 1.0043306350708008,grad_norm: 0.9999991493516309, iteration: 133785
loss: 1.0047197341918945,grad_norm: 0.9999998298545777, iteration: 133786
loss: 1.0739953517913818,grad_norm: 0.9999995146329845, iteration: 133787
loss: 1.005381464958191,grad_norm: 0.9999990789333825, iteration: 133788
loss: 0.9981104135513306,grad_norm: 0.8394001506976816, iteration: 133789
loss: 1.0164085626602173,grad_norm: 0.9999991826788118, iteration: 133790
loss: 1.0089625120162964,grad_norm: 0.9999990801927419, iteration: 133791
loss: 0.9836397171020508,grad_norm: 0.9776935447496163, iteration: 133792
loss: 0.9607546925544739,grad_norm: 0.9313382406141151, iteration: 133793
loss: 1.0135152339935303,grad_norm: 0.999999511792856, iteration: 133794
loss: 0.9922858476638794,grad_norm: 0.9999990914571528, iteration: 133795
loss: 0.97563636302948,grad_norm: 0.9428120894549815, iteration: 133796
loss: 1.0853550434112549,grad_norm: 0.9999991057472633, iteration: 133797
loss: 0.9923483729362488,grad_norm: 0.9786525586207547, iteration: 133798
loss: 0.9751761555671692,grad_norm: 0.9659485521631006, iteration: 133799
loss: 0.9842827320098877,grad_norm: 0.9999991696760981, iteration: 133800
loss: 0.9930320978164673,grad_norm: 0.9371514808488992, iteration: 133801
loss: 1.0031957626342773,grad_norm: 0.9136042175141984, iteration: 133802
loss: 0.9754117727279663,grad_norm: 0.8843580819621324, iteration: 133803
loss: 0.9890363812446594,grad_norm: 0.9499046753378628, iteration: 133804
loss: 1.0330100059509277,grad_norm: 0.9999998507046998, iteration: 133805
loss: 1.0119813680648804,grad_norm: 0.9999995519275767, iteration: 133806
loss: 1.0186012983322144,grad_norm: 0.9105755382506902, iteration: 133807
loss: 1.01687753200531,grad_norm: 0.9844020347348709, iteration: 133808
loss: 1.0369235277175903,grad_norm: 0.9612608351769899, iteration: 133809
loss: 1.0369453430175781,grad_norm: 0.999999259833169, iteration: 133810
loss: 1.000249981880188,grad_norm: 0.9999990501909342, iteration: 133811
loss: 0.9985800385475159,grad_norm: 0.9999990552158421, iteration: 133812
loss: 0.9983869194984436,grad_norm: 0.9999991653637593, iteration: 133813
loss: 1.0103309154510498,grad_norm: 0.9999999439887517, iteration: 133814
loss: 1.0157470703125,grad_norm: 0.999999216158442, iteration: 133815
loss: 0.9905798435211182,grad_norm: 0.9999991737295708, iteration: 133816
loss: 0.993057131767273,grad_norm: 0.9504211219701311, iteration: 133817
loss: 0.9864622950553894,grad_norm: 0.8552121776638174, iteration: 133818
loss: 0.9998645186424255,grad_norm: 0.9999991291613675, iteration: 133819
loss: 0.994879424571991,grad_norm: 0.999999060490362, iteration: 133820
loss: 1.084185242652893,grad_norm: 0.9999991765715076, iteration: 133821
loss: 1.0218021869659424,grad_norm: 0.9901421572037986, iteration: 133822
loss: 0.9818177223205566,grad_norm: 0.8875344388666067, iteration: 133823
loss: 1.018112063407898,grad_norm: 0.9999990475175771, iteration: 133824
loss: 0.9826126098632812,grad_norm: 0.9999992178909095, iteration: 133825
loss: 1.0317463874816895,grad_norm: 0.8899383539956519, iteration: 133826
loss: 1.0336326360702515,grad_norm: 0.9999994574857208, iteration: 133827
loss: 0.9810750484466553,grad_norm: 0.958054944491438, iteration: 133828
loss: 1.029988408088684,grad_norm: 0.9999991573441501, iteration: 133829
loss: 1.029229760169983,grad_norm: 0.9999999046721062, iteration: 133830
loss: 0.9867328405380249,grad_norm: 0.9999991246120598, iteration: 133831
loss: 0.9951555728912354,grad_norm: 0.9669225388114215, iteration: 133832
loss: 0.9618692398071289,grad_norm: 0.8723903819354971, iteration: 133833
loss: 1.0255311727523804,grad_norm: 0.8769089499418874, iteration: 133834
loss: 0.9891833066940308,grad_norm: 0.9999991011591818, iteration: 133835
loss: 1.0195202827453613,grad_norm: 0.8658988011772196, iteration: 133836
loss: 1.0224043130874634,grad_norm: 0.999999089585673, iteration: 133837
loss: 0.9501544833183289,grad_norm: 0.9999990338748467, iteration: 133838
loss: 0.9975207448005676,grad_norm: 0.9999990932878523, iteration: 133839
loss: 0.9947702884674072,grad_norm: 0.9999993172987979, iteration: 133840
loss: 1.0052999258041382,grad_norm: 0.9992274263249129, iteration: 133841
loss: 1.0431076288223267,grad_norm: 0.9999990278568504, iteration: 133842
loss: 0.9887161254882812,grad_norm: 0.9999990711922874, iteration: 133843
loss: 1.0203938484191895,grad_norm: 0.9999990422270534, iteration: 133844
loss: 0.9621550440788269,grad_norm: 0.9862224625644018, iteration: 133845
loss: 0.9533324837684631,grad_norm: 0.9999992489829069, iteration: 133846
loss: 1.0060646533966064,grad_norm: 0.9999990738433484, iteration: 133847
loss: 1.0566647052764893,grad_norm: 0.9999994484375261, iteration: 133848
loss: 1.0414385795593262,grad_norm: 0.9999991015616191, iteration: 133849
loss: 1.0074284076690674,grad_norm: 0.9999992259202225, iteration: 133850
loss: 1.0112658739089966,grad_norm: 0.8777809539677525, iteration: 133851
loss: 1.01900315284729,grad_norm: 0.9999992600950393, iteration: 133852
loss: 0.9792317152023315,grad_norm: 0.9999992080566246, iteration: 133853
loss: 0.9879370927810669,grad_norm: 0.9100945324083173, iteration: 133854
loss: 0.9858042597770691,grad_norm: 0.9999992314461602, iteration: 133855
loss: 1.092004418373108,grad_norm: 0.9999995717821848, iteration: 133856
loss: 1.0209063291549683,grad_norm: 0.9999994180925297, iteration: 133857
loss: 0.9729132652282715,grad_norm: 0.999999779653958, iteration: 133858
loss: 1.0212641954421997,grad_norm: 0.9999991598644221, iteration: 133859
loss: 1.0833005905151367,grad_norm: 0.999999623280035, iteration: 133860
loss: 0.9831204414367676,grad_norm: 0.9999992108799629, iteration: 133861
loss: 1.004346251487732,grad_norm: 0.9626449355335094, iteration: 133862
loss: 1.0586211681365967,grad_norm: 0.9999996543625421, iteration: 133863
loss: 0.9855101108551025,grad_norm: 0.9163706511194211, iteration: 133864
loss: 1.168624758720398,grad_norm: 0.9999998255511805, iteration: 133865
loss: 0.9863173365592957,grad_norm: 0.9999991469827102, iteration: 133866
loss: 0.9723910093307495,grad_norm: 0.9021677805474352, iteration: 133867
loss: 0.987666130065918,grad_norm: 0.9999996162835272, iteration: 133868
loss: 0.9938967823982239,grad_norm: 0.9233807827348461, iteration: 133869
loss: 1.0395970344543457,grad_norm: 0.9999991405461549, iteration: 133870
loss: 1.0022250413894653,grad_norm: 0.9999991865105331, iteration: 133871
loss: 1.0438700914382935,grad_norm: 0.999999468356525, iteration: 133872
loss: 0.9713261127471924,grad_norm: 0.999999197504211, iteration: 133873
loss: 1.08457350730896,grad_norm: 0.9999991890178318, iteration: 133874
loss: 1.0037249326705933,grad_norm: 0.999998979484866, iteration: 133875
loss: 1.0230464935302734,grad_norm: 0.9999990919303563, iteration: 133876
loss: 1.4025204181671143,grad_norm: 0.9999998117840014, iteration: 133877
loss: 1.0962573289871216,grad_norm: 0.9999994694509688, iteration: 133878
loss: 1.1178783178329468,grad_norm: 0.9999995365502076, iteration: 133879
loss: 1.0000523328781128,grad_norm: 0.9999991131350906, iteration: 133880
loss: 0.996801495552063,grad_norm: 0.9740716886324128, iteration: 133881
loss: 1.0228883028030396,grad_norm: 0.9999990559624453, iteration: 133882
loss: 0.9705853462219238,grad_norm: 0.9967477105035092, iteration: 133883
loss: 0.9920147061347961,grad_norm: 0.9984132191956346, iteration: 133884
loss: 1.0358976125717163,grad_norm: 0.999999775040404, iteration: 133885
loss: 1.005187749862671,grad_norm: 0.999999087484087, iteration: 133886
loss: 0.9754259586334229,grad_norm: 0.9999990018599736, iteration: 133887
loss: 1.0067874193191528,grad_norm: 0.9999991340617402, iteration: 133888
loss: 1.0014773607254028,grad_norm: 0.9999991311261666, iteration: 133889
loss: 0.9949727058410645,grad_norm: 0.9999990917562551, iteration: 133890
loss: 1.0277659893035889,grad_norm: 0.9999991001905344, iteration: 133891
loss: 1.0005710124969482,grad_norm: 0.9999991456054926, iteration: 133892
loss: 1.018939733505249,grad_norm: 0.9999992421015742, iteration: 133893
loss: 1.0200375318527222,grad_norm: 0.9999991627864029, iteration: 133894
loss: 0.99345463514328,grad_norm: 0.9999990963324, iteration: 133895
loss: 0.9979814291000366,grad_norm: 0.9999991861012542, iteration: 133896
loss: 1.0066850185394287,grad_norm: 0.8748113247885592, iteration: 133897
loss: 0.9790990352630615,grad_norm: 0.9681234350606202, iteration: 133898
loss: 1.0011816024780273,grad_norm: 0.9999992714729623, iteration: 133899
loss: 1.011829137802124,grad_norm: 0.9999990913388845, iteration: 133900
loss: 1.0085067749023438,grad_norm: 0.9999991045888519, iteration: 133901
loss: 1.0108206272125244,grad_norm: 0.9999990032888126, iteration: 133902
loss: 1.0019205808639526,grad_norm: 0.8856294209202543, iteration: 133903
loss: 0.9792090654373169,grad_norm: 0.8619036904162091, iteration: 133904
loss: 1.0150502920150757,grad_norm: 0.9999994005065692, iteration: 133905
loss: 1.0158414840698242,grad_norm: 0.9999990300103263, iteration: 133906
loss: 1.011253833770752,grad_norm: 0.9999996646006714, iteration: 133907
loss: 1.002610206604004,grad_norm: 0.9999991493749821, iteration: 133908
loss: 1.0123063325881958,grad_norm: 0.9991013058899291, iteration: 133909
loss: 0.9821372032165527,grad_norm: 0.9999992756330868, iteration: 133910
loss: 1.00016188621521,grad_norm: 0.9075635634631606, iteration: 133911
loss: 0.9981034994125366,grad_norm: 0.9758999573943892, iteration: 133912
loss: 1.041047215461731,grad_norm: 0.9999990899775738, iteration: 133913
loss: 1.0101737976074219,grad_norm: 0.9580497946939515, iteration: 133914
loss: 1.1466182470321655,grad_norm: 0.9999991844499735, iteration: 133915
loss: 1.0004669427871704,grad_norm: 0.9999992350990239, iteration: 133916
loss: 0.9865866303443909,grad_norm: 0.9999992211966001, iteration: 133917
loss: 1.0144236087799072,grad_norm: 0.999999002293795, iteration: 133918
loss: 1.0879263877868652,grad_norm: 0.9999993647926785, iteration: 133919
loss: 1.017862319946289,grad_norm: 0.9999992548773576, iteration: 133920
loss: 1.016335129737854,grad_norm: 0.9999991478357262, iteration: 133921
loss: 0.9779189229011536,grad_norm: 0.9999998499191443, iteration: 133922
loss: 1.0510103702545166,grad_norm: 0.9999990605142792, iteration: 133923
loss: 1.015811800956726,grad_norm: 0.8367134330436978, iteration: 133924
loss: 1.0095341205596924,grad_norm: 0.8960490102240958, iteration: 133925
loss: 1.010697841644287,grad_norm: 0.9999991106615507, iteration: 133926
loss: 1.0032418966293335,grad_norm: 0.9126335498528799, iteration: 133927
loss: 0.9691619277000427,grad_norm: 0.9999992055685648, iteration: 133928
loss: 0.9608414173126221,grad_norm: 0.9999990748704415, iteration: 133929
loss: 1.009484052658081,grad_norm: 0.9546586382403903, iteration: 133930
loss: 0.9908970594406128,grad_norm: 0.9999992481370774, iteration: 133931
loss: 1.0755517482757568,grad_norm: 0.99999937762053, iteration: 133932
loss: 1.0022956132888794,grad_norm: 0.9677009198415414, iteration: 133933
loss: 1.019911289215088,grad_norm: 0.9999991998713037, iteration: 133934
loss: 1.0684375762939453,grad_norm: 0.9999994224107109, iteration: 133935
loss: 1.014883279800415,grad_norm: 0.9999992098651527, iteration: 133936
loss: 0.9805092215538025,grad_norm: 0.937685183599063, iteration: 133937
loss: 1.0207148790359497,grad_norm: 0.9999991047922038, iteration: 133938
loss: 1.0114328861236572,grad_norm: 0.9999991161540813, iteration: 133939
loss: 1.0031660795211792,grad_norm: 0.9999992010821052, iteration: 133940
loss: 0.9620142579078674,grad_norm: 0.9999990251036047, iteration: 133941
loss: 0.9923068284988403,grad_norm: 0.904622579557486, iteration: 133942
loss: 0.9842506051063538,grad_norm: 0.9999992808865253, iteration: 133943
loss: 1.0228772163391113,grad_norm: 0.9999994285487939, iteration: 133944
loss: 1.0028972625732422,grad_norm: 0.9516013019841891, iteration: 133945
loss: 0.9915232062339783,grad_norm: 0.864519963262439, iteration: 133946
loss: 0.9942619800567627,grad_norm: 0.9999990326327541, iteration: 133947
loss: 1.0246926546096802,grad_norm: 0.8546356199050047, iteration: 133948
loss: 1.0130482912063599,grad_norm: 0.9999990317206716, iteration: 133949
loss: 1.011023759841919,grad_norm: 0.9999992772936946, iteration: 133950
loss: 0.9887403845787048,grad_norm: 0.9999990666841773, iteration: 133951
loss: 1.0094218254089355,grad_norm: 0.9999992097003101, iteration: 133952
loss: 0.9937804937362671,grad_norm: 0.9999991404862121, iteration: 133953
loss: 1.0107166767120361,grad_norm: 0.9717914047921586, iteration: 133954
loss: 1.1046142578125,grad_norm: 0.9999995185140168, iteration: 133955
loss: 1.000646710395813,grad_norm: 0.8968002562755649, iteration: 133956
loss: 0.9918302297592163,grad_norm: 0.9999991156572376, iteration: 133957
loss: 1.047440528869629,grad_norm: 0.9999994943038837, iteration: 133958
loss: 1.0537910461425781,grad_norm: 0.9999999014401493, iteration: 133959
loss: 0.9843583703041077,grad_norm: 0.9999993096483422, iteration: 133960
loss: 0.9864165186882019,grad_norm: 0.9999992080945731, iteration: 133961
loss: 0.9225184917449951,grad_norm: 0.9999990986548227, iteration: 133962
loss: 0.9967522025108337,grad_norm: 0.9999989658848029, iteration: 133963
loss: 1.0023390054702759,grad_norm: 0.9999989673639454, iteration: 133964
loss: 1.0049493312835693,grad_norm: 0.9312534255605147, iteration: 133965
loss: 0.9826681017875671,grad_norm: 0.9984104737084886, iteration: 133966
loss: 0.9800925254821777,grad_norm: 0.9999991678480841, iteration: 133967
loss: 1.0052541494369507,grad_norm: 0.8639874519694472, iteration: 133968
loss: 0.9957762360572815,grad_norm: 0.9687056516548035, iteration: 133969
loss: 1.0299785137176514,grad_norm: 0.9999990595847326, iteration: 133970
loss: 0.9981985092163086,grad_norm: 0.9999991080567757, iteration: 133971
loss: 1.0434919595718384,grad_norm: 0.9999996352333966, iteration: 133972
loss: 1.0150421857833862,grad_norm: 0.9999991209060266, iteration: 133973
loss: 1.0338623523712158,grad_norm: 0.9999997134755814, iteration: 133974
loss: 1.0189564228057861,grad_norm: 0.9999996557395464, iteration: 133975
loss: 1.023712158203125,grad_norm: 0.9452664552396508, iteration: 133976
loss: 0.9850810766220093,grad_norm: 0.950275255823488, iteration: 133977
loss: 1.0377548933029175,grad_norm: 0.9999990906876974, iteration: 133978
loss: 0.9901370406150818,grad_norm: 0.9999990206966121, iteration: 133979
loss: 1.0069111585617065,grad_norm: 0.999999185439326, iteration: 133980
loss: 1.0222511291503906,grad_norm: 0.9056543391263218, iteration: 133981
loss: 0.9985948204994202,grad_norm: 0.9679521760829494, iteration: 133982
loss: 1.0104331970214844,grad_norm: 0.9999991465117104, iteration: 133983
loss: 0.986162006855011,grad_norm: 0.9999995127246262, iteration: 133984
loss: 1.0145065784454346,grad_norm: 0.9419503810666583, iteration: 133985
loss: 0.984565794467926,grad_norm: 0.894545072080867, iteration: 133986
loss: 0.9838235378265381,grad_norm: 0.8897105969035738, iteration: 133987
loss: 1.0167535543441772,grad_norm: 0.9999992360161389, iteration: 133988
loss: 0.9994128346443176,grad_norm: 0.9999991429994848, iteration: 133989
loss: 1.0047794580459595,grad_norm: 0.9978669743536162, iteration: 133990
loss: 0.9729832410812378,grad_norm: 0.9597226727105115, iteration: 133991
loss: 0.9755178093910217,grad_norm: 0.827015173348464, iteration: 133992
loss: 1.021106243133545,grad_norm: 0.9999992724182525, iteration: 133993
loss: 1.0196229219436646,grad_norm: 0.9184214887909175, iteration: 133994
loss: 0.9897413849830627,grad_norm: 0.9999990917522333, iteration: 133995
loss: 0.9583781957626343,grad_norm: 0.8286274770586801, iteration: 133996
loss: 0.9987595081329346,grad_norm: 0.8619417437187317, iteration: 133997
loss: 0.9581854343414307,grad_norm: 0.9694503628154617, iteration: 133998
loss: 0.9762576222419739,grad_norm: 0.9999992047182246, iteration: 133999
loss: 1.0163697004318237,grad_norm: 0.9561292870337456, iteration: 134000
loss: 0.995523989200592,grad_norm: 0.9999991595913256, iteration: 134001
loss: 1.045034408569336,grad_norm: 0.9661836730238684, iteration: 134002
loss: 1.0651228427886963,grad_norm: 0.9999998323132369, iteration: 134003
loss: 1.0326143503189087,grad_norm: 0.9999998928310677, iteration: 134004
loss: 1.0291848182678223,grad_norm: 0.9999991067056474, iteration: 134005
loss: 1.0670194625854492,grad_norm: 0.999999460295168, iteration: 134006
loss: 1.0058971643447876,grad_norm: 0.8599048413822867, iteration: 134007
loss: 1.0057865381240845,grad_norm: 0.9999991798236781, iteration: 134008
loss: 0.9866172075271606,grad_norm: 0.9999990027209206, iteration: 134009
loss: 0.9977438449859619,grad_norm: 0.7815233401893548, iteration: 134010
loss: 1.0359950065612793,grad_norm: 0.9034073820837766, iteration: 134011
loss: 1.0027481317520142,grad_norm: 0.9754418043732561, iteration: 134012
loss: 0.9698734283447266,grad_norm: 0.9885463086467623, iteration: 134013
loss: 0.9974011778831482,grad_norm: 0.9999989856762372, iteration: 134014
loss: 0.9896711111068726,grad_norm: 0.9409164252815521, iteration: 134015
loss: 0.9584290385246277,grad_norm: 0.9999991517396523, iteration: 134016
loss: 0.9504963159561157,grad_norm: 0.9999992215401682, iteration: 134017
loss: 0.9880486726760864,grad_norm: 0.99999913724827, iteration: 134018
loss: 0.9678114056587219,grad_norm: 0.9889172677161246, iteration: 134019
loss: 0.9760459065437317,grad_norm: 0.9999991344930085, iteration: 134020
loss: 1.00192129611969,grad_norm: 0.955663756353577, iteration: 134021
loss: 0.9789111614227295,grad_norm: 0.999999238483887, iteration: 134022
loss: 0.992749810218811,grad_norm: 0.9999990278283392, iteration: 134023
loss: 1.007568359375,grad_norm: 0.9999990031428209, iteration: 134024
loss: 0.962223470211029,grad_norm: 0.9999993001816186, iteration: 134025
loss: 0.9715255498886108,grad_norm: 0.9999991040335758, iteration: 134026
loss: 0.9984742403030396,grad_norm: 0.9999993023082238, iteration: 134027
loss: 1.0159841775894165,grad_norm: 0.8910874111256795, iteration: 134028
loss: 1.0327632427215576,grad_norm: 0.9999992178455982, iteration: 134029
loss: 0.9947457909584045,grad_norm: 0.849085557603369, iteration: 134030
loss: 0.9755935668945312,grad_norm: 0.9999992627981796, iteration: 134031
loss: 1.0166929960250854,grad_norm: 0.8923186291322047, iteration: 134032
loss: 1.0123298168182373,grad_norm: 0.9564325650702494, iteration: 134033
loss: 1.043209433555603,grad_norm: 0.8950635501562878, iteration: 134034
loss: 1.0237929821014404,grad_norm: 0.9999990569862952, iteration: 134035
loss: 1.024056077003479,grad_norm: 0.9808155560000424, iteration: 134036
loss: 0.9461032152175903,grad_norm: 0.9999991329345729, iteration: 134037
loss: 1.0127865076065063,grad_norm: 0.9954421591116867, iteration: 134038
loss: 1.0092881917953491,grad_norm: 0.9999992314331004, iteration: 134039
loss: 1.0124331712722778,grad_norm: 0.9999991937357262, iteration: 134040
loss: 0.9676399827003479,grad_norm: 0.9296064635770722, iteration: 134041
loss: 1.0036345720291138,grad_norm: 0.9567135938528472, iteration: 134042
loss: 1.0001715421676636,grad_norm: 0.9999990860169674, iteration: 134043
loss: 0.9632864594459534,grad_norm: 0.9999991280297417, iteration: 134044
loss: 0.9881219267845154,grad_norm: 0.920252077869219, iteration: 134045
loss: 1.0043456554412842,grad_norm: 0.9057702739517894, iteration: 134046
loss: 1.0326658487319946,grad_norm: 0.9876125416655946, iteration: 134047
loss: 1.04124116897583,grad_norm: 0.999999111675385, iteration: 134048
loss: 1.014880657196045,grad_norm: 0.9937157035733177, iteration: 134049
loss: 1.025709629058838,grad_norm: 0.9999992852627234, iteration: 134050
loss: 0.9918985962867737,grad_norm: 0.9999992467708001, iteration: 134051
loss: 1.0241585969924927,grad_norm: 0.9999992207503493, iteration: 134052
loss: 0.9993125796318054,grad_norm: 0.960668398046931, iteration: 134053
loss: 0.9691937565803528,grad_norm: 0.987519593820422, iteration: 134054
loss: 1.021188497543335,grad_norm: 0.9999989405576564, iteration: 134055
loss: 0.9768921732902527,grad_norm: 0.9423898683773686, iteration: 134056
loss: 0.9824545979499817,grad_norm: 0.999999157867463, iteration: 134057
loss: 0.9906185269355774,grad_norm: 0.9158886946562258, iteration: 134058
loss: 0.9958482384681702,grad_norm: 0.9410108392434244, iteration: 134059
loss: 0.9855555891990662,grad_norm: 0.9999992833832191, iteration: 134060
loss: 1.0038235187530518,grad_norm: 0.9999990876768441, iteration: 134061
loss: 1.0057414770126343,grad_norm: 0.9999992416829868, iteration: 134062
loss: 1.0011824369430542,grad_norm: 0.8927918307258728, iteration: 134063
loss: 0.9434760808944702,grad_norm: 0.9999991403176072, iteration: 134064
loss: 1.0007243156433105,grad_norm: 0.9999991429438764, iteration: 134065
loss: 1.0122817754745483,grad_norm: 0.9999992700493442, iteration: 134066
loss: 1.0586237907409668,grad_norm: 0.9999991990864666, iteration: 134067
loss: 1.091454267501831,grad_norm: 0.9999992015064094, iteration: 134068
loss: 1.0160688161849976,grad_norm: 0.9999992744211763, iteration: 134069
loss: 1.0085961818695068,grad_norm: 0.9999991286549348, iteration: 134070
loss: 1.0185829401016235,grad_norm: 0.9999991489433432, iteration: 134071
loss: 1.0342230796813965,grad_norm: 0.9999990615418268, iteration: 134072
loss: 0.9873238801956177,grad_norm: 0.8903668442265844, iteration: 134073
loss: 1.017095685005188,grad_norm: 0.9755009517388639, iteration: 134074
loss: 1.0158973932266235,grad_norm: 0.9999991197864239, iteration: 134075
loss: 1.0074926614761353,grad_norm: 0.9010979759035797, iteration: 134076
loss: 0.9983637928962708,grad_norm: 0.9999991278377456, iteration: 134077
loss: 0.9912638664245605,grad_norm: 0.9999992620998532, iteration: 134078
loss: 1.0195692777633667,grad_norm: 0.9999991393222725, iteration: 134079
loss: 0.9776732325553894,grad_norm: 0.9499193353415204, iteration: 134080
loss: 1.0333843231201172,grad_norm: 0.9999991066588707, iteration: 134081
loss: 1.0086420774459839,grad_norm: 0.9999990642694276, iteration: 134082
loss: 0.9526107907295227,grad_norm: 0.9999992092208735, iteration: 134083
loss: 1.051421046257019,grad_norm: 0.9999990645977076, iteration: 134084
loss: 0.9716655015945435,grad_norm: 0.9999991948493571, iteration: 134085
loss: 0.9812145233154297,grad_norm: 0.9658641820802795, iteration: 134086
loss: 0.9753885865211487,grad_norm: 0.9911632181490067, iteration: 134087
loss: 0.9768930077552795,grad_norm: 0.9663094871432544, iteration: 134088
loss: 0.978067934513092,grad_norm: 0.9830499555807684, iteration: 134089
loss: 1.0063092708587646,grad_norm: 0.9999992994789957, iteration: 134090
loss: 0.966239333152771,grad_norm: 0.8505662530035205, iteration: 134091
loss: 0.9888436198234558,grad_norm: 0.9829508963463718, iteration: 134092
loss: 0.9914423823356628,grad_norm: 0.9999991181922488, iteration: 134093
loss: 0.996576189994812,grad_norm: 0.999999151573603, iteration: 134094
loss: 0.956072986125946,grad_norm: 0.9206360749399883, iteration: 134095
loss: 1.0059762001037598,grad_norm: 0.9952031892565547, iteration: 134096
loss: 1.0127979516983032,grad_norm: 0.9999992493135937, iteration: 134097
loss: 1.0513359308242798,grad_norm: 0.9999994009549088, iteration: 134098
loss: 0.9876116514205933,grad_norm: 0.9662323613685422, iteration: 134099
loss: 1.0257182121276855,grad_norm: 0.9999990951463478, iteration: 134100
loss: 0.9804661870002747,grad_norm: 0.9999993117077423, iteration: 134101
loss: 1.011391520500183,grad_norm: 0.9999991851157705, iteration: 134102
loss: 1.0098720788955688,grad_norm: 0.912231550877042, iteration: 134103
loss: 1.0812723636627197,grad_norm: 0.9999993211966234, iteration: 134104
loss: 1.0328770875930786,grad_norm: 0.9999997242782908, iteration: 134105
loss: 1.044301986694336,grad_norm: 0.9467384284026393, iteration: 134106
loss: 0.9717442393302917,grad_norm: 0.9999991101341136, iteration: 134107
loss: 1.0014935731887817,grad_norm: 0.9999993145581064, iteration: 134108
loss: 0.9982147812843323,grad_norm: 0.9999992693620485, iteration: 134109
loss: 1.0249966382980347,grad_norm: 0.9999990798558692, iteration: 134110
loss: 1.0004637241363525,grad_norm: 0.9999991703893074, iteration: 134111
loss: 0.9881719350814819,grad_norm: 0.974194664627419, iteration: 134112
loss: 0.9713887572288513,grad_norm: 0.9270397771598383, iteration: 134113
loss: 0.9912648797035217,grad_norm: 0.9000436186828427, iteration: 134114
loss: 1.0028496980667114,grad_norm: 0.9208497975039462, iteration: 134115
loss: 0.9925695657730103,grad_norm: 0.999999104983772, iteration: 134116
loss: 0.9920297265052795,grad_norm: 0.9999990497431915, iteration: 134117
loss: 1.0071035623550415,grad_norm: 0.9884011099189662, iteration: 134118
loss: 0.9419198036193848,grad_norm: 0.9977309156949882, iteration: 134119
loss: 1.036363124847412,grad_norm: 0.8710352723436362, iteration: 134120
loss: 1.0158623456954956,grad_norm: 0.9999992393391639, iteration: 134121
loss: 0.9968686699867249,grad_norm: 0.8918442247952382, iteration: 134122
loss: 1.0418857336044312,grad_norm: 0.9999990722971882, iteration: 134123
loss: 0.9742083549499512,grad_norm: 0.9999991640405537, iteration: 134124
loss: 1.0033128261566162,grad_norm: 0.9358551016655, iteration: 134125
loss: 0.9851089715957642,grad_norm: 0.9421702504661366, iteration: 134126
loss: 1.0051919221878052,grad_norm: 0.8697502844485813, iteration: 134127
loss: 0.9716198444366455,grad_norm: 0.9541503629764646, iteration: 134128
loss: 1.0072546005249023,grad_norm: 0.9999990469543001, iteration: 134129
loss: 1.04743230342865,grad_norm: 0.9999999522339313, iteration: 134130
loss: 1.000327229499817,grad_norm: 0.9999992966585932, iteration: 134131
loss: 1.0101162195205688,grad_norm: 0.9999992021123465, iteration: 134132
loss: 1.001291275024414,grad_norm: 0.9999991536985622, iteration: 134133
loss: 1.006106972694397,grad_norm: 0.9999989622881044, iteration: 134134
loss: 1.0848664045333862,grad_norm: 0.9999992380774096, iteration: 134135
loss: 1.1075878143310547,grad_norm: 0.9999994670335569, iteration: 134136
loss: 0.9992190003395081,grad_norm: 0.9999990927886917, iteration: 134137
loss: 1.0568780899047852,grad_norm: 0.9356246360181727, iteration: 134138
loss: 1.0042073726654053,grad_norm: 0.9999992713740956, iteration: 134139
loss: 0.9964058995246887,grad_norm: 0.9999992984510598, iteration: 134140
loss: 1.021523118019104,grad_norm: 0.9596025693140663, iteration: 134141
loss: 1.0007175207138062,grad_norm: 0.9999992219351657, iteration: 134142
loss: 1.0079880952835083,grad_norm: 0.9999995010712941, iteration: 134143
loss: 1.011276125907898,grad_norm: 0.9999995612176171, iteration: 134144
loss: 0.9898553490638733,grad_norm: 0.9999991804568534, iteration: 134145
loss: 1.0210813283920288,grad_norm: 0.9999992083142355, iteration: 134146
loss: 0.9857105612754822,grad_norm: 0.9929699672377128, iteration: 134147
loss: 1.023917555809021,grad_norm: 0.9999990692957473, iteration: 134148
loss: 0.9930087327957153,grad_norm: 0.9613626752933091, iteration: 134149
loss: 1.0612571239471436,grad_norm: 0.9999993368352283, iteration: 134150
loss: 1.0228242874145508,grad_norm: 0.8732551169596895, iteration: 134151
loss: 0.9677959084510803,grad_norm: 0.9999990533127756, iteration: 134152
loss: 1.043357014656067,grad_norm: 0.9999990700555905, iteration: 134153
loss: 0.956406831741333,grad_norm: 0.9999990194876596, iteration: 134154
loss: 1.0188324451446533,grad_norm: 0.999999946918061, iteration: 134155
loss: 1.0166882276535034,grad_norm: 0.9999989419517429, iteration: 134156
loss: 0.9836664795875549,grad_norm: 0.9999990825017159, iteration: 134157
loss: 1.0026122331619263,grad_norm: 0.9999989980109029, iteration: 134158
loss: 1.072495698928833,grad_norm: 0.9999992170381055, iteration: 134159
loss: 1.0494621992111206,grad_norm: 0.9999997447355388, iteration: 134160
loss: 0.9696825742721558,grad_norm: 0.9999992636220019, iteration: 134161
loss: 0.9994212985038757,grad_norm: 0.9999992721914261, iteration: 134162
loss: 0.9971073865890503,grad_norm: 0.984851391949365, iteration: 134163
loss: 1.075863003730774,grad_norm: 0.9999997532807263, iteration: 134164
loss: 0.9843557476997375,grad_norm: 0.9462171530621203, iteration: 134165
loss: 0.990025520324707,grad_norm: 0.8035274413099404, iteration: 134166
loss: 0.986981987953186,grad_norm: 0.9999990105358317, iteration: 134167
loss: 1.0173170566558838,grad_norm: 0.9999997129892805, iteration: 134168
loss: 1.0048460960388184,grad_norm: 0.9999991240323712, iteration: 134169
loss: 0.9749322533607483,grad_norm: 0.9150719161013793, iteration: 134170
loss: 0.9711385369300842,grad_norm: 0.9999992718558032, iteration: 134171
loss: 0.9951141476631165,grad_norm: 0.999998980367896, iteration: 134172
loss: 0.9909225702285767,grad_norm: 0.999999129183641, iteration: 134173
loss: 1.003822922706604,grad_norm: 0.9999990506858544, iteration: 134174
loss: 1.0800193548202515,grad_norm: 0.99999994893953, iteration: 134175
loss: 0.9912703633308411,grad_norm: 0.9999990955222088, iteration: 134176
loss: 1.0052483081817627,grad_norm: 0.99999908650999, iteration: 134177
loss: 1.0018188953399658,grad_norm: 0.8481853806290048, iteration: 134178
loss: 0.9596862196922302,grad_norm: 0.904323786483778, iteration: 134179
loss: 0.9618133306503296,grad_norm: 0.9999990194081466, iteration: 134180
loss: 0.9945517778396606,grad_norm: 0.8992477662140497, iteration: 134181
loss: 1.1375197172164917,grad_norm: 0.9999993600005891, iteration: 134182
loss: 0.9793546795845032,grad_norm: 0.9999990627475212, iteration: 134183
loss: 0.9928556680679321,grad_norm: 0.9999990947670174, iteration: 134184
loss: 1.002316951751709,grad_norm: 0.9999989403481293, iteration: 134185
loss: 0.9878523349761963,grad_norm: 0.9969285804258842, iteration: 134186
loss: 0.9487846493721008,grad_norm: 0.9999992347940956, iteration: 134187
loss: 0.9924147129058838,grad_norm: 0.9091643202288607, iteration: 134188
loss: 0.9709888100624084,grad_norm: 0.9831022391175303, iteration: 134189
loss: 0.9988624453544617,grad_norm: 0.9999990907049364, iteration: 134190
loss: 0.9453229904174805,grad_norm: 0.9999991396406771, iteration: 134191
loss: 0.9785314798355103,grad_norm: 0.9952361432294206, iteration: 134192
loss: 1.0097774267196655,grad_norm: 0.9999991724680972, iteration: 134193
loss: 1.0125819444656372,grad_norm: 0.991191247843902, iteration: 134194
loss: 1.0091631412506104,grad_norm: 0.9432409092375418, iteration: 134195
loss: 0.9436711668968201,grad_norm: 0.9999992033219952, iteration: 134196
loss: 1.0137360095977783,grad_norm: 0.9999995312921044, iteration: 134197
loss: 1.0189645290374756,grad_norm: 0.9999999629751434, iteration: 134198
loss: 1.0007166862487793,grad_norm: 0.9999990172883216, iteration: 134199
loss: 1.0168722867965698,grad_norm: 0.9999991579889258, iteration: 134200
loss: 0.9638786911964417,grad_norm: 0.9999988856992256, iteration: 134201
loss: 1.089000940322876,grad_norm: 0.9999992530632293, iteration: 134202
loss: 0.9804597496986389,grad_norm: 0.9999991389957101, iteration: 134203
loss: 0.9858485460281372,grad_norm: 0.9628028303530475, iteration: 134204
loss: 1.0001009702682495,grad_norm: 0.9999992687896446, iteration: 134205
loss: 1.0301400423049927,grad_norm: 0.9797274986793008, iteration: 134206
loss: 0.9982534646987915,grad_norm: 0.9600540389880048, iteration: 134207
loss: 0.9985250234603882,grad_norm: 0.8351535103274491, iteration: 134208
loss: 1.0271915197372437,grad_norm: 0.999999277112528, iteration: 134209
loss: 1.05820894241333,grad_norm: 0.9999990100976385, iteration: 134210
loss: 1.0512335300445557,grad_norm: 0.8853345953078701, iteration: 134211
loss: 0.9862614274024963,grad_norm: 0.9297030672755886, iteration: 134212
loss: 1.0033888816833496,grad_norm: 0.9999990562769969, iteration: 134213
loss: 1.0060304403305054,grad_norm: 0.986425385432179, iteration: 134214
loss: 0.9873420596122742,grad_norm: 0.9999991944084914, iteration: 134215
loss: 1.0158977508544922,grad_norm: 0.9999990404676671, iteration: 134216
loss: 1.0213576555252075,grad_norm: 0.8314849834728909, iteration: 134217
loss: 1.0076377391815186,grad_norm: 0.9999994382009169, iteration: 134218
loss: 1.0245059728622437,grad_norm: 0.915903943867384, iteration: 134219
loss: 0.980475902557373,grad_norm: 0.8539939035868633, iteration: 134220
loss: 1.0098909139633179,grad_norm: 0.9999991163461586, iteration: 134221
loss: 1.0393991470336914,grad_norm: 0.813119527529447, iteration: 134222
loss: 0.9769532084465027,grad_norm: 0.9999992989537947, iteration: 134223
loss: 0.9902456402778625,grad_norm: 0.8589906413605565, iteration: 134224
loss: 1.0238001346588135,grad_norm: 0.9999990625862817, iteration: 134225
loss: 0.961800217628479,grad_norm: 0.9999991198746256, iteration: 134226
loss: 0.9965102672576904,grad_norm: 0.9999992915141651, iteration: 134227
loss: 1.0877286195755005,grad_norm: 0.9999997223215621, iteration: 134228
loss: 1.036342740058899,grad_norm: 0.8882429651476189, iteration: 134229
loss: 1.0081651210784912,grad_norm: 0.8442238307905998, iteration: 134230
loss: 1.0196243524551392,grad_norm: 0.9999990788001597, iteration: 134231
loss: 0.9914684295654297,grad_norm: 0.9242015417714307, iteration: 134232
loss: 1.0104180574417114,grad_norm: 0.9999989212939996, iteration: 134233
loss: 1.0247812271118164,grad_norm: 0.9999991918125859, iteration: 134234
loss: 1.0256108045578003,grad_norm: 0.826910698008901, iteration: 134235
loss: 1.0146358013153076,grad_norm: 0.8836925552769049, iteration: 134236
loss: 0.9876527190208435,grad_norm: 0.9286502504394435, iteration: 134237
loss: 1.006911039352417,grad_norm: 0.9999992715413778, iteration: 134238
loss: 1.0214979648590088,grad_norm: 0.8896072679133125, iteration: 134239
loss: 1.0264620780944824,grad_norm: 0.9999992082744409, iteration: 134240
loss: 0.9880311489105225,grad_norm: 0.9457574388214276, iteration: 134241
loss: 0.9940462708473206,grad_norm: 0.9999991678514951, iteration: 134242
loss: 0.9967014789581299,grad_norm: 0.8258120691055073, iteration: 134243
loss: 1.02620267868042,grad_norm: 0.9999989760341426, iteration: 134244
loss: 0.9867784380912781,grad_norm: 0.8873128058875669, iteration: 134245
loss: 0.9839695692062378,grad_norm: 0.999999054574098, iteration: 134246
loss: 0.9804556965827942,grad_norm: 0.9999990850657802, iteration: 134247
loss: 0.9862359762191772,grad_norm: 0.848847171066399, iteration: 134248
loss: 1.0294822454452515,grad_norm: 0.9399589143363204, iteration: 134249
loss: 1.0141847133636475,grad_norm: 0.8797120302092194, iteration: 134250
loss: 1.0116794109344482,grad_norm: 0.9421624350277695, iteration: 134251
loss: 0.9841734766960144,grad_norm: 0.9999990587514559, iteration: 134252
loss: 0.9605389833450317,grad_norm: 0.9999991309143689, iteration: 134253
loss: 0.9803532958030701,grad_norm: 0.9999991062039396, iteration: 134254
loss: 0.9828603863716125,grad_norm: 0.8617078525266045, iteration: 134255
loss: 1.0008056163787842,grad_norm: 0.999999137627428, iteration: 134256
loss: 0.9710987210273743,grad_norm: 0.9999989731311595, iteration: 134257
loss: 0.9551848769187927,grad_norm: 0.9999991836448054, iteration: 134258
loss: 1.0167316198349,grad_norm: 0.9999992846649244, iteration: 134259
loss: 0.9875867366790771,grad_norm: 0.9999990416146697, iteration: 134260
loss: 1.0016725063323975,grad_norm: 0.9967701375988864, iteration: 134261
loss: 1.0177408456802368,grad_norm: 0.9901814794704399, iteration: 134262
loss: 1.0090898275375366,grad_norm: 0.9565232421624997, iteration: 134263
loss: 0.9841527938842773,grad_norm: 0.9030356827171514, iteration: 134264
loss: 1.0216386318206787,grad_norm: 0.9951266096843217, iteration: 134265
loss: 1.0327121019363403,grad_norm: 0.9999990942145308, iteration: 134266
loss: 1.003307580947876,grad_norm: 0.999999158881824, iteration: 134267
loss: 1.0180606842041016,grad_norm: 0.9999990964170778, iteration: 134268
loss: 0.9957582354545593,grad_norm: 0.9999990473566208, iteration: 134269
loss: 1.015760898590088,grad_norm: 0.9999993679027127, iteration: 134270
loss: 1.0008430480957031,grad_norm: 0.9277705271861283, iteration: 134271
loss: 0.9863559007644653,grad_norm: 0.9625606130139114, iteration: 134272
loss: 1.002194881439209,grad_norm: 0.9111104282244447, iteration: 134273
loss: 1.0066779851913452,grad_norm: 0.9571051659978703, iteration: 134274
loss: 1.000871181488037,grad_norm: 0.9731183683538164, iteration: 134275
loss: 0.9857661724090576,grad_norm: 0.9999989860613342, iteration: 134276
loss: 1.0174179077148438,grad_norm: 0.9999993021824046, iteration: 134277
loss: 1.0142180919647217,grad_norm: 0.9999992319226287, iteration: 134278
loss: 1.0498685836791992,grad_norm: 0.999999500770433, iteration: 134279
loss: 0.9988206624984741,grad_norm: 0.9999991438660398, iteration: 134280
loss: 1.0344127416610718,grad_norm: 0.9999990969530285, iteration: 134281
loss: 0.9749855399131775,grad_norm: 0.8741142358379898, iteration: 134282
loss: 0.9884010553359985,grad_norm: 0.9999991215812187, iteration: 134283
loss: 0.9600032567977905,grad_norm: 0.9999996658010708, iteration: 134284
loss: 1.0091183185577393,grad_norm: 0.9618919494152667, iteration: 134285
loss: 1.0048850774765015,grad_norm: 0.8221568278568627, iteration: 134286
loss: 1.0180753469467163,grad_norm: 0.9999990425067076, iteration: 134287
loss: 1.0251400470733643,grad_norm: 0.9999990163065958, iteration: 134288
loss: 0.9802823066711426,grad_norm: 0.95043929937785, iteration: 134289
loss: 1.0242223739624023,grad_norm: 0.9999990700782407, iteration: 134290
loss: 0.9833335280418396,grad_norm: 0.999999301574686, iteration: 134291
loss: 1.145572543144226,grad_norm: 0.9999998071535279, iteration: 134292
loss: 1.0046643018722534,grad_norm: 0.9999989579783292, iteration: 134293
loss: 0.9988411664962769,grad_norm: 0.9028508447897718, iteration: 134294
loss: 1.0250319242477417,grad_norm: 0.9905334214360015, iteration: 134295
loss: 0.9778202176094055,grad_norm: 0.9999992174981909, iteration: 134296
loss: 0.978362500667572,grad_norm: 0.9999990976974683, iteration: 134297
loss: 0.9947720766067505,grad_norm: 0.9999992069564575, iteration: 134298
loss: 0.961592435836792,grad_norm: 0.9999990979801311, iteration: 134299
loss: 0.9893636107444763,grad_norm: 0.8893606355992563, iteration: 134300
loss: 1.0141749382019043,grad_norm: 0.9999990368955218, iteration: 134301
loss: 0.9901220798492432,grad_norm: 0.9999991114051775, iteration: 134302
loss: 0.9924072623252869,grad_norm: 0.9999989522759088, iteration: 134303
loss: 0.9756864905357361,grad_norm: 0.9999990175342683, iteration: 134304
loss: 0.9867185354232788,grad_norm: 0.9999990950076487, iteration: 134305
loss: 0.9670870304107666,grad_norm: 0.9377711828708075, iteration: 134306
loss: 1.0209707021713257,grad_norm: 0.9999991450728207, iteration: 134307
loss: 0.9897089004516602,grad_norm: 0.9999991181546968, iteration: 134308
loss: 0.9919769167900085,grad_norm: 0.9809230813178905, iteration: 134309
loss: 0.9935879707336426,grad_norm: 0.9999992410139621, iteration: 134310
loss: 0.9879770874977112,grad_norm: 0.9834388390556716, iteration: 134311
loss: 1.028529405593872,grad_norm: 0.9176160323633222, iteration: 134312
loss: 0.9847537279129028,grad_norm: 0.9999992224578135, iteration: 134313
loss: 1.0254368782043457,grad_norm: 0.999999000260234, iteration: 134314
loss: 0.9897297620773315,grad_norm: 0.9999991026085839, iteration: 134315
loss: 1.0012822151184082,grad_norm: 0.999999069309226, iteration: 134316
loss: 0.9724819660186768,grad_norm: 0.9999991931438403, iteration: 134317
loss: 0.9701561331748962,grad_norm: 0.999999870698196, iteration: 134318
loss: 0.9853779673576355,grad_norm: 0.99999909732743, iteration: 134319
loss: 0.9700316190719604,grad_norm: 0.9999991528981509, iteration: 134320
loss: 0.9568223357200623,grad_norm: 0.999999027288349, iteration: 134321
loss: 0.9589586853981018,grad_norm: 0.9689473724669411, iteration: 134322
loss: 1.0037851333618164,grad_norm: 0.9095714415642021, iteration: 134323
loss: 1.006476640701294,grad_norm: 0.9549298449370388, iteration: 134324
loss: 0.9938963055610657,grad_norm: 0.9999991724657503, iteration: 134325
loss: 1.0184926986694336,grad_norm: 0.9999993363880506, iteration: 134326
loss: 0.9648650288581848,grad_norm: 0.9999991891594789, iteration: 134327
loss: 0.9844068884849548,grad_norm: 0.7800316162315973, iteration: 134328
loss: 1.0222351551055908,grad_norm: 0.9999993447836233, iteration: 134329
loss: 0.9880489706993103,grad_norm: 0.9999989579458965, iteration: 134330
loss: 1.0445541143417358,grad_norm: 0.9999995336507914, iteration: 134331
loss: 1.0220658779144287,grad_norm: 0.9999990908538469, iteration: 134332
loss: 0.9927535057067871,grad_norm: 0.9999990059072383, iteration: 134333
loss: 0.9891535043716431,grad_norm: 0.9197710561279694, iteration: 134334
loss: 0.9882552027702332,grad_norm: 0.8974272265427305, iteration: 134335
loss: 0.9745640158653259,grad_norm: 0.9999992547987647, iteration: 134336
loss: 1.0101470947265625,grad_norm: 0.9999542042446364, iteration: 134337
loss: 1.015462040901184,grad_norm: 0.9907245428466795, iteration: 134338
loss: 1.014370083808899,grad_norm: 0.9999989586166486, iteration: 134339
loss: 0.9946489930152893,grad_norm: 0.9999990093028959, iteration: 134340
loss: 1.006464958190918,grad_norm: 0.9999997002733555, iteration: 134341
loss: 0.9943581819534302,grad_norm: 0.9032174757714345, iteration: 134342
loss: 1.0401411056518555,grad_norm: 0.9494465297865458, iteration: 134343
loss: 1.0107046365737915,grad_norm: 0.9999992445046003, iteration: 134344
loss: 0.9969887733459473,grad_norm: 0.9967658211469406, iteration: 134345
loss: 1.0061577558517456,grad_norm: 0.9999989983741581, iteration: 134346
loss: 1.0158991813659668,grad_norm: 0.917616650828594, iteration: 134347
loss: 1.0101091861724854,grad_norm: 0.9379854142431959, iteration: 134348
loss: 0.9878360629081726,grad_norm: 0.9999991518539939, iteration: 134349
loss: 0.9715147018432617,grad_norm: 0.8181273713105094, iteration: 134350
loss: 1.0375218391418457,grad_norm: 0.9999991445237629, iteration: 134351
loss: 0.9960129857063293,grad_norm: 0.999999267122134, iteration: 134352
loss: 0.9709783792495728,grad_norm: 0.9704531679634879, iteration: 134353
loss: 0.9746887683868408,grad_norm: 0.8905731312798036, iteration: 134354
loss: 1.0024234056472778,grad_norm: 0.9999992383954692, iteration: 134355
loss: 1.0317901372909546,grad_norm: 0.9999991998725835, iteration: 134356
loss: 1.0205308198928833,grad_norm: 0.9999991988326132, iteration: 134357
loss: 1.0161362886428833,grad_norm: 0.9999989903521911, iteration: 134358
loss: 1.008636236190796,grad_norm: 0.86443515970056, iteration: 134359
loss: 0.9803502559661865,grad_norm: 0.9340073604812007, iteration: 134360
loss: 1.018336296081543,grad_norm: 0.9999991451218183, iteration: 134361
loss: 1.029304027557373,grad_norm: 0.9903151557440032, iteration: 134362
loss: 0.9945063591003418,grad_norm: 0.9999991287925851, iteration: 134363
loss: 0.97234046459198,grad_norm: 0.9820108029564345, iteration: 134364
loss: 1.0129358768463135,grad_norm: 0.9973729258887649, iteration: 134365
loss: 0.9898999333381653,grad_norm: 0.9999990264037742, iteration: 134366
loss: 1.0590981245040894,grad_norm: 0.9999991978715098, iteration: 134367
loss: 0.9962858557701111,grad_norm: 0.9782322345082325, iteration: 134368
loss: 1.034023642539978,grad_norm: 0.9999993680936604, iteration: 134369
loss: 1.0225434303283691,grad_norm: 0.8953999739115976, iteration: 134370
loss: 0.9733021855354309,grad_norm: 0.8903107030077356, iteration: 134371
loss: 1.0199627876281738,grad_norm: 0.9470730848450857, iteration: 134372
loss: 1.015073299407959,grad_norm: 0.999999047058155, iteration: 134373
loss: 1.0316967964172363,grad_norm: 0.9319306551157605, iteration: 134374
loss: 1.037652611732483,grad_norm: 0.9691595917550717, iteration: 134375
loss: 1.019843339920044,grad_norm: 0.9999992888486898, iteration: 134376
loss: 0.9954333901405334,grad_norm: 0.8977719436318546, iteration: 134377
loss: 1.0171501636505127,grad_norm: 0.9999991687917598, iteration: 134378
loss: 1.0055322647094727,grad_norm: 0.9999991628672008, iteration: 134379
loss: 0.9965919852256775,grad_norm: 0.8491706031836961, iteration: 134380
loss: 1.002636432647705,grad_norm: 0.9999990741148015, iteration: 134381
loss: 1.0013402700424194,grad_norm: 0.9999990950464257, iteration: 134382
loss: 0.9989568591117859,grad_norm: 0.9723435725851044, iteration: 134383
loss: 1.0024818181991577,grad_norm: 0.9401692514083906, iteration: 134384
loss: 0.9882506728172302,grad_norm: 0.9999990387332407, iteration: 134385
loss: 1.0059921741485596,grad_norm: 0.9999989577054719, iteration: 134386
loss: 1.0192272663116455,grad_norm: 0.999999752645654, iteration: 134387
loss: 1.0569028854370117,grad_norm: 0.9999992700042943, iteration: 134388
loss: 0.9921972155570984,grad_norm: 0.999998993563988, iteration: 134389
loss: 0.9884849190711975,grad_norm: 0.9999990455832213, iteration: 134390
loss: 0.9964156746864319,grad_norm: 0.9705667427142615, iteration: 134391
loss: 1.0856647491455078,grad_norm: 0.9177603547300499, iteration: 134392
loss: 0.9990392923355103,grad_norm: 0.9880735386164022, iteration: 134393
loss: 1.0214139223098755,grad_norm: 0.865512454440518, iteration: 134394
loss: 1.0158590078353882,grad_norm: 0.9999988697363187, iteration: 134395
loss: 0.9667896032333374,grad_norm: 0.9999991958633749, iteration: 134396
loss: 0.9987016320228577,grad_norm: 0.9071710517295564, iteration: 134397
loss: 1.0389028787612915,grad_norm: 0.9917362329149669, iteration: 134398
loss: 1.0134984254837036,grad_norm: 0.999999198369532, iteration: 134399
loss: 0.9906786680221558,grad_norm: 0.9587220063738691, iteration: 134400
loss: 0.9986084699630737,grad_norm: 0.9999991444713097, iteration: 134401
loss: 0.9997538328170776,grad_norm: 0.9924150612123691, iteration: 134402
loss: 1.0270954370498657,grad_norm: 0.9999989890935786, iteration: 134403
loss: 1.0008710622787476,grad_norm: 0.8665670122629276, iteration: 134404
loss: 0.9974567294120789,grad_norm: 0.9999992232519389, iteration: 134405
loss: 0.9559755325317383,grad_norm: 0.9999991131557078, iteration: 134406
loss: 1.0352075099945068,grad_norm: 0.9999991479189531, iteration: 134407
loss: 0.9755123257637024,grad_norm: 0.8998921191168119, iteration: 134408
loss: 1.002220630645752,grad_norm: 0.9999994139248041, iteration: 134409
loss: 0.9507835507392883,grad_norm: 0.9038018106588812, iteration: 134410
loss: 0.9910595417022705,grad_norm: 0.9081501367449519, iteration: 134411
loss: 1.0027272701263428,grad_norm: 0.9999990811909009, iteration: 134412
loss: 0.9998462796211243,grad_norm: 0.9999993372491135, iteration: 134413
loss: 1.0051196813583374,grad_norm: 0.9999995297828941, iteration: 134414
loss: 0.9986774921417236,grad_norm: 0.8811027146774767, iteration: 134415
loss: 1.0198161602020264,grad_norm: 0.9999990881644025, iteration: 134416
loss: 0.9657196998596191,grad_norm: 0.970431535190137, iteration: 134417
loss: 0.9995366334915161,grad_norm: 0.9865671016841621, iteration: 134418
loss: 1.0136281251907349,grad_norm: 0.952228024750186, iteration: 134419
loss: 1.002292275428772,grad_norm: 0.9999993986995203, iteration: 134420
loss: 1.0409995317459106,grad_norm: 0.9999991221186979, iteration: 134421
loss: 0.9626317620277405,grad_norm: 0.9799855588747988, iteration: 134422
loss: 0.9763357043266296,grad_norm: 0.9999990787921078, iteration: 134423
loss: 0.9855460524559021,grad_norm: 0.9999992536201303, iteration: 134424
loss: 0.9960174560546875,grad_norm: 0.880631994378359, iteration: 134425
loss: 0.9804071187973022,grad_norm: 0.999999097370534, iteration: 134426
loss: 0.991469144821167,grad_norm: 0.9999991660604617, iteration: 134427
loss: 1.010545253753662,grad_norm: 0.9698733557837054, iteration: 134428
loss: 0.9903056025505066,grad_norm: 0.927758544218205, iteration: 134429
loss: 1.0213942527770996,grad_norm: 0.8551050340673312, iteration: 134430
loss: 1.0296292304992676,grad_norm: 0.9999990852508642, iteration: 134431
loss: 0.9973745346069336,grad_norm: 0.9999990742180903, iteration: 134432
loss: 1.022518277168274,grad_norm: 0.9999991800027714, iteration: 134433
loss: 0.9921919107437134,grad_norm: 0.9999991042068714, iteration: 134434
loss: 0.9947693347930908,grad_norm: 0.9999992469254629, iteration: 134435
loss: 1.0368123054504395,grad_norm: 0.8682322128863122, iteration: 134436
loss: 0.9969989657402039,grad_norm: 0.9999989124936003, iteration: 134437
loss: 1.0521775484085083,grad_norm: 0.999999867878261, iteration: 134438
loss: 1.0360863208770752,grad_norm: 0.9999990524830231, iteration: 134439
loss: 0.955998957157135,grad_norm: 0.9999997821989135, iteration: 134440
loss: 1.0044689178466797,grad_norm: 0.8179736596564448, iteration: 134441
loss: 0.9909581542015076,grad_norm: 0.9190984434205713, iteration: 134442
loss: 1.0154147148132324,grad_norm: 0.9999991573350689, iteration: 134443
loss: 0.9892442226409912,grad_norm: 0.9916298195645239, iteration: 134444
loss: 0.9670654535293579,grad_norm: 0.9999992019715398, iteration: 134445
loss: 0.9856498837471008,grad_norm: 0.9999989298534082, iteration: 134446
loss: 1.0596566200256348,grad_norm: 0.9999992745714981, iteration: 134447
loss: 0.9989840388298035,grad_norm: 0.8865740657818904, iteration: 134448
loss: 1.0284258127212524,grad_norm: 0.9999992082212105, iteration: 134449
loss: 0.9998127818107605,grad_norm: 0.9953287503890192, iteration: 134450
loss: 1.0188089609146118,grad_norm: 0.9999991777426397, iteration: 134451
loss: 1.13154935836792,grad_norm: 0.9999995790765671, iteration: 134452
loss: 0.9544748067855835,grad_norm: 0.9999992472765328, iteration: 134453
loss: 1.0250047445297241,grad_norm: 0.9999991658733774, iteration: 134454
loss: 0.9986842274665833,grad_norm: 0.9999990311846076, iteration: 134455
loss: 0.9975488781929016,grad_norm: 0.9999991757205705, iteration: 134456
loss: 0.9584099054336548,grad_norm: 0.9999991896584199, iteration: 134457
loss: 1.0250391960144043,grad_norm: 0.956987279200923, iteration: 134458
loss: 0.9869036674499512,grad_norm: 0.9999990093954165, iteration: 134459
loss: 0.9655248522758484,grad_norm: 0.9999992420478517, iteration: 134460
loss: 0.9901071786880493,grad_norm: 0.9999990677248118, iteration: 134461
loss: 0.9912291169166565,grad_norm: 0.9999992927277436, iteration: 134462
loss: 1.0126267671585083,grad_norm: 0.9900668424356645, iteration: 134463
loss: 0.9943408966064453,grad_norm: 0.9170279054031155, iteration: 134464
loss: 0.9633508324623108,grad_norm: 0.9999990043249032, iteration: 134465
loss: 1.0003972053527832,grad_norm: 0.8719567953224618, iteration: 134466
loss: 0.9978931546211243,grad_norm: 0.9999992607361903, iteration: 134467
loss: 0.9958019852638245,grad_norm: 0.9999991213660656, iteration: 134468
loss: 1.0359572172164917,grad_norm: 0.9756387407148154, iteration: 134469
loss: 1.0404918193817139,grad_norm: 0.9999991059773813, iteration: 134470
loss: 0.9993534088134766,grad_norm: 0.9998130596240079, iteration: 134471
loss: 1.0235854387283325,grad_norm: 0.999998991094307, iteration: 134472
loss: 1.0145307779312134,grad_norm: 0.9058121764835154, iteration: 134473
loss: 1.0026179552078247,grad_norm: 0.999999874150978, iteration: 134474
loss: 0.9988496899604797,grad_norm: 0.8676552835102735, iteration: 134475
loss: 0.9734193682670593,grad_norm: 0.999999013767456, iteration: 134476
loss: 1.020772933959961,grad_norm: 0.9621605433619155, iteration: 134477
loss: 0.9951608180999756,grad_norm: 0.921356422630807, iteration: 134478
loss: 1.0106827020645142,grad_norm: 0.9999992533650417, iteration: 134479
loss: 1.0100111961364746,grad_norm: 0.8817302317533069, iteration: 134480
loss: 1.0376259088516235,grad_norm: 0.9999990451617158, iteration: 134481
loss: 0.9444501996040344,grad_norm: 0.9999993188137923, iteration: 134482
loss: 0.9933078289031982,grad_norm: 0.9999991672169919, iteration: 134483
loss: 0.9852258563041687,grad_norm: 0.9999992273677794, iteration: 134484
loss: 1.0132510662078857,grad_norm: 0.999999157629489, iteration: 134485
loss: 0.9897922277450562,grad_norm: 0.9999991805394328, iteration: 134486
loss: 1.0094621181488037,grad_norm: 0.9999991351250295, iteration: 134487
loss: 1.1554521322250366,grad_norm: 0.9999992743002964, iteration: 134488
loss: 1.0148040056228638,grad_norm: 0.999999047341315, iteration: 134489
loss: 0.9901402592658997,grad_norm: 0.9166076770465337, iteration: 134490
loss: 0.9895031452178955,grad_norm: 0.9999993110351553, iteration: 134491
loss: 1.0711698532104492,grad_norm: 0.9999992184674379, iteration: 134492
loss: 1.0509346723556519,grad_norm: 0.9483744322449987, iteration: 134493
loss: 0.9832828044891357,grad_norm: 0.9999992332466143, iteration: 134494
loss: 0.9951362013816833,grad_norm: 0.999999253859601, iteration: 134495
loss: 0.9781971573829651,grad_norm: 0.8784692190466198, iteration: 134496
loss: 0.9731820225715637,grad_norm: 0.8534013439286032, iteration: 134497
loss: 1.018676519393921,grad_norm: 0.9999990231769676, iteration: 134498
loss: 1.0413727760314941,grad_norm: 0.9779149207626049, iteration: 134499
loss: 0.9772413969039917,grad_norm: 0.9988505423529489, iteration: 134500
loss: 1.0146613121032715,grad_norm: 0.9999989792510129, iteration: 134501
loss: 1.013658046722412,grad_norm: 0.9999990542902835, iteration: 134502
loss: 1.0166006088256836,grad_norm: 0.9999990195192711, iteration: 134503
loss: 0.9756046533584595,grad_norm: 0.9999989448310753, iteration: 134504
loss: 1.0447393655776978,grad_norm: 0.9999994002470864, iteration: 134505
loss: 1.0079073905944824,grad_norm: 0.8537001256830273, iteration: 134506
loss: 1.0279150009155273,grad_norm: 0.9999991945394495, iteration: 134507
loss: 1.002120852470398,grad_norm: 0.9551562584560545, iteration: 134508
loss: 1.0002034902572632,grad_norm: 0.9999991473355292, iteration: 134509
loss: 0.9607797861099243,grad_norm: 0.9999992184674907, iteration: 134510
loss: 0.9779800772666931,grad_norm: 0.8984928917932222, iteration: 134511
loss: 1.0130141973495483,grad_norm: 0.9999991596599254, iteration: 134512
loss: 1.0147908926010132,grad_norm: 0.9999991508109717, iteration: 134513
loss: 0.9711222052574158,grad_norm: 0.8584204829910084, iteration: 134514
loss: 1.0525987148284912,grad_norm: 0.99999917115152, iteration: 134515
loss: 1.0171935558319092,grad_norm: 0.999999894873065, iteration: 134516
loss: 0.9962365031242371,grad_norm: 0.936218104338865, iteration: 134517
loss: 0.9576866030693054,grad_norm: 0.9999990441645958, iteration: 134518
loss: 0.990610659122467,grad_norm: 0.9999991792899489, iteration: 134519
loss: 0.9971336722373962,grad_norm: 0.9999990696281088, iteration: 134520
loss: 1.0069411993026733,grad_norm: 0.999999024268796, iteration: 134521
loss: 0.9921151995658875,grad_norm: 0.8480926571207433, iteration: 134522
loss: 0.9695037007331848,grad_norm: 0.9999999149294453, iteration: 134523
loss: 1.0438464879989624,grad_norm: 0.9999992397722318, iteration: 134524
loss: 0.9856581091880798,grad_norm: 0.9866261810050746, iteration: 134525
loss: 0.988491415977478,grad_norm: 0.9999991776516582, iteration: 134526
loss: 1.0411397218704224,grad_norm: 0.9999991173956753, iteration: 134527
loss: 0.9988542199134827,grad_norm: 0.9999990979287616, iteration: 134528
loss: 0.9934919476509094,grad_norm: 0.9999991528641403, iteration: 134529
loss: 1.0181653499603271,grad_norm: 0.9299071895389042, iteration: 134530
loss: 0.9936062097549438,grad_norm: 0.9999990375138913, iteration: 134531
loss: 1.0004469156265259,grad_norm: 0.9287559832998173, iteration: 134532
loss: 1.0059778690338135,grad_norm: 0.9675896144242806, iteration: 134533
loss: 0.9956673979759216,grad_norm: 0.999999610665841, iteration: 134534
loss: 1.0200934410095215,grad_norm: 0.9999991025112557, iteration: 134535
loss: 0.9918889403343201,grad_norm: 0.9999990989558163, iteration: 134536
loss: 0.997793972492218,grad_norm: 0.9999991873002815, iteration: 134537
loss: 0.9791987538337708,grad_norm: 0.9367290371860979, iteration: 134538
loss: 1.0092813968658447,grad_norm: 0.8087785039207591, iteration: 134539
loss: 0.9884721040725708,grad_norm: 0.9999991013567289, iteration: 134540
loss: 1.0239368677139282,grad_norm: 0.9999992517688182, iteration: 134541
loss: 0.9971266984939575,grad_norm: 0.9999991462787738, iteration: 134542
loss: 0.974005401134491,grad_norm: 0.8377742030155109, iteration: 134543
loss: 1.0141726732254028,grad_norm: 0.9442741064394466, iteration: 134544
loss: 0.9623056650161743,grad_norm: 0.9999991655222897, iteration: 134545
loss: 1.0208892822265625,grad_norm: 0.8791358711401852, iteration: 134546
loss: 0.9976817965507507,grad_norm: 0.9999991393975856, iteration: 134547
loss: 0.9846681952476501,grad_norm: 0.9721332999129392, iteration: 134548
loss: 1.0132097005844116,grad_norm: 0.8423274426065888, iteration: 134549
loss: 0.9814234972000122,grad_norm: 0.9999991275830928, iteration: 134550
loss: 0.9844716191291809,grad_norm: 0.8532569668017214, iteration: 134551
loss: 1.0161900520324707,grad_norm: 0.99999909747335, iteration: 134552
loss: 1.0047187805175781,grad_norm: 0.9999990332736638, iteration: 134553
loss: 1.0323115587234497,grad_norm: 0.999998936293018, iteration: 134554
loss: 0.9923499226570129,grad_norm: 0.9839503231998595, iteration: 134555
loss: 0.9951835870742798,grad_norm: 0.9278626581076959, iteration: 134556
loss: 1.0126183032989502,grad_norm: 0.9512883764868844, iteration: 134557
loss: 0.9589757323265076,grad_norm: 0.9999990965306927, iteration: 134558
loss: 0.9943860769271851,grad_norm: 0.9216170800634264, iteration: 134559
loss: 1.0350040197372437,grad_norm: 0.9999992168801985, iteration: 134560
loss: 0.9868174195289612,grad_norm: 0.8612989960370161, iteration: 134561
loss: 1.0123567581176758,grad_norm: 0.9232756175013879, iteration: 134562
loss: 1.0323246717453003,grad_norm: 0.9999992123293036, iteration: 134563
loss: 0.9886523485183716,grad_norm: 0.9999989964888761, iteration: 134564
loss: 1.0001943111419678,grad_norm: 0.9999990691443825, iteration: 134565
loss: 0.9940432906150818,grad_norm: 0.8704467556792422, iteration: 134566
loss: 0.973544716835022,grad_norm: 0.917153730605308, iteration: 134567
loss: 0.9539157152175903,grad_norm: 0.9999991098566179, iteration: 134568
loss: 1.0285701751708984,grad_norm: 0.9999990591069764, iteration: 134569
loss: 1.0035321712493896,grad_norm: 0.9999992158112967, iteration: 134570
loss: 1.0281131267547607,grad_norm: 0.9999990461511366, iteration: 134571
loss: 0.9712557792663574,grad_norm: 0.9999988977142578, iteration: 134572
loss: 1.004364013671875,grad_norm: 0.9999992847949893, iteration: 134573
loss: 0.9522117376327515,grad_norm: 0.9999992349840227, iteration: 134574
loss: 1.0093647241592407,grad_norm: 0.9999989761827548, iteration: 134575
loss: 0.9848575592041016,grad_norm: 0.9999989765850724, iteration: 134576
loss: 1.0080140829086304,grad_norm: 0.9999992628324192, iteration: 134577
loss: 1.0053702592849731,grad_norm: 0.9999992443422537, iteration: 134578
loss: 0.9945350885391235,grad_norm: 0.8775349567366028, iteration: 134579
loss: 0.9708446264266968,grad_norm: 0.796434711499762, iteration: 134580
loss: 0.9493970274925232,grad_norm: 0.964420302585291, iteration: 134581
loss: 0.9864934682846069,grad_norm: 0.964026778871961, iteration: 134582
loss: 0.9775992035865784,grad_norm: 0.9999991513025828, iteration: 134583
loss: 0.966438889503479,grad_norm: 0.9191473455945828, iteration: 134584
loss: 0.9945003390312195,grad_norm: 0.8260045045510881, iteration: 134585
loss: 1.0104557275772095,grad_norm: 0.9999990343062236, iteration: 134586
loss: 0.99183189868927,grad_norm: 0.9999991438851942, iteration: 134587
loss: 1.0004135370254517,grad_norm: 0.9660837938170149, iteration: 134588
loss: 0.9544819593429565,grad_norm: 0.9600093351114414, iteration: 134589
loss: 0.9955466389656067,grad_norm: 0.9999992241010357, iteration: 134590
loss: 0.9855806827545166,grad_norm: 0.9999991172901656, iteration: 134591
loss: 1.0226823091506958,grad_norm: 0.9377252747112187, iteration: 134592
loss: 1.0068241357803345,grad_norm: 0.807507214822037, iteration: 134593
loss: 0.9841766357421875,grad_norm: 0.9170388173282192, iteration: 134594
loss: 0.9886474609375,grad_norm: 0.8924790027019407, iteration: 134595
loss: 0.9732043743133545,grad_norm: 0.9016959485635779, iteration: 134596
loss: 0.9964892268180847,grad_norm: 0.999999162758359, iteration: 134597
loss: 0.9913221001625061,grad_norm: 0.9999992886583037, iteration: 134598
loss: 0.9828174114227295,grad_norm: 0.999999274247573, iteration: 134599
loss: 1.000177264213562,grad_norm: 0.8472884011124798, iteration: 134600
loss: 1.023477554321289,grad_norm: 0.9999989979604408, iteration: 134601
loss: 1.0601638555526733,grad_norm: 0.9999999431636827, iteration: 134602
loss: 1.0085668563842773,grad_norm: 0.9999991509182359, iteration: 134603
loss: 1.024280071258545,grad_norm: 0.9999991544906366, iteration: 134604
loss: 1.0355581045150757,grad_norm: 0.9999991375296111, iteration: 134605
loss: 1.0005711317062378,grad_norm: 0.9000249771206479, iteration: 134606
loss: 0.9891939759254456,grad_norm: 0.9999992077037784, iteration: 134607
loss: 1.0041030645370483,grad_norm: 0.8822862239108182, iteration: 134608
loss: 1.0172253847122192,grad_norm: 0.8935934060996082, iteration: 134609
loss: 0.9892690181732178,grad_norm: 0.9387680510095601, iteration: 134610
loss: 0.9804763197898865,grad_norm: 0.9999990428755917, iteration: 134611
loss: 1.0033953189849854,grad_norm: 0.9999992677033336, iteration: 134612
loss: 0.9661344289779663,grad_norm: 0.9999990361986899, iteration: 134613
loss: 1.0116987228393555,grad_norm: 0.9999991261342556, iteration: 134614
loss: 0.9988462328910828,grad_norm: 0.9999990444877961, iteration: 134615
loss: 0.96006178855896,grad_norm: 0.9999989951522631, iteration: 134616
loss: 0.9929827451705933,grad_norm: 0.9999991673349503, iteration: 134617
loss: 0.9952386617660522,grad_norm: 0.99999924867283, iteration: 134618
loss: 1.0010871887207031,grad_norm: 0.9999990559412023, iteration: 134619
loss: 1.0314762592315674,grad_norm: 0.999999074477118, iteration: 134620
loss: 1.0012743473052979,grad_norm: 0.9813430196012866, iteration: 134621
loss: 1.0284839868545532,grad_norm: 0.9999998462062016, iteration: 134622
loss: 1.0006681680679321,grad_norm: 0.9999990336803186, iteration: 134623
loss: 1.000163197517395,grad_norm: 0.9230852870416768, iteration: 134624
loss: 0.9610334634780884,grad_norm: 0.8499448198661508, iteration: 134625
loss: 0.9837556481361389,grad_norm: 0.9785056202612092, iteration: 134626
loss: 1.020795464515686,grad_norm: 0.9529711195414406, iteration: 134627
loss: 0.9984327554702759,grad_norm: 0.9999990183453794, iteration: 134628
loss: 0.9687346816062927,grad_norm: 0.9999992713220837, iteration: 134629
loss: 0.9733762145042419,grad_norm: 0.8288783387278428, iteration: 134630
loss: 0.9927275776863098,grad_norm: 0.9999991626029937, iteration: 134631
loss: 0.9988662600517273,grad_norm: 0.9999990815598774, iteration: 134632
loss: 0.986365795135498,grad_norm: 0.9999990570001521, iteration: 134633
loss: 0.9913915395736694,grad_norm: 0.9999994743671645, iteration: 134634
loss: 1.002312421798706,grad_norm: 0.9946742793492129, iteration: 134635
loss: 1.046099305152893,grad_norm: 0.9351434437983408, iteration: 134636
loss: 0.9992789626121521,grad_norm: 0.885232469612372, iteration: 134637
loss: 0.9834111332893372,grad_norm: 0.9134622270533286, iteration: 134638
loss: 0.9778655171394348,grad_norm: 0.9999991455238258, iteration: 134639
loss: 1.007137656211853,grad_norm: 0.9999990121182251, iteration: 134640
loss: 0.9674512147903442,grad_norm: 0.9878552312791147, iteration: 134641
loss: 1.04239022731781,grad_norm: 0.8416452254863791, iteration: 134642
loss: 0.9800635576248169,grad_norm: 0.9908497518217408, iteration: 134643
loss: 1.0152535438537598,grad_norm: 0.9999990789067573, iteration: 134644
loss: 1.0093104839324951,grad_norm: 0.9742480382911497, iteration: 134645
loss: 0.9922114610671997,grad_norm: 0.9639583145421937, iteration: 134646
loss: 0.9729740023612976,grad_norm: 0.9999991311323856, iteration: 134647
loss: 0.9974673986434937,grad_norm: 0.9999990840749449, iteration: 134648
loss: 0.9961907267570496,grad_norm: 0.965752295296848, iteration: 134649
loss: 1.0078608989715576,grad_norm: 0.9999998833632314, iteration: 134650
loss: 1.0151108503341675,grad_norm: 0.8584133676595768, iteration: 134651
loss: 1.0354056358337402,grad_norm: 0.9999992490028422, iteration: 134652
loss: 1.0059497356414795,grad_norm: 0.9999991802548541, iteration: 134653
loss: 0.9720559120178223,grad_norm: 0.9911820475845784, iteration: 134654
loss: 0.98653644323349,grad_norm: 0.9999991783325326, iteration: 134655
loss: 1.0032200813293457,grad_norm: 0.9999990858621937, iteration: 134656
loss: 1.013492226600647,grad_norm: 0.9999991359675247, iteration: 134657
loss: 0.9793094396591187,grad_norm: 0.9999993652403367, iteration: 134658
loss: 0.994854211807251,grad_norm: 0.9999991803050327, iteration: 134659
loss: 1.0032581090927124,grad_norm: 0.9999992970625592, iteration: 134660
loss: 1.0254130363464355,grad_norm: 0.9999991365857985, iteration: 134661
loss: 1.0371673107147217,grad_norm: 0.9768191323257034, iteration: 134662
loss: 0.9875394701957703,grad_norm: 0.969194189637787, iteration: 134663
loss: 0.9786401987075806,grad_norm: 0.9999990524854772, iteration: 134664
loss: 0.9711639881134033,grad_norm: 0.999999296183228, iteration: 134665
loss: 0.9852919578552246,grad_norm: 0.9999991237458389, iteration: 134666
loss: 1.0144951343536377,grad_norm: 0.9999989227985233, iteration: 134667
loss: 1.0204620361328125,grad_norm: 0.9999992825270264, iteration: 134668
loss: 1.0216931104660034,grad_norm: 0.9999990614447962, iteration: 134669
loss: 1.0117257833480835,grad_norm: 0.9999992971511805, iteration: 134670
loss: 1.0155268907546997,grad_norm: 0.9918548181917803, iteration: 134671
loss: 0.9940662384033203,grad_norm: 0.9999992891145676, iteration: 134672
loss: 0.9811328649520874,grad_norm: 0.8937305789123094, iteration: 134673
loss: 1.0229545831680298,grad_norm: 0.9999990400231046, iteration: 134674
loss: 0.9915434718132019,grad_norm: 0.9529892498275404, iteration: 134675
loss: 0.9344099164009094,grad_norm: 0.9999991665641541, iteration: 134676
loss: 0.9874811768531799,grad_norm: 0.9999990792289183, iteration: 134677
loss: 1.0580050945281982,grad_norm: 0.9432657753898694, iteration: 134678
loss: 1.024403691291809,grad_norm: 0.9999991608235944, iteration: 134679
loss: 1.0183253288269043,grad_norm: 0.9999991042472905, iteration: 134680
loss: 1.0243326425552368,grad_norm: 0.9999992305871536, iteration: 134681
loss: 1.0219534635543823,grad_norm: 0.9638453463354496, iteration: 134682
loss: 0.9809603095054626,grad_norm: 0.9422105924235851, iteration: 134683
loss: 1.0160490274429321,grad_norm: 0.9999991726659035, iteration: 134684
loss: 1.001625418663025,grad_norm: 0.9999989664953158, iteration: 134685
loss: 1.0042157173156738,grad_norm: 0.9999990007298166, iteration: 134686
loss: 1.013522744178772,grad_norm: 0.933274440347393, iteration: 134687
loss: 0.9850701093673706,grad_norm: 0.9999991343535225, iteration: 134688
loss: 0.985617458820343,grad_norm: 0.9999992013539388, iteration: 134689
loss: 1.0100194215774536,grad_norm: 0.9138802172838011, iteration: 134690
loss: 0.9744663238525391,grad_norm: 0.9999999552258498, iteration: 134691
loss: 1.0293241739273071,grad_norm: 0.9999991207586674, iteration: 134692
loss: 1.0205211639404297,grad_norm: 0.8525258977861072, iteration: 134693
loss: 1.0377461910247803,grad_norm: 0.9324602438038576, iteration: 134694
loss: 0.9892786741256714,grad_norm: 0.9999991952081249, iteration: 134695
loss: 1.0029810667037964,grad_norm: 0.9999997494491404, iteration: 134696
loss: 0.9926954507827759,grad_norm: 0.9383323236133964, iteration: 134697
loss: 1.013949990272522,grad_norm: 0.9001002861982222, iteration: 134698
loss: 0.9927037954330444,grad_norm: 0.8414816991178729, iteration: 134699
loss: 0.9786865711212158,grad_norm: 0.9316069182852323, iteration: 134700
loss: 1.0105267763137817,grad_norm: 0.9966028885615265, iteration: 134701
loss: 0.9945328235626221,grad_norm: 0.9206874183362316, iteration: 134702
loss: 0.9525343179702759,grad_norm: 0.94336030422626, iteration: 134703
loss: 0.9629523158073425,grad_norm: 0.9999991106441516, iteration: 134704
loss: 0.9999253749847412,grad_norm: 0.9999990619964045, iteration: 134705
loss: 0.9524566531181335,grad_norm: 0.9269270329574661, iteration: 134706
loss: 1.0183122158050537,grad_norm: 0.9906474184782721, iteration: 134707
loss: 0.9472329616546631,grad_norm: 0.9999991861527494, iteration: 134708
loss: 0.9969985485076904,grad_norm: 0.9999991649415275, iteration: 134709
loss: 0.9652568101882935,grad_norm: 0.9108204072401067, iteration: 134710
loss: 1.0004364252090454,grad_norm: 0.9999990804126432, iteration: 134711
loss: 0.9901461601257324,grad_norm: 0.9999991720063608, iteration: 134712
loss: 0.9731731414794922,grad_norm: 0.9999992430011841, iteration: 134713
loss: 0.9834074378013611,grad_norm: 0.9999991397428396, iteration: 134714
loss: 0.9924948811531067,grad_norm: 0.9999992167615084, iteration: 134715
loss: 0.9906237125396729,grad_norm: 0.9999990621837573, iteration: 134716
loss: 0.9658801555633545,grad_norm: 0.8311346711255159, iteration: 134717
loss: 0.9794225096702576,grad_norm: 0.9463453884960484, iteration: 134718
loss: 0.9931597113609314,grad_norm: 0.9389976484292301, iteration: 134719
loss: 1.0075538158416748,grad_norm: 0.984379124924053, iteration: 134720
loss: 1.0056661367416382,grad_norm: 0.9999991212554916, iteration: 134721
loss: 1.0159567594528198,grad_norm: 0.9906670580907809, iteration: 134722
loss: 0.9748827815055847,grad_norm: 0.9999990605934568, iteration: 134723
loss: 0.9877904057502747,grad_norm: 0.9999993830183802, iteration: 134724
loss: 1.0283780097961426,grad_norm: 0.9691056574843925, iteration: 134725
loss: 1.0398401021957397,grad_norm: 0.9734083422567604, iteration: 134726
loss: 1.0311323404312134,grad_norm: 0.9661282439583941, iteration: 134727
loss: 1.0082627534866333,grad_norm: 0.9999990290551889, iteration: 134728
loss: 0.9675257802009583,grad_norm: 0.979466065002579, iteration: 134729
loss: 1.0252923965454102,grad_norm: 0.9999990414036124, iteration: 134730
loss: 1.0002180337905884,grad_norm: 0.999999017759558, iteration: 134731
loss: 0.9869866371154785,grad_norm: 0.8879571670389198, iteration: 134732
loss: 1.003013253211975,grad_norm: 0.8412471564721062, iteration: 134733
loss: 1.0261476039886475,grad_norm: 0.9999991074850954, iteration: 134734
loss: 1.002227783203125,grad_norm: 0.9217292507403397, iteration: 134735
loss: 1.015938639640808,grad_norm: 0.9999990798836046, iteration: 134736
loss: 0.9476452469825745,grad_norm: 0.9999991895352375, iteration: 134737
loss: 0.9898926615715027,grad_norm: 0.9453562821418863, iteration: 134738
loss: 1.0151338577270508,grad_norm: 0.9970933028691589, iteration: 134739
loss: 0.9846588373184204,grad_norm: 0.9999991344138156, iteration: 134740
loss: 1.001875877380371,grad_norm: 0.9999991154509303, iteration: 134741
loss: 1.018432855606079,grad_norm: 0.879258468947397, iteration: 134742
loss: 1.0218825340270996,grad_norm: 0.9303404364193222, iteration: 134743
loss: 0.9934929013252258,grad_norm: 0.914609433975238, iteration: 134744
loss: 0.998039186000824,grad_norm: 0.9999991644733409, iteration: 134745
loss: 1.0061579942703247,grad_norm: 0.9999989940621647, iteration: 134746
loss: 1.009636402130127,grad_norm: 0.9311675689321947, iteration: 134747
loss: 1.0549862384796143,grad_norm: 0.9999995902757174, iteration: 134748
loss: 0.9773550629615784,grad_norm: 0.9845771372786948, iteration: 134749
loss: 0.9856533408164978,grad_norm: 0.8781968844765399, iteration: 134750
loss: 0.9903821349143982,grad_norm: 0.9999992081546007, iteration: 134751
loss: 0.9942633509635925,grad_norm: 0.9999991398814284, iteration: 134752
loss: 0.9948733448982239,grad_norm: 0.9999991999114556, iteration: 134753
loss: 1.0311627388000488,grad_norm: 0.9999990699308998, iteration: 134754
loss: 0.9793183207511902,grad_norm: 0.9999992592487004, iteration: 134755
loss: 0.9921443462371826,grad_norm: 0.901438999480299, iteration: 134756
loss: 1.0030056238174438,grad_norm: 0.9999990587397518, iteration: 134757
loss: 0.9827936887741089,grad_norm: 0.9999993339177784, iteration: 134758
loss: 1.0181801319122314,grad_norm: 0.9658749366367024, iteration: 134759
loss: 1.0194474458694458,grad_norm: 0.9841166266079671, iteration: 134760
loss: 0.9909552931785583,grad_norm: 0.9999992178303231, iteration: 134761
loss: 1.038761019706726,grad_norm: 0.9657230323468645, iteration: 134762
loss: 1.0338187217712402,grad_norm: 0.9999991900195232, iteration: 134763
loss: 0.9938587546348572,grad_norm: 0.9999990844907561, iteration: 134764
loss: 1.0114264488220215,grad_norm: 0.9999992870938476, iteration: 134765
loss: 1.0262198448181152,grad_norm: 0.9291362710268439, iteration: 134766
loss: 0.9641788601875305,grad_norm: 0.9999991515326927, iteration: 134767
loss: 1.0356948375701904,grad_norm: 0.9999991468275138, iteration: 134768
loss: 0.9723617434501648,grad_norm: 0.9240637733630225, iteration: 134769
loss: 0.9866164326667786,grad_norm: 0.999998979212739, iteration: 134770
loss: 1.0119088888168335,grad_norm: 0.9999991075966489, iteration: 134771
loss: 1.0064239501953125,grad_norm: 0.9971533949590834, iteration: 134772
loss: 0.9893513321876526,grad_norm: 0.9103924944691774, iteration: 134773
loss: 0.9830955862998962,grad_norm: 0.9491056997341835, iteration: 134774
loss: 1.001522421836853,grad_norm: 0.9032504748873227, iteration: 134775
loss: 0.9948981404304504,grad_norm: 0.9720084143247832, iteration: 134776
loss: 1.0052179098129272,grad_norm: 0.9733953949061935, iteration: 134777
loss: 0.9843282103538513,grad_norm: 0.9503515963282644, iteration: 134778
loss: 0.9946722984313965,grad_norm: 0.9999996830006569, iteration: 134779
loss: 0.9824452996253967,grad_norm: 0.9999990800070117, iteration: 134780
loss: 1.0231646299362183,grad_norm: 0.9999991289292602, iteration: 134781
loss: 1.0052005052566528,grad_norm: 0.999999077922169, iteration: 134782
loss: 1.012063980102539,grad_norm: 0.9999989998370247, iteration: 134783
loss: 0.995810866355896,grad_norm: 0.9999991833732856, iteration: 134784
loss: 1.0224756002426147,grad_norm: 0.9300889248679716, iteration: 134785
loss: 0.9691126346588135,grad_norm: 0.9942149712365302, iteration: 134786
loss: 1.0044232606887817,grad_norm: 0.8683080981889149, iteration: 134787
loss: 1.012666940689087,grad_norm: 0.9999992229675696, iteration: 134788
loss: 0.9992488026618958,grad_norm: 0.8920317352195697, iteration: 134789
loss: 1.0303808450698853,grad_norm: 0.9690955151082143, iteration: 134790
loss: 1.02115797996521,grad_norm: 0.9999992162006712, iteration: 134791
loss: 1.0335770845413208,grad_norm: 0.9999991371157708, iteration: 134792
loss: 1.0069682598114014,grad_norm: 0.999999088470934, iteration: 134793
loss: 1.0188435316085815,grad_norm: 0.9999992346915959, iteration: 134794
loss: 0.9997006058692932,grad_norm: 0.9999992407155611, iteration: 134795
loss: 1.0309926271438599,grad_norm: 0.999999131764056, iteration: 134796
loss: 0.99619060754776,grad_norm: 0.9999989763401695, iteration: 134797
loss: 0.985424280166626,grad_norm: 0.9999991536139353, iteration: 134798
loss: 1.0378998517990112,grad_norm: 0.9999990318163223, iteration: 134799
loss: 0.9745398759841919,grad_norm: 0.9999992992631758, iteration: 134800
loss: 1.0011937618255615,grad_norm: 0.9999990860788014, iteration: 134801
loss: 1.0048537254333496,grad_norm: 0.9999990863619244, iteration: 134802
loss: 1.0165098905563354,grad_norm: 0.9999990950806341, iteration: 134803
loss: 1.0191227197647095,grad_norm: 0.9999992021708791, iteration: 134804
loss: 0.9881156086921692,grad_norm: 0.9227846255736245, iteration: 134805
loss: 1.0085374116897583,grad_norm: 0.9999991026558415, iteration: 134806
loss: 0.9712738394737244,grad_norm: 0.9913353079092162, iteration: 134807
loss: 0.97429358959198,grad_norm: 0.9557084095770609, iteration: 134808
loss: 1.0231598615646362,grad_norm: 0.9577558049915506, iteration: 134809
loss: 0.9445892572402954,grad_norm: 0.9870926123552637, iteration: 134810
loss: 0.981814444065094,grad_norm: 0.9999991209232881, iteration: 134811
loss: 0.9969260692596436,grad_norm: 0.9999990773243851, iteration: 134812
loss: 0.9830791354179382,grad_norm: 0.9113567981818108, iteration: 134813
loss: 0.9790594577789307,grad_norm: 0.8242332966762985, iteration: 134814
loss: 1.0230419635772705,grad_norm: 0.9999989809518056, iteration: 134815
loss: 0.9631675481796265,grad_norm: 0.9999991456738634, iteration: 134816
loss: 0.9997820258140564,grad_norm: 0.9999991631055162, iteration: 134817
loss: 0.9938343167304993,grad_norm: 0.9999992119453973, iteration: 134818
loss: 0.9958060383796692,grad_norm: 0.9999991943789798, iteration: 134819
loss: 0.9738867282867432,grad_norm: 0.9999991825180351, iteration: 134820
loss: 0.9825938940048218,grad_norm: 0.9999991219869474, iteration: 134821
loss: 0.9786110520362854,grad_norm: 0.9999992024732922, iteration: 134822
loss: 1.0035370588302612,grad_norm: 0.999999141788892, iteration: 134823
loss: 1.0115163326263428,grad_norm: 0.9999990631911903, iteration: 134824
loss: 0.9898908734321594,grad_norm: 0.9363888119743105, iteration: 134825
loss: 1.0148831605911255,grad_norm: 0.8730390648055727, iteration: 134826
loss: 0.9873660802841187,grad_norm: 0.9135644033396128, iteration: 134827
loss: 0.9950965642929077,grad_norm: 0.9999991133177345, iteration: 134828
loss: 1.0138328075408936,grad_norm: 0.9999989174447838, iteration: 134829
loss: 0.9910363554954529,grad_norm: 0.9627112834210612, iteration: 134830
loss: 1.0141392946243286,grad_norm: 0.9999994810324019, iteration: 134831
loss: 1.001059889793396,grad_norm: 0.9999992329212032, iteration: 134832
loss: 1.0177303552627563,grad_norm: 0.9999989721871477, iteration: 134833
loss: 1.0002796649932861,grad_norm: 0.9999990619153685, iteration: 134834
loss: 1.0175877809524536,grad_norm: 0.848665643172007, iteration: 134835
loss: 1.0094020366668701,grad_norm: 0.9723548857271312, iteration: 134836
loss: 0.9802476167678833,grad_norm: 0.9999991517032498, iteration: 134837
loss: 1.0229800939559937,grad_norm: 0.9999990848960725, iteration: 134838
loss: 0.9804815053939819,grad_norm: 0.9276984755368993, iteration: 134839
loss: 0.9928056597709656,grad_norm: 0.8228801865620345, iteration: 134840
loss: 0.9697733521461487,grad_norm: 0.9661049912624757, iteration: 134841
loss: 1.034266710281372,grad_norm: 0.9999990985819974, iteration: 134842
loss: 1.0226359367370605,grad_norm: 0.8922198629213235, iteration: 134843
loss: 0.9943817853927612,grad_norm: 0.9999990830027595, iteration: 134844
loss: 0.9968059062957764,grad_norm: 0.9999992426634938, iteration: 134845
loss: 0.9817701578140259,grad_norm: 0.9999991245808515, iteration: 134846
loss: 0.9960921406745911,grad_norm: 0.9999992060770224, iteration: 134847
loss: 0.9844732880592346,grad_norm: 0.9528382376249175, iteration: 134848
loss: 1.010860800743103,grad_norm: 0.9999991171589269, iteration: 134849
loss: 1.005158543586731,grad_norm: 0.9139493043714568, iteration: 134850
loss: 1.0533136129379272,grad_norm: 0.9996716915384231, iteration: 134851
loss: 1.0307525396347046,grad_norm: 0.9999991657499617, iteration: 134852
loss: 0.980996310710907,grad_norm: 0.9999991049188465, iteration: 134853
loss: 1.00045907497406,grad_norm: 0.9412832705125812, iteration: 134854
loss: 0.97220778465271,grad_norm: 0.9471450585629767, iteration: 134855
loss: 0.9729065895080566,grad_norm: 0.9247723711880752, iteration: 134856
loss: 0.9840037822723389,grad_norm: 0.9505314544860356, iteration: 134857
loss: 1.0506409406661987,grad_norm: 0.9999991677642484, iteration: 134858
loss: 0.9987776875495911,grad_norm: 0.9057596262736359, iteration: 134859
loss: 1.0012420415878296,grad_norm: 0.9999990088931553, iteration: 134860
loss: 0.9753105044364929,grad_norm: 0.9708226575145117, iteration: 134861
loss: 0.9685977697372437,grad_norm: 0.9999992042047637, iteration: 134862
loss: 0.9978224635124207,grad_norm: 0.9999991866391025, iteration: 134863
loss: 0.9743939638137817,grad_norm: 0.9509059973477089, iteration: 134864
loss: 0.980685830116272,grad_norm: 0.999999195102039, iteration: 134865
loss: 1.0173276662826538,grad_norm: 0.9324211048217336, iteration: 134866
loss: 1.0236188173294067,grad_norm: 0.7691170456056043, iteration: 134867
loss: 0.9552236199378967,grad_norm: 0.9687523225227891, iteration: 134868
loss: 0.9894958138465881,grad_norm: 0.9999990895109001, iteration: 134869
loss: 0.9696367383003235,grad_norm: 0.9942809416934492, iteration: 134870
loss: 1.0182040929794312,grad_norm: 0.9999991476616097, iteration: 134871
loss: 0.9547430872917175,grad_norm: 0.9999992196331954, iteration: 134872
loss: 0.9766437411308289,grad_norm: 0.8580273637488588, iteration: 134873
loss: 0.978851318359375,grad_norm: 0.9999994419367823, iteration: 134874
loss: 0.9846757650375366,grad_norm: 0.9999994121332839, iteration: 134875
loss: 1.1067657470703125,grad_norm: 0.99999917246294, iteration: 134876
loss: 0.9894835948944092,grad_norm: 0.999999243120014, iteration: 134877
loss: 1.0359032154083252,grad_norm: 0.9999992177084515, iteration: 134878
loss: 0.9952822923660278,grad_norm: 0.9986028567605012, iteration: 134879
loss: 0.9957438111305237,grad_norm: 0.955242062632815, iteration: 134880
loss: 0.995110273361206,grad_norm: 0.8505911214496047, iteration: 134881
loss: 0.9928610324859619,grad_norm: 0.8834774937171028, iteration: 134882
loss: 0.9805681109428406,grad_norm: 0.942476523532404, iteration: 134883
loss: 0.9790080189704895,grad_norm: 0.9960369349685252, iteration: 134884
loss: 0.9936516880989075,grad_norm: 0.9999990846989072, iteration: 134885
loss: 1.0010528564453125,grad_norm: 0.9072306104657467, iteration: 134886
loss: 0.9650956988334656,grad_norm: 0.9999990795417556, iteration: 134887
loss: 1.0113413333892822,grad_norm: 0.9999991146405718, iteration: 134888
loss: 1.0111095905303955,grad_norm: 0.9999991481496165, iteration: 134889
loss: 1.0092979669570923,grad_norm: 0.999998930926203, iteration: 134890
loss: 1.0110751390457153,grad_norm: 0.9292898427086946, iteration: 134891
loss: 1.0130457878112793,grad_norm: 0.9087336624721145, iteration: 134892
loss: 0.9679083824157715,grad_norm: 0.8870131416502207, iteration: 134893
loss: 1.011713981628418,grad_norm: 0.9999991238091371, iteration: 134894
loss: 1.0122427940368652,grad_norm: 0.8297058036187024, iteration: 134895
loss: 0.9647486805915833,grad_norm: 0.9999990201486307, iteration: 134896
loss: 0.9747052788734436,grad_norm: 0.7991413834398191, iteration: 134897
loss: 0.9766069054603577,grad_norm: 0.999999195366006, iteration: 134898
loss: 0.993439793586731,grad_norm: 0.964384528307325, iteration: 134899
loss: 1.0083554983139038,grad_norm: 0.9999992838893524, iteration: 134900
loss: 1.0065851211547852,grad_norm: 0.9999990756779511, iteration: 134901
loss: 1.0024003982543945,grad_norm: 0.9500929904904653, iteration: 134902
loss: 0.9884308576583862,grad_norm: 0.9999991563784222, iteration: 134903
loss: 1.0297578573226929,grad_norm: 0.9999996306994411, iteration: 134904
loss: 0.9769483208656311,grad_norm: 0.9874562309059975, iteration: 134905
loss: 1.0100374221801758,grad_norm: 0.9416296501607098, iteration: 134906
loss: 1.009339451789856,grad_norm: 0.9999994260069818, iteration: 134907
loss: 1.0631932020187378,grad_norm: 0.9999997315016989, iteration: 134908
loss: 1.4652514457702637,grad_norm: 0.999999863989352, iteration: 134909
loss: 1.0568023920059204,grad_norm: 0.9999996979676173, iteration: 134910
loss: 0.997314989566803,grad_norm: 0.9999991629298555, iteration: 134911
loss: 0.9936964511871338,grad_norm: 0.9080558432718844, iteration: 134912
loss: 0.9853822588920593,grad_norm: 0.9999992472194713, iteration: 134913
loss: 1.0085729360580444,grad_norm: 0.9930224355197815, iteration: 134914
loss: 1.0806642770767212,grad_norm: 0.9999999636235684, iteration: 134915
loss: 0.9933516383171082,grad_norm: 0.9999991614722655, iteration: 134916
loss: 0.9928502440452576,grad_norm: 0.9621037845150584, iteration: 134917
loss: 1.0341014862060547,grad_norm: 0.9765988751844286, iteration: 134918
loss: 0.9849278330802917,grad_norm: 0.999999007434981, iteration: 134919
loss: 1.0016944408416748,grad_norm: 0.9191714953642449, iteration: 134920
loss: 1.0112248659133911,grad_norm: 0.9805668370743893, iteration: 134921
loss: 0.9789448380470276,grad_norm: 0.900843420278064, iteration: 134922
loss: 0.9706035852432251,grad_norm: 0.920949060795344, iteration: 134923
loss: 1.0114564895629883,grad_norm: 0.9321020900330832, iteration: 134924
loss: 1.078052282333374,grad_norm: 0.9999992781992335, iteration: 134925
loss: 0.9831688404083252,grad_norm: 0.999999012685422, iteration: 134926
loss: 1.0059560537338257,grad_norm: 0.999999193220684, iteration: 134927
loss: 0.9480099678039551,grad_norm: 0.9999992581074972, iteration: 134928
loss: 1.063594102859497,grad_norm: 1.0000000720953421, iteration: 134929
loss: 1.0107107162475586,grad_norm: 0.9926538823801209, iteration: 134930
loss: 0.9773253202438354,grad_norm: 0.9999992282262028, iteration: 134931
loss: 1.016626000404358,grad_norm: 0.9999998150830385, iteration: 134932
loss: 0.9712450504302979,grad_norm: 0.9999990485389262, iteration: 134933
loss: 0.9893007874488831,grad_norm: 0.9479664952333224, iteration: 134934
loss: 1.00296151638031,grad_norm: 0.9999990248385653, iteration: 134935
loss: 1.0052690505981445,grad_norm: 0.999999070387778, iteration: 134936
loss: 1.0129553079605103,grad_norm: 0.8979927022815583, iteration: 134937
loss: 1.0058270692825317,grad_norm: 0.9999991589099418, iteration: 134938
loss: 0.9843029379844666,grad_norm: 0.9999990036493462, iteration: 134939
loss: 0.9930095076560974,grad_norm: 0.9584427183060558, iteration: 134940
loss: 1.0490896701812744,grad_norm: 0.9277525559204866, iteration: 134941
loss: 1.026819109916687,grad_norm: 0.99999905148192, iteration: 134942
loss: 0.9978509545326233,grad_norm: 0.976462025208918, iteration: 134943
loss: 0.9776590466499329,grad_norm: 0.990003952285725, iteration: 134944
loss: 1.0253220796585083,grad_norm: 0.9999991605488565, iteration: 134945
loss: 1.0398963689804077,grad_norm: 0.9331195032572188, iteration: 134946
loss: 1.0088112354278564,grad_norm: 0.969076016705811, iteration: 134947
loss: 0.9663397669792175,grad_norm: 0.9999993288163908, iteration: 134948
loss: 1.0044137239456177,grad_norm: 0.9673767556572115, iteration: 134949
loss: 0.9828398823738098,grad_norm: 0.9916066008089558, iteration: 134950
loss: 1.0098001956939697,grad_norm: 0.9999992358256524, iteration: 134951
loss: 0.9931368827819824,grad_norm: 0.9999991186603637, iteration: 134952
loss: 1.0190088748931885,grad_norm: 0.9162997518322014, iteration: 134953
loss: 1.0054281949996948,grad_norm: 0.9999992665027004, iteration: 134954
loss: 1.041696548461914,grad_norm: 0.95189538293843, iteration: 134955
loss: 0.9883198142051697,grad_norm: 0.999999145596961, iteration: 134956
loss: 0.9643445014953613,grad_norm: 0.9999989117821344, iteration: 134957
loss: 0.9939010739326477,grad_norm: 0.9869065231884208, iteration: 134958
loss: 1.0047411918640137,grad_norm: 0.9999992372117249, iteration: 134959
loss: 0.9994129538536072,grad_norm: 0.9999992669115184, iteration: 134960
loss: 1.002334713935852,grad_norm: 0.9146473575732335, iteration: 134961
loss: 0.9964890480041504,grad_norm: 0.9999990592493117, iteration: 134962
loss: 1.0183583498001099,grad_norm: 0.9999992828876225, iteration: 134963
loss: 0.9631220102310181,grad_norm: 0.8819015962326024, iteration: 134964
loss: 1.0068764686584473,grad_norm: 0.9999990213075642, iteration: 134965
loss: 0.9813304543495178,grad_norm: 0.9999989116621824, iteration: 134966
loss: 0.9741985201835632,grad_norm: 0.8661309645185596, iteration: 134967
loss: 1.1074316501617432,grad_norm: 0.9999990953111728, iteration: 134968
loss: 0.9820284247398376,grad_norm: 0.9999991792974169, iteration: 134969
loss: 1.031529188156128,grad_norm: 0.9999992173755475, iteration: 134970
loss: 1.0205376148223877,grad_norm: 0.9999990819504225, iteration: 134971
loss: 1.0109003782272339,grad_norm: 0.83007197542964, iteration: 134972
loss: 0.9609352946281433,grad_norm: 0.9999991565029801, iteration: 134973
loss: 0.9937834143638611,grad_norm: 0.9999991537424797, iteration: 134974
loss: 1.020769476890564,grad_norm: 0.9935230264369044, iteration: 134975
loss: 1.0149614810943604,grad_norm: 0.9999991039591821, iteration: 134976
loss: 0.9741464257240295,grad_norm: 0.9402376656399329, iteration: 134977
loss: 1.0076907873153687,grad_norm: 0.9999989705581228, iteration: 134978
loss: 0.9685169458389282,grad_norm: 0.9620646740911333, iteration: 134979
loss: 0.9894349575042725,grad_norm: 0.9999990523811633, iteration: 134980
loss: 0.9801293015480042,grad_norm: 0.9999991559410812, iteration: 134981
loss: 1.0240082740783691,grad_norm: 0.9999991087521839, iteration: 134982
loss: 1.0286380052566528,grad_norm: 0.9812096115246979, iteration: 134983
loss: 1.0200458765029907,grad_norm: 0.9999990880308443, iteration: 134984
loss: 0.9643034934997559,grad_norm: 0.9232211812184062, iteration: 134985
loss: 0.979034423828125,grad_norm: 0.9999991959850089, iteration: 134986
loss: 1.0321635007858276,grad_norm: 0.8578862185537064, iteration: 134987
loss: 0.9908223152160645,grad_norm: 0.9279149631565333, iteration: 134988
loss: 1.4433802366256714,grad_norm: 0.9999996565118808, iteration: 134989
loss: 1.0221004486083984,grad_norm: 0.9999992540422682, iteration: 134990
loss: 0.9863656163215637,grad_norm: 0.999999118636823, iteration: 134991
loss: 0.9946747422218323,grad_norm: 0.8972528407771178, iteration: 134992
loss: 0.9919381737709045,grad_norm: 0.9585878344040889, iteration: 134993
loss: 1.0158584117889404,grad_norm: 0.9999990607517891, iteration: 134994
loss: 1.0013502836227417,grad_norm: 0.9999990112579974, iteration: 134995
loss: 0.975277841091156,grad_norm: 0.9425801153024119, iteration: 134996
loss: 1.0368272066116333,grad_norm: 0.9999990010475247, iteration: 134997
loss: 0.9925578832626343,grad_norm: 0.9999992083800138, iteration: 134998
loss: 0.9877262711524963,grad_norm: 0.937627040298012, iteration: 134999
loss: 1.0109308958053589,grad_norm: 0.9999992510440556, iteration: 135000
loss: 1.0240858793258667,grad_norm: 0.9999990135447918, iteration: 135001
loss: 1.0290920734405518,grad_norm: 0.9490740050913723, iteration: 135002
loss: 1.020544171333313,grad_norm: 0.9014898454539734, iteration: 135003
loss: 0.9753655195236206,grad_norm: 0.8941906301011133, iteration: 135004
loss: 1.0284099578857422,grad_norm: 0.943007640460586, iteration: 135005
loss: 1.0019747018814087,grad_norm: 0.9999999356720982, iteration: 135006
loss: 0.9443626403808594,grad_norm: 0.9999991459900229, iteration: 135007
loss: 1.0021898746490479,grad_norm: 0.9796477537512581, iteration: 135008
loss: 0.9923447370529175,grad_norm: 0.9999990651213098, iteration: 135009
loss: 1.0194778442382812,grad_norm: 0.9598406831197586, iteration: 135010
loss: 1.0337499380111694,grad_norm: 0.9999991702998597, iteration: 135011
loss: 0.9998297095298767,grad_norm: 0.9999990480218435, iteration: 135012
loss: 0.9879341721534729,grad_norm: 0.872754805714766, iteration: 135013
loss: 1.0310983657836914,grad_norm: 0.9565857550424615, iteration: 135014
loss: 0.9929147362709045,grad_norm: 0.9999991512339031, iteration: 135015
loss: 0.9966356754302979,grad_norm: 0.999999065979691, iteration: 135016
loss: 1.0236141681671143,grad_norm: 0.9999992061127103, iteration: 135017
loss: 0.9777478575706482,grad_norm: 0.9999994427173335, iteration: 135018
loss: 0.9941867589950562,grad_norm: 0.9999992874304182, iteration: 135019
loss: 1.0229600667953491,grad_norm: 0.9999991467271547, iteration: 135020
loss: 1.037459373474121,grad_norm: 0.9545166024699762, iteration: 135021
loss: 1.0326318740844727,grad_norm: 0.9749683908741129, iteration: 135022
loss: 0.966545045375824,grad_norm: 0.9890301811452519, iteration: 135023
loss: 1.0036953687667847,grad_norm: 0.818005700911503, iteration: 135024
loss: 0.9858735203742981,grad_norm: 0.9999990308255661, iteration: 135025
loss: 1.0099139213562012,grad_norm: 0.9999990880760413, iteration: 135026
loss: 0.9978830218315125,grad_norm: 0.9775422254621176, iteration: 135027
loss: 0.9841843247413635,grad_norm: 0.9586915124802918, iteration: 135028
loss: 0.9688654541969299,grad_norm: 0.9999990327850863, iteration: 135029
loss: 0.9884468913078308,grad_norm: 0.980346459354391, iteration: 135030
loss: 1.0127710103988647,grad_norm: 0.9999989745381005, iteration: 135031
loss: 0.9980765581130981,grad_norm: 0.9999990723810697, iteration: 135032
loss: 1.0412064790725708,grad_norm: 0.9265912735211203, iteration: 135033
loss: 1.0177979469299316,grad_norm: 0.8228300078373461, iteration: 135034
loss: 1.0142444372177124,grad_norm: 0.8859198143844921, iteration: 135035
loss: 1.0016183853149414,grad_norm: 0.9969796239924507, iteration: 135036
loss: 0.9786298871040344,grad_norm: 0.8928309838022676, iteration: 135037
loss: 0.9712161421775818,grad_norm: 0.9999990461092136, iteration: 135038
loss: 0.9390242099761963,grad_norm: 0.9999991608333705, iteration: 135039
loss: 1.0346704721450806,grad_norm: 0.9999991284035876, iteration: 135040
loss: 0.9889283776283264,grad_norm: 0.9999990662471744, iteration: 135041
loss: 0.9950821399688721,grad_norm: 0.999999074036998, iteration: 135042
loss: 0.9838293194770813,grad_norm: 0.9068061799519642, iteration: 135043
loss: 1.0079952478408813,grad_norm: 0.8371403880469721, iteration: 135044
loss: 1.0015028715133667,grad_norm: 0.9999990933652297, iteration: 135045
loss: 0.9840623736381531,grad_norm: 0.948769929719634, iteration: 135046
loss: 1.0003180503845215,grad_norm: 0.9976027543081201, iteration: 135047
loss: 1.017703652381897,grad_norm: 0.9199562275735303, iteration: 135048
loss: 1.0393651723861694,grad_norm: 0.9999990806564902, iteration: 135049
loss: 1.026658058166504,grad_norm: 0.9999993310110936, iteration: 135050
loss: 1.0531790256500244,grad_norm: 0.95233573133937, iteration: 135051
loss: 0.984477698802948,grad_norm: 0.9447324836711392, iteration: 135052
loss: 1.0334738492965698,grad_norm: 0.9798160474929657, iteration: 135053
loss: 0.9924982786178589,grad_norm: 0.9815648324009428, iteration: 135054
loss: 1.0259716510772705,grad_norm: 0.9999992395440849, iteration: 135055
loss: 1.0172199010849,grad_norm: 0.9999990717591446, iteration: 135056
loss: 0.9903393387794495,grad_norm: 0.9999990272856634, iteration: 135057
loss: 0.977249801158905,grad_norm: 0.9999992211194364, iteration: 135058
loss: 1.0143001079559326,grad_norm: 0.9999991975659733, iteration: 135059
loss: 1.0125619173049927,grad_norm: 0.9598894666678329, iteration: 135060
loss: 0.9641927480697632,grad_norm: 0.9999992551580675, iteration: 135061
loss: 1.0208317041397095,grad_norm: 0.8583634640606768, iteration: 135062
loss: 1.0106172561645508,grad_norm: 0.8215305598325903, iteration: 135063
loss: 0.9929303526878357,grad_norm: 0.9622189101277662, iteration: 135064
loss: 0.99626624584198,grad_norm: 0.9999990732557398, iteration: 135065
loss: 0.9605281352996826,grad_norm: 0.9999991811808808, iteration: 135066
loss: 1.0143895149230957,grad_norm: 0.999999377876135, iteration: 135067
loss: 1.0142747163772583,grad_norm: 0.9999992859532048, iteration: 135068
loss: 0.9799661636352539,grad_norm: 0.8839497872219235, iteration: 135069
loss: 0.9842539429664612,grad_norm: 0.9896176351317189, iteration: 135070
loss: 0.987942636013031,grad_norm: 0.9999990154419094, iteration: 135071
loss: 0.9898092150688171,grad_norm: 0.9999990021194026, iteration: 135072
loss: 0.9928617477416992,grad_norm: 0.9999993033014511, iteration: 135073
loss: 1.0362504720687866,grad_norm: 0.9999992231407624, iteration: 135074
loss: 0.9812878966331482,grad_norm: 0.999999416717413, iteration: 135075
loss: 1.0234379768371582,grad_norm: 0.9492460426287963, iteration: 135076
loss: 0.9777157306671143,grad_norm: 0.8568705070769747, iteration: 135077
loss: 0.9843044281005859,grad_norm: 0.9999990163793526, iteration: 135078
loss: 1.006873369216919,grad_norm: 0.9999988706286576, iteration: 135079
loss: 1.0087100267410278,grad_norm: 0.9999992717859627, iteration: 135080
loss: 1.0114001035690308,grad_norm: 0.9416899205740434, iteration: 135081
loss: 0.9988172054290771,grad_norm: 0.9800588270258347, iteration: 135082
loss: 1.0133247375488281,grad_norm: 0.7903101009771064, iteration: 135083
loss: 0.987515926361084,grad_norm: 0.9999991591439696, iteration: 135084
loss: 1.0266534090042114,grad_norm: 0.9240437207726063, iteration: 135085
loss: 0.9819732904434204,grad_norm: 0.9999992068209478, iteration: 135086
loss: 1.0209755897521973,grad_norm: 0.9999990853587671, iteration: 135087
loss: 1.0087792873382568,grad_norm: 0.9999990425500578, iteration: 135088
loss: 1.009955644607544,grad_norm: 0.9528510250204226, iteration: 135089
loss: 1.0029895305633545,grad_norm: 0.9778147860508147, iteration: 135090
loss: 1.010636806488037,grad_norm: 0.8140500215862211, iteration: 135091
loss: 0.974689245223999,grad_norm: 0.9999992092158911, iteration: 135092
loss: 0.974226713180542,grad_norm: 0.9422731402307345, iteration: 135093
loss: 0.9611039757728577,grad_norm: 0.9644180626022043, iteration: 135094
loss: 0.9720747470855713,grad_norm: 0.9999992265653166, iteration: 135095
loss: 1.0325120687484741,grad_norm: 0.999999530599119, iteration: 135096
loss: 1.0019811391830444,grad_norm: 0.946583827405448, iteration: 135097
loss: 1.045423984527588,grad_norm: 0.9621242876971752, iteration: 135098
loss: 0.987044632434845,grad_norm: 0.8793098575595104, iteration: 135099
loss: 1.0239897966384888,grad_norm: 0.999999186433403, iteration: 135100
loss: 0.9968389868736267,grad_norm: 0.8875652555404201, iteration: 135101
loss: 0.982596218585968,grad_norm: 0.9178750637122398, iteration: 135102
loss: 1.0312869548797607,grad_norm: 0.9909534123747628, iteration: 135103
loss: 1.0319814682006836,grad_norm: 0.9999989956000055, iteration: 135104
loss: 0.9857397079467773,grad_norm: 0.9722610607880857, iteration: 135105
loss: 1.0049514770507812,grad_norm: 0.9999993391654772, iteration: 135106
loss: 1.0246421098709106,grad_norm: 0.9880414064324534, iteration: 135107
loss: 1.0004298686981201,grad_norm: 0.9999991030400178, iteration: 135108
loss: 0.9966300129890442,grad_norm: 0.866159823044067, iteration: 135109
loss: 0.9646198153495789,grad_norm: 0.9999991958415139, iteration: 135110
loss: 1.005383014678955,grad_norm: 0.9999991429166666, iteration: 135111
loss: 1.0823317766189575,grad_norm: 0.9999996197712241, iteration: 135112
loss: 1.0214622020721436,grad_norm: 0.9451783038101569, iteration: 135113
loss: 0.9753710627555847,grad_norm: 0.9544970003060367, iteration: 135114
loss: 0.9797822833061218,grad_norm: 0.9299245311668406, iteration: 135115
loss: 0.9751672744750977,grad_norm: 0.9999992427720489, iteration: 135116
loss: 1.027796745300293,grad_norm: 0.999999046681365, iteration: 135117
loss: 1.0390868186950684,grad_norm: 0.9999993164995669, iteration: 135118
loss: 1.0203996896743774,grad_norm: 0.9937914100315647, iteration: 135119
loss: 1.01351797580719,grad_norm: 0.9443545114465165, iteration: 135120
loss: 1.0320231914520264,grad_norm: 0.9999993221175154, iteration: 135121
loss: 1.004616618156433,grad_norm: 0.9999994261010239, iteration: 135122
loss: 1.0174797773361206,grad_norm: 0.9999992392572766, iteration: 135123
loss: 1.031283974647522,grad_norm: 0.9701557851266925, iteration: 135124
loss: 0.9830183982849121,grad_norm: 0.9708244687448928, iteration: 135125
loss: 1.0168819427490234,grad_norm: 0.9838412201434003, iteration: 135126
loss: 0.9985947012901306,grad_norm: 0.9999998604821996, iteration: 135127
loss: 0.9838215708732605,grad_norm: 0.9999991437464109, iteration: 135128
loss: 1.0012779235839844,grad_norm: 0.9999990079148441, iteration: 135129
loss: 0.9858836531639099,grad_norm: 0.9778639640943575, iteration: 135130
loss: 0.9883127212524414,grad_norm: 0.9167894988196034, iteration: 135131
loss: 0.9556072354316711,grad_norm: 0.9252979827419002, iteration: 135132
loss: 0.9951303601264954,grad_norm: 0.9999990197717722, iteration: 135133
loss: 1.0001893043518066,grad_norm: 0.999999229890334, iteration: 135134
loss: 0.9710472226142883,grad_norm: 0.9999992586139165, iteration: 135135
loss: 0.9677284955978394,grad_norm: 0.999999254499326, iteration: 135136
loss: 1.0180705785751343,grad_norm: 0.9999991132446054, iteration: 135137
loss: 1.0529862642288208,grad_norm: 0.9999990945172459, iteration: 135138
loss: 0.9629735946655273,grad_norm: 0.9999990068645745, iteration: 135139
loss: 0.9836874008178711,grad_norm: 0.9456292034061692, iteration: 135140
loss: 1.0141310691833496,grad_norm: 0.9999992594868065, iteration: 135141
loss: 0.9975689649581909,grad_norm: 0.9205699498610898, iteration: 135142
loss: 0.9526196718215942,grad_norm: 0.9999991842778615, iteration: 135143
loss: 1.0197333097457886,grad_norm: 0.9999990599108594, iteration: 135144
loss: 0.9875831007957458,grad_norm: 0.9312376468673075, iteration: 135145
loss: 0.9864377975463867,grad_norm: 0.9500958824698345, iteration: 135146
loss: 1.023173451423645,grad_norm: 0.9999990832976158, iteration: 135147
loss: 1.0000478029251099,grad_norm: 0.8659181391269559, iteration: 135148
loss: 0.9617328643798828,grad_norm: 0.9999991760300401, iteration: 135149
loss: 0.9811866283416748,grad_norm: 0.9999991083370107, iteration: 135150
loss: 1.0252360105514526,grad_norm: 0.9999991298240263, iteration: 135151
loss: 1.0044209957122803,grad_norm: 0.9999991575694023, iteration: 135152
loss: 1.0101349353790283,grad_norm: 0.9999990638460873, iteration: 135153
loss: 0.9768149852752686,grad_norm: 0.9634725136586458, iteration: 135154
loss: 0.9979937672615051,grad_norm: 0.999998989723136, iteration: 135155
loss: 0.9856680631637573,grad_norm: 0.9999994090920366, iteration: 135156
loss: 1.0218596458435059,grad_norm: 0.9999999122288596, iteration: 135157
loss: 1.0366578102111816,grad_norm: 0.8424934343089605, iteration: 135158
loss: 0.9629440903663635,grad_norm: 0.9999990530526971, iteration: 135159
loss: 1.0183957815170288,grad_norm: 0.9256846135266945, iteration: 135160
loss: 1.0275307893753052,grad_norm: 0.9999991334132475, iteration: 135161
loss: 1.00796377658844,grad_norm: 0.9999992128806423, iteration: 135162
loss: 0.9896243810653687,grad_norm: 0.972725744240144, iteration: 135163
loss: 0.9904837608337402,grad_norm: 0.9999990070544988, iteration: 135164
loss: 1.0291099548339844,grad_norm: 0.9999990442802521, iteration: 135165
loss: 0.9738143086433411,grad_norm: 0.9709571569994183, iteration: 135166
loss: 1.051600456237793,grad_norm: 0.9999991367775627, iteration: 135167
loss: 0.958898663520813,grad_norm: 0.9813852021494667, iteration: 135168
loss: 0.9801873564720154,grad_norm: 0.9999997363662033, iteration: 135169
loss: 0.9547149538993835,grad_norm: 0.999999167264832, iteration: 135170
loss: 0.9973958730697632,grad_norm: 0.999999189158178, iteration: 135171
loss: 0.9653467535972595,grad_norm: 0.9999991328852833, iteration: 135172
loss: 1.0215656757354736,grad_norm: 0.9999990745970847, iteration: 135173
loss: 1.0162161588668823,grad_norm: 0.9743014018514004, iteration: 135174
loss: 0.9496169090270996,grad_norm: 0.9999989782415879, iteration: 135175
loss: 1.0160634517669678,grad_norm: 0.9196265761961316, iteration: 135176
loss: 1.015217900276184,grad_norm: 0.9999991783567018, iteration: 135177
loss: 1.0214866399765015,grad_norm: 0.9933895939748214, iteration: 135178
loss: 0.9900376796722412,grad_norm: 0.9999990083080654, iteration: 135179
loss: 0.9893190264701843,grad_norm: 0.9999991260224074, iteration: 135180
loss: 0.9878946542739868,grad_norm: 0.9999991437190191, iteration: 135181
loss: 0.9723542928695679,grad_norm: 0.8798048186081093, iteration: 135182
loss: 1.0082446336746216,grad_norm: 0.9999989764868822, iteration: 135183
loss: 1.001387596130371,grad_norm: 0.99999910980487, iteration: 135184
loss: 1.0206098556518555,grad_norm: 0.9232305402620341, iteration: 135185
loss: 1.0210485458374023,grad_norm: 0.9376067589755207, iteration: 135186
loss: 0.9800248146057129,grad_norm: 0.9919384673138876, iteration: 135187
loss: 0.98757404088974,grad_norm: 0.9999992728678169, iteration: 135188
loss: 1.0281236171722412,grad_norm: 0.9999992048303018, iteration: 135189
loss: 1.0200204849243164,grad_norm: 0.9999990388375414, iteration: 135190
loss: 1.0322778224945068,grad_norm: 0.9999991718391106, iteration: 135191
loss: 0.9732264280319214,grad_norm: 0.9999991044026908, iteration: 135192
loss: 1.0125186443328857,grad_norm: 0.9999991949199286, iteration: 135193
loss: 1.001711130142212,grad_norm: 0.9999991561290326, iteration: 135194
loss: 1.0325394868850708,grad_norm: 0.969449557188576, iteration: 135195
loss: 1.0230157375335693,grad_norm: 0.9339149940358638, iteration: 135196
loss: 0.9671822190284729,grad_norm: 0.9999991481036343, iteration: 135197
loss: 0.9982227683067322,grad_norm: 0.9637210775636353, iteration: 135198
loss: 1.0254013538360596,grad_norm: 0.9999990469531376, iteration: 135199
loss: 1.0058907270431519,grad_norm: 0.9999990285568328, iteration: 135200
loss: 1.0159245729446411,grad_norm: 0.9179886745705947, iteration: 135201
loss: 0.9664449691772461,grad_norm: 0.9999992628504673, iteration: 135202
loss: 0.9725958704948425,grad_norm: 0.9999992315511703, iteration: 135203
loss: 1.0345979928970337,grad_norm: 0.9050697823942657, iteration: 135204
loss: 1.0014907121658325,grad_norm: 0.9999991330961346, iteration: 135205
loss: 0.951071560382843,grad_norm: 0.999999232356931, iteration: 135206
loss: 1.0226609706878662,grad_norm: 0.9999992132119763, iteration: 135207
loss: 1.011804223060608,grad_norm: 0.847324287877264, iteration: 135208
loss: 1.011902928352356,grad_norm: 0.8005200056676867, iteration: 135209
loss: 1.0007244348526,grad_norm: 0.999999139588002, iteration: 135210
loss: 1.010614275932312,grad_norm: 0.9999992840110009, iteration: 135211
loss: 1.0087114572525024,grad_norm: 0.973387154568573, iteration: 135212
loss: 0.9822705984115601,grad_norm: 0.9442129199352444, iteration: 135213
loss: 0.998740553855896,grad_norm: 0.9999991639360836, iteration: 135214
loss: 0.9501044154167175,grad_norm: 0.9999991339242058, iteration: 135215
loss: 0.9774237275123596,grad_norm: 0.9202312548088233, iteration: 135216
loss: 1.0128679275512695,grad_norm: 0.9999992683194562, iteration: 135217
loss: 0.9941475987434387,grad_norm: 0.9999989579599636, iteration: 135218
loss: 0.996610701084137,grad_norm: 0.9999992415858153, iteration: 135219
loss: 1.0112125873565674,grad_norm: 0.9902683589759544, iteration: 135220
loss: 1.0094231367111206,grad_norm: 0.9313612965315661, iteration: 135221
loss: 1.004663109779358,grad_norm: 0.9999992554614895, iteration: 135222
loss: 1.0109467506408691,grad_norm: 0.9999992768723847, iteration: 135223
loss: 0.9512518644332886,grad_norm: 0.8090877195604932, iteration: 135224
loss: 1.0149606466293335,grad_norm: 0.8831841285806359, iteration: 135225
loss: 0.9936462044715881,grad_norm: 0.9999991297713244, iteration: 135226
loss: 0.9566600918769836,grad_norm: 0.9999990631039889, iteration: 135227
loss: 1.0297802686691284,grad_norm: 0.9360047554879698, iteration: 135228
loss: 0.9846898317337036,grad_norm: 0.9999989969540873, iteration: 135229
loss: 0.9729593396186829,grad_norm: 0.9347391034858317, iteration: 135230
loss: 0.9731748700141907,grad_norm: 0.9999991779457638, iteration: 135231
loss: 0.9969136714935303,grad_norm: 0.9407852900833731, iteration: 135232
loss: 0.9745737910270691,grad_norm: 0.9877070393193603, iteration: 135233
loss: 1.0151087045669556,grad_norm: 0.9025614367738056, iteration: 135234
loss: 0.9722568988800049,grad_norm: 0.7570075010572369, iteration: 135235
loss: 1.0050528049468994,grad_norm: 0.9999991896702631, iteration: 135236
loss: 0.9863559603691101,grad_norm: 0.9075568091669057, iteration: 135237
loss: 0.9692554473876953,grad_norm: 0.9466279423351193, iteration: 135238
loss: 1.0090514421463013,grad_norm: 0.9999990936164204, iteration: 135239
loss: 0.9969314932823181,grad_norm: 0.9999990990267792, iteration: 135240
loss: 1.0098551511764526,grad_norm: 0.8496408817624448, iteration: 135241
loss: 1.012467861175537,grad_norm: 0.9999992003047491, iteration: 135242
loss: 1.0165573358535767,grad_norm: 0.9999990831266534, iteration: 135243
loss: 1.0148913860321045,grad_norm: 0.9999990937450501, iteration: 135244
loss: 1.0015246868133545,grad_norm: 0.9999991985502337, iteration: 135245
loss: 1.0285180807113647,grad_norm: 0.9999990342125425, iteration: 135246
loss: 1.0070106983184814,grad_norm: 0.9810292624183521, iteration: 135247
loss: 1.020943522453308,grad_norm: 0.9466963886043337, iteration: 135248
loss: 1.034417748451233,grad_norm: 0.8875571371449121, iteration: 135249
loss: 0.9972603917121887,grad_norm: 0.9999991772897248, iteration: 135250
loss: 0.9834673404693604,grad_norm: 0.9999991931964639, iteration: 135251
loss: 0.9628281593322754,grad_norm: 0.999999202017857, iteration: 135252
loss: 1.01204514503479,grad_norm: 0.9999990419502935, iteration: 135253
loss: 1.004379153251648,grad_norm: 0.8791992102790116, iteration: 135254
loss: 1.087508201599121,grad_norm: 0.9999996566723358, iteration: 135255
loss: 1.0015190839767456,grad_norm: 0.9999992251535172, iteration: 135256
loss: 1.0059967041015625,grad_norm: 0.8363623552599315, iteration: 135257
loss: 1.006483554840088,grad_norm: 0.9410690149633986, iteration: 135258
loss: 1.0459972620010376,grad_norm: 0.9819993678244662, iteration: 135259
loss: 1.0119459629058838,grad_norm: 0.9870425763271057, iteration: 135260
loss: 1.0182812213897705,grad_norm: 0.999999164060336, iteration: 135261
loss: 0.9844287633895874,grad_norm: 0.9999991056031196, iteration: 135262
loss: 1.000977635383606,grad_norm: 0.9999991488821879, iteration: 135263
loss: 0.9414165616035461,grad_norm: 0.9939536769876729, iteration: 135264
loss: 0.9973965883255005,grad_norm: 0.9999992097683421, iteration: 135265
loss: 1.0170786380767822,grad_norm: 0.8936247807054055, iteration: 135266
loss: 0.9656121134757996,grad_norm: 0.99999920861905, iteration: 135267
loss: 1.004529595375061,grad_norm: 0.8860166527789397, iteration: 135268
loss: 0.998309850692749,grad_norm: 0.9999992701577216, iteration: 135269
loss: 0.9875274300575256,grad_norm: 0.9999992365730221, iteration: 135270
loss: 0.9652602672576904,grad_norm: 0.9513574028856324, iteration: 135271
loss: 1.0124409198760986,grad_norm: 0.9999990105509041, iteration: 135272
loss: 0.995846688747406,grad_norm: 0.9945423764478156, iteration: 135273
loss: 1.0208935737609863,grad_norm: 0.9611108232300329, iteration: 135274
loss: 1.0007777214050293,grad_norm: 0.9999990866183891, iteration: 135275
loss: 0.9746376872062683,grad_norm: 0.9843332379891742, iteration: 135276
loss: 1.02336585521698,grad_norm: 0.9999990509705731, iteration: 135277
loss: 1.0305018424987793,grad_norm: 0.9999990879354598, iteration: 135278
loss: 0.9839134216308594,grad_norm: 0.8639965034510708, iteration: 135279
loss: 1.0391459465026855,grad_norm: 0.9999991055540204, iteration: 135280
loss: 0.9915308356285095,grad_norm: 0.9999990159643877, iteration: 135281
loss: 0.9785664677619934,grad_norm: 0.9951563596348146, iteration: 135282
loss: 1.0001213550567627,grad_norm: 0.9999995121590421, iteration: 135283
loss: 1.0070658922195435,grad_norm: 0.9999990629456357, iteration: 135284
loss: 0.9878726005554199,grad_norm: 0.9999992213770083, iteration: 135285
loss: 0.9862948060035706,grad_norm: 0.8901220697509968, iteration: 135286
loss: 1.0184701681137085,grad_norm: 0.9999989963801318, iteration: 135287
loss: 1.0353771448135376,grad_norm: 0.9999999006508499, iteration: 135288
loss: 0.9706915616989136,grad_norm: 0.9632780578143677, iteration: 135289
loss: 1.019169807434082,grad_norm: 0.9999991739857763, iteration: 135290
loss: 0.9894172549247742,grad_norm: 0.9999990726946073, iteration: 135291
loss: 1.001535415649414,grad_norm: 0.999998918479185, iteration: 135292
loss: 1.036173701286316,grad_norm: 0.9999991012072403, iteration: 135293
loss: 1.0103498697280884,grad_norm: 0.9999991523088472, iteration: 135294
loss: 0.9876546859741211,grad_norm: 0.8919586932035987, iteration: 135295
loss: 0.9612550139427185,grad_norm: 0.9999991986643357, iteration: 135296
loss: 1.0422368049621582,grad_norm: 0.9999993161519191, iteration: 135297
loss: 1.00534188747406,grad_norm: 0.9493926119568764, iteration: 135298
loss: 0.971785306930542,grad_norm: 0.9999992644921422, iteration: 135299
loss: 0.9846973419189453,grad_norm: 0.9999991039145756, iteration: 135300
loss: 0.9742211699485779,grad_norm: 0.9999991693956843, iteration: 135301
loss: 0.9673062562942505,grad_norm: 0.8835626932207553, iteration: 135302
loss: 0.9907370805740356,grad_norm: 0.9999989549468856, iteration: 135303
loss: 0.9717429280281067,grad_norm: 0.9999989321626602, iteration: 135304
loss: 0.9898054003715515,grad_norm: 0.9999991465782735, iteration: 135305
loss: 1.012073040008545,grad_norm: 0.8632493239869162, iteration: 135306
loss: 0.995744526386261,grad_norm: 0.9933102337584112, iteration: 135307
loss: 0.9832205772399902,grad_norm: 0.8034483105953886, iteration: 135308
loss: 1.0363414287567139,grad_norm: 0.9488695771624182, iteration: 135309
loss: 0.9934218525886536,grad_norm: 0.953952343152526, iteration: 135310
loss: 0.9706190228462219,grad_norm: 0.9999990132921025, iteration: 135311
loss: 0.9851280450820923,grad_norm: 0.9999991643485893, iteration: 135312
loss: 0.9803458452224731,grad_norm: 0.999999097012294, iteration: 135313
loss: 1.000648856163025,grad_norm: 0.9794807082620897, iteration: 135314
loss: 1.0287797451019287,grad_norm: 0.9999991065028346, iteration: 135315
loss: 1.0063153505325317,grad_norm: 0.9999992203178476, iteration: 135316
loss: 1.0149550437927246,grad_norm: 0.9999992566972858, iteration: 135317
loss: 1.0052412748336792,grad_norm: 0.9999991163281385, iteration: 135318
loss: 1.0059547424316406,grad_norm: 0.999999064388454, iteration: 135319
loss: 1.0024696588516235,grad_norm: 0.9254557334198279, iteration: 135320
loss: 0.9985045790672302,grad_norm: 0.9764131403550292, iteration: 135321
loss: 1.0367258787155151,grad_norm: 0.9999991003139918, iteration: 135322
loss: 1.0147384405136108,grad_norm: 0.9732632686287739, iteration: 135323
loss: 0.9985764026641846,grad_norm: 0.9999992128385409, iteration: 135324
loss: 0.9982729554176331,grad_norm: 0.9999992776134301, iteration: 135325
loss: 0.984464168548584,grad_norm: 0.9999992240974924, iteration: 135326
loss: 0.9456701874732971,grad_norm: 0.9616769410403794, iteration: 135327
loss: 0.9870920181274414,grad_norm: 0.9999991528228287, iteration: 135328
loss: 1.0402213335037231,grad_norm: 0.9999990819255068, iteration: 135329
loss: 1.0272024869918823,grad_norm: 0.9999990718895935, iteration: 135330
loss: 0.9680002331733704,grad_norm: 0.9999992607310703, iteration: 135331
loss: 1.004472255706787,grad_norm: 0.8650778872234207, iteration: 135332
loss: 1.0329920053482056,grad_norm: 0.9999991120363717, iteration: 135333
loss: 0.9945509433746338,grad_norm: 0.9999992052317903, iteration: 135334
loss: 0.9734806418418884,grad_norm: 0.9999991434240532, iteration: 135335
loss: 1.0230275392532349,grad_norm: 0.9153818823507501, iteration: 135336
loss: 0.9918684363365173,grad_norm: 0.9999991303796839, iteration: 135337
loss: 0.9504721164703369,grad_norm: 0.9999990847050677, iteration: 135338
loss: 1.002131462097168,grad_norm: 0.8726833806177848, iteration: 135339
loss: 1.0778602361679077,grad_norm: 0.9999997862913373, iteration: 135340
loss: 1.0265631675720215,grad_norm: 0.9999992304811965, iteration: 135341
loss: 0.98538738489151,grad_norm: 0.999999902648093, iteration: 135342
loss: 1.0406242609024048,grad_norm: 0.9999996856170194, iteration: 135343
loss: 0.9801570773124695,grad_norm: 0.9999991657366756, iteration: 135344
loss: 0.9796714782714844,grad_norm: 0.9530343151280085, iteration: 135345
loss: 0.9810761213302612,grad_norm: 0.9999991316800881, iteration: 135346
loss: 1.069924235343933,grad_norm: 0.9999990515390186, iteration: 135347
loss: 0.9805244207382202,grad_norm: 0.8410953983627908, iteration: 135348
loss: 1.0007702112197876,grad_norm: 0.9999991121807904, iteration: 135349
loss: 0.9929039478302002,grad_norm: 0.9999990212624547, iteration: 135350
loss: 1.0050549507141113,grad_norm: 0.9999990407616728, iteration: 135351
loss: 1.0309579372406006,grad_norm: 0.9999992009024338, iteration: 135352
loss: 0.9757674932479858,grad_norm: 0.9999991863327629, iteration: 135353
loss: 1.0202856063842773,grad_norm: 0.9829563425547186, iteration: 135354
loss: 1.0113728046417236,grad_norm: 0.9999990106725771, iteration: 135355
loss: 1.0272632837295532,grad_norm: 0.9999990901394126, iteration: 135356
loss: 1.033730387687683,grad_norm: 0.9999990843488801, iteration: 135357
loss: 1.0249649286270142,grad_norm: 0.9999993660833012, iteration: 135358
loss: 1.0143330097198486,grad_norm: 0.9999989924157267, iteration: 135359
loss: 0.9840297698974609,grad_norm: 0.9999992530351658, iteration: 135360
loss: 1.002338171005249,grad_norm: 0.999999318674262, iteration: 135361
loss: 0.9782916903495789,grad_norm: 0.9654596819987457, iteration: 135362
loss: 0.973297119140625,grad_norm: 0.984985002084208, iteration: 135363
loss: 0.9628020524978638,grad_norm: 0.9999992793414078, iteration: 135364
loss: 0.9703256487846375,grad_norm: 0.9577432221778447, iteration: 135365
loss: 0.9611005783081055,grad_norm: 0.999999171352907, iteration: 135366
loss: 0.9877021908760071,grad_norm: 0.9999989820478065, iteration: 135367
loss: 0.9565187692642212,grad_norm: 0.9999996756916814, iteration: 135368
loss: 0.9892691969871521,grad_norm: 0.9999990840033182, iteration: 135369
loss: 0.989804208278656,grad_norm: 0.9999991443526098, iteration: 135370
loss: 0.9995722770690918,grad_norm: 0.9999992110483905, iteration: 135371
loss: 1.0652482509613037,grad_norm: 0.9999994143376401, iteration: 135372
loss: 1.030074954032898,grad_norm: 0.9999992759271843, iteration: 135373
loss: 0.964630663394928,grad_norm: 0.9270294123526481, iteration: 135374
loss: 0.9723211526870728,grad_norm: 0.9678954036048274, iteration: 135375
loss: 1.0407065153121948,grad_norm: 0.9999990844205944, iteration: 135376
loss: 0.978496789932251,grad_norm: 0.9999991994519956, iteration: 135377
loss: 1.0048969984054565,grad_norm: 0.9527946369322281, iteration: 135378
loss: 1.0110608339309692,grad_norm: 0.8335778533337853, iteration: 135379
loss: 0.9849885106086731,grad_norm: 0.9999990235926248, iteration: 135380
loss: 1.038519024848938,grad_norm: 0.9999992845680535, iteration: 135381
loss: 0.9877042770385742,grad_norm: 0.9999992071294127, iteration: 135382
loss: 1.0607595443725586,grad_norm: 0.999999376063557, iteration: 135383
loss: 1.1273056268692017,grad_norm: 0.9999991929333277, iteration: 135384
loss: 1.0418344736099243,grad_norm: 0.992154123048531, iteration: 135385
loss: 0.9882477521896362,grad_norm: 0.9999992924832689, iteration: 135386
loss: 1.0066224336624146,grad_norm: 0.8612160488854155, iteration: 135387
loss: 1.049573302268982,grad_norm: 0.9999990919927043, iteration: 135388
loss: 1.123532772064209,grad_norm: 0.9999995405825807, iteration: 135389
loss: 0.9604964852333069,grad_norm: 0.9999991614220899, iteration: 135390
loss: 1.0052160024642944,grad_norm: 0.9999991166229641, iteration: 135391
loss: 1.1415854692459106,grad_norm: 0.999999414248524, iteration: 135392
loss: 0.9970964193344116,grad_norm: 0.9477010620045657, iteration: 135393
loss: 1.024414300918579,grad_norm: 0.9999992618213659, iteration: 135394
loss: 0.9617476463317871,grad_norm: 0.9999990377658913, iteration: 135395
loss: 1.0292491912841797,grad_norm: 0.8847369631345606, iteration: 135396
loss: 1.0158815383911133,grad_norm: 0.9999992646145187, iteration: 135397
loss: 1.0300523042678833,grad_norm: 0.9505549979422804, iteration: 135398
loss: 0.9934446811676025,grad_norm: 0.9994701595960804, iteration: 135399
loss: 1.0427815914154053,grad_norm: 0.9999992639815491, iteration: 135400
loss: 1.0258368253707886,grad_norm: 0.9803149165517003, iteration: 135401
loss: 1.039611577987671,grad_norm: 0.9999989757482837, iteration: 135402
loss: 1.0381263494491577,grad_norm: 0.9493000131972225, iteration: 135403
loss: 1.022256851196289,grad_norm: 0.9999992568691135, iteration: 135404
loss: 1.0050432682037354,grad_norm: 0.7829225852600286, iteration: 135405
loss: 0.9873285889625549,grad_norm: 0.9503200692238469, iteration: 135406
loss: 1.0012660026550293,grad_norm: 0.9999993159790014, iteration: 135407
loss: 0.9866454601287842,grad_norm: 0.9980269448079404, iteration: 135408
loss: 1.0007106065750122,grad_norm: 0.9223645672094833, iteration: 135409
loss: 0.9839392304420471,grad_norm: 0.9999992459334591, iteration: 135410
loss: 0.9865543246269226,grad_norm: 0.9999994146586552, iteration: 135411
loss: 0.9915816783905029,grad_norm: 0.8660395882457511, iteration: 135412
loss: 1.030663013458252,grad_norm: 0.9999992822002076, iteration: 135413
loss: 1.0134923458099365,grad_norm: 0.9999993084475598, iteration: 135414
loss: 0.9966322183609009,grad_norm: 0.940125172505374, iteration: 135415
loss: 1.0374994277954102,grad_norm: 0.999999238629315, iteration: 135416
loss: 1.0395804643630981,grad_norm: 0.9999990463852994, iteration: 135417
loss: 0.9836961627006531,grad_norm: 0.9999990285406103, iteration: 135418
loss: 1.003859043121338,grad_norm: 0.9835934606938697, iteration: 135419
loss: 0.9811465740203857,grad_norm: 0.999999068366118, iteration: 135420
loss: 1.0077688694000244,grad_norm: 0.9999995227745938, iteration: 135421
loss: 1.027478575706482,grad_norm: 0.9999997883894206, iteration: 135422
loss: 1.0262995958328247,grad_norm: 0.9999991880365441, iteration: 135423
loss: 0.9921551942825317,grad_norm: 0.851228436819914, iteration: 135424
loss: 0.9919559955596924,grad_norm: 0.9259648922660265, iteration: 135425
loss: 1.0058554410934448,grad_norm: 0.9999992290737935, iteration: 135426
loss: 0.9974648952484131,grad_norm: 0.9720474480643551, iteration: 135427
loss: 0.953766942024231,grad_norm: 0.9999991243760814, iteration: 135428
loss: 0.9734380841255188,grad_norm: 0.9994866730845013, iteration: 135429
loss: 1.0288318395614624,grad_norm: 0.9763411615558873, iteration: 135430
loss: 0.959775447845459,grad_norm: 0.9999990235451238, iteration: 135431
loss: 0.9961022734642029,grad_norm: 0.9999991464668627, iteration: 135432
loss: 0.9906858801841736,grad_norm: 0.9999992183952938, iteration: 135433
loss: 0.9917117357254028,grad_norm: 0.8635431878886937, iteration: 135434
loss: 1.0287878513336182,grad_norm: 0.9999990951248579, iteration: 135435
loss: 1.0271416902542114,grad_norm: 0.9999991355440426, iteration: 135436
loss: 0.9847066402435303,grad_norm: 0.9999992304598769, iteration: 135437
loss: 1.0400269031524658,grad_norm: 0.9999991181514816, iteration: 135438
loss: 1.065354585647583,grad_norm: 0.9999989609804909, iteration: 135439
loss: 0.9747408628463745,grad_norm: 0.9999991578345822, iteration: 135440
loss: 1.015221118927002,grad_norm: 0.9999991527232255, iteration: 135441
loss: 0.9536066651344299,grad_norm: 0.9999990531687233, iteration: 135442
loss: 1.04264235496521,grad_norm: 0.9999990441957275, iteration: 135443
loss: 0.9617844820022583,grad_norm: 0.9999992232235337, iteration: 135444
loss: 1.015417218208313,grad_norm: 0.9999994401407083, iteration: 135445
loss: 0.9937760233879089,grad_norm: 0.9999990712896119, iteration: 135446
loss: 0.9738568663597107,grad_norm: 0.950381757606769, iteration: 135447
loss: 1.019351840019226,grad_norm: 0.9999989299977728, iteration: 135448
loss: 1.0167862176895142,grad_norm: 0.9999991702040528, iteration: 135449
loss: 1.0409481525421143,grad_norm: 0.9999997167368919, iteration: 135450
loss: 1.0254852771759033,grad_norm: 0.9999991987155515, iteration: 135451
loss: 1.014033555984497,grad_norm: 0.9999990173766081, iteration: 135452
loss: 0.9680747389793396,grad_norm: 0.835840326598129, iteration: 135453
loss: 0.994885265827179,grad_norm: 0.999999073449021, iteration: 135454
loss: 0.9971073865890503,grad_norm: 0.9999990747714761, iteration: 135455
loss: 1.1123818159103394,grad_norm: 0.9999994388758479, iteration: 135456
loss: 1.1497225761413574,grad_norm: 0.999999748964276, iteration: 135457
loss: 1.0039807558059692,grad_norm: 0.9435913906802316, iteration: 135458
loss: 1.085252046585083,grad_norm: 0.999999834371456, iteration: 135459
loss: 0.9659836888313293,grad_norm: 0.977961594179588, iteration: 135460
loss: 0.9939342141151428,grad_norm: 0.9999991961965906, iteration: 135461
loss: 0.9757203459739685,grad_norm: 0.9999992158120364, iteration: 135462
loss: 1.0178595781326294,grad_norm: 0.999999108096835, iteration: 135463
loss: 1.0152751207351685,grad_norm: 0.9999991448751321, iteration: 135464
loss: 0.9927933216094971,grad_norm: 0.9719452666711821, iteration: 135465
loss: 0.9899533987045288,grad_norm: 0.9999991177018853, iteration: 135466
loss: 1.0193828344345093,grad_norm: 0.9828047215882978, iteration: 135467
loss: 1.0128644704818726,grad_norm: 0.8465034623347003, iteration: 135468
loss: 0.9759515523910522,grad_norm: 0.999999120608219, iteration: 135469
loss: 1.0270121097564697,grad_norm: 0.999999136283383, iteration: 135470
loss: 1.0000050067901611,grad_norm: 0.9999992286061659, iteration: 135471
loss: 0.9764745235443115,grad_norm: 0.9999993295726888, iteration: 135472
loss: 0.9848397970199585,grad_norm: 0.9999995180900573, iteration: 135473
loss: 0.9978432655334473,grad_norm: 0.9999990479635268, iteration: 135474
loss: 0.9913928508758545,grad_norm: 0.8712735894192527, iteration: 135475
loss: 1.0341718196868896,grad_norm: 0.8333798023325261, iteration: 135476
loss: 1.026351809501648,grad_norm: 0.9475751086232137, iteration: 135477
loss: 0.9895997047424316,grad_norm: 0.9999990246425401, iteration: 135478
loss: 1.0023921728134155,grad_norm: 0.8799358026314354, iteration: 135479
loss: 0.9652389883995056,grad_norm: 0.9092397779877054, iteration: 135480
loss: 0.983819305896759,grad_norm: 0.9999991657332183, iteration: 135481
loss: 0.986382246017456,grad_norm: 0.9999992450290835, iteration: 135482
loss: 0.9945205450057983,grad_norm: 0.999999052244118, iteration: 135483
loss: 1.0446603298187256,grad_norm: 0.9999992530176236, iteration: 135484
loss: 1.0201332569122314,grad_norm: 0.9133320757167793, iteration: 135485
loss: 1.0105462074279785,grad_norm: 0.9999990894121462, iteration: 135486
loss: 1.013373613357544,grad_norm: 0.999999339311204, iteration: 135487
loss: 1.0449448823928833,grad_norm: 0.9999992958273415, iteration: 135488
loss: 1.0151147842407227,grad_norm: 0.9999989918467662, iteration: 135489
loss: 1.0082144737243652,grad_norm: 0.9658546339934336, iteration: 135490
loss: 1.0368199348449707,grad_norm: 0.9964943654711304, iteration: 135491
loss: 0.9938336610794067,grad_norm: 0.9999990959627703, iteration: 135492
loss: 0.9934300184249878,grad_norm: 0.9011345786119765, iteration: 135493
loss: 0.9766380190849304,grad_norm: 0.9999994334080949, iteration: 135494
loss: 1.0081435441970825,grad_norm: 0.9867094118770527, iteration: 135495
loss: 1.1267215013504028,grad_norm: 0.9999996811624932, iteration: 135496
loss: 0.9635536670684814,grad_norm: 0.9999990903552128, iteration: 135497
loss: 1.0111197233200073,grad_norm: 0.9187456046192435, iteration: 135498
loss: 1.02448570728302,grad_norm: 0.9999991826636165, iteration: 135499
loss: 1.0165318250656128,grad_norm: 0.9999992232929806, iteration: 135500
loss: 1.0149019956588745,grad_norm: 0.9578614337142433, iteration: 135501
loss: 1.011260986328125,grad_norm: 0.999999184700163, iteration: 135502
loss: 0.9750564098358154,grad_norm: 0.99999928724041, iteration: 135503
loss: 1.0340038537979126,grad_norm: 0.9999998571365989, iteration: 135504
loss: 1.0237442255020142,grad_norm: 0.8475547055617937, iteration: 135505
loss: 0.9845396280288696,grad_norm: 0.9517252633523334, iteration: 135506
loss: 0.9795140624046326,grad_norm: 0.9999997343344579, iteration: 135507
loss: 1.0481195449829102,grad_norm: 0.8642646642255166, iteration: 135508
loss: 1.0027979612350464,grad_norm: 0.8787198515119162, iteration: 135509
loss: 0.983131468296051,grad_norm: 0.99999915836746, iteration: 135510
loss: 0.9917886853218079,grad_norm: 0.9999990131692807, iteration: 135511
loss: 1.0212246179580688,grad_norm: 0.9999991609820719, iteration: 135512
loss: 0.9952802658081055,grad_norm: 0.907020282132934, iteration: 135513
loss: 1.0118638277053833,grad_norm: 0.999999051537131, iteration: 135514
loss: 0.9997532963752747,grad_norm: 0.9999990093749044, iteration: 135515
loss: 0.9998776316642761,grad_norm: 0.9956620797446031, iteration: 135516
loss: 0.9854403138160706,grad_norm: 0.9999991696784322, iteration: 135517
loss: 0.9981964230537415,grad_norm: 0.9999991433602172, iteration: 135518
loss: 0.9770374894142151,grad_norm: 0.9999991183588147, iteration: 135519
loss: 0.9827931523323059,grad_norm: 0.9999990035226669, iteration: 135520
loss: 1.067638874053955,grad_norm: 0.9669671426492368, iteration: 135521
loss: 1.0167958736419678,grad_norm: 0.9999989857000379, iteration: 135522
loss: 0.9802279472351074,grad_norm: 0.9999990845897433, iteration: 135523
loss: 0.9673753380775452,grad_norm: 0.9999992171909713, iteration: 135524
loss: 0.9795735478401184,grad_norm: 0.9999991522170754, iteration: 135525
loss: 1.0209739208221436,grad_norm: 0.9974036139878728, iteration: 135526
loss: 0.9998247623443604,grad_norm: 0.9999990722570071, iteration: 135527
loss: 1.0006684064865112,grad_norm: 0.9999991207402288, iteration: 135528
loss: 0.9870341420173645,grad_norm: 0.9999993250472002, iteration: 135529
loss: 1.0053315162658691,grad_norm: 0.8500140365318455, iteration: 135530
loss: 0.9801291823387146,grad_norm: 0.8994075328386983, iteration: 135531
loss: 1.001715898513794,grad_norm: 0.8588408278751001, iteration: 135532
loss: 1.0183550119400024,grad_norm: 0.9999990801998477, iteration: 135533
loss: 0.9989367127418518,grad_norm: 0.9170488778340581, iteration: 135534
loss: 0.9755200147628784,grad_norm: 0.9999992253307254, iteration: 135535
loss: 1.0379406213760376,grad_norm: 0.999999922395894, iteration: 135536
loss: 1.0273044109344482,grad_norm: 0.9999990713038083, iteration: 135537
loss: 1.007685899734497,grad_norm: 0.9999993030770933, iteration: 135538
loss: 0.98211669921875,grad_norm: 0.9999991536945887, iteration: 135539
loss: 1.0338114500045776,grad_norm: 0.99999912357806, iteration: 135540
loss: 0.9960447549819946,grad_norm: 0.9731607656665872, iteration: 135541
loss: 1.0052151679992676,grad_norm: 0.9999991304113928, iteration: 135542
loss: 1.0195127725601196,grad_norm: 0.9999991037952396, iteration: 135543
loss: 0.9897006154060364,grad_norm: 0.8975261155225492, iteration: 135544
loss: 0.9972995519638062,grad_norm: 0.9999990443445617, iteration: 135545
loss: 1.0115077495574951,grad_norm: 0.9807526090496396, iteration: 135546
loss: 1.014545202255249,grad_norm: 0.915362908659454, iteration: 135547
loss: 1.0087721347808838,grad_norm: 0.9062915474527755, iteration: 135548
loss: 0.9773679971694946,grad_norm: 0.8916759711439041, iteration: 135549
loss: 1.006768822669983,grad_norm: 0.9742983289923038, iteration: 135550
loss: 0.9980871677398682,grad_norm: 0.9999992630466567, iteration: 135551
loss: 1.0157967805862427,grad_norm: 0.999999115401737, iteration: 135552
loss: 0.9935535192489624,grad_norm: 0.9999993214907348, iteration: 135553
loss: 0.972826361656189,grad_norm: 0.8316269380424655, iteration: 135554
loss: 0.9739405512809753,grad_norm: 0.9776723595353616, iteration: 135555
loss: 1.0056633949279785,grad_norm: 0.9519046920425656, iteration: 135556
loss: 0.9858450889587402,grad_norm: 0.9999991762670423, iteration: 135557
loss: 0.9879493117332458,grad_norm: 0.7692584474214085, iteration: 135558
loss: 0.9983510971069336,grad_norm: 0.9999990991063518, iteration: 135559
loss: 1.0452356338500977,grad_norm: 0.9999992019359738, iteration: 135560
loss: 0.9942620992660522,grad_norm: 0.9153561771125921, iteration: 135561
loss: 1.0228430032730103,grad_norm: 0.9999991585891091, iteration: 135562
loss: 1.0169774293899536,grad_norm: 0.8512050911580016, iteration: 135563
loss: 0.9633523225784302,grad_norm: 0.9999990588766909, iteration: 135564
loss: 1.023780107498169,grad_norm: 0.9999989939432136, iteration: 135565
loss: 1.0424566268920898,grad_norm: 0.9135924474389, iteration: 135566
loss: 1.024397373199463,grad_norm: 0.9999992329908473, iteration: 135567
loss: 1.0216331481933594,grad_norm: 0.989205279741955, iteration: 135568
loss: 1.0062029361724854,grad_norm: 0.9488114339350511, iteration: 135569
loss: 1.0093684196472168,grad_norm: 0.9999990138405421, iteration: 135570
loss: 1.005002498626709,grad_norm: 0.9999991627416818, iteration: 135571
loss: 0.9772856831550598,grad_norm: 0.9999990407341927, iteration: 135572
loss: 0.9891207218170166,grad_norm: 0.9999990653734666, iteration: 135573
loss: 0.9887431263923645,grad_norm: 0.999999076471121, iteration: 135574
loss: 0.9690967798233032,grad_norm: 0.9999991688646749, iteration: 135575
loss: 0.9727450013160706,grad_norm: 0.9622410051443501, iteration: 135576
loss: 1.0067681074142456,grad_norm: 0.9999991682348698, iteration: 135577
loss: 0.9865830540657043,grad_norm: 0.9999990391794362, iteration: 135578
loss: 1.010783076286316,grad_norm: 0.9999991245385711, iteration: 135579
loss: 0.9655539393424988,grad_norm: 0.935050246234685, iteration: 135580
loss: 0.9990639090538025,grad_norm: 0.9445837176606066, iteration: 135581
loss: 1.0376198291778564,grad_norm: 0.9999992913217326, iteration: 135582
loss: 0.979953944683075,grad_norm: 0.9665515115012832, iteration: 135583
loss: 1.006410002708435,grad_norm: 0.9999990553803503, iteration: 135584
loss: 1.0059541463851929,grad_norm: 0.9999990185081324, iteration: 135585
loss: 1.0398585796356201,grad_norm: 0.9999994913789736, iteration: 135586
loss: 0.9987221360206604,grad_norm: 0.9313266368836783, iteration: 135587
loss: 0.9797849655151367,grad_norm: 0.9551219689213349, iteration: 135588
loss: 1.047866702079773,grad_norm: 0.9999998062669745, iteration: 135589
loss: 1.029098629951477,grad_norm: 0.9999990156129295, iteration: 135590
loss: 0.9920872449874878,grad_norm: 0.9999990460248223, iteration: 135591
loss: 0.9878755807876587,grad_norm: 0.999999186880902, iteration: 135592
loss: 0.9960036873817444,grad_norm: 0.9999992329992677, iteration: 135593
loss: 0.9809977412223816,grad_norm: 0.9999992244837074, iteration: 135594
loss: 1.017472743988037,grad_norm: 0.9999992554028423, iteration: 135595
loss: 0.9934564828872681,grad_norm: 0.9922106648838592, iteration: 135596
loss: 1.0018975734710693,grad_norm: 0.9999991651097316, iteration: 135597
loss: 0.9772337079048157,grad_norm: 0.9999992352523382, iteration: 135598
loss: 0.9898021817207336,grad_norm: 0.8039443635361412, iteration: 135599
loss: 0.9942070245742798,grad_norm: 0.999999044359799, iteration: 135600
loss: 1.0068604946136475,grad_norm: 0.8257792283370684, iteration: 135601
loss: 1.0065268278121948,grad_norm: 0.9999995104430376, iteration: 135602
loss: 1.0408923625946045,grad_norm: 0.9999991847457743, iteration: 135603
loss: 0.990706741809845,grad_norm: 0.9999991029772269, iteration: 135604
loss: 1.0051581859588623,grad_norm: 0.9999990397988672, iteration: 135605
loss: 0.9831583499908447,grad_norm: 0.9999992174342427, iteration: 135606
loss: 0.9976930022239685,grad_norm: 0.9543875748254868, iteration: 135607
loss: 1.0017012357711792,grad_norm: 0.9402659799486603, iteration: 135608
loss: 1.0026440620422363,grad_norm: 0.9794626346589114, iteration: 135609
loss: 0.9687362313270569,grad_norm: 0.9376957973915981, iteration: 135610
loss: 1.021873950958252,grad_norm: 0.9999994112571413, iteration: 135611
loss: 1.0336244106292725,grad_norm: 0.9862851317070328, iteration: 135612
loss: 0.9886640310287476,grad_norm: 0.9999992163085304, iteration: 135613
loss: 1.0209927558898926,grad_norm: 0.9999992512418687, iteration: 135614
loss: 1.013092041015625,grad_norm: 0.9813489858451756, iteration: 135615
loss: 0.9990718364715576,grad_norm: 0.9999991187270352, iteration: 135616
loss: 0.9784327745437622,grad_norm: 0.9863553897459838, iteration: 135617
loss: 1.0348336696624756,grad_norm: 0.8756402639613008, iteration: 135618
loss: 0.9879370331764221,grad_norm: 0.9999990413169539, iteration: 135619
loss: 1.0014363527297974,grad_norm: 0.9999992062093482, iteration: 135620
loss: 0.9598650336265564,grad_norm: 0.9999992478285369, iteration: 135621
loss: 1.0221498012542725,grad_norm: 0.999999599220107, iteration: 135622
loss: 1.0062956809997559,grad_norm: 0.9999993151559932, iteration: 135623
loss: 0.9890713095664978,grad_norm: 0.999998999736567, iteration: 135624
loss: 1.028988242149353,grad_norm: 0.9999997927859509, iteration: 135625
loss: 0.9690762162208557,grad_norm: 0.9999992127884524, iteration: 135626
loss: 1.018808364868164,grad_norm: 0.9131401948292174, iteration: 135627
loss: 1.0259634256362915,grad_norm: 0.9647992865379071, iteration: 135628
loss: 1.0347130298614502,grad_norm: 0.9554279198200891, iteration: 135629
loss: 1.0036898851394653,grad_norm: 0.9999990986628876, iteration: 135630
loss: 0.9972386360168457,grad_norm: 0.8978407973442437, iteration: 135631
loss: 1.0051660537719727,grad_norm: 0.8034765898965693, iteration: 135632
loss: 1.025118350982666,grad_norm: 0.9999994720881442, iteration: 135633
loss: 0.9960092306137085,grad_norm: 0.9999991356163835, iteration: 135634
loss: 1.027937412261963,grad_norm: 0.974798034274196, iteration: 135635
loss: 1.0112558603286743,grad_norm: 0.9999993280979571, iteration: 135636
loss: 0.9822497367858887,grad_norm: 0.9999991182153627, iteration: 135637
loss: 1.0103684663772583,grad_norm: 0.962797477050994, iteration: 135638
loss: 1.00424325466156,grad_norm: 0.8735064322161564, iteration: 135639
loss: 1.003793716430664,grad_norm: 0.9999990176942735, iteration: 135640
loss: 1.0166090726852417,grad_norm: 0.8861262010825034, iteration: 135641
loss: 1.0176485776901245,grad_norm: 0.9999992850064127, iteration: 135642
loss: 0.9597616195678711,grad_norm: 0.9999990455303258, iteration: 135643
loss: 1.005509614944458,grad_norm: 0.9015643777804963, iteration: 135644
loss: 1.1217788457870483,grad_norm: 0.9521127332490705, iteration: 135645
loss: 1.0170807838439941,grad_norm: 0.999999171813274, iteration: 135646
loss: 0.9643787741661072,grad_norm: 0.9736909751832908, iteration: 135647
loss: 0.9935823082923889,grad_norm: 0.9999990678355739, iteration: 135648
loss: 0.9746707677841187,grad_norm: 0.9776131197818528, iteration: 135649
loss: 0.9350485801696777,grad_norm: 0.9999993161065703, iteration: 135650
loss: 0.9919754266738892,grad_norm: 0.9999992484286222, iteration: 135651
loss: 1.0123249292373657,grad_norm: 0.8862013770793176, iteration: 135652
loss: 1.0245758295059204,grad_norm: 0.8151823867944098, iteration: 135653
loss: 1.0101336240768433,grad_norm: 0.9999990284711374, iteration: 135654
loss: 0.9977051019668579,grad_norm: 0.9999992118760295, iteration: 135655
loss: 0.9571418166160583,grad_norm: 0.9999992596762282, iteration: 135656
loss: 1.011169195175171,grad_norm: 0.9999991353693356, iteration: 135657
loss: 1.0298346281051636,grad_norm: 0.9999995760496473, iteration: 135658
loss: 0.9582927823066711,grad_norm: 0.9999991125040651, iteration: 135659
loss: 1.0165399312973022,grad_norm: 0.9999991830969377, iteration: 135660
loss: 1.0011554956436157,grad_norm: 0.9999990205665669, iteration: 135661
loss: 1.0401854515075684,grad_norm: 0.940319895120218, iteration: 135662
loss: 1.0071481466293335,grad_norm: 0.9999991302743507, iteration: 135663
loss: 0.9800506234169006,grad_norm: 0.9813076137289956, iteration: 135664
loss: 1.0141595602035522,grad_norm: 0.9999992338054953, iteration: 135665
loss: 1.038223385810852,grad_norm: 0.8612719458044276, iteration: 135666
loss: 0.9988216757774353,grad_norm: 0.9143517978847644, iteration: 135667
loss: 1.0298699140548706,grad_norm: 1.0000000241577787, iteration: 135668
loss: 0.998487651348114,grad_norm: 0.9999990619401518, iteration: 135669
loss: 0.9916187524795532,grad_norm: 0.9999991102593921, iteration: 135670
loss: 1.0044819116592407,grad_norm: 0.9999990938323265, iteration: 135671
loss: 0.9892339110374451,grad_norm: 0.9907644536134047, iteration: 135672
loss: 1.02274751663208,grad_norm: 0.999999076144259, iteration: 135673
loss: 0.9688650965690613,grad_norm: 0.9999991699707609, iteration: 135674
loss: 0.9514917135238647,grad_norm: 0.9880024650044608, iteration: 135675
loss: 0.9972355961799622,grad_norm: 0.9590258507297481, iteration: 135676
loss: 1.0012668371200562,grad_norm: 0.9999990835291881, iteration: 135677
loss: 0.9873746633529663,grad_norm: 0.9999990674690467, iteration: 135678
loss: 0.9840137958526611,grad_norm: 0.9999990718974361, iteration: 135679
loss: 1.0207394361495972,grad_norm: 0.999999015463645, iteration: 135680
loss: 0.9726327061653137,grad_norm: 0.9999992431469367, iteration: 135681
loss: 1.0093978643417358,grad_norm: 0.9625160283873873, iteration: 135682
loss: 1.0146379470825195,grad_norm: 0.9999989468030339, iteration: 135683
loss: 1.0165293216705322,grad_norm: 0.9999990418112565, iteration: 135684
loss: 0.9700343608856201,grad_norm: 0.9806324720248212, iteration: 135685
loss: 1.0200402736663818,grad_norm: 0.9373157927741255, iteration: 135686
loss: 1.035220980644226,grad_norm: 0.9999992323026049, iteration: 135687
loss: 1.0218037366867065,grad_norm: 0.9039866452893132, iteration: 135688
loss: 0.9944185614585876,grad_norm: 0.999999207425585, iteration: 135689
loss: 1.008393406867981,grad_norm: 0.9999992173814387, iteration: 135690
loss: 1.0191175937652588,grad_norm: 0.9762232037897396, iteration: 135691
loss: 0.9872786402702332,grad_norm: 0.9999991020700696, iteration: 135692
loss: 1.0170989036560059,grad_norm: 0.9999992984009011, iteration: 135693
loss: 0.9843178391456604,grad_norm: 0.9999991375177542, iteration: 135694
loss: 0.9953973293304443,grad_norm: 0.9999990724801506, iteration: 135695
loss: 1.029533863067627,grad_norm: 0.9999989953456558, iteration: 135696
loss: 1.0069266557693481,grad_norm: 0.8935731869868974, iteration: 135697
loss: 0.9763079881668091,grad_norm: 0.9999998756079899, iteration: 135698
loss: 1.017886996269226,grad_norm: 0.9999991403729642, iteration: 135699
loss: 1.0310760736465454,grad_norm: 0.956096638561567, iteration: 135700
loss: 1.0092631578445435,grad_norm: 0.9728740509311933, iteration: 135701
loss: 1.0108327865600586,grad_norm: 0.999999220765616, iteration: 135702
loss: 0.9551043510437012,grad_norm: 0.9797177746925565, iteration: 135703
loss: 0.9767473340034485,grad_norm: 0.9999991444689094, iteration: 135704
loss: 0.9817358255386353,grad_norm: 0.9999992449838381, iteration: 135705
loss: 0.9971794486045837,grad_norm: 0.9999991210050271, iteration: 135706
loss: 1.022994041442871,grad_norm: 0.9812213225587101, iteration: 135707
loss: 1.005035161972046,grad_norm: 0.9908998848038328, iteration: 135708
loss: 0.9894902110099792,grad_norm: 0.9999992077105729, iteration: 135709
loss: 0.9938894510269165,grad_norm: 0.89101787974225, iteration: 135710
loss: 0.9953285455703735,grad_norm: 0.950331089350396, iteration: 135711
loss: 0.9687419533729553,grad_norm: 0.9999990726913226, iteration: 135712
loss: 0.986595630645752,grad_norm: 0.8451320548261466, iteration: 135713
loss: 1.0362896919250488,grad_norm: 0.999999192632914, iteration: 135714
loss: 1.0303893089294434,grad_norm: 0.9999992598592603, iteration: 135715
loss: 0.9819921255111694,grad_norm: 0.9374087082894026, iteration: 135716
loss: 1.0188180208206177,grad_norm: 0.9999991403351519, iteration: 135717
loss: 0.9734424352645874,grad_norm: 0.9999991618416423, iteration: 135718
loss: 1.0021077394485474,grad_norm: 0.9999990515812368, iteration: 135719
loss: 1.0053975582122803,grad_norm: 0.9999991843724375, iteration: 135720
loss: 1.0175780057907104,grad_norm: 0.9999990919673049, iteration: 135721
loss: 0.995185375213623,grad_norm: 0.8618359790936331, iteration: 135722
loss: 0.995879054069519,grad_norm: 0.999999260972696, iteration: 135723
loss: 1.0303294658660889,grad_norm: 0.9999992339483093, iteration: 135724
loss: 0.9951073527336121,grad_norm: 0.9999990502561207, iteration: 135725
loss: 0.9945240020751953,grad_norm: 0.9262684762830364, iteration: 135726
loss: 0.998638391494751,grad_norm: 0.9999990719197902, iteration: 135727
loss: 1.012528657913208,grad_norm: 0.9999990693870711, iteration: 135728
loss: 1.0368270874023438,grad_norm: 0.9999991527765248, iteration: 135729
loss: 1.0137666463851929,grad_norm: 0.9999991010247476, iteration: 135730
loss: 0.9910148978233337,grad_norm: 0.8587395780318031, iteration: 135731
loss: 1.026033878326416,grad_norm: 0.99999912604043, iteration: 135732
loss: 1.0160577297210693,grad_norm: 0.9999990647383187, iteration: 135733
loss: 0.9519364833831787,grad_norm: 0.9390012682649692, iteration: 135734
loss: 0.9832791090011597,grad_norm: 0.9935124481760155, iteration: 135735
loss: 0.9757552146911621,grad_norm: 0.9999992722070502, iteration: 135736
loss: 1.0052200555801392,grad_norm: 0.9999991947939384, iteration: 135737
loss: 0.9911708235740662,grad_norm: 0.9999991562542672, iteration: 135738
loss: 0.9745200872421265,grad_norm: 0.9999992137917912, iteration: 135739
loss: 1.0377124547958374,grad_norm: 0.9655661056088277, iteration: 135740
loss: 1.0383563041687012,grad_norm: 0.9999992220289287, iteration: 135741
loss: 1.201438546180725,grad_norm: 0.9999998842895289, iteration: 135742
loss: 0.9915271401405334,grad_norm: 0.9999990461348252, iteration: 135743
loss: 0.9808955788612366,grad_norm: 0.9999993770515844, iteration: 135744
loss: 1.2104846239089966,grad_norm: 0.9999995730527287, iteration: 135745
loss: 0.9585415124893188,grad_norm: 0.9999990348713991, iteration: 135746
loss: 0.9966612458229065,grad_norm: 0.9999992245720037, iteration: 135747
loss: 0.9973828792572021,grad_norm: 0.9999992842548778, iteration: 135748
loss: 0.9968249797821045,grad_norm: 0.9999991127860631, iteration: 135749
loss: 1.0247236490249634,grad_norm: 0.9999990482569818, iteration: 135750
loss: 0.9892638325691223,grad_norm: 0.9999992118032883, iteration: 135751
loss: 0.9714614748954773,grad_norm: 0.9657997010163388, iteration: 135752
loss: 0.9641863107681274,grad_norm: 0.9999992111931462, iteration: 135753
loss: 0.9850007891654968,grad_norm: 0.9999991055313548, iteration: 135754
loss: 1.0275301933288574,grad_norm: 0.999999109542652, iteration: 135755
loss: 0.9930749535560608,grad_norm: 0.9999991207757316, iteration: 135756
loss: 1.0218207836151123,grad_norm: 0.888158044231956, iteration: 135757
loss: 1.020928144454956,grad_norm: 0.9999993153113785, iteration: 135758
loss: 1.0226218700408936,grad_norm: 0.9999991278449791, iteration: 135759
loss: 1.0252015590667725,grad_norm: 0.9999993653974909, iteration: 135760
loss: 1.0462701320648193,grad_norm: 0.999999532867104, iteration: 135761
loss: 0.9780158400535583,grad_norm: 0.8892558114366318, iteration: 135762
loss: 0.9944005012512207,grad_norm: 0.9999993059813155, iteration: 135763
loss: 1.004069447517395,grad_norm: 0.8280515419665678, iteration: 135764
loss: 1.0076568126678467,grad_norm: 0.967801022381675, iteration: 135765
loss: 0.9962112903594971,grad_norm: 0.9999991798789108, iteration: 135766
loss: 0.9820072650909424,grad_norm: 0.9886597726031885, iteration: 135767
loss: 0.9915730953216553,grad_norm: 0.9537555147556762, iteration: 135768
loss: 1.0264579057693481,grad_norm: 0.9999992349845986, iteration: 135769
loss: 0.9933704733848572,grad_norm: 0.9999992167689075, iteration: 135770
loss: 1.012631893157959,grad_norm: 0.9270275162617204, iteration: 135771
loss: 0.9823061227798462,grad_norm: 0.8587421442973464, iteration: 135772
loss: 1.008023738861084,grad_norm: 0.9999992195181817, iteration: 135773
loss: 1.0388102531433105,grad_norm: 0.9999991119813794, iteration: 135774
loss: 1.0632683038711548,grad_norm: 0.999999155099984, iteration: 135775
loss: 1.0123491287231445,grad_norm: 0.9293590162285624, iteration: 135776
loss: 1.0102944374084473,grad_norm: 0.8928612144896523, iteration: 135777
loss: 0.9654945135116577,grad_norm: 0.9999990919228093, iteration: 135778
loss: 1.020740270614624,grad_norm: 0.8826497658426261, iteration: 135779
loss: 0.996653139591217,grad_norm: 0.999998990867002, iteration: 135780
loss: 0.9929086565971375,grad_norm: 0.9263991587116044, iteration: 135781
loss: 1.0151468515396118,grad_norm: 0.9999993525777408, iteration: 135782
loss: 1.013934850692749,grad_norm: 0.9999992720150775, iteration: 135783
loss: 1.0120633840560913,grad_norm: 0.9999991108191312, iteration: 135784
loss: 1.0247998237609863,grad_norm: 0.9748283450779801, iteration: 135785
loss: 1.0068186521530151,grad_norm: 0.9887068381479327, iteration: 135786
loss: 0.9976733922958374,grad_norm: 0.9999992649590087, iteration: 135787
loss: 0.9911965131759644,grad_norm: 0.9999990680585845, iteration: 135788
loss: 1.0509885549545288,grad_norm: 0.9999991406241454, iteration: 135789
loss: 0.9850431680679321,grad_norm: 0.9999991317016977, iteration: 135790
loss: 0.9931349754333496,grad_norm: 0.9999993461781246, iteration: 135791
loss: 1.0898942947387695,grad_norm: 0.9999998430755798, iteration: 135792
loss: 1.0069425106048584,grad_norm: 0.9596263788424265, iteration: 135793
loss: 0.9754472374916077,grad_norm: 0.9853196732969056, iteration: 135794
loss: 1.019202470779419,grad_norm: 0.9999991803949171, iteration: 135795
loss: 1.0278303623199463,grad_norm: 0.9520011330175433, iteration: 135796
loss: 1.0037195682525635,grad_norm: 0.9788251980121346, iteration: 135797
loss: 0.9642388820648193,grad_norm: 0.9129637753213502, iteration: 135798
loss: 1.0182498693466187,grad_norm: 0.9999989899930065, iteration: 135799
loss: 1.0411272048950195,grad_norm: 0.9999992133762127, iteration: 135800
loss: 1.0083696842193604,grad_norm: 0.9999992319021273, iteration: 135801
loss: 1.0058668851852417,grad_norm: 0.9999991181162989, iteration: 135802
loss: 0.9637942910194397,grad_norm: 0.999999146551197, iteration: 135803
loss: 1.0472865104675293,grad_norm: 0.9207286790667288, iteration: 135804
loss: 1.0298266410827637,grad_norm: 0.9999990269049119, iteration: 135805
loss: 0.9845054745674133,grad_norm: 0.9202249028274926, iteration: 135806
loss: 1.0059961080551147,grad_norm: 0.9999989434990919, iteration: 135807
loss: 0.9909725189208984,grad_norm: 0.9736526609468206, iteration: 135808
loss: 0.9855461716651917,grad_norm: 0.9999991793403508, iteration: 135809
loss: 0.9871519207954407,grad_norm: 0.9999991003999935, iteration: 135810
loss: 1.0306267738342285,grad_norm: 0.9999991775186665, iteration: 135811
loss: 0.9910085201263428,grad_norm: 0.9999991702036087, iteration: 135812
loss: 0.9806542992591858,grad_norm: 0.999999057724688, iteration: 135813
loss: 1.0480061769485474,grad_norm: 0.9881082106042588, iteration: 135814
loss: 1.0075281858444214,grad_norm: 0.9999990873910891, iteration: 135815
loss: 1.0081781148910522,grad_norm: 0.9417069892268447, iteration: 135816
loss: 0.9639056324958801,grad_norm: 0.9061612522182982, iteration: 135817
loss: 1.0140974521636963,grad_norm: 0.9999991036334374, iteration: 135818
loss: 1.0165201425552368,grad_norm: 0.999999004145947, iteration: 135819
loss: 0.9490920305252075,grad_norm: 0.999999235488474, iteration: 135820
loss: 1.0111181735992432,grad_norm: 0.8939042294023563, iteration: 135821
loss: 0.9852586984634399,grad_norm: 0.9999991006489609, iteration: 135822
loss: 1.0012584924697876,grad_norm: 0.8779087764513679, iteration: 135823
loss: 1.0290660858154297,grad_norm: 0.9999993612344923, iteration: 135824
loss: 1.0016722679138184,grad_norm: 0.9271351191986794, iteration: 135825
loss: 1.0126978158950806,grad_norm: 0.9999994096759948, iteration: 135826
loss: 0.990138590335846,grad_norm: 0.9999992270797744, iteration: 135827
loss: 0.9981269240379333,grad_norm: 0.9999992356338463, iteration: 135828
loss: 1.0022128820419312,grad_norm: 0.8348115502316307, iteration: 135829
loss: 0.9891695976257324,grad_norm: 0.8609339744408909, iteration: 135830
loss: 0.9928281307220459,grad_norm: 0.9999993477345468, iteration: 135831
loss: 0.9898648262023926,grad_norm: 0.9999989750439064, iteration: 135832
loss: 1.0195162296295166,grad_norm: 0.9999991138644186, iteration: 135833
loss: 0.9917250275611877,grad_norm: 0.9936276337501283, iteration: 135834
loss: 1.0175793170928955,grad_norm: 0.999999115662385, iteration: 135835
loss: 1.016292691230774,grad_norm: 0.9999990546901019, iteration: 135836
loss: 1.0338530540466309,grad_norm: 0.9999992811577715, iteration: 135837
loss: 1.020613431930542,grad_norm: 0.9999991887547842, iteration: 135838
loss: 1.0122510194778442,grad_norm: 0.9573333153605665, iteration: 135839
loss: 0.9934712648391724,grad_norm: 0.9999990287553476, iteration: 135840
loss: 0.9811480045318604,grad_norm: 0.9981425556721285, iteration: 135841
loss: 1.0006804466247559,grad_norm: 0.9999991926779819, iteration: 135842
loss: 0.9974110126495361,grad_norm: 0.9999991394178271, iteration: 135843
loss: 0.9930346608161926,grad_norm: 0.9663970843224005, iteration: 135844
loss: 0.958401083946228,grad_norm: 0.9999991072048049, iteration: 135845
loss: 0.9662524461746216,grad_norm: 0.9796000739750784, iteration: 135846
loss: 1.0254313945770264,grad_norm: 0.9999990667997037, iteration: 135847
loss: 1.007643699645996,grad_norm: 0.9999991023517855, iteration: 135848
loss: 0.9807796478271484,grad_norm: 0.9481084353576036, iteration: 135849
loss: 1.005281925201416,grad_norm: 0.9999992324391362, iteration: 135850
loss: 1.0733104944229126,grad_norm: 0.9999994852724488, iteration: 135851
loss: 1.0272620916366577,grad_norm: 0.9999992005194044, iteration: 135852
loss: 0.9804335832595825,grad_norm: 0.999999419420269, iteration: 135853
loss: 0.9982945322990417,grad_norm: 0.8985990472709372, iteration: 135854
loss: 1.0073375701904297,grad_norm: 0.9823225179689715, iteration: 135855
loss: 0.9362214803695679,grad_norm: 0.922795999856135, iteration: 135856
loss: 0.992068886756897,grad_norm: 0.9073630330111355, iteration: 135857
loss: 0.9983395934104919,grad_norm: 0.9999991104941394, iteration: 135858
loss: 0.9987089037895203,grad_norm: 0.9999990736116, iteration: 135859
loss: 1.0246530771255493,grad_norm: 0.9543380159004513, iteration: 135860
loss: 0.9990244507789612,grad_norm: 0.8989955446473805, iteration: 135861
loss: 0.9983646869659424,grad_norm: 0.9813788879851778, iteration: 135862
loss: 0.9863786101341248,grad_norm: 0.9141349527792724, iteration: 135863
loss: 1.022508978843689,grad_norm: 0.9999990570158523, iteration: 135864
loss: 0.9817246198654175,grad_norm: 0.9999991533747733, iteration: 135865
loss: 0.9959068894386292,grad_norm: 0.9999991577845532, iteration: 135866
loss: 1.0513275861740112,grad_norm: 0.9999996568494086, iteration: 135867
loss: 1.0114067792892456,grad_norm: 0.999999053677631, iteration: 135868
loss: 1.0031157732009888,grad_norm: 0.9999990950942135, iteration: 135869
loss: 0.9781810641288757,grad_norm: 0.9637310264208259, iteration: 135870
loss: 1.0240843296051025,grad_norm: 0.9999993691116101, iteration: 135871
loss: 0.9952557682991028,grad_norm: 0.9999997999796139, iteration: 135872
loss: 1.00322425365448,grad_norm: 0.8819960894905593, iteration: 135873
loss: 0.9896456599235535,grad_norm: 0.9999992349886501, iteration: 135874
loss: 1.0278888940811157,grad_norm: 0.9999991384157999, iteration: 135875
loss: 0.989651083946228,grad_norm: 0.9999990935239207, iteration: 135876
loss: 1.0282742977142334,grad_norm: 0.9800103350655505, iteration: 135877
loss: 1.008268117904663,grad_norm: 0.9999992645476843, iteration: 135878
loss: 0.9629728198051453,grad_norm: 0.9999995544523881, iteration: 135879
loss: 1.0394518375396729,grad_norm: 0.9999992832469642, iteration: 135880
loss: 1.0191611051559448,grad_norm: 0.9781547168943642, iteration: 135881
loss: 0.9560863971710205,grad_norm: 0.8929092050198142, iteration: 135882
loss: 1.0367988348007202,grad_norm: 0.9999991281082716, iteration: 135883
loss: 1.044753909111023,grad_norm: 0.9999990983158814, iteration: 135884
loss: 0.9844141602516174,grad_norm: 0.9625649753646717, iteration: 135885
loss: 1.0241706371307373,grad_norm: 0.9081677188488104, iteration: 135886
loss: 0.9910239577293396,grad_norm: 0.8673894842242348, iteration: 135887
loss: 1.066933512687683,grad_norm: 0.9999994017885666, iteration: 135888
loss: 0.9846449494361877,grad_norm: 0.9999992149524574, iteration: 135889
loss: 0.9933486580848694,grad_norm: 0.9999992066120166, iteration: 135890
loss: 0.990273654460907,grad_norm: 0.971580422493229, iteration: 135891
loss: 1.010943055152893,grad_norm: 0.9140211493040326, iteration: 135892
loss: 1.0319381952285767,grad_norm: 0.9999999195246165, iteration: 135893
loss: 0.9589575529098511,grad_norm: 0.9999991235754192, iteration: 135894
loss: 1.0002793073654175,grad_norm: 0.7962546361434079, iteration: 135895
loss: 0.9738970398902893,grad_norm: 0.9999996497521056, iteration: 135896
loss: 0.9894378185272217,grad_norm: 0.9464248952500209, iteration: 135897
loss: 0.9919886589050293,grad_norm: 0.999999107335164, iteration: 135898
loss: 1.099420189857483,grad_norm: 0.9999996001910214, iteration: 135899
loss: 0.970152735710144,grad_norm: 0.9630196101967409, iteration: 135900
loss: 1.0259090662002563,grad_norm: 0.999999214032223, iteration: 135901
loss: 1.0064371824264526,grad_norm: 0.9999991447404554, iteration: 135902
loss: 1.0061635971069336,grad_norm: 0.9241231508908456, iteration: 135903
loss: 0.9495560526847839,grad_norm: 0.9999991787884086, iteration: 135904
loss: 1.054624080657959,grad_norm: 0.9999994426292532, iteration: 135905
loss: 1.0024703741073608,grad_norm: 0.999999191630634, iteration: 135906
loss: 1.0392099618911743,grad_norm: 1.0000000197939185, iteration: 135907
loss: 1.018394112586975,grad_norm: 0.9562836936447574, iteration: 135908
loss: 1.0101819038391113,grad_norm: 0.9595772750257867, iteration: 135909
loss: 0.9905198216438293,grad_norm: 0.9999989980521818, iteration: 135910
loss: 1.018018364906311,grad_norm: 0.9999991130462468, iteration: 135911
loss: 1.005829930305481,grad_norm: 0.9999991774793983, iteration: 135912
loss: 1.0216203927993774,grad_norm: 0.9999990985138572, iteration: 135913
loss: 1.0033464431762695,grad_norm: 0.9999990746862881, iteration: 135914
loss: 1.0158050060272217,grad_norm: 0.9999989689863217, iteration: 135915
loss: 1.0104286670684814,grad_norm: 0.9999992545225243, iteration: 135916
loss: 0.9661813974380493,grad_norm: 0.9484468554114887, iteration: 135917
loss: 0.9913047552108765,grad_norm: 0.9999991261918632, iteration: 135918
loss: 0.9913288354873657,grad_norm: 0.8432599879784242, iteration: 135919
loss: 1.0172629356384277,grad_norm: 0.9999994916493714, iteration: 135920
loss: 0.9448868036270142,grad_norm: 0.9999991336803226, iteration: 135921
loss: 1.0411272048950195,grad_norm: 0.9999999100069676, iteration: 135922
loss: 1.0164371728897095,grad_norm: 0.9999991489937544, iteration: 135923
loss: 1.0187939405441284,grad_norm: 0.9999991803120148, iteration: 135924
loss: 0.9606891870498657,grad_norm: 0.935341212228044, iteration: 135925
loss: 1.0198957920074463,grad_norm: 0.9999991364983238, iteration: 135926
loss: 1.0243353843688965,grad_norm: 0.9999991749123368, iteration: 135927
loss: 0.9360916018486023,grad_norm: 0.9999990295528728, iteration: 135928
loss: 1.0107102394104004,grad_norm: 0.9999992972963151, iteration: 135929
loss: 0.982312023639679,grad_norm: 0.9991422744820876, iteration: 135930
loss: 0.9973586797714233,grad_norm: 0.8331141992655439, iteration: 135931
loss: 1.0198053121566772,grad_norm: 0.9999993796371279, iteration: 135932
loss: 0.9852641820907593,grad_norm: 0.9999993008233669, iteration: 135933
loss: 1.0202792882919312,grad_norm: 0.9999988794280666, iteration: 135934
loss: 1.013593077659607,grad_norm: 0.8409927122586139, iteration: 135935
loss: 0.9543340802192688,grad_norm: 0.9999991531100407, iteration: 135936
loss: 0.995855987071991,grad_norm: 0.7883913985057799, iteration: 135937
loss: 0.9870695471763611,grad_norm: 0.9999991971943846, iteration: 135938
loss: 0.9968932271003723,grad_norm: 0.951265011339633, iteration: 135939
loss: 1.0238747596740723,grad_norm: 0.9999992515686381, iteration: 135940
loss: 1.0082478523254395,grad_norm: 0.9999993588298141, iteration: 135941
loss: 1.0083526372909546,grad_norm: 0.9777803140360761, iteration: 135942
loss: 0.988961398601532,grad_norm: 0.999999166370445, iteration: 135943
loss: 1.0054339170455933,grad_norm: 0.999999217536642, iteration: 135944
loss: 1.0505379438400269,grad_norm: 0.9999992228853948, iteration: 135945
loss: 0.9559214115142822,grad_norm: 0.9606881287950666, iteration: 135946
loss: 0.9896218776702881,grad_norm: 0.999999248157824, iteration: 135947
loss: 1.0121374130249023,grad_norm: 0.999999114468552, iteration: 135948
loss: 1.030739426612854,grad_norm: 0.9999991627259894, iteration: 135949
loss: 0.9832948446273804,grad_norm: 0.9999990627582837, iteration: 135950
loss: 1.0026144981384277,grad_norm: 0.9999994638598686, iteration: 135951
loss: 1.014223575592041,grad_norm: 0.9591252724998867, iteration: 135952
loss: 1.012994408607483,grad_norm: 0.9999991153876605, iteration: 135953
loss: 0.994670033454895,grad_norm: 0.9999993009774545, iteration: 135954
loss: 0.9644822478294373,grad_norm: 0.9999991323857416, iteration: 135955
loss: 0.9918384552001953,grad_norm: 0.9999995507071274, iteration: 135956
loss: 0.9738252758979797,grad_norm: 0.9999990327717431, iteration: 135957
loss: 0.9964642524719238,grad_norm: 0.9999991734249173, iteration: 135958
loss: 0.981862485408783,grad_norm: 0.981478041491201, iteration: 135959
loss: 0.9696399569511414,grad_norm: 0.9999992189055136, iteration: 135960
loss: 0.986075758934021,grad_norm: 0.9999990322921862, iteration: 135961
loss: 0.9818055629730225,grad_norm: 0.9999991787996863, iteration: 135962
loss: 0.988815188407898,grad_norm: 0.9999991180755724, iteration: 135963
loss: 0.9959980845451355,grad_norm: 0.999999214045363, iteration: 135964
loss: 0.9883885383605957,grad_norm: 0.9515447505179179, iteration: 135965
loss: 0.9878034591674805,grad_norm: 0.9999991509595357, iteration: 135966
loss: 0.9968162178993225,grad_norm: 0.9999990202375069, iteration: 135967
loss: 1.0135507583618164,grad_norm: 0.9999991198229495, iteration: 135968
loss: 0.9904610514640808,grad_norm: 0.9999992293967714, iteration: 135969
loss: 0.9877216815948486,grad_norm: 0.9733454406588704, iteration: 135970
loss: 1.0064165592193604,grad_norm: 0.9477453698136628, iteration: 135971
loss: 0.9993005990982056,grad_norm: 0.8651397318032671, iteration: 135972
loss: 1.0832561254501343,grad_norm: 0.999999830704863, iteration: 135973
loss: 0.9587564468383789,grad_norm: 0.999999319059608, iteration: 135974
loss: 1.005743145942688,grad_norm: 0.9999990808841723, iteration: 135975
loss: 0.9603608250617981,grad_norm: 0.9889230109369176, iteration: 135976
loss: 0.9676588773727417,grad_norm: 0.9999994296245367, iteration: 135977
loss: 1.009543538093567,grad_norm: 0.9999991514783019, iteration: 135978
loss: 1.0113635063171387,grad_norm: 0.9999993497619108, iteration: 135979
loss: 0.9726038575172424,grad_norm: 0.9510918970037535, iteration: 135980
loss: 1.0126932859420776,grad_norm: 0.9999991810264851, iteration: 135981
loss: 0.9920973181724548,grad_norm: 0.9999991652498585, iteration: 135982
loss: 0.9861315488815308,grad_norm: 0.9015181806750393, iteration: 135983
loss: 1.0556482076644897,grad_norm: 0.9999992352669145, iteration: 135984
loss: 1.0048211812973022,grad_norm: 0.8657976978472913, iteration: 135985
loss: 1.0498610734939575,grad_norm: 0.9999992731609849, iteration: 135986
loss: 0.9841688275337219,grad_norm: 0.9057611628757262, iteration: 135987
loss: 0.9833388924598694,grad_norm: 0.9999992247667047, iteration: 135988
loss: 1.0163530111312866,grad_norm: 0.9999991948904472, iteration: 135989
loss: 1.013587236404419,grad_norm: 0.9999991450062957, iteration: 135990
loss: 1.0022839307785034,grad_norm: 0.930210518601325, iteration: 135991
loss: 0.987575888633728,grad_norm: 0.9999990896326214, iteration: 135992
loss: 1.0029085874557495,grad_norm: 0.9353063837049004, iteration: 135993
loss: 1.0081809759140015,grad_norm: 0.9999993246286587, iteration: 135994
loss: 1.0450654029846191,grad_norm: 0.9999990062122057, iteration: 135995
loss: 1.0033583641052246,grad_norm: 0.9999992952896105, iteration: 135996
loss: 1.0697972774505615,grad_norm: 0.9999998072379346, iteration: 135997
loss: 1.02500581741333,grad_norm: 0.9999999092244973, iteration: 135998
loss: 1.018470048904419,grad_norm: 0.9236097431822567, iteration: 135999
loss: 0.9924107193946838,grad_norm: 0.9531842826455863, iteration: 136000
loss: 1.0103930234909058,grad_norm: 0.999999246202436, iteration: 136001
loss: 1.0444947481155396,grad_norm: 0.9644440176087714, iteration: 136002
loss: 0.9936564564704895,grad_norm: 0.936577623244418, iteration: 136003
loss: 0.9737017750740051,grad_norm: 0.9999989734030753, iteration: 136004
loss: 0.9738317728042603,grad_norm: 0.9091290635363163, iteration: 136005
loss: 1.0107104778289795,grad_norm: 0.9999992349248837, iteration: 136006
loss: 0.9916349053382874,grad_norm: 0.9999990224961784, iteration: 136007
loss: 1.02263343334198,grad_norm: 0.9999991430957966, iteration: 136008
loss: 1.01151704788208,grad_norm: 0.9092279590007328, iteration: 136009
loss: 0.9899531602859497,grad_norm: 0.8403835985603433, iteration: 136010
loss: 0.9846555590629578,grad_norm: 0.9999991509260138, iteration: 136011
loss: 0.9873037934303284,grad_norm: 0.9999989827765812, iteration: 136012
loss: 0.9950439929962158,grad_norm: 0.9999991471679083, iteration: 136013
loss: 1.0417592525482178,grad_norm: 0.9999992034944362, iteration: 136014
loss: 1.0291731357574463,grad_norm: 0.9999991461124124, iteration: 136015
loss: 1.03300940990448,grad_norm: 0.9999998786027717, iteration: 136016
loss: 1.0142771005630493,grad_norm: 0.926591457839629, iteration: 136017
loss: 1.0069407224655151,grad_norm: 0.9999990106367208, iteration: 136018
loss: 1.0139055252075195,grad_norm: 0.9999993900975023, iteration: 136019
loss: 1.0082257986068726,grad_norm: 0.999999119509317, iteration: 136020
loss: 1.053910732269287,grad_norm: 0.9999998069393906, iteration: 136021
loss: 0.9982001781463623,grad_norm: 0.9999992488506374, iteration: 136022
loss: 1.0076959133148193,grad_norm: 0.999998965953142, iteration: 136023
loss: 0.9710836410522461,grad_norm: 0.9436653092438145, iteration: 136024
loss: 1.022359848022461,grad_norm: 0.9879597700953691, iteration: 136025
loss: 0.9944249391555786,grad_norm: 0.9999990125952707, iteration: 136026
loss: 1.040360689163208,grad_norm: 0.995586778548373, iteration: 136027
loss: 1.0236438512802124,grad_norm: 0.9999990457127197, iteration: 136028
loss: 1.0171351432800293,grad_norm: 0.9999990162602038, iteration: 136029
loss: 0.9767104983329773,grad_norm: 0.9179440713212861, iteration: 136030
loss: 0.9891601204872131,grad_norm: 0.8971471320591704, iteration: 136031
loss: 1.007392168045044,grad_norm: 0.9999998454255966, iteration: 136032
loss: 1.0710407495498657,grad_norm: 0.9987276249195328, iteration: 136033
loss: 1.0484826564788818,grad_norm: 0.9999990359281029, iteration: 136034
loss: 0.9964758157730103,grad_norm: 0.9999990471597783, iteration: 136035
loss: 0.9961802363395691,grad_norm: 0.9761275952025379, iteration: 136036
loss: 1.037713646888733,grad_norm: 0.9999992849169064, iteration: 136037
loss: 1.027653455734253,grad_norm: 0.9999992045051515, iteration: 136038
loss: 1.0165172815322876,grad_norm: 0.9999992048002769, iteration: 136039
loss: 1.0428608655929565,grad_norm: 1.000000022649632, iteration: 136040
loss: 1.060105562210083,grad_norm: 0.9999994070235366, iteration: 136041
loss: 0.9922056198120117,grad_norm: 0.9999990914674463, iteration: 136042
loss: 0.9745314717292786,grad_norm: 0.9999991276299786, iteration: 136043
loss: 1.0127394199371338,grad_norm: 0.9999991697670255, iteration: 136044
loss: 0.9750262498855591,grad_norm: 0.9607088149240541, iteration: 136045
loss: 0.9714328646659851,grad_norm: 0.9999995586125069, iteration: 136046
loss: 1.0040169954299927,grad_norm: 0.9999990994053718, iteration: 136047
loss: 1.0084973573684692,grad_norm: 0.999999257516236, iteration: 136048
loss: 1.0792086124420166,grad_norm: 0.9999994137448676, iteration: 136049
loss: 1.000934362411499,grad_norm: 0.9616217735423058, iteration: 136050
loss: 0.9767829775810242,grad_norm: 0.9531390286579323, iteration: 136051
loss: 0.992504894733429,grad_norm: 0.9999990023218271, iteration: 136052
loss: 0.9873409271240234,grad_norm: 0.9999989909206751, iteration: 136053
loss: 1.0459768772125244,grad_norm: 0.99999959207966, iteration: 136054
loss: 1.0097177028656006,grad_norm: 0.9999990913399567, iteration: 136055
loss: 0.998915433883667,grad_norm: 0.9773423165569003, iteration: 136056
loss: 1.0196983814239502,grad_norm: 0.9367092536261559, iteration: 136057
loss: 1.0037039518356323,grad_norm: 0.9999989835548369, iteration: 136058
loss: 0.9697772860527039,grad_norm: 0.960375692996943, iteration: 136059
loss: 0.9701016545295715,grad_norm: 0.952137183956821, iteration: 136060
loss: 1.0083664655685425,grad_norm: 0.9486296118984782, iteration: 136061
loss: 1.051756501197815,grad_norm: 0.9999991949885318, iteration: 136062
loss: 0.9830268025398254,grad_norm: 0.9999991980867338, iteration: 136063
loss: 1.0312992334365845,grad_norm: 0.9450778537101867, iteration: 136064
loss: 1.0312399864196777,grad_norm: 0.9999993383715113, iteration: 136065
loss: 0.9552105069160461,grad_norm: 0.9999992587316735, iteration: 136066
loss: 1.0039297342300415,grad_norm: 0.9551553824117649, iteration: 136067
loss: 0.9973645806312561,grad_norm: 0.9999992290104625, iteration: 136068
loss: 0.9957911968231201,grad_norm: 0.8191085662041471, iteration: 136069
loss: 1.0107239484786987,grad_norm: 0.9999993082530968, iteration: 136070
loss: 0.949334979057312,grad_norm: 0.999998992848687, iteration: 136071
loss: 0.9595409631729126,grad_norm: 0.9999990867423632, iteration: 136072
loss: 1.0051560401916504,grad_norm: 0.8379548346999075, iteration: 136073
loss: 1.0093857049942017,grad_norm: 0.9797678607057053, iteration: 136074
loss: 1.008022665977478,grad_norm: 0.9753941865025707, iteration: 136075
loss: 0.9505352973937988,grad_norm: 0.9765683669236302, iteration: 136076
loss: 1.0245287418365479,grad_norm: 0.9999990667981821, iteration: 136077
loss: 1.0355846881866455,grad_norm: 0.9999990825653093, iteration: 136078
loss: 1.0352587699890137,grad_norm: 0.9999991170693799, iteration: 136079
loss: 1.002009391784668,grad_norm: 0.999999028240421, iteration: 136080
loss: 0.9866800308227539,grad_norm: 0.9999991284827138, iteration: 136081
loss: 1.0051192045211792,grad_norm: 0.999999472650484, iteration: 136082
loss: 0.9783315062522888,grad_norm: 0.999999083840858, iteration: 136083
loss: 1.0040087699890137,grad_norm: 0.9999997537480136, iteration: 136084
loss: 1.0155068635940552,grad_norm: 0.9999990752496825, iteration: 136085
loss: 1.011886715888977,grad_norm: 0.9999992906256339, iteration: 136086
loss: 0.975678026676178,grad_norm: 0.9999991924408966, iteration: 136087
loss: 0.9708830118179321,grad_norm: 0.9999992745571871, iteration: 136088
loss: 1.007981777191162,grad_norm: 0.9999992113114088, iteration: 136089
loss: 1.0321996212005615,grad_norm: 0.999999226323021, iteration: 136090
loss: 0.9920457601547241,grad_norm: 0.8923697049258953, iteration: 136091
loss: 1.0089902877807617,grad_norm: 0.9999991323788884, iteration: 136092
loss: 1.0119876861572266,grad_norm: 0.9455294897656247, iteration: 136093
loss: 1.022348165512085,grad_norm: 0.9672258641522736, iteration: 136094
loss: 0.9713727235794067,grad_norm: 0.9999992944299574, iteration: 136095
loss: 0.9823874235153198,grad_norm: 0.9710163025627881, iteration: 136096
loss: 0.9682462811470032,grad_norm: 0.9999990664733671, iteration: 136097
loss: 1.026387095451355,grad_norm: 0.9413691029051273, iteration: 136098
loss: 0.9778363704681396,grad_norm: 0.8942623250589778, iteration: 136099
loss: 1.0193454027175903,grad_norm: 0.9713169731997666, iteration: 136100
loss: 1.00571608543396,grad_norm: 0.9999991575133682, iteration: 136101
loss: 0.9884152412414551,grad_norm: 0.9999990552798175, iteration: 136102
loss: 0.9955815672874451,grad_norm: 0.9999992726576737, iteration: 136103
loss: 1.0160342454910278,grad_norm: 0.9747440043766283, iteration: 136104
loss: 1.0097553730010986,grad_norm: 0.9953846484858778, iteration: 136105
loss: 0.9793387055397034,grad_norm: 0.9888418278867775, iteration: 136106
loss: 0.962282121181488,grad_norm: 0.9508336991732346, iteration: 136107
loss: 1.0105918645858765,grad_norm: 0.9999990585310977, iteration: 136108
loss: 1.0134005546569824,grad_norm: 0.9949318478744347, iteration: 136109
loss: 1.015305757522583,grad_norm: 0.9918865105848697, iteration: 136110
loss: 0.9806221127510071,grad_norm: 0.999999095678921, iteration: 136111
loss: 1.0059069395065308,grad_norm: 0.9999990338714326, iteration: 136112
loss: 1.0362263917922974,grad_norm: 0.964711180332834, iteration: 136113
loss: 1.0280102491378784,grad_norm: 0.9999992081733833, iteration: 136114
loss: 1.0401740074157715,grad_norm: 0.9999993101942134, iteration: 136115
loss: 1.032162070274353,grad_norm: 0.9734942827618878, iteration: 136116
loss: 0.9917887449264526,grad_norm: 0.9999991825535107, iteration: 136117
loss: 1.01766836643219,grad_norm: 0.9046961127436296, iteration: 136118
loss: 1.0095149278640747,grad_norm: 0.999999084698519, iteration: 136119
loss: 0.9932802319526672,grad_norm: 0.99999912529398, iteration: 136120
loss: 1.0199978351593018,grad_norm: 0.9999992438040625, iteration: 136121
loss: 0.9825838208198547,grad_norm: 0.9999993568173715, iteration: 136122
loss: 1.0015289783477783,grad_norm: 0.9647324579951079, iteration: 136123
loss: 0.9815046191215515,grad_norm: 0.999999176027958, iteration: 136124
loss: 1.0137168169021606,grad_norm: 0.9999990821743618, iteration: 136125
loss: 0.9766125082969666,grad_norm: 0.9999991762401926, iteration: 136126
loss: 1.0016283988952637,grad_norm: 0.9999991600473065, iteration: 136127
loss: 1.0029691457748413,grad_norm: 0.9102019750370942, iteration: 136128
loss: 1.0277758836746216,grad_norm: 0.9959010592198067, iteration: 136129
loss: 0.9939976334571838,grad_norm: 0.9999990915675044, iteration: 136130
loss: 1.0106492042541504,grad_norm: 0.999999100999483, iteration: 136131
loss: 0.9871112108230591,grad_norm: 0.8415910650803748, iteration: 136132
loss: 0.982252299785614,grad_norm: 0.9430408696319191, iteration: 136133
loss: 1.012676477432251,grad_norm: 0.8912294548633991, iteration: 136134
loss: 0.9958229660987854,grad_norm: 0.915477394334406, iteration: 136135
loss: 1.0109387636184692,grad_norm: 0.9999991211994351, iteration: 136136
loss: 0.9774746894836426,grad_norm: 0.9999994162386692, iteration: 136137
loss: 1.002770185470581,grad_norm: 0.9334114579618523, iteration: 136138
loss: 1.028595209121704,grad_norm: 0.999999390111081, iteration: 136139
loss: 1.038234829902649,grad_norm: 0.9999990770313261, iteration: 136140
loss: 1.017761468887329,grad_norm: 0.9999993766550257, iteration: 136141
loss: 0.988044798374176,grad_norm: 0.9999989676865452, iteration: 136142
loss: 0.9295213222503662,grad_norm: 0.9392961652817627, iteration: 136143
loss: 0.9820318818092346,grad_norm: 0.9999990984128303, iteration: 136144
loss: 0.9696028232574463,grad_norm: 0.9688768716315089, iteration: 136145
loss: 0.9910967946052551,grad_norm: 0.9999990025629298, iteration: 136146
loss: 1.0154297351837158,grad_norm: 0.9999990394924059, iteration: 136147
loss: 0.9847042560577393,grad_norm: 0.9999991085128535, iteration: 136148
loss: 1.037411093711853,grad_norm: 0.9804642231462557, iteration: 136149
loss: 0.9881191253662109,grad_norm: 0.9691048020643225, iteration: 136150
loss: 1.0313066244125366,grad_norm: 0.9838201060029671, iteration: 136151
loss: 1.0592825412750244,grad_norm: 0.9999991310702823, iteration: 136152
loss: 1.0098602771759033,grad_norm: 0.945092895880859, iteration: 136153
loss: 1.032961130142212,grad_norm: 0.8971602918157747, iteration: 136154
loss: 0.9707523584365845,grad_norm: 0.9999990887726875, iteration: 136155
loss: 0.9947354793548584,grad_norm: 0.9896377237573619, iteration: 136156
loss: 1.0179729461669922,grad_norm: 0.990147746504531, iteration: 136157
loss: 1.0152667760849,grad_norm: 0.9316107465306175, iteration: 136158
loss: 0.9907176494598389,grad_norm: 0.9011813971763399, iteration: 136159
loss: 0.9733965396881104,grad_norm: 0.9999991222112673, iteration: 136160
loss: 0.9852188229560852,grad_norm: 0.9999991261587431, iteration: 136161
loss: 0.9722146391868591,grad_norm: 0.9999990862125129, iteration: 136162
loss: 0.9756466150283813,grad_norm: 0.9999990936928139, iteration: 136163
loss: 1.0427381992340088,grad_norm: 0.9999990795023994, iteration: 136164
loss: 1.026438593864441,grad_norm: 0.9999988961045371, iteration: 136165
loss: 0.9680210947990417,grad_norm: 0.9999991531400018, iteration: 136166
loss: 0.9856939911842346,grad_norm: 0.85737235954177, iteration: 136167
loss: 1.011961817741394,grad_norm: 0.9999991250140129, iteration: 136168
loss: 1.0242841243743896,grad_norm: 0.9696622835483064, iteration: 136169
loss: 1.0362085103988647,grad_norm: 0.9999991728328314, iteration: 136170
loss: 0.9786471724510193,grad_norm: 0.9999989904993016, iteration: 136171
loss: 0.9610493779182434,grad_norm: 0.9801408852234544, iteration: 136172
loss: 0.9916002154350281,grad_norm: 0.9013444807833885, iteration: 136173
loss: 1.0139248371124268,grad_norm: 0.9999990551824011, iteration: 136174
loss: 0.9927517175674438,grad_norm: 0.9239040054768017, iteration: 136175
loss: 1.0028387308120728,grad_norm: 0.9899211166965539, iteration: 136176
loss: 1.0276532173156738,grad_norm: 0.9999991634533724, iteration: 136177
loss: 0.9881280064582825,grad_norm: 0.9999992494849563, iteration: 136178
loss: 0.9771420359611511,grad_norm: 0.9999990822456601, iteration: 136179
loss: 1.011626124382019,grad_norm: 0.9999991272965271, iteration: 136180
loss: 1.0345399379730225,grad_norm: 0.9344274079576195, iteration: 136181
loss: 0.9911117553710938,grad_norm: 0.999999528672033, iteration: 136182
loss: 0.9751530885696411,grad_norm: 0.9999990961552069, iteration: 136183
loss: 0.9905381798744202,grad_norm: 0.9999990654363641, iteration: 136184
loss: 0.957532525062561,grad_norm: 0.9999990044430958, iteration: 136185
loss: 0.9951105117797852,grad_norm: 0.999999057734656, iteration: 136186
loss: 0.9406977891921997,grad_norm: 0.9999990554451285, iteration: 136187
loss: 0.9687806963920593,grad_norm: 0.9999992720168418, iteration: 136188
loss: 1.0258491039276123,grad_norm: 0.9999991689818111, iteration: 136189
loss: 1.035874843597412,grad_norm: 0.9214821723746355, iteration: 136190
loss: 1.0151402950286865,grad_norm: 0.9999991257441256, iteration: 136191
loss: 1.030322551727295,grad_norm: 0.9754984266937148, iteration: 136192
loss: 0.9854214787483215,grad_norm: 0.9999992272594981, iteration: 136193
loss: 0.9784741401672363,grad_norm: 0.9613312024354292, iteration: 136194
loss: 0.9633598923683167,grad_norm: 0.9999992472466067, iteration: 136195
loss: 0.986291766166687,grad_norm: 0.9999995981824997, iteration: 136196
loss: 1.008380651473999,grad_norm: 0.9999991834542318, iteration: 136197
loss: 0.9867076277732849,grad_norm: 0.8830818616651674, iteration: 136198
loss: 1.0086064338684082,grad_norm: 0.8986390706092519, iteration: 136199
loss: 0.9892147779464722,grad_norm: 0.9999992153950272, iteration: 136200
loss: 1.0325921773910522,grad_norm: 0.9372011106811403, iteration: 136201
loss: 0.9679206013679504,grad_norm: 0.7791055602136224, iteration: 136202
loss: 1.0103245973587036,grad_norm: 0.9999993415146268, iteration: 136203
loss: 0.9916148781776428,grad_norm: 0.9999990483040416, iteration: 136204
loss: 0.991461992263794,grad_norm: 0.9999990801558967, iteration: 136205
loss: 1.0081093311309814,grad_norm: 0.9779477752351652, iteration: 136206
loss: 1.0489736795425415,grad_norm: 0.9880758403332248, iteration: 136207
loss: 0.9943661093711853,grad_norm: 0.9999991359320838, iteration: 136208
loss: 1.0040658712387085,grad_norm: 0.8786498696478998, iteration: 136209
loss: 1.001425862312317,grad_norm: 0.999999099150758, iteration: 136210
loss: 1.0090235471725464,grad_norm: 0.9999990953881167, iteration: 136211
loss: 0.9713979363441467,grad_norm: 0.9961096860653507, iteration: 136212
loss: 0.9487498998641968,grad_norm: 0.9324782737233659, iteration: 136213
loss: 0.9799015522003174,grad_norm: 0.9999992324832486, iteration: 136214
loss: 0.9915806651115417,grad_norm: 0.9999990824043415, iteration: 136215
loss: 0.9925938844680786,grad_norm: 0.999999106619688, iteration: 136216
loss: 1.0108840465545654,grad_norm: 0.9999991813552027, iteration: 136217
loss: 1.0209966897964478,grad_norm: 0.9145529540864317, iteration: 136218
loss: 0.9745586514472961,grad_norm: 0.9999994325945122, iteration: 136219
loss: 0.9525133371353149,grad_norm: 0.9999991122856488, iteration: 136220
loss: 1.0030707120895386,grad_norm: 0.9999991318878336, iteration: 136221
loss: 1.0016238689422607,grad_norm: 0.9999991257532599, iteration: 136222
loss: 0.9589003920555115,grad_norm: 0.8882715164528858, iteration: 136223
loss: 0.9959149956703186,grad_norm: 0.9999990069104093, iteration: 136224
loss: 0.999480128288269,grad_norm: 0.9999992154810791, iteration: 136225
loss: 0.9829176068305969,grad_norm: 0.9044439329551601, iteration: 136226
loss: 0.9902892112731934,grad_norm: 0.9189306868822262, iteration: 136227
loss: 1.0059852600097656,grad_norm: 0.9999994762377525, iteration: 136228
loss: 1.0304888486862183,grad_norm: 0.9999991161268917, iteration: 136229
loss: 0.9836134910583496,grad_norm: 0.9999991738483586, iteration: 136230
loss: 1.0031262636184692,grad_norm: 0.9999990981940441, iteration: 136231
loss: 1.013221025466919,grad_norm: 0.9999990864394389, iteration: 136232
loss: 1.0186471939086914,grad_norm: 0.9999992867025121, iteration: 136233
loss: 0.9723246693611145,grad_norm: 0.8820631483523395, iteration: 136234
loss: 1.0137351751327515,grad_norm: 0.9577180076918966, iteration: 136235
loss: 0.9752686023712158,grad_norm: 0.9999992386617648, iteration: 136236
loss: 0.9909372925758362,grad_norm: 0.9999989366577838, iteration: 136237
loss: 0.991775393486023,grad_norm: 0.990802664216298, iteration: 136238
loss: 0.9533082842826843,grad_norm: 0.9999991256064619, iteration: 136239
loss: 0.9722234010696411,grad_norm: 0.9999992216700845, iteration: 136240
loss: 0.9519816040992737,grad_norm: 0.9999990679967357, iteration: 136241
loss: 1.0882351398468018,grad_norm: 0.9999996721048696, iteration: 136242
loss: 1.0199055671691895,grad_norm: 0.9999992394986542, iteration: 136243
loss: 1.0177816152572632,grad_norm: 0.9999991361708097, iteration: 136244
loss: 1.018066167831421,grad_norm: 0.9999990581880278, iteration: 136245
loss: 0.9670259356498718,grad_norm: 0.9999991834757739, iteration: 136246
loss: 1.0195237398147583,grad_norm: 0.9999993205699531, iteration: 136247
loss: 1.0037304162979126,grad_norm: 0.9999991531603725, iteration: 136248
loss: 0.9797461628913879,grad_norm: 0.9999991490020549, iteration: 136249
loss: 1.0617761611938477,grad_norm: 0.9933686603926958, iteration: 136250
loss: 0.9699973464012146,grad_norm: 0.9526710703599797, iteration: 136251
loss: 1.0178269147872925,grad_norm: 0.9999992633379593, iteration: 136252
loss: 0.9976513981819153,grad_norm: 0.90099829167843, iteration: 136253
loss: 0.9637888073921204,grad_norm: 0.9999991000662324, iteration: 136254
loss: 0.9914602041244507,grad_norm: 0.9999991729249544, iteration: 136255
loss: 1.0066931247711182,grad_norm: 0.999999108149387, iteration: 136256
loss: 0.9764980673789978,grad_norm: 0.9424111278429533, iteration: 136257
loss: 0.9999914169311523,grad_norm: 0.9188824645647855, iteration: 136258
loss: 0.9945258498191833,grad_norm: 0.9453307947288695, iteration: 136259
loss: 1.0090484619140625,grad_norm: 0.9999991092860733, iteration: 136260
loss: 0.9590945839881897,grad_norm: 0.9075092172339173, iteration: 136261
loss: 1.0317326784133911,grad_norm: 0.999999205850898, iteration: 136262
loss: 1.0182548761367798,grad_norm: 0.9999992714482461, iteration: 136263
loss: 0.9807022213935852,grad_norm: 0.9999991757359814, iteration: 136264
loss: 1.0030479431152344,grad_norm: 0.9815830487069218, iteration: 136265
loss: 1.0260058641433716,grad_norm: 0.9999992329903696, iteration: 136266
loss: 0.9945418238639832,grad_norm: 0.9856853498966651, iteration: 136267
loss: 1.021953821182251,grad_norm: 0.9762386348295796, iteration: 136268
loss: 1.0231609344482422,grad_norm: 0.9187380657965679, iteration: 136269
loss: 1.0187898874282837,grad_norm: 0.9999992979301809, iteration: 136270
loss: 0.9797966480255127,grad_norm: 0.999999288074789, iteration: 136271
loss: 0.9820743799209595,grad_norm: 0.9999990921106854, iteration: 136272
loss: 1.0006731748580933,grad_norm: 0.9999991631627824, iteration: 136273
loss: 1.0327644348144531,grad_norm: 0.9999990122603469, iteration: 136274
loss: 1.0126538276672363,grad_norm: 0.9228158426681903, iteration: 136275
loss: 0.9754491448402405,grad_norm: 0.982150083532482, iteration: 136276
loss: 1.0095329284667969,grad_norm: 0.9005604697455383, iteration: 136277
loss: 0.9930933713912964,grad_norm: 0.9895967376114329, iteration: 136278
loss: 0.9543036222457886,grad_norm: 0.9999992533025238, iteration: 136279
loss: 0.9962388277053833,grad_norm: 0.999999164280551, iteration: 136280
loss: 1.0005130767822266,grad_norm: 0.9745870772858003, iteration: 136281
loss: 0.9615746736526489,grad_norm: 0.8897974460691795, iteration: 136282
loss: 0.9929788708686829,grad_norm: 0.9268852895275969, iteration: 136283
loss: 1.0013285875320435,grad_norm: 0.8815858321014259, iteration: 136284
loss: 1.0016038417816162,grad_norm: 0.9999991512613261, iteration: 136285
loss: 1.0152090787887573,grad_norm: 0.9999990620419564, iteration: 136286
loss: 1.0470024347305298,grad_norm: 0.9183781585954386, iteration: 136287
loss: 1.0620267391204834,grad_norm: 0.9066245158101307, iteration: 136288
loss: 1.0162901878356934,grad_norm: 0.999999230373534, iteration: 136289
loss: 1.0087623596191406,grad_norm: 0.9999990136383626, iteration: 136290
loss: 1.0509957075119019,grad_norm: 0.9386513697685581, iteration: 136291
loss: 0.9312314987182617,grad_norm: 0.9999991177049716, iteration: 136292
loss: 0.963933527469635,grad_norm: 0.9295560265855468, iteration: 136293
loss: 0.9247792959213257,grad_norm: 0.9852820781278827, iteration: 136294
loss: 0.9885334968566895,grad_norm: 0.9999990086793724, iteration: 136295
loss: 0.9857891201972961,grad_norm: 0.9964270196452079, iteration: 136296
loss: 1.0355387926101685,grad_norm: 0.9999992212706632, iteration: 136297
loss: 1.0199731588363647,grad_norm: 0.9975348830726533, iteration: 136298
loss: 0.9928297400474548,grad_norm: 0.877235054527048, iteration: 136299
loss: 1.0516295433044434,grad_norm: 0.9999990958243659, iteration: 136300
loss: 1.0189030170440674,grad_norm: 0.9999991176413824, iteration: 136301
loss: 1.0286775827407837,grad_norm: 0.9999991727620658, iteration: 136302
loss: 0.9886267185211182,grad_norm: 0.9999991441247271, iteration: 136303
loss: 1.004529356956482,grad_norm: 0.9999990621488911, iteration: 136304
loss: 0.9709629416465759,grad_norm: 0.8578145402438689, iteration: 136305
loss: 0.9837978482246399,grad_norm: 0.9999992523618819, iteration: 136306
loss: 1.006612777709961,grad_norm: 0.9668883669937584, iteration: 136307
loss: 1.0305813550949097,grad_norm: 0.9999991146896112, iteration: 136308
loss: 0.990999698638916,grad_norm: 0.9999998414807837, iteration: 136309
loss: 1.012886643409729,grad_norm: 0.9999992458578973, iteration: 136310
loss: 0.9837615489959717,grad_norm: 0.9983699764114335, iteration: 136311
loss: 0.9744191765785217,grad_norm: 0.9999991541457213, iteration: 136312
loss: 0.9864731431007385,grad_norm: 0.9999991898944663, iteration: 136313
loss: 0.9997915625572205,grad_norm: 0.9999992575033447, iteration: 136314
loss: 1.009583592414856,grad_norm: 0.9999989742567591, iteration: 136315
loss: 1.0181822776794434,grad_norm: 0.9263567967081935, iteration: 136316
loss: 0.9776597023010254,grad_norm: 0.8451208720435619, iteration: 136317
loss: 1.033576488494873,grad_norm: 0.9999992497611644, iteration: 136318
loss: 0.9985078573226929,grad_norm: 0.9999991576945236, iteration: 136319
loss: 0.9943749308586121,grad_norm: 0.9999990956590027, iteration: 136320
loss: 0.9683787822723389,grad_norm: 0.9999991501722807, iteration: 136321
loss: 0.9725666642189026,grad_norm: 0.9999990073905195, iteration: 136322
loss: 0.9987281560897827,grad_norm: 0.9999990777072488, iteration: 136323
loss: 0.9768739342689514,grad_norm: 0.9999992006696985, iteration: 136324
loss: 0.9772576093673706,grad_norm: 0.9999991262558512, iteration: 136325
loss: 0.9887838959693909,grad_norm: 0.9529301317732871, iteration: 136326
loss: 1.0207335948944092,grad_norm: 0.9449486596726561, iteration: 136327
loss: 0.9875790476799011,grad_norm: 0.9129724910762748, iteration: 136328
loss: 0.977203905582428,grad_norm: 0.9999990913816879, iteration: 136329
loss: 0.9960176944732666,grad_norm: 0.897859951849822, iteration: 136330
loss: 1.0167417526245117,grad_norm: 0.9999990514827408, iteration: 136331
loss: 0.9866761565208435,grad_norm: 0.9999991851366836, iteration: 136332
loss: 1.0310813188552856,grad_norm: 0.9999994995236685, iteration: 136333
loss: 1.0114787817001343,grad_norm: 0.999999234830884, iteration: 136334
loss: 1.039947509765625,grad_norm: 0.9999991422827402, iteration: 136335
loss: 1.0200555324554443,grad_norm: 0.9999991512860943, iteration: 136336
loss: 0.9931420087814331,grad_norm: 0.9999990555103998, iteration: 136337
loss: 0.9869435429573059,grad_norm: 0.9999990757030802, iteration: 136338
loss: 1.044240117073059,grad_norm: 0.9999992280607535, iteration: 136339
loss: 1.0172431468963623,grad_norm: 0.9999991079264159, iteration: 136340
loss: 0.9725595712661743,grad_norm: 0.9803751809276177, iteration: 136341
loss: 1.0080251693725586,grad_norm: 0.9999989991065072, iteration: 136342
loss: 1.0196614265441895,grad_norm: 0.9999990353393416, iteration: 136343
loss: 0.9976110458374023,grad_norm: 0.9999990807932669, iteration: 136344
loss: 0.9819165468215942,grad_norm: 0.999999097296165, iteration: 136345
loss: 0.9625401496887207,grad_norm: 0.99999903409239, iteration: 136346
loss: 1.014194130897522,grad_norm: 0.9999991447194401, iteration: 136347
loss: 1.0089830160140991,grad_norm: 0.9999991233352354, iteration: 136348
loss: 0.9950334429740906,grad_norm: 0.9999990603937473, iteration: 136349
loss: 1.024227499961853,grad_norm: 0.9999997112305221, iteration: 136350
loss: 1.0586642026901245,grad_norm: 0.9999996801634649, iteration: 136351
loss: 0.9848981499671936,grad_norm: 0.9999990376595868, iteration: 136352
loss: 0.9858178496360779,grad_norm: 0.8696793013923565, iteration: 136353
loss: 0.9984492659568787,grad_norm: 0.9999991025804985, iteration: 136354
loss: 0.9958255290985107,grad_norm: 0.9999990766101082, iteration: 136355
loss: 0.9902515411376953,grad_norm: 0.9999990182534797, iteration: 136356
loss: 0.9532982110977173,grad_norm: 0.9050946616589355, iteration: 136357
loss: 1.0284613370895386,grad_norm: 0.9999991201813571, iteration: 136358
loss: 0.9723978638648987,grad_norm: 0.9999990039562401, iteration: 136359
loss: 1.0249277353286743,grad_norm: 0.9999998029011898, iteration: 136360
loss: 1.0018396377563477,grad_norm: 0.999999016055452, iteration: 136361
loss: 0.9922101497650146,grad_norm: 0.9718819150560466, iteration: 136362
loss: 1.020952820777893,grad_norm: 0.9999990349211882, iteration: 136363
loss: 0.9788902997970581,grad_norm: 0.9575712211401572, iteration: 136364
loss: 0.9666398167610168,grad_norm: 0.9133372445471, iteration: 136365
loss: 1.0322539806365967,grad_norm: 0.9818073785912658, iteration: 136366
loss: 0.9585562944412231,grad_norm: 0.9137217891163093, iteration: 136367
loss: 0.9859280586242676,grad_norm: 0.9655866742981067, iteration: 136368
loss: 0.9828861951828003,grad_norm: 0.9732694746821277, iteration: 136369
loss: 1.0108681917190552,grad_norm: 0.9999990610002444, iteration: 136370
loss: 0.9987130761146545,grad_norm: 0.999999387085963, iteration: 136371
loss: 1.0007860660552979,grad_norm: 0.9999991549731362, iteration: 136372
loss: 1.0005710124969482,grad_norm: 0.9999991480573613, iteration: 136373
loss: 1.0040758848190308,grad_norm: 0.9228167906900142, iteration: 136374
loss: 0.9849022626876831,grad_norm: 0.9999990968695974, iteration: 136375
loss: 1.0587388277053833,grad_norm: 0.9999997860907723, iteration: 136376
loss: 1.013500452041626,grad_norm: 0.9999990441253538, iteration: 136377
loss: 0.9967084527015686,grad_norm: 0.9999991752866486, iteration: 136378
loss: 1.0344616174697876,grad_norm: 0.9999990916362805, iteration: 136379
loss: 1.0147018432617188,grad_norm: 0.9999992317491331, iteration: 136380
loss: 0.986095130443573,grad_norm: 0.9999990043140062, iteration: 136381
loss: 1.0004349946975708,grad_norm: 0.99999916142887, iteration: 136382
loss: 1.0261306762695312,grad_norm: 0.9006673467817778, iteration: 136383
loss: 1.010105848312378,grad_norm: 0.8678566606629201, iteration: 136384
loss: 0.9831693768501282,grad_norm: 0.9999991096116773, iteration: 136385
loss: 1.0036916732788086,grad_norm: 0.9999991712129773, iteration: 136386
loss: 1.0145379304885864,grad_norm: 0.9109275599898653, iteration: 136387
loss: 1.0067949295043945,grad_norm: 0.9999993345095847, iteration: 136388
loss: 1.0186556577682495,grad_norm: 0.9999990114843601, iteration: 136389
loss: 0.951723039150238,grad_norm: 0.9999990708906813, iteration: 136390
loss: 1.0184080600738525,grad_norm: 0.9999992540983454, iteration: 136391
loss: 0.994140625,grad_norm: 0.9999990797965977, iteration: 136392
loss: 1.0072869062423706,grad_norm: 0.999998988783568, iteration: 136393
loss: 0.9925033450126648,grad_norm: 0.950300683151582, iteration: 136394
loss: 1.0160441398620605,grad_norm: 0.9999991923183048, iteration: 136395
loss: 1.0508447885513306,grad_norm: 0.999999892793189, iteration: 136396
loss: 1.0109349489212036,grad_norm: 0.999999288194382, iteration: 136397
loss: 1.0129474401474,grad_norm: 0.9743828137196052, iteration: 136398
loss: 1.0083565711975098,grad_norm: 0.9999990992934769, iteration: 136399
loss: 1.016236424446106,grad_norm: 0.9999992127383042, iteration: 136400
loss: 1.0182347297668457,grad_norm: 0.9999995476414612, iteration: 136401
loss: 1.0009740591049194,grad_norm: 0.9047751783679205, iteration: 136402
loss: 0.9702906012535095,grad_norm: 0.9683724208781496, iteration: 136403
loss: 0.962975800037384,grad_norm: 0.9999990877711505, iteration: 136404
loss: 0.9820010662078857,grad_norm: 0.999999050340086, iteration: 136405
loss: 0.9915443062782288,grad_norm: 0.886337664564997, iteration: 136406
loss: 1.0213627815246582,grad_norm: 0.999999123215477, iteration: 136407
loss: 0.9623416662216187,grad_norm: 0.9999992408288955, iteration: 136408
loss: 0.9498938322067261,grad_norm: 0.9062333330949776, iteration: 136409
loss: 1.028069019317627,grad_norm: 0.9999991865980704, iteration: 136410
loss: 0.979187548160553,grad_norm: 0.9999992104994613, iteration: 136411
loss: 0.975316047668457,grad_norm: 0.8839585043438547, iteration: 136412
loss: 1.0158345699310303,grad_norm: 0.9999990797092201, iteration: 136413
loss: 0.999862015247345,grad_norm: 0.9999992103063728, iteration: 136414
loss: 0.9715660810470581,grad_norm: 0.9676205635846721, iteration: 136415
loss: 1.0159834623336792,grad_norm: 0.9069138167358918, iteration: 136416
loss: 0.9884182214736938,grad_norm: 0.9018872817889558, iteration: 136417
loss: 1.0387459993362427,grad_norm: 0.9999994119699273, iteration: 136418
loss: 0.9845380187034607,grad_norm: 0.9999992076165235, iteration: 136419
loss: 0.9941238164901733,grad_norm: 0.9999990509696449, iteration: 136420
loss: 0.9891560673713684,grad_norm: 0.9109926317587272, iteration: 136421
loss: 0.9656587839126587,grad_norm: 0.9698133369398924, iteration: 136422
loss: 1.0081108808517456,grad_norm: 0.9999991109622864, iteration: 136423
loss: 0.9869716763496399,grad_norm: 0.9999992772765329, iteration: 136424
loss: 1.0224666595458984,grad_norm: 0.9999991434692831, iteration: 136425
loss: 1.0001063346862793,grad_norm: 0.9999992032358017, iteration: 136426
loss: 1.0070387125015259,grad_norm: 0.9999990169458967, iteration: 136427
loss: 0.9906309247016907,grad_norm: 0.9999990674089747, iteration: 136428
loss: 0.9997128248214722,grad_norm: 0.9999989672363389, iteration: 136429
loss: 1.00868821144104,grad_norm: 0.9999990814437374, iteration: 136430
loss: 1.0071587562561035,grad_norm: 0.999999157373453, iteration: 136431
loss: 1.0077381134033203,grad_norm: 0.9999991910879139, iteration: 136432
loss: 0.9853886961936951,grad_norm: 0.9999990962541886, iteration: 136433
loss: 0.974176287651062,grad_norm: 0.9999992023643111, iteration: 136434
loss: 1.0324530601501465,grad_norm: 0.9999991199238132, iteration: 136435
loss: 1.0156118869781494,grad_norm: 0.978977445174525, iteration: 136436
loss: 0.973205029964447,grad_norm: 0.9999991874410551, iteration: 136437
loss: 0.9659556150436401,grad_norm: 0.9999992712615249, iteration: 136438
loss: 1.062274694442749,grad_norm: 0.9999991289837323, iteration: 136439
loss: 0.9799873232841492,grad_norm: 0.9814008145874495, iteration: 136440
loss: 0.9932394623756409,grad_norm: 0.9999991414756916, iteration: 136441
loss: 1.015005111694336,grad_norm: 0.9014792181530069, iteration: 136442
loss: 0.9929654002189636,grad_norm: 0.9999993599384273, iteration: 136443
loss: 1.0137723684310913,grad_norm: 0.84407175150265, iteration: 136444
loss: 1.0405213832855225,grad_norm: 0.9999991809601051, iteration: 136445
loss: 1.033632516860962,grad_norm: 0.9999998889700812, iteration: 136446
loss: 0.9837407469749451,grad_norm: 0.9217744639554858, iteration: 136447
loss: 1.0291521549224854,grad_norm: 0.9445374782836357, iteration: 136448
loss: 0.9716045260429382,grad_norm: 0.9912891162129563, iteration: 136449
loss: 1.0124077796936035,grad_norm: 0.9999989746238771, iteration: 136450
loss: 0.987367570400238,grad_norm: 0.9999990490175443, iteration: 136451
loss: 0.9966220855712891,grad_norm: 0.8532474828624053, iteration: 136452
loss: 1.0056620836257935,grad_norm: 0.9999991244531493, iteration: 136453
loss: 0.9897159337997437,grad_norm: 0.9999991648381207, iteration: 136454
loss: 0.9816359877586365,grad_norm: 0.9167633728874631, iteration: 136455
loss: 1.0265535116195679,grad_norm: 0.9999990233191381, iteration: 136456
loss: 0.961726188659668,grad_norm: 0.9968444595403566, iteration: 136457
loss: 1.2123043537139893,grad_norm: 0.9999996672389064, iteration: 136458
loss: 0.9958070516586304,grad_norm: 0.9999991448702885, iteration: 136459
loss: 0.9689581394195557,grad_norm: 0.9999992121112284, iteration: 136460
loss: 0.9901759624481201,grad_norm: 0.9999991052527599, iteration: 136461
loss: 0.9783310890197754,grad_norm: 0.9799417898515981, iteration: 136462
loss: 1.0240058898925781,grad_norm: 0.9999991566150155, iteration: 136463
loss: 0.9984433054924011,grad_norm: 0.999999085913804, iteration: 136464
loss: 0.992725670337677,grad_norm: 0.9801807199225254, iteration: 136465
loss: 1.0057817697525024,grad_norm: 0.9999990707055232, iteration: 136466
loss: 1.024742603302002,grad_norm: 0.9999992062292005, iteration: 136467
loss: 0.9856484532356262,grad_norm: 0.9982398335844759, iteration: 136468
loss: 0.9743940830230713,grad_norm: 0.956871811174874, iteration: 136469
loss: 1.0036143064498901,grad_norm: 0.9681581457182056, iteration: 136470
loss: 0.9865099787712097,grad_norm: 0.8975497633475271, iteration: 136471
loss: 0.9846655130386353,grad_norm: 0.9538146281637705, iteration: 136472
loss: 0.9598044753074646,grad_norm: 0.9999992106256015, iteration: 136473
loss: 1.0084484815597534,grad_norm: 0.9999991413412702, iteration: 136474
loss: 0.9854027032852173,grad_norm: 0.975913261353062, iteration: 136475
loss: 1.0285762548446655,grad_norm: 0.7733714341455454, iteration: 136476
loss: 1.0137112140655518,grad_norm: 0.9999994678410796, iteration: 136477
loss: 1.0233426094055176,grad_norm: 0.9831258259942055, iteration: 136478
loss: 0.9724303483963013,grad_norm: 0.9999991887049758, iteration: 136479
loss: 0.9816553592681885,grad_norm: 0.9999992358287623, iteration: 136480
loss: 1.0094209909439087,grad_norm: 0.9999991823045963, iteration: 136481
loss: 0.9716715812683105,grad_norm: 0.9909863695702852, iteration: 136482
loss: 0.9804180860519409,grad_norm: 0.9999989982145091, iteration: 136483
loss: 0.9970349073410034,grad_norm: 0.8933350042697806, iteration: 136484
loss: 0.9757494926452637,grad_norm: 0.984998727782714, iteration: 136485
loss: 0.9591252207756042,grad_norm: 0.9787636124448098, iteration: 136486
loss: 1.0102826356887817,grad_norm: 0.9915670621849166, iteration: 136487
loss: 0.9625210165977478,grad_norm: 0.9999990956330482, iteration: 136488
loss: 0.9813546538352966,grad_norm: 0.9685468177810039, iteration: 136489
loss: 1.0083264112472534,grad_norm: 0.9550149748151847, iteration: 136490
loss: 0.9968761801719666,grad_norm: 0.9999992724832091, iteration: 136491
loss: 1.0025874376296997,grad_norm: 0.9999991513676433, iteration: 136492
loss: 0.9890336990356445,grad_norm: 0.9776457628222777, iteration: 136493
loss: 1.0963412523269653,grad_norm: 0.9999998314294415, iteration: 136494
loss: 0.995306134223938,grad_norm: 0.9999990676796281, iteration: 136495
loss: 0.9983350038528442,grad_norm: 0.9999990994827858, iteration: 136496
loss: 0.9970009922981262,grad_norm: 0.9999992054652028, iteration: 136497
loss: 0.9992435574531555,grad_norm: 0.9466591610168141, iteration: 136498
loss: 0.9878999590873718,grad_norm: 0.9999992544153196, iteration: 136499
loss: 0.9973806738853455,grad_norm: 0.9999994242766156, iteration: 136500
loss: 0.9910606145858765,grad_norm: 0.8859525307545479, iteration: 136501
loss: 0.9846866130828857,grad_norm: 0.9889450277263602, iteration: 136502
loss: 0.9945598840713501,grad_norm: 0.9999990317537403, iteration: 136503
loss: 0.9846673607826233,grad_norm: 0.9263283627795261, iteration: 136504
loss: 1.0131763219833374,grad_norm: 0.999999082562646, iteration: 136505
loss: 1.0212000608444214,grad_norm: 0.9273002678255572, iteration: 136506
loss: 1.0278327465057373,grad_norm: 0.9999990596559868, iteration: 136507
loss: 0.976499617099762,grad_norm: 0.938201049203243, iteration: 136508
loss: 0.9822852611541748,grad_norm: 0.9999991522474379, iteration: 136509
loss: 0.9678834080696106,grad_norm: 0.9999991300548161, iteration: 136510
loss: 1.0238970518112183,grad_norm: 0.9676990937480403, iteration: 136511
loss: 0.9981038570404053,grad_norm: 0.9648223181679432, iteration: 136512
loss: 1.0306535959243774,grad_norm: 0.999999129106331, iteration: 136513
loss: 0.9840489029884338,grad_norm: 0.9703369230683875, iteration: 136514
loss: 1.0162763595581055,grad_norm: 0.9999990502844675, iteration: 136515
loss: 0.999394416809082,grad_norm: 0.9999992237581111, iteration: 136516
loss: 0.958487331867218,grad_norm: 0.9999990532465906, iteration: 136517
loss: 1.0370935201644897,grad_norm: 0.966612303454588, iteration: 136518
loss: 0.9766239523887634,grad_norm: 0.9578962983645751, iteration: 136519
loss: 0.9685348272323608,grad_norm: 0.9999992442204351, iteration: 136520
loss: 1.0272011756896973,grad_norm: 0.9156209829146346, iteration: 136521
loss: 0.9997732043266296,grad_norm: 0.9999993051925449, iteration: 136522
loss: 0.994829535484314,grad_norm: 0.9999988857537021, iteration: 136523
loss: 1.0120387077331543,grad_norm: 0.9999991361824854, iteration: 136524
loss: 0.9542072415351868,grad_norm: 0.9815512259090585, iteration: 136525
loss: 0.9850212335586548,grad_norm: 0.9999990402777275, iteration: 136526
loss: 0.9887968897819519,grad_norm: 0.9887572599672063, iteration: 136527
loss: 0.9949697256088257,grad_norm: 0.999999160101519, iteration: 136528
loss: 0.9692723751068115,grad_norm: 0.9897049080786906, iteration: 136529
loss: 1.0240182876586914,grad_norm: 0.999999262595537, iteration: 136530
loss: 0.9328872561454773,grad_norm: 0.999999015293977, iteration: 136531
loss: 0.9754946827888489,grad_norm: 0.9015398045476346, iteration: 136532
loss: 0.968410849571228,grad_norm: 0.9999990907476503, iteration: 136533
loss: 1.0015062093734741,grad_norm: 0.9999991043024395, iteration: 136534
loss: 1.0236637592315674,grad_norm: 0.9999991857126581, iteration: 136535
loss: 1.0060083866119385,grad_norm: 0.9999991357860595, iteration: 136536
loss: 0.9983453154563904,grad_norm: 0.9999991570008581, iteration: 136537
loss: 1.0226837396621704,grad_norm: 0.8989290648623812, iteration: 136538
loss: 1.035765528678894,grad_norm: 0.9999990355580035, iteration: 136539
loss: 1.0272189378738403,grad_norm: 0.8350637484621093, iteration: 136540
loss: 0.9711525440216064,grad_norm: 0.9999990336828953, iteration: 136541
loss: 0.9776089191436768,grad_norm: 0.9751745898674289, iteration: 136542
loss: 0.9900980591773987,grad_norm: 0.9177566393334723, iteration: 136543
loss: 1.0045862197875977,grad_norm: 0.9999990240251818, iteration: 136544
loss: 0.9977579116821289,grad_norm: 0.9999991132234323, iteration: 136545
loss: 1.092523217201233,grad_norm: 0.9999997482433436, iteration: 136546
loss: 1.00131356716156,grad_norm: 0.978438038708724, iteration: 136547
loss: 0.9544333815574646,grad_norm: 0.9999991511340586, iteration: 136548
loss: 0.991790235042572,grad_norm: 0.8277408561632433, iteration: 136549
loss: 1.0059173107147217,grad_norm: 0.9096899284624477, iteration: 136550
loss: 0.9815259575843811,grad_norm: 0.9859630964761131, iteration: 136551
loss: 1.0158231258392334,grad_norm: 0.9999990286080274, iteration: 136552
loss: 1.008615493774414,grad_norm: 0.997820894579141, iteration: 136553
loss: 1.0129612684249878,grad_norm: 0.9378404514590893, iteration: 136554
loss: 0.9869470000267029,grad_norm: 0.8290863451987361, iteration: 136555
loss: 1.0243139266967773,grad_norm: 0.9999996027815902, iteration: 136556
loss: 1.0007696151733398,grad_norm: 0.9999992141617687, iteration: 136557
loss: 1.0223308801651,grad_norm: 0.9065236866947912, iteration: 136558
loss: 0.9855016469955444,grad_norm: 0.9346754760584358, iteration: 136559
loss: 0.9859439730644226,grad_norm: 0.9464792637841462, iteration: 136560
loss: 0.9961410760879517,grad_norm: 0.9443840291179056, iteration: 136561
loss: 0.9996839165687561,grad_norm: 0.9999991139392274, iteration: 136562
loss: 1.0202994346618652,grad_norm: 0.8803364174255932, iteration: 136563
loss: 1.0272709131240845,grad_norm: 0.8757223157506699, iteration: 136564
loss: 0.9571332931518555,grad_norm: 0.9332097495164478, iteration: 136565
loss: 1.0077396631240845,grad_norm: 0.9664848404120817, iteration: 136566
loss: 0.99672532081604,grad_norm: 0.9981583091219882, iteration: 136567
loss: 1.1203066110610962,grad_norm: 0.9999990286750338, iteration: 136568
loss: 0.9862293004989624,grad_norm: 0.9999990265410634, iteration: 136569
loss: 1.0550540685653687,grad_norm: 0.9999998537190411, iteration: 136570
loss: 1.005523681640625,grad_norm: 0.9999992622685641, iteration: 136571
loss: 0.9969257712364197,grad_norm: 0.9999991087064553, iteration: 136572
loss: 1.0308655500411987,grad_norm: 0.9999991226022371, iteration: 136573
loss: 1.0069400072097778,grad_norm: 0.9999992957040885, iteration: 136574
loss: 1.0787875652313232,grad_norm: 0.9971867255064127, iteration: 136575
loss: 1.154407262802124,grad_norm: 0.9749417123170588, iteration: 136576
loss: 0.9737709760665894,grad_norm: 0.9999990906374963, iteration: 136577
loss: 0.981480062007904,grad_norm: 0.9091127559775715, iteration: 136578
loss: 1.0053627490997314,grad_norm: 0.9346234517142856, iteration: 136579
loss: 0.9688525199890137,grad_norm: 0.9999990619714707, iteration: 136580
loss: 1.079828143119812,grad_norm: 0.9999991666401896, iteration: 136581
loss: 0.9998190999031067,grad_norm: 0.9999991722423401, iteration: 136582
loss: 1.0099825859069824,grad_norm: 0.9898870034791456, iteration: 136583
loss: 0.9779613018035889,grad_norm: 0.895285240942786, iteration: 136584
loss: 1.0210274457931519,grad_norm: 0.9610910540979831, iteration: 136585
loss: 0.9957275390625,grad_norm: 0.8734234628312708, iteration: 136586
loss: 1.0257270336151123,grad_norm: 0.9999991169954026, iteration: 136587
loss: 0.9570369124412537,grad_norm: 0.8844073236656074, iteration: 136588
loss: 0.9757049083709717,grad_norm: 0.9999989791757481, iteration: 136589
loss: 1.0871504545211792,grad_norm: 0.9353404436306032, iteration: 136590
loss: 0.9986724853515625,grad_norm: 0.9999989767684498, iteration: 136591
loss: 1.0088796615600586,grad_norm: 0.9999991088740877, iteration: 136592
loss: 0.9935121536254883,grad_norm: 0.8580905711868327, iteration: 136593
loss: 1.055363416671753,grad_norm: 0.9999997356575676, iteration: 136594
loss: 1.054842233657837,grad_norm: 0.9999992291222521, iteration: 136595
loss: 1.0087474584579468,grad_norm: 0.9826406847685509, iteration: 136596
loss: 0.9807599186897278,grad_norm: 0.9091474450042321, iteration: 136597
loss: 1.0205438137054443,grad_norm: 0.9999991866907401, iteration: 136598
loss: 1.031832218170166,grad_norm: 0.999999910005584, iteration: 136599
loss: 1.0163685083389282,grad_norm: 0.9999992380255465, iteration: 136600
loss: 1.0399714708328247,grad_norm: 0.9999991951999309, iteration: 136601
loss: 0.9764752984046936,grad_norm: 0.836579750277555, iteration: 136602
loss: 0.9885746836662292,grad_norm: 0.8465112985756664, iteration: 136603
loss: 1.0237910747528076,grad_norm: 0.9805443785944604, iteration: 136604
loss: 0.9878930449485779,grad_norm: 0.8690996038819291, iteration: 136605
loss: 1.0792402029037476,grad_norm: 0.999999726206101, iteration: 136606
loss: 0.9904792308807373,grad_norm: 0.9576795191494756, iteration: 136607
loss: 1.005529761314392,grad_norm: 0.9999989812015909, iteration: 136608
loss: 1.0001389980316162,grad_norm: 0.9917175503437315, iteration: 136609
loss: 1.0052459239959717,grad_norm: 0.9876128262259267, iteration: 136610
loss: 0.9486913084983826,grad_norm: 0.9421416926768271, iteration: 136611
loss: 0.9955207705497742,grad_norm: 0.9611268997645515, iteration: 136612
loss: 0.9544726014137268,grad_norm: 0.9999992409885745, iteration: 136613
loss: 0.9962623715400696,grad_norm: 0.8851216772230693, iteration: 136614
loss: 1.0527783632278442,grad_norm: 0.9999991446535612, iteration: 136615
loss: 0.99476557970047,grad_norm: 0.8979688502454928, iteration: 136616
loss: 1.0030858516693115,grad_norm: 0.9999991509791035, iteration: 136617
loss: 1.010433316230774,grad_norm: 0.9923518454303105, iteration: 136618
loss: 1.0229002237319946,grad_norm: 0.9999993030220048, iteration: 136619
loss: 1.23135507106781,grad_norm: 0.9999996885311448, iteration: 136620
loss: 0.9845117330551147,grad_norm: 0.8552629919135183, iteration: 136621
loss: 1.0344734191894531,grad_norm: 0.9989107025006713, iteration: 136622
loss: 1.0167450904846191,grad_norm: 0.9999996802433161, iteration: 136623
loss: 1.0320355892181396,grad_norm: 0.9999990571178692, iteration: 136624
loss: 1.0163853168487549,grad_norm: 0.9999993538137287, iteration: 136625
loss: 1.031467318534851,grad_norm: 0.9999991267415943, iteration: 136626
loss: 1.0147175788879395,grad_norm: 0.8754636409692153, iteration: 136627
loss: 0.9935293793678284,grad_norm: 0.935556705958486, iteration: 136628
loss: 0.970244824886322,grad_norm: 0.927352976601242, iteration: 136629
loss: 1.0217556953430176,grad_norm: 0.9999991569183182, iteration: 136630
loss: 1.0398423671722412,grad_norm: 0.9999992060938776, iteration: 136631
loss: 1.0722333192825317,grad_norm: 0.9838408847112157, iteration: 136632
loss: 1.0032460689544678,grad_norm: 0.9410084369463335, iteration: 136633
loss: 1.0580888986587524,grad_norm: 0.9999992706787676, iteration: 136634
loss: 1.0133962631225586,grad_norm: 0.9999991472151887, iteration: 136635
loss: 0.9782556891441345,grad_norm: 0.9492157147417479, iteration: 136636
loss: 0.9874509572982788,grad_norm: 0.9147686773088581, iteration: 136637
loss: 0.9492185711860657,grad_norm: 0.9999992068158683, iteration: 136638
loss: 0.9731073379516602,grad_norm: 0.9999989858843068, iteration: 136639
loss: 0.9702619314193726,grad_norm: 0.9999990560578476, iteration: 136640
loss: 1.0155824422836304,grad_norm: 0.9999998489291729, iteration: 136641
loss: 1.1365785598754883,grad_norm: 0.999998987303947, iteration: 136642
loss: 0.9889652729034424,grad_norm: 0.9999990804662331, iteration: 136643
loss: 1.0968420505523682,grad_norm: 0.9999998102864194, iteration: 136644
loss: 1.0724936723709106,grad_norm: 0.9999992408134653, iteration: 136645
loss: 0.9784967303276062,grad_norm: 0.999999174646245, iteration: 136646
loss: 1.079563856124878,grad_norm: 0.9999992382276647, iteration: 136647
loss: 0.9968590140342712,grad_norm: 0.9833640484910388, iteration: 136648
loss: 0.9885337352752686,grad_norm: 0.966999870375781, iteration: 136649
loss: 1.0051698684692383,grad_norm: 0.9999991121409846, iteration: 136650
loss: 1.1006720066070557,grad_norm: 0.9999999196577195, iteration: 136651
loss: 1.0234978199005127,grad_norm: 0.9999990264367071, iteration: 136652
loss: 1.0195826292037964,grad_norm: 0.9150714618794372, iteration: 136653
loss: 1.009431004524231,grad_norm: 0.8097077667490813, iteration: 136654
loss: 0.9736801981925964,grad_norm: 0.945205925701305, iteration: 136655
loss: 1.0399514436721802,grad_norm: 0.9999991547020776, iteration: 136656
loss: 0.9877659678459167,grad_norm: 0.9999989931016179, iteration: 136657
loss: 1.0188238620758057,grad_norm: 0.9999991081164029, iteration: 136658
loss: 0.9966605305671692,grad_norm: 0.9999990592411165, iteration: 136659
loss: 1.045291543006897,grad_norm: 0.9999992043774228, iteration: 136660
loss: 1.0033111572265625,grad_norm: 0.872461390598594, iteration: 136661
loss: 1.0083738565444946,grad_norm: 0.99999945352663, iteration: 136662
loss: 1.0170936584472656,grad_norm: 0.974475980038403, iteration: 136663
loss: 1.0217357873916626,grad_norm: 0.9790359871862124, iteration: 136664
loss: 1.007377028465271,grad_norm: 0.9999991053469398, iteration: 136665
loss: 0.9985378980636597,grad_norm: 0.9804236509738619, iteration: 136666
loss: 0.9995567798614502,grad_norm: 0.9999991256059901, iteration: 136667
loss: 0.990098774433136,grad_norm: 0.9999991022015084, iteration: 136668
loss: 0.9808377623558044,grad_norm: 0.9999990051949675, iteration: 136669
loss: 1.090429663658142,grad_norm: 0.9999992153994405, iteration: 136670
loss: 0.9807097315788269,grad_norm: 0.9273871403076889, iteration: 136671
loss: 0.9552320837974548,grad_norm: 0.952889427406312, iteration: 136672
loss: 1.0197232961654663,grad_norm: 0.9999990872101757, iteration: 136673
loss: 0.9991326928138733,grad_norm: 0.9999990555232962, iteration: 136674
loss: 1.0153826475143433,grad_norm: 0.7917921371633918, iteration: 136675
loss: 1.0004900693893433,grad_norm: 0.7548027109234716, iteration: 136676
loss: 1.022605299949646,grad_norm: 0.9999991978466835, iteration: 136677
loss: 1.0444825887680054,grad_norm: 0.9999992676206394, iteration: 136678
loss: 0.9927375316619873,grad_norm: 0.9999990627690225, iteration: 136679
loss: 1.0228177309036255,grad_norm: 0.9999992747874394, iteration: 136680
loss: 1.0358082056045532,grad_norm: 0.9999992560170581, iteration: 136681
loss: 1.0082151889801025,grad_norm: 0.9999991256546232, iteration: 136682
loss: 1.002676248550415,grad_norm: 0.9096574569003776, iteration: 136683
loss: 0.9846316576004028,grad_norm: 0.9999989943741059, iteration: 136684
loss: 0.9953641295433044,grad_norm: 0.9999990172846934, iteration: 136685
loss: 0.9670255780220032,grad_norm: 0.9999991855194962, iteration: 136686
loss: 0.9629337787628174,grad_norm: 0.9999991350447451, iteration: 136687
loss: 0.9793202877044678,grad_norm: 0.969323408436987, iteration: 136688
loss: 1.041887879371643,grad_norm: 0.9999990791538258, iteration: 136689
loss: 0.9944796562194824,grad_norm: 0.9854154837443685, iteration: 136690
loss: 1.01235830783844,grad_norm: 0.8809532390643164, iteration: 136691
loss: 1.0410807132720947,grad_norm: 0.9543539787175119, iteration: 136692
loss: 0.9984962344169617,grad_norm: 0.9999991825818676, iteration: 136693
loss: 1.0267027616500854,grad_norm: 0.971367443993127, iteration: 136694
loss: 1.007649302482605,grad_norm: 0.9999990406358613, iteration: 136695
loss: 0.9601038694381714,grad_norm: 0.9999991094512315, iteration: 136696
loss: 0.997287929058075,grad_norm: 0.99999910516591, iteration: 136697
loss: 0.9632537364959717,grad_norm: 0.9664207461683763, iteration: 136698
loss: 0.9780248403549194,grad_norm: 0.9999990478166697, iteration: 136699
loss: 0.9926642775535583,grad_norm: 0.9725640724702536, iteration: 136700
loss: 1.011695384979248,grad_norm: 0.9791965007218476, iteration: 136701
loss: 0.9977025389671326,grad_norm: 0.9966361947065806, iteration: 136702
loss: 1.0168179273605347,grad_norm: 0.8769998437597225, iteration: 136703
loss: 0.9769505262374878,grad_norm: 0.9635259533269396, iteration: 136704
loss: 0.984758198261261,grad_norm: 0.9355202051994093, iteration: 136705
loss: 1.0097461938858032,grad_norm: 0.9999991551707876, iteration: 136706
loss: 1.0012297630310059,grad_norm: 0.9428383000277315, iteration: 136707
loss: 0.9750053882598877,grad_norm: 0.873159305121135, iteration: 136708
loss: 1.0019168853759766,grad_norm: 0.9999990621193008, iteration: 136709
loss: 1.0127792358398438,grad_norm: 0.9999992238381958, iteration: 136710
loss: 1.0266493558883667,grad_norm: 0.9999994171144603, iteration: 136711
loss: 0.9807391166687012,grad_norm: 0.9081508242791603, iteration: 136712
loss: 1.023539662361145,grad_norm: 0.9999991837045653, iteration: 136713
loss: 0.9710691571235657,grad_norm: 0.9999992074874028, iteration: 136714
loss: 1.0173786878585815,grad_norm: 0.9999992052013099, iteration: 136715
loss: 1.009541392326355,grad_norm: 0.9652167202418175, iteration: 136716
loss: 0.9865884184837341,grad_norm: 0.99999928297931, iteration: 136717
loss: 1.0037751197814941,grad_norm: 0.9322861742858574, iteration: 136718
loss: 1.019613265991211,grad_norm: 0.9999991127120207, iteration: 136719
loss: 1.0218491554260254,grad_norm: 0.9999990657620988, iteration: 136720
loss: 1.010837435722351,grad_norm: 0.90652153921318, iteration: 136721
loss: 0.9945511221885681,grad_norm: 0.9999990225694113, iteration: 136722
loss: 1.0083396434783936,grad_norm: 0.999998975356174, iteration: 136723
loss: 0.9680480360984802,grad_norm: 0.9640046684844364, iteration: 136724
loss: 1.0217347145080566,grad_norm: 0.9396627596420499, iteration: 136725
loss: 1.034615159034729,grad_norm: 0.9999990916521492, iteration: 136726
loss: 1.0378631353378296,grad_norm: 0.9999990311485236, iteration: 136727
loss: 1.010912299156189,grad_norm: 0.9154905102132513, iteration: 136728
loss: 1.002294659614563,grad_norm: 0.9231068293727164, iteration: 136729
loss: 1.003461480140686,grad_norm: 0.9999991823530748, iteration: 136730
loss: 1.0296599864959717,grad_norm: 0.9999992123257399, iteration: 136731
loss: 1.0154813528060913,grad_norm: 0.9999989934384189, iteration: 136732
loss: 1.0199589729309082,grad_norm: 0.9999991451590385, iteration: 136733
loss: 0.9841744303703308,grad_norm: 0.9966339393663847, iteration: 136734
loss: 0.9956671595573425,grad_norm: 0.9651038895471038, iteration: 136735
loss: 1.0024985074996948,grad_norm: 0.9718840323698749, iteration: 136736
loss: 0.9778477549552917,grad_norm: 0.9999989927541761, iteration: 136737
loss: 0.9821977019309998,grad_norm: 0.9905567426039066, iteration: 136738
loss: 1.0097870826721191,grad_norm: 0.999999138142147, iteration: 136739
loss: 1.0085159540176392,grad_norm: 0.9402423764570979, iteration: 136740
loss: 0.9902751445770264,grad_norm: 0.9999990083781439, iteration: 136741
loss: 1.0183156728744507,grad_norm: 0.9999991594419778, iteration: 136742
loss: 1.0235453844070435,grad_norm: 0.8489045325863984, iteration: 136743
loss: 0.9748453497886658,grad_norm: 0.9999990836714698, iteration: 136744
loss: 0.9541330933570862,grad_norm: 0.9999990600121419, iteration: 136745
loss: 0.9989304542541504,grad_norm: 0.9999993638899911, iteration: 136746
loss: 0.9907568097114563,grad_norm: 0.9999991684778147, iteration: 136747
loss: 0.9977685809135437,grad_norm: 0.9823808941322462, iteration: 136748
loss: 1.0057905912399292,grad_norm: 0.9999990463179185, iteration: 136749
loss: 0.9909063577651978,grad_norm: 0.9008282462056842, iteration: 136750
loss: 1.011204481124878,grad_norm: 0.9999992959153626, iteration: 136751
loss: 0.9846864342689514,grad_norm: 0.9538064306049263, iteration: 136752
loss: 0.9976673722267151,grad_norm: 0.9999992796169113, iteration: 136753
loss: 0.9825959801673889,grad_norm: 0.9999990138183856, iteration: 136754
loss: 0.9977362751960754,grad_norm: 0.9999992113595837, iteration: 136755
loss: 0.983040452003479,grad_norm: 0.9986498137408423, iteration: 136756
loss: 0.9765910506248474,grad_norm: 0.9915193464686842, iteration: 136757
loss: 1.0040723085403442,grad_norm: 0.9999990139776413, iteration: 136758
loss: 0.9616242051124573,grad_norm: 0.9999990331524753, iteration: 136759
loss: 0.9990930557250977,grad_norm: 0.9604697323991196, iteration: 136760
loss: 1.0206815004348755,grad_norm: 0.9823903732767262, iteration: 136761
loss: 0.9929184317588806,grad_norm: 0.9995082621808736, iteration: 136762
loss: 1.0038940906524658,grad_norm: 0.9109068419092191, iteration: 136763
loss: 0.9860666394233704,grad_norm: 0.9999992186369392, iteration: 136764
loss: 1.0216200351715088,grad_norm: 0.9999990862328819, iteration: 136765
loss: 1.0495293140411377,grad_norm: 0.9867028828484093, iteration: 136766
loss: 0.9796313047409058,grad_norm: 0.9999998539585055, iteration: 136767
loss: 0.9669899344444275,grad_norm: 0.9999991036456766, iteration: 136768
loss: 0.9987444877624512,grad_norm: 0.9852309183835073, iteration: 136769
loss: 0.9886250495910645,grad_norm: 0.9693977777837314, iteration: 136770
loss: 1.0109506845474243,grad_norm: 0.9999991979696223, iteration: 136771
loss: 0.9882057309150696,grad_norm: 0.999999070910874, iteration: 136772
loss: 1.098705768585205,grad_norm: 0.9999990413363, iteration: 136773
loss: 1.024671196937561,grad_norm: 0.999999161588349, iteration: 136774
loss: 1.0156539678573608,grad_norm: 0.9999991755129033, iteration: 136775
loss: 0.9449740648269653,grad_norm: 0.8026684316435496, iteration: 136776
loss: 0.9834769368171692,grad_norm: 0.9044004564334672, iteration: 136777
loss: 0.9638512134552002,grad_norm: 0.9999992063169021, iteration: 136778
loss: 1.0091584920883179,grad_norm: 0.9999991192566211, iteration: 136779
loss: 1.0009766817092896,grad_norm: 0.9995679399309827, iteration: 136780
loss: 0.9702350497245789,grad_norm: 0.9999991048705924, iteration: 136781
loss: 0.9965320229530334,grad_norm: 0.9999991273440823, iteration: 136782
loss: 0.9955577850341797,grad_norm: 0.999999150336406, iteration: 136783
loss: 0.997958779335022,grad_norm: 0.8556708443130407, iteration: 136784
loss: 0.9901279211044312,grad_norm: 0.9999991237562661, iteration: 136785
loss: 0.9954172372817993,grad_norm: 0.9999991775774867, iteration: 136786
loss: 0.979529857635498,grad_norm: 0.8634075521162515, iteration: 136787
loss: 0.962700366973877,grad_norm: 0.9588990881581619, iteration: 136788
loss: 0.9769881963729858,grad_norm: 0.9351851868504129, iteration: 136789
loss: 1.0163100957870483,grad_norm: 0.9999992058500542, iteration: 136790
loss: 1.0657232999801636,grad_norm: 0.9999991247371574, iteration: 136791
loss: 0.9654513597488403,grad_norm: 0.999999067268322, iteration: 136792
loss: 1.0252666473388672,grad_norm: 0.9999997926483404, iteration: 136793
loss: 0.9422603845596313,grad_norm: 0.9999991689826733, iteration: 136794
loss: 1.026718020439148,grad_norm: 0.9999997379049175, iteration: 136795
loss: 0.985884964466095,grad_norm: 0.9793847948421878, iteration: 136796
loss: 0.9689631462097168,grad_norm: 0.8886721844538096, iteration: 136797
loss: 1.028320550918579,grad_norm: 0.9927984353577859, iteration: 136798
loss: 0.9491376876831055,grad_norm: 0.9999991124536061, iteration: 136799
loss: 0.9725680351257324,grad_norm: 0.9999990607226283, iteration: 136800
loss: 1.027626633644104,grad_norm: 0.9999992158156311, iteration: 136801
loss: 1.034575343132019,grad_norm: 0.9695416793805791, iteration: 136802
loss: 1.0163486003875732,grad_norm: 0.9482591754509127, iteration: 136803
loss: 0.9732962846755981,grad_norm: 0.9983698306340976, iteration: 136804
loss: 1.0114877223968506,grad_norm: 0.9999993045908464, iteration: 136805
loss: 0.9873904585838318,grad_norm: 0.9611338283444738, iteration: 136806
loss: 1.0251481533050537,grad_norm: 0.754683440574232, iteration: 136807
loss: 1.0615342855453491,grad_norm: 0.9999991973005825, iteration: 136808
loss: 1.035338044166565,grad_norm: 0.9051124513980536, iteration: 136809
loss: 0.983277440071106,grad_norm: 0.9999989401232718, iteration: 136810
loss: 1.0019105672836304,grad_norm: 0.9999990518633902, iteration: 136811
loss: 0.9892901182174683,grad_norm: 0.9999992116192555, iteration: 136812
loss: 1.0304359197616577,grad_norm: 0.9999991768299796, iteration: 136813
loss: 0.9465543627738953,grad_norm: 0.9999990904894904, iteration: 136814
loss: 1.003574252128601,grad_norm: 0.9999991977285733, iteration: 136815
loss: 1.0688272714614868,grad_norm: 0.9999993787361563, iteration: 136816
loss: 0.9599517583847046,grad_norm: 0.9172599975349541, iteration: 136817
loss: 1.0108132362365723,grad_norm: 0.9999994639725893, iteration: 136818
loss: 1.0442750453948975,grad_norm: 0.9999990810067673, iteration: 136819
loss: 0.9940008521080017,grad_norm: 0.9962053011366115, iteration: 136820
loss: 0.9758009910583496,grad_norm: 0.9999991007555463, iteration: 136821
loss: 0.9898547530174255,grad_norm: 0.9414801534574688, iteration: 136822
loss: 0.9726710915565491,grad_norm: 0.9999990542801679, iteration: 136823
loss: 0.9907865524291992,grad_norm: 0.9999991866127549, iteration: 136824
loss: 1.0035383701324463,grad_norm: 0.9999991070566916, iteration: 136825
loss: 0.985418975353241,grad_norm: 0.9999990065830138, iteration: 136826
loss: 1.0011988878250122,grad_norm: 0.9493623280100328, iteration: 136827
loss: 0.9982572793960571,grad_norm: 0.9174293305389633, iteration: 136828
loss: 1.0237979888916016,grad_norm: 0.9584290586457578, iteration: 136829
loss: 0.97919100522995,grad_norm: 0.9999991039095065, iteration: 136830
loss: 1.0227432250976562,grad_norm: 0.9999992032440623, iteration: 136831
loss: 0.9887933135032654,grad_norm: 0.9912888073904866, iteration: 136832
loss: 0.9597914814949036,grad_norm: 0.999999221110074, iteration: 136833
loss: 1.0364335775375366,grad_norm: 0.9999991975041276, iteration: 136834
loss: 1.0070220232009888,grad_norm: 0.9999991281616758, iteration: 136835
loss: 0.9763259887695312,grad_norm: 0.9999990295681888, iteration: 136836
loss: 1.0240792036056519,grad_norm: 0.920585278475656, iteration: 136837
loss: 0.9763784408569336,grad_norm: 0.9467932617791076, iteration: 136838
loss: 1.1442818641662598,grad_norm: 0.9999996942276504, iteration: 136839
loss: 0.9953896999359131,grad_norm: 0.9999990451278337, iteration: 136840
loss: 0.9672499299049377,grad_norm: 0.9698511758348968, iteration: 136841
loss: 1.0531166791915894,grad_norm: 0.9999993201630071, iteration: 136842
loss: 1.0051875114440918,grad_norm: 0.863116800479921, iteration: 136843
loss: 0.9745002388954163,grad_norm: 0.7854493737693391, iteration: 136844
loss: 0.9924798607826233,grad_norm: 0.999999149928098, iteration: 136845
loss: 1.0368326902389526,grad_norm: 0.9999990299694055, iteration: 136846
loss: 1.022674560546875,grad_norm: 0.9999990390665154, iteration: 136847
loss: 0.9863755702972412,grad_norm: 0.9237032538686992, iteration: 136848
loss: 1.0384830236434937,grad_norm: 0.9229643162972195, iteration: 136849
loss: 1.0237714052200317,grad_norm: 0.9999992728719408, iteration: 136850
loss: 0.9567780494689941,grad_norm: 0.965522364988072, iteration: 136851
loss: 1.0133264064788818,grad_norm: 0.9999991537935609, iteration: 136852
loss: 0.9792250990867615,grad_norm: 0.8815106375040339, iteration: 136853
loss: 0.9881393313407898,grad_norm: 0.9999991735405611, iteration: 136854
loss: 0.9872400760650635,grad_norm: 0.9975031127623281, iteration: 136855
loss: 0.9804761409759521,grad_norm: 0.9999993298693325, iteration: 136856
loss: 0.9104107022285461,grad_norm: 0.9999991970665533, iteration: 136857
loss: 1.04727303981781,grad_norm: 0.9458181617855087, iteration: 136858
loss: 1.0325223207473755,grad_norm: 0.8811594728134291, iteration: 136859
loss: 0.9881083369255066,grad_norm: 0.7885429474066863, iteration: 136860
loss: 1.0003646612167358,grad_norm: 0.9999990526485419, iteration: 136861
loss: 1.0291433334350586,grad_norm: 0.8815472628156058, iteration: 136862
loss: 1.0135395526885986,grad_norm: 0.999999265491507, iteration: 136863
loss: 1.0128886699676514,grad_norm: 0.943565557290739, iteration: 136864
loss: 1.0405962467193604,grad_norm: 0.9999992636413649, iteration: 136865
loss: 0.9999213814735413,grad_norm: 0.9999996524096495, iteration: 136866
loss: 0.992343544960022,grad_norm: 0.9999992370356973, iteration: 136867
loss: 0.9701324701309204,grad_norm: 0.9577540647951658, iteration: 136868
loss: 1.0195761919021606,grad_norm: 0.9119518749240012, iteration: 136869
loss: 0.9882932305335999,grad_norm: 0.8420791722482196, iteration: 136870
loss: 1.0592331886291504,grad_norm: 0.9999992688005703, iteration: 136871
loss: 1.0048836469650269,grad_norm: 0.8981707134510151, iteration: 136872
loss: 1.0335032939910889,grad_norm: 0.9999991994385846, iteration: 136873
loss: 1.039718508720398,grad_norm: 0.9999990417041729, iteration: 136874
loss: 0.9965209364891052,grad_norm: 0.9999992664878937, iteration: 136875
loss: 0.9955602288246155,grad_norm: 0.9999990836779679, iteration: 136876
loss: 0.9888648986816406,grad_norm: 0.9163267434281469, iteration: 136877
loss: 1.0434770584106445,grad_norm: 0.9999991718244349, iteration: 136878
loss: 0.9698702096939087,grad_norm: 0.9999991027170795, iteration: 136879
loss: 0.9857028126716614,grad_norm: 0.9999995389419938, iteration: 136880
loss: 1.009103775024414,grad_norm: 0.9999992313934563, iteration: 136881
loss: 0.9734017848968506,grad_norm: 0.9536561558640322, iteration: 136882
loss: 1.0007144212722778,grad_norm: 0.9785428799562275, iteration: 136883
loss: 1.0050712823867798,grad_norm: 0.855045877972229, iteration: 136884
loss: 0.9947845339775085,grad_norm: 0.9999989813508099, iteration: 136885
loss: 1.001245379447937,grad_norm: 0.9582692579721643, iteration: 136886
loss: 1.0107167959213257,grad_norm: 0.9925315040884818, iteration: 136887
loss: 0.9750530123710632,grad_norm: 0.9999990982063913, iteration: 136888
loss: 1.004379391670227,grad_norm: 0.9943084909051925, iteration: 136889
loss: 1.0031754970550537,grad_norm: 0.9999991916016051, iteration: 136890
loss: 0.9617999196052551,grad_norm: 0.9999992419202433, iteration: 136891
loss: 0.9955549240112305,grad_norm: 0.9999992025412607, iteration: 136892
loss: 1.0271319150924683,grad_norm: 0.9999994040881498, iteration: 136893
loss: 0.9643440842628479,grad_norm: 0.9484580685545854, iteration: 136894
loss: 1.0547962188720703,grad_norm: 0.9176339183736608, iteration: 136895
loss: 1.0068004131317139,grad_norm: 0.9642057895404731, iteration: 136896
loss: 0.9702613949775696,grad_norm: 0.9999991515287209, iteration: 136897
loss: 1.0089412927627563,grad_norm: 0.9711361647739561, iteration: 136898
loss: 1.0037102699279785,grad_norm: 0.9284235740538443, iteration: 136899
loss: 1.015286922454834,grad_norm: 0.9210225239997019, iteration: 136900
loss: 0.9612160921096802,grad_norm: 0.9249829061620414, iteration: 136901
loss: 1.1438484191894531,grad_norm: 0.9999998215520263, iteration: 136902
loss: 1.0063056945800781,grad_norm: 0.9979054397270115, iteration: 136903
loss: 1.0195238590240479,grad_norm: 0.9047337118555051, iteration: 136904
loss: 1.0306320190429688,grad_norm: 0.9999990290348072, iteration: 136905
loss: 0.974644660949707,grad_norm: 0.9999992056643322, iteration: 136906
loss: 0.9691815376281738,grad_norm: 0.943072205381369, iteration: 136907
loss: 1.0127019882202148,grad_norm: 0.9999991414124435, iteration: 136908
loss: 1.026943564414978,grad_norm: 0.9181736005979534, iteration: 136909
loss: 1.0143519639968872,grad_norm: 0.9757141041143257, iteration: 136910
loss: 0.9952749609947205,grad_norm: 0.9567830078145148, iteration: 136911
loss: 0.9989125728607178,grad_norm: 0.9999989910769481, iteration: 136912
loss: 1.0605641603469849,grad_norm: 0.9999991451630007, iteration: 136913
loss: 0.9975256323814392,grad_norm: 0.9174754354344599, iteration: 136914
loss: 0.9769085645675659,grad_norm: 0.8772119667116022, iteration: 136915
loss: 1.0014289617538452,grad_norm: 0.9869998543072214, iteration: 136916
loss: 1.0035688877105713,grad_norm: 0.9999990186718923, iteration: 136917
loss: 1.0076500177383423,grad_norm: 0.9999992150548863, iteration: 136918
loss: 1.0154167413711548,grad_norm: 0.9999993990413277, iteration: 136919
loss: 0.977786660194397,grad_norm: 0.9883439132681315, iteration: 136920
loss: 0.9993491768836975,grad_norm: 0.9999990382616488, iteration: 136921
loss: 0.9815732836723328,grad_norm: 0.9148728699245718, iteration: 136922
loss: 0.9778987765312195,grad_norm: 0.8268105738030238, iteration: 136923
loss: 0.9968101382255554,grad_norm: 0.8502769733876265, iteration: 136924
loss: 1.0238605737686157,grad_norm: 0.9999992148494897, iteration: 136925
loss: 1.0195872783660889,grad_norm: 0.9032878438569751, iteration: 136926
loss: 1.00174081325531,grad_norm: 0.9753110056397093, iteration: 136927
loss: 0.9730470180511475,grad_norm: 0.9999992780999747, iteration: 136928
loss: 0.9936234354972839,grad_norm: 0.9490371257213205, iteration: 136929
loss: 1.019081950187683,grad_norm: 0.8561236979098469, iteration: 136930
loss: 0.9960218071937561,grad_norm: 0.9683071623365143, iteration: 136931
loss: 0.99581378698349,grad_norm: 0.9999991730042168, iteration: 136932
loss: 0.9928033351898193,grad_norm: 0.9786597486325624, iteration: 136933
loss: 1.0424400568008423,grad_norm: 0.9999991964021877, iteration: 136934
loss: 1.036283016204834,grad_norm: 0.9560133336304654, iteration: 136935
loss: 0.967792272567749,grad_norm: 0.9999993003881983, iteration: 136936
loss: 1.0158264636993408,grad_norm: 0.9999990753216522, iteration: 136937
loss: 0.9824628233909607,grad_norm: 0.9999991822941352, iteration: 136938
loss: 1.0048260688781738,grad_norm: 0.9999990245201604, iteration: 136939
loss: 1.0245928764343262,grad_norm: 0.9999990875033179, iteration: 136940
loss: 1.0748438835144043,grad_norm: 0.9999996869646681, iteration: 136941
loss: 0.9867562651634216,grad_norm: 0.9729374174529309, iteration: 136942
loss: 1.0097696781158447,grad_norm: 0.9213033993206876, iteration: 136943
loss: 0.9704948663711548,grad_norm: 0.9999990556099511, iteration: 136944
loss: 0.9695723056793213,grad_norm: 0.9742188088309806, iteration: 136945
loss: 1.0439112186431885,grad_norm: 0.9882743038629255, iteration: 136946
loss: 0.9954305291175842,grad_norm: 0.9856451901043997, iteration: 136947
loss: 1.009511947631836,grad_norm: 0.999999212911929, iteration: 136948
loss: 0.9980646967887878,grad_norm: 0.9999991288493348, iteration: 136949
loss: 0.9423590302467346,grad_norm: 0.9999991734659095, iteration: 136950
loss: 0.988810658454895,grad_norm: 0.9999989895975567, iteration: 136951
loss: 0.9899936318397522,grad_norm: 0.9999991132522861, iteration: 136952
loss: 1.034279704093933,grad_norm: 0.9999990378916366, iteration: 136953
loss: 1.057016372680664,grad_norm: 0.9999996756524164, iteration: 136954
loss: 1.0304515361785889,grad_norm: 0.9999991133154632, iteration: 136955
loss: 1.008175015449524,grad_norm: 0.8896766040528138, iteration: 136956
loss: 0.9952450394630432,grad_norm: 0.9999990627751131, iteration: 136957
loss: 1.0274405479431152,grad_norm: 0.9999990705393816, iteration: 136958
loss: 0.9955129027366638,grad_norm: 0.9999992373583106, iteration: 136959
loss: 1.0156553983688354,grad_norm: 0.9999992319966449, iteration: 136960
loss: 1.0024793148040771,grad_norm: 0.9999990232427267, iteration: 136961
loss: 0.9918631315231323,grad_norm: 0.9999992769599716, iteration: 136962
loss: 0.9638758897781372,grad_norm: 0.9999993450945123, iteration: 136963
loss: 0.9770383834838867,grad_norm: 0.9999991859755099, iteration: 136964
loss: 1.0099817514419556,grad_norm: 0.9999991467723799, iteration: 136965
loss: 0.9778730869293213,grad_norm: 0.9508305462837808, iteration: 136966
loss: 1.0641909837722778,grad_norm: 0.9999997501593129, iteration: 136967
loss: 1.0047801733016968,grad_norm: 0.8912876455255561, iteration: 136968
loss: 0.9886517524719238,grad_norm: 0.98222748469702, iteration: 136969
loss: 1.0295318365097046,grad_norm: 0.9999991778135691, iteration: 136970
loss: 1.0015538930892944,grad_norm: 0.9999992267296836, iteration: 136971
loss: 0.9958161115646362,grad_norm: 0.9131853564288176, iteration: 136972
loss: 1.0119211673736572,grad_norm: 0.931792810934906, iteration: 136973
loss: 0.9859510064125061,grad_norm: 0.9305244213163623, iteration: 136974
loss: 0.9925466179847717,grad_norm: 0.9999992512144298, iteration: 136975
loss: 0.9859734177589417,grad_norm: 0.9865805236556419, iteration: 136976
loss: 1.0067112445831299,grad_norm: 0.930575165114036, iteration: 136977
loss: 0.9560284614562988,grad_norm: 0.9449530983117752, iteration: 136978
loss: 1.0036720037460327,grad_norm: 0.9634127601584195, iteration: 136979
loss: 0.997636616230011,grad_norm: 0.8334524668712515, iteration: 136980
loss: 0.9756742715835571,grad_norm: 0.926573694703858, iteration: 136981
loss: 1.0008304119110107,grad_norm: 0.9293381568562045, iteration: 136982
loss: 1.0094860792160034,grad_norm: 0.9592104549544833, iteration: 136983
loss: 0.9729887843132019,grad_norm: 0.9999989676177864, iteration: 136984
loss: 1.0032485723495483,grad_norm: 0.956738316405786, iteration: 136985
loss: 1.0053480863571167,grad_norm: 0.9331238456563653, iteration: 136986
loss: 1.0069490671157837,grad_norm: 0.9999990560486584, iteration: 136987
loss: 0.9778105020523071,grad_norm: 0.999999072051193, iteration: 136988
loss: 0.985206127166748,grad_norm: 0.9999991930191161, iteration: 136989
loss: 0.9851117134094238,grad_norm: 0.9999990501051825, iteration: 136990
loss: 0.9617837071418762,grad_norm: 0.9999991313349379, iteration: 136991
loss: 1.053764820098877,grad_norm: 0.999999232665751, iteration: 136992
loss: 1.0027647018432617,grad_norm: 0.8625539973130792, iteration: 136993
loss: 1.0095492601394653,grad_norm: 0.99999945532148, iteration: 136994
loss: 0.9823124408721924,grad_norm: 0.9969565383299546, iteration: 136995
loss: 0.9911936521530151,grad_norm: 0.9999991881426242, iteration: 136996
loss: 1.0157732963562012,grad_norm: 0.9999990672974074, iteration: 136997
loss: 1.0130691528320312,grad_norm: 0.9999990915426737, iteration: 136998
loss: 1.01289963722229,grad_norm: 0.958589932625837, iteration: 136999
loss: 0.9934631586074829,grad_norm: 0.9999991375232185, iteration: 137000
loss: 1.0086164474487305,grad_norm: 0.9999989597044985, iteration: 137001
loss: 1.0068999528884888,grad_norm: 0.9403344106236241, iteration: 137002
loss: 0.9966844320297241,grad_norm: 0.9006947398355436, iteration: 137003
loss: 1.0253456830978394,grad_norm: 0.9999992948617237, iteration: 137004
loss: 1.0682547092437744,grad_norm: 0.9999990395402518, iteration: 137005
loss: 0.9811124801635742,grad_norm: 0.9736086152259176, iteration: 137006
loss: 0.9830648899078369,grad_norm: 0.9151787520994397, iteration: 137007
loss: 0.9886190891265869,grad_norm: 0.9999990022596751, iteration: 137008
loss: 1.033568024635315,grad_norm: 0.9075138232859989, iteration: 137009
loss: 0.9951421618461609,grad_norm: 0.999999285211967, iteration: 137010
loss: 0.9900060892105103,grad_norm: 0.9999991915318253, iteration: 137011
loss: 1.1323763132095337,grad_norm: 0.9999994751823729, iteration: 137012
loss: 0.9999906420707703,grad_norm: 0.999999151667488, iteration: 137013
loss: 1.020450472831726,grad_norm: 0.9999990923340882, iteration: 137014
loss: 1.0403317213058472,grad_norm: 0.9999995602733189, iteration: 137015
loss: 1.0120160579681396,grad_norm: 0.9101181302048021, iteration: 137016
loss: 0.9749603271484375,grad_norm: 0.8532051783263506, iteration: 137017
loss: 1.0104573965072632,grad_norm: 0.954973984943965, iteration: 137018
loss: 1.0050835609436035,grad_norm: 0.9057506193106767, iteration: 137019
loss: 0.9718007445335388,grad_norm: 0.9999991919419524, iteration: 137020
loss: 1.0091044902801514,grad_norm: 0.9999991742062879, iteration: 137021
loss: 1.0013139247894287,grad_norm: 0.9999993718096585, iteration: 137022
loss: 1.0065635442733765,grad_norm: 0.9999992242666912, iteration: 137023
loss: 1.0355838537216187,grad_norm: 0.9999996566757623, iteration: 137024
loss: 0.9822258353233337,grad_norm: 0.999999208354034, iteration: 137025
loss: 0.9918351173400879,grad_norm: 0.9999990613481373, iteration: 137026
loss: 0.9863637089729309,grad_norm: 0.9591349288092971, iteration: 137027
loss: 1.0191186666488647,grad_norm: 0.8262611553649504, iteration: 137028
loss: 1.0082237720489502,grad_norm: 0.9999992386739738, iteration: 137029
loss: 0.9810988903045654,grad_norm: 0.9999990835153776, iteration: 137030
loss: 0.9947152137756348,grad_norm: 0.9999988935261097, iteration: 137031
loss: 0.9914325475692749,grad_norm: 0.9999998279595028, iteration: 137032
loss: 0.9829590320587158,grad_norm: 0.9999990942877965, iteration: 137033
loss: 1.0046181678771973,grad_norm: 0.9999990848878964, iteration: 137034
loss: 1.054823637008667,grad_norm: 0.9999993167254283, iteration: 137035
loss: 0.9894918203353882,grad_norm: 0.999999303646688, iteration: 137036
loss: 1.0045109987258911,grad_norm: 0.9268279040329199, iteration: 137037
loss: 1.0331289768218994,grad_norm: 0.9999992462505098, iteration: 137038
loss: 0.9899773001670837,grad_norm: 0.9150473140342108, iteration: 137039
loss: 0.9643190503120422,grad_norm: 0.9999992329048809, iteration: 137040
loss: 1.02292001247406,grad_norm: 0.7913238565858914, iteration: 137041
loss: 1.0018105506896973,grad_norm: 0.9081057248715407, iteration: 137042
loss: 0.9941171407699585,grad_norm: 0.8686374982848284, iteration: 137043
loss: 1.0521008968353271,grad_norm: 0.9999992732724639, iteration: 137044
loss: 1.054921269416809,grad_norm: 0.9999994882832016, iteration: 137045
loss: 0.9849742650985718,grad_norm: 0.95725272357815, iteration: 137046
loss: 1.0150947570800781,grad_norm: 0.999999163628968, iteration: 137047
loss: 1.1164300441741943,grad_norm: 0.9999998004894943, iteration: 137048
loss: 0.9812014102935791,grad_norm: 0.8918947199115338, iteration: 137049
loss: 1.0157909393310547,grad_norm: 0.9952836581758238, iteration: 137050
loss: 0.9463998079299927,grad_norm: 0.9999991203997733, iteration: 137051
loss: 1.0126326084136963,grad_norm: 0.8406062098926989, iteration: 137052
loss: 1.0188878774642944,grad_norm: 0.9999991333021068, iteration: 137053
loss: 1.0094208717346191,grad_norm: 0.9999992934291271, iteration: 137054
loss: 0.9623123407363892,grad_norm: 0.9766178725606667, iteration: 137055
loss: 1.0035860538482666,grad_norm: 0.9999992077340846, iteration: 137056
loss: 0.9991698861122131,grad_norm: 0.9999990885810267, iteration: 137057
loss: 1.0395864248275757,grad_norm: 0.9999995377909359, iteration: 137058
loss: 1.0164576768875122,grad_norm: 0.8896223066219865, iteration: 137059
loss: 0.987956166267395,grad_norm: 0.8328162419311365, iteration: 137060
loss: 1.062717318534851,grad_norm: 0.9773791225876086, iteration: 137061
loss: 1.0392894744873047,grad_norm: 0.9999991521914612, iteration: 137062
loss: 0.9803818464279175,grad_norm: 0.9999989753604931, iteration: 137063
loss: 0.9866580963134766,grad_norm: 0.9999990727363309, iteration: 137064
loss: 0.9733574390411377,grad_norm: 0.9999991877806418, iteration: 137065
loss: 1.0079978704452515,grad_norm: 0.9999992201553223, iteration: 137066
loss: 1.0017244815826416,grad_norm: 0.9999991612539862, iteration: 137067
loss: 1.040794849395752,grad_norm: 0.9999992098577387, iteration: 137068
loss: 0.9879310727119446,grad_norm: 0.9999990641446108, iteration: 137069
loss: 0.9864997267723083,grad_norm: 0.9999992276594039, iteration: 137070
loss: 1.0228453874588013,grad_norm: 0.9999997412710631, iteration: 137071
loss: 0.9932435154914856,grad_norm: 0.9999991061035813, iteration: 137072
loss: 0.9745253324508667,grad_norm: 0.9999994362554435, iteration: 137073
loss: 0.9996139407157898,grad_norm: 0.9999991033611004, iteration: 137074
loss: 0.9761433601379395,grad_norm: 0.8595835464273547, iteration: 137075
loss: 1.0049439668655396,grad_norm: 0.999999166814484, iteration: 137076
loss: 1.0300904512405396,grad_norm: 0.9999995102016206, iteration: 137077
loss: 0.9896736741065979,grad_norm: 0.9999989561575114, iteration: 137078
loss: 1.0110515356063843,grad_norm: 0.9999992254793528, iteration: 137079
loss: 1.0283015966415405,grad_norm: 0.9999991885041877, iteration: 137080
loss: 0.9883356690406799,grad_norm: 0.908016057753783, iteration: 137081
loss: 1.0042647123336792,grad_norm: 0.999999213563974, iteration: 137082
loss: 1.0287935733795166,grad_norm: 0.999998865772256, iteration: 137083
loss: 1.058370590209961,grad_norm: 0.9999997515681034, iteration: 137084
loss: 0.9847608804702759,grad_norm: 0.9999990689063147, iteration: 137085
loss: 1.0575593709945679,grad_norm: 0.9999992743644618, iteration: 137086
loss: 1.0279461145401,grad_norm: 0.999999025081828, iteration: 137087
loss: 1.0011117458343506,grad_norm: 0.9999993431304487, iteration: 137088
loss: 0.9855518937110901,grad_norm: 0.8900665890067743, iteration: 137089
loss: 0.9927824139595032,grad_norm: 0.999999266135831, iteration: 137090
loss: 1.0061275959014893,grad_norm: 0.7944048297567806, iteration: 137091
loss: 0.9788393378257751,grad_norm: 0.9969939746026362, iteration: 137092
loss: 0.9714148044586182,grad_norm: 0.9755228790386756, iteration: 137093
loss: 1.0047792196273804,grad_norm: 0.9999992646894019, iteration: 137094
loss: 1.0044505596160889,grad_norm: 0.9999991214965193, iteration: 137095
loss: 0.9761403203010559,grad_norm: 0.9999990006509464, iteration: 137096
loss: 1.003706693649292,grad_norm: 0.9999992670142777, iteration: 137097
loss: 1.0535098314285278,grad_norm: 0.9999993915367178, iteration: 137098
loss: 0.965831458568573,grad_norm: 0.9725326219043493, iteration: 137099
loss: 0.9954585433006287,grad_norm: 0.9490806160390779, iteration: 137100
loss: 1.0244121551513672,grad_norm: 0.999999345816882, iteration: 137101
loss: 1.0058306455612183,grad_norm: 0.8493914257244479, iteration: 137102
loss: 0.981243371963501,grad_norm: 0.9569057788391419, iteration: 137103
loss: 1.0130025148391724,grad_norm: 0.9999991561960495, iteration: 137104
loss: 1.0263174772262573,grad_norm: 0.999999017769922, iteration: 137105
loss: 1.0105781555175781,grad_norm: 0.9450103698190369, iteration: 137106
loss: 0.9752506017684937,grad_norm: 0.8904878484022061, iteration: 137107
loss: 1.0166518688201904,grad_norm: 0.9999990934474343, iteration: 137108
loss: 0.986994206905365,grad_norm: 0.9001905045937019, iteration: 137109
loss: 1.0086594820022583,grad_norm: 0.9999991471211064, iteration: 137110
loss: 0.9925529956817627,grad_norm: 0.9999991174104822, iteration: 137111
loss: 1.0021400451660156,grad_norm: 0.9999990619423236, iteration: 137112
loss: 1.0591236352920532,grad_norm: 0.9999989945668402, iteration: 137113
loss: 1.0206035375595093,grad_norm: 0.9544263991103449, iteration: 137114
loss: 1.0454200506210327,grad_norm: 0.9999992342458538, iteration: 137115
loss: 1.0195451974868774,grad_norm: 0.9999993094395797, iteration: 137116
loss: 0.9876253008842468,grad_norm: 0.9494694694421839, iteration: 137117
loss: 1.0467606782913208,grad_norm: 0.9999992343136677, iteration: 137118
loss: 0.9890079498291016,grad_norm: 0.9999989209315983, iteration: 137119
loss: 1.0018354654312134,grad_norm: 0.9190024354411254, iteration: 137120
loss: 1.0103590488433838,grad_norm: 0.9743488730574061, iteration: 137121
loss: 1.0007244348526,grad_norm: 0.9999992567834365, iteration: 137122
loss: 1.0324370861053467,grad_norm: 0.9999993214116257, iteration: 137123
loss: 1.0254037380218506,grad_norm: 0.9180392282550692, iteration: 137124
loss: 1.0012410879135132,grad_norm: 0.9624729959679817, iteration: 137125
loss: 0.9976747035980225,grad_norm: 0.9999991202777714, iteration: 137126
loss: 1.0040991306304932,grad_norm: 0.8744665274770759, iteration: 137127
loss: 0.999058723449707,grad_norm: 0.9999992237002705, iteration: 137128
loss: 1.0669190883636475,grad_norm: 0.9999994664743015, iteration: 137129
loss: 1.0184082984924316,grad_norm: 0.9999990849718259, iteration: 137130
loss: 1.0160620212554932,grad_norm: 0.999999075841711, iteration: 137131
loss: 1.0682141780853271,grad_norm: 0.9999994798310714, iteration: 137132
loss: 1.0188840627670288,grad_norm: 0.9567121989908791, iteration: 137133
loss: 1.0695819854736328,grad_norm: 0.9641306128979549, iteration: 137134
loss: 0.9700648784637451,grad_norm: 0.9545424597147244, iteration: 137135
loss: 1.0111416578292847,grad_norm: 0.9999990822456245, iteration: 137136
loss: 1.0144405364990234,grad_norm: 0.9545258552570498, iteration: 137137
loss: 1.0218455791473389,grad_norm: 0.9999991526416567, iteration: 137138
loss: 0.9741080403327942,grad_norm: 0.9999990344625206, iteration: 137139
loss: 1.0009357929229736,grad_norm: 0.9833110936897451, iteration: 137140
loss: 0.9937143325805664,grad_norm: 0.9390633484417437, iteration: 137141
loss: 0.9846317768096924,grad_norm: 0.8609184412133669, iteration: 137142
loss: 0.966679036617279,grad_norm: 0.9999990503794869, iteration: 137143
loss: 0.9998968243598938,grad_norm: 0.9999994409808365, iteration: 137144
loss: 1.0043829679489136,grad_norm: 0.9676091416407681, iteration: 137145
loss: 0.9958856701850891,grad_norm: 0.9999993502109156, iteration: 137146
loss: 0.990771472454071,grad_norm: 0.9949717900118786, iteration: 137147
loss: 0.9848839640617371,grad_norm: 0.9999991617777516, iteration: 137148
loss: 1.0000410079956055,grad_norm: 0.9831740764924448, iteration: 137149
loss: 1.0168871879577637,grad_norm: 0.9999991321905211, iteration: 137150
loss: 1.0027014017105103,grad_norm: 0.8744531146076091, iteration: 137151
loss: 1.0208591222763062,grad_norm: 0.9999995510255718, iteration: 137152
loss: 0.9914281368255615,grad_norm: 0.9303534311377348, iteration: 137153
loss: 0.9408078193664551,grad_norm: 0.9259646204286563, iteration: 137154
loss: 0.9941529035568237,grad_norm: 0.9999990974246846, iteration: 137155
loss: 0.9829044938087463,grad_norm: 0.9124986264136672, iteration: 137156
loss: 1.008730411529541,grad_norm: 0.8821030703304785, iteration: 137157
loss: 1.003535509109497,grad_norm: 0.9999991267352256, iteration: 137158
loss: 1.0115644931793213,grad_norm: 0.9999994487913558, iteration: 137159
loss: 1.025536298751831,grad_norm: 0.9999991208073584, iteration: 137160
loss: 1.0106432437896729,grad_norm: 0.9977096321083342, iteration: 137161
loss: 0.9595433473587036,grad_norm: 0.9215455685235019, iteration: 137162
loss: 0.9972682595252991,grad_norm: 0.9999992818052353, iteration: 137163
loss: 0.9562751650810242,grad_norm: 0.9048186950705253, iteration: 137164
loss: 1.0226532220840454,grad_norm: 0.9999990187307178, iteration: 137165
loss: 1.0310431718826294,grad_norm: 0.9999992664780969, iteration: 137166
loss: 1.0202795267105103,grad_norm: 0.9056122407341575, iteration: 137167
loss: 1.0286635160446167,grad_norm: 0.8501246616920649, iteration: 137168
loss: 1.0152267217636108,grad_norm: 0.9999992231967304, iteration: 137169
loss: 1.0159518718719482,grad_norm: 0.9906290609570584, iteration: 137170
loss: 0.9931318759918213,grad_norm: 0.9999990589017239, iteration: 137171
loss: 0.9778152704238892,grad_norm: 0.8742661076601279, iteration: 137172
loss: 1.0619832277297974,grad_norm: 0.9999995109940001, iteration: 137173
loss: 0.9772603511810303,grad_norm: 0.9540884980249147, iteration: 137174
loss: 1.0014983415603638,grad_norm: 0.9999991719738883, iteration: 137175
loss: 1.0661410093307495,grad_norm: 0.9999994470495004, iteration: 137176
loss: 1.0056416988372803,grad_norm: 0.9999990982443852, iteration: 137177
loss: 1.0115529298782349,grad_norm: 0.8613914321631961, iteration: 137178
loss: 1.0222183465957642,grad_norm: 0.9999989411642952, iteration: 137179
loss: 1.0387001037597656,grad_norm: 0.9732023062312346, iteration: 137180
loss: 1.013701319694519,grad_norm: 0.9284647532799823, iteration: 137181
loss: 0.9857869148254395,grad_norm: 0.9999991845490726, iteration: 137182
loss: 0.9972032904624939,grad_norm: 0.9999991187992661, iteration: 137183
loss: 1.0365159511566162,grad_norm: 0.8799851809200039, iteration: 137184
loss: 1.0139973163604736,grad_norm: 0.8654136304200873, iteration: 137185
loss: 0.964178740978241,grad_norm: 0.999999377110955, iteration: 137186
loss: 1.0139503479003906,grad_norm: 0.9999992382778855, iteration: 137187
loss: 1.0215270519256592,grad_norm: 0.9999991635758402, iteration: 137188
loss: 1.0021164417266846,grad_norm: 0.9687807195084717, iteration: 137189
loss: 1.0102899074554443,grad_norm: 0.9656440091773973, iteration: 137190
loss: 1.0072669982910156,grad_norm: 0.9999990405658334, iteration: 137191
loss: 0.9785029292106628,grad_norm: 0.9999992348427298, iteration: 137192
loss: 1.0996936559677124,grad_norm: 0.9999995715680369, iteration: 137193
loss: 1.0016002655029297,grad_norm: 0.9999992637297903, iteration: 137194
loss: 1.0070501565933228,grad_norm: 0.999999065266036, iteration: 137195
loss: 0.9798272848129272,grad_norm: 0.9999991489892555, iteration: 137196
loss: 1.0157744884490967,grad_norm: 0.9999992185164207, iteration: 137197
loss: 1.0206965208053589,grad_norm: 0.8713058255160456, iteration: 137198
loss: 1.012202262878418,grad_norm: 0.9999991115686586, iteration: 137199
loss: 1.018875002861023,grad_norm: 0.8540494174699315, iteration: 137200
loss: 0.9885135889053345,grad_norm: 0.9999991467228158, iteration: 137201
loss: 1.0021977424621582,grad_norm: 0.9999991529205889, iteration: 137202
loss: 1.0338295698165894,grad_norm: 0.9408667381389211, iteration: 137203
loss: 0.9820486307144165,grad_norm: 0.9999993316405541, iteration: 137204
loss: 1.0050569772720337,grad_norm: 0.8845435131861347, iteration: 137205
loss: 1.0019705295562744,grad_norm: 0.9999990682210467, iteration: 137206
loss: 0.949641227722168,grad_norm: 0.9594649306610241, iteration: 137207
loss: 1.0128440856933594,grad_norm: 0.9577704828050506, iteration: 137208
loss: 0.9926927089691162,grad_norm: 0.9999991504733498, iteration: 137209
loss: 0.9561334252357483,grad_norm: 0.937250917828041, iteration: 137210
loss: 0.9992953538894653,grad_norm: 0.9724042036742042, iteration: 137211
loss: 1.018916130065918,grad_norm: 0.999999268441771, iteration: 137212
loss: 0.9792131185531616,grad_norm: 0.9999993328581661, iteration: 137213
loss: 0.9658775329589844,grad_norm: 0.9999998562371757, iteration: 137214
loss: 0.9753121733665466,grad_norm: 0.9999992795807482, iteration: 137215
loss: 1.0274356603622437,grad_norm: 0.9999991829954876, iteration: 137216
loss: 1.0364654064178467,grad_norm: 0.9999992509974832, iteration: 137217
loss: 0.997545599937439,grad_norm: 0.9999992235863869, iteration: 137218
loss: 1.010558009147644,grad_norm: 0.9999989645345723, iteration: 137219
loss: 0.995419979095459,grad_norm: 0.7985651389983824, iteration: 137220
loss: 1.0166118144989014,grad_norm: 0.994798877422292, iteration: 137221
loss: 0.9885771870613098,grad_norm: 0.9999991497228466, iteration: 137222
loss: 0.9841582179069519,grad_norm: 0.9266714018393792, iteration: 137223
loss: 1.003737449645996,grad_norm: 0.9184592124244839, iteration: 137224
loss: 0.9903228878974915,grad_norm: 0.9947584751003711, iteration: 137225
loss: 1.0905194282531738,grad_norm: 0.9999999656065289, iteration: 137226
loss: 1.010036587715149,grad_norm: 0.9999990442627017, iteration: 137227
loss: 1.0139867067337036,grad_norm: 0.999999009316314, iteration: 137228
loss: 0.983246922492981,grad_norm: 0.8833333703621308, iteration: 137229
loss: 0.9954124093055725,grad_norm: 0.9999989178876552, iteration: 137230
loss: 0.9898879528045654,grad_norm: 0.999999152347102, iteration: 137231
loss: 0.9735517501831055,grad_norm: 0.9999992588664582, iteration: 137232
loss: 1.026940941810608,grad_norm: 0.9488008550863244, iteration: 137233
loss: 1.00194251537323,grad_norm: 0.8889853508060106, iteration: 137234
loss: 1.0227521657943726,grad_norm: 0.8387797100697093, iteration: 137235
loss: 0.976585865020752,grad_norm: 0.9842000961799845, iteration: 137236
loss: 0.9936786890029907,grad_norm: 0.979646167657525, iteration: 137237
loss: 0.9905948638916016,grad_norm: 0.9205473394973472, iteration: 137238
loss: 1.0026359558105469,grad_norm: 0.88549500611692, iteration: 137239
loss: 1.0351691246032715,grad_norm: 0.9540020820228395, iteration: 137240
loss: 0.9694783687591553,grad_norm: 0.9889677640295922, iteration: 137241
loss: 0.9850155711174011,grad_norm: 0.9889753399523555, iteration: 137242
loss: 1.0000841617584229,grad_norm: 0.9403698791774855, iteration: 137243
loss: 1.145965337753296,grad_norm: 0.9999992369010489, iteration: 137244
loss: 1.0388528108596802,grad_norm: 0.9999992163491903, iteration: 137245
loss: 1.0202142000198364,grad_norm: 0.9999992164689524, iteration: 137246
loss: 1.0268808603286743,grad_norm: 0.9999991880340364, iteration: 137247
loss: 0.9513566493988037,grad_norm: 0.9105599479875637, iteration: 137248
loss: 1.0234196186065674,grad_norm: 0.9999990947641183, iteration: 137249
loss: 0.9981362819671631,grad_norm: 0.9824139059752535, iteration: 137250
loss: 1.0156365633010864,grad_norm: 0.9999989695256549, iteration: 137251
loss: 1.0013858079910278,grad_norm: 0.9999988999678795, iteration: 137252
loss: 1.0207922458648682,grad_norm: 0.9707483842304641, iteration: 137253
loss: 1.0175096988677979,grad_norm: 0.999999328140476, iteration: 137254
loss: 0.9855566024780273,grad_norm: 0.9999990037887524, iteration: 137255
loss: 1.008207082748413,grad_norm: 0.9267479187588578, iteration: 137256
loss: 1.0104479789733887,grad_norm: 0.9653680387388364, iteration: 137257
loss: 1.003754734992981,grad_norm: 0.9263613971180856, iteration: 137258
loss: 0.9772180914878845,grad_norm: 0.9952350998188232, iteration: 137259
loss: 1.0002135038375854,grad_norm: 0.9611257292648022, iteration: 137260
loss: 0.9928505420684814,grad_norm: 0.9999990838163962, iteration: 137261
loss: 1.0039880275726318,grad_norm: 0.815553590944678, iteration: 137262
loss: 1.0499722957611084,grad_norm: 0.9999991088360533, iteration: 137263
loss: 1.0111074447631836,grad_norm: 0.9999989902511628, iteration: 137264
loss: 1.043082356452942,grad_norm: 0.9999990826134808, iteration: 137265
loss: 1.0211056470870972,grad_norm: 0.9362607243859906, iteration: 137266
loss: 1.0388057231903076,grad_norm: 0.9845885959059842, iteration: 137267
loss: 1.01581871509552,grad_norm: 0.9248826280631397, iteration: 137268
loss: 1.0104835033416748,grad_norm: 0.9999992871666251, iteration: 137269
loss: 0.9940191507339478,grad_norm: 0.9944091890772123, iteration: 137270
loss: 0.9782573580741882,grad_norm: 0.9476920213052624, iteration: 137271
loss: 1.0084282159805298,grad_norm: 0.9919177944908089, iteration: 137272
loss: 1.0301973819732666,grad_norm: 0.9329708913737619, iteration: 137273
loss: 1.0073919296264648,grad_norm: 0.9999993118417818, iteration: 137274
loss: 1.007617712020874,grad_norm: 0.9999993462238372, iteration: 137275
loss: 0.9849625825881958,grad_norm: 0.9999990506557781, iteration: 137276
loss: 1.028578281402588,grad_norm: 0.9999992607091618, iteration: 137277
loss: 1.0103559494018555,grad_norm: 0.9999991784115139, iteration: 137278
loss: 0.9921133518218994,grad_norm: 0.9808661729855352, iteration: 137279
loss: 1.0075684785842896,grad_norm: 0.8887409947354332, iteration: 137280
loss: 0.9774743914604187,grad_norm: 0.9999989633356365, iteration: 137281
loss: 0.9997612237930298,grad_norm: 0.9999992395915515, iteration: 137282
loss: 1.0147390365600586,grad_norm: 0.9999991265132128, iteration: 137283
loss: 0.9926126003265381,grad_norm: 0.999999101949584, iteration: 137284
loss: 0.9990799427032471,grad_norm: 0.9999990684233085, iteration: 137285
loss: 1.0146435499191284,grad_norm: 0.9999990706595142, iteration: 137286
loss: 1.0033265352249146,grad_norm: 0.9775622991316624, iteration: 137287
loss: 0.9973545670509338,grad_norm: 0.9801713178667021, iteration: 137288
loss: 0.9966742992401123,grad_norm: 0.9999991064807079, iteration: 137289
loss: 1.0204360485076904,grad_norm: 0.8533467539732947, iteration: 137290
loss: 1.0351542234420776,grad_norm: 0.9794595570249435, iteration: 137291
loss: 0.9824983477592468,grad_norm: 0.999998987127291, iteration: 137292
loss: 0.9868905544281006,grad_norm: 0.9999992010917925, iteration: 137293
loss: 0.9890247583389282,grad_norm: 0.9999991822851578, iteration: 137294
loss: 0.9875203967094421,grad_norm: 0.9868323919307289, iteration: 137295
loss: 1.0438032150268555,grad_norm: 0.999999224043086, iteration: 137296
loss: 0.9776626825332642,grad_norm: 0.9436065743329819, iteration: 137297
loss: 0.9852834343910217,grad_norm: 0.8585559060899297, iteration: 137298
loss: 0.9994162321090698,grad_norm: 0.9999990889046034, iteration: 137299
loss: 1.0134199857711792,grad_norm: 0.9513042469399953, iteration: 137300
loss: 0.968256950378418,grad_norm: 0.9999989350254087, iteration: 137301
loss: 0.9928607940673828,grad_norm: 0.9801842342461532, iteration: 137302
loss: 0.9899973273277283,grad_norm: 0.9999993803522791, iteration: 137303
loss: 0.9623303413391113,grad_norm: 0.9999991987974723, iteration: 137304
loss: 0.9600927233695984,grad_norm: 0.9999991086449218, iteration: 137305
loss: 1.0027521848678589,grad_norm: 0.9999994909024238, iteration: 137306
loss: 1.044013261795044,grad_norm: 0.9999991107308883, iteration: 137307
loss: 1.0228936672210693,grad_norm: 0.9999994718969132, iteration: 137308
loss: 0.9768303036689758,grad_norm: 0.9999991212841915, iteration: 137309
loss: 0.991456151008606,grad_norm: 0.9999992124441407, iteration: 137310
loss: 1.0108834505081177,grad_norm: 0.9999991642144698, iteration: 137311
loss: 0.9944367408752441,grad_norm: 0.9999994817945377, iteration: 137312
loss: 1.0186305046081543,grad_norm: 0.9999992972593955, iteration: 137313
loss: 1.0762815475463867,grad_norm: 0.9999998614423925, iteration: 137314
loss: 1.0295734405517578,grad_norm: 0.9056658709795516, iteration: 137315
loss: 0.9772179126739502,grad_norm: 0.9855607754442532, iteration: 137316
loss: 1.0121960639953613,grad_norm: 0.9661628809582882, iteration: 137317
loss: 1.0073963403701782,grad_norm: 0.9999991135486943, iteration: 137318
loss: 0.9630105495452881,grad_norm: 0.9999992182837593, iteration: 137319
loss: 0.9879180192947388,grad_norm: 0.9783730925881087, iteration: 137320
loss: 0.9790819883346558,grad_norm: 0.9999993901184947, iteration: 137321
loss: 0.9832044839859009,grad_norm: 0.999999138799908, iteration: 137322
loss: 0.9651983976364136,grad_norm: 0.9542231051025207, iteration: 137323
loss: 0.974375307559967,grad_norm: 0.999999175282631, iteration: 137324
loss: 0.974488377571106,grad_norm: 0.8427986034996963, iteration: 137325
loss: 1.1129487752914429,grad_norm: 0.9999994434998473, iteration: 137326
loss: 1.0271400213241577,grad_norm: 0.9999991471678306, iteration: 137327
loss: 1.0202845335006714,grad_norm: 0.9999993307360523, iteration: 137328
loss: 0.9786644577980042,grad_norm: 0.9105859423708053, iteration: 137329
loss: 1.0577131509780884,grad_norm: 0.9999990293173477, iteration: 137330
loss: 0.9986112713813782,grad_norm: 0.9612574012295075, iteration: 137331
loss: 0.9534730911254883,grad_norm: 0.9999989707671443, iteration: 137332
loss: 1.0479240417480469,grad_norm: 0.9999991316120385, iteration: 137333
loss: 1.0262775421142578,grad_norm: 0.9811389885864471, iteration: 137334
loss: 0.9470070004463196,grad_norm: 0.9999992317820187, iteration: 137335
loss: 1.0099536180496216,grad_norm: 0.9485446989612435, iteration: 137336
loss: 0.9926316142082214,grad_norm: 0.9999992394628858, iteration: 137337
loss: 0.9862297773361206,grad_norm: 0.999999058154336, iteration: 137338
loss: 1.015358567237854,grad_norm: 0.9999989963086368, iteration: 137339
loss: 1.0102715492248535,grad_norm: 0.9999990795193952, iteration: 137340
loss: 0.9645288586616516,grad_norm: 0.9999991934815403, iteration: 137341
loss: 1.0110794305801392,grad_norm: 0.9999989779750263, iteration: 137342
loss: 1.0344947576522827,grad_norm: 0.9999992869951123, iteration: 137343
loss: 0.9731641411781311,grad_norm: 0.9999991568166394, iteration: 137344
loss: 1.0097178220748901,grad_norm: 0.9999990233297191, iteration: 137345
loss: 0.9492802023887634,grad_norm: 0.920906614442153, iteration: 137346
loss: 0.9984508752822876,grad_norm: 0.8893760975167487, iteration: 137347
loss: 1.0092322826385498,grad_norm: 0.9802666717292131, iteration: 137348
loss: 1.0045521259307861,grad_norm: 0.9999992409729389, iteration: 137349
loss: 0.9965737462043762,grad_norm: 0.9999992495311969, iteration: 137350
loss: 1.0218393802642822,grad_norm: 0.8823444619360149, iteration: 137351
loss: 0.9923145771026611,grad_norm: 0.9726541149032588, iteration: 137352
loss: 1.0262622833251953,grad_norm: 0.9999991753497697, iteration: 137353
loss: 1.0202817916870117,grad_norm: 0.8871261804184111, iteration: 137354
loss: 0.9739995002746582,grad_norm: 0.9999992002083831, iteration: 137355
loss: 1.0162913799285889,grad_norm: 0.8506878650190779, iteration: 137356
loss: 0.9617325067520142,grad_norm: 0.9509811818635128, iteration: 137357
loss: 0.9876048564910889,grad_norm: 0.9999993576748863, iteration: 137358
loss: 1.0025697946548462,grad_norm: 0.99999905439337, iteration: 137359
loss: 0.9710930585861206,grad_norm: 0.99999914885421, iteration: 137360
loss: 0.9695171117782593,grad_norm: 0.9999991031734604, iteration: 137361
loss: 0.9961403608322144,grad_norm: 0.9849957346297076, iteration: 137362
loss: 0.979124903678894,grad_norm: 0.9999989706266044, iteration: 137363
loss: 1.0260255336761475,grad_norm: 0.9999994845117167, iteration: 137364
loss: 0.9978068470954895,grad_norm: 0.9999991503562949, iteration: 137365
loss: 1.011330008506775,grad_norm: 0.9196819855950754, iteration: 137366
loss: 1.000357985496521,grad_norm: 0.9999992151008726, iteration: 137367
loss: 1.034783124923706,grad_norm: 0.9999995480722753, iteration: 137368
loss: 1.024946928024292,grad_norm: 0.9999992508866457, iteration: 137369
loss: 1.0103298425674438,grad_norm: 0.999999048169797, iteration: 137370
loss: 0.9932928681373596,grad_norm: 0.8567103900449061, iteration: 137371
loss: 0.9790777564048767,grad_norm: 0.9999996617199428, iteration: 137372
loss: 1.003857135772705,grad_norm: 0.9999991415053076, iteration: 137373
loss: 1.0240740776062012,grad_norm: 0.9999993320118507, iteration: 137374
loss: 0.9807091951370239,grad_norm: 0.9928979751742008, iteration: 137375
loss: 1.0476657152175903,grad_norm: 0.9999994207301658, iteration: 137376
loss: 0.9927623867988586,grad_norm: 0.985719697923565, iteration: 137377
loss: 0.995527982711792,grad_norm: 0.9999990601327009, iteration: 137378
loss: 0.995209276676178,grad_norm: 0.9999993439109116, iteration: 137379
loss: 1.000223994255066,grad_norm: 0.9658294925072944, iteration: 137380
loss: 1.0001356601715088,grad_norm: 0.9999993269985273, iteration: 137381
loss: 1.0058273077011108,grad_norm: 0.9999990033995362, iteration: 137382
loss: 0.967955470085144,grad_norm: 0.8730886161464857, iteration: 137383
loss: 1.0007894039154053,grad_norm: 0.9999990588311626, iteration: 137384
loss: 1.0298354625701904,grad_norm: 0.893482593932404, iteration: 137385
loss: 0.9991006255149841,grad_norm: 0.9999989350738321, iteration: 137386
loss: 1.0106998682022095,grad_norm: 0.9999991731514418, iteration: 137387
loss: 1.0462098121643066,grad_norm: 0.9205961893100193, iteration: 137388
loss: 1.0207154750823975,grad_norm: 0.9999992821733514, iteration: 137389
loss: 0.988818347454071,grad_norm: 0.999999011813484, iteration: 137390
loss: 1.0315250158309937,grad_norm: 0.9916991177180143, iteration: 137391
loss: 1.007729172706604,grad_norm: 0.9999995843125211, iteration: 137392
loss: 1.0034486055374146,grad_norm: 0.9503914051394065, iteration: 137393
loss: 0.9794632196426392,grad_norm: 0.8349481548213226, iteration: 137394
loss: 0.9605275988578796,grad_norm: 0.9728751169284457, iteration: 137395
loss: 1.0095911026000977,grad_norm: 0.9999990157721247, iteration: 137396
loss: 1.027283787727356,grad_norm: 0.9999994569825689, iteration: 137397
loss: 0.9839236736297607,grad_norm: 0.9999991292257224, iteration: 137398
loss: 1.003344178199768,grad_norm: 0.9999992227607613, iteration: 137399
loss: 0.9694114923477173,grad_norm: 0.9999991097617277, iteration: 137400
loss: 0.9746767282485962,grad_norm: 0.9737846747722998, iteration: 137401
loss: 0.9820731282234192,grad_norm: 0.999999115614229, iteration: 137402
loss: 1.006903052330017,grad_norm: 0.9999992656372577, iteration: 137403
loss: 1.034966230392456,grad_norm: 0.9999990337278604, iteration: 137404
loss: 1.0103684663772583,grad_norm: 0.9053088331144274, iteration: 137405
loss: 0.9927873015403748,grad_norm: 0.9999991440394619, iteration: 137406
loss: 0.977547287940979,grad_norm: 0.999999144938668, iteration: 137407
loss: 0.9946567416191101,grad_norm: 0.8744788753328011, iteration: 137408
loss: 0.9792440533638,grad_norm: 0.9388291528796707, iteration: 137409
loss: 0.9849697351455688,grad_norm: 0.9850593778310852, iteration: 137410
loss: 0.976264476776123,grad_norm: 0.9999992500870056, iteration: 137411
loss: 1.0263367891311646,grad_norm: 0.9575443791862122, iteration: 137412
loss: 1.029028296470642,grad_norm: 0.9999994327948355, iteration: 137413
loss: 0.9887147545814514,grad_norm: 0.9999992039213329, iteration: 137414
loss: 1.2647510766983032,grad_norm: 0.9999998292040978, iteration: 137415
loss: 0.9794482588768005,grad_norm: 0.9999992395730849, iteration: 137416
loss: 1.0164158344268799,grad_norm: 0.9343145800807177, iteration: 137417
loss: 0.9771920442581177,grad_norm: 0.9919281647446802, iteration: 137418
loss: 1.024591326713562,grad_norm: 0.9999990871031286, iteration: 137419
loss: 1.0324525833129883,grad_norm: 0.7577033375768057, iteration: 137420
loss: 1.2343294620513916,grad_norm: 0.9999999076186714, iteration: 137421
loss: 1.0830096006393433,grad_norm: 0.9999992912294853, iteration: 137422
loss: 1.032388687133789,grad_norm: 0.9999998832896729, iteration: 137423
loss: 1.0880448818206787,grad_norm: 0.99999928077424, iteration: 137424
loss: 1.0879021883010864,grad_norm: 0.9999997178755596, iteration: 137425
loss: 1.0191469192504883,grad_norm: 0.9999990957299185, iteration: 137426
loss: 1.0833832025527954,grad_norm: 0.9999995402187152, iteration: 137427
loss: 1.0269004106521606,grad_norm: 0.9737197494523783, iteration: 137428
loss: 0.9990130066871643,grad_norm: 0.9915351000703647, iteration: 137429
loss: 0.9803442358970642,grad_norm: 1.0000000503238031, iteration: 137430
loss: 0.9991504549980164,grad_norm: 0.9999992080526394, iteration: 137431
loss: 0.9386841058731079,grad_norm: 0.9632885705285098, iteration: 137432
loss: 0.9953832626342773,grad_norm: 0.9999992695665677, iteration: 137433
loss: 0.9740215539932251,grad_norm: 0.9999989822763654, iteration: 137434
loss: 0.982579231262207,grad_norm: 0.999999245916213, iteration: 137435
loss: 0.9809533953666687,grad_norm: 0.9138331567929018, iteration: 137436
loss: 0.9627525210380554,grad_norm: 0.9096273381770023, iteration: 137437
loss: 1.0115814208984375,grad_norm: 0.9999991360297824, iteration: 137438
loss: 1.020821452140808,grad_norm: 0.9999999507786335, iteration: 137439
loss: 1.0063016414642334,grad_norm: 0.9999991609736354, iteration: 137440
loss: 1.0324559211730957,grad_norm: 0.9999990194666184, iteration: 137441
loss: 1.07345449924469,grad_norm: 0.9999994946644737, iteration: 137442
loss: 1.0084469318389893,grad_norm: 0.9999990927732167, iteration: 137443
loss: 0.9865870475769043,grad_norm: 0.9999991315589671, iteration: 137444
loss: 0.9839076399803162,grad_norm: 0.881046550785767, iteration: 137445
loss: 1.0098108053207397,grad_norm: 0.9401823419614902, iteration: 137446
loss: 1.017545223236084,grad_norm: 0.9392183253160412, iteration: 137447
loss: 1.026058554649353,grad_norm: 0.9999990348086216, iteration: 137448
loss: 1.0060813426971436,grad_norm: 0.8891092673775155, iteration: 137449
loss: 1.0140454769134521,grad_norm: 0.9052913388580353, iteration: 137450
loss: 0.9730551242828369,grad_norm: 0.9711019391886089, iteration: 137451
loss: 1.0121594667434692,grad_norm: 0.8048130169096275, iteration: 137452
loss: 0.988515317440033,grad_norm: 0.9999991674331364, iteration: 137453
loss: 1.0237587690353394,grad_norm: 0.9999989892565466, iteration: 137454
loss: 0.9859251976013184,grad_norm: 0.9252986050405676, iteration: 137455
loss: 1.0192524194717407,grad_norm: 0.9999991190771164, iteration: 137456
loss: 0.9924052953720093,grad_norm: 0.9999988836425799, iteration: 137457
loss: 1.0491973161697388,grad_norm: 0.9999991755670551, iteration: 137458
loss: 1.0454272031784058,grad_norm: 0.9999993916128416, iteration: 137459
loss: 1.0069540739059448,grad_norm: 0.9999992811244945, iteration: 137460
loss: 1.0158950090408325,grad_norm: 0.9069562259680735, iteration: 137461
loss: 0.9763281941413879,grad_norm: 0.961832065575993, iteration: 137462
loss: 1.0061243772506714,grad_norm: 0.9947084247223009, iteration: 137463
loss: 1.0201185941696167,grad_norm: 0.9618577023956297, iteration: 137464
loss: 0.9828436374664307,grad_norm: 0.9999991249250194, iteration: 137465
loss: 0.9933702349662781,grad_norm: 0.9486213312441023, iteration: 137466
loss: 0.9905011057853699,grad_norm: 0.9999991956132035, iteration: 137467
loss: 0.9902088046073914,grad_norm: 0.8385802012727868, iteration: 137468
loss: 0.9711527228355408,grad_norm: 0.980511431511825, iteration: 137469
loss: 1.0223486423492432,grad_norm: 0.9536284785796854, iteration: 137470
loss: 1.0026793479919434,grad_norm: 0.999999254718725, iteration: 137471
loss: 1.0377769470214844,grad_norm: 0.9765234053773812, iteration: 137472
loss: 1.003278136253357,grad_norm: 0.9999991436348836, iteration: 137473
loss: 0.9651670455932617,grad_norm: 0.986995107555158, iteration: 137474
loss: 1.0059508085250854,grad_norm: 0.9816417110439919, iteration: 137475
loss: 1.0303640365600586,grad_norm: 0.9999991045378759, iteration: 137476
loss: 1.0100462436676025,grad_norm: 0.8650587173844821, iteration: 137477
loss: 1.0548478364944458,grad_norm: 0.9605487942180732, iteration: 137478
loss: 1.009005069732666,grad_norm: 0.7665095891586378, iteration: 137479
loss: 0.9877950549125671,grad_norm: 0.9999991841968938, iteration: 137480
loss: 0.9991508722305298,grad_norm: 0.9999991858686129, iteration: 137481
loss: 0.9667892456054688,grad_norm: 0.9880129359535655, iteration: 137482
loss: 1.0029209852218628,grad_norm: 0.9999991363103773, iteration: 137483
loss: 1.0185296535491943,grad_norm: 0.9462327995479602, iteration: 137484
loss: 0.9961295127868652,grad_norm: 0.9999991986671136, iteration: 137485
loss: 1.0080119371414185,grad_norm: 0.9999990777158044, iteration: 137486
loss: 0.9992589950561523,grad_norm: 0.9783263114069597, iteration: 137487
loss: 1.0131397247314453,grad_norm: 0.9999990391002916, iteration: 137488
loss: 1.0126264095306396,grad_norm: 0.9999990784355006, iteration: 137489
loss: 1.031520962715149,grad_norm: 0.999999120941753, iteration: 137490
loss: 0.9794952273368835,grad_norm: 0.9999990155977434, iteration: 137491
loss: 0.9448641538619995,grad_norm: 0.985779001128492, iteration: 137492
loss: 0.9852447509765625,grad_norm: 0.977075596653587, iteration: 137493
loss: 0.9962281584739685,grad_norm: 0.983479102281289, iteration: 137494
loss: 1.0234888792037964,grad_norm: 0.9999991366184352, iteration: 137495
loss: 0.9750464558601379,grad_norm: 0.9999990832787067, iteration: 137496
loss: 0.9945667386054993,grad_norm: 0.9516959611369611, iteration: 137497
loss: 1.0069501399993896,grad_norm: 0.9999989422622413, iteration: 137498
loss: 0.9638215899467468,grad_norm: 0.7764640157652772, iteration: 137499
loss: 1.0304750204086304,grad_norm: 0.9999991829942309, iteration: 137500
loss: 1.0561927556991577,grad_norm: 0.9999992390254563, iteration: 137501
loss: 1.003366470336914,grad_norm: 0.9056703145369943, iteration: 137502
loss: 0.9808846712112427,grad_norm: 0.9506018561601607, iteration: 137503
loss: 0.9956738948822021,grad_norm: 0.9581302328685626, iteration: 137504
loss: 0.9940075874328613,grad_norm: 0.9999992736898834, iteration: 137505
loss: 0.9801849722862244,grad_norm: 0.9999991507166271, iteration: 137506
loss: 1.0010651350021362,grad_norm: 0.8420992538970803, iteration: 137507
loss: 1.0060385465621948,grad_norm: 0.9036328406098947, iteration: 137508
loss: 1.0413610935211182,grad_norm: 0.9999996181815569, iteration: 137509
loss: 1.0082855224609375,grad_norm: 0.9999989151866189, iteration: 137510
loss: 1.0076769590377808,grad_norm: 0.8613610799563748, iteration: 137511
loss: 1.0336101055145264,grad_norm: 0.9999991672053012, iteration: 137512
loss: 1.0053058862686157,grad_norm: 0.9999992886061755, iteration: 137513
loss: 0.9708220362663269,grad_norm: 0.9999991267233939, iteration: 137514
loss: 1.0010786056518555,grad_norm: 0.9999990872026305, iteration: 137515
loss: 1.031754732131958,grad_norm: 0.9999997835913997, iteration: 137516
loss: 0.9871273636817932,grad_norm: 0.9999993418229433, iteration: 137517
loss: 1.0636653900146484,grad_norm: 0.9383521290516627, iteration: 137518
loss: 1.0473155975341797,grad_norm: 0.999999311317443, iteration: 137519
loss: 1.0366929769515991,grad_norm: 0.9999990498647993, iteration: 137520
loss: 0.9957318902015686,grad_norm: 0.8394689778631628, iteration: 137521
loss: 1.032914161682129,grad_norm: 0.9999989589261713, iteration: 137522
loss: 1.0045263767242432,grad_norm: 0.9999990470728952, iteration: 137523
loss: 0.993543267250061,grad_norm: 0.9999990417672516, iteration: 137524
loss: 0.9917184710502625,grad_norm: 0.9999991624906508, iteration: 137525
loss: 1.0148711204528809,grad_norm: 0.8630973315434536, iteration: 137526
loss: 1.0438486337661743,grad_norm: 0.9999991045278855, iteration: 137527
loss: 1.0003077983856201,grad_norm: 0.999998931545702, iteration: 137528
loss: 0.9758563041687012,grad_norm: 0.9999990352388844, iteration: 137529
loss: 1.0021438598632812,grad_norm: 0.9999991591222887, iteration: 137530
loss: 0.9806268811225891,grad_norm: 0.9837080104706716, iteration: 137531
loss: 1.0017694234848022,grad_norm: 0.9999991505653771, iteration: 137532
loss: 0.9854803681373596,grad_norm: 0.9021257724168443, iteration: 137533
loss: 1.0088270902633667,grad_norm: 0.9999992277203976, iteration: 137534
loss: 1.000749945640564,grad_norm: 0.9999991123014086, iteration: 137535
loss: 0.9914747476577759,grad_norm: 0.984241040377922, iteration: 137536
loss: 0.9813689589500427,grad_norm: 0.9999990606465744, iteration: 137537
loss: 1.009108304977417,grad_norm: 0.9999991223266156, iteration: 137538
loss: 1.0143588781356812,grad_norm: 0.9999990552686787, iteration: 137539
loss: 0.981509804725647,grad_norm: 0.9999991875662438, iteration: 137540
loss: 0.9708808064460754,grad_norm: 0.9999991765339096, iteration: 137541
loss: 0.9707647562026978,grad_norm: 0.9999989064074767, iteration: 137542
loss: 1.0524811744689941,grad_norm: 0.9999994500730119, iteration: 137543
loss: 1.0058658123016357,grad_norm: 0.9999992035292128, iteration: 137544
loss: 0.9988587498664856,grad_norm: 0.9999990298354321, iteration: 137545
loss: 0.9682726860046387,grad_norm: 0.999999169919213, iteration: 137546
loss: 0.9772385954856873,grad_norm: 0.8462292965576479, iteration: 137547
loss: 1.030476450920105,grad_norm: 0.9999990169901148, iteration: 137548
loss: 1.0125972032546997,grad_norm: 0.9944517807838782, iteration: 137549
loss: 1.0058015584945679,grad_norm: 0.9999991969161475, iteration: 137550
loss: 0.9992024898529053,grad_norm: 0.9999990965576726, iteration: 137551
loss: 1.0485063791275024,grad_norm: 0.9999991358713962, iteration: 137552
loss: 1.0130445957183838,grad_norm: 0.9999995088078495, iteration: 137553
loss: 1.001037359237671,grad_norm: 0.9999992313201952, iteration: 137554
loss: 1.0095512866973877,grad_norm: 0.9399038236141181, iteration: 137555
loss: 1.0146647691726685,grad_norm: 0.9999989763316707, iteration: 137556
loss: 1.061723232269287,grad_norm: 0.9999990914778194, iteration: 137557
loss: 1.010603904724121,grad_norm: 0.9574018073460716, iteration: 137558
loss: 0.9906260967254639,grad_norm: 0.9999990455150224, iteration: 137559
loss: 0.9888622164726257,grad_norm: 0.99999899537143, iteration: 137560
loss: 0.9810618162155151,grad_norm: 0.9999991352923243, iteration: 137561
loss: 1.0093249082565308,grad_norm: 0.996939687533419, iteration: 137562
loss: 0.9880069494247437,grad_norm: 0.9999991123092881, iteration: 137563
loss: 1.0036919116973877,grad_norm: 0.9999991125277705, iteration: 137564
loss: 0.9780811667442322,grad_norm: 0.9999989267096704, iteration: 137565
loss: 1.0114606618881226,grad_norm: 0.9999992275732762, iteration: 137566
loss: 1.0275685787200928,grad_norm: 0.9999994408883449, iteration: 137567
loss: 0.9757106304168701,grad_norm: 0.9999992131050619, iteration: 137568
loss: 0.9628339409828186,grad_norm: 0.9999992247729648, iteration: 137569
loss: 1.0167073011398315,grad_norm: 0.9999992595022523, iteration: 137570
loss: 1.015477180480957,grad_norm: 0.9999992613175254, iteration: 137571
loss: 1.032301425933838,grad_norm: 0.9567156482077197, iteration: 137572
loss: 0.9760767221450806,grad_norm: 0.9999992075228757, iteration: 137573
loss: 1.0335196256637573,grad_norm: 0.9999995517995969, iteration: 137574
loss: 1.020582675933838,grad_norm: 0.9385024308857342, iteration: 137575
loss: 1.0133203268051147,grad_norm: 0.9999989941041711, iteration: 137576
loss: 0.9787888526916504,grad_norm: 0.9850117423496729, iteration: 137577
loss: 0.9855655431747437,grad_norm: 0.9999991582025124, iteration: 137578
loss: 1.0030206441879272,grad_norm: 0.999998995451373, iteration: 137579
loss: 1.002937912940979,grad_norm: 0.9999992326241285, iteration: 137580
loss: 0.9773145318031311,grad_norm: 0.9999990418416825, iteration: 137581
loss: 0.9968349933624268,grad_norm: 0.9999990591545672, iteration: 137582
loss: 1.0172704458236694,grad_norm: 0.9390577152664317, iteration: 137583
loss: 0.9912160634994507,grad_norm: 0.9999991652668282, iteration: 137584
loss: 1.0102335214614868,grad_norm: 0.999999068884393, iteration: 137585
loss: 0.9953412413597107,grad_norm: 0.9818912622307417, iteration: 137586
loss: 0.9835789799690247,grad_norm: 0.7616116544995432, iteration: 137587
loss: 0.9937475323677063,grad_norm: 0.9999990135293423, iteration: 137588
loss: 1.0050345659255981,grad_norm: 0.9999990711680928, iteration: 137589
loss: 1.049521803855896,grad_norm: 0.999999802697777, iteration: 137590
loss: 1.0261675119400024,grad_norm: 0.9999993029772632, iteration: 137591
loss: 0.9891115427017212,grad_norm: 0.9999998436331902, iteration: 137592
loss: 1.0148415565490723,grad_norm: 0.9999995692807168, iteration: 137593
loss: 0.9944295883178711,grad_norm: 0.8255713824446943, iteration: 137594
loss: 0.9791505932807922,grad_norm: 0.9497924294746781, iteration: 137595
loss: 1.0188324451446533,grad_norm: 0.9999992597903352, iteration: 137596
loss: 1.0178390741348267,grad_norm: 0.9999998505241903, iteration: 137597
loss: 0.9623883366584778,grad_norm: 0.9999989843961001, iteration: 137598
loss: 1.0338995456695557,grad_norm: 0.9999992678752858, iteration: 137599
loss: 0.9899724721908569,grad_norm: 0.999999011084931, iteration: 137600
loss: 0.966513454914093,grad_norm: 0.9999990955119638, iteration: 137601
loss: 1.0128763914108276,grad_norm: 0.940549967042043, iteration: 137602
loss: 0.9817545413970947,grad_norm: 0.9437497619544335, iteration: 137603
loss: 1.0124125480651855,grad_norm: 0.8767243943161557, iteration: 137604
loss: 1.017151951789856,grad_norm: 0.9999990933038915, iteration: 137605
loss: 1.0002883672714233,grad_norm: 0.9999990468912487, iteration: 137606
loss: 1.0263170003890991,grad_norm: 0.9999990580607419, iteration: 137607
loss: 0.9952292442321777,grad_norm: 0.9607147626577311, iteration: 137608
loss: 0.9891587495803833,grad_norm: 0.9548589021642148, iteration: 137609
loss: 1.0113154649734497,grad_norm: 0.99999909301536, iteration: 137610
loss: 0.9682385325431824,grad_norm: 0.8897608331711885, iteration: 137611
loss: 0.988772451877594,grad_norm: 0.9999992599335985, iteration: 137612
loss: 1.0238641500473022,grad_norm: 0.9999989845857953, iteration: 137613
loss: 1.0023950338363647,grad_norm: 0.999999101205696, iteration: 137614
loss: 1.0046747922897339,grad_norm: 0.9999992684848927, iteration: 137615
loss: 0.9706025123596191,grad_norm: 0.971934630074205, iteration: 137616
loss: 0.9879919290542603,grad_norm: 0.9999991988756969, iteration: 137617
loss: 1.0003808736801147,grad_norm: 0.8567030109050178, iteration: 137618
loss: 1.041109323501587,grad_norm: 0.999999290599831, iteration: 137619
loss: 1.0166209936141968,grad_norm: 0.9999991063663567, iteration: 137620
loss: 0.9928090572357178,grad_norm: 0.9848309923405295, iteration: 137621
loss: 0.9937165975570679,grad_norm: 0.9999989835354564, iteration: 137622
loss: 1.0116254091262817,grad_norm: 0.9999991164586488, iteration: 137623
loss: 0.9936324954032898,grad_norm: 0.999999193240769, iteration: 137624
loss: 1.0032401084899902,grad_norm: 0.9999990155823979, iteration: 137625
loss: 1.0219779014587402,grad_norm: 0.9999990305268004, iteration: 137626
loss: 1.011195421218872,grad_norm: 0.9999990236406339, iteration: 137627
loss: 0.9960243105888367,grad_norm: 0.980748773663967, iteration: 137628
loss: 1.0032873153686523,grad_norm: 0.9834273568817764, iteration: 137629
loss: 0.9796273112297058,grad_norm: 0.9999990596892347, iteration: 137630
loss: 1.0293161869049072,grad_norm: 0.9999991312284489, iteration: 137631
loss: 1.0182664394378662,grad_norm: 0.9999993533511158, iteration: 137632
loss: 1.0091121196746826,grad_norm: 0.9999992053583461, iteration: 137633
loss: 1.0199079513549805,grad_norm: 0.9999992159116633, iteration: 137634
loss: 1.0150071382522583,grad_norm: 0.9999990175272199, iteration: 137635
loss: 0.9599291682243347,grad_norm: 0.9999991893556721, iteration: 137636
loss: 0.9755182862281799,grad_norm: 0.9951425300933581, iteration: 137637
loss: 1.0299614667892456,grad_norm: 0.9249156358130425, iteration: 137638
loss: 1.0347496271133423,grad_norm: 0.9999991826391373, iteration: 137639
loss: 1.0157203674316406,grad_norm: 0.999999148215355, iteration: 137640
loss: 1.009218454360962,grad_norm: 0.940607818023302, iteration: 137641
loss: 0.9989778399467468,grad_norm: 0.9999991566028023, iteration: 137642
loss: 0.9900450706481934,grad_norm: 0.829489099520005, iteration: 137643
loss: 1.0205961465835571,grad_norm: 0.9999990940616235, iteration: 137644
loss: 0.9977898597717285,grad_norm: 0.9092763169822755, iteration: 137645
loss: 0.9893703460693359,grad_norm: 0.8685452124823502, iteration: 137646
loss: 0.9902873635292053,grad_norm: 0.9999991970283217, iteration: 137647
loss: 0.9724520444869995,grad_norm: 0.9999991837890794, iteration: 137648
loss: 1.0011489391326904,grad_norm: 0.9999991409798955, iteration: 137649
loss: 1.0025269985198975,grad_norm: 0.999999061318534, iteration: 137650
loss: 0.9746643304824829,grad_norm: 0.9334838637399837, iteration: 137651
loss: 1.0239568948745728,grad_norm: 0.9999991083090649, iteration: 137652
loss: 0.967865526676178,grad_norm: 0.9566331548612148, iteration: 137653
loss: 0.9768990874290466,grad_norm: 0.9999991099640861, iteration: 137654
loss: 1.013032078742981,grad_norm: 0.999999080401243, iteration: 137655
loss: 0.9930378794670105,grad_norm: 0.9999990530086741, iteration: 137656
loss: 0.963472843170166,grad_norm: 0.9999991880969777, iteration: 137657
loss: 1.0079121589660645,grad_norm: 0.9410200388984954, iteration: 137658
loss: 1.0030940771102905,grad_norm: 0.8949235161385005, iteration: 137659
loss: 1.0000364780426025,grad_norm: 0.999999253665506, iteration: 137660
loss: 1.0159610509872437,grad_norm: 0.9228112981644897, iteration: 137661
loss: 0.9737957119941711,grad_norm: 0.9464351846550466, iteration: 137662
loss: 0.9980022311210632,grad_norm: 0.9999991315557275, iteration: 137663
loss: 1.0010215044021606,grad_norm: 0.999999162863652, iteration: 137664
loss: 1.0218594074249268,grad_norm: 0.9999991427412666, iteration: 137665
loss: 0.9680909514427185,grad_norm: 0.9058697119017176, iteration: 137666
loss: 0.9919669032096863,grad_norm: 0.8731536732787222, iteration: 137667
loss: 0.9922436475753784,grad_norm: 0.9999992510224618, iteration: 137668
loss: 0.9910334944725037,grad_norm: 0.9999991489491131, iteration: 137669
loss: 1.0393949747085571,grad_norm: 0.9999993282806968, iteration: 137670
loss: 0.9413884878158569,grad_norm: 0.9315079086698028, iteration: 137671
loss: 1.037859320640564,grad_norm: 0.9999990213567814, iteration: 137672
loss: 1.0335612297058105,grad_norm: 0.9999991427929386, iteration: 137673
loss: 1.0327134132385254,grad_norm: 0.9191916570765507, iteration: 137674
loss: 0.9770088791847229,grad_norm: 0.9999990769934624, iteration: 137675
loss: 1.0107117891311646,grad_norm: 0.9999990224859479, iteration: 137676
loss: 0.9969473481178284,grad_norm: 0.9999990644392134, iteration: 137677
loss: 1.0412311553955078,grad_norm: 0.9962516266667854, iteration: 137678
loss: 1.005487322807312,grad_norm: 0.8585930431126142, iteration: 137679
loss: 1.0186564922332764,grad_norm: 0.9999990337224166, iteration: 137680
loss: 0.9869776964187622,grad_norm: 0.9352825552477717, iteration: 137681
loss: 0.9933474063873291,grad_norm: 0.8925662856528064, iteration: 137682
loss: 0.9862171411514282,grad_norm: 0.900790962943032, iteration: 137683
loss: 1.0413111448287964,grad_norm: 0.9999995150603848, iteration: 137684
loss: 1.010754108428955,grad_norm: 0.9999996245577114, iteration: 137685
loss: 0.9859784841537476,grad_norm: 0.9999990788639879, iteration: 137686
loss: 1.014681339263916,grad_norm: 0.9999990561130851, iteration: 137687
loss: 1.0090306997299194,grad_norm: 0.9999991387946212, iteration: 137688
loss: 0.9941598773002625,grad_norm: 0.9999993173671295, iteration: 137689
loss: 0.9876708984375,grad_norm: 0.9302104424200525, iteration: 137690
loss: 1.0349231958389282,grad_norm: 0.9999990542863907, iteration: 137691
loss: 0.9889089465141296,grad_norm: 0.9999996097466762, iteration: 137692
loss: 1.020638346672058,grad_norm: 0.9810136589698735, iteration: 137693
loss: 0.9599844217300415,grad_norm: 0.999999147398418, iteration: 137694
loss: 1.009799838066101,grad_norm: 0.9999990247298798, iteration: 137695
loss: 0.9958091974258423,grad_norm: 0.9770739667307746, iteration: 137696
loss: 0.9967356324195862,grad_norm: 0.9999990863302157, iteration: 137697
loss: 0.985089123249054,grad_norm: 0.8389341700110966, iteration: 137698
loss: 1.0028713941574097,grad_norm: 0.99999906091963, iteration: 137699
loss: 0.9966902732849121,grad_norm: 0.9670230730763664, iteration: 137700
loss: 1.0021506547927856,grad_norm: 0.9999991684253878, iteration: 137701
loss: 0.9981187582015991,grad_norm: 0.9999991166995551, iteration: 137702
loss: 0.9890900254249573,grad_norm: 0.9457729053470859, iteration: 137703
loss: 0.9921983480453491,grad_norm: 0.8583519812140558, iteration: 137704
loss: 1.0153330564498901,grad_norm: 0.999999315342708, iteration: 137705
loss: 0.9745447635650635,grad_norm: 0.892710282127026, iteration: 137706
loss: 0.9897171258926392,grad_norm: 0.995469563572065, iteration: 137707
loss: 0.9933232665061951,grad_norm: 0.9999992537007502, iteration: 137708
loss: 1.0004581212997437,grad_norm: 0.8197159234256656, iteration: 137709
loss: 0.9925692081451416,grad_norm: 0.9999989776269874, iteration: 137710
loss: 0.9920440912246704,grad_norm: 0.9999991033758713, iteration: 137711
loss: 1.0119887590408325,grad_norm: 0.8539800660405008, iteration: 137712
loss: 0.9985307455062866,grad_norm: 0.9999990648591917, iteration: 137713
loss: 1.02027428150177,grad_norm: 0.999999265513051, iteration: 137714
loss: 1.0412148237228394,grad_norm: 0.9213350680567888, iteration: 137715
loss: 1.00716233253479,grad_norm: 0.9999993407505308, iteration: 137716
loss: 1.017958641052246,grad_norm: 0.9999990730014529, iteration: 137717
loss: 1.0055378675460815,grad_norm: 0.9677028064859101, iteration: 137718
loss: 0.9786803126335144,grad_norm: 0.9999991238371202, iteration: 137719
loss: 0.9759120941162109,grad_norm: 0.9680264791938026, iteration: 137720
loss: 1.0059823989868164,grad_norm: 0.9425449999928794, iteration: 137721
loss: 1.0313622951507568,grad_norm: 0.9013151914947757, iteration: 137722
loss: 0.995296835899353,grad_norm: 0.999999192602018, iteration: 137723
loss: 1.0263872146606445,grad_norm: 0.8324230096923179, iteration: 137724
loss: 1.001036286354065,grad_norm: 0.9282423548966883, iteration: 137725
loss: 0.9961539506912231,grad_norm: 0.9701544782106898, iteration: 137726
loss: 0.9884707927703857,grad_norm: 0.9999991905769968, iteration: 137727
loss: 1.0192334651947021,grad_norm: 0.8574943984741897, iteration: 137728
loss: 1.0036488771438599,grad_norm: 0.9999991167752698, iteration: 137729
loss: 0.9985504150390625,grad_norm: 0.8550803435545586, iteration: 137730
loss: 0.9961095452308655,grad_norm: 0.9999992292450689, iteration: 137731
loss: 1.0218783617019653,grad_norm: 0.9999995312817238, iteration: 137732
loss: 1.0174996852874756,grad_norm: 0.9999991689455248, iteration: 137733
loss: 1.0021131038665771,grad_norm: 0.96341006733559, iteration: 137734
loss: 1.071227788925171,grad_norm: 0.9999992438318626, iteration: 137735
loss: 0.9920865893363953,grad_norm: 0.9999990624610726, iteration: 137736
loss: 1.0594600439071655,grad_norm: 0.9999997198807695, iteration: 137737
loss: 1.0101618766784668,grad_norm: 0.9999993555572677, iteration: 137738
loss: 1.0259904861450195,grad_norm: 0.99999910368795, iteration: 137739
loss: 1.0005329847335815,grad_norm: 0.9514765217398634, iteration: 137740
loss: 0.9930932521820068,grad_norm: 0.9211284577380218, iteration: 137741
loss: 0.9780269265174866,grad_norm: 0.8930710152576501, iteration: 137742
loss: 1.00318443775177,grad_norm: 0.9926370361172018, iteration: 137743
loss: 0.9725107550621033,grad_norm: 0.9999990301193176, iteration: 137744
loss: 1.0187625885009766,grad_norm: 0.9083483135697376, iteration: 137745
loss: 1.0437536239624023,grad_norm: 0.9999991783392608, iteration: 137746
loss: 0.9859472513198853,grad_norm: 0.9114647598850293, iteration: 137747
loss: 0.9732409119606018,grad_norm: 0.9741473382074609, iteration: 137748
loss: 0.9905588030815125,grad_norm: 0.9826730390939304, iteration: 137749
loss: 0.9750657081604004,grad_norm: 0.9671212837490784, iteration: 137750
loss: 0.9537650346755981,grad_norm: 0.9999990048258365, iteration: 137751
loss: 0.983372151851654,grad_norm: 0.9417280892413856, iteration: 137752
loss: 0.9900029897689819,grad_norm: 0.8388692878222606, iteration: 137753
loss: 1.0170648097991943,grad_norm: 0.9715263502322404, iteration: 137754
loss: 0.9214910268783569,grad_norm: 0.880998752921612, iteration: 137755
loss: 0.9669913053512573,grad_norm: 0.999999017456359, iteration: 137756
loss: 1.0257141590118408,grad_norm: 0.9999991990213496, iteration: 137757
loss: 1.0105922222137451,grad_norm: 0.9013318277063673, iteration: 137758
loss: 1.0172957181930542,grad_norm: 0.9044678170407279, iteration: 137759
loss: 0.9910889267921448,grad_norm: 0.9999990854164784, iteration: 137760
loss: 1.0043087005615234,grad_norm: 0.9999991214804532, iteration: 137761
loss: 1.0123989582061768,grad_norm: 0.9533426634678194, iteration: 137762
loss: 1.0032726526260376,grad_norm: 0.9999991138519894, iteration: 137763
loss: 0.9988573789596558,grad_norm: 0.827368584777686, iteration: 137764
loss: 1.0022507905960083,grad_norm: 0.9999992923233865, iteration: 137765
loss: 1.0060237646102905,grad_norm: 0.9321825013628035, iteration: 137766
loss: 0.9868243932723999,grad_norm: 0.9290081219786241, iteration: 137767
loss: 1.0435283184051514,grad_norm: 0.9999990553292379, iteration: 137768
loss: 1.01150643825531,grad_norm: 0.9999992686028348, iteration: 137769
loss: 1.0291072130203247,grad_norm: 0.8484781699998083, iteration: 137770
loss: 1.0036211013793945,grad_norm: 0.9497677360927144, iteration: 137771
loss: 1.0232055187225342,grad_norm: 0.9999990734293797, iteration: 137772
loss: 1.0014097690582275,grad_norm: 0.9896963170869736, iteration: 137773
loss: 1.0298868417739868,grad_norm: 0.9999992258194048, iteration: 137774
loss: 1.002872347831726,grad_norm: 0.9999992531994414, iteration: 137775
loss: 1.0194711685180664,grad_norm: 0.99999912692063, iteration: 137776
loss: 1.0095994472503662,grad_norm: 0.9999991748180225, iteration: 137777
loss: 1.0103996992111206,grad_norm: 0.9329012850801714, iteration: 137778
loss: 1.003555178642273,grad_norm: 0.9999991864621831, iteration: 137779
loss: 1.0124281644821167,grad_norm: 0.9498570059117093, iteration: 137780
loss: 1.0308846235275269,grad_norm: 0.9999991827700909, iteration: 137781
loss: 0.9626327753067017,grad_norm: 0.99999925823, iteration: 137782
loss: 0.981408417224884,grad_norm: 0.9999990032861849, iteration: 137783
loss: 1.0165040493011475,grad_norm: 0.847372361297682, iteration: 137784
loss: 1.0120455026626587,grad_norm: 0.9999992120825564, iteration: 137785
loss: 0.991480827331543,grad_norm: 0.8972360323612807, iteration: 137786
loss: 0.9703311324119568,grad_norm: 0.9999991211440191, iteration: 137787
loss: 1.0173635482788086,grad_norm: 0.9679895445307001, iteration: 137788
loss: 0.9877853989601135,grad_norm: 0.9999990914425329, iteration: 137789
loss: 0.9819894433021545,grad_norm: 0.9999994290770219, iteration: 137790
loss: 0.9773745536804199,grad_norm: 0.8367530932144621, iteration: 137791
loss: 1.005495309829712,grad_norm: 0.9706137988965275, iteration: 137792
loss: 1.0059963464736938,grad_norm: 0.8761697871191783, iteration: 137793
loss: 0.9968990087509155,grad_norm: 0.9999991424395978, iteration: 137794
loss: 1.0122580528259277,grad_norm: 0.9999989118841184, iteration: 137795
loss: 1.0022454261779785,grad_norm: 0.9999998511913372, iteration: 137796
loss: 0.9606316089630127,grad_norm: 0.8458931490356847, iteration: 137797
loss: 1.061431884765625,grad_norm: 0.9999998769458396, iteration: 137798
loss: 1.0231096744537354,grad_norm: 0.9999990978085167, iteration: 137799
loss: 0.9908226728439331,grad_norm: 0.999999185467234, iteration: 137800
loss: 0.9870526194572449,grad_norm: 0.8701915269858833, iteration: 137801
loss: 1.0015199184417725,grad_norm: 0.9999991338469347, iteration: 137802
loss: 1.1090904474258423,grad_norm: 0.9999992916253293, iteration: 137803
loss: 1.008643627166748,grad_norm: 0.9999991630857741, iteration: 137804
loss: 0.9869343042373657,grad_norm: 0.892964572066961, iteration: 137805
loss: 1.0016279220581055,grad_norm: 0.8946933054701725, iteration: 137806
loss: 1.0878757238388062,grad_norm: 0.9999995233820622, iteration: 137807
loss: 1.000319004058838,grad_norm: 0.9474189637041983, iteration: 137808
loss: 1.0236972570419312,grad_norm: 0.9529239819504881, iteration: 137809
loss: 1.0035146474838257,grad_norm: 0.9999991541318121, iteration: 137810
loss: 1.011553406715393,grad_norm: 0.9999989569157255, iteration: 137811
loss: 0.9898545742034912,grad_norm: 0.9999991758804186, iteration: 137812
loss: 1.0245779752731323,grad_norm: 0.9999990991237314, iteration: 137813
loss: 0.9800440073013306,grad_norm: 0.8861747929051982, iteration: 137814
loss: 0.9923218488693237,grad_norm: 0.9999998293287594, iteration: 137815
loss: 0.9731675982475281,grad_norm: 0.9528926986162386, iteration: 137816
loss: 1.023051142692566,grad_norm: 0.9999992233589983, iteration: 137817
loss: 0.9775357246398926,grad_norm: 0.9850537982934401, iteration: 137818
loss: 0.9693632125854492,grad_norm: 0.9736882937794398, iteration: 137819
loss: 0.999443769454956,grad_norm: 0.9477070333740824, iteration: 137820
loss: 1.0186043977737427,grad_norm: 0.8314821686540687, iteration: 137821
loss: 0.9845893979072571,grad_norm: 0.9999990361055167, iteration: 137822
loss: 1.0032521486282349,grad_norm: 0.9999992424279502, iteration: 137823
loss: 1.0031136274337769,grad_norm: 0.9999990443828083, iteration: 137824
loss: 1.0175745487213135,grad_norm: 0.9632098963294407, iteration: 137825
loss: 1.0233758687973022,grad_norm: 0.9181551756508436, iteration: 137826
loss: 1.0154738426208496,grad_norm: 0.9167308770300752, iteration: 137827
loss: 1.053330898284912,grad_norm: 0.9999995665440037, iteration: 137828
loss: 1.0087904930114746,grad_norm: 0.999999019988422, iteration: 137829
loss: 0.9940882325172424,grad_norm: 0.9999991163293542, iteration: 137830
loss: 1.0043854713439941,grad_norm: 0.9999992342856362, iteration: 137831
loss: 0.9986478090286255,grad_norm: 0.9347187640045983, iteration: 137832
loss: 1.0022172927856445,grad_norm: 0.9586137841665411, iteration: 137833
loss: 1.008829951286316,grad_norm: 0.9999990938241138, iteration: 137834
loss: 0.9694750308990479,grad_norm: 0.9730471258014082, iteration: 137835
loss: 1.0207984447479248,grad_norm: 0.9999990799846434, iteration: 137836
loss: 1.0043444633483887,grad_norm: 0.9366429671382214, iteration: 137837
loss: 0.9923439621925354,grad_norm: 0.9288940460899815, iteration: 137838
loss: 1.0198774337768555,grad_norm: 0.9999991555058068, iteration: 137839
loss: 1.000733733177185,grad_norm: 0.9999990839344877, iteration: 137840
loss: 1.0226035118103027,grad_norm: 0.9986608741619912, iteration: 137841
loss: 0.9927226901054382,grad_norm: 0.9999992703641444, iteration: 137842
loss: 1.0024516582489014,grad_norm: 0.9441999032397476, iteration: 137843
loss: 0.9935837388038635,grad_norm: 0.9999990414478086, iteration: 137844
loss: 1.0154716968536377,grad_norm: 0.9999989926224551, iteration: 137845
loss: 0.9737492203712463,grad_norm: 0.9486065117641377, iteration: 137846
loss: 0.9666622877120972,grad_norm: 0.9999991495239589, iteration: 137847
loss: 0.9923949241638184,grad_norm: 0.9999991712971965, iteration: 137848
loss: 1.0062696933746338,grad_norm: 0.9387538801521778, iteration: 137849
loss: 1.0290292501449585,grad_norm: 0.9880856320903632, iteration: 137850
loss: 1.0015459060668945,grad_norm: 0.9540640787320471, iteration: 137851
loss: 0.9794004559516907,grad_norm: 0.9999990829801995, iteration: 137852
loss: 1.0065488815307617,grad_norm: 0.9999996381970507, iteration: 137853
loss: 0.9976616501808167,grad_norm: 0.9999990073072004, iteration: 137854
loss: 0.9728670716285706,grad_norm: 0.9999989953558912, iteration: 137855
loss: 0.9855712652206421,grad_norm: 0.9999991317936419, iteration: 137856
loss: 0.9865515232086182,grad_norm: 0.9999990705567302, iteration: 137857
loss: 0.9921151995658875,grad_norm: 0.9488479092663682, iteration: 137858
loss: 1.0296449661254883,grad_norm: 0.949522845952327, iteration: 137859
loss: 0.9858437776565552,grad_norm: 0.9592714569014739, iteration: 137860
loss: 1.017012357711792,grad_norm: 0.9999992580592821, iteration: 137861
loss: 0.9823651909828186,grad_norm: 0.9999997168377004, iteration: 137862
loss: 0.9667001366615295,grad_norm: 0.99999918549021, iteration: 137863
loss: 0.9899083971977234,grad_norm: 0.9999990799507548, iteration: 137864
loss: 1.0092718601226807,grad_norm: 0.9999991125409425, iteration: 137865
loss: 0.9772112965583801,grad_norm: 0.9999990691686826, iteration: 137866
loss: 1.0026637315750122,grad_norm: 0.8274213079337803, iteration: 137867
loss: 0.9727131128311157,grad_norm: 0.9785036026234147, iteration: 137868
loss: 0.9971528649330139,grad_norm: 0.8797923695728961, iteration: 137869
loss: 1.0403094291687012,grad_norm: 0.9415165331242921, iteration: 137870
loss: 0.9852933287620544,grad_norm: 0.8459905223877144, iteration: 137871
loss: 0.9911821484565735,grad_norm: 0.9999989342600689, iteration: 137872
loss: 1.0149121284484863,grad_norm: 0.9999990578372552, iteration: 137873
loss: 1.0011892318725586,grad_norm: 0.9544077569155592, iteration: 137874
loss: 1.0046977996826172,grad_norm: 0.9936967055918436, iteration: 137875
loss: 0.9778519868850708,grad_norm: 0.9478045583470186, iteration: 137876
loss: 0.9957724809646606,grad_norm: 0.7685473591579703, iteration: 137877
loss: 0.9928681254386902,grad_norm: 0.9999991713301852, iteration: 137878
loss: 1.013754963874817,grad_norm: 0.8852978045087871, iteration: 137879
loss: 0.9894999265670776,grad_norm: 0.99999918496124, iteration: 137880
loss: 0.988492488861084,grad_norm: 0.9999991153077135, iteration: 137881
loss: 0.9759885668754578,grad_norm: 0.999999178428729, iteration: 137882
loss: 0.9683334231376648,grad_norm: 0.9999991355217178, iteration: 137883
loss: 1.0126543045043945,grad_norm: 0.9999991300501014, iteration: 137884
loss: 1.055983304977417,grad_norm: 0.9999993424742771, iteration: 137885
loss: 1.0083361864089966,grad_norm: 0.9338416483866082, iteration: 137886
loss: 0.980882465839386,grad_norm: 0.9999990295422932, iteration: 137887
loss: 1.004930853843689,grad_norm: 0.822334034884062, iteration: 137888
loss: 1.0258427858352661,grad_norm: 0.9999991030213906, iteration: 137889
loss: 1.0270130634307861,grad_norm: 0.9999989408841556, iteration: 137890
loss: 0.9880238771438599,grad_norm: 0.8732267831235212, iteration: 137891
loss: 1.0004956722259521,grad_norm: 0.8546142121231205, iteration: 137892
loss: 1.0174249410629272,grad_norm: 0.9999991440113298, iteration: 137893
loss: 0.9994838833808899,grad_norm: 0.9926391283161445, iteration: 137894
loss: 1.025343418121338,grad_norm: 0.8955835079470501, iteration: 137895
loss: 0.9615887403488159,grad_norm: 0.940954032546369, iteration: 137896
loss: 0.9676913619041443,grad_norm: 0.9419034369606093, iteration: 137897
loss: 1.0156224966049194,grad_norm: 0.9999990847363022, iteration: 137898
loss: 0.9682604670524597,grad_norm: 0.9652036703107105, iteration: 137899
loss: 1.0529999732971191,grad_norm: 0.9999995604347691, iteration: 137900
loss: 0.9891254901885986,grad_norm: 0.9999992309596194, iteration: 137901
loss: 1.0013407468795776,grad_norm: 0.9999990122818361, iteration: 137902
loss: 1.0078390836715698,grad_norm: 0.999999043117126, iteration: 137903
loss: 1.0118099451065063,grad_norm: 0.919045539140165, iteration: 137904
loss: 0.9610307216644287,grad_norm: 0.9999991290717953, iteration: 137905
loss: 1.0034775733947754,grad_norm: 0.984528267772416, iteration: 137906
loss: 0.9968557953834534,grad_norm: 0.8045758880657994, iteration: 137907
loss: 1.014579176902771,grad_norm: 0.8710074580920036, iteration: 137908
loss: 0.9965827465057373,grad_norm: 0.9395152358406567, iteration: 137909
loss: 0.997754693031311,grad_norm: 0.9999992042111864, iteration: 137910
loss: 1.0362310409545898,grad_norm: 0.9999990962717381, iteration: 137911
loss: 0.9966651201248169,grad_norm: 0.9999992111222125, iteration: 137912
loss: 1.0220941305160522,grad_norm: 0.924857806881446, iteration: 137913
loss: 1.0046888589859009,grad_norm: 0.9337417316358991, iteration: 137914
loss: 1.0068614482879639,grad_norm: 0.9999990190069078, iteration: 137915
loss: 0.9987654685974121,grad_norm: 0.9999990407340463, iteration: 137916
loss: 0.988888680934906,grad_norm: 0.999998987119595, iteration: 137917
loss: 0.9919871091842651,grad_norm: 0.9999992165336404, iteration: 137918
loss: 1.0296062231063843,grad_norm: 0.9628747464081515, iteration: 137919
loss: 0.9875708818435669,grad_norm: 0.9869468655693726, iteration: 137920
loss: 1.0102442502975464,grad_norm: 0.9999991134361139, iteration: 137921
loss: 1.0239903926849365,grad_norm: 0.9999991545346538, iteration: 137922
loss: 0.9853230118751526,grad_norm: 0.9999991427302262, iteration: 137923
loss: 1.0187617540359497,grad_norm: 0.9999991131714726, iteration: 137924
loss: 1.0042057037353516,grad_norm: 0.9999991786250769, iteration: 137925
loss: 1.0148814916610718,grad_norm: 0.9999990844021044, iteration: 137926
loss: 0.9951867461204529,grad_norm: 0.9999991103730835, iteration: 137927
loss: 1.0121228694915771,grad_norm: 0.9999992344506585, iteration: 137928
loss: 1.0332227945327759,grad_norm: 0.9999993009432017, iteration: 137929
loss: 1.0198835134506226,grad_norm: 0.9999991618239276, iteration: 137930
loss: 0.9900796413421631,grad_norm: 0.9999991290815099, iteration: 137931
loss: 0.953104555606842,grad_norm: 0.9999991725550017, iteration: 137932
loss: 0.9917511343955994,grad_norm: 0.9999990913577278, iteration: 137933
loss: 0.9797957539558411,grad_norm: 0.9999989218258653, iteration: 137934
loss: 1.0213865041732788,grad_norm: 0.9115643287026164, iteration: 137935
loss: 1.0023670196533203,grad_norm: 0.9939716887517109, iteration: 137936
loss: 1.0205543041229248,grad_norm: 0.9134460179434997, iteration: 137937
loss: 0.9886914491653442,grad_norm: 0.9098173289795651, iteration: 137938
loss: 0.9852869510650635,grad_norm: 0.9999990327728893, iteration: 137939
loss: 0.9826276898384094,grad_norm: 0.999999171323101, iteration: 137940
loss: 0.9726060032844543,grad_norm: 0.9947872276763718, iteration: 137941
loss: 1.0104092359542847,grad_norm: 0.9999991365102773, iteration: 137942
loss: 0.9797966480255127,grad_norm: 0.9006709753625991, iteration: 137943
loss: 1.0067791938781738,grad_norm: 0.9542599874695912, iteration: 137944
loss: 0.9766181707382202,grad_norm: 0.9062246999633274, iteration: 137945
loss: 0.9837337732315063,grad_norm: 0.9999995677181411, iteration: 137946
loss: 0.9703608155250549,grad_norm: 0.9999990727392432, iteration: 137947
loss: 0.9863042831420898,grad_norm: 0.9999991766019285, iteration: 137948
loss: 1.0252211093902588,grad_norm: 0.9844795268752253, iteration: 137949
loss: 0.9632120132446289,grad_norm: 0.9999988770571189, iteration: 137950
loss: 1.0190016031265259,grad_norm: 0.9999991277717566, iteration: 137951
loss: 0.9712777733802795,grad_norm: 0.9999991165003024, iteration: 137952
loss: 1.0353262424468994,grad_norm: 0.9677932325216895, iteration: 137953
loss: 1.0125298500061035,grad_norm: 0.814833410159139, iteration: 137954
loss: 1.0129292011260986,grad_norm: 0.9494668439888616, iteration: 137955
loss: 1.0338859558105469,grad_norm: 0.9999991383480404, iteration: 137956
loss: 0.9942763447761536,grad_norm: 0.8349985340576623, iteration: 137957
loss: 0.9991680383682251,grad_norm: 0.9999991439355033, iteration: 137958
loss: 1.0349416732788086,grad_norm: 0.9999990630427988, iteration: 137959
loss: 1.004479169845581,grad_norm: 0.8505138138642471, iteration: 137960
loss: 0.9919306635856628,grad_norm: 0.9999990186362063, iteration: 137961
loss: 1.0144016742706299,grad_norm: 0.999304991016698, iteration: 137962
loss: 0.9991741180419922,grad_norm: 0.9999990497978235, iteration: 137963
loss: 0.9988076686859131,grad_norm: 0.9728326858172351, iteration: 137964
loss: 1.0082708597183228,grad_norm: 0.9399515958865884, iteration: 137965
loss: 0.9997530579566956,grad_norm: 0.8844337025631649, iteration: 137966
loss: 1.007880449295044,grad_norm: 0.9999990631856059, iteration: 137967
loss: 0.9708421230316162,grad_norm: 0.9171885218642944, iteration: 137968
loss: 0.9815777540206909,grad_norm: 0.9999991115870646, iteration: 137969
loss: 1.0287081003189087,grad_norm: 0.9877395960680918, iteration: 137970
loss: 1.0224874019622803,grad_norm: 0.8916607820995459, iteration: 137971
loss: 1.0357457399368286,grad_norm: 0.9999991260293294, iteration: 137972
loss: 1.0078110694885254,grad_norm: 0.9999992405955774, iteration: 137973
loss: 0.999544084072113,grad_norm: 0.8367232049893438, iteration: 137974
loss: 0.9384108185768127,grad_norm: 0.9017993452001583, iteration: 137975
loss: 0.9886442422866821,grad_norm: 0.999999255270321, iteration: 137976
loss: 1.015668272972107,grad_norm: 0.9999990884673207, iteration: 137977
loss: 0.9508951306343079,grad_norm: 0.9999992157144694, iteration: 137978
loss: 0.9862006902694702,grad_norm: 0.9999990641763772, iteration: 137979
loss: 1.0133434534072876,grad_norm: 0.999998945133706, iteration: 137980
loss: 0.9635348916053772,grad_norm: 0.9999992191822897, iteration: 137981
loss: 0.9934391975402832,grad_norm: 0.9999996024740236, iteration: 137982
loss: 1.0164681673049927,grad_norm: 0.9583865813713776, iteration: 137983
loss: 1.0045908689498901,grad_norm: 0.999999130268982, iteration: 137984
loss: 0.9811398983001709,grad_norm: 0.8352252351557865, iteration: 137985
loss: 1.0018092393875122,grad_norm: 0.9999991688484466, iteration: 137986
loss: 1.002259373664856,grad_norm: 0.8762273430311932, iteration: 137987
loss: 1.0043684244155884,grad_norm: 0.9999990914834486, iteration: 137988
loss: 1.0017497539520264,grad_norm: 0.8830771568276993, iteration: 137989
loss: 1.0075833797454834,grad_norm: 0.9999991298191651, iteration: 137990
loss: 0.9941877126693726,grad_norm: 0.9999991280408042, iteration: 137991
loss: 0.9730989933013916,grad_norm: 0.999999104931446, iteration: 137992
loss: 0.9506464004516602,grad_norm: 0.9703363528032026, iteration: 137993
loss: 0.9963890314102173,grad_norm: 0.797757074335369, iteration: 137994
loss: 0.9867482781410217,grad_norm: 0.8251337018356995, iteration: 137995
loss: 1.017554521560669,grad_norm: 0.9205558085447269, iteration: 137996
loss: 1.0037590265274048,grad_norm: 0.9781993585274746, iteration: 137997
loss: 0.9723514914512634,grad_norm: 0.9989864388178612, iteration: 137998
loss: 1.0276752710342407,grad_norm: 0.9850133657659269, iteration: 137999
loss: 1.0740787982940674,grad_norm: 0.9999993300774015, iteration: 138000
loss: 1.0181870460510254,grad_norm: 0.9803843338916031, iteration: 138001
loss: 0.9923978447914124,grad_norm: 0.9999990368249023, iteration: 138002
loss: 1.0245392322540283,grad_norm: 0.9999992348828385, iteration: 138003
loss: 1.0000970363616943,grad_norm: 0.9157031461326359, iteration: 138004
loss: 0.986985981464386,grad_norm: 0.9806837127851806, iteration: 138005
loss: 0.9749858379364014,grad_norm: 0.9999992751414322, iteration: 138006
loss: 0.982480525970459,grad_norm: 0.9523906894142389, iteration: 138007
loss: 0.9378513097763062,grad_norm: 0.8900076089669252, iteration: 138008
loss: 1.0086883306503296,grad_norm: 0.8358231575627445, iteration: 138009
loss: 1.0384961366653442,grad_norm: 0.9999992494086584, iteration: 138010
loss: 0.9746707677841187,grad_norm: 0.9607103606964369, iteration: 138011
loss: 1.0105639696121216,grad_norm: 0.9999990299252278, iteration: 138012
loss: 0.9564921259880066,grad_norm: 0.9999991183479057, iteration: 138013
loss: 1.0244125127792358,grad_norm: 0.999999252411969, iteration: 138014
loss: 0.9733476638793945,grad_norm: 0.969503901536792, iteration: 138015
loss: 0.9889011979103088,grad_norm: 0.99999932312233, iteration: 138016
loss: 0.9508947730064392,grad_norm: 0.9696239502307067, iteration: 138017
loss: 0.9843488931655884,grad_norm: 0.8499101859417506, iteration: 138018
loss: 1.0044642686843872,grad_norm: 0.858674759223221, iteration: 138019
loss: 0.9959926605224609,grad_norm: 0.9999990590115323, iteration: 138020
loss: 0.9926754832267761,grad_norm: 0.9437773457305492, iteration: 138021
loss: 0.9798358082771301,grad_norm: 0.9999990204613266, iteration: 138022
loss: 0.9930365085601807,grad_norm: 0.9999990592155535, iteration: 138023
loss: 1.0134097337722778,grad_norm: 0.9999991483688165, iteration: 138024
loss: 1.0315711498260498,grad_norm: 0.999999233794288, iteration: 138025
loss: 0.9906107783317566,grad_norm: 0.9999991736800443, iteration: 138026
loss: 0.9613691568374634,grad_norm: 0.9285469450155899, iteration: 138027
loss: 1.231887698173523,grad_norm: 0.999999248212547, iteration: 138028
loss: 1.017918348312378,grad_norm: 0.9687017013109178, iteration: 138029
loss: 0.979352593421936,grad_norm: 0.9304240541376726, iteration: 138030
loss: 0.9836216568946838,grad_norm: 0.9999991624348362, iteration: 138031
loss: 1.0085985660552979,grad_norm: 0.9142341757365162, iteration: 138032
loss: 1.0223294496536255,grad_norm: 0.9999995788277198, iteration: 138033
loss: 1.0277867317199707,grad_norm: 0.9840483571487986, iteration: 138034
loss: 0.9940086007118225,grad_norm: 0.9999993625527934, iteration: 138035
loss: 0.9529305100440979,grad_norm: 0.9445937615540779, iteration: 138036
loss: 0.9930920004844666,grad_norm: 0.9989113696535957, iteration: 138037
loss: 0.9771060347557068,grad_norm: 0.9999991153526196, iteration: 138038
loss: 0.9935621023178101,grad_norm: 0.9999991450063215, iteration: 138039
loss: 1.016857385635376,grad_norm: 0.9615698465597473, iteration: 138040
loss: 1.0235613584518433,grad_norm: 0.9425129994137278, iteration: 138041
loss: 1.0466866493225098,grad_norm: 0.9999990153878974, iteration: 138042
loss: 0.9745553731918335,grad_norm: 0.9999993141276434, iteration: 138043
loss: 0.9959484934806824,grad_norm: 0.9999990608336701, iteration: 138044
loss: 0.9814151525497437,grad_norm: 0.9717313388065015, iteration: 138045
loss: 0.9843661189079285,grad_norm: 0.9946427828585446, iteration: 138046
loss: 0.9826498627662659,grad_norm: 0.9999992562742941, iteration: 138047
loss: 0.9956209063529968,grad_norm: 0.999999009390381, iteration: 138048
loss: 0.9646114110946655,grad_norm: 0.8566228501957088, iteration: 138049
loss: 0.9625388383865356,grad_norm: 0.9039612785125327, iteration: 138050
loss: 1.0196800231933594,grad_norm: 0.9999991150665051, iteration: 138051
loss: 0.9934813976287842,grad_norm: 0.9999992058036539, iteration: 138052
loss: 1.0002738237380981,grad_norm: 0.9999991450905592, iteration: 138053
loss: 0.9762857556343079,grad_norm: 0.9999991576867627, iteration: 138054
loss: 1.0045448541641235,grad_norm: 0.9999990193396499, iteration: 138055
loss: 0.9875839352607727,grad_norm: 0.9999993575716403, iteration: 138056
loss: 0.984855592250824,grad_norm: 0.9999992641995553, iteration: 138057
loss: 1.0059077739715576,grad_norm: 0.9999994269032461, iteration: 138058
loss: 1.005384922027588,grad_norm: 0.9999989856875426, iteration: 138059
loss: 1.019256591796875,grad_norm: 0.910013758658058, iteration: 138060
loss: 0.9699528217315674,grad_norm: 0.9999992179193254, iteration: 138061
loss: 0.992035984992981,grad_norm: 0.8558705788393286, iteration: 138062
loss: 1.0113638639450073,grad_norm: 0.9999991066760513, iteration: 138063
loss: 1.0118719339370728,grad_norm: 0.9522126605809175, iteration: 138064
loss: 1.0154967308044434,grad_norm: 0.9263984452612993, iteration: 138065
loss: 1.0086973905563354,grad_norm: 0.9999989377095788, iteration: 138066
loss: 1.040165662765503,grad_norm: 0.9999990831182345, iteration: 138067
loss: 1.0163432359695435,grad_norm: 0.9999991217648768, iteration: 138068
loss: 1.010233998298645,grad_norm: 0.9502152336375344, iteration: 138069
loss: 0.9827211499214172,grad_norm: 0.9999991676329928, iteration: 138070
loss: 1.0024211406707764,grad_norm: 0.9999991045109027, iteration: 138071
loss: 0.9688102602958679,grad_norm: 0.9999990809958141, iteration: 138072
loss: 1.0031468868255615,grad_norm: 0.9999992367301952, iteration: 138073
loss: 0.9850690364837646,grad_norm: 0.9999990707010126, iteration: 138074
loss: 1.0155022144317627,grad_norm: 0.9999992287959678, iteration: 138075
loss: 1.0034416913986206,grad_norm: 0.999999102050125, iteration: 138076
loss: 1.017000675201416,grad_norm: 0.9552569231839646, iteration: 138077
loss: 1.0013234615325928,grad_norm: 0.8950913871736395, iteration: 138078
loss: 0.9559541344642639,grad_norm: 0.9999990968332745, iteration: 138079
loss: 1.0068241357803345,grad_norm: 0.9984588150638439, iteration: 138080
loss: 0.9817162156105042,grad_norm: 0.9999993064765204, iteration: 138081
loss: 0.9988570809364319,grad_norm: 0.9174540886784933, iteration: 138082
loss: 0.9947464466094971,grad_norm: 0.9999989692528156, iteration: 138083
loss: 1.0074635744094849,grad_norm: 0.9072253626975159, iteration: 138084
loss: 0.9674036502838135,grad_norm: 0.9082234465399138, iteration: 138085
loss: 0.9778876900672913,grad_norm: 0.9999990878124915, iteration: 138086
loss: 0.9818326830863953,grad_norm: 0.9999991606240531, iteration: 138087
loss: 1.0257161855697632,grad_norm: 0.9999993104911348, iteration: 138088
loss: 1.006658911705017,grad_norm: 0.9999990585688927, iteration: 138089
loss: 1.0368162393569946,grad_norm: 0.9999991870920606, iteration: 138090
loss: 0.9890179634094238,grad_norm: 0.9443237638175943, iteration: 138091
loss: 0.9547237157821655,grad_norm: 0.8787477503130166, iteration: 138092
loss: 0.9989577531814575,grad_norm: 0.9860499755060451, iteration: 138093
loss: 0.9862280488014221,grad_norm: 0.9817562927619027, iteration: 138094
loss: 1.0010815858840942,grad_norm: 0.9999989329989051, iteration: 138095
loss: 0.9676069021224976,grad_norm: 0.9999992213621764, iteration: 138096
loss: 0.9662750363349915,grad_norm: 0.9999990381840064, iteration: 138097
loss: 1.0313440561294556,grad_norm: 0.9999992594324091, iteration: 138098
loss: 1.0168265104293823,grad_norm: 0.9999993596315675, iteration: 138099
loss: 1.0241402387619019,grad_norm: 0.999999528445337, iteration: 138100
loss: 0.9696375727653503,grad_norm: 0.92558038616965, iteration: 138101
loss: 1.015135407447815,grad_norm: 0.9927196440579914, iteration: 138102
loss: 0.9902781844139099,grad_norm: 0.9999989519378131, iteration: 138103
loss: 1.0406804084777832,grad_norm: 0.9810656635541232, iteration: 138104
loss: 0.940783679485321,grad_norm: 0.9999991164097866, iteration: 138105
loss: 1.0252387523651123,grad_norm: 0.9999991182112918, iteration: 138106
loss: 1.0325270891189575,grad_norm: 0.9999993087107555, iteration: 138107
loss: 0.9977006912231445,grad_norm: 0.9999991953458225, iteration: 138108
loss: 0.987470805644989,grad_norm: 0.9999991719300814, iteration: 138109
loss: 0.9818905591964722,grad_norm: 0.999998952683027, iteration: 138110
loss: 1.0228519439697266,grad_norm: 0.9999992400225738, iteration: 138111
loss: 0.9798702001571655,grad_norm: 0.9461112646665473, iteration: 138112
loss: 1.020418643951416,grad_norm: 0.9999992332491774, iteration: 138113
loss: 1.024255394935608,grad_norm: 0.9999991079722302, iteration: 138114
loss: 0.9984220266342163,grad_norm: 0.9878396760757452, iteration: 138115
loss: 1.0508403778076172,grad_norm: 0.9999992008579374, iteration: 138116
loss: 0.9925473928451538,grad_norm: 0.9041864136540945, iteration: 138117
loss: 1.0068869590759277,grad_norm: 0.9999991070975586, iteration: 138118
loss: 0.9792050719261169,grad_norm: 0.9112649724907309, iteration: 138119
loss: 0.9883764982223511,grad_norm: 0.8674912951699746, iteration: 138120
loss: 0.9901303648948669,grad_norm: 0.999999268671064, iteration: 138121
loss: 1.000301718711853,grad_norm: 0.9999990819468798, iteration: 138122
loss: 1.0686482191085815,grad_norm: 0.9999991706368209, iteration: 138123
loss: 1.0069434642791748,grad_norm: 0.8583092652460136, iteration: 138124
loss: 0.9885067343711853,grad_norm: 0.8877219207803493, iteration: 138125
loss: 1.0198179483413696,grad_norm: 0.9294337748872432, iteration: 138126
loss: 0.9850567579269409,grad_norm: 0.914319578747155, iteration: 138127
loss: 0.973206639289856,grad_norm: 0.9999991383365594, iteration: 138128
loss: 1.0038502216339111,grad_norm: 0.9999990846632427, iteration: 138129
loss: 1.0076090097427368,grad_norm: 0.9999991420685851, iteration: 138130
loss: 1.0027152299880981,grad_norm: 0.999999122354199, iteration: 138131
loss: 0.9772785902023315,grad_norm: 0.891595508219158, iteration: 138132
loss: 1.0467661619186401,grad_norm: 0.8866813060719924, iteration: 138133
loss: 0.9982571005821228,grad_norm: 0.9999990820221588, iteration: 138134
loss: 1.016013741493225,grad_norm: 0.9617783784147295, iteration: 138135
loss: 1.0045419931411743,grad_norm: 0.9099313603477648, iteration: 138136
loss: 0.9856672286987305,grad_norm: 0.9999990387833302, iteration: 138137
loss: 1.0314981937408447,grad_norm: 0.9999993158332245, iteration: 138138
loss: 1.0082496404647827,grad_norm: 0.9762432595770418, iteration: 138139
loss: 0.997887372970581,grad_norm: 0.8209493471504309, iteration: 138140
loss: 0.9951791167259216,grad_norm: 0.9999990449788316, iteration: 138141
loss: 1.030009150505066,grad_norm: 0.9999990368724867, iteration: 138142
loss: 0.9801062941551208,grad_norm: 0.8677173826062794, iteration: 138143
loss: 1.0132725238800049,grad_norm: 0.9999991744373545, iteration: 138144
loss: 0.9790960550308228,grad_norm: 0.9999992397259686, iteration: 138145
loss: 1.023317813873291,grad_norm: 0.9999992524780114, iteration: 138146
loss: 1.0042955875396729,grad_norm: 0.9999992616841331, iteration: 138147
loss: 1.019350290298462,grad_norm: 0.9173753623478312, iteration: 138148
loss: 0.9770249128341675,grad_norm: 0.9999991903844694, iteration: 138149
loss: 0.9795171618461609,grad_norm: 0.999999084791894, iteration: 138150
loss: 0.9977854490280151,grad_norm: 0.9999990651911231, iteration: 138151
loss: 0.9694738388061523,grad_norm: 0.9793039894924241, iteration: 138152
loss: 0.9915258288383484,grad_norm: 0.9074717057932067, iteration: 138153
loss: 0.9907214045524597,grad_norm: 0.9090828601753462, iteration: 138154
loss: 1.0627650022506714,grad_norm: 0.9355595731170482, iteration: 138155
loss: 0.9952818751335144,grad_norm: 0.9999991925212908, iteration: 138156
loss: 0.9925948977470398,grad_norm: 0.7664141831821266, iteration: 138157
loss: 1.0487209558486938,grad_norm: 0.9999993998344617, iteration: 138158
loss: 1.0055516958236694,grad_norm: 0.8633582078117346, iteration: 138159
loss: 1.0049625635147095,grad_norm: 0.9999993053879757, iteration: 138160
loss: 0.9736723899841309,grad_norm: 0.999999119773945, iteration: 138161
loss: 0.9762724041938782,grad_norm: 0.9999989968289058, iteration: 138162
loss: 0.974335253238678,grad_norm: 0.999999270801514, iteration: 138163
loss: 1.0633983612060547,grad_norm: 0.9999997290647544, iteration: 138164
loss: 0.9582717418670654,grad_norm: 0.9999991786115081, iteration: 138165
loss: 1.0046627521514893,grad_norm: 0.9692645748211323, iteration: 138166
loss: 1.0052613019943237,grad_norm: 0.9999992952284208, iteration: 138167
loss: 1.01313054561615,grad_norm: 0.9999989050949656, iteration: 138168
loss: 0.9833310842514038,grad_norm: 0.9356808036928642, iteration: 138169
loss: 0.9569169878959656,grad_norm: 0.8157601370679172, iteration: 138170
loss: 1.0324934720993042,grad_norm: 0.9999991843568714, iteration: 138171
loss: 1.016196846961975,grad_norm: 0.9999990151725886, iteration: 138172
loss: 1.0144890546798706,grad_norm: 0.8939454751685135, iteration: 138173
loss: 1.0083658695220947,grad_norm: 0.99999909481986, iteration: 138174
loss: 0.9796634316444397,grad_norm: 0.9744723933797957, iteration: 138175
loss: 0.9729956984519958,grad_norm: 0.9999989904176558, iteration: 138176
loss: 1.0003868341445923,grad_norm: 0.9999992171800041, iteration: 138177
loss: 0.9709808826446533,grad_norm: 0.874045363969368, iteration: 138178
loss: 0.9966298937797546,grad_norm: 0.9316277953015788, iteration: 138179
loss: 1.0564510822296143,grad_norm: 0.9999991961225806, iteration: 138180
loss: 0.9954528212547302,grad_norm: 0.9999990926683937, iteration: 138181
loss: 0.9823112487792969,grad_norm: 0.9999991603005174, iteration: 138182
loss: 1.0125880241394043,grad_norm: 0.9999990592505251, iteration: 138183
loss: 0.9442911744117737,grad_norm: 0.9999991012198935, iteration: 138184
loss: 0.9805536270141602,grad_norm: 0.7803817534041818, iteration: 138185
loss: 0.9866178035736084,grad_norm: 0.9351946748247763, iteration: 138186
loss: 0.9875463247299194,grad_norm: 0.9999991251660229, iteration: 138187
loss: 0.9859071373939514,grad_norm: 0.8389519527678785, iteration: 138188
loss: 1.00818932056427,grad_norm: 0.9722925604487462, iteration: 138189
loss: 1.021050214767456,grad_norm: 0.999999136374671, iteration: 138190
loss: 1.012210726737976,grad_norm: 0.9829597832947373, iteration: 138191
loss: 0.9896480441093445,grad_norm: 0.9780103166184995, iteration: 138192
loss: 0.9954647421836853,grad_norm: 0.9999991054895033, iteration: 138193
loss: 0.9854320883750916,grad_norm: 0.9486173583886651, iteration: 138194
loss: 1.0005102157592773,grad_norm: 0.979811160911636, iteration: 138195
loss: 1.0285160541534424,grad_norm: 0.9999993370641669, iteration: 138196
loss: 0.9810381531715393,grad_norm: 0.8702408638041998, iteration: 138197
loss: 1.0181245803833008,grad_norm: 0.9922917869335742, iteration: 138198
loss: 0.985096275806427,grad_norm: 0.9290266957482017, iteration: 138199
loss: 0.9810092449188232,grad_norm: 0.999999098079419, iteration: 138200
loss: 1.0472030639648438,grad_norm: 0.9831338739314706, iteration: 138201
loss: 1.0238831043243408,grad_norm: 0.9920315015907039, iteration: 138202
loss: 0.958464503288269,grad_norm: 0.9999989981714397, iteration: 138203
loss: 0.992059588432312,grad_norm: 0.978455389346316, iteration: 138204
loss: 0.9683738946914673,grad_norm: 0.9999991586300238, iteration: 138205
loss: 0.9770360589027405,grad_norm: 0.9112263092113311, iteration: 138206
loss: 1.0182123184204102,grad_norm: 0.9099961952468308, iteration: 138207
loss: 0.9881119132041931,grad_norm: 0.999594912366707, iteration: 138208
loss: 1.1079941987991333,grad_norm: 0.9999992816586494, iteration: 138209
loss: 1.0316060781478882,grad_norm: 0.9999990148536275, iteration: 138210
loss: 1.0490155220031738,grad_norm: 0.9567813122206514, iteration: 138211
loss: 1.028140902519226,grad_norm: 0.9999990507295649, iteration: 138212
loss: 1.015112280845642,grad_norm: 0.9999991779284312, iteration: 138213
loss: 0.9990766048431396,grad_norm: 0.9999990638754325, iteration: 138214
loss: 0.9939552545547485,grad_norm: 0.9999994060888256, iteration: 138215
loss: 0.9958586692810059,grad_norm: 0.9999991465905671, iteration: 138216
loss: 1.0058249235153198,grad_norm: 0.9999990544800468, iteration: 138217
loss: 0.9853130578994751,grad_norm: 0.9722487419236946, iteration: 138218
loss: 1.0045021772384644,grad_norm: 0.9629041417789008, iteration: 138219
loss: 1.0099756717681885,grad_norm: 0.9527591187943286, iteration: 138220
loss: 0.9778744578361511,grad_norm: 0.9271933901900651, iteration: 138221
loss: 1.01835298538208,grad_norm: 0.9999990815681215, iteration: 138222
loss: 0.9481737613677979,grad_norm: 0.9267910715872515, iteration: 138223
loss: 0.978686511516571,grad_norm: 0.8976546049324186, iteration: 138224
loss: 1.0261414051055908,grad_norm: 0.9999992076249641, iteration: 138225
loss: 0.9485415816307068,grad_norm: 0.9134285987886528, iteration: 138226
loss: 1.0230134725570679,grad_norm: 0.9874633961741319, iteration: 138227
loss: 1.02640700340271,grad_norm: 0.9999991777662841, iteration: 138228
loss: 0.991484522819519,grad_norm: 0.9999991237939333, iteration: 138229
loss: 1.0043716430664062,grad_norm: 0.9999991167604705, iteration: 138230
loss: 1.0034297704696655,grad_norm: 0.9999991055344205, iteration: 138231
loss: 1.0174452066421509,grad_norm: 0.9566925812940693, iteration: 138232
loss: 0.9971675872802734,grad_norm: 0.9835259221898978, iteration: 138233
loss: 0.9988618493080139,grad_norm: 0.9999990822432729, iteration: 138234
loss: 0.9705113768577576,grad_norm: 0.8996262918002833, iteration: 138235
loss: 1.0210320949554443,grad_norm: 0.9999990604211182, iteration: 138236
loss: 1.0071887969970703,grad_norm: 0.9999989880765146, iteration: 138237
loss: 1.01192045211792,grad_norm: 0.916031715102367, iteration: 138238
loss: 0.9975460767745972,grad_norm: 0.9999989592125196, iteration: 138239
loss: 1.0083285570144653,grad_norm: 0.9999991766685739, iteration: 138240
loss: 0.9884778261184692,grad_norm: 0.999999127453527, iteration: 138241
loss: 1.0119338035583496,grad_norm: 0.9885942319084373, iteration: 138242
loss: 0.9834176898002625,grad_norm: 0.9999993574408184, iteration: 138243
loss: 1.0150017738342285,grad_norm: 0.9999991837385657, iteration: 138244
loss: 1.0141481161117554,grad_norm: 0.932830203358858, iteration: 138245
loss: 0.9830895662307739,grad_norm: 0.9999992611003059, iteration: 138246
loss: 1.035485029220581,grad_norm: 0.9396989780216168, iteration: 138247
loss: 0.9624613523483276,grad_norm: 0.9999992092153913, iteration: 138248
loss: 1.002671718597412,grad_norm: 0.7890527039089715, iteration: 138249
loss: 1.00704824924469,grad_norm: 0.9999990815675509, iteration: 138250
loss: 0.9710413217544556,grad_norm: 0.9999990441541661, iteration: 138251
loss: 1.0233607292175293,grad_norm: 0.9999990615951242, iteration: 138252
loss: 1.0184681415557861,grad_norm: 0.9999991740905677, iteration: 138253
loss: 0.9752979278564453,grad_norm: 0.964626654717216, iteration: 138254
loss: 1.014290690422058,grad_norm: 0.9999988850159907, iteration: 138255
loss: 1.0222595930099487,grad_norm: 0.9999988131714532, iteration: 138256
loss: 1.003484845161438,grad_norm: 0.9999989179122242, iteration: 138257
loss: 0.9842423796653748,grad_norm: 0.999999100617276, iteration: 138258
loss: 1.02277672290802,grad_norm: 0.9649645240720925, iteration: 138259
loss: 0.9891183972358704,grad_norm: 0.9999990290073535, iteration: 138260
loss: 1.018937349319458,grad_norm: 0.892421253442709, iteration: 138261
loss: 1.0060648918151855,grad_norm: 0.9999991552165997, iteration: 138262
loss: 0.994641125202179,grad_norm: 0.9999990146764763, iteration: 138263
loss: 1.0389472246170044,grad_norm: 0.999999236422291, iteration: 138264
loss: 1.0058012008666992,grad_norm: 0.9999990246055092, iteration: 138265
loss: 1.0173269510269165,grad_norm: 0.8970447574540691, iteration: 138266
loss: 0.98193359375,grad_norm: 0.9765652823509255, iteration: 138267
loss: 0.9801251888275146,grad_norm: 0.9999993509264907, iteration: 138268
loss: 1.0246230363845825,grad_norm: 0.9999993192972371, iteration: 138269
loss: 0.9709657430648804,grad_norm: 0.9999991597892012, iteration: 138270
loss: 0.9876529574394226,grad_norm: 0.9999991148525181, iteration: 138271
loss: 0.945626974105835,grad_norm: 0.9751762661956113, iteration: 138272
loss: 0.9883262515068054,grad_norm: 0.9999991733545108, iteration: 138273
loss: 0.9806851744651794,grad_norm: 0.8870850434605695, iteration: 138274
loss: 0.9864797592163086,grad_norm: 0.999736364187854, iteration: 138275
loss: 1.0163065195083618,grad_norm: 0.998847451465877, iteration: 138276
loss: 0.96220862865448,grad_norm: 0.9479727276518156, iteration: 138277
loss: 1.0011711120605469,grad_norm: 0.9999992391400371, iteration: 138278
loss: 1.0064537525177002,grad_norm: 0.8552481432861501, iteration: 138279
loss: 0.9626376032829285,grad_norm: 0.9999991690600895, iteration: 138280
loss: 1.0375099182128906,grad_norm: 0.9999990585404255, iteration: 138281
loss: 0.9632388949394226,grad_norm: 0.9590213356184489, iteration: 138282
loss: 0.9949931502342224,grad_norm: 0.9999991077674308, iteration: 138283
loss: 0.9844213724136353,grad_norm: 0.9999991564882705, iteration: 138284
loss: 0.9875273108482361,grad_norm: 0.99999902063744, iteration: 138285
loss: 0.9831095337867737,grad_norm: 0.9998453528479984, iteration: 138286
loss: 1.0135416984558105,grad_norm: 0.9953167365242549, iteration: 138287
loss: 0.9850649237632751,grad_norm: 0.9999991441132655, iteration: 138288
loss: 0.9783830642700195,grad_norm: 0.9351479888608484, iteration: 138289
loss: 1.0303280353546143,grad_norm: 0.8904039189207789, iteration: 138290
loss: 0.9909418821334839,grad_norm: 0.9999990784861505, iteration: 138291
loss: 1.0204352140426636,grad_norm: 0.938128610560969, iteration: 138292
loss: 1.0059727430343628,grad_norm: 0.999998941397861, iteration: 138293
loss: 1.0318645238876343,grad_norm: 0.999999048717935, iteration: 138294
loss: 1.0216543674468994,grad_norm: 0.9624289703274641, iteration: 138295
loss: 0.9554827213287354,grad_norm: 0.9710344430718143, iteration: 138296
loss: 0.9705204963684082,grad_norm: 0.9999991902625658, iteration: 138297
loss: 0.9789016842842102,grad_norm: 0.999999208933103, iteration: 138298
loss: 1.0141838788986206,grad_norm: 0.9999990094554014, iteration: 138299
loss: 1.007735252380371,grad_norm: 0.9999991247621225, iteration: 138300
loss: 1.0347270965576172,grad_norm: 0.9999996383702728, iteration: 138301
loss: 0.9870255589485168,grad_norm: 0.9999992296666401, iteration: 138302
loss: 0.9972021579742432,grad_norm: 0.9999991731377004, iteration: 138303
loss: 0.9915399551391602,grad_norm: 0.9999991088657461, iteration: 138304
loss: 1.0321482419967651,grad_norm: 0.9999990972568048, iteration: 138305
loss: 0.9944747090339661,grad_norm: 0.979294884096495, iteration: 138306
loss: 0.9771279692649841,grad_norm: 0.9484658841562554, iteration: 138307
loss: 0.9823861718177795,grad_norm: 0.9999990988644167, iteration: 138308
loss: 0.967257022857666,grad_norm: 0.907775211290971, iteration: 138309
loss: 0.9817992448806763,grad_norm: 0.9999991183154048, iteration: 138310
loss: 1.0437078475952148,grad_norm: 0.9999992061235642, iteration: 138311
loss: 1.036807656288147,grad_norm: 0.8453849018115478, iteration: 138312
loss: 1.0438096523284912,grad_norm: 0.9287892245752599, iteration: 138313
loss: 1.002242088317871,grad_norm: 0.9999993402754438, iteration: 138314
loss: 1.0155504941940308,grad_norm: 0.9999991075852993, iteration: 138315
loss: 1.001194715499878,grad_norm: 0.8639181361487053, iteration: 138316
loss: 1.0003477334976196,grad_norm: 0.9470198947885666, iteration: 138317
loss: 0.996888279914856,grad_norm: 0.9906463585229691, iteration: 138318
loss: 1.0191128253936768,grad_norm: 0.9372188650276793, iteration: 138319
loss: 1.0153486728668213,grad_norm: 0.9999996499732394, iteration: 138320
loss: 1.014064908027649,grad_norm: 0.933494429327538, iteration: 138321
loss: 1.0160202980041504,grad_norm: 0.9999990689186931, iteration: 138322
loss: 1.0043374300003052,grad_norm: 0.8902221094904016, iteration: 138323
loss: 0.9515864253044128,grad_norm: 0.9689593633538652, iteration: 138324
loss: 0.9842944741249084,grad_norm: 0.9999994718343282, iteration: 138325
loss: 0.9685978889465332,grad_norm: 0.9999992238927791, iteration: 138326
loss: 1.014037013053894,grad_norm: 0.9999996648112406, iteration: 138327
loss: 1.0207403898239136,grad_norm: 0.9999991341523584, iteration: 138328
loss: 0.9951587915420532,grad_norm: 0.9341364690219975, iteration: 138329
loss: 0.9842464327812195,grad_norm: 0.9999990551831538, iteration: 138330
loss: 1.0332489013671875,grad_norm: 0.9847913596240168, iteration: 138331
loss: 1.0113976001739502,grad_norm: 0.9999998664155623, iteration: 138332
loss: 0.9868659973144531,grad_norm: 0.9869548207404697, iteration: 138333
loss: 1.0014817714691162,grad_norm: 0.9151094440972347, iteration: 138334
loss: 0.9940329790115356,grad_norm: 0.9395475903662122, iteration: 138335
loss: 1.0091323852539062,grad_norm: 0.9999989508673939, iteration: 138336
loss: 1.0234184265136719,grad_norm: 0.9376261950233094, iteration: 138337
loss: 1.0698935985565186,grad_norm: 0.9999996791191429, iteration: 138338
loss: 1.0046178102493286,grad_norm: 0.9999991362809065, iteration: 138339
loss: 0.9652365446090698,grad_norm: 0.9999990961277102, iteration: 138340
loss: 1.0097788572311401,grad_norm: 0.9999990923774942, iteration: 138341
loss: 1.039697289466858,grad_norm: 0.9999991016060764, iteration: 138342
loss: 0.9935899972915649,grad_norm: 0.9999990150190641, iteration: 138343
loss: 0.9885463118553162,grad_norm: 0.9999991815853819, iteration: 138344
loss: 1.0186738967895508,grad_norm: 0.9839104386727028, iteration: 138345
loss: 0.9783713221549988,grad_norm: 0.8970729075051471, iteration: 138346
loss: 1.0851162672042847,grad_norm: 0.9999991466169951, iteration: 138347
loss: 1.00210440158844,grad_norm: 0.9999990664721975, iteration: 138348
loss: 1.0052196979522705,grad_norm: 0.9999990513954105, iteration: 138349
loss: 1.0218687057495117,grad_norm: 0.9999991757067417, iteration: 138350
loss: 1.0330696105957031,grad_norm: 0.9999992919181363, iteration: 138351
loss: 0.9540667533874512,grad_norm: 0.9999991944054484, iteration: 138352
loss: 1.0285403728485107,grad_norm: 0.999999199081011, iteration: 138353
loss: 1.012420892715454,grad_norm: 0.9620247200444242, iteration: 138354
loss: 1.0304410457611084,grad_norm: 0.9999991876382973, iteration: 138355
loss: 0.991151750087738,grad_norm: 0.9999990791946036, iteration: 138356
loss: 1.0329179763793945,grad_norm: 0.9644616761405496, iteration: 138357
loss: 0.9977995753288269,grad_norm: 0.9999990599480789, iteration: 138358
loss: 1.0074219703674316,grad_norm: 0.938578798243175, iteration: 138359
loss: 0.9659625887870789,grad_norm: 0.9999991701884644, iteration: 138360
loss: 0.9958136677742004,grad_norm: 0.9999991036330719, iteration: 138361
loss: 1.0120587348937988,grad_norm: 0.884497181218826, iteration: 138362
loss: 1.0015860795974731,grad_norm: 0.9921317828152967, iteration: 138363
loss: 1.0195873975753784,grad_norm: 0.9835303897902503, iteration: 138364
loss: 0.9553583860397339,grad_norm: 0.9999990220862977, iteration: 138365
loss: 1.0317535400390625,grad_norm: 0.9371026588091735, iteration: 138366
loss: 1.0048891305923462,grad_norm: 0.9999990612738587, iteration: 138367
loss: 0.9763868451118469,grad_norm: 0.9999991223310198, iteration: 138368
loss: 1.0057859420776367,grad_norm: 0.991643453445041, iteration: 138369
loss: 0.9962424635887146,grad_norm: 0.815157133471949, iteration: 138370
loss: 1.0239026546478271,grad_norm: 0.9993117636702985, iteration: 138371
loss: 1.0592854022979736,grad_norm: 0.9999991520450215, iteration: 138372
loss: 0.9997643828392029,grad_norm: 0.9677444327198276, iteration: 138373
loss: 1.0314860343933105,grad_norm: 0.9999992486949874, iteration: 138374
loss: 0.9902233481407166,grad_norm: 0.9635610859428002, iteration: 138375
loss: 0.9922916889190674,grad_norm: 0.9999992918629682, iteration: 138376
loss: 1.0040740966796875,grad_norm: 0.9999992687314457, iteration: 138377
loss: 1.0118871927261353,grad_norm: 0.9999991948684717, iteration: 138378
loss: 0.9878216981887817,grad_norm: 0.88472639127478, iteration: 138379
loss: 1.022405743598938,grad_norm: 0.9999991644183458, iteration: 138380
loss: 1.0031311511993408,grad_norm: 0.9999994342276414, iteration: 138381
loss: 0.9879943132400513,grad_norm: 0.9999992990497545, iteration: 138382
loss: 0.9804888963699341,grad_norm: 0.9999990272911092, iteration: 138383
loss: 1.0135407447814941,grad_norm: 0.999999118865027, iteration: 138384
loss: 0.9925573468208313,grad_norm: 0.9999991908392492, iteration: 138385
loss: 0.9845765829086304,grad_norm: 0.9999992034723294, iteration: 138386
loss: 0.9711377024650574,grad_norm: 0.9999990612327919, iteration: 138387
loss: 1.0305795669555664,grad_norm: 0.9999989985708821, iteration: 138388
loss: 0.9890946745872498,grad_norm: 0.9687512892607394, iteration: 138389
loss: 1.029344916343689,grad_norm: 0.9999990514544816, iteration: 138390
loss: 0.9748778343200684,grad_norm: 0.9999989966924685, iteration: 138391
loss: 1.0289850234985352,grad_norm: 0.9164263908021649, iteration: 138392
loss: 1.0161470174789429,grad_norm: 0.9999991118575534, iteration: 138393
loss: 1.0169970989227295,grad_norm: 0.9159621648463948, iteration: 138394
loss: 0.9801574945449829,grad_norm: 0.9999989526199551, iteration: 138395
loss: 0.9695131182670593,grad_norm: 0.9999989609657541, iteration: 138396
loss: 1.014812707901001,grad_norm: 0.9589715141494802, iteration: 138397
loss: 0.9645355343818665,grad_norm: 0.9416421587487277, iteration: 138398
loss: 0.996037483215332,grad_norm: 0.9999989802304833, iteration: 138399
loss: 1.0003244876861572,grad_norm: 0.8191818432229905, iteration: 138400
loss: 1.0023143291473389,grad_norm: 0.9999991602374382, iteration: 138401
loss: 0.9913191199302673,grad_norm: 0.999999190114052, iteration: 138402
loss: 0.9905505776405334,grad_norm: 0.9999990102677242, iteration: 138403
loss: 0.9650633931159973,grad_norm: 0.9688122625039188, iteration: 138404
loss: 0.9750383496284485,grad_norm: 0.9999991085110727, iteration: 138405
loss: 0.9911097884178162,grad_norm: 0.9999991919804884, iteration: 138406
loss: 0.9909324645996094,grad_norm: 0.9999993357397249, iteration: 138407
loss: 0.9973639845848083,grad_norm: 0.945275908636068, iteration: 138408
loss: 1.0294437408447266,grad_norm: 0.7666473090100099, iteration: 138409
loss: 0.9630008935928345,grad_norm: 0.9022915121333881, iteration: 138410
loss: 0.9803295731544495,grad_norm: 0.9031072435280465, iteration: 138411
loss: 0.9727678894996643,grad_norm: 0.9999990188762105, iteration: 138412
loss: 1.0186587572097778,grad_norm: 0.9999989676305088, iteration: 138413
loss: 0.9712949395179749,grad_norm: 0.9999990330778503, iteration: 138414
loss: 1.026002049446106,grad_norm: 0.9999992007501455, iteration: 138415
loss: 0.9880744218826294,grad_norm: 0.999999165203078, iteration: 138416
loss: 0.9901724457740784,grad_norm: 0.9999991454078924, iteration: 138417
loss: 1.0012022256851196,grad_norm: 0.9999990709286918, iteration: 138418
loss: 1.0627915859222412,grad_norm: 0.9999997139314083, iteration: 138419
loss: 0.961006760597229,grad_norm: 0.9999991176414152, iteration: 138420
loss: 1.0178476572036743,grad_norm: 0.7862064971808781, iteration: 138421
loss: 1.0250709056854248,grad_norm: 0.9999990947076177, iteration: 138422
loss: 0.9962322115898132,grad_norm: 0.9999991857139762, iteration: 138423
loss: 0.9874307513237,grad_norm: 0.7814308171622072, iteration: 138424
loss: 0.9778542518615723,grad_norm: 0.9999992392102119, iteration: 138425
loss: 1.0775223970413208,grad_norm: 0.9999991721328568, iteration: 138426
loss: 1.038078784942627,grad_norm: 0.9999995552201693, iteration: 138427
loss: 0.9721277952194214,grad_norm: 0.956621417238783, iteration: 138428
loss: 0.965611457824707,grad_norm: 0.9999997780200302, iteration: 138429
loss: 1.0338060855865479,grad_norm: 0.9729951249396633, iteration: 138430
loss: 0.9701545238494873,grad_norm: 0.9186486407510782, iteration: 138431
loss: 1.0046180486679077,grad_norm: 0.9999993167762458, iteration: 138432
loss: 1.017983317375183,grad_norm: 0.9037781381340897, iteration: 138433
loss: 1.0039598941802979,grad_norm: 0.9227726102193993, iteration: 138434
loss: 1.0028080940246582,grad_norm: 0.9999991714429101, iteration: 138435
loss: 0.9945622682571411,grad_norm: 0.9999991362095109, iteration: 138436
loss: 0.9847438335418701,grad_norm: 0.9999991249863361, iteration: 138437
loss: 1.0382556915283203,grad_norm: 0.9999991804829932, iteration: 138438
loss: 1.017808437347412,grad_norm: 0.9937113700221072, iteration: 138439
loss: 1.0253180265426636,grad_norm: 0.9869439669183774, iteration: 138440
loss: 1.0033284425735474,grad_norm: 0.9999991076209733, iteration: 138441
loss: 0.9749942421913147,grad_norm: 0.9617566465720121, iteration: 138442
loss: 1.0098274946212769,grad_norm: 0.9999990149472362, iteration: 138443
loss: 1.0635662078857422,grad_norm: 0.9999995742390767, iteration: 138444
loss: 0.9662202596664429,grad_norm: 0.9389098054485459, iteration: 138445
loss: 0.9947271943092346,grad_norm: 0.9999991294078054, iteration: 138446
loss: 0.9475468993186951,grad_norm: 0.9658427649720349, iteration: 138447
loss: 1.0038343667984009,grad_norm: 0.8114416998501368, iteration: 138448
loss: 0.9980528950691223,grad_norm: 0.999999097423703, iteration: 138449
loss: 0.9729968309402466,grad_norm: 0.8751268688997947, iteration: 138450
loss: 0.9791045784950256,grad_norm: 0.8343280286936844, iteration: 138451
loss: 1.0277096033096313,grad_norm: 0.8613401811797576, iteration: 138452
loss: 1.0140944719314575,grad_norm: 0.824181352520879, iteration: 138453
loss: 0.9625341892242432,grad_norm: 0.9999477002591678, iteration: 138454
loss: 0.9531159400939941,grad_norm: 0.9999991824779838, iteration: 138455
loss: 0.9977516531944275,grad_norm: 0.9999990155164405, iteration: 138456
loss: 0.9814468622207642,grad_norm: 0.9999994320937244, iteration: 138457
loss: 0.9768809080123901,grad_norm: 0.9999991324219876, iteration: 138458
loss: 0.9903530478477478,grad_norm: 0.9999991696256028, iteration: 138459
loss: 0.9620997905731201,grad_norm: 0.9035089998354053, iteration: 138460
loss: 0.9572784304618835,grad_norm: 0.9999993256807893, iteration: 138461
loss: 1.036001443862915,grad_norm: 0.9999993171823057, iteration: 138462
loss: 0.9929398894309998,grad_norm: 0.8809569376429057, iteration: 138463
loss: 0.99834805727005,grad_norm: 0.8685631285829987, iteration: 138464
loss: 1.0065730810165405,grad_norm: 0.9749465327821516, iteration: 138465
loss: 0.9967648386955261,grad_norm: 0.9999990011978498, iteration: 138466
loss: 1.0166151523590088,grad_norm: 0.9999994755306496, iteration: 138467
loss: 1.0097136497497559,grad_norm: 0.9999990437938756, iteration: 138468
loss: 0.9829467535018921,grad_norm: 0.9976417475467173, iteration: 138469
loss: 1.0004290342330933,grad_norm: 0.9999991447132391, iteration: 138470
loss: 0.9879750609397888,grad_norm: 0.9999991118118469, iteration: 138471
loss: 1.0230638980865479,grad_norm: 0.999999510814043, iteration: 138472
loss: 1.0086301565170288,grad_norm: 0.8789561360815162, iteration: 138473
loss: 0.9610155820846558,grad_norm: 0.8977138594627997, iteration: 138474
loss: 0.9987369775772095,grad_norm: 0.9065850324046216, iteration: 138475
loss: 1.0135056972503662,grad_norm: 0.8543350140341718, iteration: 138476
loss: 1.0396679639816284,grad_norm: 0.9999990815692612, iteration: 138477
loss: 1.0208055973052979,grad_norm: 0.9999997492862438, iteration: 138478
loss: 1.0019139051437378,grad_norm: 0.9999991600391867, iteration: 138479
loss: 0.984367311000824,grad_norm: 0.9463744281320647, iteration: 138480
loss: 0.9696424603462219,grad_norm: 0.9999990579939891, iteration: 138481
loss: 1.003103494644165,grad_norm: 0.9993918408269213, iteration: 138482
loss: 1.0314877033233643,grad_norm: 0.9999990816932115, iteration: 138483
loss: 0.978183925151825,grad_norm: 0.9999990866460292, iteration: 138484
loss: 0.987096905708313,grad_norm: 0.8745855553439925, iteration: 138485
loss: 0.9894489645957947,grad_norm: 0.9281934549541199, iteration: 138486
loss: 0.9892125725746155,grad_norm: 0.9999993101529933, iteration: 138487
loss: 0.9816733002662659,grad_norm: 0.9999990224880453, iteration: 138488
loss: 0.999169111251831,grad_norm: 0.8077815023243212, iteration: 138489
loss: 1.0144054889678955,grad_norm: 0.9999992066643995, iteration: 138490
loss: 1.0231059789657593,grad_norm: 0.9999992382574793, iteration: 138491
loss: 1.0184787511825562,grad_norm: 0.9747636023949631, iteration: 138492
loss: 0.997748851776123,grad_norm: 0.9999991446848846, iteration: 138493
loss: 0.981110692024231,grad_norm: 0.9584660528592749, iteration: 138494
loss: 0.9933056235313416,grad_norm: 0.9999991004225335, iteration: 138495
loss: 0.9837899208068848,grad_norm: 0.9999991174969906, iteration: 138496
loss: 0.9976751804351807,grad_norm: 0.9999990584610867, iteration: 138497
loss: 1.019223928451538,grad_norm: 0.9999991443177839, iteration: 138498
loss: 0.942234992980957,grad_norm: 0.9806713635391406, iteration: 138499
loss: 0.9810394644737244,grad_norm: 0.9970937994532321, iteration: 138500
loss: 0.9995091557502747,grad_norm: 0.9999990881091843, iteration: 138501
loss: 0.9803462028503418,grad_norm: 0.8445486799713188, iteration: 138502
loss: 1.0043953657150269,grad_norm: 0.9999991547428138, iteration: 138503
loss: 0.9666303992271423,grad_norm: 0.9999990372360745, iteration: 138504
loss: 1.0433017015457153,grad_norm: 0.9999993253245741, iteration: 138505
loss: 1.0161935091018677,grad_norm: 0.999998962480567, iteration: 138506
loss: 1.0475932359695435,grad_norm: 0.9999991018738272, iteration: 138507
loss: 1.0390137434005737,grad_norm: 0.9381966918844046, iteration: 138508
loss: 1.0296646356582642,grad_norm: 0.9578144134827671, iteration: 138509
loss: 1.0264812707901,grad_norm: 0.807901675294927, iteration: 138510
loss: 1.0044336318969727,grad_norm: 0.9999992593526364, iteration: 138511
loss: 1.0201092958450317,grad_norm: 0.9999999304026627, iteration: 138512
loss: 0.9947797656059265,grad_norm: 0.8813784085450685, iteration: 138513
loss: 0.9897165894508362,grad_norm: 0.9999990855902118, iteration: 138514
loss: 0.9596452713012695,grad_norm: 0.9999991805313873, iteration: 138515
loss: 0.9698778390884399,grad_norm: 0.99999901265052, iteration: 138516
loss: 0.9748584032058716,grad_norm: 0.9999991265408157, iteration: 138517
loss: 0.9664705395698547,grad_norm: 0.9999991556341356, iteration: 138518
loss: 0.9969242811203003,grad_norm: 0.9476641177285942, iteration: 138519
loss: 0.9762090444564819,grad_norm: 0.9999990815429168, iteration: 138520
loss: 0.9271112680435181,grad_norm: 0.9416811544732704, iteration: 138521
loss: 0.9934870600700378,grad_norm: 0.9999994813582591, iteration: 138522
loss: 0.978725016117096,grad_norm: 0.9999992032540428, iteration: 138523
loss: 0.9901382327079773,grad_norm: 0.8406201959053433, iteration: 138524
loss: 0.96818608045578,grad_norm: 0.890749643833828, iteration: 138525
loss: 1.020076870918274,grad_norm: 0.8686131033228316, iteration: 138526
loss: 0.9764734506607056,grad_norm: 0.8822623660962351, iteration: 138527
loss: 1.0156769752502441,grad_norm: 0.9065992032709761, iteration: 138528
loss: 1.0146747827529907,grad_norm: 0.9047099572593481, iteration: 138529
loss: 1.0032366514205933,grad_norm: 0.9591439054513686, iteration: 138530
loss: 1.0441441535949707,grad_norm: 0.9999993999318533, iteration: 138531
loss: 0.9993671178817749,grad_norm: 0.963465640370798, iteration: 138532
loss: 0.9939055442810059,grad_norm: 0.8890433828960577, iteration: 138533
loss: 1.0116509199142456,grad_norm: 0.9999990684313095, iteration: 138534
loss: 1.0356674194335938,grad_norm: 0.9999992118509557, iteration: 138535
loss: 1.0137221813201904,grad_norm: 0.9789828487643714, iteration: 138536
loss: 0.9980290532112122,grad_norm: 0.99999919338886, iteration: 138537
loss: 1.0522540807724,grad_norm: 0.99999907298002, iteration: 138538
loss: 1.0186693668365479,grad_norm: 0.9999991110565594, iteration: 138539
loss: 0.9924261569976807,grad_norm: 0.883951425544208, iteration: 138540
loss: 1.0117366313934326,grad_norm: 0.9999997530466793, iteration: 138541
loss: 1.0293201208114624,grad_norm: 0.9999992995078013, iteration: 138542
loss: 1.051663875579834,grad_norm: 0.9999994742719931, iteration: 138543
loss: 1.0139824151992798,grad_norm: 0.943073053184112, iteration: 138544
loss: 1.000881552696228,grad_norm: 0.9999989183174257, iteration: 138545
loss: 0.9679551124572754,grad_norm: 0.9999990895022739, iteration: 138546
loss: 1.0208163261413574,grad_norm: 0.999999382880574, iteration: 138547
loss: 1.046103596687317,grad_norm: 0.9999992206700512, iteration: 138548
loss: 1.0192149877548218,grad_norm: 0.9999993848894004, iteration: 138549
loss: 1.0585631132125854,grad_norm: 0.9999997607136724, iteration: 138550
loss: 0.9837604761123657,grad_norm: 0.9999990128217168, iteration: 138551
loss: 1.1131881475448608,grad_norm: 0.9999996330054578, iteration: 138552
loss: 0.9967218637466431,grad_norm: 0.9832330329547482, iteration: 138553
loss: 1.0291160345077515,grad_norm: 0.9813687379063012, iteration: 138554
loss: 0.9646687507629395,grad_norm: 0.9999991475781114, iteration: 138555
loss: 0.9641450643539429,grad_norm: 0.9999990742516748, iteration: 138556
loss: 0.9961863160133362,grad_norm: 0.9999991481466359, iteration: 138557
loss: 0.9908118844032288,grad_norm: 0.9999991649466209, iteration: 138558
loss: 1.0141699314117432,grad_norm: 0.9999991496388635, iteration: 138559
loss: 1.0123684406280518,grad_norm: 0.9999991360377151, iteration: 138560
loss: 0.9912029504776001,grad_norm: 0.8724760232811152, iteration: 138561
loss: 0.9766233563423157,grad_norm: 0.9999990944836158, iteration: 138562
loss: 1.0126198530197144,grad_norm: 0.9999990938978519, iteration: 138563
loss: 1.0033516883850098,grad_norm: 0.9999994224551954, iteration: 138564
loss: 1.0175386667251587,grad_norm: 0.99999917804664, iteration: 138565
loss: 1.0422377586364746,grad_norm: 0.9999991552461335, iteration: 138566
loss: 1.0124621391296387,grad_norm: 0.9999991666704942, iteration: 138567
loss: 1.0436955690383911,grad_norm: 0.9999995250556611, iteration: 138568
loss: 0.9983240962028503,grad_norm: 0.9125725493738944, iteration: 138569
loss: 1.0204743146896362,grad_norm: 0.9099775296792078, iteration: 138570
loss: 1.0287795066833496,grad_norm: 0.9796324585419086, iteration: 138571
loss: 1.0176135301589966,grad_norm: 0.9999991093166565, iteration: 138572
loss: 1.0042448043823242,grad_norm: 0.930413775366996, iteration: 138573
loss: 0.9891843199729919,grad_norm: 0.9658637393348354, iteration: 138574
loss: 0.9971932172775269,grad_norm: 0.9090310352833403, iteration: 138575
loss: 0.9941909313201904,grad_norm: 0.9999991864092855, iteration: 138576
loss: 1.0347518920898438,grad_norm: 0.9999991317485668, iteration: 138577
loss: 1.0117759704589844,grad_norm: 0.8926695153942634, iteration: 138578
loss: 1.0071492195129395,grad_norm: 0.9104116853072223, iteration: 138579
loss: 1.0256246328353882,grad_norm: 0.9999991797124566, iteration: 138580
loss: 1.0011664628982544,grad_norm: 0.9725557837314958, iteration: 138581
loss: 0.9620383381843567,grad_norm: 0.9367581286999659, iteration: 138582
loss: 1.0341469049453735,grad_norm: 0.9999994474088628, iteration: 138583
loss: 1.0309067964553833,grad_norm: 0.9999991205955007, iteration: 138584
loss: 0.9875233173370361,grad_norm: 0.9700799168282922, iteration: 138585
loss: 0.9778826832771301,grad_norm: 0.9959758383031894, iteration: 138586
loss: 1.008697748184204,grad_norm: 0.933489256980461, iteration: 138587
loss: 1.015815258026123,grad_norm: 0.999999252561207, iteration: 138588
loss: 0.9920963048934937,grad_norm: 0.9999991692235373, iteration: 138589
loss: 1.0133955478668213,grad_norm: 0.9999990597107751, iteration: 138590
loss: 1.0131893157958984,grad_norm: 0.9999992101077285, iteration: 138591
loss: 0.9809620976448059,grad_norm: 0.9999990120782477, iteration: 138592
loss: 0.9901782870292664,grad_norm: 0.9999992792007288, iteration: 138593
loss: 1.0331156253814697,grad_norm: 0.9734139778493915, iteration: 138594
loss: 1.0046724081039429,grad_norm: 0.948984159412973, iteration: 138595
loss: 1.0198792219161987,grad_norm: 0.9999992599371347, iteration: 138596
loss: 1.0158305168151855,grad_norm: 0.814231918332564, iteration: 138597
loss: 0.9856741428375244,grad_norm: 0.9999991245514949, iteration: 138598
loss: 0.9751836061477661,grad_norm: 0.953043330023021, iteration: 138599
loss: 1.0133355855941772,grad_norm: 0.9991091464283801, iteration: 138600
loss: 1.142006278038025,grad_norm: 0.9999992122070175, iteration: 138601
loss: 1.0041018724441528,grad_norm: 0.9999991236838242, iteration: 138602
loss: 0.99538254737854,grad_norm: 0.9040984929234144, iteration: 138603
loss: 1.0248479843139648,grad_norm: 0.9999989696993783, iteration: 138604
loss: 1.0256917476654053,grad_norm: 0.9999998352001962, iteration: 138605
loss: 0.9690711498260498,grad_norm: 0.918824906088653, iteration: 138606
loss: 0.9876871705055237,grad_norm: 0.8971122597311693, iteration: 138607
loss: 0.9885347485542297,grad_norm: 0.999999095736057, iteration: 138608
loss: 0.9774693846702576,grad_norm: 0.9999990377855937, iteration: 138609
loss: 0.9864498972892761,grad_norm: 0.999999022727336, iteration: 138610
loss: 1.0908780097961426,grad_norm: 0.9999994787203056, iteration: 138611
loss: 1.0001033544540405,grad_norm: 0.8589848379536624, iteration: 138612
loss: 0.9720304608345032,grad_norm: 0.9056606733355103, iteration: 138613
loss: 1.0190757513046265,grad_norm: 0.999999084128489, iteration: 138614
loss: 0.9982828497886658,grad_norm: 0.9999989857640109, iteration: 138615
loss: 1.0187371969223022,grad_norm: 0.999999112813505, iteration: 138616
loss: 1.0101457834243774,grad_norm: 0.9999990066483788, iteration: 138617
loss: 0.995471179485321,grad_norm: 0.9604602756823701, iteration: 138618
loss: 1.0076189041137695,grad_norm: 0.946068024843285, iteration: 138619
loss: 0.985792338848114,grad_norm: 0.9999991881248802, iteration: 138620
loss: 0.9887291789054871,grad_norm: 0.9934982884891886, iteration: 138621
loss: 1.1835743188858032,grad_norm: 0.9999992142159375, iteration: 138622
loss: 1.038940668106079,grad_norm: 0.999999056942068, iteration: 138623
loss: 1.086326003074646,grad_norm: 0.9999998447105469, iteration: 138624
loss: 0.9637775421142578,grad_norm: 0.8938774582645141, iteration: 138625
loss: 0.9927735924720764,grad_norm: 0.9999988852291652, iteration: 138626
loss: 1.0164614915847778,grad_norm: 0.9999991756163293, iteration: 138627
loss: 1.0150395631790161,grad_norm: 0.953535539212437, iteration: 138628
loss: 1.0500181913375854,grad_norm: 0.8918910679915865, iteration: 138629
loss: 1.0104641914367676,grad_norm: 0.9077069497850556, iteration: 138630
loss: 1.0158523321151733,grad_norm: 0.9999990875846074, iteration: 138631
loss: 1.0422650575637817,grad_norm: 0.981318330879075, iteration: 138632
loss: 0.9997755885124207,grad_norm: 0.9999992040606436, iteration: 138633
loss: 0.9880430698394775,grad_norm: 0.9272033267782105, iteration: 138634
loss: 0.9793155193328857,grad_norm: 0.9999990511925576, iteration: 138635
loss: 0.9936606287956238,grad_norm: 0.9999991702422449, iteration: 138636
loss: 0.9359305500984192,grad_norm: 0.999999105967574, iteration: 138637
loss: 1.0125714540481567,grad_norm: 0.9999992208751313, iteration: 138638
loss: 0.9699247479438782,grad_norm: 0.9999990551096335, iteration: 138639
loss: 0.9721183180809021,grad_norm: 0.999999096741919, iteration: 138640
loss: 0.9723803400993347,grad_norm: 0.9999990790009818, iteration: 138641
loss: 1.0108745098114014,grad_norm: 0.9999992050906945, iteration: 138642
loss: 1.0187872648239136,grad_norm: 0.9999991004227808, iteration: 138643
loss: 1.0155847072601318,grad_norm: 0.9999992625552659, iteration: 138644
loss: 0.9757487773895264,grad_norm: 0.999999121660951, iteration: 138645
loss: 1.0111463069915771,grad_norm: 0.8580019206638638, iteration: 138646
loss: 1.0013662576675415,grad_norm: 0.946158387198506, iteration: 138647
loss: 1.0180253982543945,grad_norm: 0.7830038979971319, iteration: 138648
loss: 1.0018755197525024,grad_norm: 0.9999991525918347, iteration: 138649
loss: 1.0265533924102783,grad_norm: 0.999999229423156, iteration: 138650
loss: 1.4040945768356323,grad_norm: 0.9999999354147658, iteration: 138651
loss: 0.9914894104003906,grad_norm: 0.9999992255717992, iteration: 138652
loss: 0.9813624620437622,grad_norm: 0.9999990828730713, iteration: 138653
loss: 0.9661819338798523,grad_norm: 0.9999992305864159, iteration: 138654
loss: 0.988387405872345,grad_norm: 0.9999991339188855, iteration: 138655
loss: 1.0056911706924438,grad_norm: 0.9624089625517099, iteration: 138656
loss: 1.0146288871765137,grad_norm: 0.9999990918049843, iteration: 138657
loss: 0.9942073822021484,grad_norm: 0.9999990251085856, iteration: 138658
loss: 0.9917066693305969,grad_norm: 0.9999990829921153, iteration: 138659
loss: 1.008487582206726,grad_norm: 0.9999991842288111, iteration: 138660
loss: 1.0048428773880005,grad_norm: 0.9999993880329504, iteration: 138661
loss: 0.9739951491355896,grad_norm: 0.9999990814505854, iteration: 138662
loss: 1.0032093524932861,grad_norm: 0.9999992572575072, iteration: 138663
loss: 1.0272151231765747,grad_norm: 0.9999992469978408, iteration: 138664
loss: 0.9925530552864075,grad_norm: 0.9788614906312094, iteration: 138665
loss: 0.9848846197128296,grad_norm: 0.9999990064413937, iteration: 138666
loss: 1.0335743427276611,grad_norm: 0.9999993367121756, iteration: 138667
loss: 1.022376537322998,grad_norm: 0.8934987319235378, iteration: 138668
loss: 0.9823222756385803,grad_norm: 0.8928520590930535, iteration: 138669
loss: 0.9886290431022644,grad_norm: 0.9714370554592607, iteration: 138670
loss: 1.0107957124710083,grad_norm: 0.944554422519951, iteration: 138671
loss: 0.9631603360176086,grad_norm: 0.9398970889573125, iteration: 138672
loss: 1.0211337804794312,grad_norm: 0.9999994353089056, iteration: 138673
loss: 0.9768805503845215,grad_norm: 0.9615274553171558, iteration: 138674
loss: 0.9874538779258728,grad_norm: 0.9999991856027679, iteration: 138675
loss: 0.9898685812950134,grad_norm: 0.9999990066407124, iteration: 138676
loss: 0.9966592788696289,grad_norm: 0.877416805688248, iteration: 138677
loss: 0.9814680218696594,grad_norm: 0.9999992132801377, iteration: 138678
loss: 0.9749820232391357,grad_norm: 0.9999992515901582, iteration: 138679
loss: 0.9849648475646973,grad_norm: 0.9999991633534382, iteration: 138680
loss: 1.0534197092056274,grad_norm: 0.9999997423937405, iteration: 138681
loss: 0.9787256121635437,grad_norm: 0.9914226718162441, iteration: 138682
loss: 1.010524034500122,grad_norm: 0.9999991964325732, iteration: 138683
loss: 0.9953308701515198,grad_norm: 0.9999990706735848, iteration: 138684
loss: 1.009968876838684,grad_norm: 0.9999992264420321, iteration: 138685
loss: 1.0247724056243896,grad_norm: 0.9999993173114334, iteration: 138686
loss: 1.0264297723770142,grad_norm: 0.9020940238619497, iteration: 138687
loss: 0.9910298585891724,grad_norm: 0.9023297475187759, iteration: 138688
loss: 0.9871575832366943,grad_norm: 0.9918730474929573, iteration: 138689
loss: 0.9884028434753418,grad_norm: 0.9999990698798259, iteration: 138690
loss: 1.0132108926773071,grad_norm: 0.8917334454998994, iteration: 138691
loss: 0.9519585967063904,grad_norm: 0.9959492724524232, iteration: 138692
loss: 0.9980180859565735,grad_norm: 0.9999991758249599, iteration: 138693
loss: 0.9726651310920715,grad_norm: 0.999999107466028, iteration: 138694
loss: 1.0060781240463257,grad_norm: 0.9999992529887141, iteration: 138695
loss: 1.0070078372955322,grad_norm: 0.9999999989043191, iteration: 138696
loss: 1.00263249874115,grad_norm: 0.9999992559284121, iteration: 138697
loss: 0.966372013092041,grad_norm: 0.9999993022583153, iteration: 138698
loss: 0.9724400639533997,grad_norm: 0.999999274902456, iteration: 138699
loss: 0.9758893847465515,grad_norm: 0.9971978863861466, iteration: 138700
loss: 1.0025750398635864,grad_norm: 0.9250016368457473, iteration: 138701
loss: 1.008544921875,grad_norm: 0.9999991051880445, iteration: 138702
loss: 0.9767628908157349,grad_norm: 0.9999990419453246, iteration: 138703
loss: 1.0001448392868042,grad_norm: 0.9741197698526529, iteration: 138704
loss: 0.9822083711624146,grad_norm: 0.9834174466064639, iteration: 138705
loss: 1.0184820890426636,grad_norm: 0.9999992281791038, iteration: 138706
loss: 1.013485312461853,grad_norm: 0.9999991616825764, iteration: 138707
loss: 0.9880081415176392,grad_norm: 0.9999991344962227, iteration: 138708
loss: 1.0150327682495117,grad_norm: 0.9999993014362054, iteration: 138709
loss: 1.0163664817810059,grad_norm: 0.9999991541345793, iteration: 138710
loss: 0.9819909930229187,grad_norm: 0.9999989498074554, iteration: 138711
loss: 1.024224877357483,grad_norm: 0.9999989933557125, iteration: 138712
loss: 1.0271492004394531,grad_norm: 0.9483403424447937, iteration: 138713
loss: 0.9718765020370483,grad_norm: 0.9651497181037583, iteration: 138714
loss: 0.9929470419883728,grad_norm: 0.9999990681561225, iteration: 138715
loss: 1.007992148399353,grad_norm: 0.9999991891371406, iteration: 138716
loss: 0.9792846441268921,grad_norm: 0.9999989779099214, iteration: 138717
loss: 1.0038752555847168,grad_norm: 0.9660241294530231, iteration: 138718
loss: 0.9992577433586121,grad_norm: 0.9157438936754897, iteration: 138719
loss: 0.996668815612793,grad_norm: 0.948081881925016, iteration: 138720
loss: 0.9658169150352478,grad_norm: 0.9999992096806078, iteration: 138721
loss: 1.0166125297546387,grad_norm: 0.9413523516923167, iteration: 138722
loss: 0.9978055953979492,grad_norm: 0.9999992020237222, iteration: 138723
loss: 1.0074999332427979,grad_norm: 0.9305295016273889, iteration: 138724
loss: 0.9784348607063293,grad_norm: 0.9295642094957509, iteration: 138725
loss: 1.0016440153121948,grad_norm: 0.9999990928014553, iteration: 138726
loss: 1.0105512142181396,grad_norm: 0.9999993457603199, iteration: 138727
loss: 1.0018134117126465,grad_norm: 0.9732362900162029, iteration: 138728
loss: 1.0245845317840576,grad_norm: 0.9999990854769679, iteration: 138729
loss: 0.9787817597389221,grad_norm: 0.9999990881935725, iteration: 138730
loss: 0.9776086807250977,grad_norm: 0.9999990995720429, iteration: 138731
loss: 1.0014872550964355,grad_norm: 0.9999989537352781, iteration: 138732
loss: 1.0151296854019165,grad_norm: 0.9999990376033704, iteration: 138733
loss: 1.0269588232040405,grad_norm: 0.9626864932395415, iteration: 138734
loss: 0.9899318814277649,grad_norm: 0.9999991760457164, iteration: 138735
loss: 1.0318113565444946,grad_norm: 0.8837878307051518, iteration: 138736
loss: 1.0166304111480713,grad_norm: 0.9999993409622188, iteration: 138737
loss: 0.990938663482666,grad_norm: 0.999999102611859, iteration: 138738
loss: 0.9561300873756409,grad_norm: 0.999999119323205, iteration: 138739
loss: 0.9903510808944702,grad_norm: 0.9597174496206263, iteration: 138740
loss: 1.0159717798233032,grad_norm: 0.9999992763266066, iteration: 138741
loss: 1.039035439491272,grad_norm: 0.9750656452506283, iteration: 138742
loss: 0.9851895570755005,grad_norm: 0.9999991278557123, iteration: 138743
loss: 1.0016570091247559,grad_norm: 0.99999916748957, iteration: 138744
loss: 1.0142545700073242,grad_norm: 0.9999991979590968, iteration: 138745
loss: 0.9862068891525269,grad_norm: 0.9999990355833701, iteration: 138746
loss: 0.9536912441253662,grad_norm: 0.9999990810468046, iteration: 138747
loss: 1.0535014867782593,grad_norm: 0.9999991320235129, iteration: 138748
loss: 0.9859910607337952,grad_norm: 0.8670727325792362, iteration: 138749
loss: 1.0234431028366089,grad_norm: 0.9999989485401098, iteration: 138750
loss: 0.9737325310707092,grad_norm: 0.9804451837166724, iteration: 138751
loss: 0.9871518015861511,grad_norm: 0.9803383035962477, iteration: 138752
loss: 1.0119606256484985,grad_norm: 0.9825809405977166, iteration: 138753
loss: 0.9829531311988831,grad_norm: 0.8680713525784925, iteration: 138754
loss: 1.010716438293457,grad_norm: 0.95555797755423, iteration: 138755
loss: 0.9709574580192566,grad_norm: 0.8602106929538088, iteration: 138756
loss: 0.9994965195655823,grad_norm: 0.9999990985791956, iteration: 138757
loss: 0.9909341931343079,grad_norm: 0.9573439109156189, iteration: 138758
loss: 0.993159830570221,grad_norm: 0.9999990079525095, iteration: 138759
loss: 1.0206226110458374,grad_norm: 0.9999991495727947, iteration: 138760
loss: 0.9798654913902283,grad_norm: 0.9041723408861879, iteration: 138761
loss: 0.989469587802887,grad_norm: 0.9113954016331011, iteration: 138762
loss: 1.0702482461929321,grad_norm: 0.9999990701956509, iteration: 138763
loss: 0.9988270998001099,grad_norm: 0.9999992480330386, iteration: 138764
loss: 1.026656985282898,grad_norm: 0.9999991350956228, iteration: 138765
loss: 0.991914689540863,grad_norm: 0.999999173991122, iteration: 138766
loss: 0.9599819183349609,grad_norm: 0.8832027770449309, iteration: 138767
loss: 0.9836567640304565,grad_norm: 0.9999990014100323, iteration: 138768
loss: 0.983769953250885,grad_norm: 0.9999991281911704, iteration: 138769
loss: 0.9899272322654724,grad_norm: 0.9423873902066326, iteration: 138770
loss: 1.047736644744873,grad_norm: 0.9999996086041172, iteration: 138771
loss: 1.0058073997497559,grad_norm: 0.8589638127342736, iteration: 138772
loss: 0.9679312705993652,grad_norm: 0.9999989585616439, iteration: 138773
loss: 0.9842664003372192,grad_norm: 0.9855222622337965, iteration: 138774
loss: 1.010258436203003,grad_norm: 0.9999992683180585, iteration: 138775
loss: 1.0213849544525146,grad_norm: 0.9999990504293823, iteration: 138776
loss: 1.0138918161392212,grad_norm: 0.9804284556420917, iteration: 138777
loss: 0.9938340783119202,grad_norm: 0.9999990032143222, iteration: 138778
loss: 1.0341905355453491,grad_norm: 0.9873583312109391, iteration: 138779
loss: 1.0005797147750854,grad_norm: 0.9787390021964689, iteration: 138780
loss: 0.9930686354637146,grad_norm: 0.9999990046287041, iteration: 138781
loss: 1.0300205945968628,grad_norm: 0.9999992695220057, iteration: 138782
loss: 0.9816698431968689,grad_norm: 0.891324005146956, iteration: 138783
loss: 0.9761403799057007,grad_norm: 0.9999989840355238, iteration: 138784
loss: 1.0000584125518799,grad_norm: 0.9120114252735858, iteration: 138785
loss: 1.017368197441101,grad_norm: 0.9579132129344132, iteration: 138786
loss: 0.9975720643997192,grad_norm: 0.9999992283171253, iteration: 138787
loss: 1.008571743965149,grad_norm: 0.8801203811606255, iteration: 138788
loss: 1.0123943090438843,grad_norm: 0.9999990887875587, iteration: 138789
loss: 0.9983360767364502,grad_norm: 0.9904087642146453, iteration: 138790
loss: 0.9992329478263855,grad_norm: 0.7609531467641315, iteration: 138791
loss: 1.0335842370986938,grad_norm: 0.9550050269733118, iteration: 138792
loss: 1.0042628049850464,grad_norm: 0.9926990352987682, iteration: 138793
loss: 1.0089317560195923,grad_norm: 0.9999990220520419, iteration: 138794
loss: 1.002477765083313,grad_norm: 0.9999992223574727, iteration: 138795
loss: 0.9950510263442993,grad_norm: 0.9783892399329128, iteration: 138796
loss: 0.9955407977104187,grad_norm: 0.9999990823899505, iteration: 138797
loss: 0.9712079763412476,grad_norm: 0.864244662622558, iteration: 138798
loss: 0.9731610417366028,grad_norm: 0.9097542630887774, iteration: 138799
loss: 0.9850053787231445,grad_norm: 0.9999993202776989, iteration: 138800
loss: 0.9996045231819153,grad_norm: 0.9999990507211843, iteration: 138801
loss: 0.9831019639968872,grad_norm: 0.9521303731028111, iteration: 138802
loss: 1.0036811828613281,grad_norm: 0.9999991544346377, iteration: 138803
loss: 0.9845002293586731,grad_norm: 0.9999989863488404, iteration: 138804
loss: 1.0174514055252075,grad_norm: 0.9639987062081816, iteration: 138805
loss: 0.9798925518989563,grad_norm: 0.953095024222419, iteration: 138806
loss: 1.0136011838912964,grad_norm: 0.9999990419428283, iteration: 138807
loss: 0.965117335319519,grad_norm: 0.9999992506122719, iteration: 138808
loss: 1.0361272096633911,grad_norm: 0.9909170781153331, iteration: 138809
loss: 1.0111359357833862,grad_norm: 0.9144945209108498, iteration: 138810
loss: 1.021898627281189,grad_norm: 0.9999990537942911, iteration: 138811
loss: 1.0095665454864502,grad_norm: 0.955844541076431, iteration: 138812
loss: 1.0016423463821411,grad_norm: 0.9999990776182852, iteration: 138813
loss: 1.0040959119796753,grad_norm: 0.9999991438927395, iteration: 138814
loss: 1.0031288862228394,grad_norm: 0.8982726992110329, iteration: 138815
loss: 0.9770718216896057,grad_norm: 0.8728117036314095, iteration: 138816
loss: 0.9961364269256592,grad_norm: 0.9378044913335108, iteration: 138817
loss: 0.9874152541160583,grad_norm: 0.8369300581675112, iteration: 138818
loss: 1.0163651704788208,grad_norm: 0.8765154014964942, iteration: 138819
loss: 1.0250663757324219,grad_norm: 0.9999991482338324, iteration: 138820
loss: 0.995304524898529,grad_norm: 0.9999991261086817, iteration: 138821
loss: 0.9967647790908813,grad_norm: 0.9999992071557521, iteration: 138822
loss: 1.007575511932373,grad_norm: 0.9999990227216865, iteration: 138823
loss: 1.0167280435562134,grad_norm: 0.9242992300628703, iteration: 138824
loss: 0.9740684032440186,grad_norm: 0.9999990579481443, iteration: 138825
loss: 1.0172935724258423,grad_norm: 0.9833751139082343, iteration: 138826
loss: 0.9904912710189819,grad_norm: 0.8951121914511078, iteration: 138827
loss: 0.9673253893852234,grad_norm: 0.9581964689592525, iteration: 138828
loss: 0.9648913741111755,grad_norm: 0.9999991664914728, iteration: 138829
loss: 1.0139425992965698,grad_norm: 0.9999989961628372, iteration: 138830
loss: 0.9335730671882629,grad_norm: 0.9111273830258179, iteration: 138831
loss: 1.0038245916366577,grad_norm: 0.9006782464581206, iteration: 138832
loss: 0.983194887638092,grad_norm: 0.9457429759926167, iteration: 138833
loss: 1.0001262426376343,grad_norm: 0.9528172593109643, iteration: 138834
loss: 1.0023407936096191,grad_norm: 0.999999364652743, iteration: 138835
loss: 1.00796377658844,grad_norm: 0.9999991412459381, iteration: 138836
loss: 1.0168921947479248,grad_norm: 0.9999990932666902, iteration: 138837
loss: 0.9756398797035217,grad_norm: 0.9011688133278299, iteration: 138838
loss: 0.9645074009895325,grad_norm: 0.8529277684145784, iteration: 138839
loss: 1.0243384838104248,grad_norm: 0.9045086516215392, iteration: 138840
loss: 1.0288569927215576,grad_norm: 0.99999935918874, iteration: 138841
loss: 0.9758493304252625,grad_norm: 0.999999085457653, iteration: 138842
loss: 0.9775556921958923,grad_norm: 0.9999990563277802, iteration: 138843
loss: 0.9894681572914124,grad_norm: 0.9706188714249292, iteration: 138844
loss: 0.9999278783798218,grad_norm: 0.9156822677704075, iteration: 138845
loss: 1.0188966989517212,grad_norm: 0.9999990596826247, iteration: 138846
loss: 0.994826078414917,grad_norm: 0.9999992168878311, iteration: 138847
loss: 1.030022144317627,grad_norm: 0.9999992477482572, iteration: 138848
loss: 1.0009702444076538,grad_norm: 0.9999993274664452, iteration: 138849
loss: 0.9603410363197327,grad_norm: 0.9999990269368955, iteration: 138850
loss: 1.0173592567443848,grad_norm: 0.9999990395992343, iteration: 138851
loss: 0.995029628276825,grad_norm: 0.9999990754479162, iteration: 138852
loss: 1.0116018056869507,grad_norm: 0.9999992407916632, iteration: 138853
loss: 1.0031152963638306,grad_norm: 0.9250728660212324, iteration: 138854
loss: 1.0270277261734009,grad_norm: 0.9510353813925666, iteration: 138855
loss: 1.0072335004806519,grad_norm: 0.9999991235769269, iteration: 138856
loss: 1.0101724863052368,grad_norm: 0.9760708576241334, iteration: 138857
loss: 0.9854869246482849,grad_norm: 0.9999992883019307, iteration: 138858
loss: 0.9427564740180969,grad_norm: 0.9957385367055637, iteration: 138859
loss: 0.9816375970840454,grad_norm: 0.920742904426937, iteration: 138860
loss: 0.9867663979530334,grad_norm: 0.9970676250110817, iteration: 138861
loss: 1.0041905641555786,grad_norm: 0.9999991831868018, iteration: 138862
loss: 0.9910924434661865,grad_norm: 0.8481650378764553, iteration: 138863
loss: 0.9894046187400818,grad_norm: 0.9851343001867848, iteration: 138864
loss: 1.0096067190170288,grad_norm: 0.9999990616982831, iteration: 138865
loss: 0.9876382946968079,grad_norm: 0.999999210989667, iteration: 138866
loss: 1.0401020050048828,grad_norm: 0.99999912475298, iteration: 138867
loss: 0.9799858927726746,grad_norm: 0.9999991980953585, iteration: 138868
loss: 0.9864212274551392,grad_norm: 0.9868289986262971, iteration: 138869
loss: 0.9695528149604797,grad_norm: 0.916173783413092, iteration: 138870
loss: 1.011411428451538,grad_norm: 0.9999990796158555, iteration: 138871
loss: 0.9650872349739075,grad_norm: 0.999999265913445, iteration: 138872
loss: 1.088131070137024,grad_norm: 0.87294549591753, iteration: 138873
loss: 1.0153316259384155,grad_norm: 0.9999991125025548, iteration: 138874
loss: 1.0299512147903442,grad_norm: 0.9999991613942797, iteration: 138875
loss: 0.990203320980072,grad_norm: 0.8947677988144486, iteration: 138876
loss: 0.9890967011451721,grad_norm: 0.9272152828319717, iteration: 138877
loss: 0.9803315997123718,grad_norm: 0.999999097250184, iteration: 138878
loss: 0.9731453061103821,grad_norm: 0.9527515780281877, iteration: 138879
loss: 1.0177342891693115,grad_norm: 0.9999990118730537, iteration: 138880
loss: 1.0299644470214844,grad_norm: 0.9737680547613213, iteration: 138881
loss: 1.0330493450164795,grad_norm: 0.844786749275494, iteration: 138882
loss: 1.0222450494766235,grad_norm: 0.9999991950105336, iteration: 138883
loss: 0.9634624719619751,grad_norm: 0.9999992963095997, iteration: 138884
loss: 1.012884259223938,grad_norm: 0.999999137777814, iteration: 138885
loss: 0.9836454391479492,grad_norm: 0.9999991662996746, iteration: 138886
loss: 1.0108171701431274,grad_norm: 0.999999150512606, iteration: 138887
loss: 1.0228253602981567,grad_norm: 0.9999991435108825, iteration: 138888
loss: 0.9566512107849121,grad_norm: 0.9999991345192495, iteration: 138889
loss: 0.9774090647697449,grad_norm: 0.9999992041460096, iteration: 138890
loss: 1.0157110691070557,grad_norm: 0.9999991779351988, iteration: 138891
loss: 1.0296449661254883,grad_norm: 0.9999996710780558, iteration: 138892
loss: 0.9915239214897156,grad_norm: 0.9999990266396344, iteration: 138893
loss: 0.9814363718032837,grad_norm: 0.9000183340431843, iteration: 138894
loss: 1.0395255088806152,grad_norm: 0.9587373274458589, iteration: 138895
loss: 1.0100975036621094,grad_norm: 0.9999992126575358, iteration: 138896
loss: 0.9990029335021973,grad_norm: 0.9999993523518369, iteration: 138897
loss: 0.9969052672386169,grad_norm: 0.9918033163850934, iteration: 138898
loss: 1.0295213460922241,grad_norm: 0.9999993361101759, iteration: 138899
loss: 0.9995430707931519,grad_norm: 0.9999990377606941, iteration: 138900
loss: 0.9776910543441772,grad_norm: 0.9999992354182383, iteration: 138901
loss: 1.0989009141921997,grad_norm: 0.9999998664777476, iteration: 138902
loss: 0.9948531985282898,grad_norm: 0.9999990282781472, iteration: 138903
loss: 1.0011107921600342,grad_norm: 0.9999992446150437, iteration: 138904
loss: 0.9999539852142334,grad_norm: 0.9999990453572115, iteration: 138905
loss: 0.9774688482284546,grad_norm: 0.9999989888491602, iteration: 138906
loss: 0.9913742542266846,grad_norm: 0.9999992348135249, iteration: 138907
loss: 0.9811808466911316,grad_norm: 0.9999992655521612, iteration: 138908
loss: 0.9958024621009827,grad_norm: 0.9999991115254817, iteration: 138909
loss: 0.9887407422065735,grad_norm: 0.9999990157396005, iteration: 138910
loss: 0.9864766597747803,grad_norm: 0.9465988934666039, iteration: 138911
loss: 0.9811283946037292,grad_norm: 0.895410289946458, iteration: 138912
loss: 0.9895098805427551,grad_norm: 0.8506624146740652, iteration: 138913
loss: 0.9972610473632812,grad_norm: 0.9999991461198039, iteration: 138914
loss: 1.0270150899887085,grad_norm: 0.9999997795067707, iteration: 138915
loss: 0.9726364612579346,grad_norm: 0.9999989937218919, iteration: 138916
loss: 0.9885868430137634,grad_norm: 0.9668544746854844, iteration: 138917
loss: 0.9802517294883728,grad_norm: 0.9999990823437354, iteration: 138918
loss: 0.9816909432411194,grad_norm: 0.9999991700910271, iteration: 138919
loss: 0.9862514138221741,grad_norm: 0.9105461026872905, iteration: 138920
loss: 0.9928956031799316,grad_norm: 0.8887957021693561, iteration: 138921
loss: 1.0277128219604492,grad_norm: 0.9999991484975655, iteration: 138922
loss: 1.0402590036392212,grad_norm: 0.9999990724228697, iteration: 138923
loss: 0.99267578125,grad_norm: 0.9819270215570661, iteration: 138924
loss: 0.9982138872146606,grad_norm: 0.999999262978311, iteration: 138925
loss: 0.9874051213264465,grad_norm: 0.9599923720523633, iteration: 138926
loss: 1.0394270420074463,grad_norm: 0.999999083031704, iteration: 138927
loss: 1.0265990495681763,grad_norm: 0.9999989759240436, iteration: 138928
loss: 0.9681878685951233,grad_norm: 0.9999990130694696, iteration: 138929
loss: 0.978343665599823,grad_norm: 0.9797957944608856, iteration: 138930
loss: 1.0034090280532837,grad_norm: 0.9999990831129166, iteration: 138931
loss: 0.9959852695465088,grad_norm: 0.8969548321300148, iteration: 138932
loss: 0.9863753318786621,grad_norm: 0.9999991266483712, iteration: 138933
loss: 0.9960102438926697,grad_norm: 0.9999990853307077, iteration: 138934
loss: 0.9760066270828247,grad_norm: 0.9402226796136438, iteration: 138935
loss: 0.9624125361442566,grad_norm: 0.9999990238642853, iteration: 138936
loss: 1.0628005266189575,grad_norm: 0.9999994934648346, iteration: 138937
loss: 1.0347063541412354,grad_norm: 0.9999992233543691, iteration: 138938
loss: 1.0050519704818726,grad_norm: 0.9999992969208438, iteration: 138939
loss: 1.0114634037017822,grad_norm: 0.9999991671203088, iteration: 138940
loss: 1.0149483680725098,grad_norm: 0.9151742039797862, iteration: 138941
loss: 1.0269335508346558,grad_norm: 0.999999179103623, iteration: 138942
loss: 0.987332284450531,grad_norm: 0.9999991803222895, iteration: 138943
loss: 0.9689211845397949,grad_norm: 0.9269456364306692, iteration: 138944
loss: 0.9832491278648376,grad_norm: 0.9999990416376539, iteration: 138945
loss: 1.0015454292297363,grad_norm: 0.9999992035104074, iteration: 138946
loss: 0.9951565861701965,grad_norm: 0.9549335530476876, iteration: 138947
loss: 1.025444507598877,grad_norm: 0.9999991693614374, iteration: 138948
loss: 0.9768021702766418,grad_norm: 0.9999992947940868, iteration: 138949
loss: 1.0342340469360352,grad_norm: 0.999999963845245, iteration: 138950
loss: 0.9917962551116943,grad_norm: 0.9999990524954883, iteration: 138951
loss: 0.9870076179504395,grad_norm: 0.9999990543447934, iteration: 138952
loss: 1.0110992193222046,grad_norm: 0.9999992615427575, iteration: 138953
loss: 0.9652449488639832,grad_norm: 0.9999991096854611, iteration: 138954
loss: 1.0120643377304077,grad_norm: 0.9999992115158012, iteration: 138955
loss: 1.039623737335205,grad_norm: 0.9999993920033348, iteration: 138956
loss: 1.0159432888031006,grad_norm: 0.9405048991383372, iteration: 138957
loss: 1.0022177696228027,grad_norm: 0.9999989596171764, iteration: 138958
loss: 0.9792966842651367,grad_norm: 0.9910266110524008, iteration: 138959
loss: 1.0208134651184082,grad_norm: 0.9820935371459097, iteration: 138960
loss: 1.0278302431106567,grad_norm: 0.9999993152351095, iteration: 138961
loss: 0.9938718676567078,grad_norm: 0.9999992375385895, iteration: 138962
loss: 0.9983872175216675,grad_norm: 0.9999990900942924, iteration: 138963
loss: 0.9963756799697876,grad_norm: 0.9703962621736947, iteration: 138964
loss: 0.9670268893241882,grad_norm: 0.9999991015976777, iteration: 138965
loss: 1.0071674585342407,grad_norm: 0.999999042538806, iteration: 138966
loss: 0.9995801448822021,grad_norm: 0.9999990309522342, iteration: 138967
loss: 0.9789683818817139,grad_norm: 0.9872270377155451, iteration: 138968
loss: 1.0478127002716064,grad_norm: 0.9275074393560169, iteration: 138969
loss: 1.024047613143921,grad_norm: 0.9244378783314346, iteration: 138970
loss: 0.9797987937927246,grad_norm: 0.9999991047748047, iteration: 138971
loss: 0.9872069954872131,grad_norm: 0.8886157618774813, iteration: 138972
loss: 0.9646249413490295,grad_norm: 0.9999991051313146, iteration: 138973
loss: 1.0034304857254028,grad_norm: 0.9277386324172285, iteration: 138974
loss: 0.9780955910682678,grad_norm: 0.9999993227375674, iteration: 138975
loss: 1.0118739604949951,grad_norm: 0.9999991588028616, iteration: 138976
loss: 1.0175589323043823,grad_norm: 0.9999993779801749, iteration: 138977
loss: 0.9794682860374451,grad_norm: 0.9999991381800603, iteration: 138978
loss: 0.9573193192481995,grad_norm: 0.9999992566781317, iteration: 138979
loss: 1.0236364603042603,grad_norm: 0.9372667392440277, iteration: 138980
loss: 0.9868963360786438,grad_norm: 0.9999991066760904, iteration: 138981
loss: 1.014954686164856,grad_norm: 0.9810033338224085, iteration: 138982
loss: 0.9701979756355286,grad_norm: 0.9926693862797772, iteration: 138983
loss: 1.0139137506484985,grad_norm: 0.9999990540291391, iteration: 138984
loss: 1.0014196634292603,grad_norm: 0.9999992176671556, iteration: 138985
loss: 1.0176310539245605,grad_norm: 0.9999991259439249, iteration: 138986
loss: 0.9996939301490784,grad_norm: 0.9775395821073624, iteration: 138987
loss: 1.0126140117645264,grad_norm: 0.9941226478209555, iteration: 138988
loss: 0.9769024848937988,grad_norm: 0.9253484101011263, iteration: 138989
loss: 1.031693696975708,grad_norm: 0.9999989989689219, iteration: 138990
loss: 0.9950589537620544,grad_norm: 0.9999991707598327, iteration: 138991
loss: 0.9592766165733337,grad_norm: 0.9559170250545654, iteration: 138992
loss: 0.9767010807991028,grad_norm: 0.9999993344751305, iteration: 138993
loss: 1.0017131567001343,grad_norm: 0.9999990589541151, iteration: 138994
loss: 0.9543953537940979,grad_norm: 0.8729852413013083, iteration: 138995
loss: 0.968166708946228,grad_norm: 0.9999990674095889, iteration: 138996
loss: 1.0180304050445557,grad_norm: 0.9999991062068503, iteration: 138997
loss: 0.985366702079773,grad_norm: 0.9332156588131089, iteration: 138998
loss: 0.9893513321876526,grad_norm: 0.925386096693649, iteration: 138999
loss: 0.999386727809906,grad_norm: 0.9953635326350169, iteration: 139000
loss: 1.0162745714187622,grad_norm: 0.9090301717773986, iteration: 139001
loss: 1.031860589981079,grad_norm: 0.9999990734465182, iteration: 139002
loss: 0.9754157066345215,grad_norm: 0.9999990985879981, iteration: 139003
loss: 1.009227991104126,grad_norm: 0.9173799893822213, iteration: 139004
loss: 0.9992436766624451,grad_norm: 0.9999989059005969, iteration: 139005
loss: 1.0026053190231323,grad_norm: 0.9999989732912625, iteration: 139006
loss: 1.0144273042678833,grad_norm: 0.9999991690955031, iteration: 139007
loss: 0.9900221824645996,grad_norm: 0.9222280038765085, iteration: 139008
loss: 0.9541770219802856,grad_norm: 0.955924545184414, iteration: 139009
loss: 1.0138790607452393,grad_norm: 0.999999175731908, iteration: 139010
loss: 0.9967340230941772,grad_norm: 0.9999990712105576, iteration: 139011
loss: 0.9984502792358398,grad_norm: 0.9800663410628142, iteration: 139012
loss: 1.0188106298446655,grad_norm: 0.9999992133212797, iteration: 139013
loss: 0.993785560131073,grad_norm: 0.9111303907477034, iteration: 139014
loss: 0.9602339267730713,grad_norm: 0.9393151242807272, iteration: 139015
loss: 1.007760763168335,grad_norm: 0.9999991145744054, iteration: 139016
loss: 0.9810215830802917,grad_norm: 0.9429975966098766, iteration: 139017
loss: 1.0402859449386597,grad_norm: 0.9044763103498478, iteration: 139018
loss: 0.983656644821167,grad_norm: 0.9999992332123692, iteration: 139019
loss: 0.9915566444396973,grad_norm: 0.9999989980936367, iteration: 139020
loss: 0.9714003801345825,grad_norm: 0.9371528325898885, iteration: 139021
loss: 0.9787139892578125,grad_norm: 0.8957793000589389, iteration: 139022
loss: 0.9939014315605164,grad_norm: 0.999999111192107, iteration: 139023
loss: 0.9821735620498657,grad_norm: 0.999999130291533, iteration: 139024
loss: 1.0043644905090332,grad_norm: 0.9487204398429974, iteration: 139025
loss: 0.9560438394546509,grad_norm: 0.9999992250403581, iteration: 139026
loss: 1.0316226482391357,grad_norm: 0.9999990472078881, iteration: 139027
loss: 1.0089654922485352,grad_norm: 0.9999990356424082, iteration: 139028
loss: 1.039199709892273,grad_norm: 0.9999991351985387, iteration: 139029
loss: 1.0021817684173584,grad_norm: 0.9999992193256986, iteration: 139030
loss: 0.9962826371192932,grad_norm: 0.9714208783594317, iteration: 139031
loss: 1.0113787651062012,grad_norm: 0.8443662758880677, iteration: 139032
loss: 0.9912598133087158,grad_norm: 0.99999910129466, iteration: 139033
loss: 0.9798469543457031,grad_norm: 0.9999991062304051, iteration: 139034
loss: 1.0099990367889404,grad_norm: 0.9999991540651173, iteration: 139035
loss: 1.0022560358047485,grad_norm: 0.9999990700663035, iteration: 139036
loss: 0.9795942902565002,grad_norm: 0.99999909832187, iteration: 139037
loss: 1.0171979665756226,grad_norm: 0.9999990903685272, iteration: 139038
loss: 1.0357741117477417,grad_norm: 0.9999991579451437, iteration: 139039
loss: 1.02353036403656,grad_norm: 0.9734195783965148, iteration: 139040
loss: 0.9943571090698242,grad_norm: 0.91178215605319, iteration: 139041
loss: 1.0128157138824463,grad_norm: 0.999999323731165, iteration: 139042
loss: 1.0129715204238892,grad_norm: 0.959781439273072, iteration: 139043
loss: 1.0424690246582031,grad_norm: 0.9386713301536316, iteration: 139044
loss: 1.024593472480774,grad_norm: 0.9999995876804918, iteration: 139045
loss: 0.9894854426383972,grad_norm: 0.8742959002452555, iteration: 139046
loss: 0.9446366429328918,grad_norm: 0.9785720607747915, iteration: 139047
loss: 0.9920666217803955,grad_norm: 0.9237964605324736, iteration: 139048
loss: 0.986027717590332,grad_norm: 0.999999103805157, iteration: 139049
loss: 0.9829612970352173,grad_norm: 0.9956431522254094, iteration: 139050
loss: 1.0134028196334839,grad_norm: 0.9999991516091655, iteration: 139051
loss: 1.0120577812194824,grad_norm: 0.9999992182658602, iteration: 139052
loss: 0.9694806933403015,grad_norm: 0.9999990889105771, iteration: 139053
loss: 1.0217233896255493,grad_norm: 0.9999990727273759, iteration: 139054
loss: 0.9782885313034058,grad_norm: 0.9596019033062316, iteration: 139055
loss: 1.011691689491272,grad_norm: 0.8550663785519642, iteration: 139056
loss: 0.9651941061019897,grad_norm: 0.9999989687605269, iteration: 139057
loss: 0.9784385561943054,grad_norm: 0.9999991560306121, iteration: 139058
loss: 1.0114190578460693,grad_norm: 0.974669144002264, iteration: 139059
loss: 0.998143196105957,grad_norm: 0.999999304130279, iteration: 139060
loss: 0.9961971640586853,grad_norm: 0.8412935837744964, iteration: 139061
loss: 1.0173183679580688,grad_norm: 0.8697358609264544, iteration: 139062
loss: 0.9952105283737183,grad_norm: 0.9999990621078669, iteration: 139063
loss: 1.0077893733978271,grad_norm: 0.9999991262909608, iteration: 139064
loss: 0.9865122437477112,grad_norm: 0.9713094476234516, iteration: 139065
loss: 0.9807325601577759,grad_norm: 0.9438767242180987, iteration: 139066
loss: 0.9706783294677734,grad_norm: 0.8275751211447315, iteration: 139067
loss: 1.0105559825897217,grad_norm: 0.9999993250650384, iteration: 139068
loss: 0.9771762490272522,grad_norm: 0.9999991076912148, iteration: 139069
loss: 0.9413052797317505,grad_norm: 0.9634258090986954, iteration: 139070
loss: 0.9988918900489807,grad_norm: 0.9999990116677282, iteration: 139071
loss: 1.0352718830108643,grad_norm: 0.9999995963138057, iteration: 139072
loss: 0.9944674968719482,grad_norm: 0.9999991499730081, iteration: 139073
loss: 1.0267910957336426,grad_norm: 0.9999993092503675, iteration: 139074
loss: 0.9857856035232544,grad_norm: 0.9999992683184386, iteration: 139075
loss: 1.0117403268814087,grad_norm: 0.9999992283985519, iteration: 139076
loss: 1.0504939556121826,grad_norm: 0.9999990268568943, iteration: 139077
loss: 0.993830144405365,grad_norm: 0.9999989584551569, iteration: 139078
loss: 0.9654066562652588,grad_norm: 0.9940156980945096, iteration: 139079
loss: 1.0217533111572266,grad_norm: 0.8352213232302385, iteration: 139080
loss: 0.9748210906982422,grad_norm: 0.9999991606004509, iteration: 139081
loss: 0.9582251310348511,grad_norm: 0.9400443059213387, iteration: 139082
loss: 0.9670271873474121,grad_norm: 0.9999989244246913, iteration: 139083
loss: 1.0171706676483154,grad_norm: 0.9999990777877412, iteration: 139084
loss: 0.9873665571212769,grad_norm: 0.9386312853029598, iteration: 139085
loss: 1.0076590776443481,grad_norm: 0.9999991704909492, iteration: 139086
loss: 1.0326802730560303,grad_norm: 0.9713799582062765, iteration: 139087
loss: 1.0008395910263062,grad_norm: 0.9999990590045087, iteration: 139088
loss: 0.9671971201896667,grad_norm: 0.9999990891263065, iteration: 139089
loss: 0.9967978596687317,grad_norm: 0.9999992239507369, iteration: 139090
loss: 0.9805496335029602,grad_norm: 0.8787223076419599, iteration: 139091
loss: 1.0183625221252441,grad_norm: 0.895019320020266, iteration: 139092
loss: 0.9712951183319092,grad_norm: 0.9409262829591921, iteration: 139093
loss: 1.001326560974121,grad_norm: 0.9999992637312608, iteration: 139094
loss: 0.9832849502563477,grad_norm: 0.990496160561242, iteration: 139095
loss: 1.0037097930908203,grad_norm: 0.9402957930010011, iteration: 139096
loss: 0.9978224039077759,grad_norm: 0.9525071440107631, iteration: 139097
loss: 0.9658675193786621,grad_norm: 0.9392914131045716, iteration: 139098
loss: 0.9977758526802063,grad_norm: 0.9434723639482456, iteration: 139099
loss: 0.9666479825973511,grad_norm: 0.9340621628876445, iteration: 139100
loss: 0.9785276055335999,grad_norm: 0.893812969814645, iteration: 139101
loss: 0.9784059524536133,grad_norm: 0.9999989885308265, iteration: 139102
loss: 1.0449668169021606,grad_norm: 0.8520179801577269, iteration: 139103
loss: 1.031395673751831,grad_norm: 0.9999991690772779, iteration: 139104
loss: 0.9929999113082886,grad_norm: 0.999999224928818, iteration: 139105
loss: 0.970302939414978,grad_norm: 0.9999991398064177, iteration: 139106
loss: 1.0180872678756714,grad_norm: 0.9999997839039849, iteration: 139107
loss: 0.9950666427612305,grad_norm: 0.971262839467858, iteration: 139108
loss: 0.9762948751449585,grad_norm: 0.9999989815635943, iteration: 139109
loss: 0.9911603331565857,grad_norm: 0.8977426818755456, iteration: 139110
loss: 1.0029047727584839,grad_norm: 0.8098600744056049, iteration: 139111
loss: 1.0586786270141602,grad_norm: 0.9022944365658043, iteration: 139112
loss: 1.0118519067764282,grad_norm: 0.9999990347479628, iteration: 139113
loss: 1.0242522954940796,grad_norm: 0.9999991059639356, iteration: 139114
loss: 1.0023236274719238,grad_norm: 0.8851751158922724, iteration: 139115
loss: 0.9678802490234375,grad_norm: 0.9999991448700265, iteration: 139116
loss: 0.9840778112411499,grad_norm: 0.999999028669459, iteration: 139117
loss: 1.0492206811904907,grad_norm: 0.9471362694241809, iteration: 139118
loss: 0.9973270297050476,grad_norm: 0.9999991408416542, iteration: 139119
loss: 0.9760063886642456,grad_norm: 0.9152308649076966, iteration: 139120
loss: 0.9862510561943054,grad_norm: 0.9940547108021776, iteration: 139121
loss: 1.0417896509170532,grad_norm: 0.9999991257008147, iteration: 139122
loss: 1.0078619718551636,grad_norm: 0.9999990801350859, iteration: 139123
loss: 1.0092713832855225,grad_norm: 0.9942233420263256, iteration: 139124
loss: 1.0106617212295532,grad_norm: 0.9551649498365786, iteration: 139125
loss: 1.0133670568466187,grad_norm: 0.9999990147375046, iteration: 139126
loss: 0.9828458428382874,grad_norm: 0.9999990957052802, iteration: 139127
loss: 1.0030441284179688,grad_norm: 0.9999990203601078, iteration: 139128
loss: 0.9921602010726929,grad_norm: 0.847686709176606, iteration: 139129
loss: 0.9778730273246765,grad_norm: 0.9999991229811278, iteration: 139130
loss: 0.9925776124000549,grad_norm: 0.8550838026376327, iteration: 139131
loss: 0.9819645881652832,grad_norm: 0.999999088744567, iteration: 139132
loss: 0.9935786128044128,grad_norm: 0.9999991838096226, iteration: 139133
loss: 0.9918141961097717,grad_norm: 0.9999989675537099, iteration: 139134
loss: 0.9913966655731201,grad_norm: 0.9963650414298159, iteration: 139135
loss: 0.990086019039154,grad_norm: 0.9999991334589722, iteration: 139136
loss: 1.030851125717163,grad_norm: 0.999999472685017, iteration: 139137
loss: 0.9808896780014038,grad_norm: 0.8793972644432404, iteration: 139138
loss: 0.9873538613319397,grad_norm: 0.8935136951254568, iteration: 139139
loss: 1.0005080699920654,grad_norm: 0.999999640338619, iteration: 139140
loss: 1.0251437425613403,grad_norm: 0.9999989264379885, iteration: 139141
loss: 1.0239084959030151,grad_norm: 0.9689574018763466, iteration: 139142
loss: 0.9764081239700317,grad_norm: 0.9999992263103215, iteration: 139143
loss: 0.954200804233551,grad_norm: 0.9999991115110956, iteration: 139144
loss: 1.007181167602539,grad_norm: 0.9999990011755694, iteration: 139145
loss: 0.9949089288711548,grad_norm: 0.9427599215885064, iteration: 139146
loss: 0.9802255034446716,grad_norm: 0.885896080891299, iteration: 139147
loss: 0.9795423746109009,grad_norm: 0.9999991560222256, iteration: 139148
loss: 0.981731653213501,grad_norm: 0.9999989949261654, iteration: 139149
loss: 0.9956624507904053,grad_norm: 0.9999991278064906, iteration: 139150
loss: 0.9933651089668274,grad_norm: 0.8896210632293097, iteration: 139151
loss: 1.0032532215118408,grad_norm: 0.9999993112946897, iteration: 139152
loss: 0.9848968982696533,grad_norm: 0.9999991048762475, iteration: 139153
loss: 0.9995701313018799,grad_norm: 0.9999990167162778, iteration: 139154
loss: 0.993750274181366,grad_norm: 0.9162189452931997, iteration: 139155
loss: 0.962646484375,grad_norm: 0.949315966491106, iteration: 139156
loss: 0.9652695655822754,grad_norm: 0.9305081846359518, iteration: 139157
loss: 1.0270419120788574,grad_norm: 0.989452813031672, iteration: 139158
loss: 0.9985295534133911,grad_norm: 0.9999991641518291, iteration: 139159
loss: 0.9988939166069031,grad_norm: 0.9999990874992949, iteration: 139160
loss: 0.9981800317764282,grad_norm: 0.9999990678827597, iteration: 139161
loss: 1.0244091749191284,grad_norm: 0.9217900864870119, iteration: 139162
loss: 0.9868264198303223,grad_norm: 0.9240769500171632, iteration: 139163
loss: 1.0234551429748535,grad_norm: 0.9999994252074988, iteration: 139164
loss: 1.0009502172470093,grad_norm: 0.8798524873654945, iteration: 139165
loss: 0.9997220039367676,grad_norm: 0.9999991464529459, iteration: 139166
loss: 1.0505458116531372,grad_norm: 0.9999992841296336, iteration: 139167
loss: 1.0020214319229126,grad_norm: 0.9999992369867413, iteration: 139168
loss: 0.9891608357429504,grad_norm: 0.9999990664387965, iteration: 139169
loss: 1.021591305732727,grad_norm: 0.9999990875951792, iteration: 139170
loss: 1.0142936706542969,grad_norm: 0.9999995321724123, iteration: 139171
loss: 1.0151052474975586,grad_norm: 0.9999991392082492, iteration: 139172
loss: 0.9480668306350708,grad_norm: 0.8678614677920854, iteration: 139173
loss: 1.005530595779419,grad_norm: 0.8709300106137141, iteration: 139174
loss: 0.9896796345710754,grad_norm: 0.9999990193387158, iteration: 139175
loss: 1.016033411026001,grad_norm: 0.9184758477591493, iteration: 139176
loss: 1.0233834981918335,grad_norm: 0.9999995398780857, iteration: 139177
loss: 1.0273330211639404,grad_norm: 0.9655909714406201, iteration: 139178
loss: 1.0198429822921753,grad_norm: 0.999999203847218, iteration: 139179
loss: 1.0086075067520142,grad_norm: 0.9080626551984606, iteration: 139180
loss: 0.9834282398223877,grad_norm: 0.9999990660544582, iteration: 139181
loss: 1.0205178260803223,grad_norm: 0.9953114001212051, iteration: 139182
loss: 1.0266112089157104,grad_norm: 0.9999990140953706, iteration: 139183
loss: 0.9975687265396118,grad_norm: 0.999999113918271, iteration: 139184
loss: 0.9892531633377075,grad_norm: 0.9999990108803406, iteration: 139185
loss: 0.987474262714386,grad_norm: 0.999999027701721, iteration: 139186
loss: 0.9950618147850037,grad_norm: 0.9999991756517026, iteration: 139187
loss: 1.0189790725708008,grad_norm: 0.9433917700544586, iteration: 139188
loss: 0.9911433458328247,grad_norm: 0.9468620141400903, iteration: 139189
loss: 0.9778472185134888,grad_norm: 0.9634202444739899, iteration: 139190
loss: 1.0204415321350098,grad_norm: 0.999999082787003, iteration: 139191
loss: 1.0516033172607422,grad_norm: 0.9999991204566911, iteration: 139192
loss: 0.9747965931892395,grad_norm: 0.9999991847730894, iteration: 139193
loss: 0.9857317805290222,grad_norm: 0.9999991608066241, iteration: 139194
loss: 0.9738646149635315,grad_norm: 0.9999992227835157, iteration: 139195
loss: 0.9636479020118713,grad_norm: 0.999999083510618, iteration: 139196
loss: 1.0285481214523315,grad_norm: 0.9999990133608931, iteration: 139197
loss: 1.000977635383606,grad_norm: 0.9500749705173909, iteration: 139198
loss: 0.986657440662384,grad_norm: 0.8717211595191839, iteration: 139199
loss: 1.069212794303894,grad_norm: 0.9999996645159817, iteration: 139200
loss: 0.9947023391723633,grad_norm: 0.9742856502080146, iteration: 139201
loss: 1.0405316352844238,grad_norm: 0.9999992313829343, iteration: 139202
loss: 0.9839236736297607,grad_norm: 0.896043374887357, iteration: 139203
loss: 0.9963454604148865,grad_norm: 0.9811428451780233, iteration: 139204
loss: 0.9577187299728394,grad_norm: 0.9664188072134093, iteration: 139205
loss: 0.9978113174438477,grad_norm: 0.9999990724783923, iteration: 139206
loss: 1.0285990238189697,grad_norm: 0.9999993977254635, iteration: 139207
loss: 0.9534657597541809,grad_norm: 0.9999990923390294, iteration: 139208
loss: 1.074613094329834,grad_norm: 0.9999990873074744, iteration: 139209
loss: 1.0060662031173706,grad_norm: 0.9129378118118409, iteration: 139210
loss: 0.998830258846283,grad_norm: 0.9999991161301104, iteration: 139211
loss: 0.9723483920097351,grad_norm: 0.9757366333831555, iteration: 139212
loss: 1.0147724151611328,grad_norm: 0.9999989725544864, iteration: 139213
loss: 0.9677936434745789,grad_norm: 0.9941111075691488, iteration: 139214
loss: 1.0031449794769287,grad_norm: 0.9999990987831225, iteration: 139215
loss: 0.9817208051681519,grad_norm: 0.9999991082607296, iteration: 139216
loss: 0.9976663589477539,grad_norm: 0.9583190416445173, iteration: 139217
loss: 0.9534951448440552,grad_norm: 0.9709051311444914, iteration: 139218
loss: 0.969456672668457,grad_norm: 0.9999994944270962, iteration: 139219
loss: 1.0060224533081055,grad_norm: 0.8724306769222439, iteration: 139220
loss: 0.9845410585403442,grad_norm: 0.9185391410323205, iteration: 139221
loss: 1.0174269676208496,grad_norm: 0.9999992068786284, iteration: 139222
loss: 0.9945117235183716,grad_norm: 0.9999989075294338, iteration: 139223
loss: 0.959092915058136,grad_norm: 0.9999990342096691, iteration: 139224
loss: 0.9775524735450745,grad_norm: 0.9999991309171196, iteration: 139225
loss: 0.9935463666915894,grad_norm: 0.9265302886122079, iteration: 139226
loss: 0.9615291953086853,grad_norm: 0.9999991081313602, iteration: 139227
loss: 1.001306414604187,grad_norm: 0.9999990694461426, iteration: 139228
loss: 0.987399160861969,grad_norm: 0.984502292726246, iteration: 139229
loss: 1.0062360763549805,grad_norm: 0.9924579551180841, iteration: 139230
loss: 0.9487369060516357,grad_norm: 0.9596247447809448, iteration: 139231
loss: 1.0024609565734863,grad_norm: 0.8742443883087888, iteration: 139232
loss: 1.0015734434127808,grad_norm: 0.999999140717687, iteration: 139233
loss: 1.0065075159072876,grad_norm: 0.999999024715106, iteration: 139234
loss: 0.9983243942260742,grad_norm: 0.9999991735159948, iteration: 139235
loss: 0.9897099137306213,grad_norm: 0.9999991389054443, iteration: 139236
loss: 1.032319188117981,grad_norm: 0.9481156967833816, iteration: 139237
loss: 1.0057334899902344,grad_norm: 0.9999991040047388, iteration: 139238
loss: 0.9891058802604675,grad_norm: 0.9999991543741936, iteration: 139239
loss: 1.0030732154846191,grad_norm: 0.9355718089179945, iteration: 139240
loss: 0.9872339367866516,grad_norm: 0.9548278626665858, iteration: 139241
loss: 1.0288158655166626,grad_norm: 0.8147759480948384, iteration: 139242
loss: 1.0493569374084473,grad_norm: 0.9999992421760888, iteration: 139243
loss: 1.0058543682098389,grad_norm: 0.8525676542943633, iteration: 139244
loss: 0.9807385206222534,grad_norm: 0.9999991636260293, iteration: 139245
loss: 0.973585844039917,grad_norm: 0.9999991649689421, iteration: 139246
loss: 0.9924304485321045,grad_norm: 0.9014318505606739, iteration: 139247
loss: 0.9693268537521362,grad_norm: 0.9159903836966865, iteration: 139248
loss: 1.036759614944458,grad_norm: 0.9999991012457881, iteration: 139249
loss: 1.002500057220459,grad_norm: 0.999999044718556, iteration: 139250
loss: 0.9714166522026062,grad_norm: 0.9275608676895589, iteration: 139251
loss: 1.0247623920440674,grad_norm: 0.9999991071416326, iteration: 139252
loss: 0.9459788799285889,grad_norm: 0.9971281920330098, iteration: 139253
loss: 0.979483962059021,grad_norm: 0.8938502777803874, iteration: 139254
loss: 1.0091979503631592,grad_norm: 0.9895209478264698, iteration: 139255
loss: 1.0142486095428467,grad_norm: 0.9523514324283239, iteration: 139256
loss: 1.0174634456634521,grad_norm: 0.99999937070818, iteration: 139257
loss: 1.0392378568649292,grad_norm: 0.9999992450884312, iteration: 139258
loss: 0.9413286447525024,grad_norm: 0.9722537107042668, iteration: 139259
loss: 1.0276243686676025,grad_norm: 0.9999991332322136, iteration: 139260
loss: 1.0226526260375977,grad_norm: 0.9999991258089527, iteration: 139261
loss: 1.0362281799316406,grad_norm: 0.9999990935631469, iteration: 139262
loss: 1.001335620880127,grad_norm: 0.9999993931536588, iteration: 139263
loss: 1.0844926834106445,grad_norm: 0.9973080509056098, iteration: 139264
loss: 0.9959288239479065,grad_norm: 0.9999993987616502, iteration: 139265
loss: 0.9960458874702454,grad_norm: 0.9061659731767726, iteration: 139266
loss: 0.965971827507019,grad_norm: 0.925633856590763, iteration: 139267
loss: 0.9893069863319397,grad_norm: 0.9999992934575633, iteration: 139268
loss: 0.9915385246276855,grad_norm: 0.9394113814922133, iteration: 139269
loss: 0.9658223390579224,grad_norm: 0.9999991992404026, iteration: 139270
loss: 0.9827072620391846,grad_norm: 0.9830330706166266, iteration: 139271
loss: 1.0415561199188232,grad_norm: 0.9999996484483237, iteration: 139272
loss: 0.9922866225242615,grad_norm: 0.9999992009479698, iteration: 139273
loss: 1.0025362968444824,grad_norm: 0.8794603104833629, iteration: 139274
loss: 0.9921621084213257,grad_norm: 0.9039751892886738, iteration: 139275
loss: 1.0274196863174438,grad_norm: 0.9999996142678943, iteration: 139276
loss: 1.005914330482483,grad_norm: 0.9550877140565888, iteration: 139277
loss: 1.0121724605560303,grad_norm: 0.9999991572390502, iteration: 139278
loss: 1.0142303705215454,grad_norm: 0.9999992818150936, iteration: 139279
loss: 1.026931643486023,grad_norm: 0.9999990370691716, iteration: 139280
loss: 1.0179134607315063,grad_norm: 0.9999991324160357, iteration: 139281
loss: 1.0067317485809326,grad_norm: 0.999999190030186, iteration: 139282
loss: 0.9720613956451416,grad_norm: 0.9622122342167078, iteration: 139283
loss: 1.0123212337493896,grad_norm: 0.9999991416528982, iteration: 139284
loss: 0.9891740083694458,grad_norm: 0.9999991847104325, iteration: 139285
loss: 1.0377897024154663,grad_norm: 0.9999998006206527, iteration: 139286
loss: 1.0102592706680298,grad_norm: 0.9999991554269875, iteration: 139287
loss: 0.9773067235946655,grad_norm: 0.9105540361082705, iteration: 139288
loss: 1.011581301689148,grad_norm: 0.8533783108271157, iteration: 139289
loss: 1.0541183948516846,grad_norm: 0.9999990352217016, iteration: 139290
loss: 1.0099455118179321,grad_norm: 0.9999991871330023, iteration: 139291
loss: 1.0090547800064087,grad_norm: 0.9653824086942745, iteration: 139292
loss: 1.0012249946594238,grad_norm: 0.9999993144073431, iteration: 139293
loss: 1.010500431060791,grad_norm: 0.9999999030493933, iteration: 139294
loss: 1.0174134969711304,grad_norm: 0.9210733959756249, iteration: 139295
loss: 0.9874256253242493,grad_norm: 0.9999991529599964, iteration: 139296
loss: 1.0067404508590698,grad_norm: 0.9168296594227692, iteration: 139297
loss: 0.995566725730896,grad_norm: 0.9817561026213185, iteration: 139298
loss: 1.01095449924469,grad_norm: 0.9999991496600619, iteration: 139299
loss: 1.0245956182479858,grad_norm: 0.9999993026876636, iteration: 139300
loss: 0.994577169418335,grad_norm: 0.9842324880457833, iteration: 139301
loss: 1.107486367225647,grad_norm: 0.9999995657811795, iteration: 139302
loss: 1.032807469367981,grad_norm: 0.9999992485655501, iteration: 139303
loss: 0.9855940341949463,grad_norm: 0.9607463013951277, iteration: 139304
loss: 1.01791250705719,grad_norm: 0.8683855176268201, iteration: 139305
loss: 1.0598775148391724,grad_norm: 0.9215780179142451, iteration: 139306
loss: 1.004802942276001,grad_norm: 0.921143309224068, iteration: 139307
loss: 0.9820711016654968,grad_norm: 0.9999989682538106, iteration: 139308
loss: 0.9876511693000793,grad_norm: 0.9999991879096054, iteration: 139309
loss: 1.0080031156539917,grad_norm: 0.9999990781412993, iteration: 139310
loss: 0.9840368032455444,grad_norm: 0.8813329197776198, iteration: 139311
loss: 1.0284335613250732,grad_norm: 0.9105108680204648, iteration: 139312
loss: 0.9925921559333801,grad_norm: 0.9870628424462866, iteration: 139313
loss: 1.0151150226593018,grad_norm: 0.9999992772897727, iteration: 139314
loss: 0.9851894974708557,grad_norm: 0.9838330258637362, iteration: 139315
loss: 0.9954211711883545,grad_norm: 0.989645504835663, iteration: 139316
loss: 1.090561866760254,grad_norm: 0.9999998500952032, iteration: 139317
loss: 0.9654384851455688,grad_norm: 0.957497209587764, iteration: 139318
loss: 0.9826396107673645,grad_norm: 0.8368549463672879, iteration: 139319
loss: 0.9888297319412231,grad_norm: 0.9654284265914954, iteration: 139320
loss: 1.031699776649475,grad_norm: 0.9861149419224747, iteration: 139321
loss: 0.981015145778656,grad_norm: 0.8672197576722619, iteration: 139322
loss: 1.0255979299545288,grad_norm: 0.999999090352257, iteration: 139323
loss: 1.0438432693481445,grad_norm: 0.940728009377937, iteration: 139324
loss: 0.9830090403556824,grad_norm: 0.9999991168912533, iteration: 139325
loss: 1.0244438648223877,grad_norm: 0.999999200251959, iteration: 139326
loss: 0.993753969669342,grad_norm: 0.9682887190377398, iteration: 139327
loss: 1.0287681818008423,grad_norm: 0.9293801662307787, iteration: 139328
loss: 0.9773381948471069,grad_norm: 0.820925635928649, iteration: 139329
loss: 1.0551543235778809,grad_norm: 0.9999995514233888, iteration: 139330
loss: 0.9826546311378479,grad_norm: 0.9112676802046447, iteration: 139331
loss: 0.9710962772369385,grad_norm: 0.8991509076060021, iteration: 139332
loss: 0.9838761687278748,grad_norm: 0.9197829540289327, iteration: 139333
loss: 0.9927827715873718,grad_norm: 0.9999992735478377, iteration: 139334
loss: 0.9951626062393188,grad_norm: 0.9999990681513747, iteration: 139335
loss: 0.9559543132781982,grad_norm: 0.9999991128241433, iteration: 139336
loss: 0.984726071357727,grad_norm: 0.9870130987011367, iteration: 139337
loss: 1.0381529331207275,grad_norm: 0.9999994138483491, iteration: 139338
loss: 0.9953382611274719,grad_norm: 0.9305789888261263, iteration: 139339
loss: 0.9856458902359009,grad_norm: 0.9836880229020187, iteration: 139340
loss: 1.0057977437973022,grad_norm: 0.9999991594462544, iteration: 139341
loss: 0.9897656440734863,grad_norm: 0.9999990025129316, iteration: 139342
loss: 0.9819850921630859,grad_norm: 0.9999991867843449, iteration: 139343
loss: 1.0205342769622803,grad_norm: 0.867363900972084, iteration: 139344
loss: 0.9891390204429626,grad_norm: 0.9999991777051686, iteration: 139345
loss: 1.0319836139678955,grad_norm: 0.9999990499742947, iteration: 139346
loss: 1.0376392602920532,grad_norm: 0.9999992564094565, iteration: 139347
loss: 0.9809596538543701,grad_norm: 0.9954980423347826, iteration: 139348
loss: 0.9923796057701111,grad_norm: 0.9999992345785814, iteration: 139349
loss: 0.9872239232063293,grad_norm: 0.9999990872530795, iteration: 139350
loss: 0.9996121525764465,grad_norm: 0.9999989932003185, iteration: 139351
loss: 0.9975061416625977,grad_norm: 0.9999998306002188, iteration: 139352
loss: 1.0038319826126099,grad_norm: 0.9999990939937216, iteration: 139353
loss: 1.01251220703125,grad_norm: 0.9999991624492086, iteration: 139354
loss: 1.0090152025222778,grad_norm: 0.9999992560109033, iteration: 139355
loss: 1.021288514137268,grad_norm: 0.9999994142778692, iteration: 139356
loss: 1.0077369213104248,grad_norm: 0.9263283116175601, iteration: 139357
loss: 1.0248167514801025,grad_norm: 0.9999990240644369, iteration: 139358
loss: 0.9989443421363831,grad_norm: 0.9999991779105455, iteration: 139359
loss: 0.9740246534347534,grad_norm: 0.9999992578187794, iteration: 139360
loss: 1.0104954242706299,grad_norm: 0.9999989817585613, iteration: 139361
loss: 0.9802464246749878,grad_norm: 0.9999991864417086, iteration: 139362
loss: 0.992718517780304,grad_norm: 0.9826840738635296, iteration: 139363
loss: 0.9510679841041565,grad_norm: 0.9999993551089502, iteration: 139364
loss: 1.104112148284912,grad_norm: 0.9961350305675845, iteration: 139365
loss: 1.0015679597854614,grad_norm: 0.9999990763372645, iteration: 139366
loss: 0.9876031875610352,grad_norm: 0.9999992359004212, iteration: 139367
loss: 1.0021413564682007,grad_norm: 0.9834064853143006, iteration: 139368
loss: 1.0124433040618896,grad_norm: 0.9550381176360112, iteration: 139369
loss: 1.0161699056625366,grad_norm: 0.9501381872819431, iteration: 139370
loss: 1.0013431310653687,grad_norm: 0.9553277315642899, iteration: 139371
loss: 0.9593986868858337,grad_norm: 0.9999989409768367, iteration: 139372
loss: 0.9836565256118774,grad_norm: 0.9999990994272212, iteration: 139373
loss: 1.0323708057403564,grad_norm: 0.999999133245993, iteration: 139374
loss: 0.995814323425293,grad_norm: 0.9140809949671701, iteration: 139375
loss: 1.0158779621124268,grad_norm: 0.9999990837539061, iteration: 139376
loss: 1.0019720792770386,grad_norm: 0.9999992239025429, iteration: 139377
loss: 1.0073539018630981,grad_norm: 0.9276407763911174, iteration: 139378
loss: 0.9839776754379272,grad_norm: 0.9999990850989169, iteration: 139379
loss: 1.0001157522201538,grad_norm: 0.999999089123073, iteration: 139380
loss: 0.9794573783874512,grad_norm: 0.9999992004181915, iteration: 139381
loss: 0.9688204526901245,grad_norm: 0.9999990305977111, iteration: 139382
loss: 0.9912371635437012,grad_norm: 0.999998990409757, iteration: 139383
loss: 1.0039674043655396,grad_norm: 0.9171050231731764, iteration: 139384
loss: 0.9942184090614319,grad_norm: 0.9193419376464445, iteration: 139385
loss: 1.0141874551773071,grad_norm: 0.9999991921359042, iteration: 139386
loss: 1.0054441690444946,grad_norm: 0.9999998414396533, iteration: 139387
loss: 0.9954675436019897,grad_norm: 0.9060092885704162, iteration: 139388
loss: 0.9918587803840637,grad_norm: 0.9795376010998068, iteration: 139389
loss: 0.9928035140037537,grad_norm: 0.828048678901947, iteration: 139390
loss: 1.0001527070999146,grad_norm: 0.9999991380232413, iteration: 139391
loss: 1.0160044431686401,grad_norm: 0.9225212575451917, iteration: 139392
loss: 1.0084112882614136,grad_norm: 0.999999845049289, iteration: 139393
loss: 0.9881359338760376,grad_norm: 0.999999214516587, iteration: 139394
loss: 1.0121265649795532,grad_norm: 0.9999990449021905, iteration: 139395
loss: 1.0098507404327393,grad_norm: 0.9999991285022298, iteration: 139396
loss: 1.0233335494995117,grad_norm: 0.9441002355067721, iteration: 139397
loss: 0.9892326593399048,grad_norm: 0.9881335554274829, iteration: 139398
loss: 0.9878371357917786,grad_norm: 0.9999992306378613, iteration: 139399
loss: 1.038957953453064,grad_norm: 0.9999991935569557, iteration: 139400
loss: 0.988139808177948,grad_norm: 0.9999992094694634, iteration: 139401
loss: 1.0089631080627441,grad_norm: 0.938721281586204, iteration: 139402
loss: 0.9909648299217224,grad_norm: 0.9999992400443487, iteration: 139403
loss: 0.976826548576355,grad_norm: 0.999999185570712, iteration: 139404
loss: 0.9516938328742981,grad_norm: 0.9999991395419945, iteration: 139405
loss: 0.9649375081062317,grad_norm: 0.8779749573813143, iteration: 139406
loss: 0.9963537454605103,grad_norm: 0.8572787515141477, iteration: 139407
loss: 0.9990676641464233,grad_norm: 0.9999991214866243, iteration: 139408
loss: 0.961769163608551,grad_norm: 0.8590860686776063, iteration: 139409
loss: 0.9972376823425293,grad_norm: 0.9999992894273155, iteration: 139410
loss: 1.0298618078231812,grad_norm: 0.9999990913513291, iteration: 139411
loss: 1.0007730722427368,grad_norm: 0.973787499655348, iteration: 139412
loss: 1.014090657234192,grad_norm: 0.9999992230831013, iteration: 139413
loss: 1.0368086099624634,grad_norm: 0.9999991368639971, iteration: 139414
loss: 1.000038981437683,grad_norm: 0.9828729185916015, iteration: 139415
loss: 0.9780498147010803,grad_norm: 0.8998779759569515, iteration: 139416
loss: 1.2039180994033813,grad_norm: 0.999999293987684, iteration: 139417
loss: 1.0040199756622314,grad_norm: 0.9361573064928079, iteration: 139418
loss: 0.9838284850120544,grad_norm: 0.9999992162964194, iteration: 139419
loss: 0.9412253499031067,grad_norm: 0.9999992393636687, iteration: 139420
loss: 0.9944440722465515,grad_norm: 0.9999992723851183, iteration: 139421
loss: 0.9900722503662109,grad_norm: 0.9999992060842084, iteration: 139422
loss: 0.9968600273132324,grad_norm: 0.999999077869566, iteration: 139423
loss: 0.962155282497406,grad_norm: 0.9377509336773968, iteration: 139424
loss: 1.0210684537887573,grad_norm: 0.9999992017647481, iteration: 139425
loss: 1.0660278797149658,grad_norm: 0.999999891096082, iteration: 139426
loss: 1.0310522317886353,grad_norm: 0.9999992523490934, iteration: 139427
loss: 0.9889473915100098,grad_norm: 0.9999990688389446, iteration: 139428
loss: 1.0159666538238525,grad_norm: 0.9999991683226254, iteration: 139429
loss: 0.9864047765731812,grad_norm: 0.999999262180198, iteration: 139430
loss: 0.9906958341598511,grad_norm: 0.9764003473872294, iteration: 139431
loss: 1.0090832710266113,grad_norm: 0.9999992056651295, iteration: 139432
loss: 1.0012181997299194,grad_norm: 0.9505716622187413, iteration: 139433
loss: 1.016757845878601,grad_norm: 0.9999999031191945, iteration: 139434
loss: 0.9862651228904724,grad_norm: 0.9999992097596514, iteration: 139435
loss: 0.9878093600273132,grad_norm: 0.9999992534965763, iteration: 139436
loss: 1.022304654121399,grad_norm: 0.9258864729553206, iteration: 139437
loss: 1.0586603879928589,grad_norm: 0.9999995066373863, iteration: 139438
loss: 1.031972885131836,grad_norm: 0.9181133333822348, iteration: 139439
loss: 1.0457991361618042,grad_norm: 0.999999693183198, iteration: 139440
loss: 1.0259978771209717,grad_norm: 0.9999990806331267, iteration: 139441
loss: 1.0105515718460083,grad_norm: 0.9999990822495572, iteration: 139442
loss: 1.0062288045883179,grad_norm: 0.9798870795189375, iteration: 139443
loss: 1.0208535194396973,grad_norm: 0.9999990971183672, iteration: 139444
loss: 1.000956416130066,grad_norm: 0.8912750063864313, iteration: 139445
loss: 0.9725736379623413,grad_norm: 0.9999991983540105, iteration: 139446
loss: 1.0089521408081055,grad_norm: 0.9999990472959064, iteration: 139447
loss: 0.9982393383979797,grad_norm: 0.9999998620038486, iteration: 139448
loss: 0.9852187633514404,grad_norm: 0.9999991338991575, iteration: 139449
loss: 1.0404165983200073,grad_norm: 0.9999992287748077, iteration: 139450
loss: 0.9934461712837219,grad_norm: 0.9999990998348375, iteration: 139451
loss: 0.9997950792312622,grad_norm: 0.9999990695354934, iteration: 139452
loss: 1.005661964416504,grad_norm: 0.999999099716124, iteration: 139453
loss: 0.9652628898620605,grad_norm: 0.999999180154034, iteration: 139454
loss: 1.0065643787384033,grad_norm: 0.9942882740513224, iteration: 139455
loss: 1.024649739265442,grad_norm: 0.9999991292072808, iteration: 139456
loss: 1.0146484375,grad_norm: 0.9999992375603112, iteration: 139457
loss: 1.0364266633987427,grad_norm: 0.9999993140802871, iteration: 139458
loss: 0.9816195368766785,grad_norm: 0.9777090655936777, iteration: 139459
loss: 1.0649713277816772,grad_norm: 0.9999996186154082, iteration: 139460
loss: 1.004539966583252,grad_norm: 0.9607287261839106, iteration: 139461
loss: 1.0048248767852783,grad_norm: 0.9503973426623827, iteration: 139462
loss: 1.018003225326538,grad_norm: 0.9238333086406334, iteration: 139463
loss: 0.9959709048271179,grad_norm: 0.999999131633289, iteration: 139464
loss: 0.9993019700050354,grad_norm: 0.9794263111356196, iteration: 139465
loss: 1.0275945663452148,grad_norm: 0.9279273455563386, iteration: 139466
loss: 1.0441646575927734,grad_norm: 0.9520432194627529, iteration: 139467
loss: 0.9982774257659912,grad_norm: 0.8734662572518104, iteration: 139468
loss: 0.9918118715286255,grad_norm: 0.9999991726997401, iteration: 139469
loss: 0.9826420545578003,grad_norm: 0.9949403800321305, iteration: 139470
loss: 1.0384101867675781,grad_norm: 0.9999991594439775, iteration: 139471
loss: 0.9846435785293579,grad_norm: 0.9739074714082206, iteration: 139472
loss: 1.0447826385498047,grad_norm: 0.9999993813561477, iteration: 139473
loss: 0.9837731719017029,grad_norm: 0.9999989529190589, iteration: 139474
loss: 0.9950132369995117,grad_norm: 0.9999990407566663, iteration: 139475
loss: 1.00191068649292,grad_norm: 0.9592891125695018, iteration: 139476
loss: 0.9919655323028564,grad_norm: 0.9999993185991677, iteration: 139477
loss: 1.195433497428894,grad_norm: 0.9999998250124731, iteration: 139478
loss: 0.9962009191513062,grad_norm: 0.9999991929541151, iteration: 139479
loss: 0.9644232392311096,grad_norm: 0.9999990938012991, iteration: 139480
loss: 1.0029268264770508,grad_norm: 0.90850467907225, iteration: 139481
loss: 0.9898075461387634,grad_norm: 0.9403874799774518, iteration: 139482
loss: 1.0348118543624878,grad_norm: 0.9988671221922972, iteration: 139483
loss: 1.012146234512329,grad_norm: 0.9999990825726046, iteration: 139484
loss: 1.0259819030761719,grad_norm: 0.890342826491222, iteration: 139485
loss: 0.9959509372711182,grad_norm: 0.9046287827121501, iteration: 139486
loss: 0.9816651344299316,grad_norm: 0.9999990852023908, iteration: 139487
loss: 1.0288771390914917,grad_norm: 0.9999991380687897, iteration: 139488
loss: 0.9882489442825317,grad_norm: 0.9999992685023551, iteration: 139489
loss: 0.9644051790237427,grad_norm: 0.9999991332006383, iteration: 139490
loss: 1.0104289054870605,grad_norm: 0.8832117287958147, iteration: 139491
loss: 1.0019105672836304,grad_norm: 0.9921628912120471, iteration: 139492
loss: 0.9906519651412964,grad_norm: 0.9999991873846289, iteration: 139493
loss: 1.0074822902679443,grad_norm: 0.9999990942870468, iteration: 139494
loss: 1.022168755531311,grad_norm: 0.9768796485039153, iteration: 139495
loss: 0.958220899105072,grad_norm: 0.9781523985257171, iteration: 139496
loss: 1.0312496423721313,grad_norm: 0.9999996930399299, iteration: 139497
loss: 0.9909681081771851,grad_norm: 0.9999992135831793, iteration: 139498
loss: 1.0056602954864502,grad_norm: 0.9999991625316875, iteration: 139499
loss: 0.9989631772041321,grad_norm: 0.8242928972155363, iteration: 139500
loss: 1.0031840801239014,grad_norm: 0.9999991076633064, iteration: 139501
loss: 0.9949140548706055,grad_norm: 0.9437233866895882, iteration: 139502
loss: 1.0293622016906738,grad_norm: 0.9811329895767588, iteration: 139503
loss: 0.9919682741165161,grad_norm: 0.914924879068501, iteration: 139504
loss: 1.0367008447647095,grad_norm: 0.9999996908990765, iteration: 139505
loss: 1.0040391683578491,grad_norm: 0.9999989659377014, iteration: 139506
loss: 1.0173513889312744,grad_norm: 0.9999992016843288, iteration: 139507
loss: 0.9940195679664612,grad_norm: 0.9193611158059198, iteration: 139508
loss: 1.048654556274414,grad_norm: 0.9999991544720637, iteration: 139509
loss: 0.9990472197532654,grad_norm: 0.960797546153512, iteration: 139510
loss: 1.0379102230072021,grad_norm: 0.9999995595164607, iteration: 139511
loss: 1.0272703170776367,grad_norm: 0.9999990392266102, iteration: 139512
loss: 0.9998006820678711,grad_norm: 0.9278541177167917, iteration: 139513
loss: 0.9887966513633728,grad_norm: 0.9927768244161564, iteration: 139514
loss: 0.9952973127365112,grad_norm: 0.9999990721187391, iteration: 139515
loss: 1.0306984186172485,grad_norm: 0.9999993958870642, iteration: 139516
loss: 1.0118305683135986,grad_norm: 0.9999991900009069, iteration: 139517
loss: 1.0335710048675537,grad_norm: 0.9999991578278384, iteration: 139518
loss: 1.0265977382659912,grad_norm: 0.9999990633393612, iteration: 139519
loss: 1.2801481485366821,grad_norm: 0.9999998439810932, iteration: 139520
loss: 0.9944652318954468,grad_norm: 0.9999991514127842, iteration: 139521
loss: 1.0324532985687256,grad_norm: 0.9999989373213456, iteration: 139522
loss: 0.9913221001625061,grad_norm: 0.9999992026584564, iteration: 139523
loss: 1.0156632661819458,grad_norm: 0.9858452113185621, iteration: 139524
loss: 1.0426493883132935,grad_norm: 0.9999990733919544, iteration: 139525
loss: 0.9884536862373352,grad_norm: 0.9999989702844987, iteration: 139526
loss: 1.0350326299667358,grad_norm: 0.9479413091015937, iteration: 139527
loss: 1.0885893106460571,grad_norm: 0.9999992831666809, iteration: 139528
loss: 1.0167125463485718,grad_norm: 0.9616509160031748, iteration: 139529
loss: 1.030078649520874,grad_norm: 0.9999993768840454, iteration: 139530
loss: 1.0981510877609253,grad_norm: 0.9999996567748273, iteration: 139531
loss: 1.1271401643753052,grad_norm: 0.9999994386960223, iteration: 139532
loss: 0.9877041578292847,grad_norm: 0.9999991534804585, iteration: 139533
loss: 0.9929615259170532,grad_norm: 0.9999992676720745, iteration: 139534
loss: 0.978242039680481,grad_norm: 0.9999992105934132, iteration: 139535
loss: 0.9964143633842468,grad_norm: 1.0000000274268206, iteration: 139536
loss: 1.0078880786895752,grad_norm: 0.9999995848066361, iteration: 139537
loss: 0.9945173859596252,grad_norm: 0.9999991558432593, iteration: 139538
loss: 1.0134024620056152,grad_norm: 0.9999990856369577, iteration: 139539
loss: 1.0788142681121826,grad_norm: 0.999999300042668, iteration: 139540
loss: 0.9670246839523315,grad_norm: 0.9145420173263442, iteration: 139541
loss: 1.0067005157470703,grad_norm: 0.9999991109297138, iteration: 139542
loss: 0.9830725789070129,grad_norm: 0.9208737155563388, iteration: 139543
loss: 0.9576067328453064,grad_norm: 0.9119374086893034, iteration: 139544
loss: 0.9908843636512756,grad_norm: 0.958450831535371, iteration: 139545
loss: 1.0131089687347412,grad_norm: 0.9999989491046656, iteration: 139546
loss: 0.9543473720550537,grad_norm: 0.9999990610395529, iteration: 139547
loss: 0.9964548945426941,grad_norm: 0.9228897936164866, iteration: 139548
loss: 1.0275958776474,grad_norm: 0.9999996669293576, iteration: 139549
loss: 1.0823280811309814,grad_norm: 0.9350931669882268, iteration: 139550
loss: 1.0042349100112915,grad_norm: 0.9999991089658917, iteration: 139551
loss: 0.9757257699966431,grad_norm: 0.869417546249645, iteration: 139552
loss: 0.9809072017669678,grad_norm: 0.9999990781245819, iteration: 139553
loss: 1.0510056018829346,grad_norm: 0.9999994928503514, iteration: 139554
loss: 0.9860333800315857,grad_norm: 0.999999121696179, iteration: 139555
loss: 1.009537696838379,grad_norm: 0.9999992546099651, iteration: 139556
loss: 1.0019340515136719,grad_norm: 0.9715722135865451, iteration: 139557
loss: 1.0065653324127197,grad_norm: 0.9999990517219588, iteration: 139558
loss: 0.9712165594100952,grad_norm: 0.9999992147997881, iteration: 139559
loss: 1.0982270240783691,grad_norm: 0.9999998246781432, iteration: 139560
loss: 1.0393223762512207,grad_norm: 0.9999991629964208, iteration: 139561
loss: 1.0549280643463135,grad_norm: 0.9999998684512048, iteration: 139562
loss: 1.0515557527542114,grad_norm: 0.9999995510807881, iteration: 139563
loss: 0.9902256727218628,grad_norm: 0.999999810423438, iteration: 139564
loss: 1.0424069166183472,grad_norm: 0.9999990604405044, iteration: 139565
loss: 0.9831494092941284,grad_norm: 0.9908780065697664, iteration: 139566
loss: 0.9717112183570862,grad_norm: 0.9999990920963421, iteration: 139567
loss: 1.0567176342010498,grad_norm: 0.9999992572504263, iteration: 139568
loss: 1.0297362804412842,grad_norm: 0.9360521266536819, iteration: 139569
loss: 0.9917959570884705,grad_norm: 0.9999991071142269, iteration: 139570
loss: 1.039438247680664,grad_norm: 0.9999993582244342, iteration: 139571
loss: 1.0287197828292847,grad_norm: 0.9894386031537681, iteration: 139572
loss: 1.0138033628463745,grad_norm: 0.9999990323489334, iteration: 139573
loss: 1.0250424146652222,grad_norm: 0.9999991966618019, iteration: 139574
loss: 0.9543313980102539,grad_norm: 0.9999994916995316, iteration: 139575
loss: 0.9871224164962769,grad_norm: 0.8111684415959655, iteration: 139576
loss: 0.9913418889045715,grad_norm: 0.9999990782616878, iteration: 139577
loss: 1.160554051399231,grad_norm: 0.9999996359573161, iteration: 139578
loss: 0.9750856757164001,grad_norm: 0.9037294006858168, iteration: 139579
loss: 0.9956891536712646,grad_norm: 0.9358539205821349, iteration: 139580
loss: 1.0143351554870605,grad_norm: 0.999999180718362, iteration: 139581
loss: 0.9917694926261902,grad_norm: 0.9894147622623686, iteration: 139582
loss: 0.9988107681274414,grad_norm: 0.9999992431450762, iteration: 139583
loss: 0.9823439717292786,grad_norm: 0.9999990455553266, iteration: 139584
loss: 0.9919483065605164,grad_norm: 0.896307733647781, iteration: 139585
loss: 1.056871771812439,grad_norm: 0.9999994740017359, iteration: 139586
loss: 0.9905792474746704,grad_norm: 0.9999989939987755, iteration: 139587
loss: 0.9805947542190552,grad_norm: 0.9999990882799722, iteration: 139588
loss: 1.0688141584396362,grad_norm: 0.9999992719641461, iteration: 139589
loss: 0.987699031829834,grad_norm: 0.9999992220717474, iteration: 139590
loss: 1.0072908401489258,grad_norm: 0.9999991036520398, iteration: 139591
loss: 0.9605283737182617,grad_norm: 0.9953145738688453, iteration: 139592
loss: 0.9898836016654968,grad_norm: 0.9999989592220373, iteration: 139593
loss: 1.0221747159957886,grad_norm: 0.999999106065333, iteration: 139594
loss: 1.0164231061935425,grad_norm: 0.9999991385170813, iteration: 139595
loss: 0.9710633754730225,grad_norm: 0.9999990219158138, iteration: 139596
loss: 1.0274089574813843,grad_norm: 0.9682924520034762, iteration: 139597
loss: 1.0384010076522827,grad_norm: 0.9245135948780764, iteration: 139598
loss: 1.0461262464523315,grad_norm: 0.9999993170375261, iteration: 139599
loss: 1.0194522142410278,grad_norm: 0.9999991574616671, iteration: 139600
loss: 0.999302327632904,grad_norm: 0.999999812659313, iteration: 139601
loss: 1.0059435367584229,grad_norm: 0.9921133193346828, iteration: 139602
loss: 0.986296534538269,grad_norm: 0.9999991069824411, iteration: 139603
loss: 1.0125151872634888,grad_norm: 0.9999993951271303, iteration: 139604
loss: 0.9742302894592285,grad_norm: 0.9891940429205947, iteration: 139605
loss: 1.0330467224121094,grad_norm: 0.9999995952479345, iteration: 139606
loss: 1.008136510848999,grad_norm: 0.9999990086020711, iteration: 139607
loss: 0.9972572922706604,grad_norm: 0.9999993630630768, iteration: 139608
loss: 0.9534333944320679,grad_norm: 0.8892591287730255, iteration: 139609
loss: 0.9916915893554688,grad_norm: 0.9775383390787609, iteration: 139610
loss: 1.0471086502075195,grad_norm: 0.9999999846860642, iteration: 139611
loss: 0.9327709674835205,grad_norm: 0.9237410546113596, iteration: 139612
loss: 0.9726907014846802,grad_norm: 0.9999992043299222, iteration: 139613
loss: 0.9921957850456238,grad_norm: 0.9069075078224584, iteration: 139614
loss: 1.0255615711212158,grad_norm: 0.999999327314337, iteration: 139615
loss: 1.0034743547439575,grad_norm: 0.919583265035307, iteration: 139616
loss: 1.0516375303268433,grad_norm: 0.9999992802137753, iteration: 139617
loss: 0.9823415875434875,grad_norm: 0.9999990452312981, iteration: 139618
loss: 0.9992925524711609,grad_norm: 0.9999991137661587, iteration: 139619
loss: 1.0397032499313354,grad_norm: 0.999999190062121, iteration: 139620
loss: 0.9658124446868896,grad_norm: 0.9999994816794752, iteration: 139621
loss: 0.995304524898529,grad_norm: 0.8388031918540318, iteration: 139622
loss: 1.015606164932251,grad_norm: 0.9220979439886609, iteration: 139623
loss: 0.9813209176063538,grad_norm: 0.999999139595454, iteration: 139624
loss: 0.996681809425354,grad_norm: 0.9999991613332906, iteration: 139625
loss: 0.9578929543495178,grad_norm: 0.999999188354866, iteration: 139626
loss: 0.9930258989334106,grad_norm: 0.9999992773778539, iteration: 139627
loss: 1.0347938537597656,grad_norm: 0.9999993450615035, iteration: 139628
loss: 1.0096994638442993,grad_norm: 0.999999165820566, iteration: 139629
loss: 1.0244910717010498,grad_norm: 0.9999994629227433, iteration: 139630
loss: 1.0844080448150635,grad_norm: 0.8955404466826439, iteration: 139631
loss: 1.0232429504394531,grad_norm: 0.8319438292206174, iteration: 139632
loss: 1.032189965248108,grad_norm: 0.9999989785006529, iteration: 139633
loss: 1.0212287902832031,grad_norm: 0.9999989938966078, iteration: 139634
loss: 0.998786985874176,grad_norm: 0.9220339600094252, iteration: 139635
loss: 0.9519710540771484,grad_norm: 0.9999991855288567, iteration: 139636
loss: 1.0444343090057373,grad_norm: 0.9999990178554243, iteration: 139637
loss: 1.0062072277069092,grad_norm: 0.9999990651929892, iteration: 139638
loss: 1.0263450145721436,grad_norm: 0.9999990634110445, iteration: 139639
loss: 1.0032734870910645,grad_norm: 0.9999999485471684, iteration: 139640
loss: 1.077207088470459,grad_norm: 0.9999999544754762, iteration: 139641
loss: 0.9841542840003967,grad_norm: 0.9999992402919811, iteration: 139642
loss: 1.0645947456359863,grad_norm: 0.9999992211628984, iteration: 139643
loss: 1.0416816473007202,grad_norm: 0.9999990973359015, iteration: 139644
loss: 1.0583815574645996,grad_norm: 0.9999991726455224, iteration: 139645
loss: 0.9825734496116638,grad_norm: 0.9658580142484174, iteration: 139646
loss: 0.9590159058570862,grad_norm: 0.9624190122480751, iteration: 139647
loss: 0.9933426976203918,grad_norm: 0.819565430183627, iteration: 139648
loss: 1.0361180305480957,grad_norm: 0.9999989541759315, iteration: 139649
loss: 0.9926257729530334,grad_norm: 0.9999992206468713, iteration: 139650
loss: 0.9850209355354309,grad_norm: 0.9999992756100575, iteration: 139651
loss: 0.9983327388763428,grad_norm: 0.9536881251473578, iteration: 139652
loss: 0.9916973114013672,grad_norm: 0.9833443216097066, iteration: 139653
loss: 1.0419193506240845,grad_norm: 0.9999993907529052, iteration: 139654
loss: 1.0082008838653564,grad_norm: 0.9999995317051722, iteration: 139655
loss: 1.0080698728561401,grad_norm: 0.9999992304114036, iteration: 139656
loss: 0.9578317403793335,grad_norm: 0.9999990219775247, iteration: 139657
loss: 1.1453388929367065,grad_norm: 0.9999995094556553, iteration: 139658
loss: 0.9839463829994202,grad_norm: 0.9957094031149123, iteration: 139659
loss: 0.9883332848548889,grad_norm: 0.9999991887700335, iteration: 139660
loss: 1.0058002471923828,grad_norm: 0.9526091824311098, iteration: 139661
loss: 1.0197665691375732,grad_norm: 0.8850901446233994, iteration: 139662
loss: 1.0297918319702148,grad_norm: 0.9999992011445418, iteration: 139663
loss: 1.0000561475753784,grad_norm: 0.8814089548160737, iteration: 139664
loss: 0.9866063594818115,grad_norm: 0.9999991396630945, iteration: 139665
loss: 1.027827262878418,grad_norm: 0.9999992341198736, iteration: 139666
loss: 0.993999719619751,grad_norm: 0.9999992598771705, iteration: 139667
loss: 1.0022032260894775,grad_norm: 0.9999989089890853, iteration: 139668
loss: 1.0296709537506104,grad_norm: 0.9999990386800383, iteration: 139669
loss: 1.0359514951705933,grad_norm: 0.9999992050532489, iteration: 139670
loss: 1.0237258672714233,grad_norm: 0.999999147062217, iteration: 139671
loss: 1.009497880935669,grad_norm: 0.9999996214226764, iteration: 139672
loss: 0.9913263320922852,grad_norm: 0.9999991880352492, iteration: 139673
loss: 1.020680546760559,grad_norm: 0.9801241464436756, iteration: 139674
loss: 1.005007028579712,grad_norm: 0.9999990697579658, iteration: 139675
loss: 0.9899972081184387,grad_norm: 0.9999992852915383, iteration: 139676
loss: 1.012082815170288,grad_norm: 0.8122697179506794, iteration: 139677
loss: 0.980292558670044,grad_norm: 0.955438958867964, iteration: 139678
loss: 0.9955308437347412,grad_norm: 0.9999991051032431, iteration: 139679
loss: 0.9855639934539795,grad_norm: 0.9999990980192163, iteration: 139680
loss: 0.9639734625816345,grad_norm: 0.9999992620042208, iteration: 139681
loss: 0.9887463450431824,grad_norm: 0.999999288160993, iteration: 139682
loss: 0.9949464201927185,grad_norm: 0.9999991464021704, iteration: 139683
loss: 0.9929085969924927,grad_norm: 0.9868628491875752, iteration: 139684
loss: 1.034011960029602,grad_norm: 0.9999994564327062, iteration: 139685
loss: 0.9837735295295715,grad_norm: 0.9999991928860263, iteration: 139686
loss: 0.9974551796913147,grad_norm: 0.9999991161702931, iteration: 139687
loss: 1.087665319442749,grad_norm: 0.9999993601197281, iteration: 139688
loss: 1.0258711576461792,grad_norm: 0.9999996215854947, iteration: 139689
loss: 0.996873676776886,grad_norm: 0.999999242723564, iteration: 139690
loss: 0.9781547784805298,grad_norm: 0.900626603936521, iteration: 139691
loss: 0.9796390533447266,grad_norm: 0.9999992605258858, iteration: 139692
loss: 0.9918726086616516,grad_norm: 0.9999991286045988, iteration: 139693
loss: 0.9466493129730225,grad_norm: 0.9999991102164529, iteration: 139694
loss: 1.0130535364151,grad_norm: 0.9999990907771826, iteration: 139695
loss: 0.9848342537879944,grad_norm: 0.9999993219519392, iteration: 139696
loss: 0.9596558809280396,grad_norm: 0.9999991338933554, iteration: 139697
loss: 0.9870425462722778,grad_norm: 0.9999993415075384, iteration: 139698
loss: 0.9984881281852722,grad_norm: 0.9999991670858162, iteration: 139699
loss: 1.0080511569976807,grad_norm: 0.899611950447358, iteration: 139700
loss: 1.0143033266067505,grad_norm: 0.951313560580579, iteration: 139701
loss: 1.015226125717163,grad_norm: 0.9999990475527561, iteration: 139702
loss: 0.9903342723846436,grad_norm: 0.9999992963873453, iteration: 139703
loss: 0.9950363636016846,grad_norm: 0.9999990858169371, iteration: 139704
loss: 0.9811919331550598,grad_norm: 0.9999989820792659, iteration: 139705
loss: 1.043350338935852,grad_norm: 0.9999991896571252, iteration: 139706
loss: 0.9979763031005859,grad_norm: 0.9999990562086174, iteration: 139707
loss: 1.0318220853805542,grad_norm: 0.999999192902413, iteration: 139708
loss: 0.9678391218185425,grad_norm: 0.9986724945437118, iteration: 139709
loss: 0.9724660515785217,grad_norm: 0.9985703379275674, iteration: 139710
loss: 0.9952855706214905,grad_norm: 0.9999989790701055, iteration: 139711
loss: 0.9915611743927002,grad_norm: 0.9999990944814261, iteration: 139712
loss: 0.9864301681518555,grad_norm: 0.7995029987097336, iteration: 139713
loss: 1.006100058555603,grad_norm: 0.9999991971628382, iteration: 139714
loss: 1.0362910032272339,grad_norm: 0.9999990993573088, iteration: 139715
loss: 1.0170915126800537,grad_norm: 0.999999120763108, iteration: 139716
loss: 1.067396640777588,grad_norm: 0.9999998354228535, iteration: 139717
loss: 0.9882932305335999,grad_norm: 0.9999990624116954, iteration: 139718
loss: 1.0258150100708008,grad_norm: 0.854573615067728, iteration: 139719
loss: 0.9925261735916138,grad_norm: 0.9999993826851499, iteration: 139720
loss: 0.9891170859336853,grad_norm: 0.9387434112829857, iteration: 139721
loss: 1.0370458364486694,grad_norm: 0.9999994576109743, iteration: 139722
loss: 0.9929598569869995,grad_norm: 0.9680815366111188, iteration: 139723
loss: 1.0304909944534302,grad_norm: 0.9999990627351645, iteration: 139724
loss: 0.9696512222290039,grad_norm: 0.9999992143793143, iteration: 139725
loss: 1.0057770013809204,grad_norm: 0.9999991931636943, iteration: 139726
loss: 1.0444200038909912,grad_norm: 0.9999997328340464, iteration: 139727
loss: 0.9816190004348755,grad_norm: 0.9999991883753194, iteration: 139728
loss: 1.0224744081497192,grad_norm: 0.9999999069638096, iteration: 139729
loss: 1.007797360420227,grad_norm: 0.9669486475264006, iteration: 139730
loss: 1.0322074890136719,grad_norm: 0.9487812996093159, iteration: 139731
loss: 1.014348030090332,grad_norm: 0.9999994839838777, iteration: 139732
loss: 1.1816383600234985,grad_norm: 0.9999999258142671, iteration: 139733
loss: 1.0372328758239746,grad_norm: 0.999999225542952, iteration: 139734
loss: 0.9929115772247314,grad_norm: 0.9999991683548954, iteration: 139735
loss: 1.0005061626434326,grad_norm: 0.9999990412125925, iteration: 139736
loss: 1.004042148590088,grad_norm: 0.8128841666487094, iteration: 139737
loss: 1.055092453956604,grad_norm: 0.999999345960911, iteration: 139738
loss: 0.9977468848228455,grad_norm: 0.9999991608148214, iteration: 139739
loss: 1.017878770828247,grad_norm: 0.9541439249857055, iteration: 139740
loss: 1.0306265354156494,grad_norm: 0.9999995914568339, iteration: 139741
loss: 0.9944571852684021,grad_norm: 0.9683458005076598, iteration: 139742
loss: 0.9977240562438965,grad_norm: 0.9611264665203952, iteration: 139743
loss: 1.0188316106796265,grad_norm: 0.9465292678412532, iteration: 139744
loss: 1.0046360492706299,grad_norm: 0.9999991039683318, iteration: 139745
loss: 0.9746353030204773,grad_norm: 0.8261385662360405, iteration: 139746
loss: 0.9721677899360657,grad_norm: 0.9999991210796795, iteration: 139747
loss: 1.0263214111328125,grad_norm: 0.9999990593021152, iteration: 139748
loss: 1.010201334953308,grad_norm: 0.9429253034797902, iteration: 139749
loss: 0.9802677035331726,grad_norm: 0.9734490702398066, iteration: 139750
loss: 0.9490591287612915,grad_norm: 0.9999990803155784, iteration: 139751
loss: 1.0166233777999878,grad_norm: 0.9999989629773617, iteration: 139752
loss: 0.9917413592338562,grad_norm: 0.8997908200548329, iteration: 139753
loss: 1.046661138534546,grad_norm: 0.9999990361939608, iteration: 139754
loss: 0.9799289703369141,grad_norm: 0.9263400951500903, iteration: 139755
loss: 0.9968036413192749,grad_norm: 0.9871979107485753, iteration: 139756
loss: 1.0071041584014893,grad_norm: 1.000000066978137, iteration: 139757
loss: 1.0512402057647705,grad_norm: 0.9999993228029297, iteration: 139758
loss: 1.0215389728546143,grad_norm: 0.9999995006937177, iteration: 139759
loss: 1.024639368057251,grad_norm: 0.999999081759721, iteration: 139760
loss: 0.9640927314758301,grad_norm: 0.999998998374947, iteration: 139761
loss: 0.9461142420768738,grad_norm: 0.9999991389989141, iteration: 139762
loss: 1.0115382671356201,grad_norm: 0.9999991799366889, iteration: 139763
loss: 1.0684527158737183,grad_norm: 0.8737770643967645, iteration: 139764
loss: 1.0360604524612427,grad_norm: 0.956916575763955, iteration: 139765
loss: 0.9871629476547241,grad_norm: 0.9416062845809371, iteration: 139766
loss: 0.9981810450553894,grad_norm: 0.9999991197492298, iteration: 139767
loss: 1.0155445337295532,grad_norm: 0.9999992672965403, iteration: 139768
loss: 1.0421055555343628,grad_norm: 0.9999991081218486, iteration: 139769
loss: 0.9323244094848633,grad_norm: 0.9529637463915487, iteration: 139770
loss: 0.986563503742218,grad_norm: 0.9999991283241048, iteration: 139771
loss: 0.9879655838012695,grad_norm: 0.8441119034795225, iteration: 139772
loss: 1.0419394969940186,grad_norm: 0.9999996555148319, iteration: 139773
loss: 1.0269701480865479,grad_norm: 0.9999990364968154, iteration: 139774
loss: 1.0340728759765625,grad_norm: 0.9467566154786975, iteration: 139775
loss: 1.0142107009887695,grad_norm: 0.9999991383221083, iteration: 139776
loss: 1.0585511922836304,grad_norm: 0.9999998012638257, iteration: 139777
loss: 1.0230106115341187,grad_norm: 0.9999993220505895, iteration: 139778
loss: 1.0114543437957764,grad_norm: 0.9999992376934356, iteration: 139779
loss: 1.0043106079101562,grad_norm: 0.9999989634954608, iteration: 139780
loss: 1.0167855024337769,grad_norm: 0.9999990523612577, iteration: 139781
loss: 1.008178949356079,grad_norm: 0.9999992078866993, iteration: 139782
loss: 1.067971110343933,grad_norm: 0.9999995872389184, iteration: 139783
loss: 0.9952975511550903,grad_norm: 0.983966671422167, iteration: 139784
loss: 1.0388821363449097,grad_norm: 0.9999991566104236, iteration: 139785
loss: 1.0067118406295776,grad_norm: 0.8341693219754722, iteration: 139786
loss: 1.0772082805633545,grad_norm: 0.9999990151327217, iteration: 139787
loss: 1.0141218900680542,grad_norm: 0.9999991737448954, iteration: 139788
loss: 1.0299874544143677,grad_norm: 0.9999990221779826, iteration: 139789
loss: 1.0286473035812378,grad_norm: 0.9999993775684342, iteration: 139790
loss: 1.0204020738601685,grad_norm: 0.9999992721428967, iteration: 139791
loss: 1.0682567358016968,grad_norm: 0.9999995278802003, iteration: 139792
loss: 1.0217556953430176,grad_norm: 0.9999990044696473, iteration: 139793
loss: 1.032797932624817,grad_norm: 0.9999994557102264, iteration: 139794
loss: 1.0033115148544312,grad_norm: 0.9999991777412978, iteration: 139795
loss: 0.9583683609962463,grad_norm: 0.9999992524661606, iteration: 139796
loss: 1.0520596504211426,grad_norm: 0.9999994790653675, iteration: 139797
loss: 1.0515156984329224,grad_norm: 0.9999991328330365, iteration: 139798
loss: 1.090026617050171,grad_norm: 0.9999992646515999, iteration: 139799
loss: 0.9785884022712708,grad_norm: 0.9999992612283097, iteration: 139800
loss: 1.0015650987625122,grad_norm: 0.9999991145129473, iteration: 139801
loss: 1.0326735973358154,grad_norm: 0.9999991716366391, iteration: 139802
loss: 1.0083165168762207,grad_norm: 0.9525242788323197, iteration: 139803
loss: 0.9687317609786987,grad_norm: 0.9999991618195027, iteration: 139804
loss: 1.0283191204071045,grad_norm: 0.9999991211094261, iteration: 139805
loss: 0.9773197174072266,grad_norm: 0.9999999521364816, iteration: 139806
loss: 1.0321581363677979,grad_norm: 0.8586931665886186, iteration: 139807
loss: 1.0147624015808105,grad_norm: 0.9707465810622263, iteration: 139808
loss: 1.008773684501648,grad_norm: 0.9999991009282754, iteration: 139809
loss: 1.0124086141586304,grad_norm: 0.9999989345340532, iteration: 139810
loss: 0.9967511296272278,grad_norm: 0.9999991426802173, iteration: 139811
loss: 1.0605030059814453,grad_norm: 0.9999991353898318, iteration: 139812
loss: 1.0500484704971313,grad_norm: 0.9999990577093143, iteration: 139813
loss: 1.0579084157943726,grad_norm: 0.9999997955051916, iteration: 139814
loss: 0.9725596904754639,grad_norm: 0.988545906959496, iteration: 139815
loss: 1.009296178817749,grad_norm: 0.9508855048125672, iteration: 139816
loss: 1.0054740905761719,grad_norm: 0.999999144264885, iteration: 139817
loss: 0.9629982709884644,grad_norm: 0.9112106450617807, iteration: 139818
loss: 0.9789718389511108,grad_norm: 0.9999991525165713, iteration: 139819
loss: 1.0038177967071533,grad_norm: 0.8527636275737357, iteration: 139820
loss: 0.9982411861419678,grad_norm: 0.9999991623147502, iteration: 139821
loss: 1.0177247524261475,grad_norm: 0.9999991198796154, iteration: 139822
loss: 0.987473726272583,grad_norm: 0.9999989691159559, iteration: 139823
loss: 0.9886654019355774,grad_norm: 0.9999997557816006, iteration: 139824
loss: 1.0207090377807617,grad_norm: 0.999999427031976, iteration: 139825
loss: 1.0512577295303345,grad_norm: 0.999999487470376, iteration: 139826
loss: 0.9778996706008911,grad_norm: 0.9707110055968337, iteration: 139827
loss: 1.0231364965438843,grad_norm: 0.9045917087236165, iteration: 139828
loss: 1.0151761770248413,grad_norm: 0.9999993017876849, iteration: 139829
loss: 0.997685432434082,grad_norm: 0.9966541073692867, iteration: 139830
loss: 1.0037089586257935,grad_norm: 0.8483933228767213, iteration: 139831
loss: 1.0049175024032593,grad_norm: 0.9999989547588967, iteration: 139832
loss: 1.0662500858306885,grad_norm: 0.999999450793884, iteration: 139833
loss: 1.089887261390686,grad_norm: 0.9999999841970564, iteration: 139834
loss: 0.986054003238678,grad_norm: 0.9999990548352479, iteration: 139835
loss: 1.011423945426941,grad_norm: 0.9999991572127357, iteration: 139836
loss: 0.9750155806541443,grad_norm: 0.9999990850979869, iteration: 139837
loss: 1.0094854831695557,grad_norm: 0.9999989658298493, iteration: 139838
loss: 1.0170197486877441,grad_norm: 0.9999998777218299, iteration: 139839
loss: 1.006133794784546,grad_norm: 0.9999992845619419, iteration: 139840
loss: 1.008657693862915,grad_norm: 0.8441364444280134, iteration: 139841
loss: 1.0069677829742432,grad_norm: 0.9752978946793294, iteration: 139842
loss: 1.0075147151947021,grad_norm: 0.9899557561831275, iteration: 139843
loss: 1.0089242458343506,grad_norm: 0.9999991893536305, iteration: 139844
loss: 0.9900883436203003,grad_norm: 0.9999990821608051, iteration: 139845
loss: 0.960544228553772,grad_norm: 0.9737065326853594, iteration: 139846
loss: 1.0358127355575562,grad_norm: 0.9999999719177178, iteration: 139847
loss: 1.032546877861023,grad_norm: 0.9999992283933987, iteration: 139848
loss: 1.0008286237716675,grad_norm: 0.999999107574103, iteration: 139849
loss: 1.0231306552886963,grad_norm: 0.9999992785272617, iteration: 139850
loss: 0.9955818057060242,grad_norm: 0.9999992676007682, iteration: 139851
loss: 1.0611952543258667,grad_norm: 0.9999993378825398, iteration: 139852
loss: 1.007506012916565,grad_norm: 0.9999991744898831, iteration: 139853
loss: 1.007583737373352,grad_norm: 0.9217248802686515, iteration: 139854
loss: 1.0138603448867798,grad_norm: 0.9999989948441153, iteration: 139855
loss: 1.0210338830947876,grad_norm: 0.9999991883082856, iteration: 139856
loss: 0.9705610871315002,grad_norm: 0.9999990003178408, iteration: 139857
loss: 0.9602323174476624,grad_norm: 0.9999992101396983, iteration: 139858
loss: 1.0181684494018555,grad_norm: 0.9999989590625478, iteration: 139859
loss: 1.0155795812606812,grad_norm: 0.9172759183101763, iteration: 139860
loss: 1.0153225660324097,grad_norm: 0.9999991418786415, iteration: 139861
loss: 0.9958459138870239,grad_norm: 0.9710707663608026, iteration: 139862
loss: 1.0615358352661133,grad_norm: 0.9999990783226242, iteration: 139863
loss: 1.0236915349960327,grad_norm: 0.9999990354714456, iteration: 139864
loss: 0.9940395951271057,grad_norm: 0.9999990211354146, iteration: 139865
loss: 1.0294771194458008,grad_norm: 0.9375341002759917, iteration: 139866
loss: 1.045779824256897,grad_norm: 0.9999993378742185, iteration: 139867
loss: 1.001172423362732,grad_norm: 0.9365876275683847, iteration: 139868
loss: 0.9733594059944153,grad_norm: 0.8627531040552024, iteration: 139869
loss: 0.9830902218818665,grad_norm: 0.9999993197095529, iteration: 139870
loss: 1.0056895017623901,grad_norm: 0.9999991446441847, iteration: 139871
loss: 0.9852327108383179,grad_norm: 0.9104869326481282, iteration: 139872
loss: 1.0023924112319946,grad_norm: 0.9999990785176958, iteration: 139873
loss: 1.0367828607559204,grad_norm: 0.9241389915212422, iteration: 139874
loss: 0.9933730363845825,grad_norm: 0.9999994172829787, iteration: 139875
loss: 1.364646553993225,grad_norm: 0.9999998606873955, iteration: 139876
loss: 1.006164312362671,grad_norm: 0.9999993481585178, iteration: 139877
loss: 1.0160380601882935,grad_norm: 0.8706592484271019, iteration: 139878
loss: 0.9782209992408752,grad_norm: 0.9546408778423564, iteration: 139879
loss: 0.9882060289382935,grad_norm: 0.9999991681244921, iteration: 139880
loss: 1.022436261177063,grad_norm: 0.9999991962922168, iteration: 139881
loss: 0.985027015209198,grad_norm: 0.9999990939850592, iteration: 139882
loss: 1.0037261247634888,grad_norm: 0.9616783545991903, iteration: 139883
loss: 0.9717013239860535,grad_norm: 0.9999992179075419, iteration: 139884
loss: 1.2087498903274536,grad_norm: 0.9999999743452754, iteration: 139885
loss: 1.016802191734314,grad_norm: 0.9999989035903094, iteration: 139886
loss: 1.018162727355957,grad_norm: 0.9750300937259873, iteration: 139887
loss: 1.0177249908447266,grad_norm: 0.9999992710458722, iteration: 139888
loss: 1.013794183731079,grad_norm: 0.9999992877760338, iteration: 139889
loss: 1.009827971458435,grad_norm: 0.9999996334988537, iteration: 139890
loss: 1.016086220741272,grad_norm: 0.9999991402207379, iteration: 139891
loss: 1.0002855062484741,grad_norm: 0.9999991436394984, iteration: 139892
loss: 0.9835606217384338,grad_norm: 0.9734238393165412, iteration: 139893
loss: 0.9885120987892151,grad_norm: 0.9999989154989657, iteration: 139894
loss: 0.971203625202179,grad_norm: 0.9999990772452056, iteration: 139895
loss: 0.9709181785583496,grad_norm: 0.9999990068271967, iteration: 139896
loss: 0.9965630173683167,grad_norm: 0.8721941163839636, iteration: 139897
loss: 0.9983362555503845,grad_norm: 0.9999989978964317, iteration: 139898
loss: 0.9786984920501709,grad_norm: 0.9999994838938588, iteration: 139899
loss: 1.0171587467193604,grad_norm: 0.9567939291067182, iteration: 139900
loss: 1.0225193500518799,grad_norm: 0.9503872760960324, iteration: 139901
loss: 0.9895166754722595,grad_norm: 0.9101722168013519, iteration: 139902
loss: 0.976514995098114,grad_norm: 0.9905478232576557, iteration: 139903
loss: 1.0210360288619995,grad_norm: 0.9330849651130114, iteration: 139904
loss: 0.9880070090293884,grad_norm: 0.9999992473722484, iteration: 139905
loss: 1.0021381378173828,grad_norm: 0.9999993345707969, iteration: 139906
loss: 0.98111492395401,grad_norm: 0.9999991552665738, iteration: 139907
loss: 1.010446310043335,grad_norm: 0.9728759820497511, iteration: 139908
loss: 1.0273975133895874,grad_norm: 0.9999990624387565, iteration: 139909
loss: 0.9972148537635803,grad_norm: 0.9999991508497216, iteration: 139910
loss: 1.04578697681427,grad_norm: 0.9999991646448816, iteration: 139911
loss: 0.9986379742622375,grad_norm: 0.9929257151330618, iteration: 139912
loss: 1.0045897960662842,grad_norm: 0.9999993689549803, iteration: 139913
loss: 0.9982631802558899,grad_norm: 0.9227869807024967, iteration: 139914
loss: 1.0550761222839355,grad_norm: 0.9999996636280409, iteration: 139915
loss: 1.0078426599502563,grad_norm: 0.9999990725497693, iteration: 139916
loss: 1.0011833906173706,grad_norm: 0.9999990133377887, iteration: 139917
loss: 1.008896827697754,grad_norm: 0.7913769797701767, iteration: 139918
loss: 1.0667245388031006,grad_norm: 0.9999999035419063, iteration: 139919
loss: 1.02910578250885,grad_norm: 0.9999992392075299, iteration: 139920
loss: 1.003880262374878,grad_norm: 0.9999997407718267, iteration: 139921
loss: 1.02781081199646,grad_norm: 0.9028860404093663, iteration: 139922
loss: 1.0236414670944214,grad_norm: 0.9353043484608939, iteration: 139923
loss: 0.974489152431488,grad_norm: 0.9999994718243491, iteration: 139924
loss: 0.9975202679634094,grad_norm: 0.9999990291134867, iteration: 139925
loss: 0.9694360494613647,grad_norm: 0.9999992230553, iteration: 139926
loss: 0.9759334325790405,grad_norm: 0.9999989714649947, iteration: 139927
loss: 0.9802608489990234,grad_norm: 0.999999224337439, iteration: 139928
loss: 1.0178625583648682,grad_norm: 0.8338647313698148, iteration: 139929
loss: 0.9810397028923035,grad_norm: 0.9999990929076321, iteration: 139930
loss: 0.993907630443573,grad_norm: 0.9999991288121759, iteration: 139931
loss: 1.044356107711792,grad_norm: 0.9999997063138478, iteration: 139932
loss: 0.9945548176765442,grad_norm: 0.9999990860081999, iteration: 139933
loss: 0.988235354423523,grad_norm: 0.9999991192396496, iteration: 139934
loss: 0.9617171883583069,grad_norm: 0.9046851661289406, iteration: 139935
loss: 0.997347891330719,grad_norm: 0.9765166547430685, iteration: 139936
loss: 1.0039879083633423,grad_norm: 0.9999994205155649, iteration: 139937
loss: 1.0133318901062012,grad_norm: 0.9999991619562213, iteration: 139938
loss: 1.042551875114441,grad_norm: 0.9999998133051045, iteration: 139939
loss: 1.016802430152893,grad_norm: 0.9999991463639998, iteration: 139940
loss: 1.0303311347961426,grad_norm: 0.9999992673708503, iteration: 139941
loss: 1.0353130102157593,grad_norm: 0.9999992176099967, iteration: 139942
loss: 1.0681718587875366,grad_norm: 0.9999992048083798, iteration: 139943
loss: 1.0932857990264893,grad_norm: 0.9999993786179715, iteration: 139944
loss: 1.039744257926941,grad_norm: 0.99999930311846, iteration: 139945
loss: 0.9884827733039856,grad_norm: 0.9999990482922934, iteration: 139946
loss: 1.001070499420166,grad_norm: 0.9685642682449088, iteration: 139947
loss: 1.000414252281189,grad_norm: 0.9905792763030962, iteration: 139948
loss: 1.0269930362701416,grad_norm: 0.9999998946533578, iteration: 139949
loss: 1.0299614667892456,grad_norm: 0.9999996092788875, iteration: 139950
loss: 0.9937049150466919,grad_norm: 0.9430396890865065, iteration: 139951
loss: 1.0075589418411255,grad_norm: 0.9999992374146941, iteration: 139952
loss: 1.1116163730621338,grad_norm: 0.9596526448483482, iteration: 139953
loss: 1.0786991119384766,grad_norm: 0.9999991443944252, iteration: 139954
loss: 1.0192346572875977,grad_norm: 0.9175002414764635, iteration: 139955
loss: 1.021294116973877,grad_norm: 0.922288569319594, iteration: 139956
loss: 0.9415515661239624,grad_norm: 0.9898885003367828, iteration: 139957
loss: 1.1057138442993164,grad_norm: 0.9999995279122251, iteration: 139958
loss: 1.008735179901123,grad_norm: 0.9999991451126776, iteration: 139959
loss: 1.1036064624786377,grad_norm: 0.9999998689780427, iteration: 139960
loss: 1.0022238492965698,grad_norm: 0.9549255668992338, iteration: 139961
loss: 0.9890145659446716,grad_norm: 0.9999992741489327, iteration: 139962
loss: 0.9824137091636658,grad_norm: 0.9455169603626954, iteration: 139963
loss: 1.2659553289413452,grad_norm: 0.999999814033371, iteration: 139964
loss: 1.027923345565796,grad_norm: 0.9999992763201955, iteration: 139965
loss: 1.2958650588989258,grad_norm: 0.9999994472836019, iteration: 139966
loss: 1.0196237564086914,grad_norm: 0.9999996554664368, iteration: 139967
loss: 0.9927688837051392,grad_norm: 0.9999991265581883, iteration: 139968
loss: 1.3443118333816528,grad_norm: 0.9999997168757844, iteration: 139969
loss: 1.1082321405410767,grad_norm: 0.999999411251029, iteration: 139970
loss: 1.3207895755767822,grad_norm: 0.9999997189083406, iteration: 139971
loss: 1.177395224571228,grad_norm: 0.9999992115986783, iteration: 139972
loss: 1.2674304246902466,grad_norm: 0.9999998977419888, iteration: 139973
loss: 1.2760419845581055,grad_norm: 0.9999998909202437, iteration: 139974
loss: 1.255650281906128,grad_norm: 0.9999993990149079, iteration: 139975
loss: 1.3626006841659546,grad_norm: 0.999999926229651, iteration: 139976
loss: 1.074277400970459,grad_norm: 0.9999994990621726, iteration: 139977
loss: 1.014430284500122,grad_norm: 0.9999992448820993, iteration: 139978
loss: 1.0107614994049072,grad_norm: 0.9999990455878406, iteration: 139979
loss: 1.1100244522094727,grad_norm: 0.9999999243012185, iteration: 139980
loss: 1.0291932821273804,grad_norm: 0.999999222174833, iteration: 139981
loss: 1.0048072338104248,grad_norm: 0.982782189921574, iteration: 139982
loss: 1.0246394872665405,grad_norm: 0.9999994959187135, iteration: 139983
loss: 0.9927005171775818,grad_norm: 0.9893413383716493, iteration: 139984
loss: 1.0382981300354004,grad_norm: 0.9999993948730789, iteration: 139985
loss: 0.9572506546974182,grad_norm: 0.9999995826624359, iteration: 139986
loss: 1.1398119926452637,grad_norm: 0.9999998696328748, iteration: 139987
loss: 1.0486317873001099,grad_norm: 0.9999991579497425, iteration: 139988
loss: 1.0553112030029297,grad_norm: 0.9254354484805081, iteration: 139989
loss: 1.0123591423034668,grad_norm: 0.9999992254479925, iteration: 139990
loss: 1.005731463432312,grad_norm: 0.9999991713660202, iteration: 139991
loss: 0.9994833469390869,grad_norm: 0.9999991675911836, iteration: 139992
loss: 1.0443141460418701,grad_norm: 0.9999990207877587, iteration: 139993
loss: 1.0318416357040405,grad_norm: 0.9663885199801483, iteration: 139994
loss: 1.0887371301651,grad_norm: 0.9618680589163222, iteration: 139995
loss: 0.9897024631500244,grad_norm: 0.999999690460647, iteration: 139996
loss: 0.9678550362586975,grad_norm: 0.975853911581366, iteration: 139997
loss: 1.0695079565048218,grad_norm: 0.9999998960833629, iteration: 139998
loss: 0.9413252472877502,grad_norm: 0.9999991877267433, iteration: 139999
loss: 1.0697503089904785,grad_norm: 0.999999687198826, iteration: 140000
Evaluating at step 140000
{'val': 0.9976154062896967, 'test': 2.5584179258194215}
loss: 1.0744960308074951,grad_norm: 0.9999995689652704, iteration: 140001
loss: 0.9725672006607056,grad_norm: 0.9680419445596238, iteration: 140002
loss: 1.015096664428711,grad_norm: 0.9847631040947812, iteration: 140003
loss: 1.0888442993164062,grad_norm: 0.999999644422409, iteration: 140004
loss: 0.9832736849784851,grad_norm: 0.9585108376792396, iteration: 140005
loss: 1.048555612564087,grad_norm: 0.9999997490500888, iteration: 140006
loss: 1.0080617666244507,grad_norm: 0.9183667513695091, iteration: 140007
loss: 0.9968485832214355,grad_norm: 0.9999994097808805, iteration: 140008
loss: 1.0335451364517212,grad_norm: 0.9999990452273722, iteration: 140009
loss: 0.9848020076751709,grad_norm: 0.9999990938463982, iteration: 140010
loss: 0.9677536487579346,grad_norm: 0.9813247695809694, iteration: 140011
loss: 1.0020456314086914,grad_norm: 0.9999990717720965, iteration: 140012
loss: 0.9940007925033569,grad_norm: 0.8765749317966088, iteration: 140013
loss: 1.0205023288726807,grad_norm: 0.9999992611085665, iteration: 140014
loss: 1.0011670589447021,grad_norm: 0.999999114666698, iteration: 140015
loss: 0.9746605753898621,grad_norm: 0.884584880033139, iteration: 140016
loss: 1.206391453742981,grad_norm: 0.9999992241775224, iteration: 140017
loss: 0.9833904504776001,grad_norm: 0.9999992097755678, iteration: 140018
loss: 0.981908917427063,grad_norm: 0.9632642716960403, iteration: 140019
loss: 1.0665005445480347,grad_norm: 0.9999994947245305, iteration: 140020
loss: 0.9866007566452026,grad_norm: 0.951154303600221, iteration: 140021
loss: 0.9750032424926758,grad_norm: 0.9999989784125776, iteration: 140022
loss: 1.0692362785339355,grad_norm: 0.9999992312667192, iteration: 140023
loss: 1.0062000751495361,grad_norm: 0.8883067082972521, iteration: 140024
loss: 0.9966505169868469,grad_norm: 0.999999271225194, iteration: 140025
loss: 0.9875995516777039,grad_norm: 0.9698865023885505, iteration: 140026
loss: 0.9933778643608093,grad_norm: 0.9948553203082638, iteration: 140027
loss: 1.0113059282302856,grad_norm: 0.9999993546616842, iteration: 140028
loss: 1.017167568206787,grad_norm: 0.9999990664104069, iteration: 140029
loss: 0.9880661964416504,grad_norm: 0.9999991101016911, iteration: 140030
loss: 0.9752670526504517,grad_norm: 0.9999990746497291, iteration: 140031
loss: 0.9726742506027222,grad_norm: 0.9999991491735525, iteration: 140032
loss: 1.014184832572937,grad_norm: 0.9417860497919797, iteration: 140033
loss: 1.0137457847595215,grad_norm: 0.9578922997532819, iteration: 140034
loss: 1.0557478666305542,grad_norm: 0.9999990605610132, iteration: 140035
loss: 1.0212360620498657,grad_norm: 0.9999990609375285, iteration: 140036
loss: 0.9831829071044922,grad_norm: 0.8556042034358964, iteration: 140037
loss: 1.0599442720413208,grad_norm: 0.9999994781899559, iteration: 140038
loss: 1.0303951501846313,grad_norm: 0.9999994597784228, iteration: 140039
loss: 0.9775996804237366,grad_norm: 0.9602580498995954, iteration: 140040
loss: 1.0136257410049438,grad_norm: 0.9692809709386941, iteration: 140041
loss: 1.1123915910720825,grad_norm: 0.999999983852466, iteration: 140042
loss: 1.0015031099319458,grad_norm: 0.7733609859837021, iteration: 140043
loss: 0.9854476451873779,grad_norm: 0.8325736546236867, iteration: 140044
loss: 1.006227731704712,grad_norm: 0.9999990777872502, iteration: 140045
loss: 0.9819238781929016,grad_norm: 0.9999991420284149, iteration: 140046
loss: 1.044631004333496,grad_norm: 0.9999992764059773, iteration: 140047
loss: 1.013278603553772,grad_norm: 0.8721693244947804, iteration: 140048
loss: 0.9575175642967224,grad_norm: 0.9999989975992859, iteration: 140049
loss: 1.0124616622924805,grad_norm: 0.9999991427586169, iteration: 140050
loss: 1.0600041151046753,grad_norm: 0.9999996857926402, iteration: 140051
loss: 1.0158965587615967,grad_norm: 0.9999991629076737, iteration: 140052
loss: 1.008771300315857,grad_norm: 0.9091635371012601, iteration: 140053
loss: 0.96745365858078,grad_norm: 0.9351524684454507, iteration: 140054
loss: 1.0212271213531494,grad_norm: 0.9999997187250165, iteration: 140055
loss: 1.015594244003296,grad_norm: 0.8943386845094448, iteration: 140056
loss: 0.9915674924850464,grad_norm: 0.9999990733424977, iteration: 140057
loss: 1.0536439418792725,grad_norm: 0.9999995750633495, iteration: 140058
loss: 0.969373881816864,grad_norm: 0.9999991671811029, iteration: 140059
loss: 1.0469120740890503,grad_norm: 0.9999996866939556, iteration: 140060
loss: 1.2055766582489014,grad_norm: 0.9999995449643478, iteration: 140061
loss: 1.0062795877456665,grad_norm: 0.9999991352130155, iteration: 140062
loss: 1.0045220851898193,grad_norm: 0.9999995494473408, iteration: 140063
loss: 0.9861791729927063,grad_norm: 0.9999992285056324, iteration: 140064
loss: 0.9896242022514343,grad_norm: 0.9999992146793769, iteration: 140065
loss: 0.9424331188201904,grad_norm: 0.999999248751664, iteration: 140066
loss: 1.0632706880569458,grad_norm: 0.9534909981834262, iteration: 140067
loss: 0.9866201281547546,grad_norm: 0.999999090189274, iteration: 140068
loss: 0.9998694658279419,grad_norm: 0.8748432436105164, iteration: 140069
loss: 1.0374855995178223,grad_norm: 0.9999993212858533, iteration: 140070
loss: 1.0264968872070312,grad_norm: 0.9999999087088943, iteration: 140071
loss: 1.0355408191680908,grad_norm: 0.9999991264640004, iteration: 140072
loss: 1.0422923564910889,grad_norm: 0.9999993603137756, iteration: 140073
loss: 0.9836709499359131,grad_norm: 0.999999390178769, iteration: 140074
loss: 1.0354915857315063,grad_norm: 0.99999919029122, iteration: 140075
loss: 0.9913526177406311,grad_norm: 0.9424247693940707, iteration: 140076
loss: 0.9730961322784424,grad_norm: 0.9999990888708545, iteration: 140077
loss: 1.008300542831421,grad_norm: 0.9999991620405733, iteration: 140078
loss: 1.0384563207626343,grad_norm: 1.000000015395858, iteration: 140079
loss: 0.9934346079826355,grad_norm: 0.999999081586126, iteration: 140080
loss: 1.0049269199371338,grad_norm: 0.9999992256492005, iteration: 140081
loss: 1.0619746446609497,grad_norm: 0.9999994108245125, iteration: 140082
loss: 1.0160993337631226,grad_norm: 0.9999992471570577, iteration: 140083
loss: 1.023895025253296,grad_norm: 0.9999990925972558, iteration: 140084
loss: 1.0017684698104858,grad_norm: 0.8549261524263101, iteration: 140085
loss: 0.960796594619751,grad_norm: 0.9999995710616264, iteration: 140086
loss: 0.9977708458900452,grad_norm: 0.9715575745575851, iteration: 140087
loss: 0.9630793333053589,grad_norm: 0.9999991534706661, iteration: 140088
loss: 1.0575886964797974,grad_norm: 0.9999998570103213, iteration: 140089
loss: 0.9795682430267334,grad_norm: 0.9499704823098122, iteration: 140090
loss: 0.9894911646842957,grad_norm: 0.9999992105715835, iteration: 140091
loss: 1.0397363901138306,grad_norm: 0.9999998220017049, iteration: 140092
loss: 0.9979928731918335,grad_norm: 0.9999992088823597, iteration: 140093
loss: 1.0328818559646606,grad_norm: 0.9999998503124375, iteration: 140094
loss: 1.0152188539505005,grad_norm: 0.9999997309751835, iteration: 140095
loss: 1.0181896686553955,grad_norm: 0.9999997348010728, iteration: 140096
loss: 1.011005163192749,grad_norm: 0.9999991339186622, iteration: 140097
loss: 1.0324195623397827,grad_norm: 0.9453540398085882, iteration: 140098
loss: 1.057044506072998,grad_norm: 0.9999996927513479, iteration: 140099
loss: 1.0344716310501099,grad_norm: 0.9999992191789357, iteration: 140100
loss: 1.014591932296753,grad_norm: 0.9999991702073233, iteration: 140101
loss: 0.9696241617202759,grad_norm: 0.9223450660396989, iteration: 140102
loss: 0.9822640419006348,grad_norm: 0.999999808108388, iteration: 140103
loss: 1.029897928237915,grad_norm: 0.9999991635197556, iteration: 140104
loss: 0.9948294758796692,grad_norm: 0.9629540571183116, iteration: 140105
loss: 0.9909061789512634,grad_norm: 0.9999991837056189, iteration: 140106
loss: 1.0115898847579956,grad_norm: 0.8544077234336804, iteration: 140107
loss: 1.0385265350341797,grad_norm: 0.9999992713068142, iteration: 140108
loss: 0.9904888868331909,grad_norm: 0.8912837527786407, iteration: 140109
loss: 1.0363670587539673,grad_norm: 0.9502623361298407, iteration: 140110
loss: 0.9884374737739563,grad_norm: 0.9999991699239857, iteration: 140111
loss: 1.0037579536437988,grad_norm: 0.9788016574907863, iteration: 140112
loss: 0.9810388088226318,grad_norm: 0.999999276596641, iteration: 140113
loss: 1.0235434770584106,grad_norm: 0.8820745952166885, iteration: 140114
loss: 1.087458610534668,grad_norm: 0.9999993058717925, iteration: 140115
loss: 1.020572304725647,grad_norm: 0.9999993547538948, iteration: 140116
loss: 0.9632152915000916,grad_norm: 0.9603338874071896, iteration: 140117
loss: 1.003534197807312,grad_norm: 0.9999990868579857, iteration: 140118
loss: 1.1122360229492188,grad_norm: 0.9999996062404909, iteration: 140119
loss: 1.0168818235397339,grad_norm: 0.9999991464513287, iteration: 140120
loss: 1.007604956626892,grad_norm: 0.9999991181568166, iteration: 140121
loss: 1.043976068496704,grad_norm: 0.999999154484869, iteration: 140122
loss: 0.9774640798568726,grad_norm: 0.9999991652583288, iteration: 140123
loss: 1.056287169456482,grad_norm: 0.9999998567234857, iteration: 140124
loss: 1.0056467056274414,grad_norm: 0.9999990787302769, iteration: 140125
loss: 0.9622001051902771,grad_norm: 0.9999992981457714, iteration: 140126
loss: 0.9763807058334351,grad_norm: 0.9999992211701692, iteration: 140127
loss: 1.0016765594482422,grad_norm: 0.9233949384038894, iteration: 140128
loss: 0.9928351044654846,grad_norm: 0.9999993110387561, iteration: 140129
loss: 1.0510374307632446,grad_norm: 0.9224318023133444, iteration: 140130
loss: 0.9709456562995911,grad_norm: 0.9219558845166582, iteration: 140131
loss: 1.0074946880340576,grad_norm: 0.9999993903932707, iteration: 140132
loss: 0.9898268580436707,grad_norm: 0.8171950866862592, iteration: 140133
loss: 1.030753254890442,grad_norm: 0.9999992805869184, iteration: 140134
loss: 0.9948469400405884,grad_norm: 0.9999991474160979, iteration: 140135
loss: 1.0145888328552246,grad_norm: 0.9999991685111658, iteration: 140136
loss: 0.9906240701675415,grad_norm: 0.9037490439119023, iteration: 140137
loss: 1.0333644151687622,grad_norm: 0.9999993212184499, iteration: 140138
loss: 0.980216383934021,grad_norm: 0.9999996353051079, iteration: 140139
loss: 1.0213372707366943,grad_norm: 0.9999991341393927, iteration: 140140
loss: 0.9960795044898987,grad_norm: 0.9178896407551569, iteration: 140141
loss: 0.9962978363037109,grad_norm: 0.9999990728302558, iteration: 140142
loss: 1.0583341121673584,grad_norm: 0.9999995222327428, iteration: 140143
loss: 1.018755555152893,grad_norm: 0.9999992865897819, iteration: 140144
loss: 1.10923433303833,grad_norm: 0.999999574518585, iteration: 140145
loss: 1.1097232103347778,grad_norm: 0.9999992954700776, iteration: 140146
loss: 1.0083709955215454,grad_norm: 0.8384671175826155, iteration: 140147
loss: 1.0036289691925049,grad_norm: 0.9713067656835266, iteration: 140148
loss: 1.0178000926971436,grad_norm: 0.999999117262018, iteration: 140149
loss: 0.973294734954834,grad_norm: 0.9999992140559572, iteration: 140150
loss: 0.9803250432014465,grad_norm: 0.9999991986332404, iteration: 140151
loss: 1.0152312517166138,grad_norm: 0.9999991678170782, iteration: 140152
loss: 1.0058685541152954,grad_norm: 0.8904029385797301, iteration: 140153
loss: 1.0162475109100342,grad_norm: 0.9834793800770233, iteration: 140154
loss: 1.0149754285812378,grad_norm: 0.9999991796698102, iteration: 140155
loss: 1.0060136318206787,grad_norm: 0.9999989755913345, iteration: 140156
loss: 1.0184673070907593,grad_norm: 0.9490922708625684, iteration: 140157
loss: 1.0006200075149536,grad_norm: 0.9999990738218602, iteration: 140158
loss: 1.0244399309158325,grad_norm: 0.9999989925717705, iteration: 140159
loss: 0.9802851676940918,grad_norm: 0.9999990856882793, iteration: 140160
loss: 1.0082999467849731,grad_norm: 0.9999990009948717, iteration: 140161
loss: 1.0020769834518433,grad_norm: 0.9999991148076922, iteration: 140162
loss: 1.0077407360076904,grad_norm: 0.9999991032779306, iteration: 140163
loss: 0.9930530190467834,grad_norm: 0.9999993327142078, iteration: 140164
loss: 0.9923620820045471,grad_norm: 0.9999989813445624, iteration: 140165
loss: 1.0096548795700073,grad_norm: 0.9999990531932309, iteration: 140166
loss: 0.9579470157623291,grad_norm: 0.9999991234411585, iteration: 140167
loss: 1.0184426307678223,grad_norm: 0.9999991462597836, iteration: 140168
loss: 0.991584062576294,grad_norm: 0.9999990229074526, iteration: 140169
loss: 1.004377841949463,grad_norm: 0.9999994920162784, iteration: 140170
loss: 1.0035865306854248,grad_norm: 0.9363887666663127, iteration: 140171
loss: 1.0138068199157715,grad_norm: 0.9999993558353294, iteration: 140172
loss: 0.9833425283432007,grad_norm: 0.864432311336692, iteration: 140173
loss: 0.9552301168441772,grad_norm: 0.9999990704987146, iteration: 140174
loss: 0.9752607345581055,grad_norm: 0.9727528092242698, iteration: 140175
loss: 0.9636943936347961,grad_norm: 0.9999991009088331, iteration: 140176
loss: 0.9958641529083252,grad_norm: 0.9999990995635638, iteration: 140177
loss: 1.0842257738113403,grad_norm: 0.9999991494383063, iteration: 140178
loss: 1.0275267362594604,grad_norm: 0.9999994544434063, iteration: 140179
loss: 1.0009872913360596,grad_norm: 0.9999992414192449, iteration: 140180
loss: 0.9574670791625977,grad_norm: 0.9999991532466569, iteration: 140181
loss: 1.0687766075134277,grad_norm: 0.9999999546083824, iteration: 140182
loss: 0.9478496313095093,grad_norm: 0.9999991245209313, iteration: 140183
loss: 1.006314992904663,grad_norm: 0.8871937222120347, iteration: 140184
loss: 1.034446358680725,grad_norm: 0.9999989866520378, iteration: 140185
loss: 1.0116643905639648,grad_norm: 0.9999991917889572, iteration: 140186
loss: 1.0113815069198608,grad_norm: 0.912691550419195, iteration: 140187
loss: 0.9884601831436157,grad_norm: 0.9382944315042578, iteration: 140188
loss: 1.0412033796310425,grad_norm: 0.9999991641609911, iteration: 140189
loss: 0.9851720333099365,grad_norm: 0.9999991457528763, iteration: 140190
loss: 1.0544801950454712,grad_norm: 0.9999990602152417, iteration: 140191
loss: 0.9726758599281311,grad_norm: 0.9095809871805298, iteration: 140192
loss: 0.9949303865432739,grad_norm: 0.9999992746181476, iteration: 140193
loss: 1.0087538957595825,grad_norm: 0.9999997230396916, iteration: 140194
loss: 1.0189690589904785,grad_norm: 0.9663280092198105, iteration: 140195
loss: 0.9740418195724487,grad_norm: 0.9716609669520402, iteration: 140196
loss: 1.0169073343276978,grad_norm: 0.8128721867234122, iteration: 140197
loss: 0.9868574142456055,grad_norm: 0.9999990213649464, iteration: 140198
loss: 1.0308973789215088,grad_norm: 0.9999990534989776, iteration: 140199
loss: 1.027650237083435,grad_norm: 0.9999993004166418, iteration: 140200
loss: 0.9988278746604919,grad_norm: 0.9165246928963566, iteration: 140201
loss: 0.9876237511634827,grad_norm: 0.9260232380355085, iteration: 140202
loss: 0.9916274547576904,grad_norm: 0.9999990994364392, iteration: 140203
loss: 0.9901215434074402,grad_norm: 0.9999991796249037, iteration: 140204
loss: 1.0085901021957397,grad_norm: 0.9999992403866153, iteration: 140205
loss: 1.0381286144256592,grad_norm: 0.9999989735313577, iteration: 140206
loss: 0.9917581677436829,grad_norm: 0.999999167996342, iteration: 140207
loss: 1.0341426134109497,grad_norm: 0.999999491761248, iteration: 140208
loss: 1.000899314880371,grad_norm: 0.9999991260649657, iteration: 140209
loss: 1.0120681524276733,grad_norm: 0.992496427935579, iteration: 140210
loss: 1.0096025466918945,grad_norm: 0.8680254811838363, iteration: 140211
loss: 1.0218919515609741,grad_norm: 0.9999991489503108, iteration: 140212
loss: 1.0062313079833984,grad_norm: 0.9355988452776843, iteration: 140213
loss: 1.0015543699264526,grad_norm: 0.9184595595648898, iteration: 140214
loss: 0.9539006948471069,grad_norm: 0.999999186851414, iteration: 140215
loss: 0.9936557412147522,grad_norm: 0.9607377862812094, iteration: 140216
loss: 1.0098930597305298,grad_norm: 0.9999990522761869, iteration: 140217
loss: 1.0287408828735352,grad_norm: 0.9519297676131753, iteration: 140218
loss: 1.0099588632583618,grad_norm: 0.975408836126849, iteration: 140219
loss: 1.008457899093628,grad_norm: 0.804887527132648, iteration: 140220
loss: 0.9820279479026794,grad_norm: 0.8906402121952945, iteration: 140221
loss: 0.9857396483421326,grad_norm: 0.8484787248510526, iteration: 140222
loss: 1.0064520835876465,grad_norm: 0.9999992609934453, iteration: 140223
loss: 1.0003982782363892,grad_norm: 0.9999990855419291, iteration: 140224
loss: 0.9993083477020264,grad_norm: 0.9999992100058444, iteration: 140225
loss: 0.9999315738677979,grad_norm: 0.9999990997363165, iteration: 140226
loss: 1.018986463546753,grad_norm: 0.9999991709737824, iteration: 140227
loss: 1.0033131837844849,grad_norm: 0.8846441198852187, iteration: 140228
loss: 1.0229530334472656,grad_norm: 0.9999992142008326, iteration: 140229
loss: 1.0197842121124268,grad_norm: 0.923125218545025, iteration: 140230
loss: 0.9484442472457886,grad_norm: 0.9999991264257041, iteration: 140231
loss: 1.0137851238250732,grad_norm: 0.8593238561502933, iteration: 140232
loss: 1.0065664052963257,grad_norm: 0.9999991341582782, iteration: 140233
loss: 0.9807320237159729,grad_norm: 0.9999990002271885, iteration: 140234
loss: 0.9742981791496277,grad_norm: 0.9999990585753488, iteration: 140235
loss: 0.9948570728302002,grad_norm: 0.8848106551195382, iteration: 140236
loss: 1.016025185585022,grad_norm: 0.9589508557874034, iteration: 140237
loss: 1.0030666589736938,grad_norm: 0.8983593611727085, iteration: 140238
loss: 1.022602915763855,grad_norm: 0.9625829870592781, iteration: 140239
loss: 1.0127253532409668,grad_norm: 0.9999992789323691, iteration: 140240
loss: 1.0508736371994019,grad_norm: 0.999999957618275, iteration: 140241
loss: 0.994998037815094,grad_norm: 0.9999990325120811, iteration: 140242
loss: 1.0067336559295654,grad_norm: 0.9999991462195652, iteration: 140243
loss: 1.0107207298278809,grad_norm: 0.9999991839648827, iteration: 140244
loss: 1.0341964960098267,grad_norm: 0.999999506557819, iteration: 140245
loss: 0.9714002013206482,grad_norm: 0.8952927441002997, iteration: 140246
loss: 1.0457795858383179,grad_norm: 0.9676768659091858, iteration: 140247
loss: 1.0039293766021729,grad_norm: 0.9999990524067325, iteration: 140248
loss: 1.0011268854141235,grad_norm: 0.9967550342001009, iteration: 140249
loss: 0.9680067300796509,grad_norm: 0.9999991756583836, iteration: 140250
loss: 1.031800627708435,grad_norm: 0.9999990661749999, iteration: 140251
loss: 1.0254772901535034,grad_norm: 0.9999998165706712, iteration: 140252
loss: 1.0345336198806763,grad_norm: 0.851353729366447, iteration: 140253
loss: 1.0668209791183472,grad_norm: 0.9999991916117045, iteration: 140254
loss: 1.0149593353271484,grad_norm: 0.9999990608171275, iteration: 140255
loss: 1.0126937627792358,grad_norm: 0.9999991560853426, iteration: 140256
loss: 1.0405683517456055,grad_norm: 0.9999992356934804, iteration: 140257
loss: 0.9693682193756104,grad_norm: 0.8664524707749838, iteration: 140258
loss: 0.9889243245124817,grad_norm: 0.9999991051412441, iteration: 140259
loss: 1.0061697959899902,grad_norm: 0.9999992518548304, iteration: 140260
loss: 0.9929342865943909,grad_norm: 0.9193678351588811, iteration: 140261
loss: 0.9844250082969666,grad_norm: 0.9999990608033777, iteration: 140262
loss: 1.0095752477645874,grad_norm: 0.9999999073086364, iteration: 140263
loss: 0.993297278881073,grad_norm: 0.9999992263888832, iteration: 140264
loss: 0.9673179984092712,grad_norm: 0.9999996354262405, iteration: 140265
loss: 0.944576621055603,grad_norm: 0.9999991861347117, iteration: 140266
loss: 0.9997084140777588,grad_norm: 0.9999991867190677, iteration: 140267
loss: 0.9710230231285095,grad_norm: 0.9999992716342977, iteration: 140268
loss: 1.0191211700439453,grad_norm: 0.9999990954530042, iteration: 140269
loss: 0.986203134059906,grad_norm: 0.9999990900866809, iteration: 140270
loss: 0.9907432198524475,grad_norm: 0.9999990399418455, iteration: 140271
loss: 0.9938422441482544,grad_norm: 0.7904897033487537, iteration: 140272
loss: 0.995934009552002,grad_norm: 0.9999993019773477, iteration: 140273
loss: 1.019392967224121,grad_norm: 0.9999991242407797, iteration: 140274
loss: 1.016782522201538,grad_norm: 0.9370371570127315, iteration: 140275
loss: 0.9841628074645996,grad_norm: 0.9999989853090884, iteration: 140276
loss: 1.0172617435455322,grad_norm: 0.9999992792436898, iteration: 140277
loss: 1.0297398567199707,grad_norm: 0.9999991818147385, iteration: 140278
loss: 1.0161067247390747,grad_norm: 0.9999992050290621, iteration: 140279
loss: 0.9926022887229919,grad_norm: 0.9999990754174725, iteration: 140280
loss: 0.9878065586090088,grad_norm: 0.9999991864130444, iteration: 140281
loss: 1.0093570947647095,grad_norm: 0.9999990772978045, iteration: 140282
loss: 0.9938319325447083,grad_norm: 0.9999995538540474, iteration: 140283
loss: 1.0413978099822998,grad_norm: 0.999999064637124, iteration: 140284
loss: 0.9906107783317566,grad_norm: 0.9714762580639499, iteration: 140285
loss: 0.9795636534690857,grad_norm: 0.9999992024407993, iteration: 140286
loss: 1.0101286172866821,grad_norm: 0.9999989648861455, iteration: 140287
loss: 0.9895195364952087,grad_norm: 0.9444225622579766, iteration: 140288
loss: 1.0151073932647705,grad_norm: 0.9999992572824404, iteration: 140289
loss: 0.9975240230560303,grad_norm: 0.9117040857036092, iteration: 140290
loss: 1.008924961090088,grad_norm: 0.9999992546143867, iteration: 140291
loss: 0.9686096906661987,grad_norm: 0.8645078753174492, iteration: 140292
loss: 0.978160560131073,grad_norm: 0.9343829507286263, iteration: 140293
loss: 0.988081157207489,grad_norm: 0.9176337098782639, iteration: 140294
loss: 0.9767403602600098,grad_norm: 0.7830700839182163, iteration: 140295
loss: 0.9765729904174805,grad_norm: 0.9999992342889393, iteration: 140296
loss: 0.9909862875938416,grad_norm: 0.9999990593924563, iteration: 140297
loss: 1.0788639783859253,grad_norm: 0.9999995489155603, iteration: 140298
loss: 1.022549033164978,grad_norm: 0.9999990943164914, iteration: 140299
loss: 0.9858145117759705,grad_norm: 0.9879848742448504, iteration: 140300
loss: 0.9912233352661133,grad_norm: 0.9999990579846921, iteration: 140301
loss: 0.9988057613372803,grad_norm: 0.999999788276549, iteration: 140302
loss: 0.9490857124328613,grad_norm: 0.9999991093528128, iteration: 140303
loss: 0.992976725101471,grad_norm: 0.9808654385617877, iteration: 140304
loss: 1.1358526945114136,grad_norm: 0.9999993975875051, iteration: 140305
loss: 1.0077297687530518,grad_norm: 0.9970065525431993, iteration: 140306
loss: 0.9859839677810669,grad_norm: 0.8605099241151606, iteration: 140307
loss: 1.077288269996643,grad_norm: 0.9999991869505499, iteration: 140308
loss: 1.0232402086257935,grad_norm: 0.9999991419892169, iteration: 140309
loss: 0.959736704826355,grad_norm: 0.9943872352632602, iteration: 140310
loss: 1.002671480178833,grad_norm: 0.9999991352561368, iteration: 140311
loss: 1.0284148454666138,grad_norm: 0.9999990068147601, iteration: 140312
loss: 1.0186383724212646,grad_norm: 0.999999370471844, iteration: 140313
loss: 1.0207488536834717,grad_norm: 0.9999995825866673, iteration: 140314
loss: 1.0521447658538818,grad_norm: 0.999999351885211, iteration: 140315
loss: 0.991244375705719,grad_norm: 0.9999991634270388, iteration: 140316
loss: 0.9646181464195251,grad_norm: 0.9999992336893904, iteration: 140317
loss: 1.0074522495269775,grad_norm: 0.9420485650972732, iteration: 140318
loss: 1.011222004890442,grad_norm: 0.9999994375695355, iteration: 140319
loss: 0.9711092114448547,grad_norm: 0.9999992110284326, iteration: 140320
loss: 1.015828013420105,grad_norm: 0.999999236419978, iteration: 140321
loss: 1.0404683351516724,grad_norm: 0.9999998612134865, iteration: 140322
loss: 0.9492834210395813,grad_norm: 0.9999990277420671, iteration: 140323
loss: 1.0928503274917603,grad_norm: 0.9999998708097435, iteration: 140324
loss: 1.0749331712722778,grad_norm: 0.999999073422031, iteration: 140325
loss: 1.0069901943206787,grad_norm: 0.9999992687739472, iteration: 140326
loss: 1.0044565200805664,grad_norm: 0.9999990204252663, iteration: 140327
loss: 0.9938790798187256,grad_norm: 0.9999992647012623, iteration: 140328
loss: 1.008991003036499,grad_norm: 0.9999993396468561, iteration: 140329
loss: 1.1108667850494385,grad_norm: 0.9999992931123548, iteration: 140330
loss: 0.9763170480728149,grad_norm: 0.9999991292625219, iteration: 140331
loss: 1.0684475898742676,grad_norm: 0.999999505530114, iteration: 140332
loss: 1.007232666015625,grad_norm: 0.9999991359604143, iteration: 140333
loss: 0.9660581946372986,grad_norm: 0.8970192764778124, iteration: 140334
loss: 1.0101182460784912,grad_norm: 0.9999994034094133, iteration: 140335
loss: 0.9799560904502869,grad_norm: 0.9350959326930305, iteration: 140336
loss: 0.9438227415084839,grad_norm: 0.9999991703191218, iteration: 140337
loss: 0.9689424633979797,grad_norm: 0.9999997518284867, iteration: 140338
loss: 1.0347117185592651,grad_norm: 0.9999992925185633, iteration: 140339
loss: 0.9992991089820862,grad_norm: 0.9999999402791558, iteration: 140340
loss: 0.9988827109336853,grad_norm: 0.9999990903546007, iteration: 140341
loss: 0.9980757832527161,grad_norm: 0.9999990439512197, iteration: 140342
loss: 1.030464768409729,grad_norm: 0.9999992677263952, iteration: 140343
loss: 1.038620114326477,grad_norm: 0.9999991237825926, iteration: 140344
loss: 1.0647958517074585,grad_norm: 0.9999990865537727, iteration: 140345
loss: 1.0938853025436401,grad_norm: 0.9999995989399703, iteration: 140346
loss: 1.0227426290512085,grad_norm: 0.9859534503472682, iteration: 140347
loss: 1.1578596830368042,grad_norm: 0.9999997526229092, iteration: 140348
loss: 1.3793445825576782,grad_norm: 0.9999997681046694, iteration: 140349
loss: 1.0731281042099,grad_norm: 0.99999979931968, iteration: 140350
loss: 1.1847453117370605,grad_norm: 0.9999999113555952, iteration: 140351
loss: 1.1208778619766235,grad_norm: 0.9999998782707085, iteration: 140352
loss: 1.1052864789962769,grad_norm: 0.9999994590012655, iteration: 140353
loss: 1.1001553535461426,grad_norm: 0.9999996380360989, iteration: 140354
loss: 1.102654218673706,grad_norm: 0.9999996797307018, iteration: 140355
loss: 1.038155198097229,grad_norm: 0.999999149611315, iteration: 140356
loss: 1.0745267868041992,grad_norm: 0.9999994184119673, iteration: 140357
loss: 1.0255388021469116,grad_norm: 0.9999993659330021, iteration: 140358
loss: 1.0059994459152222,grad_norm: 0.9999992161698246, iteration: 140359
loss: 1.0127146244049072,grad_norm: 0.9999991182836289, iteration: 140360
loss: 1.2689577341079712,grad_norm: 0.9999998231955053, iteration: 140361
loss: 1.0396393537521362,grad_norm: 0.9999998177862528, iteration: 140362
loss: 1.1273325681686401,grad_norm: 0.9999998119538233, iteration: 140363
loss: 1.1342257261276245,grad_norm: 0.9999998070272054, iteration: 140364
loss: 1.0459885597229004,grad_norm: 0.9999992476619565, iteration: 140365
loss: 1.0220838785171509,grad_norm: 0.9999990255439422, iteration: 140366
loss: 1.1024436950683594,grad_norm: 0.9999995246348921, iteration: 140367
loss: 0.976165771484375,grad_norm: 0.9999991473704961, iteration: 140368
loss: 1.1661982536315918,grad_norm: 0.9999993164274894, iteration: 140369
loss: 1.0200526714324951,grad_norm: 0.9999997019840478, iteration: 140370
loss: 1.0510332584381104,grad_norm: 0.9999992226945879, iteration: 140371
loss: 1.0225366353988647,grad_norm: 0.9999999953076658, iteration: 140372
loss: 1.0083115100860596,grad_norm: 0.935636829656841, iteration: 140373
loss: 1.0073895454406738,grad_norm: 0.9259775789140189, iteration: 140374
loss: 1.11510169506073,grad_norm: 0.9999995983119262, iteration: 140375
loss: 0.9940713047981262,grad_norm: 0.9962982314729542, iteration: 140376
loss: 0.9938685894012451,grad_norm: 0.9999993351598194, iteration: 140377
loss: 0.964662492275238,grad_norm: 0.9999990540946453, iteration: 140378
loss: 1.0092076063156128,grad_norm: 0.9999993475441187, iteration: 140379
loss: 1.012417197227478,grad_norm: 0.9999993654157996, iteration: 140380
loss: 1.0608211755752563,grad_norm: 0.999999199121325, iteration: 140381
loss: 0.9813830852508545,grad_norm: 0.9999990959031928, iteration: 140382
loss: 1.0971944332122803,grad_norm: 0.9999992448763149, iteration: 140383
loss: 1.0870800018310547,grad_norm: 0.9999998878700788, iteration: 140384
loss: 0.9944539070129395,grad_norm: 0.9999990893952364, iteration: 140385
loss: 1.0012712478637695,grad_norm: 0.9999992573367896, iteration: 140386
loss: 1.0087350606918335,grad_norm: 0.9999991762769516, iteration: 140387
loss: 1.0311516523361206,grad_norm: 0.9999991538669112, iteration: 140388
loss: 1.0236307382583618,grad_norm: 0.9999992710279619, iteration: 140389
loss: 1.0160181522369385,grad_norm: 0.851472159304237, iteration: 140390
loss: 0.9933637976646423,grad_norm: 0.999999164600109, iteration: 140391
loss: 1.0883420705795288,grad_norm: 0.9999997548603816, iteration: 140392
loss: 0.9989430904388428,grad_norm: 0.9999989674433671, iteration: 140393
loss: 1.004759430885315,grad_norm: 0.9345480748021926, iteration: 140394
loss: 0.978413462638855,grad_norm: 0.9999990955517404, iteration: 140395
loss: 0.9763822555541992,grad_norm: 0.9217251115484962, iteration: 140396
loss: 1.0044366121292114,grad_norm: 0.9999990776247228, iteration: 140397
loss: 0.9814325571060181,grad_norm: 0.9999990096777737, iteration: 140398
loss: 1.030819058418274,grad_norm: 0.9999996198304494, iteration: 140399
loss: 1.0153270959854126,grad_norm: 0.9999995561883998, iteration: 140400
loss: 1.0169717073440552,grad_norm: 0.875557746546986, iteration: 140401
loss: 0.9916700720787048,grad_norm: 0.9999992289105187, iteration: 140402
loss: 1.0194109678268433,grad_norm: 0.9999993617377136, iteration: 140403
loss: 1.020486831665039,grad_norm: 0.9232326664843357, iteration: 140404
loss: 1.0162972211837769,grad_norm: 0.9206405222596376, iteration: 140405
loss: 1.0048683881759644,grad_norm: 0.9999992222469896, iteration: 140406
loss: 0.9945833086967468,grad_norm: 0.9999991253153494, iteration: 140407
loss: 1.0242434740066528,grad_norm: 0.9999992008973223, iteration: 140408
loss: 1.0157665014266968,grad_norm: 0.9999995092941513, iteration: 140409
loss: 0.9729549884796143,grad_norm: 0.8365679751757725, iteration: 140410
loss: 1.030193567276001,grad_norm: 0.8765226442022783, iteration: 140411
loss: 0.9277840852737427,grad_norm: 0.9999991658674248, iteration: 140412
loss: 1.186955213546753,grad_norm: 0.9999994363034427, iteration: 140413
loss: 0.9871734976768494,grad_norm: 0.9700041328831385, iteration: 140414
loss: 0.9985414147377014,grad_norm: 0.9999991798284491, iteration: 140415
loss: 0.9809668660163879,grad_norm: 0.9999992036056348, iteration: 140416
loss: 0.9975404143333435,grad_norm: 0.888595339508649, iteration: 140417
loss: 1.0954946279525757,grad_norm: 0.9999994156758318, iteration: 140418
loss: 1.0259127616882324,grad_norm: 0.9999991767993652, iteration: 140419
loss: 0.9946441054344177,grad_norm: 0.9999990365368573, iteration: 140420
loss: 1.0103695392608643,grad_norm: 0.9999990724675865, iteration: 140421
loss: 1.0257878303527832,grad_norm: 0.9999992467428045, iteration: 140422
loss: 1.0021843910217285,grad_norm: 0.9999991114657805, iteration: 140423
loss: 0.9942832589149475,grad_norm: 0.9999991065845032, iteration: 140424
loss: 1.0760658979415894,grad_norm: 0.9999994859020026, iteration: 140425
loss: 1.022419810295105,grad_norm: 0.9999993390657432, iteration: 140426
loss: 1.0154889822006226,grad_norm: 0.9999992273047807, iteration: 140427
loss: 1.004429578781128,grad_norm: 0.9999992433255273, iteration: 140428
loss: 1.0740084648132324,grad_norm: 0.9999995609731396, iteration: 140429
loss: 0.9998437762260437,grad_norm: 0.9999990824010062, iteration: 140430
loss: 0.9952757358551025,grad_norm: 0.9999990188787452, iteration: 140431
loss: 0.9835324883460999,grad_norm: 0.9999990737684301, iteration: 140432
loss: 1.0010799169540405,grad_norm: 0.9999993690797577, iteration: 140433
loss: 1.0506088733673096,grad_norm: 0.9999996831153409, iteration: 140434
loss: 1.0072524547576904,grad_norm: 0.9999991880908161, iteration: 140435
loss: 0.9858800768852234,grad_norm: 0.9999991878747096, iteration: 140436
loss: 1.0587220191955566,grad_norm: 0.9999999000743143, iteration: 140437
loss: 0.9905312657356262,grad_norm: 0.9999992911850452, iteration: 140438
loss: 0.9976068735122681,grad_norm: 0.9999989698391163, iteration: 140439
loss: 1.0390098094940186,grad_norm: 0.9999990991517076, iteration: 140440
loss: 0.9820806980133057,grad_norm: 0.9999992700959176, iteration: 140441
loss: 0.9826520681381226,grad_norm: 0.9999991227150401, iteration: 140442
loss: 1.1969763040542603,grad_norm: 0.9999991174588082, iteration: 140443
loss: 0.9782226085662842,grad_norm: 0.9999992108853869, iteration: 140444
loss: 1.1693488359451294,grad_norm: 0.999999469034868, iteration: 140445
loss: 0.9983603358268738,grad_norm: 0.9999990797617886, iteration: 140446
loss: 1.0111920833587646,grad_norm: 0.9638833795859254, iteration: 140447
loss: 1.064440131187439,grad_norm: 0.9999991777784796, iteration: 140448
loss: 1.0391287803649902,grad_norm: 0.9800404034247808, iteration: 140449
loss: 1.0787150859832764,grad_norm: 0.9999998487632982, iteration: 140450
loss: 1.0232077836990356,grad_norm: 0.9999992934554107, iteration: 140451
loss: 1.0019617080688477,grad_norm: 0.992822219441782, iteration: 140452
loss: 0.9465177655220032,grad_norm: 0.9468334576900074, iteration: 140453
loss: 0.9615522027015686,grad_norm: 0.9999992287423597, iteration: 140454
loss: 1.0053666830062866,grad_norm: 0.9770587379855931, iteration: 140455
loss: 1.0050102472305298,grad_norm: 0.9999990221354581, iteration: 140456
loss: 0.9969401955604553,grad_norm: 0.9999993179476894, iteration: 140457
loss: 1.0308427810668945,grad_norm: 0.9999994449564443, iteration: 140458
loss: 1.0376245975494385,grad_norm: 0.9999994718667523, iteration: 140459
loss: 1.0080771446228027,grad_norm: 0.999999663019829, iteration: 140460
loss: 1.0243475437164307,grad_norm: 0.9999990077905797, iteration: 140461
loss: 1.029482126235962,grad_norm: 0.9999991552697686, iteration: 140462
loss: 1.0048799514770508,grad_norm: 0.999999083161564, iteration: 140463
loss: 1.0099958181381226,grad_norm: 0.9999996230049706, iteration: 140464
loss: 1.0063685178756714,grad_norm: 0.9999993645630015, iteration: 140465
loss: 1.029172420501709,grad_norm: 0.9999990983062736, iteration: 140466
loss: 1.0806576013565063,grad_norm: 0.9999994835796755, iteration: 140467
loss: 0.9870272278785706,grad_norm: 0.999999067075732, iteration: 140468
loss: 0.9626145958900452,grad_norm: 0.9999990385957644, iteration: 140469
loss: 1.0332859754562378,grad_norm: 0.9709967838526561, iteration: 140470
loss: 1.0494403839111328,grad_norm: 0.9999993815000526, iteration: 140471
loss: 1.172309398651123,grad_norm: 0.9999996414329965, iteration: 140472
loss: 1.111602544784546,grad_norm: 0.9999998658938292, iteration: 140473
loss: 1.050290584564209,grad_norm: 0.9999997868462, iteration: 140474
loss: 0.9917111992835999,grad_norm: 0.9999992771067082, iteration: 140475
loss: 1.0671217441558838,grad_norm: 0.9999993309925995, iteration: 140476
loss: 1.0321366786956787,grad_norm: 0.9999993090796768, iteration: 140477
loss: 1.0086199045181274,grad_norm: 0.9999996628668482, iteration: 140478
loss: 0.9781062602996826,grad_norm: 0.9454575170994842, iteration: 140479
loss: 1.0017699003219604,grad_norm: 0.9999991350711805, iteration: 140480
loss: 1.0709604024887085,grad_norm: 0.9999992248504481, iteration: 140481
loss: 0.9901511073112488,grad_norm: 0.9937089288940838, iteration: 140482
loss: 1.0478769540786743,grad_norm: 0.9999993875571133, iteration: 140483
loss: 1.0345500707626343,grad_norm: 0.9999993533624366, iteration: 140484
loss: 1.0025103092193604,grad_norm: 0.9999990703476568, iteration: 140485
loss: 1.0109798908233643,grad_norm: 0.9843167423815542, iteration: 140486
loss: 0.996425986289978,grad_norm: 0.9706162464877215, iteration: 140487
loss: 1.0412800312042236,grad_norm: 0.9999993747788531, iteration: 140488
loss: 1.027829647064209,grad_norm: 0.9993139349237704, iteration: 140489
loss: 1.0423842668533325,grad_norm: 0.9999996750470407, iteration: 140490
loss: 1.0329513549804688,grad_norm: 0.9999991457400225, iteration: 140491
loss: 0.9871245622634888,grad_norm: 0.9999990741364448, iteration: 140492
loss: 1.0123755931854248,grad_norm: 0.999999214530492, iteration: 140493
loss: 0.9746914505958557,grad_norm: 0.9999992570585738, iteration: 140494
loss: 0.9881753325462341,grad_norm: 0.9580114022292752, iteration: 140495
loss: 1.0267069339752197,grad_norm: 0.9999990925301202, iteration: 140496
loss: 0.9985235929489136,grad_norm: 0.8575190039149945, iteration: 140497
loss: 0.9950857758522034,grad_norm: 0.936886400525219, iteration: 140498
loss: 1.0500174760818481,grad_norm: 0.9999990952442964, iteration: 140499
loss: 0.9990583658218384,grad_norm: 0.9999992082109829, iteration: 140500
loss: 1.0053377151489258,grad_norm: 0.9999991809644561, iteration: 140501
loss: 1.0160788297653198,grad_norm: 0.999999191909224, iteration: 140502
loss: 0.9674124717712402,grad_norm: 0.9201800002571574, iteration: 140503
loss: 1.032773494720459,grad_norm: 1.0000000444684212, iteration: 140504
loss: 1.015478491783142,grad_norm: 0.8903665438217061, iteration: 140505
loss: 0.9806430339813232,grad_norm: 0.99999937659643, iteration: 140506
loss: 0.9839794039726257,grad_norm: 0.999999008771321, iteration: 140507
loss: 1.0013211965560913,grad_norm: 0.9999993538135403, iteration: 140508
loss: 0.9780799150466919,grad_norm: 0.9869569074830981, iteration: 140509
loss: 0.9487281441688538,grad_norm: 0.8646013742720905, iteration: 140510
loss: 1.0165905952453613,grad_norm: 0.9999990998883773, iteration: 140511
loss: 1.0996767282485962,grad_norm: 0.9999998071945401, iteration: 140512
loss: 0.9704782962799072,grad_norm: 0.9874447111007999, iteration: 140513
loss: 0.9936284422874451,grad_norm: 0.9999992358291113, iteration: 140514
loss: 1.0193796157836914,grad_norm: 0.9999992640454489, iteration: 140515
loss: 1.0042853355407715,grad_norm: 0.9999996644193762, iteration: 140516
loss: 1.0502439737319946,grad_norm: 0.9999991914464609, iteration: 140517
loss: 1.0506031513214111,grad_norm: 0.9999996405566203, iteration: 140518
loss: 1.0279395580291748,grad_norm: 0.9999994056597142, iteration: 140519
loss: 1.0106439590454102,grad_norm: 0.9427024924314816, iteration: 140520
loss: 1.0063828229904175,grad_norm: 0.9999991338283243, iteration: 140521
loss: 0.9498846530914307,grad_norm: 0.9999989571870301, iteration: 140522
loss: 0.9953354597091675,grad_norm: 0.9999998347268045, iteration: 140523
loss: 0.9746905565261841,grad_norm: 0.999999136534152, iteration: 140524
loss: 0.9874889254570007,grad_norm: 0.9999991953337424, iteration: 140525
loss: 1.0099998712539673,grad_norm: 0.9497475525508078, iteration: 140526
loss: 1.031284213066101,grad_norm: 0.9958960089488188, iteration: 140527
loss: 0.9574304223060608,grad_norm: 0.9999991812233467, iteration: 140528
loss: 1.084518551826477,grad_norm: 0.9999998335971123, iteration: 140529
loss: 0.9866667985916138,grad_norm: 0.999999129861193, iteration: 140530
loss: 1.0741673707962036,grad_norm: 0.99999923651611, iteration: 140531
loss: 1.0328731536865234,grad_norm: 0.9475273849049787, iteration: 140532
loss: 0.9683257937431335,grad_norm: 0.9999992727932292, iteration: 140533
loss: 1.012320876121521,grad_norm: 0.9999990441357844, iteration: 140534
loss: 0.9925122857093811,grad_norm: 0.9999990638343949, iteration: 140535
loss: 1.0508776903152466,grad_norm: 0.9405516846680961, iteration: 140536
loss: 1.0869606733322144,grad_norm: 0.9999993974874913, iteration: 140537
loss: 0.9980167746543884,grad_norm: 0.9999990043959707, iteration: 140538
loss: 1.0176669359207153,grad_norm: 0.9638291677619324, iteration: 140539
loss: 1.0110228061676025,grad_norm: 0.9349059565061834, iteration: 140540
loss: 1.0497230291366577,grad_norm: 0.9999993170715832, iteration: 140541
loss: 0.9834672808647156,grad_norm: 0.947643798282867, iteration: 140542
loss: 0.9869655966758728,grad_norm: 0.9879544305488385, iteration: 140543
loss: 0.9958884716033936,grad_norm: 0.9999991735216878, iteration: 140544
loss: 0.9991545081138611,grad_norm: 0.9999994124352878, iteration: 140545
loss: 0.9873129725456238,grad_norm: 0.999999228467055, iteration: 140546
loss: 0.9792803525924683,grad_norm: 0.9999989175142031, iteration: 140547
loss: 0.9978362321853638,grad_norm: 0.9999991899203817, iteration: 140548
loss: 1.0335502624511719,grad_norm: 0.9492856518579192, iteration: 140549
loss: 0.994225263595581,grad_norm: 0.9999992502908023, iteration: 140550
loss: 1.0193959474563599,grad_norm: 0.9999992072182131, iteration: 140551
loss: 0.9860109686851501,grad_norm: 0.9999990229624977, iteration: 140552
loss: 1.0125421285629272,grad_norm: 0.9999989932918321, iteration: 140553
loss: 0.9713680744171143,grad_norm: 0.9999990139050019, iteration: 140554
loss: 0.9449746608734131,grad_norm: 0.9867236585197132, iteration: 140555
loss: 0.9567950963973999,grad_norm: 0.9999990580655844, iteration: 140556
loss: 1.01747465133667,grad_norm: 0.9999993807308748, iteration: 140557
loss: 0.9776282906532288,grad_norm: 0.9999989714173894, iteration: 140558
loss: 0.9918517470359802,grad_norm: 0.8736476937788394, iteration: 140559
loss: 1.2090215682983398,grad_norm: 0.9999996891823231, iteration: 140560
loss: 0.9969328045845032,grad_norm: 0.9881414235418937, iteration: 140561
loss: 1.003249168395996,grad_norm: 0.9999990492860215, iteration: 140562
loss: 1.0281713008880615,grad_norm: 0.9999996174063258, iteration: 140563
loss: 1.0008289813995361,grad_norm: 0.9219181448400969, iteration: 140564
loss: 1.0398732423782349,grad_norm: 0.9999991274491454, iteration: 140565
loss: 1.045967698097229,grad_norm: 0.9999994449305528, iteration: 140566
loss: 1.0188665390014648,grad_norm: 0.9733161780521896, iteration: 140567
loss: 1.0154621601104736,grad_norm: 0.999999086053748, iteration: 140568
loss: 0.9978273510932922,grad_norm: 0.886252210978138, iteration: 140569
loss: 1.0073033571243286,grad_norm: 0.8918224733569615, iteration: 140570
loss: 1.0144709348678589,grad_norm: 0.999999702567118, iteration: 140571
loss: 1.0166398286819458,grad_norm: 0.9542696795680801, iteration: 140572
loss: 1.0168629884719849,grad_norm: 0.9999992170839909, iteration: 140573
loss: 1.030329704284668,grad_norm: 0.9999999000392842, iteration: 140574
loss: 1.0100648403167725,grad_norm: 0.999998980427676, iteration: 140575
loss: 1.0091702938079834,grad_norm: 0.9999992081911089, iteration: 140576
loss: 0.9920569062232971,grad_norm: 0.9999991321133008, iteration: 140577
loss: 1.0176388025283813,grad_norm: 0.9999993164052701, iteration: 140578
loss: 1.0051190853118896,grad_norm: 0.9992930410092604, iteration: 140579
loss: 1.0128899812698364,grad_norm: 0.9732059753020876, iteration: 140580
loss: 1.0205576419830322,grad_norm: 0.9908221634036998, iteration: 140581
loss: 0.9840404391288757,grad_norm: 0.9999992929346434, iteration: 140582
loss: 1.0402990579605103,grad_norm: 0.9999990979320822, iteration: 140583
loss: 0.9974310994148254,grad_norm: 0.9849606545191446, iteration: 140584
loss: 1.0122283697128296,grad_norm: 0.9031813734228926, iteration: 140585
loss: 0.9986885786056519,grad_norm: 0.8779126717696967, iteration: 140586
loss: 1.0001533031463623,grad_norm: 0.9373433877016678, iteration: 140587
loss: 1.0751409530639648,grad_norm: 0.9999993513681661, iteration: 140588
loss: 1.0247896909713745,grad_norm: 0.9999990020569336, iteration: 140589
loss: 1.025217056274414,grad_norm: 0.9999990650367973, iteration: 140590
loss: 1.0041675567626953,grad_norm: 0.9999998001788495, iteration: 140591
loss: 1.0046687126159668,grad_norm: 0.9058725655581531, iteration: 140592
loss: 0.9939009547233582,grad_norm: 0.9999995896424348, iteration: 140593
loss: 1.025693655014038,grad_norm: 0.9999992529182562, iteration: 140594
loss: 0.9854968786239624,grad_norm: 0.9999990630427917, iteration: 140595
loss: 0.9987550377845764,grad_norm: 0.9999991371339124, iteration: 140596
loss: 0.9942857623100281,grad_norm: 0.9999991097666625, iteration: 140597
loss: 0.9950507879257202,grad_norm: 0.9655328562840132, iteration: 140598
loss: 0.9909616112709045,grad_norm: 0.9999992781202459, iteration: 140599
loss: 1.0003234148025513,grad_norm: 0.9809236197220226, iteration: 140600
loss: 0.9781808853149414,grad_norm: 0.8490661385490497, iteration: 140601
loss: 1.0140800476074219,grad_norm: 0.9310340549985314, iteration: 140602
loss: 1.060116171836853,grad_norm: 0.9999995109019374, iteration: 140603
loss: 1.016591191291809,grad_norm: 0.9999992307397081, iteration: 140604
loss: 0.9967218637466431,grad_norm: 0.9660622235959659, iteration: 140605
loss: 1.010189175605774,grad_norm: 0.99999912928267, iteration: 140606
loss: 0.9828882217407227,grad_norm: 0.911339987512031, iteration: 140607
loss: 0.993792712688446,grad_norm: 0.9999992385238631, iteration: 140608
loss: 0.996027946472168,grad_norm: 0.8755093479036824, iteration: 140609
loss: 1.019386649131775,grad_norm: 0.999999155555896, iteration: 140610
loss: 1.043099045753479,grad_norm: 0.999999414025042, iteration: 140611
loss: 1.0029513835906982,grad_norm: 0.9999990626850022, iteration: 140612
loss: 1.0065457820892334,grad_norm: 0.9999993338722353, iteration: 140613
loss: 1.0133693218231201,grad_norm: 0.9619943431170261, iteration: 140614
loss: 0.9954909682273865,grad_norm: 0.9999989911704519, iteration: 140615
loss: 1.0068578720092773,grad_norm: 0.9081850027220546, iteration: 140616
loss: 0.9882591962814331,grad_norm: 0.970893590383854, iteration: 140617
loss: 0.990409255027771,grad_norm: 0.9999991492145069, iteration: 140618
loss: 1.0122098922729492,grad_norm: 0.9499430650594054, iteration: 140619
loss: 0.9456822872161865,grad_norm: 0.9999991280863785, iteration: 140620
loss: 0.9895848035812378,grad_norm: 0.916195519717323, iteration: 140621
loss: 0.9902507066726685,grad_norm: 0.9517037736701438, iteration: 140622
loss: 0.9885575771331787,grad_norm: 1.0000000093766834, iteration: 140623
loss: 1.0111761093139648,grad_norm: 0.9999992102360473, iteration: 140624
loss: 1.0306845903396606,grad_norm: 0.999999150181504, iteration: 140625
loss: 0.9873533248901367,grad_norm: 0.9999998289380477, iteration: 140626
loss: 0.9870993494987488,grad_norm: 0.9553024532979754, iteration: 140627
loss: 1.0111101865768433,grad_norm: 0.9999998568098072, iteration: 140628
loss: 0.9811038374900818,grad_norm: 0.9999991806698761, iteration: 140629
loss: 1.04099702835083,grad_norm: 0.999999454451959, iteration: 140630
loss: 1.0218555927276611,grad_norm: 0.999999466434436, iteration: 140631
loss: 1.0033178329467773,grad_norm: 0.9999991391202734, iteration: 140632
loss: 1.0238150358200073,grad_norm: 0.9782995312642586, iteration: 140633
loss: 1.0177204608917236,grad_norm: 0.9999991258155098, iteration: 140634
loss: 0.97917240858078,grad_norm: 0.9291208236970281, iteration: 140635
loss: 1.0254074335098267,grad_norm: 0.9999991369162257, iteration: 140636
loss: 1.0264949798583984,grad_norm: 0.9999992408564243, iteration: 140637
loss: 0.9940519332885742,grad_norm: 0.8995548291952968, iteration: 140638
loss: 0.9753615856170654,grad_norm: 0.9999992172453799, iteration: 140639
loss: 0.9890639781951904,grad_norm: 0.8998497678473761, iteration: 140640
loss: 1.0183011293411255,grad_norm: 0.9999991583204407, iteration: 140641
loss: 0.9972972869873047,grad_norm: 0.9999996434211219, iteration: 140642
loss: 0.953105092048645,grad_norm: 0.8999483864952688, iteration: 140643
loss: 1.0061042308807373,grad_norm: 0.9999989592261935, iteration: 140644
loss: 1.014409065246582,grad_norm: 0.9999990560822611, iteration: 140645
loss: 1.0997633934020996,grad_norm: 0.9999997000366144, iteration: 140646
loss: 1.0136324167251587,grad_norm: 0.8957369424816658, iteration: 140647
loss: 0.9917077422142029,grad_norm: 0.9999991886509783, iteration: 140648
loss: 1.0120774507522583,grad_norm: 0.9999990830871932, iteration: 140649
loss: 0.9816281795501709,grad_norm: 0.999999081628297, iteration: 140650
loss: 1.020266056060791,grad_norm: 0.9999990680284363, iteration: 140651
loss: 1.0420719385147095,grad_norm: 0.9064966457136046, iteration: 140652
loss: 1.0064647197723389,grad_norm: 0.9999991036618746, iteration: 140653
loss: 1.0048987865447998,grad_norm: 0.9999991413046014, iteration: 140654
loss: 0.9545449018478394,grad_norm: 0.9326792607952784, iteration: 140655
loss: 0.9885442852973938,grad_norm: 0.9669373028414742, iteration: 140656
loss: 1.0064120292663574,grad_norm: 0.999999136074267, iteration: 140657
loss: 1.0385061502456665,grad_norm: 0.9593976864345028, iteration: 140658
loss: 0.9885468482971191,grad_norm: 0.9763362240845302, iteration: 140659
loss: 0.9832961559295654,grad_norm: 0.9360763068364313, iteration: 140660
loss: 0.9629282355308533,grad_norm: 0.9880906059074563, iteration: 140661
loss: 1.001971960067749,grad_norm: 0.9999990702990127, iteration: 140662
loss: 0.9917933344841003,grad_norm: 0.9857708765004555, iteration: 140663
loss: 1.010722041130066,grad_norm: 0.9999989924190259, iteration: 140664
loss: 1.0216337442398071,grad_norm: 0.9549665957888488, iteration: 140665
loss: 1.0057693719863892,grad_norm: 0.9221965085908626, iteration: 140666
loss: 0.9734413623809814,grad_norm: 0.9833129528877474, iteration: 140667
loss: 0.9866244792938232,grad_norm: 0.9999991020246265, iteration: 140668
loss: 0.9816713333129883,grad_norm: 0.9999990898813291, iteration: 140669
loss: 0.9933353066444397,grad_norm: 0.9092581705210623, iteration: 140670
loss: 0.9845820665359497,grad_norm: 0.9999989969919048, iteration: 140671
loss: 1.025297999382019,grad_norm: 0.9999991137160513, iteration: 140672
loss: 1.0916041135787964,grad_norm: 0.9999996727688218, iteration: 140673
loss: 0.9853277802467346,grad_norm: 0.9999990553028919, iteration: 140674
loss: 0.9750435948371887,grad_norm: 0.999999349587731, iteration: 140675
loss: 1.004685401916504,grad_norm: 0.9999994430168659, iteration: 140676
loss: 1.0297538042068481,grad_norm: 0.9999992363025891, iteration: 140677
loss: 1.0077530145645142,grad_norm: 0.9999991830622446, iteration: 140678
loss: 1.0031120777130127,grad_norm: 0.9269066429573959, iteration: 140679
loss: 1.0097893476486206,grad_norm: 0.9999991944142803, iteration: 140680
loss: 0.9975845217704773,grad_norm: 0.9999991855194825, iteration: 140681
loss: 0.9995753169059753,grad_norm: 0.9973708278509508, iteration: 140682
loss: 1.005596399307251,grad_norm: 0.9999990812119331, iteration: 140683
loss: 0.9732583165168762,grad_norm: 0.9976961722898047, iteration: 140684
loss: 1.0089293718338013,grad_norm: 0.9527122810449077, iteration: 140685
loss: 1.0008666515350342,grad_norm: 0.999999023374698, iteration: 140686
loss: 0.9877598285675049,grad_norm: 0.9797341353558359, iteration: 140687
loss: 1.0039489269256592,grad_norm: 0.922043314687027, iteration: 140688
loss: 1.021754264831543,grad_norm: 0.9999991768670996, iteration: 140689
loss: 0.9973359704017639,grad_norm: 0.9999991316905031, iteration: 140690
loss: 0.9978790283203125,grad_norm: 0.9970031536274975, iteration: 140691
loss: 1.0271836519241333,grad_norm: 0.9977362231432653, iteration: 140692
loss: 1.0220142602920532,grad_norm: 0.999998942758261, iteration: 140693
loss: 1.0599079132080078,grad_norm: 0.9999991073979992, iteration: 140694
loss: 0.9513382315635681,grad_norm: 0.934445849927401, iteration: 140695
loss: 1.0115288496017456,grad_norm: 0.9999992190245637, iteration: 140696
loss: 0.9972984790802002,grad_norm: 0.9608800454556415, iteration: 140697
loss: 0.9985715746879578,grad_norm: 0.9999989725879094, iteration: 140698
loss: 1.0174858570098877,grad_norm: 0.9999991513022034, iteration: 140699
loss: 0.9877912998199463,grad_norm: 0.9999991508194056, iteration: 140700
loss: 1.0000296831130981,grad_norm: 0.9686330480360829, iteration: 140701
loss: 0.9899420738220215,grad_norm: 0.9864670099804767, iteration: 140702
loss: 1.0032494068145752,grad_norm: 0.9749474874285752, iteration: 140703
loss: 1.0472685098648071,grad_norm: 0.9999992544621492, iteration: 140704
loss: 1.0198262929916382,grad_norm: 0.9999989500232539, iteration: 140705
loss: 1.0143903493881226,grad_norm: 0.9999994994493717, iteration: 140706
loss: 1.0219650268554688,grad_norm: 0.9999992388400359, iteration: 140707
loss: 1.0055561065673828,grad_norm: 0.9437507825060414, iteration: 140708
loss: 0.9950731992721558,grad_norm: 0.945639964596395, iteration: 140709
loss: 1.0052193403244019,grad_norm: 0.9134459609663501, iteration: 140710
loss: 1.001685619354248,grad_norm: 0.9663834912100728, iteration: 140711
loss: 1.0267904996871948,grad_norm: 0.9999991184302707, iteration: 140712
loss: 0.9963246583938599,grad_norm: 0.6652838663283378, iteration: 140713
loss: 1.01771080493927,grad_norm: 0.9999990478036007, iteration: 140714
loss: 0.9651497602462769,grad_norm: 0.9192189704941981, iteration: 140715
loss: 1.0020852088928223,grad_norm: 0.99999911952053, iteration: 140716
loss: 0.9803646206855774,grad_norm: 0.9448073697481729, iteration: 140717
loss: 1.0290672779083252,grad_norm: 0.9999991675993584, iteration: 140718
loss: 0.9756183624267578,grad_norm: 0.9999991606698677, iteration: 140719
loss: 1.0097880363464355,grad_norm: 0.9999990229007057, iteration: 140720
loss: 1.0079759359359741,grad_norm: 0.9346619227396242, iteration: 140721
loss: 1.0025928020477295,grad_norm: 0.947547584550116, iteration: 140722
loss: 0.9453837275505066,grad_norm: 0.9999989021010891, iteration: 140723
loss: 0.9832988977432251,grad_norm: 0.9999990827659969, iteration: 140724
loss: 1.010136604309082,grad_norm: 0.9076965231983536, iteration: 140725
loss: 1.0014575719833374,grad_norm: 0.9391689018482876, iteration: 140726
loss: 0.9915479421615601,grad_norm: 0.9378263811015887, iteration: 140727
loss: 0.9920995831489563,grad_norm: 0.9999991924540432, iteration: 140728
loss: 0.9851548671722412,grad_norm: 0.9999991519335097, iteration: 140729
loss: 0.9826260209083557,grad_norm: 0.8201129447844155, iteration: 140730
loss: 1.0158461332321167,grad_norm: 0.9985731135448787, iteration: 140731
loss: 1.0339834690093994,grad_norm: 0.9999991916924814, iteration: 140732
loss: 1.0169296264648438,grad_norm: 0.9999992036396513, iteration: 140733
loss: 0.9924367666244507,grad_norm: 0.9999991242114146, iteration: 140734
loss: 0.9811504483222961,grad_norm: 0.9999989804930541, iteration: 140735
loss: 1.0339455604553223,grad_norm: 0.9999991982645515, iteration: 140736
loss: 1.0135184526443481,grad_norm: 0.9159881658755606, iteration: 140737
loss: 1.0018432140350342,grad_norm: 0.9999994370078834, iteration: 140738
loss: 1.00348699092865,grad_norm: 0.8958843125667378, iteration: 140739
loss: 0.9807726740837097,grad_norm: 0.9999990430593002, iteration: 140740
loss: 1.0130927562713623,grad_norm: 0.9999991890110275, iteration: 140741
loss: 0.9855291843414307,grad_norm: 0.9317750157844438, iteration: 140742
loss: 1.0277843475341797,grad_norm: 0.9999988861660178, iteration: 140743
loss: 1.0051339864730835,grad_norm: 0.9999991767577212, iteration: 140744
loss: 1.037041425704956,grad_norm: 0.9999991300302918, iteration: 140745
loss: 1.0329558849334717,grad_norm: 0.9999990245379302, iteration: 140746
loss: 1.020827293395996,grad_norm: 0.9657422647291369, iteration: 140747
loss: 1.0153491497039795,grad_norm: 0.9658576927505307, iteration: 140748
loss: 1.0066126585006714,grad_norm: 0.9999992667615231, iteration: 140749
loss: 1.031137228012085,grad_norm: 0.9999993174351227, iteration: 140750
loss: 0.9779362678527832,grad_norm: 0.9999994601816079, iteration: 140751
loss: 0.9900360107421875,grad_norm: 0.9764507191479448, iteration: 140752
loss: 1.020539402961731,grad_norm: 0.9999990349735898, iteration: 140753
loss: 1.0191186666488647,grad_norm: 0.9448315107200588, iteration: 140754
loss: 0.9826288223266602,grad_norm: 0.8560500896876355, iteration: 140755
loss: 1.0343787670135498,grad_norm: 0.9999997069973556, iteration: 140756
loss: 1.002751111984253,grad_norm: 0.9999989929365054, iteration: 140757
loss: 1.0414358377456665,grad_norm: 0.9999993487497659, iteration: 140758
loss: 1.0225493907928467,grad_norm: 0.9999991333163385, iteration: 140759
loss: 1.0051769018173218,grad_norm: 0.9294069676043051, iteration: 140760
loss: 0.9945693016052246,grad_norm: 0.8885189066168631, iteration: 140761
loss: 0.9940931797027588,grad_norm: 0.9999990922776092, iteration: 140762
loss: 1.0279855728149414,grad_norm: 0.999999175050778, iteration: 140763
loss: 0.993492066860199,grad_norm: 0.9742369166917499, iteration: 140764
loss: 0.9852070212364197,grad_norm: 0.9999989655883014, iteration: 140765
loss: 0.9937620162963867,grad_norm: 0.9999991476865221, iteration: 140766
loss: 0.9915788173675537,grad_norm: 0.9999990451195211, iteration: 140767
loss: 0.9879862666130066,grad_norm: 0.9137387751014049, iteration: 140768
loss: 0.9914860725402832,grad_norm: 0.999999208028414, iteration: 140769
loss: 0.9876519441604614,grad_norm: 0.9999991400254005, iteration: 140770
loss: 1.0178831815719604,grad_norm: 0.9999998005064754, iteration: 140771
loss: 0.9952952265739441,grad_norm: 0.9575244323979942, iteration: 140772
loss: 0.9683026075363159,grad_norm: 0.8870448293724489, iteration: 140773
loss: 0.9577077627182007,grad_norm: 0.9559966984651594, iteration: 140774
loss: 1.0100064277648926,grad_norm: 0.9999991697099537, iteration: 140775
loss: 1.0018494129180908,grad_norm: 0.999999146519079, iteration: 140776
loss: 0.9727426767349243,grad_norm: 0.9600607497203092, iteration: 140777
loss: 1.0412424802780151,grad_norm: 0.8882017665668502, iteration: 140778
loss: 1.0121418237686157,grad_norm: 0.9275431979400471, iteration: 140779
loss: 0.9821543097496033,grad_norm: 0.9276952390303911, iteration: 140780
loss: 0.9872557520866394,grad_norm: 0.9999989524649121, iteration: 140781
loss: 1.0150753259658813,grad_norm: 0.9999989994697452, iteration: 140782
loss: 1.0092077255249023,grad_norm: 0.9664797458271346, iteration: 140783
loss: 1.0152689218521118,grad_norm: 1.0000000287068942, iteration: 140784
loss: 1.045369267463684,grad_norm: 0.8888969604539878, iteration: 140785
loss: 1.0582019090652466,grad_norm: 0.9363423432250216, iteration: 140786
loss: 1.0167149305343628,grad_norm: 0.9296803416806991, iteration: 140787
loss: 0.9854699373245239,grad_norm: 0.9999992089341948, iteration: 140788
loss: 1.0227566957473755,grad_norm: 0.9999990532980509, iteration: 140789
loss: 1.0021559000015259,grad_norm: 0.9097987438857486, iteration: 140790
loss: 1.0438820123672485,grad_norm: 0.9578620131739568, iteration: 140791
loss: 1.0005744695663452,grad_norm: 0.9999991559687808, iteration: 140792
loss: 0.9999423027038574,grad_norm: 0.9999992343605346, iteration: 140793
loss: 1.0062499046325684,grad_norm: 0.999999538006661, iteration: 140794
loss: 1.0022461414337158,grad_norm: 0.9976939088505409, iteration: 140795
loss: 0.9944339394569397,grad_norm: 0.9369798545448903, iteration: 140796
loss: 1.0067793130874634,grad_norm: 0.990340764896403, iteration: 140797
loss: 1.0191035270690918,grad_norm: 0.999999186814426, iteration: 140798
loss: 0.9992162585258484,grad_norm: 0.9999990861743923, iteration: 140799
loss: 0.9516528844833374,grad_norm: 0.9999991282968848, iteration: 140800
loss: 1.0120433568954468,grad_norm: 0.8956185030950713, iteration: 140801
loss: 1.0091155767440796,grad_norm: 0.8624493730245814, iteration: 140802
loss: 1.0297869443893433,grad_norm: 0.9999989840146845, iteration: 140803
loss: 0.9851199388504028,grad_norm: 0.8407337816904051, iteration: 140804
loss: 1.000948190689087,grad_norm: 0.999999049746114, iteration: 140805
loss: 1.0227231979370117,grad_norm: 0.9965979805436058, iteration: 140806
loss: 1.0006661415100098,grad_norm: 0.999999816493485, iteration: 140807
loss: 1.0009655952453613,grad_norm: 0.9999991206077794, iteration: 140808
loss: 0.9851784110069275,grad_norm: 0.9999991211116473, iteration: 140809
loss: 1.0024806261062622,grad_norm: 0.9999991943847162, iteration: 140810
loss: 0.9991025924682617,grad_norm: 0.9050252721918894, iteration: 140811
loss: 1.018678069114685,grad_norm: 0.9999990321006679, iteration: 140812
loss: 0.9920722246170044,grad_norm: 0.9999991163778365, iteration: 140813
loss: 0.975409209728241,grad_norm: 0.9999990477559929, iteration: 140814
loss: 0.9907610416412354,grad_norm: 0.9999990252866537, iteration: 140815
loss: 0.9904729723930359,grad_norm: 0.8802716418924328, iteration: 140816
loss: 0.9963511228561401,grad_norm: 0.9570674580720343, iteration: 140817
loss: 1.0380911827087402,grad_norm: 0.99999935204874, iteration: 140818
loss: 0.9680458903312683,grad_norm: 0.9999991127056553, iteration: 140819
loss: 1.039618730545044,grad_norm: 0.7935918471620166, iteration: 140820
loss: 1.0255471467971802,grad_norm: 0.9999990845099793, iteration: 140821
loss: 1.01260244846344,grad_norm: 0.9091953168493871, iteration: 140822
loss: 0.9888012409210205,grad_norm: 0.880473379242035, iteration: 140823
loss: 0.989140510559082,grad_norm: 0.9999991732326926, iteration: 140824
loss: 1.003751277923584,grad_norm: 0.9999991944322534, iteration: 140825
loss: 0.9944944977760315,grad_norm: 0.8515960739176827, iteration: 140826
loss: 0.9907504916191101,grad_norm: 0.9999992163597373, iteration: 140827
loss: 1.01689875125885,grad_norm: 0.9999990122712374, iteration: 140828
loss: 1.0156548023223877,grad_norm: 0.9999990547982195, iteration: 140829
loss: 1.0228701829910278,grad_norm: 0.9999989409411212, iteration: 140830
loss: 1.031033992767334,grad_norm: 0.9999991657896372, iteration: 140831
loss: 0.977956235408783,grad_norm: 0.9999992334951245, iteration: 140832
loss: 0.9936423897743225,grad_norm: 0.999999060965478, iteration: 140833
loss: 0.9637172818183899,grad_norm: 0.8660741159447437, iteration: 140834
loss: 0.9794559478759766,grad_norm: 0.9112774791007927, iteration: 140835
loss: 0.9941108226776123,grad_norm: 0.9725988595795994, iteration: 140836
loss: 1.002327561378479,grad_norm: 0.9999991794887081, iteration: 140837
loss: 1.039046049118042,grad_norm: 0.9146926619987757, iteration: 140838
loss: 0.9915193915367126,grad_norm: 0.9877457262373385, iteration: 140839
loss: 1.0089410543441772,grad_norm: 0.999998999951397, iteration: 140840
loss: 0.962710976600647,grad_norm: 0.9999989740299149, iteration: 140841
loss: 0.9509994387626648,grad_norm: 0.935166602247691, iteration: 140842
loss: 1.0080196857452393,grad_norm: 0.9563327571075863, iteration: 140843
loss: 0.9952663779258728,grad_norm: 0.9999989434249571, iteration: 140844
loss: 1.0126211643218994,grad_norm: 0.9351223742225456, iteration: 140845
loss: 0.9791409373283386,grad_norm: 0.8937553789061724, iteration: 140846
loss: 0.9713851809501648,grad_norm: 0.9678824077335872, iteration: 140847
loss: 1.0069587230682373,grad_norm: 0.9999991791699144, iteration: 140848
loss: 1.0001866817474365,grad_norm: 0.9999991294625475, iteration: 140849
loss: 1.0169836282730103,grad_norm: 0.9999991419009315, iteration: 140850
loss: 1.0220446586608887,grad_norm: 0.9999991964729025, iteration: 140851
loss: 1.0067391395568848,grad_norm: 0.9088526140937128, iteration: 140852
loss: 0.9883358478546143,grad_norm: 0.9352576387524472, iteration: 140853
loss: 0.9998713731765747,grad_norm: 0.993354025039703, iteration: 140854
loss: 0.9936490058898926,grad_norm: 0.9999991107373971, iteration: 140855
loss: 0.9901156425476074,grad_norm: 0.9999992064976899, iteration: 140856
loss: 1.02574622631073,grad_norm: 0.9999990674393282, iteration: 140857
loss: 1.0355440378189087,grad_norm: 0.9999991799307301, iteration: 140858
loss: 0.9625480771064758,grad_norm: 0.9999991193741007, iteration: 140859
loss: 0.9958363771438599,grad_norm: 0.820044206607795, iteration: 140860
loss: 0.9819840788841248,grad_norm: 0.9254539222535402, iteration: 140861
loss: 0.9764606356620789,grad_norm: 0.9999991223483214, iteration: 140862
loss: 1.0451973676681519,grad_norm: 0.9948810625505925, iteration: 140863
loss: 0.9919384121894836,grad_norm: 0.999999306715502, iteration: 140864
loss: 1.006841778755188,grad_norm: 0.8562857695747528, iteration: 140865
loss: 0.9501767754554749,grad_norm: 0.999999151921975, iteration: 140866
loss: 0.9938404560089111,grad_norm: 0.8742608629542705, iteration: 140867
loss: 1.0531903505325317,grad_norm: 0.9999998235196526, iteration: 140868
loss: 1.0019952058792114,grad_norm: 0.999999150275862, iteration: 140869
loss: 1.0060302019119263,grad_norm: 0.8045264773105645, iteration: 140870
loss: 0.9983493089675903,grad_norm: 0.999999076856017, iteration: 140871
loss: 1.0143859386444092,grad_norm: 0.9999991141110551, iteration: 140872
loss: 0.9682705998420715,grad_norm: 0.9179504449335957, iteration: 140873
loss: 0.9638766050338745,grad_norm: 0.9999995356284878, iteration: 140874
loss: 0.9761813282966614,grad_norm: 0.9999990256626606, iteration: 140875
loss: 1.0328123569488525,grad_norm: 0.9999991886388129, iteration: 140876
loss: 1.0044997930526733,grad_norm: 0.9999991465870736, iteration: 140877
loss: 0.9930866956710815,grad_norm: 0.9999990981127341, iteration: 140878
loss: 0.972308337688446,grad_norm: 0.9681684998837135, iteration: 140879
loss: 0.9426281452178955,grad_norm: 0.9999991607453466, iteration: 140880
loss: 0.9952736496925354,grad_norm: 0.999999201139381, iteration: 140881
loss: 0.9823408126831055,grad_norm: 0.8930433558290218, iteration: 140882
loss: 0.9873906970024109,grad_norm: 0.9999993570096664, iteration: 140883
loss: 0.9668227434158325,grad_norm: 0.9629960941110347, iteration: 140884
loss: 1.0274724960327148,grad_norm: 0.9030225896308091, iteration: 140885
loss: 0.996280312538147,grad_norm: 0.9904678229427847, iteration: 140886
loss: 1.0254513025283813,grad_norm: 0.9487237071122756, iteration: 140887
loss: 1.0288221836090088,grad_norm: 0.9999991097361403, iteration: 140888
loss: 0.9826801419258118,grad_norm: 0.9945231351165632, iteration: 140889
loss: 1.027276635169983,grad_norm: 0.9999991189994686, iteration: 140890
loss: 1.0196176767349243,grad_norm: 0.9768650903767584, iteration: 140891
loss: 1.0190969705581665,grad_norm: 0.9999993353647024, iteration: 140892
loss: 1.0243234634399414,grad_norm: 0.9999991754176119, iteration: 140893
loss: 0.9720029234886169,grad_norm: 0.9999989893962826, iteration: 140894
loss: 1.002203345298767,grad_norm: 0.9999992241732032, iteration: 140895
loss: 0.9808165431022644,grad_norm: 0.9144220876858861, iteration: 140896
loss: 1.020424485206604,grad_norm: 0.8432618822814169, iteration: 140897
loss: 0.9791197180747986,grad_norm: 0.920043687125029, iteration: 140898
loss: 0.9521331787109375,grad_norm: 0.9999990778812227, iteration: 140899
loss: 1.012297511100769,grad_norm: 0.9999991530064061, iteration: 140900
loss: 0.9790672063827515,grad_norm: 0.9999991053917996, iteration: 140901
loss: 1.0113688707351685,grad_norm: 0.9999992727696689, iteration: 140902
loss: 0.9809186458587646,grad_norm: 0.9999991556868589, iteration: 140903
loss: 1.0339103937149048,grad_norm: 0.9999992908518107, iteration: 140904
loss: 0.9946072101593018,grad_norm: 0.9446319635142616, iteration: 140905
loss: 1.0215590000152588,grad_norm: 0.9999991851215505, iteration: 140906
loss: 0.9499108791351318,grad_norm: 0.9427643071362649, iteration: 140907
loss: 1.0024904012680054,grad_norm: 0.9635584168185886, iteration: 140908
loss: 1.0102146863937378,grad_norm: 0.9999991419038771, iteration: 140909
loss: 1.0213693380355835,grad_norm: 0.8635703895085513, iteration: 140910
loss: 1.0100833177566528,grad_norm: 0.9999991690900986, iteration: 140911
loss: 1.0055959224700928,grad_norm: 0.9999988683505334, iteration: 140912
loss: 0.9942224025726318,grad_norm: 0.9999991894123513, iteration: 140913
loss: 0.9895628094673157,grad_norm: 0.9830553588215252, iteration: 140914
loss: 1.00672447681427,grad_norm: 0.912107851277162, iteration: 140915
loss: 1.023403525352478,grad_norm: 0.9999992370777231, iteration: 140916
loss: 1.006954550743103,grad_norm: 0.9999992524703666, iteration: 140917
loss: 0.9859733581542969,grad_norm: 0.9999991138386181, iteration: 140918
loss: 0.9607425928115845,grad_norm: 0.9999991617820112, iteration: 140919
loss: 0.9710084795951843,grad_norm: 0.9999991327079216, iteration: 140920
loss: 1.0146384239196777,grad_norm: 0.999999203151986, iteration: 140921
loss: 0.9857568740844727,grad_norm: 0.957860282799471, iteration: 140922
loss: 1.0227491855621338,grad_norm: 0.9798394957873373, iteration: 140923
loss: 1.005472183227539,grad_norm: 0.8800975009689069, iteration: 140924
loss: 1.0082387924194336,grad_norm: 0.9999992713753567, iteration: 140925
loss: 0.9872930645942688,grad_norm: 0.959474946146419, iteration: 140926
loss: 1.047134280204773,grad_norm: 0.9999990818216752, iteration: 140927
loss: 0.9992277026176453,grad_norm: 0.8963561408803, iteration: 140928
loss: 0.9682943820953369,grad_norm: 0.9999990589504695, iteration: 140929
loss: 0.9610679745674133,grad_norm: 0.9999990061615045, iteration: 140930
loss: 0.9942705035209656,grad_norm: 0.9999994963560794, iteration: 140931
loss: 1.01093327999115,grad_norm: 0.9848232646010184, iteration: 140932
loss: 0.9924721717834473,grad_norm: 0.9999988945702701, iteration: 140933
loss: 1.0355961322784424,grad_norm: 0.9999990203617707, iteration: 140934
loss: 0.9768019318580627,grad_norm: 0.8792639930423134, iteration: 140935
loss: 1.0383036136627197,grad_norm: 0.965386936419764, iteration: 140936
loss: 1.0357208251953125,grad_norm: 0.9999992076848357, iteration: 140937
loss: 0.9762406945228577,grad_norm: 0.9648384026809221, iteration: 140938
loss: 1.0078871250152588,grad_norm: 0.8876047437868025, iteration: 140939
loss: 1.0012105703353882,grad_norm: 0.9999991118528171, iteration: 140940
loss: 1.0329393148422241,grad_norm: 0.9001772140299028, iteration: 140941
loss: 1.033730387687683,grad_norm: 0.9999992757048645, iteration: 140942
loss: 1.0301897525787354,grad_norm: 0.9999991297751911, iteration: 140943
loss: 0.9839571118354797,grad_norm: 0.8263483105706005, iteration: 140944
loss: 1.0090748071670532,grad_norm: 0.9999992200256291, iteration: 140945
loss: 1.0633413791656494,grad_norm: 0.9999993458029417, iteration: 140946
loss: 1.0267893075942993,grad_norm: 0.9999991109915191, iteration: 140947
loss: 0.9677553176879883,grad_norm: 0.9999990383018856, iteration: 140948
loss: 1.0278881788253784,grad_norm: 0.9567980534384333, iteration: 140949
loss: 1.0092257261276245,grad_norm: 0.9999990745512555, iteration: 140950
loss: 1.0079729557037354,grad_norm: 0.9999990854968701, iteration: 140951
loss: 0.9804969429969788,grad_norm: 0.9999990265410114, iteration: 140952
loss: 0.9976617693901062,grad_norm: 0.9999990971101921, iteration: 140953
loss: 0.9896870255470276,grad_norm: 0.9735011375513495, iteration: 140954
loss: 0.9983489513397217,grad_norm: 0.9999991603513921, iteration: 140955
loss: 0.991274893283844,grad_norm: 0.9999991408701269, iteration: 140956
loss: 0.9923781156539917,grad_norm: 0.9999991337377259, iteration: 140957
loss: 1.0467076301574707,grad_norm: 0.9999993883773812, iteration: 140958
loss: 0.9909051060676575,grad_norm: 0.9677380815518097, iteration: 140959
loss: 1.0097345113754272,grad_norm: 0.9510281462635279, iteration: 140960
loss: 0.9889124035835266,grad_norm: 0.9488198795973753, iteration: 140961
loss: 1.0105667114257812,grad_norm: 0.9999991184682181, iteration: 140962
loss: 1.0268899202346802,grad_norm: 0.9999990732447205, iteration: 140963
loss: 1.0062776803970337,grad_norm: 0.9999992481933752, iteration: 140964
loss: 0.9998365640640259,grad_norm: 0.9999992131735944, iteration: 140965
loss: 1.0382004976272583,grad_norm: 0.9999991140947986, iteration: 140966
loss: 0.9610612392425537,grad_norm: 0.9712758714222383, iteration: 140967
loss: 0.9860696196556091,grad_norm: 0.8632968097373195, iteration: 140968
loss: 1.0045627355575562,grad_norm: 0.9944565067279441, iteration: 140969
loss: 0.9947654604911804,grad_norm: 0.9539270977615301, iteration: 140970
loss: 1.0611110925674438,grad_norm: 0.9630480322213167, iteration: 140971
loss: 1.0114741325378418,grad_norm: 0.9999998085275222, iteration: 140972
loss: 1.0108555555343628,grad_norm: 0.9837004738817225, iteration: 140973
loss: 0.9837284088134766,grad_norm: 0.9999991370161534, iteration: 140974
loss: 0.9973477125167847,grad_norm: 0.999999077368643, iteration: 140975
loss: 0.9838391542434692,grad_norm: 0.999999183751299, iteration: 140976
loss: 0.9764578938484192,grad_norm: 0.9999991906966663, iteration: 140977
loss: 1.01509690284729,grad_norm: 0.9999988911879749, iteration: 140978
loss: 1.0133916139602661,grad_norm: 0.9999992275764212, iteration: 140979
loss: 1.0118638277053833,grad_norm: 0.9999991408258497, iteration: 140980
loss: 1.0335084199905396,grad_norm: 0.9641992103285584, iteration: 140981
loss: 0.9913233518600464,grad_norm: 0.9999990511335766, iteration: 140982
loss: 0.9977750778198242,grad_norm: 0.9999991499082812, iteration: 140983
loss: 0.9912837147712708,grad_norm: 0.8879713731353405, iteration: 140984
loss: 0.9689595699310303,grad_norm: 0.9999991700650841, iteration: 140985
loss: 0.9960938096046448,grad_norm: 0.9999990930356362, iteration: 140986
loss: 1.001212477684021,grad_norm: 0.99999922379158, iteration: 140987
loss: 1.008510708808899,grad_norm: 0.9999990686999471, iteration: 140988
loss: 0.9893839955329895,grad_norm: 0.9999990170857682, iteration: 140989
loss: 1.0388778448104858,grad_norm: 0.9999990883273662, iteration: 140990
loss: 1.0083812475204468,grad_norm: 0.9999989775876679, iteration: 140991
loss: 1.0120512247085571,grad_norm: 0.9999990558627377, iteration: 140992
loss: 1.0353606939315796,grad_norm: 0.9999992024527005, iteration: 140993
loss: 0.9678557515144348,grad_norm: 0.9901251835607637, iteration: 140994
loss: 0.9984806180000305,grad_norm: 0.9965634155748033, iteration: 140995
loss: 1.021493673324585,grad_norm: 0.9723067531524955, iteration: 140996
loss: 0.9860234260559082,grad_norm: 0.8527140587064236, iteration: 140997
loss: 0.9629567861557007,grad_norm: 0.9348277570346522, iteration: 140998
loss: 0.982012927532196,grad_norm: 0.9051841446833555, iteration: 140999
loss: 1.2278977632522583,grad_norm: 0.9999991255238402, iteration: 141000
loss: 0.9820104837417603,grad_norm: 0.9999991322620517, iteration: 141001
loss: 1.0222997665405273,grad_norm: 0.8555919331674696, iteration: 141002
loss: 1.015405535697937,grad_norm: 0.9999991370367202, iteration: 141003
loss: 0.993204653263092,grad_norm: 0.9379591297465585, iteration: 141004
loss: 1.0172488689422607,grad_norm: 0.933296001958531, iteration: 141005
loss: 0.9883766770362854,grad_norm: 0.9999991282593891, iteration: 141006
loss: 0.9855719208717346,grad_norm: 0.9999989437552681, iteration: 141007
loss: 1.0179370641708374,grad_norm: 0.9244281466403261, iteration: 141008
loss: 1.0133748054504395,grad_norm: 0.9999992564187689, iteration: 141009
loss: 1.0133308172225952,grad_norm: 0.8792506259427082, iteration: 141010
loss: 1.0236788988113403,grad_norm: 0.999998965235695, iteration: 141011
loss: 0.9466125965118408,grad_norm: 0.9999991647405597, iteration: 141012
loss: 1.004762887954712,grad_norm: 0.999179005990796, iteration: 141013
loss: 1.0178383588790894,grad_norm: 0.9999991477649028, iteration: 141014
loss: 1.0010986328125,grad_norm: 0.926403059689442, iteration: 141015
loss: 1.041281819343567,grad_norm: 0.9999990865352757, iteration: 141016
loss: 0.9842224717140198,grad_norm: 0.8582662243923512, iteration: 141017
loss: 0.9863192439079285,grad_norm: 0.9840602823295416, iteration: 141018
loss: 1.0127326250076294,grad_norm: 0.9999993037207381, iteration: 141019
loss: 1.008042335510254,grad_norm: 0.8857679660703115, iteration: 141020
loss: 0.9691027402877808,grad_norm: 0.9999990890617706, iteration: 141021
loss: 1.0104745626449585,grad_norm: 0.9999992289412464, iteration: 141022
loss: 1.0088857412338257,grad_norm: 0.9999992007794396, iteration: 141023
loss: 1.0418915748596191,grad_norm: 0.9999992561727793, iteration: 141024
loss: 1.0082045793533325,grad_norm: 0.9999989968045633, iteration: 141025
loss: 1.0070854425430298,grad_norm: 0.989346559294405, iteration: 141026
loss: 0.9888681769371033,grad_norm: 0.9999990526906793, iteration: 141027
loss: 1.0052931308746338,grad_norm: 0.9144936313615306, iteration: 141028
loss: 1.0061370134353638,grad_norm: 0.9652593886306298, iteration: 141029
loss: 1.017729640007019,grad_norm: 0.9999993492741365, iteration: 141030
loss: 0.9800636768341064,grad_norm: 0.8286342348217834, iteration: 141031
loss: 1.0125055313110352,grad_norm: 0.9999991267823721, iteration: 141032
loss: 0.9985674023628235,grad_norm: 0.9999989572377589, iteration: 141033
loss: 0.9775895476341248,grad_norm: 0.999999108075122, iteration: 141034
loss: 1.0328327417373657,grad_norm: 0.9999989997473988, iteration: 141035
loss: 0.9789115786552429,grad_norm: 0.9619023385949353, iteration: 141036
loss: 1.0127054452896118,grad_norm: 0.9999991660431419, iteration: 141037
loss: 1.0454661846160889,grad_norm: 0.944600750222161, iteration: 141038
loss: 0.9805143475532532,grad_norm: 0.999999065104945, iteration: 141039
loss: 1.0082366466522217,grad_norm: 0.9198291490430177, iteration: 141040
loss: 0.9900485277175903,grad_norm: 0.9178841126676966, iteration: 141041
loss: 0.9819929003715515,grad_norm: 0.9999991101374315, iteration: 141042
loss: 0.9829701781272888,grad_norm: 0.9813165047818116, iteration: 141043
loss: 0.9769720435142517,grad_norm: 0.9108385607835374, iteration: 141044
loss: 0.9723643660545349,grad_norm: 0.9558091972283744, iteration: 141045
loss: 0.9629369974136353,grad_norm: 0.8071054400098149, iteration: 141046
loss: 1.0149744749069214,grad_norm: 0.9999990973090852, iteration: 141047
loss: 1.0201514959335327,grad_norm: 0.9999992397867823, iteration: 141048
loss: 1.011567234992981,grad_norm: 0.9317072562121461, iteration: 141049
loss: 0.9993108510971069,grad_norm: 0.999999002433867, iteration: 141050
loss: 0.9944302439689636,grad_norm: 0.9999988726383998, iteration: 141051
loss: 0.9658447504043579,grad_norm: 0.9447942249441397, iteration: 141052
loss: 1.0523453950881958,grad_norm: 0.9738458673653885, iteration: 141053
loss: 1.007898211479187,grad_norm: 0.9999991863779992, iteration: 141054
loss: 1.0158534049987793,grad_norm: 0.9999990668265261, iteration: 141055
loss: 0.9931459426879883,grad_norm: 0.9999989605408557, iteration: 141056
loss: 0.9809309244155884,grad_norm: 0.8698701585974242, iteration: 141057
loss: 0.9811779856681824,grad_norm: 0.9999992588114344, iteration: 141058
loss: 1.0235587358474731,grad_norm: 0.9999990629878068, iteration: 141059
loss: 1.0133490562438965,grad_norm: 0.9999991605273856, iteration: 141060
loss: 0.9626563191413879,grad_norm: 0.9999991903996766, iteration: 141061
loss: 1.0259594917297363,grad_norm: 0.9999991277949122, iteration: 141062
loss: 1.004169225692749,grad_norm: 0.9827233044977954, iteration: 141063
loss: 0.9904156923294067,grad_norm: 0.9999992648199607, iteration: 141064
loss: 0.9547989368438721,grad_norm: 0.8786117066102267, iteration: 141065
loss: 1.0212589502334595,grad_norm: 0.9146121040541109, iteration: 141066
loss: 1.0099084377288818,grad_norm: 0.9361900152344727, iteration: 141067
loss: 1.005656123161316,grad_norm: 0.9999992919065899, iteration: 141068
loss: 1.070036768913269,grad_norm: 0.9999994248238049, iteration: 141069
loss: 0.9987087249755859,grad_norm: 0.9999990869947881, iteration: 141070
loss: 1.002733588218689,grad_norm: 0.9999991684330696, iteration: 141071
loss: 0.9943203330039978,grad_norm: 0.9767501243799295, iteration: 141072
loss: 0.9703519940376282,grad_norm: 0.9999992000826445, iteration: 141073
loss: 0.9949505925178528,grad_norm: 0.9967989387650704, iteration: 141074
loss: 1.044027328491211,grad_norm: 0.999999685690453, iteration: 141075
loss: 1.0241215229034424,grad_norm: 0.9664160474094959, iteration: 141076
loss: 0.9919412732124329,grad_norm: 0.9907679400245364, iteration: 141077
loss: 0.9869962930679321,grad_norm: 0.9999989757365783, iteration: 141078
loss: 1.0045093297958374,grad_norm: 0.9999992618301797, iteration: 141079
loss: 0.9783923625946045,grad_norm: 0.9999989470474574, iteration: 141080
loss: 0.9828345775604248,grad_norm: 0.9999992355208966, iteration: 141081
loss: 1.0294740200042725,grad_norm: 0.999999152060477, iteration: 141082
loss: 0.9791740775108337,grad_norm: 0.9999990370363593, iteration: 141083
loss: 0.9730826020240784,grad_norm: 0.9999991127045449, iteration: 141084
loss: 1.0319814682006836,grad_norm: 0.9999992154495049, iteration: 141085
loss: 0.9627105593681335,grad_norm: 0.9999990015962272, iteration: 141086
loss: 1.0254679918289185,grad_norm: 0.999999117072719, iteration: 141087
loss: 0.9795560836791992,grad_norm: 0.9524279316161821, iteration: 141088
loss: 1.039257526397705,grad_norm: 0.8815340889626214, iteration: 141089
loss: 1.0196956396102905,grad_norm: 0.8877396233240213, iteration: 141090
loss: 1.0382438898086548,grad_norm: 0.9999992149927392, iteration: 141091
loss: 0.9851603507995605,grad_norm: 0.9162501288963631, iteration: 141092
loss: 0.9925425052642822,grad_norm: 0.9702966252129259, iteration: 141093
loss: 1.0143592357635498,grad_norm: 0.948325369401881, iteration: 141094
loss: 0.9875450134277344,grad_norm: 0.9999990951647165, iteration: 141095
loss: 1.0007394552230835,grad_norm: 0.8951455909016263, iteration: 141096
loss: 1.0181405544281006,grad_norm: 0.9999992773512356, iteration: 141097
loss: 1.0185191631317139,grad_norm: 0.966873893752626, iteration: 141098
loss: 1.0424894094467163,grad_norm: 0.9999992301043077, iteration: 141099
loss: 0.9811850190162659,grad_norm: 0.9999991104192936, iteration: 141100
loss: 0.9922371506690979,grad_norm: 0.9868497092008437, iteration: 141101
loss: 1.0026912689208984,grad_norm: 0.9445559053584984, iteration: 141102
loss: 1.0379079580307007,grad_norm: 0.9999990649518461, iteration: 141103
loss: 0.9816723465919495,grad_norm: 0.9550230878698335, iteration: 141104
loss: 0.99029541015625,grad_norm: 0.9999990578603759, iteration: 141105
loss: 0.9603765606880188,grad_norm: 0.9345280436817694, iteration: 141106
loss: 1.0006382465362549,grad_norm: 0.9999993406704106, iteration: 141107
loss: 1.0047045946121216,grad_norm: 0.7982829217129352, iteration: 141108
loss: 1.0045608282089233,grad_norm: 0.9414523486878584, iteration: 141109
loss: 1.0082385540008545,grad_norm: 0.9623442505665334, iteration: 141110
loss: 1.040946364402771,grad_norm: 0.9737284271050718, iteration: 141111
loss: 1.009811520576477,grad_norm: 0.9999990291248986, iteration: 141112
loss: 1.0236071348190308,grad_norm: 0.9999990355393297, iteration: 141113
loss: 1.0139621496200562,grad_norm: 0.9999992182963894, iteration: 141114
loss: 0.9923940896987915,grad_norm: 0.9473227212324863, iteration: 141115
loss: 0.9654934406280518,grad_norm: 0.9999990263746815, iteration: 141116
loss: 0.9839314818382263,grad_norm: 0.9017598168546598, iteration: 141117
loss: 0.9618660807609558,grad_norm: 0.9462700004027753, iteration: 141118
loss: 1.0393555164337158,grad_norm: 0.9140092398840225, iteration: 141119
loss: 1.0036119222640991,grad_norm: 0.8543462752268374, iteration: 141120
loss: 0.9727636575698853,grad_norm: 0.9045183483776034, iteration: 141121
loss: 1.014962911605835,grad_norm: 0.9999990659379336, iteration: 141122
loss: 1.0173975229263306,grad_norm: 0.9961381725243583, iteration: 141123
loss: 0.9814668297767639,grad_norm: 0.9999991030038692, iteration: 141124
loss: 0.992393434047699,grad_norm: 0.890306143975935, iteration: 141125
loss: 1.0309877395629883,grad_norm: 0.8731061980108681, iteration: 141126
loss: 0.990695595741272,grad_norm: 0.9664942230138981, iteration: 141127
loss: 1.0186597108840942,grad_norm: 0.9999991864812378, iteration: 141128
loss: 1.0120404958724976,grad_norm: 0.9999992695354768, iteration: 141129
loss: 1.0458441972732544,grad_norm: 0.9736878020605944, iteration: 141130
loss: 1.017789602279663,grad_norm: 0.999999164145611, iteration: 141131
loss: 0.9916803240776062,grad_norm: 0.9999994769621348, iteration: 141132
loss: 0.9699162244796753,grad_norm: 0.9999989657794391, iteration: 141133
loss: 0.9822484254837036,grad_norm: 0.9997230045412568, iteration: 141134
loss: 1.0057026147842407,grad_norm: 0.9913798633261289, iteration: 141135
loss: 1.013989806175232,grad_norm: 0.836781555802426, iteration: 141136
loss: 1.012937068939209,grad_norm: 0.908524410534077, iteration: 141137
loss: 1.0148358345031738,grad_norm: 0.8469213864283194, iteration: 141138
loss: 0.9992527961730957,grad_norm: 0.9999992135489117, iteration: 141139
loss: 1.0407787561416626,grad_norm: 0.9518087140956106, iteration: 141140
loss: 0.993522047996521,grad_norm: 0.9999989579191273, iteration: 141141
loss: 1.001327395439148,grad_norm: 0.9222406874818133, iteration: 141142
loss: 1.0207409858703613,grad_norm: 0.9316812174126657, iteration: 141143
loss: 0.9872797727584839,grad_norm: 0.9468218061801706, iteration: 141144
loss: 1.030894160270691,grad_norm: 0.9287721250769766, iteration: 141145
loss: 1.0176153182983398,grad_norm: 0.9035641319678899, iteration: 141146
loss: 1.0316509008407593,grad_norm: 0.9999989639043841, iteration: 141147
loss: 1.0351970195770264,grad_norm: 0.9479451425050648, iteration: 141148
loss: 0.9955971837043762,grad_norm: 0.999999284794034, iteration: 141149
loss: 0.9550200700759888,grad_norm: 0.9999991270231167, iteration: 141150
loss: 0.9783096313476562,grad_norm: 0.9999991787921096, iteration: 141151
loss: 0.9938993453979492,grad_norm: 0.7734354190021117, iteration: 141152
loss: 0.9734461307525635,grad_norm: 0.8798180379450612, iteration: 141153
loss: 0.9997386336326599,grad_norm: 0.9999992319890514, iteration: 141154
loss: 1.012473464012146,grad_norm: 0.8786584727133908, iteration: 141155
loss: 0.9786930084228516,grad_norm: 0.9999991534730288, iteration: 141156
loss: 0.9978638887405396,grad_norm: 0.963199643773403, iteration: 141157
loss: 0.9698846936225891,grad_norm: 0.9999992285762338, iteration: 141158
loss: 0.9514385461807251,grad_norm: 0.9999991416595478, iteration: 141159
loss: 1.0069000720977783,grad_norm: 0.9999990556138056, iteration: 141160
loss: 1.0349600315093994,grad_norm: 0.9999991908696412, iteration: 141161
loss: 0.9885152578353882,grad_norm: 0.935030042835479, iteration: 141162
loss: 1.0246607065200806,grad_norm: 0.9999991550693886, iteration: 141163
loss: 0.9906219244003296,grad_norm: 0.9999991239818062, iteration: 141164
loss: 1.0026261806488037,grad_norm: 0.9056566423196962, iteration: 141165
loss: 0.9859079122543335,grad_norm: 0.999999292145599, iteration: 141166
loss: 1.0344878435134888,grad_norm: 0.9154697818200217, iteration: 141167
loss: 1.0027799606323242,grad_norm: 0.9999996096953895, iteration: 141168
loss: 1.027582049369812,grad_norm: 0.9999991457192663, iteration: 141169
loss: 0.995826244354248,grad_norm: 0.999999058762373, iteration: 141170
loss: 1.0245682001113892,grad_norm: 0.9252821009752468, iteration: 141171
loss: 1.0146887302398682,grad_norm: 0.9298362108262184, iteration: 141172
loss: 0.9613321423530579,grad_norm: 0.9999990503016944, iteration: 141173
loss: 0.9919893145561218,grad_norm: 0.9999990229201348, iteration: 141174
loss: 0.9920545816421509,grad_norm: 0.9999992440086473, iteration: 141175
loss: 0.9839879274368286,grad_norm: 0.9999990545496047, iteration: 141176
loss: 0.96761155128479,grad_norm: 0.8225478830115323, iteration: 141177
loss: 1.0020737648010254,grad_norm: 0.999999169915494, iteration: 141178
loss: 0.9941777586936951,grad_norm: 0.9999989741580854, iteration: 141179
loss: 0.946697473526001,grad_norm: 0.999999130403381, iteration: 141180
loss: 0.9785342216491699,grad_norm: 0.9945284171400363, iteration: 141181
loss: 0.9880790710449219,grad_norm: 0.9335019215924543, iteration: 141182
loss: 0.986334502696991,grad_norm: 0.8965006182548898, iteration: 141183
loss: 0.9987407326698303,grad_norm: 0.9999991313858714, iteration: 141184
loss: 1.0127904415130615,grad_norm: 0.9999992047579235, iteration: 141185
loss: 0.9890175461769104,grad_norm: 0.9191445141500493, iteration: 141186
loss: 0.9966225028038025,grad_norm: 0.9999991303007157, iteration: 141187
loss: 1.0161312818527222,grad_norm: 0.9999993540608428, iteration: 141188
loss: 1.0159411430358887,grad_norm: 0.999999217956761, iteration: 141189
loss: 0.9838821291923523,grad_norm: 0.9874399074353383, iteration: 141190
loss: 1.011032223701477,grad_norm: 0.9657297171489476, iteration: 141191
loss: 0.9690265655517578,grad_norm: 0.9999992050749381, iteration: 141192
loss: 1.0342903137207031,grad_norm: 0.9898402469743501, iteration: 141193
loss: 0.9964324235916138,grad_norm: 0.9999991869020946, iteration: 141194
loss: 0.9975728988647461,grad_norm: 0.9590080155159791, iteration: 141195
loss: 0.9540945291519165,grad_norm: 0.9999992003312562, iteration: 141196
loss: 0.9687434434890747,grad_norm: 0.7843720896924304, iteration: 141197
loss: 1.0272998809814453,grad_norm: 0.8717875616386015, iteration: 141198
loss: 1.0158158540725708,grad_norm: 0.9999994075041325, iteration: 141199
loss: 0.969739556312561,grad_norm: 0.8591852280133011, iteration: 141200
loss: 1.0285228490829468,grad_norm: 0.9999990557548581, iteration: 141201
loss: 1.0099691152572632,grad_norm: 0.999999207958695, iteration: 141202
loss: 0.9997149705886841,grad_norm: 0.9999993173052563, iteration: 141203
loss: 1.0127708911895752,grad_norm: 0.99999904734669, iteration: 141204
loss: 0.985090434551239,grad_norm: 0.9999992327510873, iteration: 141205
loss: 0.9936612248420715,grad_norm: 0.9112542977260766, iteration: 141206
loss: 1.015321969985962,grad_norm: 0.9999991771698392, iteration: 141207
loss: 0.9875946044921875,grad_norm: 0.9999991355528041, iteration: 141208
loss: 1.0103775262832642,grad_norm: 0.9999990131890559, iteration: 141209
loss: 0.9812259078025818,grad_norm: 0.9999990394622463, iteration: 141210
loss: 0.9843428134918213,grad_norm: 0.9999989359478892, iteration: 141211
loss: 1.0103917121887207,grad_norm: 0.9999990867341289, iteration: 141212
loss: 1.0041770935058594,grad_norm: 0.99999929187459, iteration: 141213
loss: 0.99644535779953,grad_norm: 0.929500013682732, iteration: 141214
loss: 1.022329330444336,grad_norm: 0.9551708592885431, iteration: 141215
loss: 1.0010721683502197,grad_norm: 0.999999085251335, iteration: 141216
loss: 1.0731489658355713,grad_norm: 0.999999623398963, iteration: 141217
loss: 1.0277179479599,grad_norm: 0.9764486858374514, iteration: 141218
loss: 0.9953799247741699,grad_norm: 0.9999990686375928, iteration: 141219
loss: 0.980349600315094,grad_norm: 0.9999989866703335, iteration: 141220
loss: 1.000846266746521,grad_norm: 0.9999992819678607, iteration: 141221
loss: 1.0099027156829834,grad_norm: 0.9772553717113835, iteration: 141222
loss: 1.0066730976104736,grad_norm: 0.9999993171997491, iteration: 141223
loss: 1.0176351070404053,grad_norm: 0.8157621322329329, iteration: 141224
loss: 1.0021880865097046,grad_norm: 0.8937341628738406, iteration: 141225
loss: 1.042158842086792,grad_norm: 0.9999990630907853, iteration: 141226
loss: 0.966518223285675,grad_norm: 0.9999989552398849, iteration: 141227
loss: 0.9940388202667236,grad_norm: 0.9999992380240202, iteration: 141228
loss: 1.0066194534301758,grad_norm: 0.9999990926195379, iteration: 141229
loss: 1.023614764213562,grad_norm: 0.9999991009528896, iteration: 141230
loss: 1.003064513206482,grad_norm: 0.9102705956025561, iteration: 141231
loss: 0.9723044633865356,grad_norm: 0.9999990912785209, iteration: 141232
loss: 1.0073528289794922,grad_norm: 0.9483506512030047, iteration: 141233
loss: 1.005696415901184,grad_norm: 0.907634296149916, iteration: 141234
loss: 0.9976875185966492,grad_norm: 0.9778556323777582, iteration: 141235
loss: 1.1560759544372559,grad_norm: 0.999999329095345, iteration: 141236
loss: 1.038941740989685,grad_norm: 0.95300949152355, iteration: 141237
loss: 0.980415940284729,grad_norm: 0.9704876547819222, iteration: 141238
loss: 0.979431688785553,grad_norm: 0.9999991189886139, iteration: 141239
loss: 1.006060242652893,grad_norm: 0.9999992277563289, iteration: 141240
loss: 1.0007266998291016,grad_norm: 0.9999991586941036, iteration: 141241
loss: 0.9811175465583801,grad_norm: 0.9999992373738212, iteration: 141242
loss: 1.0030227899551392,grad_norm: 0.9999992141654841, iteration: 141243
loss: 0.9753732681274414,grad_norm: 0.9827284474590728, iteration: 141244
loss: 1.0284771919250488,grad_norm: 0.9999990952696326, iteration: 141245
loss: 0.9747364521026611,grad_norm: 0.9999990416168186, iteration: 141246
loss: 1.088390827178955,grad_norm: 0.9999995763955859, iteration: 141247
loss: 0.9938235282897949,grad_norm: 0.9999990888977117, iteration: 141248
loss: 1.07533860206604,grad_norm: 0.9999992386858487, iteration: 141249
loss: 1.0126676559448242,grad_norm: 0.9999991685024241, iteration: 141250
loss: 0.9845502972602844,grad_norm: 0.9999991470558575, iteration: 141251
loss: 1.0181171894073486,grad_norm: 0.8166357413422637, iteration: 141252
loss: 1.0125457048416138,grad_norm: 0.999999102439945, iteration: 141253
loss: 1.0009896755218506,grad_norm: 0.9874681014248999, iteration: 141254
loss: 1.014419436454773,grad_norm: 0.9913395999232482, iteration: 141255
loss: 0.995980441570282,grad_norm: 0.9890888244341546, iteration: 141256
loss: 0.9844847321510315,grad_norm: 0.8301591532751841, iteration: 141257
loss: 0.9999205470085144,grad_norm: 0.9355112981012362, iteration: 141258
loss: 1.0270202159881592,grad_norm: 0.9999992601054803, iteration: 141259
loss: 1.0038540363311768,grad_norm: 0.9140753004379294, iteration: 141260
loss: 1.0152788162231445,grad_norm: 0.999999165570738, iteration: 141261
loss: 0.9945478439331055,grad_norm: 0.9999992289776721, iteration: 141262
loss: 1.0261656045913696,grad_norm: 0.9999996133981771, iteration: 141263
loss: 1.0011765956878662,grad_norm: 0.9999992073324931, iteration: 141264
loss: 1.0500015020370483,grad_norm: 0.9999991458261854, iteration: 141265
loss: 0.9649963974952698,grad_norm: 0.9999989187689744, iteration: 141266
loss: 1.00822114944458,grad_norm: 0.9999992311687287, iteration: 141267
loss: 1.015913486480713,grad_norm: 0.9935330963952528, iteration: 141268
loss: 0.9917736649513245,grad_norm: 0.9606768551533327, iteration: 141269
loss: 1.0489495992660522,grad_norm: 0.9999997937636866, iteration: 141270
loss: 0.9896652102470398,grad_norm: 0.9999992350881778, iteration: 141271
loss: 1.0095492601394653,grad_norm: 0.999999132656892, iteration: 141272
loss: 0.999235987663269,grad_norm: 0.8596198587684994, iteration: 141273
loss: 0.9735416173934937,grad_norm: 0.9999993342384257, iteration: 141274
loss: 0.9998799562454224,grad_norm: 0.9348293453404044, iteration: 141275
loss: 1.0302848815917969,grad_norm: 0.9999994494240564, iteration: 141276
loss: 1.032256841659546,grad_norm: 0.9999990166187745, iteration: 141277
loss: 0.9937890768051147,grad_norm: 0.9999994375595389, iteration: 141278
loss: 0.998145580291748,grad_norm: 0.9999991649956471, iteration: 141279
loss: 0.99037766456604,grad_norm: 0.9507708806601862, iteration: 141280
loss: 0.9908331632614136,grad_norm: 0.9999992223832254, iteration: 141281
loss: 0.9917513132095337,grad_norm: 0.9999989952019269, iteration: 141282
loss: 1.0383117198944092,grad_norm: 0.9999991819388553, iteration: 141283
loss: 0.9998947978019714,grad_norm: 0.9999989452713026, iteration: 141284
loss: 0.9874154329299927,grad_norm: 0.9392200737721809, iteration: 141285
loss: 0.984813392162323,grad_norm: 0.9999991529757087, iteration: 141286
loss: 0.9896551370620728,grad_norm: 0.9999992518205241, iteration: 141287
loss: 0.9836131930351257,grad_norm: 0.9290982111353738, iteration: 141288
loss: 1.0058588981628418,grad_norm: 0.9999991916277713, iteration: 141289
loss: 0.9620897173881531,grad_norm: 0.939716503419036, iteration: 141290
loss: 0.9885021448135376,grad_norm: 0.8199715234388788, iteration: 141291
loss: 1.0570731163024902,grad_norm: 0.9999990093402017, iteration: 141292
loss: 0.9813752174377441,grad_norm: 0.9472611243812722, iteration: 141293
loss: 0.9543265700340271,grad_norm: 0.9439033871726998, iteration: 141294
loss: 1.01018488407135,grad_norm: 0.9470493922316001, iteration: 141295
loss: 1.0004364252090454,grad_norm: 0.9999990608314446, iteration: 141296
loss: 0.9999991059303284,grad_norm: 0.9999991176974823, iteration: 141297
loss: 1.007602334022522,grad_norm: 0.9590221589141327, iteration: 141298
loss: 0.9952714443206787,grad_norm: 0.9558530384549621, iteration: 141299
loss: 0.9934862852096558,grad_norm: 0.9422072028329571, iteration: 141300
loss: 0.9673048853874207,grad_norm: 0.9660606904003097, iteration: 141301
loss: 1.0139764547348022,grad_norm: 0.926934181174689, iteration: 141302
loss: 0.9994665384292603,grad_norm: 0.9999995042248321, iteration: 141303
loss: 1.003675937652588,grad_norm: 0.9094758795186861, iteration: 141304
loss: 0.9807834029197693,grad_norm: 0.9999991973560136, iteration: 141305
loss: 1.008205533027649,grad_norm: 0.9999992322889844, iteration: 141306
loss: 0.983726441860199,grad_norm: 0.999999131452903, iteration: 141307
loss: 0.9805212020874023,grad_norm: 0.9999991615603154, iteration: 141308
loss: 0.9984675645828247,grad_norm: 0.9999990660140958, iteration: 141309
loss: 0.9707351922988892,grad_norm: 0.9999992130271819, iteration: 141310
loss: 1.004744052886963,grad_norm: 0.9999991586997468, iteration: 141311
loss: 1.0022577047348022,grad_norm: 0.9420280223854421, iteration: 141312
loss: 1.005111813545227,grad_norm: 0.9999991855504194, iteration: 141313
loss: 0.9808128476142883,grad_norm: 0.9999991529818496, iteration: 141314
loss: 1.010232925415039,grad_norm: 0.9714757033107343, iteration: 141315
loss: 1.0122095346450806,grad_norm: 0.9999991452740219, iteration: 141316
loss: 0.9775165319442749,grad_norm: 0.9999991366094052, iteration: 141317
loss: 1.0115123987197876,grad_norm: 0.9999990240462302, iteration: 141318
loss: 0.9466536641120911,grad_norm: 0.9597703652692192, iteration: 141319
loss: 0.9714503288269043,grad_norm: 0.9096803231276355, iteration: 141320
loss: 0.9730823636054993,grad_norm: 0.9999992218560274, iteration: 141321
loss: 0.9652416110038757,grad_norm: 0.937818740811309, iteration: 141322
loss: 1.0165390968322754,grad_norm: 0.9999992017698988, iteration: 141323
loss: 1.01609206199646,grad_norm: 0.9999993112165145, iteration: 141324
loss: 0.997429370880127,grad_norm: 0.9999990256732458, iteration: 141325
loss: 1.0079008340835571,grad_norm: 0.9739178431637541, iteration: 141326
loss: 0.9995959401130676,grad_norm: 0.9999997424100964, iteration: 141327
loss: 0.9990975260734558,grad_norm: 0.9635702169812396, iteration: 141328
loss: 0.9967216849327087,grad_norm: 0.8559774121549397, iteration: 141329
loss: 0.9749318361282349,grad_norm: 0.9999989582465959, iteration: 141330
loss: 0.987942636013031,grad_norm: 0.9999992407497948, iteration: 141331
loss: 1.0233573913574219,grad_norm: 0.9999992342199071, iteration: 141332
loss: 1.0336499214172363,grad_norm: 0.9999991246573634, iteration: 141333
loss: 0.9929250478744507,grad_norm: 0.9066093827015246, iteration: 141334
loss: 0.9862473607063293,grad_norm: 0.8434171612133771, iteration: 141335
loss: 1.0227446556091309,grad_norm: 0.9595589820478256, iteration: 141336
loss: 0.9614777565002441,grad_norm: 0.9457211343839098, iteration: 141337
loss: 1.0034281015396118,grad_norm: 0.8240272652217526, iteration: 141338
loss: 1.0337369441986084,grad_norm: 0.9999990982723115, iteration: 141339
loss: 1.0080512762069702,grad_norm: 0.8979737563810108, iteration: 141340
loss: 0.9964229464530945,grad_norm: 0.9453571557348448, iteration: 141341
loss: 1.0135678052902222,grad_norm: 0.9999990456471912, iteration: 141342
loss: 0.9810092449188232,grad_norm: 0.9423412482440285, iteration: 141343
loss: 1.0412659645080566,grad_norm: 0.9707792783655504, iteration: 141344
loss: 0.9963849186897278,grad_norm: 0.9999991477030452, iteration: 141345
loss: 1.0107239484786987,grad_norm: 0.9999991923940926, iteration: 141346
loss: 1.0011401176452637,grad_norm: 0.9993050838359014, iteration: 141347
loss: 1.0058821439743042,grad_norm: 0.9999992084344009, iteration: 141348
loss: 0.9806227684020996,grad_norm: 0.9999990174617661, iteration: 141349
loss: 0.9930859804153442,grad_norm: 0.9999989057192803, iteration: 141350
loss: 0.9985011219978333,grad_norm: 0.8356462477923486, iteration: 141351
loss: 0.9963063597679138,grad_norm: 0.7945505933387496, iteration: 141352
loss: 1.0055720806121826,grad_norm: 0.999998846083617, iteration: 141353
loss: 1.0339653491973877,grad_norm: 0.9999990264943721, iteration: 141354
loss: 0.9724229574203491,grad_norm: 0.9406749311236242, iteration: 141355
loss: 0.9835577011108398,grad_norm: 0.9999991630146003, iteration: 141356
loss: 0.9898629784584045,grad_norm: 0.9999991295226178, iteration: 141357
loss: 1.0099126100540161,grad_norm: 0.9553301219563691, iteration: 141358
loss: 1.0039020776748657,grad_norm: 0.9999991397113045, iteration: 141359
loss: 1.0104328393936157,grad_norm: 0.9999993619782869, iteration: 141360
loss: 0.9835954308509827,grad_norm: 0.9999990315841161, iteration: 141361
loss: 0.9692776203155518,grad_norm: 0.9999992008183343, iteration: 141362
loss: 1.0199850797653198,grad_norm: 0.9380537725854668, iteration: 141363
loss: 1.0005885362625122,grad_norm: 0.999998908955545, iteration: 141364
loss: 1.0065925121307373,grad_norm: 0.9647479654317602, iteration: 141365
loss: 0.9911332130432129,grad_norm: 0.9999991338253471, iteration: 141366
loss: 1.0134556293487549,grad_norm: 0.8980092613079201, iteration: 141367
loss: 1.030470371246338,grad_norm: 0.9999995787954196, iteration: 141368
loss: 1.0283302068710327,grad_norm: 0.9999991356996661, iteration: 141369
loss: 1.046715497970581,grad_norm: 0.999999201222452, iteration: 141370
loss: 0.9950944185256958,grad_norm: 0.9999992648107053, iteration: 141371
loss: 0.9886106252670288,grad_norm: 0.9386733614690127, iteration: 141372
loss: 0.9474442005157471,grad_norm: 0.9999991483108308, iteration: 141373
loss: 0.9971895813941956,grad_norm: 0.9999993312220702, iteration: 141374
loss: 0.9931493401527405,grad_norm: 0.9999992247051219, iteration: 141375
loss: 0.9982438683509827,grad_norm: 0.9999990763881644, iteration: 141376
loss: 1.038962960243225,grad_norm: 0.9999991505585893, iteration: 141377
loss: 1.0047348737716675,grad_norm: 0.999999156386138, iteration: 141378
loss: 0.9835123419761658,grad_norm: 0.9008988152439662, iteration: 141379
loss: 1.0825964212417603,grad_norm: 0.9999990994686956, iteration: 141380
loss: 1.0118759870529175,grad_norm: 0.9999991815537259, iteration: 141381
loss: 1.0241793394088745,grad_norm: 0.9999990917470962, iteration: 141382
loss: 1.0058039426803589,grad_norm: 0.9999996784513282, iteration: 141383
loss: 0.9854443669319153,grad_norm: 0.9999991311428383, iteration: 141384
loss: 1.0406737327575684,grad_norm: 0.9999992034555275, iteration: 141385
loss: 1.0040329694747925,grad_norm: 0.9999991141336373, iteration: 141386
loss: 1.0105276107788086,grad_norm: 0.9999992062559885, iteration: 141387
loss: 0.9504266977310181,grad_norm: 0.9220885051164889, iteration: 141388
loss: 0.9771317839622498,grad_norm: 0.9999990420327645, iteration: 141389
loss: 0.9946349859237671,grad_norm: 0.9149567769716008, iteration: 141390
loss: 0.9896048307418823,grad_norm: 0.9068020213846927, iteration: 141391
loss: 1.1217889785766602,grad_norm: 0.9999991872457948, iteration: 141392
loss: 1.0173307657241821,grad_norm: 0.9916081794671868, iteration: 141393
loss: 1.0100607872009277,grad_norm: 0.9907703121791792, iteration: 141394
loss: 1.0008503198623657,grad_norm: 0.9999991226406607, iteration: 141395
loss: 0.9557798504829407,grad_norm: 0.905546144879097, iteration: 141396
loss: 0.9808341860771179,grad_norm: 0.9999992041439033, iteration: 141397
loss: 0.9901459813117981,grad_norm: 0.9999990189746089, iteration: 141398
loss: 1.0297971963882446,grad_norm: 0.9999992315434295, iteration: 141399
loss: 0.9534152746200562,grad_norm: 0.9999991356636424, iteration: 141400
loss: 0.9899855852127075,grad_norm: 0.9999992508002788, iteration: 141401
loss: 0.97530597448349,grad_norm: 0.9990763924443975, iteration: 141402
loss: 0.9899910092353821,grad_norm: 0.9775028285544147, iteration: 141403
loss: 0.9978682398796082,grad_norm: 0.988842627174532, iteration: 141404
loss: 0.977843165397644,grad_norm: 0.8049326404459394, iteration: 141405
loss: 1.0507824420928955,grad_norm: 0.9439456485647784, iteration: 141406
loss: 1.040153980255127,grad_norm: 0.9315887803165454, iteration: 141407
loss: 0.9978323578834534,grad_norm: 0.9607855022077184, iteration: 141408
loss: 0.9776966571807861,grad_norm: 0.9999992568256082, iteration: 141409
loss: 1.0121872425079346,grad_norm: 0.9999989549971863, iteration: 141410
loss: 1.0033477544784546,grad_norm: 0.9999990142553566, iteration: 141411
loss: 1.003650188446045,grad_norm: 0.999999086852106, iteration: 141412
loss: 0.9814910292625427,grad_norm: 0.999999315808392, iteration: 141413
loss: 1.0231178998947144,grad_norm: 0.9999990024811583, iteration: 141414
loss: 1.0022190809249878,grad_norm: 0.9999991581384213, iteration: 141415
loss: 0.9713800549507141,grad_norm: 0.9965550693055778, iteration: 141416
loss: 1.0005931854248047,grad_norm: 0.9999990959937445, iteration: 141417
loss: 1.0325000286102295,grad_norm: 0.999999165266338, iteration: 141418
loss: 0.9889413118362427,grad_norm: 0.9624882127263276, iteration: 141419
loss: 0.9982776045799255,grad_norm: 0.9999990621482294, iteration: 141420
loss: 1.0058029890060425,grad_norm: 0.9999991645426531, iteration: 141421
loss: 1.0493741035461426,grad_norm: 0.9999991357668093, iteration: 141422
loss: 1.0068385601043701,grad_norm: 0.9999992056291923, iteration: 141423
loss: 1.0558797121047974,grad_norm: 0.8247228171427687, iteration: 141424
loss: 1.0011311769485474,grad_norm: 0.999999125290308, iteration: 141425
loss: 1.0060745477676392,grad_norm: 0.9999990533684245, iteration: 141426
loss: 0.9928819537162781,grad_norm: 0.9779041605741954, iteration: 141427
loss: 0.9972746968269348,grad_norm: 0.9307511722160601, iteration: 141428
loss: 0.9881215691566467,grad_norm: 0.9190984470500663, iteration: 141429
loss: 0.966037392616272,grad_norm: 0.9665573511236064, iteration: 141430
loss: 1.0851253271102905,grad_norm: 0.9891513213458, iteration: 141431
loss: 1.0071861743927002,grad_norm: 0.9410174892516925, iteration: 141432
loss: 1.0345348119735718,grad_norm: 0.9067868918792626, iteration: 141433
loss: 0.9539192318916321,grad_norm: 0.9999989625444287, iteration: 141434
loss: 0.998772919178009,grad_norm: 0.9663975991238746, iteration: 141435
loss: 0.9891520142555237,grad_norm: 0.9714568526414852, iteration: 141436
loss: 0.9966916441917419,grad_norm: 0.9777872624592323, iteration: 141437
loss: 0.9875307083129883,grad_norm: 0.9999992123717255, iteration: 141438
loss: 1.012291431427002,grad_norm: 0.9999991835851445, iteration: 141439
loss: 1.0150388479232788,grad_norm: 0.9349235414624508, iteration: 141440
loss: 0.9676265716552734,grad_norm: 0.9999992525484329, iteration: 141441
loss: 0.9864461421966553,grad_norm: 0.9439216161679362, iteration: 141442
loss: 1.007185459136963,grad_norm: 0.9999991108377889, iteration: 141443
loss: 0.9771208167076111,grad_norm: 0.9999992319304025, iteration: 141444
loss: 1.0112924575805664,grad_norm: 0.9999991852643614, iteration: 141445
loss: 1.0123785734176636,grad_norm: 0.9409139586972888, iteration: 141446
loss: 0.9764320254325867,grad_norm: 0.9999992392279556, iteration: 141447
loss: 0.9857349991798401,grad_norm: 0.9999992063185915, iteration: 141448
loss: 1.033078670501709,grad_norm: 0.9999995985192192, iteration: 141449
loss: 0.9951889514923096,grad_norm: 0.9999991266180557, iteration: 141450
loss: 1.0120275020599365,grad_norm: 0.9999991702662401, iteration: 141451
loss: 0.9964965581893921,grad_norm: 0.9913823450776217, iteration: 141452
loss: 1.016414761543274,grad_norm: 0.9999992539859699, iteration: 141453
loss: 1.0545979738235474,grad_norm: 0.999999288462432, iteration: 141454
loss: 0.9712172150611877,grad_norm: 0.9999991108542887, iteration: 141455
loss: 0.9890962839126587,grad_norm: 0.9999991569778032, iteration: 141456
loss: 1.0173680782318115,grad_norm: 0.9125398497959555, iteration: 141457
loss: 0.9820012450218201,grad_norm: 0.9999991537975401, iteration: 141458
loss: 1.0306164026260376,grad_norm: 0.9999990863288046, iteration: 141459
loss: 1.16267991065979,grad_norm: 0.9999998757400737, iteration: 141460
loss: 1.0091838836669922,grad_norm: 0.9999990180828795, iteration: 141461
loss: 1.0301355123519897,grad_norm: 0.9460005464638535, iteration: 141462
loss: 1.0192840099334717,grad_norm: 0.9999991235730212, iteration: 141463
loss: 1.0104880332946777,grad_norm: 0.9999992176488387, iteration: 141464
loss: 0.9723793864250183,grad_norm: 0.9999993176658494, iteration: 141465
loss: 1.0231963396072388,grad_norm: 0.901585091049256, iteration: 141466
loss: 0.9917393326759338,grad_norm: 0.999999126224686, iteration: 141467
loss: 0.997715950012207,grad_norm: 0.9388589295046393, iteration: 141468
loss: 0.977789044380188,grad_norm: 0.9999991969909133, iteration: 141469
loss: 1.0295652151107788,grad_norm: 0.9822420002578314, iteration: 141470
loss: 1.0103377103805542,grad_norm: 0.9999992427957846, iteration: 141471
loss: 0.9825202822685242,grad_norm: 0.9999991007943663, iteration: 141472
loss: 1.004270315170288,grad_norm: 0.9683065960704913, iteration: 141473
loss: 0.9894305467605591,grad_norm: 0.9999990201579079, iteration: 141474
loss: 0.9623175263404846,grad_norm: 0.9999992160514911, iteration: 141475
loss: 0.9804989099502563,grad_norm: 0.9999991935677499, iteration: 141476
loss: 1.0107641220092773,grad_norm: 0.9999989516163013, iteration: 141477
loss: 0.9770423173904419,grad_norm: 0.9994842132422809, iteration: 141478
loss: 0.9858076572418213,grad_norm: 0.9999990676928059, iteration: 141479
loss: 1.0424290895462036,grad_norm: 0.9775746973429884, iteration: 141480
loss: 0.9938446283340454,grad_norm: 0.9999991474116869, iteration: 141481
loss: 1.0477420091629028,grad_norm: 0.9999992118526839, iteration: 141482
loss: 0.9691939949989319,grad_norm: 0.9999990848061192, iteration: 141483
loss: 0.9697213768959045,grad_norm: 0.888431700845184, iteration: 141484
loss: 1.048366665840149,grad_norm: 0.9992800207087887, iteration: 141485
loss: 0.9962932467460632,grad_norm: 0.9656579665305707, iteration: 141486
loss: 1.023905873298645,grad_norm: 0.9999991806576122, iteration: 141487
loss: 1.028674840927124,grad_norm: 0.8408318670025985, iteration: 141488
loss: 0.9980244040489197,grad_norm: 0.7725238544961816, iteration: 141489
loss: 0.9870401620864868,grad_norm: 0.9999991676498142, iteration: 141490
loss: 0.9802986979484558,grad_norm: 0.9999991608792201, iteration: 141491
loss: 0.9658341407775879,grad_norm: 0.9532751348862436, iteration: 141492
loss: 0.9815754890441895,grad_norm: 0.9450777640954797, iteration: 141493
loss: 1.0276740789413452,grad_norm: 0.9999993228356597, iteration: 141494
loss: 0.9657232165336609,grad_norm: 0.999999093843372, iteration: 141495
loss: 0.972235381603241,grad_norm: 0.8577088498733132, iteration: 141496
loss: 1.0238322019577026,grad_norm: 0.9999990670491343, iteration: 141497
loss: 0.982595682144165,grad_norm: 0.8467435225467556, iteration: 141498
loss: 1.0168421268463135,grad_norm: 0.999614425611456, iteration: 141499
loss: 0.9643886089324951,grad_norm: 0.8273316612240331, iteration: 141500
loss: 0.9899206161499023,grad_norm: 0.9999989310643751, iteration: 141501
loss: 1.0425419807434082,grad_norm: 0.9775604292254677, iteration: 141502
loss: 1.030134677886963,grad_norm: 0.9999991535973177, iteration: 141503
loss: 1.000468134880066,grad_norm: 0.9999991358604875, iteration: 141504
loss: 0.9936139583587646,grad_norm: 0.872973460787229, iteration: 141505
loss: 1.1195043325424194,grad_norm: 0.9999995473760982, iteration: 141506
loss: 0.9840224385261536,grad_norm: 0.9168126865324192, iteration: 141507
loss: 0.9676955342292786,grad_norm: 0.9999992609200217, iteration: 141508
loss: 1.0206990242004395,grad_norm: 0.8835851368091342, iteration: 141509
loss: 0.9922483563423157,grad_norm: 0.9506969757449082, iteration: 141510
loss: 0.9663782715797424,grad_norm: 0.9999991215659002, iteration: 141511
loss: 0.9748129844665527,grad_norm: 0.9999990234170362, iteration: 141512
loss: 0.9840022921562195,grad_norm: 0.9999990918982335, iteration: 141513
loss: 0.9920887351036072,grad_norm: 0.9999991836438825, iteration: 141514
loss: 1.0042473077774048,grad_norm: 0.9891234249060713, iteration: 141515
loss: 1.0374557971954346,grad_norm: 0.9872620678300477, iteration: 141516
loss: 0.9950061440467834,grad_norm: 0.9999992658938888, iteration: 141517
loss: 0.9807168841362,grad_norm: 0.9999991490611013, iteration: 141518
loss: 1.0292450189590454,grad_norm: 0.9415064307587471, iteration: 141519
loss: 0.9963372945785522,grad_norm: 0.93285962364755, iteration: 141520
loss: 1.0550062656402588,grad_norm: 0.9999993835613651, iteration: 141521
loss: 0.9678587317466736,grad_norm: 0.9999990098842892, iteration: 141522
loss: 1.00544011592865,grad_norm: 0.9999991176838593, iteration: 141523
loss: 1.0695527791976929,grad_norm: 0.999999632490393, iteration: 141524
loss: 1.0095585584640503,grad_norm: 0.9764123750622067, iteration: 141525
loss: 1.0334291458129883,grad_norm: 0.999998969142712, iteration: 141526
loss: 1.0017763376235962,grad_norm: 0.9915265313454856, iteration: 141527
loss: 1.007729172706604,grad_norm: 0.9999990703964806, iteration: 141528
loss: 1.0218414068222046,grad_norm: 0.9822892596324552, iteration: 141529
loss: 1.0034295320510864,grad_norm: 0.9999991956696137, iteration: 141530
loss: 0.9986850619316101,grad_norm: 0.99999907026746, iteration: 141531
loss: 1.0217231512069702,grad_norm: 0.8300479178145053, iteration: 141532
loss: 1.012098789215088,grad_norm: 0.9999992461163401, iteration: 141533
loss: 0.9716710448265076,grad_norm: 0.9114019575964242, iteration: 141534
loss: 1.004826307296753,grad_norm: 0.9926133218886805, iteration: 141535
loss: 0.9739503860473633,grad_norm: 0.9999993365747321, iteration: 141536
loss: 1.0016400814056396,grad_norm: 0.8959497883997614, iteration: 141537
loss: 0.9804996848106384,grad_norm: 0.99999914568566, iteration: 141538
loss: 1.099817156791687,grad_norm: 0.9999994896224855, iteration: 141539
loss: 1.0072683095932007,grad_norm: 0.908102809402462, iteration: 141540
loss: 0.9928565621376038,grad_norm: 0.8838714113273687, iteration: 141541
loss: 1.003848910331726,grad_norm: 0.9999991251984142, iteration: 141542
loss: 0.9820930361747742,grad_norm: 0.9605374624189644, iteration: 141543
loss: 1.0071568489074707,grad_norm: 0.9514017997110004, iteration: 141544
loss: 1.0206916332244873,grad_norm: 1.0000000093339672, iteration: 141545
loss: 1.0288763046264648,grad_norm: 0.9375405920850614, iteration: 141546
loss: 1.0070226192474365,grad_norm: 0.9999990883281189, iteration: 141547
loss: 0.9785323739051819,grad_norm: 0.8966330985842527, iteration: 141548
loss: 1.0091065168380737,grad_norm: 0.9447381990937505, iteration: 141549
loss: 0.9854068160057068,grad_norm: 0.9999991448513745, iteration: 141550
loss: 1.0474709272384644,grad_norm: 0.8673026503814042, iteration: 141551
loss: 0.9797616600990295,grad_norm: 0.9999990220881627, iteration: 141552
loss: 1.028548002243042,grad_norm: 0.9626343504498749, iteration: 141553
loss: 0.9789979457855225,grad_norm: 0.9597776221842534, iteration: 141554
loss: 1.0032092332839966,grad_norm: 0.876378517740972, iteration: 141555
loss: 0.9981895089149475,grad_norm: 0.9999991494274744, iteration: 141556
loss: 1.0149824619293213,grad_norm: 0.8790229708370161, iteration: 141557
loss: 0.9944398403167725,grad_norm: 0.9999989856474442, iteration: 141558
loss: 1.0318320989608765,grad_norm: 0.9999990010429575, iteration: 141559
loss: 1.0120224952697754,grad_norm: 0.9832553179915673, iteration: 141560
loss: 0.9872564077377319,grad_norm: 0.9999991924496826, iteration: 141561
loss: 1.0067464113235474,grad_norm: 0.9999991772856134, iteration: 141562
loss: 0.972513735294342,grad_norm: 0.9999991273033623, iteration: 141563
loss: 0.9676898717880249,grad_norm: 0.9999991157333687, iteration: 141564
loss: 1.0070744752883911,grad_norm: 0.9140657268821614, iteration: 141565
loss: 1.0281530618667603,grad_norm: 0.9999989962349018, iteration: 141566
loss: 0.9916227459907532,grad_norm: 0.9804434576034624, iteration: 141567
loss: 0.9637786746025085,grad_norm: 0.9319806819293603, iteration: 141568
loss: 1.0323551893234253,grad_norm: 0.9999991506243276, iteration: 141569
loss: 0.9910793304443359,grad_norm: 0.9999992501765284, iteration: 141570
loss: 1.000400185585022,grad_norm: 0.9899602710635516, iteration: 141571
loss: 1.0039790868759155,grad_norm: 0.9883394538639056, iteration: 141572
loss: 1.0499451160430908,grad_norm: 1.0000000184560591, iteration: 141573
loss: 1.0013967752456665,grad_norm: 0.8888022301270787, iteration: 141574
loss: 0.9595323801040649,grad_norm: 0.9999989620080286, iteration: 141575
loss: 0.9797335863113403,grad_norm: 0.9999991581268415, iteration: 141576
loss: 0.9825414419174194,grad_norm: 0.9376941798710592, iteration: 141577
loss: 0.9835543036460876,grad_norm: 0.8710910168869476, iteration: 141578
loss: 1.0111031532287598,grad_norm: 0.9999998640131483, iteration: 141579
loss: 1.0588630437850952,grad_norm: 0.9999991083647699, iteration: 141580
loss: 0.9981203079223633,grad_norm: 0.9999991841264967, iteration: 141581
loss: 0.9917471408843994,grad_norm: 0.9999991672072984, iteration: 141582
loss: 1.0087218284606934,grad_norm: 0.9999995668258137, iteration: 141583
loss: 1.0265833139419556,grad_norm: 0.9854721011267731, iteration: 141584
loss: 1.0251657962799072,grad_norm: 0.8996725302029742, iteration: 141585
loss: 1.0297235250473022,grad_norm: 0.9999991039078738, iteration: 141586
loss: 1.035243272781372,grad_norm: 0.9178441292778058, iteration: 141587
loss: 1.0108416080474854,grad_norm: 0.9467621137438068, iteration: 141588
loss: 1.001655101776123,grad_norm: 0.9999990838938517, iteration: 141589
loss: 0.9953833222389221,grad_norm: 0.9824506616980644, iteration: 141590
loss: 1.0228934288024902,grad_norm: 0.8754308529643843, iteration: 141591
loss: 0.9733103513717651,grad_norm: 0.9999991483517738, iteration: 141592
loss: 1.0374468564987183,grad_norm: 0.9999990784993265, iteration: 141593
loss: 0.9853605031967163,grad_norm: 0.9261409871466215, iteration: 141594
loss: 1.0866433382034302,grad_norm: 0.999999113529784, iteration: 141595
loss: 1.0152273178100586,grad_norm: 0.9999989500396167, iteration: 141596
loss: 1.019993782043457,grad_norm: 0.9758714096984176, iteration: 141597
loss: 1.0366168022155762,grad_norm: 0.9999990080527388, iteration: 141598
loss: 1.0077061653137207,grad_norm: 0.9999991330279409, iteration: 141599
loss: 1.0089399814605713,grad_norm: 0.9999990151278275, iteration: 141600
loss: 1.0653557777404785,grad_norm: 0.9999996644593165, iteration: 141601
loss: 0.9856903553009033,grad_norm: 0.9248192205800141, iteration: 141602
loss: 0.9896830916404724,grad_norm: 0.9999991623290385, iteration: 141603
loss: 0.9731249809265137,grad_norm: 0.9999991213927714, iteration: 141604
loss: 0.9629940986633301,grad_norm: 0.9916795422244832, iteration: 141605
loss: 0.9587145447731018,grad_norm: 0.9999990550857852, iteration: 141606
loss: 1.0839937925338745,grad_norm: 0.9999993033345937, iteration: 141607
loss: 1.0333229303359985,grad_norm: 0.9999992394612237, iteration: 141608
loss: 0.9853489398956299,grad_norm: 0.9714883153394344, iteration: 141609
loss: 1.0050653219223022,grad_norm: 0.9108388995847243, iteration: 141610
loss: 0.9676380157470703,grad_norm: 0.9999992193901194, iteration: 141611
loss: 1.0343087911605835,grad_norm: 0.9999990479445462, iteration: 141612
loss: 0.9808225631713867,grad_norm: 0.8121341394164825, iteration: 141613
loss: 0.9985259175300598,grad_norm: 0.96180432195601, iteration: 141614
loss: 1.0092754364013672,grad_norm: 0.9999991780943518, iteration: 141615
loss: 1.007421851158142,grad_norm: 0.9999992931187526, iteration: 141616
loss: 0.9769029021263123,grad_norm: 0.9999991517299821, iteration: 141617
loss: 1.0297585725784302,grad_norm: 0.999999168330002, iteration: 141618
loss: 0.9976789355278015,grad_norm: 0.9113334722113821, iteration: 141619
loss: 0.9984048008918762,grad_norm: 0.8928032389249703, iteration: 141620
loss: 0.9758653044700623,grad_norm: 0.9999990695637798, iteration: 141621
loss: 1.010711669921875,grad_norm: 0.8073389979121177, iteration: 141622
loss: 1.0141128301620483,grad_norm: 0.9999992684642057, iteration: 141623
loss: 0.9961626529693604,grad_norm: 0.8321015807750123, iteration: 141624
loss: 0.9772327542304993,grad_norm: 0.9008216842097617, iteration: 141625
loss: 0.9964110851287842,grad_norm: 0.882691964496114, iteration: 141626
loss: 1.0283541679382324,grad_norm: 0.9908567752298296, iteration: 141627
loss: 1.0324960947036743,grad_norm: 0.9999989183046736, iteration: 141628
loss: 1.0008984804153442,grad_norm: 0.9779997371904664, iteration: 141629
loss: 1.008668303489685,grad_norm: 0.9554692664889769, iteration: 141630
loss: 0.9849467277526855,grad_norm: 0.9003553869159955, iteration: 141631
loss: 0.9688612222671509,grad_norm: 0.999999144864098, iteration: 141632
loss: 0.9731733202934265,grad_norm: 0.9113888743940421, iteration: 141633
loss: 0.9918656349182129,grad_norm: 0.999999215122312, iteration: 141634
loss: 0.9808217287063599,grad_norm: 0.9999991334022512, iteration: 141635
loss: 0.9841775298118591,grad_norm: 0.9999997297804982, iteration: 141636
loss: 0.9993583559989929,grad_norm: 0.9999991011895744, iteration: 141637
loss: 1.0090587139129639,grad_norm: 0.9999993481006196, iteration: 141638
loss: 0.9797140955924988,grad_norm: 0.9999989352480001, iteration: 141639
loss: 0.9900050163269043,grad_norm: 0.9996923821768375, iteration: 141640
loss: 0.9920627474784851,grad_norm: 0.9811767269434016, iteration: 141641
loss: 0.997789204120636,grad_norm: 0.9477564680617422, iteration: 141642
loss: 1.001320719718933,grad_norm: 0.9999997457167427, iteration: 141643
loss: 1.0370502471923828,grad_norm: 0.9999991787549364, iteration: 141644
loss: 0.9959686994552612,grad_norm: 0.9999991785789292, iteration: 141645
loss: 0.9913284778594971,grad_norm: 0.9279386548167086, iteration: 141646
loss: 1.0286879539489746,grad_norm: 0.9999995752859968, iteration: 141647
loss: 1.0305100679397583,grad_norm: 0.997742655337427, iteration: 141648
loss: 0.9802824854850769,grad_norm: 0.9082043634769276, iteration: 141649
loss: 1.0065969228744507,grad_norm: 0.9304999621862757, iteration: 141650
loss: 0.9856626391410828,grad_norm: 0.9999991681795379, iteration: 141651
loss: 0.9641052484512329,grad_norm: 0.9999990282351977, iteration: 141652
loss: 1.003301739692688,grad_norm: 0.9137253102820554, iteration: 141653
loss: 1.0026130676269531,grad_norm: 0.9999991973024513, iteration: 141654
loss: 0.9536383748054504,grad_norm: 0.9999990157668757, iteration: 141655
loss: 1.0335440635681152,grad_norm: 0.9539207703039536, iteration: 141656
loss: 0.9939456582069397,grad_norm: 0.9999992107062737, iteration: 141657
loss: 0.9996570944786072,grad_norm: 0.9999991858666697, iteration: 141658
loss: 1.013960599899292,grad_norm: 0.9330741781964094, iteration: 141659
loss: 0.9923843145370483,grad_norm: 0.9999991779183217, iteration: 141660
loss: 1.0117137432098389,grad_norm: 0.8816025866298207, iteration: 141661
loss: 0.991547703742981,grad_norm: 0.9743962307136371, iteration: 141662
loss: 1.0318819284439087,grad_norm: 0.9999998725349502, iteration: 141663
loss: 0.9774138331413269,grad_norm: 0.9999991268867453, iteration: 141664
loss: 1.0024733543395996,grad_norm: 0.9999991556152452, iteration: 141665
loss: 1.0130518674850464,grad_norm: 0.9999991246626005, iteration: 141666
loss: 0.9822517037391663,grad_norm: 0.9999991679170724, iteration: 141667
loss: 0.9830710291862488,grad_norm: 0.8482580000789733, iteration: 141668
loss: 1.0042858123779297,grad_norm: 0.999999189901767, iteration: 141669
loss: 1.0021817684173584,grad_norm: 0.8803758235337656, iteration: 141670
loss: 0.9801803231239319,grad_norm: 0.9999990739223977, iteration: 141671
loss: 1.021230697631836,grad_norm: 0.9999991418572757, iteration: 141672
loss: 1.0057345628738403,grad_norm: 0.8794498874981858, iteration: 141673
loss: 0.9418361186981201,grad_norm: 0.9803704605803043, iteration: 141674
loss: 1.042487621307373,grad_norm: 0.9837877837451551, iteration: 141675
loss: 1.017407774925232,grad_norm: 0.9999991136293565, iteration: 141676
loss: 0.9998640418052673,grad_norm: 0.9999998594161194, iteration: 141677
loss: 0.9691832661628723,grad_norm: 0.9999992508344278, iteration: 141678
loss: 0.992300271987915,grad_norm: 0.9999992654786237, iteration: 141679
loss: 0.9695030450820923,grad_norm: 0.9625379544624474, iteration: 141680
loss: 1.0115772485733032,grad_norm: 0.8092783701119067, iteration: 141681
loss: 1.016768217086792,grad_norm: 0.9282690052356734, iteration: 141682
loss: 0.9998406171798706,grad_norm: 0.9999990943475616, iteration: 141683
loss: 1.0230584144592285,grad_norm: 0.9999992095019473, iteration: 141684
loss: 1.0040850639343262,grad_norm: 0.9421607443158788, iteration: 141685
loss: 1.0038374662399292,grad_norm: 0.999998996907929, iteration: 141686
loss: 1.0033087730407715,grad_norm: 0.9999990994212246, iteration: 141687
loss: 0.9933690428733826,grad_norm: 0.8928953121654793, iteration: 141688
loss: 0.9959115982055664,grad_norm: 0.9999990550156019, iteration: 141689
loss: 0.9939832091331482,grad_norm: 0.9930944473026058, iteration: 141690
loss: 0.9885525107383728,grad_norm: 0.9999991938147877, iteration: 141691
loss: 0.986305296421051,grad_norm: 0.9999992729418113, iteration: 141692
loss: 1.0041145086288452,grad_norm: 0.9806202246860125, iteration: 141693
loss: 0.998873233795166,grad_norm: 0.9999991790726385, iteration: 141694
loss: 1.0759190320968628,grad_norm: 0.9999992419447277, iteration: 141695
loss: 1.0077238082885742,grad_norm: 0.9999989892134026, iteration: 141696
loss: 0.9842879176139832,grad_norm: 0.9999990466310228, iteration: 141697
loss: 0.9638173580169678,grad_norm: 0.9999992869236978, iteration: 141698
loss: 0.9836130738258362,grad_norm: 0.9265982418897936, iteration: 141699
loss: 0.9392523169517517,grad_norm: 0.9999991073391308, iteration: 141700
loss: 1.002064824104309,grad_norm: 0.9757627827354854, iteration: 141701
loss: 1.0055681467056274,grad_norm: 0.9999992581428047, iteration: 141702
loss: 0.9875697493553162,grad_norm: 0.9999992080718919, iteration: 141703
loss: 1.0192244052886963,grad_norm: 0.9999990977588625, iteration: 141704
loss: 0.9978982210159302,grad_norm: 0.9309503266608959, iteration: 141705
loss: 0.9940281510353088,grad_norm: 0.9999989134585519, iteration: 141706
loss: 1.0234993696212769,grad_norm: 0.9999990226371003, iteration: 141707
loss: 0.9983740448951721,grad_norm: 0.9999990508872209, iteration: 141708
loss: 0.9914435148239136,grad_norm: 0.9999988396245005, iteration: 141709
loss: 0.9729726910591125,grad_norm: 0.9999990909987067, iteration: 141710
loss: 0.961345374584198,grad_norm: 0.9336106216439983, iteration: 141711
loss: 0.9914920926094055,grad_norm: 0.9680320870830242, iteration: 141712
loss: 1.002840280532837,grad_norm: 0.8641787609790619, iteration: 141713
loss: 0.9471948742866516,grad_norm: 0.960335736520786, iteration: 141714
loss: 0.9977365136146545,grad_norm: 0.8862890617845645, iteration: 141715
loss: 0.9794919490814209,grad_norm: 0.9999991129008633, iteration: 141716
loss: 0.9883577227592468,grad_norm: 0.8823632366830017, iteration: 141717
loss: 0.995114266872406,grad_norm: 0.9999996361939657, iteration: 141718
loss: 0.9985881447792053,grad_norm: 0.9999992336476105, iteration: 141719
loss: 0.9926924109458923,grad_norm: 0.8955853963725373, iteration: 141720
loss: 0.9963076710700989,grad_norm: 0.905211374996056, iteration: 141721
loss: 1.0084527730941772,grad_norm: 0.9554735111284567, iteration: 141722
loss: 0.9842463135719299,grad_norm: 0.8534581506006854, iteration: 141723
loss: 0.9502817988395691,grad_norm: 0.9999990648261866, iteration: 141724
loss: 1.0173168182373047,grad_norm: 0.9999991443910369, iteration: 141725
loss: 1.001570701599121,grad_norm: 0.99999902334264, iteration: 141726
loss: 1.0073167085647583,grad_norm: 0.9999990871734387, iteration: 141727
loss: 1.0084055662155151,grad_norm: 0.9999990414026997, iteration: 141728
loss: 1.0245623588562012,grad_norm: 0.9999990216077731, iteration: 141729
loss: 0.9912577271461487,grad_norm: 0.9999990736552601, iteration: 141730
loss: 1.027254343032837,grad_norm: 0.9515211367724808, iteration: 141731
loss: 0.9837417602539062,grad_norm: 0.9999991133343162, iteration: 141732
loss: 0.9910199642181396,grad_norm: 0.9999992751039712, iteration: 141733
loss: 0.9649296998977661,grad_norm: 0.9376458115396402, iteration: 141734
loss: 0.9915142059326172,grad_norm: 0.9999992158700123, iteration: 141735
loss: 0.9723641872406006,grad_norm: 0.9999990618734672, iteration: 141736
loss: 0.9602739214897156,grad_norm: 0.8930504478072371, iteration: 141737
loss: 0.9824555516242981,grad_norm: 0.9999992483983198, iteration: 141738
loss: 1.0072917938232422,grad_norm: 0.9427584470835283, iteration: 141739
loss: 1.0298269987106323,grad_norm: 0.9999991758872642, iteration: 141740
loss: 1.030277132987976,grad_norm: 0.9999992359945725, iteration: 141741
loss: 0.9772529602050781,grad_norm: 0.9519087620543394, iteration: 141742
loss: 0.9632295966148376,grad_norm: 0.9999991856604685, iteration: 141743
loss: 1.0294936895370483,grad_norm: 0.9999992466063804, iteration: 141744
loss: 0.9899654388427734,grad_norm: 0.9291306000569024, iteration: 141745
loss: 0.9704477190971375,grad_norm: 0.945164648622936, iteration: 141746
loss: 0.9823648929595947,grad_norm: 0.9062200807145196, iteration: 141747
loss: 1.048261046409607,grad_norm: 0.9999996700161635, iteration: 141748
loss: 0.9801949262619019,grad_norm: 0.8176983013399287, iteration: 141749
loss: 0.9657665491104126,grad_norm: 0.9244934462858888, iteration: 141750
loss: 0.9954123497009277,grad_norm: 0.999999197987265, iteration: 141751
loss: 0.9764124155044556,grad_norm: 0.9999989926294208, iteration: 141752
loss: 1.0376074314117432,grad_norm: 0.8691401661506348, iteration: 141753
loss: 0.9735584855079651,grad_norm: 0.999999269781879, iteration: 141754
loss: 0.9857202768325806,grad_norm: 0.9906464660571037, iteration: 141755
loss: 0.9925094246864319,grad_norm: 0.999999038530477, iteration: 141756
loss: 1.019045352935791,grad_norm: 0.9999994043070928, iteration: 141757
loss: 0.9988241195678711,grad_norm: 0.8807188519777842, iteration: 141758
loss: 0.9911863207817078,grad_norm: 0.9565993148419218, iteration: 141759
loss: 1.029469609260559,grad_norm: 0.9999989281969126, iteration: 141760
loss: 0.9848023056983948,grad_norm: 0.8186402668218343, iteration: 141761
loss: 0.9999348521232605,grad_norm: 0.8517253699975781, iteration: 141762
loss: 0.9973573684692383,grad_norm: 0.786912182637353, iteration: 141763
loss: 0.9751394391059875,grad_norm: 0.9999991524115168, iteration: 141764
loss: 0.9706089496612549,grad_norm: 0.999999188262449, iteration: 141765
loss: 0.9355088472366333,grad_norm: 0.9999990495907127, iteration: 141766
loss: 1.0071073770523071,grad_norm: 0.9999990758518517, iteration: 141767
loss: 1.0109769105911255,grad_norm: 0.9999992482757819, iteration: 141768
loss: 0.9872070550918579,grad_norm: 0.9999991713031979, iteration: 141769
loss: 1.0017262697219849,grad_norm: 0.999999157797529, iteration: 141770
loss: 1.0136911869049072,grad_norm: 0.9430209719739875, iteration: 141771
loss: 1.021836757659912,grad_norm: 0.9999990763223949, iteration: 141772
loss: 1.001346230506897,grad_norm: 0.999999297536603, iteration: 141773
loss: 1.0228478908538818,grad_norm: 0.9999995413992776, iteration: 141774
loss: 1.0017738342285156,grad_norm: 0.9954154338551918, iteration: 141775
loss: 0.9883139133453369,grad_norm: 0.9999990046866877, iteration: 141776
loss: 0.9973455667495728,grad_norm: 0.9999993375875192, iteration: 141777
loss: 1.0171979665756226,grad_norm: 0.9999990209730213, iteration: 141778
loss: 1.055166244506836,grad_norm: 0.9999992462888763, iteration: 141779
loss: 1.0581769943237305,grad_norm: 0.9999992937722713, iteration: 141780
loss: 0.9815908074378967,grad_norm: 0.9999990817278842, iteration: 141781
loss: 1.0055466890335083,grad_norm: 0.9580274782218392, iteration: 141782
loss: 0.993362545967102,grad_norm: 0.9999990081954291, iteration: 141783
loss: 0.995739758014679,grad_norm: 0.9499746218953232, iteration: 141784
loss: 0.9757301211357117,grad_norm: 0.9106406926291365, iteration: 141785
loss: 0.9862037301063538,grad_norm: 0.9830142099693098, iteration: 141786
loss: 0.9930194020271301,grad_norm: 0.8734094301126775, iteration: 141787
loss: 0.9928766489028931,grad_norm: 0.999999127863694, iteration: 141788
loss: 1.0055253505706787,grad_norm: 0.999999158423662, iteration: 141789
loss: 1.0459548234939575,grad_norm: 0.9921980981283068, iteration: 141790
loss: 0.9776204228401184,grad_norm: 0.9999991121575912, iteration: 141791
loss: 0.994238555431366,grad_norm: 0.9550810768613478, iteration: 141792
loss: 0.9802001714706421,grad_norm: 0.9971794937193489, iteration: 141793
loss: 0.9778940081596375,grad_norm: 0.9999992675827858, iteration: 141794
loss: 0.9752986431121826,grad_norm: 0.9730248546716259, iteration: 141795
loss: 1.0146781206130981,grad_norm: 0.9999993109070352, iteration: 141796
loss: 0.9981573224067688,grad_norm: 0.9597808893817144, iteration: 141797
loss: 1.0213943719863892,grad_norm: 0.9999992718571592, iteration: 141798
loss: 0.9940440058708191,grad_norm: 0.890988014496034, iteration: 141799
loss: 0.9933662414550781,grad_norm: 0.8899286971111738, iteration: 141800
loss: 0.9838805794715881,grad_norm: 0.9427683059346994, iteration: 141801
loss: 1.0107617378234863,grad_norm: 0.9751870950352632, iteration: 141802
loss: 0.9985346794128418,grad_norm: 0.9344654013863427, iteration: 141803
loss: 0.9577993750572205,grad_norm: 0.9685496682983692, iteration: 141804
loss: 0.9855954647064209,grad_norm: 0.9962606174158077, iteration: 141805
loss: 0.9525976181030273,grad_norm: 0.9999991259452053, iteration: 141806
loss: 0.9575570225715637,grad_norm: 0.9142473565796261, iteration: 141807
loss: 1.03525710105896,grad_norm: 0.9495686807047257, iteration: 141808
loss: 0.9915796518325806,grad_norm: 0.8427414772964005, iteration: 141809
loss: 1.0072654485702515,grad_norm: 0.9999990719577688, iteration: 141810
loss: 1.0375133752822876,grad_norm: 0.9915124250652343, iteration: 141811
loss: 1.053396463394165,grad_norm: 0.9999999914842115, iteration: 141812
loss: 1.0090655088424683,grad_norm: 0.9376595115059688, iteration: 141813
loss: 1.0681703090667725,grad_norm: 0.9999991214661723, iteration: 141814
loss: 0.9879266023635864,grad_norm: 0.9999990474710982, iteration: 141815
loss: 1.0032732486724854,grad_norm: 0.9999992656904324, iteration: 141816
loss: 1.001826286315918,grad_norm: 0.9999990463786966, iteration: 141817
loss: 1.0096001625061035,grad_norm: 0.9999990620816485, iteration: 141818
loss: 1.0239640474319458,grad_norm: 0.999999014820815, iteration: 141819
loss: 0.9879927635192871,grad_norm: 0.9366240584318232, iteration: 141820
loss: 1.0080835819244385,grad_norm: 0.9999991567594313, iteration: 141821
loss: 1.0179027318954468,grad_norm: 0.8776300602783434, iteration: 141822
loss: 0.9757893681526184,grad_norm: 0.8995781943835638, iteration: 141823
loss: 0.9810754656791687,grad_norm: 0.9999992058223721, iteration: 141824
loss: 1.0073033571243286,grad_norm: 0.8849226558631014, iteration: 141825
loss: 1.0021023750305176,grad_norm: 0.9852944907745085, iteration: 141826
loss: 0.9728713631629944,grad_norm: 0.9382118349286849, iteration: 141827
loss: 1.0300078392028809,grad_norm: 0.9999990951882342, iteration: 141828
loss: 1.0365163087844849,grad_norm: 0.9999990287418582, iteration: 141829
loss: 1.046400785446167,grad_norm: 0.8954356587249007, iteration: 141830
loss: 0.9995548129081726,grad_norm: 0.9999991484441891, iteration: 141831
loss: 0.991445779800415,grad_norm: 0.9999991966546545, iteration: 141832
loss: 0.99627286195755,grad_norm: 0.9999991105238767, iteration: 141833
loss: 0.9988874793052673,grad_norm: 0.9999990542024564, iteration: 141834
loss: 0.9859687685966492,grad_norm: 0.8605069309333215, iteration: 141835
loss: 1.0241445302963257,grad_norm: 0.8893802802873974, iteration: 141836
loss: 1.0486186742782593,grad_norm: 0.9999992216846735, iteration: 141837
loss: 1.0142015218734741,grad_norm: 0.9999990147906476, iteration: 141838
loss: 0.975760817527771,grad_norm: 0.9941765849942872, iteration: 141839
loss: 1.0207897424697876,grad_norm: 0.8507896631927623, iteration: 141840
loss: 1.0109013319015503,grad_norm: 0.9999990915785247, iteration: 141841
loss: 1.018906593322754,grad_norm: 0.8041319227912671, iteration: 141842
loss: 0.9719879627227783,grad_norm: 0.9085701951267579, iteration: 141843
loss: 0.9948390126228333,grad_norm: 0.9999991999108933, iteration: 141844
loss: 1.1419373750686646,grad_norm: 0.9999991918907621, iteration: 141845
loss: 0.9880006313323975,grad_norm: 0.9999992551968057, iteration: 141846
loss: 0.9844756126403809,grad_norm: 0.9999989956993334, iteration: 141847
loss: 1.0137131214141846,grad_norm: 0.9999992328581213, iteration: 141848
loss: 1.0377213954925537,grad_norm: 0.9999990660861808, iteration: 141849
loss: 0.989146888256073,grad_norm: 0.9885536415655549, iteration: 141850
loss: 0.9873639941215515,grad_norm: 0.9999993083974635, iteration: 141851
loss: 0.9870434999465942,grad_norm: 0.992912333020327, iteration: 141852
loss: 1.0096768140792847,grad_norm: 0.9999992486227127, iteration: 141853
loss: 1.018723487854004,grad_norm: 0.9999992181694414, iteration: 141854
loss: 1.0207812786102295,grad_norm: 0.9353060749581708, iteration: 141855
loss: 1.0589431524276733,grad_norm: 0.9999994676502267, iteration: 141856
loss: 0.9845079183578491,grad_norm: 0.9694061603551815, iteration: 141857
loss: 1.0057657957077026,grad_norm: 0.9999990100136781, iteration: 141858
loss: 0.9990695118904114,grad_norm: 0.8009448626453154, iteration: 141859
loss: 1.0020734071731567,grad_norm: 0.9999991317144922, iteration: 141860
loss: 0.9968234896659851,grad_norm: 0.9999991061051774, iteration: 141861
loss: 0.9811655879020691,grad_norm: 0.9999990314806577, iteration: 141862
loss: 0.9993477463722229,grad_norm: 0.8977662511175045, iteration: 141863
loss: 0.9870654940605164,grad_norm: 0.9999990481357681, iteration: 141864
loss: 1.0164361000061035,grad_norm: 0.9999990818340319, iteration: 141865
loss: 0.9683263301849365,grad_norm: 0.9999998885499932, iteration: 141866
loss: 0.9709265828132629,grad_norm: 0.9999991817403286, iteration: 141867
loss: 0.9939262866973877,grad_norm: 0.9557176455580689, iteration: 141868
loss: 0.9809736013412476,grad_norm: 0.9999991072523035, iteration: 141869
loss: 1.023289680480957,grad_norm: 0.9995444450580111, iteration: 141870
loss: 1.0005558729171753,grad_norm: 0.9999992116417947, iteration: 141871
loss: 0.9924275279045105,grad_norm: 0.8653012862174119, iteration: 141872
loss: 1.0092788934707642,grad_norm: 0.9673243428554348, iteration: 141873
loss: 1.0552237033843994,grad_norm: 0.9999992113240936, iteration: 141874
loss: 1.0197787284851074,grad_norm: 0.9999992203776786, iteration: 141875
loss: 0.9732930064201355,grad_norm: 0.9999991366385299, iteration: 141876
loss: 0.9752002954483032,grad_norm: 0.9115626613641002, iteration: 141877
loss: 1.0189847946166992,grad_norm: 0.9999990627016714, iteration: 141878
loss: 0.993927001953125,grad_norm: 0.9999991429338116, iteration: 141879
loss: 0.9631397128105164,grad_norm: 0.9353177979582297, iteration: 141880
loss: 0.9615280628204346,grad_norm: 0.9999996711337675, iteration: 141881
loss: 0.9839965105056763,grad_norm: 0.9999989192182867, iteration: 141882
loss: 0.9668013453483582,grad_norm: 0.999999103843193, iteration: 141883
loss: 0.9886271953582764,grad_norm: 0.9999991548267654, iteration: 141884
loss: 1.0480406284332275,grad_norm: 0.9999991890382807, iteration: 141885
loss: 1.0005069971084595,grad_norm: 0.9999990754422924, iteration: 141886
loss: 0.9655001759529114,grad_norm: 0.9999989683948737, iteration: 141887
loss: 1.0295010805130005,grad_norm: 0.9999993915108717, iteration: 141888
loss: 1.0049077272415161,grad_norm: 0.9999990535660024, iteration: 141889
loss: 1.0089161396026611,grad_norm: 0.9861064813561702, iteration: 141890
loss: 1.0021333694458008,grad_norm: 0.9999992599033763, iteration: 141891
loss: 1.016653060913086,grad_norm: 0.9999991037741341, iteration: 141892
loss: 1.0237592458724976,grad_norm: 0.9999991763326533, iteration: 141893
loss: 1.004196047782898,grad_norm: 0.9299112118516024, iteration: 141894
loss: 1.0254809856414795,grad_norm: 0.999999075364756, iteration: 141895
loss: 1.0218913555145264,grad_norm: 0.9963393634786265, iteration: 141896
loss: 0.999974250793457,grad_norm: 0.8765663685928032, iteration: 141897
loss: 0.9839532971382141,grad_norm: 0.809957714607574, iteration: 141898
loss: 1.0151700973510742,grad_norm: 0.9999998385204826, iteration: 141899
loss: 1.0114126205444336,grad_norm: 0.9999991192601155, iteration: 141900
loss: 1.0441523790359497,grad_norm: 0.9999990539672813, iteration: 141901
loss: 0.992573082447052,grad_norm: 0.9999990991588966, iteration: 141902
loss: 1.012945294380188,grad_norm: 0.9999992253045582, iteration: 141903
loss: 0.9868209362030029,grad_norm: 0.999999119190252, iteration: 141904
loss: 0.9928954839706421,grad_norm: 0.9941904456115714, iteration: 141905
loss: 1.0086884498596191,grad_norm: 0.9999992306804099, iteration: 141906
loss: 0.9909229874610901,grad_norm: 0.9598702392900863, iteration: 141907
loss: 1.0227718353271484,grad_norm: 0.9999991386912714, iteration: 141908
loss: 0.9792124032974243,grad_norm: 0.7649547035165712, iteration: 141909
loss: 1.023630976676941,grad_norm: 0.9346044769335071, iteration: 141910
loss: 1.003679871559143,grad_norm: 0.9567836017490804, iteration: 141911
loss: 0.9774206876754761,grad_norm: 0.9999989319698303, iteration: 141912
loss: 0.9815956950187683,grad_norm: 0.9374415118528412, iteration: 141913
loss: 1.0159274339675903,grad_norm: 0.9999991971476168, iteration: 141914
loss: 1.0039646625518799,grad_norm: 0.9999990766116936, iteration: 141915
loss: 0.9952978491783142,grad_norm: 0.9999992079935468, iteration: 141916
loss: 1.0319819450378418,grad_norm: 0.975484995427037, iteration: 141917
loss: 1.0101487636566162,grad_norm: 0.9999991367969838, iteration: 141918
loss: 1.0117688179016113,grad_norm: 0.977076272849877, iteration: 141919
loss: 1.0692471265792847,grad_norm: 0.99999916316952, iteration: 141920
loss: 1.0084656476974487,grad_norm: 0.9946687687553205, iteration: 141921
loss: 0.9857650995254517,grad_norm: 0.9675144900764111, iteration: 141922
loss: 0.9961379170417786,grad_norm: 0.86524603622814, iteration: 141923
loss: 0.9553198218345642,grad_norm: 0.9999991259704365, iteration: 141924
loss: 0.9778894782066345,grad_norm: 0.9999992600083717, iteration: 141925
loss: 0.9567971229553223,grad_norm: 0.9999991361795185, iteration: 141926
loss: 0.9934883117675781,grad_norm: 0.7575235987703358, iteration: 141927
loss: 0.9871594309806824,grad_norm: 0.999999666750722, iteration: 141928
loss: 1.0007792711257935,grad_norm: 0.9999992139196683, iteration: 141929
loss: 1.0014830827713013,grad_norm: 0.9404477029661271, iteration: 141930
loss: 0.9910734295845032,grad_norm: 0.9999990954965982, iteration: 141931
loss: 0.9635857939720154,grad_norm: 0.9728955478038863, iteration: 141932
loss: 1.0211381912231445,grad_norm: 0.9999990656419319, iteration: 141933
loss: 1.0247797966003418,grad_norm: 0.9999990228479516, iteration: 141934
loss: 1.0726755857467651,grad_norm: 0.9999996329823677, iteration: 141935
loss: 1.0029165744781494,grad_norm: 0.8912169074182955, iteration: 141936
loss: 1.0108057260513306,grad_norm: 0.9069230231337773, iteration: 141937
loss: 0.9643908739089966,grad_norm: 0.9999992709030887, iteration: 141938
loss: 1.0149668455123901,grad_norm: 0.9999991542459172, iteration: 141939
loss: 0.9680768251419067,grad_norm: 0.9999991675319537, iteration: 141940
loss: 0.9752094745635986,grad_norm: 0.9999990079363642, iteration: 141941
loss: 0.9922332763671875,grad_norm: 0.9999990913379632, iteration: 141942
loss: 1.0105211734771729,grad_norm: 0.9999990561449954, iteration: 141943
loss: 0.9662669897079468,grad_norm: 0.9999992139388096, iteration: 141944
loss: 0.980737566947937,grad_norm: 0.970630097709564, iteration: 141945
loss: 1.0141191482543945,grad_norm: 0.9999992011570844, iteration: 141946
loss: 0.979425847530365,grad_norm: 0.9916645191377724, iteration: 141947
loss: 1.0057686567306519,grad_norm: 0.9999991584618305, iteration: 141948
loss: 0.9620633125305176,grad_norm: 0.9991884128548323, iteration: 141949
loss: 1.0233666896820068,grad_norm: 0.9999989612782121, iteration: 141950
loss: 0.9658763408660889,grad_norm: 0.999999199220485, iteration: 141951
loss: 0.9939960241317749,grad_norm: 0.9999991542291402, iteration: 141952
loss: 1.010710597038269,grad_norm: 0.9710419932094088, iteration: 141953
loss: 1.0095096826553345,grad_norm: 0.9318308742170878, iteration: 141954
loss: 0.9754083752632141,grad_norm: 0.8593173507113486, iteration: 141955
loss: 1.047983169555664,grad_norm: 0.9999992341868392, iteration: 141956
loss: 0.9679593443870544,grad_norm: 0.9820851297920556, iteration: 141957
loss: 0.9840536713600159,grad_norm: 0.9504024499804794, iteration: 141958
loss: 1.0064196586608887,grad_norm: 0.9999991032528311, iteration: 141959
loss: 0.9927963018417358,grad_norm: 0.9999991221462862, iteration: 141960
loss: 1.0250165462493896,grad_norm: 0.9999990585576195, iteration: 141961
loss: 0.9476325511932373,grad_norm: 0.9999990936381978, iteration: 141962
loss: 0.9843468070030212,grad_norm: 0.9999990498229554, iteration: 141963
loss: 0.9939727783203125,grad_norm: 0.9999990132860013, iteration: 141964
loss: 1.0240367650985718,grad_norm: 0.9999992713266604, iteration: 141965
loss: 1.03131902217865,grad_norm: 0.9999991851138137, iteration: 141966
loss: 1.1027116775512695,grad_norm: 0.9999996641949137, iteration: 141967
loss: 0.9787088632583618,grad_norm: 0.9999990866693098, iteration: 141968
loss: 1.0077589750289917,grad_norm: 0.9079924184758914, iteration: 141969
loss: 0.9782590866088867,grad_norm: 0.9999991705475635, iteration: 141970
loss: 1.0073039531707764,grad_norm: 0.9999992388277134, iteration: 141971
loss: 1.0178766250610352,grad_norm: 0.999999307819288, iteration: 141972
loss: 1.0076297521591187,grad_norm: 0.9999990024099162, iteration: 141973
loss: 0.982767641544342,grad_norm: 0.9999990052260407, iteration: 141974
loss: 1.0120937824249268,grad_norm: 0.9999993822176388, iteration: 141975
loss: 0.966424822807312,grad_norm: 0.9999990740107974, iteration: 141976
loss: 0.9983257055282593,grad_norm: 0.9999990905769338, iteration: 141977
loss: 0.9843953251838684,grad_norm: 0.8055650713886007, iteration: 141978
loss: 0.9613856077194214,grad_norm: 0.9811003582775178, iteration: 141979
loss: 0.9865788817405701,grad_norm: 0.9354839990946031, iteration: 141980
loss: 0.9913296699523926,grad_norm: 0.9999989872002711, iteration: 141981
loss: 1.0129073858261108,grad_norm: 0.9807608149242061, iteration: 141982
loss: 1.0094772577285767,grad_norm: 0.9999988568337386, iteration: 141983
loss: 1.0733428001403809,grad_norm: 0.8940690912966125, iteration: 141984
loss: 0.966166615486145,grad_norm: 0.8759513292450634, iteration: 141985
loss: 1.0239917039871216,grad_norm: 0.9562338310179114, iteration: 141986
loss: 1.0236953496932983,grad_norm: 0.978791068356677, iteration: 141987
loss: 0.9978093504905701,grad_norm: 0.9999990249722844, iteration: 141988
loss: 1.0153816938400269,grad_norm: 0.9999993403484283, iteration: 141989
loss: 0.9735875129699707,grad_norm: 0.9999989985108184, iteration: 141990
loss: 1.0367660522460938,grad_norm: 0.9999999761591056, iteration: 141991
loss: 1.016403317451477,grad_norm: 0.999999091320898, iteration: 141992
loss: 0.9665627479553223,grad_norm: 0.8466165768765458, iteration: 141993
loss: 1.0613727569580078,grad_norm: 0.9999992300046182, iteration: 141994
loss: 1.0364960432052612,grad_norm: 0.9673090735286345, iteration: 141995
loss: 1.0007299184799194,grad_norm: 0.8921391764470338, iteration: 141996
loss: 1.0079973936080933,grad_norm: 0.8933898594947721, iteration: 141997
loss: 0.9574542045593262,grad_norm: 0.9285800418723174, iteration: 141998
loss: 1.0142178535461426,grad_norm: 0.999999301263988, iteration: 141999
loss: 1.0371532440185547,grad_norm: 0.9999992770967991, iteration: 142000
loss: 0.964464545249939,grad_norm: 0.9026808549834449, iteration: 142001
loss: 1.030311107635498,grad_norm: 0.9806992843213388, iteration: 142002
loss: 0.9940223693847656,grad_norm: 0.9999991235400394, iteration: 142003
loss: 1.0200642347335815,grad_norm: 0.9999993196166833, iteration: 142004
loss: 1.0128077268600464,grad_norm: 0.9622124680816003, iteration: 142005
loss: 1.0120147466659546,grad_norm: 0.9982042585295646, iteration: 142006
loss: 0.9700798988342285,grad_norm: 0.9023288600319899, iteration: 142007
loss: 0.9730454683303833,grad_norm: 0.9616003372595151, iteration: 142008
loss: 1.0997586250305176,grad_norm: 0.9999989924276298, iteration: 142009
loss: 0.991992175579071,grad_norm: 0.9854258076015276, iteration: 142010
loss: 1.0470988750457764,grad_norm: 0.9999994631199984, iteration: 142011
loss: 1.0173786878585815,grad_norm: 0.9999992134734961, iteration: 142012
loss: 0.9986427426338196,grad_norm: 0.9999998050290243, iteration: 142013
loss: 1.010338306427002,grad_norm: 0.9999993046822248, iteration: 142014
loss: 1.0185905694961548,grad_norm: 0.9802837151317292, iteration: 142015
loss: 1.0337554216384888,grad_norm: 0.8381834849118361, iteration: 142016
loss: 1.0225824117660522,grad_norm: 0.9999991203577023, iteration: 142017
loss: 0.9665271639823914,grad_norm: 0.999998963505752, iteration: 142018
loss: 1.0087083578109741,grad_norm: 0.9999991968073644, iteration: 142019
loss: 1.041385531425476,grad_norm: 0.9999993178148566, iteration: 142020
loss: 0.9889922738075256,grad_norm: 0.9999991401090751, iteration: 142021
loss: 0.992322564125061,grad_norm: 0.9577012213925447, iteration: 142022
loss: 1.0664353370666504,grad_norm: 0.9999998836847543, iteration: 142023
loss: 1.0026898384094238,grad_norm: 0.9999990347007841, iteration: 142024
loss: 0.9760993719100952,grad_norm: 0.7733131618753705, iteration: 142025
loss: 0.9401777386665344,grad_norm: 0.9833769187971003, iteration: 142026
loss: 0.9964064359664917,grad_norm: 0.9999991673298977, iteration: 142027
loss: 0.9868865013122559,grad_norm: 0.9999992107989574, iteration: 142028
loss: 0.976953387260437,grad_norm: 0.8364218983940062, iteration: 142029
loss: 0.960189700126648,grad_norm: 0.9995444962697297, iteration: 142030
loss: 1.0622780323028564,grad_norm: 0.999999269589154, iteration: 142031
loss: 1.0750439167022705,grad_norm: 0.9999993379087013, iteration: 142032
loss: 0.9759822487831116,grad_norm: 0.9999995123597237, iteration: 142033
loss: 0.9855647683143616,grad_norm: 0.9501170810314992, iteration: 142034
loss: 0.9786550402641296,grad_norm: 0.9999991141835198, iteration: 142035
loss: 1.2827210426330566,grad_norm: 0.999999168860019, iteration: 142036
loss: 1.0021564960479736,grad_norm: 0.9999990901511202, iteration: 142037
loss: 1.0360337495803833,grad_norm: 0.9999992848613387, iteration: 142038
loss: 0.961676836013794,grad_norm: 0.9978318607482948, iteration: 142039
loss: 1.023877501487732,grad_norm: 0.9999990388042604, iteration: 142040
loss: 0.9884846806526184,grad_norm: 0.9999991522741661, iteration: 142041
loss: 1.0052834749221802,grad_norm: 0.9999991833478831, iteration: 142042
loss: 1.0074557065963745,grad_norm: 0.9279692424572826, iteration: 142043
loss: 0.9828705787658691,grad_norm: 0.9999990652437455, iteration: 142044
loss: 0.9673190712928772,grad_norm: 0.9273098888937704, iteration: 142045
loss: 1.0252654552459717,grad_norm: 0.9999992868629342, iteration: 142046
loss: 1.0229219198226929,grad_norm: 0.9999990020445008, iteration: 142047
loss: 1.0058268308639526,grad_norm: 0.999999148199201, iteration: 142048
loss: 1.0326979160308838,grad_norm: 0.9999993926125972, iteration: 142049
loss: 1.059443473815918,grad_norm: 0.9999990749822334, iteration: 142050
loss: 0.9954728484153748,grad_norm: 0.9098964927240435, iteration: 142051
loss: 0.9731079339981079,grad_norm: 0.9483799255526281, iteration: 142052
loss: 0.9887304306030273,grad_norm: 0.9999989772408733, iteration: 142053
loss: 0.9722167253494263,grad_norm: 0.9269462774580899, iteration: 142054
loss: 0.9809277057647705,grad_norm: 0.9999992986638626, iteration: 142055
loss: 1.0027174949645996,grad_norm: 0.9999990540299374, iteration: 142056
loss: 0.9875608086585999,grad_norm: 0.9758731585004008, iteration: 142057
loss: 1.0026830434799194,grad_norm: 0.9999997045456677, iteration: 142058
loss: 1.004752278327942,grad_norm: 0.9999992284810045, iteration: 142059
loss: 1.007137417793274,grad_norm: 0.9999991988656315, iteration: 142060
loss: 0.9903985261917114,grad_norm: 0.8964471360416124, iteration: 142061
loss: 0.9892869591712952,grad_norm: 0.9999991575394297, iteration: 142062
loss: 1.0136431455612183,grad_norm: 0.8787246344672146, iteration: 142063
loss: 1.0003141164779663,grad_norm: 0.9999989747297853, iteration: 142064
loss: 1.0157041549682617,grad_norm: 0.999999133067773, iteration: 142065
loss: 1.0114384889602661,grad_norm: 0.8887823088573588, iteration: 142066
loss: 1.072496771812439,grad_norm: 0.9999992415899862, iteration: 142067
loss: 1.0140002965927124,grad_norm: 0.9999992410321978, iteration: 142068
loss: 0.995875358581543,grad_norm: 0.9999990981435728, iteration: 142069
loss: 0.9796359539031982,grad_norm: 0.99999910998332, iteration: 142070
loss: 1.0083420276641846,grad_norm: 0.9766252842915709, iteration: 142071
loss: 0.9920079112052917,grad_norm: 0.999999169199822, iteration: 142072
loss: 1.0081368684768677,grad_norm: 0.9943635679903108, iteration: 142073
loss: 0.9880251288414001,grad_norm: 0.9999989170076838, iteration: 142074
loss: 1.0104070901870728,grad_norm: 0.9999992479010791, iteration: 142075
loss: 1.0397804975509644,grad_norm: 0.9999991634324171, iteration: 142076
loss: 0.9843342304229736,grad_norm: 0.9383186098456011, iteration: 142077
loss: 1.0065375566482544,grad_norm: 0.9451801886634555, iteration: 142078
loss: 1.0040467977523804,grad_norm: 0.9894657455440586, iteration: 142079
loss: 1.0723998546600342,grad_norm: 0.9999999100615936, iteration: 142080
loss: 0.9713299870491028,grad_norm: 0.9999991129668094, iteration: 142081
loss: 0.9930245280265808,grad_norm: 0.871738141874381, iteration: 142082
loss: 0.9879975318908691,grad_norm: 0.9999992564218454, iteration: 142083
loss: 1.0196514129638672,grad_norm: 0.999999225926494, iteration: 142084
loss: 1.0570648908615112,grad_norm: 0.999999994573993, iteration: 142085
loss: 0.9896632432937622,grad_norm: 0.9999991450575928, iteration: 142086
loss: 1.012836217880249,grad_norm: 0.9694783716580236, iteration: 142087
loss: 1.0297324657440186,grad_norm: 0.8869028972091391, iteration: 142088
loss: 1.027756929397583,grad_norm: 0.9174131209464584, iteration: 142089
loss: 1.0408729314804077,grad_norm: 0.9999995791630197, iteration: 142090
loss: 0.9769741296768188,grad_norm: 0.983595299652168, iteration: 142091
loss: 1.0087758302688599,grad_norm: 0.8943299276428441, iteration: 142092
loss: 0.9901673793792725,grad_norm: 0.9999991728525874, iteration: 142093
loss: 0.989541232585907,grad_norm: 0.9999991353070291, iteration: 142094
loss: 1.0122153759002686,grad_norm: 0.9999990864200702, iteration: 142095
loss: 0.9956427812576294,grad_norm: 0.9897273027344805, iteration: 142096
loss: 1.0116084814071655,grad_norm: 0.920628273842293, iteration: 142097
loss: 1.0110454559326172,grad_norm: 0.9999990850795459, iteration: 142098
loss: 0.999365508556366,grad_norm: 0.9999992279182092, iteration: 142099
loss: 0.9543227553367615,grad_norm: 0.9999990492877439, iteration: 142100
loss: 1.02166748046875,grad_norm: 0.9579197708729462, iteration: 142101
loss: 0.9911036491394043,grad_norm: 0.9999990889583783, iteration: 142102
loss: 0.9489594101905823,grad_norm: 0.974750519390709, iteration: 142103
loss: 0.9693310260772705,grad_norm: 0.8668886947533638, iteration: 142104
loss: 1.026546835899353,grad_norm: 0.9999991337217881, iteration: 142105
loss: 1.0050615072250366,grad_norm: 0.9031026621769017, iteration: 142106
loss: 1.0216678380966187,grad_norm: 0.9041790443778188, iteration: 142107
loss: 0.9940313696861267,grad_norm: 0.999999198651142, iteration: 142108
loss: 1.0188021659851074,grad_norm: 0.9999990932003973, iteration: 142109
loss: 1.0061259269714355,grad_norm: 0.9999991178559731, iteration: 142110
loss: 1.030558466911316,grad_norm: 0.9999991154961024, iteration: 142111
loss: 0.9901437759399414,grad_norm: 0.9999989350437094, iteration: 142112
loss: 1.0020655393600464,grad_norm: 0.9390493409376803, iteration: 142113
loss: 0.9933770298957825,grad_norm: 0.9999992936102876, iteration: 142114
loss: 1.010772943496704,grad_norm: 0.9267294580505497, iteration: 142115
loss: 1.0469046831130981,grad_norm: 0.9999990965936076, iteration: 142116
loss: 1.0093835592269897,grad_norm: 0.9999991221008644, iteration: 142117
loss: 1.0234332084655762,grad_norm: 0.9556162866883837, iteration: 142118
loss: 0.9960341453552246,grad_norm: 0.8884796489460979, iteration: 142119
loss: 1.013743281364441,grad_norm: 0.9999990151467973, iteration: 142120
loss: 0.9724860191345215,grad_norm: 0.9999990984386693, iteration: 142121
loss: 1.0401238203048706,grad_norm: 0.9999997209451331, iteration: 142122
loss: 0.9881361126899719,grad_norm: 0.9999990947397344, iteration: 142123
loss: 0.9925273656845093,grad_norm: 0.9999991874680497, iteration: 142124
loss: 0.9837531447410583,grad_norm: 0.8754082297865304, iteration: 142125
loss: 1.0007392168045044,grad_norm: 0.8266314887984241, iteration: 142126
loss: 0.975264310836792,grad_norm: 0.9999994543529181, iteration: 142127
loss: 1.0476665496826172,grad_norm: 0.9999994880831197, iteration: 142128
loss: 1.0200976133346558,grad_norm: 0.929938127596659, iteration: 142129
loss: 0.997042715549469,grad_norm: 0.9999991158099268, iteration: 142130
loss: 0.9958845973014832,grad_norm: 0.9520393172194183, iteration: 142131
loss: 1.0038186311721802,grad_norm: 0.9999989871228107, iteration: 142132
loss: 0.9918040633201599,grad_norm: 0.8631242415513466, iteration: 142133
loss: 1.1272635459899902,grad_norm: 0.9999991555238269, iteration: 142134
loss: 1.0183054208755493,grad_norm: 0.9999993156907719, iteration: 142135
loss: 0.9779268503189087,grad_norm: 0.9999990626391788, iteration: 142136
loss: 1.009607195854187,grad_norm: 0.9617583233629916, iteration: 142137
loss: 0.987533688545227,grad_norm: 0.9999988919529235, iteration: 142138
loss: 0.9802382588386536,grad_norm: 0.9999991616542502, iteration: 142139
loss: 1.0322383642196655,grad_norm: 0.9999994196355783, iteration: 142140
loss: 0.9865081906318665,grad_norm: 0.8341428499457827, iteration: 142141
loss: 1.0071814060211182,grad_norm: 0.9999990229693342, iteration: 142142
loss: 1.047530174255371,grad_norm: 0.999999158683623, iteration: 142143
loss: 1.0234944820404053,grad_norm: 0.9552203032567603, iteration: 142144
loss: 1.008101224899292,grad_norm: 0.999999238405421, iteration: 142145
loss: 0.9907728433609009,grad_norm: 0.9999998934110467, iteration: 142146
loss: 1.0087647438049316,grad_norm: 0.9999990893629873, iteration: 142147
loss: 1.0123881101608276,grad_norm: 0.9999991708329522, iteration: 142148
loss: 1.0042157173156738,grad_norm: 0.9406981547640133, iteration: 142149
loss: 1.0302395820617676,grad_norm: 0.9999991810987853, iteration: 142150
loss: 0.9774281978607178,grad_norm: 0.9999991349538659, iteration: 142151
loss: 1.03307044506073,grad_norm: 0.9923834214569631, iteration: 142152
loss: 1.0164132118225098,grad_norm: 0.837597659513983, iteration: 142153
loss: 1.0291141271591187,grad_norm: 0.9999992516591001, iteration: 142154
loss: 0.9957664012908936,grad_norm: 0.9999992462124402, iteration: 142155
loss: 0.9881031513214111,grad_norm: 0.9951293621426002, iteration: 142156
loss: 0.9922435879707336,grad_norm: 0.9524569313465543, iteration: 142157
loss: 1.0215972661972046,grad_norm: 0.8707718616579047, iteration: 142158
loss: 0.9880856871604919,grad_norm: 0.9234764066012905, iteration: 142159
loss: 1.0145763158798218,grad_norm: 0.9999990074573417, iteration: 142160
loss: 1.0580956935882568,grad_norm: 0.9999993301885713, iteration: 142161
loss: 1.0304739475250244,grad_norm: 0.9999991943079014, iteration: 142162
loss: 1.0203073024749756,grad_norm: 0.9297989298224042, iteration: 142163
loss: 0.9951119422912598,grad_norm: 0.9999991402271886, iteration: 142164
loss: 0.9800342917442322,grad_norm: 0.9999990100283777, iteration: 142165
loss: 0.9962064623832703,grad_norm: 0.9999990927903716, iteration: 142166
loss: 0.9913568496704102,grad_norm: 0.9999991018075969, iteration: 142167
loss: 1.0327534675598145,grad_norm: 0.9999991983564511, iteration: 142168
loss: 1.0175772905349731,grad_norm: 0.9999990598105316, iteration: 142169
loss: 0.9402482509613037,grad_norm: 0.9999992023441225, iteration: 142170
loss: 0.944175124168396,grad_norm: 0.8886812890739622, iteration: 142171
loss: 1.0195598602294922,grad_norm: 0.9999992200030986, iteration: 142172
loss: 1.0304784774780273,grad_norm: 0.9999992694183826, iteration: 142173
loss: 0.9990249276161194,grad_norm: 0.9185332253222998, iteration: 142174
loss: 0.9990854859352112,grad_norm: 0.9999990689708138, iteration: 142175
loss: 1.034509301185608,grad_norm: 0.9999990453835684, iteration: 142176
loss: 1.0038117170333862,grad_norm: 0.8903590347431058, iteration: 142177
loss: 1.0632885694503784,grad_norm: 0.9999988437147957, iteration: 142178
loss: 0.9983268976211548,grad_norm: 0.9738428000531509, iteration: 142179
loss: 1.0095181465148926,grad_norm: 0.9897673841339202, iteration: 142180
loss: 1.0325284004211426,grad_norm: 0.9999990368162348, iteration: 142181
loss: 1.025255799293518,grad_norm: 0.9915956645601121, iteration: 142182
loss: 0.9906846880912781,grad_norm: 0.9999991647897248, iteration: 142183
loss: 1.0000306367874146,grad_norm: 0.9734580254253112, iteration: 142184
loss: 0.9993951320648193,grad_norm: 0.999999238154975, iteration: 142185
loss: 1.0404675006866455,grad_norm: 0.9372835831149056, iteration: 142186
loss: 1.0130985975265503,grad_norm: 0.9999998910423178, iteration: 142187
loss: 0.9812541604042053,grad_norm: 0.8768273531060151, iteration: 142188
loss: 0.9836845397949219,grad_norm: 0.9999991828705918, iteration: 142189
loss: 0.9895464777946472,grad_norm: 0.9743382826922263, iteration: 142190
loss: 1.001643419265747,grad_norm: 0.9830594815463279, iteration: 142191
loss: 0.9824739098548889,grad_norm: 0.9999992753413509, iteration: 142192
loss: 0.9941593408584595,grad_norm: 0.99999920657982, iteration: 142193
loss: 1.0528864860534668,grad_norm: 0.9999993503260335, iteration: 142194
loss: 1.0339758396148682,grad_norm: 0.9858262862597414, iteration: 142195
loss: 1.0975654125213623,grad_norm: 0.9999989100054725, iteration: 142196
loss: 0.990729808807373,grad_norm: 0.9999989553561467, iteration: 142197
loss: 1.012780785560608,grad_norm: 0.9999990376009946, iteration: 142198
loss: 0.9850425720214844,grad_norm: 0.9999992348740383, iteration: 142199
loss: 0.9942454099655151,grad_norm: 0.9999999257020132, iteration: 142200
loss: 1.0081329345703125,grad_norm: 0.9999991176771668, iteration: 142201
loss: 1.0108442306518555,grad_norm: 0.9999992796666627, iteration: 142202
loss: 1.0043622255325317,grad_norm: 0.9999995249871687, iteration: 142203
loss: 0.9679507613182068,grad_norm: 0.999999084826519, iteration: 142204
loss: 1.0213263034820557,grad_norm: 0.9999990096640965, iteration: 142205
loss: 1.0035951137542725,grad_norm: 0.9999992390991606, iteration: 142206
loss: 0.978731095790863,grad_norm: 0.9999991931475125, iteration: 142207
loss: 0.993145227432251,grad_norm: 0.9999991776588136, iteration: 142208
loss: 0.9955968856811523,grad_norm: 0.999999269083583, iteration: 142209
loss: 0.9678792357444763,grad_norm: 0.9999992003505559, iteration: 142210
loss: 1.0572768449783325,grad_norm: 0.9999990751337785, iteration: 142211
loss: 1.0109108686447144,grad_norm: 0.999999377466086, iteration: 142212
loss: 0.9961875677108765,grad_norm: 0.9999990978470851, iteration: 142213
loss: 0.9936601519584656,grad_norm: 0.9646315391635896, iteration: 142214
loss: 1.0182358026504517,grad_norm: 0.999999110037314, iteration: 142215
loss: 0.9837573766708374,grad_norm: 0.9999989661183298, iteration: 142216
loss: 0.984463632106781,grad_norm: 0.8632273157801538, iteration: 142217
loss: 1.0333120822906494,grad_norm: 0.9999990504126489, iteration: 142218
loss: 1.0099047422409058,grad_norm: 0.9999991391555114, iteration: 142219
loss: 0.985975444316864,grad_norm: 0.8884197561332158, iteration: 142220
loss: 0.9729882478713989,grad_norm: 0.9600036211246936, iteration: 142221
loss: 0.977482795715332,grad_norm: 0.9999991014096326, iteration: 142222
loss: 1.0109682083129883,grad_norm: 0.9999995488797474, iteration: 142223
loss: 1.03135085105896,grad_norm: 0.9999993556822425, iteration: 142224
loss: 0.9816948771476746,grad_norm: 0.9999990401946978, iteration: 142225
loss: 1.0907213687896729,grad_norm: 0.999999661747288, iteration: 142226
loss: 0.9833661913871765,grad_norm: 0.999999474458038, iteration: 142227
loss: 1.0426552295684814,grad_norm: 0.9999999250781136, iteration: 142228
loss: 1.0451769828796387,grad_norm: 0.9833593628447417, iteration: 142229
loss: 1.0069389343261719,grad_norm: 0.9999996459832938, iteration: 142230
loss: 0.9963892698287964,grad_norm: 0.7972016414752968, iteration: 142231
loss: 1.0146143436431885,grad_norm: 0.9363413449771011, iteration: 142232
loss: 0.999160885810852,grad_norm: 0.8968987700726395, iteration: 142233
loss: 1.0064464807510376,grad_norm: 0.9999990053718537, iteration: 142234
loss: 1.0279614925384521,grad_norm: 0.999999829718238, iteration: 142235
loss: 0.9850674271583557,grad_norm: 0.9999993188157659, iteration: 142236
loss: 0.9787527322769165,grad_norm: 0.9999992485221145, iteration: 142237
loss: 1.0180028676986694,grad_norm: 0.9999991731048294, iteration: 142238
loss: 0.982970654964447,grad_norm: 0.9722539019897763, iteration: 142239
loss: 1.0215412378311157,grad_norm: 0.9999995410277749, iteration: 142240
loss: 1.015567421913147,grad_norm: 0.8970394794528058, iteration: 142241
loss: 1.0039573907852173,grad_norm: 0.9999990155269843, iteration: 142242
loss: 1.0150715112686157,grad_norm: 0.9465746267779929, iteration: 142243
loss: 0.9995973110198975,grad_norm: 0.9999992170077274, iteration: 142244
loss: 0.9816747903823853,grad_norm: 0.9737031011455313, iteration: 142245
loss: 1.0135453939437866,grad_norm: 0.9999990462615126, iteration: 142246
loss: 0.9669798016548157,grad_norm: 0.999999786591292, iteration: 142247
loss: 1.0142384767532349,grad_norm: 0.9999989822115287, iteration: 142248
loss: 1.0072202682495117,grad_norm: 0.8793280454771969, iteration: 142249
loss: 1.0200344324111938,grad_norm: 0.9999991829521228, iteration: 142250
loss: 1.007663607597351,grad_norm: 0.9999989829582339, iteration: 142251
loss: 0.9806444048881531,grad_norm: 0.9999990607045424, iteration: 142252
loss: 1.0316723585128784,grad_norm: 0.9999993308203992, iteration: 142253
loss: 1.0571231842041016,grad_norm: 0.9999993620918207, iteration: 142254
loss: 1.0031613111495972,grad_norm: 0.9303219155184415, iteration: 142255
loss: 1.0094139575958252,grad_norm: 0.9865953695071608, iteration: 142256
loss: 1.008548617362976,grad_norm: 0.8904306599288684, iteration: 142257
loss: 1.2098214626312256,grad_norm: 0.9999995764092555, iteration: 142258
loss: 1.019132137298584,grad_norm: 0.9181939217786396, iteration: 142259
loss: 1.0173662900924683,grad_norm: 0.8568260826718247, iteration: 142260
loss: 1.0018935203552246,grad_norm: 0.9649202719141426, iteration: 142261
loss: 1.0077372789382935,grad_norm: 0.9999994046347446, iteration: 142262
loss: 0.9962966442108154,grad_norm: 0.8039736197602213, iteration: 142263
loss: 1.0678130388259888,grad_norm: 0.9999992032469742, iteration: 142264
loss: 0.9982499480247498,grad_norm: 0.9999990304749462, iteration: 142265
loss: 1.0251262187957764,grad_norm: 0.9999993103903876, iteration: 142266
loss: 0.9774001836776733,grad_norm: 0.9999989803584285, iteration: 142267
loss: 1.0057772397994995,grad_norm: 0.999998977358011, iteration: 142268
loss: 1.0259425640106201,grad_norm: 0.9999992696005744, iteration: 142269
loss: 0.9919232130050659,grad_norm: 0.9999992505990557, iteration: 142270
loss: 0.9865096807479858,grad_norm: 0.9516625134458438, iteration: 142271
loss: 1.0118319988250732,grad_norm: 0.9999991922906863, iteration: 142272
loss: 0.9851941466331482,grad_norm: 0.9999989943003379, iteration: 142273
loss: 1.0470435619354248,grad_norm: 0.999998952214254, iteration: 142274
loss: 1.0034457445144653,grad_norm: 0.9999990731992165, iteration: 142275
loss: 0.9989102482795715,grad_norm: 0.9944147990773858, iteration: 142276
loss: 1.0095889568328857,grad_norm: 0.9999994176983893, iteration: 142277
loss: 0.9324280619621277,grad_norm: 0.9042367194922208, iteration: 142278
loss: 0.9495912790298462,grad_norm: 0.9999992156215625, iteration: 142279
loss: 0.9866816401481628,grad_norm: 0.9110625940506258, iteration: 142280
loss: 0.9661996960639954,grad_norm: 0.9999994604661279, iteration: 142281
loss: 0.9900282621383667,grad_norm: 0.99999931285459, iteration: 142282
loss: 0.9925930500030518,grad_norm: 0.9999991126997884, iteration: 142283
loss: 0.955416738986969,grad_norm: 0.8320060710671515, iteration: 142284
loss: 0.9881048798561096,grad_norm: 0.9999989775979357, iteration: 142285
loss: 1.0234720706939697,grad_norm: 0.9999995503079695, iteration: 142286
loss: 0.9975091814994812,grad_norm: 0.8948039645619421, iteration: 142287
loss: 1.00929856300354,grad_norm: 0.9848808831468955, iteration: 142288
loss: 0.9914873242378235,grad_norm: 0.9181595812829227, iteration: 142289
loss: 1.013746976852417,grad_norm: 0.9999999220599673, iteration: 142290
loss: 0.9789001941680908,grad_norm: 0.999999189868077, iteration: 142291
loss: 0.9765876531600952,grad_norm: 0.9398740390353765, iteration: 142292
loss: 0.9753366708755493,grad_norm: 0.9999990021004045, iteration: 142293
loss: 1.0552401542663574,grad_norm: 0.9389295585739296, iteration: 142294
loss: 1.0147770643234253,grad_norm: 0.9457251411045662, iteration: 142295
loss: 1.0072039365768433,grad_norm: 0.9999990951737926, iteration: 142296
loss: 0.9738151431083679,grad_norm: 0.9999991139297414, iteration: 142297
loss: 0.9753745794296265,grad_norm: 0.9999990034656878, iteration: 142298
loss: 1.0604945421218872,grad_norm: 0.9999992826675871, iteration: 142299
loss: 1.0077941417694092,grad_norm: 0.9999991396966402, iteration: 142300
loss: 0.9974313378334045,grad_norm: 0.9999991011619642, iteration: 142301
loss: 1.0271247625350952,grad_norm: 0.999999185211896, iteration: 142302
loss: 1.0035170316696167,grad_norm: 0.9999991049314562, iteration: 142303
loss: 0.9931378960609436,grad_norm: 0.999999130677045, iteration: 142304
loss: 1.0554472208023071,grad_norm: 0.9999997275753716, iteration: 142305
loss: 0.9902059435844421,grad_norm: 0.8794955959386815, iteration: 142306
loss: 0.9846907258033752,grad_norm: 0.980022156238033, iteration: 142307
loss: 1.0472962856292725,grad_norm: 0.9999990321872119, iteration: 142308
loss: 0.9942801594734192,grad_norm: 0.9999992726200418, iteration: 142309
loss: 0.9918018579483032,grad_norm: 0.9999991355405835, iteration: 142310
loss: 1.0368518829345703,grad_norm: 0.9999991942061771, iteration: 142311
loss: 1.0262998342514038,grad_norm: 0.9999993962775376, iteration: 142312
loss: 0.9948734641075134,grad_norm: 0.9999990079468664, iteration: 142313
loss: 1.0070977210998535,grad_norm: 0.9235236315617986, iteration: 142314
loss: 1.0177687406539917,grad_norm: 0.999998928446143, iteration: 142315
loss: 0.9970763921737671,grad_norm: 0.9999991253900395, iteration: 142316
loss: 1.0323060750961304,grad_norm: 0.9999993361232122, iteration: 142317
loss: 1.064951777458191,grad_norm: 0.9999997923437335, iteration: 142318
loss: 1.0233834981918335,grad_norm: 0.9999997944590404, iteration: 142319
loss: 1.0043435096740723,grad_norm: 0.9999991193849178, iteration: 142320
loss: 0.9807025790214539,grad_norm: 0.9999989230615163, iteration: 142321
loss: 1.0033142566680908,grad_norm: 0.9999998476990837, iteration: 142322
loss: 0.959671676158905,grad_norm: 0.9999990427627599, iteration: 142323
loss: 1.012452244758606,grad_norm: 0.99999910009264, iteration: 142324
loss: 1.0006365776062012,grad_norm: 0.9999993369286447, iteration: 142325
loss: 1.0037239789962769,grad_norm: 0.9999994102834913, iteration: 142326
loss: 1.0381988286972046,grad_norm: 0.9999991940987548, iteration: 142327
loss: 1.0067169666290283,grad_norm: 0.9795538975452815, iteration: 142328
loss: 0.9806292057037354,grad_norm: 0.9999992301699424, iteration: 142329
loss: 1.000578761100769,grad_norm: 0.9999990696826222, iteration: 142330
loss: 0.9832412004470825,grad_norm: 0.9999991491118966, iteration: 142331
loss: 0.9837096333503723,grad_norm: 0.8772293620759414, iteration: 142332
loss: 0.9694178104400635,grad_norm: 0.8898684777517419, iteration: 142333
loss: 0.9805809855461121,grad_norm: 0.9999990689538125, iteration: 142334
loss: 0.9814700484275818,grad_norm: 0.9999991867377869, iteration: 142335
loss: 0.9974859952926636,grad_norm: 0.802011317195572, iteration: 142336
loss: 1.008223533630371,grad_norm: 0.9999989817003087, iteration: 142337
loss: 0.9653792977333069,grad_norm: 0.9885070663388804, iteration: 142338
loss: 0.986752986907959,grad_norm: 0.933272699157833, iteration: 142339
loss: 0.9842469096183777,grad_norm: 0.9999993016343031, iteration: 142340
loss: 0.9933825135231018,grad_norm: 0.9999990100509077, iteration: 142341
loss: 0.9700459241867065,grad_norm: 0.9675125080359448, iteration: 142342
loss: 1.0187207460403442,grad_norm: 0.9999991206990688, iteration: 142343
loss: 0.9697772264480591,grad_norm: 0.9603540476971766, iteration: 142344
loss: 1.003854751586914,grad_norm: 0.9929647502679865, iteration: 142345
loss: 0.9818792343139648,grad_norm: 0.9999990628529284, iteration: 142346
loss: 1.0075178146362305,grad_norm: 0.9999987860818418, iteration: 142347
loss: 1.004269003868103,grad_norm: 0.9999991457882335, iteration: 142348
loss: 0.9872351884841919,grad_norm: 0.9999990529266481, iteration: 142349
loss: 0.992142915725708,grad_norm: 0.9730774579383997, iteration: 142350
loss: 1.0071946382522583,grad_norm: 0.8793992736299661, iteration: 142351
loss: 1.0137001276016235,grad_norm: 0.952407206728196, iteration: 142352
loss: 1.006499171257019,grad_norm: 0.9999992494682132, iteration: 142353
loss: 1.0088574886322021,grad_norm: 0.8473727051231983, iteration: 142354
loss: 0.9817798137664795,grad_norm: 0.9145471553095761, iteration: 142355
loss: 1.000599980354309,grad_norm: 0.9999993195226771, iteration: 142356
loss: 0.9914698600769043,grad_norm: 0.9999991151986514, iteration: 142357
loss: 0.9762647747993469,grad_norm: 0.9999990941799624, iteration: 142358
loss: 0.9635530114173889,grad_norm: 0.9999991428600786, iteration: 142359
loss: 1.0164090394973755,grad_norm: 0.914573294469813, iteration: 142360
loss: 0.9944027066230774,grad_norm: 0.9303969150336536, iteration: 142361
loss: 0.9758152365684509,grad_norm: 0.9745643759365905, iteration: 142362
loss: 1.0034626722335815,grad_norm: 0.9999991625060847, iteration: 142363
loss: 1.0127109289169312,grad_norm: 0.9999993688553843, iteration: 142364
loss: 0.9821659922599792,grad_norm: 0.9999992000646215, iteration: 142365
loss: 1.0035592317581177,grad_norm: 0.999999098301288, iteration: 142366
loss: 1.0182808637619019,grad_norm: 0.9890577905477201, iteration: 142367
loss: 0.9695870280265808,grad_norm: 0.9999989899226296, iteration: 142368
loss: 1.017270803451538,grad_norm: 0.9999992915480105, iteration: 142369
loss: 0.9851722121238708,grad_norm: 0.8480960043260617, iteration: 142370
loss: 1.0054771900177002,grad_norm: 0.9999991557561685, iteration: 142371
loss: 1.0026320219039917,grad_norm: 0.9144043826693681, iteration: 142372
loss: 0.9687836170196533,grad_norm: 0.9665763868496563, iteration: 142373
loss: 0.9582405686378479,grad_norm: 0.9807965546974745, iteration: 142374
loss: 1.0541577339172363,grad_norm: 0.9979257119622709, iteration: 142375
loss: 1.0343167781829834,grad_norm: 0.9999990763424306, iteration: 142376
loss: 0.9909859299659729,grad_norm: 0.9999990794726278, iteration: 142377
loss: 0.9861236214637756,grad_norm: 0.9999992468421384, iteration: 142378
loss: 1.0121510028839111,grad_norm: 0.9999990672121156, iteration: 142379
loss: 1.0460453033447266,grad_norm: 0.9999993494617462, iteration: 142380
loss: 0.9860407710075378,grad_norm: 0.9426128511740485, iteration: 142381
loss: 1.0207823514938354,grad_norm: 0.9999992702596415, iteration: 142382
loss: 1.009467363357544,grad_norm: 0.9161795497343973, iteration: 142383
loss: 1.0027357339859009,grad_norm: 0.9999990597721639, iteration: 142384
loss: 1.0019357204437256,grad_norm: 0.9999997712170176, iteration: 142385
loss: 1.0323998928070068,grad_norm: 0.97525526499336, iteration: 142386
loss: 0.9657500386238098,grad_norm: 0.8815730347596602, iteration: 142387
loss: 0.9790975451469421,grad_norm: 0.9999990449617785, iteration: 142388
loss: 0.9776069521903992,grad_norm: 0.9999991968604399, iteration: 142389
loss: 1.0025081634521484,grad_norm: 0.8951433552371434, iteration: 142390
loss: 0.9848217964172363,grad_norm: 0.9909675932182549, iteration: 142391
loss: 0.9521118402481079,grad_norm: 0.9999991645988624, iteration: 142392
loss: 0.9980778694152832,grad_norm: 0.9999990504574815, iteration: 142393
loss: 1.0037455558776855,grad_norm: 0.9424776127827981, iteration: 142394
loss: 1.0009363889694214,grad_norm: 0.9353292888678627, iteration: 142395
loss: 0.9919026494026184,grad_norm: 0.9688887484003907, iteration: 142396
loss: 1.001722812652588,grad_norm: 0.9999990933409217, iteration: 142397
loss: 1.0137630701065063,grad_norm: 0.999999083731086, iteration: 142398
loss: 1.0352511405944824,grad_norm: 0.9999994608382906, iteration: 142399
loss: 0.9917840957641602,grad_norm: 0.9999991121551143, iteration: 142400
loss: 0.9816052913665771,grad_norm: 0.9687225618187895, iteration: 142401
loss: 0.949946403503418,grad_norm: 0.8516411504572355, iteration: 142402
loss: 1.032059669494629,grad_norm: 0.9999990171142288, iteration: 142403
loss: 1.0003306865692139,grad_norm: 0.9999990988263511, iteration: 142404
loss: 0.9991390109062195,grad_norm: 0.9999992181348589, iteration: 142405
loss: 0.9907186627388,grad_norm: 0.9999991746413047, iteration: 142406
loss: 0.9857639670372009,grad_norm: 0.9999996172119303, iteration: 142407
loss: 0.9924685955047607,grad_norm: 0.9999990832753203, iteration: 142408
loss: 1.0455437898635864,grad_norm: 0.9990578201860703, iteration: 142409
loss: 1.0288305282592773,grad_norm: 0.999999101185664, iteration: 142410
loss: 0.9741235971450806,grad_norm: 0.8296994197031583, iteration: 142411
loss: 1.037986159324646,grad_norm: 0.9999992356572283, iteration: 142412
loss: 0.9745186567306519,grad_norm: 0.8594377319230956, iteration: 142413
loss: 0.9968034625053406,grad_norm: 0.999060029798654, iteration: 142414
loss: 0.9870087504386902,grad_norm: 0.9999991339007444, iteration: 142415
loss: 1.0280394554138184,grad_norm: 0.9999993957804066, iteration: 142416
loss: 1.0346753597259521,grad_norm: 0.9332543651493722, iteration: 142417
loss: 0.9950941205024719,grad_norm: 0.8610570954553278, iteration: 142418
loss: 1.0073096752166748,grad_norm: 0.9999989622161949, iteration: 142419
loss: 0.9926859736442566,grad_norm: 0.9999992042067728, iteration: 142420
loss: 1.0136302709579468,grad_norm: 0.9996381116188937, iteration: 142421
loss: 1.025112271308899,grad_norm: 0.9481253743977238, iteration: 142422
loss: 0.953561544418335,grad_norm: 0.9999992790626484, iteration: 142423
loss: 0.9643998146057129,grad_norm: 0.9743905368079968, iteration: 142424
loss: 1.0250622034072876,grad_norm: 0.9999994631042045, iteration: 142425
loss: 1.0051608085632324,grad_norm: 0.9999991282031534, iteration: 142426
loss: 0.9913396239280701,grad_norm: 0.9999989924666265, iteration: 142427
loss: 1.0260058641433716,grad_norm: 0.947234386495088, iteration: 142428
loss: 0.9997963905334473,grad_norm: 0.9419645324339778, iteration: 142429
loss: 0.9829989075660706,grad_norm: 0.8724681998045486, iteration: 142430
loss: 0.9862930774688721,grad_norm: 0.9999990756182843, iteration: 142431
loss: 0.9993717074394226,grad_norm: 0.9674818193098087, iteration: 142432
loss: 0.9766557216644287,grad_norm: 0.9999990587043465, iteration: 142433
loss: 0.9932607412338257,grad_norm: 0.9789601764701591, iteration: 142434
loss: 1.0892916917800903,grad_norm: 0.9999992274667516, iteration: 142435
loss: 0.9783485531806946,grad_norm: 0.8378825140825602, iteration: 142436
loss: 0.9696251749992371,grad_norm: 0.9999991268694878, iteration: 142437
loss: 0.96832674741745,grad_norm: 0.9999990945295105, iteration: 142438
loss: 1.0288821458816528,grad_norm: 0.9999990123719213, iteration: 142439
loss: 0.9835452437400818,grad_norm: 0.9999990717508965, iteration: 142440
loss: 0.9965739250183105,grad_norm: 0.9999990385977475, iteration: 142441
loss: 1.0354328155517578,grad_norm: 0.9999992493754425, iteration: 142442
loss: 0.9719988703727722,grad_norm: 0.8502384876837757, iteration: 142443
loss: 1.026566982269287,grad_norm: 0.7816264362101004, iteration: 142444
loss: 0.9766390919685364,grad_norm: 0.8364784622058604, iteration: 142445
loss: 1.002318263053894,grad_norm: 0.999999030391888, iteration: 142446
loss: 0.9947317838668823,grad_norm: 0.9999991552204965, iteration: 142447
loss: 1.024490237236023,grad_norm: 0.9999990990152657, iteration: 142448
loss: 1.0213860273361206,grad_norm: 0.9999992758484476, iteration: 142449
loss: 1.0252125263214111,grad_norm: 0.9999993595819208, iteration: 142450
loss: 1.005988359451294,grad_norm: 0.9845621041880901, iteration: 142451
loss: 1.0021919012069702,grad_norm: 0.999999111658472, iteration: 142452
loss: 0.9908096194267273,grad_norm: 0.9999999232163737, iteration: 142453
loss: 0.9997501373291016,grad_norm: 0.9999991661129815, iteration: 142454
loss: 0.9979124665260315,grad_norm: 0.9999990029884733, iteration: 142455
loss: 0.9670860767364502,grad_norm: 0.9999993440156106, iteration: 142456
loss: 1.0195436477661133,grad_norm: 0.9999991036732461, iteration: 142457
loss: 0.9984074234962463,grad_norm: 0.9705460375691646, iteration: 142458
loss: 1.0040698051452637,grad_norm: 0.9394769786025545, iteration: 142459
loss: 0.9621389508247375,grad_norm: 0.9999992469968167, iteration: 142460
loss: 1.0087127685546875,grad_norm: 0.9180351746913076, iteration: 142461
loss: 1.0053454637527466,grad_norm: 0.9999992681849776, iteration: 142462
loss: 1.0280729532241821,grad_norm: 0.9999993439584942, iteration: 142463
loss: 0.980618417263031,grad_norm: 0.9999994885701292, iteration: 142464
loss: 1.041744589805603,grad_norm: 0.9999990791604626, iteration: 142465
loss: 0.9703367352485657,grad_norm: 0.8817808980332094, iteration: 142466
loss: 0.9597035646438599,grad_norm: 0.9934988800817458, iteration: 142467
loss: 0.9800277948379517,grad_norm: 0.9999990987268527, iteration: 142468
loss: 1.0035679340362549,grad_norm: 0.9475107246327225, iteration: 142469
loss: 1.0188539028167725,grad_norm: 0.9999992718171683, iteration: 142470
loss: 1.0113385915756226,grad_norm: 0.9999991211728776, iteration: 142471
loss: 0.9659854173660278,grad_norm: 0.9999990975694626, iteration: 142472
loss: 0.9843597412109375,grad_norm: 0.9999991674905335, iteration: 142473
loss: 0.995480477809906,grad_norm: 0.8180889133907607, iteration: 142474
loss: 1.0104840993881226,grad_norm: 0.9999990370848807, iteration: 142475
loss: 0.9897111058235168,grad_norm: 0.9999990366948395, iteration: 142476
loss: 1.0033164024353027,grad_norm: 0.963240189070109, iteration: 142477
loss: 1.0535293817520142,grad_norm: 0.999999010459203, iteration: 142478
loss: 0.9929248690605164,grad_norm: 0.9999989469828129, iteration: 142479
loss: 1.0004655122756958,grad_norm: 0.7611716169443297, iteration: 142480
loss: 0.9791017770767212,grad_norm: 0.9790192015098746, iteration: 142481
loss: 1.0393701791763306,grad_norm: 0.9999991382306447, iteration: 142482
loss: 0.9794806838035583,grad_norm: 0.9999990963611985, iteration: 142483
loss: 0.998272716999054,grad_norm: 0.9300007695005887, iteration: 142484
loss: 1.026954174041748,grad_norm: 0.9999990652569478, iteration: 142485
loss: 0.9891316294670105,grad_norm: 0.9593478918435884, iteration: 142486
loss: 0.9971051812171936,grad_norm: 0.9999990869798137, iteration: 142487
loss: 0.9829680919647217,grad_norm: 0.936579045811068, iteration: 142488
loss: 0.992153525352478,grad_norm: 0.9999990770386762, iteration: 142489
loss: 0.9804332256317139,grad_norm: 0.979932363204762, iteration: 142490
loss: 1.023859977722168,grad_norm: 0.9999990819832111, iteration: 142491
loss: 1.0130887031555176,grad_norm: 0.9999990815851609, iteration: 142492
loss: 1.0107682943344116,grad_norm: 0.8269114725885077, iteration: 142493
loss: 0.9762945175170898,grad_norm: 0.9208675346990177, iteration: 142494
loss: 0.9923911094665527,grad_norm: 0.9999991343261949, iteration: 142495
loss: 1.003811001777649,grad_norm: 0.9221815927299362, iteration: 142496
loss: 0.9576418399810791,grad_norm: 0.9999990882622983, iteration: 142497
loss: 1.0143541097640991,grad_norm: 0.9000817658865753, iteration: 142498
loss: 0.9949140548706055,grad_norm: 0.9999990526500694, iteration: 142499
loss: 0.9728775024414062,grad_norm: 0.9776024890501157, iteration: 142500
loss: 0.9796226620674133,grad_norm: 0.8833260359962234, iteration: 142501
loss: 0.9856431484222412,grad_norm: 0.9999991622970359, iteration: 142502
loss: 1.047102451324463,grad_norm: 0.9999991286536287, iteration: 142503
loss: 1.0071437358856201,grad_norm: 0.9521267767212884, iteration: 142504
loss: 1.0246824026107788,grad_norm: 0.9235222865760983, iteration: 142505
loss: 0.9657748937606812,grad_norm: 0.9999991147763007, iteration: 142506
loss: 0.983552873134613,grad_norm: 0.9999991044389624, iteration: 142507
loss: 0.9912973642349243,grad_norm: 0.9231170543185117, iteration: 142508
loss: 1.0103321075439453,grad_norm: 0.999999110704068, iteration: 142509
loss: 1.0236839056015015,grad_norm: 0.9999990692445448, iteration: 142510
loss: 1.0402157306671143,grad_norm: 0.9999994218790117, iteration: 142511
loss: 0.9978565573692322,grad_norm: 0.8894102928290423, iteration: 142512
loss: 0.9878923296928406,grad_norm: 0.9041152583458713, iteration: 142513
loss: 1.0156968832015991,grad_norm: 0.9626055481551183, iteration: 142514
loss: 0.9745060205459595,grad_norm: 0.9805074340176163, iteration: 142515
loss: 0.9783318638801575,grad_norm: 0.9999994336241347, iteration: 142516
loss: 0.9959227442741394,grad_norm: 0.9931353473997044, iteration: 142517
loss: 0.9596408605575562,grad_norm: 0.9999991741945288, iteration: 142518
loss: 0.9886491298675537,grad_norm: 0.9190884699363363, iteration: 142519
loss: 0.9873882532119751,grad_norm: 0.999999294632661, iteration: 142520
loss: 0.9796621203422546,grad_norm: 0.9999990650108611, iteration: 142521
loss: 0.9971201419830322,grad_norm: 0.8082930471733312, iteration: 142522
loss: 0.9891771674156189,grad_norm: 0.948957209757884, iteration: 142523
loss: 1.024025797843933,grad_norm: 0.8877814677693779, iteration: 142524
loss: 1.011897325515747,grad_norm: 0.9999991016118144, iteration: 142525
loss: 1.0099538564682007,grad_norm: 0.9999991163952999, iteration: 142526
loss: 0.9996123313903809,grad_norm: 0.9999990198040012, iteration: 142527
loss: 0.9767724275588989,grad_norm: 0.9606892880198772, iteration: 142528
loss: 1.0603904724121094,grad_norm: 0.8855926608496492, iteration: 142529
loss: 0.9974809885025024,grad_norm: 0.9359352138998425, iteration: 142530
loss: 0.9967729449272156,grad_norm: 0.933306287726766, iteration: 142531
loss: 0.9876799583435059,grad_norm: 0.9568102831559193, iteration: 142532
loss: 0.9934874176979065,grad_norm: 0.9324418687510472, iteration: 142533
loss: 1.0177596807479858,grad_norm: 0.9999991725767564, iteration: 142534
loss: 0.9752426147460938,grad_norm: 0.9999991112471942, iteration: 142535
loss: 1.000567078590393,grad_norm: 0.8583947756658331, iteration: 142536
loss: 1.0275286436080933,grad_norm: 0.9999991796847201, iteration: 142537
loss: 1.0418810844421387,grad_norm: 0.9999990758429557, iteration: 142538
loss: 0.9948456287384033,grad_norm: 0.9999992453485951, iteration: 142539
loss: 1.0243779420852661,grad_norm: 0.8518690870217694, iteration: 142540
loss: 1.0352500677108765,grad_norm: 0.9999992367454732, iteration: 142541
loss: 1.0166373252868652,grad_norm: 0.9999991603363699, iteration: 142542
loss: 0.9905619621276855,grad_norm: 0.8974546633217758, iteration: 142543
loss: 1.0203983783721924,grad_norm: 0.999999040266237, iteration: 142544
loss: 1.0069934129714966,grad_norm: 0.9286706804038766, iteration: 142545
loss: 1.0000066757202148,grad_norm: 0.9999990604430689, iteration: 142546
loss: 1.0012662410736084,grad_norm: 0.9944819332995664, iteration: 142547
loss: 0.9682031869888306,grad_norm: 0.9999991468946573, iteration: 142548
loss: 0.9979707598686218,grad_norm: 0.9392708273661818, iteration: 142549
loss: 1.0043865442276,grad_norm: 0.9999991698221352, iteration: 142550
loss: 1.0110491514205933,grad_norm: 0.9999989557568361, iteration: 142551
loss: 1.0223537683486938,grad_norm: 0.9486119638993609, iteration: 142552
loss: 0.9797664284706116,grad_norm: 0.9559205371545282, iteration: 142553
loss: 1.0157493352890015,grad_norm: 0.965651082166363, iteration: 142554
loss: 1.0050150156021118,grad_norm: 0.999999196958855, iteration: 142555
loss: 0.9786076545715332,grad_norm: 0.9999992052308689, iteration: 142556
loss: 1.0348124504089355,grad_norm: 0.9999992378783088, iteration: 142557
loss: 0.9970924854278564,grad_norm: 0.999999179041799, iteration: 142558
loss: 1.0127190351486206,grad_norm: 0.9999993243985469, iteration: 142559
loss: 1.00095796585083,grad_norm: 0.9999991014108585, iteration: 142560
loss: 0.9982150793075562,grad_norm: 0.9999991552498898, iteration: 142561
loss: 1.0478957891464233,grad_norm: 0.9999992856804988, iteration: 142562
loss: 1.015040636062622,grad_norm: 0.9387189530353194, iteration: 142563
loss: 0.9946684837341309,grad_norm: 0.999999158144768, iteration: 142564
loss: 0.9689854383468628,grad_norm: 0.7877741251418219, iteration: 142565
loss: 1.0134543180465698,grad_norm: 0.9999991830130225, iteration: 142566
loss: 1.047184705734253,grad_norm: 0.9397384695994958, iteration: 142567
loss: 1.0297900438308716,grad_norm: 0.9999990681945548, iteration: 142568
loss: 0.999887228012085,grad_norm: 0.9999990799840631, iteration: 142569
loss: 0.985443115234375,grad_norm: 0.9135748912723586, iteration: 142570
loss: 0.9601442813873291,grad_norm: 0.9999991608442709, iteration: 142571
loss: 0.9998961687088013,grad_norm: 0.7819971449573192, iteration: 142572
loss: 1.032668948173523,grad_norm: 0.9573057675707641, iteration: 142573
loss: 0.9690855145454407,grad_norm: 0.9999991797538177, iteration: 142574
loss: 0.9475519061088562,grad_norm: 0.9999992397655707, iteration: 142575
loss: 1.002405047416687,grad_norm: 0.8405449750367815, iteration: 142576
loss: 0.9901881814002991,grad_norm: 0.9032651570042731, iteration: 142577
loss: 0.9927154183387756,grad_norm: 0.999999051203572, iteration: 142578
loss: 0.9784212708473206,grad_norm: 0.8502534530774737, iteration: 142579
loss: 0.9562264084815979,grad_norm: 0.999998979856872, iteration: 142580
loss: 0.995324969291687,grad_norm: 0.99999911600712, iteration: 142581
loss: 0.9863817095756531,grad_norm: 0.9999992201866469, iteration: 142582
loss: 1.0072576999664307,grad_norm: 0.9774282844302873, iteration: 142583
loss: 1.005544662475586,grad_norm: 0.9999992035139594, iteration: 142584
loss: 0.9929172992706299,grad_norm: 0.9999990677024987, iteration: 142585
loss: 1.024178385734558,grad_norm: 0.867132513963893, iteration: 142586
loss: 1.0147863626480103,grad_norm: 0.9981527293499819, iteration: 142587
loss: 1.0046309232711792,grad_norm: 0.7797613537938862, iteration: 142588
loss: 1.000967025756836,grad_norm: 0.9999988955906421, iteration: 142589
loss: 1.0545040369033813,grad_norm: 0.9999990535846072, iteration: 142590
loss: 1.0375845432281494,grad_norm: 0.9999991615441937, iteration: 142591
loss: 0.9957143664360046,grad_norm: 0.9999992013458479, iteration: 142592
loss: 1.033600926399231,grad_norm: 0.9999991889717984, iteration: 142593
loss: 0.9708744883537292,grad_norm: 0.9999990590420469, iteration: 142594
loss: 0.97864830493927,grad_norm: 0.9151448743902127, iteration: 142595
loss: 0.996746838092804,grad_norm: 0.911348784166552, iteration: 142596
loss: 1.0525121688842773,grad_norm: 0.9428596843316431, iteration: 142597
loss: 0.9748478531837463,grad_norm: 0.9307449205014268, iteration: 142598
loss: 0.9942547082901001,grad_norm: 0.9999991038998598, iteration: 142599
loss: 0.9635961055755615,grad_norm: 0.9308393821870655, iteration: 142600
loss: 0.9870562553405762,grad_norm: 0.9999992784607727, iteration: 142601
loss: 0.9988788366317749,grad_norm: 0.9875097936930716, iteration: 142602
loss: 1.01329505443573,grad_norm: 0.96979782731998, iteration: 142603
loss: 0.9784325361251831,grad_norm: 0.9000800931242535, iteration: 142604
loss: 1.0129907131195068,grad_norm: 0.9999991850332974, iteration: 142605
loss: 1.0156769752502441,grad_norm: 0.9999990777679753, iteration: 142606
loss: 0.9742488265037537,grad_norm: 0.9792437686756222, iteration: 142607
loss: 1.0288809537887573,grad_norm: 0.9999991901088293, iteration: 142608
loss: 1.024520754814148,grad_norm: 0.8915111851760169, iteration: 142609
loss: 0.989506721496582,grad_norm: 0.9999990580003184, iteration: 142610
loss: 0.9865056276321411,grad_norm: 0.9999990796542767, iteration: 142611
loss: 1.0026696920394897,grad_norm: 0.999999174726722, iteration: 142612
loss: 1.0157722234725952,grad_norm: 0.9999992214269793, iteration: 142613
loss: 0.993355393409729,grad_norm: 0.9999989748932823, iteration: 142614
loss: 0.9964459538459778,grad_norm: 0.9240437708234175, iteration: 142615
loss: 0.986867368221283,grad_norm: 0.9154456555334624, iteration: 142616
loss: 1.0010714530944824,grad_norm: 0.8813922732176096, iteration: 142617
loss: 1.008928894996643,grad_norm: 0.9517794609306837, iteration: 142618
loss: 0.9992323517799377,grad_norm: 0.8794267033053591, iteration: 142619
loss: 0.9915687441825867,grad_norm: 0.9999990774442802, iteration: 142620
loss: 0.9976423382759094,grad_norm: 0.9999991383507627, iteration: 142621
loss: 1.0360747575759888,grad_norm: 0.9999991034245768, iteration: 142622
loss: 0.9779155254364014,grad_norm: 0.9275010755342615, iteration: 142623
loss: 0.9773280620574951,grad_norm: 0.8367675974892712, iteration: 142624
loss: 0.969391405582428,grad_norm: 0.9999990946562605, iteration: 142625
loss: 1.002243161201477,grad_norm: 0.9999990909180201, iteration: 142626
loss: 1.0409382581710815,grad_norm: 0.9999995195738289, iteration: 142627
loss: 1.0052776336669922,grad_norm: 0.8548260210742863, iteration: 142628
loss: 1.0256190299987793,grad_norm: 0.917119496931086, iteration: 142629
loss: 1.0111457109451294,grad_norm: 0.9999991579848695, iteration: 142630
loss: 0.9737122654914856,grad_norm: 0.9368344603408878, iteration: 142631
loss: 1.0059728622436523,grad_norm: 0.9932912856225242, iteration: 142632
loss: 1.0376797914505005,grad_norm: 0.999999592879509, iteration: 142633
loss: 1.0108387470245361,grad_norm: 0.9999990695322845, iteration: 142634
loss: 1.001481533050537,grad_norm: 0.999999081490479, iteration: 142635
loss: 0.9962047934532166,grad_norm: 0.9999990320491607, iteration: 142636
loss: 1.0232032537460327,grad_norm: 0.9999990524482723, iteration: 142637
loss: 1.0283416509628296,grad_norm: 0.9999991944166967, iteration: 142638
loss: 0.957103431224823,grad_norm: 0.9677269714730831, iteration: 142639
loss: 0.9536858201026917,grad_norm: 0.9999990594994137, iteration: 142640
loss: 1.0231328010559082,grad_norm: 0.9086503506646093, iteration: 142641
loss: 1.043421745300293,grad_norm: 0.8765342191268117, iteration: 142642
loss: 1.0148051977157593,grad_norm: 0.9999991142897253, iteration: 142643
loss: 0.994850754737854,grad_norm: 0.8997659336363105, iteration: 142644
loss: 0.9848231673240662,grad_norm: 0.9561564336315704, iteration: 142645
loss: 1.0018954277038574,grad_norm: 0.9999993827788107, iteration: 142646
loss: 1.0003212690353394,grad_norm: 0.9999997340468746, iteration: 142647
loss: 1.0006422996520996,grad_norm: 0.9999989715767328, iteration: 142648
loss: 1.0091829299926758,grad_norm: 0.9737733138480197, iteration: 142649
loss: 1.0047920942306519,grad_norm: 0.9999990492546027, iteration: 142650
loss: 1.0039418935775757,grad_norm: 0.9999999112155346, iteration: 142651
loss: 1.028705358505249,grad_norm: 0.9999990961090471, iteration: 142652
loss: 0.9989592432975769,grad_norm: 0.9044681785537043, iteration: 142653
loss: 0.9883753657341003,grad_norm: 0.9999992335932728, iteration: 142654
loss: 1.0012593269348145,grad_norm: 0.9999990375625647, iteration: 142655
loss: 0.9742700457572937,grad_norm: 0.8797888467919637, iteration: 142656
loss: 0.9932418465614319,grad_norm: 0.8896664141008118, iteration: 142657
loss: 0.9734835624694824,grad_norm: 0.9729603619350577, iteration: 142658
loss: 0.9670249223709106,grad_norm: 0.9999991888037402, iteration: 142659
loss: 0.9887900948524475,grad_norm: 0.9999990760097087, iteration: 142660
loss: 0.9882405400276184,grad_norm: 0.9999991570263325, iteration: 142661
loss: 0.9716489911079407,grad_norm: 0.9112114888697536, iteration: 142662
loss: 0.9990196824073792,grad_norm: 0.9999992473453138, iteration: 142663
loss: 1.0205879211425781,grad_norm: 0.8961483567681985, iteration: 142664
loss: 1.030798316001892,grad_norm: 0.8841111822641023, iteration: 142665
loss: 1.0017677545547485,grad_norm: 0.9999991549739599, iteration: 142666
loss: 0.9746075868606567,grad_norm: 0.9999991208753697, iteration: 142667
loss: 0.9927912354469299,grad_norm: 0.9999991531735867, iteration: 142668
loss: 0.9944022297859192,grad_norm: 0.9999991102512319, iteration: 142669
loss: 0.9610921740531921,grad_norm: 0.999999183156781, iteration: 142670
loss: 0.9773657917976379,grad_norm: 0.9499755297511041, iteration: 142671
loss: 1.0090293884277344,grad_norm: 0.8798166742442869, iteration: 142672
loss: 0.9972208738327026,grad_norm: 0.9999996038338681, iteration: 142673
loss: 1.0085456371307373,grad_norm: 0.9999989591060983, iteration: 142674
loss: 1.0089974403381348,grad_norm: 0.9773811425469517, iteration: 142675
loss: 1.0279052257537842,grad_norm: 0.9999990959330223, iteration: 142676
loss: 1.0215425491333008,grad_norm: 0.9999990468912912, iteration: 142677
loss: 0.952289342880249,grad_norm: 0.9704038886055264, iteration: 142678
loss: 0.9872353672981262,grad_norm: 0.9999990344136591, iteration: 142679
loss: 1.0260484218597412,grad_norm: 0.8968629413080162, iteration: 142680
loss: 0.995240330696106,grad_norm: 0.9999991027658891, iteration: 142681
loss: 0.9836342334747314,grad_norm: 0.9999992617943766, iteration: 142682
loss: 1.046638011932373,grad_norm: 0.9441111432907418, iteration: 142683
loss: 1.0194783210754395,grad_norm: 0.9668090291389331, iteration: 142684
loss: 0.9958914518356323,grad_norm: 0.9999989643725921, iteration: 142685
loss: 0.9853149056434631,grad_norm: 0.9136812140348618, iteration: 142686
loss: 0.9893710017204285,grad_norm: 0.8739918096896984, iteration: 142687
loss: 0.9921447038650513,grad_norm: 0.8815118604270376, iteration: 142688
loss: 0.9928559064865112,grad_norm: 0.9999991657339583, iteration: 142689
loss: 0.9762745499610901,grad_norm: 0.8843863982075945, iteration: 142690
loss: 0.9952492117881775,grad_norm: 0.9999992180117101, iteration: 142691
loss: 1.0458073616027832,grad_norm: 0.9999990003189348, iteration: 142692
loss: 1.0170047283172607,grad_norm: 0.9685578703813243, iteration: 142693
loss: 0.9559416770935059,grad_norm: 0.9884849747286221, iteration: 142694
loss: 0.9988338947296143,grad_norm: 0.9999989402335907, iteration: 142695
loss: 0.9993028044700623,grad_norm: 0.8716252865776903, iteration: 142696
loss: 0.9923229813575745,grad_norm: 0.9999992343721622, iteration: 142697
loss: 0.9702219367027283,grad_norm: 0.9999992190364665, iteration: 142698
loss: 0.9602445363998413,grad_norm: 0.9139970293950551, iteration: 142699
loss: 1.0209170579910278,grad_norm: 0.9999992210798718, iteration: 142700
loss: 1.0126591920852661,grad_norm: 0.884187846166679, iteration: 142701
loss: 1.0162708759307861,grad_norm: 0.9997566774071432, iteration: 142702
loss: 0.9906814098358154,grad_norm: 0.9367283696102373, iteration: 142703
loss: 0.9864632487297058,grad_norm: 0.9202891634276519, iteration: 142704
loss: 0.9675432443618774,grad_norm: 0.9431474345228799, iteration: 142705
loss: 0.9907427430152893,grad_norm: 0.9999989807983187, iteration: 142706
loss: 1.021858811378479,grad_norm: 0.9423192458807667, iteration: 142707
loss: 1.0470130443572998,grad_norm: 0.9318802586578478, iteration: 142708
loss: 1.0138089656829834,grad_norm: 0.9999991434343714, iteration: 142709
loss: 0.9927672147750854,grad_norm: 0.9999991738486405, iteration: 142710
loss: 1.0260744094848633,grad_norm: 0.9185643136636426, iteration: 142711
loss: 1.0202502012252808,grad_norm: 0.999999071900814, iteration: 142712
loss: 1.0313671827316284,grad_norm: 0.9999989385242005, iteration: 142713
loss: 0.9794970750808716,grad_norm: 0.999999172778483, iteration: 142714
loss: 1.0021637678146362,grad_norm: 0.9999991315421105, iteration: 142715
loss: 1.0129673480987549,grad_norm: 0.9999990355157647, iteration: 142716
loss: 1.0291916131973267,grad_norm: 0.999999072180321, iteration: 142717
loss: 0.9910699129104614,grad_norm: 0.9999991573713489, iteration: 142718
loss: 1.0106043815612793,grad_norm: 0.999998888292697, iteration: 142719
loss: 0.9765878915786743,grad_norm: 0.9999991327973012, iteration: 142720
loss: 0.984741747379303,grad_norm: 0.9999992331912781, iteration: 142721
loss: 0.9755727052688599,grad_norm: 0.9999991529416449, iteration: 142722
loss: 1.0211765766143799,grad_norm: 0.9272460158453458, iteration: 142723
loss: 1.0005651712417603,grad_norm: 0.9999990579615241, iteration: 142724
loss: 1.0142409801483154,grad_norm: 0.8591281741853682, iteration: 142725
loss: 0.9797968864440918,grad_norm: 0.857549985395661, iteration: 142726
loss: 1.0135055780410767,grad_norm: 0.9999992033812068, iteration: 142727
loss: 0.9697076082229614,grad_norm: 0.9999990258693486, iteration: 142728
loss: 1.010077953338623,grad_norm: 0.9999991601697764, iteration: 142729
loss: 0.9870021939277649,grad_norm: 0.9731133635914653, iteration: 142730
loss: 0.9900601506233215,grad_norm: 0.9999992257665155, iteration: 142731
loss: 1.0021657943725586,grad_norm: 0.9999992101279884, iteration: 142732
loss: 0.9622700214385986,grad_norm: 0.8182461640097238, iteration: 142733
loss: 1.0243995189666748,grad_norm: 0.9999989481259404, iteration: 142734
loss: 0.9901937246322632,grad_norm: 0.9135329853565963, iteration: 142735
loss: 0.9852744340896606,grad_norm: 0.8153358534862658, iteration: 142736
loss: 1.011336088180542,grad_norm: 0.9999994681427417, iteration: 142737
loss: 1.017501711845398,grad_norm: 0.9999991228020692, iteration: 142738
loss: 0.9965994358062744,grad_norm: 0.9999991830189794, iteration: 142739
loss: 0.9523204565048218,grad_norm: 0.8069545048827007, iteration: 142740
loss: 1.000205159187317,grad_norm: 0.9506959915561446, iteration: 142741
loss: 1.0084651708602905,grad_norm: 0.9999993586037796, iteration: 142742
loss: 1.0163805484771729,grad_norm: 0.9151419422272243, iteration: 142743
loss: 0.9666380882263184,grad_norm: 0.9561214953366958, iteration: 142744
loss: 0.9967758655548096,grad_norm: 0.9410435094043483, iteration: 142745
loss: 0.9778256416320801,grad_norm: 0.9999991484220527, iteration: 142746
loss: 0.993766725063324,grad_norm: 0.9999990071558749, iteration: 142747
loss: 0.9688876867294312,grad_norm: 0.9999993436429199, iteration: 142748
loss: 0.9690616130828857,grad_norm: 0.9416605021881506, iteration: 142749
loss: 1.0019190311431885,grad_norm: 0.9999991259758071, iteration: 142750
loss: 1.0130589008331299,grad_norm: 0.9715930512319103, iteration: 142751
loss: 0.9920307993888855,grad_norm: 0.9999990808070729, iteration: 142752
loss: 0.9733765125274658,grad_norm: 0.9999990861923918, iteration: 142753
loss: 1.007002830505371,grad_norm: 0.9539479909360568, iteration: 142754
loss: 0.9602022767066956,grad_norm: 0.9387684221234451, iteration: 142755
loss: 1.0212295055389404,grad_norm: 0.9999992767272186, iteration: 142756
loss: 1.0507804155349731,grad_norm: 0.9999991704417542, iteration: 142757
loss: 1.0376273393630981,grad_norm: 0.999999096058832, iteration: 142758
loss: 1.0014078617095947,grad_norm: 0.9999990705389575, iteration: 142759
loss: 0.9926268458366394,grad_norm: 0.9999989677760346, iteration: 142760
loss: 1.0024003982543945,grad_norm: 0.9999991612434013, iteration: 142761
loss: 1.0023915767669678,grad_norm: 0.9713419767852512, iteration: 142762
loss: 1.030685305595398,grad_norm: 0.9999992332159875, iteration: 142763
loss: 0.9678558707237244,grad_norm: 0.9999991048898039, iteration: 142764
loss: 1.0299447774887085,grad_norm: 0.9999990307844949, iteration: 142765
loss: 1.0542093515396118,grad_norm: 0.9999990743090463, iteration: 142766
loss: 0.9881618618965149,grad_norm: 0.9717301623691297, iteration: 142767
loss: 0.9897630214691162,grad_norm: 0.9999989857418315, iteration: 142768
loss: 0.995082437992096,grad_norm: 0.9999991386174835, iteration: 142769
loss: 0.9926703572273254,grad_norm: 0.9999992227363251, iteration: 142770
loss: 0.9787982702255249,grad_norm: 0.9999990028516086, iteration: 142771
loss: 0.997949481010437,grad_norm: 0.9915363329515292, iteration: 142772
loss: 1.0104014873504639,grad_norm: 0.9999992648651027, iteration: 142773
loss: 0.9814716577529907,grad_norm: 0.9705162937225029, iteration: 142774
loss: 0.9934096932411194,grad_norm: 0.9999989269944487, iteration: 142775
loss: 1.0081745386123657,grad_norm: 0.8960978104207096, iteration: 142776
loss: 1.025095820426941,grad_norm: 0.8344677152091022, iteration: 142777
loss: 1.0292234420776367,grad_norm: 0.9999992022257688, iteration: 142778
loss: 0.991226851940155,grad_norm: 0.9112711334521993, iteration: 142779
loss: 0.9994850754737854,grad_norm: 0.9592188907096814, iteration: 142780
loss: 0.9969764947891235,grad_norm: 0.9999991407901726, iteration: 142781
loss: 1.0120772123336792,grad_norm: 0.8506381749856365, iteration: 142782
loss: 1.020925521850586,grad_norm: 0.9999990804133616, iteration: 142783
loss: 1.0272514820098877,grad_norm: 0.9999992146712234, iteration: 142784
loss: 0.9927144646644592,grad_norm: 0.8736242926847405, iteration: 142785
loss: 0.9506402611732483,grad_norm: 0.9392353843432545, iteration: 142786
loss: 1.0011029243469238,grad_norm: 0.8912788791362157, iteration: 142787
loss: 0.9700965881347656,grad_norm: 0.9084397775475722, iteration: 142788
loss: 0.9460291266441345,grad_norm: 0.9999991455849742, iteration: 142789
loss: 1.0042816400527954,grad_norm: 0.9448184815807947, iteration: 142790
loss: 1.0023905038833618,grad_norm: 0.9999992115985119, iteration: 142791
loss: 0.9934582114219666,grad_norm: 0.9754178095274584, iteration: 142792
loss: 1.0190916061401367,grad_norm: 0.9999990096422064, iteration: 142793
loss: 1.0141159296035767,grad_norm: 0.910233116614349, iteration: 142794
loss: 1.0315182209014893,grad_norm: 0.9999990427407857, iteration: 142795
loss: 1.0254849195480347,grad_norm: 0.9767728865324775, iteration: 142796
loss: 0.9984692335128784,grad_norm: 0.9999990649549978, iteration: 142797
loss: 1.001200556755066,grad_norm: 0.9999990853769802, iteration: 142798
loss: 0.9772788882255554,grad_norm: 0.9999992438222035, iteration: 142799
loss: 1.0402271747589111,grad_norm: 0.9999991379285939, iteration: 142800
loss: 1.007500410079956,grad_norm: 0.9449227086792013, iteration: 142801
loss: 1.0311825275421143,grad_norm: 0.9999990193994482, iteration: 142802
loss: 0.9635491371154785,grad_norm: 0.9999990933418546, iteration: 142803
loss: 1.010725975036621,grad_norm: 0.9999991935343374, iteration: 142804
loss: 1.028878927230835,grad_norm: 0.9994460661257359, iteration: 142805
loss: 1.0062122344970703,grad_norm: 0.9578910543027092, iteration: 142806
loss: 0.9858266711235046,grad_norm: 0.999999062577332, iteration: 142807
loss: 0.9478113651275635,grad_norm: 0.9999992587571437, iteration: 142808
loss: 0.9296377897262573,grad_norm: 0.9954140329705768, iteration: 142809
loss: 1.0226167440414429,grad_norm: 0.9999993368680128, iteration: 142810
loss: 0.9508202075958252,grad_norm: 0.8664858398764917, iteration: 142811
loss: 1.0042884349822998,grad_norm: 0.9999992218913178, iteration: 142812
loss: 1.0084397792816162,grad_norm: 0.8751357839439976, iteration: 142813
loss: 1.045477032661438,grad_norm: 0.9814877071546027, iteration: 142814
loss: 1.0534554719924927,grad_norm: 0.9999993014917251, iteration: 142815
loss: 1.0221679210662842,grad_norm: 0.9886690804465509, iteration: 142816
loss: 0.983703076839447,grad_norm: 0.9390601759318288, iteration: 142817
loss: 0.9751609563827515,grad_norm: 0.9999989581046824, iteration: 142818
loss: 0.9761335849761963,grad_norm: 0.8578017471020165, iteration: 142819
loss: 0.9946321249008179,grad_norm: 0.9386375684160833, iteration: 142820
loss: 0.9997818470001221,grad_norm: 0.915272007603009, iteration: 142821
loss: 0.9688423275947571,grad_norm: 0.9331815086993571, iteration: 142822
loss: 0.9841633439064026,grad_norm: 0.9999993911726562, iteration: 142823
loss: 1.0036377906799316,grad_norm: 0.9999992254655794, iteration: 142824
loss: 1.000386118888855,grad_norm: 0.9838641294467599, iteration: 142825
loss: 1.0090949535369873,grad_norm: 0.8782112442498231, iteration: 142826
loss: 1.0046547651290894,grad_norm: 0.9431068133227015, iteration: 142827
loss: 1.0316882133483887,grad_norm: 0.9999991480510794, iteration: 142828
loss: 1.0056191682815552,grad_norm: 0.999999215670078, iteration: 142829
loss: 0.9653570055961609,grad_norm: 0.8714137322656598, iteration: 142830
loss: 1.0569988489151,grad_norm: 0.9999991981317389, iteration: 142831
loss: 1.0441452264785767,grad_norm: 0.7829785971235346, iteration: 142832
loss: 1.002756953239441,grad_norm: 0.9817546897581019, iteration: 142833
loss: 1.005749225616455,grad_norm: 0.9999991268437864, iteration: 142834
loss: 1.0253772735595703,grad_norm: 0.9952293536332855, iteration: 142835
loss: 1.0000951290130615,grad_norm: 0.999999202867188, iteration: 142836
loss: 1.0221490859985352,grad_norm: 0.9999991777429199, iteration: 142837
loss: 1.000088095664978,grad_norm: 0.9999990652137806, iteration: 142838
loss: 0.9898102283477783,grad_norm: 0.9501179919777113, iteration: 142839
loss: 1.035059928894043,grad_norm: 0.9999992212082462, iteration: 142840
loss: 0.9926373958587646,grad_norm: 0.9316161053965479, iteration: 142841
loss: 1.0128753185272217,grad_norm: 0.9999992138356896, iteration: 142842
loss: 1.044175624847412,grad_norm: 0.987143966668756, iteration: 142843
loss: 1.0295048952102661,grad_norm: 0.9999992266049627, iteration: 142844
loss: 0.9618217945098877,grad_norm: 0.9591876909569944, iteration: 142845
loss: 1.0160531997680664,grad_norm: 0.9999990539086699, iteration: 142846
loss: 0.9989787936210632,grad_norm: 0.9999991790581848, iteration: 142847
loss: 0.9893672466278076,grad_norm: 0.9999991339625771, iteration: 142848
loss: 1.0049976110458374,grad_norm: 0.9999991213187258, iteration: 142849
loss: 0.961968183517456,grad_norm: 0.9999991281817755, iteration: 142850
loss: 1.0039081573486328,grad_norm: 0.9999990854382932, iteration: 142851
loss: 0.9941268563270569,grad_norm: 0.9267071220881671, iteration: 142852
loss: 1.0258746147155762,grad_norm: 0.8932452163311637, iteration: 142853
loss: 1.0174529552459717,grad_norm: 0.9256320672318579, iteration: 142854
loss: 1.0183550119400024,grad_norm: 0.9999990583626401, iteration: 142855
loss: 0.9867172837257385,grad_norm: 0.9999990683459562, iteration: 142856
loss: 0.9946954846382141,grad_norm: 0.9999990581748295, iteration: 142857
loss: 0.9904060959815979,grad_norm: 0.9320594517628726, iteration: 142858
loss: 1.0203884840011597,grad_norm: 0.9999991741044073, iteration: 142859
loss: 1.0168217420578003,grad_norm: 0.99999911064825, iteration: 142860
loss: 0.9837870597839355,grad_norm: 0.9999992269368702, iteration: 142861
loss: 0.9932429194450378,grad_norm: 0.9999993019938886, iteration: 142862
loss: 0.9938785433769226,grad_norm: 0.9999990307798327, iteration: 142863
loss: 1.0244808197021484,grad_norm: 0.9916485877494605, iteration: 142864
loss: 1.0106390714645386,grad_norm: 0.9446079085026711, iteration: 142865
loss: 0.9807475805282593,grad_norm: 0.9999991537721046, iteration: 142866
loss: 0.9862725734710693,grad_norm: 0.9999990592434268, iteration: 142867
loss: 0.9920099377632141,grad_norm: 0.9295415601134659, iteration: 142868
loss: 0.9541833400726318,grad_norm: 0.9999992004946776, iteration: 142869
loss: 1.0000691413879395,grad_norm: 0.9999991808081431, iteration: 142870
loss: 1.0229825973510742,grad_norm: 0.9752270797340798, iteration: 142871
loss: 1.0163490772247314,grad_norm: 0.9999990673850697, iteration: 142872
loss: 0.9906078577041626,grad_norm: 0.9813365857746047, iteration: 142873
loss: 0.9919208884239197,grad_norm: 0.9999993757490037, iteration: 142874
loss: 0.993705153465271,grad_norm: 0.999999115031633, iteration: 142875
loss: 1.0045063495635986,grad_norm: 0.9999991332856871, iteration: 142876
loss: 0.9869788289070129,grad_norm: 0.9999990315210099, iteration: 142877
loss: 1.0190634727478027,grad_norm: 0.9999991532749081, iteration: 142878
loss: 1.006306767463684,grad_norm: 0.9999991242093462, iteration: 142879
loss: 1.0429506301879883,grad_norm: 0.9999990627788932, iteration: 142880
loss: 1.0091352462768555,grad_norm: 0.9999990434445709, iteration: 142881
loss: 0.9795194268226624,grad_norm: 0.9999992394751609, iteration: 142882
loss: 0.998310923576355,grad_norm: 0.9027089666211632, iteration: 142883
loss: 1.0003527402877808,grad_norm: 0.9999989736884181, iteration: 142884
loss: 1.0289541482925415,grad_norm: 0.9894062167360048, iteration: 142885
loss: 0.9782467484474182,grad_norm: 0.9999990998897642, iteration: 142886
loss: 1.0244948863983154,grad_norm: 0.9999992942076752, iteration: 142887
loss: 0.9633108377456665,grad_norm: 0.9999992051802998, iteration: 142888
loss: 0.9935964941978455,grad_norm: 0.9589826594891399, iteration: 142889
loss: 0.9913482666015625,grad_norm: 0.9999992200161502, iteration: 142890
loss: 0.9803416132926941,grad_norm: 0.8656788539413214, iteration: 142891
loss: 1.0339478254318237,grad_norm: 0.9999993240718215, iteration: 142892
loss: 0.9830093383789062,grad_norm: 0.9999992714069601, iteration: 142893
loss: 0.979515016078949,grad_norm: 0.9999991065457222, iteration: 142894
loss: 1.00084388256073,grad_norm: 0.9322901612216653, iteration: 142895
loss: 0.964131236076355,grad_norm: 0.9999992180534363, iteration: 142896
loss: 1.0170398950576782,grad_norm: 0.9702365388728534, iteration: 142897
loss: 1.0178920030593872,grad_norm: 0.9999991933848852, iteration: 142898
loss: 0.977535605430603,grad_norm: 0.9999991032362535, iteration: 142899
loss: 0.9984667301177979,grad_norm: 0.8446074097541845, iteration: 142900
loss: 1.017356514930725,grad_norm: 0.9440970959598576, iteration: 142901
loss: 1.0117751359939575,grad_norm: 0.9999990211100387, iteration: 142902
loss: 1.0036498308181763,grad_norm: 0.9382420147555072, iteration: 142903
loss: 0.9700117707252502,grad_norm: 0.9999992093718617, iteration: 142904
loss: 0.9956431984901428,grad_norm: 0.9543942580503764, iteration: 142905
loss: 1.0414544343948364,grad_norm: 0.9999991133301672, iteration: 142906
loss: 0.983687698841095,grad_norm: 0.9999990225460537, iteration: 142907
loss: 0.9957565665245056,grad_norm: 0.9999992424988271, iteration: 142908
loss: 1.0052350759506226,grad_norm: 0.9999991889509737, iteration: 142909
loss: 1.0356642007827759,grad_norm: 0.999999230611969, iteration: 142910
loss: 1.0033984184265137,grad_norm: 0.9999992165336841, iteration: 142911
loss: 0.9812692403793335,grad_norm: 0.9062513178626573, iteration: 142912
loss: 0.9737586379051208,grad_norm: 0.9196138069302352, iteration: 142913
loss: 1.0190943479537964,grad_norm: 0.9653465642102754, iteration: 142914
loss: 0.994905948638916,grad_norm: 0.895220849302639, iteration: 142915
loss: 1.0063556432724,grad_norm: 0.9999989341127751, iteration: 142916
loss: 1.0196198225021362,grad_norm: 0.999998932371388, iteration: 142917
loss: 0.995719313621521,grad_norm: 0.991686787658844, iteration: 142918
loss: 0.9768995642662048,grad_norm: 0.9999989966745706, iteration: 142919
loss: 1.0317554473876953,grad_norm: 0.9999989307552152, iteration: 142920
loss: 1.0088601112365723,grad_norm: 0.9698337166510719, iteration: 142921
loss: 1.006363034248352,grad_norm: 0.9999989055431948, iteration: 142922
loss: 0.9958130717277527,grad_norm: 0.999999073238213, iteration: 142923
loss: 1.0021519660949707,grad_norm: 0.9999996060758576, iteration: 142924
loss: 0.9792597889900208,grad_norm: 0.9570518651607735, iteration: 142925
loss: 0.9898769855499268,grad_norm: 0.8670416276768083, iteration: 142926
loss: 1.025092363357544,grad_norm: 0.9999991886541479, iteration: 142927
loss: 0.9916800260543823,grad_norm: 0.9999991590351309, iteration: 142928
loss: 1.0107249021530151,grad_norm: 0.9999989984176219, iteration: 142929
loss: 0.9914480447769165,grad_norm: 0.9573628752941681, iteration: 142930
loss: 0.9843026399612427,grad_norm: 0.9999991372419523, iteration: 142931
loss: 0.9771491885185242,grad_norm: 0.8268423969911005, iteration: 142932
loss: 1.0316087007522583,grad_norm: 0.9999990064609945, iteration: 142933
loss: 1.0019420385360718,grad_norm: 0.9944623793240962, iteration: 142934
loss: 1.0327380895614624,grad_norm: 0.9999994491209898, iteration: 142935
loss: 1.0059882402420044,grad_norm: 0.9641974187855201, iteration: 142936
loss: 0.983791708946228,grad_norm: 0.912764944068054, iteration: 142937
loss: 0.9910591244697571,grad_norm: 0.9843875363026151, iteration: 142938
loss: 0.9745240807533264,grad_norm: 0.9999992293329032, iteration: 142939
loss: 1.0206011533737183,grad_norm: 0.9999991447650195, iteration: 142940
loss: 1.0049982070922852,grad_norm: 0.9662200737906081, iteration: 142941
loss: 0.9647471904754639,grad_norm: 0.998971210342749, iteration: 142942
loss: 0.989495038986206,grad_norm: 0.9999992033687135, iteration: 142943
loss: 0.9818874597549438,grad_norm: 0.9644501535578344, iteration: 142944
loss: 0.9831976294517517,grad_norm: 0.9792275260509851, iteration: 142945
loss: 0.960502028465271,grad_norm: 0.9816079244660583, iteration: 142946
loss: 1.0010720491409302,grad_norm: 0.9070138319282344, iteration: 142947
loss: 1.0235592126846313,grad_norm: 0.9999991781231571, iteration: 142948
loss: 1.023537516593933,grad_norm: 0.9999990150168621, iteration: 142949
loss: 1.0213913917541504,grad_norm: 0.9659079021513602, iteration: 142950
loss: 1.001743197441101,grad_norm: 0.870978044708784, iteration: 142951
loss: 0.9661304950714111,grad_norm: 0.9082806021961835, iteration: 142952
loss: 1.0393389463424683,grad_norm: 0.9999990821029651, iteration: 142953
loss: 1.0124149322509766,grad_norm: 0.9999989710637015, iteration: 142954
loss: 1.0120950937271118,grad_norm: 0.9999991295155055, iteration: 142955
loss: 0.9723308086395264,grad_norm: 0.9999997088933678, iteration: 142956
loss: 0.9639770984649658,grad_norm: 0.9999990721394225, iteration: 142957
loss: 0.9875356554985046,grad_norm: 0.9999990664750543, iteration: 142958
loss: 0.972131073474884,grad_norm: 0.9694223293700054, iteration: 142959
loss: 1.0099867582321167,grad_norm: 0.999999009520984, iteration: 142960
loss: 0.9912868142127991,grad_norm: 0.7934172411415937, iteration: 142961
loss: 1.026746153831482,grad_norm: 0.9999990615686417, iteration: 142962
loss: 1.022072196006775,grad_norm: 0.9999989088324316, iteration: 142963
loss: 1.0289353132247925,grad_norm: 0.9999990814645218, iteration: 142964
loss: 0.9749153256416321,grad_norm: 0.9999991347640304, iteration: 142965
loss: 1.0042036771774292,grad_norm: 0.9198397729591856, iteration: 142966
loss: 0.9915913343429565,grad_norm: 0.981044422381804, iteration: 142967
loss: 0.9855473637580872,grad_norm: 0.9238798130445395, iteration: 142968
loss: 1.0281546115875244,grad_norm: 0.9999990846803836, iteration: 142969
loss: 0.9685249924659729,grad_norm: 0.8919782323675219, iteration: 142970
loss: 1.0363918542861938,grad_norm: 0.9879540289644512, iteration: 142971
loss: 1.0248361825942993,grad_norm: 0.9999989880047128, iteration: 142972
loss: 0.9892341494560242,grad_norm: 0.9999990835130493, iteration: 142973
loss: 0.9766799211502075,grad_norm: 0.8773593724559935, iteration: 142974
loss: 0.9821648597717285,grad_norm: 0.9999991260692417, iteration: 142975
loss: 1.1030855178833008,grad_norm: 0.9999995922666558, iteration: 142976
loss: 1.0098484754562378,grad_norm: 0.9999995500164767, iteration: 142977
loss: 1.0091465711593628,grad_norm: 0.9973502904033581, iteration: 142978
loss: 1.0505216121673584,grad_norm: 0.9999990959208195, iteration: 142979
loss: 1.0193617343902588,grad_norm: 0.9999991810111979, iteration: 142980
loss: 0.979026734828949,grad_norm: 0.8996950758603689, iteration: 142981
loss: 0.9997689723968506,grad_norm: 0.9868455081768402, iteration: 142982
loss: 0.9600858092308044,grad_norm: 0.9999991707570599, iteration: 142983
loss: 0.9665037989616394,grad_norm: 0.9599650956853193, iteration: 142984
loss: 0.9943071007728577,grad_norm: 0.9972512045120395, iteration: 142985
loss: 1.1724374294281006,grad_norm: 0.9999998028373834, iteration: 142986
loss: 0.9933792352676392,grad_norm: 0.9999990567392723, iteration: 142987
loss: 0.9972837567329407,grad_norm: 0.9999990928599484, iteration: 142988
loss: 0.9866030812263489,grad_norm: 0.9999990629007686, iteration: 142989
loss: 1.0174814462661743,grad_norm: 0.8867216897598457, iteration: 142990
loss: 1.0415899753570557,grad_norm: 0.9999990763881256, iteration: 142991
loss: 1.0408214330673218,grad_norm: 0.9999992095387881, iteration: 142992
loss: 0.9958599209785461,grad_norm: 0.9110384769344702, iteration: 142993
loss: 1.0099254846572876,grad_norm: 0.9481870057735075, iteration: 142994
loss: 0.9679898023605347,grad_norm: 0.9789871583461865, iteration: 142995
loss: 1.0048288106918335,grad_norm: 0.9999993027564914, iteration: 142996
loss: 1.0228084325790405,grad_norm: 0.9999990983403995, iteration: 142997
loss: 1.0100023746490479,grad_norm: 0.999999195558962, iteration: 142998
loss: 1.0309338569641113,grad_norm: 0.9999992542148419, iteration: 142999
loss: 0.9727213382720947,grad_norm: 0.7733588463742143, iteration: 143000
loss: 0.9924938082695007,grad_norm: 0.9999992102097152, iteration: 143001
loss: 1.0053037405014038,grad_norm: 0.9999994815208415, iteration: 143002
loss: 0.9848101735115051,grad_norm: 0.999998977348891, iteration: 143003
loss: 1.0229729413986206,grad_norm: 0.9999991281734574, iteration: 143004
loss: 0.9837693572044373,grad_norm: 0.9999991721416702, iteration: 143005
loss: 1.023707389831543,grad_norm: 0.9999992124798588, iteration: 143006
loss: 0.9966704249382019,grad_norm: 0.9534322183228711, iteration: 143007
loss: 1.0059335231781006,grad_norm: 0.8776363055084412, iteration: 143008
loss: 0.9898700714111328,grad_norm: 0.884540056977341, iteration: 143009
loss: 1.0070735216140747,grad_norm: 0.9999992543677265, iteration: 143010
loss: 1.0287166833877563,grad_norm: 0.9999995230188031, iteration: 143011
loss: 1.0049151182174683,grad_norm: 0.9999993389389222, iteration: 143012
loss: 1.0122675895690918,grad_norm: 0.853703055744788, iteration: 143013
loss: 0.9930680394172668,grad_norm: 0.9999991810633111, iteration: 143014
loss: 1.0178970098495483,grad_norm: 0.9999992721956653, iteration: 143015
loss: 0.9875767827033997,grad_norm: 0.8921571886662019, iteration: 143016
loss: 0.996410608291626,grad_norm: 0.9999991161083207, iteration: 143017
loss: 0.9722720980644226,grad_norm: 0.9999992088254444, iteration: 143018
loss: 0.9850418567657471,grad_norm: 0.9381997828493536, iteration: 143019
loss: 1.0065878629684448,grad_norm: 0.8333339882840096, iteration: 143020
loss: 1.008360505104065,grad_norm: 0.858864282393308, iteration: 143021
loss: 0.9944096803665161,grad_norm: 0.9712246209916893, iteration: 143022
loss: 1.013034701347351,grad_norm: 0.916289663172398, iteration: 143023
loss: 0.9870407581329346,grad_norm: 0.9999995994631504, iteration: 143024
loss: 1.0053825378417969,grad_norm: 0.9999991792176567, iteration: 143025
loss: 1.025816559791565,grad_norm: 0.9020041706933165, iteration: 143026
loss: 1.02396821975708,grad_norm: 0.8585753633937269, iteration: 143027
loss: 1.0108925104141235,grad_norm: 0.999999548431585, iteration: 143028
loss: 1.0175679922103882,grad_norm: 0.9999989542894562, iteration: 143029
loss: 1.0071790218353271,grad_norm: 0.9938062709237226, iteration: 143030
loss: 1.0102511644363403,grad_norm: 0.9455713428169794, iteration: 143031
loss: 1.0137174129486084,grad_norm: 0.7970610667228077, iteration: 143032
loss: 1.0256156921386719,grad_norm: 0.9442719272846256, iteration: 143033
loss: 0.9735732674598694,grad_norm: 0.9790089940032766, iteration: 143034
loss: 0.9769325256347656,grad_norm: 0.9999990382287584, iteration: 143035
loss: 1.0047791004180908,grad_norm: 0.9999992621568281, iteration: 143036
loss: 0.9937023520469666,grad_norm: 0.9999991259309504, iteration: 143037
loss: 0.980998158454895,grad_norm: 0.9999989886636881, iteration: 143038
loss: 0.9739111661911011,grad_norm: 0.9999992825079509, iteration: 143039
loss: 1.0358545780181885,grad_norm: 0.9115436005221905, iteration: 143040
loss: 0.9929671287536621,grad_norm: 0.9999991155380906, iteration: 143041
loss: 0.9722914695739746,grad_norm: 0.9336885413676522, iteration: 143042
loss: 1.0224964618682861,grad_norm: 0.9999997437338629, iteration: 143043
loss: 0.9590128064155579,grad_norm: 0.9017270823818435, iteration: 143044
loss: 0.9849498271942139,grad_norm: 0.9999991892222195, iteration: 143045
loss: 0.9803886413574219,grad_norm: 0.9999990444761018, iteration: 143046
loss: 0.9920358061790466,grad_norm: 0.9293527280065313, iteration: 143047
loss: 1.0254358053207397,grad_norm: 0.9565267683065481, iteration: 143048
loss: 1.009595513343811,grad_norm: 0.9999991258660544, iteration: 143049
loss: 1.0154788494110107,grad_norm: 0.9201142975468347, iteration: 143050
loss: 0.9469238519668579,grad_norm: 0.9156074144417, iteration: 143051
loss: 0.99810791015625,grad_norm: 0.9999994994887957, iteration: 143052
loss: 1.0204366445541382,grad_norm: 0.9964113234914356, iteration: 143053
loss: 1.0279525518417358,grad_norm: 0.9999991303998764, iteration: 143054
loss: 1.0098382234573364,grad_norm: 0.9999993641640466, iteration: 143055
loss: 1.0752410888671875,grad_norm: 0.999999530520311, iteration: 143056
loss: 1.001745581626892,grad_norm: 0.918372578260668, iteration: 143057
loss: 1.0071858167648315,grad_norm: 0.999999154751423, iteration: 143058
loss: 0.9880117774009705,grad_norm: 0.9753112115825119, iteration: 143059
loss: 0.9988915920257568,grad_norm: 0.9999992293997881, iteration: 143060
loss: 0.9922526478767395,grad_norm: 0.9681872612370913, iteration: 143061
loss: 1.0087050199508667,grad_norm: 0.9999991007705444, iteration: 143062
loss: 0.9962011575698853,grad_norm: 0.9999990786490685, iteration: 143063
loss: 1.0177003145217896,grad_norm: 0.8183578863329082, iteration: 143064
loss: 0.9922807216644287,grad_norm: 0.9421667743094434, iteration: 143065
loss: 1.0025230646133423,grad_norm: 0.8666530807462154, iteration: 143066
loss: 0.9724071025848389,grad_norm: 0.9999991400858728, iteration: 143067
loss: 0.9846510291099548,grad_norm: 0.9999990166384262, iteration: 143068
loss: 0.9893908500671387,grad_norm: 0.9045221832769746, iteration: 143069
loss: 0.958422064781189,grad_norm: 0.9720085072265024, iteration: 143070
loss: 1.0013667345046997,grad_norm: 0.8576931455636263, iteration: 143071
loss: 1.0652161836624146,grad_norm: 0.9999995487677531, iteration: 143072
loss: 1.0038304328918457,grad_norm: 0.9999998721765443, iteration: 143073
loss: 0.993091881275177,grad_norm: 0.9239466766196311, iteration: 143074
loss: 1.0224426984786987,grad_norm: 0.9470618652649144, iteration: 143075
loss: 1.0030838251113892,grad_norm: 0.999999409129824, iteration: 143076
loss: 1.0026726722717285,grad_norm: 0.9999990585203393, iteration: 143077
loss: 0.9547752737998962,grad_norm: 0.9999991824892991, iteration: 143078
loss: 1.0136380195617676,grad_norm: 0.999999258751251, iteration: 143079
loss: 0.9490047693252563,grad_norm: 0.9999992765043081, iteration: 143080
loss: 0.9796172380447388,grad_norm: 0.9999991041860339, iteration: 143081
loss: 1.0177274942398071,grad_norm: 0.9534613154254914, iteration: 143082
loss: 0.9704323410987854,grad_norm: 0.9999991381476836, iteration: 143083
loss: 1.0062265396118164,grad_norm: 0.9999990864416927, iteration: 143084
loss: 1.028091311454773,grad_norm: 0.9999992079904171, iteration: 143085
loss: 1.0238944292068481,grad_norm: 0.9590853222456458, iteration: 143086
loss: 1.0187784433364868,grad_norm: 0.9999990945441036, iteration: 143087
loss: 1.0033166408538818,grad_norm: 0.9688413367590775, iteration: 143088
loss: 1.0438199043273926,grad_norm: 0.9999995668607657, iteration: 143089
loss: 1.0108442306518555,grad_norm: 0.9999991391392256, iteration: 143090
loss: 0.995001494884491,grad_norm: 0.9999991433483245, iteration: 143091
loss: 0.9938088059425354,grad_norm: 0.9999989969867382, iteration: 143092
loss: 1.008771538734436,grad_norm: 0.9590185029971128, iteration: 143093
loss: 0.9982250928878784,grad_norm: 0.9999994620387358, iteration: 143094
loss: 1.02216637134552,grad_norm: 0.9999991397873395, iteration: 143095
loss: 0.9732117056846619,grad_norm: 0.9999991036317659, iteration: 143096
loss: 1.0034716129302979,grad_norm: 0.9999990493139312, iteration: 143097
loss: 0.9988102912902832,grad_norm: 0.9999991229667612, iteration: 143098
loss: 0.9857715964317322,grad_norm: 0.9999990358847052, iteration: 143099
loss: 1.0163284540176392,grad_norm: 0.9999992948828799, iteration: 143100
loss: 0.9740762114524841,grad_norm: 0.9179637016126262, iteration: 143101
loss: 0.9900709390640259,grad_norm: 0.9999990907826641, iteration: 143102
loss: 0.99309241771698,grad_norm: 0.9999991473747272, iteration: 143103
loss: 1.0062367916107178,grad_norm: 0.9999993943121256, iteration: 143104
loss: 0.9788958430290222,grad_norm: 0.9972042614708172, iteration: 143105
loss: 1.0278019905090332,grad_norm: 0.999999096390613, iteration: 143106
loss: 1.027963638305664,grad_norm: 0.9999995000176757, iteration: 143107
loss: 1.00005042552948,grad_norm: 0.8727010485538018, iteration: 143108
loss: 0.9482700228691101,grad_norm: 0.9809693017895248, iteration: 143109
loss: 0.9634690284729004,grad_norm: 0.999999536985688, iteration: 143110
loss: 1.0274227857589722,grad_norm: 0.9999992185522675, iteration: 143111
loss: 0.9970628619194031,grad_norm: 0.9999992144272721, iteration: 143112
loss: 1.0043708086013794,grad_norm: 0.9999989835096503, iteration: 143113
loss: 1.0281401872634888,grad_norm: 0.999999101008063, iteration: 143114
loss: 1.0278301239013672,grad_norm: 0.9999991193154399, iteration: 143115
loss: 0.9765509366989136,grad_norm: 0.9999991957779808, iteration: 143116
loss: 0.972227931022644,grad_norm: 0.9999990335967137, iteration: 143117
loss: 1.0157387256622314,grad_norm: 0.9999992285739863, iteration: 143118
loss: 0.9411939978599548,grad_norm: 0.9308543847841534, iteration: 143119
loss: 1.0122182369232178,grad_norm: 0.9635091007969423, iteration: 143120
loss: 0.9946783781051636,grad_norm: 0.9035892055000518, iteration: 143121
loss: 1.0234439373016357,grad_norm: 0.9999989374703787, iteration: 143122
loss: 1.0396366119384766,grad_norm: 0.9999997637315647, iteration: 143123
loss: 1.0300896167755127,grad_norm: 0.9999989829131039, iteration: 143124
loss: 1.0237407684326172,grad_norm: 0.9999995657397531, iteration: 143125
loss: 1.0059541463851929,grad_norm: 0.9999990459863235, iteration: 143126
loss: 0.9763082265853882,grad_norm: 0.9999997387700343, iteration: 143127
loss: 1.0181318521499634,grad_norm: 0.9999991887412891, iteration: 143128
loss: 0.9829277396202087,grad_norm: 0.9726954011121702, iteration: 143129
loss: 1.0116301774978638,grad_norm: 0.9999991253123223, iteration: 143130
loss: 1.0064473152160645,grad_norm: 0.9999992692520722, iteration: 143131
loss: 0.9995282888412476,grad_norm: 0.8714602240001643, iteration: 143132
loss: 1.05698823928833,grad_norm: 0.9999992087814195, iteration: 143133
loss: 0.9758673906326294,grad_norm: 0.9105330284632769, iteration: 143134
loss: 1.0066436529159546,grad_norm: 0.9999990594519697, iteration: 143135
loss: 1.006257176399231,grad_norm: 0.9999990461638872, iteration: 143136
loss: 1.0019649267196655,grad_norm: 0.9999991854341747, iteration: 143137
loss: 0.9728465676307678,grad_norm: 0.8681373008117708, iteration: 143138
loss: 1.1576874256134033,grad_norm: 0.9999994971615075, iteration: 143139
loss: 1.0783228874206543,grad_norm: 0.9999993481944968, iteration: 143140
loss: 0.9716151356697083,grad_norm: 0.9999992043953838, iteration: 143141
loss: 1.0014621019363403,grad_norm: 0.9992179872430993, iteration: 143142
loss: 0.9799057245254517,grad_norm: 0.9999991732242434, iteration: 143143
loss: 1.0013782978057861,grad_norm: 0.9999991648684721, iteration: 143144
loss: 0.9917422533035278,grad_norm: 0.9999990612743179, iteration: 143145
loss: 0.9626368284225464,grad_norm: 0.9999990658972788, iteration: 143146
loss: 1.035165786743164,grad_norm: 0.9999990004761223, iteration: 143147
loss: 1.0189226865768433,grad_norm: 0.9999991252888311, iteration: 143148
loss: 1.001405119895935,grad_norm: 0.8529810407311648, iteration: 143149
loss: 0.999119222164154,grad_norm: 0.9999990849363426, iteration: 143150
loss: 1.0173286199569702,grad_norm: 0.999999234878225, iteration: 143151
loss: 1.0282422304153442,grad_norm: 0.9141102945974385, iteration: 143152
loss: 1.0484256744384766,grad_norm: 0.9999996885911178, iteration: 143153
loss: 1.0131142139434814,grad_norm: 0.9999993788211566, iteration: 143154
loss: 0.9911739230155945,grad_norm: 0.9999990497975152, iteration: 143155
loss: 1.0030369758605957,grad_norm: 0.9859930709041718, iteration: 143156
loss: 1.014845371246338,grad_norm: 0.9999991923534631, iteration: 143157
loss: 1.0045387744903564,grad_norm: 0.9999989528162716, iteration: 143158
loss: 0.9559231996536255,grad_norm: 0.9999992423682686, iteration: 143159
loss: 0.9978536367416382,grad_norm: 0.9999993056009795, iteration: 143160
loss: 0.9537543654441833,grad_norm: 0.9754789328568081, iteration: 143161
loss: 1.018284559249878,grad_norm: 0.9999991808415293, iteration: 143162
loss: 1.0351731777191162,grad_norm: 0.9999992258246778, iteration: 143163
loss: 1.0603899955749512,grad_norm: 0.9316490248836784, iteration: 143164
loss: 1.0087602138519287,grad_norm: 0.9441102009110336, iteration: 143165
loss: 1.0136433839797974,grad_norm: 0.7721660123525137, iteration: 143166
loss: 1.0095059871673584,grad_norm: 0.9999990917155328, iteration: 143167
loss: 0.9903751015663147,grad_norm: 0.952194163443376, iteration: 143168
loss: 1.0540183782577515,grad_norm: 0.9999996104411343, iteration: 143169
loss: 1.0214579105377197,grad_norm: 0.8513397285187558, iteration: 143170
loss: 1.0009626150131226,grad_norm: 0.9441654852773748, iteration: 143171
loss: 1.007899522781372,grad_norm: 0.9675193553988762, iteration: 143172
loss: 1.163747787475586,grad_norm: 0.999999902836056, iteration: 143173
loss: 1.0003845691680908,grad_norm: 0.8202551205508533, iteration: 143174
loss: 1.0087876319885254,grad_norm: 0.9999990385438433, iteration: 143175
loss: 1.0401285886764526,grad_norm: 0.9999991646444438, iteration: 143176
loss: 1.006906270980835,grad_norm: 0.9999993084294544, iteration: 143177
loss: 1.0350922346115112,grad_norm: 0.9999992066161418, iteration: 143178
loss: 1.0597057342529297,grad_norm: 0.9753346769121465, iteration: 143179
loss: 1.0086551904678345,grad_norm: 0.8748793649533104, iteration: 143180
loss: 0.9983670711517334,grad_norm: 0.9999991659450115, iteration: 143181
loss: 1.0156975984573364,grad_norm: 0.9648894229363935, iteration: 143182
loss: 1.018491506576538,grad_norm: 0.999999117280627, iteration: 143183
loss: 1.0190249681472778,grad_norm: 0.9999999027184236, iteration: 143184
loss: 0.9981958270072937,grad_norm: 0.9786590412278187, iteration: 143185
loss: 0.9607155323028564,grad_norm: 0.9999991085267509, iteration: 143186
loss: 1.0068495273590088,grad_norm: 0.999999156013489, iteration: 143187
loss: 0.9992020726203918,grad_norm: 0.9999988949527203, iteration: 143188
loss: 1.0123939514160156,grad_norm: 0.9374798034689216, iteration: 143189
loss: 1.0068161487579346,grad_norm: 0.9999994762687481, iteration: 143190
loss: 0.9850395917892456,grad_norm: 0.9999993122362354, iteration: 143191
loss: 0.9916079044342041,grad_norm: 0.999998929540827, iteration: 143192
loss: 1.010637640953064,grad_norm: 0.9155247457176243, iteration: 143193
loss: 1.0243196487426758,grad_norm: 0.9695827471049415, iteration: 143194
loss: 0.9978652000427246,grad_norm: 0.923430595305198, iteration: 143195
loss: 0.9994198083877563,grad_norm: 0.9310307899655407, iteration: 143196
loss: 1.0045615434646606,grad_norm: 0.9999990643058595, iteration: 143197
loss: 1.0024977922439575,grad_norm: 0.9999990608876157, iteration: 143198
loss: 1.0151773691177368,grad_norm: 0.9999993033361488, iteration: 143199
loss: 0.99980229139328,grad_norm: 0.9999991763536457, iteration: 143200
loss: 0.9837140440940857,grad_norm: 0.9999991398839306, iteration: 143201
loss: 1.013956069946289,grad_norm: 0.9999990236999075, iteration: 143202
loss: 1.0047129392623901,grad_norm: 0.9998638518486137, iteration: 143203
loss: 1.0214182138442993,grad_norm: 0.9999991859850884, iteration: 143204
loss: 0.9738529920578003,grad_norm: 0.9087039491250772, iteration: 143205
loss: 0.9427333474159241,grad_norm: 0.9999994600734446, iteration: 143206
loss: 1.0022368431091309,grad_norm: 0.9999991547979555, iteration: 143207
loss: 1.0176963806152344,grad_norm: 0.999999190221148, iteration: 143208
loss: 1.0000941753387451,grad_norm: 0.9999991108060328, iteration: 143209
loss: 0.9977799654006958,grad_norm: 0.9952873366466123, iteration: 143210
loss: 1.029564619064331,grad_norm: 0.8226415957250027, iteration: 143211
loss: 0.9752464890480042,grad_norm: 0.8701853113105454, iteration: 143212
loss: 0.9746981263160706,grad_norm: 0.8766867429792433, iteration: 143213
loss: 0.9883840680122375,grad_norm: 0.9999992377444367, iteration: 143214
loss: 1.026196002960205,grad_norm: 0.9999992354366689, iteration: 143215
loss: 1.0530582666397095,grad_norm: 0.8967621404714082, iteration: 143216
loss: 1.0788756608963013,grad_norm: 0.9999991528802059, iteration: 143217
loss: 1.0919288396835327,grad_norm: 0.9260347836755934, iteration: 143218
loss: 1.0128504037857056,grad_norm: 0.9456001442443576, iteration: 143219
loss: 1.061543583869934,grad_norm: 0.9999993046909383, iteration: 143220
loss: 0.9968594908714294,grad_norm: 0.9999992084155923, iteration: 143221
loss: 0.9881335496902466,grad_norm: 0.9435628389859694, iteration: 143222
loss: 0.9846044182777405,grad_norm: 0.9999991642911521, iteration: 143223
loss: 1.0064316987991333,grad_norm: 0.9964500842562799, iteration: 143224
loss: 1.0023361444473267,grad_norm: 0.9999991704064913, iteration: 143225
loss: 0.9918156862258911,grad_norm: 0.9999991753769226, iteration: 143226
loss: 1.0009653568267822,grad_norm: 0.9338674052363178, iteration: 143227
loss: 0.9601975083351135,grad_norm: 0.999999108776181, iteration: 143228
loss: 1.0132004022598267,grad_norm: 0.9999992464858192, iteration: 143229
loss: 1.0068881511688232,grad_norm: 0.9999990756580736, iteration: 143230
loss: 1.0143359899520874,grad_norm: 0.9999991994237828, iteration: 143231
loss: 1.083321452140808,grad_norm: 0.9999990151180294, iteration: 143232
loss: 0.9722188711166382,grad_norm: 0.8935322998135481, iteration: 143233
loss: 0.9755622148513794,grad_norm: 0.9999991604984726, iteration: 143234
loss: 1.012120008468628,grad_norm: 0.9999991515600566, iteration: 143235
loss: 1.0251978635787964,grad_norm: 0.9999989671079287, iteration: 143236
loss: 0.9942308068275452,grad_norm: 0.9999991651156104, iteration: 143237
loss: 1.0197343826293945,grad_norm: 0.8764990008621346, iteration: 143238
loss: 1.0103631019592285,grad_norm: 0.9999991717114562, iteration: 143239
loss: 0.9982547760009766,grad_norm: 0.9999990744004923, iteration: 143240
loss: 1.001948356628418,grad_norm: 0.9163898231446258, iteration: 143241
loss: 0.9618852138519287,grad_norm: 0.9900300017704238, iteration: 143242
loss: 1.0241820812225342,grad_norm: 0.999999207006993, iteration: 143243
loss: 0.9679896831512451,grad_norm: 0.9301978711885267, iteration: 143244
loss: 1.0729023218154907,grad_norm: 0.9999991080833486, iteration: 143245
loss: 0.9830861687660217,grad_norm: 0.9999990450413512, iteration: 143246
loss: 1.0104091167449951,grad_norm: 0.9999990928951784, iteration: 143247
loss: 1.0007166862487793,grad_norm: 0.999999077055885, iteration: 143248
loss: 0.9713796973228455,grad_norm: 0.9999990780772519, iteration: 143249
loss: 0.984820544719696,grad_norm: 0.8523256856842047, iteration: 143250
loss: 1.026168704032898,grad_norm: 0.9610789096303468, iteration: 143251
loss: 0.9770954847335815,grad_norm: 0.9714882162285586, iteration: 143252
loss: 1.0091328620910645,grad_norm: 0.9999990834338474, iteration: 143253
loss: 1.044027328491211,grad_norm: 0.9314426921269713, iteration: 143254
loss: 0.9767341613769531,grad_norm: 0.9999990564913658, iteration: 143255
loss: 1.0012726783752441,grad_norm: 0.9760721285629276, iteration: 143256
loss: 1.009523868560791,grad_norm: 0.9999990272748809, iteration: 143257
loss: 1.00279700756073,grad_norm: 0.9999991741202117, iteration: 143258
loss: 1.0076661109924316,grad_norm: 0.9999990611234941, iteration: 143259
loss: 1.0002014636993408,grad_norm: 0.9999990890325862, iteration: 143260
loss: 0.9866765737533569,grad_norm: 0.9880812200057013, iteration: 143261
loss: 0.9945093393325806,grad_norm: 0.9999990184930038, iteration: 143262
loss: 1.00568425655365,grad_norm: 0.9436924502964962, iteration: 143263
loss: 0.9986414313316345,grad_norm: 0.9394240604618058, iteration: 143264
loss: 1.0068961381912231,grad_norm: 0.9128173522140957, iteration: 143265
loss: 1.0180050134658813,grad_norm: 0.9950339586411657, iteration: 143266
loss: 0.9855978488922119,grad_norm: 0.8810089260814625, iteration: 143267
loss: 1.0188755989074707,grad_norm: 0.9999990477250639, iteration: 143268
loss: 0.9755913615226746,grad_norm: 0.9855494740579299, iteration: 143269
loss: 1.0198538303375244,grad_norm: 0.9999991195858426, iteration: 143270
loss: 0.9876288771629333,grad_norm: 0.9999991489863178, iteration: 143271
loss: 0.980547308921814,grad_norm: 0.8650088452875553, iteration: 143272
loss: 1.037161111831665,grad_norm: 0.99999916963514, iteration: 143273
loss: 0.992584764957428,grad_norm: 0.9648739517164486, iteration: 143274
loss: 1.016950011253357,grad_norm: 0.9999990447898064, iteration: 143275
loss: 0.9254317283630371,grad_norm: 0.9584526581959537, iteration: 143276
loss: 1.0130966901779175,grad_norm: 0.9999990870093834, iteration: 143277
loss: 0.9636862874031067,grad_norm: 0.9999989524566713, iteration: 143278
loss: 1.0199917554855347,grad_norm: 0.999998958386061, iteration: 143279
loss: 0.9920041561126709,grad_norm: 0.978447993115737, iteration: 143280
loss: 0.9971659779548645,grad_norm: 0.8891924064492256, iteration: 143281
loss: 0.9986032843589783,grad_norm: 0.9999990646509467, iteration: 143282
loss: 1.0922032594680786,grad_norm: 0.9999995887334951, iteration: 143283
loss: 0.9858773946762085,grad_norm: 0.8971604266397835, iteration: 143284
loss: 0.9898015856742859,grad_norm: 0.9999992190485437, iteration: 143285
loss: 1.0072355270385742,grad_norm: 0.9654408763241727, iteration: 143286
loss: 1.0210872888565063,grad_norm: 0.9999997871538704, iteration: 143287
loss: 0.9789978861808777,grad_norm: 0.99999925498411, iteration: 143288
loss: 1.003578543663025,grad_norm: 0.999999105936795, iteration: 143289
loss: 1.0122584104537964,grad_norm: 0.9999991197740261, iteration: 143290
loss: 0.9918044805526733,grad_norm: 0.9999993084840075, iteration: 143291
loss: 1.0139451026916504,grad_norm: 0.9999997545324137, iteration: 143292
loss: 1.0836032629013062,grad_norm: 0.9999999305257321, iteration: 143293
loss: 0.9968973398208618,grad_norm: 0.9999991880256879, iteration: 143294
loss: 1.0179741382598877,grad_norm: 0.9933534808781206, iteration: 143295
loss: 0.9921292066574097,grad_norm: 0.9647017845537471, iteration: 143296
loss: 0.9964070916175842,grad_norm: 0.8793072796913999, iteration: 143297
loss: 1.0253819227218628,grad_norm: 0.9680770649324759, iteration: 143298
loss: 1.0208429098129272,grad_norm: 0.9593115840007104, iteration: 143299
loss: 0.9825758337974548,grad_norm: 0.9044999238655986, iteration: 143300
loss: 1.0216548442840576,grad_norm: 0.9999990948057594, iteration: 143301
loss: 0.9600309133529663,grad_norm: 0.9051441506597063, iteration: 143302
loss: 0.996321976184845,grad_norm: 0.9999992063817633, iteration: 143303
loss: 1.004326581954956,grad_norm: 0.999999235532872, iteration: 143304
loss: 1.0360678434371948,grad_norm: 0.9999990566613178, iteration: 143305
loss: 1.0568389892578125,grad_norm: 0.9999997526902195, iteration: 143306
loss: 1.0251284837722778,grad_norm: 0.9950705185686066, iteration: 143307
loss: 1.023660659790039,grad_norm: 0.8157790014225164, iteration: 143308
loss: 1.0037959814071655,grad_norm: 0.8555432581052346, iteration: 143309
loss: 1.0292161703109741,grad_norm: 0.9999993085899477, iteration: 143310
loss: 1.0040295124053955,grad_norm: 0.9075192885686842, iteration: 143311
loss: 1.0016707181930542,grad_norm: 0.9999990173489417, iteration: 143312
loss: 1.0141774415969849,grad_norm: 0.9999995348731753, iteration: 143313
loss: 1.0850356817245483,grad_norm: 0.9999998325320736, iteration: 143314
loss: 0.9947367310523987,grad_norm: 0.9999992811416152, iteration: 143315
loss: 1.016473650932312,grad_norm: 0.9999992359552544, iteration: 143316
loss: 1.10970938205719,grad_norm: 0.9999995282742133, iteration: 143317
loss: 1.0019487142562866,grad_norm: 0.9999991914190381, iteration: 143318
loss: 1.0803662538528442,grad_norm: 0.9896483811946, iteration: 143319
loss: 0.9737157821655273,grad_norm: 0.9999990636361353, iteration: 143320
loss: 1.031410574913025,grad_norm: 0.9999990086088179, iteration: 143321
loss: 1.012125015258789,grad_norm: 0.9999991326329444, iteration: 143322
loss: 0.9589817523956299,grad_norm: 0.9999991312434532, iteration: 143323
loss: 0.9509239792823792,grad_norm: 0.999999135107624, iteration: 143324
loss: 0.9996749758720398,grad_norm: 0.9999991685319287, iteration: 143325
loss: 0.9984416365623474,grad_norm: 0.9999991449510864, iteration: 143326
loss: 1.0719027519226074,grad_norm: 0.9999991820468175, iteration: 143327
loss: 1.0479551553726196,grad_norm: 0.9999989324019327, iteration: 143328
loss: 0.9715706706047058,grad_norm: 0.8230164550688401, iteration: 143329
loss: 1.0110176801681519,grad_norm: 0.9933885538203012, iteration: 143330
loss: 1.2257753610610962,grad_norm: 0.9999999993147544, iteration: 143331
loss: 1.0005407333374023,grad_norm: 0.999999312582979, iteration: 143332
loss: 1.0162721872329712,grad_norm: 0.8971513385089177, iteration: 143333
loss: 1.024775505065918,grad_norm: 0.9999993538976873, iteration: 143334
loss: 1.0043346881866455,grad_norm: 0.8845416375924721, iteration: 143335
loss: 0.9690699577331543,grad_norm: 0.999999123748027, iteration: 143336
loss: 0.9693561792373657,grad_norm: 0.9999991228941874, iteration: 143337
loss: 0.9940859079360962,grad_norm: 0.9627057071698597, iteration: 143338
loss: 1.008225440979004,grad_norm: 0.9999992033337749, iteration: 143339
loss: 1.0703396797180176,grad_norm: 0.9999997225976145, iteration: 143340
loss: 1.0822718143463135,grad_norm: 0.9999991757851386, iteration: 143341
loss: 1.0456551313400269,grad_norm: 0.9999991406648926, iteration: 143342
loss: 1.004270076751709,grad_norm: 0.9999990509889599, iteration: 143343
loss: 0.9946821331977844,grad_norm: 0.843561271080577, iteration: 143344
loss: 0.9860580563545227,grad_norm: 0.9999992800947518, iteration: 143345
loss: 1.03616201877594,grad_norm: 0.9999991020851458, iteration: 143346
loss: 1.0162217617034912,grad_norm: 0.9999990727566518, iteration: 143347
loss: 0.9677129983901978,grad_norm: 0.852658156105508, iteration: 143348
loss: 1.0012744665145874,grad_norm: 0.9466709463781522, iteration: 143349
loss: 0.9603429436683655,grad_norm: 0.9999989860711748, iteration: 143350
loss: 1.0039221048355103,grad_norm: 0.859369248982784, iteration: 143351
loss: 0.9584588408470154,grad_norm: 0.9508738505998907, iteration: 143352
loss: 1.115793228149414,grad_norm: 0.9999998443459615, iteration: 143353
loss: 0.9834132194519043,grad_norm: 0.999999104382749, iteration: 143354
loss: 0.950258195400238,grad_norm: 0.9415386590675058, iteration: 143355
loss: 0.9579993486404419,grad_norm: 0.9999989415394184, iteration: 143356
loss: 0.9770953059196472,grad_norm: 0.9999991125993011, iteration: 143357
loss: 0.9920490980148315,grad_norm: 0.9999997930169116, iteration: 143358
loss: 1.006461501121521,grad_norm: 0.8805518660978039, iteration: 143359
loss: 1.0940895080566406,grad_norm: 0.999999301442753, iteration: 143360
loss: 1.0314698219299316,grad_norm: 0.9999991151801376, iteration: 143361
loss: 1.0066405534744263,grad_norm: 0.9999990415076361, iteration: 143362
loss: 1.0916639566421509,grad_norm: 0.9999991224425159, iteration: 143363
loss: 0.9834384918212891,grad_norm: 0.9999995529113044, iteration: 143364
loss: 1.028429388999939,grad_norm: 0.9041574319327016, iteration: 143365
loss: 1.0079450607299805,grad_norm: 0.9514923735429823, iteration: 143366
loss: 0.9706393480300903,grad_norm: 0.928084114206538, iteration: 143367
loss: 1.0064536333084106,grad_norm: 0.9999990708162436, iteration: 143368
loss: 0.9842645525932312,grad_norm: 0.999999187612718, iteration: 143369
loss: 1.060318112373352,grad_norm: 0.9999996013977663, iteration: 143370
loss: 0.9740598201751709,grad_norm: 0.9999990797281577, iteration: 143371
loss: 1.0355771780014038,grad_norm: 0.9999991871993359, iteration: 143372
loss: 0.9937072992324829,grad_norm: 0.9999991288246447, iteration: 143373
loss: 1.0127581357955933,grad_norm: 0.9999988698803925, iteration: 143374
loss: 1.0332354307174683,grad_norm: 0.9999991176056612, iteration: 143375
loss: 0.9929694533348083,grad_norm: 0.9999990116527332, iteration: 143376
loss: 0.9859291315078735,grad_norm: 0.9904104130178579, iteration: 143377
loss: 1.0025593042373657,grad_norm: 0.9999989116622713, iteration: 143378
loss: 1.0044442415237427,grad_norm: 0.9999990773435005, iteration: 143379
loss: 0.9829425811767578,grad_norm: 0.999999233952229, iteration: 143380
loss: 0.9819550514221191,grad_norm: 0.9933444898869721, iteration: 143381
loss: 0.9922631978988647,grad_norm: 0.9999991489945304, iteration: 143382
loss: 1.0036542415618896,grad_norm: 0.9999991057093922, iteration: 143383
loss: 1.0081952810287476,grad_norm: 0.9999993667598924, iteration: 143384
loss: 0.9949185252189636,grad_norm: 0.8245531937894861, iteration: 143385
loss: 0.9507384896278381,grad_norm: 0.9999990416567994, iteration: 143386
loss: 1.005522608757019,grad_norm: 0.9999990953638769, iteration: 143387
loss: 0.9865155220031738,grad_norm: 0.9999989884202735, iteration: 143388
loss: 0.9578090906143188,grad_norm: 0.9999992348688488, iteration: 143389
loss: 1.0122417211532593,grad_norm: 0.999999138807414, iteration: 143390
loss: 1.0206143856048584,grad_norm: 0.9999990593108804, iteration: 143391
loss: 1.0105671882629395,grad_norm: 0.9546290152807246, iteration: 143392
loss: 1.0707181692123413,grad_norm: 0.9999993194449541, iteration: 143393
loss: 1.0360815525054932,grad_norm: 0.9999991460023934, iteration: 143394
loss: 0.996728241443634,grad_norm: 0.9999993502790567, iteration: 143395
loss: 1.0036948919296265,grad_norm: 0.9586955017331049, iteration: 143396
loss: 0.9922047853469849,grad_norm: 0.900605892238495, iteration: 143397
loss: 1.0115101337432861,grad_norm: 0.9999992241093311, iteration: 143398
loss: 1.0096392631530762,grad_norm: 0.9999991324426486, iteration: 143399
loss: 0.9969521164894104,grad_norm: 0.9999991856942119, iteration: 143400
loss: 1.0100464820861816,grad_norm: 0.9999993892238987, iteration: 143401
loss: 0.9893367886543274,grad_norm: 0.9999991169292395, iteration: 143402
loss: 0.9915547370910645,grad_norm: 0.9999989948548381, iteration: 143403
loss: 0.9703267812728882,grad_norm: 0.9999991996672793, iteration: 143404
loss: 1.025638461112976,grad_norm: 0.9330472110843268, iteration: 143405
loss: 1.0059677362442017,grad_norm: 0.9999991796196399, iteration: 143406
loss: 0.9802990555763245,grad_norm: 0.9999992057407237, iteration: 143407
loss: 1.0527395009994507,grad_norm: 0.9999992696181842, iteration: 143408
loss: 1.0507516860961914,grad_norm: 0.999999930972733, iteration: 143409
loss: 1.0379092693328857,grad_norm: 0.9999996466306662, iteration: 143410
loss: 1.011155366897583,grad_norm: 0.9999992261328478, iteration: 143411
loss: 1.0083602666854858,grad_norm: 0.9999990010989965, iteration: 143412
loss: 1.0262221097946167,grad_norm: 0.9999992850681996, iteration: 143413
loss: 0.9909204840660095,grad_norm: 0.9999991620192925, iteration: 143414
loss: 1.0228633880615234,grad_norm: 0.9999990362350293, iteration: 143415
loss: 1.0007179975509644,grad_norm: 0.9488328315652528, iteration: 143416
loss: 1.0092467069625854,grad_norm: 0.9263230928125882, iteration: 143417
loss: 1.022072196006775,grad_norm: 0.999999662367988, iteration: 143418
loss: 1.02081298828125,grad_norm: 0.999999090563436, iteration: 143419
loss: 0.9998642206192017,grad_norm: 0.9999989817332521, iteration: 143420
loss: 0.9281889796257019,grad_norm: 0.9999991195826663, iteration: 143421
loss: 0.982499361038208,grad_norm: 0.9999992778840993, iteration: 143422
loss: 0.9959794878959656,grad_norm: 0.9999992683228627, iteration: 143423
loss: 1.0290359258651733,grad_norm: 0.9999993007746396, iteration: 143424
loss: 1.0093830823898315,grad_norm: 0.9999991167052185, iteration: 143425
loss: 1.0133030414581299,grad_norm: 0.944790856061389, iteration: 143426
loss: 0.9992207288742065,grad_norm: 0.9999994304482952, iteration: 143427
loss: 0.9961435794830322,grad_norm: 0.9999998542028099, iteration: 143428
loss: 0.9969161152839661,grad_norm: 0.9999992634718498, iteration: 143429
loss: 0.9659076929092407,grad_norm: 0.9999991674037382, iteration: 143430
loss: 1.022463321685791,grad_norm: 0.9791841285486558, iteration: 143431
loss: 1.0114251375198364,grad_norm: 0.999999114161056, iteration: 143432
loss: 0.9980225563049316,grad_norm: 0.9999989394652826, iteration: 143433
loss: 0.9816598296165466,grad_norm: 0.9999992640331395, iteration: 143434
loss: 1.0051814317703247,grad_norm: 0.9928106299037445, iteration: 143435
loss: 1.0304293632507324,grad_norm: 0.7730550249589859, iteration: 143436
loss: 1.0095610618591309,grad_norm: 0.9999990504715205, iteration: 143437
loss: 1.0107148885726929,grad_norm: 0.9152857170828799, iteration: 143438
loss: 1.0064034461975098,grad_norm: 0.9150619575978519, iteration: 143439
loss: 0.9536420702934265,grad_norm: 0.9648034712149468, iteration: 143440
loss: 0.994242250919342,grad_norm: 0.97302435715044, iteration: 143441
loss: 1.0514030456542969,grad_norm: 0.9362843128948465, iteration: 143442
loss: 0.9646657109260559,grad_norm: 0.9141923877726793, iteration: 143443
loss: 1.0444740056991577,grad_norm: 0.9341362562691555, iteration: 143444
loss: 1.0063611268997192,grad_norm: 0.9557108898977491, iteration: 143445
loss: 1.1059107780456543,grad_norm: 0.9999995789364123, iteration: 143446
loss: 1.0062744617462158,grad_norm: 0.9999992267791503, iteration: 143447
loss: 1.0211901664733887,grad_norm: 0.8370734111874593, iteration: 143448
loss: 0.9613407254219055,grad_norm: 0.9019193848792334, iteration: 143449
loss: 1.0144274234771729,grad_norm: 0.8628532484490732, iteration: 143450
loss: 0.9912563562393188,grad_norm: 0.8147603724610629, iteration: 143451
loss: 1.0457329750061035,grad_norm: 0.9973889632443801, iteration: 143452
loss: 1.0141899585723877,grad_norm: 0.9999995498360567, iteration: 143453
loss: 0.9780544638633728,grad_norm: 0.9259513590232462, iteration: 143454
loss: 1.0548661947250366,grad_norm: 0.9999999794021013, iteration: 143455
loss: 1.00858473777771,grad_norm: 0.9506092047818271, iteration: 143456
loss: 1.1148062944412231,grad_norm: 0.8945692538473461, iteration: 143457
loss: 0.9791756272315979,grad_norm: 0.999999630750043, iteration: 143458
loss: 1.0845105648040771,grad_norm: 0.9999991429885016, iteration: 143459
loss: 0.9530103206634521,grad_norm: 0.9999990340510315, iteration: 143460
loss: 1.18068528175354,grad_norm: 0.9999994710228507, iteration: 143461
loss: 1.0360931158065796,grad_norm: 0.9999998491054839, iteration: 143462
loss: 1.0816088914871216,grad_norm: 0.9999993264771705, iteration: 143463
loss: 0.9602669477462769,grad_norm: 0.9999990945963255, iteration: 143464
loss: 1.032633900642395,grad_norm: 0.8798668372350978, iteration: 143465
loss: 1.0125867128372192,grad_norm: 0.9999990686743061, iteration: 143466
loss: 0.9997509717941284,grad_norm: 0.9999992434214672, iteration: 143467
loss: 1.0441783666610718,grad_norm: 0.9999991545924858, iteration: 143468
loss: 0.9999843239784241,grad_norm: 0.8782730914727434, iteration: 143469
loss: 1.028488039970398,grad_norm: 0.999999833504263, iteration: 143470
loss: 1.030232548713684,grad_norm: 0.9999994246934456, iteration: 143471
loss: 1.1503771543502808,grad_norm: 0.9999999541157608, iteration: 143472
loss: 1.2810343503952026,grad_norm: 0.9999998009837545, iteration: 143473
loss: 0.9805060625076294,grad_norm: 0.9999991362260738, iteration: 143474
loss: 1.0221312046051025,grad_norm: 0.999999532792134, iteration: 143475
loss: 0.9912539124488831,grad_norm: 0.8080810857840248, iteration: 143476
loss: 0.9828543066978455,grad_norm: 0.9999997968154319, iteration: 143477
loss: 1.1641370058059692,grad_norm: 0.9999995341823293, iteration: 143478
loss: 0.9909661412239075,grad_norm: 0.999999284956801, iteration: 143479
loss: 0.9883772730827332,grad_norm: 0.999999105254459, iteration: 143480
loss: 1.028145670890808,grad_norm: 0.8497311483805989, iteration: 143481
loss: 1.068405270576477,grad_norm: 0.9999993105736401, iteration: 143482
loss: 1.0443137884140015,grad_norm: 0.999999265047907, iteration: 143483
loss: 1.1464934349060059,grad_norm: 0.9999995114945477, iteration: 143484
loss: 0.9765975475311279,grad_norm: 0.8603309430381759, iteration: 143485
loss: 1.0709303617477417,grad_norm: 0.9999998853152066, iteration: 143486
loss: 1.0848640203475952,grad_norm: 0.9999995600700395, iteration: 143487
loss: 0.9503602385520935,grad_norm: 0.9999995986955923, iteration: 143488
loss: 1.4500337839126587,grad_norm: 0.9999999266993836, iteration: 143489
loss: 1.1124323606491089,grad_norm: 0.9999993956772522, iteration: 143490
loss: 1.2327914237976074,grad_norm: 0.9999998817721562, iteration: 143491
loss: 1.2844008207321167,grad_norm: 0.999999623571537, iteration: 143492
loss: 1.0037394762039185,grad_norm: 0.9999998253514693, iteration: 143493
loss: 1.0072288513183594,grad_norm: 0.9999992715289336, iteration: 143494
loss: 1.000644564628601,grad_norm: 0.9999999126710764, iteration: 143495
loss: 1.2614731788635254,grad_norm: 1.0000000823010622, iteration: 143496
loss: 1.404309868812561,grad_norm: 0.9999996041348335, iteration: 143497
loss: 1.2480127811431885,grad_norm: 0.999999479483558, iteration: 143498
loss: 1.0254801511764526,grad_norm: 0.9999992047407145, iteration: 143499
loss: 1.083984136581421,grad_norm: 0.9999994923675044, iteration: 143500
loss: 1.1512993574142456,grad_norm: 0.9999999248664979, iteration: 143501
loss: 1.0723172426223755,grad_norm: 0.9999997988967994, iteration: 143502
loss: 1.207810878753662,grad_norm: 0.9999997467881171, iteration: 143503
loss: 1.0129376649856567,grad_norm: 0.9999992259271651, iteration: 143504
loss: 1.1479525566101074,grad_norm: 0.9999995580721426, iteration: 143505
loss: 1.015756607055664,grad_norm: 0.9999993983247939, iteration: 143506
loss: 1.1839436292648315,grad_norm: 0.9999998670671993, iteration: 143507
loss: 1.1747254133224487,grad_norm: 0.9999999559511454, iteration: 143508
loss: 1.0394760370254517,grad_norm: 0.9999992307854147, iteration: 143509
loss: 1.0210663080215454,grad_norm: 0.9999991901536219, iteration: 143510
loss: 1.112265706062317,grad_norm: 0.9999999627730142, iteration: 143511
loss: 1.0216047763824463,grad_norm: 0.9999990312115598, iteration: 143512
loss: 1.031022310256958,grad_norm: 0.9999990709016587, iteration: 143513
loss: 0.9818466901779175,grad_norm: 0.999999141430174, iteration: 143514
loss: 1.03121817111969,grad_norm: 0.99999916971208, iteration: 143515
loss: 1.0931295156478882,grad_norm: 0.9999993425681657, iteration: 143516
loss: 1.0195215940475464,grad_norm: 0.9999998337000364, iteration: 143517
loss: 0.9846779108047485,grad_norm: 0.9999990573572547, iteration: 143518
loss: 0.9980776906013489,grad_norm: 0.9687582611860885, iteration: 143519
loss: 1.0634956359863281,grad_norm: 0.9578388467947124, iteration: 143520
loss: 0.9958532452583313,grad_norm: 0.9021028709644358, iteration: 143521
loss: 1.038173794746399,grad_norm: 0.9999999048972656, iteration: 143522
loss: 1.0022021532058716,grad_norm: 0.9792388615873748, iteration: 143523
loss: 1.0576634407043457,grad_norm: 0.9999997607066877, iteration: 143524
loss: 0.9941696524620056,grad_norm: 0.9999991021218578, iteration: 143525
loss: 1.0234421491622925,grad_norm: 0.9999991196025773, iteration: 143526
loss: 1.0109169483184814,grad_norm: 0.9999992890760663, iteration: 143527
loss: 0.9846742153167725,grad_norm: 0.8693017778311405, iteration: 143528
loss: 0.9860702753067017,grad_norm: 0.9999991401668412, iteration: 143529
loss: 1.0293761491775513,grad_norm: 0.9999992705314087, iteration: 143530
loss: 0.9906745553016663,grad_norm: 0.9999991860344379, iteration: 143531
loss: 1.0207902193069458,grad_norm: 0.9667246623683778, iteration: 143532
loss: 1.0075066089630127,grad_norm: 0.9999990823574996, iteration: 143533
loss: 1.0068360567092896,grad_norm: 0.9999994310913678, iteration: 143534
loss: 0.9652902483940125,grad_norm: 0.8743167092286245, iteration: 143535
loss: 1.11317777633667,grad_norm: 0.9999997892183454, iteration: 143536
loss: 0.988739550113678,grad_norm: 0.9999992084355263, iteration: 143537
loss: 1.0331045389175415,grad_norm: 0.9999989047082521, iteration: 143538
loss: 1.0299992561340332,grad_norm: 0.9999990793245437, iteration: 143539
loss: 0.9941543936729431,grad_norm: 0.999999104972706, iteration: 143540
loss: 1.003463864326477,grad_norm: 0.9999991338417338, iteration: 143541
loss: 0.9838457703590393,grad_norm: 0.9999991188633799, iteration: 143542
loss: 1.0058220624923706,grad_norm: 0.9999990530040987, iteration: 143543
loss: 1.0011261701583862,grad_norm: 0.9999990911329292, iteration: 143544
loss: 1.0314533710479736,grad_norm: 0.9764882899046582, iteration: 143545
loss: 1.002232313156128,grad_norm: 0.9999991583603438, iteration: 143546
loss: 0.9853298664093018,grad_norm: 0.9999991749681867, iteration: 143547
loss: 0.9939954876899719,grad_norm: 0.9999991061909768, iteration: 143548
loss: 1.0307528972625732,grad_norm: 0.999998988085678, iteration: 143549
loss: 1.0610181093215942,grad_norm: 0.9999991988519958, iteration: 143550
loss: 0.9955935478210449,grad_norm: 0.9999990610702775, iteration: 143551
loss: 0.9621096253395081,grad_norm: 0.9999991323408577, iteration: 143552
loss: 1.1751890182495117,grad_norm: 0.9999992883218998, iteration: 143553
loss: 1.0062670707702637,grad_norm: 0.9999992198556801, iteration: 143554
loss: 0.9608659744262695,grad_norm: 0.9774596595956735, iteration: 143555
loss: 1.0189485549926758,grad_norm: 0.9999990800678754, iteration: 143556
loss: 1.019054889678955,grad_norm: 0.9999990221973446, iteration: 143557
loss: 0.9743024706840515,grad_norm: 0.8835339091492276, iteration: 143558
loss: 1.0264499187469482,grad_norm: 0.9155411836166438, iteration: 143559
loss: 0.9959278106689453,grad_norm: 0.8267235417473078, iteration: 143560
loss: 0.9951480031013489,grad_norm: 0.9151718263453541, iteration: 143561
loss: 0.9763407707214355,grad_norm: 0.9999989930053191, iteration: 143562
loss: 1.0343047380447388,grad_norm: 0.9999990863063771, iteration: 143563
loss: 1.029470682144165,grad_norm: 0.9999991526907364, iteration: 143564
loss: 1.021822214126587,grad_norm: 0.9999991485499692, iteration: 143565
loss: 0.9959481954574585,grad_norm: 0.9808744296002319, iteration: 143566
loss: 1.011457085609436,grad_norm: 0.9999990232044743, iteration: 143567
loss: 0.9592017531394958,grad_norm: 0.9661556254093998, iteration: 143568
loss: 0.9952498078346252,grad_norm: 0.9999992822834463, iteration: 143569
loss: 0.9939643144607544,grad_norm: 0.9639043666516199, iteration: 143570
loss: 0.9947733283042908,grad_norm: 0.9999991687804977, iteration: 143571
loss: 1.0004500150680542,grad_norm: 0.9308245825202385, iteration: 143572
loss: 1.034311056137085,grad_norm: 0.9999990377460642, iteration: 143573
loss: 0.9726352691650391,grad_norm: 0.9999991810637832, iteration: 143574
loss: 0.9944059252738953,grad_norm: 0.9999991922244895, iteration: 143575
loss: 1.02811861038208,grad_norm: 0.9999990510621689, iteration: 143576
loss: 1.0119235515594482,grad_norm: 0.9195999653763259, iteration: 143577
loss: 1.0284796953201294,grad_norm: 0.9385677029767201, iteration: 143578
loss: 0.9830523729324341,grad_norm: 0.9999990207165278, iteration: 143579
loss: 0.9895662665367126,grad_norm: 0.9999992151236748, iteration: 143580
loss: 1.013311743736267,grad_norm: 0.999999097204027, iteration: 143581
loss: 1.0115095376968384,grad_norm: 0.9999991011481936, iteration: 143582
loss: 1.0017967224121094,grad_norm: 0.9645471915859741, iteration: 143583
loss: 0.9999052286148071,grad_norm: 0.9999990786897737, iteration: 143584
loss: 0.9715510606765747,grad_norm: 0.9003416934888167, iteration: 143585
loss: 1.0127030611038208,grad_norm: 0.9999991349828949, iteration: 143586
loss: 1.0053898096084595,grad_norm: 0.9999992010696949, iteration: 143587
loss: 0.9920440316200256,grad_norm: 0.9999991486185843, iteration: 143588
loss: 0.9785045385360718,grad_norm: 0.9999991380164567, iteration: 143589
loss: 0.9606286287307739,grad_norm: 0.9999991729266755, iteration: 143590
loss: 1.0054506063461304,grad_norm: 0.9641078790459194, iteration: 143591
loss: 1.0117698907852173,grad_norm: 0.9131615159519245, iteration: 143592
loss: 0.9889634251594543,grad_norm: 0.9389333345694808, iteration: 143593
loss: 0.9847877621650696,grad_norm: 0.9544268390544519, iteration: 143594
loss: 0.9995831251144409,grad_norm: 0.9846082811249867, iteration: 143595
loss: 1.0485652685165405,grad_norm: 0.9999991695495153, iteration: 143596
loss: 1.0408142805099487,grad_norm: 0.9999991476761283, iteration: 143597
loss: 0.9796075820922852,grad_norm: 0.9999989952880247, iteration: 143598
loss: 0.9716349840164185,grad_norm: 0.9999996447386675, iteration: 143599
loss: 0.9850837588310242,grad_norm: 0.9132949780837591, iteration: 143600
loss: 0.9985937476158142,grad_norm: 0.9999991422013991, iteration: 143601
loss: 0.9925884008407593,grad_norm: 0.8615254658119059, iteration: 143602
loss: 1.002570390701294,grad_norm: 0.9999992988938234, iteration: 143603
loss: 1.0250993967056274,grad_norm: 0.9989772032469585, iteration: 143604
loss: 1.041609525680542,grad_norm: 0.9999992793368115, iteration: 143605
loss: 1.0151711702346802,grad_norm: 0.9999990590954942, iteration: 143606
loss: 0.971470832824707,grad_norm: 0.9243475899631541, iteration: 143607
loss: 1.0064481496810913,grad_norm: 0.9999991375270353, iteration: 143608
loss: 0.9774430990219116,grad_norm: 0.999999135856521, iteration: 143609
loss: 1.0093733072280884,grad_norm: 0.9524493109568979, iteration: 143610
loss: 1.0111323595046997,grad_norm: 0.9999994348090385, iteration: 143611
loss: 1.028218388557434,grad_norm: 0.9999992519061419, iteration: 143612
loss: 1.0160412788391113,grad_norm: 0.9999996392785278, iteration: 143613
loss: 1.0300555229187012,grad_norm: 0.9999991432014413, iteration: 143614
loss: 0.991807222366333,grad_norm: 0.9999992369693064, iteration: 143615
loss: 1.070314884185791,grad_norm: 0.9999994709952028, iteration: 143616
loss: 0.971393346786499,grad_norm: 0.9784137283944078, iteration: 143617
loss: 0.992836058139801,grad_norm: 0.9420950544921513, iteration: 143618
loss: 1.0260264873504639,grad_norm: 0.9999991975272641, iteration: 143619
loss: 0.9613438844680786,grad_norm: 0.9999991877161377, iteration: 143620
loss: 0.9993057250976562,grad_norm: 0.9999991272474547, iteration: 143621
loss: 1.0240353345870972,grad_norm: 0.7723339872426553, iteration: 143622
loss: 0.995972216129303,grad_norm: 0.975942844543198, iteration: 143623
loss: 1.0152461528778076,grad_norm: 0.9999992476862786, iteration: 143624
loss: 1.0010963678359985,grad_norm: 0.951113502566827, iteration: 143625
loss: 1.008915662765503,grad_norm: 0.9526822544745852, iteration: 143626
loss: 1.017958402633667,grad_norm: 0.9999991095699476, iteration: 143627
loss: 1.038058876991272,grad_norm: 0.9999990904222295, iteration: 143628
loss: 0.9989703297615051,grad_norm: 0.8745004417229321, iteration: 143629
loss: 0.9957536458969116,grad_norm: 0.8412015936832759, iteration: 143630
loss: 0.998953640460968,grad_norm: 0.9533408378227235, iteration: 143631
loss: 0.988585352897644,grad_norm: 0.9999992222081949, iteration: 143632
loss: 0.989833652973175,grad_norm: 0.9991794328099652, iteration: 143633
loss: 1.0017284154891968,grad_norm: 0.999999470406762, iteration: 143634
loss: 1.0252724885940552,grad_norm: 0.9999990607989953, iteration: 143635
loss: 1.0498430728912354,grad_norm: 0.9999997951801751, iteration: 143636
loss: 1.017396092414856,grad_norm: 0.9999992517665253, iteration: 143637
loss: 0.9838230013847351,grad_norm: 0.9701378865579504, iteration: 143638
loss: 1.016685962677002,grad_norm: 0.9999997874944127, iteration: 143639
loss: 1.011732578277588,grad_norm: 0.9999998706436667, iteration: 143640
loss: 0.9926595687866211,grad_norm: 0.9208955674526841, iteration: 143641
loss: 0.9837505221366882,grad_norm: 0.9682840983703155, iteration: 143642
loss: 1.0508712530136108,grad_norm: 0.9999995629858353, iteration: 143643
loss: 0.9570047855377197,grad_norm: 0.885446527642383, iteration: 143644
loss: 1.060861349105835,grad_norm: 0.8885643768611959, iteration: 143645
loss: 1.0183520317077637,grad_norm: 0.9999990610982769, iteration: 143646
loss: 1.0362340211868286,grad_norm: 0.9347871599536687, iteration: 143647
loss: 0.9990419149398804,grad_norm: 0.9124998912729999, iteration: 143648
loss: 0.9844726920127869,grad_norm: 0.999999319028364, iteration: 143649
loss: 1.0015413761138916,grad_norm: 0.9405006344564237, iteration: 143650
loss: 1.0112974643707275,grad_norm: 0.9999992782336818, iteration: 143651
loss: 0.9822171926498413,grad_norm: 0.9999992418662759, iteration: 143652
loss: 0.9683020710945129,grad_norm: 0.9999991377027364, iteration: 143653
loss: 1.0277197360992432,grad_norm: 0.999999215430298, iteration: 143654
loss: 1.0458050966262817,grad_norm: 0.999999172791022, iteration: 143655
loss: 1.0571964979171753,grad_norm: 0.9999997083370992, iteration: 143656
loss: 1.0295788049697876,grad_norm: 0.9999994465817488, iteration: 143657
loss: 1.070621371269226,grad_norm: 0.9999991122159348, iteration: 143658
loss: 1.0869654417037964,grad_norm: 0.9999996907668882, iteration: 143659
loss: 0.9915562868118286,grad_norm: 0.9999992384976076, iteration: 143660
loss: 0.9909456968307495,grad_norm: 0.9887723712321451, iteration: 143661
loss: 0.9804908037185669,grad_norm: 0.9999989620569331, iteration: 143662
loss: 1.0581313371658325,grad_norm: 0.9999990668707046, iteration: 143663
loss: 1.1425261497497559,grad_norm: 0.9999998424923413, iteration: 143664
loss: 0.9645009636878967,grad_norm: 0.9999991710987707, iteration: 143665
loss: 0.9861754775047302,grad_norm: 0.966981093203414, iteration: 143666
loss: 0.983788251876831,grad_norm: 0.9999991858494564, iteration: 143667
loss: 0.9931071996688843,grad_norm: 0.9955142852614024, iteration: 143668
loss: 1.0172775983810425,grad_norm: 0.8985754913782916, iteration: 143669
loss: 1.019885778427124,grad_norm: 0.9999993118777631, iteration: 143670
loss: 1.004186987876892,grad_norm: 0.9444937966775868, iteration: 143671
loss: 0.9661293625831604,grad_norm: 0.9999990958365502, iteration: 143672
loss: 1.0166856050491333,grad_norm: 0.8752169287961783, iteration: 143673
loss: 1.2205541133880615,grad_norm: 0.9999995443057708, iteration: 143674
loss: 0.9992683529853821,grad_norm: 0.9999990436228404, iteration: 143675
loss: 1.0120083093643188,grad_norm: 0.9999995858681887, iteration: 143676
loss: 0.9790861010551453,grad_norm: 0.99999927171394, iteration: 143677
loss: 1.0225825309753418,grad_norm: 0.9999991308221265, iteration: 143678
loss: 1.021458387374878,grad_norm: 0.9999991927954962, iteration: 143679
loss: 0.9843458533287048,grad_norm: 0.9682096675941565, iteration: 143680
loss: 0.9955717921257019,grad_norm: 0.9713309058551783, iteration: 143681
loss: 0.9913560748100281,grad_norm: 0.9999991840883291, iteration: 143682
loss: 1.0221861600875854,grad_norm: 0.9999997917256663, iteration: 143683
loss: 1.0038137435913086,grad_norm: 0.9999992365760954, iteration: 143684
loss: 1.0127698183059692,grad_norm: 0.9999992209421661, iteration: 143685
loss: 1.0048465728759766,grad_norm: 0.9261982076017592, iteration: 143686
loss: 0.9698449373245239,grad_norm: 0.9358869503335093, iteration: 143687
loss: 1.0146490335464478,grad_norm: 0.9999994436398417, iteration: 143688
loss: 1.0095361471176147,grad_norm: 0.9999992153291557, iteration: 143689
loss: 1.0397075414657593,grad_norm: 0.9999991522776204, iteration: 143690
loss: 1.0111150741577148,grad_norm: 0.9999991878601753, iteration: 143691
loss: 1.0409748554229736,grad_norm: 0.9999990890198613, iteration: 143692
loss: 1.0190472602844238,grad_norm: 0.9999991329853285, iteration: 143693
loss: 0.9861879944801331,grad_norm: 0.995219373736689, iteration: 143694
loss: 1.0048730373382568,grad_norm: 0.9206598690426525, iteration: 143695
loss: 0.9942328333854675,grad_norm: 0.9275620891136991, iteration: 143696
loss: 0.9761388301849365,grad_norm: 0.9969961667287482, iteration: 143697
loss: 0.9570521712303162,grad_norm: 0.9999994312072157, iteration: 143698
loss: 1.0441312789916992,grad_norm: 0.9999990823551536, iteration: 143699
loss: 1.018925428390503,grad_norm: 0.9999992119213303, iteration: 143700
loss: 0.9995505809783936,grad_norm: 0.9999992951132405, iteration: 143701
loss: 1.021864414215088,grad_norm: 0.9999992011103165, iteration: 143702
loss: 1.1275850534439087,grad_norm: 0.9999998555431344, iteration: 143703
loss: 0.9998273849487305,grad_norm: 0.9999988771905054, iteration: 143704
loss: 1.0296831130981445,grad_norm: 0.9999990901447561, iteration: 143705
loss: 0.96877121925354,grad_norm: 0.9999990134796353, iteration: 143706
loss: 0.9712697267532349,grad_norm: 0.9999991223361757, iteration: 143707
loss: 1.0159629583358765,grad_norm: 0.999999196315288, iteration: 143708
loss: 1.1174043416976929,grad_norm: 0.9999996556281794, iteration: 143709
loss: 1.038159728050232,grad_norm: 0.9999991408025432, iteration: 143710
loss: 0.9666374325752258,grad_norm: 0.9382111436699339, iteration: 143711
loss: 1.0151214599609375,grad_norm: 0.9999995142709561, iteration: 143712
loss: 0.9784585237503052,grad_norm: 0.9999991057191546, iteration: 143713
loss: 1.0099678039550781,grad_norm: 0.8834070024446666, iteration: 143714
loss: 1.011348843574524,grad_norm: 0.9999991214813027, iteration: 143715
loss: 1.0240298509597778,grad_norm: 0.9999992967260368, iteration: 143716
loss: 0.9977886080741882,grad_norm: 0.9999990821214028, iteration: 143717
loss: 1.0382541418075562,grad_norm: 0.958007943853967, iteration: 143718
loss: 1.0135283470153809,grad_norm: 0.9999993611398774, iteration: 143719
loss: 0.9788849949836731,grad_norm: 0.9999990649122605, iteration: 143720
loss: 1.0113246440887451,grad_norm: 0.7894344329965217, iteration: 143721
loss: 1.0247377157211304,grad_norm: 0.9999993489497382, iteration: 143722
loss: 0.9744992852210999,grad_norm: 0.9999991739703954, iteration: 143723
loss: 1.1110568046569824,grad_norm: 0.9999996474397254, iteration: 143724
loss: 1.05355703830719,grad_norm: 0.9662937038180333, iteration: 143725
loss: 0.9913622736930847,grad_norm: 0.9957136260214705, iteration: 143726
loss: 1.036519169807434,grad_norm: 0.9999994938004972, iteration: 143727
loss: 1.0638889074325562,grad_norm: 0.9999996027363228, iteration: 143728
loss: 1.000204086303711,grad_norm: 0.9999997684334082, iteration: 143729
loss: 1.0520269870758057,grad_norm: 0.9999997318736491, iteration: 143730
loss: 1.042462706565857,grad_norm: 0.9999992750580405, iteration: 143731
loss: 1.0158361196517944,grad_norm: 0.999999016885885, iteration: 143732
loss: 1.05745530128479,grad_norm: 0.9999990251963919, iteration: 143733
loss: 1.0099022388458252,grad_norm: 0.9999990949864088, iteration: 143734
loss: 1.0151915550231934,grad_norm: 0.9697093698548369, iteration: 143735
loss: 0.9960161447525024,grad_norm: 0.9216689212818313, iteration: 143736
loss: 0.97449791431427,grad_norm: 0.9224666663379215, iteration: 143737
loss: 0.9751133322715759,grad_norm: 0.9999990237163195, iteration: 143738
loss: 0.990700364112854,grad_norm: 0.9999994331905022, iteration: 143739
loss: 1.0159202814102173,grad_norm: 0.9999991305890101, iteration: 143740
loss: 0.9914623498916626,grad_norm: 0.9640679080220184, iteration: 143741
loss: 1.0090522766113281,grad_norm: 0.99999900810056, iteration: 143742
loss: 1.034157395362854,grad_norm: 0.9999990178306871, iteration: 143743
loss: 0.9959025979042053,grad_norm: 0.9362049010641429, iteration: 143744
loss: 1.3416908979415894,grad_norm: 0.9999998491722923, iteration: 143745
loss: 1.0588600635528564,grad_norm: 0.9999991849776227, iteration: 143746
loss: 1.058558464050293,grad_norm: 0.9999993485675631, iteration: 143747
loss: 1.0357334613800049,grad_norm: 0.9999991345919789, iteration: 143748
loss: 1.025417685508728,grad_norm: 0.9999994655411085, iteration: 143749
loss: 1.0467665195465088,grad_norm: 0.9999996043673197, iteration: 143750
loss: 0.966088056564331,grad_norm: 0.747510923356109, iteration: 143751
loss: 1.006427526473999,grad_norm: 0.9162500950231359, iteration: 143752
loss: 1.0144942998886108,grad_norm: 0.8542248177724359, iteration: 143753
loss: 1.0817238092422485,grad_norm: 0.9373214851150719, iteration: 143754
loss: 1.0256004333496094,grad_norm: 0.9219349697666346, iteration: 143755
loss: 0.9745275974273682,grad_norm: 0.997047699916004, iteration: 143756
loss: 1.0220352411270142,grad_norm: 0.9255008970216745, iteration: 143757
loss: 0.9945962429046631,grad_norm: 0.9999991222145286, iteration: 143758
loss: 1.2335515022277832,grad_norm: 0.9999999795419172, iteration: 143759
loss: 1.2560102939605713,grad_norm: 0.9999997515034497, iteration: 143760
loss: 1.1543028354644775,grad_norm: 0.9999995351457016, iteration: 143761
loss: 1.0003007650375366,grad_norm: 0.999999048067655, iteration: 143762
loss: 1.0319534540176392,grad_norm: 0.9999998878943764, iteration: 143763
loss: 1.031903624534607,grad_norm: 0.9999990599057156, iteration: 143764
loss: 0.9770081639289856,grad_norm: 0.9999996548895346, iteration: 143765
loss: 1.0075199604034424,grad_norm: 0.8597860045137647, iteration: 143766
loss: 1.0386302471160889,grad_norm: 0.999999068553104, iteration: 143767
loss: 0.9826906323432922,grad_norm: 0.9999990831639483, iteration: 143768
loss: 0.9987538456916809,grad_norm: 0.9546507378293344, iteration: 143769
loss: 0.9994217753410339,grad_norm: 0.9650235566794657, iteration: 143770
loss: 0.9876807332038879,grad_norm: 0.9555532293726261, iteration: 143771
loss: 1.0055032968521118,grad_norm: 0.9999990755868348, iteration: 143772
loss: 1.0015816688537598,grad_norm: 0.9999990894807362, iteration: 143773
loss: 1.0915700197219849,grad_norm: 0.9999995068960225, iteration: 143774
loss: 1.0288785696029663,grad_norm: 0.9999991425698582, iteration: 143775
loss: 1.0087662935256958,grad_norm: 0.9999991315629709, iteration: 143776
loss: 1.0023661851882935,grad_norm: 0.9999992115630199, iteration: 143777
loss: 1.084926724433899,grad_norm: 0.9999992358974036, iteration: 143778
loss: 1.0869358777999878,grad_norm: 0.9999994930008367, iteration: 143779
loss: 1.0620853900909424,grad_norm: 0.9999991418295743, iteration: 143780
loss: 1.0014172792434692,grad_norm: 0.9999992971480286, iteration: 143781
loss: 1.0267753601074219,grad_norm: 0.9999991950959252, iteration: 143782
loss: 0.9676193594932556,grad_norm: 0.885400266898884, iteration: 143783
loss: 0.9881229996681213,grad_norm: 0.918999655874197, iteration: 143784
loss: 1.0404999256134033,grad_norm: 0.9999996046933969, iteration: 143785
loss: 0.9920870661735535,grad_norm: 0.9293256375704337, iteration: 143786
loss: 1.11326265335083,grad_norm: 0.9738453931312964, iteration: 143787
loss: 1.0181605815887451,grad_norm: 0.8491651759454745, iteration: 143788
loss: 0.9920465350151062,grad_norm: 0.986425397309847, iteration: 143789
loss: 1.176571011543274,grad_norm: 0.9999996037629696, iteration: 143790
loss: 1.015884518623352,grad_norm: 0.8682069902060388, iteration: 143791
loss: 1.0234968662261963,grad_norm: 0.999999289780385, iteration: 143792
loss: 1.3591941595077515,grad_norm: 0.9999999607482063, iteration: 143793
loss: 1.0100033283233643,grad_norm: 0.9999989824252741, iteration: 143794
loss: 0.9842401146888733,grad_norm: 0.9999989403790824, iteration: 143795
loss: 1.157425045967102,grad_norm: 0.9999997720540977, iteration: 143796
loss: 0.9744274616241455,grad_norm: 0.9999991628015671, iteration: 143797
loss: 1.017024278640747,grad_norm: 0.9999991322122647, iteration: 143798
loss: 1.0624102354049683,grad_norm: 0.9999996484115639, iteration: 143799
loss: 0.9826991558074951,grad_norm: 0.8476004632340632, iteration: 143800
loss: 0.9833587408065796,grad_norm: 0.9217272184205695, iteration: 143801
loss: 1.0195609331130981,grad_norm: 0.9999992766749218, iteration: 143802
loss: 0.9809131026268005,grad_norm: 0.9315084156297844, iteration: 143803
loss: 0.9842807054519653,grad_norm: 0.9999991366735128, iteration: 143804
loss: 0.9902698993682861,grad_norm: 0.9073151888161624, iteration: 143805
loss: 0.995367169380188,grad_norm: 0.9085970389414264, iteration: 143806
loss: 1.0699167251586914,grad_norm: 0.9999995841169628, iteration: 143807
loss: 1.05404794216156,grad_norm: 0.99999923981214, iteration: 143808
loss: 1.067909836769104,grad_norm: 0.9999994403205503, iteration: 143809
loss: 0.9995316863059998,grad_norm: 0.9999990849550826, iteration: 143810
loss: 0.9766461253166199,grad_norm: 0.9999991251897461, iteration: 143811
loss: 1.0613921880722046,grad_norm: 0.99999917894791, iteration: 143812
loss: 1.0045231580734253,grad_norm: 0.971395656482197, iteration: 143813
loss: 1.0795376300811768,grad_norm: 0.9999995974288397, iteration: 143814
loss: 1.0975111722946167,grad_norm: 0.9999996003444747, iteration: 143815
loss: 1.0196958780288696,grad_norm: 0.9999991285974023, iteration: 143816
loss: 1.0284464359283447,grad_norm: 0.9999993752811024, iteration: 143817
loss: 1.007928729057312,grad_norm: 0.9442794024663871, iteration: 143818
loss: 1.129603385925293,grad_norm: 0.9999998608951428, iteration: 143819
loss: 1.2831907272338867,grad_norm: 0.9999997033400357, iteration: 143820
loss: 1.0278931856155396,grad_norm: 0.9999991938753563, iteration: 143821
loss: 1.0075370073318481,grad_norm: 0.9999991240177188, iteration: 143822
loss: 1.0091041326522827,grad_norm: 0.9999991335779209, iteration: 143823
loss: 1.0336414575576782,grad_norm: 0.9999992913474604, iteration: 143824
loss: 1.01265549659729,grad_norm: 0.999999261370564, iteration: 143825
loss: 0.9981077313423157,grad_norm: 0.9999990245286499, iteration: 143826
loss: 0.9830774068832397,grad_norm: 0.9999990511596495, iteration: 143827
loss: 0.9756425619125366,grad_norm: 0.9440284577729158, iteration: 143828
loss: 1.0030717849731445,grad_norm: 0.999999084900661, iteration: 143829
loss: 1.0838969945907593,grad_norm: 0.9999993669754547, iteration: 143830
loss: 1.1195712089538574,grad_norm: 0.9999997887370651, iteration: 143831
loss: 0.9803708791732788,grad_norm: 0.9086865701990696, iteration: 143832
loss: 1.3246592283248901,grad_norm: 0.9999998964582906, iteration: 143833
loss: 1.18130362033844,grad_norm: 0.999999761611932, iteration: 143834
loss: 0.9898194074630737,grad_norm: 0.9999990952120685, iteration: 143835
loss: 1.0513463020324707,grad_norm: 0.9999991389472872, iteration: 143836
loss: 1.0161840915679932,grad_norm: 0.9999990640573766, iteration: 143837
loss: 0.9989842176437378,grad_norm: 0.9999991727149443, iteration: 143838
loss: 1.053019404411316,grad_norm: 0.9999993220980328, iteration: 143839
loss: 1.069529414176941,grad_norm: 0.9999990441012635, iteration: 143840
loss: 0.9938844442367554,grad_norm: 0.9999994743232761, iteration: 143841
loss: 1.0283794403076172,grad_norm: 0.9999990580228769, iteration: 143842
loss: 0.9960347414016724,grad_norm: 0.9999993197891794, iteration: 143843
loss: 1.028140902519226,grad_norm: 0.9999992299074909, iteration: 143844
loss: 1.0779203176498413,grad_norm: 0.9999995234122813, iteration: 143845
loss: 1.0079786777496338,grad_norm: 0.999999729326602, iteration: 143846
loss: 0.9994996786117554,grad_norm: 0.9999991905111548, iteration: 143847
loss: 0.9822315573692322,grad_norm: 0.9999995926695479, iteration: 143848
loss: 1.0028666257858276,grad_norm: 0.9999991212524634, iteration: 143849
loss: 1.030285120010376,grad_norm: 0.9999997297601902, iteration: 143850
loss: 1.0088434219360352,grad_norm: 0.9999999253255362, iteration: 143851
loss: 1.015980839729309,grad_norm: 0.9999991396163516, iteration: 143852
loss: 0.9931142330169678,grad_norm: 0.9070826558268593, iteration: 143853
loss: 0.9948161840438843,grad_norm: 0.9999991251749396, iteration: 143854
loss: 1.003714680671692,grad_norm: 0.8920378162190148, iteration: 143855
loss: 1.0121151208877563,grad_norm: 0.9999991613235354, iteration: 143856
loss: 1.0087203979492188,grad_norm: 0.9999992812064736, iteration: 143857
loss: 1.0157248973846436,grad_norm: 0.7562972836986818, iteration: 143858
loss: 1.018742561340332,grad_norm: 0.9999993765282077, iteration: 143859
loss: 0.9892295002937317,grad_norm: 0.9999994545236759, iteration: 143860
loss: 1.0053757429122925,grad_norm: 0.843808678745373, iteration: 143861
loss: 1.0600676536560059,grad_norm: 0.9999996665232191, iteration: 143862
loss: 0.9856579303741455,grad_norm: 0.9999992638137881, iteration: 143863
loss: 0.9703277349472046,grad_norm: 0.9999991765487617, iteration: 143864
loss: 1.0753368139266968,grad_norm: 0.9999991899143128, iteration: 143865
loss: 1.087092638015747,grad_norm: 0.9999998543723676, iteration: 143866
loss: 1.2031292915344238,grad_norm: 0.9999992602228405, iteration: 143867
loss: 1.0830669403076172,grad_norm: 0.9999991935278675, iteration: 143868
loss: 1.0114320516586304,grad_norm: 0.964024637395554, iteration: 143869
loss: 1.0235711336135864,grad_norm: 0.9999991706940728, iteration: 143870
loss: 0.9937346577644348,grad_norm: 0.992757446611025, iteration: 143871
loss: 1.0356974601745605,grad_norm: 0.9999993233879255, iteration: 143872
loss: 0.9653751850128174,grad_norm: 0.9669069106400946, iteration: 143873
loss: 1.0847012996673584,grad_norm: 0.9999998741602073, iteration: 143874
loss: 1.0218803882598877,grad_norm: 0.9999990878367362, iteration: 143875
loss: 1.0904107093811035,grad_norm: 0.9999997011157746, iteration: 143876
loss: 1.0143123865127563,grad_norm: 0.9999990073488493, iteration: 143877
loss: 1.0221366882324219,grad_norm: 0.9999991034214777, iteration: 143878
loss: 1.0228978395462036,grad_norm: 0.9999996289245214, iteration: 143879
loss: 1.0221984386444092,grad_norm: 0.8373512974095738, iteration: 143880
loss: 0.9693660140037537,grad_norm: 0.9272602582872915, iteration: 143881
loss: 0.9625774621963501,grad_norm: 0.9999990435062991, iteration: 143882
loss: 0.982334554195404,grad_norm: 0.9999991284831783, iteration: 143883
loss: 1.0096861124038696,grad_norm: 0.9999990405667487, iteration: 143884
loss: 1.1134415864944458,grad_norm: 0.9999998044950764, iteration: 143885
loss: 1.0004812479019165,grad_norm: 0.9999991566036928, iteration: 143886
loss: 1.0834331512451172,grad_norm: 0.9999999257687309, iteration: 143887
loss: 1.0625072717666626,grad_norm: 0.999999833442649, iteration: 143888
loss: 1.000563144683838,grad_norm: 0.9502042405752456, iteration: 143889
loss: 0.9917373061180115,grad_norm: 0.9999992639429536, iteration: 143890
loss: 1.0671696662902832,grad_norm: 0.9999992727367378, iteration: 143891
loss: 1.0327680110931396,grad_norm: 0.8686920201478903, iteration: 143892
loss: 1.0998725891113281,grad_norm: 0.999999786575309, iteration: 143893
loss: 0.9634193181991577,grad_norm: 0.9999991727294063, iteration: 143894
loss: 1.0528489351272583,grad_norm: 0.9999993504823157, iteration: 143895
loss: 1.0069372653961182,grad_norm: 0.9999990981503991, iteration: 143896
loss: 1.006301999092102,grad_norm: 0.999998964529143, iteration: 143897
loss: 1.048166275024414,grad_norm: 0.9999994959526668, iteration: 143898
loss: 0.9844467639923096,grad_norm: 0.9142694227592963, iteration: 143899
loss: 1.0461089611053467,grad_norm: 0.8838129018620623, iteration: 143900
loss: 1.0004241466522217,grad_norm: 0.9653706716469239, iteration: 143901
loss: 1.0218760967254639,grad_norm: 0.9999993609441092, iteration: 143902
loss: 0.9871248006820679,grad_norm: 0.9999990388206007, iteration: 143903
loss: 1.0178570747375488,grad_norm: 0.977114413031025, iteration: 143904
loss: 0.986422061920166,grad_norm: 0.9325300062303729, iteration: 143905
loss: 1.0186169147491455,grad_norm: 0.9999996095223833, iteration: 143906
loss: 1.0385527610778809,grad_norm: 0.9999993729463533, iteration: 143907
loss: 0.9922551512718201,grad_norm: 0.8445780175973792, iteration: 143908
loss: 0.9641671776771545,grad_norm: 0.9934718736931414, iteration: 143909
loss: 1.032509207725525,grad_norm: 0.9999991038668012, iteration: 143910
loss: 1.0203475952148438,grad_norm: 0.9999997785754146, iteration: 143911
loss: 0.9967826008796692,grad_norm: 0.9999994715664358, iteration: 143912
loss: 1.0009621381759644,grad_norm: 0.9701183674662965, iteration: 143913
loss: 1.0240525007247925,grad_norm: 0.9999995173149946, iteration: 143914
loss: 1.1229350566864014,grad_norm: 0.9999997494185162, iteration: 143915
loss: 0.9857982993125916,grad_norm: 0.9999990909844686, iteration: 143916
loss: 1.1182392835617065,grad_norm: 0.9999995497598263, iteration: 143917
loss: 0.9631345868110657,grad_norm: 0.9999991363236217, iteration: 143918
loss: 0.9851804375648499,grad_norm: 0.999999066968154, iteration: 143919
loss: 1.0242077112197876,grad_norm: 0.9999995241825812, iteration: 143920
loss: 0.9889612793922424,grad_norm: 0.9039821090827361, iteration: 143921
loss: 1.0548404455184937,grad_norm: 0.9999994576449727, iteration: 143922
loss: 0.9953842163085938,grad_norm: 0.999999108575935, iteration: 143923
loss: 1.0281246900558472,grad_norm: 0.9999992289393564, iteration: 143924
loss: 1.015608310699463,grad_norm: 0.9999998644425833, iteration: 143925
loss: 1.023437261581421,grad_norm: 0.9932573905793726, iteration: 143926
loss: 1.0305579900741577,grad_norm: 0.9999997442091231, iteration: 143927
loss: 1.0062264204025269,grad_norm: 0.9999995428797283, iteration: 143928
loss: 0.9920011758804321,grad_norm: 0.9999991206463561, iteration: 143929
loss: 1.0775620937347412,grad_norm: 0.9999998302159655, iteration: 143930
loss: 1.5569119453430176,grad_norm: 0.9999999383534072, iteration: 143931
loss: 1.0625450611114502,grad_norm: 0.9999992081065561, iteration: 143932
loss: 1.0039432048797607,grad_norm: 0.9558201287621179, iteration: 143933
loss: 1.014767050743103,grad_norm: 0.999999174215773, iteration: 143934
loss: 0.9987654089927673,grad_norm: 0.9999992291962008, iteration: 143935
loss: 0.9954881072044373,grad_norm: 0.9860980340820981, iteration: 143936
loss: 1.0129715204238892,grad_norm: 0.9389286762327306, iteration: 143937
loss: 1.007449984550476,grad_norm: 0.9999990109854309, iteration: 143938
loss: 1.057320475578308,grad_norm: 0.962334748885319, iteration: 143939
loss: 1.0934401750564575,grad_norm: 0.9999998986219082, iteration: 143940
loss: 0.9742221236228943,grad_norm: 0.9999994479314228, iteration: 143941
loss: 1.0671019554138184,grad_norm: 0.9999998357342291, iteration: 143942
loss: 1.0130159854888916,grad_norm: 0.9790329897868513, iteration: 143943
loss: 1.0249385833740234,grad_norm: 0.9999992678689438, iteration: 143944
loss: 1.0069674253463745,grad_norm: 0.9999992234081246, iteration: 143945
loss: 1.015205979347229,grad_norm: 0.9999990097329671, iteration: 143946
loss: 0.9912721514701843,grad_norm: 0.9999990644707212, iteration: 143947
loss: 1.0028164386749268,grad_norm: 0.9999991850908826, iteration: 143948
loss: 1.0075583457946777,grad_norm: 0.9999992413302815, iteration: 143949
loss: 1.0405131578445435,grad_norm: 0.9999990625851968, iteration: 143950
loss: 1.0255448818206787,grad_norm: 0.9999991834418918, iteration: 143951
loss: 1.0155680179595947,grad_norm: 0.9999992766352287, iteration: 143952
loss: 0.9998846650123596,grad_norm: 0.9251533765055105, iteration: 143953
loss: 1.1444416046142578,grad_norm: 0.9999992252006596, iteration: 143954
loss: 1.0300016403198242,grad_norm: 0.9999992614387739, iteration: 143955
loss: 0.9990960955619812,grad_norm: 0.9999996481957423, iteration: 143956
loss: 0.9908310770988464,grad_norm: 0.932992615744773, iteration: 143957
loss: 0.9839646220207214,grad_norm: 0.9999991433779386, iteration: 143958
loss: 1.0561938285827637,grad_norm: 0.9999991057286662, iteration: 143959
loss: 1.0495964288711548,grad_norm: 0.999999072576868, iteration: 143960
loss: 1.0790749788284302,grad_norm: 0.9999998315147932, iteration: 143961
loss: 1.0074001550674438,grad_norm: 0.9999990863079782, iteration: 143962
loss: 1.0777506828308105,grad_norm: 0.999999078448775, iteration: 143963
loss: 1.362115740776062,grad_norm: 0.9999996237670457, iteration: 143964
loss: 1.019561529159546,grad_norm: 0.9823339574591412, iteration: 143965
loss: 1.0872732400894165,grad_norm: 0.9999991734025053, iteration: 143966
loss: 1.0746641159057617,grad_norm: 0.999999171607986, iteration: 143967
loss: 0.9969612956047058,grad_norm: 0.9854763847517217, iteration: 143968
loss: 1.0274596214294434,grad_norm: 0.9999991434485267, iteration: 143969
loss: 1.0164859294891357,grad_norm: 0.9999994214560224, iteration: 143970
loss: 0.9942020773887634,grad_norm: 0.999999506810065, iteration: 143971
loss: 1.0567065477371216,grad_norm: 0.9999994076978463, iteration: 143972
loss: 1.013559341430664,grad_norm: 0.9999995234556782, iteration: 143973
loss: 0.9739733338356018,grad_norm: 0.9611232480355669, iteration: 143974
loss: 0.9919652342796326,grad_norm: 0.999998962165007, iteration: 143975
loss: 0.9717848896980286,grad_norm: 0.9999991394583383, iteration: 143976
loss: 1.0611156225204468,grad_norm: 0.9999991890293815, iteration: 143977
loss: 0.9903052449226379,grad_norm: 0.927564046610091, iteration: 143978
loss: 1.0269837379455566,grad_norm: 0.9999997428648488, iteration: 143979
loss: 0.9957998394966125,grad_norm: 0.9244196311325521, iteration: 143980
loss: 0.9900179505348206,grad_norm: 0.9788464615939739, iteration: 143981
loss: 1.033171534538269,grad_norm: 0.999999836650007, iteration: 143982
loss: 0.9797369241714478,grad_norm: 0.9999990406631302, iteration: 143983
loss: 1.074933409690857,grad_norm: 0.9901093274539982, iteration: 143984
loss: 0.9909554123878479,grad_norm: 0.9999991229513511, iteration: 143985
loss: 1.0901987552642822,grad_norm: 0.9999991058505827, iteration: 143986
loss: 0.961214542388916,grad_norm: 0.9999990711917714, iteration: 143987
loss: 0.9936970472335815,grad_norm: 0.99999917392041, iteration: 143988
loss: 1.0905531644821167,grad_norm: 0.9999995950670562, iteration: 143989
loss: 1.050667643547058,grad_norm: 0.9999993460664387, iteration: 143990
loss: 1.0034016370773315,grad_norm: 0.9524048222584742, iteration: 143991
loss: 1.0180085897445679,grad_norm: 0.9999990872313513, iteration: 143992
loss: 0.9985463619232178,grad_norm: 0.9999992628523852, iteration: 143993
loss: 1.0199406147003174,grad_norm: 0.9999993979254627, iteration: 143994
loss: 1.0473867654800415,grad_norm: 0.9999991179902773, iteration: 143995
loss: 0.9815552830696106,grad_norm: 0.9999991031593306, iteration: 143996
loss: 1.0170196294784546,grad_norm: 0.9999995224497622, iteration: 143997
loss: 1.0251778364181519,grad_norm: 0.9377711861683011, iteration: 143998
loss: 1.1100609302520752,grad_norm: 0.9999991966854181, iteration: 143999
loss: 1.033453345298767,grad_norm: 0.9999993641391891, iteration: 144000
loss: 0.9755192399024963,grad_norm: 0.999999015146516, iteration: 144001
loss: 1.0097098350524902,grad_norm: 0.9999991219798284, iteration: 144002
loss: 1.0016453266143799,grad_norm: 0.999998945911989, iteration: 144003
loss: 0.9848811626434326,grad_norm: 0.9999991224856981, iteration: 144004
loss: 1.045318603515625,grad_norm: 0.8819430934510717, iteration: 144005
loss: 1.0002809762954712,grad_norm: 0.9999991443389645, iteration: 144006
loss: 1.0052070617675781,grad_norm: 0.9999990752528028, iteration: 144007
loss: 1.0042144060134888,grad_norm: 0.9999991346155518, iteration: 144008
loss: 1.0063642263412476,grad_norm: 0.9901209551578796, iteration: 144009
loss: 1.0366517305374146,grad_norm: 0.9999992883396872, iteration: 144010
loss: 1.1246083974838257,grad_norm: 0.9999996817100731, iteration: 144011
loss: 1.0103819370269775,grad_norm: 0.9999992527260496, iteration: 144012
loss: 0.9896849393844604,grad_norm: 0.9999993170165564, iteration: 144013
loss: 1.0143234729766846,grad_norm: 0.9639082285348366, iteration: 144014
loss: 1.0511738061904907,grad_norm: 0.9999991996845876, iteration: 144015
loss: 1.0283230543136597,grad_norm: 0.9999990285669166, iteration: 144016
loss: 1.0099444389343262,grad_norm: 0.9540711406182297, iteration: 144017
loss: 0.9802169799804688,grad_norm: 0.9999991562216587, iteration: 144018
loss: 0.9932872653007507,grad_norm: 0.9167233149909197, iteration: 144019
loss: 0.9739356637001038,grad_norm: 0.908900038759844, iteration: 144020
loss: 1.0246472358703613,grad_norm: 0.9999990160478277, iteration: 144021
loss: 0.9936802983283997,grad_norm: 0.9602800424722405, iteration: 144022
loss: 1.037229299545288,grad_norm: 0.9638705232464634, iteration: 144023
loss: 1.0626587867736816,grad_norm: 0.9999993032116953, iteration: 144024
loss: 0.9959375858306885,grad_norm: 0.9999991624638316, iteration: 144025
loss: 1.0049316883087158,grad_norm: 0.8772537209785022, iteration: 144026
loss: 0.9916203022003174,grad_norm: 0.999998908618263, iteration: 144027
loss: 1.0420230627059937,grad_norm: 0.9731478229962933, iteration: 144028
loss: 0.9936484694480896,grad_norm: 0.9843783791471962, iteration: 144029
loss: 1.0205961465835571,grad_norm: 0.9999991634911033, iteration: 144030
loss: 1.0244864225387573,grad_norm: 0.9489352163215102, iteration: 144031
loss: 0.994004487991333,grad_norm: 0.9959763736724749, iteration: 144032
loss: 0.9954549670219421,grad_norm: 0.9999991061195053, iteration: 144033
loss: 1.0130928754806519,grad_norm: 0.9999992037215311, iteration: 144034
loss: 1.0005091428756714,grad_norm: 0.9999999196490323, iteration: 144035
loss: 1.0405027866363525,grad_norm: 0.8108916616886096, iteration: 144036
loss: 1.0054378509521484,grad_norm: 0.9858440211768541, iteration: 144037
loss: 1.051804780960083,grad_norm: 0.999999645216662, iteration: 144038
loss: 1.0130391120910645,grad_norm: 0.9999990603759469, iteration: 144039
loss: 0.9872545003890991,grad_norm: 0.8366859044861008, iteration: 144040
loss: 0.9978174567222595,grad_norm: 0.9999992598272486, iteration: 144041
loss: 0.9650077223777771,grad_norm: 0.9489690118351215, iteration: 144042
loss: 1.0285063982009888,grad_norm: 0.9310412174314333, iteration: 144043
loss: 0.9793040752410889,grad_norm: 0.9999991424767565, iteration: 144044
loss: 0.9704697728157043,grad_norm: 0.9489926544855547, iteration: 144045
loss: 0.9834408760070801,grad_norm: 0.9310052265085755, iteration: 144046
loss: 0.9901198744773865,grad_norm: 0.999999104039971, iteration: 144047
loss: 0.9925991296768188,grad_norm: 0.9999991450999409, iteration: 144048
loss: 1.0219913721084595,grad_norm: 0.9999992566455181, iteration: 144049
loss: 0.9720535278320312,grad_norm: 0.9873142512085492, iteration: 144050
loss: 1.0126627683639526,grad_norm: 0.9999995955697896, iteration: 144051
loss: 0.9848399758338928,grad_norm: 0.8187881588199774, iteration: 144052
loss: 1.0053969621658325,grad_norm: 0.8889766787142315, iteration: 144053
loss: 1.0017120838165283,grad_norm: 0.9999996382655205, iteration: 144054
loss: 1.0013872385025024,grad_norm: 0.9258710670824553, iteration: 144055
loss: 0.992727518081665,grad_norm: 0.9999989986078268, iteration: 144056
loss: 1.0702875852584839,grad_norm: 0.9999995313371003, iteration: 144057
loss: 1.0107024908065796,grad_norm: 0.9999992188738616, iteration: 144058
loss: 1.0538023710250854,grad_norm: 0.9999997822892257, iteration: 144059
loss: 1.0368919372558594,grad_norm: 0.9999991394730181, iteration: 144060
loss: 1.0087449550628662,grad_norm: 0.9512292353830726, iteration: 144061
loss: 1.01385498046875,grad_norm: 0.9591941095328941, iteration: 144062
loss: 0.9969635009765625,grad_norm: 0.9999993022947424, iteration: 144063
loss: 0.9852389693260193,grad_norm: 0.9381184087231729, iteration: 144064
loss: 1.038510799407959,grad_norm: 0.9999990042381675, iteration: 144065
loss: 1.022218108177185,grad_norm: 0.9777215619158862, iteration: 144066
loss: 0.9966863989830017,grad_norm: 0.9999990466879394, iteration: 144067
loss: 0.9893303513526917,grad_norm: 0.8017627564614545, iteration: 144068
loss: 0.9833476543426514,grad_norm: 0.7831863124267203, iteration: 144069
loss: 0.9882173538208008,grad_norm: 0.999999437648254, iteration: 144070
loss: 0.98465496301651,grad_norm: 0.9040036961242831, iteration: 144071
loss: 1.0348786115646362,grad_norm: 0.9981126682596441, iteration: 144072
loss: 1.007763385772705,grad_norm: 0.999998974211472, iteration: 144073
loss: 1.0135447978973389,grad_norm: 0.9999992879702683, iteration: 144074
loss: 1.0278353691101074,grad_norm: 0.9999990867063271, iteration: 144075
loss: 0.9800126552581787,grad_norm: 0.9999992106336265, iteration: 144076
loss: 0.98333740234375,grad_norm: 0.9999991865522213, iteration: 144077
loss: 1.0093090534210205,grad_norm: 0.9999991749007063, iteration: 144078
loss: 1.0201199054718018,grad_norm: 0.9616967600466741, iteration: 144079
loss: 1.0102626085281372,grad_norm: 0.999999082773331, iteration: 144080
loss: 0.9732129573822021,grad_norm: 0.9999991226236639, iteration: 144081
loss: 1.0352303981781006,grad_norm: 0.9999990724142824, iteration: 144082
loss: 0.9944949150085449,grad_norm: 0.9999993004794073, iteration: 144083
loss: 1.0109583139419556,grad_norm: 0.9999996195204113, iteration: 144084
loss: 1.038305640220642,grad_norm: 0.9999990906228002, iteration: 144085
loss: 0.9940434098243713,grad_norm: 0.9999990449501107, iteration: 144086
loss: 1.0100785493850708,grad_norm: 0.9999989166146299, iteration: 144087
loss: 1.0146914720535278,grad_norm: 0.9471489448899888, iteration: 144088
loss: 0.9903095960617065,grad_norm: 0.9999992360626847, iteration: 144089
loss: 0.9718256592750549,grad_norm: 0.8930465129783324, iteration: 144090
loss: 0.9927965402603149,grad_norm: 0.9365535519046359, iteration: 144091
loss: 0.9831143617630005,grad_norm: 0.9592951145773105, iteration: 144092
loss: 0.9993597269058228,grad_norm: 0.9999990043257204, iteration: 144093
loss: 0.988078773021698,grad_norm: 0.9999992423392066, iteration: 144094
loss: 1.038170337677002,grad_norm: 0.9999994601276903, iteration: 144095
loss: 1.0149617195129395,grad_norm: 0.9999992157914813, iteration: 144096
loss: 1.0508309602737427,grad_norm: 0.9999994810846518, iteration: 144097
loss: 1.063896656036377,grad_norm: 0.9779287679763912, iteration: 144098
loss: 1.0252470970153809,grad_norm: 0.9999998825038809, iteration: 144099
loss: 1.0125340223312378,grad_norm: 0.9999991000555705, iteration: 144100
loss: 0.9745875597000122,grad_norm: 0.9148320091348705, iteration: 144101
loss: 0.9522671103477478,grad_norm: 0.9999990762861986, iteration: 144102
loss: 1.0475101470947266,grad_norm: 0.99999909653303, iteration: 144103
loss: 1.0205351114273071,grad_norm: 0.9622276102626354, iteration: 144104
loss: 1.009034514427185,grad_norm: 0.99999921346966, iteration: 144105
loss: 0.9631114602088928,grad_norm: 0.9999992618704663, iteration: 144106
loss: 1.0464447736740112,grad_norm: 0.9999998724798056, iteration: 144107
loss: 1.017887830734253,grad_norm: 0.9999995440098516, iteration: 144108
loss: 1.0302579402923584,grad_norm: 0.9999996389042045, iteration: 144109
loss: 0.9975520372390747,grad_norm: 0.9851518761845056, iteration: 144110
loss: 1.1099331378936768,grad_norm: 0.9999991032625045, iteration: 144111
loss: 1.5906325578689575,grad_norm: 0.9999997977137717, iteration: 144112
loss: 1.1865780353546143,grad_norm: 0.9999993336020666, iteration: 144113
loss: 0.9766293168067932,grad_norm: 0.9999990616255684, iteration: 144114
loss: 1.1595898866653442,grad_norm: 0.9999993952044147, iteration: 144115
loss: 1.2021018266677856,grad_norm: 0.9999995632875495, iteration: 144116
loss: 1.463874101638794,grad_norm: 1.000000030393896, iteration: 144117
loss: 1.0160936117172241,grad_norm: 0.9999997816050674, iteration: 144118
loss: 1.2435953617095947,grad_norm: 0.9999998972339258, iteration: 144119
loss: 1.5277756452560425,grad_norm: 0.9999999743993974, iteration: 144120
loss: 1.0688905715942383,grad_norm: 0.9999998973095966, iteration: 144121
loss: 1.094724416732788,grad_norm: 0.999999346658729, iteration: 144122
loss: 1.1887538433074951,grad_norm: 0.999999964804756, iteration: 144123
loss: 1.129067063331604,grad_norm: 0.9999996856076585, iteration: 144124
loss: 1.1733111143112183,grad_norm: 0.9999993099706117, iteration: 144125
loss: 1.2688719034194946,grad_norm: 0.9999996737755188, iteration: 144126
loss: 0.9839682579040527,grad_norm: 0.913631149612533, iteration: 144127
loss: 1.0143998861312866,grad_norm: 0.9999992856102449, iteration: 144128
loss: 1.039365291595459,grad_norm: 0.9999992994405434, iteration: 144129
loss: 1.0505855083465576,grad_norm: 0.9999994353123851, iteration: 144130
loss: 1.192410945892334,grad_norm: 0.9999992968596942, iteration: 144131
loss: 1.055572509765625,grad_norm: 0.9869435716492411, iteration: 144132
loss: 1.0491583347320557,grad_norm: 0.9963990392283834, iteration: 144133
loss: 1.1157445907592773,grad_norm: 0.9999996856781396, iteration: 144134
loss: 1.2731562852859497,grad_norm: 0.9999999768316743, iteration: 144135
loss: 1.1705873012542725,grad_norm: 0.999999950198673, iteration: 144136
loss: 1.0536354780197144,grad_norm: 0.9999992983047951, iteration: 144137
loss: 1.0016611814498901,grad_norm: 0.9999992759186591, iteration: 144138
loss: 1.067740797996521,grad_norm: 0.9741278977317704, iteration: 144139
loss: 1.0565319061279297,grad_norm: 0.999999224442405, iteration: 144140
loss: 0.9636490345001221,grad_norm: 0.9999990381119356, iteration: 144141
loss: 1.008057951927185,grad_norm: 0.9999992036209067, iteration: 144142
loss: 1.2231340408325195,grad_norm: 0.9999998766042651, iteration: 144143
loss: 1.0366408824920654,grad_norm: 0.948666360205087, iteration: 144144
loss: 1.0006656646728516,grad_norm: 0.812484048717681, iteration: 144145
loss: 1.0014636516571045,grad_norm: 0.9999990764811489, iteration: 144146
loss: 1.0025913715362549,grad_norm: 0.9999992645700297, iteration: 144147
loss: 0.9792084097862244,grad_norm: 0.9999995489181065, iteration: 144148
loss: 1.0110009908676147,grad_norm: 0.9999992629471977, iteration: 144149
loss: 1.0822874307632446,grad_norm: 0.9999992683859613, iteration: 144150
loss: 1.0326850414276123,grad_norm: 0.9648260507525652, iteration: 144151
loss: 1.0274466276168823,grad_norm: 0.9999993365690166, iteration: 144152
loss: 1.0478848218917847,grad_norm: 0.9999992999319588, iteration: 144153
loss: 1.120276689529419,grad_norm: 0.9999994563420385, iteration: 144154
loss: 1.104123592376709,grad_norm: 0.9999996178309524, iteration: 144155
loss: 0.9985451698303223,grad_norm: 0.9999990586647703, iteration: 144156
loss: 1.0667133331298828,grad_norm: 0.9999993203871191, iteration: 144157
loss: 0.9972159266471863,grad_norm: 1.0000000145679377, iteration: 144158
loss: 0.9808080792427063,grad_norm: 0.9999992060434786, iteration: 144159
loss: 1.0057599544525146,grad_norm: 0.9545016259917647, iteration: 144160
loss: 1.0017766952514648,grad_norm: 0.9377852795219489, iteration: 144161
loss: 0.9656635522842407,grad_norm: 0.9999991828228444, iteration: 144162
loss: 1.051142692565918,grad_norm: 0.9999991646474166, iteration: 144163
loss: 0.974250316619873,grad_norm: 0.9999992449211385, iteration: 144164
loss: 0.9916309118270874,grad_norm: 0.9999991667343278, iteration: 144165
loss: 1.2103691101074219,grad_norm: 0.9999999099505245, iteration: 144166
loss: 0.9891476631164551,grad_norm: 0.9999992382780464, iteration: 144167
loss: 1.0202730894088745,grad_norm: 0.9999990983336811, iteration: 144168
loss: 0.9942829608917236,grad_norm: 0.9999993470239986, iteration: 144169
loss: 1.0279760360717773,grad_norm: 0.9999993435121358, iteration: 144170
loss: 1.046382188796997,grad_norm: 0.9999992896105426, iteration: 144171
loss: 0.9860587120056152,grad_norm: 0.9614772208426883, iteration: 144172
loss: 1.0287965536117554,grad_norm: 0.9999996486894537, iteration: 144173
loss: 1.0481610298156738,grad_norm: 0.9999990114324919, iteration: 144174
loss: 1.0027166604995728,grad_norm: 0.9147406773192476, iteration: 144175
loss: 1.0348442792892456,grad_norm: 0.9999994683243965, iteration: 144176
loss: 0.9747216105461121,grad_norm: 0.9999994952723129, iteration: 144177
loss: 0.9959023594856262,grad_norm: 0.9330103920062383, iteration: 144178
loss: 1.0338144302368164,grad_norm: 0.8828508101494307, iteration: 144179
loss: 0.9759324789047241,grad_norm: 0.9999990669724589, iteration: 144180
loss: 1.0057392120361328,grad_norm: 0.9999992130405063, iteration: 144181
loss: 1.0599993467330933,grad_norm: 0.9999991569321314, iteration: 144182
loss: 0.9908713698387146,grad_norm: 0.9999990618698779, iteration: 144183
loss: 0.973349928855896,grad_norm: 0.9092627467540532, iteration: 144184
loss: 1.1231701374053955,grad_norm: 0.9999999544460589, iteration: 144185
loss: 0.9913870692253113,grad_norm: 0.999999279500006, iteration: 144186
loss: 1.0320038795471191,grad_norm: 0.9725067134577364, iteration: 144187
loss: 1.015089750289917,grad_norm: 0.9999992887384133, iteration: 144188
loss: 1.0537711381912231,grad_norm: 0.9999997352635767, iteration: 144189
loss: 1.031467318534851,grad_norm: 0.9999992547741113, iteration: 144190
loss: 1.0552090406417847,grad_norm: 0.9999996052766523, iteration: 144191
loss: 1.0053818225860596,grad_norm: 0.8838982212397927, iteration: 144192
loss: 1.028656005859375,grad_norm: 0.9999992314208422, iteration: 144193
loss: 1.0017391443252563,grad_norm: 0.9999993149938324, iteration: 144194
loss: 0.9859597086906433,grad_norm: 0.9999990027443211, iteration: 144195
loss: 1.014886736869812,grad_norm: 0.9999994965232684, iteration: 144196
loss: 1.0031273365020752,grad_norm: 0.9362856263020654, iteration: 144197
loss: 1.0273098945617676,grad_norm: 0.9999991980245487, iteration: 144198
loss: 1.0244406461715698,grad_norm: 0.9128926002060666, iteration: 144199
loss: 1.066859245300293,grad_norm: 0.9999994400065987, iteration: 144200
loss: 1.1053732633590698,grad_norm: 0.9999998914315942, iteration: 144201
loss: 1.0212812423706055,grad_norm: 0.9999994706025548, iteration: 144202
loss: 1.0272294282913208,grad_norm: 0.9999994692664359, iteration: 144203
loss: 1.0257024765014648,grad_norm: 0.9179611455232, iteration: 144204
loss: 1.0031379461288452,grad_norm: 0.9993525108585793, iteration: 144205
loss: 0.9893894791603088,grad_norm: 0.8631880445181848, iteration: 144206
loss: 1.0485879182815552,grad_norm: 0.9999995647627808, iteration: 144207
loss: 1.0300242900848389,grad_norm: 0.9999993454818142, iteration: 144208
loss: 0.9885991215705872,grad_norm: 0.9999994457796553, iteration: 144209
loss: 1.0256668329238892,grad_norm: 0.9999992609473207, iteration: 144210
loss: 0.997147798538208,grad_norm: 0.9768948780756382, iteration: 144211
loss: 1.0588828325271606,grad_norm: 0.9999996111690156, iteration: 144212
loss: 1.018416166305542,grad_norm: 0.9999992506264587, iteration: 144213
loss: 1.0123646259307861,grad_norm: 0.9999989977256546, iteration: 144214
loss: 1.0860556364059448,grad_norm: 0.9999997533383447, iteration: 144215
loss: 1.0281249284744263,grad_norm: 0.9999992205499064, iteration: 144216
loss: 1.0196584463119507,grad_norm: 0.9999991951143665, iteration: 144217
loss: 0.9855954051017761,grad_norm: 0.9999993355943784, iteration: 144218
loss: 1.0898432731628418,grad_norm: 0.999999881089956, iteration: 144219
loss: 1.0170278549194336,grad_norm: 0.999999281914857, iteration: 144220
loss: 1.0499900579452515,grad_norm: 0.9999995614517174, iteration: 144221
loss: 0.986025333404541,grad_norm: 0.9992203584589964, iteration: 144222
loss: 0.9994230270385742,grad_norm: 0.999999365161227, iteration: 144223
loss: 1.0141680240631104,grad_norm: 0.9999993842843131, iteration: 144224
loss: 1.02153480052948,grad_norm: 0.9999995026346096, iteration: 144225
loss: 1.067413330078125,grad_norm: 0.9999996428619486, iteration: 144226
loss: 0.9912246465682983,grad_norm: 0.9999994148989582, iteration: 144227
loss: 1.0121935606002808,grad_norm: 0.9999993078109256, iteration: 144228
loss: 0.9985823631286621,grad_norm: 0.9999990283977891, iteration: 144229
loss: 0.9688964486122131,grad_norm: 0.9999992540477092, iteration: 144230
loss: 0.9911462068557739,grad_norm: 0.9999990625916864, iteration: 144231
loss: 1.0158652067184448,grad_norm: 0.8305430481651299, iteration: 144232
loss: 1.0095758438110352,grad_norm: 0.9999993603366799, iteration: 144233
loss: 1.0169439315795898,grad_norm: 0.9935081336103229, iteration: 144234
loss: 0.9955345988273621,grad_norm: 0.9999990565197135, iteration: 144235
loss: 0.9825119376182556,grad_norm: 0.999999808764696, iteration: 144236
loss: 0.9679855108261108,grad_norm: 0.9999991785843096, iteration: 144237
loss: 0.9878412485122681,grad_norm: 0.9999991998271719, iteration: 144238
loss: 1.0177226066589355,grad_norm: 0.999998901124422, iteration: 144239
loss: 1.021032452583313,grad_norm: 0.9999990989630712, iteration: 144240
loss: 1.009387493133545,grad_norm: 0.9999990607620517, iteration: 144241
loss: 0.9885977506637573,grad_norm: 0.9999991207353448, iteration: 144242
loss: 1.0728133916854858,grad_norm: 0.9999991206177021, iteration: 144243
loss: 0.9921911358833313,grad_norm: 0.9635695936664324, iteration: 144244
loss: 0.9980044960975647,grad_norm: 0.9999990508930772, iteration: 144245
loss: 0.9913166761398315,grad_norm: 0.9999991927499607, iteration: 144246
loss: 1.0224000215530396,grad_norm: 0.919600642929119, iteration: 144247
loss: 1.0108567476272583,grad_norm: 0.9968321106907819, iteration: 144248
loss: 0.9452993869781494,grad_norm: 0.9999990590156526, iteration: 144249
loss: 1.0344069004058838,grad_norm: 0.9999995985604909, iteration: 144250
loss: 1.0022426843643188,grad_norm: 0.9734654905845357, iteration: 144251
loss: 1.0218864679336548,grad_norm: 0.999999062336249, iteration: 144252
loss: 0.969733476638794,grad_norm: 0.988961642837558, iteration: 144253
loss: 1.0017973184585571,grad_norm: 0.9999991953553509, iteration: 144254
loss: 1.103426218032837,grad_norm: 0.9999990418484846, iteration: 144255
loss: 1.022444725036621,grad_norm: 0.9999993416013692, iteration: 144256
loss: 1.0343354940414429,grad_norm: 0.9999992110059338, iteration: 144257
loss: 0.9957551956176758,grad_norm: 0.9999994458027454, iteration: 144258
loss: 1.0237172842025757,grad_norm: 0.9999990907187298, iteration: 144259
loss: 1.0059877634048462,grad_norm: 0.9999989759806455, iteration: 144260
loss: 1.0048598051071167,grad_norm: 0.9999994094604701, iteration: 144261
loss: 0.9937452077865601,grad_norm: 0.9999992783591296, iteration: 144262
loss: 0.9964485168457031,grad_norm: 0.9999990779213263, iteration: 144263
loss: 0.9582488536834717,grad_norm: 0.9999993253218219, iteration: 144264
loss: 1.0120739936828613,grad_norm: 0.9999992092122377, iteration: 144265
loss: 0.991187334060669,grad_norm: 0.999999132008071, iteration: 144266
loss: 1.1018925905227661,grad_norm: 0.9999994256468864, iteration: 144267
loss: 1.0570541620254517,grad_norm: 0.9999990916471732, iteration: 144268
loss: 1.1421210765838623,grad_norm: 0.9999993699962875, iteration: 144269
loss: 1.0679811239242554,grad_norm: 0.9999997059930176, iteration: 144270
loss: 1.0138919353485107,grad_norm: 0.9999993070672057, iteration: 144271
loss: 1.0255509614944458,grad_norm: 0.9671246803788273, iteration: 144272
loss: 1.0483742952346802,grad_norm: 0.9999993700534929, iteration: 144273
loss: 1.0254061222076416,grad_norm: 0.999999172740298, iteration: 144274
loss: 1.01585853099823,grad_norm: 0.8965937524087088, iteration: 144275
loss: 0.9869281053543091,grad_norm: 0.9487856305253928, iteration: 144276
loss: 1.0053269863128662,grad_norm: 0.9999991948218441, iteration: 144277
loss: 0.9803833961486816,grad_norm: 0.9999990624400921, iteration: 144278
loss: 1.0128893852233887,grad_norm: 0.9999993198695821, iteration: 144279
loss: 0.9897060394287109,grad_norm: 0.8845860636713325, iteration: 144280
loss: 0.9804657697677612,grad_norm: 0.9166500283060188, iteration: 144281
loss: 0.9892276525497437,grad_norm: 0.9999991426417462, iteration: 144282
loss: 1.0440176725387573,grad_norm: 0.9999997592798894, iteration: 144283
loss: 1.0018969774246216,grad_norm: 0.9999998369400896, iteration: 144284
loss: 1.1068928241729736,grad_norm: 0.9999994403410518, iteration: 144285
loss: 1.001206874847412,grad_norm: 0.9999991503424812, iteration: 144286
loss: 1.0081651210784912,grad_norm: 0.931696084149133, iteration: 144287
loss: 1.0005635023117065,grad_norm: 0.9999989703636144, iteration: 144288
loss: 0.9950641989707947,grad_norm: 0.9999991430079166, iteration: 144289
loss: 0.9691726565361023,grad_norm: 0.9999991353502448, iteration: 144290
loss: 0.9907852411270142,grad_norm: 0.8933213338491679, iteration: 144291
loss: 0.9941931962966919,grad_norm: 0.999999100171477, iteration: 144292
loss: 1.0975067615509033,grad_norm: 0.9999991757199654, iteration: 144293
loss: 0.9931408166885376,grad_norm: 0.8478791538986267, iteration: 144294
loss: 1.019823431968689,grad_norm: 0.9860848557076113, iteration: 144295
loss: 0.9873874187469482,grad_norm: 0.9999991996650692, iteration: 144296
loss: 0.9562684893608093,grad_norm: 0.9999992175347182, iteration: 144297
loss: 1.01473867893219,grad_norm: 0.9151606741859162, iteration: 144298
loss: 0.9792523384094238,grad_norm: 0.8656136690613677, iteration: 144299
loss: 1.053170919418335,grad_norm: 0.9999992475734546, iteration: 144300
loss: 1.0251176357269287,grad_norm: 0.9464645683163087, iteration: 144301
loss: 1.0182119607925415,grad_norm: 0.9591044261062363, iteration: 144302
loss: 0.9913510084152222,grad_norm: 0.9999991729283076, iteration: 144303
loss: 0.9849103689193726,grad_norm: 0.9999989576261101, iteration: 144304
loss: 0.9999640583992004,grad_norm: 0.8744242259565419, iteration: 144305
loss: 0.9940649271011353,grad_norm: 0.911104214471845, iteration: 144306
loss: 1.0718272924423218,grad_norm: 0.9999993811724693, iteration: 144307
loss: 1.0121204853057861,grad_norm: 0.9999990698107036, iteration: 144308
loss: 1.0771605968475342,grad_norm: 0.9999993883253667, iteration: 144309
loss: 1.0040397644042969,grad_norm: 0.9999991496981537, iteration: 144310
loss: 0.9986310601234436,grad_norm: 0.9999992182639237, iteration: 144311
loss: 1.008884072303772,grad_norm: 0.9197643671573207, iteration: 144312
loss: 1.0188556909561157,grad_norm: 0.9353890956983836, iteration: 144313
loss: 1.0100032091140747,grad_norm: 0.9861741245819384, iteration: 144314
loss: 1.0336397886276245,grad_norm: 0.9999992292091688, iteration: 144315
loss: 1.0709344148635864,grad_norm: 0.9999990625402203, iteration: 144316
loss: 1.011846661567688,grad_norm: 0.9999995769926439, iteration: 144317
loss: 1.0414245128631592,grad_norm: 0.9999991014111531, iteration: 144318
loss: 0.9877430200576782,grad_norm: 0.9999990928678409, iteration: 144319
loss: 0.99781733751297,grad_norm: 0.9303952585134437, iteration: 144320
loss: 1.0071192979812622,grad_norm: 0.9832504252719317, iteration: 144321
loss: 1.003064751625061,grad_norm: 0.9564979444198582, iteration: 144322
loss: 1.000351905822754,grad_norm: 0.9099451477039123, iteration: 144323
loss: 1.069248914718628,grad_norm: 0.9999999525442937, iteration: 144324
loss: 0.9946932792663574,grad_norm: 0.9999991062958749, iteration: 144325
loss: 0.9968911409378052,grad_norm: 0.99999918893038, iteration: 144326
loss: 0.9927929639816284,grad_norm: 0.9999994058934992, iteration: 144327
loss: 1.0327215194702148,grad_norm: 0.9999994774434274, iteration: 144328
loss: 1.0046889781951904,grad_norm: 0.9999991368693251, iteration: 144329
loss: 0.9608330726623535,grad_norm: 0.9900746319710052, iteration: 144330
loss: 1.0146338939666748,grad_norm: 0.9720096812529632, iteration: 144331
loss: 0.9776018261909485,grad_norm: 0.9999990813631973, iteration: 144332
loss: 0.9882393479347229,grad_norm: 0.9999991975402138, iteration: 144333
loss: 1.0350490808486938,grad_norm: 0.9999993523606289, iteration: 144334
loss: 1.0031646490097046,grad_norm: 0.9999994581274386, iteration: 144335
loss: 0.9850496053695679,grad_norm: 0.9999992380870686, iteration: 144336
loss: 1.0666775703430176,grad_norm: 0.9999993129310032, iteration: 144337
loss: 1.1217809915542603,grad_norm: 0.9970741798151986, iteration: 144338
loss: 0.9512252807617188,grad_norm: 0.9999993072364601, iteration: 144339
loss: 1.0071007013320923,grad_norm: 0.9999991671152176, iteration: 144340
loss: 1.010128378868103,grad_norm: 0.939386580758117, iteration: 144341
loss: 0.9895316958427429,grad_norm: 0.9506886802711714, iteration: 144342
loss: 1.055787444114685,grad_norm: 0.9999991885977325, iteration: 144343
loss: 1.0911242961883545,grad_norm: 0.9999992851251784, iteration: 144344
loss: 0.9841915965080261,grad_norm: 0.9999990786561649, iteration: 144345
loss: 0.9769197106361389,grad_norm: 0.9999991340565945, iteration: 144346
loss: 0.9672650694847107,grad_norm: 0.9999990793693194, iteration: 144347
loss: 1.0166938304901123,grad_norm: 0.9838937410860056, iteration: 144348
loss: 1.0199809074401855,grad_norm: 0.9210919087258426, iteration: 144349
loss: 1.0628347396850586,grad_norm: 0.9999998391474334, iteration: 144350
loss: 1.0496594905853271,grad_norm: 0.999999682355366, iteration: 144351
loss: 1.0197430849075317,grad_norm: 0.9999992118276217, iteration: 144352
loss: 1.0311150550842285,grad_norm: 0.9999991819519982, iteration: 144353
loss: 1.040075421333313,grad_norm: 0.9999993187857703, iteration: 144354
loss: 1.0432528257369995,grad_norm: 0.9781387255348911, iteration: 144355
loss: 0.9991031885147095,grad_norm: 0.9999991322054638, iteration: 144356
loss: 1.017874836921692,grad_norm: 0.9999991786587797, iteration: 144357
loss: 0.997032880783081,grad_norm: 0.9999991857066133, iteration: 144358
loss: 1.0690054893493652,grad_norm: 0.9999995216318198, iteration: 144359
loss: 0.9915733933448792,grad_norm: 0.8691793640243264, iteration: 144360
loss: 0.9875664710998535,grad_norm: 0.9999991958743574, iteration: 144361
loss: 1.0172009468078613,grad_norm: 0.889538592004643, iteration: 144362
loss: 1.0116242170333862,grad_norm: 0.9999994006995974, iteration: 144363
loss: 1.0202831029891968,grad_norm: 0.8053903495748151, iteration: 144364
loss: 1.037993311882019,grad_norm: 0.9999995055638415, iteration: 144365
loss: 1.0038437843322754,grad_norm: 0.9999991983693448, iteration: 144366
loss: 0.9704620242118835,grad_norm: 0.9999991205179571, iteration: 144367
loss: 1.058361291885376,grad_norm: 0.9999991715818438, iteration: 144368
loss: 1.0121996402740479,grad_norm: 0.8769109556255636, iteration: 144369
loss: 0.9915995001792908,grad_norm: 0.9999991824325073, iteration: 144370
loss: 0.9932368397712708,grad_norm: 0.9999995601911877, iteration: 144371
loss: 0.9763118028640747,grad_norm: 0.9999991546483307, iteration: 144372
loss: 1.0215857028961182,grad_norm: 0.9186134148305377, iteration: 144373
loss: 1.0029319524765015,grad_norm: 0.9999991006239578, iteration: 144374
loss: 0.9884057641029358,grad_norm: 0.9999997713610336, iteration: 144375
loss: 0.9928081631660461,grad_norm: 0.9556499329618561, iteration: 144376
loss: 1.0301491022109985,grad_norm: 0.9999991828746376, iteration: 144377
loss: 0.9604257941246033,grad_norm: 0.9999989885087207, iteration: 144378
loss: 1.0340540409088135,grad_norm: 0.9999992595102767, iteration: 144379
loss: 0.987126886844635,grad_norm: 0.8506396979880346, iteration: 144380
loss: 1.017588496208191,grad_norm: 0.9999999602481926, iteration: 144381
loss: 0.9175334572792053,grad_norm: 0.9999992645971796, iteration: 144382
loss: 1.0352236032485962,grad_norm: 0.999999305458683, iteration: 144383
loss: 0.9937705397605896,grad_norm: 0.992481347185232, iteration: 144384
loss: 0.9863048791885376,grad_norm: 0.9908845331266236, iteration: 144385
loss: 0.9727638363838196,grad_norm: 0.9129195499939967, iteration: 144386
loss: 0.9974446296691895,grad_norm: 0.9999990464023044, iteration: 144387
loss: 1.003905177116394,grad_norm: 0.9999993121156971, iteration: 144388
loss: 0.9578418135643005,grad_norm: 0.9790486327172744, iteration: 144389
loss: 1.0319117307662964,grad_norm: 0.9999992747049129, iteration: 144390
loss: 1.0064257383346558,grad_norm: 0.999999015086685, iteration: 144391
loss: 1.0171918869018555,grad_norm: 0.9999990073920771, iteration: 144392
loss: 1.0687681436538696,grad_norm: 0.9999996365815393, iteration: 144393
loss: 1.2000058889389038,grad_norm: 0.9999994744125043, iteration: 144394
loss: 1.004184365272522,grad_norm: 0.9999996388316349, iteration: 144395
loss: 1.028743028640747,grad_norm: 0.9000118334039963, iteration: 144396
loss: 0.9768527746200562,grad_norm: 0.9999992019130958, iteration: 144397
loss: 0.9977008104324341,grad_norm: 0.9494436952031747, iteration: 144398
loss: 1.0143921375274658,grad_norm: 0.975607873089845, iteration: 144399
loss: 0.9924529194831848,grad_norm: 0.9999992401021006, iteration: 144400
loss: 1.0422868728637695,grad_norm: 0.9999992983334016, iteration: 144401
loss: 1.0119370222091675,grad_norm: 0.9999991018488177, iteration: 144402
loss: 1.128250002861023,grad_norm: 0.9999997295470103, iteration: 144403
loss: 1.030779242515564,grad_norm: 0.9999992354623074, iteration: 144404
loss: 0.9933308362960815,grad_norm: 0.9999992638811548, iteration: 144405
loss: 1.132657766342163,grad_norm: 0.9999992580977264, iteration: 144406
loss: 0.9812221527099609,grad_norm: 0.9999991173563645, iteration: 144407
loss: 1.0407077074050903,grad_norm: 0.9999995100428901, iteration: 144408
loss: 0.9769010543823242,grad_norm: 0.9999990114029034, iteration: 144409
loss: 1.017066240310669,grad_norm: 0.9999991150461006, iteration: 144410
loss: 1.0613478422164917,grad_norm: 0.9999991785131664, iteration: 144411
loss: 1.0256495475769043,grad_norm: 0.999999137443326, iteration: 144412
loss: 0.9921764731407166,grad_norm: 0.9999990307509443, iteration: 144413
loss: 0.9800286293029785,grad_norm: 0.9269637113380861, iteration: 144414
loss: 0.9817094802856445,grad_norm: 0.9811351674597681, iteration: 144415
loss: 1.0047365427017212,grad_norm: 0.999999121095728, iteration: 144416
loss: 0.9966897368431091,grad_norm: 0.9631518942887806, iteration: 144417
loss: 0.9971498250961304,grad_norm: 0.9999992937619546, iteration: 144418
loss: 1.0399901866912842,grad_norm: 0.9999993048659169, iteration: 144419
loss: 0.9820306301116943,grad_norm: 0.9995155152064344, iteration: 144420
loss: 0.9850731492042542,grad_norm: 0.999999174873034, iteration: 144421
loss: 1.0288361310958862,grad_norm: 0.9999989898357543, iteration: 144422
loss: 1.0182386636734009,grad_norm: 0.8491913720468801, iteration: 144423
loss: 1.006142497062683,grad_norm: 0.9999994407553082, iteration: 144424
loss: 1.0133947134017944,grad_norm: 0.980613451237106, iteration: 144425
loss: 0.993279755115509,grad_norm: 0.9999991357717605, iteration: 144426
loss: 1.0112322568893433,grad_norm: 0.9058775876773727, iteration: 144427
loss: 0.9834387898445129,grad_norm: 0.999999235538698, iteration: 144428
loss: 1.016041874885559,grad_norm: 0.9999990681592742, iteration: 144429
loss: 1.0288020372390747,grad_norm: 0.9939558084645554, iteration: 144430
loss: 1.0322679281234741,grad_norm: 0.9999996188601791, iteration: 144431
loss: 0.9510854482650757,grad_norm: 0.9999989888685543, iteration: 144432
loss: 1.18351411819458,grad_norm: 0.9999992560481099, iteration: 144433
loss: 0.9990275502204895,grad_norm: 0.9884762995619867, iteration: 144434
loss: 0.9677543640136719,grad_norm: 0.9396768217920023, iteration: 144435
loss: 0.9877975583076477,grad_norm: 0.9015148608261015, iteration: 144436
loss: 1.0293556451797485,grad_norm: 0.9959832293448283, iteration: 144437
loss: 1.0043696165084839,grad_norm: 0.9999989805193488, iteration: 144438
loss: 1.0257930755615234,grad_norm: 0.9997158134437065, iteration: 144439
loss: 1.0044752359390259,grad_norm: 0.9999990237566798, iteration: 144440
loss: 1.0497394800186157,grad_norm: 0.9999993790628174, iteration: 144441
loss: 1.018526315689087,grad_norm: 0.9999993151012497, iteration: 144442
loss: 1.0058565139770508,grad_norm: 0.9560451122921088, iteration: 144443
loss: 0.9988300800323486,grad_norm: 0.9999996983468049, iteration: 144444
loss: 0.9694743156433105,grad_norm: 0.9999990748988264, iteration: 144445
loss: 1.0599088668823242,grad_norm: 0.9999998273893225, iteration: 144446
loss: 1.0165284872055054,grad_norm: 0.999999191070375, iteration: 144447
loss: 1.0126352310180664,grad_norm: 0.9999990828517284, iteration: 144448
loss: 0.9478548169136047,grad_norm: 0.9999991632169367, iteration: 144449
loss: 0.9948254227638245,grad_norm: 0.9999992160380009, iteration: 144450
loss: 0.9901394844055176,grad_norm: 0.9999991529622047, iteration: 144451
loss: 1.0153472423553467,grad_norm: 0.9999989488446012, iteration: 144452
loss: 1.0421086549758911,grad_norm: 0.9999993170825249, iteration: 144453
loss: 1.025303840637207,grad_norm: 0.9999993425125943, iteration: 144454
loss: 1.3111335039138794,grad_norm: 0.9999994323144497, iteration: 144455
loss: 1.0012532472610474,grad_norm: 0.9999992733622703, iteration: 144456
loss: 0.9456968903541565,grad_norm: 0.9999990976517416, iteration: 144457
loss: 1.0070841312408447,grad_norm: 0.9999990964769573, iteration: 144458
loss: 1.0300321578979492,grad_norm: 0.999999234472192, iteration: 144459
loss: 1.0003831386566162,grad_norm: 0.9118828223835731, iteration: 144460
loss: 1.028878092765808,grad_norm: 0.9999999794183735, iteration: 144461
loss: 1.0002492666244507,grad_norm: 0.9999993224384834, iteration: 144462
loss: 1.0387749671936035,grad_norm: 0.9487401488926214, iteration: 144463
loss: 0.993606448173523,grad_norm: 0.999999164763132, iteration: 144464
loss: 0.99521803855896,grad_norm: 0.9999991167309018, iteration: 144465
loss: 1.0159173011779785,grad_norm: 0.9999990835411555, iteration: 144466
loss: 1.0015372037887573,grad_norm: 0.9999998248188676, iteration: 144467
loss: 0.9894733428955078,grad_norm: 0.999999127112803, iteration: 144468
loss: 1.0378955602645874,grad_norm: 0.9999995370808479, iteration: 144469
loss: 1.0094292163848877,grad_norm: 0.9999993369935238, iteration: 144470
loss: 1.0160290002822876,grad_norm: 0.9999999293439114, iteration: 144471
loss: 1.0166499614715576,grad_norm: 0.9999991783470052, iteration: 144472
loss: 0.9864587187767029,grad_norm: 0.999999062452097, iteration: 144473
loss: 1.0031397342681885,grad_norm: 0.8701762318439273, iteration: 144474
loss: 1.0433522462844849,grad_norm: 0.9999989947544503, iteration: 144475
loss: 1.0762332677841187,grad_norm: 0.9999995574898771, iteration: 144476
loss: 1.0053043365478516,grad_norm: 0.8812502746040421, iteration: 144477
loss: 0.9966345429420471,grad_norm: 0.9865377260938286, iteration: 144478
loss: 1.005201816558838,grad_norm: 0.9999989626891223, iteration: 144479
loss: 1.0151900053024292,grad_norm: 0.8760743031665987, iteration: 144480
loss: 0.9520379900932312,grad_norm: 0.9548250773889388, iteration: 144481
loss: 1.0173304080963135,grad_norm: 0.8788468290256343, iteration: 144482
loss: 0.9997869729995728,grad_norm: 0.939939884786823, iteration: 144483
loss: 0.9939984083175659,grad_norm: 0.9999996638804667, iteration: 144484
loss: 1.0457831621170044,grad_norm: 0.9999997923896023, iteration: 144485
loss: 1.0247505903244019,grad_norm: 0.8922028099689406, iteration: 144486
loss: 1.0206376314163208,grad_norm: 0.9999993797595635, iteration: 144487
loss: 0.9762754440307617,grad_norm: 0.9999991543242763, iteration: 144488
loss: 0.9808264374732971,grad_norm: 0.9999992058643759, iteration: 144489
loss: 0.983690619468689,grad_norm: 0.9469310268487253, iteration: 144490
loss: 0.9789636135101318,grad_norm: 0.930255693180038, iteration: 144491
loss: 0.9961344599723816,grad_norm: 0.9417360672952014, iteration: 144492
loss: 0.9901143908500671,grad_norm: 0.9400559527079617, iteration: 144493
loss: 1.013787031173706,grad_norm: 0.9514691632711936, iteration: 144494
loss: 0.9883837699890137,grad_norm: 0.787064755273279, iteration: 144495
loss: 0.9857057332992554,grad_norm: 0.9999989644360404, iteration: 144496
loss: 1.0059406757354736,grad_norm: 0.9999990984048711, iteration: 144497
loss: 0.9533420205116272,grad_norm: 0.9999989106005044, iteration: 144498
loss: 1.0209791660308838,grad_norm: 0.9999990236325564, iteration: 144499
loss: 0.9960132837295532,grad_norm: 0.9972950914514676, iteration: 144500
loss: 0.9782504439353943,grad_norm: 0.9594910803253651, iteration: 144501
loss: 1.1331961154937744,grad_norm: 0.9999992167476638, iteration: 144502
loss: 0.9861149787902832,grad_norm: 0.9999990346546072, iteration: 144503
loss: 1.004529595375061,grad_norm: 0.9999994887261885, iteration: 144504
loss: 0.9795331954956055,grad_norm: 0.9999992961556432, iteration: 144505
loss: 0.9694692492485046,grad_norm: 0.9999989918588221, iteration: 144506
loss: 1.0330134630203247,grad_norm: 0.9999993456508284, iteration: 144507
loss: 1.0280054807662964,grad_norm: 0.9999992047911823, iteration: 144508
loss: 1.0454661846160889,grad_norm: 0.9999990034282775, iteration: 144509
loss: 1.0096487998962402,grad_norm: 0.999999291454511, iteration: 144510
loss: 1.0389307737350464,grad_norm: 0.9999991585515045, iteration: 144511
loss: 1.003480315208435,grad_norm: 0.8751392516651786, iteration: 144512
loss: 1.0075761079788208,grad_norm: 0.9999990289805778, iteration: 144513
loss: 1.0258249044418335,grad_norm: 1.0000000008008412, iteration: 144514
loss: 1.0709339380264282,grad_norm: 0.9999993481573941, iteration: 144515
loss: 1.036545753479004,grad_norm: 0.8775871009703452, iteration: 144516
loss: 0.9964674115180969,grad_norm: 0.9999991094236282, iteration: 144517
loss: 0.9926105737686157,grad_norm: 0.9518238334242118, iteration: 144518
loss: 1.0096964836120605,grad_norm: 0.9999992914115252, iteration: 144519
loss: 0.9661465287208557,grad_norm: 0.9024550299027628, iteration: 144520
loss: 1.0004438161849976,grad_norm: 0.9999991776207491, iteration: 144521
loss: 1.0096769332885742,grad_norm: 0.9999991180084238, iteration: 144522
loss: 0.9623551964759827,grad_norm: 0.9514460005252632, iteration: 144523
loss: 0.9878748059272766,grad_norm: 0.9999989856846033, iteration: 144524
loss: 0.9866967797279358,grad_norm: 0.9760555807831294, iteration: 144525
loss: 1.02005136013031,grad_norm: 0.9964187808208714, iteration: 144526
loss: 1.085345983505249,grad_norm: 0.9999991149977874, iteration: 144527
loss: 1.0447367429733276,grad_norm: 0.9999989907421932, iteration: 144528
loss: 0.9978524446487427,grad_norm: 0.9999991080879155, iteration: 144529
loss: 1.0057474374771118,grad_norm: 0.9999990982108953, iteration: 144530
loss: 1.0084903240203857,grad_norm: 0.9999990527608055, iteration: 144531
loss: 1.0161744356155396,grad_norm: 0.9299885680829038, iteration: 144532
loss: 1.024281620979309,grad_norm: 0.9999994622072701, iteration: 144533
loss: 1.0095881223678589,grad_norm: 0.9999999176232461, iteration: 144534
loss: 1.0213017463684082,grad_norm: 0.9999990166654773, iteration: 144535
loss: 1.03044855594635,grad_norm: 0.9999991501204278, iteration: 144536
loss: 1.033510446548462,grad_norm: 0.9999993288564077, iteration: 144537
loss: 1.028624176979065,grad_norm: 0.999999123266011, iteration: 144538
loss: 0.9762204885482788,grad_norm: 0.9999992076895784, iteration: 144539
loss: 1.0294567346572876,grad_norm: 0.9214238741654673, iteration: 144540
loss: 0.9954013824462891,grad_norm: 0.9999992216739148, iteration: 144541
loss: 0.9965539574623108,grad_norm: 0.999999041906702, iteration: 144542
loss: 0.9970974922180176,grad_norm: 0.9188084118476447, iteration: 144543
loss: 1.0273995399475098,grad_norm: 0.9999991638784888, iteration: 144544
loss: 1.0224074125289917,grad_norm: 0.9929640478793792, iteration: 144545
loss: 0.9905723929405212,grad_norm: 0.9999990958059942, iteration: 144546
loss: 0.96994948387146,grad_norm: 0.9704646107585416, iteration: 144547
loss: 1.0522667169570923,grad_norm: 0.999999296949356, iteration: 144548
loss: 0.9845368266105652,grad_norm: 0.9759002705876129, iteration: 144549
loss: 0.9851527214050293,grad_norm: 0.924689872614184, iteration: 144550
loss: 0.9645142555236816,grad_norm: 0.9238579730199609, iteration: 144551
loss: 1.0244417190551758,grad_norm: 0.9999994340976943, iteration: 144552
loss: 1.0051615238189697,grad_norm: 0.9999991996927717, iteration: 144553
loss: 1.0077906847000122,grad_norm: 0.9999991642553558, iteration: 144554
loss: 0.9882436394691467,grad_norm: 0.8817228498602739, iteration: 144555
loss: 0.9772092700004578,grad_norm: 0.9999990611878792, iteration: 144556
loss: 1.0402214527130127,grad_norm: 0.9999991460151549, iteration: 144557
loss: 0.968774676322937,grad_norm: 0.9394028494430346, iteration: 144558
loss: 0.9876284599304199,grad_norm: 0.9999989977838768, iteration: 144559
loss: 1.0251986980438232,grad_norm: 0.9913966706238542, iteration: 144560
loss: 1.0119608640670776,grad_norm: 0.9861524551305203, iteration: 144561
loss: 1.0306183099746704,grad_norm: 0.9999995204000275, iteration: 144562
loss: 1.0112521648406982,grad_norm: 0.98696403478608, iteration: 144563
loss: 1.0344383716583252,grad_norm: 0.9999992077342158, iteration: 144564
loss: 1.0601845979690552,grad_norm: 0.999999376725133, iteration: 144565
loss: 1.0376620292663574,grad_norm: 0.9999990658574527, iteration: 144566
loss: 1.0026417970657349,grad_norm: 0.9999991663833429, iteration: 144567
loss: 0.9714032411575317,grad_norm: 0.9024829850004932, iteration: 144568
loss: 1.0225489139556885,grad_norm: 0.961690092520397, iteration: 144569
loss: 1.0131739377975464,grad_norm: 0.9834054956716557, iteration: 144570
loss: 1.0130198001861572,grad_norm: 0.9999990253162603, iteration: 144571
loss: 1.0232658386230469,grad_norm: 0.9823820162052512, iteration: 144572
loss: 0.9543679356575012,grad_norm: 0.9999990540204697, iteration: 144573
loss: 1.0206096172332764,grad_norm: 0.999999014225055, iteration: 144574
loss: 0.9823858737945557,grad_norm: 0.9999989963870404, iteration: 144575
loss: 1.0495630502700806,grad_norm: 0.9999998170268551, iteration: 144576
loss: 0.979836642742157,grad_norm: 0.9999993373606174, iteration: 144577
loss: 1.040502667427063,grad_norm: 0.9999989715933367, iteration: 144578
loss: 0.9845775961875916,grad_norm: 0.8974523477331273, iteration: 144579
loss: 0.9880069494247437,grad_norm: 0.9999991642762237, iteration: 144580
loss: 1.0230517387390137,grad_norm: 0.999999340101681, iteration: 144581
loss: 0.9844442009925842,grad_norm: 0.9999990069800515, iteration: 144582
loss: 1.0829334259033203,grad_norm: 0.9999999279426145, iteration: 144583
loss: 0.9908339977264404,grad_norm: 0.9999992065884388, iteration: 144584
loss: 0.9975286722183228,grad_norm: 0.9999992677048021, iteration: 144585
loss: 0.9915347099304199,grad_norm: 0.8405607758546226, iteration: 144586
loss: 1.0204005241394043,grad_norm: 0.9999991275321483, iteration: 144587
loss: 1.001718521118164,grad_norm: 0.9607229032423072, iteration: 144588
loss: 1.0228546857833862,grad_norm: 0.9999991398732306, iteration: 144589
loss: 1.0083107948303223,grad_norm: 0.9999991292120348, iteration: 144590
loss: 1.030966877937317,grad_norm: 0.9401451323464247, iteration: 144591
loss: 0.9942915439605713,grad_norm: 0.9999993061835132, iteration: 144592
loss: 0.9618899822235107,grad_norm: 0.9453903956307538, iteration: 144593
loss: 1.0055776834487915,grad_norm: 0.9999998627521528, iteration: 144594
loss: 1.0886121988296509,grad_norm: 0.9256289075926353, iteration: 144595
loss: 0.9737856388092041,grad_norm: 0.9202688843598583, iteration: 144596
loss: 1.0151183605194092,grad_norm: 0.9409694024468079, iteration: 144597
loss: 0.9998844265937805,grad_norm: 0.9999991355799962, iteration: 144598
loss: 1.069390892982483,grad_norm: 0.9999991361604017, iteration: 144599
loss: 0.9752609133720398,grad_norm: 0.9999995594717489, iteration: 144600
loss: 0.9768372178077698,grad_norm: 0.9747345922570974, iteration: 144601
loss: 0.972062885761261,grad_norm: 0.904884748482442, iteration: 144602
loss: 1.0056697130203247,grad_norm: 0.9999992988931902, iteration: 144603
loss: 0.9910991787910461,grad_norm: 0.9214347305953161, iteration: 144604
loss: 0.9819151759147644,grad_norm: 0.903261897574607, iteration: 144605
loss: 1.0297589302062988,grad_norm: 0.976267449659328, iteration: 144606
loss: 0.9922304749488831,grad_norm: 0.8870966417538103, iteration: 144607
loss: 0.9827092289924622,grad_norm: 0.999999065591215, iteration: 144608
loss: 1.014917254447937,grad_norm: 0.9569142684439417, iteration: 144609
loss: 0.9336786270141602,grad_norm: 0.9999992083925785, iteration: 144610
loss: 1.0168750286102295,grad_norm: 0.9999991299470454, iteration: 144611
loss: 0.9985518455505371,grad_norm: 0.9999990227716784, iteration: 144612
loss: 1.0108602046966553,grad_norm: 0.9153212122425004, iteration: 144613
loss: 1.0082401037216187,grad_norm: 0.9999990225890711, iteration: 144614
loss: 0.9759624600410461,grad_norm: 0.9711267579728524, iteration: 144615
loss: 1.0000046491622925,grad_norm: 0.999999130283704, iteration: 144616
loss: 1.1010072231292725,grad_norm: 0.9999992346201244, iteration: 144617
loss: 0.9852257966995239,grad_norm: 0.9982530551047375, iteration: 144618
loss: 1.06704843044281,grad_norm: 0.9999996312745201, iteration: 144619
loss: 1.0023930072784424,grad_norm: 0.9456685963881968, iteration: 144620
loss: 0.9833651185035706,grad_norm: 0.9999989742773675, iteration: 144621
loss: 0.9967811107635498,grad_norm: 0.9999993258473596, iteration: 144622
loss: 1.0048370361328125,grad_norm: 0.9999993093805974, iteration: 144623
loss: 1.011649489402771,grad_norm: 0.9750675902756848, iteration: 144624
loss: 1.0270072221755981,grad_norm: 0.8618950590999113, iteration: 144625
loss: 0.9881333112716675,grad_norm: 0.9999990687540319, iteration: 144626
loss: 0.9875653386116028,grad_norm: 0.9999992562962772, iteration: 144627
loss: 0.9768316149711609,grad_norm: 0.9999990883454212, iteration: 144628
loss: 1.0373966693878174,grad_norm: 0.9999995630945097, iteration: 144629
loss: 1.0020482540130615,grad_norm: 0.9999989436359212, iteration: 144630
loss: 1.0227891206741333,grad_norm: 0.9999990557095996, iteration: 144631
loss: 0.9453737139701843,grad_norm: 0.9999992499328877, iteration: 144632
loss: 0.9756774306297302,grad_norm: 0.9999991381115814, iteration: 144633
loss: 0.990453839302063,grad_norm: 0.999999118911424, iteration: 144634
loss: 0.99550861120224,grad_norm: 0.9271031176489326, iteration: 144635
loss: 0.9842007160186768,grad_norm: 0.9999990738946417, iteration: 144636
loss: 1.022399663925171,grad_norm: 0.9955096075981033, iteration: 144637
loss: 0.9473070502281189,grad_norm: 0.9999990787612293, iteration: 144638
loss: 0.997782289981842,grad_norm: 0.9576186476768701, iteration: 144639
loss: 0.9916073083877563,grad_norm: 0.97994145564634, iteration: 144640
loss: 0.9975517988204956,grad_norm: 0.9999989254872572, iteration: 144641
loss: 0.9654116630554199,grad_norm: 0.852742571766071, iteration: 144642
loss: 1.0490741729736328,grad_norm: 0.9999992887466328, iteration: 144643
loss: 1.0354067087173462,grad_norm: 0.9999999327483697, iteration: 144644
loss: 0.9732589721679688,grad_norm: 0.9999991702397986, iteration: 144645
loss: 0.9907704591751099,grad_norm: 0.9999992501214192, iteration: 144646
loss: 1.0062311887741089,grad_norm: 0.9999994043015326, iteration: 144647
loss: 1.0434447526931763,grad_norm: 0.9999990964906862, iteration: 144648
loss: 1.0071860551834106,grad_norm: 0.8362060976982479, iteration: 144649
loss: 1.0254474878311157,grad_norm: 0.9264168817237329, iteration: 144650
loss: 1.0037825107574463,grad_norm: 0.9029914267112343, iteration: 144651
loss: 0.994049608707428,grad_norm: 0.9987878678394788, iteration: 144652
loss: 1.025686502456665,grad_norm: 0.8816044802135843, iteration: 144653
loss: 1.0805144309997559,grad_norm: 0.9999991743364253, iteration: 144654
loss: 1.0214550495147705,grad_norm: 0.9999991075459059, iteration: 144655
loss: 0.9349563717842102,grad_norm: 0.9031653648748588, iteration: 144656
loss: 0.9619596600532532,grad_norm: 0.9999990595755741, iteration: 144657
loss: 1.0031980276107788,grad_norm: 0.9999990410883267, iteration: 144658
loss: 1.0013030767440796,grad_norm: 0.9999992019447889, iteration: 144659
loss: 0.971743106842041,grad_norm: 0.9999989996177115, iteration: 144660
loss: 1.0460671186447144,grad_norm: 0.9999990076657606, iteration: 144661
loss: 1.001561164855957,grad_norm: 0.9999990767589437, iteration: 144662
loss: 0.9905453324317932,grad_norm: 0.9999990876359763, iteration: 144663
loss: 1.042279601097107,grad_norm: 0.9999989364851953, iteration: 144664
loss: 0.9974314570426941,grad_norm: 0.9999991274362307, iteration: 144665
loss: 1.0053966045379639,grad_norm: 0.9999992805678425, iteration: 144666
loss: 1.055082082748413,grad_norm: 0.9999992013146798, iteration: 144667
loss: 1.0266190767288208,grad_norm: 0.9999994023050369, iteration: 144668
loss: 1.0081806182861328,grad_norm: 0.8747496591732946, iteration: 144669
loss: 1.0130468606948853,grad_norm: 0.9999991646557795, iteration: 144670
loss: 1.0046826601028442,grad_norm: 0.9966199825876623, iteration: 144671
loss: 0.9871122241020203,grad_norm: 0.9999992543789343, iteration: 144672
loss: 1.0337430238723755,grad_norm: 0.9999990566784378, iteration: 144673
loss: 0.985747218132019,grad_norm: 0.9999992546706653, iteration: 144674
loss: 1.0283942222595215,grad_norm: 0.9999991991243746, iteration: 144675
loss: 0.9939121603965759,grad_norm: 0.8531751751573048, iteration: 144676
loss: 0.9851703643798828,grad_norm: 0.999999056758728, iteration: 144677
loss: 0.9698606729507446,grad_norm: 0.9406829157920429, iteration: 144678
loss: 1.1200453042984009,grad_norm: 0.9999993718299974, iteration: 144679
loss: 1.0199739933013916,grad_norm: 0.9999991931851604, iteration: 144680
loss: 1.0080803632736206,grad_norm: 0.9689168074931276, iteration: 144681
loss: 0.987248957157135,grad_norm: 0.9999990521017871, iteration: 144682
loss: 0.9936316609382629,grad_norm: 0.9494888423034253, iteration: 144683
loss: 1.0291837453842163,grad_norm: 0.9999990730260232, iteration: 144684
loss: 0.9638878107070923,grad_norm: 0.9910581445269958, iteration: 144685
loss: 0.9914414882659912,grad_norm: 0.9645371583430319, iteration: 144686
loss: 0.987830400466919,grad_norm: 0.9685132642936, iteration: 144687
loss: 0.9633836150169373,grad_norm: 0.9999992382572996, iteration: 144688
loss: 1.0006587505340576,grad_norm: 0.9492859254938584, iteration: 144689
loss: 1.0031923055648804,grad_norm: 0.9941897482756707, iteration: 144690
loss: 0.9821110367774963,grad_norm: 0.9816587891408296, iteration: 144691
loss: 1.043074607849121,grad_norm: 0.9999996119703318, iteration: 144692
loss: 0.9950070381164551,grad_norm: 0.855220774888692, iteration: 144693
loss: 1.0090858936309814,grad_norm: 0.9999992572609476, iteration: 144694
loss: 0.9847683310508728,grad_norm: 0.9999991157141731, iteration: 144695
loss: 1.0144267082214355,grad_norm: 0.9999990786074936, iteration: 144696
loss: 0.9680641293525696,grad_norm: 0.9914495000595847, iteration: 144697
loss: 0.9913131594657898,grad_norm: 0.9999991110208579, iteration: 144698
loss: 0.982694685459137,grad_norm: 0.9645104734625858, iteration: 144699
loss: 0.9898855090141296,grad_norm: 0.949671437799444, iteration: 144700
loss: 1.032791018486023,grad_norm: 0.9999991228635924, iteration: 144701
loss: 0.9888706803321838,grad_norm: 0.9147718779102624, iteration: 144702
loss: 1.0100038051605225,grad_norm: 0.999999186962172, iteration: 144703
loss: 0.9903864860534668,grad_norm: 0.861665840891966, iteration: 144704
loss: 0.9759207367897034,grad_norm: 0.8584877690543845, iteration: 144705
loss: 1.1958565711975098,grad_norm: 0.9999998566003967, iteration: 144706
loss: 1.0603440999984741,grad_norm: 0.999999512959627, iteration: 144707
loss: 1.010226845741272,grad_norm: 0.9441081895651839, iteration: 144708
loss: 1.0230382680892944,grad_norm: 0.948483756346657, iteration: 144709
loss: 1.0366626977920532,grad_norm: 0.999999168962643, iteration: 144710
loss: 1.0050122737884521,grad_norm: 0.9271660685392847, iteration: 144711
loss: 1.0103435516357422,grad_norm: 0.9591859603583198, iteration: 144712
loss: 1.0107249021530151,grad_norm: 0.9066080194964068, iteration: 144713
loss: 1.0026432275772095,grad_norm: 0.999998977523524, iteration: 144714
loss: 1.0130068063735962,grad_norm: 0.9999992126917603, iteration: 144715
loss: 1.014800786972046,grad_norm: 0.9999991817819235, iteration: 144716
loss: 0.9576554298400879,grad_norm: 0.9999991505724077, iteration: 144717
loss: 0.9882031083106995,grad_norm: 0.99999890353625, iteration: 144718
loss: 1.005092978477478,grad_norm: 0.9999994163323237, iteration: 144719
loss: 1.0293079614639282,grad_norm: 0.9999994497542666, iteration: 144720
loss: 1.0012571811676025,grad_norm: 0.9999991611663407, iteration: 144721
loss: 0.9951571226119995,grad_norm: 0.9999991323591059, iteration: 144722
loss: 0.9720797538757324,grad_norm: 0.9331904720479759, iteration: 144723
loss: 1.010062575340271,grad_norm: 0.9999990619214549, iteration: 144724
loss: 0.9978573322296143,grad_norm: 0.9999991839067844, iteration: 144725
loss: 1.0166666507720947,grad_norm: 0.9324664285497267, iteration: 144726
loss: 1.0112463235855103,grad_norm: 0.8759282743710767, iteration: 144727
loss: 1.0154352188110352,grad_norm: 0.9999990547187888, iteration: 144728
loss: 1.0437594652175903,grad_norm: 0.9999991841566894, iteration: 144729
loss: 1.0175868272781372,grad_norm: 0.999999096771229, iteration: 144730
loss: 1.0259428024291992,grad_norm: 0.9999989293641579, iteration: 144731
loss: 1.0091590881347656,grad_norm: 0.9999991106040474, iteration: 144732
loss: 1.0187464952468872,grad_norm: 0.9999991343107647, iteration: 144733
loss: 1.00723135471344,grad_norm: 0.9999992308329915, iteration: 144734
loss: 1.0082837343215942,grad_norm: 0.999999115673197, iteration: 144735
loss: 0.941921591758728,grad_norm: 0.9999991042195698, iteration: 144736
loss: 0.9922229647636414,grad_norm: 0.8890957689194339, iteration: 144737
loss: 0.9982669949531555,grad_norm: 0.9999991655271387, iteration: 144738
loss: 1.0154330730438232,grad_norm: 0.9999989815528743, iteration: 144739
loss: 0.957435667514801,grad_norm: 0.9999989827369792, iteration: 144740
loss: 1.0099133253097534,grad_norm: 0.9170572787329555, iteration: 144741
loss: 1.015844702720642,grad_norm: 0.9999992196842769, iteration: 144742
loss: 0.9934378266334534,grad_norm: 0.9039096840682619, iteration: 144743
loss: 0.9797353744506836,grad_norm: 0.976377400456461, iteration: 144744
loss: 1.001901388168335,grad_norm: 0.999998928761562, iteration: 144745
loss: 1.0167791843414307,grad_norm: 0.8694476229024918, iteration: 144746
loss: 1.045658826828003,grad_norm: 0.99999927900738, iteration: 144747
loss: 1.0147117376327515,grad_norm: 0.9999990893475801, iteration: 144748
loss: 1.020058274269104,grad_norm: 0.9999993402386923, iteration: 144749
loss: 1.0117549896240234,grad_norm: 0.8344210459063401, iteration: 144750
loss: 1.0595096349716187,grad_norm: 0.9999995223341344, iteration: 144751
loss: 0.9754976630210876,grad_norm: 0.9979149109262637, iteration: 144752
loss: 0.9719480872154236,grad_norm: 0.999999205924708, iteration: 144753
loss: 0.9993546009063721,grad_norm: 0.9527809838535397, iteration: 144754
loss: 1.0142419338226318,grad_norm: 0.9999990246699924, iteration: 144755
loss: 1.000220537185669,grad_norm: 0.8427958843744607, iteration: 144756
loss: 0.999333918094635,grad_norm: 0.999999297373297, iteration: 144757
loss: 1.042958378791809,grad_norm: 0.9999990903950421, iteration: 144758
loss: 0.9583734273910522,grad_norm: 0.9999990357693572, iteration: 144759
loss: 0.9861487150192261,grad_norm: 0.9999991389285995, iteration: 144760
loss: 0.9939175248146057,grad_norm: 0.9059111379820733, iteration: 144761
loss: 0.992074191570282,grad_norm: 0.8637150625291685, iteration: 144762
loss: 0.9846176505088806,grad_norm: 0.9744198924202289, iteration: 144763
loss: 1.0071969032287598,grad_norm: 0.9999991469550982, iteration: 144764
loss: 1.0236533880233765,grad_norm: 0.9999990174264636, iteration: 144765
loss: 0.9688212275505066,grad_norm: 0.966640058933503, iteration: 144766
loss: 1.0252141952514648,grad_norm: 0.9252044078960621, iteration: 144767
loss: 1.0285053253173828,grad_norm: 0.9999990250890051, iteration: 144768
loss: 0.9962357878684998,grad_norm: 0.9999993193976718, iteration: 144769
loss: 1.245202898979187,grad_norm: 0.9999995867204268, iteration: 144770
loss: 1.0917844772338867,grad_norm: 0.9999992161298213, iteration: 144771
loss: 1.0208841562271118,grad_norm: 0.9999990169736845, iteration: 144772
loss: 1.0207741260528564,grad_norm: 0.9348377884203726, iteration: 144773
loss: 0.9954811930656433,grad_norm: 0.999999108250697, iteration: 144774
loss: 0.9971975088119507,grad_norm: 0.8785493375165601, iteration: 144775
loss: 0.963861882686615,grad_norm: 0.9165891183900223, iteration: 144776
loss: 0.9932222366333008,grad_norm: 0.9892560459874076, iteration: 144777
loss: 0.9950588941574097,grad_norm: 0.9999991336621138, iteration: 144778
loss: 0.9610714912414551,grad_norm: 0.8605556445090077, iteration: 144779
loss: 1.112526535987854,grad_norm: 0.9999996493896363, iteration: 144780
loss: 0.9909873008728027,grad_norm: 0.9999991127323944, iteration: 144781
loss: 0.98056560754776,grad_norm: 0.999998995689155, iteration: 144782
loss: 1.060895562171936,grad_norm: 0.9999995038381048, iteration: 144783
loss: 1.0426266193389893,grad_norm: 0.9999994809941561, iteration: 144784
loss: 1.028529405593872,grad_norm: 0.9999991418170108, iteration: 144785
loss: 1.221374273300171,grad_norm: 0.9999998155646507, iteration: 144786
loss: 0.9947853684425354,grad_norm: 0.8403230984552568, iteration: 144787
loss: 1.0171020030975342,grad_norm: 0.9999990152902223, iteration: 144788
loss: 1.0194164514541626,grad_norm: 0.999999207892172, iteration: 144789
loss: 1.0209637880325317,grad_norm: 0.999999230978092, iteration: 144790
loss: 1.04469895362854,grad_norm: 0.9999990456253397, iteration: 144791
loss: 0.9626399278640747,grad_norm: 0.9999991268595813, iteration: 144792
loss: 1.2651978731155396,grad_norm: 0.9999992350816762, iteration: 144793
loss: 0.9921796917915344,grad_norm: 0.9999997963119536, iteration: 144794
loss: 0.9982502460479736,grad_norm: 0.9999995573021483, iteration: 144795
loss: 1.010040044784546,grad_norm: 0.9999990450144224, iteration: 144796
loss: 1.0054513216018677,grad_norm: 0.9999991928534848, iteration: 144797
loss: 0.9764177799224854,grad_norm: 0.9798705869056744, iteration: 144798
loss: 0.9783003926277161,grad_norm: 0.9999990232180715, iteration: 144799
loss: 0.9959270358085632,grad_norm: 0.9562587975308966, iteration: 144800
loss: 0.9663102626800537,grad_norm: 0.999999121273601, iteration: 144801
loss: 1.1471636295318604,grad_norm: 0.9999996806128351, iteration: 144802
loss: 1.1428513526916504,grad_norm: 0.999999624619228, iteration: 144803
loss: 1.2125401496887207,grad_norm: 0.9999995399228926, iteration: 144804
loss: 1.108333945274353,grad_norm: 0.9999990320345918, iteration: 144805
loss: 1.0140752792358398,grad_norm: 0.9999991722234907, iteration: 144806
loss: 1.1333874464035034,grad_norm: 1.0000000907803241, iteration: 144807
loss: 1.009381890296936,grad_norm: 0.9999992178084308, iteration: 144808
loss: 1.099626064300537,grad_norm: 0.9999996670108057, iteration: 144809
loss: 1.0882714986801147,grad_norm: 0.9999991715636825, iteration: 144810
loss: 1.1014983654022217,grad_norm: 0.9922980040598365, iteration: 144811
loss: 1.035585641860962,grad_norm: 0.9882615182823725, iteration: 144812
loss: 0.9807772636413574,grad_norm: 0.9999991022484169, iteration: 144813
loss: 0.9958832263946533,grad_norm: 0.9999990306448039, iteration: 144814
loss: 1.0548927783966064,grad_norm: 0.9999995036608891, iteration: 144815
loss: 1.21937894821167,grad_norm: 0.9999998500674022, iteration: 144816
loss: 1.035174012184143,grad_norm: 0.9999990837463143, iteration: 144817
loss: 1.1835383176803589,grad_norm: 0.9999998836427678, iteration: 144818
loss: 0.9844210147857666,grad_norm: 0.999999161401587, iteration: 144819
loss: 1.180093765258789,grad_norm: 1.000000022268539, iteration: 144820
loss: 1.2177696228027344,grad_norm: 0.9999997653046881, iteration: 144821
loss: 1.2036712169647217,grad_norm: 0.9999999084996136, iteration: 144822
loss: 1.190679669380188,grad_norm: 0.9999997424664806, iteration: 144823
loss: 1.1366561651229858,grad_norm: 0.9999998731318079, iteration: 144824
loss: 1.1306209564208984,grad_norm: 0.9999998575995167, iteration: 144825
loss: 1.3500711917877197,grad_norm: 0.99999983930351, iteration: 144826
loss: 1.173655390739441,grad_norm: 0.9999991995960502, iteration: 144827
loss: 1.2584484815597534,grad_norm: 0.9999998491396096, iteration: 144828
loss: 1.128849744796753,grad_norm: 0.9999991222996095, iteration: 144829
loss: 1.1108506917953491,grad_norm: 0.9999996956781332, iteration: 144830
loss: 1.5717835426330566,grad_norm: 0.9999998735232564, iteration: 144831
loss: 1.1868952512741089,grad_norm: 0.9999999069737281, iteration: 144832
loss: 0.968997597694397,grad_norm: 0.9999993812521913, iteration: 144833
loss: 1.779227614402771,grad_norm: 0.9999999630430746, iteration: 144834
loss: 1.0205154418945312,grad_norm: 0.9999992313846705, iteration: 144835
loss: 1.2707571983337402,grad_norm: 0.9999998840953016, iteration: 144836
loss: 1.255151391029358,grad_norm: 0.999999912631866, iteration: 144837
loss: 1.1033011674880981,grad_norm: 0.999999968083779, iteration: 144838
loss: 1.263516902923584,grad_norm: 0.9999999028366287, iteration: 144839
loss: 1.033218264579773,grad_norm: 0.9999991506397419, iteration: 144840
loss: 1.2087823152542114,grad_norm: 1.0000000834986422, iteration: 144841
loss: 1.2910919189453125,grad_norm: 0.9999999039335546, iteration: 144842
loss: 1.2146961688995361,grad_norm: 0.9999998023119906, iteration: 144843
loss: 1.1479172706604004,grad_norm: 0.9999993949260937, iteration: 144844
loss: 1.3123902082443237,grad_norm: 1.0000000975455494, iteration: 144845
loss: 1.3484636545181274,grad_norm: 0.9999998433187209, iteration: 144846
loss: 1.1594773530960083,grad_norm: 0.9999996613452155, iteration: 144847
loss: 1.2471956014633179,grad_norm: 0.9999998464613221, iteration: 144848
loss: 1.2514656782150269,grad_norm: 0.9999999138514621, iteration: 144849
loss: 1.197375774383545,grad_norm: 0.9999997334135695, iteration: 144850
loss: 1.0939139127731323,grad_norm: 0.9999997850201029, iteration: 144851
loss: 1.1340855360031128,grad_norm: 0.999999991626311, iteration: 144852
loss: 1.1351135969161987,grad_norm: 0.999999186923793, iteration: 144853
loss: 1.0976436138153076,grad_norm: 0.9999995169156151, iteration: 144854
loss: 0.9897485375404358,grad_norm: 0.9999993461505858, iteration: 144855
loss: 1.264163851737976,grad_norm: 1.0000000243782274, iteration: 144856
loss: 1.201117992401123,grad_norm: 0.9999997387989078, iteration: 144857
loss: 1.052344560623169,grad_norm: 0.9999997958269942, iteration: 144858
loss: 1.0759782791137695,grad_norm: 0.9999998768405867, iteration: 144859
loss: 1.0155770778656006,grad_norm: 0.9999991374711191, iteration: 144860
loss: 1.011509656906128,grad_norm: 0.9999989988987925, iteration: 144861
loss: 0.9892287254333496,grad_norm: 0.9999991419892937, iteration: 144862
loss: 1.0416969060897827,grad_norm: 0.9999991220866944, iteration: 144863
loss: 0.9774417877197266,grad_norm: 0.9999990801073825, iteration: 144864
loss: 1.0470986366271973,grad_norm: 0.9999995571388276, iteration: 144865
loss: 1.107412576675415,grad_norm: 0.9999997920018359, iteration: 144866
loss: 1.0494754314422607,grad_norm: 0.999999142865267, iteration: 144867
loss: 1.0059705972671509,grad_norm: 0.9999995806936416, iteration: 144868
loss: 1.0040117502212524,grad_norm: 0.9999994066996053, iteration: 144869
loss: 1.07748281955719,grad_norm: 0.9999998955249976, iteration: 144870
loss: 1.0449812412261963,grad_norm: 0.9999991142407973, iteration: 144871
loss: 1.0105215311050415,grad_norm: 0.931430641526324, iteration: 144872
loss: 0.9978397488594055,grad_norm: 0.9999991313959226, iteration: 144873
loss: 0.9735156893730164,grad_norm: 0.9999997183139717, iteration: 144874
loss: 1.071408987045288,grad_norm: 0.9999992646908595, iteration: 144875
loss: 0.9982792139053345,grad_norm: 0.9999991126353418, iteration: 144876
loss: 0.988890528678894,grad_norm: 0.9765690470307367, iteration: 144877
loss: 1.0385581254959106,grad_norm: 0.9999991066608858, iteration: 144878
loss: 1.041538953781128,grad_norm: 0.999999038001759, iteration: 144879
loss: 0.9653375148773193,grad_norm: 0.9700789620600729, iteration: 144880
loss: 0.9964590668678284,grad_norm: 0.9999991622485561, iteration: 144881
loss: 1.2611756324768066,grad_norm: 0.9999999504806651, iteration: 144882
loss: 1.0189191102981567,grad_norm: 0.9999992602926933, iteration: 144883
loss: 0.9766600131988525,grad_norm: 0.9999992309557696, iteration: 144884
loss: 1.0493360757827759,grad_norm: 0.9966858324120516, iteration: 144885
loss: 1.0062239170074463,grad_norm: 0.9999991125329172, iteration: 144886
loss: 1.0578725337982178,grad_norm: 0.982922141724091, iteration: 144887
loss: 0.9711686968803406,grad_norm: 0.9679753134786006, iteration: 144888
loss: 0.9846218228340149,grad_norm: 0.9271041916770693, iteration: 144889
loss: 1.0194363594055176,grad_norm: 0.9999991164836499, iteration: 144890
loss: 1.0627952814102173,grad_norm: 0.9999999781875036, iteration: 144891
loss: 1.0756150484085083,grad_norm: 0.9999995927386776, iteration: 144892
loss: 0.9969598650932312,grad_norm: 0.9999992991743134, iteration: 144893
loss: 1.002123236656189,grad_norm: 0.9680031275194109, iteration: 144894
loss: 1.0001933574676514,grad_norm: 0.99999911262606, iteration: 144895
loss: 1.0060243606567383,grad_norm: 0.9740859724934433, iteration: 144896
loss: 0.9998785257339478,grad_norm: 0.9999990147618653, iteration: 144897
loss: 1.017229437828064,grad_norm: 0.8904808245467823, iteration: 144898
loss: 1.0063952207565308,grad_norm: 0.9999992443160571, iteration: 144899
loss: 0.9813385009765625,grad_norm: 0.999999021922283, iteration: 144900
loss: 0.9982604384422302,grad_norm: 0.8368627784167689, iteration: 144901
loss: 0.9975704550743103,grad_norm: 0.9999992284480357, iteration: 144902
loss: 0.9970900416374207,grad_norm: 0.9999992467955138, iteration: 144903
loss: 0.9916011691093445,grad_norm: 0.9580677940671528, iteration: 144904
loss: 1.0080842971801758,grad_norm: 0.9999994767127829, iteration: 144905
loss: 0.9983603954315186,grad_norm: 0.9381117248173907, iteration: 144906
loss: 1.0050424337387085,grad_norm: 0.9582156961164088, iteration: 144907
loss: 0.991637110710144,grad_norm: 0.9943261714882563, iteration: 144908
loss: 1.009671926498413,grad_norm: 0.8814037849000566, iteration: 144909
loss: 0.9858286380767822,grad_norm: 0.9999990143032664, iteration: 144910
loss: 0.9797223806381226,grad_norm: 0.9999993447166854, iteration: 144911
loss: 0.9859510660171509,grad_norm: 0.9999990882818596, iteration: 144912
loss: 1.0637075901031494,grad_norm: 0.9999993566171197, iteration: 144913
loss: 0.9657993316650391,grad_norm: 0.9999991842653964, iteration: 144914
loss: 1.0243072509765625,grad_norm: 0.9999992010579687, iteration: 144915
loss: 1.0738654136657715,grad_norm: 0.9999994714845976, iteration: 144916
loss: 0.999129593372345,grad_norm: 0.989609076339642, iteration: 144917
loss: 1.0137441158294678,grad_norm: 0.9999995881984354, iteration: 144918
loss: 1.0429719686508179,grad_norm: 0.9999992038651175, iteration: 144919
loss: 0.9994154572486877,grad_norm: 0.9999991512760752, iteration: 144920
loss: 1.0163424015045166,grad_norm: 0.99999914367955, iteration: 144921
loss: 1.0063910484313965,grad_norm: 0.999999536815558, iteration: 144922
loss: 1.01264488697052,grad_norm: 0.9942311725304491, iteration: 144923
loss: 0.9817743897438049,grad_norm: 0.9999990752463754, iteration: 144924
loss: 1.0067130327224731,grad_norm: 0.9999998784110069, iteration: 144925
loss: 1.1062504053115845,grad_norm: 0.9999992508176014, iteration: 144926
loss: 1.0078176259994507,grad_norm: 0.9999992876806045, iteration: 144927
loss: 1.0118999481201172,grad_norm: 0.9999989682598726, iteration: 144928
loss: 1.024964690208435,grad_norm: 0.999999322147468, iteration: 144929
loss: 0.9726476073265076,grad_norm: 0.9999991215648047, iteration: 144930
loss: 1.043182611465454,grad_norm: 0.9999990953095174, iteration: 144931
loss: 0.9740541577339172,grad_norm: 0.9999990844691158, iteration: 144932
loss: 1.0210493803024292,grad_norm: 0.9999989852022502, iteration: 144933
loss: 0.9603375792503357,grad_norm: 0.7842384532996219, iteration: 144934
loss: 1.0205610990524292,grad_norm: 0.9649696918848139, iteration: 144935
loss: 0.9936590194702148,grad_norm: 0.9999992481024077, iteration: 144936
loss: 1.0334175825119019,grad_norm: 0.9999989501147123, iteration: 144937
loss: 0.9940998554229736,grad_norm: 0.9979397389844937, iteration: 144938
loss: 1.011370062828064,grad_norm: 0.9999990555293741, iteration: 144939
loss: 1.0463860034942627,grad_norm: 0.9999991107960227, iteration: 144940
loss: 0.9919517040252686,grad_norm: 0.9775519083038252, iteration: 144941
loss: 1.0132725238800049,grad_norm: 0.9999990297006488, iteration: 144942
loss: 1.1028350591659546,grad_norm: 0.9999998971245618, iteration: 144943
loss: 1.0273919105529785,grad_norm: 0.999999158507638, iteration: 144944
loss: 0.9982684254646301,grad_norm: 0.9999998654036272, iteration: 144945
loss: 1.0021705627441406,grad_norm: 0.8870251063499762, iteration: 144946
loss: 1.020699143409729,grad_norm: 0.9626090213855317, iteration: 144947
loss: 0.9858325719833374,grad_norm: 0.9999989418802082, iteration: 144948
loss: 1.0271488428115845,grad_norm: 0.9999991154660661, iteration: 144949
loss: 0.9638074040412903,grad_norm: 0.9890344924536405, iteration: 144950
loss: 0.9934658408164978,grad_norm: 0.9999992054047063, iteration: 144951
loss: 0.9922344088554382,grad_norm: 0.9999991767183424, iteration: 144952
loss: 1.0186835527420044,grad_norm: 0.9999992449355497, iteration: 144953
loss: 1.007668137550354,grad_norm: 0.9999990444736923, iteration: 144954
loss: 1.032075047492981,grad_norm: 0.9723842732864817, iteration: 144955
loss: 0.9900287985801697,grad_norm: 0.8599839664261707, iteration: 144956
loss: 1.0042575597763062,grad_norm: 0.9999994488595888, iteration: 144957
loss: 1.0213677883148193,grad_norm: 0.9999991921363636, iteration: 144958
loss: 0.9813751578330994,grad_norm: 0.9866918152222768, iteration: 144959
loss: 0.9805742502212524,grad_norm: 0.9451828817439042, iteration: 144960
loss: 1.0036485195159912,grad_norm: 0.9999990309566105, iteration: 144961
loss: 0.9926614165306091,grad_norm: 0.9376622112377886, iteration: 144962
loss: 1.0073201656341553,grad_norm: 0.9999991317602639, iteration: 144963
loss: 1.0177111625671387,grad_norm: 0.9999993798725153, iteration: 144964
loss: 0.9986209273338318,grad_norm: 0.8835632525149049, iteration: 144965
loss: 1.0317195653915405,grad_norm: 0.9999994997850277, iteration: 144966
loss: 1.0137780904769897,grad_norm: 0.999998965811845, iteration: 144967
loss: 1.0050668716430664,grad_norm: 0.9572923535226, iteration: 144968
loss: 1.0397231578826904,grad_norm: 0.8797733445639987, iteration: 144969
loss: 0.986937940120697,grad_norm: 0.9999992895215908, iteration: 144970
loss: 1.0110883712768555,grad_norm: 0.9999991102977545, iteration: 144971
loss: 1.0099899768829346,grad_norm: 0.9999991144815602, iteration: 144972
loss: 0.9813945293426514,grad_norm: 0.9999991370449508, iteration: 144973
loss: 0.97824627161026,grad_norm: 0.999999108388874, iteration: 144974
loss: 0.9858930706977844,grad_norm: 0.999999017868329, iteration: 144975
loss: 1.10210120677948,grad_norm: 0.9999995709941384, iteration: 144976
loss: 0.9935777187347412,grad_norm: 0.9999991398062493, iteration: 144977
loss: 0.96586012840271,grad_norm: 0.8454875772162387, iteration: 144978
loss: 1.0137304067611694,grad_norm: 0.9628151400987303, iteration: 144979
loss: 1.0205568075180054,grad_norm: 0.9999990930964937, iteration: 144980
loss: 1.003708839416504,grad_norm: 0.861942669804327, iteration: 144981
loss: 1.0325065851211548,grad_norm: 0.982289241318754, iteration: 144982
loss: 0.9636201858520508,grad_norm: 0.9999992567110212, iteration: 144983
loss: 1.0115214586257935,grad_norm: 0.9999991810094887, iteration: 144984
loss: 1.0165553092956543,grad_norm: 0.9999991066401297, iteration: 144985
loss: 0.9775996208190918,grad_norm: 0.9999992999242587, iteration: 144986
loss: 0.9623596668243408,grad_norm: 0.9428948019947597, iteration: 144987
loss: 0.9922462701797485,grad_norm: 0.9999991532629338, iteration: 144988
loss: 0.9843032956123352,grad_norm: 0.8947875629625676, iteration: 144989
loss: 0.9584354162216187,grad_norm: 0.9072857620734688, iteration: 144990
loss: 0.9947168231010437,grad_norm: 0.999999108998162, iteration: 144991
loss: 1.0629384517669678,grad_norm: 0.9999994835280153, iteration: 144992
loss: 0.992806613445282,grad_norm: 0.9999991391388466, iteration: 144993
loss: 1.0262593030929565,grad_norm: 0.9999989530228941, iteration: 144994
loss: 0.9764693379402161,grad_norm: 0.9999991269803926, iteration: 144995
loss: 1.0073224306106567,grad_norm: 0.9999991729292284, iteration: 144996
loss: 0.9999173283576965,grad_norm: 0.999999140715965, iteration: 144997
loss: 1.010380506515503,grad_norm: 0.9999991200996817, iteration: 144998
loss: 1.0014257431030273,grad_norm: 0.999999245113726, iteration: 144999
loss: 1.0064955949783325,grad_norm: 0.8889136409863492, iteration: 145000
loss: 1.0512316226959229,grad_norm: 0.9999991739259183, iteration: 145001
loss: 1.0315916538238525,grad_norm: 0.9529330263459871, iteration: 145002
loss: 0.9790564775466919,grad_norm: 0.8301899935318032, iteration: 145003
loss: 1.008764624595642,grad_norm: 0.9275325854817107, iteration: 145004
loss: 1.0678191184997559,grad_norm: 0.999999083374314, iteration: 145005
loss: 0.9756481051445007,grad_norm: 0.9888071327747021, iteration: 145006
loss: 1.0147829055786133,grad_norm: 0.9999992591475557, iteration: 145007
loss: 1.0200233459472656,grad_norm: 0.9508827038528229, iteration: 145008
loss: 1.0372365713119507,grad_norm: 0.9999992688225143, iteration: 145009
loss: 1.1039482355117798,grad_norm: 0.9999989791467357, iteration: 145010
loss: 1.0186853408813477,grad_norm: 0.9999990070641537, iteration: 145011
loss: 1.0136191844940186,grad_norm: 0.976971136189756, iteration: 145012
loss: 1.0247538089752197,grad_norm: 0.9999990553161937, iteration: 145013
loss: 0.9955323934555054,grad_norm: 0.9999991927480769, iteration: 145014
loss: 0.9958791136741638,grad_norm: 0.9478054688806489, iteration: 145015
loss: 0.9788280129432678,grad_norm: 0.9999990549101152, iteration: 145016
loss: 0.9922014474868774,grad_norm: 0.9999992861248231, iteration: 145017
loss: 1.0214645862579346,grad_norm: 0.9999991484487385, iteration: 145018
loss: 1.0000689029693604,grad_norm: 0.9999997296677368, iteration: 145019
loss: 1.032150149345398,grad_norm: 0.999999166966992, iteration: 145020
loss: 1.0258684158325195,grad_norm: 0.9999991219741113, iteration: 145021
loss: 1.0821112394332886,grad_norm: 0.9999994380681543, iteration: 145022
loss: 1.0496762990951538,grad_norm: 0.9999990245590459, iteration: 145023
loss: 0.9938876032829285,grad_norm: 0.9999991551886452, iteration: 145024
loss: 1.0216102600097656,grad_norm: 0.9962515130998426, iteration: 145025
loss: 1.0416924953460693,grad_norm: 0.9999990947886961, iteration: 145026
loss: 0.979202926158905,grad_norm: 0.9999992602464789, iteration: 145027
loss: 1.0558327436447144,grad_norm: 0.9999991939968295, iteration: 145028
loss: 0.9891902208328247,grad_norm: 0.9999989974440171, iteration: 145029
loss: 0.9954773783683777,grad_norm: 0.9999991749238416, iteration: 145030
loss: 1.0287368297576904,grad_norm: 0.999999164073093, iteration: 145031
loss: 0.983646810054779,grad_norm: 0.9999989892480781, iteration: 145032
loss: 1.0171842575073242,grad_norm: 0.9470763589783506, iteration: 145033
loss: 1.3687528371810913,grad_norm: 0.9999998238192647, iteration: 145034
loss: 0.9634243845939636,grad_norm: 0.9999989897228553, iteration: 145035
loss: 1.0086253881454468,grad_norm: 0.9443724713487117, iteration: 145036
loss: 0.9797986149787903,grad_norm: 0.9999998720996038, iteration: 145037
loss: 1.0349931716918945,grad_norm: 0.9999994172939197, iteration: 145038
loss: 0.9761788845062256,grad_norm: 0.9960726942725351, iteration: 145039
loss: 1.0784121751785278,grad_norm: 0.9999992547551111, iteration: 145040
loss: 0.9829002022743225,grad_norm: 0.992121493438981, iteration: 145041
loss: 1.0025205612182617,grad_norm: 0.9860572975403246, iteration: 145042
loss: 1.0172919034957886,grad_norm: 0.9999990531434939, iteration: 145043
loss: 1.0067198276519775,grad_norm: 0.9999991010081722, iteration: 145044
loss: 0.9955405592918396,grad_norm: 0.9999993037413868, iteration: 145045
loss: 0.9994848370552063,grad_norm: 0.8986286659210656, iteration: 145046
loss: 0.9747772812843323,grad_norm: 0.9490153244188896, iteration: 145047
loss: 1.0046287775039673,grad_norm: 0.8451815774143094, iteration: 145048
loss: 1.0374566316604614,grad_norm: 0.9999990916987744, iteration: 145049
loss: 1.0203219652175903,grad_norm: 0.999999145833634, iteration: 145050
loss: 1.0231274366378784,grad_norm: 0.9999999726166926, iteration: 145051
loss: 1.0254594087600708,grad_norm: 0.9999990893615072, iteration: 145052
loss: 1.0268117189407349,grad_norm: 0.9999991061496738, iteration: 145053
loss: 0.9984228610992432,grad_norm: 0.9999996857532003, iteration: 145054
loss: 1.00278639793396,grad_norm: 0.937141758316396, iteration: 145055
loss: 1.0278843641281128,grad_norm: 0.9999990283684205, iteration: 145056
loss: 0.9729495048522949,grad_norm: 0.9845393813453536, iteration: 145057
loss: 1.0014225244522095,grad_norm: 0.9999990941480433, iteration: 145058
loss: 0.9696893692016602,grad_norm: 0.8500950856572029, iteration: 145059
loss: 1.0092506408691406,grad_norm: 0.8804162171548534, iteration: 145060
loss: 1.0027633905410767,grad_norm: 0.9999990907429874, iteration: 145061
loss: 1.0134435892105103,grad_norm: 0.9643199830901125, iteration: 145062
loss: 1.0178804397583008,grad_norm: 0.9999991550665767, iteration: 145063
loss: 1.025343418121338,grad_norm: 0.9999998530534233, iteration: 145064
loss: 1.0191410779953003,grad_norm: 0.9478818095691158, iteration: 145065
loss: 0.9481596350669861,grad_norm: 0.9999992333489761, iteration: 145066
loss: 1.0177342891693115,grad_norm: 0.8711578909486176, iteration: 145067
loss: 0.9831783771514893,grad_norm: 0.9999991791088008, iteration: 145068
loss: 1.0170481204986572,grad_norm: 0.9999989799881308, iteration: 145069
loss: 0.961198091506958,grad_norm: 0.919975192664214, iteration: 145070
loss: 1.001702070236206,grad_norm: 0.9999992047177543, iteration: 145071
loss: 1.0995159149169922,grad_norm: 0.9999998203487552, iteration: 145072
loss: 0.9950116872787476,grad_norm: 0.9999989906132842, iteration: 145073
loss: 0.9991458654403687,grad_norm: 0.9999991860896512, iteration: 145074
loss: 1.0260543823242188,grad_norm: 0.9999993357908168, iteration: 145075
loss: 1.0053070783615112,grad_norm: 0.9999991469910036, iteration: 145076
loss: 1.045274019241333,grad_norm: 0.9999997183284468, iteration: 145077
loss: 1.129239559173584,grad_norm: 0.9999999485028805, iteration: 145078
loss: 0.9952897429466248,grad_norm: 0.9999990912757545, iteration: 145079
loss: 0.9815021753311157,grad_norm: 0.9999990664057155, iteration: 145080
loss: 1.0159252882003784,grad_norm: 0.9886344191243561, iteration: 145081
loss: 1.0508248805999756,grad_norm: 0.9329736997775998, iteration: 145082
loss: 1.00083589553833,grad_norm: 0.9999992363536417, iteration: 145083
loss: 0.9797328114509583,grad_norm: 0.8763599196473848, iteration: 145084
loss: 1.00593101978302,grad_norm: 0.9999994727969951, iteration: 145085
loss: 1.0028717517852783,grad_norm: 0.999999104174521, iteration: 145086
loss: 1.055843472480774,grad_norm: 1.0000000295041769, iteration: 145087
loss: 1.0064257383346558,grad_norm: 0.8403901599709903, iteration: 145088
loss: 0.9605929255485535,grad_norm: 0.9504599096078145, iteration: 145089
loss: 0.9624737501144409,grad_norm: 0.9999993606665246, iteration: 145090
loss: 1.113247275352478,grad_norm: 0.9999993691987155, iteration: 145091
loss: 1.0403128862380981,grad_norm: 0.9877862821627691, iteration: 145092
loss: 0.9566825032234192,grad_norm: 0.9999989149988925, iteration: 145093
loss: 1.001802682876587,grad_norm: 0.9999990789072586, iteration: 145094
loss: 0.9429198503494263,grad_norm: 0.9999992462874752, iteration: 145095
loss: 1.0113344192504883,grad_norm: 0.9323142400544838, iteration: 145096
loss: 0.9958279132843018,grad_norm: 0.8500412769544121, iteration: 145097
loss: 1.0287139415740967,grad_norm: 0.9999992642676456, iteration: 145098
loss: 1.1101419925689697,grad_norm: 0.9999991711946504, iteration: 145099
loss: 1.0480502843856812,grad_norm: 0.9999998049150838, iteration: 145100
loss: 1.0018506050109863,grad_norm: 0.9599477370709196, iteration: 145101
loss: 0.9881156086921692,grad_norm: 0.9999991474960492, iteration: 145102
loss: 0.9790568947792053,grad_norm: 0.999999249400527, iteration: 145103
loss: 1.0096033811569214,grad_norm: 0.9999992396821377, iteration: 145104
loss: 1.007760763168335,grad_norm: 0.9999992392772686, iteration: 145105
loss: 0.974481463432312,grad_norm: 0.9999991003909476, iteration: 145106
loss: 1.0145115852355957,grad_norm: 0.999999835753572, iteration: 145107
loss: 0.9964501261711121,grad_norm: 0.9757081601843836, iteration: 145108
loss: 0.9536062479019165,grad_norm: 0.9109717151156604, iteration: 145109
loss: 0.9901973009109497,grad_norm: 0.9999990485153198, iteration: 145110
loss: 1.0459940433502197,grad_norm: 0.9999995648699073, iteration: 145111
loss: 1.0255303382873535,grad_norm: 0.9999990707768716, iteration: 145112
loss: 1.030527949333191,grad_norm: 0.9999991391189249, iteration: 145113
loss: 1.0079408884048462,grad_norm: 0.9056039043714158, iteration: 145114
loss: 1.0278292894363403,grad_norm: 0.9999992458443105, iteration: 145115
loss: 1.0176860094070435,grad_norm: 0.9999990848147142, iteration: 145116
loss: 1.0519698858261108,grad_norm: 0.9999991720983109, iteration: 145117
loss: 1.0443589687347412,grad_norm: 0.9999991800665626, iteration: 145118
loss: 1.020795226097107,grad_norm: 0.9587731365199693, iteration: 145119
loss: 0.9711686968803406,grad_norm: 0.9999989994236534, iteration: 145120
loss: 1.03135085105896,grad_norm: 0.9999990349464016, iteration: 145121
loss: 0.97896808385849,grad_norm: 0.99999904048979, iteration: 145122
loss: 0.9822165966033936,grad_norm: 0.9485434838049543, iteration: 145123
loss: 1.0353167057037354,grad_norm: 0.9999992450364568, iteration: 145124
loss: 0.9911487102508545,grad_norm: 0.9999991056258173, iteration: 145125
loss: 1.0105785131454468,grad_norm: 0.9999991673824685, iteration: 145126
loss: 1.0566002130508423,grad_norm: 0.999999354743353, iteration: 145127
loss: 1.0167361497879028,grad_norm: 0.9999990128031018, iteration: 145128
loss: 1.0536580085754395,grad_norm: 0.9999995633430406, iteration: 145129
loss: 1.027779221534729,grad_norm: 0.9662730836111836, iteration: 145130
loss: 1.0083668231964111,grad_norm: 0.9420815520816468, iteration: 145131
loss: 0.9812910556793213,grad_norm: 0.9999991923235914, iteration: 145132
loss: 0.9731044173240662,grad_norm: 0.9233557255788734, iteration: 145133
loss: 0.9791799187660217,grad_norm: 0.999999067611101, iteration: 145134
loss: 1.0039889812469482,grad_norm: 0.9999990233535552, iteration: 145135
loss: 1.0145868062973022,grad_norm: 0.9331011732446565, iteration: 145136
loss: 0.9962549209594727,grad_norm: 0.951172128178926, iteration: 145137
loss: 0.9867392778396606,grad_norm: 0.9999990987559915, iteration: 145138
loss: 1.024521827697754,grad_norm: 0.9999992635106918, iteration: 145139
loss: 0.9876106381416321,grad_norm: 0.9999990943709187, iteration: 145140
loss: 1.0472716093063354,grad_norm: 0.9204192830481298, iteration: 145141
loss: 1.0359349250793457,grad_norm: 0.9999991960907032, iteration: 145142
loss: 1.0015628337860107,grad_norm: 0.9999990733208686, iteration: 145143
loss: 1.0312917232513428,grad_norm: 0.9960274890827481, iteration: 145144
loss: 1.0372563600540161,grad_norm: 0.9717605182739778, iteration: 145145
loss: 0.9678083658218384,grad_norm: 0.99999906764388, iteration: 145146
loss: 1.0384337902069092,grad_norm: 0.999999104777311, iteration: 145147
loss: 0.982401967048645,grad_norm: 0.9039180439230733, iteration: 145148
loss: 0.9858560562133789,grad_norm: 0.9999991444702335, iteration: 145149
loss: 1.0185348987579346,grad_norm: 0.8460145377548053, iteration: 145150
loss: 1.037689208984375,grad_norm: 0.9999991108341519, iteration: 145151
loss: 0.9922574162483215,grad_norm: 0.9077227361025212, iteration: 145152
loss: 1.013620138168335,grad_norm: 0.9999990157871879, iteration: 145153
loss: 1.004309892654419,grad_norm: 0.9435304550670957, iteration: 145154
loss: 0.9760230183601379,grad_norm: 0.8726541998202818, iteration: 145155
loss: 1.0170060396194458,grad_norm: 0.8527852339740849, iteration: 145156
loss: 1.0513473749160767,grad_norm: 0.9999992913532477, iteration: 145157
loss: 0.9855673909187317,grad_norm: 0.999999013997117, iteration: 145158
loss: 0.9512901902198792,grad_norm: 0.9999992227936764, iteration: 145159
loss: 1.0179452896118164,grad_norm: 0.9999991024819402, iteration: 145160
loss: 1.002573013305664,grad_norm: 0.8518865466473301, iteration: 145161
loss: 1.0165859460830688,grad_norm: 0.9279762772648409, iteration: 145162
loss: 0.9934380054473877,grad_norm: 0.9618908692807733, iteration: 145163
loss: 1.032450795173645,grad_norm: 0.9999990018408738, iteration: 145164
loss: 1.0301470756530762,grad_norm: 0.9033373418806049, iteration: 145165
loss: 1.0453680753707886,grad_norm: 0.9999999001204528, iteration: 145166
loss: 1.0177258253097534,grad_norm: 0.9999991119024427, iteration: 145167
loss: 1.0007916688919067,grad_norm: 0.8639801671411873, iteration: 145168
loss: 1.0740790367126465,grad_norm: 0.9999992231723575, iteration: 145169
loss: 0.9895570874214172,grad_norm: 0.9999990606715777, iteration: 145170
loss: 0.9578621983528137,grad_norm: 0.9999991195269076, iteration: 145171
loss: 1.1369097232818604,grad_norm: 0.9999991971478287, iteration: 145172
loss: 0.9894530773162842,grad_norm: 0.8812715519511647, iteration: 145173
loss: 1.0013759136199951,grad_norm: 0.9999991013968013, iteration: 145174
loss: 0.9938651323318481,grad_norm: 0.9690914308505145, iteration: 145175
loss: 0.985495924949646,grad_norm: 0.9999993150984509, iteration: 145176
loss: 1.0036473274230957,grad_norm: 0.9194791220049201, iteration: 145177
loss: 0.9900223016738892,grad_norm: 0.9999993183042162, iteration: 145178
loss: 0.9960934519767761,grad_norm: 0.9999990864276027, iteration: 145179
loss: 0.9976135492324829,grad_norm: 0.9999991555850956, iteration: 145180
loss: 1.0044817924499512,grad_norm: 0.8096766391180286, iteration: 145181
loss: 1.011590838432312,grad_norm: 0.9999992093560816, iteration: 145182
loss: 0.9788973927497864,grad_norm: 0.9613072153615043, iteration: 145183
loss: 0.9613200426101685,grad_norm: 0.9999991990422405, iteration: 145184
loss: 1.0234761238098145,grad_norm: 0.9999992340307625, iteration: 145185
loss: 0.9789435267448425,grad_norm: 0.9999991392395435, iteration: 145186
loss: 0.9982438087463379,grad_norm: 0.9917581185681099, iteration: 145187
loss: 1.0236809253692627,grad_norm: 0.9999998696842445, iteration: 145188
loss: 1.0079983472824097,grad_norm: 0.8697517199494341, iteration: 145189
loss: 1.005671501159668,grad_norm: 0.9809298394001968, iteration: 145190
loss: 1.0094183683395386,grad_norm: 0.9999989757228226, iteration: 145191
loss: 0.9780125617980957,grad_norm: 0.850291849420964, iteration: 145192
loss: 1.0052365064620972,grad_norm: 0.9999990211944318, iteration: 145193
loss: 1.0648523569107056,grad_norm: 0.9999997455610098, iteration: 145194
loss: 0.9916112422943115,grad_norm: 0.9791101103178583, iteration: 145195
loss: 1.0478712320327759,grad_norm: 0.999999156898018, iteration: 145196
loss: 1.007168173789978,grad_norm: 0.9653902310906366, iteration: 145197
loss: 0.9902138113975525,grad_norm: 0.9592517027981055, iteration: 145198
loss: 1.0141360759735107,grad_norm: 0.8902718301113147, iteration: 145199
loss: 1.0034863948822021,grad_norm: 0.9999992321205281, iteration: 145200
loss: 1.0151516199111938,grad_norm: 0.9999990526216385, iteration: 145201
loss: 1.0094881057739258,grad_norm: 0.9627129606405729, iteration: 145202
loss: 1.0027508735656738,grad_norm: 0.9999990115088879, iteration: 145203
loss: 0.9930346012115479,grad_norm: 0.9282529401930787, iteration: 145204
loss: 0.9969092607498169,grad_norm: 0.9999996648774476, iteration: 145205
loss: 0.9895318746566772,grad_norm: 0.99999887849226, iteration: 145206
loss: 1.0000606775283813,grad_norm: 0.8806704763356933, iteration: 145207
loss: 1.075028896331787,grad_norm: 0.9715104775061267, iteration: 145208
loss: 0.9929957389831543,grad_norm: 0.9999991500893853, iteration: 145209
loss: 1.0216398239135742,grad_norm: 0.9877853493424054, iteration: 145210
loss: 1.0223665237426758,grad_norm: 0.9999990935389453, iteration: 145211
loss: 1.0168862342834473,grad_norm: 0.9999990957459339, iteration: 145212
loss: 1.0405670404434204,grad_norm: 0.9999992575769443, iteration: 145213
loss: 1.0067827701568604,grad_norm: 0.9999998127718259, iteration: 145214
loss: 0.9504789710044861,grad_norm: 0.9777874831795978, iteration: 145215
loss: 0.9919485449790955,grad_norm: 0.9999992087288309, iteration: 145216
loss: 1.0230988264083862,grad_norm: 0.9883445941954507, iteration: 145217
loss: 0.9992305040359497,grad_norm: 0.9712309598785438, iteration: 145218
loss: 0.963093101978302,grad_norm: 0.9999991558908327, iteration: 145219
loss: 1.0508137941360474,grad_norm: 0.9999992512452195, iteration: 145220
loss: 0.9993041157722473,grad_norm: 0.9493983759943367, iteration: 145221
loss: 1.0698940753936768,grad_norm: 0.9999995116760118, iteration: 145222
loss: 1.012790322303772,grad_norm: 0.98057258328153, iteration: 145223
loss: 0.9854625463485718,grad_norm: 0.8357520529919472, iteration: 145224
loss: 0.9779999852180481,grad_norm: 0.8944880495275752, iteration: 145225
loss: 1.0357288122177124,grad_norm: 0.9403672452691529, iteration: 145226
loss: 0.9997809529304504,grad_norm: 0.9999991006748374, iteration: 145227
loss: 0.9775300621986389,grad_norm: 0.848222941701917, iteration: 145228
loss: 1.0022753477096558,grad_norm: 0.9999991869972384, iteration: 145229
loss: 0.9797710180282593,grad_norm: 0.9917651438268019, iteration: 145230
loss: 1.0248394012451172,grad_norm: 0.9999992409757397, iteration: 145231
loss: 1.0358942747116089,grad_norm: 0.9999992440730959, iteration: 145232
loss: 0.9945544600486755,grad_norm: 0.9999992715273751, iteration: 145233
loss: 0.9746909141540527,grad_norm: 0.9874944068598964, iteration: 145234
loss: 1.0227934122085571,grad_norm: 0.9656712041086482, iteration: 145235
loss: 1.0027496814727783,grad_norm: 0.9999990916132088, iteration: 145236
loss: 1.1049386262893677,grad_norm: 0.999999729689577, iteration: 145237
loss: 0.9819120764732361,grad_norm: 0.982558171276905, iteration: 145238
loss: 1.0302635431289673,grad_norm: 0.9543951192869692, iteration: 145239
loss: 1.0076591968536377,grad_norm: 0.9999990722703379, iteration: 145240
loss: 0.9659035801887512,grad_norm: 0.9815683179606317, iteration: 145241
loss: 1.0384267568588257,grad_norm: 0.9999992941479552, iteration: 145242
loss: 1.0023696422576904,grad_norm: 0.9999992918951919, iteration: 145243
loss: 1.0608478784561157,grad_norm: 0.999999359547068, iteration: 145244
loss: 0.9908992052078247,grad_norm: 0.999999122470826, iteration: 145245
loss: 0.9959547519683838,grad_norm: 0.9999991836455526, iteration: 145246
loss: 1.008926510810852,grad_norm: 0.9999995915767523, iteration: 145247
loss: 1.0075076818466187,grad_norm: 0.9999991896065175, iteration: 145248
loss: 0.9937111139297485,grad_norm: 0.9999990929458423, iteration: 145249
loss: 0.9613344073295593,grad_norm: 0.9971140910322343, iteration: 145250
loss: 1.0238323211669922,grad_norm: 0.9727975456462093, iteration: 145251
loss: 0.9903689026832581,grad_norm: 0.9380588716257567, iteration: 145252
loss: 0.9607714414596558,grad_norm: 0.9130780795483154, iteration: 145253
loss: 1.0293771028518677,grad_norm: 0.999999268871472, iteration: 145254
loss: 0.9743123650550842,grad_norm: 0.999999388987348, iteration: 145255
loss: 1.0050406455993652,grad_norm: 0.9999991412487652, iteration: 145256
loss: 1.0028246641159058,grad_norm: 0.9999991325294605, iteration: 145257
loss: 0.9808192849159241,grad_norm: 0.9999990305328772, iteration: 145258
loss: 0.9931156039237976,grad_norm: 0.9999990440016185, iteration: 145259
loss: 0.9920943379402161,grad_norm: 0.9899331528399733, iteration: 145260
loss: 0.999853789806366,grad_norm: 0.9062963230274828, iteration: 145261
loss: 1.0048863887786865,grad_norm: 0.9999992087654277, iteration: 145262
loss: 1.0043102502822876,grad_norm: 0.8135845043619717, iteration: 145263
loss: 1.04757821559906,grad_norm: 0.9999998811568988, iteration: 145264
loss: 0.9664739370346069,grad_norm: 0.9999992826559992, iteration: 145265
loss: 0.9909462332725525,grad_norm: 0.9999990536021598, iteration: 145266
loss: 0.9921948313713074,grad_norm: 0.9710971503407645, iteration: 145267
loss: 1.0295791625976562,grad_norm: 0.9951571275227663, iteration: 145268
loss: 0.9850977659225464,grad_norm: 0.9999989685638829, iteration: 145269
loss: 1.0006279945373535,grad_norm: 0.999999245241534, iteration: 145270
loss: 1.0025690793991089,grad_norm: 1.000000034024137, iteration: 145271
loss: 1.0306440591812134,grad_norm: 0.9999996672408221, iteration: 145272
loss: 0.991291880607605,grad_norm: 0.8512744581121902, iteration: 145273
loss: 0.9964380264282227,grad_norm: 0.9264455372262526, iteration: 145274
loss: 1.0234938859939575,grad_norm: 0.9999991357049617, iteration: 145275
loss: 1.011905550956726,grad_norm: 0.909243224636439, iteration: 145276
loss: 1.0086323022842407,grad_norm: 0.9541082015249756, iteration: 145277
loss: 1.00458824634552,grad_norm: 0.959833124924173, iteration: 145278
loss: 0.9736658334732056,grad_norm: 0.9999990919848664, iteration: 145279
loss: 1.0225393772125244,grad_norm: 0.9999993087295721, iteration: 145280
loss: 1.0066792964935303,grad_norm: 0.8626001440585342, iteration: 145281
loss: 0.9853622913360596,grad_norm: 0.9844462802793875, iteration: 145282
loss: 0.9884470105171204,grad_norm: 0.9999992075675638, iteration: 145283
loss: 0.9740620851516724,grad_norm: 0.7895419944169803, iteration: 145284
loss: 0.9669641852378845,grad_norm: 0.9323711147713173, iteration: 145285
loss: 1.0061159133911133,grad_norm: 0.9365886696034755, iteration: 145286
loss: 1.073886513710022,grad_norm: 0.9999999489714881, iteration: 145287
loss: 0.9940261244773865,grad_norm: 0.9999990176204918, iteration: 145288
loss: 1.0200735330581665,grad_norm: 0.9999992125952323, iteration: 145289
loss: 1.026595950126648,grad_norm: 0.9506233586797785, iteration: 145290
loss: 1.0315393209457397,grad_norm: 0.9498934547580488, iteration: 145291
loss: 0.995394229888916,grad_norm: 0.9999991274701457, iteration: 145292
loss: 1.0128402709960938,grad_norm: 0.9999991121942291, iteration: 145293
loss: 0.9634718894958496,grad_norm: 0.9999991264834233, iteration: 145294
loss: 1.0058672428131104,grad_norm: 0.9718398980868115, iteration: 145295
loss: 1.0696978569030762,grad_norm: 0.9999996106209403, iteration: 145296
loss: 0.9903764128684998,grad_norm: 0.9717988999281768, iteration: 145297
loss: 0.9791584610939026,grad_norm: 0.8236923497686256, iteration: 145298
loss: 1.0811289548873901,grad_norm: 0.8942103376411948, iteration: 145299
loss: 0.9855995774269104,grad_norm: 0.9999990569649605, iteration: 145300
loss: 1.0042518377304077,grad_norm: 0.9999992403787122, iteration: 145301
loss: 0.9995637536048889,grad_norm: 0.9999992549197835, iteration: 145302
loss: 1.0488015413284302,grad_norm: 0.9999993048129353, iteration: 145303
loss: 1.0441138744354248,grad_norm: 0.9999991672199927, iteration: 145304
loss: 1.0045971870422363,grad_norm: 0.9999991087838015, iteration: 145305
loss: 1.03030264377594,grad_norm: 0.9999990701918554, iteration: 145306
loss: 1.0101711750030518,grad_norm: 0.9999991427991105, iteration: 145307
loss: 0.9791134595870972,grad_norm: 0.9999990852527791, iteration: 145308
loss: 1.0335839986801147,grad_norm: 0.9999992064906309, iteration: 145309
loss: 0.9938897490501404,grad_norm: 0.9999990727996207, iteration: 145310
loss: 0.9728638529777527,grad_norm: 0.9709774270241573, iteration: 145311
loss: 1.0048483610153198,grad_norm: 0.9999989973561155, iteration: 145312
loss: 1.0157772302627563,grad_norm: 0.9533970817028243, iteration: 145313
loss: 0.9873515963554382,grad_norm: 0.9999989930612931, iteration: 145314
loss: 1.0116353034973145,grad_norm: 0.9999991196093867, iteration: 145315
loss: 0.9674264788627625,grad_norm: 0.950693960623676, iteration: 145316
loss: 0.9894742965698242,grad_norm: 0.9999989855983721, iteration: 145317
loss: 1.013867974281311,grad_norm: 0.9695630955092235, iteration: 145318
loss: 1.0056363344192505,grad_norm: 0.9999992213366964, iteration: 145319
loss: 1.0059458017349243,grad_norm: 0.9835215655510671, iteration: 145320
loss: 1.0069875717163086,grad_norm: 0.9999990572170355, iteration: 145321
loss: 1.0103193521499634,grad_norm: 0.9999991511778237, iteration: 145322
loss: 0.9839828014373779,grad_norm: 0.8712657437778855, iteration: 145323
loss: 0.991666316986084,grad_norm: 0.9999991970474997, iteration: 145324
loss: 1.0149720907211304,grad_norm: 0.9999991434953719, iteration: 145325
loss: 1.0355525016784668,grad_norm: 0.9999990950865053, iteration: 145326
loss: 0.9601810574531555,grad_norm: 0.9177728431684475, iteration: 145327
loss: 0.9974948763847351,grad_norm: 0.9999990075070173, iteration: 145328
loss: 1.0223780870437622,grad_norm: 0.9999990862988072, iteration: 145329
loss: 0.9770122766494751,grad_norm: 0.9999991150202807, iteration: 145330
loss: 1.0383790731430054,grad_norm: 0.9999992832067742, iteration: 145331
loss: 1.0382080078125,grad_norm: 0.9999992565947281, iteration: 145332
loss: 0.9979985356330872,grad_norm: 0.999999009866542, iteration: 145333
loss: 1.0567171573638916,grad_norm: 0.9999995361697616, iteration: 145334
loss: 0.9923721551895142,grad_norm: 0.9999996681469003, iteration: 145335
loss: 1.003788709640503,grad_norm: 0.999999233409891, iteration: 145336
loss: 1.01129949092865,grad_norm: 0.9262046547129943, iteration: 145337
loss: 0.9930489659309387,grad_norm: 0.9999993576381729, iteration: 145338
loss: 0.9875555634498596,grad_norm: 0.9999989008981957, iteration: 145339
loss: 1.0108649730682373,grad_norm: 0.9999991531452256, iteration: 145340
loss: 0.9956185221672058,grad_norm: 0.9999991027429178, iteration: 145341
loss: 1.0384889841079712,grad_norm: 0.9999992586803874, iteration: 145342
loss: 0.9958555102348328,grad_norm: 0.9999991570059223, iteration: 145343
loss: 1.0117316246032715,grad_norm: 0.9919280858069075, iteration: 145344
loss: 0.9959887862205505,grad_norm: 0.9789969159181412, iteration: 145345
loss: 0.9759475588798523,grad_norm: 0.9999991819686281, iteration: 145346
loss: 0.9843341112136841,grad_norm: 0.8779476907535572, iteration: 145347
loss: 0.9765220284461975,grad_norm: 0.9999992918798976, iteration: 145348
loss: 0.9509373307228088,grad_norm: 0.9999991142417893, iteration: 145349
loss: 1.079002022743225,grad_norm: 0.9999993983382562, iteration: 145350
loss: 1.0082976818084717,grad_norm: 0.9999990093836892, iteration: 145351
loss: 1.0134928226470947,grad_norm: 0.8336487240075664, iteration: 145352
loss: 0.9928776621818542,grad_norm: 0.9999989090907452, iteration: 145353
loss: 1.0070915222167969,grad_norm: 0.9999992616092895, iteration: 145354
loss: 0.9618685245513916,grad_norm: 0.9999994631335003, iteration: 145355
loss: 1.0068727731704712,grad_norm: 0.9134289082109698, iteration: 145356
loss: 0.9824416637420654,grad_norm: 0.9999990404710246, iteration: 145357
loss: 1.088516354560852,grad_norm: 0.9999992883139456, iteration: 145358
loss: 0.9792273640632629,grad_norm: 0.9988144849493148, iteration: 145359
loss: 0.9829844832420349,grad_norm: 0.9570442561364414, iteration: 145360
loss: 0.9979259371757507,grad_norm: 0.9999991404135093, iteration: 145361
loss: 0.9871673583984375,grad_norm: 0.9999991984647776, iteration: 145362
loss: 1.0288445949554443,grad_norm: 0.9999992336693427, iteration: 145363
loss: 0.9699009656906128,grad_norm: 0.9999997347916812, iteration: 145364
loss: 1.0017390251159668,grad_norm: 0.9602824743423035, iteration: 145365
loss: 0.9970753192901611,grad_norm: 0.9999991750987105, iteration: 145366
loss: 0.9924331903457642,grad_norm: 0.9999991732532323, iteration: 145367
loss: 1.0126032829284668,grad_norm: 0.9999996147026454, iteration: 145368
loss: 0.9847884774208069,grad_norm: 0.8019207743496055, iteration: 145369
loss: 0.9722705483436584,grad_norm: 0.9999992763650664, iteration: 145370
loss: 0.9464339017868042,grad_norm: 0.9911598546470419, iteration: 145371
loss: 0.9711791276931763,grad_norm: 0.9584804843566712, iteration: 145372
loss: 0.993719220161438,grad_norm: 0.8737096646432629, iteration: 145373
loss: 1.0335371494293213,grad_norm: 0.9999992545390178, iteration: 145374
loss: 0.9882153272628784,grad_norm: 0.958935498045018, iteration: 145375
loss: 0.9799251556396484,grad_norm: 0.8917130313366036, iteration: 145376
loss: 0.9991583228111267,grad_norm: 0.9899168660031848, iteration: 145377
loss: 1.006914496421814,grad_norm: 0.9999993062718525, iteration: 145378
loss: 1.0002100467681885,grad_norm: 0.9999990387708148, iteration: 145379
loss: 0.9931493401527405,grad_norm: 0.9999992026503115, iteration: 145380
loss: 1.0047763586044312,grad_norm: 0.999999210881219, iteration: 145381
loss: 0.9743078351020813,grad_norm: 0.9999991299065001, iteration: 145382
loss: 1.0160919427871704,grad_norm: 0.9999990480301207, iteration: 145383
loss: 1.015136957168579,grad_norm: 0.864288356925681, iteration: 145384
loss: 0.9623532891273499,grad_norm: 0.9999989414002967, iteration: 145385
loss: 1.0010806322097778,grad_norm: 0.999999448987791, iteration: 145386
loss: 0.9665444493293762,grad_norm: 0.9174114577857508, iteration: 145387
loss: 1.0160061120986938,grad_norm: 0.9999991092334828, iteration: 145388
loss: 1.0051651000976562,grad_norm: 0.9999990439208178, iteration: 145389
loss: 0.9798116087913513,grad_norm: 0.9601921598728594, iteration: 145390
loss: 0.9596194624900818,grad_norm: 0.9848095347761805, iteration: 145391
loss: 1.0038745403289795,grad_norm: 0.8371637985705753, iteration: 145392
loss: 1.020372748374939,grad_norm: 0.9999992106156598, iteration: 145393
loss: 1.041292667388916,grad_norm: 0.957294660220789, iteration: 145394
loss: 1.0228582620620728,grad_norm: 0.9999991369790249, iteration: 145395
loss: 0.9491695165634155,grad_norm: 0.9999990348284903, iteration: 145396
loss: 0.991832971572876,grad_norm: 0.9999991700219044, iteration: 145397
loss: 1.0159785747528076,grad_norm: 0.999998977438364, iteration: 145398
loss: 0.9843420386314392,grad_norm: 0.984502628713804, iteration: 145399
loss: 1.0112711191177368,grad_norm: 0.9999990868784473, iteration: 145400
loss: 1.0269646644592285,grad_norm: 0.9999993522509408, iteration: 145401
loss: 1.0303019285202026,grad_norm: 0.999998938590678, iteration: 145402
loss: 1.002374529838562,grad_norm: 0.9999990668241945, iteration: 145403
loss: 0.9970976710319519,grad_norm: 0.9013823233209365, iteration: 145404
loss: 1.0170083045959473,grad_norm: 0.9999991470741041, iteration: 145405
loss: 0.9635759592056274,grad_norm: 0.8948207854603962, iteration: 145406
loss: 1.010495901107788,grad_norm: 0.9503244091400728, iteration: 145407
loss: 0.9716335535049438,grad_norm: 0.8903539849461494, iteration: 145408
loss: 0.9989450573921204,grad_norm: 0.9999991300434384, iteration: 145409
loss: 1.0128344297409058,grad_norm: 0.9509926001434224, iteration: 145410
loss: 1.1664615869522095,grad_norm: 0.9999993532225574, iteration: 145411
loss: 1.056634783744812,grad_norm: 0.9999997458648655, iteration: 145412
loss: 1.001973032951355,grad_norm: 0.9999993126856381, iteration: 145413
loss: 0.9820863008499146,grad_norm: 0.9408358545953684, iteration: 145414
loss: 1.0874165296554565,grad_norm: 0.9999993709729856, iteration: 145415
loss: 1.0045498609542847,grad_norm: 0.9999992707819795, iteration: 145416
loss: 0.9716213345527649,grad_norm: 0.8737856545133174, iteration: 145417
loss: 0.9868536591529846,grad_norm: 0.8560346188320822, iteration: 145418
loss: 0.9610348343849182,grad_norm: 0.9101969023439246, iteration: 145419
loss: 0.9693881273269653,grad_norm: 0.9999992324691733, iteration: 145420
loss: 1.0203382968902588,grad_norm: 0.9999990567486086, iteration: 145421
loss: 0.975285530090332,grad_norm: 0.9999991443858256, iteration: 145422
loss: 0.9796500205993652,grad_norm: 0.9546928023280027, iteration: 145423
loss: 1.0133332014083862,grad_norm: 0.9999991313551476, iteration: 145424
loss: 0.9898266792297363,grad_norm: 0.9999995164898271, iteration: 145425
loss: 1.0207022428512573,grad_norm: 0.9999990415917706, iteration: 145426
loss: 0.9972319602966309,grad_norm: 0.9999991196613205, iteration: 145427
loss: 1.0206100940704346,grad_norm: 0.9999993927190909, iteration: 145428
loss: 1.0020467042922974,grad_norm: 0.9002677864348531, iteration: 145429
loss: 0.9913082122802734,grad_norm: 0.9999991856456734, iteration: 145430
loss: 1.0311137437820435,grad_norm: 0.9419440678208468, iteration: 145431
loss: 0.9702150821685791,grad_norm: 0.966212610482514, iteration: 145432
loss: 0.9748386740684509,grad_norm: 0.9233663706635037, iteration: 145433
loss: 1.0021858215332031,grad_norm: 0.9999990056569479, iteration: 145434
loss: 1.0247749090194702,grad_norm: 0.9545119777036668, iteration: 145435
loss: 0.999413788318634,grad_norm: 0.9999997786325426, iteration: 145436
loss: 0.9913370609283447,grad_norm: 0.9641093490283024, iteration: 145437
loss: 0.9925724267959595,grad_norm: 0.999999289825677, iteration: 145438
loss: 0.9853349924087524,grad_norm: 0.9999990356527296, iteration: 145439
loss: 0.9538870453834534,grad_norm: 0.8782168880586219, iteration: 145440
loss: 0.9988908767700195,grad_norm: 0.9999991325691641, iteration: 145441
loss: 0.9746095538139343,grad_norm: 0.9999991401327054, iteration: 145442
loss: 0.9623875617980957,grad_norm: 0.8516795634032917, iteration: 145443
loss: 1.0216765403747559,grad_norm: 0.9999991195786546, iteration: 145444
loss: 1.0116254091262817,grad_norm: 0.9999996141093495, iteration: 145445
loss: 0.9860067367553711,grad_norm: 0.9999997804131615, iteration: 145446
loss: 1.0126216411590576,grad_norm: 0.999998917684949, iteration: 145447
loss: 0.990126371383667,grad_norm: 0.9999992830958997, iteration: 145448
loss: 0.9903964996337891,grad_norm: 0.9999992748874769, iteration: 145449
loss: 1.0039726495742798,grad_norm: 0.9999992222599258, iteration: 145450
loss: 1.0158729553222656,grad_norm: 0.9999991076503287, iteration: 145451
loss: 1.017640471458435,grad_norm: 0.9999990938467679, iteration: 145452
loss: 1.0195807218551636,grad_norm: 0.9999990256307835, iteration: 145453
loss: 1.0434374809265137,grad_norm: 0.9999990573642568, iteration: 145454
loss: 1.0583287477493286,grad_norm: 0.9999997486704544, iteration: 145455
loss: 0.9779003262519836,grad_norm: 0.9999989225494729, iteration: 145456
loss: 0.9806790351867676,grad_norm: 0.9633618038269202, iteration: 145457
loss: 1.0288928747177124,grad_norm: 0.9999991235356598, iteration: 145458
loss: 1.0088067054748535,grad_norm: 0.9999990274436944, iteration: 145459
loss: 1.0071322917938232,grad_norm: 0.9129379557970483, iteration: 145460
loss: 1.0004997253417969,grad_norm: 0.9999989594022259, iteration: 145461
loss: 1.0026966333389282,grad_norm: 0.9979003952954288, iteration: 145462
loss: 1.0033037662506104,grad_norm: 0.9999992164391182, iteration: 145463
loss: 0.9627044796943665,grad_norm: 0.9471877845090025, iteration: 145464
loss: 0.9984800815582275,grad_norm: 0.9999989482460879, iteration: 145465
loss: 0.9853257536888123,grad_norm: 0.9999992212806494, iteration: 145466
loss: 0.9929483532905579,grad_norm: 0.9999989835447812, iteration: 145467
loss: 1.0087755918502808,grad_norm: 0.9999990858326783, iteration: 145468
loss: 0.9903628826141357,grad_norm: 0.9999990482144089, iteration: 145469
loss: 1.0257874727249146,grad_norm: 0.9999992673858431, iteration: 145470
loss: 1.0538208484649658,grad_norm: 0.9325377507541495, iteration: 145471
loss: 0.9701506495475769,grad_norm: 0.9999998038126576, iteration: 145472
loss: 0.9972121119499207,grad_norm: 0.7939022648525276, iteration: 145473
loss: 1.012973666191101,grad_norm: 0.9999990980367691, iteration: 145474
loss: 0.9826365113258362,grad_norm: 0.963760742442999, iteration: 145475
loss: 1.019351840019226,grad_norm: 0.9999991482131566, iteration: 145476
loss: 1.0168243646621704,grad_norm: 0.9999990789107561, iteration: 145477
loss: 1.0129492282867432,grad_norm: 0.9999990935547117, iteration: 145478
loss: 1.0321170091629028,grad_norm: 0.8904318206184022, iteration: 145479
loss: 0.9865797758102417,grad_norm: 0.9007640088211304, iteration: 145480
loss: 1.0119481086730957,grad_norm: 0.8751992995515792, iteration: 145481
loss: 0.9963535070419312,grad_norm: 0.9343302495654789, iteration: 145482
loss: 1.01425302028656,grad_norm: 0.9999990642365609, iteration: 145483
loss: 0.99089115858078,grad_norm: 0.9999992411776949, iteration: 145484
loss: 0.9529911875724792,grad_norm: 0.9999993799026893, iteration: 145485
loss: 1.0102030038833618,grad_norm: 0.9012718284708982, iteration: 145486
loss: 0.9741446375846863,grad_norm: 0.9999991510775577, iteration: 145487
loss: 1.0566033124923706,grad_norm: 0.9999992911277212, iteration: 145488
loss: 0.9932801127433777,grad_norm: 0.9999993584200771, iteration: 145489
loss: 0.9463276863098145,grad_norm: 0.9999991774567306, iteration: 145490
loss: 1.0045498609542847,grad_norm: 0.9999994259403538, iteration: 145491
loss: 1.015296220779419,grad_norm: 0.9999992105089429, iteration: 145492
loss: 1.0076255798339844,grad_norm: 0.9999990419745806, iteration: 145493
loss: 0.9976261258125305,grad_norm: 0.9999990896448899, iteration: 145494
loss: 0.9711161255836487,grad_norm: 0.9964748212949665, iteration: 145495
loss: 1.0287108421325684,grad_norm: 0.9999990620352334, iteration: 145496
loss: 1.2375901937484741,grad_norm: 0.9999991850160697, iteration: 145497
loss: 0.9958214163780212,grad_norm: 0.9999991206526372, iteration: 145498
loss: 1.0320638418197632,grad_norm: 0.9999992035734285, iteration: 145499
loss: 1.0154696702957153,grad_norm: 0.9999991989032919, iteration: 145500
loss: 1.0071210861206055,grad_norm: 0.9755578821389972, iteration: 145501
loss: 1.0250905752182007,grad_norm: 0.9901984206269109, iteration: 145502
loss: 1.0287904739379883,grad_norm: 0.9172840500834253, iteration: 145503
loss: 1.0024911165237427,grad_norm: 0.9210382131399774, iteration: 145504
loss: 1.0132607221603394,grad_norm: 0.8303723454771704, iteration: 145505
loss: 0.994258463382721,grad_norm: 0.9999990503263652, iteration: 145506
loss: 1.0088882446289062,grad_norm: 0.999999117837212, iteration: 145507
loss: 1.0215139389038086,grad_norm: 0.9585557849140799, iteration: 145508
loss: 0.9853116869926453,grad_norm: 0.9334087997290254, iteration: 145509
loss: 0.989604651927948,grad_norm: 0.8438901904892923, iteration: 145510
loss: 1.0159634351730347,grad_norm: 0.9625102853582173, iteration: 145511
loss: 1.0410881042480469,grad_norm: 0.9999993458652898, iteration: 145512
loss: 1.0978301763534546,grad_norm: 0.9999995685396952, iteration: 145513
loss: 1.0349243879318237,grad_norm: 0.9573775780634409, iteration: 145514
loss: 1.0086901187896729,grad_norm: 0.999999185796986, iteration: 145515
loss: 1.003873348236084,grad_norm: 0.9999991501569012, iteration: 145516
loss: 1.0130270719528198,grad_norm: 0.9999992112752718, iteration: 145517
loss: 0.9765619039535522,grad_norm: 0.9711256756149738, iteration: 145518
loss: 1.011540412902832,grad_norm: 0.9999993104754636, iteration: 145519
loss: 1.1758289337158203,grad_norm: 0.9999997709764892, iteration: 145520
loss: 0.9842968583106995,grad_norm: 0.999999240556111, iteration: 145521
loss: 1.0409232378005981,grad_norm: 0.9999997408826087, iteration: 145522
loss: 1.0144039392471313,grad_norm: 0.952954960862566, iteration: 145523
loss: 1.002732276916504,grad_norm: 0.9999995269882642, iteration: 145524
loss: 0.9945995807647705,grad_norm: 0.9869576642550933, iteration: 145525
loss: 0.9964607357978821,grad_norm: 0.9999990633729448, iteration: 145526
loss: 1.0317291021347046,grad_norm: 0.8659581856460037, iteration: 145527
loss: 0.9372833371162415,grad_norm: 0.999999009942325, iteration: 145528
loss: 1.0113110542297363,grad_norm: 0.9999993027569112, iteration: 145529
loss: 1.0407463312149048,grad_norm: 0.9586338116363314, iteration: 145530
loss: 1.0041745901107788,grad_norm: 0.9999990358494448, iteration: 145531
loss: 1.0033442974090576,grad_norm: 0.999998973522895, iteration: 145532
loss: 0.9635639786720276,grad_norm: 0.9604042220503803, iteration: 145533
loss: 1.0141100883483887,grad_norm: 0.9999991447674926, iteration: 145534
loss: 1.039559245109558,grad_norm: 0.9999993220985268, iteration: 145535
loss: 1.0113625526428223,grad_norm: 0.9580494802779138, iteration: 145536
loss: 0.9885804057121277,grad_norm: 0.8634164958367885, iteration: 145537
loss: 0.9833129644393921,grad_norm: 0.8672882581424087, iteration: 145538
loss: 0.9988154172897339,grad_norm: 0.9939009549212463, iteration: 145539
loss: 0.9753221273422241,grad_norm: 0.9999990966670707, iteration: 145540
loss: 0.9931380748748779,grad_norm: 0.9325495624329235, iteration: 145541
loss: 0.9897339344024658,grad_norm: 0.9999997349458587, iteration: 145542
loss: 1.015981674194336,grad_norm: 0.9999990886339721, iteration: 145543
loss: 1.0259582996368408,grad_norm: 0.9186917786088938, iteration: 145544
loss: 0.9785366654396057,grad_norm: 0.9999993662029533, iteration: 145545
loss: 1.020151972770691,grad_norm: 0.974003036780382, iteration: 145546
loss: 1.0046169757843018,grad_norm: 0.8509627304898298, iteration: 145547
loss: 0.9850877523422241,grad_norm: 0.8877867015624926, iteration: 145548
loss: 1.019901156425476,grad_norm: 0.9999990115187575, iteration: 145549
loss: 1.0079911947250366,grad_norm: 0.8915321568088016, iteration: 145550
loss: 1.0063385963439941,grad_norm: 0.9999990039371243, iteration: 145551
loss: 0.9985466599464417,grad_norm: 0.9428767962796322, iteration: 145552
loss: 0.9803318977355957,grad_norm: 0.9999991364376144, iteration: 145553
loss: 0.9986447095870972,grad_norm: 0.9999992511815432, iteration: 145554
loss: 1.031765103340149,grad_norm: 0.916253413440918, iteration: 145555
loss: 1.0180370807647705,grad_norm: 0.9999989799863744, iteration: 145556
loss: 1.0145474672317505,grad_norm: 0.9999990031495253, iteration: 145557
loss: 1.0469685792922974,grad_norm: 0.998048825147915, iteration: 145558
loss: 1.008467674255371,grad_norm: 0.9999991003414841, iteration: 145559
loss: 1.0411349534988403,grad_norm: 0.9999991483294217, iteration: 145560
loss: 1.013803482055664,grad_norm: 0.9999998130060854, iteration: 145561
loss: 0.9928264617919922,grad_norm: 0.9999991034530965, iteration: 145562
loss: 0.9884563684463501,grad_norm: 0.9999992491363232, iteration: 145563
loss: 1.0088690519332886,grad_norm: 0.8758786453147699, iteration: 145564
loss: 0.9935088157653809,grad_norm: 0.9999996871088688, iteration: 145565
loss: 1.0209546089172363,grad_norm: 0.9999991543772847, iteration: 145566
loss: 1.0099061727523804,grad_norm: 0.999999684489641, iteration: 145567
loss: 0.9936838746070862,grad_norm: 0.923631252269831, iteration: 145568
loss: 0.9909800887107849,grad_norm: 0.9999988890956668, iteration: 145569
loss: 0.970602810382843,grad_norm: 0.9999990067651022, iteration: 145570
loss: 0.9780440926551819,grad_norm: 0.9128228692508008, iteration: 145571
loss: 1.0043684244155884,grad_norm: 0.8374678604128798, iteration: 145572
loss: 1.0138012170791626,grad_norm: 0.9754313320919196, iteration: 145573
loss: 0.9892523884773254,grad_norm: 0.9999991085332331, iteration: 145574
loss: 1.0046942234039307,grad_norm: 0.999999126224003, iteration: 145575
loss: 0.9871849417686462,grad_norm: 0.9999991194398106, iteration: 145576
loss: 0.965288519859314,grad_norm: 0.9248350720001085, iteration: 145577
loss: 0.9789601564407349,grad_norm: 0.9629927028640254, iteration: 145578
loss: 0.9834304451942444,grad_norm: 0.9153823437772936, iteration: 145579
loss: 1.0890198945999146,grad_norm: 0.9999992521062577, iteration: 145580
loss: 0.9650509357452393,grad_norm: 0.9481144483482763, iteration: 145581
loss: 0.9776688814163208,grad_norm: 0.9999989229628707, iteration: 145582
loss: 0.997401237487793,grad_norm: 0.913262626439215, iteration: 145583
loss: 0.9613093733787537,grad_norm: 0.9695276812135467, iteration: 145584
loss: 0.9946781992912292,grad_norm: 0.9999990494679566, iteration: 145585
loss: 0.9582247138023376,grad_norm: 0.981603084042582, iteration: 145586
loss: 0.977588415145874,grad_norm: 0.9278936248139917, iteration: 145587
loss: 1.0162959098815918,grad_norm: 0.842517994778388, iteration: 145588
loss: 0.9872828722000122,grad_norm: 0.9472747782145319, iteration: 145589
loss: 0.9974812269210815,grad_norm: 0.9999990474581668, iteration: 145590
loss: 1.039411187171936,grad_norm: 0.9999990670686951, iteration: 145591
loss: 0.9878710508346558,grad_norm: 0.9999991012609, iteration: 145592
loss: 0.9821488261222839,grad_norm: 0.9317359439764418, iteration: 145593
loss: 0.9937195777893066,grad_norm: 0.8690534338594907, iteration: 145594
loss: 1.0176142454147339,grad_norm: 0.9999997937754365, iteration: 145595
loss: 0.9781188368797302,grad_norm: 0.9195725462280783, iteration: 145596
loss: 1.004330039024353,grad_norm: 0.9999990963250005, iteration: 145597
loss: 0.9717676043510437,grad_norm: 0.8196050785810807, iteration: 145598
loss: 1.0007387399673462,grad_norm: 0.8810331871100772, iteration: 145599
loss: 1.0152661800384521,grad_norm: 0.9021161173926836, iteration: 145600
loss: 0.9774572849273682,grad_norm: 0.9999989697285616, iteration: 145601
loss: 1.0014480352401733,grad_norm: 0.9657396145557007, iteration: 145602
loss: 1.0179578065872192,grad_norm: 0.9999991287137681, iteration: 145603
loss: 1.080446720123291,grad_norm: 0.9999992795712237, iteration: 145604
loss: 1.0186240673065186,grad_norm: 0.9933299672738983, iteration: 145605
loss: 1.0062892436981201,grad_norm: 0.9999987819813871, iteration: 145606
loss: 1.025386095046997,grad_norm: 0.9999990217384342, iteration: 145607
loss: 1.0030392408370972,grad_norm: 0.9999991445681407, iteration: 145608
loss: 1.0160861015319824,grad_norm: 0.9880903088883881, iteration: 145609
loss: 0.9693244695663452,grad_norm: 0.7988637280958454, iteration: 145610
loss: 1.033632755279541,grad_norm: 0.900811193698368, iteration: 145611
loss: 1.0075773000717163,grad_norm: 0.8579446999148451, iteration: 145612
loss: 0.9973738193511963,grad_norm: 0.9999992908152344, iteration: 145613
loss: 0.9928476214408875,grad_norm: 0.9999992936756118, iteration: 145614
loss: 1.0074983835220337,grad_norm: 0.9999990622953113, iteration: 145615
loss: 0.9964581727981567,grad_norm: 0.9999990102317621, iteration: 145616
loss: 0.9862145185470581,grad_norm: 0.874671403184617, iteration: 145617
loss: 0.9691076874732971,grad_norm: 0.9999990305390364, iteration: 145618
loss: 0.9759683012962341,grad_norm: 0.9740769407812757, iteration: 145619
loss: 0.9865068793296814,grad_norm: 0.9999992138039181, iteration: 145620
loss: 0.9995040893554688,grad_norm: 0.9999991687354824, iteration: 145621
loss: 0.9782984256744385,grad_norm: 0.9999989369170346, iteration: 145622
loss: 0.9631028771400452,grad_norm: 0.975961541799418, iteration: 145623
loss: 0.9974326491355896,grad_norm: 0.9593593962599952, iteration: 145624
loss: 0.991016685962677,grad_norm: 0.9828817835098488, iteration: 145625
loss: 1.008817434310913,grad_norm: 0.9999990161417773, iteration: 145626
loss: 1.0159941911697388,grad_norm: 0.904161610174672, iteration: 145627
loss: 0.9995458126068115,grad_norm: 0.9063133945782919, iteration: 145628
loss: 0.9950200319290161,grad_norm: 0.9999991166056224, iteration: 145629
loss: 0.9542351365089417,grad_norm: 0.9999989403771091, iteration: 145630
loss: 1.0199809074401855,grad_norm: 0.8830921581532647, iteration: 145631
loss: 1.0097678899765015,grad_norm: 0.9999991863844364, iteration: 145632
loss: 0.9927470684051514,grad_norm: 0.9716136193360775, iteration: 145633
loss: 1.0252892971038818,grad_norm: 0.9999990062715488, iteration: 145634
loss: 1.0000441074371338,grad_norm: 0.9999991158288202, iteration: 145635
loss: 0.9900756478309631,grad_norm: 0.9999989212027507, iteration: 145636
loss: 1.0186954736709595,grad_norm: 0.8776109996261795, iteration: 145637
loss: 0.9631012082099915,grad_norm: 0.9594070396237474, iteration: 145638
loss: 1.0398081541061401,grad_norm: 0.9999992709148737, iteration: 145639
loss: 1.0183568000793457,grad_norm: 0.9722811133594467, iteration: 145640
loss: 0.9978193640708923,grad_norm: 0.9999991663153274, iteration: 145641
loss: 0.9671339988708496,grad_norm: 0.999999087915102, iteration: 145642
loss: 1.016548991203308,grad_norm: 0.9999991361872058, iteration: 145643
loss: 0.9971315264701843,grad_norm: 0.8847250719685199, iteration: 145644
loss: 0.984343409538269,grad_norm: 0.9999989406197252, iteration: 145645
loss: 0.9924147725105286,grad_norm: 0.9980256799460451, iteration: 145646
loss: 1.0291508436203003,grad_norm: 0.9999992628462301, iteration: 145647
loss: 0.9903952479362488,grad_norm: 0.979575276595315, iteration: 145648
loss: 0.9886980056762695,grad_norm: 0.9058480405221341, iteration: 145649
loss: 1.0222163200378418,grad_norm: 0.9748539025970228, iteration: 145650
loss: 0.966517984867096,grad_norm: 0.8947090504687666, iteration: 145651
loss: 0.9821594953536987,grad_norm: 0.9999993956107225, iteration: 145652
loss: 0.9698190093040466,grad_norm: 0.9999990135108299, iteration: 145653
loss: 0.9695654511451721,grad_norm: 0.9636586758786666, iteration: 145654
loss: 1.020689845085144,grad_norm: 0.9999992522381075, iteration: 145655
loss: 1.0297155380249023,grad_norm: 0.9273018039554919, iteration: 145656
loss: 0.9733841419219971,grad_norm: 0.8599988243614335, iteration: 145657
loss: 0.994009256362915,grad_norm: 0.9999990282348773, iteration: 145658
loss: 0.9786891937255859,grad_norm: 0.9999992438853624, iteration: 145659
loss: 0.9795788526535034,grad_norm: 0.9413020517420662, iteration: 145660
loss: 0.9948421120643616,grad_norm: 0.9999990462918824, iteration: 145661
loss: 0.9752272963523865,grad_norm: 0.9787649448027427, iteration: 145662
loss: 0.9968077540397644,grad_norm: 0.9716778298609409, iteration: 145663
loss: 0.9866117835044861,grad_norm: 0.9999990115140514, iteration: 145664
loss: 0.9675889611244202,grad_norm: 0.9999990940124356, iteration: 145665
loss: 0.9862673878669739,grad_norm: 0.9477186048808263, iteration: 145666
loss: 1.0172621011734009,grad_norm: 0.9939153749704409, iteration: 145667
loss: 1.016619324684143,grad_norm: 0.7709885851583763, iteration: 145668
loss: 1.0093988180160522,grad_norm: 0.9999990111559129, iteration: 145669
loss: 0.9932904243469238,grad_norm: 0.8427006495233336, iteration: 145670
loss: 0.9779539704322815,grad_norm: 0.9355763361588891, iteration: 145671
loss: 1.0230203866958618,grad_norm: 0.9175212335348951, iteration: 145672
loss: 0.9907929301261902,grad_norm: 0.8986831185906049, iteration: 145673
loss: 1.0186501741409302,grad_norm: 0.9999991920390896, iteration: 145674
loss: 0.9850922226905823,grad_norm: 0.8763445476795628, iteration: 145675
loss: 1.0452187061309814,grad_norm: 0.9999990007177545, iteration: 145676
loss: 0.9858185052871704,grad_norm: 0.999999207937683, iteration: 145677
loss: 1.006812572479248,grad_norm: 0.9880789098080773, iteration: 145678
loss: 1.005068302154541,grad_norm: 0.9999990141510081, iteration: 145679
loss: 0.9647378325462341,grad_norm: 0.8731012531403386, iteration: 145680
loss: 0.9818768501281738,grad_norm: 0.9999991276644427, iteration: 145681
loss: 0.9945605397224426,grad_norm: 0.8719137716030309, iteration: 145682
loss: 1.0015852451324463,grad_norm: 0.8144745518378661, iteration: 145683
loss: 1.002227783203125,grad_norm: 0.8279703860855082, iteration: 145684
loss: 0.9861667156219482,grad_norm: 0.9999992217193607, iteration: 145685
loss: 0.939053475856781,grad_norm: 0.9655753772966256, iteration: 145686
loss: 0.975952684879303,grad_norm: 0.9577840723624215, iteration: 145687
loss: 0.9745708107948303,grad_norm: 0.9275774535864774, iteration: 145688
loss: 1.0048236846923828,grad_norm: 0.9766795889509673, iteration: 145689
loss: 0.9945458769798279,grad_norm: 0.9999998730150883, iteration: 145690
loss: 1.0272164344787598,grad_norm: 0.963088433275688, iteration: 145691
loss: 0.9945915937423706,grad_norm: 0.9511445768622362, iteration: 145692
loss: 0.9863174557685852,grad_norm: 0.9999993310531781, iteration: 145693
loss: 1.021597146987915,grad_norm: 0.8986781337393744, iteration: 145694
loss: 0.9642765522003174,grad_norm: 0.9420757469183905, iteration: 145695
loss: 1.041242241859436,grad_norm: 0.9999991325296854, iteration: 145696
loss: 0.9713724255561829,grad_norm: 0.9709536317016171, iteration: 145697
loss: 0.9976082444190979,grad_norm: 0.9999990682677229, iteration: 145698
loss: 0.9954837560653687,grad_norm: 0.9999989982810193, iteration: 145699
loss: 0.9838898181915283,grad_norm: 0.9156493547614492, iteration: 145700
loss: 0.9920614361763,grad_norm: 0.999999094697437, iteration: 145701
loss: 0.9673821330070496,grad_norm: 0.9128251258398358, iteration: 145702
loss: 0.9701851010322571,grad_norm: 0.8891684969652869, iteration: 145703
loss: 0.9473018050193787,grad_norm: 0.9999991237345162, iteration: 145704
loss: 1.0396499633789062,grad_norm: 0.9950151495177557, iteration: 145705
loss: 0.9955905675888062,grad_norm: 0.9103692914662249, iteration: 145706
loss: 1.0321099758148193,grad_norm: 0.9999998002418594, iteration: 145707
loss: 0.9715198874473572,grad_norm: 0.8703298780392983, iteration: 145708
loss: 0.9989693760871887,grad_norm: 0.9999990378239106, iteration: 145709
loss: 1.0237587690353394,grad_norm: 0.9999991723376325, iteration: 145710
loss: 1.0482172966003418,grad_norm: 0.9999990676800021, iteration: 145711
loss: 1.0575255155563354,grad_norm: 0.999999430655747, iteration: 145712
loss: 0.9634145498275757,grad_norm: 0.9999991747098433, iteration: 145713
loss: 1.0084726810455322,grad_norm: 0.9999990264110692, iteration: 145714
loss: 0.9872297048568726,grad_norm: 0.866442590198628, iteration: 145715
loss: 0.9893316626548767,grad_norm: 0.9999990467029343, iteration: 145716
loss: 1.0347564220428467,grad_norm: 0.9999989677010703, iteration: 145717
loss: 1.0027083158493042,grad_norm: 0.9999990851046956, iteration: 145718
loss: 1.0079503059387207,grad_norm: 0.9999994058736031, iteration: 145719
loss: 1.0164320468902588,grad_norm: 0.999999221957405, iteration: 145720
loss: 0.9587863087654114,grad_norm: 0.9997301780751499, iteration: 145721
loss: 0.9855220913887024,grad_norm: 0.9999990482158903, iteration: 145722
loss: 1.0278767347335815,grad_norm: 0.9999991220949255, iteration: 145723
loss: 1.0470386743545532,grad_norm: 0.9999992065014907, iteration: 145724
loss: 0.9611589908599854,grad_norm: 0.9738098692673018, iteration: 145725
loss: 1.0120223760604858,grad_norm: 0.835761504315591, iteration: 145726
loss: 1.0114535093307495,grad_norm: 0.8742703816372351, iteration: 145727
loss: 0.9960535764694214,grad_norm: 0.9146580077775586, iteration: 145728
loss: 0.9988962411880493,grad_norm: 0.9999991213758701, iteration: 145729
loss: 1.0110167264938354,grad_norm: 0.9999992075368661, iteration: 145730
loss: 1.0181174278259277,grad_norm: 0.9759912867297164, iteration: 145731
loss: 1.0087015628814697,grad_norm: 0.8060434331251625, iteration: 145732
loss: 0.9660085439682007,grad_norm: 0.9999991278037785, iteration: 145733
loss: 0.9890695214271545,grad_norm: 0.99989320211311, iteration: 145734
loss: 0.9979928135871887,grad_norm: 0.9999991017895045, iteration: 145735
loss: 1.0131572484970093,grad_norm: 0.95253634113503, iteration: 145736
loss: 1.0131818056106567,grad_norm: 0.9208753478064288, iteration: 145737
loss: 0.9883897304534912,grad_norm: 0.9999991775493444, iteration: 145738
loss: 0.9560900330543518,grad_norm: 0.9999991427491963, iteration: 145739
loss: 0.9798693656921387,grad_norm: 0.9999991336678185, iteration: 145740
loss: 0.9863777756690979,grad_norm: 0.9042756738544476, iteration: 145741
loss: 0.9755178093910217,grad_norm: 0.9999989472507629, iteration: 145742
loss: 1.0249437093734741,grad_norm: 0.9999991298150133, iteration: 145743
loss: 0.9977978467941284,grad_norm: 0.9999991973665902, iteration: 145744
loss: 1.0147678852081299,grad_norm: 0.9874899937929197, iteration: 145745
loss: 0.9976813197135925,grad_norm: 0.9999992739789921, iteration: 145746
loss: 0.9928857088088989,grad_norm: 0.91353904514746, iteration: 145747
loss: 0.9613544940948486,grad_norm: 0.9999989800660489, iteration: 145748
loss: 1.0333009958267212,grad_norm: 0.8215479784277856, iteration: 145749
loss: 0.9888353943824768,grad_norm: 0.9999993954516587, iteration: 145750
loss: 0.9698660373687744,grad_norm: 0.9127334029239351, iteration: 145751
loss: 1.0842329263687134,grad_norm: 0.9999992681699212, iteration: 145752
loss: 1.0603498220443726,grad_norm: 0.9999997758825069, iteration: 145753
loss: 1.012934684753418,grad_norm: 0.9948718100456148, iteration: 145754
loss: 1.0402438640594482,grad_norm: 0.9999990441663461, iteration: 145755
loss: 0.991478681564331,grad_norm: 0.9999992495515806, iteration: 145756
loss: 0.9959610104560852,grad_norm: 0.8877887460737541, iteration: 145757
loss: 1.0029964447021484,grad_norm: 0.9999991210199279, iteration: 145758
loss: 0.9684303998947144,grad_norm: 0.9061154444384404, iteration: 145759
loss: 1.0238782167434692,grad_norm: 0.9977650045456954, iteration: 145760
loss: 1.0346989631652832,grad_norm: 0.999999059355316, iteration: 145761
loss: 1.0016992092132568,grad_norm: 0.9999990775146558, iteration: 145762
loss: 1.0171303749084473,grad_norm: 0.9999992753375929, iteration: 145763
loss: 0.9939690828323364,grad_norm: 0.9999991841653725, iteration: 145764
loss: 0.9930998086929321,grad_norm: 0.8809347046471765, iteration: 145765
loss: 0.9866306185722351,grad_norm: 0.9999991621076119, iteration: 145766
loss: 0.9760589599609375,grad_norm: 0.9999991285368524, iteration: 145767
loss: 0.9787262082099915,grad_norm: 0.8876216735041161, iteration: 145768
loss: 1.0083967447280884,grad_norm: 0.9849111516961673, iteration: 145769
loss: 1.0249483585357666,grad_norm: 0.9999990095502957, iteration: 145770
loss: 1.0079632997512817,grad_norm: 0.9999990337278788, iteration: 145771
loss: 0.9749661684036255,grad_norm: 0.9999991237839313, iteration: 145772
loss: 1.013375997543335,grad_norm: 0.9999990730412729, iteration: 145773
loss: 0.9573865532875061,grad_norm: 0.9999990392484455, iteration: 145774
loss: 0.9813321232795715,grad_norm: 0.9999990534237053, iteration: 145775
loss: 1.011768102645874,grad_norm: 0.9552909397462194, iteration: 145776
loss: 1.0017286539077759,grad_norm: 0.9999990235692564, iteration: 145777
loss: 0.9764136075973511,grad_norm: 0.8090716323334631, iteration: 145778
loss: 1.0151299238204956,grad_norm: 0.9999992000065208, iteration: 145779
loss: 0.9976086020469666,grad_norm: 0.9999989830426741, iteration: 145780
loss: 0.9888786673545837,grad_norm: 0.9999989676267709, iteration: 145781
loss: 1.002229928970337,grad_norm: 0.9999990327010494, iteration: 145782
loss: 0.9944018125534058,grad_norm: 0.8447121941056205, iteration: 145783
loss: 1.001509189605713,grad_norm: 0.7814197748701674, iteration: 145784
loss: 0.9767743945121765,grad_norm: 0.981710478837718, iteration: 145785
loss: 1.0342210531234741,grad_norm: 0.9999990425569292, iteration: 145786
loss: 0.9911205172538757,grad_norm: 0.8693625778114162, iteration: 145787
loss: 0.944553017616272,grad_norm: 0.9999991332876494, iteration: 145788
loss: 0.9974528551101685,grad_norm: 0.999999006992162, iteration: 145789
loss: 1.028340220451355,grad_norm: 0.9999990833683953, iteration: 145790
loss: 1.013639211654663,grad_norm: 0.9358889251499861, iteration: 145791
loss: 1.0077420473098755,grad_norm: 0.9999991468144425, iteration: 145792
loss: 1.0036035776138306,grad_norm: 0.9285021105730361, iteration: 145793
loss: 1.0343010425567627,grad_norm: 0.9999991082031736, iteration: 145794
loss: 0.9687091708183289,grad_norm: 0.9999992317282417, iteration: 145795
loss: 0.994148850440979,grad_norm: 0.9999992547483635, iteration: 145796
loss: 0.9803568720817566,grad_norm: 0.9999991735301623, iteration: 145797
loss: 1.0233999490737915,grad_norm: 0.9999992120087687, iteration: 145798
loss: 1.010972499847412,grad_norm: 0.9776035363604091, iteration: 145799
loss: 1.0206838846206665,grad_norm: 0.9970960242467806, iteration: 145800
loss: 0.9581533074378967,grad_norm: 0.9759829321616944, iteration: 145801
loss: 1.0160081386566162,grad_norm: 0.9999992473386398, iteration: 145802
loss: 1.0092579126358032,grad_norm: 0.9999991541931795, iteration: 145803
loss: 1.0236583948135376,grad_norm: 0.9999992309992525, iteration: 145804
loss: 0.9585684537887573,grad_norm: 0.9999990553910526, iteration: 145805
loss: 0.9841670393943787,grad_norm: 0.9348093613483582, iteration: 145806
loss: 1.0101476907730103,grad_norm: 0.9999990241745257, iteration: 145807
loss: 0.9972424507141113,grad_norm: 0.9211491850264261, iteration: 145808
loss: 0.9610856175422668,grad_norm: 0.9999992592815847, iteration: 145809
loss: 1.0221039056777954,grad_norm: 0.9999991488447793, iteration: 145810
loss: 0.9699149131774902,grad_norm: 0.9999990907477571, iteration: 145811
loss: 0.9740027189254761,grad_norm: 0.9999990978216142, iteration: 145812
loss: 0.9996870160102844,grad_norm: 0.9999990531946628, iteration: 145813
loss: 0.995937705039978,grad_norm: 0.8560958433490149, iteration: 145814
loss: 1.0210773944854736,grad_norm: 0.9999991205876055, iteration: 145815
loss: 1.045150637626648,grad_norm: 0.9999992559371211, iteration: 145816
loss: 0.9965575933456421,grad_norm: 0.9746228623811067, iteration: 145817
loss: 0.9967947602272034,grad_norm: 0.9999990318945666, iteration: 145818
loss: 1.0040847063064575,grad_norm: 0.999999058935631, iteration: 145819
loss: 0.9911424517631531,grad_norm: 0.9337913836536863, iteration: 145820
loss: 1.0016239881515503,grad_norm: 0.9999996208128357, iteration: 145821
loss: 0.948855459690094,grad_norm: 0.9999995104501687, iteration: 145822
loss: 1.001440405845642,grad_norm: 0.9529370204136677, iteration: 145823
loss: 1.0126034021377563,grad_norm: 0.9052516913592883, iteration: 145824
loss: 1.0173050165176392,grad_norm: 0.9521954763582858, iteration: 145825
loss: 0.9842106103897095,grad_norm: 0.8901062469363671, iteration: 145826
loss: 0.9979313015937805,grad_norm: 0.9999992081846477, iteration: 145827
loss: 0.9778671860694885,grad_norm: 0.8650351876322503, iteration: 145828
loss: 1.0324342250823975,grad_norm: 0.9999992878291787, iteration: 145829
loss: 0.9944483041763306,grad_norm: 0.9999993759729329, iteration: 145830
loss: 0.9986048340797424,grad_norm: 0.999999073074799, iteration: 145831
loss: 0.9722049832344055,grad_norm: 0.9999990762933542, iteration: 145832
loss: 0.9657591581344604,grad_norm: 0.9999990671924334, iteration: 145833
loss: 1.0165355205535889,grad_norm: 0.9918057049647669, iteration: 145834
loss: 1.0202767848968506,grad_norm: 0.9999991554461655, iteration: 145835
loss: 0.9921945929527283,grad_norm: 0.9475300919210436, iteration: 145836
loss: 1.022400975227356,grad_norm: 0.999999343908691, iteration: 145837
loss: 1.0145262479782104,grad_norm: 0.9441000000593552, iteration: 145838
loss: 0.9958420991897583,grad_norm: 0.9999992196460022, iteration: 145839
loss: 1.1748037338256836,grad_norm: 0.9999998528665394, iteration: 145840
loss: 0.9799596071243286,grad_norm: 0.9452497652236498, iteration: 145841
loss: 1.0098904371261597,grad_norm: 0.9999988824550022, iteration: 145842
loss: 0.9987145066261292,grad_norm: 0.9999989781650964, iteration: 145843
loss: 1.0299540758132935,grad_norm: 0.9999991711358103, iteration: 145844
loss: 1.0009304285049438,grad_norm: 0.9999991705535954, iteration: 145845
loss: 0.9810013771057129,grad_norm: 0.9592303255357566, iteration: 145846
loss: 1.0426476001739502,grad_norm: 0.9999990732437842, iteration: 145847
loss: 0.9929586052894592,grad_norm: 0.8886751659829144, iteration: 145848
loss: 1.002578616142273,grad_norm: 0.9999992464568845, iteration: 145849
loss: 0.9093851447105408,grad_norm: 0.9999990060722346, iteration: 145850
loss: 1.0361599922180176,grad_norm: 0.999999087796318, iteration: 145851
loss: 1.0134344100952148,grad_norm: 0.9223518698875484, iteration: 145852
loss: 0.9885172247886658,grad_norm: 0.9932749621612914, iteration: 145853
loss: 1.0077468156814575,grad_norm: 0.9874992497834602, iteration: 145854
loss: 0.9773901700973511,grad_norm: 0.9217290907014122, iteration: 145855
loss: 1.0040339231491089,grad_norm: 0.999999063408177, iteration: 145856
loss: 0.9952250719070435,grad_norm: 0.9999991607713319, iteration: 145857
loss: 1.0386154651641846,grad_norm: 0.999999657052241, iteration: 145858
loss: 1.0528537034988403,grad_norm: 0.8999797855777903, iteration: 145859
loss: 0.9996155500411987,grad_norm: 0.7910981091015871, iteration: 145860
loss: 1.008110761642456,grad_norm: 0.9755911640262078, iteration: 145861
loss: 0.9930447936058044,grad_norm: 0.8311094984333539, iteration: 145862
loss: 1.011163353919983,grad_norm: 0.9999990882244322, iteration: 145863
loss: 0.9834730625152588,grad_norm: 0.9999992543356907, iteration: 145864
loss: 1.0444176197052002,grad_norm: 0.9495027893067847, iteration: 145865
loss: 1.0125588178634644,grad_norm: 0.9714607552981909, iteration: 145866
loss: 1.0061848163604736,grad_norm: 0.9999990918938453, iteration: 145867
loss: 0.9712389707565308,grad_norm: 0.9999990522684297, iteration: 145868
loss: 1.0029116868972778,grad_norm: 0.979513552986657, iteration: 145869
loss: 1.0288008451461792,grad_norm: 0.9999990357120477, iteration: 145870
loss: 1.0104018449783325,grad_norm: 0.9999990936098452, iteration: 145871
loss: 1.014223337173462,grad_norm: 0.9999992287515335, iteration: 145872
loss: 1.015479564666748,grad_norm: 0.9999990757623156, iteration: 145873
loss: 0.9886224269866943,grad_norm: 0.9179468979311773, iteration: 145874
loss: 1.0155978202819824,grad_norm: 0.9151419110521924, iteration: 145875
loss: 1.029270052909851,grad_norm: 0.999999004200989, iteration: 145876
loss: 0.9937398433685303,grad_norm: 0.9999991536579792, iteration: 145877
loss: 0.9788206219673157,grad_norm: 0.9999990662604502, iteration: 145878
loss: 1.011764645576477,grad_norm: 0.9999989367931175, iteration: 145879
loss: 0.9912519454956055,grad_norm: 0.999999224329965, iteration: 145880
loss: 1.0082730054855347,grad_norm: 0.8971661447885567, iteration: 145881
loss: 0.9794158935546875,grad_norm: 0.8650329205865747, iteration: 145882
loss: 1.0325567722320557,grad_norm: 0.999999932239854, iteration: 145883
loss: 1.0095298290252686,grad_norm: 0.9999992696120187, iteration: 145884
loss: 1.0198944807052612,grad_norm: 0.9792520798165707, iteration: 145885
loss: 1.0194838047027588,grad_norm: 0.9833171160199818, iteration: 145886
loss: 1.025785207748413,grad_norm: 0.9999991340352228, iteration: 145887
loss: 0.946764349937439,grad_norm: 0.8762932828385935, iteration: 145888
loss: 1.0056393146514893,grad_norm: 0.9999992550781548, iteration: 145889
loss: 1.0354640483856201,grad_norm: 0.9999990278926698, iteration: 145890
loss: 0.9929747581481934,grad_norm: 0.9999990376552482, iteration: 145891
loss: 1.0171632766723633,grad_norm: 0.8665895996172918, iteration: 145892
loss: 1.0012240409851074,grad_norm: 0.8034778917524336, iteration: 145893
loss: 0.992875337600708,grad_norm: 0.9999992081995583, iteration: 145894
loss: 1.0302917957305908,grad_norm: 0.9999990734288824, iteration: 145895
loss: 0.9931643009185791,grad_norm: 0.9445345224317787, iteration: 145896
loss: 0.9849919676780701,grad_norm: 0.856840688996181, iteration: 145897
loss: 1.004105806350708,grad_norm: 0.9626052882310118, iteration: 145898
loss: 1.0133960247039795,grad_norm: 0.9999989754467541, iteration: 145899
loss: 1.0221552848815918,grad_norm: 0.9842142855611237, iteration: 145900
loss: 0.998697817325592,grad_norm: 0.9999991200573172, iteration: 145901
loss: 1.0160294771194458,grad_norm: 0.9913796808620601, iteration: 145902
loss: 0.942335844039917,grad_norm: 0.9999992102236289, iteration: 145903
loss: 1.0341540575027466,grad_norm: 0.999999020165807, iteration: 145904
loss: 1.0222618579864502,grad_norm: 0.9328701090643089, iteration: 145905
loss: 1.0180145502090454,grad_norm: 0.9761757617187229, iteration: 145906
loss: 0.9378910064697266,grad_norm: 0.9999992351739201, iteration: 145907
loss: 0.997122049331665,grad_norm: 0.9999991026967283, iteration: 145908
loss: 0.9889110922813416,grad_norm: 0.9999991527577906, iteration: 145909
loss: 0.9609280824661255,grad_norm: 0.9941511319667155, iteration: 145910
loss: 1.0295023918151855,grad_norm: 0.8687109290028182, iteration: 145911
loss: 1.0132139921188354,grad_norm: 0.9999990240109414, iteration: 145912
loss: 1.005630373954773,grad_norm: 0.999999139233238, iteration: 145913
loss: 0.9791075587272644,grad_norm: 0.9999989440203395, iteration: 145914
loss: 0.9637823700904846,grad_norm: 0.9999993842831263, iteration: 145915
loss: 1.0079694986343384,grad_norm: 0.9018105373456626, iteration: 145916
loss: 1.036022663116455,grad_norm: 0.9999991098607511, iteration: 145917
loss: 1.0234415531158447,grad_norm: 0.8798885934463786, iteration: 145918
loss: 1.0014796257019043,grad_norm: 0.9070769099266461, iteration: 145919
loss: 0.996850311756134,grad_norm: 0.9875421098093222, iteration: 145920
loss: 0.9997320175170898,grad_norm: 0.9999989959188295, iteration: 145921
loss: 0.9780302047729492,grad_norm: 0.9999988528393814, iteration: 145922
loss: 0.9964982271194458,grad_norm: 0.9211965142961017, iteration: 145923
loss: 0.9897512793540955,grad_norm: 0.9691789816102531, iteration: 145924
loss: 1.0330755710601807,grad_norm: 0.8771883746466933, iteration: 145925
loss: 0.9948145151138306,grad_norm: 0.9999991069308879, iteration: 145926
loss: 0.9803391098976135,grad_norm: 0.7661624477768794, iteration: 145927
loss: 1.0276761054992676,grad_norm: 0.9430394577428773, iteration: 145928
loss: 0.9664784073829651,grad_norm: 0.9999991838980425, iteration: 145929
loss: 0.986566424369812,grad_norm: 0.9999990622206804, iteration: 145930
loss: 0.9797254800796509,grad_norm: 0.99999910567453, iteration: 145931
loss: 0.9996740818023682,grad_norm: 0.9999991589280904, iteration: 145932
loss: 1.0243818759918213,grad_norm: 0.9999991372577561, iteration: 145933
loss: 0.9933421015739441,grad_norm: 0.9894929109814402, iteration: 145934
loss: 0.9821042418479919,grad_norm: 0.999998956811576, iteration: 145935
loss: 0.9800265431404114,grad_norm: 0.9169407586103994, iteration: 145936
loss: 0.992232620716095,grad_norm: 0.9999992052127622, iteration: 145937
loss: 0.9874358773231506,grad_norm: 0.9999991677515291, iteration: 145938
loss: 0.9970303177833557,grad_norm: 0.9999991947579135, iteration: 145939
loss: 1.0113954544067383,grad_norm: 0.951571833748085, iteration: 145940
loss: 0.9895378947257996,grad_norm: 0.9704330884922283, iteration: 145941
loss: 1.0188959836959839,grad_norm: 0.999999221398032, iteration: 145942
loss: 1.0283386707305908,grad_norm: 0.9999996690701626, iteration: 145943
loss: 1.0084553956985474,grad_norm: 0.9589633671918565, iteration: 145944
loss: 1.0063867568969727,grad_norm: 0.9983907668542689, iteration: 145945
loss: 0.9810457229614258,grad_norm: 0.999999102117034, iteration: 145946
loss: 0.9682462811470032,grad_norm: 0.9999990734203916, iteration: 145947
loss: 0.9677954316139221,grad_norm: 0.999999044613296, iteration: 145948
loss: 1.0067545175552368,grad_norm: 0.9999991452092384, iteration: 145949
loss: 0.9963254332542419,grad_norm: 0.9999991703796837, iteration: 145950
loss: 1.0141657590866089,grad_norm: 0.9863085434314183, iteration: 145951
loss: 0.9792193174362183,grad_norm: 0.9855136507902413, iteration: 145952
loss: 1.0222238302230835,grad_norm: 0.9999990349932015, iteration: 145953
loss: 0.9999225735664368,grad_norm: 0.9007527382670242, iteration: 145954
loss: 1.0079830884933472,grad_norm: 0.9999991667134285, iteration: 145955
loss: 1.0110418796539307,grad_norm: 0.9999991344067406, iteration: 145956
loss: 0.9971194863319397,grad_norm: 0.9999991259831899, iteration: 145957
loss: 0.9854415059089661,grad_norm: 0.9660855109864092, iteration: 145958
loss: 1.0314947366714478,grad_norm: 0.9205006683713073, iteration: 145959
loss: 0.9762048125267029,grad_norm: 0.9684483884473628, iteration: 145960
loss: 1.0785915851593018,grad_norm: 0.9999995625376068, iteration: 145961
loss: 0.9751641750335693,grad_norm: 0.9079795885218797, iteration: 145962
loss: 1.032501220703125,grad_norm: 0.9999991129591334, iteration: 145963
loss: 1.0131887197494507,grad_norm: 0.9999991013210158, iteration: 145964
loss: 1.0864331722259521,grad_norm: 0.8891203798251847, iteration: 145965
loss: 1.011279582977295,grad_norm: 0.9999991223382656, iteration: 145966
loss: 0.9797589778900146,grad_norm: 0.9999991421023892, iteration: 145967
loss: 0.9947841167449951,grad_norm: 0.9999992478027605, iteration: 145968
loss: 1.0227060317993164,grad_norm: 0.9999991645303999, iteration: 145969
loss: 0.9793573021888733,grad_norm: 0.8433694256888974, iteration: 145970
loss: 1.041114091873169,grad_norm: 0.9999991340228106, iteration: 145971
loss: 1.0007452964782715,grad_norm: 0.999999281296367, iteration: 145972
loss: 1.013706088066101,grad_norm: 0.9999991516885696, iteration: 145973
loss: 0.9856792688369751,grad_norm: 0.8935187888771827, iteration: 145974
loss: 1.0191665887832642,grad_norm: 0.999999207676015, iteration: 145975
loss: 0.9774935245513916,grad_norm: 0.9719743713614323, iteration: 145976
loss: 0.9686020612716675,grad_norm: 0.8900692286711277, iteration: 145977
loss: 0.977135956287384,grad_norm: 0.9999993205342005, iteration: 145978
loss: 1.072605848312378,grad_norm: 0.9999991437606996, iteration: 145979
loss: 1.011155128479004,grad_norm: 0.98969720608322, iteration: 145980
loss: 1.0121923685073853,grad_norm: 0.9999991528313952, iteration: 145981
loss: 1.0208098888397217,grad_norm: 0.9999991786822908, iteration: 145982
loss: 0.9859363436698914,grad_norm: 0.9999991830089059, iteration: 145983
loss: 1.0124237537384033,grad_norm: 0.9554715834517421, iteration: 145984
loss: 1.0026944875717163,grad_norm: 0.9999992423570271, iteration: 145985
loss: 1.022602915763855,grad_norm: 0.9999991192434129, iteration: 145986
loss: 0.9898295998573303,grad_norm: 0.9999992793084473, iteration: 145987
loss: 0.994443416595459,grad_norm: 0.9250769317798383, iteration: 145988
loss: 0.9773775339126587,grad_norm: 0.9999990724212365, iteration: 145989
loss: 0.998953104019165,grad_norm: 0.7875469886836114, iteration: 145990
loss: 1.0154539346694946,grad_norm: 0.9995786748962985, iteration: 145991
loss: 1.0378628969192505,grad_norm: 0.9999992479282943, iteration: 145992
loss: 1.0295897722244263,grad_norm: 0.999999267804834, iteration: 145993
loss: 0.9945419430732727,grad_norm: 0.9550488052946225, iteration: 145994
loss: 0.9870366454124451,grad_norm: 0.9999991834815377, iteration: 145995
loss: 0.9705254435539246,grad_norm: 0.999999108661446, iteration: 145996
loss: 0.9728952646255493,grad_norm: 0.9410454342865869, iteration: 145997
loss: 1.0053236484527588,grad_norm: 0.9999991930085579, iteration: 145998
loss: 1.0002849102020264,grad_norm: 0.999999140113288, iteration: 145999
loss: 0.9875805974006653,grad_norm: 0.9999990016484167, iteration: 146000
loss: 0.9681059122085571,grad_norm: 0.9788166150825343, iteration: 146001
loss: 1.0427305698394775,grad_norm: 0.9999993989441618, iteration: 146002
loss: 0.9693099856376648,grad_norm: 0.9999992476802047, iteration: 146003
loss: 1.002439022064209,grad_norm: 0.9999991511786945, iteration: 146004
loss: 0.9728068709373474,grad_norm: 0.9775626642560398, iteration: 146005
loss: 1.0020570755004883,grad_norm: 0.96637802984899, iteration: 146006
loss: 0.9629493355751038,grad_norm: 0.9749052995628873, iteration: 146007
loss: 0.9865378141403198,grad_norm: 0.985341007234949, iteration: 146008
loss: 1.030740737915039,grad_norm: 0.9688771950008958, iteration: 146009
loss: 1.0008504390716553,grad_norm: 0.9334778085410125, iteration: 146010
loss: 0.9830506443977356,grad_norm: 0.8971914408331116, iteration: 146011
loss: 1.020211100578308,grad_norm: 0.9999990779889951, iteration: 146012
loss: 1.0146784782409668,grad_norm: 0.9070529426111924, iteration: 146013
loss: 1.0040323734283447,grad_norm: 0.928010152721305, iteration: 146014
loss: 0.9993070363998413,grad_norm: 0.9826668105519961, iteration: 146015
loss: 0.9807214736938477,grad_norm: 0.9153965378697249, iteration: 146016
loss: 0.9887769222259521,grad_norm: 0.999999051076953, iteration: 146017
loss: 1.0076812505722046,grad_norm: 0.9792487960165611, iteration: 146018
loss: 0.995389997959137,grad_norm: 0.8733327197340461, iteration: 146019
loss: 0.9432557225227356,grad_norm: 0.9010120413795875, iteration: 146020
loss: 1.0038013458251953,grad_norm: 0.9321059613053537, iteration: 146021
loss: 1.063759446144104,grad_norm: 0.9999993231959962, iteration: 146022
loss: 1.0054283142089844,grad_norm: 0.8776525442805025, iteration: 146023
loss: 1.0029078722000122,grad_norm: 0.9999991312294296, iteration: 146024
loss: 1.0873323678970337,grad_norm: 0.999999182931631, iteration: 146025
loss: 1.045464277267456,grad_norm: 0.9999989921433409, iteration: 146026
loss: 1.0248337984085083,grad_norm: 0.9999989908629279, iteration: 146027
loss: 0.9938229322433472,grad_norm: 0.9999992949911615, iteration: 146028
loss: 0.9813429117202759,grad_norm: 0.8443389486789935, iteration: 146029
loss: 0.9941086173057556,grad_norm: 0.9999990263320772, iteration: 146030
loss: 1.0325978994369507,grad_norm: 0.9999994411100016, iteration: 146031
loss: 1.0023736953735352,grad_norm: 0.9999993686651616, iteration: 146032
loss: 1.0405755043029785,grad_norm: 0.9999991011634178, iteration: 146033
loss: 0.9721823334693909,grad_norm: 0.9042490464833747, iteration: 146034
loss: 0.9790054559707642,grad_norm: 0.9999991942211581, iteration: 146035
loss: 0.9857405424118042,grad_norm: 0.9999992623292917, iteration: 146036
loss: 0.9695726633071899,grad_norm: 0.9848702629198869, iteration: 146037
loss: 1.1994736194610596,grad_norm: 0.9999996442261523, iteration: 146038
loss: 0.9701492190361023,grad_norm: 0.9999990990008658, iteration: 146039
loss: 1.0033835172653198,grad_norm: 0.99999900580827, iteration: 146040
loss: 1.0052334070205688,grad_norm: 0.9999991897348728, iteration: 146041
loss: 1.0117906332015991,grad_norm: 0.9999990712611478, iteration: 146042
loss: 0.9717098474502563,grad_norm: 0.9999990225645217, iteration: 146043
loss: 0.9719694256782532,grad_norm: 0.9999990611238508, iteration: 146044
loss: 0.9875476360321045,grad_norm: 0.893889453250969, iteration: 146045
loss: 0.9634956121444702,grad_norm: 0.9467338031422207, iteration: 146046
loss: 0.963858962059021,grad_norm: 0.9999990885825165, iteration: 146047
loss: 0.9889714121818542,grad_norm: 0.9534758695977398, iteration: 146048
loss: 1.0352394580841064,grad_norm: 0.9999991561502376, iteration: 146049
loss: 0.9794907569885254,grad_norm: 0.8472914057917191, iteration: 146050
loss: 1.0047378540039062,grad_norm: 0.9999993111146106, iteration: 146051
loss: 0.9830187559127808,grad_norm: 0.833235571070925, iteration: 146052
loss: 0.971868634223938,grad_norm: 0.9999992565339775, iteration: 146053
loss: 1.042892575263977,grad_norm: 0.9999990253687306, iteration: 146054
loss: 0.9832488894462585,grad_norm: 0.8388901285663044, iteration: 146055
loss: 0.999011218547821,grad_norm: 0.999998996190455, iteration: 146056
loss: 0.9843186736106873,grad_norm: 0.9999991426207316, iteration: 146057
loss: 1.0129393339157104,grad_norm: 0.9999992883129607, iteration: 146058
loss: 1.013733983039856,grad_norm: 0.9086394167287348, iteration: 146059
loss: 1.0403441190719604,grad_norm: 0.9999996825504804, iteration: 146060
loss: 1.0058722496032715,grad_norm: 0.9999992166819142, iteration: 146061
loss: 1.0216610431671143,grad_norm: 0.9890299346764518, iteration: 146062
loss: 0.9955071210861206,grad_norm: 0.9999990024460328, iteration: 146063
loss: 0.9862164855003357,grad_norm: 0.9000405121537898, iteration: 146064
loss: 0.9844395518302917,grad_norm: 0.8703173695961942, iteration: 146065
loss: 1.0035876035690308,grad_norm: 0.9999990342482536, iteration: 146066
loss: 0.9946684241294861,grad_norm: 0.9999991171970691, iteration: 146067
loss: 1.0075243711471558,grad_norm: 0.9999989860951052, iteration: 146068
loss: 0.9562905430793762,grad_norm: 0.9999990308324798, iteration: 146069
loss: 0.988888144493103,grad_norm: 0.9732424707042877, iteration: 146070
loss: 0.9750557541847229,grad_norm: 0.9464284427944297, iteration: 146071
loss: 0.9759699106216431,grad_norm: 0.8205944655849322, iteration: 146072
loss: 0.9752112030982971,grad_norm: 0.9477369232069041, iteration: 146073
loss: 1.0369454622268677,grad_norm: 0.9999999104617524, iteration: 146074
loss: 1.0061531066894531,grad_norm: 0.9300241536754303, iteration: 146075
loss: 0.9907907247543335,grad_norm: 0.9999989569335961, iteration: 146076
loss: 0.9930593967437744,grad_norm: 0.9999990908246709, iteration: 146077
loss: 1.0452345609664917,grad_norm: 0.9400685890576005, iteration: 146078
loss: 1.0009199380874634,grad_norm: 0.9999991716176059, iteration: 146079
loss: 1.0021262168884277,grad_norm: 0.9248872773186084, iteration: 146080
loss: 0.9846481084823608,grad_norm: 0.9999990723659025, iteration: 146081
loss: 0.9820389151573181,grad_norm: 0.9592696561849804, iteration: 146082
loss: 0.984704852104187,grad_norm: 0.9999989290452796, iteration: 146083
loss: 1.007685899734497,grad_norm: 0.9999989573860381, iteration: 146084
loss: 0.9636023044586182,grad_norm: 0.9999990441077826, iteration: 146085
loss: 0.9719904065132141,grad_norm: 0.9999990060695575, iteration: 146086
loss: 1.0479836463928223,grad_norm: 0.9999993022093263, iteration: 146087
loss: 1.0084402561187744,grad_norm: 0.9232548866619914, iteration: 146088
loss: 1.0120322704315186,grad_norm: 0.9999990427114651, iteration: 146089
loss: 1.0072541236877441,grad_norm: 0.9476425389610562, iteration: 146090
loss: 1.0273420810699463,grad_norm: 0.9999992534053833, iteration: 146091
loss: 1.0176000595092773,grad_norm: 0.8807010754642782, iteration: 146092
loss: 1.0040751695632935,grad_norm: 0.9999990831855998, iteration: 146093
loss: 1.0219011306762695,grad_norm: 0.8564854356053543, iteration: 146094
loss: 1.0357000827789307,grad_norm: 0.9999992996947131, iteration: 146095
loss: 0.9430964589118958,grad_norm: 0.9999990216066943, iteration: 146096
loss: 1.0070480108261108,grad_norm: 0.9999991761644125, iteration: 146097
loss: 0.9945343732833862,grad_norm: 0.9999990799181641, iteration: 146098
loss: 0.9988574385643005,grad_norm: 0.9999990293518779, iteration: 146099
loss: 1.0000770092010498,grad_norm: 0.8626765987474478, iteration: 146100
loss: 0.9711011052131653,grad_norm: 0.9999990899496631, iteration: 146101
loss: 0.9901849031448364,grad_norm: 0.9891784844175374, iteration: 146102
loss: 0.9915369153022766,grad_norm: 0.964794396658174, iteration: 146103
loss: 1.0124918222427368,grad_norm: 0.828864063614118, iteration: 146104
loss: 0.9909107685089111,grad_norm: 0.9999991643731652, iteration: 146105
loss: 1.0067720413208008,grad_norm: 0.9999997383895471, iteration: 146106
loss: 1.0337306261062622,grad_norm: 0.999999206430211, iteration: 146107
loss: 0.976287841796875,grad_norm: 0.9232123788368849, iteration: 146108
loss: 0.9945837259292603,grad_norm: 0.9865886963232051, iteration: 146109
loss: 1.0259596109390259,grad_norm: 0.8976552335193302, iteration: 146110
loss: 1.0080047845840454,grad_norm: 0.9999991533933759, iteration: 146111
loss: 1.0142303705215454,grad_norm: 0.9999989248473506, iteration: 146112
loss: 0.9845877289772034,grad_norm: 0.9605918248595501, iteration: 146113
loss: 1.0027934312820435,grad_norm: 0.9999992569882856, iteration: 146114
loss: 1.0095630884170532,grad_norm: 0.9244347897325018, iteration: 146115
loss: 0.9953764081001282,grad_norm: 0.9802660376189202, iteration: 146116
loss: 0.9742939472198486,grad_norm: 0.9948953939484431, iteration: 146117
loss: 1.029256820678711,grad_norm: 0.9849093473937668, iteration: 146118
loss: 1.0182687044143677,grad_norm: 0.9346980189555091, iteration: 146119
loss: 1.0258924961090088,grad_norm: 0.9319867927844379, iteration: 146120
loss: 0.9836727976799011,grad_norm: 0.999999293973268, iteration: 146121
loss: 1.0259865522384644,grad_norm: 0.9715093510978138, iteration: 146122
loss: 0.9976021647453308,grad_norm: 0.9999990953831839, iteration: 146123
loss: 1.0047141313552856,grad_norm: 0.8495096790880482, iteration: 146124
loss: 1.0312167406082153,grad_norm: 0.9513556200019282, iteration: 146125
loss: 1.0220794677734375,grad_norm: 0.8508714167043991, iteration: 146126
loss: 0.9939569234848022,grad_norm: 0.9999995185121308, iteration: 146127
loss: 1.0158638954162598,grad_norm: 0.8228922349761846, iteration: 146128
loss: 0.994683027267456,grad_norm: 0.9999991646351202, iteration: 146129
loss: 1.0403645038604736,grad_norm: 0.999999328118451, iteration: 146130
loss: 1.0001376867294312,grad_norm: 0.9999995373796835, iteration: 146131
loss: 0.9792426824569702,grad_norm: 0.9999991594662927, iteration: 146132
loss: 0.9763814210891724,grad_norm: 0.9072632113318794, iteration: 146133
loss: 1.0364060401916504,grad_norm: 0.9699838840102137, iteration: 146134
loss: 0.9874431490898132,grad_norm: 0.999999108865345, iteration: 146135
loss: 1.0216401815414429,grad_norm: 0.9999992058427795, iteration: 146136
loss: 0.9842039942741394,grad_norm: 0.8494013247885915, iteration: 146137
loss: 0.9856600761413574,grad_norm: 0.946666529138522, iteration: 146138
loss: 0.9900816082954407,grad_norm: 0.9999991819356835, iteration: 146139
loss: 0.9985485672950745,grad_norm: 0.9929427481698438, iteration: 146140
loss: 0.9735338687896729,grad_norm: 0.8982136120031341, iteration: 146141
loss: 1.0073509216308594,grad_norm: 0.9563881992153754, iteration: 146142
loss: 0.9998784065246582,grad_norm: 0.9144669524339188, iteration: 146143
loss: 1.0109096765518188,grad_norm: 0.999999583071974, iteration: 146144
loss: 0.9956406354904175,grad_norm: 0.9999991174948707, iteration: 146145
loss: 0.9666741490364075,grad_norm: 0.9048912326175216, iteration: 146146
loss: 1.0072288513183594,grad_norm: 0.924424291436115, iteration: 146147
loss: 0.987418532371521,grad_norm: 0.9999990383196661, iteration: 146148
loss: 0.995323121547699,grad_norm: 0.9999992309960173, iteration: 146149
loss: 1.0102053880691528,grad_norm: 0.9485756994390974, iteration: 146150
loss: 1.0223673582077026,grad_norm: 0.8863977922216594, iteration: 146151
loss: 1.0669212341308594,grad_norm: 0.9298464057841566, iteration: 146152
loss: 1.0899468660354614,grad_norm: 0.9999992381943331, iteration: 146153
loss: 0.9952042698860168,grad_norm: 0.9999992018691133, iteration: 146154
loss: 1.0028327703475952,grad_norm: 0.9999992969787881, iteration: 146155
loss: 1.0041024684906006,grad_norm: 0.9554479870232018, iteration: 146156
loss: 1.0338659286499023,grad_norm: 0.9120536869732235, iteration: 146157
loss: 0.9605751633644104,grad_norm: 0.9999991122629387, iteration: 146158
loss: 1.0179308652877808,grad_norm: 0.8464341612223467, iteration: 146159
loss: 0.9882344603538513,grad_norm: 0.9999990502508168, iteration: 146160
loss: 0.9708602428436279,grad_norm: 0.9999992114797774, iteration: 146161
loss: 0.9843800663948059,grad_norm: 0.9999991990018333, iteration: 146162
loss: 1.0085086822509766,grad_norm: 0.9999991767177404, iteration: 146163
loss: 0.9954661130905151,grad_norm: 0.9999990540456406, iteration: 146164
loss: 0.9966967105865479,grad_norm: 0.901741781264614, iteration: 146165
loss: 0.9555738568305969,grad_norm: 0.9114812065900066, iteration: 146166
loss: 0.9863561391830444,grad_norm: 0.950061795244977, iteration: 146167
loss: 1.0160521268844604,grad_norm: 0.9999991011823169, iteration: 146168
loss: 0.995570719242096,grad_norm: 0.9999992625563225, iteration: 146169
loss: 0.9838945865631104,grad_norm: 0.9999992826853064, iteration: 146170
loss: 0.9956250190734863,grad_norm: 0.9290403281579438, iteration: 146171
loss: 1.0059468746185303,grad_norm: 0.9999989552238889, iteration: 146172
loss: 0.980929434299469,grad_norm: 0.9999990519551767, iteration: 146173
loss: 1.0085734128952026,grad_norm: 0.9999991601913941, iteration: 146174
loss: 1.011189341545105,grad_norm: 0.9999990924612374, iteration: 146175
loss: 1.0133304595947266,grad_norm: 0.9999991908110794, iteration: 146176
loss: 1.0490906238555908,grad_norm: 0.9999999166219922, iteration: 146177
loss: 1.009016752243042,grad_norm: 0.9954831040874331, iteration: 146178
loss: 1.02781081199646,grad_norm: 0.999998956882422, iteration: 146179
loss: 1.002639651298523,grad_norm: 0.9999991222142526, iteration: 146180
loss: 0.9472559094429016,grad_norm: 0.9933315113192641, iteration: 146181
loss: 0.9896047115325928,grad_norm: 0.8372161392064917, iteration: 146182
loss: 0.9960577487945557,grad_norm: 0.9999991019371136, iteration: 146183
loss: 1.0411607027053833,grad_norm: 0.999999018443607, iteration: 146184
loss: 1.0169907808303833,grad_norm: 0.9999992233283972, iteration: 146185
loss: 0.9971134662628174,grad_norm: 0.8703811929835406, iteration: 146186
loss: 0.9608661532402039,grad_norm: 0.9999992362837714, iteration: 146187
loss: 0.9798188805580139,grad_norm: 0.9060839992609657, iteration: 146188
loss: 1.0234087705612183,grad_norm: 0.9999990961138203, iteration: 146189
loss: 0.971531867980957,grad_norm: 0.9999990465460445, iteration: 146190
loss: 1.0169901847839355,grad_norm: 0.9768041760980479, iteration: 146191
loss: 0.9971185326576233,grad_norm: 0.9999989500660335, iteration: 146192
loss: 0.9847574830055237,grad_norm: 0.9151865945166555, iteration: 146193
loss: 0.9576800465583801,grad_norm: 0.9999991407951835, iteration: 146194
loss: 1.0116547346115112,grad_norm: 0.9999991038969565, iteration: 146195
loss: 1.000278353691101,grad_norm: 0.9994333104759767, iteration: 146196
loss: 0.9721968173980713,grad_norm: 0.9748766744842443, iteration: 146197
loss: 1.014438509941101,grad_norm: 0.9999991367948059, iteration: 146198
loss: 0.9773921966552734,grad_norm: 0.9999990782987818, iteration: 146199
loss: 1.0295031070709229,grad_norm: 0.9999990981208101, iteration: 146200
loss: 0.9998288750648499,grad_norm: 0.9372550965160119, iteration: 146201
loss: 0.990746021270752,grad_norm: 0.9635128428321977, iteration: 146202
loss: 0.9382177591323853,grad_norm: 0.9999990396807571, iteration: 146203
loss: 0.9911943078041077,grad_norm: 0.9182262362369904, iteration: 146204
loss: 1.0116053819656372,grad_norm: 0.9999990449388039, iteration: 146205
loss: 1.0449963808059692,grad_norm: 0.991340474243872, iteration: 146206
loss: 1.0025914907455444,grad_norm: 0.9999990906034565, iteration: 146207
loss: 0.9974988102912903,grad_norm: 0.9999990425493331, iteration: 146208
loss: 0.9785767793655396,grad_norm: 0.9999991496133034, iteration: 146209
loss: 0.9722743034362793,grad_norm: 0.999999149596649, iteration: 146210
loss: 1.0155141353607178,grad_norm: 0.9999992115559707, iteration: 146211
loss: 1.0310180187225342,grad_norm: 0.9718666499325781, iteration: 146212
loss: 0.9832702875137329,grad_norm: 0.9419621668226246, iteration: 146213
loss: 0.9910200238227844,grad_norm: 0.999999280026378, iteration: 146214
loss: 1.0200631618499756,grad_norm: 0.9406695497668583, iteration: 146215
loss: 1.0108247995376587,grad_norm: 0.9999991051039904, iteration: 146216
loss: 0.99952632188797,grad_norm: 0.9999990501684924, iteration: 146217
loss: 1.0032469034194946,grad_norm: 0.9852259972307361, iteration: 146218
loss: 0.9937869310379028,grad_norm: 0.9999991555517179, iteration: 146219
loss: 0.9636754989624023,grad_norm: 0.9599169004837538, iteration: 146220
loss: 0.9761559367179871,grad_norm: 0.8954458820877942, iteration: 146221
loss: 1.0238898992538452,grad_norm: 0.9999991569486401, iteration: 146222
loss: 0.9599360227584839,grad_norm: 0.8162038279616743, iteration: 146223
loss: 1.0028184652328491,grad_norm: 0.9999992998427789, iteration: 146224
loss: 1.005088448524475,grad_norm: 0.9507463722966923, iteration: 146225
loss: 1.0269914865493774,grad_norm: 0.9009606002136814, iteration: 146226
loss: 1.0025454759597778,grad_norm: 0.9033994046440169, iteration: 146227
loss: 1.0241512060165405,grad_norm: 0.9999992081659761, iteration: 146228
loss: 1.0300732851028442,grad_norm: 0.9999991317174952, iteration: 146229
loss: 1.0053046941757202,grad_norm: 0.9850204827661331, iteration: 146230
loss: 0.9982911944389343,grad_norm: 0.999999115110019, iteration: 146231
loss: 1.0091770887374878,grad_norm: 0.8061767522918557, iteration: 146232
loss: 0.9756396412849426,grad_norm: 0.8903039841967034, iteration: 146233
loss: 0.9738282561302185,grad_norm: 0.9999991720868832, iteration: 146234
loss: 1.0079548358917236,grad_norm: 0.9296307561282242, iteration: 146235
loss: 0.9898885488510132,grad_norm: 0.9548537920204405, iteration: 146236
loss: 0.9918712377548218,grad_norm: 0.9999991431136042, iteration: 146237
loss: 0.9778537154197693,grad_norm: 0.9022830239810135, iteration: 146238
loss: 0.9892526865005493,grad_norm: 0.9999992455050767, iteration: 146239
loss: 1.0058681964874268,grad_norm: 0.9049562530535552, iteration: 146240
loss: 1.0233930349349976,grad_norm: 0.9750240151069944, iteration: 146241
loss: 1.0429701805114746,grad_norm: 0.9597952415080053, iteration: 146242
loss: 0.9978066086769104,grad_norm: 0.9568705852300756, iteration: 146243
loss: 0.9529180526733398,grad_norm: 0.7831824567366349, iteration: 146244
loss: 1.013373613357544,grad_norm: 0.9928153126679181, iteration: 146245
loss: 0.9933158755302429,grad_norm: 0.9999992218179761, iteration: 146246
loss: 1.0144736766815186,grad_norm: 0.999999240190124, iteration: 146247
loss: 0.9763390421867371,grad_norm: 0.9999990688630541, iteration: 146248
loss: 1.0031706094741821,grad_norm: 0.9936198048905801, iteration: 146249
loss: 0.9915826320648193,grad_norm: 0.8871882188505754, iteration: 146250
loss: 1.0140243768692017,grad_norm: 0.8911780299255171, iteration: 146251
loss: 1.0273150205612183,grad_norm: 0.9999990208694746, iteration: 146252
loss: 1.0429985523223877,grad_norm: 0.9999991176693576, iteration: 146253
loss: 0.9993159770965576,grad_norm: 0.9999992253264545, iteration: 146254
loss: 0.9892145991325378,grad_norm: 0.9267396486886339, iteration: 146255
loss: 1.0206574201583862,grad_norm: 0.9999992964229398, iteration: 146256
loss: 0.9793636202812195,grad_norm: 0.9999989387473341, iteration: 146257
loss: 0.9941676259040833,grad_norm: 0.9999991441018259, iteration: 146258
loss: 0.9929910898208618,grad_norm: 0.913443113018087, iteration: 146259
loss: 0.9871366024017334,grad_norm: 0.9082950525377715, iteration: 146260
loss: 0.9372732639312744,grad_norm: 0.9999989861700459, iteration: 146261
loss: 0.9996628165245056,grad_norm: 0.9608017514523167, iteration: 146262
loss: 0.9936498403549194,grad_norm: 0.9999990703737737, iteration: 146263
loss: 1.0206494331359863,grad_norm: 0.999999262563249, iteration: 146264
loss: 1.0135257244110107,grad_norm: 0.9742672911767852, iteration: 146265
loss: 1.0054047107696533,grad_norm: 0.9999992264555636, iteration: 146266
loss: 1.0086315870285034,grad_norm: 0.9281810767733467, iteration: 146267
loss: 1.0231707096099854,grad_norm: 0.9999990731490431, iteration: 146268
loss: 0.9851142764091492,grad_norm: 0.9999991119223838, iteration: 146269
loss: 0.993396520614624,grad_norm: 0.9999990436113011, iteration: 146270
loss: 0.9913998246192932,grad_norm: 0.9210606753041897, iteration: 146271
loss: 1.0097509622573853,grad_norm: 0.9999992236580977, iteration: 146272
loss: 1.0184125900268555,grad_norm: 0.9311813967156685, iteration: 146273
loss: 1.0096107721328735,grad_norm: 0.9181237996234676, iteration: 146274
loss: 1.0211743116378784,grad_norm: 0.9999991077374444, iteration: 146275
loss: 1.0044971704483032,grad_norm: 0.9939020265911587, iteration: 146276
loss: 1.0540717840194702,grad_norm: 0.9999995447347032, iteration: 146277
loss: 1.0215158462524414,grad_norm: 0.9999990801295848, iteration: 146278
loss: 0.9944314956665039,grad_norm: 0.9659114807838692, iteration: 146279
loss: 0.9979045987129211,grad_norm: 0.9999992739669306, iteration: 146280
loss: 1.0018749237060547,grad_norm: 0.9999990120288517, iteration: 146281
loss: 1.018049955368042,grad_norm: 0.8750805858565268, iteration: 146282
loss: 0.9809924960136414,grad_norm: 0.7899556197965312, iteration: 146283
loss: 0.9915279746055603,grad_norm: 0.9999992450170956, iteration: 146284
loss: 0.9632676839828491,grad_norm: 0.9999991284975378, iteration: 146285
loss: 0.9566283822059631,grad_norm: 0.8303745841633949, iteration: 146286
loss: 1.0022717714309692,grad_norm: 0.999999142407515, iteration: 146287
loss: 0.9847020506858826,grad_norm: 0.9458609034843571, iteration: 146288
loss: 0.9953953623771667,grad_norm: 0.9999992149387869, iteration: 146289
loss: 0.9806154370307922,grad_norm: 0.9999991861838446, iteration: 146290
loss: 1.023796558380127,grad_norm: 0.8393783255998223, iteration: 146291
loss: 0.9720326066017151,grad_norm: 0.9999989942305596, iteration: 146292
loss: 1.0209523439407349,grad_norm: 0.9999992961785028, iteration: 146293
loss: 1.0173839330673218,grad_norm: 0.9648563915208548, iteration: 146294
loss: 0.9561022520065308,grad_norm: 0.999999322669978, iteration: 146295
loss: 1.0129005908966064,grad_norm: 0.9999992378408797, iteration: 146296
loss: 1.0241235494613647,grad_norm: 0.9999991379094106, iteration: 146297
loss: 1.0187989473342896,grad_norm: 0.9999990969996292, iteration: 146298
loss: 1.0107369422912598,grad_norm: 0.9999991149537761, iteration: 146299
loss: 0.9812768697738647,grad_norm: 0.9999991053780631, iteration: 146300
loss: 0.9633462429046631,grad_norm: 0.9999992232572692, iteration: 146301
loss: 0.981272280216217,grad_norm: 0.9999991316761816, iteration: 146302
loss: 1.0151275396347046,grad_norm: 0.8894654976965274, iteration: 146303
loss: 0.9787096977233887,grad_norm: 0.9999990700059971, iteration: 146304
loss: 0.9818517565727234,grad_norm: 0.9999991039585852, iteration: 146305
loss: 1.0135842561721802,grad_norm: 0.9999991376260831, iteration: 146306
loss: 0.9876261949539185,grad_norm: 0.9758385694062515, iteration: 146307
loss: 1.0101408958435059,grad_norm: 0.9999991084883619, iteration: 146308
loss: 1.0016295909881592,grad_norm: 0.9376693805111674, iteration: 146309
loss: 0.9910661578178406,grad_norm: 0.9875678806181499, iteration: 146310
loss: 0.9979549646377563,grad_norm: 0.934542693013699, iteration: 146311
loss: 0.9881260991096497,grad_norm: 0.9999990844309508, iteration: 146312
loss: 0.9860355854034424,grad_norm: 0.9073025285397364, iteration: 146313
loss: 1.017568588256836,grad_norm: 0.9999990187798768, iteration: 146314
loss: 1.0082939863204956,grad_norm: 0.9999990810215701, iteration: 146315
loss: 0.96144700050354,grad_norm: 0.9999991844947475, iteration: 146316
loss: 1.007654070854187,grad_norm: 0.9106619437790832, iteration: 146317
loss: 0.957756519317627,grad_norm: 0.9999990950828045, iteration: 146318
loss: 0.9776250123977661,grad_norm: 0.9999992238229501, iteration: 146319
loss: 0.9875990748405457,grad_norm: 0.9999990493837676, iteration: 146320
loss: 0.9999764561653137,grad_norm: 0.9894119564312088, iteration: 146321
loss: 0.9946333765983582,grad_norm: 0.8863495918694124, iteration: 146322
loss: 1.0106867551803589,grad_norm: 0.9999991326154295, iteration: 146323
loss: 1.036566972732544,grad_norm: 0.9999991897775279, iteration: 146324
loss: 1.0027225017547607,grad_norm: 0.8039244905446531, iteration: 146325
loss: 1.009749174118042,grad_norm: 0.8738569410774107, iteration: 146326
loss: 1.0012720823287964,grad_norm: 0.9710113902098313, iteration: 146327
loss: 1.0351554155349731,grad_norm: 0.9364676155198681, iteration: 146328
loss: 1.0119918584823608,grad_norm: 0.9999990238744254, iteration: 146329
loss: 0.9891793131828308,grad_norm: 0.9462380567529884, iteration: 146330
loss: 0.963985800743103,grad_norm: 0.8653307154461228, iteration: 146331
loss: 0.9883258938789368,grad_norm: 0.9999990786849341, iteration: 146332
loss: 1.0193378925323486,grad_norm: 0.99862959725074, iteration: 146333
loss: 0.9989897012710571,grad_norm: 0.9703221331177677, iteration: 146334
loss: 0.9714514017105103,grad_norm: 0.9903770881177313, iteration: 146335
loss: 0.9952313303947449,grad_norm: 0.9999990861680346, iteration: 146336
loss: 1.0341644287109375,grad_norm: 0.9999991683808478, iteration: 146337
loss: 1.0036104917526245,grad_norm: 0.9999991570789878, iteration: 146338
loss: 1.0052604675292969,grad_norm: 0.9999992844511325, iteration: 146339
loss: 0.9998459815979004,grad_norm: 0.9999990816511235, iteration: 146340
loss: 1.0171722173690796,grad_norm: 0.9861181102868555, iteration: 146341
loss: 1.0201057195663452,grad_norm: 0.9999990570436774, iteration: 146342
loss: 1.003989577293396,grad_norm: 0.9999990925063574, iteration: 146343
loss: 0.9945310354232788,grad_norm: 0.8694455976539545, iteration: 146344
loss: 0.9752722382545471,grad_norm: 0.9999993363069356, iteration: 146345
loss: 1.004062533378601,grad_norm: 0.9999991210002436, iteration: 146346
loss: 1.0049949884414673,grad_norm: 0.9999995349066111, iteration: 146347
loss: 1.0260089635849,grad_norm: 0.9999995453796527, iteration: 146348
loss: 0.9989712238311768,grad_norm: 0.9999991351529003, iteration: 146349
loss: 0.9704711437225342,grad_norm: 0.9999991289299026, iteration: 146350
loss: 1.0176752805709839,grad_norm: 0.9999990811563652, iteration: 146351
loss: 0.9974110126495361,grad_norm: 0.828983292291244, iteration: 146352
loss: 1.0270339250564575,grad_norm: 0.9886107675809175, iteration: 146353
loss: 0.9926197528839111,grad_norm: 0.9466963874891067, iteration: 146354
loss: 1.0040395259857178,grad_norm: 0.8829796549983551, iteration: 146355
loss: 1.027969479560852,grad_norm: 0.9999992624373178, iteration: 146356
loss: 1.017903208732605,grad_norm: 0.9656124336262403, iteration: 146357
loss: 0.9832449555397034,grad_norm: 0.9999989865745438, iteration: 146358
loss: 0.9905075430870056,grad_norm: 0.9999992870416904, iteration: 146359
loss: 1.0214799642562866,grad_norm: 0.9209263538492909, iteration: 146360
loss: 0.9732946157455444,grad_norm: 0.9701025264924344, iteration: 146361
loss: 0.9988207221031189,grad_norm: 0.9172365793378581, iteration: 146362
loss: 1.0665853023529053,grad_norm: 0.9999997798182668, iteration: 146363
loss: 0.9665220379829407,grad_norm: 0.960503880433191, iteration: 146364
loss: 1.019750952720642,grad_norm: 0.9999990442785545, iteration: 146365
loss: 0.9859137535095215,grad_norm: 0.9999992706307204, iteration: 146366
loss: 0.9942856431007385,grad_norm: 0.9210729550083686, iteration: 146367
loss: 1.0210145711898804,grad_norm: 0.9344253122162814, iteration: 146368
loss: 1.0075844526290894,grad_norm: 0.9999990789975415, iteration: 146369
loss: 1.0206843614578247,grad_norm: 0.999999112774625, iteration: 146370
loss: 1.0044386386871338,grad_norm: 0.999999105920284, iteration: 146371
loss: 1.0149590969085693,grad_norm: 0.9999992797912316, iteration: 146372
loss: 0.969970166683197,grad_norm: 0.9656072422149519, iteration: 146373
loss: 0.9561892151832581,grad_norm: 0.9999992581192618, iteration: 146374
loss: 0.9604871869087219,grad_norm: 0.9999991599982639, iteration: 146375
loss: 0.9969651699066162,grad_norm: 0.9999992327029813, iteration: 146376
loss: 0.9967696070671082,grad_norm: 0.8467866999578613, iteration: 146377
loss: 0.9909598231315613,grad_norm: 0.9999990591801393, iteration: 146378
loss: 0.9706665277481079,grad_norm: 0.9999991438820865, iteration: 146379
loss: 0.9981327056884766,grad_norm: 0.9999990866444503, iteration: 146380
loss: 0.9846592545509338,grad_norm: 0.9999991698573872, iteration: 146381
loss: 1.0209916830062866,grad_norm: 0.9999998972053707, iteration: 146382
loss: 0.9777030348777771,grad_norm: 0.9720582565671528, iteration: 146383
loss: 1.0002188682556152,grad_norm: 0.9999991693196197, iteration: 146384
loss: 1.0008695125579834,grad_norm: 0.9999991390807451, iteration: 146385
loss: 1.0024762153625488,grad_norm: 0.9999992231864884, iteration: 146386
loss: 0.980833888053894,grad_norm: 0.9999990486356858, iteration: 146387
loss: 0.9961079359054565,grad_norm: 0.9472960431153186, iteration: 146388
loss: 0.9994585514068604,grad_norm: 0.9999989464458963, iteration: 146389
loss: 1.043116569519043,grad_norm: 0.9999989937258454, iteration: 146390
loss: 1.036049246788025,grad_norm: 0.9999990848755542, iteration: 146391
loss: 1.0519468784332275,grad_norm: 0.9999994934306135, iteration: 146392
loss: 1.0202481746673584,grad_norm: 0.9999991050577804, iteration: 146393
loss: 0.9537734985351562,grad_norm: 0.9948276277364432, iteration: 146394
loss: 1.0288360118865967,grad_norm: 0.9999992592601604, iteration: 146395
loss: 1.0215487480163574,grad_norm: 0.906714950892982, iteration: 146396
loss: 0.9767788052558899,grad_norm: 0.9918489307096356, iteration: 146397
loss: 1.0095019340515137,grad_norm: 0.9999990615115764, iteration: 146398
loss: 1.0032063722610474,grad_norm: 0.9999989921855944, iteration: 146399
loss: 1.0159624814987183,grad_norm: 0.9999991538827667, iteration: 146400
loss: 0.9896129369735718,grad_norm: 0.9993598526785464, iteration: 146401
loss: 1.0064069032669067,grad_norm: 0.9999991476076034, iteration: 146402
loss: 0.9849932193756104,grad_norm: 0.999998911065431, iteration: 146403
loss: 0.9905133843421936,grad_norm: 0.9999992112559609, iteration: 146404
loss: 0.9886945486068726,grad_norm: 0.8708324145916388, iteration: 146405
loss: 0.999140202999115,grad_norm: 0.8039024819645841, iteration: 146406
loss: 0.9830753803253174,grad_norm: 0.9999990884826939, iteration: 146407
loss: 1.015078067779541,grad_norm: 0.9999991311392296, iteration: 146408
loss: 0.9977639317512512,grad_norm: 0.9099564737682506, iteration: 146409
loss: 0.9739706516265869,grad_norm: 0.9999989474832218, iteration: 146410
loss: 1.0067164897918701,grad_norm: 0.9999993213767833, iteration: 146411
loss: 1.0226953029632568,grad_norm: 0.9144282415698135, iteration: 146412
loss: 0.9743515253067017,grad_norm: 0.9472358126012551, iteration: 146413
loss: 1.010422706604004,grad_norm: 0.9999990199192225, iteration: 146414
loss: 1.0092408657073975,grad_norm: 0.9999992444777492, iteration: 146415
loss: 1.0074280500411987,grad_norm: 0.9999991412972853, iteration: 146416
loss: 1.011856198310852,grad_norm: 0.9999990526023641, iteration: 146417
loss: 1.031724452972412,grad_norm: 0.9999990823854404, iteration: 146418
loss: 1.0019960403442383,grad_norm: 0.9866834981260706, iteration: 146419
loss: 1.0086489915847778,grad_norm: 0.9579613606279239, iteration: 146420
loss: 1.017685890197754,grad_norm: 0.9766970575886376, iteration: 146421
loss: 0.9829270839691162,grad_norm: 0.9874619307371733, iteration: 146422
loss: 0.991461992263794,grad_norm: 0.9999991949464554, iteration: 146423
loss: 1.0231319665908813,grad_norm: 0.9999990915304425, iteration: 146424
loss: 1.0017201900482178,grad_norm: 0.9724960721411947, iteration: 146425
loss: 1.0185792446136475,grad_norm: 0.9999990155261974, iteration: 146426
loss: 1.0451991558074951,grad_norm: 0.9999991009628341, iteration: 146427
loss: 1.0200304985046387,grad_norm: 0.9999991872587202, iteration: 146428
loss: 0.9800257086753845,grad_norm: 0.9999989967284211, iteration: 146429
loss: 0.9742790460586548,grad_norm: 0.8511566855867614, iteration: 146430
loss: 0.9917246103286743,grad_norm: 0.9999991299337876, iteration: 146431
loss: 1.0114306211471558,grad_norm: 0.9999990993969275, iteration: 146432
loss: 0.9647952318191528,grad_norm: 0.9999992085923843, iteration: 146433
loss: 1.028032898902893,grad_norm: 0.9999990070469825, iteration: 146434
loss: 1.0185260772705078,grad_norm: 0.9999992759010029, iteration: 146435
loss: 0.9849714636802673,grad_norm: 0.9999991410229957, iteration: 146436
loss: 1.0068391561508179,grad_norm: 0.9999991156837338, iteration: 146437
loss: 0.9927166700363159,grad_norm: 0.9999992607761775, iteration: 146438
loss: 0.9978194236755371,grad_norm: 0.897683853290416, iteration: 146439
loss: 1.014147400856018,grad_norm: 0.8708767309818871, iteration: 146440
loss: 0.9818322658538818,grad_norm: 0.9999992484909259, iteration: 146441
loss: 1.0243217945098877,grad_norm: 0.9494018360202088, iteration: 146442
loss: 0.9745219945907593,grad_norm: 0.957076385609808, iteration: 146443
loss: 0.9759645462036133,grad_norm: 0.9999990634301678, iteration: 146444
loss: 0.9916515946388245,grad_norm: 0.9706647794386758, iteration: 146445
loss: 0.9730058312416077,grad_norm: 0.8740581876802536, iteration: 146446
loss: 0.9893296957015991,grad_norm: 0.9769858361009778, iteration: 146447
loss: 1.0171706676483154,grad_norm: 0.9463599707368844, iteration: 146448
loss: 1.0493286848068237,grad_norm: 0.9999990258167859, iteration: 146449
loss: 0.9694721698760986,grad_norm: 0.8535641215974344, iteration: 146450
loss: 0.9673511385917664,grad_norm: 0.9999992759544962, iteration: 146451
loss: 1.0158296823501587,grad_norm: 0.9999990846415836, iteration: 146452
loss: 0.9911007285118103,grad_norm: 0.9999990879081552, iteration: 146453
loss: 1.0348330736160278,grad_norm: 0.9999990531071097, iteration: 146454
loss: 1.0079503059387207,grad_norm: 0.9999990167668023, iteration: 146455
loss: 1.002469778060913,grad_norm: 0.999999041237107, iteration: 146456
loss: 0.9743875861167908,grad_norm: 0.9496477131391609, iteration: 146457
loss: 1.0221271514892578,grad_norm: 0.905226100036366, iteration: 146458
loss: 0.9662483930587769,grad_norm: 0.9117525098393752, iteration: 146459
loss: 1.012845516204834,grad_norm: 0.9217798577771088, iteration: 146460
loss: 1.0060778856277466,grad_norm: 0.9999990556059324, iteration: 146461
loss: 1.0067931413650513,grad_norm: 0.9999993309257642, iteration: 146462
loss: 1.020005226135254,grad_norm: 0.9740588019363869, iteration: 146463
loss: 1.0035285949707031,grad_norm: 0.9047608580537719, iteration: 146464
loss: 0.9755983948707581,grad_norm: 0.9999990778823484, iteration: 146465
loss: 1.0425723791122437,grad_norm: 0.9569440182130443, iteration: 146466
loss: 1.0219626426696777,grad_norm: 0.999999156190773, iteration: 146467
loss: 0.9821266531944275,grad_norm: 0.999998961498684, iteration: 146468
loss: 1.0130571126937866,grad_norm: 0.9822169634991273, iteration: 146469
loss: 1.030233383178711,grad_norm: 0.9402182788760931, iteration: 146470
loss: 1.0030525922775269,grad_norm: 0.7691701839869581, iteration: 146471
loss: 0.9777547717094421,grad_norm: 0.9999992194207665, iteration: 146472
loss: 0.9792316555976868,grad_norm: 0.9999990078812224, iteration: 146473
loss: 0.9716222882270813,grad_norm: 0.8444205389602677, iteration: 146474
loss: 0.9948121905326843,grad_norm: 0.9999989887833226, iteration: 146475
loss: 1.0090112686157227,grad_norm: 0.9999991149578341, iteration: 146476
loss: 0.9801898002624512,grad_norm: 0.9999990668002493, iteration: 146477
loss: 0.9646506309509277,grad_norm: 0.9609068438228278, iteration: 146478
loss: 0.9606216549873352,grad_norm: 0.988718380248552, iteration: 146479
loss: 1.0120240449905396,grad_norm: 0.9999989989119964, iteration: 146480
loss: 0.9859039783477783,grad_norm: 0.925506151399228, iteration: 146481
loss: 0.9830548763275146,grad_norm: 0.9286744109045966, iteration: 146482
loss: 1.007788896560669,grad_norm: 0.999999058877124, iteration: 146483
loss: 1.0112425088882446,grad_norm: 0.999999393463607, iteration: 146484
loss: 1.00709867477417,grad_norm: 0.9888032212503799, iteration: 146485
loss: 0.9622815251350403,grad_norm: 0.9999990283192803, iteration: 146486
loss: 1.0267698764801025,grad_norm: 0.999999156172514, iteration: 146487
loss: 1.0254656076431274,grad_norm: 0.9999992294215824, iteration: 146488
loss: 0.9917957782745361,grad_norm: 0.8930566299399406, iteration: 146489
loss: 0.9990493059158325,grad_norm: 0.9999990888995555, iteration: 146490
loss: 1.0456938743591309,grad_norm: 0.999999027746016, iteration: 146491
loss: 1.041576623916626,grad_norm: 0.9900057459852646, iteration: 146492
loss: 1.0213202238082886,grad_norm: 0.9410136871617, iteration: 146493
loss: 1.0092012882232666,grad_norm: 0.9999990917055798, iteration: 146494
loss: 0.9870010614395142,grad_norm: 0.8784482354030977, iteration: 146495
loss: 1.027560830116272,grad_norm: 0.9103556262029254, iteration: 146496
loss: 1.0155701637268066,grad_norm: 0.8990197498187597, iteration: 146497
loss: 1.0712486505508423,grad_norm: 0.9999993621520231, iteration: 146498
loss: 0.9603280425071716,grad_norm: 0.999998988923725, iteration: 146499
loss: 1.030182957649231,grad_norm: 0.9999991529104472, iteration: 146500
loss: 0.9808710217475891,grad_norm: 0.99999901589373, iteration: 146501
loss: 1.0399266481399536,grad_norm: 0.9585786360656217, iteration: 146502
loss: 1.0208622217178345,grad_norm: 0.999999060898522, iteration: 146503
loss: 1.0161362886428833,grad_norm: 0.9999992728992552, iteration: 146504
loss: 1.034804105758667,grad_norm: 0.9999990545705737, iteration: 146505
loss: 0.9699669480323792,grad_norm: 0.9999991642349128, iteration: 146506
loss: 0.9680826663970947,grad_norm: 0.9999993006484604, iteration: 146507
loss: 0.9802343249320984,grad_norm: 0.8136526316119433, iteration: 146508
loss: 1.0271689891815186,grad_norm: 0.9999991130859087, iteration: 146509
loss: 0.9817312359809875,grad_norm: 0.974468151321062, iteration: 146510
loss: 1.0059447288513184,grad_norm: 0.9999990970814654, iteration: 146511
loss: 0.9945873022079468,grad_norm: 0.9999991086477161, iteration: 146512
loss: 1.004123330116272,grad_norm: 0.8805870299771856, iteration: 146513
loss: 1.0365235805511475,grad_norm: 0.9999990869154787, iteration: 146514
loss: 1.0472102165222168,grad_norm: 0.9999991638051157, iteration: 146515
loss: 1.013405203819275,grad_norm: 0.8090173589834181, iteration: 146516
loss: 1.0103354454040527,grad_norm: 0.862732811917979, iteration: 146517
loss: 1.0021402835845947,grad_norm: 0.8403225829202196, iteration: 146518
loss: 1.1596858501434326,grad_norm: 0.9999994424553773, iteration: 146519
loss: 0.9864528179168701,grad_norm: 0.970090577797635, iteration: 146520
loss: 1.0358558893203735,grad_norm: 0.9999990864630607, iteration: 146521
loss: 1.041934609413147,grad_norm: 0.982189296386831, iteration: 146522
loss: 1.0094718933105469,grad_norm: 0.9999991253807192, iteration: 146523
loss: 0.9989213943481445,grad_norm: 0.9875934693006215, iteration: 146524
loss: 0.9926566481590271,grad_norm: 0.9999990103201963, iteration: 146525
loss: 0.9743793606758118,grad_norm: 0.9999991540042085, iteration: 146526
loss: 0.994734525680542,grad_norm: 0.9999991731333951, iteration: 146527
loss: 0.9903359413146973,grad_norm: 0.9999991687932384, iteration: 146528
loss: 0.9530037641525269,grad_norm: 0.9999991852347848, iteration: 146529
loss: 0.9925343990325928,grad_norm: 0.9999992068483564, iteration: 146530
loss: 1.0194650888442993,grad_norm: 0.9999991938593829, iteration: 146531
loss: 1.0049694776535034,grad_norm: 0.969607731810134, iteration: 146532
loss: 0.9939876794815063,grad_norm: 0.9658641625999491, iteration: 146533
loss: 1.0144991874694824,grad_norm: 0.9620751511148496, iteration: 146534
loss: 1.013044834136963,grad_norm: 0.9999991015500019, iteration: 146535
loss: 1.0516830682754517,grad_norm: 0.9999991098886832, iteration: 146536
loss: 1.0196857452392578,grad_norm: 0.9751208587084305, iteration: 146537
loss: 0.9969999194145203,grad_norm: 0.7596553416593115, iteration: 146538
loss: 1.0797901153564453,grad_norm: 0.9999997393298014, iteration: 146539
loss: 1.0441780090332031,grad_norm: 0.9999991070900707, iteration: 146540
loss: 0.9937619566917419,grad_norm: 0.9999997786633986, iteration: 146541
loss: 1.0074281692504883,grad_norm: 0.8415894341708005, iteration: 146542
loss: 0.9950399994850159,grad_norm: 0.9999989400227346, iteration: 146543
loss: 1.025002360343933,grad_norm: 0.9735498985208854, iteration: 146544
loss: 1.0238876342773438,grad_norm: 0.9528272245082, iteration: 146545
loss: 1.005957007408142,grad_norm: 0.9999991183650097, iteration: 146546
loss: 0.9931555390357971,grad_norm: 0.9954532333120849, iteration: 146547
loss: 1.1394507884979248,grad_norm: 0.9999990449735391, iteration: 146548
loss: 0.9949952960014343,grad_norm: 0.9909118301021549, iteration: 146549
loss: 0.9862486124038696,grad_norm: 0.999999871052213, iteration: 146550
loss: 1.0083668231964111,grad_norm: 0.9999991631256052, iteration: 146551
loss: 1.00672447681427,grad_norm: 0.8889877483577269, iteration: 146552
loss: 0.9790114164352417,grad_norm: 0.9999991011437165, iteration: 146553
loss: 1.0146260261535645,grad_norm: 0.999999211494699, iteration: 146554
loss: 0.9792823195457458,grad_norm: 0.9999991627422516, iteration: 146555
loss: 1.053385853767395,grad_norm: 0.9999996698229643, iteration: 146556
loss: 0.9971851706504822,grad_norm: 0.9999989677399077, iteration: 146557
loss: 0.9846047163009644,grad_norm: 0.9982342743108135, iteration: 146558
loss: 0.9736106395721436,grad_norm: 0.904074785224486, iteration: 146559
loss: 1.0181236267089844,grad_norm: 0.9297613721064236, iteration: 146560
loss: 1.013770580291748,grad_norm: 0.9865294644451993, iteration: 146561
loss: 1.0099678039550781,grad_norm: 0.9999993106862786, iteration: 146562
loss: 0.9643718004226685,grad_norm: 0.9308025834581855, iteration: 146563
loss: 1.0222043991088867,grad_norm: 0.9999991958544658, iteration: 146564
loss: 1.035054326057434,grad_norm: 0.9999991710142416, iteration: 146565
loss: 0.9811570644378662,grad_norm: 0.988531939490908, iteration: 146566
loss: 1.0130037069320679,grad_norm: 0.9999991780545201, iteration: 146567
loss: 0.9854986071586609,grad_norm: 0.999999117412332, iteration: 146568
loss: 1.0196044445037842,grad_norm: 0.9999990338710867, iteration: 146569
loss: 1.0038777589797974,grad_norm: 0.9999992829095349, iteration: 146570
loss: 0.9878139495849609,grad_norm: 0.9999991687923748, iteration: 146571
loss: 0.9708552360534668,grad_norm: 0.9999991988391028, iteration: 146572
loss: 1.0154608488082886,grad_norm: 0.9999991817912517, iteration: 146573
loss: 0.9900011420249939,grad_norm: 0.999999057339683, iteration: 146574
loss: 0.9620048999786377,grad_norm: 0.899668584333064, iteration: 146575
loss: 0.9785512685775757,grad_norm: 0.9999991018901236, iteration: 146576
loss: 0.9861605167388916,grad_norm: 0.9999991405448021, iteration: 146577
loss: 1.024746298789978,grad_norm: 0.9999991119472218, iteration: 146578
loss: 1.0193657875061035,grad_norm: 0.8527464526377979, iteration: 146579
loss: 0.9966228604316711,grad_norm: 0.8877631504750206, iteration: 146580
loss: 1.001441240310669,grad_norm: 0.9999991324132947, iteration: 146581
loss: 0.972949743270874,grad_norm: 0.9983747057679079, iteration: 146582
loss: 0.9942798018455505,grad_norm: 0.8587636384563244, iteration: 146583
loss: 0.9775524139404297,grad_norm: 0.9975242816342512, iteration: 146584
loss: 1.02522611618042,grad_norm: 0.9999991282117978, iteration: 146585
loss: 1.1287837028503418,grad_norm: 0.999999107591488, iteration: 146586
loss: 0.9788145422935486,grad_norm: 0.9270975460511618, iteration: 146587
loss: 0.95672607421875,grad_norm: 0.9512642132436248, iteration: 146588
loss: 1.006825566291809,grad_norm: 0.9999992060622683, iteration: 146589
loss: 1.0393474102020264,grad_norm: 0.9999991127592627, iteration: 146590
loss: 1.0247859954833984,grad_norm: 0.9999992512269306, iteration: 146591
loss: 0.9915587902069092,grad_norm: 0.8916768981911549, iteration: 146592
loss: 1.008551836013794,grad_norm: 0.9881869974834601, iteration: 146593
loss: 0.961531937122345,grad_norm: 0.9999992225232613, iteration: 146594
loss: 0.994520902633667,grad_norm: 0.9636739106613482, iteration: 146595
loss: 1.0293623208999634,grad_norm: 0.9999990096234221, iteration: 146596
loss: 0.989748477935791,grad_norm: 0.8637603563993915, iteration: 146597
loss: 0.9945341944694519,grad_norm: 0.9866368811357846, iteration: 146598
loss: 0.9641483426094055,grad_norm: 0.9999990593735494, iteration: 146599
loss: 1.01608145236969,grad_norm: 0.9712160625632607, iteration: 146600
loss: 1.006515622138977,grad_norm: 0.9999990847314579, iteration: 146601
loss: 0.9907987713813782,grad_norm: 0.9999990276367845, iteration: 146602
loss: 0.96159827709198,grad_norm: 0.892743604998161, iteration: 146603
loss: 1.0322943925857544,grad_norm: 0.9999991759281853, iteration: 146604
loss: 1.0161837339401245,grad_norm: 0.9999989765284111, iteration: 146605
loss: 0.9872145652770996,grad_norm: 0.9999990718869782, iteration: 146606
loss: 1.0102702379226685,grad_norm: 0.9615097429228151, iteration: 146607
loss: 1.0117979049682617,grad_norm: 0.999999116219679, iteration: 146608
loss: 1.017982006072998,grad_norm: 0.9999994584681848, iteration: 146609
loss: 0.9724027514457703,grad_norm: 0.9999991496128129, iteration: 146610
loss: 0.9961439371109009,grad_norm: 0.9999990921228136, iteration: 146611
loss: 1.0115662813186646,grad_norm: 0.9999991765149411, iteration: 146612
loss: 1.000969648361206,grad_norm: 0.9999990171581381, iteration: 146613
loss: 0.9779977798461914,grad_norm: 0.9999990655698121, iteration: 146614
loss: 1.001010537147522,grad_norm: 0.8737294566216814, iteration: 146615
loss: 0.9741305112838745,grad_norm: 0.9999991158743075, iteration: 146616
loss: 1.0285214185714722,grad_norm: 0.9999990827395171, iteration: 146617
loss: 0.9966662526130676,grad_norm: 0.9999991102551443, iteration: 146618
loss: 1.0097993612289429,grad_norm: 0.9495012679873062, iteration: 146619
loss: 1.0580891370773315,grad_norm: 0.9999995569354952, iteration: 146620
loss: 0.9944499731063843,grad_norm: 0.9901384026696047, iteration: 146621
loss: 0.98493891954422,grad_norm: 0.993056742672463, iteration: 146622
loss: 1.0017832517623901,grad_norm: 0.9583759731212369, iteration: 146623
loss: 1.0093859434127808,grad_norm: 0.9999991678216499, iteration: 146624
loss: 0.9786334037780762,grad_norm: 0.9999993380281064, iteration: 146625
loss: 0.9945639371871948,grad_norm: 0.97630173213691, iteration: 146626
loss: 0.9927522540092468,grad_norm: 0.9069156442911368, iteration: 146627
loss: 1.0009938478469849,grad_norm: 0.9999991879985523, iteration: 146628
loss: 0.9384658336639404,grad_norm: 0.9513280366086636, iteration: 146629
loss: 1.007412075996399,grad_norm: 0.8560947556093511, iteration: 146630
loss: 0.999525785446167,grad_norm: 0.9999994183180179, iteration: 146631
loss: 0.9488847255706787,grad_norm: 0.9156860873787073, iteration: 146632
loss: 1.0115782022476196,grad_norm: 0.9881637115794527, iteration: 146633
loss: 1.0077745914459229,grad_norm: 0.9999989800774683, iteration: 146634
loss: 0.9798070192337036,grad_norm: 0.9971979783182636, iteration: 146635
loss: 1.0221494436264038,grad_norm: 0.9614094798896888, iteration: 146636
loss: 0.967556893825531,grad_norm: 0.9081275027350593, iteration: 146637
loss: 1.0401639938354492,grad_norm: 0.9911450883860474, iteration: 146638
loss: 0.9967896342277527,grad_norm: 0.9882319649184326, iteration: 146639
loss: 0.9987576603889465,grad_norm: 0.8633906781536433, iteration: 146640
loss: 1.0011093616485596,grad_norm: 0.8976039740425887, iteration: 146641
loss: 1.0120922327041626,grad_norm: 0.999999163923926, iteration: 146642
loss: 1.0061084032058716,grad_norm: 0.9999991503956118, iteration: 146643
loss: 1.0009818077087402,grad_norm: 0.999999662641273, iteration: 146644
loss: 0.9837565422058105,grad_norm: 0.9147833401382327, iteration: 146645
loss: 0.9995431900024414,grad_norm: 0.8037881466323403, iteration: 146646
loss: 1.014294981956482,grad_norm: 0.9999990650173808, iteration: 146647
loss: 0.9988523125648499,grad_norm: 0.9999992063986605, iteration: 146648
loss: 0.9791894555091858,grad_norm: 0.9999992060294226, iteration: 146649
loss: 0.9878860116004944,grad_norm: 0.9999993806448969, iteration: 146650
loss: 0.9840987920761108,grad_norm: 0.9106718307733808, iteration: 146651
loss: 1.0178853273391724,grad_norm: 0.9999992968788022, iteration: 146652
loss: 0.97384113073349,grad_norm: 0.9484110353357286, iteration: 146653
loss: 0.976410448551178,grad_norm: 0.9999989978272132, iteration: 146654
loss: 0.9751307964324951,grad_norm: 0.899356852058708, iteration: 146655
loss: 0.9995390176773071,grad_norm: 0.9999992827082216, iteration: 146656
loss: 0.982535719871521,grad_norm: 0.992411153824411, iteration: 146657
loss: 1.0030055046081543,grad_norm: 0.8966299068827587, iteration: 146658
loss: 0.9608968496322632,grad_norm: 0.9434371767801344, iteration: 146659
loss: 1.0109703540802002,grad_norm: 0.9999989592035372, iteration: 146660
loss: 1.015123963356018,grad_norm: 0.8127412462070387, iteration: 146661
loss: 0.9624337553977966,grad_norm: 0.9999991729288955, iteration: 146662
loss: 0.995362401008606,grad_norm: 0.9999990681721984, iteration: 146663
loss: 0.9911993741989136,grad_norm: 0.9999993169458299, iteration: 146664
loss: 0.9641947150230408,grad_norm: 0.9999990462068796, iteration: 146665
loss: 0.9969704151153564,grad_norm: 0.9408996932636913, iteration: 146666
loss: 1.013641357421875,grad_norm: 0.999999084810524, iteration: 146667
loss: 0.9889106750488281,grad_norm: 0.9999991573674012, iteration: 146668
loss: 1.0001393556594849,grad_norm: 0.9999991038028267, iteration: 146669
loss: 0.9886376261711121,grad_norm: 0.9162733767109963, iteration: 146670
loss: 1.024927020072937,grad_norm: 0.9203712388753276, iteration: 146671
loss: 0.9934504628181458,grad_norm: 0.8423707433019544, iteration: 146672
loss: 0.998095691204071,grad_norm: 0.9999992661878521, iteration: 146673
loss: 1.0103358030319214,grad_norm: 0.9999991337217066, iteration: 146674
loss: 1.0421065092086792,grad_norm: 0.9999989180406942, iteration: 146675
loss: 0.9548179507255554,grad_norm: 0.99999911773023, iteration: 146676
loss: 1.0322757959365845,grad_norm: 0.8969033545621385, iteration: 146677
loss: 1.0140130519866943,grad_norm: 0.9515128605252351, iteration: 146678
loss: 0.9845197796821594,grad_norm: 0.9999991456448664, iteration: 146679
loss: 0.9974854588508606,grad_norm: 0.8719433003835337, iteration: 146680
loss: 1.07471764087677,grad_norm: 0.9999993528315377, iteration: 146681
loss: 0.9754217267036438,grad_norm: 0.9999991960401676, iteration: 146682
loss: 0.9950722455978394,grad_norm: 0.9999991092124471, iteration: 146683
loss: 1.0328866243362427,grad_norm: 0.9767613652281723, iteration: 146684
loss: 0.9902015328407288,grad_norm: 0.9999991749664947, iteration: 146685
loss: 0.9695297479629517,grad_norm: 0.805406123258385, iteration: 146686
loss: 0.9741711020469666,grad_norm: 0.9532533372167412, iteration: 146687
loss: 0.9746670722961426,grad_norm: 0.9833865517097747, iteration: 146688
loss: 0.9992300868034363,grad_norm: 0.9999991657868754, iteration: 146689
loss: 1.0025850534439087,grad_norm: 0.9999990943486654, iteration: 146690
loss: 1.023069143295288,grad_norm: 0.9999990110971191, iteration: 146691
loss: 0.9755126237869263,grad_norm: 0.9999989849858657, iteration: 146692
loss: 1.0205494165420532,grad_norm: 0.9999992393920659, iteration: 146693
loss: 0.9912693500518799,grad_norm: 0.9963343361339986, iteration: 146694
loss: 0.9939574599266052,grad_norm: 0.9999991924240008, iteration: 146695
loss: 1.0401960611343384,grad_norm: 0.9999998991619033, iteration: 146696
loss: 0.9955204725265503,grad_norm: 0.9774685769739848, iteration: 146697
loss: 1.0279608964920044,grad_norm: 0.9748886863034879, iteration: 146698
loss: 1.0062830448150635,grad_norm: 0.9999991011779109, iteration: 146699
loss: 0.9985177516937256,grad_norm: 0.9999991244194555, iteration: 146700
loss: 0.9823645949363708,grad_norm: 0.9999991033842232, iteration: 146701
loss: 1.0105081796646118,grad_norm: 0.9999991872450588, iteration: 146702
loss: 0.9719231128692627,grad_norm: 0.9656013578449893, iteration: 146703
loss: 1.010555386543274,grad_norm: 0.8721559000822167, iteration: 146704
loss: 1.012341856956482,grad_norm: 0.9969042100407469, iteration: 146705
loss: 0.9447505474090576,grad_norm: 0.9013653432375444, iteration: 146706
loss: 0.9940374493598938,grad_norm: 0.8378788387895474, iteration: 146707
loss: 0.9885330200195312,grad_norm: 0.9850664652250023, iteration: 146708
loss: 0.9803876876831055,grad_norm: 0.8786242547607126, iteration: 146709
loss: 1.005815029144287,grad_norm: 0.9999991998271245, iteration: 146710
loss: 1.0041910409927368,grad_norm: 0.9473461223238423, iteration: 146711
loss: 1.0241268873214722,grad_norm: 0.8076916282348819, iteration: 146712
loss: 1.0226422548294067,grad_norm: 0.9999991803489455, iteration: 146713
loss: 0.9741469025611877,grad_norm: 0.9999990185364261, iteration: 146714
loss: 0.9403946399688721,grad_norm: 0.9999992230072144, iteration: 146715
loss: 0.951148271560669,grad_norm: 0.9999992752158053, iteration: 146716
loss: 1.0291286706924438,grad_norm: 0.9999990858782228, iteration: 146717
loss: 1.0179790258407593,grad_norm: 0.9917729852100275, iteration: 146718
loss: 0.9950027465820312,grad_norm: 0.8892736798230905, iteration: 146719
loss: 0.9938399195671082,grad_norm: 0.999999094793592, iteration: 146720
loss: 1.0112968683242798,grad_norm: 0.9630863996930538, iteration: 146721
loss: 0.9769912958145142,grad_norm: 0.9999990362321355, iteration: 146722
loss: 0.9860583543777466,grad_norm: 0.9999991248666432, iteration: 146723
loss: 1.0545896291732788,grad_norm: 0.9999990659682934, iteration: 146724
loss: 0.9800469279289246,grad_norm: 0.999999230961909, iteration: 146725
loss: 1.0017626285552979,grad_norm: 0.9280170808589815, iteration: 146726
loss: 1.0468659400939941,grad_norm: 0.834706367584342, iteration: 146727
loss: 0.9742570519447327,grad_norm: 0.9403719288067481, iteration: 146728
loss: 1.0173040628433228,grad_norm: 0.8507490483857008, iteration: 146729
loss: 1.0592573881149292,grad_norm: 0.9999995277174384, iteration: 146730
loss: 1.0083625316619873,grad_norm: 0.9999991003499226, iteration: 146731
loss: 1.0032620429992676,grad_norm: 0.9999991260286859, iteration: 146732
loss: 1.0507827997207642,grad_norm: 0.9999998069752675, iteration: 146733
loss: 1.0079854726791382,grad_norm: 0.9999991809513461, iteration: 146734
loss: 0.9309052228927612,grad_norm: 0.939744050355772, iteration: 146735
loss: 0.9758539199829102,grad_norm: 0.8933378298967692, iteration: 146736
loss: 0.998417854309082,grad_norm: 0.9999991434933398, iteration: 146737
loss: 1.033490777015686,grad_norm: 0.9999992819881705, iteration: 146738
loss: 0.9638400077819824,grad_norm: 0.999999014990679, iteration: 146739
loss: 0.9853298664093018,grad_norm: 0.9655839232667782, iteration: 146740
loss: 1.0379369258880615,grad_norm: 0.959693498541532, iteration: 146741
loss: 1.0456990003585815,grad_norm: 0.9999989887788787, iteration: 146742
loss: 1.0054233074188232,grad_norm: 0.9999990283821258, iteration: 146743
loss: 1.0098059177398682,grad_norm: 0.9747033984052922, iteration: 146744
loss: 0.9569551944732666,grad_norm: 0.8579419170911617, iteration: 146745
loss: 1.035347580909729,grad_norm: 0.9999992237444604, iteration: 146746
loss: 1.0106288194656372,grad_norm: 0.9999990776701568, iteration: 146747
loss: 0.9826671481132507,grad_norm: 0.7830852747129388, iteration: 146748
loss: 0.9922798871994019,grad_norm: 0.9999991443377936, iteration: 146749
loss: 0.9789722561836243,grad_norm: 0.9388956146954057, iteration: 146750
loss: 1.0005617141723633,grad_norm: 0.9999990277553078, iteration: 146751
loss: 1.0261012315750122,grad_norm: 0.999999265680426, iteration: 146752
loss: 0.9640519618988037,grad_norm: 0.9999992201763632, iteration: 146753
loss: 1.0045175552368164,grad_norm: 1.0000000603646868, iteration: 146754
loss: 0.9832354784011841,grad_norm: 0.9973792294849559, iteration: 146755
loss: 1.0027356147766113,grad_norm: 0.9999990783139494, iteration: 146756
loss: 0.9758848547935486,grad_norm: 0.9999991382101657, iteration: 146757
loss: 0.9826651215553284,grad_norm: 0.975474404860682, iteration: 146758
loss: 0.9622796177864075,grad_norm: 0.9999991162281634, iteration: 146759
loss: 1.0307567119598389,grad_norm: 0.9775293038153584, iteration: 146760
loss: 1.0235823392868042,grad_norm: 0.9999994685428203, iteration: 146761
loss: 1.0087039470672607,grad_norm: 0.9878739571360845, iteration: 146762
loss: 0.9825079441070557,grad_norm: 0.999999136995519, iteration: 146763
loss: 1.020357370376587,grad_norm: 0.9999991992399299, iteration: 146764
loss: 1.0082008838653564,grad_norm: 0.9999991237809781, iteration: 146765
loss: 0.9984850883483887,grad_norm: 0.9999992465396883, iteration: 146766
loss: 1.0204479694366455,grad_norm: 0.9999992820320021, iteration: 146767
loss: 1.0046952962875366,grad_norm: 0.9999991658689533, iteration: 146768
loss: 0.9966894388198853,grad_norm: 0.9937506834246991, iteration: 146769
loss: 1.0385572910308838,grad_norm: 0.9999990311257042, iteration: 146770
loss: 1.0056076049804688,grad_norm: 0.9999991662960807, iteration: 146771
loss: 0.9968990087509155,grad_norm: 0.9999991058997636, iteration: 146772
loss: 1.0315204858779907,grad_norm: 0.999999029708911, iteration: 146773
loss: 1.015721321105957,grad_norm: 0.9999991080737601, iteration: 146774
loss: 1.0105478763580322,grad_norm: 0.9999994959316186, iteration: 146775
loss: 1.006542682647705,grad_norm: 0.804783204711695, iteration: 146776
loss: 0.9630314707756042,grad_norm: 0.9999992702359158, iteration: 146777
loss: 1.0131367444992065,grad_norm: 0.999999356194227, iteration: 146778
loss: 1.0015157461166382,grad_norm: 0.9999989067489138, iteration: 146779
loss: 0.9590625762939453,grad_norm: 0.8647804195020592, iteration: 146780
loss: 1.0001784563064575,grad_norm: 0.9295722415920065, iteration: 146781
loss: 1.1372618675231934,grad_norm: 0.9999992770479065, iteration: 146782
loss: 0.9745699167251587,grad_norm: 0.9999990702654599, iteration: 146783
loss: 0.9833362102508545,grad_norm: 0.9592192198099276, iteration: 146784
loss: 0.9627103805541992,grad_norm: 0.9999990882256505, iteration: 146785
loss: 1.0180903673171997,grad_norm: 0.9999990523422806, iteration: 146786
loss: 1.0488353967666626,grad_norm: 0.8938328927674398, iteration: 146787
loss: 0.9994367957115173,grad_norm: 0.9999990450206161, iteration: 146788
loss: 1.035064697265625,grad_norm: 0.9606160786721833, iteration: 146789
loss: 0.9753034114837646,grad_norm: 0.9268819746653388, iteration: 146790
loss: 0.966352105140686,grad_norm: 0.9999990914155924, iteration: 146791
loss: 0.9622515439987183,grad_norm: 0.9999991572024354, iteration: 146792
loss: 1.0190930366516113,grad_norm: 0.9999991053231154, iteration: 146793
loss: 1.0055211782455444,grad_norm: 0.9999990940961119, iteration: 146794
loss: 0.9787595868110657,grad_norm: 0.8558916910445645, iteration: 146795
loss: 0.9850015044212341,grad_norm: 0.9999989942291685, iteration: 146796
loss: 0.9482187628746033,grad_norm: 0.9438892868279537, iteration: 146797
loss: 1.0320611000061035,grad_norm: 0.9999996716158824, iteration: 146798
loss: 1.008651852607727,grad_norm: 0.9988621618974909, iteration: 146799
loss: 1.0210506916046143,grad_norm: 0.999999048325354, iteration: 146800
loss: 1.018784523010254,grad_norm: 0.9999990706930694, iteration: 146801
loss: 1.0057754516601562,grad_norm: 0.8522394295185857, iteration: 146802
loss: 0.9834408164024353,grad_norm: 0.840088525123577, iteration: 146803
loss: 1.003118634223938,grad_norm: 0.9101449641871765, iteration: 146804
loss: 1.0230437517166138,grad_norm: 0.9714568604080235, iteration: 146805
loss: 1.0097131729125977,grad_norm: 0.8450801731540699, iteration: 146806
loss: 1.0058976411819458,grad_norm: 0.9999990123026852, iteration: 146807
loss: 0.9997765421867371,grad_norm: 0.9999991520882607, iteration: 146808
loss: 1.0142966508865356,grad_norm: 0.9355697128657937, iteration: 146809
loss: 1.0302907228469849,grad_norm: 0.9999995310747912, iteration: 146810
loss: 0.9746248722076416,grad_norm: 0.8271521114188868, iteration: 146811
loss: 0.9754503965377808,grad_norm: 0.999999813190656, iteration: 146812
loss: 0.9336121082305908,grad_norm: 0.9999992602407184, iteration: 146813
loss: 0.9783005118370056,grad_norm: 0.9999990522884182, iteration: 146814
loss: 0.9777745008468628,grad_norm: 0.9280554534434263, iteration: 146815
loss: 1.0065850019454956,grad_norm: 0.8385385472434765, iteration: 146816
loss: 0.9843357801437378,grad_norm: 0.8965340832460041, iteration: 146817
loss: 0.9695610404014587,grad_norm: 0.8087876186804159, iteration: 146818
loss: 1.0106861591339111,grad_norm: 0.9381073755997985, iteration: 146819
loss: 1.015105128288269,grad_norm: 0.9999990147147829, iteration: 146820
loss: 0.9909061193466187,grad_norm: 0.9999992414960021, iteration: 146821
loss: 0.9777237772941589,grad_norm: 0.9999990677416885, iteration: 146822
loss: 0.9801810383796692,grad_norm: 0.9999989713911475, iteration: 146823
loss: 1.0015441179275513,grad_norm: 0.9855387488795317, iteration: 146824
loss: 1.0254137516021729,grad_norm: 0.9999990547193742, iteration: 146825
loss: 1.0047155618667603,grad_norm: 0.8964795920957617, iteration: 146826
loss: 0.975183367729187,grad_norm: 0.9440900391458219, iteration: 146827
loss: 0.9709622263908386,grad_norm: 0.9079503684567207, iteration: 146828
loss: 1.0353622436523438,grad_norm: 0.9999991032486126, iteration: 146829
loss: 0.9962171316146851,grad_norm: 0.9533007187142192, iteration: 146830
loss: 0.9458062648773193,grad_norm: 0.9999998797903612, iteration: 146831
loss: 1.015755295753479,grad_norm: 0.9190845763841095, iteration: 146832
loss: 1.0103243589401245,grad_norm: 0.9552819253591219, iteration: 146833
loss: 0.9910154342651367,grad_norm: 0.9999990785328844, iteration: 146834
loss: 1.03383469581604,grad_norm: 0.9999990291217522, iteration: 146835
loss: 0.9695631861686707,grad_norm: 0.8617361909387945, iteration: 146836
loss: 1.0007133483886719,grad_norm: 0.9999991928905496, iteration: 146837
loss: 1.039438009262085,grad_norm: 0.999999089537839, iteration: 146838
loss: 0.9777252674102783,grad_norm: 0.9999989894568353, iteration: 146839
loss: 0.9691691994667053,grad_norm: 0.898137616240949, iteration: 146840
loss: 0.979395866394043,grad_norm: 0.9999991118798828, iteration: 146841
loss: 0.9669008255004883,grad_norm: 0.9944226393364513, iteration: 146842
loss: 1.0015833377838135,grad_norm: 0.9336746845350551, iteration: 146843
loss: 1.0159788131713867,grad_norm: 0.9999991507419984, iteration: 146844
loss: 1.026472806930542,grad_norm: 0.9999992153921967, iteration: 146845
loss: 0.9700632691383362,grad_norm: 0.9999991276020525, iteration: 146846
loss: 1.0020980834960938,grad_norm: 0.8943684180885101, iteration: 146847
loss: 0.9816983342170715,grad_norm: 0.9298170689822028, iteration: 146848
loss: 0.9756079912185669,grad_norm: 0.9107149609767344, iteration: 146849
loss: 0.9852883219718933,grad_norm: 0.8710410397726973, iteration: 146850
loss: 1.023474097251892,grad_norm: 0.8288882776078486, iteration: 146851
loss: 1.00112783908844,grad_norm: 0.9999991257454354, iteration: 146852
loss: 0.9977544546127319,grad_norm: 0.999998988558745, iteration: 146853
loss: 0.975798487663269,grad_norm: 0.999999154910491, iteration: 146854
loss: 1.0126302242279053,grad_norm: 0.9999991404200045, iteration: 146855
loss: 0.9628828167915344,grad_norm: 0.9635101763397177, iteration: 146856
loss: 0.9942899942398071,grad_norm: 0.9999990047996445, iteration: 146857
loss: 0.9948408007621765,grad_norm: 0.9999992515995372, iteration: 146858
loss: 1.0172977447509766,grad_norm: 0.9467934995104665, iteration: 146859
loss: 1.0214402675628662,grad_norm: 0.9869194184939846, iteration: 146860
loss: 1.0439003705978394,grad_norm: 0.9999996756543208, iteration: 146861
loss: 1.0161105394363403,grad_norm: 0.9999990499699418, iteration: 146862
loss: 1.0117336511611938,grad_norm: 0.9999990499429775, iteration: 146863
loss: 1.0386085510253906,grad_norm: 0.8769032109706553, iteration: 146864
loss: 1.0226458311080933,grad_norm: 0.999999057948197, iteration: 146865
loss: 0.9793688058853149,grad_norm: 0.9201414453156365, iteration: 146866
loss: 1.0034635066986084,grad_norm: 0.9999991448061231, iteration: 146867
loss: 0.9632790684700012,grad_norm: 0.9999991359329914, iteration: 146868
loss: 0.9968150854110718,grad_norm: 0.9999993329711249, iteration: 146869
loss: 0.989858090877533,grad_norm: 0.9999992508901567, iteration: 146870
loss: 0.9912878274917603,grad_norm: 0.9999992859223956, iteration: 146871
loss: 0.9786781668663025,grad_norm: 0.9525652695159718, iteration: 146872
loss: 0.9689708948135376,grad_norm: 0.9999995671654159, iteration: 146873
loss: 0.9960598349571228,grad_norm: 0.999999053938806, iteration: 146874
loss: 0.9968559741973877,grad_norm: 0.8724438136979782, iteration: 146875
loss: 0.9977256059646606,grad_norm: 0.8072916821062746, iteration: 146876
loss: 1.0421626567840576,grad_norm: 0.9999990513356433, iteration: 146877
loss: 0.948322057723999,grad_norm: 0.9999991906185897, iteration: 146878
loss: 0.9875336289405823,grad_norm: 0.9633347400510757, iteration: 146879
loss: 1.0501198768615723,grad_norm: 0.9999990883064312, iteration: 146880
loss: 0.9833277463912964,grad_norm: 0.999999180923748, iteration: 146881
loss: 1.0001587867736816,grad_norm: 0.9999989704254556, iteration: 146882
loss: 0.994473397731781,grad_norm: 0.9999991656538898, iteration: 146883
loss: 0.9630069732666016,grad_norm: 0.9165264300460308, iteration: 146884
loss: 0.994844377040863,grad_norm: 0.9999990669138308, iteration: 146885
loss: 1.023200273513794,grad_norm: 0.9999993560407215, iteration: 146886
loss: 1.0021580457687378,grad_norm: 0.913744269852488, iteration: 146887
loss: 0.9923605918884277,grad_norm: 0.9793389024843548, iteration: 146888
loss: 1.001014232635498,grad_norm: 0.9999990107392726, iteration: 146889
loss: 0.9783164858818054,grad_norm: 0.8544856776107924, iteration: 146890
loss: 0.9980806708335876,grad_norm: 0.9999991595827222, iteration: 146891
loss: 0.996123194694519,grad_norm: 0.9697954407861404, iteration: 146892
loss: 0.9829567670822144,grad_norm: 0.999999270152331, iteration: 146893
loss: 0.9977592825889587,grad_norm: 0.9999990842837827, iteration: 146894
loss: 1.0439848899841309,grad_norm: 0.9991231337874007, iteration: 146895
loss: 0.9783017039299011,grad_norm: 0.9999991843548995, iteration: 146896
loss: 0.9821306467056274,grad_norm: 0.9999991908172049, iteration: 146897
loss: 0.9629034996032715,grad_norm: 0.9999990346576143, iteration: 146898
loss: 1.0047791004180908,grad_norm: 0.9999993120671046, iteration: 146899
loss: 1.0169382095336914,grad_norm: 0.9999990918962468, iteration: 146900
loss: 0.9745569825172424,grad_norm: 0.9999992225947518, iteration: 146901
loss: 0.9868475198745728,grad_norm: 0.849485434195282, iteration: 146902
loss: 0.9900584816932678,grad_norm: 0.9304545814148435, iteration: 146903
loss: 1.0040743350982666,grad_norm: 0.9999990245155418, iteration: 146904
loss: 1.0114493370056152,grad_norm: 0.8720174685975463, iteration: 146905
loss: 0.9772279262542725,grad_norm: 0.9568762821337106, iteration: 146906
loss: 0.9948064088821411,grad_norm: 0.9588894189680156, iteration: 146907
loss: 0.9777010083198547,grad_norm: 0.9706294092236355, iteration: 146908
loss: 0.9867789149284363,grad_norm: 0.9999990217565606, iteration: 146909
loss: 1.020763874053955,grad_norm: 0.9234788279634815, iteration: 146910
loss: 1.0245976448059082,grad_norm: 0.9594671852738991, iteration: 146911
loss: 1.052046537399292,grad_norm: 0.9999992625936697, iteration: 146912
loss: 1.0009269714355469,grad_norm: 0.8986848814977039, iteration: 146913
loss: 1.0455080270767212,grad_norm: 0.9999989125380894, iteration: 146914
loss: 0.9849873781204224,grad_norm: 0.9999992787044686, iteration: 146915
loss: 1.0098167657852173,grad_norm: 0.9999992449496456, iteration: 146916
loss: 1.0127774477005005,grad_norm: 0.9999990080177805, iteration: 146917
loss: 0.994101345539093,grad_norm: 0.8693379253755477, iteration: 146918
loss: 1.0124387741088867,grad_norm: 0.8666726423239278, iteration: 146919
loss: 1.0006563663482666,grad_norm: 0.9999992003139273, iteration: 146920
loss: 1.0146574974060059,grad_norm: 0.8903857630647349, iteration: 146921
loss: 1.0036261081695557,grad_norm: 0.9359147907886209, iteration: 146922
loss: 0.97524094581604,grad_norm: 0.9999991883291373, iteration: 146923
loss: 0.9972958564758301,grad_norm: 0.9999993278614774, iteration: 146924
loss: 1.0528665781021118,grad_norm: 0.9999993364252309, iteration: 146925
loss: 0.9968197345733643,grad_norm: 0.9892630102192675, iteration: 146926
loss: 0.9603208899497986,grad_norm: 0.9253785451787883, iteration: 146927
loss: 1.0212411880493164,grad_norm: 0.99999928124657, iteration: 146928
loss: 0.9827378988265991,grad_norm: 0.9437591823839113, iteration: 146929
loss: 1.0111849308013916,grad_norm: 0.9647502602084459, iteration: 146930
loss: 0.9903091788291931,grad_norm: 0.9461844711322064, iteration: 146931
loss: 0.9583127498626709,grad_norm: 0.9999990300126794, iteration: 146932
loss: 1.0204410552978516,grad_norm: 0.8152462966573266, iteration: 146933
loss: 0.990105390548706,grad_norm: 0.9999999241397586, iteration: 146934
loss: 0.984578013420105,grad_norm: 0.9194209838199484, iteration: 146935
loss: 1.014503836631775,grad_norm: 0.9500689796471209, iteration: 146936
loss: 0.9972596168518066,grad_norm: 0.9999990343317564, iteration: 146937
loss: 1.0172621011734009,grad_norm: 0.9999991122890346, iteration: 146938
loss: 1.002901315689087,grad_norm: 0.9999990411658589, iteration: 146939
loss: 0.978047788143158,grad_norm: 0.9789396114253879, iteration: 146940
loss: 0.9991025328636169,grad_norm: 0.9999992233153553, iteration: 146941
loss: 1.0213817358016968,grad_norm: 0.999999233702343, iteration: 146942
loss: 1.021149754524231,grad_norm: 0.8801354678132444, iteration: 146943
loss: 1.0083584785461426,grad_norm: 0.9999990963432727, iteration: 146944
loss: 0.9698793292045593,grad_norm: 0.9999991827100373, iteration: 146945
loss: 1.014035940170288,grad_norm: 0.9999991951878969, iteration: 146946
loss: 1.0365676879882812,grad_norm: 0.9999991767224891, iteration: 146947
loss: 0.9603449106216431,grad_norm: 0.9696139459033097, iteration: 146948
loss: 0.9945239424705505,grad_norm: 0.9999992068888849, iteration: 146949
loss: 0.9899322986602783,grad_norm: 0.999999085928894, iteration: 146950
loss: 1.0057485103607178,grad_norm: 0.9999992184802609, iteration: 146951
loss: 1.0179076194763184,grad_norm: 0.9469963237323857, iteration: 146952
loss: 1.0120329856872559,grad_norm: 0.9999991767008324, iteration: 146953
loss: 0.9898642897605896,grad_norm: 0.9999992730443004, iteration: 146954
loss: 0.9771208167076111,grad_norm: 0.9999991346682467, iteration: 146955
loss: 0.9945461750030518,grad_norm: 0.9999992153679625, iteration: 146956
loss: 1.0176578760147095,grad_norm: 0.9999990292799085, iteration: 146957
loss: 1.0000996589660645,grad_norm: 0.9999991116118788, iteration: 146958
loss: 0.9943708777427673,grad_norm: 0.9999992431490161, iteration: 146959
loss: 1.0045530796051025,grad_norm: 0.9820361580783424, iteration: 146960
loss: 1.0046066045761108,grad_norm: 0.8763894356249358, iteration: 146961
loss: 0.9621115922927856,grad_norm: 0.954221214246209, iteration: 146962
loss: 1.01634681224823,grad_norm: 0.9999993009115253, iteration: 146963
loss: 1.000596284866333,grad_norm: 0.9999990908051389, iteration: 146964
loss: 0.9721312522888184,grad_norm: 0.9999992081049178, iteration: 146965
loss: 0.9739038944244385,grad_norm: 0.9413690385023353, iteration: 146966
loss: 0.9899913668632507,grad_norm: 0.9999990339167566, iteration: 146967
loss: 1.0166701078414917,grad_norm: 0.9999992641009072, iteration: 146968
loss: 1.0249888896942139,grad_norm: 0.9486261834573685, iteration: 146969
loss: 0.9907893538475037,grad_norm: 0.9999995255982582, iteration: 146970
loss: 1.0182300806045532,grad_norm: 0.9549994960709091, iteration: 146971
loss: 0.990959882736206,grad_norm: 0.9999990591636178, iteration: 146972
loss: 0.9994383454322815,grad_norm: 0.999999112301037, iteration: 146973
loss: 1.034365177154541,grad_norm: 0.9691122056874768, iteration: 146974
loss: 0.9956336617469788,grad_norm: 0.994031548760085, iteration: 146975
loss: 1.012926697731018,grad_norm: 0.9763415639452953, iteration: 146976
loss: 1.0224303007125854,grad_norm: 0.9999995797410617, iteration: 146977
loss: 0.9765538573265076,grad_norm: 0.9048155735182898, iteration: 146978
loss: 1.004347801208496,grad_norm: 0.9999990176554613, iteration: 146979
loss: 1.0000016689300537,grad_norm: 0.9999991103092568, iteration: 146980
loss: 1.0258846282958984,grad_norm: 0.920493793545809, iteration: 146981
loss: 1.033015251159668,grad_norm: 0.8735307485654134, iteration: 146982
loss: 1.019922137260437,grad_norm: 0.9999992004948914, iteration: 146983
loss: 0.9691028594970703,grad_norm: 0.9344474832038722, iteration: 146984
loss: 0.98569256067276,grad_norm: 0.841360915183533, iteration: 146985
loss: 1.0278700590133667,grad_norm: 0.9999995027488353, iteration: 146986
loss: 1.0217912197113037,grad_norm: 0.9999991680800141, iteration: 146987
loss: 1.0294477939605713,grad_norm: 0.9999990748077476, iteration: 146988
loss: 0.9931700229644775,grad_norm: 0.999999380063053, iteration: 146989
loss: 0.9933270215988159,grad_norm: 0.9999990686351421, iteration: 146990
loss: 0.9905769228935242,grad_norm: 0.8639764165965775, iteration: 146991
loss: 0.97994464635849,grad_norm: 0.9999990149047283, iteration: 146992
loss: 1.0230004787445068,grad_norm: 0.9618701574243241, iteration: 146993
loss: 1.0436595678329468,grad_norm: 0.9999989921801835, iteration: 146994
loss: 1.034878134727478,grad_norm: 0.8896334232428897, iteration: 146995
loss: 1.047994613647461,grad_norm: 0.9999992587545102, iteration: 146996
loss: 0.9985776543617249,grad_norm: 0.9749145054511424, iteration: 146997
loss: 1.0308361053466797,grad_norm: 0.974036083602027, iteration: 146998
loss: 1.0079156160354614,grad_norm: 0.9131411850298713, iteration: 146999
loss: 1.0253304243087769,grad_norm: 0.9999990946071783, iteration: 147000
loss: 0.9830803275108337,grad_norm: 0.9999996731281785, iteration: 147001
loss: 1.0040853023529053,grad_norm: 0.9999994008434606, iteration: 147002
loss: 1.0031403303146362,grad_norm: 0.9999991835863266, iteration: 147003
loss: 1.017562985420227,grad_norm: 0.9999990063305007, iteration: 147004
loss: 0.980362594127655,grad_norm: 0.9999989867724924, iteration: 147005
loss: 0.9812413454055786,grad_norm: 0.9390857798867117, iteration: 147006
loss: 1.0020023584365845,grad_norm: 0.9999991373343204, iteration: 147007
loss: 1.0229994058609009,grad_norm: 0.9999997937066569, iteration: 147008
loss: 0.9814094305038452,grad_norm: 0.9857715070626442, iteration: 147009
loss: 0.9924329519271851,grad_norm: 0.9429263290399095, iteration: 147010
loss: 0.9556844234466553,grad_norm: 0.999999518862125, iteration: 147011
loss: 1.0092623233795166,grad_norm: 0.997707533174252, iteration: 147012
loss: 1.0366336107254028,grad_norm: 0.9999991550864467, iteration: 147013
loss: 0.9737709164619446,grad_norm: 0.8934946604267406, iteration: 147014
loss: 1.0240429639816284,grad_norm: 0.9758523643409726, iteration: 147015
loss: 0.9398436546325684,grad_norm: 0.9851274274193942, iteration: 147016
loss: 0.9911861419677734,grad_norm: 0.9999992900433653, iteration: 147017
loss: 1.0306838750839233,grad_norm: 0.8573257521454536, iteration: 147018
loss: 1.0370129346847534,grad_norm: 0.9999991800247876, iteration: 147019
loss: 1.02692449092865,grad_norm: 0.9999995032350797, iteration: 147020
loss: 1.020493507385254,grad_norm: 0.9999991082828329, iteration: 147021
loss: 1.0017437934875488,grad_norm: 0.9628332943292593, iteration: 147022
loss: 0.9657420516014099,grad_norm: 0.8719994547669768, iteration: 147023
loss: 0.9693242907524109,grad_norm: 0.8487523443816697, iteration: 147024
loss: 0.985398530960083,grad_norm: 0.8677660725996175, iteration: 147025
loss: 0.9695009589195251,grad_norm: 0.8875682148802437, iteration: 147026
loss: 0.9963648915290833,grad_norm: 0.9999990987834692, iteration: 147027
loss: 1.0287314653396606,grad_norm: 0.9999991283184102, iteration: 147028
loss: 0.9921467304229736,grad_norm: 0.9999989639955735, iteration: 147029
loss: 1.0443532466888428,grad_norm: 0.9999993625221817, iteration: 147030
loss: 0.9994257092475891,grad_norm: 0.9999990768049097, iteration: 147031
loss: 1.0295296907424927,grad_norm: 0.9999990745584791, iteration: 147032
loss: 0.9919499158859253,grad_norm: 0.9357494163841965, iteration: 147033
loss: 0.9879921674728394,grad_norm: 0.9999991505395321, iteration: 147034
loss: 1.0450135469436646,grad_norm: 0.9999998699032464, iteration: 147035
loss: 1.0101004838943481,grad_norm: 0.9999992083506424, iteration: 147036
loss: 0.9839847683906555,grad_norm: 0.9122196833860761, iteration: 147037
loss: 1.0086100101470947,grad_norm: 0.8372925172650804, iteration: 147038
loss: 1.0042905807495117,grad_norm: 0.999998996023903, iteration: 147039
loss: 1.022430419921875,grad_norm: 0.9999992904565222, iteration: 147040
loss: 0.9707017540931702,grad_norm: 0.9549670701224481, iteration: 147041
loss: 0.982871413230896,grad_norm: 0.8770497279342068, iteration: 147042
loss: 1.075339913368225,grad_norm: 0.9999999307961334, iteration: 147043
loss: 0.9822630286216736,grad_norm: 0.8475999875813165, iteration: 147044
loss: 1.0382444858551025,grad_norm: 0.9999996762148338, iteration: 147045
loss: 0.986383855342865,grad_norm: 0.999999009162517, iteration: 147046
loss: 1.0118076801300049,grad_norm: 0.9999995507590517, iteration: 147047
loss: 1.025331735610962,grad_norm: 0.9999991802754067, iteration: 147048
loss: 0.9894396066665649,grad_norm: 0.8918863625709605, iteration: 147049
loss: 1.0264146327972412,grad_norm: 0.9999991257579973, iteration: 147050
loss: 1.0352418422698975,grad_norm: 0.9805984815288982, iteration: 147051
loss: 1.0153104066848755,grad_norm: 0.9999992007850294, iteration: 147052
loss: 0.9802625179290771,grad_norm: 0.9570794515286696, iteration: 147053
loss: 0.9897465109825134,grad_norm: 0.8630708447632721, iteration: 147054
loss: 1.0102002620697021,grad_norm: 0.8346173961191486, iteration: 147055
loss: 1.0577315092086792,grad_norm: 0.9999999225766506, iteration: 147056
loss: 0.9719089269638062,grad_norm: 0.9782741946916708, iteration: 147057
loss: 0.9974812269210815,grad_norm: 0.991008446963353, iteration: 147058
loss: 0.9987599849700928,grad_norm: 0.9999992125332499, iteration: 147059
loss: 0.9750819206237793,grad_norm: 0.9507026047822956, iteration: 147060
loss: 0.9961143732070923,grad_norm: 0.8409397459553767, iteration: 147061
loss: 0.9934797883033752,grad_norm: 0.9999991674763208, iteration: 147062
loss: 1.003772497177124,grad_norm: 0.9816661931286066, iteration: 147063
loss: 0.9766691327095032,grad_norm: 0.9588063067903809, iteration: 147064
loss: 1.0035876035690308,grad_norm: 0.9999991441768711, iteration: 147065
loss: 0.9977611303329468,grad_norm: 0.8633326579581808, iteration: 147066
loss: 1.03541100025177,grad_norm: 0.9999993383359475, iteration: 147067
loss: 0.9800119996070862,grad_norm: 0.9999991976744487, iteration: 147068
loss: 0.9989295601844788,grad_norm: 0.9411701280430814, iteration: 147069
loss: 0.9993351697921753,grad_norm: 0.998906066778219, iteration: 147070
loss: 1.0187667608261108,grad_norm: 0.9730439083932161, iteration: 147071
loss: 0.9606408476829529,grad_norm: 0.9999990320890044, iteration: 147072
loss: 0.9834548234939575,grad_norm: 0.9999989664211644, iteration: 147073
loss: 1.0163699388504028,grad_norm: 0.9466310093328947, iteration: 147074
loss: 0.9961139559745789,grad_norm: 0.9999990964793927, iteration: 147075
loss: 1.0127263069152832,grad_norm: 0.9999990916357274, iteration: 147076
loss: 0.990071177482605,grad_norm: 0.9999990052536847, iteration: 147077
loss: 0.9865679144859314,grad_norm: 0.9999998979467586, iteration: 147078
loss: 0.9995848536491394,grad_norm: 0.9999990980883372, iteration: 147079
loss: 1.0041927099227905,grad_norm: 0.9999991296422088, iteration: 147080
loss: 0.9667103290557861,grad_norm: 0.9999991811664014, iteration: 147081
loss: 1.0071810483932495,grad_norm: 0.999999159871333, iteration: 147082
loss: 1.017653465270996,grad_norm: 0.9999993151616317, iteration: 147083
loss: 0.9856230020523071,grad_norm: 0.9999989931986151, iteration: 147084
loss: 0.9756964445114136,grad_norm: 0.9234231250248011, iteration: 147085
loss: 1.0042494535446167,grad_norm: 0.9999991289507737, iteration: 147086
loss: 0.9988290667533875,grad_norm: 0.9999990341892223, iteration: 147087
loss: 1.0324115753173828,grad_norm: 0.9999994785687736, iteration: 147088
loss: 1.0019211769104004,grad_norm: 0.9999991932661808, iteration: 147089
loss: 1.0246154069900513,grad_norm: 0.9033608038024866, iteration: 147090
loss: 1.0217688083648682,grad_norm: 0.9999992220514052, iteration: 147091
loss: 0.9802255630493164,grad_norm: 0.9511362332524258, iteration: 147092
loss: 0.9874210357666016,grad_norm: 0.8429903658392742, iteration: 147093
loss: 0.9829824566841125,grad_norm: 0.9725775286057815, iteration: 147094
loss: 1.0231657028198242,grad_norm: 0.9654523253613255, iteration: 147095
loss: 1.014049768447876,grad_norm: 0.9999992022669544, iteration: 147096
loss: 0.971869945526123,grad_norm: 0.9259194245591943, iteration: 147097
loss: 1.0134034156799316,grad_norm: 0.9999991968603882, iteration: 147098
loss: 1.035610556602478,grad_norm: 0.9460793253328552, iteration: 147099
loss: 1.0045771598815918,grad_norm: 0.9950823564117355, iteration: 147100
loss: 1.0029321908950806,grad_norm: 0.9781524164027787, iteration: 147101
loss: 0.9559035301208496,grad_norm: 0.9999991099976427, iteration: 147102
loss: 1.0086143016815186,grad_norm: 0.9999992517880327, iteration: 147103
loss: 1.0301604270935059,grad_norm: 0.9424981271430054, iteration: 147104
loss: 0.9956011772155762,grad_norm: 0.9999989059472693, iteration: 147105
loss: 0.9947950839996338,grad_norm: 0.9999992210579449, iteration: 147106
loss: 1.0085080862045288,grad_norm: 0.9999990322777188, iteration: 147107
loss: 0.9924768209457397,grad_norm: 0.9999990229501657, iteration: 147108
loss: 1.0348621606826782,grad_norm: 0.9999992249643194, iteration: 147109
loss: 1.0162984132766724,grad_norm: 0.9999991355713099, iteration: 147110
loss: 0.9770421385765076,grad_norm: 0.9999990757371222, iteration: 147111
loss: 0.9921687245368958,grad_norm: 0.9999991276430339, iteration: 147112
loss: 1.0051568746566772,grad_norm: 0.9574273704212216, iteration: 147113
loss: 1.0109634399414062,grad_norm: 0.9999992255675262, iteration: 147114
loss: 1.003430962562561,grad_norm: 0.8212300450545278, iteration: 147115
loss: 1.0221669673919678,grad_norm: 0.9999992023164684, iteration: 147116
loss: 1.0062962770462036,grad_norm: 0.930311430061794, iteration: 147117
loss: 1.022464632987976,grad_norm: 0.9999990830650566, iteration: 147118
loss: 1.0036654472351074,grad_norm: 0.9999993198088358, iteration: 147119
loss: 0.9893559813499451,grad_norm: 0.9999990567069232, iteration: 147120
loss: 1.0551717281341553,grad_norm: 0.9999989813587113, iteration: 147121
loss: 0.9693654179573059,grad_norm: 0.9999992238090565, iteration: 147122
loss: 1.0871069431304932,grad_norm: 0.9999991923818102, iteration: 147123
loss: 1.0034887790679932,grad_norm: 0.9339927166426363, iteration: 147124
loss: 1.0466831922531128,grad_norm: 0.9164234989561223, iteration: 147125
loss: 0.9731135368347168,grad_norm: 0.9999990365420143, iteration: 147126
loss: 1.0006837844848633,grad_norm: 0.9858583861722848, iteration: 147127
loss: 1.038773775100708,grad_norm: 0.9999991194816392, iteration: 147128
loss: 1.002177119255066,grad_norm: 0.9999990659330942, iteration: 147129
loss: 0.9958832263946533,grad_norm: 0.8504159536554462, iteration: 147130
loss: 1.003900170326233,grad_norm: 0.9999990190355726, iteration: 147131
loss: 0.9892213940620422,grad_norm: 0.9999990489318313, iteration: 147132
loss: 0.9967136979103088,grad_norm: 0.9999991377561188, iteration: 147133
loss: 1.0195682048797607,grad_norm: 0.8578974196015576, iteration: 147134
loss: 1.0133310556411743,grad_norm: 0.999999070176746, iteration: 147135
loss: 0.9906904697418213,grad_norm: 0.9231727240308116, iteration: 147136
loss: 1.0127993822097778,grad_norm: 0.9999988965670367, iteration: 147137
loss: 0.9463319182395935,grad_norm: 0.9999991958247956, iteration: 147138
loss: 1.000209093093872,grad_norm: 0.9076894606135277, iteration: 147139
loss: 1.0065667629241943,grad_norm: 0.8741844211810006, iteration: 147140
loss: 1.0036485195159912,grad_norm: 0.9645010753552645, iteration: 147141
loss: 1.027935266494751,grad_norm: 0.9999990318419326, iteration: 147142
loss: 1.028026819229126,grad_norm: 0.8591872319806367, iteration: 147143
loss: 0.959136426448822,grad_norm: 0.9485655011359269, iteration: 147144
loss: 0.9787681102752686,grad_norm: 0.9999992526021976, iteration: 147145
loss: 1.0024198293685913,grad_norm: 0.9794501232734828, iteration: 147146
loss: 1.0385249853134155,grad_norm: 0.9963022245954897, iteration: 147147
loss: 0.9772221446037292,grad_norm: 0.9999992075341476, iteration: 147148
loss: 1.001772165298462,grad_norm: 0.9999991074946923, iteration: 147149
loss: 1.0176912546157837,grad_norm: 0.999999135845724, iteration: 147150
loss: 1.0179752111434937,grad_norm: 0.8734571337519561, iteration: 147151
loss: 1.0221483707427979,grad_norm: 0.9999992345398837, iteration: 147152
loss: 0.9938117265701294,grad_norm: 0.9999991143319439, iteration: 147153
loss: 1.021781086921692,grad_norm: 0.9091160362072199, iteration: 147154
loss: 0.9707127213478088,grad_norm: 0.9465252032152689, iteration: 147155
loss: 0.9696102738380432,grad_norm: 0.9999994827184021, iteration: 147156
loss: 0.9749069809913635,grad_norm: 0.9729041459635206, iteration: 147157
loss: 0.9831536412239075,grad_norm: 0.9871714271885321, iteration: 147158
loss: 0.9982755780220032,grad_norm: 0.9999990213629675, iteration: 147159
loss: 1.0727131366729736,grad_norm: 1.0000000009879486, iteration: 147160
loss: 1.070909857749939,grad_norm: 0.9999998439222268, iteration: 147161
loss: 0.9766570329666138,grad_norm: 0.8704570353118385, iteration: 147162
loss: 0.9907141327857971,grad_norm: 0.9999991409755102, iteration: 147163
loss: 0.9831098914146423,grad_norm: 0.9999990565375438, iteration: 147164
loss: 1.0253236293792725,grad_norm: 0.9133770804298113, iteration: 147165
loss: 0.9555327892303467,grad_norm: 0.9999989581813636, iteration: 147166
loss: 0.9638097882270813,grad_norm: 0.8197151189593297, iteration: 147167
loss: 0.9961419701576233,grad_norm: 0.7732723800345059, iteration: 147168
loss: 1.0436865091323853,grad_norm: 0.9265113374782465, iteration: 147169
loss: 0.9430655837059021,grad_norm: 0.9999997045516363, iteration: 147170
loss: 0.972007155418396,grad_norm: 0.9804701473649021, iteration: 147171
loss: 1.030316948890686,grad_norm: 0.9999991585236188, iteration: 147172
loss: 0.9798398017883301,grad_norm: 0.9999992169821021, iteration: 147173
loss: 1.0270525217056274,grad_norm: 0.9964380699436693, iteration: 147174
loss: 0.9620798826217651,grad_norm: 0.999999667675701, iteration: 147175
loss: 0.9936534762382507,grad_norm: 0.9999991889610481, iteration: 147176
loss: 0.9788980484008789,grad_norm: 0.9999991980876136, iteration: 147177
loss: 1.0226963758468628,grad_norm: 0.99999907513255, iteration: 147178
loss: 0.9975572824478149,grad_norm: 0.9999989875270958, iteration: 147179
loss: 1.0154229402542114,grad_norm: 0.7963309140211653, iteration: 147180
loss: 1.0117645263671875,grad_norm: 0.9999992390173802, iteration: 147181
loss: 1.0282008647918701,grad_norm: 0.9999991159907553, iteration: 147182
loss: 0.9274675250053406,grad_norm: 0.9999990389050868, iteration: 147183
loss: 1.0074671506881714,grad_norm: 0.9999991216882876, iteration: 147184
loss: 0.9869924783706665,grad_norm: 0.9912832159386686, iteration: 147185
loss: 1.0052101612091064,grad_norm: 0.9458634953454552, iteration: 147186
loss: 0.9953858256340027,grad_norm: 0.8707339423395912, iteration: 147187
loss: 0.9994982481002808,grad_norm: 0.8919460022945651, iteration: 147188
loss: 1.0091166496276855,grad_norm: 0.9999991918586513, iteration: 147189
loss: 1.0029027462005615,grad_norm: 0.9999990583709957, iteration: 147190
loss: 1.005220651626587,grad_norm: 0.9999990360465925, iteration: 147191
loss: 1.0384739637374878,grad_norm: 0.9911338176326293, iteration: 147192
loss: 0.9919038414955139,grad_norm: 0.9999990904163397, iteration: 147193
loss: 0.9735382795333862,grad_norm: 0.9999991825554613, iteration: 147194
loss: 0.9640382528305054,grad_norm: 0.8440181493029524, iteration: 147195
loss: 0.9987612962722778,grad_norm: 0.9999991746384066, iteration: 147196
loss: 1.0829391479492188,grad_norm: 0.9999996946314764, iteration: 147197
loss: 0.9917396306991577,grad_norm: 0.9316625308267317, iteration: 147198
loss: 0.9806565046310425,grad_norm: 0.9983223951419894, iteration: 147199
loss: 1.024973750114441,grad_norm: 0.999999202796444, iteration: 147200
loss: 0.9958285093307495,grad_norm: 0.9999991419230768, iteration: 147201
loss: 1.0067005157470703,grad_norm: 0.9943062991400946, iteration: 147202
loss: 0.9877167344093323,grad_norm: 0.9021511891078892, iteration: 147203
loss: 0.9782626628875732,grad_norm: 0.9999991762356311, iteration: 147204
loss: 1.0017552375793457,grad_norm: 0.999999098431263, iteration: 147205
loss: 0.9913696050643921,grad_norm: 0.9999990415931228, iteration: 147206
loss: 1.0147123336791992,grad_norm: 0.9999992519281793, iteration: 147207
loss: 0.9899968504905701,grad_norm: 0.9948993748248173, iteration: 147208
loss: 0.9812095165252686,grad_norm: 0.9999990982930711, iteration: 147209
loss: 1.0205714702606201,grad_norm: 0.999999526401654, iteration: 147210
loss: 1.0057412385940552,grad_norm: 0.779763324933494, iteration: 147211
loss: 1.002442717552185,grad_norm: 0.9435186211164271, iteration: 147212
loss: 0.9912652373313904,grad_norm: 0.9851050958656845, iteration: 147213
loss: 0.997290849685669,grad_norm: 0.9999990818952281, iteration: 147214
loss: 1.0275325775146484,grad_norm: 0.9999991857328387, iteration: 147215
loss: 1.0167241096496582,grad_norm: 0.9999991292343292, iteration: 147216
loss: 0.9606321454048157,grad_norm: 0.9999991568112463, iteration: 147217
loss: 0.9782864451408386,grad_norm: 0.9301943245372701, iteration: 147218
loss: 1.0218837261199951,grad_norm: 0.9999992088501474, iteration: 147219
loss: 0.9988961815834045,grad_norm: 0.9999991346542957, iteration: 147220
loss: 1.0217986106872559,grad_norm: 0.9999992217610951, iteration: 147221
loss: 0.9621021747589111,grad_norm: 0.9379874204624393, iteration: 147222
loss: 0.9823911190032959,grad_norm: 0.9999991034421356, iteration: 147223
loss: 1.0854454040527344,grad_norm: 0.9999995397307182, iteration: 147224
loss: 0.9946579337120056,grad_norm: 0.8693174744527264, iteration: 147225
loss: 0.9858207702636719,grad_norm: 0.9999992292574106, iteration: 147226
loss: 1.0166802406311035,grad_norm: 0.999999130662907, iteration: 147227
loss: 0.9593468904495239,grad_norm: 0.9532655300332773, iteration: 147228
loss: 1.0646876096725464,grad_norm: 0.9999993511647216, iteration: 147229
loss: 1.0108739137649536,grad_norm: 0.9936920685607633, iteration: 147230
loss: 0.9757014513015747,grad_norm: 0.8362550753605972, iteration: 147231
loss: 1.0342307090759277,grad_norm: 0.9999994996228375, iteration: 147232
loss: 0.9887187480926514,grad_norm: 0.99999907218592, iteration: 147233
loss: 0.9945546984672546,grad_norm: 0.9811744601878905, iteration: 147234
loss: 0.9657108783721924,grad_norm: 0.9999991493999021, iteration: 147235
loss: 0.9977868795394897,grad_norm: 0.9999989318739474, iteration: 147236
loss: 1.0489647388458252,grad_norm: 0.9999991248023009, iteration: 147237
loss: 0.9955363869667053,grad_norm: 0.9999992750465313, iteration: 147238
loss: 1.018007516860962,grad_norm: 0.9942335425331577, iteration: 147239
loss: 0.9906585812568665,grad_norm: 0.9999989406855891, iteration: 147240
loss: 0.9764406681060791,grad_norm: 0.9999991252384737, iteration: 147241
loss: 0.9842619299888611,grad_norm: 0.9375055063804527, iteration: 147242
loss: 0.9971920251846313,grad_norm: 0.9134892478186543, iteration: 147243
loss: 0.9580326080322266,grad_norm: 0.9999990997755365, iteration: 147244
loss: 1.0011146068572998,grad_norm: 0.9999992152501361, iteration: 147245
loss: 1.0052273273468018,grad_norm: 0.9999991144909558, iteration: 147246
loss: 1.0001633167266846,grad_norm: 0.9284062921023368, iteration: 147247
loss: 1.0113519430160522,grad_norm: 0.9999993490621042, iteration: 147248
loss: 1.0008124113082886,grad_norm: 0.875933927220642, iteration: 147249
loss: 0.9765455722808838,grad_norm: 0.9804620880880298, iteration: 147250
loss: 1.0288193225860596,grad_norm: 0.9999991255128948, iteration: 147251
loss: 0.9821729063987732,grad_norm: 0.9999993869972233, iteration: 147252
loss: 1.0549664497375488,grad_norm: 0.9534059219200024, iteration: 147253
loss: 0.9790018200874329,grad_norm: 0.8480512352660587, iteration: 147254
loss: 1.0190094709396362,grad_norm: 0.8535473891155236, iteration: 147255
loss: 1.0134727954864502,grad_norm: 0.9999990602659333, iteration: 147256
loss: 1.052667260169983,grad_norm: 0.9999999234720922, iteration: 147257
loss: 0.9699399471282959,grad_norm: 0.9797121979305541, iteration: 147258
loss: 0.9652686715126038,grad_norm: 0.9725107647264537, iteration: 147259
loss: 1.0291693210601807,grad_norm: 0.9999990253565445, iteration: 147260
loss: 0.9694094061851501,grad_norm: 1.000000020093892, iteration: 147261
loss: 1.0622637271881104,grad_norm: 0.9999991529699637, iteration: 147262
loss: 0.9830396771430969,grad_norm: 0.9999991077281615, iteration: 147263
loss: 0.9942629337310791,grad_norm: 0.8952432554991347, iteration: 147264
loss: 1.016001582145691,grad_norm: 0.9999991912164965, iteration: 147265
loss: 0.9587633013725281,grad_norm: 0.9999991217862552, iteration: 147266
loss: 1.0039310455322266,grad_norm: 0.9999996259296459, iteration: 147267
loss: 1.2041728496551514,grad_norm: 0.9999992740332846, iteration: 147268
loss: 1.006321907043457,grad_norm: 0.999999160167214, iteration: 147269
loss: 0.9907495975494385,grad_norm: 0.9999989111711622, iteration: 147270
loss: 1.042286992073059,grad_norm: 0.9999991108273685, iteration: 147271
loss: 1.0145148038864136,grad_norm: 0.977283839492215, iteration: 147272
loss: 0.9970736503601074,grad_norm: 0.9332664788125172, iteration: 147273
loss: 1.05028235912323,grad_norm: 0.9999997874243532, iteration: 147274
loss: 0.98372882604599,grad_norm: 0.9999990722629813, iteration: 147275
loss: 0.9809346795082092,grad_norm: 0.9999991522143726, iteration: 147276
loss: 0.9765264391899109,grad_norm: 0.9999991121908642, iteration: 147277
loss: 1.0340644121170044,grad_norm: 0.8927687525366166, iteration: 147278
loss: 1.0186749696731567,grad_norm: 0.9399278092084498, iteration: 147279
loss: 1.0232231616973877,grad_norm: 0.9198723126577587, iteration: 147280
loss: 0.9692264795303345,grad_norm: 0.9004085707359724, iteration: 147281
loss: 1.0192896127700806,grad_norm: 0.9999993732976825, iteration: 147282
loss: 1.002306580543518,grad_norm: 0.9999990274212726, iteration: 147283
loss: 1.023693323135376,grad_norm: 0.9999990921536978, iteration: 147284
loss: 0.9595983028411865,grad_norm: 0.9283614675758743, iteration: 147285
loss: 0.999016284942627,grad_norm: 0.9999990912362361, iteration: 147286
loss: 1.0164395570755005,grad_norm: 0.9088295445265141, iteration: 147287
loss: 1.0383589267730713,grad_norm: 0.9898312778410346, iteration: 147288
loss: 1.0109083652496338,grad_norm: 0.9999991859934416, iteration: 147289
loss: 1.0094560384750366,grad_norm: 0.9750939567655474, iteration: 147290
loss: 0.9703962802886963,grad_norm: 0.926419251251479, iteration: 147291
loss: 1.0135273933410645,grad_norm: 0.9999988798689283, iteration: 147292
loss: 1.0191830396652222,grad_norm: 0.9999991394755758, iteration: 147293
loss: 0.9825388193130493,grad_norm: 0.9999995491475774, iteration: 147294
loss: 1.0515222549438477,grad_norm: 0.9999992285841394, iteration: 147295
loss: 0.9669060707092285,grad_norm: 0.7979873790459706, iteration: 147296
loss: 0.9705759286880493,grad_norm: 0.8153185968498388, iteration: 147297
loss: 1.0079982280731201,grad_norm: 0.9999991903971787, iteration: 147298
loss: 0.9933946132659912,grad_norm: 0.9999989938544449, iteration: 147299
loss: 1.0018503665924072,grad_norm: 0.9777657951377954, iteration: 147300
loss: 0.9975164532661438,grad_norm: 0.9999991175509414, iteration: 147301
loss: 1.296125888824463,grad_norm: 0.9999993647620364, iteration: 147302
loss: 0.9917745590209961,grad_norm: 0.9098301857144839, iteration: 147303
loss: 0.9988135695457458,grad_norm: 0.9999991860361, iteration: 147304
loss: 0.9692285656929016,grad_norm: 0.9999997355074777, iteration: 147305
loss: 1.0048565864562988,grad_norm: 0.9999992404207929, iteration: 147306
loss: 1.0137253999710083,grad_norm: 0.9999991157432025, iteration: 147307
loss: 1.0154756307601929,grad_norm: 0.9761090073445139, iteration: 147308
loss: 1.032057762145996,grad_norm: 0.9905608131279121, iteration: 147309
loss: 1.0324480533599854,grad_norm: 0.9999992247257022, iteration: 147310
loss: 1.045217752456665,grad_norm: 0.999999126163962, iteration: 147311
loss: 1.033908486366272,grad_norm: 0.9419118723812806, iteration: 147312
loss: 0.9870393872261047,grad_norm: 0.9999991264364283, iteration: 147313
loss: 1.0271477699279785,grad_norm: 0.9598706743763594, iteration: 147314
loss: 1.1031171083450317,grad_norm: 1.000000013771995, iteration: 147315
loss: 0.970761239528656,grad_norm: 0.8809845564945057, iteration: 147316
loss: 0.9906759858131409,grad_norm: 0.9446890841112765, iteration: 147317
loss: 1.0014384984970093,grad_norm: 0.8193407818637948, iteration: 147318
loss: 0.9962430000305176,grad_norm: 0.9716415843538599, iteration: 147319
loss: 0.9941015839576721,grad_norm: 0.9999989784948129, iteration: 147320
loss: 1.0119309425354004,grad_norm: 0.9999991910919253, iteration: 147321
loss: 0.9842925071716309,grad_norm: 0.9999990254381712, iteration: 147322
loss: 0.9592636823654175,grad_norm: 0.828761392029378, iteration: 147323
loss: 0.9735972285270691,grad_norm: 0.9630174410612954, iteration: 147324
loss: 0.9640337228775024,grad_norm: 0.999999289819729, iteration: 147325
loss: 0.9873974919319153,grad_norm: 0.977160924651366, iteration: 147326
loss: 0.9936724305152893,grad_norm: 0.9149626656392849, iteration: 147327
loss: 1.0301263332366943,grad_norm: 0.9999996609993053, iteration: 147328
loss: 1.0354653596878052,grad_norm: 0.9999991233121887, iteration: 147329
loss: 1.0172570943832397,grad_norm: 0.9999990525828821, iteration: 147330
loss: 1.042134165763855,grad_norm: 0.9999991124003326, iteration: 147331
loss: 1.004040241241455,grad_norm: 0.9999989937081083, iteration: 147332
loss: 1.005953073501587,grad_norm: 0.9624394133662061, iteration: 147333
loss: 1.0157239437103271,grad_norm: 0.9999991125254055, iteration: 147334
loss: 1.0018280744552612,grad_norm: 0.9999991267385103, iteration: 147335
loss: 0.9890384674072266,grad_norm: 0.9704778212405176, iteration: 147336
loss: 0.9876773953437805,grad_norm: 0.9306797892497796, iteration: 147337
loss: 1.0653862953186035,grad_norm: 0.9999994649478048, iteration: 147338
loss: 0.9957873821258545,grad_norm: 0.999999042082659, iteration: 147339
loss: 0.979620635509491,grad_norm: 0.9999991094270386, iteration: 147340
loss: 0.9696247577667236,grad_norm: 0.9999992241823631, iteration: 147341
loss: 0.9812055826187134,grad_norm: 0.999999065931614, iteration: 147342
loss: 0.9902442097663879,grad_norm: 0.9031959017058738, iteration: 147343
loss: 0.9863008260726929,grad_norm: 0.9999991703411812, iteration: 147344
loss: 1.0091105699539185,grad_norm: 0.9999992041487287, iteration: 147345
loss: 0.9929243922233582,grad_norm: 0.9999992940503075, iteration: 147346
loss: 0.9598625302314758,grad_norm: 0.9999989928184384, iteration: 147347
loss: 0.9671618342399597,grad_norm: 0.9999991080525882, iteration: 147348
loss: 0.9596741795539856,grad_norm: 0.9999992005727741, iteration: 147349
loss: 1.072504997253418,grad_norm: 0.9999995554641422, iteration: 147350
loss: 1.0376754999160767,grad_norm: 0.999999064529424, iteration: 147351
loss: 0.9206633567810059,grad_norm: 0.9781222850925854, iteration: 147352
loss: 1.0194202661514282,grad_norm: 0.8801971194215047, iteration: 147353
loss: 0.9773381948471069,grad_norm: 0.942852542512404, iteration: 147354
loss: 1.0145878791809082,grad_norm: 0.9955889150352659, iteration: 147355
loss: 1.0017579793930054,grad_norm: 0.9999991648354544, iteration: 147356
loss: 0.9960227012634277,grad_norm: 0.9999992294346328, iteration: 147357
loss: 1.0224719047546387,grad_norm: 0.9999991860405941, iteration: 147358
loss: 1.0305571556091309,grad_norm: 0.9264558128031218, iteration: 147359
loss: 1.0291842222213745,grad_norm: 0.9999990979799098, iteration: 147360
loss: 0.9850882887840271,grad_norm: 0.9999991378011994, iteration: 147361
loss: 0.9481956362724304,grad_norm: 0.9999990725022818, iteration: 147362
loss: 0.9791610836982727,grad_norm: 0.9757342320029088, iteration: 147363
loss: 0.9744349122047424,grad_norm: 0.8290495293490123, iteration: 147364
loss: 0.9744651913642883,grad_norm: 0.8250890430641001, iteration: 147365
loss: 1.028817057609558,grad_norm: 0.999999318893918, iteration: 147366
loss: 0.9629425406455994,grad_norm: 0.9999989786944429, iteration: 147367
loss: 0.9877550005912781,grad_norm: 0.9999989584059533, iteration: 147368
loss: 1.017529845237732,grad_norm: 0.9999991237105075, iteration: 147369
loss: 0.9505347013473511,grad_norm: 0.9999990476405983, iteration: 147370
loss: 1.0222960710525513,grad_norm: 0.9999990118188818, iteration: 147371
loss: 1.0318260192871094,grad_norm: 0.9753824303443851, iteration: 147372
loss: 0.9658397436141968,grad_norm: 0.9999990608082208, iteration: 147373
loss: 1.0226818323135376,grad_norm: 0.9999989594475234, iteration: 147374
loss: 1.00044584274292,grad_norm: 0.9999990955611915, iteration: 147375
loss: 1.0216543674468994,grad_norm: 0.9999991359666286, iteration: 147376
loss: 1.0102044343948364,grad_norm: 0.9208191646823431, iteration: 147377
loss: 0.9584695100784302,grad_norm: 0.9999991351066201, iteration: 147378
loss: 1.003896713256836,grad_norm: 0.999999002361965, iteration: 147379
loss: 0.9698310494422913,grad_norm: 0.9999991429073427, iteration: 147380
loss: 1.0352206230163574,grad_norm: 0.9999991421350725, iteration: 147381
loss: 1.0322576761245728,grad_norm: 0.9225054752639658, iteration: 147382
loss: 1.0109264850616455,grad_norm: 0.9999990016426262, iteration: 147383
loss: 0.9825758934020996,grad_norm: 0.9999991132298324, iteration: 147384
loss: 1.0309619903564453,grad_norm: 0.9999993294012065, iteration: 147385
loss: 0.9891460537910461,grad_norm: 0.9493455883820958, iteration: 147386
loss: 0.9403244853019714,grad_norm: 0.9999990403310742, iteration: 147387
loss: 0.9938666820526123,grad_norm: 0.9999996305749816, iteration: 147388
loss: 1.0110270977020264,grad_norm: 0.9999990248112124, iteration: 147389
loss: 0.9929400682449341,grad_norm: 0.9999992104652997, iteration: 147390
loss: 1.02329683303833,grad_norm: 0.9063490574556282, iteration: 147391
loss: 0.9518912434577942,grad_norm: 0.9999992282020201, iteration: 147392
loss: 1.0036838054656982,grad_norm: 0.9999990604853602, iteration: 147393
loss: 0.9973235726356506,grad_norm: 0.999999273569182, iteration: 147394
loss: 0.9737389087677002,grad_norm: 0.8782310926896019, iteration: 147395
loss: 1.006989598274231,grad_norm: 0.9999992163955692, iteration: 147396
loss: 1.001114010810852,grad_norm: 0.9999989051222642, iteration: 147397
loss: 0.9827575087547302,grad_norm: 0.9999991930938817, iteration: 147398
loss: 0.99428391456604,grad_norm: 0.9999991171610729, iteration: 147399
loss: 1.0089777708053589,grad_norm: 0.9999992966336009, iteration: 147400
loss: 1.0286614894866943,grad_norm: 0.9270091590494178, iteration: 147401
loss: 0.9506412148475647,grad_norm: 0.9906682345597279, iteration: 147402
loss: 1.0124692916870117,grad_norm: 0.9723415257585311, iteration: 147403
loss: 1.0209712982177734,grad_norm: 0.8767507480221073, iteration: 147404
loss: 0.9563148021697998,grad_norm: 0.9999990349318634, iteration: 147405
loss: 0.9810114502906799,grad_norm: 0.9999989797380444, iteration: 147406
loss: 0.998661994934082,grad_norm: 0.9999990337117239, iteration: 147407
loss: 0.9805130958557129,grad_norm: 0.99999911446196, iteration: 147408
loss: 1.011598825454712,grad_norm: 0.9999990448562196, iteration: 147409
loss: 0.981614351272583,grad_norm: 0.9999992846872373, iteration: 147410
loss: 1.0262831449508667,grad_norm: 0.9999990633428146, iteration: 147411
loss: 0.9984250664710999,grad_norm: 0.8143992186926821, iteration: 147412
loss: 0.9993603229522705,grad_norm: 0.9999990406811103, iteration: 147413
loss: 1.061969518661499,grad_norm: 0.9999991047346382, iteration: 147414
loss: 0.9669268131256104,grad_norm: 0.9885720612187268, iteration: 147415
loss: 1.0184063911437988,grad_norm: 0.9358335465025123, iteration: 147416
loss: 0.9799959063529968,grad_norm: 0.9999989143223599, iteration: 147417
loss: 1.010805606842041,grad_norm: 0.9344689256427291, iteration: 147418
loss: 0.9544680118560791,grad_norm: 0.9694890832638059, iteration: 147419
loss: 1.0153948068618774,grad_norm: 0.9150160608542021, iteration: 147420
loss: 1.0201983451843262,grad_norm: 0.9999994788094176, iteration: 147421
loss: 0.9727373123168945,grad_norm: 0.9438965923895148, iteration: 147422
loss: 1.0113604068756104,grad_norm: 0.9824541873177569, iteration: 147423
loss: 1.0210199356079102,grad_norm: 0.9457398331527175, iteration: 147424
loss: 0.9876747727394104,grad_norm: 0.8668722971144087, iteration: 147425
loss: 1.0111474990844727,grad_norm: 0.9846950745619863, iteration: 147426
loss: 1.0054470300674438,grad_norm: 0.9397359223280762, iteration: 147427
loss: 1.018237829208374,grad_norm: 0.999999086068327, iteration: 147428
loss: 0.9424031972885132,grad_norm: 0.968811422970147, iteration: 147429
loss: 0.9358683824539185,grad_norm: 0.9826299976339957, iteration: 147430
loss: 1.0158147811889648,grad_norm: 0.8616598932959826, iteration: 147431
loss: 1.0468761920928955,grad_norm: 0.999999121980556, iteration: 147432
loss: 1.0091521739959717,grad_norm: 0.9715293980158549, iteration: 147433
loss: 1.0254788398742676,grad_norm: 0.879959248523044, iteration: 147434
loss: 1.058661699295044,grad_norm: 0.9999991845620194, iteration: 147435
loss: 1.0172500610351562,grad_norm: 0.8756044221074393, iteration: 147436
loss: 0.9722927212715149,grad_norm: 0.9155831145930374, iteration: 147437
loss: 1.0156421661376953,grad_norm: 0.9999991088231048, iteration: 147438
loss: 1.0252056121826172,grad_norm: 0.9999994299842557, iteration: 147439
loss: 0.9817125201225281,grad_norm: 0.9999990993717442, iteration: 147440
loss: 1.020052194595337,grad_norm: 0.9999990772094809, iteration: 147441
loss: 0.9658336639404297,grad_norm: 0.9461788682081574, iteration: 147442
loss: 0.9842885732650757,grad_norm: 0.9944933478626732, iteration: 147443
loss: 1.0105242729187012,grad_norm: 0.9999990710893202, iteration: 147444
loss: 1.021498203277588,grad_norm: 0.9999992376493043, iteration: 147445
loss: 1.0034133195877075,grad_norm: 0.9999991630197709, iteration: 147446
loss: 0.9396830201148987,grad_norm: 0.99999914111247, iteration: 147447
loss: 0.991328239440918,grad_norm: 0.7989103960790072, iteration: 147448
loss: 1.032429575920105,grad_norm: 0.9999989532275148, iteration: 147449
loss: 1.0255610942840576,grad_norm: 0.9999993417614043, iteration: 147450
loss: 1.0551244020462036,grad_norm: 0.9999992324843714, iteration: 147451
loss: 0.9986754655838013,grad_norm: 0.9999992081461947, iteration: 147452
loss: 0.9818161725997925,grad_norm: 0.9607735198220979, iteration: 147453
loss: 0.9746546745300293,grad_norm: 0.9999990653442474, iteration: 147454
loss: 0.9767242670059204,grad_norm: 0.9999990938589616, iteration: 147455
loss: 0.9776979684829712,grad_norm: 0.904234407800042, iteration: 147456
loss: 0.9560390710830688,grad_norm: 0.9083975210281283, iteration: 147457
loss: 1.0062257051467896,grad_norm: 0.835058943719549, iteration: 147458
loss: 0.9839745759963989,grad_norm: 0.9999991862574664, iteration: 147459
loss: 1.050973653793335,grad_norm: 0.9999992248521852, iteration: 147460
loss: 0.9881188273429871,grad_norm: 0.9959832973068246, iteration: 147461
loss: 1.0151952505111694,grad_norm: 0.9999991349018176, iteration: 147462
loss: 0.9909539222717285,grad_norm: 0.9999994132801173, iteration: 147463
loss: 1.0300843715667725,grad_norm: 0.9999991215639197, iteration: 147464
loss: 1.015883445739746,grad_norm: 0.9999992624023579, iteration: 147465
loss: 1.0000935792922974,grad_norm: 0.9245876014072081, iteration: 147466
loss: 1.0121190547943115,grad_norm: 0.9999992719959766, iteration: 147467
loss: 0.9981467723846436,grad_norm: 0.8981714504314139, iteration: 147468
loss: 1.0098192691802979,grad_norm: 0.8116609729852347, iteration: 147469
loss: 1.0011149644851685,grad_norm: 0.9999991040076691, iteration: 147470
loss: 1.0080081224441528,grad_norm: 0.9999991643356874, iteration: 147471
loss: 0.9876131415367126,grad_norm: 0.9172942358118216, iteration: 147472
loss: 1.0602487325668335,grad_norm: 0.999999565515506, iteration: 147473
loss: 1.0119191408157349,grad_norm: 0.9999991755092397, iteration: 147474
loss: 0.9972993731498718,grad_norm: 0.9428289938611754, iteration: 147475
loss: 1.0094839334487915,grad_norm: 0.931413098143714, iteration: 147476
loss: 1.0174081325531006,grad_norm: 0.9913565589957372, iteration: 147477
loss: 1.017877221107483,grad_norm: 0.9940674384418804, iteration: 147478
loss: 0.985297679901123,grad_norm: 0.9999992512990401, iteration: 147479
loss: 0.9955411553382874,grad_norm: 0.9936150334129238, iteration: 147480
loss: 1.0799800157546997,grad_norm: 0.9999992704771407, iteration: 147481
loss: 1.002571940422058,grad_norm: 0.9714537751663147, iteration: 147482
loss: 0.9476534128189087,grad_norm: 0.9051192510492285, iteration: 147483
loss: 1.0246613025665283,grad_norm: 0.9999990671936815, iteration: 147484
loss: 0.9855804443359375,grad_norm: 0.9728142733023782, iteration: 147485
loss: 0.9882824420928955,grad_norm: 0.9838679808780851, iteration: 147486
loss: 0.9730268120765686,grad_norm: 0.9148972646628539, iteration: 147487
loss: 0.9615523219108582,grad_norm: 0.9999989241196955, iteration: 147488
loss: 0.9727329611778259,grad_norm: 0.999999129843362, iteration: 147489
loss: 1.002191185951233,grad_norm: 0.999999098890586, iteration: 147490
loss: 1.0358268022537231,grad_norm: 0.9999992911818381, iteration: 147491
loss: 0.9840682744979858,grad_norm: 0.8669252152551357, iteration: 147492
loss: 0.9835705757141113,grad_norm: 0.8549187948859063, iteration: 147493
loss: 0.9932366609573364,grad_norm: 0.9999991429180066, iteration: 147494
loss: 0.9769165515899658,grad_norm: 0.821379335263796, iteration: 147495
loss: 0.9624365568161011,grad_norm: 0.9694306542746108, iteration: 147496
loss: 1.0054068565368652,grad_norm: 0.9999990321377233, iteration: 147497
loss: 1.0138944387435913,grad_norm: 0.9999991034879356, iteration: 147498
loss: 0.9881640672683716,grad_norm: 0.9999989063199276, iteration: 147499
loss: 1.0236855745315552,grad_norm: 0.9198055461941852, iteration: 147500
loss: 0.9623185396194458,grad_norm: 0.8909503292696023, iteration: 147501
loss: 0.9817594289779663,grad_norm: 0.9999990528966425, iteration: 147502
loss: 1.0029017925262451,grad_norm: 0.9999992731768609, iteration: 147503
loss: 0.9891496896743774,grad_norm: 0.9901703197152668, iteration: 147504
loss: 0.9962044954299927,grad_norm: 0.8456868571554033, iteration: 147505
loss: 0.9656385183334351,grad_norm: 0.9999991629917482, iteration: 147506
loss: 1.0118950605392456,grad_norm: 0.999999065446735, iteration: 147507
loss: 0.9945518374443054,grad_norm: 0.9370378398744197, iteration: 147508
loss: 0.9614495635032654,grad_norm: 0.9999990967622762, iteration: 147509
loss: 0.9893124103546143,grad_norm: 0.9176067458216158, iteration: 147510
loss: 1.0255666971206665,grad_norm: 0.9999989787958958, iteration: 147511
loss: 0.9839514493942261,grad_norm: 0.99999925034232, iteration: 147512
loss: 1.0102852582931519,grad_norm: 0.9999993530802446, iteration: 147513
loss: 0.9889088273048401,grad_norm: 0.9893662282041504, iteration: 147514
loss: 1.0239530801773071,grad_norm: 0.9999990798171726, iteration: 147515
loss: 0.9637362360954285,grad_norm: 0.9999991398477955, iteration: 147516
loss: 0.9910515546798706,grad_norm: 0.9999991262720503, iteration: 147517
loss: 1.004237413406372,grad_norm: 0.9999990399507782, iteration: 147518
loss: 1.0026918649673462,grad_norm: 0.9999991515130408, iteration: 147519
loss: 0.9851042032241821,grad_norm: 0.9999991856767322, iteration: 147520
loss: 0.9735524654388428,grad_norm: 0.9954502101552714, iteration: 147521
loss: 0.9845982789993286,grad_norm: 0.999999311700111, iteration: 147522
loss: 1.0113428831100464,grad_norm: 0.9999990907764279, iteration: 147523
loss: 1.0080565214157104,grad_norm: 0.9999990959426305, iteration: 147524
loss: 0.9609267711639404,grad_norm: 0.9650898595488143, iteration: 147525
loss: 0.9985502362251282,grad_norm: 0.9105572124934262, iteration: 147526
loss: 1.0206663608551025,grad_norm: 0.9130243661509558, iteration: 147527
loss: 1.022312879562378,grad_norm: 0.9999992779048976, iteration: 147528
loss: 0.9635490775108337,grad_norm: 0.9999990922948097, iteration: 147529
loss: 1.019608736038208,grad_norm: 0.9736206418949117, iteration: 147530
loss: 1.0119198560714722,grad_norm: 0.9999989777921786, iteration: 147531
loss: 1.0597825050354004,grad_norm: 0.9999991130885767, iteration: 147532
loss: 1.0161799192428589,grad_norm: 0.9957299275427193, iteration: 147533
loss: 1.015235424041748,grad_norm: 0.9999992189098741, iteration: 147534
loss: 1.0086286067962646,grad_norm: 0.9365148121619922, iteration: 147535
loss: 0.9911221265792847,grad_norm: 0.999999315198604, iteration: 147536
loss: 1.0091876983642578,grad_norm: 0.8784824834136827, iteration: 147537
loss: 0.9960751533508301,grad_norm: 0.9999990387015442, iteration: 147538
loss: 0.9775099754333496,grad_norm: 0.8762691597145144, iteration: 147539
loss: 0.9965566396713257,grad_norm: 0.999999085682599, iteration: 147540
loss: 0.9642558097839355,grad_norm: 0.846813194674055, iteration: 147541
loss: 0.9835497140884399,grad_norm: 0.9468014234286328, iteration: 147542
loss: 1.0497593879699707,grad_norm: 0.999999779123844, iteration: 147543
loss: 0.9925424456596375,grad_norm: 0.9999989627104963, iteration: 147544
loss: 0.9832025170326233,grad_norm: 0.9999990607528104, iteration: 147545
loss: 0.9648876190185547,grad_norm: 0.9351753659222446, iteration: 147546
loss: 0.9617546200752258,grad_norm: 0.9042423831963229, iteration: 147547
loss: 1.0041049718856812,grad_norm: 0.9999992653087327, iteration: 147548
loss: 0.9826287031173706,grad_norm: 0.9999991010465631, iteration: 147549
loss: 0.9931061267852783,grad_norm: 0.9143715842901128, iteration: 147550
loss: 0.9992160797119141,grad_norm: 0.9999991589445155, iteration: 147551
loss: 0.971351146697998,grad_norm: 0.9323877260962147, iteration: 147552
loss: 1.0264372825622559,grad_norm: 0.999999257825349, iteration: 147553
loss: 0.9948155283927917,grad_norm: 0.9900546416691036, iteration: 147554
loss: 1.0248202085494995,grad_norm: 0.999999108056853, iteration: 147555
loss: 1.0144270658493042,grad_norm: 0.9999991533098807, iteration: 147556
loss: 0.9845239520072937,grad_norm: 0.9999991846862515, iteration: 147557
loss: 0.9626560807228088,grad_norm: 0.9065607672101105, iteration: 147558
loss: 1.0297125577926636,grad_norm: 0.9999990753682898, iteration: 147559
loss: 1.0155106782913208,grad_norm: 0.9999992003731962, iteration: 147560
loss: 1.0267763137817383,grad_norm: 0.9559445986901417, iteration: 147561
loss: 1.0063124895095825,grad_norm: 0.9999990168907378, iteration: 147562
loss: 0.946094274520874,grad_norm: 0.9167106454848108, iteration: 147563
loss: 0.9732483625411987,grad_norm: 0.9070486279839188, iteration: 147564
loss: 1.0141655206680298,grad_norm: 0.9999992259206744, iteration: 147565
loss: 0.9733644127845764,grad_norm: 0.9999990417855793, iteration: 147566
loss: 0.9930752515792847,grad_norm: 0.894831916375472, iteration: 147567
loss: 0.9501210451126099,grad_norm: 0.999999098743928, iteration: 147568
loss: 1.0165411233901978,grad_norm: 0.9999992009314181, iteration: 147569
loss: 1.1305989027023315,grad_norm: 0.9999997483817745, iteration: 147570
loss: 1.029086947441101,grad_norm: 0.9999991812650784, iteration: 147571
loss: 0.9726175665855408,grad_norm: 0.999999258533224, iteration: 147572
loss: 0.9729233384132385,grad_norm: 0.7949729356245276, iteration: 147573
loss: 1.0568170547485352,grad_norm: 0.9999997659311582, iteration: 147574
loss: 1.0018690824508667,grad_norm: 0.9759624608662675, iteration: 147575
loss: 1.0074101686477661,grad_norm: 0.9147207609557795, iteration: 147576
loss: 1.0490344762802124,grad_norm: 0.9999992060461861, iteration: 147577
loss: 1.0198585987091064,grad_norm: 0.9999991034336488, iteration: 147578
loss: 0.9854699969291687,grad_norm: 0.9889976574098128, iteration: 147579
loss: 0.9887533187866211,grad_norm: 0.9803395073029629, iteration: 147580
loss: 0.9682166576385498,grad_norm: 0.9999991471469096, iteration: 147581
loss: 1.0404678583145142,grad_norm: 0.9999991617720092, iteration: 147582
loss: 0.9659903049468994,grad_norm: 0.9999997826938009, iteration: 147583
loss: 0.9792571067810059,grad_norm: 0.9842520984363758, iteration: 147584
loss: 0.966241717338562,grad_norm: 0.9999990505466626, iteration: 147585
loss: 1.0181207656860352,grad_norm: 0.9219730924001553, iteration: 147586
loss: 1.0018278360366821,grad_norm: 0.9999991552535425, iteration: 147587
loss: 0.9839133620262146,grad_norm: 0.9985045135899215, iteration: 147588
loss: 0.9782401919364929,grad_norm: 0.971634785538973, iteration: 147589
loss: 0.9887797236442566,grad_norm: 0.9270227823771069, iteration: 147590
loss: 1.0850188732147217,grad_norm: 0.9999993880379926, iteration: 147591
loss: 0.9946932196617126,grad_norm: 0.9999991301441834, iteration: 147592
loss: 1.0584166049957275,grad_norm: 0.9999997700847156, iteration: 147593
loss: 1.0087523460388184,grad_norm: 0.999999325361303, iteration: 147594
loss: 0.991832435131073,grad_norm: 0.9253930125929164, iteration: 147595
loss: 0.9931472539901733,grad_norm: 0.8510936593755244, iteration: 147596
loss: 0.9914552569389343,grad_norm: 0.8843853385592348, iteration: 147597
loss: 0.9876911044120789,grad_norm: 0.965086611222078, iteration: 147598
loss: 1.0084892511367798,grad_norm: 0.9999993238911552, iteration: 147599
loss: 1.012382984161377,grad_norm: 0.9999991556971771, iteration: 147600
loss: 0.9732394218444824,grad_norm: 0.9203441351018449, iteration: 147601
loss: 0.9766778349876404,grad_norm: 0.9999990285871387, iteration: 147602
loss: 0.9725944995880127,grad_norm: 0.9853066542178418, iteration: 147603
loss: 1.0050878524780273,grad_norm: 0.9999992000560117, iteration: 147604
loss: 1.0131237506866455,grad_norm: 0.9870667175136525, iteration: 147605
loss: 1.0417191982269287,grad_norm: 0.9999996005099924, iteration: 147606
loss: 0.9989685416221619,grad_norm: 0.9364131709680289, iteration: 147607
loss: 1.0216937065124512,grad_norm: 0.9999993252880675, iteration: 147608
loss: 1.0208706855773926,grad_norm: 0.9999992651344063, iteration: 147609
loss: 1.0130174160003662,grad_norm: 0.8757746370616132, iteration: 147610
loss: 0.9345201849937439,grad_norm: 0.9999991449781379, iteration: 147611
loss: 1.00520920753479,grad_norm: 0.9500407351905319, iteration: 147612
loss: 1.034816861152649,grad_norm: 0.999999302002315, iteration: 147613
loss: 1.0166690349578857,grad_norm: 0.9156019903290535, iteration: 147614
loss: 1.004526138305664,grad_norm: 0.8554345816138101, iteration: 147615
loss: 1.0075082778930664,grad_norm: 0.9999992304231043, iteration: 147616
loss: 0.9600774645805359,grad_norm: 0.9894737259950194, iteration: 147617
loss: 1.0275958776474,grad_norm: 0.8634455794768864, iteration: 147618
loss: 0.9804590940475464,grad_norm: 0.9999990161998883, iteration: 147619
loss: 1.0102275609970093,grad_norm: 0.946006287820337, iteration: 147620
loss: 1.0065414905548096,grad_norm: 0.9999991288840886, iteration: 147621
loss: 0.9984232783317566,grad_norm: 0.8376541319707534, iteration: 147622
loss: 1.0128990411758423,grad_norm: 0.9999993272367261, iteration: 147623
loss: 0.9966827034950256,grad_norm: 0.9999991528138303, iteration: 147624
loss: 1.0094481706619263,grad_norm: 0.9331360877270021, iteration: 147625
loss: 0.9817760586738586,grad_norm: 0.9433851415366822, iteration: 147626
loss: 1.0555509328842163,grad_norm: 0.9999991085827722, iteration: 147627
loss: 1.0045907497406006,grad_norm: 0.9343961752398051, iteration: 147628
loss: 1.022600769996643,grad_norm: 0.9218332153342156, iteration: 147629
loss: 1.037522315979004,grad_norm: 0.9999991570220448, iteration: 147630
loss: 1.0118802785873413,grad_norm: 0.9999990364396855, iteration: 147631
loss: 0.970332682132721,grad_norm: 0.9999993177980322, iteration: 147632
loss: 0.9854174852371216,grad_norm: 0.9381520108788567, iteration: 147633
loss: 1.0405946969985962,grad_norm: 0.9999991481564232, iteration: 147634
loss: 1.0133206844329834,grad_norm: 0.9025464529097728, iteration: 147635
loss: 1.038962721824646,grad_norm: 0.9999991270395243, iteration: 147636
loss: 1.004009485244751,grad_norm: 0.9422028443635467, iteration: 147637
loss: 1.0447361469268799,grad_norm: 0.9999992095759678, iteration: 147638
loss: 1.0180402994155884,grad_norm: 0.9999991900365538, iteration: 147639
loss: 0.9823741912841797,grad_norm: 0.9882070448774184, iteration: 147640
loss: 1.0230854749679565,grad_norm: 0.9999990996956148, iteration: 147641
loss: 1.0187047719955444,grad_norm: 0.9733587886389703, iteration: 147642
loss: 0.9788335561752319,grad_norm: 0.999999123197558, iteration: 147643
loss: 0.9824897050857544,grad_norm: 0.9999991172679258, iteration: 147644
loss: 1.0113755464553833,grad_norm: 0.9855659927338781, iteration: 147645
loss: 0.9822472929954529,grad_norm: 0.9633489789112024, iteration: 147646
loss: 1.0026886463165283,grad_norm: 0.9045842624560811, iteration: 147647
loss: 1.0206844806671143,grad_norm: 0.9999990739158982, iteration: 147648
loss: 1.0031288862228394,grad_norm: 0.9661717646850734, iteration: 147649
loss: 1.0275681018829346,grad_norm: 0.8950592707937365, iteration: 147650
loss: 1.0018327236175537,grad_norm: 0.999999330159372, iteration: 147651
loss: 1.0165221691131592,grad_norm: 0.9771800049798784, iteration: 147652
loss: 0.9918267726898193,grad_norm: 0.9999992888999671, iteration: 147653
loss: 1.0001095533370972,grad_norm: 0.9999993451110377, iteration: 147654
loss: 1.0072057247161865,grad_norm: 0.90514729398534, iteration: 147655
loss: 1.0890229940414429,grad_norm: 0.9999990415719759, iteration: 147656
loss: 0.9993665814399719,grad_norm: 0.8080124282876171, iteration: 147657
loss: 0.9911531209945679,grad_norm: 0.9999990289946606, iteration: 147658
loss: 1.0196573734283447,grad_norm: 0.9999993250088741, iteration: 147659
loss: 0.9795544147491455,grad_norm: 0.9999990143914568, iteration: 147660
loss: 0.985697329044342,grad_norm: 0.9999991984233434, iteration: 147661
loss: 0.962405264377594,grad_norm: 0.9547617463441973, iteration: 147662
loss: 0.9977442026138306,grad_norm: 0.9999990443774104, iteration: 147663
loss: 1.0749804973602295,grad_norm: 0.9999994324571855, iteration: 147664
loss: 1.0274286270141602,grad_norm: 0.9999990370396669, iteration: 147665
loss: 1.0354965925216675,grad_norm: 0.999999075307652, iteration: 147666
loss: 1.3113031387329102,grad_norm: 0.9999999317808336, iteration: 147667
loss: 0.9835766553878784,grad_norm: 0.8730465518971421, iteration: 147668
loss: 0.9988735914230347,grad_norm: 0.9999991107341686, iteration: 147669
loss: 1.0058621168136597,grad_norm: 0.9972405522097935, iteration: 147670
loss: 1.0143629312515259,grad_norm: 0.9999993222886785, iteration: 147671
loss: 0.979485273361206,grad_norm: 0.8998315465953446, iteration: 147672
loss: 1.0090934038162231,grad_norm: 0.8490253125714649, iteration: 147673
loss: 0.9770302176475525,grad_norm: 0.9999991966070896, iteration: 147674
loss: 1.0088194608688354,grad_norm: 0.9999991340754517, iteration: 147675
loss: 1.000149130821228,grad_norm: 0.999999320381564, iteration: 147676
loss: 1.0338941812515259,grad_norm: 0.9999999472138317, iteration: 147677
loss: 0.9805673360824585,grad_norm: 0.9269515075214906, iteration: 147678
loss: 1.0231001377105713,grad_norm: 0.9999991159937253, iteration: 147679
loss: 1.031360149383545,grad_norm: 0.999999256262318, iteration: 147680
loss: 0.9697948694229126,grad_norm: 0.8608446934836047, iteration: 147681
loss: 0.9502514004707336,grad_norm: 0.9385248033202772, iteration: 147682
loss: 1.013727068901062,grad_norm: 0.9999990431981111, iteration: 147683
loss: 1.011725664138794,grad_norm: 0.999999206715724, iteration: 147684
loss: 1.0480233430862427,grad_norm: 0.9999989289465124, iteration: 147685
loss: 0.963176965713501,grad_norm: 0.8987022660041708, iteration: 147686
loss: 1.0015244483947754,grad_norm: 0.8749863772215084, iteration: 147687
loss: 0.9458764791488647,grad_norm: 0.9999990616407842, iteration: 147688
loss: 0.992224931716919,grad_norm: 0.9999988876212942, iteration: 147689
loss: 0.9701316952705383,grad_norm: 0.9816237764661276, iteration: 147690
loss: 0.9755486845970154,grad_norm: 0.9998076356058665, iteration: 147691
loss: 1.0176339149475098,grad_norm: 0.9999992761284746, iteration: 147692
loss: 1.0111771821975708,grad_norm: 0.999999136791344, iteration: 147693
loss: 0.9941198825836182,grad_norm: 0.9999990207001456, iteration: 147694
loss: 1.022714614868164,grad_norm: 0.9999990790441002, iteration: 147695
loss: 1.0301105976104736,grad_norm: 0.973913822053, iteration: 147696
loss: 1.0261807441711426,grad_norm: 0.9836517083208017, iteration: 147697
loss: 1.0356104373931885,grad_norm: 0.9999993056225344, iteration: 147698
loss: 0.9986519813537598,grad_norm: 0.8096502210924115, iteration: 147699
loss: 1.002923846244812,grad_norm: 0.9999989831550411, iteration: 147700
loss: 1.032172679901123,grad_norm: 0.9835417459604529, iteration: 147701
loss: 0.9910542964935303,grad_norm: 0.9999991709327785, iteration: 147702
loss: 0.9912559986114502,grad_norm: 0.7917186489873788, iteration: 147703
loss: 1.0070127248764038,grad_norm: 0.8896291481723033, iteration: 147704
loss: 0.9920641183853149,grad_norm: 0.9912120828794555, iteration: 147705
loss: 1.026219367980957,grad_norm: 0.9999990653424206, iteration: 147706
loss: 1.0287187099456787,grad_norm: 0.9999991210834906, iteration: 147707
loss: 1.0084811449050903,grad_norm: 0.9999990303578679, iteration: 147708
loss: 0.9904578328132629,grad_norm: 0.9999989823539488, iteration: 147709
loss: 1.0412110090255737,grad_norm: 0.9999991413542687, iteration: 147710
loss: 1.014380693435669,grad_norm: 0.8911076975416172, iteration: 147711
loss: 1.0095734596252441,grad_norm: 0.9999991114849468, iteration: 147712
loss: 0.9951746463775635,grad_norm: 0.9400904332149177, iteration: 147713
loss: 0.9995917677879333,grad_norm: 0.9659831512901331, iteration: 147714
loss: 0.983765184879303,grad_norm: 0.999999201008176, iteration: 147715
loss: 0.9822221994400024,grad_norm: 0.9976626165951051, iteration: 147716
loss: 1.0013961791992188,grad_norm: 0.9999990996968662, iteration: 147717
loss: 0.9720917344093323,grad_norm: 0.9999992304114743, iteration: 147718
loss: 1.0101312398910522,grad_norm: 0.9999990666509938, iteration: 147719
loss: 0.9866535663604736,grad_norm: 0.9382538270242514, iteration: 147720
loss: 1.0038982629776,grad_norm: 0.8994892867628596, iteration: 147721
loss: 1.0270287990570068,grad_norm: 0.9578253537140813, iteration: 147722
loss: 1.0591069459915161,grad_norm: 0.9999996651048468, iteration: 147723
loss: 0.9756337404251099,grad_norm: 0.935218857094629, iteration: 147724
loss: 0.9718929529190063,grad_norm: 0.9999990729837547, iteration: 147725
loss: 1.0343490839004517,grad_norm: 0.9838665998699946, iteration: 147726
loss: 0.9923177361488342,grad_norm: 0.9999992927733425, iteration: 147727
loss: 1.0041680335998535,grad_norm: 0.9583649910869787, iteration: 147728
loss: 0.9691622257232666,grad_norm: 0.9828023923969663, iteration: 147729
loss: 0.9985204935073853,grad_norm: 0.8915172544006664, iteration: 147730
loss: 0.9734839797019958,grad_norm: 0.855183000811375, iteration: 147731
loss: 1.0360885858535767,grad_norm: 0.9999991226352476, iteration: 147732
loss: 0.9958202242851257,grad_norm: 0.9999991153659387, iteration: 147733
loss: 0.9957622289657593,grad_norm: 0.999999192379807, iteration: 147734
loss: 0.9899994730949402,grad_norm: 0.8733649792704233, iteration: 147735
loss: 1.01973295211792,grad_norm: 0.9999994208697428, iteration: 147736
loss: 1.0141375064849854,grad_norm: 0.9703201767000841, iteration: 147737
loss: 0.973672091960907,grad_norm: 0.8268995171035785, iteration: 147738
loss: 0.9916307330131531,grad_norm: 0.9549074889142406, iteration: 147739
loss: 0.9858827590942383,grad_norm: 0.943460536137686, iteration: 147740
loss: 1.0474287271499634,grad_norm: 0.9999991692417916, iteration: 147741
loss: 1.0282320976257324,grad_norm: 0.8944066795440224, iteration: 147742
loss: 0.9901577234268188,grad_norm: 0.9999992033988632, iteration: 147743
loss: 1.0130702257156372,grad_norm: 0.9999994102753149, iteration: 147744
loss: 0.9860003590583801,grad_norm: 0.906490261939398, iteration: 147745
loss: 1.0046555995941162,grad_norm: 0.9999996971975416, iteration: 147746
loss: 1.022194504737854,grad_norm: 0.9999993110975133, iteration: 147747
loss: 0.9693364500999451,grad_norm: 0.9999991840963235, iteration: 147748
loss: 1.0047955513000488,grad_norm: 0.8958174514355872, iteration: 147749
loss: 1.0029115676879883,grad_norm: 0.992239665419237, iteration: 147750
loss: 0.9666022658348083,grad_norm: 0.8647967445831007, iteration: 147751
loss: 0.9825414419174194,grad_norm: 0.9999989595774625, iteration: 147752
loss: 1.025749921798706,grad_norm: 0.9233582049128595, iteration: 147753
loss: 1.0018105506896973,grad_norm: 0.9999988968710596, iteration: 147754
loss: 0.9917799830436707,grad_norm: 0.9999992800213363, iteration: 147755
loss: 1.017407774925232,grad_norm: 0.9999992447320941, iteration: 147756
loss: 0.9845064282417297,grad_norm: 0.9144565457670246, iteration: 147757
loss: 0.9926090240478516,grad_norm: 0.9999991076299733, iteration: 147758
loss: 1.002648115158081,grad_norm: 0.9999990983709034, iteration: 147759
loss: 0.9977325797080994,grad_norm: 0.9999997193770284, iteration: 147760
loss: 0.9884938597679138,grad_norm: 0.9999992225602923, iteration: 147761
loss: 1.0004549026489258,grad_norm: 0.9999991153978657, iteration: 147762
loss: 1.16371750831604,grad_norm: 0.9999995389795999, iteration: 147763
loss: 1.0164611339569092,grad_norm: 0.9378037199030459, iteration: 147764
loss: 1.0239052772521973,grad_norm: 0.9999990136925164, iteration: 147765
loss: 0.978648841381073,grad_norm: 0.900812717673049, iteration: 147766
loss: 1.0003583431243896,grad_norm: 0.9999992321887994, iteration: 147767
loss: 1.0010201930999756,grad_norm: 0.9999991307496425, iteration: 147768
loss: 0.9891298413276672,grad_norm: 0.9616587061307067, iteration: 147769
loss: 0.9672495126724243,grad_norm: 0.9999990390621764, iteration: 147770
loss: 1.0239754915237427,grad_norm: 0.9933529555729412, iteration: 147771
loss: 0.991344690322876,grad_norm: 0.8683158848010032, iteration: 147772
loss: 1.0052074193954468,grad_norm: 0.9999990308984674, iteration: 147773
loss: 1.0208005905151367,grad_norm: 0.9040852324598868, iteration: 147774
loss: 0.9866726398468018,grad_norm: 0.8902542916022426, iteration: 147775
loss: 0.9704216122627258,grad_norm: 0.9999990741158121, iteration: 147776
loss: 1.0077104568481445,grad_norm: 0.9779876353759237, iteration: 147777
loss: 1.0401257276535034,grad_norm: 0.9999990425010477, iteration: 147778
loss: 0.9815078973770142,grad_norm: 0.9999990411929726, iteration: 147779
loss: 0.9956160187721252,grad_norm: 0.9999991234313479, iteration: 147780
loss: 1.0034315586090088,grad_norm: 0.9999991739999234, iteration: 147781
loss: 1.025792121887207,grad_norm: 0.9999990693003586, iteration: 147782
loss: 1.0251960754394531,grad_norm: 0.9139958464755339, iteration: 147783
loss: 1.00724196434021,grad_norm: 0.9999992366419299, iteration: 147784
loss: 0.9748753309249878,grad_norm: 0.9241537256405952, iteration: 147785
loss: 0.974877655506134,grad_norm: 0.9999989616292835, iteration: 147786
loss: 1.0447704792022705,grad_norm: 0.9999991834633034, iteration: 147787
loss: 0.9853047728538513,grad_norm: 0.9432539044400429, iteration: 147788
loss: 1.0271720886230469,grad_norm: 0.9999991553819224, iteration: 147789
loss: 1.0065535306930542,grad_norm: 0.9999990243716406, iteration: 147790
loss: 1.0150846242904663,grad_norm: 0.9999992868866079, iteration: 147791
loss: 1.0159938335418701,grad_norm: 0.8275852063087834, iteration: 147792
loss: 1.0252046585083008,grad_norm: 0.9999989372264727, iteration: 147793
loss: 1.0156216621398926,grad_norm: 0.9999994102085463, iteration: 147794
loss: 0.9930480122566223,grad_norm: 0.88142474489596, iteration: 147795
loss: 0.9954854249954224,grad_norm: 0.9999990054644341, iteration: 147796
loss: 0.9863372445106506,grad_norm: 0.999999227204727, iteration: 147797
loss: 0.9611629843711853,grad_norm: 0.9238734253349984, iteration: 147798
loss: 1.1248359680175781,grad_norm: 0.9999998931093754, iteration: 147799
loss: 1.000504493713379,grad_norm: 0.9938251242752045, iteration: 147800
loss: 0.9934645891189575,grad_norm: 0.9332531720995102, iteration: 147801
loss: 1.0037224292755127,grad_norm: 0.9999996823482027, iteration: 147802
loss: 0.9676759839057922,grad_norm: 0.9999990564510649, iteration: 147803
loss: 1.033420443534851,grad_norm: 0.999999368450012, iteration: 147804
loss: 0.9848668575286865,grad_norm: 0.9999990674091989, iteration: 147805
loss: 0.970731258392334,grad_norm: 0.9999991921479616, iteration: 147806
loss: 0.978047788143158,grad_norm: 0.9999992099493004, iteration: 147807
loss: 0.9999075531959534,grad_norm: 0.9999992308459211, iteration: 147808
loss: 1.0574465990066528,grad_norm: 0.9999990599499262, iteration: 147809
loss: 1.016804575920105,grad_norm: 0.9999991194715165, iteration: 147810
loss: 0.9723906517028809,grad_norm: 0.9825733119486351, iteration: 147811
loss: 1.000465750694275,grad_norm: 0.9999990250853097, iteration: 147812
loss: 1.005163311958313,grad_norm: 0.807578688286453, iteration: 147813
loss: 0.976669192314148,grad_norm: 0.975509200257084, iteration: 147814
loss: 1.0083980560302734,grad_norm: 0.9999991518018185, iteration: 147815
loss: 0.9963513016700745,grad_norm: 0.9999991167911292, iteration: 147816
loss: 1.1176661252975464,grad_norm: 0.9999993946302758, iteration: 147817
loss: 1.0439552068710327,grad_norm: 0.9999993125584483, iteration: 147818
loss: 0.983016312122345,grad_norm: 0.9999990228097453, iteration: 147819
loss: 1.0107755661010742,grad_norm: 0.8522857132253155, iteration: 147820
loss: 1.0062569379806519,grad_norm: 0.9999990334500857, iteration: 147821
loss: 0.9749115705490112,grad_norm: 0.9674273216248178, iteration: 147822
loss: 0.9994047284126282,grad_norm: 0.9999992415807131, iteration: 147823
loss: 0.9986953139305115,grad_norm: 0.9449297768292129, iteration: 147824
loss: 1.017817497253418,grad_norm: 0.9999994823666204, iteration: 147825
loss: 0.9960101246833801,grad_norm: 0.995238821728936, iteration: 147826
loss: 0.9736927151679993,grad_norm: 0.9999990555512346, iteration: 147827
loss: 0.990710973739624,grad_norm: 0.9827498882760526, iteration: 147828
loss: 1.0070059299468994,grad_norm: 0.9999991348194935, iteration: 147829
loss: 1.0167824029922485,grad_norm: 0.9614832685647275, iteration: 147830
loss: 0.998742401599884,grad_norm: 0.9999991625624594, iteration: 147831
loss: 0.9798853993415833,grad_norm: 0.9612145112688603, iteration: 147832
loss: 1.0192705392837524,grad_norm: 0.9999989055425199, iteration: 147833
loss: 0.9781820774078369,grad_norm: 0.8418377701222955, iteration: 147834
loss: 0.9689538478851318,grad_norm: 0.9423252854026943, iteration: 147835
loss: 1.0072921514511108,grad_norm: 0.9750685927363558, iteration: 147836
loss: 0.9480555057525635,grad_norm: 0.8352158962168839, iteration: 147837
loss: 1.0026811361312866,grad_norm: 0.9836179275165955, iteration: 147838
loss: 0.9897732734680176,grad_norm: 0.9999989922105021, iteration: 147839
loss: 0.977453887462616,grad_norm: 0.9999991165649541, iteration: 147840
loss: 1.000467300415039,grad_norm: 0.9999991646009764, iteration: 147841
loss: 1.0043199062347412,grad_norm: 0.7844194675551243, iteration: 147842
loss: 0.9812747240066528,grad_norm: 0.9999993859336666, iteration: 147843
loss: 0.9845889806747437,grad_norm: 0.9999990880222644, iteration: 147844
loss: 1.0135389566421509,grad_norm: 0.9817728903732764, iteration: 147845
loss: 0.9783751368522644,grad_norm: 0.8859359117322252, iteration: 147846
loss: 0.9836918711662292,grad_norm: 0.9999990246112146, iteration: 147847
loss: 1.0026071071624756,grad_norm: 0.9999995091880538, iteration: 147848
loss: 1.0394104719161987,grad_norm: 0.9999992072974808, iteration: 147849
loss: 0.9885815382003784,grad_norm: 0.9999990936668638, iteration: 147850
loss: 1.026444673538208,grad_norm: 0.8967012743082515, iteration: 147851
loss: 0.9708415269851685,grad_norm: 0.9412034753711349, iteration: 147852
loss: 1.0082851648330688,grad_norm: 0.9999997793715654, iteration: 147853
loss: 0.9957072138786316,grad_norm: 0.8883967586478614, iteration: 147854
loss: 0.9790667295455933,grad_norm: 0.967888126747296, iteration: 147855
loss: 0.9858136177062988,grad_norm: 0.9054483249073028, iteration: 147856
loss: 1.030873417854309,grad_norm: 0.9999995890422445, iteration: 147857
loss: 1.0369305610656738,grad_norm: 0.9999991888826247, iteration: 147858
loss: 1.0179638862609863,grad_norm: 0.9614110091196159, iteration: 147859
loss: 0.9863110184669495,grad_norm: 0.9999990944569701, iteration: 147860
loss: 1.0093371868133545,grad_norm: 0.9999990878162245, iteration: 147861
loss: 0.9698922038078308,grad_norm: 0.999999114574281, iteration: 147862
loss: 0.9854676723480225,grad_norm: 0.9999990076820711, iteration: 147863
loss: 1.005623459815979,grad_norm: 0.974218706004694, iteration: 147864
loss: 0.971612811088562,grad_norm: 0.9999992172327796, iteration: 147865
loss: 1.012579321861267,grad_norm: 0.9999989378539063, iteration: 147866
loss: 0.9999649524688721,grad_norm: 0.9999991359821198, iteration: 147867
loss: 0.9442493319511414,grad_norm: 0.9999991677662152, iteration: 147868
loss: 1.0092226266860962,grad_norm: 0.9547122459758349, iteration: 147869
loss: 1.0092840194702148,grad_norm: 0.9999990242959098, iteration: 147870
loss: 0.9910317063331604,grad_norm: 0.8804204509965575, iteration: 147871
loss: 0.9766712188720703,grad_norm: 0.9999989573787181, iteration: 147872
loss: 0.9806771278381348,grad_norm: 0.9999990675000634, iteration: 147873
loss: 1.0204159021377563,grad_norm: 0.9999991454519216, iteration: 147874
loss: 0.9982336163520813,grad_norm: 0.9999990603069587, iteration: 147875
loss: 1.0064362287521362,grad_norm: 0.9213961403137675, iteration: 147876
loss: 1.0000568628311157,grad_norm: 0.8516139205150709, iteration: 147877
loss: 1.0673106908798218,grad_norm: 0.9999991752375562, iteration: 147878
loss: 0.9723393321037292,grad_norm: 0.9999992199047611, iteration: 147879
loss: 0.9941899180412292,grad_norm: 0.9740399156255569, iteration: 147880
loss: 1.0282645225524902,grad_norm: 0.9314580108678342, iteration: 147881
loss: 1.007834792137146,grad_norm: 0.9565246902770652, iteration: 147882
loss: 1.0007891654968262,grad_norm: 0.9867090453204195, iteration: 147883
loss: 0.9830960035324097,grad_norm: 0.999999434124136, iteration: 147884
loss: 0.9974642992019653,grad_norm: 0.8115043138328408, iteration: 147885
loss: 0.9735263586044312,grad_norm: 0.9999991555626813, iteration: 147886
loss: 1.043303370475769,grad_norm: 0.9999991541811973, iteration: 147887
loss: 1.0093159675598145,grad_norm: 0.9999990352112367, iteration: 147888
loss: 1.0267449617385864,grad_norm: 0.9999988674006492, iteration: 147889
loss: 1.0145559310913086,grad_norm: 0.999998877762929, iteration: 147890
loss: 1.0033258199691772,grad_norm: 0.9239047586519888, iteration: 147891
loss: 1.0146853923797607,grad_norm: 0.999999247768697, iteration: 147892
loss: 1.0006104707717896,grad_norm: 0.9279584712543504, iteration: 147893
loss: 1.0226458311080933,grad_norm: 0.9999999298056499, iteration: 147894
loss: 0.9915338754653931,grad_norm: 0.9381590553734671, iteration: 147895
loss: 1.0559171438217163,grad_norm: 0.999999084814272, iteration: 147896
loss: 0.9980781674385071,grad_norm: 0.9999990874965399, iteration: 147897
loss: 0.9924855828285217,grad_norm: 0.993297548034323, iteration: 147898
loss: 0.9981716275215149,grad_norm: 0.9663290891743027, iteration: 147899
loss: 0.9979537725448608,grad_norm: 0.9955758699742339, iteration: 147900
loss: 1.0301891565322876,grad_norm: 0.9695396197029638, iteration: 147901
loss: 0.9847133755683899,grad_norm: 0.932377147004118, iteration: 147902
loss: 0.9653366208076477,grad_norm: 0.9999990053394067, iteration: 147903
loss: 0.9768025279045105,grad_norm: 0.9999990818578497, iteration: 147904
loss: 1.0094501972198486,grad_norm: 0.943929953300684, iteration: 147905
loss: 1.0008774995803833,grad_norm: 0.9915629781476653, iteration: 147906
loss: 0.9783899188041687,grad_norm: 0.999999338848109, iteration: 147907
loss: 1.0108633041381836,grad_norm: 0.8822602284573552, iteration: 147908
loss: 0.9894344806671143,grad_norm: 0.9297882344884223, iteration: 147909
loss: 1.005637526512146,grad_norm: 0.9908197926279042, iteration: 147910
loss: 1.0081852674484253,grad_norm: 0.9999990988845411, iteration: 147911
loss: 1.0006396770477295,grad_norm: 0.9999992269947248, iteration: 147912
loss: 0.9899569749832153,grad_norm: 0.8714285433643705, iteration: 147913
loss: 0.9774374961853027,grad_norm: 0.9999990717054771, iteration: 147914
loss: 0.9800669550895691,grad_norm: 0.9999991873666285, iteration: 147915
loss: 1.0477603673934937,grad_norm: 0.9999992924874326, iteration: 147916
loss: 0.9987747073173523,grad_norm: 0.999999075788582, iteration: 147917
loss: 0.9886812567710876,grad_norm: 0.8462286495697456, iteration: 147918
loss: 1.0355844497680664,grad_norm: 0.999998911492108, iteration: 147919
loss: 1.0612857341766357,grad_norm: 0.9635247170388281, iteration: 147920
loss: 1.047858715057373,grad_norm: 0.9999990246290412, iteration: 147921
loss: 1.0013409852981567,grad_norm: 0.8262917827606499, iteration: 147922
loss: 1.01478910446167,grad_norm: 0.9008230607828458, iteration: 147923
loss: 0.9911327958106995,grad_norm: 0.9999992527171884, iteration: 147924
loss: 0.9648954272270203,grad_norm: 0.9298437197829713, iteration: 147925
loss: 1.004173994064331,grad_norm: 0.9999992692421552, iteration: 147926
loss: 0.990421712398529,grad_norm: 0.9813023509611256, iteration: 147927
loss: 0.9847539663314819,grad_norm: 0.999999227757922, iteration: 147928
loss: 1.0718809366226196,grad_norm: 0.9999998456753659, iteration: 147929
loss: 0.9492016434669495,grad_norm: 0.9253373438628146, iteration: 147930
loss: 0.9722056984901428,grad_norm: 0.999999084078312, iteration: 147931
loss: 0.9878216981887817,grad_norm: 0.999999149234758, iteration: 147932
loss: 1.022707462310791,grad_norm: 0.9125341338879269, iteration: 147933
loss: 1.0092742443084717,grad_norm: 0.9999992117147468, iteration: 147934
loss: 0.9972369074821472,grad_norm: 0.9999990871588218, iteration: 147935
loss: 0.9582505822181702,grad_norm: 0.9999989489615271, iteration: 147936
loss: 1.0087506771087646,grad_norm: 0.9999990986705928, iteration: 147937
loss: 0.9905439019203186,grad_norm: 0.9541620545059204, iteration: 147938
loss: 0.9963060617446899,grad_norm: 0.8911469385376635, iteration: 147939
loss: 0.9990264177322388,grad_norm: 0.9077253955399506, iteration: 147940
loss: 1.013384461402893,grad_norm: 0.9990127865637808, iteration: 147941
loss: 0.9949527978897095,grad_norm: 0.9999990812495569, iteration: 147942
loss: 0.9966449737548828,grad_norm: 0.9250066231957075, iteration: 147943
loss: 1.0213884115219116,grad_norm: 0.9999991129820278, iteration: 147944
loss: 1.0180418491363525,grad_norm: 0.9999994183090255, iteration: 147945
loss: 1.0077083110809326,grad_norm: 0.999999166053756, iteration: 147946
loss: 0.9649311900138855,grad_norm: 0.8306319991244048, iteration: 147947
loss: 0.9933847188949585,grad_norm: 0.999999256741249, iteration: 147948
loss: 0.9846799969673157,grad_norm: 0.9741371771313546, iteration: 147949
loss: 1.0249162912368774,grad_norm: 0.999998981457765, iteration: 147950
loss: 0.9568219184875488,grad_norm: 0.891791530971409, iteration: 147951
loss: 1.0187307596206665,grad_norm: 0.8839371162618808, iteration: 147952
loss: 1.0061179399490356,grad_norm: 0.9999991653961906, iteration: 147953
loss: 1.0036070346832275,grad_norm: 0.9074347037246616, iteration: 147954
loss: 1.001829981803894,grad_norm: 0.9999992632386008, iteration: 147955
loss: 1.0074949264526367,grad_norm: 0.9999991250496223, iteration: 147956
loss: 0.9930452704429626,grad_norm: 0.9561833937031954, iteration: 147957
loss: 0.996468722820282,grad_norm: 0.9999992296586981, iteration: 147958
loss: 0.9836154580116272,grad_norm: 0.9870402056506935, iteration: 147959
loss: 0.9754958748817444,grad_norm: 0.933442468659764, iteration: 147960
loss: 0.9550401568412781,grad_norm: 0.8926916188388778, iteration: 147961
loss: 1.0349688529968262,grad_norm: 0.9999991250588962, iteration: 147962
loss: 1.0247269868850708,grad_norm: 0.9700106334023764, iteration: 147963
loss: 1.003553032875061,grad_norm: 0.8682316379500644, iteration: 147964
loss: 1.024552583694458,grad_norm: 0.9999991853250773, iteration: 147965
loss: 1.0168952941894531,grad_norm: 0.9999990574682323, iteration: 147966
loss: 1.0064789056777954,grad_norm: 0.9999989759347034, iteration: 147967
loss: 1.0339282751083374,grad_norm: 0.999999603981346, iteration: 147968
loss: 1.0195571184158325,grad_norm: 0.8917831696979961, iteration: 147969
loss: 1.0096614360809326,grad_norm: 0.946968775313225, iteration: 147970
loss: 0.9984815716743469,grad_norm: 0.9999992064918236, iteration: 147971
loss: 1.0011005401611328,grad_norm: 0.9660957675204811, iteration: 147972
loss: 1.0087815523147583,grad_norm: 0.9999991143712861, iteration: 147973
loss: 1.0070198774337769,grad_norm: 0.9591205133149058, iteration: 147974
loss: 0.9882189035415649,grad_norm: 0.9496077117958281, iteration: 147975
loss: 1.0210694074630737,grad_norm: 0.9256545487136685, iteration: 147976
loss: 0.992928683757782,grad_norm: 0.947356671714107, iteration: 147977
loss: 1.003151297569275,grad_norm: 0.915669270737681, iteration: 147978
loss: 1.0020686388015747,grad_norm: 0.8860343143919819, iteration: 147979
loss: 0.9677309393882751,grad_norm: 0.999999176802659, iteration: 147980
loss: 1.014466643333435,grad_norm: 0.9999990860586943, iteration: 147981
loss: 0.9799121618270874,grad_norm: 0.9318780493761913, iteration: 147982
loss: 1.0014103651046753,grad_norm: 0.9999990553219519, iteration: 147983
loss: 0.9977328777313232,grad_norm: 0.9999991297204797, iteration: 147984
loss: 1.0161556005477905,grad_norm: 0.9879334193289238, iteration: 147985
loss: 1.0273702144622803,grad_norm: 0.9999989999524959, iteration: 147986
loss: 1.0169743299484253,grad_norm: 0.9248897035912658, iteration: 147987
loss: 1.0052804946899414,grad_norm: 0.999999137319945, iteration: 147988
loss: 0.9999808669090271,grad_norm: 0.9411756453966741, iteration: 147989
loss: 0.9896858930587769,grad_norm: 0.8850654401085984, iteration: 147990
loss: 0.9978877902030945,grad_norm: 0.9995945629516843, iteration: 147991
loss: 0.9758479595184326,grad_norm: 0.9999991124116027, iteration: 147992
loss: 1.008378505706787,grad_norm: 0.9999991293518081, iteration: 147993
loss: 1.004667043685913,grad_norm: 0.9725992002754301, iteration: 147994
loss: 0.9755582809448242,grad_norm: 0.8603663500912844, iteration: 147995
loss: 1.05527925491333,grad_norm: 0.9999993753984022, iteration: 147996
loss: 1.0169771909713745,grad_norm: 0.8436275228468967, iteration: 147997
loss: 0.9792017936706543,grad_norm: 0.9999991294533448, iteration: 147998
loss: 1.0244249105453491,grad_norm: 0.9999992024054194, iteration: 147999
loss: 0.9497848749160767,grad_norm: 0.9999990802847483, iteration: 148000
loss: 1.002151370048523,grad_norm: 0.9999992487328417, iteration: 148001
loss: 1.0022013187408447,grad_norm: 0.8858822217930478, iteration: 148002
loss: 0.9829212427139282,grad_norm: 0.9157778649783496, iteration: 148003
loss: 0.974131166934967,grad_norm: 0.8752960637676179, iteration: 148004
loss: 0.9657411575317383,grad_norm: 0.999999209973223, iteration: 148005
loss: 1.0111136436462402,grad_norm: 0.9999991119903325, iteration: 148006
loss: 0.9442898631095886,grad_norm: 0.999999018333031, iteration: 148007
loss: 0.9845476746559143,grad_norm: 0.9999989880898921, iteration: 148008
loss: 1.0003139972686768,grad_norm: 0.9999990645320072, iteration: 148009
loss: 0.9871170520782471,grad_norm: 0.9999989377759904, iteration: 148010
loss: 1.018778681755066,grad_norm: 0.9999989659860884, iteration: 148011
loss: 1.0670446157455444,grad_norm: 0.9999991018397324, iteration: 148012
loss: 0.9746878743171692,grad_norm: 0.7124825337229528, iteration: 148013
loss: 0.9589042663574219,grad_norm: 0.8881595852050562, iteration: 148014
loss: 1.0167843103408813,grad_norm: 0.916866656489815, iteration: 148015
loss: 1.0109753608703613,grad_norm: 0.9999998821083386, iteration: 148016
loss: 0.9842972755432129,grad_norm: 0.9999990451946741, iteration: 148017
loss: 0.9933448433876038,grad_norm: 0.9999990543106688, iteration: 148018
loss: 0.9937273263931274,grad_norm: 0.9999990125557594, iteration: 148019
loss: 1.0163722038269043,grad_norm: 0.9971813863424489, iteration: 148020
loss: 1.0186551809310913,grad_norm: 0.9972410782868502, iteration: 148021
loss: 1.0099680423736572,grad_norm: 0.9298648605996891, iteration: 148022
loss: 0.9900678396224976,grad_norm: 0.9999991416315731, iteration: 148023
loss: 0.9920799732208252,grad_norm: 0.99999929778227, iteration: 148024
loss: 0.9943040609359741,grad_norm: 0.9143696228244217, iteration: 148025
loss: 1.0024504661560059,grad_norm: 0.999998934982036, iteration: 148026
loss: 1.0471127033233643,grad_norm: 0.9999993299467551, iteration: 148027
loss: 1.0243757963180542,grad_norm: 0.821652369074915, iteration: 148028
loss: 0.9674739241600037,grad_norm: 0.9999989639285093, iteration: 148029
loss: 1.014926552772522,grad_norm: 0.9999992561131824, iteration: 148030
loss: 1.0102421045303345,grad_norm: 0.9999989470319565, iteration: 148031
loss: 1.0150084495544434,grad_norm: 0.9909769597728759, iteration: 148032
loss: 1.0038608312606812,grad_norm: 0.9999997175661363, iteration: 148033
loss: 1.0037842988967896,grad_norm: 0.9999990171352162, iteration: 148034
loss: 0.9940058588981628,grad_norm: 0.8098240229049333, iteration: 148035
loss: 1.0012513399124146,grad_norm: 0.8375028031956832, iteration: 148036
loss: 1.009994387626648,grad_norm: 0.9999990208010147, iteration: 148037
loss: 0.96282958984375,grad_norm: 0.9999991380293043, iteration: 148038
loss: 1.0199519395828247,grad_norm: 0.999999275773104, iteration: 148039
loss: 1.0271427631378174,grad_norm: 0.9999993145660044, iteration: 148040
loss: 1.0046635866165161,grad_norm: 0.9999992285781774, iteration: 148041
loss: 1.0279573202133179,grad_norm: 0.9999991725515115, iteration: 148042
loss: 0.9975114464759827,grad_norm: 0.9999991241764111, iteration: 148043
loss: 0.9816336035728455,grad_norm: 0.9388369028419965, iteration: 148044
loss: 0.9658159017562866,grad_norm: 0.9999990765272319, iteration: 148045
loss: 0.9727497100830078,grad_norm: 0.9557227974278042, iteration: 148046
loss: 1.0289051532745361,grad_norm: 0.9999991567809218, iteration: 148047
loss: 0.9934156537055969,grad_norm: 0.9999989744231143, iteration: 148048
loss: 1.0203503370285034,grad_norm: 0.9999992205880119, iteration: 148049
loss: 1.083695650100708,grad_norm: 0.9999993250113536, iteration: 148050
loss: 1.0133212804794312,grad_norm: 0.9999999420653343, iteration: 148051
loss: 1.0048686265945435,grad_norm: 0.8111936159812819, iteration: 148052
loss: 0.9852470755577087,grad_norm: 0.9999990806788033, iteration: 148053
loss: 0.9915894865989685,grad_norm: 0.7806733926358957, iteration: 148054
loss: 0.9785354137420654,grad_norm: 0.999999174488706, iteration: 148055
loss: 0.9709621071815491,grad_norm: 0.9935179552868894, iteration: 148056
loss: 1.0274513959884644,grad_norm: 0.9999989857561095, iteration: 148057
loss: 0.9716551303863525,grad_norm: 0.9999991743669523, iteration: 148058
loss: 0.9992476105690002,grad_norm: 0.9999990819797167, iteration: 148059
loss: 1.019002079963684,grad_norm: 0.9999990058611491, iteration: 148060
loss: 0.9998192191123962,grad_norm: 0.9999991378771569, iteration: 148061
loss: 1.0104678869247437,grad_norm: 0.9999990519102415, iteration: 148062
loss: 0.980457067489624,grad_norm: 0.9177023047350801, iteration: 148063
loss: 0.9760392308235168,grad_norm: 0.9999990595659957, iteration: 148064
loss: 1.0511494874954224,grad_norm: 0.9999992183963233, iteration: 148065
loss: 1.1017037630081177,grad_norm: 0.9999995670252447, iteration: 148066
loss: 1.0028570890426636,grad_norm: 0.9999990746155364, iteration: 148067
loss: 1.016663908958435,grad_norm: 0.9999996678264312, iteration: 148068
loss: 0.9979001879692078,grad_norm: 0.9999993297445272, iteration: 148069
loss: 1.0499963760375977,grad_norm: 0.9807730104119728, iteration: 148070
loss: 1.007336139678955,grad_norm: 0.8789425132074834, iteration: 148071
loss: 0.9640599489212036,grad_norm: 0.9999990015055281, iteration: 148072
loss: 0.989154577255249,grad_norm: 0.9999993998541424, iteration: 148073
loss: 1.0273274183273315,grad_norm: 0.9999992220803249, iteration: 148074
loss: 0.9891411662101746,grad_norm: 0.8481562891602658, iteration: 148075
loss: 1.013022541999817,grad_norm: 0.9886360694248283, iteration: 148076
loss: 1.0060675144195557,grad_norm: 0.9999992820398309, iteration: 148077
loss: 1.0520985126495361,grad_norm: 0.9999992904718389, iteration: 148078
loss: 0.9698349833488464,grad_norm: 0.999999179162985, iteration: 148079
loss: 0.9824444651603699,grad_norm: 0.9999991903122527, iteration: 148080
loss: 1.0184913873672485,grad_norm: 0.9999991772695397, iteration: 148081
loss: 1.0026733875274658,grad_norm: 0.9297239520357278, iteration: 148082
loss: 0.9680771231651306,grad_norm: 0.9999991219968413, iteration: 148083
loss: 0.9752845168113708,grad_norm: 0.9999995743043721, iteration: 148084
loss: 0.9882550239562988,grad_norm: 0.9305882326240746, iteration: 148085
loss: 1.0261069536209106,grad_norm: 0.9999990827958303, iteration: 148086
loss: 1.0097917318344116,grad_norm: 0.9174618056117689, iteration: 148087
loss: 1.0506088733673096,grad_norm: 0.999999108602772, iteration: 148088
loss: 1.0080214738845825,grad_norm: 0.9621715112810328, iteration: 148089
loss: 0.990943431854248,grad_norm: 0.9999991141614908, iteration: 148090
loss: 0.9551216959953308,grad_norm: 0.9999992087900196, iteration: 148091
loss: 1.0481888055801392,grad_norm: 0.9999991182840755, iteration: 148092
loss: 1.0291225910186768,grad_norm: 0.999999217996783, iteration: 148093
loss: 1.039459466934204,grad_norm: 0.9999991775927004, iteration: 148094
loss: 0.9970963597297668,grad_norm: 0.9999997437331088, iteration: 148095
loss: 0.9830927848815918,grad_norm: 0.9999990839773283, iteration: 148096
loss: 0.9988568425178528,grad_norm: 0.9999989862843353, iteration: 148097
loss: 0.9819286465644836,grad_norm: 0.9999992860143111, iteration: 148098
loss: 1.0120021104812622,grad_norm: 0.9999991168495576, iteration: 148099
loss: 0.99728924036026,grad_norm: 0.7873127202063167, iteration: 148100
loss: 1.0409551858901978,grad_norm: 0.9999992578829342, iteration: 148101
loss: 0.9979633688926697,grad_norm: 0.9556173486512839, iteration: 148102
loss: 0.9782145023345947,grad_norm: 0.9650464035359975, iteration: 148103
loss: 0.9971321225166321,grad_norm: 0.9999991714667515, iteration: 148104
loss: 0.991066575050354,grad_norm: 0.9999990687617027, iteration: 148105
loss: 0.9529261589050293,grad_norm: 0.9999991851178772, iteration: 148106
loss: 0.9713941216468811,grad_norm: 0.9999990360944639, iteration: 148107
loss: 1.0141267776489258,grad_norm: 0.9999990430433958, iteration: 148108
loss: 0.9999012351036072,grad_norm: 0.999999213437134, iteration: 148109
loss: 1.0098508596420288,grad_norm: 0.9999992719556873, iteration: 148110
loss: 1.0310282707214355,grad_norm: 0.9999990761959922, iteration: 148111
loss: 0.968451976776123,grad_norm: 0.9559153793673208, iteration: 148112
loss: 0.9519892930984497,grad_norm: 0.8452608702071628, iteration: 148113
loss: 1.0198156833648682,grad_norm: 0.9999991766683892, iteration: 148114
loss: 0.9955864548683167,grad_norm: 0.9072832997215255, iteration: 148115
loss: 0.987528920173645,grad_norm: 0.9534757767674139, iteration: 148116
loss: 1.026486873626709,grad_norm: 0.9999996577199561, iteration: 148117
loss: 1.0226540565490723,grad_norm: 0.999998996318758, iteration: 148118
loss: 1.0008689165115356,grad_norm: 0.999999162885899, iteration: 148119
loss: 1.0141196250915527,grad_norm: 0.999999020759796, iteration: 148120
loss: 1.0141022205352783,grad_norm: 0.9707896939726536, iteration: 148121
loss: 0.9760589003562927,grad_norm: 0.9561776022932003, iteration: 148122
loss: 1.0098645687103271,grad_norm: 0.9859021243077667, iteration: 148123
loss: 0.9589748978614807,grad_norm: 0.9932279404527969, iteration: 148124
loss: 0.9841393232345581,grad_norm: 0.8243558732687502, iteration: 148125
loss: 1.0099183320999146,grad_norm: 0.9932356063288272, iteration: 148126
loss: 0.99835205078125,grad_norm: 0.9522160663343223, iteration: 148127
loss: 1.0330839157104492,grad_norm: 0.9999991339605395, iteration: 148128
loss: 0.9826537370681763,grad_norm: 0.9999990925112714, iteration: 148129
loss: 1.0008665323257446,grad_norm: 0.9282928375741231, iteration: 148130
loss: 1.1063374280929565,grad_norm: 0.9999999016620286, iteration: 148131
loss: 1.0170611143112183,grad_norm: 0.9882843694266139, iteration: 148132
loss: 0.9876448512077332,grad_norm: 0.999999134719742, iteration: 148133
loss: 0.9690191745758057,grad_norm: 0.9999991924959813, iteration: 148134
loss: 1.035107970237732,grad_norm: 0.9999990535746884, iteration: 148135
loss: 0.9917870163917542,grad_norm: 0.8795925320774022, iteration: 148136
loss: 1.007529854774475,grad_norm: 0.9999994123972255, iteration: 148137
loss: 1.0299469232559204,grad_norm: 0.9999990739828573, iteration: 148138
loss: 1.023943305015564,grad_norm: 0.871100881284132, iteration: 148139
loss: 1.0009790658950806,grad_norm: 0.8754564518571978, iteration: 148140
loss: 1.1238967180252075,grad_norm: 0.9999991929206556, iteration: 148141
loss: 0.9776337146759033,grad_norm: 0.9999990356220155, iteration: 148142
loss: 0.991955578327179,grad_norm: 0.9492205266648875, iteration: 148143
loss: 0.9852506518363953,grad_norm: 0.8562308422997319, iteration: 148144
loss: 1.013498067855835,grad_norm: 0.9999992939816077, iteration: 148145
loss: 1.007582187652588,grad_norm: 0.9999991903762627, iteration: 148146
loss: 0.9740367531776428,grad_norm: 0.9678577174261163, iteration: 148147
loss: 1.0223816633224487,grad_norm: 0.9483325263740751, iteration: 148148
loss: 0.994391679763794,grad_norm: 0.9999992357725124, iteration: 148149
loss: 1.0511493682861328,grad_norm: 0.9999992272043, iteration: 148150
loss: 1.0043648481369019,grad_norm: 0.9999989970119159, iteration: 148151
loss: 1.0052666664123535,grad_norm: 0.9305889281531993, iteration: 148152
loss: 0.9505090117454529,grad_norm: 0.9709899125586301, iteration: 148153
loss: 1.0288808345794678,grad_norm: 0.9999999481811164, iteration: 148154
loss: 0.9875587224960327,grad_norm: 0.999999582734008, iteration: 148155
loss: 0.9851206541061401,grad_norm: 0.9393712579300053, iteration: 148156
loss: 0.9839729070663452,grad_norm: 0.9999991453736801, iteration: 148157
loss: 0.9805469512939453,grad_norm: 0.9241816844632814, iteration: 148158
loss: 1.0007684230804443,grad_norm: 0.999999092561402, iteration: 148159
loss: 0.9945269227027893,grad_norm: 0.9210331804057473, iteration: 148160
loss: 0.9896283149719238,grad_norm: 0.7682543179387656, iteration: 148161
loss: 1.01458740234375,grad_norm: 0.9999990853338577, iteration: 148162
loss: 1.0224133729934692,grad_norm: 0.9999991758286096, iteration: 148163
loss: 0.9877220988273621,grad_norm: 0.9999991189407205, iteration: 148164
loss: 0.9822129011154175,grad_norm: 0.9415172335993561, iteration: 148165
loss: 0.9942294955253601,grad_norm: 0.9999998534694912, iteration: 148166
loss: 1.0519318580627441,grad_norm: 0.9999998601764148, iteration: 148167
loss: 1.0020811557769775,grad_norm: 0.9999998147576707, iteration: 148168
loss: 0.9634621143341064,grad_norm: 0.9999991279721842, iteration: 148169
loss: 0.9948105216026306,grad_norm: 0.9402942198104834, iteration: 148170
loss: 1.0181561708450317,grad_norm: 0.9059369014370984, iteration: 148171
loss: 0.9848779439926147,grad_norm: 0.8250620955067791, iteration: 148172
loss: 0.9942517876625061,grad_norm: 0.9999990905330526, iteration: 148173
loss: 0.9945810437202454,grad_norm: 0.9999991642250797, iteration: 148174
loss: 0.9829595685005188,grad_norm: 0.999999186322595, iteration: 148175
loss: 0.9820857048034668,grad_norm: 0.9559482728579641, iteration: 148176
loss: 0.9924576878547668,grad_norm: 0.9999991344082786, iteration: 148177
loss: 0.9886969327926636,grad_norm: 0.9999989006024552, iteration: 148178
loss: 0.9759666323661804,grad_norm: 0.9675988567710881, iteration: 148179
loss: 0.9742328524589539,grad_norm: 0.8900634228952551, iteration: 148180
loss: 0.983822226524353,grad_norm: 0.9973522503459122, iteration: 148181
loss: 1.0139532089233398,grad_norm: 0.9999990217645075, iteration: 148182
loss: 0.9579198360443115,grad_norm: 0.9175725389813372, iteration: 148183
loss: 1.0117976665496826,grad_norm: 0.9999996323877638, iteration: 148184
loss: 1.0203803777694702,grad_norm: 0.9258452295693416, iteration: 148185
loss: 1.016587495803833,grad_norm: 0.9999991460646706, iteration: 148186
loss: 1.0044053792953491,grad_norm: 0.9999991526009813, iteration: 148187
loss: 0.9876782298088074,grad_norm: 0.8645057476896789, iteration: 148188
loss: 1.0023049116134644,grad_norm: 0.8352092924617855, iteration: 148189
loss: 1.005947232246399,grad_norm: 0.9999993018289299, iteration: 148190
loss: 0.9499748349189758,grad_norm: 0.9603329707665264, iteration: 148191
loss: 1.0683377981185913,grad_norm: 0.9999995161425117, iteration: 148192
loss: 0.9908875823020935,grad_norm: 0.9999990572589692, iteration: 148193
loss: 0.9811325669288635,grad_norm: 0.9999990357381386, iteration: 148194
loss: 0.9721774458885193,grad_norm: 0.999999132429549, iteration: 148195
loss: 1.0026183128356934,grad_norm: 0.8690918445999279, iteration: 148196
loss: 0.9929589033126831,grad_norm: 0.9667644311922, iteration: 148197
loss: 0.9920541644096375,grad_norm: 0.9999991709560745, iteration: 148198
loss: 1.0759698152542114,grad_norm: 0.9999998343015651, iteration: 148199
loss: 1.0104713439941406,grad_norm: 0.9999991441739797, iteration: 148200
loss: 0.9810441732406616,grad_norm: 0.925351019141048, iteration: 148201
loss: 1.0259116888046265,grad_norm: 0.9999991188712787, iteration: 148202
loss: 0.983025312423706,grad_norm: 0.9999990656695894, iteration: 148203
loss: 0.9838466048240662,grad_norm: 0.9999989851117553, iteration: 148204
loss: 1.0244367122650146,grad_norm: 0.999999187569968, iteration: 148205
loss: 1.0299460887908936,grad_norm: 0.9999990397743014, iteration: 148206
loss: 1.0200341939926147,grad_norm: 0.9999991072310506, iteration: 148207
loss: 1.002402901649475,grad_norm: 0.991675555431149, iteration: 148208
loss: 0.9721965789794922,grad_norm: 0.9999992069544171, iteration: 148209
loss: 0.9951413869857788,grad_norm: 0.9999988900625357, iteration: 148210
loss: 0.9735787510871887,grad_norm: 0.9751064632783566, iteration: 148211
loss: 1.0105301141738892,grad_norm: 0.9999990789240837, iteration: 148212
loss: 1.0602401494979858,grad_norm: 0.999999590065155, iteration: 148213
loss: 1.0318464040756226,grad_norm: 0.9999997296120683, iteration: 148214
loss: 0.9822635650634766,grad_norm: 0.9999993744435898, iteration: 148215
loss: 0.9993695616722107,grad_norm: 0.8371544932055346, iteration: 148216
loss: 1.0334935188293457,grad_norm: 0.9999989515752388, iteration: 148217
loss: 1.0280858278274536,grad_norm: 0.9999993482238788, iteration: 148218
loss: 0.9682677984237671,grad_norm: 0.9999991575435305, iteration: 148219
loss: 1.0070641040802002,grad_norm: 0.8587514910603247, iteration: 148220
loss: 0.9916465282440186,grad_norm: 0.9999991073247442, iteration: 148221
loss: 1.0110079050064087,grad_norm: 0.9999991467152894, iteration: 148222
loss: 1.033019781112671,grad_norm: 0.978294018415341, iteration: 148223
loss: 1.0145903825759888,grad_norm: 0.9999992010843034, iteration: 148224
loss: 0.9991523623466492,grad_norm: 0.9999991216480818, iteration: 148225
loss: 1.0196926593780518,grad_norm: 0.8244388586253, iteration: 148226
loss: 0.9827962517738342,grad_norm: 0.9999994242561173, iteration: 148227
loss: 1.0244437456130981,grad_norm: 0.9977125787331698, iteration: 148228
loss: 0.9905638694763184,grad_norm: 0.9329717927402876, iteration: 148229
loss: 1.0456022024154663,grad_norm: 0.9636243685475743, iteration: 148230
loss: 0.9933921098709106,grad_norm: 0.9642347868742014, iteration: 148231
loss: 0.9916139841079712,grad_norm: 0.9999990055002024, iteration: 148232
loss: 0.9687357544898987,grad_norm: 0.9999992822914184, iteration: 148233
loss: 0.9790446162223816,grad_norm: 0.8925616957243203, iteration: 148234
loss: 1.0200196504592896,grad_norm: 0.9999990490136927, iteration: 148235
loss: 0.9582511186599731,grad_norm: 0.9999991701865127, iteration: 148236
loss: 0.9472665786743164,grad_norm: 0.9999990914448779, iteration: 148237
loss: 0.9838294982910156,grad_norm: 0.999999175825724, iteration: 148238
loss: 0.979853093624115,grad_norm: 0.9999991706236373, iteration: 148239
loss: 1.1427804231643677,grad_norm: 0.999998967146795, iteration: 148240
loss: 1.020754098892212,grad_norm: 0.999999000417504, iteration: 148241
loss: 0.9945545196533203,grad_norm: 0.9999993418204396, iteration: 148242
loss: 1.029348373413086,grad_norm: 0.9999990459532343, iteration: 148243
loss: 1.0160255432128906,grad_norm: 0.9999991436278536, iteration: 148244
loss: 1.096669316291809,grad_norm: 0.9999998716417053, iteration: 148245
loss: 0.984717845916748,grad_norm: 0.9092620351822001, iteration: 148246
loss: 0.9907341599464417,grad_norm: 0.98122788789311, iteration: 148247
loss: 0.9565922617912292,grad_norm: 0.9999991845309285, iteration: 148248
loss: 1.000762701034546,grad_norm: 0.9318969932149181, iteration: 148249
loss: 0.9924325942993164,grad_norm: 0.9999989940206487, iteration: 148250
loss: 0.9876559376716614,grad_norm: 0.9999992643985338, iteration: 148251
loss: 1.009924054145813,grad_norm: 0.9999991167548073, iteration: 148252
loss: 1.095314621925354,grad_norm: 0.9999990815600731, iteration: 148253
loss: 0.9974294304847717,grad_norm: 0.9999997085955908, iteration: 148254
loss: 0.9818113446235657,grad_norm: 0.9466293484693381, iteration: 148255
loss: 1.055355429649353,grad_norm: 0.999999734011635, iteration: 148256
loss: 1.01437509059906,grad_norm: 0.9999991528941996, iteration: 148257
loss: 0.9782907962799072,grad_norm: 0.9999992089488383, iteration: 148258
loss: 1.0030759572982788,grad_norm: 0.9999991370640137, iteration: 148259
loss: 1.0376206636428833,grad_norm: 0.8169852590127197, iteration: 148260
loss: 1.0589466094970703,grad_norm: 0.9999992781844679, iteration: 148261
loss: 1.0113469362258911,grad_norm: 0.999999326086641, iteration: 148262
loss: 0.9954057931900024,grad_norm: 0.9839981387676116, iteration: 148263
loss: 0.9698491096496582,grad_norm: 0.9827718138470823, iteration: 148264
loss: 1.0004979372024536,grad_norm: 0.9999995806446415, iteration: 148265
loss: 1.006192922592163,grad_norm: 0.951652952644979, iteration: 148266
loss: 1.063496708869934,grad_norm: 0.9999998081111199, iteration: 148267
loss: 0.9941558837890625,grad_norm: 0.999999201420525, iteration: 148268
loss: 0.9630131721496582,grad_norm: 0.999999089630169, iteration: 148269
loss: 0.9905747175216675,grad_norm: 0.9417131108338181, iteration: 148270
loss: 0.9831566214561462,grad_norm: 0.995221855640696, iteration: 148271
loss: 1.0019468069076538,grad_norm: 0.9999989318181469, iteration: 148272
loss: 1.0507134199142456,grad_norm: 0.9999998294433591, iteration: 148273
loss: 1.0077877044677734,grad_norm: 0.9999991467184114, iteration: 148274
loss: 1.0874346494674683,grad_norm: 0.9999993102768041, iteration: 148275
loss: 1.0202133655548096,grad_norm: 0.9115624395793076, iteration: 148276
loss: 1.0155686140060425,grad_norm: 0.9999996701175073, iteration: 148277
loss: 1.0481916666030884,grad_norm: 0.9999998357404961, iteration: 148278
loss: 0.9875468611717224,grad_norm: 0.9055444617633687, iteration: 148279
loss: 1.089888095855713,grad_norm: 0.9999997775023907, iteration: 148280
loss: 1.0369852781295776,grad_norm: 0.999999046140502, iteration: 148281
loss: 1.1785954236984253,grad_norm: 0.9999997761964861, iteration: 148282
loss: 1.0580533742904663,grad_norm: 0.99999957507181, iteration: 148283
loss: 1.110460877418518,grad_norm: 0.9999997953048531, iteration: 148284
loss: 0.981955885887146,grad_norm: 0.9999991680791283, iteration: 148285
loss: 1.1022950410842896,grad_norm: 0.999999534535993, iteration: 148286
loss: 1.0205597877502441,grad_norm: 0.8620681880259834, iteration: 148287
loss: 1.38437020778656,grad_norm: 0.999999915812209, iteration: 148288
loss: 1.0696868896484375,grad_norm: 0.9999992206330576, iteration: 148289
loss: 1.1743381023406982,grad_norm: 0.9999995276066685, iteration: 148290
loss: 1.2089289426803589,grad_norm: 0.9999998638210686, iteration: 148291
loss: 1.2916682958602905,grad_norm: 0.9999993622105441, iteration: 148292
loss: 1.1336067914962769,grad_norm: 0.9999996647206064, iteration: 148293
loss: 1.0593725442886353,grad_norm: 0.9999994050275202, iteration: 148294
loss: 0.9889761805534363,grad_norm: 0.9999991716118153, iteration: 148295
loss: 0.9998503923416138,grad_norm: 0.9999991246412929, iteration: 148296
loss: 1.0099283456802368,grad_norm: 0.9999994389332442, iteration: 148297
loss: 0.9802187085151672,grad_norm: 0.9881068738836704, iteration: 148298
loss: 1.146220326423645,grad_norm: 0.9999998901872765, iteration: 148299
loss: 1.031851887702942,grad_norm: 0.9613605433180714, iteration: 148300
loss: 1.0064393281936646,grad_norm: 0.9454036821835182, iteration: 148301
loss: 1.0162235498428345,grad_norm: 1.0000000059333396, iteration: 148302
loss: 1.0236984491348267,grad_norm: 0.9999990014313549, iteration: 148303
loss: 1.1642248630523682,grad_norm: 0.9999994047810499, iteration: 148304
loss: 0.994417667388916,grad_norm: 0.9999990588571896, iteration: 148305
loss: 1.0191082954406738,grad_norm: 0.9999991524641356, iteration: 148306
loss: 1.0364415645599365,grad_norm: 0.999999258528124, iteration: 148307
loss: 1.0112218856811523,grad_norm: 0.999999243447193, iteration: 148308
loss: 1.0277196168899536,grad_norm: 0.8582720852108765, iteration: 148309
loss: 1.0100749731063843,grad_norm: 0.9999992082912214, iteration: 148310
loss: 1.008637547492981,grad_norm: 0.999999233919485, iteration: 148311
loss: 1.0146077871322632,grad_norm: 0.999999489735169, iteration: 148312
loss: 1.0034167766571045,grad_norm: 0.9999991477377747, iteration: 148313
loss: 1.1037547588348389,grad_norm: 1.00000000844523, iteration: 148314
loss: 1.0510891675949097,grad_norm: 0.9999991307256956, iteration: 148315
loss: 1.0195525884628296,grad_norm: 0.9999993523132397, iteration: 148316
loss: 0.9664904475212097,grad_norm: 0.9412373093961066, iteration: 148317
loss: 1.0047152042388916,grad_norm: 0.9422005809473653, iteration: 148318
loss: 1.0277159214019775,grad_norm: 0.9999991970193512, iteration: 148319
loss: 1.0474859476089478,grad_norm: 0.9822439916551573, iteration: 148320
loss: 0.9765576124191284,grad_norm: 0.9999990063234461, iteration: 148321
loss: 1.0369040966033936,grad_norm: 0.9999991972455344, iteration: 148322
loss: 1.0102934837341309,grad_norm: 0.999999162367612, iteration: 148323
loss: 1.032913088798523,grad_norm: 0.9999996435135675, iteration: 148324
loss: 0.9854716658592224,grad_norm: 0.9045030941537512, iteration: 148325
loss: 1.032903790473938,grad_norm: 0.9999991142304662, iteration: 148326
loss: 0.9865012168884277,grad_norm: 0.9999990898049259, iteration: 148327
loss: 1.0263895988464355,grad_norm: 0.9999997764746418, iteration: 148328
loss: 1.015831708908081,grad_norm: 0.9999992003163647, iteration: 148329
loss: 1.0031579732894897,grad_norm: 0.9999992791132352, iteration: 148330
loss: 0.9879310727119446,grad_norm: 0.999999494653588, iteration: 148331
loss: 0.9791050553321838,grad_norm: 0.8659216673679804, iteration: 148332
loss: 1.0078644752502441,grad_norm: 0.9999991907179755, iteration: 148333
loss: 1.0200594663619995,grad_norm: 0.9999991438702486, iteration: 148334
loss: 1.0040074586868286,grad_norm: 0.9999994060969548, iteration: 148335
loss: 0.9735761284828186,grad_norm: 0.9999992753885192, iteration: 148336
loss: 1.0407311916351318,grad_norm: 0.999999134677635, iteration: 148337
loss: 1.0103565454483032,grad_norm: 0.9408722698807392, iteration: 148338
loss: 0.9908111691474915,grad_norm: 0.9999990825503324, iteration: 148339
loss: 1.0195276737213135,grad_norm: 0.9999991695550191, iteration: 148340
loss: 0.9665442705154419,grad_norm: 0.9561379007383958, iteration: 148341
loss: 1.002269983291626,grad_norm: 0.9999990890355394, iteration: 148342
loss: 1.071433186531067,grad_norm: 0.9549661057389193, iteration: 148343
loss: 1.0332990884780884,grad_norm: 0.9999995401060974, iteration: 148344
loss: 0.9835752248764038,grad_norm: 0.9760844643933866, iteration: 148345
loss: 1.0428310632705688,grad_norm: 0.9999993173062383, iteration: 148346
loss: 0.997983455657959,grad_norm: 0.7598148589403744, iteration: 148347
loss: 1.0432392358779907,grad_norm: 0.9999993716351586, iteration: 148348
loss: 0.9999189972877502,grad_norm: 0.877684216607734, iteration: 148349
loss: 1.0461499691009521,grad_norm: 0.9999996738795175, iteration: 148350
loss: 1.0517278909683228,grad_norm: 0.9999998712970245, iteration: 148351
loss: 1.0041096210479736,grad_norm: 0.9999991493996521, iteration: 148352
loss: 0.9903028011322021,grad_norm: 0.9999990988774454, iteration: 148353
loss: 1.1008177995681763,grad_norm: 0.9999999143216354, iteration: 148354
loss: 1.024440050125122,grad_norm: 0.9999993928345591, iteration: 148355
loss: 1.1217749118804932,grad_norm: 0.9999992178978367, iteration: 148356
loss: 1.0647495985031128,grad_norm: 0.9999994770999998, iteration: 148357
loss: 0.9741212725639343,grad_norm: 0.9999991207605596, iteration: 148358
loss: 1.029948115348816,grad_norm: 0.9999999574955166, iteration: 148359
loss: 1.1333836317062378,grad_norm: 0.9999997158056131, iteration: 148360
loss: 1.0104234218597412,grad_norm: 0.9999995974067798, iteration: 148361
loss: 1.017917513847351,grad_norm: 0.9999993248011328, iteration: 148362
loss: 0.9932772517204285,grad_norm: 0.9970890336921251, iteration: 148363
loss: 0.9956068396568298,grad_norm: 0.9999992382817763, iteration: 148364
loss: 1.0235323905944824,grad_norm: 0.9999992409253837, iteration: 148365
loss: 1.023484706878662,grad_norm: 0.9999989894344625, iteration: 148366
loss: 1.0815908908843994,grad_norm: 0.9999998460780531, iteration: 148367
loss: 0.9748371243476868,grad_norm: 0.9999993404318896, iteration: 148368
loss: 0.9914180636405945,grad_norm: 0.9999990835469107, iteration: 148369
loss: 0.9856724739074707,grad_norm: 0.9999992583173322, iteration: 148370
loss: 1.0307832956314087,grad_norm: 0.9999993555155327, iteration: 148371
loss: 1.0028393268585205,grad_norm: 0.9999990184266372, iteration: 148372
loss: 1.0178357362747192,grad_norm: 0.8547165946830767, iteration: 148373
loss: 1.0314767360687256,grad_norm: 0.9763842644361322, iteration: 148374
loss: 1.083807110786438,grad_norm: 0.9999991674148918, iteration: 148375
loss: 0.9868074655532837,grad_norm: 0.9999992573775953, iteration: 148376
loss: 1.0123211145401,grad_norm: 0.9999993608769444, iteration: 148377
loss: 0.999803364276886,grad_norm: 0.9918667448679228, iteration: 148378
loss: 0.9737231135368347,grad_norm: 0.9999991285576749, iteration: 148379
loss: 0.9825109839439392,grad_norm: 0.9999997087156627, iteration: 148380
loss: 1.0103176832199097,grad_norm: 0.9335134865041068, iteration: 148381
loss: 0.9897961616516113,grad_norm: 0.8776842035800607, iteration: 148382
loss: 1.0097277164459229,grad_norm: 0.9750646847384933, iteration: 148383
loss: 1.012770175933838,grad_norm: 0.9999991671894249, iteration: 148384
loss: 1.0226795673370361,grad_norm: 0.9763397339323366, iteration: 148385
loss: 0.9898380637168884,grad_norm: 0.951359571476749, iteration: 148386
loss: 0.9705895185470581,grad_norm: 0.9556949125748271, iteration: 148387
loss: 0.9887779951095581,grad_norm: 0.9490619366409723, iteration: 148388
loss: 1.002990484237671,grad_norm: 0.9995323228723555, iteration: 148389
loss: 0.9713546633720398,grad_norm: 0.999999163790048, iteration: 148390
loss: 0.9878660440444946,grad_norm: 0.9946136713849554, iteration: 148391
loss: 1.0347602367401123,grad_norm: 0.9287647073570567, iteration: 148392
loss: 1.043432593345642,grad_norm: 0.9999990560044518, iteration: 148393
loss: 1.0120710134506226,grad_norm: 0.9999999626573564, iteration: 148394
loss: 1.0181013345718384,grad_norm: 0.9999994924255714, iteration: 148395
loss: 1.0105400085449219,grad_norm: 0.9708883481628167, iteration: 148396
loss: 0.9842035174369812,grad_norm: 0.968325732125427, iteration: 148397
loss: 0.9552934169769287,grad_norm: 0.9999991276266953, iteration: 148398
loss: 0.9938851594924927,grad_norm: 0.9999991504867353, iteration: 148399
loss: 1.035826325416565,grad_norm: 0.9999989974729189, iteration: 148400
loss: 1.0199756622314453,grad_norm: 0.9999989668022969, iteration: 148401
loss: 0.9839026927947998,grad_norm: 0.999999102797409, iteration: 148402
loss: 1.011296033859253,grad_norm: 0.8771717212308715, iteration: 148403
loss: 1.1527208089828491,grad_norm: 0.9999998793287728, iteration: 148404
loss: 1.0069652795791626,grad_norm: 0.999999189669601, iteration: 148405
loss: 0.9594654440879822,grad_norm: 0.9999991020539208, iteration: 148406
loss: 1.102523684501648,grad_norm: 0.9999994952032194, iteration: 148407
loss: 1.013961672782898,grad_norm: 0.9841331528586235, iteration: 148408
loss: 0.9751993417739868,grad_norm: 0.8889631560833172, iteration: 148409
loss: 0.9702721238136292,grad_norm: 0.9999990690981568, iteration: 148410
loss: 0.9859556555747986,grad_norm: 0.9999991500032394, iteration: 148411
loss: 0.9854989647865295,grad_norm: 0.992702230881167, iteration: 148412
loss: 0.9802238345146179,grad_norm: 0.9999991445040728, iteration: 148413
loss: 1.113335132598877,grad_norm: 0.9999990175817404, iteration: 148414
loss: 0.9893574714660645,grad_norm: 0.9172665978163035, iteration: 148415
loss: 0.9796500205993652,grad_norm: 0.9999992921596286, iteration: 148416
loss: 1.0113461017608643,grad_norm: 0.9700046592585517, iteration: 148417
loss: 0.9722384214401245,grad_norm: 0.9292282408051888, iteration: 148418
loss: 0.9860801696777344,grad_norm: 0.9999993349290123, iteration: 148419
loss: 0.9772313237190247,grad_norm: 0.999999192193188, iteration: 148420
loss: 1.0471924543380737,grad_norm: 0.9904675576878537, iteration: 148421
loss: 1.0094879865646362,grad_norm: 0.9999991701108223, iteration: 148422
loss: 1.015231966972351,grad_norm: 0.9072639120623726, iteration: 148423
loss: 1.037204384803772,grad_norm: 0.9938092093375578, iteration: 148424
loss: 1.001799464225769,grad_norm: 0.9891650976137168, iteration: 148425
loss: 1.0156596899032593,grad_norm: 0.9999990865737269, iteration: 148426
loss: 0.9872283935546875,grad_norm: 0.960824262618484, iteration: 148427
loss: 1.0349242687225342,grad_norm: 0.9999992353304974, iteration: 148428
loss: 0.9848605394363403,grad_norm: 0.9999991028404521, iteration: 148429
loss: 1.0057228803634644,grad_norm: 0.9999991201449346, iteration: 148430
loss: 1.0308729410171509,grad_norm: 0.9692914243541471, iteration: 148431
loss: 1.015429973602295,grad_norm: 0.9671166024082439, iteration: 148432
loss: 1.0091581344604492,grad_norm: 0.95674160712977, iteration: 148433
loss: 1.0004366636276245,grad_norm: 0.9054120031713216, iteration: 148434
loss: 0.9925553798675537,grad_norm: 0.9999991453308149, iteration: 148435
loss: 1.0388530492782593,grad_norm: 0.9999995924486764, iteration: 148436
loss: 0.9831766486167908,grad_norm: 0.9999991239495828, iteration: 148437
loss: 1.027608036994934,grad_norm: 0.8785648185632706, iteration: 148438
loss: 1.0375391244888306,grad_norm: 0.999999200343697, iteration: 148439
loss: 1.0168334245681763,grad_norm: 0.9476004344053119, iteration: 148440
loss: 0.9962328672409058,grad_norm: 0.9999991874582169, iteration: 148441
loss: 1.0242488384246826,grad_norm: 0.9363219510607046, iteration: 148442
loss: 0.9791000485420227,grad_norm: 0.8973551509940122, iteration: 148443
loss: 0.9987004399299622,grad_norm: 0.999999132955145, iteration: 148444
loss: 1.0167325735092163,grad_norm: 0.9660810526397523, iteration: 148445
loss: 0.9608502984046936,grad_norm: 0.9999991387434047, iteration: 148446
loss: 1.0162309408187866,grad_norm: 0.9999992536435441, iteration: 148447
loss: 1.1763648986816406,grad_norm: 0.999999255058749, iteration: 148448
loss: 1.0101218223571777,grad_norm: 0.9999992154118567, iteration: 148449
loss: 1.0036076307296753,grad_norm: 0.9267866112808276, iteration: 148450
loss: 1.0080972909927368,grad_norm: 0.9999990201514929, iteration: 148451
loss: 0.9987531900405884,grad_norm: 0.8576664172272922, iteration: 148452
loss: 1.021612524986267,grad_norm: 0.9999997002707678, iteration: 148453
loss: 1.0258389711380005,grad_norm: 0.9999998922526556, iteration: 148454
loss: 0.9653891324996948,grad_norm: 0.9999992124601891, iteration: 148455
loss: 0.9942312836647034,grad_norm: 0.999999105538673, iteration: 148456
loss: 1.0212706327438354,grad_norm: 0.9684715311062619, iteration: 148457
loss: 0.9744641184806824,grad_norm: 0.7552672742532841, iteration: 148458
loss: 0.9868372678756714,grad_norm: 1.0000000218633445, iteration: 148459
loss: 1.0170964002609253,grad_norm: 0.9999990454304567, iteration: 148460
loss: 1.0209672451019287,grad_norm: 0.9999990947211947, iteration: 148461
loss: 0.9786643981933594,grad_norm: 0.999999094239669, iteration: 148462
loss: 0.9713943600654602,grad_norm: 0.9999990973599769, iteration: 148463
loss: 1.0149199962615967,grad_norm: 0.9999990082222148, iteration: 148464
loss: 1.0049437284469604,grad_norm: 0.999998951247561, iteration: 148465
loss: 0.9756247401237488,grad_norm: 0.8290588595871371, iteration: 148466
loss: 1.0067088603973389,grad_norm: 0.9593494479140935, iteration: 148467
loss: 0.986131489276886,grad_norm: 0.9343395190521858, iteration: 148468
loss: 1.0139920711517334,grad_norm: 0.9999992711688616, iteration: 148469
loss: 0.946249783039093,grad_norm: 0.9999990095151481, iteration: 148470
loss: 1.0104873180389404,grad_norm: 0.9999991914766084, iteration: 148471
loss: 1.038673758506775,grad_norm: 0.9976201924024335, iteration: 148472
loss: 0.980276882648468,grad_norm: 0.9999993480500449, iteration: 148473
loss: 1.0493500232696533,grad_norm: 0.9999991507384735, iteration: 148474
loss: 1.0011042356491089,grad_norm: 0.9999991059780923, iteration: 148475
loss: 1.0091520547866821,grad_norm: 0.9999993087180685, iteration: 148476
loss: 0.9586573243141174,grad_norm: 0.9127413927365347, iteration: 148477
loss: 1.0092380046844482,grad_norm: 0.8759216599844389, iteration: 148478
loss: 0.9536061882972717,grad_norm: 0.9999989851532468, iteration: 148479
loss: 0.9936623573303223,grad_norm: 0.9875468746390614, iteration: 148480
loss: 1.0168863534927368,grad_norm: 0.9583151551632527, iteration: 148481
loss: 0.9726871252059937,grad_norm: 0.9999991594732752, iteration: 148482
loss: 1.01735258102417,grad_norm: 0.9155728567637883, iteration: 148483
loss: 1.0055053234100342,grad_norm: 0.9796544169126936, iteration: 148484
loss: 1.0175211429595947,grad_norm: 0.8121489336288165, iteration: 148485
loss: 1.0436419248580933,grad_norm: 0.9748792884808762, iteration: 148486
loss: 1.0312455892562866,grad_norm: 0.9999990618890031, iteration: 148487
loss: 1.0035552978515625,grad_norm: 0.9999990647666034, iteration: 148488
loss: 1.0413641929626465,grad_norm: 0.9999990910617376, iteration: 148489
loss: 1.0133683681488037,grad_norm: 0.9999997701078173, iteration: 148490
loss: 1.0156432390213013,grad_norm: 0.9999990266594154, iteration: 148491
loss: 1.0279481410980225,grad_norm: 0.9999991533209887, iteration: 148492
loss: 0.9933732151985168,grad_norm: 0.9382972507371096, iteration: 148493
loss: 0.9601912498474121,grad_norm: 0.9531463041917544, iteration: 148494
loss: 0.9388343095779419,grad_norm: 0.8880400938258493, iteration: 148495
loss: 1.0131455659866333,grad_norm: 0.999999075266348, iteration: 148496
loss: 0.9611493945121765,grad_norm: 0.9999991532233614, iteration: 148497
loss: 0.9899474382400513,grad_norm: 0.9999991422166609, iteration: 148498
loss: 1.018384575843811,grad_norm: 0.9999991181716339, iteration: 148499
loss: 0.9845157265663147,grad_norm: 0.9999992197794552, iteration: 148500
loss: 1.0627601146697998,grad_norm: 0.9999995804410162, iteration: 148501
loss: 1.0098700523376465,grad_norm: 0.943467837966362, iteration: 148502
loss: 1.0012247562408447,grad_norm: 0.986060843480732, iteration: 148503
loss: 0.9735502004623413,grad_norm: 0.8873549756640711, iteration: 148504
loss: 1.0121314525604248,grad_norm: 0.9999989673564339, iteration: 148505
loss: 1.0242953300476074,grad_norm: 0.970807297299329, iteration: 148506
loss: 1.0096464157104492,grad_norm: 0.9999990589809568, iteration: 148507
loss: 1.0317858457565308,grad_norm: 0.9999990817820666, iteration: 148508
loss: 1.0070990324020386,grad_norm: 0.9703825421182849, iteration: 148509
loss: 0.9959651827812195,grad_norm: 0.9999991589638054, iteration: 148510
loss: 0.994795024394989,grad_norm: 0.9999991075447192, iteration: 148511
loss: 1.0013740062713623,grad_norm: 0.9999990701643688, iteration: 148512
loss: 1.0179277658462524,grad_norm: 0.8916631316046857, iteration: 148513
loss: 0.9853193163871765,grad_norm: 0.865932329061703, iteration: 148514
loss: 0.9893680810928345,grad_norm: 0.9999989913118297, iteration: 148515
loss: 1.0049265623092651,grad_norm: 0.9999990488076018, iteration: 148516
loss: 1.0351027250289917,grad_norm: 0.8245374058980921, iteration: 148517
loss: 1.0157824754714966,grad_norm: 0.9999993376580188, iteration: 148518
loss: 1.0114167928695679,grad_norm: 0.8593626447366964, iteration: 148519
loss: 1.0124496221542358,grad_norm: 0.9999992203123402, iteration: 148520
loss: 0.9673143029212952,grad_norm: 0.9984865557637073, iteration: 148521
loss: 1.027212381362915,grad_norm: 0.9999990720917338, iteration: 148522
loss: 0.9611992835998535,grad_norm: 0.9999991250430146, iteration: 148523
loss: 1.0241045951843262,grad_norm: 0.9999995161704994, iteration: 148524
loss: 0.9939760565757751,grad_norm: 0.9999998603968322, iteration: 148525
loss: 1.0035505294799805,grad_norm: 0.9999990766237651, iteration: 148526
loss: 1.0149792432785034,grad_norm: 0.9999989958971455, iteration: 148527
loss: 1.0158255100250244,grad_norm: 0.9417800160032227, iteration: 148528
loss: 1.0135895013809204,grad_norm: 0.999999067265457, iteration: 148529
loss: 1.0155810117721558,grad_norm: 0.9594436038952562, iteration: 148530
loss: 0.9828563332557678,grad_norm: 0.8112242704778044, iteration: 148531
loss: 1.0364292860031128,grad_norm: 0.9669359665009477, iteration: 148532
loss: 1.0059043169021606,grad_norm: 0.988624315772534, iteration: 148533
loss: 1.0136691331863403,grad_norm: 0.928405048096839, iteration: 148534
loss: 0.9802219867706299,grad_norm: 0.9999992649753383, iteration: 148535
loss: 1.0111496448516846,grad_norm: 0.9999990940079064, iteration: 148536
loss: 1.004630446434021,grad_norm: 0.9304840360353815, iteration: 148537
loss: 1.028645396232605,grad_norm: 0.9999990196377834, iteration: 148538
loss: 0.9885169863700867,grad_norm: 0.9999998044955641, iteration: 148539
loss: 1.0234986543655396,grad_norm: 0.923788540110781, iteration: 148540
loss: 1.0061681270599365,grad_norm: 0.9999989376046353, iteration: 148541
loss: 0.9937781095504761,grad_norm: 0.9185394684491793, iteration: 148542
loss: 1.007328987121582,grad_norm: 0.9999991220155717, iteration: 148543
loss: 1.0027483701705933,grad_norm: 0.9723636911154191, iteration: 148544
loss: 0.9573361873626709,grad_norm: 0.9999991242223768, iteration: 148545
loss: 1.025152325630188,grad_norm: 0.999999211543442, iteration: 148546
loss: 0.9875878095626831,grad_norm: 0.9495958992991543, iteration: 148547
loss: 1.001577377319336,grad_norm: 0.9999989616178742, iteration: 148548
loss: 1.008901596069336,grad_norm: 0.9936158771915448, iteration: 148549
loss: 0.9921227097511292,grad_norm: 0.999999016441044, iteration: 148550
loss: 0.9971411824226379,grad_norm: 0.9999991415794528, iteration: 148551
loss: 1.0094853639602661,grad_norm: 0.999999155470743, iteration: 148552
loss: 0.9996908903121948,grad_norm: 0.9479616271075841, iteration: 148553
loss: 0.9968127012252808,grad_norm: 0.9128219127778241, iteration: 148554
loss: 1.0207301378250122,grad_norm: 0.9999991324981878, iteration: 148555
loss: 1.016466736793518,grad_norm: 0.9657478139593231, iteration: 148556
loss: 0.9880447387695312,grad_norm: 0.9433002880326337, iteration: 148557
loss: 1.0034204721450806,grad_norm: 0.9538802671306037, iteration: 148558
loss: 1.1466056108474731,grad_norm: 0.9999990277267777, iteration: 148559
loss: 0.9970202445983887,grad_norm: 0.9793551920396226, iteration: 148560
loss: 0.997808039188385,grad_norm: 0.9999992369411628, iteration: 148561
loss: 1.0124801397323608,grad_norm: 0.9999998265768206, iteration: 148562
loss: 1.0030863285064697,grad_norm: 0.9999991592672698, iteration: 148563
loss: 0.9982178807258606,grad_norm: 0.972791609330707, iteration: 148564
loss: 0.994861900806427,grad_norm: 0.9792270713468998, iteration: 148565
loss: 1.0166211128234863,grad_norm: 0.999999211512114, iteration: 148566
loss: 0.9689212441444397,grad_norm: 0.9086129414141761, iteration: 148567
loss: 1.0063986778259277,grad_norm: 0.9999990067160525, iteration: 148568
loss: 0.9720870852470398,grad_norm: 0.9990782002158963, iteration: 148569
loss: 1.020551323890686,grad_norm: 0.9999991572560873, iteration: 148570
loss: 1.013110637664795,grad_norm: 0.9999994878522829, iteration: 148571
loss: 1.0154417753219604,grad_norm: 0.9524721366211282, iteration: 148572
loss: 1.0075891017913818,grad_norm: 0.9999991466586376, iteration: 148573
loss: 1.0126341581344604,grad_norm: 0.9281720373168167, iteration: 148574
loss: 0.9832144975662231,grad_norm: 0.9999989317774589, iteration: 148575
loss: 0.9942943453788757,grad_norm: 0.9999992957284394, iteration: 148576
loss: 1.0174856185913086,grad_norm: 0.9463757922405226, iteration: 148577
loss: 1.0237115621566772,grad_norm: 0.9999991859868742, iteration: 148578
loss: 1.0016814470291138,grad_norm: 0.9999990572711546, iteration: 148579
loss: 0.9860232472419739,grad_norm: 0.9896857041934736, iteration: 148580
loss: 0.9743321537971497,grad_norm: 0.9999989849269257, iteration: 148581
loss: 0.9861035346984863,grad_norm: 0.9644677959981338, iteration: 148582
loss: 1.0057889223098755,grad_norm: 0.9999990406383292, iteration: 148583
loss: 0.988136887550354,grad_norm: 0.9389596600017557, iteration: 148584
loss: 0.9953615665435791,grad_norm: 0.999999177409661, iteration: 148585
loss: 0.9999650716781616,grad_norm: 0.9772813104870889, iteration: 148586
loss: 1.0061262845993042,grad_norm: 0.9999991545550379, iteration: 148587
loss: 1.0132571458816528,grad_norm: 0.9999992050595734, iteration: 148588
loss: 0.9987583160400391,grad_norm: 0.9999990271956091, iteration: 148589
loss: 0.9935714602470398,grad_norm: 0.9322896355295496, iteration: 148590
loss: 1.036110758781433,grad_norm: 0.9999992635729577, iteration: 148591
loss: 1.0378361940383911,grad_norm: 0.999999041056333, iteration: 148592
loss: 1.0030776262283325,grad_norm: 0.933716456059681, iteration: 148593
loss: 1.0255405902862549,grad_norm: 0.9999993954595985, iteration: 148594
loss: 1.00214684009552,grad_norm: 0.9956111849843395, iteration: 148595
loss: 1.0273524522781372,grad_norm: 0.9386953835365571, iteration: 148596
loss: 0.9893895387649536,grad_norm: 0.9472463234471328, iteration: 148597
loss: 1.0113108158111572,grad_norm: 0.9999990913881204, iteration: 148598
loss: 1.0595036745071411,grad_norm: 0.9828144860461873, iteration: 148599
loss: 0.995893120765686,grad_norm: 0.9297290955337353, iteration: 148600
loss: 0.9941151142120361,grad_norm: 0.9999991920720492, iteration: 148601
loss: 1.013290286064148,grad_norm: 0.9122475067713313, iteration: 148602
loss: 1.0145055055618286,grad_norm: 0.930365380893562, iteration: 148603
loss: 0.9952896237373352,grad_norm: 0.9999991566082721, iteration: 148604
loss: 0.9875206351280212,grad_norm: 0.9999990836049296, iteration: 148605
loss: 0.9858414530754089,grad_norm: 0.9999991508246577, iteration: 148606
loss: 0.9627382159233093,grad_norm: 0.999999083776933, iteration: 148607
loss: 0.9800044298171997,grad_norm: 0.9999992359386363, iteration: 148608
loss: 0.9962603449821472,grad_norm: 0.9756344411065591, iteration: 148609
loss: 1.0627257823944092,grad_norm: 0.9999992431010377, iteration: 148610
loss: 1.0104377269744873,grad_norm: 0.9999991001981623, iteration: 148611
loss: 1.012060523033142,grad_norm: 0.9999991138147035, iteration: 148612
loss: 0.9713919162750244,grad_norm: 0.9999991800703886, iteration: 148613
loss: 1.0237582921981812,grad_norm: 0.9999992485851001, iteration: 148614
loss: 1.0069615840911865,grad_norm: 0.9999990680612502, iteration: 148615
loss: 1.02120840549469,grad_norm: 0.999999366214343, iteration: 148616
loss: 0.9533584117889404,grad_norm: 0.9343622677994472, iteration: 148617
loss: 1.017357587814331,grad_norm: 0.9604782468050901, iteration: 148618
loss: 0.9979569315910339,grad_norm: 0.9540315341780019, iteration: 148619
loss: 0.9932665228843689,grad_norm: 0.9623205083685359, iteration: 148620
loss: 0.980232834815979,grad_norm: 0.9999993002996092, iteration: 148621
loss: 1.0306386947631836,grad_norm: 0.9999993069061757, iteration: 148622
loss: 0.972758412361145,grad_norm: 0.8244447691637693, iteration: 148623
loss: 1.0012868642807007,grad_norm: 0.9313690396418104, iteration: 148624
loss: 1.046822428703308,grad_norm: 0.9909985485448355, iteration: 148625
loss: 0.9878511428833008,grad_norm: 0.9999993618144523, iteration: 148626
loss: 0.9762892723083496,grad_norm: 0.9953068053221017, iteration: 148627
loss: 0.9925130009651184,grad_norm: 0.8881998770268829, iteration: 148628
loss: 1.019387125968933,grad_norm: 0.999999100318472, iteration: 148629
loss: 0.9885549545288086,grad_norm: 0.9999990584594114, iteration: 148630
loss: 1.0406444072723389,grad_norm: 0.999999173989079, iteration: 148631
loss: 0.9882412552833557,grad_norm: 0.980440608057917, iteration: 148632
loss: 0.9861185550689697,grad_norm: 0.9648885671315358, iteration: 148633
loss: 0.9841432571411133,grad_norm: 0.9999992547040627, iteration: 148634
loss: 0.9869236946105957,grad_norm: 0.9664085839405266, iteration: 148635
loss: 1.012620210647583,grad_norm: 0.9014968846363507, iteration: 148636
loss: 1.010972023010254,grad_norm: 0.9300991697507781, iteration: 148637
loss: 1.0101652145385742,grad_norm: 0.9999990036214793, iteration: 148638
loss: 0.9942626953125,grad_norm: 0.9799041616804453, iteration: 148639
loss: 0.9917404055595398,grad_norm: 0.8508658028692557, iteration: 148640
loss: 0.9938505291938782,grad_norm: 0.9999990883440142, iteration: 148641
loss: 1.0011255741119385,grad_norm: 0.9999992254666689, iteration: 148642
loss: 0.9693710803985596,grad_norm: 0.9999991047120402, iteration: 148643
loss: 1.0262532234191895,grad_norm: 0.930089681736685, iteration: 148644
loss: 1.0033351182937622,grad_norm: 0.9999991913336044, iteration: 148645
loss: 1.027819275856018,grad_norm: 0.9877144679588888, iteration: 148646
loss: 0.9748675227165222,grad_norm: 0.9999992650025595, iteration: 148647
loss: 0.9879847764968872,grad_norm: 0.999999217817761, iteration: 148648
loss: 1.0611464977264404,grad_norm: 0.9999992406855726, iteration: 148649
loss: 1.037216067314148,grad_norm: 0.9999990949138551, iteration: 148650
loss: 1.0225120782852173,grad_norm: 0.8896951037124017, iteration: 148651
loss: 0.9485966563224792,grad_norm: 0.9710674957580614, iteration: 148652
loss: 1.0020287036895752,grad_norm: 0.9999990505369416, iteration: 148653
loss: 1.0459709167480469,grad_norm: 0.999999247656975, iteration: 148654
loss: 1.007238745689392,grad_norm: 0.9999990627091235, iteration: 148655
loss: 0.9904588460922241,grad_norm: 0.9819208587412473, iteration: 148656
loss: 1.017680287361145,grad_norm: 0.9383977854552084, iteration: 148657
loss: 0.9864787459373474,grad_norm: 0.9999990698203509, iteration: 148658
loss: 1.0048524141311646,grad_norm: 0.9200552432450498, iteration: 148659
loss: 1.001805067062378,grad_norm: 0.9781789177017329, iteration: 148660
loss: 0.996342658996582,grad_norm: 0.9999992055043778, iteration: 148661
loss: 0.9912339448928833,grad_norm: 0.9932710002005107, iteration: 148662
loss: 0.9915214776992798,grad_norm: 0.9999990249906241, iteration: 148663
loss: 0.9923335909843445,grad_norm: 0.9460321277555809, iteration: 148664
loss: 1.0385218858718872,grad_norm: 0.9999992982800855, iteration: 148665
loss: 1.0041019916534424,grad_norm: 0.9999992529855218, iteration: 148666
loss: 1.0055477619171143,grad_norm: 0.8900296670461242, iteration: 148667
loss: 0.9991527199745178,grad_norm: 0.9999993046207025, iteration: 148668
loss: 0.966823160648346,grad_norm: 0.9919370409657426, iteration: 148669
loss: 1.0046005249023438,grad_norm: 0.9374793708522847, iteration: 148670
loss: 0.9801428318023682,grad_norm: 0.9999990669763603, iteration: 148671
loss: 0.9926199316978455,grad_norm: 0.9999991875145805, iteration: 148672
loss: 1.0066511631011963,grad_norm: 0.9402299005073502, iteration: 148673
loss: 1.0040525197982788,grad_norm: 0.9756222947929303, iteration: 148674
loss: 1.0047945976257324,grad_norm: 0.9999990690230725, iteration: 148675
loss: 0.9708400964736938,grad_norm: 0.9534201480321185, iteration: 148676
loss: 0.9632359743118286,grad_norm: 0.9999991159456992, iteration: 148677
loss: 1.0060312747955322,grad_norm: 0.9829709305897045, iteration: 148678
loss: 1.0372360944747925,grad_norm: 0.9999990909928026, iteration: 148679
loss: 0.9934772253036499,grad_norm: 0.9999991077256783, iteration: 148680
loss: 0.9973924160003662,grad_norm: 0.9999990624942395, iteration: 148681
loss: 1.0075101852416992,grad_norm: 0.9783390424376484, iteration: 148682
loss: 1.0302414894104004,grad_norm: 0.9999991504128503, iteration: 148683
loss: 0.9587141871452332,grad_norm: 0.9999990984729165, iteration: 148684
loss: 0.9380561709403992,grad_norm: 0.9829739942345318, iteration: 148685
loss: 0.9986779093742371,grad_norm: 0.9999991536275379, iteration: 148686
loss: 0.9916707277297974,grad_norm: 0.9135955541375184, iteration: 148687
loss: 0.9526463747024536,grad_norm: 0.9999991841984546, iteration: 148688
loss: 1.0346335172653198,grad_norm: 0.9931400879284522, iteration: 148689
loss: 1.0004128217697144,grad_norm: 0.9833578448590556, iteration: 148690
loss: 1.0547716617584229,grad_norm: 0.9999998838205629, iteration: 148691
loss: 0.9983199834823608,grad_norm: 0.9744367966375199, iteration: 148692
loss: 0.9996649026870728,grad_norm: 0.9999993848100794, iteration: 148693
loss: 0.9639848470687866,grad_norm: 0.9756374307291273, iteration: 148694
loss: 0.9788509607315063,grad_norm: 0.9999992215039635, iteration: 148695
loss: 0.9812514781951904,grad_norm: 0.990574352910054, iteration: 148696
loss: 0.9987389445304871,grad_norm: 0.9786779500465008, iteration: 148697
loss: 1.0107340812683105,grad_norm: 0.9999990717530306, iteration: 148698
loss: 0.998906135559082,grad_norm: 0.917937519821657, iteration: 148699
loss: 1.025861144065857,grad_norm: 0.9999990717634998, iteration: 148700
loss: 1.0070232152938843,grad_norm: 0.9510265728770066, iteration: 148701
loss: 1.0219024419784546,grad_norm: 0.9999991055013854, iteration: 148702
loss: 1.026821494102478,grad_norm: 0.9993016628376997, iteration: 148703
loss: 1.0076392889022827,grad_norm: 0.9441462246687143, iteration: 148704
loss: 0.9941608905792236,grad_norm: 0.8666223855375592, iteration: 148705
loss: 0.9721124172210693,grad_norm: 0.895431213753495, iteration: 148706
loss: 1.0010087490081787,grad_norm: 0.9999991335569619, iteration: 148707
loss: 0.9746248722076416,grad_norm: 0.9630659104698832, iteration: 148708
loss: 1.000939130783081,grad_norm: 0.9499206334966048, iteration: 148709
loss: 1.0490695238113403,grad_norm: 0.9558454865023998, iteration: 148710
loss: 0.9960379600524902,grad_norm: 0.9984336242690746, iteration: 148711
loss: 0.9717009663581848,grad_norm: 0.9999991072924543, iteration: 148712
loss: 0.9997801780700684,grad_norm: 0.9999992893438876, iteration: 148713
loss: 1.0827186107635498,grad_norm: 0.9999990598050403, iteration: 148714
loss: 1.0727436542510986,grad_norm: 0.9999997009506197, iteration: 148715
loss: 0.9797216057777405,grad_norm: 0.9799080473988746, iteration: 148716
loss: 0.9759638905525208,grad_norm: 0.9001074927384024, iteration: 148717
loss: 0.9787350296974182,grad_norm: 0.9611997185813995, iteration: 148718
loss: 0.9926980137825012,grad_norm: 0.9999992189756258, iteration: 148719
loss: 0.993086040019989,grad_norm: 0.9999990621958932, iteration: 148720
loss: 1.0765334367752075,grad_norm: 0.9999992317611134, iteration: 148721
loss: 1.0041834115982056,grad_norm: 0.9999990930944935, iteration: 148722
loss: 1.0113120079040527,grad_norm: 0.9553857764486058, iteration: 148723
loss: 0.9897185564041138,grad_norm: 0.9999990640549211, iteration: 148724
loss: 1.0291509628295898,grad_norm: 0.9999992747978689, iteration: 148725
loss: 0.9925981760025024,grad_norm: 0.9999991194903234, iteration: 148726
loss: 0.9868801236152649,grad_norm: 0.9998040833199409, iteration: 148727
loss: 0.97222501039505,grad_norm: 0.999998971917014, iteration: 148728
loss: 1.0376170873641968,grad_norm: 0.9999999698248779, iteration: 148729
loss: 0.9797080755233765,grad_norm: 0.9999992830126865, iteration: 148730
loss: 0.9915881752967834,grad_norm: 0.9232632329508543, iteration: 148731
loss: 0.9822694659233093,grad_norm: 0.8183514886268414, iteration: 148732
loss: 0.9551911354064941,grad_norm: 0.8135281970402335, iteration: 148733
loss: 1.0229825973510742,grad_norm: 0.9999993373213132, iteration: 148734
loss: 0.9777166247367859,grad_norm: 0.9502799170355771, iteration: 148735
loss: 1.0353912115097046,grad_norm: 0.8901113521987698, iteration: 148736
loss: 0.9888412952423096,grad_norm: 0.8544090739255229, iteration: 148737
loss: 1.0259376764297485,grad_norm: 0.9999992018447272, iteration: 148738
loss: 0.9926545023918152,grad_norm: 0.9999990075941773, iteration: 148739
loss: 1.053289532661438,grad_norm: 0.9999994961240006, iteration: 148740
loss: 0.9855373501777649,grad_norm: 0.9999993146824954, iteration: 148741
loss: 1.0002801418304443,grad_norm: 0.9331040802699395, iteration: 148742
loss: 1.065840482711792,grad_norm: 0.9999991411927427, iteration: 148743
loss: 0.9952834844589233,grad_norm: 0.9999991558772433, iteration: 148744
loss: 1.0032333135604858,grad_norm: 0.9999992701328562, iteration: 148745
loss: 0.9766250848770142,grad_norm: 0.9628186067829196, iteration: 148746
loss: 0.9856424331665039,grad_norm: 0.9999990397818777, iteration: 148747
loss: 1.0025761127471924,grad_norm: 0.9534536623014971, iteration: 148748
loss: 1.0417877435684204,grad_norm: 0.9999991155273934, iteration: 148749
loss: 0.9761921763420105,grad_norm: 0.8987525072228036, iteration: 148750
loss: 0.9764949679374695,grad_norm: 0.9509658240712725, iteration: 148751
loss: 0.9983525276184082,grad_norm: 0.9999989365614934, iteration: 148752
loss: 1.0024375915527344,grad_norm: 0.9999989570068033, iteration: 148753
loss: 0.9860453009605408,grad_norm: 0.9999990369762152, iteration: 148754
loss: 1.0390104055404663,grad_norm: 0.9999989959227179, iteration: 148755
loss: 0.9887534379959106,grad_norm: 0.999999110215482, iteration: 148756
loss: 1.0278657674789429,grad_norm: 0.9274704147715114, iteration: 148757
loss: 0.989891529083252,grad_norm: 0.9999991497152645, iteration: 148758
loss: 1.0072219371795654,grad_norm: 0.9984853798208433, iteration: 148759
loss: 0.9500647187232971,grad_norm: 0.9999991914437969, iteration: 148760
loss: 1.0316911935806274,grad_norm: 0.9754216947181311, iteration: 148761
loss: 1.0219738483428955,grad_norm: 0.999999144744334, iteration: 148762
loss: 1.0179131031036377,grad_norm: 0.9999988959943246, iteration: 148763
loss: 1.0063133239746094,grad_norm: 0.9999991339652986, iteration: 148764
loss: 1.0041143894195557,grad_norm: 0.9999991709607811, iteration: 148765
loss: 0.9856842160224915,grad_norm: 0.9999989872508339, iteration: 148766
loss: 1.028541088104248,grad_norm: 0.9999990822701121, iteration: 148767
loss: 0.9803107380867004,grad_norm: 0.9999997312906042, iteration: 148768
loss: 1.021182656288147,grad_norm: 0.9599404191904656, iteration: 148769
loss: 0.9858971238136292,grad_norm: 0.9066149467968294, iteration: 148770
loss: 0.99387526512146,grad_norm: 0.8725045465713027, iteration: 148771
loss: 1.033745288848877,grad_norm: 0.9176165464007747, iteration: 148772
loss: 0.9919333457946777,grad_norm: 0.9999991773993688, iteration: 148773
loss: 0.9736871123313904,grad_norm: 0.9999991890099797, iteration: 148774
loss: 0.9833851456642151,grad_norm: 0.999999147952876, iteration: 148775
loss: 0.984697163105011,grad_norm: 0.9999991212162523, iteration: 148776
loss: 0.9823523163795471,grad_norm: 0.9999992816283897, iteration: 148777
loss: 1.019822597503662,grad_norm: 0.9999990556374342, iteration: 148778
loss: 0.9635850787162781,grad_norm: 0.9085935388714195, iteration: 148779
loss: 0.9721748232841492,grad_norm: 0.9416951743036622, iteration: 148780
loss: 0.9599308967590332,grad_norm: 0.977868927391808, iteration: 148781
loss: 1.0053280591964722,grad_norm: 0.8476125257478309, iteration: 148782
loss: 1.0244700908660889,grad_norm: 0.99999975885671, iteration: 148783
loss: 0.9929039478302002,grad_norm: 0.910060764241761, iteration: 148784
loss: 1.0127372741699219,grad_norm: 0.8783295983917707, iteration: 148785
loss: 1.0013959407806396,grad_norm: 0.8955609362228798, iteration: 148786
loss: 1.0145951509475708,grad_norm: 0.9379719931266749, iteration: 148787
loss: 0.9849045276641846,grad_norm: 0.9999991122642495, iteration: 148788
loss: 0.9960101842880249,grad_norm: 0.9999991162376207, iteration: 148789
loss: 1.0298713445663452,grad_norm: 0.9999991292301015, iteration: 148790
loss: 1.0123263597488403,grad_norm: 0.9027383825391889, iteration: 148791
loss: 0.974966824054718,grad_norm: 0.9999991147541081, iteration: 148792
loss: 1.0362966060638428,grad_norm: 0.999999198584992, iteration: 148793
loss: 0.9935672879219055,grad_norm: 0.9999990910675688, iteration: 148794
loss: 0.9921780228614807,grad_norm: 0.9999991842097615, iteration: 148795
loss: 1.0476737022399902,grad_norm: 0.9999990844346808, iteration: 148796
loss: 1.0755069255828857,grad_norm: 0.9999991113632662, iteration: 148797
loss: 1.0058647394180298,grad_norm: 0.8343882708976171, iteration: 148798
loss: 0.995625913143158,grad_norm: 0.9999989521759578, iteration: 148799
loss: 0.9921513795852661,grad_norm: 0.9999992420073556, iteration: 148800
loss: 1.0428540706634521,grad_norm: 0.9841183715794919, iteration: 148801
loss: 1.0071065425872803,grad_norm: 0.8712282968501018, iteration: 148802
loss: 0.970795214176178,grad_norm: 0.9999991008367303, iteration: 148803
loss: 1.0336872339248657,grad_norm: 0.9936652395780048, iteration: 148804
loss: 0.9772037267684937,grad_norm: 0.9999991932551053, iteration: 148805
loss: 0.9746769666671753,grad_norm: 0.9999990849000342, iteration: 148806
loss: 0.9647922515869141,grad_norm: 0.9980347700022653, iteration: 148807
loss: 0.9616063237190247,grad_norm: 0.9576136179291079, iteration: 148808
loss: 1.0081337690353394,grad_norm: 0.9999990905208825, iteration: 148809
loss: 1.0120761394500732,grad_norm: 0.9530709390456598, iteration: 148810
loss: 0.9707831144332886,grad_norm: 0.9999991080963572, iteration: 148811
loss: 0.9787331223487854,grad_norm: 0.9999989568242206, iteration: 148812
loss: 0.966020405292511,grad_norm: 0.9421349163539585, iteration: 148813
loss: 1.006280779838562,grad_norm: 0.8782113616810466, iteration: 148814
loss: 1.035783290863037,grad_norm: 0.9999989385796865, iteration: 148815
loss: 0.9885626435279846,grad_norm: 0.9999991507943532, iteration: 148816
loss: 1.0487987995147705,grad_norm: 0.9999991232771367, iteration: 148817
loss: 0.9997450113296509,grad_norm: 0.9949333072626992, iteration: 148818
loss: 1.000302791595459,grad_norm: 0.9057224516367222, iteration: 148819
loss: 0.973741352558136,grad_norm: 0.9999991863806234, iteration: 148820
loss: 0.9960834383964539,grad_norm: 0.9999990222868358, iteration: 148821
loss: 1.0220963954925537,grad_norm: 0.8190956227110303, iteration: 148822
loss: 1.0444039106369019,grad_norm: 0.9999989566146252, iteration: 148823
loss: 1.0261058807373047,grad_norm: 0.9013363401200546, iteration: 148824
loss: 1.0212119817733765,grad_norm: 0.9288858463328935, iteration: 148825
loss: 1.004012107849121,grad_norm: 0.9999992230886001, iteration: 148826
loss: 1.0230324268341064,grad_norm: 0.9999991873602461, iteration: 148827
loss: 0.9813016653060913,grad_norm: 0.9406837939418701, iteration: 148828
loss: 1.066236138343811,grad_norm: 0.9999998589196989, iteration: 148829
loss: 1.03982675075531,grad_norm: 0.8931414668709794, iteration: 148830
loss: 0.9848047494888306,grad_norm: 0.9999995580642825, iteration: 148831
loss: 0.9862720370292664,grad_norm: 0.9999991718593197, iteration: 148832
loss: 1.004746437072754,grad_norm: 0.9278137423618115, iteration: 148833
loss: 0.9518814086914062,grad_norm: 0.97444176241518, iteration: 148834
loss: 0.9895633459091187,grad_norm: 0.9999991370700975, iteration: 148835
loss: 0.9932559728622437,grad_norm: 0.9999991438667446, iteration: 148836
loss: 1.050583839416504,grad_norm: 0.9999990862858903, iteration: 148837
loss: 0.9837150573730469,grad_norm: 0.9999990038553036, iteration: 148838
loss: 1.0205893516540527,grad_norm: 0.9999991416611996, iteration: 148839
loss: 0.998562753200531,grad_norm: 0.9001795208671252, iteration: 148840
loss: 0.946342408657074,grad_norm: 0.9999991222751988, iteration: 148841
loss: 0.9713043570518494,grad_norm: 0.9999992183868274, iteration: 148842
loss: 1.0151137113571167,grad_norm: 0.9384905806245794, iteration: 148843
loss: 1.0051019191741943,grad_norm: 0.9611342403015886, iteration: 148844
loss: 1.0295093059539795,grad_norm: 0.8873693045513845, iteration: 148845
loss: 1.0310660600662231,grad_norm: 0.9421966585990172, iteration: 148846
loss: 1.009740948677063,grad_norm: 0.945740878074939, iteration: 148847
loss: 0.9713566303253174,grad_norm: 0.9423203303746954, iteration: 148848
loss: 0.9822468161582947,grad_norm: 0.913852710826948, iteration: 148849
loss: 0.9816365838050842,grad_norm: 0.8786437947623202, iteration: 148850
loss: 1.0061129331588745,grad_norm: 0.9882821651390987, iteration: 148851
loss: 1.020244836807251,grad_norm: 0.9999991193371134, iteration: 148852
loss: 0.9817453026771545,grad_norm: 0.9999990196015001, iteration: 148853
loss: 0.9806389212608337,grad_norm: 0.9878269819736347, iteration: 148854
loss: 1.0006417036056519,grad_norm: 0.9999992091205017, iteration: 148855
loss: 0.9952550530433655,grad_norm: 0.9553829372127136, iteration: 148856
loss: 0.9779530167579651,grad_norm: 0.936486512254057, iteration: 148857
loss: 1.0331224203109741,grad_norm: 0.9999990896781294, iteration: 148858
loss: 0.9814232587814331,grad_norm: 0.9422886889945076, iteration: 148859
loss: 1.0262165069580078,grad_norm: 0.9999990394364807, iteration: 148860
loss: 1.0412085056304932,grad_norm: 0.9999992872509643, iteration: 148861
loss: 1.0137525796890259,grad_norm: 0.9638512648616416, iteration: 148862
loss: 0.9931322336196899,grad_norm: 0.9082071221836682, iteration: 148863
loss: 1.0062955617904663,grad_norm: 0.9472704388712255, iteration: 148864
loss: 0.9979671239852905,grad_norm: 0.9999990810648031, iteration: 148865
loss: 1.001402735710144,grad_norm: 0.9999990794448581, iteration: 148866
loss: 1.0402928590774536,grad_norm: 0.9910895497036073, iteration: 148867
loss: 1.0151379108428955,grad_norm: 0.8716988011502871, iteration: 148868
loss: 1.0719910860061646,grad_norm: 0.9999997382095634, iteration: 148869
loss: 1.001599669456482,grad_norm: 0.9999190750854607, iteration: 148870
loss: 1.013318419456482,grad_norm: 0.8798264071628866, iteration: 148871
loss: 0.9306273460388184,grad_norm: 0.848138645924534, iteration: 148872
loss: 1.0095394849777222,grad_norm: 0.9999990108872048, iteration: 148873
loss: 1.009810209274292,grad_norm: 0.9999991012822, iteration: 148874
loss: 0.9497905373573303,grad_norm: 0.9712195177071185, iteration: 148875
loss: 0.9609683156013489,grad_norm: 0.9597928743981788, iteration: 148876
loss: 1.0051250457763672,grad_norm: 0.9459393678067958, iteration: 148877
loss: 0.9906584620475769,grad_norm: 0.916925673510088, iteration: 148878
loss: 1.0062148571014404,grad_norm: 0.9985949243993447, iteration: 148879
loss: 1.0085735321044922,grad_norm: 0.9879573103766529, iteration: 148880
loss: 1.0102781057357788,grad_norm: 0.9999992023731092, iteration: 148881
loss: 1.0145509243011475,grad_norm: 0.8914657414629854, iteration: 148882
loss: 0.9777973890304565,grad_norm: 0.994405900492903, iteration: 148883
loss: 1.0251851081848145,grad_norm: 0.9248092584047028, iteration: 148884
loss: 0.9839526414871216,grad_norm: 0.9999990149091478, iteration: 148885
loss: 1.0001393556594849,grad_norm: 0.9999990400873351, iteration: 148886
loss: 0.9833361506462097,grad_norm: 0.9999991566278371, iteration: 148887
loss: 1.033090591430664,grad_norm: 0.9797142616561799, iteration: 148888
loss: 0.9883084297180176,grad_norm: 0.9999990762766642, iteration: 148889
loss: 1.005396842956543,grad_norm: 0.99999906959096, iteration: 148890
loss: 0.9982152581214905,grad_norm: 0.9999991237300662, iteration: 148891
loss: 1.0453485250473022,grad_norm: 0.9999991690841611, iteration: 148892
loss: 0.9978603720664978,grad_norm: 0.8500829750611048, iteration: 148893
loss: 0.997257649898529,grad_norm: 0.9982663836794464, iteration: 148894
loss: 1.0287054777145386,grad_norm: 0.9999990855267555, iteration: 148895
loss: 0.9872079491615295,grad_norm: 0.9999990570062883, iteration: 148896
loss: 0.976184606552124,grad_norm: 0.9999990719465305, iteration: 148897
loss: 1.0357359647750854,grad_norm: 0.9999990777035593, iteration: 148898
loss: 1.0169774293899536,grad_norm: 0.9999990231053184, iteration: 148899
loss: 0.970572292804718,grad_norm: 0.9459095458916958, iteration: 148900
loss: 0.9958644509315491,grad_norm: 0.8755073012352929, iteration: 148901
loss: 0.9637799263000488,grad_norm: 0.9999991376946813, iteration: 148902
loss: 1.048315405845642,grad_norm: 0.9999991150322212, iteration: 148903
loss: 1.0105175971984863,grad_norm: 0.8107621841571482, iteration: 148904
loss: 0.9874396324157715,grad_norm: 0.9999991070996876, iteration: 148905
loss: 1.0245916843414307,grad_norm: 0.9999996971187515, iteration: 148906
loss: 0.9944625496864319,grad_norm: 0.9999992458878973, iteration: 148907
loss: 1.009635329246521,grad_norm: 0.9999991092184585, iteration: 148908
loss: 0.9991154074668884,grad_norm: 0.9999991395805117, iteration: 148909
loss: 1.0040075778961182,grad_norm: 0.8558261384046937, iteration: 148910
loss: 1.0254203081130981,grad_norm: 0.999999101748042, iteration: 148911
loss: 1.0102530717849731,grad_norm: 0.8651837108367251, iteration: 148912
loss: 0.9672442078590393,grad_norm: 0.9999992959522906, iteration: 148913
loss: 0.9633544683456421,grad_norm: 0.902673298591897, iteration: 148914
loss: 1.017667293548584,grad_norm: 0.965538020381222, iteration: 148915
loss: 0.9885091781616211,grad_norm: 0.9999989998758606, iteration: 148916
loss: 1.069972276687622,grad_norm: 0.9749120266398318, iteration: 148917
loss: 1.0159533023834229,grad_norm: 0.9749660585402224, iteration: 148918
loss: 1.0413157939910889,grad_norm: 0.9999991441816776, iteration: 148919
loss: 1.0129870176315308,grad_norm: 0.8786418195609227, iteration: 148920
loss: 1.0045527219772339,grad_norm: 0.9999989208866824, iteration: 148921
loss: 1.0129966735839844,grad_norm: 0.9999992669332392, iteration: 148922
loss: 1.0087085962295532,grad_norm: 0.9999991932253305, iteration: 148923
loss: 1.0030360221862793,grad_norm: 0.923165332652385, iteration: 148924
loss: 1.0497699975967407,grad_norm: 0.9999991108397225, iteration: 148925
loss: 0.9807143211364746,grad_norm: 0.8397593125112189, iteration: 148926
loss: 0.9893938899040222,grad_norm: 0.8936760252939204, iteration: 148927
loss: 0.9710697531700134,grad_norm: 0.957128013539119, iteration: 148928
loss: 0.994124710559845,grad_norm: 0.7466918862960015, iteration: 148929
loss: 0.9976798295974731,grad_norm: 0.9834851749453076, iteration: 148930
loss: 1.0028103590011597,grad_norm: 0.9999991797629485, iteration: 148931
loss: 1.0908199548721313,grad_norm: 0.9999990609297952, iteration: 148932
loss: 1.004281759262085,grad_norm: 0.9999991341685895, iteration: 148933
loss: 0.9858983159065247,grad_norm: 0.9014974145343688, iteration: 148934
loss: 0.969944417476654,grad_norm: 0.9140409529239804, iteration: 148935
loss: 1.0352187156677246,grad_norm: 0.943718572283944, iteration: 148936
loss: 1.006009817123413,grad_norm: 0.9999991036926293, iteration: 148937
loss: 0.9993699193000793,grad_norm: 0.9999991538478586, iteration: 148938
loss: 1.039987564086914,grad_norm: 0.9999992536870347, iteration: 148939
loss: 0.9836606383323669,grad_norm: 0.8965864907186185, iteration: 148940
loss: 0.9735621213912964,grad_norm: 0.9314295298209965, iteration: 148941
loss: 0.9919072985649109,grad_norm: 0.9534827550410261, iteration: 148942
loss: 0.9840245246887207,grad_norm: 0.986680594146757, iteration: 148943
loss: 0.9810143113136292,grad_norm: 0.9366670831907693, iteration: 148944
loss: 1.0067486763000488,grad_norm: 0.9326135784368582, iteration: 148945
loss: 1.128372073173523,grad_norm: 0.9999999281198831, iteration: 148946
loss: 0.9537482857704163,grad_norm: 0.9662043872826357, iteration: 148947
loss: 1.0115619897842407,grad_norm: 0.8318164779264088, iteration: 148948
loss: 0.9877845048904419,grad_norm: 0.9129261528215248, iteration: 148949
loss: 0.9837659001350403,grad_norm: 0.9999990727304284, iteration: 148950
loss: 0.9897823333740234,grad_norm: 0.957281408676516, iteration: 148951
loss: 0.987817108631134,grad_norm: 0.9999991690220122, iteration: 148952
loss: 1.015467643737793,grad_norm: 0.9999996734035183, iteration: 148953
loss: 1.0622016191482544,grad_norm: 0.9999996454548153, iteration: 148954
loss: 1.017088770866394,grad_norm: 0.8432515026176505, iteration: 148955
loss: 0.9749807715415955,grad_norm: 0.8651563942084761, iteration: 148956
loss: 0.9632200002670288,grad_norm: 0.9943812942141597, iteration: 148957
loss: 1.007447600364685,grad_norm: 0.9999990026924126, iteration: 148958
loss: 1.0155905485153198,grad_norm: 0.857966901266767, iteration: 148959
loss: 0.9733419418334961,grad_norm: 0.9930040770776949, iteration: 148960
loss: 0.9706275463104248,grad_norm: 0.9107603329015312, iteration: 148961
loss: 1.0025088787078857,grad_norm: 0.8713631332577662, iteration: 148962
loss: 1.129833459854126,grad_norm: 0.999999289899742, iteration: 148963
loss: 1.0422972440719604,grad_norm: 0.9464182920363723, iteration: 148964
loss: 1.0818123817443848,grad_norm: 0.9999995299863489, iteration: 148965
loss: 0.9483340978622437,grad_norm: 0.9999990477411956, iteration: 148966
loss: 0.9912702441215515,grad_norm: 0.8774456414766555, iteration: 148967
loss: 0.976889967918396,grad_norm: 0.9371203620773066, iteration: 148968
loss: 0.9976668357849121,grad_norm: 0.9120668431725184, iteration: 148969
loss: 1.028928279876709,grad_norm: 0.9999989506807435, iteration: 148970
loss: 0.9942089319229126,grad_norm: 0.9999990635759711, iteration: 148971
loss: 0.9953933358192444,grad_norm: 0.9999991449789363, iteration: 148972
loss: 1.002181887626648,grad_norm: 0.9999990404709518, iteration: 148973
loss: 0.9826037287712097,grad_norm: 0.9999991748487408, iteration: 148974
loss: 0.9979069828987122,grad_norm: 0.9999996292885452, iteration: 148975
loss: 1.0321122407913208,grad_norm: 0.99999937092839, iteration: 148976
loss: 1.0088318586349487,grad_norm: 0.9999992070329042, iteration: 148977
loss: 0.9682502746582031,grad_norm: 0.9999990317867942, iteration: 148978
loss: 0.9819473624229431,grad_norm: 0.9158327082851645, iteration: 148979
loss: 0.9944313168525696,grad_norm: 0.9628597188319007, iteration: 148980
loss: 1.0515642166137695,grad_norm: 0.9999992363878362, iteration: 148981
loss: 0.9988515973091125,grad_norm: 0.9999992801446781, iteration: 148982
loss: 0.9899733066558838,grad_norm: 0.9999991514401987, iteration: 148983
loss: 0.9847394227981567,grad_norm: 0.9999991295277859, iteration: 148984
loss: 0.980569064617157,grad_norm: 0.8249434710950797, iteration: 148985
loss: 1.0187687873840332,grad_norm: 0.9999991316806505, iteration: 148986
loss: 1.015708565711975,grad_norm: 0.9999991931363789, iteration: 148987
loss: 0.9831667542457581,grad_norm: 0.9999992812970809, iteration: 148988
loss: 0.9942181706428528,grad_norm: 0.9999990473865253, iteration: 148989
loss: 0.9618722796440125,grad_norm: 0.9563551105591445, iteration: 148990
loss: 0.9625452756881714,grad_norm: 0.9999992292563196, iteration: 148991
loss: 1.0218614339828491,grad_norm: 0.9999991323066441, iteration: 148992
loss: 0.9965909123420715,grad_norm: 0.999999327488638, iteration: 148993
loss: 1.003633975982666,grad_norm: 0.9999989574936178, iteration: 148994
loss: 1.024979829788208,grad_norm: 0.9999992075648593, iteration: 148995
loss: 0.9708060026168823,grad_norm: 0.9401955672596574, iteration: 148996
loss: 1.0074738264083862,grad_norm: 0.7497638042961922, iteration: 148997
loss: 1.0189043283462524,grad_norm: 0.9999991786090356, iteration: 148998
loss: 0.9888208508491516,grad_norm: 0.9999989751672094, iteration: 148999
loss: 0.9540082812309265,grad_norm: 0.9999990451320112, iteration: 149000
loss: 0.9922558069229126,grad_norm: 0.9509059393029592, iteration: 149001
loss: 1.0113335847854614,grad_norm: 0.9999992095969035, iteration: 149002
loss: 1.025102138519287,grad_norm: 0.9406209968098032, iteration: 149003
loss: 1.0119974613189697,grad_norm: 0.9845200665136399, iteration: 149004
loss: 0.9972174763679504,grad_norm: 0.9150161489327674, iteration: 149005
loss: 0.9891883134841919,grad_norm: 0.9325045725259299, iteration: 149006
loss: 1.0139507055282593,grad_norm: 0.9893742759293994, iteration: 149007
loss: 1.0179190635681152,grad_norm: 0.9387658748443255, iteration: 149008
loss: 0.96635502576828,grad_norm: 0.9033617752499427, iteration: 149009
loss: 1.0105836391448975,grad_norm: 0.9227957013397055, iteration: 149010
loss: 1.0003087520599365,grad_norm: 0.9999991758770669, iteration: 149011
loss: 1.0308330059051514,grad_norm: 1.0000000040810355, iteration: 149012
loss: 1.0095651149749756,grad_norm: 0.9999991575552548, iteration: 149013
loss: 0.9878823757171631,grad_norm: 0.9999991211179151, iteration: 149014
loss: 1.0592637062072754,grad_norm: 0.9999991154393386, iteration: 149015
loss: 0.9846017360687256,grad_norm: 0.9430676559199861, iteration: 149016
loss: 1.0222744941711426,grad_norm: 0.9999990496125942, iteration: 149017
loss: 1.0073798894882202,grad_norm: 0.8678371758709361, iteration: 149018
loss: 0.9833456873893738,grad_norm: 0.9828811382199512, iteration: 149019
loss: 1.0079312324523926,grad_norm: 0.9999991240033448, iteration: 149020
loss: 1.0079680681228638,grad_norm: 0.9999992322524213, iteration: 149021
loss: 1.0015429258346558,grad_norm: 0.8615389900922215, iteration: 149022
loss: 0.9872028231620789,grad_norm: 0.9999995361972621, iteration: 149023
loss: 1.0003254413604736,grad_norm: 0.9999993446272329, iteration: 149024
loss: 1.0174425840377808,grad_norm: 0.9277110069225447, iteration: 149025
loss: 0.9438285231590271,grad_norm: 0.9999992355465697, iteration: 149026
loss: 0.971253514289856,grad_norm: 0.8253033675161833, iteration: 149027
loss: 0.9825884103775024,grad_norm: 0.9999990107742652, iteration: 149028
loss: 1.0180914402008057,grad_norm: 0.9999991579627067, iteration: 149029
loss: 1.0106604099273682,grad_norm: 0.9999992592614599, iteration: 149030
loss: 1.0074986219406128,grad_norm: 0.9999991251964735, iteration: 149031
loss: 1.0317105054855347,grad_norm: 0.9999990145302822, iteration: 149032
loss: 0.9781041145324707,grad_norm: 0.999999176512335, iteration: 149033
loss: 0.9861239790916443,grad_norm: 0.9999991140969877, iteration: 149034
loss: 1.0354442596435547,grad_norm: 0.9999990454162596, iteration: 149035
loss: 1.0259509086608887,grad_norm: 0.9947079469457163, iteration: 149036
loss: 1.0225489139556885,grad_norm: 0.9999990706873202, iteration: 149037
loss: 0.970037043094635,grad_norm: 0.9999990882406437, iteration: 149038
loss: 1.0335044860839844,grad_norm: 0.901389483498373, iteration: 149039
loss: 0.9737340807914734,grad_norm: 0.9999992272031255, iteration: 149040
loss: 1.0310909748077393,grad_norm: 0.9999998125759958, iteration: 149041
loss: 0.9418139457702637,grad_norm: 0.9999991971163895, iteration: 149042
loss: 1.0030065774917603,grad_norm: 0.9999991693419449, iteration: 149043
loss: 1.065556287765503,grad_norm: 0.9999999142559374, iteration: 149044
loss: 1.005892276763916,grad_norm: 0.9640304777690202, iteration: 149045
loss: 1.073840856552124,grad_norm: 0.9999991587965702, iteration: 149046
loss: 0.9944933652877808,grad_norm: 0.9999992205112823, iteration: 149047
loss: 1.0017173290252686,grad_norm: 0.9601282161806411, iteration: 149048
loss: 0.9849564433097839,grad_norm: 0.9989451552904136, iteration: 149049
loss: 0.9892641305923462,grad_norm: 0.925087772409339, iteration: 149050
loss: 1.0998868942260742,grad_norm: 0.9999990638508524, iteration: 149051
loss: 1.009876012802124,grad_norm: 0.9999991145080062, iteration: 149052
loss: 1.0017406940460205,grad_norm: 0.9999993227880246, iteration: 149053
loss: 1.0014269351959229,grad_norm: 0.8891045596029302, iteration: 149054
loss: 1.014622688293457,grad_norm: 0.9999991686595192, iteration: 149055
loss: 0.9915582537651062,grad_norm: 0.9999996146841383, iteration: 149056
loss: 0.997992992401123,grad_norm: 0.8525462092994396, iteration: 149057
loss: 1.01634681224823,grad_norm: 0.9104578888720999, iteration: 149058
loss: 0.9916078448295593,grad_norm: 0.9523485390857706, iteration: 149059
loss: 1.009926438331604,grad_norm: 0.9592564805647256, iteration: 149060
loss: 0.9939686059951782,grad_norm: 0.9574412852578796, iteration: 149061
loss: 0.9747747182846069,grad_norm: 0.9679128358070258, iteration: 149062
loss: 1.0212615728378296,grad_norm: 0.9805555308912278, iteration: 149063
loss: 0.9855014085769653,grad_norm: 0.8306388606118043, iteration: 149064
loss: 0.9872681498527527,grad_norm: 0.9999990706164478, iteration: 149065
loss: 0.9894969463348389,grad_norm: 0.9999990573612527, iteration: 149066
loss: 1.0200929641723633,grad_norm: 0.9999990470946133, iteration: 149067
loss: 1.0283410549163818,grad_norm: 0.9999993388226973, iteration: 149068
loss: 1.0243455171585083,grad_norm: 0.9999992282921393, iteration: 149069
loss: 1.0194624662399292,grad_norm: 0.9999995455731842, iteration: 149070
loss: 0.945953369140625,grad_norm: 0.9999995968888176, iteration: 149071
loss: 1.0284315347671509,grad_norm: 0.9999990477758669, iteration: 149072
loss: 1.0130013227462769,grad_norm: 0.9840228351474848, iteration: 149073
loss: 0.9878897070884705,grad_norm: 0.8828240748563193, iteration: 149074
loss: 1.0330356359481812,grad_norm: 0.9999990412708767, iteration: 149075
loss: 0.9505631923675537,grad_norm: 0.9999991421608957, iteration: 149076
loss: 1.0153063535690308,grad_norm: 0.9999991658940044, iteration: 149077
loss: 0.9943696856498718,grad_norm: 0.9842807706343312, iteration: 149078
loss: 0.9907082319259644,grad_norm: 0.9257406112548383, iteration: 149079
loss: 0.9910756349563599,grad_norm: 0.9798150061227759, iteration: 149080
loss: 0.9788857102394104,grad_norm: 0.9999991576639181, iteration: 149081
loss: 0.9917893409729004,grad_norm: 0.9999992033169866, iteration: 149082
loss: 1.013238787651062,grad_norm: 0.9999993594305057, iteration: 149083
loss: 1.0028468370437622,grad_norm: 0.9625112872775518, iteration: 149084
loss: 1.0301661491394043,grad_norm: 0.8086762830265108, iteration: 149085
loss: 1.0258204936981201,grad_norm: 0.9160278703477596, iteration: 149086
loss: 1.0234923362731934,grad_norm: 0.9474391552144527, iteration: 149087
loss: 1.0448620319366455,grad_norm: 0.9999991767853642, iteration: 149088
loss: 0.9857926368713379,grad_norm: 0.9999991260215289, iteration: 149089
loss: 1.0603218078613281,grad_norm: 0.8997506256363559, iteration: 149090
loss: 0.9921979308128357,grad_norm: 0.9999991244278813, iteration: 149091
loss: 1.015518307685852,grad_norm: 0.9711507043151603, iteration: 149092
loss: 1.0063633918762207,grad_norm: 0.9735780365158532, iteration: 149093
loss: 0.9820507764816284,grad_norm: 0.856387251917164, iteration: 149094
loss: 1.006381630897522,grad_norm: 0.9999991069254537, iteration: 149095
loss: 1.0053982734680176,grad_norm: 0.9999992463819586, iteration: 149096
loss: 0.9797421097755432,grad_norm: 0.9999989529263572, iteration: 149097
loss: 0.9932760000228882,grad_norm: 0.9999991047151437, iteration: 149098
loss: 0.9710325598716736,grad_norm: 0.9999992446604676, iteration: 149099
loss: 0.9656845331192017,grad_norm: 0.9234698074417298, iteration: 149100
loss: 0.9737955927848816,grad_norm: 0.9999991795738163, iteration: 149101
loss: 1.052133321762085,grad_norm: 0.9999992432257684, iteration: 149102
loss: 1.017275094985962,grad_norm: 0.9999990068388166, iteration: 149103
loss: 0.9765996932983398,grad_norm: 0.9646679198981134, iteration: 149104
loss: 1.0109035968780518,grad_norm: 0.8800842119031838, iteration: 149105
loss: 0.99381023645401,grad_norm: 0.9995110671889544, iteration: 149106
loss: 1.0053472518920898,grad_norm: 0.9011445687967202, iteration: 149107
loss: 1.0392701625823975,grad_norm: 0.999998961916697, iteration: 149108
loss: 0.9857396483421326,grad_norm: 0.9999996997939472, iteration: 149109
loss: 1.003273606300354,grad_norm: 0.9999991860907517, iteration: 149110
loss: 1.0013242959976196,grad_norm: 0.9290269520032823, iteration: 149111
loss: 0.9771869778633118,grad_norm: 0.9999990624532717, iteration: 149112
loss: 0.9974570870399475,grad_norm: 0.9093681928370262, iteration: 149113
loss: 0.9939296245574951,grad_norm: 0.9077970505790149, iteration: 149114
loss: 0.9556831121444702,grad_norm: 0.999999104364893, iteration: 149115
loss: 0.9980189204216003,grad_norm: 0.9141097601036523, iteration: 149116
loss: 0.9816677570343018,grad_norm: 0.9999991986051707, iteration: 149117
loss: 0.9664899110794067,grad_norm: 0.9999992163168683, iteration: 149118
loss: 0.966765284538269,grad_norm: 0.9543838852231329, iteration: 149119
loss: 0.9921014308929443,grad_norm: 0.899985442034255, iteration: 149120
loss: 0.984859824180603,grad_norm: 0.9999990986986589, iteration: 149121
loss: 0.9905508160591125,grad_norm: 0.9999990979231478, iteration: 149122
loss: 1.0288686752319336,grad_norm: 0.9999990286622816, iteration: 149123
loss: 1.0039279460906982,grad_norm: 0.9999988919799468, iteration: 149124
loss: 0.9937296509742737,grad_norm: 0.9999991264531483, iteration: 149125
loss: 1.0200501680374146,grad_norm: 0.9999991585176368, iteration: 149126
loss: 1.0216341018676758,grad_norm: 0.9542000209895918, iteration: 149127
loss: 0.9886119961738586,grad_norm: 0.9999992205912364, iteration: 149128
loss: 1.0679759979248047,grad_norm: 0.999999042488086, iteration: 149129
loss: 1.0195131301879883,grad_norm: 0.9999988714649359, iteration: 149130
loss: 1.0055800676345825,grad_norm: 0.9999991729189166, iteration: 149131
loss: 1.0217549800872803,grad_norm: 0.9999991992402604, iteration: 149132
loss: 0.9889855980873108,grad_norm: 0.8832323455071156, iteration: 149133
loss: 1.0149413347244263,grad_norm: 0.9999990175555807, iteration: 149134
loss: 1.0488731861114502,grad_norm: 0.9999992028554078, iteration: 149135
loss: 1.034719467163086,grad_norm: 0.9999993360517215, iteration: 149136
loss: 1.0475518703460693,grad_norm: 0.9999997364383566, iteration: 149137
loss: 0.99819016456604,grad_norm: 0.8984003622152625, iteration: 149138
loss: 1.0269229412078857,grad_norm: 0.9999990609323174, iteration: 149139
loss: 1.0305958986282349,grad_norm: 0.9999991717226734, iteration: 149140
loss: 0.9851311445236206,grad_norm: 0.999999159657623, iteration: 149141
loss: 0.9978550672531128,grad_norm: 0.9999990031523602, iteration: 149142
loss: 1.0380353927612305,grad_norm: 0.9999992756601077, iteration: 149143
loss: 1.0111207962036133,grad_norm: 0.9999990598859477, iteration: 149144
loss: 1.1019703149795532,grad_norm: 0.9999999602882799, iteration: 149145
loss: 1.000003695487976,grad_norm: 0.9999992253126021, iteration: 149146
loss: 1.0084811449050903,grad_norm: 0.9999991763471949, iteration: 149147
loss: 1.0243396759033203,grad_norm: 0.9587261231600414, iteration: 149148
loss: 0.9956429600715637,grad_norm: 0.9999991869798049, iteration: 149149
loss: 1.000106692314148,grad_norm: 0.9999992178299403, iteration: 149150
loss: 0.9868778586387634,grad_norm: 0.9999991999050687, iteration: 149151
loss: 1.0214779376983643,grad_norm: 0.9999990118851967, iteration: 149152
loss: 1.0028554201126099,grad_norm: 0.9999990488137808, iteration: 149153
loss: 1.0034528970718384,grad_norm: 0.9999989971016519, iteration: 149154
loss: 0.9833715558052063,grad_norm: 0.8811905715088522, iteration: 149155
loss: 0.9831491112709045,grad_norm: 0.9999997816657609, iteration: 149156
loss: 0.993980884552002,grad_norm: 0.9999990372193008, iteration: 149157
loss: 0.9906549453735352,grad_norm: 0.9999989721731396, iteration: 149158
loss: 0.9872523546218872,grad_norm: 0.9999990957317827, iteration: 149159
loss: 0.9826704263687134,grad_norm: 0.9999992211013187, iteration: 149160
loss: 1.0499236583709717,grad_norm: 0.9259478483692243, iteration: 149161
loss: 1.0132678747177124,grad_norm: 0.9999990818985266, iteration: 149162
loss: 1.030928373336792,grad_norm: 0.8388286975093956, iteration: 149163
loss: 0.983945369720459,grad_norm: 0.9999989789460861, iteration: 149164
loss: 0.9988883137702942,grad_norm: 0.9999992571583101, iteration: 149165
loss: 1.008590579032898,grad_norm: 0.9848954885629922, iteration: 149166
loss: 1.0217554569244385,grad_norm: 0.946414422456289, iteration: 149167
loss: 1.0207085609436035,grad_norm: 0.9999991233701095, iteration: 149168
loss: 0.9962379336357117,grad_norm: 0.9999991812586916, iteration: 149169
loss: 0.9733832478523254,grad_norm: 0.9999991192132254, iteration: 149170
loss: 0.9809255599975586,grad_norm: 0.9999990856930606, iteration: 149171
loss: 0.9758869409561157,grad_norm: 0.943725373086433, iteration: 149172
loss: 1.0153346061706543,grad_norm: 0.9999991517827799, iteration: 149173
loss: 0.9615277051925659,grad_norm: 0.8516675103510316, iteration: 149174
loss: 1.0206823348999023,grad_norm: 0.9139719894052953, iteration: 149175
loss: 0.9951010346412659,grad_norm: 0.999999197883065, iteration: 149176
loss: 1.0074901580810547,grad_norm: 0.9999992634369653, iteration: 149177
loss: 1.014175534248352,grad_norm: 0.9245828883867048, iteration: 149178
loss: 0.9808216094970703,grad_norm: 0.9999991716681386, iteration: 149179
loss: 1.0162826776504517,grad_norm: 0.999999306139362, iteration: 149180
loss: 1.0140035152435303,grad_norm: 0.9359030454368681, iteration: 149181
loss: 0.9927684664726257,grad_norm: 0.8099664222739619, iteration: 149182
loss: 0.992158055305481,grad_norm: 0.9999990500166608, iteration: 149183
loss: 1.0653151273727417,grad_norm: 0.999999084554091, iteration: 149184
loss: 1.0250135660171509,grad_norm: 0.99999909381986, iteration: 149185
loss: 0.9859736561775208,grad_norm: 0.9999992937245912, iteration: 149186
loss: 1.056463599205017,grad_norm: 0.9999995176772859, iteration: 149187
loss: 1.0393002033233643,grad_norm: 0.9999992946528138, iteration: 149188
loss: 0.9913948774337769,grad_norm: 0.9999990236982055, iteration: 149189
loss: 0.995550274848938,grad_norm: 0.9999996430936078, iteration: 149190
loss: 0.976091206073761,grad_norm: 0.9289395951170113, iteration: 149191
loss: 1.0179232358932495,grad_norm: 0.9999990404120815, iteration: 149192
loss: 0.9692175984382629,grad_norm: 0.9904147683821013, iteration: 149193
loss: 1.0317243337631226,grad_norm: 0.8698640768029261, iteration: 149194
loss: 0.9936286211013794,grad_norm: 0.9319037542140262, iteration: 149195
loss: 1.0159863233566284,grad_norm: 0.8587657341522683, iteration: 149196
loss: 1.0175158977508545,grad_norm: 0.9999990963206877, iteration: 149197
loss: 0.9933875799179077,grad_norm: 0.9999991781424702, iteration: 149198
loss: 0.99570232629776,grad_norm: 0.9999993752470135, iteration: 149199
loss: 0.9665205478668213,grad_norm: 0.9999991649417532, iteration: 149200
loss: 1.011017918586731,grad_norm: 0.9433284554413427, iteration: 149201
loss: 0.9693190455436707,grad_norm: 0.9999991621668105, iteration: 149202
loss: 1.0213536024093628,grad_norm: 0.8765599271445453, iteration: 149203
loss: 0.9733084440231323,grad_norm: 0.7882611569583216, iteration: 149204
loss: 0.9604918360710144,grad_norm: 0.9733836882840364, iteration: 149205
loss: 1.0007277727127075,grad_norm: 0.9999992362168559, iteration: 149206
loss: 1.0309118032455444,grad_norm: 0.9999990667321443, iteration: 149207
loss: 1.002354621887207,grad_norm: 0.9471607300122599, iteration: 149208
loss: 0.9974656105041504,grad_norm: 0.9903503523858171, iteration: 149209
loss: 0.9934805035591125,grad_norm: 0.9999991489193699, iteration: 149210
loss: 1.0025320053100586,grad_norm: 0.8911798717995969, iteration: 149211
loss: 1.0003923177719116,grad_norm: 0.9080595289107252, iteration: 149212
loss: 0.9920287728309631,grad_norm: 0.935866697597913, iteration: 149213
loss: 1.02194082736969,grad_norm: 0.999999109701679, iteration: 149214
loss: 0.9913570880889893,grad_norm: 0.9231890714827983, iteration: 149215
loss: 1.0283164978027344,grad_norm: 0.9999993949478462, iteration: 149216
loss: 1.0198793411254883,grad_norm: 0.9999991141993619, iteration: 149217
loss: 1.0261985063552856,grad_norm: 0.999999696260445, iteration: 149218
loss: 0.9764685034751892,grad_norm: 0.9798693232482267, iteration: 149219
loss: 0.9802863597869873,grad_norm: 0.9552462285111855, iteration: 149220
loss: 1.0290160179138184,grad_norm: 0.9999991577231385, iteration: 149221
loss: 1.0281935930252075,grad_norm: 0.9999990973030688, iteration: 149222
loss: 1.020066499710083,grad_norm: 0.9299785418118955, iteration: 149223
loss: 1.0085300207138062,grad_norm: 0.9432041801963905, iteration: 149224
loss: 1.0005303621292114,grad_norm: 0.9830996806659534, iteration: 149225
loss: 1.0210716724395752,grad_norm: 0.999999188439017, iteration: 149226
loss: 0.9863472580909729,grad_norm: 0.9369016932096672, iteration: 149227
loss: 1.041303038597107,grad_norm: 0.9999991809816122, iteration: 149228
loss: 0.9835596680641174,grad_norm: 0.9999991344200954, iteration: 149229
loss: 0.9865040183067322,grad_norm: 0.9571334360744429, iteration: 149230
loss: 0.9744470119476318,grad_norm: 0.9999990546930749, iteration: 149231
loss: 1.0234932899475098,grad_norm: 0.8919469876240056, iteration: 149232
loss: 1.0268698930740356,grad_norm: 0.9999991602091849, iteration: 149233
loss: 1.0370455980300903,grad_norm: 0.8671671368035347, iteration: 149234
loss: 0.9728700518608093,grad_norm: 0.999999266805988, iteration: 149235
loss: 1.0098680257797241,grad_norm: 0.9675730346280342, iteration: 149236
loss: 1.0231220722198486,grad_norm: 0.9974257795282595, iteration: 149237
loss: 0.9858954548835754,grad_norm: 0.9999990718221252, iteration: 149238
loss: 0.9787931442260742,grad_norm: 0.9829844773595292, iteration: 149239
loss: 1.02932608127594,grad_norm: 0.9623507331053202, iteration: 149240
loss: 0.968488335609436,grad_norm: 0.8270194428013258, iteration: 149241
loss: 0.9953632354736328,grad_norm: 0.9029211865329275, iteration: 149242
loss: 1.025874376296997,grad_norm: 0.9147422127851713, iteration: 149243
loss: 1.0223578214645386,grad_norm: 0.9999990896282406, iteration: 149244
loss: 1.0311896800994873,grad_norm: 0.9999991879314496, iteration: 149245
loss: 1.021659255027771,grad_norm: 0.9898037291484282, iteration: 149246
loss: 1.025027871131897,grad_norm: 0.7496534864654673, iteration: 149247
loss: 0.9887917637825012,grad_norm: 0.938203685334405, iteration: 149248
loss: 1.0016597509384155,grad_norm: 0.9999990141635214, iteration: 149249
loss: 0.9817070960998535,grad_norm: 0.8286696273359674, iteration: 149250
loss: 0.9886656403541565,grad_norm: 0.9789793818302688, iteration: 149251
loss: 0.9702185988426208,grad_norm: 0.9999992090423667, iteration: 149252
loss: 0.9941692352294922,grad_norm: 0.9934690608683369, iteration: 149253
loss: 1.0269614458084106,grad_norm: 0.9999990768147716, iteration: 149254
loss: 1.0324997901916504,grad_norm: 0.9970632154921417, iteration: 149255
loss: 0.9721580147743225,grad_norm: 0.9423208004524097, iteration: 149256
loss: 1.0259522199630737,grad_norm: 0.9999990491573304, iteration: 149257
loss: 0.9872170090675354,grad_norm: 0.9999989226199795, iteration: 149258
loss: 0.9821606278419495,grad_norm: 0.8500705213257073, iteration: 149259
loss: 0.998249351978302,grad_norm: 0.9719186688516948, iteration: 149260
loss: 1.0372958183288574,grad_norm: 0.9703373072320286, iteration: 149261
loss: 1.0629382133483887,grad_norm: 0.9999990947349358, iteration: 149262
loss: 0.9682778120040894,grad_norm: 0.9999990129136167, iteration: 149263
loss: 1.0249171257019043,grad_norm: 0.9150636250482421, iteration: 149264
loss: 0.9975451827049255,grad_norm: 0.9772514050288209, iteration: 149265
loss: 0.9898726940155029,grad_norm: 0.9761301553898931, iteration: 149266
loss: 1.0071824789047241,grad_norm: 0.8599668080427818, iteration: 149267
loss: 0.9702576994895935,grad_norm: 0.8798462746934974, iteration: 149268
loss: 0.9505928158760071,grad_norm: 0.9999991376729129, iteration: 149269
loss: 0.9913932681083679,grad_norm: 0.9999992705633528, iteration: 149270
loss: 1.0822272300720215,grad_norm: 0.9999999429528612, iteration: 149271
loss: 0.9806588888168335,grad_norm: 0.9166180507793449, iteration: 149272
loss: 1.052749514579773,grad_norm: 0.9760831068800392, iteration: 149273
loss: 1.02205228805542,grad_norm: 0.8754686653238801, iteration: 149274
loss: 0.9958263039588928,grad_norm: 0.8509214339389356, iteration: 149275
loss: 0.9610654711723328,grad_norm: 0.9635084963460333, iteration: 149276
loss: 1.0101293325424194,grad_norm: 0.9279842795749847, iteration: 149277
loss: 1.0080008506774902,grad_norm: 0.9999990305558621, iteration: 149278
loss: 1.0161166191101074,grad_norm: 0.9589241297551961, iteration: 149279
loss: 1.0369367599487305,grad_norm: 0.9557297641075253, iteration: 149280
loss: 0.9735706448554993,grad_norm: 0.999999119811682, iteration: 149281
loss: 1.0477122068405151,grad_norm: 0.9999991055425476, iteration: 149282
loss: 0.9810939431190491,grad_norm: 0.9599549846436316, iteration: 149283
loss: 0.9911127686500549,grad_norm: 0.9999996040998771, iteration: 149284
loss: 1.0071935653686523,grad_norm: 0.9999991193362289, iteration: 149285
loss: 0.9945826530456543,grad_norm: 0.9999990390382965, iteration: 149286
loss: 0.9749383926391602,grad_norm: 0.9999991996076294, iteration: 149287
loss: 0.9743979573249817,grad_norm: 0.8206131744883227, iteration: 149288
loss: 0.9698622226715088,grad_norm: 0.9999993196263476, iteration: 149289
loss: 1.0006314516067505,grad_norm: 0.8065325270656678, iteration: 149290
loss: 1.0092483758926392,grad_norm: 0.947349597140416, iteration: 149291
loss: 0.9489499926567078,grad_norm: 0.999998995026859, iteration: 149292
loss: 1.0386449098587036,grad_norm: 0.9404214459566581, iteration: 149293
loss: 0.9995251297950745,grad_norm: 0.9999990721097837, iteration: 149294
loss: 0.9879359006881714,grad_norm: 0.863078099394989, iteration: 149295
loss: 1.003577709197998,grad_norm: 0.99999907321454, iteration: 149296
loss: 0.97882080078125,grad_norm: 0.9178228974975622, iteration: 149297
loss: 1.0028966665267944,grad_norm: 0.9573329500205722, iteration: 149298
loss: 0.9783296585083008,grad_norm: 0.8902419837573119, iteration: 149299
loss: 0.9877983927726746,grad_norm: 0.8095241042565567, iteration: 149300
loss: 0.9857466816902161,grad_norm: 0.9999990758758506, iteration: 149301
loss: 1.0071418285369873,grad_norm: 0.9004125442822108, iteration: 149302
loss: 0.9982357621192932,grad_norm: 0.9999991774408146, iteration: 149303
loss: 0.989723265171051,grad_norm: 0.9409875835090055, iteration: 149304
loss: 1.0134778022766113,grad_norm: 0.8482687332047801, iteration: 149305
loss: 0.9854190945625305,grad_norm: 0.9999990221710351, iteration: 149306
loss: 0.9747895002365112,grad_norm: 0.9221370737448586, iteration: 149307
loss: 0.9875143766403198,grad_norm: 0.8195611297489119, iteration: 149308
loss: 0.973077654838562,grad_norm: 0.9884613638626041, iteration: 149309
loss: 1.0087743997573853,grad_norm: 0.9568968137723086, iteration: 149310
loss: 1.0022364854812622,grad_norm: 0.9999991279744261, iteration: 149311
loss: 0.9856224656105042,grad_norm: 0.9873030153674477, iteration: 149312
loss: 1.0197676420211792,grad_norm: 0.9999998209513513, iteration: 149313
loss: 1.000372290611267,grad_norm: 0.908195297743817, iteration: 149314
loss: 1.0192041397094727,grad_norm: 0.9999992187961483, iteration: 149315
loss: 0.9939470887184143,grad_norm: 0.9817947961816684, iteration: 149316
loss: 0.9677186608314514,grad_norm: 0.9999991358732139, iteration: 149317
loss: 1.0264812707901,grad_norm: 0.9999988979620924, iteration: 149318
loss: 1.024906873703003,grad_norm: 0.9999990868724967, iteration: 149319
loss: 0.9729644656181335,grad_norm: 0.9999991615229445, iteration: 149320
loss: 0.9840623140335083,grad_norm: 0.9999991205242118, iteration: 149321
loss: 0.970471203327179,grad_norm: 0.8581034570450039, iteration: 149322
loss: 1.0086696147918701,grad_norm: 0.9999990342181692, iteration: 149323
loss: 0.9327185750007629,grad_norm: 0.9202908215537317, iteration: 149324
loss: 0.9573193788528442,grad_norm: 0.9999992065764255, iteration: 149325
loss: 1.0230083465576172,grad_norm: 0.9912001235495924, iteration: 149326
loss: 1.0087306499481201,grad_norm: 0.9999990354666488, iteration: 149327
loss: 0.9798064231872559,grad_norm: 0.9999991269832103, iteration: 149328
loss: 1.0055299997329712,grad_norm: 0.9999989794977132, iteration: 149329
loss: 1.0301433801651,grad_norm: 0.999999189483301, iteration: 149330
loss: 1.0012271404266357,grad_norm: 0.9201623055274908, iteration: 149331
loss: 1.0224391222000122,grad_norm: 0.9999991315330488, iteration: 149332
loss: 1.018607497215271,grad_norm: 0.9999990819529979, iteration: 149333
loss: 1.0062114000320435,grad_norm: 0.8363781382911426, iteration: 149334
loss: 0.9976189136505127,grad_norm: 0.9999993209165221, iteration: 149335
loss: 1.0116443634033203,grad_norm: 0.9178433953515946, iteration: 149336
loss: 0.9916759729385376,grad_norm: 0.9999991946529428, iteration: 149337
loss: 0.9981971979141235,grad_norm: 0.9504329465806617, iteration: 149338
loss: 0.9964857697486877,grad_norm: 0.999999299159406, iteration: 149339
loss: 0.981425940990448,grad_norm: 0.8235763745988348, iteration: 149340
loss: 0.9986594915390015,grad_norm: 0.9999991288997887, iteration: 149341
loss: 1.0451319217681885,grad_norm: 0.974399940411486, iteration: 149342
loss: 0.9832697510719299,grad_norm: 0.9078223205790176, iteration: 149343
loss: 1.0356087684631348,grad_norm: 0.9999998678118199, iteration: 149344
loss: 1.0045430660247803,grad_norm: 0.9136473518046755, iteration: 149345
loss: 1.038705825805664,grad_norm: 0.9999996071185838, iteration: 149346
loss: 1.0065476894378662,grad_norm: 0.9999990484763354, iteration: 149347
loss: 1.0238252878189087,grad_norm: 0.9999990898074066, iteration: 149348
loss: 0.9638078808784485,grad_norm: 0.9725940675118069, iteration: 149349
loss: 1.0306451320648193,grad_norm: 0.999999018495568, iteration: 149350
loss: 1.008674144744873,grad_norm: 0.9512679026876324, iteration: 149351
loss: 0.9576478600502014,grad_norm: 0.9999989454163297, iteration: 149352
loss: 0.9949020147323608,grad_norm: 0.9999992087769218, iteration: 149353
loss: 1.0039831399917603,grad_norm: 0.9999992899441227, iteration: 149354
loss: 0.9653260707855225,grad_norm: 0.9924536419093986, iteration: 149355
loss: 0.9983252882957458,grad_norm: 0.9999990353924874, iteration: 149356
loss: 1.042992353439331,grad_norm: 0.9999991491596723, iteration: 149357
loss: 1.014266014099121,grad_norm: 0.9999989579657645, iteration: 149358
loss: 0.9869495630264282,grad_norm: 0.9999989836441944, iteration: 149359
loss: 0.9834519028663635,grad_norm: 0.8033789144145183, iteration: 149360
loss: 1.0199370384216309,grad_norm: 0.999999048067284, iteration: 149361
loss: 0.9994568824768066,grad_norm: 0.9772852111699781, iteration: 149362
loss: 0.9951247572898865,grad_norm: 0.9999990369703948, iteration: 149363
loss: 1.0105026960372925,grad_norm: 0.9999991764739217, iteration: 149364
loss: 0.9911356568336487,grad_norm: 0.9461731549389181, iteration: 149365
loss: 1.0023068189620972,grad_norm: 0.8948146847477115, iteration: 149366
loss: 0.9896606802940369,grad_norm: 0.9999993024003041, iteration: 149367
loss: 0.9996340274810791,grad_norm: 0.999999183638887, iteration: 149368
loss: 1.0080828666687012,grad_norm: 0.9999990343968348, iteration: 149369
loss: 1.0044012069702148,grad_norm: 0.9999991350433419, iteration: 149370
loss: 1.02306067943573,grad_norm: 0.9999991253000114, iteration: 149371
loss: 0.9674195647239685,grad_norm: 0.9399380315709194, iteration: 149372
loss: 1.009462833404541,grad_norm: 0.9265062091696576, iteration: 149373
loss: 0.9939823150634766,grad_norm: 0.9999991440466678, iteration: 149374
loss: 1.0033918619155884,grad_norm: 0.9597697597979109, iteration: 149375
loss: 1.0214399099349976,grad_norm: 0.8224898697723556, iteration: 149376
loss: 0.9983207583427429,grad_norm: 0.9916711820915972, iteration: 149377
loss: 0.9645332098007202,grad_norm: 0.9999991105789737, iteration: 149378
loss: 1.0290361642837524,grad_norm: 0.9999990194890731, iteration: 149379
loss: 1.0318677425384521,grad_norm: 0.9999990560672138, iteration: 149380
loss: 0.9655609130859375,grad_norm: 0.8603014978889953, iteration: 149381
loss: 0.9959372878074646,grad_norm: 0.9999991594417976, iteration: 149382
loss: 0.9828266501426697,grad_norm: 0.9285506922277021, iteration: 149383
loss: 0.9996131062507629,grad_norm: 0.9999990070800765, iteration: 149384
loss: 0.9714531898498535,grad_norm: 0.996151343216092, iteration: 149385
loss: 1.0271532535552979,grad_norm: 0.9999992444836246, iteration: 149386
loss: 0.9843257665634155,grad_norm: 0.977501247905485, iteration: 149387
loss: 1.0188454389572144,grad_norm: 0.9999991533052892, iteration: 149388
loss: 1.0012831687927246,grad_norm: 0.9999992071836102, iteration: 149389
loss: 1.0147428512573242,grad_norm: 0.9909657805185245, iteration: 149390
loss: 1.004929780960083,grad_norm: 0.9999991551032267, iteration: 149391
loss: 1.003699541091919,grad_norm: 0.9379624612058406, iteration: 149392
loss: 1.0055257081985474,grad_norm: 0.999999289217464, iteration: 149393
loss: 0.9886631369590759,grad_norm: 0.8534301975173764, iteration: 149394
loss: 1.0220766067504883,grad_norm: 0.9999990113025741, iteration: 149395
loss: 0.9858925938606262,grad_norm: 0.99999905624521, iteration: 149396
loss: 1.0629099607467651,grad_norm: 0.9999992488270598, iteration: 149397
loss: 1.0035374164581299,grad_norm: 0.8213814616453428, iteration: 149398
loss: 0.9881683588027954,grad_norm: 0.9999989771456079, iteration: 149399
loss: 1.040718674659729,grad_norm: 0.9999990029317619, iteration: 149400
loss: 1.0333837270736694,grad_norm: 0.9645605020668097, iteration: 149401
loss: 0.9829013347625732,grad_norm: 0.99999912917826, iteration: 149402
loss: 0.9950534105300903,grad_norm: 0.9999992415492943, iteration: 149403
loss: 0.9841813445091248,grad_norm: 0.999999262491545, iteration: 149404
loss: 1.0253493785858154,grad_norm: 0.999999070937888, iteration: 149405
loss: 1.0427515506744385,grad_norm: 0.9999991957406708, iteration: 149406
loss: 0.9838215708732605,grad_norm: 0.999998974438421, iteration: 149407
loss: 0.9976991415023804,grad_norm: 0.9061487530814014, iteration: 149408
loss: 1.025247573852539,grad_norm: 0.9999989073938775, iteration: 149409
loss: 1.010122537612915,grad_norm: 0.9999991284852827, iteration: 149410
loss: 1.017462968826294,grad_norm: 0.9399546681509892, iteration: 149411
loss: 1.0674786567687988,grad_norm: 0.9999990369083578, iteration: 149412
loss: 1.03559148311615,grad_norm: 0.9999991413925815, iteration: 149413
loss: 0.9811179637908936,grad_norm: 0.9116692256055825, iteration: 149414
loss: 1.0156238079071045,grad_norm: 0.9999990090260155, iteration: 149415
loss: 1.0229594707489014,grad_norm: 0.9999992143061017, iteration: 149416
loss: 0.9854352474212646,grad_norm: 0.890133649196634, iteration: 149417
loss: 0.9978346228599548,grad_norm: 0.9719857590542237, iteration: 149418
loss: 1.022998571395874,grad_norm: 0.9570502831528859, iteration: 149419
loss: 0.9487884044647217,grad_norm: 0.9999991359863418, iteration: 149420
loss: 1.02609121799469,grad_norm: 0.9999992353169453, iteration: 149421
loss: 0.981343686580658,grad_norm: 0.9121947727302269, iteration: 149422
loss: 1.0115526914596558,grad_norm: 0.9999991357634288, iteration: 149423
loss: 1.019757628440857,grad_norm: 0.9999990999503514, iteration: 149424
loss: 1.0166058540344238,grad_norm: 0.9999990202881035, iteration: 149425
loss: 1.0065571069717407,grad_norm: 0.9352074533929954, iteration: 149426
loss: 0.9831362366676331,grad_norm: 0.813799380995354, iteration: 149427
loss: 0.9823575019836426,grad_norm: 0.999999338980481, iteration: 149428
loss: 0.9839166402816772,grad_norm: 0.8505962243016937, iteration: 149429
loss: 1.0108572244644165,grad_norm: 0.9865560692467393, iteration: 149430
loss: 0.9936811327934265,grad_norm: 0.9961489601813617, iteration: 149431
loss: 1.047063946723938,grad_norm: 0.9999991261018298, iteration: 149432
loss: 0.9874510765075684,grad_norm: 0.9999991792560037, iteration: 149433
loss: 0.985095202922821,grad_norm: 0.9999990203749439, iteration: 149434
loss: 0.9962635040283203,grad_norm: 0.9999991318250067, iteration: 149435
loss: 1.003800392150879,grad_norm: 0.956120419496776, iteration: 149436
loss: 1.0071154832839966,grad_norm: 0.9999996797003824, iteration: 149437
loss: 1.0276031494140625,grad_norm: 0.9999991543144086, iteration: 149438
loss: 1.0396090745925903,grad_norm: 0.9999990551009083, iteration: 149439
loss: 1.026598572731018,grad_norm: 0.9523849803075991, iteration: 149440
loss: 1.027326226234436,grad_norm: 0.9999989661669318, iteration: 149441
loss: 1.0150922536849976,grad_norm: 0.9999991640414949, iteration: 149442
loss: 1.0338776111602783,grad_norm: 0.7795764047429431, iteration: 149443
loss: 0.9744845628738403,grad_norm: 0.9999989812742797, iteration: 149444
loss: 0.9634866714477539,grad_norm: 0.9999990536440466, iteration: 149445
loss: 0.9762507677078247,grad_norm: 0.97114163592053, iteration: 149446
loss: 0.9928346872329712,grad_norm: 0.8694217356989558, iteration: 149447
loss: 1.0107672214508057,grad_norm: 0.9999991167409418, iteration: 149448
loss: 0.9741354584693909,grad_norm: 0.9566794981513941, iteration: 149449
loss: 1.0114140510559082,grad_norm: 0.9999991762184797, iteration: 149450
loss: 0.9736863374710083,grad_norm: 0.9999990648883285, iteration: 149451
loss: 1.0001211166381836,grad_norm: 0.8853640902345162, iteration: 149452
loss: 1.0078439712524414,grad_norm: 0.9999990251299399, iteration: 149453
loss: 0.9826319217681885,grad_norm: 0.999999053137542, iteration: 149454
loss: 1.0252480506896973,grad_norm: 0.9999992655892523, iteration: 149455
loss: 1.0121123790740967,grad_norm: 0.9999991445345928, iteration: 149456
loss: 1.0137807130813599,grad_norm: 0.9999992183938305, iteration: 149457
loss: 0.9804924726486206,grad_norm: 0.9999990456808459, iteration: 149458
loss: 1.027286171913147,grad_norm: 0.980677950758055, iteration: 149459
loss: 0.9943070411682129,grad_norm: 0.99999918595139, iteration: 149460
loss: 1.0440791845321655,grad_norm: 0.9979891899264212, iteration: 149461
loss: 1.000685214996338,grad_norm: 0.9732809133435175, iteration: 149462
loss: 0.9818404316902161,grad_norm: 0.9999990717056397, iteration: 149463
loss: 1.0149275064468384,grad_norm: 0.9953961759072466, iteration: 149464
loss: 1.0003968477249146,grad_norm: 0.9999989041688221, iteration: 149465
loss: 0.9891286492347717,grad_norm: 0.9731344040633777, iteration: 149466
loss: 0.9865765571594238,grad_norm: 0.9999991126153938, iteration: 149467
loss: 0.9927888512611389,grad_norm: 0.9999991253707841, iteration: 149468
loss: 1.0267750024795532,grad_norm: 0.9877944805104517, iteration: 149469
loss: 0.9937810897827148,grad_norm: 0.9026901378286909, iteration: 149470
loss: 1.0318812131881714,grad_norm: 0.9999992447134507, iteration: 149471
loss: 1.005761742591858,grad_norm: 0.9999992041969169, iteration: 149472
loss: 0.9996035695075989,grad_norm: 0.8943030276900985, iteration: 149473
loss: 1.0029504299163818,grad_norm: 0.9600909523284857, iteration: 149474
loss: 1.0076231956481934,grad_norm: 0.9864618495136365, iteration: 149475
loss: 1.0015217065811157,grad_norm: 0.9999991095759184, iteration: 149476
loss: 0.9907965660095215,grad_norm: 0.999999016554192, iteration: 149477
loss: 1.0076488256454468,grad_norm: 0.8842201090731838, iteration: 149478
loss: 1.015612006187439,grad_norm: 0.9999991391289157, iteration: 149479
loss: 1.0172362327575684,grad_norm: 0.9999991531910126, iteration: 149480
loss: 0.990064799785614,grad_norm: 0.9999992506769848, iteration: 149481
loss: 1.0071545839309692,grad_norm: 0.9999992323212541, iteration: 149482
loss: 0.9732862114906311,grad_norm: 0.951796726375404, iteration: 149483
loss: 1.0087788105010986,grad_norm: 0.9999991182174318, iteration: 149484
loss: 0.9506431818008423,grad_norm: 0.9999991276311128, iteration: 149485
loss: 1.0086166858673096,grad_norm: 0.9694727506542664, iteration: 149486
loss: 0.9608219861984253,grad_norm: 0.9969392731911325, iteration: 149487
loss: 0.9860228896141052,grad_norm: 0.9999992677580049, iteration: 149488
loss: 0.9540877342224121,grad_norm: 0.9999992568765876, iteration: 149489
loss: 1.0166631937026978,grad_norm: 0.9999990010495289, iteration: 149490
loss: 1.0257387161254883,grad_norm: 0.9143792806403793, iteration: 149491
loss: 1.0373189449310303,grad_norm: 0.9187597585391569, iteration: 149492
loss: 0.9723596572875977,grad_norm: 0.9999990643085412, iteration: 149493
loss: 1.0027425289154053,grad_norm: 0.9845103039296181, iteration: 149494
loss: 0.9943832159042358,grad_norm: 0.9524452146620199, iteration: 149495
loss: 0.994775116443634,grad_norm: 0.9999990823303855, iteration: 149496
loss: 1.0431797504425049,grad_norm: 0.9999990802012049, iteration: 149497
loss: 0.996518611907959,grad_norm: 0.8731148621538842, iteration: 149498
loss: 1.0084328651428223,grad_norm: 0.9999990389805098, iteration: 149499
loss: 0.9766567349433899,grad_norm: 0.999999121353219, iteration: 149500
loss: 0.9946132898330688,grad_norm: 0.999999112451466, iteration: 149501
loss: 0.9993045926094055,grad_norm: 0.9390457708920658, iteration: 149502
loss: 1.007419228553772,grad_norm: 0.9643024403357582, iteration: 149503
loss: 0.9943060278892517,grad_norm: 0.9728558110458977, iteration: 149504
loss: 0.9818392992019653,grad_norm: 0.985553320758982, iteration: 149505
loss: 1.0016882419586182,grad_norm: 0.9999992326779634, iteration: 149506
loss: 0.9829709529876709,grad_norm: 0.9245157643178348, iteration: 149507
loss: 1.005530834197998,grad_norm: 0.999999182726769, iteration: 149508
loss: 0.9610704779624939,grad_norm: 0.9452336450824366, iteration: 149509
loss: 0.9849711656570435,grad_norm: 0.9386218127964225, iteration: 149510
loss: 0.9913382530212402,grad_norm: 0.9999990110812083, iteration: 149511
loss: 1.2119460105895996,grad_norm: 0.9999993528548149, iteration: 149512
loss: 1.0092198848724365,grad_norm: 0.8699571278811667, iteration: 149513
loss: 1.0343307256698608,grad_norm: 0.9999991450663094, iteration: 149514
loss: 1.030283808708191,grad_norm: 0.9999992363426733, iteration: 149515
loss: 1.0085368156433105,grad_norm: 0.999999115124479, iteration: 149516
loss: 0.9842740893363953,grad_norm: 0.882807188985419, iteration: 149517
loss: 0.9858542680740356,grad_norm: 0.9999991431740388, iteration: 149518
loss: 1.0017096996307373,grad_norm: 0.8535047239675432, iteration: 149519
loss: 0.9741881489753723,grad_norm: 0.8790140015727531, iteration: 149520
loss: 1.0063048601150513,grad_norm: 0.9999990163860341, iteration: 149521
loss: 1.0168927907943726,grad_norm: 0.9625549287506554, iteration: 149522
loss: 1.0115107297897339,grad_norm: 0.99999920027462, iteration: 149523
loss: 1.028333306312561,grad_norm: 0.9999992201591441, iteration: 149524
loss: 1.0376946926116943,grad_norm: 0.9999990849439413, iteration: 149525
loss: 0.9834721684455872,grad_norm: 0.9927756904072864, iteration: 149526
loss: 0.969988226890564,grad_norm: 0.9999991377839949, iteration: 149527
loss: 1.015336275100708,grad_norm: 0.8749876047661154, iteration: 149528
loss: 0.987351655960083,grad_norm: 0.9999991602160874, iteration: 149529
loss: 0.977806568145752,grad_norm: 0.9014631496320761, iteration: 149530
loss: 0.9952692985534668,grad_norm: 0.9999990781986627, iteration: 149531
loss: 0.9769088625907898,grad_norm: 0.9999993352855412, iteration: 149532
loss: 0.9703413844108582,grad_norm: 0.8486590914990763, iteration: 149533
loss: 1.0131747722625732,grad_norm: 0.851357451901227, iteration: 149534
loss: 0.9889090657234192,grad_norm: 0.9999991486402047, iteration: 149535
loss: 1.012710452079773,grad_norm: 0.9999992241042046, iteration: 149536
loss: 1.0315686464309692,grad_norm: 0.9999992353344135, iteration: 149537
loss: 0.9879732131958008,grad_norm: 0.9433100959821281, iteration: 149538
loss: 1.0171886682510376,grad_norm: 0.9999991249859432, iteration: 149539
loss: 0.9788724780082703,grad_norm: 0.9999990249704978, iteration: 149540
loss: 0.9722252488136292,grad_norm: 0.9534021644873201, iteration: 149541
loss: 1.0200915336608887,grad_norm: 0.9486680490925932, iteration: 149542
loss: 0.9850097894668579,grad_norm: 0.9999991380303005, iteration: 149543
loss: 0.980868935585022,grad_norm: 0.9999990316268962, iteration: 149544
loss: 0.9966276288032532,grad_norm: 0.8953522361911559, iteration: 149545
loss: 1.0210644006729126,grad_norm: 0.8364162127766617, iteration: 149546
loss: 1.0057213306427002,grad_norm: 0.9999995873408661, iteration: 149547
loss: 1.0192570686340332,grad_norm: 0.9317630033292433, iteration: 149548
loss: 0.9634611010551453,grad_norm: 0.999999123240961, iteration: 149549
loss: 0.9570063948631287,grad_norm: 0.9999989540790375, iteration: 149550
loss: 0.9729885458946228,grad_norm: 0.9926637001508526, iteration: 149551
loss: 1.0014883279800415,grad_norm: 0.9999990846395195, iteration: 149552
loss: 0.9951349496841431,grad_norm: 0.9294434258830526, iteration: 149553
loss: 0.983881413936615,grad_norm: 0.999999128199901, iteration: 149554
loss: 1.015908122062683,grad_norm: 0.8833825779836253, iteration: 149555
loss: 0.9801027774810791,grad_norm: 0.9999992628786768, iteration: 149556
loss: 1.0412741899490356,grad_norm: 0.988171007446793, iteration: 149557
loss: 1.0126707553863525,grad_norm: 0.9999990877926244, iteration: 149558
loss: 1.012048363685608,grad_norm: 0.9999991364006655, iteration: 149559
loss: 1.0234671831130981,grad_norm: 0.9999990401731643, iteration: 149560
loss: 0.9855607748031616,grad_norm: 0.9999991195888094, iteration: 149561
loss: 1.0124456882476807,grad_norm: 0.8426478644513459, iteration: 149562
loss: 0.9883449077606201,grad_norm: 0.7473115160005994, iteration: 149563
loss: 0.9666395783424377,grad_norm: 0.9880109511623311, iteration: 149564
loss: 0.9627519845962524,grad_norm: 0.9999991702379937, iteration: 149565
loss: 0.9824445843696594,grad_norm: 0.9999992357059213, iteration: 149566
loss: 1.0159509181976318,grad_norm: 0.9999991026972838, iteration: 149567
loss: 0.9709644317626953,grad_norm: 0.9999991871530143, iteration: 149568
loss: 1.0160924196243286,grad_norm: 0.9623561765537726, iteration: 149569
loss: 0.9929829835891724,grad_norm: 0.9999991272167555, iteration: 149570
loss: 0.9971364736557007,grad_norm: 0.9999990158376446, iteration: 149571
loss: 1.0156149864196777,grad_norm: 0.999999076828455, iteration: 149572
loss: 0.9933170080184937,grad_norm: 0.9999990457964794, iteration: 149573
loss: 1.0115654468536377,grad_norm: 0.95498893694129, iteration: 149574
loss: 1.0070122480392456,grad_norm: 0.9999991819730887, iteration: 149575
loss: 0.9606252908706665,grad_norm: 0.9823163916968293, iteration: 149576
loss: 0.9721643924713135,grad_norm: 0.9999991588456869, iteration: 149577
loss: 1.0348868370056152,grad_norm: 0.9321457947105071, iteration: 149578
loss: 1.0057967901229858,grad_norm: 0.9999991025470425, iteration: 149579
loss: 0.995255708694458,grad_norm: 0.9999990548234692, iteration: 149580
loss: 1.083956241607666,grad_norm: 0.9999992846187621, iteration: 149581
loss: 1.050876498222351,grad_norm: 0.999999172121685, iteration: 149582
loss: 1.0328627824783325,grad_norm: 0.9999991902661031, iteration: 149583
loss: 0.999472439289093,grad_norm: 0.8809044564239384, iteration: 149584
loss: 0.9910053014755249,grad_norm: 0.9789671705355146, iteration: 149585
loss: 0.9964464902877808,grad_norm: 0.9196734989995963, iteration: 149586
loss: 1.0091958045959473,grad_norm: 0.999999030320817, iteration: 149587
loss: 0.9874848127365112,grad_norm: 0.9999991847967022, iteration: 149588
loss: 0.9894276261329651,grad_norm: 0.9999991081743377, iteration: 149589
loss: 1.0111559629440308,grad_norm: 0.8952111469159033, iteration: 149590
loss: 1.002719759941101,grad_norm: 0.9999991211507987, iteration: 149591
loss: 0.9935994148254395,grad_norm: 0.8764917386245028, iteration: 149592
loss: 1.015828251838684,grad_norm: 0.9999990114877566, iteration: 149593
loss: 0.9952747225761414,grad_norm: 0.9999989740596013, iteration: 149594
loss: 1.0275976657867432,grad_norm: 0.999999210302763, iteration: 149595
loss: 1.034266710281372,grad_norm: 0.9999990872769347, iteration: 149596
loss: 0.9974706172943115,grad_norm: 0.9999989713484909, iteration: 149597
loss: 1.0020002126693726,grad_norm: 0.9999992620086345, iteration: 149598
loss: 0.968238890171051,grad_norm: 0.9999992691874652, iteration: 149599
loss: 0.984036386013031,grad_norm: 0.976876673515232, iteration: 149600
loss: 1.0101640224456787,grad_norm: 0.9999991910970959, iteration: 149601
loss: 0.996207058429718,grad_norm: 0.9999991317054965, iteration: 149602
loss: 1.0123279094696045,grad_norm: 0.962975932515281, iteration: 149603
loss: 0.977493166923523,grad_norm: 0.9999991021750724, iteration: 149604
loss: 0.9544060826301575,grad_norm: 0.9999991564487757, iteration: 149605
loss: 0.9718149900436401,grad_norm: 0.8989519204503897, iteration: 149606
loss: 1.0153213739395142,grad_norm: 0.999999140134724, iteration: 149607
loss: 1.0106836557388306,grad_norm: 0.9675775084794603, iteration: 149608
loss: 1.04539155960083,grad_norm: 0.9999990250107809, iteration: 149609
loss: 1.015102505683899,grad_norm: 0.9999992023127903, iteration: 149610
loss: 0.921802818775177,grad_norm: 0.9999992872814936, iteration: 149611
loss: 0.9425241351127625,grad_norm: 0.9999991223810835, iteration: 149612
loss: 1.0021629333496094,grad_norm: 0.9615658871720656, iteration: 149613
loss: 0.9865803122520447,grad_norm: 0.7581418644874044, iteration: 149614
loss: 1.047125220298767,grad_norm: 0.9922016672837524, iteration: 149615
loss: 1.0295274257659912,grad_norm: 0.9999992323202023, iteration: 149616
loss: 0.9934914112091064,grad_norm: 0.8774979501792323, iteration: 149617
loss: 1.006545066833496,grad_norm: 0.9999990898269997, iteration: 149618
loss: 1.0066176652908325,grad_norm: 0.9614524944028094, iteration: 149619
loss: 0.9957095980644226,grad_norm: 0.9999991294295265, iteration: 149620
loss: 1.0466159582138062,grad_norm: 0.9999999629970912, iteration: 149621
loss: 0.9814690351486206,grad_norm: 0.9537001482695765, iteration: 149622
loss: 0.9988595247268677,grad_norm: 0.9999991258057208, iteration: 149623
loss: 0.9495738744735718,grad_norm: 0.9232799729123753, iteration: 149624
loss: 1.0080748796463013,grad_norm: 0.9709640626288283, iteration: 149625
loss: 1.0333842039108276,grad_norm: 0.9659092988649847, iteration: 149626
loss: 0.990181028842926,grad_norm: 0.9999991636135697, iteration: 149627
loss: 1.060915470123291,grad_norm: 0.9999991680725805, iteration: 149628
loss: 0.9958119988441467,grad_norm: 0.885109889688557, iteration: 149629
loss: 0.9781005382537842,grad_norm: 0.8273601085713262, iteration: 149630
loss: 0.9824244976043701,grad_norm: 0.999998893011161, iteration: 149631
loss: 0.9836350083351135,grad_norm: 0.9784451124400799, iteration: 149632
loss: 1.0038377046585083,grad_norm: 0.9999990911547156, iteration: 149633
loss: 1.0002837181091309,grad_norm: 0.9615047821358512, iteration: 149634
loss: 1.0158333778381348,grad_norm: 0.9843125745673557, iteration: 149635
loss: 1.0092159509658813,grad_norm: 0.9999991946508796, iteration: 149636
loss: 0.9762734174728394,grad_norm: 0.9999991258506654, iteration: 149637
loss: 0.975945234298706,grad_norm: 0.999999129137353, iteration: 149638
loss: 1.0142210721969604,grad_norm: 0.91988956171338, iteration: 149639
loss: 1.0118592977523804,grad_norm: 0.9968665164842867, iteration: 149640
loss: 0.9772243499755859,grad_norm: 0.9999992955239175, iteration: 149641
loss: 1.0200411081314087,grad_norm: 0.9623957967566324, iteration: 149642
loss: 1.0149492025375366,grad_norm: 0.9999991026259015, iteration: 149643
loss: 1.0022000074386597,grad_norm: 0.8440457445275777, iteration: 149644
loss: 1.0246589183807373,grad_norm: 0.9999990454615054, iteration: 149645
loss: 1.0011427402496338,grad_norm: 0.8969282876460228, iteration: 149646
loss: 1.0171129703521729,grad_norm: 0.9999993204600891, iteration: 149647
loss: 1.009090781211853,grad_norm: 0.9999993331014797, iteration: 149648
loss: 0.99295574426651,grad_norm: 0.9999991399410538, iteration: 149649
loss: 0.9547358751296997,grad_norm: 0.9702319121862765, iteration: 149650
loss: 1.0282450914382935,grad_norm: 0.9928168278887011, iteration: 149651
loss: 1.0180588960647583,grad_norm: 0.9999993128521749, iteration: 149652
loss: 0.9555335640907288,grad_norm: 0.9999990779849492, iteration: 149653
loss: 1.0019389390945435,grad_norm: 0.9999990268978199, iteration: 149654
loss: 0.993950366973877,grad_norm: 0.9235059908483466, iteration: 149655
loss: 1.01887047290802,grad_norm: 0.9999993384815001, iteration: 149656
loss: 0.9987665414810181,grad_norm: 0.9999991525857749, iteration: 149657
loss: 0.9940145611763,grad_norm: 0.9030312508034243, iteration: 149658
loss: 1.0165156126022339,grad_norm: 0.9244565334512808, iteration: 149659
loss: 0.9817127585411072,grad_norm: 0.9999990395153837, iteration: 149660
loss: 0.9658308029174805,grad_norm: 0.999999171496888, iteration: 149661
loss: 0.9727978110313416,grad_norm: 0.9999998734566727, iteration: 149662
loss: 0.9838609099388123,grad_norm: 0.9106362849697106, iteration: 149663
loss: 0.9529861211776733,grad_norm: 0.968074726654158, iteration: 149664
loss: 1.004714012145996,grad_norm: 0.8805755352166049, iteration: 149665
loss: 0.9850846529006958,grad_norm: 0.9999989700512062, iteration: 149666
loss: 1.0015769004821777,grad_norm: 0.9999991367672041, iteration: 149667
loss: 0.9986644983291626,grad_norm: 0.9759063476435661, iteration: 149668
loss: 1.0142699480056763,grad_norm: 0.8915945371244519, iteration: 149669
loss: 1.0520310401916504,grad_norm: 0.9999991305265951, iteration: 149670
loss: 1.032906413078308,grad_norm: 0.9999991201237152, iteration: 149671
loss: 1.025460124015808,grad_norm: 0.9999993890161695, iteration: 149672
loss: 0.978786289691925,grad_norm: 0.9999989365930576, iteration: 149673
loss: 0.9915717840194702,grad_norm: 0.8560134532788356, iteration: 149674
loss: 1.0230027437210083,grad_norm: 0.9448386221463246, iteration: 149675
loss: 1.0090855360031128,grad_norm: 0.8527391943485574, iteration: 149676
loss: 0.9879932999610901,grad_norm: 0.9810583345969993, iteration: 149677
loss: 1.0232312679290771,grad_norm: 0.9999993559925332, iteration: 149678
loss: 0.9978215098381042,grad_norm: 0.8732923621858771, iteration: 149679
loss: 1.006628394126892,grad_norm: 0.999999154679097, iteration: 149680
loss: 1.0070195198059082,grad_norm: 0.8483148118644708, iteration: 149681
loss: 1.0071659088134766,grad_norm: 0.9999989350351955, iteration: 149682
loss: 1.001587986946106,grad_norm: 0.9999990973632783, iteration: 149683
loss: 0.992337703704834,grad_norm: 0.9999990879105761, iteration: 149684
loss: 1.0073078870773315,grad_norm: 0.8817139133092511, iteration: 149685
loss: 1.0004515647888184,grad_norm: 0.9362412193870393, iteration: 149686
loss: 1.0157902240753174,grad_norm: 0.9999990228531536, iteration: 149687
loss: 1.0020390748977661,grad_norm: 0.9999989496992406, iteration: 149688
loss: 0.9531505107879639,grad_norm: 0.9999991540504857, iteration: 149689
loss: 0.9954110383987427,grad_norm: 0.9999992079192374, iteration: 149690
loss: 0.9992616176605225,grad_norm: 0.8909654092502017, iteration: 149691
loss: 0.9960693717002869,grad_norm: 0.9866963118667474, iteration: 149692
loss: 0.9755755662918091,grad_norm: 0.9576647800658643, iteration: 149693
loss: 0.9982002377510071,grad_norm: 0.9874505185805784, iteration: 149694
loss: 1.0073994398117065,grad_norm: 0.9999991137879414, iteration: 149695
loss: 0.9903376698493958,grad_norm: 0.9999991889600116, iteration: 149696
loss: 0.980755090713501,grad_norm: 0.9999990168512632, iteration: 149697
loss: 1.022261619567871,grad_norm: 0.9999989185687611, iteration: 149698
loss: 1.0062423944473267,grad_norm: 0.9999990422220365, iteration: 149699
loss: 0.9908910989761353,grad_norm: 0.9045041589454403, iteration: 149700
loss: 1.0201845169067383,grad_norm: 0.9999990389710116, iteration: 149701
loss: 1.0107696056365967,grad_norm: 0.9999991859520299, iteration: 149702
loss: 0.9740042090415955,grad_norm: 0.8129325386624908, iteration: 149703
loss: 0.9647836089134216,grad_norm: 0.8667617048580255, iteration: 149704
loss: 0.9727911353111267,grad_norm: 0.9225032843088193, iteration: 149705
loss: 0.9578811526298523,grad_norm: 0.9999991034653536, iteration: 149706
loss: 0.9698945879936218,grad_norm: 0.9999991490516221, iteration: 149707
loss: 0.9924247860908508,grad_norm: 0.9156552020165492, iteration: 149708
loss: 0.9704272150993347,grad_norm: 0.9999991056095159, iteration: 149709
loss: 1.021124243736267,grad_norm: 0.999999148533748, iteration: 149710
loss: 1.0175727605819702,grad_norm: 0.9999991270292152, iteration: 149711
loss: 1.0117729902267456,grad_norm: 0.9999992051973197, iteration: 149712
loss: 1.0154304504394531,grad_norm: 0.9530771168751968, iteration: 149713
loss: 1.008658766746521,grad_norm: 0.9999990045411357, iteration: 149714
loss: 1.0057599544525146,grad_norm: 0.9999997724298166, iteration: 149715
loss: 1.0044059753417969,grad_norm: 0.9999989704659775, iteration: 149716
loss: 0.9700717329978943,grad_norm: 0.9449796161199676, iteration: 149717
loss: 1.002501368522644,grad_norm: 0.9999991708745305, iteration: 149718
loss: 0.996867299079895,grad_norm: 0.9999991087570741, iteration: 149719
loss: 1.009657859802246,grad_norm: 0.9999991850724349, iteration: 149720
loss: 0.9521738886833191,grad_norm: 0.9999990225576582, iteration: 149721
loss: 1.0136395692825317,grad_norm: 0.9999990563391726, iteration: 149722
loss: 1.0193274021148682,grad_norm: 0.9984376860950879, iteration: 149723
loss: 0.9657403826713562,grad_norm: 0.9999990560337819, iteration: 149724
loss: 1.0128788948059082,grad_norm: 0.937207078139061, iteration: 149725
loss: 1.0041087865829468,grad_norm: 0.9235182269833051, iteration: 149726
loss: 0.9914325475692749,grad_norm: 0.8783999022234369, iteration: 149727
loss: 1.0245599746704102,grad_norm: 0.9999991440249149, iteration: 149728
loss: 1.0381741523742676,grad_norm: 0.999999233643217, iteration: 149729
loss: 0.9829643964767456,grad_norm: 0.9999991117695225, iteration: 149730
loss: 1.030165672302246,grad_norm: 0.9999992737203783, iteration: 149731
loss: 1.0117329359054565,grad_norm: 0.9426441932663608, iteration: 149732
loss: 0.971432626247406,grad_norm: 0.9999991450497664, iteration: 149733
loss: 1.0083726644515991,grad_norm: 0.999999160728241, iteration: 149734
loss: 0.9984652996063232,grad_norm: 0.9837914059677657, iteration: 149735
loss: 0.9859864711761475,grad_norm: 0.9351545189635931, iteration: 149736
loss: 1.0161207914352417,grad_norm: 0.9413397941544612, iteration: 149737
loss: 1.0191662311553955,grad_norm: 0.9999989863426282, iteration: 149738
loss: 1.0366129875183105,grad_norm: 0.9471308467455052, iteration: 149739
loss: 0.9706297516822815,grad_norm: 0.9999991091861106, iteration: 149740
loss: 1.0296238660812378,grad_norm: 0.9999992413286298, iteration: 149741
loss: 1.008429765701294,grad_norm: 0.9999988768455647, iteration: 149742
loss: 0.9855610132217407,grad_norm: 0.8712988986835759, iteration: 149743
loss: 1.049923062324524,grad_norm: 0.9999998180272776, iteration: 149744
loss: 1.0039607286453247,grad_norm: 0.9999991424512235, iteration: 149745
loss: 1.0438505411148071,grad_norm: 0.9999991328903195, iteration: 149746
loss: 0.9554919004440308,grad_norm: 0.9774292656336643, iteration: 149747
loss: 1.0247973203659058,grad_norm: 0.9997594361299053, iteration: 149748
loss: 0.9633939862251282,grad_norm: 0.9728215178562399, iteration: 149749
loss: 0.9726600646972656,grad_norm: 0.940177130766914, iteration: 149750
loss: 1.034691333770752,grad_norm: 0.9999992751485978, iteration: 149751
loss: 1.0040181875228882,grad_norm: 0.8661634192037063, iteration: 149752
loss: 0.9934717416763306,grad_norm: 0.999999177250505, iteration: 149753
loss: 1.005275845527649,grad_norm: 0.9999991836172423, iteration: 149754
loss: 1.0070184469223022,grad_norm: 0.9999990859925142, iteration: 149755
loss: 0.9890357255935669,grad_norm: 0.9285234589512914, iteration: 149756
loss: 0.9755268692970276,grad_norm: 0.9999991284754972, iteration: 149757
loss: 0.983495831489563,grad_norm: 0.9849237225927905, iteration: 149758
loss: 1.0271434783935547,grad_norm: 0.9999991405684038, iteration: 149759
loss: 0.9973756670951843,grad_norm: 0.9999991519167643, iteration: 149760
loss: 0.9946446418762207,grad_norm: 0.9691640083358483, iteration: 149761
loss: 0.9984696507453918,grad_norm: 0.9999991452083103, iteration: 149762
loss: 1.0242292881011963,grad_norm: 0.9999994847190552, iteration: 149763
loss: 0.9799361228942871,grad_norm: 0.999998990709365, iteration: 149764
loss: 1.0003083944320679,grad_norm: 0.999999260631467, iteration: 149765
loss: 1.0086493492126465,grad_norm: 0.9150965076957998, iteration: 149766
loss: 1.0106531381607056,grad_norm: 0.9999991613158821, iteration: 149767
loss: 0.9990150332450867,grad_norm: 0.9999991018967409, iteration: 149768
loss: 1.000286340713501,grad_norm: 0.9049533616175949, iteration: 149769
loss: 0.97371906042099,grad_norm: 0.9820501580355492, iteration: 149770
loss: 0.9660435914993286,grad_norm: 0.9999990037875256, iteration: 149771
loss: 0.9954326748847961,grad_norm: 0.9359409450151583, iteration: 149772
loss: 0.9938610196113586,grad_norm: 0.7911593592523107, iteration: 149773
loss: 1.0056273937225342,grad_norm: 0.8975737361329569, iteration: 149774
loss: 1.0155839920043945,grad_norm: 0.999999133639913, iteration: 149775
loss: 1.051688313484192,grad_norm: 0.9621279679061568, iteration: 149776
loss: 1.1757451295852661,grad_norm: 0.9999995455333948, iteration: 149777
loss: 0.987153172492981,grad_norm: 0.9999993204233824, iteration: 149778
loss: 1.004262924194336,grad_norm: 0.999999244071994, iteration: 149779
loss: 1.0178704261779785,grad_norm: 0.9908181439256549, iteration: 149780
loss: 0.9747455716133118,grad_norm: 0.9834723149292558, iteration: 149781
loss: 1.032361388206482,grad_norm: 0.9919392406282632, iteration: 149782
loss: 1.0496342182159424,grad_norm: 0.8682182942116844, iteration: 149783
loss: 1.000331163406372,grad_norm: 0.9747718659414002, iteration: 149784
loss: 0.965340793132782,grad_norm: 0.9999989849003453, iteration: 149785
loss: 0.9967708587646484,grad_norm: 0.9561508884188962, iteration: 149786
loss: 0.9758507609367371,grad_norm: 0.9999990769470506, iteration: 149787
loss: 1.0279133319854736,grad_norm: 0.8616943173760475, iteration: 149788
loss: 0.9922788143157959,grad_norm: 0.9625379805573043, iteration: 149789
loss: 0.987331748008728,grad_norm: 0.955543858740889, iteration: 149790
loss: 1.0128047466278076,grad_norm: 0.8409863771636287, iteration: 149791
loss: 0.9824519157409668,grad_norm: 0.8464211460266535, iteration: 149792
loss: 1.0000511407852173,grad_norm: 0.9999990200885477, iteration: 149793
loss: 0.9923540949821472,grad_norm: 0.9999991048357347, iteration: 149794
loss: 1.0257025957107544,grad_norm: 0.9682364457462296, iteration: 149795
loss: 0.9743885397911072,grad_norm: 0.9999989506438666, iteration: 149796
loss: 0.9794517755508423,grad_norm: 0.9999990576431399, iteration: 149797
loss: 0.9515098333358765,grad_norm: 0.8599879888611278, iteration: 149798
loss: 0.9622974395751953,grad_norm: 0.9709069468756109, iteration: 149799
loss: 0.9983018040657043,grad_norm: 0.9999990457594113, iteration: 149800
loss: 1.0067024230957031,grad_norm: 0.9423447148638854, iteration: 149801
loss: 0.9822679758071899,grad_norm: 0.9999990188887642, iteration: 149802
loss: 1.008744239807129,grad_norm: 0.9999991431168739, iteration: 149803
loss: 0.973747193813324,grad_norm: 0.970399748967649, iteration: 149804
loss: 0.9455821514129639,grad_norm: 0.9677429105664894, iteration: 149805
loss: 0.9954961538314819,grad_norm: 0.9999992103208307, iteration: 149806
loss: 0.976935088634491,grad_norm: 0.9886726291089132, iteration: 149807
loss: 1.009135127067566,grad_norm: 0.9999990962738714, iteration: 149808
loss: 1.0109832286834717,grad_norm: 0.8773406009473669, iteration: 149809
loss: 1.0113970041275024,grad_norm: 0.9999991876197439, iteration: 149810
loss: 1.0072160959243774,grad_norm: 0.9999991157326907, iteration: 149811
loss: 0.9917504191398621,grad_norm: 0.9999991694493121, iteration: 149812
loss: 1.0176308155059814,grad_norm: 0.9999991985123294, iteration: 149813
loss: 0.9708691239356995,grad_norm: 0.9999993196633356, iteration: 149814
loss: 0.9830846190452576,grad_norm: 0.9999991100821485, iteration: 149815
loss: 1.0112276077270508,grad_norm: 0.9391931295822608, iteration: 149816
loss: 1.0183521509170532,grad_norm: 0.9999989784412947, iteration: 149817
loss: 0.9928455352783203,grad_norm: 0.9900039316206882, iteration: 149818
loss: 1.083268404006958,grad_norm: 0.9086022850879399, iteration: 149819
loss: 1.00523841381073,grad_norm: 0.9999991757949986, iteration: 149820
loss: 1.0048785209655762,grad_norm: 0.9148467948129825, iteration: 149821
loss: 1.0166218280792236,grad_norm: 0.7854744206214973, iteration: 149822
loss: 0.970858097076416,grad_norm: 0.9936967370022987, iteration: 149823
loss: 0.997998833656311,grad_norm: 0.99956850899356, iteration: 149824
loss: 1.0024648904800415,grad_norm: 0.9999991652417912, iteration: 149825
loss: 0.9722519516944885,grad_norm: 0.9361872018850365, iteration: 149826
loss: 0.9728045463562012,grad_norm: 0.9999991286570801, iteration: 149827
loss: 1.0221508741378784,grad_norm: 0.9779045029719062, iteration: 149828
loss: 1.0205718278884888,grad_norm: 0.9999992366578319, iteration: 149829
loss: 0.9916797876358032,grad_norm: 0.9999993410258428, iteration: 149830
loss: 0.9971686601638794,grad_norm: 0.9999992329731623, iteration: 149831
loss: 0.9753341674804688,grad_norm: 0.896600027277659, iteration: 149832
loss: 1.0132360458374023,grad_norm: 0.9999994174829525, iteration: 149833
loss: 0.9634253978729248,grad_norm: 0.9683011091826351, iteration: 149834
loss: 0.9874927997589111,grad_norm: 0.9999990365481252, iteration: 149835
loss: 1.0158766508102417,grad_norm: 0.9999991542480655, iteration: 149836
loss: 0.9937935471534729,grad_norm: 0.9999992006516837, iteration: 149837
loss: 0.9803904891014099,grad_norm: 0.9999989219590916, iteration: 149838
loss: 0.9904042482376099,grad_norm: 0.9999990702509745, iteration: 149839
loss: 0.9526954889297485,grad_norm: 0.9999993821165511, iteration: 149840
loss: 1.0108649730682373,grad_norm: 0.9999991550450965, iteration: 149841
loss: 1.000167727470398,grad_norm: 0.9999990864041214, iteration: 149842
loss: 1.039091944694519,grad_norm: 0.9999991534843907, iteration: 149843
loss: 0.9880273938179016,grad_norm: 0.9644385285351053, iteration: 149844
loss: 0.9853983521461487,grad_norm: 0.9784189102370163, iteration: 149845
loss: 1.0126484632492065,grad_norm: 0.934146519418768, iteration: 149846
loss: 1.0008690357208252,grad_norm: 0.9999990600348028, iteration: 149847
loss: 1.025844931602478,grad_norm: 0.9999994829097659, iteration: 149848
loss: 1.0187549591064453,grad_norm: 0.9999990449544177, iteration: 149849
loss: 1.0050883293151855,grad_norm: 0.9999991295418185, iteration: 149850
loss: 1.0032439231872559,grad_norm: 0.9999991157593805, iteration: 149851
loss: 0.9932711720466614,grad_norm: 0.9540063454480389, iteration: 149852
loss: 0.972968339920044,grad_norm: 0.9999990878091782, iteration: 149853
loss: 0.9953336119651794,grad_norm: 0.8808917898905246, iteration: 149854
loss: 0.9559829235076904,grad_norm: 0.9999992600180628, iteration: 149855
loss: 1.0153158903121948,grad_norm: 0.9999992655528765, iteration: 149856
loss: 0.9418954253196716,grad_norm: 0.9999992401111808, iteration: 149857
loss: 1.012345314025879,grad_norm: 0.9999991706661988, iteration: 149858
loss: 1.1234267950057983,grad_norm: 0.999999011678222, iteration: 149859
loss: 1.034468173980713,grad_norm: 0.9999993077838357, iteration: 149860
loss: 0.9712164998054504,grad_norm: 0.9674175310117366, iteration: 149861
loss: 1.0217090845108032,grad_norm: 0.9999991868679531, iteration: 149862
loss: 1.0262820720672607,grad_norm: 0.8471673057824202, iteration: 149863
loss: 1.0158036947250366,grad_norm: 0.9999991604718887, iteration: 149864
loss: 1.0265010595321655,grad_norm: 0.8752451625720027, iteration: 149865
loss: 1.01181161403656,grad_norm: 0.999998965815092, iteration: 149866
loss: 1.0282483100891113,grad_norm: 0.9999989296753433, iteration: 149867
loss: 0.9947391152381897,grad_norm: 0.9999989512563986, iteration: 149868
loss: 1.0370689630508423,grad_norm: 0.9999990928548446, iteration: 149869
loss: 1.0038731098175049,grad_norm: 0.9585217837973168, iteration: 149870
loss: 0.9701082110404968,grad_norm: 0.9999990057736878, iteration: 149871
loss: 0.9944976568222046,grad_norm: 0.9715401082400364, iteration: 149872
loss: 1.0089792013168335,grad_norm: 0.9999992487929337, iteration: 149873
loss: 1.0775777101516724,grad_norm: 0.9999998102154717, iteration: 149874
loss: 1.0365978479385376,grad_norm: 0.9212965167940775, iteration: 149875
loss: 0.9788704514503479,grad_norm: 0.9397730489215345, iteration: 149876
loss: 1.032058835029602,grad_norm: 0.999998980866458, iteration: 149877
loss: 1.0194374322891235,grad_norm: 0.8926922107649996, iteration: 149878
loss: 1.0070135593414307,grad_norm: 0.9999992740906598, iteration: 149879
loss: 0.9676747918128967,grad_norm: 0.9947857249352995, iteration: 149880
loss: 1.0021740198135376,grad_norm: 0.8928208740618201, iteration: 149881
loss: 1.0155365467071533,grad_norm: 0.9641363078723398, iteration: 149882
loss: 1.056698203086853,grad_norm: 0.999999317102062, iteration: 149883
loss: 0.9833089709281921,grad_norm: 0.9117027734261082, iteration: 149884
loss: 1.019788146018982,grad_norm: 0.999999116225582, iteration: 149885
loss: 0.9814361333847046,grad_norm: 0.8049169110915846, iteration: 149886
loss: 1.0028588771820068,grad_norm: 0.9413495315216736, iteration: 149887
loss: 0.9893316626548767,grad_norm: 0.9155809539374918, iteration: 149888
loss: 0.9679509997367859,grad_norm: 0.9999991295547109, iteration: 149889
loss: 0.9906301498413086,grad_norm: 0.8100366425603709, iteration: 149890
loss: 0.9807541966438293,grad_norm: 0.9643592214515031, iteration: 149891
loss: 0.996329128742218,grad_norm: 0.999999114958989, iteration: 149892
loss: 0.9817835092544556,grad_norm: 0.8858378060461704, iteration: 149893
loss: 0.9822543263435364,grad_norm: 0.9999992028370431, iteration: 149894
loss: 1.0032199621200562,grad_norm: 0.9999991195803715, iteration: 149895
loss: 1.0015454292297363,grad_norm: 0.8845791849652108, iteration: 149896
loss: 1.0368622541427612,grad_norm: 0.9999990971327336, iteration: 149897
loss: 0.9859409928321838,grad_norm: 0.9987424960963637, iteration: 149898
loss: 0.992520272731781,grad_norm: 0.9999991856433018, iteration: 149899
loss: 1.003642201423645,grad_norm: 0.9999991570539386, iteration: 149900
loss: 1.0041656494140625,grad_norm: 0.9012587643130529, iteration: 149901
loss: 1.0062357187271118,grad_norm: 0.999999119413328, iteration: 149902
loss: 1.0537067651748657,grad_norm: 0.9999992058794303, iteration: 149903
loss: 0.9654303193092346,grad_norm: 0.9433816927119153, iteration: 149904
loss: 0.9589259028434753,grad_norm: 0.9240945505034512, iteration: 149905
loss: 0.9732157588005066,grad_norm: 0.873222177726331, iteration: 149906
loss: 0.9612191915512085,grad_norm: 0.9377330517419323, iteration: 149907
loss: 1.0016270875930786,grad_norm: 0.9999991188819854, iteration: 149908
loss: 0.9867231249809265,grad_norm: 0.9999993008202543, iteration: 149909
loss: 1.0051580667495728,grad_norm: 0.9999991371392135, iteration: 149910
loss: 0.9802483916282654,grad_norm: 0.9750921913278984, iteration: 149911
loss: 1.0010919570922852,grad_norm: 0.822866875153521, iteration: 149912
loss: 0.9868661761283875,grad_norm: 0.999999714322207, iteration: 149913
loss: 0.9975566864013672,grad_norm: 0.999999032575378, iteration: 149914
loss: 1.0017155408859253,grad_norm: 0.9999990569162334, iteration: 149915
loss: 0.9607275724411011,grad_norm: 0.9999990631973684, iteration: 149916
loss: 1.0024040937423706,grad_norm: 0.9999991433729412, iteration: 149917
loss: 1.0188499689102173,grad_norm: 0.9285735271934786, iteration: 149918
loss: 0.990719199180603,grad_norm: 0.9999989198134067, iteration: 149919
loss: 1.026918649673462,grad_norm: 0.9999990635442514, iteration: 149920
loss: 0.9954986572265625,grad_norm: 0.8881661773849393, iteration: 149921
loss: 0.9944130182266235,grad_norm: 0.9999991145373529, iteration: 149922
loss: 0.968961775302887,grad_norm: 0.8277869224350455, iteration: 149923
loss: 0.9787359833717346,grad_norm: 0.9999991379670402, iteration: 149924
loss: 0.969294011592865,grad_norm: 0.9999991381126675, iteration: 149925
loss: 1.0044978857040405,grad_norm: 0.878406271602493, iteration: 149926
loss: 1.0123497247695923,grad_norm: 0.8179872648605597, iteration: 149927
loss: 1.0167148113250732,grad_norm: 0.999999717529872, iteration: 149928
loss: 0.9894336462020874,grad_norm: 0.8986752102998583, iteration: 149929
loss: 0.9961771965026855,grad_norm: 0.9618623762948407, iteration: 149930
loss: 1.019963264465332,grad_norm: 0.8538951726408517, iteration: 149931
loss: 0.9952266216278076,grad_norm: 0.9999991951678988, iteration: 149932
loss: 0.953242838382721,grad_norm: 0.969283743477453, iteration: 149933
loss: 1.0367029905319214,grad_norm: 0.9999992280770038, iteration: 149934
loss: 0.9603528380393982,grad_norm: 0.9999991992972285, iteration: 149935
loss: 1.0254038572311401,grad_norm: 0.9026313555159932, iteration: 149936
loss: 0.9585123062133789,grad_norm: 0.9679651159630491, iteration: 149937
loss: 0.9929720163345337,grad_norm: 0.999999031375148, iteration: 149938
loss: 0.97627854347229,grad_norm: 0.999999067926568, iteration: 149939
loss: 0.9772433042526245,grad_norm: 0.9332599876076065, iteration: 149940
loss: 0.9916264414787292,grad_norm: 0.9999991977591957, iteration: 149941
loss: 1.0164600610733032,grad_norm: 0.9693745164061732, iteration: 149942
loss: 1.01930832862854,grad_norm: 0.9999990094983714, iteration: 149943
loss: 1.000511646270752,grad_norm: 0.9999992371677566, iteration: 149944
loss: 0.9991338849067688,grad_norm: 0.9999988806254247, iteration: 149945
loss: 0.952010452747345,grad_norm: 0.9999989405473313, iteration: 149946
loss: 1.0038516521453857,grad_norm: 0.933540780446975, iteration: 149947
loss: 1.0196632146835327,grad_norm: 0.9999990846076316, iteration: 149948
loss: 1.014822244644165,grad_norm: 0.99999905607136, iteration: 149949
loss: 1.0253691673278809,grad_norm: 0.9999989454464859, iteration: 149950
loss: 0.9720945358276367,grad_norm: 0.9461214941427621, iteration: 149951
loss: 1.0149307250976562,grad_norm: 0.9999992951075688, iteration: 149952
loss: 0.9835003614425659,grad_norm: 0.8972531845980374, iteration: 149953
loss: 1.0089221000671387,grad_norm: 0.9999993654639906, iteration: 149954
loss: 0.9712732434272766,grad_norm: 0.9214095196380622, iteration: 149955
loss: 1.0104780197143555,grad_norm: 0.8520988027160777, iteration: 149956
loss: 1.0188820362091064,grad_norm: 0.9891601687802763, iteration: 149957
loss: 1.0008906126022339,grad_norm: 0.9999991168907794, iteration: 149958
loss: 1.0246127843856812,grad_norm: 0.9999991301831483, iteration: 149959
loss: 0.9906324148178101,grad_norm: 0.9608847480810027, iteration: 149960
loss: 0.9946945905685425,grad_norm: 0.9999992007460375, iteration: 149961
loss: 0.997467041015625,grad_norm: 0.9999991827263247, iteration: 149962
loss: 0.9893112778663635,grad_norm: 0.9847738161293342, iteration: 149963
loss: 1.0201424360275269,grad_norm: 0.9674356250801754, iteration: 149964
loss: 1.0009922981262207,grad_norm: 0.9691209452894475, iteration: 149965
loss: 1.0167229175567627,grad_norm: 0.9999991070697977, iteration: 149966
loss: 1.0015053749084473,grad_norm: 0.9999991842388034, iteration: 149967
loss: 1.0474427938461304,grad_norm: 0.9999991455845925, iteration: 149968
loss: 1.0060029029846191,grad_norm: 0.8062903806403146, iteration: 149969
loss: 0.9685009717941284,grad_norm: 0.9390061144565013, iteration: 149970
loss: 1.002518653869629,grad_norm: 0.9496326344427533, iteration: 149971
loss: 0.9644908308982849,grad_norm: 0.9999990623457296, iteration: 149972
loss: 0.9603942632675171,grad_norm: 0.9999989710161566, iteration: 149973
loss: 1.0393092632293701,grad_norm: 0.9181322973881686, iteration: 149974
loss: 0.9599437713623047,grad_norm: 0.9999989894430547, iteration: 149975
loss: 1.0173132419586182,grad_norm: 0.9999991403476782, iteration: 149976
loss: 1.0238977670669556,grad_norm: 0.974932857953188, iteration: 149977
loss: 0.9729369282722473,grad_norm: 0.9999991319602836, iteration: 149978
loss: 1.05955171585083,grad_norm: 0.9999995819518644, iteration: 149979
loss: 0.9561362266540527,grad_norm: 0.9999996230800386, iteration: 149980
loss: 1.0073775053024292,grad_norm: 0.9999991984938097, iteration: 149981
loss: 0.9786560535430908,grad_norm: 0.9151918456852743, iteration: 149982
loss: 1.027418851852417,grad_norm: 0.9999991395563358, iteration: 149983
loss: 0.9999000430107117,grad_norm: 0.99999907758598, iteration: 149984
loss: 0.9802695512771606,grad_norm: 0.9511402193469978, iteration: 149985
loss: 0.9873712658882141,grad_norm: 0.9999990426460713, iteration: 149986
loss: 1.0013659000396729,grad_norm: 0.9471438050507306, iteration: 149987
loss: 0.989833414554596,grad_norm: 0.9368544515577164, iteration: 149988
loss: 1.0062607526779175,grad_norm: 0.9809146653253842, iteration: 149989
loss: 0.9909828901290894,grad_norm: 0.9748179560890746, iteration: 149990
loss: 1.024634599685669,grad_norm: 0.9344333978327175, iteration: 149991
loss: 1.0088064670562744,grad_norm: 0.9999991923852286, iteration: 149992
loss: 0.9729767441749573,grad_norm: 0.9999990057825888, iteration: 149993
loss: 1.016830563545227,grad_norm: 0.9845968495874939, iteration: 149994
loss: 0.9945420622825623,grad_norm: 0.9999991892986061, iteration: 149995
loss: 0.9904223084449768,grad_norm: 0.999998934176915, iteration: 149996
loss: 1.0181896686553955,grad_norm: 0.926721936341654, iteration: 149997
loss: 1.0241634845733643,grad_norm: 0.9999991030244529, iteration: 149998
loss: 0.9684446454048157,grad_norm: 0.9999989708904087, iteration: 149999
loss: 0.9961110353469849,grad_norm: 0.8746460746021648, iteration: 150000
Evaluating at step 150000
{'val': 0.9952408242970705, 'test': 3.0095992286365254}
loss: 1.0216013193130493,grad_norm: 0.9999989991790482, iteration: 150001
loss: 1.0115101337432861,grad_norm: 0.9999991055490887, iteration: 150002
loss: 1.0503302812576294,grad_norm: 0.9999999826341286, iteration: 150003
loss: 0.982475757598877,grad_norm: 0.8826322703673402, iteration: 150004
loss: 1.0001946687698364,grad_norm: 0.999999276648461, iteration: 150005
loss: 0.9843857884407043,grad_norm: 0.9103931120667116, iteration: 150006
loss: 1.0186593532562256,grad_norm: 0.9999991470520809, iteration: 150007
loss: 1.013567328453064,grad_norm: 0.8208519703916077, iteration: 150008
loss: 1.0099139213562012,grad_norm: 0.9999990177671678, iteration: 150009
loss: 1.0261495113372803,grad_norm: 0.9965212828437112, iteration: 150010
loss: 0.9833546876907349,grad_norm: 0.9999991535068781, iteration: 150011
loss: 1.0167192220687866,grad_norm: 0.9882684255127475, iteration: 150012
loss: 0.9908257126808167,grad_norm: 0.9948388419335072, iteration: 150013
loss: 1.018676996231079,grad_norm: 0.99999914278421, iteration: 150014
loss: 0.9829512238502502,grad_norm: 0.9999990899689172, iteration: 150015
loss: 0.9833915829658508,grad_norm: 0.9999989209400558, iteration: 150016
loss: 1.0010148286819458,grad_norm: 0.9538363418122427, iteration: 150017
loss: 1.000149130821228,grad_norm: 0.9937826436504085, iteration: 150018
loss: 1.0438545942306519,grad_norm: 0.9999991048114507, iteration: 150019
loss: 1.0039254426956177,grad_norm: 0.8157064782254068, iteration: 150020
loss: 1.0021836757659912,grad_norm: 0.9999991093316332, iteration: 150021
loss: 0.9963297843933105,grad_norm: 0.999999179179744, iteration: 150022
loss: 1.01051664352417,grad_norm: 0.9999989810272008, iteration: 150023
loss: 1.1014846563339233,grad_norm: 0.9999993223453407, iteration: 150024
loss: 1.002618432044983,grad_norm: 0.9999991462109571, iteration: 150025
loss: 0.9933990836143494,grad_norm: 0.9992811634771429, iteration: 150026
loss: 0.9959242939949036,grad_norm: 0.9725676387606084, iteration: 150027
loss: 1.0207111835479736,grad_norm: 0.9031908640749223, iteration: 150028
loss: 0.9951401352882385,grad_norm: 0.9581970007858894, iteration: 150029
loss: 1.0303775072097778,grad_norm: 0.9999990383158455, iteration: 150030
loss: 0.992257833480835,grad_norm: 0.8838862003407743, iteration: 150031
loss: 1.0034161806106567,grad_norm: 0.9999991240304704, iteration: 150032
loss: 0.9973060488700867,grad_norm: 0.9999994870604136, iteration: 150033
loss: 0.968063235282898,grad_norm: 0.9999991968747657, iteration: 150034
loss: 0.9908421635627747,grad_norm: 0.9896936151671539, iteration: 150035
loss: 0.9769712686538696,grad_norm: 0.9864452614352089, iteration: 150036
loss: 1.0128127336502075,grad_norm: 0.9999991034217213, iteration: 150037
loss: 1.0786733627319336,grad_norm: 0.999999430325363, iteration: 150038
loss: 0.9802188277244568,grad_norm: 0.9418096335640123, iteration: 150039
loss: 1.0088191032409668,grad_norm: 0.9999989761347646, iteration: 150040
loss: 0.9569111466407776,grad_norm: 0.9863952641704637, iteration: 150041
loss: 1.0091949701309204,grad_norm: 0.9999989580739154, iteration: 150042
loss: 1.0239825248718262,grad_norm: 0.8606870943837919, iteration: 150043
loss: 1.0263313055038452,grad_norm: 0.9999994431564952, iteration: 150044
loss: 1.01749587059021,grad_norm: 0.999999083402866, iteration: 150045
loss: 0.9750301241874695,grad_norm: 0.9366944861368693, iteration: 150046
loss: 1.025660753250122,grad_norm: 0.9483910084708322, iteration: 150047
loss: 0.9699543118476868,grad_norm: 0.9329139913647735, iteration: 150048
loss: 0.9483603835105896,grad_norm: 0.9999990787722451, iteration: 150049
loss: 1.0182456970214844,grad_norm: 0.9105622929376215, iteration: 150050
loss: 1.062564492225647,grad_norm: 0.9999992168535096, iteration: 150051
loss: 1.0043971538543701,grad_norm: 0.9150332257954805, iteration: 150052
loss: 0.9821246266365051,grad_norm: 0.9999991283348977, iteration: 150053
loss: 1.019115686416626,grad_norm: 0.9183379392980786, iteration: 150054
loss: 0.9928888082504272,grad_norm: 0.999999343124618, iteration: 150055
loss: 1.0271291732788086,grad_norm: 0.9960212634259868, iteration: 150056
loss: 0.9910123944282532,grad_norm: 0.8956857442258338, iteration: 150057
loss: 0.9954693913459778,grad_norm: 0.999999225960979, iteration: 150058
loss: 0.9446123838424683,grad_norm: 0.9999991988024055, iteration: 150059
loss: 0.9957253336906433,grad_norm: 0.999999128194506, iteration: 150060
loss: 1.0301941633224487,grad_norm: 0.9999991368498041, iteration: 150061
loss: 1.0380100011825562,grad_norm: 0.9618342288791528, iteration: 150062
loss: 1.0155311822891235,grad_norm: 0.8895364434801192, iteration: 150063
loss: 0.9806267619132996,grad_norm: 0.9264777343756527, iteration: 150064
loss: 0.9986504912376404,grad_norm: 0.9999991728597106, iteration: 150065
loss: 1.014460802078247,grad_norm: 0.9999990194209499, iteration: 150066
loss: 1.0524612665176392,grad_norm: 0.999999105870919, iteration: 150067
loss: 1.0070979595184326,grad_norm: 0.9999990952812585, iteration: 150068
loss: 0.9790371656417847,grad_norm: 0.9999992839404988, iteration: 150069
loss: 0.9966490864753723,grad_norm: 0.9999990800477668, iteration: 150070
loss: 0.9963170886039734,grad_norm: 0.9476046718393449, iteration: 150071
loss: 0.9872819781303406,grad_norm: 0.9999990980048301, iteration: 150072
loss: 0.990064799785614,grad_norm: 0.9085554670279279, iteration: 150073
loss: 0.9603535532951355,grad_norm: 0.9339808570798239, iteration: 150074
loss: 1.0319877862930298,grad_norm: 0.9256503901624644, iteration: 150075
loss: 0.9473977088928223,grad_norm: 0.9365691589374052, iteration: 150076
loss: 1.0089491605758667,grad_norm: 0.9999992404744172, iteration: 150077
loss: 0.9757897257804871,grad_norm: 0.9999992601894061, iteration: 150078
loss: 0.961945116519928,grad_norm: 0.9999991719672935, iteration: 150079
loss: 1.059948444366455,grad_norm: 0.9999991570808168, iteration: 150080
loss: 0.9652289748191833,grad_norm: 0.9999992389609673, iteration: 150081
loss: 0.9829017519950867,grad_norm: 0.9999991966809627, iteration: 150082
loss: 1.0056421756744385,grad_norm: 0.9440371219392641, iteration: 150083
loss: 0.9368367195129395,grad_norm: 0.90239618248862, iteration: 150084
loss: 0.9831362366676331,grad_norm: 0.9999990558587055, iteration: 150085
loss: 1.0408250093460083,grad_norm: 0.9518105912503172, iteration: 150086
loss: 1.012320637702942,grad_norm: 0.99999895308713, iteration: 150087
loss: 0.989776074886322,grad_norm: 0.999999176527807, iteration: 150088
loss: 0.9887281060218811,grad_norm: 0.862687172074731, iteration: 150089
loss: 1.101396918296814,grad_norm: 0.999999625798238, iteration: 150090
loss: 0.9687982797622681,grad_norm: 0.8867851953119719, iteration: 150091
loss: 1.0033323764801025,grad_norm: 0.9999991195350212, iteration: 150092
loss: 1.0044091939926147,grad_norm: 0.9999991972036512, iteration: 150093
loss: 1.0439467430114746,grad_norm: 0.9999993159238446, iteration: 150094
loss: 1.0123299360275269,grad_norm: 0.9999990924699121, iteration: 150095
loss: 0.9662217497825623,grad_norm: 0.99999892979743, iteration: 150096
loss: 0.9986376762390137,grad_norm: 0.9999991268537366, iteration: 150097
loss: 0.9793416857719421,grad_norm: 0.9999991068497076, iteration: 150098
loss: 0.9925673604011536,grad_norm: 0.999999015946646, iteration: 150099
loss: 1.0016220808029175,grad_norm: 0.9535716166031931, iteration: 150100
loss: 1.0219531059265137,grad_norm: 0.9999990546376152, iteration: 150101
loss: 1.0149768590927124,grad_norm: 0.9999991282069698, iteration: 150102
loss: 1.0151069164276123,grad_norm: 0.9999991893218338, iteration: 150103
loss: 1.0138078927993774,grad_norm: 0.9999990600331707, iteration: 150104
loss: 0.9711840152740479,grad_norm: 0.8178601538821576, iteration: 150105
loss: 1.0084519386291504,grad_norm: 0.9856439116939276, iteration: 150106
loss: 0.9854933023452759,grad_norm: 0.9381952613727198, iteration: 150107
loss: 1.0235168933868408,grad_norm: 0.8583044871680361, iteration: 150108
loss: 1.0240377187728882,grad_norm: 0.8500885893368422, iteration: 150109
loss: 0.9749614000320435,grad_norm: 0.9403200899076869, iteration: 150110
loss: 0.9897822737693787,grad_norm: 0.7879401675442913, iteration: 150111
loss: 0.9876253008842468,grad_norm: 0.9688871502319689, iteration: 150112
loss: 1.0591049194335938,grad_norm: 0.9999994646887336, iteration: 150113
loss: 0.9789647459983826,grad_norm: 0.9618374852437337, iteration: 150114
loss: 0.9920428991317749,grad_norm: 0.9396850567592477, iteration: 150115
loss: 1.0316110849380493,grad_norm: 0.9999991435632992, iteration: 150116
loss: 1.0190495252609253,grad_norm: 0.9844943540193833, iteration: 150117
loss: 0.9781132936477661,grad_norm: 0.8883957445762212, iteration: 150118
loss: 1.0249488353729248,grad_norm: 0.9036179286063218, iteration: 150119
loss: 0.9896544814109802,grad_norm: 0.9057076781618197, iteration: 150120
loss: 0.9702849984169006,grad_norm: 0.8819575022741143, iteration: 150121
loss: 1.0045862197875977,grad_norm: 0.9074047090960639, iteration: 150122
loss: 1.0200914144515991,grad_norm: 0.9836706884238923, iteration: 150123
loss: 0.9999745488166809,grad_norm: 0.9451816322082893, iteration: 150124
loss: 1.0061835050582886,grad_norm: 0.8096954965571947, iteration: 150125
loss: 0.9997149109840393,grad_norm: 0.9999991127125275, iteration: 150126
loss: 1.029890537261963,grad_norm: 0.9999989929290263, iteration: 150127
loss: 1.0258574485778809,grad_norm: 0.9999990926612686, iteration: 150128
loss: 0.9744531512260437,grad_norm: 0.9999990050928838, iteration: 150129
loss: 1.008508563041687,grad_norm: 0.8407556510440682, iteration: 150130
loss: 0.9940780997276306,grad_norm: 0.8378957707782722, iteration: 150131
loss: 1.0061222314834595,grad_norm: 0.9569821906720203, iteration: 150132
loss: 0.9837334156036377,grad_norm: 0.9235341213900867, iteration: 150133
loss: 1.011000156402588,grad_norm: 0.9889352394146872, iteration: 150134
loss: 0.9850083589553833,grad_norm: 0.8735670871432333, iteration: 150135
loss: 1.0040732622146606,grad_norm: 0.8956116019950267, iteration: 150136
loss: 1.0108059644699097,grad_norm: 0.9220716256978103, iteration: 150137
loss: 0.9976809024810791,grad_norm: 0.8879297684134906, iteration: 150138
loss: 0.9556071162223816,grad_norm: 0.9999989370848987, iteration: 150139
loss: 1.0160224437713623,grad_norm: 0.9999991297366383, iteration: 150140
loss: 0.9981760382652283,grad_norm: 0.9999991619121611, iteration: 150141
loss: 1.033847451210022,grad_norm: 0.9999990547534262, iteration: 150142
loss: 0.9984034299850464,grad_norm: 0.9475331516958022, iteration: 150143
loss: 0.9818729758262634,grad_norm: 0.9999992467456092, iteration: 150144
loss: 0.9905443787574768,grad_norm: 0.9620646917139188, iteration: 150145
loss: 1.003648042678833,grad_norm: 0.9306596635118983, iteration: 150146
loss: 1.0120447874069214,grad_norm: 0.9999990020516707, iteration: 150147
loss: 0.9921261072158813,grad_norm: 0.9762560084481275, iteration: 150148
loss: 0.9830822348594666,grad_norm: 0.9428421232816918, iteration: 150149
loss: 1.0011279582977295,grad_norm: 0.9186966615846451, iteration: 150150
loss: 0.9655900001525879,grad_norm: 0.9999991603850596, iteration: 150151
loss: 0.9678345322608948,grad_norm: 0.999999104697265, iteration: 150152
loss: 1.005743384361267,grad_norm: 0.9999992907189567, iteration: 150153
loss: 1.0418769121170044,grad_norm: 0.9999991406684261, iteration: 150154
loss: 1.0386074781417847,grad_norm: 0.9999990224579813, iteration: 150155
loss: 0.9725162982940674,grad_norm: 0.9104574817986181, iteration: 150156
loss: 0.9590560793876648,grad_norm: 0.9999990023055194, iteration: 150157
loss: 1.0018130540847778,grad_norm: 0.999999160585964, iteration: 150158
loss: 1.000739336013794,grad_norm: 0.999999030513077, iteration: 150159
loss: 1.0042287111282349,grad_norm: 0.849487669295207, iteration: 150160
loss: 0.999809741973877,grad_norm: 0.9542018090212927, iteration: 150161
loss: 1.0013468265533447,grad_norm: 0.9986004557015177, iteration: 150162
loss: 0.9696147441864014,grad_norm: 0.9999991914174897, iteration: 150163
loss: 1.0154589414596558,grad_norm: 0.9999991672880743, iteration: 150164
loss: 1.003368854522705,grad_norm: 0.898002740186345, iteration: 150165
loss: 1.0453044176101685,grad_norm: 0.9999991204075716, iteration: 150166
loss: 1.0081926584243774,grad_norm: 0.9999991132193742, iteration: 150167
loss: 1.0080115795135498,grad_norm: 0.999999095604145, iteration: 150168
loss: 0.9595368504524231,grad_norm: 0.9175635803333996, iteration: 150169
loss: 1.006625771522522,grad_norm: 0.999999203801855, iteration: 150170
loss: 0.9977306127548218,grad_norm: 0.9999991363999725, iteration: 150171
loss: 0.9991430640220642,grad_norm: 0.9976869668841936, iteration: 150172
loss: 1.0184341669082642,grad_norm: 0.9113750298192659, iteration: 150173
loss: 0.9607803225517273,grad_norm: 0.9999992787616075, iteration: 150174
loss: 0.9774906039237976,grad_norm: 0.8704036440145607, iteration: 150175
loss: 0.9623619914054871,grad_norm: 0.913075652117282, iteration: 150176
loss: 0.9855775237083435,grad_norm: 0.9999991628953281, iteration: 150177
loss: 0.9963961839675903,grad_norm: 0.9999990098615684, iteration: 150178
loss: 1.0192033052444458,grad_norm: 0.8952237881376037, iteration: 150179
loss: 1.0184321403503418,grad_norm: 0.9999991418254354, iteration: 150180
loss: 0.9785298109054565,grad_norm: 0.9999992292365596, iteration: 150181
loss: 1.0120152235031128,grad_norm: 0.9251813650337533, iteration: 150182
loss: 1.0115669965744019,grad_norm: 0.9872302880461553, iteration: 150183
loss: 1.02810537815094,grad_norm: 0.9738206245049774, iteration: 150184
loss: 0.9904958605766296,grad_norm: 0.9999992192750452, iteration: 150185
loss: 1.0060008764266968,grad_norm: 0.8911349210841762, iteration: 150186
loss: 1.0052815675735474,grad_norm: 0.9999992088723038, iteration: 150187
loss: 0.995987594127655,grad_norm: 0.9999989665416796, iteration: 150188
loss: 1.0113253593444824,grad_norm: 0.9921875792699667, iteration: 150189
loss: 0.9868040680885315,grad_norm: 0.9310008585902525, iteration: 150190
loss: 0.9876840114593506,grad_norm: 0.9741633582865628, iteration: 150191
loss: 0.9952603578567505,grad_norm: 0.9785748615672833, iteration: 150192
loss: 0.975034773349762,grad_norm: 0.9999990563579123, iteration: 150193
loss: 1.0363348722457886,grad_norm: 0.9999996653874111, iteration: 150194
loss: 1.0084325075149536,grad_norm: 0.9246092012845197, iteration: 150195
loss: 1.015310287475586,grad_norm: 0.862183280493609, iteration: 150196
loss: 1.0075753927230835,grad_norm: 0.937271577453575, iteration: 150197
loss: 0.9999216794967651,grad_norm: 0.9999990148108905, iteration: 150198
loss: 0.9956996440887451,grad_norm: 0.9999989919339782, iteration: 150199
loss: 0.9507369995117188,grad_norm: 0.9999992451279321, iteration: 150200
loss: 1.013033390045166,grad_norm: 0.9717577291020154, iteration: 150201
loss: 1.0016323328018188,grad_norm: 0.9999989163798828, iteration: 150202
loss: 0.9858905076980591,grad_norm: 0.972693842002789, iteration: 150203
loss: 0.9999494552612305,grad_norm: 0.999999400453117, iteration: 150204
loss: 0.9794365167617798,grad_norm: 0.9591332886030894, iteration: 150205
loss: 1.0085662603378296,grad_norm: 0.9496481438955696, iteration: 150206
loss: 0.9863783121109009,grad_norm: 0.9999990674033087, iteration: 150207
loss: 1.0048540830612183,grad_norm: 0.9999990921028891, iteration: 150208
loss: 0.9785290360450745,grad_norm: 0.9999990621820904, iteration: 150209
loss: 1.016757845878601,grad_norm: 0.8763606098383506, iteration: 150210
loss: 1.026810646057129,grad_norm: 0.9999990278225147, iteration: 150211
loss: 0.9701959490776062,grad_norm: 0.8625853783914567, iteration: 150212
loss: 1.0086973905563354,grad_norm: 0.9999991868047581, iteration: 150213
loss: 1.0009925365447998,grad_norm: 0.9368854428788983, iteration: 150214
loss: 1.0204049348831177,grad_norm: 0.9999997559311293, iteration: 150215
loss: 0.9831088781356812,grad_norm: 0.9999990726224435, iteration: 150216
loss: 1.0037686824798584,grad_norm: 0.9999991475795912, iteration: 150217
loss: 1.0102993249893188,grad_norm: 0.9999993323664496, iteration: 150218
loss: 1.0361838340759277,grad_norm: 0.9999990858326393, iteration: 150219
loss: 1.0224123001098633,grad_norm: 0.9999990771757422, iteration: 150220
loss: 0.9874514937400818,grad_norm: 0.9383708016281266, iteration: 150221
loss: 0.9650772213935852,grad_norm: 0.9999990787682108, iteration: 150222
loss: 1.0217351913452148,grad_norm: 0.8829748226448384, iteration: 150223
loss: 1.00734281539917,grad_norm: 0.9541404193859369, iteration: 150224
loss: 1.0058517456054688,grad_norm: 0.9999989689452529, iteration: 150225
loss: 1.0067718029022217,grad_norm: 0.999999350532822, iteration: 150226
loss: 0.9728295207023621,grad_norm: 0.9771366156137941, iteration: 150227
loss: 0.9921740293502808,grad_norm: 0.9999993724192137, iteration: 150228
loss: 0.9816346168518066,grad_norm: 0.9999990242510052, iteration: 150229
loss: 0.9794632792472839,grad_norm: 0.9900111594994475, iteration: 150230
loss: 1.0024383068084717,grad_norm: 0.9999990884612215, iteration: 150231
loss: 0.9721691608428955,grad_norm: 0.9560706665606712, iteration: 150232
loss: 0.9881996512413025,grad_norm: 0.9999989768917653, iteration: 150233
loss: 0.9730641841888428,grad_norm: 0.788873844963511, iteration: 150234
loss: 0.9849366545677185,grad_norm: 0.999999155126177, iteration: 150235
loss: 0.980415403842926,grad_norm: 0.9999991812948404, iteration: 150236
loss: 0.9965342283248901,grad_norm: 0.9487375945720242, iteration: 150237
loss: 0.9902817010879517,grad_norm: 0.9999992417594222, iteration: 150238
loss: 1.0019475221633911,grad_norm: 0.9999992270950722, iteration: 150239
loss: 0.9920027852058411,grad_norm: 0.9999990796256094, iteration: 150240
loss: 0.9926912188529968,grad_norm: 0.999999077686616, iteration: 150241
loss: 1.0069975852966309,grad_norm: 0.9319275086353714, iteration: 150242
loss: 0.9686896204948425,grad_norm: 0.9607422905481214, iteration: 150243
loss: 1.015718936920166,grad_norm: 0.9999990401821645, iteration: 150244
loss: 1.0042415857315063,grad_norm: 0.8189154584500761, iteration: 150245
loss: 0.9862846732139587,grad_norm: 0.9999990574299534, iteration: 150246
loss: 0.9700683951377869,grad_norm: 0.9999990008701526, iteration: 150247
loss: 1.0087846517562866,grad_norm: 0.8340634590040883, iteration: 150248
loss: 1.0248918533325195,grad_norm: 0.8593940020005812, iteration: 150249
loss: 1.0078047513961792,grad_norm: 0.9999990352420777, iteration: 150250
loss: 0.9966570138931274,grad_norm: 0.9999992521837853, iteration: 150251
loss: 0.9944426417350769,grad_norm: 0.93750537908195, iteration: 150252
loss: 0.9573372602462769,grad_norm: 0.9999991607106979, iteration: 150253
loss: 0.9913908243179321,grad_norm: 0.8563236955037508, iteration: 150254
loss: 0.9763481616973877,grad_norm: 0.9999990646342141, iteration: 150255
loss: 0.9945436716079712,grad_norm: 0.9440807433081325, iteration: 150256
loss: 0.9834453463554382,grad_norm: 0.8973456113743556, iteration: 150257
loss: 1.0028088092803955,grad_norm: 0.9999993185661371, iteration: 150258
loss: 0.9934269189834595,grad_norm: 0.999999119374928, iteration: 150259
loss: 0.9737075567245483,grad_norm: 0.9999989749514835, iteration: 150260
loss: 0.9966108202934265,grad_norm: 0.9543462406424166, iteration: 150261
loss: 0.9406902194023132,grad_norm: 0.9999993039113569, iteration: 150262
loss: 1.0005557537078857,grad_norm: 0.9999991086096282, iteration: 150263
loss: 0.9992608428001404,grad_norm: 0.9999989538670306, iteration: 150264
loss: 1.0228862762451172,grad_norm: 0.9999990994754312, iteration: 150265
loss: 0.9947102665901184,grad_norm: 0.9999995543968478, iteration: 150266
loss: 1.0162875652313232,grad_norm: 0.8980880381492711, iteration: 150267
loss: 0.9942245483398438,grad_norm: 0.999999090189779, iteration: 150268
loss: 0.9900871515274048,grad_norm: 0.777423482150181, iteration: 150269
loss: 0.9888052344322205,grad_norm: 0.8661124671479798, iteration: 150270
loss: 1.0308324098587036,grad_norm: 0.9999994752277384, iteration: 150271
loss: 1.0350841283798218,grad_norm: 0.9999991467034578, iteration: 150272
loss: 0.9911109805107117,grad_norm: 0.97501114329588, iteration: 150273
loss: 1.021862506866455,grad_norm: 0.8592567045632834, iteration: 150274
loss: 0.9750487208366394,grad_norm: 0.999999042466584, iteration: 150275
loss: 0.9657602310180664,grad_norm: 0.9510309986762975, iteration: 150276
loss: 1.0137664079666138,grad_norm: 0.8272940872988171, iteration: 150277
loss: 1.0197488069534302,grad_norm: 0.8677456089180416, iteration: 150278
loss: 1.0131951570510864,grad_norm: 0.9466226496276204, iteration: 150279
loss: 0.9934921860694885,grad_norm: 0.9999991851418192, iteration: 150280
loss: 1.0255337953567505,grad_norm: 0.9999991572975677, iteration: 150281
loss: 0.9974431991577148,grad_norm: 0.9999990458126065, iteration: 150282
loss: 0.9623653888702393,grad_norm: 0.9999991230291569, iteration: 150283
loss: 0.9912466406822205,grad_norm: 0.9999991673900942, iteration: 150284
loss: 1.001442790031433,grad_norm: 0.9098277010907516, iteration: 150285
loss: 1.0233092308044434,grad_norm: 0.9999991780063376, iteration: 150286
loss: 0.9779864549636841,grad_norm: 0.9999991224276017, iteration: 150287
loss: 0.9758115410804749,grad_norm: 0.9999988923856074, iteration: 150288
loss: 1.0083518028259277,grad_norm: 0.900952483096364, iteration: 150289
loss: 1.013701319694519,grad_norm: 0.8785662912107475, iteration: 150290
loss: 1.0247164964675903,grad_norm: 0.9957941543162385, iteration: 150291
loss: 0.9920597672462463,grad_norm: 0.9999991156871647, iteration: 150292
loss: 1.027895450592041,grad_norm: 0.9999990831233634, iteration: 150293
loss: 0.9988806843757629,grad_norm: 0.875785051366736, iteration: 150294
loss: 0.9978541731834412,grad_norm: 0.9999991353590617, iteration: 150295
loss: 1.0060325860977173,grad_norm: 0.9999991088120167, iteration: 150296
loss: 1.0031838417053223,grad_norm: 0.8688930206384319, iteration: 150297
loss: 1.0301722288131714,grad_norm: 0.9999991984689494, iteration: 150298
loss: 1.006377100944519,grad_norm: 0.9999989564195059, iteration: 150299
loss: 0.9689633846282959,grad_norm: 0.8749305074478055, iteration: 150300
loss: 0.9788589477539062,grad_norm: 0.9208548523362354, iteration: 150301
loss: 0.9786868691444397,grad_norm: 0.9999994721107089, iteration: 150302
loss: 0.9780582189559937,grad_norm: 0.9999989412452006, iteration: 150303
loss: 1.0290435552597046,grad_norm: 0.9999990938521337, iteration: 150304
loss: 0.9898651838302612,grad_norm: 0.9999989454452447, iteration: 150305
loss: 1.0124201774597168,grad_norm: 0.8931515779104384, iteration: 150306
loss: 1.0050503015518188,grad_norm: 0.9999992941336964, iteration: 150307
loss: 0.9860780239105225,grad_norm: 0.9999989782216709, iteration: 150308
loss: 0.9984631538391113,grad_norm: 0.9999996892768735, iteration: 150309
loss: 1.0281972885131836,grad_norm: 0.9999993009183217, iteration: 150310
loss: 1.0159310102462769,grad_norm: 0.9999992068262895, iteration: 150311
loss: 1.0370148420333862,grad_norm: 0.9194852998725144, iteration: 150312
loss: 1.023097276687622,grad_norm: 0.8546767708068325, iteration: 150313
loss: 1.049353837966919,grad_norm: 0.9999990997481435, iteration: 150314
loss: 0.9450163841247559,grad_norm: 0.9458757256149626, iteration: 150315
loss: 0.9967927932739258,grad_norm: 0.9999991979389788, iteration: 150316
loss: 0.9730116128921509,grad_norm: 0.9999992574091987, iteration: 150317
loss: 0.9812822341918945,grad_norm: 0.9954851524228937, iteration: 150318
loss: 0.9322162866592407,grad_norm: 0.9390317070016236, iteration: 150319
loss: 1.0477713346481323,grad_norm: 0.9999992909427865, iteration: 150320
loss: 1.017660140991211,grad_norm: 0.9176438764047778, iteration: 150321
loss: 1.0100888013839722,grad_norm: 0.9999991369415601, iteration: 150322
loss: 1.0157380104064941,grad_norm: 0.9999990634048385, iteration: 150323
loss: 0.9915498495101929,grad_norm: 0.9532833267747644, iteration: 150324
loss: 0.9678483009338379,grad_norm: 0.9022399383178811, iteration: 150325
loss: 0.9863295555114746,grad_norm: 0.9742128892276093, iteration: 150326
loss: 1.0075145959854126,grad_norm: 0.9999989968194627, iteration: 150327
loss: 0.9810420870780945,grad_norm: 0.9668887496187644, iteration: 150328
loss: 0.997257649898529,grad_norm: 0.9728076024533087, iteration: 150329
loss: 1.0114964246749878,grad_norm: 0.9999991714117195, iteration: 150330
loss: 0.9696139097213745,grad_norm: 0.9146880878365065, iteration: 150331
loss: 1.0004334449768066,grad_norm: 0.9999991639809517, iteration: 150332
loss: 0.9801875948905945,grad_norm: 0.999999218411529, iteration: 150333
loss: 1.036616325378418,grad_norm: 0.9999993839499689, iteration: 150334
loss: 0.9738110303878784,grad_norm: 0.9536901494515991, iteration: 150335
loss: 1.010206699371338,grad_norm: 0.9999992247788402, iteration: 150336
loss: 1.012262225151062,grad_norm: 0.9999992556768259, iteration: 150337
loss: 1.0111651420593262,grad_norm: 0.9999992972580567, iteration: 150338
loss: 1.0385228395462036,grad_norm: 0.9328051506721674, iteration: 150339
loss: 1.0019673109054565,grad_norm: 0.9999992114498839, iteration: 150340
loss: 0.9874857664108276,grad_norm: 0.999999215345972, iteration: 150341
loss: 1.028883457183838,grad_norm: 0.8625821076942297, iteration: 150342
loss: 0.9900256395339966,grad_norm: 0.9999991516650109, iteration: 150343
loss: 1.0170719623565674,grad_norm: 0.9999991269994521, iteration: 150344
loss: 1.0263179540634155,grad_norm: 0.999999109746574, iteration: 150345
loss: 0.9673600792884827,grad_norm: 0.9870865781889047, iteration: 150346
loss: 0.9617196917533875,grad_norm: 0.999999465104998, iteration: 150347
loss: 1.0102698802947998,grad_norm: 0.9999989786522498, iteration: 150348
loss: 1.0357950925827026,grad_norm: 0.999999250124695, iteration: 150349
loss: 0.9506652355194092,grad_norm: 0.9999992766980483, iteration: 150350
loss: 1.0307953357696533,grad_norm: 0.9999991729659056, iteration: 150351
loss: 1.0480965375900269,grad_norm: 0.9999990247039368, iteration: 150352
loss: 1.0230612754821777,grad_norm: 0.999999245878054, iteration: 150353
loss: 0.9602270722389221,grad_norm: 0.9999993113279294, iteration: 150354
loss: 0.9837798476219177,grad_norm: 0.9415213211744148, iteration: 150355
loss: 1.0139575004577637,grad_norm: 0.9999991056503222, iteration: 150356
loss: 1.0317363739013672,grad_norm: 0.9999990465598992, iteration: 150357
loss: 0.9879975914955139,grad_norm: 0.8886058058583568, iteration: 150358
loss: 0.9782849550247192,grad_norm: 0.999999045868767, iteration: 150359
loss: 1.0150913000106812,grad_norm: 0.9999991566482069, iteration: 150360
loss: 1.0124660730361938,grad_norm: 0.8545571445832146, iteration: 150361
loss: 0.9786738753318787,grad_norm: 0.8801213889174831, iteration: 150362
loss: 0.9994282722473145,grad_norm: 0.9999991786977303, iteration: 150363
loss: 1.0033061504364014,grad_norm: 0.9799030378827162, iteration: 150364
loss: 1.0061404705047607,grad_norm: 0.9348956435765585, iteration: 150365
loss: 1.004896640777588,grad_norm: 0.9999991248056123, iteration: 150366
loss: 1.023311972618103,grad_norm: 0.9999992425216423, iteration: 150367
loss: 0.9996817708015442,grad_norm: 0.9999990909506188, iteration: 150368
loss: 1.063871145248413,grad_norm: 0.999999246150614, iteration: 150369
loss: 1.0195432901382446,grad_norm: 0.8582123514320188, iteration: 150370
loss: 0.9289917349815369,grad_norm: 0.9999992270602992, iteration: 150371
loss: 0.9941825270652771,grad_norm: 0.9807853696192176, iteration: 150372
loss: 0.9742235541343689,grad_norm: 0.8980842094265779, iteration: 150373
loss: 1.0152665376663208,grad_norm: 0.9800361740243043, iteration: 150374
loss: 0.959879994392395,grad_norm: 0.9999991041300395, iteration: 150375
loss: 1.0672695636749268,grad_norm: 0.9999991610793116, iteration: 150376
loss: 0.9894952178001404,grad_norm: 0.9999992049807477, iteration: 150377
loss: 0.9853599071502686,grad_norm: 0.9999991589710507, iteration: 150378
loss: 0.9810692667961121,grad_norm: 0.9999991113524744, iteration: 150379
loss: 1.0143733024597168,grad_norm: 0.9999992970684135, iteration: 150380
loss: 1.0198583602905273,grad_norm: 0.9999992227121032, iteration: 150381
loss: 1.0021389722824097,grad_norm: 0.9999993576389992, iteration: 150382
loss: 0.9588927626609802,grad_norm: 0.9808900904223865, iteration: 150383
loss: 0.980464518070221,grad_norm: 0.8170434474846597, iteration: 150384
loss: 0.9908125400543213,grad_norm: 0.9420325870199029, iteration: 150385
loss: 0.9963124394416809,grad_norm: 0.9321841092495328, iteration: 150386
loss: 0.998197078704834,grad_norm: 0.9818202079764419, iteration: 150387
loss: 0.9807236790657043,grad_norm: 0.9408440326413234, iteration: 150388
loss: 1.0040643215179443,grad_norm: 0.9999990417395535, iteration: 150389
loss: 1.0113853216171265,grad_norm: 0.9999990912888027, iteration: 150390
loss: 1.0196014642715454,grad_norm: 0.9670736183785528, iteration: 150391
loss: 0.9931886792182922,grad_norm: 0.9999991304339283, iteration: 150392
loss: 0.9761512875556946,grad_norm: 0.986996501822047, iteration: 150393
loss: 0.9964359402656555,grad_norm: 0.9999991555097494, iteration: 150394
loss: 0.9963861107826233,grad_norm: 0.9999991059388924, iteration: 150395
loss: 1.0244362354278564,grad_norm: 0.9999991839834257, iteration: 150396
loss: 1.0061359405517578,grad_norm: 0.9999991051517149, iteration: 150397
loss: 0.9974585175514221,grad_norm: 0.9718457716384772, iteration: 150398
loss: 0.9439753890037537,grad_norm: 0.9999990524110498, iteration: 150399
loss: 0.9698895812034607,grad_norm: 0.963943483180578, iteration: 150400
loss: 0.9884685277938843,grad_norm: 0.9999990943965551, iteration: 150401
loss: 1.018505573272705,grad_norm: 0.9999990501261656, iteration: 150402
loss: 0.9843947291374207,grad_norm: 0.9951644036437943, iteration: 150403
loss: 1.0064927339553833,grad_norm: 0.9935192894691884, iteration: 150404
loss: 0.9919038414955139,grad_norm: 0.8900729168542899, iteration: 150405
loss: 1.0032167434692383,grad_norm: 0.8971083555874692, iteration: 150406
loss: 1.0028529167175293,grad_norm: 0.9265473743023566, iteration: 150407
loss: 0.9911599159240723,grad_norm: 0.9974238752151731, iteration: 150408
loss: 0.9874860644340515,grad_norm: 0.9999991161432478, iteration: 150409
loss: 1.008352279663086,grad_norm: 0.8576255028686512, iteration: 150410
loss: 0.9702226519584656,grad_norm: 0.999998987619203, iteration: 150411
loss: 0.9951918721199036,grad_norm: 0.999999344201973, iteration: 150412
loss: 1.0588593482971191,grad_norm: 0.9999994455565233, iteration: 150413
loss: 1.0310776233673096,grad_norm: 0.9999990761126675, iteration: 150414
loss: 1.0137790441513062,grad_norm: 0.9999991256572366, iteration: 150415
loss: 0.9868834614753723,grad_norm: 0.9999990230885931, iteration: 150416
loss: 0.9977290034294128,grad_norm: 0.9999990976223808, iteration: 150417
loss: 1.0026366710662842,grad_norm: 0.9999991398179268, iteration: 150418
loss: 1.0084048509597778,grad_norm: 0.9999991269427034, iteration: 150419
loss: 1.0818746089935303,grad_norm: 0.9999994086253519, iteration: 150420
loss: 0.9787458777427673,grad_norm: 0.9999992087589361, iteration: 150421
loss: 1.0247080326080322,grad_norm: 0.8516561870736552, iteration: 150422
loss: 1.1160539388656616,grad_norm: 0.9999992992873001, iteration: 150423
loss: 0.9960687756538391,grad_norm: 0.9999991134340732, iteration: 150424
loss: 0.9724605679512024,grad_norm: 0.8716829304913459, iteration: 150425
loss: 1.005562424659729,grad_norm: 0.999999502636894, iteration: 150426
loss: 1.028464913368225,grad_norm: 0.9378174395864234, iteration: 150427
loss: 1.0237807035446167,grad_norm: 0.9822370332908821, iteration: 150428
loss: 1.038434386253357,grad_norm: 0.9999991109853379, iteration: 150429
loss: 0.954698383808136,grad_norm: 0.8884719714974835, iteration: 150430
loss: 1.0087591409683228,grad_norm: 0.9999992503376963, iteration: 150431
loss: 0.9950555562973022,grad_norm: 0.9999990737646571, iteration: 150432
loss: 0.9834833741188049,grad_norm: 0.9999989497988253, iteration: 150433
loss: 0.9985907077789307,grad_norm: 0.9999990501373276, iteration: 150434
loss: 0.9811804294586182,grad_norm: 0.8768466505187172, iteration: 150435
loss: 0.9863462448120117,grad_norm: 0.9999991694077672, iteration: 150436
loss: 0.975233793258667,grad_norm: 0.9685114487157374, iteration: 150437
loss: 0.9799175262451172,grad_norm: 0.9999991990126378, iteration: 150438
loss: 1.0010960102081299,grad_norm: 0.9999990585728967, iteration: 150439
loss: 0.9908669590950012,grad_norm: 0.9053795400909013, iteration: 150440
loss: 0.9797092080116272,grad_norm: 0.9999989986304881, iteration: 150441
loss: 1.0011783838272095,grad_norm: 0.9999990776068853, iteration: 150442
loss: 1.0045634508132935,grad_norm: 0.9999989723141177, iteration: 150443
loss: 0.9616366028785706,grad_norm: 0.9999991022808369, iteration: 150444
loss: 1.0200912952423096,grad_norm: 0.9999989546619567, iteration: 150445
loss: 0.9492518901824951,grad_norm: 0.9866015040999178, iteration: 150446
loss: 1.0440051555633545,grad_norm: 0.999999201000833, iteration: 150447
loss: 1.0102670192718506,grad_norm: 0.9999998914537768, iteration: 150448
loss: 0.9871335029602051,grad_norm: 0.9958295531492201, iteration: 150449
loss: 1.0019288063049316,grad_norm: 0.775301226600277, iteration: 150450
loss: 0.9837270379066467,grad_norm: 0.9999990439079698, iteration: 150451
loss: 0.956005334854126,grad_norm: 0.9692737771322449, iteration: 150452
loss: 1.0251787900924683,grad_norm: 0.9999992202460204, iteration: 150453
loss: 0.9693747162818909,grad_norm: 0.9999992194790516, iteration: 150454
loss: 0.9816820025444031,grad_norm: 0.9999991756415137, iteration: 150455
loss: 1.028439998626709,grad_norm: 0.931396871692958, iteration: 150456
loss: 0.9937598705291748,grad_norm: 0.99999900076737, iteration: 150457
loss: 0.9854455590248108,grad_norm: 0.9996307382106777, iteration: 150458
loss: 0.9737591743469238,grad_norm: 0.8977653495370784, iteration: 150459
loss: 0.997996985912323,grad_norm: 0.999999315289529, iteration: 150460
loss: 0.9812160730361938,grad_norm: 0.9999991817383659, iteration: 150461
loss: 0.9867514371871948,grad_norm: 0.9999990042088147, iteration: 150462
loss: 1.0252695083618164,grad_norm: 0.9999990532076708, iteration: 150463
loss: 1.0131651163101196,grad_norm: 0.9999990913390839, iteration: 150464
loss: 1.028882384300232,grad_norm: 0.9710542008196184, iteration: 150465
loss: 1.0009303092956543,grad_norm: 0.9999992528522583, iteration: 150466
loss: 0.9961250424385071,grad_norm: 0.9472163739743461, iteration: 150467
loss: 1.0172010660171509,grad_norm: 0.9497440667694954, iteration: 150468
loss: 0.9738599061965942,grad_norm: 0.9999993489462289, iteration: 150469
loss: 0.9652472138404846,grad_norm: 0.9999989451681394, iteration: 150470
loss: 1.0166966915130615,grad_norm: 0.9385379512793509, iteration: 150471
loss: 0.970518171787262,grad_norm: 0.9999992226897868, iteration: 150472
loss: 1.0134460926055908,grad_norm: 0.9999996461653747, iteration: 150473
loss: 0.9723556637763977,grad_norm: 0.9999990458750054, iteration: 150474
loss: 1.0066982507705688,grad_norm: 0.9999991950880672, iteration: 150475
loss: 0.9922926425933838,grad_norm: 0.9999990994914674, iteration: 150476
loss: 1.035904049873352,grad_norm: 0.9155863165577309, iteration: 150477
loss: 0.9784519672393799,grad_norm: 0.9999991631858481, iteration: 150478
loss: 1.0260034799575806,grad_norm: 0.9999992600428913, iteration: 150479
loss: 0.9702305197715759,grad_norm: 0.999999061904654, iteration: 150480
loss: 1.0274773836135864,grad_norm: 0.9874985194759186, iteration: 150481
loss: 0.9927777647972107,grad_norm: 0.8651027134980463, iteration: 150482
loss: 1.0056788921356201,grad_norm: 0.9999992358721275, iteration: 150483
loss: 1.0150208473205566,grad_norm: 0.991051136098845, iteration: 150484
loss: 1.0261938571929932,grad_norm: 0.9999993681800577, iteration: 150485
loss: 0.9741249680519104,grad_norm: 0.9999994115567028, iteration: 150486
loss: 0.9767075181007385,grad_norm: 0.9516913390135218, iteration: 150487
loss: 1.0018092393875122,grad_norm: 0.9999991611106452, iteration: 150488
loss: 0.988471269607544,grad_norm: 0.8888095759702618, iteration: 150489
loss: 1.0071836709976196,grad_norm: 0.9186304556409081, iteration: 150490
loss: 0.981045126914978,grad_norm: 0.8751119141205871, iteration: 150491
loss: 0.9784623980522156,grad_norm: 0.9999991942042156, iteration: 150492
loss: 1.049723505973816,grad_norm: 0.9999992673883391, iteration: 150493
loss: 1.1195546388626099,grad_norm: 0.9999995815453097, iteration: 150494
loss: 1.0320048332214355,grad_norm: 0.9999991183789895, iteration: 150495
loss: 1.0123071670532227,grad_norm: 0.9999990949968824, iteration: 150496
loss: 1.005321741104126,grad_norm: 0.9999990815598523, iteration: 150497
loss: 0.9818671345710754,grad_norm: 0.9472488301859835, iteration: 150498
loss: 1.0172154903411865,grad_norm: 0.9639670569673997, iteration: 150499
loss: 1.015448808670044,grad_norm: 0.9999991815906808, iteration: 150500
loss: 1.0471950769424438,grad_norm: 0.9999990281347936, iteration: 150501
loss: 0.9819250702857971,grad_norm: 0.7730323627006492, iteration: 150502
loss: 1.0404086112976074,grad_norm: 0.9999999576342666, iteration: 150503
loss: 1.0197927951812744,grad_norm: 0.9999993212133462, iteration: 150504
loss: 0.9961288571357727,grad_norm: 0.999999147733351, iteration: 150505
loss: 1.0748445987701416,grad_norm: 0.9999991057315742, iteration: 150506
loss: 0.9886531233787537,grad_norm: 0.9951075045910595, iteration: 150507
loss: 1.011139988899231,grad_norm: 0.9598699166207256, iteration: 150508
loss: 0.9633498191833496,grad_norm: 0.9919836274523754, iteration: 150509
loss: 1.0180354118347168,grad_norm: 0.9999991210810925, iteration: 150510
loss: 1.0326627492904663,grad_norm: 0.9999994049741107, iteration: 150511
loss: 0.9954056739807129,grad_norm: 0.9999991107037622, iteration: 150512
loss: 1.0047228336334229,grad_norm: 0.8629357885051341, iteration: 150513
loss: 1.0077641010284424,grad_norm: 0.9999992126943438, iteration: 150514
loss: 0.9697157740592957,grad_norm: 0.9999993057095201, iteration: 150515
loss: 0.9538232088088989,grad_norm: 0.9999991773450815, iteration: 150516
loss: 0.9906060099601746,grad_norm: 0.9256713636304993, iteration: 150517
loss: 1.067071557044983,grad_norm: 0.9999991060139524, iteration: 150518
loss: 1.0098986625671387,grad_norm: 0.9927179202564649, iteration: 150519
loss: 0.9695504307746887,grad_norm: 0.9999990018551138, iteration: 150520
loss: 0.9791736602783203,grad_norm: 0.9411106492798772, iteration: 150521
loss: 1.0188137292861938,grad_norm: 0.9999991833231354, iteration: 150522
loss: 0.9851902723312378,grad_norm: 0.9832542363893674, iteration: 150523
loss: 0.9976924657821655,grad_norm: 0.8903420092038914, iteration: 150524
loss: 0.9862775802612305,grad_norm: 0.881983979742743, iteration: 150525
loss: 1.0262808799743652,grad_norm: 0.9823090187426685, iteration: 150526
loss: 1.0111873149871826,grad_norm: 0.8936168634190109, iteration: 150527
loss: 0.9646750688552856,grad_norm: 0.9999991867471336, iteration: 150528
loss: 0.994348406791687,grad_norm: 0.9999991674523496, iteration: 150529
loss: 1.0094799995422363,grad_norm: 0.9999989655935718, iteration: 150530
loss: 0.9674120545387268,grad_norm: 0.9999990928004674, iteration: 150531
loss: 0.9951608180999756,grad_norm: 0.99999919254582, iteration: 150532
loss: 1.0020910501480103,grad_norm: 0.9999990634536055, iteration: 150533
loss: 1.0001635551452637,grad_norm: 0.9902582067443662, iteration: 150534
loss: 0.9913521409034729,grad_norm: 0.9999992046251798, iteration: 150535
loss: 1.0224480628967285,grad_norm: 0.9485823829137372, iteration: 150536
loss: 1.0166599750518799,grad_norm: 0.9999993293956843, iteration: 150537
loss: 1.0346477031707764,grad_norm: 0.9999992620586754, iteration: 150538
loss: 0.9921397566795349,grad_norm: 0.9868254466768266, iteration: 150539
loss: 1.016044020652771,grad_norm: 0.9999991035272651, iteration: 150540
loss: 1.0026254653930664,grad_norm: 0.9999991710101861, iteration: 150541
loss: 0.9815820455551147,grad_norm: 0.9999991139274017, iteration: 150542
loss: 0.9863397479057312,grad_norm: 0.9471815222047868, iteration: 150543
loss: 0.9587050676345825,grad_norm: 0.9999990609679588, iteration: 150544
loss: 0.9843674302101135,grad_norm: 0.9999989240717853, iteration: 150545
loss: 0.9984127283096313,grad_norm: 0.9999990337596548, iteration: 150546
loss: 0.9836825132369995,grad_norm: 0.9999991555755102, iteration: 150547
loss: 1.029686450958252,grad_norm: 0.91523683575191, iteration: 150548
loss: 0.9905680418014526,grad_norm: 0.999999120000307, iteration: 150549
loss: 0.997155487537384,grad_norm: 0.999999082461435, iteration: 150550
loss: 1.0056452751159668,grad_norm: 0.942769540321098, iteration: 150551
loss: 0.9973916411399841,grad_norm: 0.9999990204667341, iteration: 150552
loss: 1.0286622047424316,grad_norm: 0.9999990924049026, iteration: 150553
loss: 1.0017248392105103,grad_norm: 0.9999992244777213, iteration: 150554
loss: 0.9792527556419373,grad_norm: 0.999999074837623, iteration: 150555
loss: 0.9987735152244568,grad_norm: 0.9999990732231553, iteration: 150556
loss: 0.9806565642356873,grad_norm: 0.8143976213330106, iteration: 150557
loss: 0.9986243844032288,grad_norm: 0.9999991828329303, iteration: 150558
loss: 0.9941076636314392,grad_norm: 0.9773080126201119, iteration: 150559
loss: 0.9998407363891602,grad_norm: 0.999999032776982, iteration: 150560
loss: 1.0171414613723755,grad_norm: 0.9999996638157073, iteration: 150561
loss: 1.0268902778625488,grad_norm: 0.9999991054458142, iteration: 150562
loss: 1.0174435377120972,grad_norm: 0.9999991550872681, iteration: 150563
loss: 1.0136380195617676,grad_norm: 0.9999998433278531, iteration: 150564
loss: 0.9678190350532532,grad_norm: 0.9888917444989872, iteration: 150565
loss: 1.0403836965560913,grad_norm: 0.9377036220028466, iteration: 150566
loss: 1.051908016204834,grad_norm: 0.9501514886715542, iteration: 150567
loss: 0.9725711941719055,grad_norm: 0.999998945335713, iteration: 150568
loss: 1.004530906677246,grad_norm: 0.9999989903296116, iteration: 150569
loss: 0.9722451567649841,grad_norm: 0.9999990131697192, iteration: 150570
loss: 1.0085365772247314,grad_norm: 0.9999991198133771, iteration: 150571
loss: 0.9925105571746826,grad_norm: 0.9999991623885981, iteration: 150572
loss: 1.0020581483840942,grad_norm: 0.9999991653754977, iteration: 150573
loss: 1.018010139465332,grad_norm: 0.9336109272232955, iteration: 150574
loss: 0.9963771104812622,grad_norm: 0.9817001792834515, iteration: 150575
loss: 1.0499391555786133,grad_norm: 0.9999989074041008, iteration: 150576
loss: 0.9433348178863525,grad_norm: 0.9999993219999084, iteration: 150577
loss: 1.0061079263687134,grad_norm: 0.9999991290710813, iteration: 150578
loss: 1.0412770509719849,grad_norm: 0.9999990296659568, iteration: 150579
loss: 1.0017576217651367,grad_norm: 0.9999991990472531, iteration: 150580
loss: 1.0250014066696167,grad_norm: 0.9977170349900282, iteration: 150581
loss: 1.0047084093093872,grad_norm: 0.8910987249744341, iteration: 150582
loss: 1.057199478149414,grad_norm: 0.9999990044159809, iteration: 150583
loss: 1.0221362113952637,grad_norm: 0.9999990788219183, iteration: 150584
loss: 1.005194902420044,grad_norm: 0.9999991416623965, iteration: 150585
loss: 1.0089904069900513,grad_norm: 0.9999992825217373, iteration: 150586
loss: 0.9884607791900635,grad_norm: 0.9448088519197249, iteration: 150587
loss: 1.0618674755096436,grad_norm: 0.9999998199249985, iteration: 150588
loss: 0.9869072437286377,grad_norm: 0.9999990556213689, iteration: 150589
loss: 1.006284236907959,grad_norm: 0.9999993234077212, iteration: 150590
loss: 1.0292809009552002,grad_norm: 0.9999992188966216, iteration: 150591
loss: 0.9940765500068665,grad_norm: 0.9633457067811813, iteration: 150592
loss: 0.9795500636100769,grad_norm: 0.999999549772834, iteration: 150593
loss: 1.0315755605697632,grad_norm: 0.9999995956883017, iteration: 150594
loss: 1.0131433010101318,grad_norm: 0.9175337911553397, iteration: 150595
loss: 1.0006638765335083,grad_norm: 0.9999990540532813, iteration: 150596
loss: 1.0205634832382202,grad_norm: 0.9999993408571941, iteration: 150597
loss: 0.9380295276641846,grad_norm: 0.9663919992465302, iteration: 150598
loss: 1.0474295616149902,grad_norm: 0.829771325859053, iteration: 150599
loss: 0.9885356426239014,grad_norm: 0.9491718765770161, iteration: 150600
loss: 0.9993911981582642,grad_norm: 0.9999991824755564, iteration: 150601
loss: 1.0014933347702026,grad_norm: 0.9999991213252922, iteration: 150602
loss: 1.022781491279602,grad_norm: 0.9999990070227613, iteration: 150603
loss: 0.9970938563346863,grad_norm: 0.9999991459532837, iteration: 150604
loss: 1.0062772035598755,grad_norm: 0.9999989488294242, iteration: 150605
loss: 0.9542370438575745,grad_norm: 0.9999991369943807, iteration: 150606
loss: 0.9797088503837585,grad_norm: 0.9999991527631924, iteration: 150607
loss: 0.9768774509429932,grad_norm: 0.7740118626948739, iteration: 150608
loss: 0.993609607219696,grad_norm: 0.9999994228406672, iteration: 150609
loss: 0.9862756729125977,grad_norm: 0.9999992917342907, iteration: 150610
loss: 0.997565746307373,grad_norm: 0.956092459497701, iteration: 150611
loss: 1.019251823425293,grad_norm: 0.9297344911363012, iteration: 150612
loss: 1.0152732133865356,grad_norm: 0.9999992075588512, iteration: 150613
loss: 0.975806713104248,grad_norm: 0.9999990975330441, iteration: 150614
loss: 1.0154213905334473,grad_norm: 0.9778522446411853, iteration: 150615
loss: 1.0178747177124023,grad_norm: 0.9999994954873671, iteration: 150616
loss: 0.9688540697097778,grad_norm: 0.9489337117578346, iteration: 150617
loss: 1.022322654724121,grad_norm: 0.9209685237207462, iteration: 150618
loss: 0.9728913307189941,grad_norm: 0.8064690903335173, iteration: 150619
loss: 0.9911232590675354,grad_norm: 0.9805902040938307, iteration: 150620
loss: 1.0093947649002075,grad_norm: 0.999999170373843, iteration: 150621
loss: 0.9819100499153137,grad_norm: 0.9999992002959583, iteration: 150622
loss: 0.9776560664176941,grad_norm: 0.9698906342186299, iteration: 150623
loss: 0.9926706552505493,grad_norm: 0.999999066193154, iteration: 150624
loss: 1.022900104522705,grad_norm: 0.8198977731630875, iteration: 150625
loss: 1.0279817581176758,grad_norm: 0.9278793650275804, iteration: 150626
loss: 1.0203255414962769,grad_norm: 0.9999991912774846, iteration: 150627
loss: 1.0298014879226685,grad_norm: 0.9923369036001785, iteration: 150628
loss: 0.9858118891716003,grad_norm: 0.8907813143521951, iteration: 150629
loss: 0.9643999338150024,grad_norm: 0.96905472344599, iteration: 150630
loss: 1.0280793905258179,grad_norm: 0.9112884005685579, iteration: 150631
loss: 1.0166382789611816,grad_norm: 0.9514707983401716, iteration: 150632
loss: 1.0111511945724487,grad_norm: 0.9804817248708018, iteration: 150633
loss: 1.0228307247161865,grad_norm: 0.9854398403118523, iteration: 150634
loss: 0.9552682638168335,grad_norm: 0.9953243933361386, iteration: 150635
loss: 1.0284171104431152,grad_norm: 0.999999125392883, iteration: 150636
loss: 0.9757946729660034,grad_norm: 0.967467380463251, iteration: 150637
loss: 1.0109705924987793,grad_norm: 0.9999991272778237, iteration: 150638
loss: 0.9890015125274658,grad_norm: 0.9999991052915911, iteration: 150639
loss: 1.014184594154358,grad_norm: 0.9999992884432133, iteration: 150640
loss: 0.987952470779419,grad_norm: 0.9999991590952563, iteration: 150641
loss: 0.9792609214782715,grad_norm: 0.9999991608316383, iteration: 150642
loss: 0.9913627505302429,grad_norm: 0.916981012741952, iteration: 150643
loss: 1.019304633140564,grad_norm: 0.9145618101875056, iteration: 150644
loss: 0.9889217019081116,grad_norm: 0.9999991193978354, iteration: 150645
loss: 0.9594576954841614,grad_norm: 0.8051932793995793, iteration: 150646
loss: 0.9693387746810913,grad_norm: 0.9999990192755617, iteration: 150647
loss: 1.0182042121887207,grad_norm: 0.9999990364375624, iteration: 150648
loss: 0.9990279078483582,grad_norm: 0.9999991571996508, iteration: 150649
loss: 0.9816147089004517,grad_norm: 0.999999093303947, iteration: 150650
loss: 0.9933967590332031,grad_norm: 0.9999990472301339, iteration: 150651
loss: 0.9981172680854797,grad_norm: 0.9999990344354263, iteration: 150652
loss: 0.9887533187866211,grad_norm: 0.9945905701681775, iteration: 150653
loss: 0.9931219220161438,grad_norm: 0.999999001592457, iteration: 150654
loss: 1.0106523036956787,grad_norm: 0.9991817290001004, iteration: 150655
loss: 1.011111855506897,grad_norm: 0.980858180273381, iteration: 150656
loss: 0.9885616898536682,grad_norm: 0.9999991402826937, iteration: 150657
loss: 1.0460847616195679,grad_norm: 0.9999990533758043, iteration: 150658
loss: 1.0134484767913818,grad_norm: 0.9999995185897643, iteration: 150659
loss: 1.0179038047790527,grad_norm: 0.9999991371728013, iteration: 150660
loss: 0.9761962890625,grad_norm: 0.9648172162266917, iteration: 150661
loss: 1.010624647140503,grad_norm: 0.9999991635653758, iteration: 150662
loss: 0.9951130747795105,grad_norm: 0.9999991032965638, iteration: 150663
loss: 0.9964563846588135,grad_norm: 0.9083858180760338, iteration: 150664
loss: 1.159852385520935,grad_norm: 0.9999992056147018, iteration: 150665
loss: 1.0398898124694824,grad_norm: 0.9999992902723174, iteration: 150666
loss: 1.0273631811141968,grad_norm: 0.9094701657397143, iteration: 150667
loss: 1.0220590829849243,grad_norm: 0.9999992067500704, iteration: 150668
loss: 1.0112718343734741,grad_norm: 0.9442419517443603, iteration: 150669
loss: 0.990035355091095,grad_norm: 0.9468971852840954, iteration: 150670
loss: 1.0389378070831299,grad_norm: 0.9433537599986717, iteration: 150671
loss: 0.9836781620979309,grad_norm: 0.8661547571632174, iteration: 150672
loss: 0.9601352214813232,grad_norm: 0.9999991074367807, iteration: 150673
loss: 0.98446124792099,grad_norm: 0.9999991645650036, iteration: 150674
loss: 0.9827942848205566,grad_norm: 0.9832446611586415, iteration: 150675
loss: 1.0422132015228271,grad_norm: 0.9999992785464021, iteration: 150676
loss: 1.0048770904541016,grad_norm: 0.9111952370192545, iteration: 150677
loss: 0.9690154194831848,grad_norm: 0.8909319906211121, iteration: 150678
loss: 1.0395514965057373,grad_norm: 0.999999192933273, iteration: 150679
loss: 1.032442331314087,grad_norm: 0.9999990995419585, iteration: 150680
loss: 1.0011619329452515,grad_norm: 0.8816742215790008, iteration: 150681
loss: 0.9422944188117981,grad_norm: 0.9999990294583948, iteration: 150682
loss: 0.9865821599960327,grad_norm: 0.9999991345749193, iteration: 150683
loss: 0.9885356426239014,grad_norm: 0.9999992095375065, iteration: 150684
loss: 1.0020931959152222,grad_norm: 0.9680502986691072, iteration: 150685
loss: 0.9761525392532349,grad_norm: 0.9999992642763026, iteration: 150686
loss: 1.0010571479797363,grad_norm: 0.9999485206957477, iteration: 150687
loss: 1.0314793586730957,grad_norm: 0.9212621576261949, iteration: 150688
loss: 0.9866194725036621,grad_norm: 0.9816347973132443, iteration: 150689
loss: 1.0178810358047485,grad_norm: 0.9616354249311985, iteration: 150690
loss: 0.9902241230010986,grad_norm: 0.7842923877739577, iteration: 150691
loss: 0.9839525818824768,grad_norm: 0.9999989954385273, iteration: 150692
loss: 1.0177193880081177,grad_norm: 0.9999990955317783, iteration: 150693
loss: 0.99146968126297,grad_norm: 0.999998996672804, iteration: 150694
loss: 0.9493858218193054,grad_norm: 0.889985561155092, iteration: 150695
loss: 1.0332521200180054,grad_norm: 0.9999990673173048, iteration: 150696
loss: 0.9859163165092468,grad_norm: 0.9999990699723988, iteration: 150697
loss: 1.1091023683547974,grad_norm: 0.9999999462854998, iteration: 150698
loss: 1.0245091915130615,grad_norm: 0.9999995412760055, iteration: 150699
loss: 1.0473960638046265,grad_norm: 0.9999992212526785, iteration: 150700
loss: 0.9907621741294861,grad_norm: 0.8923324747286342, iteration: 150701
loss: 0.999767541885376,grad_norm: 0.9804973618543384, iteration: 150702
loss: 1.0082944631576538,grad_norm: 0.8685350685461927, iteration: 150703
loss: 0.975257396697998,grad_norm: 0.9999991985957024, iteration: 150704
loss: 1.005523443222046,grad_norm: 0.8301226919188077, iteration: 150705
loss: 1.038195013999939,grad_norm: 0.9682880724365478, iteration: 150706
loss: 1.011033296585083,grad_norm: 0.9999991759709873, iteration: 150707
loss: 0.9921998381614685,grad_norm: 0.9999991328560498, iteration: 150708
loss: 0.9945870041847229,grad_norm: 0.9999990684257731, iteration: 150709
loss: 0.9882162809371948,grad_norm: 0.9999991744545532, iteration: 150710
loss: 0.9541549682617188,grad_norm: 0.9999990502652166, iteration: 150711
loss: 1.000925898551941,grad_norm: 0.9999991834800899, iteration: 150712
loss: 1.0081794261932373,grad_norm: 0.9999992626650109, iteration: 150713
loss: 1.0206507444381714,grad_norm: 0.9999990096066081, iteration: 150714
loss: 0.9982820153236389,grad_norm: 0.869982259788776, iteration: 150715
loss: 1.0225518941879272,grad_norm: 0.9999990442638117, iteration: 150716
loss: 1.0216269493103027,grad_norm: 0.9999990864666657, iteration: 150717
loss: 1.012706995010376,grad_norm: 0.999998926860381, iteration: 150718
loss: 1.020751953125,grad_norm: 0.9999991621463333, iteration: 150719
loss: 1.0061657428741455,grad_norm: 0.957630416406743, iteration: 150720
loss: 0.9974915981292725,grad_norm: 0.8910090919480141, iteration: 150721
loss: 1.0478404760360718,grad_norm: 0.99999984652337, iteration: 150722
loss: 1.0258581638336182,grad_norm: 0.9999992712714969, iteration: 150723
loss: 1.0340691804885864,grad_norm: 0.9323761840293731, iteration: 150724
loss: 0.9907284379005432,grad_norm: 0.9999997218564929, iteration: 150725
loss: 0.9757398366928101,grad_norm: 0.9999990744648981, iteration: 150726
loss: 1.0104026794433594,grad_norm: 0.9999992554826599, iteration: 150727
loss: 1.022306203842163,grad_norm: 0.9999991078575049, iteration: 150728
loss: 0.9939743280410767,grad_norm: 0.999999219533561, iteration: 150729
loss: 0.9913977384567261,grad_norm: 0.93439942653096, iteration: 150730
loss: 0.9565485715866089,grad_norm: 0.9798755496140371, iteration: 150731
loss: 0.9939979314804077,grad_norm: 0.9999990651213452, iteration: 150732
loss: 0.9956899285316467,grad_norm: 0.8107467517952214, iteration: 150733
loss: 0.9861740469932556,grad_norm: 0.9999992949866037, iteration: 150734
loss: 0.9972976446151733,grad_norm: 0.9999989079299764, iteration: 150735
loss: 1.0146431922912598,grad_norm: 0.9999992557709912, iteration: 150736
loss: 1.0016709566116333,grad_norm: 0.86030243445054, iteration: 150737
loss: 0.9959059953689575,grad_norm: 0.9999991588195655, iteration: 150738
loss: 0.9850678443908691,grad_norm: 0.9999990402593987, iteration: 150739
loss: 0.9906293153762817,grad_norm: 0.8539379275701071, iteration: 150740
loss: 0.9905489683151245,grad_norm: 0.9999993099574908, iteration: 150741
loss: 0.9804696440696716,grad_norm: 0.99999924323706, iteration: 150742
loss: 0.9788177013397217,grad_norm: 0.8975573336418705, iteration: 150743
loss: 1.0088037252426147,grad_norm: 0.9326568223789254, iteration: 150744
loss: 1.012155294418335,grad_norm: 0.9999991454499073, iteration: 150745
loss: 0.9588941931724548,grad_norm: 0.8459250858265627, iteration: 150746
loss: 1.0239782333374023,grad_norm: 0.9999991800948885, iteration: 150747
loss: 1.0125706195831299,grad_norm: 0.999999226055114, iteration: 150748
loss: 0.9594880938529968,grad_norm: 0.828967932168607, iteration: 150749
loss: 1.0223283767700195,grad_norm: 0.8862210265554932, iteration: 150750
loss: 1.044647455215454,grad_norm: 0.9253780155842859, iteration: 150751
loss: 1.017794132232666,grad_norm: 0.999999103016808, iteration: 150752
loss: 1.0084363222122192,grad_norm: 0.917893024563341, iteration: 150753
loss: 1.0037004947662354,grad_norm: 0.9809877334167572, iteration: 150754
loss: 1.0105432271957397,grad_norm: 0.9999992088754706, iteration: 150755
loss: 0.9934456944465637,grad_norm: 0.9628392728976402, iteration: 150756
loss: 0.9994729161262512,grad_norm: 0.9999992952592389, iteration: 150757
loss: 0.962101936340332,grad_norm: 0.9999990862285251, iteration: 150758
loss: 0.9884704947471619,grad_norm: 0.9999997177998213, iteration: 150759
loss: 0.943466305732727,grad_norm: 0.999999605323869, iteration: 150760
loss: 0.9960812330245972,grad_norm: 0.9154582525653125, iteration: 150761
loss: 0.9663815498352051,grad_norm: 0.9999991565914305, iteration: 150762
loss: 1.0300034284591675,grad_norm: 0.9999999075101825, iteration: 150763
loss: 1.0137939453125,grad_norm: 0.9999991184636383, iteration: 150764
loss: 1.0271497964859009,grad_norm: 0.9999992620313969, iteration: 150765
loss: 0.9907886981964111,grad_norm: 0.9999992093228158, iteration: 150766
loss: 1.0038138628005981,grad_norm: 0.9999991983016145, iteration: 150767
loss: 1.0005773305892944,grad_norm: 0.974624193540229, iteration: 150768
loss: 0.9514530897140503,grad_norm: 0.9999991328538511, iteration: 150769
loss: 1.0049445629119873,grad_norm: 0.8590912556581476, iteration: 150770
loss: 1.0395021438598633,grad_norm: 0.999999222841501, iteration: 150771
loss: 0.9946977496147156,grad_norm: 0.9479799379972185, iteration: 150772
loss: 0.9856932759284973,grad_norm: 0.999284885950171, iteration: 150773
loss: 1.0074065923690796,grad_norm: 0.9468903768908256, iteration: 150774
loss: 1.0160459280014038,grad_norm: 0.9999991184769836, iteration: 150775
loss: 1.0511517524719238,grad_norm: 0.9999998301460779, iteration: 150776
loss: 0.9376695156097412,grad_norm: 0.9999991761859952, iteration: 150777
loss: 1.003955602645874,grad_norm: 0.9999990297529474, iteration: 150778
loss: 1.000408411026001,grad_norm: 0.9999991371596215, iteration: 150779
loss: 0.9611608386039734,grad_norm: 0.9500184817752994, iteration: 150780
loss: 1.000886082649231,grad_norm: 0.9999991510635833, iteration: 150781
loss: 1.0154221057891846,grad_norm: 0.9433373517246353, iteration: 150782
loss: 1.1113650798797607,grad_norm: 1.000000016337762, iteration: 150783
loss: 1.0235679149627686,grad_norm: 0.9303347151551696, iteration: 150784
loss: 0.9783716797828674,grad_norm: 0.999999091871532, iteration: 150785
loss: 0.9799596667289734,grad_norm: 0.9155299550494911, iteration: 150786
loss: 0.995692253112793,grad_norm: 0.9999991115009345, iteration: 150787
loss: 1.0084507465362549,grad_norm: 0.9549614107086163, iteration: 150788
loss: 1.0027414560317993,grad_norm: 0.9323754274164991, iteration: 150789
loss: 1.001525640487671,grad_norm: 0.9365310139452069, iteration: 150790
loss: 1.1869920492172241,grad_norm: 0.9999994113137672, iteration: 150791
loss: 0.9764329195022583,grad_norm: 0.9673953636161263, iteration: 150792
loss: 1.0863701105117798,grad_norm: 0.9999991335740476, iteration: 150793
loss: 1.022478699684143,grad_norm: 0.9603653914934455, iteration: 150794
loss: 1.3462013006210327,grad_norm: 0.9999993796453912, iteration: 150795
loss: 0.995806872844696,grad_norm: 0.9999992391903153, iteration: 150796
loss: 1.221422553062439,grad_norm: 0.9999998956659729, iteration: 150797
loss: 1.023189663887024,grad_norm: 0.9177688447272949, iteration: 150798
loss: 1.0232555866241455,grad_norm: 0.9602204154951044, iteration: 150799
loss: 1.0433052778244019,grad_norm: 0.999999022609632, iteration: 150800
loss: 1.1355626583099365,grad_norm: 0.9977300670231141, iteration: 150801
loss: 0.9984588623046875,grad_norm: 0.9732588620999705, iteration: 150802
loss: 1.063080072402954,grad_norm: 0.9999998154376591, iteration: 150803
loss: 1.29366135597229,grad_norm: 0.9999995694524685, iteration: 150804
loss: 1.021774172782898,grad_norm: 0.9999992150129974, iteration: 150805
loss: 1.020573377609253,grad_norm: 0.9999991882580158, iteration: 150806
loss: 0.989209771156311,grad_norm: 0.9771425918189723, iteration: 150807
loss: 0.9655699729919434,grad_norm: 0.9999990700233571, iteration: 150808
loss: 1.0373809337615967,grad_norm: 0.9999997485809246, iteration: 150809
loss: 1.1446866989135742,grad_norm: 0.9999997193819977, iteration: 150810
loss: 1.0323421955108643,grad_norm: 1.0000000034928405, iteration: 150811
loss: 1.1324009895324707,grad_norm: 0.999999318314396, iteration: 150812
loss: 1.0406216382980347,grad_norm: 0.9999991471798786, iteration: 150813
loss: 1.2497905492782593,grad_norm: 0.9999995046703329, iteration: 150814
loss: 1.04275381565094,grad_norm: 0.9999991779351692, iteration: 150815
loss: 1.0016740560531616,grad_norm: 0.9605688508290656, iteration: 150816
loss: 1.0117536783218384,grad_norm: 0.9999991370636945, iteration: 150817
loss: 0.9453952312469482,grad_norm: 0.9999992656679086, iteration: 150818
loss: 1.038370132446289,grad_norm: 0.9999995973706864, iteration: 150819
loss: 1.0034213066101074,grad_norm: 0.9999991498547184, iteration: 150820
loss: 0.9819976091384888,grad_norm: 0.9005564462341321, iteration: 150821
loss: 1.0609644651412964,grad_norm: 0.999999251028352, iteration: 150822
loss: 1.121767520904541,grad_norm: 0.999999174688501, iteration: 150823
loss: 0.9956914186477661,grad_norm: 0.9999991365364498, iteration: 150824
loss: 0.9779714345932007,grad_norm: 0.8652205124659411, iteration: 150825
loss: 1.0398380756378174,grad_norm: 0.9999991354299828, iteration: 150826
loss: 1.0315296649932861,grad_norm: 0.9999997499820673, iteration: 150827
loss: 1.0780234336853027,grad_norm: 0.965242950852007, iteration: 150828
loss: 0.9888235330581665,grad_norm: 0.9999994047948625, iteration: 150829
loss: 1.0272181034088135,grad_norm: 0.9545748444289832, iteration: 150830
loss: 1.024634599685669,grad_norm: 0.9999991471684889, iteration: 150831
loss: 1.02395498752594,grad_norm: 0.999999307577526, iteration: 150832
loss: 1.064099669456482,grad_norm: 0.9999992720520786, iteration: 150833
loss: 1.003606915473938,grad_norm: 0.9999992661224876, iteration: 150834
loss: 0.9837496280670166,grad_norm: 0.9233929827738023, iteration: 150835
loss: 1.0986772775650024,grad_norm: 0.9999991736341572, iteration: 150836
loss: 1.0059945583343506,grad_norm: 0.9999997293360054, iteration: 150837
loss: 1.0835844278335571,grad_norm: 0.9999991995390467, iteration: 150838
loss: 0.9693818092346191,grad_norm: 0.9999990825246714, iteration: 150839
loss: 0.9829911589622498,grad_norm: 0.999999187603136, iteration: 150840
loss: 0.9808943867683411,grad_norm: 0.9141660041710387, iteration: 150841
loss: 0.9702909588813782,grad_norm: 0.8519122648074917, iteration: 150842
loss: 1.0077382326126099,grad_norm: 0.9904764533303201, iteration: 150843
loss: 1.0103199481964111,grad_norm: 0.9840100941869878, iteration: 150844
loss: 0.9840647578239441,grad_norm: 0.9832976804807463, iteration: 150845
loss: 0.971644937992096,grad_norm: 0.8569957486234039, iteration: 150846
loss: 0.9828726649284363,grad_norm: 0.8301600643019544, iteration: 150847
loss: 0.9984952807426453,grad_norm: 0.9999991303222636, iteration: 150848
loss: 1.0165654420852661,grad_norm: 0.987832996581085, iteration: 150849
loss: 0.9883451461791992,grad_norm: 0.9933423032503726, iteration: 150850
loss: 0.9962786436080933,grad_norm: 0.9999991008421089, iteration: 150851
loss: 0.9715885519981384,grad_norm: 0.9999993055138447, iteration: 150852
loss: 1.020493984222412,grad_norm: 0.9792798633886278, iteration: 150853
loss: 1.0189003944396973,grad_norm: 0.9889840501538247, iteration: 150854
loss: 1.008705735206604,grad_norm: 0.9999990879472308, iteration: 150855
loss: 1.01883065700531,grad_norm: 0.9258240356175863, iteration: 150856
loss: 0.9875446557998657,grad_norm: 0.9999995781050951, iteration: 150857
loss: 0.9623165130615234,grad_norm: 0.9278622393632736, iteration: 150858
loss: 1.0389095544815063,grad_norm: 0.9999991161149997, iteration: 150859
loss: 0.9649513363838196,grad_norm: 0.999999119439539, iteration: 150860
loss: 0.9879973530769348,grad_norm: 0.931235360573912, iteration: 150861
loss: 0.9918668866157532,grad_norm: 0.9999990064513429, iteration: 150862
loss: 1.0452626943588257,grad_norm: 0.9999990423897674, iteration: 150863
loss: 0.9905627965927124,grad_norm: 0.9999992019238588, iteration: 150864
loss: 1.0336931943893433,grad_norm: 0.9999992222512035, iteration: 150865
loss: 1.0199933052062988,grad_norm: 0.9999990581485348, iteration: 150866
loss: 0.9929623007774353,grad_norm: 0.9733206816355665, iteration: 150867
loss: 1.0042024850845337,grad_norm: 0.9999990717277135, iteration: 150868
loss: 1.0542250871658325,grad_norm: 0.999999892990889, iteration: 150869
loss: 1.1226067543029785,grad_norm: 0.9999993529468935, iteration: 150870
loss: 0.9660848379135132,grad_norm: 0.9754008762740365, iteration: 150871
loss: 0.9867308139801025,grad_norm: 0.868328709916586, iteration: 150872
loss: 0.9888413548469543,grad_norm: 0.9181340041849024, iteration: 150873
loss: 0.9696207642555237,grad_norm: 0.9740175269238027, iteration: 150874
loss: 1.0160719156265259,grad_norm: 0.9999993629889052, iteration: 150875
loss: 1.0105212926864624,grad_norm: 0.8856999018766932, iteration: 150876
loss: 1.0115551948547363,grad_norm: 0.9999991741412642, iteration: 150877
loss: 0.9938684701919556,grad_norm: 0.9999992218582118, iteration: 150878
loss: 1.02321457862854,grad_norm: 0.9999992624130738, iteration: 150879
loss: 1.0153998136520386,grad_norm: 0.9999993339283345, iteration: 150880
loss: 1.0191560983657837,grad_norm: 0.9999992578318601, iteration: 150881
loss: 1.016316294670105,grad_norm: 0.9241601588442799, iteration: 150882
loss: 0.9908031821250916,grad_norm: 0.9999991502172602, iteration: 150883
loss: 0.9848344326019287,grad_norm: 0.9999990895644636, iteration: 150884
loss: 0.9872691631317139,grad_norm: 0.9999989956407578, iteration: 150885
loss: 1.0014221668243408,grad_norm: 0.8880167759225551, iteration: 150886
loss: 0.9762507677078247,grad_norm: 0.8874957701020767, iteration: 150887
loss: 1.105979084968567,grad_norm: 0.9999990675763888, iteration: 150888
loss: 0.9780380725860596,grad_norm: 0.9999992606092404, iteration: 150889
loss: 0.9771690964698792,grad_norm: 0.9662783382840434, iteration: 150890
loss: 1.0177620649337769,grad_norm: 0.9215098965074991, iteration: 150891
loss: 1.0511027574539185,grad_norm: 0.9999998584625996, iteration: 150892
loss: 1.019548773765564,grad_norm: 0.9730461078556772, iteration: 150893
loss: 1.0295244455337524,grad_norm: 0.9999990386257436, iteration: 150894
loss: 1.1969035863876343,grad_norm: 0.9999998358954113, iteration: 150895
loss: 1.0455961227416992,grad_norm: 0.9999990980707277, iteration: 150896
loss: 0.9827098846435547,grad_norm: 0.9999992207698227, iteration: 150897
loss: 1.0251318216323853,grad_norm: 0.919547210994484, iteration: 150898
loss: 0.9855108261108398,grad_norm: 0.9365152906694962, iteration: 150899
loss: 0.9985145330429077,grad_norm: 0.8962284265038969, iteration: 150900
loss: 0.9969127774238586,grad_norm: 0.9999990904398063, iteration: 150901
loss: 0.9887664318084717,grad_norm: 0.9999991147897345, iteration: 150902
loss: 0.9951416254043579,grad_norm: 0.9941777842892691, iteration: 150903
loss: 1.0323072671890259,grad_norm: 0.9999993897225551, iteration: 150904
loss: 0.9938040971755981,grad_norm: 0.985168302560673, iteration: 150905
loss: 1.01240873336792,grad_norm: 0.9999990566627414, iteration: 150906
loss: 0.9949079155921936,grad_norm: 0.9430400592422492, iteration: 150907
loss: 0.9663825035095215,grad_norm: 0.9792994454925432, iteration: 150908
loss: 0.9847800135612488,grad_norm: 0.9777167664858896, iteration: 150909
loss: 0.9999823570251465,grad_norm: 0.9999992485590565, iteration: 150910
loss: 1.0148996114730835,grad_norm: 0.9219072102114956, iteration: 150911
loss: 1.0242149829864502,grad_norm: 0.9999990510163884, iteration: 150912
loss: 0.9916302561759949,grad_norm: 0.954834172218497, iteration: 150913
loss: 0.9843604564666748,grad_norm: 0.9698921164739291, iteration: 150914
loss: 0.9936028122901917,grad_norm: 0.8711780116745512, iteration: 150915
loss: 0.9756758809089661,grad_norm: 0.9999990171510629, iteration: 150916
loss: 0.9876039028167725,grad_norm: 0.8848495167758291, iteration: 150917
loss: 1.0293861627578735,grad_norm: 0.9999991782058713, iteration: 150918
loss: 1.0597623586654663,grad_norm: 0.9999993690247109, iteration: 150919
loss: 0.9744223356246948,grad_norm: 0.9999991606714858, iteration: 150920
loss: 0.9839025139808655,grad_norm: 0.999999213776352, iteration: 150921
loss: 0.9890384674072266,grad_norm: 0.9999990884166914, iteration: 150922
loss: 1.0063645839691162,grad_norm: 0.846502879331604, iteration: 150923
loss: 1.0005311965942383,grad_norm: 0.9571633318219377, iteration: 150924
loss: 1.101330280303955,grad_norm: 0.999999224138102, iteration: 150925
loss: 1.0035618543624878,grad_norm: 0.9733179435705369, iteration: 150926
loss: 0.9741198420524597,grad_norm: 0.9999993047682544, iteration: 150927
loss: 1.1876106262207031,grad_norm: 0.9999994598658035, iteration: 150928
loss: 0.9976126551628113,grad_norm: 0.9999991523669448, iteration: 150929
loss: 0.9803164601325989,grad_norm: 0.9540515392691203, iteration: 150930
loss: 0.968597412109375,grad_norm: 0.9222449529709323, iteration: 150931
loss: 0.9997788667678833,grad_norm: 0.9999993000091844, iteration: 150932
loss: 1.0080711841583252,grad_norm: 0.9999992430670464, iteration: 150933
loss: 1.0124449729919434,grad_norm: 0.9999989345376183, iteration: 150934
loss: 0.9783608317375183,grad_norm: 0.999999249768388, iteration: 150935
loss: 0.9955488443374634,grad_norm: 0.9999991713920979, iteration: 150936
loss: 1.0498697757720947,grad_norm: 0.9999997262138495, iteration: 150937
loss: 0.9997231364250183,grad_norm: 0.9999990157762015, iteration: 150938
loss: 1.0250188112258911,grad_norm: 0.9999991625737386, iteration: 150939
loss: 1.016900658607483,grad_norm: 0.8909948180412476, iteration: 150940
loss: 1.0500833988189697,grad_norm: 0.9999990742574422, iteration: 150941
loss: 1.0146676301956177,grad_norm: 0.9999990490655493, iteration: 150942
loss: 1.0198352336883545,grad_norm: 0.980347955208486, iteration: 150943
loss: 1.0103847980499268,grad_norm: 0.9683088325730377, iteration: 150944
loss: 0.9798163175582886,grad_norm: 0.9825241924460963, iteration: 150945
loss: 1.0298106670379639,grad_norm: 0.8963987496898623, iteration: 150946
loss: 0.9986469745635986,grad_norm: 0.9008303121060655, iteration: 150947
loss: 1.01021146774292,grad_norm: 0.999999074327391, iteration: 150948
loss: 0.9654930233955383,grad_norm: 0.9369297314804602, iteration: 150949
loss: 0.9720361828804016,grad_norm: 0.9999990445500689, iteration: 150950
loss: 0.9822580814361572,grad_norm: 0.999999114313791, iteration: 150951
loss: 0.9710630178451538,grad_norm: 0.9999991091444655, iteration: 150952
loss: 1.0511375665664673,grad_norm: 0.9999991385151125, iteration: 150953
loss: 1.0192521810531616,grad_norm: 0.9905053275282067, iteration: 150954
loss: 1.0150917768478394,grad_norm: 0.9212582527461047, iteration: 150955
loss: 1.0194823741912842,grad_norm: 0.9615021225725653, iteration: 150956
loss: 0.984029233455658,grad_norm: 0.9904150530843755, iteration: 150957
loss: 1.0393855571746826,grad_norm: 0.9999990240598411, iteration: 150958
loss: 1.039857268333435,grad_norm: 0.9612137896918161, iteration: 150959
loss: 0.968278706073761,grad_norm: 0.8974940990131043, iteration: 150960
loss: 1.0091384649276733,grad_norm: 0.9999992596957907, iteration: 150961
loss: 1.0115488767623901,grad_norm: 0.9999990715873174, iteration: 150962
loss: 0.9707590341567993,grad_norm: 0.9999991000083126, iteration: 150963
loss: 0.9776529669761658,grad_norm: 0.8568958600974858, iteration: 150964
loss: 0.9746381640434265,grad_norm: 0.9903165249990858, iteration: 150965
loss: 1.0024821758270264,grad_norm: 0.9130875936391868, iteration: 150966
loss: 1.0500191450119019,grad_norm: 0.9049479199499043, iteration: 150967
loss: 0.9941661953926086,grad_norm: 0.8826004434697082, iteration: 150968
loss: 0.9726411700248718,grad_norm: 0.9938262578489545, iteration: 150969
loss: 0.9752658009529114,grad_norm: 0.9251921179714275, iteration: 150970
loss: 0.9715939164161682,grad_norm: 0.999999084849318, iteration: 150971
loss: 1.0244550704956055,grad_norm: 0.9999993902933522, iteration: 150972
loss: 0.997251033782959,grad_norm: 0.874075582701459, iteration: 150973
loss: 0.988579511642456,grad_norm: 0.99999917787683, iteration: 150974
loss: 0.9867157936096191,grad_norm: 0.9899080191905302, iteration: 150975
loss: 0.9798004627227783,grad_norm: 0.9861517138436103, iteration: 150976
loss: 1.0188546180725098,grad_norm: 0.9999990144305242, iteration: 150977
loss: 0.9667215943336487,grad_norm: 0.9999993156649344, iteration: 150978
loss: 0.9950839281082153,grad_norm: 0.9999990351373877, iteration: 150979
loss: 1.0028886795043945,grad_norm: 0.9999990248252364, iteration: 150980
loss: 1.0174119472503662,grad_norm: 0.9999992056753181, iteration: 150981
loss: 0.9797936081886292,grad_norm: 0.9999992262798714, iteration: 150982
loss: 0.9965721964836121,grad_norm: 0.9965537699147029, iteration: 150983
loss: 1.0119950771331787,grad_norm: 0.9999992491505726, iteration: 150984
loss: 1.008279800415039,grad_norm: 0.9505287420882286, iteration: 150985
loss: 1.022158145904541,grad_norm: 0.8903113435174136, iteration: 150986
loss: 0.9818757772445679,grad_norm: 0.9999992532421726, iteration: 150987
loss: 1.001824140548706,grad_norm: 0.885777200292875, iteration: 150988
loss: 0.9946282505989075,grad_norm: 0.8665095999862238, iteration: 150989
loss: 1.0225521326065063,grad_norm: 0.9999991415146628, iteration: 150990
loss: 0.9803548455238342,grad_norm: 0.9999991154381235, iteration: 150991
loss: 1.006829023361206,grad_norm: 0.9799017397616981, iteration: 150992
loss: 0.9900584816932678,grad_norm: 0.9545284504612401, iteration: 150993
loss: 1.0170559883117676,grad_norm: 0.927862257665137, iteration: 150994
loss: 1.0228773355484009,grad_norm: 0.7499370544188076, iteration: 150995
loss: 0.9884678721427917,grad_norm: 0.9999992035700477, iteration: 150996
loss: 0.98017817735672,grad_norm: 0.9999990172889973, iteration: 150997
loss: 1.0606727600097656,grad_norm: 0.9775966834317106, iteration: 150998
loss: 1.02610445022583,grad_norm: 0.9999990533210091, iteration: 150999
loss: 0.986574113368988,grad_norm: 0.9999991756807401, iteration: 151000
loss: 1.0175600051879883,grad_norm: 0.8474542077572899, iteration: 151001
loss: 0.996312141418457,grad_norm: 0.8585471227373089, iteration: 151002
loss: 0.9736203551292419,grad_norm: 0.9999991598051998, iteration: 151003
loss: 0.9855664968490601,grad_norm: 0.9999991164411425, iteration: 151004
loss: 0.9987315535545349,grad_norm: 0.9999994111975273, iteration: 151005
loss: 0.998901903629303,grad_norm: 0.9999991858792233, iteration: 151006
loss: 0.9996897578239441,grad_norm: 0.8510107557278673, iteration: 151007
loss: 0.9967416524887085,grad_norm: 0.9372203883271787, iteration: 151008
loss: 0.9996113777160645,grad_norm: 0.9403875896629026, iteration: 151009
loss: 0.9774708151817322,grad_norm: 0.9229557136720222, iteration: 151010
loss: 1.006145715713501,grad_norm: 0.999998976476551, iteration: 151011
loss: 0.9718399047851562,grad_norm: 0.9457583916885286, iteration: 151012
loss: 1.0205585956573486,grad_norm: 0.9798983625952268, iteration: 151013
loss: 1.0083004236221313,grad_norm: 0.9817298564323722, iteration: 151014
loss: 1.0016186237335205,grad_norm: 0.999999131641408, iteration: 151015
loss: 0.9882574677467346,grad_norm: 0.8982615789809453, iteration: 151016
loss: 0.9844212532043457,grad_norm: 0.908803491193777, iteration: 151017
loss: 1.0198720693588257,grad_norm: 0.9520375070848925, iteration: 151018
loss: 0.9806882739067078,grad_norm: 0.9053914141026306, iteration: 151019
loss: 0.971269965171814,grad_norm: 0.9999992224867112, iteration: 151020
loss: 1.122565746307373,grad_norm: 0.9999996100433789, iteration: 151021
loss: 0.9510863423347473,grad_norm: 0.9999991027679123, iteration: 151022
loss: 0.971544623374939,grad_norm: 0.9531522355371781, iteration: 151023
loss: 0.9572754502296448,grad_norm: 0.9999991942915945, iteration: 151024
loss: 0.9690918326377869,grad_norm: 0.9999991200952557, iteration: 151025
loss: 1.0767207145690918,grad_norm: 0.999999790510651, iteration: 151026
loss: 1.0164412260055542,grad_norm: 0.948481826171059, iteration: 151027
loss: 0.9546443819999695,grad_norm: 0.9209032961261082, iteration: 151028
loss: 1.0014643669128418,grad_norm: 0.8212124335712981, iteration: 151029
loss: 1.0028468370437622,grad_norm: 0.9999990909934753, iteration: 151030
loss: 1.0247745513916016,grad_norm: 0.9037419046295119, iteration: 151031
loss: 1.0084182024002075,grad_norm: 0.9999991682252395, iteration: 151032
loss: 0.978153645992279,grad_norm: 0.9142634814959584, iteration: 151033
loss: 0.9970777630805969,grad_norm: 0.8713752586503836, iteration: 151034
loss: 0.9862034916877747,grad_norm: 0.9313739975967931, iteration: 151035
loss: 0.9571565389633179,grad_norm: 0.9512837339548423, iteration: 151036
loss: 0.9854373335838318,grad_norm: 0.9582035094759803, iteration: 151037
loss: 0.9899677038192749,grad_norm: 0.9999991628047677, iteration: 151038
loss: 1.0068646669387817,grad_norm: 0.9328685703715377, iteration: 151039
loss: 0.9996465444564819,grad_norm: 0.9999992045652764, iteration: 151040
loss: 1.0068951845169067,grad_norm: 0.9999991372513224, iteration: 151041
loss: 1.0271294116973877,grad_norm: 0.9999993541316924, iteration: 151042
loss: 1.0150288343429565,grad_norm: 0.7779446815925527, iteration: 151043
loss: 1.0003503561019897,grad_norm: 0.9999991579228124, iteration: 151044
loss: 0.9726035594940186,grad_norm: 0.9999991341747312, iteration: 151045
loss: 1.0453945398330688,grad_norm: 0.9999994812128488, iteration: 151046
loss: 0.9568524956703186,grad_norm: 0.9999991039161257, iteration: 151047
loss: 1.0382310152053833,grad_norm: 0.9999990199482253, iteration: 151048
loss: 0.9775647521018982,grad_norm: 0.9999992782825259, iteration: 151049
loss: 1.0899286270141602,grad_norm: 0.99999991899735, iteration: 151050
loss: 1.0232598781585693,grad_norm: 0.9999990947792095, iteration: 151051
loss: 0.9702417254447937,grad_norm: 0.9711180426066616, iteration: 151052
loss: 1.0276137590408325,grad_norm: 0.9999991664522124, iteration: 151053
loss: 1.1330950260162354,grad_norm: 0.9999995739626789, iteration: 151054
loss: 1.0065510272979736,grad_norm: 0.9999990690976526, iteration: 151055
loss: 0.9897891283035278,grad_norm: 0.9999989777364016, iteration: 151056
loss: 0.9870286583900452,grad_norm: 0.9142860998960293, iteration: 151057
loss: 0.9843556880950928,grad_norm: 0.9999992087154073, iteration: 151058
loss: 0.9516218900680542,grad_norm: 0.9490854744818789, iteration: 151059
loss: 0.9674021005630493,grad_norm: 0.9999991591808985, iteration: 151060
loss: 1.0500553846359253,grad_norm: 0.9999991289933069, iteration: 151061
loss: 1.0159125328063965,grad_norm: 0.9239047157408743, iteration: 151062
loss: 1.0006150007247925,grad_norm: 0.9999991157587256, iteration: 151063
loss: 1.0190943479537964,grad_norm: 0.8146826665334149, iteration: 151064
loss: 0.9765171408653259,grad_norm: 0.8965213224070326, iteration: 151065
loss: 1.0194978713989258,grad_norm: 0.999999167116118, iteration: 151066
loss: 1.0083231925964355,grad_norm: 0.9999994483852305, iteration: 151067
loss: 1.0097465515136719,grad_norm: 0.9999991898853343, iteration: 151068
loss: 1.0918470621109009,grad_norm: 0.9999995364687716, iteration: 151069
loss: 0.9954437017440796,grad_norm: 0.9999991457403336, iteration: 151070
loss: 0.9875637292861938,grad_norm: 0.9999990614925292, iteration: 151071
loss: 0.9629788994789124,grad_norm: 0.7571113755581732, iteration: 151072
loss: 1.0505106449127197,grad_norm: 0.9999997995942552, iteration: 151073
loss: 1.0321941375732422,grad_norm: 0.9999992885857688, iteration: 151074
loss: 1.0410019159317017,grad_norm: 0.9999992366146279, iteration: 151075
loss: 1.0233687162399292,grad_norm: 0.9999989157614331, iteration: 151076
loss: 1.018009901046753,grad_norm: 0.9999991307066513, iteration: 151077
loss: 1.0090339183807373,grad_norm: 0.9999991197918293, iteration: 151078
loss: 1.0301408767700195,grad_norm: 0.9999997069008479, iteration: 151079
loss: 1.0101115703582764,grad_norm: 0.9999989047863461, iteration: 151080
loss: 1.0078479051589966,grad_norm: 0.9999992485881046, iteration: 151081
loss: 1.0370256900787354,grad_norm: 0.9294087235008848, iteration: 151082
loss: 1.099853754043579,grad_norm: 0.9999991615555412, iteration: 151083
loss: 0.9806674122810364,grad_norm: 0.9734232343662808, iteration: 151084
loss: 1.0137685537338257,grad_norm: 0.9999990668061741, iteration: 151085
loss: 0.9911473393440247,grad_norm: 0.9467987972684402, iteration: 151086
loss: 1.0678985118865967,grad_norm: 0.8763754956821913, iteration: 151087
loss: 0.9565788507461548,grad_norm: 0.9850101241784451, iteration: 151088
loss: 1.0171624422073364,grad_norm: 0.9999992707424222, iteration: 151089
loss: 0.983489990234375,grad_norm: 0.9818962509320354, iteration: 151090
loss: 1.0730209350585938,grad_norm: 0.9999996312082118, iteration: 151091
loss: 1.072601079940796,grad_norm: 0.9999992583389202, iteration: 151092
loss: 1.0008372068405151,grad_norm: 0.9999991570482455, iteration: 151093
loss: 0.9714747071266174,grad_norm: 0.999999081977389, iteration: 151094
loss: 0.9947565793991089,grad_norm: 0.9648681978855365, iteration: 151095
loss: 0.9994804859161377,grad_norm: 0.9882651889159505, iteration: 151096
loss: 1.0772743225097656,grad_norm: 0.9926245302192886, iteration: 151097
loss: 1.0076146125793457,grad_norm: 0.9183377469566301, iteration: 151098
loss: 0.9848700761795044,grad_norm: 0.999999287096629, iteration: 151099
loss: 1.0025120973587036,grad_norm: 0.9999991209194748, iteration: 151100
loss: 0.9939095973968506,grad_norm: 0.9079202449319057, iteration: 151101
loss: 0.9984583854675293,grad_norm: 0.927549946922773, iteration: 151102
loss: 1.0564184188842773,grad_norm: 0.9999996968073102, iteration: 151103
loss: 1.045119047164917,grad_norm: 0.9999998759216925, iteration: 151104
loss: 1.0397998094558716,grad_norm: 0.999999022574806, iteration: 151105
loss: 1.0432265996932983,grad_norm: 0.9999992604542486, iteration: 151106
loss: 0.9727625846862793,grad_norm: 0.9999990965621667, iteration: 151107
loss: 0.974776566028595,grad_norm: 0.9431353518713023, iteration: 151108
loss: 0.9906195402145386,grad_norm: 0.9391833120743368, iteration: 151109
loss: 0.969218373298645,grad_norm: 0.8875311120756824, iteration: 151110
loss: 0.9588921070098877,grad_norm: 0.9999991606006785, iteration: 151111
loss: 0.9817432165145874,grad_norm: 0.8900412551128821, iteration: 151112
loss: 1.0917184352874756,grad_norm: 0.9999992252699125, iteration: 151113
loss: 0.9798979759216309,grad_norm: 0.7987333386734725, iteration: 151114
loss: 1.0123748779296875,grad_norm: 0.9997879958114824, iteration: 151115
loss: 1.0343601703643799,grad_norm: 0.9999991436119874, iteration: 151116
loss: 1.0527499914169312,grad_norm: 0.9950505837966095, iteration: 151117
loss: 1.088301420211792,grad_norm: 0.999999342778663, iteration: 151118
loss: 0.9895799160003662,grad_norm: 0.999999749904867, iteration: 151119
loss: 1.0367666482925415,grad_norm: 0.9999990412465649, iteration: 151120
loss: 0.9737840890884399,grad_norm: 0.9999989940527204, iteration: 151121
loss: 0.9786484241485596,grad_norm: 0.9999990915685144, iteration: 151122
loss: 1.0090304613113403,grad_norm: 0.9999990219240158, iteration: 151123
loss: 0.998245894908905,grad_norm: 0.907183816456076, iteration: 151124
loss: 1.137379765510559,grad_norm: 0.999999921861958, iteration: 151125
loss: 0.9943719506263733,grad_norm: 0.9587126005901, iteration: 151126
loss: 1.049926996231079,grad_norm: 0.9999995515835681, iteration: 151127
loss: 0.999146580696106,grad_norm: 0.9999994575820754, iteration: 151128
loss: 0.9958824515342712,grad_norm: 0.9551279952558237, iteration: 151129
loss: 1.008191704750061,grad_norm: 0.9999991583472242, iteration: 151130
loss: 1.0880591869354248,grad_norm: 0.9912364660927092, iteration: 151131
loss: 0.9978792071342468,grad_norm: 0.9999990700460514, iteration: 151132
loss: 0.9730517268180847,grad_norm: 0.9599467406665064, iteration: 151133
loss: 1.0255482196807861,grad_norm: 0.8997413157926774, iteration: 151134
loss: 0.9867622256278992,grad_norm: 0.9249743161597535, iteration: 151135
loss: 1.067051887512207,grad_norm: 0.9999994278863659, iteration: 151136
loss: 0.964983344078064,grad_norm: 0.9999991373663191, iteration: 151137
loss: 1.0186657905578613,grad_norm: 0.99999989786002, iteration: 151138
loss: 1.0091809034347534,grad_norm: 0.999998978477192, iteration: 151139
loss: 0.9675538539886475,grad_norm: 0.9999991637321548, iteration: 151140
loss: 1.0101886987686157,grad_norm: 0.9999990534389831, iteration: 151141
loss: 1.0287456512451172,grad_norm: 0.9949780313563175, iteration: 151142
loss: 1.0347572565078735,grad_norm: 0.9999991668354176, iteration: 151143
loss: 0.9837931394577026,grad_norm: 0.9999991365663494, iteration: 151144
loss: 0.9504912495613098,grad_norm: 0.9999992345721409, iteration: 151145
loss: 1.0412516593933105,grad_norm: 0.9999998283570392, iteration: 151146
loss: 1.1104750633239746,grad_norm: 0.9999994239107494, iteration: 151147
loss: 1.0099858045578003,grad_norm: 0.7866235457034602, iteration: 151148
loss: 1.0258145332336426,grad_norm: 0.9388167306906315, iteration: 151149
loss: 1.0111265182495117,grad_norm: 0.9894317657815284, iteration: 151150
loss: 1.039528250694275,grad_norm: 0.999999069037592, iteration: 151151
loss: 1.0038613080978394,grad_norm: 0.969266064596606, iteration: 151152
loss: 1.0332651138305664,grad_norm: 0.9135640013299727, iteration: 151153
loss: 1.0193419456481934,grad_norm: 0.9999990288445828, iteration: 151154
loss: 0.9988152384757996,grad_norm: 0.8795975559955642, iteration: 151155
loss: 0.988954484462738,grad_norm: 0.9734568744633335, iteration: 151156
loss: 1.0329049825668335,grad_norm: 0.9999992411696528, iteration: 151157
loss: 1.0195515155792236,grad_norm: 0.9399117627351313, iteration: 151158
loss: 0.9871371388435364,grad_norm: 0.9999992932042865, iteration: 151159
loss: 1.00596284866333,grad_norm: 0.9999991209713648, iteration: 151160
loss: 0.9912897348403931,grad_norm: 0.9999991517457594, iteration: 151161
loss: 1.0035786628723145,grad_norm: 0.9999991606260491, iteration: 151162
loss: 1.0203765630722046,grad_norm: 0.9999997130880144, iteration: 151163
loss: 1.0079786777496338,grad_norm: 0.9794327404257617, iteration: 151164
loss: 0.9908934831619263,grad_norm: 0.8771721772827169, iteration: 151165
loss: 1.0245380401611328,grad_norm: 0.9510273380981469, iteration: 151166
loss: 0.9996997714042664,grad_norm: 0.9999991940939854, iteration: 151167
loss: 1.0359867811203003,grad_norm: 0.9999992173285225, iteration: 151168
loss: 1.0185339450836182,grad_norm: 0.9999993460096168, iteration: 151169
loss: 1.0417276620864868,grad_norm: 0.9999992127312186, iteration: 151170
loss: 1.0325937271118164,grad_norm: 0.8870137759240084, iteration: 151171
loss: 1.027913212776184,grad_norm: 0.9999990638344106, iteration: 151172
loss: 0.9607135057449341,grad_norm: 0.9999991154095028, iteration: 151173
loss: 1.01347815990448,grad_norm: 0.999999082421224, iteration: 151174
loss: 0.9700421690940857,grad_norm: 0.9999991116064708, iteration: 151175
loss: 1.0119975805282593,grad_norm: 0.9179348376071788, iteration: 151176
loss: 0.9747950434684753,grad_norm: 0.9999993799331792, iteration: 151177
loss: 1.010179042816162,grad_norm: 0.8916407351476404, iteration: 151178
loss: 1.0500560998916626,grad_norm: 0.9999995115761462, iteration: 151179
loss: 0.9855994582176208,grad_norm: 0.9999991321112174, iteration: 151180
loss: 0.9871350526809692,grad_norm: 0.9999991968016442, iteration: 151181
loss: 1.0026812553405762,grad_norm: 0.9999991787069821, iteration: 151182
loss: 1.0052061080932617,grad_norm: 0.9999992620344824, iteration: 151183
loss: 0.9565953016281128,grad_norm: 0.9692414726469756, iteration: 151184
loss: 0.9972019791603088,grad_norm: 0.9503146970725043, iteration: 151185
loss: 0.9874023795127869,grad_norm: 0.9523633402891846, iteration: 151186
loss: 0.9922642111778259,grad_norm: 0.9423316342841735, iteration: 151187
loss: 1.1244276762008667,grad_norm: 0.9999999036684212, iteration: 151188
loss: 0.983163595199585,grad_norm: 0.9999990346885559, iteration: 151189
loss: 0.9539907574653625,grad_norm: 0.9319177246970095, iteration: 151190
loss: 1.0002344846725464,grad_norm: 0.9999990246819613, iteration: 151191
loss: 1.0117613077163696,grad_norm: 0.993929464818753, iteration: 151192
loss: 0.9863386750221252,grad_norm: 0.9999990352298183, iteration: 151193
loss: 1.0010629892349243,grad_norm: 0.9999992322204182, iteration: 151194
loss: 0.9993253350257874,grad_norm: 0.9029293753374423, iteration: 151195
loss: 1.0222077369689941,grad_norm: 0.9999991443042754, iteration: 151196
loss: 1.0264735221862793,grad_norm: 0.9999998460095331, iteration: 151197
loss: 0.9891469478607178,grad_norm: 0.9999992782534677, iteration: 151198
loss: 1.0479477643966675,grad_norm: 0.9999996271535262, iteration: 151199
loss: 1.018882155418396,grad_norm: 0.9999991127953208, iteration: 151200
loss: 0.9963697195053101,grad_norm: 0.9999991891137535, iteration: 151201
loss: 1.026034951210022,grad_norm: 0.8893655367120124, iteration: 151202
loss: 0.9956858158111572,grad_norm: 0.9999992049714362, iteration: 151203
loss: 1.0106253623962402,grad_norm: 0.999999047696016, iteration: 151204
loss: 0.9994981288909912,grad_norm: 0.8902144009365218, iteration: 151205
loss: 1.0082545280456543,grad_norm: 0.9999991084111053, iteration: 151206
loss: 0.9984856843948364,grad_norm: 0.9999991533410886, iteration: 151207
loss: 1.0449355840682983,grad_norm: 0.9999994137571294, iteration: 151208
loss: 1.0215460062026978,grad_norm: 0.9444512915879929, iteration: 151209
loss: 0.9876460433006287,grad_norm: 0.999999065877488, iteration: 151210
loss: 0.9890727400779724,grad_norm: 0.9706199458567791, iteration: 151211
loss: 1.0090779066085815,grad_norm: 0.9999991162071888, iteration: 151212
loss: 1.0156015157699585,grad_norm: 0.9999993201969184, iteration: 151213
loss: 1.0054287910461426,grad_norm: 0.9999991297161065, iteration: 151214
loss: 0.9604275822639465,grad_norm: 0.9999992665540807, iteration: 151215
loss: 1.0383715629577637,grad_norm: 0.9999990594316666, iteration: 151216
loss: 0.9845314621925354,grad_norm: 0.9999989099884797, iteration: 151217
loss: 0.9782999753952026,grad_norm: 0.9999992353092441, iteration: 151218
loss: 1.0929083824157715,grad_norm: 0.9999994047147618, iteration: 151219
loss: 0.9693598747253418,grad_norm: 0.8809020558787997, iteration: 151220
loss: 1.0372111797332764,grad_norm: 0.9999992145030271, iteration: 151221
loss: 1.032958984375,grad_norm: 0.9416921743239252, iteration: 151222
loss: 1.0188404321670532,grad_norm: 0.9999990674339901, iteration: 151223
loss: 1.0066052675247192,grad_norm: 0.9625222073639659, iteration: 151224
loss: 0.9915914535522461,grad_norm: 0.9999991498372786, iteration: 151225
loss: 0.9674100279808044,grad_norm: 0.9770310195028112, iteration: 151226
loss: 1.047471523284912,grad_norm: 0.8815737213910967, iteration: 151227
loss: 0.9954144358634949,grad_norm: 0.9999992278261922, iteration: 151228
loss: 1.0165364742279053,grad_norm: 0.9004517805124705, iteration: 151229
loss: 1.0321866273880005,grad_norm: 0.9999991170864029, iteration: 151230
loss: 0.9916151762008667,grad_norm: 0.9757929875694367, iteration: 151231
loss: 1.0009154081344604,grad_norm: 0.9570353062240232, iteration: 151232
loss: 0.985977053642273,grad_norm: 0.9509069691181466, iteration: 151233
loss: 1.0127888917922974,grad_norm: 0.9940256783238781, iteration: 151234
loss: 1.0580817461013794,grad_norm: 0.9535983378865406, iteration: 151235
loss: 1.009879231452942,grad_norm: 0.9999991284950349, iteration: 151236
loss: 1.0556001663208008,grad_norm: 0.9999990051228671, iteration: 151237
loss: 0.9718929529190063,grad_norm: 0.9999991085584176, iteration: 151238
loss: 1.0597000122070312,grad_norm: 0.9999993529726848, iteration: 151239
loss: 0.9705198407173157,grad_norm: 0.997032605316081, iteration: 151240
loss: 1.0248048305511475,grad_norm: 0.9742169590856751, iteration: 151241
loss: 0.988718569278717,grad_norm: 0.999999307743185, iteration: 151242
loss: 0.9453770518302917,grad_norm: 0.9999991497689575, iteration: 151243
loss: 1.020302414894104,grad_norm: 0.9999993135418087, iteration: 151244
loss: 0.9793433547019958,grad_norm: 0.9999991120601156, iteration: 151245
loss: 1.0308476686477661,grad_norm: 0.9999995467646722, iteration: 151246
loss: 0.9968231916427612,grad_norm: 0.9999990819719893, iteration: 151247
loss: 1.0465387105941772,grad_norm: 0.9999991570732955, iteration: 151248
loss: 0.9878844618797302,grad_norm: 0.9905358564756939, iteration: 151249
loss: 1.027534008026123,grad_norm: 0.9999990535348826, iteration: 151250
loss: 1.00927734375,grad_norm: 0.8263651184429261, iteration: 151251
loss: 1.0058668851852417,grad_norm: 0.9999990984451073, iteration: 151252
loss: 1.0889378786087036,grad_norm: 0.9999999399453878, iteration: 151253
loss: 0.9731882810592651,grad_norm: 0.9967852835094293, iteration: 151254
loss: 1.0290307998657227,grad_norm: 0.9999991127962027, iteration: 151255
loss: 0.9672777056694031,grad_norm: 0.9762491192647145, iteration: 151256
loss: 1.0024429559707642,grad_norm: 0.9999991759023152, iteration: 151257
loss: 0.9482601881027222,grad_norm: 0.9458833096644563, iteration: 151258
loss: 1.0004328489303589,grad_norm: 0.9999991785276047, iteration: 151259
loss: 0.9730724096298218,grad_norm: 0.9808904826207285, iteration: 151260
loss: 1.0099765062332153,grad_norm: 0.9501716643143169, iteration: 151261
loss: 1.0026907920837402,grad_norm: 0.9815380966368873, iteration: 151262
loss: 0.944999098777771,grad_norm: 0.9999990924026378, iteration: 151263
loss: 1.1250295639038086,grad_norm: 0.9999991463955761, iteration: 151264
loss: 1.0104769468307495,grad_norm: 0.999999218840979, iteration: 151265
loss: 1.0051072835922241,grad_norm: 0.9999997965833538, iteration: 151266
loss: 0.9970462322235107,grad_norm: 0.9365182521024781, iteration: 151267
loss: 0.9722819924354553,grad_norm: 0.9999989916512325, iteration: 151268
loss: 0.9804753661155701,grad_norm: 0.8610101052284372, iteration: 151269
loss: 1.0699266195297241,grad_norm: 0.9506989439724749, iteration: 151270
loss: 0.9851928353309631,grad_norm: 0.9931442813937013, iteration: 151271
loss: 1.0066617727279663,grad_norm: 0.9999989763297498, iteration: 151272
loss: 1.0234575271606445,grad_norm: 0.9999991332678605, iteration: 151273
loss: 1.0242304801940918,grad_norm: 0.9999991338181462, iteration: 151274
loss: 1.02946138381958,grad_norm: 0.9999997569520982, iteration: 151275
loss: 1.0110152959823608,grad_norm: 0.9676154343133531, iteration: 151276
loss: 1.002150535583496,grad_norm: 0.8795163646726718, iteration: 151277
loss: 1.0175718069076538,grad_norm: 0.9999990318357097, iteration: 151278
loss: 1.0207394361495972,grad_norm: 0.9999990899711406, iteration: 151279
loss: 0.9905625581741333,grad_norm: 0.8265384687975796, iteration: 151280
loss: 0.9720693826675415,grad_norm: 0.9999990419680616, iteration: 151281
loss: 1.0231289863586426,grad_norm: 0.935777603131717, iteration: 151282
loss: 0.978094756603241,grad_norm: 0.9999989796435389, iteration: 151283
loss: 0.9945395588874817,grad_norm: 0.8013344472456182, iteration: 151284
loss: 0.9894271492958069,grad_norm: 0.935133462462029, iteration: 151285
loss: 0.988712728023529,grad_norm: 0.9125726654649255, iteration: 151286
loss: 0.9822439551353455,grad_norm: 0.9575669390719379, iteration: 151287
loss: 0.9699140787124634,grad_norm: 0.9999992577080988, iteration: 151288
loss: 0.9781330823898315,grad_norm: 0.8817952987664034, iteration: 151289
loss: 1.0048960447311401,grad_norm: 0.9999994580701042, iteration: 151290
loss: 1.018639326095581,grad_norm: 0.9999992329701568, iteration: 151291
loss: 1.069597840309143,grad_norm: 0.9999990336544857, iteration: 151292
loss: 1.0346602201461792,grad_norm: 0.9999989175609956, iteration: 151293
loss: 1.007986068725586,grad_norm: 0.9999991304225041, iteration: 151294
loss: 1.013892412185669,grad_norm: 0.8874017450575783, iteration: 151295
loss: 1.0194313526153564,grad_norm: 0.999999448564954, iteration: 151296
loss: 0.9831617474555969,grad_norm: 0.999999072690069, iteration: 151297
loss: 1.0453985929489136,grad_norm: 0.9999993648892653, iteration: 151298
loss: 1.0302971601486206,grad_norm: 0.9999993384381146, iteration: 151299
loss: 0.9954321384429932,grad_norm: 0.9999991644933647, iteration: 151300
loss: 0.9896315336227417,grad_norm: 0.9999991006599889, iteration: 151301
loss: 0.9949294924736023,grad_norm: 0.9229899761245681, iteration: 151302
loss: 0.9800428152084351,grad_norm: 0.9999991108263431, iteration: 151303
loss: 1.010992169380188,grad_norm: 0.9999991917439282, iteration: 151304
loss: 1.0177162885665894,grad_norm: 0.9999989034647512, iteration: 151305
loss: 1.0215766429901123,grad_norm: 0.9999990084348331, iteration: 151306
loss: 1.0030118227005005,grad_norm: 0.9999989880828277, iteration: 151307
loss: 0.9765048027038574,grad_norm: 0.9999989670719748, iteration: 151308
loss: 0.9930059313774109,grad_norm: 0.9999990691572472, iteration: 151309
loss: 1.0125458240509033,grad_norm: 0.8692369235984821, iteration: 151310
loss: 0.9871687293052673,grad_norm: 0.9999991321339434, iteration: 151311
loss: 0.9791967272758484,grad_norm: 0.9999991934307199, iteration: 151312
loss: 1.0012186765670776,grad_norm: 0.9991672151776866, iteration: 151313
loss: 1.0037224292755127,grad_norm: 0.9999992801165861, iteration: 151314
loss: 0.9935805797576904,grad_norm: 0.9811582559864954, iteration: 151315
loss: 0.9574947953224182,grad_norm: 0.7325116269930175, iteration: 151316
loss: 0.93815678358078,grad_norm: 0.820012253476602, iteration: 151317
loss: 0.9981449246406555,grad_norm: 0.9375327715096008, iteration: 151318
loss: 1.036948561668396,grad_norm: 0.9999991063479482, iteration: 151319
loss: 1.0058395862579346,grad_norm: 0.9999992019595876, iteration: 151320
loss: 1.000167727470398,grad_norm: 0.9999989606250638, iteration: 151321
loss: 0.991719126701355,grad_norm: 0.9699893914875163, iteration: 151322
loss: 1.0150328874588013,grad_norm: 0.9999990367832303, iteration: 151323
loss: 0.9768469333648682,grad_norm: 0.9999991409922955, iteration: 151324
loss: 0.9932090044021606,grad_norm: 0.999999102997207, iteration: 151325
loss: 0.9785523414611816,grad_norm: 0.9999990737342532, iteration: 151326
loss: 1.0169956684112549,grad_norm: 0.999999413053696, iteration: 151327
loss: 0.9845662117004395,grad_norm: 0.9999991812652561, iteration: 151328
loss: 1.0227694511413574,grad_norm: 0.9999991097294895, iteration: 151329
loss: 1.0396039485931396,grad_norm: 0.9660166148325079, iteration: 151330
loss: 0.975870668888092,grad_norm: 0.9999992021685361, iteration: 151331
loss: 1.0093337297439575,grad_norm: 0.9999994514864523, iteration: 151332
loss: 1.0165691375732422,grad_norm: 0.9999993099497039, iteration: 151333
loss: 0.9582933187484741,grad_norm: 0.9160197379083919, iteration: 151334
loss: 1.0058879852294922,grad_norm: 0.975243366439407, iteration: 151335
loss: 1.0518864393234253,grad_norm: 0.9999990562239056, iteration: 151336
loss: 1.0051308870315552,grad_norm: 0.9542707132297209, iteration: 151337
loss: 1.0231115818023682,grad_norm: 0.9999993531130947, iteration: 151338
loss: 1.005074143409729,grad_norm: 0.9999991006133975, iteration: 151339
loss: 0.9708157181739807,grad_norm: 0.9999992461565081, iteration: 151340
loss: 0.9792115092277527,grad_norm: 0.9055724284288683, iteration: 151341
loss: 1.0239187479019165,grad_norm: 0.8659744854447222, iteration: 151342
loss: 0.9876537919044495,grad_norm: 0.9999990406739729, iteration: 151343
loss: 1.0170185565948486,grad_norm: 0.9706974584770255, iteration: 151344
loss: 0.9636823534965515,grad_norm: 0.9999991375267127, iteration: 151345
loss: 0.9779518842697144,grad_norm: 0.999999278938736, iteration: 151346
loss: 1.0062735080718994,grad_norm: 0.9999991757096183, iteration: 151347
loss: 0.9832409024238586,grad_norm: 0.9585206848388828, iteration: 151348
loss: 1.0135570764541626,grad_norm: 0.9528695454386013, iteration: 151349
loss: 1.0005600452423096,grad_norm: 0.9999990673623349, iteration: 151350
loss: 1.0174075365066528,grad_norm: 0.9015547477267267, iteration: 151351
loss: 1.0257997512817383,grad_norm: 0.9999990892900062, iteration: 151352
loss: 1.0099549293518066,grad_norm: 0.9999992167330164, iteration: 151353
loss: 0.9966024160385132,grad_norm: 0.929077282278008, iteration: 151354
loss: 0.985504150390625,grad_norm: 0.9182253123495504, iteration: 151355
loss: 1.0339833498001099,grad_norm: 0.9999989622224827, iteration: 151356
loss: 0.9971786737442017,grad_norm: 0.8295978293838587, iteration: 151357
loss: 1.0246491432189941,grad_norm: 0.999999284390901, iteration: 151358
loss: 0.9961016178131104,grad_norm: 0.8332878520841208, iteration: 151359
loss: 1.0156465768814087,grad_norm: 0.9999988931886937, iteration: 151360
loss: 1.0021467208862305,grad_norm: 0.9825049182000452, iteration: 151361
loss: 1.004542350769043,grad_norm: 0.9999993114827653, iteration: 151362
loss: 0.946323573589325,grad_norm: 0.9999990573724921, iteration: 151363
loss: 1.0283342599868774,grad_norm: 0.9077680894108189, iteration: 151364
loss: 0.9812043905258179,grad_norm: 0.9999991856012934, iteration: 151365
loss: 0.9854559898376465,grad_norm: 0.932539136382668, iteration: 151366
loss: 1.0430396795272827,grad_norm: 0.9999993039995047, iteration: 151367
loss: 1.005761981010437,grad_norm: 0.7701327042969012, iteration: 151368
loss: 1.0113177299499512,grad_norm: 0.9999990247595738, iteration: 151369
loss: 0.9876905083656311,grad_norm: 0.9237012112411658, iteration: 151370
loss: 1.009018898010254,grad_norm: 0.9697442371745114, iteration: 151371
loss: 0.9870914220809937,grad_norm: 0.9764000782621992, iteration: 151372
loss: 0.9513022303581238,grad_norm: 0.9999991473549399, iteration: 151373
loss: 1.026976466178894,grad_norm: 0.9852817993052998, iteration: 151374
loss: 1.0317060947418213,grad_norm: 0.9999990155897214, iteration: 151375
loss: 1.0641638040542603,grad_norm: 0.9999989861874558, iteration: 151376
loss: 0.9695736169815063,grad_norm: 0.945373850953862, iteration: 151377
loss: 0.9871031045913696,grad_norm: 0.9330227962733868, iteration: 151378
loss: 1.0207751989364624,grad_norm: 0.9563121632394228, iteration: 151379
loss: 0.983303964138031,grad_norm: 0.9999993432908609, iteration: 151380
loss: 0.981476366519928,grad_norm: 0.9999991666798755, iteration: 151381
loss: 1.0035319328308105,grad_norm: 0.9536871144727191, iteration: 151382
loss: 1.0081795454025269,grad_norm: 0.9999989336003678, iteration: 151383
loss: 0.978889524936676,grad_norm: 0.9999991556425016, iteration: 151384
loss: 1.0218029022216797,grad_norm: 0.9999989456268669, iteration: 151385
loss: 0.9763627052307129,grad_norm: 0.999998988913144, iteration: 151386
loss: 1.0087716579437256,grad_norm: 0.9703839368269566, iteration: 151387
loss: 1.0003119707107544,grad_norm: 0.9959235124205527, iteration: 151388
loss: 1.0108473300933838,grad_norm: 0.9047147112623403, iteration: 151389
loss: 1.002048134803772,grad_norm: 0.9999991177148011, iteration: 151390
loss: 1.0013517141342163,grad_norm: 0.9999991689782924, iteration: 151391
loss: 1.0355650186538696,grad_norm: 0.9999990172133628, iteration: 151392
loss: 1.0248645544052124,grad_norm: 0.946177232280274, iteration: 151393
loss: 0.9610065221786499,grad_norm: 0.9999991746238978, iteration: 151394
loss: 1.0011268854141235,grad_norm: 0.9495626999940737, iteration: 151395
loss: 0.9718964099884033,grad_norm: 0.9999992094322638, iteration: 151396
loss: 1.0022330284118652,grad_norm: 0.9094467721723151, iteration: 151397
loss: 1.0157498121261597,grad_norm: 0.9105790799298671, iteration: 151398
loss: 0.9966278076171875,grad_norm: 0.9999990362934075, iteration: 151399
loss: 0.9739400148391724,grad_norm: 0.9722170454851704, iteration: 151400
loss: 1.0285428762435913,grad_norm: 0.9999992829236326, iteration: 151401
loss: 0.9970366358757019,grad_norm: 0.9331109242375302, iteration: 151402
loss: 1.0300167798995972,grad_norm: 0.9734808515182423, iteration: 151403
loss: 1.0157426595687866,grad_norm: 0.8624240500702546, iteration: 151404
loss: 0.9738003015518188,grad_norm: 0.9595522477746065, iteration: 151405
loss: 0.9669975638389587,grad_norm: 0.9033844288514025, iteration: 151406
loss: 1.0057750940322876,grad_norm: 0.9286063315040207, iteration: 151407
loss: 1.0130566358566284,grad_norm: 0.999998989824603, iteration: 151408
loss: 0.9852870106697083,grad_norm: 0.9999990858737478, iteration: 151409
loss: 0.9869279265403748,grad_norm: 0.8699740284614168, iteration: 151410
loss: 1.0314669609069824,grad_norm: 0.9999990559875965, iteration: 151411
loss: 1.0160162448883057,grad_norm: 0.9784679956854294, iteration: 151412
loss: 1.0188333988189697,grad_norm: 0.9999991407109698, iteration: 151413
loss: 0.9768270254135132,grad_norm: 0.7587207068575791, iteration: 151414
loss: 0.9887925386428833,grad_norm: 0.9981247192899791, iteration: 151415
loss: 0.9934026002883911,grad_norm: 0.9825713720524889, iteration: 151416
loss: 0.9911796450614929,grad_norm: 0.9414379275363914, iteration: 151417
loss: 1.0171087980270386,grad_norm: 0.916890959992326, iteration: 151418
loss: 0.9875597357749939,grad_norm: 0.8519410576140023, iteration: 151419
loss: 0.9872707724571228,grad_norm: 0.9572331995947606, iteration: 151420
loss: 1.0007351636886597,grad_norm: 0.9385433107227303, iteration: 151421
loss: 0.9893909692764282,grad_norm: 0.8418820860078537, iteration: 151422
loss: 0.965283989906311,grad_norm: 0.9999992021969127, iteration: 151423
loss: 0.9881837368011475,grad_norm: 0.9672022330565531, iteration: 151424
loss: 1.0044595003128052,grad_norm: 0.9999995749242215, iteration: 151425
loss: 1.0144726037979126,grad_norm: 0.9476501470170358, iteration: 151426
loss: 0.9994185566902161,grad_norm: 0.9999991817108903, iteration: 151427
loss: 1.0024080276489258,grad_norm: 0.9999991482994536, iteration: 151428
loss: 0.9964767694473267,grad_norm: 0.999999137913105, iteration: 151429
loss: 1.000373125076294,grad_norm: 0.9940817108216728, iteration: 151430
loss: 1.0304909944534302,grad_norm: 0.9999992091729037, iteration: 151431
loss: 0.984002411365509,grad_norm: 0.9291141921299525, iteration: 151432
loss: 0.9809510707855225,grad_norm: 0.8273583035017938, iteration: 151433
loss: 0.9673975110054016,grad_norm: 0.8532487054842997, iteration: 151434
loss: 1.0743385553359985,grad_norm: 0.9999993355193186, iteration: 151435
loss: 0.9670018553733826,grad_norm: 0.9999990640344936, iteration: 151436
loss: 0.9919943809509277,grad_norm: 0.9343317418706175, iteration: 151437
loss: 1.0019410848617554,grad_norm: 0.9999045991645537, iteration: 151438
loss: 1.0240073204040527,grad_norm: 0.8685017337563739, iteration: 151439
loss: 0.9890239834785461,grad_norm: 0.9999990520070303, iteration: 151440
loss: 1.0325467586517334,grad_norm: 0.9999993337203238, iteration: 151441
loss: 0.9778319597244263,grad_norm: 0.9596833618598216, iteration: 151442
loss: 0.9785528182983398,grad_norm: 0.9999992278539439, iteration: 151443
loss: 0.9957214593887329,grad_norm: 0.9999990870852641, iteration: 151444
loss: 1.0240780115127563,grad_norm: 0.99999923719415, iteration: 151445
loss: 0.9727472066879272,grad_norm: 0.8977840305704827, iteration: 151446
loss: 1.0176715850830078,grad_norm: 0.9999993678466568, iteration: 151447
loss: 1.0121887922286987,grad_norm: 0.9608440005039223, iteration: 151448
loss: 0.9849280118942261,grad_norm: 0.9999990857289173, iteration: 151449
loss: 0.9940125346183777,grad_norm: 0.9999990620600598, iteration: 151450
loss: 0.9799573421478271,grad_norm: 0.9999991756714721, iteration: 151451
loss: 0.9913696646690369,grad_norm: 0.9702114260115672, iteration: 151452
loss: 0.9938450455665588,grad_norm: 0.968367801458029, iteration: 151453
loss: 1.0054206848144531,grad_norm: 0.9999991502547538, iteration: 151454
loss: 1.0640676021575928,grad_norm: 0.955976538294008, iteration: 151455
loss: 1.005647897720337,grad_norm: 0.9999991205124826, iteration: 151456
loss: 1.023951768875122,grad_norm: 0.9999990735906445, iteration: 151457
loss: 1.032442331314087,grad_norm: 0.9576467592111184, iteration: 151458
loss: 0.9906432032585144,grad_norm: 0.9528385109607019, iteration: 151459
loss: 1.0379011631011963,grad_norm: 0.9999990712079556, iteration: 151460
loss: 0.9919141530990601,grad_norm: 0.9999991675027425, iteration: 151461
loss: 1.0007429122924805,grad_norm: 0.9050070620304695, iteration: 151462
loss: 0.9883164167404175,grad_norm: 0.9797803108150918, iteration: 151463
loss: 0.9998639822006226,grad_norm: 0.9999989730548822, iteration: 151464
loss: 0.9563013911247253,grad_norm: 0.9522164536764612, iteration: 151465
loss: 0.9505045413970947,grad_norm: 0.9414385118566903, iteration: 151466
loss: 0.9812179207801819,grad_norm: 0.925343356059398, iteration: 151467
loss: 1.0073957443237305,grad_norm: 0.8974433402199747, iteration: 151468
loss: 0.9604036211967468,grad_norm: 0.9999999829932791, iteration: 151469
loss: 0.9988071322441101,grad_norm: 0.855421740866067, iteration: 151470
loss: 1.1733933687210083,grad_norm: 0.9999992119479526, iteration: 151471
loss: 0.9791507720947266,grad_norm: 0.9424952979399693, iteration: 151472
loss: 1.178248405456543,grad_norm: 0.999999203271072, iteration: 151473
loss: 0.9329083561897278,grad_norm: 0.9519962028730721, iteration: 151474
loss: 1.0261917114257812,grad_norm: 0.9999992169124837, iteration: 151475
loss: 1.1672699451446533,grad_norm: 0.9999995744803707, iteration: 151476
loss: 1.1422070264816284,grad_norm: 0.9999994273419259, iteration: 151477
loss: 0.9710431694984436,grad_norm: 0.9169593192403789, iteration: 151478
loss: 1.0318868160247803,grad_norm: 0.9999993466334797, iteration: 151479
loss: 1.1354807615280151,grad_norm: 0.9999992504938481, iteration: 151480
loss: 1.1725294589996338,grad_norm: 0.9999992986696646, iteration: 151481
loss: 1.0143837928771973,grad_norm: 0.9291018592642656, iteration: 151482
loss: 0.9900437593460083,grad_norm: 0.9999990076790247, iteration: 151483
loss: 1.0261188745498657,grad_norm: 0.9999990815592674, iteration: 151484
loss: 1.1166495084762573,grad_norm: 0.9962364592982239, iteration: 151485
loss: 1.0080909729003906,grad_norm: 0.9237122520755194, iteration: 151486
loss: 1.014734148979187,grad_norm: 0.9999992118694713, iteration: 151487
loss: 0.9676873683929443,grad_norm: 0.999999172802064, iteration: 151488
loss: 1.074690818786621,grad_norm: 0.9999991901076797, iteration: 151489
loss: 1.067915678024292,grad_norm: 0.999999192175565, iteration: 151490
loss: 1.0305163860321045,grad_norm: 0.982197801930951, iteration: 151491
loss: 1.0270596742630005,grad_norm: 0.9999992358351828, iteration: 151492
loss: 0.9695150852203369,grad_norm: 0.9999995149569183, iteration: 151493
loss: 0.9876502752304077,grad_norm: 0.9999991224401776, iteration: 151494
loss: 1.038526177406311,grad_norm: 0.8922559643893299, iteration: 151495
loss: 0.9670012593269348,grad_norm: 0.9033079264605615, iteration: 151496
loss: 0.9634590148925781,grad_norm: 0.9999992227510164, iteration: 151497
loss: 0.9951111078262329,grad_norm: 0.8828225944398647, iteration: 151498
loss: 0.9743810892105103,grad_norm: 0.9518311214037087, iteration: 151499
loss: 0.95115065574646,grad_norm: 0.9999992567392944, iteration: 151500
loss: 1.0045366287231445,grad_norm: 0.8930490767427142, iteration: 151501
loss: 1.0108373165130615,grad_norm: 0.8570062520927576, iteration: 151502
loss: 1.0173286199569702,grad_norm: 0.9999992231134104, iteration: 151503
loss: 0.9990986585617065,grad_norm: 0.9624204814116722, iteration: 151504
loss: 0.9874961376190186,grad_norm: 0.8107064616717425, iteration: 151505
loss: 0.9853221774101257,grad_norm: 0.9999992543292084, iteration: 151506
loss: 1.0595442056655884,grad_norm: 0.9999992699778146, iteration: 151507
loss: 1.0353648662567139,grad_norm: 0.9034457334905224, iteration: 151508
loss: 1.0187842845916748,grad_norm: 0.9999992981980824, iteration: 151509
loss: 1.0291354656219482,grad_norm: 0.999999157339671, iteration: 151510
loss: 0.9798722267150879,grad_norm: 0.9999990607511026, iteration: 151511
loss: 0.9796775579452515,grad_norm: 0.9120131621303814, iteration: 151512
loss: 0.995330810546875,grad_norm: 0.9999991319390982, iteration: 151513
loss: 1.0105937719345093,grad_norm: 0.9999993025781482, iteration: 151514
loss: 0.9809061288833618,grad_norm: 0.9999989793956974, iteration: 151515
loss: 1.0500999689102173,grad_norm: 0.8512796626858211, iteration: 151516
loss: 1.0000334978103638,grad_norm: 0.999999142204306, iteration: 151517
loss: 1.0112218856811523,grad_norm: 0.9999990913210536, iteration: 151518
loss: 0.9938140511512756,grad_norm: 0.8625387447457026, iteration: 151519
loss: 0.9992185831069946,grad_norm: 0.8305158545459599, iteration: 151520
loss: 1.0313985347747803,grad_norm: 0.9999991524750156, iteration: 151521
loss: 1.0172518491744995,grad_norm: 0.9999991988632018, iteration: 151522
loss: 1.0075404644012451,grad_norm: 0.9999991317625783, iteration: 151523
loss: 1.0309109687805176,grad_norm: 0.9678134236904988, iteration: 151524
loss: 1.0050394535064697,grad_norm: 0.8912263041705031, iteration: 151525
loss: 1.0057158470153809,grad_norm: 0.9419492821684835, iteration: 151526
loss: 0.9961642622947693,grad_norm: 0.999999151007348, iteration: 151527
loss: 0.9926449656486511,grad_norm: 0.9766178109161269, iteration: 151528
loss: 0.9875566363334656,grad_norm: 0.9999990053963305, iteration: 151529
loss: 1.0210553407669067,grad_norm: 0.8914403527976447, iteration: 151530
loss: 0.9544199109077454,grad_norm: 0.9999990640992171, iteration: 151531
loss: 0.9976762533187866,grad_norm: 0.9999990601426575, iteration: 151532
loss: 0.977085292339325,grad_norm: 0.999999156133781, iteration: 151533
loss: 1.034808874130249,grad_norm: 0.967467013373981, iteration: 151534
loss: 0.9878509640693665,grad_norm: 0.9999990859581225, iteration: 151535
loss: 1.0218710899353027,grad_norm: 0.9932739906663944, iteration: 151536
loss: 1.0220707654953003,grad_norm: 0.9999991206695259, iteration: 151537
loss: 1.011084794998169,grad_norm: 0.9999990328200592, iteration: 151538
loss: 1.0587327480316162,grad_norm: 0.9999999389886135, iteration: 151539
loss: 0.9887607097625732,grad_norm: 0.8981641532269844, iteration: 151540
loss: 1.0393345355987549,grad_norm: 0.999999483306077, iteration: 151541
loss: 0.9979231357574463,grad_norm: 0.9999991064098424, iteration: 151542
loss: 1.0094094276428223,grad_norm: 0.9153688620492212, iteration: 151543
loss: 0.9850386381149292,grad_norm: 0.9999991394250414, iteration: 151544
loss: 0.9826794266700745,grad_norm: 0.9965252572949369, iteration: 151545
loss: 0.979479193687439,grad_norm: 0.9999990032853189, iteration: 151546
loss: 0.9799907803535461,grad_norm: 0.9170142186765957, iteration: 151547
loss: 0.9900105595588684,grad_norm: 0.8858010000000445, iteration: 151548
loss: 1.0159298181533813,grad_norm: 0.9999992933395032, iteration: 151549
loss: 1.0134994983673096,grad_norm: 0.999999239555273, iteration: 151550
loss: 1.017735481262207,grad_norm: 0.9999993179485751, iteration: 151551
loss: 1.0005779266357422,grad_norm: 0.9291422772374947, iteration: 151552
loss: 0.9986314177513123,grad_norm: 0.9999991146169682, iteration: 151553
loss: 1.005444049835205,grad_norm: 0.9483196661567106, iteration: 151554
loss: 1.0570846796035767,grad_norm: 0.841368632895722, iteration: 151555
loss: 1.0166305303573608,grad_norm: 0.9398431214900459, iteration: 151556
loss: 1.0117106437683105,grad_norm: 0.9999992458040329, iteration: 151557
loss: 1.0178345441818237,grad_norm: 0.8819424152854468, iteration: 151558
loss: 0.9823924899101257,grad_norm: 0.9999990270321045, iteration: 151559
loss: 1.0301142930984497,grad_norm: 0.8931004216568406, iteration: 151560
loss: 0.9947142004966736,grad_norm: 0.9999990807591715, iteration: 151561
loss: 0.9511554837226868,grad_norm: 0.9498389254088436, iteration: 151562
loss: 0.9961063265800476,grad_norm: 0.9999992154011444, iteration: 151563
loss: 0.9949601888656616,grad_norm: 0.9999991284927785, iteration: 151564
loss: 1.0078223943710327,grad_norm: 0.9999990643867736, iteration: 151565
loss: 1.033950686454773,grad_norm: 0.999999162286315, iteration: 151566
loss: 1.0102304220199585,grad_norm: 0.9999991727171224, iteration: 151567
loss: 0.9945899248123169,grad_norm: 0.8667865015083045, iteration: 151568
loss: 1.006780982017517,grad_norm: 0.923071085146779, iteration: 151569
loss: 1.044377326965332,grad_norm: 0.8501188844269518, iteration: 151570
loss: 1.0151616334915161,grad_norm: 0.9705003199638986, iteration: 151571
loss: 0.9619624614715576,grad_norm: 0.9999993356243575, iteration: 151572
loss: 0.9929805994033813,grad_norm: 0.9329048076225701, iteration: 151573
loss: 0.9888606071472168,grad_norm: 0.9999991485437278, iteration: 151574
loss: 1.0170937776565552,grad_norm: 0.9845291311899202, iteration: 151575
loss: 0.998777449131012,grad_norm: 0.8573346205941953, iteration: 151576
loss: 1.0239322185516357,grad_norm: 0.9999998013840808, iteration: 151577
loss: 0.9856978058815002,grad_norm: 0.990323804073773, iteration: 151578
loss: 1.0145295858383179,grad_norm: 0.999999221829503, iteration: 151579
loss: 1.0132594108581543,grad_norm: 0.9321700287329815, iteration: 151580
loss: 0.9986289143562317,grad_norm: 0.8489835969439423, iteration: 151581
loss: 0.9919273257255554,grad_norm: 0.9999992104857848, iteration: 151582
loss: 1.029629111289978,grad_norm: 0.9242691707072815, iteration: 151583
loss: 1.0154446363449097,grad_norm: 0.9999990171503652, iteration: 151584
loss: 1.0185906887054443,grad_norm: 0.9999992396009691, iteration: 151585
loss: 0.9817717671394348,grad_norm: 0.9999989924925363, iteration: 151586
loss: 0.9943135976791382,grad_norm: 0.9999990026014023, iteration: 151587
loss: 1.0112838745117188,grad_norm: 0.9999990430419085, iteration: 151588
loss: 0.977199137210846,grad_norm: 0.999998973158996, iteration: 151589
loss: 0.9927611947059631,grad_norm: 0.9494289982711164, iteration: 151590
loss: 1.0146245956420898,grad_norm: 0.99999908802923, iteration: 151591
loss: 1.0038964748382568,grad_norm: 0.8911838271181298, iteration: 151592
loss: 1.0187921524047852,grad_norm: 0.9999990678442303, iteration: 151593
loss: 1.0150998830795288,grad_norm: 0.9999991586782421, iteration: 151594
loss: 1.0096873044967651,grad_norm: 0.9999989897007122, iteration: 151595
loss: 0.9965121150016785,grad_norm: 0.9999991262245805, iteration: 151596
loss: 1.0088191032409668,grad_norm: 0.9999991564906536, iteration: 151597
loss: 1.0215719938278198,grad_norm: 0.9184024734964604, iteration: 151598
loss: 0.9998067617416382,grad_norm: 0.9999990915208693, iteration: 151599
loss: 1.022706151008606,grad_norm: 0.7924841581416285, iteration: 151600
loss: 0.9793826937675476,grad_norm: 0.9999991604516605, iteration: 151601
loss: 1.0148780345916748,grad_norm: 0.9999991458018335, iteration: 151602
loss: 0.9618204832077026,grad_norm: 0.8998216802761146, iteration: 151603
loss: 0.9898613691329956,grad_norm: 0.9999990632100924, iteration: 151604
loss: 0.9959871172904968,grad_norm: 0.9999991814965679, iteration: 151605
loss: 0.9878359436988831,grad_norm: 0.9999991553603061, iteration: 151606
loss: 1.0012286901474,grad_norm: 0.999998977747736, iteration: 151607
loss: 1.0229660272598267,grad_norm: 0.9530081326397043, iteration: 151608
loss: 0.958740234375,grad_norm: 0.9433178637747225, iteration: 151609
loss: 1.0229705572128296,grad_norm: 0.9999992933740787, iteration: 151610
loss: 0.9912826418876648,grad_norm: 0.9999995593785717, iteration: 151611
loss: 0.9801251292228699,grad_norm: 0.8870310046334735, iteration: 151612
loss: 0.9764581918716431,grad_norm: 0.9999989824222086, iteration: 151613
loss: 1.0070432424545288,grad_norm: 0.8667244142404394, iteration: 151614
loss: 1.036837100982666,grad_norm: 0.8809453421460292, iteration: 151615
loss: 0.9957463145256042,grad_norm: 0.8943033537237907, iteration: 151616
loss: 0.9989591240882874,grad_norm: 0.9999991132046502, iteration: 151617
loss: 0.9953636527061462,grad_norm: 0.9999991761012704, iteration: 151618
loss: 1.0317808389663696,grad_norm: 0.953541376571418, iteration: 151619
loss: 1.0000534057617188,grad_norm: 0.9999992487682525, iteration: 151620
loss: 1.0078190565109253,grad_norm: 0.9999991393549287, iteration: 151621
loss: 1.0013489723205566,grad_norm: 0.9999989666731754, iteration: 151622
loss: 1.0108990669250488,grad_norm: 0.9393656359904912, iteration: 151623
loss: 1.0009868144989014,grad_norm: 0.8815772101050993, iteration: 151624
loss: 1.0209275484085083,grad_norm: 0.9999989877192658, iteration: 151625
loss: 0.9955291748046875,grad_norm: 0.9999992081447966, iteration: 151626
loss: 1.0041390657424927,grad_norm: 0.8736437903289266, iteration: 151627
loss: 1.0013774633407593,grad_norm: 0.9222022666516678, iteration: 151628
loss: 0.9753367304801941,grad_norm: 0.9999990989173653, iteration: 151629
loss: 1.0013983249664307,grad_norm: 0.9999991435799077, iteration: 151630
loss: 0.9676116704940796,grad_norm: 0.9999991181876247, iteration: 151631
loss: 0.9798505306243896,grad_norm: 0.9841897606261585, iteration: 151632
loss: 0.9986481666564941,grad_norm: 0.8812022885658841, iteration: 151633
loss: 0.9821150302886963,grad_norm: 0.9347056824751327, iteration: 151634
loss: 1.0111876726150513,grad_norm: 0.999999023843081, iteration: 151635
loss: 1.0253503322601318,grad_norm: 0.9557778368760584, iteration: 151636
loss: 1.0123366117477417,grad_norm: 0.9999990108439536, iteration: 151637
loss: 1.0420223474502563,grad_norm: 0.9999991985631453, iteration: 151638
loss: 1.0253896713256836,grad_norm: 0.9547167091716003, iteration: 151639
loss: 0.9947335720062256,grad_norm: 0.9999990365319744, iteration: 151640
loss: 1.0092569589614868,grad_norm: 0.8920085503901456, iteration: 151641
loss: 1.0085395574569702,grad_norm: 0.9999991713378512, iteration: 151642
loss: 1.01039457321167,grad_norm: 0.9308836672032379, iteration: 151643
loss: 0.9958310127258301,grad_norm: 0.860943089922289, iteration: 151644
loss: 0.9954527020454407,grad_norm: 0.9676298836261163, iteration: 151645
loss: 0.9725171327590942,grad_norm: 0.8719016792083174, iteration: 151646
loss: 0.9837522506713867,grad_norm: 0.9439249146728844, iteration: 151647
loss: 1.0449845790863037,grad_norm: 0.9999993288272938, iteration: 151648
loss: 1.0132102966308594,grad_norm: 0.999999037183828, iteration: 151649
loss: 0.9899536371231079,grad_norm: 0.999999317727247, iteration: 151650
loss: 0.9772884249687195,grad_norm: 0.9413189535656389, iteration: 151651
loss: 1.0301991701126099,grad_norm: 0.999999307485602, iteration: 151652
loss: 1.0167559385299683,grad_norm: 0.999998995276857, iteration: 151653
loss: 0.9798731803894043,grad_norm: 0.9989587111412207, iteration: 151654
loss: 0.9969867467880249,grad_norm: 0.9490502438362174, iteration: 151655
loss: 1.0107587575912476,grad_norm: 0.9999993743036265, iteration: 151656
loss: 1.0139833688735962,grad_norm: 0.9999989909243063, iteration: 151657
loss: 1.0212764739990234,grad_norm: 0.9999994109370376, iteration: 151658
loss: 0.9794361591339111,grad_norm: 0.9971539394117401, iteration: 151659
loss: 1.0208230018615723,grad_norm: 0.9999991124071541, iteration: 151660
loss: 1.0286446809768677,grad_norm: 0.9999996948048469, iteration: 151661
loss: 1.019897699356079,grad_norm: 0.9583887045419109, iteration: 151662
loss: 1.0619444847106934,grad_norm: 0.9312125137409127, iteration: 151663
loss: 0.9387865662574768,grad_norm: 0.9893319332155115, iteration: 151664
loss: 1.0360769033432007,grad_norm: 0.9176706284478653, iteration: 151665
loss: 0.9719269871711731,grad_norm: 0.9999990860595361, iteration: 151666
loss: 0.974001944065094,grad_norm: 0.9999990642295273, iteration: 151667
loss: 1.0194820165634155,grad_norm: 0.8663481722740303, iteration: 151668
loss: 1.0190364122390747,grad_norm: 0.9886486025776481, iteration: 151669
loss: 1.006706953048706,grad_norm: 0.8793018614988125, iteration: 151670
loss: 0.996232807636261,grad_norm: 0.9999991335884905, iteration: 151671
loss: 1.0149120092391968,grad_norm: 0.9437856578281758, iteration: 151672
loss: 1.0435150861740112,grad_norm: 0.999999092719561, iteration: 151673
loss: 0.9809674024581909,grad_norm: 0.9477612039535286, iteration: 151674
loss: 0.9840691089630127,grad_norm: 0.999998965151448, iteration: 151675
loss: 1.0142838954925537,grad_norm: 0.9167308276049377, iteration: 151676
loss: 0.9734814167022705,grad_norm: 0.9999991129895128, iteration: 151677
loss: 0.9898073673248291,grad_norm: 0.8867267381197373, iteration: 151678
loss: 1.0060293674468994,grad_norm: 0.9999990884116745, iteration: 151679
loss: 0.9922795295715332,grad_norm: 0.999999134921061, iteration: 151680
loss: 1.0156183242797852,grad_norm: 0.9999988811872131, iteration: 151681
loss: 1.0099276304244995,grad_norm: 0.9999990384669458, iteration: 151682
loss: 0.9692085981369019,grad_norm: 0.99999896490746, iteration: 151683
loss: 0.9970045685768127,grad_norm: 0.9178839985048176, iteration: 151684
loss: 1.019931435585022,grad_norm: 0.9489580986478798, iteration: 151685
loss: 1.0000485181808472,grad_norm: 0.999999204723035, iteration: 151686
loss: 1.046856164932251,grad_norm: 0.9999998528509791, iteration: 151687
loss: 0.9874847531318665,grad_norm: 0.9999990026059355, iteration: 151688
loss: 1.0155595541000366,grad_norm: 0.9910621479024931, iteration: 151689
loss: 0.997124969959259,grad_norm: 0.9999991810879919, iteration: 151690
loss: 1.0137841701507568,grad_norm: 0.9999991536761211, iteration: 151691
loss: 0.9846585988998413,grad_norm: 0.8057394514292815, iteration: 151692
loss: 1.014088749885559,grad_norm: 0.9701480810268256, iteration: 151693
loss: 0.9878782033920288,grad_norm: 0.920448597008549, iteration: 151694
loss: 0.9925490021705627,grad_norm: 0.8410229920455415, iteration: 151695
loss: 0.992348313331604,grad_norm: 0.8673716347952578, iteration: 151696
loss: 1.0136514902114868,grad_norm: 0.9610996885735515, iteration: 151697
loss: 0.9927611351013184,grad_norm: 0.9999990872604668, iteration: 151698
loss: 0.9989326596260071,grad_norm: 0.9999991960099298, iteration: 151699
loss: 0.9977663159370422,grad_norm: 0.9999992116913453, iteration: 151700
loss: 1.0259265899658203,grad_norm: 0.9999992516889747, iteration: 151701
loss: 1.0280935764312744,grad_norm: 0.9283208543729969, iteration: 151702
loss: 1.032688021659851,grad_norm: 0.9999991607631793, iteration: 151703
loss: 0.9949743151664734,grad_norm: 0.9544408594024636, iteration: 151704
loss: 1.0229460000991821,grad_norm: 0.9999990762826284, iteration: 151705
loss: 1.0105364322662354,grad_norm: 0.9772443991291104, iteration: 151706
loss: 1.0072845220565796,grad_norm: 0.999999121454259, iteration: 151707
loss: 0.9612529873847961,grad_norm: 0.8795045397835065, iteration: 151708
loss: 0.9966217279434204,grad_norm: 0.99999911514786, iteration: 151709
loss: 0.9996871948242188,grad_norm: 0.9999992669447316, iteration: 151710
loss: 1.0144789218902588,grad_norm: 0.9999991264172631, iteration: 151711
loss: 1.0005215406417847,grad_norm: 0.9812807263872214, iteration: 151712
loss: 1.0062808990478516,grad_norm: 0.9999991126518271, iteration: 151713
loss: 1.0053632259368896,grad_norm: 0.9999992681511513, iteration: 151714
loss: 1.027184247970581,grad_norm: 0.9999991142579806, iteration: 151715
loss: 0.9959391355514526,grad_norm: 0.9157790479567476, iteration: 151716
loss: 0.9921976923942566,grad_norm: 0.8179351862841087, iteration: 151717
loss: 1.0369940996170044,grad_norm: 0.9999991300577672, iteration: 151718
loss: 1.0014017820358276,grad_norm: 0.9999991081662258, iteration: 151719
loss: 0.9991164803504944,grad_norm: 0.9999990724597095, iteration: 151720
loss: 0.9920305609703064,grad_norm: 0.9999992159854156, iteration: 151721
loss: 1.009508490562439,grad_norm: 0.824977750319619, iteration: 151722
loss: 0.9882411360740662,grad_norm: 0.9178178971306677, iteration: 151723
loss: 1.0136457681655884,grad_norm: 0.9999992844239799, iteration: 151724
loss: 1.040177345275879,grad_norm: 0.9999993094336045, iteration: 151725
loss: 0.9968162775039673,grad_norm: 0.9999991970314109, iteration: 151726
loss: 0.9848440885543823,grad_norm: 0.999999052715962, iteration: 151727
loss: 0.9826260805130005,grad_norm: 0.9728346349613061, iteration: 151728
loss: 1.042923092842102,grad_norm: 0.9999991791869378, iteration: 151729
loss: 1.0036133527755737,grad_norm: 0.9999990885821056, iteration: 151730
loss: 0.9824226498603821,grad_norm: 0.9999993301687461, iteration: 151731
loss: 0.9845203161239624,grad_norm: 0.9048048327392346, iteration: 151732
loss: 0.9901478886604309,grad_norm: 0.9910777279682665, iteration: 151733
loss: 1.008162260055542,grad_norm: 0.9999990949254777, iteration: 151734
loss: 1.0166535377502441,grad_norm: 0.8620881729831632, iteration: 151735
loss: 0.9922075271606445,grad_norm: 0.9348046336311893, iteration: 151736
loss: 1.0046926736831665,grad_norm: 0.9297672926447512, iteration: 151737
loss: 1.013712763786316,grad_norm: 0.9999991823633358, iteration: 151738
loss: 0.9904236793518066,grad_norm: 0.9982594591923539, iteration: 151739
loss: 1.0289337635040283,grad_norm: 0.8641734594771974, iteration: 151740
loss: 0.9898743629455566,grad_norm: 0.9999990880402979, iteration: 151741
loss: 1.007211446762085,grad_norm: 0.841600652634319, iteration: 151742
loss: 0.977794885635376,grad_norm: 0.9506370454441615, iteration: 151743
loss: 0.988370418548584,grad_norm: 0.9999990645138347, iteration: 151744
loss: 0.9920612573623657,grad_norm: 0.9999992429067673, iteration: 151745
loss: 1.0303210020065308,grad_norm: 0.9999991865067459, iteration: 151746
loss: 1.017912745475769,grad_norm: 0.9999991647223875, iteration: 151747
loss: 1.046311855316162,grad_norm: 0.9999991757520839, iteration: 151748
loss: 0.9879848957061768,grad_norm: 0.8312707748307328, iteration: 151749
loss: 1.008987307548523,grad_norm: 0.9999991741646813, iteration: 151750
loss: 0.9905052781105042,grad_norm: 0.9222685844996463, iteration: 151751
loss: 1.0013396739959717,grad_norm: 0.8566186952053979, iteration: 151752
loss: 0.9844653606414795,grad_norm: 0.9189918655998485, iteration: 151753
loss: 0.998399019241333,grad_norm: 0.9999989894094686, iteration: 151754
loss: 1.0157617330551147,grad_norm: 0.9818569719799367, iteration: 151755
loss: 0.958885133266449,grad_norm: 0.8598371455195069, iteration: 151756
loss: 0.9935161471366882,grad_norm: 0.9999990638864079, iteration: 151757
loss: 1.0012716054916382,grad_norm: 0.9166659705339472, iteration: 151758
loss: 1.0046037435531616,grad_norm: 0.9999991733925697, iteration: 151759
loss: 0.9792882800102234,grad_norm: 0.9139859538280836, iteration: 151760
loss: 0.9984578490257263,grad_norm: 0.9999990933437041, iteration: 151761
loss: 0.9861356019973755,grad_norm: 0.9999991712965108, iteration: 151762
loss: 1.0072427988052368,grad_norm: 0.9999991382984313, iteration: 151763
loss: 0.9904536604881287,grad_norm: 0.9577490020780818, iteration: 151764
loss: 1.0163378715515137,grad_norm: 0.9976770247808335, iteration: 151765
loss: 1.09725821018219,grad_norm: 0.999999074122589, iteration: 151766
loss: 0.9934327602386475,grad_norm: 0.8913962123595647, iteration: 151767
loss: 1.0241906642913818,grad_norm: 0.9999991488112756, iteration: 151768
loss: 0.9937215447425842,grad_norm: 0.9999991939426184, iteration: 151769
loss: 1.0426222085952759,grad_norm: 0.9999993559725319, iteration: 151770
loss: 1.0131206512451172,grad_norm: 0.9999991556217783, iteration: 151771
loss: 1.0012704133987427,grad_norm: 0.9999990709477462, iteration: 151772
loss: 1.0044128894805908,grad_norm: 0.9667087820146181, iteration: 151773
loss: 0.9652233719825745,grad_norm: 0.8801564785083651, iteration: 151774
loss: 0.9902689456939697,grad_norm: 0.9534909908584035, iteration: 151775
loss: 1.0039801597595215,grad_norm: 0.9905412931062872, iteration: 151776
loss: 0.9779491424560547,grad_norm: 0.9999990000001088, iteration: 151777
loss: 1.0014392137527466,grad_norm: 0.9999991151761761, iteration: 151778
loss: 0.9948291182518005,grad_norm: 0.8984408468139264, iteration: 151779
loss: 0.987389326095581,grad_norm: 0.9999990095108444, iteration: 151780
loss: 1.0293270349502563,grad_norm: 0.9999998513801821, iteration: 151781
loss: 0.9884572625160217,grad_norm: 0.9611515550966162, iteration: 151782
loss: 0.9828643798828125,grad_norm: 0.9192065314031577, iteration: 151783
loss: 0.9643108248710632,grad_norm: 0.862549703054953, iteration: 151784
loss: 0.9888140559196472,grad_norm: 0.9999992294454136, iteration: 151785
loss: 1.00630784034729,grad_norm: 0.8415043929398787, iteration: 151786
loss: 0.9937490820884705,grad_norm: 0.9999993686776376, iteration: 151787
loss: 0.9586973786354065,grad_norm: 0.8986342596237454, iteration: 151788
loss: 0.9895642399787903,grad_norm: 0.8607542802313992, iteration: 151789
loss: 1.0295861959457397,grad_norm: 0.9999997093260566, iteration: 151790
loss: 1.0180672407150269,grad_norm: 0.9694527355209324, iteration: 151791
loss: 1.0003052949905396,grad_norm: 0.8707817462044393, iteration: 151792
loss: 1.0384471416473389,grad_norm: 0.9999990894615605, iteration: 151793
loss: 1.0220510959625244,grad_norm: 0.8893663632395257, iteration: 151794
loss: 1.0018303394317627,grad_norm: 0.9999991296921041, iteration: 151795
loss: 0.992654025554657,grad_norm: 0.8749020554835305, iteration: 151796
loss: 1.0292943716049194,grad_norm: 0.9999993956457173, iteration: 151797
loss: 1.002807855606079,grad_norm: 0.9999990323337056, iteration: 151798
loss: 0.980299711227417,grad_norm: 0.989521669590538, iteration: 151799
loss: 1.0025029182434082,grad_norm: 0.8950464456066624, iteration: 151800
loss: 1.0155366659164429,grad_norm: 0.939153510136551, iteration: 151801
loss: 1.028027057647705,grad_norm: 0.999999150332096, iteration: 151802
loss: 1.0106006860733032,grad_norm: 0.999999106881113, iteration: 151803
loss: 0.984866738319397,grad_norm: 0.9404984585189333, iteration: 151804
loss: 0.9832813143730164,grad_norm: 0.8467459429790622, iteration: 151805
loss: 0.9846009612083435,grad_norm: 0.9645809930573417, iteration: 151806
loss: 1.0030205249786377,grad_norm: 0.9999992480563066, iteration: 151807
loss: 1.0302947759628296,grad_norm: 0.9999993208771816, iteration: 151808
loss: 0.958308219909668,grad_norm: 0.9337736025967925, iteration: 151809
loss: 0.9984518885612488,grad_norm: 0.9999990557364348, iteration: 151810
loss: 1.0050437450408936,grad_norm: 0.9999990895111, iteration: 151811
loss: 1.0352524518966675,grad_norm: 0.9999989775016178, iteration: 151812
loss: 1.0002336502075195,grad_norm: 0.9999992120468814, iteration: 151813
loss: 0.9907535910606384,grad_norm: 0.9719130705044943, iteration: 151814
loss: 0.9542216062545776,grad_norm: 0.9543157398487117, iteration: 151815
loss: 1.0123790502548218,grad_norm: 0.9999990685043394, iteration: 151816
loss: 0.9727011322975159,grad_norm: 0.9255594217878492, iteration: 151817
loss: 0.9485852718353271,grad_norm: 0.9999992216130269, iteration: 151818
loss: 1.0363445281982422,grad_norm: 0.9999991609983835, iteration: 151819
loss: 1.0137181282043457,grad_norm: 0.9999990148960788, iteration: 151820
loss: 1.030671238899231,grad_norm: 0.9999990495756222, iteration: 151821
loss: 1.0006074905395508,grad_norm: 0.8623841403053781, iteration: 151822
loss: 1.0137155055999756,grad_norm: 0.9763480369461953, iteration: 151823
loss: 0.9872768521308899,grad_norm: 0.9189597399326883, iteration: 151824
loss: 1.0427029132843018,grad_norm: 0.999999353750287, iteration: 151825
loss: 1.003031849861145,grad_norm: 0.9999991659342439, iteration: 151826
loss: 1.0285155773162842,grad_norm: 0.956987065912144, iteration: 151827
loss: 1.011592984199524,grad_norm: 0.9999990600898098, iteration: 151828
loss: 1.0419503450393677,grad_norm: 0.9999990479408337, iteration: 151829
loss: 0.9940222501754761,grad_norm: 0.9999991316683015, iteration: 151830
loss: 0.9397439956665039,grad_norm: 0.9999991157106035, iteration: 151831
loss: 0.988013505935669,grad_norm: 0.8588801985454851, iteration: 151832
loss: 1.0095362663269043,grad_norm: 0.8776937341048834, iteration: 151833
loss: 1.0471526384353638,grad_norm: 0.8639656935465427, iteration: 151834
loss: 1.0181126594543457,grad_norm: 0.8725199836888666, iteration: 151835
loss: 0.9833500385284424,grad_norm: 0.9085306398548746, iteration: 151836
loss: 1.0028892755508423,grad_norm: 0.9598670261681629, iteration: 151837
loss: 0.988791286945343,grad_norm: 0.9999990705602911, iteration: 151838
loss: 0.9933433532714844,grad_norm: 0.9999992558084653, iteration: 151839
loss: 0.9929887652397156,grad_norm: 0.9999991667917155, iteration: 151840
loss: 0.9807789921760559,grad_norm: 0.999998964256707, iteration: 151841
loss: 1.0534515380859375,grad_norm: 0.999999287547424, iteration: 151842
loss: 1.001651406288147,grad_norm: 0.9999990708958658, iteration: 151843
loss: 1.0024696588516235,grad_norm: 0.9999990277152238, iteration: 151844
loss: 0.9461550712585449,grad_norm: 0.9999989934519203, iteration: 151845
loss: 1.0564409494400024,grad_norm: 0.9999996223083144, iteration: 151846
loss: 1.0580562353134155,grad_norm: 0.9999993741190216, iteration: 151847
loss: 0.9752132296562195,grad_norm: 0.9089471629188164, iteration: 151848
loss: 1.0230919122695923,grad_norm: 0.9999994742545331, iteration: 151849
loss: 1.0117403268814087,grad_norm: 0.9438475425279211, iteration: 151850
loss: 1.0088444948196411,grad_norm: 0.9999991654740018, iteration: 151851
loss: 1.0114139318466187,grad_norm: 0.8136229779974656, iteration: 151852
loss: 1.0146089792251587,grad_norm: 0.8322013043012557, iteration: 151853
loss: 1.0408326387405396,grad_norm: 0.8858441253691396, iteration: 151854
loss: 1.0246926546096802,grad_norm: 0.9999990543520294, iteration: 151855
loss: 0.9699258804321289,grad_norm: 0.9999990917718575, iteration: 151856
loss: 1.0128326416015625,grad_norm: 0.9999991577669917, iteration: 151857
loss: 1.004589557647705,grad_norm: 0.9401683275834963, iteration: 151858
loss: 0.9903178215026855,grad_norm: 0.9161578744332841, iteration: 151859
loss: 1.0615224838256836,grad_norm: 0.9999991352437098, iteration: 151860
loss: 0.9925854802131653,grad_norm: 0.9086005883054608, iteration: 151861
loss: 0.9746872186660767,grad_norm: 0.9753403818197453, iteration: 151862
loss: 1.0264402627944946,grad_norm: 0.9999991662703954, iteration: 151863
loss: 1.0179803371429443,grad_norm: 0.9326184631704489, iteration: 151864
loss: 0.9893125295639038,grad_norm: 0.9999990773246671, iteration: 151865
loss: 0.9627440571784973,grad_norm: 0.934139313270042, iteration: 151866
loss: 1.0061345100402832,grad_norm: 0.9482462746834377, iteration: 151867
loss: 0.9720445275306702,grad_norm: 0.9999991135570441, iteration: 151868
loss: 1.0053452253341675,grad_norm: 0.9999991865646215, iteration: 151869
loss: 0.9688379764556885,grad_norm: 0.9769428842042354, iteration: 151870
loss: 0.9830152988433838,grad_norm: 0.9999990806343193, iteration: 151871
loss: 0.9863496422767639,grad_norm: 0.9222697920257015, iteration: 151872
loss: 1.014299988746643,grad_norm: 0.9497613174228176, iteration: 151873
loss: 1.0063867568969727,grad_norm: 0.9999991068883389, iteration: 151874
loss: 1.0579415559768677,grad_norm: 0.9976309616194918, iteration: 151875
loss: 1.0210288763046265,grad_norm: 0.9999989321146723, iteration: 151876
loss: 0.9757433533668518,grad_norm: 0.9487491502167019, iteration: 151877
loss: 0.9820975661277771,grad_norm: 0.9999990186945081, iteration: 151878
loss: 1.0485433340072632,grad_norm: 0.9999992667573152, iteration: 151879
loss: 0.9918548464775085,grad_norm: 0.9999991778722762, iteration: 151880
loss: 1.0482814311981201,grad_norm: 0.99999988539181, iteration: 151881
loss: 0.9719027876853943,grad_norm: 0.9638726832230837, iteration: 151882
loss: 1.0082601308822632,grad_norm: 0.9617782721673837, iteration: 151883
loss: 0.9982732534408569,grad_norm: 0.9294324658561304, iteration: 151884
loss: 1.0233354568481445,grad_norm: 0.9999993613146262, iteration: 151885
loss: 1.0157718658447266,grad_norm: 0.9999992170439631, iteration: 151886
loss: 0.9861100912094116,grad_norm: 0.9999989771821269, iteration: 151887
loss: 0.9894827008247375,grad_norm: 0.9999992430934949, iteration: 151888
loss: 1.0730669498443604,grad_norm: 0.9227958116549817, iteration: 151889
loss: 0.9613425135612488,grad_norm: 0.9999991915180061, iteration: 151890
loss: 1.004030704498291,grad_norm: 0.9999990849633201, iteration: 151891
loss: 1.0100265741348267,grad_norm: 0.9732078989094816, iteration: 151892
loss: 1.0373835563659668,grad_norm: 0.960243966030936, iteration: 151893
loss: 1.0290451049804688,grad_norm: 0.9791295409510111, iteration: 151894
loss: 0.9963757991790771,grad_norm: 0.999999706467854, iteration: 151895
loss: 0.9415065050125122,grad_norm: 0.8470412772522492, iteration: 151896
loss: 0.9935773611068726,grad_norm: 0.9896169756829547, iteration: 151897
loss: 0.9747564196586609,grad_norm: 0.9999992331439579, iteration: 151898
loss: 0.9816729426383972,grad_norm: 0.9999991144650575, iteration: 151899
loss: 1.006316900253296,grad_norm: 0.9999992073896075, iteration: 151900
loss: 1.0358285903930664,grad_norm: 0.9999990037343912, iteration: 151901
loss: 0.9566347002983093,grad_norm: 0.9999991924700077, iteration: 151902
loss: 0.9896892309188843,grad_norm: 0.8756001785377847, iteration: 151903
loss: 0.9627020359039307,grad_norm: 0.9999990530828368, iteration: 151904
loss: 0.9826058149337769,grad_norm: 0.9835802068004238, iteration: 151905
loss: 0.9762110114097595,grad_norm: 0.9519872811414783, iteration: 151906
loss: 0.987058162689209,grad_norm: 0.9999991905711902, iteration: 151907
loss: 1.005043387413025,grad_norm: 0.9371807049980055, iteration: 151908
loss: 0.9912987351417542,grad_norm: 0.9705119298787386, iteration: 151909
loss: 1.0108091831207275,grad_norm: 0.9999990952458716, iteration: 151910
loss: 1.0223300457000732,grad_norm: 0.9982470474715833, iteration: 151911
loss: 0.9542823433876038,grad_norm: 0.999999170680125, iteration: 151912
loss: 0.9972794651985168,grad_norm: 0.9999990368771189, iteration: 151913
loss: 1.0046484470367432,grad_norm: 0.9009129754203553, iteration: 151914
loss: 1.0319267511367798,grad_norm: 0.969702662233443, iteration: 151915
loss: 1.0036108493804932,grad_norm: 0.9999993050096445, iteration: 151916
loss: 0.9811373949050903,grad_norm: 0.9703142500381943, iteration: 151917
loss: 1.0116902589797974,grad_norm: 0.9999990854083914, iteration: 151918
loss: 0.9884101748466492,grad_norm: 0.9410359922842957, iteration: 151919
loss: 1.0242457389831543,grad_norm: 0.99999903021249, iteration: 151920
loss: 1.04216730594635,grad_norm: 0.9999993098423339, iteration: 151921
loss: 1.0035467147827148,grad_norm: 0.9074763446375018, iteration: 151922
loss: 1.0422495603561401,grad_norm: 0.9724927587108118, iteration: 151923
loss: 1.0207834243774414,grad_norm: 0.821807379814059, iteration: 151924
loss: 0.9601308107376099,grad_norm: 0.9999992701955622, iteration: 151925
loss: 0.9739102721214294,grad_norm: 0.9537100359606856, iteration: 151926
loss: 1.0255986452102661,grad_norm: 0.8131676896888491, iteration: 151927
loss: 1.0049734115600586,grad_norm: 0.9543149109089164, iteration: 151928
loss: 1.001874566078186,grad_norm: 0.9188958246944516, iteration: 151929
loss: 0.9758036136627197,grad_norm: 0.9136238757932206, iteration: 151930
loss: 0.9906494617462158,grad_norm: 0.9626606389107781, iteration: 151931
loss: 1.0442841053009033,grad_norm: 0.9154268336285516, iteration: 151932
loss: 0.9476156830787659,grad_norm: 0.996144524617907, iteration: 151933
loss: 1.0351524353027344,grad_norm: 0.9999990746409393, iteration: 151934
loss: 0.9979130625724792,grad_norm: 0.8562749618898108, iteration: 151935
loss: 1.026565670967102,grad_norm: 0.9999992249728923, iteration: 151936
loss: 1.019860029220581,grad_norm: 0.999999021758975, iteration: 151937
loss: 1.0132572650909424,grad_norm: 0.9999991521247037, iteration: 151938
loss: 0.9826125502586365,grad_norm: 0.9437443320688719, iteration: 151939
loss: 0.9688882231712341,grad_norm: 0.9999991855402616, iteration: 151940
loss: 0.9817160964012146,grad_norm: 0.9999989927957489, iteration: 151941
loss: 1.0087755918502808,grad_norm: 0.9259839545435959, iteration: 151942
loss: 0.9824254512786865,grad_norm: 0.9999990626979963, iteration: 151943
loss: 1.0313925743103027,grad_norm: 0.7965780007810925, iteration: 151944
loss: 1.0059865713119507,grad_norm: 0.9115456395256987, iteration: 151945
loss: 0.9919666051864624,grad_norm: 0.8813310450739708, iteration: 151946
loss: 1.0111808776855469,grad_norm: 0.9555088619616279, iteration: 151947
loss: 1.0286263227462769,grad_norm: 0.9930666957785685, iteration: 151948
loss: 0.9977913498878479,grad_norm: 0.943662935940726, iteration: 151949
loss: 0.9901091456413269,grad_norm: 0.9999991591820642, iteration: 151950
loss: 1.0166486501693726,grad_norm: 0.7957538257167153, iteration: 151951
loss: 1.016119122505188,grad_norm: 0.9491200180445739, iteration: 151952
loss: 1.0325244665145874,grad_norm: 0.9999996399687373, iteration: 151953
loss: 1.0374157428741455,grad_norm: 0.999999609003298, iteration: 151954
loss: 1.0330177545547485,grad_norm: 0.9999992700302263, iteration: 151955
loss: 1.033906102180481,grad_norm: 0.999999149618866, iteration: 151956
loss: 1.0068186521530151,grad_norm: 0.986524739941798, iteration: 151957
loss: 1.0090808868408203,grad_norm: 0.8419527171315636, iteration: 151958
loss: 0.9728383421897888,grad_norm: 0.9999990765086806, iteration: 151959
loss: 0.989852786064148,grad_norm: 0.9999989656927618, iteration: 151960
loss: 1.0314987897872925,grad_norm: 0.9999992106870407, iteration: 151961
loss: 0.9724379181861877,grad_norm: 0.9722381881223213, iteration: 151962
loss: 0.9919411540031433,grad_norm: 0.9814657821639916, iteration: 151963
loss: 0.9868510961532593,grad_norm: 0.9999990153462746, iteration: 151964
loss: 0.9954236149787903,grad_norm: 0.9783084385421116, iteration: 151965
loss: 1.025958776473999,grad_norm: 0.9999991572799501, iteration: 151966
loss: 0.9829135537147522,grad_norm: 0.9277417131962771, iteration: 151967
loss: 0.9878541827201843,grad_norm: 0.9999992331625283, iteration: 151968
loss: 0.9503085613250732,grad_norm: 0.969653392999105, iteration: 151969
loss: 1.005905032157898,grad_norm: 0.9014038908967608, iteration: 151970
loss: 1.0192302465438843,grad_norm: 0.8667766557778382, iteration: 151971
loss: 1.0275952816009521,grad_norm: 0.9999995996201714, iteration: 151972
loss: 0.9891305565834045,grad_norm: 0.9819188133233764, iteration: 151973
loss: 1.0130342245101929,grad_norm: 0.8267505916953681, iteration: 151974
loss: 1.002518892288208,grad_norm: 0.9999990549206723, iteration: 151975
loss: 1.0155583620071411,grad_norm: 0.9999990456910149, iteration: 151976
loss: 0.9984588623046875,grad_norm: 0.9999989297571482, iteration: 151977
loss: 1.0192558765411377,grad_norm: 0.9999991374117158, iteration: 151978
loss: 0.992182195186615,grad_norm: 0.9999990071586846, iteration: 151979
loss: 1.0091073513031006,grad_norm: 0.8495956578698054, iteration: 151980
loss: 1.0397415161132812,grad_norm: 0.9999992824399618, iteration: 151981
loss: 1.0004100799560547,grad_norm: 0.9999991329298864, iteration: 151982
loss: 1.0521080493927002,grad_norm: 0.999999210674846, iteration: 151983
loss: 0.9965505003929138,grad_norm: 0.9999991788636546, iteration: 151984
loss: 0.9874849319458008,grad_norm: 0.9999990436223086, iteration: 151985
loss: 1.0234087705612183,grad_norm: 0.8928243659935734, iteration: 151986
loss: 1.0057237148284912,grad_norm: 0.9864714933886192, iteration: 151987
loss: 0.983421266078949,grad_norm: 0.9580510582066698, iteration: 151988
loss: 1.0199546813964844,grad_norm: 0.9999990810491013, iteration: 151989
loss: 0.9453531503677368,grad_norm: 0.8725577401802033, iteration: 151990
loss: 0.9879336357116699,grad_norm: 0.9999990758947841, iteration: 151991
loss: 0.9885862469673157,grad_norm: 0.981327522388615, iteration: 151992
loss: 0.9714041948318481,grad_norm: 0.8541214322170555, iteration: 151993
loss: 0.9794486165046692,grad_norm: 0.9999992216551804, iteration: 151994
loss: 0.9902735352516174,grad_norm: 0.9999991762865201, iteration: 151995
loss: 0.9867414832115173,grad_norm: 0.9999990967147142, iteration: 151996
loss: 0.9884699583053589,grad_norm: 0.9999991870181699, iteration: 151997
loss: 1.0067981481552124,grad_norm: 0.9999989930850747, iteration: 151998
loss: 0.9635211825370789,grad_norm: 0.9563297988856527, iteration: 151999
loss: 0.9859175086021423,grad_norm: 0.9878932246003441, iteration: 152000
loss: 1.0039384365081787,grad_norm: 0.9999991571013899, iteration: 152001
loss: 0.986336886882782,grad_norm: 0.7969300839806641, iteration: 152002
loss: 0.9957025051116943,grad_norm: 0.9999990778472675, iteration: 152003
loss: 0.9910931587219238,grad_norm: 0.9102228842375817, iteration: 152004
loss: 0.9911845326423645,grad_norm: 0.8937048841479286, iteration: 152005
loss: 0.9921475648880005,grad_norm: 0.9800192276536354, iteration: 152006
loss: 1.0198594331741333,grad_norm: 0.9999990922183132, iteration: 152007
loss: 0.986057460308075,grad_norm: 0.9439892849324049, iteration: 152008
loss: 1.01573646068573,grad_norm: 0.9999992410722921, iteration: 152009
loss: 1.0287803411483765,grad_norm: 0.9999991285002436, iteration: 152010
loss: 0.9852462410926819,grad_norm: 0.9631944347804009, iteration: 152011
loss: 0.9984254837036133,grad_norm: 0.9505555750439156, iteration: 152012
loss: 1.0313881635665894,grad_norm: 0.9999992155476375, iteration: 152013
loss: 0.9737036228179932,grad_norm: 0.9999990976501735, iteration: 152014
loss: 0.971768319606781,grad_norm: 0.9999993188062695, iteration: 152015
loss: 0.9920417666435242,grad_norm: 0.9993955615740697, iteration: 152016
loss: 0.9910853505134583,grad_norm: 0.9999990997087941, iteration: 152017
loss: 0.9953723549842834,grad_norm: 0.9999990576756579, iteration: 152018
loss: 1.0141061544418335,grad_norm: 0.9999990040591561, iteration: 152019
loss: 0.9782284498214722,grad_norm: 0.8945739780386545, iteration: 152020
loss: 0.9702286124229431,grad_norm: 0.9999990773714209, iteration: 152021
loss: 1.0056507587432861,grad_norm: 0.9999993896293307, iteration: 152022
loss: 1.0187479257583618,grad_norm: 0.9999991804273046, iteration: 152023
loss: 0.9464103579521179,grad_norm: 0.961289243606635, iteration: 152024
loss: 1.0313694477081299,grad_norm: 0.8886477740218544, iteration: 152025
loss: 1.0189791917800903,grad_norm: 0.9999990544581517, iteration: 152026
loss: 0.9861431121826172,grad_norm: 0.9999991608989377, iteration: 152027
loss: 1.0246018171310425,grad_norm: 0.9999991288134747, iteration: 152028
loss: 1.00093412399292,grad_norm: 0.9999991978359949, iteration: 152029
loss: 0.9804624319076538,grad_norm: 0.9980391909971932, iteration: 152030
loss: 0.97874516248703,grad_norm: 0.9599234003356116, iteration: 152031
loss: 0.9953882098197937,grad_norm: 0.9999992473395313, iteration: 152032
loss: 0.9846706390380859,grad_norm: 0.9999992377585706, iteration: 152033
loss: 0.9616318345069885,grad_norm: 0.9999990881644164, iteration: 152034
loss: 1.0094114542007446,grad_norm: 0.8756180776090204, iteration: 152035
loss: 1.0122332572937012,grad_norm: 0.9999990344585277, iteration: 152036
loss: 1.0000276565551758,grad_norm: 0.9999991064943123, iteration: 152037
loss: 1.028180718421936,grad_norm: 0.9724999237236915, iteration: 152038
loss: 0.9538324475288391,grad_norm: 0.9192617265291824, iteration: 152039
loss: 1.0168503522872925,grad_norm: 0.9999991504895559, iteration: 152040
loss: 1.0131655931472778,grad_norm: 0.8804351940628676, iteration: 152041
loss: 1.047163486480713,grad_norm: 0.9697011130483748, iteration: 152042
loss: 0.9948256015777588,grad_norm: 0.9448214070533985, iteration: 152043
loss: 1.0373717546463013,grad_norm: 0.9379182827591569, iteration: 152044
loss: 1.0102126598358154,grad_norm: 0.9999992437390914, iteration: 152045
loss: 0.999179482460022,grad_norm: 0.9562946538905721, iteration: 152046
loss: 0.9761691689491272,grad_norm: 0.9338592065814972, iteration: 152047
loss: 1.0030499696731567,grad_norm: 0.9331025663483123, iteration: 152048
loss: 0.97142493724823,grad_norm: 0.9554146886368604, iteration: 152049
loss: 1.0303282737731934,grad_norm: 0.8363340563345807, iteration: 152050
loss: 0.9974432587623596,grad_norm: 0.9999989934865579, iteration: 152051
loss: 0.9790292382240295,grad_norm: 0.9556129938267627, iteration: 152052
loss: 0.9963504076004028,grad_norm: 0.9383028203645002, iteration: 152053
loss: 1.1042085886001587,grad_norm: 0.9999999090325448, iteration: 152054
loss: 1.002142071723938,grad_norm: 0.97290880445094, iteration: 152055
loss: 0.9978300929069519,grad_norm: 0.9481008397432146, iteration: 152056
loss: 0.9835811853408813,grad_norm: 0.9999991725722448, iteration: 152057
loss: 0.9991350173950195,grad_norm: 0.7666608406314273, iteration: 152058
loss: 0.9973847270011902,grad_norm: 0.9654224423960485, iteration: 152059
loss: 1.0003365278244019,grad_norm: 0.9999990250985206, iteration: 152060
loss: 1.0023655891418457,grad_norm: 0.9792916815799495, iteration: 152061
loss: 1.0018202066421509,grad_norm: 0.9093464569313604, iteration: 152062
loss: 1.020391821861267,grad_norm: 0.9999992202935443, iteration: 152063
loss: 1.0117504596710205,grad_norm: 0.8957172275726685, iteration: 152064
loss: 0.9949840903282166,grad_norm: 0.9999991124663989, iteration: 152065
loss: 0.9815086722373962,grad_norm: 0.8952188212578618, iteration: 152066
loss: 0.9977768659591675,grad_norm: 0.9999991709533504, iteration: 152067
loss: 0.990619421005249,grad_norm: 0.999999198662581, iteration: 152068
loss: 0.9773215651512146,grad_norm: 0.94406939067506, iteration: 152069
loss: 0.9815276265144348,grad_norm: 1.0000001345884462, iteration: 152070
loss: 1.0256752967834473,grad_norm: 0.9999989743766536, iteration: 152071
loss: 0.9887760281562805,grad_norm: 0.9999991691078148, iteration: 152072
loss: 0.994475245475769,grad_norm: 0.999999280826384, iteration: 152073
loss: 1.0491394996643066,grad_norm: 0.9541849926108795, iteration: 152074
loss: 1.0463569164276123,grad_norm: 0.9999990652205766, iteration: 152075
loss: 0.9897400736808777,grad_norm: 0.999998974831776, iteration: 152076
loss: 0.9839497208595276,grad_norm: 0.9212810773539282, iteration: 152077
loss: 1.0115000009536743,grad_norm: 0.9999993150146015, iteration: 152078
loss: 1.0111677646636963,grad_norm: 0.9905692673642089, iteration: 152079
loss: 1.0111154317855835,grad_norm: 0.9988169184784939, iteration: 152080
loss: 0.9893966913223267,grad_norm: 0.9619812618508029, iteration: 152081
loss: 1.0253452062606812,grad_norm: 0.9999991850143336, iteration: 152082
loss: 1.0029404163360596,grad_norm: 0.9999991122169113, iteration: 152083
loss: 0.9737818837165833,grad_norm: 0.9414065262357662, iteration: 152084
loss: 1.0076197385787964,grad_norm: 0.992251563987238, iteration: 152085
loss: 1.007452130317688,grad_norm: 0.999998976712826, iteration: 152086
loss: 0.9914253950119019,grad_norm: 0.9836995367799464, iteration: 152087
loss: 0.9939825534820557,grad_norm: 0.9999990701707749, iteration: 152088
loss: 1.0229357481002808,grad_norm: 0.9999991577282644, iteration: 152089
loss: 1.0016610622406006,grad_norm: 0.999999174154069, iteration: 152090
loss: 1.0075052976608276,grad_norm: 0.8914398710961471, iteration: 152091
loss: 1.003761649131775,grad_norm: 0.999998996690374, iteration: 152092
loss: 1.0086766481399536,grad_norm: 0.8943040761564844, iteration: 152093
loss: 0.9900445938110352,grad_norm: 0.9999992154548966, iteration: 152094
loss: 1.027894377708435,grad_norm: 0.8256559302424971, iteration: 152095
loss: 1.0955411195755005,grad_norm: 1.0000000831304212, iteration: 152096
loss: 1.0248892307281494,grad_norm: 0.9516595151849643, iteration: 152097
loss: 1.0073680877685547,grad_norm: 0.9999991955076161, iteration: 152098
loss: 1.0163100957870483,grad_norm: 0.9999990454537065, iteration: 152099
loss: 1.009426236152649,grad_norm: 0.9999990210361692, iteration: 152100
loss: 1.0190374851226807,grad_norm: 0.9999990320897668, iteration: 152101
loss: 0.993986189365387,grad_norm: 0.8887216677073312, iteration: 152102
loss: 0.9424226880073547,grad_norm: 0.9999992227716695, iteration: 152103
loss: 1.0187221765518188,grad_norm: 0.8880411972605513, iteration: 152104
loss: 0.9767739176750183,grad_norm: 0.9999992785963026, iteration: 152105
loss: 1.0211803913116455,grad_norm: 0.9999990777658385, iteration: 152106
loss: 0.9802468419075012,grad_norm: 0.9333436617459078, iteration: 152107
loss: 1.0264906883239746,grad_norm: 0.9999991612958731, iteration: 152108
loss: 0.9730263352394104,grad_norm: 0.8839932511812876, iteration: 152109
loss: 0.9960474967956543,grad_norm: 0.9999990063535967, iteration: 152110
loss: 0.9661818742752075,grad_norm: 0.999999263503666, iteration: 152111
loss: 1.0568228960037231,grad_norm: 0.9999991002689085, iteration: 152112
loss: 0.9758629202842712,grad_norm: 0.9598475005882787, iteration: 152113
loss: 0.9691134691238403,grad_norm: 0.9426589612514131, iteration: 152114
loss: 0.9718413352966309,grad_norm: 0.9999994041527415, iteration: 152115
loss: 0.9986564517021179,grad_norm: 0.9194122453102975, iteration: 152116
loss: 0.994846522808075,grad_norm: 0.9899718399351214, iteration: 152117
loss: 0.9798830151557922,grad_norm: 0.9999991275117714, iteration: 152118
loss: 0.9864541888237,grad_norm: 0.9999991645048647, iteration: 152119
loss: 0.9738215208053589,grad_norm: 0.9999990839800346, iteration: 152120
loss: 1.0018845796585083,grad_norm: 0.9999989448779306, iteration: 152121
loss: 0.9680110216140747,grad_norm: 0.9888734642122602, iteration: 152122
loss: 0.9772775173187256,grad_norm: 0.9576978190285805, iteration: 152123
loss: 1.004386067390442,grad_norm: 0.9701268761480669, iteration: 152124
loss: 1.014790415763855,grad_norm: 0.9999991551210021, iteration: 152125
loss: 0.9773150682449341,grad_norm: 0.9078866126406254, iteration: 152126
loss: 1.0083824396133423,grad_norm: 0.9999992596704647, iteration: 152127
loss: 0.9756226539611816,grad_norm: 0.9999991805203966, iteration: 152128
loss: 1.0347994565963745,grad_norm: 0.9999997435966785, iteration: 152129
loss: 0.982485294342041,grad_norm: 0.9999990254489359, iteration: 152130
loss: 0.9672382473945618,grad_norm: 0.9860525496495532, iteration: 152131
loss: 1.0055758953094482,grad_norm: 0.9673048711650738, iteration: 152132
loss: 0.9691648483276367,grad_norm: 0.9999990321925685, iteration: 152133
loss: 0.9972413778305054,grad_norm: 0.9999990294555489, iteration: 152134
loss: 1.0120446681976318,grad_norm: 0.999999220147121, iteration: 152135
loss: 1.0054137706756592,grad_norm: 0.9999991843541467, iteration: 152136
loss: 1.0082114934921265,grad_norm: 0.9999990758088805, iteration: 152137
loss: 1.0104539394378662,grad_norm: 0.9025773124706432, iteration: 152138
loss: 0.9827030897140503,grad_norm: 0.90542588954241, iteration: 152139
loss: 0.9905218482017517,grad_norm: 0.9999992651473403, iteration: 152140
loss: 1.0307199954986572,grad_norm: 0.9944392013021073, iteration: 152141
loss: 0.9980140328407288,grad_norm: 0.9893535669524444, iteration: 152142
loss: 0.9809385538101196,grad_norm: 0.9999991799991835, iteration: 152143
loss: 1.0122731924057007,grad_norm: 0.999999152148665, iteration: 152144
loss: 1.0173618793487549,grad_norm: 0.9999993447590512, iteration: 152145
loss: 0.9813423752784729,grad_norm: 0.942244658976914, iteration: 152146
loss: 0.9975905418395996,grad_norm: 0.9999992077019753, iteration: 152147
loss: 0.992321252822876,grad_norm: 0.999999093460474, iteration: 152148
loss: 0.9919345378875732,grad_norm: 0.9999992054010598, iteration: 152149
loss: 1.0079337358474731,grad_norm: 0.999999155962604, iteration: 152150
loss: 0.9998807311058044,grad_norm: 0.9512093090554006, iteration: 152151
loss: 0.9856135249137878,grad_norm: 0.9999991142824997, iteration: 152152
loss: 0.9936225414276123,grad_norm: 0.9013726542146977, iteration: 152153
loss: 1.0254696607589722,grad_norm: 0.9745071315220193, iteration: 152154
loss: 1.0280890464782715,grad_norm: 0.99999907619559, iteration: 152155
loss: 0.9772564768791199,grad_norm: 0.99999920819106, iteration: 152156
loss: 1.0150667428970337,grad_norm: 0.9126763132732523, iteration: 152157
loss: 0.9826095700263977,grad_norm: 0.9516578619900936, iteration: 152158
loss: 1.037200689315796,grad_norm: 0.9999991933014295, iteration: 152159
loss: 1.0110880136489868,grad_norm: 0.9717444141767961, iteration: 152160
loss: 0.9837335348129272,grad_norm: 0.9999992106244036, iteration: 152161
loss: 0.9501627683639526,grad_norm: 0.9607173340048677, iteration: 152162
loss: 1.0017307996749878,grad_norm: 0.9903060273839123, iteration: 152163
loss: 0.9579936265945435,grad_norm: 0.9999992239684247, iteration: 152164
loss: 1.0042781829833984,grad_norm: 0.875904096587211, iteration: 152165
loss: 0.9646841883659363,grad_norm: 0.9999990607071323, iteration: 152166
loss: 0.9995793104171753,grad_norm: 0.9999991157300827, iteration: 152167
loss: 0.9879780411720276,grad_norm: 0.9999991444900733, iteration: 152168
loss: 1.0388643741607666,grad_norm: 0.9999991068874652, iteration: 152169
loss: 0.9774695634841919,grad_norm: 0.9999989968270795, iteration: 152170
loss: 1.0126521587371826,grad_norm: 0.9297678428421626, iteration: 152171
loss: 0.9916215538978577,grad_norm: 0.9999990311108514, iteration: 152172
loss: 0.984650194644928,grad_norm: 0.9999990515137774, iteration: 152173
loss: 1.0085079669952393,grad_norm: 0.9999991032806469, iteration: 152174
loss: 1.0005558729171753,grad_norm: 0.9999992121001775, iteration: 152175
loss: 0.9891889691352844,grad_norm: 0.9834015187383549, iteration: 152176
loss: 0.9848899245262146,grad_norm: 0.9137787246012241, iteration: 152177
loss: 1.0058646202087402,grad_norm: 0.8756336266052864, iteration: 152178
loss: 0.9867714047431946,grad_norm: 0.9994833364481126, iteration: 152179
loss: 0.9776156544685364,grad_norm: 0.9999992755291283, iteration: 152180
loss: 0.9657998085021973,grad_norm: 0.9999991128882891, iteration: 152181
loss: 1.0142039060592651,grad_norm: 0.9999991806078476, iteration: 152182
loss: 0.9976678490638733,grad_norm: 0.9999991940242771, iteration: 152183
loss: 0.9765475392341614,grad_norm: 0.9181629607380548, iteration: 152184
loss: 0.9792892336845398,grad_norm: 0.8806782612937764, iteration: 152185
loss: 0.985790491104126,grad_norm: 0.8969741371171224, iteration: 152186
loss: 1.0365854501724243,grad_norm: 0.9999996174048557, iteration: 152187
loss: 0.9721435904502869,grad_norm: 0.9548103195421455, iteration: 152188
loss: 1.0057041645050049,grad_norm: 0.9999990665454012, iteration: 152189
loss: 0.985084056854248,grad_norm: 0.9678442675774065, iteration: 152190
loss: 1.0127958059310913,grad_norm: 0.9999992186386327, iteration: 152191
loss: 1.0326424837112427,grad_norm: 0.868712102025826, iteration: 152192
loss: 1.0029679536819458,grad_norm: 0.9923949037338362, iteration: 152193
loss: 0.9979632496833801,grad_norm: 0.9999991163359828, iteration: 152194
loss: 1.022200345993042,grad_norm: 0.9999991098816252, iteration: 152195
loss: 1.0071038007736206,grad_norm: 0.866463823784174, iteration: 152196
loss: 1.020591378211975,grad_norm: 0.931596292587485, iteration: 152197
loss: 1.04274582862854,grad_norm: 0.9999991667232335, iteration: 152198
loss: 1.0045270919799805,grad_norm: 0.9999991809380976, iteration: 152199
loss: 1.007351040840149,grad_norm: 0.9602300501008096, iteration: 152200
loss: 0.9934517741203308,grad_norm: 0.9500598578956156, iteration: 152201
loss: 1.0203157663345337,grad_norm: 0.9999989271126242, iteration: 152202
loss: 1.0188672542572021,grad_norm: 0.9999991039227427, iteration: 152203
loss: 0.9971285462379456,grad_norm: 0.9672887999348388, iteration: 152204
loss: 0.9797373414039612,grad_norm: 0.9999990569482161, iteration: 152205
loss: 0.9931345582008362,grad_norm: 0.9999989668353815, iteration: 152206
loss: 1.0184880495071411,grad_norm: 0.9999991863423748, iteration: 152207
loss: 1.0207021236419678,grad_norm: 0.8756600331002979, iteration: 152208
loss: 1.0338523387908936,grad_norm: 0.9999991729966388, iteration: 152209
loss: 1.0102839469909668,grad_norm: 0.9999990389557899, iteration: 152210
loss: 1.0335931777954102,grad_norm: 0.9999997255067834, iteration: 152211
loss: 1.0122466087341309,grad_norm: 0.9340681652402455, iteration: 152212
loss: 1.0062295198440552,grad_norm: 0.9999989932494205, iteration: 152213
loss: 0.9871935248374939,grad_norm: 0.9999991982401236, iteration: 152214
loss: 1.0100834369659424,grad_norm: 0.9999990417294425, iteration: 152215
loss: 1.024802803993225,grad_norm: 0.7882909513962045, iteration: 152216
loss: 0.976537823677063,grad_norm: 0.9999993077389028, iteration: 152217
loss: 1.0112491846084595,grad_norm: 0.9999992130134056, iteration: 152218
loss: 0.9656158089637756,grad_norm: 0.9999992519461086, iteration: 152219
loss: 1.0127332210540771,grad_norm: 0.9988750544728555, iteration: 152220
loss: 0.9750624299049377,grad_norm: 0.999999135919769, iteration: 152221
loss: 1.0254617929458618,grad_norm: 0.9999993019524661, iteration: 152222
loss: 1.0134423971176147,grad_norm: 0.9789475724588813, iteration: 152223
loss: 1.003294825553894,grad_norm: 0.9352600326691413, iteration: 152224
loss: 1.042429804801941,grad_norm: 0.9999991020974835, iteration: 152225
loss: 0.9988822340965271,grad_norm: 0.9999991952150913, iteration: 152226
loss: 1.0462150573730469,grad_norm: 0.9999992780097914, iteration: 152227
loss: 1.0207960605621338,grad_norm: 0.9839084354540912, iteration: 152228
loss: 0.9943881034851074,grad_norm: 0.9569560643682329, iteration: 152229
loss: 1.0214786529541016,grad_norm: 0.9999991773973252, iteration: 152230
loss: 1.0022876262664795,grad_norm: 0.9610440897528713, iteration: 152231
loss: 0.9973976016044617,grad_norm: 0.9999991392942051, iteration: 152232
loss: 0.9812346696853638,grad_norm: 0.9999990910657726, iteration: 152233
loss: 0.9899620413780212,grad_norm: 0.9999989999338105, iteration: 152234
loss: 1.0012991428375244,grad_norm: 0.8889896044999199, iteration: 152235
loss: 0.9545353055000305,grad_norm: 0.9825535856212815, iteration: 152236
loss: 1.0058499574661255,grad_norm: 0.9999991982472354, iteration: 152237
loss: 1.010972499847412,grad_norm: 0.9999991282785047, iteration: 152238
loss: 1.0061129331588745,grad_norm: 0.9999992477243351, iteration: 152239
loss: 1.0102272033691406,grad_norm: 0.9999990164591565, iteration: 152240
loss: 0.9934578537940979,grad_norm: 0.9999990067567512, iteration: 152241
loss: 1.017166256904602,grad_norm: 0.9999995847964169, iteration: 152242
loss: 1.0067914724349976,grad_norm: 0.9410862559011831, iteration: 152243
loss: 0.9977763295173645,grad_norm: 0.8722331963915815, iteration: 152244
loss: 1.0285062789916992,grad_norm: 0.9704809006015787, iteration: 152245
loss: 0.9806232452392578,grad_norm: 0.9173582858732201, iteration: 152246
loss: 0.9919747114181519,grad_norm: 0.9999992576988469, iteration: 152247
loss: 1.019443392753601,grad_norm: 0.9999992234511128, iteration: 152248
loss: 1.0097416639328003,grad_norm: 0.9719173383469033, iteration: 152249
loss: 0.998911440372467,grad_norm: 0.9999989862565863, iteration: 152250
loss: 0.9872047305107117,grad_norm: 0.9405432552915928, iteration: 152251
loss: 1.0091370344161987,grad_norm: 0.9649129130426217, iteration: 152252
loss: 1.0828394889831543,grad_norm: 0.9999991545345086, iteration: 152253
loss: 1.0364245176315308,grad_norm: 0.999999064830817, iteration: 152254
loss: 0.9711264371871948,grad_norm: 0.9999990826804197, iteration: 152255
loss: 1.0038838386535645,grad_norm: 0.9999991018073623, iteration: 152256
loss: 0.9766364097595215,grad_norm: 0.8932102927399663, iteration: 152257
loss: 1.0096970796585083,grad_norm: 0.9999991023929904, iteration: 152258
loss: 0.9980441331863403,grad_norm: 0.9497892423073837, iteration: 152259
loss: 1.0057028532028198,grad_norm: 0.9169576551260364, iteration: 152260
loss: 1.009477972984314,grad_norm: 0.9696359677884948, iteration: 152261
loss: 0.9947962164878845,grad_norm: 0.9915324702265208, iteration: 152262
loss: 1.0214755535125732,grad_norm: 0.9999990909957308, iteration: 152263
loss: 1.0184149742126465,grad_norm: 0.9999990863814984, iteration: 152264
loss: 1.0267525911331177,grad_norm: 0.9822898032394195, iteration: 152265
loss: 0.980974555015564,grad_norm: 0.9822679623915692, iteration: 152266
loss: 1.0005744695663452,grad_norm: 0.9828493523774605, iteration: 152267
loss: 0.9861620664596558,grad_norm: 0.840142820555873, iteration: 152268
loss: 0.9811158776283264,grad_norm: 0.9999991589221617, iteration: 152269
loss: 0.9612574577331543,grad_norm: 0.9733035713821578, iteration: 152270
loss: 1.0120140314102173,grad_norm: 0.9999989413211195, iteration: 152271
loss: 1.015945553779602,grad_norm: 0.9999990116454608, iteration: 152272
loss: 0.9848543405532837,grad_norm: 0.9999992447003236, iteration: 152273
loss: 1.0237979888916016,grad_norm: 0.9999992039191143, iteration: 152274
loss: 0.970180332660675,grad_norm: 0.9999993075868776, iteration: 152275
loss: 0.9994795322418213,grad_norm: 0.7624741925685397, iteration: 152276
loss: 1.0190807580947876,grad_norm: 0.9136837646073244, iteration: 152277
loss: 1.0108058452606201,grad_norm: 0.9999991289731812, iteration: 152278
loss: 0.9907180666923523,grad_norm: 0.8727847809713842, iteration: 152279
loss: 0.9706185460090637,grad_norm: 0.9480915516700652, iteration: 152280
loss: 1.010604977607727,grad_norm: 0.9999992392102591, iteration: 152281
loss: 0.9851191639900208,grad_norm: 0.9999992993594555, iteration: 152282
loss: 1.0293327569961548,grad_norm: 0.9880428885127325, iteration: 152283
loss: 1.001650094985962,grad_norm: 0.9711547907237165, iteration: 152284
loss: 0.9864072799682617,grad_norm: 0.9852536797751736, iteration: 152285
loss: 1.0269453525543213,grad_norm: 0.9999990897935609, iteration: 152286
loss: 1.0004066228866577,grad_norm: 0.9503564726618978, iteration: 152287
loss: 1.0197523832321167,grad_norm: 0.9895633999369884, iteration: 152288
loss: 1.0008878707885742,grad_norm: 0.9999990636500219, iteration: 152289
loss: 1.0086091756820679,grad_norm: 0.9999992287557742, iteration: 152290
loss: 1.0325943231582642,grad_norm: 0.9999991982327746, iteration: 152291
loss: 1.0241405963897705,grad_norm: 0.9999991968767666, iteration: 152292
loss: 0.9656527042388916,grad_norm: 0.999999005563722, iteration: 152293
loss: 0.9972619414329529,grad_norm: 0.9999992203034331, iteration: 152294
loss: 1.0059924125671387,grad_norm: 0.9991431974234151, iteration: 152295
loss: 0.9830265045166016,grad_norm: 0.8752359721714282, iteration: 152296
loss: 0.976646900177002,grad_norm: 0.9999992682331482, iteration: 152297
loss: 1.0186257362365723,grad_norm: 0.9158847719295362, iteration: 152298
loss: 0.945206880569458,grad_norm: 0.9999991019731992, iteration: 152299
loss: 1.0270226001739502,grad_norm: 0.9999991155695053, iteration: 152300
loss: 0.9931314587593079,grad_norm: 0.9999992131944047, iteration: 152301
loss: 0.9713415503501892,grad_norm: 0.9284160281667186, iteration: 152302
loss: 0.9678457379341125,grad_norm: 0.9999993902288472, iteration: 152303
loss: 1.0165010690689087,grad_norm: 0.9999990143995574, iteration: 152304
loss: 1.0030544996261597,grad_norm: 0.9808540099906687, iteration: 152305
loss: 1.0267640352249146,grad_norm: 0.9999992021673819, iteration: 152306
loss: 1.0159393548965454,grad_norm: 0.882749262831081, iteration: 152307
loss: 1.012280821800232,grad_norm: 0.9999990365209845, iteration: 152308
loss: 0.9981355667114258,grad_norm: 0.9999991747810768, iteration: 152309
loss: 0.9344090223312378,grad_norm: 0.9999991628116597, iteration: 152310
loss: 1.0137648582458496,grad_norm: 0.9999991710596462, iteration: 152311
loss: 0.948788046836853,grad_norm: 0.9791618032681363, iteration: 152312
loss: 0.9790232181549072,grad_norm: 0.9999989289834659, iteration: 152313
loss: 1.0026522874832153,grad_norm: 0.9305593793965604, iteration: 152314
loss: 1.0008628368377686,grad_norm: 0.9999990496708314, iteration: 152315
loss: 1.0305951833724976,grad_norm: 0.9999991476177787, iteration: 152316
loss: 1.0065064430236816,grad_norm: 0.8962385610334305, iteration: 152317
loss: 0.9717731475830078,grad_norm: 0.9999990870766866, iteration: 152318
loss: 0.9700658321380615,grad_norm: 0.8412001419972249, iteration: 152319
loss: 0.985338568687439,grad_norm: 0.9495521077536663, iteration: 152320
loss: 0.9846798777580261,grad_norm: 0.9999992018818104, iteration: 152321
loss: 0.9876397848129272,grad_norm: 0.9999989766463423, iteration: 152322
loss: 0.9891992807388306,grad_norm: 0.9999991752797721, iteration: 152323
loss: 0.9641444087028503,grad_norm: 0.903254341560038, iteration: 152324
loss: 1.009773850440979,grad_norm: 0.9999991748734177, iteration: 152325
loss: 1.0043889284133911,grad_norm: 0.9999990063773213, iteration: 152326
loss: 0.9855911731719971,grad_norm: 0.8845691898923123, iteration: 152327
loss: 1.0219026803970337,grad_norm: 0.9999990700247078, iteration: 152328
loss: 0.9683714509010315,grad_norm: 0.9696044924678405, iteration: 152329
loss: 1.0222716331481934,grad_norm: 0.9999997197985969, iteration: 152330
loss: 1.0062065124511719,grad_norm: 0.9446289139893008, iteration: 152331
loss: 1.024323582649231,grad_norm: 0.9999991506984898, iteration: 152332
loss: 1.0148239135742188,grad_norm: 0.9535672872564951, iteration: 152333
loss: 1.0255018472671509,grad_norm: 0.9999997948449433, iteration: 152334
loss: 0.9928401708602905,grad_norm: 0.8972612825354542, iteration: 152335
loss: 0.9807224869728088,grad_norm: 0.932691844365492, iteration: 152336
loss: 1.0231379270553589,grad_norm: 0.9750919218077219, iteration: 152337
loss: 0.9960948824882507,grad_norm: 0.9999991654119778, iteration: 152338
loss: 1.004004955291748,grad_norm: 0.9999991773636558, iteration: 152339
loss: 0.9841603636741638,grad_norm: 0.9999991146603916, iteration: 152340
loss: 1.0475355386734009,grad_norm: 0.9616226934708659, iteration: 152341
loss: 1.0234898328781128,grad_norm: 0.8979083685745946, iteration: 152342
loss: 1.0204499959945679,grad_norm: 0.9638819344047659, iteration: 152343
loss: 0.9813714027404785,grad_norm: 0.9014733455531708, iteration: 152344
loss: 0.9866869449615479,grad_norm: 0.8445831951695634, iteration: 152345
loss: 0.9784944653511047,grad_norm: 0.9855621432524664, iteration: 152346
loss: 1.0018559694290161,grad_norm: 0.8714251360262903, iteration: 152347
loss: 1.0888659954071045,grad_norm: 0.9999991691828902, iteration: 152348
loss: 0.9918447136878967,grad_norm: 0.999999206784109, iteration: 152349
loss: 0.9830998182296753,grad_norm: 0.9999995604778739, iteration: 152350
loss: 1.0206706523895264,grad_norm: 0.9999991019059756, iteration: 152351
loss: 1.0048938989639282,grad_norm: 0.9999990498826855, iteration: 152352
loss: 1.0373973846435547,grad_norm: 0.9867946090946252, iteration: 152353
loss: 1.0279802083969116,grad_norm: 0.9999992976452469, iteration: 152354
loss: 1.01768958568573,grad_norm: 0.931998130881598, iteration: 152355
loss: 0.9930596351623535,grad_norm: 0.9999990811782733, iteration: 152356
loss: 0.9966530799865723,grad_norm: 0.7984124802428276, iteration: 152357
loss: 1.0118193626403809,grad_norm: 0.9957311675253079, iteration: 152358
loss: 0.9998621940612793,grad_norm: 0.9511234027643781, iteration: 152359
loss: 0.9915995001792908,grad_norm: 0.9706168177449864, iteration: 152360
loss: 0.9992009997367859,grad_norm: 0.8242127096543325, iteration: 152361
loss: 1.0159965753555298,grad_norm: 0.9999990760249718, iteration: 152362
loss: 1.0094194412231445,grad_norm: 0.9169021599925381, iteration: 152363
loss: 0.9927900433540344,grad_norm: 0.8988326836845938, iteration: 152364
loss: 1.002669095993042,grad_norm: 0.9999992502880438, iteration: 152365
loss: 1.0107043981552124,grad_norm: 0.9959557313739559, iteration: 152366
loss: 0.9884160161018372,grad_norm: 0.9999990042458681, iteration: 152367
loss: 0.9965800642967224,grad_norm: 0.9837203269936962, iteration: 152368
loss: 1.0152807235717773,grad_norm: 0.9901989141471406, iteration: 152369
loss: 0.9808871150016785,grad_norm: 0.9999991510424823, iteration: 152370
loss: 0.9855057597160339,grad_norm: 0.999999273373482, iteration: 152371
loss: 0.9501569271087646,grad_norm: 0.924037917841015, iteration: 152372
loss: 0.9889498949050903,grad_norm: 0.9139986864975814, iteration: 152373
loss: 1.0042012929916382,grad_norm: 0.9486412079682923, iteration: 152374
loss: 0.9887436032295227,grad_norm: 0.9470857714592908, iteration: 152375
loss: 0.9915653467178345,grad_norm: 0.9999990879423432, iteration: 152376
loss: 0.9541470408439636,grad_norm: 0.9999991262887094, iteration: 152377
loss: 0.9841640591621399,grad_norm: 0.978974727057018, iteration: 152378
loss: 0.9935675263404846,grad_norm: 0.9999991786203243, iteration: 152379
loss: 0.9770329594612122,grad_norm: 0.9999993364696809, iteration: 152380
loss: 1.0055540800094604,grad_norm: 0.9999991389394826, iteration: 152381
loss: 1.0575248003005981,grad_norm: 0.9999991130183222, iteration: 152382
loss: 1.0419321060180664,grad_norm: 0.9999991219506332, iteration: 152383
loss: 1.0255229473114014,grad_norm: 0.9999995114837639, iteration: 152384
loss: 0.9941204190254211,grad_norm: 0.9999992747657359, iteration: 152385
loss: 0.9977102279663086,grad_norm: 0.8443891327999085, iteration: 152386
loss: 0.9759023189544678,grad_norm: 0.8433979596460814, iteration: 152387
loss: 1.0070747137069702,grad_norm: 0.926270811652682, iteration: 152388
loss: 1.0270154476165771,grad_norm: 0.9260434340833472, iteration: 152389
loss: 1.0271713733673096,grad_norm: 0.9999996889325822, iteration: 152390
loss: 0.961298942565918,grad_norm: 0.9999990709116648, iteration: 152391
loss: 0.9734057784080505,grad_norm: 0.9307393258796165, iteration: 152392
loss: 1.0278929471969604,grad_norm: 0.999999161581306, iteration: 152393
loss: 0.9868373870849609,grad_norm: 0.9803804250690482, iteration: 152394
loss: 0.9588618278503418,grad_norm: 0.9999993756718142, iteration: 152395
loss: 1.0427794456481934,grad_norm: 0.9467458006963225, iteration: 152396
loss: 1.0174713134765625,grad_norm: 0.9215307882468985, iteration: 152397
loss: 0.9830341339111328,grad_norm: 0.99999916202371, iteration: 152398
loss: 0.9657514095306396,grad_norm: 0.9999991610381203, iteration: 152399
loss: 0.9755271077156067,grad_norm: 0.9999993518137027, iteration: 152400
loss: 1.12958824634552,grad_norm: 0.999999025128505, iteration: 152401
loss: 1.0150623321533203,grad_norm: 0.9877444072823479, iteration: 152402
loss: 1.0097519159317017,grad_norm: 0.9999991176162797, iteration: 152403
loss: 1.0122534036636353,grad_norm: 0.9999992251213928, iteration: 152404
loss: 0.9922252893447876,grad_norm: 0.974458619472079, iteration: 152405
loss: 0.995710551738739,grad_norm: 0.9999990796226028, iteration: 152406
loss: 0.9716235399246216,grad_norm: 0.9153710690217615, iteration: 152407
loss: 1.007544994354248,grad_norm: 0.9849347641403051, iteration: 152408
loss: 0.9642394185066223,grad_norm: 0.8690152552486878, iteration: 152409
loss: 0.9812230467796326,grad_norm: 0.9999989284628409, iteration: 152410
loss: 0.95611572265625,grad_norm: 0.9166615886573308, iteration: 152411
loss: 1.0130468606948853,grad_norm: 0.9328339671044316, iteration: 152412
loss: 0.9952765107154846,grad_norm: 0.9999992190559616, iteration: 152413
loss: 1.0133832693099976,grad_norm: 0.9999995238058146, iteration: 152414
loss: 1.0159295797348022,grad_norm: 0.999999056829182, iteration: 152415
loss: 1.0308884382247925,grad_norm: 0.9999991286815815, iteration: 152416
loss: 0.9670076370239258,grad_norm: 0.9999991315445234, iteration: 152417
loss: 1.0036404132843018,grad_norm: 0.944783036584778, iteration: 152418
loss: 1.0181819200515747,grad_norm: 0.9378912690602906, iteration: 152419
loss: 0.9863189458847046,grad_norm: 0.9999993917496013, iteration: 152420
loss: 0.9910426139831543,grad_norm: 0.9999988826609753, iteration: 152421
loss: 0.9584973454475403,grad_norm: 0.9970743593090299, iteration: 152422
loss: 0.9724355340003967,grad_norm: 0.9999992783123826, iteration: 152423
loss: 1.025924563407898,grad_norm: 0.9099035692256187, iteration: 152424
loss: 1.0065395832061768,grad_norm: 0.9999991165512648, iteration: 152425
loss: 1.0354483127593994,grad_norm: 0.9649972806667138, iteration: 152426
loss: 0.9718168377876282,grad_norm: 0.7340025884575169, iteration: 152427
loss: 0.9826321601867676,grad_norm: 0.999999017564758, iteration: 152428
loss: 1.0047565698623657,grad_norm: 0.8758199016573233, iteration: 152429
loss: 0.9516036510467529,grad_norm: 0.9999992345950651, iteration: 152430
loss: 0.9926191568374634,grad_norm: 0.9999991081927107, iteration: 152431
loss: 1.0183156728744507,grad_norm: 0.9999991400515303, iteration: 152432
loss: 1.0500450134277344,grad_norm: 0.9999998481389565, iteration: 152433
loss: 1.0186400413513184,grad_norm: 0.9462537371856105, iteration: 152434
loss: 0.9871783256530762,grad_norm: 0.9999990150237278, iteration: 152435
loss: 0.9913386702537537,grad_norm: 0.8615649242207248, iteration: 152436
loss: 1.030579686164856,grad_norm: 0.9095943234416252, iteration: 152437
loss: 0.9842681884765625,grad_norm: 0.9672428968488721, iteration: 152438
loss: 1.0194416046142578,grad_norm: 0.9658357043138351, iteration: 152439
loss: 1.0190762281417847,grad_norm: 0.9159758819476341, iteration: 152440
loss: 0.9693346619606018,grad_norm: 0.9999990985329642, iteration: 152441
loss: 1.0194000005722046,grad_norm: 0.9134854834266738, iteration: 152442
loss: 1.008623719215393,grad_norm: 0.9831486234809655, iteration: 152443
loss: 1.0092604160308838,grad_norm: 0.9999990565846647, iteration: 152444
loss: 1.0000325441360474,grad_norm: 0.9999994459747201, iteration: 152445
loss: 0.9861840605735779,grad_norm: 0.9947772058353128, iteration: 152446
loss: 0.9877007603645325,grad_norm: 0.9999990446732199, iteration: 152447
loss: 0.9899512529373169,grad_norm: 0.9999991191307238, iteration: 152448
loss: 0.9423767924308777,grad_norm: 0.9999990878114446, iteration: 152449
loss: 0.9764006733894348,grad_norm: 0.9472965651294145, iteration: 152450
loss: 0.9854618310928345,grad_norm: 0.905418814478475, iteration: 152451
loss: 0.988207995891571,grad_norm: 0.9236561650962899, iteration: 152452
loss: 0.9782910943031311,grad_norm: 0.9732447777768124, iteration: 152453
loss: 0.9764725565910339,grad_norm: 0.9763391177318698, iteration: 152454
loss: 1.0119937658309937,grad_norm: 0.856033765141084, iteration: 152455
loss: 1.0173625946044922,grad_norm: 0.978038605083088, iteration: 152456
loss: 0.9993869662284851,grad_norm: 0.7536093111033327, iteration: 152457
loss: 1.0154513120651245,grad_norm: 0.9231219947747988, iteration: 152458
loss: 1.0215873718261719,grad_norm: 0.8046188810745409, iteration: 152459
loss: 1.0096172094345093,grad_norm: 0.9999990553341417, iteration: 152460
loss: 0.9961686730384827,grad_norm: 0.9999990611131593, iteration: 152461
loss: 1.0323593616485596,grad_norm: 0.9999992654898695, iteration: 152462
loss: 0.9854565262794495,grad_norm: 0.965202527645405, iteration: 152463
loss: 1.0393483638763428,grad_norm: 0.999999574722673, iteration: 152464
loss: 1.0250980854034424,grad_norm: 0.9999992345692037, iteration: 152465
loss: 1.0120975971221924,grad_norm: 0.9999991271376203, iteration: 152466
loss: 0.9877732992172241,grad_norm: 0.9999990792763079, iteration: 152467
loss: 1.0964763164520264,grad_norm: 0.9999996117475451, iteration: 152468
loss: 1.000931739807129,grad_norm: 0.9444699077880266, iteration: 152469
loss: 1.003799557685852,grad_norm: 0.9793930412269911, iteration: 152470
loss: 0.9960352778434753,grad_norm: 0.8963520460746771, iteration: 152471
loss: 1.0156700611114502,grad_norm: 0.9999993255434282, iteration: 152472
loss: 0.9875932335853577,grad_norm: 0.9999991374746079, iteration: 152473
loss: 1.008250117301941,grad_norm: 0.9895640174076527, iteration: 152474
loss: 0.9477761387825012,grad_norm: 0.90007762621633, iteration: 152475
loss: 0.9439037442207336,grad_norm: 0.9040679034214124, iteration: 152476
loss: 0.989830493927002,grad_norm: 0.9026170914733085, iteration: 152477
loss: 1.0043193101882935,grad_norm: 0.9999992855862977, iteration: 152478
loss: 1.0428801774978638,grad_norm: 0.9999993208282254, iteration: 152479
loss: 1.0087518692016602,grad_norm: 0.8802088162336736, iteration: 152480
loss: 1.0014495849609375,grad_norm: 0.9999990265726774, iteration: 152481
loss: 0.9716647863388062,grad_norm: 0.9792694987233183, iteration: 152482
loss: 1.0150285959243774,grad_norm: 0.9652731370031342, iteration: 152483
loss: 0.974837064743042,grad_norm: 0.9999990935377473, iteration: 152484
loss: 0.99922114610672,grad_norm: 0.9342915557362399, iteration: 152485
loss: 0.9975417852401733,grad_norm: 0.9999991504757809, iteration: 152486
loss: 1.0296640396118164,grad_norm: 0.9999991100691458, iteration: 152487
loss: 0.9972241520881653,grad_norm: 0.9999991358889119, iteration: 152488
loss: 0.9746075868606567,grad_norm: 0.9999991366289127, iteration: 152489
loss: 1.0930558443069458,grad_norm: 0.9999996962172463, iteration: 152490
loss: 1.0223907232284546,grad_norm: 0.9999992202640652, iteration: 152491
loss: 1.0282323360443115,grad_norm: 0.9696728458525004, iteration: 152492
loss: 1.0241204500198364,grad_norm: 0.99999919500224, iteration: 152493
loss: 1.0172213315963745,grad_norm: 0.9999994840476184, iteration: 152494
loss: 1.0035560131072998,grad_norm: 0.9999991276184591, iteration: 152495
loss: 0.9956536889076233,grad_norm: 0.9631856536982212, iteration: 152496
loss: 1.017480492591858,grad_norm: 0.9999991795073141, iteration: 152497
loss: 1.0006916522979736,grad_norm: 0.9113558172511653, iteration: 152498
loss: 0.9930830001831055,grad_norm: 0.9999989432253455, iteration: 152499
loss: 0.9755755066871643,grad_norm: 0.9999990576847347, iteration: 152500
loss: 1.0018424987792969,grad_norm: 0.9999999296969181, iteration: 152501
loss: 1.0034693479537964,grad_norm: 0.9999991359544468, iteration: 152502
loss: 0.9934810996055603,grad_norm: 0.9476972985397141, iteration: 152503
loss: 1.021693468093872,grad_norm: 0.8896396661984322, iteration: 152504
loss: 1.0422167778015137,grad_norm: 0.9999990218778694, iteration: 152505
loss: 0.9932419061660767,grad_norm: 0.999999157843723, iteration: 152506
loss: 1.0397554636001587,grad_norm: 0.9999991613331616, iteration: 152507
loss: 0.9832123517990112,grad_norm: 0.9590691758268285, iteration: 152508
loss: 0.9746888875961304,grad_norm: 0.8883940324833143, iteration: 152509
loss: 1.0108616352081299,grad_norm: 0.8730050478432935, iteration: 152510
loss: 0.988581657409668,grad_norm: 0.9999991092568882, iteration: 152511
loss: 1.0397266149520874,grad_norm: 0.9325650890800579, iteration: 152512
loss: 0.9337205290794373,grad_norm: 0.9999993981466153, iteration: 152513
loss: 1.0003665685653687,grad_norm: 0.9999989144135282, iteration: 152514
loss: 1.0213185548782349,grad_norm: 0.9999991786848025, iteration: 152515
loss: 1.0458263158798218,grad_norm: 0.9999991848616003, iteration: 152516
loss: 0.9970748424530029,grad_norm: 0.9340456905769854, iteration: 152517
loss: 1.0068085193634033,grad_norm: 0.9999990928456328, iteration: 152518
loss: 1.00193190574646,grad_norm: 0.7924953342271939, iteration: 152519
loss: 1.0471127033233643,grad_norm: 0.874704610582677, iteration: 152520
loss: 0.9950031638145447,grad_norm: 0.9999990014436582, iteration: 152521
loss: 0.9901183247566223,grad_norm: 0.9999991872746593, iteration: 152522
loss: 1.0034128427505493,grad_norm: 0.9999994913185373, iteration: 152523
loss: 1.0032378435134888,grad_norm: 0.9999990446851096, iteration: 152524
loss: 0.992238461971283,grad_norm: 0.9999990091354272, iteration: 152525
loss: 1.0264397859573364,grad_norm: 0.9999992157041231, iteration: 152526
loss: 1.0334879159927368,grad_norm: 0.9999990508804703, iteration: 152527
loss: 0.997129499912262,grad_norm: 0.9428568727494442, iteration: 152528
loss: 0.995807945728302,grad_norm: 0.9999998850463991, iteration: 152529
loss: 1.009594202041626,grad_norm: 0.8868423600310946, iteration: 152530
loss: 1.0242259502410889,grad_norm: 0.9999991892152381, iteration: 152531
loss: 1.0391297340393066,grad_norm: 0.9702452368871838, iteration: 152532
loss: 0.990210771560669,grad_norm: 0.9999990452119105, iteration: 152533
loss: 0.9731688499450684,grad_norm: 0.9999991631810335, iteration: 152534
loss: 1.0783281326293945,grad_norm: 0.9999989308260854, iteration: 152535
loss: 1.0611791610717773,grad_norm: 0.9999993143493802, iteration: 152536
loss: 1.075322151184082,grad_norm: 0.9999990159490394, iteration: 152537
loss: 1.0164589881896973,grad_norm: 0.9999999640175778, iteration: 152538
loss: 1.008962869644165,grad_norm: 0.8968588872405827, iteration: 152539
loss: 1.0425509214401245,grad_norm: 0.9999998637092767, iteration: 152540
loss: 0.9654383063316345,grad_norm: 0.9999996173435814, iteration: 152541
loss: 0.9897144436836243,grad_norm: 0.8894899206130884, iteration: 152542
loss: 1.012982726097107,grad_norm: 0.9999999144964221, iteration: 152543
loss: 1.0005179643630981,grad_norm: 0.9999991769949638, iteration: 152544
loss: 1.048764705657959,grad_norm: 0.9999991625593283, iteration: 152545
loss: 1.0269581079483032,grad_norm: 0.999999268090745, iteration: 152546
loss: 0.9976840615272522,grad_norm: 0.9999996048978318, iteration: 152547
loss: 1.0269124507904053,grad_norm: 0.9999991645907922, iteration: 152548
loss: 1.013490080833435,grad_norm: 0.999999246551456, iteration: 152549
loss: 0.9952382445335388,grad_norm: 0.8681156892978494, iteration: 152550
loss: 1.0341039896011353,grad_norm: 0.9999990041565795, iteration: 152551
loss: 0.9670804738998413,grad_norm: 0.9999998440432417, iteration: 152552
loss: 0.9989196062088013,grad_norm: 0.9726885201829663, iteration: 152553
loss: 1.0011019706726074,grad_norm: 0.9037625573554083, iteration: 152554
loss: 0.9847835302352905,grad_norm: 0.9863131145167754, iteration: 152555
loss: 1.0965639352798462,grad_norm: 0.9999997980328756, iteration: 152556
loss: 1.0063406229019165,grad_norm: 0.9999991956698373, iteration: 152557
loss: 1.007658839225769,grad_norm: 0.9999991207875486, iteration: 152558
loss: 1.0096286535263062,grad_norm: 0.9999990638168664, iteration: 152559
loss: 1.069811224937439,grad_norm: 0.9999994153188706, iteration: 152560
loss: 1.0134145021438599,grad_norm: 0.9999991278208765, iteration: 152561
loss: 0.9954493045806885,grad_norm: 0.9999990885460629, iteration: 152562
loss: 0.9824579358100891,grad_norm: 0.999999068741435, iteration: 152563
loss: 1.1327097415924072,grad_norm: 0.9999990720029867, iteration: 152564
loss: 1.009477138519287,grad_norm: 0.9638299128046702, iteration: 152565
loss: 1.1226160526275635,grad_norm: 0.9999993022100975, iteration: 152566
loss: 1.0149903297424316,grad_norm: 0.9999996187961, iteration: 152567
loss: 1.0639866590499878,grad_norm: 0.999999960997107, iteration: 152568
loss: 1.0074723958969116,grad_norm: 0.9977671194842577, iteration: 152569
loss: 1.0729625225067139,grad_norm: 0.9999993139715133, iteration: 152570
loss: 1.012067198753357,grad_norm: 0.999999067124959, iteration: 152571
loss: 1.0544182062149048,grad_norm: 0.9515985576668056, iteration: 152572
loss: 0.978548526763916,grad_norm: 0.908978370529638, iteration: 152573
loss: 0.9638181328773499,grad_norm: 0.9203377758871467, iteration: 152574
loss: 1.0195527076721191,grad_norm: 0.9505671483606772, iteration: 152575
loss: 1.185386300086975,grad_norm: 0.9999992099803039, iteration: 152576
loss: 0.9868983030319214,grad_norm: 0.9999992473069333, iteration: 152577
loss: 1.0219420194625854,grad_norm: 0.9999999246340316, iteration: 152578
loss: 1.015222430229187,grad_norm: 0.999999209871601, iteration: 152579
loss: 0.9734437465667725,grad_norm: 0.9999992510017396, iteration: 152580
loss: 1.0306122303009033,grad_norm: 0.9999994613440816, iteration: 152581
loss: 1.0696271657943726,grad_norm: 0.9999994769334262, iteration: 152582
loss: 1.0157628059387207,grad_norm: 0.9381078408571016, iteration: 152583
loss: 1.1348923444747925,grad_norm: 0.9999997712787864, iteration: 152584
loss: 1.0257877111434937,grad_norm: 0.9999989978668979, iteration: 152585
loss: 1.078298807144165,grad_norm: 0.9999994322382919, iteration: 152586
loss: 1.0796387195587158,grad_norm: 0.9999995536234828, iteration: 152587
loss: 1.0136377811431885,grad_norm: 0.8845789081116858, iteration: 152588
loss: 1.0267537832260132,grad_norm: 0.9999990579495717, iteration: 152589
loss: 1.1372417211532593,grad_norm: 0.9999996402752503, iteration: 152590
loss: 1.0331841707229614,grad_norm: 0.9999993714401291, iteration: 152591
loss: 1.0697335004806519,grad_norm: 0.9999996900062221, iteration: 152592
loss: 1.0117219686508179,grad_norm: 0.9983001559904249, iteration: 152593
loss: 0.9732686281204224,grad_norm: 0.9999990797171875, iteration: 152594
loss: 1.0368884801864624,grad_norm: 0.9999998063315788, iteration: 152595
loss: 0.9976904988288879,grad_norm: 0.9999991964997703, iteration: 152596
loss: 1.1196662187576294,grad_norm: 0.9999991425257088, iteration: 152597
loss: 1.1644766330718994,grad_norm: 0.9999996887677148, iteration: 152598
loss: 1.1618263721466064,grad_norm: 0.9999996382548413, iteration: 152599
loss: 1.0029419660568237,grad_norm: 0.9999994256653726, iteration: 152600
loss: 1.2424708604812622,grad_norm: 0.9999995387902508, iteration: 152601
loss: 1.0603060722351074,grad_norm: 0.9999999413769617, iteration: 152602
loss: 1.1869757175445557,grad_norm: 0.9999992212433837, iteration: 152603
loss: 1.0261560678482056,grad_norm: 0.999999143002893, iteration: 152604
loss: 1.0929503440856934,grad_norm: 0.999999844419455, iteration: 152605
loss: 1.0470231771469116,grad_norm: 0.9999993444890106, iteration: 152606
loss: 1.645158290863037,grad_norm: 0.9999999744471806, iteration: 152607
loss: 1.0813488960266113,grad_norm: 0.9999999253721396, iteration: 152608
loss: 1.0293891429901123,grad_norm: 0.9470120062244781, iteration: 152609
loss: 1.0273191928863525,grad_norm: 0.9999990650291053, iteration: 152610
loss: 1.0517770051956177,grad_norm: 0.9999992155373401, iteration: 152611
loss: 1.024923324584961,grad_norm: 0.9999992247776612, iteration: 152612
loss: 0.9750868082046509,grad_norm: 0.9999991589576337, iteration: 152613
loss: 1.0610885620117188,grad_norm: 0.9999993608059662, iteration: 152614
loss: 0.9777998924255371,grad_norm: 0.9546071160381024, iteration: 152615
loss: 1.0236114263534546,grad_norm: 0.9858149102223754, iteration: 152616
loss: 0.9849011301994324,grad_norm: 0.9999997291010857, iteration: 152617
loss: 1.0398417711257935,grad_norm: 0.9999993073164808, iteration: 152618
loss: 0.9881505966186523,grad_norm: 0.9999991856904066, iteration: 152619
loss: 0.9874599575996399,grad_norm: 0.908103392627681, iteration: 152620
loss: 1.000878095626831,grad_norm: 0.9999990745244626, iteration: 152621
loss: 1.002712607383728,grad_norm: 0.9256333762417309, iteration: 152622
loss: 1.1325340270996094,grad_norm: 0.999999631443389, iteration: 152623
loss: 0.9812714457511902,grad_norm: 0.99999905348694, iteration: 152624
loss: 1.0471082925796509,grad_norm: 0.9999993344581368, iteration: 152625
loss: 1.0261386632919312,grad_norm: 0.9999991917784031, iteration: 152626
loss: 1.010313868522644,grad_norm: 0.999999076705064, iteration: 152627
loss: 1.0251051187515259,grad_norm: 0.9017454657649892, iteration: 152628
loss: 1.0147209167480469,grad_norm: 0.9999991620508779, iteration: 152629
loss: 1.0050827264785767,grad_norm: 0.999999379033893, iteration: 152630
loss: 1.024554967880249,grad_norm: 0.792313827221175, iteration: 152631
loss: 1.0100387334823608,grad_norm: 0.9999994088779366, iteration: 152632
loss: 1.0265631675720215,grad_norm: 0.9999992434176822, iteration: 152633
loss: 0.9816686511039734,grad_norm: 0.9999991378747383, iteration: 152634
loss: 0.9960500597953796,grad_norm: 0.999999150655773, iteration: 152635
loss: 0.9839016199111938,grad_norm: 0.9999991967981805, iteration: 152636
loss: 0.9753028154373169,grad_norm: 0.8529334246682169, iteration: 152637
loss: 1.0291755199432373,grad_norm: 0.9999997646738849, iteration: 152638
loss: 0.9893943667411804,grad_norm: 0.9828277130858126, iteration: 152639
loss: 1.0205341577529907,grad_norm: 0.9707085552523024, iteration: 152640
loss: 0.9877898097038269,grad_norm: 0.9999991071571354, iteration: 152641
loss: 0.9943795800209045,grad_norm: 0.973883935811749, iteration: 152642
loss: 1.0286263227462769,grad_norm: 0.9999998241141249, iteration: 152643
loss: 0.9684939980506897,grad_norm: 0.9451569749392662, iteration: 152644
loss: 0.9910827279090881,grad_norm: 0.9999989899441236, iteration: 152645
loss: 1.027112603187561,grad_norm: 0.9999994798043343, iteration: 152646
loss: 0.9500105381011963,grad_norm: 0.9999996605639414, iteration: 152647
loss: 0.9811630845069885,grad_norm: 0.9999995107750715, iteration: 152648
loss: 1.0437685251235962,grad_norm: 0.9682584415906687, iteration: 152649
loss: 0.9907785654067993,grad_norm: 0.9999991733180604, iteration: 152650
loss: 0.9985625743865967,grad_norm: 0.9999991168475547, iteration: 152651
loss: 1.0345691442489624,grad_norm: 0.9984876676807722, iteration: 152652
loss: 1.0415761470794678,grad_norm: 0.9999991134719706, iteration: 152653
loss: 1.0467529296875,grad_norm: 0.9999993334606777, iteration: 152654
loss: 1.0307350158691406,grad_norm: 0.9999992760424888, iteration: 152655
loss: 0.9672644734382629,grad_norm: 0.981047943105391, iteration: 152656
loss: 0.9954041242599487,grad_norm: 0.8996380685647445, iteration: 152657
loss: 0.9907039403915405,grad_norm: 0.9999992519341693, iteration: 152658
loss: 1.0348095893859863,grad_norm: 0.9999992558165444, iteration: 152659
loss: 0.9787448644638062,grad_norm: 0.999999234969343, iteration: 152660
loss: 1.0140624046325684,grad_norm: 0.8779398269281717, iteration: 152661
loss: 1.052966594696045,grad_norm: 0.9999995273006538, iteration: 152662
loss: 0.9979618191719055,grad_norm: 0.9999991043634875, iteration: 152663
loss: 1.0095627307891846,grad_norm: 0.9581733219454167, iteration: 152664
loss: 1.0277637243270874,grad_norm: 0.9999993117927644, iteration: 152665
loss: 0.992413341999054,grad_norm: 0.8437717861321231, iteration: 152666
loss: 0.9853440523147583,grad_norm: 0.9999992293674785, iteration: 152667
loss: 1.0013519525527954,grad_norm: 0.9999991700458793, iteration: 152668
loss: 0.9855407476425171,grad_norm: 0.8075229506648272, iteration: 152669
loss: 1.0294973850250244,grad_norm: 0.9999992185301217, iteration: 152670
loss: 0.9865712523460388,grad_norm: 0.8725112780101438, iteration: 152671
loss: 0.9826194047927856,grad_norm: 0.9999990111085931, iteration: 152672
loss: 0.9906949400901794,grad_norm: 0.9999991293614596, iteration: 152673
loss: 1.0158743858337402,grad_norm: 0.9999991748975139, iteration: 152674
loss: 0.9993090629577637,grad_norm: 0.9999991368571417, iteration: 152675
loss: 1.0111212730407715,grad_norm: 0.999999173791512, iteration: 152676
loss: 1.0024536848068237,grad_norm: 0.9267556929255076, iteration: 152677
loss: 1.0078761577606201,grad_norm: 0.9999992504292853, iteration: 152678
loss: 0.9950461387634277,grad_norm: 0.9999991500654128, iteration: 152679
loss: 0.9552225470542908,grad_norm: 0.9999989777072376, iteration: 152680
loss: 0.9891903400421143,grad_norm: 0.8361853344567941, iteration: 152681
loss: 1.015951156616211,grad_norm: 0.9448095223781114, iteration: 152682
loss: 1.0266278982162476,grad_norm: 0.947564661646867, iteration: 152683
loss: 1.056482195854187,grad_norm: 0.9999998444291177, iteration: 152684
loss: 0.9641560316085815,grad_norm: 0.9999991570233311, iteration: 152685
loss: 0.9963704943656921,grad_norm: 0.916180742560405, iteration: 152686
loss: 0.9747540950775146,grad_norm: 0.9999991530187896, iteration: 152687
loss: 0.9637601375579834,grad_norm: 0.999999481932006, iteration: 152688
loss: 1.0061217546463013,grad_norm: 0.8891462135912351, iteration: 152689
loss: 0.9418110847473145,grad_norm: 0.9999991775781597, iteration: 152690
loss: 0.980449914932251,grad_norm: 0.9999992189117493, iteration: 152691
loss: 1.0027592182159424,grad_norm: 0.9999990456613493, iteration: 152692
loss: 1.0105760097503662,grad_norm: 0.9942816494211245, iteration: 152693
loss: 0.9612113237380981,grad_norm: 0.9999990734621963, iteration: 152694
loss: 1.0137827396392822,grad_norm: 0.9999992974664161, iteration: 152695
loss: 0.9866085648536682,grad_norm: 0.9999991688365114, iteration: 152696
loss: 0.9891473054885864,grad_norm: 0.9999990307012411, iteration: 152697
loss: 0.942029595375061,grad_norm: 0.9999990892611661, iteration: 152698
loss: 1.010365605354309,grad_norm: 0.9999991287986705, iteration: 152699
loss: 1.0049102306365967,grad_norm: 0.9711795649720891, iteration: 152700
loss: 0.9882643222808838,grad_norm: 0.8881478582545489, iteration: 152701
loss: 1.0285602807998657,grad_norm: 0.9999990780653353, iteration: 152702
loss: 0.9805209636688232,grad_norm: 0.9999993477732758, iteration: 152703
loss: 1.0359361171722412,grad_norm: 0.9999991790696916, iteration: 152704
loss: 1.0019807815551758,grad_norm: 0.9827749069461353, iteration: 152705
loss: 1.0168184041976929,grad_norm: 0.8804400805029742, iteration: 152706
loss: 0.9817571640014648,grad_norm: 0.9999997815429069, iteration: 152707
loss: 1.0133780241012573,grad_norm: 0.9833522372964384, iteration: 152708
loss: 0.9944021701812744,grad_norm: 0.9999989426263801, iteration: 152709
loss: 1.030478596687317,grad_norm: 0.9999992534221352, iteration: 152710
loss: 1.013100028038025,grad_norm: 0.999999099448446, iteration: 152711
loss: 0.9666129350662231,grad_norm: 0.9419366377105212, iteration: 152712
loss: 0.9964261651039124,grad_norm: 0.9999991591508315, iteration: 152713
loss: 1.0087231397628784,grad_norm: 0.9052371473491212, iteration: 152714
loss: 0.9873804450035095,grad_norm: 0.9218549356476525, iteration: 152715
loss: 1.0167385339736938,grad_norm: 0.9999989660251387, iteration: 152716
loss: 1.070205569267273,grad_norm: 0.9999996827166044, iteration: 152717
loss: 1.0133763551712036,grad_norm: 0.9746622871108843, iteration: 152718
loss: 1.0018846988677979,grad_norm: 0.9999991231805991, iteration: 152719
loss: 1.023696780204773,grad_norm: 0.950614689547943, iteration: 152720
loss: 0.9992148876190186,grad_norm: 0.9999990323285959, iteration: 152721
loss: 1.0013928413391113,grad_norm: 0.9999990119747832, iteration: 152722
loss: 1.0441862344741821,grad_norm: 0.9999996280648776, iteration: 152723
loss: 1.0153892040252686,grad_norm: 0.8334942934121963, iteration: 152724
loss: 1.0539824962615967,grad_norm: 0.9999995897045306, iteration: 152725
loss: 0.999062716960907,grad_norm: 0.9943911648718047, iteration: 152726
loss: 1.0338901281356812,grad_norm: 0.9404819387263379, iteration: 152727
loss: 1.0103493928909302,grad_norm: 0.9999991249440124, iteration: 152728
loss: 0.9858885407447815,grad_norm: 0.9999997420373352, iteration: 152729
loss: 0.9980266690254211,grad_norm: 0.9546746286786583, iteration: 152730
loss: 0.9854781627655029,grad_norm: 0.999999453673073, iteration: 152731
loss: 1.017461895942688,grad_norm: 0.9999990666495692, iteration: 152732
loss: 0.9911757111549377,grad_norm: 0.9406958275146163, iteration: 152733
loss: 1.0026499032974243,grad_norm: 0.9501346454188183, iteration: 152734
loss: 1.0127731561660767,grad_norm: 0.9999991404507195, iteration: 152735
loss: 1.0215587615966797,grad_norm: 0.9999991079008651, iteration: 152736
loss: 1.0195324420928955,grad_norm: 0.9999990233652485, iteration: 152737
loss: 1.0606653690338135,grad_norm: 0.9542521963709989, iteration: 152738
loss: 0.998988151550293,grad_norm: 0.9999990887106618, iteration: 152739
loss: 1.0150882005691528,grad_norm: 0.9999995081343531, iteration: 152740
loss: 1.0220130681991577,grad_norm: 0.9999990916082961, iteration: 152741
loss: 1.0240293741226196,grad_norm: 0.9339943073297768, iteration: 152742
loss: 1.0033087730407715,grad_norm: 0.9999990843792488, iteration: 152743
loss: 1.003164529800415,grad_norm: 0.8851682045936401, iteration: 152744
loss: 1.0291064977645874,grad_norm: 0.9999990406487695, iteration: 152745
loss: 0.9771335124969482,grad_norm: 0.9930687181100671, iteration: 152746
loss: 0.9652644395828247,grad_norm: 0.9999991533962019, iteration: 152747
loss: 0.9828425645828247,grad_norm: 0.9999991947579929, iteration: 152748
loss: 1.0184885263442993,grad_norm: 0.865201249229987, iteration: 152749
loss: 0.9475785493850708,grad_norm: 0.9999991510783809, iteration: 152750
loss: 0.9825771450996399,grad_norm: 0.9757631987407561, iteration: 152751
loss: 1.2410787343978882,grad_norm: 0.9999995632286959, iteration: 152752
loss: 0.9964058995246887,grad_norm: 0.9999991969855794, iteration: 152753
loss: 0.9523252248764038,grad_norm: 0.879206074104257, iteration: 152754
loss: 1.0740370750427246,grad_norm: 0.9999994672167385, iteration: 152755
loss: 1.093213438987732,grad_norm: 0.9999991110866846, iteration: 152756
loss: 1.007901906967163,grad_norm: 0.9999997797345982, iteration: 152757
loss: 1.037007212638855,grad_norm: 0.9903185196235443, iteration: 152758
loss: 0.982524573802948,grad_norm: 0.9700129075175613, iteration: 152759
loss: 0.9940255880355835,grad_norm: 0.908983006599328, iteration: 152760
loss: 1.0209842920303345,grad_norm: 0.9999991452452359, iteration: 152761
loss: 1.0315814018249512,grad_norm: 0.8399439373768959, iteration: 152762
loss: 1.0140475034713745,grad_norm: 0.9999991670114016, iteration: 152763
loss: 1.012503981590271,grad_norm: 0.9999992159974437, iteration: 152764
loss: 1.0028725862503052,grad_norm: 0.9999991250099698, iteration: 152765
loss: 1.008154034614563,grad_norm: 0.9419667145294887, iteration: 152766
loss: 1.0011036396026611,grad_norm: 0.9732591178375541, iteration: 152767
loss: 1.0384483337402344,grad_norm: 0.9999990105745308, iteration: 152768
loss: 0.9803324937820435,grad_norm: 0.9611651638253511, iteration: 152769
loss: 0.9978931546211243,grad_norm: 0.9999991366161306, iteration: 152770
loss: 0.9783374667167664,grad_norm: 0.9999992482242644, iteration: 152771
loss: 1.0103859901428223,grad_norm: 0.9999991311258081, iteration: 152772
loss: 0.9786489605903625,grad_norm: 0.9960952146609106, iteration: 152773
loss: 1.0188899040222168,grad_norm: 0.8473542100840222, iteration: 152774
loss: 1.027817964553833,grad_norm: 0.9999990283873788, iteration: 152775
loss: 0.961402177810669,grad_norm: 0.9815170224107361, iteration: 152776
loss: 0.9830319285392761,grad_norm: 0.9442683960448071, iteration: 152777
loss: 0.9928743839263916,grad_norm: 0.9538819733334707, iteration: 152778
loss: 1.0082823038101196,grad_norm: 0.9999992771765079, iteration: 152779
loss: 0.9656350612640381,grad_norm: 0.9999990840293099, iteration: 152780
loss: 0.9776487350463867,grad_norm: 0.9509411043460547, iteration: 152781
loss: 1.0030441284179688,grad_norm: 0.999999297196694, iteration: 152782
loss: 1.0250312089920044,grad_norm: 0.9999991638752165, iteration: 152783
loss: 1.0007786750793457,grad_norm: 0.9999990127941408, iteration: 152784
loss: 1.1318483352661133,grad_norm: 0.999999464178436, iteration: 152785
loss: 1.0917474031448364,grad_norm: 0.9999990621427977, iteration: 152786
loss: 1.0020171403884888,grad_norm: 0.922635905388195, iteration: 152787
loss: 0.9831425547599792,grad_norm: 0.9999991007654156, iteration: 152788
loss: 0.9725254774093628,grad_norm: 0.9127027117357303, iteration: 152789
loss: 0.9930763840675354,grad_norm: 0.9999998976703325, iteration: 152790
loss: 0.9770731329917908,grad_norm: 0.9999990376816358, iteration: 152791
loss: 0.9756397604942322,grad_norm: 0.9141839597700183, iteration: 152792
loss: 1.0214993953704834,grad_norm: 0.9503766414366808, iteration: 152793
loss: 1.0310163497924805,grad_norm: 0.9999990713765173, iteration: 152794
loss: 0.9922409653663635,grad_norm: 0.977500897195287, iteration: 152795
loss: 1.0061215162277222,grad_norm: 0.9999991378165878, iteration: 152796
loss: 0.9699387550354004,grad_norm: 0.9999990653539057, iteration: 152797
loss: 1.0698550939559937,grad_norm: 0.9999996741555205, iteration: 152798
loss: 1.023106575012207,grad_norm: 0.9999990075956336, iteration: 152799
loss: 1.0321730375289917,grad_norm: 0.9999992246756086, iteration: 152800
loss: 1.0167564153671265,grad_norm: 0.7946928993056264, iteration: 152801
loss: 1.009190320968628,grad_norm: 0.9785764120533285, iteration: 152802
loss: 0.9698670506477356,grad_norm: 0.9999990108647957, iteration: 152803
loss: 0.9873570203781128,grad_norm: 0.8650254389005546, iteration: 152804
loss: 0.9994370341300964,grad_norm: 0.9999994717955449, iteration: 152805
loss: 1.0246260166168213,grad_norm: 0.9863599740322233, iteration: 152806
loss: 0.9968786835670471,grad_norm: 0.9970420763970169, iteration: 152807
loss: 1.0169644355773926,grad_norm: 0.9999990993223093, iteration: 152808
loss: 1.081879734992981,grad_norm: 0.999999740852368, iteration: 152809
loss: 1.0339090824127197,grad_norm: 0.9999993868656231, iteration: 152810
loss: 1.046739101409912,grad_norm: 0.9999997349645073, iteration: 152811
loss: 0.9976295828819275,grad_norm: 0.8101617156851877, iteration: 152812
loss: 1.015334129333496,grad_norm: 0.9999992138513029, iteration: 152813
loss: 0.9986094832420349,grad_norm: 0.8999242159777134, iteration: 152814
loss: 0.9989613890647888,grad_norm: 0.9999990890385229, iteration: 152815
loss: 1.151112675666809,grad_norm: 0.9999998662300258, iteration: 152816
loss: 1.0007991790771484,grad_norm: 0.9999991004665381, iteration: 152817
loss: 1.0868314504623413,grad_norm: 0.9999995334303453, iteration: 152818
loss: 0.9760796427726746,grad_norm: 0.9999991599602576, iteration: 152819
loss: 1.029516339302063,grad_norm: 0.9163330887305867, iteration: 152820
loss: 1.0111452341079712,grad_norm: 0.9999993846911521, iteration: 152821
loss: 1.0256644487380981,grad_norm: 0.9999994804566604, iteration: 152822
loss: 0.9955288171768188,grad_norm: 0.9153464282167625, iteration: 152823
loss: 1.0011672973632812,grad_norm: 0.9999992811968663, iteration: 152824
loss: 0.9790269732475281,grad_norm: 0.994396648663212, iteration: 152825
loss: 1.0864465236663818,grad_norm: 0.9999995220018865, iteration: 152826
loss: 1.0116335153579712,grad_norm: 0.9999992882315772, iteration: 152827
loss: 0.9922099113464355,grad_norm: 0.9999993268658127, iteration: 152828
loss: 1.0432074069976807,grad_norm: 0.9999994061686737, iteration: 152829
loss: 0.9968050718307495,grad_norm: 0.9999991797158944, iteration: 152830
loss: 0.9961019158363342,grad_norm: 0.9999991436922021, iteration: 152831
loss: 1.0003042221069336,grad_norm: 0.9999991743735197, iteration: 152832
loss: 1.0211879014968872,grad_norm: 0.9999996334839624, iteration: 152833
loss: 0.9899784922599792,grad_norm: 0.9999991711934082, iteration: 152834
loss: 1.0110808610916138,grad_norm: 0.9999990249311753, iteration: 152835
loss: 1.0382498502731323,grad_norm: 0.9999991952187822, iteration: 152836
loss: 1.0127183198928833,grad_norm: 0.9999989897561232, iteration: 152837
loss: 1.046783447265625,grad_norm: 0.9999991887936979, iteration: 152838
loss: 0.9856033325195312,grad_norm: 0.9999991430063363, iteration: 152839
loss: 0.9893751740455627,grad_norm: 0.999999123013165, iteration: 152840
loss: 1.018996000289917,grad_norm: 0.9894620163789362, iteration: 152841
loss: 1.1314202547073364,grad_norm: 0.999999714414989, iteration: 152842
loss: 0.996560275554657,grad_norm: 0.9999990218933587, iteration: 152843
loss: 1.0241267681121826,grad_norm: 0.9898242854292085, iteration: 152844
loss: 1.2484157085418701,grad_norm: 0.9999994823079351, iteration: 152845
loss: 0.9995805025100708,grad_norm: 0.999999174011681, iteration: 152846
loss: 1.0158003568649292,grad_norm: 0.999999270005773, iteration: 152847
loss: 0.9793530106544495,grad_norm: 0.9999990905989331, iteration: 152848
loss: 1.006981611251831,grad_norm: 0.9999992269986799, iteration: 152849
loss: 0.9863713979721069,grad_norm: 0.9999990443871133, iteration: 152850
loss: 1.0140010118484497,grad_norm: 0.9999995930558313, iteration: 152851
loss: 0.9986320734024048,grad_norm: 0.9999991212053547, iteration: 152852
loss: 1.0257823467254639,grad_norm: 0.999999179994387, iteration: 152853
loss: 1.0205200910568237,grad_norm: 0.9999998543552034, iteration: 152854
loss: 0.9897624254226685,grad_norm: 0.9999991700613513, iteration: 152855
loss: 0.9743925333023071,grad_norm: 0.9999990998458012, iteration: 152856
loss: 0.9817891120910645,grad_norm: 0.9999990059516349, iteration: 152857
loss: 1.0171489715576172,grad_norm: 0.9999992732375883, iteration: 152858
loss: 1.035629153251648,grad_norm: 0.9769464681263158, iteration: 152859
loss: 0.9526239037513733,grad_norm: 0.9999995433244148, iteration: 152860
loss: 0.9880295991897583,grad_norm: 0.9999991985619405, iteration: 152861
loss: 1.0143353939056396,grad_norm: 0.9999992758552473, iteration: 152862
loss: 1.0635244846343994,grad_norm: 0.9999997943894173, iteration: 152863
loss: 0.9785203337669373,grad_norm: 0.9043521989963836, iteration: 152864
loss: 0.9793097972869873,grad_norm: 0.9937831340113205, iteration: 152865
loss: 1.0170692205429077,grad_norm: 0.9999991954513533, iteration: 152866
loss: 1.0437754392623901,grad_norm: 0.9999991666405087, iteration: 152867
loss: 0.9889759421348572,grad_norm: 0.9999992074331336, iteration: 152868
loss: 1.0239894390106201,grad_norm: 0.9999990308442384, iteration: 152869
loss: 1.0031719207763672,grad_norm: 0.978183246404211, iteration: 152870
loss: 0.9973171949386597,grad_norm: 0.9999992279998253, iteration: 152871
loss: 0.9811463356018066,grad_norm: 0.8256743237611064, iteration: 152872
loss: 1.0697975158691406,grad_norm: 0.9999991119889278, iteration: 152873
loss: 0.9755688905715942,grad_norm: 0.9787146088265389, iteration: 152874
loss: 1.0039633512496948,grad_norm: 0.99999909366346, iteration: 152875
loss: 1.063475251197815,grad_norm: 0.9999997234693491, iteration: 152876
loss: 0.985419750213623,grad_norm: 0.9759706860766936, iteration: 152877
loss: 0.9880590438842773,grad_norm: 0.9999991442362467, iteration: 152878
loss: 0.9573957920074463,grad_norm: 0.9638757714221255, iteration: 152879
loss: 1.009753704071045,grad_norm: 0.921630483499356, iteration: 152880
loss: 0.9951027631759644,grad_norm: 0.929945697012166, iteration: 152881
loss: 0.9826875329017639,grad_norm: 0.9812095168774874, iteration: 152882
loss: 1.0155912637710571,grad_norm: 0.8786066983298801, iteration: 152883
loss: 0.9996352195739746,grad_norm: 0.9129013050459666, iteration: 152884
loss: 1.0626685619354248,grad_norm: 0.9999996522883712, iteration: 152885
loss: 1.0264431238174438,grad_norm: 0.8772291885158662, iteration: 152886
loss: 0.9854799509048462,grad_norm: 0.9999991737662018, iteration: 152887
loss: 0.9827832579612732,grad_norm: 0.9999990907947621, iteration: 152888
loss: 0.9879872798919678,grad_norm: 0.9999990338944865, iteration: 152889
loss: 0.9989824295043945,grad_norm: 0.999999053457433, iteration: 152890
loss: 0.9801969528198242,grad_norm: 0.8110371621683123, iteration: 152891
loss: 0.9869179129600525,grad_norm: 0.9999992164641612, iteration: 152892
loss: 1.051763653755188,grad_norm: 0.8670653213501632, iteration: 152893
loss: 1.030442237854004,grad_norm: 0.9999992372985835, iteration: 152894
loss: 0.9831249713897705,grad_norm: 0.9683996358447463, iteration: 152895
loss: 0.9732934832572937,grad_norm: 0.8833651199488547, iteration: 152896
loss: 0.9942277073860168,grad_norm: 0.8920345998409692, iteration: 152897
loss: 0.9974535703659058,grad_norm: 0.999999190934142, iteration: 152898
loss: 1.0108635425567627,grad_norm: 0.9053755040345346, iteration: 152899
loss: 0.9701040387153625,grad_norm: 0.9463125119620976, iteration: 152900
loss: 1.0330859422683716,grad_norm: 0.9114398938759238, iteration: 152901
loss: 0.9777681231498718,grad_norm: 0.9999991439710109, iteration: 152902
loss: 0.9960451126098633,grad_norm: 0.9999991791200753, iteration: 152903
loss: 0.9982856512069702,grad_norm: 0.9999990703488142, iteration: 152904
loss: 1.0113799571990967,grad_norm: 0.9999991515579265, iteration: 152905
loss: 0.9672476649284363,grad_norm: 0.9999990226024166, iteration: 152906
loss: 0.9853297472000122,grad_norm: 0.8154591259138808, iteration: 152907
loss: 1.0293428897857666,grad_norm: 0.9999990702246803, iteration: 152908
loss: 1.0263367891311646,grad_norm: 0.9247599659626474, iteration: 152909
loss: 1.016493558883667,grad_norm: 0.9419896682452246, iteration: 152910
loss: 1.0021605491638184,grad_norm: 0.9999990816718357, iteration: 152911
loss: 0.9649193286895752,grad_norm: 0.9999990593103092, iteration: 152912
loss: 1.0069193840026855,grad_norm: 0.9999990557785119, iteration: 152913
loss: 0.9925898313522339,grad_norm: 0.8536096547209935, iteration: 152914
loss: 1.002001166343689,grad_norm: 0.8230491808574789, iteration: 152915
loss: 0.9993800520896912,grad_norm: 0.9999992180737708, iteration: 152916
loss: 1.0071359872817993,grad_norm: 0.9999993004647083, iteration: 152917
loss: 1.005409598350525,grad_norm: 0.9999990238729157, iteration: 152918
loss: 1.0149341821670532,grad_norm: 0.9844150550910228, iteration: 152919
loss: 1.0233904123306274,grad_norm: 0.9999997279891745, iteration: 152920
loss: 1.0290356874465942,grad_norm: 0.9999991420626314, iteration: 152921
loss: 0.9812634587287903,grad_norm: 0.9419474973357274, iteration: 152922
loss: 1.0218690633773804,grad_norm: 0.9999989902590233, iteration: 152923
loss: 0.9870740175247192,grad_norm: 0.9012545158445232, iteration: 152924
loss: 1.0248931646347046,grad_norm: 0.9999992095401986, iteration: 152925
loss: 1.0436452627182007,grad_norm: 0.9999993692587051, iteration: 152926
loss: 0.9959403276443481,grad_norm: 0.9999990742958478, iteration: 152927
loss: 1.0044560432434082,grad_norm: 0.9230491605886291, iteration: 152928
loss: 1.011305570602417,grad_norm: 0.9425667199296184, iteration: 152929
loss: 1.0001276731491089,grad_norm: 0.9985492316061717, iteration: 152930
loss: 0.967420756816864,grad_norm: 0.9999989152437342, iteration: 152931
loss: 1.0020924806594849,grad_norm: 0.8601094011106855, iteration: 152932
loss: 0.9720328450202942,grad_norm: 0.999999093167434, iteration: 152933
loss: 0.988619327545166,grad_norm: 0.9999996104076568, iteration: 152934
loss: 0.9908814430236816,grad_norm: 0.9999991301154949, iteration: 152935
loss: 1.0160737037658691,grad_norm: 0.8465650395240244, iteration: 152936
loss: 0.9791363477706909,grad_norm: 0.9999991330676553, iteration: 152937
loss: 1.0050106048583984,grad_norm: 0.9643102088567276, iteration: 152938
loss: 1.0130219459533691,grad_norm: 0.8729575392396131, iteration: 152939
loss: 0.9810736179351807,grad_norm: 0.9838360669082175, iteration: 152940
loss: 1.0266870260238647,grad_norm: 0.9469458734745515, iteration: 152941
loss: 0.9817765951156616,grad_norm: 0.9999991736544968, iteration: 152942
loss: 1.0025869607925415,grad_norm: 0.9881516359933054, iteration: 152943
loss: 1.0515713691711426,grad_norm: 0.9999990774071007, iteration: 152944
loss: 1.0388333797454834,grad_norm: 0.9999994001016866, iteration: 152945
loss: 1.0123093128204346,grad_norm: 0.9999991324635582, iteration: 152946
loss: 0.9996308088302612,grad_norm: 0.9999990120682158, iteration: 152947
loss: 1.0258479118347168,grad_norm: 0.9999995446687961, iteration: 152948
loss: 0.9778581261634827,grad_norm: 0.922429313546468, iteration: 152949
loss: 0.9784027338027954,grad_norm: 0.9490440155459139, iteration: 152950
loss: 1.0041934251785278,grad_norm: 0.9454724795850379, iteration: 152951
loss: 0.9886723756790161,grad_norm: 0.9189506562083753, iteration: 152952
loss: 1.0215003490447998,grad_norm: 0.9999995399744024, iteration: 152953
loss: 0.9880626797676086,grad_norm: 0.9745476721138239, iteration: 152954
loss: 1.0015815496444702,grad_norm: 0.9749702132431276, iteration: 152955
loss: 0.9936078786849976,grad_norm: 0.9999991827741704, iteration: 152956
loss: 1.0434142351150513,grad_norm: 0.9999989541994669, iteration: 152957
loss: 0.9638608694076538,grad_norm: 0.9999991250773554, iteration: 152958
loss: 1.0068042278289795,grad_norm: 0.9999991426874854, iteration: 152959
loss: 0.9905322790145874,grad_norm: 0.9999992906237706, iteration: 152960
loss: 1.0175504684448242,grad_norm: 0.8868331932326299, iteration: 152961
loss: 0.9884336590766907,grad_norm: 0.95354825755089, iteration: 152962
loss: 1.0094577074050903,grad_norm: 0.9999992660696366, iteration: 152963
loss: 1.0112642049789429,grad_norm: 0.999999174631124, iteration: 152964
loss: 1.0089309215545654,grad_norm: 0.9345027869631684, iteration: 152965
loss: 0.9651022553443909,grad_norm: 0.9999992384142841, iteration: 152966
loss: 0.9873647689819336,grad_norm: 0.9999990307486919, iteration: 152967
loss: 0.9966042637825012,grad_norm: 0.9999990394574163, iteration: 152968
loss: 1.0102572441101074,grad_norm: 0.999999095197917, iteration: 152969
loss: 1.0142300128936768,grad_norm: 0.8819660149070426, iteration: 152970
loss: 1.0063947439193726,grad_norm: 0.9999991392802438, iteration: 152971
loss: 1.0160601139068604,grad_norm: 0.9999996345265547, iteration: 152972
loss: 1.0738542079925537,grad_norm: 0.9044489007215734, iteration: 152973
loss: 0.9887262582778931,grad_norm: 0.9044667475939087, iteration: 152974
loss: 1.0480419397354126,grad_norm: 0.8958058497624101, iteration: 152975
loss: 1.0013186931610107,grad_norm: 0.956163640824855, iteration: 152976
loss: 1.008981466293335,grad_norm: 0.914375394412125, iteration: 152977
loss: 1.0201412439346313,grad_norm: 0.9838952017655395, iteration: 152978
loss: 0.9728184938430786,grad_norm: 0.909049971959291, iteration: 152979
loss: 0.9773731231689453,grad_norm: 0.999999284622101, iteration: 152980
loss: 0.9967244267463684,grad_norm: 0.9999991171594581, iteration: 152981
loss: 1.0659135580062866,grad_norm: 0.9999990532430757, iteration: 152982
loss: 0.9895201921463013,grad_norm: 0.945623540110795, iteration: 152983
loss: 0.9888172745704651,grad_norm: 0.9894755010628048, iteration: 152984
loss: 1.0067983865737915,grad_norm: 0.9999991196799045, iteration: 152985
loss: 0.9863309860229492,grad_norm: 0.9314170058604299, iteration: 152986
loss: 0.9985118508338928,grad_norm: 0.9405469381126025, iteration: 152987
loss: 0.9770848155021667,grad_norm: 0.9999990604472645, iteration: 152988
loss: 0.9958099722862244,grad_norm: 0.999999136370415, iteration: 152989
loss: 1.105082631111145,grad_norm: 0.999999659664012, iteration: 152990
loss: 1.0046414136886597,grad_norm: 0.9999990355161942, iteration: 152991
loss: 0.9755051136016846,grad_norm: 0.9999991450002119, iteration: 152992
loss: 1.0286810398101807,grad_norm: 0.9999992140618904, iteration: 152993
loss: 1.0326350927352905,grad_norm: 0.9999990981533295, iteration: 152994
loss: 1.007424235343933,grad_norm: 0.8459686089147629, iteration: 152995
loss: 0.9799739122390747,grad_norm: 0.8981369834857666, iteration: 152996
loss: 1.0097615718841553,grad_norm: 0.8476809233369992, iteration: 152997
loss: 0.9831039309501648,grad_norm: 0.9820966047245406, iteration: 152998
loss: 0.981293797492981,grad_norm: 0.9999990912303852, iteration: 152999
loss: 1.0188292264938354,grad_norm: 0.8086049791065479, iteration: 153000
loss: 1.009164571762085,grad_norm: 0.9999992666188177, iteration: 153001
loss: 1.001589059829712,grad_norm: 0.966495277298338, iteration: 153002
loss: 0.9570602178573608,grad_norm: 0.9999990026307574, iteration: 153003
loss: 1.052811622619629,grad_norm: 0.9681365317258751, iteration: 153004
loss: 1.010209083557129,grad_norm: 0.945736121739998, iteration: 153005
loss: 0.9753136038780212,grad_norm: 0.9166663125855586, iteration: 153006
loss: 1.0054923295974731,grad_norm: 0.932557527287954, iteration: 153007
loss: 0.9957308173179626,grad_norm: 0.9999990325994345, iteration: 153008
loss: 1.0480568408966064,grad_norm: 0.9740454086737922, iteration: 153009
loss: 0.9992552995681763,grad_norm: 0.9383080188078916, iteration: 153010
loss: 1.025735855102539,grad_norm: 0.9999992944807189, iteration: 153011
loss: 1.0045756101608276,grad_norm: 0.9582157648257412, iteration: 153012
loss: 1.0488277673721313,grad_norm: 0.9999993284455271, iteration: 153013
loss: 1.0227835178375244,grad_norm: 0.9999991258570384, iteration: 153014
loss: 0.9950065612792969,grad_norm: 0.9440642533790763, iteration: 153015
loss: 0.974865734577179,grad_norm: 0.8519943376200787, iteration: 153016
loss: 1.0245099067687988,grad_norm: 0.8082827749177471, iteration: 153017
loss: 0.9871861338615417,grad_norm: 0.9452113955985139, iteration: 153018
loss: 1.0007842779159546,grad_norm: 0.9912227128799495, iteration: 153019
loss: 0.9691869020462036,grad_norm: 0.9549735341398455, iteration: 153020
loss: 0.9654485583305359,grad_norm: 0.9813290420755738, iteration: 153021
loss: 1.0077341794967651,grad_norm: 0.769988460969416, iteration: 153022
loss: 1.0137165784835815,grad_norm: 0.9999991365180014, iteration: 153023
loss: 0.993218183517456,grad_norm: 0.9999991639233556, iteration: 153024
loss: 1.0011568069458008,grad_norm: 0.9697090761619946, iteration: 153025
loss: 1.0102434158325195,grad_norm: 0.9343887159192209, iteration: 153026
loss: 1.0050368309020996,grad_norm: 0.8104710142063073, iteration: 153027
loss: 0.9942581653594971,grad_norm: 0.99999912404869, iteration: 153028
loss: 1.006789207458496,grad_norm: 0.8770779914718325, iteration: 153029
loss: 0.9810991883277893,grad_norm: 0.9999991475657497, iteration: 153030
loss: 1.0400364398956299,grad_norm: 0.9999990690778933, iteration: 153031
loss: 1.0156731605529785,grad_norm: 0.9662144929336993, iteration: 153032
loss: 0.9879614114761353,grad_norm: 0.9999991262383023, iteration: 153033
loss: 0.9859579801559448,grad_norm: 0.9999993487297522, iteration: 153034
loss: 1.0025936365127563,grad_norm: 0.9814236104011157, iteration: 153035
loss: 0.9810521602630615,grad_norm: 0.8760483907067539, iteration: 153036
loss: 1.1282026767730713,grad_norm: 0.999999512379626, iteration: 153037
loss: 0.9736802577972412,grad_norm: 0.9999990690288346, iteration: 153038
loss: 0.9844595789909363,grad_norm: 0.9790777958765696, iteration: 153039
loss: 1.020710825920105,grad_norm: 0.9999990338957588, iteration: 153040
loss: 0.9909283518791199,grad_norm: 0.9999992619008684, iteration: 153041
loss: 1.0277706384658813,grad_norm: 0.9683154649893518, iteration: 153042
loss: 1.0160489082336426,grad_norm: 0.9999993323409736, iteration: 153043
loss: 1.004784107208252,grad_norm: 0.914456589294458, iteration: 153044
loss: 1.0314265489578247,grad_norm: 0.8636492875448393, iteration: 153045
loss: 1.0030335187911987,grad_norm: 0.9999990754262483, iteration: 153046
loss: 0.9876804947853088,grad_norm: 0.9960234803949309, iteration: 153047
loss: 0.9682251811027527,grad_norm: 0.9999991730637955, iteration: 153048
loss: 1.0053695440292358,grad_norm: 0.9498121209660214, iteration: 153049
loss: 0.9909117221832275,grad_norm: 0.9999991570783585, iteration: 153050
loss: 0.99187171459198,grad_norm: 0.8852535740843225, iteration: 153051
loss: 1.0681043863296509,grad_norm: 0.9999990490113719, iteration: 153052
loss: 1.020247459411621,grad_norm: 0.9900437917345287, iteration: 153053
loss: 1.002750277519226,grad_norm: 0.9999990632762871, iteration: 153054
loss: 1.0241622924804688,grad_norm: 0.9887138697450819, iteration: 153055
loss: 1.0116292238235474,grad_norm: 0.8852821236870702, iteration: 153056
loss: 0.9955283999443054,grad_norm: 0.9999992628164185, iteration: 153057
loss: 1.0102957487106323,grad_norm: 0.9999991764428051, iteration: 153058
loss: 0.9976035356521606,grad_norm: 0.9999992988169311, iteration: 153059
loss: 1.0087831020355225,grad_norm: 0.9406652645898052, iteration: 153060
loss: 0.9833917617797852,grad_norm: 0.9176552433863305, iteration: 153061
loss: 0.9864416122436523,grad_norm: 0.9999991408910988, iteration: 153062
loss: 0.9966686964035034,grad_norm: 0.999999153124144, iteration: 153063
loss: 0.9923490881919861,grad_norm: 0.9999992138007527, iteration: 153064
loss: 1.0080162286758423,grad_norm: 0.999999282619509, iteration: 153065
loss: 0.9952540993690491,grad_norm: 0.9999992999031395, iteration: 153066
loss: 0.9901973009109497,grad_norm: 0.9166250550596751, iteration: 153067
loss: 0.9725891947746277,grad_norm: 0.9999991463362061, iteration: 153068
loss: 0.9786096215248108,grad_norm: 0.9858806068823551, iteration: 153069
loss: 0.980611264705658,grad_norm: 0.9082859464235343, iteration: 153070
loss: 1.0381641387939453,grad_norm: 0.9999990447354865, iteration: 153071
loss: 1.0057942867279053,grad_norm: 0.9999991855536337, iteration: 153072
loss: 1.0046299695968628,grad_norm: 0.9999997511642653, iteration: 153073
loss: 1.0198278427124023,grad_norm: 0.9999992468047437, iteration: 153074
loss: 0.9856677651405334,grad_norm: 0.9999991343044105, iteration: 153075
loss: 1.0749812126159668,grad_norm: 0.9999994527291216, iteration: 153076
loss: 0.9865761995315552,grad_norm: 0.9999991589998696, iteration: 153077
loss: 1.0034284591674805,grad_norm: 0.9999992150035231, iteration: 153078
loss: 1.0119085311889648,grad_norm: 0.9471917696718206, iteration: 153079
loss: 0.9870235919952393,grad_norm: 0.9999990370366055, iteration: 153080
loss: 1.0181299448013306,grad_norm: 0.9999990723099218, iteration: 153081
loss: 1.009318470954895,grad_norm: 0.9869142914251965, iteration: 153082
loss: 0.983053982257843,grad_norm: 0.9999991506404141, iteration: 153083
loss: 0.977978527545929,grad_norm: 0.9999997524258877, iteration: 153084
loss: 0.9525759816169739,grad_norm: 0.9999990844184623, iteration: 153085
loss: 1.0414645671844482,grad_norm: 0.9999992379796242, iteration: 153086
loss: 1.0255441665649414,grad_norm: 0.9999993849583114, iteration: 153087
loss: 1.0091664791107178,grad_norm: 0.9715684566175397, iteration: 153088
loss: 0.9883788824081421,grad_norm: 0.9025129283541321, iteration: 153089
loss: 1.021327018737793,grad_norm: 0.9826666821347638, iteration: 153090
loss: 0.9930891394615173,grad_norm: 0.9581323892008452, iteration: 153091
loss: 0.9940725564956665,grad_norm: 0.9999992626949957, iteration: 153092
loss: 1.01301109790802,grad_norm: 0.999999086671793, iteration: 153093
loss: 1.02162504196167,grad_norm: 0.9999993253893114, iteration: 153094
loss: 0.9593741297721863,grad_norm: 0.8307252573376377, iteration: 153095
loss: 1.0015312433242798,grad_norm: 0.8953690386025637, iteration: 153096
loss: 1.0305795669555664,grad_norm: 0.9999998436760925, iteration: 153097
loss: 1.016653299331665,grad_norm: 0.9050739361677458, iteration: 153098
loss: 1.0045713186264038,grad_norm: 0.9999991706100501, iteration: 153099
loss: 1.0126426219940186,grad_norm: 0.8561574137475375, iteration: 153100
loss: 0.9835253953933716,grad_norm: 0.9999991344286397, iteration: 153101
loss: 0.9432681202888489,grad_norm: 0.9999991632933732, iteration: 153102
loss: 1.0503801107406616,grad_norm: 0.9999990670839338, iteration: 153103
loss: 0.9929361343383789,grad_norm: 0.8590435320795526, iteration: 153104
loss: 1.0664607286453247,grad_norm: 0.9999994723245319, iteration: 153105
loss: 1.021041989326477,grad_norm: 0.9295816658472273, iteration: 153106
loss: 1.0525299310684204,grad_norm: 0.9999992752201825, iteration: 153107
loss: 0.9768440127372742,grad_norm: 0.998949214653057, iteration: 153108
loss: 0.9891408681869507,grad_norm: 0.9526205593426205, iteration: 153109
loss: 0.9903654456138611,grad_norm: 0.9157450342435183, iteration: 153110
loss: 1.0190743207931519,grad_norm: 0.9999991554516203, iteration: 153111
loss: 0.9948943257331848,grad_norm: 0.9999991686878986, iteration: 153112
loss: 1.0254648923873901,grad_norm: 0.9999990991527419, iteration: 153113
loss: 1.0234873294830322,grad_norm: 0.9999990654075122, iteration: 153114
loss: 1.0349940061569214,grad_norm: 0.9999992647699308, iteration: 153115
loss: 0.9789500832557678,grad_norm: 0.9999992005213562, iteration: 153116
loss: 1.0053961277008057,grad_norm: 0.9999990945216091, iteration: 153117
loss: 1.001414179801941,grad_norm: 0.999999219725741, iteration: 153118
loss: 1.0223495960235596,grad_norm: 0.9659482547894844, iteration: 153119
loss: 1.060577392578125,grad_norm: 0.9999996054288853, iteration: 153120
loss: 0.9879441857337952,grad_norm: 0.8775442976502086, iteration: 153121
loss: 0.998938798904419,grad_norm: 0.9999994669721146, iteration: 153122
loss: 1.0637761354446411,grad_norm: 0.9999992365270439, iteration: 153123
loss: 1.0211273431777954,grad_norm: 0.943050847228237, iteration: 153124
loss: 1.014275074005127,grad_norm: 0.9999990831927634, iteration: 153125
loss: 1.0060077905654907,grad_norm: 0.999999250306969, iteration: 153126
loss: 1.0163869857788086,grad_norm: 0.9999991948903312, iteration: 153127
loss: 1.024107575416565,grad_norm: 0.8292052016032441, iteration: 153128
loss: 0.9610791206359863,grad_norm: 0.8923650056117822, iteration: 153129
loss: 0.9778825044631958,grad_norm: 0.9999991343102125, iteration: 153130
loss: 1.023612141609192,grad_norm: 0.9454724701100978, iteration: 153131
loss: 1.0362015962600708,grad_norm: 0.9541858954621697, iteration: 153132
loss: 0.9824650883674622,grad_norm: 0.9733840403174074, iteration: 153133
loss: 1.0022865533828735,grad_norm: 0.9851603861077507, iteration: 153134
loss: 1.0444036722183228,grad_norm: 0.9999996955233547, iteration: 153135
loss: 1.193085789680481,grad_norm: 0.9948545252695895, iteration: 153136
loss: 1.0175241231918335,grad_norm: 0.9767281213974996, iteration: 153137
loss: 0.9658946990966797,grad_norm: 0.9396690116907793, iteration: 153138
loss: 0.988548219203949,grad_norm: 0.9862136477871299, iteration: 153139
loss: 0.9750298261642456,grad_norm: 0.9834887929357037, iteration: 153140
loss: 0.980469286441803,grad_norm: 0.99999905573543, iteration: 153141
loss: 0.9927220940589905,grad_norm: 0.9999989796837001, iteration: 153142
loss: 1.0046138763427734,grad_norm: 0.9398640522193218, iteration: 153143
loss: 1.0001174211502075,grad_norm: 0.9999990256781444, iteration: 153144
loss: 1.053559422492981,grad_norm: 0.9999990968545377, iteration: 153145
loss: 0.9972241520881653,grad_norm: 0.951335444116182, iteration: 153146
loss: 0.985542893409729,grad_norm: 0.9999990995555453, iteration: 153147
loss: 1.0198291540145874,grad_norm: 0.8657914187166781, iteration: 153148
loss: 0.9857292771339417,grad_norm: 0.9603566665363956, iteration: 153149
loss: 1.0969117879867554,grad_norm: 0.9999994154090348, iteration: 153150
loss: 1.0046188831329346,grad_norm: 0.9715204695657436, iteration: 153151
loss: 0.9888009428977966,grad_norm: 0.9999990944158129, iteration: 153152
loss: 0.9838238954544067,grad_norm: 0.9863990160105599, iteration: 153153
loss: 1.0134133100509644,grad_norm: 0.9379000637041258, iteration: 153154
loss: 0.9786455035209656,grad_norm: 0.9999990261446309, iteration: 153155
loss: 1.0057499408721924,grad_norm: 0.9058856189529816, iteration: 153156
loss: 0.9856614470481873,grad_norm: 0.9569234278738683, iteration: 153157
loss: 1.0102250576019287,grad_norm: 0.9387699946597885, iteration: 153158
loss: 1.0030847787857056,grad_norm: 0.9999991908738679, iteration: 153159
loss: 0.9928653240203857,grad_norm: 0.9999990986587488, iteration: 153160
loss: 1.0295549631118774,grad_norm: 0.980213997767878, iteration: 153161
loss: 1.0291694402694702,grad_norm: 0.8629331875060261, iteration: 153162
loss: 0.9652931690216064,grad_norm: 0.9999990652356191, iteration: 153163
loss: 0.9867428541183472,grad_norm: 0.9999991244741403, iteration: 153164
loss: 1.03164803981781,grad_norm: 0.9999990650160256, iteration: 153165
loss: 1.0252094268798828,grad_norm: 0.9111031017957284, iteration: 153166
loss: 1.0187431573867798,grad_norm: 0.9999992572779266, iteration: 153167
loss: 1.0159029960632324,grad_norm: 0.9999991553377556, iteration: 153168
loss: 1.007292628288269,grad_norm: 0.9999990360097815, iteration: 153169
loss: 1.027907371520996,grad_norm: 0.9999995417841743, iteration: 153170
loss: 0.9947015047073364,grad_norm: 0.9734815610124531, iteration: 153171
loss: 0.997934103012085,grad_norm: 0.9999991673561824, iteration: 153172
loss: 1.0074647665023804,grad_norm: 0.8484840054064676, iteration: 153173
loss: 1.0408625602722168,grad_norm: 0.9999990880085718, iteration: 153174
loss: 0.9939639568328857,grad_norm: 0.9999993179043754, iteration: 153175
loss: 0.9923723936080933,grad_norm: 0.9999991296545976, iteration: 153176
loss: 1.0251215696334839,grad_norm: 0.9999992843609349, iteration: 153177
loss: 1.0295201539993286,grad_norm: 0.9796855500298846, iteration: 153178
loss: 0.9760556221008301,grad_norm: 0.9999990644390654, iteration: 153179
loss: 1.0043295621871948,grad_norm: 0.9462593876491429, iteration: 153180
loss: 1.0757089853286743,grad_norm: 0.999999825095786, iteration: 153181
loss: 0.989159882068634,grad_norm: 0.9999990327967639, iteration: 153182
loss: 1.0060142278671265,grad_norm: 0.9999991219368816, iteration: 153183
loss: 1.0096135139465332,grad_norm: 0.9999991359124034, iteration: 153184
loss: 0.9883878231048584,grad_norm: 0.9999991807162583, iteration: 153185
loss: 1.004574179649353,grad_norm: 0.9708000916824668, iteration: 153186
loss: 1.0092732906341553,grad_norm: 0.9999991466652725, iteration: 153187
loss: 0.9892610907554626,grad_norm: 0.9999991527851935, iteration: 153188
loss: 1.0165719985961914,grad_norm: 0.7742007580296347, iteration: 153189
loss: 0.9804746508598328,grad_norm: 0.9999992462251514, iteration: 153190
loss: 0.9674715399742126,grad_norm: 0.8324466188083243, iteration: 153191
loss: 1.0176512002944946,grad_norm: 0.9999989778913914, iteration: 153192
loss: 1.0828814506530762,grad_norm: 0.9999997753868598, iteration: 153193
loss: 1.0202579498291016,grad_norm: 0.999999615032802, iteration: 153194
loss: 0.9558693766593933,grad_norm: 0.9782893269479618, iteration: 153195
loss: 0.9848013520240784,grad_norm: 0.9262030356824307, iteration: 153196
loss: 1.0523743629455566,grad_norm: 0.9999991987964821, iteration: 153197
loss: 0.9788854718208313,grad_norm: 0.9999991298041845, iteration: 153198
loss: 0.9802228212356567,grad_norm: 0.9999991048900562, iteration: 153199
loss: 0.98195880651474,grad_norm: 0.9999990669939322, iteration: 153200
loss: 0.9910280704498291,grad_norm: 0.8468905173696298, iteration: 153201
loss: 0.9916715025901794,grad_norm: 0.9999989618369568, iteration: 153202
loss: 1.023758053779602,grad_norm: 0.8327803342488406, iteration: 153203
loss: 0.9896564483642578,grad_norm: 0.9999998131734502, iteration: 153204
loss: 1.0177550315856934,grad_norm: 0.999999127636276, iteration: 153205
loss: 0.9661368727684021,grad_norm: 0.9640605347493383, iteration: 153206
loss: 0.989288330078125,grad_norm: 0.9999992891353855, iteration: 153207
loss: 1.0068929195404053,grad_norm: 0.9204531308728795, iteration: 153208
loss: 1.021507978439331,grad_norm: 0.999999157715896, iteration: 153209
loss: 1.0047175884246826,grad_norm: 0.9240960198964622, iteration: 153210
loss: 1.0620228052139282,grad_norm: 0.9999995428792416, iteration: 153211
loss: 1.0647364854812622,grad_norm: 0.9782133929197522, iteration: 153212
loss: 0.9806937575340271,grad_norm: 0.9999993406741317, iteration: 153213
loss: 1.0284851789474487,grad_norm: 0.9999991638854074, iteration: 153214
loss: 0.9833229184150696,grad_norm: 0.8880251167828693, iteration: 153215
loss: 1.0200971364974976,grad_norm: 0.8719635372342938, iteration: 153216
loss: 1.0275142192840576,grad_norm: 0.9999990277782399, iteration: 153217
loss: 1.0015414953231812,grad_norm: 0.8845341479321662, iteration: 153218
loss: 1.0067343711853027,grad_norm: 0.9389991949690722, iteration: 153219
loss: 1.0121955871582031,grad_norm: 0.9999991631786083, iteration: 153220
loss: 1.0070439577102661,grad_norm: 0.7865797744186773, iteration: 153221
loss: 1.0336214303970337,grad_norm: 0.9999991023632707, iteration: 153222
loss: 1.0160654783248901,grad_norm: 0.9294282073481804, iteration: 153223
loss: 1.0293065309524536,grad_norm: 0.999999260476549, iteration: 153224
loss: 0.9542434215545654,grad_norm: 0.9945844731665503, iteration: 153225
loss: 1.0089256763458252,grad_norm: 0.9999990967472037, iteration: 153226
loss: 0.9999470710754395,grad_norm: 0.9999990188461219, iteration: 153227
loss: 1.0036406517028809,grad_norm: 0.9850755515189004, iteration: 153228
loss: 1.0061242580413818,grad_norm: 0.8435527475172911, iteration: 153229
loss: 1.021873950958252,grad_norm: 0.9291857811971396, iteration: 153230
loss: 1.030073642730713,grad_norm: 0.9999990300288519, iteration: 153231
loss: 1.0236082077026367,grad_norm: 0.9243679906396619, iteration: 153232
loss: 1.0045963525772095,grad_norm: 0.854453145278966, iteration: 153233
loss: 0.9997648596763611,grad_norm: 0.9999992875723819, iteration: 153234
loss: 0.9894976615905762,grad_norm: 0.9999992726063556, iteration: 153235
loss: 0.9919778108596802,grad_norm: 0.9999991038662588, iteration: 153236
loss: 0.9996400475502014,grad_norm: 0.9999999073857587, iteration: 153237
loss: 1.0246479511260986,grad_norm: 0.9999990685401376, iteration: 153238
loss: 1.0107768774032593,grad_norm: 0.9999991119703405, iteration: 153239
loss: 1.0227807760238647,grad_norm: 0.8977829291260409, iteration: 153240
loss: 0.9645677804946899,grad_norm: 0.9993594123356603, iteration: 153241
loss: 1.029781460762024,grad_norm: 0.9999993938167806, iteration: 153242
loss: 0.9986826777458191,grad_norm: 0.9999990130299746, iteration: 153243
loss: 0.9940343499183655,grad_norm: 0.9999991559935814, iteration: 153244
loss: 1.023544430732727,grad_norm: 0.9653989505464026, iteration: 153245
loss: 1.01290762424469,grad_norm: 0.7684380835683907, iteration: 153246
loss: 1.023332953453064,grad_norm: 0.9702594838821084, iteration: 153247
loss: 1.0230897665023804,grad_norm: 0.8562474623782061, iteration: 153248
loss: 1.0283185243606567,grad_norm: 0.9999990678440084, iteration: 153249
loss: 1.0014609098434448,grad_norm: 0.8653094591251448, iteration: 153250
loss: 1.001646637916565,grad_norm: 0.9999993638499669, iteration: 153251
loss: 1.002853512763977,grad_norm: 0.920654228440438, iteration: 153252
loss: 0.9785072207450867,grad_norm: 0.9984276302677385, iteration: 153253
loss: 1.0288071632385254,grad_norm: 0.9999990766313083, iteration: 153254
loss: 1.012858271598816,grad_norm: 0.9320479343292036, iteration: 153255
loss: 1.0015324354171753,grad_norm: 0.9420192896763914, iteration: 153256
loss: 1.018904447555542,grad_norm: 0.9999991148008147, iteration: 153257
loss: 1.0080031156539917,grad_norm: 0.9999990949110796, iteration: 153258
loss: 1.0098271369934082,grad_norm: 0.8841857424982087, iteration: 153259
loss: 1.0084069967269897,grad_norm: 0.9999990644732307, iteration: 153260
loss: 1.0091924667358398,grad_norm: 0.7971100734554685, iteration: 153261
loss: 0.9968163371086121,grad_norm: 0.9999992215628709, iteration: 153262
loss: 0.979397177696228,grad_norm: 0.9435466031438616, iteration: 153263
loss: 1.0248774290084839,grad_norm: 0.9999990389283482, iteration: 153264
loss: 0.9759567379951477,grad_norm: 0.9999990366006395, iteration: 153265
loss: 0.9776754975318909,grad_norm: 0.9999989697404367, iteration: 153266
loss: 1.0277283191680908,grad_norm: 0.9236039919321914, iteration: 153267
loss: 1.0049614906311035,grad_norm: 0.8649753217265885, iteration: 153268
loss: 0.9929037690162659,grad_norm: 0.8799269517041394, iteration: 153269
loss: 1.0308102369308472,grad_norm: 0.999999105116116, iteration: 153270
loss: 0.9945694208145142,grad_norm: 0.99999923102299, iteration: 153271
loss: 1.0085864067077637,grad_norm: 0.9999997391783784, iteration: 153272
loss: 0.9994143843650818,grad_norm: 0.9999991348663785, iteration: 153273
loss: 1.0094529390335083,grad_norm: 0.9917464562480567, iteration: 153274
loss: 1.0255292654037476,grad_norm: 0.9999992460861848, iteration: 153275
loss: 1.0121315717697144,grad_norm: 0.9999991748870368, iteration: 153276
loss: 0.9712265133857727,grad_norm: 0.9808306919429505, iteration: 153277
loss: 0.9616483449935913,grad_norm: 0.9023551575214999, iteration: 153278
loss: 1.0087324380874634,grad_norm: 0.9999990449113204, iteration: 153279
loss: 0.9790877103805542,grad_norm: 0.9618500167937684, iteration: 153280
loss: 1.0170108079910278,grad_norm: 0.9999989770006228, iteration: 153281
loss: 1.0670121908187866,grad_norm: 0.999999763551397, iteration: 153282
loss: 1.0176761150360107,grad_norm: 0.9999991979027814, iteration: 153283
loss: 1.0097695589065552,grad_norm: 0.9999989653692057, iteration: 153284
loss: 1.0027952194213867,grad_norm: 0.9999990386624851, iteration: 153285
loss: 1.0028855800628662,grad_norm: 0.9429192109462813, iteration: 153286
loss: 1.0080373287200928,grad_norm: 0.9551550198025247, iteration: 153287
loss: 0.9694013595581055,grad_norm: 0.7853233826877642, iteration: 153288
loss: 0.9965914487838745,grad_norm: 0.9917030136202412, iteration: 153289
loss: 0.9984530210494995,grad_norm: 0.9999990062192414, iteration: 153290
loss: 0.963786780834198,grad_norm: 0.8525935203466322, iteration: 153291
loss: 1.008615493774414,grad_norm: 0.9190731320500912, iteration: 153292
loss: 0.9910660982131958,grad_norm: 0.9999991382102353, iteration: 153293
loss: 1.004641056060791,grad_norm: 0.9999991355802829, iteration: 153294
loss: 1.0179321765899658,grad_norm: 0.9999992029658975, iteration: 153295
loss: 0.9792481064796448,grad_norm: 0.8917047752123523, iteration: 153296
loss: 0.9941762685775757,grad_norm: 0.9999990129530032, iteration: 153297
loss: 1.0084935426712036,grad_norm: 0.9999993089364079, iteration: 153298
loss: 0.9926892518997192,grad_norm: 0.901553405480819, iteration: 153299
loss: 1.011795163154602,grad_norm: 0.9999992739407793, iteration: 153300
loss: 0.9974937438964844,grad_norm: 0.839051405142288, iteration: 153301
loss: 1.0008200407028198,grad_norm: 0.9814857769234085, iteration: 153302
loss: 1.0208107233047485,grad_norm: 0.999999150809399, iteration: 153303
loss: 1.1219624280929565,grad_norm: 0.9999997089296867, iteration: 153304
loss: 0.9947345852851868,grad_norm: 0.9999992450818458, iteration: 153305
loss: 1.0009492635726929,grad_norm: 0.9939933376773358, iteration: 153306
loss: 1.0092203617095947,grad_norm: 0.999999071896838, iteration: 153307
loss: 0.9781655073165894,grad_norm: 0.9999991872255865, iteration: 153308
loss: 1.002447485923767,grad_norm: 0.8763430897796035, iteration: 153309
loss: 0.9978451132774353,grad_norm: 0.8488502583203834, iteration: 153310
loss: 0.9827843904495239,grad_norm: 0.9999991260956675, iteration: 153311
loss: 1.0219204425811768,grad_norm: 0.9999993170038509, iteration: 153312
loss: 0.9799575805664062,grad_norm: 0.9999992024169294, iteration: 153313
loss: 1.0098930597305298,grad_norm: 0.8298746464853547, iteration: 153314
loss: 1.0128320455551147,grad_norm: 0.9993179135119639, iteration: 153315
loss: 0.9870472550392151,grad_norm: 0.8603897487975041, iteration: 153316
loss: 0.9952998161315918,grad_norm: 0.9974895688647889, iteration: 153317
loss: 0.9959739446640015,grad_norm: 0.9999991352462949, iteration: 153318
loss: 0.9955419301986694,grad_norm: 0.999999288750585, iteration: 153319
loss: 1.0078539848327637,grad_norm: 0.9557630644494, iteration: 153320
loss: 0.9892682433128357,grad_norm: 0.892861419585403, iteration: 153321
loss: 1.0048372745513916,grad_norm: 0.9085130427209309, iteration: 153322
loss: 0.9985440969467163,grad_norm: 0.9999989965932763, iteration: 153323
loss: 0.9922245144844055,grad_norm: 0.9999991577911108, iteration: 153324
loss: 0.987849771976471,grad_norm: 0.9619946538416486, iteration: 153325
loss: 1.024454116821289,grad_norm: 0.9999991635118646, iteration: 153326
loss: 0.9807729125022888,grad_norm: 0.7873673436989813, iteration: 153327
loss: 1.0012279748916626,grad_norm: 0.7499963378554229, iteration: 153328
loss: 0.996849000453949,grad_norm: 0.854603111749126, iteration: 153329
loss: 1.009631872177124,grad_norm: 0.9999991673864813, iteration: 153330
loss: 0.977543294429779,grad_norm: 0.9715379452635111, iteration: 153331
loss: 1.018204689025879,grad_norm: 0.9705414991414809, iteration: 153332
loss: 0.9746732711791992,grad_norm: 0.9397749604542919, iteration: 153333
loss: 0.9725230932235718,grad_norm: 0.9999991841289544, iteration: 153334
loss: 0.995121419429779,grad_norm: 0.9999991080836722, iteration: 153335
loss: 1.0078741312026978,grad_norm: 0.9999991948935057, iteration: 153336
loss: 1.0478936433792114,grad_norm: 0.9999992082215827, iteration: 153337
loss: 1.0215061902999878,grad_norm: 0.9435660114470125, iteration: 153338
loss: 0.9954068064689636,grad_norm: 0.9032363210894212, iteration: 153339
loss: 1.029977798461914,grad_norm: 0.9999990708145622, iteration: 153340
loss: 0.955698549747467,grad_norm: 0.9999992653998276, iteration: 153341
loss: 0.9974668025970459,grad_norm: 0.9999992549770839, iteration: 153342
loss: 0.9959613084793091,grad_norm: 0.9925911849706484, iteration: 153343
loss: 0.9925130605697632,grad_norm: 0.9999989924532787, iteration: 153344
loss: 0.971187174320221,grad_norm: 0.8997808693096286, iteration: 153345
loss: 0.9907194972038269,grad_norm: 0.9999992202149086, iteration: 153346
loss: 1.0044692754745483,grad_norm: 0.9999990358420586, iteration: 153347
loss: 0.9992443323135376,grad_norm: 0.9559245681581818, iteration: 153348
loss: 1.0202534198760986,grad_norm: 0.9683092919556153, iteration: 153349
loss: 1.0134197473526,grad_norm: 0.9999991328446964, iteration: 153350
loss: 1.0558711290359497,grad_norm: 0.9999997136798565, iteration: 153351
loss: 0.9653439521789551,grad_norm: 0.9999990597097274, iteration: 153352
loss: 1.0295155048370361,grad_norm: 0.9999991609620382, iteration: 153353
loss: 1.0171234607696533,grad_norm: 0.9999990408384134, iteration: 153354
loss: 0.992401123046875,grad_norm: 0.9999990961998242, iteration: 153355
loss: 0.9668989777565002,grad_norm: 0.9999990219960194, iteration: 153356
loss: 0.9846931099891663,grad_norm: 0.9999991475747584, iteration: 153357
loss: 1.0559240579605103,grad_norm: 0.9999991290272889, iteration: 153358
loss: 1.0079333782196045,grad_norm: 0.8809276787402452, iteration: 153359
loss: 1.0178141593933105,grad_norm: 0.9999991387211983, iteration: 153360
loss: 1.0065007209777832,grad_norm: 0.9999991888282375, iteration: 153361
loss: 0.9572155475616455,grad_norm: 0.9251727879277093, iteration: 153362
loss: 1.0002286434173584,grad_norm: 0.9999990080036926, iteration: 153363
loss: 1.0003942251205444,grad_norm: 0.9792893464191277, iteration: 153364
loss: 1.0047526359558105,grad_norm: 0.8247517322154423, iteration: 153365
loss: 0.9681037664413452,grad_norm: 0.999999264946573, iteration: 153366
loss: 0.9964239001274109,grad_norm: 0.9999990483872608, iteration: 153367
loss: 1.0090745687484741,grad_norm: 0.9999991015149565, iteration: 153368
loss: 1.0216768980026245,grad_norm: 0.9999991132326794, iteration: 153369
loss: 0.9761097431182861,grad_norm: 0.9285333695873, iteration: 153370
loss: 1.0025990009307861,grad_norm: 0.9999997670906016, iteration: 153371
loss: 1.000300645828247,grad_norm: 0.9999992614808957, iteration: 153372
loss: 1.0082311630249023,grad_norm: 0.9999991408178472, iteration: 153373
loss: 1.0245864391326904,grad_norm: 0.9999989244390264, iteration: 153374
loss: 0.9986385107040405,grad_norm: 0.9999989799838469, iteration: 153375
loss: 1.0388468503952026,grad_norm: 0.9999990201068818, iteration: 153376
loss: 0.9944138526916504,grad_norm: 0.9999991895648712, iteration: 153377
loss: 1.029199242591858,grad_norm: 0.9999993161968155, iteration: 153378
loss: 1.0633243322372437,grad_norm: 0.9999992617907619, iteration: 153379
loss: 0.9884346127510071,grad_norm: 0.9999992196562018, iteration: 153380
loss: 1.0150270462036133,grad_norm: 0.8992429490708609, iteration: 153381
loss: 1.0026090145111084,grad_norm: 0.9999989826112439, iteration: 153382
loss: 1.009313702583313,grad_norm: 0.8212994909676848, iteration: 153383
loss: 0.9980015754699707,grad_norm: 0.9999992083222975, iteration: 153384
loss: 1.003363013267517,grad_norm: 0.9999992104671132, iteration: 153385
loss: 1.0303345918655396,grad_norm: 0.9999994164592989, iteration: 153386
loss: 1.0211082696914673,grad_norm: 0.9053361572021931, iteration: 153387
loss: 1.1206153631210327,grad_norm: 0.9999994094418209, iteration: 153388
loss: 1.0246930122375488,grad_norm: 0.9999991303457806, iteration: 153389
loss: 1.0312775373458862,grad_norm: 0.9999990709214152, iteration: 153390
loss: 0.9752912521362305,grad_norm: 0.9205112879196912, iteration: 153391
loss: 0.9894846081733704,grad_norm: 0.979964931968894, iteration: 153392
loss: 1.009433627128601,grad_norm: 0.9999990612396941, iteration: 153393
loss: 1.2180986404418945,grad_norm: 0.999999866180728, iteration: 153394
loss: 1.1052883863449097,grad_norm: 0.9999999329167296, iteration: 153395
loss: 0.9869418144226074,grad_norm: 0.9999992643437414, iteration: 153396
loss: 1.0279706716537476,grad_norm: 0.9999990088245383, iteration: 153397
loss: 1.0791071653366089,grad_norm: 0.9655938517415317, iteration: 153398
loss: 1.013284683227539,grad_norm: 0.9722961462424426, iteration: 153399
loss: 0.9964934587478638,grad_norm: 0.9999990374087933, iteration: 153400
loss: 1.0059961080551147,grad_norm: 0.9199745889456381, iteration: 153401
loss: 1.057357907295227,grad_norm: 0.9999991948309349, iteration: 153402
loss: 1.045453667640686,grad_norm: 0.9999998576193758, iteration: 153403
loss: 1.0029487609863281,grad_norm: 0.9999992303852006, iteration: 153404
loss: 1.014829158782959,grad_norm: 0.9999990645725152, iteration: 153405
loss: 1.0207544565200806,grad_norm: 0.9233192861593099, iteration: 153406
loss: 1.0189329385757446,grad_norm: 0.9999991338307472, iteration: 153407
loss: 0.9797053337097168,grad_norm: 0.9999990747632889, iteration: 153408
loss: 1.0069701671600342,grad_norm: 0.9999991099447463, iteration: 153409
loss: 0.9847000241279602,grad_norm: 0.9999992436589614, iteration: 153410
loss: 1.0753644704818726,grad_norm: 0.9999994981921617, iteration: 153411
loss: 0.9925419092178345,grad_norm: 0.9737495126285992, iteration: 153412
loss: 0.9745050072669983,grad_norm: 0.8654208957858793, iteration: 153413
loss: 0.9669095277786255,grad_norm: 0.9999991731277662, iteration: 153414
loss: 0.9911349415779114,grad_norm: 0.8656908965905489, iteration: 153415
loss: 1.0013900995254517,grad_norm: 0.9999989643462617, iteration: 153416
loss: 0.9901672601699829,grad_norm: 0.9999989765226177, iteration: 153417
loss: 0.9964520931243896,grad_norm: 0.8927508018320764, iteration: 153418
loss: 1.0340718030929565,grad_norm: 0.999999824781665, iteration: 153419
loss: 1.0350642204284668,grad_norm: 0.9999992844432186, iteration: 153420
loss: 0.9907949566841125,grad_norm: 0.9417036937222772, iteration: 153421
loss: 0.9959254264831543,grad_norm: 0.9594752712980216, iteration: 153422
loss: 1.0348044633865356,grad_norm: 0.9999996719995589, iteration: 153423
loss: 1.0083041191101074,grad_norm: 0.8812531904536612, iteration: 153424
loss: 1.0185222625732422,grad_norm: 0.8770478885806724, iteration: 153425
loss: 0.996667206287384,grad_norm: 0.9999990770894406, iteration: 153426
loss: 1.0044077634811401,grad_norm: 0.9887972245735389, iteration: 153427
loss: 1.0147254467010498,grad_norm: 0.9387811434003789, iteration: 153428
loss: 0.9883589148521423,grad_norm: 0.999999113137365, iteration: 153429
loss: 0.9674339890480042,grad_norm: 0.8313104908960642, iteration: 153430
loss: 0.9820502400398254,grad_norm: 0.9999993030178942, iteration: 153431
loss: 1.004011631011963,grad_norm: 0.9999991742715633, iteration: 153432
loss: 0.992805004119873,grad_norm: 0.9999993213115005, iteration: 153433
loss: 1.0232281684875488,grad_norm: 0.9999991390780671, iteration: 153434
loss: 1.0330758094787598,grad_norm: 0.9999992213546606, iteration: 153435
loss: 1.0038162469863892,grad_norm: 0.8550663676665394, iteration: 153436
loss: 0.9442180395126343,grad_norm: 0.9479952718997365, iteration: 153437
loss: 0.9890241622924805,grad_norm: 0.9999994211473197, iteration: 153438
loss: 1.0096431970596313,grad_norm: 0.9815104947852978, iteration: 153439
loss: 1.0162065029144287,grad_norm: 0.8930921941449214, iteration: 153440
loss: 0.991378664970398,grad_norm: 0.9999990737925033, iteration: 153441
loss: 0.9646855592727661,grad_norm: 0.9999991732404847, iteration: 153442
loss: 0.9989573955535889,grad_norm: 0.9999990461631058, iteration: 153443
loss: 0.9959344863891602,grad_norm: 0.9999990495090979, iteration: 153444
loss: 0.994134247303009,grad_norm: 0.999999118115491, iteration: 153445
loss: 1.0678458213806152,grad_norm: 0.9999999401375053, iteration: 153446
loss: 0.9749882221221924,grad_norm: 0.9999991503716934, iteration: 153447
loss: 0.9797423481941223,grad_norm: 0.9559286899529483, iteration: 153448
loss: 1.025769591331482,grad_norm: 0.999999168857686, iteration: 153449
loss: 1.047500491142273,grad_norm: 0.9999998134701115, iteration: 153450
loss: 0.9841527938842773,grad_norm: 0.9386263872631726, iteration: 153451
loss: 0.9990134239196777,grad_norm: 0.9999991673945501, iteration: 153452
loss: 1.0214873552322388,grad_norm: 0.8426299872530443, iteration: 153453
loss: 1.0038691759109497,grad_norm: 0.9054847793798811, iteration: 153454
loss: 1.0217845439910889,grad_norm: 0.9999998651867552, iteration: 153455
loss: 0.9851565361022949,grad_norm: 0.9656982411524119, iteration: 153456
loss: 1.005287528038025,grad_norm: 0.9999992880725005, iteration: 153457
loss: 1.02164626121521,grad_norm: 0.9999991565311404, iteration: 153458
loss: 1.0213539600372314,grad_norm: 0.9999990821627777, iteration: 153459
loss: 1.0046342611312866,grad_norm: 0.9298978765541414, iteration: 153460
loss: 0.9967488646507263,grad_norm: 0.9751968642360428, iteration: 153461
loss: 1.0011069774627686,grad_norm: 0.9999991587494745, iteration: 153462
loss: 1.008516550064087,grad_norm: 0.9999991490113642, iteration: 153463
loss: 0.9925881028175354,grad_norm: 0.9774682075026387, iteration: 153464
loss: 1.0321258306503296,grad_norm: 0.99999900657761, iteration: 153465
loss: 0.9845865964889526,grad_norm: 0.8787284111444664, iteration: 153466
loss: 1.0151771306991577,grad_norm: 0.9866815536744067, iteration: 153467
loss: 0.971875011920929,grad_norm: 0.9999990923426532, iteration: 153468
loss: 0.9934728741645813,grad_norm: 0.9312935740880709, iteration: 153469
loss: 0.9935529828071594,grad_norm: 0.8941330636083171, iteration: 153470
loss: 1.0062259435653687,grad_norm: 0.9999990970929915, iteration: 153471
loss: 1.0048388242721558,grad_norm: 0.8393010829029832, iteration: 153472
loss: 1.02562415599823,grad_norm: 0.9999990677591385, iteration: 153473
loss: 1.036919116973877,grad_norm: 0.890452756783057, iteration: 153474
loss: 0.9996086359024048,grad_norm: 0.9802490695004408, iteration: 153475
loss: 1.0158946514129639,grad_norm: 0.9685764398625907, iteration: 153476
loss: 0.9942281246185303,grad_norm: 0.9999993453222706, iteration: 153477
loss: 1.0513063669204712,grad_norm: 0.9999995957134135, iteration: 153478
loss: 0.9992618560791016,grad_norm: 0.9999989750032897, iteration: 153479
loss: 1.003446340560913,grad_norm: 0.999999292209877, iteration: 153480
loss: 1.0166178941726685,grad_norm: 0.9999990654740172, iteration: 153481
loss: 1.0126761198043823,grad_norm: 0.9999991814092121, iteration: 153482
loss: 1.0127688646316528,grad_norm: 0.9999991145380088, iteration: 153483
loss: 1.0246045589447021,grad_norm: 0.9747323739725299, iteration: 153484
loss: 1.0374934673309326,grad_norm: 0.9999992615969373, iteration: 153485
loss: 1.0126569271087646,grad_norm: 0.9999990342837731, iteration: 153486
loss: 1.0099351406097412,grad_norm: 0.9999991250137347, iteration: 153487
loss: 0.9569241404533386,grad_norm: 0.8643897352676586, iteration: 153488
loss: 1.006448745727539,grad_norm: 0.9910938861146767, iteration: 153489
loss: 0.9771808981895447,grad_norm: 0.9354745053105386, iteration: 153490
loss: 0.9759523272514343,grad_norm: 0.9683609074379405, iteration: 153491
loss: 1.0038691759109497,grad_norm: 0.9999991271930654, iteration: 153492
loss: 0.9971250295639038,grad_norm: 0.9380197858625331, iteration: 153493
loss: 0.9928166270256042,grad_norm: 0.9911099269054123, iteration: 153494
loss: 1.0190731287002563,grad_norm: 0.9268226568448688, iteration: 153495
loss: 0.979333758354187,grad_norm: 0.964664040879796, iteration: 153496
loss: 0.9996277689933777,grad_norm: 0.9999991513100739, iteration: 153497
loss: 1.015038013458252,grad_norm: 0.9999989529547844, iteration: 153498
loss: 0.974025547504425,grad_norm: 0.9827340371068116, iteration: 153499
loss: 1.0072755813598633,grad_norm: 0.9999990466757382, iteration: 153500
loss: 1.0123462677001953,grad_norm: 0.943865153714215, iteration: 153501
loss: 0.9982859492301941,grad_norm: 0.9043091024965458, iteration: 153502
loss: 1.0126795768737793,grad_norm: 0.9902767529638412, iteration: 153503
loss: 0.9687497019767761,grad_norm: 0.8188675863783077, iteration: 153504
loss: 0.9976072907447815,grad_norm: 0.9701512637381082, iteration: 153505
loss: 0.9801129698753357,grad_norm: 0.9999991833888757, iteration: 153506
loss: 0.9816969037055969,grad_norm: 0.9999991054112498, iteration: 153507
loss: 0.957684338092804,grad_norm: 0.999999183116128, iteration: 153508
loss: 1.0248960256576538,grad_norm: 0.9999989436972962, iteration: 153509
loss: 0.9949440956115723,grad_norm: 0.9704930997823065, iteration: 153510
loss: 1.004726529121399,grad_norm: 0.9999992755853095, iteration: 153511
loss: 1.0443511009216309,grad_norm: 0.9104035770941699, iteration: 153512
loss: 1.0144803524017334,grad_norm: 0.9443866110891287, iteration: 153513
loss: 0.9845088124275208,grad_norm: 0.9629604181755124, iteration: 153514
loss: 1.1473219394683838,grad_norm: 0.9999990636581096, iteration: 153515
loss: 1.0378869771957397,grad_norm: 0.9999992991273382, iteration: 153516
loss: 1.0238803625106812,grad_norm: 0.9868758512770883, iteration: 153517
loss: 1.0014082193374634,grad_norm: 0.999999096795626, iteration: 153518
loss: 0.9988639950752258,grad_norm: 0.9999989990390614, iteration: 153519
loss: 0.9777647852897644,grad_norm: 0.9779400205074614, iteration: 153520
loss: 1.012852430343628,grad_norm: 0.9195068580608726, iteration: 153521
loss: 0.9754862189292908,grad_norm: 0.9827971452192124, iteration: 153522
loss: 1.018936276435852,grad_norm: 0.9934941886404745, iteration: 153523
loss: 0.9873781800270081,grad_norm: 0.9675160130578254, iteration: 153524
loss: 1.007515788078308,grad_norm: 0.9701514722232685, iteration: 153525
loss: 1.0090088844299316,grad_norm: 0.999999044103959, iteration: 153526
loss: 0.965132474899292,grad_norm: 0.9805079601344269, iteration: 153527
loss: 0.9803714752197266,grad_norm: 0.846217697575959, iteration: 153528
loss: 1.034404993057251,grad_norm: 0.9999990776729669, iteration: 153529
loss: 0.9990072250366211,grad_norm: 0.9999992275507056, iteration: 153530
loss: 0.9416818618774414,grad_norm: 0.999999098983866, iteration: 153531
loss: 1.004684329032898,grad_norm: 0.9048115068334297, iteration: 153532
loss: 1.013108491897583,grad_norm: 0.9781124683809526, iteration: 153533
loss: 1.0029996633529663,grad_norm: 0.999999053121497, iteration: 153534
loss: 0.9807103276252747,grad_norm: 0.9999992728130026, iteration: 153535
loss: 0.9945772290229797,grad_norm: 0.9999996493379176, iteration: 153536
loss: 0.9864592552185059,grad_norm: 0.8289098329490039, iteration: 153537
loss: 1.0527124404907227,grad_norm: 0.9999997589136013, iteration: 153538
loss: 0.9974411129951477,grad_norm: 0.9999990047430608, iteration: 153539
loss: 0.9965181946754456,grad_norm: 0.9999992149751834, iteration: 153540
loss: 0.9678013324737549,grad_norm: 0.9999989428193045, iteration: 153541
loss: 1.0000766515731812,grad_norm: 0.9999992178879287, iteration: 153542
loss: 0.9739794135093689,grad_norm: 0.8561947147223729, iteration: 153543
loss: 1.0286130905151367,grad_norm: 0.9999990945433058, iteration: 153544
loss: 0.9829435348510742,grad_norm: 0.9272623433504581, iteration: 153545
loss: 1.0387057065963745,grad_norm: 0.9389445176506177, iteration: 153546
loss: 0.9935005903244019,grad_norm: 0.9999992750433491, iteration: 153547
loss: 0.983437716960907,grad_norm: 0.9259470864131226, iteration: 153548
loss: 0.9784261584281921,grad_norm: 0.9999992075715549, iteration: 153549
loss: 1.0305674076080322,grad_norm: 0.9999993588032088, iteration: 153550
loss: 1.0242568254470825,grad_norm: 0.9999989234840337, iteration: 153551
loss: 0.9934328198432922,grad_norm: 0.8066100207637857, iteration: 153552
loss: 1.06502103805542,grad_norm: 0.9550576263437235, iteration: 153553
loss: 0.9797341823577881,grad_norm: 0.9999990573351565, iteration: 153554
loss: 1.0693625211715698,grad_norm: 0.9999993616477412, iteration: 153555
loss: 0.9839944243431091,grad_norm: 0.8550666702498494, iteration: 153556
loss: 1.0019396543502808,grad_norm: 0.9638490111507936, iteration: 153557
loss: 0.9652746915817261,grad_norm: 0.9480743188883942, iteration: 153558
loss: 1.0049138069152832,grad_norm: 0.9999991711328869, iteration: 153559
loss: 0.9908056855201721,grad_norm: 0.9999994400600408, iteration: 153560
loss: 0.9772305488586426,grad_norm: 0.9999991558442323, iteration: 153561
loss: 0.9802481532096863,grad_norm: 0.9999991289661196, iteration: 153562
loss: 0.991629421710968,grad_norm: 0.9999992129961521, iteration: 153563
loss: 0.9857577085494995,grad_norm: 0.9570386014543909, iteration: 153564
loss: 0.9884132742881775,grad_norm: 0.9999989935856102, iteration: 153565
loss: 0.9869680404663086,grad_norm: 0.950125132350191, iteration: 153566
loss: 1.017852544784546,grad_norm: 0.9717128485892409, iteration: 153567
loss: 1.0095775127410889,grad_norm: 0.9999990549300625, iteration: 153568
loss: 1.0215915441513062,grad_norm: 0.999999126323679, iteration: 153569
loss: 1.0128848552703857,grad_norm: 0.999999022875427, iteration: 153570
loss: 1.000890851020813,grad_norm: 0.9439992365945061, iteration: 153571
loss: 0.9847989082336426,grad_norm: 0.904594158901787, iteration: 153572
loss: 1.014359712600708,grad_norm: 0.9279902003740359, iteration: 153573
loss: 1.0080233812332153,grad_norm: 0.8612709016520232, iteration: 153574
loss: 1.0958236455917358,grad_norm: 0.9999992820412476, iteration: 153575
loss: 1.0274935960769653,grad_norm: 0.9999991341745627, iteration: 153576
loss: 1.0074236392974854,grad_norm: 0.9999989467369069, iteration: 153577
loss: 1.0015769004821777,grad_norm: 0.9679486574096479, iteration: 153578
loss: 1.1702090501785278,grad_norm: 0.9999993552955981, iteration: 153579
loss: 1.0149297714233398,grad_norm: 0.9999990950990061, iteration: 153580
loss: 0.9784026741981506,grad_norm: 0.911680122758576, iteration: 153581
loss: 1.0581765174865723,grad_norm: 0.9999990787288802, iteration: 153582
loss: 1.0106905698776245,grad_norm: 0.9999991001126026, iteration: 153583
loss: 0.984662652015686,grad_norm: 0.9999992697607455, iteration: 153584
loss: 1.0051507949829102,grad_norm: 0.9188074634477227, iteration: 153585
loss: 0.9968897700309753,grad_norm: 0.8761388829207425, iteration: 153586
loss: 1.015106439590454,grad_norm: 0.9616528882745636, iteration: 153587
loss: 0.9938291311264038,grad_norm: 0.8810125082767168, iteration: 153588
loss: 0.9696263074874878,grad_norm: 0.9999990552296798, iteration: 153589
loss: 1.0201350450515747,grad_norm: 0.9999990072584138, iteration: 153590
loss: 1.0095921754837036,grad_norm: 0.9999989314788876, iteration: 153591
loss: 1.0279983282089233,grad_norm: 0.9673374187686051, iteration: 153592
loss: 0.9981689453125,grad_norm: 0.889668237254713, iteration: 153593
loss: 0.9923049211502075,grad_norm: 0.8305425232481052, iteration: 153594
loss: 1.0159165859222412,grad_norm: 0.9557100555755682, iteration: 153595
loss: 1.0067071914672852,grad_norm: 0.999999107788703, iteration: 153596
loss: 0.9731518626213074,grad_norm: 0.9999990868532379, iteration: 153597
loss: 1.006445288658142,grad_norm: 0.994462180330099, iteration: 153598
loss: 1.0130373239517212,grad_norm: 0.9999992535249158, iteration: 153599
loss: 1.0010424852371216,grad_norm: 0.8461314296087912, iteration: 153600
loss: 1.0097873210906982,grad_norm: 0.9999995165989736, iteration: 153601
loss: 0.9983859062194824,grad_norm: 0.999999167842963, iteration: 153602
loss: 1.032247543334961,grad_norm: 0.9521398712274165, iteration: 153603
loss: 0.9822550415992737,grad_norm: 0.9999990559528059, iteration: 153604
loss: 1.0070751905441284,grad_norm: 0.9035514173950009, iteration: 153605
loss: 1.007635474205017,grad_norm: 0.999999226549883, iteration: 153606
loss: 1.0080101490020752,grad_norm: 0.9999997817773467, iteration: 153607
loss: 0.990423321723938,grad_norm: 0.8900989200950102, iteration: 153608
loss: 0.9836090803146362,grad_norm: 0.9999990045300893, iteration: 153609
loss: 0.9740591645240784,grad_norm: 0.9768924978543252, iteration: 153610
loss: 1.0137463808059692,grad_norm: 0.8572019693383853, iteration: 153611
loss: 0.9967259168624878,grad_norm: 0.999999125083068, iteration: 153612
loss: 0.9653374552726746,grad_norm: 0.9999991408518665, iteration: 153613
loss: 1.025619387626648,grad_norm: 0.9999991020337536, iteration: 153614
loss: 1.0274240970611572,grad_norm: 0.9944247897730935, iteration: 153615
loss: 1.0072134733200073,grad_norm: 0.9999991441301133, iteration: 153616
loss: 0.9860809445381165,grad_norm: 0.9999991992596746, iteration: 153617
loss: 1.0299055576324463,grad_norm: 0.9999995368180403, iteration: 153618
loss: 1.0119069814682007,grad_norm: 0.9999989671057473, iteration: 153619
loss: 0.982627809047699,grad_norm: 0.9299940451611652, iteration: 153620
loss: 0.9794556498527527,grad_norm: 0.7958742212292875, iteration: 153621
loss: 1.0009061098098755,grad_norm: 0.9999993340942729, iteration: 153622
loss: 0.9954724311828613,grad_norm: 0.9937256398730325, iteration: 153623
loss: 0.9865123629570007,grad_norm: 0.999998971729032, iteration: 153624
loss: 0.9984439611434937,grad_norm: 0.9312593474101943, iteration: 153625
loss: 1.003523588180542,grad_norm: 0.9999989382273611, iteration: 153626
loss: 1.020207166671753,grad_norm: 0.8709664937336933, iteration: 153627
loss: 0.9770892262458801,grad_norm: 0.9999990394058914, iteration: 153628
loss: 0.981548547744751,grad_norm: 0.9999991839563787, iteration: 153629
loss: 0.9944208860397339,grad_norm: 0.9999990950146456, iteration: 153630
loss: 1.0195294618606567,grad_norm: 0.9999992160261103, iteration: 153631
loss: 1.030794620513916,grad_norm: 0.9999992536083577, iteration: 153632
loss: 0.9874070882797241,grad_norm: 0.957290880014543, iteration: 153633
loss: 0.9826783537864685,grad_norm: 0.9636410922729912, iteration: 153634
loss: 1.0053273439407349,grad_norm: 0.8666024505755666, iteration: 153635
loss: 1.0120793581008911,grad_norm: 0.8509867493410637, iteration: 153636
loss: 1.011823296546936,grad_norm: 0.993914547744764, iteration: 153637
loss: 1.0262354612350464,grad_norm: 0.9999991156747886, iteration: 153638
loss: 1.0063117742538452,grad_norm: 0.999999168437882, iteration: 153639
loss: 0.9974619150161743,grad_norm: 0.9999991024432174, iteration: 153640
loss: 1.0113660097122192,grad_norm: 0.9455174392883517, iteration: 153641
loss: 1.003665566444397,grad_norm: 0.9999990147986823, iteration: 153642
loss: 0.9875790476799011,grad_norm: 0.9382395142528084, iteration: 153643
loss: 0.9954497814178467,grad_norm: 0.9706781882634317, iteration: 153644
loss: 0.9732277393341064,grad_norm: 0.9999992085840618, iteration: 153645
loss: 1.0039128065109253,grad_norm: 0.866081023113789, iteration: 153646
loss: 0.9623690843582153,grad_norm: 0.9538142796293421, iteration: 153647
loss: 1.0390329360961914,grad_norm: 0.9999990705059583, iteration: 153648
loss: 0.9900385737419128,grad_norm: 0.9181194493592231, iteration: 153649
loss: 0.9815826416015625,grad_norm: 0.9999991517821644, iteration: 153650
loss: 1.0002976655960083,grad_norm: 0.8725571382210645, iteration: 153651
loss: 1.007237195968628,grad_norm: 0.999999171968986, iteration: 153652
loss: 0.994615375995636,grad_norm: 0.9999990632793014, iteration: 153653
loss: 1.015028476715088,grad_norm: 0.9999991684716059, iteration: 153654
loss: 0.9659521579742432,grad_norm: 0.9999992725383864, iteration: 153655
loss: 1.022118091583252,grad_norm: 0.9999991879143519, iteration: 153656
loss: 1.0066134929656982,grad_norm: 0.9999992218191598, iteration: 153657
loss: 0.9921452403068542,grad_norm: 0.9253385799757183, iteration: 153658
loss: 1.005981206893921,grad_norm: 0.9999991138011892, iteration: 153659
loss: 1.017511248588562,grad_norm: 0.9977551063449344, iteration: 153660
loss: 1.025402545928955,grad_norm: 0.8834503059772806, iteration: 153661
loss: 1.0429917573928833,grad_norm: 0.9269831376480726, iteration: 153662
loss: 0.9960713386535645,grad_norm: 0.9999992504972458, iteration: 153663
loss: 0.9980192184448242,grad_norm: 0.8956853682840394, iteration: 153664
loss: 1.0410337448120117,grad_norm: 0.9999994170642493, iteration: 153665
loss: 1.0168334245681763,grad_norm: 0.9999989560799184, iteration: 153666
loss: 0.9729490280151367,grad_norm: 0.9999992425321775, iteration: 153667
loss: 1.0044101476669312,grad_norm: 0.9999992761993132, iteration: 153668
loss: 1.0145635604858398,grad_norm: 0.9999990860470024, iteration: 153669
loss: 1.05366849899292,grad_norm: 0.9999992979997208, iteration: 153670
loss: 1.0337811708450317,grad_norm: 0.8332198629693407, iteration: 153671
loss: 1.0178543329238892,grad_norm: 0.999999199639177, iteration: 153672
loss: 0.9668220281600952,grad_norm: 0.9761752184301142, iteration: 153673
loss: 1.0011318922042847,grad_norm: 0.9999990992517065, iteration: 153674
loss: 1.0139762163162231,grad_norm: 0.9999992791422765, iteration: 153675
loss: 0.9887587428092957,grad_norm: 0.996921557737991, iteration: 153676
loss: 1.0003337860107422,grad_norm: 0.9999991435977785, iteration: 153677
loss: 0.9798383712768555,grad_norm: 0.999999189728338, iteration: 153678
loss: 1.0426039695739746,grad_norm: 0.879533125191436, iteration: 153679
loss: 1.0452691316604614,grad_norm: 0.999999671150465, iteration: 153680
loss: 1.011104702949524,grad_norm: 0.9999990073170159, iteration: 153681
loss: 0.97300124168396,grad_norm: 0.9999990707600068, iteration: 153682
loss: 0.982149064540863,grad_norm: 0.8884374552271097, iteration: 153683
loss: 1.0368279218673706,grad_norm: 0.9941806894528501, iteration: 153684
loss: 0.9903663396835327,grad_norm: 0.9999992538762866, iteration: 153685
loss: 1.0107733011245728,grad_norm: 0.9359793823913809, iteration: 153686
loss: 0.9923142790794373,grad_norm: 0.9112562863774213, iteration: 153687
loss: 1.054282307624817,grad_norm: 0.9999991339172559, iteration: 153688
loss: 0.9683979749679565,grad_norm: 0.8469910238658737, iteration: 153689
loss: 1.0462887287139893,grad_norm: 0.9999996571730653, iteration: 153690
loss: 0.9805713295936584,grad_norm: 0.9965576143009559, iteration: 153691
loss: 1.0031428337097168,grad_norm: 0.9999991035849509, iteration: 153692
loss: 1.009602665901184,grad_norm: 0.9547249201012779, iteration: 153693
loss: 0.9974024295806885,grad_norm: 0.8643947908922778, iteration: 153694
loss: 1.0226320028305054,grad_norm: 0.9887050705465056, iteration: 153695
loss: 1.0339807271957397,grad_norm: 0.9999990073043028, iteration: 153696
loss: 0.9951596856117249,grad_norm: 0.9999992209686469, iteration: 153697
loss: 0.9763349890708923,grad_norm: 0.9999991704378202, iteration: 153698
loss: 0.9759472608566284,grad_norm: 0.8189188806136282, iteration: 153699
loss: 1.2062268257141113,grad_norm: 0.9999997933226735, iteration: 153700
loss: 0.9884002208709717,grad_norm: 0.9999991080551239, iteration: 153701
loss: 1.015036940574646,grad_norm: 0.9999991422939993, iteration: 153702
loss: 1.001293659210205,grad_norm: 0.9580960520053854, iteration: 153703
loss: 1.011899709701538,grad_norm: 0.8628792488532551, iteration: 153704
loss: 0.9994490146636963,grad_norm: 0.9999991913892492, iteration: 153705
loss: 0.9988794922828674,grad_norm: 0.9999992732940808, iteration: 153706
loss: 0.9909308552742004,grad_norm: 0.8795427222277306, iteration: 153707
loss: 0.9974260926246643,grad_norm: 0.9999991755195862, iteration: 153708
loss: 1.0474928617477417,grad_norm: 0.9999991496802948, iteration: 153709
loss: 1.0220701694488525,grad_norm: 0.999999123099652, iteration: 153710
loss: 1.0325366258621216,grad_norm: 0.99999927273485, iteration: 153711
loss: 0.999243974685669,grad_norm: 0.9778388568616139, iteration: 153712
loss: 0.9943355917930603,grad_norm: 0.999999126996211, iteration: 153713
loss: 0.9958370923995972,grad_norm: 0.9999990257371382, iteration: 153714
loss: 1.0480446815490723,grad_norm: 0.9999993846749203, iteration: 153715
loss: 1.038967251777649,grad_norm: 0.9999992565180079, iteration: 153716
loss: 0.9809736013412476,grad_norm: 0.8891410805715685, iteration: 153717
loss: 0.999320924282074,grad_norm: 0.9480661408863054, iteration: 153718
loss: 1.0244494676589966,grad_norm: 0.9964821094430392, iteration: 153719
loss: 1.0005826950073242,grad_norm: 0.9034891330780623, iteration: 153720
loss: 1.0078623294830322,grad_norm: 0.9999990713374322, iteration: 153721
loss: 0.9963439702987671,grad_norm: 0.9999991927644336, iteration: 153722
loss: 0.9716542959213257,grad_norm: 0.999999073804845, iteration: 153723
loss: 0.9620046019554138,grad_norm: 0.9999990467439215, iteration: 153724
loss: 0.9786415100097656,grad_norm: 0.9999992252050836, iteration: 153725
loss: 1.0102649927139282,grad_norm: 0.999999225878862, iteration: 153726
loss: 0.9988659024238586,grad_norm: 0.9999992467184842, iteration: 153727
loss: 1.0133421421051025,grad_norm: 0.9999990829897609, iteration: 153728
loss: 0.9863468408584595,grad_norm: 0.9445786127810906, iteration: 153729
loss: 0.9720252752304077,grad_norm: 0.9999992093319215, iteration: 153730
loss: 1.0011863708496094,grad_norm: 0.9999989829672581, iteration: 153731
loss: 1.0521682500839233,grad_norm: 0.9999992368354714, iteration: 153732
loss: 1.0345675945281982,grad_norm: 0.8984936447511818, iteration: 153733
loss: 1.005908489227295,grad_norm: 0.8452579433575482, iteration: 153734
loss: 1.1129848957061768,grad_norm: 0.9999998602594904, iteration: 153735
loss: 0.9730656743049622,grad_norm: 0.9999990157024975, iteration: 153736
loss: 0.9996809959411621,grad_norm: 0.9999991513924515, iteration: 153737
loss: 1.0267517566680908,grad_norm: 0.9904417758382662, iteration: 153738
loss: 0.9666620492935181,grad_norm: 0.9879230872651033, iteration: 153739
loss: 0.9926345944404602,grad_norm: 0.9999990151604502, iteration: 153740
loss: 1.006531000137329,grad_norm: 0.9354058649841037, iteration: 153741
loss: 0.977655291557312,grad_norm: 0.999999213780336, iteration: 153742
loss: 0.9829532504081726,grad_norm: 0.8867843709307002, iteration: 153743
loss: 1.0340962409973145,grad_norm: 0.9999990442028637, iteration: 153744
loss: 0.9953254461288452,grad_norm: 0.9999992356345359, iteration: 153745
loss: 0.9993368983268738,grad_norm: 0.9999992445080604, iteration: 153746
loss: 1.0023608207702637,grad_norm: 0.8863250020963708, iteration: 153747
loss: 1.0231550931930542,grad_norm: 0.9999993075965572, iteration: 153748
loss: 0.9977966547012329,grad_norm: 0.9857118928663872, iteration: 153749
loss: 0.9549395442008972,grad_norm: 0.9999989537640301, iteration: 153750
loss: 0.9901963472366333,grad_norm: 0.9999992076978294, iteration: 153751
loss: 0.9788450598716736,grad_norm: 0.9999992003486512, iteration: 153752
loss: 1.0279189348220825,grad_norm: 0.9999992867195255, iteration: 153753
loss: 1.0048637390136719,grad_norm: 0.9999990528853249, iteration: 153754
loss: 0.9676499366760254,grad_norm: 0.9999993150947859, iteration: 153755
loss: 0.9889771938323975,grad_norm: 0.955005174641034, iteration: 153756
loss: 1.0080900192260742,grad_norm: 0.9483931994353286, iteration: 153757
loss: 0.9856858253479004,grad_norm: 0.8888261568677859, iteration: 153758
loss: 1.0042911767959595,grad_norm: 0.9999991388769683, iteration: 153759
loss: 0.9409507513046265,grad_norm: 0.970312523491853, iteration: 153760
loss: 1.0399205684661865,grad_norm: 0.999999074224602, iteration: 153761
loss: 1.0361745357513428,grad_norm: 0.9999991471780042, iteration: 153762
loss: 1.0082485675811768,grad_norm: 0.8298946087975612, iteration: 153763
loss: 0.9758092164993286,grad_norm: 0.9999990110402783, iteration: 153764
loss: 1.0224740505218506,grad_norm: 0.9999990886628172, iteration: 153765
loss: 1.068832516670227,grad_norm: 0.9999998357845232, iteration: 153766
loss: 1.0194939374923706,grad_norm: 0.983549971723478, iteration: 153767
loss: 0.9853136539459229,grad_norm: 0.999999120302367, iteration: 153768
loss: 0.9914355278015137,grad_norm: 0.9518438168122743, iteration: 153769
loss: 1.0305655002593994,grad_norm: 0.9842743805556894, iteration: 153770
loss: 1.0255166292190552,grad_norm: 0.9049849551612084, iteration: 153771
loss: 0.9498512744903564,grad_norm: 0.9999991801080095, iteration: 153772
loss: 0.9958034753799438,grad_norm: 0.999999131853689, iteration: 153773
loss: 0.9882359504699707,grad_norm: 0.9766620585914386, iteration: 153774
loss: 1.0295512676239014,grad_norm: 0.9999990409096646, iteration: 153775
loss: 0.9980010986328125,grad_norm: 0.9441962611827711, iteration: 153776
loss: 0.9827978610992432,grad_norm: 0.8529274526446685, iteration: 153777
loss: 0.9722853302955627,grad_norm: 0.9257493651452283, iteration: 153778
loss: 0.9894442558288574,grad_norm: 0.9094197772367775, iteration: 153779
loss: 1.0190147161483765,grad_norm: 0.9999991338131861, iteration: 153780
loss: 1.0263988971710205,grad_norm: 0.9999995618727485, iteration: 153781
loss: 1.0200104713439941,grad_norm: 0.9999990716452272, iteration: 153782
loss: 0.9815934896469116,grad_norm: 0.9189224639578786, iteration: 153783
loss: 1.002698540687561,grad_norm: 0.9999990522003736, iteration: 153784
loss: 1.0164085626602173,grad_norm: 0.9999991181119555, iteration: 153785
loss: 0.9891983270645142,grad_norm: 0.9999991247571802, iteration: 153786
loss: 1.0192042589187622,grad_norm: 0.8998861569793593, iteration: 153787
loss: 1.0031049251556396,grad_norm: 0.9999991750068435, iteration: 153788
loss: 1.0134546756744385,grad_norm: 0.984644295498349, iteration: 153789
loss: 1.0318238735198975,grad_norm: 0.9141377811727422, iteration: 153790
loss: 1.2449769973754883,grad_norm: 0.9999993362833696, iteration: 153791
loss: 1.0030213594436646,grad_norm: 0.998047053150469, iteration: 153792
loss: 0.980462908744812,grad_norm: 0.8603124169514678, iteration: 153793
loss: 0.9838578701019287,grad_norm: 0.9812389943080834, iteration: 153794
loss: 0.9744603633880615,grad_norm: 0.9888777353077458, iteration: 153795
loss: 0.9825618863105774,grad_norm: 0.9626169826761023, iteration: 153796
loss: 1.0057774782180786,grad_norm: 0.8587153944969654, iteration: 153797
loss: 1.0221394300460815,grad_norm: 0.9999991547028443, iteration: 153798
loss: 0.9894287586212158,grad_norm: 0.9999992007687762, iteration: 153799
loss: 0.9795030355453491,grad_norm: 0.9849372259361643, iteration: 153800
loss: 1.0013103485107422,grad_norm: 0.8628316661996879, iteration: 153801
loss: 0.9959052801132202,grad_norm: 0.9999991167694248, iteration: 153802
loss: 1.0066440105438232,grad_norm: 0.9999990475197204, iteration: 153803
loss: 1.0019456148147583,grad_norm: 0.999999060304017, iteration: 153804
loss: 0.9660980701446533,grad_norm: 0.9346242683835873, iteration: 153805
loss: 1.00453519821167,grad_norm: 0.9999991920966225, iteration: 153806
loss: 1.0024250745773315,grad_norm: 0.9999990941102666, iteration: 153807
loss: 1.0308717489242554,grad_norm: 0.999999379939027, iteration: 153808
loss: 1.0339587926864624,grad_norm: 0.9999990789159581, iteration: 153809
loss: 0.9794533848762512,grad_norm: 0.8537240957550442, iteration: 153810
loss: 1.0066440105438232,grad_norm: 0.9308846990232909, iteration: 153811
loss: 1.0325758457183838,grad_norm: 0.9765470232179289, iteration: 153812
loss: 1.0291309356689453,grad_norm: 0.9999991275354745, iteration: 153813
loss: 1.0994879007339478,grad_norm: 0.999998983971562, iteration: 153814
loss: 1.0207597017288208,grad_norm: 0.9999991746451672, iteration: 153815
loss: 0.9803563952445984,grad_norm: 0.8786846042098874, iteration: 153816
loss: 0.9841165542602539,grad_norm: 0.9981750763702445, iteration: 153817
loss: 0.9975237250328064,grad_norm: 0.9999992279424988, iteration: 153818
loss: 0.992695689201355,grad_norm: 0.999999390920435, iteration: 153819
loss: 1.0063480138778687,grad_norm: 0.985436002995152, iteration: 153820
loss: 0.9873771071434021,grad_norm: 0.9999990412741951, iteration: 153821
loss: 0.9898740649223328,grad_norm: 0.9179095527781526, iteration: 153822
loss: 0.9746206402778625,grad_norm: 0.9859586020309423, iteration: 153823
loss: 0.9841725826263428,grad_norm: 0.9999991209010002, iteration: 153824
loss: 1.0373526811599731,grad_norm: 0.9999990092674227, iteration: 153825
loss: 1.0019034147262573,grad_norm: 0.9999991850111327, iteration: 153826
loss: 1.0259321928024292,grad_norm: 0.9999992081289459, iteration: 153827
loss: 0.9628333449363708,grad_norm: 0.999999179994892, iteration: 153828
loss: 1.0061936378479004,grad_norm: 0.9999989949265784, iteration: 153829
loss: 1.0054311752319336,grad_norm: 0.9290122691752737, iteration: 153830
loss: 0.9861745238304138,grad_norm: 0.9999991273819285, iteration: 153831
loss: 0.9954498410224915,grad_norm: 0.9999991033140657, iteration: 153832
loss: 0.9727647304534912,grad_norm: 0.9999991209764378, iteration: 153833
loss: 1.0180895328521729,grad_norm: 0.9999992109236863, iteration: 153834
loss: 1.0284301042556763,grad_norm: 0.9999991124136673, iteration: 153835
loss: 0.9369670152664185,grad_norm: 0.906594056905095, iteration: 153836
loss: 1.0108624696731567,grad_norm: 0.9999991775303592, iteration: 153837
loss: 1.030541181564331,grad_norm: 0.9770349009350813, iteration: 153838
loss: 0.9486933350563049,grad_norm: 0.9999990204697081, iteration: 153839
loss: 0.999916672706604,grad_norm: 0.9999992148209782, iteration: 153840
loss: 1.0336004495620728,grad_norm: 0.9999990981289584, iteration: 153841
loss: 0.9994049072265625,grad_norm: 0.9999992154937741, iteration: 153842
loss: 0.9777757525444031,grad_norm: 0.9999991711932896, iteration: 153843
loss: 0.988396167755127,grad_norm: 0.9163903641980091, iteration: 153844
loss: 0.9942776560783386,grad_norm: 0.9999992432662812, iteration: 153845
loss: 0.9799810647964478,grad_norm: 0.9999992671819835, iteration: 153846
loss: 1.0074585676193237,grad_norm: 0.9999991519296038, iteration: 153847
loss: 0.9761274456977844,grad_norm: 0.9999991371354793, iteration: 153848
loss: 1.0294106006622314,grad_norm: 0.9535513039976828, iteration: 153849
loss: 1.038283348083496,grad_norm: 0.9935201905946778, iteration: 153850
loss: 1.003190517425537,grad_norm: 0.8121560539050355, iteration: 153851
loss: 1.0171432495117188,grad_norm: 0.9999991160942608, iteration: 153852
loss: 1.0270371437072754,grad_norm: 0.9649110204457573, iteration: 153853
loss: 1.007954478263855,grad_norm: 0.9997423443749712, iteration: 153854
loss: 0.9864984154701233,grad_norm: 0.894237026640222, iteration: 153855
loss: 0.9994012117385864,grad_norm: 0.8493194689412381, iteration: 153856
loss: 0.9919824600219727,grad_norm: 0.9042562663349238, iteration: 153857
loss: 0.9927002191543579,grad_norm: 0.9999991052013631, iteration: 153858
loss: 0.9952324628829956,grad_norm: 0.9882238705805798, iteration: 153859
loss: 0.9768713712692261,grad_norm: 0.8991314352222824, iteration: 153860
loss: 1.081208348274231,grad_norm: 0.999999713241122, iteration: 153861
loss: 0.9910138249397278,grad_norm: 0.9999992731701761, iteration: 153862
loss: 0.9401687979698181,grad_norm: 0.9999991981417314, iteration: 153863
loss: 0.994961678981781,grad_norm: 0.9174802242669823, iteration: 153864
loss: 1.0024760961532593,grad_norm: 0.974418865503811, iteration: 153865
loss: 0.9848148822784424,grad_norm: 0.9999992030398752, iteration: 153866
loss: 1.0409626960754395,grad_norm: 0.9999994941807295, iteration: 153867
loss: 0.9845308661460876,grad_norm: 0.9786307606989202, iteration: 153868
loss: 1.0093432664871216,grad_norm: 0.993881631951642, iteration: 153869
loss: 1.01524019241333,grad_norm: 0.9999990763721428, iteration: 153870
loss: 1.0056066513061523,grad_norm: 0.9999990958653446, iteration: 153871
loss: 0.9782817363739014,grad_norm: 0.871333658368306, iteration: 153872
loss: 1.022847294807434,grad_norm: 0.9968070019212694, iteration: 153873
loss: 1.0130995512008667,grad_norm: 0.9730028020773283, iteration: 153874
loss: 1.0109421014785767,grad_norm: 0.9999991436518921, iteration: 153875
loss: 1.0162343978881836,grad_norm: 0.9999990336215518, iteration: 153876
loss: 1.005927324295044,grad_norm: 0.9999992247261876, iteration: 153877
loss: 0.977546751499176,grad_norm: 0.9834711415475476, iteration: 153878
loss: 0.9912768602371216,grad_norm: 0.9551997339642441, iteration: 153879
loss: 0.9814953207969666,grad_norm: 0.9636534285122775, iteration: 153880
loss: 1.012131929397583,grad_norm: 0.9924984637093542, iteration: 153881
loss: 1.0010074377059937,grad_norm: 0.8676639391256913, iteration: 153882
loss: 0.9781726598739624,grad_norm: 0.9999992251697748, iteration: 153883
loss: 0.9901368021965027,grad_norm: 0.9243346871214726, iteration: 153884
loss: 0.9853172898292542,grad_norm: 0.9939401668897876, iteration: 153885
loss: 1.024133324623108,grad_norm: 0.9999991804279273, iteration: 153886
loss: 1.0218936204910278,grad_norm: 0.9999990324961354, iteration: 153887
loss: 0.9847635626792908,grad_norm: 0.999999163104642, iteration: 153888
loss: 0.9914815425872803,grad_norm: 0.9999993638700249, iteration: 153889
loss: 0.9809466004371643,grad_norm: 0.999999048542164, iteration: 153890
loss: 1.0077975988388062,grad_norm: 0.9543505558023586, iteration: 153891
loss: 0.9949202537536621,grad_norm: 0.9999992512901771, iteration: 153892
loss: 0.9915858507156372,grad_norm: 0.9682079872091557, iteration: 153893
loss: 1.0288172960281372,grad_norm: 0.9999990501204497, iteration: 153894
loss: 1.0059581995010376,grad_norm: 0.9881204917899964, iteration: 153895
loss: 1.0013433694839478,grad_norm: 0.9590642577854618, iteration: 153896
loss: 0.9748725295066833,grad_norm: 0.9999991244196044, iteration: 153897
loss: 0.9882729649543762,grad_norm: 0.9278577524288335, iteration: 153898
loss: 0.9545901417732239,grad_norm: 0.9999990719969162, iteration: 153899
loss: 0.977113664150238,grad_norm: 0.9496623623245447, iteration: 153900
loss: 0.9786869883537292,grad_norm: 0.9252321636167652, iteration: 153901
loss: 1.0004308223724365,grad_norm: 0.9262345137443554, iteration: 153902
loss: 0.96470046043396,grad_norm: 0.9999991004491113, iteration: 153903
loss: 0.9884508848190308,grad_norm: 0.9567041772925606, iteration: 153904
loss: 0.9978017210960388,grad_norm: 0.9194418144780014, iteration: 153905
loss: 1.0098131895065308,grad_norm: 0.9999992126281374, iteration: 153906
loss: 0.9784380197525024,grad_norm: 0.9999991372179768, iteration: 153907
loss: 1.0031529664993286,grad_norm: 0.9999990919904336, iteration: 153908
loss: 1.006667971611023,grad_norm: 0.9999990955262237, iteration: 153909
loss: 1.004852533340454,grad_norm: 0.9999991564043417, iteration: 153910
loss: 1.0131278038024902,grad_norm: 0.9371937378412918, iteration: 153911
loss: 1.0029748678207397,grad_norm: 0.9999992209469358, iteration: 153912
loss: 0.9703757762908936,grad_norm: 0.9657858424702076, iteration: 153913
loss: 0.9937734007835388,grad_norm: 0.8747354142935149, iteration: 153914
loss: 0.990531325340271,grad_norm: 0.9999990557764241, iteration: 153915
loss: 1.0046827793121338,grad_norm: 0.9999991711527025, iteration: 153916
loss: 1.0078272819519043,grad_norm: 0.9831445078972364, iteration: 153917
loss: 1.0012280941009521,grad_norm: 0.9999991588372394, iteration: 153918
loss: 1.0215145349502563,grad_norm: 0.9999992251892172, iteration: 153919
loss: 0.9926441311836243,grad_norm: 0.9999991049101465, iteration: 153920
loss: 0.9872656464576721,grad_norm: 0.9918008152963267, iteration: 153921
loss: 1.0469036102294922,grad_norm: 0.9999994649277081, iteration: 153922
loss: 1.0018818378448486,grad_norm: 0.999999074352555, iteration: 153923
loss: 1.0069921016693115,grad_norm: 0.9535758941756749, iteration: 153924
loss: 0.9862117171287537,grad_norm: 0.9504325407151629, iteration: 153925
loss: 0.9928826689720154,grad_norm: 0.9503616402950638, iteration: 153926
loss: 1.0080164670944214,grad_norm: 0.944849265791862, iteration: 153927
loss: 0.982175350189209,grad_norm: 0.9201982346899965, iteration: 153928
loss: 0.9946996569633484,grad_norm: 0.9594770030493965, iteration: 153929
loss: 1.0334988832473755,grad_norm: 0.9999990540426343, iteration: 153930
loss: 1.0020719766616821,grad_norm: 0.9999989548814463, iteration: 153931
loss: 0.9951236248016357,grad_norm: 0.987612477040128, iteration: 153932
loss: 0.9777970910072327,grad_norm: 0.9999991034022241, iteration: 153933
loss: 0.9918904900550842,grad_norm: 0.9027576443547619, iteration: 153934
loss: 0.994245707988739,grad_norm: 0.9587627992835279, iteration: 153935
loss: 0.9876207709312439,grad_norm: 0.9934499663404357, iteration: 153936
loss: 1.0169378519058228,grad_norm: 0.9796827234112507, iteration: 153937
loss: 1.009501576423645,grad_norm: 0.8790960297454818, iteration: 153938
loss: 0.9925114512443542,grad_norm: 0.9999991618179451, iteration: 153939
loss: 1.0139700174331665,grad_norm: 0.9999991989659263, iteration: 153940
loss: 0.987112283706665,grad_norm: 0.9999992052960402, iteration: 153941
loss: 1.0013986825942993,grad_norm: 0.9999989382930368, iteration: 153942
loss: 0.9953832030296326,grad_norm: 0.9999992129495489, iteration: 153943
loss: 1.0124601125717163,grad_norm: 0.9796372171221138, iteration: 153944
loss: 1.0314990282058716,grad_norm: 0.8978057314466269, iteration: 153945
loss: 0.9998238682746887,grad_norm: 0.9999991396563448, iteration: 153946
loss: 1.0412251949310303,grad_norm: 0.8828498066517932, iteration: 153947
loss: 1.0055127143859863,grad_norm: 0.9999991352880653, iteration: 153948
loss: 0.97590172290802,grad_norm: 0.9999991934554856, iteration: 153949
loss: 0.987622082233429,grad_norm: 0.9631068436829233, iteration: 153950
loss: 1.0165438652038574,grad_norm: 0.9601328587430464, iteration: 153951
loss: 1.0157948732376099,grad_norm: 0.9999991363619086, iteration: 153952
loss: 1.0160244703292847,grad_norm: 0.999999137190236, iteration: 153953
loss: 1.0201057195663452,grad_norm: 0.9379191584940048, iteration: 153954
loss: 1.0256282091140747,grad_norm: 0.9999990480640487, iteration: 153955
loss: 1.0291889905929565,grad_norm: 0.9549683415865838, iteration: 153956
loss: 1.008476972579956,grad_norm: 0.9999990033586688, iteration: 153957
loss: 0.9805925488471985,grad_norm: 0.8883503322237593, iteration: 153958
loss: 0.9561736583709717,grad_norm: 0.9999989671820947, iteration: 153959
loss: 0.9827804565429688,grad_norm: 0.9536980407020937, iteration: 153960
loss: 1.0278394222259521,grad_norm: 0.9999990864607382, iteration: 153961
loss: 0.972038745880127,grad_norm: 0.9999989411568306, iteration: 153962
loss: 1.06191885471344,grad_norm: 0.9999993690701392, iteration: 153963
loss: 1.0107569694519043,grad_norm: 0.9999991094835552, iteration: 153964
loss: 0.9845981597900391,grad_norm: 0.9830874969275039, iteration: 153965
loss: 1.0219194889068604,grad_norm: 0.9560007375629207, iteration: 153966
loss: 1.0517878532409668,grad_norm: 0.999999101330687, iteration: 153967
loss: 0.9999845027923584,grad_norm: 0.9999991232818572, iteration: 153968
loss: 0.9834135174751282,grad_norm: 0.9087638334821904, iteration: 153969
loss: 1.0149694681167603,grad_norm: 0.9467984218728439, iteration: 153970
loss: 1.0753486156463623,grad_norm: 0.9999993535295566, iteration: 153971
loss: 0.9948650598526001,grad_norm: 0.8633011538905101, iteration: 153972
loss: 1.0203876495361328,grad_norm: 0.9250536127977229, iteration: 153973
loss: 0.9996957182884216,grad_norm: 0.99999900288025, iteration: 153974
loss: 1.0250788927078247,grad_norm: 0.9999991664591623, iteration: 153975
loss: 1.0101889371871948,grad_norm: 0.9999990856152524, iteration: 153976
loss: 0.9693557620048523,grad_norm: 0.999999111893919, iteration: 153977
loss: 1.0068222284317017,grad_norm: 0.9892437636453092, iteration: 153978
loss: 0.9894493222236633,grad_norm: 0.9999991111261102, iteration: 153979
loss: 0.9963006973266602,grad_norm: 0.9999991297317916, iteration: 153980
loss: 0.9864687323570251,grad_norm: 0.8675385426311716, iteration: 153981
loss: 1.0074539184570312,grad_norm: 0.9999991866624794, iteration: 153982
loss: 1.000931739807129,grad_norm: 0.9999990165195602, iteration: 153983
loss: 0.9590290188789368,grad_norm: 0.999999307040523, iteration: 153984
loss: 1.0192546844482422,grad_norm: 0.9999991178462273, iteration: 153985
loss: 0.9708407521247864,grad_norm: 0.9999990902106174, iteration: 153986
loss: 1.0170657634735107,grad_norm: 0.9999991597243312, iteration: 153987
loss: 0.9900054335594177,grad_norm: 0.9597883964611443, iteration: 153988
loss: 0.9978489875793457,grad_norm: 0.999999024134688, iteration: 153989
loss: 0.9963138699531555,grad_norm: 0.9999991022980806, iteration: 153990
loss: 0.9791573286056519,grad_norm: 0.9723952555963603, iteration: 153991
loss: 1.0072025060653687,grad_norm: 0.9222456254083233, iteration: 153992
loss: 0.9691970944404602,grad_norm: 0.9999991121132689, iteration: 153993
loss: 1.0471980571746826,grad_norm: 0.9999991237436638, iteration: 153994
loss: 0.9799692034721375,grad_norm: 0.9999990090220128, iteration: 153995
loss: 0.9848771095275879,grad_norm: 0.9521054012573854, iteration: 153996
loss: 1.011592149734497,grad_norm: 0.9999992702398525, iteration: 153997
loss: 0.9961024522781372,grad_norm: 0.9388166923259657, iteration: 153998
loss: 0.9985277056694031,grad_norm: 0.9861403519750838, iteration: 153999
loss: 0.989743173122406,grad_norm: 0.96963452796061, iteration: 154000
loss: 1.0124584436416626,grad_norm: 0.999999048197461, iteration: 154001
loss: 1.0557823181152344,grad_norm: 0.8525044664540907, iteration: 154002
loss: 1.0140339136123657,grad_norm: 0.9999992439286988, iteration: 154003
loss: 0.9768410325050354,grad_norm: 0.999999578648761, iteration: 154004
loss: 1.0179060697555542,grad_norm: 0.9999997274758843, iteration: 154005
loss: 0.9667465090751648,grad_norm: 0.999999145433124, iteration: 154006
loss: 1.0218364000320435,grad_norm: 0.9999991689256866, iteration: 154007
loss: 1.0027837753295898,grad_norm: 0.9878810342988589, iteration: 154008
loss: 0.984786331653595,grad_norm: 0.9999992732131109, iteration: 154009
loss: 1.0067094564437866,grad_norm: 0.9999989107498548, iteration: 154010
loss: 1.0198336839675903,grad_norm: 0.9153079623447877, iteration: 154011
loss: 0.9894794225692749,grad_norm: 0.8527612114134205, iteration: 154012
loss: 0.9782170057296753,grad_norm: 0.9346849247575351, iteration: 154013
loss: 1.021165132522583,grad_norm: 0.9241133475831517, iteration: 154014
loss: 1.0205879211425781,grad_norm: 0.9999991748300838, iteration: 154015
loss: 0.9588420987129211,grad_norm: 0.9270496167799395, iteration: 154016
loss: 0.9947572946548462,grad_norm: 0.8817084372478045, iteration: 154017
loss: 0.9985862970352173,grad_norm: 0.9999992259375342, iteration: 154018
loss: 0.982235848903656,grad_norm: 0.999999105758711, iteration: 154019
loss: 1.0132131576538086,grad_norm: 0.9999992302969026, iteration: 154020
loss: 1.0048527717590332,grad_norm: 0.9223349009391705, iteration: 154021
loss: 1.002124309539795,grad_norm: 0.8252186136155593, iteration: 154022
loss: 1.0847041606903076,grad_norm: 0.999999272605619, iteration: 154023
loss: 0.9981471300125122,grad_norm: 0.9999988800878409, iteration: 154024
loss: 0.9831294417381287,grad_norm: 0.9501312941377412, iteration: 154025
loss: 0.9965274930000305,grad_norm: 0.9999990132795981, iteration: 154026
loss: 0.9955034852027893,grad_norm: 0.9999992971520861, iteration: 154027
loss: 0.9891987442970276,grad_norm: 0.9999991275533899, iteration: 154028
loss: 0.9760432243347168,grad_norm: 0.9999992118714472, iteration: 154029
loss: 1.0111416578292847,grad_norm: 0.9999991929337326, iteration: 154030
loss: 1.0069316625595093,grad_norm: 0.9999989652047511, iteration: 154031
loss: 1.0158263444900513,grad_norm: 0.9226486634273572, iteration: 154032
loss: 0.9564351439476013,grad_norm: 0.9795960029689961, iteration: 154033
loss: 1.0263882875442505,grad_norm: 0.9999990315077499, iteration: 154034
loss: 0.999122679233551,grad_norm: 0.8857548676858813, iteration: 154035
loss: 0.9942492842674255,grad_norm: 0.9151579723079202, iteration: 154036
loss: 1.0021469593048096,grad_norm: 0.9999992897578881, iteration: 154037
loss: 1.015575885772705,grad_norm: 0.9999991036184642, iteration: 154038
loss: 1.0427532196044922,grad_norm: 0.9999990976737422, iteration: 154039
loss: 0.994421124458313,grad_norm: 0.974785896088756, iteration: 154040
loss: 1.001321792602539,grad_norm: 0.9999992976345723, iteration: 154041
loss: 1.0083023309707642,grad_norm: 0.9999992193761624, iteration: 154042
loss: 0.9499852657318115,grad_norm: 0.9999991751625722, iteration: 154043
loss: 1.0257946252822876,grad_norm: 0.9999998802435802, iteration: 154044
loss: 1.0200122594833374,grad_norm: 0.9999991153391724, iteration: 154045
loss: 1.0169177055358887,grad_norm: 0.9999991342750214, iteration: 154046
loss: 1.044772744178772,grad_norm: 0.8493549682652998, iteration: 154047
loss: 1.0200779438018799,grad_norm: 0.9999990782168895, iteration: 154048
loss: 0.9762478470802307,grad_norm: 0.9999992088723826, iteration: 154049
loss: 1.0082968473434448,grad_norm: 0.9999992206247474, iteration: 154050
loss: 1.0108470916748047,grad_norm: 0.9999994615787788, iteration: 154051
loss: 1.003257155418396,grad_norm: 0.999999053655426, iteration: 154052
loss: 1.0017030239105225,grad_norm: 0.9999991856497913, iteration: 154053
loss: 1.0389485359191895,grad_norm: 0.999999142206198, iteration: 154054
loss: 0.9715785384178162,grad_norm: 0.9999992209775221, iteration: 154055
loss: 1.029206395149231,grad_norm: 0.9493911491755644, iteration: 154056
loss: 1.0289016962051392,grad_norm: 0.9999992155536219, iteration: 154057
loss: 1.0288147926330566,grad_norm: 0.9376361863843495, iteration: 154058
loss: 1.035202145576477,grad_norm: 0.9999990204359802, iteration: 154059
loss: 1.0025568008422852,grad_norm: 0.927520561940305, iteration: 154060
loss: 1.0039910078048706,grad_norm: 0.999999121254916, iteration: 154061
loss: 1.0120844841003418,grad_norm: 0.9999991865965602, iteration: 154062
loss: 0.9950327277183533,grad_norm: 0.9999992459567946, iteration: 154063
loss: 0.9622703194618225,grad_norm: 0.8851991884021614, iteration: 154064
loss: 0.9783835411071777,grad_norm: 0.9091642482882971, iteration: 154065
loss: 0.9809497594833374,grad_norm: 0.965825384985509, iteration: 154066
loss: 0.9872273802757263,grad_norm: 0.9999989561285548, iteration: 154067
loss: 1.0236492156982422,grad_norm: 0.9103025019457781, iteration: 154068
loss: 0.9964448809623718,grad_norm: 0.8530139462391921, iteration: 154069
loss: 1.016251802444458,grad_norm: 0.9999993261196143, iteration: 154070
loss: 0.9919235110282898,grad_norm: 0.9999991736558831, iteration: 154071
loss: 1.002738356590271,grad_norm: 0.9999995854612974, iteration: 154072
loss: 0.9809377789497375,grad_norm: 0.8590980575562637, iteration: 154073
loss: 1.0008054971694946,grad_norm: 0.9999991437927621, iteration: 154074
loss: 1.0104851722717285,grad_norm: 0.9581705679275268, iteration: 154075
loss: 0.9890586137771606,grad_norm: 0.9999990886628044, iteration: 154076
loss: 0.9939132928848267,grad_norm: 0.9999990943526678, iteration: 154077
loss: 1.0157840251922607,grad_norm: 0.9999990508338115, iteration: 154078
loss: 0.9882826209068298,grad_norm: 0.9999990154074767, iteration: 154079
loss: 0.9600430130958557,grad_norm: 0.9999992038665811, iteration: 154080
loss: 0.9977014660835266,grad_norm: 0.9616315863131586, iteration: 154081
loss: 1.0207048654556274,grad_norm: 0.9592226670455805, iteration: 154082
loss: 1.0107582807540894,grad_norm: 0.9999992507422287, iteration: 154083
loss: 0.9840324521064758,grad_norm: 0.9999990742347993, iteration: 154084
loss: 1.0431301593780518,grad_norm: 0.999999140736047, iteration: 154085
loss: 0.979766845703125,grad_norm: 0.8612135058236763, iteration: 154086
loss: 1.039204716682434,grad_norm: 0.8546971725449269, iteration: 154087
loss: 0.991462230682373,grad_norm: 0.9244307090426916, iteration: 154088
loss: 0.9957678914070129,grad_norm: 0.7998687269309585, iteration: 154089
loss: 1.0037050247192383,grad_norm: 0.9571383927609806, iteration: 154090
loss: 0.998769223690033,grad_norm: 0.9997015879586406, iteration: 154091
loss: 0.9867445826530457,grad_norm: 0.8721589500236765, iteration: 154092
loss: 0.9814282059669495,grad_norm: 0.9576585914306852, iteration: 154093
loss: 1.0032597780227661,grad_norm: 0.9999990111714113, iteration: 154094
loss: 1.0375593900680542,grad_norm: 0.9999992087546594, iteration: 154095
loss: 1.0278257131576538,grad_norm: 0.9999996816023163, iteration: 154096
loss: 1.0287531614303589,grad_norm: 0.999999112867457, iteration: 154097
loss: 0.9862713813781738,grad_norm: 0.9401109457615648, iteration: 154098
loss: 0.9847490787506104,grad_norm: 0.9999991542515853, iteration: 154099
loss: 0.980533242225647,grad_norm: 0.7667920677684165, iteration: 154100
loss: 0.9873138666152954,grad_norm: 0.9999993297671655, iteration: 154101
loss: 0.9945762157440186,grad_norm: 0.9614424355223988, iteration: 154102
loss: 0.9823543429374695,grad_norm: 0.898707021404175, iteration: 154103
loss: 0.9689794182777405,grad_norm: 0.9999990325755677, iteration: 154104
loss: 0.9945132732391357,grad_norm: 0.8187575801581213, iteration: 154105
loss: 0.9662958979606628,grad_norm: 0.9500585604262496, iteration: 154106
loss: 0.9627766609191895,grad_norm: 0.999999145307198, iteration: 154107
loss: 1.005948543548584,grad_norm: 0.99999914302607, iteration: 154108
loss: 0.9980471730232239,grad_norm: 0.9999990723138876, iteration: 154109
loss: 0.9948366284370422,grad_norm: 0.942954626148995, iteration: 154110
loss: 0.9795367121696472,grad_norm: 0.9999989526488947, iteration: 154111
loss: 1.0285831689834595,grad_norm: 0.9999991652095414, iteration: 154112
loss: 0.960475742816925,grad_norm: 0.9945630287350699, iteration: 154113
loss: 1.0516315698623657,grad_norm: 0.999999215647724, iteration: 154114
loss: 1.019417643547058,grad_norm: 0.9999990790901737, iteration: 154115
loss: 1.0133945941925049,grad_norm: 0.9999991124393347, iteration: 154116
loss: 1.0103274583816528,grad_norm: 0.8774842115775127, iteration: 154117
loss: 1.01089608669281,grad_norm: 0.9771764453948647, iteration: 154118
loss: 1.0176442861557007,grad_norm: 0.9999992201764684, iteration: 154119
loss: 0.9989699721336365,grad_norm: 0.9999993453669198, iteration: 154120
loss: 1.008571982383728,grad_norm: 0.9999993101236794, iteration: 154121
loss: 0.9733938574790955,grad_norm: 0.9999994490042056, iteration: 154122
loss: 0.987113893032074,grad_norm: 0.9999989125934959, iteration: 154123
loss: 0.9966189861297607,grad_norm: 0.8266162729676668, iteration: 154124
loss: 0.9630905985832214,grad_norm: 0.9999994879898173, iteration: 154125
loss: 1.1000559329986572,grad_norm: 0.9999993828610495, iteration: 154126
loss: 0.9941078424453735,grad_norm: 0.9999991234880463, iteration: 154127
loss: 1.0712003707885742,grad_norm: 0.9999990876348898, iteration: 154128
loss: 1.015673041343689,grad_norm: 0.999999450295804, iteration: 154129
loss: 1.0326405763626099,grad_norm: 0.9999998221662847, iteration: 154130
loss: 1.010008454322815,grad_norm: 0.9999991689212138, iteration: 154131
loss: 1.0086616277694702,grad_norm: 0.9999990504477111, iteration: 154132
loss: 1.0012909173965454,grad_norm: 0.9999990676887142, iteration: 154133
loss: 1.0082859992980957,grad_norm: 0.999999088562972, iteration: 154134
loss: 0.9739237427711487,grad_norm: 0.821102109374909, iteration: 154135
loss: 1.0228279829025269,grad_norm: 0.9999992773034295, iteration: 154136
loss: 1.0150421857833862,grad_norm: 0.7823403669603679, iteration: 154137
loss: 1.016796350479126,grad_norm: 0.9999991468221061, iteration: 154138
loss: 0.9848531484603882,grad_norm: 0.8917837636165874, iteration: 154139
loss: 0.9885375499725342,grad_norm: 0.9067785737567049, iteration: 154140
loss: 1.034635305404663,grad_norm: 0.9999989579978088, iteration: 154141
loss: 1.0115835666656494,grad_norm: 0.9623925104490815, iteration: 154142
loss: 0.9889218211174011,grad_norm: 0.8342191823778248, iteration: 154143
loss: 1.0029786825180054,grad_norm: 0.9513856694878593, iteration: 154144
loss: 1.0084553956985474,grad_norm: 0.9999991173190241, iteration: 154145
loss: 0.9664480686187744,grad_norm: 0.9855110851912129, iteration: 154146
loss: 0.987765908241272,grad_norm: 0.9405651646019019, iteration: 154147
loss: 1.0116137266159058,grad_norm: 0.9999995537740154, iteration: 154148
loss: 1.0781276226043701,grad_norm: 0.9999999309135311, iteration: 154149
loss: 0.9707163572311401,grad_norm: 0.9416149661745868, iteration: 154150
loss: 1.0261772871017456,grad_norm: 0.9999991952037347, iteration: 154151
loss: 0.9807300567626953,grad_norm: 0.8446549283013478, iteration: 154152
loss: 0.9926424622535706,grad_norm: 0.9999992189013118, iteration: 154153
loss: 1.0209388732910156,grad_norm: 0.9476604119005086, iteration: 154154
loss: 1.063849687576294,grad_norm: 0.9248788668952946, iteration: 154155
loss: 1.0446761846542358,grad_norm: 0.9999995102635294, iteration: 154156
loss: 0.9499969482421875,grad_norm: 0.9953179285266363, iteration: 154157
loss: 1.0098093748092651,grad_norm: 0.9999994238557555, iteration: 154158
loss: 1.0149176120758057,grad_norm: 0.9999991213917839, iteration: 154159
loss: 1.0106931924819946,grad_norm: 0.9139141214798986, iteration: 154160
loss: 1.0916444063186646,grad_norm: 0.9999992455618995, iteration: 154161
loss: 1.0321778059005737,grad_norm: 0.9999993875266973, iteration: 154162
loss: 1.024753212928772,grad_norm: 0.9999999318415311, iteration: 154163
loss: 1.0664021968841553,grad_norm: 0.9999997269526714, iteration: 154164
loss: 1.1200379133224487,grad_norm: 0.9999990304334418, iteration: 154165
loss: 1.0739151239395142,grad_norm: 0.9999995679367495, iteration: 154166
loss: 1.2254453897476196,grad_norm: 0.9999999104714885, iteration: 154167
loss: 1.1454716920852661,grad_norm: 0.999999923135208, iteration: 154168
loss: 1.0632494688034058,grad_norm: 0.9999991625928096, iteration: 154169
loss: 1.0221426486968994,grad_norm: 0.9999992811887549, iteration: 154170
loss: 0.9991836547851562,grad_norm: 0.9999990151709198, iteration: 154171
loss: 1.0410804748535156,grad_norm: 0.9228534594337396, iteration: 154172
loss: 1.1058114767074585,grad_norm: 0.9999991738650165, iteration: 154173
loss: 1.141571044921875,grad_norm: 0.9999991814316209, iteration: 154174
loss: 1.1118746995925903,grad_norm: 0.9999990936570039, iteration: 154175
loss: 1.0228495597839355,grad_norm: 0.9999998609285224, iteration: 154176
loss: 1.0007603168487549,grad_norm: 0.8640733472577639, iteration: 154177
loss: 1.031112790107727,grad_norm: 0.9999990475870648, iteration: 154178
loss: 1.0693532228469849,grad_norm: 0.999999157988629, iteration: 154179
loss: 1.037928819656372,grad_norm: 0.9390863341867527, iteration: 154180
loss: 1.0394906997680664,grad_norm: 0.9999993337349147, iteration: 154181
loss: 1.0877395868301392,grad_norm: 0.999999701927819, iteration: 154182
loss: 0.9912626147270203,grad_norm: 0.9936944972401135, iteration: 154183
loss: 1.1006420850753784,grad_norm: 0.9999998261286143, iteration: 154184
loss: 1.0041228532791138,grad_norm: 0.9500992777079771, iteration: 154185
loss: 1.0027048587799072,grad_norm: 0.9999990973869833, iteration: 154186
loss: 1.10518217086792,grad_norm: 0.9999997352154101, iteration: 154187
loss: 1.053591012954712,grad_norm: 0.9999995320535003, iteration: 154188
loss: 1.1420633792877197,grad_norm: 0.9999992836865167, iteration: 154189
loss: 1.0859177112579346,grad_norm: 0.9999997404084243, iteration: 154190
loss: 1.2564433813095093,grad_norm: 0.9999996641965414, iteration: 154191
loss: 1.0129120349884033,grad_norm: 0.9999991729794745, iteration: 154192
loss: 1.0330359935760498,grad_norm: 0.9201022012710649, iteration: 154193
loss: 0.9784425497055054,grad_norm: 0.999999026697309, iteration: 154194
loss: 1.2731540203094482,grad_norm: 0.9999999109879578, iteration: 154195
loss: 1.1116597652435303,grad_norm: 0.9999996271225633, iteration: 154196
loss: 0.999729573726654,grad_norm: 0.9256752759643567, iteration: 154197
loss: 1.109373688697815,grad_norm: 0.999999537858697, iteration: 154198
loss: 1.2934632301330566,grad_norm: 0.9999992259881061, iteration: 154199
loss: 1.0650215148925781,grad_norm: 0.9999995822514383, iteration: 154200
loss: 0.9553640484809875,grad_norm: 0.9999992584321802, iteration: 154201
loss: 1.0141648054122925,grad_norm: 0.999999624697968, iteration: 154202
loss: 1.2377793788909912,grad_norm: 0.9999992722109792, iteration: 154203
loss: 1.0160000324249268,grad_norm: 0.9689027346927387, iteration: 154204
loss: 0.9845489859580994,grad_norm: 0.965897484708755, iteration: 154205
loss: 1.035079836845398,grad_norm: 0.9999990992827479, iteration: 154206
loss: 1.327253818511963,grad_norm: 0.9999999359370877, iteration: 154207
loss: 0.9729520678520203,grad_norm: 0.9999990372734997, iteration: 154208
loss: 0.9913082122802734,grad_norm: 0.8552312740845145, iteration: 154209
loss: 1.0270222425460815,grad_norm: 0.9999993194446247, iteration: 154210
loss: 1.0247560739517212,grad_norm: 0.9328777207713004, iteration: 154211
loss: 1.038651704788208,grad_norm: 0.9999995675495311, iteration: 154212
loss: 1.0392589569091797,grad_norm: 0.9999991101460802, iteration: 154213
loss: 0.9852735996246338,grad_norm: 0.8933289776721931, iteration: 154214
loss: 1.1063661575317383,grad_norm: 0.9999997726866745, iteration: 154215
loss: 1.127726674079895,grad_norm: 0.9999998345076223, iteration: 154216
loss: 1.1096941232681274,grad_norm: 0.9999998011832147, iteration: 154217
loss: 1.0375381708145142,grad_norm: 0.999999913172635, iteration: 154218
loss: 1.004392147064209,grad_norm: 0.9714387781724583, iteration: 154219
loss: 1.020929217338562,grad_norm: 0.9699687072143527, iteration: 154220
loss: 1.0699951648712158,grad_norm: 0.9999998393833054, iteration: 154221
loss: 1.0681315660476685,grad_norm: 0.9999996067962512, iteration: 154222
loss: 0.9834806323051453,grad_norm: 0.9558752305037735, iteration: 154223
loss: 0.9994847178459167,grad_norm: 0.9999990521576971, iteration: 154224
loss: 1.02500319480896,grad_norm: 0.9999990326194883, iteration: 154225
loss: 1.0006701946258545,grad_norm: 0.9999989783122692, iteration: 154226
loss: 0.9518105387687683,grad_norm: 0.9999994906227019, iteration: 154227
loss: 0.9923723340034485,grad_norm: 0.9999989531354169, iteration: 154228
loss: 1.0024358034133911,grad_norm: 0.9999992174482785, iteration: 154229
loss: 1.0014305114746094,grad_norm: 0.9204756231840209, iteration: 154230
loss: 1.0355207920074463,grad_norm: 0.9999992934232077, iteration: 154231
loss: 1.0545885562896729,grad_norm: 0.8609035903137755, iteration: 154232
loss: 0.9790876507759094,grad_norm: 0.9999990896145675, iteration: 154233
loss: 1.0309321880340576,grad_norm: 0.8955521208567909, iteration: 154234
loss: 1.008671522140503,grad_norm: 0.9999992805249014, iteration: 154235
loss: 0.9973093271255493,grad_norm: 0.999999594045801, iteration: 154236
loss: 1.0526137351989746,grad_norm: 0.999999320885623, iteration: 154237
loss: 0.9754266142845154,grad_norm: 0.8856029695277191, iteration: 154238
loss: 0.972255527973175,grad_norm: 0.9671482232951211, iteration: 154239
loss: 0.945955216884613,grad_norm: 0.999998990477137, iteration: 154240
loss: 1.0061482191085815,grad_norm: 0.9999988863248678, iteration: 154241
loss: 1.0026904344558716,grad_norm: 0.9999993256066138, iteration: 154242
loss: 1.003494143486023,grad_norm: 0.9463801765775668, iteration: 154243
loss: 1.0222159624099731,grad_norm: 0.9999992273037326, iteration: 154244
loss: 0.9857175946235657,grad_norm: 0.9769677010984428, iteration: 154245
loss: 1.0750728845596313,grad_norm: 0.9999997290160231, iteration: 154246
loss: 1.0186303853988647,grad_norm: 0.9999998486032823, iteration: 154247
loss: 0.9803789854049683,grad_norm: 0.9999989106966587, iteration: 154248
loss: 0.9991849660873413,grad_norm: 0.9329285597235402, iteration: 154249
loss: 1.0335954427719116,grad_norm: 0.9999993095622798, iteration: 154250
loss: 1.0394043922424316,grad_norm: 0.9999993592846299, iteration: 154251
loss: 0.9693944454193115,grad_norm: 0.9324707366969166, iteration: 154252
loss: 0.9573171138763428,grad_norm: 0.9999990349342236, iteration: 154253
loss: 1.023932933807373,grad_norm: 0.9391664442821043, iteration: 154254
loss: 1.0329121351242065,grad_norm: 0.9999990819165766, iteration: 154255
loss: 1.0118308067321777,grad_norm: 0.9958802657654858, iteration: 154256
loss: 1.0545357465744019,grad_norm: 0.9999996679108669, iteration: 154257
loss: 0.9716458916664124,grad_norm: 0.915629699596446, iteration: 154258
loss: 0.9991527795791626,grad_norm: 0.9534311349228202, iteration: 154259
loss: 0.9694981575012207,grad_norm: 0.8451390652713416, iteration: 154260
loss: 1.1347339153289795,grad_norm: 0.9999993244402628, iteration: 154261
loss: 1.0503755807876587,grad_norm: 0.9999992057533673, iteration: 154262
loss: 1.042262315750122,grad_norm: 0.999999464128403, iteration: 154263
loss: 1.1516609191894531,grad_norm: 0.9999998344613674, iteration: 154264
loss: 0.9870136976242065,grad_norm: 0.9999991407976054, iteration: 154265
loss: 0.970619261264801,grad_norm: 0.9191818285754763, iteration: 154266
loss: 1.0710582733154297,grad_norm: 0.9999994346921757, iteration: 154267
loss: 1.0286743640899658,grad_norm: 0.9999992034770048, iteration: 154268
loss: 1.0235469341278076,grad_norm: 0.9324070524587812, iteration: 154269
loss: 0.9849600195884705,grad_norm: 0.8394801243958732, iteration: 154270
loss: 1.0875805616378784,grad_norm: 0.9756729954705219, iteration: 154271
loss: 1.0350027084350586,grad_norm: 0.9999992643902863, iteration: 154272
loss: 1.0007363557815552,grad_norm: 0.9999992175825706, iteration: 154273
loss: 1.0217201709747314,grad_norm: 0.9999990568252716, iteration: 154274
loss: 1.0113322734832764,grad_norm: 0.946756376180679, iteration: 154275
loss: 1.0400456190109253,grad_norm: 0.9999992379969417, iteration: 154276
loss: 0.9958510398864746,grad_norm: 0.934038186146509, iteration: 154277
loss: 1.0175129175186157,grad_norm: 0.9999995868322635, iteration: 154278
loss: 1.0151914358139038,grad_norm: 0.999999543419479, iteration: 154279
loss: 1.0744668245315552,grad_norm: 0.9999995869494047, iteration: 154280
loss: 1.046983003616333,grad_norm: 0.9999993079622287, iteration: 154281
loss: 0.9924883246421814,grad_norm: 0.9999991668853234, iteration: 154282
loss: 1.0085216760635376,grad_norm: 0.9999990557097658, iteration: 154283
loss: 1.0320225954055786,grad_norm: 0.9999999696479437, iteration: 154284
loss: 0.9777368307113647,grad_norm: 0.9809632098775792, iteration: 154285
loss: 1.0130621194839478,grad_norm: 0.999999222585057, iteration: 154286
loss: 1.0213230848312378,grad_norm: 0.9836120274638462, iteration: 154287
loss: 1.0664070844650269,grad_norm: 0.9999996382485158, iteration: 154288
loss: 1.0188030004501343,grad_norm: 0.9696979494931238, iteration: 154289
loss: 1.003779411315918,grad_norm: 0.999999228716759, iteration: 154290
loss: 1.0751948356628418,grad_norm: 0.9999993370205887, iteration: 154291
loss: 0.9942178130149841,grad_norm: 0.9999990654453921, iteration: 154292
loss: 1.0236963033676147,grad_norm: 0.9999992040953003, iteration: 154293
loss: 0.9949513673782349,grad_norm: 0.9600266362709645, iteration: 154294
loss: 1.0014384984970093,grad_norm: 0.9332289544534339, iteration: 154295
loss: 0.9819554090499878,grad_norm: 0.9613486971157167, iteration: 154296
loss: 1.009163737297058,grad_norm: 0.9999990178217705, iteration: 154297
loss: 0.9858423471450806,grad_norm: 0.9080478603505171, iteration: 154298
loss: 0.9848058223724365,grad_norm: 0.7962100682112476, iteration: 154299
loss: 1.071956753730774,grad_norm: 1.0000000032777905, iteration: 154300
loss: 1.0147721767425537,grad_norm: 0.9999992866887403, iteration: 154301
loss: 1.0113343000411987,grad_norm: 0.999999086309421, iteration: 154302
loss: 0.9854316115379333,grad_norm: 0.9999996973114603, iteration: 154303
loss: 1.0567954778671265,grad_norm: 0.9999991479379985, iteration: 154304
loss: 0.9572998881340027,grad_norm: 0.9078916388633642, iteration: 154305
loss: 1.0154322385787964,grad_norm: 0.9999991186398628, iteration: 154306
loss: 0.9907315969467163,grad_norm: 0.9913378313698088, iteration: 154307
loss: 1.0505720376968384,grad_norm: 0.9999998857547877, iteration: 154308
loss: 1.0405157804489136,grad_norm: 0.9999992546421409, iteration: 154309
loss: 1.042993426322937,grad_norm: 0.999999247651836, iteration: 154310
loss: 1.0212576389312744,grad_norm: 0.9999988708995856, iteration: 154311
loss: 1.0918259620666504,grad_norm: 0.9999991284767491, iteration: 154312
loss: 1.0148866176605225,grad_norm: 0.878579035667748, iteration: 154313
loss: 1.0335500240325928,grad_norm: 0.999999742986646, iteration: 154314
loss: 0.9557866454124451,grad_norm: 0.9999989803143305, iteration: 154315
loss: 0.9994584918022156,grad_norm: 0.8107627618572452, iteration: 154316
loss: 1.0038496255874634,grad_norm: 0.9999992087522686, iteration: 154317
loss: 1.0229885578155518,grad_norm: 0.9999999648216549, iteration: 154318
loss: 0.972508430480957,grad_norm: 0.9145786350351727, iteration: 154319
loss: 0.9811475872993469,grad_norm: 0.9485756502033997, iteration: 154320
loss: 1.0375460386276245,grad_norm: 0.9999996336480507, iteration: 154321
loss: 0.9961174130439758,grad_norm: 0.9999990651916755, iteration: 154322
loss: 1.0177022218704224,grad_norm: 0.9000172063577613, iteration: 154323
loss: 1.0266753435134888,grad_norm: 0.9999990620982298, iteration: 154324
loss: 0.9804493188858032,grad_norm: 0.9576219381192014, iteration: 154325
loss: 0.9889448285102844,grad_norm: 0.9999992048254692, iteration: 154326
loss: 0.9992231130599976,grad_norm: 0.9999992694760287, iteration: 154327
loss: 0.995112419128418,grad_norm: 0.9931617315034333, iteration: 154328
loss: 0.9739848375320435,grad_norm: 0.9999990629044121, iteration: 154329
loss: 0.9720751047134399,grad_norm: 0.9560594995967742, iteration: 154330
loss: 0.9799820780754089,grad_norm: 0.9999997480598548, iteration: 154331
loss: 0.9947401285171509,grad_norm: 0.8944011687818453, iteration: 154332
loss: 0.9809786081314087,grad_norm: 0.9999991648896179, iteration: 154333
loss: 0.9920633435249329,grad_norm: 0.7892436052547787, iteration: 154334
loss: 1.0235294103622437,grad_norm: 0.9999991816408326, iteration: 154335
loss: 1.0680443048477173,grad_norm: 1.0000000494918955, iteration: 154336
loss: 0.9863388538360596,grad_norm: 0.999999840646934, iteration: 154337
loss: 1.1862361431121826,grad_norm: 0.999999464364235, iteration: 154338
loss: 0.9845196008682251,grad_norm: 0.9999989259800685, iteration: 154339
loss: 0.9988259673118591,grad_norm: 0.9999992594328683, iteration: 154340
loss: 1.0790741443634033,grad_norm: 0.9999998283149054, iteration: 154341
loss: 1.0031471252441406,grad_norm: 0.7734476779094579, iteration: 154342
loss: 1.0206923484802246,grad_norm: 0.9999991504496831, iteration: 154343
loss: 0.9687057137489319,grad_norm: 0.999999071900375, iteration: 154344
loss: 1.019342064857483,grad_norm: 0.9999991425823166, iteration: 154345
loss: 1.0076682567596436,grad_norm: 0.9999993031738437, iteration: 154346
loss: 1.000258445739746,grad_norm: 0.8972344113358869, iteration: 154347
loss: 0.9872052669525146,grad_norm: 0.9552241121448451, iteration: 154348
loss: 1.019620656967163,grad_norm: 0.9999997773753089, iteration: 154349
loss: 0.9797517657279968,grad_norm: 0.9999991070901, iteration: 154350
loss: 1.017109990119934,grad_norm: 0.9999990968231337, iteration: 154351
loss: 1.0296289920806885,grad_norm: 0.9999994076375931, iteration: 154352
loss: 0.993524432182312,grad_norm: 0.99999896340173, iteration: 154353
loss: 1.012630581855774,grad_norm: 0.9999992022216961, iteration: 154354
loss: 1.0192501544952393,grad_norm: 0.9881191519553265, iteration: 154355
loss: 1.0185178518295288,grad_norm: 0.9999991744801237, iteration: 154356
loss: 0.9943642616271973,grad_norm: 0.9894542406879862, iteration: 154357
loss: 1.00035560131073,grad_norm: 0.9076213614260944, iteration: 154358
loss: 1.0100114345550537,grad_norm: 0.9999990573813419, iteration: 154359
loss: 0.9983339905738831,grad_norm: 0.9468293600258202, iteration: 154360
loss: 1.016271948814392,grad_norm: 0.9999991835655896, iteration: 154361
loss: 1.0358359813690186,grad_norm: 0.9999990105106655, iteration: 154362
loss: 0.9873661398887634,grad_norm: 0.936691800861681, iteration: 154363
loss: 0.9827359318733215,grad_norm: 0.996537006709342, iteration: 154364
loss: 1.0127612352371216,grad_norm: 0.9999990383285086, iteration: 154365
loss: 0.9787077307701111,grad_norm: 0.8468568058028924, iteration: 154366
loss: 0.9876812100410461,grad_norm: 0.8932811217715881, iteration: 154367
loss: 1.0065311193466187,grad_norm: 0.9999991902092339, iteration: 154368
loss: 0.9957934021949768,grad_norm: 0.9999991288432903, iteration: 154369
loss: 0.9874369502067566,grad_norm: 0.9999989966653475, iteration: 154370
loss: 0.9940715432167053,grad_norm: 0.9999989915218841, iteration: 154371
loss: 1.0429900884628296,grad_norm: 0.9999992519105396, iteration: 154372
loss: 0.9884014129638672,grad_norm: 0.9741239083107778, iteration: 154373
loss: 1.021934151649475,grad_norm: 0.9999992346526134, iteration: 154374
loss: 1.0020796060562134,grad_norm: 0.9925288011088752, iteration: 154375
loss: 1.000644326210022,grad_norm: 0.9999992303891031, iteration: 154376
loss: 0.975673496723175,grad_norm: 0.987669394336399, iteration: 154377
loss: 1.0124239921569824,grad_norm: 0.9999991611833435, iteration: 154378
loss: 0.9716623425483704,grad_norm: 0.9999998374259, iteration: 154379
loss: 1.0274934768676758,grad_norm: 0.9999991145312864, iteration: 154380
loss: 1.0790674686431885,grad_norm: 0.9999998778092741, iteration: 154381
loss: 1.0284132957458496,grad_norm: 0.9278778224733544, iteration: 154382
loss: 1.0085433721542358,grad_norm: 0.9841574089204571, iteration: 154383
loss: 0.9871006011962891,grad_norm: 0.9999991837209411, iteration: 154384
loss: 0.9902134537696838,grad_norm: 0.9097853396111166, iteration: 154385
loss: 0.9711936116218567,grad_norm: 0.9999992264196499, iteration: 154386
loss: 0.9780341386795044,grad_norm: 0.9999990866215709, iteration: 154387
loss: 1.191088318824768,grad_norm: 0.9999998301675798, iteration: 154388
loss: 1.0458723306655884,grad_norm: 0.9999998065110675, iteration: 154389
loss: 0.995171308517456,grad_norm: 0.9196506235784283, iteration: 154390
loss: 1.0989795923233032,grad_norm: 0.9999996009913957, iteration: 154391
loss: 0.9987289309501648,grad_norm: 0.9962655932837288, iteration: 154392
loss: 0.9575658440589905,grad_norm: 0.9999990741065649, iteration: 154393
loss: 0.9989321827888489,grad_norm: 0.9999999566559304, iteration: 154394
loss: 1.0120140314102173,grad_norm: 0.9635163807853215, iteration: 154395
loss: 1.0411492586135864,grad_norm: 0.9999995892748974, iteration: 154396
loss: 1.0021882057189941,grad_norm: 0.9696790846774026, iteration: 154397
loss: 1.0139122009277344,grad_norm: 0.9999991979952018, iteration: 154398
loss: 1.0051414966583252,grad_norm: 0.9999992294739227, iteration: 154399
loss: 0.9873519539833069,grad_norm: 0.99999928420187, iteration: 154400
loss: 0.994225025177002,grad_norm: 0.9901875912297156, iteration: 154401
loss: 0.9849975109100342,grad_norm: 0.9999993438885584, iteration: 154402
loss: 0.9679445028305054,grad_norm: 0.8592128115939118, iteration: 154403
loss: 1.0097808837890625,grad_norm: 0.9999990836524815, iteration: 154404
loss: 1.0478695631027222,grad_norm: 0.9999992250598415, iteration: 154405
loss: 1.0141557455062866,grad_norm: 0.999998972238901, iteration: 154406
loss: 1.2914490699768066,grad_norm: 0.9999991940954588, iteration: 154407
loss: 0.9854511022567749,grad_norm: 0.999999223328705, iteration: 154408
loss: 0.965067982673645,grad_norm: 0.8351177451634771, iteration: 154409
loss: 0.9531722068786621,grad_norm: 0.999999156809581, iteration: 154410
loss: 1.0211313962936401,grad_norm: 0.9768947187272949, iteration: 154411
loss: 0.971316397190094,grad_norm: 0.9832494290657355, iteration: 154412
loss: 1.0629749298095703,grad_norm: 0.9999999741053517, iteration: 154413
loss: 1.0369470119476318,grad_norm: 0.9999992096720318, iteration: 154414
loss: 0.9683658480644226,grad_norm: 0.9349955757790083, iteration: 154415
loss: 1.0191148519515991,grad_norm: 0.9655926593711487, iteration: 154416
loss: 1.0719860792160034,grad_norm: 0.9999992852510401, iteration: 154417
loss: 0.9548653364181519,grad_norm: 0.999999173454819, iteration: 154418
loss: 1.0069259405136108,grad_norm: 0.8854955837094312, iteration: 154419
loss: 1.0242934226989746,grad_norm: 0.936165813307281, iteration: 154420
loss: 1.005739688873291,grad_norm: 0.9553434347353531, iteration: 154421
loss: 1.014195203781128,grad_norm: 0.9999994060315138, iteration: 154422
loss: 0.9772443175315857,grad_norm: 0.9302443372513913, iteration: 154423
loss: 0.99305260181427,grad_norm: 0.9242304134610292, iteration: 154424
loss: 0.9950308203697205,grad_norm: 0.9849334375973151, iteration: 154425
loss: 0.9926414489746094,grad_norm: 0.9999990148383298, iteration: 154426
loss: 0.9727606177330017,grad_norm: 0.9737695584840834, iteration: 154427
loss: 1.0268887281417847,grad_norm: 0.9896344060623469, iteration: 154428
loss: 0.9724884033203125,grad_norm: 0.9999990706283753, iteration: 154429
loss: 1.0078715085983276,grad_norm: 0.9999993158768384, iteration: 154430
loss: 0.9983015656471252,grad_norm: 0.9999990293777054, iteration: 154431
loss: 1.0296037197113037,grad_norm: 0.9999991286530359, iteration: 154432
loss: 1.0219000577926636,grad_norm: 0.9999991321726281, iteration: 154433
loss: 1.0086066722869873,grad_norm: 0.9999990830348859, iteration: 154434
loss: 1.0405526161193848,grad_norm: 0.91828070110093, iteration: 154435
loss: 1.0076723098754883,grad_norm: 0.9999990732125682, iteration: 154436
loss: 1.0186628103256226,grad_norm: 0.9999992964685294, iteration: 154437
loss: 1.008718729019165,grad_norm: 0.8445459593645298, iteration: 154438
loss: 1.0166767835617065,grad_norm: 0.9999991871885742, iteration: 154439
loss: 0.979883074760437,grad_norm: 0.9566278567891764, iteration: 154440
loss: 0.9727777242660522,grad_norm: 0.9141585515393038, iteration: 154441
loss: 0.9853856563568115,grad_norm: 0.9999991228990521, iteration: 154442
loss: 1.0302950143814087,grad_norm: 0.9999990183147431, iteration: 154443
loss: 1.0221549272537231,grad_norm: 0.9999991023855501, iteration: 154444
loss: 0.9853296875953674,grad_norm: 0.9999992550979002, iteration: 154445
loss: 1.0330814123153687,grad_norm: 0.9999991629830747, iteration: 154446
loss: 1.0167516469955444,grad_norm: 0.9999990548236479, iteration: 154447
loss: 0.9989530444145203,grad_norm: 0.855094715425067, iteration: 154448
loss: 0.9786997437477112,grad_norm: 0.9999992269094937, iteration: 154449
loss: 1.0200371742248535,grad_norm: 0.9999989300785227, iteration: 154450
loss: 0.9699084162712097,grad_norm: 0.9680196875794704, iteration: 154451
loss: 0.9891434907913208,grad_norm: 0.999999342448902, iteration: 154452
loss: 0.9680255055427551,grad_norm: 0.9755833910434422, iteration: 154453
loss: 0.9544345736503601,grad_norm: 0.8502948260116536, iteration: 154454
loss: 0.9807215332984924,grad_norm: 0.9999990853093695, iteration: 154455
loss: 1.003230094909668,grad_norm: 0.9999992648612188, iteration: 154456
loss: 0.956052839756012,grad_norm: 0.9999989826467094, iteration: 154457
loss: 0.9877374768257141,grad_norm: 0.8939118384578305, iteration: 154458
loss: 1.0181015729904175,grad_norm: 0.9999992950229234, iteration: 154459
loss: 1.0334161520004272,grad_norm: 0.9999994652423991, iteration: 154460
loss: 1.0196548700332642,grad_norm: 0.8922781473279057, iteration: 154461
loss: 0.9704270362854004,grad_norm: 0.9397442816041636, iteration: 154462
loss: 1.0135629177093506,grad_norm: 0.9999991026657703, iteration: 154463
loss: 1.0017489194869995,grad_norm: 0.9999992526447093, iteration: 154464
loss: 1.0120564699172974,grad_norm: 0.9999991831635457, iteration: 154465
loss: 1.0175588130950928,grad_norm: 0.9999992450072888, iteration: 154466
loss: 0.9953722357749939,grad_norm: 0.9052861951687764, iteration: 154467
loss: 1.0200363397598267,grad_norm: 0.9999991252514625, iteration: 154468
loss: 1.0074251890182495,grad_norm: 0.9999990172987655, iteration: 154469
loss: 0.9838694334030151,grad_norm: 0.9999993007786849, iteration: 154470
loss: 0.9971698522567749,grad_norm: 0.9999991916936594, iteration: 154471
loss: 0.983445942401886,grad_norm: 0.9332404207246253, iteration: 154472
loss: 1.0096455812454224,grad_norm: 0.8262615152119028, iteration: 154473
loss: 0.9958747625350952,grad_norm: 0.9999991053893881, iteration: 154474
loss: 0.9942042231559753,grad_norm: 0.9999992472039675, iteration: 154475
loss: 1.0005335807800293,grad_norm: 0.9419587673033326, iteration: 154476
loss: 0.9943479299545288,grad_norm: 0.9999990902102215, iteration: 154477
loss: 0.9594570994377136,grad_norm: 0.8318268582736879, iteration: 154478
loss: 1.0059207677841187,grad_norm: 0.9999990171712727, iteration: 154479
loss: 1.042549729347229,grad_norm: 0.9325308103106423, iteration: 154480
loss: 0.9930605888366699,grad_norm: 0.9999990490887221, iteration: 154481
loss: 1.0195040702819824,grad_norm: 0.9691912003890382, iteration: 154482
loss: 1.0105667114257812,grad_norm: 0.9999991440592809, iteration: 154483
loss: 0.9549478888511658,grad_norm: 0.9999997025832287, iteration: 154484
loss: 1.0151746273040771,grad_norm: 0.9999992409022034, iteration: 154485
loss: 0.968325138092041,grad_norm: 0.9999992300653319, iteration: 154486
loss: 1.0180985927581787,grad_norm: 0.9999992119230893, iteration: 154487
loss: 0.9647154211997986,grad_norm: 0.9999992882292681, iteration: 154488
loss: 0.973773181438446,grad_norm: 0.9999992981922875, iteration: 154489
loss: 0.9757890105247498,grad_norm: 0.9999990290218185, iteration: 154490
loss: 0.9322054386138916,grad_norm: 0.9480850155550676, iteration: 154491
loss: 0.9793878197669983,grad_norm: 0.8168626867576674, iteration: 154492
loss: 0.9817292094230652,grad_norm: 0.9378195435113238, iteration: 154493
loss: 1.0505244731903076,grad_norm: 0.9999993251316546, iteration: 154494
loss: 1.0434201955795288,grad_norm: 0.8078531636028149, iteration: 154495
loss: 1.0169932842254639,grad_norm: 0.8399329416516398, iteration: 154496
loss: 0.9743231534957886,grad_norm: 0.975394786357283, iteration: 154497
loss: 0.9679529070854187,grad_norm: 0.9999991487161862, iteration: 154498
loss: 1.013572335243225,grad_norm: 0.8757614925036151, iteration: 154499
loss: 1.0023020505905151,grad_norm: 0.9575593375894911, iteration: 154500
loss: 0.9672202467918396,grad_norm: 0.8454642589537792, iteration: 154501
loss: 1.0029950141906738,grad_norm: 0.9280683062478013, iteration: 154502
loss: 1.106367588043213,grad_norm: 0.9999995103458513, iteration: 154503
loss: 0.986392080783844,grad_norm: 0.9999991118205728, iteration: 154504
loss: 1.0179640054702759,grad_norm: 0.8338196376210679, iteration: 154505
loss: 1.0405219793319702,grad_norm: 0.999999987378051, iteration: 154506
loss: 0.9573026299476624,grad_norm: 0.9665356831697605, iteration: 154507
loss: 0.9960518479347229,grad_norm: 0.9999991126194461, iteration: 154508
loss: 1.0045136213302612,grad_norm: 0.999999121295947, iteration: 154509
loss: 1.0290073156356812,grad_norm: 0.9999991961006538, iteration: 154510
loss: 1.0068113803863525,grad_norm: 0.9999991223099518, iteration: 154511
loss: 1.0240528583526611,grad_norm: 0.9999991229615819, iteration: 154512
loss: 0.9721114635467529,grad_norm: 0.9999990941053521, iteration: 154513
loss: 0.985769510269165,grad_norm: 0.9999990469930211, iteration: 154514
loss: 0.9955400824546814,grad_norm: 0.9999989094934016, iteration: 154515
loss: 0.9924671649932861,grad_norm: 0.9436286766838193, iteration: 154516
loss: 0.9886656999588013,grad_norm: 0.9878880927128777, iteration: 154517
loss: 1.0040531158447266,grad_norm: 0.9999991519120814, iteration: 154518
loss: 1.0083707571029663,grad_norm: 0.9999991613165494, iteration: 154519
loss: 0.9927141666412354,grad_norm: 0.9999990039339569, iteration: 154520
loss: 0.9768880605697632,grad_norm: 0.8731774110155269, iteration: 154521
loss: 0.9721601009368896,grad_norm: 0.9999991583698863, iteration: 154522
loss: 0.9845999479293823,grad_norm: 0.9999990444621429, iteration: 154523
loss: 0.9972606897354126,grad_norm: 0.9999990213689512, iteration: 154524
loss: 1.0316988229751587,grad_norm: 0.9999992306351345, iteration: 154525
loss: 1.0101367235183716,grad_norm: 0.999999197450162, iteration: 154526
loss: 1.0250566005706787,grad_norm: 0.9999989924700305, iteration: 154527
loss: 1.0176538228988647,grad_norm: 0.9999990410062579, iteration: 154528
loss: 1.0473573207855225,grad_norm: 0.9999993831889117, iteration: 154529
loss: 0.9978988170623779,grad_norm: 0.999999049302867, iteration: 154530
loss: 0.9707260727882385,grad_norm: 0.9418558228075249, iteration: 154531
loss: 0.9645702242851257,grad_norm: 0.9999990835615106, iteration: 154532
loss: 0.992318332195282,grad_norm: 0.9010938678041077, iteration: 154533
loss: 1.030821442604065,grad_norm: 0.99999915471468, iteration: 154534
loss: 0.9719066619873047,grad_norm: 0.9810588033604299, iteration: 154535
loss: 1.0309950113296509,grad_norm: 0.9999992819326691, iteration: 154536
loss: 0.9911139607429504,grad_norm: 0.931457483514151, iteration: 154537
loss: 0.9775577187538147,grad_norm: 0.9168057305579265, iteration: 154538
loss: 0.9862068891525269,grad_norm: 0.8981481583544907, iteration: 154539
loss: 1.0210105180740356,grad_norm: 0.9999991000294455, iteration: 154540
loss: 0.9843348264694214,grad_norm: 0.9999990502725681, iteration: 154541
loss: 1.0451968908309937,grad_norm: 0.9999997922327217, iteration: 154542
loss: 1.0327990055084229,grad_norm: 0.9999990990519413, iteration: 154543
loss: 0.9687954187393188,grad_norm: 0.9999991209007136, iteration: 154544
loss: 1.0027565956115723,grad_norm: 0.9999991338433181, iteration: 154545
loss: 1.0055034160614014,grad_norm: 0.9610453428599434, iteration: 154546
loss: 0.9770431518554688,grad_norm: 0.9763075418964312, iteration: 154547
loss: 1.0285428762435913,grad_norm: 0.9493481776853291, iteration: 154548
loss: 0.9805319309234619,grad_norm: 0.9999990653017311, iteration: 154549
loss: 0.9696113467216492,grad_norm: 0.9060659250802829, iteration: 154550
loss: 1.0333272218704224,grad_norm: 0.9999990646610358, iteration: 154551
loss: 1.0096060037612915,grad_norm: 0.9703019058907842, iteration: 154552
loss: 0.97450852394104,grad_norm: 0.9992122752115576, iteration: 154553
loss: 0.9764031767845154,grad_norm: 0.9868526262511864, iteration: 154554
loss: 1.0286993980407715,grad_norm: 0.9530594688943449, iteration: 154555
loss: 0.9930289387702942,grad_norm: 0.9999992372875633, iteration: 154556
loss: 0.993522047996521,grad_norm: 0.9412442488892069, iteration: 154557
loss: 0.9712408185005188,grad_norm: 0.9419874612265469, iteration: 154558
loss: 0.9582058787345886,grad_norm: 0.9606824074064099, iteration: 154559
loss: 1.047831416130066,grad_norm: 0.9999990370154584, iteration: 154560
loss: 0.9825013279914856,grad_norm: 0.9999990680462034, iteration: 154561
loss: 1.0808435678482056,grad_norm: 0.9999992892024091, iteration: 154562
loss: 0.9623176455497742,grad_norm: 0.9714091602291748, iteration: 154563
loss: 1.0250705480575562,grad_norm: 0.9999991485371843, iteration: 154564
loss: 0.9996607899665833,grad_norm: 0.9999991470823467, iteration: 154565
loss: 0.9863471388816833,grad_norm: 0.9999992295589547, iteration: 154566
loss: 0.9561035633087158,grad_norm: 0.9999990928645107, iteration: 154567
loss: 1.0040960311889648,grad_norm: 0.9999991915737052, iteration: 154568
loss: 1.016230583190918,grad_norm: 0.9999998207680015, iteration: 154569
loss: 1.009726643562317,grad_norm: 0.9999994541912254, iteration: 154570
loss: 1.0062710046768188,grad_norm: 0.999999183185956, iteration: 154571
loss: 1.0216896533966064,grad_norm: 0.9999994461484836, iteration: 154572
loss: 1.0132899284362793,grad_norm: 0.9319512743608696, iteration: 154573
loss: 1.0118821859359741,grad_norm: 0.9171509558826982, iteration: 154574
loss: 1.0084728002548218,grad_norm: 0.9366162297790892, iteration: 154575
loss: 0.9875085353851318,grad_norm: 0.9930135999576662, iteration: 154576
loss: 1.0914952754974365,grad_norm: 0.9999990869860252, iteration: 154577
loss: 1.0044890642166138,grad_norm: 0.999999030002256, iteration: 154578
loss: 0.9854028820991516,grad_norm: 0.950508719053995, iteration: 154579
loss: 1.1842924356460571,grad_norm: 0.9999998482422656, iteration: 154580
loss: 1.04856538772583,grad_norm: 0.9999996580004916, iteration: 154581
loss: 0.9900864958763123,grad_norm: 0.9401103611364795, iteration: 154582
loss: 0.970894455909729,grad_norm: 0.9999992293040373, iteration: 154583
loss: 0.963618278503418,grad_norm: 0.8773187579866042, iteration: 154584
loss: 1.0574356317520142,grad_norm: 0.9999993018493795, iteration: 154585
loss: 0.9983353614807129,grad_norm: 0.999999259897596, iteration: 154586
loss: 0.9704478979110718,grad_norm: 0.827108212576856, iteration: 154587
loss: 1.0041680335998535,grad_norm: 0.9999991968214355, iteration: 154588
loss: 1.0280243158340454,grad_norm: 0.9999996446089664, iteration: 154589
loss: 1.0031081438064575,grad_norm: 0.8937897191413517, iteration: 154590
loss: 1.0371308326721191,grad_norm: 0.9999990303953633, iteration: 154591
loss: 1.0090771913528442,grad_norm: 0.9352272415013404, iteration: 154592
loss: 1.0053162574768066,grad_norm: 0.999999295060032, iteration: 154593
loss: 1.0681780576705933,grad_norm: 0.9999990907576434, iteration: 154594
loss: 1.005461573600769,grad_norm: 0.8318496596253265, iteration: 154595
loss: 0.9792959094047546,grad_norm: 0.9999991925468286, iteration: 154596
loss: 0.9989093542098999,grad_norm: 0.999999171315089, iteration: 154597
loss: 1.0106968879699707,grad_norm: 0.9035551731599667, iteration: 154598
loss: 1.0760703086853027,grad_norm: 0.9999992849459519, iteration: 154599
loss: 1.0145057439804077,grad_norm: 0.9800499952785532, iteration: 154600
loss: 0.9801150560379028,grad_norm: 0.9999991212321541, iteration: 154601
loss: 0.9926198720932007,grad_norm: 0.9999997113537932, iteration: 154602
loss: 1.0444213151931763,grad_norm: 0.9999991237139191, iteration: 154603
loss: 1.126004695892334,grad_norm: 0.999999568965336, iteration: 154604
loss: 0.9429372549057007,grad_norm: 0.9999990707343271, iteration: 154605
loss: 1.0145025253295898,grad_norm: 0.9253087780920846, iteration: 154606
loss: 0.9995308518409729,grad_norm: 0.9999992742627039, iteration: 154607
loss: 0.9979526996612549,grad_norm: 0.9999989438688526, iteration: 154608
loss: 1.0232354402542114,grad_norm: 0.9999991713825773, iteration: 154609
loss: 1.0591915845870972,grad_norm: 0.9999990804887254, iteration: 154610
loss: 1.0231579542160034,grad_norm: 0.9810244585271657, iteration: 154611
loss: 1.0647956132888794,grad_norm: 0.9999991584076994, iteration: 154612
loss: 0.9942060708999634,grad_norm: 0.9999992193739857, iteration: 154613
loss: 0.9899348020553589,grad_norm: 0.9308403757882767, iteration: 154614
loss: 0.956966757774353,grad_norm: 0.999230111143459, iteration: 154615
loss: 0.9886007308959961,grad_norm: 0.9999990711320133, iteration: 154616
loss: 1.0316762924194336,grad_norm: 0.87170822493615, iteration: 154617
loss: 1.1216343641281128,grad_norm: 0.9999995321550742, iteration: 154618
loss: 1.0046440362930298,grad_norm: 0.8971887227374807, iteration: 154619
loss: 0.9977708458900452,grad_norm: 0.8911588515576175, iteration: 154620
loss: 0.9894364476203918,grad_norm: 0.999999170935899, iteration: 154621
loss: 0.971973717212677,grad_norm: 0.9729250579115915, iteration: 154622
loss: 1.0025410652160645,grad_norm: 0.9335604169919978, iteration: 154623
loss: 0.9615298509597778,grad_norm: 0.795572202869311, iteration: 154624
loss: 0.9742841720581055,grad_norm: 0.9683199024328968, iteration: 154625
loss: 1.001116394996643,grad_norm: 0.9732913368527067, iteration: 154626
loss: 1.0117243528366089,grad_norm: 0.9999990636822901, iteration: 154627
loss: 0.9810624718666077,grad_norm: 0.9999991687926675, iteration: 154628
loss: 0.9632816314697266,grad_norm: 0.9603158455960518, iteration: 154629
loss: 1.0710285902023315,grad_norm: 0.999999963834567, iteration: 154630
loss: 1.0171691179275513,grad_norm: 0.9999995734443519, iteration: 154631
loss: 0.9881951808929443,grad_norm: 0.999999177207221, iteration: 154632
loss: 0.9874646663665771,grad_norm: 0.9390074543143151, iteration: 154633
loss: 0.9941025376319885,grad_norm: 0.9999992386627087, iteration: 154634
loss: 1.0338902473449707,grad_norm: 0.9999998867973611, iteration: 154635
loss: 1.0023049116134644,grad_norm: 0.9999991421687349, iteration: 154636
loss: 0.9956099390983582,grad_norm: 0.9999990979931701, iteration: 154637
loss: 0.9972306489944458,grad_norm: 0.9797843957615667, iteration: 154638
loss: 1.020066499710083,grad_norm: 0.9999990733885395, iteration: 154639
loss: 0.9608444571495056,grad_norm: 0.9117354133314336, iteration: 154640
loss: 0.9924980998039246,grad_norm: 0.9999990235211372, iteration: 154641
loss: 1.008180856704712,grad_norm: 0.9999996223052671, iteration: 154642
loss: 0.9997747540473938,grad_norm: 0.8748325076186954, iteration: 154643
loss: 1.005035400390625,grad_norm: 0.9999991651839721, iteration: 154644
loss: 1.0135197639465332,grad_norm: 0.9999998770954504, iteration: 154645
loss: 1.0208970308303833,grad_norm: 0.9947870800138505, iteration: 154646
loss: 1.0319876670837402,grad_norm: 0.9999991283535341, iteration: 154647
loss: 0.9919590950012207,grad_norm: 0.9072089116431562, iteration: 154648
loss: 0.9904439449310303,grad_norm: 0.9999991389459021, iteration: 154649
loss: 1.0008903741836548,grad_norm: 0.9592261099740124, iteration: 154650
loss: 1.025891661643982,grad_norm: 0.9648759817121549, iteration: 154651
loss: 1.0201114416122437,grad_norm: 0.9999991779430017, iteration: 154652
loss: 0.9871106147766113,grad_norm: 0.9999991544083492, iteration: 154653
loss: 0.9925675988197327,grad_norm: 0.9999992939390142, iteration: 154654
loss: 1.0127943754196167,grad_norm: 0.9910361936188877, iteration: 154655
loss: 0.9841417670249939,grad_norm: 0.9999990554536817, iteration: 154656
loss: 1.011034369468689,grad_norm: 0.9999993425939038, iteration: 154657
loss: 1.0206416845321655,grad_norm: 0.999999107128298, iteration: 154658
loss: 1.0375441312789917,grad_norm: 0.9999999238694206, iteration: 154659
loss: 1.270264983177185,grad_norm: 0.9999992054221424, iteration: 154660
loss: 0.971336841583252,grad_norm: 0.9712376594231388, iteration: 154661
loss: 1.0458728075027466,grad_norm: 0.9999996341060688, iteration: 154662
loss: 1.0041810274124146,grad_norm: 0.845474960757446, iteration: 154663
loss: 1.0307680368423462,grad_norm: 0.999999195389324, iteration: 154664
loss: 1.0176072120666504,grad_norm: 0.9999992034144577, iteration: 154665
loss: 0.9678170680999756,grad_norm: 0.9999992248733192, iteration: 154666
loss: 1.005017638206482,grad_norm: 0.9845282121281957, iteration: 154667
loss: 0.9953176379203796,grad_norm: 0.9999995479792336, iteration: 154668
loss: 1.0117924213409424,grad_norm: 0.9999990146800024, iteration: 154669
loss: 0.9882665276527405,grad_norm: 0.9529479888756058, iteration: 154670
loss: 0.9985146522521973,grad_norm: 0.9999991002120939, iteration: 154671
loss: 0.9830062985420227,grad_norm: 0.899357855079884, iteration: 154672
loss: 1.0011156797409058,grad_norm: 0.9522326966949266, iteration: 154673
loss: 1.0017009973526,grad_norm: 0.9529247888629604, iteration: 154674
loss: 1.0561741590499878,grad_norm: 0.999999229361962, iteration: 154675
loss: 1.0040614604949951,grad_norm: 0.9999990876462419, iteration: 154676
loss: 1.0367788076400757,grad_norm: 0.9999995129458533, iteration: 154677
loss: 1.0325547456741333,grad_norm: 0.9999990667219979, iteration: 154678
loss: 0.931182861328125,grad_norm: 0.9417575116296917, iteration: 154679
loss: 1.0156577825546265,grad_norm: 0.9999992476277253, iteration: 154680
loss: 0.9836437106132507,grad_norm: 0.999999570986069, iteration: 154681
loss: 1.0265766382217407,grad_norm: 0.9999990429521272, iteration: 154682
loss: 0.985198438167572,grad_norm: 0.9999990109127657, iteration: 154683
loss: 0.9956859946250916,grad_norm: 0.9601069522686775, iteration: 154684
loss: 1.0305088758468628,grad_norm: 0.9998321874272651, iteration: 154685
loss: 0.9961152076721191,grad_norm: 0.9618039728290785, iteration: 154686
loss: 1.0416299104690552,grad_norm: 0.9999993761380447, iteration: 154687
loss: 0.9855871796607971,grad_norm: 0.9999990921033028, iteration: 154688
loss: 0.9922066330909729,grad_norm: 0.9043322664395914, iteration: 154689
loss: 1.021041989326477,grad_norm: 0.9999989759239799, iteration: 154690
loss: 1.079026699066162,grad_norm: 0.9999996962335527, iteration: 154691
loss: 0.9944934248924255,grad_norm: 0.958440756708398, iteration: 154692
loss: 0.9890633225440979,grad_norm: 0.9999992328256472, iteration: 154693
loss: 1.0031205415725708,grad_norm: 0.8726121028103604, iteration: 154694
loss: 1.0028585195541382,grad_norm: 0.9999990515048611, iteration: 154695
loss: 0.9921727776527405,grad_norm: 0.9999990725149522, iteration: 154696
loss: 0.9907308220863342,grad_norm: 0.794449325543309, iteration: 154697
loss: 1.107487678527832,grad_norm: 0.9999991029753417, iteration: 154698
loss: 1.0238112211227417,grad_norm: 0.954701059023374, iteration: 154699
loss: 0.9822693467140198,grad_norm: 0.9271050864419815, iteration: 154700
loss: 1.018350601196289,grad_norm: 0.9999991232090825, iteration: 154701
loss: 0.9754104018211365,grad_norm: 0.9999991417530698, iteration: 154702
loss: 1.0311427116394043,grad_norm: 0.9999989645307512, iteration: 154703
loss: 1.0119820833206177,grad_norm: 0.9110821645951659, iteration: 154704
loss: 1.0503835678100586,grad_norm: 0.8928882776794453, iteration: 154705
loss: 1.066382884979248,grad_norm: 0.9999992282713207, iteration: 154706
loss: 0.9835577011108398,grad_norm: 0.9999991186118413, iteration: 154707
loss: 1.0458370447158813,grad_norm: 0.9999995508405038, iteration: 154708
loss: 1.0449315309524536,grad_norm: 0.999999366679049, iteration: 154709
loss: 0.970784604549408,grad_norm: 0.9999991099179586, iteration: 154710
loss: 1.019266128540039,grad_norm: 0.9387623957640611, iteration: 154711
loss: 0.9726438522338867,grad_norm: 0.9999995238228675, iteration: 154712
loss: 1.040330410003662,grad_norm: 0.9999998924466798, iteration: 154713
loss: 0.9830508828163147,grad_norm: 0.9308301505233616, iteration: 154714
loss: 0.972352147102356,grad_norm: 0.9456212978345853, iteration: 154715
loss: 0.9875309467315674,grad_norm: 0.999999285669671, iteration: 154716
loss: 1.0376310348510742,grad_norm: 0.8784703526383342, iteration: 154717
loss: 0.96675705909729,grad_norm: 0.9999993092427107, iteration: 154718
loss: 1.1519272327423096,grad_norm: 0.9999996758856368, iteration: 154719
loss: 1.0155082941055298,grad_norm: 0.9999995862874343, iteration: 154720
loss: 1.0067967176437378,grad_norm: 0.9999990986015093, iteration: 154721
loss: 0.9603872299194336,grad_norm: 0.9859987011593525, iteration: 154722
loss: 1.0228078365325928,grad_norm: 0.821105582169546, iteration: 154723
loss: 1.0214837789535522,grad_norm: 0.9999991183058223, iteration: 154724
loss: 0.9754503965377808,grad_norm: 0.9999990568605495, iteration: 154725
loss: 0.9867739081382751,grad_norm: 0.9999991063156513, iteration: 154726
loss: 1.0113376379013062,grad_norm: 0.9999993070304022, iteration: 154727
loss: 1.032745599746704,grad_norm: 0.9999991902235184, iteration: 154728
loss: 1.0418789386749268,grad_norm: 0.999999095590825, iteration: 154729
loss: 0.995918333530426,grad_norm: 0.9999990022760908, iteration: 154730
loss: 0.9682551622390747,grad_norm: 0.9999992100139541, iteration: 154731
loss: 1.0039606094360352,grad_norm: 0.9999989869672543, iteration: 154732
loss: 0.9883196949958801,grad_norm: 0.999999046104973, iteration: 154733
loss: 1.0063244104385376,grad_norm: 0.9504872042780493, iteration: 154734
loss: 0.9961275458335876,grad_norm: 0.9999997253389018, iteration: 154735
loss: 1.005293369293213,grad_norm: 0.867466581437103, iteration: 154736
loss: 0.9811228513717651,grad_norm: 0.9999989634138293, iteration: 154737
loss: 1.0533812046051025,grad_norm: 0.9402258642391412, iteration: 154738
loss: 1.0273469686508179,grad_norm: 0.9122840836739814, iteration: 154739
loss: 0.9911191463470459,grad_norm: 0.9999990640372513, iteration: 154740
loss: 0.9947199821472168,grad_norm: 0.9910893161579479, iteration: 154741
loss: 1.0102941989898682,grad_norm: 0.8710252462781856, iteration: 154742
loss: 1.0527263879776,grad_norm: 0.9999993134100185, iteration: 154743
loss: 0.9713364243507385,grad_norm: 0.9149560902131265, iteration: 154744
loss: 1.0163509845733643,grad_norm: 0.9999995139424672, iteration: 154745
loss: 1.0039355754852295,grad_norm: 0.9999989256556335, iteration: 154746
loss: 1.0581406354904175,grad_norm: 0.9999993044537127, iteration: 154747
loss: 1.0016345977783203,grad_norm: 0.9999991340331215, iteration: 154748
loss: 0.9851861000061035,grad_norm: 0.9547198486041178, iteration: 154749
loss: 0.978217363357544,grad_norm: 0.9766076101827925, iteration: 154750
loss: 0.9939479827880859,grad_norm: 0.9627841340585573, iteration: 154751
loss: 1.0023555755615234,grad_norm: 0.9882723247611301, iteration: 154752
loss: 0.9776986837387085,grad_norm: 0.9999993402118473, iteration: 154753
loss: 1.0773217678070068,grad_norm: 0.9999989807662488, iteration: 154754
loss: 1.1127550601959229,grad_norm: 0.9999989840948527, iteration: 154755
loss: 1.026015281677246,grad_norm: 0.9999990440310363, iteration: 154756
loss: 0.9704474806785583,grad_norm: 0.9119619660882825, iteration: 154757
loss: 1.0272642374038696,grad_norm: 0.9603093439864184, iteration: 154758
loss: 0.9924546480178833,grad_norm: 0.9999991694139538, iteration: 154759
loss: 1.0074090957641602,grad_norm: 0.9999992980231828, iteration: 154760
loss: 0.9898300766944885,grad_norm: 0.9999992253382197, iteration: 154761
loss: 1.0360928773880005,grad_norm: 0.8835756903918366, iteration: 154762
loss: 0.9939254522323608,grad_norm: 0.9999992185110368, iteration: 154763
loss: 1.005409598350525,grad_norm: 0.9999992887621869, iteration: 154764
loss: 1.0053420066833496,grad_norm: 0.8942590012564766, iteration: 154765
loss: 0.9934836626052856,grad_norm: 0.9852236621062131, iteration: 154766
loss: 0.9906648397445679,grad_norm: 0.9999992373122042, iteration: 154767
loss: 1.0619772672653198,grad_norm: 0.99999990697441, iteration: 154768
loss: 1.030696153640747,grad_norm: 0.9222270218197243, iteration: 154769
loss: 1.0370832681655884,grad_norm: 0.9999994717618731, iteration: 154770
loss: 0.9994034171104431,grad_norm: 0.8549364922606963, iteration: 154771
loss: 1.0073449611663818,grad_norm: 0.9917108863697637, iteration: 154772
loss: 0.988010048866272,grad_norm: 0.8819363628238369, iteration: 154773
loss: 0.9889059066772461,grad_norm: 0.9999990046123426, iteration: 154774
loss: 1.0082733631134033,grad_norm: 0.9999994021331773, iteration: 154775
loss: 1.015879511833191,grad_norm: 0.999999083637557, iteration: 154776
loss: 1.07381010055542,grad_norm: 0.9999997681607696, iteration: 154777
loss: 1.0234131813049316,grad_norm: 0.9999992148458092, iteration: 154778
loss: 0.9944855570793152,grad_norm: 0.999999098068679, iteration: 154779
loss: 1.0200759172439575,grad_norm: 0.993584181722675, iteration: 154780
loss: 1.0205142498016357,grad_norm: 0.9999992418988395, iteration: 154781
loss: 0.9950519800186157,grad_norm: 0.8597982066388965, iteration: 154782
loss: 0.9614189267158508,grad_norm: 0.996026357233915, iteration: 154783
loss: 1.0495651960372925,grad_norm: 0.9999996054669921, iteration: 154784
loss: 1.0327962636947632,grad_norm: 0.9999991102922661, iteration: 154785
loss: 1.028297781944275,grad_norm: 0.999999286718758, iteration: 154786
loss: 0.993149995803833,grad_norm: 0.8577552404351463, iteration: 154787
loss: 0.9894017577171326,grad_norm: 0.9999992509796748, iteration: 154788
loss: 0.9854969382286072,grad_norm: 0.9999996210951921, iteration: 154789
loss: 0.9695671796798706,grad_norm: 0.9999991150918247, iteration: 154790
loss: 0.9949849843978882,grad_norm: 0.9351540921683322, iteration: 154791
loss: 1.0104751586914062,grad_norm: 0.9999991463746012, iteration: 154792
loss: 1.0271703004837036,grad_norm: 0.9999993364420975, iteration: 154793
loss: 0.9894524812698364,grad_norm: 0.9999998692689939, iteration: 154794
loss: 1.0305697917938232,grad_norm: 0.999999223882805, iteration: 154795
loss: 0.9715718030929565,grad_norm: 0.9999995675822421, iteration: 154796
loss: 1.0345205068588257,grad_norm: 0.9274161946484825, iteration: 154797
loss: 1.1866408586502075,grad_norm: 0.9999994282899979, iteration: 154798
loss: 0.9988884925842285,grad_norm: 0.9999992154382582, iteration: 154799
loss: 0.9841097593307495,grad_norm: 0.9999989493338245, iteration: 154800
loss: 0.999487042427063,grad_norm: 0.8994797987115292, iteration: 154801
loss: 1.019268274307251,grad_norm: 0.9999993136478699, iteration: 154802
loss: 1.0858467817306519,grad_norm: 0.9858645056232668, iteration: 154803
loss: 0.961742639541626,grad_norm: 0.8497310060121026, iteration: 154804
loss: 0.9779437184333801,grad_norm: 0.9999990356015198, iteration: 154805
loss: 0.9966073036193848,grad_norm: 0.9999991337418509, iteration: 154806
loss: 0.9736649990081787,grad_norm: 0.9999991580912543, iteration: 154807
loss: 1.0409222841262817,grad_norm: 0.9999992490281987, iteration: 154808
loss: 1.0134849548339844,grad_norm: 0.9999990717353247, iteration: 154809
loss: 1.014106035232544,grad_norm: 0.8948282243166463, iteration: 154810
loss: 0.9713245034217834,grad_norm: 0.9999991865915028, iteration: 154811
loss: 0.9980974197387695,grad_norm: 0.9999991457614882, iteration: 154812
loss: 0.9932569861412048,grad_norm: 0.9999990902242661, iteration: 154813
loss: 0.9665727019309998,grad_norm: 0.999999227742423, iteration: 154814
loss: 0.9808797240257263,grad_norm: 0.8945241070130621, iteration: 154815
loss: 1.0036925077438354,grad_norm: 0.9999991731572669, iteration: 154816
loss: 1.0087015628814697,grad_norm: 0.9999993622201477, iteration: 154817
loss: 0.9849532842636108,grad_norm: 0.9916742344833289, iteration: 154818
loss: 1.002322793006897,grad_norm: 0.9055500446247379, iteration: 154819
loss: 1.0602484941482544,grad_norm: 0.9999995824409987, iteration: 154820
loss: 1.0384204387664795,grad_norm: 0.9999995512817834, iteration: 154821
loss: 1.102547526359558,grad_norm: 0.9999989988812531, iteration: 154822
loss: 0.9882038831710815,grad_norm: 0.9706659738657939, iteration: 154823
loss: 1.0037106275558472,grad_norm: 0.865846083707953, iteration: 154824
loss: 0.9808395504951477,grad_norm: 0.9319716277898907, iteration: 154825
loss: 0.9757615327835083,grad_norm: 0.9105647960079026, iteration: 154826
loss: 1.0630004405975342,grad_norm: 0.9999995129896555, iteration: 154827
loss: 0.9824844598770142,grad_norm: 0.9999991808724734, iteration: 154828
loss: 0.9999402761459351,grad_norm: 0.9999990854475598, iteration: 154829
loss: 1.0157243013381958,grad_norm: 0.9999992513543476, iteration: 154830
loss: 0.9939323663711548,grad_norm: 0.9179329398440321, iteration: 154831
loss: 1.0249508619308472,grad_norm: 0.9999991424160964, iteration: 154832
loss: 0.9910169839859009,grad_norm: 0.9999990803966237, iteration: 154833
loss: 1.0055131912231445,grad_norm: 0.9336634335182776, iteration: 154834
loss: 1.1344401836395264,grad_norm: 0.9999996032376397, iteration: 154835
loss: 0.9556182622909546,grad_norm: 0.9999992020901184, iteration: 154836
loss: 1.0456700325012207,grad_norm: 0.9999995941949277, iteration: 154837
loss: 0.9783643484115601,grad_norm: 0.929653795077714, iteration: 154838
loss: 1.0068731307983398,grad_norm: 0.9999990610153865, iteration: 154839
loss: 1.117577075958252,grad_norm: 0.9999992073395622, iteration: 154840
loss: 0.9957191944122314,grad_norm: 0.9999991580157723, iteration: 154841
loss: 1.0660874843597412,grad_norm: 0.9999994068052035, iteration: 154842
loss: 1.010514736175537,grad_norm: 0.9846143745950556, iteration: 154843
loss: 0.9991475939750671,grad_norm: 0.890130588682604, iteration: 154844
loss: 1.0164974927902222,grad_norm: 0.9471881116781462, iteration: 154845
loss: 0.9303669929504395,grad_norm: 0.9855538351653511, iteration: 154846
loss: 0.977340817451477,grad_norm: 0.9999991615007329, iteration: 154847
loss: 0.9961392879486084,grad_norm: 0.8948890096691778, iteration: 154848
loss: 0.9747936129570007,grad_norm: 0.9999989988490374, iteration: 154849
loss: 0.9923873543739319,grad_norm: 0.9999989861213444, iteration: 154850
loss: 1.0033316612243652,grad_norm: 0.9999990413544426, iteration: 154851
loss: 1.024924874305725,grad_norm: 0.9999991003599148, iteration: 154852
loss: 1.0350475311279297,grad_norm: 0.999998984777389, iteration: 154853
loss: 1.028687596321106,grad_norm: 0.9999990643994494, iteration: 154854
loss: 1.003726840019226,grad_norm: 0.8956706730213702, iteration: 154855
loss: 0.9720685482025146,grad_norm: 0.8467769869023913, iteration: 154856
loss: 0.9879335165023804,grad_norm: 0.9999991254671353, iteration: 154857
loss: 1.0163373947143555,grad_norm: 0.9999991883343553, iteration: 154858
loss: 1.0146058797836304,grad_norm: 0.9890751597951813, iteration: 154859
loss: 0.9882001280784607,grad_norm: 0.999999042477362, iteration: 154860
loss: 1.1177712678909302,grad_norm: 0.9909027936516743, iteration: 154861
loss: 0.9885640740394592,grad_norm: 0.885797033897021, iteration: 154862
loss: 1.0087816715240479,grad_norm: 0.999999075547773, iteration: 154863
loss: 1.0058881044387817,grad_norm: 0.9999991871081728, iteration: 154864
loss: 1.039886236190796,grad_norm: 0.9999991590015234, iteration: 154865
loss: 1.0512062311172485,grad_norm: 0.999999874225455, iteration: 154866
loss: 0.9918341040611267,grad_norm: 0.9234674167677093, iteration: 154867
loss: 0.9947172999382019,grad_norm: 0.9184844253104385, iteration: 154868
loss: 1.0020495653152466,grad_norm: 0.9518569229837845, iteration: 154869
loss: 0.9793239235877991,grad_norm: 0.8815737297222952, iteration: 154870
loss: 1.0366671085357666,grad_norm: 0.9999990430362252, iteration: 154871
loss: 0.9824727773666382,grad_norm: 0.9931353019354157, iteration: 154872
loss: 1.0067440271377563,grad_norm: 0.960253588017464, iteration: 154873
loss: 1.0257989168167114,grad_norm: 0.999999383690901, iteration: 154874
loss: 1.091442584991455,grad_norm: 0.9999994835019816, iteration: 154875
loss: 1.0150641202926636,grad_norm: 0.9999990677131312, iteration: 154876
loss: 1.0124022960662842,grad_norm: 0.9965199159431617, iteration: 154877
loss: 1.0356321334838867,grad_norm: 0.9999999332869037, iteration: 154878
loss: 1.1457414627075195,grad_norm: 0.9999992569233663, iteration: 154879
loss: 1.0951207876205444,grad_norm: 0.9442208347615593, iteration: 154880
loss: 1.166925311088562,grad_norm: 0.9999991875679765, iteration: 154881
loss: 1.0283927917480469,grad_norm: 0.9999994070811886, iteration: 154882
loss: 1.043879508972168,grad_norm: 0.9999994800086921, iteration: 154883
loss: 1.0008269548416138,grad_norm: 0.9999994465998197, iteration: 154884
loss: 1.0299137830734253,grad_norm: 0.8758223533752187, iteration: 154885
loss: 0.9980159997940063,grad_norm: 0.9999993178733263, iteration: 154886
loss: 1.0200756788253784,grad_norm: 0.9999989189393166, iteration: 154887
loss: 1.0036128759384155,grad_norm: 0.9999992204843599, iteration: 154888
loss: 1.2439550161361694,grad_norm: 0.9999998392019408, iteration: 154889
loss: 1.003678798675537,grad_norm: 0.9999991501030977, iteration: 154890
loss: 0.9839369654655457,grad_norm: 0.8831423463687356, iteration: 154891
loss: 1.0873080492019653,grad_norm: 0.999999326542084, iteration: 154892
loss: 1.1409071683883667,grad_norm: 0.9999992482859417, iteration: 154893
loss: 0.9994148015975952,grad_norm: 0.9999990707350693, iteration: 154894
loss: 0.9720268249511719,grad_norm: 0.9999995014045809, iteration: 154895
loss: 1.0011816024780273,grad_norm: 0.9720022420214661, iteration: 154896
loss: 1.0009292364120483,grad_norm: 0.9999995879723943, iteration: 154897
loss: 1.0436747074127197,grad_norm: 0.9999990197994507, iteration: 154898
loss: 1.0016770362854004,grad_norm: 0.9999993599439153, iteration: 154899
loss: 0.9665333032608032,grad_norm: 0.9999997772208422, iteration: 154900
loss: 1.0902947187423706,grad_norm: 0.99999942092369, iteration: 154901
loss: 1.0068045854568481,grad_norm: 0.9999991856560394, iteration: 154902
loss: 1.0313211679458618,grad_norm: 0.9999991433493834, iteration: 154903
loss: 1.0086357593536377,grad_norm: 0.9999991536018982, iteration: 154904
loss: 1.247570514678955,grad_norm: 0.9999996699785766, iteration: 154905
loss: 0.9873897433280945,grad_norm: 0.975363746517142, iteration: 154906
loss: 1.023154854774475,grad_norm: 0.8646308199695779, iteration: 154907
loss: 1.2313262224197388,grad_norm: 0.9999994622318694, iteration: 154908
loss: 1.018035650253296,grad_norm: 0.9999991471763584, iteration: 154909
loss: 1.0039985179901123,grad_norm: 0.9357910026359048, iteration: 154910
loss: 1.000692367553711,grad_norm: 0.9715288854107833, iteration: 154911
loss: 1.157468318939209,grad_norm: 0.999999396935022, iteration: 154912
loss: 1.0138418674468994,grad_norm: 0.9632339280601436, iteration: 154913
loss: 1.0233347415924072,grad_norm: 0.999999253306764, iteration: 154914
loss: 1.005576252937317,grad_norm: 0.9999993253147168, iteration: 154915
loss: 1.046619176864624,grad_norm: 0.9999998747878163, iteration: 154916
loss: 1.0042500495910645,grad_norm: 0.9999989585004229, iteration: 154917
loss: 1.1040809154510498,grad_norm: 0.9999991859716084, iteration: 154918
loss: 1.025931477546692,grad_norm: 0.9999991122690777, iteration: 154919
loss: 1.0197621583938599,grad_norm: 0.9984628720400314, iteration: 154920
loss: 0.9649626612663269,grad_norm: 0.9999990813811767, iteration: 154921
loss: 1.1284418106079102,grad_norm: 0.9999991907707056, iteration: 154922
loss: 1.0030840635299683,grad_norm: 0.9999991536884018, iteration: 154923
loss: 0.9866281151771545,grad_norm: 0.9999989929729542, iteration: 154924
loss: 0.9877907633781433,grad_norm: 0.9999990816123359, iteration: 154925
loss: 1.0215486288070679,grad_norm: 0.9999992710475549, iteration: 154926
loss: 0.9739631414413452,grad_norm: 0.9999991920372776, iteration: 154927
loss: 1.0610114336013794,grad_norm: 0.9999994648897109, iteration: 154928
loss: 1.1606061458587646,grad_norm: 0.9999993952848774, iteration: 154929
loss: 1.3168659210205078,grad_norm: 0.9999995587864192, iteration: 154930
loss: 1.0622847080230713,grad_norm: 0.999999734738513, iteration: 154931
loss: 1.032438039779663,grad_norm: 0.9999991089191036, iteration: 154932
loss: 0.9611362218856812,grad_norm: 0.9999991244465087, iteration: 154933
loss: 1.0314710140228271,grad_norm: 0.9720401368229951, iteration: 154934
loss: 1.0388779640197754,grad_norm: 0.9999996597715078, iteration: 154935
loss: 1.0665029287338257,grad_norm: 0.9999990989741587, iteration: 154936
loss: 0.9834109544754028,grad_norm: 0.9999988928075622, iteration: 154937
loss: 1.0156595706939697,grad_norm: 0.9473670408002803, iteration: 154938
loss: 0.9937334060668945,grad_norm: 0.9999991022601064, iteration: 154939
loss: 1.0086902379989624,grad_norm: 0.9999989954043642, iteration: 154940
loss: 1.00253427028656,grad_norm: 0.99999926332657, iteration: 154941
loss: 1.3818978071212769,grad_norm: 0.9999996164521227, iteration: 154942
loss: 0.9930611252784729,grad_norm: 0.9364564337719279, iteration: 154943
loss: 1.1173604726791382,grad_norm: 0.9999994290742865, iteration: 154944
loss: 1.0193997621536255,grad_norm: 0.9999992641795544, iteration: 154945
loss: 1.1068776845932007,grad_norm: 0.9999995342523023, iteration: 154946
loss: 0.9591067433357239,grad_norm: 0.7905422575428066, iteration: 154947
loss: 1.026658058166504,grad_norm: 0.9999990306019848, iteration: 154948
loss: 1.082735300064087,grad_norm: 0.9999992067551929, iteration: 154949
loss: 1.0093495845794678,grad_norm: 0.9999991627091829, iteration: 154950
loss: 1.0288453102111816,grad_norm: 0.9999996654445995, iteration: 154951
loss: 1.0619783401489258,grad_norm: 0.9999992930717704, iteration: 154952
loss: 1.0316455364227295,grad_norm: 0.9999991578100543, iteration: 154953
loss: 0.9593269228935242,grad_norm: 0.999999103922318, iteration: 154954
loss: 0.9871909022331238,grad_norm: 0.9999992610544565, iteration: 154955
loss: 1.0559006929397583,grad_norm: 0.9999996783369192, iteration: 154956
loss: 1.00700044631958,grad_norm: 0.9999996155870339, iteration: 154957
loss: 1.005691647529602,grad_norm: 0.9999992630798977, iteration: 154958
loss: 1.0900520086288452,grad_norm: 0.9999991927198542, iteration: 154959
loss: 1.007676362991333,grad_norm: 0.9999992052118832, iteration: 154960
loss: 1.099495768547058,grad_norm: 0.9999997991729027, iteration: 154961
loss: 1.0095789432525635,grad_norm: 0.9999991682910324, iteration: 154962
loss: 1.363145112991333,grad_norm: 0.9999998753759749, iteration: 154963
loss: 1.4562480449676514,grad_norm: 0.9999997277242444, iteration: 154964
loss: 1.6342912912368774,grad_norm: 0.9999998642534192, iteration: 154965
loss: 2.0479795932769775,grad_norm: 0.9999998684343939, iteration: 154966
loss: 1.313085913658142,grad_norm: 0.9999997520751, iteration: 154967
loss: 1.218922734260559,grad_norm: 0.9999997069660121, iteration: 154968
loss: 1.116912603378296,grad_norm: 0.9999998985050956, iteration: 154969
loss: 1.1778393983840942,grad_norm: 0.9999998145263193, iteration: 154970
loss: 1.2199047803878784,grad_norm: 0.9999996802078948, iteration: 154971
loss: 1.2104853391647339,grad_norm: 0.9999992611696156, iteration: 154972
loss: 1.0518155097961426,grad_norm: 0.9999993922844448, iteration: 154973
loss: 0.9715883135795593,grad_norm: 0.999999237906703, iteration: 154974
loss: 1.2143402099609375,grad_norm: 0.9999999744885804, iteration: 154975
loss: 1.018473744392395,grad_norm: 1.0000000012221941, iteration: 154976
loss: 1.0353072881698608,grad_norm: 0.9999992239801002, iteration: 154977
loss: 1.0437605381011963,grad_norm: 0.9999992056494775, iteration: 154978
loss: 1.0615198612213135,grad_norm: 0.9999996676570366, iteration: 154979
loss: 1.0019499063491821,grad_norm: 0.9712495170468692, iteration: 154980
loss: 1.0427229404449463,grad_norm: 0.9999991948153224, iteration: 154981
loss: 0.9699630737304688,grad_norm: 0.9999991203837075, iteration: 154982
loss: 0.9902349710464478,grad_norm: 0.999999562538603, iteration: 154983
loss: 1.0330119132995605,grad_norm: 0.9558780729417714, iteration: 154984
loss: 1.0012105703353882,grad_norm: 0.9999991195871983, iteration: 154985
loss: 0.9712293148040771,grad_norm: 0.9999993167225091, iteration: 154986
loss: 1.0049046277999878,grad_norm: 0.9931645853573414, iteration: 154987
loss: 0.9881370067596436,grad_norm: 0.9583981942318566, iteration: 154988
loss: 1.054847240447998,grad_norm: 0.9999993828630414, iteration: 154989
loss: 0.964644193649292,grad_norm: 0.9999990872503693, iteration: 154990
loss: 1.0756992101669312,grad_norm: 0.9999993707582058, iteration: 154991
loss: 0.998772919178009,grad_norm: 0.987646028757351, iteration: 154992
loss: 1.104536533355713,grad_norm: 0.9999994114392735, iteration: 154993
loss: 0.9728841185569763,grad_norm: 0.9999990970811126, iteration: 154994
loss: 0.9814130663871765,grad_norm: 0.9999990947093811, iteration: 154995
loss: 1.0048903226852417,grad_norm: 0.9999994665947585, iteration: 154996
loss: 0.9619393348693848,grad_norm: 0.9487845073986954, iteration: 154997
loss: 1.0160678625106812,grad_norm: 0.9999993480408631, iteration: 154998
loss: 1.084585428237915,grad_norm: 0.9999998553132835, iteration: 154999
loss: 0.9802058339118958,grad_norm: 0.9210522699366155, iteration: 155000
loss: 1.0103468894958496,grad_norm: 0.9999995648820643, iteration: 155001
loss: 1.0438734292984009,grad_norm: 0.9999990095416103, iteration: 155002
loss: 1.0503119230270386,grad_norm: 0.9886599152324087, iteration: 155003
loss: 1.0047396421432495,grad_norm: 0.9999990958562451, iteration: 155004
loss: 0.9777325391769409,grad_norm: 0.9999991637912012, iteration: 155005
loss: 1.0123094320297241,grad_norm: 0.9460201049181778, iteration: 155006
loss: 1.042690396308899,grad_norm: 0.9999992050518497, iteration: 155007
loss: 0.9994217753410339,grad_norm: 0.9539999539396729, iteration: 155008
loss: 0.9959728717803955,grad_norm: 0.9999990738212352, iteration: 155009
loss: 1.1313352584838867,grad_norm: 0.9999993532971335, iteration: 155010
loss: 1.0125209093093872,grad_norm: 0.9999991060520325, iteration: 155011
loss: 1.0113260746002197,grad_norm: 0.9226086340102734, iteration: 155012
loss: 0.9780614376068115,grad_norm: 0.9999990884955681, iteration: 155013
loss: 0.9278400540351868,grad_norm: 0.999999081345077, iteration: 155014
loss: 1.0420637130737305,grad_norm: 0.9999992606201252, iteration: 155015
loss: 1.0854474306106567,grad_norm: 0.9231980706014693, iteration: 155016
loss: 1.0924538373947144,grad_norm: 0.9999996136271377, iteration: 155017
loss: 0.9694860577583313,grad_norm: 0.997663058867618, iteration: 155018
loss: 1.0305031538009644,grad_norm: 0.9424254125465698, iteration: 155019
loss: 0.988224446773529,grad_norm: 0.9106739402518044, iteration: 155020
loss: 1.0126599073410034,grad_norm: 0.9033547689014882, iteration: 155021
loss: 0.9906439185142517,grad_norm: 0.9999993255942924, iteration: 155022
loss: 1.0168004035949707,grad_norm: 0.8965061667622136, iteration: 155023
loss: 1.0129259824752808,grad_norm: 0.836887457382543, iteration: 155024
loss: 1.0029335021972656,grad_norm: 0.9906099699185748, iteration: 155025
loss: 0.9785528182983398,grad_norm: 0.8718397039130444, iteration: 155026
loss: 1.0009232759475708,grad_norm: 0.9999995763241128, iteration: 155027
loss: 0.9775024652481079,grad_norm: 0.9471785009216074, iteration: 155028
loss: 1.0196863412857056,grad_norm: 0.9999993383220814, iteration: 155029
loss: 1.0344295501708984,grad_norm: 0.9999992063464275, iteration: 155030
loss: 1.0100237131118774,grad_norm: 0.9199572158874938, iteration: 155031
loss: 1.0303704738616943,grad_norm: 0.9510420265222345, iteration: 155032
loss: 1.0447195768356323,grad_norm: 0.999998979816114, iteration: 155033
loss: 0.9611984491348267,grad_norm: 0.9209081195566488, iteration: 155034
loss: 0.9668428301811218,grad_norm: 0.9999995283650386, iteration: 155035
loss: 0.996848464012146,grad_norm: 0.9440359052462979, iteration: 155036
loss: 0.9761004447937012,grad_norm: 0.9196683620846522, iteration: 155037
loss: 0.9746012687683105,grad_norm: 0.9325295386676556, iteration: 155038
loss: 0.9893879890441895,grad_norm: 0.93519226397301, iteration: 155039
loss: 0.96949303150177,grad_norm: 0.9569781173075113, iteration: 155040
loss: 0.9848373532295227,grad_norm: 0.9999990921520631, iteration: 155041
loss: 0.9620219469070435,grad_norm: 0.8767953117978926, iteration: 155042
loss: 0.9640696048736572,grad_norm: 0.8741632910106131, iteration: 155043
loss: 0.9985631704330444,grad_norm: 0.9999991779817372, iteration: 155044
loss: 0.9502860903739929,grad_norm: 0.9768266619708872, iteration: 155045
loss: 1.0176587104797363,grad_norm: 0.9112712088385132, iteration: 155046
loss: 0.9877338409423828,grad_norm: 0.9999989780365087, iteration: 155047
loss: 0.947909414768219,grad_norm: 0.9999997977170676, iteration: 155048
loss: 0.9849210977554321,grad_norm: 0.9388780690022911, iteration: 155049
loss: 0.9598235487937927,grad_norm: 0.9660510333190404, iteration: 155050
loss: 0.9543880224227905,grad_norm: 0.9999989967142519, iteration: 155051
loss: 0.9727675318717957,grad_norm: 0.9999992759772185, iteration: 155052
loss: 0.9771028757095337,grad_norm: 0.9999992329408094, iteration: 155053
loss: 1.0430004596710205,grad_norm: 0.999999015299007, iteration: 155054
loss: 0.9547857642173767,grad_norm: 0.9924642752011572, iteration: 155055
loss: 1.0249541997909546,grad_norm: 0.9999991791987479, iteration: 155056
loss: 1.0001611709594727,grad_norm: 0.9999993056720837, iteration: 155057
loss: 1.017866611480713,grad_norm: 0.8734191655776428, iteration: 155058
loss: 0.9768728613853455,grad_norm: 0.8986748936296197, iteration: 155059
loss: 1.0194300413131714,grad_norm: 0.9999995009084595, iteration: 155060
loss: 0.9851610064506531,grad_norm: 0.994921282175403, iteration: 155061
loss: 0.9745124578475952,grad_norm: 0.9046258356673125, iteration: 155062
loss: 1.05327308177948,grad_norm: 0.999999109744788, iteration: 155063
loss: 0.9923069477081299,grad_norm: 0.936972886791771, iteration: 155064
loss: 1.0119810104370117,grad_norm: 0.9999992274175363, iteration: 155065
loss: 0.9809183478355408,grad_norm: 0.8796303541626226, iteration: 155066
loss: 0.9661963582038879,grad_norm: 0.9384207781354945, iteration: 155067
loss: 0.9973136782646179,grad_norm: 0.9999996225841106, iteration: 155068
loss: 1.0129343271255493,grad_norm: 0.9999990891510704, iteration: 155069
loss: 0.9837294816970825,grad_norm: 0.964301811585718, iteration: 155070
loss: 0.9796348214149475,grad_norm: 0.999999134938201, iteration: 155071
loss: 0.9917176365852356,grad_norm: 0.9044333004168964, iteration: 155072
loss: 0.9829672574996948,grad_norm: 0.9976667283856846, iteration: 155073
loss: 1.016546368598938,grad_norm: 0.9999992753421312, iteration: 155074
loss: 1.0008021593093872,grad_norm: 0.9999991173215939, iteration: 155075
loss: 0.9749096035957336,grad_norm: 0.8589244632755326, iteration: 155076
loss: 1.0259759426116943,grad_norm: 0.862599341905344, iteration: 155077
loss: 0.9660114645957947,grad_norm: 0.9999991982743175, iteration: 155078
loss: 0.9902317523956299,grad_norm: 0.9999990013431488, iteration: 155079
loss: 0.9876548051834106,grad_norm: 0.999998987763498, iteration: 155080
loss: 1.0236214399337769,grad_norm: 0.9999989957927506, iteration: 155081
loss: 1.0073931217193604,grad_norm: 0.9999992042319326, iteration: 155082
loss: 1.0069327354431152,grad_norm: 0.9755678851991969, iteration: 155083
loss: 1.102197289466858,grad_norm: 0.9999997744122523, iteration: 155084
loss: 1.0221304893493652,grad_norm: 0.9999991341841833, iteration: 155085
loss: 0.9711310267448425,grad_norm: 0.9999992761519227, iteration: 155086
loss: 0.9596062898635864,grad_norm: 0.9549360686619934, iteration: 155087
loss: 1.003380537033081,grad_norm: 0.8639320643895843, iteration: 155088
loss: 1.0794024467468262,grad_norm: 0.9999994645061956, iteration: 155089
loss: 1.0350555181503296,grad_norm: 0.9999992787648515, iteration: 155090
loss: 0.9828706383705139,grad_norm: 0.9999990328821564, iteration: 155091
loss: 0.9657711982727051,grad_norm: 0.9999991034710256, iteration: 155092
loss: 0.9791499376296997,grad_norm: 0.9999991519349847, iteration: 155093
loss: 0.9477754235267639,grad_norm: 0.9916850508873281, iteration: 155094
loss: 0.9904388785362244,grad_norm: 0.9999991490413856, iteration: 155095
loss: 0.9922609329223633,grad_norm: 0.9999990000268199, iteration: 155096
loss: 0.9807171821594238,grad_norm: 0.9999991219117821, iteration: 155097
loss: 1.093088150024414,grad_norm: 0.9999991174611114, iteration: 155098
loss: 0.973839521408081,grad_norm: 0.9686207343881751, iteration: 155099
loss: 1.015535831451416,grad_norm: 0.854573734039452, iteration: 155100
loss: 0.9917398691177368,grad_norm: 0.946705108740159, iteration: 155101
loss: 1.009955644607544,grad_norm: 0.967365569419261, iteration: 155102
loss: 1.028601050376892,grad_norm: 0.9999991215341358, iteration: 155103
loss: 0.9951239228248596,grad_norm: 0.9886107650029357, iteration: 155104
loss: 0.9845888018608093,grad_norm: 0.9999998730796358, iteration: 155105
loss: 1.0146253108978271,grad_norm: 0.9999991337997609, iteration: 155106
loss: 0.9916248321533203,grad_norm: 0.8659716271479354, iteration: 155107
loss: 1.0062397718429565,grad_norm: 0.9999990597265952, iteration: 155108
loss: 1.0115175247192383,grad_norm: 0.8829988427274842, iteration: 155109
loss: 0.9936155676841736,grad_norm: 0.9999991311742027, iteration: 155110
loss: 0.9687478542327881,grad_norm: 0.9999991275689577, iteration: 155111
loss: 1.0393015146255493,grad_norm: 0.999999122071813, iteration: 155112
loss: 0.9414908289909363,grad_norm: 0.9999992198802612, iteration: 155113
loss: 0.9785217046737671,grad_norm: 0.9999989378796129, iteration: 155114
loss: 0.980884313583374,grad_norm: 0.9999992916766856, iteration: 155115
loss: 1.0174214839935303,grad_norm: 0.9999990651520602, iteration: 155116
loss: 0.9526339173316956,grad_norm: 0.8569705038077621, iteration: 155117
loss: 0.9840328097343445,grad_norm: 0.8986517482812758, iteration: 155118
loss: 1.0211783647537231,grad_norm: 0.960201089611064, iteration: 155119
loss: 1.053417444229126,grad_norm: 0.9999990267558242, iteration: 155120
loss: 1.0084583759307861,grad_norm: 0.9999999977558305, iteration: 155121
loss: 1.0617607831954956,grad_norm: 0.9999991899973232, iteration: 155122
loss: 0.9925379157066345,grad_norm: 0.9519724014675336, iteration: 155123
loss: 1.0461379289627075,grad_norm: 0.9999991736496475, iteration: 155124
loss: 1.271254062652588,grad_norm: 0.9999998805395739, iteration: 155125
loss: 1.0442672967910767,grad_norm: 0.9999998282199017, iteration: 155126
loss: 0.9912013411521912,grad_norm: 0.9714667975656958, iteration: 155127
loss: 1.0019837617874146,grad_norm: 0.9999993092237837, iteration: 155128
loss: 1.0143017768859863,grad_norm: 0.8594114455727195, iteration: 155129
loss: 1.0582963228225708,grad_norm: 0.9999992641522278, iteration: 155130
loss: 1.0178509950637817,grad_norm: 0.9999991421922556, iteration: 155131
loss: 0.9720958471298218,grad_norm: 0.9999991191794727, iteration: 155132
loss: 1.0326011180877686,grad_norm: 0.9240548518434085, iteration: 155133
loss: 1.0163439512252808,grad_norm: 0.8600850268614453, iteration: 155134
loss: 1.0116019248962402,grad_norm: 0.9999991245183817, iteration: 155135
loss: 0.9792540669441223,grad_norm: 0.8574117565314456, iteration: 155136
loss: 1.157478928565979,grad_norm: 0.9999993720584621, iteration: 155137
loss: 1.030197262763977,grad_norm: 0.9009992262531387, iteration: 155138
loss: 0.9558690786361694,grad_norm: 0.9999992127536101, iteration: 155139
loss: 0.9778921604156494,grad_norm: 0.9999992420776869, iteration: 155140
loss: 1.0237234830856323,grad_norm: 0.9999994159425832, iteration: 155141
loss: 1.0011014938354492,grad_norm: 0.9999999479522066, iteration: 155142
loss: 0.9816540479660034,grad_norm: 0.8932283651939675, iteration: 155143
loss: 0.9998424053192139,grad_norm: 0.9626217878581627, iteration: 155144
loss: 1.0126521587371826,grad_norm: 0.9999991292085505, iteration: 155145
loss: 0.9655901789665222,grad_norm: 0.99999901645839, iteration: 155146
loss: 1.0449060201644897,grad_norm: 0.9999993220804707, iteration: 155147
loss: 0.9850980043411255,grad_norm: 0.8626585074563362, iteration: 155148
loss: 1.0105777978897095,grad_norm: 0.9999991339209394, iteration: 155149
loss: 0.9843411445617676,grad_norm: 0.9999991460613369, iteration: 155150
loss: 0.9979109168052673,grad_norm: 0.981836069385466, iteration: 155151
loss: 0.971846878528595,grad_norm: 0.9868503373256557, iteration: 155152
loss: 1.020605206489563,grad_norm: 0.9999992222200959, iteration: 155153
loss: 1.1576074361801147,grad_norm: 0.9999990371598695, iteration: 155154
loss: 1.0078022480010986,grad_norm: 0.9999991691238195, iteration: 155155
loss: 1.0456360578536987,grad_norm: 0.999999079908447, iteration: 155156
loss: 1.007962942123413,grad_norm: 0.9383287492309541, iteration: 155157
loss: 1.0244938135147095,grad_norm: 0.9999991663364088, iteration: 155158
loss: 0.9561615586280823,grad_norm: 0.93614354643396, iteration: 155159
loss: 0.9900087118148804,grad_norm: 0.9999990530199725, iteration: 155160
loss: 1.000290870666504,grad_norm: 0.9999990889612718, iteration: 155161
loss: 1.0227813720703125,grad_norm: 0.8367841278708595, iteration: 155162
loss: 1.0417990684509277,grad_norm: 0.9999999399174522, iteration: 155163
loss: 1.0074115991592407,grad_norm: 0.9450599220595727, iteration: 155164
loss: 0.9932693243026733,grad_norm: 0.9551118873185774, iteration: 155165
loss: 0.9817320108413696,grad_norm: 0.9999990385965886, iteration: 155166
loss: 1.008368730545044,grad_norm: 0.9420472679086905, iteration: 155167
loss: 1.0034428834915161,grad_norm: 0.9857338312109907, iteration: 155168
loss: 0.9790348410606384,grad_norm: 0.9999997996412073, iteration: 155169
loss: 1.0722973346710205,grad_norm: 0.9999996459321259, iteration: 155170
loss: 0.9934727549552917,grad_norm: 0.9540717422207444, iteration: 155171
loss: 0.9966320395469666,grad_norm: 0.9999989904108313, iteration: 155172
loss: 1.043120265007019,grad_norm: 0.9360866662963848, iteration: 155173
loss: 1.0076714754104614,grad_norm: 0.8895228455550465, iteration: 155174
loss: 0.9924177527427673,grad_norm: 0.99999944598423, iteration: 155175
loss: 1.0335466861724854,grad_norm: 0.9999994935076122, iteration: 155176
loss: 1.0239324569702148,grad_norm: 0.981568138339412, iteration: 155177
loss: 1.0498896837234497,grad_norm: 0.9999993834346923, iteration: 155178
loss: 1.0132914781570435,grad_norm: 0.9999990655564142, iteration: 155179
loss: 1.0074501037597656,grad_norm: 0.9704056550235299, iteration: 155180
loss: 0.9794888496398926,grad_norm: 0.9999991484062505, iteration: 155181
loss: 0.9755238890647888,grad_norm: 0.8299385951459326, iteration: 155182
loss: 0.9972550272941589,grad_norm: 0.9999991850515607, iteration: 155183
loss: 0.973836362361908,grad_norm: 0.9999991151139599, iteration: 155184
loss: 1.000851035118103,grad_norm: 0.8994579618921476, iteration: 155185
loss: 0.9882065653800964,grad_norm: 0.9999991985132712, iteration: 155186
loss: 1.0043158531188965,grad_norm: 0.9395586696311172, iteration: 155187
loss: 0.9689086079597473,grad_norm: 0.9848043302417946, iteration: 155188
loss: 1.0082273483276367,grad_norm: 0.9999992737213902, iteration: 155189
loss: 0.9662747979164124,grad_norm: 0.9529692121467073, iteration: 155190
loss: 0.9905538558959961,grad_norm: 0.9999992259033181, iteration: 155191
loss: 0.9692831039428711,grad_norm: 0.9644433178121361, iteration: 155192
loss: 1.0762196779251099,grad_norm: 0.9999995934661728, iteration: 155193
loss: 1.05439293384552,grad_norm: 0.9999992957623252, iteration: 155194
loss: 0.9951077699661255,grad_norm: 0.907799120560864, iteration: 155195
loss: 0.9934860467910767,grad_norm: 0.9999990422058792, iteration: 155196
loss: 0.9814123511314392,grad_norm: 0.9999992701491308, iteration: 155197
loss: 1.0229181051254272,grad_norm: 0.8752244960599602, iteration: 155198
loss: 0.9738686680793762,grad_norm: 0.9999990776963248, iteration: 155199
loss: 0.9880650043487549,grad_norm: 0.988017516435317, iteration: 155200
loss: 1.0067044496536255,grad_norm: 0.9999991082851133, iteration: 155201
loss: 1.0132867097854614,grad_norm: 0.9999992286865477, iteration: 155202
loss: 0.987946629524231,grad_norm: 0.9999991885268891, iteration: 155203
loss: 1.1292797327041626,grad_norm: 0.999999590013903, iteration: 155204
loss: 1.0506222248077393,grad_norm: 0.9999992569396121, iteration: 155205
loss: 0.9879813194274902,grad_norm: 0.9999991602638549, iteration: 155206
loss: 1.0008232593536377,grad_norm: 0.9999991011080919, iteration: 155207
loss: 0.983241081237793,grad_norm: 0.9737660061545951, iteration: 155208
loss: 1.0326848030090332,grad_norm: 0.953527329183362, iteration: 155209
loss: 0.997947633266449,grad_norm: 0.8864650122976644, iteration: 155210
loss: 1.0096184015274048,grad_norm: 0.9999990912291622, iteration: 155211
loss: 1.002192497253418,grad_norm: 0.9957929218820573, iteration: 155212
loss: 1.0204285383224487,grad_norm: 0.9932413868843557, iteration: 155213
loss: 0.9749184250831604,grad_norm: 0.9747410613846413, iteration: 155214
loss: 0.9930340647697449,grad_norm: 0.999999329719541, iteration: 155215
loss: 1.00204336643219,grad_norm: 0.9393468184034754, iteration: 155216
loss: 1.01173734664917,grad_norm: 0.9899085767866255, iteration: 155217
loss: 0.9678401947021484,grad_norm: 0.9976460130407103, iteration: 155218
loss: 1.0019205808639526,grad_norm: 0.9999991722816953, iteration: 155219
loss: 0.9740145802497864,grad_norm: 0.9342133671003812, iteration: 155220
loss: 0.965371310710907,grad_norm: 0.9980236526200614, iteration: 155221
loss: 0.976952850818634,grad_norm: 0.8852348378656226, iteration: 155222
loss: 1.0030672550201416,grad_norm: 0.9999990543580668, iteration: 155223
loss: 1.0024192333221436,grad_norm: 0.9999989759129764, iteration: 155224
loss: 1.0293644666671753,grad_norm: 0.9999991165227102, iteration: 155225
loss: 0.9946134686470032,grad_norm: 0.9999991909870516, iteration: 155226
loss: 1.047110915184021,grad_norm: 0.9718573996586299, iteration: 155227
loss: 1.014822244644165,grad_norm: 0.9999992043170619, iteration: 155228
loss: 0.9743812084197998,grad_norm: 0.9181076470480694, iteration: 155229
loss: 0.9716841578483582,grad_norm: 0.9738596745465504, iteration: 155230
loss: 1.0213522911071777,grad_norm: 0.9999991257228789, iteration: 155231
loss: 1.0074516534805298,grad_norm: 0.8269248507343474, iteration: 155232
loss: 0.9744507074356079,grad_norm: 0.9129187953675998, iteration: 155233
loss: 1.1034573316574097,grad_norm: 0.9999999234067919, iteration: 155234
loss: 1.0048620700836182,grad_norm: 0.9999998105404564, iteration: 155235
loss: 1.0099540948867798,grad_norm: 0.9120772836158493, iteration: 155236
loss: 0.9890231490135193,grad_norm: 0.9999990311648767, iteration: 155237
loss: 1.0096648931503296,grad_norm: 0.9999993954061548, iteration: 155238
loss: 1.008325219154358,grad_norm: 0.9999992876077993, iteration: 155239
loss: 1.062064290046692,grad_norm: 0.9896424067942058, iteration: 155240
loss: 1.0230902433395386,grad_norm: 0.7868691238258435, iteration: 155241
loss: 0.9955365657806396,grad_norm: 0.9441456381861808, iteration: 155242
loss: 1.0106792449951172,grad_norm: 0.9422869839947589, iteration: 155243
loss: 0.9596144556999207,grad_norm: 0.9999989346253327, iteration: 155244
loss: 0.9810433387756348,grad_norm: 0.99999904925877, iteration: 155245
loss: 0.9408472180366516,grad_norm: 0.9067900926298803, iteration: 155246
loss: 1.0312745571136475,grad_norm: 0.9999991019148207, iteration: 155247
loss: 0.984696090221405,grad_norm: 0.9999991360524049, iteration: 155248
loss: 0.9944655299186707,grad_norm: 0.962805771463333, iteration: 155249
loss: 1.0250321626663208,grad_norm: 0.7954468374098408, iteration: 155250
loss: 1.0402785539627075,grad_norm: 0.9999991234034149, iteration: 155251
loss: 1.0399543046951294,grad_norm: 0.9999990573334444, iteration: 155252
loss: 1.0108710527420044,grad_norm: 0.9999992128560924, iteration: 155253
loss: 1.0153212547302246,grad_norm: 0.9999989616385883, iteration: 155254
loss: 1.1927026510238647,grad_norm: 0.9999997439872802, iteration: 155255
loss: 0.9503373503684998,grad_norm: 0.9999991107116362, iteration: 155256
loss: 0.9736164212226868,grad_norm: 0.9999989220645645, iteration: 155257
loss: 1.0168635845184326,grad_norm: 0.8589637411796804, iteration: 155258
loss: 1.2052267789840698,grad_norm: 0.999999941791242, iteration: 155259
loss: 0.9990421533584595,grad_norm: 0.9999992393122048, iteration: 155260
loss: 1.035410761833191,grad_norm: 0.999999584965944, iteration: 155261
loss: 0.9760043025016785,grad_norm: 0.9939137560325537, iteration: 155262
loss: 0.985945999622345,grad_norm: 0.9999990921363888, iteration: 155263
loss: 1.0593059062957764,grad_norm: 0.9999990595908446, iteration: 155264
loss: 1.103499174118042,grad_norm: 0.9999994557903165, iteration: 155265
loss: 0.9681550860404968,grad_norm: 0.9619498657730438, iteration: 155266
loss: 1.0384846925735474,grad_norm: 0.9999991153133069, iteration: 155267
loss: 0.9979975819587708,grad_norm: 0.9286011893153715, iteration: 155268
loss: 1.0474412441253662,grad_norm: 0.9999991570626113, iteration: 155269
loss: 1.0012273788452148,grad_norm: 0.9999990076195632, iteration: 155270
loss: 1.0050913095474243,grad_norm: 0.9999993148710916, iteration: 155271
loss: 0.9703366160392761,grad_norm: 0.8336020458586539, iteration: 155272
loss: 0.9721209406852722,grad_norm: 0.8187308523395616, iteration: 155273
loss: 1.02434241771698,grad_norm: 0.9999991989463181, iteration: 155274
loss: 1.00637686252594,grad_norm: 0.999998903116618, iteration: 155275
loss: 1.0151063203811646,grad_norm: 0.9999990204611009, iteration: 155276
loss: 1.1216766834259033,grad_norm: 0.9999994185421927, iteration: 155277
loss: 1.0010855197906494,grad_norm: 0.9999990927553993, iteration: 155278
loss: 1.0164052248001099,grad_norm: 0.9999991829304774, iteration: 155279
loss: 1.0332809686660767,grad_norm: 0.9999991207389858, iteration: 155280
loss: 0.9805936217308044,grad_norm: 0.9513056966485829, iteration: 155281
loss: 1.0281819105148315,grad_norm: 0.889428107353237, iteration: 155282
loss: 1.024800419807434,grad_norm: 0.9999992206134871, iteration: 155283
loss: 1.0343759059906006,grad_norm: 0.9087418677974725, iteration: 155284
loss: 0.9722374081611633,grad_norm: 0.977480633327003, iteration: 155285
loss: 0.9787970185279846,grad_norm: 0.9999996772382571, iteration: 155286
loss: 0.9949836730957031,grad_norm: 0.9102163981863302, iteration: 155287
loss: 1.2507914304733276,grad_norm: 0.9999993606977053, iteration: 155288
loss: 1.0127794742584229,grad_norm: 0.8994521095617595, iteration: 155289
loss: 0.9849658012390137,grad_norm: 0.9999992055902852, iteration: 155290
loss: 1.095637559890747,grad_norm: 0.9999996558751691, iteration: 155291
loss: 1.0156241655349731,grad_norm: 0.9294845695039674, iteration: 155292
loss: 0.9984255433082581,grad_norm: 0.9999993265561948, iteration: 155293
loss: 1.0041553974151611,grad_norm: 0.8936893413372732, iteration: 155294
loss: 1.0076735019683838,grad_norm: 0.9999991363315316, iteration: 155295
loss: 1.042357087135315,grad_norm: 0.9999991105182245, iteration: 155296
loss: 0.9984396696090698,grad_norm: 0.9999991424589811, iteration: 155297
loss: 1.0124237537384033,grad_norm: 0.9999990900301411, iteration: 155298
loss: 1.0020859241485596,grad_norm: 0.977277596327799, iteration: 155299
loss: 1.0125113725662231,grad_norm: 0.9715846948101443, iteration: 155300
loss: 0.9570900797843933,grad_norm: 0.9392126692365779, iteration: 155301
loss: 1.1571056842803955,grad_norm: 0.9999992270286906, iteration: 155302
loss: 0.9985033869743347,grad_norm: 0.9246754138250957, iteration: 155303
loss: 1.035111904144287,grad_norm: 0.9999990426141665, iteration: 155304
loss: 0.9957738518714905,grad_norm: 0.9367321984078628, iteration: 155305
loss: 1.0225168466567993,grad_norm: 0.9348842915857749, iteration: 155306
loss: 0.9914209842681885,grad_norm: 0.9999991679017978, iteration: 155307
loss: 0.9869107604026794,grad_norm: 0.9999991558144664, iteration: 155308
loss: 1.0029010772705078,grad_norm: 0.9357922994591064, iteration: 155309
loss: 1.027076005935669,grad_norm: 0.9999990130177383, iteration: 155310
loss: 1.0512995719909668,grad_norm: 0.9999992044055929, iteration: 155311
loss: 1.0048264265060425,grad_norm: 0.9999990005835585, iteration: 155312
loss: 0.9780415296554565,grad_norm: 0.8976526064447364, iteration: 155313
loss: 1.0093430280685425,grad_norm: 0.9679573517848343, iteration: 155314
loss: 1.026140570640564,grad_norm: 0.9999990975339751, iteration: 155315
loss: 0.9848601222038269,grad_norm: 0.9999991046196954, iteration: 155316
loss: 0.979027271270752,grad_norm: 0.9999992276418123, iteration: 155317
loss: 1.0056793689727783,grad_norm: 0.9968520345777201, iteration: 155318
loss: 0.9800453782081604,grad_norm: 0.9488196961736268, iteration: 155319
loss: 1.0076795816421509,grad_norm: 0.9999991332047401, iteration: 155320
loss: 1.0251160860061646,grad_norm: 0.9999993180486667, iteration: 155321
loss: 1.0058408975601196,grad_norm: 0.7728566911324539, iteration: 155322
loss: 1.0110328197479248,grad_norm: 0.9999992745693439, iteration: 155323
loss: 0.9835572838783264,grad_norm: 0.9999991338001772, iteration: 155324
loss: 1.0042905807495117,grad_norm: 0.8706220275382534, iteration: 155325
loss: 0.998995840549469,grad_norm: 0.9025155491120963, iteration: 155326
loss: 0.9999380707740784,grad_norm: 0.999032050785364, iteration: 155327
loss: 1.0524239540100098,grad_norm: 0.9999998379870235, iteration: 155328
loss: 1.0063194036483765,grad_norm: 0.9007851856945559, iteration: 155329
loss: 0.9971855282783508,grad_norm: 0.9999990310649792, iteration: 155330
loss: 0.986689031124115,grad_norm: 0.9949466106386083, iteration: 155331
loss: 0.9766448140144348,grad_norm: 0.9999992367460242, iteration: 155332
loss: 0.9535013437271118,grad_norm: 0.9598712307982207, iteration: 155333
loss: 0.984005868434906,grad_norm: 0.9999990296812404, iteration: 155334
loss: 1.0105297565460205,grad_norm: 0.9480430860549512, iteration: 155335
loss: 1.0422344207763672,grad_norm: 0.9999990799783351, iteration: 155336
loss: 1.0465432405471802,grad_norm: 0.9999990893710659, iteration: 155337
loss: 1.002445101737976,grad_norm: 0.999999079073941, iteration: 155338
loss: 1.0962661504745483,grad_norm: 0.999999061449021, iteration: 155339
loss: 0.9892799258232117,grad_norm: 0.9999990342330889, iteration: 155340
loss: 1.0065436363220215,grad_norm: 0.935393478359142, iteration: 155341
loss: 0.9745649695396423,grad_norm: 0.9772435783304878, iteration: 155342
loss: 0.9944730997085571,grad_norm: 0.9999991430075156, iteration: 155343
loss: 0.9918838143348694,grad_norm: 0.9553458928356836, iteration: 155344
loss: 1.0311318635940552,grad_norm: 0.9999989847108038, iteration: 155345
loss: 1.0199193954467773,grad_norm: 0.9494838575048701, iteration: 155346
loss: 1.0154366493225098,grad_norm: 0.9999990607826965, iteration: 155347
loss: 0.9921107888221741,grad_norm: 0.9485425545931221, iteration: 155348
loss: 1.0153045654296875,grad_norm: 0.9999989851230094, iteration: 155349
loss: 0.9953179955482483,grad_norm: 0.9999990917816048, iteration: 155350
loss: 0.9906946420669556,grad_norm: 0.9663266335930345, iteration: 155351
loss: 0.997769832611084,grad_norm: 0.9999991282871857, iteration: 155352
loss: 1.018808126449585,grad_norm: 0.9999990517583108, iteration: 155353
loss: 0.9962000250816345,grad_norm: 0.9999991852763557, iteration: 155354
loss: 0.9952138066291809,grad_norm: 0.9365053078909019, iteration: 155355
loss: 1.001916527748108,grad_norm: 0.9999990491221836, iteration: 155356
loss: 1.0269036293029785,grad_norm: 0.9999995080563875, iteration: 155357
loss: 1.044625163078308,grad_norm: 0.9999995533842695, iteration: 155358
loss: 1.008996844291687,grad_norm: 0.9283639645136993, iteration: 155359
loss: 1.0187350511550903,grad_norm: 0.9999990352091127, iteration: 155360
loss: 1.0268505811691284,grad_norm: 0.9999991306142496, iteration: 155361
loss: 1.008941650390625,grad_norm: 0.9999993144976592, iteration: 155362
loss: 0.9880205392837524,grad_norm: 0.9999990956322511, iteration: 155363
loss: 0.9940410852432251,grad_norm: 0.9999992009481437, iteration: 155364
loss: 0.9730901718139648,grad_norm: 0.9331768420811057, iteration: 155365
loss: 1.021773338317871,grad_norm: 0.9999993733312883, iteration: 155366
loss: 0.9891418218612671,grad_norm: 0.9722588360153864, iteration: 155367
loss: 0.9785394072532654,grad_norm: 0.999998966296245, iteration: 155368
loss: 1.087569236755371,grad_norm: 0.9999992271236261, iteration: 155369
loss: 0.9527000188827515,grad_norm: 0.9999990835207182, iteration: 155370
loss: 0.9985911846160889,grad_norm: 0.9999991497355489, iteration: 155371
loss: 0.9997697472572327,grad_norm: 0.9851045792040891, iteration: 155372
loss: 1.0302435159683228,grad_norm: 0.9999992434073675, iteration: 155373
loss: 0.9731335043907166,grad_norm: 0.9999992551367789, iteration: 155374
loss: 0.9849112629890442,grad_norm: 0.9999992496677743, iteration: 155375
loss: 1.020682692527771,grad_norm: 0.9999992646835165, iteration: 155376
loss: 1.0148372650146484,grad_norm: 0.999999150642829, iteration: 155377
loss: 1.102418065071106,grad_norm: 0.9999993515621379, iteration: 155378
loss: 1.0300782918930054,grad_norm: 0.9999990412473349, iteration: 155379
loss: 1.0898842811584473,grad_norm: 1.0000000251468937, iteration: 155380
loss: 1.0096759796142578,grad_norm: 0.9964465598406512, iteration: 155381
loss: 1.1347754001617432,grad_norm: 0.9999999180674342, iteration: 155382
loss: 0.9794579148292542,grad_norm: 0.9480363321894485, iteration: 155383
loss: 1.1818422079086304,grad_norm: 0.9999994913381002, iteration: 155384
loss: 0.9791626930236816,grad_norm: 0.9667918906042837, iteration: 155385
loss: 1.0570409297943115,grad_norm: 0.9713850954060808, iteration: 155386
loss: 0.9794869422912598,grad_norm: 0.999999191175259, iteration: 155387
loss: 1.018236756324768,grad_norm: 0.9999991118076649, iteration: 155388
loss: 1.0272523164749146,grad_norm: 0.9999989413082888, iteration: 155389
loss: 0.9782302975654602,grad_norm: 0.9999992519546399, iteration: 155390
loss: 1.0288805961608887,grad_norm: 0.9767194290602343, iteration: 155391
loss: 0.9891043901443481,grad_norm: 0.8728768856981428, iteration: 155392
loss: 1.05548095703125,grad_norm: 0.9999995066802231, iteration: 155393
loss: 0.9593177437782288,grad_norm: 0.999999063829254, iteration: 155394
loss: 1.202910304069519,grad_norm: 0.9999997009932182, iteration: 155395
loss: 1.0014415979385376,grad_norm: 0.9566526217866842, iteration: 155396
loss: 0.971397876739502,grad_norm: 0.9124969751689823, iteration: 155397
loss: 0.9862557649612427,grad_norm: 0.999999253337991, iteration: 155398
loss: 0.9707050323486328,grad_norm: 0.9267716419815328, iteration: 155399
loss: 0.9805446267127991,grad_norm: 0.9999991296871846, iteration: 155400
loss: 0.9921630620956421,grad_norm: 0.9999991178274302, iteration: 155401
loss: 0.9959341287612915,grad_norm: 0.999999261459973, iteration: 155402
loss: 0.9842495322227478,grad_norm: 0.9999989833109543, iteration: 155403
loss: 1.0109339952468872,grad_norm: 0.9999992448241274, iteration: 155404
loss: 1.3056972026824951,grad_norm: 0.9999995239974457, iteration: 155405
loss: 1.1674154996871948,grad_norm: 0.9999998280908334, iteration: 155406
loss: 1.0399428606033325,grad_norm: 0.9999991666142993, iteration: 155407
loss: 0.9882758259773254,grad_norm: 0.737157020212086, iteration: 155408
loss: 1.011836290359497,grad_norm: 0.9186333928530374, iteration: 155409
loss: 1.023058533668518,grad_norm: 0.9999991863896858, iteration: 155410
loss: 1.010016679763794,grad_norm: 0.9999993112712566, iteration: 155411
loss: 1.1533459424972534,grad_norm: 0.9999992041951722, iteration: 155412
loss: 1.2634633779525757,grad_norm: 0.9999993888345192, iteration: 155413
loss: 1.0027302503585815,grad_norm: 0.9695021781607353, iteration: 155414
loss: 1.057379126548767,grad_norm: 0.9999991659569867, iteration: 155415
loss: 1.0277600288391113,grad_norm: 0.9620055880796264, iteration: 155416
loss: 1.1182624101638794,grad_norm: 1.0000000325737504, iteration: 155417
loss: 1.3782625198364258,grad_norm: 0.9999992606429571, iteration: 155418
loss: 0.959570586681366,grad_norm: 0.9999989817378896, iteration: 155419
loss: 0.9926208853721619,grad_norm: 0.9999991134374449, iteration: 155420
loss: 1.1237698793411255,grad_norm: 0.9999992143128968, iteration: 155421
loss: 0.9720458984375,grad_norm: 0.9999991409549637, iteration: 155422
loss: 1.0161875486373901,grad_norm: 0.9999991533285852, iteration: 155423
loss: 0.9812436103820801,grad_norm: 0.9999998224966103, iteration: 155424
loss: 1.0451864004135132,grad_norm: 0.9999991813963239, iteration: 155425
loss: 0.9859111905097961,grad_norm: 0.9999991109897746, iteration: 155426
loss: 1.1864885091781616,grad_norm: 0.9999997153995811, iteration: 155427
loss: 1.036165714263916,grad_norm: 0.9999990957464787, iteration: 155428
loss: 1.0381118059158325,grad_norm: 0.9999996842528411, iteration: 155429
loss: 1.0129258632659912,grad_norm: 0.916528393208112, iteration: 155430
loss: 1.007063865661621,grad_norm: 0.9383687306333325, iteration: 155431
loss: 1.0013223886489868,grad_norm: 0.9987242450593425, iteration: 155432
loss: 1.000893473625183,grad_norm: 0.9862910986831303, iteration: 155433
loss: 1.0521093606948853,grad_norm: 0.9999989991766914, iteration: 155434
loss: 1.0410326719284058,grad_norm: 0.9642794104182913, iteration: 155435
loss: 1.0061734914779663,grad_norm: 0.999999205584513, iteration: 155436
loss: 0.9716275334358215,grad_norm: 0.9999991493989827, iteration: 155437
loss: 1.003491997718811,grad_norm: 0.9859530281478998, iteration: 155438
loss: 1.025300145149231,grad_norm: 0.9999989069138849, iteration: 155439
loss: 1.0442440509796143,grad_norm: 0.9367816191512364, iteration: 155440
loss: 0.9754951000213623,grad_norm: 0.9886827455330899, iteration: 155441
loss: 0.987277090549469,grad_norm: 0.9999991315353975, iteration: 155442
loss: 1.0224660634994507,grad_norm: 0.9999990577722493, iteration: 155443
loss: 1.007367491722107,grad_norm: 0.8182690649692284, iteration: 155444
loss: 1.0774189233779907,grad_norm: 0.9999999343194743, iteration: 155445
loss: 0.9669716954231262,grad_norm: 0.9718473287283953, iteration: 155446
loss: 1.0329642295837402,grad_norm: 0.918452403805481, iteration: 155447
loss: 0.9923097491264343,grad_norm: 0.9999990975904467, iteration: 155448
loss: 0.9273228645324707,grad_norm: 0.8858382647827318, iteration: 155449
loss: 0.9648993611335754,grad_norm: 0.8855832153959112, iteration: 155450
loss: 0.9934144020080566,grad_norm: 0.9859989263653962, iteration: 155451
loss: 0.9853629469871521,grad_norm: 0.9999991353996093, iteration: 155452
loss: 0.9899708032608032,grad_norm: 0.9999991189577275, iteration: 155453
loss: 1.010714054107666,grad_norm: 0.9245205580100077, iteration: 155454
loss: 1.000229001045227,grad_norm: 0.9999991874923189, iteration: 155455
loss: 1.056683897972107,grad_norm: 0.9999991275460298, iteration: 155456
loss: 1.0021125078201294,grad_norm: 0.9871749952726726, iteration: 155457
loss: 1.0607482194900513,grad_norm: 0.9999993597330846, iteration: 155458
loss: 0.9749548435211182,grad_norm: 0.9999990224707617, iteration: 155459
loss: 1.0208113193511963,grad_norm: 0.9999990488375832, iteration: 155460
loss: 1.010850191116333,grad_norm: 0.9999991469332654, iteration: 155461
loss: 1.0178978443145752,grad_norm: 0.8784379861878221, iteration: 155462
loss: 1.068666934967041,grad_norm: 0.9999993067469966, iteration: 155463
loss: 0.9848310947418213,grad_norm: 0.9506826367668771, iteration: 155464
loss: 1.004046082496643,grad_norm: 0.9999991722059308, iteration: 155465
loss: 1.0181165933609009,grad_norm: 0.9283726454141971, iteration: 155466
loss: 1.0134987831115723,grad_norm: 0.9999996475727506, iteration: 155467
loss: 1.0032273530960083,grad_norm: 0.9999993098228009, iteration: 155468
loss: 1.0012171268463135,grad_norm: 0.9999991525194251, iteration: 155469
loss: 0.9647783637046814,grad_norm: 0.802171443287971, iteration: 155470
loss: 0.958319365978241,grad_norm: 0.9999992191538871, iteration: 155471
loss: 0.984686017036438,grad_norm: 0.9642141006228314, iteration: 155472
loss: 0.9651038646697998,grad_norm: 0.9999991804381817, iteration: 155473
loss: 0.9338717460632324,grad_norm: 0.999999033095146, iteration: 155474
loss: 1.0226128101348877,grad_norm: 0.9640884766988945, iteration: 155475
loss: 1.0412471294403076,grad_norm: 0.9999992465088496, iteration: 155476
loss: 1.014190435409546,grad_norm: 0.9999991058250969, iteration: 155477
loss: 0.9758633375167847,grad_norm: 0.9999992514517597, iteration: 155478
loss: 1.0196479558944702,grad_norm: 0.9396553327100536, iteration: 155479
loss: 1.0297452211380005,grad_norm: 0.9121818265746146, iteration: 155480
loss: 1.0209373235702515,grad_norm: 0.9999990899177106, iteration: 155481
loss: 0.9912532567977905,grad_norm: 0.9999991030482346, iteration: 155482
loss: 0.958227276802063,grad_norm: 0.9999990350230679, iteration: 155483
loss: 0.9822249412536621,grad_norm: 0.9999992121754927, iteration: 155484
loss: 0.9435842037200928,grad_norm: 0.8625774932878036, iteration: 155485
loss: 0.9710925221443176,grad_norm: 0.8658873552269513, iteration: 155486
loss: 1.0838217735290527,grad_norm: 0.999999185293663, iteration: 155487
loss: 1.0085467100143433,grad_norm: 0.9824091786610378, iteration: 155488
loss: 1.0018008947372437,grad_norm: 0.9807546406015991, iteration: 155489
loss: 1.017834186553955,grad_norm: 0.9615134850000067, iteration: 155490
loss: 1.0246366262435913,grad_norm: 0.9999990757494338, iteration: 155491
loss: 0.9700292348861694,grad_norm: 0.9752557061336113, iteration: 155492
loss: 1.0115560293197632,grad_norm: 0.9698685700403126, iteration: 155493
loss: 0.9918026924133301,grad_norm: 0.9576320430827938, iteration: 155494
loss: 1.0169874429702759,grad_norm: 0.9999991297130698, iteration: 155495
loss: 0.9683793187141418,grad_norm: 0.9458407284381665, iteration: 155496
loss: 0.9620556235313416,grad_norm: 0.9740570914389812, iteration: 155497
loss: 1.0844372510910034,grad_norm: 0.9999997839757682, iteration: 155498
loss: 1.0320813655853271,grad_norm: 0.8955715503506895, iteration: 155499
loss: 1.0148755311965942,grad_norm: 0.9708705830582168, iteration: 155500
loss: 1.0168538093566895,grad_norm: 0.9999990705921441, iteration: 155501
loss: 0.9572892189025879,grad_norm: 0.9999993167198838, iteration: 155502
loss: 1.0270370244979858,grad_norm: 0.9558888349515547, iteration: 155503
loss: 0.9786438345909119,grad_norm: 0.999999049749797, iteration: 155504
loss: 1.0465089082717896,grad_norm: 0.9999999770599515, iteration: 155505
loss: 1.0017800331115723,grad_norm: 0.9999990775558254, iteration: 155506
loss: 1.1336538791656494,grad_norm: 0.9999996490894106, iteration: 155507
loss: 1.0395218133926392,grad_norm: 0.9999989299161942, iteration: 155508
loss: 0.9523706436157227,grad_norm: 0.9999990483697256, iteration: 155509
loss: 1.0190895795822144,grad_norm: 0.9816795799240146, iteration: 155510
loss: 0.9981557130813599,grad_norm: 0.9999991661493398, iteration: 155511
loss: 0.9912142157554626,grad_norm: 0.9595370933746808, iteration: 155512
loss: 0.9704127907752991,grad_norm: 0.9022453054695205, iteration: 155513
loss: 0.9619203805923462,grad_norm: 0.869595692860108, iteration: 155514
loss: 1.0096830129623413,grad_norm: 0.995674064625237, iteration: 155515
loss: 0.9557163715362549,grad_norm: 0.9999992240594995, iteration: 155516
loss: 0.9763520956039429,grad_norm: 0.9999990916374274, iteration: 155517
loss: 0.9891035556793213,grad_norm: 0.9999993670732386, iteration: 155518
loss: 0.9747370481491089,grad_norm: 0.9999992237609878, iteration: 155519
loss: 0.9979430437088013,grad_norm: 0.986638126702762, iteration: 155520
loss: 0.9465969204902649,grad_norm: 0.9999990382860232, iteration: 155521
loss: 1.0235563516616821,grad_norm: 0.9096054177155226, iteration: 155522
loss: 0.9600176811218262,grad_norm: 0.8510739762614558, iteration: 155523
loss: 1.0988599061965942,grad_norm: 0.9999994774833252, iteration: 155524
loss: 1.0013477802276611,grad_norm: 0.9669101943736191, iteration: 155525
loss: 1.0099352598190308,grad_norm: 0.9311917948071772, iteration: 155526
loss: 0.9686170816421509,grad_norm: 0.7951310156412825, iteration: 155527
loss: 0.9579105377197266,grad_norm: 0.9999991230763606, iteration: 155528
loss: 1.0124255418777466,grad_norm: 0.9951097330303684, iteration: 155529
loss: 0.9768383502960205,grad_norm: 0.999999209787851, iteration: 155530
loss: 1.0357568264007568,grad_norm: 0.9999992016041822, iteration: 155531
loss: 1.0165519714355469,grad_norm: 0.9999990175630266, iteration: 155532
loss: 0.9995213747024536,grad_norm: 0.9999991319896858, iteration: 155533
loss: 1.0220905542373657,grad_norm: 0.9999991117955714, iteration: 155534
loss: 0.9751595854759216,grad_norm: 0.9999990389193254, iteration: 155535
loss: 1.1023262739181519,grad_norm: 0.9999991853259053, iteration: 155536
loss: 0.9910904765129089,grad_norm: 0.9403963418618605, iteration: 155537
loss: 1.0257059335708618,grad_norm: 0.9814733143774041, iteration: 155538
loss: 1.0457617044448853,grad_norm: 0.9342818596379744, iteration: 155539
loss: 1.0228734016418457,grad_norm: 0.8601003360989209, iteration: 155540
loss: 1.0098227262496948,grad_norm: 0.9999991986974743, iteration: 155541
loss: 1.0461560487747192,grad_norm: 0.9999990940833754, iteration: 155542
loss: 1.0020997524261475,grad_norm: 0.9999990986076607, iteration: 155543
loss: 0.9924593567848206,grad_norm: 0.9156356754947595, iteration: 155544
loss: 0.9889055490493774,grad_norm: 0.9999991480132793, iteration: 155545
loss: 0.9636546969413757,grad_norm: 0.9158186892359064, iteration: 155546
loss: 0.9806867241859436,grad_norm: 0.9999990482519094, iteration: 155547
loss: 1.002899408340454,grad_norm: 0.9999991078158575, iteration: 155548
loss: 0.9751472473144531,grad_norm: 0.9999990729628542, iteration: 155549
loss: 0.9987995624542236,grad_norm: 0.8299537538919387, iteration: 155550
loss: 1.0102466344833374,grad_norm: 0.999999176525897, iteration: 155551
loss: 1.0205098390579224,grad_norm: 0.9999992633792864, iteration: 155552
loss: 0.9648137092590332,grad_norm: 0.9399730721395347, iteration: 155553
loss: 1.0098340511322021,grad_norm: 0.9167607772846911, iteration: 155554
loss: 0.9771097898483276,grad_norm: 0.91327152707524, iteration: 155555
loss: 0.9932184815406799,grad_norm: 0.9999990817133926, iteration: 155556
loss: 0.9810572266578674,grad_norm: 0.9653532822415983, iteration: 155557
loss: 0.986864447593689,grad_norm: 0.9370636165059621, iteration: 155558
loss: 1.0426700115203857,grad_norm: 0.9999992059303946, iteration: 155559
loss: 0.9687188267707825,grad_norm: 0.9999991619693294, iteration: 155560
loss: 1.0190098285675049,grad_norm: 0.9999992712858013, iteration: 155561
loss: 1.0036028623580933,grad_norm: 0.9999991983502388, iteration: 155562
loss: 0.9657725691795349,grad_norm: 0.9999990924176874, iteration: 155563
loss: 0.9813098311424255,grad_norm: 0.9999992024454167, iteration: 155564
loss: 1.012555480003357,grad_norm: 0.9999991349762829, iteration: 155565
loss: 0.9992066621780396,grad_norm: 0.979830170766978, iteration: 155566
loss: 1.0179340839385986,grad_norm: 0.8992322640336572, iteration: 155567
loss: 0.9649639129638672,grad_norm: 0.9999991220786036, iteration: 155568
loss: 0.9968817830085754,grad_norm: 0.9484411438760687, iteration: 155569
loss: 0.9785293340682983,grad_norm: 0.9144630112250178, iteration: 155570
loss: 1.010354995727539,grad_norm: 0.9999990581664326, iteration: 155571
loss: 0.9492884278297424,grad_norm: 0.9999990976164054, iteration: 155572
loss: 0.9973063468933105,grad_norm: 0.9999993875989848, iteration: 155573
loss: 1.0161114931106567,grad_norm: 0.9999992103332088, iteration: 155574
loss: 0.9714265465736389,grad_norm: 0.9542649176072375, iteration: 155575
loss: 1.0169907808303833,grad_norm: 0.9999997411045437, iteration: 155576
loss: 0.9922639727592468,grad_norm: 0.9999990943328019, iteration: 155577
loss: 1.0299954414367676,grad_norm: 0.9340506751292756, iteration: 155578
loss: 0.9765819311141968,grad_norm: 0.9009475772290703, iteration: 155579
loss: 0.965520441532135,grad_norm: 0.8736304218455279, iteration: 155580
loss: 0.9875199794769287,grad_norm: 0.9428464385051799, iteration: 155581
loss: 0.9751392006874084,grad_norm: 0.9386077888143761, iteration: 155582
loss: 1.0488630533218384,grad_norm: 0.9999991936170219, iteration: 155583
loss: 1.009463906288147,grad_norm: 0.9999989954676065, iteration: 155584
loss: 1.0074514150619507,grad_norm: 0.9999990035403411, iteration: 155585
loss: 0.9894983768463135,grad_norm: 0.9999995183308453, iteration: 155586
loss: 1.0290712118148804,grad_norm: 0.8971553697814415, iteration: 155587
loss: 0.9610545635223389,grad_norm: 0.9999991669354368, iteration: 155588
loss: 1.021470308303833,grad_norm: 0.9616331725577901, iteration: 155589
loss: 1.0869859457015991,grad_norm: 0.8850917720476643, iteration: 155590
loss: 1.0255333185195923,grad_norm: 0.966955064703687, iteration: 155591
loss: 0.9964907765388489,grad_norm: 0.9999990840722324, iteration: 155592
loss: 1.0170543193817139,grad_norm: 0.9999992072838867, iteration: 155593
loss: 1.0537447929382324,grad_norm: 0.9999990899208162, iteration: 155594
loss: 0.9715556502342224,grad_norm: 0.9999991099770581, iteration: 155595
loss: 1.0652464628219604,grad_norm: 0.9999992110466092, iteration: 155596
loss: 1.0031895637512207,grad_norm: 0.9731185844411268, iteration: 155597
loss: 1.0324273109436035,grad_norm: 0.9999990277279678, iteration: 155598
loss: 0.9911101460456848,grad_norm: 0.999999149489346, iteration: 155599
loss: 0.9945179224014282,grad_norm: 0.9999991518091174, iteration: 155600
loss: 0.9847041964530945,grad_norm: 0.8777956338332541, iteration: 155601
loss: 1.0480561256408691,grad_norm: 0.9027738411769268, iteration: 155602
loss: 1.014365792274475,grad_norm: 0.9999992474700269, iteration: 155603
loss: 1.0323656797409058,grad_norm: 0.9999996805261218, iteration: 155604
loss: 0.9812163710594177,grad_norm: 0.9749171499433981, iteration: 155605
loss: 0.9792088270187378,grad_norm: 0.9999991344686301, iteration: 155606
loss: 1.029952049255371,grad_norm: 0.9999990709323705, iteration: 155607
loss: 0.9906784892082214,grad_norm: 0.9999992119405583, iteration: 155608
loss: 0.9904078245162964,grad_norm: 0.9891571760437924, iteration: 155609
loss: 1.0601009130477905,grad_norm: 0.999999101367341, iteration: 155610
loss: 0.9973288774490356,grad_norm: 0.9585331608670382, iteration: 155611
loss: 0.9906943440437317,grad_norm: 0.9999990483337927, iteration: 155612
loss: 1.1418768167495728,grad_norm: 0.9999992982429141, iteration: 155613
loss: 0.9994999766349792,grad_norm: 0.9143117949239541, iteration: 155614
loss: 1.056008219718933,grad_norm: 1.0000000052548488, iteration: 155615
loss: 0.9898322224617004,grad_norm: 0.9999992352474831, iteration: 155616
loss: 1.0174756050109863,grad_norm: 0.9999991534048343, iteration: 155617
loss: 0.9933550357818604,grad_norm: 0.9999991681320892, iteration: 155618
loss: 1.0360056161880493,grad_norm: 0.9999991146051267, iteration: 155619
loss: 1.0165480375289917,grad_norm: 0.8883755622360807, iteration: 155620
loss: 1.0290178060531616,grad_norm: 0.9999992305646053, iteration: 155621
loss: 0.9872743487358093,grad_norm: 0.8913735833128136, iteration: 155622
loss: 0.9760434031486511,grad_norm: 0.9413376514068168, iteration: 155623
loss: 1.0059126615524292,grad_norm: 0.9999991823779527, iteration: 155624
loss: 1.0010473728179932,grad_norm: 0.9534033339221007, iteration: 155625
loss: 0.9988967776298523,grad_norm: 0.9606435968019367, iteration: 155626
loss: 1.0741499662399292,grad_norm: 0.9999998838686943, iteration: 155627
loss: 1.0362716913223267,grad_norm: 0.9895936058073611, iteration: 155628
loss: 1.0132434368133545,grad_norm: 0.9999991005690518, iteration: 155629
loss: 0.9564371705055237,grad_norm: 0.9999990886420445, iteration: 155630
loss: 0.9831119179725647,grad_norm: 0.9201583220950952, iteration: 155631
loss: 1.0004993677139282,grad_norm: 0.96742704308028, iteration: 155632
loss: 1.0050265789031982,grad_norm: 0.9910897615456303, iteration: 155633
loss: 1.0462310314178467,grad_norm: 0.999999111347522, iteration: 155634
loss: 1.007910132408142,grad_norm: 0.999999625429641, iteration: 155635
loss: 0.9750502109527588,grad_norm: 0.999999008384656, iteration: 155636
loss: 1.0382190942764282,grad_norm: 0.9999990798330645, iteration: 155637
loss: 1.0073809623718262,grad_norm: 0.9999991540636012, iteration: 155638
loss: 1.0113524198532104,grad_norm: 0.9999989625042558, iteration: 155639
loss: 0.9767320156097412,grad_norm: 0.9999991059635609, iteration: 155640
loss: 0.9520880579948425,grad_norm: 0.8683799406730098, iteration: 155641
loss: 1.0248794555664062,grad_norm: 0.9620019585741818, iteration: 155642
loss: 1.0102601051330566,grad_norm: 0.9056915968081721, iteration: 155643
loss: 0.9706135988235474,grad_norm: 0.9999991948957997, iteration: 155644
loss: 0.9672136306762695,grad_norm: 0.9002251784723196, iteration: 155645
loss: 0.9988331198692322,grad_norm: 0.9999989425380533, iteration: 155646
loss: 0.9715724587440491,grad_norm: 0.9999990241305781, iteration: 155647
loss: 1.0405529737472534,grad_norm: 0.9999992858242127, iteration: 155648
loss: 1.0019112825393677,grad_norm: 0.9999990775377378, iteration: 155649
loss: 0.9998992681503296,grad_norm: 0.845527447981944, iteration: 155650
loss: 0.998199462890625,grad_norm: 0.9999991654552993, iteration: 155651
loss: 0.985569179058075,grad_norm: 0.8310118082579007, iteration: 155652
loss: 0.9891815185546875,grad_norm: 0.9999990702531804, iteration: 155653
loss: 0.9772376418113708,grad_norm: 0.829421320708879, iteration: 155654
loss: 1.0030441284179688,grad_norm: 0.999998953656512, iteration: 155655
loss: 1.009298324584961,grad_norm: 0.9999991886899146, iteration: 155656
loss: 0.9888335466384888,grad_norm: 0.9999989602518466, iteration: 155657
loss: 1.0205888748168945,grad_norm: 0.8777391996824213, iteration: 155658
loss: 1.0272694826126099,grad_norm: 0.9999991516660885, iteration: 155659
loss: 1.062923550605774,grad_norm: 0.9999999794917245, iteration: 155660
loss: 1.078437089920044,grad_norm: 0.9999995061918245, iteration: 155661
loss: 1.0062004327774048,grad_norm: 0.9999990358995787, iteration: 155662
loss: 0.9784539341926575,grad_norm: 0.9999991722372759, iteration: 155663
loss: 1.0119839906692505,grad_norm: 0.9786440232838594, iteration: 155664
loss: 0.9803144931793213,grad_norm: 0.9999991253954288, iteration: 155665
loss: 0.9902316331863403,grad_norm: 0.9999992334585249, iteration: 155666
loss: 1.0024478435516357,grad_norm: 0.9999992445068608, iteration: 155667
loss: 1.0057061910629272,grad_norm: 0.9238398912358435, iteration: 155668
loss: 1.0173249244689941,grad_norm: 0.9999990486202881, iteration: 155669
loss: 1.056320309638977,grad_norm: 0.9999998729454848, iteration: 155670
loss: 0.9872331023216248,grad_norm: 0.9999991332882971, iteration: 155671
loss: 0.9889661073684692,grad_norm: 0.9398161834400807, iteration: 155672
loss: 1.028786540031433,grad_norm: 0.8880249840797673, iteration: 155673
loss: 1.0006135702133179,grad_norm: 0.9999991457940615, iteration: 155674
loss: 0.9884684085845947,grad_norm: 0.9860181059971367, iteration: 155675
loss: 0.9696885943412781,grad_norm: 0.8475865760518343, iteration: 155676
loss: 0.9980921745300293,grad_norm: 0.9999991502611574, iteration: 155677
loss: 1.0425597429275513,grad_norm: 0.9999992705665682, iteration: 155678
loss: 0.9927582740783691,grad_norm: 0.9999990806355159, iteration: 155679
loss: 0.9966785907745361,grad_norm: 0.9965090626513406, iteration: 155680
loss: 1.0351005792617798,grad_norm: 0.999999090231494, iteration: 155681
loss: 1.0446605682373047,grad_norm: 0.9434426683219608, iteration: 155682
loss: 0.9891742467880249,grad_norm: 0.9999990358062897, iteration: 155683
loss: 1.0013459920883179,grad_norm: 0.948295522566346, iteration: 155684
loss: 1.0842870473861694,grad_norm: 0.9999990843850429, iteration: 155685
loss: 1.0065491199493408,grad_norm: 0.9999991677419077, iteration: 155686
loss: 1.0820969343185425,grad_norm: 0.983916529737741, iteration: 155687
loss: 0.9964354038238525,grad_norm: 0.9999990788979518, iteration: 155688
loss: 1.01810884475708,grad_norm: 0.9962645682699979, iteration: 155689
loss: 0.9927119612693787,grad_norm: 0.99999925666886, iteration: 155690
loss: 1.017470121383667,grad_norm: 0.9728951050280181, iteration: 155691
loss: 0.9935610294342041,grad_norm: 0.9999991747487879, iteration: 155692
loss: 0.9953340888023376,grad_norm: 0.9999991003397187, iteration: 155693
loss: 0.967746913433075,grad_norm: 0.9023733546487758, iteration: 155694
loss: 1.0194798707962036,grad_norm: 0.9999990974513859, iteration: 155695
loss: 1.0009106397628784,grad_norm: 0.8842758415364865, iteration: 155696
loss: 1.0359773635864258,grad_norm: 0.8976015622674776, iteration: 155697
loss: 0.9701038002967834,grad_norm: 0.9999990417171498, iteration: 155698
loss: 0.9809480309486389,grad_norm: 0.9999992125270772, iteration: 155699
loss: 0.9988275170326233,grad_norm: 0.9624983392972268, iteration: 155700
loss: 1.0110905170440674,grad_norm: 0.9009491350481577, iteration: 155701
loss: 0.9613720178604126,grad_norm: 0.986187286041593, iteration: 155702
loss: 0.9838565587997437,grad_norm: 0.9999989368418377, iteration: 155703
loss: 1.0270577669143677,grad_norm: 0.8853462892069309, iteration: 155704
loss: 1.0127326250076294,grad_norm: 0.7595396581838709, iteration: 155705
loss: 0.9964512586593628,grad_norm: 0.9999991788977155, iteration: 155706
loss: 0.9913076758384705,grad_norm: 0.9999991911795444, iteration: 155707
loss: 1.0128567218780518,grad_norm: 0.9999991606603508, iteration: 155708
loss: 0.987037181854248,grad_norm: 0.8774125820436579, iteration: 155709
loss: 0.9609403014183044,grad_norm: 0.9744901779544469, iteration: 155710
loss: 1.0026273727416992,grad_norm: 0.9999991955953413, iteration: 155711
loss: 0.9757952690124512,grad_norm: 0.9450874172986203, iteration: 155712
loss: 1.0045642852783203,grad_norm: 0.9999990673979076, iteration: 155713
loss: 0.9829121828079224,grad_norm: 0.9352896170203674, iteration: 155714
loss: 1.0013755559921265,grad_norm: 0.9999990421548535, iteration: 155715
loss: 1.011662483215332,grad_norm: 0.9999994145742505, iteration: 155716
loss: 0.9720475673675537,grad_norm: 0.999999243542603, iteration: 155717
loss: 0.9918070435523987,grad_norm: 0.9481395198234109, iteration: 155718
loss: 0.9652798175811768,grad_norm: 0.7893238367489411, iteration: 155719
loss: 0.9881114363670349,grad_norm: 0.967308097948628, iteration: 155720
loss: 0.986727237701416,grad_norm: 0.9366842552806993, iteration: 155721
loss: 1.00093412399292,grad_norm: 0.9999995493332323, iteration: 155722
loss: 1.0019949674606323,grad_norm: 0.9728043863916586, iteration: 155723
loss: 1.0215579271316528,grad_norm: 0.9999991624019108, iteration: 155724
loss: 0.9848301410675049,grad_norm: 0.9927780064580837, iteration: 155725
loss: 0.9596683979034424,grad_norm: 0.9999990970539758, iteration: 155726
loss: 0.9973173141479492,grad_norm: 0.8891892678211166, iteration: 155727
loss: 1.0280083417892456,grad_norm: 0.9999989997059764, iteration: 155728
loss: 0.9993057250976562,grad_norm: 0.9789183845725857, iteration: 155729
loss: 0.9942466020584106,grad_norm: 0.9999991717233522, iteration: 155730
loss: 1.1497091054916382,grad_norm: 0.9999991792431766, iteration: 155731
loss: 0.9580902457237244,grad_norm: 0.9999992394812334, iteration: 155732
loss: 0.9793733954429626,grad_norm: 0.8866098368607203, iteration: 155733
loss: 1.015479326248169,grad_norm: 0.9850276794684096, iteration: 155734
loss: 1.033841609954834,grad_norm: 0.9418529466763634, iteration: 155735
loss: 0.9774420857429504,grad_norm: 0.9861625346832349, iteration: 155736
loss: 0.9708656072616577,grad_norm: 0.9999993314260343, iteration: 155737
loss: 1.0114011764526367,grad_norm: 0.9999994323387096, iteration: 155738
loss: 1.0469545125961304,grad_norm: 0.888295091901668, iteration: 155739
loss: 0.9755905866622925,grad_norm: 0.9310280023778024, iteration: 155740
loss: 1.0038328170776367,grad_norm: 0.9999990947311039, iteration: 155741
loss: 1.0187932252883911,grad_norm: 0.8595611006226018, iteration: 155742
loss: 0.9967815279960632,grad_norm: 0.965209220722988, iteration: 155743
loss: 0.9909330010414124,grad_norm: 0.9868380311074799, iteration: 155744
loss: 1.038605809211731,grad_norm: 0.9880923032228357, iteration: 155745
loss: 1.0145047903060913,grad_norm: 0.9999992288631749, iteration: 155746
loss: 0.9660634994506836,grad_norm: 0.9999991011291393, iteration: 155747
loss: 0.9626175761222839,grad_norm: 0.999999041130093, iteration: 155748
loss: 0.9714550375938416,grad_norm: 0.9999993008699634, iteration: 155749
loss: 1.0346819162368774,grad_norm: 0.9059670283913444, iteration: 155750
loss: 1.0110046863555908,grad_norm: 0.9999991728618316, iteration: 155751
loss: 1.0338982343673706,grad_norm: 0.9999990575213427, iteration: 155752
loss: 1.0094664096832275,grad_norm: 0.942046993144346, iteration: 155753
loss: 1.0539153814315796,grad_norm: 0.9468612412746924, iteration: 155754
loss: 0.9481010437011719,grad_norm: 0.9999990986773358, iteration: 155755
loss: 1.0186748504638672,grad_norm: 0.9999991616787666, iteration: 155756
loss: 0.968332052230835,grad_norm: 0.9784683805360531, iteration: 155757
loss: 1.0090466737747192,grad_norm: 0.9999990587470586, iteration: 155758
loss: 0.9955710172653198,grad_norm: 0.9999992157779786, iteration: 155759
loss: 0.9826452732086182,grad_norm: 0.9606145503632001, iteration: 155760
loss: 1.0016707181930542,grad_norm: 0.9999990791764168, iteration: 155761
loss: 0.9529346227645874,grad_norm: 0.9999989885851113, iteration: 155762
loss: 0.9996083974838257,grad_norm: 0.9999991247476475, iteration: 155763
loss: 1.009060025215149,grad_norm: 0.9999990348662291, iteration: 155764
loss: 0.9946182370185852,grad_norm: 0.9244730219311692, iteration: 155765
loss: 0.9963672757148743,grad_norm: 0.9999991903257993, iteration: 155766
loss: 0.9606032967567444,grad_norm: 0.99999898037633, iteration: 155767
loss: 1.0024598836898804,grad_norm: 0.9999998613878787, iteration: 155768
loss: 1.0304712057113647,grad_norm: 0.999999566803379, iteration: 155769
loss: 0.9872607588768005,grad_norm: 0.8674944035314657, iteration: 155770
loss: 1.003190279006958,grad_norm: 0.9888137128366603, iteration: 155771
loss: 0.9936138391494751,grad_norm: 0.9999990202649409, iteration: 155772
loss: 1.0404387712478638,grad_norm: 0.9999999196768996, iteration: 155773
loss: 1.003114938735962,grad_norm: 0.8836347527044162, iteration: 155774
loss: 0.98383629322052,grad_norm: 0.8543958377305765, iteration: 155775
loss: 1.0370957851409912,grad_norm: 0.9999991151787182, iteration: 155776
loss: 0.997287392616272,grad_norm: 0.9102109211988597, iteration: 155777
loss: 1.0133198499679565,grad_norm: 0.9999992145976707, iteration: 155778
loss: 1.0093131065368652,grad_norm: 0.9999990278757944, iteration: 155779
loss: 0.9692707657814026,grad_norm: 0.825780141342166, iteration: 155780
loss: 0.9891180992126465,grad_norm: 0.9999991686011961, iteration: 155781
loss: 0.9835753440856934,grad_norm: 0.9241383182293632, iteration: 155782
loss: 0.9724884033203125,grad_norm: 0.854560505542564, iteration: 155783
loss: 1.0137994289398193,grad_norm: 0.9999998103308707, iteration: 155784
loss: 0.9378334283828735,grad_norm: 0.9789272691075197, iteration: 155785
loss: 1.0008207559585571,grad_norm: 0.9999990233241842, iteration: 155786
loss: 1.0257951021194458,grad_norm: 0.9834430533355581, iteration: 155787
loss: 0.9800317287445068,grad_norm: 0.9419470224671096, iteration: 155788
loss: 0.9627912640571594,grad_norm: 0.9999992010710664, iteration: 155789
loss: 1.0031734704971313,grad_norm: 0.9577994559518572, iteration: 155790
loss: 1.049055814743042,grad_norm: 0.9524560588786007, iteration: 155791
loss: 1.0226805210113525,grad_norm: 0.9999990288660383, iteration: 155792
loss: 1.000986099243164,grad_norm: 0.9345781532240692, iteration: 155793
loss: 0.9858927726745605,grad_norm: 0.9999991820955975, iteration: 155794
loss: 1.014278769493103,grad_norm: 0.8573668787320642, iteration: 155795
loss: 1.0789825916290283,grad_norm: 0.9999991870279793, iteration: 155796
loss: 1.0099812746047974,grad_norm: 0.9999994156075463, iteration: 155797
loss: 1.1181005239486694,grad_norm: 0.9999992722858628, iteration: 155798
loss: 1.0052084922790527,grad_norm: 0.8728453562649501, iteration: 155799
loss: 1.0108363628387451,grad_norm: 0.9501006821437282, iteration: 155800
loss: 1.0825884342193604,grad_norm: 0.999999195429569, iteration: 155801
loss: 1.0052109956741333,grad_norm: 0.8667954645808043, iteration: 155802
loss: 0.995674192905426,grad_norm: 0.9044038193854692, iteration: 155803
loss: 0.9956446290016174,grad_norm: 0.936263325778292, iteration: 155804
loss: 0.952767550945282,grad_norm: 0.9748721609117377, iteration: 155805
loss: 1.0407365560531616,grad_norm: 0.9999992340786565, iteration: 155806
loss: 1.0153793096542358,grad_norm: 0.9999991826128901, iteration: 155807
loss: 0.9696376919746399,grad_norm: 0.9999991359878967, iteration: 155808
loss: 1.050232172012329,grad_norm: 0.9607978892593115, iteration: 155809
loss: 1.026860237121582,grad_norm: 0.9999992110232672, iteration: 155810
loss: 1.012540340423584,grad_norm: 0.9521012146586816, iteration: 155811
loss: 0.9882838726043701,grad_norm: 0.8461534029613388, iteration: 155812
loss: 1.0760948657989502,grad_norm: 0.9328892111043646, iteration: 155813
loss: 0.9610065221786499,grad_norm: 0.971291537003266, iteration: 155814
loss: 0.9793221354484558,grad_norm: 0.9999990956670642, iteration: 155815
loss: 0.97237628698349,grad_norm: 0.9999990349089146, iteration: 155816
loss: 1.013098120689392,grad_norm: 0.908905626159918, iteration: 155817
loss: 0.9786772727966309,grad_norm: 0.9999993088437984, iteration: 155818
loss: 1.0068726539611816,grad_norm: 0.9865759050233225, iteration: 155819
loss: 0.9771186113357544,grad_norm: 0.9665437167218398, iteration: 155820
loss: 1.0038787126541138,grad_norm: 0.9999991399715261, iteration: 155821
loss: 0.9840918183326721,grad_norm: 0.9999988890677864, iteration: 155822
loss: 1.0159870386123657,grad_norm: 0.9945316477039928, iteration: 155823
loss: 1.0220104455947876,grad_norm: 0.9999993092018752, iteration: 155824
loss: 0.9933512210845947,grad_norm: 0.954482132375505, iteration: 155825
loss: 1.009010672569275,grad_norm: 0.9999994939866006, iteration: 155826
loss: 1.0091291666030884,grad_norm: 0.999999050762563, iteration: 155827
loss: 0.9916654825210571,grad_norm: 0.9999993117883292, iteration: 155828
loss: 0.9999732375144958,grad_norm: 0.9999993490389546, iteration: 155829
loss: 1.0199635028839111,grad_norm: 0.9489971275663419, iteration: 155830
loss: 1.0198055505752563,grad_norm: 0.9999989582712182, iteration: 155831
loss: 0.9950560927391052,grad_norm: 0.9582593433798144, iteration: 155832
loss: 1.0076861381530762,grad_norm: 0.9639625318578037, iteration: 155833
loss: 1.0317175388336182,grad_norm: 0.9999989888884774, iteration: 155834
loss: 1.0255323648452759,grad_norm: 0.9999991036602782, iteration: 155835
loss: 1.00141441822052,grad_norm: 0.7546532674529451, iteration: 155836
loss: 0.9860942363739014,grad_norm: 0.9836166684085592, iteration: 155837
loss: 1.022626519203186,grad_norm: 0.8865079995200156, iteration: 155838
loss: 1.0319099426269531,grad_norm: 0.9999990553947574, iteration: 155839
loss: 1.0627459287643433,grad_norm: 0.990029861839692, iteration: 155840
loss: 1.0028151273727417,grad_norm: 0.9999991926174544, iteration: 155841
loss: 1.0117201805114746,grad_norm: 0.9962553447111495, iteration: 155842
loss: 0.9690044522285461,grad_norm: 0.8832989443522543, iteration: 155843
loss: 1.014067530632019,grad_norm: 0.9002336070101289, iteration: 155844
loss: 0.9442842602729797,grad_norm: 0.989497547621259, iteration: 155845
loss: 0.9937081933021545,grad_norm: 0.9792076616849112, iteration: 155846
loss: 1.0186296701431274,grad_norm: 0.999999221103113, iteration: 155847
loss: 0.9981526732444763,grad_norm: 0.9530252972401511, iteration: 155848
loss: 0.9958407878875732,grad_norm: 0.9999991091471994, iteration: 155849
loss: 0.997287929058075,grad_norm: 0.9999990146958138, iteration: 155850
loss: 0.9818686246871948,grad_norm: 0.9999991383034288, iteration: 155851
loss: 0.9914644360542297,grad_norm: 0.9999990368775878, iteration: 155852
loss: 0.9846786856651306,grad_norm: 0.9348090816801735, iteration: 155853
loss: 1.0044968128204346,grad_norm: 0.9994138434362011, iteration: 155854
loss: 1.0239973068237305,grad_norm: 0.9018544386212789, iteration: 155855
loss: 1.0016374588012695,grad_norm: 0.9999990552822389, iteration: 155856
loss: 0.9550471305847168,grad_norm: 0.9854723356550906, iteration: 155857
loss: 1.0653667449951172,grad_norm: 0.9999991245139024, iteration: 155858
loss: 0.9954430460929871,grad_norm: 0.9567182163814433, iteration: 155859
loss: 0.9913896918296814,grad_norm: 0.9522317903939947, iteration: 155860
loss: 1.0400618314743042,grad_norm: 0.9582714814331578, iteration: 155861
loss: 1.0189876556396484,grad_norm: 0.9999993398036907, iteration: 155862
loss: 1.0026354789733887,grad_norm: 0.9999991089543345, iteration: 155863
loss: 0.9901390671730042,grad_norm: 0.999999199295296, iteration: 155864
loss: 1.0074067115783691,grad_norm: 0.8295519853930368, iteration: 155865
loss: 1.0142983198165894,grad_norm: 0.9999991112682742, iteration: 155866
loss: 0.9965489506721497,grad_norm: 0.9999992262295264, iteration: 155867
loss: 0.9912685751914978,grad_norm: 0.9999991924964743, iteration: 155868
loss: 0.9783730506896973,grad_norm: 0.9999991749146773, iteration: 155869
loss: 0.9988412261009216,grad_norm: 0.9757637068979201, iteration: 155870
loss: 0.9858336448669434,grad_norm: 0.8486050711143744, iteration: 155871
loss: 1.0013655424118042,grad_norm: 0.9999991831364327, iteration: 155872
loss: 0.9736993312835693,grad_norm: 0.919824096960951, iteration: 155873
loss: 0.9673618674278259,grad_norm: 0.9999990939581354, iteration: 155874
loss: 0.9643789529800415,grad_norm: 0.9926817841995133, iteration: 155875
loss: 1.0040265321731567,grad_norm: 0.9999989850281391, iteration: 155876
loss: 0.9891169667243958,grad_norm: 0.9945687340891786, iteration: 155877
loss: 1.0274461507797241,grad_norm: 0.9999991087913683, iteration: 155878
loss: 1.007258653640747,grad_norm: 0.9999992130383771, iteration: 155879
loss: 0.9968607425689697,grad_norm: 0.9999992651225456, iteration: 155880
loss: 0.9939203262329102,grad_norm: 0.9999992138263161, iteration: 155881
loss: 0.9862008690834045,grad_norm: 0.9999991728051755, iteration: 155882
loss: 0.9804710149765015,grad_norm: 0.9999990799091643, iteration: 155883
loss: 1.0330568552017212,grad_norm: 0.9999990455765101, iteration: 155884
loss: 0.9986712336540222,grad_norm: 0.99999913167679, iteration: 155885
loss: 1.0062665939331055,grad_norm: 0.9999990410453955, iteration: 155886
loss: 1.0786693096160889,grad_norm: 0.9999992251495088, iteration: 155887
loss: 0.9916747808456421,grad_norm: 0.9999990778632414, iteration: 155888
loss: 0.9943606853485107,grad_norm: 0.9999990736394854, iteration: 155889
loss: 1.008212685585022,grad_norm: 0.9335745944049383, iteration: 155890
loss: 1.0156455039978027,grad_norm: 0.9999992065206365, iteration: 155891
loss: 1.0039746761322021,grad_norm: 0.8929301373999324, iteration: 155892
loss: 0.9968462586402893,grad_norm: 0.9195922573436314, iteration: 155893
loss: 0.9562605023384094,grad_norm: 0.9324197989242542, iteration: 155894
loss: 1.0254188776016235,grad_norm: 0.8479729754201665, iteration: 155895
loss: 0.9940225481987,grad_norm: 0.9999992036989568, iteration: 155896
loss: 1.0100297927856445,grad_norm: 0.8406477065287778, iteration: 155897
loss: 0.9909260869026184,grad_norm: 0.9999991993335923, iteration: 155898
loss: 0.9690518379211426,grad_norm: 0.9999991276282001, iteration: 155899
loss: 1.034753680229187,grad_norm: 0.9999990989970107, iteration: 155900
loss: 1.0160226821899414,grad_norm: 0.9999990167145344, iteration: 155901
loss: 1.000442624092102,grad_norm: 0.9999992090586098, iteration: 155902
loss: 1.0240023136138916,grad_norm: 0.9955782482876735, iteration: 155903
loss: 1.0231668949127197,grad_norm: 0.9824172504426235, iteration: 155904
loss: 0.9749445915222168,grad_norm: 0.9219851853058055, iteration: 155905
loss: 0.9920544624328613,grad_norm: 0.9999991103756103, iteration: 155906
loss: 1.0118472576141357,grad_norm: 0.9255034986117479, iteration: 155907
loss: 0.9873484373092651,grad_norm: 0.7797464883696317, iteration: 155908
loss: 1.0339494943618774,grad_norm: 0.999999266923561, iteration: 155909
loss: 1.003650426864624,grad_norm: 0.9999991617637612, iteration: 155910
loss: 0.9925517439842224,grad_norm: 0.947181014318703, iteration: 155911
loss: 0.9871475100517273,grad_norm: 0.9341726471216304, iteration: 155912
loss: 0.9849451780319214,grad_norm: 0.88465580397789, iteration: 155913
loss: 0.9724896550178528,grad_norm: 0.9999992200476825, iteration: 155914
loss: 1.0096503496170044,grad_norm: 0.9999992323656703, iteration: 155915
loss: 0.9678667187690735,grad_norm: 0.9999990264272883, iteration: 155916
loss: 0.9703807234764099,grad_norm: 0.9999997588996411, iteration: 155917
loss: 0.9522666931152344,grad_norm: 0.9402416480175976, iteration: 155918
loss: 0.9753015637397766,grad_norm: 0.9999993796099172, iteration: 155919
loss: 1.0043505430221558,grad_norm: 0.9999992704164212, iteration: 155920
loss: 0.9730731844902039,grad_norm: 0.999999234912378, iteration: 155921
loss: 0.9597439765930176,grad_norm: 0.9999990416758916, iteration: 155922
loss: 0.9943682551383972,grad_norm: 0.9306641618134025, iteration: 155923
loss: 1.0193703174591064,grad_norm: 0.9931079128193865, iteration: 155924
loss: 0.9884135723114014,grad_norm: 0.9461112528125216, iteration: 155925
loss: 0.957660973072052,grad_norm: 0.9098065948765599, iteration: 155926
loss: 0.9937986135482788,grad_norm: 0.9999989656691124, iteration: 155927
loss: 1.0109071731567383,grad_norm: 0.9558867588415187, iteration: 155928
loss: 0.9963405728340149,grad_norm: 0.8974253692714309, iteration: 155929
loss: 1.00335693359375,grad_norm: 0.9809389585561508, iteration: 155930
loss: 0.99653559923172,grad_norm: 0.9619514799324758, iteration: 155931
loss: 0.988669216632843,grad_norm: 0.9999989669565288, iteration: 155932
loss: 0.997622549533844,grad_norm: 0.9731876316345767, iteration: 155933
loss: 0.9849749803543091,grad_norm: 0.9693083292022548, iteration: 155934
loss: 1.0050761699676514,grad_norm: 0.9999991062615503, iteration: 155935
loss: 0.9662314653396606,grad_norm: 0.9999991646748289, iteration: 155936
loss: 0.9704804420471191,grad_norm: 0.9862711254634688, iteration: 155937
loss: 0.977703332901001,grad_norm: 0.9999991150851956, iteration: 155938
loss: 1.0599658489227295,grad_norm: 0.999999001305015, iteration: 155939
loss: 0.9967610836029053,grad_norm: 0.9999990912117663, iteration: 155940
loss: 1.0019453763961792,grad_norm: 0.9587998948851902, iteration: 155941
loss: 1.0020458698272705,grad_norm: 0.9999989846210783, iteration: 155942
loss: 0.9691991806030273,grad_norm: 0.9809936164993984, iteration: 155943
loss: 1.0304430723190308,grad_norm: 0.9999992518555461, iteration: 155944
loss: 1.0096052885055542,grad_norm: 0.9842732607115843, iteration: 155945
loss: 1.025286078453064,grad_norm: 0.9999991883491948, iteration: 155946
loss: 0.9846295714378357,grad_norm: 0.9999992240489654, iteration: 155947
loss: 1.038536787033081,grad_norm: 0.9999989890432601, iteration: 155948
loss: 0.9826269149780273,grad_norm: 0.9999990523498002, iteration: 155949
loss: 1.001555323600769,grad_norm: 0.9779409526601157, iteration: 155950
loss: 0.968848705291748,grad_norm: 0.8848582019344806, iteration: 155951
loss: 1.0129117965698242,grad_norm: 0.93531800064909, iteration: 155952
loss: 1.031002163887024,grad_norm: 0.976363656858408, iteration: 155953
loss: 0.9858075976371765,grad_norm: 0.9180299429968573, iteration: 155954
loss: 0.9979106187820435,grad_norm: 0.9999991269811043, iteration: 155955
loss: 1.0009090900421143,grad_norm: 0.9411916484555432, iteration: 155956
loss: 0.986505925655365,grad_norm: 0.9404047304239943, iteration: 155957
loss: 0.9495576620101929,grad_norm: 0.999999182381116, iteration: 155958
loss: 0.9969910383224487,grad_norm: 0.9999989742805907, iteration: 155959
loss: 1.0026943683624268,grad_norm: 0.999999196036724, iteration: 155960
loss: 1.0575560331344604,grad_norm: 0.9703092332991458, iteration: 155961
loss: 0.9638537764549255,grad_norm: 0.9999989840199172, iteration: 155962
loss: 0.9956303238868713,grad_norm: 0.9999991813643515, iteration: 155963
loss: 0.9784887433052063,grad_norm: 0.9393006252610728, iteration: 155964
loss: 1.0153535604476929,grad_norm: 0.9987212206224698, iteration: 155965
loss: 1.0013395547866821,grad_norm: 0.9999995166680814, iteration: 155966
loss: 0.9932854175567627,grad_norm: 0.9999989721557192, iteration: 155967
loss: 1.0020235776901245,grad_norm: 0.8809261873996728, iteration: 155968
loss: 0.9819300770759583,grad_norm: 0.999999192396686, iteration: 155969
loss: 0.9938371777534485,grad_norm: 0.9999991425140374, iteration: 155970
loss: 0.9988712072372437,grad_norm: 0.9999991415895684, iteration: 155971
loss: 0.9968806505203247,grad_norm: 0.999999221143763, iteration: 155972
loss: 1.0125250816345215,grad_norm: 0.9999990727594659, iteration: 155973
loss: 1.0497779846191406,grad_norm: 0.9999999271092183, iteration: 155974
loss: 0.9943636655807495,grad_norm: 0.9831136858044156, iteration: 155975
loss: 1.0162664651870728,grad_norm: 0.8726005189010118, iteration: 155976
loss: 1.0174590349197388,grad_norm: 0.9999990473837458, iteration: 155977
loss: 1.0003483295440674,grad_norm: 0.9999991453765569, iteration: 155978
loss: 1.0243942737579346,grad_norm: 0.9590381517485078, iteration: 155979
loss: 0.9919949769973755,grad_norm: 0.999999193566574, iteration: 155980
loss: 1.0182456970214844,grad_norm: 0.999999072809511, iteration: 155981
loss: 0.9639272689819336,grad_norm: 0.9999990555773997, iteration: 155982
loss: 0.987434446811676,grad_norm: 0.9999991718224764, iteration: 155983
loss: 0.9641819000244141,grad_norm: 0.9999990436126536, iteration: 155984
loss: 1.0321530103683472,grad_norm: 0.994639313940465, iteration: 155985
loss: 0.9691039323806763,grad_norm: 0.9999991216517552, iteration: 155986
loss: 0.9797120690345764,grad_norm: 0.9999997025872953, iteration: 155987
loss: 1.0152850151062012,grad_norm: 0.891474322993992, iteration: 155988
loss: 1.0112963914871216,grad_norm: 0.9999991578124399, iteration: 155989
loss: 1.0241098403930664,grad_norm: 0.9999994737318949, iteration: 155990
loss: 1.0192580223083496,grad_norm: 0.9999993362522867, iteration: 155991
loss: 0.9968265891075134,grad_norm: 0.9048217964410306, iteration: 155992
loss: 1.0028560161590576,grad_norm: 0.9999990239537874, iteration: 155993
loss: 1.0015724897384644,grad_norm: 0.8429787556552324, iteration: 155994
loss: 1.0057899951934814,grad_norm: 0.9751040804317463, iteration: 155995
loss: 1.0283809900283813,grad_norm: 0.9210453439799203, iteration: 155996
loss: 0.9787627458572388,grad_norm: 0.9720767778162094, iteration: 155997
loss: 0.9766978621482849,grad_norm: 0.9999992244191939, iteration: 155998
loss: 0.9885635375976562,grad_norm: 0.9999994770567471, iteration: 155999
loss: 1.0305291414260864,grad_norm: 0.9999991441464023, iteration: 156000
loss: 1.0033695697784424,grad_norm: 0.9624637465363931, iteration: 156001
loss: 0.988389790058136,grad_norm: 0.999999125921583, iteration: 156002
loss: 1.0212252140045166,grad_norm: 0.9999991638615638, iteration: 156003
loss: 0.9703131914138794,grad_norm: 0.9096319308186724, iteration: 156004
loss: 0.9962592124938965,grad_norm: 0.9999997771915319, iteration: 156005
loss: 0.9508225321769714,grad_norm: 0.999999162658837, iteration: 156006
loss: 1.0347174406051636,grad_norm: 0.9999992591884008, iteration: 156007
loss: 0.9907947182655334,grad_norm: 0.7716681670012416, iteration: 156008
loss: 0.9908761978149414,grad_norm: 0.8946874853840355, iteration: 156009
loss: 0.9724457263946533,grad_norm: 0.9774559135968304, iteration: 156010
loss: 0.9831568002700806,grad_norm: 0.9999991666666845, iteration: 156011
loss: 1.0113173723220825,grad_norm: 0.9999991193884653, iteration: 156012
loss: 0.9856915473937988,grad_norm: 0.8636956375280023, iteration: 156013
loss: 0.9672955870628357,grad_norm: 0.999999126848851, iteration: 156014
loss: 1.0260440111160278,grad_norm: 0.9999992444900182, iteration: 156015
loss: 0.9956893920898438,grad_norm: 0.9999990677462341, iteration: 156016
loss: 1.0164573192596436,grad_norm: 0.8015655376302268, iteration: 156017
loss: 1.0337200164794922,grad_norm: 0.9999993724421354, iteration: 156018
loss: 0.9952232837677002,grad_norm: 0.9351587602873639, iteration: 156019
loss: 1.022318959236145,grad_norm: 0.9498995925073693, iteration: 156020
loss: 0.9380241632461548,grad_norm: 0.9999991364119338, iteration: 156021
loss: 1.033182978630066,grad_norm: 0.9071764478888743, iteration: 156022
loss: 1.0745259523391724,grad_norm: 0.9999991628891451, iteration: 156023
loss: 1.01021146774292,grad_norm: 0.9241409313903255, iteration: 156024
loss: 1.0128803253173828,grad_norm: 0.9600897907615669, iteration: 156025
loss: 1.0179442167282104,grad_norm: 0.9779449621173049, iteration: 156026
loss: 0.9710280895233154,grad_norm: 0.999999239919299, iteration: 156027
loss: 1.0192011594772339,grad_norm: 0.9614571079624483, iteration: 156028
loss: 1.035688042640686,grad_norm: 0.9999990582899677, iteration: 156029
loss: 1.0205163955688477,grad_norm: 0.9999991770110904, iteration: 156030
loss: 1.0004909038543701,grad_norm: 0.9999991427625646, iteration: 156031
loss: 1.008366584777832,grad_norm: 0.9999991552295224, iteration: 156032
loss: 1.0120784044265747,grad_norm: 0.9999991248307568, iteration: 156033
loss: 1.0050908327102661,grad_norm: 0.8406633415205533, iteration: 156034
loss: 1.0298324823379517,grad_norm: 0.9999990891413122, iteration: 156035
loss: 0.9780628681182861,grad_norm: 0.8570591062861723, iteration: 156036
loss: 1.0040169954299927,grad_norm: 0.9355016679699468, iteration: 156037
loss: 1.01115882396698,grad_norm: 0.9044441574308748, iteration: 156038
loss: 1.024697184562683,grad_norm: 0.9781908637536416, iteration: 156039
loss: 0.9961913228034973,grad_norm: 0.9999989116156023, iteration: 156040
loss: 1.0270392894744873,grad_norm: 0.9817555566941095, iteration: 156041
loss: 0.9850818514823914,grad_norm: 0.9999990016675078, iteration: 156042
loss: 0.9945043325424194,grad_norm: 0.9742977985535145, iteration: 156043
loss: 1.0264796018600464,grad_norm: 0.9999993739344928, iteration: 156044
loss: 1.0291929244995117,grad_norm: 0.9999990444476832, iteration: 156045
loss: 0.998185932636261,grad_norm: 0.8261008700001197, iteration: 156046
loss: 0.9847684502601624,grad_norm: 0.9999990757841272, iteration: 156047
loss: 0.9922841787338257,grad_norm: 0.9999991820555516, iteration: 156048
loss: 1.0449048280715942,grad_norm: 0.9999994700784117, iteration: 156049
loss: 1.0163856744766235,grad_norm: 0.9999991824285221, iteration: 156050
loss: 0.9950007796287537,grad_norm: 0.999999287555906, iteration: 156051
loss: 0.9894879460334778,grad_norm: 0.9999991132929268, iteration: 156052
loss: 1.0029679536819458,grad_norm: 0.9999989210829752, iteration: 156053
loss: 0.9825186729431152,grad_norm: 0.9999990898483436, iteration: 156054
loss: 1.015242576599121,grad_norm: 0.9316294618948106, iteration: 156055
loss: 0.9825884699821472,grad_norm: 0.9999993629690874, iteration: 156056
loss: 0.9766339063644409,grad_norm: 0.9999991671343139, iteration: 156057
loss: 1.0160568952560425,grad_norm: 0.8726605222164707, iteration: 156058
loss: 0.9947633147239685,grad_norm: 0.97268958427612, iteration: 156059
loss: 0.9996682405471802,grad_norm: 0.9609944791743735, iteration: 156060
loss: 0.9683780074119568,grad_norm: 0.9702701644118682, iteration: 156061
loss: 0.993297278881073,grad_norm: 0.942810836855039, iteration: 156062
loss: 1.0447129011154175,grad_norm: 0.9699057981683398, iteration: 156063
loss: 0.9750438928604126,grad_norm: 0.9999992793092659, iteration: 156064
loss: 0.9708221554756165,grad_norm: 0.9999991362416235, iteration: 156065
loss: 0.9690964818000793,grad_norm: 0.9999998120983885, iteration: 156066
loss: 0.9731807708740234,grad_norm: 0.8291418028059508, iteration: 156067
loss: 1.051698088645935,grad_norm: 0.9999996451120433, iteration: 156068
loss: 0.9994068145751953,grad_norm: 0.999998956140964, iteration: 156069
loss: 0.9745269417762756,grad_norm: 0.999999017552277, iteration: 156070
loss: 1.0333155393600464,grad_norm: 0.9999990218794818, iteration: 156071
loss: 0.9801618456840515,grad_norm: 0.9276966178341866, iteration: 156072
loss: 0.9699658751487732,grad_norm: 0.9999990950025552, iteration: 156073
loss: 0.983933687210083,grad_norm: 0.9999991845811445, iteration: 156074
loss: 1.0334967374801636,grad_norm: 0.9999991841706867, iteration: 156075
loss: 0.9763533473014832,grad_norm: 0.9775020979601093, iteration: 156076
loss: 0.9822677969932556,grad_norm: 0.9074190222720309, iteration: 156077
loss: 0.9957167506217957,grad_norm: 0.9051904144060862, iteration: 156078
loss: 1.03205406665802,grad_norm: 0.9999992260321142, iteration: 156079
loss: 1.0121827125549316,grad_norm: 0.999999045663921, iteration: 156080
loss: 1.0366407632827759,grad_norm: 0.9999992972995606, iteration: 156081
loss: 0.9981593489646912,grad_norm: 0.9999991192110078, iteration: 156082
loss: 0.9720963835716248,grad_norm: 0.9999991905280601, iteration: 156083
loss: 1.005052924156189,grad_norm: 0.999999233242985, iteration: 156084
loss: 1.0212817192077637,grad_norm: 0.9999991643287787, iteration: 156085
loss: 1.005792260169983,grad_norm: 0.9999992165822292, iteration: 156086
loss: 1.0125964879989624,grad_norm: 0.9999991482721393, iteration: 156087
loss: 1.024999976158142,grad_norm: 0.8470777339200707, iteration: 156088
loss: 1.0088138580322266,grad_norm: 0.9999992987956684, iteration: 156089
loss: 1.020851492881775,grad_norm: 0.8117914993743954, iteration: 156090
loss: 1.0315420627593994,grad_norm: 0.9999990883508948, iteration: 156091
loss: 1.0159331560134888,grad_norm: 0.9567565559116548, iteration: 156092
loss: 0.955862283706665,grad_norm: 0.9999990784562988, iteration: 156093
loss: 0.9836639165878296,grad_norm: 0.9391770213052241, iteration: 156094
loss: 0.9903225898742676,grad_norm: 0.9888575214162427, iteration: 156095
loss: 0.9915122985839844,grad_norm: 0.9999991568642378, iteration: 156096
loss: 1.003101110458374,grad_norm: 0.9999990478013994, iteration: 156097
loss: 1.021892786026001,grad_norm: 0.9999991508406896, iteration: 156098
loss: 0.9814192652702332,grad_norm: 0.8339939290177066, iteration: 156099
loss: 0.9936171770095825,grad_norm: 0.9881632486346096, iteration: 156100
loss: 1.0232658386230469,grad_norm: 0.9999991421603939, iteration: 156101
loss: 0.9931799173355103,grad_norm: 0.9999992203937357, iteration: 156102
loss: 1.0005900859832764,grad_norm: 0.8567747221506754, iteration: 156103
loss: 0.9972965717315674,grad_norm: 0.9999990946953377, iteration: 156104
loss: 0.9962500333786011,grad_norm: 0.9763615376809872, iteration: 156105
loss: 1.0511119365692139,grad_norm: 0.999999086593457, iteration: 156106
loss: 1.0193088054656982,grad_norm: 0.9999996509541805, iteration: 156107
loss: 0.9987607002258301,grad_norm: 0.9624368152885981, iteration: 156108
loss: 1.0271281003952026,grad_norm: 0.9999990000936645, iteration: 156109
loss: 0.9966592192649841,grad_norm: 0.9999991269623361, iteration: 156110
loss: 0.9932100176811218,grad_norm: 0.982517264027862, iteration: 156111
loss: 1.000838041305542,grad_norm: 0.8709714220503684, iteration: 156112
loss: 1.0009779930114746,grad_norm: 0.9999989687206736, iteration: 156113
loss: 0.9950273036956787,grad_norm: 0.9999998775718214, iteration: 156114
loss: 1.0060385465621948,grad_norm: 0.9567737323618825, iteration: 156115
loss: 0.9499893188476562,grad_norm: 0.9999989094381447, iteration: 156116
loss: 1.0294033288955688,grad_norm: 0.9999992743873427, iteration: 156117
loss: 1.006312608718872,grad_norm: 0.9434723184261087, iteration: 156118
loss: 1.078769326210022,grad_norm: 0.999999168713333, iteration: 156119
loss: 0.9905382394790649,grad_norm: 0.9999990447526564, iteration: 156120
loss: 0.9957902431488037,grad_norm: 0.9999993218319162, iteration: 156121
loss: 1.019911766052246,grad_norm: 0.9204619388444081, iteration: 156122
loss: 1.0115901231765747,grad_norm: 0.8475215304764558, iteration: 156123
loss: 1.0020846128463745,grad_norm: 0.9999992353540392, iteration: 156124
loss: 1.0053629875183105,grad_norm: 0.9999991514180011, iteration: 156125
loss: 0.9797002673149109,grad_norm: 0.9999991984813519, iteration: 156126
loss: 0.994387686252594,grad_norm: 0.9999990954534498, iteration: 156127
loss: 0.9759639501571655,grad_norm: 0.9999991037964211, iteration: 156128
loss: 0.9739544987678528,grad_norm: 0.8609127706947628, iteration: 156129
loss: 1.2669123411178589,grad_norm: 0.9999991624255595, iteration: 156130
loss: 0.9937109351158142,grad_norm: 0.8816869658950127, iteration: 156131
loss: 1.027003526687622,grad_norm: 0.8697534156006267, iteration: 156132
loss: 0.9853664040565491,grad_norm: 0.9999992515606567, iteration: 156133
loss: 0.9929486513137817,grad_norm: 0.9805173899025528, iteration: 156134
loss: 1.0121417045593262,grad_norm: 0.8421598688133441, iteration: 156135
loss: 1.0045816898345947,grad_norm: 0.9075243247410723, iteration: 156136
loss: 0.9770900011062622,grad_norm: 0.8558369449456685, iteration: 156137
loss: 1.0324790477752686,grad_norm: 0.9999995307886517, iteration: 156138
loss: 1.0328724384307861,grad_norm: 0.975083312896605, iteration: 156139
loss: 0.9810383319854736,grad_norm: 0.945160461217792, iteration: 156140
loss: 1.0187269449234009,grad_norm: 0.9999991013779903, iteration: 156141
loss: 1.0327465534210205,grad_norm: 0.99999926026106, iteration: 156142
loss: 0.9520778656005859,grad_norm: 0.9958432824765441, iteration: 156143
loss: 0.989630401134491,grad_norm: 0.993604661975548, iteration: 156144
loss: 1.0009963512420654,grad_norm: 0.9054139977507274, iteration: 156145
loss: 0.9846357107162476,grad_norm: 0.9999990672032533, iteration: 156146
loss: 1.018175721168518,grad_norm: 0.9999998724732532, iteration: 156147
loss: 1.0242626667022705,grad_norm: 0.8982643610762574, iteration: 156148
loss: 1.0142592191696167,grad_norm: 0.9999991998680211, iteration: 156149
loss: 0.9741328954696655,grad_norm: 0.9655836730074785, iteration: 156150
loss: 0.9862945675849915,grad_norm: 0.9999990569014068, iteration: 156151
loss: 0.9924085736274719,grad_norm: 0.9999989287085591, iteration: 156152
loss: 0.9974418878555298,grad_norm: 0.9999992083203058, iteration: 156153
loss: 1.04719078540802,grad_norm: 0.9498612968808486, iteration: 156154
loss: 0.9938748478889465,grad_norm: 0.922057668929953, iteration: 156155
loss: 1.0057743787765503,grad_norm: 0.9942427970026102, iteration: 156156
loss: 0.971810519695282,grad_norm: 0.9999989827363711, iteration: 156157
loss: 0.9957804679870605,grad_norm: 0.9999990546027946, iteration: 156158
loss: 0.9826999306678772,grad_norm: 0.9999992216359411, iteration: 156159
loss: 1.0051368474960327,grad_norm: 0.9346266359381066, iteration: 156160
loss: 1.0517882108688354,grad_norm: 0.9999997100636063, iteration: 156161
loss: 0.9615497589111328,grad_norm: 0.8550337380784779, iteration: 156162
loss: 1.026439905166626,grad_norm: 0.9999990515214345, iteration: 156163
loss: 1.0099856853485107,grad_norm: 0.9999992080894083, iteration: 156164
loss: 0.9860936999320984,grad_norm: 0.9999994311005593, iteration: 156165
loss: 0.9923840761184692,grad_norm: 0.9999991434065351, iteration: 156166
loss: 0.9611971974372864,grad_norm: 0.9872933084388933, iteration: 156167
loss: 0.9777498841285706,grad_norm: 0.9654592814735259, iteration: 156168
loss: 0.9937804937362671,grad_norm: 0.9999990954435982, iteration: 156169
loss: 1.0158754587173462,grad_norm: 0.999999047031778, iteration: 156170
loss: 1.0064821243286133,grad_norm: 0.9999992869039608, iteration: 156171
loss: 1.0411715507507324,grad_norm: 0.999999718990065, iteration: 156172
loss: 0.9468870759010315,grad_norm: 0.9999990287428968, iteration: 156173
loss: 1.0064653158187866,grad_norm: 0.907283440551623, iteration: 156174
loss: 0.9882648587226868,grad_norm: 0.9998041525121464, iteration: 156175
loss: 0.9904536604881287,grad_norm: 0.9999991454667152, iteration: 156176
loss: 1.0105563402175903,grad_norm: 0.9999990987350826, iteration: 156177
loss: 1.0327643156051636,grad_norm: 0.9999992643667239, iteration: 156178
loss: 1.0342391729354858,grad_norm: 0.9999992248690667, iteration: 156179
loss: 1.0208415985107422,grad_norm: 0.9999991400722463, iteration: 156180
loss: 1.0084688663482666,grad_norm: 0.8960857488679521, iteration: 156181
loss: 1.0027191638946533,grad_norm: 0.9999997301358723, iteration: 156182
loss: 0.9983322024345398,grad_norm: 0.9451412836148952, iteration: 156183
loss: 1.0167784690856934,grad_norm: 0.9092914196478904, iteration: 156184
loss: 1.0235280990600586,grad_norm: 0.9999994504745543, iteration: 156185
loss: 1.0030900239944458,grad_norm: 0.9999990997997215, iteration: 156186
loss: 0.993776261806488,grad_norm: 0.9822977278295197, iteration: 156187
loss: 0.9525473713874817,grad_norm: 0.9999991392811157, iteration: 156188
loss: 1.0051500797271729,grad_norm: 0.9999992287048278, iteration: 156189
loss: 0.9973195195198059,grad_norm: 0.9487132201456502, iteration: 156190
loss: 1.014836311340332,grad_norm: 0.9939993085204838, iteration: 156191
loss: 0.9977587461471558,grad_norm: 0.9999991803570707, iteration: 156192
loss: 0.9903901219367981,grad_norm: 0.9421585582368308, iteration: 156193
loss: 0.9787310361862183,grad_norm: 0.9999989456685778, iteration: 156194
loss: 1.0291943550109863,grad_norm: 0.9712054957233387, iteration: 156195
loss: 1.020752191543579,grad_norm: 0.8769264151500298, iteration: 156196
loss: 0.9758366346359253,grad_norm: 0.9630497797596794, iteration: 156197
loss: 1.0214320421218872,grad_norm: 0.9818422082128001, iteration: 156198
loss: 1.0026799440383911,grad_norm: 0.9999989776221963, iteration: 156199
loss: 1.0089226961135864,grad_norm: 0.9932690894457558, iteration: 156200
loss: 0.9987920522689819,grad_norm: 0.9999990313218279, iteration: 156201
loss: 0.9734188914299011,grad_norm: 0.9999993000584649, iteration: 156202
loss: 1.0276693105697632,grad_norm: 0.9842454142491877, iteration: 156203
loss: 1.0611889362335205,grad_norm: 0.9999997839898321, iteration: 156204
loss: 0.9892823696136475,grad_norm: 0.937047541240068, iteration: 156205
loss: 0.997826874256134,grad_norm: 0.9999991476707394, iteration: 156206
loss: 1.0541990995407104,grad_norm: 0.9999990706425077, iteration: 156207
loss: 0.9745096564292908,grad_norm: 0.999999161306935, iteration: 156208
loss: 1.0157554149627686,grad_norm: 0.8408654968403201, iteration: 156209
loss: 0.9600826501846313,grad_norm: 0.9637237037667457, iteration: 156210
loss: 1.020972728729248,grad_norm: 0.9908429540442745, iteration: 156211
loss: 1.0275324583053589,grad_norm: 0.9999991656299203, iteration: 156212
loss: 0.9794631600379944,grad_norm: 0.9603685050655336, iteration: 156213
loss: 0.9906730055809021,grad_norm: 0.8440901991823104, iteration: 156214
loss: 0.9868558049201965,grad_norm: 0.9652960203900153, iteration: 156215
loss: 1.0102863311767578,grad_norm: 0.9436692408277586, iteration: 156216
loss: 0.9743571281433105,grad_norm: 0.9098318040089141, iteration: 156217
loss: 1.004173994064331,grad_norm: 0.9502208994649705, iteration: 156218
loss: 1.049891710281372,grad_norm: 0.9999993854787418, iteration: 156219
loss: 0.997399091720581,grad_norm: 0.9999991101783118, iteration: 156220
loss: 0.9795031547546387,grad_norm: 0.9999991870637648, iteration: 156221
loss: 1.0077401399612427,grad_norm: 0.9306350530320179, iteration: 156222
loss: 0.9999195337295532,grad_norm: 0.9967432501592179, iteration: 156223
loss: 0.9960095882415771,grad_norm: 0.9071554096096115, iteration: 156224
loss: 0.9828077554702759,grad_norm: 0.9999991864966699, iteration: 156225
loss: 0.9821297526359558,grad_norm: 0.9999989865686968, iteration: 156226
loss: 0.9968620538711548,grad_norm: 0.9999990854945473, iteration: 156227
loss: 0.9806564450263977,grad_norm: 0.9999990753360358, iteration: 156228
loss: 1.0040810108184814,grad_norm: 0.9999993141080752, iteration: 156229
loss: 1.0054478645324707,grad_norm: 0.9999991918527958, iteration: 156230
loss: 0.9826356768608093,grad_norm: 0.9292872818043849, iteration: 156231
loss: 1.0291142463684082,grad_norm: 0.9999991212751623, iteration: 156232
loss: 0.9724422693252563,grad_norm: 0.999999191904212, iteration: 156233
loss: 1.0224813222885132,grad_norm: 0.9388686295600347, iteration: 156234
loss: 0.9988967776298523,grad_norm: 0.9834593283131555, iteration: 156235
loss: 1.0113208293914795,grad_norm: 0.999999011866776, iteration: 156236
loss: 0.9759102463722229,grad_norm: 0.9999990932062816, iteration: 156237
loss: 1.0219144821166992,grad_norm: 0.9595126567654502, iteration: 156238
loss: 1.0081526041030884,grad_norm: 0.8113123752929089, iteration: 156239
loss: 1.024505376815796,grad_norm: 0.9673022140476742, iteration: 156240
loss: 1.0527074337005615,grad_norm: 0.999999184665233, iteration: 156241
loss: 1.0083645582199097,grad_norm: 0.9685792301076604, iteration: 156242
loss: 0.9793540239334106,grad_norm: 0.9999989802115142, iteration: 156243
loss: 1.044023871421814,grad_norm: 0.9999998669398951, iteration: 156244
loss: 0.9860215783119202,grad_norm: 0.9153296499223154, iteration: 156245
loss: 1.0097739696502686,grad_norm: 0.994685785918447, iteration: 156246
loss: 1.0261588096618652,grad_norm: 0.9999992153642833, iteration: 156247
loss: 1.0350781679153442,grad_norm: 0.9495102364351279, iteration: 156248
loss: 0.9910001158714294,grad_norm: 0.9279310757251232, iteration: 156249
loss: 0.9723374843597412,grad_norm: 0.9999990601189107, iteration: 156250
loss: 0.9747412800788879,grad_norm: 0.9999990209162603, iteration: 156251
loss: 1.0025585889816284,grad_norm: 0.9871976459560347, iteration: 156252
loss: 0.9713363647460938,grad_norm: 0.999999156848003, iteration: 156253
loss: 1.020065188407898,grad_norm: 0.9980551353922541, iteration: 156254
loss: 1.0179234743118286,grad_norm: 0.9999992036421596, iteration: 156255
loss: 0.990372359752655,grad_norm: 0.9999989765961246, iteration: 156256
loss: 0.9817649126052856,grad_norm: 0.9999991175539295, iteration: 156257
loss: 1.0160378217697144,grad_norm: 0.9999992041313246, iteration: 156258
loss: 0.993194580078125,grad_norm: 0.99999923727138, iteration: 156259
loss: 1.014390230178833,grad_norm: 0.9162226612246237, iteration: 156260
loss: 1.0031344890594482,grad_norm: 0.9789211175921798, iteration: 156261
loss: 0.9541714787483215,grad_norm: 0.9999992700275849, iteration: 156262
loss: 1.0620492696762085,grad_norm: 0.9999995732630685, iteration: 156263
loss: 1.0067559480667114,grad_norm: 0.9477753519824246, iteration: 156264
loss: 1.0011301040649414,grad_norm: 0.9979172840669585, iteration: 156265
loss: 1.0239434242248535,grad_norm: 0.9048605599184186, iteration: 156266
loss: 1.0282700061798096,grad_norm: 0.9999995021905097, iteration: 156267
loss: 0.9605598449707031,grad_norm: 0.9999992363114059, iteration: 156268
loss: 0.9803909659385681,grad_norm: 0.9999991752273164, iteration: 156269
loss: 0.9598914980888367,grad_norm: 0.9999991714053154, iteration: 156270
loss: 1.033993124961853,grad_norm: 0.9568175959344514, iteration: 156271
loss: 1.1146211624145508,grad_norm: 0.9999994105735113, iteration: 156272
loss: 1.093225359916687,grad_norm: 0.9999991645772134, iteration: 156273
loss: 0.9954573512077332,grad_norm: 0.9999989794019848, iteration: 156274
loss: 1.0132614374160767,grad_norm: 0.7599316757137671, iteration: 156275
loss: 0.9903010725975037,grad_norm: 0.9999992830301305, iteration: 156276
loss: 0.987751305103302,grad_norm: 0.8157174273139384, iteration: 156277
loss: 1.0128761529922485,grad_norm: 0.9999991738151264, iteration: 156278
loss: 1.0013715028762817,grad_norm: 0.9999991308019714, iteration: 156279
loss: 1.0777485370635986,grad_norm: 0.9999993156314197, iteration: 156280
loss: 1.01839280128479,grad_norm: 0.999999085808451, iteration: 156281
loss: 0.9697945713996887,grad_norm: 0.9394206226565135, iteration: 156282
loss: 0.9663691520690918,grad_norm: 0.9999989935609493, iteration: 156283
loss: 1.0517936944961548,grad_norm: 0.9999990898770457, iteration: 156284
loss: 0.9888703227043152,grad_norm: 0.9568020795384153, iteration: 156285
loss: 1.0251710414886475,grad_norm: 0.8225185204515804, iteration: 156286
loss: 0.9825624227523804,grad_norm: 0.9626406550777002, iteration: 156287
loss: 1.0167280435562134,grad_norm: 0.999999141982334, iteration: 156288
loss: 0.9926655292510986,grad_norm: 0.8715794274805367, iteration: 156289
loss: 0.9766502380371094,grad_norm: 0.9638715704404602, iteration: 156290
loss: 0.9700214266777039,grad_norm: 0.9622017633976115, iteration: 156291
loss: 1.0218251943588257,grad_norm: 0.9999992061423952, iteration: 156292
loss: 1.0072882175445557,grad_norm: 0.9999991522036656, iteration: 156293
loss: 1.005553126335144,grad_norm: 0.807072018261256, iteration: 156294
loss: 1.0208518505096436,grad_norm: 0.8851465905454908, iteration: 156295
loss: 1.0246433019638062,grad_norm: 0.9509753007976074, iteration: 156296
loss: 1.020411491394043,grad_norm: 0.8231041072378057, iteration: 156297
loss: 0.9662043452262878,grad_norm: 0.9999991166473408, iteration: 156298
loss: 0.9864606261253357,grad_norm: 0.9999991674882592, iteration: 156299
loss: 0.9990590810775757,grad_norm: 0.9768093766485753, iteration: 156300
loss: 0.9948506951332092,grad_norm: 0.9999994209678608, iteration: 156301
loss: 0.9933050274848938,grad_norm: 0.9999991416675622, iteration: 156302
loss: 1.0336498022079468,grad_norm: 0.9999992191735463, iteration: 156303
loss: 0.9719040989875793,grad_norm: 0.9479298445648303, iteration: 156304
loss: 1.0012766122817993,grad_norm: 0.9999991162367551, iteration: 156305
loss: 1.1227765083312988,grad_norm: 0.9999994929873464, iteration: 156306
loss: 0.9983242154121399,grad_norm: 0.8887655578267819, iteration: 156307
loss: 0.9999279379844666,grad_norm: 0.9910510841306512, iteration: 156308
loss: 1.0465419292449951,grad_norm: 0.9999991520221571, iteration: 156309
loss: 1.0132107734680176,grad_norm: 0.9998780709220042, iteration: 156310
loss: 1.0079156160354614,grad_norm: 0.9999989816698913, iteration: 156311
loss: 0.9966187477111816,grad_norm: 0.9999991238882981, iteration: 156312
loss: 0.9825111031532288,grad_norm: 0.9999992161628337, iteration: 156313
loss: 0.9429420232772827,grad_norm: 0.9999990543136574, iteration: 156314
loss: 1.00345778465271,grad_norm: 0.9999992438112554, iteration: 156315
loss: 0.9724280834197998,grad_norm: 0.883964620887084, iteration: 156316
loss: 0.9660833477973938,grad_norm: 0.9999991695856287, iteration: 156317
loss: 0.9990100860595703,grad_norm: 0.9999991232285147, iteration: 156318
loss: 1.0036647319793701,grad_norm: 0.9999990716391939, iteration: 156319
loss: 0.9779281616210938,grad_norm: 0.9695141696510552, iteration: 156320
loss: 1.0031694173812866,grad_norm: 0.9999991147632208, iteration: 156321
loss: 0.985853374004364,grad_norm: 0.9999999078058357, iteration: 156322
loss: 1.0056507587432861,grad_norm: 0.9999992931702285, iteration: 156323
loss: 0.9874731302261353,grad_norm: 0.9999989948846066, iteration: 156324
loss: 1.0300320386886597,grad_norm: 0.9999991150698119, iteration: 156325
loss: 1.017062783241272,grad_norm: 0.9999992084790406, iteration: 156326
loss: 0.9999678134918213,grad_norm: 0.9363595454519883, iteration: 156327
loss: 1.0106226205825806,grad_norm: 0.9999989758419187, iteration: 156328
loss: 0.9662760496139526,grad_norm: 0.9523239654946973, iteration: 156329
loss: 0.9694122672080994,grad_norm: 0.982469166031275, iteration: 156330
loss: 1.0117932558059692,grad_norm: 0.9999990019347437, iteration: 156331
loss: 1.0183475017547607,grad_norm: 0.9999991584496296, iteration: 156332
loss: 1.0279935598373413,grad_norm: 0.8312095861603146, iteration: 156333
loss: 1.008577823638916,grad_norm: 0.9471505655410807, iteration: 156334
loss: 0.9596394300460815,grad_norm: 0.9999991623523562, iteration: 156335
loss: 1.0154589414596558,grad_norm: 0.9999992031447106, iteration: 156336
loss: 0.9877422451972961,grad_norm: 0.9388443306597192, iteration: 156337
loss: 1.0105936527252197,grad_norm: 0.9622484499739138, iteration: 156338
loss: 0.985202431678772,grad_norm: 0.999999179911601, iteration: 156339
loss: 0.9687359929084778,grad_norm: 0.9999990671530535, iteration: 156340
loss: 1.0302314758300781,grad_norm: 0.9999995372939346, iteration: 156341
loss: 1.061791181564331,grad_norm: 0.9999991127559876, iteration: 156342
loss: 0.9973248243331909,grad_norm: 0.9999990482745146, iteration: 156343
loss: 0.9987786412239075,grad_norm: 0.9361470274720335, iteration: 156344
loss: 1.0071967840194702,grad_norm: 0.9999991615158499, iteration: 156345
loss: 0.969875156879425,grad_norm: 0.9774123994570603, iteration: 156346
loss: 1.0041006803512573,grad_norm: 0.8921476471226464, iteration: 156347
loss: 0.9943955540657043,grad_norm: 0.999999064265262, iteration: 156348
loss: 0.9588918685913086,grad_norm: 0.999999378865778, iteration: 156349
loss: 1.02125084400177,grad_norm: 0.999999118657889, iteration: 156350
loss: 1.0459997653961182,grad_norm: 0.9999989343015272, iteration: 156351
loss: 1.0377520322799683,grad_norm: 0.9999990435999524, iteration: 156352
loss: 0.9961778521537781,grad_norm: 0.9763270597646339, iteration: 156353
loss: 0.9843619465827942,grad_norm: 0.9999993347990687, iteration: 156354
loss: 1.0507465600967407,grad_norm: 0.9999991885445779, iteration: 156355
loss: 0.9743732213973999,grad_norm: 0.9999991200121405, iteration: 156356
loss: 1.04026460647583,grad_norm: 0.9999993043677899, iteration: 156357
loss: 1.0045077800750732,grad_norm: 0.923911453708796, iteration: 156358
loss: 0.9857633709907532,grad_norm: 0.9344393650897579, iteration: 156359
loss: 0.9461573958396912,grad_norm: 0.9645265524350183, iteration: 156360
loss: 0.9967687726020813,grad_norm: 0.9545606498943424, iteration: 156361
loss: 0.993624210357666,grad_norm: 0.9999991349800662, iteration: 156362
loss: 0.9870842099189758,grad_norm: 0.967769757314633, iteration: 156363
loss: 1.060598611831665,grad_norm: 0.9999992250297255, iteration: 156364
loss: 0.9884732961654663,grad_norm: 0.7431939686770888, iteration: 156365
loss: 0.9952085018157959,grad_norm: 0.9999993233648209, iteration: 156366
loss: 1.0254583358764648,grad_norm: 0.9999992950227758, iteration: 156367
loss: 0.9484798312187195,grad_norm: 0.9064186468716592, iteration: 156368
loss: 0.9850306510925293,grad_norm: 0.935841548175015, iteration: 156369
loss: 0.9887406826019287,grad_norm: 0.9457404944268212, iteration: 156370
loss: 0.9629047513008118,grad_norm: 0.9999990966162338, iteration: 156371
loss: 1.0013244152069092,grad_norm: 0.9999991673277807, iteration: 156372
loss: 1.013075590133667,grad_norm: 0.9233268247540701, iteration: 156373
loss: 0.9860643148422241,grad_norm: 0.9999991238588775, iteration: 156374
loss: 0.9907657504081726,grad_norm: 0.9999991398879374, iteration: 156375
loss: 0.9921907186508179,grad_norm: 0.9999990461026328, iteration: 156376
loss: 1.0177348852157593,grad_norm: 0.9999990057474678, iteration: 156377
loss: 0.9784418940544128,grad_norm: 0.9524887267867413, iteration: 156378
loss: 0.9848254919052124,grad_norm: 0.9999991556237003, iteration: 156379
loss: 1.0073438882827759,grad_norm: 0.9999991939849352, iteration: 156380
loss: 0.9852806329727173,grad_norm: 0.8988874524693586, iteration: 156381
loss: 1.0241820812225342,grad_norm: 0.9999989810575517, iteration: 156382
loss: 0.990067183971405,grad_norm: 0.9456778630652223, iteration: 156383
loss: 0.9685094356536865,grad_norm: 0.9999991074000852, iteration: 156384
loss: 1.0235941410064697,grad_norm: 0.9999992857617354, iteration: 156385
loss: 0.9923747181892395,grad_norm: 0.9836376521516922, iteration: 156386
loss: 0.9880632758140564,grad_norm: 0.9999992652289305, iteration: 156387
loss: 1.032393455505371,grad_norm: 0.9275419076103214, iteration: 156388
loss: 1.004156231880188,grad_norm: 0.999999049402547, iteration: 156389
loss: 1.0003306865692139,grad_norm: 0.8595069431508757, iteration: 156390
loss: 1.0352425575256348,grad_norm: 0.8689802913158491, iteration: 156391
loss: 1.009539246559143,grad_norm: 0.9812886834930469, iteration: 156392
loss: 0.9385482668876648,grad_norm: 0.9022093025471006, iteration: 156393
loss: 0.9971503019332886,grad_norm: 0.9999992296683412, iteration: 156394
loss: 1.0163452625274658,grad_norm: 0.9668079170287395, iteration: 156395
loss: 0.9860851168632507,grad_norm: 0.9999990714079452, iteration: 156396
loss: 0.9589788317680359,grad_norm: 0.9781262171758007, iteration: 156397
loss: 1.0021387338638306,grad_norm: 0.999999290217539, iteration: 156398
loss: 0.9872387647628784,grad_norm: 0.9570468627383181, iteration: 156399
loss: 1.0163248777389526,grad_norm: 0.9242865792085636, iteration: 156400
loss: 1.0109126567840576,grad_norm: 0.9999992011936232, iteration: 156401
loss: 0.9898473620414734,grad_norm: 0.9999991047156833, iteration: 156402
loss: 0.992729127407074,grad_norm: 0.999999133576709, iteration: 156403
loss: 0.9590399265289307,grad_norm: 0.8662054728363335, iteration: 156404
loss: 0.9724639058113098,grad_norm: 0.9984089728934199, iteration: 156405
loss: 0.9918680787086487,grad_norm: 0.9107420232139807, iteration: 156406
loss: 0.9946459531784058,grad_norm: 0.9999990488709638, iteration: 156407
loss: 0.9637459516525269,grad_norm: 0.8816199715601174, iteration: 156408
loss: 1.0088224411010742,grad_norm: 0.9999990446568335, iteration: 156409
loss: 1.0338618755340576,grad_norm: 0.9999991736900257, iteration: 156410
loss: 0.9763585925102234,grad_norm: 0.9999993158426639, iteration: 156411
loss: 0.9730962514877319,grad_norm: 0.8960097267827728, iteration: 156412
loss: 0.9723839163780212,grad_norm: 0.999999249693341, iteration: 156413
loss: 0.9873940348625183,grad_norm: 0.9999989177334769, iteration: 156414
loss: 0.9937817454338074,grad_norm: 0.9999993265799749, iteration: 156415
loss: 0.9962830543518066,grad_norm: 0.9999991559688809, iteration: 156416
loss: 1.0218236446380615,grad_norm: 0.9999989657148919, iteration: 156417
loss: 1.0138545036315918,grad_norm: 0.999999194987728, iteration: 156418
loss: 1.0004432201385498,grad_norm: 0.9999991816454408, iteration: 156419
loss: 0.9663967490196228,grad_norm: 0.9999990897432508, iteration: 156420
loss: 1.0154775381088257,grad_norm: 0.9370380772964224, iteration: 156421
loss: 1.0220412015914917,grad_norm: 0.9999991516972833, iteration: 156422
loss: 1.018876075744629,grad_norm: 0.974078070252828, iteration: 156423
loss: 1.0066102743148804,grad_norm: 0.9999990771440581, iteration: 156424
loss: 0.995703399181366,grad_norm: 0.9741975972206172, iteration: 156425
loss: 1.026502013206482,grad_norm: 0.9999990201106095, iteration: 156426
loss: 0.9873645901679993,grad_norm: 0.9999991461197806, iteration: 156427
loss: 1.0101741552352905,grad_norm: 0.9999991427166712, iteration: 156428
loss: 0.966916561126709,grad_norm: 0.9999990784963986, iteration: 156429
loss: 1.0135000944137573,grad_norm: 0.9207719506459896, iteration: 156430
loss: 1.0061798095703125,grad_norm: 0.9999990853413047, iteration: 156431
loss: 1.0175470113754272,grad_norm: 0.8648673832261603, iteration: 156432
loss: 1.0060275793075562,grad_norm: 0.9333309561508873, iteration: 156433
loss: 0.9791565537452698,grad_norm: 0.9999989799372058, iteration: 156434
loss: 1.0165762901306152,grad_norm: 0.9969199173568706, iteration: 156435
loss: 0.9798719882965088,grad_norm: 0.8761095289074584, iteration: 156436
loss: 1.0282386541366577,grad_norm: 0.9999991484451652, iteration: 156437
loss: 0.965088427066803,grad_norm: 0.8624106054838235, iteration: 156438
loss: 0.9963790774345398,grad_norm: 0.8994358022313672, iteration: 156439
loss: 1.036717414855957,grad_norm: 0.9999992662390198, iteration: 156440
loss: 1.033051609992981,grad_norm: 0.9541469533650128, iteration: 156441
loss: 1.026666283607483,grad_norm: 0.9999989069686884, iteration: 156442
loss: 0.968243420124054,grad_norm: 0.9467481156627192, iteration: 156443
loss: 1.0287160873413086,grad_norm: 0.8837965582919649, iteration: 156444
loss: 1.0495985746383667,grad_norm: 0.99999914137172, iteration: 156445
loss: 0.9773209691047668,grad_norm: 0.9999991373435296, iteration: 156446
loss: 0.9918130040168762,grad_norm: 0.8441382241361115, iteration: 156447
loss: 0.9793116450309753,grad_norm: 0.7861004288066443, iteration: 156448
loss: 0.9831399321556091,grad_norm: 0.9483751196722834, iteration: 156449
loss: 1.028444528579712,grad_norm: 0.94637737437935, iteration: 156450
loss: 1.0054640769958496,grad_norm: 0.9999990164622028, iteration: 156451
loss: 0.9980506300926208,grad_norm: 0.8263847292217927, iteration: 156452
loss: 0.9780158996582031,grad_norm: 0.9999991933571607, iteration: 156453
loss: 1.0026843547821045,grad_norm: 0.9999989701974034, iteration: 156454
loss: 1.0045222043991089,grad_norm: 0.9999992609913454, iteration: 156455
loss: 0.9612105488777161,grad_norm: 0.9999992383239616, iteration: 156456
loss: 1.0002098083496094,grad_norm: 0.999999044665742, iteration: 156457
loss: 1.0326725244522095,grad_norm: 0.9534077919467441, iteration: 156458
loss: 1.0034924745559692,grad_norm: 0.9654232728719596, iteration: 156459
loss: 1.0062689781188965,grad_norm: 0.9999991294011865, iteration: 156460
loss: 0.9765551686286926,grad_norm: 0.9999990639546865, iteration: 156461
loss: 0.9758983850479126,grad_norm: 0.999999137250874, iteration: 156462
loss: 0.9988325238227844,grad_norm: 0.9836552167501555, iteration: 156463
loss: 0.9852993488311768,grad_norm: 0.9999991139877199, iteration: 156464
loss: 0.9669939875602722,grad_norm: 0.9999992480147453, iteration: 156465
loss: 0.9900176525115967,grad_norm: 0.9557332644150838, iteration: 156466
loss: 0.9733753204345703,grad_norm: 0.8244123682168222, iteration: 156467
loss: 1.0382736921310425,grad_norm: 0.9999991373427055, iteration: 156468
loss: 1.0522605180740356,grad_norm: 0.9999990208947506, iteration: 156469
loss: 0.991400957107544,grad_norm: 0.9779715264087411, iteration: 156470
loss: 1.0096240043640137,grad_norm: 0.8736383426688475, iteration: 156471
loss: 1.0099331140518188,grad_norm: 0.9999992195400612, iteration: 156472
loss: 0.9808256030082703,grad_norm: 0.9185050249009508, iteration: 156473
loss: 1.1624529361724854,grad_norm: 0.9999991908883681, iteration: 156474
loss: 1.0220531225204468,grad_norm: 0.9999991751105517, iteration: 156475
loss: 1.0049755573272705,grad_norm: 0.9999998733774798, iteration: 156476
loss: 0.9996082186698914,grad_norm: 0.8724146033649929, iteration: 156477
loss: 0.9967740178108215,grad_norm: 0.9999992913271954, iteration: 156478
loss: 1.009720802307129,grad_norm: 0.999999084802485, iteration: 156479
loss: 0.9800594449043274,grad_norm: 0.999999172331534, iteration: 156480
loss: 1.0025343894958496,grad_norm: 0.9724043615598165, iteration: 156481
loss: 1.0276991128921509,grad_norm: 0.8831681279478655, iteration: 156482
loss: 0.9955703616142273,grad_norm: 0.9814929924182855, iteration: 156483
loss: 0.9658634662628174,grad_norm: 0.9999991058588306, iteration: 156484
loss: 1.0372055768966675,grad_norm: 0.9999989867490271, iteration: 156485
loss: 0.9937570095062256,grad_norm: 0.8673263645078737, iteration: 156486
loss: 1.0186036825180054,grad_norm: 0.9999988724432605, iteration: 156487
loss: 1.003772497177124,grad_norm: 0.9929227762200519, iteration: 156488
loss: 1.0293618440628052,grad_norm: 0.9999991837594968, iteration: 156489
loss: 0.9828703999519348,grad_norm: 0.9999990867858823, iteration: 156490
loss: 1.0100059509277344,grad_norm: 0.9999991139659892, iteration: 156491
loss: 0.9885588884353638,grad_norm: 0.9999990562487171, iteration: 156492
loss: 1.0028053522109985,grad_norm: 0.9999991800665944, iteration: 156493
loss: 1.0021284818649292,grad_norm: 0.8976866153401988, iteration: 156494
loss: 1.0143790245056152,grad_norm: 0.9999990779225905, iteration: 156495
loss: 1.0419410467147827,grad_norm: 0.9999991297751901, iteration: 156496
loss: 1.007419228553772,grad_norm: 0.9477040351706341, iteration: 156497
loss: 1.0108418464660645,grad_norm: 0.9999991244498311, iteration: 156498
loss: 0.9962300062179565,grad_norm: 0.99999915552859, iteration: 156499
loss: 0.9900592565536499,grad_norm: 0.999999127812239, iteration: 156500
loss: 0.9886923432350159,grad_norm: 0.9621805143313987, iteration: 156501
loss: 1.0025068521499634,grad_norm: 0.9171042036390903, iteration: 156502
loss: 1.0229648351669312,grad_norm: 0.999999124001984, iteration: 156503
loss: 0.9907914996147156,grad_norm: 0.8299962109767167, iteration: 156504
loss: 1.0178931951522827,grad_norm: 0.8895279138445628, iteration: 156505
loss: 1.0216915607452393,grad_norm: 0.9999993095845412, iteration: 156506
loss: 0.982609748840332,grad_norm: 0.7879898832544739, iteration: 156507
loss: 1.0066827535629272,grad_norm: 0.9999996542983909, iteration: 156508
loss: 1.1286731958389282,grad_norm: 0.9999992407172698, iteration: 156509
loss: 1.0195118188858032,grad_norm: 0.9999990434986269, iteration: 156510
loss: 0.9673375487327576,grad_norm: 0.9999990718492154, iteration: 156511
loss: 0.9723513722419739,grad_norm: 0.9723466579865571, iteration: 156512
loss: 1.0194261074066162,grad_norm: 0.9999991557945568, iteration: 156513
loss: 1.0025545358657837,grad_norm: 0.8431932496861697, iteration: 156514
loss: 0.987381637096405,grad_norm: 0.9999992048009378, iteration: 156515
loss: 1.007784366607666,grad_norm: 0.9935979947236001, iteration: 156516
loss: 0.9992735385894775,grad_norm: 0.999999029095151, iteration: 156517
loss: 1.031126618385315,grad_norm: 0.9999999030258185, iteration: 156518
loss: 0.9680488705635071,grad_norm: 0.9999990942902562, iteration: 156519
loss: 0.9902704358100891,grad_norm: 0.9999990964771377, iteration: 156520
loss: 1.0226562023162842,grad_norm: 0.9999991540692033, iteration: 156521
loss: 0.9762080907821655,grad_norm: 0.9501372356891399, iteration: 156522
loss: 0.9805284738540649,grad_norm: 0.9999990661621246, iteration: 156523
loss: 1.014356017112732,grad_norm: 0.999999179519032, iteration: 156524
loss: 1.0040191411972046,grad_norm: 0.9999990895965734, iteration: 156525
loss: 1.0069197416305542,grad_norm: 0.9099586304471986, iteration: 156526
loss: 0.9773698449134827,grad_norm: 0.9999990197712304, iteration: 156527
loss: 0.9933111667633057,grad_norm: 0.9999993167326715, iteration: 156528
loss: 1.0099042654037476,grad_norm: 0.9999993371328878, iteration: 156529
loss: 1.0057907104492188,grad_norm: 0.9999990229603358, iteration: 156530
loss: 1.0053881406784058,grad_norm: 0.8660006864636877, iteration: 156531
loss: 0.9955495595932007,grad_norm: 0.9999991394511538, iteration: 156532
loss: 1.01250159740448,grad_norm: 0.8274536222370483, iteration: 156533
loss: 0.9739123582839966,grad_norm: 0.9999990797113438, iteration: 156534
loss: 0.9991142153739929,grad_norm: 0.9478228995762118, iteration: 156535
loss: 0.9949711561203003,grad_norm: 0.9999991503175512, iteration: 156536
loss: 0.9687665700912476,grad_norm: 0.9654810447299566, iteration: 156537
loss: 1.007797122001648,grad_norm: 0.9450081332266024, iteration: 156538
loss: 1.013496994972229,grad_norm: 0.7694882930648327, iteration: 156539
loss: 1.0493881702423096,grad_norm: 0.9999993596790208, iteration: 156540
loss: 1.0070699453353882,grad_norm: 0.999999151771716, iteration: 156541
loss: 0.986812174320221,grad_norm: 0.999999239768516, iteration: 156542
loss: 1.0143262147903442,grad_norm: 0.9999992216278857, iteration: 156543
loss: 1.01297128200531,grad_norm: 0.9999993031979566, iteration: 156544
loss: 0.9526172876358032,grad_norm: 0.9999992071579042, iteration: 156545
loss: 0.9938812255859375,grad_norm: 0.9999991266177675, iteration: 156546
loss: 1.0044933557510376,grad_norm: 0.9999992103572998, iteration: 156547
loss: 1.1219524145126343,grad_norm: 0.9999995416464785, iteration: 156548
loss: 1.008522629737854,grad_norm: 0.9999991062417422, iteration: 156549
loss: 1.00235116481781,grad_norm: 0.9999990799587772, iteration: 156550
loss: 0.9855430722236633,grad_norm: 0.9999991076804053, iteration: 156551
loss: 0.9759107828140259,grad_norm: 0.9999990502973922, iteration: 156552
loss: 0.9938892722129822,grad_norm: 0.9507913642698065, iteration: 156553
loss: 0.9926255941390991,grad_norm: 0.9896031819738251, iteration: 156554
loss: 1.010574221611023,grad_norm: 0.9767930753101897, iteration: 156555
loss: 0.9962087869644165,grad_norm: 0.999998971404087, iteration: 156556
loss: 1.0027875900268555,grad_norm: 0.9999990818782073, iteration: 156557
loss: 0.974618673324585,grad_norm: 0.8786197473953418, iteration: 156558
loss: 0.9979797005653381,grad_norm: 0.955441692979532, iteration: 156559
loss: 1.0011204481124878,grad_norm: 0.9363964255186512, iteration: 156560
loss: 1.0278301239013672,grad_norm: 0.9979025915137406, iteration: 156561
loss: 0.9621205925941467,grad_norm: 0.9999989849788175, iteration: 156562
loss: 1.0256943702697754,grad_norm: 0.9999990186544855, iteration: 156563
loss: 0.9906175136566162,grad_norm: 0.9999992017791468, iteration: 156564
loss: 0.9654101133346558,grad_norm: 0.9999991699900705, iteration: 156565
loss: 1.0299618244171143,grad_norm: 0.9999990443374412, iteration: 156566
loss: 0.9809978604316711,grad_norm: 0.9520695809277347, iteration: 156567
loss: 1.0053433179855347,grad_norm: 0.9260296611060073, iteration: 156568
loss: 1.0150402784347534,grad_norm: 0.9999996418318966, iteration: 156569
loss: 1.0016053915023804,grad_norm: 0.8514470862536838, iteration: 156570
loss: 1.0139907598495483,grad_norm: 0.9488763597141602, iteration: 156571
loss: 1.0295734405517578,grad_norm: 0.9999989464409115, iteration: 156572
loss: 1.035959005355835,grad_norm: 0.882858420671214, iteration: 156573
loss: 0.9843186736106873,grad_norm: 0.9193235788883221, iteration: 156574
loss: 1.0057578086853027,grad_norm: 0.9947066097931198, iteration: 156575
loss: 0.993410587310791,grad_norm: 0.8131833237071767, iteration: 156576
loss: 0.9706599712371826,grad_norm: 0.9999992312615879, iteration: 156577
loss: 1.0109955072402954,grad_norm: 0.9412627828057124, iteration: 156578
loss: 0.987938404083252,grad_norm: 0.9999988985438241, iteration: 156579
loss: 0.9950648546218872,grad_norm: 0.9999992324278393, iteration: 156580
loss: 1.0391098260879517,grad_norm: 0.999998996736121, iteration: 156581
loss: 0.9669625163078308,grad_norm: 0.9999988734925168, iteration: 156582
loss: 0.9898701310157776,grad_norm: 0.9215611476810583, iteration: 156583
loss: 0.9905258417129517,grad_norm: 0.9687665748210084, iteration: 156584
loss: 1.0012125968933105,grad_norm: 0.9999991442115956, iteration: 156585
loss: 0.9766300916671753,grad_norm: 0.9999991669346591, iteration: 156586
loss: 0.9952141046524048,grad_norm: 0.9999995991849148, iteration: 156587
loss: 1.0307164192199707,grad_norm: 0.9999995497311472, iteration: 156588
loss: 1.0154708623886108,grad_norm: 0.9999991948013592, iteration: 156589
loss: 1.0033499002456665,grad_norm: 0.884701228988128, iteration: 156590
loss: 0.9579613208770752,grad_norm: 0.9774279475206505, iteration: 156591
loss: 1.0386656522750854,grad_norm: 0.9999991655037491, iteration: 156592
loss: 0.9817827343940735,grad_norm: 0.9162229334584377, iteration: 156593
loss: 1.003852367401123,grad_norm: 0.8199759342397859, iteration: 156594
loss: 1.0132434368133545,grad_norm: 0.9999990626326224, iteration: 156595
loss: 0.9927681684494019,grad_norm: 0.848719433356356, iteration: 156596
loss: 1.0100351572036743,grad_norm: 0.9999992315681611, iteration: 156597
loss: 0.9716471433639526,grad_norm: 0.8814823837010037, iteration: 156598
loss: 1.0184190273284912,grad_norm: 0.9999991239287228, iteration: 156599
loss: 1.0390194654464722,grad_norm: 0.9999990894555701, iteration: 156600
loss: 0.990878164768219,grad_norm: 0.9999992768400875, iteration: 156601
loss: 1.0319751501083374,grad_norm: 0.9999995884351628, iteration: 156602
loss: 0.9732887148857117,grad_norm: 0.9086247904262664, iteration: 156603
loss: 0.9922944903373718,grad_norm: 0.9967774525919434, iteration: 156604
loss: 1.0200942754745483,grad_norm: 0.9222115870589745, iteration: 156605
loss: 0.997337818145752,grad_norm: 0.9999991374919874, iteration: 156606
loss: 1.0067075490951538,grad_norm: 0.9999992920996088, iteration: 156607
loss: 0.9992632865905762,grad_norm: 0.9999992076126405, iteration: 156608
loss: 1.0048658847808838,grad_norm: 0.9711327359450168, iteration: 156609
loss: 1.024501919746399,grad_norm: 0.999999720237205, iteration: 156610
loss: 0.9816232919692993,grad_norm: 0.9396794881203878, iteration: 156611
loss: 0.9872069954872131,grad_norm: 0.9565900010920819, iteration: 156612
loss: 1.0131564140319824,grad_norm: 0.9987311520938359, iteration: 156613
loss: 0.981982409954071,grad_norm: 0.9935146494256658, iteration: 156614
loss: 1.021350383758545,grad_norm: 0.8952200921386567, iteration: 156615
loss: 1.0087833404541016,grad_norm: 0.9999991976558076, iteration: 156616
loss: 1.0119351148605347,grad_norm: 0.93743338779996, iteration: 156617
loss: 0.9921779036521912,grad_norm: 0.9999992072142988, iteration: 156618
loss: 0.9892960786819458,grad_norm: 0.9999989945625296, iteration: 156619
loss: 0.9920998215675354,grad_norm: 0.9999991478440572, iteration: 156620
loss: 1.0030014514923096,grad_norm: 0.9343298153259059, iteration: 156621
loss: 0.9817250370979309,grad_norm: 0.9607016948552455, iteration: 156622
loss: 0.9997594356536865,grad_norm: 0.9182428283163477, iteration: 156623
loss: 1.0206341743469238,grad_norm: 0.9103388734791495, iteration: 156624
loss: 1.0326321125030518,grad_norm: 0.9146158932371361, iteration: 156625
loss: 1.0560822486877441,grad_norm: 0.9999996414866232, iteration: 156626
loss: 0.9737434387207031,grad_norm: 0.9999990139726631, iteration: 156627
loss: 0.9694289565086365,grad_norm: 0.9999990669704861, iteration: 156628
loss: 0.9839678406715393,grad_norm: 0.9999991042238772, iteration: 156629
loss: 0.9798782467842102,grad_norm: 0.9999991947573039, iteration: 156630
loss: 1.0157099962234497,grad_norm: 0.9999990657287312, iteration: 156631
loss: 0.9954031705856323,grad_norm: 0.9753401789022832, iteration: 156632
loss: 0.9090802073478699,grad_norm: 0.9999991992431446, iteration: 156633
loss: 0.9876724481582642,grad_norm: 0.9797924969015589, iteration: 156634
loss: 0.9804896712303162,grad_norm: 0.9999989779364588, iteration: 156635
loss: 0.9971909523010254,grad_norm: 0.9999991332023112, iteration: 156636
loss: 0.9592596292495728,grad_norm: 0.9999990723300218, iteration: 156637
loss: 1.0185248851776123,grad_norm: 0.9999991503427248, iteration: 156638
loss: 0.9896659255027771,grad_norm: 0.9195791157016414, iteration: 156639
loss: 1.0678553581237793,grad_norm: 0.9999990819166641, iteration: 156640
loss: 1.0093345642089844,grad_norm: 0.9999991011848212, iteration: 156641
loss: 1.0103975534439087,grad_norm: 0.8140028645448109, iteration: 156642
loss: 1.0217159986495972,grad_norm: 0.9999992768555803, iteration: 156643
loss: 1.039312720298767,grad_norm: 0.8696347104887884, iteration: 156644
loss: 0.9966462850570679,grad_norm: 0.9999990400343073, iteration: 156645
loss: 1.0401768684387207,grad_norm: 0.9999993380139588, iteration: 156646
loss: 1.0329005718231201,grad_norm: 0.9477701536765272, iteration: 156647
loss: 1.0109877586364746,grad_norm: 0.9999991935531085, iteration: 156648
loss: 1.0327755212783813,grad_norm: 0.8889170334858556, iteration: 156649
loss: 0.992547869682312,grad_norm: 0.9999991503722466, iteration: 156650
loss: 0.9985808730125427,grad_norm: 1.0000000164773741, iteration: 156651
loss: 1.0312691926956177,grad_norm: 0.9999990917764583, iteration: 156652
loss: 0.9768881797790527,grad_norm: 0.8147154694676466, iteration: 156653
loss: 1.0252034664154053,grad_norm: 0.9190366690720441, iteration: 156654
loss: 1.033199667930603,grad_norm: 0.9475850117023181, iteration: 156655
loss: 0.9884130358695984,grad_norm: 0.9552325790760787, iteration: 156656
loss: 0.9665413498878479,grad_norm: 0.999999128276492, iteration: 156657
loss: 0.9615988731384277,grad_norm: 0.832312263030266, iteration: 156658
loss: 1.0063551664352417,grad_norm: 0.9999990744014675, iteration: 156659
loss: 1.0063648223876953,grad_norm: 0.9999991468596888, iteration: 156660
loss: 0.994845986366272,grad_norm: 0.9999991494712428, iteration: 156661
loss: 0.9971899390220642,grad_norm: 0.9999991983262783, iteration: 156662
loss: 0.9921074509620667,grad_norm: 0.9398500679760523, iteration: 156663
loss: 0.9908443093299866,grad_norm: 0.9999989857044733, iteration: 156664
loss: 1.0532335042953491,grad_norm: 0.9999991719614152, iteration: 156665
loss: 1.0375910997390747,grad_norm: 0.9999990869931858, iteration: 156666
loss: 0.9958496689796448,grad_norm: 0.999999122853484, iteration: 156667
loss: 1.036436676979065,grad_norm: 0.9999991144491946, iteration: 156668
loss: 1.000012755393982,grad_norm: 0.9128855804767719, iteration: 156669
loss: 0.9545843601226807,grad_norm: 0.9005417060137574, iteration: 156670
loss: 0.9820799231529236,grad_norm: 0.9999991035575476, iteration: 156671
loss: 0.9918901324272156,grad_norm: 0.9999991615008084, iteration: 156672
loss: 1.0163406133651733,grad_norm: 0.9197456801483859, iteration: 156673
loss: 0.9774812459945679,grad_norm: 0.9999992622749863, iteration: 156674
loss: 1.190882682800293,grad_norm: 0.9999997789756648, iteration: 156675
loss: 1.079452633857727,grad_norm: 0.9999990819042921, iteration: 156676
loss: 1.007907509803772,grad_norm: 0.9160945206912141, iteration: 156677
loss: 1.0039472579956055,grad_norm: 0.8954108532533049, iteration: 156678
loss: 1.0082801580429077,grad_norm: 0.9999990919335296, iteration: 156679
loss: 1.0410600900650024,grad_norm: 0.9999994174094093, iteration: 156680
loss: 1.0359046459197998,grad_norm: 0.9999990985407962, iteration: 156681
loss: 0.9865787029266357,grad_norm: 0.9792778552574589, iteration: 156682
loss: 1.0338448286056519,grad_norm: 0.9999991122490265, iteration: 156683
loss: 0.9835622310638428,grad_norm: 0.9999994271937187, iteration: 156684
loss: 0.9871123433113098,grad_norm: 0.9454462055149268, iteration: 156685
loss: 0.9757107496261597,grad_norm: 0.9999992359512891, iteration: 156686
loss: 1.059361457824707,grad_norm: 0.9999990054201242, iteration: 156687
loss: 1.0291352272033691,grad_norm: 0.9999994027126395, iteration: 156688
loss: 0.9798158407211304,grad_norm: 0.9999990726304467, iteration: 156689
loss: 1.0270174741744995,grad_norm: 0.9999991741989938, iteration: 156690
loss: 1.0228626728057861,grad_norm: 0.9999990723871361, iteration: 156691
loss: 0.9948687553405762,grad_norm: 0.9547656710539273, iteration: 156692
loss: 1.0159145593643188,grad_norm: 0.9999991227683155, iteration: 156693
loss: 0.9845654964447021,grad_norm: 0.999999338216029, iteration: 156694
loss: 1.0164896249771118,grad_norm: 0.9793357287348081, iteration: 156695
loss: 0.9972695112228394,grad_norm: 0.9999992453032125, iteration: 156696
loss: 0.9978007674217224,grad_norm: 0.8641700645624961, iteration: 156697
loss: 0.985550582408905,grad_norm: 0.9999991510828338, iteration: 156698
loss: 1.0836117267608643,grad_norm: 0.9999997505314944, iteration: 156699
loss: 1.0020508766174316,grad_norm: 0.9999990606482213, iteration: 156700
loss: 0.9909564852714539,grad_norm: 0.9824615726629117, iteration: 156701
loss: 0.9763538837432861,grad_norm: 0.8426748023956302, iteration: 156702
loss: 1.026894211769104,grad_norm: 0.9564651615118015, iteration: 156703
loss: 1.0041316747665405,grad_norm: 0.9999992161497724, iteration: 156704
loss: 0.9882046580314636,grad_norm: 0.9999991963654147, iteration: 156705
loss: 1.0907307863235474,grad_norm: 0.8331659871154669, iteration: 156706
loss: 1.0176653861999512,grad_norm: 0.8400421316797815, iteration: 156707
loss: 0.9849708080291748,grad_norm: 0.9327529514760717, iteration: 156708
loss: 0.9802301526069641,grad_norm: 0.9999990480883533, iteration: 156709
loss: 1.0125404596328735,grad_norm: 0.9138337797375027, iteration: 156710
loss: 1.0217218399047852,grad_norm: 0.999999171940913, iteration: 156711
loss: 1.0017119646072388,grad_norm: 0.9833053407833274, iteration: 156712
loss: 0.9914921522140503,grad_norm: 0.8648221497202164, iteration: 156713
loss: 1.0267988443374634,grad_norm: 0.9999990542208482, iteration: 156714
loss: 1.005186915397644,grad_norm: 0.9913994442652707, iteration: 156715
loss: 1.020663857460022,grad_norm: 0.9999990518501173, iteration: 156716
loss: 1.0297966003417969,grad_norm: 0.9999992056227168, iteration: 156717
loss: 1.0064975023269653,grad_norm: 0.9906786416030186, iteration: 156718
loss: 1.006424069404602,grad_norm: 0.8180310839736562, iteration: 156719
loss: 0.982093870639801,grad_norm: 0.9999990955334853, iteration: 156720
loss: 0.9821123480796814,grad_norm: 0.9405342934591733, iteration: 156721
loss: 0.9778120517730713,grad_norm: 0.956788328738909, iteration: 156722
loss: 1.0377576351165771,grad_norm: 0.9999990037411507, iteration: 156723
loss: 1.0372904539108276,grad_norm: 0.9999992110556788, iteration: 156724
loss: 0.9784120917320251,grad_norm: 0.8782252867625546, iteration: 156725
loss: 0.9964014291763306,grad_norm: 0.9999991473084563, iteration: 156726
loss: 0.9550058245658875,grad_norm: 0.9264849566676081, iteration: 156727
loss: 1.0091938972473145,grad_norm: 0.9312679520324948, iteration: 156728
loss: 1.0381615161895752,grad_norm: 0.999999192106208, iteration: 156729
loss: 1.0121397972106934,grad_norm: 0.999999094774862, iteration: 156730
loss: 0.9872440695762634,grad_norm: 0.9999992180580091, iteration: 156731
loss: 0.9643084406852722,grad_norm: 0.9999992511380619, iteration: 156732
loss: 0.9893454313278198,grad_norm: 0.9132772058407819, iteration: 156733
loss: 0.9690529108047485,grad_norm: 0.9999990288087224, iteration: 156734
loss: 1.0282641649246216,grad_norm: 0.9756130748586456, iteration: 156735
loss: 1.0034663677215576,grad_norm: 0.8952414482801431, iteration: 156736
loss: 0.9982699751853943,grad_norm: 0.9999990661936267, iteration: 156737
loss: 0.9900233149528503,grad_norm: 0.8608029845968762, iteration: 156738
loss: 1.0026997327804565,grad_norm: 0.99999919315822, iteration: 156739
loss: 0.9716883897781372,grad_norm: 0.9999990105284587, iteration: 156740
loss: 0.9614520072937012,grad_norm: 0.8859588827283945, iteration: 156741
loss: 0.9991795420646667,grad_norm: 0.999999184573148, iteration: 156742
loss: 0.9978283643722534,grad_norm: 0.8419895673619232, iteration: 156743
loss: 0.97989821434021,grad_norm: 0.9917403217202433, iteration: 156744
loss: 1.0092487335205078,grad_norm: 0.9999990817776151, iteration: 156745
loss: 0.9949495196342468,grad_norm: 0.999999274883063, iteration: 156746
loss: 1.0042637586593628,grad_norm: 0.9999991889797458, iteration: 156747
loss: 0.9779002070426941,grad_norm: 0.9999991268380568, iteration: 156748
loss: 0.9971399307250977,grad_norm: 0.999999048947156, iteration: 156749
loss: 1.013235092163086,grad_norm: 0.9999991979625861, iteration: 156750
loss: 0.9897964000701904,grad_norm: 0.9999992664890279, iteration: 156751
loss: 0.9901965856552124,grad_norm: 0.9872277713435682, iteration: 156752
loss: 1.008528470993042,grad_norm: 0.8701372919712822, iteration: 156753
loss: 0.9954553246498108,grad_norm: 0.8543485026234919, iteration: 156754
loss: 1.009132742881775,grad_norm: 0.8329776813859545, iteration: 156755
loss: 1.0243749618530273,grad_norm: 0.9928476144638823, iteration: 156756
loss: 0.992755651473999,grad_norm: 0.8673029056124604, iteration: 156757
loss: 1.0171242952346802,grad_norm: 0.9999991474784677, iteration: 156758
loss: 1.0232003927230835,grad_norm: 0.9079383995089441, iteration: 156759
loss: 1.0233317613601685,grad_norm: 0.9999991809464245, iteration: 156760
loss: 0.9894723296165466,grad_norm: 0.9560868594174969, iteration: 156761
loss: 0.9785409569740295,grad_norm: 0.9999992659112936, iteration: 156762
loss: 1.0105626583099365,grad_norm: 0.9999991090759567, iteration: 156763
loss: 0.9908723831176758,grad_norm: 0.9999992568438826, iteration: 156764
loss: 0.9913759827613831,grad_norm: 0.796998773863145, iteration: 156765
loss: 1.0233525037765503,grad_norm: 0.99999900568222, iteration: 156766
loss: 0.9909802675247192,grad_norm: 0.9999992392203013, iteration: 156767
loss: 0.9923924207687378,grad_norm: 0.9999992975098277, iteration: 156768
loss: 1.0218662023544312,grad_norm: 0.999999186556457, iteration: 156769
loss: 0.9719004034996033,grad_norm: 0.999999465735382, iteration: 156770
loss: 1.0016310214996338,grad_norm: 0.9999991257238331, iteration: 156771
loss: 1.035784363746643,grad_norm: 0.9143749695062043, iteration: 156772
loss: 1.0020133256912231,grad_norm: 0.9947937770167998, iteration: 156773
loss: 1.0205256938934326,grad_norm: 0.9999991384809901, iteration: 156774
loss: 1.00498628616333,grad_norm: 0.8835281421481002, iteration: 156775
loss: 0.9904643893241882,grad_norm: 0.8718353861398825, iteration: 156776
loss: 0.9816218018531799,grad_norm: 0.9999990318001125, iteration: 156777
loss: 1.0021299123764038,grad_norm: 0.9999994312251297, iteration: 156778
loss: 0.9849244952201843,grad_norm: 0.8810934227399116, iteration: 156779
loss: 1.021438479423523,grad_norm: 0.8495775841965679, iteration: 156780
loss: 1.0219557285308838,grad_norm: 0.9145612231620184, iteration: 156781
loss: 1.0117554664611816,grad_norm: 0.9767002015764122, iteration: 156782
loss: 0.9954575300216675,grad_norm: 0.9999990202548739, iteration: 156783
loss: 1.0430339574813843,grad_norm: 0.9983941790356461, iteration: 156784
loss: 1.0097665786743164,grad_norm: 0.9999991799156887, iteration: 156785
loss: 0.9935329556465149,grad_norm: 0.9814373120091461, iteration: 156786
loss: 1.0293759107589722,grad_norm: 0.9999990882522704, iteration: 156787
loss: 1.0060904026031494,grad_norm: 0.9999990251245879, iteration: 156788
loss: 1.0063412189483643,grad_norm: 0.9999990638895847, iteration: 156789
loss: 0.9789803624153137,grad_norm: 0.9999989722662168, iteration: 156790
loss: 0.9794941544532776,grad_norm: 0.9999990982937432, iteration: 156791
loss: 1.022326111793518,grad_norm: 0.9999992572536994, iteration: 156792
loss: 1.0372300148010254,grad_norm: 0.9999992186932092, iteration: 156793
loss: 0.9995012879371643,grad_norm: 0.8956014641462511, iteration: 156794
loss: 1.0005830526351929,grad_norm: 0.999999060609383, iteration: 156795
loss: 1.0173484086990356,grad_norm: 0.9000492678951386, iteration: 156796
loss: 1.0182795524597168,grad_norm: 0.8885660009777056, iteration: 156797
loss: 1.0168116092681885,grad_norm: 0.9721152895299237, iteration: 156798
loss: 0.9866441488265991,grad_norm: 0.9917768959582478, iteration: 156799
loss: 0.9877205491065979,grad_norm: 0.9999990024119212, iteration: 156800
loss: 1.0203359127044678,grad_norm: 0.8803221030858411, iteration: 156801
loss: 1.0127294063568115,grad_norm: 0.980042809974649, iteration: 156802
loss: 0.9967914819717407,grad_norm: 0.8136147477179025, iteration: 156803
loss: 1.071311116218567,grad_norm: 0.9999992393385404, iteration: 156804
loss: 0.9941550493240356,grad_norm: 0.9999989414060805, iteration: 156805
loss: 1.0029534101486206,grad_norm: 0.999999081716586, iteration: 156806
loss: 1.0135256052017212,grad_norm: 0.9437116895270602, iteration: 156807
loss: 0.9701212644577026,grad_norm: 0.9999991217539557, iteration: 156808
loss: 0.9972872138023376,grad_norm: 0.9999990642328647, iteration: 156809
loss: 0.9989233016967773,grad_norm: 0.8802662336165574, iteration: 156810
loss: 0.9579482078552246,grad_norm: 0.8284772071449819, iteration: 156811
loss: 0.9728684425354004,grad_norm: 0.9999991159414897, iteration: 156812
loss: 0.9985454678535461,grad_norm: 0.9999992224439064, iteration: 156813
loss: 0.9674181342124939,grad_norm: 0.9999992399828179, iteration: 156814
loss: 1.0114002227783203,grad_norm: 0.9999992030700957, iteration: 156815
loss: 1.0288797616958618,grad_norm: 0.9999997964811005, iteration: 156816
loss: 1.0318398475646973,grad_norm: 0.9999992110222853, iteration: 156817
loss: 0.9803128242492676,grad_norm: 0.9999991363605685, iteration: 156818
loss: 0.9687328338623047,grad_norm: 0.9999995300106814, iteration: 156819
loss: 0.9755516648292542,grad_norm: 0.9999991404718365, iteration: 156820
loss: 1.0333129167556763,grad_norm: 0.9359091807903117, iteration: 156821
loss: 1.0086381435394287,grad_norm: 0.9999993312874368, iteration: 156822
loss: 1.0401443243026733,grad_norm: 0.9999991768307331, iteration: 156823
loss: 0.9768002033233643,grad_norm: 0.9191577406333079, iteration: 156824
loss: 0.9762471318244934,grad_norm: 0.9717494363784368, iteration: 156825
loss: 0.9901158809661865,grad_norm: 0.9439930264753419, iteration: 156826
loss: 1.0137786865234375,grad_norm: 0.8425453922981759, iteration: 156827
loss: 1.01447594165802,grad_norm: 0.9470456651939233, iteration: 156828
loss: 1.034091591835022,grad_norm: 0.9468848262357182, iteration: 156829
loss: 0.9843617677688599,grad_norm: 0.9999992570699366, iteration: 156830
loss: 1.0127536058425903,grad_norm: 0.9999989818659676, iteration: 156831
loss: 0.9988237619400024,grad_norm: 0.949638131025663, iteration: 156832
loss: 1.0307060480117798,grad_norm: 0.9999991027533601, iteration: 156833
loss: 1.0001442432403564,grad_norm: 0.9999991974455541, iteration: 156834
loss: 1.026558756828308,grad_norm: 0.9953886864185882, iteration: 156835
loss: 1.011690378189087,grad_norm: 0.9999992642229513, iteration: 156836
loss: 1.0092048645019531,grad_norm: 0.999999097387136, iteration: 156837
loss: 0.9717323184013367,grad_norm: 0.9576784182933479, iteration: 156838
loss: 0.9804280996322632,grad_norm: 0.9999991109555508, iteration: 156839
loss: 1.0411738157272339,grad_norm: 0.9999992443987215, iteration: 156840
loss: 1.005142092704773,grad_norm: 0.9265609177443693, iteration: 156841
loss: 0.9966926574707031,grad_norm: 0.9034344909577416, iteration: 156842
loss: 0.9754596948623657,grad_norm: 0.9999992500525987, iteration: 156843
loss: 1.034577488899231,grad_norm: 0.9999996975812007, iteration: 156844
loss: 0.9932403564453125,grad_norm: 0.999999569656962, iteration: 156845
loss: 1.0196532011032104,grad_norm: 0.8847962242718831, iteration: 156846
loss: 1.0312448740005493,grad_norm: 0.9999989899575011, iteration: 156847
loss: 1.0105023384094238,grad_norm: 0.8916140034704222, iteration: 156848
loss: 0.9976410269737244,grad_norm: 0.9999990252351618, iteration: 156849
loss: 1.0090042352676392,grad_norm: 0.9999990155757755, iteration: 156850
loss: 0.9923534393310547,grad_norm: 0.9999990942740578, iteration: 156851
loss: 0.9895889759063721,grad_norm: 0.9999991213079985, iteration: 156852
loss: 1.0236468315124512,grad_norm: 0.9999990729089475, iteration: 156853
loss: 0.9932604432106018,grad_norm: 0.955695369323605, iteration: 156854
loss: 1.0220599174499512,grad_norm: 0.999999194918575, iteration: 156855
loss: 1.0004976987838745,grad_norm: 0.999999287297045, iteration: 156856
loss: 1.006683111190796,grad_norm: 0.9999992476815426, iteration: 156857
loss: 0.9801332354545593,grad_norm: 0.9999991057695817, iteration: 156858
loss: 1.0271705389022827,grad_norm: 0.9999992053015546, iteration: 156859
loss: 1.0000005960464478,grad_norm: 0.9276616394019328, iteration: 156860
loss: 1.0131508111953735,grad_norm: 0.8655502462391255, iteration: 156861
loss: 1.024462342262268,grad_norm: 0.9324382815903678, iteration: 156862
loss: 0.9730017781257629,grad_norm: 0.9999991413575975, iteration: 156863
loss: 1.0004088878631592,grad_norm: 0.9999989467020479, iteration: 156864
loss: 0.9942905902862549,grad_norm: 0.9999994434797638, iteration: 156865
loss: 1.0197241306304932,grad_norm: 0.9999991710411166, iteration: 156866
loss: 1.0069379806518555,grad_norm: 0.9999990391152579, iteration: 156867
loss: 0.9928772449493408,grad_norm: 0.9999992527736982, iteration: 156868
loss: 0.9761548638343811,grad_norm: 0.9999992097372676, iteration: 156869
loss: 0.9800795912742615,grad_norm: 0.9073882089798581, iteration: 156870
loss: 0.9891323447227478,grad_norm: 0.99999919932134, iteration: 156871
loss: 1.044755458831787,grad_norm: 0.9999992064965604, iteration: 156872
loss: 1.0143965482711792,grad_norm: 0.9253895430314658, iteration: 156873
loss: 1.0045547485351562,grad_norm: 0.8805274826972288, iteration: 156874
loss: 1.0079056024551392,grad_norm: 0.9999992020650641, iteration: 156875
loss: 1.0767868757247925,grad_norm: 0.9999995023793211, iteration: 156876
loss: 0.9757972955703735,grad_norm: 0.9999992425910307, iteration: 156877
loss: 0.9771293997764587,grad_norm: 0.9999992157369134, iteration: 156878
loss: 1.0590403079986572,grad_norm: 0.9999993035252762, iteration: 156879
loss: 1.0034472942352295,grad_norm: 0.9549565595099426, iteration: 156880
loss: 0.9536449909210205,grad_norm: 0.996239799071876, iteration: 156881
loss: 1.0212326049804688,grad_norm: 0.9548321335037436, iteration: 156882
loss: 1.0249391794204712,grad_norm: 0.8756575625376877, iteration: 156883
loss: 0.9930550456047058,grad_norm: 0.9805924160811681, iteration: 156884
loss: 1.0064241886138916,grad_norm: 0.982035231744845, iteration: 156885
loss: 0.9845976829528809,grad_norm: 0.910154119393719, iteration: 156886
loss: 1.0149540901184082,grad_norm: 0.9072388680913812, iteration: 156887
loss: 1.0098518133163452,grad_norm: 0.99999951344079, iteration: 156888
loss: 0.991920530796051,grad_norm: 0.9007144837359724, iteration: 156889
loss: 0.9993970990180969,grad_norm: 0.9999991415243222, iteration: 156890
loss: 1.0008811950683594,grad_norm: 0.9999991154850607, iteration: 156891
loss: 1.0214824676513672,grad_norm: 0.9734334612220737, iteration: 156892
loss: 0.9948282837867737,grad_norm: 0.9349071787430178, iteration: 156893
loss: 1.0069173574447632,grad_norm: 0.9999991776309413, iteration: 156894
loss: 1.0260510444641113,grad_norm: 0.9059216433089219, iteration: 156895
loss: 1.0166734457015991,grad_norm: 0.9999991326076867, iteration: 156896
loss: 1.0286457538604736,grad_norm: 0.9999995473473458, iteration: 156897
loss: 1.042819619178772,grad_norm: 0.9999992136854491, iteration: 156898
loss: 0.9820970296859741,grad_norm: 0.9999990983907188, iteration: 156899
loss: 0.9422374367713928,grad_norm: 0.9999990887220936, iteration: 156900
loss: 0.9965066313743591,grad_norm: 0.9718389665274331, iteration: 156901
loss: 0.9603517651557922,grad_norm: 0.9190751413244524, iteration: 156902
loss: 1.0101755857467651,grad_norm: 0.9999989768794071, iteration: 156903
loss: 1.009812593460083,grad_norm: 0.9146656267517753, iteration: 156904
loss: 0.9978111386299133,grad_norm: 0.9493063721638108, iteration: 156905
loss: 0.9868938326835632,grad_norm: 0.9461552874085878, iteration: 156906
loss: 1.0119011402130127,grad_norm: 0.8531050444555422, iteration: 156907
loss: 1.0051771402359009,grad_norm: 0.910989176347991, iteration: 156908
loss: 0.9940710663795471,grad_norm: 0.8677745184307366, iteration: 156909
loss: 1.0276561975479126,grad_norm: 0.9999992004358654, iteration: 156910
loss: 0.9819276928901672,grad_norm: 0.9999989816588276, iteration: 156911
loss: 1.012270450592041,grad_norm: 0.9999990919788172, iteration: 156912
loss: 0.9807150363922119,grad_norm: 0.7882888949541738, iteration: 156913
loss: 0.9996009469032288,grad_norm: 0.9999991051398135, iteration: 156914
loss: 1.0257055759429932,grad_norm: 0.9999989598153005, iteration: 156915
loss: 1.0169137716293335,grad_norm: 0.9999990837167566, iteration: 156916
loss: 1.0023319721221924,grad_norm: 0.9999990102752485, iteration: 156917
loss: 0.9710108637809753,grad_norm: 0.9999989529403013, iteration: 156918
loss: 1.0087367296218872,grad_norm: 0.9999991051565186, iteration: 156919
loss: 1.0140419006347656,grad_norm: 0.9512945899145506, iteration: 156920
loss: 0.98777174949646,grad_norm: 0.9999990821717344, iteration: 156921
loss: 1.0085947513580322,grad_norm: 0.9999992567750897, iteration: 156922
loss: 0.9677446484565735,grad_norm: 0.999999115549353, iteration: 156923
loss: 0.9699978232383728,grad_norm: 0.9999990425506503, iteration: 156924
loss: 1.0077608823776245,grad_norm: 0.9999991802638765, iteration: 156925
loss: 1.0139051675796509,grad_norm: 0.9999992533128497, iteration: 156926
loss: 0.9955101013183594,grad_norm: 0.999999553365195, iteration: 156927
loss: 0.9789751768112183,grad_norm: 0.9999992139228117, iteration: 156928
loss: 0.9935775399208069,grad_norm: 0.9999990063954718, iteration: 156929
loss: 0.9971000552177429,grad_norm: 0.9999991196744984, iteration: 156930
loss: 0.9935761094093323,grad_norm: 0.9408739371231982, iteration: 156931
loss: 0.9867479801177979,grad_norm: 0.9539986136903257, iteration: 156932
loss: 0.9901865124702454,grad_norm: 0.9999990544619816, iteration: 156933
loss: 0.9928926825523376,grad_norm: 0.9999989985000782, iteration: 156934
loss: 1.0301471948623657,grad_norm: 0.915643872554319, iteration: 156935
loss: 1.0157700777053833,grad_norm: 0.99999927317078, iteration: 156936
loss: 1.032644510269165,grad_norm: 0.9999992050679751, iteration: 156937
loss: 0.9777174592018127,grad_norm: 0.9999990690109235, iteration: 156938
loss: 1.028412938117981,grad_norm: 0.8757643926923054, iteration: 156939
loss: 1.0673683881759644,grad_norm: 0.9999991538545348, iteration: 156940
loss: 1.0168949365615845,grad_norm: 0.9897543154068631, iteration: 156941
loss: 0.9930719137191772,grad_norm: 0.8574838970821985, iteration: 156942
loss: 1.012389063835144,grad_norm: 0.9999992806781544, iteration: 156943
loss: 1.0318231582641602,grad_norm: 0.9999990726788698, iteration: 156944
loss: 0.9891319870948792,grad_norm: 0.9999989763919449, iteration: 156945
loss: 1.0276657342910767,grad_norm: 0.86818266831738, iteration: 156946
loss: 1.0168894529342651,grad_norm: 0.9533506940082757, iteration: 156947
loss: 0.9747940897941589,grad_norm: 0.999999061538245, iteration: 156948
loss: 0.9711810350418091,grad_norm: 0.9104525440108955, iteration: 156949
loss: 0.9753239154815674,grad_norm: 0.896675636337116, iteration: 156950
loss: 0.9908982515335083,grad_norm: 0.9722293151857159, iteration: 156951
loss: 1.00986647605896,grad_norm: 0.990460404077793, iteration: 156952
loss: 0.9775496125221252,grad_norm: 0.9999991953446824, iteration: 156953
loss: 1.0153164863586426,grad_norm: 0.9940074545685958, iteration: 156954
loss: 1.0137468576431274,grad_norm: 0.9999990336003697, iteration: 156955
loss: 1.0073882341384888,grad_norm: 0.971095184488416, iteration: 156956
loss: 0.9831169843673706,grad_norm: 0.9823060576305981, iteration: 156957
loss: 1.02641761302948,grad_norm: 0.999999283121374, iteration: 156958
loss: 0.9922683835029602,grad_norm: 0.9765745149670864, iteration: 156959
loss: 0.9920634031295776,grad_norm: 0.999999043220454, iteration: 156960
loss: 1.0213549137115479,grad_norm: 0.7859854224114664, iteration: 156961
loss: 0.9981712698936462,grad_norm: 0.8911093512078894, iteration: 156962
loss: 0.9848904609680176,grad_norm: 0.9999990668620544, iteration: 156963
loss: 1.0378994941711426,grad_norm: 0.9083751947351448, iteration: 156964
loss: 1.016723871231079,grad_norm: 0.9999994059551162, iteration: 156965
loss: 0.9813605546951294,grad_norm: 0.9866264356730606, iteration: 156966
loss: 1.021952748298645,grad_norm: 0.9651123105937914, iteration: 156967
loss: 0.9873830080032349,grad_norm: 0.9994484482711536, iteration: 156968
loss: 0.9800043106079102,grad_norm: 0.803831264331256, iteration: 156969
loss: 1.0151078701019287,grad_norm: 0.8953837675005086, iteration: 156970
loss: 0.9916159510612488,grad_norm: 0.9006188684273542, iteration: 156971
loss: 0.9905048608779907,grad_norm: 0.829364560495176, iteration: 156972
loss: 1.009300708770752,grad_norm: 0.9668432664087895, iteration: 156973
loss: 0.9792634844779968,grad_norm: 0.9999989572311755, iteration: 156974
loss: 1.0138243436813354,grad_norm: 0.8679209827336319, iteration: 156975
loss: 1.0235176086425781,grad_norm: 0.999998994639943, iteration: 156976
loss: 1.018520474433899,grad_norm: 0.9999990985894408, iteration: 156977
loss: 1.0046751499176025,grad_norm: 0.9002523940630124, iteration: 156978
loss: 1.0194965600967407,grad_norm: 0.9999991866870435, iteration: 156979
loss: 0.982233464717865,grad_norm: 0.9999990313847953, iteration: 156980
loss: 1.032299280166626,grad_norm: 0.9822917238563934, iteration: 156981
loss: 0.9938641786575317,grad_norm: 0.9999989949000179, iteration: 156982
loss: 1.0169800519943237,grad_norm: 0.9999992316670124, iteration: 156983
loss: 1.0138605833053589,grad_norm: 0.9958886580059503, iteration: 156984
loss: 1.0132330656051636,grad_norm: 0.9999989227364087, iteration: 156985
loss: 0.9664524793624878,grad_norm: 0.9999990664254068, iteration: 156986
loss: 1.020240306854248,grad_norm: 0.8864291139718158, iteration: 156987
loss: 0.9735497236251831,grad_norm: 0.9999990910524619, iteration: 156988
loss: 1.0485320091247559,grad_norm: 0.9999992015360715, iteration: 156989
loss: 1.005436658859253,grad_norm: 0.9999990732775591, iteration: 156990
loss: 1.010567545890808,grad_norm: 0.9806862916097341, iteration: 156991
loss: 0.9983834624290466,grad_norm: 0.9999992819545176, iteration: 156992
loss: 0.9657644629478455,grad_norm: 0.999999277324535, iteration: 156993
loss: 0.9616554379463196,grad_norm: 0.9999991145501413, iteration: 156994
loss: 0.9819342494010925,grad_norm: 0.9999992067471534, iteration: 156995
loss: 1.0082981586456299,grad_norm: 0.9999992995149106, iteration: 156996
loss: 0.9904651641845703,grad_norm: 0.99999924050377, iteration: 156997
loss: 0.9768111109733582,grad_norm: 0.9117878287852665, iteration: 156998
loss: 1.3537968397140503,grad_norm: 0.9999997300251572, iteration: 156999
loss: 0.9986428618431091,grad_norm: 0.9999991782719111, iteration: 157000
loss: 1.0054664611816406,grad_norm: 0.9488055600455579, iteration: 157001
loss: 1.007340669631958,grad_norm: 0.9999991669814302, iteration: 157002
loss: 1.00155770778656,grad_norm: 0.9606972083624817, iteration: 157003
loss: 1.0318692922592163,grad_norm: 0.9999995436669716, iteration: 157004
loss: 1.0334938764572144,grad_norm: 0.9619156686631275, iteration: 157005
loss: 0.9707736372947693,grad_norm: 0.9794518466461165, iteration: 157006
loss: 0.9867110252380371,grad_norm: 0.9696935336884807, iteration: 157007
loss: 0.9901275634765625,grad_norm: 0.9999989826064163, iteration: 157008
loss: 0.9739806056022644,grad_norm: 0.9962705852016015, iteration: 157009
loss: 1.0007246732711792,grad_norm: 0.9999990319270643, iteration: 157010
loss: 1.008948802947998,grad_norm: 0.9999990534222614, iteration: 157011
loss: 1.026435136795044,grad_norm: 0.9999991698546261, iteration: 157012
loss: 0.9839537143707275,grad_norm: 0.931450274942903, iteration: 157013
loss: 0.99702388048172,grad_norm: 0.9999991978499104, iteration: 157014
loss: 0.9818019270896912,grad_norm: 0.9931479996349759, iteration: 157015
loss: 0.9733954071998596,grad_norm: 0.9999992403899918, iteration: 157016
loss: 0.9890569448471069,grad_norm: 0.9599459662573447, iteration: 157017
loss: 1.0434441566467285,grad_norm: 0.9999995451063967, iteration: 157018
loss: 1.013297438621521,grad_norm: 0.9999991536948897, iteration: 157019
loss: 0.9746822714805603,grad_norm: 0.9999991071373918, iteration: 157020
loss: 0.9985413551330566,grad_norm: 0.9796560070753022, iteration: 157021
loss: 1.010194182395935,grad_norm: 0.9999991078145433, iteration: 157022
loss: 0.989881694316864,grad_norm: 0.8676304220572257, iteration: 157023
loss: 0.9966681003570557,grad_norm: 0.950746582557552, iteration: 157024
loss: 1.0263938903808594,grad_norm: 0.9999995001592502, iteration: 157025
loss: 1.0000498294830322,grad_norm: 0.9999992854684382, iteration: 157026
loss: 0.9885581135749817,grad_norm: 0.9495016771895458, iteration: 157027
loss: 0.9907370209693909,grad_norm: 0.999998984195762, iteration: 157028
loss: 0.9883683919906616,grad_norm: 0.9999992253454774, iteration: 157029
loss: 1.018949031829834,grad_norm: 0.9612166540749892, iteration: 157030
loss: 0.9975928664207458,grad_norm: 0.9832186836796474, iteration: 157031
loss: 0.962162435054779,grad_norm: 0.8982616362682665, iteration: 157032
loss: 0.9655715227127075,grad_norm: 0.999999167778415, iteration: 157033
loss: 0.9972819685935974,grad_norm: 0.9978294370123576, iteration: 157034
loss: 1.0285171270370483,grad_norm: 0.999999113138315, iteration: 157035
loss: 0.9768121242523193,grad_norm: 0.8512323982515853, iteration: 157036
loss: 1.0253143310546875,grad_norm: 0.9999997123650687, iteration: 157037
loss: 1.0185378789901733,grad_norm: 0.9905562486510174, iteration: 157038
loss: 1.006925344467163,grad_norm: 0.8644594719518913, iteration: 157039
loss: 0.9664360284805298,grad_norm: 0.9999991967645375, iteration: 157040
loss: 0.9986658692359924,grad_norm: 0.944083744569292, iteration: 157041
loss: 0.9866641759872437,grad_norm: 0.9046022003844054, iteration: 157042
loss: 0.9551069736480713,grad_norm: 0.9999989713561267, iteration: 157043
loss: 0.9798218607902527,grad_norm: 0.9931339022439365, iteration: 157044
loss: 0.9955553412437439,grad_norm: 0.9999990018287851, iteration: 157045
loss: 1.0047858953475952,grad_norm: 0.8763495069001688, iteration: 157046
loss: 0.9802868962287903,grad_norm: 0.9999992665554209, iteration: 157047
loss: 0.970074474811554,grad_norm: 0.9508654077749344, iteration: 157048
loss: 0.9857905507087708,grad_norm: 0.9999991204803558, iteration: 157049
loss: 0.9715674519538879,grad_norm: 0.9999990798151105, iteration: 157050
loss: 0.963545560836792,grad_norm: 0.8290088149858915, iteration: 157051
loss: 1.014258861541748,grad_norm: 0.9450171043672126, iteration: 157052
loss: 0.9988066554069519,grad_norm: 0.9281347445695561, iteration: 157053
loss: 1.0152370929718018,grad_norm: 0.9981072868967507, iteration: 157054
loss: 1.0070747137069702,grad_norm: 0.9623165169823882, iteration: 157055
loss: 0.9580603241920471,grad_norm: 0.9383977598798513, iteration: 157056
loss: 0.9973766803741455,grad_norm: 0.9671018896274269, iteration: 157057
loss: 1.0168200731277466,grad_norm: 0.9656269441250669, iteration: 157058
loss: 1.0064045190811157,grad_norm: 0.999999204022638, iteration: 157059
loss: 0.98382568359375,grad_norm: 0.9999989685916136, iteration: 157060
loss: 1.1285300254821777,grad_norm: 0.9999997336781957, iteration: 157061
loss: 0.9725583791732788,grad_norm: 0.8819109231211818, iteration: 157062
loss: 1.0163809061050415,grad_norm: 0.9999990240875657, iteration: 157063
loss: 1.018694281578064,grad_norm: 0.9999991182252473, iteration: 157064
loss: 1.0040875673294067,grad_norm: 0.9188338882969624, iteration: 157065
loss: 1.10099458694458,grad_norm: 0.9999990905777132, iteration: 157066
loss: 1.0014585256576538,grad_norm: 0.981102203262715, iteration: 157067
loss: 0.9591286182403564,grad_norm: 0.9129725906484296, iteration: 157068
loss: 1.0638550519943237,grad_norm: 0.8971937056814635, iteration: 157069
loss: 0.9899446368217468,grad_norm: 0.9289817832805123, iteration: 157070
loss: 1.028454303741455,grad_norm: 0.9999993264159318, iteration: 157071
loss: 0.9899857044219971,grad_norm: 0.9286533720257554, iteration: 157072
loss: 1.0126620531082153,grad_norm: 0.999999203488815, iteration: 157073
loss: 1.0061789751052856,grad_norm: 0.9065191441265107, iteration: 157074
loss: 1.0264086723327637,grad_norm: 0.9620261659677974, iteration: 157075
loss: 0.9887906312942505,grad_norm: 0.848281609087224, iteration: 157076
loss: 1.0318015813827515,grad_norm: 0.9999995327975706, iteration: 157077
loss: 1.0061348676681519,grad_norm: 0.9816640040230905, iteration: 157078
loss: 0.9807877540588379,grad_norm: 0.9207884212441628, iteration: 157079
loss: 0.988490104675293,grad_norm: 0.9999991084277923, iteration: 157080
loss: 0.9813955426216125,grad_norm: 0.9653516140747619, iteration: 157081
loss: 0.958743691444397,grad_norm: 0.9999990408586366, iteration: 157082
loss: 0.9704856276512146,grad_norm: 0.9847528104420997, iteration: 157083
loss: 1.0516515970230103,grad_norm: 0.9999993341537347, iteration: 157084
loss: 0.9717139601707458,grad_norm: 0.9589373206039569, iteration: 157085
loss: 0.975251317024231,grad_norm: 0.8613165972923009, iteration: 157086
loss: 0.9495207071304321,grad_norm: 0.9669316027806847, iteration: 157087
loss: 1.0121920108795166,grad_norm: 0.9999991447698646, iteration: 157088
loss: 0.9947240352630615,grad_norm: 0.953765645259146, iteration: 157089
loss: 0.9866457581520081,grad_norm: 0.9824379195949495, iteration: 157090
loss: 1.0320433378219604,grad_norm: 0.9999993177988834, iteration: 157091
loss: 1.0267837047576904,grad_norm: 0.9904858921123993, iteration: 157092
loss: 0.9846315383911133,grad_norm: 0.9999991090820273, iteration: 157093
loss: 1.021559715270996,grad_norm: 0.997482996075747, iteration: 157094
loss: 0.9749003052711487,grad_norm: 0.8744139317701571, iteration: 157095
loss: 1.0437365770339966,grad_norm: 0.999999094104148, iteration: 157096
loss: 1.0299257040023804,grad_norm: 0.9520937662975332, iteration: 157097
loss: 0.9994840025901794,grad_norm: 0.9142618548402105, iteration: 157098
loss: 0.9563418030738831,grad_norm: 0.9222132586159884, iteration: 157099
loss: 0.9577511548995972,grad_norm: 0.9999990937950521, iteration: 157100
loss: 0.9627608060836792,grad_norm: 0.9999990102601541, iteration: 157101
loss: 1.0047447681427002,grad_norm: 0.8611511217484852, iteration: 157102
loss: 0.9824720621109009,grad_norm: 0.9999990584538271, iteration: 157103
loss: 1.0175889730453491,grad_norm: 0.999999135711805, iteration: 157104
loss: 0.9993560314178467,grad_norm: 0.9260378692903135, iteration: 157105
loss: 0.9691383838653564,grad_norm: 0.9301161030298446, iteration: 157106
loss: 1.0120570659637451,grad_norm: 0.9999991907494208, iteration: 157107
loss: 0.9860545992851257,grad_norm: 0.99999909141909, iteration: 157108
loss: 0.9898089170455933,grad_norm: 0.9999991323008839, iteration: 157109
loss: 0.9983044266700745,grad_norm: 0.9056688864662006, iteration: 157110
loss: 1.0185173749923706,grad_norm: 0.9999991116929052, iteration: 157111
loss: 1.0129047632217407,grad_norm: 0.9999992090297783, iteration: 157112
loss: 0.9890449643135071,grad_norm: 0.9999991035682089, iteration: 157113
loss: 0.9644356369972229,grad_norm: 0.9999990935126245, iteration: 157114
loss: 1.0295658111572266,grad_norm: 0.8957549094397242, iteration: 157115
loss: 1.0447558164596558,grad_norm: 0.9734519667007309, iteration: 157116
loss: 1.0067087411880493,grad_norm: 0.843441617048049, iteration: 157117
loss: 1.00654137134552,grad_norm: 0.918350988972791, iteration: 157118
loss: 1.0412089824676514,grad_norm: 0.9999991628217977, iteration: 157119
loss: 1.0244684219360352,grad_norm: 0.999999175423886, iteration: 157120
loss: 1.0045530796051025,grad_norm: 0.9999991899539786, iteration: 157121
loss: 1.0092979669570923,grad_norm: 0.8786072507962565, iteration: 157122
loss: 1.0025808811187744,grad_norm: 0.999999051593719, iteration: 157123
loss: 0.9941930770874023,grad_norm: 0.9741309508002327, iteration: 157124
loss: 0.9623695611953735,grad_norm: 0.9999989825003566, iteration: 157125
loss: 1.0054237842559814,grad_norm: 0.9999991065957616, iteration: 157126
loss: 1.0355795621871948,grad_norm: 0.9081191180413436, iteration: 157127
loss: 0.9771348834037781,grad_norm: 0.8941194213124037, iteration: 157128
loss: 1.0220986604690552,grad_norm: 0.9258063037355365, iteration: 157129
loss: 0.985847532749176,grad_norm: 0.9999990414698796, iteration: 157130
loss: 0.997668445110321,grad_norm: 0.9308503973619899, iteration: 157131
loss: 0.9910578727722168,grad_norm: 0.9999992940209932, iteration: 157132
loss: 1.0047441720962524,grad_norm: 0.9999993278318691, iteration: 157133
loss: 0.9970939755439758,grad_norm: 0.9999991779328908, iteration: 157134
loss: 1.0469064712524414,grad_norm: 0.9999994431592341, iteration: 157135
loss: 1.005638599395752,grad_norm: 0.9999989637301465, iteration: 157136
loss: 1.014349341392517,grad_norm: 0.9999992091972982, iteration: 157137
loss: 1.0104498863220215,grad_norm: 0.8130144182670528, iteration: 157138
loss: 0.9690079689025879,grad_norm: 0.8761288276723231, iteration: 157139
loss: 0.9999622106552124,grad_norm: 0.9999989978923358, iteration: 157140
loss: 1.015397548675537,grad_norm: 0.9999990910916167, iteration: 157141
loss: 1.0213967561721802,grad_norm: 0.9549263827917349, iteration: 157142
loss: 1.010091781616211,grad_norm: 0.9999992965214315, iteration: 157143
loss: 0.9943695068359375,grad_norm: 0.9966966076331959, iteration: 157144
loss: 0.984515368938446,grad_norm: 0.9956389418348873, iteration: 157145
loss: 1.0264359712600708,grad_norm: 0.7984746899184242, iteration: 157146
loss: 1.0156066417694092,grad_norm: 0.9641907386206888, iteration: 157147
loss: 0.986258327960968,grad_norm: 0.9999992834445078, iteration: 157148
loss: 0.9695364236831665,grad_norm: 0.9551947669209971, iteration: 157149
loss: 0.9881805777549744,grad_norm: 0.9999990959660447, iteration: 157150
loss: 1.0188896656036377,grad_norm: 0.9488268643998822, iteration: 157151
loss: 1.0142021179199219,grad_norm: 0.8275885787754366, iteration: 157152
loss: 1.006742000579834,grad_norm: 0.9999991810812894, iteration: 157153
loss: 0.9869434833526611,grad_norm: 0.9263465081921249, iteration: 157154
loss: 0.9752215147018433,grad_norm: 0.9692052678673809, iteration: 157155
loss: 1.0150172710418701,grad_norm: 0.9999991146112006, iteration: 157156
loss: 1.0310242176055908,grad_norm: 0.9999991632799483, iteration: 157157
loss: 0.9711169600486755,grad_norm: 0.926147982834495, iteration: 157158
loss: 1.0094521045684814,grad_norm: 0.9999990686987295, iteration: 157159
loss: 0.9969365000724792,grad_norm: 0.9999990657232238, iteration: 157160
loss: 1.01616632938385,grad_norm: 0.9360870315829919, iteration: 157161
loss: 0.9536292552947998,grad_norm: 0.9999990834444443, iteration: 157162
loss: 0.9834240078926086,grad_norm: 0.9999997665274076, iteration: 157163
loss: 0.9947916269302368,grad_norm: 0.9999990218143436, iteration: 157164
loss: 1.0245877504348755,grad_norm: 0.9999991236355983, iteration: 157165
loss: 1.0260264873504639,grad_norm: 0.9436989619538093, iteration: 157166
loss: 1.010574221611023,grad_norm: 0.9999991625649146, iteration: 157167
loss: 1.000419020652771,grad_norm: 0.9823275179621304, iteration: 157168
loss: 1.0291047096252441,grad_norm: 0.9999990671096168, iteration: 157169
loss: 1.0004364252090454,grad_norm: 0.9226774181862573, iteration: 157170
loss: 0.9947770833969116,grad_norm: 0.9999990024992184, iteration: 157171
loss: 0.9952304363250732,grad_norm: 0.9999990233100435, iteration: 157172
loss: 1.0237705707550049,grad_norm: 0.9999989320852777, iteration: 157173
loss: 0.9859691262245178,grad_norm: 0.999999028010881, iteration: 157174
loss: 1.0152848958969116,grad_norm: 0.9999991268227463, iteration: 157175
loss: 1.0016679763793945,grad_norm: 0.8254875325848787, iteration: 157176
loss: 0.9867381453514099,grad_norm: 0.8994502143909539, iteration: 157177
loss: 0.9726499319076538,grad_norm: 0.9640816156131781, iteration: 157178
loss: 1.0215728282928467,grad_norm: 0.9999990718570485, iteration: 157179
loss: 1.0257506370544434,grad_norm: 0.9979187250660833, iteration: 157180
loss: 1.019518256187439,grad_norm: 0.9608962511702636, iteration: 157181
loss: 1.0342766046524048,grad_norm: 0.8990964568446054, iteration: 157182
loss: 0.9923679828643799,grad_norm: 0.9681177865079412, iteration: 157183
loss: 0.9756735563278198,grad_norm: 0.9999991411437307, iteration: 157184
loss: 1.0419878959655762,grad_norm: 0.8809958989718979, iteration: 157185
loss: 0.9773051142692566,grad_norm: 0.8587282467135051, iteration: 157186
loss: 1.0083945989608765,grad_norm: 0.9999991093647698, iteration: 157187
loss: 0.9871281981468201,grad_norm: 0.9999990502256988, iteration: 157188
loss: 1.0074249505996704,grad_norm: 0.9999992828318771, iteration: 157189
loss: 0.9831694960594177,grad_norm: 0.9559573202840491, iteration: 157190
loss: 1.0056228637695312,grad_norm: 0.9596900158092722, iteration: 157191
loss: 1.0341582298278809,grad_norm: 0.9289406696505231, iteration: 157192
loss: 1.013163447380066,grad_norm: 0.8702335633468793, iteration: 157193
loss: 0.9870359301567078,grad_norm: 0.9999990660994726, iteration: 157194
loss: 1.0151768922805786,grad_norm: 0.7725995607528647, iteration: 157195
loss: 0.992389440536499,grad_norm: 0.9999989866194409, iteration: 157196
loss: 0.9846851825714111,grad_norm: 0.9999989714279084, iteration: 157197
loss: 0.9827430248260498,grad_norm: 0.9999990111928158, iteration: 157198
loss: 1.0274678468704224,grad_norm: 0.9999989777100451, iteration: 157199
loss: 0.9832704663276672,grad_norm: 0.9905038207391608, iteration: 157200
loss: 0.9837098121643066,grad_norm: 0.9999991850970306, iteration: 157201
loss: 0.9678184390068054,grad_norm: 0.999999103406279, iteration: 157202
loss: 0.9970154166221619,grad_norm: 0.9999992000261857, iteration: 157203
loss: 0.9980872869491577,grad_norm: 0.9999990517604255, iteration: 157204
loss: 0.9891826510429382,grad_norm: 0.7696471378765232, iteration: 157205
loss: 0.9903268218040466,grad_norm: 0.999999314807055, iteration: 157206
loss: 1.0334625244140625,grad_norm: 0.9288538537989806, iteration: 157207
loss: 0.9665653109550476,grad_norm: 0.9105417070563616, iteration: 157208
loss: 0.9833559393882751,grad_norm: 0.9537726114920099, iteration: 157209
loss: 0.9790403842926025,grad_norm: 0.9291856143769086, iteration: 157210
loss: 0.999860405921936,grad_norm: 0.9999991929787067, iteration: 157211
loss: 1.00618314743042,grad_norm: 0.9372127253220861, iteration: 157212
loss: 0.9812125563621521,grad_norm: 0.8143498714319051, iteration: 157213
loss: 1.0217963457107544,grad_norm: 0.9999990213119582, iteration: 157214
loss: 1.0340263843536377,grad_norm: 0.9999994293941081, iteration: 157215
loss: 0.998566746711731,grad_norm: 0.9545837111231668, iteration: 157216
loss: 0.9906214475631714,grad_norm: 0.9450778129976036, iteration: 157217
loss: 0.9734328985214233,grad_norm: 0.9730388844164714, iteration: 157218
loss: 0.9841303825378418,grad_norm: 0.9999991825336358, iteration: 157219
loss: 0.9958701729774475,grad_norm: 0.871880650937375, iteration: 157220
loss: 0.9976108074188232,grad_norm: 0.9999989930978848, iteration: 157221
loss: 0.9890874624252319,grad_norm: 0.8302363545697654, iteration: 157222
loss: 0.9985294938087463,grad_norm: 0.7760521994930187, iteration: 157223
loss: 1.0180974006652832,grad_norm: 0.999999188956821, iteration: 157224
loss: 0.9527899026870728,grad_norm: 0.9315805938031677, iteration: 157225
loss: 0.9987190365791321,grad_norm: 0.9999994158867098, iteration: 157226
loss: 1.023384690284729,grad_norm: 0.9252863223957255, iteration: 157227
loss: 0.9793792963027954,grad_norm: 0.9999996259060562, iteration: 157228
loss: 1.0256320238113403,grad_norm: 0.9999992993755912, iteration: 157229
loss: 0.9865943789482117,grad_norm: 0.9016940603204474, iteration: 157230
loss: 0.9892800450325012,grad_norm: 0.8917300609379203, iteration: 157231
loss: 0.9926387667655945,grad_norm: 0.8571454422281395, iteration: 157232
loss: 1.0225353240966797,grad_norm: 0.9999990813877906, iteration: 157233
loss: 0.9860510230064392,grad_norm: 0.8274207547048166, iteration: 157234
loss: 1.0129656791687012,grad_norm: 0.9610130855006975, iteration: 157235
loss: 0.9682236313819885,grad_norm: 0.9760474565288197, iteration: 157236
loss: 1.015264630317688,grad_norm: 0.9999997495434553, iteration: 157237
loss: 0.984552264213562,grad_norm: 0.9999991024413205, iteration: 157238
loss: 0.9909016489982605,grad_norm: 0.9999990347576047, iteration: 157239
loss: 0.9693244695663452,grad_norm: 0.9643342549825508, iteration: 157240
loss: 0.972619891166687,grad_norm: 0.9470319850035984, iteration: 157241
loss: 0.9685613512992859,grad_norm: 0.903860771614722, iteration: 157242
loss: 1.0119311809539795,grad_norm: 0.9999993187752416, iteration: 157243
loss: 0.9859014749526978,grad_norm: 0.9999991781661164, iteration: 157244
loss: 1.0294311046600342,grad_norm: 0.9999991606701467, iteration: 157245
loss: 0.9706850647926331,grad_norm: 0.999999192228752, iteration: 157246
loss: 1.005727767944336,grad_norm: 0.9999992949949389, iteration: 157247
loss: 0.9925447106361389,grad_norm: 0.9999990991232068, iteration: 157248
loss: 0.968820333480835,grad_norm: 0.8946460283874957, iteration: 157249
loss: 1.0109549760818481,grad_norm: 0.9775615930910585, iteration: 157250
loss: 1.0022541284561157,grad_norm: 0.9999991235922625, iteration: 157251
loss: 1.0512596368789673,grad_norm: 0.9999996635894556, iteration: 157252
loss: 1.0410910844802856,grad_norm: 0.9999991853698034, iteration: 157253
loss: 1.0105339288711548,grad_norm: 0.9999991152704384, iteration: 157254
loss: 1.0366230010986328,grad_norm: 1.0000000116874248, iteration: 157255
loss: 0.9956377148628235,grad_norm: 0.9454072016776774, iteration: 157256
loss: 1.012763261795044,grad_norm: 0.9514554657686498, iteration: 157257
loss: 1.0032258033752441,grad_norm: 0.9999991683524021, iteration: 157258
loss: 1.0457258224487305,grad_norm: 0.864710240813666, iteration: 157259
loss: 0.9589475989341736,grad_norm: 0.9047294931323404, iteration: 157260
loss: 1.0081686973571777,grad_norm: 0.9436449258354416, iteration: 157261
loss: 0.9598988890647888,grad_norm: 0.8832359238082278, iteration: 157262
loss: 0.9785272479057312,grad_norm: 0.8625347817933269, iteration: 157263
loss: 0.9564037919044495,grad_norm: 0.9559793727972805, iteration: 157264
loss: 1.078983187675476,grad_norm: 0.9623089638543376, iteration: 157265
loss: 0.9966003894805908,grad_norm: 0.9999992330662028, iteration: 157266
loss: 0.9945255517959595,grad_norm: 0.9443681964164918, iteration: 157267
loss: 0.9938491582870483,grad_norm: 0.9968981868043499, iteration: 157268
loss: 1.0128329992294312,grad_norm: 0.9633239342348388, iteration: 157269
loss: 0.9879723191261292,grad_norm: 0.999999112357844, iteration: 157270
loss: 0.9911900162696838,grad_norm: 0.8578888089064132, iteration: 157271
loss: 1.0647372007369995,grad_norm: 0.9999997258056245, iteration: 157272
loss: 1.0196422338485718,grad_norm: 0.9999991242520231, iteration: 157273
loss: 0.986292839050293,grad_norm: 0.9999992970326076, iteration: 157274
loss: 0.9725831151008606,grad_norm: 0.9999993528732338, iteration: 157275
loss: 1.013322114944458,grad_norm: 0.9999990586783642, iteration: 157276
loss: 0.9889782071113586,grad_norm: 0.8898777787979467, iteration: 157277
loss: 0.9830046892166138,grad_norm: 0.9697075496409813, iteration: 157278
loss: 0.9602669477462769,grad_norm: 0.9929193260805022, iteration: 157279
loss: 0.9805898666381836,grad_norm: 0.9999990482051412, iteration: 157280
loss: 0.9874374270439148,grad_norm: 0.8934086451771556, iteration: 157281
loss: 1.0032079219818115,grad_norm: 0.993978898863174, iteration: 157282
loss: 1.0431632995605469,grad_norm: 0.9999993577581031, iteration: 157283
loss: 1.0143488645553589,grad_norm: 0.8097138555113071, iteration: 157284
loss: 1.1777653694152832,grad_norm: 0.9999993972595019, iteration: 157285
loss: 0.9652705192565918,grad_norm: 0.9986197849576894, iteration: 157286
loss: 0.9851924180984497,grad_norm: 0.9877303357480699, iteration: 157287
loss: 0.9909018278121948,grad_norm: 0.9600850933145042, iteration: 157288
loss: 1.0036156177520752,grad_norm: 0.9721993851230373, iteration: 157289
loss: 0.9866462349891663,grad_norm: 0.848992945202469, iteration: 157290
loss: 0.99189293384552,grad_norm: 0.8849739253319834, iteration: 157291
loss: 1.0045660734176636,grad_norm: 0.9730198220144912, iteration: 157292
loss: 1.025418996810913,grad_norm: 0.7933028954833178, iteration: 157293
loss: 1.050904393196106,grad_norm: 0.9999991880141174, iteration: 157294
loss: 0.9918379187583923,grad_norm: 0.9287526646296116, iteration: 157295
loss: 1.0070691108703613,grad_norm: 0.9999991506503304, iteration: 157296
loss: 1.0607006549835205,grad_norm: 0.999999375052543, iteration: 157297
loss: 0.9985158443450928,grad_norm: 0.99999897518061, iteration: 157298
loss: 1.1021331548690796,grad_norm: 0.9999993166675873, iteration: 157299
loss: 0.9824410080909729,grad_norm: 0.9999991319111703, iteration: 157300
loss: 1.1398749351501465,grad_norm: 0.9999992150714574, iteration: 157301
loss: 1.0210429430007935,grad_norm: 0.9999990585034452, iteration: 157302
loss: 1.013383150100708,grad_norm: 0.8968420484445208, iteration: 157303
loss: 1.033989429473877,grad_norm: 0.9999990195846364, iteration: 157304
loss: 1.0034369230270386,grad_norm: 0.9867829468848812, iteration: 157305
loss: 1.0030694007873535,grad_norm: 0.8439307751708869, iteration: 157306
loss: 1.048112392425537,grad_norm: 0.999999496047406, iteration: 157307
loss: 1.0165181159973145,grad_norm: 0.9999993269802461, iteration: 157308
loss: 1.0211502313613892,grad_norm: 0.858577007158216, iteration: 157309
loss: 1.0280119180679321,grad_norm: 0.9939701830628312, iteration: 157310
loss: 1.065575361251831,grad_norm: 0.9999992578664186, iteration: 157311
loss: 1.0169318914413452,grad_norm: 0.9999992428033526, iteration: 157312
loss: 1.0304478406906128,grad_norm: 0.9999991262121152, iteration: 157313
loss: 1.0137499570846558,grad_norm: 0.9999990775184376, iteration: 157314
loss: 1.0000516176223755,grad_norm: 0.9129785517784804, iteration: 157315
loss: 0.9788945317268372,grad_norm: 0.9999992790065872, iteration: 157316
loss: 1.0223091840744019,grad_norm: 0.9529193292147111, iteration: 157317
loss: 1.05423903465271,grad_norm: 0.9999991134472406, iteration: 157318
loss: 0.9929846525192261,grad_norm: 0.9999990816487171, iteration: 157319
loss: 0.9981689453125,grad_norm: 0.9299236253914522, iteration: 157320
loss: 1.000191330909729,grad_norm: 0.906505458333778, iteration: 157321
loss: 0.9617236256599426,grad_norm: 0.9999995990308324, iteration: 157322
loss: 1.0137114524841309,grad_norm: 0.9886476386074993, iteration: 157323
loss: 1.0095314979553223,grad_norm: 0.9999994026285214, iteration: 157324
loss: 0.9777221083641052,grad_norm: 0.9999991587559319, iteration: 157325
loss: 1.0102092027664185,grad_norm: 0.8954201130519084, iteration: 157326
loss: 0.9822813868522644,grad_norm: 0.9999989925230645, iteration: 157327
loss: 0.9990746378898621,grad_norm: 0.9999990908550871, iteration: 157328
loss: 1.019274353981018,grad_norm: 0.9527211661240328, iteration: 157329
loss: 1.0023045539855957,grad_norm: 0.9999990216841603, iteration: 157330
loss: 0.9732266068458557,grad_norm: 0.9999991475588347, iteration: 157331
loss: 1.0183690786361694,grad_norm: 0.9153332109705004, iteration: 157332
loss: 0.9927853941917419,grad_norm: 0.9999990531237423, iteration: 157333
loss: 0.9666992425918579,grad_norm: 0.9999990934917858, iteration: 157334
loss: 1.0296683311462402,grad_norm: 0.9303946169191784, iteration: 157335
loss: 0.9825831651687622,grad_norm: 0.9999992971082674, iteration: 157336
loss: 1.0222437381744385,grad_norm: 0.924513969088627, iteration: 157337
loss: 1.0133819580078125,grad_norm: 0.9999990379792445, iteration: 157338
loss: 0.9670819640159607,grad_norm: 0.9999994803891693, iteration: 157339
loss: 0.9507725238800049,grad_norm: 0.876718031946515, iteration: 157340
loss: 1.0035210847854614,grad_norm: 0.9423272377354426, iteration: 157341
loss: 0.9903578758239746,grad_norm: 0.9999990921731136, iteration: 157342
loss: 1.0090276002883911,grad_norm: 0.9999989801478414, iteration: 157343
loss: 0.9708343744277954,grad_norm: 0.9820454748490595, iteration: 157344
loss: 0.9804477095603943,grad_norm: 0.999999112167957, iteration: 157345
loss: 0.980152428150177,grad_norm: 0.9999992023559579, iteration: 157346
loss: 0.9684064984321594,grad_norm: 0.9106072866266142, iteration: 157347
loss: 0.9927906394004822,grad_norm: 0.9999991984420137, iteration: 157348
loss: 0.986006498336792,grad_norm: 0.9471299593669639, iteration: 157349
loss: 0.9918094277381897,grad_norm: 0.9611759978747823, iteration: 157350
loss: 0.9985451698303223,grad_norm: 0.9700251443575022, iteration: 157351
loss: 0.9930062890052795,grad_norm: 0.9999992636815014, iteration: 157352
loss: 0.941532552242279,grad_norm: 0.9729350157528622, iteration: 157353
loss: 0.9950542449951172,grad_norm: 0.9745842145082164, iteration: 157354
loss: 1.0290648937225342,grad_norm: 0.9251965928970021, iteration: 157355
loss: 1.1152077913284302,grad_norm: 0.9999999651921375, iteration: 157356
loss: 0.9463672041893005,grad_norm: 0.9999990706753646, iteration: 157357
loss: 1.012924313545227,grad_norm: 0.9999992242007022, iteration: 157358
loss: 0.9743370413780212,grad_norm: 0.9999990067404314, iteration: 157359
loss: 1.0040253400802612,grad_norm: 0.9999991934083776, iteration: 157360
loss: 0.9797776937484741,grad_norm: 0.9999990645796465, iteration: 157361
loss: 0.9827544093132019,grad_norm: 0.9999992990343627, iteration: 157362
loss: 0.9688513278961182,grad_norm: 0.8610097516318201, iteration: 157363
loss: 0.9890842437744141,grad_norm: 0.9694309122950421, iteration: 157364
loss: 0.9594332575798035,grad_norm: 0.9999989881880362, iteration: 157365
loss: 1.041252613067627,grad_norm: 0.9999996420586691, iteration: 157366
loss: 0.9701888561248779,grad_norm: 0.960897748807987, iteration: 157367
loss: 0.9939508438110352,grad_norm: 0.9999995772257471, iteration: 157368
loss: 1.0079624652862549,grad_norm: 0.9382115378580966, iteration: 157369
loss: 1.0077861547470093,grad_norm: 0.9999991947349386, iteration: 157370
loss: 1.0576937198638916,grad_norm: 0.9999993336406445, iteration: 157371
loss: 1.0380618572235107,grad_norm: 0.9999990307564733, iteration: 157372
loss: 1.019235610961914,grad_norm: 0.9999992370446926, iteration: 157373
loss: 1.0590155124664307,grad_norm: 0.999999376858324, iteration: 157374
loss: 0.9937077164649963,grad_norm: 0.9999991455591164, iteration: 157375
loss: 1.0412778854370117,grad_norm: 0.9999991437909858, iteration: 157376
loss: 0.9802992939949036,grad_norm: 0.9047295476185466, iteration: 157377
loss: 0.9744918346405029,grad_norm: 0.9322747581449428, iteration: 157378
loss: 0.9871187806129456,grad_norm: 0.9999991028263031, iteration: 157379
loss: 0.9927951097488403,grad_norm: 0.8742040021179905, iteration: 157380
loss: 1.0357081890106201,grad_norm: 0.999999095470514, iteration: 157381
loss: 1.0049972534179688,grad_norm: 0.839279070778264, iteration: 157382
loss: 0.9806522130966187,grad_norm: 0.9999991064911941, iteration: 157383
loss: 0.9873344898223877,grad_norm: 0.9999990497688764, iteration: 157384
loss: 1.0021249055862427,grad_norm: 0.9347223757203195, iteration: 157385
loss: 0.9655284881591797,grad_norm: 0.9167579940372913, iteration: 157386
loss: 0.9979231953620911,grad_norm: 0.9433175588335344, iteration: 157387
loss: 0.9942420125007629,grad_norm: 0.8115454812567159, iteration: 157388
loss: 1.0107110738754272,grad_norm: 0.9999990426727187, iteration: 157389
loss: 1.0133321285247803,grad_norm: 0.9096107271022329, iteration: 157390
loss: 0.9569342732429504,grad_norm: 0.9999992682580703, iteration: 157391
loss: 1.0206094980239868,grad_norm: 0.9999991070930744, iteration: 157392
loss: 1.0227900743484497,grad_norm: 0.9999991650926146, iteration: 157393
loss: 1.0008959770202637,grad_norm: 0.9478323329430763, iteration: 157394
loss: 1.027868628501892,grad_norm: 0.9012072831199365, iteration: 157395
loss: 0.9930766820907593,grad_norm: 0.9999989947364631, iteration: 157396
loss: 1.023802399635315,grad_norm: 0.9658155970460534, iteration: 157397
loss: 1.0197986364364624,grad_norm: 0.9915495555657302, iteration: 157398
loss: 0.9896611571311951,grad_norm: 0.9999991952880294, iteration: 157399
loss: 1.1555598974227905,grad_norm: 0.9999993741451026, iteration: 157400
loss: 0.9948443174362183,grad_norm: 0.9999991656597848, iteration: 157401
loss: 1.0146775245666504,grad_norm: 0.8622730624750591, iteration: 157402
loss: 1.0135149955749512,grad_norm: 0.9999990923294303, iteration: 157403
loss: 0.9504940509796143,grad_norm: 0.9767113569224424, iteration: 157404
loss: 1.0000866651535034,grad_norm: 0.9438805089257011, iteration: 157405
loss: 1.0132637023925781,grad_norm: 0.9224429991143062, iteration: 157406
loss: 1.0135607719421387,grad_norm: 0.997848981290697, iteration: 157407
loss: 1.0112658739089966,grad_norm: 0.9999990615156579, iteration: 157408
loss: 1.0045568943023682,grad_norm: 0.9845346592025729, iteration: 157409
loss: 1.003282904624939,grad_norm: 0.9231897366263383, iteration: 157410
loss: 1.0011427402496338,grad_norm: 0.9679212280485875, iteration: 157411
loss: 1.0033538341522217,grad_norm: 0.9999992166014365, iteration: 157412
loss: 1.0282105207443237,grad_norm: 0.9999989567434767, iteration: 157413
loss: 1.2802741527557373,grad_norm: 0.9999995742140997, iteration: 157414
loss: 1.0116758346557617,grad_norm: 0.9601120521625595, iteration: 157415
loss: 0.9792607426643372,grad_norm: 0.999999215352053, iteration: 157416
loss: 0.9844049215316772,grad_norm: 0.9999990694764809, iteration: 157417
loss: 1.0047742128372192,grad_norm: 0.9999991086091653, iteration: 157418
loss: 0.9667665958404541,grad_norm: 0.9573208897821525, iteration: 157419
loss: 0.9669020175933838,grad_norm: 0.9978655174501121, iteration: 157420
loss: 0.9735469818115234,grad_norm: 0.9999990466110152, iteration: 157421
loss: 0.9890244007110596,grad_norm: 0.9999990399635903, iteration: 157422
loss: 0.9777082800865173,grad_norm: 0.9039894776761536, iteration: 157423
loss: 1.031416416168213,grad_norm: 0.9999990682481975, iteration: 157424
loss: 1.0098952054977417,grad_norm: 0.9999991782596617, iteration: 157425
loss: 1.0244981050491333,grad_norm: 0.9999995461860745, iteration: 157426
loss: 0.9845927357673645,grad_norm: 0.922637922484123, iteration: 157427
loss: 1.0128856897354126,grad_norm: 0.9659061117391248, iteration: 157428
loss: 1.025733470916748,grad_norm: 0.9547728186000891, iteration: 157429
loss: 0.9822342991828918,grad_norm: 0.9475894790035894, iteration: 157430
loss: 0.9919636845588684,grad_norm: 0.999999123659515, iteration: 157431
loss: 0.974609375,grad_norm: 0.8376400346608827, iteration: 157432
loss: 1.0208218097686768,grad_norm: 0.9999991612070394, iteration: 157433
loss: 0.9839205741882324,grad_norm: 0.9684496191688556, iteration: 157434
loss: 0.9906951785087585,grad_norm: 0.9999991718305462, iteration: 157435
loss: 0.9555742144584656,grad_norm: 0.9999992737600998, iteration: 157436
loss: 0.9527036547660828,grad_norm: 0.999999083312085, iteration: 157437
loss: 1.0071773529052734,grad_norm: 0.9999990218458344, iteration: 157438
loss: 0.9753126502037048,grad_norm: 0.999999021012044, iteration: 157439
loss: 1.019632339477539,grad_norm: 0.9845883414394992, iteration: 157440
loss: 1.0193060636520386,grad_norm: 0.988302742691076, iteration: 157441
loss: 0.9895665645599365,grad_norm: 0.9615841435182755, iteration: 157442
loss: 0.9940806031227112,grad_norm: 0.9999990483286367, iteration: 157443
loss: 1.026491641998291,grad_norm: 0.9469263872510484, iteration: 157444
loss: 1.0308759212493896,grad_norm: 0.9999992068634753, iteration: 157445
loss: 1.013927936553955,grad_norm: 0.9401155437344628, iteration: 157446
loss: 1.0278043746948242,grad_norm: 0.8556850046233991, iteration: 157447
loss: 1.0146708488464355,grad_norm: 0.9911175092691049, iteration: 157448
loss: 1.0085117816925049,grad_norm: 0.9999992343171457, iteration: 157449
loss: 1.0160493850708008,grad_norm: 0.9999990794157824, iteration: 157450
loss: 0.9948024153709412,grad_norm: 0.8456164748251439, iteration: 157451
loss: 1.016281008720398,grad_norm: 0.8959765783514079, iteration: 157452
loss: 1.1007319688796997,grad_norm: 0.9999994149291319, iteration: 157453
loss: 1.0535714626312256,grad_norm: 0.9999991011096897, iteration: 157454
loss: 0.9935421347618103,grad_norm: 0.999998999444621, iteration: 157455
loss: 0.9998471736907959,grad_norm: 0.9999993284506122, iteration: 157456
loss: 0.9793941378593445,grad_norm: 0.9999992120720813, iteration: 157457
loss: 1.0087392330169678,grad_norm: 0.9999990871425148, iteration: 157458
loss: 0.9713371992111206,grad_norm: 0.964162703829238, iteration: 157459
loss: 1.0011979341506958,grad_norm: 0.9342066799909391, iteration: 157460
loss: 1.022984504699707,grad_norm: 0.9733616595731716, iteration: 157461
loss: 1.029761552810669,grad_norm: 0.9999992111653716, iteration: 157462
loss: 0.9861900210380554,grad_norm: 0.9999293769797694, iteration: 157463
loss: 1.030402421951294,grad_norm: 0.9999992306695986, iteration: 157464
loss: 0.9734614491462708,grad_norm: 0.9285393765368973, iteration: 157465
loss: 1.0142614841461182,grad_norm: 0.9999990146208001, iteration: 157466
loss: 1.0242282152175903,grad_norm: 0.9016505425493776, iteration: 157467
loss: 1.1136361360549927,grad_norm: 0.9999994557434866, iteration: 157468
loss: 1.0177375078201294,grad_norm: 0.9999991439704997, iteration: 157469
loss: 0.9786624908447266,grad_norm: 0.999999226203514, iteration: 157470
loss: 1.0050926208496094,grad_norm: 0.9999993158830772, iteration: 157471
loss: 1.0076522827148438,grad_norm: 0.9580599133625415, iteration: 157472
loss: 0.997208833694458,grad_norm: 0.9999991061873871, iteration: 157473
loss: 1.02948796749115,grad_norm: 0.9826718335075776, iteration: 157474
loss: 0.9778789281845093,grad_norm: 0.9653074889103563, iteration: 157475
loss: 1.024183750152588,grad_norm: 0.9389174996455187, iteration: 157476
loss: 1.042986512184143,grad_norm: 0.9999989820953846, iteration: 157477
loss: 0.9892479181289673,grad_norm: 0.9944532298295292, iteration: 157478
loss: 1.0042798519134521,grad_norm: 0.9414801452137277, iteration: 157479
loss: 1.0146162509918213,grad_norm: 0.9999991613285376, iteration: 157480
loss: 0.9996562004089355,grad_norm: 0.8921244308648979, iteration: 157481
loss: 0.9813677072525024,grad_norm: 0.9887313902895108, iteration: 157482
loss: 0.9878806471824646,grad_norm: 0.9626230559339958, iteration: 157483
loss: 0.976118266582489,grad_norm: 0.9825722613938643, iteration: 157484
loss: 1.0104610919952393,grad_norm: 0.9827637804962157, iteration: 157485
loss: 1.0366435050964355,grad_norm: 0.9999993119393106, iteration: 157486
loss: 1.0194048881530762,grad_norm: 0.9999992068119661, iteration: 157487
loss: 1.0290746688842773,grad_norm: 0.9499723931516564, iteration: 157488
loss: 0.9670475721359253,grad_norm: 0.9999991183572838, iteration: 157489
loss: 1.009930968284607,grad_norm: 0.8842617459336702, iteration: 157490
loss: 0.997506320476532,grad_norm: 0.9999990657670935, iteration: 157491
loss: 1.0183539390563965,grad_norm: 0.90437817743466, iteration: 157492
loss: 1.0359665155410767,grad_norm: 0.9323743045790207, iteration: 157493
loss: 0.9936598539352417,grad_norm: 0.9999990653331883, iteration: 157494
loss: 1.0002347230911255,grad_norm: 0.9999990506764834, iteration: 157495
loss: 1.005710482597351,grad_norm: 0.999999134655443, iteration: 157496
loss: 0.9810317754745483,grad_norm: 0.9978064178675409, iteration: 157497
loss: 1.0303245782852173,grad_norm: 0.8519734011160374, iteration: 157498
loss: 0.9861008524894714,grad_norm: 0.9061726314741124, iteration: 157499
loss: 1.0180917978286743,grad_norm: 0.9999989063087552, iteration: 157500
loss: 0.9838355183601379,grad_norm: 0.9999989419637527, iteration: 157501
loss: 0.9966092109680176,grad_norm: 0.999999083460942, iteration: 157502
loss: 1.0052937269210815,grad_norm: 0.999999216418662, iteration: 157503
loss: 1.0142226219177246,grad_norm: 0.9682311917137119, iteration: 157504
loss: 0.9932661056518555,grad_norm: 0.9955931808423882, iteration: 157505
loss: 0.9780735969543457,grad_norm: 0.9999991742553004, iteration: 157506
loss: 1.0149534940719604,grad_norm: 0.9999992518436814, iteration: 157507
loss: 0.9911319613456726,grad_norm: 0.9999990936766778, iteration: 157508
loss: 0.9988716840744019,grad_norm: 0.9765644214901495, iteration: 157509
loss: 0.980115532875061,grad_norm: 0.98528424774202, iteration: 157510
loss: 1.011683464050293,grad_norm: 0.9999990554377965, iteration: 157511
loss: 0.9580357670783997,grad_norm: 0.9999990880576946, iteration: 157512
loss: 1.0134141445159912,grad_norm: 0.8555018212689548, iteration: 157513
loss: 1.0306743383407593,grad_norm: 0.9999990810761495, iteration: 157514
loss: 0.9911726117134094,grad_norm: 0.9522903515820021, iteration: 157515
loss: 1.035938024520874,grad_norm: 0.8246520151335196, iteration: 157516
loss: 1.0032857656478882,grad_norm: 0.9999991037345539, iteration: 157517
loss: 0.9813563823699951,grad_norm: 0.9999991813248782, iteration: 157518
loss: 1.006685495376587,grad_norm: 0.9999993269280113, iteration: 157519
loss: 0.9499800205230713,grad_norm: 0.9999990841192073, iteration: 157520
loss: 0.9895126223564148,grad_norm: 0.8956829036564963, iteration: 157521
loss: 1.0060688257217407,grad_norm: 0.9646096060570891, iteration: 157522
loss: 1.0076731443405151,grad_norm: 0.9783031187605743, iteration: 157523
loss: 1.0068657398223877,grad_norm: 0.9999992872203537, iteration: 157524
loss: 1.0059616565704346,grad_norm: 0.9999992229209963, iteration: 157525
loss: 0.9908964037895203,grad_norm: 0.9999989968730433, iteration: 157526
loss: 1.024977207183838,grad_norm: 0.9999991697889423, iteration: 157527
loss: 1.0036730766296387,grad_norm: 0.9802859549927978, iteration: 157528
loss: 1.0307717323303223,grad_norm: 0.9702453602365505, iteration: 157529
loss: 1.0077123641967773,grad_norm: 0.8998699941525616, iteration: 157530
loss: 0.9866089820861816,grad_norm: 0.9847241501113606, iteration: 157531
loss: 1.0235729217529297,grad_norm: 0.8843677054076788, iteration: 157532
loss: 0.9783614873886108,grad_norm: 0.9510574913112372, iteration: 157533
loss: 1.0000853538513184,grad_norm: 0.8855405424900465, iteration: 157534
loss: 1.0240455865859985,grad_norm: 0.9999991949627303, iteration: 157535
loss: 0.9934779405593872,grad_norm: 0.9723108967286345, iteration: 157536
loss: 0.9832445383071899,grad_norm: 0.8401219926413123, iteration: 157537
loss: 1.0022255182266235,grad_norm: 0.8853386917031898, iteration: 157538
loss: 1.008339285850525,grad_norm: 0.9445881716001349, iteration: 157539
loss: 1.0217111110687256,grad_norm: 0.9999992310780296, iteration: 157540
loss: 0.9909567832946777,grad_norm: 0.9999991004859029, iteration: 157541
loss: 1.008205771446228,grad_norm: 0.9999989904059208, iteration: 157542
loss: 1.0068970918655396,grad_norm: 0.9999990802886918, iteration: 157543
loss: 1.0396599769592285,grad_norm: 0.999999079758799, iteration: 157544
loss: 1.0240288972854614,grad_norm: 0.8499360628456432, iteration: 157545
loss: 0.9943390488624573,grad_norm: 0.9776418532635014, iteration: 157546
loss: 1.0208706855773926,grad_norm: 0.9999997397885557, iteration: 157547
loss: 0.9756255745887756,grad_norm: 0.8882533682188753, iteration: 157548
loss: 0.9819230437278748,grad_norm: 0.9379433099429584, iteration: 157549
loss: 1.0063453912734985,grad_norm: 0.9999998046915965, iteration: 157550
loss: 1.0136220455169678,grad_norm: 0.9999993560966884, iteration: 157551
loss: 0.992699384689331,grad_norm: 0.8122662531688547, iteration: 157552
loss: 0.9748665690422058,grad_norm: 0.9999990121787242, iteration: 157553
loss: 1.0187699794769287,grad_norm: 0.8599483084581686, iteration: 157554
loss: 1.0148588418960571,grad_norm: 0.9999989343859323, iteration: 157555
loss: 1.0124664306640625,grad_norm: 0.9999991761057101, iteration: 157556
loss: 0.9973137974739075,grad_norm: 0.9999992603220473, iteration: 157557
loss: 1.0006394386291504,grad_norm: 0.9712112810818174, iteration: 157558
loss: 1.0246628522872925,grad_norm: 0.9999990112740823, iteration: 157559
loss: 0.9828537702560425,grad_norm: 0.914398294461201, iteration: 157560
loss: 0.9912310838699341,grad_norm: 0.9827157350857998, iteration: 157561
loss: 1.0610054731369019,grad_norm: 0.9999990027041056, iteration: 157562
loss: 0.9677377939224243,grad_norm: 0.9236572986874864, iteration: 157563
loss: 1.0036019086837769,grad_norm: 0.8811242341367742, iteration: 157564
loss: 0.9932366013526917,grad_norm: 0.8742535680731609, iteration: 157565
loss: 1.1133869886398315,grad_norm: 0.9999995650375714, iteration: 157566
loss: 1.136682152748108,grad_norm: 0.9999991748383235, iteration: 157567
loss: 1.0348254442214966,grad_norm: 0.9999997493799122, iteration: 157568
loss: 1.0321084260940552,grad_norm: 0.9223061683757122, iteration: 157569
loss: 1.0795979499816895,grad_norm: 0.9999994100339017, iteration: 157570
loss: 1.0174154043197632,grad_norm: 0.99999904793868, iteration: 157571
loss: 0.9919008612632751,grad_norm: 0.8430613157582184, iteration: 157572
loss: 1.0184085369110107,grad_norm: 0.9999991674457076, iteration: 157573
loss: 0.9928085803985596,grad_norm: 0.9384795355326953, iteration: 157574
loss: 1.0308600664138794,grad_norm: 0.9999991766932931, iteration: 157575
loss: 1.136271357536316,grad_norm: 0.9999992166220515, iteration: 157576
loss: 1.005095362663269,grad_norm: 0.9231534411883677, iteration: 157577
loss: 1.0099141597747803,grad_norm: 0.9071214643958035, iteration: 157578
loss: 1.0081969499588013,grad_norm: 0.9999990858388956, iteration: 157579
loss: 1.045430302619934,grad_norm: 0.9999991598312797, iteration: 157580
loss: 1.0126540660858154,grad_norm: 0.8963288937505652, iteration: 157581
loss: 1.0014129877090454,grad_norm: 0.9731676309127664, iteration: 157582
loss: 0.9894354343414307,grad_norm: 0.9580657127738053, iteration: 157583
loss: 0.9826547503471375,grad_norm: 0.999999143840128, iteration: 157584
loss: 1.028666615486145,grad_norm: 0.9999991076947639, iteration: 157585
loss: 1.0225845575332642,grad_norm: 0.9999991858400686, iteration: 157586
loss: 1.0099235773086548,grad_norm: 0.9999990228546501, iteration: 157587
loss: 0.9968786239624023,grad_norm: 0.9272754968008105, iteration: 157588
loss: 1.0265684127807617,grad_norm: 0.9777413607112281, iteration: 157589
loss: 1.0261293649673462,grad_norm: 0.8703120148173359, iteration: 157590
loss: 1.0090423822402954,grad_norm: 0.9999993869213418, iteration: 157591
loss: 0.9838125705718994,grad_norm: 0.9999989064926232, iteration: 157592
loss: 0.9863376617431641,grad_norm: 0.9567434071320262, iteration: 157593
loss: 0.9925183057785034,grad_norm: 0.8880953831822084, iteration: 157594
loss: 1.0118037462234497,grad_norm: 0.999999157404152, iteration: 157595
loss: 1.0077065229415894,grad_norm: 0.9999991594032107, iteration: 157596
loss: 0.9917031526565552,grad_norm: 0.9672979833687214, iteration: 157597
loss: 1.012087345123291,grad_norm: 0.9999990450319118, iteration: 157598
loss: 1.0305451154708862,grad_norm: 0.9343735283171027, iteration: 157599
loss: 0.9682033658027649,grad_norm: 0.9999990642518275, iteration: 157600
loss: 1.023642897605896,grad_norm: 0.8804236319269421, iteration: 157601
loss: 1.0219355821609497,grad_norm: 0.8862324573731158, iteration: 157602
loss: 1.0160177946090698,grad_norm: 0.9999990822744661, iteration: 157603
loss: 0.991287887096405,grad_norm: 0.9999990354032864, iteration: 157604
loss: 1.013015627861023,grad_norm: 0.9999991020582875, iteration: 157605
loss: 1.0007199048995972,grad_norm: 0.9999992846921739, iteration: 157606
loss: 1.0106115341186523,grad_norm: 0.9964314932607308, iteration: 157607
loss: 0.9948395490646362,grad_norm: 0.9392413869510629, iteration: 157608
loss: 1.0053215026855469,grad_norm: 0.917975397585703, iteration: 157609
loss: 1.015913963317871,grad_norm: 0.9999991770264514, iteration: 157610
loss: 0.9978657364845276,grad_norm: 0.9999990952542661, iteration: 157611
loss: 1.0023301839828491,grad_norm: 0.9999991894904744, iteration: 157612
loss: 1.0221469402313232,grad_norm: 0.9999992292785063, iteration: 157613
loss: 0.9585411548614502,grad_norm: 0.9982115572974095, iteration: 157614
loss: 0.9425804615020752,grad_norm: 0.9999991908650053, iteration: 157615
loss: 1.0101649761199951,grad_norm: 0.874550080536682, iteration: 157616
loss: 1.0126230716705322,grad_norm: 0.9999993658634228, iteration: 157617
loss: 0.9784401655197144,grad_norm: 0.9629592761684134, iteration: 157618
loss: 1.046545147895813,grad_norm: 0.9999992858644318, iteration: 157619
loss: 1.0331679582595825,grad_norm: 0.999999484988543, iteration: 157620
loss: 0.9878814220428467,grad_norm: 0.893900827434285, iteration: 157621
loss: 0.9821359515190125,grad_norm: 0.9999991486048903, iteration: 157622
loss: 1.005742073059082,grad_norm: 0.9999993924577528, iteration: 157623
loss: 0.994828999042511,grad_norm: 0.818266209463987, iteration: 157624
loss: 1.00949227809906,grad_norm: 0.9651332024930841, iteration: 157625
loss: 1.018312692642212,grad_norm: 0.9980988089523887, iteration: 157626
loss: 0.9942097663879395,grad_norm: 0.9999991640603398, iteration: 157627
loss: 0.9689223766326904,grad_norm: 0.9971690105097946, iteration: 157628
loss: 0.9921451807022095,grad_norm: 0.9999992999891562, iteration: 157629
loss: 1.027480125427246,grad_norm: 0.9471095064319257, iteration: 157630
loss: 1.0134247541427612,grad_norm: 0.7408484929250813, iteration: 157631
loss: 1.0210713148117065,grad_norm: 0.9999991520245026, iteration: 157632
loss: 1.0088149309158325,grad_norm: 0.8747472190412439, iteration: 157633
loss: 1.0171594619750977,grad_norm: 0.9999991705659356, iteration: 157634
loss: 0.995555579662323,grad_norm: 0.9999991722869821, iteration: 157635
loss: 1.149690866470337,grad_norm: 0.9999994647198136, iteration: 157636
loss: 1.004563570022583,grad_norm: 0.9999989564591845, iteration: 157637
loss: 0.9893838763237,grad_norm: 0.9999990127175675, iteration: 157638
loss: 1.0158618688583374,grad_norm: 0.9999991449753514, iteration: 157639
loss: 0.9990569949150085,grad_norm: 0.999999180150435, iteration: 157640
loss: 1.0100297927856445,grad_norm: 0.812778833497289, iteration: 157641
loss: 0.9795598387718201,grad_norm: 0.9999991009504612, iteration: 157642
loss: 1.015026330947876,grad_norm: 0.9999991463540369, iteration: 157643
loss: 1.0113756656646729,grad_norm: 0.9999990686596658, iteration: 157644
loss: 0.9636829495429993,grad_norm: 0.9999990120163132, iteration: 157645
loss: 0.982460081577301,grad_norm: 0.9137441269158845, iteration: 157646
loss: 0.9770687818527222,grad_norm: 0.9616777604269688, iteration: 157647
loss: 0.9957035183906555,grad_norm: 0.9999991861109438, iteration: 157648
loss: 0.9984556436538696,grad_norm: 0.9999992445046215, iteration: 157649
loss: 1.0234365463256836,grad_norm: 0.9999990333781872, iteration: 157650
loss: 1.0022563934326172,grad_norm: 0.9999991018298732, iteration: 157651
loss: 0.990338921546936,grad_norm: 0.857873106488508, iteration: 157652
loss: 1.040533423423767,grad_norm: 0.9964078114332997, iteration: 157653
loss: 0.996692419052124,grad_norm: 0.8706712649311339, iteration: 157654
loss: 0.9867373108863831,grad_norm: 0.9188277137089366, iteration: 157655
loss: 0.9368835091590881,grad_norm: 0.9999992267771435, iteration: 157656
loss: 1.021113395690918,grad_norm: 0.9574588262716338, iteration: 157657
loss: 1.0054898262023926,grad_norm: 0.8807008878273518, iteration: 157658
loss: 0.9882869720458984,grad_norm: 0.9999993503643945, iteration: 157659
loss: 0.977547287940979,grad_norm: 0.9999992166998888, iteration: 157660
loss: 1.0294690132141113,grad_norm: 0.8382692675023241, iteration: 157661
loss: 1.0106343030929565,grad_norm: 0.9999992080700851, iteration: 157662
loss: 0.9921778440475464,grad_norm: 0.9999991657023032, iteration: 157663
loss: 0.9639729857444763,grad_norm: 0.9446846571754778, iteration: 157664
loss: 0.976143479347229,grad_norm: 0.9514475029515064, iteration: 157665
loss: 0.9730456471443176,grad_norm: 0.9999992740871091, iteration: 157666
loss: 1.035136342048645,grad_norm: 0.9999989827849897, iteration: 157667
loss: 1.0294824838638306,grad_norm: 0.9999992284432153, iteration: 157668
loss: 0.9827672243118286,grad_norm: 0.9999992683579605, iteration: 157669
loss: 1.006935477256775,grad_norm: 0.8805180069539841, iteration: 157670
loss: 1.0085340738296509,grad_norm: 0.90758369056026, iteration: 157671
loss: 0.9892969131469727,grad_norm: 0.999999100331039, iteration: 157672
loss: 0.985011637210846,grad_norm: 0.9999991800788276, iteration: 157673
loss: 0.9957833290100098,grad_norm: 0.9654018582109862, iteration: 157674
loss: 1.0005602836608887,grad_norm: 0.8932944872039829, iteration: 157675
loss: 1.0038009881973267,grad_norm: 0.998802456996743, iteration: 157676
loss: 1.0440212488174438,grad_norm: 0.9999990182541718, iteration: 157677
loss: 0.9946660399436951,grad_norm: 0.8818149053453616, iteration: 157678
loss: 0.9682773351669312,grad_norm: 0.9561405918866814, iteration: 157679
loss: 0.9959894418716431,grad_norm: 0.9876428744713076, iteration: 157680
loss: 0.9699571132659912,grad_norm: 0.9655788414219462, iteration: 157681
loss: 1.0475250482559204,grad_norm: 0.9335064015360713, iteration: 157682
loss: 1.0114848613739014,grad_norm: 0.9299506370041629, iteration: 157683
loss: 0.9704264998435974,grad_norm: 0.894472082108301, iteration: 157684
loss: 1.0006955862045288,grad_norm: 0.928775405573798, iteration: 157685
loss: 1.0191367864608765,grad_norm: 0.9999998807106473, iteration: 157686
loss: 1.0405409336090088,grad_norm: 0.9680142593078401, iteration: 157687
loss: 1.025936484336853,grad_norm: 0.9999990687063097, iteration: 157688
loss: 0.9818786978721619,grad_norm: 0.8726615030417517, iteration: 157689
loss: 0.983005702495575,grad_norm: 0.999999222548151, iteration: 157690
loss: 0.9487150311470032,grad_norm: 0.9999991437939162, iteration: 157691
loss: 0.9972602128982544,grad_norm: 0.8769413196379204, iteration: 157692
loss: 0.9958086609840393,grad_norm: 0.9442469107254304, iteration: 157693
loss: 0.9920909404754639,grad_norm: 0.9861222864199346, iteration: 157694
loss: 0.9812130331993103,grad_norm: 0.9549487598915773, iteration: 157695
loss: 1.0228397846221924,grad_norm: 0.9999990991595541, iteration: 157696
loss: 1.027305245399475,grad_norm: 0.999999044624905, iteration: 157697
loss: 0.9900331497192383,grad_norm: 0.9999990634761043, iteration: 157698
loss: 0.9887590408325195,grad_norm: 0.9999990824605821, iteration: 157699
loss: 0.9997437000274658,grad_norm: 0.9759660488436532, iteration: 157700
loss: 1.0015602111816406,grad_norm: 0.9854053320962775, iteration: 157701
loss: 1.0825068950653076,grad_norm: 1.000000007014432, iteration: 157702
loss: 1.016827940940857,grad_norm: 0.9999991257189871, iteration: 157703
loss: 0.9851503372192383,grad_norm: 0.9999989936745629, iteration: 157704
loss: 1.0034180879592896,grad_norm: 0.8592276397112137, iteration: 157705
loss: 1.0307250022888184,grad_norm: 0.9999989997387512, iteration: 157706
loss: 0.9948579668998718,grad_norm: 0.8383196101181573, iteration: 157707
loss: 1.0193203687667847,grad_norm: 0.9586066376381921, iteration: 157708
loss: 0.9610612988471985,grad_norm: 0.9999991737336513, iteration: 157709
loss: 1.0395426750183105,grad_norm: 0.9999997411906643, iteration: 157710
loss: 0.9902508854866028,grad_norm: 0.8689514381216124, iteration: 157711
loss: 0.9827668070793152,grad_norm: 0.8303827718732676, iteration: 157712
loss: 0.9873519539833069,grad_norm: 0.9090597546373405, iteration: 157713
loss: 1.0258127450942993,grad_norm: 0.9999991457707003, iteration: 157714
loss: 1.003765344619751,grad_norm: 0.9819247698747684, iteration: 157715
loss: 1.0100839138031006,grad_norm: 0.8893507196973407, iteration: 157716
loss: 0.9956870675086975,grad_norm: 0.9999990734547152, iteration: 157717
loss: 1.0203663110733032,grad_norm: 0.9999990829321359, iteration: 157718
loss: 1.0252726078033447,grad_norm: 0.9999989566653201, iteration: 157719
loss: 1.0120418071746826,grad_norm: 0.8933590119477248, iteration: 157720
loss: 1.0353299379348755,grad_norm: 0.9999992219476673, iteration: 157721
loss: 1.0246758460998535,grad_norm: 0.9042295589453464, iteration: 157722
loss: 0.9617334008216858,grad_norm: 0.9999993398915574, iteration: 157723
loss: 0.9885362386703491,grad_norm: 0.9999992349596896, iteration: 157724
loss: 0.9776948690414429,grad_norm: 0.9771965754336059, iteration: 157725
loss: 1.0504766702651978,grad_norm: 0.972342848618472, iteration: 157726
loss: 1.0642966032028198,grad_norm: 0.9999991488283776, iteration: 157727
loss: 1.045465350151062,grad_norm: 0.9999991280733368, iteration: 157728
loss: 0.9843290448188782,grad_norm: 0.9999990188395613, iteration: 157729
loss: 0.9936015605926514,grad_norm: 0.9999992128415985, iteration: 157730
loss: 0.9831810593605042,grad_norm: 0.9718547665156925, iteration: 157731
loss: 1.0061159133911133,grad_norm: 0.9999992095905498, iteration: 157732
loss: 0.9906433820724487,grad_norm: 0.8602499029829721, iteration: 157733
loss: 1.012324571609497,grad_norm: 0.9999989429708559, iteration: 157734
loss: 0.9975360631942749,grad_norm: 0.8469832818336791, iteration: 157735
loss: 1.0185561180114746,grad_norm: 0.9386317131035677, iteration: 157736
loss: 1.002326250076294,grad_norm: 0.7988888559767083, iteration: 157737
loss: 1.0166912078857422,grad_norm: 0.9999992671284582, iteration: 157738
loss: 1.0196146965026855,grad_norm: 0.9681384988035338, iteration: 157739
loss: 1.005920171737671,grad_norm: 0.8420939656450478, iteration: 157740
loss: 0.9912620782852173,grad_norm: 0.9559290721089987, iteration: 157741
loss: 1.0053212642669678,grad_norm: 0.9328616908308534, iteration: 157742
loss: 0.9919489026069641,grad_norm: 0.9563929171815596, iteration: 157743
loss: 1.0195742845535278,grad_norm: 0.999999237700432, iteration: 157744
loss: 0.9855958819389343,grad_norm: 0.9999990913155526, iteration: 157745
loss: 1.006134033203125,grad_norm: 0.9999991903021626, iteration: 157746
loss: 0.9546632766723633,grad_norm: 0.9999990416171858, iteration: 157747
loss: 1.011634349822998,grad_norm: 0.9879377490665854, iteration: 157748
loss: 1.0124669075012207,grad_norm: 0.8298216453380384, iteration: 157749
loss: 1.0149704217910767,grad_norm: 0.9163875152219741, iteration: 157750
loss: 1.0246690511703491,grad_norm: 0.9999995551628014, iteration: 157751
loss: 0.9941850304603577,grad_norm: 0.9769054222346586, iteration: 157752
loss: 0.9663019776344299,grad_norm: 0.9999990304903351, iteration: 157753
loss: 0.9601870179176331,grad_norm: 0.9621520006904718, iteration: 157754
loss: 0.959510087966919,grad_norm: 0.9293913747359456, iteration: 157755
loss: 1.0258456468582153,grad_norm: 0.9295384768591446, iteration: 157756
loss: 0.9972758293151855,grad_norm: 0.9999998896886136, iteration: 157757
loss: 1.0001431703567505,grad_norm: 0.9999991109040977, iteration: 157758
loss: 1.0317769050598145,grad_norm: 0.9999990470324749, iteration: 157759
loss: 0.9935082197189331,grad_norm: 0.9999992085444853, iteration: 157760
loss: 1.0224831104278564,grad_norm: 0.9898115141962321, iteration: 157761
loss: 0.9875748157501221,grad_norm: 0.9392750539077878, iteration: 157762
loss: 0.9771559238433838,grad_norm: 0.9999991312083083, iteration: 157763
loss: 0.9925699830055237,grad_norm: 0.9999991441160836, iteration: 157764
loss: 0.9538698196411133,grad_norm: 0.9352846755433157, iteration: 157765
loss: 1.0026522874832153,grad_norm: 0.8755033476360737, iteration: 157766
loss: 1.012490153312683,grad_norm: 0.9877063757397537, iteration: 157767
loss: 1.0344117879867554,grad_norm: 0.9999991247177132, iteration: 157768
loss: 0.9661867022514343,grad_norm: 0.9999990634563184, iteration: 157769
loss: 1.0111169815063477,grad_norm: 0.8716058160624156, iteration: 157770
loss: 1.0155056715011597,grad_norm: 0.803035029371582, iteration: 157771
loss: 0.9878782629966736,grad_norm: 0.9452042438388388, iteration: 157772
loss: 0.9743881821632385,grad_norm: 0.9282665488506618, iteration: 157773
loss: 1.0327428579330444,grad_norm: 0.922445466623257, iteration: 157774
loss: 1.0465874671936035,grad_norm: 0.9999991899744629, iteration: 157775
loss: 1.0217329263687134,grad_norm: 0.9871825217540009, iteration: 157776
loss: 0.9684242606163025,grad_norm: 0.9999989895777643, iteration: 157777
loss: 1.0309796333312988,grad_norm: 0.8531941050361957, iteration: 157778
loss: 0.9812599420547485,grad_norm: 0.9353239032247335, iteration: 157779
loss: 0.98473060131073,grad_norm: 0.9999990144651402, iteration: 157780
loss: 0.9822988510131836,grad_norm: 0.9093509710147106, iteration: 157781
loss: 0.9679601192474365,grad_norm: 0.9999991825962267, iteration: 157782
loss: 0.9524677991867065,grad_norm: 0.9642378279707561, iteration: 157783
loss: 1.0319557189941406,grad_norm: 0.9999989998226263, iteration: 157784
loss: 0.9745659828186035,grad_norm: 0.9999989458760087, iteration: 157785
loss: 0.9590645432472229,grad_norm: 0.9668215577234761, iteration: 157786
loss: 1.0175575017929077,grad_norm: 0.9999989931408894, iteration: 157787
loss: 0.9990397095680237,grad_norm: 0.9056137875006123, iteration: 157788
loss: 0.9917986392974854,grad_norm: 0.9999996735837028, iteration: 157789
loss: 0.9886283874511719,grad_norm: 0.9999989351178373, iteration: 157790
loss: 0.9825635552406311,grad_norm: 0.9731306979645664, iteration: 157791
loss: 0.9761841297149658,grad_norm: 0.8875238648377097, iteration: 157792
loss: 1.0156441926956177,grad_norm: 0.9999992421331607, iteration: 157793
loss: 1.0886367559432983,grad_norm: 0.9999992059802044, iteration: 157794
loss: 1.0002379417419434,grad_norm: 0.9999993217302521, iteration: 157795
loss: 0.9810992479324341,grad_norm: 0.9569703457388271, iteration: 157796
loss: 1.0082379579544067,grad_norm: 0.9743268281555961, iteration: 157797
loss: 1.0381474494934082,grad_norm: 0.9893253480459401, iteration: 157798
loss: 1.040972113609314,grad_norm: 0.9988172584028384, iteration: 157799
loss: 0.9895271062850952,grad_norm: 0.9999991080209064, iteration: 157800
loss: 1.0182209014892578,grad_norm: 0.8769433400421718, iteration: 157801
loss: 0.9972686767578125,grad_norm: 0.9860027776436493, iteration: 157802
loss: 1.0034418106079102,grad_norm: 0.9609409303160527, iteration: 157803
loss: 0.9918776750564575,grad_norm: 0.9999991105388235, iteration: 157804
loss: 0.9881311058998108,grad_norm: 0.985928287324441, iteration: 157805
loss: 1.0278289318084717,grad_norm: 0.917436423396677, iteration: 157806
loss: 1.0222725868225098,grad_norm: 0.8837173046781869, iteration: 157807
loss: 0.9742199778556824,grad_norm: 0.8701485708931858, iteration: 157808
loss: 1.0294345617294312,grad_norm: 0.9999992458974271, iteration: 157809
loss: 1.0180882215499878,grad_norm: 0.9862269003942429, iteration: 157810
loss: 1.0311360359191895,grad_norm: 0.9532069017909043, iteration: 157811
loss: 1.0115183591842651,grad_norm: 0.9879240423809517, iteration: 157812
loss: 1.053389310836792,grad_norm: 0.9999993727689878, iteration: 157813
loss: 0.9847167134284973,grad_norm: 0.9999989852593129, iteration: 157814
loss: 0.9716671109199524,grad_norm: 0.8801238619386541, iteration: 157815
loss: 0.9896224737167358,grad_norm: 0.9999991657193283, iteration: 157816
loss: 1.0070315599441528,grad_norm: 0.9999992461930934, iteration: 157817
loss: 1.1750812530517578,grad_norm: 0.9999993033319187, iteration: 157818
loss: 0.9818059802055359,grad_norm: 0.9999997583958238, iteration: 157819
loss: 1.008791208267212,grad_norm: 0.9946479711005737, iteration: 157820
loss: 0.9840044975280762,grad_norm: 0.9999991734028385, iteration: 157821
loss: 1.0189920663833618,grad_norm: 0.9999992774490284, iteration: 157822
loss: 1.067589521408081,grad_norm: 0.9999997398837364, iteration: 157823
loss: 0.9524023532867432,grad_norm: 0.9999992798245987, iteration: 157824
loss: 1.0213806629180908,grad_norm: 0.9999991641541938, iteration: 157825
loss: 1.0001277923583984,grad_norm: 0.9999990515387605, iteration: 157826
loss: 0.9791995286941528,grad_norm: 0.9164574566820256, iteration: 157827
loss: 1.0282169580459595,grad_norm: 0.8761289166963533, iteration: 157828
loss: 0.9834986329078674,grad_norm: 0.8556447871882272, iteration: 157829
loss: 1.0267462730407715,grad_norm: 0.9999990859504194, iteration: 157830
loss: 0.9781572818756104,grad_norm: 0.980667006653051, iteration: 157831
loss: 1.0136879682540894,grad_norm: 0.9999992296668995, iteration: 157832
loss: 1.0025794506072998,grad_norm: 0.9999989896591701, iteration: 157833
loss: 0.994897723197937,grad_norm: 0.9169109674575313, iteration: 157834
loss: 0.9951040744781494,grad_norm: 0.9999992858485328, iteration: 157835
loss: 1.002036452293396,grad_norm: 0.9999990583602529, iteration: 157836
loss: 1.0315340757369995,grad_norm: 0.9999991806685707, iteration: 157837
loss: 0.9720138907432556,grad_norm: 0.9999991410324538, iteration: 157838
loss: 0.998123288154602,grad_norm: 0.9443245032523203, iteration: 157839
loss: 0.9672191739082336,grad_norm: 0.9999990481563303, iteration: 157840
loss: 1.024829626083374,grad_norm: 0.9999999447426408, iteration: 157841
loss: 1.0110180377960205,grad_norm: 0.8756344666141461, iteration: 157842
loss: 1.0090993642807007,grad_norm: 0.9999991529837826, iteration: 157843
loss: 0.9898511171340942,grad_norm: 0.9999989502796047, iteration: 157844
loss: 0.9851727485656738,grad_norm: 0.9999991328779013, iteration: 157845
loss: 0.9701027870178223,grad_norm: 0.9999991174918993, iteration: 157846
loss: 0.962458074092865,grad_norm: 0.9999992652058464, iteration: 157847
loss: 0.9699234962463379,grad_norm: 0.9289966294952429, iteration: 157848
loss: 0.9845083951950073,grad_norm: 0.999999029591773, iteration: 157849
loss: 0.996160626411438,grad_norm: 0.9999991507451449, iteration: 157850
loss: 0.9758461713790894,grad_norm: 0.9207090746057696, iteration: 157851
loss: 1.041661262512207,grad_norm: 0.999999218533798, iteration: 157852
loss: 0.9774671196937561,grad_norm: 0.9591486620098283, iteration: 157853
loss: 0.9882080554962158,grad_norm: 0.9999998223156353, iteration: 157854
loss: 1.0046987533569336,grad_norm: 0.9999992857731046, iteration: 157855
loss: 1.0610395669937134,grad_norm: 0.9999991315614485, iteration: 157856
loss: 0.9962859153747559,grad_norm: 0.8586693404986995, iteration: 157857
loss: 0.9782474637031555,grad_norm: 0.8149011040708019, iteration: 157858
loss: 1.0022709369659424,grad_norm: 0.9999991786656713, iteration: 157859
loss: 1.02366042137146,grad_norm: 0.9999992296459348, iteration: 157860
loss: 1.0173461437225342,grad_norm: 0.9488900034058879, iteration: 157861
loss: 0.973857045173645,grad_norm: 0.9123261093216766, iteration: 157862
loss: 0.9820743799209595,grad_norm: 0.8857056567664462, iteration: 157863
loss: 0.9949740767478943,grad_norm: 0.8731519548857266, iteration: 157864
loss: 0.999625563621521,grad_norm: 0.9999994121790532, iteration: 157865
loss: 1.0054596662521362,grad_norm: 0.9999993108225753, iteration: 157866
loss: 1.0080844163894653,grad_norm: 0.9999991089945945, iteration: 157867
loss: 1.057081937789917,grad_norm: 0.9999991790311931, iteration: 157868
loss: 0.9940255284309387,grad_norm: 0.8671823972538124, iteration: 157869
loss: 1.0247794389724731,grad_norm: 0.9999990402044809, iteration: 157870
loss: 1.0111446380615234,grad_norm: 0.9999990643235652, iteration: 157871
loss: 0.9701396226882935,grad_norm: 0.9999990853488467, iteration: 157872
loss: 0.9924922585487366,grad_norm: 0.9269229847178292, iteration: 157873
loss: 0.961455225944519,grad_norm: 0.9999990148442001, iteration: 157874
loss: 0.9784303903579712,grad_norm: 0.9999994907074521, iteration: 157875
loss: 0.9957987666130066,grad_norm: 0.9966924455122992, iteration: 157876
loss: 1.0086584091186523,grad_norm: 0.9999990494981481, iteration: 157877
loss: 1.0633245706558228,grad_norm: 0.999999814203402, iteration: 157878
loss: 0.9688184857368469,grad_norm: 0.9999992596256713, iteration: 157879
loss: 1.0065886974334717,grad_norm: 0.9836261628947648, iteration: 157880
loss: 0.9821157455444336,grad_norm: 0.9633112941455486, iteration: 157881
loss: 0.9906313419342041,grad_norm: 0.9294031778836358, iteration: 157882
loss: 1.006007194519043,grad_norm: 0.9248269393360143, iteration: 157883
loss: 0.9726983308792114,grad_norm: 0.9635995107913788, iteration: 157884
loss: 1.0114052295684814,grad_norm: 0.9999992024880856, iteration: 157885
loss: 1.0129917860031128,grad_norm: 0.7605316267377527, iteration: 157886
loss: 1.025073766708374,grad_norm: 0.9999990352418662, iteration: 157887
loss: 0.9845865964889526,grad_norm: 0.9051736053767443, iteration: 157888
loss: 0.9801548719406128,grad_norm: 0.9414639100913369, iteration: 157889
loss: 1.0056407451629639,grad_norm: 0.9097767823727284, iteration: 157890
loss: 1.0267482995986938,grad_norm: 0.8529152257628027, iteration: 157891
loss: 1.0686533451080322,grad_norm: 0.9999993209438207, iteration: 157892
loss: 0.9697050452232361,grad_norm: 0.9999992992358899, iteration: 157893
loss: 0.9752480983734131,grad_norm: 0.9999991647714166, iteration: 157894
loss: 1.0360442399978638,grad_norm: 0.9999998712203324, iteration: 157895
loss: 1.005853295326233,grad_norm: 0.9999991803119618, iteration: 157896
loss: 0.9923108816146851,grad_norm: 0.9999990879765438, iteration: 157897
loss: 0.9837031960487366,grad_norm: 0.9999991084451904, iteration: 157898
loss: 1.0100327730178833,grad_norm: 0.9999992111124935, iteration: 157899
loss: 1.0094913244247437,grad_norm: 0.9999991361660018, iteration: 157900
loss: 1.0053155422210693,grad_norm: 0.8637139902989838, iteration: 157901
loss: 0.9554398655891418,grad_norm: 0.8922229914879154, iteration: 157902
loss: 1.0326929092407227,grad_norm: 0.9692166152695627, iteration: 157903
loss: 1.0152634382247925,grad_norm: 0.9999991617060829, iteration: 157904
loss: 0.9751167297363281,grad_norm: 0.9999992704942627, iteration: 157905
loss: 1.004650592803955,grad_norm: 0.8357817108729972, iteration: 157906
loss: 0.9811469912528992,grad_norm: 0.846788709501259, iteration: 157907
loss: 0.9717483520507812,grad_norm: 0.993157366400667, iteration: 157908
loss: 1.0594985485076904,grad_norm: 0.999999598380854, iteration: 157909
loss: 1.0075428485870361,grad_norm: 0.9083662706088862, iteration: 157910
loss: 0.9863530993461609,grad_norm: 0.8262629961912523, iteration: 157911
loss: 1.0188956260681152,grad_norm: 0.8449109547625276, iteration: 157912
loss: 0.9928765892982483,grad_norm: 0.9600430673715328, iteration: 157913
loss: 0.9882497191429138,grad_norm: 0.9946944648130308, iteration: 157914
loss: 1.0524959564208984,grad_norm: 0.9999998161565116, iteration: 157915
loss: 1.027295470237732,grad_norm: 0.9999990150179283, iteration: 157916
loss: 1.0934779644012451,grad_norm: 0.9999993374176807, iteration: 157917
loss: 1.0290635824203491,grad_norm: 0.9999993528825185, iteration: 157918
loss: 1.0102064609527588,grad_norm: 0.9999990559710517, iteration: 157919
loss: 1.0088914632797241,grad_norm: 0.9831284801940926, iteration: 157920
loss: 0.9687243103981018,grad_norm: 0.9999989506051183, iteration: 157921
loss: 1.016785740852356,grad_norm: 0.9999991802551488, iteration: 157922
loss: 1.0162107944488525,grad_norm: 0.999999204367327, iteration: 157923
loss: 0.9964417219161987,grad_norm: 0.9528570913262907, iteration: 157924
loss: 0.9941797256469727,grad_norm: 0.9999990834675518, iteration: 157925
loss: 0.9951668381690979,grad_norm: 0.9999992739219914, iteration: 157926
loss: 1.0065295696258545,grad_norm: 0.999999031020861, iteration: 157927
loss: 1.0410962104797363,grad_norm: 0.9872348097694202, iteration: 157928
loss: 0.9625217914581299,grad_norm: 0.961693055279515, iteration: 157929
loss: 0.993712842464447,grad_norm: 0.8834311633236654, iteration: 157930
loss: 0.978346049785614,grad_norm: 0.9999991659892871, iteration: 157931
loss: 1.0443094968795776,grad_norm: 0.9999991769847656, iteration: 157932
loss: 1.0090258121490479,grad_norm: 0.9486364084835412, iteration: 157933
loss: 1.0005117654800415,grad_norm: 0.9999991324497657, iteration: 157934
loss: 1.0203626155853271,grad_norm: 0.9999993965646135, iteration: 157935
loss: 0.9693264961242676,grad_norm: 0.9520374630683655, iteration: 157936
loss: 1.005710244178772,grad_norm: 0.8020586220456193, iteration: 157937
loss: 1.0101361274719238,grad_norm: 0.9999994207302673, iteration: 157938
loss: 1.0021339654922485,grad_norm: 0.8931831775277832, iteration: 157939
loss: 0.9755569100379944,grad_norm: 0.8856910212959129, iteration: 157940
loss: 0.9984059929847717,grad_norm: 0.9999992685547714, iteration: 157941
loss: 0.9878008365631104,grad_norm: 0.999999010006027, iteration: 157942
loss: 0.9970151782035828,grad_norm: 0.9532573835620619, iteration: 157943
loss: 1.0371952056884766,grad_norm: 0.9999992707679356, iteration: 157944
loss: 1.0227490663528442,grad_norm: 0.9999992757212821, iteration: 157945
loss: 1.0713930130004883,grad_norm: 0.9999992302029593, iteration: 157946
loss: 0.9617593884468079,grad_norm: 0.9999990743274885, iteration: 157947
loss: 0.9774132966995239,grad_norm: 0.9999990739118509, iteration: 157948
loss: 1.061846137046814,grad_norm: 0.9999991333498478, iteration: 157949
loss: 1.0523144006729126,grad_norm: 0.9999992176776452, iteration: 157950
loss: 1.0043987035751343,grad_norm: 0.9999992044528289, iteration: 157951
loss: 1.0370732545852661,grad_norm: 0.9999997620312711, iteration: 157952
loss: 1.0641502141952515,grad_norm: 0.9414543584005108, iteration: 157953
loss: 0.9908299446105957,grad_norm: 0.8931444509910909, iteration: 157954
loss: 1.0160300731658936,grad_norm: 0.9999993621632678, iteration: 157955
loss: 1.039629578590393,grad_norm: 0.9999991364605896, iteration: 157956
loss: 1.0013102293014526,grad_norm: 0.9627994937753928, iteration: 157957
loss: 1.032668948173523,grad_norm: 0.9999993668360847, iteration: 157958
loss: 0.9919160604476929,grad_norm: 0.929135874235831, iteration: 157959
loss: 0.9819982647895813,grad_norm: 0.9999992779468828, iteration: 157960
loss: 1.1133071184158325,grad_norm: 0.9999998731006461, iteration: 157961
loss: 0.9916006326675415,grad_norm: 0.9999991463198662, iteration: 157962
loss: 0.9896766543388367,grad_norm: 0.9999991112119823, iteration: 157963
loss: 1.011311650276184,grad_norm: 0.9545784889894651, iteration: 157964
loss: 0.9349018335342407,grad_norm: 0.9080777876303937, iteration: 157965
loss: 1.0305417776107788,grad_norm: 0.9999990669715653, iteration: 157966
loss: 1.026655673980713,grad_norm: 0.9713834466080639, iteration: 157967
loss: 1.016303300857544,grad_norm: 0.9999991511869603, iteration: 157968
loss: 0.9742029309272766,grad_norm: 0.984286588547557, iteration: 157969
loss: 1.009351134300232,grad_norm: 0.99999911457017, iteration: 157970
loss: 1.0036934614181519,grad_norm: 0.9999991498850949, iteration: 157971
loss: 1.0020372867584229,grad_norm: 0.999999072934809, iteration: 157972
loss: 1.0549471378326416,grad_norm: 0.9999999070133231, iteration: 157973
loss: 0.9888148903846741,grad_norm: 0.8862855005498684, iteration: 157974
loss: 0.9699867963790894,grad_norm: 0.8623849362758003, iteration: 157975
loss: 0.9683024883270264,grad_norm: 0.9657276131174892, iteration: 157976
loss: 0.9395686388015747,grad_norm: 0.9999990588806392, iteration: 157977
loss: 0.9916841387748718,grad_norm: 0.9695930337040691, iteration: 157978
loss: 1.0051493644714355,grad_norm: 0.9999992138082244, iteration: 157979
loss: 1.0322833061218262,grad_norm: 0.9999989925322486, iteration: 157980
loss: 0.9765008687973022,grad_norm: 0.999999044861775, iteration: 157981
loss: 1.0159149169921875,grad_norm: 0.99999905636594, iteration: 157982
loss: 1.0474469661712646,grad_norm: 0.9999998500999692, iteration: 157983
loss: 1.0531845092773438,grad_norm: 0.9999991506003774, iteration: 157984
loss: 1.0116908550262451,grad_norm: 0.8984416522999878, iteration: 157985
loss: 0.9842392802238464,grad_norm: 0.9999992454495898, iteration: 157986
loss: 1.0149269104003906,grad_norm: 0.9226466131437444, iteration: 157987
loss: 0.9998487234115601,grad_norm: 0.9999991039111653, iteration: 157988
loss: 1.0452755689620972,grad_norm: 0.9999995993799232, iteration: 157989
loss: 1.0001190900802612,grad_norm: 0.999999087123849, iteration: 157990
loss: 1.0181504487991333,grad_norm: 0.999999858450623, iteration: 157991
loss: 0.9935435652732849,grad_norm: 0.9999991705558454, iteration: 157992
loss: 0.9937618970870972,grad_norm: 0.938858851938745, iteration: 157993
loss: 1.01002037525177,grad_norm: 0.9418101898966962, iteration: 157994
loss: 0.9562869668006897,grad_norm: 0.9827526246891964, iteration: 157995
loss: 1.025870442390442,grad_norm: 0.9999990128507217, iteration: 157996
loss: 1.030640959739685,grad_norm: 0.9999994139781566, iteration: 157997
loss: 1.026462435722351,grad_norm: 0.9999989945993162, iteration: 157998
loss: 1.0057523250579834,grad_norm: 0.9999991054751025, iteration: 157999
loss: 0.9924446940422058,grad_norm: 0.9999992705810381, iteration: 158000
loss: 1.010116457939148,grad_norm: 0.9077988333331154, iteration: 158001
loss: 0.9998930096626282,grad_norm: 0.8256879203108288, iteration: 158002
loss: 1.0165081024169922,grad_norm: 0.9548307282710705, iteration: 158003
loss: 1.007408618927002,grad_norm: 0.9999990220881496, iteration: 158004
loss: 0.9767959713935852,grad_norm: 0.9275005392859519, iteration: 158005
loss: 1.0403423309326172,grad_norm: 0.8316062358846228, iteration: 158006
loss: 1.0002728700637817,grad_norm: 0.9999991851792062, iteration: 158007
loss: 1.0309003591537476,grad_norm: 0.9999990399932673, iteration: 158008
loss: 0.9784725904464722,grad_norm: 0.9186718248578807, iteration: 158009
loss: 1.015326738357544,grad_norm: 0.8589957198574352, iteration: 158010
loss: 0.9870379567146301,grad_norm: 0.9999990841112479, iteration: 158011
loss: 1.0136685371398926,grad_norm: 0.9814400659319167, iteration: 158012
loss: 1.0257006883621216,grad_norm: 0.9780382108001777, iteration: 158013
loss: 0.9950332641601562,grad_norm: 0.9327612391468889, iteration: 158014
loss: 1.035272240638733,grad_norm: 0.9990261110509848, iteration: 158015
loss: 1.014522910118103,grad_norm: 0.8620558265518939, iteration: 158016
loss: 0.9820782542228699,grad_norm: 0.9999992051229866, iteration: 158017
loss: 0.9867759346961975,grad_norm: 0.9999989752511615, iteration: 158018
loss: 0.9995065927505493,grad_norm: 0.9749672003080293, iteration: 158019
loss: 1.0075987577438354,grad_norm: 0.9470955851889151, iteration: 158020
loss: 1.0015758275985718,grad_norm: 0.999999168087658, iteration: 158021
loss: 1.0518224239349365,grad_norm: 0.999999286645678, iteration: 158022
loss: 0.9723639488220215,grad_norm: 0.9445323438801694, iteration: 158023
loss: 1.0192451477050781,grad_norm: 0.9070458144457111, iteration: 158024
loss: 0.9765722155570984,grad_norm: 0.9999991777024277, iteration: 158025
loss: 1.0782707929611206,grad_norm: 0.9999991029177018, iteration: 158026
loss: 0.9857097268104553,grad_norm: 0.9298815577339419, iteration: 158027
loss: 1.0194365978240967,grad_norm: 0.9999991067995181, iteration: 158028
loss: 1.0162562131881714,grad_norm: 0.9999990523737398, iteration: 158029
loss: 1.0234220027923584,grad_norm: 0.9999991847183087, iteration: 158030
loss: 1.0353411436080933,grad_norm: 0.9377079958386232, iteration: 158031
loss: 1.0246912240982056,grad_norm: 0.9999992288841274, iteration: 158032
loss: 0.9905051589012146,grad_norm: 0.9999991910599311, iteration: 158033
loss: 0.9381912350654602,grad_norm: 0.9846504466188117, iteration: 158034
loss: 0.9846581220626831,grad_norm: 0.9999989221063611, iteration: 158035
loss: 0.9999590516090393,grad_norm: 0.7487521837670804, iteration: 158036
loss: 0.97950679063797,grad_norm: 0.9999991094547527, iteration: 158037
loss: 1.0054514408111572,grad_norm: 0.9999990525287835, iteration: 158038
loss: 1.015191912651062,grad_norm: 0.8545498207972303, iteration: 158039
loss: 1.0480992794036865,grad_norm: 0.9999998872002714, iteration: 158040
loss: 1.0022319555282593,grad_norm: 0.999999233369886, iteration: 158041
loss: 0.9717473983764648,grad_norm: 0.9999992600927178, iteration: 158042
loss: 1.0348479747772217,grad_norm: 0.9999998227623401, iteration: 158043
loss: 0.9815495610237122,grad_norm: 0.9027928406860443, iteration: 158044
loss: 1.0010968446731567,grad_norm: 0.9999990442144977, iteration: 158045
loss: 0.9870612621307373,grad_norm: 0.9342957809642143, iteration: 158046
loss: 1.0147089958190918,grad_norm: 0.9999990083867414, iteration: 158047
loss: 1.0219776630401611,grad_norm: 0.9999991141889782, iteration: 158048
loss: 0.9699361324310303,grad_norm: 0.9999990673520456, iteration: 158049
loss: 1.0182005167007446,grad_norm: 0.7491199098473417, iteration: 158050
loss: 0.987179696559906,grad_norm: 0.9998614737648683, iteration: 158051
loss: 0.9794836044311523,grad_norm: 0.9833286849542745, iteration: 158052
loss: 0.9850913882255554,grad_norm: 0.9999989967591966, iteration: 158053
loss: 1.0323739051818848,grad_norm: 0.9999990971971923, iteration: 158054
loss: 1.0093064308166504,grad_norm: 0.9999996272783183, iteration: 158055
loss: 1.0021110773086548,grad_norm: 0.9999990160185731, iteration: 158056
loss: 1.0094177722930908,grad_norm: 0.9999993204587625, iteration: 158057
loss: 0.9938134551048279,grad_norm: 0.999999167249144, iteration: 158058
loss: 0.9879489541053772,grad_norm: 0.9999990878916495, iteration: 158059
loss: 1.0242562294006348,grad_norm: 0.9747306800203575, iteration: 158060
loss: 0.9789233803749084,grad_norm: 0.9999991492841862, iteration: 158061
loss: 0.9718133807182312,grad_norm: 0.9288995987103301, iteration: 158062
loss: 1.0204768180847168,grad_norm: 0.9838473991334651, iteration: 158063
loss: 1.049375295639038,grad_norm: 0.9999997535143625, iteration: 158064
loss: 0.9802075624465942,grad_norm: 0.9930069765359237, iteration: 158065
loss: 0.9577394127845764,grad_norm: 0.9999991636866868, iteration: 158066
loss: 1.0325242280960083,grad_norm: 0.9999990938155306, iteration: 158067
loss: 1.037485122680664,grad_norm: 0.9577096644254098, iteration: 158068
loss: 1.0091785192489624,grad_norm: 0.9836391196434541, iteration: 158069
loss: 1.0168241262435913,grad_norm: 0.9999992413721973, iteration: 158070
loss: 1.0391467809677124,grad_norm: 0.9999991824869998, iteration: 158071
loss: 0.9874957203865051,grad_norm: 0.9999989900782323, iteration: 158072
loss: 0.9939064979553223,grad_norm: 0.9864378477324846, iteration: 158073
loss: 1.0296109914779663,grad_norm: 0.9999993395716166, iteration: 158074
loss: 1.0119469165802002,grad_norm: 0.8123807629939278, iteration: 158075
loss: 1.0317459106445312,grad_norm: 0.9705093522519647, iteration: 158076
loss: 0.9998672604560852,grad_norm: 0.954403696605679, iteration: 158077
loss: 0.9875115156173706,grad_norm: 0.999998917745579, iteration: 158078
loss: 0.994568407535553,grad_norm: 0.9999992436649161, iteration: 158079
loss: 1.0099773406982422,grad_norm: 0.9999993198959549, iteration: 158080
loss: 0.9999737739562988,grad_norm: 0.999999257206956, iteration: 158081
loss: 0.9893220067024231,grad_norm: 0.9320730061801049, iteration: 158082
loss: 1.0151829719543457,grad_norm: 0.9999991120118712, iteration: 158083
loss: 0.9895510077476501,grad_norm: 0.9999990859912208, iteration: 158084
loss: 1.0335811376571655,grad_norm: 0.9999989380380423, iteration: 158085
loss: 0.9924554228782654,grad_norm: 0.9999990832506456, iteration: 158086
loss: 0.9684154987335205,grad_norm: 0.9999990054344867, iteration: 158087
loss: 1.0001157522201538,grad_norm: 0.963800445128535, iteration: 158088
loss: 1.0025893449783325,grad_norm: 0.9999989795859575, iteration: 158089
loss: 0.9818316102027893,grad_norm: 0.9999991990825811, iteration: 158090
loss: 1.006109356880188,grad_norm: 0.9999991630198161, iteration: 158091
loss: 0.9963534474372864,grad_norm: 0.9192396013680726, iteration: 158092
loss: 0.9863494038581848,grad_norm: 0.9999992011264685, iteration: 158093
loss: 0.9970674514770508,grad_norm: 0.9999993326950094, iteration: 158094
loss: 0.9753662943840027,grad_norm: 0.999999140754784, iteration: 158095
loss: 1.053727149963379,grad_norm: 0.9999997306058749, iteration: 158096
loss: 0.9853321313858032,grad_norm: 0.7788390607029904, iteration: 158097
loss: 0.9877480864524841,grad_norm: 0.9039449016612432, iteration: 158098
loss: 1.0107219219207764,grad_norm: 0.9999991893462703, iteration: 158099
loss: 0.990943431854248,grad_norm: 0.9774454277757817, iteration: 158100
loss: 1.007347583770752,grad_norm: 0.9999992530435974, iteration: 158101
loss: 1.0022993087768555,grad_norm: 0.9999990812072178, iteration: 158102
loss: 1.0167744159698486,grad_norm: 0.8318539816945307, iteration: 158103
loss: 0.9989583492279053,grad_norm: 0.8687388645712046, iteration: 158104
loss: 1.025296926498413,grad_norm: 0.8807276909229995, iteration: 158105
loss: 0.9981647729873657,grad_norm: 0.9293332018225409, iteration: 158106
loss: 0.9930200576782227,grad_norm: 0.9999993085551038, iteration: 158107
loss: 0.9767715930938721,grad_norm: 0.958579773346972, iteration: 158108
loss: 0.9943985939025879,grad_norm: 0.9999990855784551, iteration: 158109
loss: 1.0134265422821045,grad_norm: 0.9999991977715089, iteration: 158110
loss: 1.0483505725860596,grad_norm: 0.999999144691017, iteration: 158111
loss: 0.9989689588546753,grad_norm: 0.8501933746729144, iteration: 158112
loss: 1.0267232656478882,grad_norm: 0.9507286051496353, iteration: 158113
loss: 1.0088961124420166,grad_norm: 0.9999991274376139, iteration: 158114
loss: 0.9781081676483154,grad_norm: 0.8591717034835026, iteration: 158115
loss: 0.9805100560188293,grad_norm: 0.813395091826726, iteration: 158116
loss: 1.0234497785568237,grad_norm: 0.9690755196196056, iteration: 158117
loss: 0.9710835814476013,grad_norm: 0.892679372638977, iteration: 158118
loss: 1.0197211503982544,grad_norm: 0.9252644769276981, iteration: 158119
loss: 0.9955288171768188,grad_norm: 0.9999993379237725, iteration: 158120
loss: 0.9630451798439026,grad_norm: 0.9999991869696866, iteration: 158121
loss: 0.9962636828422546,grad_norm: 0.9314740740667651, iteration: 158122
loss: 0.9941680431365967,grad_norm: 0.7851886334182732, iteration: 158123
loss: 1.0095925331115723,grad_norm: 0.98322150793388, iteration: 158124
loss: 0.9712826609611511,grad_norm: 0.9327086693181122, iteration: 158125
loss: 0.9742850065231323,grad_norm: 0.9999991037969482, iteration: 158126
loss: 0.9632363319396973,grad_norm: 0.9416772902591124, iteration: 158127
loss: 0.9832388758659363,grad_norm: 0.990568904647664, iteration: 158128
loss: 1.0232396125793457,grad_norm: 0.9042794357285591, iteration: 158129
loss: 1.0232574939727783,grad_norm: 0.9999991843050794, iteration: 158130
loss: 1.0184836387634277,grad_norm: 0.968287684913907, iteration: 158131
loss: 0.9881389737129211,grad_norm: 0.9999990048217087, iteration: 158132
loss: 1.0131455659866333,grad_norm: 0.9378844932573448, iteration: 158133
loss: 0.9684714078903198,grad_norm: 0.9210472577555758, iteration: 158134
loss: 1.0460381507873535,grad_norm: 0.9999991817226634, iteration: 158135
loss: 1.0070685148239136,grad_norm: 0.9999990765701755, iteration: 158136
loss: 0.9818755984306335,grad_norm: 0.9219426937134683, iteration: 158137
loss: 1.0108981132507324,grad_norm: 0.9999991476580546, iteration: 158138
loss: 1.0006530284881592,grad_norm: 0.9999992384857448, iteration: 158139
loss: 1.0269381999969482,grad_norm: 0.9678193862435492, iteration: 158140
loss: 0.9958458542823792,grad_norm: 0.9999990375410864, iteration: 158141
loss: 0.9909034371376038,grad_norm: 0.9999990759577367, iteration: 158142
loss: 1.0288459062576294,grad_norm: 0.854353107157002, iteration: 158143
loss: 1.0372507572174072,grad_norm: 0.9999998359762658, iteration: 158144
loss: 0.9567450284957886,grad_norm: 0.9999990358815162, iteration: 158145
loss: 1.0047048330307007,grad_norm: 0.9857755667970183, iteration: 158146
loss: 1.017296552658081,grad_norm: 0.9999992101768099, iteration: 158147
loss: 1.0097355842590332,grad_norm: 0.9999990880685118, iteration: 158148
loss: 1.0135431289672852,grad_norm: 0.9506926227020914, iteration: 158149
loss: 0.9847145080566406,grad_norm: 0.7275260241351712, iteration: 158150
loss: 0.9819746613502502,grad_norm: 0.9999990995087019, iteration: 158151
loss: 1.0061933994293213,grad_norm: 0.9999992138180465, iteration: 158152
loss: 1.0004011392593384,grad_norm: 0.8837518303811306, iteration: 158153
loss: 1.0304179191589355,grad_norm: 0.9999990649066677, iteration: 158154
loss: 1.0193190574645996,grad_norm: 0.9999992172409907, iteration: 158155
loss: 1.0094444751739502,grad_norm: 0.9999991185481645, iteration: 158156
loss: 0.9738264679908752,grad_norm: 0.9999989965318171, iteration: 158157
loss: 1.0039197206497192,grad_norm: 0.9999995837133496, iteration: 158158
loss: 1.0293055772781372,grad_norm: 0.9999991205193387, iteration: 158159
loss: 1.0263643264770508,grad_norm: 0.9999991350252788, iteration: 158160
loss: 1.0354065895080566,grad_norm: 0.9999990446614249, iteration: 158161
loss: 0.9931274652481079,grad_norm: 0.9420217255540663, iteration: 158162
loss: 1.004318356513977,grad_norm: 0.8572919589787676, iteration: 158163
loss: 1.0035884380340576,grad_norm: 0.9999991380476311, iteration: 158164
loss: 0.9911261200904846,grad_norm: 0.9021709929746162, iteration: 158165
loss: 1.0087169408798218,grad_norm: 0.9689316366235063, iteration: 158166
loss: 1.0167466402053833,grad_norm: 0.999999057364206, iteration: 158167
loss: 1.0328384637832642,grad_norm: 0.999999090051112, iteration: 158168
loss: 1.0094655752182007,grad_norm: 0.9214855582098591, iteration: 158169
loss: 1.0091181993484497,grad_norm: 0.9999991137314823, iteration: 158170
loss: 0.9965760707855225,grad_norm: 0.9500921113658969, iteration: 158171
loss: 0.9844191074371338,grad_norm: 0.9999992713990153, iteration: 158172
loss: 0.9908398389816284,grad_norm: 0.9829080784079604, iteration: 158173
loss: 1.0065102577209473,grad_norm: 0.9988323523650608, iteration: 158174
loss: 0.9633403420448303,grad_norm: 0.999999137679312, iteration: 158175
loss: 0.9988178610801697,grad_norm: 0.9999991118212617, iteration: 158176
loss: 1.0100704431533813,grad_norm: 0.9833174558661842, iteration: 158177
loss: 0.9869189262390137,grad_norm: 0.9778691572617549, iteration: 158178
loss: 1.004118800163269,grad_norm: 0.9999991593044053, iteration: 158179
loss: 0.9900146126747131,grad_norm: 0.8876001706375682, iteration: 158180
loss: 1.0000852346420288,grad_norm: 0.8539553384302978, iteration: 158181
loss: 1.0030375719070435,grad_norm: 0.9999992219748667, iteration: 158182
loss: 0.9912765026092529,grad_norm: 0.9999992184982357, iteration: 158183
loss: 0.991176962852478,grad_norm: 0.999999212127369, iteration: 158184
loss: 1.0127161741256714,grad_norm: 0.9999991435935602, iteration: 158185
loss: 1.003838062286377,grad_norm: 0.938388949416454, iteration: 158186
loss: 0.9923496246337891,grad_norm: 0.9636857475630626, iteration: 158187
loss: 0.9649486541748047,grad_norm: 0.986811375532678, iteration: 158188
loss: 1.011651635169983,grad_norm: 0.9999992583911247, iteration: 158189
loss: 0.9979564547538757,grad_norm: 0.9882223545338571, iteration: 158190
loss: 1.0169533491134644,grad_norm: 0.9880756337346273, iteration: 158191
loss: 0.99092036485672,grad_norm: 0.9769654423829823, iteration: 158192
loss: 1.0308811664581299,grad_norm: 0.908908705004362, iteration: 158193
loss: 0.9960200786590576,grad_norm: 0.9999992190858336, iteration: 158194
loss: 0.9897546172142029,grad_norm: 0.9756310816376729, iteration: 158195
loss: 0.9960644245147705,grad_norm: 0.9999992532917144, iteration: 158196
loss: 0.9858549237251282,grad_norm: 0.999999294630688, iteration: 158197
loss: 1.0118772983551025,grad_norm: 0.9999992169479205, iteration: 158198
loss: 1.022274374961853,grad_norm: 0.9708610097906493, iteration: 158199
loss: 1.0000582933425903,grad_norm: 0.7704410406564853, iteration: 158200
loss: 0.9926956295967102,grad_norm: 0.8730985896888969, iteration: 158201
loss: 0.9491857886314392,grad_norm: 0.9622371632666822, iteration: 158202
loss: 0.978477954864502,grad_norm: 0.9999991867664952, iteration: 158203
loss: 1.0230143070220947,grad_norm: 0.9227502720268261, iteration: 158204
loss: 0.9859960079193115,grad_norm: 0.9999990424418355, iteration: 158205
loss: 1.00413179397583,grad_norm: 0.9390870689697791, iteration: 158206
loss: 1.0789775848388672,grad_norm: 0.9999997136000573, iteration: 158207
loss: 0.9800785183906555,grad_norm: 0.9999992149408471, iteration: 158208
loss: 0.9827663898468018,grad_norm: 0.9999990705163622, iteration: 158209
loss: 0.9973902106285095,grad_norm: 0.9998332551271697, iteration: 158210
loss: 1.0047885179519653,grad_norm: 0.9999992372440467, iteration: 158211
loss: 1.0448342561721802,grad_norm: 0.9999991554726217, iteration: 158212
loss: 1.0117409229278564,grad_norm: 0.9999990932886053, iteration: 158213
loss: 0.9873332381248474,grad_norm: 0.9999992614429942, iteration: 158214
loss: 1.0084662437438965,grad_norm: 0.9999989676752672, iteration: 158215
loss: 0.9797624945640564,grad_norm: 0.9999991560030941, iteration: 158216
loss: 1.0112907886505127,grad_norm: 0.9999991811261627, iteration: 158217
loss: 1.027440071105957,grad_norm: 0.9230219720825227, iteration: 158218
loss: 1.006683588027954,grad_norm: 0.8536766054419543, iteration: 158219
loss: 0.9991109371185303,grad_norm: 0.8666211319431878, iteration: 158220
loss: 1.0391160249710083,grad_norm: 0.9999996060214914, iteration: 158221
loss: 0.9937222003936768,grad_norm: 0.99999927431349, iteration: 158222
loss: 1.0187790393829346,grad_norm: 0.9020242526013865, iteration: 158223
loss: 1.0112558603286743,grad_norm: 0.816022871729181, iteration: 158224
loss: 0.9956071376800537,grad_norm: 0.9685081188393796, iteration: 158225
loss: 0.9906130433082581,grad_norm: 0.9999992029262147, iteration: 158226
loss: 1.0002799034118652,grad_norm: 0.8897552963335508, iteration: 158227
loss: 1.01143217086792,grad_norm: 0.9644931986120764, iteration: 158228
loss: 1.0088659524917603,grad_norm: 0.9623581084448873, iteration: 158229
loss: 0.9765915870666504,grad_norm: 0.9747719984294249, iteration: 158230
loss: 0.9477882981300354,grad_norm: 0.8898412688293651, iteration: 158231
loss: 0.9766868352890015,grad_norm: 0.9999991357760486, iteration: 158232
loss: 0.9855380058288574,grad_norm: 0.9999991013999782, iteration: 158233
loss: 1.063411831855774,grad_norm: 0.9999993367532228, iteration: 158234
loss: 1.0392065048217773,grad_norm: 0.9999995799443989, iteration: 158235
loss: 1.0188885927200317,grad_norm: 0.9678837041488835, iteration: 158236
loss: 0.992304801940918,grad_norm: 0.9999991963641031, iteration: 158237
loss: 1.0174176692962646,grad_norm: 0.9999991851632349, iteration: 158238
loss: 1.0008118152618408,grad_norm: 0.9999992780913901, iteration: 158239
loss: 0.9839490652084351,grad_norm: 0.9999991325563723, iteration: 158240
loss: 0.9947404265403748,grad_norm: 0.9999990751336443, iteration: 158241
loss: 0.9866354465484619,grad_norm: 0.9999991256217259, iteration: 158242
loss: 0.9876683950424194,grad_norm: 0.9999992172411675, iteration: 158243
loss: 1.0160144567489624,grad_norm: 0.9350344858141638, iteration: 158244
loss: 1.0106208324432373,grad_norm: 0.9999993053883418, iteration: 158245
loss: 0.9724889993667603,grad_norm: 0.9999991719728533, iteration: 158246
loss: 1.0114160776138306,grad_norm: 0.9159255651246306, iteration: 158247
loss: 1.0078063011169434,grad_norm: 0.9247123861881076, iteration: 158248
loss: 1.0259546041488647,grad_norm: 0.999999242696602, iteration: 158249
loss: 1.0091254711151123,grad_norm: 0.9990089257613436, iteration: 158250
loss: 1.0033388137817383,grad_norm: 0.8868840751067506, iteration: 158251
loss: 1.0740587711334229,grad_norm: 0.9999997096636933, iteration: 158252
loss: 0.9722040891647339,grad_norm: 0.999999225003507, iteration: 158253
loss: 0.9988929629325867,grad_norm: 0.9999991137280597, iteration: 158254
loss: 0.9950302243232727,grad_norm: 0.9869596625471566, iteration: 158255
loss: 0.9975354671478271,grad_norm: 0.9825573327260235, iteration: 158256
loss: 1.0028550624847412,grad_norm: 0.9914980023428734, iteration: 158257
loss: 0.9536867737770081,grad_norm: 0.9099837695567393, iteration: 158258
loss: 0.9872552752494812,grad_norm: 0.9700359526625613, iteration: 158259
loss: 1.0609098672866821,grad_norm: 0.9999990951259157, iteration: 158260
loss: 0.9844433665275574,grad_norm: 0.9999992383672435, iteration: 158261
loss: 1.0216444730758667,grad_norm: 0.9999993698853598, iteration: 158262
loss: 0.9830528497695923,grad_norm: 0.9999991550246846, iteration: 158263
loss: 0.9903163909912109,grad_norm: 0.9999992852701195, iteration: 158264
loss: 1.0007249116897583,grad_norm: 0.9999991369821349, iteration: 158265
loss: 0.9825945496559143,grad_norm: 0.999999131887481, iteration: 158266
loss: 1.0242561101913452,grad_norm: 0.9460430233234152, iteration: 158267
loss: 0.977033793926239,grad_norm: 0.9999990778605694, iteration: 158268
loss: 1.0539271831512451,grad_norm: 0.9999993002748456, iteration: 158269
loss: 0.9526275396347046,grad_norm: 0.9828580118499326, iteration: 158270
loss: 1.0193413496017456,grad_norm: 0.9999995713681632, iteration: 158271
loss: 1.023885726928711,grad_norm: 0.9734912140301397, iteration: 158272
loss: 0.9528650045394897,grad_norm: 0.9999991334400106, iteration: 158273
loss: 1.0093884468078613,grad_norm: 0.999999154171727, iteration: 158274
loss: 0.9667424559593201,grad_norm: 0.9260006171850838, iteration: 158275
loss: 0.9879209399223328,grad_norm: 0.9999991232968555, iteration: 158276
loss: 1.005190372467041,grad_norm: 0.9731163341009972, iteration: 158277
loss: 1.012340784072876,grad_norm: 0.8263020517712164, iteration: 158278
loss: 1.0081326961517334,grad_norm: 0.9999990991139958, iteration: 158279
loss: 1.0056302547454834,grad_norm: 0.8323039181738162, iteration: 158280
loss: 1.0190743207931519,grad_norm: 0.9999991189638283, iteration: 158281
loss: 0.9783967137336731,grad_norm: 0.9999992397080011, iteration: 158282
loss: 0.9957367181777954,grad_norm: 0.9999989673226897, iteration: 158283
loss: 0.9993367791175842,grad_norm: 0.9999992152827755, iteration: 158284
loss: 1.0000765323638916,grad_norm: 0.9999991554244348, iteration: 158285
loss: 1.0449219942092896,grad_norm: 0.9999991512850935, iteration: 158286
loss: 1.0139144659042358,grad_norm: 0.9029233764130189, iteration: 158287
loss: 1.0469613075256348,grad_norm: 0.9999998151389315, iteration: 158288
loss: 0.9900375604629517,grad_norm: 0.9999990920158699, iteration: 158289
loss: 0.9850674271583557,grad_norm: 0.9759659402394085, iteration: 158290
loss: 1.0053353309631348,grad_norm: 0.9999989569575433, iteration: 158291
loss: 1.00327730178833,grad_norm: 0.9786676263061488, iteration: 158292
loss: 0.99405437707901,grad_norm: 0.9999992550015255, iteration: 158293
loss: 0.9646220803260803,grad_norm: 0.9823281885461844, iteration: 158294
loss: 0.9854198098182678,grad_norm: 0.9488468041187013, iteration: 158295
loss: 1.0031055212020874,grad_norm: 0.9999994460326738, iteration: 158296
loss: 0.9761794209480286,grad_norm: 0.9391891641302984, iteration: 158297
loss: 0.975665807723999,grad_norm: 0.8697125066685867, iteration: 158298
loss: 0.981844425201416,grad_norm: 0.9317173043885294, iteration: 158299
loss: 1.0523779392242432,grad_norm: 0.9543339435078617, iteration: 158300
loss: 1.0147589445114136,grad_norm: 0.9999991804648276, iteration: 158301
loss: 1.0092757940292358,grad_norm: 0.93793086883469, iteration: 158302
loss: 0.9666666388511658,grad_norm: 0.8990058528033448, iteration: 158303
loss: 1.0070490837097168,grad_norm: 0.9945264637207811, iteration: 158304
loss: 1.0393211841583252,grad_norm: 0.9999991878008255, iteration: 158305
loss: 0.987235426902771,grad_norm: 0.937744325746067, iteration: 158306
loss: 0.9911255240440369,grad_norm: 0.9999992722726302, iteration: 158307
loss: 1.0028386116027832,grad_norm: 0.8649221320802185, iteration: 158308
loss: 1.0045088529586792,grad_norm: 0.9235920352065298, iteration: 158309
loss: 0.9559390544891357,grad_norm: 0.9999989257676057, iteration: 158310
loss: 1.059038758277893,grad_norm: 0.9999992284922317, iteration: 158311
loss: 0.9972458481788635,grad_norm: 0.9999990694466047, iteration: 158312
loss: 1.0246529579162598,grad_norm: 0.9704460982306099, iteration: 158313
loss: 1.046186923980713,grad_norm: 0.9622695391302933, iteration: 158314
loss: 1.008721113204956,grad_norm: 0.9562936808337802, iteration: 158315
loss: 1.0705944299697876,grad_norm: 0.9999994123831466, iteration: 158316
loss: 0.9806377291679382,grad_norm: 0.924323081705715, iteration: 158317
loss: 1.0359296798706055,grad_norm: 0.999999184182576, iteration: 158318
loss: 0.9543989300727844,grad_norm: 0.9999992093334265, iteration: 158319
loss: 1.0200488567352295,grad_norm: 0.9999991195884411, iteration: 158320
loss: 1.0233769416809082,grad_norm: 0.9999990730060462, iteration: 158321
loss: 0.9694120287895203,grad_norm: 0.9750718695396596, iteration: 158322
loss: 0.9980078339576721,grad_norm: 0.7323674479648139, iteration: 158323
loss: 1.0162792205810547,grad_norm: 0.9967121268140807, iteration: 158324
loss: 1.0525884628295898,grad_norm: 0.8094274080860707, iteration: 158325
loss: 1.0468546152114868,grad_norm: 0.9999995926693124, iteration: 158326
loss: 0.9964900612831116,grad_norm: 0.9196189726635372, iteration: 158327
loss: 1.0088865756988525,grad_norm: 0.9999991455529226, iteration: 158328
loss: 0.9898247122764587,grad_norm: 0.9999991566009125, iteration: 158329
loss: 0.9831680655479431,grad_norm: 0.9999989955835591, iteration: 158330
loss: 0.9779708981513977,grad_norm: 0.9999994481573153, iteration: 158331
loss: 0.9844728112220764,grad_norm: 0.999999113279536, iteration: 158332
loss: 1.0097335577011108,grad_norm: 0.9999992555289003, iteration: 158333
loss: 0.9999423623085022,grad_norm: 0.9899577583622633, iteration: 158334
loss: 1.027897834777832,grad_norm: 0.999999590942799, iteration: 158335
loss: 0.9608272910118103,grad_norm: 0.9684245956526313, iteration: 158336
loss: 1.0279169082641602,grad_norm: 0.9999989694578232, iteration: 158337
loss: 0.988727331161499,grad_norm: 0.9999991106777254, iteration: 158338
loss: 1.0216751098632812,grad_norm: 0.9430853042534727, iteration: 158339
loss: 1.0307624340057373,grad_norm: 0.9999991115761566, iteration: 158340
loss: 1.0338715314865112,grad_norm: 0.9979479695237364, iteration: 158341
loss: 0.9937954545021057,grad_norm: 0.989303488786494, iteration: 158342
loss: 1.020898461341858,grad_norm: 0.9817155406775628, iteration: 158343
loss: 1.000495433807373,grad_norm: 0.9405555978211916, iteration: 158344
loss: 0.9800708293914795,grad_norm: 0.9999992123862377, iteration: 158345
loss: 1.0187102556228638,grad_norm: 0.9999992143991823, iteration: 158346
loss: 0.9839622378349304,grad_norm: 0.9999995581178301, iteration: 158347
loss: 1.010532021522522,grad_norm: 0.8831761427784088, iteration: 158348
loss: 0.9865851998329163,grad_norm: 0.9999992678427074, iteration: 158349
loss: 1.0167309045791626,grad_norm: 0.7911324690949528, iteration: 158350
loss: 1.2465296983718872,grad_norm: 0.9999996049199589, iteration: 158351
loss: 0.990075409412384,grad_norm: 0.9999992313404908, iteration: 158352
loss: 0.9912398457527161,grad_norm: 0.8904916859306703, iteration: 158353
loss: 1.0431897640228271,grad_norm: 0.9999991555563524, iteration: 158354
loss: 1.001655101776123,grad_norm: 0.9999990853873402, iteration: 158355
loss: 0.9653760194778442,grad_norm: 0.8892020296708585, iteration: 158356
loss: 1.0276167392730713,grad_norm: 0.96308347039211, iteration: 158357
loss: 0.9821898341178894,grad_norm: 0.9687684155828129, iteration: 158358
loss: 0.9961022734642029,grad_norm: 0.9999992146027346, iteration: 158359
loss: 1.002124547958374,grad_norm: 0.999999038740269, iteration: 158360
loss: 1.013298749923706,grad_norm: 0.9999992210664167, iteration: 158361
loss: 0.9979986548423767,grad_norm: 0.9999990326511823, iteration: 158362
loss: 1.0058271884918213,grad_norm: 0.9999991415898548, iteration: 158363
loss: 0.9682528972625732,grad_norm: 0.9999991576937672, iteration: 158364
loss: 1.013020634651184,grad_norm: 0.9999991165484018, iteration: 158365
loss: 1.040993094444275,grad_norm: 0.9999990732953329, iteration: 158366
loss: 0.9739716649055481,grad_norm: 0.9879775653758502, iteration: 158367
loss: 1.0026756525039673,grad_norm: 0.9754651210743801, iteration: 158368
loss: 0.9832122325897217,grad_norm: 0.914957253559775, iteration: 158369
loss: 0.9797953963279724,grad_norm: 0.8292109811838884, iteration: 158370
loss: 1.0277884006500244,grad_norm: 0.9999991048517446, iteration: 158371
loss: 0.9773833155632019,grad_norm: 0.816756682848871, iteration: 158372
loss: 1.0438085794448853,grad_norm: 0.9999992168069559, iteration: 158373
loss: 1.028944730758667,grad_norm: 0.9279770786892534, iteration: 158374
loss: 1.0217561721801758,grad_norm: 0.9758770517674953, iteration: 158375
loss: 1.0611121654510498,grad_norm: 0.9999994115797902, iteration: 158376
loss: 1.016496181488037,grad_norm: 0.9999999028982541, iteration: 158377
loss: 1.0044434070587158,grad_norm: 0.9336608728363124, iteration: 158378
loss: 0.99135822057724,grad_norm: 0.9174646176193165, iteration: 158379
loss: 1.0154424905776978,grad_norm: 0.9999991323901611, iteration: 158380
loss: 0.9953155517578125,grad_norm: 0.8573417387345766, iteration: 158381
loss: 1.0112054347991943,grad_norm: 0.9999992062233932, iteration: 158382
loss: 0.9885109663009644,grad_norm: 0.9999990999627217, iteration: 158383
loss: 0.9968221187591553,grad_norm: 0.9215682609673155, iteration: 158384
loss: 0.9823591709136963,grad_norm: 0.8780213685612632, iteration: 158385
loss: 1.0151441097259521,grad_norm: 0.999999153403808, iteration: 158386
loss: 0.9575599431991577,grad_norm: 0.9999991510999016, iteration: 158387
loss: 1.0935314893722534,grad_norm: 0.9999992278880939, iteration: 158388
loss: 1.0067702531814575,grad_norm: 0.9999990257196126, iteration: 158389
loss: 1.0521568059921265,grad_norm: 0.9999998330978727, iteration: 158390
loss: 0.9701336622238159,grad_norm: 0.9999992472128564, iteration: 158391
loss: 0.9891926646232605,grad_norm: 0.9592911131900241, iteration: 158392
loss: 1.06174898147583,grad_norm: 0.9488239600517548, iteration: 158393
loss: 1.0797193050384521,grad_norm: 0.9999992028467678, iteration: 158394
loss: 1.0255744457244873,grad_norm: 0.9999991820974371, iteration: 158395
loss: 0.9866690039634705,grad_norm: 0.8943934272866846, iteration: 158396
loss: 0.9731223583221436,grad_norm: 0.8324849376985863, iteration: 158397
loss: 1.031671404838562,grad_norm: 0.999999076471124, iteration: 158398
loss: 1.0005507469177246,grad_norm: 0.999999091855709, iteration: 158399
loss: 0.9818520545959473,grad_norm: 0.8676390704542147, iteration: 158400
loss: 1.0064258575439453,grad_norm: 0.9999991522672173, iteration: 158401
loss: 1.0388261079788208,grad_norm: 0.9999990987720152, iteration: 158402
loss: 1.0335456132888794,grad_norm: 0.9999990218409667, iteration: 158403
loss: 1.0280659198760986,grad_norm: 0.8889246035231332, iteration: 158404
loss: 0.9820735454559326,grad_norm: 0.9143372787389471, iteration: 158405
loss: 1.0047760009765625,grad_norm: 0.9999991218636696, iteration: 158406
loss: 0.9952276349067688,grad_norm: 0.9999991582800424, iteration: 158407
loss: 1.0062323808670044,grad_norm: 0.9999990516716777, iteration: 158408
loss: 0.9936420321464539,grad_norm: 0.8875107470169994, iteration: 158409
loss: 0.979489803314209,grad_norm: 0.9109357705461749, iteration: 158410
loss: 0.9853108525276184,grad_norm: 0.9120608843947536, iteration: 158411
loss: 0.9912420511245728,grad_norm: 0.9999991944269245, iteration: 158412
loss: 0.9735674262046814,grad_norm: 0.9271594526073681, iteration: 158413
loss: 1.0066465139389038,grad_norm: 0.999999091765339, iteration: 158414
loss: 1.0361860990524292,grad_norm: 0.9999996682016217, iteration: 158415
loss: 1.125833511352539,grad_norm: 0.9999992502829925, iteration: 158416
loss: 1.0585260391235352,grad_norm: 0.9210059746043439, iteration: 158417
loss: 1.0095641613006592,grad_norm: 0.9999993331106383, iteration: 158418
loss: 1.02385675907135,grad_norm: 0.9999992886549716, iteration: 158419
loss: 0.9890590906143188,grad_norm: 0.9263953134268943, iteration: 158420
loss: 0.9865157604217529,grad_norm: 0.9999991809235588, iteration: 158421
loss: 1.101063847541809,grad_norm: 0.999999251373168, iteration: 158422
loss: 1.0806305408477783,grad_norm: 0.9999991956999956, iteration: 158423
loss: 1.0526492595672607,grad_norm: 0.9999995194386576, iteration: 158424
loss: 1.0528661012649536,grad_norm: 0.8995167514071627, iteration: 158425
loss: 1.0082762241363525,grad_norm: 0.9999991331789986, iteration: 158426
loss: 0.9493575692176819,grad_norm: 0.9999990614648848, iteration: 158427
loss: 1.0635894536972046,grad_norm: 0.9706773999007399, iteration: 158428
loss: 1.028199315071106,grad_norm: 0.9999990608890972, iteration: 158429
loss: 0.9987876415252686,grad_norm: 0.9999991755790381, iteration: 158430
loss: 0.9616237282752991,grad_norm: 0.9999992068758573, iteration: 158431
loss: 1.017298698425293,grad_norm: 0.999999044605896, iteration: 158432
loss: 1.0045087337493896,grad_norm: 0.9824278633092892, iteration: 158433
loss: 1.0507588386535645,grad_norm: 0.9999999385698414, iteration: 158434
loss: 0.9454309940338135,grad_norm: 0.999999084051571, iteration: 158435
loss: 1.0097309350967407,grad_norm: 0.9211542127126359, iteration: 158436
loss: 1.028271198272705,grad_norm: 0.9999990702204737, iteration: 158437
loss: 0.9864076375961304,grad_norm: 0.9999989799670467, iteration: 158438
loss: 0.9912039637565613,grad_norm: 0.9834108269287697, iteration: 158439
loss: 0.9999786019325256,grad_norm: 0.9999992334159935, iteration: 158440
loss: 0.987442672252655,grad_norm: 0.999999014914724, iteration: 158441
loss: 0.9815638065338135,grad_norm: 0.8921975750321243, iteration: 158442
loss: 1.0035042762756348,grad_norm: 0.9323263614346686, iteration: 158443
loss: 1.02511727809906,grad_norm: 0.9803485564085714, iteration: 158444
loss: 1.0072237253189087,grad_norm: 0.9648075197978975, iteration: 158445
loss: 1.0129525661468506,grad_norm: 0.9999991631088543, iteration: 158446
loss: 0.9945732355117798,grad_norm: 0.9999990731739692, iteration: 158447
loss: 1.0311756134033203,grad_norm: 0.9099969859533692, iteration: 158448
loss: 0.9950916767120361,grad_norm: 0.9268234638858022, iteration: 158449
loss: 1.0096240043640137,grad_norm: 0.9999991049394142, iteration: 158450
loss: 0.9841141104698181,grad_norm: 0.8843590311680962, iteration: 158451
loss: 1.0777441263198853,grad_norm: 0.999999176055487, iteration: 158452
loss: 1.0264222621917725,grad_norm: 0.9998768850031138, iteration: 158453
loss: 1.0064469575881958,grad_norm: 0.9999991443998538, iteration: 158454
loss: 0.9523161053657532,grad_norm: 0.999999030019896, iteration: 158455
loss: 1.0072417259216309,grad_norm: 0.8756150772345032, iteration: 158456
loss: 1.0920811891555786,grad_norm: 0.9999991139927235, iteration: 158457
loss: 1.00247323513031,grad_norm: 0.9816818275723572, iteration: 158458
loss: 1.007643461227417,grad_norm: 0.9294704446934856, iteration: 158459
loss: 1.047048568725586,grad_norm: 0.9999994767586229, iteration: 158460
loss: 0.9455743432044983,grad_norm: 0.9999990849339604, iteration: 158461
loss: 0.9885985851287842,grad_norm: 0.9999990810136792, iteration: 158462
loss: 1.0114349126815796,grad_norm: 0.999999247752609, iteration: 158463
loss: 1.011368989944458,grad_norm: 0.9999991000846113, iteration: 158464
loss: 1.0136486291885376,grad_norm: 0.9040798709683199, iteration: 158465
loss: 1.0368574857711792,grad_norm: 0.9999991235923387, iteration: 158466
loss: 0.9727257490158081,grad_norm: 0.9999989708735326, iteration: 158467
loss: 1.0008265972137451,grad_norm: 0.9426063782774781, iteration: 158468
loss: 0.9674997329711914,grad_norm: 0.8335845152816616, iteration: 158469
loss: 0.9858014583587646,grad_norm: 0.9999991552327442, iteration: 158470
loss: 1.0195733308792114,grad_norm: 0.9999991565293788, iteration: 158471
loss: 0.9934509992599487,grad_norm: 0.9952991978523987, iteration: 158472
loss: 1.034479022026062,grad_norm: 0.9999991806982348, iteration: 158473
loss: 0.9880134463310242,grad_norm: 0.999999257632255, iteration: 158474
loss: 0.9925699234008789,grad_norm: 0.9290881306583566, iteration: 158475
loss: 0.9805333614349365,grad_norm: 0.9908465432410238, iteration: 158476
loss: 1.0691825151443481,grad_norm: 0.9394494006867482, iteration: 158477
loss: 1.0032849311828613,grad_norm: 0.9485845690327459, iteration: 158478
loss: 0.9650982022285461,grad_norm: 0.999999159651711, iteration: 158479
loss: 1.0307872295379639,grad_norm: 0.999999183502437, iteration: 158480
loss: 1.0235892534255981,grad_norm: 0.9999991502059467, iteration: 158481
loss: 0.9887738823890686,grad_norm: 0.9448669875110245, iteration: 158482
loss: 1.0355130434036255,grad_norm: 0.8679707610875492, iteration: 158483
loss: 0.9644008278846741,grad_norm: 0.9999990200217975, iteration: 158484
loss: 0.9930686950683594,grad_norm: 0.9999991081078735, iteration: 158485
loss: 0.9787901043891907,grad_norm: 0.999999258505691, iteration: 158486
loss: 1.1507949829101562,grad_norm: 0.9745590414999632, iteration: 158487
loss: 1.0190637111663818,grad_norm: 0.999999504899626, iteration: 158488
loss: 0.9903451800346375,grad_norm: 0.9970098376927895, iteration: 158489
loss: 1.045708417892456,grad_norm: 0.9458662013309502, iteration: 158490
loss: 1.0030922889709473,grad_norm: 0.9999994267945328, iteration: 158491
loss: 0.9872721433639526,grad_norm: 0.9999991867501619, iteration: 158492
loss: 0.9785968065261841,grad_norm: 0.999999296029637, iteration: 158493
loss: 0.9752886891365051,grad_norm: 0.8100482905362435, iteration: 158494
loss: 0.9912686347961426,grad_norm: 0.9999990586483813, iteration: 158495
loss: 0.9982212781906128,grad_norm: 0.9999991840775232, iteration: 158496
loss: 1.0175033807754517,grad_norm: 0.983581033842008, iteration: 158497
loss: 1.0463078022003174,grad_norm: 0.9999995875623878, iteration: 158498
loss: 0.9804428219795227,grad_norm: 0.9999991006812936, iteration: 158499
loss: 0.975799024105072,grad_norm: 0.9592580976457281, iteration: 158500
loss: 1.0094871520996094,grad_norm: 0.999999206826297, iteration: 158501
loss: 1.0357872247695923,grad_norm: 0.9999995461808671, iteration: 158502
loss: 0.9964550733566284,grad_norm: 0.9173876049681531, iteration: 158503
loss: 0.9803879857063293,grad_norm: 0.9999990952242132, iteration: 158504
loss: 1.0857532024383545,grad_norm: 0.999999349305835, iteration: 158505
loss: 1.0327831506729126,grad_norm: 0.9791215063045983, iteration: 158506
loss: 1.0311610698699951,grad_norm: 0.9156313724810018, iteration: 158507
loss: 0.9717311263084412,grad_norm: 0.8818006618973769, iteration: 158508
loss: 1.0189765691757202,grad_norm: 0.9999989605446828, iteration: 158509
loss: 0.9986405372619629,grad_norm: 0.9999990833476751, iteration: 158510
loss: 0.9987483620643616,grad_norm: 0.8780972726825713, iteration: 158511
loss: 0.9848644733428955,grad_norm: 0.9241790289434497, iteration: 158512
loss: 1.0058093070983887,grad_norm: 0.9766218012675739, iteration: 158513
loss: 0.9873911738395691,grad_norm: 0.9999991166054237, iteration: 158514
loss: 0.9918699860572815,grad_norm: 0.9686875760759668, iteration: 158515
loss: 1.07491934299469,grad_norm: 0.9999991729312959, iteration: 158516
loss: 0.980604350566864,grad_norm: 0.9999990154323751, iteration: 158517
loss: 1.0455623865127563,grad_norm: 0.9999991448588653, iteration: 158518
loss: 1.065670371055603,grad_norm: 0.9999996248282281, iteration: 158519
loss: 0.9875004291534424,grad_norm: 0.9192400483465606, iteration: 158520
loss: 0.9849835634231567,grad_norm: 0.9999992187805788, iteration: 158521
loss: 0.9884291887283325,grad_norm: 0.9999992538118013, iteration: 158522
loss: 0.9347683191299438,grad_norm: 0.8033318167403597, iteration: 158523
loss: 1.0299596786499023,grad_norm: 0.9999991139771606, iteration: 158524
loss: 1.0543228387832642,grad_norm: 0.9999998472018335, iteration: 158525
loss: 0.9867059588432312,grad_norm: 0.9832789436680379, iteration: 158526
loss: 1.0359636545181274,grad_norm: 0.9999991535902818, iteration: 158527
loss: 1.0603508949279785,grad_norm: 0.999999749078487, iteration: 158528
loss: 1.020554780960083,grad_norm: 0.9999990217534709, iteration: 158529
loss: 1.1752922534942627,grad_norm: 0.9999993787711107, iteration: 158530
loss: 0.9820389747619629,grad_norm: 0.999999035203225, iteration: 158531
loss: 1.0208542346954346,grad_norm: 0.949926418485368, iteration: 158532
loss: 1.0003710985183716,grad_norm: 0.9978886495435927, iteration: 158533
loss: 0.9933198094367981,grad_norm: 0.9431760005800881, iteration: 158534
loss: 0.9936471581459045,grad_norm: 0.9999992579743162, iteration: 158535
loss: 0.9923204779624939,grad_norm: 0.999998993214329, iteration: 158536
loss: 1.0132699012756348,grad_norm: 0.9999990378171171, iteration: 158537
loss: 1.0299209356307983,grad_norm: 0.9999990759141414, iteration: 158538
loss: 1.0150033235549927,grad_norm: 0.9837344282391213, iteration: 158539
loss: 1.0589540004730225,grad_norm: 0.9344736482939902, iteration: 158540
loss: 1.0398495197296143,grad_norm: 0.9651043033858624, iteration: 158541
loss: 1.0094859600067139,grad_norm: 0.9999989860247944, iteration: 158542
loss: 1.0086748600006104,grad_norm: 0.9999990450073893, iteration: 158543
loss: 1.0374226570129395,grad_norm: 0.9999990221400267, iteration: 158544
loss: 0.9985052943229675,grad_norm: 0.99999905160697, iteration: 158545
loss: 0.9739339351654053,grad_norm: 0.9999991629304921, iteration: 158546
loss: 1.0222355127334595,grad_norm: 0.9999991153711274, iteration: 158547
loss: 0.9964019060134888,grad_norm: 0.9999990627024046, iteration: 158548
loss: 1.0331799983978271,grad_norm: 0.9999992457998927, iteration: 158549
loss: 0.9556567668914795,grad_norm: 0.9798997179306537, iteration: 158550
loss: 1.0720839500427246,grad_norm: 0.999999112419977, iteration: 158551
loss: 0.9938054084777832,grad_norm: 0.8501746238477738, iteration: 158552
loss: 0.9856021404266357,grad_norm: 0.8994791992316487, iteration: 158553
loss: 1.0179823637008667,grad_norm: 0.939894370127365, iteration: 158554
loss: 1.0097700357437134,grad_norm: 0.9925307783589596, iteration: 158555
loss: 0.9932729005813599,grad_norm: 0.9999990507159305, iteration: 158556
loss: 0.9917627573013306,grad_norm: 0.9999991205962762, iteration: 158557
loss: 0.9762625098228455,grad_norm: 0.9807987007752369, iteration: 158558
loss: 0.9958701133728027,grad_norm: 0.9485596070828214, iteration: 158559
loss: 1.0106011629104614,grad_norm: 0.9570799668722296, iteration: 158560
loss: 0.9977307319641113,grad_norm: 0.9998757423801432, iteration: 158561
loss: 0.9887008666992188,grad_norm: 0.9999991740268054, iteration: 158562
loss: 0.9755470752716064,grad_norm: 0.9603206325493078, iteration: 158563
loss: 0.9691347479820251,grad_norm: 0.9999991492903394, iteration: 158564
loss: 1.006231665611267,grad_norm: 0.9999991845509781, iteration: 158565
loss: 1.00987708568573,grad_norm: 0.999999300731502, iteration: 158566
loss: 0.9800795316696167,grad_norm: 0.9999991007363396, iteration: 158567
loss: 0.9714402556419373,grad_norm: 0.9999991471761136, iteration: 158568
loss: 0.9799531698226929,grad_norm: 0.9999991146363573, iteration: 158569
loss: 0.9843595623970032,grad_norm: 0.9335739461181422, iteration: 158570
loss: 0.9958370923995972,grad_norm: 0.999999224769206, iteration: 158571
loss: 0.9899290204048157,grad_norm: 0.8386962888308916, iteration: 158572
loss: 0.9984697103500366,grad_norm: 0.9611570780565334, iteration: 158573
loss: 0.9677853584289551,grad_norm: 0.8968205616529749, iteration: 158574
loss: 1.1082649230957031,grad_norm: 0.9999997319360523, iteration: 158575
loss: 1.038552165031433,grad_norm: 0.9999992235803266, iteration: 158576
loss: 1.031278133392334,grad_norm: 0.9999991144263882, iteration: 158577
loss: 0.9808847308158875,grad_norm: 0.9999989793423891, iteration: 158578
loss: 1.0083268880844116,grad_norm: 0.9999991946216249, iteration: 158579
loss: 0.992388129234314,grad_norm: 0.8232664439118206, iteration: 158580
loss: 0.9934259057044983,grad_norm: 0.9274655069901526, iteration: 158581
loss: 1.0943127870559692,grad_norm: 0.9999998932229324, iteration: 158582
loss: 0.98870450258255,grad_norm: 0.9528624095854052, iteration: 158583
loss: 0.9936041235923767,grad_norm: 0.9999995789031029, iteration: 158584
loss: 0.9823657870292664,grad_norm: 0.9999991901705294, iteration: 158585
loss: 0.9783312678337097,grad_norm: 0.9356434010339418, iteration: 158586
loss: 1.0052692890167236,grad_norm: 0.9969033170022649, iteration: 158587
loss: 1.0266826152801514,grad_norm: 0.9999990833193736, iteration: 158588
loss: 0.9798746705055237,grad_norm: 0.9999990511927443, iteration: 158589
loss: 0.9766815900802612,grad_norm: 0.999999029845308, iteration: 158590
loss: 0.9598270058631897,grad_norm: 0.9999991994988302, iteration: 158591
loss: 1.0103849172592163,grad_norm: 0.9999992688212802, iteration: 158592
loss: 1.0147899389266968,grad_norm: 0.9580237584516299, iteration: 158593
loss: 0.9833940267562866,grad_norm: 0.9999990416315487, iteration: 158594
loss: 1.0345627069473267,grad_norm: 0.9999989669445557, iteration: 158595
loss: 1.0245181322097778,grad_norm: 0.9999990749929574, iteration: 158596
loss: 1.005549669265747,grad_norm: 0.9999992986916676, iteration: 158597
loss: 1.0120364427566528,grad_norm: 0.999999200456266, iteration: 158598
loss: 0.9617323279380798,grad_norm: 0.861025513526985, iteration: 158599
loss: 1.0254818201065063,grad_norm: 0.9753556873478295, iteration: 158600
loss: 0.9902978539466858,grad_norm: 0.9616492192493519, iteration: 158601
loss: 1.016444206237793,grad_norm: 0.9823114291627993, iteration: 158602
loss: 0.9913133978843689,grad_norm: 0.8519208107852263, iteration: 158603
loss: 0.9858987927436829,grad_norm: 0.9999990097776967, iteration: 158604
loss: 1.0249463319778442,grad_norm: 0.9999991246638187, iteration: 158605
loss: 1.0417536497116089,grad_norm: 0.898269603532666, iteration: 158606
loss: 1.007736325263977,grad_norm: 0.9999990496047158, iteration: 158607
loss: 0.96815025806427,grad_norm: 0.8917861613651774, iteration: 158608
loss: 1.0282363891601562,grad_norm: 0.999999509960365, iteration: 158609
loss: 1.0395596027374268,grad_norm: 0.9999995384938143, iteration: 158610
loss: 0.9829854369163513,grad_norm: 0.9999991386186806, iteration: 158611
loss: 1.0033073425292969,grad_norm: 0.8284464902100195, iteration: 158612
loss: 0.9854565262794495,grad_norm: 0.9999990184341215, iteration: 158613
loss: 1.011240839958191,grad_norm: 0.9999991393601954, iteration: 158614
loss: 1.0116770267486572,grad_norm: 0.9999990477161999, iteration: 158615
loss: 1.03400719165802,grad_norm: 0.9999991917813759, iteration: 158616
loss: 0.9681522250175476,grad_norm: 0.9999988369277789, iteration: 158617
loss: 1.004150390625,grad_norm: 0.9999991990573185, iteration: 158618
loss: 1.0138932466506958,grad_norm: 0.9999992286205895, iteration: 158619
loss: 1.0177158117294312,grad_norm: 0.9999991503818522, iteration: 158620
loss: 0.975699782371521,grad_norm: 0.9169516868904091, iteration: 158621
loss: 1.0079673528671265,grad_norm: 0.8016258390133203, iteration: 158622
loss: 0.9947291612625122,grad_norm: 0.9089787257303995, iteration: 158623
loss: 1.0064373016357422,grad_norm: 0.9999991778051159, iteration: 158624
loss: 1.0105501413345337,grad_norm: 0.9999992372785413, iteration: 158625
loss: 1.008618950843811,grad_norm: 0.9999997771241308, iteration: 158626
loss: 1.0131317377090454,grad_norm: 0.8733509242771325, iteration: 158627
loss: 0.970538318157196,grad_norm: 0.8981319784198742, iteration: 158628
loss: 1.0176020860671997,grad_norm: 0.9999992854213511, iteration: 158629
loss: 1.002387523651123,grad_norm: 0.999999104449723, iteration: 158630
loss: 0.9725685119628906,grad_norm: 0.9409985104814281, iteration: 158631
loss: 0.9864984750747681,grad_norm: 0.848467492899756, iteration: 158632
loss: 0.9998365640640259,grad_norm: 0.9999994927839909, iteration: 158633
loss: 1.0011712312698364,grad_norm: 0.9999990477860364, iteration: 158634
loss: 1.029313325881958,grad_norm: 0.9799163701974035, iteration: 158635
loss: 1.0360242128372192,grad_norm: 0.9999989866442469, iteration: 158636
loss: 0.988796055316925,grad_norm: 0.8665014734174028, iteration: 158637
loss: 0.9919331073760986,grad_norm: 0.9999992221974248, iteration: 158638
loss: 1.0050389766693115,grad_norm: 0.9999992215391318, iteration: 158639
loss: 1.0294594764709473,grad_norm: 0.9999990806500766, iteration: 158640
loss: 1.0183782577514648,grad_norm: 0.9486847022479892, iteration: 158641
loss: 0.9937232136726379,grad_norm: 0.9999991445488461, iteration: 158642
loss: 1.0098116397857666,grad_norm: 0.9999996916220555, iteration: 158643
loss: 0.9527031183242798,grad_norm: 0.9999991714639915, iteration: 158644
loss: 1.0309202671051025,grad_norm: 0.9826850124680795, iteration: 158645
loss: 1.0816956758499146,grad_norm: 0.9999990971243319, iteration: 158646
loss: 1.004744052886963,grad_norm: 0.9999990587634521, iteration: 158647
loss: 0.9719322323799133,grad_norm: 0.7826596309591174, iteration: 158648
loss: 0.9758870005607605,grad_norm: 0.8552532186524017, iteration: 158649
loss: 0.9896892309188843,grad_norm: 0.887781794273803, iteration: 158650
loss: 0.9820566177368164,grad_norm: 0.9999991628970302, iteration: 158651
loss: 1.0245535373687744,grad_norm: 0.9233867820073995, iteration: 158652
loss: 1.0104031562805176,grad_norm: 0.9073061595636898, iteration: 158653
loss: 0.9715712070465088,grad_norm: 0.9826459963021085, iteration: 158654
loss: 1.0234712362289429,grad_norm: 0.9999993132592732, iteration: 158655
loss: 1.0680720806121826,grad_norm: 0.999999544643419, iteration: 158656
loss: 1.00217604637146,grad_norm: 0.992727063193201, iteration: 158657
loss: 1.0056722164154053,grad_norm: 0.9999991470742156, iteration: 158658
loss: 1.0002373456954956,grad_norm: 0.9999990651075122, iteration: 158659
loss: 1.0432583093643188,grad_norm: 0.9999994317303608, iteration: 158660
loss: 1.0382113456726074,grad_norm: 0.9901610097397685, iteration: 158661
loss: 0.9919229745864868,grad_norm: 0.9999992359015891, iteration: 158662
loss: 0.996204674243927,grad_norm: 0.9598205607405541, iteration: 158663
loss: 1.0062235593795776,grad_norm: 0.8860975240693818, iteration: 158664
loss: 0.9663550853729248,grad_norm: 0.9860967373952253, iteration: 158665
loss: 0.9920302629470825,grad_norm: 0.8267594370018939, iteration: 158666
loss: 1.0215438604354858,grad_norm: 0.885657382638987, iteration: 158667
loss: 1.0028131008148193,grad_norm: 0.9999992239014128, iteration: 158668
loss: 0.9931306838989258,grad_norm: 0.999999229445979, iteration: 158669
loss: 0.9997918605804443,grad_norm: 0.959104558275558, iteration: 158670
loss: 0.9842197895050049,grad_norm: 0.999999144442462, iteration: 158671
loss: 0.9974334836006165,grad_norm: 0.949019034344564, iteration: 158672
loss: 1.0198261737823486,grad_norm: 0.999999054824852, iteration: 158673
loss: 1.0035308599472046,grad_norm: 0.9794838209772374, iteration: 158674
loss: 1.012861967086792,grad_norm: 0.999999084632522, iteration: 158675
loss: 0.985679566860199,grad_norm: 0.9117142454561558, iteration: 158676
loss: 0.9953301548957825,grad_norm: 0.999999029565152, iteration: 158677
loss: 0.9925149083137512,grad_norm: 0.9999989826876806, iteration: 158678
loss: 0.9899924993515015,grad_norm: 0.9438914645175729, iteration: 158679
loss: 0.9826194047927856,grad_norm: 0.8538121259957476, iteration: 158680
loss: 1.0118638277053833,grad_norm: 0.9999994428919532, iteration: 158681
loss: 0.9979431629180908,grad_norm: 0.9999995082378799, iteration: 158682
loss: 0.9851409792900085,grad_norm: 0.9999995355370833, iteration: 158683
loss: 0.9722047448158264,grad_norm: 0.9999990963721711, iteration: 158684
loss: 0.9806268811225891,grad_norm: 0.8813163142004031, iteration: 158685
loss: 1.0328233242034912,grad_norm: 0.9999991067512272, iteration: 158686
loss: 1.0344890356063843,grad_norm: 0.9999993810197276, iteration: 158687
loss: 0.9949272274971008,grad_norm: 0.9151664506714408, iteration: 158688
loss: 1.0040714740753174,grad_norm: 0.9717868798719033, iteration: 158689
loss: 0.9719095826148987,grad_norm: 0.9999990957201518, iteration: 158690
loss: 0.9822531938552856,grad_norm: 0.9999994879834456, iteration: 158691
loss: 1.0111678838729858,grad_norm: 0.9416730693135712, iteration: 158692
loss: 1.0281331539154053,grad_norm: 0.993019743898965, iteration: 158693
loss: 1.0010310411453247,grad_norm: 0.9999991174863715, iteration: 158694
loss: 0.9817479848861694,grad_norm: 0.9819966916318345, iteration: 158695
loss: 0.9848562479019165,grad_norm: 0.9226809823527701, iteration: 158696
loss: 1.010002613067627,grad_norm: 0.9999997902939263, iteration: 158697
loss: 1.0195071697235107,grad_norm: 0.9999991728084966, iteration: 158698
loss: 1.1233755350112915,grad_norm: 0.9999991686009092, iteration: 158699
loss: 0.990679144859314,grad_norm: 0.8624927790499307, iteration: 158700
loss: 1.0039788484573364,grad_norm: 0.8800939247894819, iteration: 158701
loss: 0.9957708716392517,grad_norm: 0.950405757110927, iteration: 158702
loss: 1.0031367540359497,grad_norm: 0.8237303294975439, iteration: 158703
loss: 0.9701902270317078,grad_norm: 0.9414665546803108, iteration: 158704
loss: 0.956453800201416,grad_norm: 0.9999989852268636, iteration: 158705
loss: 1.0026155710220337,grad_norm: 0.9745979693675995, iteration: 158706
loss: 0.9900997877120972,grad_norm: 0.9999991893523231, iteration: 158707
loss: 0.9912057518959045,grad_norm: 0.9999988525840262, iteration: 158708
loss: 0.9895745515823364,grad_norm: 0.9999991771484056, iteration: 158709
loss: 0.9837623238563538,grad_norm: 0.9999991067434686, iteration: 158710
loss: 0.9822776913642883,grad_norm: 0.8964092251090595, iteration: 158711
loss: 0.9925602674484253,grad_norm: 0.9314415531441019, iteration: 158712
loss: 0.964518129825592,grad_norm: 0.9374748975851176, iteration: 158713
loss: 0.9753158688545227,grad_norm: 0.9054044802554275, iteration: 158714
loss: 1.0089119672775269,grad_norm: 0.9024304376856395, iteration: 158715
loss: 1.0122674703598022,grad_norm: 0.9999991189325154, iteration: 158716
loss: 0.9995097517967224,grad_norm: 0.8786983976624065, iteration: 158717
loss: 0.9891749024391174,grad_norm: 0.9999991955814367, iteration: 158718
loss: 0.9702475666999817,grad_norm: 0.8330040273137403, iteration: 158719
loss: 1.0206923484802246,grad_norm: 0.9999995458820843, iteration: 158720
loss: 1.0358480215072632,grad_norm: 0.9780592193550416, iteration: 158721
loss: 0.9787246584892273,grad_norm: 0.9406549116191388, iteration: 158722
loss: 1.0406256914138794,grad_norm: 0.9999995858940556, iteration: 158723
loss: 0.9970741868019104,grad_norm: 0.9821224362518474, iteration: 158724
loss: 0.9872232675552368,grad_norm: 0.9377280779762269, iteration: 158725
loss: 1.0002514123916626,grad_norm: 0.9999990573674135, iteration: 158726
loss: 0.9725999236106873,grad_norm: 0.9861661790963241, iteration: 158727
loss: 0.9828422665596008,grad_norm: 0.9999991193486379, iteration: 158728
loss: 0.9900188446044922,grad_norm: 0.9999990652058496, iteration: 158729
loss: 0.97796630859375,grad_norm: 0.9999990758992211, iteration: 158730
loss: 1.0204205513000488,grad_norm: 0.9999998232378523, iteration: 158731
loss: 1.0068893432617188,grad_norm: 0.9999989926092291, iteration: 158732
loss: 0.9881157279014587,grad_norm: 0.9999999748014833, iteration: 158733
loss: 1.0367542505264282,grad_norm: 0.9353011985378804, iteration: 158734
loss: 0.9648555517196655,grad_norm: 0.9999991408598593, iteration: 158735
loss: 0.9989506602287292,grad_norm: 0.9999990952731508, iteration: 158736
loss: 1.0260937213897705,grad_norm: 0.9594189547168116, iteration: 158737
loss: 0.9565029144287109,grad_norm: 0.9999992362428441, iteration: 158738
loss: 1.0087593793869019,grad_norm: 0.9021137150579082, iteration: 158739
loss: 1.0068578720092773,grad_norm: 0.9999991176002387, iteration: 158740
loss: 1.010834813117981,grad_norm: 0.9587487852540271, iteration: 158741
loss: 1.0062483549118042,grad_norm: 0.9529902780847013, iteration: 158742
loss: 0.9885395765304565,grad_norm: 0.9999990494931715, iteration: 158743
loss: 1.042398452758789,grad_norm: 0.9999991562810115, iteration: 158744
loss: 1.002600908279419,grad_norm: 0.9999989794357942, iteration: 158745
loss: 1.001953363418579,grad_norm: 0.9999991164352495, iteration: 158746
loss: 1.0026907920837402,grad_norm: 0.9768506605823417, iteration: 158747
loss: 0.9729679226875305,grad_norm: 0.999999105068624, iteration: 158748
loss: 0.9760297536849976,grad_norm: 0.9999990330391885, iteration: 158749
loss: 0.998557984828949,grad_norm: 0.9999989887015557, iteration: 158750
loss: 0.9859965443611145,grad_norm: 0.8605506874123754, iteration: 158751
loss: 1.0220448970794678,grad_norm: 0.9499845690406019, iteration: 158752
loss: 0.9811089634895325,grad_norm: 0.9683798946383919, iteration: 158753
loss: 1.019647479057312,grad_norm: 0.999999235085339, iteration: 158754
loss: 1.043752670288086,grad_norm: 0.9625219377499805, iteration: 158755
loss: 0.9965159893035889,grad_norm: 0.9999990663568629, iteration: 158756
loss: 1.018628716468811,grad_norm: 0.9999990950315358, iteration: 158757
loss: 1.0213871002197266,grad_norm: 0.9965358283820992, iteration: 158758
loss: 1.0211868286132812,grad_norm: 0.9962636857235229, iteration: 158759
loss: 0.997334897518158,grad_norm: 0.8903860755361831, iteration: 158760
loss: 1.00344717502594,grad_norm: 0.9999990331344112, iteration: 158761
loss: 0.9585790038108826,grad_norm: 0.999998977196242, iteration: 158762
loss: 0.9833943247795105,grad_norm: 0.9525539039912952, iteration: 158763
loss: 0.9706845283508301,grad_norm: 0.9191727279304307, iteration: 158764
loss: 0.9828267097473145,grad_norm: 0.9999991560433003, iteration: 158765
loss: 0.9894710779190063,grad_norm: 0.9999990397246926, iteration: 158766
loss: 0.9966629147529602,grad_norm: 0.9999990134677714, iteration: 158767
loss: 0.9907448887825012,grad_norm: 0.8190277833969937, iteration: 158768
loss: 0.9789153933525085,grad_norm: 0.9999990489777232, iteration: 158769
loss: 0.9871839284896851,grad_norm: 0.9999991358607547, iteration: 158770
loss: 1.0076816082000732,grad_norm: 0.9999991488182254, iteration: 158771
loss: 0.9748674035072327,grad_norm: 0.9999991066113311, iteration: 158772
loss: 0.9944983124732971,grad_norm: 0.9999990903894984, iteration: 158773
loss: 0.9699617624282837,grad_norm: 0.9999990593524534, iteration: 158774
loss: 1.0377498865127563,grad_norm: 0.9274889361931785, iteration: 158775
loss: 0.99106764793396,grad_norm: 0.9721312449730685, iteration: 158776
loss: 0.9686098694801331,grad_norm: 0.9999990418307454, iteration: 158777
loss: 0.9724289774894714,grad_norm: 0.9999991198914694, iteration: 158778
loss: 1.0158809423446655,grad_norm: 0.9999991429346052, iteration: 158779
loss: 0.9758962392807007,grad_norm: 0.9999989771845247, iteration: 158780
loss: 0.9844104051589966,grad_norm: 0.947348412358759, iteration: 158781
loss: 0.9966031908988953,grad_norm: 0.8480583747775801, iteration: 158782
loss: 0.9994009137153625,grad_norm: 0.8848517554632201, iteration: 158783
loss: 0.9807979464530945,grad_norm: 0.9999990294143125, iteration: 158784
loss: 1.0185253620147705,grad_norm: 0.9852443666488416, iteration: 158785
loss: 1.1016349792480469,grad_norm: 0.9999992793193344, iteration: 158786
loss: 1.0239548683166504,grad_norm: 0.9999992330862489, iteration: 158787
loss: 0.9594274163246155,grad_norm: 0.999999040990462, iteration: 158788
loss: 1.0675173997879028,grad_norm: 0.9999992832814217, iteration: 158789
loss: 0.9896997213363647,grad_norm: 0.9999989306189795, iteration: 158790
loss: 0.96429044008255,grad_norm: 0.9987171797878286, iteration: 158791
loss: 1.0178608894348145,grad_norm: 0.9772061959472383, iteration: 158792
loss: 1.146743893623352,grad_norm: 0.9999995857787028, iteration: 158793
loss: 1.022420048713684,grad_norm: 0.9999988385042187, iteration: 158794
loss: 1.0339868068695068,grad_norm: 0.9999994466232581, iteration: 158795
loss: 1.006921410560608,grad_norm: 0.8970709692881873, iteration: 158796
loss: 0.9912914633750916,grad_norm: 0.9999991999706404, iteration: 158797
loss: 0.9943053722381592,grad_norm: 0.9999990824865367, iteration: 158798
loss: 1.0111373662948608,grad_norm: 0.9999991895860024, iteration: 158799
loss: 0.99900221824646,grad_norm: 0.8599521400280485, iteration: 158800
loss: 0.9997751712799072,grad_norm: 0.8749593486865296, iteration: 158801
loss: 0.9892507791519165,grad_norm: 0.964872937854657, iteration: 158802
loss: 1.0345420837402344,grad_norm: 0.9718734265064107, iteration: 158803
loss: 1.0001555681228638,grad_norm: 0.9999991238373539, iteration: 158804
loss: 1.0018547773361206,grad_norm: 0.9999989540435346, iteration: 158805
loss: 0.9974503517150879,grad_norm: 0.999998993265362, iteration: 158806
loss: 1.0013447999954224,grad_norm: 0.9284706617022852, iteration: 158807
loss: 0.9782735705375671,grad_norm: 0.9326077933696642, iteration: 158808
loss: 0.9907482862472534,grad_norm: 0.9554407183588955, iteration: 158809
loss: 0.9880315065383911,grad_norm: 0.984509704378701, iteration: 158810
loss: 1.1094907522201538,grad_norm: 0.9999992781017508, iteration: 158811
loss: 0.9802837371826172,grad_norm: 0.9920247111168333, iteration: 158812
loss: 1.001253604888916,grad_norm: 0.9483791270044155, iteration: 158813
loss: 1.013037085533142,grad_norm: 0.9999990468602941, iteration: 158814
loss: 1.0665336847305298,grad_norm: 0.9999994688785174, iteration: 158815
loss: 1.0041158199310303,grad_norm: 0.9298594266996005, iteration: 158816
loss: 1.0322747230529785,grad_norm: 0.9999991075267325, iteration: 158817
loss: 0.9970719218254089,grad_norm: 0.9999991846854703, iteration: 158818
loss: 0.9689173102378845,grad_norm: 0.9999992392249615, iteration: 158819
loss: 1.087990641593933,grad_norm: 0.9999991798044153, iteration: 158820
loss: 1.003936529159546,grad_norm: 0.9999990274130872, iteration: 158821
loss: 0.9915628433227539,grad_norm: 0.9706656866880733, iteration: 158822
loss: 1.0253747701644897,grad_norm: 0.99999962986434, iteration: 158823
loss: 0.9963902235031128,grad_norm: 0.9095599494401143, iteration: 158824
loss: 1.011226773262024,grad_norm: 0.9514776173495298, iteration: 158825
loss: 1.0103731155395508,grad_norm: 0.9999992223084445, iteration: 158826
loss: 0.9848268628120422,grad_norm: 0.9999990193337159, iteration: 158827
loss: 1.004508376121521,grad_norm: 0.7793316235510214, iteration: 158828
loss: 1.0115289688110352,grad_norm: 0.9812444170548867, iteration: 158829
loss: 1.0065964460372925,grad_norm: 0.9999992094342723, iteration: 158830
loss: 0.9913590550422668,grad_norm: 0.9999993208746384, iteration: 158831
loss: 0.9856699705123901,grad_norm: 0.9905961500564411, iteration: 158832
loss: 1.0274206399917603,grad_norm: 0.9568314220445621, iteration: 158833
loss: 1.0068193674087524,grad_norm: 0.9999992096503534, iteration: 158834
loss: 1.0050753355026245,grad_norm: 0.9999991378534053, iteration: 158835
loss: 0.9880046844482422,grad_norm: 0.973378929770438, iteration: 158836
loss: 1.0414198637008667,grad_norm: 0.9999991727367431, iteration: 158837
loss: 1.0284007787704468,grad_norm: 0.9004146213887976, iteration: 158838
loss: 1.0113129615783691,grad_norm: 0.9999989797403078, iteration: 158839
loss: 1.0075788497924805,grad_norm: 0.9557473587099927, iteration: 158840
loss: 1.0168728828430176,grad_norm: 0.9999992658179262, iteration: 158841
loss: 0.9777598977088928,grad_norm: 0.8346237380995281, iteration: 158842
loss: 0.9774482846260071,grad_norm: 0.9999990680968388, iteration: 158843
loss: 1.0075798034667969,grad_norm: 0.9999991469583901, iteration: 158844
loss: 1.0337337255477905,grad_norm: 0.9999991525850009, iteration: 158845
loss: 0.9886239171028137,grad_norm: 0.9999990706683994, iteration: 158846
loss: 0.9911791086196899,grad_norm: 0.999999362039602, iteration: 158847
loss: 0.9681825041770935,grad_norm: 0.9999992332283256, iteration: 158848
loss: 1.0134371519088745,grad_norm: 0.9999990195629143, iteration: 158849
loss: 1.004756212234497,grad_norm: 0.9806408146213536, iteration: 158850
loss: 1.0165196657180786,grad_norm: 0.9670674428308297, iteration: 158851
loss: 1.0182406902313232,grad_norm: 0.933185246343589, iteration: 158852
loss: 1.004349946975708,grad_norm: 0.9977212267142281, iteration: 158853
loss: 0.9843331575393677,grad_norm: 0.9999991443847273, iteration: 158854
loss: 1.009700059890747,grad_norm: 0.9999993526124148, iteration: 158855
loss: 0.9932562708854675,grad_norm: 0.9999992561509923, iteration: 158856
loss: 1.00457763671875,grad_norm: 0.9999991840911815, iteration: 158857
loss: 0.9950866103172302,grad_norm: 0.9999991388224629, iteration: 158858
loss: 1.0575478076934814,grad_norm: 0.999999857254995, iteration: 158859
loss: 1.076640009880066,grad_norm: 0.9999992256536196, iteration: 158860
loss: 0.983785092830658,grad_norm: 0.9999990685078951, iteration: 158861
loss: 0.9743800163269043,grad_norm: 0.8326114923009246, iteration: 158862
loss: 0.9831329584121704,grad_norm: 0.9999991071385611, iteration: 158863
loss: 0.9870738983154297,grad_norm: 0.9999992700054049, iteration: 158864
loss: 0.9788770079612732,grad_norm: 0.9305090757052786, iteration: 158865
loss: 0.9881646633148193,grad_norm: 0.9999992332199721, iteration: 158866
loss: 1.0637027025222778,grad_norm: 0.9999991304433082, iteration: 158867
loss: 0.988163948059082,grad_norm: 0.9278863539579325, iteration: 158868
loss: 0.9912990927696228,grad_norm: 0.9999991805327838, iteration: 158869
loss: 1.0198240280151367,grad_norm: 0.9999991397863567, iteration: 158870
loss: 1.0151519775390625,grad_norm: 0.8919785254274053, iteration: 158871
loss: 1.0467631816864014,grad_norm: 0.9750929613236511, iteration: 158872
loss: 1.077109694480896,grad_norm: 0.9999991726342511, iteration: 158873
loss: 1.0230940580368042,grad_norm: 0.8544949247665837, iteration: 158874
loss: 1.0488337278366089,grad_norm: 0.9999991989178157, iteration: 158875
loss: 1.1078137159347534,grad_norm: 0.999999099132561, iteration: 158876
loss: 1.0392454862594604,grad_norm: 0.8659619474919763, iteration: 158877
loss: 0.9837945103645325,grad_norm: 0.9779154829305539, iteration: 158878
loss: 1.3737542629241943,grad_norm: 0.9999997955992322, iteration: 158879
loss: 1.1717500686645508,grad_norm: 0.9999998259355496, iteration: 158880
loss: 0.9671828746795654,grad_norm: 0.8082930501260336, iteration: 158881
loss: 1.030444622039795,grad_norm: 0.9999996071578632, iteration: 158882
loss: 0.9934961199760437,grad_norm: 0.9694471396378733, iteration: 158883
loss: 1.0120570659637451,grad_norm: 0.9999996499043925, iteration: 158884
loss: 1.0219041109085083,grad_norm: 0.999999100390766, iteration: 158885
loss: 0.9986554980278015,grad_norm: 0.9902957410839855, iteration: 158886
loss: 1.0139544010162354,grad_norm: 0.9151607943170702, iteration: 158887
loss: 1.0624006986618042,grad_norm: 0.9999990990250168, iteration: 158888
loss: 0.9964818358421326,grad_norm: 0.9999991006306327, iteration: 158889
loss: 1.0476988554000854,grad_norm: 0.966019288259342, iteration: 158890
loss: 1.0104377269744873,grad_norm: 0.9999990717726938, iteration: 158891
loss: 1.0381968021392822,grad_norm: 0.9990073307237275, iteration: 158892
loss: 1.0206730365753174,grad_norm: 0.9999990885535751, iteration: 158893
loss: 1.015743613243103,grad_norm: 0.9343275060335745, iteration: 158894
loss: 0.9816057682037354,grad_norm: 0.9293453839698197, iteration: 158895
loss: 1.0109119415283203,grad_norm: 0.9999995536322731, iteration: 158896
loss: 0.9859273433685303,grad_norm: 0.9999991094115329, iteration: 158897
loss: 0.9923990964889526,grad_norm: 0.9325185624347347, iteration: 158898
loss: 1.0416592359542847,grad_norm: 0.9140035164600656, iteration: 158899
loss: 0.9815791249275208,grad_norm: 0.8560813442452915, iteration: 158900
loss: 1.0197820663452148,grad_norm: 0.999999209040868, iteration: 158901
loss: 1.0105197429656982,grad_norm: 0.9999989500612236, iteration: 158902
loss: 0.9755218029022217,grad_norm: 0.9723396782645692, iteration: 158903
loss: 1.006663203239441,grad_norm: 0.9999991994335478, iteration: 158904
loss: 1.0045626163482666,grad_norm: 0.9999991368246562, iteration: 158905
loss: 0.9912417531013489,grad_norm: 0.9999991868091798, iteration: 158906
loss: 1.006441593170166,grad_norm: 0.9105058312117628, iteration: 158907
loss: 0.9859148859977722,grad_norm: 0.9999990909782503, iteration: 158908
loss: 0.9981176853179932,grad_norm: 0.8752497509203343, iteration: 158909
loss: 0.9755561351776123,grad_norm: 0.88446897370549, iteration: 158910
loss: 1.0133308172225952,grad_norm: 0.9007599087003089, iteration: 158911
loss: 1.0020191669464111,grad_norm: 0.9999990739412842, iteration: 158912
loss: 1.0017406940460205,grad_norm: 0.9857838592574057, iteration: 158913
loss: 1.0080820322036743,grad_norm: 0.9999990968450195, iteration: 158914
loss: 1.0120869874954224,grad_norm: 0.9999991842997918, iteration: 158915
loss: 0.9987034797668457,grad_norm: 0.9999991229409192, iteration: 158916
loss: 0.9783126711845398,grad_norm: 0.9668389747390141, iteration: 158917
loss: 1.054800033569336,grad_norm: 0.9999991581216506, iteration: 158918
loss: 0.993351936340332,grad_norm: 0.862170033630061, iteration: 158919
loss: 0.968790590763092,grad_norm: 0.9436332650031175, iteration: 158920
loss: 0.9819730520248413,grad_norm: 0.9999989661817217, iteration: 158921
loss: 1.0072799921035767,grad_norm: 0.8143716014827842, iteration: 158922
loss: 0.9919946193695068,grad_norm: 0.9999992144296806, iteration: 158923
loss: 1.000298023223877,grad_norm: 0.9999992542979736, iteration: 158924
loss: 1.014983057975769,grad_norm: 0.9999995912311466, iteration: 158925
loss: 1.0264654159545898,grad_norm: 0.9999995718600571, iteration: 158926
loss: 1.0188140869140625,grad_norm: 0.9706320120976621, iteration: 158927
loss: 1.0160341262817383,grad_norm: 0.9999992399675971, iteration: 158928
loss: 1.0507060289382935,grad_norm: 0.9999990398656975, iteration: 158929
loss: 1.0303140878677368,grad_norm: 0.9469042546149723, iteration: 158930
loss: 0.9571433663368225,grad_norm: 0.9157312853903863, iteration: 158931
loss: 0.9806340336799622,grad_norm: 0.9999990064235194, iteration: 158932
loss: 0.9868420958518982,grad_norm: 0.9999990259441079, iteration: 158933
loss: 1.0058451890945435,grad_norm: 0.8156137372923511, iteration: 158934
loss: 0.9718512892723083,grad_norm: 0.9999991490132931, iteration: 158935
loss: 1.008379340171814,grad_norm: 0.9999992254717077, iteration: 158936
loss: 1.019754409790039,grad_norm: 0.9999990309518441, iteration: 158937
loss: 1.0117872953414917,grad_norm: 0.95521559155879, iteration: 158938
loss: 1.0310519933700562,grad_norm: 0.9948742846135958, iteration: 158939
loss: 0.9860692620277405,grad_norm: 0.9999990369732173, iteration: 158940
loss: 1.0037662982940674,grad_norm: 0.9999990497274373, iteration: 158941
loss: 0.9942036271095276,grad_norm: 0.8868886413545076, iteration: 158942
loss: 0.9618696570396423,grad_norm: 0.999999121967917, iteration: 158943
loss: 0.9927188158035278,grad_norm: 0.9761712403532721, iteration: 158944
loss: 1.0055543184280396,grad_norm: 0.9856324380737661, iteration: 158945
loss: 1.003421425819397,grad_norm: 0.9816773146209128, iteration: 158946
loss: 0.9865516424179077,grad_norm: 0.9999992579798875, iteration: 158947
loss: 1.0275954008102417,grad_norm: 0.9999992892082764, iteration: 158948
loss: 0.9992571473121643,grad_norm: 0.9999990555293248, iteration: 158949
loss: 1.0207241773605347,grad_norm: 0.9999990275297129, iteration: 158950
loss: 1.0078409910202026,grad_norm: 0.9999991937183687, iteration: 158951
loss: 0.9894613027572632,grad_norm: 0.999999194946902, iteration: 158952
loss: 1.0018360614776611,grad_norm: 0.9999992475173242, iteration: 158953
loss: 0.9992393255233765,grad_norm: 0.9999998425075961, iteration: 158954
loss: 1.002724289894104,grad_norm: 0.9999990794064623, iteration: 158955
loss: 1.0063605308532715,grad_norm: 0.9999994607896101, iteration: 158956
loss: 1.0438379049301147,grad_norm: 0.8551021197883226, iteration: 158957
loss: 0.9997196793556213,grad_norm: 0.9999990870062567, iteration: 158958
loss: 0.993370771408081,grad_norm: 0.9999989748260547, iteration: 158959
loss: 1.0058376789093018,grad_norm: 0.9889636628789537, iteration: 158960
loss: 0.9933328628540039,grad_norm: 0.9999992043845046, iteration: 158961
loss: 1.0283890962600708,grad_norm: 0.9999991237674879, iteration: 158962
loss: 1.0023316144943237,grad_norm: 0.9544783945583296, iteration: 158963
loss: 1.0107851028442383,grad_norm: 0.9999991213182795, iteration: 158964
loss: 1.0140408277511597,grad_norm: 0.9999990948285729, iteration: 158965
loss: 0.983340322971344,grad_norm: 0.9999996169522871, iteration: 158966
loss: 1.0222374200820923,grad_norm: 0.9999991538309378, iteration: 158967
loss: 1.019407033920288,grad_norm: 0.8533702776725784, iteration: 158968
loss: 0.9902623295783997,grad_norm: 0.8887838686381342, iteration: 158969
loss: 1.0119432210922241,grad_norm: 0.9999990610128204, iteration: 158970
loss: 0.993191659450531,grad_norm: 0.9999992530700077, iteration: 158971
loss: 0.9779648184776306,grad_norm: 0.9999990550203798, iteration: 158972
loss: 1.0141260623931885,grad_norm: 0.9999991648969713, iteration: 158973
loss: 0.9539833068847656,grad_norm: 0.981444787813571, iteration: 158974
loss: 1.0480842590332031,grad_norm: 0.885130433356289, iteration: 158975
loss: 0.9695245623588562,grad_norm: 0.9999992344508887, iteration: 158976
loss: 1.002245306968689,grad_norm: 0.9715243570518912, iteration: 158977
loss: 0.9673718810081482,grad_norm: 0.9999990396086683, iteration: 158978
loss: 1.0197956562042236,grad_norm: 0.7243288078855779, iteration: 158979
loss: 0.9967832565307617,grad_norm: 0.9640941880519798, iteration: 158980
loss: 0.9989428520202637,grad_norm: 0.9637847756643064, iteration: 158981
loss: 0.9972688555717468,grad_norm: 0.9999989776590106, iteration: 158982
loss: 1.0166324377059937,grad_norm: 0.9999990425278908, iteration: 158983
loss: 1.0458929538726807,grad_norm: 0.9999993633866003, iteration: 158984
loss: 0.9890299439430237,grad_norm: 0.9999991399868335, iteration: 158985
loss: 1.013879656791687,grad_norm: 0.931108043189813, iteration: 158986
loss: 1.026855707168579,grad_norm: 0.9571852304775781, iteration: 158987
loss: 1.035346269607544,grad_norm: 0.9304694180446242, iteration: 158988
loss: 0.9998518824577332,grad_norm: 0.9999992782729012, iteration: 158989
loss: 1.019144892692566,grad_norm: 0.9999991208996992, iteration: 158990
loss: 0.9902871251106262,grad_norm: 0.9999991568095618, iteration: 158991
loss: 1.0014222860336304,grad_norm: 0.9999991338517475, iteration: 158992
loss: 0.9827495217323303,grad_norm: 0.999999094619426, iteration: 158993
loss: 1.0000958442687988,grad_norm: 0.9999990987254695, iteration: 158994
loss: 1.0252896547317505,grad_norm: 0.9856970161115711, iteration: 158995
loss: 1.0150809288024902,grad_norm: 0.8311835972034044, iteration: 158996
loss: 0.9949225783348083,grad_norm: 0.9999990771054754, iteration: 158997
loss: 0.9919288158416748,grad_norm: 0.9521971803079501, iteration: 158998
loss: 1.0012187957763672,grad_norm: 0.9880049326507433, iteration: 158999
loss: 1.0018101930618286,grad_norm: 0.9999992551855124, iteration: 159000
loss: 1.0038504600524902,grad_norm: 0.945952768464869, iteration: 159001
loss: 1.0223416090011597,grad_norm: 0.9999992836229705, iteration: 159002
loss: 0.9897962212562561,grad_norm: 0.9999990026652497, iteration: 159003
loss: 0.9901903867721558,grad_norm: 0.9999990346658639, iteration: 159004
loss: 1.0054744482040405,grad_norm: 0.9487136403981593, iteration: 159005
loss: 0.983971357345581,grad_norm: 0.9999992937120925, iteration: 159006
loss: 1.0337523221969604,grad_norm: 0.7822274878740233, iteration: 159007
loss: 1.032178521156311,grad_norm: 0.9999991374818815, iteration: 159008
loss: 1.0398205518722534,grad_norm: 0.999998991502131, iteration: 159009
loss: 0.9992725253105164,grad_norm: 0.9999992550566804, iteration: 159010
loss: 0.9674102663993835,grad_norm: 0.948846724648759, iteration: 159011
loss: 1.0340735912322998,grad_norm: 0.9248493114704022, iteration: 159012
loss: 0.9734129309654236,grad_norm: 0.9999993091598071, iteration: 159013
loss: 1.0120540857315063,grad_norm: 0.8934683179920526, iteration: 159014
loss: 1.0099852085113525,grad_norm: 0.9312038458250127, iteration: 159015
loss: 1.0451644659042358,grad_norm: 0.9999996011746726, iteration: 159016
loss: 1.0105149745941162,grad_norm: 0.999999313804558, iteration: 159017
loss: 0.9811902046203613,grad_norm: 0.9281797459194977, iteration: 159018
loss: 1.015668272972107,grad_norm: 0.9999991171852746, iteration: 159019
loss: 0.9655263423919678,grad_norm: 0.8588864144923125, iteration: 159020
loss: 1.0348122119903564,grad_norm: 0.9667881207382439, iteration: 159021
loss: 0.9957002401351929,grad_norm: 0.9148301169374778, iteration: 159022
loss: 0.9708007574081421,grad_norm: 0.9999990164438851, iteration: 159023
loss: 0.9975100755691528,grad_norm: 0.9326361119238019, iteration: 159024
loss: 0.9845057725906372,grad_norm: 0.9999997114804914, iteration: 159025
loss: 0.9803439378738403,grad_norm: 0.9999991655434848, iteration: 159026
loss: 1.008181095123291,grad_norm: 0.9361607275790007, iteration: 159027
loss: 1.0294703245162964,grad_norm: 0.9827255606815368, iteration: 159028
loss: 1.0055598020553589,grad_norm: 0.9999990774846093, iteration: 159029
loss: 0.9755495190620422,grad_norm: 0.9792373681486014, iteration: 159030
loss: 0.973135769367218,grad_norm: 0.8780576892059573, iteration: 159031
loss: 0.973339855670929,grad_norm: 0.9999991121092983, iteration: 159032
loss: 0.9504743814468384,grad_norm: 0.9999991174829888, iteration: 159033
loss: 1.0097931623458862,grad_norm: 0.9559780499472341, iteration: 159034
loss: 0.9974873661994934,grad_norm: 0.9818791502798027, iteration: 159035
loss: 0.9984456896781921,grad_norm: 0.999999158343445, iteration: 159036
loss: 0.9820500612258911,grad_norm: 0.9999990478524906, iteration: 159037
loss: 1.0235893726348877,grad_norm: 0.9164017039460642, iteration: 159038
loss: 0.9914238452911377,grad_norm: 0.9810348439263094, iteration: 159039
loss: 0.9790916442871094,grad_norm: 0.943839097254919, iteration: 159040
loss: 1.0320128202438354,grad_norm: 0.9999990507435997, iteration: 159041
loss: 0.9241570830345154,grad_norm: 0.999999283000353, iteration: 159042
loss: 1.0057452917099,grad_norm: 0.9999993214978046, iteration: 159043
loss: 1.0704067945480347,grad_norm: 0.9999994030014211, iteration: 159044
loss: 0.9606984853744507,grad_norm: 0.9999991697309116, iteration: 159045
loss: 1.0049108266830444,grad_norm: 0.9999998672413165, iteration: 159046
loss: 1.0279359817504883,grad_norm: 0.9884935454564794, iteration: 159047
loss: 0.9948483109474182,grad_norm: 0.9898832045810583, iteration: 159048
loss: 0.9907392263412476,grad_norm: 0.9986639439439552, iteration: 159049
loss: 0.9669767022132874,grad_norm: 0.9843378637813046, iteration: 159050
loss: 0.9897246360778809,grad_norm: 0.9999990772618823, iteration: 159051
loss: 1.015191674232483,grad_norm: 0.9999990800376968, iteration: 159052
loss: 1.0118005275726318,grad_norm: 0.9894527184691303, iteration: 159053
loss: 0.9785110354423523,grad_norm: 0.7992537550200672, iteration: 159054
loss: 0.9876592755317688,grad_norm: 0.9553148906696478, iteration: 159055
loss: 0.9902377724647522,grad_norm: 0.8636724529612958, iteration: 159056
loss: 1.0316795110702515,grad_norm: 0.9999997542288691, iteration: 159057
loss: 0.986819863319397,grad_norm: 0.9999991620245321, iteration: 159058
loss: 1.0248795747756958,grad_norm: 0.9934485914863178, iteration: 159059
loss: 1.0291534662246704,grad_norm: 0.9999993563746337, iteration: 159060
loss: 0.9726064205169678,grad_norm: 0.9830911623788533, iteration: 159061
loss: 1.0105072259902954,grad_norm: 0.9236207770504407, iteration: 159062
loss: 1.009166955947876,grad_norm: 0.9999990123603508, iteration: 159063
loss: 1.0043009519577026,grad_norm: 0.8842971051437848, iteration: 159064
loss: 1.0180052518844604,grad_norm: 0.9999991589522237, iteration: 159065
loss: 1.0260666608810425,grad_norm: 0.9999991320799079, iteration: 159066
loss: 1.0052679777145386,grad_norm: 0.9619917143507727, iteration: 159067
loss: 0.9873262047767639,grad_norm: 0.9057369839883519, iteration: 159068
loss: 1.0301945209503174,grad_norm: 0.9999992563248555, iteration: 159069
loss: 0.9645335078239441,grad_norm: 0.9791835973303662, iteration: 159070
loss: 0.9964783191680908,grad_norm: 0.8963236259859076, iteration: 159071
loss: 1.0298466682434082,grad_norm: 0.9388049314350825, iteration: 159072
loss: 1.0250009298324585,grad_norm: 0.9413875255854647, iteration: 159073
loss: 1.0237711668014526,grad_norm: 0.9999995910102095, iteration: 159074
loss: 1.0083448886871338,grad_norm: 0.9999990422963488, iteration: 159075
loss: 1.0183368921279907,grad_norm: 0.9999991685932853, iteration: 159076
loss: 1.0143179893493652,grad_norm: 0.9999990012199784, iteration: 159077
loss: 1.0052521228790283,grad_norm: 0.9174661676975332, iteration: 159078
loss: 1.183037519454956,grad_norm: 0.9999994230870685, iteration: 159079
loss: 1.017414927482605,grad_norm: 0.991378571149693, iteration: 159080
loss: 0.9923938512802124,grad_norm: 0.9999991511203395, iteration: 159081
loss: 0.9801375269889832,grad_norm: 0.9999992274577423, iteration: 159082
loss: 1.0133293867111206,grad_norm: 0.9597407640400019, iteration: 159083
loss: 0.9837943911552429,grad_norm: 0.999999221485477, iteration: 159084
loss: 1.0195982456207275,grad_norm: 0.9999991012713564, iteration: 159085
loss: 0.9965910315513611,grad_norm: 0.9999991841519266, iteration: 159086
loss: 1.0215431451797485,grad_norm: 0.9894001580721586, iteration: 159087
loss: 1.075583815574646,grad_norm: 0.999999341469607, iteration: 159088
loss: 1.0296056270599365,grad_norm: 0.9876963094818558, iteration: 159089
loss: 1.011095643043518,grad_norm: 0.8861730476675141, iteration: 159090
loss: 1.0207828283309937,grad_norm: 0.9337320462755904, iteration: 159091
loss: 0.9950141310691833,grad_norm: 0.9614263253719607, iteration: 159092
loss: 0.963134229183197,grad_norm: 0.9999991995202683, iteration: 159093
loss: 1.033056616783142,grad_norm: 0.9999999400253714, iteration: 159094
loss: 0.9975981116294861,grad_norm: 0.9999992066934291, iteration: 159095
loss: 1.0244290828704834,grad_norm: 0.9999992053266962, iteration: 159096
loss: 1.0112627744674683,grad_norm: 0.924508949579179, iteration: 159097
loss: 1.0123100280761719,grad_norm: 0.9999991664127206, iteration: 159098
loss: 0.9894862771034241,grad_norm: 0.9999992256256718, iteration: 159099
loss: 0.9733474850654602,grad_norm: 0.9999991280193431, iteration: 159100
loss: 0.9867222905158997,grad_norm: 0.9999990160305375, iteration: 159101
loss: 1.000826358795166,grad_norm: 0.9367297353622095, iteration: 159102
loss: 1.0183916091918945,grad_norm: 0.9999991810311505, iteration: 159103
loss: 1.0023053884506226,grad_norm: 0.9760839397287002, iteration: 159104
loss: 1.013227105140686,grad_norm: 0.9637160130131972, iteration: 159105
loss: 0.9941473603248596,grad_norm: 0.9229908030433465, iteration: 159106
loss: 1.0008940696716309,grad_norm: 0.8733132225537041, iteration: 159107
loss: 1.0023235082626343,grad_norm: 0.9999992086915005, iteration: 159108
loss: 1.0208967924118042,grad_norm: 0.9881004437611804, iteration: 159109
loss: 0.996637761592865,grad_norm: 0.8117161036967702, iteration: 159110
loss: 1.0247325897216797,grad_norm: 0.9345437656495688, iteration: 159111
loss: 0.9998592734336853,grad_norm: 0.7904313567108563, iteration: 159112
loss: 1.0029089450836182,grad_norm: 0.9999991771566952, iteration: 159113
loss: 1.0550440549850464,grad_norm: 0.999998934437634, iteration: 159114
loss: 1.0362683534622192,grad_norm: 0.9999991526581397, iteration: 159115
loss: 0.9809996485710144,grad_norm: 0.9638008700018835, iteration: 159116
loss: 1.0175704956054688,grad_norm: 0.9999991102380038, iteration: 159117
loss: 0.9876696467399597,grad_norm: 0.9999991139459579, iteration: 159118
loss: 0.9648258686065674,grad_norm: 0.9999990422603127, iteration: 159119
loss: 0.9749762415885925,grad_norm: 0.936924172200472, iteration: 159120
loss: 1.0084648132324219,grad_norm: 0.9257360498263734, iteration: 159121
loss: 1.0015172958374023,grad_norm: 0.9515122796513444, iteration: 159122
loss: 0.9902414083480835,grad_norm: 0.9999990363405228, iteration: 159123
loss: 0.9715204238891602,grad_norm: 0.8440806436443332, iteration: 159124
loss: 0.988193154335022,grad_norm: 0.9999990664799533, iteration: 159125
loss: 1.0000559091567993,grad_norm: 0.8209499344044547, iteration: 159126
loss: 1.0038458108901978,grad_norm: 0.9999992381273123, iteration: 159127
loss: 0.9878080487251282,grad_norm: 0.9435615804381968, iteration: 159128
loss: 1.0219374895095825,grad_norm: 0.9999992192720981, iteration: 159129
loss: 0.9614675045013428,grad_norm: 0.9999993165343783, iteration: 159130
loss: 0.9931809306144714,grad_norm: 0.9679741973723186, iteration: 159131
loss: 0.9883806705474854,grad_norm: 0.9229105686264298, iteration: 159132
loss: 0.9619084596633911,grad_norm: 0.9999989530410698, iteration: 159133
loss: 1.0237758159637451,grad_norm: 0.9999992109084989, iteration: 159134
loss: 0.984329342842102,grad_norm: 0.9999992307537627, iteration: 159135
loss: 1.0328088998794556,grad_norm: 0.9999996563294169, iteration: 159136
loss: 1.0040628910064697,grad_norm: 0.9999992038618896, iteration: 159137
loss: 1.003945231437683,grad_norm: 0.9999990862964526, iteration: 159138
loss: 1.0164936780929565,grad_norm: 0.8806175954056289, iteration: 159139
loss: 1.0312738418579102,grad_norm: 0.8902964411330578, iteration: 159140
loss: 1.013106346130371,grad_norm: 0.9999989284855311, iteration: 159141
loss: 1.032274603843689,grad_norm: 0.9999998164005721, iteration: 159142
loss: 0.9460879564285278,grad_norm: 0.9999991986900558, iteration: 159143
loss: 1.0065854787826538,grad_norm: 0.9999991890603989, iteration: 159144
loss: 0.9716979265213013,grad_norm: 0.9958217993716303, iteration: 159145
loss: 0.9677473306655884,grad_norm: 0.9999990768398537, iteration: 159146
loss: 1.0336402654647827,grad_norm: 0.9999990734107412, iteration: 159147
loss: 1.0352866649627686,grad_norm: 0.8044826156466491, iteration: 159148
loss: 0.9772071838378906,grad_norm: 0.9999990710012022, iteration: 159149
loss: 1.0193543434143066,grad_norm: 0.9041344000759457, iteration: 159150
loss: 1.0128426551818848,grad_norm: 0.9999993824790402, iteration: 159151
loss: 1.0114644765853882,grad_norm: 0.9379375479880295, iteration: 159152
loss: 0.9580962061882019,grad_norm: 0.9999990303656163, iteration: 159153
loss: 0.9582690000534058,grad_norm: 0.9965362315674163, iteration: 159154
loss: 1.0059362649917603,grad_norm: 0.9999992568221323, iteration: 159155
loss: 0.9932171106338501,grad_norm: 0.9999994535087258, iteration: 159156
loss: 0.9852047562599182,grad_norm: 0.9999990566460951, iteration: 159157
loss: 1.0228654146194458,grad_norm: 0.9942167347802968, iteration: 159158
loss: 0.964026927947998,grad_norm: 0.9564549068459732, iteration: 159159
loss: 0.9505831599235535,grad_norm: 0.9999990417977349, iteration: 159160
loss: 1.006147027015686,grad_norm: 0.9319973468104711, iteration: 159161
loss: 1.0070384740829468,grad_norm: 0.9136476425378584, iteration: 159162
loss: 1.0088999271392822,grad_norm: 0.9999992621537914, iteration: 159163
loss: 0.9764789938926697,grad_norm: 0.9207288867947387, iteration: 159164
loss: 0.9998385310173035,grad_norm: 0.9999997148286002, iteration: 159165
loss: 0.9936109781265259,grad_norm: 0.9999990633658565, iteration: 159166
loss: 0.9815421104431152,grad_norm: 0.9999992009633223, iteration: 159167
loss: 1.008628487586975,grad_norm: 0.9944386828408757, iteration: 159168
loss: 0.9893125295639038,grad_norm: 0.9368573524098166, iteration: 159169
loss: 1.0018024444580078,grad_norm: 0.9999992459414979, iteration: 159170
loss: 1.0242992639541626,grad_norm: 0.9999992153673802, iteration: 159171
loss: 1.0128405094146729,grad_norm: 0.9360874878029819, iteration: 159172
loss: 1.1317905187606812,grad_norm: 0.9999998140888351, iteration: 159173
loss: 0.9864651560783386,grad_norm: 0.9999996455267725, iteration: 159174
loss: 0.9777389168739319,grad_norm: 0.9999991958822915, iteration: 159175
loss: 0.9948773384094238,grad_norm: 0.9999991726813835, iteration: 159176
loss: 1.0182276964187622,grad_norm: 0.9999992218307031, iteration: 159177
loss: 1.1219123601913452,grad_norm: 0.9999996968604374, iteration: 159178
loss: 0.9941250085830688,grad_norm: 0.9999991713651437, iteration: 159179
loss: 0.9652617573738098,grad_norm: 0.9999991214934084, iteration: 159180
loss: 1.0039986371994019,grad_norm: 0.8359600793194623, iteration: 159181
loss: 1.0221275091171265,grad_norm: 0.9999990529721094, iteration: 159182
loss: 0.9791749119758606,grad_norm: 0.9999991290754334, iteration: 159183
loss: 0.9888849854469299,grad_norm: 0.8309051973830536, iteration: 159184
loss: 1.0206525325775146,grad_norm: 0.9999990485391815, iteration: 159185
loss: 0.9926285743713379,grad_norm: 0.9999991226286843, iteration: 159186
loss: 0.9688336253166199,grad_norm: 0.9999991352102616, iteration: 159187
loss: 1.03437340259552,grad_norm: 0.9234275872910918, iteration: 159188
loss: 1.00446355342865,grad_norm: 0.9785967853440042, iteration: 159189
loss: 1.0266942977905273,grad_norm: 0.972253849201547, iteration: 159190
loss: 0.9654780030250549,grad_norm: 0.9875413973888948, iteration: 159191
loss: 0.9772845506668091,grad_norm: 0.9999990986096828, iteration: 159192
loss: 0.9995076060295105,grad_norm: 0.9999991051383277, iteration: 159193
loss: 0.9963287115097046,grad_norm: 0.9999991576040298, iteration: 159194
loss: 1.0326734781265259,grad_norm: 0.9999990944810939, iteration: 159195
loss: 0.9903356432914734,grad_norm: 0.9999990827996325, iteration: 159196
loss: 0.9696758985519409,grad_norm: 0.995438559966381, iteration: 159197
loss: 1.0289931297302246,grad_norm: 0.9999997321105352, iteration: 159198
loss: 0.9966359734535217,grad_norm: 0.9999990574578916, iteration: 159199
loss: 0.9763324856758118,grad_norm: 0.9999991024227669, iteration: 159200
loss: 0.9534097909927368,grad_norm: 0.9999991422590265, iteration: 159201
loss: 1.0066503286361694,grad_norm: 0.9999990518965275, iteration: 159202
loss: 1.0079320669174194,grad_norm: 0.9486905520979976, iteration: 159203
loss: 0.9776989221572876,grad_norm: 0.8554309668265173, iteration: 159204
loss: 0.9552773237228394,grad_norm: 0.9467632735370248, iteration: 159205
loss: 0.9941818118095398,grad_norm: 0.9999992488954874, iteration: 159206
loss: 1.0311580896377563,grad_norm: 0.9999990988655203, iteration: 159207
loss: 1.02694833278656,grad_norm: 0.9512990011951306, iteration: 159208
loss: 0.9590063691139221,grad_norm: 0.995781338697198, iteration: 159209
loss: 0.9970440864562988,grad_norm: 0.9999989148581425, iteration: 159210
loss: 1.009972333908081,grad_norm: 0.999999114355342, iteration: 159211
loss: 1.032334566116333,grad_norm: 0.9091271422208497, iteration: 159212
loss: 0.9945505857467651,grad_norm: 0.9500067233327911, iteration: 159213
loss: 0.9752954840660095,grad_norm: 0.9999992190949383, iteration: 159214
loss: 0.9847339391708374,grad_norm: 0.9798659395450812, iteration: 159215
loss: 0.9924629926681519,grad_norm: 0.9038882454496007, iteration: 159216
loss: 1.009670615196228,grad_norm: 0.9999990528143693, iteration: 159217
loss: 0.9661844372749329,grad_norm: 0.992875146172891, iteration: 159218
loss: 0.9785006046295166,grad_norm: 0.9999990826652141, iteration: 159219
loss: 1.0000944137573242,grad_norm: 0.9921259263132907, iteration: 159220
loss: 1.001966953277588,grad_norm: 0.9921204489983613, iteration: 159221
loss: 1.0273503065109253,grad_norm: 0.9087752486625865, iteration: 159222
loss: 0.9870223999023438,grad_norm: 0.986622697310251, iteration: 159223
loss: 1.0107206106185913,grad_norm: 0.9999990434320647, iteration: 159224
loss: 0.9772140979766846,grad_norm: 0.9999993229954087, iteration: 159225
loss: 0.945152759552002,grad_norm: 0.8534707498892463, iteration: 159226
loss: 0.9602452516555786,grad_norm: 0.9298191540272897, iteration: 159227
loss: 1.004607915878296,grad_norm: 0.9999992375282125, iteration: 159228
loss: 1.0081918239593506,grad_norm: 0.9999991016503453, iteration: 159229
loss: 0.9767020344734192,grad_norm: 0.9999992272482002, iteration: 159230
loss: 1.0183091163635254,grad_norm: 0.984394010021269, iteration: 159231
loss: 1.0048753023147583,grad_norm: 0.9999991532284155, iteration: 159232
loss: 0.9923688173294067,grad_norm: 0.874931603951884, iteration: 159233
loss: 1.0133687257766724,grad_norm: 0.8852320562663784, iteration: 159234
loss: 1.0133599042892456,grad_norm: 0.9999990912073109, iteration: 159235
loss: 1.0321100950241089,grad_norm: 0.9874257775777306, iteration: 159236
loss: 1.0214234590530396,grad_norm: 0.7512685864426623, iteration: 159237
loss: 0.9594995379447937,grad_norm: 0.9999990886720176, iteration: 159238
loss: 0.9764337539672852,grad_norm: 0.9999991237034587, iteration: 159239
loss: 1.0097049474716187,grad_norm: 0.9795046551951819, iteration: 159240
loss: 1.0379176139831543,grad_norm: 0.9508911317589634, iteration: 159241
loss: 0.9997509717941284,grad_norm: 0.8597426864129903, iteration: 159242
loss: 0.9879668951034546,grad_norm: 0.7881680878108515, iteration: 159243
loss: 1.045572280883789,grad_norm: 0.9999990835238525, iteration: 159244
loss: 1.0285755395889282,grad_norm: 0.9999990211991259, iteration: 159245
loss: 0.9939841032028198,grad_norm: 0.9018197156375202, iteration: 159246
loss: 1.044050931930542,grad_norm: 0.9999992906044898, iteration: 159247
loss: 0.9919168949127197,grad_norm: 0.8497518793323035, iteration: 159248
loss: 0.9809143543243408,grad_norm: 0.95445192005326, iteration: 159249
loss: 1.0175873041152954,grad_norm: 0.9999989716736531, iteration: 159250
loss: 1.018942952156067,grad_norm: 0.985730175017489, iteration: 159251
loss: 0.9612956047058105,grad_norm: 0.8573441757347754, iteration: 159252
loss: 1.0038028955459595,grad_norm: 0.95863864349168, iteration: 159253
loss: 1.0211671590805054,grad_norm: 0.9999989274500597, iteration: 159254
loss: 0.9666286706924438,grad_norm: 0.8246800287129259, iteration: 159255
loss: 1.0063462257385254,grad_norm: 0.999998948326665, iteration: 159256
loss: 0.98703533411026,grad_norm: 0.9614502647841706, iteration: 159257
loss: 0.9892411828041077,grad_norm: 0.9296318687212903, iteration: 159258
loss: 1.002086877822876,grad_norm: 0.9020910502168177, iteration: 159259
loss: 0.9795576930046082,grad_norm: 0.9999991453401461, iteration: 159260
loss: 1.005479097366333,grad_norm: 0.8963441555680338, iteration: 159261
loss: 0.9671895503997803,grad_norm: 0.911349157176081, iteration: 159262
loss: 0.9474852085113525,grad_norm: 0.9866779791871124, iteration: 159263
loss: 1.0189059972763062,grad_norm: 0.998194679591543, iteration: 159264
loss: 1.0228264331817627,grad_norm: 0.9999989990185275, iteration: 159265
loss: 1.0442233085632324,grad_norm: 0.9999991191237231, iteration: 159266
loss: 1.0008246898651123,grad_norm: 0.9999990714148368, iteration: 159267
loss: 1.0054454803466797,grad_norm: 0.9643332646207368, iteration: 159268
loss: 1.0380014181137085,grad_norm: 0.8531443802524533, iteration: 159269
loss: 1.044119119644165,grad_norm: 0.999999217780214, iteration: 159270
loss: 1.0335757732391357,grad_norm: 0.9532054738709428, iteration: 159271
loss: 1.0127484798431396,grad_norm: 0.9999991506486569, iteration: 159272
loss: 0.9936372637748718,grad_norm: 0.8717025211453125, iteration: 159273
loss: 1.0031453371047974,grad_norm: 0.9999992246874231, iteration: 159274
loss: 0.978875994682312,grad_norm: 0.986933860669077, iteration: 159275
loss: 0.9855502843856812,grad_norm: 0.9999998261512498, iteration: 159276
loss: 1.0108060836791992,grad_norm: 0.8314555655976001, iteration: 159277
loss: 1.0004838705062866,grad_norm: 0.8408057027872173, iteration: 159278
loss: 1.0081164836883545,grad_norm: 0.9229767015184308, iteration: 159279
loss: 1.0296766757965088,grad_norm: 0.9999991168187818, iteration: 159280
loss: 1.015336275100708,grad_norm: 0.8959619875885699, iteration: 159281
loss: 0.9664152264595032,grad_norm: 0.9931254838636291, iteration: 159282
loss: 1.0053770542144775,grad_norm: 0.9999993657938407, iteration: 159283
loss: 1.0299127101898193,grad_norm: 0.8971891897737642, iteration: 159284
loss: 0.9770538210868835,grad_norm: 0.9589405223918709, iteration: 159285
loss: 1.0484070777893066,grad_norm: 0.9999992317817783, iteration: 159286
loss: 1.0046169757843018,grad_norm: 0.9346333226079466, iteration: 159287
loss: 0.9901077151298523,grad_norm: 0.9999990531187231, iteration: 159288
loss: 1.002922773361206,grad_norm: 0.9826795357705957, iteration: 159289
loss: 1.0056618452072144,grad_norm: 0.9549065690333793, iteration: 159290
loss: 1.0335363149642944,grad_norm: 0.9999992652435274, iteration: 159291
loss: 0.9862856268882751,grad_norm: 0.9999990251042264, iteration: 159292
loss: 1.0100754499435425,grad_norm: 0.999999123419419, iteration: 159293
loss: 0.9855755567550659,grad_norm: 0.9519758060198114, iteration: 159294
loss: 1.011682152748108,grad_norm: 0.9999992560846523, iteration: 159295
loss: 0.9973298907279968,grad_norm: 0.9512948475410068, iteration: 159296
loss: 0.9846420288085938,grad_norm: 0.8854530166076809, iteration: 159297
loss: 0.97871994972229,grad_norm: 0.9999992377626737, iteration: 159298
loss: 0.9993450045585632,grad_norm: 0.999999187618315, iteration: 159299
loss: 1.0148192644119263,grad_norm: 0.9805867721972635, iteration: 159300
loss: 1.030655026435852,grad_norm: 0.9999990921078813, iteration: 159301
loss: 0.9872263073921204,grad_norm: 0.9161868120853877, iteration: 159302
loss: 1.0053074359893799,grad_norm: 0.9999991232055064, iteration: 159303
loss: 0.9580277800559998,grad_norm: 0.999999193071289, iteration: 159304
loss: 1.0015798807144165,grad_norm: 0.9239365217560708, iteration: 159305
loss: 1.012581706047058,grad_norm: 0.980009680447476, iteration: 159306
loss: 1.0024752616882324,grad_norm: 0.9739970572652702, iteration: 159307
loss: 1.009287714958191,grad_norm: 0.9999991892482383, iteration: 159308
loss: 1.0099780559539795,grad_norm: 0.9999992889913768, iteration: 159309
loss: 1.020736575126648,grad_norm: 0.8289775945197896, iteration: 159310
loss: 1.0559511184692383,grad_norm: 0.9999994744972445, iteration: 159311
loss: 1.025499701499939,grad_norm: 0.9999992624593482, iteration: 159312
loss: 1.0041818618774414,grad_norm: 0.9193382132294834, iteration: 159313
loss: 1.0181206464767456,grad_norm: 0.9906657339133156, iteration: 159314
loss: 0.9947527647018433,grad_norm: 0.7773175140951692, iteration: 159315
loss: 0.9529523253440857,grad_norm: 0.8326307084227307, iteration: 159316
loss: 1.0163547992706299,grad_norm: 0.9095127688601577, iteration: 159317
loss: 0.9806012511253357,grad_norm: 0.9999990792853717, iteration: 159318
loss: 1.0037131309509277,grad_norm: 0.9999992416540839, iteration: 159319
loss: 1.0009409189224243,grad_norm: 0.9999991064482825, iteration: 159320
loss: 1.0228216648101807,grad_norm: 0.999998999179126, iteration: 159321
loss: 1.0090364217758179,grad_norm: 0.9999991732698541, iteration: 159322
loss: 0.9983828663825989,grad_norm: 0.9999996866450498, iteration: 159323
loss: 1.016997218132019,grad_norm: 0.9999992695228929, iteration: 159324
loss: 0.9658835530281067,grad_norm: 0.9999989829999945, iteration: 159325
loss: 0.9662030935287476,grad_norm: 0.9688117514534448, iteration: 159326
loss: 0.9736762642860413,grad_norm: 0.9126545030052728, iteration: 159327
loss: 0.9775104522705078,grad_norm: 0.9037975126296254, iteration: 159328
loss: 0.9772129654884338,grad_norm: 0.9039504882843504, iteration: 159329
loss: 0.970157265663147,grad_norm: 0.999999035505763, iteration: 159330
loss: 1.0248796939849854,grad_norm: 0.9999990969539918, iteration: 159331
loss: 1.0216349363327026,grad_norm: 0.9999995596734871, iteration: 159332
loss: 0.9933215379714966,grad_norm: 0.9999991818954092, iteration: 159333
loss: 1.0102598667144775,grad_norm: 0.9999992100271828, iteration: 159334
loss: 1.0014656782150269,grad_norm: 0.9999993134713189, iteration: 159335
loss: 1.0009855031967163,grad_norm: 0.9245212715007077, iteration: 159336
loss: 1.0189228057861328,grad_norm: 0.9575690058982878, iteration: 159337
loss: 0.9961447715759277,grad_norm: 0.9906810138118969, iteration: 159338
loss: 1.0109578371047974,grad_norm: 0.843078742790412, iteration: 159339
loss: 1.0390639305114746,grad_norm: 0.9347834805743326, iteration: 159340
loss: 0.9915579557418823,grad_norm: 0.999999017520531, iteration: 159341
loss: 1.0050649642944336,grad_norm: 0.9533889459882466, iteration: 159342
loss: 0.9877803325653076,grad_norm: 0.9715241197861059, iteration: 159343
loss: 0.9984798431396484,grad_norm: 0.9999990684800775, iteration: 159344
loss: 1.0105235576629639,grad_norm: 0.9999989471839525, iteration: 159345
loss: 1.0137664079666138,grad_norm: 0.9999989995351543, iteration: 159346
loss: 1.0210907459259033,grad_norm: 0.9553681724365439, iteration: 159347
loss: 1.0048120021820068,grad_norm: 0.999999147123575, iteration: 159348
loss: 1.0284711122512817,grad_norm: 0.9403521063931034, iteration: 159349
loss: 1.0141017436981201,grad_norm: 0.8591424232039981, iteration: 159350
loss: 1.0214742422103882,grad_norm: 0.9999992145561827, iteration: 159351
loss: 1.0153573751449585,grad_norm: 0.9999991723545992, iteration: 159352
loss: 0.9833827018737793,grad_norm: 0.9999991314974925, iteration: 159353
loss: 0.9979612231254578,grad_norm: 0.9999991163553744, iteration: 159354
loss: 0.9806943535804749,grad_norm: 0.9999991545884142, iteration: 159355
loss: 0.9983371496200562,grad_norm: 0.9999990736088737, iteration: 159356
loss: 1.0045514106750488,grad_norm: 0.8746133774453814, iteration: 159357
loss: 1.0009400844573975,grad_norm: 0.9999991633554218, iteration: 159358
loss: 1.0214687585830688,grad_norm: 0.9999992130845585, iteration: 159359
loss: 1.0302362442016602,grad_norm: 0.9740025106278227, iteration: 159360
loss: 0.9943878054618835,grad_norm: 0.9265990236226832, iteration: 159361
loss: 0.987816572189331,grad_norm: 0.9456567104700289, iteration: 159362
loss: 1.0175859928131104,grad_norm: 0.9960215341753846, iteration: 159363
loss: 0.9980262517929077,grad_norm: 0.8770043797206929, iteration: 159364
loss: 1.0332014560699463,grad_norm: 0.9999994609257291, iteration: 159365
loss: 0.9783850908279419,grad_norm: 0.9480664794326996, iteration: 159366
loss: 0.9775524735450745,grad_norm: 0.8599465241249261, iteration: 159367
loss: 0.998128354549408,grad_norm: 0.9999990934841781, iteration: 159368
loss: 1.0304559469223022,grad_norm: 0.9756577025856256, iteration: 159369
loss: 1.1736891269683838,grad_norm: 0.9999996510653754, iteration: 159370
loss: 1.013386845588684,grad_norm: 0.999999251959345, iteration: 159371
loss: 0.990430474281311,grad_norm: 0.9548187114322856, iteration: 159372
loss: 1.0223515033721924,grad_norm: 0.9672357030827685, iteration: 159373
loss: 1.003017544746399,grad_norm: 0.9437076198661193, iteration: 159374
loss: 1.0590946674346924,grad_norm: 0.9999995533606214, iteration: 159375
loss: 1.0514249801635742,grad_norm: 0.9999990714860225, iteration: 159376
loss: 1.008220911026001,grad_norm: 0.9114911072483811, iteration: 159377
loss: 0.9886803030967712,grad_norm: 0.7850826508185521, iteration: 159378
loss: 1.0223219394683838,grad_norm: 0.9287520280074315, iteration: 159379
loss: 1.0464329719543457,grad_norm: 0.9999991263709489, iteration: 159380
loss: 1.0150011777877808,grad_norm: 0.9999992424308615, iteration: 159381
loss: 0.963238537311554,grad_norm: 0.9999990396462782, iteration: 159382
loss: 0.9705315828323364,grad_norm: 0.9490624339461535, iteration: 159383
loss: 1.0043652057647705,grad_norm: 0.9999993899980358, iteration: 159384
loss: 0.9907897114753723,grad_norm: 0.9731844498360253, iteration: 159385
loss: 1.0198965072631836,grad_norm: 0.9999990471334471, iteration: 159386
loss: 1.0103119611740112,grad_norm: 0.9206716312609543, iteration: 159387
loss: 1.0043224096298218,grad_norm: 0.9999993464020817, iteration: 159388
loss: 0.9721611738204956,grad_norm: 0.9944122926868447, iteration: 159389
loss: 0.9936046004295349,grad_norm: 0.9452345957464138, iteration: 159390
loss: 0.9837356209754944,grad_norm: 0.9999990911431855, iteration: 159391
loss: 1.0471882820129395,grad_norm: 0.902709823860309, iteration: 159392
loss: 0.9631475210189819,grad_norm: 0.9259452977010637, iteration: 159393
loss: 0.9841591715812683,grad_norm: 0.9999991529083719, iteration: 159394
loss: 1.0082488059997559,grad_norm: 0.9999993400009993, iteration: 159395
loss: 0.9533233642578125,grad_norm: 0.9999991968590625, iteration: 159396
loss: 0.9791083931922913,grad_norm: 0.9999990104436451, iteration: 159397
loss: 0.9940155744552612,grad_norm: 0.9999990308326941, iteration: 159398
loss: 1.0032923221588135,grad_norm: 0.9095918936516647, iteration: 159399
loss: 0.9896185398101807,grad_norm: 0.9999990820878897, iteration: 159400
loss: 0.9975974559783936,grad_norm: 0.9999996916120344, iteration: 159401
loss: 1.3510780334472656,grad_norm: 0.9999993896406659, iteration: 159402
loss: 1.0007187128067017,grad_norm: 0.9455514237647338, iteration: 159403
loss: 1.0470699071884155,grad_norm: 1.0000000020081696, iteration: 159404
loss: 0.9980973601341248,grad_norm: 0.9772634875872788, iteration: 159405
loss: 1.027996301651001,grad_norm: 0.99999889101013, iteration: 159406
loss: 0.9742074012756348,grad_norm: 0.9861542853499216, iteration: 159407
loss: 1.0076065063476562,grad_norm: 0.9999991357459785, iteration: 159408
loss: 0.998729944229126,grad_norm: 0.9141796393881649, iteration: 159409
loss: 0.9878868460655212,grad_norm: 0.9999997895772486, iteration: 159410
loss: 0.9955537915229797,grad_norm: 0.9920580572736533, iteration: 159411
loss: 1.0161055326461792,grad_norm: 0.8792462282160695, iteration: 159412
loss: 0.9618072509765625,grad_norm: 0.9999990669990443, iteration: 159413
loss: 1.0536383390426636,grad_norm: 0.9227369894525655, iteration: 159414
loss: 1.0087960958480835,grad_norm: 0.9999990884874985, iteration: 159415
loss: 1.0337433815002441,grad_norm: 0.9999991003514196, iteration: 159416
loss: 1.0023671388626099,grad_norm: 0.9746043923551438, iteration: 159417
loss: 0.9974491000175476,grad_norm: 0.9999992409198786, iteration: 159418
loss: 0.9877781867980957,grad_norm: 0.999999111427928, iteration: 159419
loss: 0.9851526618003845,grad_norm: 0.9999992286479048, iteration: 159420
loss: 1.013458490371704,grad_norm: 0.9578946477625899, iteration: 159421
loss: 1.000879168510437,grad_norm: 0.905332591371433, iteration: 159422
loss: 1.0333260297775269,grad_norm: 0.9150325588328687, iteration: 159423
loss: 1.0544723272323608,grad_norm: 0.9999991164008389, iteration: 159424
loss: 0.9853410720825195,grad_norm: 0.987312788134197, iteration: 159425
loss: 1.0036460161209106,grad_norm: 0.9999991590057195, iteration: 159426
loss: 0.9765952229499817,grad_norm: 0.9574051241874459, iteration: 159427
loss: 0.9926984310150146,grad_norm: 0.9244340589747468, iteration: 159428
loss: 0.9934710264205933,grad_norm: 0.9848611500157062, iteration: 159429
loss: 1.018385887145996,grad_norm: 0.9999994051600422, iteration: 159430
loss: 1.0019675493240356,grad_norm: 0.8223322340516926, iteration: 159431
loss: 0.9917649030685425,grad_norm: 0.9467379566190539, iteration: 159432
loss: 0.9980514645576477,grad_norm: 0.7514236964780887, iteration: 159433
loss: 1.009230375289917,grad_norm: 0.8499589723785865, iteration: 159434
loss: 1.00558602809906,grad_norm: 0.9999996170715397, iteration: 159435
loss: 1.0218504667282104,grad_norm: 0.9401201942682331, iteration: 159436
loss: 1.1105444431304932,grad_norm: 0.9999999081755342, iteration: 159437
loss: 0.9924272894859314,grad_norm: 0.9999999691262823, iteration: 159438
loss: 0.9772418141365051,grad_norm: 0.970155530829954, iteration: 159439
loss: 1.0163273811340332,grad_norm: 0.9999992482426334, iteration: 159440
loss: 1.0161975622177124,grad_norm: 0.9999992391778634, iteration: 159441
loss: 0.9944092035293579,grad_norm: 0.9413627501145033, iteration: 159442
loss: 0.9776297807693481,grad_norm: 0.9999990381052868, iteration: 159443
loss: 1.002156376838684,grad_norm: 0.9999990618116102, iteration: 159444
loss: 0.97838294506073,grad_norm: 0.8007134330572372, iteration: 159445
loss: 1.0162897109985352,grad_norm: 0.9081464560349716, iteration: 159446
loss: 1.014347791671753,grad_norm: 0.9738144566957538, iteration: 159447
loss: 0.9992811679840088,grad_norm: 0.8733775080501555, iteration: 159448
loss: 0.979278564453125,grad_norm: 0.9463582509289887, iteration: 159449
loss: 1.003197431564331,grad_norm: 0.9999991755705063, iteration: 159450
loss: 0.9759330153465271,grad_norm: 0.9019956520735453, iteration: 159451
loss: 0.940284252166748,grad_norm: 0.999999293882501, iteration: 159452
loss: 0.9969079494476318,grad_norm: 0.8408512688172268, iteration: 159453
loss: 1.005309820175171,grad_norm: 0.9999991683078405, iteration: 159454
loss: 0.9942135214805603,grad_norm: 0.9551258506711153, iteration: 159455
loss: 1.0001442432403564,grad_norm: 0.9497463814413514, iteration: 159456
loss: 0.9944755434989929,grad_norm: 0.964094144605074, iteration: 159457
loss: 1.0182603597640991,grad_norm: 0.9999992607779739, iteration: 159458
loss: 1.039412498474121,grad_norm: 0.9999989636133455, iteration: 159459
loss: 0.9964460134506226,grad_norm: 0.9752232761510327, iteration: 159460
loss: 0.9730203747749329,grad_norm: 0.9999992344452082, iteration: 159461
loss: 0.9694868326187134,grad_norm: 0.7955646785212604, iteration: 159462
loss: 1.0048730373382568,grad_norm: 0.9632318693373846, iteration: 159463
loss: 1.0129276514053345,grad_norm: 0.9999990583137314, iteration: 159464
loss: 0.9763004183769226,grad_norm: 0.9288597292043821, iteration: 159465
loss: 0.9977673292160034,grad_norm: 0.8572561191507427, iteration: 159466
loss: 0.965827465057373,grad_norm: 0.9328637735151378, iteration: 159467
loss: 0.9766377210617065,grad_norm: 0.9694376906535229, iteration: 159468
loss: 1.0211915969848633,grad_norm: 0.9999991862359786, iteration: 159469
loss: 1.008723258972168,grad_norm: 0.9369010303335881, iteration: 159470
loss: 1.0179362297058105,grad_norm: 0.9999991268020096, iteration: 159471
loss: 0.9938023090362549,grad_norm: 0.7962376550396796, iteration: 159472
loss: 0.9652764797210693,grad_norm: 0.972029510330319, iteration: 159473
loss: 1.0097166299819946,grad_norm: 0.9165332130085511, iteration: 159474
loss: 1.0638266801834106,grad_norm: 0.9749812681667347, iteration: 159475
loss: 0.9826902151107788,grad_norm: 0.9999992496688248, iteration: 159476
loss: 1.001866340637207,grad_norm: 0.9999990976439321, iteration: 159477
loss: 1.025776982307434,grad_norm: 0.9999991369788602, iteration: 159478
loss: 1.0493273735046387,grad_norm: 0.999999255505196, iteration: 159479
loss: 0.9897805452346802,grad_norm: 0.9474124803270486, iteration: 159480
loss: 0.9817417860031128,grad_norm: 0.9404849518480236, iteration: 159481
loss: 1.0089036226272583,grad_norm: 0.9999990970752478, iteration: 159482
loss: 1.0038599967956543,grad_norm: 0.9999991083926187, iteration: 159483
loss: 1.0333172082901,grad_norm: 0.999999719106832, iteration: 159484
loss: 0.9892595410346985,grad_norm: 0.9700674828980019, iteration: 159485
loss: 1.047280192375183,grad_norm: 0.9999990906635856, iteration: 159486
loss: 0.9922350645065308,grad_norm: 0.9610485596214541, iteration: 159487
loss: 0.9921644330024719,grad_norm: 0.8510814153442623, iteration: 159488
loss: 1.0199503898620605,grad_norm: 0.9300100631792944, iteration: 159489
loss: 0.9920095205307007,grad_norm: 0.999999404320873, iteration: 159490
loss: 1.0184334516525269,grad_norm: 0.999999322866842, iteration: 159491
loss: 1.0267269611358643,grad_norm: 0.9999991346706879, iteration: 159492
loss: 0.9892039895057678,grad_norm: 0.814518339418589, iteration: 159493
loss: 1.0107152462005615,grad_norm: 0.9999992206157213, iteration: 159494
loss: 1.0260181427001953,grad_norm: 0.9999992947954743, iteration: 159495
loss: 1.0976505279541016,grad_norm: 0.9999992261895885, iteration: 159496
loss: 1.0079853534698486,grad_norm: 0.9471870341066576, iteration: 159497
loss: 1.0051432847976685,grad_norm: 0.9892040857539772, iteration: 159498
loss: 1.0000931024551392,grad_norm: 0.9077867214822796, iteration: 159499
loss: 1.0153785943984985,grad_norm: 0.8709936620138846, iteration: 159500
loss: 1.0271044969558716,grad_norm: 0.8209302263825593, iteration: 159501
loss: 0.9765369296073914,grad_norm: 0.9375491010702361, iteration: 159502
loss: 0.9974589347839355,grad_norm: 0.9999999265394636, iteration: 159503
loss: 1.0043941736221313,grad_norm: 0.9999992559869653, iteration: 159504
loss: 1.0036578178405762,grad_norm: 0.9999991194766005, iteration: 159505
loss: 1.0083775520324707,grad_norm: 0.9999991717713692, iteration: 159506
loss: 1.0044341087341309,grad_norm: 0.9698801825378369, iteration: 159507
loss: 1.018467664718628,grad_norm: 0.9999991855095243, iteration: 159508
loss: 1.0315606594085693,grad_norm: 0.974448576691885, iteration: 159509
loss: 0.9544399380683899,grad_norm: 0.9999989834454309, iteration: 159510
loss: 1.0224899053573608,grad_norm: 0.8318162717600026, iteration: 159511
loss: 0.9841015338897705,grad_norm: 0.8802091169876014, iteration: 159512
loss: 1.0064380168914795,grad_norm: 0.9999989937561896, iteration: 159513
loss: 0.9955955147743225,grad_norm: 0.9999991871400621, iteration: 159514
loss: 1.0830124616622925,grad_norm: 0.9999998472003444, iteration: 159515
loss: 1.0094609260559082,grad_norm: 0.9792585699294888, iteration: 159516
loss: 0.9592101573944092,grad_norm: 0.9516037754647887, iteration: 159517
loss: 1.0066187381744385,grad_norm: 0.9999990618008766, iteration: 159518
loss: 1.0029354095458984,grad_norm: 0.8414257960881075, iteration: 159519
loss: 0.9801484942436218,grad_norm: 0.9899110706580592, iteration: 159520
loss: 1.0000674724578857,grad_norm: 0.9999990747813772, iteration: 159521
loss: 1.0969526767730713,grad_norm: 0.9999991946339349, iteration: 159522
loss: 1.0105607509613037,grad_norm: 0.999999107462507, iteration: 159523
loss: 0.9842265248298645,grad_norm: 0.9999991292931115, iteration: 159524
loss: 1.0239325761795044,grad_norm: 0.9999990111983598, iteration: 159525
loss: 1.030305027961731,grad_norm: 0.9799908355641482, iteration: 159526
loss: 1.0041732788085938,grad_norm: 0.8730819982166949, iteration: 159527
loss: 1.1080824136734009,grad_norm: 0.9999997985589352, iteration: 159528
loss: 0.9880173802375793,grad_norm: 0.9999992383721855, iteration: 159529
loss: 1.0176095962524414,grad_norm: 0.9999997683778612, iteration: 159530
loss: 1.0064141750335693,grad_norm: 0.99999908579107, iteration: 159531
loss: 0.9964625835418701,grad_norm: 0.9999995285729105, iteration: 159532
loss: 0.9906601309776306,grad_norm: 0.96633318565655, iteration: 159533
loss: 1.0397430658340454,grad_norm: 0.9999997111160533, iteration: 159534
loss: 1.0719140768051147,grad_norm: 0.9999998725749439, iteration: 159535
loss: 0.9759919047355652,grad_norm: 0.9999990951456112, iteration: 159536
loss: 1.01638925075531,grad_norm: 0.842046004656062, iteration: 159537
loss: 0.9812304377555847,grad_norm: 0.9999990859551215, iteration: 159538
loss: 0.9967657923698425,grad_norm: 0.9550615419927176, iteration: 159539
loss: 0.9810426831245422,grad_norm: 0.9999991951473918, iteration: 159540
loss: 0.9553401470184326,grad_norm: 0.9430837044394975, iteration: 159541
loss: 1.0245481729507446,grad_norm: 0.9999990653331561, iteration: 159542
loss: 0.9868497848510742,grad_norm: 0.9999992338682782, iteration: 159543
loss: 0.9872142672538757,grad_norm: 0.8804508290995986, iteration: 159544
loss: 0.9652373194694519,grad_norm: 0.9999994256460831, iteration: 159545
loss: 1.0264842510223389,grad_norm: 0.9999991307752708, iteration: 159546
loss: 0.9911175966262817,grad_norm: 0.9921388517983021, iteration: 159547
loss: 0.9844824075698853,grad_norm: 0.9999990693618696, iteration: 159548
loss: 1.0200527906417847,grad_norm: 0.9999993012564938, iteration: 159549
loss: 1.0030231475830078,grad_norm: 0.9634486500168619, iteration: 159550
loss: 0.9616051316261292,grad_norm: 0.9572970133465406, iteration: 159551
loss: 1.0157166719436646,grad_norm: 0.9999994492843731, iteration: 159552
loss: 0.9341555833816528,grad_norm: 0.9446757965817018, iteration: 159553
loss: 1.0121036767959595,grad_norm: 0.9722288677307587, iteration: 159554
loss: 1.0044019222259521,grad_norm: 0.9674475074948521, iteration: 159555
loss: 0.9738280177116394,grad_norm: 0.9386411825715446, iteration: 159556
loss: 1.0070968866348267,grad_norm: 0.929113608546498, iteration: 159557
loss: 1.0186585187911987,grad_norm: 0.9011413833100816, iteration: 159558
loss: 1.0144340991973877,grad_norm: 0.9999990566112453, iteration: 159559
loss: 1.0796566009521484,grad_norm: 0.9999996531988734, iteration: 159560
loss: 0.9763156175613403,grad_norm: 0.999998999783866, iteration: 159561
loss: 0.9602876305580139,grad_norm: 0.8582688720710978, iteration: 159562
loss: 0.9911044239997864,grad_norm: 0.999999126011549, iteration: 159563
loss: 1.0171518325805664,grad_norm: 0.9999990859692987, iteration: 159564
loss: 1.0155085325241089,grad_norm: 0.9999990013929706, iteration: 159565
loss: 1.0145010948181152,grad_norm: 0.9543376969678288, iteration: 159566
loss: 1.0028036832809448,grad_norm: 0.8652774398545431, iteration: 159567
loss: 0.9789870381355286,grad_norm: 0.9999989916518668, iteration: 159568
loss: 1.0259672403335571,grad_norm: 0.9999991111872497, iteration: 159569
loss: 0.9892504215240479,grad_norm: 0.9999992036398477, iteration: 159570
loss: 1.0207949876785278,grad_norm: 0.9755715064792245, iteration: 159571
loss: 0.9938424229621887,grad_norm: 0.9999990105860201, iteration: 159572
loss: 0.9956715106964111,grad_norm: 0.9999990713012532, iteration: 159573
loss: 0.9673690795898438,grad_norm: 0.9999990916267889, iteration: 159574
loss: 1.0198900699615479,grad_norm: 0.9999990949049666, iteration: 159575
loss: 1.0294495820999146,grad_norm: 0.9999995210200258, iteration: 159576
loss: 1.015246033668518,grad_norm: 0.9999993672307343, iteration: 159577
loss: 1.0257394313812256,grad_norm: 0.9988975201742764, iteration: 159578
loss: 0.9883365035057068,grad_norm: 0.8478449868227733, iteration: 159579
loss: 0.9721719026565552,grad_norm: 0.7907073668563239, iteration: 159580
loss: 1.0372681617736816,grad_norm: 0.9999990593185567, iteration: 159581
loss: 1.0107275247573853,grad_norm: 0.9999989948945789, iteration: 159582
loss: 0.9870463013648987,grad_norm: 0.9999993063255379, iteration: 159583
loss: 1.0933687686920166,grad_norm: 0.9999994907025833, iteration: 159584
loss: 1.0395326614379883,grad_norm: 0.9999991203167179, iteration: 159585
loss: 1.0141297578811646,grad_norm: 0.7878507503756403, iteration: 159586
loss: 1.0007269382476807,grad_norm: 0.9999991571391534, iteration: 159587
loss: 0.9912365674972534,grad_norm: 0.9696929546628072, iteration: 159588
loss: 0.9943415522575378,grad_norm: 0.943772016254718, iteration: 159589
loss: 1.030158519744873,grad_norm: 0.9999990788511977, iteration: 159590
loss: 0.9839975833892822,grad_norm: 0.99169297215484, iteration: 159591
loss: 0.9955976605415344,grad_norm: 0.9716814157181348, iteration: 159592
loss: 1.0249935388565063,grad_norm: 0.9531455927934586, iteration: 159593
loss: 0.9756964445114136,grad_norm: 0.802471460381182, iteration: 159594
loss: 1.0093382596969604,grad_norm: 0.9999991306514209, iteration: 159595
loss: 1.0516159534454346,grad_norm: 0.999999454958945, iteration: 159596
loss: 1.0238672494888306,grad_norm: 0.9251984604012805, iteration: 159597
loss: 0.9627065062522888,grad_norm: 0.9293432360235052, iteration: 159598
loss: 1.0093998908996582,grad_norm: 0.9999993376632097, iteration: 159599
loss: 1.0469841957092285,grad_norm: 0.9999992565502577, iteration: 159600
loss: 0.9568945169448853,grad_norm: 0.9999991368778818, iteration: 159601
loss: 0.9946512579917908,grad_norm: 0.9177412758206069, iteration: 159602
loss: 0.9900298118591309,grad_norm: 0.9147411560104061, iteration: 159603
loss: 1.0166360139846802,grad_norm: 0.9999990259396512, iteration: 159604
loss: 0.9811169505119324,grad_norm: 0.999998956311687, iteration: 159605
loss: 0.9719156622886658,grad_norm: 0.9999995606843228, iteration: 159606
loss: 1.0181736946105957,grad_norm: 0.9999992294867843, iteration: 159607
loss: 1.0017993450164795,grad_norm: 0.93290299502416, iteration: 159608
loss: 1.015516996383667,grad_norm: 0.9610515029638597, iteration: 159609
loss: 1.1082465648651123,grad_norm: 0.9999992638053589, iteration: 159610
loss: 0.97934889793396,grad_norm: 0.9700881154255665, iteration: 159611
loss: 0.985477864742279,grad_norm: 0.9999991398143814, iteration: 159612
loss: 1.0117368698120117,grad_norm: 0.9646459503704604, iteration: 159613
loss: 0.9800087213516235,grad_norm: 0.9999992702624958, iteration: 159614
loss: 0.96700519323349,grad_norm: 0.9974677454936416, iteration: 159615
loss: 1.01310396194458,grad_norm: 0.9271452029746262, iteration: 159616
loss: 0.9775040149688721,grad_norm: 0.9606978483080753, iteration: 159617
loss: 0.9707770347595215,grad_norm: 0.9999990317798149, iteration: 159618
loss: 0.9988577365875244,grad_norm: 0.9999998248539163, iteration: 159619
loss: 0.9653410315513611,grad_norm: 0.9999992160388756, iteration: 159620
loss: 0.9820971488952637,grad_norm: 0.9999992425023382, iteration: 159621
loss: 0.9977731704711914,grad_norm: 0.9135552853989807, iteration: 159622
loss: 0.9961194396018982,grad_norm: 0.9607299351671118, iteration: 159623
loss: 0.9958832263946533,grad_norm: 0.999999086596219, iteration: 159624
loss: 0.9935922026634216,grad_norm: 0.9720796878775435, iteration: 159625
loss: 0.9755295515060425,grad_norm: 0.8614860679679274, iteration: 159626
loss: 0.9657735824584961,grad_norm: 0.9448354233732993, iteration: 159627
loss: 0.9728282690048218,grad_norm: 0.9635251100439981, iteration: 159628
loss: 1.2850933074951172,grad_norm: 0.999999930246141, iteration: 159629
loss: 0.9833179116249084,grad_norm: 0.9999991615915397, iteration: 159630
loss: 1.0175585746765137,grad_norm: 0.991118854792682, iteration: 159631
loss: 1.0093549489974976,grad_norm: 0.9999989405568808, iteration: 159632
loss: 1.2078986167907715,grad_norm: 0.9999990943194006, iteration: 159633
loss: 1.043875813484192,grad_norm: 0.9876379105020464, iteration: 159634
loss: 1.0065699815750122,grad_norm: 0.9999992388449616, iteration: 159635
loss: 1.0363116264343262,grad_norm: 0.9999997632574301, iteration: 159636
loss: 1.0783425569534302,grad_norm: 0.9999994571498835, iteration: 159637
loss: 1.070601463317871,grad_norm: 0.9999993977304367, iteration: 159638
loss: 1.0055863857269287,grad_norm: 0.884289669256046, iteration: 159639
loss: 0.9930575489997864,grad_norm: 0.9999991658980452, iteration: 159640
loss: 0.975872278213501,grad_norm: 0.9999991924513147, iteration: 159641
loss: 0.9887609481811523,grad_norm: 0.9741485875694055, iteration: 159642
loss: 0.9806621074676514,grad_norm: 0.9999990443713977, iteration: 159643
loss: 1.0194978713989258,grad_norm: 0.9999997308034803, iteration: 159644
loss: 0.9668937921524048,grad_norm: 0.9999990694025235, iteration: 159645
loss: 0.9988436698913574,grad_norm: 0.9140872532639382, iteration: 159646
loss: 1.0026432275772095,grad_norm: 0.9999992294133447, iteration: 159647
loss: 0.9754078388214111,grad_norm: 0.9999990094459769, iteration: 159648
loss: 1.013420581817627,grad_norm: 0.9509293443654596, iteration: 159649
loss: 1.0207736492156982,grad_norm: 0.9983522824824961, iteration: 159650
loss: 1.023470163345337,grad_norm: 0.9999991397719589, iteration: 159651
loss: 1.0329753160476685,grad_norm: 0.9999994638128409, iteration: 159652
loss: 1.0089921951293945,grad_norm: 0.9999991686870474, iteration: 159653
loss: 1.1233726739883423,grad_norm: 0.9999998562686676, iteration: 159654
loss: 0.9937999248504639,grad_norm: 0.9999992640920918, iteration: 159655
loss: 0.9826372265815735,grad_norm: 0.9320800092177094, iteration: 159656
loss: 1.0217903852462769,grad_norm: 0.9999992663013356, iteration: 159657
loss: 0.9902094006538391,grad_norm: 0.999999289407775, iteration: 159658
loss: 0.992638111114502,grad_norm: 0.9221458731511938, iteration: 159659
loss: 1.071825385093689,grad_norm: 0.9999998075593147, iteration: 159660
loss: 1.0067304372787476,grad_norm: 0.9999994228467005, iteration: 159661
loss: 1.1080702543258667,grad_norm: 0.9999993633626169, iteration: 159662
loss: 1.0309092998504639,grad_norm: 0.9999989714183379, iteration: 159663
loss: 0.986443281173706,grad_norm: 0.9999989727170363, iteration: 159664
loss: 1.0315645933151245,grad_norm: 0.99999954146859, iteration: 159665
loss: 0.9548019766807556,grad_norm: 0.8541652573395292, iteration: 159666
loss: 1.0023796558380127,grad_norm: 0.8774670873248417, iteration: 159667
loss: 0.9495440125465393,grad_norm: 0.9999993072280141, iteration: 159668
loss: 0.9815492630004883,grad_norm: 0.8388399126297585, iteration: 159669
loss: 0.9763026833534241,grad_norm: 0.9999990725872548, iteration: 159670
loss: 0.9967215061187744,grad_norm: 0.9999991288747172, iteration: 159671
loss: 1.03341543674469,grad_norm: 0.9999991196118845, iteration: 159672
loss: 1.001566767692566,grad_norm: 0.8114493540084241, iteration: 159673
loss: 1.018241047859192,grad_norm: 0.9999992103269586, iteration: 159674
loss: 1.0004132986068726,grad_norm: 0.9202042468380512, iteration: 159675
loss: 1.0056201219558716,grad_norm: 0.996795002990457, iteration: 159676
loss: 1.0539977550506592,grad_norm: 0.999999239615544, iteration: 159677
loss: 1.039763331413269,grad_norm: 0.999999161698483, iteration: 159678
loss: 1.0100717544555664,grad_norm: 0.9712030134494355, iteration: 159679
loss: 1.0276999473571777,grad_norm: 0.9175059538012242, iteration: 159680
loss: 1.0090123414993286,grad_norm: 0.9806903242621757, iteration: 159681
loss: 0.9963975548744202,grad_norm: 0.9642412889076982, iteration: 159682
loss: 1.0264750719070435,grad_norm: 0.9243565581809362, iteration: 159683
loss: 1.0113409757614136,grad_norm: 0.9999990511314927, iteration: 159684
loss: 1.025305151939392,grad_norm: 0.9999990630342159, iteration: 159685
loss: 0.9863018989562988,grad_norm: 0.9115280262975488, iteration: 159686
loss: 0.9660776257514954,grad_norm: 0.9999990363511719, iteration: 159687
loss: 1.024825096130371,grad_norm: 0.9138860714332796, iteration: 159688
loss: 1.0366969108581543,grad_norm: 0.9999992275109763, iteration: 159689
loss: 0.9564669132232666,grad_norm: 0.8963720227617314, iteration: 159690
loss: 1.001487374305725,grad_norm: 0.9874303983436842, iteration: 159691
loss: 0.988920271396637,grad_norm: 0.8764443480186798, iteration: 159692
loss: 1.0195561647415161,grad_norm: 0.9999993655936803, iteration: 159693
loss: 0.9695128798484802,grad_norm: 0.989969083796522, iteration: 159694
loss: 1.0042113065719604,grad_norm: 0.9540501349846198, iteration: 159695
loss: 1.0232453346252441,grad_norm: 0.999487486341528, iteration: 159696
loss: 1.0324883460998535,grad_norm: 0.9999989976752331, iteration: 159697
loss: 0.9912747740745544,grad_norm: 0.8473996288203148, iteration: 159698
loss: 0.9844967126846313,grad_norm: 0.9999993926964237, iteration: 159699
loss: 1.0547183752059937,grad_norm: 0.999999855947021, iteration: 159700
loss: 0.9998490810394287,grad_norm: 0.7991503503580244, iteration: 159701
loss: 0.9922760128974915,grad_norm: 0.99999909810419, iteration: 159702
loss: 0.9921064972877502,grad_norm: 0.996974175073052, iteration: 159703
loss: 1.0037378072738647,grad_norm: 0.9999993619845938, iteration: 159704
loss: 0.9697747826576233,grad_norm: 0.9999990031138147, iteration: 159705
loss: 1.1458693742752075,grad_norm: 0.9999998506724452, iteration: 159706
loss: 1.0228040218353271,grad_norm: 0.9999989872138249, iteration: 159707
loss: 0.977165699005127,grad_norm: 0.9999992410103307, iteration: 159708
loss: 1.0010576248168945,grad_norm: 0.999998999384587, iteration: 159709
loss: 1.009019374847412,grad_norm: 0.9999991210130629, iteration: 159710
loss: 0.9876159429550171,grad_norm: 0.9999990135173328, iteration: 159711
loss: 1.0167354345321655,grad_norm: 0.9139383687924518, iteration: 159712
loss: 1.0299975872039795,grad_norm: 0.999999120939377, iteration: 159713
loss: 0.9863759279251099,grad_norm: 0.9999989755660936, iteration: 159714
loss: 1.0061765909194946,grad_norm: 0.922916639295919, iteration: 159715
loss: 0.9681326150894165,grad_norm: 0.9747893087460898, iteration: 159716
loss: 0.9523736238479614,grad_norm: 0.9999997137138323, iteration: 159717
loss: 1.0006825923919678,grad_norm: 0.9099644630608601, iteration: 159718
loss: 1.028430461883545,grad_norm: 0.9999990775287658, iteration: 159719
loss: 0.9955282211303711,grad_norm: 0.9041276255128324, iteration: 159720
loss: 1.1062763929367065,grad_norm: 0.9999996469614627, iteration: 159721
loss: 1.0036368370056152,grad_norm: 0.9999991473539174, iteration: 159722
loss: 0.9876530170440674,grad_norm: 0.9566674709832138, iteration: 159723
loss: 1.00095796585083,grad_norm: 0.8852804605645741, iteration: 159724
loss: 1.0330748558044434,grad_norm: 0.9999999388370857, iteration: 159725
loss: 1.0131691694259644,grad_norm: 0.9999998046461919, iteration: 159726
loss: 0.9591676592826843,grad_norm: 0.953043904348697, iteration: 159727
loss: 0.9948970079421997,grad_norm: 0.7965690335592502, iteration: 159728
loss: 1.030600905418396,grad_norm: 0.9010376188288535, iteration: 159729
loss: 1.0048623085021973,grad_norm: 0.9999990136119391, iteration: 159730
loss: 0.9571742415428162,grad_norm: 0.99999910144725, iteration: 159731
loss: 0.9960755109786987,grad_norm: 0.8905174557853551, iteration: 159732
loss: 1.0101109743118286,grad_norm: 0.9444198631807131, iteration: 159733
loss: 0.9843709468841553,grad_norm: 0.9999990664849356, iteration: 159734
loss: 0.9978501200675964,grad_norm: 0.9999992292723348, iteration: 159735
loss: 0.9864106178283691,grad_norm: 0.999999094810098, iteration: 159736
loss: 1.0137362480163574,grad_norm: 0.7716995869175041, iteration: 159737
loss: 1.0165313482284546,grad_norm: 0.9482341290187397, iteration: 159738
loss: 0.9714256525039673,grad_norm: 0.9619052218018336, iteration: 159739
loss: 1.0073881149291992,grad_norm: 0.9255214867814843, iteration: 159740
loss: 0.983302652835846,grad_norm: 0.99999915839116, iteration: 159741
loss: 0.9701234698295593,grad_norm: 0.9999992325337802, iteration: 159742
loss: 1.007379412651062,grad_norm: 0.9999991725518391, iteration: 159743
loss: 1.0127626657485962,grad_norm: 0.999999056615826, iteration: 159744
loss: 0.9940184950828552,grad_norm: 0.999999184757074, iteration: 159745
loss: 0.9519453644752502,grad_norm: 0.8903077136957683, iteration: 159746
loss: 0.9995518922805786,grad_norm: 0.8456327016051479, iteration: 159747
loss: 1.0309081077575684,grad_norm: 0.9967138507892846, iteration: 159748
loss: 0.9834870100021362,grad_norm: 0.8974549744839003, iteration: 159749
loss: 1.0004897117614746,grad_norm: 0.8451244414554658, iteration: 159750
loss: 1.055213212966919,grad_norm: 0.9999992783479572, iteration: 159751
loss: 1.02488374710083,grad_norm: 0.99999919413651, iteration: 159752
loss: 1.0181044340133667,grad_norm: 0.9338978487926579, iteration: 159753
loss: 0.9996324777603149,grad_norm: 0.9999990954867973, iteration: 159754
loss: 1.0147281885147095,grad_norm: 0.8880008234779085, iteration: 159755
loss: 0.9940839409828186,grad_norm: 0.9999989667238218, iteration: 159756
loss: 0.9803369045257568,grad_norm: 0.9999990464980704, iteration: 159757
loss: 1.140749216079712,grad_norm: 0.9999992192786652, iteration: 159758
loss: 1.0060908794403076,grad_norm: 0.98099861981143, iteration: 159759
loss: 1.0198158025741577,grad_norm: 0.9881114074822417, iteration: 159760
loss: 1.004614233970642,grad_norm: 0.9999991597815285, iteration: 159761
loss: 1.017871379852295,grad_norm: 0.9881426760834756, iteration: 159762
loss: 0.9828224182128906,grad_norm: 0.975328164842282, iteration: 159763
loss: 1.018407940864563,grad_norm: 0.9636989320166158, iteration: 159764
loss: 0.9938206076622009,grad_norm: 0.9999991280669684, iteration: 159765
loss: 1.019004464149475,grad_norm: 0.8685614358331541, iteration: 159766
loss: 1.0326628684997559,grad_norm: 0.9999998778563134, iteration: 159767
loss: 0.978767991065979,grad_norm: 0.8801176903548639, iteration: 159768
loss: 1.023883581161499,grad_norm: 0.8857886088755846, iteration: 159769
loss: 1.0020376443862915,grad_norm: 0.999999007024711, iteration: 159770
loss: 0.9958586096763611,grad_norm: 0.9999990172570563, iteration: 159771
loss: 0.9955061078071594,grad_norm: 0.9999991131010213, iteration: 159772
loss: 0.9877102971076965,grad_norm: 0.9129831051971106, iteration: 159773
loss: 1.0108059644699097,grad_norm: 0.9999990211576733, iteration: 159774
loss: 0.9903069734573364,grad_norm: 0.8752206094824957, iteration: 159775
loss: 1.0219212770462036,grad_norm: 0.9999993772368769, iteration: 159776
loss: 0.9781376123428345,grad_norm: 0.7194971308109467, iteration: 159777
loss: 1.0051344633102417,grad_norm: 0.8938776170159978, iteration: 159778
loss: 1.0116817951202393,grad_norm: 0.9999990947389179, iteration: 159779
loss: 0.9947401285171509,grad_norm: 0.999999295036598, iteration: 159780
loss: 0.9643750190734863,grad_norm: 0.9999991265410019, iteration: 159781
loss: 1.1607882976531982,grad_norm: 0.9999992663506726, iteration: 159782
loss: 1.0070966482162476,grad_norm: 0.9999992878629201, iteration: 159783
loss: 1.0016225576400757,grad_norm: 0.9999991451477144, iteration: 159784
loss: 1.0097105503082275,grad_norm: 0.9999990843501999, iteration: 159785
loss: 0.9814191460609436,grad_norm: 0.9999992777952321, iteration: 159786
loss: 0.9812126159667969,grad_norm: 0.9999991315756459, iteration: 159787
loss: 1.05201256275177,grad_norm: 0.9999996712497476, iteration: 159788
loss: 0.9925904273986816,grad_norm: 0.9999990498835104, iteration: 159789
loss: 0.9989632368087769,grad_norm: 0.8948079146052319, iteration: 159790
loss: 1.0005742311477661,grad_norm: 0.9999991545501624, iteration: 159791
loss: 1.0144381523132324,grad_norm: 0.9999993170809607, iteration: 159792
loss: 0.9807122945785522,grad_norm: 0.9870091992445392, iteration: 159793
loss: 0.9882702827453613,grad_norm: 0.8963712394962449, iteration: 159794
loss: 1.0136773586273193,grad_norm: 0.9999991423981979, iteration: 159795
loss: 0.9612151980400085,grad_norm: 0.999999221291263, iteration: 159796
loss: 1.015726089477539,grad_norm: 0.9999989858528565, iteration: 159797
loss: 1.004131555557251,grad_norm: 0.954917036417156, iteration: 159798
loss: 0.9978968501091003,grad_norm: 0.999999124157164, iteration: 159799
loss: 0.9726413488388062,grad_norm: 0.999998926799207, iteration: 159800
loss: 1.0302141904830933,grad_norm: 0.9999990689676629, iteration: 159801
loss: 1.009305715560913,grad_norm: 0.9999994906401136, iteration: 159802
loss: 1.023820400238037,grad_norm: 0.9134224251300466, iteration: 159803
loss: 0.9944297075271606,grad_norm: 0.9718376781745388, iteration: 159804
loss: 1.0150104761123657,grad_norm: 0.9439037790849985, iteration: 159805
loss: 1.0412532091140747,grad_norm: 0.9310406401388618, iteration: 159806
loss: 1.0078761577606201,grad_norm: 0.8349482183768698, iteration: 159807
loss: 1.0130976438522339,grad_norm: 0.9735697085143207, iteration: 159808
loss: 1.0152862071990967,grad_norm: 0.9999989883870053, iteration: 159809
loss: 0.9616901278495789,grad_norm: 0.9999990411467102, iteration: 159810
loss: 0.9756361842155457,grad_norm: 0.9999990024501002, iteration: 159811
loss: 0.9913426041603088,grad_norm: 0.9999990544857191, iteration: 159812
loss: 1.0123788118362427,grad_norm: 0.9999991112233633, iteration: 159813
loss: 0.9550307989120483,grad_norm: 0.9999993186942566, iteration: 159814
loss: 0.9928678274154663,grad_norm: 0.9999992097422774, iteration: 159815
loss: 1.0100367069244385,grad_norm: 0.9999992008327189, iteration: 159816
loss: 0.9987384080886841,grad_norm: 0.9564603644108253, iteration: 159817
loss: 1.013654351234436,grad_norm: 0.9452151645247899, iteration: 159818
loss: 1.0017527341842651,grad_norm: 0.8211103546723666, iteration: 159819
loss: 1.1609339714050293,grad_norm: 0.999999430109792, iteration: 159820
loss: 1.0012120008468628,grad_norm: 0.8154847453159431, iteration: 159821
loss: 1.0131195783615112,grad_norm: 0.7758022841113066, iteration: 159822
loss: 1.021499514579773,grad_norm: 0.9999991595824208, iteration: 159823
loss: 0.992842435836792,grad_norm: 0.999999229357234, iteration: 159824
loss: 1.1096726655960083,grad_norm: 0.9999992610604377, iteration: 159825
loss: 1.0178605318069458,grad_norm: 0.9999991362501421, iteration: 159826
loss: 1.009909987449646,grad_norm: 0.9752098768974815, iteration: 159827
loss: 1.005437970161438,grad_norm: 0.9999992331971689, iteration: 159828
loss: 1.048810362815857,grad_norm: 0.9999991063154433, iteration: 159829
loss: 1.0078811645507812,grad_norm: 0.999998996850423, iteration: 159830
loss: 0.9907662272453308,grad_norm: 0.9837497912079203, iteration: 159831
loss: 0.9695485234260559,grad_norm: 0.8616858274839291, iteration: 159832
loss: 0.989454984664917,grad_norm: 0.9999990348647778, iteration: 159833
loss: 0.9859834313392639,grad_norm: 0.9999990717579996, iteration: 159834
loss: 0.9853696227073669,grad_norm: 0.999999170837908, iteration: 159835
loss: 1.0052276849746704,grad_norm: 0.9548573773445773, iteration: 159836
loss: 1.0016204118728638,grad_norm: 0.9131976465004142, iteration: 159837
loss: 1.0069204568862915,grad_norm: 0.9017019394356554, iteration: 159838
loss: 0.9967350363731384,grad_norm: 0.9675263823063088, iteration: 159839
loss: 0.9613680839538574,grad_norm: 0.9007887975321491, iteration: 159840
loss: 1.0058989524841309,grad_norm: 0.9999990132318951, iteration: 159841
loss: 0.9527873992919922,grad_norm: 0.9999991682982756, iteration: 159842
loss: 0.9734310507774353,grad_norm: 0.9999993316884905, iteration: 159843
loss: 1.004772663116455,grad_norm: 0.9999989223596832, iteration: 159844
loss: 0.9868747591972351,grad_norm: 0.9999995244515886, iteration: 159845
loss: 0.9943060278892517,grad_norm: 0.784153776274475, iteration: 159846
loss: 1.0758341550827026,grad_norm: 0.9999994358610057, iteration: 159847
loss: 0.9748731255531311,grad_norm: 0.9999990224818673, iteration: 159848
loss: 1.0100783109664917,grad_norm: 0.9999992099781367, iteration: 159849
loss: 1.0092366933822632,grad_norm: 0.9999992210879421, iteration: 159850
loss: 0.9980323314666748,grad_norm: 0.9999990860497233, iteration: 159851
loss: 0.9984017014503479,grad_norm: 0.915605929458065, iteration: 159852
loss: 0.9647867679595947,grad_norm: 0.9893649690830616, iteration: 159853
loss: 0.9767313599586487,grad_norm: 0.8275928972867985, iteration: 159854
loss: 0.9714813828468323,grad_norm: 0.999998976840618, iteration: 159855
loss: 0.9979984760284424,grad_norm: 0.8934852622676343, iteration: 159856
loss: 0.989924430847168,grad_norm: 0.9999991890542426, iteration: 159857
loss: 1.0125763416290283,grad_norm: 0.9999991183359553, iteration: 159858
loss: 0.9944401979446411,grad_norm: 0.9999996947603088, iteration: 159859
loss: 1.0005162954330444,grad_norm: 0.9999992641649326, iteration: 159860
loss: 0.9609252214431763,grad_norm: 0.9999991815261084, iteration: 159861
loss: 1.0273549556732178,grad_norm: 0.9999998981040072, iteration: 159862
loss: 0.9587218761444092,grad_norm: 0.9999990782020216, iteration: 159863
loss: 1.0151543617248535,grad_norm: 0.9999992076783679, iteration: 159864
loss: 0.9801859855651855,grad_norm: 0.8267176070814247, iteration: 159865
loss: 0.9837203621864319,grad_norm: 0.9999990561353475, iteration: 159866
loss: 0.9557686448097229,grad_norm: 0.9999990547596698, iteration: 159867
loss: 1.0064237117767334,grad_norm: 0.9473768629443057, iteration: 159868
loss: 0.9984056949615479,grad_norm: 0.9999990232799653, iteration: 159869
loss: 1.0015294551849365,grad_norm: 0.9999991970328714, iteration: 159870
loss: 0.9846228957176208,grad_norm: 0.9999992592864998, iteration: 159871
loss: 1.0002522468566895,grad_norm: 0.9999990968608045, iteration: 159872
loss: 0.9895508885383606,grad_norm: 0.9999998703554899, iteration: 159873
loss: 0.9867321252822876,grad_norm: 0.973325043277037, iteration: 159874
loss: 1.01350998878479,grad_norm: 0.9968277852454257, iteration: 159875
loss: 1.0516116619110107,grad_norm: 0.9999992857525685, iteration: 159876
loss: 0.9737556576728821,grad_norm: 0.8738550960810251, iteration: 159877
loss: 1.0246223211288452,grad_norm: 0.9999991992427292, iteration: 159878
loss: 0.9926369786262512,grad_norm: 0.9984841539955477, iteration: 159879
loss: 1.0135321617126465,grad_norm: 0.9698223107536478, iteration: 159880
loss: 0.9666159749031067,grad_norm: 0.8316700811801305, iteration: 159881
loss: 0.9752566814422607,grad_norm: 0.9860473442240065, iteration: 159882
loss: 0.9727246761322021,grad_norm: 0.9439632477010792, iteration: 159883
loss: 1.0072956085205078,grad_norm: 0.9999990593718748, iteration: 159884
loss: 0.9835594296455383,grad_norm: 0.9883216485174783, iteration: 159885
loss: 0.969901442527771,grad_norm: 0.9425465154489665, iteration: 159886
loss: 1.0022821426391602,grad_norm: 0.9023513736196349, iteration: 159887
loss: 1.0160613059997559,grad_norm: 0.9999991376627302, iteration: 159888
loss: 1.0055161714553833,grad_norm: 0.9999991680890039, iteration: 159889
loss: 0.9980401992797852,grad_norm: 0.9391898553969628, iteration: 159890
loss: 0.9786685109138489,grad_norm: 0.9999990313136388, iteration: 159891
loss: 1.0121673345565796,grad_norm: 0.9398871515699186, iteration: 159892
loss: 1.0147531032562256,grad_norm: 0.9999992334389262, iteration: 159893
loss: 0.9689440727233887,grad_norm: 0.9702176950572939, iteration: 159894
loss: 0.9985955357551575,grad_norm: 0.8943143892157194, iteration: 159895
loss: 0.966092586517334,grad_norm: 0.9999990418887001, iteration: 159896
loss: 1.022779941558838,grad_norm: 0.9999991651538856, iteration: 159897
loss: 1.0446709394454956,grad_norm: 0.982529814866485, iteration: 159898
loss: 1.034276008605957,grad_norm: 0.9292064804625737, iteration: 159899
loss: 0.998556911945343,grad_norm: 0.9806943098839688, iteration: 159900
loss: 1.0122406482696533,grad_norm: 0.9999990564780245, iteration: 159901
loss: 0.9868409037590027,grad_norm: 0.9323806589161021, iteration: 159902
loss: 1.0318100452423096,grad_norm: 0.9999998364904039, iteration: 159903
loss: 1.0315074920654297,grad_norm: 0.9999991321585888, iteration: 159904
loss: 0.9778918027877808,grad_norm: 0.9742242180207128, iteration: 159905
loss: 0.9893123507499695,grad_norm: 0.9999991132464554, iteration: 159906
loss: 0.9921504855155945,grad_norm: 0.9999993400718157, iteration: 159907
loss: 0.9938266277313232,grad_norm: 0.9999990981309034, iteration: 159908
loss: 1.0105323791503906,grad_norm: 0.999999138640081, iteration: 159909
loss: 0.9756921529769897,grad_norm: 0.9999991595287978, iteration: 159910
loss: 0.9523097276687622,grad_norm: 0.8708117310855917, iteration: 159911
loss: 1.0074143409729004,grad_norm: 0.9747679170219139, iteration: 159912
loss: 1.007616400718689,grad_norm: 0.9999993669082791, iteration: 159913
loss: 1.0198709964752197,grad_norm: 0.9801385708805365, iteration: 159914
loss: 1.0263363122940063,grad_norm: 0.882417063564207, iteration: 159915
loss: 1.0270298719406128,grad_norm: 0.9999991213738559, iteration: 159916
loss: 0.9707880020141602,grad_norm: 0.822926110633387, iteration: 159917
loss: 0.9770215153694153,grad_norm: 0.9999991173383879, iteration: 159918
loss: 1.0325968265533447,grad_norm: 0.9825457797540437, iteration: 159919
loss: 0.9750164747238159,grad_norm: 0.9999990151129345, iteration: 159920
loss: 1.0036587715148926,grad_norm: 0.9709556996534177, iteration: 159921
loss: 1.0228030681610107,grad_norm: 0.9573568906605574, iteration: 159922
loss: 0.9831902384757996,grad_norm: 0.9999990547084275, iteration: 159923
loss: 1.0031912326812744,grad_norm: 0.9533515046074974, iteration: 159924
loss: 1.006252408027649,grad_norm: 0.9999992874753881, iteration: 159925
loss: 1.0038740634918213,grad_norm: 0.9999991257998262, iteration: 159926
loss: 0.9793444275856018,grad_norm: 0.9999997166057432, iteration: 159927
loss: 1.0170718431472778,grad_norm: 0.9999989646024652, iteration: 159928
loss: 1.082788109779358,grad_norm: 0.9999991920208747, iteration: 159929
loss: 0.9972145557403564,grad_norm: 0.9999990927268284, iteration: 159930
loss: 0.9979496598243713,grad_norm: 0.9999990254337704, iteration: 159931
loss: 0.964989960193634,grad_norm: 0.9999992194911561, iteration: 159932
loss: 0.9731618762016296,grad_norm: 0.8831905152747717, iteration: 159933
loss: 1.018838882446289,grad_norm: 0.9999989903283104, iteration: 159934
loss: 0.9836753606796265,grad_norm: 0.9885739501397548, iteration: 159935
loss: 1.020531177520752,grad_norm: 0.9999991474252041, iteration: 159936
loss: 1.002122402191162,grad_norm: 0.9130581262181559, iteration: 159937
loss: 0.9707847833633423,grad_norm: 0.9949427995540607, iteration: 159938
loss: 0.9807643890380859,grad_norm: 0.9942401252709485, iteration: 159939
loss: 1.014297604560852,grad_norm: 0.999999080618905, iteration: 159940
loss: 1.011249303817749,grad_norm: 0.9999991541160276, iteration: 159941
loss: 0.9899771213531494,grad_norm: 0.9999992337445679, iteration: 159942
loss: 0.972893238067627,grad_norm: 0.9999990236495697, iteration: 159943
loss: 0.9959242343902588,grad_norm: 0.9999991386940876, iteration: 159944
loss: 1.009900450706482,grad_norm: 0.9999989175213447, iteration: 159945
loss: 0.9896748065948486,grad_norm: 0.8639415575871311, iteration: 159946
loss: 1.0327692031860352,grad_norm: 0.9999990887510255, iteration: 159947
loss: 0.9892833232879639,grad_norm: 0.9999991628264423, iteration: 159948
loss: 1.015953779220581,grad_norm: 0.9999999598993148, iteration: 159949
loss: 0.999505341053009,grad_norm: 0.9888651711434044, iteration: 159950
loss: 0.9913448691368103,grad_norm: 0.8840051329603696, iteration: 159951
loss: 1.0138874053955078,grad_norm: 0.9951952125667479, iteration: 159952
loss: 0.9709308743476868,grad_norm: 0.9616647784700096, iteration: 159953
loss: 0.9946698546409607,grad_norm: 0.9444600265012155, iteration: 159954
loss: 1.0293946266174316,grad_norm: 0.96581752900413, iteration: 159955
loss: 1.020117998123169,grad_norm: 0.999999025221981, iteration: 159956
loss: 1.0302181243896484,grad_norm: 0.9999991161672493, iteration: 159957
loss: 1.0184330940246582,grad_norm: 0.9648370105674741, iteration: 159958
loss: 0.9967383146286011,grad_norm: 0.999999077689019, iteration: 159959
loss: 1.0351078510284424,grad_norm: 0.999999253942599, iteration: 159960
loss: 0.9489841461181641,grad_norm: 0.9999989974915487, iteration: 159961
loss: 0.9989925026893616,grad_norm: 0.9168820500594178, iteration: 159962
loss: 1.0053223371505737,grad_norm: 0.9999991440653649, iteration: 159963
loss: 0.9660286903381348,grad_norm: 0.9999990498293663, iteration: 159964
loss: 1.0140223503112793,grad_norm: 0.9367588884609696, iteration: 159965
loss: 0.9999940395355225,grad_norm: 0.9999990734118065, iteration: 159966
loss: 0.9734218120574951,grad_norm: 0.9092092881129074, iteration: 159967
loss: 0.9744052290916443,grad_norm: 0.9999991913797432, iteration: 159968
loss: 0.9779152870178223,grad_norm: 0.9226982949663185, iteration: 159969
loss: 0.9847659468650818,grad_norm: 0.9999992314092251, iteration: 159970
loss: 0.9925042390823364,grad_norm: 0.890612705935248, iteration: 159971
loss: 0.9477763175964355,grad_norm: 0.9999991058768458, iteration: 159972
loss: 0.992961585521698,grad_norm: 0.9491725970050436, iteration: 159973
loss: 0.9999890923500061,grad_norm: 0.9534317201703382, iteration: 159974
loss: 0.9652971029281616,grad_norm: 0.9999991606570388, iteration: 159975
loss: 1.0053391456604004,grad_norm: 0.9422550304095361, iteration: 159976
loss: 1.0348246097564697,grad_norm: 0.9994583478146623, iteration: 159977
loss: 1.0040737390518188,grad_norm: 0.9163779210647361, iteration: 159978
loss: 0.9639661908149719,grad_norm: 0.9002983380584031, iteration: 159979
loss: 1.0035687685012817,grad_norm: 0.9999991852728457, iteration: 159980
loss: 1.0339312553405762,grad_norm: 0.9644432389731942, iteration: 159981
loss: 0.9956453442573547,grad_norm: 0.805002961078189, iteration: 159982
loss: 0.9857082366943359,grad_norm: 0.9230084758301907, iteration: 159983
loss: 0.9939911961555481,grad_norm: 0.9999992307207733, iteration: 159984
loss: 0.987805962562561,grad_norm: 0.9999992009358059, iteration: 159985
loss: 0.9773476123809814,grad_norm: 0.7916975053891331, iteration: 159986
loss: 1.023247241973877,grad_norm: 0.9999992902884296, iteration: 159987
loss: 0.9949449300765991,grad_norm: 0.9786475792379762, iteration: 159988
loss: 0.9930419921875,grad_norm: 0.9190029086242365, iteration: 159989
loss: 0.9807066321372986,grad_norm: 0.8571171936468099, iteration: 159990
loss: 1.0166343450546265,grad_norm: 0.9999997696638666, iteration: 159991
loss: 1.0232861042022705,grad_norm: 0.9999990841817775, iteration: 159992
loss: 1.0038124322891235,grad_norm: 0.8670341842824989, iteration: 159993
loss: 0.9855136871337891,grad_norm: 0.9999990640160831, iteration: 159994
loss: 1.0283188819885254,grad_norm: 0.9696552208444142, iteration: 159995
loss: 0.9971049427986145,grad_norm: 0.989696438350581, iteration: 159996
loss: 1.0557506084442139,grad_norm: 0.9999995001207481, iteration: 159997
loss: 0.9644486904144287,grad_norm: 0.9999990540165586, iteration: 159998
loss: 1.0062875747680664,grad_norm: 0.9394882275819905, iteration: 159999
loss: 0.9666410684585571,grad_norm: 0.9999990349596526, iteration: 160000
Evaluating at step 160000
{'val': 0.9955732002854347, 'test': 2.442299910627615}
loss: 0.9607427716255188,grad_norm: 0.8548416542626974, iteration: 160001
loss: 1.0451228618621826,grad_norm: 0.9999992808072887, iteration: 160002
loss: 0.9816113710403442,grad_norm: 0.9040331909191972, iteration: 160003
loss: 0.9663568735122681,grad_norm: 0.86598110441438, iteration: 160004
loss: 0.9859912991523743,grad_norm: 0.9999992416949448, iteration: 160005
loss: 1.0666654109954834,grad_norm: 0.9999990219404743, iteration: 160006
loss: 0.9863290190696716,grad_norm: 0.9999993756848287, iteration: 160007
loss: 0.990406334400177,grad_norm: 0.8248808813493461, iteration: 160008
loss: 1.0587838888168335,grad_norm: 0.9999991253640211, iteration: 160009
loss: 1.1657910346984863,grad_norm: 0.999999119729593, iteration: 160010
loss: 1.0383644104003906,grad_norm: 0.9999993124196072, iteration: 160011
loss: 0.9619735479354858,grad_norm: 0.8860309817805816, iteration: 160012
loss: 1.0313279628753662,grad_norm: 0.8681464329515326, iteration: 160013
loss: 1.0143616199493408,grad_norm: 0.9607874867318188, iteration: 160014
loss: 1.0345672369003296,grad_norm: 0.9999994804302312, iteration: 160015
loss: 0.9943162798881531,grad_norm: 0.9999993678716698, iteration: 160016
loss: 1.0197093486785889,grad_norm: 0.8986016079329299, iteration: 160017
loss: 1.0070699453353882,grad_norm: 0.8051491211130385, iteration: 160018
loss: 1.0354914665222168,grad_norm: 0.9999991719787188, iteration: 160019
loss: 1.0749943256378174,grad_norm: 0.9999993672930667, iteration: 160020
loss: 1.018936276435852,grad_norm: 0.9999989783785656, iteration: 160021
loss: 1.0183470249176025,grad_norm: 0.9999993207013963, iteration: 160022
loss: 0.9825343489646912,grad_norm: 0.9651391860909123, iteration: 160023
loss: 1.0857452154159546,grad_norm: 0.9999998135494205, iteration: 160024
loss: 1.0011582374572754,grad_norm: 0.9166431959691386, iteration: 160025
loss: 1.0040630102157593,grad_norm: 0.9999992430389728, iteration: 160026
loss: 1.0193145275115967,grad_norm: 0.9999994402351835, iteration: 160027
loss: 1.0078970193862915,grad_norm: 0.9999992063653804, iteration: 160028
loss: 1.0138801336288452,grad_norm: 0.9999991969033273, iteration: 160029
loss: 1.0113288164138794,grad_norm: 0.9999992946374006, iteration: 160030
loss: 1.0185402631759644,grad_norm: 0.9227970685299478, iteration: 160031
loss: 0.9668826460838318,grad_norm: 0.8854991693884587, iteration: 160032
loss: 0.9965770840644836,grad_norm: 0.9563129942572405, iteration: 160033
loss: 0.9913274645805359,grad_norm: 0.9999991821728446, iteration: 160034
loss: 1.0239837169647217,grad_norm: 0.9999989599745421, iteration: 160035
loss: 0.9719811677932739,grad_norm: 0.9068730001622229, iteration: 160036
loss: 1.0405311584472656,grad_norm: 0.9999993896665076, iteration: 160037
loss: 1.017889142036438,grad_norm: 0.9999990852659475, iteration: 160038
loss: 0.9835459589958191,grad_norm: 0.9999991527873288, iteration: 160039
loss: 0.9852877259254456,grad_norm: 0.9906996366749797, iteration: 160040
loss: 1.000022292137146,grad_norm: 0.9466448723963597, iteration: 160041
loss: 0.9505364894866943,grad_norm: 0.9842758319412998, iteration: 160042
loss: 1.0092390775680542,grad_norm: 0.999999209713915, iteration: 160043
loss: 1.1470736265182495,grad_norm: 0.9999994307565034, iteration: 160044
loss: 0.9933634400367737,grad_norm: 0.9999991246094332, iteration: 160045
loss: 0.9880629777908325,grad_norm: 0.9104419513864767, iteration: 160046
loss: 1.0205188989639282,grad_norm: 0.9999990397626463, iteration: 160047
loss: 1.0022470951080322,grad_norm: 0.9545739249997522, iteration: 160048
loss: 0.9908044338226318,grad_norm: 0.9506542478194552, iteration: 160049
loss: 0.9760595560073853,grad_norm: 0.9999989614849741, iteration: 160050
loss: 1.0002379417419434,grad_norm: 0.9127288758554598, iteration: 160051
loss: 1.0094050168991089,grad_norm: 0.9999992398892853, iteration: 160052
loss: 0.9840352535247803,grad_norm: 0.7900656800876698, iteration: 160053
loss: 1.0149108171463013,grad_norm: 0.9999991147069471, iteration: 160054
loss: 1.0161231756210327,grad_norm: 0.9999995208846915, iteration: 160055
loss: 0.9972774386405945,grad_norm: 0.999999184748413, iteration: 160056
loss: 1.0638405084609985,grad_norm: 0.9999997663747259, iteration: 160057
loss: 1.017502784729004,grad_norm: 0.9999991490137415, iteration: 160058
loss: 0.9921640753746033,grad_norm: 0.9822050629381983, iteration: 160059
loss: 0.9847387075424194,grad_norm: 0.9999992539233254, iteration: 160060
loss: 0.9822099804878235,grad_norm: 0.999999115794891, iteration: 160061
loss: 1.0739116668701172,grad_norm: 0.9999994345231549, iteration: 160062
loss: 1.0170869827270508,grad_norm: 0.932622303315722, iteration: 160063
loss: 0.9780706167221069,grad_norm: 0.9999994437195787, iteration: 160064
loss: 0.9701834917068481,grad_norm: 0.8598644924807904, iteration: 160065
loss: 0.9792512655258179,grad_norm: 0.9064690230382622, iteration: 160066
loss: 1.034045696258545,grad_norm: 0.9999990694293882, iteration: 160067
loss: 0.9987064599990845,grad_norm: 0.9999991358621008, iteration: 160068
loss: 0.9874160885810852,grad_norm: 0.9999991605783043, iteration: 160069
loss: 1.003450870513916,grad_norm: 0.9999991929979862, iteration: 160070
loss: 0.9854164719581604,grad_norm: 0.9691534443460984, iteration: 160071
loss: 1.009811520576477,grad_norm: 0.9999991491421949, iteration: 160072
loss: 0.9992664456367493,grad_norm: 0.9464999270561816, iteration: 160073
loss: 0.9944421648979187,grad_norm: 0.9520467278586672, iteration: 160074
loss: 0.9418074488639832,grad_norm: 0.9552431260324282, iteration: 160075
loss: 1.0051360130310059,grad_norm: 0.9999991622819507, iteration: 160076
loss: 1.0267422199249268,grad_norm: 0.9999991359448682, iteration: 160077
loss: 0.9856150150299072,grad_norm: 0.8893885632751892, iteration: 160078
loss: 0.9903084635734558,grad_norm: 0.9999991301852772, iteration: 160079
loss: 0.9801201820373535,grad_norm: 0.9323796490804227, iteration: 160080
loss: 0.9925721287727356,grad_norm: 0.999999048378319, iteration: 160081
loss: 1.0159354209899902,grad_norm: 0.9999992294112748, iteration: 160082
loss: 0.9689040780067444,grad_norm: 0.9999991747145268, iteration: 160083
loss: 1.0071029663085938,grad_norm: 0.9115346247761077, iteration: 160084
loss: 1.007891297340393,grad_norm: 0.9999989232138977, iteration: 160085
loss: 0.9921157956123352,grad_norm: 0.9283340055924353, iteration: 160086
loss: 1.0303666591644287,grad_norm: 0.9999998143031109, iteration: 160087
loss: 1.0171397924423218,grad_norm: 0.9391829356693464, iteration: 160088
loss: 1.0185844898223877,grad_norm: 0.9991983064925957, iteration: 160089
loss: 1.002535104751587,grad_norm: 0.9999990077736606, iteration: 160090
loss: 0.9982372522354126,grad_norm: 0.9999989755489272, iteration: 160091
loss: 0.9811984300613403,grad_norm: 0.9468733055729917, iteration: 160092
loss: 1.0213065147399902,grad_norm: 0.9396876325582403, iteration: 160093
loss: 0.9841190576553345,grad_norm: 0.999999136443134, iteration: 160094
loss: 1.0146187543869019,grad_norm: 0.9358725136895252, iteration: 160095
loss: 1.0081965923309326,grad_norm: 0.9999990245530164, iteration: 160096
loss: 1.0540069341659546,grad_norm: 0.9999996274051856, iteration: 160097
loss: 0.9771666526794434,grad_norm: 0.9459233776184907, iteration: 160098
loss: 1.038496971130371,grad_norm: 0.8770725629443162, iteration: 160099
loss: 1.0021352767944336,grad_norm: 0.9483802407216928, iteration: 160100
loss: 1.033218502998352,grad_norm: 0.9569482584105193, iteration: 160101
loss: 0.9773927927017212,grad_norm: 0.9999990631012504, iteration: 160102
loss: 1.0083346366882324,grad_norm: 0.9911642765744633, iteration: 160103
loss: 1.0012643337249756,grad_norm: 0.9999992336590955, iteration: 160104
loss: 0.9963151812553406,grad_norm: 0.9999990195409173, iteration: 160105
loss: 0.9748018980026245,grad_norm: 0.9999991974563175, iteration: 160106
loss: 0.953306257724762,grad_norm: 0.999999168634099, iteration: 160107
loss: 0.9967692494392395,grad_norm: 0.9037036753998632, iteration: 160108
loss: 1.0294901132583618,grad_norm: 0.9999990047542079, iteration: 160109
loss: 0.9894664287567139,grad_norm: 0.8344001592763677, iteration: 160110
loss: 0.9872989058494568,grad_norm: 0.9999992349834388, iteration: 160111
loss: 0.9795103669166565,grad_norm: 0.9527754418419051, iteration: 160112
loss: 0.9759460687637329,grad_norm: 0.9122199738006326, iteration: 160113
loss: 0.9929119944572449,grad_norm: 0.9999990263485681, iteration: 160114
loss: 1.016219139099121,grad_norm: 0.9520999933017155, iteration: 160115
loss: 1.0049055814743042,grad_norm: 0.8942957279295849, iteration: 160116
loss: 1.0167179107666016,grad_norm: 0.9999998204297458, iteration: 160117
loss: 0.9939693808555603,grad_norm: 0.9410564554948819, iteration: 160118
loss: 1.0175567865371704,grad_norm: 0.999999072271015, iteration: 160119
loss: 0.992160439491272,grad_norm: 0.9999995635521254, iteration: 160120
loss: 0.9900083541870117,grad_norm: 0.9088569905198574, iteration: 160121
loss: 0.9901111125946045,grad_norm: 0.8982039982683204, iteration: 160122
loss: 0.9971476793289185,grad_norm: 0.9999990850341216, iteration: 160123
loss: 1.01181960105896,grad_norm: 0.9577723261932486, iteration: 160124
loss: 0.9696913361549377,grad_norm: 0.9999990166978913, iteration: 160125
loss: 0.9722174406051636,grad_norm: 0.9999992305424881, iteration: 160126
loss: 0.9975471496582031,grad_norm: 0.9225534905054651, iteration: 160127
loss: 1.032496452331543,grad_norm: 0.9999990920191513, iteration: 160128
loss: 1.006247878074646,grad_norm: 0.9999991110415392, iteration: 160129
loss: 0.9881346821784973,grad_norm: 0.9999989952102503, iteration: 160130
loss: 1.0193097591400146,grad_norm: 0.9999992279034028, iteration: 160131
loss: 1.037184238433838,grad_norm: 0.9971304264961137, iteration: 160132
loss: 0.9981981515884399,grad_norm: 0.943973046248106, iteration: 160133
loss: 0.9972888231277466,grad_norm: 0.8467585733366263, iteration: 160134
loss: 1.0047627687454224,grad_norm: 0.9999991232079771, iteration: 160135
loss: 0.9910357594490051,grad_norm: 0.9999991084802239, iteration: 160136
loss: 0.9687649011611938,grad_norm: 0.860245980651344, iteration: 160137
loss: 0.996432900428772,grad_norm: 0.9999990164348003, iteration: 160138
loss: 1.0261523723602295,grad_norm: 0.8818920207472577, iteration: 160139
loss: 0.9825155138969421,grad_norm: 0.9999991131285263, iteration: 160140
loss: 0.980763852596283,grad_norm: 0.9598519280915042, iteration: 160141
loss: 0.9923185706138611,grad_norm: 0.963728734008155, iteration: 160142
loss: 0.9920851588249207,grad_norm: 0.9964032718885165, iteration: 160143
loss: 0.9989476799964905,grad_norm: 0.9999991112149337, iteration: 160144
loss: 0.9986398816108704,grad_norm: 0.859406973487299, iteration: 160145
loss: 0.9827708005905151,grad_norm: 0.9999995143935014, iteration: 160146
loss: 0.9946708083152771,grad_norm: 0.9999991807129546, iteration: 160147
loss: 0.9782775640487671,grad_norm: 0.999999070808419, iteration: 160148
loss: 0.9776548147201538,grad_norm: 0.9349853913815444, iteration: 160149
loss: 0.9788779020309448,grad_norm: 0.9999991935349746, iteration: 160150
loss: 0.9599098563194275,grad_norm: 0.9705181258714543, iteration: 160151
loss: 1.0081250667572021,grad_norm: 0.9476665605578733, iteration: 160152
loss: 1.0159212350845337,grad_norm: 0.8956562949147502, iteration: 160153
loss: 1.0200917720794678,grad_norm: 0.9999990935920919, iteration: 160154
loss: 0.9658828377723694,grad_norm: 0.9999992588590186, iteration: 160155
loss: 0.9978666305541992,grad_norm: 0.9999992066890755, iteration: 160156
loss: 0.998662531375885,grad_norm: 0.9999992346024477, iteration: 160157
loss: 1.1201984882354736,grad_norm: 0.9999997361713338, iteration: 160158
loss: 0.9648334383964539,grad_norm: 0.9999992321850171, iteration: 160159
loss: 0.985679566860199,grad_norm: 0.9999990610607173, iteration: 160160
loss: 1.0263664722442627,grad_norm: 0.8639939543475246, iteration: 160161
loss: 1.026598334312439,grad_norm: 0.9274118262839806, iteration: 160162
loss: 0.9651003479957581,grad_norm: 0.9999990875774816, iteration: 160163
loss: 1.0227947235107422,grad_norm: 0.9999991944012067, iteration: 160164
loss: 0.9962167739868164,grad_norm: 0.8362000279223102, iteration: 160165
loss: 0.993507444858551,grad_norm: 0.949439666125966, iteration: 160166
loss: 0.9689295887947083,grad_norm: 0.9999990686507799, iteration: 160167
loss: 1.0351612567901611,grad_norm: 0.9742423068332711, iteration: 160168
loss: 1.0050299167633057,grad_norm: 0.8440571158956651, iteration: 160169
loss: 1.0152437686920166,grad_norm: 0.9885769423572008, iteration: 160170
loss: 1.0091161727905273,grad_norm: 0.9819240269346643, iteration: 160171
loss: 1.0366003513336182,grad_norm: 0.9999991640464235, iteration: 160172
loss: 1.0424572229385376,grad_norm: 0.9720191747099287, iteration: 160173
loss: 1.0137526988983154,grad_norm: 0.9999990535088027, iteration: 160174
loss: 1.0168956518173218,grad_norm: 0.9999990881233999, iteration: 160175
loss: 1.0273736715316772,grad_norm: 0.9999991395044412, iteration: 160176
loss: 0.9955760836601257,grad_norm: 0.917367878296643, iteration: 160177
loss: 0.9884228706359863,grad_norm: 0.9999990174140909, iteration: 160178
loss: 0.9569307565689087,grad_norm: 0.999999258853453, iteration: 160179
loss: 0.9597240686416626,grad_norm: 0.9522177463080628, iteration: 160180
loss: 0.9609199166297913,grad_norm: 0.8740950682833898, iteration: 160181
loss: 0.9823805689811707,grad_norm: 0.9999991810435009, iteration: 160182
loss: 1.004991888999939,grad_norm: 0.9250935993061167, iteration: 160183
loss: 0.9984620213508606,grad_norm: 0.9524388225371998, iteration: 160184
loss: 0.9791341423988342,grad_norm: 0.9494044865613106, iteration: 160185
loss: 0.9808783531188965,grad_norm: 0.923049216688333, iteration: 160186
loss: 1.0475434064865112,grad_norm: 0.9853870736493232, iteration: 160187
loss: 1.011650562286377,grad_norm: 0.9999991355506233, iteration: 160188
loss: 0.9770196080207825,grad_norm: 0.9120596538500765, iteration: 160189
loss: 0.95284104347229,grad_norm: 0.9317905760321925, iteration: 160190
loss: 1.001550555229187,grad_norm: 0.7853487123603441, iteration: 160191
loss: 1.0281271934509277,grad_norm: 0.9880115684006795, iteration: 160192
loss: 1.0630872249603271,grad_norm: 0.9999991780084015, iteration: 160193
loss: 1.0120192766189575,grad_norm: 0.9999998497186996, iteration: 160194
loss: 0.9617584943771362,grad_norm: 0.8881825502141225, iteration: 160195
loss: 0.9518351554870605,grad_norm: 0.9999992348212506, iteration: 160196
loss: 0.9820534586906433,grad_norm: 0.842039240391928, iteration: 160197
loss: 1.042345643043518,grad_norm: 0.9999991542632517, iteration: 160198
loss: 0.9956029057502747,grad_norm: 0.8573639550636342, iteration: 160199
loss: 1.0200586318969727,grad_norm: 0.8060473244831742, iteration: 160200
loss: 1.0309163331985474,grad_norm: 0.9999998427884598, iteration: 160201
loss: 0.9999444484710693,grad_norm: 0.999999261011206, iteration: 160202
loss: 1.020937442779541,grad_norm: 0.9189724720429713, iteration: 160203
loss: 0.9601258635520935,grad_norm: 0.9999992931976498, iteration: 160204
loss: 1.002712368965149,grad_norm: 0.9564742199069349, iteration: 160205
loss: 1.0032622814178467,grad_norm: 0.973761936544018, iteration: 160206
loss: 1.0079556703567505,grad_norm: 0.9999992171183537, iteration: 160207
loss: 0.9437878131866455,grad_norm: 0.9519525579061316, iteration: 160208
loss: 0.9786568880081177,grad_norm: 0.9460560094287314, iteration: 160209
loss: 0.970350444316864,grad_norm: 0.9169254931364408, iteration: 160210
loss: 1.0141750574111938,grad_norm: 0.9224677876134407, iteration: 160211
loss: 1.0265105962753296,grad_norm: 0.8227142548554347, iteration: 160212
loss: 0.9970443844795227,grad_norm: 0.9482480641755644, iteration: 160213
loss: 1.014164686203003,grad_norm: 0.9648839469188346, iteration: 160214
loss: 1.0223944187164307,grad_norm: 0.9999991482952909, iteration: 160215
loss: 1.0263645648956299,grad_norm: 0.9999992554459443, iteration: 160216
loss: 1.0329601764678955,grad_norm: 0.9202661845842005, iteration: 160217
loss: 0.9712066054344177,grad_norm: 0.9999990292890208, iteration: 160218
loss: 1.0137077569961548,grad_norm: 0.8452404823627486, iteration: 160219
loss: 0.9299500584602356,grad_norm: 0.9999990647711404, iteration: 160220
loss: 0.9998999238014221,grad_norm: 0.9442736489595471, iteration: 160221
loss: 1.0302562713623047,grad_norm: 0.9999991572028851, iteration: 160222
loss: 1.0126572847366333,grad_norm: 0.9945216558546458, iteration: 160223
loss: 1.0443751811981201,grad_norm: 0.9999995130330183, iteration: 160224
loss: 0.9774625897407532,grad_norm: 0.9999992311365643, iteration: 160225
loss: 0.9824876189231873,grad_norm: 0.9682881802291976, iteration: 160226
loss: 1.0288395881652832,grad_norm: 0.9999990411560491, iteration: 160227
loss: 1.0163178443908691,grad_norm: 0.9999991119318697, iteration: 160228
loss: 1.024444580078125,grad_norm: 0.9999991404869119, iteration: 160229
loss: 1.0207350254058838,grad_norm: 0.795777063393272, iteration: 160230
loss: 0.9863389730453491,grad_norm: 0.9143508670717364, iteration: 160231
loss: 0.9649613499641418,grad_norm: 0.9438271340096546, iteration: 160232
loss: 0.9916255474090576,grad_norm: 0.8730564159457594, iteration: 160233
loss: 1.027895212173462,grad_norm: 0.9999991212925649, iteration: 160234
loss: 1.0084673166275024,grad_norm: 0.9978873472897063, iteration: 160235
loss: 1.0079268217086792,grad_norm: 0.9951860897711855, iteration: 160236
loss: 1.0108768939971924,grad_norm: 0.8757011875385314, iteration: 160237
loss: 1.0437027215957642,grad_norm: 0.9999996588210007, iteration: 160238
loss: 1.0201691389083862,grad_norm: 0.9999990930108522, iteration: 160239
loss: 1.0101405382156372,grad_norm: 0.9999992129939484, iteration: 160240
loss: 1.0063284635543823,grad_norm: 0.9591691098936891, iteration: 160241
loss: 0.9883611798286438,grad_norm: 0.7976267744406407, iteration: 160242
loss: 0.9863271117210388,grad_norm: 0.9999991996884088, iteration: 160243
loss: 1.0309007167816162,grad_norm: 0.9999991264733499, iteration: 160244
loss: 0.9721139073371887,grad_norm: 0.9811019655321581, iteration: 160245
loss: 0.9967014789581299,grad_norm: 0.9763261217697429, iteration: 160246
loss: 1.0005254745483398,grad_norm: 0.9999990368467372, iteration: 160247
loss: 0.9677447080612183,grad_norm: 0.8901865734057974, iteration: 160248
loss: 1.011513352394104,grad_norm: 0.9999990759297566, iteration: 160249
loss: 0.9713630080223083,grad_norm: 0.9999991571688833, iteration: 160250
loss: 1.0069295167922974,grad_norm: 0.988108254356093, iteration: 160251
loss: 1.0149744749069214,grad_norm: 0.9999991526111819, iteration: 160252
loss: 0.9741294980049133,grad_norm: 0.9925596553596406, iteration: 160253
loss: 0.9532158970832825,grad_norm: 0.9999990562617782, iteration: 160254
loss: 1.0048998594284058,grad_norm: 0.9999991922970494, iteration: 160255
loss: 0.9956873655319214,grad_norm: 0.9815643835607003, iteration: 160256
loss: 0.9921344518661499,grad_norm: 0.9229889390016531, iteration: 160257
loss: 1.0042316913604736,grad_norm: 0.9999991065332064, iteration: 160258
loss: 1.0092216730117798,grad_norm: 0.9747307807325637, iteration: 160259
loss: 1.0044145584106445,grad_norm: 0.9078581437689813, iteration: 160260
loss: 1.0327422618865967,grad_norm: 0.976780303471611, iteration: 160261
loss: 0.9996526837348938,grad_norm: 0.9999991898095447, iteration: 160262
loss: 1.0200613737106323,grad_norm: 0.9801557763766363, iteration: 160263
loss: 0.9926326274871826,grad_norm: 0.9999991160960193, iteration: 160264
loss: 1.0119929313659668,grad_norm: 0.9344161004491884, iteration: 160265
loss: 1.023294448852539,grad_norm: 0.9999989348938498, iteration: 160266
loss: 0.9973720908164978,grad_norm: 0.89862146686904, iteration: 160267
loss: 0.9845782518386841,grad_norm: 0.9972923694307523, iteration: 160268
loss: 0.9999383687973022,grad_norm: 0.9207427477863975, iteration: 160269
loss: 0.9732098579406738,grad_norm: 0.9999989454561868, iteration: 160270
loss: 1.0008959770202637,grad_norm: 0.9999992591358184, iteration: 160271
loss: 0.9818143844604492,grad_norm: 0.9160952954289645, iteration: 160272
loss: 0.9954829812049866,grad_norm: 0.9999991169583636, iteration: 160273
loss: 1.0354454517364502,grad_norm: 0.9999990726400902, iteration: 160274
loss: 1.0421137809753418,grad_norm: 0.9999990854315884, iteration: 160275
loss: 1.0655680894851685,grad_norm: 0.9999991133823829, iteration: 160276
loss: 0.9889121055603027,grad_norm: 0.9999991427529988, iteration: 160277
loss: 1.0151447057724,grad_norm: 0.9147745129727674, iteration: 160278
loss: 1.0140169858932495,grad_norm: 0.9656933544033558, iteration: 160279
loss: 0.9847730994224548,grad_norm: 0.958677440054811, iteration: 160280
loss: 1.023114562034607,grad_norm: 0.9843222843804398, iteration: 160281
loss: 1.0149650573730469,grad_norm: 0.9088825624650382, iteration: 160282
loss: 1.0347625017166138,grad_norm: 0.9123451853563636, iteration: 160283
loss: 0.9780569076538086,grad_norm: 0.9368992056151081, iteration: 160284
loss: 0.9640136957168579,grad_norm: 0.934333702973855, iteration: 160285
loss: 1.015560507774353,grad_norm: 0.893364240847825, iteration: 160286
loss: 0.9710685014724731,grad_norm: 0.9999990888953733, iteration: 160287
loss: 0.994603157043457,grad_norm: 0.9999991909241531, iteration: 160288
loss: 0.9892310500144958,grad_norm: 0.8873672459799198, iteration: 160289
loss: 1.0047626495361328,grad_norm: 0.9999993005777599, iteration: 160290
loss: 1.0448404550552368,grad_norm: 0.9266657831858809, iteration: 160291
loss: 0.9738542437553406,grad_norm: 0.9999991811540727, iteration: 160292
loss: 0.9748062491416931,grad_norm: 0.8782647332421789, iteration: 160293
loss: 1.006070852279663,grad_norm: 0.8378468508746655, iteration: 160294
loss: 1.031418800354004,grad_norm: 0.8868740894297893, iteration: 160295
loss: 1.038802981376648,grad_norm: 0.9999990849549631, iteration: 160296
loss: 1.0014381408691406,grad_norm: 0.9786610895434826, iteration: 160297
loss: 0.9663174748420715,grad_norm: 0.9777719375339184, iteration: 160298
loss: 1.0119363069534302,grad_norm: 0.9999992217648277, iteration: 160299
loss: 0.9972941875457764,grad_norm: 0.9999990209848298, iteration: 160300
loss: 0.9999584555625916,grad_norm: 0.9999990422952937, iteration: 160301
loss: 1.0146554708480835,grad_norm: 0.9305180483962595, iteration: 160302
loss: 0.9980865716934204,grad_norm: 0.9100306553848793, iteration: 160303
loss: 0.9673718214035034,grad_norm: 0.9999991367015418, iteration: 160304
loss: 0.9556782245635986,grad_norm: 0.9999992199776778, iteration: 160305
loss: 0.9639000296592712,grad_norm: 0.9999990402788536, iteration: 160306
loss: 0.9935129880905151,grad_norm: 0.8715302656120972, iteration: 160307
loss: 1.0221866369247437,grad_norm: 0.9999989167725107, iteration: 160308
loss: 1.0152170658111572,grad_norm: 0.8054731475492497, iteration: 160309
loss: 0.9937165975570679,grad_norm: 0.931279319885452, iteration: 160310
loss: 0.9955552816390991,grad_norm: 0.9366978573419257, iteration: 160311
loss: 1.0133525133132935,grad_norm: 0.9263088651700104, iteration: 160312
loss: 1.0065910816192627,grad_norm: 0.9999990289618266, iteration: 160313
loss: 1.0095728635787964,grad_norm: 0.9999991525645384, iteration: 160314
loss: 0.9737746715545654,grad_norm: 0.9828821302709677, iteration: 160315
loss: 1.0260603427886963,grad_norm: 0.9348221752446584, iteration: 160316
loss: 0.9843493103981018,grad_norm: 0.9999989188026918, iteration: 160317
loss: 0.9739387631416321,grad_norm: 0.9999991498797448, iteration: 160318
loss: 0.9884130954742432,grad_norm: 0.9999990537432543, iteration: 160319
loss: 1.0044933557510376,grad_norm: 0.8597929768449265, iteration: 160320
loss: 0.9882182478904724,grad_norm: 0.9999999078227421, iteration: 160321
loss: 1.030996322631836,grad_norm: 0.9999992158895619, iteration: 160322
loss: 0.9735406637191772,grad_norm: 0.9999991200878691, iteration: 160323
loss: 0.9673527479171753,grad_norm: 0.9252156782174016, iteration: 160324
loss: 0.9675894379615784,grad_norm: 0.9881279870771638, iteration: 160325
loss: 1.032108187675476,grad_norm: 0.9794678648342949, iteration: 160326
loss: 1.0138558149337769,grad_norm: 0.9914260210918647, iteration: 160327
loss: 1.0058540105819702,grad_norm: 0.9656925508593245, iteration: 160328
loss: 0.9956396222114563,grad_norm: 0.9176568837270735, iteration: 160329
loss: 1.03514564037323,grad_norm: 0.9999998426688823, iteration: 160330
loss: 0.9831147193908691,grad_norm: 0.9999991128323765, iteration: 160331
loss: 1.0683283805847168,grad_norm: 0.9999998462280458, iteration: 160332
loss: 1.012654423713684,grad_norm: 0.9999990748603506, iteration: 160333
loss: 0.9676149487495422,grad_norm: 0.9999990273564275, iteration: 160334
loss: 0.9822036027908325,grad_norm: 0.9408652299986152, iteration: 160335
loss: 1.0000132322311401,grad_norm: 0.9999994906236715, iteration: 160336
loss: 1.0082088708877563,grad_norm: 0.9320852525180212, iteration: 160337
loss: 0.9573701024055481,grad_norm: 0.9999991409220027, iteration: 160338
loss: 1.012058973312378,grad_norm: 0.9447229411256557, iteration: 160339
loss: 0.9574739336967468,grad_norm: 0.9694518428599168, iteration: 160340
loss: 0.9863707423210144,grad_norm: 0.8954149732744852, iteration: 160341
loss: 0.9877879619598389,grad_norm: 0.8914395474034225, iteration: 160342
loss: 1.0151952505111694,grad_norm: 0.9999990665333424, iteration: 160343
loss: 1.011357069015503,grad_norm: 0.9999990387282867, iteration: 160344
loss: 0.98583984375,grad_norm: 0.8042316801346493, iteration: 160345
loss: 0.9912170171737671,grad_norm: 0.9999991944801995, iteration: 160346
loss: 0.9754612445831299,grad_norm: 0.9943443727820882, iteration: 160347
loss: 1.033045768737793,grad_norm: 0.9999992447360345, iteration: 160348
loss: 1.0337257385253906,grad_norm: 0.9999990627604619, iteration: 160349
loss: 0.9954016804695129,grad_norm: 0.966427541970026, iteration: 160350
loss: 0.9806128144264221,grad_norm: 0.9999992224271848, iteration: 160351
loss: 1.0156668424606323,grad_norm: 0.9999997164871517, iteration: 160352
loss: 1.0304819345474243,grad_norm: 0.9999991458333416, iteration: 160353
loss: 1.0410494804382324,grad_norm: 0.9999991648843914, iteration: 160354
loss: 1.0507454872131348,grad_norm: 0.9999990625264306, iteration: 160355
loss: 1.0596996545791626,grad_norm: 0.9999998712064057, iteration: 160356
loss: 0.9974274039268494,grad_norm: 0.9112058400708486, iteration: 160357
loss: 0.9979386329650879,grad_norm: 0.9999991786588575, iteration: 160358
loss: 0.9745127558708191,grad_norm: 0.9999991025329178, iteration: 160359
loss: 0.9714694619178772,grad_norm: 0.9119541109439046, iteration: 160360
loss: 1.0354725122451782,grad_norm: 0.9999993029683271, iteration: 160361
loss: 1.0127418041229248,grad_norm: 0.8644240922838251, iteration: 160362
loss: 1.0091394186019897,grad_norm: 0.9715276843166791, iteration: 160363
loss: 0.9954964518547058,grad_norm: 0.9999991315069222, iteration: 160364
loss: 0.9610141515731812,grad_norm: 0.9830720570142342, iteration: 160365
loss: 1.0044362545013428,grad_norm: 0.9139389393170771, iteration: 160366
loss: 0.9959142208099365,grad_norm: 0.9999993049924157, iteration: 160367
loss: 1.0379947423934937,grad_norm: 0.9999991773839024, iteration: 160368
loss: 0.9868423938751221,grad_norm: 0.9852583960691614, iteration: 160369
loss: 1.0251144170761108,grad_norm: 0.906695376979131, iteration: 160370
loss: 0.9733975529670715,grad_norm: 0.9999990631979382, iteration: 160371
loss: 1.0078670978546143,grad_norm: 0.9713373132905738, iteration: 160372
loss: 1.0236098766326904,grad_norm: 0.9999991675285143, iteration: 160373
loss: 1.0462840795516968,grad_norm: 0.999999244016863, iteration: 160374
loss: 0.9861368536949158,grad_norm: 0.9999991218740795, iteration: 160375
loss: 1.0364875793457031,grad_norm: 0.9999992359435528, iteration: 160376
loss: 1.0072412490844727,grad_norm: 0.9999990621381987, iteration: 160377
loss: 1.001920461654663,grad_norm: 0.899413094120022, iteration: 160378
loss: 1.042790412902832,grad_norm: 0.963730214750022, iteration: 160379
loss: 1.0122833251953125,grad_norm: 0.9999992508898881, iteration: 160380
loss: 1.0246323347091675,grad_norm: 0.8887106442822891, iteration: 160381
loss: 1.020811915397644,grad_norm: 0.9289476707761318, iteration: 160382
loss: 1.010145664215088,grad_norm: 0.8716209297248776, iteration: 160383
loss: 1.0161449909210205,grad_norm: 0.9889361242131721, iteration: 160384
loss: 0.9623206257820129,grad_norm: 0.9999991240428625, iteration: 160385
loss: 0.9918231964111328,grad_norm: 0.9971613657088563, iteration: 160386
loss: 1.0025612115859985,grad_norm: 0.9999990993418058, iteration: 160387
loss: 0.9817743301391602,grad_norm: 0.8503579693391504, iteration: 160388
loss: 1.1484414339065552,grad_norm: 0.9999992862367463, iteration: 160389
loss: 1.0190379619598389,grad_norm: 0.9999991197703249, iteration: 160390
loss: 1.0049492120742798,grad_norm: 0.82941793812588, iteration: 160391
loss: 1.0048810243606567,grad_norm: 0.9999989868161397, iteration: 160392
loss: 1.0154238939285278,grad_norm: 0.999999104604929, iteration: 160393
loss: 0.9823118448257446,grad_norm: 0.9999991540790274, iteration: 160394
loss: 0.9828633666038513,grad_norm: 0.89838192929925, iteration: 160395
loss: 1.0082614421844482,grad_norm: 0.9841594018733052, iteration: 160396
loss: 1.0123553276062012,grad_norm: 0.909674719612682, iteration: 160397
loss: 1.0304455757141113,grad_norm: 0.8807187116831267, iteration: 160398
loss: 0.997759222984314,grad_norm: 0.9046931676828158, iteration: 160399
loss: 0.9757091999053955,grad_norm: 0.9999991314945014, iteration: 160400
loss: 1.0027155876159668,grad_norm: 0.8733554900032844, iteration: 160401
loss: 0.9693337678909302,grad_norm: 0.9999991058376962, iteration: 160402
loss: 1.0019935369491577,grad_norm: 0.9693134044203103, iteration: 160403
loss: 1.0165622234344482,grad_norm: 0.9335401861934783, iteration: 160404
loss: 0.9812725782394409,grad_norm: 0.9999991179069725, iteration: 160405
loss: 0.9664983153343201,grad_norm: 0.9174668864440707, iteration: 160406
loss: 0.9852312803268433,grad_norm: 0.9641914036202334, iteration: 160407
loss: 1.0011045932769775,grad_norm: 0.9999991330526996, iteration: 160408
loss: 0.9923195242881775,grad_norm: 0.9110689355338979, iteration: 160409
loss: 0.9944096207618713,grad_norm: 0.9999991952344237, iteration: 160410
loss: 0.982994794845581,grad_norm: 0.9999989311836855, iteration: 160411
loss: 0.9969566464424133,grad_norm: 0.9543697500804265, iteration: 160412
loss: 0.9839773774147034,grad_norm: 0.9579170858292573, iteration: 160413
loss: 0.9772478938102722,grad_norm: 0.9803286218211525, iteration: 160414
loss: 0.9566707015037537,grad_norm: 0.8843882247047473, iteration: 160415
loss: 1.0235610008239746,grad_norm: 0.9261743368297107, iteration: 160416
loss: 1.004538893699646,grad_norm: 0.9550049441429025, iteration: 160417
loss: 0.9939810037612915,grad_norm: 0.9574340964624629, iteration: 160418
loss: 1.0193806886672974,grad_norm: 0.9999990699777731, iteration: 160419
loss: 0.9674756526947021,grad_norm: 0.9999990901850283, iteration: 160420
loss: 0.9879766702651978,grad_norm: 0.909527332001417, iteration: 160421
loss: 0.9957891702651978,grad_norm: 0.9999992511708848, iteration: 160422
loss: 0.9937323331832886,grad_norm: 0.999999293691014, iteration: 160423
loss: 1.0189692974090576,grad_norm: 0.9999991388864693, iteration: 160424
loss: 0.965140163898468,grad_norm: 0.8733296140516706, iteration: 160425
loss: 0.9997397065162659,grad_norm: 0.9999991747488766, iteration: 160426
loss: 1.0050179958343506,grad_norm: 0.964155327028364, iteration: 160427
loss: 1.1789971590042114,grad_norm: 0.9999991937473303, iteration: 160428
loss: 0.981637179851532,grad_norm: 0.9648633675969601, iteration: 160429
loss: 1.0279853343963623,grad_norm: 0.9529278542482954, iteration: 160430
loss: 0.9892193078994751,grad_norm: 0.9588344203318875, iteration: 160431
loss: 1.0248501300811768,grad_norm: 0.9999992277392101, iteration: 160432
loss: 1.0371651649475098,grad_norm: 0.9999991046554519, iteration: 160433
loss: 1.0198363065719604,grad_norm: 0.8289594855525241, iteration: 160434
loss: 1.0205814838409424,grad_norm: 0.935856641517306, iteration: 160435
loss: 0.9823766946792603,grad_norm: 0.9999991724368433, iteration: 160436
loss: 1.0024839639663696,grad_norm: 0.8318813182248145, iteration: 160437
loss: 1.0215004682540894,grad_norm: 0.9392536630045012, iteration: 160438
loss: 1.0130646228790283,grad_norm: 0.9931767047099981, iteration: 160439
loss: 1.4106838703155518,grad_norm: 0.9999994390002359, iteration: 160440
loss: 0.9984474182128906,grad_norm: 0.9999989915604592, iteration: 160441
loss: 1.0253177881240845,grad_norm: 0.9999993123588958, iteration: 160442
loss: 0.986372172832489,grad_norm: 0.8941390763734359, iteration: 160443
loss: 0.9684039950370789,grad_norm: 0.9999992722121426, iteration: 160444
loss: 0.9783238768577576,grad_norm: 0.9999990862100068, iteration: 160445
loss: 0.9935244917869568,grad_norm: 0.9999990848712196, iteration: 160446
loss: 0.9776565432548523,grad_norm: 0.9398573088230076, iteration: 160447
loss: 1.046066403388977,grad_norm: 0.9711264668601252, iteration: 160448
loss: 0.9828866124153137,grad_norm: 0.9999991954576315, iteration: 160449
loss: 1.303540825843811,grad_norm: 0.999999806613713, iteration: 160450
loss: 0.9897528290748596,grad_norm: 0.9999990627515967, iteration: 160451
loss: 0.9565832614898682,grad_norm: 0.9430130055427005, iteration: 160452
loss: 1.002079963684082,grad_norm: 0.9631279419609314, iteration: 160453
loss: 1.0064924955368042,grad_norm: 0.9999990159192473, iteration: 160454
loss: 1.0299551486968994,grad_norm: 0.9999992077249575, iteration: 160455
loss: 0.997992217540741,grad_norm: 0.9351723462150756, iteration: 160456
loss: 0.9733254909515381,grad_norm: 0.9999990437916798, iteration: 160457
loss: 1.0106287002563477,grad_norm: 0.9705737346751346, iteration: 160458
loss: 1.005037546157837,grad_norm: 0.9169887877123335, iteration: 160459
loss: 0.9875637888908386,grad_norm: 0.9999991340409142, iteration: 160460
loss: 1.0270214080810547,grad_norm: 0.9999992021995382, iteration: 160461
loss: 0.9634315967559814,grad_norm: 0.9658165218721182, iteration: 160462
loss: 1.045820951461792,grad_norm: 0.9999992013180354, iteration: 160463
loss: 0.9995946884155273,grad_norm: 0.9557218434399407, iteration: 160464
loss: 0.9686277508735657,grad_norm: 0.9808147693624845, iteration: 160465
loss: 1.025721549987793,grad_norm: 0.852878080673919, iteration: 160466
loss: 1.0077048540115356,grad_norm: 0.8587617409008387, iteration: 160467
loss: 1.0132261514663696,grad_norm: 0.9999993477340718, iteration: 160468
loss: 1.0099605321884155,grad_norm: 0.9999990495373069, iteration: 160469
loss: 1.0160903930664062,grad_norm: 0.999999242112854, iteration: 160470
loss: 0.9638085961341858,grad_norm: 0.9999991675214426, iteration: 160471
loss: 0.9696258902549744,grad_norm: 0.9999993118652482, iteration: 160472
loss: 1.0222902297973633,grad_norm: 0.9999992448224471, iteration: 160473
loss: 1.0063872337341309,grad_norm: 0.999999091118098, iteration: 160474
loss: 0.9903228878974915,grad_norm: 0.999999150847309, iteration: 160475
loss: 1.001900553703308,grad_norm: 0.897365222914433, iteration: 160476
loss: 0.9994228482246399,grad_norm: 0.9999990231491569, iteration: 160477
loss: 1.0387439727783203,grad_norm: 0.9999992263672944, iteration: 160478
loss: 1.0044066905975342,grad_norm: 0.9422618598409612, iteration: 160479
loss: 0.9689176678657532,grad_norm: 0.9778614542514639, iteration: 160480
loss: 0.9605334997177124,grad_norm: 0.9999990016562775, iteration: 160481
loss: 1.0402652025222778,grad_norm: 0.9999990608643194, iteration: 160482
loss: 1.0050166845321655,grad_norm: 0.9938006490276204, iteration: 160483
loss: 0.9738824963569641,grad_norm: 0.9289283784327885, iteration: 160484
loss: 1.0074294805526733,grad_norm: 0.999999633791701, iteration: 160485
loss: 0.9960718750953674,grad_norm: 0.9999990529846329, iteration: 160486
loss: 0.9773428440093994,grad_norm: 0.9238095553522387, iteration: 160487
loss: 0.9746471643447876,grad_norm: 0.9404870590805824, iteration: 160488
loss: 1.009700059890747,grad_norm: 0.9686875246146656, iteration: 160489
loss: 1.0169641971588135,grad_norm: 0.9999990238873196, iteration: 160490
loss: 1.0102770328521729,grad_norm: 0.9999989526512372, iteration: 160491
loss: 1.0512150526046753,grad_norm: 0.9999991618324406, iteration: 160492
loss: 1.0048449039459229,grad_norm: 0.9999991600549629, iteration: 160493
loss: 0.9969138503074646,grad_norm: 0.9055629302706405, iteration: 160494
loss: 1.008449673652649,grad_norm: 0.9118096238585138, iteration: 160495
loss: 1.023539423942566,grad_norm: 0.9161621679896033, iteration: 160496
loss: 0.9966839551925659,grad_norm: 0.9999991559530311, iteration: 160497
loss: 1.0063045024871826,grad_norm: 0.9999990558791351, iteration: 160498
loss: 1.0264760255813599,grad_norm: 0.999999082595147, iteration: 160499
loss: 1.025011420249939,grad_norm: 0.9999997121667141, iteration: 160500
loss: 1.0098490715026855,grad_norm: 0.9999989567444993, iteration: 160501
loss: 1.0070524215698242,grad_norm: 0.9999990835059598, iteration: 160502
loss: 0.9837772250175476,grad_norm: 0.8253813319959227, iteration: 160503
loss: 1.016696572303772,grad_norm: 0.9597224804287291, iteration: 160504
loss: 1.0160484313964844,grad_norm: 0.9848742738909179, iteration: 160505
loss: 0.9867401123046875,grad_norm: 0.9876799222686469, iteration: 160506
loss: 1.0311752557754517,grad_norm: 0.8359630799844499, iteration: 160507
loss: 1.0247902870178223,grad_norm: 0.8886323250647399, iteration: 160508
loss: 0.9908087253570557,grad_norm: 0.9999992204134892, iteration: 160509
loss: 1.0393848419189453,grad_norm: 0.9999991295901797, iteration: 160510
loss: 0.9651195406913757,grad_norm: 0.9999991768156086, iteration: 160511
loss: 0.9883954524993896,grad_norm: 0.9999991762899063, iteration: 160512
loss: 1.0230073928833008,grad_norm: 0.9999990708895423, iteration: 160513
loss: 1.0118578672409058,grad_norm: 0.9999990687707254, iteration: 160514
loss: 1.0125840902328491,grad_norm: 0.9999997098953098, iteration: 160515
loss: 0.9616357088088989,grad_norm: 0.9999991988127362, iteration: 160516
loss: 0.9984612464904785,grad_norm: 0.9999990651968769, iteration: 160517
loss: 0.9850491881370544,grad_norm: 0.9012205166195202, iteration: 160518
loss: 1.0054205656051636,grad_norm: 0.9999989731248932, iteration: 160519
loss: 0.9667591452598572,grad_norm: 0.9999990911237033, iteration: 160520
loss: 1.0233352184295654,grad_norm: 0.9999991477578309, iteration: 160521
loss: 0.9985319972038269,grad_norm: 0.8424501743626067, iteration: 160522
loss: 1.016318678855896,grad_norm: 0.9999993057381672, iteration: 160523
loss: 0.9801713824272156,grad_norm: 0.9999991392849105, iteration: 160524
loss: 1.0108169317245483,grad_norm: 0.9999993627335304, iteration: 160525
loss: 1.1160343885421753,grad_norm: 0.9999995596059964, iteration: 160526
loss: 0.9737420082092285,grad_norm: 0.9999993320319173, iteration: 160527
loss: 0.9829564094543457,grad_norm: 0.99999914473738, iteration: 160528
loss: 1.0385359525680542,grad_norm: 0.9999991251267621, iteration: 160529
loss: 1.0449551343917847,grad_norm: 0.9999992731685989, iteration: 160530
loss: 0.9869571924209595,grad_norm: 0.9999991059580016, iteration: 160531
loss: 1.0114002227783203,grad_norm: 0.807470842818505, iteration: 160532
loss: 0.9775408506393433,grad_norm: 0.8943891998951442, iteration: 160533
loss: 1.0502389669418335,grad_norm: 0.9999991185858312, iteration: 160534
loss: 1.0126667022705078,grad_norm: 0.9392797245139147, iteration: 160535
loss: 0.9982463717460632,grad_norm: 0.9999989224666587, iteration: 160536
loss: 1.0406314134597778,grad_norm: 0.9999992110724977, iteration: 160537
loss: 0.9966002106666565,grad_norm: 0.9999992442689243, iteration: 160538
loss: 0.9805338382720947,grad_norm: 0.9999991357397496, iteration: 160539
loss: 1.0561959743499756,grad_norm: 0.9999995886563271, iteration: 160540
loss: 1.0083608627319336,grad_norm: 0.9999990614192168, iteration: 160541
loss: 0.9826012253761292,grad_norm: 0.9339348199891331, iteration: 160542
loss: 0.9523066878318787,grad_norm: 0.8387880920381974, iteration: 160543
loss: 1.0130109786987305,grad_norm: 0.9999990549788141, iteration: 160544
loss: 1.0165079832077026,grad_norm: 0.9916190411157496, iteration: 160545
loss: 0.9708157777786255,grad_norm: 0.8098753972972857, iteration: 160546
loss: 1.006842017173767,grad_norm: 0.9999990840205903, iteration: 160547
loss: 1.0711359977722168,grad_norm: 0.9999998101076725, iteration: 160548
loss: 0.9861004948616028,grad_norm: 0.8970383694356129, iteration: 160549
loss: 0.9841587543487549,grad_norm: 0.8776248320796252, iteration: 160550
loss: 1.015128254890442,grad_norm: 0.9999990230749977, iteration: 160551
loss: 0.9970644116401672,grad_norm: 0.9999990034830615, iteration: 160552
loss: 1.0140231847763062,grad_norm: 0.999998966253192, iteration: 160553
loss: 0.9741659760475159,grad_norm: 0.9567812895369242, iteration: 160554
loss: 0.9976126551628113,grad_norm: 0.9829688960282278, iteration: 160555
loss: 0.9800252914428711,grad_norm: 0.9999990945810585, iteration: 160556
loss: 1.0030653476715088,grad_norm: 0.999999082345789, iteration: 160557
loss: 1.0204942226409912,grad_norm: 0.9999997945167387, iteration: 160558
loss: 1.024250864982605,grad_norm: 0.9692174899033627, iteration: 160559
loss: 0.9947609305381775,grad_norm: 0.9999991280377089, iteration: 160560
loss: 0.9825894236564636,grad_norm: 0.9420963266936406, iteration: 160561
loss: 0.9786034822463989,grad_norm: 0.9999991892652472, iteration: 160562
loss: 0.9661992788314819,grad_norm: 0.9615894286797655, iteration: 160563
loss: 0.9574324488639832,grad_norm: 0.9658042880786539, iteration: 160564
loss: 1.0122424364089966,grad_norm: 0.9754926740278612, iteration: 160565
loss: 1.0233335494995117,grad_norm: 0.9999993272910614, iteration: 160566
loss: 0.9876421093940735,grad_norm: 0.9897589596959735, iteration: 160567
loss: 1.0069339275360107,grad_norm: 0.9999990789977132, iteration: 160568
loss: 1.046445608139038,grad_norm: 0.9623629521743972, iteration: 160569
loss: 0.9849361777305603,grad_norm: 0.9789613848360378, iteration: 160570
loss: 1.0122095346450806,grad_norm: 0.9999990730879726, iteration: 160571
loss: 1.0055766105651855,grad_norm: 0.935768841340524, iteration: 160572
loss: 1.0161166191101074,grad_norm: 0.8613322409974749, iteration: 160573
loss: 0.9857214689254761,grad_norm: 0.9999993125832963, iteration: 160574
loss: 1.0038886070251465,grad_norm: 0.9827163460736681, iteration: 160575
loss: 1.0092934370040894,grad_norm: 0.999999954154517, iteration: 160576
loss: 1.01333749294281,grad_norm: 0.9999991457700026, iteration: 160577
loss: 1.024086833000183,grad_norm: 0.9999991774477281, iteration: 160578
loss: 1.0005954504013062,grad_norm: 0.8938894724750424, iteration: 160579
loss: 1.01372230052948,grad_norm: 0.9466020740906909, iteration: 160580
loss: 0.9902060031890869,grad_norm: 0.999999158279856, iteration: 160581
loss: 0.9901884198188782,grad_norm: 0.9301123072259331, iteration: 160582
loss: 1.010495662689209,grad_norm: 0.9999992066321055, iteration: 160583
loss: 1.0262799263000488,grad_norm: 0.9999991698104947, iteration: 160584
loss: 0.9861938953399658,grad_norm: 0.9999991141801198, iteration: 160585
loss: 0.9939591884613037,grad_norm: 0.9999991160144762, iteration: 160586
loss: 1.0036388635635376,grad_norm: 0.9999991305788544, iteration: 160587
loss: 0.9827903509140015,grad_norm: 0.999999145119569, iteration: 160588
loss: 1.0214256048202515,grad_norm: 0.9094729028237202, iteration: 160589
loss: 1.0213708877563477,grad_norm: 0.9995559378510742, iteration: 160590
loss: 0.9874860048294067,grad_norm: 0.9999988922744213, iteration: 160591
loss: 1.1052303314208984,grad_norm: 0.9999991828874663, iteration: 160592
loss: 0.9843710064888,grad_norm: 0.8947290405905193, iteration: 160593
loss: 0.9747362732887268,grad_norm: 0.9423813626706521, iteration: 160594
loss: 1.0158156156539917,grad_norm: 0.9999991370431471, iteration: 160595
loss: 0.9919407963752747,grad_norm: 0.8133520334716782, iteration: 160596
loss: 0.9788362383842468,grad_norm: 0.9999996709551612, iteration: 160597
loss: 1.0080543756484985,grad_norm: 0.9999989365494891, iteration: 160598
loss: 0.9688647389411926,grad_norm: 0.9753582823043003, iteration: 160599
loss: 0.9647383689880371,grad_norm: 0.981735193734035, iteration: 160600
loss: 0.9825092554092407,grad_norm: 0.8986740143556237, iteration: 160601
loss: 1.0003595352172852,grad_norm: 0.9880006088393616, iteration: 160602
loss: 0.9998873472213745,grad_norm: 0.9123920416485964, iteration: 160603
loss: 0.9877378940582275,grad_norm: 0.8863133633417876, iteration: 160604
loss: 1.0265735387802124,grad_norm: 0.9999992529311593, iteration: 160605
loss: 0.9820300936698914,grad_norm: 0.9561141001116322, iteration: 160606
loss: 1.009789228439331,grad_norm: 0.9443946661555305, iteration: 160607
loss: 0.9673591256141663,grad_norm: 0.9999991320608036, iteration: 160608
loss: 1.014991283416748,grad_norm: 0.9999995504924207, iteration: 160609
loss: 1.0319244861602783,grad_norm: 0.9792357724977405, iteration: 160610
loss: 1.021319031715393,grad_norm: 0.9999993553765105, iteration: 160611
loss: 0.9904417395591736,grad_norm: 0.948883579125702, iteration: 160612
loss: 1.0136229991912842,grad_norm: 0.9799048667673685, iteration: 160613
loss: 1.0000993013381958,grad_norm: 0.9098386334219359, iteration: 160614
loss: 0.9955364465713501,grad_norm: 0.9041521208140025, iteration: 160615
loss: 1.0214680433273315,grad_norm: 0.9999990182225557, iteration: 160616
loss: 0.9892140626907349,grad_norm: 0.999999039969484, iteration: 160617
loss: 0.9595953226089478,grad_norm: 0.9011857911453772, iteration: 160618
loss: 0.9827563762664795,grad_norm: 0.9999992238470008, iteration: 160619
loss: 1.0350089073181152,grad_norm: 0.9999989998784717, iteration: 160620
loss: 1.0269438028335571,grad_norm: 0.9999991479199988, iteration: 160621
loss: 1.0165430307388306,grad_norm: 0.99999907203115, iteration: 160622
loss: 0.9883957505226135,grad_norm: 0.8770117911658231, iteration: 160623
loss: 0.9887887835502625,grad_norm: 0.9197385545966172, iteration: 160624
loss: 1.0082203149795532,grad_norm: 0.999999190968715, iteration: 160625
loss: 1.0035419464111328,grad_norm: 0.999999129314237, iteration: 160626
loss: 0.9929854869842529,grad_norm: 0.9999998259163607, iteration: 160627
loss: 0.965670645236969,grad_norm: 0.9043647872890996, iteration: 160628
loss: 1.0054471492767334,grad_norm: 0.9999991147184366, iteration: 160629
loss: 0.9994255900382996,grad_norm: 0.9999992177891308, iteration: 160630
loss: 0.9939690828323364,grad_norm: 0.8904438658515403, iteration: 160631
loss: 1.0015517473220825,grad_norm: 0.8074408512032428, iteration: 160632
loss: 1.003684163093567,grad_norm: 0.9822508044582414, iteration: 160633
loss: 1.0171335935592651,grad_norm: 0.95260091291088, iteration: 160634
loss: 0.9827351570129395,grad_norm: 0.9999991095417516, iteration: 160635
loss: 0.9667925834655762,grad_norm: 0.9999991976140991, iteration: 160636
loss: 1.0426725149154663,grad_norm: 0.9823053396847026, iteration: 160637
loss: 0.9840580224990845,grad_norm: 0.999999143645667, iteration: 160638
loss: 0.99554443359375,grad_norm: 0.8756726273232163, iteration: 160639
loss: 1.0175834894180298,grad_norm: 0.9999990694536677, iteration: 160640
loss: 0.9939642548561096,grad_norm: 0.9999990090201539, iteration: 160641
loss: 0.9963791966438293,grad_norm: 0.9483761001192084, iteration: 160642
loss: 0.9970853924751282,grad_norm: 0.9999990047067567, iteration: 160643
loss: 1.019288182258606,grad_norm: 0.9999993082163439, iteration: 160644
loss: 1.0310693979263306,grad_norm: 0.944115461842036, iteration: 160645
loss: 1.005670189857483,grad_norm: 0.9999997972055916, iteration: 160646
loss: 1.0304657220840454,grad_norm: 0.9999989971214743, iteration: 160647
loss: 0.99664306640625,grad_norm: 0.9634876576905647, iteration: 160648
loss: 0.9869505763053894,grad_norm: 0.9047810500629857, iteration: 160649
loss: 1.0026593208312988,grad_norm: 0.9473589583523735, iteration: 160650
loss: 0.9933298230171204,grad_norm: 0.9999992115697216, iteration: 160651
loss: 0.9691317081451416,grad_norm: 0.878406685937876, iteration: 160652
loss: 1.0125216245651245,grad_norm: 0.8455125324858564, iteration: 160653
loss: 1.00307035446167,grad_norm: 0.9995425375806649, iteration: 160654
loss: 0.979720413684845,grad_norm: 0.8926671214089656, iteration: 160655
loss: 1.001571774482727,grad_norm: 0.9999990640696637, iteration: 160656
loss: 1.0178492069244385,grad_norm: 0.9999991050194946, iteration: 160657
loss: 0.9943307638168335,grad_norm: 0.9780410082341245, iteration: 160658
loss: 1.0057977437973022,grad_norm: 0.9582047706877214, iteration: 160659
loss: 1.00815749168396,grad_norm: 0.9559815953288872, iteration: 160660
loss: 1.015085220336914,grad_norm: 0.9999992470828898, iteration: 160661
loss: 0.9928900003433228,grad_norm: 0.999999213572366, iteration: 160662
loss: 0.9631745219230652,grad_norm: 0.9390888119984564, iteration: 160663
loss: 0.987335205078125,grad_norm: 0.8737576254068993, iteration: 160664
loss: 0.9742549061775208,grad_norm: 0.9570363280604294, iteration: 160665
loss: 0.9667363166809082,grad_norm: 0.902116105100263, iteration: 160666
loss: 0.9850901961326599,grad_norm: 0.9999990966303605, iteration: 160667
loss: 0.9979339241981506,grad_norm: 0.9999990916816193, iteration: 160668
loss: 0.9796159863471985,grad_norm: 0.9501191690455102, iteration: 160669
loss: 0.9684078693389893,grad_norm: 0.9999991677858733, iteration: 160670
loss: 1.0612338781356812,grad_norm: 0.9999998346219066, iteration: 160671
loss: 1.0014986991882324,grad_norm: 0.9039803290030979, iteration: 160672
loss: 0.9997727274894714,grad_norm: 0.9689808050385621, iteration: 160673
loss: 0.9715033173561096,grad_norm: 0.9999991509355793, iteration: 160674
loss: 0.9869593381881714,grad_norm: 0.9999990317360551, iteration: 160675
loss: 0.9811704158782959,grad_norm: 0.829658909063194, iteration: 160676
loss: 1.0090352296829224,grad_norm: 0.9556969720829586, iteration: 160677
loss: 1.0165098905563354,grad_norm: 0.9999989317864251, iteration: 160678
loss: 0.964118242263794,grad_norm: 0.9966925958147053, iteration: 160679
loss: 0.9573751091957092,grad_norm: 0.9627224794727942, iteration: 160680
loss: 1.025087833404541,grad_norm: 0.9999992376191907, iteration: 160681
loss: 0.9802578687667847,grad_norm: 0.9890293563065498, iteration: 160682
loss: 1.0089542865753174,grad_norm: 0.883521530875593, iteration: 160683
loss: 0.9569094777107239,grad_norm: 0.9999992141681731, iteration: 160684
loss: 1.0393952131271362,grad_norm: 0.9999992316139645, iteration: 160685
loss: 0.9428278803825378,grad_norm: 0.9999990595500051, iteration: 160686
loss: 1.000695824623108,grad_norm: 0.9999991388667644, iteration: 160687
loss: 0.9976683855056763,grad_norm: 0.9999990393492294, iteration: 160688
loss: 0.9850209355354309,grad_norm: 0.9999992880868137, iteration: 160689
loss: 0.9753864407539368,grad_norm: 0.9083806095179094, iteration: 160690
loss: 1.030228853225708,grad_norm: 0.9999990878456188, iteration: 160691
loss: 0.9896916747093201,grad_norm: 0.9999990780705839, iteration: 160692
loss: 1.008068323135376,grad_norm: 0.9999990697728618, iteration: 160693
loss: 0.9814915657043457,grad_norm: 0.9999991696220255, iteration: 160694
loss: 0.9968652725219727,grad_norm: 0.8900050246119896, iteration: 160695
loss: 1.0035645961761475,grad_norm: 0.9999991209794221, iteration: 160696
loss: 1.0306285619735718,grad_norm: 0.9999991419333842, iteration: 160697
loss: 0.9931119680404663,grad_norm: 0.9935277623804837, iteration: 160698
loss: 1.0074043273925781,grad_norm: 0.9999991992341877, iteration: 160699
loss: 0.9880408048629761,grad_norm: 0.8787223738623495, iteration: 160700
loss: 1.0034668445587158,grad_norm: 0.9999991864220565, iteration: 160701
loss: 0.9980878233909607,grad_norm: 0.9999989730986493, iteration: 160702
loss: 1.045498251914978,grad_norm: 0.9999991140998884, iteration: 160703
loss: 1.0194847583770752,grad_norm: 0.9179146184829264, iteration: 160704
loss: 1.0366321802139282,grad_norm: 0.9999992133532594, iteration: 160705
loss: 0.9817039966583252,grad_norm: 0.9999990962608333, iteration: 160706
loss: 1.004706621170044,grad_norm: 0.9999994600795277, iteration: 160707
loss: 0.9967980980873108,grad_norm: 0.9999991858632734, iteration: 160708
loss: 0.9722683429718018,grad_norm: 0.9999992530815212, iteration: 160709
loss: 0.9887384176254272,grad_norm: 0.9999991217445149, iteration: 160710
loss: 1.0235458612442017,grad_norm: 0.9999991432885346, iteration: 160711
loss: 1.0016275644302368,grad_norm: 0.8863192459644407, iteration: 160712
loss: 0.9659285545349121,grad_norm: 0.8753737577672539, iteration: 160713
loss: 0.9971421957015991,grad_norm: 0.7954647395899149, iteration: 160714
loss: 1.0156161785125732,grad_norm: 0.9999997682162433, iteration: 160715
loss: 0.9766885042190552,grad_norm: 0.8014494185185691, iteration: 160716
loss: 0.9831531047821045,grad_norm: 0.9691178754895922, iteration: 160717
loss: 1.032335877418518,grad_norm: 0.9999992536961602, iteration: 160718
loss: 1.0386685132980347,grad_norm: 0.9999992082859853, iteration: 160719
loss: 0.9741248488426208,grad_norm: 0.8414053292954914, iteration: 160720
loss: 1.007254719734192,grad_norm: 0.9264733012050484, iteration: 160721
loss: 0.9683851599693298,grad_norm: 0.9999992605823286, iteration: 160722
loss: 1.002581000328064,grad_norm: 0.9999992147576442, iteration: 160723
loss: 1.0039629936218262,grad_norm: 0.9417084191497086, iteration: 160724
loss: 1.0017765760421753,grad_norm: 0.8980158461105832, iteration: 160725
loss: 1.014064908027649,grad_norm: 0.9534317214399939, iteration: 160726
loss: 0.9398800134658813,grad_norm: 0.9999990200504822, iteration: 160727
loss: 0.9642787575721741,grad_norm: 0.9999991270071797, iteration: 160728
loss: 0.9893257021903992,grad_norm: 0.8975955881877267, iteration: 160729
loss: 0.9787846207618713,grad_norm: 0.9740537380647796, iteration: 160730
loss: 0.9968244433403015,grad_norm: 0.9725046244078253, iteration: 160731
loss: 1.0487244129180908,grad_norm: 0.9544739176867265, iteration: 160732
loss: 0.9964316487312317,grad_norm: 0.9725042363314579, iteration: 160733
loss: 0.9817056059837341,grad_norm: 0.999999007168058, iteration: 160734
loss: 1.0026650428771973,grad_norm: 0.9999998432579819, iteration: 160735
loss: 1.0201048851013184,grad_norm: 0.9675022341364374, iteration: 160736
loss: 0.9857432246208191,grad_norm: 0.8508764451082518, iteration: 160737
loss: 1.0722512006759644,grad_norm: 0.982142884519999, iteration: 160738
loss: 1.0135483741760254,grad_norm: 0.9999990648826199, iteration: 160739
loss: 0.9703342318534851,grad_norm: 0.8440752921069489, iteration: 160740
loss: 0.9809341430664062,grad_norm: 0.9059346670002981, iteration: 160741
loss: 0.9873733520507812,grad_norm: 0.9090824610460138, iteration: 160742
loss: 0.9849569201469421,grad_norm: 0.9999992070727453, iteration: 160743
loss: 0.9942493438720703,grad_norm: 0.9886034480463843, iteration: 160744
loss: 1.042930006980896,grad_norm: 0.9999991825358034, iteration: 160745
loss: 1.0266098976135254,grad_norm: 0.892914803458058, iteration: 160746
loss: 0.9327948093414307,grad_norm: 0.9999992335598751, iteration: 160747
loss: 0.9913054704666138,grad_norm: 0.9471391298714003, iteration: 160748
loss: 1.0194171667099,grad_norm: 0.999998904472634, iteration: 160749
loss: 1.0084621906280518,grad_norm: 0.8993405027369382, iteration: 160750
loss: 1.020939588546753,grad_norm: 0.9461746145512434, iteration: 160751
loss: 0.9824560880661011,grad_norm: 0.8932002442408925, iteration: 160752
loss: 0.9921950697898865,grad_norm: 0.9583190988246864, iteration: 160753
loss: 0.967731773853302,grad_norm: 0.9999991576229401, iteration: 160754
loss: 1.0520994663238525,grad_norm: 0.9999994084440693, iteration: 160755
loss: 1.027202844619751,grad_norm: 0.9999993471206879, iteration: 160756
loss: 0.984255850315094,grad_norm: 0.9999991089585137, iteration: 160757
loss: 0.9979457855224609,grad_norm: 0.9999992013781448, iteration: 160758
loss: 1.0467240810394287,grad_norm: 0.9999991491080608, iteration: 160759
loss: 1.0574387311935425,grad_norm: 0.999999096450199, iteration: 160760
loss: 1.0271700620651245,grad_norm: 0.9276105160229573, iteration: 160761
loss: 1.0295125246047974,grad_norm: 0.9999992817797597, iteration: 160762
loss: 0.9867932200431824,grad_norm: 0.9999992343903946, iteration: 160763
loss: 0.9754776954650879,grad_norm: 0.9375512375268815, iteration: 160764
loss: 1.0441603660583496,grad_norm: 0.9999995578022713, iteration: 160765
loss: 1.032583236694336,grad_norm: 0.9999993519677713, iteration: 160766
loss: 1.011126160621643,grad_norm: 0.945157616435347, iteration: 160767
loss: 1.0107040405273438,grad_norm: 0.9999992646926036, iteration: 160768
loss: 1.0007156133651733,grad_norm: 0.939857017579248, iteration: 160769
loss: 0.968112587928772,grad_norm: 0.9614473398415919, iteration: 160770
loss: 0.9964272975921631,grad_norm: 0.9258971530301497, iteration: 160771
loss: 1.0120242834091187,grad_norm: 0.9501677536884187, iteration: 160772
loss: 1.0022625923156738,grad_norm: 0.9669134640701825, iteration: 160773
loss: 0.9931932687759399,grad_norm: 0.999999163569175, iteration: 160774
loss: 0.9947931170463562,grad_norm: 0.960630562416253, iteration: 160775
loss: 1.0119081735610962,grad_norm: 0.8997031707521913, iteration: 160776
loss: 1.027624249458313,grad_norm: 0.9607461861836335, iteration: 160777
loss: 0.9845471382141113,grad_norm: 0.8432023650433278, iteration: 160778
loss: 0.9972127079963684,grad_norm: 0.9999991515075007, iteration: 160779
loss: 0.990389883518219,grad_norm: 0.9999991120477386, iteration: 160780
loss: 0.9837803840637207,grad_norm: 0.8630648352803711, iteration: 160781
loss: 0.9587427377700806,grad_norm: 0.9999990295308411, iteration: 160782
loss: 0.9486616253852844,grad_norm: 0.9999991354800826, iteration: 160783
loss: 1.0229334831237793,grad_norm: 0.9999989794821409, iteration: 160784
loss: 0.9785412549972534,grad_norm: 0.9269744840786798, iteration: 160785
loss: 1.031960129737854,grad_norm: 0.9999993225646768, iteration: 160786
loss: 1.0001075267791748,grad_norm: 0.9478230409848578, iteration: 160787
loss: 0.9937020540237427,grad_norm: 0.9999991602485052, iteration: 160788
loss: 0.9904743432998657,grad_norm: 0.9999990233165492, iteration: 160789
loss: 0.9905309081077576,grad_norm: 0.8838010015910656, iteration: 160790
loss: 0.9921171069145203,grad_norm: 0.9999989590294789, iteration: 160791
loss: 1.0073003768920898,grad_norm: 0.9999991461429051, iteration: 160792
loss: 1.02170991897583,grad_norm: 0.9928329146987217, iteration: 160793
loss: 1.0257090330123901,grad_norm: 0.9999991750107163, iteration: 160794
loss: 1.031116247177124,grad_norm: 0.9999992594890109, iteration: 160795
loss: 0.997862696647644,grad_norm: 0.9666923587969526, iteration: 160796
loss: 0.9999061226844788,grad_norm: 0.9223408878675443, iteration: 160797
loss: 1.0007290840148926,grad_norm: 0.9999989849596314, iteration: 160798
loss: 1.0191811323165894,grad_norm: 0.9192847063117264, iteration: 160799
loss: 1.0139065980911255,grad_norm: 0.9353976118397495, iteration: 160800
loss: 0.9757190942764282,grad_norm: 0.9313197177267928, iteration: 160801
loss: 0.9956203699111938,grad_norm: 0.9230939219231654, iteration: 160802
loss: 0.9966833591461182,grad_norm: 0.9999991048822707, iteration: 160803
loss: 0.9888278245925903,grad_norm: 0.9999990403626792, iteration: 160804
loss: 0.986223578453064,grad_norm: 0.999999174011166, iteration: 160805
loss: 0.9523227214813232,grad_norm: 0.9524976470360109, iteration: 160806
loss: 1.0193207263946533,grad_norm: 0.9444519752654839, iteration: 160807
loss: 0.989147961139679,grad_norm: 0.9999991198047595, iteration: 160808
loss: 1.033955693244934,grad_norm: 0.9999990198313106, iteration: 160809
loss: 0.9982960224151611,grad_norm: 0.9999992112383035, iteration: 160810
loss: 0.9990057945251465,grad_norm: 0.9471424694482241, iteration: 160811
loss: 1.009360671043396,grad_norm: 0.9424265486105646, iteration: 160812
loss: 0.972870409488678,grad_norm: 0.9999991328972827, iteration: 160813
loss: 1.0095765590667725,grad_norm: 0.9112085834853525, iteration: 160814
loss: 1.0273120403289795,grad_norm: 0.9999996346820497, iteration: 160815
loss: 0.98723965883255,grad_norm: 0.912319290711404, iteration: 160816
loss: 0.9916738271713257,grad_norm: 0.9696752057849376, iteration: 160817
loss: 0.991208016872406,grad_norm: 0.9999990125443337, iteration: 160818
loss: 1.0135375261306763,grad_norm: 0.999999089553437, iteration: 160819
loss: 1.0266960859298706,grad_norm: 0.9675335331169801, iteration: 160820
loss: 0.9708759784698486,grad_norm: 0.7837735324154818, iteration: 160821
loss: 0.965165913105011,grad_norm: 0.9980400598539305, iteration: 160822
loss: 1.0034923553466797,grad_norm: 0.9999990789893793, iteration: 160823
loss: 0.9993717074394226,grad_norm: 0.9637083318542019, iteration: 160824
loss: 0.9906789064407349,grad_norm: 0.9999993264830503, iteration: 160825
loss: 0.9962012767791748,grad_norm: 0.9999990816068259, iteration: 160826
loss: 1.0087426900863647,grad_norm: 0.9863962287264461, iteration: 160827
loss: 1.0136297941207886,grad_norm: 0.9585849015438123, iteration: 160828
loss: 1.013773798942566,grad_norm: 0.9999990505409935, iteration: 160829
loss: 1.0389827489852905,grad_norm: 0.9999990982494711, iteration: 160830
loss: 1.0237154960632324,grad_norm: 0.9590562736353113, iteration: 160831
loss: 1.02617609500885,grad_norm: 0.8729549194979032, iteration: 160832
loss: 0.9706217050552368,grad_norm: 0.931985295869051, iteration: 160833
loss: 0.9888073205947876,grad_norm: 0.9999990930417765, iteration: 160834
loss: 0.9780178666114807,grad_norm: 0.9999991002152909, iteration: 160835
loss: 0.9520251750946045,grad_norm: 0.999998959480627, iteration: 160836
loss: 0.9976738095283508,grad_norm: 0.9999990915459843, iteration: 160837
loss: 0.9830432534217834,grad_norm: 0.9007762980449594, iteration: 160838
loss: 0.9974603652954102,grad_norm: 0.9999990353534212, iteration: 160839
loss: 0.9869613647460938,grad_norm: 0.9999990322876562, iteration: 160840
loss: 0.9606636166572571,grad_norm: 0.9999992487790924, iteration: 160841
loss: 1.0287474393844604,grad_norm: 0.9999990910760908, iteration: 160842
loss: 0.9871789813041687,grad_norm: 0.9999988959477514, iteration: 160843
loss: 0.9922581315040588,grad_norm: 0.9999990327332797, iteration: 160844
loss: 1.0047838687896729,grad_norm: 0.9999993513388389, iteration: 160845
loss: 1.051839828491211,grad_norm: 0.8706103119830476, iteration: 160846
loss: 0.9900264739990234,grad_norm: 0.9999994082712025, iteration: 160847
loss: 1.0530736446380615,grad_norm: 0.9999990774405952, iteration: 160848
loss: 1.035480260848999,grad_norm: 0.9999991128736636, iteration: 160849
loss: 1.016572117805481,grad_norm: 0.999999687262744, iteration: 160850
loss: 0.9954047203063965,grad_norm: 0.9999994220561982, iteration: 160851
loss: 1.0017582178115845,grad_norm: 0.9999989845791574, iteration: 160852
loss: 0.9852252006530762,grad_norm: 0.9086338799837095, iteration: 160853
loss: 0.9751250743865967,grad_norm: 0.9195502172117572, iteration: 160854
loss: 0.9995254278182983,grad_norm: 0.9999991980030074, iteration: 160855
loss: 0.9857876300811768,grad_norm: 0.9732310623846171, iteration: 160856
loss: 0.9969544410705566,grad_norm: 0.9999989659836753, iteration: 160857
loss: 1.0284913778305054,grad_norm: 0.986893894402271, iteration: 160858
loss: 0.9479520320892334,grad_norm: 0.9594296789274444, iteration: 160859
loss: 1.0098923444747925,grad_norm: 0.8739938903830278, iteration: 160860
loss: 1.0114716291427612,grad_norm: 0.9695287760860674, iteration: 160861
loss: 0.9851233959197998,grad_norm: 0.9999991550671735, iteration: 160862
loss: 1.0381584167480469,grad_norm: 0.9999990954845452, iteration: 160863
loss: 0.940730094909668,grad_norm: 0.9865974930361038, iteration: 160864
loss: 0.9966357946395874,grad_norm: 0.9436358600501227, iteration: 160865
loss: 0.978663444519043,grad_norm: 0.9241470398704879, iteration: 160866
loss: 1.0517711639404297,grad_norm: 0.8933214478266357, iteration: 160867
loss: 1.082668423652649,grad_norm: 0.9999996689006618, iteration: 160868
loss: 1.0049161911010742,grad_norm: 0.8532213540006405, iteration: 160869
loss: 0.9669718742370605,grad_norm: 0.9999993401650669, iteration: 160870
loss: 1.0375986099243164,grad_norm: 0.9999990818632604, iteration: 160871
loss: 0.9961554408073425,grad_norm: 0.9999994122113532, iteration: 160872
loss: 0.9373194575309753,grad_norm: 0.9216532393137873, iteration: 160873
loss: 0.991854727268219,grad_norm: 0.9999991241776933, iteration: 160874
loss: 0.9657296538352966,grad_norm: 0.9139310687397656, iteration: 160875
loss: 1.001632571220398,grad_norm: 0.9278922626466019, iteration: 160876
loss: 0.9646639227867126,grad_norm: 0.9999991836175137, iteration: 160877
loss: 1.0096156597137451,grad_norm: 0.9796297112182188, iteration: 160878
loss: 1.1560167074203491,grad_norm: 0.9999998824087855, iteration: 160879
loss: 0.9926280379295349,grad_norm: 0.928652597022117, iteration: 160880
loss: 0.9844843745231628,grad_norm: 0.9999991514115586, iteration: 160881
loss: 1.060156226158142,grad_norm: 0.9522549116027595, iteration: 160882
loss: 0.9975067973136902,grad_norm: 0.8855473837817125, iteration: 160883
loss: 0.982096791267395,grad_norm: 0.9655031917436703, iteration: 160884
loss: 0.9626829624176025,grad_norm: 0.9972412835987707, iteration: 160885
loss: 0.9991496801376343,grad_norm: 0.9999992484458887, iteration: 160886
loss: 0.9985774159431458,grad_norm: 0.847919296548583, iteration: 160887
loss: 1.0081638097763062,grad_norm: 0.8841474819317383, iteration: 160888
loss: 1.0580958127975464,grad_norm: 0.9999990092879979, iteration: 160889
loss: 0.9673313498497009,grad_norm: 0.9819056361031241, iteration: 160890
loss: 0.991141140460968,grad_norm: 0.9999992384667408, iteration: 160891
loss: 0.9676456451416016,grad_norm: 0.8299355676433001, iteration: 160892
loss: 1.0339136123657227,grad_norm: 0.999999052367863, iteration: 160893
loss: 0.9865303039550781,grad_norm: 0.8789093782470525, iteration: 160894
loss: 1.030237078666687,grad_norm: 0.9469695208306157, iteration: 160895
loss: 0.9921278953552246,grad_norm: 0.9636608146769893, iteration: 160896
loss: 1.0063766241073608,grad_norm: 0.9436484217845247, iteration: 160897
loss: 1.0011593103408813,grad_norm: 0.9999992443005349, iteration: 160898
loss: 1.0220586061477661,grad_norm: 0.9999991080074334, iteration: 160899
loss: 1.0199536085128784,grad_norm: 0.9730407845730061, iteration: 160900
loss: 0.9894406199455261,grad_norm: 0.9999991067658076, iteration: 160901
loss: 0.9777992963790894,grad_norm: 0.9999991904640647, iteration: 160902
loss: 0.9832161068916321,grad_norm: 0.9999991587739178, iteration: 160903
loss: 0.9646186232566833,grad_norm: 0.9347665917063466, iteration: 160904
loss: 1.0668447017669678,grad_norm: 0.9999993439426947, iteration: 160905
loss: 0.9747648239135742,grad_norm: 0.999999216398838, iteration: 160906
loss: 1.0169233083724976,grad_norm: 0.9999991221794328, iteration: 160907
loss: 0.9600597620010376,grad_norm: 0.8978487172957679, iteration: 160908
loss: 0.9662734270095825,grad_norm: 0.9999990510724935, iteration: 160909
loss: 0.9624881148338318,grad_norm: 0.9999991982783543, iteration: 160910
loss: 1.0051140785217285,grad_norm: 0.8887832395527998, iteration: 160911
loss: 0.9972924590110779,grad_norm: 0.9812588635062898, iteration: 160912
loss: 0.9900556802749634,grad_norm: 0.9999992405479134, iteration: 160913
loss: 1.0029693841934204,grad_norm: 0.9999992776038281, iteration: 160914
loss: 0.9809616804122925,grad_norm: 0.9528152945201627, iteration: 160915
loss: 1.0454864501953125,grad_norm: 0.9999992781186378, iteration: 160916
loss: 0.9840085506439209,grad_norm: 0.9999991377077508, iteration: 160917
loss: 0.9752632975578308,grad_norm: 0.999999008044368, iteration: 160918
loss: 0.9849934577941895,grad_norm: 0.9650836160699422, iteration: 160919
loss: 1.0140448808670044,grad_norm: 0.9999991237339804, iteration: 160920
loss: 0.986674964427948,grad_norm: 0.9999990452590299, iteration: 160921
loss: 0.9853415489196777,grad_norm: 0.9315020948684056, iteration: 160922
loss: 1.0390197038650513,grad_norm: 0.9999991350924923, iteration: 160923
loss: 1.0182238817214966,grad_norm: 0.9999990712382401, iteration: 160924
loss: 1.0044225454330444,grad_norm: 0.999999154163367, iteration: 160925
loss: 1.0197349786758423,grad_norm: 0.9999991213306634, iteration: 160926
loss: 0.9894959330558777,grad_norm: 0.9999991803229381, iteration: 160927
loss: 1.038201093673706,grad_norm: 0.9111672764862092, iteration: 160928
loss: 0.9991145133972168,grad_norm: 0.9999993663411982, iteration: 160929
loss: 0.9771748781204224,grad_norm: 0.9999990430983093, iteration: 160930
loss: 0.9831744432449341,grad_norm: 0.9923286397238968, iteration: 160931
loss: 0.9811210632324219,grad_norm: 0.9722395989608247, iteration: 160932
loss: 0.9980736374855042,grad_norm: 0.999999070351484, iteration: 160933
loss: 0.9929621815681458,grad_norm: 0.9999991511010606, iteration: 160934
loss: 0.9832594394683838,grad_norm: 0.9999990782402478, iteration: 160935
loss: 0.9981674551963806,grad_norm: 0.8782853656749112, iteration: 160936
loss: 1.0312517881393433,grad_norm: 0.9952772500128856, iteration: 160937
loss: 1.0405690670013428,grad_norm: 0.9382454706745693, iteration: 160938
loss: 1.0157054662704468,grad_norm: 0.9999991798452845, iteration: 160939
loss: 1.0111771821975708,grad_norm: 0.8574860263909009, iteration: 160940
loss: 1.0218416452407837,grad_norm: 0.999999284790354, iteration: 160941
loss: 0.9656405448913574,grad_norm: 0.933490499519579, iteration: 160942
loss: 1.0229097604751587,grad_norm: 0.9999991724521352, iteration: 160943
loss: 1.1207880973815918,grad_norm: 0.9921803077627759, iteration: 160944
loss: 1.0481290817260742,grad_norm: 0.9925359750430983, iteration: 160945
loss: 1.0265439748764038,grad_norm: 0.9999998099824303, iteration: 160946
loss: 0.9788527488708496,grad_norm: 0.9082440778309058, iteration: 160947
loss: 1.031480073928833,grad_norm: 0.9999994747646115, iteration: 160948
loss: 1.005901575088501,grad_norm: 0.999999200435796, iteration: 160949
loss: 0.9845931529998779,grad_norm: 0.9117902943168334, iteration: 160950
loss: 1.0054312944412231,grad_norm: 0.9701206518771427, iteration: 160951
loss: 1.044236421585083,grad_norm: 0.9999990551044025, iteration: 160952
loss: 0.9780213236808777,grad_norm: 0.999999139325616, iteration: 160953
loss: 1.0313969850540161,grad_norm: 0.968919120117591, iteration: 160954
loss: 0.9807213544845581,grad_norm: 0.9999992367955246, iteration: 160955
loss: 1.0292786359786987,grad_norm: 0.9999991327378929, iteration: 160956
loss: 0.9872608184814453,grad_norm: 0.999999071368409, iteration: 160957
loss: 1.0026665925979614,grad_norm: 0.9999990157238269, iteration: 160958
loss: 0.9975335001945496,grad_norm: 0.9260394279303097, iteration: 160959
loss: 1.0055848360061646,grad_norm: 0.999999056221919, iteration: 160960
loss: 1.0054128170013428,grad_norm: 0.9658327141662707, iteration: 160961
loss: 1.0002912282943726,grad_norm: 0.9999992044657685, iteration: 160962
loss: 1.0009346008300781,grad_norm: 0.7807647929444956, iteration: 160963
loss: 0.9910075664520264,grad_norm: 0.8823397011471292, iteration: 160964
loss: 0.9728417992591858,grad_norm: 0.9999991041125154, iteration: 160965
loss: 0.9946635365486145,grad_norm: 0.9999991810463797, iteration: 160966
loss: 0.983306884765625,grad_norm: 0.9999991446852013, iteration: 160967
loss: 1.0010862350463867,grad_norm: 0.9999991201159872, iteration: 160968
loss: 0.9951735138893127,grad_norm: 0.9999990965038797, iteration: 160969
loss: 0.9946687817573547,grad_norm: 0.9636803522799227, iteration: 160970
loss: 1.0310513973236084,grad_norm: 0.8696653373943711, iteration: 160971
loss: 1.003456950187683,grad_norm: 0.9999990221631451, iteration: 160972
loss: 0.9884976148605347,grad_norm: 0.9999990650543952, iteration: 160973
loss: 0.9855515360832214,grad_norm: 0.885295631233534, iteration: 160974
loss: 1.0291812419891357,grad_norm: 0.9789985773373006, iteration: 160975
loss: 1.015217661857605,grad_norm: 0.9999990475056227, iteration: 160976
loss: 0.968695342540741,grad_norm: 0.9999990169584494, iteration: 160977
loss: 0.9915031790733337,grad_norm: 0.9999991959885031, iteration: 160978
loss: 1.0121817588806152,grad_norm: 0.9999990550503878, iteration: 160979
loss: 1.0012832880020142,grad_norm: 0.9999993214675194, iteration: 160980
loss: 0.9917125701904297,grad_norm: 0.9999991502532288, iteration: 160981
loss: 1.0262315273284912,grad_norm: 0.9999996781724094, iteration: 160982
loss: 0.9784172773361206,grad_norm: 0.9999989924703941, iteration: 160983
loss: 0.982323408126831,grad_norm: 0.9999991761704458, iteration: 160984
loss: 1.0011907815933228,grad_norm: 0.9999991244493973, iteration: 160985
loss: 0.9718134999275208,grad_norm: 0.8939444791101089, iteration: 160986
loss: 1.0085755586624146,grad_norm: 0.894671016606856, iteration: 160987
loss: 0.9831214547157288,grad_norm: 0.9999992146080677, iteration: 160988
loss: 0.998824954032898,grad_norm: 0.9999991626866167, iteration: 160989
loss: 1.010389804840088,grad_norm: 0.9999990251693078, iteration: 160990
loss: 1.0239739418029785,grad_norm: 0.9999997388916516, iteration: 160991
loss: 1.0370289087295532,grad_norm: 0.9999989706107292, iteration: 160992
loss: 0.9922918677330017,grad_norm: 0.9999990479514691, iteration: 160993
loss: 1.0394786596298218,grad_norm: 0.9999992653351921, iteration: 160994
loss: 0.9901659488677979,grad_norm: 0.9758077860316267, iteration: 160995
loss: 1.0105462074279785,grad_norm: 0.9999991954140456, iteration: 160996
loss: 1.0051743984222412,grad_norm: 0.9999992118547874, iteration: 160997
loss: 1.0752683877944946,grad_norm: 0.9999991797077082, iteration: 160998
loss: 1.0138475894927979,grad_norm: 0.999999148335982, iteration: 160999
loss: 0.9943443536758423,grad_norm: 0.8652986938351523, iteration: 161000
loss: 0.986625611782074,grad_norm: 0.990981797147519, iteration: 161001
loss: 0.9992400407791138,grad_norm: 0.8711957611656469, iteration: 161002
loss: 1.0237622261047363,grad_norm: 0.9999994159591165, iteration: 161003
loss: 1.0043509006500244,grad_norm: 0.9999996867947365, iteration: 161004
loss: 1.0003836154937744,grad_norm: 0.999999144414442, iteration: 161005
loss: 0.9975259304046631,grad_norm: 0.9702114918545058, iteration: 161006
loss: 1.0157692432403564,grad_norm: 0.9999992991994623, iteration: 161007
loss: 0.9908214211463928,grad_norm: 0.836638875332087, iteration: 161008
loss: 1.0156737565994263,grad_norm: 0.9999991352738564, iteration: 161009
loss: 1.020150065422058,grad_norm: 0.9999991771546993, iteration: 161010
loss: 0.9890440106391907,grad_norm: 0.9999991511326911, iteration: 161011
loss: 1.012954592704773,grad_norm: 0.90902646348988, iteration: 161012
loss: 0.9977854490280151,grad_norm: 0.9999989983513542, iteration: 161013
loss: 1.057064414024353,grad_norm: 0.9999996381567025, iteration: 161014
loss: 0.9715450406074524,grad_norm: 0.9854978797702942, iteration: 161015
loss: 1.0752708911895752,grad_norm: 0.9999991719230638, iteration: 161016
loss: 0.9724557995796204,grad_norm: 0.9664054529384475, iteration: 161017
loss: 0.9788078665733337,grad_norm: 0.8957824648209028, iteration: 161018
loss: 0.9966186285018921,grad_norm: 0.9722102483294561, iteration: 161019
loss: 0.9695934653282166,grad_norm: 0.9573061332989116, iteration: 161020
loss: 0.9649650454521179,grad_norm: 0.9999992827857843, iteration: 161021
loss: 1.0359941720962524,grad_norm: 0.9999993044053315, iteration: 161022
loss: 1.0148853063583374,grad_norm: 0.999999156030797, iteration: 161023
loss: 0.9787541031837463,grad_norm: 0.8796248114228066, iteration: 161024
loss: 1.0092494487762451,grad_norm: 0.9999991013469353, iteration: 161025
loss: 0.9638789892196655,grad_norm: 0.9999990796266275, iteration: 161026
loss: 0.9993413686752319,grad_norm: 0.9999989417250615, iteration: 161027
loss: 0.9687834978103638,grad_norm: 0.999999056603027, iteration: 161028
loss: 0.977668285369873,grad_norm: 0.999999046159286, iteration: 161029
loss: 0.9958601593971252,grad_norm: 0.9999990140256139, iteration: 161030
loss: 0.9580739736557007,grad_norm: 0.999998983229849, iteration: 161031
loss: 1.0392030477523804,grad_norm: 0.9971838127951365, iteration: 161032
loss: 1.0011017322540283,grad_norm: 0.8029988584713367, iteration: 161033
loss: 0.9661109447479248,grad_norm: 0.9862865417132269, iteration: 161034
loss: 1.0113296508789062,grad_norm: 0.913739237625947, iteration: 161035
loss: 0.9983593225479126,grad_norm: 0.9972865726592541, iteration: 161036
loss: 1.0035432577133179,grad_norm: 0.9999990525770273, iteration: 161037
loss: 1.0261070728302002,grad_norm: 0.9714690741505142, iteration: 161038
loss: 1.0079890489578247,grad_norm: 0.9999990427312593, iteration: 161039
loss: 1.006567358970642,grad_norm: 0.9407341236507428, iteration: 161040
loss: 0.9888195395469666,grad_norm: 0.999999246871248, iteration: 161041
loss: 1.0076130628585815,grad_norm: 0.9406523563386677, iteration: 161042
loss: 0.9871275424957275,grad_norm: 0.9999990020785225, iteration: 161043
loss: 1.0237095355987549,grad_norm: 0.9142374343360918, iteration: 161044
loss: 0.9816067814826965,grad_norm: 0.9999991585002167, iteration: 161045
loss: 0.978283703327179,grad_norm: 0.9999990784270818, iteration: 161046
loss: 1.0187439918518066,grad_norm: 0.8603171199245057, iteration: 161047
loss: 0.979292094707489,grad_norm: 0.809992969430558, iteration: 161048
loss: 0.9909002184867859,grad_norm: 0.8633273260467166, iteration: 161049
loss: 0.976685643196106,grad_norm: 0.9770085575930512, iteration: 161050
loss: 0.9761540293693542,grad_norm: 0.9581358131563085, iteration: 161051
loss: 1.0243756771087646,grad_norm: 0.8765737183559871, iteration: 161052
loss: 1.02285635471344,grad_norm: 0.9999991195019948, iteration: 161053
loss: 1.0017024278640747,grad_norm: 0.9999991537023607, iteration: 161054
loss: 0.9960510730743408,grad_norm: 0.9737984099444261, iteration: 161055
loss: 0.9556074142456055,grad_norm: 0.943495077931356, iteration: 161056
loss: 1.0359362363815308,grad_norm: 0.9999990307872406, iteration: 161057
loss: 1.0046499967575073,grad_norm: 0.9395780335498984, iteration: 161058
loss: 1.0229779481887817,grad_norm: 0.9999990464341445, iteration: 161059
loss: 1.0295723676681519,grad_norm: 0.9999994349535599, iteration: 161060
loss: 0.9391999244689941,grad_norm: 0.9999991259164811, iteration: 161061
loss: 1.0332595109939575,grad_norm: 0.9999989631059053, iteration: 161062
loss: 1.034610629081726,grad_norm: 0.9999991611823673, iteration: 161063
loss: 1.0159507989883423,grad_norm: 0.95251328933841, iteration: 161064
loss: 1.0174857378005981,grad_norm: 0.9852440797880002, iteration: 161065
loss: 0.9937910437583923,grad_norm: 0.9518445197324442, iteration: 161066
loss: 1.0256699323654175,grad_norm: 0.9999991013150334, iteration: 161067
loss: 1.0064712762832642,grad_norm: 0.9999992400829228, iteration: 161068
loss: 1.0133137702941895,grad_norm: 0.9211387781354046, iteration: 161069
loss: 1.0233986377716064,grad_norm: 0.9999989369636206, iteration: 161070
loss: 1.0097843408584595,grad_norm: 0.9999992705095487, iteration: 161071
loss: 0.9606684446334839,grad_norm: 0.8279090807334534, iteration: 161072
loss: 1.0138657093048096,grad_norm: 0.9287733795006461, iteration: 161073
loss: 0.9665694832801819,grad_norm: 0.9702676484965104, iteration: 161074
loss: 0.9705783724784851,grad_norm: 0.8760525877426718, iteration: 161075
loss: 1.0439109802246094,grad_norm: 0.9999993063822546, iteration: 161076
loss: 1.0073720216751099,grad_norm: 0.8239426953983314, iteration: 161077
loss: 1.0124881267547607,grad_norm: 0.8749192925949761, iteration: 161078
loss: 1.0061389207839966,grad_norm: 0.9999991225904038, iteration: 161079
loss: 1.0292375087738037,grad_norm: 0.9999993189379023, iteration: 161080
loss: 0.9885855317115784,grad_norm: 0.9778089658637855, iteration: 161081
loss: 1.0063318014144897,grad_norm: 0.992826473092452, iteration: 161082
loss: 0.9888705611228943,grad_norm: 0.9999990878451052, iteration: 161083
loss: 0.981629490852356,grad_norm: 0.9999990668944126, iteration: 161084
loss: 1.0024679899215698,grad_norm: 0.999999114576826, iteration: 161085
loss: 1.020213007926941,grad_norm: 0.9999990882397669, iteration: 161086
loss: 1.0303702354431152,grad_norm: 0.8819097867409726, iteration: 161087
loss: 0.9916729927062988,grad_norm: 0.9999991525089748, iteration: 161088
loss: 0.9808909893035889,grad_norm: 0.9999993019450838, iteration: 161089
loss: 0.9656549096107483,grad_norm: 0.8972343769427876, iteration: 161090
loss: 1.0222325325012207,grad_norm: 0.9999992225442408, iteration: 161091
loss: 1.0558900833129883,grad_norm: 0.999999173790964, iteration: 161092
loss: 1.0248421430587769,grad_norm: 0.9999991966107469, iteration: 161093
loss: 1.0008807182312012,grad_norm: 0.9999992776994698, iteration: 161094
loss: 0.9991446137428284,grad_norm: 0.9999988785446103, iteration: 161095
loss: 1.0059008598327637,grad_norm: 0.9528120299172648, iteration: 161096
loss: 0.9997170567512512,grad_norm: 0.9999991530081753, iteration: 161097
loss: 0.996195912361145,grad_norm: 0.9999991104532766, iteration: 161098
loss: 0.9909864664077759,grad_norm: 0.9999990001835162, iteration: 161099
loss: 0.9801042079925537,grad_norm: 0.8509381399130848, iteration: 161100
loss: 1.0141777992248535,grad_norm: 0.9999991880182758, iteration: 161101
loss: 1.0089043378829956,grad_norm: 0.8899588268385851, iteration: 161102
loss: 0.9947206974029541,grad_norm: 0.9535542574037474, iteration: 161103
loss: 1.0008043050765991,grad_norm: 0.999999053675318, iteration: 161104
loss: 1.0070966482162476,grad_norm: 0.999999221412223, iteration: 161105
loss: 1.0013011693954468,grad_norm: 0.9999990902738399, iteration: 161106
loss: 0.9890247583389282,grad_norm: 0.9999991405814966, iteration: 161107
loss: 0.9889552593231201,grad_norm: 0.9999992790681324, iteration: 161108
loss: 0.9663693904876709,grad_norm: 0.9999991672138914, iteration: 161109
loss: 0.9751206636428833,grad_norm: 0.999999101852358, iteration: 161110
loss: 1.0183864831924438,grad_norm: 0.9999991361373965, iteration: 161111
loss: 0.9534679055213928,grad_norm: 0.8752285443246981, iteration: 161112
loss: 0.981889545917511,grad_norm: 0.9999991767270594, iteration: 161113
loss: 0.9804201722145081,grad_norm: 0.9999991523619308, iteration: 161114
loss: 0.9610944986343384,grad_norm: 0.9999990862421456, iteration: 161115
loss: 0.9689522981643677,grad_norm: 0.9999991863793001, iteration: 161116
loss: 0.99568772315979,grad_norm: 0.9617427928170286, iteration: 161117
loss: 0.9857986569404602,grad_norm: 0.8664914798218074, iteration: 161118
loss: 0.9798583984375,grad_norm: 0.9999991599175562, iteration: 161119
loss: 1.0175179243087769,grad_norm: 0.9999991672125089, iteration: 161120
loss: 1.0108447074890137,grad_norm: 0.8161304190427316, iteration: 161121
loss: 1.0283044576644897,grad_norm: 0.9999989044952441, iteration: 161122
loss: 1.0054103136062622,grad_norm: 0.9974015680387992, iteration: 161123
loss: 0.9968739748001099,grad_norm: 0.9611767219402444, iteration: 161124
loss: 0.9936495423316956,grad_norm: 0.9999992428540591, iteration: 161125
loss: 0.9763498306274414,grad_norm: 0.9999991075207, iteration: 161126
loss: 0.9868159294128418,grad_norm: 0.8350040570643792, iteration: 161127
loss: 1.0096946954727173,grad_norm: 0.9174315674791642, iteration: 161128
loss: 1.0224542617797852,grad_norm: 0.9999992020752059, iteration: 161129
loss: 0.9779307246208191,grad_norm: 0.9796536895415884, iteration: 161130
loss: 1.0136712789535522,grad_norm: 0.9398742898851093, iteration: 161131
loss: 1.005054235458374,grad_norm: 0.8687026772362372, iteration: 161132
loss: 1.0011903047561646,grad_norm: 0.9463220670343954, iteration: 161133
loss: 1.0931315422058105,grad_norm: 0.8860465210647988, iteration: 161134
loss: 0.9784117341041565,grad_norm: 0.9999992205282924, iteration: 161135
loss: 1.0101397037506104,grad_norm: 0.9999992735490356, iteration: 161136
loss: 1.002135992050171,grad_norm: 0.9999991835769553, iteration: 161137
loss: 0.9705149531364441,grad_norm: 0.999999162152986, iteration: 161138
loss: 1.003835916519165,grad_norm: 0.9999989326491985, iteration: 161139
loss: 1.0104426145553589,grad_norm: 0.8767895757984019, iteration: 161140
loss: 0.9756214618682861,grad_norm: 0.8901924587201188, iteration: 161141
loss: 1.0199404954910278,grad_norm: 0.9999991435101715, iteration: 161142
loss: 1.0128610134124756,grad_norm: 0.9785425390992545, iteration: 161143
loss: 1.0334751605987549,grad_norm: 0.999999139486511, iteration: 161144
loss: 1.060816764831543,grad_norm: 0.9999998963514822, iteration: 161145
loss: 1.0201733112335205,grad_norm: 0.8910102626788572, iteration: 161146
loss: 0.9766493439674377,grad_norm: 0.930991063827838, iteration: 161147
loss: 0.9942038655281067,grad_norm: 0.9999990199425216, iteration: 161148
loss: 0.9903024435043335,grad_norm: 0.9157491068282921, iteration: 161149
loss: 1.037887692451477,grad_norm: 0.9999991480295358, iteration: 161150
loss: 0.9949417114257812,grad_norm: 0.9975349920785581, iteration: 161151
loss: 0.9764605760574341,grad_norm: 0.9119287833134266, iteration: 161152
loss: 0.9862801432609558,grad_norm: 0.8339226873989263, iteration: 161153
loss: 1.0039714574813843,grad_norm: 0.9999992674566291, iteration: 161154
loss: 1.1014231443405151,grad_norm: 0.9999992017808647, iteration: 161155
loss: 0.9810952544212341,grad_norm: 0.9999990421986696, iteration: 161156
loss: 1.0729798078536987,grad_norm: 0.9999994317616142, iteration: 161157
loss: 1.0278706550598145,grad_norm: 0.9581945066107648, iteration: 161158
loss: 1.0322966575622559,grad_norm: 0.8476816494229472, iteration: 161159
loss: 1.0136463642120361,grad_norm: 0.8135223221176201, iteration: 161160
loss: 1.0356966257095337,grad_norm: 0.9999991795863403, iteration: 161161
loss: 1.0188078880310059,grad_norm: 0.9999990011515878, iteration: 161162
loss: 1.0122445821762085,grad_norm: 0.9999996832037421, iteration: 161163
loss: 1.003430724143982,grad_norm: 0.9999995524306291, iteration: 161164
loss: 0.9823505878448486,grad_norm: 0.7864662880851543, iteration: 161165
loss: 1.2406961917877197,grad_norm: 0.999999238056907, iteration: 161166
loss: 0.9722662568092346,grad_norm: 0.9701441427599068, iteration: 161167
loss: 0.9749581217765808,grad_norm: 0.9999993372365052, iteration: 161168
loss: 0.932649552822113,grad_norm: 0.9999992026697089, iteration: 161169
loss: 1.0053541660308838,grad_norm: 0.9999992992358796, iteration: 161170
loss: 1.0337752103805542,grad_norm: 0.9999990758141898, iteration: 161171
loss: 0.9658083319664001,grad_norm: 0.9589281239718873, iteration: 161172
loss: 0.9401643872261047,grad_norm: 0.9999991014855043, iteration: 161173
loss: 1.0297222137451172,grad_norm: 0.9999991935560988, iteration: 161174
loss: 1.005932331085205,grad_norm: 0.9999991449999196, iteration: 161175
loss: 0.9899380803108215,grad_norm: 0.9999991182457885, iteration: 161176
loss: 0.9764828085899353,grad_norm: 0.9425864641422669, iteration: 161177
loss: 0.9566717743873596,grad_norm: 0.9510773035947964, iteration: 161178
loss: 1.0209779739379883,grad_norm: 0.8618523890175238, iteration: 161179
loss: 1.0329278707504272,grad_norm: 0.975770794351926, iteration: 161180
loss: 1.0154975652694702,grad_norm: 0.999999303734888, iteration: 161181
loss: 1.0189563035964966,grad_norm: 0.9999993015186962, iteration: 161182
loss: 0.9961710572242737,grad_norm: 0.9999990318719572, iteration: 161183
loss: 0.9835754036903381,grad_norm: 0.9493359111524132, iteration: 161184
loss: 1.0078357458114624,grad_norm: 0.9589447869868215, iteration: 161185
loss: 1.0265522003173828,grad_norm: 0.9876451807165832, iteration: 161186
loss: 0.9962603449821472,grad_norm: 0.9201034195098202, iteration: 161187
loss: 0.9926394820213318,grad_norm: 0.9999991114216898, iteration: 161188
loss: 1.010031819343567,grad_norm: 0.9510091489001733, iteration: 161189
loss: 1.0482233762741089,grad_norm: 0.9999997214514741, iteration: 161190
loss: 1.005325436592102,grad_norm: 0.9408906578466917, iteration: 161191
loss: 0.9901846647262573,grad_norm: 0.9999992998137213, iteration: 161192
loss: 1.0145429372787476,grad_norm: 0.999999693155044, iteration: 161193
loss: 1.017358422279358,grad_norm: 0.9047264831225398, iteration: 161194
loss: 0.9951398372650146,grad_norm: 0.9751743314312761, iteration: 161195
loss: 1.0038483142852783,grad_norm: 0.9999990608456035, iteration: 161196
loss: 1.0441646575927734,grad_norm: 0.9786544728454486, iteration: 161197
loss: 0.9890861511230469,grad_norm: 0.9999992368720197, iteration: 161198
loss: 1.0084270238876343,grad_norm: 0.8753062484110212, iteration: 161199
loss: 0.9851972460746765,grad_norm: 0.9769668847984971, iteration: 161200
loss: 0.993299126625061,grad_norm: 0.9453006084781941, iteration: 161201
loss: 0.9902313351631165,grad_norm: 0.9999992367802205, iteration: 161202
loss: 0.9971117377281189,grad_norm: 0.9780786995447028, iteration: 161203
loss: 1.0963870286941528,grad_norm: 0.9999996659973531, iteration: 161204
loss: 1.0004589557647705,grad_norm: 0.9165434338419546, iteration: 161205
loss: 1.0500537157058716,grad_norm: 0.9241035585884885, iteration: 161206
loss: 0.9829618334770203,grad_norm: 0.9999992061301126, iteration: 161207
loss: 1.0278265476226807,grad_norm: 0.9999990504687368, iteration: 161208
loss: 0.9894877076148987,grad_norm: 0.9999992371868403, iteration: 161209
loss: 1.04425847530365,grad_norm: 0.9999995575492632, iteration: 161210
loss: 0.988589882850647,grad_norm: 0.8417960836095234, iteration: 161211
loss: 1.0040032863616943,grad_norm: 0.9999990232897213, iteration: 161212
loss: 0.9860945343971252,grad_norm: 0.9999990829545718, iteration: 161213
loss: 1.049899697303772,grad_norm: 0.9999994618605127, iteration: 161214
loss: 1.011413812637329,grad_norm: 0.9927546325347743, iteration: 161215
loss: 1.0092380046844482,grad_norm: 0.8870638888018221, iteration: 161216
loss: 0.9601276516914368,grad_norm: 0.9999990671313527, iteration: 161217
loss: 0.9889693260192871,grad_norm: 0.9853479108601406, iteration: 161218
loss: 1.0276925563812256,grad_norm: 0.9040465332614955, iteration: 161219
loss: 1.0124931335449219,grad_norm: 0.9999993454090382, iteration: 161220
loss: 1.0230087041854858,grad_norm: 0.9932903488824659, iteration: 161221
loss: 0.9829803109169006,grad_norm: 0.9999991966130127, iteration: 161222
loss: 1.0149638652801514,grad_norm: 0.9560190151228137, iteration: 161223
loss: 1.0102921724319458,grad_norm: 0.8651828612683983, iteration: 161224
loss: 1.0149953365325928,grad_norm: 0.999999030871159, iteration: 161225
loss: 1.06708824634552,grad_norm: 0.9999992098215473, iteration: 161226
loss: 1.0369406938552856,grad_norm: 0.9844590324326098, iteration: 161227
loss: 1.031266689300537,grad_norm: 0.8438634309657407, iteration: 161228
loss: 0.9827902317047119,grad_norm: 0.8378280723665384, iteration: 161229
loss: 1.0152866840362549,grad_norm: 0.866405034943137, iteration: 161230
loss: 0.9976820349693298,grad_norm: 0.999999181762886, iteration: 161231
loss: 1.0372792482376099,grad_norm: 0.9277668723229863, iteration: 161232
loss: 1.0668401718139648,grad_norm: 0.9999997188014792, iteration: 161233
loss: 0.9577059745788574,grad_norm: 0.8731106761001718, iteration: 161234
loss: 1.0238134860992432,grad_norm: 0.9999997526359606, iteration: 161235
loss: 0.9904600381851196,grad_norm: 0.9999990649799702, iteration: 161236
loss: 1.0237901210784912,grad_norm: 0.9999990217881036, iteration: 161237
loss: 1.058605670928955,grad_norm: 0.9999992454960804, iteration: 161238
loss: 0.9740611910820007,grad_norm: 0.9106057623274976, iteration: 161239
loss: 0.9728593230247498,grad_norm: 0.9999991949295749, iteration: 161240
loss: 0.996536374092102,grad_norm: 0.9999992065851949, iteration: 161241
loss: 1.0397306680679321,grad_norm: 0.9999992836137139, iteration: 161242
loss: 0.9869330525398254,grad_norm: 0.9737480072154496, iteration: 161243
loss: 0.9655800461769104,grad_norm: 0.9498656419942938, iteration: 161244
loss: 0.9963066577911377,grad_norm: 0.9999990006627633, iteration: 161245
loss: 0.9872892498970032,grad_norm: 0.9999989948196596, iteration: 161246
loss: 0.9962249994277954,grad_norm: 0.9999991537011567, iteration: 161247
loss: 1.0022944211959839,grad_norm: 0.9332028356516806, iteration: 161248
loss: 0.9901816844940186,grad_norm: 0.9311859842889076, iteration: 161249
loss: 1.0043696165084839,grad_norm: 0.9480092761872431, iteration: 161250
loss: 1.0026801824569702,grad_norm: 0.9674417969451782, iteration: 161251
loss: 0.9864850044250488,grad_norm: 0.9999990131629919, iteration: 161252
loss: 0.9776036739349365,grad_norm: 0.9999994142503927, iteration: 161253
loss: 1.0043226480484009,grad_norm: 0.8934948746907524, iteration: 161254
loss: 1.0115139484405518,grad_norm: 0.9999992311909276, iteration: 161255
loss: 1.00578773021698,grad_norm: 0.999998976850102, iteration: 161256
loss: 0.9872916340827942,grad_norm: 0.8948252411109008, iteration: 161257
loss: 0.982297956943512,grad_norm: 0.9999992929763544, iteration: 161258
loss: 1.017859697341919,grad_norm: 0.9052413469442125, iteration: 161259
loss: 0.9696900248527527,grad_norm: 0.9163725439815592, iteration: 161260
loss: 0.9981521368026733,grad_norm: 0.9999990546644953, iteration: 161261
loss: 1.0041803121566772,grad_norm: 0.9999990572554217, iteration: 161262
loss: 0.9859400391578674,grad_norm: 0.8241020534012364, iteration: 161263
loss: 1.012478232383728,grad_norm: 0.9999992311678311, iteration: 161264
loss: 1.0087890625,grad_norm: 0.9999995626868641, iteration: 161265
loss: 1.0034468173980713,grad_norm: 0.9849213063434806, iteration: 161266
loss: 0.9882621765136719,grad_norm: 0.9988287231349564, iteration: 161267
loss: 1.041140079498291,grad_norm: 0.999999455650646, iteration: 161268
loss: 1.0135362148284912,grad_norm: 0.993794365861239, iteration: 161269
loss: 0.9453212022781372,grad_norm: 0.9590391151984679, iteration: 161270
loss: 0.9974803328514099,grad_norm: 0.9655528582188205, iteration: 161271
loss: 1.015919804573059,grad_norm: 0.9717999647125336, iteration: 161272
loss: 1.0065544843673706,grad_norm: 0.9460136375050089, iteration: 161273
loss: 1.036151647567749,grad_norm: 0.9999992840916637, iteration: 161274
loss: 0.9872105121612549,grad_norm: 0.9624738111189746, iteration: 161275
loss: 1.0008108615875244,grad_norm: 0.9999990082247809, iteration: 161276
loss: 0.9940365552902222,grad_norm: 0.8193933221655311, iteration: 161277
loss: 1.0080747604370117,grad_norm: 0.9999991752564114, iteration: 161278
loss: 1.0329996347427368,grad_norm: 0.9999992816881748, iteration: 161279
loss: 0.9956666827201843,grad_norm: 0.9999993827049737, iteration: 161280
loss: 1.0041403770446777,grad_norm: 0.917650881795121, iteration: 161281
loss: 0.9581933617591858,grad_norm: 0.9999992731496984, iteration: 161282
loss: 1.0112110376358032,grad_norm: 0.9999992527842346, iteration: 161283
loss: 0.979704737663269,grad_norm: 0.9846824141635234, iteration: 161284
loss: 0.9498854875564575,grad_norm: 0.9749531207116736, iteration: 161285
loss: 1.015995740890503,grad_norm: 0.999999070057974, iteration: 161286
loss: 1.0426441431045532,grad_norm: 0.9999990966838818, iteration: 161287
loss: 1.0544618368148804,grad_norm: 0.9999990938685032, iteration: 161288
loss: 1.0753241777420044,grad_norm: 0.9999997256246935, iteration: 161289
loss: 0.9951474070549011,grad_norm: 0.8391819250988972, iteration: 161290
loss: 0.988544225692749,grad_norm: 0.9469500355103474, iteration: 161291
loss: 1.0251102447509766,grad_norm: 0.9999990799309252, iteration: 161292
loss: 0.9862356185913086,grad_norm: 0.9999996085293623, iteration: 161293
loss: 0.9870560169219971,grad_norm: 0.9999990949051544, iteration: 161294
loss: 0.9939989447593689,grad_norm: 0.9999997396441941, iteration: 161295
loss: 1.0188755989074707,grad_norm: 0.8724232599773486, iteration: 161296
loss: 0.9855329990386963,grad_norm: 0.9999990871067249, iteration: 161297
loss: 0.9911564588546753,grad_norm: 0.9999991083200529, iteration: 161298
loss: 1.0207523107528687,grad_norm: 0.951529816759343, iteration: 161299
loss: 1.0082803964614868,grad_norm: 0.8600435011610207, iteration: 161300
loss: 1.0054810047149658,grad_norm: 0.9999991330509254, iteration: 161301
loss: 0.9773844480514526,grad_norm: 0.9999991300235848, iteration: 161302
loss: 0.9794699549674988,grad_norm: 0.9999993191107985, iteration: 161303
loss: 1.0082823038101196,grad_norm: 0.9999991735958378, iteration: 161304
loss: 0.9807748198509216,grad_norm: 0.9378373527966419, iteration: 161305
loss: 0.9970147013664246,grad_norm: 0.9999992105887708, iteration: 161306
loss: 1.02727210521698,grad_norm: 0.9032175598980388, iteration: 161307
loss: 1.024052381515503,grad_norm: 0.9207512571499439, iteration: 161308
loss: 1.0222132205963135,grad_norm: 0.9999993171882426, iteration: 161309
loss: 0.9814771413803101,grad_norm: 0.9215804256832758, iteration: 161310
loss: 0.9964558482170105,grad_norm: 0.9999997405371117, iteration: 161311
loss: 1.0260708332061768,grad_norm: 0.991846601806469, iteration: 161312
loss: 0.9997735023498535,grad_norm: 0.9999991469576412, iteration: 161313
loss: 0.9526373744010925,grad_norm: 0.9999990754475, iteration: 161314
loss: 1.0169174671173096,grad_norm: 0.9999990876903563, iteration: 161315
loss: 1.0562689304351807,grad_norm: 0.9733530974088496, iteration: 161316
loss: 1.0111445188522339,grad_norm: 0.999999111976657, iteration: 161317
loss: 1.0192234516143799,grad_norm: 0.8871466661351025, iteration: 161318
loss: 1.0015552043914795,grad_norm: 0.9893506345037183, iteration: 161319
loss: 0.9867166876792908,grad_norm: 0.9999991470215532, iteration: 161320
loss: 1.0000734329223633,grad_norm: 0.9674436784921107, iteration: 161321
loss: 1.0232645273208618,grad_norm: 0.9999990613512286, iteration: 161322
loss: 0.9880508780479431,grad_norm: 0.8679915572240805, iteration: 161323
loss: 0.9989057779312134,grad_norm: 0.8522808349901183, iteration: 161324
loss: 1.0261321067810059,grad_norm: 0.9999989788788556, iteration: 161325
loss: 0.9887629747390747,grad_norm: 0.9999992275228802, iteration: 161326
loss: 1.0160478353500366,grad_norm: 0.9999990470892434, iteration: 161327
loss: 0.9821323752403259,grad_norm: 0.9999992006871439, iteration: 161328
loss: 0.9957082271575928,grad_norm: 0.9999990795889965, iteration: 161329
loss: 0.9807500839233398,grad_norm: 0.9999991689067893, iteration: 161330
loss: 1.0115121603012085,grad_norm: 0.9999991529654363, iteration: 161331
loss: 1.0217102766036987,grad_norm: 0.8551468129635731, iteration: 161332
loss: 0.9777280688285828,grad_norm: 0.9999992796810168, iteration: 161333
loss: 0.9880785346031189,grad_norm: 0.9999992143821304, iteration: 161334
loss: 1.0084080696105957,grad_norm: 0.941328071693518, iteration: 161335
loss: 1.019601821899414,grad_norm: 0.9999991444620535, iteration: 161336
loss: 1.0140161514282227,grad_norm: 0.99999917492921, iteration: 161337
loss: 0.9921346306800842,grad_norm: 0.9999993861355634, iteration: 161338
loss: 0.994925856590271,grad_norm: 0.9999990502033566, iteration: 161339
loss: 1.0178560018539429,grad_norm: 0.999999034761779, iteration: 161340
loss: 1.0047690868377686,grad_norm: 0.999999164398683, iteration: 161341
loss: 1.0201698541641235,grad_norm: 0.9462291118973263, iteration: 161342
loss: 0.9571650624275208,grad_norm: 0.9850887640809954, iteration: 161343
loss: 1.0104970932006836,grad_norm: 0.950581086996116, iteration: 161344
loss: 1.010064721107483,grad_norm: 0.9999991431477692, iteration: 161345
loss: 0.99601149559021,grad_norm: 0.9971557531841211, iteration: 161346
loss: 1.007617712020874,grad_norm: 0.8487532919285733, iteration: 161347
loss: 1.0047506093978882,grad_norm: 0.8718863848943326, iteration: 161348
loss: 1.003159523010254,grad_norm: 0.9634763245465567, iteration: 161349
loss: 1.0217679738998413,grad_norm: 0.9444203186859529, iteration: 161350
loss: 0.9760192632675171,grad_norm: 0.9357104113452276, iteration: 161351
loss: 0.9915532469749451,grad_norm: 0.9999989707355668, iteration: 161352
loss: 0.9781029224395752,grad_norm: 0.9999990840478453, iteration: 161353
loss: 0.9839168787002563,grad_norm: 0.9999991190220858, iteration: 161354
loss: 1.0127266645431519,grad_norm: 0.9999990189804124, iteration: 161355
loss: 1.0079660415649414,grad_norm: 0.9999991475265735, iteration: 161356
loss: 1.0090335607528687,grad_norm: 0.9999990109458671, iteration: 161357
loss: 1.0152826309204102,grad_norm: 0.8917747068397301, iteration: 161358
loss: 0.9494088292121887,grad_norm: 0.9999990172586445, iteration: 161359
loss: 0.9582110047340393,grad_norm: 0.8409867718763717, iteration: 161360
loss: 1.030909776687622,grad_norm: 0.9999991604793548, iteration: 161361
loss: 0.9899395108222961,grad_norm: 0.957550846306161, iteration: 161362
loss: 1.023759365081787,grad_norm: 0.8861614343638226, iteration: 161363
loss: 1.0189372301101685,grad_norm: 0.8626099467716378, iteration: 161364
loss: 0.9959818720817566,grad_norm: 0.9999990373739193, iteration: 161365
loss: 0.9951598048210144,grad_norm: 0.9082123086391719, iteration: 161366
loss: 1.0109812021255493,grad_norm: 0.8906219323973522, iteration: 161367
loss: 0.9803500771522522,grad_norm: 0.9999991244654528, iteration: 161368
loss: 1.0146151781082153,grad_norm: 0.9999988210681107, iteration: 161369
loss: 0.9959234595298767,grad_norm: 0.9999990716535597, iteration: 161370
loss: 0.9931545257568359,grad_norm: 0.9999989783251446, iteration: 161371
loss: 0.9774811863899231,grad_norm: 0.9999989857124867, iteration: 161372
loss: 0.9792890548706055,grad_norm: 0.9676123307474973, iteration: 161373
loss: 1.0047718286514282,grad_norm: 0.8949764525663733, iteration: 161374
loss: 0.9808841347694397,grad_norm: 0.9999991134567434, iteration: 161375
loss: 1.0054327249526978,grad_norm: 0.9999990223063904, iteration: 161376
loss: 0.9823649525642395,grad_norm: 0.9500763503137301, iteration: 161377
loss: 0.9690316915512085,grad_norm: 0.858064127468415, iteration: 161378
loss: 0.991799533367157,grad_norm: 0.9999990688234751, iteration: 161379
loss: 0.9994921088218689,grad_norm: 0.9999989958101068, iteration: 161380
loss: 1.0081695318222046,grad_norm: 0.999999101661116, iteration: 161381
loss: 1.0376062393188477,grad_norm: 0.9285422992384789, iteration: 161382
loss: 1.0403636693954468,grad_norm: 0.8842893727901358, iteration: 161383
loss: 0.9780747890472412,grad_norm: 0.9999991819665202, iteration: 161384
loss: 0.9815806150436401,grad_norm: 0.9098733941430792, iteration: 161385
loss: 1.0269917249679565,grad_norm: 0.9510593446868865, iteration: 161386
loss: 1.0158491134643555,grad_norm: 0.904357530697628, iteration: 161387
loss: 1.0153228044509888,grad_norm: 0.9648522625133397, iteration: 161388
loss: 0.9858418107032776,grad_norm: 0.9999992372299844, iteration: 161389
loss: 0.9930574297904968,grad_norm: 0.9666119985471573, iteration: 161390
loss: 1.013880729675293,grad_norm: 0.9999989749394604, iteration: 161391
loss: 1.0080389976501465,grad_norm: 0.9999996016727369, iteration: 161392
loss: 0.9910561442375183,grad_norm: 0.9999991864089071, iteration: 161393
loss: 1.0065672397613525,grad_norm: 0.9340050345845339, iteration: 161394
loss: 0.9719394445419312,grad_norm: 0.9999992519272614, iteration: 161395
loss: 1.0167551040649414,grad_norm: 0.9384255797369297, iteration: 161396
loss: 1.1819889545440674,grad_norm: 0.9999990799857155, iteration: 161397
loss: 0.9926314949989319,grad_norm: 0.9154853154752816, iteration: 161398
loss: 1.033786416053772,grad_norm: 0.9999991574939686, iteration: 161399
loss: 0.9921145439147949,grad_norm: 0.9928869756573238, iteration: 161400
loss: 0.9925689697265625,grad_norm: 0.9014203030217813, iteration: 161401
loss: 0.9813882112503052,grad_norm: 0.929210841112791, iteration: 161402
loss: 0.9469617009162903,grad_norm: 0.9999989728411225, iteration: 161403
loss: 1.0045477151870728,grad_norm: 0.9325754766423339, iteration: 161404
loss: 0.9903815984725952,grad_norm: 0.9999991311111206, iteration: 161405
loss: 0.9918315410614014,grad_norm: 0.999999073217038, iteration: 161406
loss: 1.0085186958312988,grad_norm: 0.9999990602255087, iteration: 161407
loss: 0.9902215003967285,grad_norm: 0.9732048641782712, iteration: 161408
loss: 1.0018346309661865,grad_norm: 0.9999990545026912, iteration: 161409
loss: 1.1026175022125244,grad_norm: 0.9999990165286868, iteration: 161410
loss: 1.0048587322235107,grad_norm: 0.9999991413844137, iteration: 161411
loss: 1.0053611993789673,grad_norm: 0.9999991650925039, iteration: 161412
loss: 0.9957985877990723,grad_norm: 0.9717612366789513, iteration: 161413
loss: 0.9566017389297485,grad_norm: 0.9999991312503559, iteration: 161414
loss: 0.9986291527748108,grad_norm: 0.9281377381536576, iteration: 161415
loss: 0.985492467880249,grad_norm: 0.9568552428026718, iteration: 161416
loss: 0.9936041831970215,grad_norm: 0.8576974879810035, iteration: 161417
loss: 1.0140674114227295,grad_norm: 0.8239100719944694, iteration: 161418
loss: 1.0078831911087036,grad_norm: 0.948655427473463, iteration: 161419
loss: 0.999442458152771,grad_norm: 0.9113127760477591, iteration: 161420
loss: 0.9716808199882507,grad_norm: 0.999999037979985, iteration: 161421
loss: 0.9742814898490906,grad_norm: 0.7537300962034086, iteration: 161422
loss: 1.0241618156433105,grad_norm: 0.9999995050431926, iteration: 161423
loss: 0.9953543543815613,grad_norm: 0.999998923437445, iteration: 161424
loss: 1.0245975255966187,grad_norm: 0.9999991419223498, iteration: 161425
loss: 1.0107892751693726,grad_norm: 0.9999990415361915, iteration: 161426
loss: 1.0067682266235352,grad_norm: 0.9366623688336893, iteration: 161427
loss: 1.0257911682128906,grad_norm: 0.9888839878936023, iteration: 161428
loss: 1.0036520957946777,grad_norm: 0.8417943315699054, iteration: 161429
loss: 1.0117034912109375,grad_norm: 0.9272375964712273, iteration: 161430
loss: 0.9877780079841614,grad_norm: 0.9999990983243583, iteration: 161431
loss: 1.0040379762649536,grad_norm: 0.999999028607019, iteration: 161432
loss: 1.0243475437164307,grad_norm: 0.975237902278545, iteration: 161433
loss: 1.0248985290527344,grad_norm: 0.9999993453542806, iteration: 161434
loss: 0.987518310546875,grad_norm: 0.9999990856065368, iteration: 161435
loss: 1.0306001901626587,grad_norm: 0.9999991006655737, iteration: 161436
loss: 0.9730021953582764,grad_norm: 0.9999990602284793, iteration: 161437
loss: 1.0184011459350586,grad_norm: 0.9999992793783183, iteration: 161438
loss: 1.0084538459777832,grad_norm: 0.9595923039147499, iteration: 161439
loss: 1.0440630912780762,grad_norm: 0.9999991546921815, iteration: 161440
loss: 1.012650489807129,grad_norm: 0.9539563946665189, iteration: 161441
loss: 0.9788236618041992,grad_norm: 0.8347176754921822, iteration: 161442
loss: 0.9911324381828308,grad_norm: 0.9999992096285554, iteration: 161443
loss: 0.985293984413147,grad_norm: 0.9999989170935656, iteration: 161444
loss: 1.0378046035766602,grad_norm: 0.9988377795759602, iteration: 161445
loss: 1.0226223468780518,grad_norm: 0.9999990420186254, iteration: 161446
loss: 0.9961340427398682,grad_norm: 0.8075744333318694, iteration: 161447
loss: 1.0159549713134766,grad_norm: 0.9999991577739646, iteration: 161448
loss: 0.9813473224639893,grad_norm: 0.9999990758833962, iteration: 161449
loss: 1.0459129810333252,grad_norm: 0.9999991212784745, iteration: 161450
loss: 1.00706946849823,grad_norm: 0.9999991163111562, iteration: 161451
loss: 1.006673812866211,grad_norm: 0.9999989681377044, iteration: 161452
loss: 1.0069029331207275,grad_norm: 0.8988405567438033, iteration: 161453
loss: 1.0154259204864502,grad_norm: 0.9999997038187649, iteration: 161454
loss: 1.0214818716049194,grad_norm: 0.9476041208283914, iteration: 161455
loss: 0.9876808524131775,grad_norm: 0.9515247365206776, iteration: 161456
loss: 1.005935549736023,grad_norm: 0.9999991176005754, iteration: 161457
loss: 0.9975394606590271,grad_norm: 0.944341928900981, iteration: 161458
loss: 1.0072835683822632,grad_norm: 0.9999991357672229, iteration: 161459
loss: 1.0063209533691406,grad_norm: 0.958575420227393, iteration: 161460
loss: 1.0141947269439697,grad_norm: 0.9999999873022903, iteration: 161461
loss: 0.9803241491317749,grad_norm: 0.9429334917794244, iteration: 161462
loss: 0.9880056381225586,grad_norm: 0.9999990815609727, iteration: 161463
loss: 1.025688886642456,grad_norm: 0.9999991314403616, iteration: 161464
loss: 1.0093777179718018,grad_norm: 0.9590748985726767, iteration: 161465
loss: 0.9869512915611267,grad_norm: 0.9999992239806842, iteration: 161466
loss: 1.0075823068618774,grad_norm: 0.9999992397239147, iteration: 161467
loss: 0.951502799987793,grad_norm: 0.9999993846366596, iteration: 161468
loss: 1.006332516670227,grad_norm: 0.9467194952325074, iteration: 161469
loss: 0.9766663908958435,grad_norm: 0.9999989618462587, iteration: 161470
loss: 0.9971054196357727,grad_norm: 0.9974193273191568, iteration: 161471
loss: 0.9915947914123535,grad_norm: 0.9141795640175748, iteration: 161472
loss: 1.0094704627990723,grad_norm: 0.999999091047057, iteration: 161473
loss: 1.0068994760513306,grad_norm: 0.7038269895887407, iteration: 161474
loss: 0.9892695546150208,grad_norm: 0.9999991463248287, iteration: 161475
loss: 1.0028512477874756,grad_norm: 0.9999989915480586, iteration: 161476
loss: 0.9691938757896423,grad_norm: 0.9281225608428159, iteration: 161477
loss: 1.016861915588379,grad_norm: 0.9999991802663756, iteration: 161478
loss: 1.0047391653060913,grad_norm: 0.977468446653307, iteration: 161479
loss: 1.0023778676986694,grad_norm: 0.9773663063916735, iteration: 161480
loss: 0.9643853306770325,grad_norm: 0.9864515873736893, iteration: 161481
loss: 1.1182610988616943,grad_norm: 0.9999994448196958, iteration: 161482
loss: 0.9820271134376526,grad_norm: 0.9999992032363234, iteration: 161483
loss: 1.036844253540039,grad_norm: 0.9999991810850916, iteration: 161484
loss: 0.9745988845825195,grad_norm: 0.9999989326168269, iteration: 161485
loss: 0.999979555606842,grad_norm: 0.9999991479678351, iteration: 161486
loss: 1.0263519287109375,grad_norm: 0.9982621786058389, iteration: 161487
loss: 0.9966678023338318,grad_norm: 0.9999991911742604, iteration: 161488
loss: 1.020380973815918,grad_norm: 0.9730447390946362, iteration: 161489
loss: 0.9548457264900208,grad_norm: 0.9999992003707608, iteration: 161490
loss: 0.9529732465744019,grad_norm: 0.9879054076771835, iteration: 161491
loss: 0.9946004748344421,grad_norm: 0.9141419668670192, iteration: 161492
loss: 0.9899283647537231,grad_norm: 0.9999991444076772, iteration: 161493
loss: 1.0090144872665405,grad_norm: 0.9999991843302167, iteration: 161494
loss: 0.9794688820838928,grad_norm: 0.9068035144137774, iteration: 161495
loss: 1.010451078414917,grad_norm: 0.9897643794988797, iteration: 161496
loss: 0.982696533203125,grad_norm: 0.9945494193687249, iteration: 161497
loss: 0.9370794892311096,grad_norm: 0.8948582995733949, iteration: 161498
loss: 1.0022294521331787,grad_norm: 0.8882872776010831, iteration: 161499
loss: 1.033897876739502,grad_norm: 0.9999992654551759, iteration: 161500
loss: 0.9786906838417053,grad_norm: 0.9736780289232357, iteration: 161501
loss: 1.0158593654632568,grad_norm: 0.9976912168263181, iteration: 161502
loss: 0.9935154318809509,grad_norm: 0.9310506185555286, iteration: 161503
loss: 0.9971131086349487,grad_norm: 0.9837091603722354, iteration: 161504
loss: 1.0017859935760498,grad_norm: 0.9751292179593213, iteration: 161505
loss: 0.9639625549316406,grad_norm: 0.9999991552382694, iteration: 161506
loss: 0.9858475923538208,grad_norm: 0.9270823363828862, iteration: 161507
loss: 0.9499046206474304,grad_norm: 0.9999991082520765, iteration: 161508
loss: 1.0113881826400757,grad_norm: 0.9999991771721769, iteration: 161509
loss: 0.9906780123710632,grad_norm: 0.9999990883764497, iteration: 161510
loss: 0.9864677786827087,grad_norm: 0.9999990406916994, iteration: 161511
loss: 0.9772236347198486,grad_norm: 0.999999173798247, iteration: 161512
loss: 1.0245064496994019,grad_norm: 0.9419852258934304, iteration: 161513
loss: 1.0293458700180054,grad_norm: 0.8156956233799608, iteration: 161514
loss: 1.0101978778839111,grad_norm: 0.9999992317724602, iteration: 161515
loss: 1.0121334791183472,grad_norm: 0.9999991117723275, iteration: 161516
loss: 1.0055639743804932,grad_norm: 0.9999990967934089, iteration: 161517
loss: 1.0042275190353394,grad_norm: 0.9372908114150723, iteration: 161518
loss: 0.9788383841514587,grad_norm: 0.9571755630099404, iteration: 161519
loss: 0.9819985032081604,grad_norm: 0.9844803790254647, iteration: 161520
loss: 0.9632322788238525,grad_norm: 0.9999991431198006, iteration: 161521
loss: 1.0089396238327026,grad_norm: 0.9818045900815199, iteration: 161522
loss: 0.9923555850982666,grad_norm: 0.8402915827598552, iteration: 161523
loss: 1.008950114250183,grad_norm: 0.9926465780959053, iteration: 161524
loss: 1.0006452798843384,grad_norm: 0.9999991812308009, iteration: 161525
loss: 1.0493117570877075,grad_norm: 0.9999990180196947, iteration: 161526
loss: 1.008101224899292,grad_norm: 0.9887000922373166, iteration: 161527
loss: 0.9699856042861938,grad_norm: 0.9939492359533555, iteration: 161528
loss: 1.0163359642028809,grad_norm: 0.9970945658114132, iteration: 161529
loss: 1.0083318948745728,grad_norm: 0.9999993474317584, iteration: 161530
loss: 0.9969022274017334,grad_norm: 0.9754135027396327, iteration: 161531
loss: 0.9764186143875122,grad_norm: 0.999999067828908, iteration: 161532
loss: 1.0468040704727173,grad_norm: 0.9460790254897683, iteration: 161533
loss: 1.00077486038208,grad_norm: 0.9999992947070615, iteration: 161534
loss: 1.1027745008468628,grad_norm: 0.9999992877767336, iteration: 161535
loss: 1.0051859617233276,grad_norm: 0.999999196570985, iteration: 161536
loss: 0.9774536490440369,grad_norm: 0.8846154943669448, iteration: 161537
loss: 1.0347408056259155,grad_norm: 0.9999990797362266, iteration: 161538
loss: 0.9452869892120361,grad_norm: 0.9999989468269651, iteration: 161539
loss: 0.983070969581604,grad_norm: 0.9999991418544801, iteration: 161540
loss: 1.005428433418274,grad_norm: 0.797434654138214, iteration: 161541
loss: 1.0482932329177856,grad_norm: 0.8422935022684883, iteration: 161542
loss: 1.002636194229126,grad_norm: 0.9999990737609337, iteration: 161543
loss: 1.0299830436706543,grad_norm: 0.8511756432436127, iteration: 161544
loss: 1.0121235847473145,grad_norm: 0.9999990691829133, iteration: 161545
loss: 1.010655403137207,grad_norm: 0.9999993305714592, iteration: 161546
loss: 0.9785393476486206,grad_norm: 0.9744172863620015, iteration: 161547
loss: 0.9569336175918579,grad_norm: 0.8765013938172651, iteration: 161548
loss: 1.005053162574768,grad_norm: 0.9963963481792991, iteration: 161549
loss: 1.0924681425094604,grad_norm: 0.9999990785393347, iteration: 161550
loss: 1.0012917518615723,grad_norm: 0.9070454367875965, iteration: 161551
loss: 1.0016885995864868,grad_norm: 0.9999990299651212, iteration: 161552
loss: 0.9948369264602661,grad_norm: 0.9182618928504614, iteration: 161553
loss: 0.9681349396705627,grad_norm: 0.9999991555077175, iteration: 161554
loss: 0.9954886436462402,grad_norm: 0.9999991192869981, iteration: 161555
loss: 0.9651476740837097,grad_norm: 0.9210029869280283, iteration: 161556
loss: 1.0680559873580933,grad_norm: 0.9999993208685076, iteration: 161557
loss: 1.0128090381622314,grad_norm: 0.9999990845652302, iteration: 161558
loss: 0.9948757290840149,grad_norm: 0.9999989809777073, iteration: 161559
loss: 1.0060986280441284,grad_norm: 0.9998703312140093, iteration: 161560
loss: 0.9456223845481873,grad_norm: 0.9999991578140967, iteration: 161561
loss: 1.0269808769226074,grad_norm: 0.9999990744720649, iteration: 161562
loss: 0.9819498062133789,grad_norm: 0.9999992939456754, iteration: 161563
loss: 0.9474571943283081,grad_norm: 0.9294153071067086, iteration: 161564
loss: 1.0382264852523804,grad_norm: 0.878268450694027, iteration: 161565
loss: 1.0103821754455566,grad_norm: 0.9999989759462014, iteration: 161566
loss: 1.046044111251831,grad_norm: 0.8671968438192316, iteration: 161567
loss: 1.006373643875122,grad_norm: 0.9854666829408639, iteration: 161568
loss: 0.97585529088974,grad_norm: 0.9929151597246474, iteration: 161569
loss: 1.0548683404922485,grad_norm: 0.9999992373510705, iteration: 161570
loss: 0.9638503789901733,grad_norm: 0.9999991493794941, iteration: 161571
loss: 1.0100466012954712,grad_norm: 0.9999992217354292, iteration: 161572
loss: 1.013263463973999,grad_norm: 0.9999990042732494, iteration: 161573
loss: 1.0291802883148193,grad_norm: 0.9999992092182697, iteration: 161574
loss: 0.9857726693153381,grad_norm: 0.999999271952009, iteration: 161575
loss: 1.0186213254928589,grad_norm: 0.9999990934825568, iteration: 161576
loss: 1.0119227170944214,grad_norm: 0.9623212252018849, iteration: 161577
loss: 1.0111068487167358,grad_norm: 0.9999993758138972, iteration: 161578
loss: 1.0200036764144897,grad_norm: 0.9999990879104036, iteration: 161579
loss: 0.9896568655967712,grad_norm: 0.9999991469917787, iteration: 161580
loss: 0.9896646738052368,grad_norm: 0.946308975057355, iteration: 161581
loss: 0.9408230185508728,grad_norm: 0.9999989762891971, iteration: 161582
loss: 0.9889195561408997,grad_norm: 0.9091136245952207, iteration: 161583
loss: 0.9990753531455994,grad_norm: 0.9709192848840384, iteration: 161584
loss: 1.0593818426132202,grad_norm: 0.9999997516647613, iteration: 161585
loss: 1.004919171333313,grad_norm: 0.9471108821115924, iteration: 161586
loss: 0.9946333169937134,grad_norm: 0.9999990998175214, iteration: 161587
loss: 0.9885576367378235,grad_norm: 0.8653633016170925, iteration: 161588
loss: 1.02433180809021,grad_norm: 0.9013866208824586, iteration: 161589
loss: 1.0033217668533325,grad_norm: 0.9999991153516465, iteration: 161590
loss: 0.9867550730705261,grad_norm: 0.943955497676359, iteration: 161591
loss: 1.010669469833374,grad_norm: 0.8923860215489239, iteration: 161592
loss: 0.9864237308502197,grad_norm: 0.9918106502421791, iteration: 161593
loss: 0.9645886421203613,grad_norm: 0.9999996572141164, iteration: 161594
loss: 1.0112993717193604,grad_norm: 0.941741315441737, iteration: 161595
loss: 0.9651861786842346,grad_norm: 0.9999991256189152, iteration: 161596
loss: 0.9558312296867371,grad_norm: 0.9213453254013599, iteration: 161597
loss: 1.0227961540222168,grad_norm: 0.8450221268354131, iteration: 161598
loss: 0.9964138269424438,grad_norm: 0.999999055903696, iteration: 161599
loss: 1.011870265007019,grad_norm: 0.9310847894066431, iteration: 161600
loss: 0.9990477561950684,grad_norm: 0.9999991738551778, iteration: 161601
loss: 0.9822702407836914,grad_norm: 0.9999990854090456, iteration: 161602
loss: 1.0334018468856812,grad_norm: 0.8865086294019509, iteration: 161603
loss: 0.9830065369606018,grad_norm: 0.9999991347716986, iteration: 161604
loss: 0.9810084104537964,grad_norm: 0.9999991660492248, iteration: 161605
loss: 1.0483708381652832,grad_norm: 0.9517455508701743, iteration: 161606
loss: 1.0299667119979858,grad_norm: 0.9999991319868715, iteration: 161607
loss: 0.9573985934257507,grad_norm: 0.9755875232069696, iteration: 161608
loss: 1.0126867294311523,grad_norm: 0.9999992143574868, iteration: 161609
loss: 1.016984462738037,grad_norm: 0.9721004878354251, iteration: 161610
loss: 0.9718782305717468,grad_norm: 0.9999990583356736, iteration: 161611
loss: 1.0216351747512817,grad_norm: 0.9999992472792486, iteration: 161612
loss: 0.9823716878890991,grad_norm: 0.9395159249882318, iteration: 161613
loss: 0.9905908703804016,grad_norm: 0.9999990682384614, iteration: 161614
loss: 1.0144929885864258,grad_norm: 0.9468373942932194, iteration: 161615
loss: 0.9551067352294922,grad_norm: 0.9999991134496848, iteration: 161616
loss: 1.0056184530258179,grad_norm: 0.9272121318194702, iteration: 161617
loss: 1.008622407913208,grad_norm: 0.9324564352584278, iteration: 161618
loss: 1.0116664171218872,grad_norm: 0.9999991122127587, iteration: 161619
loss: 1.014772891998291,grad_norm: 0.9999991328591409, iteration: 161620
loss: 0.9878029823303223,grad_norm: 0.9999991387207093, iteration: 161621
loss: 0.9999366998672485,grad_norm: 0.9999990643187653, iteration: 161622
loss: 1.0207992792129517,grad_norm: 0.9746346523818867, iteration: 161623
loss: 1.009539008140564,grad_norm: 0.9910417964534907, iteration: 161624
loss: 1.0120830535888672,grad_norm: 0.994216677115769, iteration: 161625
loss: 1.0052940845489502,grad_norm: 0.8914734560005145, iteration: 161626
loss: 0.9903768301010132,grad_norm: 0.8644622728259947, iteration: 161627
loss: 0.9544206857681274,grad_norm: 0.9479358947058865, iteration: 161628
loss: 1.0145199298858643,grad_norm: 0.9999992984989982, iteration: 161629
loss: 0.9951880574226379,grad_norm: 0.9999990217579788, iteration: 161630
loss: 1.0110869407653809,grad_norm: 0.9842393425746071, iteration: 161631
loss: 1.019261121749878,grad_norm: 0.9808659983862423, iteration: 161632
loss: 1.0469642877578735,grad_norm: 0.9999994009196229, iteration: 161633
loss: 0.9957298636436462,grad_norm: 0.999999190479986, iteration: 161634
loss: 0.978289783000946,grad_norm: 0.8138108046248602, iteration: 161635
loss: 0.9979842901229858,grad_norm: 0.8752816671111618, iteration: 161636
loss: 1.0035163164138794,grad_norm: 0.9999991310639832, iteration: 161637
loss: 0.9823513627052307,grad_norm: 0.9999991515799254, iteration: 161638
loss: 0.9886703491210938,grad_norm: 0.9801984868932683, iteration: 161639
loss: 0.9946174025535583,grad_norm: 0.999999110638201, iteration: 161640
loss: 0.9944939613342285,grad_norm: 0.8591158098557572, iteration: 161641
loss: 0.9836851954460144,grad_norm: 0.9241822953554326, iteration: 161642
loss: 0.9799628853797913,grad_norm: 0.9999991652136436, iteration: 161643
loss: 0.9938992261886597,grad_norm: 0.944956447628692, iteration: 161644
loss: 0.9932746291160583,grad_norm: 0.8807731701788639, iteration: 161645
loss: 1.0084047317504883,grad_norm: 0.92021465258508, iteration: 161646
loss: 1.0013445615768433,grad_norm: 0.9999993056365393, iteration: 161647
loss: 1.0097088813781738,grad_norm: 0.831127455115227, iteration: 161648
loss: 1.0260207653045654,grad_norm: 0.9999990507051819, iteration: 161649
loss: 1.0225508213043213,grad_norm: 0.9999990006390378, iteration: 161650
loss: 1.033971905708313,grad_norm: 0.888894722432951, iteration: 161651
loss: 0.9908853769302368,grad_norm: 0.944991397548214, iteration: 161652
loss: 0.985977828502655,grad_norm: 0.9390032076834199, iteration: 161653
loss: 0.9886420369148254,grad_norm: 0.9854389381137297, iteration: 161654
loss: 1.0337178707122803,grad_norm: 0.9988932549904026, iteration: 161655
loss: 1.0727993249893188,grad_norm: 0.9999991274733551, iteration: 161656
loss: 0.9862773418426514,grad_norm: 0.845723600613256, iteration: 161657
loss: 0.9856998920440674,grad_norm: 0.8624326222689915, iteration: 161658
loss: 0.9963940382003784,grad_norm: 0.9525753170748545, iteration: 161659
loss: 0.9814452528953552,grad_norm: 0.9999990285337313, iteration: 161660
loss: 0.9668601751327515,grad_norm: 0.9999991450636323, iteration: 161661
loss: 1.0077009201049805,grad_norm: 0.9162556930965612, iteration: 161662
loss: 0.977985680103302,grad_norm: 0.9061996973155526, iteration: 161663
loss: 1.0155068635940552,grad_norm: 0.9999991221157843, iteration: 161664
loss: 1.0214548110961914,grad_norm: 0.9496306193544988, iteration: 161665
loss: 0.9991084337234497,grad_norm: 0.9999992385476255, iteration: 161666
loss: 0.9969368577003479,grad_norm: 0.999999112074673, iteration: 161667
loss: 0.9965164065361023,grad_norm: 0.9775592202601011, iteration: 161668
loss: 1.001052975654602,grad_norm: 0.9495641915426228, iteration: 161669
loss: 1.0124149322509766,grad_norm: 0.9260571084638795, iteration: 161670
loss: 0.9506298303604126,grad_norm: 0.9999991884990013, iteration: 161671
loss: 1.0003420114517212,grad_norm: 0.9999992106764971, iteration: 161672
loss: 0.9744443893432617,grad_norm: 0.9999991719725939, iteration: 161673
loss: 0.9935977458953857,grad_norm: 0.9999990760234065, iteration: 161674
loss: 1.0354045629501343,grad_norm: 0.9939845331559887, iteration: 161675
loss: 1.0004749298095703,grad_norm: 0.8468947378371329, iteration: 161676
loss: 0.9807588458061218,grad_norm: 0.9999989628976141, iteration: 161677
loss: 1.0048977136611938,grad_norm: 0.9612555037985165, iteration: 161678
loss: 1.0048719644546509,grad_norm: 0.9999992184611777, iteration: 161679
loss: 1.0209176540374756,grad_norm: 0.9999991541356265, iteration: 161680
loss: 0.9761892557144165,grad_norm: 0.9999990513472309, iteration: 161681
loss: 0.9916037917137146,grad_norm: 0.9999989958046652, iteration: 161682
loss: 1.0029851198196411,grad_norm: 0.999999243430685, iteration: 161683
loss: 1.0043374300003052,grad_norm: 0.9280619370517872, iteration: 161684
loss: 0.9716437458992004,grad_norm: 0.9682149398728465, iteration: 161685
loss: 0.9600539803504944,grad_norm: 0.9594383258541942, iteration: 161686
loss: 1.0023552179336548,grad_norm: 0.8605130485104012, iteration: 161687
loss: 1.025240421295166,grad_norm: 0.9999990700956168, iteration: 161688
loss: 1.0306613445281982,grad_norm: 0.9999992920613618, iteration: 161689
loss: 0.995926022529602,grad_norm: 0.999999034909057, iteration: 161690
loss: 0.9694932103157043,grad_norm: 0.9999991864573097, iteration: 161691
loss: 0.9882268309593201,grad_norm: 0.8319990252992331, iteration: 161692
loss: 1.0023881196975708,grad_norm: 0.9999991505275089, iteration: 161693
loss: 1.0133956670761108,grad_norm: 0.9999990957985053, iteration: 161694
loss: 1.0519325733184814,grad_norm: 0.9999992207639172, iteration: 161695
loss: 1.0006316900253296,grad_norm: 0.9999990595313085, iteration: 161696
loss: 0.9813280701637268,grad_norm: 0.9999991885093016, iteration: 161697
loss: 0.983325183391571,grad_norm: 0.938731632253859, iteration: 161698
loss: 1.0274949073791504,grad_norm: 0.8790973557931556, iteration: 161699
loss: 1.0008902549743652,grad_norm: 0.9800517281404196, iteration: 161700
loss: 0.9934980273246765,grad_norm: 0.9999991779680935, iteration: 161701
loss: 0.9535987973213196,grad_norm: 0.8545919277100958, iteration: 161702
loss: 0.9724109172821045,grad_norm: 0.9999992281328772, iteration: 161703
loss: 1.0123392343521118,grad_norm: 0.875314430401655, iteration: 161704
loss: 0.9638417363166809,grad_norm: 0.8578581619526716, iteration: 161705
loss: 0.9886159896850586,grad_norm: 0.9999994832104461, iteration: 161706
loss: 0.9986442923545837,grad_norm: 0.9621289103460127, iteration: 161707
loss: 0.9920781850814819,grad_norm: 0.9999991344544431, iteration: 161708
loss: 0.9487733244895935,grad_norm: 0.9446636001674561, iteration: 161709
loss: 1.0354613065719604,grad_norm: 0.9725816472040608, iteration: 161710
loss: 1.0256788730621338,grad_norm: 0.9999990744174152, iteration: 161711
loss: 1.0032635927200317,grad_norm: 0.9882321195953798, iteration: 161712
loss: 1.0323313474655151,grad_norm: 0.953066603852947, iteration: 161713
loss: 1.0496368408203125,grad_norm: 0.9999991076374253, iteration: 161714
loss: 0.997653067111969,grad_norm: 0.9728913081891053, iteration: 161715
loss: 1.0265527963638306,grad_norm: 0.9175225564617918, iteration: 161716
loss: 1.0218831300735474,grad_norm: 0.9529005077198011, iteration: 161717
loss: 0.9989519119262695,grad_norm: 0.9999990262752925, iteration: 161718
loss: 0.953436553478241,grad_norm: 0.9695936406511692, iteration: 161719
loss: 1.0020912885665894,grad_norm: 0.9999990695036316, iteration: 161720
loss: 0.9943162798881531,grad_norm: 0.8614384813614976, iteration: 161721
loss: 0.9632919430732727,grad_norm: 0.9999992651962376, iteration: 161722
loss: 1.0085397958755493,grad_norm: 0.9999992120397314, iteration: 161723
loss: 1.0004249811172485,grad_norm: 0.9999991127820476, iteration: 161724
loss: 1.0376733541488647,grad_norm: 0.7518404568112143, iteration: 161725
loss: 1.0249691009521484,grad_norm: 0.9999990539324716, iteration: 161726
loss: 1.0060590505599976,grad_norm: 0.7991317898644275, iteration: 161727
loss: 0.9717233777046204,grad_norm: 0.9999991930226768, iteration: 161728
loss: 1.0011781454086304,grad_norm: 0.9999990997507129, iteration: 161729
loss: 1.01944100856781,grad_norm: 0.8957000200451865, iteration: 161730
loss: 0.9837599992752075,grad_norm: 0.9453181119657228, iteration: 161731
loss: 0.9765808582305908,grad_norm: 0.8956862693465482, iteration: 161732
loss: 0.989270806312561,grad_norm: 0.9999991932873823, iteration: 161733
loss: 1.0186350345611572,grad_norm: 0.9999990820189848, iteration: 161734
loss: 1.0217615365982056,grad_norm: 0.9999991819929963, iteration: 161735
loss: 0.9805299639701843,grad_norm: 0.9999992080395061, iteration: 161736
loss: 1.0114867687225342,grad_norm: 0.9999992163329796, iteration: 161737
loss: 1.00375497341156,grad_norm: 0.9999991220538892, iteration: 161738
loss: 1.0193431377410889,grad_norm: 0.9999990616216855, iteration: 161739
loss: 1.0241515636444092,grad_norm: 0.9783539412113801, iteration: 161740
loss: 0.9838293194770813,grad_norm: 0.8193538675084316, iteration: 161741
loss: 1.0616196393966675,grad_norm: 0.9999995218345993, iteration: 161742
loss: 0.969301164150238,grad_norm: 0.9999991877101682, iteration: 161743
loss: 1.033012866973877,grad_norm: 0.9999992365915611, iteration: 161744
loss: 1.0495887994766235,grad_norm: 0.999999079823552, iteration: 161745
loss: 0.9778286218643188,grad_norm: 0.9110133058945469, iteration: 161746
loss: 0.9676439762115479,grad_norm: 0.891966615807546, iteration: 161747
loss: 1.02113676071167,grad_norm: 0.896330815062896, iteration: 161748
loss: 1.0225880146026611,grad_norm: 0.9999991638413578, iteration: 161749
loss: 0.9835512042045593,grad_norm: 0.9224084007889917, iteration: 161750
loss: 0.9871053099632263,grad_norm: 0.9999991807579611, iteration: 161751
loss: 0.9884126782417297,grad_norm: 0.9921865100661201, iteration: 161752
loss: 0.9898251295089722,grad_norm: 0.9999990390092366, iteration: 161753
loss: 0.9786086082458496,grad_norm: 0.9999991993904975, iteration: 161754
loss: 0.9998061060905457,grad_norm: 0.9999991320912627, iteration: 161755
loss: 0.9994715452194214,grad_norm: 0.9999989931420761, iteration: 161756
loss: 0.9887605309486389,grad_norm: 0.9999989978858174, iteration: 161757
loss: 0.9708313941955566,grad_norm: 0.8123346640259725, iteration: 161758
loss: 0.9974207878112793,grad_norm: 0.9923866914874832, iteration: 161759
loss: 0.9739197492599487,grad_norm: 0.9999991620552403, iteration: 161760
loss: 1.0135858058929443,grad_norm: 0.8069161664705542, iteration: 161761
loss: 1.009320855140686,grad_norm: 0.999999083782427, iteration: 161762
loss: 1.0193558931350708,grad_norm: 0.9485698733689167, iteration: 161763
loss: 0.9706192016601562,grad_norm: 0.9999990604816403, iteration: 161764
loss: 0.9861223101615906,grad_norm: 0.9866274698736386, iteration: 161765
loss: 0.9788074493408203,grad_norm: 0.9999991295923846, iteration: 161766
loss: 1.0403715372085571,grad_norm: 0.9510087855258303, iteration: 161767
loss: 1.0131433010101318,grad_norm: 0.828770106018976, iteration: 161768
loss: 0.9838506579399109,grad_norm: 0.920200558434583, iteration: 161769
loss: 1.0348858833312988,grad_norm: 0.9999994738496141, iteration: 161770
loss: 1.006246566772461,grad_norm: 0.999999145642561, iteration: 161771
loss: 0.9912376999855042,grad_norm: 0.9686229349839173, iteration: 161772
loss: 0.9716804623603821,grad_norm: 0.9999990742980503, iteration: 161773
loss: 1.068838119506836,grad_norm: 0.9999998179475306, iteration: 161774
loss: 0.9776505827903748,grad_norm: 0.9999992431591997, iteration: 161775
loss: 1.0548186302185059,grad_norm: 0.9999990941295988, iteration: 161776
loss: 0.9896200895309448,grad_norm: 0.9245389036486638, iteration: 161777
loss: 1.0373260974884033,grad_norm: 0.9999992830876295, iteration: 161778
loss: 0.9884039163589478,grad_norm: 0.9999990642004207, iteration: 161779
loss: 1.0187667608261108,grad_norm: 0.966352571044531, iteration: 161780
loss: 1.004660725593567,grad_norm: 0.9203988755486374, iteration: 161781
loss: 1.011574387550354,grad_norm: 0.9227652677744584, iteration: 161782
loss: 1.0153955221176147,grad_norm: 0.9999992942360212, iteration: 161783
loss: 0.9740117192268372,grad_norm: 0.9353751686681313, iteration: 161784
loss: 1.0214015245437622,grad_norm: 0.9980656194206495, iteration: 161785
loss: 1.0176812410354614,grad_norm: 0.9999990201435381, iteration: 161786
loss: 1.0074632167816162,grad_norm: 0.9447331309087131, iteration: 161787
loss: 0.9851033091545105,grad_norm: 0.8623368655850634, iteration: 161788
loss: 0.9998179078102112,grad_norm: 0.9999991211623886, iteration: 161789
loss: 1.048022985458374,grad_norm: 0.9999996249481318, iteration: 161790
loss: 0.9946461915969849,grad_norm: 0.9999992045854093, iteration: 161791
loss: 0.9742526412010193,grad_norm: 0.9800224949593505, iteration: 161792
loss: 1.0151313543319702,grad_norm: 0.9999990189979893, iteration: 161793
loss: 0.9634749889373779,grad_norm: 0.9999991658889076, iteration: 161794
loss: 1.01369309425354,grad_norm: 0.9209749062003194, iteration: 161795
loss: 1.0135931968688965,grad_norm: 0.9999993917077293, iteration: 161796
loss: 1.0536601543426514,grad_norm: 1.0000000095043666, iteration: 161797
loss: 0.995521068572998,grad_norm: 0.8974237973279968, iteration: 161798
loss: 1.0218188762664795,grad_norm: 0.9251763313134788, iteration: 161799
loss: 1.016792893409729,grad_norm: 0.9999990785733681, iteration: 161800
loss: 1.023019790649414,grad_norm: 0.9587450621526686, iteration: 161801
loss: 1.0017460584640503,grad_norm: 0.9999991705378395, iteration: 161802
loss: 1.0126352310180664,grad_norm: 0.9999992153960742, iteration: 161803
loss: 1.0044801235198975,grad_norm: 0.9999990070868343, iteration: 161804
loss: 1.0159728527069092,grad_norm: 0.990945010260173, iteration: 161805
loss: 1.0099204778671265,grad_norm: 0.9999991023530863, iteration: 161806
loss: 0.9771570563316345,grad_norm: 0.9999991403837406, iteration: 161807
loss: 1.0050208568572998,grad_norm: 0.9016223417527288, iteration: 161808
loss: 1.0028693675994873,grad_norm: 0.9604869350935074, iteration: 161809
loss: 0.9931963682174683,grad_norm: 0.9999990906838557, iteration: 161810
loss: 0.9955887794494629,grad_norm: 0.9410918971819925, iteration: 161811
loss: 0.9992936253547668,grad_norm: 0.9999990932356563, iteration: 161812
loss: 0.9864406585693359,grad_norm: 0.8938082634708039, iteration: 161813
loss: 0.9866865873336792,grad_norm: 0.9999990424699076, iteration: 161814
loss: 1.0052642822265625,grad_norm: 0.999999183070941, iteration: 161815
loss: 1.008726716041565,grad_norm: 0.9215323608507442, iteration: 161816
loss: 0.9555408358573914,grad_norm: 0.9999990380256537, iteration: 161817
loss: 1.0365060567855835,grad_norm: 0.9999991831567515, iteration: 161818
loss: 1.0149964094161987,grad_norm: 0.9999991788692805, iteration: 161819
loss: 0.9841884970664978,grad_norm: 0.8908828639689778, iteration: 161820
loss: 0.976439893245697,grad_norm: 0.9999990535898404, iteration: 161821
loss: 0.999226450920105,grad_norm: 0.8301772483126431, iteration: 161822
loss: 1.0179340839385986,grad_norm: 0.9999990678940348, iteration: 161823
loss: 0.9736358523368835,grad_norm: 0.8602408215064776, iteration: 161824
loss: 0.9842449426651001,grad_norm: 0.9999991935699345, iteration: 161825
loss: 0.9433601498603821,grad_norm: 0.8992642859817698, iteration: 161826
loss: 1.000477910041809,grad_norm: 0.9192635465172012, iteration: 161827
loss: 0.9783247113227844,grad_norm: 0.9688016743785652, iteration: 161828
loss: 1.0471247434616089,grad_norm: 0.9999991381059627, iteration: 161829
loss: 0.9947421550750732,grad_norm: 0.8795234212642874, iteration: 161830
loss: 0.9858348965644836,grad_norm: 0.9999990795110398, iteration: 161831
loss: 1.0219208002090454,grad_norm: 0.9999989943731409, iteration: 161832
loss: 1.0195996761322021,grad_norm: 0.8548289647253581, iteration: 161833
loss: 0.957184910774231,grad_norm: 0.802363918314754, iteration: 161834
loss: 1.0039080381393433,grad_norm: 0.86501069778969, iteration: 161835
loss: 0.9788471460342407,grad_norm: 0.999999188842207, iteration: 161836
loss: 1.0038946866989136,grad_norm: 0.9612499670999546, iteration: 161837
loss: 0.9774816036224365,grad_norm: 0.9324680264966853, iteration: 161838
loss: 0.9964489340782166,grad_norm: 0.8558299149958097, iteration: 161839
loss: 1.0267225503921509,grad_norm: 0.9999990767531072, iteration: 161840
loss: 0.9702534079551697,grad_norm: 0.9063338676630538, iteration: 161841
loss: 0.9473475813865662,grad_norm: 0.999999222856939, iteration: 161842
loss: 0.9750583171844482,grad_norm: 0.9999991271700893, iteration: 161843
loss: 1.004116415977478,grad_norm: 0.9999991192180417, iteration: 161844
loss: 0.9776512384414673,grad_norm: 0.8977481051019481, iteration: 161845
loss: 1.032131552696228,grad_norm: 0.9999989709559816, iteration: 161846
loss: 1.0348979234695435,grad_norm: 0.9999991228495384, iteration: 161847
loss: 0.9781394600868225,grad_norm: 0.9101026562827123, iteration: 161848
loss: 0.9767066836357117,grad_norm: 0.88851591740829, iteration: 161849
loss: 1.0079141855239868,grad_norm: 0.9999991381376537, iteration: 161850
loss: 1.039775013923645,grad_norm: 0.9713292146437028, iteration: 161851
loss: 1.0081816911697388,grad_norm: 0.9999998710646005, iteration: 161852
loss: 1.0103973150253296,grad_norm: 0.9375202136288318, iteration: 161853
loss: 1.0035649538040161,grad_norm: 0.9748348115563127, iteration: 161854
loss: 0.9734382033348083,grad_norm: 0.9999994376514323, iteration: 161855
loss: 1.0003814697265625,grad_norm: 0.9929831332658186, iteration: 161856
loss: 1.0126417875289917,grad_norm: 0.9999991228130366, iteration: 161857
loss: 1.0204321146011353,grad_norm: 0.999999084979029, iteration: 161858
loss: 0.9722884893417358,grad_norm: 0.9999990216416014, iteration: 161859
loss: 1.0143307447433472,grad_norm: 0.9826819705899845, iteration: 161860
loss: 1.005255103111267,grad_norm: 0.9999991217171439, iteration: 161861
loss: 0.9837072491645813,grad_norm: 0.9130873385478047, iteration: 161862
loss: 1.010581612586975,grad_norm: 0.9468461333569196, iteration: 161863
loss: 1.0003340244293213,grad_norm: 0.9999990068884775, iteration: 161864
loss: 1.0121289491653442,grad_norm: 0.8777238512063118, iteration: 161865
loss: 0.9805490970611572,grad_norm: 0.9999992285020434, iteration: 161866
loss: 1.033996820449829,grad_norm: 0.999999130471128, iteration: 161867
loss: 1.0107454061508179,grad_norm: 0.9623518365343522, iteration: 161868
loss: 1.0101382732391357,grad_norm: 0.9999993383198605, iteration: 161869
loss: 0.9863033890724182,grad_norm: 0.9999990856307267, iteration: 161870
loss: 1.0121660232543945,grad_norm: 0.9999991190112443, iteration: 161871
loss: 0.9894700646400452,grad_norm: 0.9783586955791082, iteration: 161872
loss: 1.0425533056259155,grad_norm: 0.9999991896938724, iteration: 161873
loss: 1.0265265703201294,grad_norm: 0.851551040918897, iteration: 161874
loss: 0.9916605353355408,grad_norm: 0.9999990788770222, iteration: 161875
loss: 1.007999062538147,grad_norm: 0.9570599382081402, iteration: 161876
loss: 1.0071760416030884,grad_norm: 0.9519967747474266, iteration: 161877
loss: 0.9716600179672241,grad_norm: 0.9999991996170332, iteration: 161878
loss: 0.9995875358581543,grad_norm: 0.9520438537300607, iteration: 161879
loss: 1.025105357170105,grad_norm: 0.9999991903264646, iteration: 161880
loss: 1.0033122301101685,grad_norm: 0.9999993647597375, iteration: 161881
loss: 1.0105992555618286,grad_norm: 0.9441694821645203, iteration: 161882
loss: 1.0319750308990479,grad_norm: 0.8641038901578801, iteration: 161883
loss: 1.0253440141677856,grad_norm: 0.9999989667620699, iteration: 161884
loss: 0.9973350167274475,grad_norm: 0.9999989220351237, iteration: 161885
loss: 1.0097113847732544,grad_norm: 0.8856258048968916, iteration: 161886
loss: 0.9703020453453064,grad_norm: 0.9999990556483004, iteration: 161887
loss: 0.9812390208244324,grad_norm: 0.9999992012778106, iteration: 161888
loss: 0.988444983959198,grad_norm: 0.859286314654929, iteration: 161889
loss: 1.0169546604156494,grad_norm: 0.9999992408875646, iteration: 161890
loss: 0.9929113984107971,grad_norm: 0.9999992321160348, iteration: 161891
loss: 1.0008187294006348,grad_norm: 0.9981412440176757, iteration: 161892
loss: 1.0050657987594604,grad_norm: 0.9999990892247508, iteration: 161893
loss: 0.990096390247345,grad_norm: 0.9631978871462306, iteration: 161894
loss: 0.9844253063201904,grad_norm: 0.9869070152206001, iteration: 161895
loss: 0.9915504455566406,grad_norm: 0.9999991536559443, iteration: 161896
loss: 1.0180590152740479,grad_norm: 0.8898028349733833, iteration: 161897
loss: 1.015779733657837,grad_norm: 0.9612012822193672, iteration: 161898
loss: 1.0013556480407715,grad_norm: 0.9518351438765069, iteration: 161899
loss: 1.0089044570922852,grad_norm: 0.9990327963139344, iteration: 161900
loss: 1.105294942855835,grad_norm: 0.9999999656174875, iteration: 161901
loss: 1.0080682039260864,grad_norm: 0.999999103899843, iteration: 161902
loss: 1.0243641138076782,grad_norm: 0.8864764009592818, iteration: 161903
loss: 1.0040301084518433,grad_norm: 0.9503974607308581, iteration: 161904
loss: 1.0123367309570312,grad_norm: 0.9999990740877713, iteration: 161905
loss: 0.9636135101318359,grad_norm: 0.9999990016910834, iteration: 161906
loss: 1.0319427251815796,grad_norm: 0.88665869160131, iteration: 161907
loss: 0.994172990322113,grad_norm: 0.9935676909495021, iteration: 161908
loss: 0.9642778038978577,grad_norm: 0.9999989679339673, iteration: 161909
loss: 1.0005213022232056,grad_norm: 0.9708646100328772, iteration: 161910
loss: 0.9993479251861572,grad_norm: 0.9216155486528297, iteration: 161911
loss: 1.0170838832855225,grad_norm: 0.9999992015551521, iteration: 161912
loss: 1.1747608184814453,grad_norm: 0.9999992370615464, iteration: 161913
loss: 0.9662626385688782,grad_norm: 0.9820738538766384, iteration: 161914
loss: 1.0002158880233765,grad_norm: 0.9109544902543186, iteration: 161915
loss: 0.9974942803382874,grad_norm: 0.9999992277608364, iteration: 161916
loss: 0.9852995872497559,grad_norm: 0.893195882287824, iteration: 161917
loss: 1.0106854438781738,grad_norm: 0.9999992201595931, iteration: 161918
loss: 1.0385966300964355,grad_norm: 0.9548777537165796, iteration: 161919
loss: 1.001420497894287,grad_norm: 0.9404596344591292, iteration: 161920
loss: 1.0284703969955444,grad_norm: 0.987950893866477, iteration: 161921
loss: 0.980188250541687,grad_norm: 0.8063399365280844, iteration: 161922
loss: 1.039104700088501,grad_norm: 0.8708043667156877, iteration: 161923
loss: 1.015388011932373,grad_norm: 0.9129057233757742, iteration: 161924
loss: 1.0204957723617554,grad_norm: 0.8340487451012317, iteration: 161925
loss: 0.9700384140014648,grad_norm: 0.9716887404444371, iteration: 161926
loss: 0.9871829748153687,grad_norm: 0.9999991860733235, iteration: 161927
loss: 1.002301573753357,grad_norm: 0.8206461135542519, iteration: 161928
loss: 0.9670610427856445,grad_norm: 0.9237341386470106, iteration: 161929
loss: 1.0193284749984741,grad_norm: 0.9718375476173953, iteration: 161930
loss: 0.9909955263137817,grad_norm: 0.9999989109900334, iteration: 161931
loss: 1.005235195159912,grad_norm: 0.9830189213407909, iteration: 161932
loss: 0.979370653629303,grad_norm: 0.9196569525745497, iteration: 161933
loss: 1.0201293230056763,grad_norm: 0.9999990785240628, iteration: 161934
loss: 0.9788459539413452,grad_norm: 0.8694166930077122, iteration: 161935
loss: 0.9790067076683044,grad_norm: 0.9999990751981085, iteration: 161936
loss: 0.9721136093139648,grad_norm: 0.9999991775545389, iteration: 161937
loss: 0.9927118420600891,grad_norm: 0.9704080558854222, iteration: 161938
loss: 0.9860939383506775,grad_norm: 0.999999074393768, iteration: 161939
loss: 1.0023633241653442,grad_norm: 0.926997514921063, iteration: 161940
loss: 1.0357264280319214,grad_norm: 0.9602245559772051, iteration: 161941
loss: 0.9812422394752502,grad_norm: 0.9519213457859768, iteration: 161942
loss: 1.0199047327041626,grad_norm: 0.9999996974446744, iteration: 161943
loss: 0.9719719290733337,grad_norm: 0.9759764985911821, iteration: 161944
loss: 1.0360925197601318,grad_norm: 0.8360781245979672, iteration: 161945
loss: 0.9995171427726746,grad_norm: 0.8134735108132619, iteration: 161946
loss: 1.0023454427719116,grad_norm: 0.9999990501090223, iteration: 161947
loss: 1.0384563207626343,grad_norm: 0.9999998318367616, iteration: 161948
loss: 0.984689474105835,grad_norm: 0.9999990341661731, iteration: 161949
loss: 1.0145833492279053,grad_norm: 0.9999992520363508, iteration: 161950
loss: 0.9810420870780945,grad_norm: 0.9310768506360435, iteration: 161951
loss: 1.0234891176223755,grad_norm: 0.8877499542764113, iteration: 161952
loss: 0.9948313236236572,grad_norm: 0.9999989525145326, iteration: 161953
loss: 0.9766647815704346,grad_norm: 0.982489392095891, iteration: 161954
loss: 0.9907611608505249,grad_norm: 0.9712406883043725, iteration: 161955
loss: 1.0195304155349731,grad_norm: 0.9999990885430732, iteration: 161956
loss: 1.033033013343811,grad_norm: 0.95546334659154, iteration: 161957
loss: 0.9790177941322327,grad_norm: 0.9606544727887089, iteration: 161958
loss: 0.9890651702880859,grad_norm: 0.9999990035098059, iteration: 161959
loss: 1.0078760385513306,grad_norm: 0.9999990808758613, iteration: 161960
loss: 0.9968584179878235,grad_norm: 0.9999991256658894, iteration: 161961
loss: 1.0019911527633667,grad_norm: 0.9999992615385831, iteration: 161962
loss: 1.0011740922927856,grad_norm: 0.8445820119499985, iteration: 161963
loss: 1.0070863962173462,grad_norm: 0.9999990621355136, iteration: 161964
loss: 0.9704268574714661,grad_norm: 0.9999991622635099, iteration: 161965
loss: 0.9860175848007202,grad_norm: 0.8438903307025518, iteration: 161966
loss: 0.9878722429275513,grad_norm: 0.9999989151113212, iteration: 161967
loss: 0.998769998550415,grad_norm: 0.9999991495493988, iteration: 161968
loss: 0.9857704639434814,grad_norm: 0.9442655284659833, iteration: 161969
loss: 1.00863516330719,grad_norm: 0.9999992463771858, iteration: 161970
loss: 0.986378014087677,grad_norm: 0.9999993057907498, iteration: 161971
loss: 0.9766974449157715,grad_norm: 0.929037369866582, iteration: 161972
loss: 1.0097686052322388,grad_norm: 0.9999991399434065, iteration: 161973
loss: 1.0288790464401245,grad_norm: 0.8094700310043675, iteration: 161974
loss: 1.005531907081604,grad_norm: 0.9452455308540574, iteration: 161975
loss: 1.024260401725769,grad_norm: 0.9999993865429105, iteration: 161976
loss: 1.1345891952514648,grad_norm: 0.9999998203615548, iteration: 161977
loss: 1.0024847984313965,grad_norm: 0.9782577891548403, iteration: 161978
loss: 0.992469072341919,grad_norm: 0.9697697329642657, iteration: 161979
loss: 0.9511846899986267,grad_norm: 0.8558522739933966, iteration: 161980
loss: 0.9894088506698608,grad_norm: 0.9999993392202619, iteration: 161981
loss: 1.0157393217086792,grad_norm: 0.772895314593884, iteration: 161982
loss: 0.990034818649292,grad_norm: 0.7546824300481867, iteration: 161983
loss: 1.01649010181427,grad_norm: 0.9572957770117252, iteration: 161984
loss: 1.0064594745635986,grad_norm: 0.9877005303790852, iteration: 161985
loss: 1.0303781032562256,grad_norm: 0.9934986379270255, iteration: 161986
loss: 0.9444698095321655,grad_norm: 0.9011771518728608, iteration: 161987
loss: 1.0180720090866089,grad_norm: 0.999999222830527, iteration: 161988
loss: 0.9874143600463867,grad_norm: 0.8779491195003607, iteration: 161989
loss: 0.9822449088096619,grad_norm: 0.9999993147353008, iteration: 161990
loss: 0.9922126531600952,grad_norm: 0.9999992430426311, iteration: 161991
loss: 0.9974108338356018,grad_norm: 0.9999991844289003, iteration: 161992
loss: 1.0390080213546753,grad_norm: 0.9999991416149514, iteration: 161993
loss: 1.0010311603546143,grad_norm: 0.9598918963218683, iteration: 161994
loss: 1.0055938959121704,grad_norm: 0.9999992085513111, iteration: 161995
loss: 1.026373267173767,grad_norm: 0.9999992616324862, iteration: 161996
loss: 1.002882480621338,grad_norm: 0.8669813868700904, iteration: 161997
loss: 0.9983537197113037,grad_norm: 0.9999991130373466, iteration: 161998
loss: 0.9841854572296143,grad_norm: 0.9999991174906354, iteration: 161999
loss: 1.0126534700393677,grad_norm: 0.8454229716313876, iteration: 162000
loss: 1.0195695161819458,grad_norm: 0.9736405978036234, iteration: 162001
loss: 1.0340502262115479,grad_norm: 0.999999247403516, iteration: 162002
loss: 0.9939664006233215,grad_norm: 0.937235715572028, iteration: 162003
loss: 1.0166945457458496,grad_norm: 0.9999990612975063, iteration: 162004
loss: 0.9822382926940918,grad_norm: 0.9999992071272976, iteration: 162005
loss: 0.9839989542961121,grad_norm: 0.9999991279455511, iteration: 162006
loss: 1.026214838027954,grad_norm: 0.9999992365839856, iteration: 162007
loss: 0.9979013800621033,grad_norm: 0.9880280225051103, iteration: 162008
loss: 1.022620677947998,grad_norm: 0.9999992747072441, iteration: 162009
loss: 0.987518310546875,grad_norm: 0.7787804382585628, iteration: 162010
loss: 0.9790452718734741,grad_norm: 0.9396466114859047, iteration: 162011
loss: 0.9696373343467712,grad_norm: 0.9999990817804018, iteration: 162012
loss: 0.990854024887085,grad_norm: 0.9993276673354824, iteration: 162013
loss: 0.9810735583305359,grad_norm: 0.9999991949555265, iteration: 162014
loss: 1.0112541913986206,grad_norm: 0.9594163854446687, iteration: 162015
loss: 0.9628089666366577,grad_norm: 0.8828945153797287, iteration: 162016
loss: 1.0014100074768066,grad_norm: 0.9926102688689104, iteration: 162017
loss: 1.010258674621582,grad_norm: 0.9052513694238853, iteration: 162018
loss: 0.9796971082687378,grad_norm: 0.9999991700310746, iteration: 162019
loss: 1.02112877368927,grad_norm: 0.9877995720767352, iteration: 162020
loss: 1.0020520687103271,grad_norm: 0.9999993534880511, iteration: 162021
loss: 0.981181800365448,grad_norm: 0.8995253359382731, iteration: 162022
loss: 0.9464307427406311,grad_norm: 0.9999990243616257, iteration: 162023
loss: 1.0294381380081177,grad_norm: 0.9999992561210113, iteration: 162024
loss: 0.9687231779098511,grad_norm: 0.8931474999483131, iteration: 162025
loss: 0.9632911086082458,grad_norm: 0.9999991599612783, iteration: 162026
loss: 1.000872254371643,grad_norm: 0.9758091169568809, iteration: 162027
loss: 0.9992735385894775,grad_norm: 0.943476074113201, iteration: 162028
loss: 0.9888482093811035,grad_norm: 0.999999132930857, iteration: 162029
loss: 1.0432276725769043,grad_norm: 0.999999533803133, iteration: 162030
loss: 1.0623269081115723,grad_norm: 0.9999990741675713, iteration: 162031
loss: 0.9985849857330322,grad_norm: 0.9999991748121423, iteration: 162032
loss: 0.9979350566864014,grad_norm: 0.9999989910393674, iteration: 162033
loss: 0.9724180698394775,grad_norm: 0.9999996409432498, iteration: 162034
loss: 1.0040909051895142,grad_norm: 0.999999016283699, iteration: 162035
loss: 1.0211690664291382,grad_norm: 0.988032639990735, iteration: 162036
loss: 0.99898761510849,grad_norm: 0.9999992089562595, iteration: 162037
loss: 1.0000293254852295,grad_norm: 0.8281695112785887, iteration: 162038
loss: 0.9622088670730591,grad_norm: 0.9683077088161638, iteration: 162039
loss: 0.9981232285499573,grad_norm: 0.8058563413212811, iteration: 162040
loss: 0.9897655248641968,grad_norm: 0.9999991192793434, iteration: 162041
loss: 0.9969775676727295,grad_norm: 0.9999989571661037, iteration: 162042
loss: 1.0166876316070557,grad_norm: 0.9567074598208227, iteration: 162043
loss: 1.0465787649154663,grad_norm: 0.9999993021495348, iteration: 162044
loss: 1.0105191469192505,grad_norm: 0.9547483902825364, iteration: 162045
loss: 1.0027940273284912,grad_norm: 0.9999995070799655, iteration: 162046
loss: 1.0563148260116577,grad_norm: 0.9999990607200028, iteration: 162047
loss: 0.9809057116508484,grad_norm: 0.8749732361098234, iteration: 162048
loss: 0.9741747379302979,grad_norm: 0.9159083787179526, iteration: 162049
loss: 0.9955459237098694,grad_norm: 0.9724806293686991, iteration: 162050
loss: 0.9999656081199646,grad_norm: 0.8675727744256199, iteration: 162051
loss: 0.9660742282867432,grad_norm: 0.999999011860884, iteration: 162052
loss: 1.004323124885559,grad_norm: 0.9999991751140852, iteration: 162053
loss: 0.9673851132392883,grad_norm: 0.9999990302871796, iteration: 162054
loss: 0.9992848038673401,grad_norm: 0.999999186875115, iteration: 162055
loss: 1.0014783143997192,grad_norm: 0.8645841594491783, iteration: 162056
loss: 0.9780423045158386,grad_norm: 0.9999991350741925, iteration: 162057
loss: 1.0170478820800781,grad_norm: 0.9999992107933641, iteration: 162058
loss: 0.968299388885498,grad_norm: 0.9999992001335325, iteration: 162059
loss: 0.9967487454414368,grad_norm: 0.9999991127485736, iteration: 162060
loss: 1.0325201749801636,grad_norm: 0.99999909314716, iteration: 162061
loss: 1.0142039060592651,grad_norm: 0.9999991393014873, iteration: 162062
loss: 1.0128926038742065,grad_norm: 0.9999995436552622, iteration: 162063
loss: 1.0054429769515991,grad_norm: 0.8906598147155246, iteration: 162064
loss: 0.9993368983268738,grad_norm: 0.9999991578210567, iteration: 162065
loss: 0.9951713681221008,grad_norm: 0.9999990673563904, iteration: 162066
loss: 0.9783174395561218,grad_norm: 0.9999991377308391, iteration: 162067
loss: 0.9737774133682251,grad_norm: 0.9999991639680357, iteration: 162068
loss: 0.9502483606338501,grad_norm: 0.9999989675523475, iteration: 162069
loss: 0.9920278191566467,grad_norm: 0.8663894652730966, iteration: 162070
loss: 0.9924923181533813,grad_norm: 0.8024814493885278, iteration: 162071
loss: 1.0056027173995972,grad_norm: 0.9999990503760388, iteration: 162072
loss: 1.0094020366668701,grad_norm: 0.9999990119226435, iteration: 162073
loss: 1.0242825746536255,grad_norm: 0.7755077320504394, iteration: 162074
loss: 0.9627382159233093,grad_norm: 0.9999992892976503, iteration: 162075
loss: 0.9875733256340027,grad_norm: 0.9999990429709739, iteration: 162076
loss: 1.0048567056655884,grad_norm: 0.9999991191628634, iteration: 162077
loss: 0.9998723864555359,grad_norm: 0.984560190905245, iteration: 162078
loss: 1.0030800104141235,grad_norm: 0.9999990904020651, iteration: 162079
loss: 0.9603530168533325,grad_norm: 0.9999990291293988, iteration: 162080
loss: 0.9992947578430176,grad_norm: 0.9999990761992729, iteration: 162081
loss: 1.00236976146698,grad_norm: 0.9041331679279315, iteration: 162082
loss: 1.016597032546997,grad_norm: 0.9346853824998108, iteration: 162083
loss: 0.9608305096626282,grad_norm: 0.9999990378749766, iteration: 162084
loss: 1.020042061805725,grad_norm: 0.9999991512084205, iteration: 162085
loss: 1.014081597328186,grad_norm: 0.9999990533985627, iteration: 162086
loss: 0.9830870032310486,grad_norm: 0.9999992623846886, iteration: 162087
loss: 0.9502803683280945,grad_norm: 0.9999991748951417, iteration: 162088
loss: 1.0447721481323242,grad_norm: 0.999999884769571, iteration: 162089
loss: 0.9697756767272949,grad_norm: 0.9324773672197243, iteration: 162090
loss: 0.9887319207191467,grad_norm: 0.9474882194194192, iteration: 162091
loss: 0.9958149194717407,grad_norm: 0.8837446237148247, iteration: 162092
loss: 0.9786752462387085,grad_norm: 0.9999990751152662, iteration: 162093
loss: 0.9990555047988892,grad_norm: 0.9999990826787171, iteration: 162094
loss: 0.9630114436149597,grad_norm: 0.8712524605607808, iteration: 162095
loss: 0.9728119969367981,grad_norm: 0.9999991683293755, iteration: 162096
loss: 1.033553123474121,grad_norm: 0.99999899698194, iteration: 162097
loss: 1.0197089910507202,grad_norm: 0.9361546149710597, iteration: 162098
loss: 1.0028014183044434,grad_norm: 0.999999335666258, iteration: 162099
loss: 1.0119036436080933,grad_norm: 0.9999991464210961, iteration: 162100
loss: 1.00168776512146,grad_norm: 0.9552108449626269, iteration: 162101
loss: 1.016886591911316,grad_norm: 0.9999991134741717, iteration: 162102
loss: 1.0082865953445435,grad_norm: 0.9727375433250863, iteration: 162103
loss: 0.9892973899841309,grad_norm: 0.9999990741239836, iteration: 162104
loss: 0.9745432734489441,grad_norm: 0.9999991547495372, iteration: 162105
loss: 1.031740665435791,grad_norm: 0.9945248993509994, iteration: 162106
loss: 1.0168687105178833,grad_norm: 0.9999991964674846, iteration: 162107
loss: 0.9638838171958923,grad_norm: 0.918901751305732, iteration: 162108
loss: 0.9784144759178162,grad_norm: 0.9999989041868815, iteration: 162109
loss: 1.0156526565551758,grad_norm: 0.9999997137436252, iteration: 162110
loss: 0.9942735433578491,grad_norm: 0.999999092368033, iteration: 162111
loss: 1.0106264352798462,grad_norm: 0.9491950258418453, iteration: 162112
loss: 0.9909139275550842,grad_norm: 0.8920200008899261, iteration: 162113
loss: 0.9705379009246826,grad_norm: 0.9779856297480387, iteration: 162114
loss: 1.0074665546417236,grad_norm: 0.9999991506543568, iteration: 162115
loss: 1.0091572999954224,grad_norm: 0.9741781685617769, iteration: 162116
loss: 0.9950114488601685,grad_norm: 0.8320010866616989, iteration: 162117
loss: 0.9918261170387268,grad_norm: 0.9938935099588362, iteration: 162118
loss: 1.011683702468872,grad_norm: 0.8963909956240621, iteration: 162119
loss: 0.9936938285827637,grad_norm: 0.9999991089166987, iteration: 162120
loss: 0.9815505146980286,grad_norm: 0.926689337993759, iteration: 162121
loss: 1.0000629425048828,grad_norm: 0.8389373538297029, iteration: 162122
loss: 1.033442497253418,grad_norm: 0.9999991413828502, iteration: 162123
loss: 0.9745128154754639,grad_norm: 0.9999990716502224, iteration: 162124
loss: 1.031098484992981,grad_norm: 0.9978148297215526, iteration: 162125
loss: 1.016391634941101,grad_norm: 0.9288590243002013, iteration: 162126
loss: 0.9859133958816528,grad_norm: 0.929236300294394, iteration: 162127
loss: 0.9888896346092224,grad_norm: 0.9066991946542174, iteration: 162128
loss: 1.0586382150650024,grad_norm: 0.9999996829978214, iteration: 162129
loss: 0.9839916229248047,grad_norm: 0.8106112991725792, iteration: 162130
loss: 1.0017365217208862,grad_norm: 0.917941282399321, iteration: 162131
loss: 1.017039179801941,grad_norm: 0.9676081334846875, iteration: 162132
loss: 1.0079843997955322,grad_norm: 0.9999998310240246, iteration: 162133
loss: 1.0085651874542236,grad_norm: 0.9999990468950924, iteration: 162134
loss: 1.0328648090362549,grad_norm: 0.9999990736872524, iteration: 162135
loss: 1.0405223369598389,grad_norm: 0.9999991684348568, iteration: 162136
loss: 0.966539204120636,grad_norm: 0.8811892433961955, iteration: 162137
loss: 0.9883520603179932,grad_norm: 0.9999990372137662, iteration: 162138
loss: 0.997305154800415,grad_norm: 0.9200427413481558, iteration: 162139
loss: 0.9866930246353149,grad_norm: 0.8661806690262726, iteration: 162140
loss: 1.0293807983398438,grad_norm: 0.9811936873305136, iteration: 162141
loss: 0.9534894824028015,grad_norm: 0.9999991854957678, iteration: 162142
loss: 1.0082998275756836,grad_norm: 0.8846916765652845, iteration: 162143
loss: 1.0217678546905518,grad_norm: 0.9999990402706997, iteration: 162144
loss: 1.0184552669525146,grad_norm: 0.8882655707134177, iteration: 162145
loss: 1.016745686531067,grad_norm: 0.9999992388003786, iteration: 162146
loss: 1.0305368900299072,grad_norm: 0.9999990190225134, iteration: 162147
loss: 1.017563819885254,grad_norm: 0.999999138015682, iteration: 162148
loss: 0.9796983599662781,grad_norm: 0.9301067723723007, iteration: 162149
loss: 0.9705645442008972,grad_norm: 0.9999989733400542, iteration: 162150
loss: 1.0052522420883179,grad_norm: 0.9999991517542702, iteration: 162151
loss: 0.9856910705566406,grad_norm: 0.9661454157893373, iteration: 162152
loss: 1.056278944015503,grad_norm: 0.9999994780373329, iteration: 162153
loss: 0.9652742147445679,grad_norm: 0.9999989612401576, iteration: 162154
loss: 0.9691852927207947,grad_norm: 0.9577343473745034, iteration: 162155
loss: 0.9895142316818237,grad_norm: 0.9661551666237689, iteration: 162156
loss: 1.0203015804290771,grad_norm: 0.999999024654144, iteration: 162157
loss: 0.9820390343666077,grad_norm: 0.9999991851659288, iteration: 162158
loss: 1.0009095668792725,grad_norm: 0.9382544061095128, iteration: 162159
loss: 1.0045802593231201,grad_norm: 0.9999991445496501, iteration: 162160
loss: 0.9946971535682678,grad_norm: 0.9571322886194861, iteration: 162161
loss: 0.994927167892456,grad_norm: 0.9113173539382898, iteration: 162162
loss: 0.9335899353027344,grad_norm: 0.8905501302999229, iteration: 162163
loss: 0.995120644569397,grad_norm: 0.9034210531980993, iteration: 162164
loss: 1.0137373208999634,grad_norm: 0.9147171953474784, iteration: 162165
loss: 1.02420175075531,grad_norm: 0.9999992812043343, iteration: 162166
loss: 0.9667778611183167,grad_norm: 0.9999990916069992, iteration: 162167
loss: 1.0129945278167725,grad_norm: 0.9999989072279409, iteration: 162168
loss: 0.9975359439849854,grad_norm: 0.9999991099725692, iteration: 162169
loss: 1.0333690643310547,grad_norm: 0.999999794192012, iteration: 162170
loss: 0.9794623851776123,grad_norm: 0.99999930807212, iteration: 162171
loss: 1.0273854732513428,grad_norm: 0.8569252022793187, iteration: 162172
loss: 0.9827152490615845,grad_norm: 0.898720868421932, iteration: 162173
loss: 1.040236473083496,grad_norm: 0.9999992177180362, iteration: 162174
loss: 1.013156533241272,grad_norm: 0.9999991292124206, iteration: 162175
loss: 0.9434596300125122,grad_norm: 0.9891024998986094, iteration: 162176
loss: 1.013811469078064,grad_norm: 0.9999990304759401, iteration: 162177
loss: 1.011702299118042,grad_norm: 0.9276620496500294, iteration: 162178
loss: 1.004380464553833,grad_norm: 0.9850874005683999, iteration: 162179
loss: 0.9957433342933655,grad_norm: 0.8396789168789269, iteration: 162180
loss: 0.975352942943573,grad_norm: 0.9999991824685482, iteration: 162181
loss: 1.002299189567566,grad_norm: 0.9999991819578831, iteration: 162182
loss: 1.0138535499572754,grad_norm: 0.9999990884670802, iteration: 162183
loss: 0.9872866272926331,grad_norm: 0.9999992068017517, iteration: 162184
loss: 1.021547555923462,grad_norm: 0.9784034913419134, iteration: 162185
loss: 1.0080755949020386,grad_norm: 0.9712476967012345, iteration: 162186
loss: 1.0097585916519165,grad_norm: 0.8813156468791935, iteration: 162187
loss: 0.9799783825874329,grad_norm: 0.9999992101286098, iteration: 162188
loss: 0.9969921708106995,grad_norm: 0.8428849375281502, iteration: 162189
loss: 1.0244419574737549,grad_norm: 0.9999991385103274, iteration: 162190
loss: 1.0226242542266846,grad_norm: 0.8602808844434916, iteration: 162191
loss: 0.992914080619812,grad_norm: 0.9999989937142428, iteration: 162192
loss: 0.9941007494926453,grad_norm: 0.8913114098402561, iteration: 162193
loss: 0.9796314835548401,grad_norm: 0.9999988994128163, iteration: 162194
loss: 1.0034469366073608,grad_norm: 0.9999991148584959, iteration: 162195
loss: 1.0112767219543457,grad_norm: 0.9625808677580954, iteration: 162196
loss: 1.0313183069229126,grad_norm: 0.8436740355181737, iteration: 162197
loss: 1.007033109664917,grad_norm: 0.9999991558197623, iteration: 162198
loss: 1.0369763374328613,grad_norm: 0.9504158103150018, iteration: 162199
loss: 1.0270862579345703,grad_norm: 0.9999993220237006, iteration: 162200
loss: 1.0270291566848755,grad_norm: 0.999999077322005, iteration: 162201
loss: 0.9723826050758362,grad_norm: 0.9405020755341744, iteration: 162202
loss: 0.9883714318275452,grad_norm: 0.9999991821039905, iteration: 162203
loss: 1.0061160326004028,grad_norm: 0.9999992412328661, iteration: 162204
loss: 1.01205313205719,grad_norm: 0.922168542764079, iteration: 162205
loss: 0.9876508116722107,grad_norm: 0.821232320893221, iteration: 162206
loss: 1.0343066453933716,grad_norm: 0.9999992651468604, iteration: 162207
loss: 1.014885425567627,grad_norm: 0.8763095523758225, iteration: 162208
loss: 0.9911127686500549,grad_norm: 0.9999990438070759, iteration: 162209
loss: 1.032550573348999,grad_norm: 0.890200810534487, iteration: 162210
loss: 1.0395947694778442,grad_norm: 0.9124016102406334, iteration: 162211
loss: 1.006090521812439,grad_norm: 0.7251571360552845, iteration: 162212
loss: 1.0141048431396484,grad_norm: 0.9688937265612341, iteration: 162213
loss: 0.9462372064590454,grad_norm: 0.9999990146284042, iteration: 162214
loss: 1.005974531173706,grad_norm: 0.9423401695483394, iteration: 162215
loss: 0.9730878472328186,grad_norm: 0.9999991046615019, iteration: 162216
loss: 1.0112688541412354,grad_norm: 0.8986799290240267, iteration: 162217
loss: 1.003202199935913,grad_norm: 0.9999991069656932, iteration: 162218
loss: 0.9935038685798645,grad_norm: 0.9184893687375655, iteration: 162219
loss: 1.0005282163619995,grad_norm: 0.9986261248575669, iteration: 162220
loss: 1.036766767501831,grad_norm: 0.9969479976651447, iteration: 162221
loss: 0.9630081057548523,grad_norm: 0.9999989738186608, iteration: 162222
loss: 0.9998877048492432,grad_norm: 0.9999990708462486, iteration: 162223
loss: 1.019413709640503,grad_norm: 0.9999992211771723, iteration: 162224
loss: 1.0457130670547485,grad_norm: 0.9999994216843057, iteration: 162225
loss: 1.0136750936508179,grad_norm: 0.9654668708948858, iteration: 162226
loss: 0.986454427242279,grad_norm: 0.9999991228209992, iteration: 162227
loss: 0.9738332629203796,grad_norm: 0.9999990995863959, iteration: 162228
loss: 0.991621196269989,grad_norm: 0.9999991089088875, iteration: 162229
loss: 1.0265060663223267,grad_norm: 0.7129781596138607, iteration: 162230
loss: 0.9871214628219604,grad_norm: 0.9999992370927496, iteration: 162231
loss: 1.0284513235092163,grad_norm: 0.8643865695044382, iteration: 162232
loss: 1.0123001337051392,grad_norm: 0.9999988822727763, iteration: 162233
loss: 1.0090131759643555,grad_norm: 0.9999994510079424, iteration: 162234
loss: 1.0044835805892944,grad_norm: 0.8592200416695055, iteration: 162235
loss: 0.9919575452804565,grad_norm: 0.9999994981648646, iteration: 162236
loss: 0.9738144874572754,grad_norm: 0.9933922055466046, iteration: 162237
loss: 0.9899046421051025,grad_norm: 0.9999992348685502, iteration: 162238
loss: 0.9911032319068909,grad_norm: 0.9999993701385197, iteration: 162239
loss: 1.0125536918640137,grad_norm: 0.9999993368307462, iteration: 162240
loss: 1.0688669681549072,grad_norm: 0.9999992492320302, iteration: 162241
loss: 0.9979994893074036,grad_norm: 0.999999167130904, iteration: 162242
loss: 1.025745153427124,grad_norm: 0.9999990911459439, iteration: 162243
loss: 0.9950973987579346,grad_norm: 0.925572532181628, iteration: 162244
loss: 1.010466456413269,grad_norm: 0.9725748200843766, iteration: 162245
loss: 0.9910717606544495,grad_norm: 0.9999991422045809, iteration: 162246
loss: 1.002148985862732,grad_norm: 0.9020811185793989, iteration: 162247
loss: 0.9604899883270264,grad_norm: 0.9999991566779507, iteration: 162248
loss: 0.9918145537376404,grad_norm: 0.9830516312417131, iteration: 162249
loss: 0.9902976751327515,grad_norm: 0.9999990844500027, iteration: 162250
loss: 1.0147212743759155,grad_norm: 0.8955048858846525, iteration: 162251
loss: 1.0145078897476196,grad_norm: 0.999999518775653, iteration: 162252
loss: 1.0191081762313843,grad_norm: 0.9999990522122252, iteration: 162253
loss: 1.0439945459365845,grad_norm: 0.9999991733319373, iteration: 162254
loss: 0.9782963991165161,grad_norm: 0.9999991988087683, iteration: 162255
loss: 0.9694029092788696,grad_norm: 0.9074660334478628, iteration: 162256
loss: 1.0244345664978027,grad_norm: 0.9999995508374399, iteration: 162257
loss: 0.9607189893722534,grad_norm: 0.9371339262676163, iteration: 162258
loss: 0.9725781083106995,grad_norm: 0.9350655023249059, iteration: 162259
loss: 1.0157129764556885,grad_norm: 0.9625476735512879, iteration: 162260
loss: 0.996367871761322,grad_norm: 0.8774287958950915, iteration: 162261
loss: 1.0192877054214478,grad_norm: 0.9863393085753834, iteration: 162262
loss: 0.9953128695487976,grad_norm: 0.9999990970241966, iteration: 162263
loss: 0.991557240486145,grad_norm: 0.8614193302359917, iteration: 162264
loss: 1.0493428707122803,grad_norm: 0.9575285944180181, iteration: 162265
loss: 1.0273327827453613,grad_norm: 0.9999992747515974, iteration: 162266
loss: 1.0025572776794434,grad_norm: 0.8679126465954748, iteration: 162267
loss: 1.0206096172332764,grad_norm: 0.843100767858772, iteration: 162268
loss: 0.9853454232215881,grad_norm: 0.9999991747626941, iteration: 162269
loss: 1.0216803550720215,grad_norm: 0.9282382153841126, iteration: 162270
loss: 0.9718176126480103,grad_norm: 0.9999991691778873, iteration: 162271
loss: 1.000891923904419,grad_norm: 0.9999989286054626, iteration: 162272
loss: 1.0000907182693481,grad_norm: 0.9999991706642202, iteration: 162273
loss: 0.9714879989624023,grad_norm: 0.9999991585405578, iteration: 162274
loss: 1.0140928030014038,grad_norm: 0.9733557443287257, iteration: 162275
loss: 1.0122132301330566,grad_norm: 0.999999177777161, iteration: 162276
loss: 0.9795367121696472,grad_norm: 0.9999990016951287, iteration: 162277
loss: 0.9839679002761841,grad_norm: 0.9999992844356405, iteration: 162278
loss: 0.982410192489624,grad_norm: 0.9999991061023096, iteration: 162279
loss: 1.0084019899368286,grad_norm: 0.9999997032899337, iteration: 162280
loss: 0.9880935549736023,grad_norm: 0.9925194023687999, iteration: 162281
loss: 1.0316716432571411,grad_norm: 0.9999990753578265, iteration: 162282
loss: 0.9856064915657043,grad_norm: 0.9999991198960975, iteration: 162283
loss: 1.011447548866272,grad_norm: 0.9931551990792, iteration: 162284
loss: 1.0125659704208374,grad_norm: 0.8593691438802094, iteration: 162285
loss: 1.0615710020065308,grad_norm: 0.9999994623385078, iteration: 162286
loss: 0.9786191582679749,grad_norm: 0.9999990549506157, iteration: 162287
loss: 0.9873660802841187,grad_norm: 0.9626537424565221, iteration: 162288
loss: 1.0016803741455078,grad_norm: 0.9595878820599223, iteration: 162289
loss: 0.9716895818710327,grad_norm: 0.9999990866904027, iteration: 162290
loss: 1.0138764381408691,grad_norm: 0.9999990005636044, iteration: 162291
loss: 0.9880016446113586,grad_norm: 0.9999992745512075, iteration: 162292
loss: 0.9961549639701843,grad_norm: 0.9999991455530274, iteration: 162293
loss: 1.0305503606796265,grad_norm: 0.9999992605010656, iteration: 162294
loss: 1.0229636430740356,grad_norm: 0.9176760779338792, iteration: 162295
loss: 0.990980327129364,grad_norm: 0.9999990741725641, iteration: 162296
loss: 0.9895036220550537,grad_norm: 0.988471738090172, iteration: 162297
loss: 0.9996969699859619,grad_norm: 0.884503793448582, iteration: 162298
loss: 1.0263441801071167,grad_norm: 0.9999995360593349, iteration: 162299
loss: 0.9900578856468201,grad_norm: 0.9999991348296433, iteration: 162300
loss: 0.960381805896759,grad_norm: 0.9406634570908627, iteration: 162301
loss: 1.021915316581726,grad_norm: 0.9685492029792223, iteration: 162302
loss: 0.9901736378669739,grad_norm: 0.9227441609310925, iteration: 162303
loss: 0.9812971353530884,grad_norm: 0.9789945502815236, iteration: 162304
loss: 1.0237687826156616,grad_norm: 0.9644589878959794, iteration: 162305
loss: 0.9809365272521973,grad_norm: 0.8843001033893834, iteration: 162306
loss: 1.0276284217834473,grad_norm: 0.9584983343275876, iteration: 162307
loss: 0.998866856098175,grad_norm: 0.9999990582298622, iteration: 162308
loss: 1.0126674175262451,grad_norm: 0.8399458945456661, iteration: 162309
loss: 0.9998504519462585,grad_norm: 0.9344336204478009, iteration: 162310
loss: 0.9844462275505066,grad_norm: 0.9535756896616833, iteration: 162311
loss: 0.9876244068145752,grad_norm: 0.976014834143606, iteration: 162312
loss: 0.9681759476661682,grad_norm: 0.9999991827170912, iteration: 162313
loss: 1.0144912004470825,grad_norm: 0.8998282121434605, iteration: 162314
loss: 0.9829407930374146,grad_norm: 0.9999991117113763, iteration: 162315
loss: 0.9588454961776733,grad_norm: 0.9999991261625574, iteration: 162316
loss: 1.0242583751678467,grad_norm: 0.9999994084195447, iteration: 162317
loss: 1.027217984199524,grad_norm: 0.9999992605249669, iteration: 162318
loss: 0.9788500070571899,grad_norm: 0.999999129295393, iteration: 162319
loss: 0.9948110580444336,grad_norm: 0.9809256219584934, iteration: 162320
loss: 0.9799609184265137,grad_norm: 0.9999990896120082, iteration: 162321
loss: 0.9728418588638306,grad_norm: 0.9999990907878905, iteration: 162322
loss: 0.9612042307853699,grad_norm: 0.9999990153903535, iteration: 162323
loss: 1.0075219869613647,grad_norm: 0.9999992697588489, iteration: 162324
loss: 0.9719500541687012,grad_norm: 0.7937593806950176, iteration: 162325
loss: 0.9936643838882446,grad_norm: 0.9928417268331056, iteration: 162326
loss: 0.9981375932693481,grad_norm: 0.9999991657764257, iteration: 162327
loss: 0.9819132089614868,grad_norm: 0.9999991115575507, iteration: 162328
loss: 0.973959743976593,grad_norm: 0.9342563236482412, iteration: 162329
loss: 1.0339665412902832,grad_norm: 0.9999990498116759, iteration: 162330
loss: 0.9837085008621216,grad_norm: 0.8862431562333247, iteration: 162331
loss: 1.0220199823379517,grad_norm: 0.8924824152888099, iteration: 162332
loss: 0.9904535412788391,grad_norm: 0.9772108870865788, iteration: 162333
loss: 0.9683583974838257,grad_norm: 0.9999993474355181, iteration: 162334
loss: 0.9939260482788086,grad_norm: 0.9999991070603741, iteration: 162335
loss: 0.9839698076248169,grad_norm: 0.9999991122374153, iteration: 162336
loss: 0.9851034283638,grad_norm: 0.9999991891062041, iteration: 162337
loss: 1.0201482772827148,grad_norm: 0.9730667245698699, iteration: 162338
loss: 1.0036635398864746,grad_norm: 0.9999991146371597, iteration: 162339
loss: 0.9951687455177307,grad_norm: 0.9999991791951278, iteration: 162340
loss: 1.0245130062103271,grad_norm: 0.9933356562857182, iteration: 162341
loss: 1.0080362558364868,grad_norm: 0.9598420969808703, iteration: 162342
loss: 1.018658995628357,grad_norm: 0.999999222423896, iteration: 162343
loss: 0.9749482274055481,grad_norm: 0.9999990238354665, iteration: 162344
loss: 0.9669195413589478,grad_norm: 0.9113810234799993, iteration: 162345
loss: 0.987529993057251,grad_norm: 0.954859144700933, iteration: 162346
loss: 0.9740933775901794,grad_norm: 0.9999992390829457, iteration: 162347
loss: 1.0248373746871948,grad_norm: 0.9999994279224689, iteration: 162348
loss: 1.0193805694580078,grad_norm: 0.9999990399627412, iteration: 162349
loss: 0.9890443682670593,grad_norm: 0.9562409864275735, iteration: 162350
loss: 1.0086438655853271,grad_norm: 0.9394332513352189, iteration: 162351
loss: 0.9869801998138428,grad_norm: 0.9999990436057816, iteration: 162352
loss: 1.017688274383545,grad_norm: 0.8325242392315033, iteration: 162353
loss: 1.0317027568817139,grad_norm: 0.9999991111742041, iteration: 162354
loss: 1.0090053081512451,grad_norm: 0.9999992135046947, iteration: 162355
loss: 0.9640287160873413,grad_norm: 0.9435769318398382, iteration: 162356
loss: 1.0001083612442017,grad_norm: 0.8102808552870202, iteration: 162357
loss: 1.002543568611145,grad_norm: 0.859179556918228, iteration: 162358
loss: 1.0293575525283813,grad_norm: 0.9926618280143565, iteration: 162359
loss: 1.0354400873184204,grad_norm: 0.9314694347879456, iteration: 162360
loss: 1.0293341875076294,grad_norm: 0.9477634424955639, iteration: 162361
loss: 0.9898460507392883,grad_norm: 0.9811453229750783, iteration: 162362
loss: 1.0315730571746826,grad_norm: 0.9999993619158176, iteration: 162363
loss: 0.9630063772201538,grad_norm: 0.9999990637117322, iteration: 162364
loss: 1.0333830118179321,grad_norm: 0.8316939355518771, iteration: 162365
loss: 1.0799542665481567,grad_norm: 0.9629124362400607, iteration: 162366
loss: 0.9676496982574463,grad_norm: 0.9607787676780978, iteration: 162367
loss: 1.0174908638000488,grad_norm: 0.969440687073644, iteration: 162368
loss: 0.9910315871238708,grad_norm: 0.9999993063942364, iteration: 162369
loss: 0.987400233745575,grad_norm: 0.999999163883976, iteration: 162370
loss: 0.9538919925689697,grad_norm: 0.9999992182049611, iteration: 162371
loss: 0.9731582999229431,grad_norm: 0.9999992357415499, iteration: 162372
loss: 0.9507961273193359,grad_norm: 0.9763737106120083, iteration: 162373
loss: 1.0298740863800049,grad_norm: 0.9219565285628901, iteration: 162374
loss: 1.021431565284729,grad_norm: 0.9999993411060396, iteration: 162375
loss: 1.0063166618347168,grad_norm: 0.9999992688522522, iteration: 162376
loss: 0.9623671174049377,grad_norm: 0.837067396424129, iteration: 162377
loss: 1.0691478252410889,grad_norm: 0.9999990686815933, iteration: 162378
loss: 1.0191214084625244,grad_norm: 0.9785843608822246, iteration: 162379
loss: 0.9932371973991394,grad_norm: 0.99999918101824, iteration: 162380
loss: 1.043485164642334,grad_norm: 0.9999992053990507, iteration: 162381
loss: 0.9782080054283142,grad_norm: 0.8339622066179779, iteration: 162382
loss: 1.0425279140472412,grad_norm: 0.9999991826970186, iteration: 162383
loss: 1.0048561096191406,grad_norm: 0.9191839472849358, iteration: 162384
loss: 1.0048168897628784,grad_norm: 0.8736103512517567, iteration: 162385
loss: 1.0042799711227417,grad_norm: 0.8852311510506119, iteration: 162386
loss: 0.9860153198242188,grad_norm: 0.9999990392955492, iteration: 162387
loss: 1.019742488861084,grad_norm: 0.9104608639708501, iteration: 162388
loss: 0.997279703617096,grad_norm: 0.9999991049847223, iteration: 162389
loss: 1.0043865442276,grad_norm: 0.9999991173747445, iteration: 162390
loss: 1.0267876386642456,grad_norm: 0.9999993059441978, iteration: 162391
loss: 1.0085654258728027,grad_norm: 0.999999095652359, iteration: 162392
loss: 1.0188547372817993,grad_norm: 0.9999991919069516, iteration: 162393
loss: 0.9796552658081055,grad_norm: 0.9781504467818622, iteration: 162394
loss: 0.9615479707717896,grad_norm: 0.9999992024879635, iteration: 162395
loss: 1.0396026372909546,grad_norm: 0.9999993483482031, iteration: 162396
loss: 0.9719581604003906,grad_norm: 0.9898432315736407, iteration: 162397
loss: 1.000712513923645,grad_norm: 0.8528156447145283, iteration: 162398
loss: 1.005240797996521,grad_norm: 0.8323868340956401, iteration: 162399
loss: 0.9842272996902466,grad_norm: 0.945078467259587, iteration: 162400
loss: 1.0175315141677856,grad_norm: 0.9999990863282152, iteration: 162401
loss: 1.0117076635360718,grad_norm: 0.9999990847252568, iteration: 162402
loss: 1.0157155990600586,grad_norm: 0.933794650420527, iteration: 162403
loss: 0.9759102463722229,grad_norm: 0.9999991498640489, iteration: 162404
loss: 0.9794067144393921,grad_norm: 0.9884478703902769, iteration: 162405
loss: 1.0158096551895142,grad_norm: 0.9337837459596948, iteration: 162406
loss: 0.9790048003196716,grad_norm: 0.9999991177566644, iteration: 162407
loss: 0.9870572686195374,grad_norm: 0.9999992035166504, iteration: 162408
loss: 1.0180885791778564,grad_norm: 0.9934653919475412, iteration: 162409
loss: 0.9590111970901489,grad_norm: 0.9999991922383619, iteration: 162410
loss: 1.0153532028198242,grad_norm: 0.9999991024212598, iteration: 162411
loss: 0.9707614183425903,grad_norm: 0.9999990770756447, iteration: 162412
loss: 0.9900538325309753,grad_norm: 0.8591429631398663, iteration: 162413
loss: 1.0408495664596558,grad_norm: 0.8079016783404838, iteration: 162414
loss: 0.9912416338920593,grad_norm: 0.9999994141806804, iteration: 162415
loss: 0.9906792044639587,grad_norm: 0.88540092132534, iteration: 162416
loss: 1.017353892326355,grad_norm: 0.9999990104290359, iteration: 162417
loss: 1.0146074295043945,grad_norm: 0.9999991452641261, iteration: 162418
loss: 1.020182728767395,grad_norm: 0.9999992553004976, iteration: 162419
loss: 0.9730802178382874,grad_norm: 0.9999991618560927, iteration: 162420
loss: 1.0039021968841553,grad_norm: 0.9999991868142067, iteration: 162421
loss: 1.0198675394058228,grad_norm: 0.9999991088895996, iteration: 162422
loss: 1.0153536796569824,grad_norm: 0.9176964434822844, iteration: 162423
loss: 0.9834486842155457,grad_norm: 0.9999991432839166, iteration: 162424
loss: 0.9901463389396667,grad_norm: 0.9999991301122436, iteration: 162425
loss: 0.9443687200546265,grad_norm: 0.999999158142188, iteration: 162426
loss: 0.9931071996688843,grad_norm: 0.7804202617124897, iteration: 162427
loss: 1.0019501447677612,grad_norm: 0.8854949964082096, iteration: 162428
loss: 1.0190708637237549,grad_norm: 0.8829068640765178, iteration: 162429
loss: 1.004862904548645,grad_norm: 0.8588689010459029, iteration: 162430
loss: 1.0007667541503906,grad_norm: 0.9999991317257376, iteration: 162431
loss: 0.9845108985900879,grad_norm: 0.9999990432255975, iteration: 162432
loss: 0.9858452677726746,grad_norm: 0.9973839098423835, iteration: 162433
loss: 0.9681869149208069,grad_norm: 0.9999992549895685, iteration: 162434
loss: 1.0185534954071045,grad_norm: 0.8653981410844789, iteration: 162435
loss: 1.0039399862289429,grad_norm: 0.9638360578834506, iteration: 162436
loss: 0.9769312143325806,grad_norm: 0.9999992551792278, iteration: 162437
loss: 0.9855444431304932,grad_norm: 0.9994641031219644, iteration: 162438
loss: 0.958003044128418,grad_norm: 0.999998962150454, iteration: 162439
loss: 1.0304057598114014,grad_norm: 0.9983260583141782, iteration: 162440
loss: 0.999840497970581,grad_norm: 0.9152458436876416, iteration: 162441
loss: 1.0091769695281982,grad_norm: 0.9999991348087834, iteration: 162442
loss: 0.9883456826210022,grad_norm: 0.9521158873674791, iteration: 162443
loss: 0.998102068901062,grad_norm: 0.8331563761930308, iteration: 162444
loss: 1.002814769744873,grad_norm: 0.9118969066898628, iteration: 162445
loss: 0.9900673031806946,grad_norm: 0.9272440601112403, iteration: 162446
loss: 1.018973469734192,grad_norm: 0.9999991311460205, iteration: 162447
loss: 1.0287106037139893,grad_norm: 0.9999990591846659, iteration: 162448
loss: 1.0193723440170288,grad_norm: 0.9999990959404466, iteration: 162449
loss: 1.0140436887741089,grad_norm: 0.9999992794490594, iteration: 162450
loss: 1.0321786403656006,grad_norm: 0.999999183004528, iteration: 162451
loss: 1.018563985824585,grad_norm: 0.9999991035411935, iteration: 162452
loss: 1.0236585140228271,grad_norm: 0.9999992367690477, iteration: 162453
loss: 0.9830408692359924,grad_norm: 0.9765676071412023, iteration: 162454
loss: 0.9719737768173218,grad_norm: 0.9999992507156095, iteration: 162455
loss: 0.9828068017959595,grad_norm: 0.993767369036831, iteration: 162456
loss: 1.0096609592437744,grad_norm: 0.9999990833667582, iteration: 162457
loss: 1.0003279447555542,grad_norm: 0.8874805352042048, iteration: 162458
loss: 0.9887087941169739,grad_norm: 0.921244459807888, iteration: 162459
loss: 1.0086270570755005,grad_norm: 0.943273348274458, iteration: 162460
loss: 0.9734290838241577,grad_norm: 0.9999991454910628, iteration: 162461
loss: 1.0064040422439575,grad_norm: 0.9600838925308453, iteration: 162462
loss: 0.9867404103279114,grad_norm: 0.9999991897352819, iteration: 162463
loss: 0.9864845275878906,grad_norm: 0.9999991696405682, iteration: 162464
loss: 1.0074740648269653,grad_norm: 0.9211916142093187, iteration: 162465
loss: 1.0205081701278687,grad_norm: 0.8798752202186984, iteration: 162466
loss: 1.0027906894683838,grad_norm: 0.9999989250447945, iteration: 162467
loss: 1.0018683671951294,grad_norm: 0.9999992441832911, iteration: 162468
loss: 1.0015848875045776,grad_norm: 0.985383543894711, iteration: 162469
loss: 0.996505081653595,grad_norm: 0.9474278307601942, iteration: 162470
loss: 1.0556819438934326,grad_norm: 0.9999991969087018, iteration: 162471
loss: 0.9976961612701416,grad_norm: 0.8760236604638135, iteration: 162472
loss: 1.034806489944458,grad_norm: 0.9999992586293973, iteration: 162473
loss: 1.0071059465408325,grad_norm: 0.9999990059122453, iteration: 162474
loss: 0.984334409236908,grad_norm: 0.999999208646833, iteration: 162475
loss: 0.9786319136619568,grad_norm: 0.9999991248148408, iteration: 162476
loss: 1.0272492170333862,grad_norm: 0.8459199494968761, iteration: 162477
loss: 1.017579436302185,grad_norm: 0.9356820663015528, iteration: 162478
loss: 0.9985628724098206,grad_norm: 0.9999990169304246, iteration: 162479
loss: 1.0364638566970825,grad_norm: 0.9999991405715603, iteration: 162480
loss: 0.9780226349830627,grad_norm: 0.9859724403950577, iteration: 162481
loss: 1.0167186260223389,grad_norm: 0.934529254105449, iteration: 162482
loss: 0.9595531225204468,grad_norm: 0.8441597128498609, iteration: 162483
loss: 1.02830970287323,grad_norm: 0.9999990770830515, iteration: 162484
loss: 0.9768987894058228,grad_norm: 0.870942911742228, iteration: 162485
loss: 1.0153433084487915,grad_norm: 0.9999988841112358, iteration: 162486
loss: 0.9908258318901062,grad_norm: 0.91995833360368, iteration: 162487
loss: 0.9886618256568909,grad_norm: 0.999999170922405, iteration: 162488
loss: 1.0122458934783936,grad_norm: 0.9999992117118739, iteration: 162489
loss: 1.0024127960205078,grad_norm: 0.8636684498084101, iteration: 162490
loss: 0.9855949878692627,grad_norm: 0.9586628206481927, iteration: 162491
loss: 0.9629102349281311,grad_norm: 0.891953141018955, iteration: 162492
loss: 1.0249652862548828,grad_norm: 0.999999270915557, iteration: 162493
loss: 1.0026888847351074,grad_norm: 0.9999989869218352, iteration: 162494
loss: 0.9945603013038635,grad_norm: 0.9999991362562896, iteration: 162495
loss: 1.0280324220657349,grad_norm: 0.9999991426426476, iteration: 162496
loss: 1.0271728038787842,grad_norm: 0.9529860229872322, iteration: 162497
loss: 0.9748862981796265,grad_norm: 0.9494460654543194, iteration: 162498
loss: 0.9634271860122681,grad_norm: 0.8428504896391038, iteration: 162499
loss: 0.9841108918190002,grad_norm: 0.9999989736017528, iteration: 162500
loss: 0.9990513324737549,grad_norm: 0.9932960614365874, iteration: 162501
loss: 0.9776033163070679,grad_norm: 0.9526473264226858, iteration: 162502
loss: 0.9869688153266907,grad_norm: 0.8635865109443679, iteration: 162503
loss: 1.001272439956665,grad_norm: 0.9999990592008156, iteration: 162504
loss: 0.994905948638916,grad_norm: 0.9999990330562395, iteration: 162505
loss: 1.0072400569915771,grad_norm: 0.882610221617626, iteration: 162506
loss: 0.9967221617698669,grad_norm: 0.9742489618280116, iteration: 162507
loss: 0.9900445938110352,grad_norm: 0.9238026220719197, iteration: 162508
loss: 1.0252635478973389,grad_norm: 0.9999989364766568, iteration: 162509
loss: 1.0230423212051392,grad_norm: 0.9257139848280145, iteration: 162510
loss: 1.0104048252105713,grad_norm: 0.9618900976108364, iteration: 162511
loss: 0.9777780175209045,grad_norm: 0.9715097054652378, iteration: 162512
loss: 1.0067777633666992,grad_norm: 0.9709426199444429, iteration: 162513
loss: 0.9926778078079224,grad_norm: 0.8900080971622679, iteration: 162514
loss: 0.9653592109680176,grad_norm: 0.9999991445474038, iteration: 162515
loss: 0.9756647348403931,grad_norm: 0.9999991036587681, iteration: 162516
loss: 1.0035661458969116,grad_norm: 0.9999990522852854, iteration: 162517
loss: 1.036887526512146,grad_norm: 0.9449964188995928, iteration: 162518
loss: 1.0249916315078735,grad_norm: 0.9844014961968117, iteration: 162519
loss: 0.9836795926094055,grad_norm: 0.9999990368953452, iteration: 162520
loss: 1.0027815103530884,grad_norm: 0.9999990333726596, iteration: 162521
loss: 1.0058271884918213,grad_norm: 0.9999992071383977, iteration: 162522
loss: 1.0329233407974243,grad_norm: 0.936605041909269, iteration: 162523
loss: 0.992548406124115,grad_norm: 0.999999001778755, iteration: 162524
loss: 1.015505075454712,grad_norm: 0.9075438019335166, iteration: 162525
loss: 0.9906406998634338,grad_norm: 0.9999992444917573, iteration: 162526
loss: 0.9727030992507935,grad_norm: 0.9999994280204317, iteration: 162527
loss: 1.0207493305206299,grad_norm: 0.9999991534373687, iteration: 162528
loss: 0.9740578532218933,grad_norm: 0.9528445940012156, iteration: 162529
loss: 1.0172357559204102,grad_norm: 0.9256627690765427, iteration: 162530
loss: 1.0154482126235962,grad_norm: 0.9999990796345899, iteration: 162531
loss: 0.9779086112976074,grad_norm: 0.9174096169068365, iteration: 162532
loss: 1.0315660238265991,grad_norm: 0.9999994073574779, iteration: 162533
loss: 0.9986742734909058,grad_norm: 0.981185015878964, iteration: 162534
loss: 1.0575965642929077,grad_norm: 0.9999997605465659, iteration: 162535
loss: 1.0166878700256348,grad_norm: 0.9054846884932634, iteration: 162536
loss: 0.9620843529701233,grad_norm: 0.999999162497342, iteration: 162537
loss: 0.9593912959098816,grad_norm: 0.9999991424651707, iteration: 162538
loss: 1.0377153158187866,grad_norm: 0.9999991249961021, iteration: 162539
loss: 1.0045703649520874,grad_norm: 0.9999990756048831, iteration: 162540
loss: 1.0130518674850464,grad_norm: 0.9148893581907049, iteration: 162541
loss: 0.9696021676063538,grad_norm: 0.9670230594806537, iteration: 162542
loss: 1.0070292949676514,grad_norm: 0.8533636275894919, iteration: 162543
loss: 0.9766446352005005,grad_norm: 0.9999989886217697, iteration: 162544
loss: 0.9784845113754272,grad_norm: 0.9316735901762505, iteration: 162545
loss: 1.0077953338623047,grad_norm: 0.9862138827905671, iteration: 162546
loss: 0.9959277510643005,grad_norm: 0.99999918762073, iteration: 162547
loss: 1.0015895366668701,grad_norm: 0.9999990560980991, iteration: 162548
loss: 0.995344877243042,grad_norm: 0.9999991701664767, iteration: 162549
loss: 1.0293800830841064,grad_norm: 0.9999990946816671, iteration: 162550
loss: 1.0075018405914307,grad_norm: 0.9999990668937906, iteration: 162551
loss: 0.9990186095237732,grad_norm: 0.999999243407682, iteration: 162552
loss: 0.997514545917511,grad_norm: 0.9999990509752501, iteration: 162553
loss: 0.9532790184020996,grad_norm: 0.9999992251181038, iteration: 162554
loss: 1.0158135890960693,grad_norm: 0.9887156404615909, iteration: 162555
loss: 1.0000922679901123,grad_norm: 0.9609553334011246, iteration: 162556
loss: 0.9957929253578186,grad_norm: 0.9999991641867316, iteration: 162557
loss: 0.9718725681304932,grad_norm: 0.9999993070028551, iteration: 162558
loss: 0.9642506241798401,grad_norm: 0.9289806431689787, iteration: 162559
loss: 1.0232316255569458,grad_norm: 0.9168744067062102, iteration: 162560
loss: 0.9518927335739136,grad_norm: 0.9999991272118702, iteration: 162561
loss: 0.9880332946777344,grad_norm: 0.81813348434408, iteration: 162562
loss: 1.0397993326187134,grad_norm: 0.9999996864856068, iteration: 162563
loss: 0.9722557663917542,grad_norm: 0.9758633849628927, iteration: 162564
loss: 1.0107136964797974,grad_norm: 0.9999989086411527, iteration: 162565
loss: 1.0028038024902344,grad_norm: 0.7727710816031866, iteration: 162566
loss: 0.9910280704498291,grad_norm: 0.999998968272662, iteration: 162567
loss: 0.9770664572715759,grad_norm: 0.9999991004537343, iteration: 162568
loss: 0.9706076979637146,grad_norm: 0.9034163628241664, iteration: 162569
loss: 1.0043286085128784,grad_norm: 0.9999994286818279, iteration: 162570
loss: 0.9907978773117065,grad_norm: 0.9999993380041259, iteration: 162571
loss: 1.0318118333816528,grad_norm: 0.9674848542056622, iteration: 162572
loss: 0.9718612432479858,grad_norm: 0.9999990771737536, iteration: 162573
loss: 0.9968976974487305,grad_norm: 0.9999991027921483, iteration: 162574
loss: 0.9752475619316101,grad_norm: 0.9999991980110041, iteration: 162575
loss: 0.9811973571777344,grad_norm: 0.9318565285805909, iteration: 162576
loss: 1.0062295198440552,grad_norm: 0.9386026979572307, iteration: 162577
loss: 0.9919472932815552,grad_norm: 0.9956121975253656, iteration: 162578
loss: 1.0124229192733765,grad_norm: 0.9999992159397492, iteration: 162579
loss: 1.0495784282684326,grad_norm: 0.999999068697018, iteration: 162580
loss: 1.0227230787277222,grad_norm: 0.9756042254410651, iteration: 162581
loss: 1.0016603469848633,grad_norm: 0.9999990494058578, iteration: 162582
loss: 1.013502836227417,grad_norm: 0.9999990517440164, iteration: 162583
loss: 0.9784654378890991,grad_norm: 0.9999993717055654, iteration: 162584
loss: 1.016446828842163,grad_norm: 0.9999991293625888, iteration: 162585
loss: 0.99859219789505,grad_norm: 0.9999991907010862, iteration: 162586
loss: 1.0228283405303955,grad_norm: 0.9743579023619222, iteration: 162587
loss: 0.9871926307678223,grad_norm: 0.8507042092266388, iteration: 162588
loss: 0.993584156036377,grad_norm: 0.9999995372305223, iteration: 162589
loss: 0.9953785538673401,grad_norm: 0.9561537624032721, iteration: 162590
loss: 1.0079675912857056,grad_norm: 0.8199612463226458, iteration: 162591
loss: 0.9834014177322388,grad_norm: 0.9805783535352992, iteration: 162592
loss: 1.0133050680160522,grad_norm: 0.9265914524842984, iteration: 162593
loss: 1.0191919803619385,grad_norm: 0.8783687614282161, iteration: 162594
loss: 1.0239633321762085,grad_norm: 0.9999991539400671, iteration: 162595
loss: 1.0497299432754517,grad_norm: 0.9999992770622554, iteration: 162596
loss: 0.9841797351837158,grad_norm: 0.8441169167420025, iteration: 162597
loss: 0.9730600714683533,grad_norm: 0.9999990077839488, iteration: 162598
loss: 1.013513207435608,grad_norm: 0.9999993733252912, iteration: 162599
loss: 0.994484543800354,grad_norm: 0.9999990401715979, iteration: 162600
loss: 1.0197844505310059,grad_norm: 0.9999992745787329, iteration: 162601
loss: 1.0001060962677002,grad_norm: 0.9198827314130406, iteration: 162602
loss: 0.9782696962356567,grad_norm: 0.7859669694555272, iteration: 162603
loss: 0.9952847361564636,grad_norm: 0.9360464699965366, iteration: 162604
loss: 0.989835798740387,grad_norm: 0.9999994580112133, iteration: 162605
loss: 0.9685858488082886,grad_norm: 0.8715224829695079, iteration: 162606
loss: 0.9928570985794067,grad_norm: 0.9070807451023216, iteration: 162607
loss: 0.9897106885910034,grad_norm: 0.9999992032797881, iteration: 162608
loss: 0.9785193800926208,grad_norm: 0.8701258875329585, iteration: 162609
loss: 0.9926995038986206,grad_norm: 0.9989844874177294, iteration: 162610
loss: 0.9798738360404968,grad_norm: 0.908201836890598, iteration: 162611
loss: 1.0091652870178223,grad_norm: 0.9999997547702403, iteration: 162612
loss: 1.0263912677764893,grad_norm: 0.9999995108123723, iteration: 162613
loss: 1.0182164907455444,grad_norm: 0.9760765961607907, iteration: 162614
loss: 0.992064893245697,grad_norm: 0.8621489079146389, iteration: 162615
loss: 1.0062291622161865,grad_norm: 0.9333930607896566, iteration: 162616
loss: 1.2240328788757324,grad_norm: 0.9999993334400653, iteration: 162617
loss: 1.0633937120437622,grad_norm: 0.9999993045847362, iteration: 162618
loss: 0.9461972117424011,grad_norm: 0.9999992468802933, iteration: 162619
loss: 0.9607787728309631,grad_norm: 0.9253826740855154, iteration: 162620
loss: 0.9749208092689514,grad_norm: 0.9999991856044852, iteration: 162621
loss: 0.9912827610969543,grad_norm: 0.9999993757688778, iteration: 162622
loss: 1.116136074066162,grad_norm: 0.9999997353227031, iteration: 162623
loss: 1.024920105934143,grad_norm: 0.9999997845321336, iteration: 162624
loss: 0.983744204044342,grad_norm: 0.9599905092823431, iteration: 162625
loss: 1.0719467401504517,grad_norm: 0.9999995724757144, iteration: 162626
loss: 1.0026748180389404,grad_norm: 0.9782111899060256, iteration: 162627
loss: 0.9948198795318604,grad_norm: 0.9999990617620917, iteration: 162628
loss: 1.0347331762313843,grad_norm: 0.9845777513277075, iteration: 162629
loss: 0.9927378296852112,grad_norm: 0.8220661689075713, iteration: 162630
loss: 1.000342845916748,grad_norm: 0.9999991850706096, iteration: 162631
loss: 1.0122169256210327,grad_norm: 0.9844771867184229, iteration: 162632
loss: 1.0085359811782837,grad_norm: 0.9999994363401468, iteration: 162633
loss: 0.9866814017295837,grad_norm: 0.999999368050199, iteration: 162634
loss: 0.9843518733978271,grad_norm: 0.999999041411371, iteration: 162635
loss: 1.0185880661010742,grad_norm: 0.9999989577472531, iteration: 162636
loss: 0.985038161277771,grad_norm: 0.9999992505145353, iteration: 162637
loss: 0.9794452786445618,grad_norm: 0.9999990291461152, iteration: 162638
loss: 1.0323339700698853,grad_norm: 0.9999990829068371, iteration: 162639
loss: 1.0090985298156738,grad_norm: 0.8134218271170282, iteration: 162640
loss: 1.0266811847686768,grad_norm: 0.996339130225525, iteration: 162641
loss: 0.9956437945365906,grad_norm: 0.9412302655146612, iteration: 162642
loss: 1.0195889472961426,grad_norm: 0.9999997149310278, iteration: 162643
loss: 0.9896092414855957,grad_norm: 0.8695726568649782, iteration: 162644
loss: 0.9949100613594055,grad_norm: 0.9999989829359605, iteration: 162645
loss: 0.9870341420173645,grad_norm: 0.9999990152223353, iteration: 162646
loss: 1.0347774028778076,grad_norm: 0.9999991654903976, iteration: 162647
loss: 0.993516206741333,grad_norm: 0.9999991867598944, iteration: 162648
loss: 1.0374257564544678,grad_norm: 0.999999088131628, iteration: 162649
loss: 1.021592140197754,grad_norm: 0.9999991780932748, iteration: 162650
loss: 0.9911242127418518,grad_norm: 0.972063248625325, iteration: 162651
loss: 1.0068811178207397,grad_norm: 0.9420863761129525, iteration: 162652
loss: 1.0108413696289062,grad_norm: 0.9999991753838553, iteration: 162653
loss: 1.011948823928833,grad_norm: 0.9999991487399751, iteration: 162654
loss: 1.055792212486267,grad_norm: 0.9999996825441267, iteration: 162655
loss: 1.0043072700500488,grad_norm: 0.9555285327717322, iteration: 162656
loss: 1.0082789659500122,grad_norm: 0.9012322696457485, iteration: 162657
loss: 0.9969337582588196,grad_norm: 0.9999989317211402, iteration: 162658
loss: 1.0357221364974976,grad_norm: 0.9999991078619179, iteration: 162659
loss: 0.993884801864624,grad_norm: 0.853784250039175, iteration: 162660
loss: 0.9784311056137085,grad_norm: 0.9999990579525514, iteration: 162661
loss: 0.9820656776428223,grad_norm: 0.9992675356917997, iteration: 162662
loss: 1.020617961883545,grad_norm: 0.9999992581423831, iteration: 162663
loss: 1.000204086303711,grad_norm: 0.9458881490933937, iteration: 162664
loss: 1.034227728843689,grad_norm: 0.9388714970780977, iteration: 162665
loss: 0.9980401992797852,grad_norm: 0.9101059915642338, iteration: 162666
loss: 1.0399640798568726,grad_norm: 0.999999231407, iteration: 162667
loss: 1.0571403503417969,grad_norm: 0.9999990846910158, iteration: 162668
loss: 1.0151536464691162,grad_norm: 0.9999994308443609, iteration: 162669
loss: 1.0428341627120972,grad_norm: 0.959610232219176, iteration: 162670
loss: 1.0125348567962646,grad_norm: 0.9999990326389538, iteration: 162671
loss: 1.0194611549377441,grad_norm: 0.8054090525073434, iteration: 162672
loss: 0.9672027230262756,grad_norm: 0.9555227350906503, iteration: 162673
loss: 0.9880667328834534,grad_norm: 0.9999991006531835, iteration: 162674
loss: 0.9923878312110901,grad_norm: 0.9693149650401763, iteration: 162675
loss: 0.9973753094673157,grad_norm: 0.9371040282342014, iteration: 162676
loss: 0.9974466562271118,grad_norm: 0.9999991904111337, iteration: 162677
loss: 0.994681179523468,grad_norm: 0.8928408855847086, iteration: 162678
loss: 0.9743082523345947,grad_norm: 0.838303008799982, iteration: 162679
loss: 1.0145151615142822,grad_norm: 0.9539445860191609, iteration: 162680
loss: 0.9782857298851013,grad_norm: 0.8612200266090149, iteration: 162681
loss: 0.9904244542121887,grad_norm: 0.884994597394352, iteration: 162682
loss: 0.9804054498672485,grad_norm: 0.9999989878850407, iteration: 162683
loss: 1.0030977725982666,grad_norm: 0.9254518947052824, iteration: 162684
loss: 1.0020716190338135,grad_norm: 0.9305087696033906, iteration: 162685
loss: 0.9781953692436218,grad_norm: 0.9496163882054001, iteration: 162686
loss: 0.9866178631782532,grad_norm: 0.9999991112585609, iteration: 162687
loss: 1.021375298500061,grad_norm: 0.9999997051955537, iteration: 162688
loss: 1.0749621391296387,grad_norm: 0.9999999112705167, iteration: 162689
loss: 0.970104992389679,grad_norm: 0.9999990861311651, iteration: 162690
loss: 0.9965910315513611,grad_norm: 0.9999990813081366, iteration: 162691
loss: 0.9802724719047546,grad_norm: 0.9999990202862284, iteration: 162692
loss: 0.9972925186157227,grad_norm: 0.9016864669476246, iteration: 162693
loss: 1.011847734451294,grad_norm: 0.9999992413496809, iteration: 162694
loss: 0.9521370530128479,grad_norm: 0.9447553248702636, iteration: 162695
loss: 0.9965692758560181,grad_norm: 0.9077626626880707, iteration: 162696
loss: 0.9924436807632446,grad_norm: 0.9999992115959517, iteration: 162697
loss: 0.9692955613136292,grad_norm: 0.7915746268885675, iteration: 162698
loss: 0.9968505501747131,grad_norm: 0.9999989538632597, iteration: 162699
loss: 1.0258558988571167,grad_norm: 0.8770760450955157, iteration: 162700
loss: 1.0199514627456665,grad_norm: 0.9999990633075044, iteration: 162701
loss: 0.9785350561141968,grad_norm: 0.9999992153089788, iteration: 162702
loss: 0.9851190447807312,grad_norm: 0.999999179568433, iteration: 162703
loss: 0.9948208332061768,grad_norm: 0.9999992265629196, iteration: 162704
loss: 1.175044298171997,grad_norm: 0.999999165057699, iteration: 162705
loss: 1.0057350397109985,grad_norm: 0.9446731507785034, iteration: 162706
loss: 0.9508108496665955,grad_norm: 0.9999990940949635, iteration: 162707
loss: 1.003897786140442,grad_norm: 0.8904140733204077, iteration: 162708
loss: 1.0199545621871948,grad_norm: 0.9837149457662876, iteration: 162709
loss: 1.0222108364105225,grad_norm: 0.9999997449246296, iteration: 162710
loss: 1.0229941606521606,grad_norm: 0.9999992595382278, iteration: 162711
loss: 0.9902486801147461,grad_norm: 0.999999066674139, iteration: 162712
loss: 0.9970486760139465,grad_norm: 0.9491257520140713, iteration: 162713
loss: 1.118637204170227,grad_norm: 0.9999992422316639, iteration: 162714
loss: 0.981361985206604,grad_norm: 0.9999996617797541, iteration: 162715
loss: 1.0382294654846191,grad_norm: 0.903041377535691, iteration: 162716
loss: 1.0171570777893066,grad_norm: 0.9999991687075879, iteration: 162717
loss: 1.0079084634780884,grad_norm: 0.9999990401910762, iteration: 162718
loss: 0.9952774047851562,grad_norm: 0.9999992180176314, iteration: 162719
loss: 0.9976444244384766,grad_norm: 0.9213143993203013, iteration: 162720
loss: 1.1020898818969727,grad_norm: 0.9999997186786989, iteration: 162721
loss: 0.9847190380096436,grad_norm: 0.9999999008381378, iteration: 162722
loss: 1.0449315309524536,grad_norm: 0.9999990955796543, iteration: 162723
loss: 1.034151315689087,grad_norm: 0.8916689306325248, iteration: 162724
loss: 0.9604266285896301,grad_norm: 0.9999991395542208, iteration: 162725
loss: 0.9697864055633545,grad_norm: 0.9999989865008703, iteration: 162726
loss: 1.0024256706237793,grad_norm: 0.9999992005956064, iteration: 162727
loss: 1.0174124240875244,grad_norm: 0.8414232506876386, iteration: 162728
loss: 0.9743407964706421,grad_norm: 0.9999991322818552, iteration: 162729
loss: 0.9746628403663635,grad_norm: 0.9931563252418605, iteration: 162730
loss: 1.0112653970718384,grad_norm: 0.9999991511078737, iteration: 162731
loss: 1.0360374450683594,grad_norm: 0.9999994510000874, iteration: 162732
loss: 0.9957262873649597,grad_norm: 0.9906917653598666, iteration: 162733
loss: 1.0148792266845703,grad_norm: 0.9999992206885345, iteration: 162734
loss: 0.9932873845100403,grad_norm: 0.9698175383554942, iteration: 162735
loss: 1.0058262348175049,grad_norm: 0.989744768829847, iteration: 162736
loss: 0.9777292013168335,grad_norm: 0.9999998128351886, iteration: 162737
loss: 1.0192031860351562,grad_norm: 0.8821152677464799, iteration: 162738
loss: 0.9624181389808655,grad_norm: 0.9999990002105611, iteration: 162739
loss: 0.9846445322036743,grad_norm: 0.7956592997822223, iteration: 162740
loss: 0.9975649118423462,grad_norm: 0.9714742627301228, iteration: 162741
loss: 1.0113252401351929,grad_norm: 0.9999992302282549, iteration: 162742
loss: 1.0118467807769775,grad_norm: 0.9999991492402945, iteration: 162743
loss: 1.0072555541992188,grad_norm: 0.9999989955683488, iteration: 162744
loss: 1.0006965398788452,grad_norm: 0.999999459947681, iteration: 162745
loss: 1.028527021408081,grad_norm: 0.8124578853309625, iteration: 162746
loss: 0.9925644993782043,grad_norm: 0.9999991057816822, iteration: 162747
loss: 0.9862322211265564,grad_norm: 0.9999992006790022, iteration: 162748
loss: 0.9925603866577148,grad_norm: 0.9999990399511508, iteration: 162749
loss: 1.0268670320510864,grad_norm: 0.9342166767647198, iteration: 162750
loss: 0.9979197978973389,grad_norm: 0.8621284461866177, iteration: 162751
loss: 1.0124061107635498,grad_norm: 0.9999992380483776, iteration: 162752
loss: 1.0014667510986328,grad_norm: 0.9999989690530763, iteration: 162753
loss: 0.957002580165863,grad_norm: 0.8469538783182461, iteration: 162754
loss: 1.0288925170898438,grad_norm: 0.9683203005539954, iteration: 162755
loss: 0.9728002548217773,grad_norm: 0.9634835369295329, iteration: 162756
loss: 1.0538538694381714,grad_norm: 0.9999991017147325, iteration: 162757
loss: 0.9677392244338989,grad_norm: 0.9999991192882131, iteration: 162758
loss: 1.0187115669250488,grad_norm: 0.9999991365002466, iteration: 162759
loss: 1.0117731094360352,grad_norm: 0.9999994084668641, iteration: 162760
loss: 0.9468967318534851,grad_norm: 0.925978196948082, iteration: 162761
loss: 1.0070058107376099,grad_norm: 0.9999993632941325, iteration: 162762
loss: 1.0425961017608643,grad_norm: 0.9999992107891664, iteration: 162763
loss: 1.0311765670776367,grad_norm: 0.9550188698612083, iteration: 162764
loss: 0.9941281080245972,grad_norm: 0.9999989853876514, iteration: 162765
loss: 0.987658679485321,grad_norm: 0.9999988691298024, iteration: 162766
loss: 1.0346989631652832,grad_norm: 0.9999990630352478, iteration: 162767
loss: 1.0027270317077637,grad_norm: 0.9912027939522838, iteration: 162768
loss: 0.9936280846595764,grad_norm: 0.9999992321724719, iteration: 162769
loss: 0.9815017580986023,grad_norm: 0.9999992579646235, iteration: 162770
loss: 0.9861075282096863,grad_norm: 0.9404548133687209, iteration: 162771
loss: 1.0091930627822876,grad_norm: 0.8986719629408512, iteration: 162772
loss: 0.9766275882720947,grad_norm: 0.9999991150595152, iteration: 162773
loss: 1.041433334350586,grad_norm: 0.9999992852472803, iteration: 162774
loss: 0.9621044993400574,grad_norm: 0.999999021247226, iteration: 162775
loss: 0.9801566004753113,grad_norm: 0.9999996696632889, iteration: 162776
loss: 0.9932812452316284,grad_norm: 0.8514185472983877, iteration: 162777
loss: 1.0170464515686035,grad_norm: 0.9999996343948473, iteration: 162778
loss: 0.9595425724983215,grad_norm: 0.9999990996315135, iteration: 162779
loss: 0.9758561253547668,grad_norm: 0.9426158461986016, iteration: 162780
loss: 0.9903934001922607,grad_norm: 0.9967417928591997, iteration: 162781
loss: 0.9830345511436462,grad_norm: 0.9999990654654118, iteration: 162782
loss: 1.0280382633209229,grad_norm: 0.9191772223692362, iteration: 162783
loss: 0.993889331817627,grad_norm: 0.9999991024163947, iteration: 162784
loss: 1.035353660583496,grad_norm: 0.9229896709287482, iteration: 162785
loss: 0.9890204071998596,grad_norm: 0.9999991494841973, iteration: 162786
loss: 0.9932273626327515,grad_norm: 0.8535405690707352, iteration: 162787
loss: 0.9764900207519531,grad_norm: 0.9590919357521864, iteration: 162788
loss: 1.0289729833602905,grad_norm: 0.9999990882423699, iteration: 162789
loss: 1.152999997138977,grad_norm: 0.999999389897046, iteration: 162790
loss: 1.0194752216339111,grad_norm: 0.9999990545201107, iteration: 162791
loss: 0.9823042750358582,grad_norm: 0.9080691148280385, iteration: 162792
loss: 1.0748870372772217,grad_norm: 0.9999991299283629, iteration: 162793
loss: 0.9859753251075745,grad_norm: 0.9999991188048244, iteration: 162794
loss: 1.0457465648651123,grad_norm: 0.9999991235294498, iteration: 162795
loss: 1.0084651708602905,grad_norm: 0.9616807417739853, iteration: 162796
loss: 0.9950869083404541,grad_norm: 0.886277603824362, iteration: 162797
loss: 1.0126328468322754,grad_norm: 0.9999991674234554, iteration: 162798
loss: 1.0249519348144531,grad_norm: 0.9171092983065025, iteration: 162799
loss: 0.9847571849822998,grad_norm: 0.9999992852700434, iteration: 162800
loss: 0.9950152635574341,grad_norm: 0.9585937387861628, iteration: 162801
loss: 1.0047272443771362,grad_norm: 0.9999993269830109, iteration: 162802
loss: 1.025359034538269,grad_norm: 0.999999288102622, iteration: 162803
loss: 1.0092133283615112,grad_norm: 0.9999991485818247, iteration: 162804
loss: 1.0025907754898071,grad_norm: 0.9999991427112546, iteration: 162805
loss: 0.9835222363471985,grad_norm: 0.9067355305128291, iteration: 162806
loss: 1.0237910747528076,grad_norm: 0.8428894668330448, iteration: 162807
loss: 0.9820656180381775,grad_norm: 0.9796636398414339, iteration: 162808
loss: 0.9961507320404053,grad_norm: 0.9999991629725227, iteration: 162809
loss: 1.0113168954849243,grad_norm: 0.9999991663815424, iteration: 162810
loss: 1.065171718597412,grad_norm: 0.99999912084297, iteration: 162811
loss: 1.011269211769104,grad_norm: 0.9131397853483412, iteration: 162812
loss: 1.0059847831726074,grad_norm: 0.9999992746518961, iteration: 162813
loss: 0.9755842089653015,grad_norm: 0.9999997567919331, iteration: 162814
loss: 1.013806700706482,grad_norm: 0.9594416036101654, iteration: 162815
loss: 1.0232417583465576,grad_norm: 0.9999991516372922, iteration: 162816
loss: 1.0262980461120605,grad_norm: 0.9877827219599923, iteration: 162817
loss: 1.0176987648010254,grad_norm: 0.8997476779173205, iteration: 162818
loss: 1.0360170602798462,grad_norm: 0.9769627629138251, iteration: 162819
loss: 1.022782325744629,grad_norm: 0.9999990631750052, iteration: 162820
loss: 0.9937785863876343,grad_norm: 0.898389740875418, iteration: 162821
loss: 0.9642921090126038,grad_norm: 0.9999991581644508, iteration: 162822
loss: 1.0235729217529297,grad_norm: 0.9507457051630169, iteration: 162823
loss: 0.9840435981750488,grad_norm: 0.8955250724933825, iteration: 162824
loss: 1.0247880220413208,grad_norm: 0.9999989897094596, iteration: 162825
loss: 1.0028502941131592,grad_norm: 0.9999990368277554, iteration: 162826
loss: 0.9886764883995056,grad_norm: 0.9631941383804679, iteration: 162827
loss: 0.9955555200576782,grad_norm: 0.9999990729401455, iteration: 162828
loss: 1.016233205795288,grad_norm: 0.9999991746861715, iteration: 162829
loss: 1.0322927236557007,grad_norm: 0.9551426385615098, iteration: 162830
loss: 0.9458803534507751,grad_norm: 0.8774372403278963, iteration: 162831
loss: 0.9845632314682007,grad_norm: 0.9602528281995925, iteration: 162832
loss: 1.0280627012252808,grad_norm: 0.9999994870773841, iteration: 162833
loss: 0.9992015361785889,grad_norm: 0.9251080846386857, iteration: 162834
loss: 1.0055642127990723,grad_norm: 0.9246299744434043, iteration: 162835
loss: 0.9954311847686768,grad_norm: 0.999999192728376, iteration: 162836
loss: 1.012489676475525,grad_norm: 0.9999990273711991, iteration: 162837
loss: 1.0041990280151367,grad_norm: 0.9999989990268651, iteration: 162838
loss: 0.9945285320281982,grad_norm: 0.9611628221076252, iteration: 162839
loss: 0.9604119658470154,grad_norm: 0.9999989518506802, iteration: 162840
loss: 1.0269373655319214,grad_norm: 0.9928253221365516, iteration: 162841
loss: 0.9830147624015808,grad_norm: 0.999999230519105, iteration: 162842
loss: 0.9783080220222473,grad_norm: 0.8783875601471965, iteration: 162843
loss: 0.9847267866134644,grad_norm: 0.9829440679284895, iteration: 162844
loss: 0.9822361469268799,grad_norm: 0.8995840330793226, iteration: 162845
loss: 1.013391137123108,grad_norm: 0.9369104843927762, iteration: 162846
loss: 1.0314666032791138,grad_norm: 0.9999991653728132, iteration: 162847
loss: 1.0235880613327026,grad_norm: 0.9169852362521053, iteration: 162848
loss: 1.001819133758545,grad_norm: 0.9999989466008907, iteration: 162849
loss: 0.9904083609580994,grad_norm: 0.9999990075504009, iteration: 162850
loss: 0.9980536699295044,grad_norm: 0.9629434555088211, iteration: 162851
loss: 0.994192898273468,grad_norm: 0.9999990446141509, iteration: 162852
loss: 1.0368106365203857,grad_norm: 0.8543574503896251, iteration: 162853
loss: 0.9887924194335938,grad_norm: 0.9999992110117095, iteration: 162854
loss: 0.9902137517929077,grad_norm: 0.995612166910726, iteration: 162855
loss: 1.0165743827819824,grad_norm: 0.9999990579575446, iteration: 162856
loss: 1.0064939260482788,grad_norm: 0.9999990478693399, iteration: 162857
loss: 1.0273438692092896,grad_norm: 0.9999992163305867, iteration: 162858
loss: 0.9879248738288879,grad_norm: 0.9357496500063868, iteration: 162859
loss: 0.9882574081420898,grad_norm: 0.8911072793573181, iteration: 162860
loss: 1.0899529457092285,grad_norm: 0.9999992252488087, iteration: 162861
loss: 0.9985028505325317,grad_norm: 0.850553630860345, iteration: 162862
loss: 0.9859894514083862,grad_norm: 0.955752415086163, iteration: 162863
loss: 0.9817905426025391,grad_norm: 0.9999988974290356, iteration: 162864
loss: 1.0271533727645874,grad_norm: 0.9999994470724584, iteration: 162865
loss: 1.0676214694976807,grad_norm: 0.9999991227476892, iteration: 162866
loss: 1.0049670934677124,grad_norm: 0.9361830917846066, iteration: 162867
loss: 1.0155731439590454,grad_norm: 0.9999997388959577, iteration: 162868
loss: 1.000293254852295,grad_norm: 0.949687073760016, iteration: 162869
loss: 1.055008888244629,grad_norm: 0.9999992569626194, iteration: 162870
loss: 1.0215840339660645,grad_norm: 0.9999992865946727, iteration: 162871
loss: 0.9656322002410889,grad_norm: 0.9999990946326714, iteration: 162872
loss: 1.11566960811615,grad_norm: 1.000000025883176, iteration: 162873
loss: 1.0081208944320679,grad_norm: 0.9999991381377076, iteration: 162874
loss: 1.0180755853652954,grad_norm: 0.9999989361295752, iteration: 162875
loss: 1.7198489904403687,grad_norm: 0.9999997786820125, iteration: 162876
loss: 1.0081998109817505,grad_norm: 0.9999989224070918, iteration: 162877
loss: 1.1500617265701294,grad_norm: 0.9995713421941224, iteration: 162878
loss: 1.0090968608856201,grad_norm: 0.9041371564894656, iteration: 162879
loss: 1.230310320854187,grad_norm: 0.9999996888878335, iteration: 162880
loss: 1.2663012742996216,grad_norm: 0.9999989263443605, iteration: 162881
loss: 1.3225425481796265,grad_norm: 0.9999996024299033, iteration: 162882
loss: 1.1442134380340576,grad_norm: 0.9999995200498324, iteration: 162883
loss: 1.4073342084884644,grad_norm: 0.9999996782449293, iteration: 162884
loss: 1.183558702468872,grad_norm: 0.9999993838813315, iteration: 162885
loss: 1.484694242477417,grad_norm: 0.9999995968196945, iteration: 162886
loss: 1.1209344863891602,grad_norm: 0.9999994071416297, iteration: 162887
loss: 1.4215048551559448,grad_norm: 0.9999995716057961, iteration: 162888
loss: 1.198634386062622,grad_norm: 0.999999685651485, iteration: 162889
loss: 1.3761824369430542,grad_norm: 0.9999997480188028, iteration: 162890
loss: 1.1925010681152344,grad_norm: 0.9999994906982073, iteration: 162891
loss: 1.2078297138214111,grad_norm: 0.9999994159978066, iteration: 162892
loss: 1.5280853509902954,grad_norm: 0.9999999449813076, iteration: 162893
loss: 1.0178056955337524,grad_norm: 0.9999992319069424, iteration: 162894
loss: 1.1286524534225464,grad_norm: 0.9999993302266048, iteration: 162895
loss: 1.1491214036941528,grad_norm: 0.999999282812354, iteration: 162896
loss: 1.124112606048584,grad_norm: 0.9999994433233587, iteration: 162897
loss: 1.396121859550476,grad_norm: 0.9999998426974779, iteration: 162898
loss: 1.2255040407180786,grad_norm: 0.999999472094651, iteration: 162899
loss: 1.0719341039657593,grad_norm: 0.9999991056930422, iteration: 162900
loss: 1.2544984817504883,grad_norm: 0.9999998230170413, iteration: 162901
loss: 1.212813377380371,grad_norm: 0.9999991415345426, iteration: 162902
loss: 1.1946377754211426,grad_norm: 0.9999996028713315, iteration: 162903
loss: 1.1711825132369995,grad_norm: 0.9999996544036647, iteration: 162904
loss: 1.0678815841674805,grad_norm: 0.9999993940416965, iteration: 162905
loss: 1.0813593864440918,grad_norm: 0.9999991148911102, iteration: 162906
loss: 1.1086536645889282,grad_norm: 0.9999992494752868, iteration: 162907
loss: 1.4108057022094727,grad_norm: 0.9999997549896593, iteration: 162908
loss: 1.3806062936782837,grad_norm: 0.9999995329022235, iteration: 162909
loss: 1.2190146446228027,grad_norm: 0.9999996143518559, iteration: 162910
loss: 1.3700026273727417,grad_norm: 0.9999994978104406, iteration: 162911
loss: 1.0295445919036865,grad_norm: 0.9999998255618018, iteration: 162912
loss: 1.1574848890304565,grad_norm: 0.9999994062350239, iteration: 162913
loss: 1.1026922464370728,grad_norm: 0.9999991186559428, iteration: 162914
loss: 1.0418543815612793,grad_norm: 0.9999992147031866, iteration: 162915
loss: 1.2195438146591187,grad_norm: 0.9999996088511632, iteration: 162916
loss: 1.0939545631408691,grad_norm: 0.9999992171010527, iteration: 162917
loss: 1.1801973581314087,grad_norm: 0.9999995230018582, iteration: 162918
loss: 1.0915424823760986,grad_norm: 0.9999994626879292, iteration: 162919
loss: 1.2321611642837524,grad_norm: 0.9999995023091641, iteration: 162920
loss: 1.083377480506897,grad_norm: 0.9999991364272715, iteration: 162921
loss: 1.1179426908493042,grad_norm: 0.9999998386416176, iteration: 162922
loss: 1.3724278211593628,grad_norm: 0.9999998939805397, iteration: 162923
loss: 1.3563164472579956,grad_norm: 0.9999998226647804, iteration: 162924
loss: 1.0237716436386108,grad_norm: 0.9999992830696678, iteration: 162925
loss: 1.0630239248275757,grad_norm: 0.9999990988686598, iteration: 162926
loss: 1.2660750150680542,grad_norm: 0.9999995450650886, iteration: 162927
loss: 1.067075490951538,grad_norm: 0.9999998415401005, iteration: 162928
loss: 1.556536316871643,grad_norm: 0.9999994987378696, iteration: 162929
loss: 1.2431601285934448,grad_norm: 0.9999998253369257, iteration: 162930
loss: 1.3470189571380615,grad_norm: 0.9999994741169067, iteration: 162931
loss: 1.3565680980682373,grad_norm: 0.9999995401613022, iteration: 162932
loss: 1.1263313293457031,grad_norm: 0.9999990631636493, iteration: 162933
loss: 1.2001889944076538,grad_norm: 0.9999996148444902, iteration: 162934
loss: 1.0716500282287598,grad_norm: 0.9999991682355464, iteration: 162935
loss: 1.5471645593643188,grad_norm: 0.9999997570545265, iteration: 162936
loss: 1.2557053565979004,grad_norm: 0.9999993695438003, iteration: 162937
loss: 1.1954090595245361,grad_norm: 0.9999993312024833, iteration: 162938
loss: 1.2037822008132935,grad_norm: 0.9999992979058747, iteration: 162939
loss: 1.1852872371673584,grad_norm: 0.9999991502447582, iteration: 162940
loss: 1.5687979459762573,grad_norm: 0.9999997789740882, iteration: 162941
loss: 1.195615291595459,grad_norm: 0.9999997914635258, iteration: 162942
loss: 0.9958310127258301,grad_norm: 0.9999992452743347, iteration: 162943
loss: 1.201151967048645,grad_norm: 0.9999997140264593, iteration: 162944
loss: 1.4109305143356323,grad_norm: 1.0000000973163383, iteration: 162945
loss: 1.2680944204330444,grad_norm: 0.9999994673124142, iteration: 162946
loss: 1.1383707523345947,grad_norm: 0.9999994806602864, iteration: 162947
loss: 1.6172765493392944,grad_norm: 0.9999995071806568, iteration: 162948
loss: 1.1724894046783447,grad_norm: 0.9999992349125345, iteration: 162949
loss: 1.1972744464874268,grad_norm: 0.9999995703443518, iteration: 162950
loss: 1.5163036584854126,grad_norm: 0.9999995319197271, iteration: 162951
loss: 1.0750781297683716,grad_norm: 0.9999999003373683, iteration: 162952
loss: 1.4683805704116821,grad_norm: 0.9999999996562862, iteration: 162953
loss: 1.3685553073883057,grad_norm: 0.9999996212706195, iteration: 162954
loss: 1.1810076236724854,grad_norm: 0.9999998884144423, iteration: 162955
loss: 1.1824193000793457,grad_norm: 0.9999999498287072, iteration: 162956
loss: 1.5228201150894165,grad_norm: 0.9999996849453422, iteration: 162957
loss: 1.0821819305419922,grad_norm: 0.9999999537077204, iteration: 162958
loss: 1.4216300249099731,grad_norm: 0.9999995965265449, iteration: 162959
loss: 1.237310528755188,grad_norm: 0.9999994345603455, iteration: 162960
loss: 1.2649539709091187,grad_norm: 0.999999773681364, iteration: 162961
loss: 1.0290485620498657,grad_norm: 0.9999992423809045, iteration: 162962
loss: 1.171302318572998,grad_norm: 0.9999992090442241, iteration: 162963
loss: 1.205793023109436,grad_norm: 0.9999997314853158, iteration: 162964
loss: 1.2364881038665771,grad_norm: 0.9999995166086851, iteration: 162965
loss: 1.3225164413452148,grad_norm: 0.9999999313924476, iteration: 162966
loss: 1.3032101392745972,grad_norm: 0.9999994033494146, iteration: 162967
loss: 1.5339773893356323,grad_norm: 0.999999937690629, iteration: 162968
loss: 1.4519013166427612,grad_norm: 0.9999994063280311, iteration: 162969
loss: 1.301560640335083,grad_norm: 0.9999996383865707, iteration: 162970
loss: 1.2238696813583374,grad_norm: 0.9999996051724461, iteration: 162971
loss: 1.309157371520996,grad_norm: 0.9999999448747985, iteration: 162972
loss: 1.1718930006027222,grad_norm: 0.9999995422685087, iteration: 162973
loss: 1.1803913116455078,grad_norm: 0.9999999340190686, iteration: 162974
loss: 1.049385905265808,grad_norm: 1.0000000483299052, iteration: 162975
loss: 1.3435477018356323,grad_norm: 0.9999997756770145, iteration: 162976
loss: 1.210915207862854,grad_norm: 0.9999997036605959, iteration: 162977
loss: 1.2631086111068726,grad_norm: 0.9999997575951157, iteration: 162978
loss: 1.2564328908920288,grad_norm: 0.9999999241390248, iteration: 162979
loss: 1.2792671918869019,grad_norm: 0.9999995246417952, iteration: 162980
loss: 1.3117505311965942,grad_norm: 0.9999997720934986, iteration: 162981
loss: 1.3165339231491089,grad_norm: 0.9999998632081993, iteration: 162982
loss: 1.167244553565979,grad_norm: 0.9999997522867891, iteration: 162983
loss: 1.3576065301895142,grad_norm: 0.9999995832262022, iteration: 162984
loss: 1.2300478219985962,grad_norm: 0.9999996682587373, iteration: 162985
loss: 1.390794277191162,grad_norm: 0.9999995910574752, iteration: 162986
loss: 1.227174997329712,grad_norm: 0.9999998103979973, iteration: 162987
loss: 1.1044180393218994,grad_norm: 0.999999699227189, iteration: 162988
loss: 1.225366473197937,grad_norm: 0.9999993891536564, iteration: 162989
loss: 1.0648910999298096,grad_norm: 0.9999993414404469, iteration: 162990
loss: 1.421356201171875,grad_norm: 0.999999622157471, iteration: 162991
loss: 1.1353797912597656,grad_norm: 0.9999994933400838, iteration: 162992
loss: 1.19268000125885,grad_norm: 0.9999993255916078, iteration: 162993
loss: 1.252172589302063,grad_norm: 0.9999996009221813, iteration: 162994
loss: 1.03385591506958,grad_norm: 0.9999993426517486, iteration: 162995
loss: 1.254263997077942,grad_norm: 0.9999999269869801, iteration: 162996
loss: 1.1503571271896362,grad_norm: 0.9999993987212079, iteration: 162997
loss: 1.1845569610595703,grad_norm: 0.9999994686302485, iteration: 162998
loss: 1.1193667650222778,grad_norm: 0.9999993065403602, iteration: 162999
loss: 1.3014134168624878,grad_norm: 0.999999568163025, iteration: 163000
loss: 1.2211430072784424,grad_norm: 0.9999991520224913, iteration: 163001
loss: 1.2251691818237305,grad_norm: 1.0000000300380443, iteration: 163002
loss: 1.0735585689544678,grad_norm: 0.9999992934206371, iteration: 163003
loss: 1.0475175380706787,grad_norm: 0.9999995188989365, iteration: 163004
loss: 1.0882673263549805,grad_norm: 0.9999991523104178, iteration: 163005
loss: 1.0364959239959717,grad_norm: 0.9999995094450931, iteration: 163006
loss: 1.2038042545318604,grad_norm: 0.9999994949027655, iteration: 163007
loss: 1.0251215696334839,grad_norm: 0.9999997292717873, iteration: 163008
loss: 1.052855372428894,grad_norm: 0.9999991440216686, iteration: 163009
loss: 1.0584791898727417,grad_norm: 1.0000000589202875, iteration: 163010
loss: 1.0293612480163574,grad_norm: 0.9999995928974211, iteration: 163011
loss: 1.040424108505249,grad_norm: 0.9999990119276567, iteration: 163012
loss: 1.0061651468276978,grad_norm: 0.9999997262453036, iteration: 163013
loss: 1.0232996940612793,grad_norm: 0.9999990079814606, iteration: 163014
loss: 1.1026142835617065,grad_norm: 0.9999996958760582, iteration: 163015
loss: 1.2990902662277222,grad_norm: 0.9999994766737962, iteration: 163016
loss: 1.0404690504074097,grad_norm: 0.9999997316774496, iteration: 163017
loss: 1.3106629848480225,grad_norm: 0.9999996834311564, iteration: 163018
loss: 1.1878005266189575,grad_norm: 1.0000000933028492, iteration: 163019
loss: 1.1195460557937622,grad_norm: 0.9999992364153204, iteration: 163020
loss: 1.6498574018478394,grad_norm: 0.9999999411640804, iteration: 163021
loss: 1.3612407445907593,grad_norm: 0.9999998813219168, iteration: 163022
loss: 1.1305867433547974,grad_norm: 0.9999997313521181, iteration: 163023
loss: 1.164045810699463,grad_norm: 0.9999993385974014, iteration: 163024
loss: 1.138308048248291,grad_norm: 0.9999998177044976, iteration: 163025
loss: 1.45894193649292,grad_norm: 0.9999998638032873, iteration: 163026
loss: 1.3155882358551025,grad_norm: 0.9999999017346783, iteration: 163027
loss: 1.043306827545166,grad_norm: 0.9999995336775067, iteration: 163028
loss: 1.083522081375122,grad_norm: 0.9999991061884386, iteration: 163029
loss: 1.105202555656433,grad_norm: 0.9999998534262955, iteration: 163030
loss: 1.13592529296875,grad_norm: 0.9999993945915417, iteration: 163031
loss: 1.13482666015625,grad_norm: 0.9999992959740702, iteration: 163032
loss: 0.9805042147636414,grad_norm: 0.9999992976528738, iteration: 163033
loss: 1.1133852005004883,grad_norm: 0.9999994883386314, iteration: 163034
loss: 1.18211829662323,grad_norm: 0.9999997340920765, iteration: 163035
loss: 1.0089876651763916,grad_norm: 0.9999991923047772, iteration: 163036
loss: 1.0699666738510132,grad_norm: 0.9999997741776142, iteration: 163037
loss: 1.1109896898269653,grad_norm: 0.9999993912850861, iteration: 163038
loss: 1.065775752067566,grad_norm: 0.9999993284806735, iteration: 163039
loss: 1.2724488973617554,grad_norm: 0.9999991382747092, iteration: 163040
loss: 0.9816614389419556,grad_norm: 0.9811543834225, iteration: 163041
loss: 0.9775249361991882,grad_norm: 0.9999991955536008, iteration: 163042
loss: 0.9976213574409485,grad_norm: 0.8974327019668504, iteration: 163043
loss: 1.0020524263381958,grad_norm: 0.9999991052901723, iteration: 163044
loss: 0.9841498136520386,grad_norm: 0.9999989343573042, iteration: 163045
loss: 1.025408148765564,grad_norm: 0.9999991583700394, iteration: 163046
loss: 1.0752573013305664,grad_norm: 0.999999218627593, iteration: 163047
loss: 1.0003013610839844,grad_norm: 0.9473231935803441, iteration: 163048
loss: 0.9745659232139587,grad_norm: 0.9999995096047357, iteration: 163049
loss: 1.001602292060852,grad_norm: 0.9999990931597079, iteration: 163050
loss: 0.9842618703842163,grad_norm: 0.9836572777135014, iteration: 163051
loss: 1.0581152439117432,grad_norm: 0.9999999041367317, iteration: 163052
loss: 0.987198531627655,grad_norm: 0.9771029769685223, iteration: 163053
loss: 1.0149521827697754,grad_norm: 0.9999996535253817, iteration: 163054
loss: 1.0705745220184326,grad_norm: 0.9999996619295759, iteration: 163055
loss: 1.0041224956512451,grad_norm: 0.9999995358252517, iteration: 163056
loss: 0.9874350428581238,grad_norm: 0.890348639519163, iteration: 163057
loss: 1.0065330266952515,grad_norm: 0.999999182801334, iteration: 163058
loss: 1.0715410709381104,grad_norm: 0.999999927009626, iteration: 163059
loss: 1.0090668201446533,grad_norm: 0.9289865666262607, iteration: 163060
loss: 0.9744100570678711,grad_norm: 0.9473671505828513, iteration: 163061
loss: 1.0365201234817505,grad_norm: 0.9999991028184092, iteration: 163062
loss: 1.0966545343399048,grad_norm: 0.9999991163301902, iteration: 163063
loss: 1.019834280014038,grad_norm: 0.9999992757624903, iteration: 163064
loss: 1.1280955076217651,grad_norm: 0.9999991008188627, iteration: 163065
loss: 1.0017259120941162,grad_norm: 0.9839853474909083, iteration: 163066
loss: 1.0118250846862793,grad_norm: 0.9875375008518587, iteration: 163067
loss: 1.0459892749786377,grad_norm: 0.9999992909895125, iteration: 163068
loss: 1.0264736413955688,grad_norm: 0.9999991624481221, iteration: 163069
loss: 1.028728723526001,grad_norm: 0.9031121559044234, iteration: 163070
loss: 0.9646545052528381,grad_norm: 0.9999992824994874, iteration: 163071
loss: 0.9897199273109436,grad_norm: 0.9734050590100017, iteration: 163072
loss: 0.9850888252258301,grad_norm: 0.9080790143876293, iteration: 163073
loss: 1.018004298210144,grad_norm: 0.9110605056164433, iteration: 163074
loss: 1.006644368171692,grad_norm: 0.9999991825138241, iteration: 163075
loss: 0.9861662983894348,grad_norm: 0.9999990633326136, iteration: 163076
loss: 1.0443085432052612,grad_norm: 0.9508155887068741, iteration: 163077
loss: 1.0116338729858398,grad_norm: 0.9999994137367417, iteration: 163078
loss: 1.1030421257019043,grad_norm: 0.9999999388880566, iteration: 163079
loss: 0.983433187007904,grad_norm: 0.9999994858469842, iteration: 163080
loss: 1.1312049627304077,grad_norm: 0.999999165104369, iteration: 163081
loss: 0.9635761380195618,grad_norm: 0.9999991583734734, iteration: 163082
loss: 1.0472121238708496,grad_norm: 0.9999999000386222, iteration: 163083
loss: 1.124655842781067,grad_norm: 0.9999992960193806, iteration: 163084
loss: 0.999696671962738,grad_norm: 0.8548001679643065, iteration: 163085
loss: 1.0931631326675415,grad_norm: 0.9999991585246679, iteration: 163086
loss: 0.9737685322761536,grad_norm: 0.8997975873437156, iteration: 163087
loss: 1.1109622716903687,grad_norm: 0.9999996597745291, iteration: 163088
loss: 1.0573482513427734,grad_norm: 0.9999995391405648, iteration: 163089
loss: 1.049386739730835,grad_norm: 0.9999994370766006, iteration: 163090
loss: 0.9746814966201782,grad_norm: 0.9999991020490282, iteration: 163091
loss: 1.0348985195159912,grad_norm: 0.9999991141499625, iteration: 163092
loss: 0.9915597438812256,grad_norm: 0.9999991883250651, iteration: 163093
loss: 0.9854095578193665,grad_norm: 0.9999990376963368, iteration: 163094
loss: 1.0044150352478027,grad_norm: 0.999999012855261, iteration: 163095
loss: 0.9936309456825256,grad_norm: 0.9961124769721476, iteration: 163096
loss: 0.9877004027366638,grad_norm: 0.9999998801796746, iteration: 163097
loss: 0.9954668283462524,grad_norm: 0.935339538134484, iteration: 163098
loss: 0.9708540439605713,grad_norm: 0.975978724842548, iteration: 163099
loss: 1.0066758394241333,grad_norm: 0.9029750935343671, iteration: 163100
loss: 1.0037875175476074,grad_norm: 0.9339117440204036, iteration: 163101
loss: 1.0207675695419312,grad_norm: 0.9999990402761842, iteration: 163102
loss: 0.9928434491157532,grad_norm: 0.9999992586914259, iteration: 163103
loss: 1.0207812786102295,grad_norm: 0.9999993454489036, iteration: 163104
loss: 0.971550464630127,grad_norm: 0.9999992899925564, iteration: 163105
loss: 1.0415611267089844,grad_norm: 0.9999992183817648, iteration: 163106
loss: 0.9968715906143188,grad_norm: 0.9670056669218409, iteration: 163107
loss: 1.1553025245666504,grad_norm: 0.9999990345184379, iteration: 163108
loss: 1.0206174850463867,grad_norm: 0.9999991004188619, iteration: 163109
loss: 1.0184403657913208,grad_norm: 0.9724141513743843, iteration: 163110
loss: 1.0150474309921265,grad_norm: 0.9999990672077312, iteration: 163111
loss: 1.0022367238998413,grad_norm: 0.9999990823058698, iteration: 163112
loss: 1.0031108856201172,grad_norm: 0.9999992082870434, iteration: 163113
loss: 1.0060803890228271,grad_norm: 0.7541253005983803, iteration: 163114
loss: 0.966621994972229,grad_norm: 0.9999991690946878, iteration: 163115
loss: 1.1315383911132812,grad_norm: 0.9999999487737907, iteration: 163116
loss: 0.9585659503936768,grad_norm: 0.99999910719334, iteration: 163117
loss: 1.0719616413116455,grad_norm: 0.9999992157327725, iteration: 163118
loss: 1.0005921125411987,grad_norm: 0.9999990640897463, iteration: 163119
loss: 1.0032789707183838,grad_norm: 0.9902599774223542, iteration: 163120
loss: 1.0313717126846313,grad_norm: 0.9999991734964759, iteration: 163121
loss: 1.0791213512420654,grad_norm: 0.9999996336957955, iteration: 163122
loss: 1.044243335723877,grad_norm: 0.9999991725504548, iteration: 163123
loss: 0.9991146326065063,grad_norm: 0.9999998166205872, iteration: 163124
loss: 1.3966450691223145,grad_norm: 0.9999997293038171, iteration: 163125
loss: 1.1652569770812988,grad_norm: 0.9999990911826574, iteration: 163126
loss: 1.001928687095642,grad_norm: 0.9999992407786356, iteration: 163127
loss: 1.0706119537353516,grad_norm: 0.965317977349104, iteration: 163128
loss: 1.0054136514663696,grad_norm: 0.9999991573565585, iteration: 163129
loss: 1.2059953212738037,grad_norm: 0.9999994782018018, iteration: 163130
loss: 0.9826134443283081,grad_norm: 0.9451639802546089, iteration: 163131
loss: 1.0570619106292725,grad_norm: 0.9999990891193177, iteration: 163132
loss: 1.012314796447754,grad_norm: 0.9999989702800982, iteration: 163133
loss: 1.053847074508667,grad_norm: 0.9999991665509684, iteration: 163134
loss: 1.0135356187820435,grad_norm: 0.9999991359838482, iteration: 163135
loss: 0.9776458740234375,grad_norm: 0.9496731331891393, iteration: 163136
loss: 1.0021332502365112,grad_norm: 0.8437493696837399, iteration: 163137
loss: 1.0187734365463257,grad_norm: 0.999999045544912, iteration: 163138
loss: 1.041817307472229,grad_norm: 0.9999989994785558, iteration: 163139
loss: 1.0149611234664917,grad_norm: 0.9610592157925102, iteration: 163140
loss: 0.9859914779663086,grad_norm: 0.966559542750916, iteration: 163141
loss: 1.0123883485794067,grad_norm: 0.8509702820280872, iteration: 163142
loss: 1.031042218208313,grad_norm: 0.9999991955990057, iteration: 163143
loss: 1.085208535194397,grad_norm: 0.9999991184722966, iteration: 163144
loss: 1.015771746635437,grad_norm: 0.9999992942640762, iteration: 163145
loss: 1.0141189098358154,grad_norm: 0.9999991483456055, iteration: 163146
loss: 1.0015757083892822,grad_norm: 0.9999991770164093, iteration: 163147
loss: 1.0319616794586182,grad_norm: 0.9999992605971731, iteration: 163148
loss: 1.2010588645935059,grad_norm: 0.9999993865786548, iteration: 163149
loss: 1.0120209455490112,grad_norm: 0.9999991599438032, iteration: 163150
loss: 1.0174200534820557,grad_norm: 0.9999993427716607, iteration: 163151
loss: 1.0247678756713867,grad_norm: 0.99999917130674, iteration: 163152
loss: 1.0008286237716675,grad_norm: 0.8139242081041925, iteration: 163153
loss: 1.0660607814788818,grad_norm: 0.9999992300681055, iteration: 163154
loss: 0.9884778261184692,grad_norm: 0.9999991624426993, iteration: 163155
loss: 1.0278514623641968,grad_norm: 0.9999990747916753, iteration: 163156
loss: 1.05315363407135,grad_norm: 0.9999998559786368, iteration: 163157
loss: 1.0021666288375854,grad_norm: 0.9999989653845496, iteration: 163158
loss: 1.171168327331543,grad_norm: 0.9999991309094265, iteration: 163159
loss: 1.0010175704956055,grad_norm: 0.979941940336424, iteration: 163160
loss: 0.9912813305854797,grad_norm: 0.9999991061274494, iteration: 163161
loss: 1.1655967235565186,grad_norm: 0.9999992845431313, iteration: 163162
loss: 0.9976449012756348,grad_norm: 0.9999989354614991, iteration: 163163
loss: 1.0072091817855835,grad_norm: 0.904409827768862, iteration: 163164
loss: 0.9978563189506531,grad_norm: 0.9999991099743136, iteration: 163165
loss: 0.9808950424194336,grad_norm: 0.9999991630629629, iteration: 163166
loss: 0.9783862829208374,grad_norm: 0.8726425592686358, iteration: 163167
loss: 1.103627324104309,grad_norm: 0.9999992706833418, iteration: 163168
loss: 0.9862437844276428,grad_norm: 0.9452859046540274, iteration: 163169
loss: 0.994512677192688,grad_norm: 0.8968869175023876, iteration: 163170
loss: 1.037541389465332,grad_norm: 0.9999991862062445, iteration: 163171
loss: 0.9673240184783936,grad_norm: 0.8435485383030301, iteration: 163172
loss: 1.0163850784301758,grad_norm: 0.9999992216057875, iteration: 163173
loss: 0.9929057359695435,grad_norm: 0.9999991001548663, iteration: 163174
loss: 1.0080980062484741,grad_norm: 0.9999995199246338, iteration: 163175
loss: 0.9811981320381165,grad_norm: 0.9054683756970684, iteration: 163176
loss: 1.0445663928985596,grad_norm: 0.9999992440622177, iteration: 163177
loss: 0.979663610458374,grad_norm: 0.9999991987172397, iteration: 163178
loss: 0.9872820973396301,grad_norm: 0.9411536553280341, iteration: 163179
loss: 1.0107089281082153,grad_norm: 0.9999992062597219, iteration: 163180
loss: 1.0146135091781616,grad_norm: 0.9999991518903469, iteration: 163181
loss: 1.0073572397232056,grad_norm: 0.8446360985185614, iteration: 163182
loss: 1.0373494625091553,grad_norm: 0.9999991004976623, iteration: 163183
loss: 1.0575870275497437,grad_norm: 0.9999993570989687, iteration: 163184
loss: 1.041632056236267,grad_norm: 0.9999993557078833, iteration: 163185
loss: 1.0067107677459717,grad_norm: 0.9999990738159343, iteration: 163186
loss: 1.0123188495635986,grad_norm: 0.9588742521330471, iteration: 163187
loss: 1.0207751989364624,grad_norm: 0.9719475903284754, iteration: 163188
loss: 0.9484683275222778,grad_norm: 0.9205871549900827, iteration: 163189
loss: 1.0256998538970947,grad_norm: 0.9999990063799125, iteration: 163190
loss: 1.0622754096984863,grad_norm: 0.9999991819407961, iteration: 163191
loss: 1.0049057006835938,grad_norm: 0.9354580054444728, iteration: 163192
loss: 0.9857281446456909,grad_norm: 0.9999993827322744, iteration: 163193
loss: 0.9623152017593384,grad_norm: 0.9999990767509456, iteration: 163194
loss: 1.0160422325134277,grad_norm: 0.955731210910121, iteration: 163195
loss: 0.9798026084899902,grad_norm: 0.8985715436396723, iteration: 163196
loss: 0.9966554641723633,grad_norm: 0.9999993159330997, iteration: 163197
loss: 0.9983654022216797,grad_norm: 0.9968008385162086, iteration: 163198
loss: 1.030139446258545,grad_norm: 0.9563712740275484, iteration: 163199
loss: 0.9946985244750977,grad_norm: 0.8165773655638408, iteration: 163200
loss: 1.0197292566299438,grad_norm: 0.9999989916949709, iteration: 163201
loss: 0.9905004501342773,grad_norm: 0.9999990343816292, iteration: 163202
loss: 1.0740584135055542,grad_norm: 0.9265308166724697, iteration: 163203
loss: 1.2065224647521973,grad_norm: 0.9999996257287262, iteration: 163204
loss: 1.016541838645935,grad_norm: 0.9518316268176524, iteration: 163205
loss: 1.1386040449142456,grad_norm: 0.9999991880723476, iteration: 163206
loss: 1.0391335487365723,grad_norm: 0.9373821685050382, iteration: 163207
loss: 0.9938188791275024,grad_norm: 0.9999991196495074, iteration: 163208
loss: 1.101345419883728,grad_norm: 0.9999993539140769, iteration: 163209
loss: 0.9906307458877563,grad_norm: 0.9999992066823098, iteration: 163210
loss: 1.0012929439544678,grad_norm: 0.9836572873465259, iteration: 163211
loss: 0.9723119735717773,grad_norm: 0.9999992057674784, iteration: 163212
loss: 1.0100218057632446,grad_norm: 0.8988224927896047, iteration: 163213
loss: 1.0227071046829224,grad_norm: 0.9999992713339683, iteration: 163214
loss: 1.0069854259490967,grad_norm: 0.9999990281958044, iteration: 163215
loss: 0.9869337677955627,grad_norm: 0.8897818624323549, iteration: 163216
loss: 1.064685344696045,grad_norm: 0.9999991575676475, iteration: 163217
loss: 1.020036220550537,grad_norm: 0.966276197038714, iteration: 163218
loss: 1.0091058015823364,grad_norm: 0.9190005617757712, iteration: 163219
loss: 1.0323805809020996,grad_norm: 0.9999993593325958, iteration: 163220
loss: 1.0474965572357178,grad_norm: 0.9999997177151663, iteration: 163221
loss: 1.0486087799072266,grad_norm: 0.9999999436163962, iteration: 163222
loss: 0.9729839563369751,grad_norm: 0.999999022417467, iteration: 163223
loss: 1.0090887546539307,grad_norm: 0.9999991890117965, iteration: 163224
loss: 1.0272555351257324,grad_norm: 0.9999991697071955, iteration: 163225
loss: 1.002423882484436,grad_norm: 0.9083666697794505, iteration: 163226
loss: 0.9903894066810608,grad_norm: 0.9999997498196592, iteration: 163227
loss: 1.00922691822052,grad_norm: 0.9999995648323847, iteration: 163228
loss: 1.0066395998001099,grad_norm: 0.9999990924119724, iteration: 163229
loss: 1.0072747468948364,grad_norm: 0.9999991282563326, iteration: 163230
loss: 0.9674628376960754,grad_norm: 0.881748280181991, iteration: 163231
loss: 1.146016240119934,grad_norm: 0.999999102852434, iteration: 163232
loss: 1.0046076774597168,grad_norm: 0.8869064522361159, iteration: 163233
loss: 1.0005323886871338,grad_norm: 0.9999991054193506, iteration: 163234
loss: 0.990294873714447,grad_norm: 0.9999992919381416, iteration: 163235
loss: 1.04731023311615,grad_norm: 0.9223279191190525, iteration: 163236
loss: 1.0099889039993286,grad_norm: 0.9999998331006495, iteration: 163237
loss: 0.96419358253479,grad_norm: 0.9999990418514583, iteration: 163238
loss: 1.0353537797927856,grad_norm: 0.9999991585095405, iteration: 163239
loss: 0.9786604046821594,grad_norm: 0.8791214378224186, iteration: 163240
loss: 0.996152937412262,grad_norm: 0.926569146419252, iteration: 163241
loss: 1.0356184244155884,grad_norm: 0.9999992556714802, iteration: 163242
loss: 1.005907416343689,grad_norm: 0.9182404748944749, iteration: 163243
loss: 1.0179170370101929,grad_norm: 0.9347017026813814, iteration: 163244
loss: 0.9886409640312195,grad_norm: 0.9999993198281278, iteration: 163245
loss: 1.0178695917129517,grad_norm: 0.8863594235459988, iteration: 163246
loss: 0.9880238175392151,grad_norm: 0.8322371568891836, iteration: 163247
loss: 0.9871276021003723,grad_norm: 0.999998959107044, iteration: 163248
loss: 1.0451172590255737,grad_norm: 0.9999989886798957, iteration: 163249
loss: 0.9930661916732788,grad_norm: 0.8853009690908882, iteration: 163250
loss: 1.0068113803863525,grad_norm: 0.9999992311753758, iteration: 163251
loss: 0.9920915365219116,grad_norm: 0.9250365501817385, iteration: 163252
loss: 0.9744846224784851,grad_norm: 0.9724651708821576, iteration: 163253
loss: 1.143185019493103,grad_norm: 0.9999996200241403, iteration: 163254
loss: 1.0014177560806274,grad_norm: 0.9999990112884604, iteration: 163255
loss: 1.042358160018921,grad_norm: 0.9999991614281939, iteration: 163256
loss: 0.9934942722320557,grad_norm: 0.9659056305876207, iteration: 163257
loss: 0.9679850935935974,grad_norm: 0.9354169776578376, iteration: 163258
loss: 1.0027047395706177,grad_norm: 0.9999992614861172, iteration: 163259
loss: 1.0235404968261719,grad_norm: 0.9999993003924291, iteration: 163260
loss: 1.013606071472168,grad_norm: 0.9999990235093377, iteration: 163261
loss: 0.9253976941108704,grad_norm: 0.9999992767507297, iteration: 163262
loss: 1.0265294313430786,grad_norm: 0.9999996824595012, iteration: 163263
loss: 1.0124433040618896,grad_norm: 0.9999990583565451, iteration: 163264
loss: 1.0060731172561646,grad_norm: 0.8000261382203914, iteration: 163265
loss: 1.0139398574829102,grad_norm: 0.999999090051795, iteration: 163266
loss: 0.9951688051223755,grad_norm: 0.9878004048193023, iteration: 163267
loss: 0.988064706325531,grad_norm: 0.9999992267857202, iteration: 163268
loss: 1.0371577739715576,grad_norm: 0.9999993537459131, iteration: 163269
loss: 0.9865904450416565,grad_norm: 0.9638443668812255, iteration: 163270
loss: 0.9564660787582397,grad_norm: 0.9999990698752883, iteration: 163271
loss: 0.9933561086654663,grad_norm: 0.9299662705987599, iteration: 163272
loss: 1.0293769836425781,grad_norm: 0.8660457174354521, iteration: 163273
loss: 0.9976577758789062,grad_norm: 0.9999998857250375, iteration: 163274
loss: 0.9840734004974365,grad_norm: 0.8337741838042584, iteration: 163275
loss: 1.0279157161712646,grad_norm: 0.999999831194063, iteration: 163276
loss: 0.9863795638084412,grad_norm: 0.999999135419625, iteration: 163277
loss: 1.0159177780151367,grad_norm: 0.8730733543564996, iteration: 163278
loss: 0.9851382374763489,grad_norm: 0.9999989265857472, iteration: 163279
loss: 0.9900773763656616,grad_norm: 0.9999993028825968, iteration: 163280
loss: 1.0112463235855103,grad_norm: 0.999999381192359, iteration: 163281
loss: 1.0154545307159424,grad_norm: 0.9096390866179457, iteration: 163282
loss: 1.0277256965637207,grad_norm: 0.9574886042814903, iteration: 163283
loss: 1.0130653381347656,grad_norm: 0.9245606749847566, iteration: 163284
loss: 1.0724879503250122,grad_norm: 0.9999990607500941, iteration: 163285
loss: 0.9762259125709534,grad_norm: 0.999998982815995, iteration: 163286
loss: 1.0648019313812256,grad_norm: 0.9999992616747736, iteration: 163287
loss: 1.0368074178695679,grad_norm: 0.9999991448851381, iteration: 163288
loss: 1.0225576162338257,grad_norm: 0.9206168346887259, iteration: 163289
loss: 1.0307475328445435,grad_norm: 0.9579032937385812, iteration: 163290
loss: 1.0312941074371338,grad_norm: 0.9999994289000663, iteration: 163291
loss: 1.1301991939544678,grad_norm: 0.9999995408182328, iteration: 163292
loss: 0.9812971353530884,grad_norm: 0.9999990883357157, iteration: 163293
loss: 0.9289014935493469,grad_norm: 0.9999991995915357, iteration: 163294
loss: 0.9615442156791687,grad_norm: 0.9999990620922348, iteration: 163295
loss: 1.0038646459579468,grad_norm: 0.9999991889709543, iteration: 163296
loss: 0.9958893060684204,grad_norm: 0.9850111427582229, iteration: 163297
loss: 1.0128669738769531,grad_norm: 0.8729452828708025, iteration: 163298
loss: 1.0192813873291016,grad_norm: 0.9060496525163504, iteration: 163299
loss: 1.0962997674942017,grad_norm: 0.9999992320271242, iteration: 163300
loss: 0.9965477585792542,grad_norm: 0.9999990881840917, iteration: 163301
loss: 0.9733302593231201,grad_norm: 0.9999990534063582, iteration: 163302
loss: 1.0457838773727417,grad_norm: 0.9999997439382845, iteration: 163303
loss: 0.9963699579238892,grad_norm: 0.8326918804590379, iteration: 163304
loss: 1.013904333114624,grad_norm: 0.91128191868383, iteration: 163305
loss: 1.0123028755187988,grad_norm: 0.9999992032060556, iteration: 163306
loss: 0.9968920946121216,grad_norm: 0.9999996211099071, iteration: 163307
loss: 1.0090686082839966,grad_norm: 0.999999107832491, iteration: 163308
loss: 0.9972004294395447,grad_norm: 0.973371017763212, iteration: 163309
loss: 1.1045269966125488,grad_norm: 0.9999999063590292, iteration: 163310
loss: 0.9979020953178406,grad_norm: 0.9846019705131521, iteration: 163311
loss: 1.0339868068695068,grad_norm: 0.9999991360295519, iteration: 163312
loss: 1.0499868392944336,grad_norm: 0.9999996800080008, iteration: 163313
loss: 1.0735623836517334,grad_norm: 0.9999991242025933, iteration: 163314
loss: 1.0047378540039062,grad_norm: 0.9119274150939438, iteration: 163315
loss: 1.072206735610962,grad_norm: 0.9157636479765121, iteration: 163316
loss: 1.0081249475479126,grad_norm: 0.9999991649101241, iteration: 163317
loss: 0.984579861164093,grad_norm: 0.9999990374553047, iteration: 163318
loss: 1.0165348052978516,grad_norm: 0.9999993668660709, iteration: 163319
loss: 0.9861999154090881,grad_norm: 0.9999991967949087, iteration: 163320
loss: 1.0438836812973022,grad_norm: 0.9999991071896878, iteration: 163321
loss: 0.9838457703590393,grad_norm: 0.8686174035590442, iteration: 163322
loss: 1.0211181640625,grad_norm: 0.922051193874662, iteration: 163323
loss: 1.0154060125350952,grad_norm: 0.9999989440280129, iteration: 163324
loss: 0.9845671653747559,grad_norm: 0.999999148768634, iteration: 163325
loss: 1.0231318473815918,grad_norm: 0.9999996503438314, iteration: 163326
loss: 0.9808050394058228,grad_norm: 0.9999990237655654, iteration: 163327
loss: 1.0857040882110596,grad_norm: 0.9999993219329862, iteration: 163328
loss: 0.9967759847640991,grad_norm: 0.9999993289093314, iteration: 163329
loss: 1.0177836418151855,grad_norm: 0.9999989988764472, iteration: 163330
loss: 1.0008749961853027,grad_norm: 0.9112066812226484, iteration: 163331
loss: 0.992098867893219,grad_norm: 0.999999419047343, iteration: 163332
loss: 0.9890923500061035,grad_norm: 0.9999993377794785, iteration: 163333
loss: 0.996116578578949,grad_norm: 0.9999992001711493, iteration: 163334
loss: 1.041847825050354,grad_norm: 0.9999991524785672, iteration: 163335
loss: 0.9877960681915283,grad_norm: 0.9999992071536773, iteration: 163336
loss: 1.000899076461792,grad_norm: 0.9999992067696573, iteration: 163337
loss: 0.9994310736656189,grad_norm: 0.9769222850944477, iteration: 163338
loss: 1.0560377836227417,grad_norm: 0.999999951241622, iteration: 163339
loss: 1.0224028825759888,grad_norm: 0.99999914231173, iteration: 163340
loss: 0.998323380947113,grad_norm: 0.9999994257355668, iteration: 163341
loss: 1.012835144996643,grad_norm: 0.9999991659293254, iteration: 163342
loss: 0.9753329157829285,grad_norm: 0.9999993153636009, iteration: 163343
loss: 1.0057414770126343,grad_norm: 0.9999991872522057, iteration: 163344
loss: 0.9684672951698303,grad_norm: 0.9999991616346005, iteration: 163345
loss: 1.0105708837509155,grad_norm: 0.9999989974229523, iteration: 163346
loss: 0.9969870448112488,grad_norm: 0.9999990009158419, iteration: 163347
loss: 1.0063496828079224,grad_norm: 0.9999990833446014, iteration: 163348
loss: 1.078368067741394,grad_norm: 0.9999993467121471, iteration: 163349
loss: 0.9900426268577576,grad_norm: 0.9704599095156901, iteration: 163350
loss: 1.016179084777832,grad_norm: 0.9999992811304957, iteration: 163351
loss: 0.9879834651947021,grad_norm: 0.993147024920674, iteration: 163352
loss: 0.9853753447532654,grad_norm: 0.947131782703278, iteration: 163353
loss: 0.9992823004722595,grad_norm: 0.9999991391135136, iteration: 163354
loss: 1.0432655811309814,grad_norm: 0.8837605159652122, iteration: 163355
loss: 0.9728958010673523,grad_norm: 0.999999214083673, iteration: 163356
loss: 1.0089927911758423,grad_norm: 0.9999992470194825, iteration: 163357
loss: 1.0136202573776245,grad_norm: 0.9999991545612741, iteration: 163358
loss: 1.0748026371002197,grad_norm: 0.9999994230067055, iteration: 163359
loss: 1.0728414058685303,grad_norm: 0.9999998019708569, iteration: 163360
loss: 1.025496482849121,grad_norm: 0.9999990526591347, iteration: 163361
loss: 1.005167007446289,grad_norm: 0.9999991920024404, iteration: 163362
loss: 1.030213475227356,grad_norm: 0.9999992554906452, iteration: 163363
loss: 0.9921302199363708,grad_norm: 0.9999989641143204, iteration: 163364
loss: 1.0111863613128662,grad_norm: 0.9999992480428063, iteration: 163365
loss: 0.9937165975570679,grad_norm: 0.9283927494533184, iteration: 163366
loss: 0.9853811264038086,grad_norm: 0.9999991814864561, iteration: 163367
loss: 1.0037485361099243,grad_norm: 0.9349291006861618, iteration: 163368
loss: 1.035807490348816,grad_norm: 0.9999992042243997, iteration: 163369
loss: 1.0102593898773193,grad_norm: 0.8367915265830506, iteration: 163370
loss: 0.9782500863075256,grad_norm: 0.9999991320873736, iteration: 163371
loss: 1.0517951250076294,grad_norm: 0.9999998397313957, iteration: 163372
loss: 1.0369316339492798,grad_norm: 0.9963129528370943, iteration: 163373
loss: 1.0333243608474731,grad_norm: 0.9999992304993123, iteration: 163374
loss: 0.9832649230957031,grad_norm: 0.948284980743579, iteration: 163375
loss: 0.997694194316864,grad_norm: 0.9999991632858388, iteration: 163376
loss: 1.048586130142212,grad_norm: 0.9999993834495726, iteration: 163377
loss: 1.0516512393951416,grad_norm: 0.9999991757021545, iteration: 163378
loss: 1.0156651735305786,grad_norm: 0.9999991045263658, iteration: 163379
loss: 1.0059407949447632,grad_norm: 0.9999992655205395, iteration: 163380
loss: 0.9875859022140503,grad_norm: 0.9999990398080891, iteration: 163381
loss: 1.0030779838562012,grad_norm: 0.9999989568954295, iteration: 163382
loss: 1.0121119022369385,grad_norm: 0.9630246985969342, iteration: 163383
loss: 1.0320885181427002,grad_norm: 0.9411872914394093, iteration: 163384
loss: 1.0069828033447266,grad_norm: 0.9800657680634113, iteration: 163385
loss: 0.966368556022644,grad_norm: 0.8588575049125549, iteration: 163386
loss: 1.0034326314926147,grad_norm: 0.9999991444562409, iteration: 163387
loss: 0.9762910008430481,grad_norm: 0.9999992884998714, iteration: 163388
loss: 0.9972096681594849,grad_norm: 0.999999425348725, iteration: 163389
loss: 0.9993064403533936,grad_norm: 0.9999991660650265, iteration: 163390
loss: 1.0167112350463867,grad_norm: 0.9999995463710522, iteration: 163391
loss: 1.0156139135360718,grad_norm: 0.999999094513231, iteration: 163392
loss: 0.9871960878372192,grad_norm: 1.0000000032290117, iteration: 163393
loss: 1.0132781267166138,grad_norm: 0.8709817045107481, iteration: 163394
loss: 1.0490626096725464,grad_norm: 0.9516882422032269, iteration: 163395
loss: 0.9766387939453125,grad_norm: 0.9999990598780607, iteration: 163396
loss: 0.9877562522888184,grad_norm: 0.9731796900924916, iteration: 163397
loss: 0.9984152913093567,grad_norm: 0.9631215806288456, iteration: 163398
loss: 0.9973415732383728,grad_norm: 0.9999996480562544, iteration: 163399
loss: 1.1156474351882935,grad_norm: 0.9999994051130238, iteration: 163400
loss: 0.984789252281189,grad_norm: 0.8874161500317042, iteration: 163401
loss: 1.0495009422302246,grad_norm: 0.9999996496044752, iteration: 163402
loss: 0.971783459186554,grad_norm: 0.9999991921398298, iteration: 163403
loss: 0.9834184050559998,grad_norm: 0.8715657037978275, iteration: 163404
loss: 1.0067201852798462,grad_norm: 0.9999991807976539, iteration: 163405
loss: 1.0153372287750244,grad_norm: 0.954794488274146, iteration: 163406
loss: 0.9959233403205872,grad_norm: 0.9999990929818928, iteration: 163407
loss: 0.9965706467628479,grad_norm: 0.9999996775714052, iteration: 163408
loss: 1.015602946281433,grad_norm: 0.9999992476380768, iteration: 163409
loss: 0.9660987257957458,grad_norm: 0.9999990849035915, iteration: 163410
loss: 1.1413019895553589,grad_norm: 0.9999998765912002, iteration: 163411
loss: 1.05367112159729,grad_norm: 0.9433454644318937, iteration: 163412
loss: 1.045290231704712,grad_norm: 0.9999990028638513, iteration: 163413
loss: 1.0086555480957031,grad_norm: 0.9999992344839317, iteration: 163414
loss: 1.0943055152893066,grad_norm: 0.9999990370504399, iteration: 163415
loss: 1.0385839939117432,grad_norm: 0.9999996248967848, iteration: 163416
loss: 1.0090378522872925,grad_norm: 0.999999014729473, iteration: 163417
loss: 1.1519615650177002,grad_norm: 0.99999925337103, iteration: 163418
loss: 1.0239593982696533,grad_norm: 0.9685578066826545, iteration: 163419
loss: 0.9821199774742126,grad_norm: 0.9539098335662912, iteration: 163420
loss: 1.0012966394424438,grad_norm: 0.9999998012582509, iteration: 163421
loss: 0.9903712272644043,grad_norm: 0.8752548794944237, iteration: 163422
loss: 1.115138292312622,grad_norm: 0.9999993605047779, iteration: 163423
loss: 0.9839696288108826,grad_norm: 0.9301311705186428, iteration: 163424
loss: 1.0010193586349487,grad_norm: 0.9999996902030629, iteration: 163425
loss: 1.2470399141311646,grad_norm: 0.9999992709960533, iteration: 163426
loss: 1.1135005950927734,grad_norm: 0.9999992396297827, iteration: 163427
loss: 1.0201189517974854,grad_norm: 0.9999991157941394, iteration: 163428
loss: 1.0428979396820068,grad_norm: 0.9109795809160125, iteration: 163429
loss: 1.005262017250061,grad_norm: 0.99999966161607, iteration: 163430
loss: 0.9845916628837585,grad_norm: 0.9696154913850009, iteration: 163431
loss: 1.020032286643982,grad_norm: 0.8881819764942038, iteration: 163432
loss: 0.9863036274909973,grad_norm: 0.9034735109610604, iteration: 163433
loss: 0.9931098222732544,grad_norm: 0.892576141784124, iteration: 163434
loss: 1.000386357307434,grad_norm: 0.9604936272784631, iteration: 163435
loss: 1.015878677368164,grad_norm: 0.9999997018162229, iteration: 163436
loss: 0.9802625179290771,grad_norm: 0.9499556669086529, iteration: 163437
loss: 1.0103670358657837,grad_norm: 0.9999990556857579, iteration: 163438
loss: 0.9935703277587891,grad_norm: 0.9092415026480994, iteration: 163439
loss: 0.9933444857597351,grad_norm: 0.8035358408986963, iteration: 163440
loss: 0.990380585193634,grad_norm: 0.9455652599671948, iteration: 163441
loss: 1.0164293050765991,grad_norm: 0.999999132291887, iteration: 163442
loss: 0.9920998811721802,grad_norm: 0.9709260401225411, iteration: 163443
loss: 0.9983291625976562,grad_norm: 0.9999993235609695, iteration: 163444
loss: 0.9824991822242737,grad_norm: 0.9999991300955332, iteration: 163445
loss: 0.9810424447059631,grad_norm: 0.8880486497238204, iteration: 163446
loss: 0.9901487231254578,grad_norm: 0.8415733744653466, iteration: 163447
loss: 1.0040291547775269,grad_norm: 0.9792911101657366, iteration: 163448
loss: 1.0638654232025146,grad_norm: 0.9999994994812236, iteration: 163449
loss: 1.0391733646392822,grad_norm: 0.9093617091668652, iteration: 163450
loss: 1.0284202098846436,grad_norm: 0.9999990399801875, iteration: 163451
loss: 0.9918486475944519,grad_norm: 0.999999086672417, iteration: 163452
loss: 1.0000125169754028,grad_norm: 0.9999991341827642, iteration: 163453
loss: 0.9928191900253296,grad_norm: 0.9999992714680963, iteration: 163454
loss: 1.0180084705352783,grad_norm: 0.9999991025579273, iteration: 163455
loss: 1.037229061126709,grad_norm: 0.9999990956957048, iteration: 163456
loss: 1.0251752138137817,grad_norm: 0.9999990870142155, iteration: 163457
loss: 0.9847202301025391,grad_norm: 0.9999990561662006, iteration: 163458
loss: 1.0027663707733154,grad_norm: 0.9999990896294337, iteration: 163459
loss: 0.9994449615478516,grad_norm: 0.9494915247043952, iteration: 163460
loss: 1.075333833694458,grad_norm: 0.9999995841311455, iteration: 163461
loss: 1.0128999948501587,grad_norm: 0.9999997618237323, iteration: 163462
loss: 1.007826805114746,grad_norm: 0.9980227859797236, iteration: 163463
loss: 0.9724398255348206,grad_norm: 0.9999991808353259, iteration: 163464
loss: 1.0136667490005493,grad_norm: 0.999999214813431, iteration: 163465
loss: 1.0046329498291016,grad_norm: 0.999999147601722, iteration: 163466
loss: 1.006508469581604,grad_norm: 0.999999138180433, iteration: 163467
loss: 0.9652842283248901,grad_norm: 0.9476656347577557, iteration: 163468
loss: 1.011579155921936,grad_norm: 0.9140286818293621, iteration: 163469
loss: 1.0027793645858765,grad_norm: 0.8539603541447331, iteration: 163470
loss: 1.02326238155365,grad_norm: 0.8964766987926941, iteration: 163471
loss: 0.9708009362220764,grad_norm: 0.999999072012339, iteration: 163472
loss: 0.9989573359489441,grad_norm: 0.9825172119257491, iteration: 163473
loss: 1.0219595432281494,grad_norm: 0.9999995603844014, iteration: 163474
loss: 0.971187174320221,grad_norm: 0.999999186011025, iteration: 163475
loss: 1.0376065969467163,grad_norm: 0.9999991781598142, iteration: 163476
loss: 0.9984135031700134,grad_norm: 0.8931290266112151, iteration: 163477
loss: 0.9825116991996765,grad_norm: 0.8655191430050861, iteration: 163478
loss: 0.9898025393486023,grad_norm: 0.9090269327776419, iteration: 163479
loss: 1.014802098274231,grad_norm: 0.9999995443189919, iteration: 163480
loss: 1.01213538646698,grad_norm: 0.9570530720909417, iteration: 163481
loss: 0.9958590269088745,grad_norm: 0.9999992720576175, iteration: 163482
loss: 0.984329879283905,grad_norm: 0.8640815300393544, iteration: 163483
loss: 0.9956369400024414,grad_norm: 0.9999991040283341, iteration: 163484
loss: 0.9875975847244263,grad_norm: 0.9999991673296266, iteration: 163485
loss: 1.0028209686279297,grad_norm: 0.9999990174390438, iteration: 163486
loss: 1.0062919855117798,grad_norm: 0.9520186678474913, iteration: 163487
loss: 1.010518193244934,grad_norm: 0.9816612261714507, iteration: 163488
loss: 1.0375466346740723,grad_norm: 0.886933706170471, iteration: 163489
loss: 1.0535556077957153,grad_norm: 0.9999990890636938, iteration: 163490
loss: 0.9844316840171814,grad_norm: 0.999999029809058, iteration: 163491
loss: 1.0219383239746094,grad_norm: 0.99999963463044, iteration: 163492
loss: 1.0369263887405396,grad_norm: 0.9207661590891524, iteration: 163493
loss: 0.9856718182563782,grad_norm: 0.8912046849443964, iteration: 163494
loss: 1.019263744354248,grad_norm: 0.8992453719762485, iteration: 163495
loss: 1.0014015436172485,grad_norm: 0.9313911499829313, iteration: 163496
loss: 0.9965832829475403,grad_norm: 0.9370589779994364, iteration: 163497
loss: 0.9743367433547974,grad_norm: 0.8883281243418562, iteration: 163498
loss: 1.0740923881530762,grad_norm: 0.9999997017160336, iteration: 163499
loss: 0.9768750071525574,grad_norm: 0.9690898335939139, iteration: 163500
loss: 1.0982005596160889,grad_norm: 0.9999998953693048, iteration: 163501
loss: 0.9829095005989075,grad_norm: 0.9999990279918618, iteration: 163502
loss: 1.026178240776062,grad_norm: 0.9999992318322684, iteration: 163503
loss: 1.0255067348480225,grad_norm: 0.9999995791512484, iteration: 163504
loss: 1.0057440996170044,grad_norm: 0.8010172706147903, iteration: 163505
loss: 1.0470664501190186,grad_norm: 0.9999991649457866, iteration: 163506
loss: 0.982025146484375,grad_norm: 0.9999991708924119, iteration: 163507
loss: 0.998117983341217,grad_norm: 0.9999990525404991, iteration: 163508
loss: 1.0381550788879395,grad_norm: 0.9999992629489691, iteration: 163509
loss: 1.0174933671951294,grad_norm: 0.9857232981570139, iteration: 163510
loss: 1.017207145690918,grad_norm: 0.9490674704346702, iteration: 163511
loss: 1.005604863166809,grad_norm: 0.9999990952494661, iteration: 163512
loss: 1.0047188997268677,grad_norm: 0.9999991973863859, iteration: 163513
loss: 1.019553780555725,grad_norm: 0.999999177926359, iteration: 163514
loss: 1.0560197830200195,grad_norm: 0.9999991381677528, iteration: 163515
loss: 1.0170369148254395,grad_norm: 0.9999989531605764, iteration: 163516
loss: 0.9869225025177002,grad_norm: 0.9999991212122936, iteration: 163517
loss: 1.0345524549484253,grad_norm: 0.9310235674138854, iteration: 163518
loss: 1.000090479850769,grad_norm: 0.9999990216202359, iteration: 163519
loss: 0.997532308101654,grad_norm: 0.9263874404451372, iteration: 163520
loss: 0.9479418992996216,grad_norm: 0.9999991666953097, iteration: 163521
loss: 1.0969337224960327,grad_norm: 0.9999998373156922, iteration: 163522
loss: 0.98460853099823,grad_norm: 0.8667087955744646, iteration: 163523
loss: 0.9844803810119629,grad_norm: 0.9879589808892066, iteration: 163524
loss: 1.0083674192428589,grad_norm: 0.8377627335416162, iteration: 163525
loss: 1.0051162242889404,grad_norm: 0.9999991512473647, iteration: 163526
loss: 1.027586579322815,grad_norm: 0.9672721199130794, iteration: 163527
loss: 1.020675778388977,grad_norm: 0.9999991877506361, iteration: 163528
loss: 1.021769404411316,grad_norm: 0.8545562697937834, iteration: 163529
loss: 1.0309545993804932,grad_norm: 0.9999997274445143, iteration: 163530
loss: 0.9667317271232605,grad_norm: 0.8927149717528513, iteration: 163531
loss: 1.0038508176803589,grad_norm: 0.9999989871135917, iteration: 163532
loss: 0.9750768542289734,grad_norm: 0.891281380029729, iteration: 163533
loss: 1.026342511177063,grad_norm: 0.9999998040886194, iteration: 163534
loss: 1.0282403230667114,grad_norm: 0.9999993402608126, iteration: 163535
loss: 1.0027029514312744,grad_norm: 0.9999991190884359, iteration: 163536
loss: 0.9759182333946228,grad_norm: 0.8852912845144987, iteration: 163537
loss: 1.046065092086792,grad_norm: 0.9999990383934089, iteration: 163538
loss: 0.9803887009620667,grad_norm: 0.9740458126881838, iteration: 163539
loss: 1.0708094835281372,grad_norm: 0.9999990628567978, iteration: 163540
loss: 1.0346424579620361,grad_norm: 0.9657447509703175, iteration: 163541
loss: 0.9911301732063293,grad_norm: 0.9999990323202989, iteration: 163542
loss: 1.0027220249176025,grad_norm: 0.9640136046761707, iteration: 163543
loss: 0.978217363357544,grad_norm: 0.9101579286460296, iteration: 163544
loss: 0.9872592687606812,grad_norm: 0.8977093396907802, iteration: 163545
loss: 1.0182822942733765,grad_norm: 0.8779033376365709, iteration: 163546
loss: 1.0287504196166992,grad_norm: 0.9951645992539639, iteration: 163547
loss: 0.9940319657325745,grad_norm: 0.8408039187552007, iteration: 163548
loss: 1.0256516933441162,grad_norm: 0.954598991658732, iteration: 163549
loss: 1.0164457559585571,grad_norm: 0.9956677581872688, iteration: 163550
loss: 0.9939408302307129,grad_norm: 0.9875500834882399, iteration: 163551
loss: 1.0261783599853516,grad_norm: 0.9999994047793267, iteration: 163552
loss: 0.999826967716217,grad_norm: 0.8730990940099681, iteration: 163553
loss: 0.9768304228782654,grad_norm: 0.9999990327858335, iteration: 163554
loss: 1.0057756900787354,grad_norm: 0.9826270579743696, iteration: 163555
loss: 0.9474290013313293,grad_norm: 0.9999990758251175, iteration: 163556
loss: 0.9806771874427795,grad_norm: 0.9999991678059363, iteration: 163557
loss: 0.9851431250572205,grad_norm: 0.9969189607809871, iteration: 163558
loss: 1.011574149131775,grad_norm: 0.9999990660080538, iteration: 163559
loss: 1.0291407108306885,grad_norm: 0.9999990494668917, iteration: 163560
loss: 0.975208580493927,grad_norm: 0.9637671918320192, iteration: 163561
loss: 1.0203096866607666,grad_norm: 0.9905946953243677, iteration: 163562
loss: 0.997015118598938,grad_norm: 0.9999991602458154, iteration: 163563
loss: 1.0213314294815063,grad_norm: 0.9742352209599607, iteration: 163564
loss: 1.0267642736434937,grad_norm: 0.9999998593760393, iteration: 163565
loss: 1.1990041732788086,grad_norm: 0.9999996378611336, iteration: 163566
loss: 0.992044985294342,grad_norm: 0.9999991476415665, iteration: 163567
loss: 0.9784348011016846,grad_norm: 0.9999993271709786, iteration: 163568
loss: 1.0105156898498535,grad_norm: 0.9583875850710512, iteration: 163569
loss: 1.0124143362045288,grad_norm: 0.9999990276909337, iteration: 163570
loss: 0.9881361722946167,grad_norm: 0.999999435182624, iteration: 163571
loss: 0.9877167344093323,grad_norm: 0.782998633919442, iteration: 163572
loss: 0.959852933883667,grad_norm: 0.9999990250496822, iteration: 163573
loss: 0.9862151145935059,grad_norm: 0.9999991442103535, iteration: 163574
loss: 1.0122194290161133,grad_norm: 0.9999993319649856, iteration: 163575
loss: 0.9685693979263306,grad_norm: 0.9905348673441059, iteration: 163576
loss: 1.0062042474746704,grad_norm: 0.9598466516854728, iteration: 163577
loss: 1.0121771097183228,grad_norm: 0.9999990298050356, iteration: 163578
loss: 1.0394539833068848,grad_norm: 0.9999992074645039, iteration: 163579
loss: 0.9725325107574463,grad_norm: 0.8323977932165488, iteration: 163580
loss: 0.9813497066497803,grad_norm: 0.9999992033192534, iteration: 163581
loss: 0.9700918197631836,grad_norm: 0.9999991880529019, iteration: 163582
loss: 0.9401967525482178,grad_norm: 0.9999992039204181, iteration: 163583
loss: 0.9698127508163452,grad_norm: 0.9577253661772239, iteration: 163584
loss: 0.9833031892776489,grad_norm: 0.9999996171545855, iteration: 163585
loss: 0.9985339641571045,grad_norm: 0.999999137588736, iteration: 163586
loss: 1.0159491300582886,grad_norm: 0.9715578805039586, iteration: 163587
loss: 0.9595628976821899,grad_norm: 0.9431889714268827, iteration: 163588
loss: 0.9875789880752563,grad_norm: 0.9966682306543875, iteration: 163589
loss: 0.9857908487319946,grad_norm: 0.9999991555958222, iteration: 163590
loss: 0.9824269413948059,grad_norm: 0.9203773071859015, iteration: 163591
loss: 0.9606921672821045,grad_norm: 0.8693557167238423, iteration: 163592
loss: 1.0039423704147339,grad_norm: 0.8074223794916925, iteration: 163593
loss: 1.0149949789047241,grad_norm: 0.9999991182088026, iteration: 163594
loss: 0.974794328212738,grad_norm: 0.9300372734682611, iteration: 163595
loss: 1.015971064567566,grad_norm: 0.9999991594271498, iteration: 163596
loss: 0.9671794772148132,grad_norm: 0.9999992906887079, iteration: 163597
loss: 1.0077482461929321,grad_norm: 0.9999990633475411, iteration: 163598
loss: 0.9640709757804871,grad_norm: 0.9999991754172618, iteration: 163599
loss: 0.9862778782844543,grad_norm: 0.9391035563519947, iteration: 163600
loss: 1.0482522249221802,grad_norm: 0.9999990703993995, iteration: 163601
loss: 1.001491665840149,grad_norm: 0.8045751180659158, iteration: 163602
loss: 0.9750567674636841,grad_norm: 0.9580626813506314, iteration: 163603
loss: 0.9754832983016968,grad_norm: 0.8621272147803045, iteration: 163604
loss: 1.0150232315063477,grad_norm: 0.9351268836559723, iteration: 163605
loss: 1.019097924232483,grad_norm: 0.9827661100317626, iteration: 163606
loss: 0.9581515789031982,grad_norm: 0.9105452839245597, iteration: 163607
loss: 0.9976826310157776,grad_norm: 0.9599554453553454, iteration: 163608
loss: 0.9820968508720398,grad_norm: 0.9999991821847453, iteration: 163609
loss: 1.0188267230987549,grad_norm: 0.9903741477086713, iteration: 163610
loss: 0.9969192147254944,grad_norm: 0.9999991121894345, iteration: 163611
loss: 0.9442771673202515,grad_norm: 0.9999991631907087, iteration: 163612
loss: 0.9959630370140076,grad_norm: 0.9811015549769774, iteration: 163613
loss: 1.0236928462982178,grad_norm: 0.999999277315236, iteration: 163614
loss: 1.0048410892486572,grad_norm: 0.999999129013632, iteration: 163615
loss: 1.0129073858261108,grad_norm: 0.9999991191413267, iteration: 163616
loss: 1.0072057247161865,grad_norm: 0.9999990929863732, iteration: 163617
loss: 1.0651615858078003,grad_norm: 0.9999992974751525, iteration: 163618
loss: 0.9881532788276672,grad_norm: 0.9044588238340404, iteration: 163619
loss: 1.0141518115997314,grad_norm: 0.9999992704351575, iteration: 163620
loss: 1.0480130910873413,grad_norm: 0.9999995829834634, iteration: 163621
loss: 0.9920785427093506,grad_norm: 0.9607564524104668, iteration: 163622
loss: 0.9931618571281433,grad_norm: 0.9999991993815722, iteration: 163623
loss: 0.9710412621498108,grad_norm: 0.9999991571125747, iteration: 163624
loss: 0.9775422811508179,grad_norm: 0.9260304919026436, iteration: 163625
loss: 1.0087339878082275,grad_norm: 0.9999990100962582, iteration: 163626
loss: 1.0074864625930786,grad_norm: 0.944076665808856, iteration: 163627
loss: 1.0235542058944702,grad_norm: 0.8976620785062412, iteration: 163628
loss: 1.024785041809082,grad_norm: 0.9479611297450818, iteration: 163629
loss: 1.00852370262146,grad_norm: 0.999999081745818, iteration: 163630
loss: 1.0110090970993042,grad_norm: 0.9999991770535365, iteration: 163631
loss: 0.9847963452339172,grad_norm: 0.999999218449941, iteration: 163632
loss: 0.9781361818313599,grad_norm: 0.8664687087889598, iteration: 163633
loss: 0.9921850562095642,grad_norm: 0.9999990956030953, iteration: 163634
loss: 0.9595969915390015,grad_norm: 0.873592353210136, iteration: 163635
loss: 0.9935940504074097,grad_norm: 0.8179504762974109, iteration: 163636
loss: 0.9571124911308289,grad_norm: 0.9999990928769205, iteration: 163637
loss: 0.9736026525497437,grad_norm: 0.9999991363321619, iteration: 163638
loss: 0.9963589906692505,grad_norm: 0.9999991594951603, iteration: 163639
loss: 1.0028223991394043,grad_norm: 0.9999990692664995, iteration: 163640
loss: 1.0120823383331299,grad_norm: 0.9421459644701677, iteration: 163641
loss: 1.0024720430374146,grad_norm: 0.9952435129394808, iteration: 163642
loss: 1.0302643775939941,grad_norm: 0.999999097481674, iteration: 163643
loss: 1.011672019958496,grad_norm: 0.9999990129134848, iteration: 163644
loss: 0.9380840063095093,grad_norm: 0.9999991636345344, iteration: 163645
loss: 1.0187019109725952,grad_norm: 0.9999990486356335, iteration: 163646
loss: 1.1482212543487549,grad_norm: 0.9999992400499478, iteration: 163647
loss: 0.9897661209106445,grad_norm: 0.9999994877820831, iteration: 163648
loss: 0.980231523513794,grad_norm: 0.999999120650212, iteration: 163649
loss: 1.0040189027786255,grad_norm: 0.9663598632837562, iteration: 163650
loss: 1.0289711952209473,grad_norm: 0.9999991549758646, iteration: 163651
loss: 0.9928199052810669,grad_norm: 0.9389853854870959, iteration: 163652
loss: 0.9728261232376099,grad_norm: 0.9615433526716686, iteration: 163653
loss: 0.9795230627059937,grad_norm: 0.9272489633010818, iteration: 163654
loss: 1.0859544277191162,grad_norm: 0.9999996431431811, iteration: 163655
loss: 0.9735021591186523,grad_norm: 0.9764768398005794, iteration: 163656
loss: 1.0062614679336548,grad_norm: 0.9367069916207881, iteration: 163657
loss: 1.0336053371429443,grad_norm: 0.9999992084193738, iteration: 163658
loss: 1.0040791034698486,grad_norm: 0.9999990134760077, iteration: 163659
loss: 1.0176702737808228,grad_norm: 0.9701929093238588, iteration: 163660
loss: 0.9976406097412109,grad_norm: 0.9360207038538754, iteration: 163661
loss: 0.9885594248771667,grad_norm: 0.9999991681289243, iteration: 163662
loss: 0.9623315930366516,grad_norm: 0.8602018210223353, iteration: 163663
loss: 1.0107775926589966,grad_norm: 0.9999990408341491, iteration: 163664
loss: 0.9769980311393738,grad_norm: 0.9999990235233895, iteration: 163665
loss: 0.9891220331192017,grad_norm: 0.999999112261195, iteration: 163666
loss: 0.9664328694343567,grad_norm: 0.9999995894945819, iteration: 163667
loss: 1.0294479131698608,grad_norm: 0.9999991534715038, iteration: 163668
loss: 1.0194870233535767,grad_norm: 0.9918929252245228, iteration: 163669
loss: 1.0130778551101685,grad_norm: 0.999999167285856, iteration: 163670
loss: 0.9993659853935242,grad_norm: 0.9086383277605048, iteration: 163671
loss: 1.0070229768753052,grad_norm: 0.85651953049858, iteration: 163672
loss: 0.9972143769264221,grad_norm: 0.9999990932507293, iteration: 163673
loss: 0.9959566593170166,grad_norm: 0.979764323837669, iteration: 163674
loss: 0.9837130904197693,grad_norm: 0.9999997609414945, iteration: 163675
loss: 1.0372029542922974,grad_norm: 0.9999993506716994, iteration: 163676
loss: 1.0379940271377563,grad_norm: 0.999999642986775, iteration: 163677
loss: 0.9652965664863586,grad_norm: 0.9999998556847165, iteration: 163678
loss: 1.026617169380188,grad_norm: 0.9355815153774001, iteration: 163679
loss: 1.0025190114974976,grad_norm: 0.8688598898513907, iteration: 163680
loss: 0.9987990260124207,grad_norm: 0.9999990798452698, iteration: 163681
loss: 0.9430786371231079,grad_norm: 0.9317319410079955, iteration: 163682
loss: 1.0345731973648071,grad_norm: 0.9999996741677082, iteration: 163683
loss: 1.1402751207351685,grad_norm: 0.9999990593854642, iteration: 163684
loss: 0.9698265194892883,grad_norm: 0.9581651926989447, iteration: 163685
loss: 1.0846996307373047,grad_norm: 0.9999997671697202, iteration: 163686
loss: 0.9720260500907898,grad_norm: 0.9148920756530412, iteration: 163687
loss: 0.9847363829612732,grad_norm: 0.9489083347088239, iteration: 163688
loss: 1.0326588153839111,grad_norm: 0.9999994650731274, iteration: 163689
loss: 1.021384358406067,grad_norm: 0.9208228848813835, iteration: 163690
loss: 1.0337661504745483,grad_norm: 0.9999994638291285, iteration: 163691
loss: 0.9860536456108093,grad_norm: 0.952371046397891, iteration: 163692
loss: 0.9873414039611816,grad_norm: 0.9999989664072918, iteration: 163693
loss: 0.9841389060020447,grad_norm: 0.9999992169850831, iteration: 163694
loss: 1.0850228071212769,grad_norm: 0.9658354429625859, iteration: 163695
loss: 0.994509220123291,grad_norm: 0.999999145867807, iteration: 163696
loss: 0.9762681722640991,grad_norm: 0.8918380474726856, iteration: 163697
loss: 1.1471669673919678,grad_norm: 0.9999996122611654, iteration: 163698
loss: 0.9934040307998657,grad_norm: 0.9999990284165085, iteration: 163699
loss: 0.9983642101287842,grad_norm: 0.9999991778812973, iteration: 163700
loss: 1.0095840692520142,grad_norm: 0.9999990738729274, iteration: 163701
loss: 0.9508790373802185,grad_norm: 0.8494990904725577, iteration: 163702
loss: 1.1046069860458374,grad_norm: 0.9999999369987385, iteration: 163703
loss: 1.063920259475708,grad_norm: 0.9999993301373735, iteration: 163704
loss: 0.965082585811615,grad_norm: 0.8535446022389632, iteration: 163705
loss: 1.0502699613571167,grad_norm: 0.999999111076336, iteration: 163706
loss: 0.9899965524673462,grad_norm: 0.9371482770386653, iteration: 163707
loss: 0.9997997879981995,grad_norm: 0.9886897334862331, iteration: 163708
loss: 1.002678632736206,grad_norm: 0.9999990471836522, iteration: 163709
loss: 0.9734870791435242,grad_norm: 0.8588110090959749, iteration: 163710
loss: 1.0242668390274048,grad_norm: 0.9999991995820591, iteration: 163711
loss: 0.9864485263824463,grad_norm: 0.9651043320809284, iteration: 163712
loss: 1.0558091402053833,grad_norm: 0.9999996519203568, iteration: 163713
loss: 1.0196775197982788,grad_norm: 0.9999991123337065, iteration: 163714
loss: 0.9792799949645996,grad_norm: 0.9999991303103516, iteration: 163715
loss: 1.0126017332077026,grad_norm: 0.852754806355732, iteration: 163716
loss: 0.9717934131622314,grad_norm: 0.9893070177568037, iteration: 163717
loss: 0.9938991665840149,grad_norm: 0.99999894563783, iteration: 163718
loss: 0.9883200526237488,grad_norm: 0.9585118717342993, iteration: 163719
loss: 1.0053271055221558,grad_norm: 0.9202625594132686, iteration: 163720
loss: 1.0215928554534912,grad_norm: 0.9999989362113163, iteration: 163721
loss: 1.0443223714828491,grad_norm: 0.9999988111375135, iteration: 163722
loss: 1.0189839601516724,grad_norm: 0.8831037101199449, iteration: 163723
loss: 0.9791809320449829,grad_norm: 0.9999991232667929, iteration: 163724
loss: 1.02805495262146,grad_norm: 0.99999909364264, iteration: 163725
loss: 0.9987242817878723,grad_norm: 0.8699991072678616, iteration: 163726
loss: 1.0160423517227173,grad_norm: 0.9999990552303964, iteration: 163727
loss: 1.0498172044754028,grad_norm: 0.9999992414900102, iteration: 163728
loss: 1.0241661071777344,grad_norm: 0.999999351842013, iteration: 163729
loss: 0.9644896984100342,grad_norm: 0.9400773117157332, iteration: 163730
loss: 1.0002917051315308,grad_norm: 0.9433776732206636, iteration: 163731
loss: 1.038711667060852,grad_norm: 0.9999991697709115, iteration: 163732
loss: 1.0258296728134155,grad_norm: 0.9999998782890892, iteration: 163733
loss: 0.975455105304718,grad_norm: 0.9176061997601938, iteration: 163734
loss: 1.0089006423950195,grad_norm: 0.9999991986041638, iteration: 163735
loss: 1.1211823225021362,grad_norm: 0.9999995831881058, iteration: 163736
loss: 1.030412197113037,grad_norm: 0.9999989952358646, iteration: 163737
loss: 1.0356242656707764,grad_norm: 0.8079529900470032, iteration: 163738
loss: 1.0417768955230713,grad_norm: 0.999999030057881, iteration: 163739
loss: 1.0277265310287476,grad_norm: 0.9999991401280175, iteration: 163740
loss: 1.0075256824493408,grad_norm: 0.9999992557818311, iteration: 163741
loss: 1.025995135307312,grad_norm: 0.8755121453016669, iteration: 163742
loss: 1.085858702659607,grad_norm: 0.9082979436247833, iteration: 163743
loss: 1.0231654644012451,grad_norm: 0.9599693003541553, iteration: 163744
loss: 1.0107378959655762,grad_norm: 0.8904669294542471, iteration: 163745
loss: 1.0999195575714111,grad_norm: 0.9999995122302455, iteration: 163746
loss: 1.1353975534439087,grad_norm: 0.9999996729529799, iteration: 163747
loss: 1.0313010215759277,grad_norm: 0.9999991030499145, iteration: 163748
loss: 0.9858007431030273,grad_norm: 0.8931095029235679, iteration: 163749
loss: 1.0395890474319458,grad_norm: 0.9999990357057101, iteration: 163750
loss: 1.0190831422805786,grad_norm: 0.9012893228460317, iteration: 163751
loss: 1.0469077825546265,grad_norm: 0.9959281933243708, iteration: 163752
loss: 1.1814758777618408,grad_norm: 0.9999998052356627, iteration: 163753
loss: 0.9610189199447632,grad_norm: 0.9999991244851789, iteration: 163754
loss: 1.1005983352661133,grad_norm: 1.0000000528033008, iteration: 163755
loss: 0.9941349029541016,grad_norm: 0.8362454879267003, iteration: 163756
loss: 0.9918957352638245,grad_norm: 0.8134972345518574, iteration: 163757
loss: 1.0350152254104614,grad_norm: 0.9999995286932303, iteration: 163758
loss: 0.9981818199157715,grad_norm: 0.9423722684294222, iteration: 163759
loss: 0.9417043328285217,grad_norm: 0.9999990609020292, iteration: 163760
loss: 0.9898181557655334,grad_norm: 0.999999059918533, iteration: 163761
loss: 1.0094351768493652,grad_norm: 0.999999216422946, iteration: 163762
loss: 1.0126744508743286,grad_norm: 0.9999992255558919, iteration: 163763
loss: 0.9733842015266418,grad_norm: 0.9999989935051303, iteration: 163764
loss: 1.0071488618850708,grad_norm: 0.9999991495782963, iteration: 163765
loss: 1.0236321687698364,grad_norm: 0.9999990543780529, iteration: 163766
loss: 1.0431365966796875,grad_norm: 0.9999999225346172, iteration: 163767
loss: 0.9908439517021179,grad_norm: 0.9999995725906682, iteration: 163768
loss: 1.062878966331482,grad_norm: 0.9999995823132989, iteration: 163769
loss: 1.0075656175613403,grad_norm: 0.9999991547814281, iteration: 163770
loss: 1.0159343481063843,grad_norm: 0.8740186398985335, iteration: 163771
loss: 1.0051993131637573,grad_norm: 0.9932071436051404, iteration: 163772
loss: 0.9469065070152283,grad_norm: 0.9859314490921383, iteration: 163773
loss: 1.0339981317520142,grad_norm: 0.9999990104765207, iteration: 163774
loss: 1.0249658823013306,grad_norm: 0.9999991626516405, iteration: 163775
loss: 1.0244641304016113,grad_norm: 0.9999991559717455, iteration: 163776
loss: 1.005577802658081,grad_norm: 0.9999991161992223, iteration: 163777
loss: 1.0928136110305786,grad_norm: 0.9999995916546497, iteration: 163778
loss: 1.0324997901916504,grad_norm: 0.999999869419399, iteration: 163779
loss: 1.0297411680221558,grad_norm: 0.9722458878878469, iteration: 163780
loss: 1.005776047706604,grad_norm: 0.8112847316330594, iteration: 163781
loss: 0.9843209385871887,grad_norm: 0.9186156035051786, iteration: 163782
loss: 1.0108028650283813,grad_norm: 0.9741312699052831, iteration: 163783
loss: 0.9802401661872864,grad_norm: 0.9999991172187901, iteration: 163784
loss: 1.0004936456680298,grad_norm: 0.9999992600846995, iteration: 163785
loss: 0.9874727129936218,grad_norm: 0.999999293924165, iteration: 163786
loss: 1.044969081878662,grad_norm: 0.9999992631220517, iteration: 163787
loss: 1.0463662147521973,grad_norm: 0.9399559655918486, iteration: 163788
loss: 0.9875781536102295,grad_norm: 0.8924196128924909, iteration: 163789
loss: 0.9918542504310608,grad_norm: 0.9033070534803217, iteration: 163790
loss: 1.026418924331665,grad_norm: 0.8293202570660713, iteration: 163791
loss: 0.963272750377655,grad_norm: 0.936240508259886, iteration: 163792
loss: 1.007948398590088,grad_norm: 0.9999989510686258, iteration: 163793
loss: 0.9965665936470032,grad_norm: 0.9999992766255024, iteration: 163794
loss: 1.0180736780166626,grad_norm: 0.8384651475090373, iteration: 163795
loss: 1.0141634941101074,grad_norm: 0.8139271538465717, iteration: 163796
loss: 1.0082253217697144,grad_norm: 0.9999991205599446, iteration: 163797
loss: 1.0201926231384277,grad_norm: 0.9750377521647472, iteration: 163798
loss: 0.9999480843544006,grad_norm: 0.9999991208453248, iteration: 163799
loss: 1.001263976097107,grad_norm: 0.9999990551957413, iteration: 163800
loss: 0.9855161309242249,grad_norm: 0.89283090664487, iteration: 163801
loss: 1.0490764379501343,grad_norm: 0.9999998380737912, iteration: 163802
loss: 0.9986081123352051,grad_norm: 0.9999989482372766, iteration: 163803
loss: 1.0478051900863647,grad_norm: 0.9999990251673543, iteration: 163804
loss: 0.9998506903648376,grad_norm: 0.9259925288648262, iteration: 163805
loss: 1.00617516040802,grad_norm: 0.9723922441422232, iteration: 163806
loss: 0.9984542727470398,grad_norm: 0.9410423982466058, iteration: 163807
loss: 0.9890360236167908,grad_norm: 0.999999134740425, iteration: 163808
loss: 1.0084059238433838,grad_norm: 0.9999991330328964, iteration: 163809
loss: 1.0191017389297485,grad_norm: 0.9999991622966472, iteration: 163810
loss: 0.973901629447937,grad_norm: 0.9999991487839446, iteration: 163811
loss: 0.9829166531562805,grad_norm: 0.9999992106537426, iteration: 163812
loss: 1.0150409936904907,grad_norm: 0.9412360015201667, iteration: 163813
loss: 0.9945289492607117,grad_norm: 0.9673067915197697, iteration: 163814
loss: 1.0550878047943115,grad_norm: 0.9549620656539398, iteration: 163815
loss: 0.9943828582763672,grad_norm: 0.999999629517781, iteration: 163816
loss: 0.9890117049217224,grad_norm: 0.9280194433793877, iteration: 163817
loss: 1.0004539489746094,grad_norm: 0.9438283802134533, iteration: 163818
loss: 0.958897054195404,grad_norm: 0.9955687287000666, iteration: 163819
loss: 1.0119633674621582,grad_norm: 0.9999991047525726, iteration: 163820
loss: 1.0405256748199463,grad_norm: 0.9999991327657693, iteration: 163821
loss: 0.9928542971611023,grad_norm: 0.9999991392094587, iteration: 163822
loss: 1.0199137926101685,grad_norm: 0.999999051772319, iteration: 163823
loss: 0.9845249652862549,grad_norm: 0.9999991471223195, iteration: 163824
loss: 0.9883450269699097,grad_norm: 0.9696928656913848, iteration: 163825
loss: 0.984771192073822,grad_norm: 0.99999898689664, iteration: 163826
loss: 1.0064324140548706,grad_norm: 0.9769246421628356, iteration: 163827
loss: 1.0234938859939575,grad_norm: 0.9999990515997369, iteration: 163828
loss: 0.9863397479057312,grad_norm: 0.9999991766660022, iteration: 163829
loss: 1.008651852607727,grad_norm: 0.9999990730270412, iteration: 163830
loss: 1.0218123197555542,grad_norm: 0.9627180520739251, iteration: 163831
loss: 0.9877271056175232,grad_norm: 0.9597707592409506, iteration: 163832
loss: 0.9955734610557556,grad_norm: 0.9999991286044352, iteration: 163833
loss: 1.0224006175994873,grad_norm: 0.8966739448003809, iteration: 163834
loss: 1.0388036966323853,grad_norm: 0.999998965456946, iteration: 163835
loss: 1.0099376440048218,grad_norm: 0.97478869866028, iteration: 163836
loss: 1.0465744733810425,grad_norm: 0.9999998203031921, iteration: 163837
loss: 0.9889892935752869,grad_norm: 0.8918531509980866, iteration: 163838
loss: 0.981673538684845,grad_norm: 0.9999991268536386, iteration: 163839
loss: 0.9608332514762878,grad_norm: 0.970145254414509, iteration: 163840
loss: 1.003035545349121,grad_norm: 0.9758088698725175, iteration: 163841
loss: 1.003982663154602,grad_norm: 0.9999989356870078, iteration: 163842
loss: 1.0336072444915771,grad_norm: 0.9999994260332131, iteration: 163843
loss: 0.9766901135444641,grad_norm: 0.9999991908487624, iteration: 163844
loss: 0.9727988243103027,grad_norm: 0.9999991039667552, iteration: 163845
loss: 1.0264294147491455,grad_norm: 0.9516296094079855, iteration: 163846
loss: 0.9732918739318848,grad_norm: 0.9131562862566288, iteration: 163847
loss: 0.9954341053962708,grad_norm: 0.9999991555552104, iteration: 163848
loss: 1.0043853521347046,grad_norm: 0.9999991080564493, iteration: 163849
loss: 0.9749077558517456,grad_norm: 0.8853153533122236, iteration: 163850
loss: 0.9986644983291626,grad_norm: 0.9999993098473209, iteration: 163851
loss: 1.0112369060516357,grad_norm: 0.983377707899077, iteration: 163852
loss: 1.022003412246704,grad_norm: 0.9999996141408767, iteration: 163853
loss: 0.9940838813781738,grad_norm: 0.9125952649898468, iteration: 163854
loss: 0.951521098613739,grad_norm: 0.9276300759857121, iteration: 163855
loss: 1.0010433197021484,grad_norm: 0.999999147191303, iteration: 163856
loss: 1.0103929042816162,grad_norm: 0.9999990782367635, iteration: 163857
loss: 1.0267800092697144,grad_norm: 0.9999991916251709, iteration: 163858
loss: 0.998731791973114,grad_norm: 0.9999992469962167, iteration: 163859
loss: 1.012986660003662,grad_norm: 0.9999990186162426, iteration: 163860
loss: 1.0205193758010864,grad_norm: 0.9562377250238081, iteration: 163861
loss: 0.9971151947975159,grad_norm: 0.9999991452055997, iteration: 163862
loss: 1.0148725509643555,grad_norm: 0.9999990215265162, iteration: 163863
loss: 1.0057456493377686,grad_norm: 0.932065198228325, iteration: 163864
loss: 1.0202760696411133,grad_norm: 0.9999991460712685, iteration: 163865
loss: 1.0389459133148193,grad_norm: 0.9999992804151713, iteration: 163866
loss: 0.9628652334213257,grad_norm: 0.8429446590893547, iteration: 163867
loss: 1.0266178846359253,grad_norm: 0.9999992448371883, iteration: 163868
loss: 0.967458188533783,grad_norm: 0.9999991409986742, iteration: 163869
loss: 0.9744925498962402,grad_norm: 0.9999992639422502, iteration: 163870
loss: 1.0292623043060303,grad_norm: 0.9999991111474285, iteration: 163871
loss: 1.0250931978225708,grad_norm: 0.9999990066284278, iteration: 163872
loss: 0.9997069835662842,grad_norm: 0.8907089834829206, iteration: 163873
loss: 1.0125471353530884,grad_norm: 0.9404963722461461, iteration: 163874
loss: 0.9957675337791443,grad_norm: 0.8873099204182174, iteration: 163875
loss: 0.9611615538597107,grad_norm: 0.9296461628404148, iteration: 163876
loss: 0.9873188138008118,grad_norm: 0.9999990675268995, iteration: 163877
loss: 1.042677640914917,grad_norm: 0.9340244851836885, iteration: 163878
loss: 1.0152579545974731,grad_norm: 0.8358640752914487, iteration: 163879
loss: 1.0096197128295898,grad_norm: 0.9963753747545163, iteration: 163880
loss: 0.9692882299423218,grad_norm: 0.9999991056399127, iteration: 163881
loss: 1.0187041759490967,grad_norm: 0.9013917703527273, iteration: 163882
loss: 1.007877230644226,grad_norm: 0.9999991448691826, iteration: 163883
loss: 1.00615656375885,grad_norm: 0.9999990839280971, iteration: 163884
loss: 1.0074338912963867,grad_norm: 0.8868831534916545, iteration: 163885
loss: 1.0414540767669678,grad_norm: 0.9999991424633287, iteration: 163886
loss: 0.9789184331893921,grad_norm: 0.9412247386982976, iteration: 163887
loss: 1.0179561376571655,grad_norm: 0.9999991522444405, iteration: 163888
loss: 1.0181289911270142,grad_norm: 0.9286873075252636, iteration: 163889
loss: 1.007522463798523,grad_norm: 0.9999992577758408, iteration: 163890
loss: 1.010619878768921,grad_norm: 0.902903729564248, iteration: 163891
loss: 0.9589353799819946,grad_norm: 0.844452995056451, iteration: 163892
loss: 0.991023063659668,grad_norm: 0.9999991187130813, iteration: 163893
loss: 1.0326893329620361,grad_norm: 0.9999990165439724, iteration: 163894
loss: 0.9623851180076599,grad_norm: 0.9757950951300169, iteration: 163895
loss: 0.9947819709777832,grad_norm: 0.9999991188809372, iteration: 163896
loss: 1.0086610317230225,grad_norm: 0.9999990922402, iteration: 163897
loss: 1.0131311416625977,grad_norm: 0.9999991074602406, iteration: 163898
loss: 1.00406014919281,grad_norm: 0.9353519452474911, iteration: 163899
loss: 1.000634789466858,grad_norm: 0.9999993205965646, iteration: 163900
loss: 1.017059564590454,grad_norm: 0.999999019254577, iteration: 163901
loss: 1.033717393875122,grad_norm: 0.99999916996837, iteration: 163902
loss: 0.9604130983352661,grad_norm: 0.9999991604081997, iteration: 163903
loss: 1.0045355558395386,grad_norm: 0.9999992195179349, iteration: 163904
loss: 1.016872763633728,grad_norm: 0.9999990980732456, iteration: 163905
loss: 1.007385015487671,grad_norm: 0.9999990584235544, iteration: 163906
loss: 1.0204623937606812,grad_norm: 0.9999991759675296, iteration: 163907
loss: 1.0111128091812134,grad_norm: 0.99999914615428, iteration: 163908
loss: 0.9973642230033875,grad_norm: 0.9013363547708994, iteration: 163909
loss: 1.03304123878479,grad_norm: 0.8482076564908493, iteration: 163910
loss: 1.019087791442871,grad_norm: 0.802899366322398, iteration: 163911
loss: 1.0234017372131348,grad_norm: 0.9999990420633881, iteration: 163912
loss: 1.006188988685608,grad_norm: 0.9640839480040944, iteration: 163913
loss: 1.0105090141296387,grad_norm: 0.8814969357328614, iteration: 163914
loss: 0.9982556104660034,grad_norm: 0.9211624527316186, iteration: 163915
loss: 0.9826235771179199,grad_norm: 0.9999992839527595, iteration: 163916
loss: 1.0001130104064941,grad_norm: 0.8826707321163125, iteration: 163917
loss: 1.0180761814117432,grad_norm: 0.8946948977480734, iteration: 163918
loss: 1.0350043773651123,grad_norm: 0.8814972484928207, iteration: 163919
loss: 0.9731972217559814,grad_norm: 0.9654928665129918, iteration: 163920
loss: 0.9723832607269287,grad_norm: 0.9010121529301282, iteration: 163921
loss: 0.984932005405426,grad_norm: 0.9364954746055316, iteration: 163922
loss: 1.0347177982330322,grad_norm: 0.9999990409813828, iteration: 163923
loss: 1.0084476470947266,grad_norm: 0.8753193160144245, iteration: 163924
loss: 0.9981284737586975,grad_norm: 0.9999992447036722, iteration: 163925
loss: 0.9880436062812805,grad_norm: 0.9671299115015588, iteration: 163926
loss: 0.9985935091972351,grad_norm: 0.9789367237290854, iteration: 163927
loss: 1.0031429529190063,grad_norm: 0.9999991863936916, iteration: 163928
loss: 0.9667984247207642,grad_norm: 0.9999991478852867, iteration: 163929
loss: 0.9708988070487976,grad_norm: 0.9050454101695722, iteration: 163930
loss: 1.0485122203826904,grad_norm: 0.9999993093159091, iteration: 163931
loss: 1.0228335857391357,grad_norm: 0.9999989988524558, iteration: 163932
loss: 0.9867709875106812,grad_norm: 0.922195533050328, iteration: 163933
loss: 1.0043588876724243,grad_norm: 0.9053014827801069, iteration: 163934
loss: 1.002751350402832,grad_norm: 0.999998967301491, iteration: 163935
loss: 1.01828134059906,grad_norm: 0.9999991541579412, iteration: 163936
loss: 1.005402684211731,grad_norm: 0.930882459799363, iteration: 163937
loss: 0.9975060224533081,grad_norm: 0.9999992179467687, iteration: 163938
loss: 1.02705979347229,grad_norm: 0.999999156229326, iteration: 163939
loss: 0.9935821294784546,grad_norm: 0.9769029452328498, iteration: 163940
loss: 1.01864492893219,grad_norm: 0.8426137567596627, iteration: 163941
loss: 1.0555745363235474,grad_norm: 0.9999996343696997, iteration: 163942
loss: 0.9555305242538452,grad_norm: 0.9999993314784305, iteration: 163943
loss: 0.9991745352745056,grad_norm: 0.886807009252111, iteration: 163944
loss: 0.9637318253517151,grad_norm: 0.9999992092679124, iteration: 163945
loss: 0.9931573867797852,grad_norm: 0.9762313867588572, iteration: 163946
loss: 1.022170901298523,grad_norm: 0.9999990973631626, iteration: 163947
loss: 0.9691729545593262,grad_norm: 0.970703208403355, iteration: 163948
loss: 0.9456605315208435,grad_norm: 0.9999991292044018, iteration: 163949
loss: 1.013313889503479,grad_norm: 0.9153300778640153, iteration: 163950
loss: 1.0128469467163086,grad_norm: 0.9851856928736636, iteration: 163951
loss: 1.008074402809143,grad_norm: 0.999999208231866, iteration: 163952
loss: 0.9731429815292358,grad_norm: 0.9999990952316095, iteration: 163953
loss: 0.9670647382736206,grad_norm: 0.9772458869790003, iteration: 163954
loss: 0.9639275074005127,grad_norm: 0.9999991428290247, iteration: 163955
loss: 1.0389232635498047,grad_norm: 0.9999990150479133, iteration: 163956
loss: 0.9781659245491028,grad_norm: 0.9999991440865049, iteration: 163957
loss: 1.0148401260375977,grad_norm: 0.999999072290078, iteration: 163958
loss: 0.9852192401885986,grad_norm: 0.998489789787671, iteration: 163959
loss: 0.9953060150146484,grad_norm: 0.8402811454451005, iteration: 163960
loss: 0.9744502305984497,grad_norm: 0.9132996676850612, iteration: 163961
loss: 0.9922713041305542,grad_norm: 0.9547940882998778, iteration: 163962
loss: 0.9886051416397095,grad_norm: 0.9999991282355303, iteration: 163963
loss: 0.9994704723358154,grad_norm: 0.9625237794364099, iteration: 163964
loss: 0.9831926822662354,grad_norm: 0.8956414287373213, iteration: 163965
loss: 0.9951109886169434,grad_norm: 0.9999992751396318, iteration: 163966
loss: 1.0622975826263428,grad_norm: 0.9999991607133258, iteration: 163967
loss: 1.0061976909637451,grad_norm: 0.9218757772254474, iteration: 163968
loss: 1.0162519216537476,grad_norm: 0.9999990807669892, iteration: 163969
loss: 1.0039422512054443,grad_norm: 0.9241625887581786, iteration: 163970
loss: 0.9609745740890503,grad_norm: 0.9567722797069574, iteration: 163971
loss: 1.0145010948181152,grad_norm: 0.9578126840070458, iteration: 163972
loss: 0.9619326591491699,grad_norm: 0.99999925822881, iteration: 163973
loss: 0.980143129825592,grad_norm: 0.9231490481793793, iteration: 163974
loss: 1.0089131593704224,grad_norm: 0.918330043304847, iteration: 163975
loss: 1.0131670236587524,grad_norm: 0.9999991308715594, iteration: 163976
loss: 1.0188218355178833,grad_norm: 0.9999990447937616, iteration: 163977
loss: 1.005813479423523,grad_norm: 0.9999990529983954, iteration: 163978
loss: 1.0002331733703613,grad_norm: 0.9999992049587254, iteration: 163979
loss: 0.9865874648094177,grad_norm: 0.9531455739204575, iteration: 163980
loss: 0.9973335266113281,grad_norm: 0.9999990502411978, iteration: 163981
loss: 0.9696618914604187,grad_norm: 0.9999992693499135, iteration: 163982
loss: 0.996137261390686,grad_norm: 0.9999991858680987, iteration: 163983
loss: 0.9977220296859741,grad_norm: 0.9346027230470983, iteration: 163984
loss: 1.0044848918914795,grad_norm: 0.9447103938346085, iteration: 163985
loss: 0.9950001239776611,grad_norm: 0.8235140598097894, iteration: 163986
loss: 1.0133023262023926,grad_norm: 0.8304932837336513, iteration: 163987
loss: 0.9952178001403809,grad_norm: 0.9893023571928224, iteration: 163988
loss: 0.974236249923706,grad_norm: 0.7980436403179462, iteration: 163989
loss: 0.997430145740509,grad_norm: 0.9999990846091493, iteration: 163990
loss: 0.9952352643013,grad_norm: 0.8770476356629094, iteration: 163991
loss: 0.9949908256530762,grad_norm: 0.9368242565431992, iteration: 163992
loss: 0.9664857983589172,grad_norm: 0.9999990538497806, iteration: 163993
loss: 0.9755667448043823,grad_norm: 0.9999991694702519, iteration: 163994
loss: 1.0004806518554688,grad_norm: 0.9999991435643508, iteration: 163995
loss: 0.9989429712295532,grad_norm: 0.9999991268481521, iteration: 163996
loss: 1.006624698638916,grad_norm: 0.9844963521936606, iteration: 163997
loss: 1.004876971244812,grad_norm: 0.9237702363779726, iteration: 163998
loss: 0.995589554309845,grad_norm: 0.9999991654470486, iteration: 163999
loss: 1.009883165359497,grad_norm: 0.8813412562695341, iteration: 164000
loss: 1.0038594007492065,grad_norm: 0.9999989595704216, iteration: 164001
loss: 1.038945198059082,grad_norm: 0.87797936644915, iteration: 164002
loss: 0.9818436503410339,grad_norm: 0.9519614403327452, iteration: 164003
loss: 0.9938890337944031,grad_norm: 0.9999990762933145, iteration: 164004
loss: 1.0088222026824951,grad_norm: 0.9999991099126799, iteration: 164005
loss: 0.9801061153411865,grad_norm: 0.9999989835085967, iteration: 164006
loss: 0.9998329281806946,grad_norm: 0.8556173210892437, iteration: 164007
loss: 1.0247316360473633,grad_norm: 0.8515222356999875, iteration: 164008
loss: 0.9979326128959656,grad_norm: 0.9369939451612602, iteration: 164009
loss: 1.0231518745422363,grad_norm: 0.999999031033165, iteration: 164010
loss: 1.0128313302993774,grad_norm: 0.9141308621427012, iteration: 164011
loss: 1.0090665817260742,grad_norm: 0.9999993012617334, iteration: 164012
loss: 0.9978218674659729,grad_norm: 0.999999036818642, iteration: 164013
loss: 1.0201311111450195,grad_norm: 0.9999992727735822, iteration: 164014
loss: 0.9491266012191772,grad_norm: 0.9999992462346693, iteration: 164015
loss: 0.9683780670166016,grad_norm: 0.9999989684499355, iteration: 164016
loss: 0.9896021485328674,grad_norm: 0.9215776170047447, iteration: 164017
loss: 0.9932569861412048,grad_norm: 0.9013110534844749, iteration: 164018
loss: 0.9861332178115845,grad_norm: 0.9664821542763002, iteration: 164019
loss: 0.9663553237915039,grad_norm: 0.9999990873788972, iteration: 164020
loss: 1.0046372413635254,grad_norm: 0.9709401523110123, iteration: 164021
loss: 0.9878737926483154,grad_norm: 0.9999990100969457, iteration: 164022
loss: 1.002665638923645,grad_norm: 0.9446622205086577, iteration: 164023
loss: 0.9936740398406982,grad_norm: 0.9999990982496295, iteration: 164024
loss: 1.0485074520111084,grad_norm: 0.9999989052027707, iteration: 164025
loss: 0.971686840057373,grad_norm: 0.8756472914775429, iteration: 164026
loss: 0.9813883304595947,grad_norm: 0.9999991950171367, iteration: 164027
loss: 1.0089248418807983,grad_norm: 0.9999992173631796, iteration: 164028
loss: 0.9945535063743591,grad_norm: 0.9759562570234089, iteration: 164029
loss: 1.0647187232971191,grad_norm: 0.9456269737824855, iteration: 164030
loss: 1.006221890449524,grad_norm: 0.9837812273227039, iteration: 164031
loss: 0.9853715300559998,grad_norm: 0.9424498597885991, iteration: 164032
loss: 1.0023282766342163,grad_norm: 0.9766642513394562, iteration: 164033
loss: 1.0047607421875,grad_norm: 0.8621655168408546, iteration: 164034
loss: 1.0482004880905151,grad_norm: 0.9999992652387834, iteration: 164035
loss: 0.9722402095794678,grad_norm: 0.8938728039106959, iteration: 164036
loss: 0.9775252938270569,grad_norm: 0.9685488296304466, iteration: 164037
loss: 1.0090644359588623,grad_norm: 0.9999990459642751, iteration: 164038
loss: 0.9921293258666992,grad_norm: 0.8525179150752957, iteration: 164039
loss: 1.0042221546173096,grad_norm: 0.9999991885265648, iteration: 164040
loss: 1.000028371810913,grad_norm: 0.9062956447776339, iteration: 164041
loss: 0.995207667350769,grad_norm: 0.89912546463248, iteration: 164042
loss: 1.0307879447937012,grad_norm: 0.9999991901485347, iteration: 164043
loss: 1.03265380859375,grad_norm: 0.9442979309162746, iteration: 164044
loss: 0.9799973368644714,grad_norm: 0.9824574138844131, iteration: 164045
loss: 0.9983881115913391,grad_norm: 0.9611129635242416, iteration: 164046
loss: 1.010219693183899,grad_norm: 0.9999991761522716, iteration: 164047
loss: 1.0042734146118164,grad_norm: 0.952742647750986, iteration: 164048
loss: 1.01887047290802,grad_norm: 0.9999991906110144, iteration: 164049
loss: 1.0002894401550293,grad_norm: 0.9999991585805974, iteration: 164050
loss: 0.9914646148681641,grad_norm: 0.9688575645720618, iteration: 164051
loss: 0.9922672510147095,grad_norm: 0.7744656113234932, iteration: 164052
loss: 0.9719564318656921,grad_norm: 0.9610938183015408, iteration: 164053
loss: 0.9528184533119202,grad_norm: 0.9519321239209291, iteration: 164054
loss: 1.0107747316360474,grad_norm: 0.9999990826239747, iteration: 164055
loss: 0.9839105606079102,grad_norm: 0.9363196793766624, iteration: 164056
loss: 1.0345711708068848,grad_norm: 0.9999991113792566, iteration: 164057
loss: 0.9781684875488281,grad_norm: 0.8929238128438143, iteration: 164058
loss: 1.010981798171997,grad_norm: 0.9999992418148645, iteration: 164059
loss: 1.0120548009872437,grad_norm: 0.8310262212850057, iteration: 164060
loss: 1.0380215644836426,grad_norm: 0.9733024210344076, iteration: 164061
loss: 0.988436758518219,grad_norm: 0.9964303968793201, iteration: 164062
loss: 0.9824185967445374,grad_norm: 0.9999989180000215, iteration: 164063
loss: 1.0036736726760864,grad_norm: 0.7153090297720507, iteration: 164064
loss: 1.0150272846221924,grad_norm: 0.9939115204289388, iteration: 164065
loss: 1.0005398988723755,grad_norm: 0.9999993298596941, iteration: 164066
loss: 1.0191582441329956,grad_norm: 0.9999990752522612, iteration: 164067
loss: 0.9843814969062805,grad_norm: 0.9999991182084425, iteration: 164068
loss: 1.0236245393753052,grad_norm: 0.9999991126105203, iteration: 164069
loss: 1.01144540309906,grad_norm: 0.8520917919396049, iteration: 164070
loss: 1.0239002704620361,grad_norm: 0.9999991362641836, iteration: 164071
loss: 0.9874445199966431,grad_norm: 0.9999993328625401, iteration: 164072
loss: 1.0312042236328125,grad_norm: 0.9999997233031109, iteration: 164073
loss: 0.9775177240371704,grad_norm: 0.8907683529724156, iteration: 164074
loss: 0.9348788261413574,grad_norm: 0.9999990935112983, iteration: 164075
loss: 0.9787091612815857,grad_norm: 0.9999992714689153, iteration: 164076
loss: 1.022325038909912,grad_norm: 0.9999990382737791, iteration: 164077
loss: 1.0053391456604004,grad_norm: 0.9999996970616638, iteration: 164078
loss: 0.9972231984138489,grad_norm: 0.9623438539998607, iteration: 164079
loss: 0.9784042239189148,grad_norm: 0.9507346452126493, iteration: 164080
loss: 1.0218491554260254,grad_norm: 0.999999872389907, iteration: 164081
loss: 1.010785698890686,grad_norm: 0.9999991196482503, iteration: 164082
loss: 0.9997637867927551,grad_norm: 0.9521304261252227, iteration: 164083
loss: 1.0179086923599243,grad_norm: 0.9999990611876988, iteration: 164084
loss: 1.017956018447876,grad_norm: 0.9999991873412174, iteration: 164085
loss: 0.9795379638671875,grad_norm: 0.7958283311297867, iteration: 164086
loss: 0.9942919015884399,grad_norm: 0.9999997334844409, iteration: 164087
loss: 0.996858537197113,grad_norm: 0.7640287488418559, iteration: 164088
loss: 0.9655236601829529,grad_norm: 0.999999083917834, iteration: 164089
loss: 1.0179473161697388,grad_norm: 0.9656476007844002, iteration: 164090
loss: 0.970640242099762,grad_norm: 0.999999114588239, iteration: 164091
loss: 1.0260792970657349,grad_norm: 0.9402082005886726, iteration: 164092
loss: 1.014353632926941,grad_norm: 0.9512812276481432, iteration: 164093
loss: 1.0285780429840088,grad_norm: 0.9801854247642833, iteration: 164094
loss: 1.005751371383667,grad_norm: 0.9545771127089023, iteration: 164095
loss: 1.010880708694458,grad_norm: 0.9999991306004323, iteration: 164096
loss: 1.0118550062179565,grad_norm: 0.9999991260055541, iteration: 164097
loss: 0.9836333394050598,grad_norm: 0.999999285598782, iteration: 164098
loss: 1.0472766160964966,grad_norm: 0.9999993204389575, iteration: 164099
loss: 0.9778836369514465,grad_norm: 0.9999991460195026, iteration: 164100
loss: 0.9875845313072205,grad_norm: 0.999999064581298, iteration: 164101
loss: 1.0009833574295044,grad_norm: 0.9999991165727691, iteration: 164102
loss: 0.9436312913894653,grad_norm: 0.9284755982483291, iteration: 164103
loss: 0.9999080896377563,grad_norm: 0.9999994809488033, iteration: 164104
loss: 1.0055011510849,grad_norm: 0.9999992209416507, iteration: 164105
loss: 1.025612711906433,grad_norm: 0.9999992347955369, iteration: 164106
loss: 1.0321500301361084,grad_norm: 0.9999991180795704, iteration: 164107
loss: 1.0085796117782593,grad_norm: 0.999999020560425, iteration: 164108
loss: 0.9862117171287537,grad_norm: 0.9196107610357074, iteration: 164109
loss: 0.9981050491333008,grad_norm: 0.8822319130641814, iteration: 164110
loss: 1.0070226192474365,grad_norm: 0.9287811259209013, iteration: 164111
loss: 0.9919298887252808,grad_norm: 0.9999989587196855, iteration: 164112
loss: 0.9935815930366516,grad_norm: 0.8834544941800896, iteration: 164113
loss: 1.0300862789154053,grad_norm: 0.9999992473486345, iteration: 164114
loss: 1.010161280632019,grad_norm: 0.9379041063934427, iteration: 164115
loss: 0.9700636267662048,grad_norm: 0.999999102656871, iteration: 164116
loss: 1.0072932243347168,grad_norm: 0.9999990626099695, iteration: 164117
loss: 1.009922981262207,grad_norm: 0.9151303677146089, iteration: 164118
loss: 0.9975240230560303,grad_norm: 0.999999224508255, iteration: 164119
loss: 1.0150091648101807,grad_norm: 0.9275274901462365, iteration: 164120
loss: 0.9800580739974976,grad_norm: 0.9999990169518653, iteration: 164121
loss: 0.9987308979034424,grad_norm: 0.9999991202365797, iteration: 164122
loss: 1.0017518997192383,grad_norm: 0.9999990526906056, iteration: 164123
loss: 0.9773562550544739,grad_norm: 0.8631210257452899, iteration: 164124
loss: 1.0227714776992798,grad_norm: 0.9999992382514853, iteration: 164125
loss: 1.0110461711883545,grad_norm: 0.9999993173877042, iteration: 164126
loss: 1.006397008895874,grad_norm: 0.9999990986927538, iteration: 164127
loss: 1.014178991317749,grad_norm: 0.9999990930476984, iteration: 164128
loss: 1.011745572090149,grad_norm: 0.9735462337599783, iteration: 164129
loss: 0.9937299489974976,grad_norm: 0.9999992344287884, iteration: 164130
loss: 1.0120927095413208,grad_norm: 0.9999992238841736, iteration: 164131
loss: 0.9939465522766113,grad_norm: 0.9680972183769924, iteration: 164132
loss: 1.0856894254684448,grad_norm: 0.9999995987558865, iteration: 164133
loss: 1.017469882965088,grad_norm: 0.9300446436248604, iteration: 164134
loss: 1.0029783248901367,grad_norm: 0.9083223335237174, iteration: 164135
loss: 0.9947853684425354,grad_norm: 0.999999114681102, iteration: 164136
loss: 0.9883133769035339,grad_norm: 0.9999992131529529, iteration: 164137
loss: 1.0256752967834473,grad_norm: 0.9999992358909613, iteration: 164138
loss: 1.0319762229919434,grad_norm: 0.9912763057264866, iteration: 164139
loss: 0.9993530511856079,grad_norm: 0.921193033131021, iteration: 164140
loss: 1.020483136177063,grad_norm: 0.9666444514052778, iteration: 164141
loss: 0.9897569417953491,grad_norm: 0.999999183851094, iteration: 164142
loss: 1.073485016822815,grad_norm: 0.9999993716368551, iteration: 164143
loss: 1.001850962638855,grad_norm: 0.9283587167610257, iteration: 164144
loss: 0.9831923246383667,grad_norm: 0.999999244581496, iteration: 164145
loss: 0.9734771847724915,grad_norm: 0.9638571445556179, iteration: 164146
loss: 1.0028032064437866,grad_norm: 0.9999992716881649, iteration: 164147
loss: 1.0416452884674072,grad_norm: 0.9701741377944376, iteration: 164148
loss: 1.0248541831970215,grad_norm: 0.999999235196824, iteration: 164149
loss: 1.0071470737457275,grad_norm: 0.9999995409713403, iteration: 164150
loss: 1.0198100805282593,grad_norm: 0.9526227190351885, iteration: 164151
loss: 1.014996886253357,grad_norm: 0.879352428875548, iteration: 164152
loss: 0.9920828938484192,grad_norm: 0.9999992429091425, iteration: 164153
loss: 1.0323160886764526,grad_norm: 0.999999670272236, iteration: 164154
loss: 0.990837812423706,grad_norm: 0.9534866397695125, iteration: 164155
loss: 1.0318526029586792,grad_norm: 0.9835180586466222, iteration: 164156
loss: 0.9853674173355103,grad_norm: 0.9999991815542911, iteration: 164157
loss: 0.9874498248100281,grad_norm: 0.9266014679598744, iteration: 164158
loss: 1.0030170679092407,grad_norm: 0.999999222201827, iteration: 164159
loss: 0.9606592655181885,grad_norm: 0.9999991436312247, iteration: 164160
loss: 1.0157092809677124,grad_norm: 0.8447205128835295, iteration: 164161
loss: 1.0745078325271606,grad_norm: 0.9999993579343639, iteration: 164162
loss: 1.022517204284668,grad_norm: 0.6955022306510295, iteration: 164163
loss: 0.9794367551803589,grad_norm: 0.999999028078957, iteration: 164164
loss: 0.9964028000831604,grad_norm: 0.9605615048333948, iteration: 164165
loss: 1.0243773460388184,grad_norm: 0.8907865104049667, iteration: 164166
loss: 0.9900055527687073,grad_norm: 0.9999996998479558, iteration: 164167
loss: 1.0059138536453247,grad_norm: 0.9560204549240336, iteration: 164168
loss: 1.0588799715042114,grad_norm: 0.999999764799014, iteration: 164169
loss: 0.9809991717338562,grad_norm: 0.9999991371918494, iteration: 164170
loss: 1.0109318494796753,grad_norm: 0.9999991933230441, iteration: 164171
loss: 1.0205118656158447,grad_norm: 0.9999989249897209, iteration: 164172
loss: 1.0879364013671875,grad_norm: 0.9999992741780832, iteration: 164173
loss: 1.0218443870544434,grad_norm: 0.9999998891596178, iteration: 164174
loss: 0.9783554077148438,grad_norm: 0.999999214054563, iteration: 164175
loss: 1.0276914834976196,grad_norm: 0.9556024479978799, iteration: 164176
loss: 1.0015642642974854,grad_norm: 0.9999991622087785, iteration: 164177
loss: 0.9625657200813293,grad_norm: 0.9715206479049324, iteration: 164178
loss: 0.9947472214698792,grad_norm: 0.9999990437114958, iteration: 164179
loss: 0.9628872871398926,grad_norm: 0.9712299278247172, iteration: 164180
loss: 1.006256103515625,grad_norm: 0.9819384013689768, iteration: 164181
loss: 1.0550967454910278,grad_norm: 0.9999997276254757, iteration: 164182
loss: 1.0198078155517578,grad_norm: 0.9999991604411804, iteration: 164183
loss: 0.9836062788963318,grad_norm: 0.8710349605609208, iteration: 164184
loss: 1.0426576137542725,grad_norm: 0.9999991782502275, iteration: 164185
loss: 1.0165013074874878,grad_norm: 0.9999991659264952, iteration: 164186
loss: 1.001201868057251,grad_norm: 0.9999990957584776, iteration: 164187
loss: 0.973722517490387,grad_norm: 0.9146091070789018, iteration: 164188
loss: 1.0144435167312622,grad_norm: 0.8889510377460125, iteration: 164189
loss: 1.0010185241699219,grad_norm: 0.9845024033930442, iteration: 164190
loss: 0.9668309688568115,grad_norm: 0.84239109562416, iteration: 164191
loss: 0.9880032539367676,grad_norm: 0.8810788379835333, iteration: 164192
loss: 1.0378233194351196,grad_norm: 0.9560332121621677, iteration: 164193
loss: 1.0027379989624023,grad_norm: 0.9999995420498005, iteration: 164194
loss: 1.024016261100769,grad_norm: 0.9570256340477884, iteration: 164195
loss: 0.9666723608970642,grad_norm: 0.9861819372273899, iteration: 164196
loss: 0.9943587779998779,grad_norm: 0.8765661351124846, iteration: 164197
loss: 0.9931953549385071,grad_norm: 0.9999993141251311, iteration: 164198
loss: 0.9560058116912842,grad_norm: 0.9999990802617549, iteration: 164199
loss: 0.9538666605949402,grad_norm: 0.9560092000740867, iteration: 164200
loss: 1.0807132720947266,grad_norm: 0.99999907780448, iteration: 164201
loss: 0.9930076003074646,grad_norm: 0.8873270448639576, iteration: 164202
loss: 0.9644898772239685,grad_norm: 0.9999990704730156, iteration: 164203
loss: 1.0429165363311768,grad_norm: 0.9999992472484192, iteration: 164204
loss: 0.980869472026825,grad_norm: 0.9608725919507678, iteration: 164205
loss: 1.0078400373458862,grad_norm: 0.9194978064760868, iteration: 164206
loss: 1.0203368663787842,grad_norm: 0.999999111696536, iteration: 164207
loss: 1.0443185567855835,grad_norm: 0.9999991562603335, iteration: 164208
loss: 0.998049259185791,grad_norm: 0.9999989563603484, iteration: 164209
loss: 0.994841456413269,grad_norm: 0.9999990026077017, iteration: 164210
loss: 1.0103713274002075,grad_norm: 0.9070772640489462, iteration: 164211
loss: 0.968729555606842,grad_norm: 0.9999992895509731, iteration: 164212
loss: 0.9931021332740784,grad_norm: 0.999999971933421, iteration: 164213
loss: 0.9888257384300232,grad_norm: 0.9999991502811717, iteration: 164214
loss: 0.9924228191375732,grad_norm: 0.9999990802061698, iteration: 164215
loss: 0.9634827971458435,grad_norm: 0.9079013795738337, iteration: 164216
loss: 1.0513561964035034,grad_norm: 0.9999993069488022, iteration: 164217
loss: 0.9836064577102661,grad_norm: 0.9999989367808038, iteration: 164218
loss: 1.0353251695632935,grad_norm: 0.9999992376744627, iteration: 164219
loss: 1.0371266603469849,grad_norm: 0.9586355843127339, iteration: 164220
loss: 0.9721702337265015,grad_norm: 0.9999991290662285, iteration: 164221
loss: 1.058788776397705,grad_norm: 0.9999997469557262, iteration: 164222
loss: 0.9928379058837891,grad_norm: 0.999999221293419, iteration: 164223
loss: 0.9724059700965881,grad_norm: 0.9436995506373203, iteration: 164224
loss: 0.9780117273330688,grad_norm: 0.9888169277709743, iteration: 164225
loss: 0.9932085275650024,grad_norm: 0.9999991579622538, iteration: 164226
loss: 0.968369722366333,grad_norm: 0.9762921261723475, iteration: 164227
loss: 1.0192521810531616,grad_norm: 0.9999990352463878, iteration: 164228
loss: 0.9859587550163269,grad_norm: 0.9657910408971174, iteration: 164229
loss: 0.9886025190353394,grad_norm: 0.9999992181435121, iteration: 164230
loss: 0.9870960712432861,grad_norm: 0.999999359317768, iteration: 164231
loss: 0.9937890768051147,grad_norm: 0.9247350827762472, iteration: 164232
loss: 1.0237959623336792,grad_norm: 0.9974836846213248, iteration: 164233
loss: 0.9964519739151001,grad_norm: 0.9999991692936123, iteration: 164234
loss: 1.021791696548462,grad_norm: 0.9999990654851454, iteration: 164235
loss: 1.028283715248108,grad_norm: 0.9999991090941228, iteration: 164236
loss: 0.9959508776664734,grad_norm: 0.9999990172296463, iteration: 164237
loss: 0.9897873997688293,grad_norm: 0.999999026544145, iteration: 164238
loss: 1.0279985666275024,grad_norm: 0.870991059584746, iteration: 164239
loss: 0.9848131537437439,grad_norm: 0.99999916620019, iteration: 164240
loss: 1.0100481510162354,grad_norm: 0.7682807817355185, iteration: 164241
loss: 0.9828259944915771,grad_norm: 0.9964947550863688, iteration: 164242
loss: 0.9661468863487244,grad_norm: 0.901271312039069, iteration: 164243
loss: 1.0363383293151855,grad_norm: 0.968919891461395, iteration: 164244
loss: 1.0098258256912231,grad_norm: 0.9999989537996907, iteration: 164245
loss: 0.9981359243392944,grad_norm: 0.9999993305314815, iteration: 164246
loss: 1.0842511653900146,grad_norm: 0.9999989939418423, iteration: 164247
loss: 0.9545063972473145,grad_norm: 0.9023851110556811, iteration: 164248
loss: 0.9660432934761047,grad_norm: 0.999999212840313, iteration: 164249
loss: 0.9826357364654541,grad_norm: 0.858043748432909, iteration: 164250
loss: 1.0262947082519531,grad_norm: 0.9999992046855577, iteration: 164251
loss: 0.9948926568031311,grad_norm: 0.9999992426750751, iteration: 164252
loss: 0.9646830558776855,grad_norm: 0.9999990802426664, iteration: 164253
loss: 0.984436571598053,grad_norm: 0.943322427062783, iteration: 164254
loss: 1.0066455602645874,grad_norm: 0.9999991468212487, iteration: 164255
loss: 0.99295973777771,grad_norm: 0.9587302406567025, iteration: 164256
loss: 0.9954826235771179,grad_norm: 0.9999990060820966, iteration: 164257
loss: 0.9974578022956848,grad_norm: 0.9999990118095997, iteration: 164258
loss: 0.9940231442451477,grad_norm: 0.9585318890415159, iteration: 164259
loss: 0.9809290766716003,grad_norm: 0.9741197130352532, iteration: 164260
loss: 0.996420681476593,grad_norm: 0.9999994482991196, iteration: 164261
loss: 0.9816297292709351,grad_norm: 0.796854584048284, iteration: 164262
loss: 1.0230101346969604,grad_norm: 0.9999990833522965, iteration: 164263
loss: 0.9771727919578552,grad_norm: 0.9999991669143103, iteration: 164264
loss: 1.0245188474655151,grad_norm: 0.9999989033394636, iteration: 164265
loss: 0.9946130514144897,grad_norm: 0.8359277961848037, iteration: 164266
loss: 1.0011160373687744,grad_norm: 0.985577549661578, iteration: 164267
loss: 0.9477870464324951,grad_norm: 0.927707502571434, iteration: 164268
loss: 0.9893683791160583,grad_norm: 0.9999993752288281, iteration: 164269
loss: 1.0054112672805786,grad_norm: 0.9999991398114099, iteration: 164270
loss: 0.9861429929733276,grad_norm: 0.9999989601465497, iteration: 164271
loss: 1.01711106300354,grad_norm: 0.9960422860903544, iteration: 164272
loss: 0.9910494685173035,grad_norm: 0.9999991515226102, iteration: 164273
loss: 0.9903037548065186,grad_norm: 0.9999991081024766, iteration: 164274
loss: 1.0118986368179321,grad_norm: 0.9999997976528396, iteration: 164275
loss: 1.0313200950622559,grad_norm: 0.9999991746863749, iteration: 164276
loss: 0.9987137913703918,grad_norm: 0.960342090557758, iteration: 164277
loss: 0.9666268825531006,grad_norm: 0.945298717483931, iteration: 164278
loss: 1.0184001922607422,grad_norm: 0.9999990749068077, iteration: 164279
loss: 0.9850400686264038,grad_norm: 0.9999990949023766, iteration: 164280
loss: 1.019191026687622,grad_norm: 0.9999992433867184, iteration: 164281
loss: 1.0105717182159424,grad_norm: 0.9999991192337592, iteration: 164282
loss: 0.9837931990623474,grad_norm: 0.9999991625366375, iteration: 164283
loss: 0.9871131777763367,grad_norm: 0.9955079214240743, iteration: 164284
loss: 1.0055105686187744,grad_norm: 0.9999991301324977, iteration: 164285
loss: 0.9889892339706421,grad_norm: 0.9000135963752669, iteration: 164286
loss: 1.0238927602767944,grad_norm: 0.8782204936427007, iteration: 164287
loss: 1.026333212852478,grad_norm: 0.8764437664617895, iteration: 164288
loss: 0.9778194427490234,grad_norm: 0.9999993768363644, iteration: 164289
loss: 0.9826616048812866,grad_norm: 0.902940032602209, iteration: 164290
loss: 1.0200814008712769,grad_norm: 0.9999991569322921, iteration: 164291
loss: 0.96189284324646,grad_norm: 0.956529171649246, iteration: 164292
loss: 1.0110831260681152,grad_norm: 0.9999992312500477, iteration: 164293
loss: 0.9888128638267517,grad_norm: 0.9948738151898768, iteration: 164294
loss: 0.960834801197052,grad_norm: 0.9464033320010766, iteration: 164295
loss: 0.9578641057014465,grad_norm: 0.9999991132403672, iteration: 164296
loss: 1.0120011568069458,grad_norm: 0.9664489906692751, iteration: 164297
loss: 0.9649999737739563,grad_norm: 0.9999991071448208, iteration: 164298
loss: 1.0099788904190063,grad_norm: 0.9999991327642066, iteration: 164299
loss: 0.9927199482917786,grad_norm: 0.9999992408700801, iteration: 164300
loss: 1.0177141427993774,grad_norm: 0.9999996385701463, iteration: 164301
loss: 1.04094398021698,grad_norm: 0.884641665978755, iteration: 164302
loss: 0.985488772392273,grad_norm: 0.9999990274242989, iteration: 164303
loss: 0.982611894607544,grad_norm: 0.8690922213522261, iteration: 164304
loss: 0.957524299621582,grad_norm: 0.9999991943841964, iteration: 164305
loss: 0.9953253865242004,grad_norm: 0.950247065228511, iteration: 164306
loss: 1.041210651397705,grad_norm: 0.999999179699322, iteration: 164307
loss: 0.9803508520126343,grad_norm: 0.8862310770433919, iteration: 164308
loss: 1.0200412273406982,grad_norm: 0.9195181618537764, iteration: 164309
loss: 0.9744745492935181,grad_norm: 0.9999992893657138, iteration: 164310
loss: 1.0082931518554688,grad_norm: 0.9999993682349362, iteration: 164311
loss: 1.000714898109436,grad_norm: 0.9999990115238839, iteration: 164312
loss: 1.041816234588623,grad_norm: 0.8702936212255389, iteration: 164313
loss: 0.9993786215782166,grad_norm: 0.9448958615256888, iteration: 164314
loss: 1.0424875020980835,grad_norm: 0.9999992040833259, iteration: 164315
loss: 1.0224719047546387,grad_norm: 0.9999992644524313, iteration: 164316
loss: 0.969552755355835,grad_norm: 0.9999989957942294, iteration: 164317
loss: 0.9921397566795349,grad_norm: 0.9999991305522968, iteration: 164318
loss: 1.0188003778457642,grad_norm: 0.9772520787094774, iteration: 164319
loss: 1.0228973627090454,grad_norm: 0.9999991413965703, iteration: 164320
loss: 0.9686421751976013,grad_norm: 0.8075238832749113, iteration: 164321
loss: 1.017637014389038,grad_norm: 0.9999990939994882, iteration: 164322
loss: 0.9559088945388794,grad_norm: 0.9999990243600545, iteration: 164323
loss: 0.9827236533164978,grad_norm: 0.9999991988011759, iteration: 164324
loss: 1.0159056186676025,grad_norm: 0.9999990304858581, iteration: 164325
loss: 1.0067306756973267,grad_norm: 0.9999993370585227, iteration: 164326
loss: 1.0241261720657349,grad_norm: 0.9226969037711539, iteration: 164327
loss: 0.9694022536277771,grad_norm: 0.9497956375929769, iteration: 164328
loss: 1.0275379419326782,grad_norm: 0.8989147809805809, iteration: 164329
loss: 0.9996723532676697,grad_norm: 0.9999991359161512, iteration: 164330
loss: 1.0027881860733032,grad_norm: 0.7744252176240894, iteration: 164331
loss: 1.0226199626922607,grad_norm: 0.9396548108941767, iteration: 164332
loss: 1.0208559036254883,grad_norm: 0.9999992227857238, iteration: 164333
loss: 1.008502721786499,grad_norm: 0.958890504070983, iteration: 164334
loss: 0.9942343235015869,grad_norm: 0.9999992241396258, iteration: 164335
loss: 0.976348340511322,grad_norm: 0.90709213165944, iteration: 164336
loss: 0.9954671859741211,grad_norm: 0.9999993143628764, iteration: 164337
loss: 1.0164697170257568,grad_norm: 0.9867659661331514, iteration: 164338
loss: 1.0140739679336548,grad_norm: 0.8622867784190666, iteration: 164339
loss: 0.9880182147026062,grad_norm: 0.9509601697914721, iteration: 164340
loss: 0.9931877851486206,grad_norm: 0.8527112977226748, iteration: 164341
loss: 0.9978127479553223,grad_norm: 0.9999991698128688, iteration: 164342
loss: 0.9881839156150818,grad_norm: 0.9999991104217714, iteration: 164343
loss: 0.9779477715492249,grad_norm: 0.9999989730614592, iteration: 164344
loss: 1.0116060972213745,grad_norm: 0.899134187296531, iteration: 164345
loss: 1.017944097518921,grad_norm: 0.9999989669997597, iteration: 164346
loss: 0.999289870262146,grad_norm: 0.8516135397511626, iteration: 164347
loss: 1.024863600730896,grad_norm: 0.9999990860248827, iteration: 164348
loss: 1.0052764415740967,grad_norm: 0.9163117344896378, iteration: 164349
loss: 1.0035215616226196,grad_norm: 0.8909629840118177, iteration: 164350
loss: 1.0125443935394287,grad_norm: 0.895094441795807, iteration: 164351
loss: 1.020271897315979,grad_norm: 0.9836854481025638, iteration: 164352
loss: 0.987949788570404,grad_norm: 0.9999817961712318, iteration: 164353
loss: 1.0054680109024048,grad_norm: 0.9919576357455294, iteration: 164354
loss: 1.0211007595062256,grad_norm: 0.9496999377819425, iteration: 164355
loss: 1.0067633390426636,grad_norm: 0.9076185693711167, iteration: 164356
loss: 0.9836941361427307,grad_norm: 0.9999992545784726, iteration: 164357
loss: 1.0006455183029175,grad_norm: 0.9999991664033174, iteration: 164358
loss: 1.008202314376831,grad_norm: 0.999999114795739, iteration: 164359
loss: 0.9711854457855225,grad_norm: 0.999998994593337, iteration: 164360
loss: 1.100599765777588,grad_norm: 0.9999997418072423, iteration: 164361
loss: 0.9899497628211975,grad_norm: 0.8802695515734907, iteration: 164362
loss: 1.0123035907745361,grad_norm: 0.9998977895383933, iteration: 164363
loss: 0.9853034019470215,grad_norm: 0.9999990306445778, iteration: 164364
loss: 1.044893503189087,grad_norm: 1.000000025414815, iteration: 164365
loss: 0.9906283020973206,grad_norm: 0.9079600133285737, iteration: 164366
loss: 1.0089263916015625,grad_norm: 0.9999992107615212, iteration: 164367
loss: 1.0066680908203125,grad_norm: 0.953912509328295, iteration: 164368
loss: 0.9806135892868042,grad_norm: 0.9999991364123214, iteration: 164369
loss: 0.9911513924598694,grad_norm: 0.9210914352939255, iteration: 164370
loss: 0.9810412526130676,grad_norm: 0.7974904366011762, iteration: 164371
loss: 0.9780017137527466,grad_norm: 0.9217628902924933, iteration: 164372
loss: 1.0110745429992676,grad_norm: 0.8287617314226193, iteration: 164373
loss: 1.0111610889434814,grad_norm: 0.9988218116627705, iteration: 164374
loss: 0.964536190032959,grad_norm: 0.9999993050933657, iteration: 164375
loss: 1.007174015045166,grad_norm: 0.9999992214265787, iteration: 164376
loss: 0.9620764255523682,grad_norm: 0.9999991396371252, iteration: 164377
loss: 0.9974369406700134,grad_norm: 0.8921147433401923, iteration: 164378
loss: 0.9984578490257263,grad_norm: 0.9999992188689147, iteration: 164379
loss: 0.988717794418335,grad_norm: 0.9999992263123819, iteration: 164380
loss: 0.9983081817626953,grad_norm: 0.9999991276530514, iteration: 164381
loss: 0.991022527217865,grad_norm: 0.9999992461076911, iteration: 164382
loss: 1.0151621103286743,grad_norm: 0.8575432736048539, iteration: 164383
loss: 0.9711633920669556,grad_norm: 0.999999160831798, iteration: 164384
loss: 1.032585859298706,grad_norm: 0.9862074162382775, iteration: 164385
loss: 1.0186513662338257,grad_norm: 0.9956880864286454, iteration: 164386
loss: 0.9438791871070862,grad_norm: 0.999999050154696, iteration: 164387
loss: 0.9779233336448669,grad_norm: 0.9999993610757774, iteration: 164388
loss: 1.002961277961731,grad_norm: 0.8847857724049867, iteration: 164389
loss: 0.9890908002853394,grad_norm: 0.7467729611745179, iteration: 164390
loss: 1.0349624156951904,grad_norm: 0.9389388634265824, iteration: 164391
loss: 1.0213184356689453,grad_norm: 0.9999991163955773, iteration: 164392
loss: 1.0181584358215332,grad_norm: 0.8841633456729299, iteration: 164393
loss: 0.9929285645484924,grad_norm: 0.9999991304495767, iteration: 164394
loss: 1.0189297199249268,grad_norm: 0.9626578093664421, iteration: 164395
loss: 1.0163390636444092,grad_norm: 0.9999990921792911, iteration: 164396
loss: 0.9858184456825256,grad_norm: 0.8784796838547886, iteration: 164397
loss: 0.9970739483833313,grad_norm: 0.904760784890314, iteration: 164398
loss: 1.0705130100250244,grad_norm: 0.9999996907787316, iteration: 164399
loss: 1.0061885118484497,grad_norm: 0.9999991827054536, iteration: 164400
loss: 0.9854685664176941,grad_norm: 0.9999990709934253, iteration: 164401
loss: 1.0335533618927002,grad_norm: 0.9046606739407038, iteration: 164402
loss: 0.9774485230445862,grad_norm: 0.9999992624280636, iteration: 164403
loss: 1.0092812776565552,grad_norm: 0.9999990190780031, iteration: 164404
loss: 0.9734837412834167,grad_norm: 0.9746187149492368, iteration: 164405
loss: 0.9993451833724976,grad_norm: 0.999999224710659, iteration: 164406
loss: 1.0267019271850586,grad_norm: 0.9863978692468323, iteration: 164407
loss: 1.0078824758529663,grad_norm: 0.9999989617434092, iteration: 164408
loss: 0.9656500220298767,grad_norm: 0.9465605007742246, iteration: 164409
loss: 0.97467041015625,grad_norm: 0.8661931163996918, iteration: 164410
loss: 0.9879690408706665,grad_norm: 0.8679638768922255, iteration: 164411
loss: 0.9642726182937622,grad_norm: 0.9999992355757502, iteration: 164412
loss: 0.9898861050605774,grad_norm: 0.9087075438244191, iteration: 164413
loss: 0.9990697503089905,grad_norm: 0.9999991244944657, iteration: 164414
loss: 1.022621989250183,grad_norm: 0.9999991696926885, iteration: 164415
loss: 1.0272451639175415,grad_norm: 0.9503738276740601, iteration: 164416
loss: 1.0341479778289795,grad_norm: 0.9142752526528102, iteration: 164417
loss: 0.9917982816696167,grad_norm: 0.9566955564195224, iteration: 164418
loss: 0.9883958697319031,grad_norm: 0.9999992055992065, iteration: 164419
loss: 1.0461643934249878,grad_norm: 0.9999992897375866, iteration: 164420
loss: 1.0006422996520996,grad_norm: 0.9950561287802175, iteration: 164421
loss: 1.0029371976852417,grad_norm: 0.9999991730269668, iteration: 164422
loss: 0.9914153218269348,grad_norm: 0.8900208390540714, iteration: 164423
loss: 0.964079737663269,grad_norm: 0.9999992965238695, iteration: 164424
loss: 0.9998185634613037,grad_norm: 0.9999991413240824, iteration: 164425
loss: 1.0006972551345825,grad_norm: 0.9795029494625642, iteration: 164426
loss: 1.0365041494369507,grad_norm: 0.9999992184503779, iteration: 164427
loss: 0.999972939491272,grad_norm: 0.880285820201316, iteration: 164428
loss: 0.9930680990219116,grad_norm: 0.9999991807144284, iteration: 164429
loss: 1.001098871231079,grad_norm: 0.9999990679713645, iteration: 164430
loss: 0.979941725730896,grad_norm: 0.9999991808614421, iteration: 164431
loss: 0.9924526214599609,grad_norm: 0.9999990013269571, iteration: 164432
loss: 1.0292354822158813,grad_norm: 0.9987233047827263, iteration: 164433
loss: 0.9902107119560242,grad_norm: 0.9025316796535547, iteration: 164434
loss: 0.9985402226448059,grad_norm: 0.9999991144441136, iteration: 164435
loss: 0.9836694598197937,grad_norm: 0.9999991505267538, iteration: 164436
loss: 0.9909824728965759,grad_norm: 0.8490643289794741, iteration: 164437
loss: 0.9801753759384155,grad_norm: 0.9999990971186129, iteration: 164438
loss: 1.035630464553833,grad_norm: 0.8995345333262994, iteration: 164439
loss: 0.9607241153717041,grad_norm: 0.9999990362181637, iteration: 164440
loss: 0.9905198812484741,grad_norm: 0.8763337219059596, iteration: 164441
loss: 0.976590633392334,grad_norm: 0.9695444998296388, iteration: 164442
loss: 0.971064031124115,grad_norm: 0.8246674757029965, iteration: 164443
loss: 1.0323463678359985,grad_norm: 0.9999996701481315, iteration: 164444
loss: 1.0299128293991089,grad_norm: 0.9999992034265467, iteration: 164445
loss: 1.0009177923202515,grad_norm: 0.9999991261672444, iteration: 164446
loss: 0.9734630584716797,grad_norm: 0.9999991443399692, iteration: 164447
loss: 0.9997026920318604,grad_norm: 0.9249978885271499, iteration: 164448
loss: 0.9992926716804504,grad_norm: 0.9220924848954046, iteration: 164449
loss: 1.0256751775741577,grad_norm: 0.9999991383098035, iteration: 164450
loss: 1.0001882314682007,grad_norm: 0.9999994396677284, iteration: 164451
loss: 0.9888045787811279,grad_norm: 0.9956874454060393, iteration: 164452
loss: 0.9747042059898376,grad_norm: 0.9999992333526195, iteration: 164453
loss: 0.995963454246521,grad_norm: 0.9021668135081325, iteration: 164454
loss: 1.0322602987289429,grad_norm: 0.9999991606991855, iteration: 164455
loss: 1.0283087491989136,grad_norm: 0.9999997692989456, iteration: 164456
loss: 0.970663845539093,grad_norm: 0.930805348794419, iteration: 164457
loss: 1.0427536964416504,grad_norm: 0.9999992159444228, iteration: 164458
loss: 1.02434504032135,grad_norm: 0.9999991392212325, iteration: 164459
loss: 0.9750229120254517,grad_norm: 0.9999993060004767, iteration: 164460
loss: 1.019273042678833,grad_norm: 0.9367981051950067, iteration: 164461
loss: 1.0026062726974487,grad_norm: 0.9999992266971182, iteration: 164462
loss: 0.9986209869384766,grad_norm: 0.9937353297388631, iteration: 164463
loss: 0.9833823442459106,grad_norm: 0.999999050828677, iteration: 164464
loss: 0.9897282123565674,grad_norm: 0.9782731023735061, iteration: 164465
loss: 1.0122599601745605,grad_norm: 0.9569171308078966, iteration: 164466
loss: 0.9855700731277466,grad_norm: 0.9999990466138831, iteration: 164467
loss: 1.0104914903640747,grad_norm: 0.8795457029624222, iteration: 164468
loss: 1.007967233657837,grad_norm: 0.8977600962383515, iteration: 164469
loss: 0.988314688205719,grad_norm: 0.9999996045666778, iteration: 164470
loss: 0.9824344515800476,grad_norm: 0.9747836520244428, iteration: 164471
loss: 0.9994395971298218,grad_norm: 0.9151047192584213, iteration: 164472
loss: 1.0569041967391968,grad_norm: 0.9999994779708644, iteration: 164473
loss: 0.9877148866653442,grad_norm: 0.953758140504954, iteration: 164474
loss: 0.9891451001167297,grad_norm: 0.9563873677299028, iteration: 164475
loss: 0.9797208905220032,grad_norm: 0.9973302216488422, iteration: 164476
loss: 1.013448715209961,grad_norm: 0.805482240222817, iteration: 164477
loss: 1.0116478204727173,grad_norm: 0.9438413044200245, iteration: 164478
loss: 0.9702863693237305,grad_norm: 0.9234501958974056, iteration: 164479
loss: 0.9886577725410461,grad_norm: 0.9999991941758013, iteration: 164480
loss: 1.0185996294021606,grad_norm: 0.8899734307645194, iteration: 164481
loss: 0.9867430329322815,grad_norm: 0.9844344872404569, iteration: 164482
loss: 0.934412956237793,grad_norm: 0.893246256880497, iteration: 164483
loss: 1.0168144702911377,grad_norm: 0.8866104082349661, iteration: 164484
loss: 0.9900951385498047,grad_norm: 0.8871268017708206, iteration: 164485
loss: 1.0057629346847534,grad_norm: 0.7878836537912135, iteration: 164486
loss: 0.9771607518196106,grad_norm: 0.9999989744908708, iteration: 164487
loss: 0.990552544593811,grad_norm: 0.9999990845052454, iteration: 164488
loss: 1.0144579410552979,grad_norm: 0.9999991605618704, iteration: 164489
loss: 1.0127102136611938,grad_norm: 0.9241977248403935, iteration: 164490
loss: 0.9873852729797363,grad_norm: 0.8773172153175449, iteration: 164491
loss: 1.0489308834075928,grad_norm: 0.99999910944396, iteration: 164492
loss: 1.0150336027145386,grad_norm: 0.9999991909449143, iteration: 164493
loss: 0.9886499047279358,grad_norm: 0.8642517633284978, iteration: 164494
loss: 1.0065093040466309,grad_norm: 0.8248039571527406, iteration: 164495
loss: 1.012080192565918,grad_norm: 0.9333980866886029, iteration: 164496
loss: 0.9539297223091125,grad_norm: 0.9999990995992336, iteration: 164497
loss: 0.9967778921127319,grad_norm: 0.9999990311935448, iteration: 164498
loss: 0.9686347246170044,grad_norm: 0.881498695282279, iteration: 164499
loss: 0.9813995361328125,grad_norm: 0.9999991848431576, iteration: 164500
loss: 1.0411266088485718,grad_norm: 0.9999992802581041, iteration: 164501
loss: 0.9899080991744995,grad_norm: 0.823905150130641, iteration: 164502
loss: 0.998931348323822,grad_norm: 0.8732737006857983, iteration: 164503
loss: 1.0065147876739502,grad_norm: 0.9999991682357134, iteration: 164504
loss: 1.0225223302841187,grad_norm: 0.9999994227444484, iteration: 164505
loss: 0.9847977757453918,grad_norm: 0.7841091252701676, iteration: 164506
loss: 0.9953161478042603,grad_norm: 0.999999175881502, iteration: 164507
loss: 0.9910548329353333,grad_norm: 0.857506922686586, iteration: 164508
loss: 1.0155394077301025,grad_norm: 0.9999989613505346, iteration: 164509
loss: 0.9868541359901428,grad_norm: 0.9999989242130489, iteration: 164510
loss: 1.0018059015274048,grad_norm: 0.9999990275130896, iteration: 164511
loss: 0.9658569693565369,grad_norm: 0.999999150330715, iteration: 164512
loss: 1.0170449018478394,grad_norm: 0.9999990167671532, iteration: 164513
loss: 0.9825624823570251,grad_norm: 0.9999990547592602, iteration: 164514
loss: 0.985200822353363,grad_norm: 0.9419969823292141, iteration: 164515
loss: 0.9822555184364319,grad_norm: 0.8963651383774567, iteration: 164516
loss: 0.9959700703620911,grad_norm: 0.9161508273080827, iteration: 164517
loss: 0.9948962926864624,grad_norm: 0.8888757421672019, iteration: 164518
loss: 1.017526626586914,grad_norm: 0.9999994340376163, iteration: 164519
loss: 1.043968915939331,grad_norm: 0.9999993937556887, iteration: 164520
loss: 1.0849813222885132,grad_norm: 0.9999990217226931, iteration: 164521
loss: 1.0245612859725952,grad_norm: 0.9999991937619177, iteration: 164522
loss: 0.9654311537742615,grad_norm: 0.9999991569602926, iteration: 164523
loss: 0.9922431111335754,grad_norm: 0.910752331286466, iteration: 164524
loss: 1.0135042667388916,grad_norm: 0.937234044445227, iteration: 164525
loss: 0.9756439328193665,grad_norm: 0.9999991694544216, iteration: 164526
loss: 0.9900180697441101,grad_norm: 0.9699155550064038, iteration: 164527
loss: 1.0215284824371338,grad_norm: 0.8401690123358172, iteration: 164528
loss: 1.0159337520599365,grad_norm: 0.9999993853014173, iteration: 164529
loss: 0.9666566252708435,grad_norm: 0.8467190988953797, iteration: 164530
loss: 0.9754099249839783,grad_norm: 0.8543252530352274, iteration: 164531
loss: 1.0001579523086548,grad_norm: 0.9999990165841309, iteration: 164532
loss: 1.0107535123825073,grad_norm: 0.8225331051318518, iteration: 164533
loss: 1.0743920803070068,grad_norm: 0.9999992193221288, iteration: 164534
loss: 0.9594425559043884,grad_norm: 0.9999990507791645, iteration: 164535
loss: 1.0211102962493896,grad_norm: 0.7461295586815054, iteration: 164536
loss: 1.0387681722640991,grad_norm: 0.8742359947104781, iteration: 164537
loss: 1.0209681987762451,grad_norm: 0.9999990887119031, iteration: 164538
loss: 0.9855560660362244,grad_norm: 0.7808128804929886, iteration: 164539
loss: 1.0076838731765747,grad_norm: 0.981141556156182, iteration: 164540
loss: 1.0191997289657593,grad_norm: 0.9999991437918634, iteration: 164541
loss: 1.0288366079330444,grad_norm: 0.9604179027016845, iteration: 164542
loss: 0.9936321377754211,grad_norm: 0.9999991618607911, iteration: 164543
loss: 1.1426639556884766,grad_norm: 0.9999992570617343, iteration: 164544
loss: 1.0087696313858032,grad_norm: 0.850641242344406, iteration: 164545
loss: 0.9910412430763245,grad_norm: 0.9021356113334191, iteration: 164546
loss: 0.9594160914421082,grad_norm: 0.9999991263617274, iteration: 164547
loss: 1.0068296194076538,grad_norm: 0.9999991340565763, iteration: 164548
loss: 1.0059523582458496,grad_norm: 0.9560918388124903, iteration: 164549
loss: 1.0190355777740479,grad_norm: 0.993401527336461, iteration: 164550
loss: 0.9905973076820374,grad_norm: 0.9999990965833752, iteration: 164551
loss: 1.008948564529419,grad_norm: 0.8838646639725584, iteration: 164552
loss: 1.0033717155456543,grad_norm: 0.9999991182217649, iteration: 164553
loss: 0.9997093677520752,grad_norm: 0.9999991250449345, iteration: 164554
loss: 0.9816427826881409,grad_norm: 0.999999215163415, iteration: 164555
loss: 0.9974638223648071,grad_norm: 0.8541424275272527, iteration: 164556
loss: 1.0124657154083252,grad_norm: 0.9999990439337126, iteration: 164557
loss: 1.0340092182159424,grad_norm: 0.9427382093787297, iteration: 164558
loss: 1.012335181236267,grad_norm: 0.9999991494950294, iteration: 164559
loss: 1.018128752708435,grad_norm: 0.9172793316519567, iteration: 164560
loss: 0.996452808380127,grad_norm: 0.7666403531374806, iteration: 164561
loss: 1.0382497310638428,grad_norm: 0.9999992791538169, iteration: 164562
loss: 1.0075606107711792,grad_norm: 0.9999991641916445, iteration: 164563
loss: 1.0129472017288208,grad_norm: 0.9464392244284403, iteration: 164564
loss: 1.0258426666259766,grad_norm: 0.8872610567786956, iteration: 164565
loss: 0.9648272395133972,grad_norm: 0.9999991873677172, iteration: 164566
loss: 1.0266180038452148,grad_norm: 0.9845219320903784, iteration: 164567
loss: 1.0444798469543457,grad_norm: 0.9999991291970647, iteration: 164568
loss: 1.0642659664154053,grad_norm: 0.9999992950785258, iteration: 164569
loss: 0.9971473217010498,grad_norm: 0.9999991508235765, iteration: 164570
loss: 0.9556431174278259,grad_norm: 0.9153427502387441, iteration: 164571
loss: 0.9844416975975037,grad_norm: 0.9452957063665767, iteration: 164572
loss: 1.0040042400360107,grad_norm: 0.9959935160193437, iteration: 164573
loss: 0.9864760041236877,grad_norm: 0.9543537806347353, iteration: 164574
loss: 0.9889349937438965,grad_norm: 0.9999991746113399, iteration: 164575
loss: 0.9963041543960571,grad_norm: 0.9999999460687341, iteration: 164576
loss: 1.002004623413086,grad_norm: 0.9999992036101463, iteration: 164577
loss: 0.9860063791275024,grad_norm: 0.9286861407371472, iteration: 164578
loss: 0.9544869661331177,grad_norm: 0.9484575228954957, iteration: 164579
loss: 1.0255557298660278,grad_norm: 0.9999990755572227, iteration: 164580
loss: 1.0034536123275757,grad_norm: 0.9999990980563799, iteration: 164581
loss: 0.9922995567321777,grad_norm: 0.8278640121638575, iteration: 164582
loss: 0.9903819561004639,grad_norm: 0.9999995994722418, iteration: 164583
loss: 0.9856333136558533,grad_norm: 0.9395254700599803, iteration: 164584
loss: 1.0149080753326416,grad_norm: 0.9417131593284229, iteration: 164585
loss: 0.9885386228561401,grad_norm: 0.999999053218357, iteration: 164586
loss: 0.9829873442649841,grad_norm: 0.9752267861564877, iteration: 164587
loss: 0.9882525205612183,grad_norm: 0.9999990515461017, iteration: 164588
loss: 1.0093563795089722,grad_norm: 0.8783603495835121, iteration: 164589
loss: 1.016501545906067,grad_norm: 0.8705268946595128, iteration: 164590
loss: 0.977579653263092,grad_norm: 0.9999991290931773, iteration: 164591
loss: 1.0208874940872192,grad_norm: 0.964803385674657, iteration: 164592
loss: 0.989378035068512,grad_norm: 0.8876502186216794, iteration: 164593
loss: 1.041401982307434,grad_norm: 0.9999997607969474, iteration: 164594
loss: 1.0590875148773193,grad_norm: 0.9999991616417568, iteration: 164595
loss: 1.109677791595459,grad_norm: 0.9999999860415276, iteration: 164596
loss: 1.069579839706421,grad_norm: 0.9999992259326603, iteration: 164597
loss: 1.0346198081970215,grad_norm: 0.999999225602557, iteration: 164598
loss: 1.030055046081543,grad_norm: 0.8750713508064139, iteration: 164599
loss: 1.0081994533538818,grad_norm: 0.9867565582732145, iteration: 164600
loss: 1.0473742485046387,grad_norm: 0.9999993952253818, iteration: 164601
loss: 1.02705717086792,grad_norm: 0.9999990360068255, iteration: 164602
loss: 0.9845606684684753,grad_norm: 0.9999993169347232, iteration: 164603
loss: 1.0131791830062866,grad_norm: 0.9869225133844938, iteration: 164604
loss: 1.0286208391189575,grad_norm: 0.8397908238332684, iteration: 164605
loss: 1.008485198020935,grad_norm: 0.9120334288244782, iteration: 164606
loss: 0.99343341588974,grad_norm: 0.9999991142246383, iteration: 164607
loss: 1.0338982343673706,grad_norm: 0.9559027634985424, iteration: 164608
loss: 1.0302276611328125,grad_norm: 0.9999994589252211, iteration: 164609
loss: 0.9814699292182922,grad_norm: 0.9383852232748056, iteration: 164610
loss: 1.0407096147537231,grad_norm: 0.9999991106275425, iteration: 164611
loss: 1.0228835344314575,grad_norm: 0.9999990714165177, iteration: 164612
loss: 1.0113544464111328,grad_norm: 0.999999187092277, iteration: 164613
loss: 1.0171124935150146,grad_norm: 0.9999991943985315, iteration: 164614
loss: 1.0157525539398193,grad_norm: 0.7373196748268683, iteration: 164615
loss: 1.0184438228607178,grad_norm: 0.9999991231278251, iteration: 164616
loss: 1.0189262628555298,grad_norm: 0.9953179258338948, iteration: 164617
loss: 1.0296649932861328,grad_norm: 0.9999992431512086, iteration: 164618
loss: 1.032760739326477,grad_norm: 0.9606884263084442, iteration: 164619
loss: 0.9870429039001465,grad_norm: 0.953626446630854, iteration: 164620
loss: 1.0168715715408325,grad_norm: 0.999999281851395, iteration: 164621
loss: 1.026951551437378,grad_norm: 0.999999123839748, iteration: 164622
loss: 1.0080209970474243,grad_norm: 0.9811758784882975, iteration: 164623
loss: 0.995200514793396,grad_norm: 0.9469748702968549, iteration: 164624
loss: 0.9990716576576233,grad_norm: 0.8977233749576206, iteration: 164625
loss: 1.0104260444641113,grad_norm: 0.9964457354559564, iteration: 164626
loss: 1.0331588983535767,grad_norm: 0.9999991522032128, iteration: 164627
loss: 0.9880458116531372,grad_norm: 0.992942291990752, iteration: 164628
loss: 1.0154434442520142,grad_norm: 0.7761172662707057, iteration: 164629
loss: 1.116433024406433,grad_norm: 0.9999992502025349, iteration: 164630
loss: 1.0182503461837769,grad_norm: 0.9999990946630769, iteration: 164631
loss: 1.021523118019104,grad_norm: 0.9999994160719183, iteration: 164632
loss: 0.9717274904251099,grad_norm: 0.9999991439756117, iteration: 164633
loss: 0.9684916138648987,grad_norm: 0.9999990217183958, iteration: 164634
loss: 0.9993394017219543,grad_norm: 0.9999991850937325, iteration: 164635
loss: 1.0012210607528687,grad_norm: 0.9999991733351926, iteration: 164636
loss: 1.0158435106277466,grad_norm: 0.9581682260653546, iteration: 164637
loss: 1.0006060600280762,grad_norm: 0.9999993227491392, iteration: 164638
loss: 0.9800404906272888,grad_norm: 0.9999990747997122, iteration: 164639
loss: 1.016288161277771,grad_norm: 0.9999991154950756, iteration: 164640
loss: 1.0079905986785889,grad_norm: 0.9999994580237261, iteration: 164641
loss: 0.9765750169754028,grad_norm: 0.932657475312851, iteration: 164642
loss: 1.0052229166030884,grad_norm: 0.999999239211601, iteration: 164643
loss: 0.9896007776260376,grad_norm: 0.8578893789960118, iteration: 164644
loss: 1.0332716703414917,grad_norm: 0.9999991249946801, iteration: 164645
loss: 0.9748011231422424,grad_norm: 0.9999991517202547, iteration: 164646
loss: 1.0356621742248535,grad_norm: 0.8398900342942963, iteration: 164647
loss: 1.0162036418914795,grad_norm: 0.9999990330907188, iteration: 164648
loss: 0.9924013018608093,grad_norm: 0.9999990456841152, iteration: 164649
loss: 1.060162901878357,grad_norm: 0.985232966993473, iteration: 164650
loss: 1.0084028244018555,grad_norm: 0.817365739742413, iteration: 164651
loss: 1.0169870853424072,grad_norm: 0.9999993322472367, iteration: 164652
loss: 1.0235297679901123,grad_norm: 0.9144633710099666, iteration: 164653
loss: 1.0131148099899292,grad_norm: 0.9240751760977082, iteration: 164654
loss: 1.0259199142456055,grad_norm: 0.9999990395810003, iteration: 164655
loss: 1.0083509683609009,grad_norm: 0.9344257708451997, iteration: 164656
loss: 1.0572619438171387,grad_norm: 0.9999991380846531, iteration: 164657
loss: 0.9984912872314453,grad_norm: 0.9999992018024738, iteration: 164658
loss: 1.0038108825683594,grad_norm: 0.9317765698106439, iteration: 164659
loss: 0.9857317805290222,grad_norm: 0.7554111866446565, iteration: 164660
loss: 0.9754050374031067,grad_norm: 0.8439184174243558, iteration: 164661
loss: 0.9875974655151367,grad_norm: 0.9999990958020321, iteration: 164662
loss: 1.0257834196090698,grad_norm: 0.9540820786893919, iteration: 164663
loss: 1.007024884223938,grad_norm: 0.976090568205311, iteration: 164664
loss: 0.9900180697441101,grad_norm: 0.9999991781356227, iteration: 164665
loss: 1.0048549175262451,grad_norm: 0.9999991303516005, iteration: 164666
loss: 1.0125161409378052,grad_norm: 0.9075335083996333, iteration: 164667
loss: 1.0330286026000977,grad_norm: 0.9999990510885556, iteration: 164668
loss: 0.9864215850830078,grad_norm: 0.9999993869913896, iteration: 164669
loss: 1.0023624897003174,grad_norm: 0.9776424337806089, iteration: 164670
loss: 0.9891189932823181,grad_norm: 0.9999991751552918, iteration: 164671
loss: 1.0143821239471436,grad_norm: 0.9454703003649407, iteration: 164672
loss: 0.9808336496353149,grad_norm: 0.9529833675786397, iteration: 164673
loss: 0.9940155148506165,grad_norm: 0.944921860963097, iteration: 164674
loss: 0.9968839287757874,grad_norm: 0.9999991600150646, iteration: 164675
loss: 1.0011159181594849,grad_norm: 0.8351384186609789, iteration: 164676
loss: 1.0095610618591309,grad_norm: 0.9999989216815931, iteration: 164677
loss: 1.0251823663711548,grad_norm: 0.9999989968802495, iteration: 164678
loss: 1.0266406536102295,grad_norm: 0.9999990330769482, iteration: 164679
loss: 0.9986657500267029,grad_norm: 0.9086229560294875, iteration: 164680
loss: 1.0038021802902222,grad_norm: 0.9794032709947307, iteration: 164681
loss: 0.9780891537666321,grad_norm: 0.9999992391801977, iteration: 164682
loss: 0.9704973697662354,grad_norm: 0.9864898667589326, iteration: 164683
loss: 1.0198464393615723,grad_norm: 0.9999990777221756, iteration: 164684
loss: 1.0491676330566406,grad_norm: 0.9999994973862528, iteration: 164685
loss: 1.0075047016143799,grad_norm: 0.9999990149912241, iteration: 164686
loss: 0.9967672228813171,grad_norm: 0.9707794931045974, iteration: 164687
loss: 1.020429253578186,grad_norm: 0.9999989151778798, iteration: 164688
loss: 0.9658337831497192,grad_norm: 0.8862083525471501, iteration: 164689
loss: 1.0394178628921509,grad_norm: 0.9999995309607035, iteration: 164690
loss: 0.9996185302734375,grad_norm: 0.9999989266477148, iteration: 164691
loss: 1.0000661611557007,grad_norm: 0.9128135694427646, iteration: 164692
loss: 1.0131218433380127,grad_norm: 0.9738938236710601, iteration: 164693
loss: 1.0079808235168457,grad_norm: 0.999999074270591, iteration: 164694
loss: 0.9958356022834778,grad_norm: 0.9999991691464448, iteration: 164695
loss: 0.9943458437919617,grad_norm: 0.9999991374292727, iteration: 164696
loss: 1.0068374872207642,grad_norm: 0.9030687920032631, iteration: 164697
loss: 1.0428217649459839,grad_norm: 0.9999992107512528, iteration: 164698
loss: 0.9978742599487305,grad_norm: 0.9999989924889876, iteration: 164699
loss: 0.9985238909721375,grad_norm: 0.9999990352729509, iteration: 164700
loss: 0.9975081086158752,grad_norm: 0.9947845874845895, iteration: 164701
loss: 1.0027878284454346,grad_norm: 0.8882613953346918, iteration: 164702
loss: 1.1133887767791748,grad_norm: 0.9999994721170108, iteration: 164703
loss: 1.001488447189331,grad_norm: 0.9999991711923479, iteration: 164704
loss: 1.0026638507843018,grad_norm: 0.9999989950383625, iteration: 164705
loss: 1.0285923480987549,grad_norm: 0.9999991164920023, iteration: 164706
loss: 1.0151582956314087,grad_norm: 0.9999995252567012, iteration: 164707
loss: 0.9611297845840454,grad_norm: 0.9741566823949094, iteration: 164708
loss: 1.0360066890716553,grad_norm: 0.9999992010857314, iteration: 164709
loss: 1.0144168138504028,grad_norm: 0.955463089865908, iteration: 164710
loss: 1.0088690519332886,grad_norm: 0.9999992562583, iteration: 164711
loss: 1.025709629058838,grad_norm: 0.9999990197160393, iteration: 164712
loss: 1.0435965061187744,grad_norm: 0.9999989982543143, iteration: 164713
loss: 0.9922854900360107,grad_norm: 0.9999989559160213, iteration: 164714
loss: 0.9911762475967407,grad_norm: 0.9999991040582713, iteration: 164715
loss: 1.0177817344665527,grad_norm: 0.9500084363627535, iteration: 164716
loss: 1.004761815071106,grad_norm: 0.953953019877357, iteration: 164717
loss: 0.9458056092262268,grad_norm: 0.9147317884247838, iteration: 164718
loss: 0.9902318120002747,grad_norm: 0.865952432488127, iteration: 164719
loss: 1.100040316581726,grad_norm: 0.9999994469590289, iteration: 164720
loss: 1.0672122240066528,grad_norm: 0.9999996052462639, iteration: 164721
loss: 0.9993450045585632,grad_norm: 0.9999992575290532, iteration: 164722
loss: 0.9869522452354431,grad_norm: 0.9521820953483447, iteration: 164723
loss: 1.0700913667678833,grad_norm: 0.9999991964465885, iteration: 164724
loss: 0.9820036292076111,grad_norm: 0.8869023618322085, iteration: 164725
loss: 0.9539023041725159,grad_norm: 0.8682445535421093, iteration: 164726
loss: 0.990559458732605,grad_norm: 0.9999991055217136, iteration: 164727
loss: 1.0353044271469116,grad_norm: 0.9999989476515115, iteration: 164728
loss: 1.0069104433059692,grad_norm: 0.999999298902426, iteration: 164729
loss: 1.0313857793807983,grad_norm: 0.9999990506163453, iteration: 164730
loss: 1.0003494024276733,grad_norm: 0.9049120754607837, iteration: 164731
loss: 1.01689612865448,grad_norm: 0.9364943406484274, iteration: 164732
loss: 0.9983602166175842,grad_norm: 0.8070092157608548, iteration: 164733
loss: 0.9953685998916626,grad_norm: 0.9936864776362201, iteration: 164734
loss: 1.0136830806732178,grad_norm: 0.9999992381040027, iteration: 164735
loss: 0.9475300312042236,grad_norm: 0.999999144088414, iteration: 164736
loss: 0.9942731857299805,grad_norm: 0.999999031230083, iteration: 164737
loss: 1.0039983987808228,grad_norm: 0.9999991190672941, iteration: 164738
loss: 1.0075995922088623,grad_norm: 0.8753641295802449, iteration: 164739
loss: 0.9996569156646729,grad_norm: 0.9070585062208031, iteration: 164740
loss: 0.9769050478935242,grad_norm: 0.9999990380249822, iteration: 164741
loss: 0.9861146807670593,grad_norm: 0.9999990883718021, iteration: 164742
loss: 0.9893129467964172,grad_norm: 0.8657367428475541, iteration: 164743
loss: 1.026291012763977,grad_norm: 0.9670635094829538, iteration: 164744
loss: 0.9986468553543091,grad_norm: 0.8248465628924478, iteration: 164745
loss: 0.9751201272010803,grad_norm: 0.8443516560865455, iteration: 164746
loss: 0.9844111204147339,grad_norm: 0.9999991460298007, iteration: 164747
loss: 0.9867710471153259,grad_norm: 0.9011662693988439, iteration: 164748
loss: 1.0005772113800049,grad_norm: 0.9999991555250483, iteration: 164749
loss: 1.0021378993988037,grad_norm: 0.8488370419854647, iteration: 164750
loss: 1.006777286529541,grad_norm: 0.9999990592728735, iteration: 164751
loss: 0.991614043712616,grad_norm: 0.9999998125586097, iteration: 164752
loss: 0.9581075310707092,grad_norm: 0.9961541318851971, iteration: 164753
loss: 1.0155762434005737,grad_norm: 0.9071595248107581, iteration: 164754
loss: 1.0382459163665771,grad_norm: 0.9623885014865994, iteration: 164755
loss: 0.9913585782051086,grad_norm: 0.9999990962622132, iteration: 164756
loss: 0.978491485118866,grad_norm: 0.9705435489903236, iteration: 164757
loss: 1.000382661819458,grad_norm: 0.999999234791403, iteration: 164758
loss: 0.9906708002090454,grad_norm: 0.9342100463504778, iteration: 164759
loss: 1.0096418857574463,grad_norm: 0.9999991793145514, iteration: 164760
loss: 0.9910944104194641,grad_norm: 0.9667674618510139, iteration: 164761
loss: 1.0606545209884644,grad_norm: 0.9214556324369592, iteration: 164762
loss: 1.020941972732544,grad_norm: 0.9999992269614, iteration: 164763
loss: 0.9758942723274231,grad_norm: 0.9999992158035652, iteration: 164764
loss: 1.0124753713607788,grad_norm: 0.9820203656630493, iteration: 164765
loss: 0.9995040893554688,grad_norm: 0.9999992223363889, iteration: 164766
loss: 0.9802529811859131,grad_norm: 0.9429957949471046, iteration: 164767
loss: 1.0105822086334229,grad_norm: 0.9270560057108542, iteration: 164768
loss: 0.9774923920631409,grad_norm: 0.9220532262464728, iteration: 164769
loss: 1.003105878829956,grad_norm: 0.9999992280854183, iteration: 164770
loss: 0.9827648401260376,grad_norm: 0.9356360643703865, iteration: 164771
loss: 1.0331157445907593,grad_norm: 0.9999991324965406, iteration: 164772
loss: 0.982530951499939,grad_norm: 0.8763107960456598, iteration: 164773
loss: 0.997361958026886,grad_norm: 0.9999991374224059, iteration: 164774
loss: 0.9698745012283325,grad_norm: 0.9031563943896531, iteration: 164775
loss: 0.98968106508255,grad_norm: 0.9999990841235331, iteration: 164776
loss: 0.9781571626663208,grad_norm: 0.9737418396120264, iteration: 164777
loss: 1.0875247716903687,grad_norm: 1.0000000905720987, iteration: 164778
loss: 0.9965525269508362,grad_norm: 0.9146943804016585, iteration: 164779
loss: 0.9911110997200012,grad_norm: 0.9999990273607646, iteration: 164780
loss: 1.0224804878234863,grad_norm: 0.9999991323996062, iteration: 164781
loss: 1.021199107170105,grad_norm: 0.9999995726130312, iteration: 164782
loss: 1.0115656852722168,grad_norm: 0.9999997475449862, iteration: 164783
loss: 0.9942308664321899,grad_norm: 0.8874794985838108, iteration: 164784
loss: 1.0619397163391113,grad_norm: 0.9999998678798211, iteration: 164785
loss: 1.005929708480835,grad_norm: 0.9567633171996626, iteration: 164786
loss: 1.0307430028915405,grad_norm: 0.9999992385090252, iteration: 164787
loss: 1.0043164491653442,grad_norm: 0.8007566723846752, iteration: 164788
loss: 1.0094932317733765,grad_norm: 0.9999991238916617, iteration: 164789
loss: 1.0051829814910889,grad_norm: 0.9999991668402204, iteration: 164790
loss: 0.9998149275779724,grad_norm: 0.999999184277748, iteration: 164791
loss: 1.038230538368225,grad_norm: 0.9999991854815176, iteration: 164792
loss: 0.9865480065345764,grad_norm: 0.9702964107898225, iteration: 164793
loss: 0.9915910959243774,grad_norm: 0.9999992165241287, iteration: 164794
loss: 1.007151484489441,grad_norm: 0.9999992161317298, iteration: 164795
loss: 1.0084311962127686,grad_norm: 0.9932657117416974, iteration: 164796
loss: 1.0011308193206787,grad_norm: 0.9775514857945271, iteration: 164797
loss: 1.0135743618011475,grad_norm: 0.9710111791934913, iteration: 164798
loss: 0.9955306053161621,grad_norm: 0.860234624968492, iteration: 164799
loss: 1.0073368549346924,grad_norm: 0.9999990487530268, iteration: 164800
loss: 1.0338785648345947,grad_norm: 0.9999996268049323, iteration: 164801
loss: 0.9588594436645508,grad_norm: 0.9894012988680769, iteration: 164802
loss: 0.982822060585022,grad_norm: 0.9999992708968757, iteration: 164803
loss: 1.0205950736999512,grad_norm: 0.9999992413921243, iteration: 164804
loss: 1.005249261856079,grad_norm: 0.9999990653917069, iteration: 164805
loss: 1.0274465084075928,grad_norm: 0.9999990804081135, iteration: 164806
loss: 0.9996514320373535,grad_norm: 0.999999465672833, iteration: 164807
loss: 1.0258445739746094,grad_norm: 0.9999996492208025, iteration: 164808
loss: 0.9526458978652954,grad_norm: 0.7914881413409429, iteration: 164809
loss: 1.00823175907135,grad_norm: 0.9999992269683928, iteration: 164810
loss: 1.0250118970870972,grad_norm: 0.9999992058508608, iteration: 164811
loss: 1.019551157951355,grad_norm: 0.929889548459384, iteration: 164812
loss: 0.9811809062957764,grad_norm: 0.9169068102109484, iteration: 164813
loss: 1.0294458866119385,grad_norm: 0.8302598756457629, iteration: 164814
loss: 1.0070672035217285,grad_norm: 0.920959783314591, iteration: 164815
loss: 1.007753610610962,grad_norm: 0.999999049740848, iteration: 164816
loss: 0.9751728773117065,grad_norm: 0.8643507747062869, iteration: 164817
loss: 0.957085907459259,grad_norm: 0.9651327606854126, iteration: 164818
loss: 1.0105762481689453,grad_norm: 0.9999989849197046, iteration: 164819
loss: 1.0280561447143555,grad_norm: 0.9999991128877862, iteration: 164820
loss: 1.0446633100509644,grad_norm: 0.8967462672680792, iteration: 164821
loss: 1.0039966106414795,grad_norm: 0.9999990914664212, iteration: 164822
loss: 1.0695072412490845,grad_norm: 0.9999998134476743, iteration: 164823
loss: 0.9890668988227844,grad_norm: 0.999999218753504, iteration: 164824
loss: 1.0555217266082764,grad_norm: 0.9906079805250468, iteration: 164825
loss: 0.9837561845779419,grad_norm: 0.974238657938341, iteration: 164826
loss: 1.020015001296997,grad_norm: 0.9269101095950271, iteration: 164827
loss: 1.0225039720535278,grad_norm: 0.9999996791237108, iteration: 164828
loss: 0.9988588094711304,grad_norm: 0.8968839390000278, iteration: 164829
loss: 1.0033479928970337,grad_norm: 0.9999997393152363, iteration: 164830
loss: 1.0389634370803833,grad_norm: 0.9999990895498564, iteration: 164831
loss: 1.0738674402236938,grad_norm: 0.9999992439250598, iteration: 164832
loss: 1.0066628456115723,grad_norm: 0.8923750820232073, iteration: 164833
loss: 1.0044419765472412,grad_norm: 0.9999991855862399, iteration: 164834
loss: 1.002916932106018,grad_norm: 0.8664409390575463, iteration: 164835
loss: 1.141029953956604,grad_norm: 0.9999996265378629, iteration: 164836
loss: 1.0343607664108276,grad_norm: 0.9920806472074775, iteration: 164837
loss: 1.025119662284851,grad_norm: 0.9999992472645748, iteration: 164838
loss: 1.0128564834594727,grad_norm: 0.9999997839476507, iteration: 164839
loss: 1.0643693208694458,grad_norm: 0.9999990897112514, iteration: 164840
loss: 0.9926784038543701,grad_norm: 0.9999991613958654, iteration: 164841
loss: 0.9810454845428467,grad_norm: 0.9999990930211982, iteration: 164842
loss: 1.0278648138046265,grad_norm: 0.939823184146122, iteration: 164843
loss: 1.012256383895874,grad_norm: 0.9999991930424864, iteration: 164844
loss: 1.0308235883712769,grad_norm: 0.9592379188914202, iteration: 164845
loss: 1.011452555656433,grad_norm: 0.881193271513423, iteration: 164846
loss: 1.029397964477539,grad_norm: 0.9999991550967245, iteration: 164847
loss: 1.011805772781372,grad_norm: 0.9316671268753439, iteration: 164848
loss: 0.9720180630683899,grad_norm: 0.9999990594500204, iteration: 164849
loss: 1.0081257820129395,grad_norm: 0.9999993065461488, iteration: 164850
loss: 1.0452749729156494,grad_norm: 0.8756883598592144, iteration: 164851
loss: 1.0057165622711182,grad_norm: 0.9999990600236082, iteration: 164852
loss: 0.9685854315757751,grad_norm: 0.9999991217792148, iteration: 164853
loss: 1.0082833766937256,grad_norm: 0.9807158409528121, iteration: 164854
loss: 0.9687434434890747,grad_norm: 0.9881319474912372, iteration: 164855
loss: 1.0253889560699463,grad_norm: 0.9738618956246198, iteration: 164856
loss: 0.9596773982048035,grad_norm: 0.9076345941694609, iteration: 164857
loss: 1.045744776725769,grad_norm: 0.9999994832257305, iteration: 164858
loss: 1.0126382112503052,grad_norm: 0.9747441361357557, iteration: 164859
loss: 0.9926931262016296,grad_norm: 0.9999990144381435, iteration: 164860
loss: 1.0156782865524292,grad_norm: 0.9522255636523635, iteration: 164861
loss: 0.9878178238868713,grad_norm: 0.9999990416317314, iteration: 164862
loss: 0.9722403883934021,grad_norm: 0.9999991962579537, iteration: 164863
loss: 0.9668773412704468,grad_norm: 0.9999991219493296, iteration: 164864
loss: 0.9953770041465759,grad_norm: 0.9940538660883345, iteration: 164865
loss: 1.015292763710022,grad_norm: 0.9579636476452067, iteration: 164866
loss: 1.0228509902954102,grad_norm: 0.9999991795307919, iteration: 164867
loss: 0.9796133637428284,grad_norm: 0.9999991885870343, iteration: 164868
loss: 0.9773222208023071,grad_norm: 0.9999991989211161, iteration: 164869
loss: 1.0266368389129639,grad_norm: 0.9999991593493657, iteration: 164870
loss: 0.9702317118644714,grad_norm: 0.9999991185720012, iteration: 164871
loss: 1.051506757736206,grad_norm: 0.9999991850679214, iteration: 164872
loss: 1.0001215934753418,grad_norm: 0.9566990545904699, iteration: 164873
loss: 0.9843524694442749,grad_norm: 0.9999989795525017, iteration: 164874
loss: 0.9636438488960266,grad_norm: 0.8454496656849921, iteration: 164875
loss: 1.0245404243469238,grad_norm: 0.9999991024924122, iteration: 164876
loss: 1.0287511348724365,grad_norm: 0.9999998639443148, iteration: 164877
loss: 1.0074918270111084,grad_norm: 0.9999992063088207, iteration: 164878
loss: 1.0316181182861328,grad_norm: 0.9927161397525031, iteration: 164879
loss: 1.0021328926086426,grad_norm: 0.9999990879507848, iteration: 164880
loss: 1.0090529918670654,grad_norm: 0.9512061609928737, iteration: 164881
loss: 0.9961467981338501,grad_norm: 0.9016531778838094, iteration: 164882
loss: 0.9587500095367432,grad_norm: 0.8850632371062065, iteration: 164883
loss: 1.0064890384674072,grad_norm: 0.9322469390840346, iteration: 164884
loss: 0.9952479004859924,grad_norm: 0.999999031997804, iteration: 164885
loss: 1.1213890314102173,grad_norm: 0.999999613423093, iteration: 164886
loss: 1.078751802444458,grad_norm: 0.9999991028227622, iteration: 164887
loss: 0.9830299615859985,grad_norm: 0.9193971606026968, iteration: 164888
loss: 1.0016210079193115,grad_norm: 0.9836249386541676, iteration: 164889
loss: 0.9899803996086121,grad_norm: 0.9999993178792811, iteration: 164890
loss: 1.0001106262207031,grad_norm: 0.9865510018519851, iteration: 164891
loss: 0.9974956512451172,grad_norm: 0.9220062034622695, iteration: 164892
loss: 1.0477715730667114,grad_norm: 0.9417101175047379, iteration: 164893
loss: 1.0027294158935547,grad_norm: 0.9745218122584861, iteration: 164894
loss: 1.0844783782958984,grad_norm: 0.9760137610317005, iteration: 164895
loss: 0.9865062236785889,grad_norm: 0.999999177424825, iteration: 164896
loss: 1.0114349126815796,grad_norm: 0.9999990210472527, iteration: 164897
loss: 0.9836835265159607,grad_norm: 0.8480052624775263, iteration: 164898
loss: 0.9959637522697449,grad_norm: 0.9999991097642104, iteration: 164899
loss: 0.9742151498794556,grad_norm: 0.9827108443103885, iteration: 164900
loss: 0.9980961680412292,grad_norm: 0.9999994815923741, iteration: 164901
loss: 0.9693487882614136,grad_norm: 0.9999990872028083, iteration: 164902
loss: 0.9939737915992737,grad_norm: 0.9999990447875711, iteration: 164903
loss: 1.017306923866272,grad_norm: 0.9999990820903573, iteration: 164904
loss: 0.9753051400184631,grad_norm: 0.8430702443639133, iteration: 164905
loss: 1.0092878341674805,grad_norm: 0.999999243107157, iteration: 164906
loss: 0.9794276356697083,grad_norm: 0.9410447822584862, iteration: 164907
loss: 0.989916980266571,grad_norm: 0.9999992176782481, iteration: 164908
loss: 1.025841474533081,grad_norm: 0.9999999240379585, iteration: 164909
loss: 0.9915249347686768,grad_norm: 0.9685743387260504, iteration: 164910
loss: 0.9906012415885925,grad_norm: 0.9264514590134717, iteration: 164911
loss: 0.9731452465057373,grad_norm: 0.8869420171758858, iteration: 164912
loss: 0.9885293841362,grad_norm: 0.988733689592905, iteration: 164913
loss: 0.9758458733558655,grad_norm: 0.9999990754003777, iteration: 164914
loss: 0.9935646653175354,grad_norm: 0.9999991953960674, iteration: 164915
loss: 0.991293728351593,grad_norm: 0.9999990921015501, iteration: 164916
loss: 1.0119504928588867,grad_norm: 0.9999991344505094, iteration: 164917
loss: 1.0332890748977661,grad_norm: 0.974134969585658, iteration: 164918
loss: 0.9988711476325989,grad_norm: 0.8310464365707362, iteration: 164919
loss: 1.006242036819458,grad_norm: 0.859770805137785, iteration: 164920
loss: 1.0001171827316284,grad_norm: 0.9892044990320469, iteration: 164921
loss: 0.9629303812980652,grad_norm: 0.9999990968016742, iteration: 164922
loss: 0.995964765548706,grad_norm: 0.9365093218052908, iteration: 164923
loss: 0.9726424217224121,grad_norm: 0.8976244406187392, iteration: 164924
loss: 1.0087571144104004,grad_norm: 0.907853262931061, iteration: 164925
loss: 1.026672601699829,grad_norm: 0.9999992117170652, iteration: 164926
loss: 1.0079096555709839,grad_norm: 0.9999990505415179, iteration: 164927
loss: 1.0078787803649902,grad_norm: 0.986520510957567, iteration: 164928
loss: 0.978158175945282,grad_norm: 0.9999993346623606, iteration: 164929
loss: 1.0590225458145142,grad_norm: 0.9435062534745222, iteration: 164930
loss: 1.0154513120651245,grad_norm: 0.942927363130964, iteration: 164931
loss: 1.0305486917495728,grad_norm: 0.899691053532345, iteration: 164932
loss: 0.9888098835945129,grad_norm: 0.9999991971126789, iteration: 164933
loss: 1.0083930492401123,grad_norm: 0.971715246323924, iteration: 164934
loss: 0.9630779027938843,grad_norm: 0.9964281288947485, iteration: 164935
loss: 1.0365468263626099,grad_norm: 0.9818762759604001, iteration: 164936
loss: 0.9893317818641663,grad_norm: 0.781219045416703, iteration: 164937
loss: 0.9805566668510437,grad_norm: 0.9999992415136926, iteration: 164938
loss: 0.9959595799446106,grad_norm: 0.920530005228377, iteration: 164939
loss: 1.0235408544540405,grad_norm: 0.9999991258412628, iteration: 164940
loss: 1.0472688674926758,grad_norm: 0.999999181304551, iteration: 164941
loss: 1.0585922002792358,grad_norm: 0.9999992590830985, iteration: 164942
loss: 1.0066062211990356,grad_norm: 0.9999992376082031, iteration: 164943
loss: 0.992805004119873,grad_norm: 0.9999990810641557, iteration: 164944
loss: 1.009179949760437,grad_norm: 0.9421087987487964, iteration: 164945
loss: 1.1245934963226318,grad_norm: 0.9999993626260916, iteration: 164946
loss: 0.9964897632598877,grad_norm: 0.9919874733835056, iteration: 164947
loss: 1.0093438625335693,grad_norm: 0.9122135810714056, iteration: 164948
loss: 0.9901058673858643,grad_norm: 0.9860095941075684, iteration: 164949
loss: 1.0463794469833374,grad_norm: 0.8814888309022069, iteration: 164950
loss: 0.9770060181617737,grad_norm: 0.9999990972672961, iteration: 164951
loss: 0.9971689581871033,grad_norm: 0.755141044134778, iteration: 164952
loss: 0.9810512661933899,grad_norm: 0.9196041121159714, iteration: 164953
loss: 0.9701783061027527,grad_norm: 0.9125343355536603, iteration: 164954
loss: 0.9817984104156494,grad_norm: 0.9999990624706712, iteration: 164955
loss: 1.0332274436950684,grad_norm: 0.9999992530366728, iteration: 164956
loss: 0.9728682637214661,grad_norm: 0.9999991071944072, iteration: 164957
loss: 1.0018023252487183,grad_norm: 0.9999989918492014, iteration: 164958
loss: 1.017755150794983,grad_norm: 0.9648679662733098, iteration: 164959
loss: 1.00950026512146,grad_norm: 0.9999998515716768, iteration: 164960
loss: 0.9786404371261597,grad_norm: 0.9999992006182172, iteration: 164961
loss: 1.0033320188522339,grad_norm: 0.9999990656927399, iteration: 164962
loss: 1.0288221836090088,grad_norm: 0.9999990864471525, iteration: 164963
loss: 1.0052491426467896,grad_norm: 0.8655680839944105, iteration: 164964
loss: 0.9812785983085632,grad_norm: 0.9999990781279832, iteration: 164965
loss: 1.0088378190994263,grad_norm: 0.9932247706742284, iteration: 164966
loss: 1.0072135925292969,grad_norm: 0.9909198598608818, iteration: 164967
loss: 0.9831712245941162,grad_norm: 0.776964332566811, iteration: 164968
loss: 1.0352388620376587,grad_norm: 0.9999990486740261, iteration: 164969
loss: 0.9984853267669678,grad_norm: 0.9795887306607027, iteration: 164970
loss: 0.9851882457733154,grad_norm: 0.9999990760254365, iteration: 164971
loss: 1.001028060913086,grad_norm: 0.999999206792625, iteration: 164972
loss: 1.0159751176834106,grad_norm: 0.9999989992687475, iteration: 164973
loss: 1.0114459991455078,grad_norm: 0.8789763516202111, iteration: 164974
loss: 0.9967936277389526,grad_norm: 0.9999991196103117, iteration: 164975
loss: 0.981451153755188,grad_norm: 0.9795959825442554, iteration: 164976
loss: 0.9716965556144714,grad_norm: 0.9269014000789395, iteration: 164977
loss: 1.02277672290802,grad_norm: 0.9999993023004308, iteration: 164978
loss: 1.0142567157745361,grad_norm: 0.9999990844330506, iteration: 164979
loss: 1.0149892568588257,grad_norm: 0.9999992559877389, iteration: 164980
loss: 1.0073192119598389,grad_norm: 0.8628551840964186, iteration: 164981
loss: 1.0060005187988281,grad_norm: 0.9999990073308016, iteration: 164982
loss: 1.00017249584198,grad_norm: 0.849389410179669, iteration: 164983
loss: 0.9444529414176941,grad_norm: 0.999999149715978, iteration: 164984
loss: 0.9940022230148315,grad_norm: 0.9999990708123232, iteration: 164985
loss: 0.9848676323890686,grad_norm: 0.9220055923382124, iteration: 164986
loss: 1.0356134176254272,grad_norm: 0.9581295398150297, iteration: 164987
loss: 0.9891830086708069,grad_norm: 0.9999990284814975, iteration: 164988
loss: 1.0084789991378784,grad_norm: 0.9999993247744272, iteration: 164989
loss: 0.9867964386940002,grad_norm: 0.9999990665628625, iteration: 164990
loss: 1.0366607904434204,grad_norm: 0.9795278488895366, iteration: 164991
loss: 1.0066354274749756,grad_norm: 0.9999991179711026, iteration: 164992
loss: 1.0006622076034546,grad_norm: 0.999999436431957, iteration: 164993
loss: 1.0031803846359253,grad_norm: 0.7492639983634349, iteration: 164994
loss: 1.0996681451797485,grad_norm: 0.999999957855035, iteration: 164995
loss: 0.9795926809310913,grad_norm: 0.9999998767829951, iteration: 164996
loss: 1.0589052438735962,grad_norm: 0.9999995895771725, iteration: 164997
loss: 0.9861408472061157,grad_norm: 0.8842164304040009, iteration: 164998
loss: 0.9970209002494812,grad_norm: 0.944743506617735, iteration: 164999
loss: 1.0043751001358032,grad_norm: 0.9611300502583229, iteration: 165000
loss: 0.9942067265510559,grad_norm: 0.9999999178286776, iteration: 165001
loss: 0.9932956695556641,grad_norm: 0.9999997675319416, iteration: 165002
loss: 0.9549248218536377,grad_norm: 0.9725947197221672, iteration: 165003
loss: 0.9861640334129333,grad_norm: 0.999999173115476, iteration: 165004
loss: 0.9897692799568176,grad_norm: 0.924977581300069, iteration: 165005
loss: 1.0402859449386597,grad_norm: 0.9999990856016927, iteration: 165006
loss: 0.9894317388534546,grad_norm: 0.8947046892625482, iteration: 165007
loss: 0.9696893095970154,grad_norm: 0.8972341315656954, iteration: 165008
loss: 1.0326896905899048,grad_norm: 0.9999989946888995, iteration: 165009
loss: 1.0206694602966309,grad_norm: 0.9626029583203524, iteration: 165010
loss: 0.9854781627655029,grad_norm: 0.8627938208930414, iteration: 165011
loss: 1.0203330516815186,grad_norm: 0.9999991230770097, iteration: 165012
loss: 1.0239458084106445,grad_norm: 0.9091627967723337, iteration: 165013
loss: 1.007033348083496,grad_norm: 0.9372950077706718, iteration: 165014
loss: 1.0006710290908813,grad_norm: 0.9999991540139284, iteration: 165015
loss: 1.0097665786743164,grad_norm: 0.9999992711653458, iteration: 165016
loss: 1.0142995119094849,grad_norm: 0.9999990708541896, iteration: 165017
loss: 1.0192270278930664,grad_norm: 0.8947845293836101, iteration: 165018
loss: 1.0006701946258545,grad_norm: 0.9999990818722053, iteration: 165019
loss: 1.0437170267105103,grad_norm: 0.9999998135852484, iteration: 165020
loss: 0.991798996925354,grad_norm: 0.9829581180730627, iteration: 165021
loss: 1.0055649280548096,grad_norm: 0.936184258941577, iteration: 165022
loss: 1.0052212476730347,grad_norm: 1.000000035225084, iteration: 165023
loss: 0.9960302710533142,grad_norm: 0.9538302659697844, iteration: 165024
loss: 0.9889024496078491,grad_norm: 0.9999992301510578, iteration: 165025
loss: 1.0071847438812256,grad_norm: 0.9999993348973729, iteration: 165026
loss: 1.001118779182434,grad_norm: 0.9999991723440772, iteration: 165027
loss: 0.9934825897216797,grad_norm: 0.9999990792192475, iteration: 165028
loss: 1.0017958879470825,grad_norm: 0.8238451976344102, iteration: 165029
loss: 1.0116640329360962,grad_norm: 0.9999994212201274, iteration: 165030
loss: 1.0194039344787598,grad_norm: 0.8539519291397527, iteration: 165031
loss: 1.0274122953414917,grad_norm: 0.999999581161954, iteration: 165032
loss: 1.000532865524292,grad_norm: 0.9999998685304485, iteration: 165033
loss: 0.9993098378181458,grad_norm: 0.9999991110999197, iteration: 165034
loss: 0.9994375109672546,grad_norm: 0.9999989837464565, iteration: 165035
loss: 0.9377450346946716,grad_norm: 0.9999991905585083, iteration: 165036
loss: 1.0055617094039917,grad_norm: 0.9999990108725136, iteration: 165037
loss: 0.9826797246932983,grad_norm: 0.9810896523238092, iteration: 165038
loss: 1.0101611614227295,grad_norm: 0.8566361366330558, iteration: 165039
loss: 0.9970701336860657,grad_norm: 0.9999992422882111, iteration: 165040
loss: 1.0268568992614746,grad_norm: 0.9999991269217847, iteration: 165041
loss: 1.0021566152572632,grad_norm: 0.9999991538515365, iteration: 165042
loss: 0.9702920913696289,grad_norm: 0.9217687874327206, iteration: 165043
loss: 1.0118727684020996,grad_norm: 0.999999162336549, iteration: 165044
loss: 1.0036052465438843,grad_norm: 0.9999991682205368, iteration: 165045
loss: 0.9811502695083618,grad_norm: 0.7685249591327828, iteration: 165046
loss: 0.9969317317008972,grad_norm: 0.9999992435362764, iteration: 165047
loss: 1.0121827125549316,grad_norm: 0.9999996911396022, iteration: 165048
loss: 0.935958206653595,grad_norm: 0.8836295659304443, iteration: 165049
loss: 0.994307279586792,grad_norm: 0.9999991625624123, iteration: 165050
loss: 1.0104459524154663,grad_norm: 0.86047158421916, iteration: 165051
loss: 1.008765459060669,grad_norm: 0.9999994257736492, iteration: 165052
loss: 0.9993851184844971,grad_norm: 0.949178224943867, iteration: 165053
loss: 0.9889332056045532,grad_norm: 0.9311155547090405, iteration: 165054
loss: 0.9619152545928955,grad_norm: 0.9472384785822263, iteration: 165055
loss: 1.0080283880233765,grad_norm: 0.8692302120137593, iteration: 165056
loss: 1.0011584758758545,grad_norm: 0.9999993102156624, iteration: 165057
loss: 0.9900346994400024,grad_norm: 0.9999993518109029, iteration: 165058
loss: 1.0060060024261475,grad_norm: 0.9999991548614967, iteration: 165059
loss: 1.0544673204421997,grad_norm: 0.9999998133824824, iteration: 165060
loss: 1.0204814672470093,grad_norm: 0.9999989471168801, iteration: 165061
loss: 0.9993279576301575,grad_norm: 0.9727367776519031, iteration: 165062
loss: 1.007303237915039,grad_norm: 0.9809375864227194, iteration: 165063
loss: 0.9978032112121582,grad_norm: 0.9999990952572712, iteration: 165064
loss: 1.1477879285812378,grad_norm: 0.9754075128302948, iteration: 165065
loss: 0.9809459447860718,grad_norm: 0.9288473967532337, iteration: 165066
loss: 0.9590455889701843,grad_norm: 0.9999989744081758, iteration: 165067
loss: 0.9901622533798218,grad_norm: 0.923538631094761, iteration: 165068
loss: 0.9830979108810425,grad_norm: 0.8316322644876375, iteration: 165069
loss: 0.994956910610199,grad_norm: 0.9043788074311727, iteration: 165070
loss: 0.9915924072265625,grad_norm: 0.9753663772637416, iteration: 165071
loss: 1.0124095678329468,grad_norm: 0.9999991043225128, iteration: 165072
loss: 0.9515436291694641,grad_norm: 0.9999990800632036, iteration: 165073
loss: 1.008169412612915,grad_norm: 0.9999989761146678, iteration: 165074
loss: 0.9469708800315857,grad_norm: 0.9938169304637484, iteration: 165075
loss: 0.997225821018219,grad_norm: 0.9999991261097598, iteration: 165076
loss: 0.9811121225357056,grad_norm: 0.8226986269243833, iteration: 165077
loss: 0.9566780924797058,grad_norm: 0.9999991394841875, iteration: 165078
loss: 1.0287247896194458,grad_norm: 0.9999990545612332, iteration: 165079
loss: 1.0200471878051758,grad_norm: 0.9999991368596146, iteration: 165080
loss: 0.9881362915039062,grad_norm: 0.9999992026342345, iteration: 165081
loss: 0.9967220425605774,grad_norm: 0.9493361411479795, iteration: 165082
loss: 0.9923321604728699,grad_norm: 0.8541932738475506, iteration: 165083
loss: 1.0283267498016357,grad_norm: 0.999999067214891, iteration: 165084
loss: 0.9936680197715759,grad_norm: 0.9999991314380502, iteration: 165085
loss: 0.988315761089325,grad_norm: 0.8770051918176832, iteration: 165086
loss: 1.0122277736663818,grad_norm: 0.9021851226503081, iteration: 165087
loss: 0.9794800877571106,grad_norm: 0.960864394716387, iteration: 165088
loss: 0.9910222887992859,grad_norm: 0.9999991377604058, iteration: 165089
loss: 1.0130547285079956,grad_norm: 0.8772150595767441, iteration: 165090
loss: 1.0160340070724487,grad_norm: 0.894172144493701, iteration: 165091
loss: 0.9656900763511658,grad_norm: 0.9080008087639292, iteration: 165092
loss: 1.0019880533218384,grad_norm: 0.9999993605325227, iteration: 165093
loss: 0.9991604089736938,grad_norm: 0.9999989935231813, iteration: 165094
loss: 0.9858055710792542,grad_norm: 0.9999990784115164, iteration: 165095
loss: 0.9743277430534363,grad_norm: 0.9866873923955042, iteration: 165096
loss: 1.0061277151107788,grad_norm: 0.9999991238944681, iteration: 165097
loss: 0.9835445880889893,grad_norm: 0.8559659995543598, iteration: 165098
loss: 1.0048702955245972,grad_norm: 0.9999990949714009, iteration: 165099
loss: 1.011982798576355,grad_norm: 0.9517935018914038, iteration: 165100
loss: 0.9820853471755981,grad_norm: 0.9744660728073593, iteration: 165101
loss: 1.0082565546035767,grad_norm: 0.9944686874409985, iteration: 165102
loss: 1.022531509399414,grad_norm: 0.9056168414972373, iteration: 165103
loss: 1.010441780090332,grad_norm: 0.9456075730327455, iteration: 165104
loss: 0.9830904006958008,grad_norm: 0.8947748670643201, iteration: 165105
loss: 1.0121017694473267,grad_norm: 0.9339058221936126, iteration: 165106
loss: 1.0130749940872192,grad_norm: 0.9970614862017241, iteration: 165107
loss: 0.9492620825767517,grad_norm: 0.9999992162931762, iteration: 165108
loss: 0.9528660774230957,grad_norm: 0.999999051309258, iteration: 165109
loss: 1.0086586475372314,grad_norm: 0.9999991671531792, iteration: 165110
loss: 0.9854761958122253,grad_norm: 0.9999990155390758, iteration: 165111
loss: 1.0209970474243164,grad_norm: 0.8659898625054155, iteration: 165112
loss: 1.0089714527130127,grad_norm: 0.9788533753626424, iteration: 165113
loss: 1.012792706489563,grad_norm: 0.9999993291955382, iteration: 165114
loss: 1.0188770294189453,grad_norm: 0.926514825484432, iteration: 165115
loss: 0.96041339635849,grad_norm: 0.9222371031262674, iteration: 165116
loss: 0.9921122789382935,grad_norm: 0.999998991479032, iteration: 165117
loss: 1.008211374282837,grad_norm: 0.9999990422840405, iteration: 165118
loss: 0.9937672019004822,grad_norm: 0.9372146921079979, iteration: 165119
loss: 1.027743935585022,grad_norm: 0.9999991919472001, iteration: 165120
loss: 1.0403425693511963,grad_norm: 0.9621585545816345, iteration: 165121
loss: 1.0266938209533691,grad_norm: 0.9802205470869059, iteration: 165122
loss: 1.0176039934158325,grad_norm: 0.8458688898223735, iteration: 165123
loss: 0.9571318626403809,grad_norm: 0.9538374274029155, iteration: 165124
loss: 0.9720852971076965,grad_norm: 0.999999249840136, iteration: 165125
loss: 1.008406400680542,grad_norm: 0.9999991627584959, iteration: 165126
loss: 0.9840295314788818,grad_norm: 0.9117126739820813, iteration: 165127
loss: 0.9841153621673584,grad_norm: 0.9999991133788277, iteration: 165128
loss: 1.0048152208328247,grad_norm: 0.9339655270851697, iteration: 165129
loss: 1.0112833976745605,grad_norm: 0.9849911991734952, iteration: 165130
loss: 0.9795268177986145,grad_norm: 0.9999990192086885, iteration: 165131
loss: 1.013899803161621,grad_norm: 0.8450639777101641, iteration: 165132
loss: 1.0093811750411987,grad_norm: 0.8930114872053793, iteration: 165133
loss: 1.0161851644515991,grad_norm: 0.9992971941481645, iteration: 165134
loss: 0.9508965611457825,grad_norm: 0.9603212225785335, iteration: 165135
loss: 0.9775901436805725,grad_norm: 0.959969582277001, iteration: 165136
loss: 1.013550877571106,grad_norm: 0.9999991001672035, iteration: 165137
loss: 1.0185576677322388,grad_norm: 0.9438021299323411, iteration: 165138
loss: 0.9972898960113525,grad_norm: 0.9999992835859378, iteration: 165139
loss: 0.9800047278404236,grad_norm: 0.9999991526410439, iteration: 165140
loss: 1.0060607194900513,grad_norm: 0.9833498652365749, iteration: 165141
loss: 0.9800596237182617,grad_norm: 0.9501480112955525, iteration: 165142
loss: 1.0183838605880737,grad_norm: 0.7898745311730074, iteration: 165143
loss: 1.0055173635482788,grad_norm: 0.8185820780702323, iteration: 165144
loss: 0.9880269765853882,grad_norm: 0.9999992598317227, iteration: 165145
loss: 0.9917572736740112,grad_norm: 0.7832236181812321, iteration: 165146
loss: 1.0020759105682373,grad_norm: 0.8992728489319383, iteration: 165147
loss: 0.9789657592773438,grad_norm: 0.9999991305211041, iteration: 165148
loss: 1.0136022567749023,grad_norm: 0.9535036550757853, iteration: 165149
loss: 0.961233913898468,grad_norm: 0.9672252518097874, iteration: 165150
loss: 1.008421778678894,grad_norm: 0.9999990424804598, iteration: 165151
loss: 0.9749559760093689,grad_norm: 0.9999992524255452, iteration: 165152
loss: 1.0154606103897095,grad_norm: 0.9999990116553222, iteration: 165153
loss: 0.9941501617431641,grad_norm: 0.9768236778936819, iteration: 165154
loss: 1.0317147970199585,grad_norm: 0.9999990469546596, iteration: 165155
loss: 1.032853126525879,grad_norm: 0.929056097123305, iteration: 165156
loss: 0.9737428426742554,grad_norm: 0.9686848386434783, iteration: 165157
loss: 0.979610025882721,grad_norm: 0.9999992478489956, iteration: 165158
loss: 0.9920582175254822,grad_norm: 0.9999991237831604, iteration: 165159
loss: 1.0116959810256958,grad_norm: 0.9316380891875292, iteration: 165160
loss: 1.019992470741272,grad_norm: 0.9169643474462769, iteration: 165161
loss: 0.9909489750862122,grad_norm: 0.9999991646555549, iteration: 165162
loss: 1.0081889629364014,grad_norm: 0.9306087563089658, iteration: 165163
loss: 1.0063797235488892,grad_norm: 0.9813785523594861, iteration: 165164
loss: 0.9715515971183777,grad_norm: 0.9863136844136801, iteration: 165165
loss: 0.9921320676803589,grad_norm: 0.936916577271867, iteration: 165166
loss: 1.0332564115524292,grad_norm: 0.9958738310760905, iteration: 165167
loss: 0.9731043577194214,grad_norm: 0.9999992311070067, iteration: 165168
loss: 0.9937089681625366,grad_norm: 0.9483254803467388, iteration: 165169
loss: 1.0050851106643677,grad_norm: 0.9999991518244548, iteration: 165170
loss: 0.9996561408042908,grad_norm: 0.9997174918939093, iteration: 165171
loss: 0.9923049807548523,grad_norm: 0.980560903844234, iteration: 165172
loss: 1.0149110555648804,grad_norm: 0.9999992470737266, iteration: 165173
loss: 0.98667311668396,grad_norm: 0.9986971254822968, iteration: 165174
loss: 1.0244888067245483,grad_norm: 0.9949877930975308, iteration: 165175
loss: 1.0196669101715088,grad_norm: 0.9999992480967489, iteration: 165176
loss: 1.003415822982788,grad_norm: 0.8500154694186115, iteration: 165177
loss: 1.0074690580368042,grad_norm: 0.8530843758206542, iteration: 165178
loss: 0.9802066087722778,grad_norm: 0.8667715897368051, iteration: 165179
loss: 1.01242995262146,grad_norm: 0.9575521721248753, iteration: 165180
loss: 1.0220285654067993,grad_norm: 0.9999991297980249, iteration: 165181
loss: 1.0280635356903076,grad_norm: 0.9999991361754152, iteration: 165182
loss: 0.9999635219573975,grad_norm: 0.958796841177148, iteration: 165183
loss: 0.9574555158615112,grad_norm: 0.8787804525854152, iteration: 165184
loss: 0.9488121271133423,grad_norm: 0.9999991525519236, iteration: 165185
loss: 1.0164982080459595,grad_norm: 0.9175906218262081, iteration: 165186
loss: 1.0067487955093384,grad_norm: 0.999999224453759, iteration: 165187
loss: 0.966780960559845,grad_norm: 0.9999989709678375, iteration: 165188
loss: 0.988189697265625,grad_norm: 0.9178174892230623, iteration: 165189
loss: 1.0347003936767578,grad_norm: 0.9677453207301322, iteration: 165190
loss: 1.0039838552474976,grad_norm: 0.9999989971565811, iteration: 165191
loss: 0.961303174495697,grad_norm: 0.9999992265770278, iteration: 165192
loss: 0.9799045920372009,grad_norm: 0.9999991951279631, iteration: 165193
loss: 1.020750880241394,grad_norm: 0.946376076196627, iteration: 165194
loss: 0.9876998662948608,grad_norm: 0.9999991931876546, iteration: 165195
loss: 1.0276752710342407,grad_norm: 0.9999991985921123, iteration: 165196
loss: 0.9886828660964966,grad_norm: 0.9587067072166551, iteration: 165197
loss: 0.9790844917297363,grad_norm: 0.9999998520144407, iteration: 165198
loss: 1.0209122896194458,grad_norm: 0.9999990309950851, iteration: 165199
loss: 1.0035310983657837,grad_norm: 0.9459656773064605, iteration: 165200
loss: 0.999386191368103,grad_norm: 0.9345947818300657, iteration: 165201
loss: 1.0273178815841675,grad_norm: 0.9459913742420065, iteration: 165202
loss: 0.9655433297157288,grad_norm: 0.946823953268275, iteration: 165203
loss: 0.9901015758514404,grad_norm: 0.801804309712928, iteration: 165204
loss: 0.9952365756034851,grad_norm: 0.9999992053731628, iteration: 165205
loss: 1.0279345512390137,grad_norm: 0.8689825321932371, iteration: 165206
loss: 1.0127578973770142,grad_norm: 0.9999991335916125, iteration: 165207
loss: 0.9604659080505371,grad_norm: 0.9999998518834393, iteration: 165208
loss: 1.018296480178833,grad_norm: 0.9944228603650791, iteration: 165209
loss: 1.0387928485870361,grad_norm: 0.9999992286077266, iteration: 165210
loss: 0.998806357383728,grad_norm: 0.9999992052887153, iteration: 165211
loss: 1.0285276174545288,grad_norm: 0.9999992189102114, iteration: 165212
loss: 0.9948859810829163,grad_norm: 0.972766788419963, iteration: 165213
loss: 0.992085337638855,grad_norm: 0.8364423998887676, iteration: 165214
loss: 1.0116666555404663,grad_norm: 0.9999991673287886, iteration: 165215
loss: 0.9908574819564819,grad_norm: 0.9650878604667907, iteration: 165216
loss: 1.188382625579834,grad_norm: 0.9999989880496418, iteration: 165217
loss: 0.9856796264648438,grad_norm: 0.999999124606836, iteration: 165218
loss: 1.0383678674697876,grad_norm: 0.9999989502047899, iteration: 165219
loss: 1.0258491039276123,grad_norm: 0.7593738338678973, iteration: 165220
loss: 1.019518494606018,grad_norm: 0.9999991038693642, iteration: 165221
loss: 1.0268718004226685,grad_norm: 0.9999990965583497, iteration: 165222
loss: 0.9791247248649597,grad_norm: 0.9706528607811691, iteration: 165223
loss: 1.0086828470230103,grad_norm: 0.9371594729587279, iteration: 165224
loss: 1.0125510692596436,grad_norm: 0.9999990755848054, iteration: 165225
loss: 1.0176516771316528,grad_norm: 0.9999992451236427, iteration: 165226
loss: 0.9977329969406128,grad_norm: 0.9999992785884305, iteration: 165227
loss: 1.0152915716171265,grad_norm: 0.9999990890760737, iteration: 165228
loss: 1.0024486780166626,grad_norm: 0.9486364560959785, iteration: 165229
loss: 0.9776051640510559,grad_norm: 0.9999990620569604, iteration: 165230
loss: 1.0147805213928223,grad_norm: 0.8596568823602677, iteration: 165231
loss: 0.9839795231819153,grad_norm: 0.999999153547379, iteration: 165232
loss: 0.9960962533950806,grad_norm: 0.9999991700291401, iteration: 165233
loss: 0.9760323762893677,grad_norm: 0.9999992025735129, iteration: 165234
loss: 1.0032603740692139,grad_norm: 0.88302668176262, iteration: 165235
loss: 0.9821125268936157,grad_norm: 0.9999992609773376, iteration: 165236
loss: 0.9844015836715698,grad_norm: 0.9235948575162055, iteration: 165237
loss: 0.9759377241134644,grad_norm: 0.999999270224889, iteration: 165238
loss: 1.0286756753921509,grad_norm: 0.9999991615308307, iteration: 165239
loss: 0.9942007064819336,grad_norm: 0.9999991253593394, iteration: 165240
loss: 1.0033396482467651,grad_norm: 0.9999991095260251, iteration: 165241
loss: 0.987504780292511,grad_norm: 0.9999991395202817, iteration: 165242
loss: 1.0010135173797607,grad_norm: 0.9376388993813473, iteration: 165243
loss: 0.9760984778404236,grad_norm: 0.9169066260618702, iteration: 165244
loss: 0.9817684888839722,grad_norm: 0.9999989640850047, iteration: 165245
loss: 0.9822424650192261,grad_norm: 0.8336983243240564, iteration: 165246
loss: 1.0174092054367065,grad_norm: 0.9608854318128262, iteration: 165247
loss: 0.9680398106575012,grad_norm: 0.9599305539221343, iteration: 165248
loss: 0.9701823592185974,grad_norm: 0.9999990963830879, iteration: 165249
loss: 1.016072392463684,grad_norm: 0.9326684335866235, iteration: 165250
loss: 1.0346431732177734,grad_norm: 0.8929046345776043, iteration: 165251
loss: 1.0113617181777954,grad_norm: 0.965715700237983, iteration: 165252
loss: 1.0139927864074707,grad_norm: 0.8304498390348672, iteration: 165253
loss: 1.0231835842132568,grad_norm: 0.8653469892658159, iteration: 165254
loss: 1.0087213516235352,grad_norm: 0.9281734330557853, iteration: 165255
loss: 0.9662242531776428,grad_norm: 0.9999990558561456, iteration: 165256
loss: 0.9499298334121704,grad_norm: 0.9999991697059126, iteration: 165257
loss: 0.9928580522537231,grad_norm: 0.9457200303973574, iteration: 165258
loss: 1.0068180561065674,grad_norm: 0.9999991597496868, iteration: 165259
loss: 1.0302149057388306,grad_norm: 0.9999991820351567, iteration: 165260
loss: 0.9955196976661682,grad_norm: 0.9999992035403127, iteration: 165261
loss: 0.9668967127799988,grad_norm: 0.9999991777980967, iteration: 165262
loss: 1.008169174194336,grad_norm: 0.9936473156384055, iteration: 165263
loss: 0.9579768180847168,grad_norm: 0.906560271564429, iteration: 165264
loss: 0.9798193573951721,grad_norm: 0.9999992545092119, iteration: 165265
loss: 1.0404064655303955,grad_norm: 0.999999121749088, iteration: 165266
loss: 0.9582938551902771,grad_norm: 0.9399100495420535, iteration: 165267
loss: 1.0056350231170654,grad_norm: 0.9999993007967065, iteration: 165268
loss: 1.0458130836486816,grad_norm: 0.9999995142127707, iteration: 165269
loss: 0.9747827053070068,grad_norm: 0.9999990592903597, iteration: 165270
loss: 1.0065281391143799,grad_norm: 0.999999154594238, iteration: 165271
loss: 1.0148518085479736,grad_norm: 0.9999992268496309, iteration: 165272
loss: 0.985398530960083,grad_norm: 0.999999244306982, iteration: 165273
loss: 0.9926634430885315,grad_norm: 0.9325808084149304, iteration: 165274
loss: 0.9810047149658203,grad_norm: 0.9219321596931549, iteration: 165275
loss: 0.9702646136283875,grad_norm: 0.9999990858694765, iteration: 165276
loss: 1.0027309656143188,grad_norm: 0.9999990736218868, iteration: 165277
loss: 1.0181176662445068,grad_norm: 0.9999990330968346, iteration: 165278
loss: 1.0013227462768555,grad_norm: 0.9999989600584356, iteration: 165279
loss: 0.9890993237495422,grad_norm: 0.9999992589629673, iteration: 165280
loss: 1.0063397884368896,grad_norm: 0.9999992696975317, iteration: 165281
loss: 0.9759072661399841,grad_norm: 0.9604727228189517, iteration: 165282
loss: 1.012684941291809,grad_norm: 0.8923322006287012, iteration: 165283
loss: 0.9876748919487,grad_norm: 0.9752704766934912, iteration: 165284
loss: 0.967304527759552,grad_norm: 0.9776482334540714, iteration: 165285
loss: 1.0224450826644897,grad_norm: 0.9999991033749613, iteration: 165286
loss: 1.0116770267486572,grad_norm: 0.8194747101743276, iteration: 165287
loss: 1.0271674394607544,grad_norm: 0.9996222018442976, iteration: 165288
loss: 0.9956839084625244,grad_norm: 0.8342148403024883, iteration: 165289
loss: 0.9693360924720764,grad_norm: 0.9999991107211639, iteration: 165290
loss: 0.9655055999755859,grad_norm: 0.99999917571873, iteration: 165291
loss: 0.9922388792037964,grad_norm: 0.9999993206271391, iteration: 165292
loss: 0.9778669476509094,grad_norm: 0.9748531267472845, iteration: 165293
loss: 1.0074995756149292,grad_norm: 0.9788827813099573, iteration: 165294
loss: 0.9788051247596741,grad_norm: 0.9999992339824577, iteration: 165295
loss: 1.019872784614563,grad_norm: 0.9999991241650907, iteration: 165296
loss: 0.9874328374862671,grad_norm: 0.9999991610491884, iteration: 165297
loss: 0.9860062003135681,grad_norm: 0.9999990914491947, iteration: 165298
loss: 0.9874125719070435,grad_norm: 0.8633298627990273, iteration: 165299
loss: 0.9514351487159729,grad_norm: 0.9999992270980541, iteration: 165300
loss: 1.0168356895446777,grad_norm: 0.999999222247392, iteration: 165301
loss: 1.0293954610824585,grad_norm: 0.9999990694460745, iteration: 165302
loss: 0.9754388332366943,grad_norm: 0.8561197317282195, iteration: 165303
loss: 1.0050336122512817,grad_norm: 0.9999991173568222, iteration: 165304
loss: 0.9974858164787292,grad_norm: 0.9999992239814176, iteration: 165305
loss: 1.00092351436615,grad_norm: 0.9999992501059616, iteration: 165306
loss: 0.987280547618866,grad_norm: 0.9934840434514145, iteration: 165307
loss: 0.997239887714386,grad_norm: 0.9999991658492655, iteration: 165308
loss: 1.0308005809783936,grad_norm: 0.9999990308127982, iteration: 165309
loss: 0.948627769947052,grad_norm: 0.9999992276690015, iteration: 165310
loss: 0.9800549149513245,grad_norm: 0.9999989581272911, iteration: 165311
loss: 0.9820335507392883,grad_norm: 0.9999990569207938, iteration: 165312
loss: 0.9630407094955444,grad_norm: 0.9999991647824223, iteration: 165313
loss: 0.9932978749275208,grad_norm: 0.999999127043646, iteration: 165314
loss: 1.0301252603530884,grad_norm: 0.8781935136918676, iteration: 165315
loss: 0.9688212275505066,grad_norm: 0.8877700929327652, iteration: 165316
loss: 0.9791954159736633,grad_norm: 0.8298688252633619, iteration: 165317
loss: 0.9928601384162903,grad_norm: 0.8346517548563857, iteration: 165318
loss: 1.018874168395996,grad_norm: 0.9919367796860596, iteration: 165319
loss: 1.0313000679016113,grad_norm: 0.9811678381947955, iteration: 165320
loss: 0.9900980591773987,grad_norm: 0.9999990169107247, iteration: 165321
loss: 1.0166254043579102,grad_norm: 0.9999990626969532, iteration: 165322
loss: 0.972491443157196,grad_norm: 0.9999992188448353, iteration: 165323
loss: 0.991853654384613,grad_norm: 0.9194074178791982, iteration: 165324
loss: 0.9952718615531921,grad_norm: 0.9272519999314894, iteration: 165325
loss: 0.9914471507072449,grad_norm: 0.9322745758789587, iteration: 165326
loss: 0.9958657026290894,grad_norm: 0.8283465562914515, iteration: 165327
loss: 1.012995719909668,grad_norm: 0.9788776033057826, iteration: 165328
loss: 1.0175244808197021,grad_norm: 0.8724621801428559, iteration: 165329
loss: 1.006705641746521,grad_norm: 0.8134263667525403, iteration: 165330
loss: 0.9563756585121155,grad_norm: 0.9999990209300353, iteration: 165331
loss: 1.0363471508026123,grad_norm: 0.9999991792399201, iteration: 165332
loss: 0.9917195439338684,grad_norm: 0.9999991901943825, iteration: 165333
loss: 0.9998520016670227,grad_norm: 0.9941884430346157, iteration: 165334
loss: 0.9864898920059204,grad_norm: 0.9378841188224329, iteration: 165335
loss: 1.0360463857650757,grad_norm: 0.9999992357944553, iteration: 165336
loss: 1.0134025812149048,grad_norm: 0.9999990893649835, iteration: 165337
loss: 1.0096073150634766,grad_norm: 0.938917873658866, iteration: 165338
loss: 1.019303321838379,grad_norm: 0.8363605365954334, iteration: 165339
loss: 0.9905137419700623,grad_norm: 0.951754549497682, iteration: 165340
loss: 0.9971224665641785,grad_norm: 0.9999990043623019, iteration: 165341
loss: 0.9705238938331604,grad_norm: 0.7360894487601279, iteration: 165342
loss: 1.0141198635101318,grad_norm: 0.9999989946099149, iteration: 165343
loss: 1.0163496732711792,grad_norm: 0.9999992518749083, iteration: 165344
loss: 0.988680362701416,grad_norm: 0.9999991354052087, iteration: 165345
loss: 0.9877152442932129,grad_norm: 0.9999990652353883, iteration: 165346
loss: 1.0233956575393677,grad_norm: 0.999999229834214, iteration: 165347
loss: 0.985929012298584,grad_norm: 0.9999992557441704, iteration: 165348
loss: 0.9905441999435425,grad_norm: 0.9999991930577595, iteration: 165349
loss: 1.028324007987976,grad_norm: 0.999999062968808, iteration: 165350
loss: 0.9888890981674194,grad_norm: 0.9999989999660381, iteration: 165351
loss: 0.9967558979988098,grad_norm: 0.9999991833333374, iteration: 165352
loss: 1.0039318799972534,grad_norm: 0.999999266404975, iteration: 165353
loss: 0.9953940510749817,grad_norm: 0.9984096139293154, iteration: 165354
loss: 0.9843024611473083,grad_norm: 0.9658606759456838, iteration: 165355
loss: 0.9919596314430237,grad_norm: 0.9924583724306426, iteration: 165356
loss: 0.9726687073707581,grad_norm: 0.9999991639889684, iteration: 165357
loss: 1.00963294506073,grad_norm: 0.999999249421944, iteration: 165358
loss: 1.007918119430542,grad_norm: 0.9406674138012433, iteration: 165359
loss: 0.9931738972663879,grad_norm: 0.8950889636928865, iteration: 165360
loss: 0.9509083032608032,grad_norm: 0.8780081660311271, iteration: 165361
loss: 1.0339499711990356,grad_norm: 0.999999019588405, iteration: 165362
loss: 1.0206552743911743,grad_norm: 0.7978829321025575, iteration: 165363
loss: 0.9956468939781189,grad_norm: 0.9281697446806819, iteration: 165364
loss: 0.9781944751739502,grad_norm: 0.8690410985998659, iteration: 165365
loss: 0.9629111886024475,grad_norm: 0.9999992297421191, iteration: 165366
loss: 1.025802493095398,grad_norm: 0.9999991260223551, iteration: 165367
loss: 1.0083054304122925,grad_norm: 0.9848010065758738, iteration: 165368
loss: 1.0048316717147827,grad_norm: 0.9953452429720365, iteration: 165369
loss: 0.9990671277046204,grad_norm: 0.8851090005670317, iteration: 165370
loss: 0.978384792804718,grad_norm: 0.8862174739777247, iteration: 165371
loss: 1.0021682977676392,grad_norm: 0.940471021273105, iteration: 165372
loss: 0.9499979615211487,grad_norm: 0.9999990161786075, iteration: 165373
loss: 1.0171948671340942,grad_norm: 0.999999130146646, iteration: 165374
loss: 1.006008505821228,grad_norm: 0.9999992050499116, iteration: 165375
loss: 0.9852719306945801,grad_norm: 0.999999294814714, iteration: 165376
loss: 0.9992038607597351,grad_norm: 0.9582644953725065, iteration: 165377
loss: 1.0164892673492432,grad_norm: 0.9999991367377337, iteration: 165378
loss: 0.977504312992096,grad_norm: 0.999999293077444, iteration: 165379
loss: 0.9811248183250427,grad_norm: 0.932490492547766, iteration: 165380
loss: 0.9992791414260864,grad_norm: 0.798193158684954, iteration: 165381
loss: 1.0024027824401855,grad_norm: 0.999999132930493, iteration: 165382
loss: 1.0112193822860718,grad_norm: 0.9270953424390975, iteration: 165383
loss: 0.9733515977859497,grad_norm: 0.920892709314146, iteration: 165384
loss: 0.967926025390625,grad_norm: 0.9999992678368994, iteration: 165385
loss: 0.9506219029426575,grad_norm: 0.9846921959943732, iteration: 165386
loss: 1.010574221611023,grad_norm: 0.9177053650524915, iteration: 165387
loss: 1.0081794261932373,grad_norm: 0.909632781008415, iteration: 165388
loss: 1.0216023921966553,grad_norm: 0.833127296224728, iteration: 165389
loss: 0.9815449714660645,grad_norm: 0.9544182900572415, iteration: 165390
loss: 0.9910057783126831,grad_norm: 0.9999992433475459, iteration: 165391
loss: 0.9943042397499084,grad_norm: 0.9999992025969499, iteration: 165392
loss: 0.9673742055892944,grad_norm: 0.9832222131393865, iteration: 165393
loss: 1.0339349508285522,grad_norm: 0.9999991521891785, iteration: 165394
loss: 1.0173860788345337,grad_norm: 0.9348101592653386, iteration: 165395
loss: 1.0231850147247314,grad_norm: 0.9999990597759554, iteration: 165396
loss: 1.0026001930236816,grad_norm: 0.9812762850731664, iteration: 165397
loss: 1.0249391794204712,grad_norm: 0.999999206083256, iteration: 165398
loss: 1.0314546823501587,grad_norm: 0.999998974035212, iteration: 165399
loss: 0.965755820274353,grad_norm: 0.9999991936938716, iteration: 165400
loss: 1.0096368789672852,grad_norm: 0.9999993190899308, iteration: 165401
loss: 1.0257357358932495,grad_norm: 0.9999994628246384, iteration: 165402
loss: 1.0024603605270386,grad_norm: 0.9999992039505231, iteration: 165403
loss: 0.9517508149147034,grad_norm: 0.9999990906687738, iteration: 165404
loss: 1.0132241249084473,grad_norm: 0.9350005713026419, iteration: 165405
loss: 1.0189756155014038,grad_norm: 0.8542761069385315, iteration: 165406
loss: 0.9898523092269897,grad_norm: 0.9999992452457765, iteration: 165407
loss: 1.0095844268798828,grad_norm: 0.9982761874540519, iteration: 165408
loss: 1.0152819156646729,grad_norm: 0.9319212518447229, iteration: 165409
loss: 1.006317377090454,grad_norm: 0.9519473748610114, iteration: 165410
loss: 1.0287400484085083,grad_norm: 0.8981700111581059, iteration: 165411
loss: 1.0198923349380493,grad_norm: 0.9708527839507423, iteration: 165412
loss: 0.9987716674804688,grad_norm: 0.9999991689165815, iteration: 165413
loss: 1.0056934356689453,grad_norm: 0.9830571714463214, iteration: 165414
loss: 1.0221821069717407,grad_norm: 0.9999991300400118, iteration: 165415
loss: 0.9942960143089294,grad_norm: 0.9999991105256306, iteration: 165416
loss: 0.9810085296630859,grad_norm: 0.9765772077471065, iteration: 165417
loss: 1.044378399848938,grad_norm: 0.9999992540044704, iteration: 165418
loss: 1.0265851020812988,grad_norm: 0.8388347882441127, iteration: 165419
loss: 0.981934666633606,grad_norm: 0.9145103268459536, iteration: 165420
loss: 1.039367437362671,grad_norm: 0.9177274527339392, iteration: 165421
loss: 0.9813488721847534,grad_norm: 0.9999990208905, iteration: 165422
loss: 0.9860275983810425,grad_norm: 0.8762145544794184, iteration: 165423
loss: 1.004372000694275,grad_norm: 0.9163511602652834, iteration: 165424
loss: 0.9932706952095032,grad_norm: 0.9684747647835125, iteration: 165425
loss: 1.0008100271224976,grad_norm: 0.9999990406564937, iteration: 165426
loss: 1.047841191291809,grad_norm: 0.903131813767046, iteration: 165427
loss: 0.9576303362846375,grad_norm: 0.9938409688056292, iteration: 165428
loss: 1.0286129713058472,grad_norm: 0.9999992553683285, iteration: 165429
loss: 0.9713326692581177,grad_norm: 0.9999991524597601, iteration: 165430
loss: 1.0825448036193848,grad_norm: 0.9428142287427824, iteration: 165431
loss: 0.9935684204101562,grad_norm: 0.9944243130338702, iteration: 165432
loss: 1.011843204498291,grad_norm: 0.9999991184837468, iteration: 165433
loss: 0.9871540665626526,grad_norm: 0.9999991879709101, iteration: 165434
loss: 0.9779660701751709,grad_norm: 0.8337348840745527, iteration: 165435
loss: 1.0244895219802856,grad_norm: 0.9819913184092318, iteration: 165436
loss: 0.9985862970352173,grad_norm: 0.9999990235722912, iteration: 165437
loss: 0.9813665747642517,grad_norm: 0.9999991459572982, iteration: 165438
loss: 0.9976252913475037,grad_norm: 0.9117014119546911, iteration: 165439
loss: 1.0768890380859375,grad_norm: 0.9999992961496578, iteration: 165440
loss: 1.0078086853027344,grad_norm: 0.8459095424262033, iteration: 165441
loss: 0.9930577278137207,grad_norm: 0.9667939901250762, iteration: 165442
loss: 1.0033093690872192,grad_norm: 0.9999991665261913, iteration: 165443
loss: 0.9920459389686584,grad_norm: 0.9412826571244882, iteration: 165444
loss: 1.0136487483978271,grad_norm: 0.9999993213022245, iteration: 165445
loss: 1.0861790180206299,grad_norm: 0.9999990393703109, iteration: 165446
loss: 1.006980299949646,grad_norm: 0.9614193049015333, iteration: 165447
loss: 1.0401806831359863,grad_norm: 0.8718329363726216, iteration: 165448
loss: 0.9866713881492615,grad_norm: 0.9476763615275228, iteration: 165449
loss: 1.0300754308700562,grad_norm: 0.9999990965908891, iteration: 165450
loss: 1.0044031143188477,grad_norm: 0.9999991612925884, iteration: 165451
loss: 1.0024373531341553,grad_norm: 0.9521109449097828, iteration: 165452
loss: 0.9934020042419434,grad_norm: 0.9960802625617879, iteration: 165453
loss: 0.9776479601860046,grad_norm: 0.9999990630040014, iteration: 165454
loss: 0.9657455682754517,grad_norm: 0.9999990386444505, iteration: 165455
loss: 0.9683154821395874,grad_norm: 0.9999990386305824, iteration: 165456
loss: 1.0311284065246582,grad_norm: 0.999999130296767, iteration: 165457
loss: 1.0229599475860596,grad_norm: 0.9999990267178541, iteration: 165458
loss: 0.9846181273460388,grad_norm: 0.9999991513825583, iteration: 165459
loss: 0.9953977465629578,grad_norm: 0.9999990374820513, iteration: 165460
loss: 1.0304350852966309,grad_norm: 0.9999990773656928, iteration: 165461
loss: 0.9932606220245361,grad_norm: 0.9993525816460582, iteration: 165462
loss: 1.0198904275894165,grad_norm: 0.9610061055201075, iteration: 165463
loss: 0.9813995361328125,grad_norm: 0.9999990541317609, iteration: 165464
loss: 0.9767820239067078,grad_norm: 0.9999989867384637, iteration: 165465
loss: 0.9486767053604126,grad_norm: 0.9907982105284155, iteration: 165466
loss: 1.0360430479049683,grad_norm: 0.9257306781616155, iteration: 165467
loss: 0.9916427731513977,grad_norm: 0.9561899876786247, iteration: 165468
loss: 0.9871391654014587,grad_norm: 0.9999992323438675, iteration: 165469
loss: 0.9734250903129578,grad_norm: 0.9999989962092133, iteration: 165470
loss: 1.0110623836517334,grad_norm: 0.9999989489871532, iteration: 165471
loss: 0.9812802076339722,grad_norm: 0.9999991145841405, iteration: 165472
loss: 0.980720043182373,grad_norm: 0.9999991116025837, iteration: 165473
loss: 1.0011017322540283,grad_norm: 0.9178939764754095, iteration: 165474
loss: 0.9619904160499573,grad_norm: 0.9631268294474864, iteration: 165475
loss: 1.0224683284759521,grad_norm: 0.9999995454134409, iteration: 165476
loss: 1.0418087244033813,grad_norm: 0.9999991094445623, iteration: 165477
loss: 1.0130172967910767,grad_norm: 0.9999991325659854, iteration: 165478
loss: 1.0123233795166016,grad_norm: 0.9999990653750859, iteration: 165479
loss: 0.984138548374176,grad_norm: 0.9999990563924701, iteration: 165480
loss: 1.0037081241607666,grad_norm: 0.9999992016433451, iteration: 165481
loss: 0.9960579872131348,grad_norm: 0.8824816090325084, iteration: 165482
loss: 1.092369794845581,grad_norm: 0.9448022433716853, iteration: 165483
loss: 0.9910700917243958,grad_norm: 0.7943262358681227, iteration: 165484
loss: 0.9614905118942261,grad_norm: 0.975300755178997, iteration: 165485
loss: 1.0155186653137207,grad_norm: 0.9316205618785982, iteration: 165486
loss: 0.9915366172790527,grad_norm: 0.8326889734687428, iteration: 165487
loss: 0.983242392539978,grad_norm: 0.9999990642078184, iteration: 165488
loss: 1.0171266794204712,grad_norm: 0.8032806318928069, iteration: 165489
loss: 1.0201979875564575,grad_norm: 0.9999991545585722, iteration: 165490
loss: 1.0182934999465942,grad_norm: 0.9637318439280786, iteration: 165491
loss: 1.0332764387130737,grad_norm: 0.9999990685755024, iteration: 165492
loss: 1.032101035118103,grad_norm: 0.9382023139188338, iteration: 165493
loss: 0.9551985859870911,grad_norm: 0.9163390276604576, iteration: 165494
loss: 1.0389246940612793,grad_norm: 0.9999991514054216, iteration: 165495
loss: 1.1759016513824463,grad_norm: 0.9999997006168406, iteration: 165496
loss: 0.9780805706977844,grad_norm: 0.9999990375361357, iteration: 165497
loss: 0.9963492155075073,grad_norm: 0.9999991200182542, iteration: 165498
loss: 0.9824633598327637,grad_norm: 0.8877190402691267, iteration: 165499
loss: 1.0009914636611938,grad_norm: 0.9789368841171049, iteration: 165500
loss: 0.9851934909820557,grad_norm: 0.9999991028033698, iteration: 165501
loss: 0.9848272800445557,grad_norm: 0.9999991099509729, iteration: 165502
loss: 1.0126506090164185,grad_norm: 0.9999991738231128, iteration: 165503
loss: 1.0113707780838013,grad_norm: 0.9999991777051563, iteration: 165504
loss: 1.0323320627212524,grad_norm: 0.9999994449110619, iteration: 165505
loss: 0.9798293113708496,grad_norm: 0.9431085804999154, iteration: 165506
loss: 1.0213327407836914,grad_norm: 0.9999992860153092, iteration: 165507
loss: 0.9762019515037537,grad_norm: 0.9449468117455134, iteration: 165508
loss: 1.0086123943328857,grad_norm: 0.9807068836776415, iteration: 165509
loss: 1.0178992748260498,grad_norm: 0.9999992939316409, iteration: 165510
loss: 0.9985929131507874,grad_norm: 0.9851108987701379, iteration: 165511
loss: 0.9924800992012024,grad_norm: 0.9999990153490693, iteration: 165512
loss: 0.9986627697944641,grad_norm: 0.9999991471103948, iteration: 165513
loss: 1.014014720916748,grad_norm: 0.9999993042732309, iteration: 165514
loss: 1.0191969871520996,grad_norm: 0.9999990934658307, iteration: 165515
loss: 0.9734907746315002,grad_norm: 0.9999991987538123, iteration: 165516
loss: 1.0048744678497314,grad_norm: 0.9999992974840479, iteration: 165517
loss: 1.1561170816421509,grad_norm: 0.9999996495620552, iteration: 165518
loss: 1.1268433332443237,grad_norm: 0.9999998615414006, iteration: 165519
loss: 0.9743984341621399,grad_norm: 0.9999992626674562, iteration: 165520
loss: 1.0045725107192993,grad_norm: 0.9999990880324674, iteration: 165521
loss: 0.9914237856864929,grad_norm: 0.9999991704797007, iteration: 165522
loss: 0.9961004853248596,grad_norm: 0.9999990173491515, iteration: 165523
loss: 0.9758645296096802,grad_norm: 0.9865113644314724, iteration: 165524
loss: 0.9694229364395142,grad_norm: 0.9683265399445853, iteration: 165525
loss: 1.0186901092529297,grad_norm: 0.9635213445594514, iteration: 165526
loss: 1.010066270828247,grad_norm: 0.9122938384813735, iteration: 165527
loss: 1.0051000118255615,grad_norm: 0.9999991031162652, iteration: 165528
loss: 0.9747486114501953,grad_norm: 0.9999991640915267, iteration: 165529
loss: 0.9699563980102539,grad_norm: 0.937618067963633, iteration: 165530
loss: 1.00881826877594,grad_norm: 0.9706789754291414, iteration: 165531
loss: 0.9765467047691345,grad_norm: 0.9999990953181878, iteration: 165532
loss: 0.9808434247970581,grad_norm: 0.9449029264208267, iteration: 165533
loss: 1.0113942623138428,grad_norm: 0.9967000289433428, iteration: 165534
loss: 1.0141918659210205,grad_norm: 0.9008859100833648, iteration: 165535
loss: 1.0074752569198608,grad_norm: 0.9066008589607205, iteration: 165536
loss: 1.0018596649169922,grad_norm: 0.9050487040141718, iteration: 165537
loss: 0.9868139624595642,grad_norm: 0.8267149364019215, iteration: 165538
loss: 1.0107111930847168,grad_norm: 0.9999990152034881, iteration: 165539
loss: 1.0087565183639526,grad_norm: 0.9494537864402003, iteration: 165540
loss: 0.9961813688278198,grad_norm: 0.9999990699874846, iteration: 165541
loss: 1.008569598197937,grad_norm: 0.9999991297209825, iteration: 165542
loss: 0.9745747447013855,grad_norm: 0.9999991315165273, iteration: 165543
loss: 1.0080817937850952,grad_norm: 0.9999990714330367, iteration: 165544
loss: 0.9921369552612305,grad_norm: 0.931950826846424, iteration: 165545
loss: 1.009831428527832,grad_norm: 0.808024914343634, iteration: 165546
loss: 1.0107299089431763,grad_norm: 0.8921807170713628, iteration: 165547
loss: 1.013991355895996,grad_norm: 0.9999993309893829, iteration: 165548
loss: 1.0132938623428345,grad_norm: 0.9803091650107376, iteration: 165549
loss: 1.034611701965332,grad_norm: 0.9938162553342291, iteration: 165550
loss: 1.0519373416900635,grad_norm: 0.9999990814805363, iteration: 165551
loss: 0.9836046099662781,grad_norm: 0.9999989827519511, iteration: 165552
loss: 1.0312105417251587,grad_norm: 0.9999991067799521, iteration: 165553
loss: 0.9996063709259033,grad_norm: 0.8881169826324835, iteration: 165554
loss: 1.0179210901260376,grad_norm: 0.9999993786747171, iteration: 165555
loss: 1.015325665473938,grad_norm: 0.9022902461645219, iteration: 165556
loss: 1.023862600326538,grad_norm: 0.9097332504557686, iteration: 165557
loss: 1.0033190250396729,grad_norm: 0.9472297849043635, iteration: 165558
loss: 0.995772123336792,grad_norm: 0.8772451216798762, iteration: 165559
loss: 0.9718679785728455,grad_norm: 0.8152006358780148, iteration: 165560
loss: 1.0054419040679932,grad_norm: 0.9999991599845923, iteration: 165561
loss: 0.9998930096626282,grad_norm: 0.9112082697215617, iteration: 165562
loss: 0.9954201579093933,grad_norm: 0.9246274412065875, iteration: 165563
loss: 1.0138134956359863,grad_norm: 0.9046191444950267, iteration: 165564
loss: 0.9558667540550232,grad_norm: 0.9995405836723437, iteration: 165565
loss: 0.9993230700492859,grad_norm: 0.9203485205632111, iteration: 165566
loss: 0.943432629108429,grad_norm: 0.9999991955916817, iteration: 165567
loss: 0.9989960193634033,grad_norm: 0.933100314856233, iteration: 165568
loss: 0.9804547429084778,grad_norm: 0.9755521897999957, iteration: 165569
loss: 1.0449947118759155,grad_norm: 0.999999653716604, iteration: 165570
loss: 0.9952333569526672,grad_norm: 0.9999992261341086, iteration: 165571
loss: 0.995876669883728,grad_norm: 0.8628692206901164, iteration: 165572
loss: 0.9946459531784058,grad_norm: 0.9099704348448461, iteration: 165573
loss: 1.0082075595855713,grad_norm: 0.9999990587639109, iteration: 165574
loss: 1.0109422206878662,grad_norm: 0.999999170779679, iteration: 165575
loss: 0.9969879984855652,grad_norm: 0.9999990391405459, iteration: 165576
loss: 0.989210307598114,grad_norm: 0.9999194582854073, iteration: 165577
loss: 0.9578610062599182,grad_norm: 0.9999991756796978, iteration: 165578
loss: 0.9910158514976501,grad_norm: 0.9999990830142347, iteration: 165579
loss: 1.0006135702133179,grad_norm: 0.9999991353668862, iteration: 165580
loss: 0.9533715844154358,grad_norm: 0.913440110064291, iteration: 165581
loss: 1.0015684366226196,grad_norm: 0.9999992128779708, iteration: 165582
loss: 1.0061415433883667,grad_norm: 0.9999989954878675, iteration: 165583
loss: 1.009270191192627,grad_norm: 0.989740754881292, iteration: 165584
loss: 1.019181489944458,grad_norm: 0.9999990743240413, iteration: 165585
loss: 0.990298867225647,grad_norm: 0.9999990630082014, iteration: 165586
loss: 0.9933103322982788,grad_norm: 0.9623210764536029, iteration: 165587
loss: 1.0644471645355225,grad_norm: 0.9999992185812, iteration: 165588
loss: 0.9602609872817993,grad_norm: 0.914903951791006, iteration: 165589
loss: 1.0073240995407104,grad_norm: 0.6976752218781039, iteration: 165590
loss: 1.0013915300369263,grad_norm: 0.9377915953498641, iteration: 165591
loss: 1.0118157863616943,grad_norm: 0.9077222006488187, iteration: 165592
loss: 0.9814805388450623,grad_norm: 0.9999990721070363, iteration: 165593
loss: 0.9954025149345398,grad_norm: 0.841355744214246, iteration: 165594
loss: 0.992745578289032,grad_norm: 0.9999990474060233, iteration: 165595
loss: 0.9900379180908203,grad_norm: 0.999999205039613, iteration: 165596
loss: 0.9941545128822327,grad_norm: 0.9999991383376475, iteration: 165597
loss: 1.012017846107483,grad_norm: 0.8554692882953956, iteration: 165598
loss: 0.9872490167617798,grad_norm: 0.9999991673497918, iteration: 165599
loss: 0.972164511680603,grad_norm: 0.9999991677885598, iteration: 165600
loss: 1.0312039852142334,grad_norm: 0.9999992097890047, iteration: 165601
loss: 1.0429871082305908,grad_norm: 0.9999991119296351, iteration: 165602
loss: 1.009597897529602,grad_norm: 0.9999990488163356, iteration: 165603
loss: 0.9597424864768982,grad_norm: 0.8787936790514153, iteration: 165604
loss: 1.0034501552581787,grad_norm: 0.7549517569633428, iteration: 165605
loss: 0.9696265459060669,grad_norm: 0.8866412383131413, iteration: 165606
loss: 1.02066171169281,grad_norm: 0.852243255245705, iteration: 165607
loss: 0.9921582341194153,grad_norm: 0.9999992177659492, iteration: 165608
loss: 1.0377223491668701,grad_norm: 0.8636130607080841, iteration: 165609
loss: 1.0086263418197632,grad_norm: 0.9999989372032332, iteration: 165610
loss: 1.032269835472107,grad_norm: 0.9682672991107913, iteration: 165611
loss: 0.9512206315994263,grad_norm: 0.9898566605645748, iteration: 165612
loss: 1.0316932201385498,grad_norm: 0.9999998378072565, iteration: 165613
loss: 0.9891153573989868,grad_norm: 0.9786781997998077, iteration: 165614
loss: 0.9851612448692322,grad_norm: 0.9999991413168972, iteration: 165615
loss: 0.979002833366394,grad_norm: 0.9198327442429858, iteration: 165616
loss: 1.0277308225631714,grad_norm: 0.9367120159749689, iteration: 165617
loss: 0.9775371551513672,grad_norm: 0.9999990549176055, iteration: 165618
loss: 1.003411889076233,grad_norm: 0.9506727845641977, iteration: 165619
loss: 0.99796462059021,grad_norm: 0.8893681943983739, iteration: 165620
loss: 1.0126692056655884,grad_norm: 0.9999993042663147, iteration: 165621
loss: 0.9937889575958252,grad_norm: 0.9999991512582512, iteration: 165622
loss: 0.9815702438354492,grad_norm: 0.9146459536171573, iteration: 165623
loss: 0.9683955907821655,grad_norm: 0.9999992266875029, iteration: 165624
loss: 0.9897837042808533,grad_norm: 0.9999991218955334, iteration: 165625
loss: 0.9425511956214905,grad_norm: 0.9999991931006058, iteration: 165626
loss: 1.0213037729263306,grad_norm: 0.9999992964928118, iteration: 165627
loss: 1.0360842943191528,grad_norm: 0.9999991051704832, iteration: 165628
loss: 0.9673944711685181,grad_norm: 0.8538140990740263, iteration: 165629
loss: 1.0012518167495728,grad_norm: 0.9999995368436161, iteration: 165630
loss: 0.9897325038909912,grad_norm: 0.8746350712130757, iteration: 165631
loss: 0.9923701882362366,grad_norm: 0.9898431854828345, iteration: 165632
loss: 0.9635810852050781,grad_norm: 0.9999991312628086, iteration: 165633
loss: 1.0167834758758545,grad_norm: 0.8858051761298463, iteration: 165634
loss: 1.0367305278778076,grad_norm: 0.9999991917395501, iteration: 165635
loss: 0.9821117520332336,grad_norm: 0.8789082744522083, iteration: 165636
loss: 0.9649059772491455,grad_norm: 0.99999912356991, iteration: 165637
loss: 0.9983343482017517,grad_norm: 0.999999149638302, iteration: 165638
loss: 0.9827004075050354,grad_norm: 0.9914842943992023, iteration: 165639
loss: 0.934506893157959,grad_norm: 0.9999989906123111, iteration: 165640
loss: 1.0113846063613892,grad_norm: 0.941299754850183, iteration: 165641
loss: 1.004522442817688,grad_norm: 0.999999107865623, iteration: 165642
loss: 1.0272241830825806,grad_norm: 0.9999990984608136, iteration: 165643
loss: 1.0440990924835205,grad_norm: 0.9999990776461712, iteration: 165644
loss: 1.0156188011169434,grad_norm: 0.8444735449368588, iteration: 165645
loss: 0.9971885085105896,grad_norm: 0.9999992214721295, iteration: 165646
loss: 1.0220247507095337,grad_norm: 0.8901196302365523, iteration: 165647
loss: 1.0466036796569824,grad_norm: 0.810589328242009, iteration: 165648
loss: 1.0181845426559448,grad_norm: 0.999999118512931, iteration: 165649
loss: 0.9813733100891113,grad_norm: 0.9999992983306445, iteration: 165650
loss: 0.9914095997810364,grad_norm: 0.9183096867524113, iteration: 165651
loss: 0.9973923563957214,grad_norm: 0.999999074660345, iteration: 165652
loss: 0.9802918434143066,grad_norm: 0.8635224438233962, iteration: 165653
loss: 1.0130435228347778,grad_norm: 0.9999990176204266, iteration: 165654
loss: 1.0091997385025024,grad_norm: 0.9771316402070263, iteration: 165655
loss: 0.9528049826622009,grad_norm: 0.9765622612902611, iteration: 165656
loss: 0.9666776657104492,grad_norm: 0.9856455541671537, iteration: 165657
loss: 1.006523609161377,grad_norm: 0.8556149749712965, iteration: 165658
loss: 1.0061496496200562,grad_norm: 0.9449118072719864, iteration: 165659
loss: 1.038301706314087,grad_norm: 0.9685149086008783, iteration: 165660
loss: 0.9979060292243958,grad_norm: 0.8460362902859029, iteration: 165661
loss: 0.9842848181724548,grad_norm: 0.9238806883829614, iteration: 165662
loss: 0.9919303059577942,grad_norm: 0.9999991827036795, iteration: 165663
loss: 0.9951856732368469,grad_norm: 0.9999992016798769, iteration: 165664
loss: 1.0289864540100098,grad_norm: 0.9999993068977314, iteration: 165665
loss: 1.0148625373840332,grad_norm: 0.9999991786817616, iteration: 165666
loss: 1.0082165002822876,grad_norm: 0.9999990271041053, iteration: 165667
loss: 0.9917100071907043,grad_norm: 0.8766901044887035, iteration: 165668
loss: 1.0261765718460083,grad_norm: 0.9868920343032331, iteration: 165669
loss: 0.9815541505813599,grad_norm: 0.8096740140328264, iteration: 165670
loss: 1.0108203887939453,grad_norm: 0.9999991283846459, iteration: 165671
loss: 0.9897766709327698,grad_norm: 0.9999991833704712, iteration: 165672
loss: 0.9734440445899963,grad_norm: 0.8787334934307245, iteration: 165673
loss: 1.0116957426071167,grad_norm: 0.9098165185404173, iteration: 165674
loss: 1.087672472000122,grad_norm: 0.9999990964574653, iteration: 165675
loss: 0.9880624413490295,grad_norm: 0.8780492350773609, iteration: 165676
loss: 0.9709610342979431,grad_norm: 0.9999991024002646, iteration: 165677
loss: 1.015485167503357,grad_norm: 0.9999990818170951, iteration: 165678
loss: 1.0049062967300415,grad_norm: 0.9999994675244911, iteration: 165679
loss: 0.9889687895774841,grad_norm: 0.9999990367129967, iteration: 165680
loss: 1.0263184309005737,grad_norm: 0.9394792790045962, iteration: 165681
loss: 1.0277289152145386,grad_norm: 0.999999086800611, iteration: 165682
loss: 1.0634171962738037,grad_norm: 0.9999991529595024, iteration: 165683
loss: 0.9758875370025635,grad_norm: 0.8027062242786777, iteration: 165684
loss: 0.9986234903335571,grad_norm: 0.8460474200060073, iteration: 165685
loss: 0.996433675289154,grad_norm: 0.999999018226713, iteration: 165686
loss: 0.955396831035614,grad_norm: 0.9469033093270685, iteration: 165687
loss: 0.9835363626480103,grad_norm: 0.9999989815349212, iteration: 165688
loss: 0.999802827835083,grad_norm: 0.8408012453811269, iteration: 165689
loss: 1.023668885231018,grad_norm: 0.9999990690195341, iteration: 165690
loss: 0.9966958165168762,grad_norm: 0.8419321474306156, iteration: 165691
loss: 0.9807235598564148,grad_norm: 0.9999990557939867, iteration: 165692
loss: 0.9789483547210693,grad_norm: 0.9699750594734018, iteration: 165693
loss: 1.0397076606750488,grad_norm: 0.9999991245505776, iteration: 165694
loss: 0.9672621488571167,grad_norm: 0.9999991553751416, iteration: 165695
loss: 1.0134035348892212,grad_norm: 0.9818489058810294, iteration: 165696
loss: 0.9690095782279968,grad_norm: 0.8882619767557468, iteration: 165697
loss: 1.004150390625,grad_norm: 0.9660421467791808, iteration: 165698
loss: 0.9938139915466309,grad_norm: 0.8997714688087647, iteration: 165699
loss: 1.0052250623703003,grad_norm: 0.9834813260493329, iteration: 165700
loss: 1.0021648406982422,grad_norm: 0.9999990745087454, iteration: 165701
loss: 0.9834181666374207,grad_norm: 0.9999991333334174, iteration: 165702
loss: 1.0036630630493164,grad_norm: 0.9999991171693914, iteration: 165703
loss: 0.9714257121086121,grad_norm: 0.9119843527160453, iteration: 165704
loss: 1.013419508934021,grad_norm: 0.999999176578841, iteration: 165705
loss: 1.007850170135498,grad_norm: 0.9999992296275216, iteration: 165706
loss: 1.004029631614685,grad_norm: 0.9999990337059886, iteration: 165707
loss: 1.0120927095413208,grad_norm: 0.8770695470506814, iteration: 165708
loss: 1.0471011400222778,grad_norm: 0.9999996311352567, iteration: 165709
loss: 0.9821495413780212,grad_norm: 0.9943845503009268, iteration: 165710
loss: 1.0319880247116089,grad_norm: 0.999999714987857, iteration: 165711
loss: 0.9894534945487976,grad_norm: 0.940554680967301, iteration: 165712
loss: 1.0270166397094727,grad_norm: 0.9837279583749996, iteration: 165713
loss: 1.005171537399292,grad_norm: 0.9999990317379476, iteration: 165714
loss: 1.0211552381515503,grad_norm: 0.9365370353043968, iteration: 165715
loss: 0.9870518445968628,grad_norm: 0.9999991296708479, iteration: 165716
loss: 1.0118552446365356,grad_norm: 0.9999990091970897, iteration: 165717
loss: 0.9872608780860901,grad_norm: 0.9999992481536184, iteration: 165718
loss: 1.0256491899490356,grad_norm: 0.8099243085957876, iteration: 165719
loss: 1.0059436559677124,grad_norm: 0.9999994376082418, iteration: 165720
loss: 1.0033315420150757,grad_norm: 0.8367523548555842, iteration: 165721
loss: 0.9865989089012146,grad_norm: 0.9999990378091873, iteration: 165722
loss: 0.9920285940170288,grad_norm: 0.8247722986438842, iteration: 165723
loss: 0.9929817318916321,grad_norm: 0.90300173861045, iteration: 165724
loss: 1.028340458869934,grad_norm: 0.9999998992414543, iteration: 165725
loss: 1.0004686117172241,grad_norm: 0.9016607758187929, iteration: 165726
loss: 1.0193533897399902,grad_norm: 0.9999991346707674, iteration: 165727
loss: 0.9738965034484863,grad_norm: 0.9999990795596849, iteration: 165728
loss: 0.9848686456680298,grad_norm: 0.9183987704316585, iteration: 165729
loss: 0.973380982875824,grad_norm: 0.9401630653009464, iteration: 165730
loss: 1.0081478357315063,grad_norm: 0.9100263786614257, iteration: 165731
loss: 1.0128014087677002,grad_norm: 0.9890697830051799, iteration: 165732
loss: 1.0437558889389038,grad_norm: 0.9407862501940655, iteration: 165733
loss: 0.9629815220832825,grad_norm: 0.9498820144215622, iteration: 165734
loss: 0.9730620980262756,grad_norm: 0.8926515273446857, iteration: 165735
loss: 1.032947301864624,grad_norm: 0.9775388798344002, iteration: 165736
loss: 0.996272087097168,grad_norm: 0.9991939678471321, iteration: 165737
loss: 1.0278854370117188,grad_norm: 0.760375395295378, iteration: 165738
loss: 0.9837085008621216,grad_norm: 0.9999996249376577, iteration: 165739
loss: 1.0069425106048584,grad_norm: 0.9002623310859692, iteration: 165740
loss: 0.9435341358184814,grad_norm: 0.9999992028432059, iteration: 165741
loss: 1.0093202590942383,grad_norm: 0.9999991404909796, iteration: 165742
loss: 0.9480504989624023,grad_norm: 0.9945708433148636, iteration: 165743
loss: 0.9788661003112793,grad_norm: 0.9722874146591713, iteration: 165744
loss: 0.99654620885849,grad_norm: 0.8515491377794097, iteration: 165745
loss: 0.9900224208831787,grad_norm: 0.8992403841058434, iteration: 165746
loss: 0.9789925217628479,grad_norm: 0.8961014719406706, iteration: 165747
loss: 1.0614426136016846,grad_norm: 0.9999993695919475, iteration: 165748
loss: 1.0080435276031494,grad_norm: 0.9910036935643919, iteration: 165749
loss: 0.9907752871513367,grad_norm: 0.8614778311209949, iteration: 165750
loss: 1.0066168308258057,grad_norm: 0.9999991261529477, iteration: 165751
loss: 1.2294223308563232,grad_norm: 0.9999997625287682, iteration: 165752
loss: 0.9882892370223999,grad_norm: 0.8838986423756205, iteration: 165753
loss: 1.0282561779022217,grad_norm: 0.9999991802555614, iteration: 165754
loss: 0.9773815870285034,grad_norm: 0.9288279368333252, iteration: 165755
loss: 1.011289358139038,grad_norm: 0.999999034447941, iteration: 165756
loss: 0.9738579988479614,grad_norm: 0.9999991088772845, iteration: 165757
loss: 0.9781469702720642,grad_norm: 0.881319617988436, iteration: 165758
loss: 0.994726300239563,grad_norm: 0.7953649511114254, iteration: 165759
loss: 1.1622594594955444,grad_norm: 0.9999998775812696, iteration: 165760
loss: 0.978155255317688,grad_norm: 0.9999990534914867, iteration: 165761
loss: 1.7092925310134888,grad_norm: 0.9999999079013873, iteration: 165762
loss: 1.1248780488967896,grad_norm: 0.9999999419878416, iteration: 165763
loss: 1.646202802658081,grad_norm: 0.999999897978557, iteration: 165764
loss: 1.0183197259902954,grad_norm: 0.9167831690005706, iteration: 165765
loss: 0.9839504361152649,grad_norm: 0.9473957791920955, iteration: 165766
loss: 0.9849187731742859,grad_norm: 0.8055642579543691, iteration: 165767
loss: 1.0061860084533691,grad_norm: 0.9999991441345719, iteration: 165768
loss: 0.9550943970680237,grad_norm: 0.993734582270991, iteration: 165769
loss: 0.9770634174346924,grad_norm: 0.999999046668548, iteration: 165770
loss: 1.017257571220398,grad_norm: 0.9999993619353422, iteration: 165771
loss: 0.9920300841331482,grad_norm: 0.9999992540654137, iteration: 165772
loss: 1.036902666091919,grad_norm: 0.9999995009105643, iteration: 165773
loss: 1.0211129188537598,grad_norm: 0.9999993343362928, iteration: 165774
loss: 1.0197560787200928,grad_norm: 0.9999992781318678, iteration: 165775
loss: 0.9829041957855225,grad_norm: 0.9820366785452586, iteration: 165776
loss: 0.9934465289115906,grad_norm: 0.9113815900657689, iteration: 165777
loss: 1.007513403892517,grad_norm: 0.9999992024141865, iteration: 165778
loss: 0.9952974915504456,grad_norm: 0.9593757005272622, iteration: 165779
loss: 1.0332173109054565,grad_norm: 0.9999992409250709, iteration: 165780
loss: 0.9900878071784973,grad_norm: 0.8589823705432506, iteration: 165781
loss: 1.0032343864440918,grad_norm: 0.9999991442150421, iteration: 165782
loss: 1.0342870950698853,grad_norm: 0.9999992845895539, iteration: 165783
loss: 0.9932824373245239,grad_norm: 0.987503561405599, iteration: 165784
loss: 1.0218172073364258,grad_norm: 0.8173287392534885, iteration: 165785
loss: 0.9700983166694641,grad_norm: 0.8784697851811475, iteration: 165786
loss: 1.0317566394805908,grad_norm: 0.9672268898846524, iteration: 165787
loss: 1.006142258644104,grad_norm: 0.9368111948847058, iteration: 165788
loss: 0.9967105984687805,grad_norm: 0.9102077884168377, iteration: 165789
loss: 1.0161960124969482,grad_norm: 0.9472731383338258, iteration: 165790
loss: 1.0477021932601929,grad_norm: 0.9999993130212045, iteration: 165791
loss: 0.9964925646781921,grad_norm: 0.9999991592140952, iteration: 165792
loss: 1.0312608480453491,grad_norm: 0.9999995895386627, iteration: 165793
loss: 1.0022579431533813,grad_norm: 0.9021386651365637, iteration: 165794
loss: 1.044566035270691,grad_norm: 0.8689188269711411, iteration: 165795
loss: 1.0152024030685425,grad_norm: 0.9999991788009688, iteration: 165796
loss: 1.0278091430664062,grad_norm: 0.9999999358333246, iteration: 165797
loss: 1.0190273523330688,grad_norm: 0.9999992061151731, iteration: 165798
loss: 1.011376976966858,grad_norm: 0.9676344352756966, iteration: 165799
loss: 0.9761738181114197,grad_norm: 0.999998998052198, iteration: 165800
loss: 0.9789712429046631,grad_norm: 0.9999991868145011, iteration: 165801
loss: 1.0277180671691895,grad_norm: 0.8990928383576635, iteration: 165802
loss: 0.9810629487037659,grad_norm: 0.9999993676475107, iteration: 165803
loss: 1.0163607597351074,grad_norm: 0.9659957862434505, iteration: 165804
loss: 0.9866839647293091,grad_norm: 0.9824145231224569, iteration: 165805
loss: 0.9745785593986511,grad_norm: 0.9999992690808098, iteration: 165806
loss: 1.016486644744873,grad_norm: 0.9839482515680137, iteration: 165807
loss: 1.0081329345703125,grad_norm: 0.8383959655400307, iteration: 165808
loss: 1.0234805345535278,grad_norm: 0.9999991987952606, iteration: 165809
loss: 0.9644022583961487,grad_norm: 0.925562311086112, iteration: 165810
loss: 0.9917076230049133,grad_norm: 0.9999994306942954, iteration: 165811
loss: 0.9824752807617188,grad_norm: 0.8861703266220222, iteration: 165812
loss: 1.0140267610549927,grad_norm: 0.9999996312989795, iteration: 165813
loss: 0.953925371170044,grad_norm: 0.878446772361185, iteration: 165814
loss: 1.0081127882003784,grad_norm: 0.8782465499235338, iteration: 165815
loss: 1.024499773979187,grad_norm: 0.9999991656250045, iteration: 165816
loss: 1.0134096145629883,grad_norm: 0.8277818840123305, iteration: 165817
loss: 1.0198460817337036,grad_norm: 0.8745498931601645, iteration: 165818
loss: 0.9805591106414795,grad_norm: 0.9999990541296025, iteration: 165819
loss: 1.0457549095153809,grad_norm: 0.9999990492514852, iteration: 165820
loss: 0.9833359718322754,grad_norm: 0.9710514575144079, iteration: 165821
loss: 1.0131112337112427,grad_norm: 0.83639246998569, iteration: 165822
loss: 1.0415141582489014,grad_norm: 0.9999992118036113, iteration: 165823
loss: 0.9823522567749023,grad_norm: 0.9999991935582839, iteration: 165824
loss: 0.9980712532997131,grad_norm: 0.999999807766727, iteration: 165825
loss: 0.9985098242759705,grad_norm: 0.9722830643563338, iteration: 165826
loss: 0.9959776401519775,grad_norm: 0.8939076408420679, iteration: 165827
loss: 1.004644513130188,grad_norm: 0.9999990465593253, iteration: 165828
loss: 1.0122275352478027,grad_norm: 0.9999991770671607, iteration: 165829
loss: 1.0257309675216675,grad_norm: 0.999999810897821, iteration: 165830
loss: 0.9916947484016418,grad_norm: 0.8285224639029467, iteration: 165831
loss: 0.9996812343597412,grad_norm: 0.8637674636638574, iteration: 165832
loss: 0.9827186465263367,grad_norm: 0.8710884050734252, iteration: 165833
loss: 0.9803450107574463,grad_norm: 0.9623026474548831, iteration: 165834
loss: 1.0312895774841309,grad_norm: 0.9999992218844833, iteration: 165835
loss: 0.9883244037628174,grad_norm: 0.9408704842428984, iteration: 165836
loss: 0.9922316074371338,grad_norm: 0.8968857630927323, iteration: 165837
loss: 1.0245527029037476,grad_norm: 0.9999990119017274, iteration: 165838
loss: 1.0322920083999634,grad_norm: 0.9147535966398775, iteration: 165839
loss: 0.9993730783462524,grad_norm: 0.9782398874458322, iteration: 165840
loss: 0.9796391725540161,grad_norm: 0.9829271515309109, iteration: 165841
loss: 1.0314050912857056,grad_norm: 0.9582940945534539, iteration: 165842
loss: 1.0285983085632324,grad_norm: 0.93317645737452, iteration: 165843
loss: 1.0425814390182495,grad_norm: 0.9999996099913113, iteration: 165844
loss: 0.9757251143455505,grad_norm: 0.9999991543588314, iteration: 165845
loss: 0.9883310794830322,grad_norm: 0.8983896785211929, iteration: 165846
loss: 1.0059919357299805,grad_norm: 0.9999993907882524, iteration: 165847
loss: 1.0669336318969727,grad_norm: 0.9999997442023159, iteration: 165848
loss: 1.0115013122558594,grad_norm: 0.9999990480014843, iteration: 165849
loss: 0.9973298907279968,grad_norm: 0.8297981903687143, iteration: 165850
loss: 0.9828869104385376,grad_norm: 0.8946050432584418, iteration: 165851
loss: 0.9841161966323853,grad_norm: 0.940641079622149, iteration: 165852
loss: 0.995719850063324,grad_norm: 0.9999989616206959, iteration: 165853
loss: 1.0211000442504883,grad_norm: 0.9999990940664818, iteration: 165854
loss: 1.0329270362854004,grad_norm: 0.9999994817942335, iteration: 165855
loss: 1.010319709777832,grad_norm: 0.9999998679209102, iteration: 165856
loss: 1.027047038078308,grad_norm: 0.9999991675657003, iteration: 165857
loss: 0.9826279282569885,grad_norm: 0.9999992316824471, iteration: 165858
loss: 1.0230556726455688,grad_norm: 0.9999996019676536, iteration: 165859
loss: 1.0536226034164429,grad_norm: 0.8800064869617524, iteration: 165860
loss: 0.9700501561164856,grad_norm: 0.9999991808126663, iteration: 165861
loss: 1.0416874885559082,grad_norm: 0.8141362556878021, iteration: 165862
loss: 1.029618263244629,grad_norm: 0.9372869166297706, iteration: 165863
loss: 1.0138370990753174,grad_norm: 0.9999990903398681, iteration: 165864
loss: 0.9692609906196594,grad_norm: 0.9999991255518583, iteration: 165865
loss: 1.0149434804916382,grad_norm: 0.9999991282318702, iteration: 165866
loss: 1.0234726667404175,grad_norm: 0.9999990298314998, iteration: 165867
loss: 1.0239019393920898,grad_norm: 0.9999992968230811, iteration: 165868
loss: 0.9853866696357727,grad_norm: 0.8704787113534918, iteration: 165869
loss: 1.0274749994277954,grad_norm: 0.9999992113646149, iteration: 165870
loss: 1.084439754486084,grad_norm: 0.9999993899802726, iteration: 165871
loss: 1.0175869464874268,grad_norm: 0.9999990977179595, iteration: 165872
loss: 1.010719895362854,grad_norm: 0.9639297582950301, iteration: 165873
loss: 1.0082204341888428,grad_norm: 0.8889758459674081, iteration: 165874
loss: 1.0769872665405273,grad_norm: 0.9999991039021388, iteration: 165875
loss: 1.00883948802948,grad_norm: 0.9999992661028477, iteration: 165876
loss: 1.0953179597854614,grad_norm: 0.9999992533467744, iteration: 165877
loss: 1.0681723356246948,grad_norm: 0.9999991758227824, iteration: 165878
loss: 0.9948843717575073,grad_norm: 0.9868780041653001, iteration: 165879
loss: 0.98982834815979,grad_norm: 0.9978814938424355, iteration: 165880
loss: 0.9945693612098694,grad_norm: 0.9999991042895192, iteration: 165881
loss: 1.0152064561843872,grad_norm: 0.9999991656622759, iteration: 165882
loss: 0.9992287158966064,grad_norm: 0.8305780130746703, iteration: 165883
loss: 1.0065088272094727,grad_norm: 0.9999989957066054, iteration: 165884
loss: 0.987664520740509,grad_norm: 0.9104456926965213, iteration: 165885
loss: 0.9896409511566162,grad_norm: 0.9999991877514721, iteration: 165886
loss: 0.9990249276161194,grad_norm: 0.9999991637242958, iteration: 165887
loss: 0.9936025142669678,grad_norm: 0.9999990202294439, iteration: 165888
loss: 1.0210065841674805,grad_norm: 0.8845265402269967, iteration: 165889
loss: 0.9969509243965149,grad_norm: 0.9520582917434219, iteration: 165890
loss: 1.0057430267333984,grad_norm: 0.9356147220931981, iteration: 165891
loss: 1.0054960250854492,grad_norm: 0.9077013391741221, iteration: 165892
loss: 0.9649431705474854,grad_norm: 0.8699225723916363, iteration: 165893
loss: 0.997799813747406,grad_norm: 0.999999125878151, iteration: 165894
loss: 1.0047610998153687,grad_norm: 0.9999991367696922, iteration: 165895
loss: 0.9839311242103577,grad_norm: 0.9999990380826772, iteration: 165896
loss: 1.0168633460998535,grad_norm: 0.9999991548847185, iteration: 165897
loss: 0.9957484006881714,grad_norm: 0.9217979945261484, iteration: 165898
loss: 0.9682318568229675,grad_norm: 0.9154509661206464, iteration: 165899
loss: 1.0113445520401,grad_norm: 0.9999992187150155, iteration: 165900
loss: 0.9886720776557922,grad_norm: 0.9664015266483956, iteration: 165901
loss: 0.9740462303161621,grad_norm: 0.9837499663984616, iteration: 165902
loss: 1.1597247123718262,grad_norm: 0.9999999989304522, iteration: 165903
loss: 0.9722882509231567,grad_norm: 0.906037293234309, iteration: 165904
loss: 0.9728484153747559,grad_norm: 0.9999991035361903, iteration: 165905
loss: 1.010731816291809,grad_norm: 0.9251594970789174, iteration: 165906
loss: 0.9854024648666382,grad_norm: 0.9999991118712878, iteration: 165907
loss: 1.3199414014816284,grad_norm: 0.9999999012502407, iteration: 165908
loss: 1.0894203186035156,grad_norm: 0.9999994310149324, iteration: 165909
loss: 1.0097525119781494,grad_norm: 0.8659051913079676, iteration: 165910
loss: 0.9915851354598999,grad_norm: 0.89315837080908, iteration: 165911
loss: 1.0079216957092285,grad_norm: 0.9999992294221871, iteration: 165912
loss: 1.001779317855835,grad_norm: 0.8996043587392749, iteration: 165913
loss: 0.9760552048683167,grad_norm: 0.9999990586999067, iteration: 165914
loss: 1.0068968534469604,grad_norm: 0.9044779455663468, iteration: 165915
loss: 1.0562348365783691,grad_norm: 0.999999163293662, iteration: 165916
loss: 0.9934733510017395,grad_norm: 0.9646446033250986, iteration: 165917
loss: 0.9896073341369629,grad_norm: 0.9365335158525524, iteration: 165918
loss: 0.9831430912017822,grad_norm: 0.9193816380616794, iteration: 165919
loss: 0.9993937611579895,grad_norm: 0.9999991765637005, iteration: 165920
loss: 0.9927802085876465,grad_norm: 0.9202425755685104, iteration: 165921
loss: 0.9869613647460938,grad_norm: 0.963904263698966, iteration: 165922
loss: 1.010844349861145,grad_norm: 0.9999992162896288, iteration: 165923
loss: 1.0357844829559326,grad_norm: 0.9999990624479547, iteration: 165924
loss: 1.0112208127975464,grad_norm: 0.9999999002885307, iteration: 165925
loss: 1.0354379415512085,grad_norm: 0.9999990773626005, iteration: 165926
loss: 0.9711304306983948,grad_norm: 0.8829378055330224, iteration: 165927
loss: 0.9784040451049805,grad_norm: 0.9235521953001056, iteration: 165928
loss: 1.0039165019989014,grad_norm: 0.999999267276368, iteration: 165929
loss: 0.9909394979476929,grad_norm: 0.9999992964517217, iteration: 165930
loss: 0.9557515382766724,grad_norm: 0.9999991285781813, iteration: 165931
loss: 1.0325576066970825,grad_norm: 0.9780610804181417, iteration: 165932
loss: 0.9928591251373291,grad_norm: 0.9665623285073139, iteration: 165933
loss: 1.0186023712158203,grad_norm: 0.9478635332070837, iteration: 165934
loss: 1.0255718231201172,grad_norm: 0.9054930848177348, iteration: 165935
loss: 1.0511406660079956,grad_norm: 0.9999992686440234, iteration: 165936
loss: 1.0091962814331055,grad_norm: 0.9999990595836089, iteration: 165937
loss: 1.0366194248199463,grad_norm: 0.9999990738362138, iteration: 165938
loss: 1.0200403928756714,grad_norm: 0.8656687900978595, iteration: 165939
loss: 0.9784613847732544,grad_norm: 0.9766359567933425, iteration: 165940
loss: 1.001352071762085,grad_norm: 0.9331088927457073, iteration: 165941
loss: 1.0217803716659546,grad_norm: 0.9769323042334026, iteration: 165942
loss: 1.0075453519821167,grad_norm: 0.9964669087957307, iteration: 165943
loss: 1.020105004310608,grad_norm: 0.9999991233208447, iteration: 165944
loss: 1.0931744575500488,grad_norm: 0.9999997455669545, iteration: 165945
loss: 0.9722579121589661,grad_norm: 0.9702196186190432, iteration: 165946
loss: 0.9679882526397705,grad_norm: 0.9893018969741973, iteration: 165947
loss: 1.0201860666275024,grad_norm: 0.9846085837979157, iteration: 165948
loss: 1.0259783267974854,grad_norm: 0.9999990546512855, iteration: 165949
loss: 0.9795832633972168,grad_norm: 0.9999991818882634, iteration: 165950
loss: 1.0002919435501099,grad_norm: 0.9999997874901645, iteration: 165951
loss: 1.0122430324554443,grad_norm: 0.999999272365431, iteration: 165952
loss: 1.014461636543274,grad_norm: 0.8804163297166564, iteration: 165953
loss: 1.0486770868301392,grad_norm: 0.9999991781362151, iteration: 165954
loss: 1.0334153175354004,grad_norm: 0.9999997035486958, iteration: 165955
loss: 0.9754683375358582,grad_norm: 0.9999990351395434, iteration: 165956
loss: 1.016705870628357,grad_norm: 0.9999991065591484, iteration: 165957
loss: 1.0427511930465698,grad_norm: 0.9999992460442895, iteration: 165958
loss: 0.9929770827293396,grad_norm: 0.9625400815090782, iteration: 165959
loss: 0.9947381019592285,grad_norm: 0.9999991666305177, iteration: 165960
loss: 1.0016783475875854,grad_norm: 0.999999196715777, iteration: 165961
loss: 1.0102369785308838,grad_norm: 0.9999991414369148, iteration: 165962
loss: 0.9767589569091797,grad_norm: 0.9999992464925087, iteration: 165963
loss: 1.0247406959533691,grad_norm: 0.9720008469494245, iteration: 165964
loss: 0.9749003648757935,grad_norm: 0.9999991676343762, iteration: 165965
loss: 1.0235975980758667,grad_norm: 0.9999991323081343, iteration: 165966
loss: 0.9870448708534241,grad_norm: 0.9284263354451568, iteration: 165967
loss: 1.051785945892334,grad_norm: 0.867342256813226, iteration: 165968
loss: 0.9572397470474243,grad_norm: 0.9429302938198083, iteration: 165969
loss: 1.0060101747512817,grad_norm: 0.999999120482595, iteration: 165970
loss: 1.007999300956726,grad_norm: 0.9999992864705813, iteration: 165971
loss: 1.026304006576538,grad_norm: 0.9822570285231649, iteration: 165972
loss: 0.9795728325843811,grad_norm: 0.9999996816437842, iteration: 165973
loss: 1.0038928985595703,grad_norm: 0.9999990619007826, iteration: 165974
loss: 0.9636062979698181,grad_norm: 0.8952704178989112, iteration: 165975
loss: 0.9788091778755188,grad_norm: 0.8857108895086365, iteration: 165976
loss: 1.0131614208221436,grad_norm: 0.9999990075812847, iteration: 165977
loss: 0.9897924065589905,grad_norm: 0.9999991305506554, iteration: 165978
loss: 1.1082149744033813,grad_norm: 0.9999992108223608, iteration: 165979
loss: 1.019253134727478,grad_norm: 0.9054087021260797, iteration: 165980
loss: 1.0157684087753296,grad_norm: 0.9999996399571766, iteration: 165981
loss: 0.9938735365867615,grad_norm: 0.9999991068180244, iteration: 165982
loss: 0.9996885657310486,grad_norm: 0.9468588432119698, iteration: 165983
loss: 1.0014293193817139,grad_norm: 0.999999034097684, iteration: 165984
loss: 0.9611878395080566,grad_norm: 0.991612146984613, iteration: 165985
loss: 0.9870131611824036,grad_norm: 0.9450697214087973, iteration: 165986
loss: 0.960155725479126,grad_norm: 0.9464817194308457, iteration: 165987
loss: 1.006872534751892,grad_norm: 0.7978426161294381, iteration: 165988
loss: 0.9778685569763184,grad_norm: 0.8969839082657342, iteration: 165989
loss: 1.0357385873794556,grad_norm: 0.8930367667403805, iteration: 165990
loss: 0.9897643327713013,grad_norm: 0.9118399521103062, iteration: 165991
loss: 0.9946317076683044,grad_norm: 0.9355382770656215, iteration: 165992
loss: 1.0191981792449951,grad_norm: 0.9999990141714562, iteration: 165993
loss: 1.0024083852767944,grad_norm: 0.9999990838501983, iteration: 165994
loss: 1.020198941230774,grad_norm: 0.9999989137930188, iteration: 165995
loss: 0.9777017831802368,grad_norm: 0.9406908255065841, iteration: 165996
loss: 0.9643806219100952,grad_norm: 0.9648260696276237, iteration: 165997
loss: 1.0287278890609741,grad_norm: 0.923681893465598, iteration: 165998
loss: 1.0181828737258911,grad_norm: 0.9999990655201343, iteration: 165999
loss: 0.9963954091072083,grad_norm: 0.9961995545432891, iteration: 166000
loss: 1.0193229913711548,grad_norm: 0.9999992810215681, iteration: 166001
loss: 1.0128157138824463,grad_norm: 0.9999989878111143, iteration: 166002
loss: 1.0129597187042236,grad_norm: 0.9591330089472196, iteration: 166003
loss: 1.012397289276123,grad_norm: 0.8938359230464968, iteration: 166004
loss: 1.0053625106811523,grad_norm: 0.9243994381665281, iteration: 166005
loss: 1.0120936632156372,grad_norm: 0.9999992043937173, iteration: 166006
loss: 1.0504324436187744,grad_norm: 0.9834993547727486, iteration: 166007
loss: 0.9945744872093201,grad_norm: 0.9999990583253318, iteration: 166008
loss: 1.0153943300247192,grad_norm: 0.9157296829511468, iteration: 166009
loss: 1.0105911493301392,grad_norm: 0.999062470386641, iteration: 166010
loss: 0.9831234812736511,grad_norm: 0.9014407071132615, iteration: 166011
loss: 0.9875216484069824,grad_norm: 0.8914510007972472, iteration: 166012
loss: 1.0424766540527344,grad_norm: 0.9561808614924313, iteration: 166013
loss: 1.0278570652008057,grad_norm: 0.8825426045224907, iteration: 166014
loss: 1.0248528718948364,grad_norm: 0.9999991508686752, iteration: 166015
loss: 0.9690392017364502,grad_norm: 0.9999997263623086, iteration: 166016
loss: 1.01326584815979,grad_norm: 0.9999998752546552, iteration: 166017
loss: 1.0035789012908936,grad_norm: 0.9999992411515505, iteration: 166018
loss: 0.9787667989730835,grad_norm: 0.7774989774097225, iteration: 166019
loss: 0.9980652928352356,grad_norm: 0.9999990324511153, iteration: 166020
loss: 0.9725035429000854,grad_norm: 0.9999990913298684, iteration: 166021
loss: 1.0114576816558838,grad_norm: 0.9999997257723079, iteration: 166022
loss: 1.0252615213394165,grad_norm: 0.9999990806421738, iteration: 166023
loss: 0.9863383769989014,grad_norm: 0.999999143099977, iteration: 166024
loss: 1.0081508159637451,grad_norm: 0.9999990985229652, iteration: 166025
loss: 1.0440618991851807,grad_norm: 0.9999995623210701, iteration: 166026
loss: 1.1349220275878906,grad_norm: 0.9999997314877455, iteration: 166027
loss: 1.0053610801696777,grad_norm: 0.9999991701455201, iteration: 166028
loss: 1.0391974449157715,grad_norm: 0.9999993258544339, iteration: 166029
loss: 1.0128730535507202,grad_norm: 0.9755908873416903, iteration: 166030
loss: 1.025489091873169,grad_norm: 0.9999990053881386, iteration: 166031
loss: 1.025145173072815,grad_norm: 0.9999993013145069, iteration: 166032
loss: 0.9854328632354736,grad_norm: 0.8145636597601168, iteration: 166033
loss: 0.9826387763023376,grad_norm: 0.8920654526565821, iteration: 166034
loss: 1.0188984870910645,grad_norm: 0.9773411345932779, iteration: 166035
loss: 1.001462697982788,grad_norm: 0.999999129295533, iteration: 166036
loss: 0.9963714480400085,grad_norm: 0.9894160113529882, iteration: 166037
loss: 1.0069059133529663,grad_norm: 0.8090922507995991, iteration: 166038
loss: 0.9563100934028625,grad_norm: 0.9544130949650976, iteration: 166039
loss: 0.9659392833709717,grad_norm: 0.9999991309695317, iteration: 166040
loss: 0.9767010807991028,grad_norm: 0.9227854246508084, iteration: 166041
loss: 1.0123144388198853,grad_norm: 0.9999989261407325, iteration: 166042
loss: 0.9980917572975159,grad_norm: 0.9068514808795947, iteration: 166043
loss: 1.010663628578186,grad_norm: 0.9999997404012967, iteration: 166044
loss: 0.989740788936615,grad_norm: 0.9614398578349678, iteration: 166045
loss: 1.0203304290771484,grad_norm: 0.9818271848963175, iteration: 166046
loss: 0.9970166087150574,grad_norm: 0.913363636947624, iteration: 166047
loss: 1.0263090133666992,grad_norm: 0.9999993896889191, iteration: 166048
loss: 1.0165200233459473,grad_norm: 0.9999991094975077, iteration: 166049
loss: 0.9984243512153625,grad_norm: 0.9999991303504685, iteration: 166050
loss: 0.9515352249145508,grad_norm: 0.9761140930005963, iteration: 166051
loss: 1.0129969120025635,grad_norm: 0.9999989020526977, iteration: 166052
loss: 1.028459906578064,grad_norm: 0.8797375068812112, iteration: 166053
loss: 1.0288915634155273,grad_norm: 0.9999994458099382, iteration: 166054
loss: 1.0243631601333618,grad_norm: 0.9999992794044973, iteration: 166055
loss: 0.9943500757217407,grad_norm: 0.9999991226979237, iteration: 166056
loss: 1.008835792541504,grad_norm: 0.9259769125091609, iteration: 166057
loss: 1.0322374105453491,grad_norm: 0.9745823846579373, iteration: 166058
loss: 0.9912115335464478,grad_norm: 0.999998972387859, iteration: 166059
loss: 0.983485996723175,grad_norm: 0.8614215615537613, iteration: 166060
loss: 1.0088599920272827,grad_norm: 0.8021625063006586, iteration: 166061
loss: 0.9685443639755249,grad_norm: 0.9619542924451608, iteration: 166062
loss: 0.995453953742981,grad_norm: 0.9999992419044567, iteration: 166063
loss: 0.9990808367729187,grad_norm: 0.9052794231511628, iteration: 166064
loss: 1.0025793313980103,grad_norm: 0.9999992504719284, iteration: 166065
loss: 1.0325971841812134,grad_norm: 0.9541844507505584, iteration: 166066
loss: 0.9782047867774963,grad_norm: 0.999999088118148, iteration: 166067
loss: 0.9832651019096375,grad_norm: 0.9999991292734284, iteration: 166068
loss: 1.021051287651062,grad_norm: 0.9999994560003828, iteration: 166069
loss: 1.0193294286727905,grad_norm: 0.9182304780823429, iteration: 166070
loss: 0.9835013151168823,grad_norm: 0.9999995206590422, iteration: 166071
loss: 1.0261282920837402,grad_norm: 0.9999991447945352, iteration: 166072
loss: 1.0035226345062256,grad_norm: 0.9064734206225625, iteration: 166073
loss: 1.0161820650100708,grad_norm: 0.9604028455196647, iteration: 166074
loss: 1.0063952207565308,grad_norm: 0.9999992081183838, iteration: 166075
loss: 1.0890735387802124,grad_norm: 0.9999995320408204, iteration: 166076
loss: 0.997194766998291,grad_norm: 0.9999989931657253, iteration: 166077
loss: 0.9842311143875122,grad_norm: 0.8813967401088321, iteration: 166078
loss: 0.976874053478241,grad_norm: 0.999999045370658, iteration: 166079
loss: 1.003943920135498,grad_norm: 0.9030452918063371, iteration: 166080
loss: 1.014153003692627,grad_norm: 0.9889795761208301, iteration: 166081
loss: 1.0183510780334473,grad_norm: 0.9999992127577922, iteration: 166082
loss: 1.0253983736038208,grad_norm: 0.9086336132671966, iteration: 166083
loss: 1.0495250225067139,grad_norm: 0.9999997328937968, iteration: 166084
loss: 0.981926441192627,grad_norm: 0.9757586194324442, iteration: 166085
loss: 1.0370244979858398,grad_norm: 0.9999991925519729, iteration: 166086
loss: 1.0275996923446655,grad_norm: 0.9999990897663635, iteration: 166087
loss: 1.0189303159713745,grad_norm: 0.8664857556275728, iteration: 166088
loss: 0.9494591355323792,grad_norm: 0.9999990776053062, iteration: 166089
loss: 0.9933364391326904,grad_norm: 0.9856654650882227, iteration: 166090
loss: 0.9976208209991455,grad_norm: 0.8247913202077777, iteration: 166091
loss: 0.9916004538536072,grad_norm: 0.9400464279966796, iteration: 166092
loss: 1.031653642654419,grad_norm: 0.9679726559673975, iteration: 166093
loss: 0.9740366339683533,grad_norm: 0.9496401530672374, iteration: 166094
loss: 0.9961847066879272,grad_norm: 0.985622124657401, iteration: 166095
loss: 1.0039366483688354,grad_norm: 0.999999218863139, iteration: 166096
loss: 1.006149172782898,grad_norm: 0.9257477739764141, iteration: 166097
loss: 0.999819278717041,grad_norm: 0.8630132542669342, iteration: 166098
loss: 0.9921324849128723,grad_norm: 0.9999989857497472, iteration: 166099
loss: 0.9766003489494324,grad_norm: 0.9999992175378263, iteration: 166100
loss: 1.0113905668258667,grad_norm: 0.8248854244320235, iteration: 166101
loss: 1.0317821502685547,grad_norm: 0.9999991452799832, iteration: 166102
loss: 0.9962792992591858,grad_norm: 0.999998983398294, iteration: 166103
loss: 1.017869472503662,grad_norm: 0.9999993526070721, iteration: 166104
loss: 1.0592442750930786,grad_norm: 0.999999130999778, iteration: 166105
loss: 1.024178385734558,grad_norm: 0.941115558183448, iteration: 166106
loss: 1.0021165609359741,grad_norm: 0.9594075978075777, iteration: 166107
loss: 0.9734148383140564,grad_norm: 0.9999991237380236, iteration: 166108
loss: 0.988785445690155,grad_norm: 0.9999989215989984, iteration: 166109
loss: 0.9912706613540649,grad_norm: 0.844271555128533, iteration: 166110
loss: 1.0127378702163696,grad_norm: 0.9999993604954769, iteration: 166111
loss: 1.038840651512146,grad_norm: 0.9999994821817313, iteration: 166112
loss: 1.0260878801345825,grad_norm: 0.9161319416194084, iteration: 166113
loss: 1.0750436782836914,grad_norm: 0.9999993500345594, iteration: 166114
loss: 0.9863153100013733,grad_norm: 0.8906806867817791, iteration: 166115
loss: 0.9972057938575745,grad_norm: 0.9999991416270048, iteration: 166116
loss: 1.0189603567123413,grad_norm: 0.8461532047557687, iteration: 166117
loss: 1.0177381038665771,grad_norm: 0.9999991326279283, iteration: 166118
loss: 0.990956723690033,grad_norm: 0.7857452219425451, iteration: 166119
loss: 0.9792605638504028,grad_norm: 0.9205230669585569, iteration: 166120
loss: 0.9820875525474548,grad_norm: 0.9828679221401829, iteration: 166121
loss: 1.0099735260009766,grad_norm: 0.9337443749443429, iteration: 166122
loss: 1.0049902200698853,grad_norm: 0.9999992731340429, iteration: 166123
loss: 0.991038978099823,grad_norm: 0.9999990791752053, iteration: 166124
loss: 0.9915357232093811,grad_norm: 0.956339706532826, iteration: 166125
loss: 0.9627363681793213,grad_norm: 0.9999990968829274, iteration: 166126
loss: 0.9881975650787354,grad_norm: 0.9683707949212468, iteration: 166127
loss: 0.9889943599700928,grad_norm: 0.9310873958861576, iteration: 166128
loss: 0.9755218029022217,grad_norm: 0.9999990898743604, iteration: 166129
loss: 0.9719269275665283,grad_norm: 0.9999990329603545, iteration: 166130
loss: 1.011078953742981,grad_norm: 0.8245967028488248, iteration: 166131
loss: 0.9730375409126282,grad_norm: 0.9999990532493763, iteration: 166132
loss: 1.0111645460128784,grad_norm: 0.9999996626466684, iteration: 166133
loss: 0.9780979752540588,grad_norm: 0.9999991607272373, iteration: 166134
loss: 0.9884454011917114,grad_norm: 0.92908844084089, iteration: 166135
loss: 1.1564114093780518,grad_norm: 0.9999991234917771, iteration: 166136
loss: 0.9906412363052368,grad_norm: 0.9147187329342716, iteration: 166137
loss: 0.9986976385116577,grad_norm: 0.9999991224746441, iteration: 166138
loss: 1.0038492679595947,grad_norm: 0.9999991043617663, iteration: 166139
loss: 1.019575834274292,grad_norm: 0.999999117611952, iteration: 166140
loss: 0.998832643032074,grad_norm: 0.9999999080311769, iteration: 166141
loss: 1.085033893585205,grad_norm: 0.9999998533613695, iteration: 166142
loss: 0.9794744849205017,grad_norm: 0.9132673306388343, iteration: 166143
loss: 1.0211511850357056,grad_norm: 0.9999995923414595, iteration: 166144
loss: 1.0160243511199951,grad_norm: 0.8512338871805647, iteration: 166145
loss: 0.9661072492599487,grad_norm: 0.9770206159603012, iteration: 166146
loss: 0.9693565368652344,grad_norm: 0.9999991295499506, iteration: 166147
loss: 1.0027868747711182,grad_norm: 0.9999990782286389, iteration: 166148
loss: 0.9819355607032776,grad_norm: 0.9999990947489094, iteration: 166149
loss: 0.9811139702796936,grad_norm: 0.9771559563894232, iteration: 166150
loss: 1.0484367609024048,grad_norm: 0.99999909720095, iteration: 166151
loss: 0.9870955944061279,grad_norm: 0.8194353709373454, iteration: 166152
loss: 1.0381418466567993,grad_norm: 0.9636881135396883, iteration: 166153
loss: 0.9962165355682373,grad_norm: 0.9999992448556739, iteration: 166154
loss: 0.9825713038444519,grad_norm: 0.9999991124298581, iteration: 166155
loss: 1.0237369537353516,grad_norm: 0.9999990555196437, iteration: 166156
loss: 0.987987220287323,grad_norm: 0.9999990494490768, iteration: 166157
loss: 1.0133910179138184,grad_norm: 0.9339336722073196, iteration: 166158
loss: 0.9988715052604675,grad_norm: 0.9145956545235251, iteration: 166159
loss: 1.028501272201538,grad_norm: 0.9787003998269681, iteration: 166160
loss: 1.0209481716156006,grad_norm: 0.9999990626847862, iteration: 166161
loss: 0.9760342836380005,grad_norm: 0.8841349482356611, iteration: 166162
loss: 0.9807607531547546,grad_norm: 0.8204897136664522, iteration: 166163
loss: 1.0231069326400757,grad_norm: 0.9999991440842658, iteration: 166164
loss: 0.9959368109703064,grad_norm: 0.9999989824535098, iteration: 166165
loss: 0.9851932525634766,grad_norm: 0.9670545078238819, iteration: 166166
loss: 1.0001168251037598,grad_norm: 0.999999173143101, iteration: 166167
loss: 0.9955530166625977,grad_norm: 0.9358177455255751, iteration: 166168
loss: 1.0248607397079468,grad_norm: 0.9999990953168736, iteration: 166169
loss: 0.9832205176353455,grad_norm: 0.9282427431082713, iteration: 166170
loss: 1.0276963710784912,grad_norm: 0.9999990502541996, iteration: 166171
loss: 0.9909968972206116,grad_norm: 0.8718162471274772, iteration: 166172
loss: 0.944866418838501,grad_norm: 0.9999990987487319, iteration: 166173
loss: 1.005415916442871,grad_norm: 0.9999992055151111, iteration: 166174
loss: 1.0055079460144043,grad_norm: 0.9999992715144156, iteration: 166175
loss: 0.9726468920707703,grad_norm: 0.9412499709729987, iteration: 166176
loss: 0.9528182744979858,grad_norm: 0.9369919997982489, iteration: 166177
loss: 0.9904425144195557,grad_norm: 0.8961359735517436, iteration: 166178
loss: 0.9991516470909119,grad_norm: 0.9999990753607021, iteration: 166179
loss: 1.0077056884765625,grad_norm: 0.9999995751665818, iteration: 166180
loss: 1.0089468955993652,grad_norm: 0.9999990840751009, iteration: 166181
loss: 1.0368684530258179,grad_norm: 0.9754361519473408, iteration: 166182
loss: 0.9649974703788757,grad_norm: 0.999999143447593, iteration: 166183
loss: 0.9746967554092407,grad_norm: 0.999999265467001, iteration: 166184
loss: 0.9980510473251343,grad_norm: 0.9999990856254152, iteration: 166185
loss: 1.0183062553405762,grad_norm: 0.9355283653101855, iteration: 166186
loss: 0.9662134647369385,grad_norm: 0.8647471585285395, iteration: 166187
loss: 1.006373643875122,grad_norm: 0.9986543529778518, iteration: 166188
loss: 1.0562888383865356,grad_norm: 0.9999991976751947, iteration: 166189
loss: 1.0907423496246338,grad_norm: 0.9999991457295904, iteration: 166190
loss: 1.0022448301315308,grad_norm: 0.9999994269529624, iteration: 166191
loss: 1.001810073852539,grad_norm: 0.8292695164718088, iteration: 166192
loss: 1.0400909185409546,grad_norm: 0.895132847747957, iteration: 166193
loss: 0.9934234023094177,grad_norm: 0.999999227309427, iteration: 166194
loss: 0.9692968130111694,grad_norm: 0.9046649515090438, iteration: 166195
loss: 1.0398507118225098,grad_norm: 0.9999999358325385, iteration: 166196
loss: 1.080062985420227,grad_norm: 1.0000000259793058, iteration: 166197
loss: 0.9665160179138184,grad_norm: 0.9723170141985489, iteration: 166198
loss: 1.0014854669570923,grad_norm: 0.8733470295992615, iteration: 166199
loss: 0.9978669285774231,grad_norm: 0.9999990992165869, iteration: 166200
loss: 1.0033433437347412,grad_norm: 0.9999991590259157, iteration: 166201
loss: 1.01762056350708,grad_norm: 0.9999990356998997, iteration: 166202
loss: 1.0036369562149048,grad_norm: 0.8673668599277662, iteration: 166203
loss: 1.0219205617904663,grad_norm: 0.968583995251455, iteration: 166204
loss: 1.0078736543655396,grad_norm: 0.9999990028689758, iteration: 166205
loss: 0.969900906085968,grad_norm: 0.999999093752863, iteration: 166206
loss: 0.9957738518714905,grad_norm: 0.999999151312041, iteration: 166207
loss: 1.0161908864974976,grad_norm: 0.8328076571328904, iteration: 166208
loss: 1.0039407014846802,grad_norm: 0.9999996549884134, iteration: 166209
loss: 1.0765800476074219,grad_norm: 0.9999995080758086, iteration: 166210
loss: 0.9906327128410339,grad_norm: 0.9999991297969993, iteration: 166211
loss: 0.974528968334198,grad_norm: 0.9321287172661344, iteration: 166212
loss: 0.983188807964325,grad_norm: 0.8555602495133889, iteration: 166213
loss: 1.0416479110717773,grad_norm: 0.9999992646216389, iteration: 166214
loss: 0.9805774688720703,grad_norm: 0.9868529693500251, iteration: 166215
loss: 1.0172785520553589,grad_norm: 0.9297369484146966, iteration: 166216
loss: 1.0242725610733032,grad_norm: 0.9999993741857958, iteration: 166217
loss: 0.9806661605834961,grad_norm: 0.9999991193095067, iteration: 166218
loss: 0.976631760597229,grad_norm: 0.9158686319285405, iteration: 166219
loss: 1.0353084802627563,grad_norm: 0.8902521731121645, iteration: 166220
loss: 1.007849931716919,grad_norm: 0.9999991584900545, iteration: 166221
loss: 0.9973098635673523,grad_norm: 0.934718338083812, iteration: 166222
loss: 0.9622256755828857,grad_norm: 0.8671906246626743, iteration: 166223
loss: 1.0267279148101807,grad_norm: 0.9076034966681745, iteration: 166224
loss: 0.9740121960639954,grad_norm: 0.9999992457959196, iteration: 166225
loss: 0.9882245063781738,grad_norm: 0.9999991679497555, iteration: 166226
loss: 0.9923054575920105,grad_norm: 0.9999992006594901, iteration: 166227
loss: 1.0264601707458496,grad_norm: 0.9473271371111348, iteration: 166228
loss: 0.9975842833518982,grad_norm: 0.889225167523461, iteration: 166229
loss: 1.0085859298706055,grad_norm: 0.9926330630980547, iteration: 166230
loss: 0.9737375974655151,grad_norm: 0.9999991223354353, iteration: 166231
loss: 0.9874634742736816,grad_norm: 0.8721388497416219, iteration: 166232
loss: 0.9825335144996643,grad_norm: 0.9184369863585528, iteration: 166233
loss: 0.9723777770996094,grad_norm: 0.9999991337649443, iteration: 166234
loss: 1.028953194618225,grad_norm: 0.9999990083457669, iteration: 166235
loss: 1.0031481981277466,grad_norm: 0.9999992123501218, iteration: 166236
loss: 0.9920091032981873,grad_norm: 0.9999989678177404, iteration: 166237
loss: 0.9788022637367249,grad_norm: 0.8919109913546879, iteration: 166238
loss: 1.001036524772644,grad_norm: 0.9793010169094771, iteration: 166239
loss: 0.9995849132537842,grad_norm: 0.9360613776492231, iteration: 166240
loss: 1.0249944925308228,grad_norm: 0.999999371876179, iteration: 166241
loss: 0.9985782504081726,grad_norm: 0.9586621683593501, iteration: 166242
loss: 0.9849715232849121,grad_norm: 0.9999990760074855, iteration: 166243
loss: 0.9969052672386169,grad_norm: 0.982607150151427, iteration: 166244
loss: 1.0307537317276,grad_norm: 0.9999998317276135, iteration: 166245
loss: 0.9786675572395325,grad_norm: 0.9599666723078342, iteration: 166246
loss: 1.0067604780197144,grad_norm: 0.9999990664412245, iteration: 166247
loss: 1.0062150955200195,grad_norm: 0.9999990827489569, iteration: 166248
loss: 1.0107090473175049,grad_norm: 0.9999992591390184, iteration: 166249
loss: 0.9923275709152222,grad_norm: 0.9999991905271234, iteration: 166250
loss: 0.9704070091247559,grad_norm: 0.9750371520506403, iteration: 166251
loss: 1.0065218210220337,grad_norm: 0.9714003422907344, iteration: 166252
loss: 1.003422737121582,grad_norm: 0.9999991361494603, iteration: 166253
loss: 0.9856529831886292,grad_norm: 0.8928627206141446, iteration: 166254
loss: 0.9788378477096558,grad_norm: 0.9999991668471357, iteration: 166255
loss: 1.0007812976837158,grad_norm: 0.9152696849265209, iteration: 166256
loss: 1.0156400203704834,grad_norm: 0.9941917063333171, iteration: 166257
loss: 0.9981248378753662,grad_norm: 0.9999991925928693, iteration: 166258
loss: 0.9931895732879639,grad_norm: 0.9969095157472254, iteration: 166259
loss: 1.0439724922180176,grad_norm: 0.9999990818863567, iteration: 166260
loss: 0.9772433638572693,grad_norm: 0.9999991920687401, iteration: 166261
loss: 0.9907980561256409,grad_norm: 0.9999992092606149, iteration: 166262
loss: 1.0057796239852905,grad_norm: 0.8961326943618054, iteration: 166263
loss: 1.0689175128936768,grad_norm: 0.9999996598968747, iteration: 166264
loss: 1.0062955617904663,grad_norm: 0.9999990091395343, iteration: 166265
loss: 1.0395327806472778,grad_norm: 0.9999996470415025, iteration: 166266
loss: 1.0643082857131958,grad_norm: 0.976922325981531, iteration: 166267
loss: 0.9770251512527466,grad_norm: 0.9850365029684685, iteration: 166268
loss: 1.0310256481170654,grad_norm: 0.9999989867078523, iteration: 166269
loss: 0.991861879825592,grad_norm: 0.9725137108563228, iteration: 166270
loss: 1.0073572397232056,grad_norm: 0.999999105597166, iteration: 166271
loss: 1.0072256326675415,grad_norm: 0.9398065500213472, iteration: 166272
loss: 0.9705125689506531,grad_norm: 0.9999990705337761, iteration: 166273
loss: 0.9675397872924805,grad_norm: 0.8034465559816734, iteration: 166274
loss: 0.9644465446472168,grad_norm: 0.908935666328257, iteration: 166275
loss: 1.0036803483963013,grad_norm: 0.9999990798703353, iteration: 166276
loss: 0.9824293851852417,grad_norm: 0.9999993091792732, iteration: 166277
loss: 1.010032057762146,grad_norm: 0.9999991108989504, iteration: 166278
loss: 0.9871191382408142,grad_norm: 0.9661525836800533, iteration: 166279
loss: 0.980941116809845,grad_norm: 0.9999991649190174, iteration: 166280
loss: 0.9848699569702148,grad_norm: 0.8732884194229147, iteration: 166281
loss: 1.0825287103652954,grad_norm: 0.9999994005228576, iteration: 166282
loss: 1.0333263874053955,grad_norm: 0.9999991738812235, iteration: 166283
loss: 0.9379507899284363,grad_norm: 0.9999990184399593, iteration: 166284
loss: 1.0125763416290283,grad_norm: 0.999999340204913, iteration: 166285
loss: 0.997941792011261,grad_norm: 0.9999991626528233, iteration: 166286
loss: 1.001151204109192,grad_norm: 0.8937780666097234, iteration: 166287
loss: 1.0176029205322266,grad_norm: 0.9999990900636304, iteration: 166288
loss: 0.9782704710960388,grad_norm: 0.8718907692550812, iteration: 166289
loss: 0.9883477091789246,grad_norm: 0.9643944347484693, iteration: 166290
loss: 0.9732276201248169,grad_norm: 0.9999990901071358, iteration: 166291
loss: 0.9765087366104126,grad_norm: 0.9936211102776825, iteration: 166292
loss: 0.9800597429275513,grad_norm: 0.9999992811811913, iteration: 166293
loss: 0.9602673649787903,grad_norm: 0.9999992804946904, iteration: 166294
loss: 1.005254864692688,grad_norm: 0.8924442486839331, iteration: 166295
loss: 1.023242712020874,grad_norm: 0.9999990596630673, iteration: 166296
loss: 1.0087666511535645,grad_norm: 0.938758058243194, iteration: 166297
loss: 1.0173434019088745,grad_norm: 0.9999993566801576, iteration: 166298
loss: 1.059833288192749,grad_norm: 0.9999999128048418, iteration: 166299
loss: 1.0017696619033813,grad_norm: 0.9999999466385254, iteration: 166300
loss: 1.0098196268081665,grad_norm: 0.9999991317667232, iteration: 166301
loss: 1.008266568183899,grad_norm: 0.8698004524654193, iteration: 166302
loss: 1.0736771821975708,grad_norm: 0.9999998609718348, iteration: 166303
loss: 0.9943044781684875,grad_norm: 0.9999990374671683, iteration: 166304
loss: 0.9698541760444641,grad_norm: 0.9119616233213078, iteration: 166305
loss: 1.0038386583328247,grad_norm: 0.9459306416164454, iteration: 166306
loss: 1.0165921449661255,grad_norm: 0.9387618772262415, iteration: 166307
loss: 0.9777207970619202,grad_norm: 0.9179841938527811, iteration: 166308
loss: 0.9903711080551147,grad_norm: 0.9046438015264582, iteration: 166309
loss: 1.0250152349472046,grad_norm: 0.9592151305372253, iteration: 166310
loss: 1.0257781744003296,grad_norm: 0.9376207818467615, iteration: 166311
loss: 0.9925424456596375,grad_norm: 0.9999992786216555, iteration: 166312
loss: 0.9839933514595032,grad_norm: 0.9999991189274694, iteration: 166313
loss: 1.0238724946975708,grad_norm: 0.9999995806141533, iteration: 166314
loss: 0.9973441958427429,grad_norm: 0.9999994461000716, iteration: 166315
loss: 0.9925063252449036,grad_norm: 0.8627454819390611, iteration: 166316
loss: 1.0101784467697144,grad_norm: 0.8839768089231436, iteration: 166317
loss: 1.0013834238052368,grad_norm: 0.9999991839355485, iteration: 166318
loss: 1.0042500495910645,grad_norm: 0.86145229482516, iteration: 166319
loss: 0.9621409773826599,grad_norm: 0.9868848357367417, iteration: 166320
loss: 1.0032498836517334,grad_norm: 0.9999992202866685, iteration: 166321
loss: 0.9660682082176208,grad_norm: 0.9182253532493418, iteration: 166322
loss: 1.0367978811264038,grad_norm: 0.9362943649556621, iteration: 166323
loss: 1.053290605545044,grad_norm: 0.9999999597074924, iteration: 166324
loss: 1.0150803327560425,grad_norm: 0.9999992322206379, iteration: 166325
loss: 0.9886388182640076,grad_norm: 0.9999992023089053, iteration: 166326
loss: 0.9385567307472229,grad_norm: 0.9999992335655413, iteration: 166327
loss: 0.9863241314888,grad_norm: 0.9999991647859918, iteration: 166328
loss: 1.0313985347747803,grad_norm: 0.9914772616019324, iteration: 166329
loss: 0.9667548537254333,grad_norm: 0.858715640857042, iteration: 166330
loss: 1.0204904079437256,grad_norm: 0.9571961322198467, iteration: 166331
loss: 0.9986175298690796,grad_norm: 0.9321439251405131, iteration: 166332
loss: 1.0400882959365845,grad_norm: 0.9999996956040076, iteration: 166333
loss: 0.9773029685020447,grad_norm: 0.9441221512483208, iteration: 166334
loss: 0.9792173504829407,grad_norm: 0.9999992050245217, iteration: 166335
loss: 0.973834216594696,grad_norm: 0.8273613242175063, iteration: 166336
loss: 1.0133216381072998,grad_norm: 0.9999991924116153, iteration: 166337
loss: 1.0113112926483154,grad_norm: 0.9639612582415026, iteration: 166338
loss: 1.0159982442855835,grad_norm: 0.9966780794871594, iteration: 166339
loss: 1.0128026008605957,grad_norm: 0.9999989537459334, iteration: 166340
loss: 0.963391900062561,grad_norm: 0.8898860297888772, iteration: 166341
loss: 1.0454705953598022,grad_norm: 0.9904422700611256, iteration: 166342
loss: 0.9820564389228821,grad_norm: 0.9999991880805971, iteration: 166343
loss: 1.0268267393112183,grad_norm: 0.9999990836121542, iteration: 166344
loss: 0.9847185611724854,grad_norm: 0.9999990592954242, iteration: 166345
loss: 1.0122485160827637,grad_norm: 0.9869079149302045, iteration: 166346
loss: 1.0073144435882568,grad_norm: 0.9456832712654313, iteration: 166347
loss: 1.0372099876403809,grad_norm: 1.0000000827644202, iteration: 166348
loss: 1.0094494819641113,grad_norm: 0.9999989793747692, iteration: 166349
loss: 1.0156484842300415,grad_norm: 0.9999991323750838, iteration: 166350
loss: 0.9707301259040833,grad_norm: 0.9015877366863524, iteration: 166351
loss: 1.0224406719207764,grad_norm: 0.9291412632854915, iteration: 166352
loss: 1.018364429473877,grad_norm: 0.999999081082001, iteration: 166353
loss: 1.0554307699203491,grad_norm: 0.9999990270079854, iteration: 166354
loss: 0.9834585189819336,grad_norm: 0.942214728782726, iteration: 166355
loss: 1.0063484907150269,grad_norm: 0.8783770648047561, iteration: 166356
loss: 0.9623246192932129,grad_norm: 0.9582420624644097, iteration: 166357
loss: 1.0029852390289307,grad_norm: 0.9636437953883061, iteration: 166358
loss: 1.0126285552978516,grad_norm: 0.9102115870238193, iteration: 166359
loss: 0.9951987862586975,grad_norm: 0.9999991769716227, iteration: 166360
loss: 0.9920406937599182,grad_norm: 0.9999992884663845, iteration: 166361
loss: 0.9941772818565369,grad_norm: 0.9439235224258317, iteration: 166362
loss: 0.9712576270103455,grad_norm: 0.9999992079559927, iteration: 166363
loss: 0.9692786335945129,grad_norm: 0.9999991194307075, iteration: 166364
loss: 0.9930931925773621,grad_norm: 0.9054338524625878, iteration: 166365
loss: 0.9573566913604736,grad_norm: 0.8770210066593288, iteration: 166366
loss: 0.9817303419113159,grad_norm: 0.9999990607087584, iteration: 166367
loss: 1.0311168432235718,grad_norm: 0.9999998979240466, iteration: 166368
loss: 1.011487603187561,grad_norm: 0.8826000729005574, iteration: 166369
loss: 0.9728536009788513,grad_norm: 0.8559147700673344, iteration: 166370
loss: 1.0000112056732178,grad_norm: 0.9765151238521207, iteration: 166371
loss: 1.0016489028930664,grad_norm: 0.9999991114799931, iteration: 166372
loss: 1.0345127582550049,grad_norm: 0.8839416947733597, iteration: 166373
loss: 1.030490517616272,grad_norm: 0.9999992987379233, iteration: 166374
loss: 0.9712460041046143,grad_norm: 0.9999992912213952, iteration: 166375
loss: 1.0075091123580933,grad_norm: 0.9999992229004062, iteration: 166376
loss: 1.0129426717758179,grad_norm: 0.9392762382106385, iteration: 166377
loss: 0.9691315293312073,grad_norm: 0.9999990860118909, iteration: 166378
loss: 0.9624089002609253,grad_norm: 0.9999990375576294, iteration: 166379
loss: 0.997815728187561,grad_norm: 0.9999991173336054, iteration: 166380
loss: 0.9935646653175354,grad_norm: 0.8877085447060118, iteration: 166381
loss: 0.9718207716941833,grad_norm: 0.8854633346501442, iteration: 166382
loss: 0.9844354391098022,grad_norm: 0.999998978336308, iteration: 166383
loss: 0.9802820086479187,grad_norm: 0.9999990596148656, iteration: 166384
loss: 1.0074204206466675,grad_norm: 0.9999990070472177, iteration: 166385
loss: 0.9941225647926331,grad_norm: 0.9956389856117719, iteration: 166386
loss: 0.952054500579834,grad_norm: 0.8931632035136843, iteration: 166387
loss: 1.00453782081604,grad_norm: 0.8002265175116589, iteration: 166388
loss: 1.1712658405303955,grad_norm: 0.9999992022435912, iteration: 166389
loss: 0.9822570085525513,grad_norm: 0.9143994826080719, iteration: 166390
loss: 1.09609055519104,grad_norm: 0.954878615737753, iteration: 166391
loss: 0.9779362082481384,grad_norm: 0.9145428006280767, iteration: 166392
loss: 1.008180856704712,grad_norm: 0.9999989042822972, iteration: 166393
loss: 1.0173102617263794,grad_norm: 0.9999995201060777, iteration: 166394
loss: 1.0091323852539062,grad_norm: 0.9999990100614207, iteration: 166395
loss: 0.9631552696228027,grad_norm: 0.8497572541259599, iteration: 166396
loss: 1.0014878511428833,grad_norm: 0.9999989658861287, iteration: 166397
loss: 0.9503680467605591,grad_norm: 0.9684156332298844, iteration: 166398
loss: 0.983608603477478,grad_norm: 0.9209322884325309, iteration: 166399
loss: 0.9827089309692383,grad_norm: 0.9999992308527306, iteration: 166400
loss: 1.0465823411941528,grad_norm: 0.9999990714228013, iteration: 166401
loss: 0.953592836856842,grad_norm: 0.8993418403277537, iteration: 166402
loss: 1.037682056427002,grad_norm: 0.9999992919284524, iteration: 166403
loss: 0.9763957858085632,grad_norm: 0.9375855621447883, iteration: 166404
loss: 0.9805590510368347,grad_norm: 0.9734977847096932, iteration: 166405
loss: 0.9934998750686646,grad_norm: 0.9430020686968178, iteration: 166406
loss: 0.9893882274627686,grad_norm: 0.9999990918687566, iteration: 166407
loss: 1.0075116157531738,grad_norm: 0.9999990240059833, iteration: 166408
loss: 1.1199344396591187,grad_norm: 0.999999616796677, iteration: 166409
loss: 0.9818152785301208,grad_norm: 0.9999992309797663, iteration: 166410
loss: 1.0022927522659302,grad_norm: 0.9999989092826628, iteration: 166411
loss: 1.0189722776412964,grad_norm: 0.9999993179390284, iteration: 166412
loss: 0.9841384887695312,grad_norm: 0.964079162071818, iteration: 166413
loss: 0.9909610748291016,grad_norm: 0.9999991558406294, iteration: 166414
loss: 1.0033924579620361,grad_norm: 0.894089909046701, iteration: 166415
loss: 1.0302904844284058,grad_norm: 0.89933595306187, iteration: 166416
loss: 1.020737648010254,grad_norm: 0.999999053262086, iteration: 166417
loss: 1.0075820684432983,grad_norm: 0.9999990456695738, iteration: 166418
loss: 0.9987544417381287,grad_norm: 0.9999990065340599, iteration: 166419
loss: 1.004270315170288,grad_norm: 0.9999990438536307, iteration: 166420
loss: 0.9975363612174988,grad_norm: 0.9999991397119942, iteration: 166421
loss: 1.0841981172561646,grad_norm: 0.999999890744917, iteration: 166422
loss: 1.0133576393127441,grad_norm: 0.9999993259102512, iteration: 166423
loss: 1.0024281740188599,grad_norm: 0.9999992358449444, iteration: 166424
loss: 1.0109320878982544,grad_norm: 0.9427777423684038, iteration: 166425
loss: 0.9864927530288696,grad_norm: 0.9925518254074082, iteration: 166426
loss: 1.0430597066879272,grad_norm: 0.7644272096251797, iteration: 166427
loss: 1.039955735206604,grad_norm: 0.9999991546293685, iteration: 166428
loss: 1.0209527015686035,grad_norm: 0.9999999021048058, iteration: 166429
loss: 0.9587771892547607,grad_norm: 0.9999991628823839, iteration: 166430
loss: 1.0051994323730469,grad_norm: 0.9999990642044652, iteration: 166431
loss: 0.9986816048622131,grad_norm: 0.9999993938532075, iteration: 166432
loss: 0.9965259432792664,grad_norm: 0.9999992324816068, iteration: 166433
loss: 0.9835327863693237,grad_norm: 0.9999990491593242, iteration: 166434
loss: 1.0042327642440796,grad_norm: 0.9696849138943386, iteration: 166435
loss: 1.007014513015747,grad_norm: 0.9999991098096086, iteration: 166436
loss: 0.9526315331459045,grad_norm: 0.9999988926418888, iteration: 166437
loss: 1.0390851497650146,grad_norm: 0.9774316544750077, iteration: 166438
loss: 0.9632306694984436,grad_norm: 0.9150480124371018, iteration: 166439
loss: 1.0270321369171143,grad_norm: 0.9142059863096391, iteration: 166440
loss: 0.9897889494895935,grad_norm: 0.959928031311028, iteration: 166441
loss: 1.045183539390564,grad_norm: 0.9999991129998336, iteration: 166442
loss: 1.0069752931594849,grad_norm: 0.9999989893905629, iteration: 166443
loss: 1.0393078327178955,grad_norm: 0.9999990418788535, iteration: 166444
loss: 1.007111668586731,grad_norm: 0.8814368491013266, iteration: 166445
loss: 1.0005254745483398,grad_norm: 0.999999258438921, iteration: 166446
loss: 1.0084059238433838,grad_norm: 0.9999990132032719, iteration: 166447
loss: 0.9922451972961426,grad_norm: 0.9128628507698346, iteration: 166448
loss: 1.0110238790512085,grad_norm: 0.9169572190943844, iteration: 166449
loss: 1.0466773509979248,grad_norm: 0.9768178121656166, iteration: 166450
loss: 1.0006636381149292,grad_norm: 0.9041666889717505, iteration: 166451
loss: 0.9778439402580261,grad_norm: 0.9999989839421308, iteration: 166452
loss: 0.9997296929359436,grad_norm: 0.9999991583527306, iteration: 166453
loss: 0.9709975123405457,grad_norm: 0.9999991162763773, iteration: 166454
loss: 0.9998160600662231,grad_norm: 0.9999991352753463, iteration: 166455
loss: 1.007809042930603,grad_norm: 0.9792680121660645, iteration: 166456
loss: 1.0248135328292847,grad_norm: 0.9999992080444787, iteration: 166457
loss: 1.0097061395645142,grad_norm: 0.9869762227909622, iteration: 166458
loss: 1.0287052392959595,grad_norm: 0.9999990255843447, iteration: 166459
loss: 1.022993564605713,grad_norm: 0.999999194740682, iteration: 166460
loss: 0.9588687419891357,grad_norm: 0.9999991279418269, iteration: 166461
loss: 0.9550841450691223,grad_norm: 0.9987246560503416, iteration: 166462
loss: 1.0063756704330444,grad_norm: 0.8579957994003803, iteration: 166463
loss: 1.1064106225967407,grad_norm: 0.9999992362510443, iteration: 166464
loss: 1.0789403915405273,grad_norm: 0.9999999074598778, iteration: 166465
loss: 0.9762736558914185,grad_norm: 0.9368952543583015, iteration: 166466
loss: 0.9832125902175903,grad_norm: 0.999999178581311, iteration: 166467
loss: 0.9806779026985168,grad_norm: 0.9999990437805554, iteration: 166468
loss: 1.0119551420211792,grad_norm: 0.9945880815970184, iteration: 166469
loss: 1.0019166469573975,grad_norm: 0.8951841521479345, iteration: 166470
loss: 0.9869780540466309,grad_norm: 0.9999990635091536, iteration: 166471
loss: 1.0345773696899414,grad_norm: 0.9999993783420447, iteration: 166472
loss: 0.9777497053146362,grad_norm: 0.9839520910050471, iteration: 166473
loss: 1.018894076347351,grad_norm: 0.992844109704032, iteration: 166474
loss: 1.0089597702026367,grad_norm: 0.9523680821066631, iteration: 166475
loss: 0.9700454473495483,grad_norm: 0.9441529830755041, iteration: 166476
loss: 1.0398924350738525,grad_norm: 0.999999308906998, iteration: 166477
loss: 1.0079809427261353,grad_norm: 0.8588593060251781, iteration: 166478
loss: 0.9754021763801575,grad_norm: 0.9999991415083274, iteration: 166479
loss: 1.0012282133102417,grad_norm: 0.988689760095041, iteration: 166480
loss: 0.9906944632530212,grad_norm: 0.9999992934132421, iteration: 166481
loss: 0.9877989292144775,grad_norm: 0.9881738709043238, iteration: 166482
loss: 1.013418436050415,grad_norm: 0.9410393861535191, iteration: 166483
loss: 1.0137872695922852,grad_norm: 0.9999992246604739, iteration: 166484
loss: 0.9411609172821045,grad_norm: 0.9271833266154493, iteration: 166485
loss: 1.0266425609588623,grad_norm: 0.8521076943845701, iteration: 166486
loss: 1.0899919271469116,grad_norm: 0.9999998029087909, iteration: 166487
loss: 1.005529761314392,grad_norm: 0.9999990164329685, iteration: 166488
loss: 0.9957306981086731,grad_norm: 0.9290441520520293, iteration: 166489
loss: 0.9751564860343933,grad_norm: 0.8561020435508446, iteration: 166490
loss: 1.0091643333435059,grad_norm: 0.9999990076608372, iteration: 166491
loss: 0.9945676922798157,grad_norm: 0.9999990384320762, iteration: 166492
loss: 1.0175834894180298,grad_norm: 0.8333587426605679, iteration: 166493
loss: 1.026214599609375,grad_norm: 0.923521492200562, iteration: 166494
loss: 1.0144051313400269,grad_norm: 0.9425878092798347, iteration: 166495
loss: 0.9650171399116516,grad_norm: 0.9301397996607803, iteration: 166496
loss: 1.0248911380767822,grad_norm: 0.9999992857392466, iteration: 166497
loss: 1.0421487092971802,grad_norm: 0.9484423585563434, iteration: 166498
loss: 0.9950575232505798,grad_norm: 0.9999990838918549, iteration: 166499
loss: 1.0067042112350464,grad_norm: 0.9248991341022638, iteration: 166500
loss: 1.0101770162582397,grad_norm: 0.9826065273583365, iteration: 166501
loss: 1.026557207107544,grad_norm: 0.9999991034254325, iteration: 166502
loss: 0.9820221662521362,grad_norm: 0.9925945946727761, iteration: 166503
loss: 1.0171658992767334,grad_norm: 0.8873707339423503, iteration: 166504
loss: 1.0322681665420532,grad_norm: 0.9999993077186362, iteration: 166505
loss: 1.0260478258132935,grad_norm: 0.9999991623808423, iteration: 166506
loss: 1.0282599925994873,grad_norm: 0.9191567948904544, iteration: 166507
loss: 0.9998332858085632,grad_norm: 0.863388542385332, iteration: 166508
loss: 1.0044782161712646,grad_norm: 0.9999993266259661, iteration: 166509
loss: 1.0310388803482056,grad_norm: 0.9986812640665946, iteration: 166510
loss: 1.0164228677749634,grad_norm: 0.9426679589597526, iteration: 166511
loss: 1.0462307929992676,grad_norm: 0.9999995854947165, iteration: 166512
loss: 0.9696755409240723,grad_norm: 0.9953527796503734, iteration: 166513
loss: 0.9923749566078186,grad_norm: 0.8950222829081481, iteration: 166514
loss: 1.0122566223144531,grad_norm: 0.9588328696844489, iteration: 166515
loss: 1.0242682695388794,grad_norm: 0.8585041353094786, iteration: 166516
loss: 0.9974043965339661,grad_norm: 0.8994718150215173, iteration: 166517
loss: 0.9981666803359985,grad_norm: 0.9430857390964701, iteration: 166518
loss: 1.009266972541809,grad_norm: 0.9999990130702068, iteration: 166519
loss: 1.0290358066558838,grad_norm: 0.9933412889589746, iteration: 166520
loss: 1.0006650686264038,grad_norm: 0.9324633706943433, iteration: 166521
loss: 1.0450637340545654,grad_norm: 0.9999992630924301, iteration: 166522
loss: 0.9825144410133362,grad_norm: 0.8570494094403384, iteration: 166523
loss: 1.0115115642547607,grad_norm: 0.8836600143323365, iteration: 166524
loss: 1.0241727828979492,grad_norm: 0.9999993427426639, iteration: 166525
loss: 1.010204553604126,grad_norm: 0.9999991872098658, iteration: 166526
loss: 1.0204203128814697,grad_norm: 0.9999993519634888, iteration: 166527
loss: 0.9972768425941467,grad_norm: 0.974269682674087, iteration: 166528
loss: 0.9812056422233582,grad_norm: 0.9999991947643847, iteration: 166529
loss: 1.0362422466278076,grad_norm: 0.9999991827525587, iteration: 166530
loss: 1.0154718160629272,grad_norm: 0.9999990579128689, iteration: 166531
loss: 1.0231486558914185,grad_norm: 0.9999991906533271, iteration: 166532
loss: 1.0035772323608398,grad_norm: 0.9344304120953661, iteration: 166533
loss: 0.9821473360061646,grad_norm: 0.9032284659830903, iteration: 166534
loss: 1.0260722637176514,grad_norm: 0.9999993232991894, iteration: 166535
loss: 0.9812000393867493,grad_norm: 0.9999991448716301, iteration: 166536
loss: 0.9557223320007324,grad_norm: 0.8984597460066036, iteration: 166537
loss: 1.0216102600097656,grad_norm: 0.941603713931134, iteration: 166538
loss: 1.0307490825653076,grad_norm: 0.9861900244667414, iteration: 166539
loss: 1.018036127090454,grad_norm: 0.9999991890076314, iteration: 166540
loss: 0.9651960730552673,grad_norm: 0.9472802373077323, iteration: 166541
loss: 0.9934648871421814,grad_norm: 0.9935524822702075, iteration: 166542
loss: 1.0122168064117432,grad_norm: 0.983321770024065, iteration: 166543
loss: 1.0450726747512817,grad_norm: 0.976369649348078, iteration: 166544
loss: 1.0064572095870972,grad_norm: 0.999999019666144, iteration: 166545
loss: 1.0274323225021362,grad_norm: 0.9999996103764773, iteration: 166546
loss: 0.9879036545753479,grad_norm: 0.9999992008924227, iteration: 166547
loss: 1.0038015842437744,grad_norm: 0.9459632243228829, iteration: 166548
loss: 0.9866136908531189,grad_norm: 0.8045381767989448, iteration: 166549
loss: 1.0784859657287598,grad_norm: 0.9999992382106326, iteration: 166550
loss: 1.0035834312438965,grad_norm: 0.9999999922889403, iteration: 166551
loss: 1.0143990516662598,grad_norm: 0.9999989195298518, iteration: 166552
loss: 1.129034399986267,grad_norm: 0.9999992660339689, iteration: 166553
loss: 1.001266598701477,grad_norm: 0.9516584717160252, iteration: 166554
loss: 1.033431887626648,grad_norm: 0.9999997939768286, iteration: 166555
loss: 0.9901065826416016,grad_norm: 0.9999991248612808, iteration: 166556
loss: 0.9809969067573547,grad_norm: 0.8845861311808374, iteration: 166557
loss: 1.083898663520813,grad_norm: 0.9999991352052796, iteration: 166558
loss: 1.0181870460510254,grad_norm: 0.9999991258074155, iteration: 166559
loss: 1.0061311721801758,grad_norm: 0.9999990269237923, iteration: 166560
loss: 1.0647327899932861,grad_norm: 0.9999999097885118, iteration: 166561
loss: 1.026932954788208,grad_norm: 0.9999989935443387, iteration: 166562
loss: 1.0642129182815552,grad_norm: 0.9999994689112658, iteration: 166563
loss: 0.9933595657348633,grad_norm: 0.9999992330635118, iteration: 166564
loss: 1.0761865377426147,grad_norm: 0.9999991434935463, iteration: 166565
loss: 1.0074843168258667,grad_norm: 0.9999991805515169, iteration: 166566
loss: 1.0195623636245728,grad_norm: 0.9999999636033925, iteration: 166567
loss: 0.9793668389320374,grad_norm: 0.8342787473111184, iteration: 166568
loss: 0.9919313788414001,grad_norm: 0.9999989780660927, iteration: 166569
loss: 0.9747707843780518,grad_norm: 0.9820865476501306, iteration: 166570
loss: 1.023881435394287,grad_norm: 0.9999995341862764, iteration: 166571
loss: 0.985565185546875,grad_norm: 0.9999991162690856, iteration: 166572
loss: 0.9869600534439087,grad_norm: 0.9499798252654639, iteration: 166573
loss: 1.0333346128463745,grad_norm: 0.9357848665468288, iteration: 166574
loss: 0.9976112842559814,grad_norm: 0.9754941010144734, iteration: 166575
loss: 0.9980893135070801,grad_norm: 0.9999991543431647, iteration: 166576
loss: 0.9972575902938843,grad_norm: 0.8825729025521696, iteration: 166577
loss: 1.0374497175216675,grad_norm: 0.999999152419955, iteration: 166578
loss: 1.0042380094528198,grad_norm: 0.9999991784193348, iteration: 166579
loss: 1.0097761154174805,grad_norm: 0.8584071596780191, iteration: 166580
loss: 1.0038213729858398,grad_norm: 0.9999993595472773, iteration: 166581
loss: 1.0081084966659546,grad_norm: 0.9349780679426635, iteration: 166582
loss: 1.0104485750198364,grad_norm: 0.9999997954460131, iteration: 166583
loss: 1.1247268915176392,grad_norm: 0.9999996338431457, iteration: 166584
loss: 1.0023918151855469,grad_norm: 0.9085273439404677, iteration: 166585
loss: 1.013024926185608,grad_norm: 0.9999997974338968, iteration: 166586
loss: 1.0243195295333862,grad_norm: 0.9229515852756474, iteration: 166587
loss: 0.9995225667953491,grad_norm: 0.9999990559852988, iteration: 166588
loss: 1.0356436967849731,grad_norm: 0.9999992400190971, iteration: 166589
loss: 1.0327481031417847,grad_norm: 0.9227514358083692, iteration: 166590
loss: 0.994962215423584,grad_norm: 0.8587701056243889, iteration: 166591
loss: 0.9756905436515808,grad_norm: 0.8903141205697659, iteration: 166592
loss: 1.031266212463379,grad_norm: 0.8784810065253703, iteration: 166593
loss: 0.9848750233650208,grad_norm: 0.9629137093379136, iteration: 166594
loss: 1.0495966672897339,grad_norm: 0.999999468440032, iteration: 166595
loss: 0.9950176477432251,grad_norm: 0.846148512786761, iteration: 166596
loss: 1.0670784711837769,grad_norm: 0.9999990722852075, iteration: 166597
loss: 1.1312663555145264,grad_norm: 0.9286932652376547, iteration: 166598
loss: 1.000680685043335,grad_norm: 0.9999991749850107, iteration: 166599
loss: 1.0006585121154785,grad_norm: 0.9999991791130155, iteration: 166600
loss: 1.0251158475875854,grad_norm: 0.9999993129380825, iteration: 166601
loss: 1.0649341344833374,grad_norm: 0.9519169659162509, iteration: 166602
loss: 0.9891530871391296,grad_norm: 0.7945919544223382, iteration: 166603
loss: 0.9848395586013794,grad_norm: 0.9668014437930975, iteration: 166604
loss: 1.0230416059494019,grad_norm: 0.9999992588386751, iteration: 166605
loss: 1.0084022283554077,grad_norm: 0.7766349294605148, iteration: 166606
loss: 1.0984350442886353,grad_norm: 0.9999992434293336, iteration: 166607
loss: 1.0031731128692627,grad_norm: 0.9501040095866661, iteration: 166608
loss: 1.1600427627563477,grad_norm: 0.9999991316670227, iteration: 166609
loss: 1.1933369636535645,grad_norm: 0.9999998421439535, iteration: 166610
loss: 1.080233097076416,grad_norm: 0.9999996220614579, iteration: 166611
loss: 1.0205912590026855,grad_norm: 0.9999992246991343, iteration: 166612
loss: 1.1941989660263062,grad_norm: 0.9999997069491837, iteration: 166613
loss: 0.9777607917785645,grad_norm: 0.945315452310136, iteration: 166614
loss: 0.9982259273529053,grad_norm: 0.9999992225169804, iteration: 166615
loss: 0.992440938949585,grad_norm: 0.9999991868290201, iteration: 166616
loss: 1.0629408359527588,grad_norm: 0.9999992885329048, iteration: 166617
loss: 1.0653232336044312,grad_norm: 0.9741897756927956, iteration: 166618
loss: 1.015613317489624,grad_norm: 0.9523279902803775, iteration: 166619
loss: 1.0066514015197754,grad_norm: 0.9999992259287281, iteration: 166620
loss: 1.0096842050552368,grad_norm: 0.9581905851752689, iteration: 166621
loss: 1.0548235177993774,grad_norm: 0.9999993974328774, iteration: 166622
loss: 1.006039023399353,grad_norm: 0.9999991230194256, iteration: 166623
loss: 1.0408143997192383,grad_norm: 0.9139916841065858, iteration: 166624
loss: 0.9877532720565796,grad_norm: 0.9999994051187939, iteration: 166625
loss: 1.002403974533081,grad_norm: 0.9999992797930981, iteration: 166626
loss: 1.1055480241775513,grad_norm: 0.88502193064405, iteration: 166627
loss: 0.9918733239173889,grad_norm: 0.9999994859756767, iteration: 166628
loss: 1.0322293043136597,grad_norm: 0.9999992930170913, iteration: 166629
loss: 1.081661581993103,grad_norm: 0.9275028084132966, iteration: 166630
loss: 1.061846375465393,grad_norm: 0.9651818740517306, iteration: 166631
loss: 1.380113959312439,grad_norm: 0.9999995281491003, iteration: 166632
loss: 1.0310726165771484,grad_norm: 0.9999992259981436, iteration: 166633
loss: 0.9969611763954163,grad_norm: 0.9999991469589429, iteration: 166634
loss: 1.087923526763916,grad_norm: 0.9999990926865953, iteration: 166635
loss: 1.020721197128296,grad_norm: 0.9734332513150538, iteration: 166636
loss: 1.0782372951507568,grad_norm: 0.9096426301655856, iteration: 166637
loss: 1.001175045967102,grad_norm: 0.9999990924960375, iteration: 166638
loss: 1.005712866783142,grad_norm: 0.867970474662741, iteration: 166639
loss: 1.0714449882507324,grad_norm: 0.9999997264907111, iteration: 166640
loss: 0.9668144583702087,grad_norm: 0.9893497335516267, iteration: 166641
loss: 1.0023423433303833,grad_norm: 0.9999993088453428, iteration: 166642
loss: 1.0091496706008911,grad_norm: 0.9999991653330391, iteration: 166643
loss: 0.9875351786613464,grad_norm: 0.9999992268915138, iteration: 166644
loss: 1.0322940349578857,grad_norm: 0.912361718809772, iteration: 166645
loss: 1.0335041284561157,grad_norm: 0.9999997928474011, iteration: 166646
loss: 1.00620436668396,grad_norm: 0.9999990478589635, iteration: 166647
loss: 1.017333984375,grad_norm: 0.9999992646820864, iteration: 166648
loss: 1.0264508724212646,grad_norm: 0.9999990568269612, iteration: 166649
loss: 1.0717703104019165,grad_norm: 0.9999991218288777, iteration: 166650
loss: 1.069214940071106,grad_norm: 0.9999992278865083, iteration: 166651
loss: 1.0362548828125,grad_norm: 0.9999991458691224, iteration: 166652
loss: 1.0433599948883057,grad_norm: 0.99999901093225, iteration: 166653
loss: 1.0259562730789185,grad_norm: 0.9969637653747951, iteration: 166654
loss: 0.9537038803100586,grad_norm: 0.999999207355723, iteration: 166655
loss: 1.0085253715515137,grad_norm: 0.9983091401789737, iteration: 166656
loss: 0.9884470701217651,grad_norm: 0.9999993304680971, iteration: 166657
loss: 0.9892995953559875,grad_norm: 0.9811141714375536, iteration: 166658
loss: 1.0057965517044067,grad_norm: 0.9626505826472693, iteration: 166659
loss: 0.9898201823234558,grad_norm: 0.9999991287445603, iteration: 166660
loss: 0.9874227643013,grad_norm: 0.8820466105980436, iteration: 166661
loss: 0.9736208319664001,grad_norm: 0.999999169896663, iteration: 166662
loss: 1.0501426458358765,grad_norm: 0.9999997610236997, iteration: 166663
loss: 1.1065629720687866,grad_norm: 0.9999998899710864, iteration: 166664
loss: 1.010025143623352,grad_norm: 0.9999992809124831, iteration: 166665
loss: 1.044122338294983,grad_norm: 0.9999992447089997, iteration: 166666
loss: 0.9836893677711487,grad_norm: 0.9999994710432094, iteration: 166667
loss: 1.0142239332199097,grad_norm: 0.9999991855180814, iteration: 166668
loss: 1.0195542573928833,grad_norm: 0.9416739060457212, iteration: 166669
loss: 0.986765444278717,grad_norm: 0.999999190726321, iteration: 166670
loss: 1.0108674764633179,grad_norm: 0.9841827263458381, iteration: 166671
loss: 1.0324939489364624,grad_norm: 0.9999994477127546, iteration: 166672
loss: 1.0065635442733765,grad_norm: 0.9999990428992012, iteration: 166673
loss: 0.9781593680381775,grad_norm: 0.8979800795681131, iteration: 166674
loss: 1.0070291757583618,grad_norm: 0.9999991173676623, iteration: 166675
loss: 1.0047458410263062,grad_norm: 0.99999908247539, iteration: 166676
loss: 0.9583376049995422,grad_norm: 0.8641749018603655, iteration: 166677
loss: 1.007035255432129,grad_norm: 0.8872665001492264, iteration: 166678
loss: 1.0094512701034546,grad_norm: 0.9999998152367172, iteration: 166679
loss: 0.9976006150245667,grad_norm: 0.9999996066812675, iteration: 166680
loss: 1.0001662969589233,grad_norm: 0.9999995116903926, iteration: 166681
loss: 0.9983515739440918,grad_norm: 0.9999999142665638, iteration: 166682
loss: 0.9999608993530273,grad_norm: 0.9999990991415633, iteration: 166683
loss: 0.9853212237358093,grad_norm: 0.9863539673232392, iteration: 166684
loss: 1.0182130336761475,grad_norm: 0.9999991242597364, iteration: 166685
loss: 1.0062320232391357,grad_norm: 0.8739099320242353, iteration: 166686
loss: 0.9951325058937073,grad_norm: 0.9999993894445138, iteration: 166687
loss: 0.9747137427330017,grad_norm: 0.8536859211853863, iteration: 166688
loss: 1.0074540376663208,grad_norm: 0.8964129633261313, iteration: 166689
loss: 1.001613974571228,grad_norm: 0.9837733608779156, iteration: 166690
loss: 0.9926891326904297,grad_norm: 0.9999994070539049, iteration: 166691
loss: 0.9969460964202881,grad_norm: 0.9999990907932528, iteration: 166692
loss: 0.9865232110023499,grad_norm: 0.871208693139808, iteration: 166693
loss: 1.0912120342254639,grad_norm: 0.9999993671254609, iteration: 166694
loss: 1.007491946220398,grad_norm: 0.9999991257839315, iteration: 166695
loss: 1.0462641716003418,grad_norm: 0.999999251909652, iteration: 166696
loss: 0.990307629108429,grad_norm: 0.9999990881651308, iteration: 166697
loss: 1.019101858139038,grad_norm: 0.9490289391828398, iteration: 166698
loss: 1.0218003988265991,grad_norm: 0.9626914172950921, iteration: 166699
loss: 0.9886073470115662,grad_norm: 0.996571883880091, iteration: 166700
loss: 0.9744161367416382,grad_norm: 0.999999143561823, iteration: 166701
loss: 0.9862480163574219,grad_norm: 0.9853828233784715, iteration: 166702
loss: 1.0250773429870605,grad_norm: 0.9999990379852294, iteration: 166703
loss: 0.9955199360847473,grad_norm: 0.8028025515742004, iteration: 166704
loss: 0.9754864573478699,grad_norm: 0.9468810316252434, iteration: 166705
loss: 1.0050013065338135,grad_norm: 0.9999993205577656, iteration: 166706
loss: 1.021741509437561,grad_norm: 0.9999993871300874, iteration: 166707
loss: 0.990816056728363,grad_norm: 0.9426072770441327, iteration: 166708
loss: 0.9991505146026611,grad_norm: 0.8692045584705769, iteration: 166709
loss: 1.0094609260559082,grad_norm: 0.9999991672166422, iteration: 166710
loss: 0.9686902761459351,grad_norm: 0.9999990998844245, iteration: 166711
loss: 1.0004022121429443,grad_norm: 0.9999990190101867, iteration: 166712
loss: 0.9866946935653687,grad_norm: 0.9638231529697784, iteration: 166713
loss: 0.971458911895752,grad_norm: 0.8606771519976527, iteration: 166714
loss: 0.9860585927963257,grad_norm: 0.9999991641635679, iteration: 166715
loss: 0.9811065793037415,grad_norm: 0.9999990718119918, iteration: 166716
loss: 1.0286121368408203,grad_norm: 0.890251082109586, iteration: 166717
loss: 0.9820607304573059,grad_norm: 0.9999990916939487, iteration: 166718
loss: 1.012718677520752,grad_norm: 0.9999991426945306, iteration: 166719
loss: 1.0220842361450195,grad_norm: 0.868669033873776, iteration: 166720
loss: 1.0304356813430786,grad_norm: 0.9532695676973179, iteration: 166721
loss: 0.9785555005073547,grad_norm: 0.999999095427088, iteration: 166722
loss: 1.0104018449783325,grad_norm: 0.9999990341401821, iteration: 166723
loss: 1.0055279731750488,grad_norm: 0.9999991727967644, iteration: 166724
loss: 0.9910209774971008,grad_norm: 0.975324405842539, iteration: 166725
loss: 1.0083521604537964,grad_norm: 0.9999995866788476, iteration: 166726
loss: 0.99041348695755,grad_norm: 0.9213895472535372, iteration: 166727
loss: 0.98079514503479,grad_norm: 0.999999275985957, iteration: 166728
loss: 0.9856223464012146,grad_norm: 0.9999991628958692, iteration: 166729
loss: 0.989546537399292,grad_norm: 0.9999990763286956, iteration: 166730
loss: 0.9833478927612305,grad_norm: 0.9999989559641537, iteration: 166731
loss: 0.9857571125030518,grad_norm: 0.8574644137226058, iteration: 166732
loss: 0.9848082661628723,grad_norm: 0.8123211536271807, iteration: 166733
loss: 0.9938485622406006,grad_norm: 0.9999991728388717, iteration: 166734
loss: 1.0077816247940063,grad_norm: 0.9999991788603613, iteration: 166735
loss: 0.990509569644928,grad_norm: 0.9999991954177254, iteration: 166736
loss: 0.9711705446243286,grad_norm: 0.9999991605241809, iteration: 166737
loss: 1.0038601160049438,grad_norm: 0.9585692274482414, iteration: 166738
loss: 1.0114495754241943,grad_norm: 0.9999991325511068, iteration: 166739
loss: 0.9893877506256104,grad_norm: 0.9620795193994268, iteration: 166740
loss: 0.9905853867530823,grad_norm: 0.9999991310035024, iteration: 166741
loss: 1.0177571773529053,grad_norm: 0.9999991785419376, iteration: 166742
loss: 0.9954264163970947,grad_norm: 0.9999991058423415, iteration: 166743
loss: 0.9850875735282898,grad_norm: 0.946199948767463, iteration: 166744
loss: 0.9830023050308228,grad_norm: 0.9999991400588448, iteration: 166745
loss: 1.0191456079483032,grad_norm: 0.9999990706910594, iteration: 166746
loss: 1.0384284257888794,grad_norm: 0.9999996532686763, iteration: 166747
loss: 0.9751735329627991,grad_norm: 0.9177229622017404, iteration: 166748
loss: 1.0132696628570557,grad_norm: 0.8865763984907254, iteration: 166749
loss: 1.0035576820373535,grad_norm: 0.8715632845251317, iteration: 166750
loss: 1.0109012126922607,grad_norm: 0.9999992508155351, iteration: 166751
loss: 1.0240715742111206,grad_norm: 0.9999990992647945, iteration: 166752
loss: 1.0551670789718628,grad_norm: 0.9999992312598786, iteration: 166753
loss: 1.0104809999465942,grad_norm: 0.9999994264202188, iteration: 166754
loss: 0.9854602813720703,grad_norm: 0.9999991770110785, iteration: 166755
loss: 1.0243778228759766,grad_norm: 0.8714706575308008, iteration: 166756
loss: 0.9889848232269287,grad_norm: 0.9999990799940857, iteration: 166757
loss: 0.9951368570327759,grad_norm: 0.9098472694701376, iteration: 166758
loss: 0.9655110836029053,grad_norm: 0.8886384016127261, iteration: 166759
loss: 0.9953229427337646,grad_norm: 0.9999990578111846, iteration: 166760
loss: 1.011244297027588,grad_norm: 0.8651300119617771, iteration: 166761
loss: 1.0140337944030762,grad_norm: 0.9515942773105643, iteration: 166762
loss: 1.0163568258285522,grad_norm: 0.9999997403909497, iteration: 166763
loss: 1.0138144493103027,grad_norm: 0.9034732212399087, iteration: 166764
loss: 0.9865136742591858,grad_norm: 0.9999992109213675, iteration: 166765
loss: 0.9841756820678711,grad_norm: 0.9999991502108255, iteration: 166766
loss: 0.9894667863845825,grad_norm: 0.9999991408146189, iteration: 166767
loss: 1.017834186553955,grad_norm: 0.9999990491473548, iteration: 166768
loss: 0.9850674271583557,grad_norm: 0.9730054116389126, iteration: 166769
loss: 1.017776370048523,grad_norm: 0.9999991612052544, iteration: 166770
loss: 0.9912261962890625,grad_norm: 0.9999990454221805, iteration: 166771
loss: 1.0497013330459595,grad_norm: 0.8981045004037242, iteration: 166772
loss: 0.9967604279518127,grad_norm: 0.9999989709705709, iteration: 166773
loss: 1.0241891145706177,grad_norm: 0.8135775862691277, iteration: 166774
loss: 0.9923192858695984,grad_norm: 0.9999991841897969, iteration: 166775
loss: 0.9990140199661255,grad_norm: 0.9999991681400581, iteration: 166776
loss: 0.9761123061180115,grad_norm: 0.999999064774083, iteration: 166777
loss: 0.9909746050834656,grad_norm: 0.9999991732540179, iteration: 166778
loss: 0.9727146625518799,grad_norm: 0.9999991336460154, iteration: 166779
loss: 1.031261682510376,grad_norm: 0.9999991360583435, iteration: 166780
loss: 0.9936705827713013,grad_norm: 0.9513946770737194, iteration: 166781
loss: 1.0098031759262085,grad_norm: 0.9999991945419079, iteration: 166782
loss: 0.9822531938552856,grad_norm: 0.9900112796426374, iteration: 166783
loss: 1.009448766708374,grad_norm: 0.999999338294497, iteration: 166784
loss: 0.9991547465324402,grad_norm: 0.9784055218754906, iteration: 166785
loss: 0.9954825639724731,grad_norm: 0.999999362486613, iteration: 166786
loss: 0.9888128042221069,grad_norm: 0.9695263348093631, iteration: 166787
loss: 0.9792981147766113,grad_norm: 0.9138823475417629, iteration: 166788
loss: 0.9947942495346069,grad_norm: 0.9113904808523843, iteration: 166789
loss: 0.9885851144790649,grad_norm: 0.9117628620049323, iteration: 166790
loss: 1.008597731590271,grad_norm: 0.9016052884109744, iteration: 166791
loss: 1.0207939147949219,grad_norm: 0.9999989876856412, iteration: 166792
loss: 1.0107471942901611,grad_norm: 0.9642869149288498, iteration: 166793
loss: 1.0050448179244995,grad_norm: 0.9900957522363454, iteration: 166794
loss: 1.0684542655944824,grad_norm: 0.9999993547640526, iteration: 166795
loss: 0.9619508385658264,grad_norm: 0.9716672836219131, iteration: 166796
loss: 1.0201020240783691,grad_norm: 0.9533929059564121, iteration: 166797
loss: 1.0114322900772095,grad_norm: 0.999999368406905, iteration: 166798
loss: 0.9903479218482971,grad_norm: 0.9049516557205611, iteration: 166799
loss: 0.9931168556213379,grad_norm: 0.9794120077547179, iteration: 166800
loss: 0.9930397868156433,grad_norm: 0.9999990749539307, iteration: 166801
loss: 0.9786949157714844,grad_norm: 0.9999991686528155, iteration: 166802
loss: 1.1343320608139038,grad_norm: 0.9999994066001496, iteration: 166803
loss: 1.0012080669403076,grad_norm: 0.9999990652511094, iteration: 166804
loss: 1.024308204650879,grad_norm: 0.9999991024962024, iteration: 166805
loss: 0.9946110844612122,grad_norm: 0.9999990116111123, iteration: 166806
loss: 0.9986914992332458,grad_norm: 0.9724982980205559, iteration: 166807
loss: 0.9895449280738831,grad_norm: 0.9698318482914229, iteration: 166808
loss: 1.000457763671875,grad_norm: 0.9999991168542721, iteration: 166809
loss: 1.005358099937439,grad_norm: 0.9680762028459564, iteration: 166810
loss: 0.9716776013374329,grad_norm: 0.999999056615082, iteration: 166811
loss: 0.9584053754806519,grad_norm: 0.9266436623347314, iteration: 166812
loss: 1.0122549533843994,grad_norm: 0.9835946259557126, iteration: 166813
loss: 1.022037148475647,grad_norm: 0.9878988169050287, iteration: 166814
loss: 1.0014830827713013,grad_norm: 0.999999049801348, iteration: 166815
loss: 0.9432732462882996,grad_norm: 0.9999991635292236, iteration: 166816
loss: 0.9761534333229065,grad_norm: 0.9999991843279536, iteration: 166817
loss: 1.0356817245483398,grad_norm: 0.9999991890396681, iteration: 166818
loss: 0.9556297063827515,grad_norm: 0.9361459881760416, iteration: 166819
loss: 0.9860512614250183,grad_norm: 0.9999990081760556, iteration: 166820
loss: 0.9939882755279541,grad_norm: 0.8737046286087743, iteration: 166821
loss: 1.0193172693252563,grad_norm: 0.9588953200573654, iteration: 166822
loss: 1.0019282102584839,grad_norm: 0.999999010179805, iteration: 166823
loss: 1.0187962055206299,grad_norm: 0.8696885939904544, iteration: 166824
loss: 1.0280511379241943,grad_norm: 0.9820229303889484, iteration: 166825
loss: 1.0020487308502197,grad_norm: 0.9243269520394106, iteration: 166826
loss: 1.0051723718643188,grad_norm: 0.9999992841402346, iteration: 166827
loss: 1.0003533363342285,grad_norm: 0.9999990183585465, iteration: 166828
loss: 1.012803077697754,grad_norm: 0.9210307686462094, iteration: 166829
loss: 0.9987989068031311,grad_norm: 0.8854182422961944, iteration: 166830
loss: 0.9831287860870361,grad_norm: 0.9370876708069731, iteration: 166831
loss: 1.0052931308746338,grad_norm: 0.9174054868476392, iteration: 166832
loss: 0.9837007522583008,grad_norm: 0.9999991368997342, iteration: 166833
loss: 0.9892793893814087,grad_norm: 0.999999183167904, iteration: 166834
loss: 0.993611216545105,grad_norm: 0.9999991802484066, iteration: 166835
loss: 1.0226640701293945,grad_norm: 0.9640014221754556, iteration: 166836
loss: 1.0136475563049316,grad_norm: 0.999999579487956, iteration: 166837
loss: 1.0062977075576782,grad_norm: 0.797090861936318, iteration: 166838
loss: 0.9500295519828796,grad_norm: 0.8837707172549134, iteration: 166839
loss: 1.0443285703659058,grad_norm: 0.8969339760280759, iteration: 166840
loss: 1.0432868003845215,grad_norm: 0.9999991373982647, iteration: 166841
loss: 0.9847680330276489,grad_norm: 0.9999991354969894, iteration: 166842
loss: 1.0056674480438232,grad_norm: 0.9097616778426229, iteration: 166843
loss: 0.9853375554084778,grad_norm: 0.9999990599033386, iteration: 166844
loss: 1.0297980308532715,grad_norm: 0.99999932574787, iteration: 166845
loss: 0.9945202469825745,grad_norm: 0.9999990357417093, iteration: 166846
loss: 1.0081228017807007,grad_norm: 0.9999990046648191, iteration: 166847
loss: 1.013519287109375,grad_norm: 0.9328029606969104, iteration: 166848
loss: 0.9771916270256042,grad_norm: 0.9999992709587913, iteration: 166849
loss: 0.9955928921699524,grad_norm: 0.9637227937283013, iteration: 166850
loss: 1.0103222131729126,grad_norm: 0.8719182270983165, iteration: 166851
loss: 0.997471809387207,grad_norm: 0.9570692014434019, iteration: 166852
loss: 1.0035651922225952,grad_norm: 0.9844171563217621, iteration: 166853
loss: 1.014061689376831,grad_norm: 0.9999989572021647, iteration: 166854
loss: 0.9873496890068054,grad_norm: 0.866057842471161, iteration: 166855
loss: 0.9784667491912842,grad_norm: 0.9999991118294697, iteration: 166856
loss: 1.0694268941879272,grad_norm: 0.9127723981351897, iteration: 166857
loss: 1.0167666673660278,grad_norm: 0.9999990945085074, iteration: 166858
loss: 0.9924250245094299,grad_norm: 0.9999991908950651, iteration: 166859
loss: 1.0011382102966309,grad_norm: 0.9999993346078685, iteration: 166860
loss: 1.0289950370788574,grad_norm: 0.9941455384086443, iteration: 166861
loss: 1.036365270614624,grad_norm: 0.9475444563691533, iteration: 166862
loss: 1.002820372581482,grad_norm: 0.9256233475995639, iteration: 166863
loss: 0.9791246056556702,grad_norm: 0.9828859167162842, iteration: 166864
loss: 0.9919294118881226,grad_norm: 0.9069664238549925, iteration: 166865
loss: 0.981101393699646,grad_norm: 0.9692649690744538, iteration: 166866
loss: 0.9949982762336731,grad_norm: 0.999999508239201, iteration: 166867
loss: 1.0101597309112549,grad_norm: 0.9999989394758992, iteration: 166868
loss: 1.0104058980941772,grad_norm: 0.8971645467998063, iteration: 166869
loss: 0.9797654747962952,grad_norm: 0.9955023325000422, iteration: 166870
loss: 0.9495639801025391,grad_norm: 0.9999992788714526, iteration: 166871
loss: 1.0573331117630005,grad_norm: 0.8715577319966062, iteration: 166872
loss: 0.9767988324165344,grad_norm: 0.9677315678515883, iteration: 166873
loss: 0.9921650886535645,grad_norm: 0.9999993249226728, iteration: 166874
loss: 1.022201657295227,grad_norm: 0.9675424626674574, iteration: 166875
loss: 0.9925096035003662,grad_norm: 0.9811507301493766, iteration: 166876
loss: 1.0133705139160156,grad_norm: 0.8956345714202028, iteration: 166877
loss: 0.9952095746994019,grad_norm: 0.9999992297481852, iteration: 166878
loss: 0.9835450053215027,grad_norm: 0.9714307720322194, iteration: 166879
loss: 0.9960177540779114,grad_norm: 0.999999043254247, iteration: 166880
loss: 0.9651173949241638,grad_norm: 0.777322463065063, iteration: 166881
loss: 1.051119327545166,grad_norm: 0.9999996121990469, iteration: 166882
loss: 1.006370186805725,grad_norm: 0.9776063178185287, iteration: 166883
loss: 1.0243912935256958,grad_norm: 0.9182124263484077, iteration: 166884
loss: 0.9514147043228149,grad_norm: 0.8848200110474336, iteration: 166885
loss: 1.004036784172058,grad_norm: 0.911361385019825, iteration: 166886
loss: 0.9959568977355957,grad_norm: 0.953877128240207, iteration: 166887
loss: 0.9909157156944275,grad_norm: 0.8720363286533392, iteration: 166888
loss: 0.9893845319747925,grad_norm: 0.999999216392022, iteration: 166889
loss: 0.9824403524398804,grad_norm: 0.9320943828133533, iteration: 166890
loss: 0.983070969581604,grad_norm: 0.9684068812853033, iteration: 166891
loss: 1.0088343620300293,grad_norm: 0.9999990928967432, iteration: 166892
loss: 1.0213100910186768,grad_norm: 0.9999991067954219, iteration: 166893
loss: 0.9748436212539673,grad_norm: 0.9583899162561528, iteration: 166894
loss: 1.0090291500091553,grad_norm: 0.999999836129875, iteration: 166895
loss: 0.9592885375022888,grad_norm: 0.9272784789337463, iteration: 166896
loss: 0.9577115774154663,grad_norm: 0.8494627057385178, iteration: 166897
loss: 0.9543914794921875,grad_norm: 0.9999992346535557, iteration: 166898
loss: 1.013188123703003,grad_norm: 0.8926619769404337, iteration: 166899
loss: 1.0337878465652466,grad_norm: 0.9999990983249067, iteration: 166900
loss: 1.0824358463287354,grad_norm: 0.9999994937174022, iteration: 166901
loss: 1.03553307056427,grad_norm: 0.9352987661874829, iteration: 166902
loss: 0.9630545377731323,grad_norm: 0.9999991178179422, iteration: 166903
loss: 0.9902672171592712,grad_norm: 0.9638684083877271, iteration: 166904
loss: 1.0198163986206055,grad_norm: 0.9999990564072594, iteration: 166905
loss: 0.9713359475135803,grad_norm: 0.9999990186248194, iteration: 166906
loss: 1.0073925256729126,grad_norm: 0.9999997102821807, iteration: 166907
loss: 0.9840949177742004,grad_norm: 0.9306615894261376, iteration: 166908
loss: 1.0181307792663574,grad_norm: 0.9582120440724934, iteration: 166909
loss: 1.0093342065811157,grad_norm: 0.9999989746508168, iteration: 166910
loss: 1.0150843858718872,grad_norm: 0.999999180376352, iteration: 166911
loss: 1.0019875764846802,grad_norm: 0.9591807547680052, iteration: 166912
loss: 0.9972888231277466,grad_norm: 0.9999991897097847, iteration: 166913
loss: 1.020629644393921,grad_norm: 0.999999243324466, iteration: 166914
loss: 1.0477843284606934,grad_norm: 0.7870090486155921, iteration: 166915
loss: 1.0465301275253296,grad_norm: 0.9999988846215901, iteration: 166916
loss: 1.0000548362731934,grad_norm: 0.9558493459789184, iteration: 166917
loss: 1.028868317604065,grad_norm: 0.9617301273120367, iteration: 166918
loss: 0.98238605260849,grad_norm: 0.9999990694366866, iteration: 166919
loss: 1.013713002204895,grad_norm: 0.7761056132406107, iteration: 166920
loss: 0.978406548500061,grad_norm: 0.9966364684557196, iteration: 166921
loss: 0.9467553496360779,grad_norm: 0.887787575342333, iteration: 166922
loss: 1.0018138885498047,grad_norm: 0.9153353744699874, iteration: 166923
loss: 1.0085631608963013,grad_norm: 0.9999990660672029, iteration: 166924
loss: 0.9803488254547119,grad_norm: 0.9151564080591834, iteration: 166925
loss: 1.0194664001464844,grad_norm: 0.9316594107739802, iteration: 166926
loss: 0.9864010810852051,grad_norm: 0.8968787243815649, iteration: 166927
loss: 0.9936215281486511,grad_norm: 0.9626750213888314, iteration: 166928
loss: 1.0295898914337158,grad_norm: 0.8773528227032438, iteration: 166929
loss: 1.0073838233947754,grad_norm: 0.9999991935414388, iteration: 166930
loss: 1.005980372428894,grad_norm: 0.9220964929043054, iteration: 166931
loss: 1.0010173320770264,grad_norm: 0.9999992922318519, iteration: 166932
loss: 1.0381956100463867,grad_norm: 0.9999990874409515, iteration: 166933
loss: 0.9960997700691223,grad_norm: 0.9449726463384466, iteration: 166934
loss: 0.9905940890312195,grad_norm: 0.8261941907174226, iteration: 166935
loss: 1.029270052909851,grad_norm: 0.8824465588386119, iteration: 166936
loss: 1.0105663537979126,grad_norm: 0.9548920706569114, iteration: 166937
loss: 0.9745889902114868,grad_norm: 0.9806128197971461, iteration: 166938
loss: 1.04609215259552,grad_norm: 0.9999997869718295, iteration: 166939
loss: 1.0144882202148438,grad_norm: 0.9999995918565525, iteration: 166940
loss: 1.0075316429138184,grad_norm: 0.9999991775189091, iteration: 166941
loss: 1.0155547857284546,grad_norm: 0.8839329249245621, iteration: 166942
loss: 0.9906893968582153,grad_norm: 0.9999995073624103, iteration: 166943
loss: 1.104036569595337,grad_norm: 0.999999187377429, iteration: 166944
loss: 1.044247031211853,grad_norm: 0.9999997901157212, iteration: 166945
loss: 1.0147725343704224,grad_norm: 0.9999992945963135, iteration: 166946
loss: 1.0174334049224854,grad_norm: 0.9999990777564407, iteration: 166947
loss: 0.9744465351104736,grad_norm: 0.9999991516204193, iteration: 166948
loss: 0.9768680930137634,grad_norm: 0.9999990909439044, iteration: 166949
loss: 0.9987645149230957,grad_norm: 0.8890431845419223, iteration: 166950
loss: 0.9840490818023682,grad_norm: 0.9999992583945985, iteration: 166951
loss: 0.9956968426704407,grad_norm: 0.9999990464989751, iteration: 166952
loss: 1.0214723348617554,grad_norm: 0.9487844327654914, iteration: 166953
loss: 0.9974086284637451,grad_norm: 0.9832151180952287, iteration: 166954
loss: 0.9644697308540344,grad_norm: 0.9999991095963884, iteration: 166955
loss: 0.9911036491394043,grad_norm: 0.878830982804364, iteration: 166956
loss: 0.967679500579834,grad_norm: 0.95434594252049, iteration: 166957
loss: 1.0571510791778564,grad_norm: 0.9999994527691467, iteration: 166958
loss: 1.0155518054962158,grad_norm: 0.8981612610403338, iteration: 166959
loss: 1.002764344215393,grad_norm: 0.9999990717555572, iteration: 166960
loss: 1.0142120122909546,grad_norm: 0.9999992435648554, iteration: 166961
loss: 1.0388121604919434,grad_norm: 0.999999090477452, iteration: 166962
loss: 0.9692155122756958,grad_norm: 0.9999990569870559, iteration: 166963
loss: 1.022999882698059,grad_norm: 0.9813955549574322, iteration: 166964
loss: 0.9726575613021851,grad_norm: 0.9999997556379354, iteration: 166965
loss: 0.9557971954345703,grad_norm: 0.9633733775449315, iteration: 166966
loss: 0.9772534966468811,grad_norm: 0.9999990470866562, iteration: 166967
loss: 1.0683984756469727,grad_norm: 0.9961818498866944, iteration: 166968
loss: 0.9932287931442261,grad_norm: 0.9667385458143017, iteration: 166969
loss: 1.0185850858688354,grad_norm: 0.9906513566040845, iteration: 166970
loss: 1.0039019584655762,grad_norm: 0.9999991517166928, iteration: 166971
loss: 1.0232163667678833,grad_norm: 0.9185208567157389, iteration: 166972
loss: 1.0004215240478516,grad_norm: 0.9999992830298304, iteration: 166973
loss: 1.0038847923278809,grad_norm: 0.9999991195932602, iteration: 166974
loss: 0.9938194751739502,grad_norm: 0.9592316991409491, iteration: 166975
loss: 1.0160232782363892,grad_norm: 0.9999990526208269, iteration: 166976
loss: 0.9984093308448792,grad_norm: 0.9999991295809895, iteration: 166977
loss: 0.967902421951294,grad_norm: 0.999999203814047, iteration: 166978
loss: 0.962866485118866,grad_norm: 0.9758935577859876, iteration: 166979
loss: 1.017622709274292,grad_norm: 0.8029899159101176, iteration: 166980
loss: 1.0538922548294067,grad_norm: 0.999999065656783, iteration: 166981
loss: 0.9630469083786011,grad_norm: 0.8780234845483206, iteration: 166982
loss: 0.9808263182640076,grad_norm: 0.9081976067288968, iteration: 166983
loss: 1.0033782720565796,grad_norm: 0.9214405403031746, iteration: 166984
loss: 1.0048476457595825,grad_norm: 0.9340440856160837, iteration: 166985
loss: 0.9857622981071472,grad_norm: 0.9790774816302455, iteration: 166986
loss: 1.0038883686065674,grad_norm: 0.8704237179386173, iteration: 166987
loss: 0.9456634521484375,grad_norm: 0.9169632401642972, iteration: 166988
loss: 0.9538343548774719,grad_norm: 0.9999992717704766, iteration: 166989
loss: 1.0083346366882324,grad_norm: 0.9557156643503308, iteration: 166990
loss: 0.9591230154037476,grad_norm: 0.9815152639802963, iteration: 166991
loss: 1.001619577407837,grad_norm: 0.9322190611408778, iteration: 166992
loss: 1.0029101371765137,grad_norm: 0.8572888240768127, iteration: 166993
loss: 1.0244759321212769,grad_norm: 0.9999992430083259, iteration: 166994
loss: 0.9610162377357483,grad_norm: 0.9999989559899667, iteration: 166995
loss: 0.9988364577293396,grad_norm: 0.9999990361498419, iteration: 166996
loss: 0.9426931142807007,grad_norm: 0.9999991148583596, iteration: 166997
loss: 0.9720041155815125,grad_norm: 0.9999990592893789, iteration: 166998
loss: 1.0236945152282715,grad_norm: 0.9999992471995552, iteration: 166999
loss: 1.0390034914016724,grad_norm: 0.9481890464903365, iteration: 167000
loss: 0.9988232254981995,grad_norm: 0.9243887130669481, iteration: 167001
loss: 0.9721878170967102,grad_norm: 0.9999990493846631, iteration: 167002
loss: 1.013962984085083,grad_norm: 0.8987875297725867, iteration: 167003
loss: 1.0270180702209473,grad_norm: 0.8726440035779293, iteration: 167004
loss: 1.0164371728897095,grad_norm: 0.9999990954418527, iteration: 167005
loss: 0.9765967726707458,grad_norm: 0.8391186820444512, iteration: 167006
loss: 0.9533646106719971,grad_norm: 0.8420152509558975, iteration: 167007
loss: 1.0229657888412476,grad_norm: 0.9999990176265947, iteration: 167008
loss: 1.0185312032699585,grad_norm: 0.9934663851081899, iteration: 167009
loss: 1.0299798250198364,grad_norm: 0.9999991793345891, iteration: 167010
loss: 1.0101759433746338,grad_norm: 0.9156036121092388, iteration: 167011
loss: 1.0157417058944702,grad_norm: 0.9999991407147736, iteration: 167012
loss: 1.0130316019058228,grad_norm: 0.9540122125759115, iteration: 167013
loss: 0.9807704091072083,grad_norm: 0.9999990829776864, iteration: 167014
loss: 1.031758427619934,grad_norm: 0.9999990488128058, iteration: 167015
loss: 1.0065869092941284,grad_norm: 0.9999990886380112, iteration: 167016
loss: 0.9954306483268738,grad_norm: 0.9999990706504637, iteration: 167017
loss: 1.0116907358169556,grad_norm: 0.9999990406801513, iteration: 167018
loss: 0.9895036816596985,grad_norm: 0.971395361478048, iteration: 167019
loss: 0.9728132486343384,grad_norm: 0.9221726943071952, iteration: 167020
loss: 0.9697670340538025,grad_norm: 0.9999991717595287, iteration: 167021
loss: 0.9333087205886841,grad_norm: 0.9155547503353385, iteration: 167022
loss: 1.035220742225647,grad_norm: 0.9999989631949603, iteration: 167023
loss: 1.013607144355774,grad_norm: 0.7818794834169159, iteration: 167024
loss: 1.0042792558670044,grad_norm: 0.8909886818010931, iteration: 167025
loss: 0.9695403575897217,grad_norm: 0.8988862033522431, iteration: 167026
loss: 0.9815101027488708,grad_norm: 0.9999991309379532, iteration: 167027
loss: 1.0011026859283447,grad_norm: 0.9824395328807033, iteration: 167028
loss: 1.0116207599639893,grad_norm: 0.9406150448716684, iteration: 167029
loss: 0.9873161315917969,grad_norm: 0.9883847720150414, iteration: 167030
loss: 1.005331039428711,grad_norm: 0.9999990387712374, iteration: 167031
loss: 1.0247706174850464,grad_norm: 0.9237852409143037, iteration: 167032
loss: 0.9801070690155029,grad_norm: 0.8573753155200093, iteration: 167033
loss: 0.9863877892494202,grad_norm: 0.9731261979577064, iteration: 167034
loss: 0.9799554347991943,grad_norm: 0.9940664633422608, iteration: 167035
loss: 0.9850639700889587,grad_norm: 0.9999989974034739, iteration: 167036
loss: 1.0091900825500488,grad_norm: 0.9080387210807247, iteration: 167037
loss: 0.9945728778839111,grad_norm: 0.8901554583706837, iteration: 167038
loss: 0.9675271511077881,grad_norm: 0.9999988826614833, iteration: 167039
loss: 1.002967119216919,grad_norm: 0.801617750874503, iteration: 167040
loss: 0.977074384689331,grad_norm: 0.9999996231655145, iteration: 167041
loss: 1.0008305311203003,grad_norm: 0.9999992294342581, iteration: 167042
loss: 0.9937621355056763,grad_norm: 0.8703240077295731, iteration: 167043
loss: 1.0479179620742798,grad_norm: 0.9999990964725272, iteration: 167044
loss: 0.9904630780220032,grad_norm: 0.9999989846164634, iteration: 167045
loss: 1.0037472248077393,grad_norm: 0.9999993802728371, iteration: 167046
loss: 1.0037131309509277,grad_norm: 0.9999991747940986, iteration: 167047
loss: 0.9939113855361938,grad_norm: 0.8920415702789436, iteration: 167048
loss: 1.019059658050537,grad_norm: 0.8939658373236256, iteration: 167049
loss: 1.0143165588378906,grad_norm: 0.9421625510699579, iteration: 167050
loss: 1.0435658693313599,grad_norm: 0.9999990418499658, iteration: 167051
loss: 1.0129249095916748,grad_norm: 0.9999990217225108, iteration: 167052
loss: 1.0108473300933838,grad_norm: 0.9871012372984702, iteration: 167053
loss: 1.0046718120574951,grad_norm: 0.9899729550620395, iteration: 167054
loss: 1.00897216796875,grad_norm: 0.887433445543952, iteration: 167055
loss: 1.0038529634475708,grad_norm: 0.9999997014938902, iteration: 167056
loss: 0.9902737140655518,grad_norm: 0.9999990670949301, iteration: 167057
loss: 1.0249590873718262,grad_norm: 0.7928783708365085, iteration: 167058
loss: 1.0192232131958008,grad_norm: 0.9999991210764665, iteration: 167059
loss: 1.0281883478164673,grad_norm: 0.9999990752861884, iteration: 167060
loss: 1.0238090753555298,grad_norm: 0.9999990905423091, iteration: 167061
loss: 1.0330852270126343,grad_norm: 0.8840094933600607, iteration: 167062
loss: 1.0036078691482544,grad_norm: 0.9607654593147583, iteration: 167063
loss: 1.0044852495193481,grad_norm: 0.9599286231085936, iteration: 167064
loss: 1.0570183992385864,grad_norm: 0.999999249191036, iteration: 167065
loss: 1.0247167348861694,grad_norm: 0.9999990729499635, iteration: 167066
loss: 1.0020438432693481,grad_norm: 0.9609650345283711, iteration: 167067
loss: 0.9959014058113098,grad_norm: 0.8579479789580421, iteration: 167068
loss: 1.0626391172409058,grad_norm: 0.9999999464148529, iteration: 167069
loss: 0.9922479391098022,grad_norm: 0.9136215883855242, iteration: 167070
loss: 1.012319803237915,grad_norm: 0.9034379240658436, iteration: 167071
loss: 1.0067496299743652,grad_norm: 0.99999905305221, iteration: 167072
loss: 1.0069301128387451,grad_norm: 0.9999990279942789, iteration: 167073
loss: 1.0057042837142944,grad_norm: 0.9999990939174185, iteration: 167074
loss: 0.9768956899642944,grad_norm: 0.9818963683482211, iteration: 167075
loss: 1.0796153545379639,grad_norm: 0.9999991587438627, iteration: 167076
loss: 0.974615752696991,grad_norm: 0.9999991495236059, iteration: 167077
loss: 0.973260760307312,grad_norm: 0.9999991786233193, iteration: 167078
loss: 0.9957641363143921,grad_norm: 0.8846121005539572, iteration: 167079
loss: 1.014719843864441,grad_norm: 0.9999995559534328, iteration: 167080
loss: 0.9965784549713135,grad_norm: 0.9999989925226922, iteration: 167081
loss: 0.9799041152000427,grad_norm: 0.9044859293501439, iteration: 167082
loss: 1.0614851713180542,grad_norm: 0.999999665212102, iteration: 167083
loss: 0.9713637232780457,grad_norm: 0.9999991950906394, iteration: 167084
loss: 0.994901180267334,grad_norm: 0.9999992585973368, iteration: 167085
loss: 1.0328296422958374,grad_norm: 0.890643718638277, iteration: 167086
loss: 0.9986202120780945,grad_norm: 0.8321116931638353, iteration: 167087
loss: 1.021955966949463,grad_norm: 0.9859126086464668, iteration: 167088
loss: 1.0051361322402954,grad_norm: 0.9645346211630064, iteration: 167089
loss: 0.993829607963562,grad_norm: 0.9999991367936047, iteration: 167090
loss: 0.957242488861084,grad_norm: 0.9999992330030996, iteration: 167091
loss: 0.985406756401062,grad_norm: 0.9999989926467143, iteration: 167092
loss: 0.9921706914901733,grad_norm: 0.9634452370944016, iteration: 167093
loss: 0.9975314736366272,grad_norm: 0.9999990779094979, iteration: 167094
loss: 0.9951032400131226,grad_norm: 0.9999992814889124, iteration: 167095
loss: 0.9883909225463867,grad_norm: 0.9545497232900879, iteration: 167096
loss: 1.0283123254776,grad_norm: 0.9743795721268184, iteration: 167097
loss: 0.9972516298294067,grad_norm: 0.9999992298561043, iteration: 167098
loss: 1.028134822845459,grad_norm: 0.999999053963262, iteration: 167099
loss: 1.10126793384552,grad_norm: 0.9999997280246085, iteration: 167100
loss: 1.0126147270202637,grad_norm: 0.9999992153777442, iteration: 167101
loss: 1.0173689126968384,grad_norm: 0.9769353544058413, iteration: 167102
loss: 1.0079641342163086,grad_norm: 0.999999184884194, iteration: 167103
loss: 1.010718822479248,grad_norm: 0.9999991276946413, iteration: 167104
loss: 1.0608841180801392,grad_norm: 0.8978901877064399, iteration: 167105
loss: 1.0210027694702148,grad_norm: 0.999999420324114, iteration: 167106
loss: 1.0022072792053223,grad_norm: 0.9999990864616792, iteration: 167107
loss: 1.0901799201965332,grad_norm: 0.9999993384169023, iteration: 167108
loss: 0.9588301777839661,grad_norm: 0.9911137287607595, iteration: 167109
loss: 1.014041781425476,grad_norm: 0.9999992843244532, iteration: 167110
loss: 1.0333809852600098,grad_norm: 0.999999486582327, iteration: 167111
loss: 0.9963108897209167,grad_norm: 0.9999991579986786, iteration: 167112
loss: 1.0664247274398804,grad_norm: 0.9427988534662864, iteration: 167113
loss: 0.976832389831543,grad_norm: 0.9999990708813615, iteration: 167114
loss: 0.9563000798225403,grad_norm: 0.9618225390266703, iteration: 167115
loss: 0.9938913583755493,grad_norm: 0.9262438962925859, iteration: 167116
loss: 1.0100175142288208,grad_norm: 0.9525104621385776, iteration: 167117
loss: 0.9814932942390442,grad_norm: 0.907547351913139, iteration: 167118
loss: 0.9781069159507751,grad_norm: 0.9310135765259773, iteration: 167119
loss: 1.0097118616104126,grad_norm: 0.9999989240280367, iteration: 167120
loss: 0.9648888111114502,grad_norm: 0.9567966314874377, iteration: 167121
loss: 0.9803894758224487,grad_norm: 0.9580441078113718, iteration: 167122
loss: 1.013167142868042,grad_norm: 0.9999991104501745, iteration: 167123
loss: 0.9695047736167908,grad_norm: 0.861647622816093, iteration: 167124
loss: 1.097062587738037,grad_norm: 0.9999997515415395, iteration: 167125
loss: 0.983572244644165,grad_norm: 0.9999990841959645, iteration: 167126
loss: 0.9802893996238708,grad_norm: 0.9999992655454147, iteration: 167127
loss: 0.9724617004394531,grad_norm: 0.9999992111145398, iteration: 167128
loss: 1.0075278282165527,grad_norm: 0.9999992911387593, iteration: 167129
loss: 1.0193942785263062,grad_norm: 0.9999991423636404, iteration: 167130
loss: 1.0010132789611816,grad_norm: 0.9999992503874322, iteration: 167131
loss: 1.1449542045593262,grad_norm: 0.999999250850682, iteration: 167132
loss: 1.0017883777618408,grad_norm: 0.9507270300056738, iteration: 167133
loss: 1.0018994808197021,grad_norm: 0.955017960296621, iteration: 167134
loss: 1.020829677581787,grad_norm: 0.9999991143299567, iteration: 167135
loss: 0.9719183444976807,grad_norm: 0.9702060595492111, iteration: 167136
loss: 0.9715377688407898,grad_norm: 0.9999990992379122, iteration: 167137
loss: 1.0387643575668335,grad_norm: 0.9999995716400341, iteration: 167138
loss: 0.9733331799507141,grad_norm: 0.9999990830617607, iteration: 167139
loss: 0.9987102150917053,grad_norm: 0.999999038282601, iteration: 167140
loss: 0.9964883327484131,grad_norm: 0.9999991379360942, iteration: 167141
loss: 1.008794903755188,grad_norm: 0.9999991891415325, iteration: 167142
loss: 0.9385953545570374,grad_norm: 0.9999990202747874, iteration: 167143
loss: 0.9946132898330688,grad_norm: 0.9196429748511636, iteration: 167144
loss: 1.0224063396453857,grad_norm: 0.8283101849658561, iteration: 167145
loss: 1.0067890882492065,grad_norm: 0.9999989499628794, iteration: 167146
loss: 1.023888349533081,grad_norm: 0.9999991475013374, iteration: 167147
loss: 1.021404504776001,grad_norm: 0.9999992145311268, iteration: 167148
loss: 0.9957880973815918,grad_norm: 0.9620106471369743, iteration: 167149
loss: 1.01701021194458,grad_norm: 0.9727871326098214, iteration: 167150
loss: 0.9765016436576843,grad_norm: 0.9141408486127396, iteration: 167151
loss: 0.990906834602356,grad_norm: 0.9903971159642556, iteration: 167152
loss: 1.0587142705917358,grad_norm: 0.9999991730396764, iteration: 167153
loss: 0.9850315451622009,grad_norm: 0.9999990775491611, iteration: 167154
loss: 0.9926630258560181,grad_norm: 0.9999991489857804, iteration: 167155
loss: 0.9875309467315674,grad_norm: 0.9999990342206505, iteration: 167156
loss: 1.0076056718826294,grad_norm: 0.9999989984103177, iteration: 167157
loss: 0.9570950865745544,grad_norm: 0.9999990609693213, iteration: 167158
loss: 1.0098531246185303,grad_norm: 0.7874985268847586, iteration: 167159
loss: 1.0202826261520386,grad_norm: 0.9999995623844663, iteration: 167160
loss: 0.9935405850410461,grad_norm: 0.9340461276687809, iteration: 167161
loss: 1.0010607242584229,grad_norm: 0.9480162718092834, iteration: 167162
loss: 0.9980055689811707,grad_norm: 0.9999991212649184, iteration: 167163
loss: 1.011688470840454,grad_norm: 0.9999990918333366, iteration: 167164
loss: 0.9688748121261597,grad_norm: 0.9412939698181741, iteration: 167165
loss: 1.266905426979065,grad_norm: 0.9999999977446383, iteration: 167166
loss: 1.009605884552002,grad_norm: 0.9304992907537304, iteration: 167167
loss: 0.9815897941589355,grad_norm: 0.9183522834787717, iteration: 167168
loss: 1.064913034439087,grad_norm: 0.9999992588573469, iteration: 167169
loss: 1.034850001335144,grad_norm: 0.9999992657595521, iteration: 167170
loss: 0.993349015712738,grad_norm: 0.9553182222099375, iteration: 167171
loss: 1.0239115953445435,grad_norm: 0.9999990861819972, iteration: 167172
loss: 0.97773677110672,grad_norm: 0.9690236475923917, iteration: 167173
loss: 0.9873812794685364,grad_norm: 0.9999991625557517, iteration: 167174
loss: 1.0324679613113403,grad_norm: 0.9999993499829971, iteration: 167175
loss: 1.0079660415649414,grad_norm: 0.8961108277084423, iteration: 167176
loss: 0.9701911807060242,grad_norm: 0.8360763007912585, iteration: 167177
loss: 0.9737759232521057,grad_norm: 0.8708790444418842, iteration: 167178
loss: 0.9750029444694519,grad_norm: 0.9999989151937674, iteration: 167179
loss: 0.992807149887085,grad_norm: 0.9999990749980355, iteration: 167180
loss: 1.0124101638793945,grad_norm: 0.9904319149527435, iteration: 167181
loss: 1.0098258256912231,grad_norm: 0.9170992762089554, iteration: 167182
loss: 1.1366456747055054,grad_norm: 0.999999719053288, iteration: 167183
loss: 1.0204612016677856,grad_norm: 0.9051717487561485, iteration: 167184
loss: 0.9646807909011841,grad_norm: 0.9999990867226938, iteration: 167185
loss: 1.0068401098251343,grad_norm: 0.9464891203484542, iteration: 167186
loss: 0.9819193482398987,grad_norm: 0.9999992951038205, iteration: 167187
loss: 0.9945361614227295,grad_norm: 0.9999992735511939, iteration: 167188
loss: 1.0106233358383179,grad_norm: 0.999999160844659, iteration: 167189
loss: 1.063307285308838,grad_norm: 0.9999998827130687, iteration: 167190
loss: 1.0087432861328125,grad_norm: 0.9999992620111059, iteration: 167191
loss: 1.0507827997207642,grad_norm: 0.8969562712216923, iteration: 167192
loss: 0.9934449791908264,grad_norm: 0.9975965088757716, iteration: 167193
loss: 1.0385620594024658,grad_norm: 0.9999991088263477, iteration: 167194
loss: 1.0171691179275513,grad_norm: 0.9999991799077653, iteration: 167195
loss: 1.0063213109970093,grad_norm: 0.9999989759972467, iteration: 167196
loss: 1.0105336904525757,grad_norm: 0.8682692400241213, iteration: 167197
loss: 0.9825395941734314,grad_norm: 0.9522562939103032, iteration: 167198
loss: 0.9634584188461304,grad_norm: 0.9999990796725262, iteration: 167199
loss: 0.9857650995254517,grad_norm: 0.8733795502800489, iteration: 167200
loss: 0.9944007396697998,grad_norm: 0.9231702672059813, iteration: 167201
loss: 0.9746076464653015,grad_norm: 0.9931720189355905, iteration: 167202
loss: 0.9928104877471924,grad_norm: 0.879686377169732, iteration: 167203
loss: 1.029080867767334,grad_norm: 0.9999992023049201, iteration: 167204
loss: 1.0159868001937866,grad_norm: 0.9999990559911354, iteration: 167205
loss: 0.9978033900260925,grad_norm: 0.9999992629503255, iteration: 167206
loss: 1.0159657001495361,grad_norm: 0.9999991195146319, iteration: 167207
loss: 1.0238136053085327,grad_norm: 0.8548545702722213, iteration: 167208
loss: 1.0061320066452026,grad_norm: 0.999998902191222, iteration: 167209
loss: 0.9766477942466736,grad_norm: 0.9284303988831412, iteration: 167210
loss: 0.9764273166656494,grad_norm: 0.9999993082056527, iteration: 167211
loss: 0.9878484606742859,grad_norm: 0.8011380844349075, iteration: 167212
loss: 1.00971257686615,grad_norm: 0.9829462483034489, iteration: 167213
loss: 1.0118962526321411,grad_norm: 0.9999990577477915, iteration: 167214
loss: 1.0154850482940674,grad_norm: 0.999999036848153, iteration: 167215
loss: 0.9945111274719238,grad_norm: 0.999999408415737, iteration: 167216
loss: 1.0367871522903442,grad_norm: 0.8521856058956827, iteration: 167217
loss: 1.0269980430603027,grad_norm: 0.9500340262676134, iteration: 167218
loss: 0.9828824996948242,grad_norm: 0.9426837038272768, iteration: 167219
loss: 1.034963846206665,grad_norm: 0.9999991064618942, iteration: 167220
loss: 0.9880790114402771,grad_norm: 0.9999990213588769, iteration: 167221
loss: 1.0133904218673706,grad_norm: 0.9999989533722822, iteration: 167222
loss: 1.0012590885162354,grad_norm: 0.999999031421572, iteration: 167223
loss: 1.0180350542068481,grad_norm: 0.8850994410540782, iteration: 167224
loss: 0.9794963598251343,grad_norm: 0.9429517447905811, iteration: 167225
loss: 1.0264016389846802,grad_norm: 0.9999991733337007, iteration: 167226
loss: 1.0335545539855957,grad_norm: 0.9999990736858273, iteration: 167227
loss: 1.0185580253601074,grad_norm: 0.9999992357598538, iteration: 167228
loss: 0.9943544268608093,grad_norm: 0.8914142142236686, iteration: 167229
loss: 0.9911904335021973,grad_norm: 0.9660939669977124, iteration: 167230
loss: 1.0190783739089966,grad_norm: 0.9999993555920564, iteration: 167231
loss: 0.9506691694259644,grad_norm: 0.9259457371392943, iteration: 167232
loss: 1.0231897830963135,grad_norm: 0.9999991353645448, iteration: 167233
loss: 1.029792070388794,grad_norm: 0.9339028851733948, iteration: 167234
loss: 0.96811842918396,grad_norm: 0.9999991500922278, iteration: 167235
loss: 0.9690650105476379,grad_norm: 0.9999990805243671, iteration: 167236
loss: 0.9993683099746704,grad_norm: 0.999999092138541, iteration: 167237
loss: 1.0177830457687378,grad_norm: 0.9999998011123694, iteration: 167238
loss: 0.9776771068572998,grad_norm: 0.92535819363075, iteration: 167239
loss: 0.9893985390663147,grad_norm: 0.9999991572645563, iteration: 167240
loss: 1.0007281303405762,grad_norm: 0.9999991091218818, iteration: 167241
loss: 1.0286188125610352,grad_norm: 0.9206394963471607, iteration: 167242
loss: 0.9883610010147095,grad_norm: 0.984217855652635, iteration: 167243
loss: 1.0104548931121826,grad_norm: 0.9077327027307545, iteration: 167244
loss: 0.9877084493637085,grad_norm: 0.9859738997553101, iteration: 167245
loss: 0.9774218201637268,grad_norm: 0.9774071545548412, iteration: 167246
loss: 1.000045657157898,grad_norm: 0.8267527673001687, iteration: 167247
loss: 1.0060700178146362,grad_norm: 0.999999156449596, iteration: 167248
loss: 1.0079821348190308,grad_norm: 0.9999990473450496, iteration: 167249
loss: 1.036442518234253,grad_norm: 0.950304856517172, iteration: 167250
loss: 1.0177136659622192,grad_norm: 0.9999990619640801, iteration: 167251
loss: 1.0435453653335571,grad_norm: 0.9705263885362706, iteration: 167252
loss: 0.9738863110542297,grad_norm: 0.9999991347868551, iteration: 167253
loss: 1.0356026887893677,grad_norm: 0.9893129694912915, iteration: 167254
loss: 0.9956033825874329,grad_norm: 0.9224225807288322, iteration: 167255
loss: 1.0208799839019775,grad_norm: 0.9999991165372755, iteration: 167256
loss: 0.9525688290596008,grad_norm: 0.9999991917918695, iteration: 167257
loss: 1.0121047496795654,grad_norm: 0.8466897417331027, iteration: 167258
loss: 1.0052505731582642,grad_norm: 0.9276638625374052, iteration: 167259
loss: 0.9694375991821289,grad_norm: 0.966118854699568, iteration: 167260
loss: 1.0129956007003784,grad_norm: 0.8829296054613169, iteration: 167261
loss: 1.0249063968658447,grad_norm: 0.999998946115302, iteration: 167262
loss: 0.995055615901947,grad_norm: 0.8811774108338308, iteration: 167263
loss: 1.0344904661178589,grad_norm: 0.9999991349178949, iteration: 167264
loss: 0.9901807904243469,grad_norm: 0.9999990515821456, iteration: 167265
loss: 0.9846146106719971,grad_norm: 0.9999990851368041, iteration: 167266
loss: 1.0001039505004883,grad_norm: 0.999999119504629, iteration: 167267
loss: 1.0096184015274048,grad_norm: 0.9285316152198925, iteration: 167268
loss: 1.0014833211898804,grad_norm: 0.9453374691133306, iteration: 167269
loss: 1.0280271768569946,grad_norm: 0.8942289226692937, iteration: 167270
loss: 1.0109463930130005,grad_norm: 0.7926159812768646, iteration: 167271
loss: 1.0037897825241089,grad_norm: 0.9858665520972528, iteration: 167272
loss: 0.9732815027236938,grad_norm: 0.9364772580626161, iteration: 167273
loss: 1.0266083478927612,grad_norm: 0.9999990888798956, iteration: 167274
loss: 1.02255117893219,grad_norm: 0.9291394666218534, iteration: 167275
loss: 1.008345127105713,grad_norm: 0.9999991023375375, iteration: 167276
loss: 1.026564121246338,grad_norm: 0.9418504831227823, iteration: 167277
loss: 1.0232033729553223,grad_norm: 0.9999989522193815, iteration: 167278
loss: 1.0076621770858765,grad_norm: 0.7812850055905985, iteration: 167279
loss: 1.0137310028076172,grad_norm: 0.9999992535042732, iteration: 167280
loss: 0.9930916428565979,grad_norm: 0.9379039721911269, iteration: 167281
loss: 0.9960690140724182,grad_norm: 0.940658738765577, iteration: 167282
loss: 1.021275281906128,grad_norm: 0.8139875429441973, iteration: 167283
loss: 1.0029999017715454,grad_norm: 0.988617836143191, iteration: 167284
loss: 1.0203648805618286,grad_norm: 0.8346237271154386, iteration: 167285
loss: 0.9965509176254272,grad_norm: 0.9999991777680627, iteration: 167286
loss: 1.0028709173202515,grad_norm: 0.9437328925109202, iteration: 167287
loss: 1.012123942375183,grad_norm: 0.9999992055174407, iteration: 167288
loss: 0.9882750511169434,grad_norm: 0.8554371777571764, iteration: 167289
loss: 1.007400631904602,grad_norm: 0.9999993402319995, iteration: 167290
loss: 0.9842836260795593,grad_norm: 0.9212330260921665, iteration: 167291
loss: 1.0123755931854248,grad_norm: 0.9999991505283572, iteration: 167292
loss: 0.9986026883125305,grad_norm: 0.999999031698577, iteration: 167293
loss: 1.2546062469482422,grad_norm: 0.9999997557785943, iteration: 167294
loss: 1.0088341236114502,grad_norm: 0.9974455285884014, iteration: 167295
loss: 0.9917030930519104,grad_norm: 0.9852800323982835, iteration: 167296
loss: 1.0247188806533813,grad_norm: 0.9999995095655191, iteration: 167297
loss: 0.9877063035964966,grad_norm: 0.9999991859969107, iteration: 167298
loss: 0.9992725253105164,grad_norm: 0.9999992540975778, iteration: 167299
loss: 0.9894074201583862,grad_norm: 0.9999990844521012, iteration: 167300
loss: 1.0402398109436035,grad_norm: 0.9999992244498803, iteration: 167301
loss: 1.0107697248458862,grad_norm: 0.9662048271310762, iteration: 167302
loss: 1.008807897567749,grad_norm: 0.8977034543954128, iteration: 167303
loss: 0.9863753914833069,grad_norm: 0.9999991162225791, iteration: 167304
loss: 1.0040340423583984,grad_norm: 0.9999990295925668, iteration: 167305
loss: 0.9932977557182312,grad_norm: 0.9999991386835139, iteration: 167306
loss: 1.0304176807403564,grad_norm: 0.9999990268267436, iteration: 167307
loss: 0.9694069623947144,grad_norm: 0.8424601584912167, iteration: 167308
loss: 0.9772276282310486,grad_norm: 0.9957025011189867, iteration: 167309
loss: 1.0623544454574585,grad_norm: 0.9999991634224777, iteration: 167310
loss: 1.0539137125015259,grad_norm: 0.9999995570658514, iteration: 167311
loss: 1.013222336769104,grad_norm: 0.884563130492081, iteration: 167312
loss: 0.9905413389205933,grad_norm: 0.9999990457085113, iteration: 167313
loss: 1.005906343460083,grad_norm: 0.999999055559086, iteration: 167314
loss: 0.9936268329620361,grad_norm: 0.9999996061687605, iteration: 167315
loss: 1.0304524898529053,grad_norm: 0.8997851790604656, iteration: 167316
loss: 0.9740562438964844,grad_norm: 0.9465781582625121, iteration: 167317
loss: 0.9680867791175842,grad_norm: 0.9999991452369649, iteration: 167318
loss: 1.028815507888794,grad_norm: 0.9999993638062428, iteration: 167319
loss: 1.0298794507980347,grad_norm: 0.8960995217321451, iteration: 167320
loss: 1.019588589668274,grad_norm: 0.9051052924881443, iteration: 167321
loss: 0.990847647190094,grad_norm: 0.8946759029674866, iteration: 167322
loss: 0.979849636554718,grad_norm: 0.9330345227594335, iteration: 167323
loss: 1.003684639930725,grad_norm: 0.8989300175356675, iteration: 167324
loss: 0.9930682182312012,grad_norm: 0.7853553868871218, iteration: 167325
loss: 0.9907205700874329,grad_norm: 0.8555072486758746, iteration: 167326
loss: 0.9657848477363586,grad_norm: 0.9912335387970095, iteration: 167327
loss: 0.9892607927322388,grad_norm: 0.9299464366815748, iteration: 167328
loss: 0.9850090742111206,grad_norm: 0.9999991334149559, iteration: 167329
loss: 0.9634447693824768,grad_norm: 0.9999991040976122, iteration: 167330
loss: 1.0150684118270874,grad_norm: 0.9999991241593406, iteration: 167331
loss: 0.9770461916923523,grad_norm: 0.9661236936940412, iteration: 167332
loss: 0.9914066195487976,grad_norm: 0.9999991741632716, iteration: 167333
loss: 0.9942450523376465,grad_norm: 0.928205255649628, iteration: 167334
loss: 0.9834064245223999,grad_norm: 0.9999990853349422, iteration: 167335
loss: 0.9960830807685852,grad_norm: 0.9005205707945799, iteration: 167336
loss: 1.0079514980316162,grad_norm: 0.9999991740506097, iteration: 167337
loss: 0.9861502647399902,grad_norm: 0.9999990839027677, iteration: 167338
loss: 1.0363699197769165,grad_norm: 0.9999998904446937, iteration: 167339
loss: 0.9640184044837952,grad_norm: 0.8928218480074567, iteration: 167340
loss: 0.983378529548645,grad_norm: 0.9999992105494065, iteration: 167341
loss: 1.0008314847946167,grad_norm: 0.9999991886589822, iteration: 167342
loss: 0.9800717830657959,grad_norm: 0.9366348329655836, iteration: 167343
loss: 1.0045965909957886,grad_norm: 0.9472578250362677, iteration: 167344
loss: 0.9947599172592163,grad_norm: 0.9999990419934925, iteration: 167345
loss: 1.0358774662017822,grad_norm: 0.9408492321446726, iteration: 167346
loss: 1.0058205127716064,grad_norm: 0.9999990665297624, iteration: 167347
loss: 1.001390814781189,grad_norm: 0.9999990683128309, iteration: 167348
loss: 0.9706631302833557,grad_norm: 0.9999990034193816, iteration: 167349
loss: 0.9898609519004822,grad_norm: 0.946037015048354, iteration: 167350
loss: 1.0048803091049194,grad_norm: 0.9999990700910852, iteration: 167351
loss: 0.9907127022743225,grad_norm: 0.999999207176321, iteration: 167352
loss: 0.9732412695884705,grad_norm: 0.8474498099430601, iteration: 167353
loss: 0.991896390914917,grad_norm: 0.9999990215868801, iteration: 167354
loss: 1.0229498147964478,grad_norm: 0.9999990769717274, iteration: 167355
loss: 0.9906078577041626,grad_norm: 0.8628024560428905, iteration: 167356
loss: 1.0297672748565674,grad_norm: 0.9999991917234167, iteration: 167357
loss: 1.0242810249328613,grad_norm: 0.999586802982708, iteration: 167358
loss: 1.0137757062911987,grad_norm: 0.9671638632093795, iteration: 167359
loss: 0.9840220808982849,grad_norm: 0.9047631983593726, iteration: 167360
loss: 0.9744488596916199,grad_norm: 0.999999173547709, iteration: 167361
loss: 1.0114402770996094,grad_norm: 0.9999992295516908, iteration: 167362
loss: 0.9667233228683472,grad_norm: 0.9999991662207693, iteration: 167363
loss: 1.001424789428711,grad_norm: 0.9806939966898409, iteration: 167364
loss: 0.9746612906455994,grad_norm: 0.9999991588778301, iteration: 167365
loss: 1.0189791917800903,grad_norm: 0.9999992002962896, iteration: 167366
loss: 0.9830729365348816,grad_norm: 0.9653158573148061, iteration: 167367
loss: 1.0053068399429321,grad_norm: 0.999999072083995, iteration: 167368
loss: 1.0112024545669556,grad_norm: 0.9933531396807507, iteration: 167369
loss: 0.9790230989456177,grad_norm: 0.9999990619771993, iteration: 167370
loss: 0.9718298316001892,grad_norm: 0.9489791228432599, iteration: 167371
loss: 1.0018877983093262,grad_norm: 0.9590112461421425, iteration: 167372
loss: 0.988076388835907,grad_norm: 0.8889759942567033, iteration: 167373
loss: 0.9495341777801514,grad_norm: 0.9734348106737533, iteration: 167374
loss: 1.0128370523452759,grad_norm: 0.9495101931909292, iteration: 167375
loss: 1.0046964883804321,grad_norm: 0.8433196514444404, iteration: 167376
loss: 1.0087730884552002,grad_norm: 0.8868945702423846, iteration: 167377
loss: 0.9642196893692017,grad_norm: 0.8861843083180605, iteration: 167378
loss: 0.9905000329017639,grad_norm: 0.9999993491783301, iteration: 167379
loss: 1.033794641494751,grad_norm: 0.9999991259548491, iteration: 167380
loss: 1.008859395980835,grad_norm: 0.9988241939672189, iteration: 167381
loss: 0.9714926481246948,grad_norm: 0.8871944679006484, iteration: 167382
loss: 1.0127304792404175,grad_norm: 0.8655883169795903, iteration: 167383
loss: 1.0069057941436768,grad_norm: 0.8771336375707293, iteration: 167384
loss: 0.997062623500824,grad_norm: 0.9113813168332158, iteration: 167385
loss: 1.0090441703796387,grad_norm: 0.9848853438887247, iteration: 167386
loss: 1.037702202796936,grad_norm: 0.9999992637870843, iteration: 167387
loss: 0.9861319065093994,grad_norm: 0.9777159559531603, iteration: 167388
loss: 0.9881277084350586,grad_norm: 0.9999989990878114, iteration: 167389
loss: 0.9713556170463562,grad_norm: 0.8674456035376891, iteration: 167390
loss: 0.9973691701889038,grad_norm: 0.8665772079715662, iteration: 167391
loss: 0.9983375668525696,grad_norm: 0.999999181541355, iteration: 167392
loss: 0.9885846376419067,grad_norm: 0.9999991168150723, iteration: 167393
loss: 0.9934022426605225,grad_norm: 0.9999991915250347, iteration: 167394
loss: 1.0116294622421265,grad_norm: 0.8144487983692382, iteration: 167395
loss: 0.9841080904006958,grad_norm: 0.9791953616072495, iteration: 167396
loss: 1.0116089582443237,grad_norm: 0.9999990599048425, iteration: 167397
loss: 1.0142874717712402,grad_norm: 0.9999992157545943, iteration: 167398
loss: 1.0234706401824951,grad_norm: 0.9978266958116127, iteration: 167399
loss: 1.0237717628479004,grad_norm: 0.9999991466664158, iteration: 167400
loss: 0.9863526821136475,grad_norm: 0.9444917648485277, iteration: 167401
loss: 0.9850950837135315,grad_norm: 0.9223468198784892, iteration: 167402
loss: 0.9693640470504761,grad_norm: 0.9999990837583355, iteration: 167403
loss: 1.0385463237762451,grad_norm: 0.9999991829182903, iteration: 167404
loss: 0.9807780981063843,grad_norm: 0.8626975086573394, iteration: 167405
loss: 0.9959788918495178,grad_norm: 0.9999993971652239, iteration: 167406
loss: 0.9907265901565552,grad_norm: 0.9999990777430245, iteration: 167407
loss: 0.9563012719154358,grad_norm: 0.9865013699041352, iteration: 167408
loss: 0.9974641799926758,grad_norm: 0.936137430356621, iteration: 167409
loss: 0.9907401204109192,grad_norm: 0.9851596882517437, iteration: 167410
loss: 1.0071302652359009,grad_norm: 0.8882287675497407, iteration: 167411
loss: 1.011770486831665,grad_norm: 0.9999991013196776, iteration: 167412
loss: 1.0233875513076782,grad_norm: 0.9999990481032031, iteration: 167413
loss: 1.0178555250167847,grad_norm: 0.9999989893783687, iteration: 167414
loss: 0.967986524105072,grad_norm: 0.9372946640847423, iteration: 167415
loss: 1.0049207210540771,grad_norm: 0.9999992597138837, iteration: 167416
loss: 0.9883702397346497,grad_norm: 0.9999990545306054, iteration: 167417
loss: 1.026188850402832,grad_norm: 0.9999990729388297, iteration: 167418
loss: 0.9673477411270142,grad_norm: 0.9457949613102866, iteration: 167419
loss: 0.9810314178466797,grad_norm: 0.9999988761064148, iteration: 167420
loss: 0.9567374587059021,grad_norm: 0.9453781702206687, iteration: 167421
loss: 0.9828313589096069,grad_norm: 0.9999990960838719, iteration: 167422
loss: 1.0134353637695312,grad_norm: 0.8557473220586252, iteration: 167423
loss: 0.9861183166503906,grad_norm: 0.9043559479688084, iteration: 167424
loss: 1.0024807453155518,grad_norm: 0.9990896083506173, iteration: 167425
loss: 1.0063573122024536,grad_norm: 0.9259679181517494, iteration: 167426
loss: 0.9558932781219482,grad_norm: 0.8780357954431733, iteration: 167427
loss: 0.9772673845291138,grad_norm: 0.9313980368872256, iteration: 167428
loss: 0.9949666261672974,grad_norm: 0.9010776239734715, iteration: 167429
loss: 1.0002917051315308,grad_norm: 0.8056700051484134, iteration: 167430
loss: 1.0203546285629272,grad_norm: 0.9999989838187456, iteration: 167431
loss: 1.0104773044586182,grad_norm: 0.9171200512300522, iteration: 167432
loss: 0.9562423825263977,grad_norm: 0.9999992137343311, iteration: 167433
loss: 1.0193617343902588,grad_norm: 0.9999991525175824, iteration: 167434
loss: 0.975892961025238,grad_norm: 0.895534405293668, iteration: 167435
loss: 0.9993767142295837,grad_norm: 0.9230330600789665, iteration: 167436
loss: 1.0064361095428467,grad_norm: 0.9999998587650173, iteration: 167437
loss: 0.9990971088409424,grad_norm: 0.9275747842620654, iteration: 167438
loss: 1.0577994585037231,grad_norm: 0.9999998075502652, iteration: 167439
loss: 0.9865606427192688,grad_norm: 0.9867746541208, iteration: 167440
loss: 0.9958160519599915,grad_norm: 0.99999917918502, iteration: 167441
loss: 1.0092869997024536,grad_norm: 0.9283986535720007, iteration: 167442
loss: 0.9697635173797607,grad_norm: 0.9044038712088416, iteration: 167443
loss: 0.999603807926178,grad_norm: 0.8879981580825361, iteration: 167444
loss: 0.9908560514450073,grad_norm: 0.9088647864952539, iteration: 167445
loss: 1.0441886186599731,grad_norm: 0.8578787879987226, iteration: 167446
loss: 1.0030124187469482,grad_norm: 0.9332628507436667, iteration: 167447
loss: 0.980061948299408,grad_norm: 0.8966478631544142, iteration: 167448
loss: 1.0199486017227173,grad_norm: 0.9500971010651328, iteration: 167449
loss: 1.0074453353881836,grad_norm: 0.99999896254477, iteration: 167450
loss: 0.9549782872200012,grad_norm: 0.9618867403461848, iteration: 167451
loss: 1.0349403619766235,grad_norm: 0.876990383808957, iteration: 167452
loss: 1.0354574918746948,grad_norm: 0.9999992649770703, iteration: 167453
loss: 0.9911528825759888,grad_norm: 0.7782557587997923, iteration: 167454
loss: 1.0081007480621338,grad_norm: 0.9999991026729618, iteration: 167455
loss: 1.0633312463760376,grad_norm: 0.999998879437149, iteration: 167456
loss: 0.9817052483558655,grad_norm: 0.9999992187588989, iteration: 167457
loss: 0.9782142043113708,grad_norm: 0.9999991073910006, iteration: 167458
loss: 0.9930723309516907,grad_norm: 0.9432184453407763, iteration: 167459
loss: 0.9661750197410583,grad_norm: 0.9648496156040345, iteration: 167460
loss: 0.9775708317756653,grad_norm: 0.9999991060262672, iteration: 167461
loss: 1.0062663555145264,grad_norm: 0.9999990867671457, iteration: 167462
loss: 1.0177055597305298,grad_norm: 0.9429561242340668, iteration: 167463
loss: 1.0150660276412964,grad_norm: 0.9999991622933873, iteration: 167464
loss: 1.0229206085205078,grad_norm: 0.9999990237349492, iteration: 167465
loss: 1.0600515604019165,grad_norm: 0.9999996183596624, iteration: 167466
loss: 1.0516520738601685,grad_norm: 0.910489057569669, iteration: 167467
loss: 0.9848946332931519,grad_norm: 0.876898019313366, iteration: 167468
loss: 1.0099437236785889,grad_norm: 0.999999171767849, iteration: 167469
loss: 1.0190892219543457,grad_norm: 0.9714661912312949, iteration: 167470
loss: 0.9676935076713562,grad_norm: 0.9999991219081018, iteration: 167471
loss: 1.0184087753295898,grad_norm: 0.9999990197038191, iteration: 167472
loss: 1.0405436754226685,grad_norm: 0.9999998042418691, iteration: 167473
loss: 0.98573237657547,grad_norm: 0.9999992418773144, iteration: 167474
loss: 0.9799287915229797,grad_norm: 0.9999991127208631, iteration: 167475
loss: 1.0317680835723877,grad_norm: 0.999999330435881, iteration: 167476
loss: 0.9923037886619568,grad_norm: 0.9999990149983591, iteration: 167477
loss: 0.9931516051292419,grad_norm: 0.9999991726675539, iteration: 167478
loss: 0.9901970028877258,grad_norm: 0.9225780682502484, iteration: 167479
loss: 0.9800835251808167,grad_norm: 0.8959570678263754, iteration: 167480
loss: 1.0167553424835205,grad_norm: 0.9999991745671195, iteration: 167481
loss: 0.9790869355201721,grad_norm: 0.9999990270808731, iteration: 167482
loss: 0.9976012110710144,grad_norm: 0.9999992386778984, iteration: 167483
loss: 0.9994654059410095,grad_norm: 0.8546264121600322, iteration: 167484
loss: 1.013980507850647,grad_norm: 0.999999944370728, iteration: 167485
loss: 0.988318145275116,grad_norm: 0.8480645666088228, iteration: 167486
loss: 1.0131967067718506,grad_norm: 0.9339486940179096, iteration: 167487
loss: 0.9958634376525879,grad_norm: 0.999999157669797, iteration: 167488
loss: 0.9467280507087708,grad_norm: 0.987642293589212, iteration: 167489
loss: 1.035285472869873,grad_norm: 0.9999990853487317, iteration: 167490
loss: 1.003401279449463,grad_norm: 0.9999991581611231, iteration: 167491
loss: 1.007539987564087,grad_norm: 0.7865587470642921, iteration: 167492
loss: 0.9942421317100525,grad_norm: 0.9999993100617046, iteration: 167493
loss: 0.9983950853347778,grad_norm: 0.999999063211026, iteration: 167494
loss: 1.0196034908294678,grad_norm: 0.9010581509766495, iteration: 167495
loss: 0.9928047060966492,grad_norm: 0.9690475751488207, iteration: 167496
loss: 1.0062832832336426,grad_norm: 0.8299697712513217, iteration: 167497
loss: 1.0898048877716064,grad_norm: 0.9999991364248317, iteration: 167498
loss: 1.0295759439468384,grad_norm: 0.9999990529229279, iteration: 167499
loss: 1.0271120071411133,grad_norm: 0.8439352341914922, iteration: 167500
loss: 1.0489381551742554,grad_norm: 0.9249677219329272, iteration: 167501
loss: 1.0077511072158813,grad_norm: 0.9314953704114105, iteration: 167502
loss: 0.9977611899375916,grad_norm: 0.9673482761117468, iteration: 167503
loss: 0.9916613698005676,grad_norm: 0.9999992264932207, iteration: 167504
loss: 1.0104743242263794,grad_norm: 0.9999996959657796, iteration: 167505
loss: 1.0164378881454468,grad_norm: 0.9611498759588197, iteration: 167506
loss: 1.018995761871338,grad_norm: 0.8113711615718568, iteration: 167507
loss: 1.0006381273269653,grad_norm: 0.8994294799169391, iteration: 167508
loss: 0.9871944189071655,grad_norm: 0.9043679173434712, iteration: 167509
loss: 1.0294407606124878,grad_norm: 0.999999143555055, iteration: 167510
loss: 1.130685567855835,grad_norm: 0.99999974797587, iteration: 167511
loss: 0.9883790016174316,grad_norm: 0.8482728934297901, iteration: 167512
loss: 0.9818588495254517,grad_norm: 0.999999000797384, iteration: 167513
loss: 1.0260034799575806,grad_norm: 0.8595165996198838, iteration: 167514
loss: 0.9981192946434021,grad_norm: 0.9992667971817821, iteration: 167515
loss: 0.9764290452003479,grad_norm: 0.9999990241812575, iteration: 167516
loss: 0.999325692653656,grad_norm: 0.9999991891047455, iteration: 167517
loss: 0.9982531070709229,grad_norm: 0.9526363140232171, iteration: 167518
loss: 1.035803198814392,grad_norm: 0.9999991344947636, iteration: 167519
loss: 1.0251835584640503,grad_norm: 0.9999995010700209, iteration: 167520
loss: 1.0196168422698975,grad_norm: 0.9999992732213271, iteration: 167521
loss: 1.0224391222000122,grad_norm: 0.999999639028454, iteration: 167522
loss: 0.9923883080482483,grad_norm: 0.9185212318633306, iteration: 167523
loss: 0.9899414777755737,grad_norm: 0.9999991342819722, iteration: 167524
loss: 0.9952691793441772,grad_norm: 0.8867590675687543, iteration: 167525
loss: 0.9941097497940063,grad_norm: 0.9440497957085819, iteration: 167526
loss: 1.0230964422225952,grad_norm: 0.9999989799223767, iteration: 167527
loss: 0.9964250922203064,grad_norm: 0.9376438613851134, iteration: 167528
loss: 1.082706093788147,grad_norm: 0.9999990525577511, iteration: 167529
loss: 0.9893943071365356,grad_norm: 0.8892298490042705, iteration: 167530
loss: 1.2659895420074463,grad_norm: 0.999999947048386, iteration: 167531
loss: 1.0399270057678223,grad_norm: 0.9999991045929711, iteration: 167532
loss: 0.9975826740264893,grad_norm: 0.9999994970979794, iteration: 167533
loss: 1.0301259756088257,grad_norm: 0.9999995546237984, iteration: 167534
loss: 0.9838535785675049,grad_norm: 0.9999991266069134, iteration: 167535
loss: 0.9954025745391846,grad_norm: 0.9999992454532156, iteration: 167536
loss: 1.0029438734054565,grad_norm: 0.9440357682221623, iteration: 167537
loss: 0.99986732006073,grad_norm: 0.9999993435515097, iteration: 167538
loss: 1.019877314567566,grad_norm: 0.9325698218867053, iteration: 167539
loss: 1.0294601917266846,grad_norm: 0.9999992101778938, iteration: 167540
loss: 0.97254878282547,grad_norm: 0.9999989898211403, iteration: 167541
loss: 0.9967861771583557,grad_norm: 0.9999991855228978, iteration: 167542
loss: 0.9474641680717468,grad_norm: 0.9999991238074459, iteration: 167543
loss: 1.0060701370239258,grad_norm: 0.9999369912319384, iteration: 167544
loss: 0.9818319082260132,grad_norm: 0.9065865946051243, iteration: 167545
loss: 1.0363752841949463,grad_norm: 0.9999990093277089, iteration: 167546
loss: 1.1155778169631958,grad_norm: 0.9999998189018494, iteration: 167547
loss: 1.0467764139175415,grad_norm: 0.9999995511493641, iteration: 167548
loss: 1.0030896663665771,grad_norm: 0.9999991049199022, iteration: 167549
loss: 0.9791640639305115,grad_norm: 0.8399625880890089, iteration: 167550
loss: 1.0043175220489502,grad_norm: 0.999999023778686, iteration: 167551
loss: 1.0024510622024536,grad_norm: 0.8494951710192855, iteration: 167552
loss: 1.0720617771148682,grad_norm: 0.9999997829429984, iteration: 167553
loss: 1.0082144737243652,grad_norm: 0.9176587806840433, iteration: 167554
loss: 0.9922807216644287,grad_norm: 0.9012814588607532, iteration: 167555
loss: 0.952534019947052,grad_norm: 0.9452963614346676, iteration: 167556
loss: 0.996569812297821,grad_norm: 0.999999427298037, iteration: 167557
loss: 1.0124822854995728,grad_norm: 0.9423646415550245, iteration: 167558
loss: 0.9967286586761475,grad_norm: 0.9989435523241775, iteration: 167559
loss: 1.0129939317703247,grad_norm: 0.9999991186228012, iteration: 167560
loss: 1.0450347661972046,grad_norm: 0.9999991204789186, iteration: 167561
loss: 1.009537935256958,grad_norm: 0.9999991129862739, iteration: 167562
loss: 1.0078879594802856,grad_norm: 0.9696672812082634, iteration: 167563
loss: 0.9696500897407532,grad_norm: 0.939172182424123, iteration: 167564
loss: 0.9746723175048828,grad_norm: 0.9999990891101899, iteration: 167565
loss: 1.0088623762130737,grad_norm: 0.9999990340196365, iteration: 167566
loss: 1.0042150020599365,grad_norm: 0.8135652783758545, iteration: 167567
loss: 1.0192756652832031,grad_norm: 0.999999235784602, iteration: 167568
loss: 0.9669755101203918,grad_norm: 0.9999991480663201, iteration: 167569
loss: 1.0441128015518188,grad_norm: 0.9999999012080771, iteration: 167570
loss: 0.9844821691513062,grad_norm: 0.9999990411714167, iteration: 167571
loss: 1.0743224620819092,grad_norm: 0.9999992439547827, iteration: 167572
loss: 1.0080342292785645,grad_norm: 0.9999991658294445, iteration: 167573
loss: 0.9526605606079102,grad_norm: 0.9245226772521455, iteration: 167574
loss: 0.9980531930923462,grad_norm: 0.9999992461100048, iteration: 167575
loss: 1.0882182121276855,grad_norm: 0.9246656341277238, iteration: 167576
loss: 1.0192089080810547,grad_norm: 0.9999992226574415, iteration: 167577
loss: 1.0457500219345093,grad_norm: 0.9999998594839253, iteration: 167578
loss: 1.0358386039733887,grad_norm: 0.9999996868896638, iteration: 167579
loss: 0.9686813354492188,grad_norm: 0.9618064936344021, iteration: 167580
loss: 1.034527063369751,grad_norm: 0.9999990308310639, iteration: 167581
loss: 0.9836505651473999,grad_norm: 0.8921115696060653, iteration: 167582
loss: 0.9903709292411804,grad_norm: 0.9403837167411294, iteration: 167583
loss: 1.0024263858795166,grad_norm: 0.9048328669938988, iteration: 167584
loss: 1.0022509098052979,grad_norm: 0.9608933764297635, iteration: 167585
loss: 1.0039767026901245,grad_norm: 0.8776378581744095, iteration: 167586
loss: 1.0055179595947266,grad_norm: 0.9999991097988339, iteration: 167587
loss: 0.9914683103561401,grad_norm: 0.9072352358678553, iteration: 167588
loss: 1.017413854598999,grad_norm: 0.9993441215395249, iteration: 167589
loss: 0.9906349778175354,grad_norm: 0.9046419291614487, iteration: 167590
loss: 1.0389426946640015,grad_norm: 0.9999991729627989, iteration: 167591
loss: 0.9907671809196472,grad_norm: 0.9400919473188325, iteration: 167592
loss: 0.9993064403533936,grad_norm: 0.9999992042824984, iteration: 167593
loss: 1.0043792724609375,grad_norm: 0.9850278122311678, iteration: 167594
loss: 0.9885596632957458,grad_norm: 0.9999991543300427, iteration: 167595
loss: 1.0076090097427368,grad_norm: 0.9537044842606827, iteration: 167596
loss: 1.0152251720428467,grad_norm: 0.9999992486706523, iteration: 167597
loss: 1.008565902709961,grad_norm: 0.9999989948554178, iteration: 167598
loss: 1.0209163427352905,grad_norm: 0.999999259572648, iteration: 167599
loss: 0.993188202381134,grad_norm: 0.9999990197012084, iteration: 167600
loss: 1.0425249338150024,grad_norm: 0.9999996154299501, iteration: 167601
loss: 0.9578864574432373,grad_norm: 0.9999990658159686, iteration: 167602
loss: 0.9862819314002991,grad_norm: 0.9721659488253763, iteration: 167603
loss: 1.0089389085769653,grad_norm: 0.999999057060819, iteration: 167604
loss: 0.9910823702812195,grad_norm: 0.9999992873805433, iteration: 167605
loss: 1.035726547241211,grad_norm: 0.999999360005928, iteration: 167606
loss: 0.9875444769859314,grad_norm: 0.940330054991974, iteration: 167607
loss: 0.9765231013298035,grad_norm: 0.9969424166820359, iteration: 167608
loss: 0.9993195533752441,grad_norm: 0.9999992249728005, iteration: 167609
loss: 0.9900691509246826,grad_norm: 0.9862445110632543, iteration: 167610
loss: 1.0169785022735596,grad_norm: 0.9999991452449192, iteration: 167611
loss: 1.0138970613479614,grad_norm: 0.9999992188259752, iteration: 167612
loss: 0.9864408373832703,grad_norm: 0.9999991139743464, iteration: 167613
loss: 1.003941297531128,grad_norm: 0.9999989272836256, iteration: 167614
loss: 0.9529486894607544,grad_norm: 0.9999989504256003, iteration: 167615
loss: 1.0062777996063232,grad_norm: 0.7819343377958653, iteration: 167616
loss: 0.9774098992347717,grad_norm: 0.999999123318885, iteration: 167617
loss: 0.969862163066864,grad_norm: 0.9735758601568338, iteration: 167618
loss: 1.0110037326812744,grad_norm: 0.8357416939319798, iteration: 167619
loss: 1.0691241025924683,grad_norm: 0.999999453429899, iteration: 167620
loss: 0.9766090512275696,grad_norm: 0.9621213198367691, iteration: 167621
loss: 0.9864800572395325,grad_norm: 0.9999999185507954, iteration: 167622
loss: 0.9868992567062378,grad_norm: 0.9999992550652093, iteration: 167623
loss: 1.0454843044281006,grad_norm: 0.9999991740130667, iteration: 167624
loss: 1.0193171501159668,grad_norm: 0.9999992229130512, iteration: 167625
loss: 0.9678421020507812,grad_norm: 0.8700323249130073, iteration: 167626
loss: 0.9900403618812561,grad_norm: 0.9240204216849541, iteration: 167627
loss: 0.9994103908538818,grad_norm: 0.8551829049385868, iteration: 167628
loss: 0.998245894908905,grad_norm: 0.9237667161444244, iteration: 167629
loss: 0.9936972856521606,grad_norm: 0.9999995356972889, iteration: 167630
loss: 1.0041323900222778,grad_norm: 0.9999991613317858, iteration: 167631
loss: 1.0354838371276855,grad_norm: 0.9505340677785125, iteration: 167632
loss: 1.00935959815979,grad_norm: 0.9999989240972129, iteration: 167633
loss: 1.010669469833374,grad_norm: 0.939242550851506, iteration: 167634
loss: 0.9916080832481384,grad_norm: 0.9999990872675757, iteration: 167635
loss: 1.0008519887924194,grad_norm: 0.9697674270213502, iteration: 167636
loss: 1.0056802034378052,grad_norm: 0.9999994709921921, iteration: 167637
loss: 0.9775845408439636,grad_norm: 0.9177432532580008, iteration: 167638
loss: 1.0123400688171387,grad_norm: 0.999999294338361, iteration: 167639
loss: 1.0375783443450928,grad_norm: 0.9999993820651115, iteration: 167640
loss: 0.9702035784721375,grad_norm: 0.9999991418486728, iteration: 167641
loss: 1.009442925453186,grad_norm: 0.9231133242102346, iteration: 167642
loss: 0.9868061542510986,grad_norm: 0.9999991597056564, iteration: 167643
loss: 0.9768526554107666,grad_norm: 0.7961193014645345, iteration: 167644
loss: 1.0343704223632812,grad_norm: 0.9999989177241242, iteration: 167645
loss: 1.0129426717758179,grad_norm: 0.9999991589817785, iteration: 167646
loss: 1.0036925077438354,grad_norm: 0.8942224688150741, iteration: 167647
loss: 1.0071648359298706,grad_norm: 0.9830827557272046, iteration: 167648
loss: 1.1147336959838867,grad_norm: 0.9999998637535487, iteration: 167649
loss: 0.984957218170166,grad_norm: 0.9140635055893672, iteration: 167650
loss: 0.9824178218841553,grad_norm: 0.9601632894617796, iteration: 167651
loss: 1.0280979871749878,grad_norm: 0.9999993146513431, iteration: 167652
loss: 0.9979709982872009,grad_norm: 0.9496158359922272, iteration: 167653
loss: 0.9859111905097961,grad_norm: 0.9999990416644778, iteration: 167654
loss: 0.9902812242507935,grad_norm: 0.9999990562997605, iteration: 167655
loss: 1.0345734357833862,grad_norm: 0.9999991296896029, iteration: 167656
loss: 1.173944354057312,grad_norm: 0.9999999703425766, iteration: 167657
loss: 0.9931499361991882,grad_norm: 0.8304365942770573, iteration: 167658
loss: 0.9977714419364929,grad_norm: 0.8953796058236644, iteration: 167659
loss: 0.9965391159057617,grad_norm: 0.9999991695734308, iteration: 167660
loss: 1.0516570806503296,grad_norm: 0.9999995390095228, iteration: 167661
loss: 0.976960301399231,grad_norm: 0.9999991820565898, iteration: 167662
loss: 1.0132850408554077,grad_norm: 0.9999995982588418, iteration: 167663
loss: 1.0634034872055054,grad_norm: 0.999999145133121, iteration: 167664
loss: 1.0246567726135254,grad_norm: 0.9999994508259424, iteration: 167665
loss: 1.0233906507492065,grad_norm: 0.999999312511056, iteration: 167666
loss: 1.0164427757263184,grad_norm: 0.8651274297363051, iteration: 167667
loss: 1.0147876739501953,grad_norm: 0.9999991432657852, iteration: 167668
loss: 0.9762181043624878,grad_norm: 0.9338573907212789, iteration: 167669
loss: 1.020201325416565,grad_norm: 0.9833286997611237, iteration: 167670
loss: 1.0004382133483887,grad_norm: 0.9999995885461049, iteration: 167671
loss: 1.0520795583724976,grad_norm: 0.9999993184247381, iteration: 167672
loss: 1.0710608959197998,grad_norm: 0.9999997323470552, iteration: 167673
loss: 0.9893494844436646,grad_norm: 0.9999991684766701, iteration: 167674
loss: 0.9966182708740234,grad_norm: 0.9999994556382261, iteration: 167675
loss: 1.007561445236206,grad_norm: 0.9999992661080319, iteration: 167676
loss: 1.052602767944336,grad_norm: 0.9999999056586212, iteration: 167677
loss: 1.1503311395645142,grad_norm: 0.999999745557736, iteration: 167678
loss: 1.0209873914718628,grad_norm: 0.9999999255805048, iteration: 167679
loss: 1.0021220445632935,grad_norm: 0.9999991160642842, iteration: 167680
loss: 1.1606104373931885,grad_norm: 0.9999995843556209, iteration: 167681
loss: 0.9592355489730835,grad_norm: 0.9452593239493484, iteration: 167682
loss: 0.9912087321281433,grad_norm: 0.9827573326781883, iteration: 167683
loss: 1.004351019859314,grad_norm: 0.9999992130170824, iteration: 167684
loss: 0.9829332232475281,grad_norm: 0.9403146018148603, iteration: 167685
loss: 1.026075839996338,grad_norm: 0.999999108917882, iteration: 167686
loss: 0.9913282990455627,grad_norm: 0.9999990311314642, iteration: 167687
loss: 0.9952166080474854,grad_norm: 0.9999990403198757, iteration: 167688
loss: 0.9867762327194214,grad_norm: 0.9441487296517096, iteration: 167689
loss: 1.0131233930587769,grad_norm: 0.8517468218442817, iteration: 167690
loss: 1.0464842319488525,grad_norm: 0.9666089920889153, iteration: 167691
loss: 0.9978155493736267,grad_norm: 0.9999991072688269, iteration: 167692
loss: 0.9447580575942993,grad_norm: 0.999999278154527, iteration: 167693
loss: 0.98631751537323,grad_norm: 0.9999992301794185, iteration: 167694
loss: 0.95127934217453,grad_norm: 0.9999992110654007, iteration: 167695
loss: 1.0096153020858765,grad_norm: 0.9016702011004094, iteration: 167696
loss: 1.0056819915771484,grad_norm: 0.9999990286979529, iteration: 167697
loss: 1.0207520723342896,grad_norm: 0.9999993974246676, iteration: 167698
loss: 0.9950681924819946,grad_norm: 0.999998958772904, iteration: 167699
loss: 1.0099458694458008,grad_norm: 0.9999991503117824, iteration: 167700
loss: 1.0100114345550537,grad_norm: 0.9581571924593442, iteration: 167701
loss: 0.9930369257926941,grad_norm: 0.9999992251099692, iteration: 167702
loss: 1.0444899797439575,grad_norm: 0.9999993539732311, iteration: 167703
loss: 0.9761763215065002,grad_norm: 0.9949647656042564, iteration: 167704
loss: 1.0823291540145874,grad_norm: 0.9999992092398778, iteration: 167705
loss: 1.0431020259857178,grad_norm: 0.9999991067295227, iteration: 167706
loss: 1.0039702653884888,grad_norm: 0.9999990803706709, iteration: 167707
loss: 1.0049772262573242,grad_norm: 0.9687516541885417, iteration: 167708
loss: 0.9844478964805603,grad_norm: 0.9977313526120735, iteration: 167709
loss: 1.002934455871582,grad_norm: 0.9999990461504955, iteration: 167710
loss: 1.015102505683899,grad_norm: 0.9999995796324694, iteration: 167711
loss: 1.0163869857788086,grad_norm: 0.9999990562891118, iteration: 167712
loss: 1.029815435409546,grad_norm: 0.9999991578573443, iteration: 167713
loss: 1.0080875158309937,grad_norm: 0.970564136249623, iteration: 167714
loss: 1.026216983795166,grad_norm: 0.9999992249036027, iteration: 167715
loss: 0.9954580068588257,grad_norm: 0.8848237118658269, iteration: 167716
loss: 1.0612750053405762,grad_norm: 0.9999992726892377, iteration: 167717
loss: 1.0269969701766968,grad_norm: 0.9999990152686377, iteration: 167718
loss: 0.9681142568588257,grad_norm: 0.9999991437060226, iteration: 167719
loss: 1.0486226081848145,grad_norm: 0.9999995900585602, iteration: 167720
loss: 1.2270995378494263,grad_norm: 0.9999990798858296, iteration: 167721
loss: 1.0358926057815552,grad_norm: 0.999999290949424, iteration: 167722
loss: 1.3220447301864624,grad_norm: 0.9999992372499634, iteration: 167723
loss: 1.2955015897750854,grad_norm: 0.9999998745357199, iteration: 167724
loss: 0.9870180487632751,grad_norm: 0.9218758722015193, iteration: 167725
loss: 1.25601065158844,grad_norm: 0.9999998866764851, iteration: 167726
loss: 1.068217158317566,grad_norm: 0.9999994044961341, iteration: 167727
loss: 1.0357714891433716,grad_norm: 0.9999999849695483, iteration: 167728
loss: 1.1171159744262695,grad_norm: 0.9999991336664502, iteration: 167729
loss: 1.0001541376113892,grad_norm: 0.827433610817936, iteration: 167730
loss: 1.1082979440689087,grad_norm: 0.9999999029417459, iteration: 167731
loss: 0.9912270903587341,grad_norm: 0.9999990385170705, iteration: 167732
loss: 1.108919620513916,grad_norm: 0.99999949495983, iteration: 167733
loss: 1.045117735862732,grad_norm: 0.9999991397786303, iteration: 167734
loss: 1.0356799364089966,grad_norm: 0.9999991759413275, iteration: 167735
loss: 1.0323323011398315,grad_norm: 0.9999990937123998, iteration: 167736
loss: 1.062596321105957,grad_norm: 0.9999996046548486, iteration: 167737
loss: 1.0894944667816162,grad_norm: 0.9999992245796127, iteration: 167738
loss: 0.9965007305145264,grad_norm: 0.8514314091465379, iteration: 167739
loss: 0.9981423020362854,grad_norm: 0.8137567001065336, iteration: 167740
loss: 1.0291975736618042,grad_norm: 0.9774086460104223, iteration: 167741
loss: 1.0167316198349,grad_norm: 0.9999997281962036, iteration: 167742
loss: 1.0293183326721191,grad_norm: 0.9999989997723386, iteration: 167743
loss: 1.0376133918762207,grad_norm: 0.999999505145113, iteration: 167744
loss: 0.9795196056365967,grad_norm: 0.8977405148867145, iteration: 167745
loss: 0.9981991648674011,grad_norm: 0.9239786099493186, iteration: 167746
loss: 1.00075364112854,grad_norm: 0.9282154882437154, iteration: 167747
loss: 1.0408639907836914,grad_norm: 0.999999238117327, iteration: 167748
loss: 0.9955520629882812,grad_norm: 0.869463061761143, iteration: 167749
loss: 1.0989809036254883,grad_norm: 0.99999955734649, iteration: 167750
loss: 1.014114260673523,grad_norm: 0.9999991362581055, iteration: 167751
loss: 0.9965412020683289,grad_norm: 0.9033510440113134, iteration: 167752
loss: 0.9872575998306274,grad_norm: 0.9999991979278265, iteration: 167753
loss: 1.0036016702651978,grad_norm: 0.9389727462049714, iteration: 167754
loss: 1.0507216453552246,grad_norm: 0.9999992061165548, iteration: 167755
loss: 0.9725856184959412,grad_norm: 0.8523454906655564, iteration: 167756
loss: 1.0054843425750732,grad_norm: 0.9670867613550248, iteration: 167757
loss: 0.9916563630104065,grad_norm: 0.9244249061566938, iteration: 167758
loss: 1.149849534034729,grad_norm: 0.9999998445168504, iteration: 167759
loss: 1.091184377670288,grad_norm: 0.9999998284176562, iteration: 167760
loss: 0.9881327748298645,grad_norm: 0.9999991452792951, iteration: 167761
loss: 1.1225956678390503,grad_norm: 0.9999996215974639, iteration: 167762
loss: 0.9788572788238525,grad_norm: 0.9623620326331908, iteration: 167763
loss: 0.989251971244812,grad_norm: 0.9078420132539694, iteration: 167764
loss: 1.1014933586120605,grad_norm: 0.9999996040927742, iteration: 167765
loss: 0.987576425075531,grad_norm: 0.9472114684334841, iteration: 167766
loss: 1.0071779489517212,grad_norm: 0.9999990914551026, iteration: 167767
loss: 0.9869411587715149,grad_norm: 0.9594709042257694, iteration: 167768
loss: 1.0207507610321045,grad_norm: 0.9999994203146525, iteration: 167769
loss: 1.0556634664535522,grad_norm: 0.9999998482314333, iteration: 167770
loss: 1.008811116218567,grad_norm: 0.9999990510953732, iteration: 167771
loss: 1.0157456398010254,grad_norm: 0.9999990792059585, iteration: 167772
loss: 1.0344325304031372,grad_norm: 0.9999998637947417, iteration: 167773
loss: 1.017915964126587,grad_norm: 0.999999104482005, iteration: 167774
loss: 1.0093395709991455,grad_norm: 0.9999992837167082, iteration: 167775
loss: 0.9880218505859375,grad_norm: 0.9999997383487718, iteration: 167776
loss: 0.9739406108856201,grad_norm: 0.999999058705135, iteration: 167777
loss: 0.9834796190261841,grad_norm: 0.9999991082025119, iteration: 167778
loss: 1.0166609287261963,grad_norm: 0.9999994152642179, iteration: 167779
loss: 0.9839627146720886,grad_norm: 0.9999990320676125, iteration: 167780
loss: 1.0035966634750366,grad_norm: 0.9915675774833058, iteration: 167781
loss: 0.9873219132423401,grad_norm: 0.906406537076675, iteration: 167782
loss: 1.1433494091033936,grad_norm: 0.9999991370988068, iteration: 167783
loss: 1.0114866495132446,grad_norm: 0.9999990429806455, iteration: 167784
loss: 1.0404534339904785,grad_norm: 0.8958542617381131, iteration: 167785
loss: 0.9767932295799255,grad_norm: 0.7370204556257504, iteration: 167786
loss: 1.0548256635665894,grad_norm: 0.9999997877747028, iteration: 167787
loss: 1.0157970190048218,grad_norm: 0.9900328603793398, iteration: 167788
loss: 0.9738922715187073,grad_norm: 0.999999168628478, iteration: 167789
loss: 1.0369608402252197,grad_norm: 0.9999993124408963, iteration: 167790
loss: 1.0222691297531128,grad_norm: 0.8707676677592705, iteration: 167791
loss: 1.0049949884414673,grad_norm: 0.936177844925099, iteration: 167792
loss: 1.0187053680419922,grad_norm: 0.999999324547143, iteration: 167793
loss: 1.0165077447891235,grad_norm: 0.9999996895467812, iteration: 167794
loss: 1.058473825454712,grad_norm: 0.999999839469521, iteration: 167795
loss: 1.010077953338623,grad_norm: 0.999999495557639, iteration: 167796
loss: 1.0291193723678589,grad_norm: 0.9999994240498241, iteration: 167797
loss: 1.0011136531829834,grad_norm: 0.8966479576687544, iteration: 167798
loss: 1.0043843984603882,grad_norm: 0.9999992039462577, iteration: 167799
loss: 0.976798951625824,grad_norm: 0.9999990650690408, iteration: 167800
loss: 1.031178593635559,grad_norm: 0.9999991265335162, iteration: 167801
loss: 1.0109158754348755,grad_norm: 0.9999991825187233, iteration: 167802
loss: 1.0539662837982178,grad_norm: 0.9999996919829707, iteration: 167803
loss: 0.9855635166168213,grad_norm: 0.9999989740649265, iteration: 167804
loss: 0.9986934065818787,grad_norm: 0.8850575658332316, iteration: 167805
loss: 0.9970371127128601,grad_norm: 0.9999991512440594, iteration: 167806
loss: 1.0644745826721191,grad_norm: 0.9999993256310015, iteration: 167807
loss: 0.94435715675354,grad_norm: 0.9999993267804919, iteration: 167808
loss: 0.9854155778884888,grad_norm: 0.9999990045788409, iteration: 167809
loss: 0.9931313395500183,grad_norm: 0.9999994472605892, iteration: 167810
loss: 1.0638008117675781,grad_norm: 0.9999999223395517, iteration: 167811
loss: 1.0196913480758667,grad_norm: 0.9999991805049011, iteration: 167812
loss: 0.9914151430130005,grad_norm: 0.8278518953723176, iteration: 167813
loss: 1.094347357749939,grad_norm: 0.9999990771688535, iteration: 167814
loss: 0.9927777051925659,grad_norm: 0.9999991064809587, iteration: 167815
loss: 1.0115492343902588,grad_norm: 0.9999989886803221, iteration: 167816
loss: 1.0201205015182495,grad_norm: 0.9999999014340992, iteration: 167817
loss: 1.026224970817566,grad_norm: 0.9476226386453807, iteration: 167818
loss: 1.020195484161377,grad_norm: 0.979793978213, iteration: 167819
loss: 1.0921050310134888,grad_norm: 0.9999994109278811, iteration: 167820
loss: 1.0076597929000854,grad_norm: 0.9680911084051358, iteration: 167821
loss: 0.9963626265525818,grad_norm: 0.9785157515071339, iteration: 167822
loss: 0.9687237739562988,grad_norm: 0.9999990629623897, iteration: 167823
loss: 1.0021522045135498,grad_norm: 0.7268021287318044, iteration: 167824
loss: 1.0098178386688232,grad_norm: 0.8695069624619678, iteration: 167825
loss: 0.9974951148033142,grad_norm: 0.9999990874845536, iteration: 167826
loss: 0.9994054436683655,grad_norm: 0.9999993772807656, iteration: 167827
loss: 0.9629181027412415,grad_norm: 0.9784585100921287, iteration: 167828
loss: 0.9623765349388123,grad_norm: 0.9498256552945885, iteration: 167829
loss: 0.9678904414176941,grad_norm: 0.932068176845214, iteration: 167830
loss: 1.0969429016113281,grad_norm: 0.9999994061315335, iteration: 167831
loss: 0.9815896153450012,grad_norm: 0.9999992272580275, iteration: 167832
loss: 0.9990449547767639,grad_norm: 0.9963308512833089, iteration: 167833
loss: 0.9856656789779663,grad_norm: 0.9999993129444105, iteration: 167834
loss: 1.0144754648208618,grad_norm: 0.9999990979834265, iteration: 167835
loss: 1.00825035572052,grad_norm: 0.9999990106206665, iteration: 167836
loss: 0.9995729327201843,grad_norm: 0.9742340540301734, iteration: 167837
loss: 1.0219545364379883,grad_norm: 0.9999997064474878, iteration: 167838
loss: 1.0149695873260498,grad_norm: 0.9999993173125966, iteration: 167839
loss: 1.016051173210144,grad_norm: 0.999999030334369, iteration: 167840
loss: 1.0062512159347534,grad_norm: 0.9999992791652135, iteration: 167841
loss: 0.9934383630752563,grad_norm: 0.9999991093458684, iteration: 167842
loss: 1.0572258234024048,grad_norm: 0.9999994377585352, iteration: 167843
loss: 1.0346758365631104,grad_norm: 0.9999990968218554, iteration: 167844
loss: 1.0515258312225342,grad_norm: 0.9999997077561041, iteration: 167845
loss: 0.9878512024879456,grad_norm: 0.9999992329967744, iteration: 167846
loss: 0.9558390378952026,grad_norm: 0.863998881483354, iteration: 167847
loss: 1.0011317729949951,grad_norm: 0.9999998321453496, iteration: 167848
loss: 1.016831874847412,grad_norm: 0.9039847638457891, iteration: 167849
loss: 0.9797847270965576,grad_norm: 0.9891318752945457, iteration: 167850
loss: 1.187929391860962,grad_norm: 0.9999996532528768, iteration: 167851
loss: 1.0172466039657593,grad_norm: 0.9002440765336452, iteration: 167852
loss: 1.0165175199508667,grad_norm: 0.9999991333495937, iteration: 167853
loss: 0.9996528029441833,grad_norm: 0.9999992281336817, iteration: 167854
loss: 1.0146639347076416,grad_norm: 0.9637983982402871, iteration: 167855
loss: 0.980195939540863,grad_norm: 0.9121498067154815, iteration: 167856
loss: 0.9878798723220825,grad_norm: 0.859636005946181, iteration: 167857
loss: 1.1029819250106812,grad_norm: 0.9999999521353755, iteration: 167858
loss: 0.9954541921615601,grad_norm: 0.9773809108198156, iteration: 167859
loss: 0.9605188965797424,grad_norm: 0.9999990085013177, iteration: 167860
loss: 1.0106308460235596,grad_norm: 0.8797650698216439, iteration: 167861
loss: 1.0125296115875244,grad_norm: 0.7906710060764287, iteration: 167862
loss: 0.9578675031661987,grad_norm: 0.9999990214098909, iteration: 167863
loss: 1.0046846866607666,grad_norm: 0.9998976442163319, iteration: 167864
loss: 1.0192327499389648,grad_norm: 0.8967793656869785, iteration: 167865
loss: 0.9603899717330933,grad_norm: 0.9999991872588421, iteration: 167866
loss: 0.9912574887275696,grad_norm: 0.9439177910135279, iteration: 167867
loss: 0.9821869134902954,grad_norm: 0.9999994180760677, iteration: 167868
loss: 0.9905556440353394,grad_norm: 0.9928560597849281, iteration: 167869
loss: 0.9778837561607361,grad_norm: 0.8830285326637707, iteration: 167870
loss: 0.9688234329223633,grad_norm: 0.8844333393483066, iteration: 167871
loss: 1.0243016481399536,grad_norm: 0.9999991195428989, iteration: 167872
loss: 1.0173866748809814,grad_norm: 0.9999992246258276, iteration: 167873
loss: 0.9938230514526367,grad_norm: 0.861694129352498, iteration: 167874
loss: 1.0275952816009521,grad_norm: 0.9999991972530953, iteration: 167875
loss: 1.0350196361541748,grad_norm: 0.8031379571336006, iteration: 167876
loss: 1.0164334774017334,grad_norm: 0.9999991248992494, iteration: 167877
loss: 1.0341796875,grad_norm: 0.9999991498006207, iteration: 167878
loss: 0.9890880584716797,grad_norm: 0.9999997328162293, iteration: 167879
loss: 1.022192120552063,grad_norm: 0.9999999097040982, iteration: 167880
loss: 1.0010713338851929,grad_norm: 0.9999991172403506, iteration: 167881
loss: 1.0305160284042358,grad_norm: 0.9191101427394014, iteration: 167882
loss: 1.024198055267334,grad_norm: 0.9999990069477669, iteration: 167883
loss: 0.9940915107727051,grad_norm: 0.9187926994012149, iteration: 167884
loss: 1.006831407546997,grad_norm: 0.9542340510466358, iteration: 167885
loss: 0.9985825419425964,grad_norm: 0.9541844402349452, iteration: 167886
loss: 0.9928732514381409,grad_norm: 0.9999991597677118, iteration: 167887
loss: 0.9854875802993774,grad_norm: 0.9603063774258903, iteration: 167888
loss: 1.0372015237808228,grad_norm: 0.9540784521409327, iteration: 167889
loss: 0.9949139356613159,grad_norm: 0.9999989959613333, iteration: 167890
loss: 1.1203935146331787,grad_norm: 0.9999991155372188, iteration: 167891
loss: 1.0085642337799072,grad_norm: 0.9661099472486533, iteration: 167892
loss: 1.0433013439178467,grad_norm: 0.9999991485287746, iteration: 167893
loss: 0.9769135117530823,grad_norm: 0.9583254922019885, iteration: 167894
loss: 0.9755142331123352,grad_norm: 0.999999050860012, iteration: 167895
loss: 1.0178838968276978,grad_norm: 0.9857250747900193, iteration: 167896
loss: 1.0012949705123901,grad_norm: 0.9999995053932643, iteration: 167897
loss: 0.9958646893501282,grad_norm: 0.9783275302978178, iteration: 167898
loss: 1.0161054134368896,grad_norm: 0.9999991670601055, iteration: 167899
loss: 1.0090738534927368,grad_norm: 0.9462920940523942, iteration: 167900
loss: 0.9808327555656433,grad_norm: 0.9774092511432084, iteration: 167901
loss: 1.0272562503814697,grad_norm: 0.9195389913782526, iteration: 167902
loss: 1.0094062089920044,grad_norm: 0.9999991415898272, iteration: 167903
loss: 0.9592064023017883,grad_norm: 0.9349829203517523, iteration: 167904
loss: 1.0087451934814453,grad_norm: 0.9999991374262285, iteration: 167905
loss: 1.0189640522003174,grad_norm: 0.9999990358945415, iteration: 167906
loss: 1.0021514892578125,grad_norm: 0.999999118312216, iteration: 167907
loss: 1.0089964866638184,grad_norm: 0.9976373256486665, iteration: 167908
loss: 1.006429672241211,grad_norm: 0.9999991024832009, iteration: 167909
loss: 1.0002562999725342,grad_norm: 0.9999991287061402, iteration: 167910
loss: 0.9903779625892639,grad_norm: 0.9667764422174417, iteration: 167911
loss: 1.0123231410980225,grad_norm: 0.9999991384148823, iteration: 167912
loss: 1.012073040008545,grad_norm: 0.9681133906786742, iteration: 167913
loss: 0.9990307688713074,grad_norm: 0.9999991729220671, iteration: 167914
loss: 0.9959442019462585,grad_norm: 0.9999991959115042, iteration: 167915
loss: 1.0105459690093994,grad_norm: 0.9999990496103045, iteration: 167916
loss: 1.0148649215698242,grad_norm: 0.8112831656664078, iteration: 167917
loss: 0.9797403812408447,grad_norm: 0.9999989708698984, iteration: 167918
loss: 0.9773003458976746,grad_norm: 0.9999994775411176, iteration: 167919
loss: 1.0350366830825806,grad_norm: 0.9999998471362819, iteration: 167920
loss: 0.9689945578575134,grad_norm: 0.9999990492468048, iteration: 167921
loss: 1.0265171527862549,grad_norm: 0.9375180422047107, iteration: 167922
loss: 1.0145536661148071,grad_norm: 0.8688032263637612, iteration: 167923
loss: 1.017500877380371,grad_norm: 0.9999990314526387, iteration: 167924
loss: 1.0740145444869995,grad_norm: 0.9999992198731292, iteration: 167925
loss: 0.9945967793464661,grad_norm: 0.9999990300304157, iteration: 167926
loss: 1.0224617719650269,grad_norm: 0.9999998084659039, iteration: 167927
loss: 1.1023379564285278,grad_norm: 0.9999994377160709, iteration: 167928
loss: 1.0844738483428955,grad_norm: 0.9999997359293886, iteration: 167929
loss: 1.0157403945922852,grad_norm: 0.999999182498123, iteration: 167930
loss: 0.9355086088180542,grad_norm: 0.9999992842449182, iteration: 167931
loss: 0.996779203414917,grad_norm: 0.9999992693921996, iteration: 167932
loss: 1.0262765884399414,grad_norm: 0.9999992239163147, iteration: 167933
loss: 0.9860443472862244,grad_norm: 0.9999991289816884, iteration: 167934
loss: 0.9935746192932129,grad_norm: 0.9464618633439547, iteration: 167935
loss: 1.0479813814163208,grad_norm: 0.9999991575871523, iteration: 167936
loss: 0.9762696027755737,grad_norm: 0.9999993236647677, iteration: 167937
loss: 0.9935598373413086,grad_norm: 0.999999137319681, iteration: 167938
loss: 1.0306452512741089,grad_norm: 0.9999995278534338, iteration: 167939
loss: 1.0105382204055786,grad_norm: 0.999999071539391, iteration: 167940
loss: 1.0024876594543457,grad_norm: 0.9423909820280237, iteration: 167941
loss: 1.0093092918395996,grad_norm: 0.9556905335943922, iteration: 167942
loss: 0.9834952354431152,grad_norm: 0.966222179276659, iteration: 167943
loss: 0.9690837264060974,grad_norm: 0.9524913499770606, iteration: 167944
loss: 0.9843644499778748,grad_norm: 0.9999990847737406, iteration: 167945
loss: 1.0570228099822998,grad_norm: 0.9999999764598513, iteration: 167946
loss: 0.9883409142494202,grad_norm: 0.9076934126041181, iteration: 167947
loss: 0.9938806295394897,grad_norm: 0.8662422446622353, iteration: 167948
loss: 0.9779000282287598,grad_norm: 0.9999992311821909, iteration: 167949
loss: 1.0088211297988892,grad_norm: 0.999999253963295, iteration: 167950
loss: 1.22946035861969,grad_norm: 0.9999997075694518, iteration: 167951
loss: 0.9793811440467834,grad_norm: 0.8678892017394519, iteration: 167952
loss: 0.9805086851119995,grad_norm: 0.9350349149318853, iteration: 167953
loss: 1.0440397262573242,grad_norm: 0.9999990798263765, iteration: 167954
loss: 0.9887911081314087,grad_norm: 0.8811500655205398, iteration: 167955
loss: 0.9993208050727844,grad_norm: 0.9999991378075285, iteration: 167956
loss: 1.0649911165237427,grad_norm: 0.9999995744945003, iteration: 167957
loss: 0.977379322052002,grad_norm: 0.9999991468640363, iteration: 167958
loss: 1.0015736818313599,grad_norm: 0.9999990348175285, iteration: 167959
loss: 0.9877450466156006,grad_norm: 0.9999992203568357, iteration: 167960
loss: 1.0273559093475342,grad_norm: 0.9999996391027879, iteration: 167961
loss: 0.9925491809844971,grad_norm: 0.8359683535764706, iteration: 167962
loss: 0.9852496385574341,grad_norm: 0.9999991852098885, iteration: 167963
loss: 1.0035347938537598,grad_norm: 0.9999991465279177, iteration: 167964
loss: 1.0306288003921509,grad_norm: 1.0000000121097676, iteration: 167965
loss: 1.112936019897461,grad_norm: 0.9999993823887082, iteration: 167966
loss: 0.9611714482307434,grad_norm: 0.9999990038616485, iteration: 167967
loss: 0.9511108994483948,grad_norm: 0.9999991074001223, iteration: 167968
loss: 0.990795910358429,grad_norm: 0.8295214143404616, iteration: 167969
loss: 1.0176153182983398,grad_norm: 0.9999992083206827, iteration: 167970
loss: 1.0373408794403076,grad_norm: 0.9999995400279496, iteration: 167971
loss: 0.989224374294281,grad_norm: 0.9999989432746728, iteration: 167972
loss: 1.0117977857589722,grad_norm: 0.7236220714683875, iteration: 167973
loss: 0.9983204007148743,grad_norm: 0.9098352528599659, iteration: 167974
loss: 1.0253251791000366,grad_norm: 0.9999991993602613, iteration: 167975
loss: 0.9993805885314941,grad_norm: 0.9999992013471211, iteration: 167976
loss: 0.9851354360580444,grad_norm: 0.9999991266674947, iteration: 167977
loss: 1.0090076923370361,grad_norm: 0.9751837502165872, iteration: 167978
loss: 0.9765562415122986,grad_norm: 0.9999991496367843, iteration: 167979
loss: 0.9959402084350586,grad_norm: 0.9894256746464867, iteration: 167980
loss: 1.1274164915084839,grad_norm: 0.999999805209064, iteration: 167981
loss: 0.9525611996650696,grad_norm: 0.9999990988482987, iteration: 167982
loss: 0.9895403385162354,grad_norm: 0.9999990500725242, iteration: 167983
loss: 0.9899566173553467,grad_norm: 0.9319124608621032, iteration: 167984
loss: 0.9953646063804626,grad_norm: 0.9920684092550717, iteration: 167985
loss: 1.005465030670166,grad_norm: 0.9999995723837447, iteration: 167986
loss: 1.0178894996643066,grad_norm: 0.9999998988937028, iteration: 167987
loss: 1.0565078258514404,grad_norm: 0.99999922578006, iteration: 167988
loss: 1.011614441871643,grad_norm: 0.9999993280515942, iteration: 167989
loss: 1.0206220149993896,grad_norm: 0.999999224867749, iteration: 167990
loss: 1.0254372358322144,grad_norm: 0.9999992545169808, iteration: 167991
loss: 1.0117921829223633,grad_norm: 0.9999991805089199, iteration: 167992
loss: 1.0339329242706299,grad_norm: 0.9999992707339759, iteration: 167993
loss: 0.9657188057899475,grad_norm: 0.9359220791935187, iteration: 167994
loss: 1.0003471374511719,grad_norm: 0.9957631637169878, iteration: 167995
loss: 1.0184847116470337,grad_norm: 0.9785722012230615, iteration: 167996
loss: 0.9983012080192566,grad_norm: 0.9308010905985992, iteration: 167997
loss: 1.0586553812026978,grad_norm: 0.9999991219995525, iteration: 167998
loss: 0.9763158559799194,grad_norm: 0.9705627075192669, iteration: 167999
loss: 0.9933880567550659,grad_norm: 0.9999992689442588, iteration: 168000
loss: 1.0026894807815552,grad_norm: 0.9999990777357959, iteration: 168001
loss: 0.9830979704856873,grad_norm: 0.9975020207316114, iteration: 168002
loss: 1.035377860069275,grad_norm: 0.9999992775285915, iteration: 168003
loss: 1.0732965469360352,grad_norm: 0.9999992566257809, iteration: 168004
loss: 1.0057666301727295,grad_norm: 0.9746586782720977, iteration: 168005
loss: 1.0305825471878052,grad_norm: 0.9999991864469393, iteration: 168006
loss: 1.024836540222168,grad_norm: 0.9402633610555711, iteration: 168007
loss: 1.0121017694473267,grad_norm: 0.9999990360487199, iteration: 168008
loss: 1.052590250968933,grad_norm: 0.9999994436888112, iteration: 168009
loss: 1.0024285316467285,grad_norm: 0.9156431430826135, iteration: 168010
loss: 1.0612359046936035,grad_norm: 0.9868827406289051, iteration: 168011
loss: 1.0483733415603638,grad_norm: 0.9252999815076995, iteration: 168012
loss: 1.024818778038025,grad_norm: 0.999999225189481, iteration: 168013
loss: 0.9722108244895935,grad_norm: 0.9077176736358441, iteration: 168014
loss: 1.019254446029663,grad_norm: 0.9999990654085581, iteration: 168015
loss: 1.064097285270691,grad_norm: 0.9999995666052919, iteration: 168016
loss: 0.9960826635360718,grad_norm: 0.8813592950172258, iteration: 168017
loss: 0.9870008230209351,grad_norm: 0.9999991438371689, iteration: 168018
loss: 1.008527159690857,grad_norm: 0.9999991589729741, iteration: 168019
loss: 1.0203131437301636,grad_norm: 0.9993804060625128, iteration: 168020
loss: 0.9918038845062256,grad_norm: 0.874026919282558, iteration: 168021
loss: 0.9894136190414429,grad_norm: 0.9999990554907283, iteration: 168022
loss: 1.0366368293762207,grad_norm: 0.9999989965273582, iteration: 168023
loss: 1.0685173273086548,grad_norm: 0.9999994454772054, iteration: 168024
loss: 1.048903226852417,grad_norm: 0.9999993618726999, iteration: 168025
loss: 1.000092625617981,grad_norm: 0.9999994971032009, iteration: 168026
loss: 0.9983787536621094,grad_norm: 0.9432801554198978, iteration: 168027
loss: 0.960618257522583,grad_norm: 0.9512272476219895, iteration: 168028
loss: 1.0055509805679321,grad_norm: 0.9999991237069202, iteration: 168029
loss: 0.9860434532165527,grad_norm: 0.9274759658688898, iteration: 168030
loss: 1.003616452217102,grad_norm: 0.8790022710037401, iteration: 168031
loss: 0.9603187441825867,grad_norm: 0.991561483012759, iteration: 168032
loss: 0.9977128505706787,grad_norm: 0.8787784826090126, iteration: 168033
loss: 1.045025110244751,grad_norm: 0.9999998391784256, iteration: 168034
loss: 0.988295316696167,grad_norm: 0.9980433305041847, iteration: 168035
loss: 1.0220892429351807,grad_norm: 0.9999992855641191, iteration: 168036
loss: 0.9954233169555664,grad_norm: 0.9999992530484871, iteration: 168037
loss: 1.0481681823730469,grad_norm: 0.9999999686933595, iteration: 168038
loss: 0.9895548224449158,grad_norm: 0.9748726552031481, iteration: 168039
loss: 1.0009362697601318,grad_norm: 0.9213034655283487, iteration: 168040
loss: 0.9934934377670288,grad_norm: 0.9999991672324675, iteration: 168041
loss: 0.9898894429206848,grad_norm: 0.9999993681717018, iteration: 168042
loss: 1.0486931800842285,grad_norm: 0.9999992985160496, iteration: 168043
loss: 0.983300507068634,grad_norm: 0.7832187443350968, iteration: 168044
loss: 0.9736350178718567,grad_norm: 0.8308208865135664, iteration: 168045
loss: 1.040570855140686,grad_norm: 0.9999993518722442, iteration: 168046
loss: 0.9884052276611328,grad_norm: 0.8533163812234387, iteration: 168047
loss: 0.9963747262954712,grad_norm: 0.9999991127693294, iteration: 168048
loss: 1.0359715223312378,grad_norm: 0.9999990510061878, iteration: 168049
loss: 1.0367867946624756,grad_norm: 0.9999991866145078, iteration: 168050
loss: 0.975918173789978,grad_norm: 0.9999989273376516, iteration: 168051
loss: 0.997246503829956,grad_norm: 0.9999990599196387, iteration: 168052
loss: 0.9882891178131104,grad_norm: 0.9999991014505984, iteration: 168053
loss: 0.99437016248703,grad_norm: 0.8551042794867036, iteration: 168054
loss: 1.0134594440460205,grad_norm: 0.9999996582473103, iteration: 168055
loss: 1.032971739768982,grad_norm: 0.9999990009071569, iteration: 168056
loss: 0.9930109977722168,grad_norm: 0.8358030431445602, iteration: 168057
loss: 0.990847647190094,grad_norm: 0.9999993720565779, iteration: 168058
loss: 0.9831250905990601,grad_norm: 0.9999990248973569, iteration: 168059
loss: 0.9856539368629456,grad_norm: 0.9246732162609425, iteration: 168060
loss: 1.0358508825302124,grad_norm: 0.9372930592804511, iteration: 168061
loss: 1.015041470527649,grad_norm: 0.8761554357751837, iteration: 168062
loss: 1.0139087438583374,grad_norm: 0.9999991154257075, iteration: 168063
loss: 0.9633626937866211,grad_norm: 0.9999991045444899, iteration: 168064
loss: 1.0025147199630737,grad_norm: 0.9999990818205865, iteration: 168065
loss: 1.019320011138916,grad_norm: 0.9359836956796063, iteration: 168066
loss: 0.9721863865852356,grad_norm: 0.9999990409048484, iteration: 168067
loss: 1.0194981098175049,grad_norm: 0.9999990255490729, iteration: 168068
loss: 0.9524617791175842,grad_norm: 0.9999989815240574, iteration: 168069
loss: 1.0314958095550537,grad_norm: 0.999999228707883, iteration: 168070
loss: 0.9810231328010559,grad_norm: 0.9999990849917899, iteration: 168071
loss: 1.0112569332122803,grad_norm: 0.9999991817768366, iteration: 168072
loss: 0.9709315896034241,grad_norm: 0.9203188413067265, iteration: 168073
loss: 0.9969721436500549,grad_norm: 0.9999989991725128, iteration: 168074
loss: 0.9926689863204956,grad_norm: 0.9603598205356834, iteration: 168075
loss: 1.0148683786392212,grad_norm: 0.9999992679351918, iteration: 168076
loss: 0.990682065486908,grad_norm: 0.8496904015551776, iteration: 168077
loss: 1.0007244348526,grad_norm: 0.9067459006025839, iteration: 168078
loss: 1.0010254383087158,grad_norm: 0.9999990768588052, iteration: 168079
loss: 0.9815419912338257,grad_norm: 0.9999998605247669, iteration: 168080
loss: 0.9945043921470642,grad_norm: 0.9999990295374971, iteration: 168081
loss: 0.9812272191047668,grad_norm: 0.9999998606283115, iteration: 168082
loss: 1.0197802782058716,grad_norm: 0.9470567738254267, iteration: 168083
loss: 0.9925194382667542,grad_norm: 0.9999990570430168, iteration: 168084
loss: 1.0091426372528076,grad_norm: 0.9778122481906985, iteration: 168085
loss: 1.030151605606079,grad_norm: 0.9411882917392574, iteration: 168086
loss: 1.0137227773666382,grad_norm: 0.9977940787831789, iteration: 168087
loss: 1.05299973487854,grad_norm: 0.9999994594375756, iteration: 168088
loss: 1.0207099914550781,grad_norm: 0.9999992614369957, iteration: 168089
loss: 1.083472490310669,grad_norm: 0.9999998664611162, iteration: 168090
loss: 0.9892656207084656,grad_norm: 0.9999992544817322, iteration: 168091
loss: 1.0020941495895386,grad_norm: 0.9240290935178445, iteration: 168092
loss: 1.0324409008026123,grad_norm: 0.9999998917950822, iteration: 168093
loss: 0.9751518964767456,grad_norm: 0.9999995571807301, iteration: 168094
loss: 0.9865797162055969,grad_norm: 0.9753059117324401, iteration: 168095
loss: 1.0237388610839844,grad_norm: 0.9999991828056738, iteration: 168096
loss: 1.003719449043274,grad_norm: 0.9922086694207578, iteration: 168097
loss: 1.0124629735946655,grad_norm: 0.9999991604544589, iteration: 168098
loss: 1.107473373413086,grad_norm: 0.9999991379627721, iteration: 168099
loss: 1.0523431301116943,grad_norm: 0.9999990985261706, iteration: 168100
loss: 0.9754202961921692,grad_norm: 0.9999990958184444, iteration: 168101
loss: 1.0003703832626343,grad_norm: 0.9999990168173013, iteration: 168102
loss: 0.9883936643600464,grad_norm: 0.9424180907466874, iteration: 168103
loss: 1.0058804750442505,grad_norm: 0.9999991495186633, iteration: 168104
loss: 0.994080126285553,grad_norm: 0.9999990641888532, iteration: 168105
loss: 1.0135570764541626,grad_norm: 0.8412087803272738, iteration: 168106
loss: 1.010904312133789,grad_norm: 0.8331739637230825, iteration: 168107
loss: 1.0155783891677856,grad_norm: 0.8644985679236475, iteration: 168108
loss: 0.9525827169418335,grad_norm: 0.9999991571894913, iteration: 168109
loss: 0.9740738868713379,grad_norm: 0.9999991488070591, iteration: 168110
loss: 0.9907048940658569,grad_norm: 0.9664945824083564, iteration: 168111
loss: 1.1009324789047241,grad_norm: 0.9999998325503134, iteration: 168112
loss: 0.9907196164131165,grad_norm: 0.9999989799382119, iteration: 168113
loss: 1.0201945304870605,grad_norm: 0.9999993541446797, iteration: 168114
loss: 1.0153425931930542,grad_norm: 0.9999999810731158, iteration: 168115
loss: 1.007686734199524,grad_norm: 0.9999991964616325, iteration: 168116
loss: 0.9856956601142883,grad_norm: 0.999998985980986, iteration: 168117
loss: 1.0413602590560913,grad_norm: 0.9999992107676161, iteration: 168118
loss: 0.9951076507568359,grad_norm: 0.999999099628411, iteration: 168119
loss: 0.9704452157020569,grad_norm: 0.9186255360117599, iteration: 168120
loss: 1.0290248394012451,grad_norm: 0.8305799826989173, iteration: 168121
loss: 1.0131940841674805,grad_norm: 0.8783199001021066, iteration: 168122
loss: 0.9763702750205994,grad_norm: 0.9999991841679945, iteration: 168123
loss: 0.9954711198806763,grad_norm: 0.9987230959023449, iteration: 168124
loss: 1.0026222467422485,grad_norm: 0.9999990665125484, iteration: 168125
loss: 0.9986861348152161,grad_norm: 0.999999070221982, iteration: 168126
loss: 0.9856022596359253,grad_norm: 0.9999991289133725, iteration: 168127
loss: 1.0539796352386475,grad_norm: 0.999999025854177, iteration: 168128
loss: 0.9911484122276306,grad_norm: 0.999998965831851, iteration: 168129
loss: 1.0279138088226318,grad_norm: 0.9999990901188716, iteration: 168130
loss: 1.2618212699890137,grad_norm: 0.9999995424718404, iteration: 168131
loss: 1.0257173776626587,grad_norm: 0.9999989931747665, iteration: 168132
loss: 1.0068848133087158,grad_norm: 0.8531942426947512, iteration: 168133
loss: 1.0196694135665894,grad_norm: 0.9999989043646332, iteration: 168134
loss: 1.147660732269287,grad_norm: 0.999999218099741, iteration: 168135
loss: 1.0037909746170044,grad_norm: 0.9661604994162953, iteration: 168136
loss: 1.012757420539856,grad_norm: 0.9682227538850818, iteration: 168137
loss: 0.9946389198303223,grad_norm: 0.9587909808354258, iteration: 168138
loss: 1.0002238750457764,grad_norm: 0.9889143660229239, iteration: 168139
loss: 0.9640524983406067,grad_norm: 0.9474909097401691, iteration: 168140
loss: 1.0002961158752441,grad_norm: 0.7569516401075324, iteration: 168141
loss: 0.9950453042984009,grad_norm: 0.9999993111271085, iteration: 168142
loss: 0.9462047815322876,grad_norm: 0.9999990606762554, iteration: 168143
loss: 0.9988880157470703,grad_norm: 0.9999993755697734, iteration: 168144
loss: 0.9505531191825867,grad_norm: 0.9999990418141244, iteration: 168145
loss: 0.9813662767410278,grad_norm: 0.9999991072039008, iteration: 168146
loss: 1.055029273033142,grad_norm: 0.9999994093653618, iteration: 168147
loss: 0.9789002537727356,grad_norm: 0.9597023084442093, iteration: 168148
loss: 1.0125690698623657,grad_norm: 0.9047070702478323, iteration: 168149
loss: 1.0330872535705566,grad_norm: 0.871715932075344, iteration: 168150
loss: 0.9901756048202515,grad_norm: 0.9999992672205538, iteration: 168151
loss: 0.988167941570282,grad_norm: 0.9999989916824796, iteration: 168152
loss: 1.0313218832015991,grad_norm: 0.9999994144637503, iteration: 168153
loss: 0.9781757593154907,grad_norm: 0.8231200009221836, iteration: 168154
loss: 1.0359485149383545,grad_norm: 0.9999995810440759, iteration: 168155
loss: 1.0163934230804443,grad_norm: 0.9999991711493948, iteration: 168156
loss: 0.9724612236022949,grad_norm: 0.9999991998889274, iteration: 168157
loss: 1.0184078216552734,grad_norm: 0.807574539672871, iteration: 168158
loss: 1.0187650918960571,grad_norm: 0.8294775538000414, iteration: 168159
loss: 0.9843765497207642,grad_norm: 0.9999991308050828, iteration: 168160
loss: 1.0388679504394531,grad_norm: 0.9999991423829259, iteration: 168161
loss: 0.9809273481369019,grad_norm: 0.9999991689171409, iteration: 168162
loss: 0.9977507591247559,grad_norm: 0.9031631129468605, iteration: 168163
loss: 1.0150954723358154,grad_norm: 0.9178484036491255, iteration: 168164
loss: 1.020303726196289,grad_norm: 0.9674486933340816, iteration: 168165
loss: 0.9959518909454346,grad_norm: 0.9164817955772596, iteration: 168166
loss: 1.1705858707427979,grad_norm: 0.9999996673104157, iteration: 168167
loss: 0.9653772711753845,grad_norm: 0.9495413365702993, iteration: 168168
loss: 0.9952907562255859,grad_norm: 0.9999991768339896, iteration: 168169
loss: 1.012656331062317,grad_norm: 0.9836877908851409, iteration: 168170
loss: 1.0267938375473022,grad_norm: 0.9999991399156882, iteration: 168171
loss: 0.9740950465202332,grad_norm: 0.9249274367804595, iteration: 168172
loss: 0.9694544672966003,grad_norm: 0.9444498266893286, iteration: 168173
loss: 0.9929115772247314,grad_norm: 0.9999992430016296, iteration: 168174
loss: 1.00844407081604,grad_norm: 0.8355527336149681, iteration: 168175
loss: 1.0119211673736572,grad_norm: 0.9884562390298068, iteration: 168176
loss: 0.99668288230896,grad_norm: 0.9238719691753681, iteration: 168177
loss: 1.087205410003662,grad_norm: 0.9999997757058356, iteration: 168178
loss: 0.987606406211853,grad_norm: 0.9999991046630278, iteration: 168179
loss: 1.2004859447479248,grad_norm: 0.9999991920996231, iteration: 168180
loss: 0.993331253528595,grad_norm: 0.999999035836924, iteration: 168181
loss: 0.9986863732337952,grad_norm: 0.851199276784799, iteration: 168182
loss: 1.0134902000427246,grad_norm: 0.999999261182809, iteration: 168183
loss: 0.9681633114814758,grad_norm: 0.9867576333190443, iteration: 168184
loss: 0.9928443431854248,grad_norm: 0.941182603423739, iteration: 168185
loss: 0.9729112386703491,grad_norm: 0.9985607038955979, iteration: 168186
loss: 1.002066731452942,grad_norm: 0.9999990719201666, iteration: 168187
loss: 0.9803794622421265,grad_norm: 0.9435303680114525, iteration: 168188
loss: 1.0007184743881226,grad_norm: 0.9484977386983724, iteration: 168189
loss: 1.0396430492401123,grad_norm: 0.9999997660012822, iteration: 168190
loss: 1.0369484424591064,grad_norm: 0.9999990761169899, iteration: 168191
loss: 1.0429631471633911,grad_norm: 0.9999989272136294, iteration: 168192
loss: 0.999567985534668,grad_norm: 0.8666141225053933, iteration: 168193
loss: 0.9823043942451477,grad_norm: 0.8107131303825591, iteration: 168194
loss: 1.0056300163269043,grad_norm: 0.9769737368122097, iteration: 168195
loss: 1.0103272199630737,grad_norm: 0.9999989977336231, iteration: 168196
loss: 0.992705762386322,grad_norm: 0.9836486505769142, iteration: 168197
loss: 0.9630917310714722,grad_norm: 0.8095650047016032, iteration: 168198
loss: 1.0072237253189087,grad_norm: 0.8996074869808502, iteration: 168199
loss: 0.9808467030525208,grad_norm: 0.9845408042673971, iteration: 168200
loss: 0.9997395277023315,grad_norm: 0.948120696123758, iteration: 168201
loss: 0.993504524230957,grad_norm: 0.9397778598806945, iteration: 168202
loss: 1.0144689083099365,grad_norm: 0.9054844904538593, iteration: 168203
loss: 0.9740535616874695,grad_norm: 0.9821554620556463, iteration: 168204
loss: 0.9736050963401794,grad_norm: 0.8472426280384924, iteration: 168205
loss: 1.0096712112426758,grad_norm: 0.9999989729682278, iteration: 168206
loss: 1.0226997137069702,grad_norm: 0.9999994759682213, iteration: 168207
loss: 0.9888315796852112,grad_norm: 0.8831124348844147, iteration: 168208
loss: 0.9778656363487244,grad_norm: 0.9999990999019778, iteration: 168209
loss: 1.0253183841705322,grad_norm: 0.9225741997017486, iteration: 168210
loss: 1.021597981452942,grad_norm: 0.9999990157380559, iteration: 168211
loss: 0.9765589237213135,grad_norm: 0.9999992189859114, iteration: 168212
loss: 1.0159287452697754,grad_norm: 0.9999991633323837, iteration: 168213
loss: 1.0187082290649414,grad_norm: 0.9999990503644025, iteration: 168214
loss: 0.983553409576416,grad_norm: 0.8911983098745899, iteration: 168215
loss: 1.0070282220840454,grad_norm: 0.9999992136213328, iteration: 168216
loss: 1.0012850761413574,grad_norm: 0.8763122464415531, iteration: 168217
loss: 1.0122183561325073,grad_norm: 0.8986575769796417, iteration: 168218
loss: 0.944765567779541,grad_norm: 0.835720088135762, iteration: 168219
loss: 1.2191684246063232,grad_norm: 0.9999995950616755, iteration: 168220
loss: 1.0377098321914673,grad_norm: 0.9999990837768629, iteration: 168221
loss: 1.0135856866836548,grad_norm: 0.9999990627365437, iteration: 168222
loss: 1.0464966297149658,grad_norm: 0.9999991016399576, iteration: 168223
loss: 0.9934810996055603,grad_norm: 0.999999195494483, iteration: 168224
loss: 1.0796574354171753,grad_norm: 0.9999994608534549, iteration: 168225
loss: 1.0061683654785156,grad_norm: 0.9999991362359112, iteration: 168226
loss: 1.0118982791900635,grad_norm: 0.8893347266611414, iteration: 168227
loss: 1.006046175956726,grad_norm: 0.8531393147208886, iteration: 168228
loss: 0.9930542707443237,grad_norm: 0.9999990272241338, iteration: 168229
loss: 0.9989129900932312,grad_norm: 0.9999992792638888, iteration: 168230
loss: 1.0078006982803345,grad_norm: 0.9288561960779784, iteration: 168231
loss: 0.9762381315231323,grad_norm: 0.9470034260074492, iteration: 168232
loss: 0.9875418543815613,grad_norm: 0.9349865753299138, iteration: 168233
loss: 1.0183422565460205,grad_norm: 0.894596886710309, iteration: 168234
loss: 1.0134634971618652,grad_norm: 0.8591481078599169, iteration: 168235
loss: 1.2627570629119873,grad_norm: 0.9999997295061475, iteration: 168236
loss: 1.026846170425415,grad_norm: 0.9999993353763966, iteration: 168237
loss: 0.9997135400772095,grad_norm: 0.8850335822925551, iteration: 168238
loss: 1.008483648300171,grad_norm: 0.9999992396969477, iteration: 168239
loss: 1.0185316801071167,grad_norm: 0.9999990338521954, iteration: 168240
loss: 1.0214145183563232,grad_norm: 0.9999992955932222, iteration: 168241
loss: 0.9732279777526855,grad_norm: 0.8462497683081003, iteration: 168242
loss: 1.003048300743103,grad_norm: 0.9999993908249085, iteration: 168243
loss: 1.0052987337112427,grad_norm: 0.9999989660182587, iteration: 168244
loss: 1.0352782011032104,grad_norm: 0.9999990138150784, iteration: 168245
loss: 0.9960052371025085,grad_norm: 0.9999991447364431, iteration: 168246
loss: 0.9755703210830688,grad_norm: 0.9999991607191486, iteration: 168247
loss: 1.013433575630188,grad_norm: 0.9959948612674298, iteration: 168248
loss: 1.0049304962158203,grad_norm: 0.8947640731683076, iteration: 168249
loss: 1.0213422775268555,grad_norm: 0.9328335515557886, iteration: 168250
loss: 1.019472599029541,grad_norm: 0.9999990412177577, iteration: 168251
loss: 0.9999408721923828,grad_norm: 0.999999219588491, iteration: 168252
loss: 0.9917437434196472,grad_norm: 0.984443489722514, iteration: 168253
loss: 0.9691118597984314,grad_norm: 0.9999991150959384, iteration: 168254
loss: 0.9963223338127136,grad_norm: 0.9999996094993271, iteration: 168255
loss: 0.9951526522636414,grad_norm: 0.9999991308314298, iteration: 168256
loss: 0.9932887554168701,grad_norm: 0.999999133969735, iteration: 168257
loss: 1.0092387199401855,grad_norm: 0.9999991814966029, iteration: 168258
loss: 1.0087229013442993,grad_norm: 0.9999990948578498, iteration: 168259
loss: 0.9667158722877502,grad_norm: 0.9955099700550155, iteration: 168260
loss: 0.9975588917732239,grad_norm: 0.9999993667294355, iteration: 168261
loss: 1.0172674655914307,grad_norm: 0.9999992513457687, iteration: 168262
loss: 1.010231614112854,grad_norm: 0.8153585315449527, iteration: 168263
loss: 0.9997584819793701,grad_norm: 0.8700416321247815, iteration: 168264
loss: 0.9991629719734192,grad_norm: 0.8744461172450352, iteration: 168265
loss: 1.037146806716919,grad_norm: 0.9999991250849514, iteration: 168266
loss: 1.0123203992843628,grad_norm: 0.9999989531614643, iteration: 168267
loss: 0.9577422738075256,grad_norm: 0.8521825978693441, iteration: 168268
loss: 1.0089402198791504,grad_norm: 0.999999138658737, iteration: 168269
loss: 0.9747873544692993,grad_norm: 0.9999991434358555, iteration: 168270
loss: 0.9682897329330444,grad_norm: 0.9398624597528398, iteration: 168271
loss: 0.9866434335708618,grad_norm: 0.9422412208880864, iteration: 168272
loss: 0.9881443977355957,grad_norm: 0.9805627021505715, iteration: 168273
loss: 0.9905532002449036,grad_norm: 0.999999057398604, iteration: 168274
loss: 0.9925479888916016,grad_norm: 0.886092530800947, iteration: 168275
loss: 0.9900449514389038,grad_norm: 0.8447493902805889, iteration: 168276
loss: 0.9997640252113342,grad_norm: 0.9932055708919005, iteration: 168277
loss: 1.0140407085418701,grad_norm: 0.999999256459414, iteration: 168278
loss: 1.0627143383026123,grad_norm: 0.9999995667468714, iteration: 168279
loss: 1.0413217544555664,grad_norm: 0.9508270395701702, iteration: 168280
loss: 0.9653367400169373,grad_norm: 0.9060390408979184, iteration: 168281
loss: 1.029309630393982,grad_norm: 0.9999991594715569, iteration: 168282
loss: 0.9648557305335999,grad_norm: 0.9999990177776958, iteration: 168283
loss: 1.0192042589187622,grad_norm: 0.9529806347645594, iteration: 168284
loss: 1.2053951025009155,grad_norm: 0.9999999847042065, iteration: 168285
loss: 1.0016781091690063,grad_norm: 0.9999990464909051, iteration: 168286
loss: 1.0059807300567627,grad_norm: 0.999999123804203, iteration: 168287
loss: 1.0302486419677734,grad_norm: 0.9999992340691065, iteration: 168288
loss: 0.9875597953796387,grad_norm: 0.9141308089443906, iteration: 168289
loss: 1.0215762853622437,grad_norm: 0.8266934753942257, iteration: 168290
loss: 1.015726923942566,grad_norm: 0.8579091006940067, iteration: 168291
loss: 1.0103421211242676,grad_norm: 0.9999999475978493, iteration: 168292
loss: 1.0018336772918701,grad_norm: 0.9883710830091754, iteration: 168293
loss: 0.9900203943252563,grad_norm: 0.9999990747382642, iteration: 168294
loss: 0.9990568161010742,grad_norm: 0.999999253974488, iteration: 168295
loss: 0.9820411205291748,grad_norm: 0.9389999662902393, iteration: 168296
loss: 0.9861278533935547,grad_norm: 0.9999991317608365, iteration: 168297
loss: 0.9877200722694397,grad_norm: 0.9999990806306168, iteration: 168298
loss: 1.0210719108581543,grad_norm: 0.9999991536763367, iteration: 168299
loss: 1.0042473077774048,grad_norm: 0.9081880850407004, iteration: 168300
loss: 1.016404390335083,grad_norm: 0.9999992439675067, iteration: 168301
loss: 0.9874633550643921,grad_norm: 0.9799646159819654, iteration: 168302
loss: 0.9876276850700378,grad_norm: 0.8275789271728008, iteration: 168303
loss: 1.0075325965881348,grad_norm: 0.9999994320947547, iteration: 168304
loss: 0.9821319580078125,grad_norm: 0.8705873004761682, iteration: 168305
loss: 1.1567082405090332,grad_norm: 0.9999997024890693, iteration: 168306
loss: 1.0323840379714966,grad_norm: 0.9586913139811132, iteration: 168307
loss: 1.0089281797409058,grad_norm: 0.9308106515895177, iteration: 168308
loss: 1.0038387775421143,grad_norm: 0.9999990141952145, iteration: 168309
loss: 1.006559133529663,grad_norm: 0.9999991368414038, iteration: 168310
loss: 1.1460779905319214,grad_norm: 0.9999997764527475, iteration: 168311
loss: 0.9847805500030518,grad_norm: 0.9999989722028377, iteration: 168312
loss: 1.0026991367340088,grad_norm: 0.9999991481944708, iteration: 168313
loss: 1.02140212059021,grad_norm: 0.9999990823981655, iteration: 168314
loss: 1.1429091691970825,grad_norm: 0.9999999571434603, iteration: 168315
loss: 1.0280370712280273,grad_norm: 0.8897648127347526, iteration: 168316
loss: 1.0138661861419678,grad_norm: 0.9999990528809605, iteration: 168317
loss: 1.0192610025405884,grad_norm: 0.9999991334210396, iteration: 168318
loss: 1.0845459699630737,grad_norm: 0.9999996245782549, iteration: 168319
loss: 1.0468181371688843,grad_norm: 0.9632483846304541, iteration: 168320
loss: 0.9737377166748047,grad_norm: 0.9720225865684389, iteration: 168321
loss: 1.0175182819366455,grad_norm: 0.9999992156630602, iteration: 168322
loss: 1.0306665897369385,grad_norm: 0.9999991417387335, iteration: 168323
loss: 1.0219041109085083,grad_norm: 0.9999991380835788, iteration: 168324
loss: 1.1611047983169556,grad_norm: 0.9999991099417248, iteration: 168325
loss: 1.0114699602127075,grad_norm: 0.9027156337407127, iteration: 168326
loss: 1.0232396125793457,grad_norm: 0.9999991614795264, iteration: 168327
loss: 0.9742752313613892,grad_norm: 0.9173003603947544, iteration: 168328
loss: 1.0396974086761475,grad_norm: 0.9999993374962421, iteration: 168329
loss: 0.992290735244751,grad_norm: 0.9701535957062776, iteration: 168330
loss: 1.011605143547058,grad_norm: 0.9971177252684079, iteration: 168331
loss: 0.9928492903709412,grad_norm: 0.9767175902645583, iteration: 168332
loss: 0.988451361656189,grad_norm: 0.9999991474367784, iteration: 168333
loss: 0.985065758228302,grad_norm: 0.9041217291338182, iteration: 168334
loss: 1.0485632419586182,grad_norm: 0.9735818360899139, iteration: 168335
loss: 0.9823375940322876,grad_norm: 0.8911744907648279, iteration: 168336
loss: 0.9749366044998169,grad_norm: 0.9351612271895462, iteration: 168337
loss: 0.9740692377090454,grad_norm: 0.9999989379448476, iteration: 168338
loss: 1.0205843448638916,grad_norm: 0.9360309339454218, iteration: 168339
loss: 0.99737948179245,grad_norm: 0.999999142845818, iteration: 168340
loss: 1.0122462511062622,grad_norm: 0.991875855982452, iteration: 168341
loss: 0.9864025115966797,grad_norm: 0.8861444235208864, iteration: 168342
loss: 0.9977114796638489,grad_norm: 0.999999068302345, iteration: 168343
loss: 1.0176507234573364,grad_norm: 0.975136807609693, iteration: 168344
loss: 0.9706395268440247,grad_norm: 0.9927900747021795, iteration: 168345
loss: 1.0015391111373901,grad_norm: 0.8933583643529702, iteration: 168346
loss: 0.9851285219192505,grad_norm: 0.9999992113145103, iteration: 168347
loss: 1.007533073425293,grad_norm: 0.8395055756597731, iteration: 168348
loss: 0.9891380667686462,grad_norm: 0.7915179814731926, iteration: 168349
loss: 1.0271919965744019,grad_norm: 0.9858629527964533, iteration: 168350
loss: 0.9786397814750671,grad_norm: 0.8700543112877571, iteration: 168351
loss: 0.9946368932723999,grad_norm: 0.9120403762303697, iteration: 168352
loss: 0.9909627437591553,grad_norm: 0.9832389449741933, iteration: 168353
loss: 0.9943246841430664,grad_norm: 0.9045173256727181, iteration: 168354
loss: 1.0107696056365967,grad_norm: 0.9233934511793005, iteration: 168355
loss: 0.997779369354248,grad_norm: 0.930479117857501, iteration: 168356
loss: 0.9951343536376953,grad_norm: 0.9183477421974805, iteration: 168357
loss: 1.014723539352417,grad_norm: 0.9457741148603214, iteration: 168358
loss: 1.0077309608459473,grad_norm: 0.9678771195505886, iteration: 168359
loss: 0.9903907775878906,grad_norm: 0.8507223252305434, iteration: 168360
loss: 1.067664384841919,grad_norm: 0.9999994298291022, iteration: 168361
loss: 0.9952582716941833,grad_norm: 0.8951416645327919, iteration: 168362
loss: 0.9667263031005859,grad_norm: 0.7934936987841913, iteration: 168363
loss: 0.959672749042511,grad_norm: 0.8507622614011874, iteration: 168364
loss: 1.0104002952575684,grad_norm: 0.999999240577836, iteration: 168365
loss: 0.9663569927215576,grad_norm: 0.9379221708834934, iteration: 168366
loss: 0.9858531951904297,grad_norm: 0.8835067047036628, iteration: 168367
loss: 0.9925327301025391,grad_norm: 0.9999992009662347, iteration: 168368
loss: 1.0126047134399414,grad_norm: 0.9696751274509232, iteration: 168369
loss: 1.009403944015503,grad_norm: 0.9999990674609884, iteration: 168370
loss: 1.012580394744873,grad_norm: 0.9999991700847978, iteration: 168371
loss: 1.0179189443588257,grad_norm: 0.9999999174842303, iteration: 168372
loss: 0.9829354286193848,grad_norm: 0.9999990180221245, iteration: 168373
loss: 1.0188701152801514,grad_norm: 0.990961652793699, iteration: 168374
loss: 0.978564441204071,grad_norm: 0.9999992266750397, iteration: 168375
loss: 1.0249289274215698,grad_norm: 0.9999992240709167, iteration: 168376
loss: 1.0407369136810303,grad_norm: 0.900087934372163, iteration: 168377
loss: 0.9825018644332886,grad_norm: 0.9578704806952743, iteration: 168378
loss: 0.9701655507087708,grad_norm: 0.9971756714198903, iteration: 168379
loss: 0.9810208678245544,grad_norm: 0.9414989922358818, iteration: 168380
loss: 1.0159345865249634,grad_norm: 0.8425144097154444, iteration: 168381
loss: 0.986222505569458,grad_norm: 0.9089108359625941, iteration: 168382
loss: 0.9962954521179199,grad_norm: 0.8821447710707353, iteration: 168383
loss: 1.0315015316009521,grad_norm: 0.9999991270548413, iteration: 168384
loss: 1.0147991180419922,grad_norm: 0.9999991519003723, iteration: 168385
loss: 1.0399250984191895,grad_norm: 0.8791764064742839, iteration: 168386
loss: 1.0043656826019287,grad_norm: 0.9999991841440957, iteration: 168387
loss: 0.9929595589637756,grad_norm: 0.9999991078770926, iteration: 168388
loss: 1.0127004384994507,grad_norm: 0.9999991403163045, iteration: 168389
loss: 1.0098947286605835,grad_norm: 0.9999992843674232, iteration: 168390
loss: 1.0219807624816895,grad_norm: 0.8813030496455496, iteration: 168391
loss: 1.021384596824646,grad_norm: 0.9999997837217934, iteration: 168392
loss: 0.9942452907562256,grad_norm: 0.8485118209525907, iteration: 168393
loss: 0.9895501136779785,grad_norm: 0.9999992588320922, iteration: 168394
loss: 0.9971394538879395,grad_norm: 0.9999991410300791, iteration: 168395
loss: 1.0022938251495361,grad_norm: 0.9487497419107771, iteration: 168396
loss: 0.9712772965431213,grad_norm: 0.8637130749185582, iteration: 168397
loss: 0.980118453502655,grad_norm: 0.9669852746755172, iteration: 168398
loss: 0.9865971803665161,grad_norm: 0.9999990570620171, iteration: 168399
loss: 1.021366834640503,grad_norm: 0.9179677920027404, iteration: 168400
loss: 1.0273698568344116,grad_norm: 0.996884098608529, iteration: 168401
loss: 0.9807813763618469,grad_norm: 0.9886695101334458, iteration: 168402
loss: 0.9821494221687317,grad_norm: 0.99999971837152, iteration: 168403
loss: 0.9658647179603577,grad_norm: 0.9999990609637311, iteration: 168404
loss: 1.0003517866134644,grad_norm: 0.9458444940771253, iteration: 168405
loss: 1.008270502090454,grad_norm: 0.999999277009483, iteration: 168406
loss: 1.0208145380020142,grad_norm: 0.826914979239298, iteration: 168407
loss: 1.0095856189727783,grad_norm: 0.9999989677364199, iteration: 168408
loss: 0.9872416257858276,grad_norm: 0.9999992043368333, iteration: 168409
loss: 0.9659303426742554,grad_norm: 0.9999992045174362, iteration: 168410
loss: 0.9740922451019287,grad_norm: 0.9999992367756112, iteration: 168411
loss: 1.0267783403396606,grad_norm: 0.9999995595735683, iteration: 168412
loss: 1.0034525394439697,grad_norm: 0.9999990928352492, iteration: 168413
loss: 1.0007439851760864,grad_norm: 0.9999991322985959, iteration: 168414
loss: 1.009643316268921,grad_norm: 0.8800918642734594, iteration: 168415
loss: 1.0115458965301514,grad_norm: 0.9344569031984591, iteration: 168416
loss: 1.0097318887710571,grad_norm: 0.8427552818201186, iteration: 168417
loss: 1.043318271636963,grad_norm: 0.831690362083383, iteration: 168418
loss: 0.9663975238800049,grad_norm: 0.9999991586280194, iteration: 168419
loss: 0.9701196551322937,grad_norm: 0.9517071327850604, iteration: 168420
loss: 0.9987081289291382,grad_norm: 0.8840496910492577, iteration: 168421
loss: 0.9779666662216187,grad_norm: 0.8594484759014364, iteration: 168422
loss: 0.99369877576828,grad_norm: 0.9999990575072718, iteration: 168423
loss: 0.9595149755477905,grad_norm: 0.870203597330443, iteration: 168424
loss: 0.9957275390625,grad_norm: 0.9676023189336227, iteration: 168425
loss: 0.9958367347717285,grad_norm: 0.976764218361039, iteration: 168426
loss: 0.9742124676704407,grad_norm: 0.9999991464654404, iteration: 168427
loss: 0.9849587678909302,grad_norm: 0.9999992175012337, iteration: 168428
loss: 1.0068925619125366,grad_norm: 0.9999991910169069, iteration: 168429
loss: 1.049485445022583,grad_norm: 0.9999994483472738, iteration: 168430
loss: 0.9921643137931824,grad_norm: 0.9999991462634219, iteration: 168431
loss: 1.0033457279205322,grad_norm: 0.9999990742040686, iteration: 168432
loss: 0.9948498606681824,grad_norm: 0.999999172091363, iteration: 168433
loss: 0.9986341595649719,grad_norm: 0.9808207022685221, iteration: 168434
loss: 1.0177037715911865,grad_norm: 0.9999990729305559, iteration: 168435
loss: 0.9851934313774109,grad_norm: 0.9286042720693618, iteration: 168436
loss: 1.0100816488265991,grad_norm: 0.9957467578415709, iteration: 168437
loss: 1.0909608602523804,grad_norm: 0.9999997946831277, iteration: 168438
loss: 1.0223729610443115,grad_norm: 0.9999992855540232, iteration: 168439
loss: 1.0022315979003906,grad_norm: 0.9999989777900641, iteration: 168440
loss: 0.9673188328742981,grad_norm: 0.902847700320122, iteration: 168441
loss: 1.0535277128219604,grad_norm: 0.9999999171894838, iteration: 168442
loss: 1.034353494644165,grad_norm: 0.9999991234844202, iteration: 168443
loss: 1.014471411705017,grad_norm: 0.9999993362732947, iteration: 168444
loss: 1.014451026916504,grad_norm: 0.99999972670211, iteration: 168445
loss: 0.9448640942573547,grad_norm: 0.9956162646801041, iteration: 168446
loss: 1.0178618431091309,grad_norm: 0.9999998174229966, iteration: 168447
loss: 1.03182852268219,grad_norm: 0.978898279187649, iteration: 168448
loss: 1.0139449834823608,grad_norm: 0.9999991129571413, iteration: 168449
loss: 0.9520448446273804,grad_norm: 0.9999992834960469, iteration: 168450
loss: 0.9840779304504395,grad_norm: 0.9250960293823787, iteration: 168451
loss: 0.9978853464126587,grad_norm: 0.9999997311373153, iteration: 168452
loss: 1.091369867324829,grad_norm: 0.9999995806653529, iteration: 168453
loss: 0.9985969662666321,grad_norm: 0.9483502505231016, iteration: 168454
loss: 0.9738708734512329,grad_norm: 0.999999005571428, iteration: 168455
loss: 1.004954218864441,grad_norm: 0.9999991019241693, iteration: 168456
loss: 0.9866229891777039,grad_norm: 0.9944264971833291, iteration: 168457
loss: 0.987182080745697,grad_norm: 0.8203505706039913, iteration: 168458
loss: 1.022099494934082,grad_norm: 0.9606622419174579, iteration: 168459
loss: 1.022125482559204,grad_norm: 0.9999991243718184, iteration: 168460
loss: 0.9991391897201538,grad_norm: 0.9999990089970806, iteration: 168461
loss: 1.0085548162460327,grad_norm: 0.9363023615953618, iteration: 168462
loss: 0.9817745685577393,grad_norm: 0.9999990631483281, iteration: 168463
loss: 0.9996107220649719,grad_norm: 0.9701645275201994, iteration: 168464
loss: 0.9497676491737366,grad_norm: 0.9999990093655414, iteration: 168465
loss: 1.0281715393066406,grad_norm: 0.9999992276701308, iteration: 168466
loss: 0.9768393039703369,grad_norm: 0.9942156585520703, iteration: 168467
loss: 1.0145167112350464,grad_norm: 0.9999992547248719, iteration: 168468
loss: 0.9504985213279724,grad_norm: 0.7677577486452726, iteration: 168469
loss: 1.012305736541748,grad_norm: 0.9817356054001902, iteration: 168470
loss: 1.0175963640213013,grad_norm: 0.999999284985012, iteration: 168471
loss: 1.0186461210250854,grad_norm: 0.9132165622020794, iteration: 168472
loss: 0.9753507971763611,grad_norm: 0.9999989920981391, iteration: 168473
loss: 1.0163474082946777,grad_norm: 0.9455857936944884, iteration: 168474
loss: 0.9916629195213318,grad_norm: 0.7614029557698727, iteration: 168475
loss: 0.9553417563438416,grad_norm: 0.9999989405073358, iteration: 168476
loss: 1.005536675453186,grad_norm: 0.9999991355191745, iteration: 168477
loss: 0.9810136556625366,grad_norm: 0.999999218877554, iteration: 168478
loss: 1.0185670852661133,grad_norm: 0.9999991431468938, iteration: 168479
loss: 0.9767623543739319,grad_norm: 0.9517582040221322, iteration: 168480
loss: 1.0184828042984009,grad_norm: 0.9766248851120392, iteration: 168481
loss: 1.0001273155212402,grad_norm: 0.9688193072804366, iteration: 168482
loss: 1.0176013708114624,grad_norm: 0.9241967791749323, iteration: 168483
loss: 1.020483374595642,grad_norm: 0.999999787256036, iteration: 168484
loss: 1.0325697660446167,grad_norm: 0.9999991896288987, iteration: 168485
loss: 1.0051546096801758,grad_norm: 0.9172859658483715, iteration: 168486
loss: 1.0110175609588623,grad_norm: 0.9606721520375415, iteration: 168487
loss: 1.0306944847106934,grad_norm: 0.999999123734554, iteration: 168488
loss: 0.9629802703857422,grad_norm: 0.9999990448896185, iteration: 168489
loss: 0.9904624223709106,grad_norm: 0.9999991841609495, iteration: 168490
loss: 0.9581987857818604,grad_norm: 0.9406811888382686, iteration: 168491
loss: 0.9767419099807739,grad_norm: 0.9999990979481077, iteration: 168492
loss: 0.9907096028327942,grad_norm: 0.9999990723927114, iteration: 168493
loss: 1.0220510959625244,grad_norm: 0.9999989878673968, iteration: 168494
loss: 1.0289098024368286,grad_norm: 0.9999991651816517, iteration: 168495
loss: 0.9933232069015503,grad_norm: 0.9027167211873931, iteration: 168496
loss: 0.9925119876861572,grad_norm: 0.9999990831308063, iteration: 168497
loss: 0.9764419198036194,grad_norm: 0.8433929160629505, iteration: 168498
loss: 0.9795923829078674,grad_norm: 0.9021296402304845, iteration: 168499
loss: 0.975102961063385,grad_norm: 0.896427070595105, iteration: 168500
loss: 0.9890438914299011,grad_norm: 0.9507888389483286, iteration: 168501
loss: 1.0006301403045654,grad_norm: 0.9999993270186632, iteration: 168502
loss: 1.0263094902038574,grad_norm: 0.9999995974808525, iteration: 168503
loss: 0.9967339634895325,grad_norm: 0.9475096110382456, iteration: 168504
loss: 0.9900554418563843,grad_norm: 0.9999998068316834, iteration: 168505
loss: 0.9843181371688843,grad_norm: 0.9669383754507546, iteration: 168506
loss: 0.9920385479927063,grad_norm: 0.9999990155424553, iteration: 168507
loss: 1.0339194536209106,grad_norm: 0.8503451662109168, iteration: 168508
loss: 1.0177065134048462,grad_norm: 0.8266749099149474, iteration: 168509
loss: 0.9831427335739136,grad_norm: 0.9962070055994047, iteration: 168510
loss: 0.9913843870162964,grad_norm: 0.9999990982732674, iteration: 168511
loss: 1.0331852436065674,grad_norm: 0.8571393944140133, iteration: 168512
loss: 1.0057979822158813,grad_norm: 0.8214865234383524, iteration: 168513
loss: 0.9927113652229309,grad_norm: 0.991241776464833, iteration: 168514
loss: 1.0091079473495483,grad_norm: 0.9999993322596616, iteration: 168515
loss: 0.9805237650871277,grad_norm: 0.9999993174572877, iteration: 168516
loss: 1.0005031824111938,grad_norm: 0.999999167278191, iteration: 168517
loss: 1.0181248188018799,grad_norm: 0.9999998200297111, iteration: 168518
loss: 1.0265460014343262,grad_norm: 0.9999991987679248, iteration: 168519
loss: 1.0196123123168945,grad_norm: 0.8727071513130105, iteration: 168520
loss: 0.9942201375961304,grad_norm: 0.9999992119710963, iteration: 168521
loss: 1.0204970836639404,grad_norm: 0.9999989023926306, iteration: 168522
loss: 1.0073561668395996,grad_norm: 0.9999991850902635, iteration: 168523
loss: 1.0506258010864258,grad_norm: 0.9999991779160382, iteration: 168524
loss: 1.0128188133239746,grad_norm: 0.9219966143063606, iteration: 168525
loss: 1.0197632312774658,grad_norm: 0.9999990825889131, iteration: 168526
loss: 1.0253291130065918,grad_norm: 0.975990901807309, iteration: 168527
loss: 1.0082587003707886,grad_norm: 0.9999992041862306, iteration: 168528
loss: 0.9762524962425232,grad_norm: 0.8647221607461794, iteration: 168529
loss: 1.0404951572418213,grad_norm: 0.9999997259131981, iteration: 168530
loss: 0.9897900819778442,grad_norm: 0.9999991142291415, iteration: 168531
loss: 1.0250294208526611,grad_norm: 0.9882149734433051, iteration: 168532
loss: 0.9504774808883667,grad_norm: 0.9511753578734462, iteration: 168533
loss: 0.9947342276573181,grad_norm: 0.9873563921575208, iteration: 168534
loss: 0.9890413880348206,grad_norm: 0.999999273964164, iteration: 168535
loss: 0.9919427037239075,grad_norm: 0.9507788974613236, iteration: 168536
loss: 1.0397816896438599,grad_norm: 0.9999992675364373, iteration: 168537
loss: 1.0188931226730347,grad_norm: 0.999999090964787, iteration: 168538
loss: 1.0167957544326782,grad_norm: 0.9999990381191124, iteration: 168539
loss: 0.9931846857070923,grad_norm: 0.9999990874898923, iteration: 168540
loss: 1.0023452043533325,grad_norm: 0.9764361601854502, iteration: 168541
loss: 1.0264177322387695,grad_norm: 0.9884601034051961, iteration: 168542
loss: 1.0335032939910889,grad_norm: 0.9999994812434484, iteration: 168543
loss: 0.9835259914398193,grad_norm: 0.9999992177552403, iteration: 168544
loss: 0.9957389235496521,grad_norm: 0.9050555686488578, iteration: 168545
loss: 0.985944926738739,grad_norm: 0.8505809716806111, iteration: 168546
loss: 0.969749927520752,grad_norm: 0.9708595032304047, iteration: 168547
loss: 0.9883700609207153,grad_norm: 0.9262750471826211, iteration: 168548
loss: 0.9514486789703369,grad_norm: 0.9794948756452657, iteration: 168549
loss: 0.9800945520401001,grad_norm: 0.9198918477354141, iteration: 168550
loss: 1.0060927867889404,grad_norm: 0.999999271552329, iteration: 168551
loss: 1.1547616720199585,grad_norm: 0.9999990232041034, iteration: 168552
loss: 1.0489256381988525,grad_norm: 0.9999993083663682, iteration: 168553
loss: 1.0260368585586548,grad_norm: 0.9948337801482595, iteration: 168554
loss: 1.0332835912704468,grad_norm: 0.9999991622939538, iteration: 168555
loss: 0.9779123663902283,grad_norm: 0.9999989977542366, iteration: 168556
loss: 0.9828431606292725,grad_norm: 0.8879519387723884, iteration: 168557
loss: 1.0007269382476807,grad_norm: 0.9553476850417733, iteration: 168558
loss: 0.9973228573799133,grad_norm: 0.9693650895722987, iteration: 168559
loss: 1.0546704530715942,grad_norm: 0.9548197569344583, iteration: 168560
loss: 0.9986289143562317,grad_norm: 0.9999989605371664, iteration: 168561
loss: 0.9682022333145142,grad_norm: 0.9999990967409365, iteration: 168562
loss: 1.0213133096694946,grad_norm: 0.9268006214124415, iteration: 168563
loss: 0.9935024380683899,grad_norm: 0.9999991919838741, iteration: 168564
loss: 0.9961392283439636,grad_norm: 0.9999993769373904, iteration: 168565
loss: 0.9950188994407654,grad_norm: 0.9999990177774035, iteration: 168566
loss: 1.1061947345733643,grad_norm: 0.9999992733750162, iteration: 168567
loss: 0.9933071136474609,grad_norm: 0.9187082994235196, iteration: 168568
loss: 1.0052200555801392,grad_norm: 0.9265224538680773, iteration: 168569
loss: 1.0052536725997925,grad_norm: 0.8867644304039567, iteration: 168570
loss: 0.9938033223152161,grad_norm: 0.9999994025673848, iteration: 168571
loss: 1.0288668870925903,grad_norm: 0.9999990637341238, iteration: 168572
loss: 0.9885807037353516,grad_norm: 0.9999989931977217, iteration: 168573
loss: 1.0249254703521729,grad_norm: 0.9999993022362852, iteration: 168574
loss: 1.0134410858154297,grad_norm: 0.9649910794450827, iteration: 168575
loss: 1.0251435041427612,grad_norm: 0.9999991234026975, iteration: 168576
loss: 0.9877902865409851,grad_norm: 0.9999991244180382, iteration: 168577
loss: 0.9759146571159363,grad_norm: 0.9999990776477574, iteration: 168578
loss: 1.0433616638183594,grad_norm: 0.9999993973623493, iteration: 168579
loss: 1.000775933265686,grad_norm: 0.9999990794526312, iteration: 168580
loss: 0.9953048825263977,grad_norm: 0.999999659059039, iteration: 168581
loss: 1.0024785995483398,grad_norm: 0.9999989628095366, iteration: 168582
loss: 0.9998818635940552,grad_norm: 0.9999989967821453, iteration: 168583
loss: 0.9860137701034546,grad_norm: 0.9999991718069983, iteration: 168584
loss: 0.9900373816490173,grad_norm: 0.9805660099586019, iteration: 168585
loss: 0.9638329148292542,grad_norm: 0.9999990541139241, iteration: 168586
loss: 1.0095269680023193,grad_norm: 0.9782246636175053, iteration: 168587
loss: 0.9987843632698059,grad_norm: 0.9312115315908341, iteration: 168588
loss: 0.9795132875442505,grad_norm: 0.9963623229899631, iteration: 168589
loss: 1.03089439868927,grad_norm: 0.9505608785766798, iteration: 168590
loss: 0.9893674850463867,grad_norm: 0.9773035838893259, iteration: 168591
loss: 1.0372401475906372,grad_norm: 0.9999992990821636, iteration: 168592
loss: 0.9314680695533752,grad_norm: 0.8518597704195007, iteration: 168593
loss: 0.9982319474220276,grad_norm: 0.9423012823071323, iteration: 168594
loss: 0.9857542514801025,grad_norm: 0.9996371476329139, iteration: 168595
loss: 1.0112918615341187,grad_norm: 0.9999989891649375, iteration: 168596
loss: 1.0147391557693481,grad_norm: 0.9724661985115379, iteration: 168597
loss: 0.9841517806053162,grad_norm: 0.9466373131284365, iteration: 168598
loss: 1.013237476348877,grad_norm: 0.9999991854882467, iteration: 168599
loss: 0.9955613017082214,grad_norm: 0.9999992843048825, iteration: 168600
loss: 0.9467924237251282,grad_norm: 0.9826507591132709, iteration: 168601
loss: 0.9967101216316223,grad_norm: 0.999999004355891, iteration: 168602
loss: 1.00786292552948,grad_norm: 0.999999182987208, iteration: 168603
loss: 1.0194098949432373,grad_norm: 0.9883534398351961, iteration: 168604
loss: 1.0100688934326172,grad_norm: 0.9563499341985353, iteration: 168605
loss: 0.9874251484870911,grad_norm: 0.9999991219022148, iteration: 168606
loss: 1.0303733348846436,grad_norm: 0.9550449554788273, iteration: 168607
loss: 1.0220434665679932,grad_norm: 0.9498013510753878, iteration: 168608
loss: 0.9877747893333435,grad_norm: 0.99999904505334, iteration: 168609
loss: 0.9676691293716431,grad_norm: 0.9999990458833791, iteration: 168610
loss: 1.013315200805664,grad_norm: 0.9189457303257248, iteration: 168611
loss: 1.0127017498016357,grad_norm: 0.9772087659121204, iteration: 168612
loss: 0.9863787889480591,grad_norm: 0.999999011112002, iteration: 168613
loss: 1.0010645389556885,grad_norm: 0.9999991394324302, iteration: 168614
loss: 1.0346417427062988,grad_norm: 0.99999905933658, iteration: 168615
loss: 0.9943142533302307,grad_norm: 0.9410816928088559, iteration: 168616
loss: 1.0080593824386597,grad_norm: 0.9999991498570947, iteration: 168617
loss: 1.0112848281860352,grad_norm: 0.9777252944174156, iteration: 168618
loss: 1.0070315599441528,grad_norm: 0.9999990096070247, iteration: 168619
loss: 1.021410584449768,grad_norm: 0.8427889774774087, iteration: 168620
loss: 0.9875577092170715,grad_norm: 0.9058481235245075, iteration: 168621
loss: 1.0873337984085083,grad_norm: 0.9999997622738614, iteration: 168622
loss: 0.9928951263427734,grad_norm: 0.9044228712830656, iteration: 168623
loss: 1.0216418504714966,grad_norm: 0.999999095862559, iteration: 168624
loss: 1.0252454280853271,grad_norm: 0.9999991546562859, iteration: 168625
loss: 1.021397352218628,grad_norm: 0.9549809889529317, iteration: 168626
loss: 0.9845951795578003,grad_norm: 0.9999991326293327, iteration: 168627
loss: 1.0391067266464233,grad_norm: 0.9728146663709912, iteration: 168628
loss: 1.031136155128479,grad_norm: 0.9999991284807819, iteration: 168629
loss: 1.0287301540374756,grad_norm: 0.9999991685801769, iteration: 168630
loss: 1.0206204652786255,grad_norm: 0.9103267235090511, iteration: 168631
loss: 0.9806780815124512,grad_norm: 0.9999991985402896, iteration: 168632
loss: 0.9946141242980957,grad_norm: 0.9999991241232129, iteration: 168633
loss: 1.0113097429275513,grad_norm: 0.9999994389210578, iteration: 168634
loss: 0.9779189825057983,grad_norm: 0.8920118778294677, iteration: 168635
loss: 0.9855467081069946,grad_norm: 0.8939049482989072, iteration: 168636
loss: 1.0420074462890625,grad_norm: 0.999999359329739, iteration: 168637
loss: 0.9980476498603821,grad_norm: 0.9580742421177575, iteration: 168638
loss: 1.0051157474517822,grad_norm: 0.9999991734780924, iteration: 168639
loss: 1.0559638738632202,grad_norm: 0.9999999260489639, iteration: 168640
loss: 0.9954114556312561,grad_norm: 0.9563104040159079, iteration: 168641
loss: 1.2307871580123901,grad_norm: 0.9999995534637305, iteration: 168642
loss: 1.031856656074524,grad_norm: 0.9999996236945792, iteration: 168643
loss: 1.0138448476791382,grad_norm: 0.9999992004158599, iteration: 168644
loss: 0.9797828197479248,grad_norm: 0.9999992285389058, iteration: 168645
loss: 1.01322603225708,grad_norm: 0.8842221192004723, iteration: 168646
loss: 1.0425827503204346,grad_norm: 0.9999989936595198, iteration: 168647
loss: 0.9738172292709351,grad_norm: 0.9999991671054891, iteration: 168648
loss: 1.0409940481185913,grad_norm: 0.9825796212243736, iteration: 168649
loss: 0.9906671643257141,grad_norm: 0.9434139124085903, iteration: 168650
loss: 1.0287284851074219,grad_norm: 0.8155507266794028, iteration: 168651
loss: 0.9947680234909058,grad_norm: 0.8870200966663422, iteration: 168652
loss: 1.061037540435791,grad_norm: 0.9999997594831455, iteration: 168653
loss: 1.0087321996688843,grad_norm: 0.9999991890425901, iteration: 168654
loss: 0.9890373945236206,grad_norm: 0.999999013260692, iteration: 168655
loss: 1.0211007595062256,grad_norm: 0.9531616745305598, iteration: 168656
loss: 1.1323522329330444,grad_norm: 0.9999993627341488, iteration: 168657
loss: 1.0562716722488403,grad_norm: 0.9999992908330759, iteration: 168658
loss: 1.020430326461792,grad_norm: 0.9999989714571724, iteration: 168659
loss: 1.0106719732284546,grad_norm: 0.9043457929780999, iteration: 168660
loss: 1.0039767026901245,grad_norm: 0.9999990445291047, iteration: 168661
loss: 1.00482177734375,grad_norm: 0.9384243577541561, iteration: 168662
loss: 0.9610031247138977,grad_norm: 0.9999991612584163, iteration: 168663
loss: 0.953946053981781,grad_norm: 0.8912565517478568, iteration: 168664
loss: 0.9946495294570923,grad_norm: 0.9999989685962194, iteration: 168665
loss: 1.0302859544754028,grad_norm: 0.999998964287405, iteration: 168666
loss: 1.0160263776779175,grad_norm: 0.9240409776946656, iteration: 168667
loss: 1.02684485912323,grad_norm: 0.9999991309055526, iteration: 168668
loss: 1.0710431337356567,grad_norm: 0.9999997523830099, iteration: 168669
loss: 1.0432900190353394,grad_norm: 0.9999991445239697, iteration: 168670
loss: 0.9866693019866943,grad_norm: 0.9999989268995556, iteration: 168671
loss: 1.0059012174606323,grad_norm: 0.9999993417205587, iteration: 168672
loss: 0.9771111011505127,grad_norm: 0.9999991927970466, iteration: 168673
loss: 1.1168981790542603,grad_norm: 0.999999466169926, iteration: 168674
loss: 0.9842947721481323,grad_norm: 0.955232990635159, iteration: 168675
loss: 0.9821522831916809,grad_norm: 0.9387332668283382, iteration: 168676
loss: 1.001111388206482,grad_norm: 0.9999991738685772, iteration: 168677
loss: 1.1262575387954712,grad_norm: 0.9999991869194863, iteration: 168678
loss: 1.080065131187439,grad_norm: 0.9999993452268855, iteration: 168679
loss: 1.037198781967163,grad_norm: 0.9999990777411273, iteration: 168680
loss: 1.0252314805984497,grad_norm: 0.999999048674435, iteration: 168681
loss: 0.9539158344268799,grad_norm: 0.9985750953057108, iteration: 168682
loss: 0.9875679016113281,grad_norm: 0.9999991526413038, iteration: 168683
loss: 1.0099319219589233,grad_norm: 0.9999990035412342, iteration: 168684
loss: 0.9754213094711304,grad_norm: 0.9999996007493487, iteration: 168685
loss: 1.0908901691436768,grad_norm: 0.9872213215969587, iteration: 168686
loss: 1.0619113445281982,grad_norm: 0.9999995909835699, iteration: 168687
loss: 0.976601779460907,grad_norm: 0.9999991435880153, iteration: 168688
loss: 1.3731439113616943,grad_norm: 0.9999999192312252, iteration: 168689
loss: 0.9851583242416382,grad_norm: 0.9932773372109964, iteration: 168690
loss: 0.9760944843292236,grad_norm: 0.9999990128734245, iteration: 168691
loss: 1.101447343826294,grad_norm: 0.9999991479625387, iteration: 168692
loss: 0.9973654747009277,grad_norm: 0.9529784511565771, iteration: 168693
loss: 1.009758710861206,grad_norm: 0.9986938943244191, iteration: 168694
loss: 0.9902506470680237,grad_norm: 0.9999991564055715, iteration: 168695
loss: 1.0316723585128784,grad_norm: 0.8385585466055266, iteration: 168696
loss: 0.998106837272644,grad_norm: 0.974925571333139, iteration: 168697
loss: 1.0146509408950806,grad_norm: 0.9999993123926337, iteration: 168698
loss: 0.9783318042755127,grad_norm: 0.9999991020054834, iteration: 168699
loss: 0.9566910266876221,grad_norm: 0.9999989564254673, iteration: 168700
loss: 1.0886403322219849,grad_norm: 0.9999993217243213, iteration: 168701
loss: 0.9959075450897217,grad_norm: 0.9999990677361617, iteration: 168702
loss: 1.0084301233291626,grad_norm: 0.9241418255259529, iteration: 168703
loss: 0.9754309058189392,grad_norm: 0.9453785123593642, iteration: 168704
loss: 1.0210376977920532,grad_norm: 0.9999991253647621, iteration: 168705
loss: 0.9745526909828186,grad_norm: 0.9999990430818372, iteration: 168706
loss: 0.965947687625885,grad_norm: 0.9067788857541152, iteration: 168707
loss: 0.9946426153182983,grad_norm: 0.9331203176058221, iteration: 168708
loss: 1.0088461637496948,grad_norm: 0.9999992510798458, iteration: 168709
loss: 1.0138522386550903,grad_norm: 0.9999992556437688, iteration: 168710
loss: 0.9844200015068054,grad_norm: 0.9999989853391782, iteration: 168711
loss: 1.003986120223999,grad_norm: 0.9999990667190704, iteration: 168712
loss: 1.0094044208526611,grad_norm: 0.9999997056680504, iteration: 168713
loss: 1.0835965871810913,grad_norm: 0.9999999087113736, iteration: 168714
loss: 0.9634467959403992,grad_norm: 0.9999991750218757, iteration: 168715
loss: 0.9883021116256714,grad_norm: 0.9817006270747007, iteration: 168716
loss: 0.9832857251167297,grad_norm: 0.9999991595173767, iteration: 168717
loss: 0.9752729535102844,grad_norm: 0.9999991048002596, iteration: 168718
loss: 1.0098882913589478,grad_norm: 0.9999999052889798, iteration: 168719
loss: 0.9911927580833435,grad_norm: 0.8811380207761156, iteration: 168720
loss: 0.953574001789093,grad_norm: 0.8887636898242345, iteration: 168721
loss: 1.0178159475326538,grad_norm: 0.871967073234715, iteration: 168722
loss: 1.061504602432251,grad_norm: 0.9587227588456971, iteration: 168723
loss: 1.0048738718032837,grad_norm: 0.88767588261127, iteration: 168724
loss: 0.9770459532737732,grad_norm: 0.9999991843682539, iteration: 168725
loss: 1.001775860786438,grad_norm: 0.9999990896023976, iteration: 168726
loss: 0.9501697421073914,grad_norm: 0.9999990908539336, iteration: 168727
loss: 0.9679809212684631,grad_norm: 0.9583529190266701, iteration: 168728
loss: 0.9894520044326782,grad_norm: 0.9487133298351492, iteration: 168729
loss: 0.9984866380691528,grad_norm: 0.9999992379184234, iteration: 168730
loss: 1.0419974327087402,grad_norm: 0.9999991008191375, iteration: 168731
loss: 1.0338964462280273,grad_norm: 0.9999991831440297, iteration: 168732
loss: 0.9622111916542053,grad_norm: 0.9480532895717712, iteration: 168733
loss: 0.9868126511573792,grad_norm: 0.999999149595299, iteration: 168734
loss: 0.9866199493408203,grad_norm: 0.9999989865632479, iteration: 168735
loss: 1.029768943786621,grad_norm: 0.9160603414344826, iteration: 168736
loss: 1.0215537548065186,grad_norm: 0.9999991185067365, iteration: 168737
loss: 0.9907420873641968,grad_norm: 0.9999991921764417, iteration: 168738
loss: 0.9937189221382141,grad_norm: 0.89449953294839, iteration: 168739
loss: 0.969322681427002,grad_norm: 0.8627740152689712, iteration: 168740
loss: 0.9494748115539551,grad_norm: 0.8718218192796525, iteration: 168741
loss: 1.029437780380249,grad_norm: 0.999999504199841, iteration: 168742
loss: 1.0471588373184204,grad_norm: 0.9999997973832625, iteration: 168743
loss: 0.994933545589447,grad_norm: 0.9999991206137696, iteration: 168744
loss: 1.0130560398101807,grad_norm: 0.9999992030661541, iteration: 168745
loss: 1.024585247039795,grad_norm: 0.9999998622686664, iteration: 168746
loss: 0.992857813835144,grad_norm: 0.999999149756421, iteration: 168747
loss: 1.0046237707138062,grad_norm: 0.8105988590659795, iteration: 168748
loss: 1.0009759664535522,grad_norm: 0.9999989037180304, iteration: 168749
loss: 0.9927297234535217,grad_norm: 0.8521263610753472, iteration: 168750
loss: 0.9949873089790344,grad_norm: 0.9798031899501356, iteration: 168751
loss: 0.991270899772644,grad_norm: 0.9796605494595337, iteration: 168752
loss: 1.0950740575790405,grad_norm: 0.9999993649638474, iteration: 168753
loss: 0.9752374887466431,grad_norm: 0.8242977140510958, iteration: 168754
loss: 1.010511040687561,grad_norm: 0.9142077986920549, iteration: 168755
loss: 0.9920563101768494,grad_norm: 0.999999712522349, iteration: 168756
loss: 1.0637143850326538,grad_norm: 0.9999992503048911, iteration: 168757
loss: 0.9889163374900818,grad_norm: 0.8919633339139436, iteration: 168758
loss: 0.9870657920837402,grad_norm: 0.999999057505128, iteration: 168759
loss: 1.014125108718872,grad_norm: 0.9999990554998816, iteration: 168760
loss: 1.0243970155715942,grad_norm: 0.8984578278293444, iteration: 168761
loss: 1.009884238243103,grad_norm: 0.9999990800119689, iteration: 168762
loss: 0.9628557562828064,grad_norm: 0.9570497751299607, iteration: 168763
loss: 1.024256706237793,grad_norm: 0.9999990578050821, iteration: 168764
loss: 1.0620208978652954,grad_norm: 0.9999994839275008, iteration: 168765
loss: 0.972048282623291,grad_norm: 0.923538741824796, iteration: 168766
loss: 0.964407742023468,grad_norm: 0.9063411336539563, iteration: 168767
loss: 0.9726192355155945,grad_norm: 0.9782599528241888, iteration: 168768
loss: 1.0309885740280151,grad_norm: 0.8660704828922478, iteration: 168769
loss: 1.0198259353637695,grad_norm: 0.9999991863567116, iteration: 168770
loss: 0.9853768348693848,grad_norm: 0.9999993107784759, iteration: 168771
loss: 1.0098214149475098,grad_norm: 0.7962907481092999, iteration: 168772
loss: 1.0305426120758057,grad_norm: 0.999999464034333, iteration: 168773
loss: 1.0096485614776611,grad_norm: 0.998407192118102, iteration: 168774
loss: 1.0241223573684692,grad_norm: 0.9527122378419055, iteration: 168775
loss: 1.0109422206878662,grad_norm: 0.9762252042324898, iteration: 168776
loss: 1.0237658023834229,grad_norm: 0.9999997713418898, iteration: 168777
loss: 1.0179718732833862,grad_norm: 0.9999995319507605, iteration: 168778
loss: 1.0334060192108154,grad_norm: 0.9999994626226352, iteration: 168779
loss: 0.9798279404640198,grad_norm: 0.9999991669007828, iteration: 168780
loss: 1.007493019104004,grad_norm: 0.8495392731623763, iteration: 168781
loss: 1.0202178955078125,grad_norm: 0.9860241588747474, iteration: 168782
loss: 0.9859350919723511,grad_norm: 0.9598741102277689, iteration: 168783
loss: 1.054740071296692,grad_norm: 0.9999990324940163, iteration: 168784
loss: 1.001850962638855,grad_norm: 0.9999994153873488, iteration: 168785
loss: 0.991242527961731,grad_norm: 0.9999990760923912, iteration: 168786
loss: 1.041327953338623,grad_norm: 0.9999999421643853, iteration: 168787
loss: 0.9924865961074829,grad_norm: 0.9701021569910532, iteration: 168788
loss: 1.0074589252471924,grad_norm: 0.999999161915808, iteration: 168789
loss: 0.9726336598396301,grad_norm: 0.9394342913052487, iteration: 168790
loss: 1.005650520324707,grad_norm: 0.9703920054754901, iteration: 168791
loss: 0.9878910779953003,grad_norm: 0.9999989972999821, iteration: 168792
loss: 1.0040619373321533,grad_norm: 0.9078129596045412, iteration: 168793
loss: 1.0119616985321045,grad_norm: 0.9999991864842321, iteration: 168794
loss: 0.9717845320701599,grad_norm: 0.9999990645402395, iteration: 168795
loss: 0.968169093132019,grad_norm: 0.9653495848602414, iteration: 168796
loss: 1.0083863735198975,grad_norm: 0.9418384972860022, iteration: 168797
loss: 1.004516839981079,grad_norm: 0.9999991644364093, iteration: 168798
loss: 0.9849286079406738,grad_norm: 0.938225834294647, iteration: 168799
loss: 0.9635003209114075,grad_norm: 0.9474384512318649, iteration: 168800
loss: 0.9807365536689758,grad_norm: 0.9999990173100527, iteration: 168801
loss: 0.9849827885627747,grad_norm: 0.9156490811718032, iteration: 168802
loss: 0.9746530055999756,grad_norm: 0.8719061179051827, iteration: 168803
loss: 0.9712117314338684,grad_norm: 0.999999047681949, iteration: 168804
loss: 0.9973713159561157,grad_norm: 0.9243215320238366, iteration: 168805
loss: 0.9909641742706299,grad_norm: 0.8773746680322938, iteration: 168806
loss: 0.987943172454834,grad_norm: 0.9999989788389446, iteration: 168807
loss: 0.9874562621116638,grad_norm: 0.8805296047501259, iteration: 168808
loss: 1.001086950302124,grad_norm: 0.9999990902255466, iteration: 168809
loss: 1.0181254148483276,grad_norm: 0.9601257769361369, iteration: 168810
loss: 0.990583062171936,grad_norm: 0.999999246381206, iteration: 168811
loss: 1.011003851890564,grad_norm: 0.9999990052657457, iteration: 168812
loss: 1.060137391090393,grad_norm: 0.9999994647272443, iteration: 168813
loss: 1.0191670656204224,grad_norm: 0.9999990078395803, iteration: 168814
loss: 1.0622496604919434,grad_norm: 0.9999991621770602, iteration: 168815
loss: 0.9933406710624695,grad_norm: 0.9788423314875189, iteration: 168816
loss: 1.0017014741897583,grad_norm: 0.9999990555228805, iteration: 168817
loss: 1.016680121421814,grad_norm: 0.9999990170694006, iteration: 168818
loss: 0.9951390027999878,grad_norm: 0.9999991100103772, iteration: 168819
loss: 1.0084292888641357,grad_norm: 0.9999991648708387, iteration: 168820
loss: 1.0174719095230103,grad_norm: 0.9999990861456497, iteration: 168821
loss: 0.990242600440979,grad_norm: 0.9091795180199632, iteration: 168822
loss: 0.9716619253158569,grad_norm: 0.8590705065033587, iteration: 168823
loss: 1.004355549812317,grad_norm: 0.9999991032336352, iteration: 168824
loss: 0.9903344511985779,grad_norm: 0.9999991317910444, iteration: 168825
loss: 1.0048898458480835,grad_norm: 0.9651744338894819, iteration: 168826
loss: 1.0574406385421753,grad_norm: 0.9999993888847789, iteration: 168827
loss: 0.9886558651924133,grad_norm: 0.9810368731193329, iteration: 168828
loss: 0.9764135479927063,grad_norm: 0.9999992093023602, iteration: 168829
loss: 0.9710222482681274,grad_norm: 0.9198129051343067, iteration: 168830
loss: 1.0148626565933228,grad_norm: 0.9999990400808226, iteration: 168831
loss: 0.9881287217140198,grad_norm: 0.9532464341271313, iteration: 168832
loss: 0.9736853241920471,grad_norm: 0.9999990690472549, iteration: 168833
loss: 1.0039321184158325,grad_norm: 0.9999991450322226, iteration: 168834
loss: 1.003974199295044,grad_norm: 0.9999989616531677, iteration: 168835
loss: 0.9928562641143799,grad_norm: 0.9781501240971293, iteration: 168836
loss: 1.015515923500061,grad_norm: 0.851957985758872, iteration: 168837
loss: 1.01233971118927,grad_norm: 0.99999913462797, iteration: 168838
loss: 1.0021053552627563,grad_norm: 0.9999991382126869, iteration: 168839
loss: 0.993403434753418,grad_norm: 0.960378966371722, iteration: 168840
loss: 1.0003100633621216,grad_norm: 0.999999101835945, iteration: 168841
loss: 0.9801837205886841,grad_norm: 0.9477579184843199, iteration: 168842
loss: 1.0014814138412476,grad_norm: 0.9999993168600838, iteration: 168843
loss: 0.9982250332832336,grad_norm: 0.9893658025253937, iteration: 168844
loss: 1.02586030960083,grad_norm: 0.9999992557206021, iteration: 168845
loss: 1.0193994045257568,grad_norm: 0.9490975494767798, iteration: 168846
loss: 0.9861986637115479,grad_norm: 0.9426909874277962, iteration: 168847
loss: 1.0220043659210205,grad_norm: 0.999999219962149, iteration: 168848
loss: 1.0189491510391235,grad_norm: 0.8221906291936799, iteration: 168849
loss: 0.9873058199882507,grad_norm: 0.9048867434413481, iteration: 168850
loss: 1.0131919384002686,grad_norm: 0.9999991569919269, iteration: 168851
loss: 1.042130470275879,grad_norm: 0.9999991353584085, iteration: 168852
loss: 1.0028951168060303,grad_norm: 0.9999994211778768, iteration: 168853
loss: 1.0394408702850342,grad_norm: 0.9999990979427086, iteration: 168854
loss: 1.0144243240356445,grad_norm: 0.9714970109798915, iteration: 168855
loss: 1.0130865573883057,grad_norm: 0.9999991058487276, iteration: 168856
loss: 0.9732764959335327,grad_norm: 0.9267606385406963, iteration: 168857
loss: 1.0183333158493042,grad_norm: 0.9999992931161634, iteration: 168858
loss: 1.0143309831619263,grad_norm: 0.8527791095808162, iteration: 168859
loss: 0.9970164895057678,grad_norm: 0.8637476239688952, iteration: 168860
loss: 1.004838228225708,grad_norm: 0.9807368903393643, iteration: 168861
loss: 0.9899609088897705,grad_norm: 0.8836298293849476, iteration: 168862
loss: 0.9975175857543945,grad_norm: 0.9496046321897265, iteration: 168863
loss: 0.9920446276664734,grad_norm: 0.9999991626006575, iteration: 168864
loss: 1.0187318325042725,grad_norm: 0.9999991346253703, iteration: 168865
loss: 1.0250797271728516,grad_norm: 0.9999990810497416, iteration: 168866
loss: 1.0147825479507446,grad_norm: 0.9584133179090628, iteration: 168867
loss: 0.9981681108474731,grad_norm: 0.9999989502254104, iteration: 168868
loss: 1.0220997333526611,grad_norm: 0.9806257102074838, iteration: 168869
loss: 0.9852405786514282,grad_norm: 0.99999902778485, iteration: 168870
loss: 1.0081241130828857,grad_norm: 0.9013219896077429, iteration: 168871
loss: 1.0211865901947021,grad_norm: 0.9999993084514717, iteration: 168872
loss: 0.963162899017334,grad_norm: 0.9550617709534244, iteration: 168873
loss: 1.0126162767410278,grad_norm: 0.99999912525261, iteration: 168874
loss: 0.9833242893218994,grad_norm: 0.8990671520495899, iteration: 168875
loss: 0.9913432598114014,grad_norm: 0.8756230374579822, iteration: 168876
loss: 1.0091989040374756,grad_norm: 0.9999991930231311, iteration: 168877
loss: 0.9917687773704529,grad_norm: 0.999999079437001, iteration: 168878
loss: 0.9856840968132019,grad_norm: 0.9999991781632882, iteration: 168879
loss: 1.0139721632003784,grad_norm: 0.9999998321642758, iteration: 168880
loss: 0.9956802129745483,grad_norm: 0.8365269363398526, iteration: 168881
loss: 1.0214711427688599,grad_norm: 0.9999991191146846, iteration: 168882
loss: 0.9631810188293457,grad_norm: 0.8963956710256675, iteration: 168883
loss: 1.0362720489501953,grad_norm: 0.9908724887754091, iteration: 168884
loss: 0.9676513671875,grad_norm: 0.9999993772762112, iteration: 168885
loss: 0.9936624765396118,grad_norm: 0.9799082468024713, iteration: 168886
loss: 0.9979904294013977,grad_norm: 0.975101296663513, iteration: 168887
loss: 0.9892861843109131,grad_norm: 0.8523411468027314, iteration: 168888
loss: 0.964790403842926,grad_norm: 0.9999992655663414, iteration: 168889
loss: 0.9765963554382324,grad_norm: 0.9999991001735429, iteration: 168890
loss: 0.9878199100494385,grad_norm: 0.8481078261387492, iteration: 168891
loss: 1.0128980875015259,grad_norm: 0.9999990818481083, iteration: 168892
loss: 0.9770784378051758,grad_norm: 0.9370692529573081, iteration: 168893
loss: 1.0359630584716797,grad_norm: 0.8968994453295419, iteration: 168894
loss: 0.9921680092811584,grad_norm: 0.8600377123890753, iteration: 168895
loss: 0.9751565456390381,grad_norm: 0.9999991438622546, iteration: 168896
loss: 1.021159291267395,grad_norm: 0.9152681258072474, iteration: 168897
loss: 1.0187292098999023,grad_norm: 0.9999991505315624, iteration: 168898
loss: 1.0038857460021973,grad_norm: 0.9999992317205597, iteration: 168899
loss: 0.993519127368927,grad_norm: 0.8986698983609415, iteration: 168900
loss: 1.0818051099777222,grad_norm: 0.999999933505023, iteration: 168901
loss: 1.0055848360061646,grad_norm: 0.9999991108440293, iteration: 168902
loss: 1.0226073265075684,grad_norm: 0.9999990460994268, iteration: 168903
loss: 1.0022324323654175,grad_norm: 0.9999994874800298, iteration: 168904
loss: 0.9797177314758301,grad_norm: 0.9999989584958331, iteration: 168905
loss: 0.9922488927841187,grad_norm: 0.9999992020926757, iteration: 168906
loss: 1.0191832780838013,grad_norm: 0.999999122601238, iteration: 168907
loss: 0.965587854385376,grad_norm: 0.9999990397595345, iteration: 168908
loss: 1.018312692642212,grad_norm: 0.9999989894172802, iteration: 168909
loss: 1.0087599754333496,grad_norm: 0.9487912706550492, iteration: 168910
loss: 1.0071619749069214,grad_norm: 0.9657542363599971, iteration: 168911
loss: 0.9920079112052917,grad_norm: 0.9999992236080181, iteration: 168912
loss: 1.002606749534607,grad_norm: 0.9999991312396581, iteration: 168913
loss: 1.001839280128479,grad_norm: 0.9999990901684291, iteration: 168914
loss: 1.0190433263778687,grad_norm: 0.9628440252467431, iteration: 168915
loss: 0.9963006973266602,grad_norm: 0.9999989815847126, iteration: 168916
loss: 1.0318617820739746,grad_norm: 0.9999991212303059, iteration: 168917
loss: 0.9584634304046631,grad_norm: 0.9999989495592062, iteration: 168918
loss: 1.0010809898376465,grad_norm: 0.9743625488990597, iteration: 168919
loss: 1.0077677965164185,grad_norm: 0.9999991653653414, iteration: 168920
loss: 0.9752236008644104,grad_norm: 0.999999125028945, iteration: 168921
loss: 1.038425087928772,grad_norm: 0.9999990529310621, iteration: 168922
loss: 0.9885804653167725,grad_norm: 0.9609185140587236, iteration: 168923
loss: 0.953906238079071,grad_norm: 0.9999991899300767, iteration: 168924
loss: 1.0089848041534424,grad_norm: 0.9999991418715207, iteration: 168925
loss: 1.019245982170105,grad_norm: 0.9539555798773934, iteration: 168926
loss: 0.9701325297355652,grad_norm: 0.9999990294755449, iteration: 168927
loss: 0.9870584011077881,grad_norm: 0.9999990662154675, iteration: 168928
loss: 0.992845892906189,grad_norm: 0.8126760126878516, iteration: 168929
loss: 0.9488561749458313,grad_norm: 0.9999991057762796, iteration: 168930
loss: 0.9965931177139282,grad_norm: 0.9999991736041937, iteration: 168931
loss: 0.9808455109596252,grad_norm: 0.9719150065232021, iteration: 168932
loss: 0.9638547897338867,grad_norm: 0.890412305120536, iteration: 168933
loss: 0.9961051344871521,grad_norm: 0.999999269154569, iteration: 168934
loss: 0.9806151390075684,grad_norm: 0.9676783432138402, iteration: 168935
loss: 1.0109500885009766,grad_norm: 0.9999989934356147, iteration: 168936
loss: 0.9867764115333557,grad_norm: 0.939098408636851, iteration: 168937
loss: 0.9890202879905701,grad_norm: 0.9383451776400025, iteration: 168938
loss: 1.0130144357681274,grad_norm: 0.9999991124389671, iteration: 168939
loss: 1.0085198879241943,grad_norm: 0.9920797191952783, iteration: 168940
loss: 1.0082061290740967,grad_norm: 0.9999992850851304, iteration: 168941
loss: 1.0233697891235352,grad_norm: 0.9149158900414844, iteration: 168942
loss: 1.0051478147506714,grad_norm: 0.9999990014740049, iteration: 168943
loss: 0.9636040925979614,grad_norm: 0.9999992838395195, iteration: 168944
loss: 1.010032296180725,grad_norm: 0.9141053757523199, iteration: 168945
loss: 0.9626075029373169,grad_norm: 0.9999989703900523, iteration: 168946
loss: 1.0239007472991943,grad_norm: 0.9999990402974437, iteration: 168947
loss: 0.9962059855461121,grad_norm: 0.9999990964192986, iteration: 168948
loss: 0.985842764377594,grad_norm: 0.8650228121027679, iteration: 168949
loss: 0.9883872866630554,grad_norm: 0.9689053001376139, iteration: 168950
loss: 1.0060250759124756,grad_norm: 0.9999989605457631, iteration: 168951
loss: 1.0098432302474976,grad_norm: 0.9999991434204965, iteration: 168952
loss: 0.9596108794212341,grad_norm: 0.9412689667497979, iteration: 168953
loss: 0.9689171314239502,grad_norm: 0.9467647464103923, iteration: 168954
loss: 1.001935601234436,grad_norm: 0.9999992162514336, iteration: 168955
loss: 1.0254398584365845,grad_norm: 0.9999990904112168, iteration: 168956
loss: 0.9818695783615112,grad_norm: 0.9999991473804661, iteration: 168957
loss: 1.0077531337738037,grad_norm: 0.9999991391596592, iteration: 168958
loss: 1.0098925828933716,grad_norm: 0.8681977377186929, iteration: 168959
loss: 1.008247971534729,grad_norm: 0.9999991188587306, iteration: 168960
loss: 1.0066802501678467,grad_norm: 0.9999992330217621, iteration: 168961
loss: 1.009589433670044,grad_norm: 0.999999175598208, iteration: 168962
loss: 0.9785276055335999,grad_norm: 0.9777686628175487, iteration: 168963
loss: 0.9583216309547424,grad_norm: 0.8478958920952384, iteration: 168964
loss: 0.9609754681587219,grad_norm: 0.8817089980275306, iteration: 168965
loss: 1.0151747465133667,grad_norm: 0.875074942967747, iteration: 168966
loss: 1.009412407875061,grad_norm: 0.999999172627678, iteration: 168967
loss: 0.984085202217102,grad_norm: 0.9305908085646849, iteration: 168968
loss: 1.0396424531936646,grad_norm: 0.9999996635495827, iteration: 168969
loss: 0.9883316159248352,grad_norm: 0.9509315852079664, iteration: 168970
loss: 0.9986777305603027,grad_norm: 0.9010122781011453, iteration: 168971
loss: 1.0280004739761353,grad_norm: 0.993168223608851, iteration: 168972
loss: 1.006367802619934,grad_norm: 0.9210699947190432, iteration: 168973
loss: 1.0011004209518433,grad_norm: 0.7884536307930162, iteration: 168974
loss: 0.9698819518089294,grad_norm: 0.9999990565536668, iteration: 168975
loss: 0.9940054416656494,grad_norm: 0.9999991026911439, iteration: 168976
loss: 1.022001028060913,grad_norm: 0.8694909111394051, iteration: 168977
loss: 0.9676439762115479,grad_norm: 0.9780150841724412, iteration: 168978
loss: 1.012264609336853,grad_norm: 0.9500866141213656, iteration: 168979
loss: 0.9726141095161438,grad_norm: 0.9741906470398585, iteration: 168980
loss: 1.0156692266464233,grad_norm: 0.8548806357506071, iteration: 168981
loss: 1.0103919506072998,grad_norm: 0.9473660856292924, iteration: 168982
loss: 1.0020904541015625,grad_norm: 0.9453506332537474, iteration: 168983
loss: 0.9976374506950378,grad_norm: 0.9318003052843752, iteration: 168984
loss: 1.0423555374145508,grad_norm: 0.9999991891143778, iteration: 168985
loss: 0.9831005930900574,grad_norm: 0.8599111262992718, iteration: 168986
loss: 0.9999322295188904,grad_norm: 0.8073877552684686, iteration: 168987
loss: 0.980628252029419,grad_norm: 0.9999991521434768, iteration: 168988
loss: 1.0506682395935059,grad_norm: 0.9999992793078079, iteration: 168989
loss: 0.9834616780281067,grad_norm: 0.858398041940478, iteration: 168990
loss: 1.0185725688934326,grad_norm: 0.9273696038872828, iteration: 168991
loss: 0.9807662963867188,grad_norm: 0.9940037898677548, iteration: 168992
loss: 0.9994890689849854,grad_norm: 0.9999989475101623, iteration: 168993
loss: 1.0173747539520264,grad_norm: 0.9909937711665173, iteration: 168994
loss: 0.9904609322547913,grad_norm: 0.9999991896869254, iteration: 168995
loss: 0.9917717576026917,grad_norm: 0.9098081796306291, iteration: 168996
loss: 0.9473121166229248,grad_norm: 0.9999991160345707, iteration: 168997
loss: 1.0061447620391846,grad_norm: 0.9999992227414648, iteration: 168998
loss: 0.98490971326828,grad_norm: 0.9999990567515007, iteration: 168999
loss: 1.0482587814331055,grad_norm: 0.9999990787981083, iteration: 169000
loss: 1.0092122554779053,grad_norm: 0.9999993064103652, iteration: 169001
loss: 0.9710685610771179,grad_norm: 0.9999992304688504, iteration: 169002
loss: 0.9543846249580383,grad_norm: 0.9999991418524091, iteration: 169003
loss: 0.9709980487823486,grad_norm: 0.9999992043769982, iteration: 169004
loss: 0.990605890750885,grad_norm: 0.9618335531249227, iteration: 169005
loss: 0.9735088348388672,grad_norm: 0.9689626772223384, iteration: 169006
loss: 1.0305216312408447,grad_norm: 0.999998928823051, iteration: 169007
loss: 1.019753098487854,grad_norm: 0.918110737470704, iteration: 169008
loss: 0.9894357323646545,grad_norm: 0.8420160873173067, iteration: 169009
loss: 0.9334026575088501,grad_norm: 0.9999991158435979, iteration: 169010
loss: 0.9860841631889343,grad_norm: 0.999999017850102, iteration: 169011
loss: 1.0180116891860962,grad_norm: 0.9999990200554171, iteration: 169012
loss: 1.0157251358032227,grad_norm: 0.8831072892984946, iteration: 169013
loss: 1.003695011138916,grad_norm: 0.9999991136418933, iteration: 169014
loss: 1.0209141969680786,grad_norm: 0.9999992210737275, iteration: 169015
loss: 1.019875407218933,grad_norm: 0.9999991312622301, iteration: 169016
loss: 0.9817991852760315,grad_norm: 0.9391903856853824, iteration: 169017
loss: 0.9992985129356384,grad_norm: 0.9999990230252289, iteration: 169018
loss: 0.9794963002204895,grad_norm: 0.9059384378156338, iteration: 169019
loss: 1.026355266571045,grad_norm: 0.9660779432452704, iteration: 169020
loss: 0.9553118348121643,grad_norm: 0.9999990835138776, iteration: 169021
loss: 0.9917308688163757,grad_norm: 0.9999989191434824, iteration: 169022
loss: 0.9828728437423706,grad_norm: 0.9091328193922946, iteration: 169023
loss: 0.9642356634140015,grad_norm: 0.93360950841677, iteration: 169024
loss: 1.0197151899337769,grad_norm: 0.9999991826213087, iteration: 169025
loss: 0.9423476457595825,grad_norm: 0.9999993978724212, iteration: 169026
loss: 0.9927524924278259,grad_norm: 0.995848104465668, iteration: 169027
loss: 1.0224847793579102,grad_norm: 0.9999991646042626, iteration: 169028
loss: 1.0024648904800415,grad_norm: 0.9999991015896773, iteration: 169029
loss: 0.980065107345581,grad_norm: 0.9182881597750889, iteration: 169030
loss: 0.9820751547813416,grad_norm: 0.7942835559722963, iteration: 169031
loss: 0.993588387966156,grad_norm: 0.9463035709899137, iteration: 169032
loss: 1.0399985313415527,grad_norm: 0.8385354763619178, iteration: 169033
loss: 1.0410115718841553,grad_norm: 0.973695073843228, iteration: 169034
loss: 0.9730041027069092,grad_norm: 0.9178352287550368, iteration: 169035
loss: 0.9882926344871521,grad_norm: 0.8213863708838858, iteration: 169036
loss: 0.9892258048057556,grad_norm: 0.8923774117820146, iteration: 169037
loss: 0.9765481352806091,grad_norm: 0.8668156372248794, iteration: 169038
loss: 0.9662302732467651,grad_norm: 0.9999992274529622, iteration: 169039
loss: 1.0129752159118652,grad_norm: 0.9999990772116591, iteration: 169040
loss: 1.0042699575424194,grad_norm: 0.9999991539134468, iteration: 169041
loss: 1.0218943357467651,grad_norm: 0.9999997997410205, iteration: 169042
loss: 0.995471715927124,grad_norm: 0.9999989084668934, iteration: 169043
loss: 0.9788870811462402,grad_norm: 0.9999991318083136, iteration: 169044
loss: 1.047641634941101,grad_norm: 0.979931577139379, iteration: 169045
loss: 1.0711826086044312,grad_norm: 0.9999991406957603, iteration: 169046
loss: 1.0099859237670898,grad_norm: 0.9999991979660626, iteration: 169047
loss: 0.9885997772216797,grad_norm: 0.9999990752329779, iteration: 169048
loss: 0.997561514377594,grad_norm: 0.9617515516214892, iteration: 169049
loss: 1.0026066303253174,grad_norm: 0.9615905893634583, iteration: 169050
loss: 1.011240839958191,grad_norm: 0.9768593128412684, iteration: 169051
loss: 0.9785597920417786,grad_norm: 0.9637578944452574, iteration: 169052
loss: 1.0088788270950317,grad_norm: 0.9094174621625918, iteration: 169053
loss: 1.0036532878875732,grad_norm: 0.9999995663974067, iteration: 169054
loss: 0.9968236684799194,grad_norm: 0.8962778181852917, iteration: 169055
loss: 1.0221188068389893,grad_norm: 0.999999167981861, iteration: 169056
loss: 1.0041366815567017,grad_norm: 0.9980053889190632, iteration: 169057
loss: 1.007837176322937,grad_norm: 0.91713867172773, iteration: 169058
loss: 1.0015249252319336,grad_norm: 0.9597244342247683, iteration: 169059
loss: 1.083063006401062,grad_norm: 0.9999991076224375, iteration: 169060
loss: 0.9874100089073181,grad_norm: 0.9999991583294151, iteration: 169061
loss: 1.0727890729904175,grad_norm: 0.9552683733987353, iteration: 169062
loss: 1.0186793804168701,grad_norm: 0.9999990585713507, iteration: 169063
loss: 0.9918594360351562,grad_norm: 0.9999991042759628, iteration: 169064
loss: 0.9808679819107056,grad_norm: 0.9129719320394283, iteration: 169065
loss: 0.944479763507843,grad_norm: 0.9999988853767351, iteration: 169066
loss: 1.0192549228668213,grad_norm: 0.8726614451220206, iteration: 169067
loss: 0.9595133066177368,grad_norm: 0.9126964267835026, iteration: 169068
loss: 1.0462971925735474,grad_norm: 0.9999992658322542, iteration: 169069
loss: 1.0552560091018677,grad_norm: 0.9999997529126852, iteration: 169070
loss: 1.0498679876327515,grad_norm: 0.9999994562177623, iteration: 169071
loss: 1.0107131004333496,grad_norm: 0.9612703266149922, iteration: 169072
loss: 1.0246638059616089,grad_norm: 0.8357279526091697, iteration: 169073
loss: 0.9791337847709656,grad_norm: 0.9671971421930631, iteration: 169074
loss: 1.0074827671051025,grad_norm: 0.9734551191878511, iteration: 169075
loss: 1.014858603477478,grad_norm: 0.9999990397470065, iteration: 169076
loss: 0.9754139184951782,grad_norm: 0.999999086437435, iteration: 169077
loss: 0.9539218544960022,grad_norm: 0.9837996252433308, iteration: 169078
loss: 0.9904937148094177,grad_norm: 0.9053726698896275, iteration: 169079
loss: 0.9960080981254578,grad_norm: 0.9999991317392486, iteration: 169080
loss: 0.9975109100341797,grad_norm: 0.9999994821514221, iteration: 169081
loss: 1.0042327642440796,grad_norm: 0.9971878608779893, iteration: 169082
loss: 0.9971514940261841,grad_norm: 0.9248022316509165, iteration: 169083
loss: 1.02816903591156,grad_norm: 0.9999990287473896, iteration: 169084
loss: 0.9694387316703796,grad_norm: 0.899619984857846, iteration: 169085
loss: 1.0307488441467285,grad_norm: 0.9253931141210129, iteration: 169086
loss: 1.0106135606765747,grad_norm: 0.9999991017807113, iteration: 169087
loss: 0.9822027683258057,grad_norm: 0.9999991915767805, iteration: 169088
loss: 1.0248197317123413,grad_norm: 0.8353853856952591, iteration: 169089
loss: 1.0188374519348145,grad_norm: 0.9999991912244495, iteration: 169090
loss: 1.012630581855774,grad_norm: 0.9999989820012403, iteration: 169091
loss: 1.0201040506362915,grad_norm: 0.9999990160932695, iteration: 169092
loss: 0.9930292367935181,grad_norm: 0.9999989342492558, iteration: 169093
loss: 0.9881559610366821,grad_norm: 0.9999990837291123, iteration: 169094
loss: 1.0244715213775635,grad_norm: 0.9652715731422633, iteration: 169095
loss: 1.0163429975509644,grad_norm: 0.8814710473147223, iteration: 169096
loss: 1.0086042881011963,grad_norm: 0.9999990664158448, iteration: 169097
loss: 0.994350016117096,grad_norm: 0.9999990745016482, iteration: 169098
loss: 1.0193997621536255,grad_norm: 0.9999990927982525, iteration: 169099
loss: 1.0068492889404297,grad_norm: 0.9999996114651517, iteration: 169100
loss: 0.995690643787384,grad_norm: 0.9401524000141799, iteration: 169101
loss: 1.035854697227478,grad_norm: 0.9999992641091925, iteration: 169102
loss: 1.0124804973602295,grad_norm: 0.9999991296909395, iteration: 169103
loss: 1.025800347328186,grad_norm: 0.9884900122736466, iteration: 169104
loss: 1.0046378374099731,grad_norm: 0.999999224388219, iteration: 169105
loss: 1.0121424198150635,grad_norm: 0.9411329866934065, iteration: 169106
loss: 1.0049325227737427,grad_norm: 0.9999993298431654, iteration: 169107
loss: 0.9829971194267273,grad_norm: 0.9999990253215993, iteration: 169108
loss: 1.0150824785232544,grad_norm: 0.8994555445429409, iteration: 169109
loss: 0.9945386648178101,grad_norm: 0.7796216236428736, iteration: 169110
loss: 1.0184499025344849,grad_norm: 0.999998999259628, iteration: 169111
loss: 1.032321572303772,grad_norm: 0.9463420559525444, iteration: 169112
loss: 0.9768245220184326,grad_norm: 0.9707512727496711, iteration: 169113
loss: 1.0124993324279785,grad_norm: 0.9999995675063, iteration: 169114
loss: 0.994044303894043,grad_norm: 0.947695940528482, iteration: 169115
loss: 1.013350009918213,grad_norm: 0.999999336890452, iteration: 169116
loss: 0.988065779209137,grad_norm: 0.9214694186052432, iteration: 169117
loss: 1.0029932260513306,grad_norm: 0.9550711042587192, iteration: 169118
loss: 0.9951507449150085,grad_norm: 0.9999997044611942, iteration: 169119
loss: 0.9795860052108765,grad_norm: 0.9999990369035203, iteration: 169120
loss: 0.9984109997749329,grad_norm: 0.9999989187284672, iteration: 169121
loss: 1.0039076805114746,grad_norm: 0.9235354960968265, iteration: 169122
loss: 0.9858789443969727,grad_norm: 0.9790996162321932, iteration: 169123
loss: 1.0889220237731934,grad_norm: 0.9999992905562363, iteration: 169124
loss: 0.9869673252105713,grad_norm: 0.8094142289090306, iteration: 169125
loss: 1.0459133386611938,grad_norm: 0.9999994398597899, iteration: 169126
loss: 1.0151232481002808,grad_norm: 0.9999990864502587, iteration: 169127
loss: 1.0433743000030518,grad_norm: 0.9999990452294227, iteration: 169128
loss: 0.9948432445526123,grad_norm: 0.9999991243581956, iteration: 169129
loss: 0.9584807753562927,grad_norm: 0.9698727414563377, iteration: 169130
loss: 0.9391127228736877,grad_norm: 0.9999990224084788, iteration: 169131
loss: 0.9972060322761536,grad_norm: 0.9999992020419812, iteration: 169132
loss: 1.002001404762268,grad_norm: 0.9670193721624055, iteration: 169133
loss: 0.9830678105354309,grad_norm: 0.9697432109992659, iteration: 169134
loss: 0.9733163714408875,grad_norm: 0.9999990395057898, iteration: 169135
loss: 1.00382399559021,grad_norm: 0.9557102853138995, iteration: 169136
loss: 1.0411429405212402,grad_norm: 0.9999991506988375, iteration: 169137
loss: 1.0517092943191528,grad_norm: 0.9999990733546076, iteration: 169138
loss: 1.0051296949386597,grad_norm: 0.9999991342333945, iteration: 169139
loss: 0.9750276803970337,grad_norm: 0.9999990314286793, iteration: 169140
loss: 0.989302933216095,grad_norm: 0.9821913338091979, iteration: 169141
loss: 1.0088168382644653,grad_norm: 0.886005010991455, iteration: 169142
loss: 0.9671993851661682,grad_norm: 0.9760114487090324, iteration: 169143
loss: 1.0171340703964233,grad_norm: 0.9016658256215151, iteration: 169144
loss: 0.9832282066345215,grad_norm: 0.9830539366908239, iteration: 169145
loss: 0.9987844824790955,grad_norm: 0.9999992187376665, iteration: 169146
loss: 0.9929128289222717,grad_norm: 0.8134477154240619, iteration: 169147
loss: 1.0137635469436646,grad_norm: 0.8521363231396093, iteration: 169148
loss: 1.001368761062622,grad_norm: 0.999998950044707, iteration: 169149
loss: 1.003522515296936,grad_norm: 0.9999991950151413, iteration: 169150
loss: 0.9650679230690002,grad_norm: 0.9999991941777924, iteration: 169151
loss: 0.9618793725967407,grad_norm: 0.9386520566880665, iteration: 169152
loss: 1.0246502161026,grad_norm: 0.9999991318350009, iteration: 169153
loss: 1.005203366279602,grad_norm: 0.956548769680464, iteration: 169154
loss: 0.9912829399108887,grad_norm: 0.9780553640533334, iteration: 169155
loss: 1.0245634317398071,grad_norm: 0.9999991490719335, iteration: 169156
loss: 0.969480574131012,grad_norm: 0.9999991982671717, iteration: 169157
loss: 1.0242540836334229,grad_norm: 0.9663543579934586, iteration: 169158
loss: 1.0029091835021973,grad_norm: 0.8686867462332812, iteration: 169159
loss: 1.0189111232757568,grad_norm: 0.7415073509171363, iteration: 169160
loss: 0.957779049873352,grad_norm: 0.9538730775722958, iteration: 169161
loss: 0.9833154678344727,grad_norm: 0.8905525589877398, iteration: 169162
loss: 0.9927871823310852,grad_norm: 0.9999991865486634, iteration: 169163
loss: 0.9944967031478882,grad_norm: 0.9999990996157796, iteration: 169164
loss: 1.0332658290863037,grad_norm: 0.9999993372640978, iteration: 169165
loss: 0.981386125087738,grad_norm: 0.9999991647869936, iteration: 169166
loss: 0.9793923497200012,grad_norm: 0.9999992123702456, iteration: 169167
loss: 0.9839925765991211,grad_norm: 0.9999992459125709, iteration: 169168
loss: 1.0095734596252441,grad_norm: 0.9266027336414587, iteration: 169169
loss: 0.9972800612449646,grad_norm: 0.9999991640037689, iteration: 169170
loss: 1.023568034172058,grad_norm: 0.9025503993635894, iteration: 169171
loss: 0.9958829283714294,grad_norm: 0.9999990648034452, iteration: 169172
loss: 1.0271443128585815,grad_norm: 0.9925677106452896, iteration: 169173
loss: 1.024535059928894,grad_norm: 0.9663888241296138, iteration: 169174
loss: 0.9665910601615906,grad_norm: 0.9999990877788343, iteration: 169175
loss: 1.008031964302063,grad_norm: 0.9999989595581468, iteration: 169176
loss: 0.994242250919342,grad_norm: 0.999999144653974, iteration: 169177
loss: 0.9881194233894348,grad_norm: 0.9999990417921149, iteration: 169178
loss: 1.0438357591629028,grad_norm: 0.9999991990694032, iteration: 169179
loss: 0.962738037109375,grad_norm: 0.9883281574063458, iteration: 169180
loss: 0.98280930519104,grad_norm: 0.9999992061459801, iteration: 169181
loss: 0.9915505051612854,grad_norm: 0.7955047717401085, iteration: 169182
loss: 1.0164732933044434,grad_norm: 0.9894119383359885, iteration: 169183
loss: 0.9665947556495667,grad_norm: 0.9285548909140778, iteration: 169184
loss: 0.9795752763748169,grad_norm: 0.860638884426182, iteration: 169185
loss: 0.9812310934066772,grad_norm: 0.9999990994227408, iteration: 169186
loss: 0.9845976829528809,grad_norm: 0.9540469024977641, iteration: 169187
loss: 1.0018614530563354,grad_norm: 0.9999992999710553, iteration: 169188
loss: 0.9999099969863892,grad_norm: 0.8586902386814349, iteration: 169189
loss: 1.0056322813034058,grad_norm: 0.9999992329950305, iteration: 169190
loss: 0.9724226593971252,grad_norm: 0.8259117708722196, iteration: 169191
loss: 0.9637476205825806,grad_norm: 0.9999991830569437, iteration: 169192
loss: 0.9948635101318359,grad_norm: 0.8145718370116022, iteration: 169193
loss: 0.9957917928695679,grad_norm: 0.9999990654307376, iteration: 169194
loss: 1.0128172636032104,grad_norm: 0.9999991461263215, iteration: 169195
loss: 0.9945068359375,grad_norm: 0.999999104462869, iteration: 169196
loss: 1.0455290079116821,grad_norm: 0.9999990895646086, iteration: 169197
loss: 0.9969924688339233,grad_norm: 0.9003497354493133, iteration: 169198
loss: 1.002764105796814,grad_norm: 0.9999991471409383, iteration: 169199
loss: 0.9754857420921326,grad_norm: 0.9942998162470489, iteration: 169200
loss: 1.1317834854125977,grad_norm: 0.9999997708923238, iteration: 169201
loss: 1.0066195726394653,grad_norm: 0.9999991153924137, iteration: 169202
loss: 0.9991328120231628,grad_norm: 0.9999993064008632, iteration: 169203
loss: 1.0118175745010376,grad_norm: 0.9999999773115477, iteration: 169204
loss: 0.9976741671562195,grad_norm: 0.9999990826664944, iteration: 169205
loss: 0.9602516293525696,grad_norm: 0.9999990242816869, iteration: 169206
loss: 1.0448684692382812,grad_norm: 0.9999992366299898, iteration: 169207
loss: 1.0023784637451172,grad_norm: 0.9999992289358893, iteration: 169208
loss: 1.01618492603302,grad_norm: 0.9999991896473802, iteration: 169209
loss: 0.979047954082489,grad_norm: 0.9999992848999442, iteration: 169210
loss: 1.0213533639907837,grad_norm: 0.9999990051935975, iteration: 169211
loss: 1.0270116329193115,grad_norm: 0.9739790056626134, iteration: 169212
loss: 1.0130674839019775,grad_norm: 0.8390689312197925, iteration: 169213
loss: 0.996669352054596,grad_norm: 0.9999991491696181, iteration: 169214
loss: 0.9643304347991943,grad_norm: 0.9116183509040735, iteration: 169215
loss: 0.944496214389801,grad_norm: 0.999999000328841, iteration: 169216
loss: 1.0291892290115356,grad_norm: 0.9999990578922233, iteration: 169217
loss: 0.9861038327217102,grad_norm: 0.8520728576761079, iteration: 169218
loss: 0.9905668497085571,grad_norm: 0.9350085397615732, iteration: 169219
loss: 0.9714609384536743,grad_norm: 0.830826853822572, iteration: 169220
loss: 0.9674568772315979,grad_norm: 0.8295047048906316, iteration: 169221
loss: 0.998267412185669,grad_norm: 0.9999991205801179, iteration: 169222
loss: 0.9900006651878357,grad_norm: 0.9999990615197818, iteration: 169223
loss: 1.0330758094787598,grad_norm: 0.9999991125719914, iteration: 169224
loss: 0.9986208081245422,grad_norm: 0.9994793406593673, iteration: 169225
loss: 0.9391579031944275,grad_norm: 0.965309073466618, iteration: 169226
loss: 1.0168089866638184,grad_norm: 0.9999991607660371, iteration: 169227
loss: 0.9971175193786621,grad_norm: 0.999999254319752, iteration: 169228
loss: 1.0946868658065796,grad_norm: 0.9999995758732546, iteration: 169229
loss: 0.9809799194335938,grad_norm: 0.9999989908527288, iteration: 169230
loss: 0.9963140487670898,grad_norm: 0.9999990293181278, iteration: 169231
loss: 0.9744150042533875,grad_norm: 0.9999991152969415, iteration: 169232
loss: 0.9973861575126648,grad_norm: 0.9593107301502448, iteration: 169233
loss: 0.972028374671936,grad_norm: 0.9999989988129497, iteration: 169234
loss: 1.001944899559021,grad_norm: 0.9610551289237723, iteration: 169235
loss: 1.0102490186691284,grad_norm: 0.9999994548714662, iteration: 169236
loss: 1.0009374618530273,grad_norm: 0.9121049182827758, iteration: 169237
loss: 0.9957412481307983,grad_norm: 0.99999915556199, iteration: 169238
loss: 1.0041568279266357,grad_norm: 0.9999991703118434, iteration: 169239
loss: 0.9913697838783264,grad_norm: 0.9999989945016919, iteration: 169240
loss: 0.9693203568458557,grad_norm: 0.9279684005299227, iteration: 169241
loss: 0.9901460409164429,grad_norm: 0.839724562587086, iteration: 169242
loss: 0.9782096743583679,grad_norm: 0.9999992563599199, iteration: 169243
loss: 0.9578990936279297,grad_norm: 0.9816150089618194, iteration: 169244
loss: 0.9798809885978699,grad_norm: 0.9999990452078332, iteration: 169245
loss: 0.9781199097633362,grad_norm: 0.999998984784523, iteration: 169246
loss: 0.9867849349975586,grad_norm: 0.9999989929472983, iteration: 169247
loss: 1.0033143758773804,grad_norm: 0.9999992024498932, iteration: 169248
loss: 1.0044636726379395,grad_norm: 0.9999992500200178, iteration: 169249
loss: 1.0250152349472046,grad_norm: 0.9999992413850807, iteration: 169250
loss: 1.0275084972381592,grad_norm: 0.9999996008948078, iteration: 169251
loss: 0.994296669960022,grad_norm: 0.861517345451705, iteration: 169252
loss: 0.9884909987449646,grad_norm: 0.9992037900864321, iteration: 169253
loss: 0.9838371276855469,grad_norm: 0.9999991094571478, iteration: 169254
loss: 1.01747465133667,grad_norm: 0.999999203862145, iteration: 169255
loss: 0.9842972755432129,grad_norm: 0.89864213645849, iteration: 169256
loss: 0.9895716905593872,grad_norm: 0.9559786685727027, iteration: 169257
loss: 0.9756556749343872,grad_norm: 0.9644277285299789, iteration: 169258
loss: 0.9601435661315918,grad_norm: 0.9999991507134531, iteration: 169259
loss: 1.0551468133926392,grad_norm: 0.9999993830008561, iteration: 169260
loss: 1.0477294921875,grad_norm: 0.9999992177576382, iteration: 169261
loss: 1.100928783416748,grad_norm: 0.9999993661466987, iteration: 169262
loss: 1.0104777812957764,grad_norm: 0.9999991794715626, iteration: 169263
loss: 1.0254307985305786,grad_norm: 0.9999990944368066, iteration: 169264
loss: 0.9992988705635071,grad_norm: 0.8159157482576095, iteration: 169265
loss: 0.9878484606742859,grad_norm: 0.8726344229696048, iteration: 169266
loss: 0.9763362407684326,grad_norm: 0.9999990364145288, iteration: 169267
loss: 0.9813703298568726,grad_norm: 0.9573843987026677, iteration: 169268
loss: 1.018373727798462,grad_norm: 0.9999992117934717, iteration: 169269
loss: 0.9876680374145508,grad_norm: 0.9999991344504483, iteration: 169270
loss: 1.008684515953064,grad_norm: 0.9999990617761008, iteration: 169271
loss: 0.9868056774139404,grad_norm: 0.9999993871165405, iteration: 169272
loss: 0.9652726650238037,grad_norm: 0.9999992999680049, iteration: 169273
loss: 1.0444495677947998,grad_norm: 0.9999990196080059, iteration: 169274
loss: 0.9914034605026245,grad_norm: 0.9999990730634809, iteration: 169275
loss: 0.9952046871185303,grad_norm: 0.9761841411139618, iteration: 169276
loss: 1.0439091920852661,grad_norm: 0.999999199237206, iteration: 169277
loss: 1.0384862422943115,grad_norm: 0.9999992326510976, iteration: 169278
loss: 0.997560441493988,grad_norm: 0.9999991340485878, iteration: 169279
loss: 0.990778923034668,grad_norm: 0.8440888135336805, iteration: 169280
loss: 0.9906585812568665,grad_norm: 0.9999990604410137, iteration: 169281
loss: 1.008242130279541,grad_norm: 0.8293717990304583, iteration: 169282
loss: 0.9745013117790222,grad_norm: 0.8376616022830581, iteration: 169283
loss: 1.0068598985671997,grad_norm: 0.9999990653855033, iteration: 169284
loss: 0.9811680912971497,grad_norm: 0.9314404204090824, iteration: 169285
loss: 0.9587376117706299,grad_norm: 0.7941749596335211, iteration: 169286
loss: 0.9665899276733398,grad_norm: 0.9999991417532718, iteration: 169287
loss: 0.9991902709007263,grad_norm: 0.9999991317171287, iteration: 169288
loss: 0.990452229976654,grad_norm: 0.8940712789549797, iteration: 169289
loss: 0.9866666197776794,grad_norm: 0.9572463562117737, iteration: 169290
loss: 0.9621744155883789,grad_norm: 0.8954609455525591, iteration: 169291
loss: 1.0273306369781494,grad_norm: 0.778946923005578, iteration: 169292
loss: 1.0032479763031006,grad_norm: 0.9999993243899422, iteration: 169293
loss: 0.9595105051994324,grad_norm: 0.9999990655360852, iteration: 169294
loss: 1.0187500715255737,grad_norm: 0.999999147569332, iteration: 169295
loss: 1.0310572385787964,grad_norm: 0.9999990472575957, iteration: 169296
loss: 0.9819749593734741,grad_norm: 0.9999991251386448, iteration: 169297
loss: 1.0032552480697632,grad_norm: 0.9186110343819693, iteration: 169298
loss: 0.9858453273773193,grad_norm: 0.9999990738559179, iteration: 169299
loss: 1.0169081687927246,grad_norm: 0.9999993142412571, iteration: 169300
loss: 0.9735604524612427,grad_norm: 0.9999991074784387, iteration: 169301
loss: 1.0319641828536987,grad_norm: 0.9160112914633857, iteration: 169302
loss: 0.9868276119232178,grad_norm: 0.9182029082032961, iteration: 169303
loss: 0.9922161102294922,grad_norm: 0.8897404771869297, iteration: 169304
loss: 1.0442174673080444,grad_norm: 0.9999992102052009, iteration: 169305
loss: 0.9423683881759644,grad_norm: 0.9339452180357384, iteration: 169306
loss: 1.0190306901931763,grad_norm: 0.88301272920776, iteration: 169307
loss: 1.020685076713562,grad_norm: 0.9731654582517221, iteration: 169308
loss: 0.9609508514404297,grad_norm: 0.9999991836430305, iteration: 169309
loss: 1.0015021562576294,grad_norm: 0.9999992503828966, iteration: 169310
loss: 1.0033732652664185,grad_norm: 0.8980417097606496, iteration: 169311
loss: 0.9887934327125549,grad_norm: 0.927623787460345, iteration: 169312
loss: 1.0389814376831055,grad_norm: 0.950095807789029, iteration: 169313
loss: 1.008401870727539,grad_norm: 0.9181626343356828, iteration: 169314
loss: 1.0101202726364136,grad_norm: 0.9999991411543585, iteration: 169315
loss: 0.9784159660339355,grad_norm: 0.9048098009873209, iteration: 169316
loss: 1.0100070238113403,grad_norm: 0.9174212750268341, iteration: 169317
loss: 0.977686882019043,grad_norm: 0.8489863913694966, iteration: 169318
loss: 1.020399570465088,grad_norm: 0.7304312232458642, iteration: 169319
loss: 1.0301560163497925,grad_norm: 0.9999990175213836, iteration: 169320
loss: 0.960060179233551,grad_norm: 0.8505794102664622, iteration: 169321
loss: 0.985201358795166,grad_norm: 0.9999990571965416, iteration: 169322
loss: 0.9940720200538635,grad_norm: 0.901842422572358, iteration: 169323
loss: 1.0264005661010742,grad_norm: 0.9099716524435587, iteration: 169324
loss: 1.0115774869918823,grad_norm: 0.9496952786464451, iteration: 169325
loss: 0.9656678438186646,grad_norm: 0.9176530836542255, iteration: 169326
loss: 0.9976708292961121,grad_norm: 0.9999991833806798, iteration: 169327
loss: 1.050702452659607,grad_norm: 0.9999990765061988, iteration: 169328
loss: 0.9920431971549988,grad_norm: 0.9999991968174309, iteration: 169329
loss: 0.9826703071594238,grad_norm: 0.9102453922875169, iteration: 169330
loss: 1.0451385974884033,grad_norm: 0.9999993322574897, iteration: 169331
loss: 1.0233625173568726,grad_norm: 0.999999071936944, iteration: 169332
loss: 1.02283775806427,grad_norm: 0.9999990817914749, iteration: 169333
loss: 1.1357109546661377,grad_norm: 0.9999994509547266, iteration: 169334
loss: 1.028092384338379,grad_norm: 0.999999069460982, iteration: 169335
loss: 1.048449158668518,grad_norm: 0.999999100380639, iteration: 169336
loss: 0.9774773716926575,grad_norm: 0.9999992008425804, iteration: 169337
loss: 1.0401411056518555,grad_norm: 0.9999990842277718, iteration: 169338
loss: 0.9903007745742798,grad_norm: 0.9653248760391483, iteration: 169339
loss: 0.9920331239700317,grad_norm: 0.9999990837848203, iteration: 169340
loss: 1.008941411972046,grad_norm: 0.9999991327497543, iteration: 169341
loss: 1.0059648752212524,grad_norm: 0.9999991737242258, iteration: 169342
loss: 1.0025811195373535,grad_norm: 0.9118893563331133, iteration: 169343
loss: 1.002292513847351,grad_norm: 0.992454007505252, iteration: 169344
loss: 1.0476715564727783,grad_norm: 0.9999998209648024, iteration: 169345
loss: 0.9804122447967529,grad_norm: 0.8963468499092214, iteration: 169346
loss: 0.9713937640190125,grad_norm: 0.9615742473331614, iteration: 169347
loss: 0.9811887145042419,grad_norm: 0.8933302602029266, iteration: 169348
loss: 0.9836092591285706,grad_norm: 0.9265751956362133, iteration: 169349
loss: 1.0245295763015747,grad_norm: 0.9999990976966515, iteration: 169350
loss: 0.9856570363044739,grad_norm: 0.9999992606702987, iteration: 169351
loss: 0.9895137548446655,grad_norm: 0.9999999301996765, iteration: 169352
loss: 1.0119808912277222,grad_norm: 0.999999115010568, iteration: 169353
loss: 0.9949651956558228,grad_norm: 0.946565187821421, iteration: 169354
loss: 0.9889265298843384,grad_norm: 0.8127348388265396, iteration: 169355
loss: 1.0813370943069458,grad_norm: 0.9999995063152533, iteration: 169356
loss: 0.9560408592224121,grad_norm: 0.9589776946346736, iteration: 169357
loss: 1.0133239030838013,grad_norm: 0.9671673812432461, iteration: 169358
loss: 1.0144953727722168,grad_norm: 0.8848157436652995, iteration: 169359
loss: 0.9854766726493835,grad_norm: 0.9999990723133523, iteration: 169360
loss: 1.0137046575546265,grad_norm: 0.9999991500120677, iteration: 169361
loss: 1.0479319095611572,grad_norm: 0.9999994034235519, iteration: 169362
loss: 0.9982595443725586,grad_norm: 0.9680694423015959, iteration: 169363
loss: 1.0074310302734375,grad_norm: 0.8899735149059004, iteration: 169364
loss: 1.0259701013565063,grad_norm: 0.9765054121712957, iteration: 169365
loss: 1.0147743225097656,grad_norm: 0.7819074943073848, iteration: 169366
loss: 1.030374526977539,grad_norm: 0.9999990974676786, iteration: 169367
loss: 0.9666244387626648,grad_norm: 0.9489145294096817, iteration: 169368
loss: 0.9790281057357788,grad_norm: 0.9404534675609288, iteration: 169369
loss: 0.9918484091758728,grad_norm: 0.9745686023707129, iteration: 169370
loss: 0.9943806529045105,grad_norm: 0.9999990475740977, iteration: 169371
loss: 0.9980169534683228,grad_norm: 0.9999991074557689, iteration: 169372
loss: 0.9980722069740295,grad_norm: 0.9999993314974566, iteration: 169373
loss: 0.9887877106666565,grad_norm: 0.9999990718512293, iteration: 169374
loss: 0.974737286567688,grad_norm: 0.9999999738094355, iteration: 169375
loss: 1.0582317113876343,grad_norm: 0.9999992394968896, iteration: 169376
loss: 1.0919318199157715,grad_norm: 0.9999997158157397, iteration: 169377
loss: 0.9626055359840393,grad_norm: 0.9999992378948062, iteration: 169378
loss: 1.0287421941757202,grad_norm: 0.9999996953439367, iteration: 169379
loss: 0.9974356293678284,grad_norm: 0.9999992865665114, iteration: 169380
loss: 1.005616307258606,grad_norm: 0.9999993167329423, iteration: 169381
loss: 0.9791706800460815,grad_norm: 0.9999990967857718, iteration: 169382
loss: 1.0743669271469116,grad_norm: 0.999999287106531, iteration: 169383
loss: 1.0024195909500122,grad_norm: 0.9999993365151402, iteration: 169384
loss: 1.0306273698806763,grad_norm: 0.9331422629202852, iteration: 169385
loss: 1.026389479637146,grad_norm: 0.9999991012943974, iteration: 169386
loss: 1.0884990692138672,grad_norm: 0.9700781682419122, iteration: 169387
loss: 0.9935117363929749,grad_norm: 0.9145850397916503, iteration: 169388
loss: 1.0145952701568604,grad_norm: 0.9999989351765064, iteration: 169389
loss: 1.0691964626312256,grad_norm: 0.9999991811603784, iteration: 169390
loss: 0.9834994673728943,grad_norm: 0.9999991788131373, iteration: 169391
loss: 1.0547378063201904,grad_norm: 0.9999989898521524, iteration: 169392
loss: 1.027365803718567,grad_norm: 0.9904258812179254, iteration: 169393
loss: 0.9824761152267456,grad_norm: 0.951356466111209, iteration: 169394
loss: 1.0332742929458618,grad_norm: 0.9362401822927545, iteration: 169395
loss: 1.04496169090271,grad_norm: 0.9999998702775903, iteration: 169396
loss: 0.9810997843742371,grad_norm: 0.948926582688895, iteration: 169397
loss: 0.9950911998748779,grad_norm: 0.8841112278789162, iteration: 169398
loss: 0.9857017993927002,grad_norm: 0.9792221892991152, iteration: 169399
loss: 0.9994881749153137,grad_norm: 0.8781345969924819, iteration: 169400
loss: 0.9583293795585632,grad_norm: 0.999999092963681, iteration: 169401
loss: 0.9851754307746887,grad_norm: 0.9023347158659549, iteration: 169402
loss: 1.0871018171310425,grad_norm: 0.9999996483595837, iteration: 169403
loss: 0.991698682308197,grad_norm: 0.9700400335296847, iteration: 169404
loss: 0.960614800453186,grad_norm: 0.9184181996427215, iteration: 169405
loss: 1.0338507890701294,grad_norm: 0.9999991179680849, iteration: 169406
loss: 1.0392491817474365,grad_norm: 0.8728658463106881, iteration: 169407
loss: 1.0498497486114502,grad_norm: 0.9999991727329121, iteration: 169408
loss: 1.0878175497055054,grad_norm: 0.9864707823569534, iteration: 169409
loss: 0.9991465210914612,grad_norm: 0.9999995629750587, iteration: 169410
loss: 1.1403205394744873,grad_norm: 0.9385335672669041, iteration: 169411
loss: 0.9966444969177246,grad_norm: 0.8776076463276482, iteration: 169412
loss: 0.9844340682029724,grad_norm: 0.8977197417461985, iteration: 169413
loss: 0.9769378900527954,grad_norm: 0.9585878447382216, iteration: 169414
loss: 1.080178141593933,grad_norm: 0.9999991013542918, iteration: 169415
loss: 0.9764156937599182,grad_norm: 0.9999990644093029, iteration: 169416
loss: 1.0068467855453491,grad_norm: 0.9633518604547895, iteration: 169417
loss: 1.0094430446624756,grad_norm: 0.9999990673133891, iteration: 169418
loss: 0.9744360446929932,grad_norm: 0.8081538148532014, iteration: 169419
loss: 1.0139681100845337,grad_norm: 0.9999992147004432, iteration: 169420
loss: 0.9850931167602539,grad_norm: 0.9999991891906316, iteration: 169421
loss: 1.0106182098388672,grad_norm: 0.9297769287266655, iteration: 169422
loss: 0.993755042552948,grad_norm: 0.8893813551785518, iteration: 169423
loss: 0.9891281127929688,grad_norm: 0.9799021000406873, iteration: 169424
loss: 1.0149115324020386,grad_norm: 0.9999991238654851, iteration: 169425
loss: 0.9855327606201172,grad_norm: 0.9242584636549074, iteration: 169426
loss: 0.966804563999176,grad_norm: 0.915426150612393, iteration: 169427
loss: 1.0680266618728638,grad_norm: 0.929187749018831, iteration: 169428
loss: 0.9947741031646729,grad_norm: 0.9999991483775249, iteration: 169429
loss: 0.9846267700195312,grad_norm: 0.9999991563522391, iteration: 169430
loss: 1.0155998468399048,grad_norm: 0.9255167409212769, iteration: 169431
loss: 1.0541585683822632,grad_norm: 0.9999996997503675, iteration: 169432
loss: 1.0116887092590332,grad_norm: 0.9440485470061037, iteration: 169433
loss: 0.9786516427993774,grad_norm: 0.9614523635113466, iteration: 169434
loss: 1.068043828010559,grad_norm: 0.8743068580481091, iteration: 169435
loss: 0.9915140867233276,grad_norm: 0.9378037014007029, iteration: 169436
loss: 1.0181552171707153,grad_norm: 0.9824293457419834, iteration: 169437
loss: 1.0090333223342896,grad_norm: 0.9999991025528271, iteration: 169438
loss: 1.0097872018814087,grad_norm: 0.975002139223824, iteration: 169439
loss: 0.9878297448158264,grad_norm: 0.9999990356216429, iteration: 169440
loss: 0.9828932881355286,grad_norm: 0.99999907717473, iteration: 169441
loss: 0.991087019443512,grad_norm: 0.8930681866761183, iteration: 169442
loss: 1.003699541091919,grad_norm: 0.9999989468478057, iteration: 169443
loss: 0.9682120680809021,grad_norm: 0.9406173661641682, iteration: 169444
loss: 0.9932575225830078,grad_norm: 0.8646072879929594, iteration: 169445
loss: 1.0169398784637451,grad_norm: 0.9999991197525583, iteration: 169446
loss: 0.981131374835968,grad_norm: 0.8622974024351902, iteration: 169447
loss: 0.9900708794593811,grad_norm: 0.9999992142213552, iteration: 169448
loss: 1.018301248550415,grad_norm: 0.9534646856085035, iteration: 169449
loss: 1.00244140625,grad_norm: 0.99999928741279, iteration: 169450
loss: 1.0044838190078735,grad_norm: 0.9999991649503281, iteration: 169451
loss: 1.0102871656417847,grad_norm: 0.9174870003466639, iteration: 169452
loss: 1.0363205671310425,grad_norm: 0.9999990296663596, iteration: 169453
loss: 0.9900162816047668,grad_norm: 0.9292744632090436, iteration: 169454
loss: 0.9737421870231628,grad_norm: 0.9639597068267383, iteration: 169455
loss: 0.9619367718696594,grad_norm: 0.999998924596505, iteration: 169456
loss: 0.9719038605690002,grad_norm: 0.9999990700099527, iteration: 169457
loss: 0.9940597414970398,grad_norm: 0.9999994019012327, iteration: 169458
loss: 1.0383646488189697,grad_norm: 0.9999990706752847, iteration: 169459
loss: 0.9834734797477722,grad_norm: 0.9488956625429851, iteration: 169460
loss: 1.0580228567123413,grad_norm: 0.9999999376443609, iteration: 169461
loss: 0.978820264339447,grad_norm: 0.9999991449028683, iteration: 169462
loss: 1.052703619003296,grad_norm: 0.8686406714745587, iteration: 169463
loss: 1.0037258863449097,grad_norm: 0.9999991237029261, iteration: 169464
loss: 1.0194822549819946,grad_norm: 0.9999991506938244, iteration: 169465
loss: 0.9854083061218262,grad_norm: 0.9999991793953432, iteration: 169466
loss: 0.9894170165061951,grad_norm: 0.9726699425135561, iteration: 169467
loss: 0.9392989873886108,grad_norm: 0.9157033341079901, iteration: 169468
loss: 1.057082176208496,grad_norm: 0.9999995751204538, iteration: 169469
loss: 1.038241982460022,grad_norm: 0.9468564484206624, iteration: 169470
loss: 1.144566297531128,grad_norm: 0.9999990443229044, iteration: 169471
loss: 1.0325285196304321,grad_norm: 0.87705434876293, iteration: 169472
loss: 1.005406379699707,grad_norm: 0.9541179161901063, iteration: 169473
loss: 1.0079213380813599,grad_norm: 0.9999992031363839, iteration: 169474
loss: 0.9670853018760681,grad_norm: 0.8529844497520636, iteration: 169475
loss: 1.022887945175171,grad_norm: 0.9999990606680018, iteration: 169476
loss: 0.9733659625053406,grad_norm: 0.9999991079116534, iteration: 169477
loss: 1.0206934213638306,grad_norm: 0.9031326816858898, iteration: 169478
loss: 1.0249289274215698,grad_norm: 0.8576577807249947, iteration: 169479
loss: 1.0140897035598755,grad_norm: 0.9918758736099341, iteration: 169480
loss: 1.0066715478897095,grad_norm: 0.9174556476847627, iteration: 169481
loss: 0.9371508955955505,grad_norm: 0.9999990080165287, iteration: 169482
loss: 0.9702067375183105,grad_norm: 0.9101028197272006, iteration: 169483
loss: 1.0094226598739624,grad_norm: 0.9999990854370899, iteration: 169484
loss: 0.9800708293914795,grad_norm: 0.9598094911424441, iteration: 169485
loss: 1.125160813331604,grad_norm: 0.9999994084806327, iteration: 169486
loss: 1.0239976644515991,grad_norm: 0.9450426030032949, iteration: 169487
loss: 0.9693811535835266,grad_norm: 0.9116573503574813, iteration: 169488
loss: 1.014222502708435,grad_norm: 0.9999991686029608, iteration: 169489
loss: 1.0490398406982422,grad_norm: 0.9999996251087238, iteration: 169490
loss: 1.0043306350708008,grad_norm: 0.8621742908740686, iteration: 169491
loss: 0.9650145769119263,grad_norm: 0.9999990775025033, iteration: 169492
loss: 0.9725167155265808,grad_norm: 0.9923246448455928, iteration: 169493
loss: 1.0188829898834229,grad_norm: 0.936981763138783, iteration: 169494
loss: 1.0215622186660767,grad_norm: 0.9999991077968943, iteration: 169495
loss: 0.9960951209068298,grad_norm: 0.9999992234031959, iteration: 169496
loss: 0.9622994661331177,grad_norm: 0.9999990551843451, iteration: 169497
loss: 1.0861268043518066,grad_norm: 0.9999997178853952, iteration: 169498
loss: 0.9949330687522888,grad_norm: 0.9999992212370217, iteration: 169499
loss: 1.010258436203003,grad_norm: 0.9293497050945821, iteration: 169500
loss: 1.0409449338912964,grad_norm: 0.9999992381174774, iteration: 169501
loss: 1.0202524662017822,grad_norm: 0.9999992162574814, iteration: 169502
loss: 0.9960383772850037,grad_norm: 0.9999990279396945, iteration: 169503
loss: 1.05570387840271,grad_norm: 0.9999990576014532, iteration: 169504
loss: 0.9756544232368469,grad_norm: 0.8670328411469098, iteration: 169505
loss: 0.9865878820419312,grad_norm: 0.9999990083023078, iteration: 169506
loss: 1.0283689498901367,grad_norm: 0.9999991710288563, iteration: 169507
loss: 1.0163639783859253,grad_norm: 0.9999990339295738, iteration: 169508
loss: 1.0416755676269531,grad_norm: 0.9999990137937562, iteration: 169509
loss: 0.9860150814056396,grad_norm: 0.8964968773384565, iteration: 169510
loss: 0.9863224029541016,grad_norm: 0.9449631963449279, iteration: 169511
loss: 1.0332043170928955,grad_norm: 0.9232179183650568, iteration: 169512
loss: 0.9865508675575256,grad_norm: 0.8885371083772038, iteration: 169513
loss: 0.9967946410179138,grad_norm: 0.8050418077269765, iteration: 169514
loss: 0.9755799174308777,grad_norm: 0.8827861472768992, iteration: 169515
loss: 1.050315260887146,grad_norm: 0.9999993112556179, iteration: 169516
loss: 1.0005922317504883,grad_norm: 0.8471154181171492, iteration: 169517
loss: 0.9954798221588135,grad_norm: 0.9774408013940971, iteration: 169518
loss: 0.9705066084861755,grad_norm: 0.9999991910681426, iteration: 169519
loss: 0.9851656556129456,grad_norm: 0.9999990873941764, iteration: 169520
loss: 1.0080440044403076,grad_norm: 0.9999990206401868, iteration: 169521
loss: 0.9917478561401367,grad_norm: 0.9499566521621764, iteration: 169522
loss: 1.0273901224136353,grad_norm: 0.9999991053196545, iteration: 169523
loss: 1.000484824180603,grad_norm: 0.8912929804761266, iteration: 169524
loss: 1.0110093355178833,grad_norm: 0.9999994916471725, iteration: 169525
loss: 0.9996671080589294,grad_norm: 0.9999991124476908, iteration: 169526
loss: 0.9786233901977539,grad_norm: 0.9331277676598607, iteration: 169527
loss: 1.0074669122695923,grad_norm: 0.899680626288604, iteration: 169528
loss: 1.0203137397766113,grad_norm: 0.9999991431004978, iteration: 169529
loss: 0.9953910708427429,grad_norm: 0.9999992039401154, iteration: 169530
loss: 0.9928946495056152,grad_norm: 0.9999989517975455, iteration: 169531
loss: 1.000152587890625,grad_norm: 0.8928009226002214, iteration: 169532
loss: 1.0253363847732544,grad_norm: 0.936754618926865, iteration: 169533
loss: 0.9856333136558533,grad_norm: 0.8709616585759782, iteration: 169534
loss: 1.0077338218688965,grad_norm: 0.9999991023429232, iteration: 169535
loss: 1.0176677703857422,grad_norm: 0.9553616436536865, iteration: 169536
loss: 0.9926495552062988,grad_norm: 0.9999992775136395, iteration: 169537
loss: 0.9846385717391968,grad_norm: 0.9999990972038708, iteration: 169538
loss: 0.9984167218208313,grad_norm: 0.9999990052196153, iteration: 169539
loss: 0.987429678440094,grad_norm: 0.9569318179237166, iteration: 169540
loss: 1.0439149141311646,grad_norm: 0.999999175073872, iteration: 169541
loss: 1.0008436441421509,grad_norm: 0.8452361953203417, iteration: 169542
loss: 0.9970853328704834,grad_norm: 0.9999990607099425, iteration: 169543
loss: 0.9819541573524475,grad_norm: 0.9999991099385427, iteration: 169544
loss: 0.9751511216163635,grad_norm: 0.9214625753969139, iteration: 169545
loss: 0.9581447839736938,grad_norm: 0.999999091696886, iteration: 169546
loss: 0.9737705588340759,grad_norm: 0.9999991084744907, iteration: 169547
loss: 0.9919247031211853,grad_norm: 0.8093736055944879, iteration: 169548
loss: 0.9857082366943359,grad_norm: 0.9887817841821168, iteration: 169549
loss: 0.9828711152076721,grad_norm: 0.9828132096327347, iteration: 169550
loss: 0.9845232367515564,grad_norm: 0.9999989811193378, iteration: 169551
loss: 0.973629355430603,grad_norm: 0.9999993105196919, iteration: 169552
loss: 0.9954068064689636,grad_norm: 0.8844402228640219, iteration: 169553
loss: 0.9841280579566956,grad_norm: 0.9999991454087154, iteration: 169554
loss: 0.9935387969017029,grad_norm: 0.999999141599532, iteration: 169555
loss: 0.9955359697341919,grad_norm: 0.9132010218356366, iteration: 169556
loss: 1.0718308687210083,grad_norm: 0.9999998380243263, iteration: 169557
loss: 0.9745845794677734,grad_norm: 0.9999990696016492, iteration: 169558
loss: 0.9971100687980652,grad_norm: 0.9021697697665572, iteration: 169559
loss: 1.0278522968292236,grad_norm: 0.9715649905090183, iteration: 169560
loss: 1.129574179649353,grad_norm: 0.9999993337229715, iteration: 169561
loss: 1.0054512023925781,grad_norm: 0.9012508832006456, iteration: 169562
loss: 0.9329330325126648,grad_norm: 0.999998930928248, iteration: 169563
loss: 1.0242406129837036,grad_norm: 0.9999997212929103, iteration: 169564
loss: 1.0432718992233276,grad_norm: 0.9999992005820364, iteration: 169565
loss: 0.9583662748336792,grad_norm: 0.9345401373488141, iteration: 169566
loss: 0.9966940879821777,grad_norm: 0.9592932385696945, iteration: 169567
loss: 1.0039997100830078,grad_norm: 0.8869171505857171, iteration: 169568
loss: 1.008769154548645,grad_norm: 0.9701320788957961, iteration: 169569
loss: 0.9835646748542786,grad_norm: 0.8855461026691942, iteration: 169570
loss: 1.079352855682373,grad_norm: 0.999999290493247, iteration: 169571
loss: 0.9691671133041382,grad_norm: 0.8744644497196538, iteration: 169572
loss: 1.047796368598938,grad_norm: 0.999999351546423, iteration: 169573
loss: 1.019228219985962,grad_norm: 0.9999991488244026, iteration: 169574
loss: 1.053259015083313,grad_norm: 0.9552432468918682, iteration: 169575
loss: 1.0644503831863403,grad_norm: 0.999999441901268, iteration: 169576
loss: 1.0635223388671875,grad_norm: 0.9999990821944095, iteration: 169577
loss: 1.0446733236312866,grad_norm: 0.9999997083093577, iteration: 169578
loss: 1.0122969150543213,grad_norm: 0.9877491789969683, iteration: 169579
loss: 0.9462590217590332,grad_norm: 0.9306132083505076, iteration: 169580
loss: 1.035593867301941,grad_norm: 0.9999991303105497, iteration: 169581
loss: 0.9886937737464905,grad_norm: 0.9999988608248228, iteration: 169582
loss: 0.9991098046302795,grad_norm: 0.9999990939778031, iteration: 169583
loss: 0.9962270259857178,grad_norm: 0.9999991998601717, iteration: 169584
loss: 0.9886523485183716,grad_norm: 0.8484548337500786, iteration: 169585
loss: 1.0307667255401611,grad_norm: 0.999999211090591, iteration: 169586
loss: 0.9909447431564331,grad_norm: 0.9999991985228943, iteration: 169587
loss: 0.9868151545524597,grad_norm: 0.9198549002979175, iteration: 169588
loss: 1.0129600763320923,grad_norm: 0.9999993482860194, iteration: 169589
loss: 0.9994311928749084,grad_norm: 0.9999991806507122, iteration: 169590
loss: 1.0156866312026978,grad_norm: 0.9999993355614052, iteration: 169591
loss: 1.0590153932571411,grad_norm: 0.9999994599609975, iteration: 169592
loss: 0.9906803369522095,grad_norm: 0.9999989948482861, iteration: 169593
loss: 0.9986673593521118,grad_norm: 0.9999990941742865, iteration: 169594
loss: 1.048144817352295,grad_norm: 0.9999997505211883, iteration: 169595
loss: 1.0299843549728394,grad_norm: 0.9999994338474976, iteration: 169596
loss: 0.9781538844108582,grad_norm: 0.9983779002242775, iteration: 169597
loss: 1.009041428565979,grad_norm: 0.9544088699030383, iteration: 169598
loss: 0.9956793189048767,grad_norm: 0.9999991249079913, iteration: 169599
loss: 1.0181913375854492,grad_norm: 0.999999016491346, iteration: 169600
loss: 0.9874641299247742,grad_norm: 0.8615680314311784, iteration: 169601
loss: 0.9944255352020264,grad_norm: 0.9999992761440104, iteration: 169602
loss: 1.017231822013855,grad_norm: 0.9999995742208104, iteration: 169603
loss: 1.064268708229065,grad_norm: 0.9999992789120868, iteration: 169604
loss: 0.9824773669242859,grad_norm: 0.9607689528101728, iteration: 169605
loss: 0.9522407054901123,grad_norm: 0.9999991501121731, iteration: 169606
loss: 1.2295477390289307,grad_norm: 0.9999999427622915, iteration: 169607
loss: 1.0119597911834717,grad_norm: 0.9999992089806978, iteration: 169608
loss: 0.9860380291938782,grad_norm: 0.9999999090050086, iteration: 169609
loss: 0.9838405847549438,grad_norm: 0.9999992256917933, iteration: 169610
loss: 1.0204079151153564,grad_norm: 0.9999990864856378, iteration: 169611
loss: 1.0621908903121948,grad_norm: 0.9999992301608162, iteration: 169612
loss: 0.9963359236717224,grad_norm: 0.9999999051879058, iteration: 169613
loss: 1.1338337659835815,grad_norm: 0.9999991799910855, iteration: 169614
loss: 1.0148756504058838,grad_norm: 0.9999992069661794, iteration: 169615
loss: 0.9868141412734985,grad_norm: 0.8835611058859569, iteration: 169616
loss: 0.9932391047477722,grad_norm: 0.915909115416404, iteration: 169617
loss: 1.02089524269104,grad_norm: 0.978405297452795, iteration: 169618
loss: 1.036512851715088,grad_norm: 0.9999997116800858, iteration: 169619
loss: 0.996422290802002,grad_norm: 0.9999991718661756, iteration: 169620
loss: 0.9902854561805725,grad_norm: 0.9155986123097017, iteration: 169621
loss: 1.0428848266601562,grad_norm: 0.9999991123392014, iteration: 169622
loss: 0.984062910079956,grad_norm: 0.8875145658374969, iteration: 169623
loss: 0.965555727481842,grad_norm: 0.9558828502361415, iteration: 169624
loss: 0.9662173986434937,grad_norm: 0.9999990016638307, iteration: 169625
loss: 1.0049335956573486,grad_norm: 0.9999990731833805, iteration: 169626
loss: 0.9984015822410583,grad_norm: 0.999999050733906, iteration: 169627
loss: 0.9886695742607117,grad_norm: 0.8204337901617574, iteration: 169628
loss: 0.9819797277450562,grad_norm: 0.9893145098058681, iteration: 169629
loss: 0.980415940284729,grad_norm: 0.9999989355474501, iteration: 169630
loss: 1.0314644575119019,grad_norm: 0.9999992391035013, iteration: 169631
loss: 0.9519929885864258,grad_norm: 0.9652368620673576, iteration: 169632
loss: 1.0164320468902588,grad_norm: 0.9999989173665911, iteration: 169633
loss: 1.0043036937713623,grad_norm: 0.8044397940514018, iteration: 169634
loss: 0.9987300038337708,grad_norm: 0.9111002377113051, iteration: 169635
loss: 0.9742270112037659,grad_norm: 0.999999144414646, iteration: 169636
loss: 0.9835732579231262,grad_norm: 0.9999991166036277, iteration: 169637
loss: 0.9918801188468933,grad_norm: 0.999999160223869, iteration: 169638
loss: 1.0094411373138428,grad_norm: 0.9999990450791393, iteration: 169639
loss: 1.0265635251998901,grad_norm: 0.923716365156828, iteration: 169640
loss: 0.9741312265396118,grad_norm: 0.9999989759065414, iteration: 169641
loss: 0.9947652816772461,grad_norm: 0.999999089086949, iteration: 169642
loss: 1.0024470090866089,grad_norm: 0.9341066800258149, iteration: 169643
loss: 0.9440860152244568,grad_norm: 0.9920325605507491, iteration: 169644
loss: 1.023004174232483,grad_norm: 0.9999990595271324, iteration: 169645
loss: 0.9868882298469543,grad_norm: 0.9358726873722812, iteration: 169646
loss: 1.0286930799484253,grad_norm: 0.9999993666989246, iteration: 169647
loss: 0.9961491823196411,grad_norm: 0.9253654373464938, iteration: 169648
loss: 1.0379983186721802,grad_norm: 0.9955914930022806, iteration: 169649
loss: 0.9722110033035278,grad_norm: 0.964192165484072, iteration: 169650
loss: 0.9934115409851074,grad_norm: 0.9473079325366895, iteration: 169651
loss: 1.2555811405181885,grad_norm: 0.9999997186380765, iteration: 169652
loss: 1.017344355583191,grad_norm: 0.9291335033812664, iteration: 169653
loss: 1.038596749305725,grad_norm: 0.8949602846094487, iteration: 169654
loss: 1.006944179534912,grad_norm: 0.9999991070142087, iteration: 169655
loss: 0.9849198460578918,grad_norm: 0.9917934211552453, iteration: 169656
loss: 0.973034679889679,grad_norm: 0.9999993941902394, iteration: 169657
loss: 1.0089926719665527,grad_norm: 0.9999990787323352, iteration: 169658
loss: 1.0022112131118774,grad_norm: 0.894505207661234, iteration: 169659
loss: 1.002280831336975,grad_norm: 0.9999994244168962, iteration: 169660
loss: 1.007097601890564,grad_norm: 0.9999992574812748, iteration: 169661
loss: 1.0040796995162964,grad_norm: 0.9231404223389279, iteration: 169662
loss: 1.0292413234710693,grad_norm: 0.999999130275293, iteration: 169663
loss: 1.0010706186294556,grad_norm: 0.796687533959294, iteration: 169664
loss: 0.9895845055580139,grad_norm: 0.9638166205868224, iteration: 169665
loss: 0.9885546565055847,grad_norm: 0.9701617129625233, iteration: 169666
loss: 1.0114667415618896,grad_norm: 0.9702952187142397, iteration: 169667
loss: 1.08587646484375,grad_norm: 0.9999994986934597, iteration: 169668
loss: 0.9928257465362549,grad_norm: 0.9999991580973259, iteration: 169669
loss: 0.9794154167175293,grad_norm: 0.9999989568984601, iteration: 169670
loss: 1.0044173002243042,grad_norm: 0.9278864334751917, iteration: 169671
loss: 1.0067139863967896,grad_norm: 0.8159726632922492, iteration: 169672
loss: 0.9696491956710815,grad_norm: 0.8629615420202033, iteration: 169673
loss: 0.9970245957374573,grad_norm: 0.8815090366577288, iteration: 169674
loss: 0.9817875623703003,grad_norm: 0.8456917082715529, iteration: 169675
loss: 0.9838683605194092,grad_norm: 0.9999991820541436, iteration: 169676
loss: 0.9486275315284729,grad_norm: 0.7628014375724507, iteration: 169677
loss: 1.0028307437896729,grad_norm: 0.8441085206200796, iteration: 169678
loss: 1.0303623676300049,grad_norm: 0.9746013139854467, iteration: 169679
loss: 1.0201863050460815,grad_norm: 0.8941848352678763, iteration: 169680
loss: 1.0013909339904785,grad_norm: 0.9806459307627179, iteration: 169681
loss: 1.1368576288223267,grad_norm: 0.9999999282341167, iteration: 169682
loss: 1.0145785808563232,grad_norm: 0.9546540427786923, iteration: 169683
loss: 0.9774019718170166,grad_norm: 0.999999157971439, iteration: 169684
loss: 0.969414472579956,grad_norm: 0.9999989975068962, iteration: 169685
loss: 1.0115476846694946,grad_norm: 0.9999993792418076, iteration: 169686
loss: 1.0256271362304688,grad_norm: 0.9632798085638448, iteration: 169687
loss: 1.028860092163086,grad_norm: 0.9195401295951109, iteration: 169688
loss: 0.9697602391242981,grad_norm: 0.7965565816917538, iteration: 169689
loss: 0.9871548414230347,grad_norm: 0.9999991873871513, iteration: 169690
loss: 0.9905344247817993,grad_norm: 0.9387804270495109, iteration: 169691
loss: 1.006940245628357,grad_norm: 0.9432126878143281, iteration: 169692
loss: 0.9937790632247925,grad_norm: 0.9999990433960353, iteration: 169693
loss: 0.982743501663208,grad_norm: 0.8398100371362828, iteration: 169694
loss: 0.9888098239898682,grad_norm: 0.9999991168549883, iteration: 169695
loss: 1.0135449171066284,grad_norm: 0.979236270874608, iteration: 169696
loss: 0.9557709693908691,grad_norm: 0.9999990433910849, iteration: 169697
loss: 1.0170457363128662,grad_norm: 0.9999990135519593, iteration: 169698
loss: 1.0024157762527466,grad_norm: 0.9974535353812385, iteration: 169699
loss: 0.9932118654251099,grad_norm: 0.9999991680372138, iteration: 169700
loss: 1.010801076889038,grad_norm: 0.9138642097226463, iteration: 169701
loss: 0.9849485158920288,grad_norm: 0.9847573416244102, iteration: 169702
loss: 1.0424836874008179,grad_norm: 0.999999337698534, iteration: 169703
loss: 0.9924830794334412,grad_norm: 0.9999990469081785, iteration: 169704
loss: 0.9936105608940125,grad_norm: 0.999999282711383, iteration: 169705
loss: 1.0229917764663696,grad_norm: 0.9999991422685484, iteration: 169706
loss: 0.9947081208229065,grad_norm: 0.9999992418785146, iteration: 169707
loss: 1.001824975013733,grad_norm: 0.8942043763539874, iteration: 169708
loss: 1.0155972242355347,grad_norm: 0.9244493325364738, iteration: 169709
loss: 1.0284348726272583,grad_norm: 0.9800504992046438, iteration: 169710
loss: 0.9721543192863464,grad_norm: 0.9999991304145897, iteration: 169711
loss: 1.0101760625839233,grad_norm: 0.9135389511807478, iteration: 169712
loss: 0.9914255738258362,grad_norm: 0.791117480393318, iteration: 169713
loss: 0.9881072640419006,grad_norm: 0.9999992300799557, iteration: 169714
loss: 0.9625591039657593,grad_norm: 0.9999992328252298, iteration: 169715
loss: 1.0322304964065552,grad_norm: 0.9384092774937861, iteration: 169716
loss: 0.9737645983695984,grad_norm: 0.99999895834212, iteration: 169717
loss: 1.0013495683670044,grad_norm: 0.9999992139075166, iteration: 169718
loss: 0.9841468334197998,grad_norm: 0.9999992580467623, iteration: 169719
loss: 0.9957987666130066,grad_norm: 0.9772515189556996, iteration: 169720
loss: 1.0275546312332153,grad_norm: 0.9117879003766625, iteration: 169721
loss: 1.014931321144104,grad_norm: 0.8216516861249279, iteration: 169722
loss: 1.0387046337127686,grad_norm: 0.9999992611219776, iteration: 169723
loss: 0.9909571409225464,grad_norm: 0.8910574553349276, iteration: 169724
loss: 1.0101773738861084,grad_norm: 0.9999992486308555, iteration: 169725
loss: 1.0042587518692017,grad_norm: 0.9851768017265995, iteration: 169726
loss: 1.029713749885559,grad_norm: 0.9999992050116762, iteration: 169727
loss: 0.9928784966468811,grad_norm: 0.9999990961440905, iteration: 169728
loss: 1.0588923692703247,grad_norm: 0.8622075115135487, iteration: 169729
loss: 0.9351439476013184,grad_norm: 0.8658587364253775, iteration: 169730
loss: 0.9994208812713623,grad_norm: 0.8682535874442239, iteration: 169731
loss: 1.0128757953643799,grad_norm: 0.9450595623772111, iteration: 169732
loss: 0.9780251383781433,grad_norm: 0.9999991168443053, iteration: 169733
loss: 0.9936776161193848,grad_norm: 0.9999995557420929, iteration: 169734
loss: 1.008169174194336,grad_norm: 0.9999991315057415, iteration: 169735
loss: 0.9705699682235718,grad_norm: 0.9999992022236598, iteration: 169736
loss: 1.0348830223083496,grad_norm: 0.9999989185472373, iteration: 169737
loss: 0.9981778264045715,grad_norm: 0.8313402161380299, iteration: 169738
loss: 1.043450117111206,grad_norm: 0.9999991460856397, iteration: 169739
loss: 1.0138715505599976,grad_norm: 0.9664392569314388, iteration: 169740
loss: 0.9970513582229614,grad_norm: 0.9503003263903744, iteration: 169741
loss: 0.9823587536811829,grad_norm: 0.8906133391778722, iteration: 169742
loss: 0.996321976184845,grad_norm: 0.9021858826199327, iteration: 169743
loss: 1.0124777555465698,grad_norm: 0.9696111492382314, iteration: 169744
loss: 0.9868429899215698,grad_norm: 0.9999990544194054, iteration: 169745
loss: 0.9876192212104797,grad_norm: 0.959453205181524, iteration: 169746
loss: 1.0351016521453857,grad_norm: 0.9185902042678279, iteration: 169747
loss: 1.0367801189422607,grad_norm: 0.9999996354305045, iteration: 169748
loss: 1.0307722091674805,grad_norm: 0.999999484522124, iteration: 169749
loss: 0.9887351989746094,grad_norm: 0.9999991283436848, iteration: 169750
loss: 1.015774130821228,grad_norm: 0.9999991175677753, iteration: 169751
loss: 0.9994583129882812,grad_norm: 0.9999992457123604, iteration: 169752
loss: 1.0372958183288574,grad_norm: 0.9999999289410958, iteration: 169753
loss: 1.0704542398452759,grad_norm: 0.9999997064396456, iteration: 169754
loss: 0.9530418515205383,grad_norm: 0.8511731758888443, iteration: 169755
loss: 1.0329084396362305,grad_norm: 0.9999996404160464, iteration: 169756
loss: 0.9716717600822449,grad_norm: 0.8944223751562763, iteration: 169757
loss: 1.010540246963501,grad_norm: 0.9876176174871459, iteration: 169758
loss: 0.993375301361084,grad_norm: 0.9999990974031642, iteration: 169759
loss: 0.9487190842628479,grad_norm: 0.9999991646738924, iteration: 169760
loss: 1.0283783674240112,grad_norm: 0.9999992659160919, iteration: 169761
loss: 1.015717625617981,grad_norm: 0.8886295393939997, iteration: 169762
loss: 0.9900897741317749,grad_norm: 0.9483400121051044, iteration: 169763
loss: 1.010080099105835,grad_norm: 0.9512614452311094, iteration: 169764
loss: 1.001116394996643,grad_norm: 0.9922068190811969, iteration: 169765
loss: 0.9867392182350159,grad_norm: 0.9999992121808925, iteration: 169766
loss: 1.017702579498291,grad_norm: 0.99999919081791, iteration: 169767
loss: 0.9810073971748352,grad_norm: 0.9145147506740546, iteration: 169768
loss: 1.0094008445739746,grad_norm: 0.821038712090067, iteration: 169769
loss: 1.0375319719314575,grad_norm: 0.999999933927405, iteration: 169770
loss: 0.982793390750885,grad_norm: 0.9999992486991371, iteration: 169771
loss: 0.9667603969573975,grad_norm: 0.8015591579102086, iteration: 169772
loss: 0.9747787117958069,grad_norm: 0.9727309583632018, iteration: 169773
loss: 1.0199341773986816,grad_norm: 0.9999989474375379, iteration: 169774
loss: 0.977983295917511,grad_norm: 0.8771493696472114, iteration: 169775
loss: 1.0031731128692627,grad_norm: 0.999999113693798, iteration: 169776
loss: 0.9887163043022156,grad_norm: 0.9999997512037675, iteration: 169777
loss: 1.017234206199646,grad_norm: 0.9999991831417729, iteration: 169778
loss: 1.035928726196289,grad_norm: 0.9687699080019995, iteration: 169779
loss: 0.9887757897377014,grad_norm: 0.8967846757532532, iteration: 169780
loss: 0.9656835794448853,grad_norm: 0.9090089599734195, iteration: 169781
loss: 1.0580370426177979,grad_norm: 0.9999993681343148, iteration: 169782
loss: 1.0291521549224854,grad_norm: 0.999999132629882, iteration: 169783
loss: 1.0073659420013428,grad_norm: 0.9193789453457278, iteration: 169784
loss: 0.9942465424537659,grad_norm: 0.9999990127841929, iteration: 169785
loss: 0.9954575896263123,grad_norm: 0.9398702213437655, iteration: 169786
loss: 1.048459529876709,grad_norm: 0.9999998265375076, iteration: 169787
loss: 0.9829955697059631,grad_norm: 0.999999126563189, iteration: 169788
loss: 0.9870706796646118,grad_norm: 0.9999990248902864, iteration: 169789
loss: 0.9963228106498718,grad_norm: 0.9999991537852112, iteration: 169790
loss: 1.030958652496338,grad_norm: 0.9999991097954007, iteration: 169791
loss: 0.9887592792510986,grad_norm: 0.99999921482847, iteration: 169792
loss: 0.9943971633911133,grad_norm: 0.9999991341364934, iteration: 169793
loss: 0.9773616790771484,grad_norm: 0.905770716659701, iteration: 169794
loss: 1.0138652324676514,grad_norm: 0.9999992688323942, iteration: 169795
loss: 0.9887492060661316,grad_norm: 0.9535798190347998, iteration: 169796
loss: 1.051032304763794,grad_norm: 0.9999996163876373, iteration: 169797
loss: 1.0254639387130737,grad_norm: 0.9999991017891046, iteration: 169798
loss: 1.0131913423538208,grad_norm: 0.99999900843098, iteration: 169799
loss: 1.1638330221176147,grad_norm: 0.999999855394061, iteration: 169800
loss: 0.9988915920257568,grad_norm: 0.9999991533526831, iteration: 169801
loss: 1.03685462474823,grad_norm: 0.9999990515226362, iteration: 169802
loss: 0.9679310321807861,grad_norm: 0.9554318048467558, iteration: 169803
loss: 0.9820084571838379,grad_norm: 0.9717305268921917, iteration: 169804
loss: 1.0316178798675537,grad_norm: 0.9999989451965179, iteration: 169805
loss: 0.9891753792762756,grad_norm: 0.9682911168836345, iteration: 169806
loss: 0.9861303567886353,grad_norm: 0.9999991176567582, iteration: 169807
loss: 1.029852032661438,grad_norm: 0.9023322354092603, iteration: 169808
loss: 0.9897115230560303,grad_norm: 0.897812998529022, iteration: 169809
loss: 1.0038460493087769,grad_norm: 0.9101161384189407, iteration: 169810
loss: 1.021275520324707,grad_norm: 0.9031599225679302, iteration: 169811
loss: 0.9665936231613159,grad_norm: 0.9999990299232955, iteration: 169812
loss: 1.0344178676605225,grad_norm: 0.9999991308142764, iteration: 169813
loss: 1.023473858833313,grad_norm: 0.9999990586331327, iteration: 169814
loss: 0.9622492790222168,grad_norm: 0.9510101378833574, iteration: 169815
loss: 1.0128034353256226,grad_norm: 0.8826614730997253, iteration: 169816
loss: 1.0147584676742554,grad_norm: 0.8333063624096965, iteration: 169817
loss: 0.9966671466827393,grad_norm: 0.8404963124611102, iteration: 169818
loss: 1.021850347518921,grad_norm: 0.9999992321126199, iteration: 169819
loss: 1.0722705125808716,grad_norm: 0.9999992545811449, iteration: 169820
loss: 0.9743615984916687,grad_norm: 0.999999141636037, iteration: 169821
loss: 0.97919762134552,grad_norm: 0.8259139197962614, iteration: 169822
loss: 0.9985018372535706,grad_norm: 0.9999994436126398, iteration: 169823
loss: 0.9908255934715271,grad_norm: 0.911826057692487, iteration: 169824
loss: 0.9982627034187317,grad_norm: 0.9999992127217868, iteration: 169825
loss: 1.018422245979309,grad_norm: 0.9180114544638024, iteration: 169826
loss: 0.9785483479499817,grad_norm: 0.9999990980536836, iteration: 169827
loss: 0.9917040467262268,grad_norm: 0.9999991851692291, iteration: 169828
loss: 1.0207237005233765,grad_norm: 0.9999991218427259, iteration: 169829
loss: 1.0221130847930908,grad_norm: 0.9999991592782463, iteration: 169830
loss: 1.0436784029006958,grad_norm: 0.9999990992886489, iteration: 169831
loss: 0.9930225610733032,grad_norm: 0.9999990540465298, iteration: 169832
loss: 1.0155401229858398,grad_norm: 0.9999990677679079, iteration: 169833
loss: 0.9993670582771301,grad_norm: 0.8675225526445723, iteration: 169834
loss: 0.9759698510169983,grad_norm: 0.9794358591123818, iteration: 169835
loss: 0.9873010516166687,grad_norm: 0.9355797306968241, iteration: 169836
loss: 1.007332682609558,grad_norm: 0.9426807805218023, iteration: 169837
loss: 0.987038791179657,grad_norm: 0.9999991009797478, iteration: 169838
loss: 1.0026990175247192,grad_norm: 0.999999164162868, iteration: 169839
loss: 0.9804093837738037,grad_norm: 0.953779767290775, iteration: 169840
loss: 0.9908643364906311,grad_norm: 0.9999990267678006, iteration: 169841
loss: 0.9940744638442993,grad_norm: 0.9999989286470643, iteration: 169842
loss: 1.1787184476852417,grad_norm: 0.9999992222930254, iteration: 169843
loss: 0.9962936043739319,grad_norm: 0.9999991879058037, iteration: 169844
loss: 0.9771307706832886,grad_norm: 0.9999992397945268, iteration: 169845
loss: 1.0257070064544678,grad_norm: 0.9999990695521058, iteration: 169846
loss: 1.0283435583114624,grad_norm: 0.9404553035319452, iteration: 169847
loss: 0.9860076904296875,grad_norm: 0.9572079549855745, iteration: 169848
loss: 1.0190924406051636,grad_norm: 0.9999995551522597, iteration: 169849
loss: 0.9937224388122559,grad_norm: 0.9999990974082936, iteration: 169850
loss: 0.9601054787635803,grad_norm: 0.8954015359176936, iteration: 169851
loss: 1.0229402780532837,grad_norm: 0.9999991600679703, iteration: 169852
loss: 1.0059040784835815,grad_norm: 0.9824581305632576, iteration: 169853
loss: 1.0054717063903809,grad_norm: 0.9248839006275698, iteration: 169854
loss: 0.9879025816917419,grad_norm: 0.999999163912593, iteration: 169855
loss: 1.0156755447387695,grad_norm: 0.9999998200557868, iteration: 169856
loss: 1.0059887170791626,grad_norm: 0.9999998517102034, iteration: 169857
loss: 0.9751929640769958,grad_norm: 0.9532921212380469, iteration: 169858
loss: 1.0346401929855347,grad_norm: 0.9999993680108971, iteration: 169859
loss: 1.000277042388916,grad_norm: 0.9871370118001238, iteration: 169860
loss: 1.0065566301345825,grad_norm: 0.867363191710941, iteration: 169861
loss: 0.9636127948760986,grad_norm: 0.9916715499217431, iteration: 169862
loss: 0.9840682744979858,grad_norm: 0.999998954525797, iteration: 169863
loss: 1.0177100896835327,grad_norm: 0.9745319222771668, iteration: 169864
loss: 0.99512779712677,grad_norm: 0.9562288892302545, iteration: 169865
loss: 1.0134512186050415,grad_norm: 0.9217212411145296, iteration: 169866
loss: 0.971839189529419,grad_norm: 0.9999992414931373, iteration: 169867
loss: 1.002497673034668,grad_norm: 0.9999992951921137, iteration: 169868
loss: 0.9835768938064575,grad_norm: 0.9999991328113438, iteration: 169869
loss: 1.0233534574508667,grad_norm: 0.9440681646945727, iteration: 169870
loss: 1.060930848121643,grad_norm: 0.9999995418260816, iteration: 169871
loss: 0.9855989813804626,grad_norm: 0.9999992401860898, iteration: 169872
loss: 1.0196470022201538,grad_norm: 0.9999996130135671, iteration: 169873
loss: 0.9437460899353027,grad_norm: 0.9999991660916691, iteration: 169874
loss: 0.9826869368553162,grad_norm: 0.9074389039482968, iteration: 169875
loss: 1.021791934967041,grad_norm: 0.7723896892166303, iteration: 169876
loss: 1.189536452293396,grad_norm: 0.9999992273689544, iteration: 169877
loss: 0.9805019497871399,grad_norm: 0.9999991803956656, iteration: 169878
loss: 0.9927186965942383,grad_norm: 0.9393177789770722, iteration: 169879
loss: 0.9950717687606812,grad_norm: 0.9031906836500049, iteration: 169880
loss: 1.007344365119934,grad_norm: 0.9999990878590075, iteration: 169881
loss: 0.990023672580719,grad_norm: 0.9999991807537051, iteration: 169882
loss: 0.9811747074127197,grad_norm: 0.9999989626697191, iteration: 169883
loss: 0.9987930655479431,grad_norm: 0.9434708457672499, iteration: 169884
loss: 0.9858818650245667,grad_norm: 0.9999990624868348, iteration: 169885
loss: 0.9665296673774719,grad_norm: 0.8921850091993854, iteration: 169886
loss: 0.9949533939361572,grad_norm: 0.9999990560504499, iteration: 169887
loss: 1.037602186203003,grad_norm: 0.999999048645414, iteration: 169888
loss: 0.9835705757141113,grad_norm: 0.9784716904826759, iteration: 169889
loss: 1.0460911989212036,grad_norm: 0.9999992602272795, iteration: 169890
loss: 1.0521454811096191,grad_norm: 0.9999995936188717, iteration: 169891
loss: 1.0654172897338867,grad_norm: 0.9999991939046595, iteration: 169892
loss: 0.9683097004890442,grad_norm: 0.9999991464212243, iteration: 169893
loss: 0.9993084073066711,grad_norm: 0.9999991884019673, iteration: 169894
loss: 0.9987542033195496,grad_norm: 0.9999991771497371, iteration: 169895
loss: 0.9899920225143433,grad_norm: 0.999999013939573, iteration: 169896
loss: 1.006148338317871,grad_norm: 0.9135394531127317, iteration: 169897
loss: 1.0170608758926392,grad_norm: 0.9845809051784682, iteration: 169898
loss: 0.981289803981781,grad_norm: 0.9999993181757162, iteration: 169899
loss: 0.9987017512321472,grad_norm: 0.9999997159166436, iteration: 169900
loss: 1.0004570484161377,grad_norm: 0.999999214131189, iteration: 169901
loss: 0.9704930782318115,grad_norm: 0.8411989319061051, iteration: 169902
loss: 0.987592339515686,grad_norm: 0.941034096663865, iteration: 169903
loss: 1.0032827854156494,grad_norm: 0.9636633999076377, iteration: 169904
loss: 0.9688659310340881,grad_norm: 0.7387100829438477, iteration: 169905
loss: 1.036218523979187,grad_norm: 0.9999991077055602, iteration: 169906
loss: 0.9718668460845947,grad_norm: 0.9999993519710728, iteration: 169907
loss: 0.9617239236831665,grad_norm: 0.9999992102435351, iteration: 169908
loss: 1.125540852546692,grad_norm: 0.9999990980720452, iteration: 169909
loss: 0.9983881711959839,grad_norm: 0.9747480317718151, iteration: 169910
loss: 1.006856918334961,grad_norm: 0.9838014330244562, iteration: 169911
loss: 0.992779016494751,grad_norm: 0.879704022687689, iteration: 169912
loss: 1.012194037437439,grad_norm: 0.9999992640084497, iteration: 169913
loss: 1.0303032398223877,grad_norm: 0.9854039050550336, iteration: 169914
loss: 0.9662571549415588,grad_norm: 0.9999991024884484, iteration: 169915
loss: 1.0021058320999146,grad_norm: 0.9655879269297201, iteration: 169916
loss: 1.0165408849716187,grad_norm: 0.99999946164782, iteration: 169917
loss: 1.0200635194778442,grad_norm: 0.9692093202970051, iteration: 169918
loss: 0.9947351813316345,grad_norm: 0.9511538169353374, iteration: 169919
loss: 1.0170525312423706,grad_norm: 0.9067706181324225, iteration: 169920
loss: 1.0034022331237793,grad_norm: 0.8358792894842588, iteration: 169921
loss: 0.9924192428588867,grad_norm: 0.916477314498379, iteration: 169922
loss: 1.019668459892273,grad_norm: 0.9999991921221837, iteration: 169923
loss: 1.0009737014770508,grad_norm: 0.960511572433414, iteration: 169924
loss: 1.02188241481781,grad_norm: 0.950186355967958, iteration: 169925
loss: 1.0705722570419312,grad_norm: 0.9973820600845729, iteration: 169926
loss: 1.0061358213424683,grad_norm: 0.7735605903603773, iteration: 169927
loss: 1.0162827968597412,grad_norm: 0.9999992153962808, iteration: 169928
loss: 0.9781131744384766,grad_norm: 0.9999991013521546, iteration: 169929
loss: 1.0098224878311157,grad_norm: 0.858153423681216, iteration: 169930
loss: 1.0081045627593994,grad_norm: 0.8535247544586182, iteration: 169931
loss: 0.9766451120376587,grad_norm: 0.9767868467583527, iteration: 169932
loss: 1.0017122030258179,grad_norm: 0.8949521726424527, iteration: 169933
loss: 0.9987760782241821,grad_norm: 0.9999990724472136, iteration: 169934
loss: 1.002281904220581,grad_norm: 0.8285105254356173, iteration: 169935
loss: 1.1140140295028687,grad_norm: 0.9999993243387895, iteration: 169936
loss: 0.9975175857543945,grad_norm: 0.9999990774052959, iteration: 169937
loss: 1.0481375455856323,grad_norm: 0.9999991087570806, iteration: 169938
loss: 1.0090022087097168,grad_norm: 0.9999991763638014, iteration: 169939
loss: 0.9897680878639221,grad_norm: 0.9999994830077793, iteration: 169940
loss: 1.016857624053955,grad_norm: 0.99999914575616, iteration: 169941
loss: 1.0020049810409546,grad_norm: 0.8566836830277395, iteration: 169942
loss: 1.0176717042922974,grad_norm: 0.9999992876664976, iteration: 169943
loss: 0.9807090759277344,grad_norm: 0.9999991793042657, iteration: 169944
loss: 1.0115633010864258,grad_norm: 0.8374296397942502, iteration: 169945
loss: 0.9960119128227234,grad_norm: 0.9415419116125128, iteration: 169946
loss: 0.9924185872077942,grad_norm: 0.8975748870768017, iteration: 169947
loss: 0.9939373135566711,grad_norm: 0.9999990886735964, iteration: 169948
loss: 1.0054903030395508,grad_norm: 0.9125181665671366, iteration: 169949
loss: 1.0226212739944458,grad_norm: 0.999999264885377, iteration: 169950
loss: 1.0003741979599,grad_norm: 0.9999990571038104, iteration: 169951
loss: 1.0015177726745605,grad_norm: 0.9999991270273849, iteration: 169952
loss: 0.9991868734359741,grad_norm: 0.9999991406731092, iteration: 169953
loss: 0.9674839973449707,grad_norm: 0.9999991097312413, iteration: 169954
loss: 1.0026658773422241,grad_norm: 0.9303035110349791, iteration: 169955
loss: 0.9959730505943298,grad_norm: 0.9979633920256006, iteration: 169956
loss: 0.9857137799263,grad_norm: 0.9901370932439929, iteration: 169957
loss: 0.9908627271652222,grad_norm: 0.9999990838313669, iteration: 169958
loss: 1.0299228429794312,grad_norm: 0.8481146022836039, iteration: 169959
loss: 0.9964803457260132,grad_norm: 0.9999991927619513, iteration: 169960
loss: 0.978125810623169,grad_norm: 0.9856315940304446, iteration: 169961
loss: 1.0207821130752563,grad_norm: 0.999999045504451, iteration: 169962
loss: 1.0008870363235474,grad_norm: 0.9999990659949956, iteration: 169963
loss: 0.9882539510726929,grad_norm: 0.8214182460490976, iteration: 169964
loss: 0.9813051223754883,grad_norm: 0.8126804250515781, iteration: 169965
loss: 0.961456298828125,grad_norm: 0.9781936273217529, iteration: 169966
loss: 1.022899866104126,grad_norm: 0.9793242499819578, iteration: 169967
loss: 0.9989171028137207,grad_norm: 0.9999992688052253, iteration: 169968
loss: 1.038934350013733,grad_norm: 0.9999990699032626, iteration: 169969
loss: 0.9802615642547607,grad_norm: 0.9999991410085157, iteration: 169970
loss: 0.9833733439445496,grad_norm: 0.9999991732426613, iteration: 169971
loss: 0.985695481300354,grad_norm: 0.9999990849488624, iteration: 169972
loss: 1.0280916690826416,grad_norm: 0.9705966045978264, iteration: 169973
loss: 1.0281826257705688,grad_norm: 0.9999992336032355, iteration: 169974
loss: 0.9932782649993896,grad_norm: 0.999999043051981, iteration: 169975
loss: 1.0222928524017334,grad_norm: 0.9304319230472043, iteration: 169976
loss: 0.9774484634399414,grad_norm: 0.9247115475160866, iteration: 169977
loss: 1.0131725072860718,grad_norm: 0.8011556529810938, iteration: 169978
loss: 1.01949143409729,grad_norm: 0.9999990938900046, iteration: 169979
loss: 0.9812440276145935,grad_norm: 0.7878849197555895, iteration: 169980
loss: 1.0200121402740479,grad_norm: 0.9847053784857968, iteration: 169981
loss: 0.9899590611457825,grad_norm: 0.9999989511900275, iteration: 169982
loss: 1.0160707235336304,grad_norm: 0.8995727833338242, iteration: 169983
loss: 1.0158742666244507,grad_norm: 0.9999991068060402, iteration: 169984
loss: 0.9698673486709595,grad_norm: 0.9999990324751544, iteration: 169985
loss: 1.0105417966842651,grad_norm: 0.9999991137932625, iteration: 169986
loss: 1.0050816535949707,grad_norm: 0.8842329317273173, iteration: 169987
loss: 1.0119436979293823,grad_norm: 0.948773482932854, iteration: 169988
loss: 0.9912266135215759,grad_norm: 0.8830124238797563, iteration: 169989
loss: 1.0039254426956177,grad_norm: 0.9999992790455836, iteration: 169990
loss: 0.962719738483429,grad_norm: 0.999999205685121, iteration: 169991
loss: 1.0158109664916992,grad_norm: 0.8489115984534491, iteration: 169992
loss: 0.9998742938041687,grad_norm: 0.9597725215469247, iteration: 169993
loss: 1.006194829940796,grad_norm: 0.9999990646547353, iteration: 169994
loss: 1.0162063837051392,grad_norm: 0.9999991576265848, iteration: 169995
loss: 0.990812361240387,grad_norm: 0.9321261327633089, iteration: 169996
loss: 1.0046926736831665,grad_norm: 0.9999991467101816, iteration: 169997
loss: 1.0227056741714478,grad_norm: 0.9999991960530978, iteration: 169998
loss: 1.013131856918335,grad_norm: 0.9604256211021059, iteration: 169999
loss: 0.9740303754806519,grad_norm: 0.9156834811913906, iteration: 170000
Evaluating at step 170000
{'val': 0.994433032348752, 'test': 2.0015310045248405}
loss: 0.9723644852638245,grad_norm: 0.9675629577152892, iteration: 170001
loss: 0.9550231099128723,grad_norm: 0.9999991451318586, iteration: 170002
loss: 0.9722437858581543,grad_norm: 0.8617393472970747, iteration: 170003
loss: 0.9816753268241882,grad_norm: 0.9999991321335453, iteration: 170004
loss: 0.9825693964958191,grad_norm: 0.9006097209316569, iteration: 170005
loss: 1.0028060674667358,grad_norm: 0.9999992301265144, iteration: 170006
loss: 0.9945673942565918,grad_norm: 0.999999159896732, iteration: 170007
loss: 0.9877638220787048,grad_norm: 0.9479715668698244, iteration: 170008
loss: 1.0087645053863525,grad_norm: 0.8988604909379807, iteration: 170009
loss: 0.9933732151985168,grad_norm: 0.9999990251901706, iteration: 170010
loss: 1.0093225240707397,grad_norm: 0.9999989603545729, iteration: 170011
loss: 0.9802637696266174,grad_norm: 0.9999990742622668, iteration: 170012
loss: 1.0960325002670288,grad_norm: 0.9999991811253491, iteration: 170013
loss: 1.0528305768966675,grad_norm: 0.9999991704113466, iteration: 170014
loss: 0.982433021068573,grad_norm: 0.9848819127813956, iteration: 170015
loss: 0.9599353671073914,grad_norm: 0.9896823297434876, iteration: 170016
loss: 1.004643201828003,grad_norm: 0.9703801256875751, iteration: 170017
loss: 1.0150090456008911,grad_norm: 0.9502842848681667, iteration: 170018
loss: 0.9840154051780701,grad_norm: 0.9999990977552562, iteration: 170019
loss: 0.9890348315238953,grad_norm: 0.9835596946346888, iteration: 170020
loss: 0.9857102036476135,grad_norm: 0.8797869293014994, iteration: 170021
loss: 1.0020418167114258,grad_norm: 0.9999989691936282, iteration: 170022
loss: 1.0171301364898682,grad_norm: 0.8704926948123627, iteration: 170023
loss: 0.979556679725647,grad_norm: 0.9999990267191649, iteration: 170024
loss: 0.9791067838668823,grad_norm: 0.9372902935399811, iteration: 170025
loss: 0.9933450818061829,grad_norm: 0.9646433567505422, iteration: 170026
loss: 0.9891496896743774,grad_norm: 0.9770356570062415, iteration: 170027
loss: 1.029889464378357,grad_norm: 0.9251497744797018, iteration: 170028
loss: 0.9659619927406311,grad_norm: 0.9995450453173265, iteration: 170029
loss: 1.0145223140716553,grad_norm: 0.9683826505690274, iteration: 170030
loss: 0.9999380707740784,grad_norm: 0.905283786242915, iteration: 170031
loss: 1.0502660274505615,grad_norm: 0.9999992642365829, iteration: 170032
loss: 0.9997134804725647,grad_norm: 0.9074235540257808, iteration: 170033
loss: 1.0887444019317627,grad_norm: 0.9999990938721552, iteration: 170034
loss: 0.9963453412055969,grad_norm: 0.8819149308706475, iteration: 170035
loss: 0.9821023344993591,grad_norm: 0.875642149653932, iteration: 170036
loss: 1.0035580396652222,grad_norm: 0.9999990536705734, iteration: 170037
loss: 1.0006000995635986,grad_norm: 0.9999992802482702, iteration: 170038
loss: 0.9912645220756531,grad_norm: 0.9999991189117402, iteration: 170039
loss: 0.9933516383171082,grad_norm: 0.9999990113007317, iteration: 170040
loss: 1.010748267173767,grad_norm: 0.9723814894039897, iteration: 170041
loss: 1.0400229692459106,grad_norm: 0.9508730071220629, iteration: 170042
loss: 0.9704316854476929,grad_norm: 0.9999991563606005, iteration: 170043
loss: 1.0002540349960327,grad_norm: 0.9999992534086303, iteration: 170044
loss: 0.9983848929405212,grad_norm: 0.9359960965275275, iteration: 170045
loss: 1.0026577711105347,grad_norm: 0.9999990668266519, iteration: 170046
loss: 1.0008891820907593,grad_norm: 0.9999992249007859, iteration: 170047
loss: 1.0087803602218628,grad_norm: 0.844278315227666, iteration: 170048
loss: 1.004141092300415,grad_norm: 0.855323026862872, iteration: 170049
loss: 0.9917798042297363,grad_norm: 0.8341957053478228, iteration: 170050
loss: 1.0028594732284546,grad_norm: 0.999999714314175, iteration: 170051
loss: 0.9768953323364258,grad_norm: 0.9968061282633607, iteration: 170052
loss: 1.0121569633483887,grad_norm: 0.9999990211756398, iteration: 170053
loss: 1.0171587467193604,grad_norm: 0.9999991035670177, iteration: 170054
loss: 0.9767240881919861,grad_norm: 0.8769740361942078, iteration: 170055
loss: 0.9937962293624878,grad_norm: 0.9356681464404313, iteration: 170056
loss: 1.01705801486969,grad_norm: 0.9999993076476772, iteration: 170057
loss: 1.0151959657669067,grad_norm: 0.9999990560613154, iteration: 170058
loss: 1.0195084810256958,grad_norm: 0.8933677307010883, iteration: 170059
loss: 1.0150456428527832,grad_norm: 0.9268064724521539, iteration: 170060
loss: 0.97509765625,grad_norm: 0.8611055156426732, iteration: 170061
loss: 0.9810335040092468,grad_norm: 0.9468289536775183, iteration: 170062
loss: 0.9821593761444092,grad_norm: 0.999999276707536, iteration: 170063
loss: 0.9833946824073792,grad_norm: 0.9999990343348629, iteration: 170064
loss: 1.0115386247634888,grad_norm: 0.942725424425693, iteration: 170065
loss: 0.964679479598999,grad_norm: 0.9186524164175531, iteration: 170066
loss: 1.0376853942871094,grad_norm: 0.9999992371271301, iteration: 170067
loss: 0.9751307368278503,grad_norm: 0.9390223706836179, iteration: 170068
loss: 0.9954289197921753,grad_norm: 0.8992654678136002, iteration: 170069
loss: 1.0693888664245605,grad_norm: 0.9999993960960207, iteration: 170070
loss: 0.9507365822792053,grad_norm: 0.9734485807824479, iteration: 170071
loss: 0.992620050907135,grad_norm: 0.9999991436077442, iteration: 170072
loss: 1.0033206939697266,grad_norm: 0.938628178765869, iteration: 170073
loss: 1.0023744106292725,grad_norm: 0.9999989892350647, iteration: 170074
loss: 0.9769700765609741,grad_norm: 0.9546727973423963, iteration: 170075
loss: 0.9656670093536377,grad_norm: 0.8963761458008073, iteration: 170076
loss: 1.1023379564285278,grad_norm: 0.9999990595100011, iteration: 170077
loss: 1.0012179613113403,grad_norm: 0.8892059668963271, iteration: 170078
loss: 0.964103102684021,grad_norm: 0.9999992273509765, iteration: 170079
loss: 1.0301599502563477,grad_norm: 0.9531356654008311, iteration: 170080
loss: 1.0175590515136719,grad_norm: 0.9999991710945425, iteration: 170081
loss: 1.0105299949645996,grad_norm: 0.9999991273860062, iteration: 170082
loss: 0.9880096912384033,grad_norm: 0.9925413057914076, iteration: 170083
loss: 1.0850151777267456,grad_norm: 0.9251980785771575, iteration: 170084
loss: 1.026187539100647,grad_norm: 0.8366257156444117, iteration: 170085
loss: 0.9715637564659119,grad_norm: 0.9302839358461238, iteration: 170086
loss: 0.9969286322593689,grad_norm: 0.8860470898015007, iteration: 170087
loss: 0.9957817196846008,grad_norm: 0.9999997502548319, iteration: 170088
loss: 1.004438042640686,grad_norm: 0.9999990045089047, iteration: 170089
loss: 1.0345392227172852,grad_norm: 0.9999996881127708, iteration: 170090
loss: 0.9771251082420349,grad_norm: 0.918927245814615, iteration: 170091
loss: 0.9735469818115234,grad_norm: 0.9999991128725145, iteration: 170092
loss: 0.9992061257362366,grad_norm: 0.999999708939753, iteration: 170093
loss: 1.0611199140548706,grad_norm: 0.9999991981996547, iteration: 170094
loss: 0.9865788221359253,grad_norm: 0.9999990974557317, iteration: 170095
loss: 1.0076014995574951,grad_norm: 0.9999990448213171, iteration: 170096
loss: 0.9995379447937012,grad_norm: 0.9999992261144107, iteration: 170097
loss: 1.0313104391098022,grad_norm: 0.9454272181767445, iteration: 170098
loss: 0.9988048076629639,grad_norm: 0.9999992038865004, iteration: 170099
loss: 0.9548320174217224,grad_norm: 0.9858384913860399, iteration: 170100
loss: 1.0113719701766968,grad_norm: 0.8578481730963817, iteration: 170101
loss: 1.0112566947937012,grad_norm: 0.8444224336628936, iteration: 170102
loss: 0.9949108958244324,grad_norm: 0.9999990643556137, iteration: 170103
loss: 0.9974581599235535,grad_norm: 0.9999990299080237, iteration: 170104
loss: 1.0089973211288452,grad_norm: 0.9999997101780355, iteration: 170105
loss: 1.037411093711853,grad_norm: 0.9999992994547615, iteration: 170106
loss: 0.9832722544670105,grad_norm: 0.8707823843320542, iteration: 170107
loss: 0.9734874963760376,grad_norm: 0.9203862332182075, iteration: 170108
loss: 0.9824978709220886,grad_norm: 0.9340250458235203, iteration: 170109
loss: 1.0090349912643433,grad_norm: 0.9987461098306815, iteration: 170110
loss: 1.001423716545105,grad_norm: 0.9999990621111253, iteration: 170111
loss: 1.0059325695037842,grad_norm: 0.9451230200323406, iteration: 170112
loss: 0.9563702344894409,grad_norm: 0.916690946496197, iteration: 170113
loss: 1.0086114406585693,grad_norm: 0.9735621976369969, iteration: 170114
loss: 0.9930552840232849,grad_norm: 0.9162568111944988, iteration: 170115
loss: 1.0127983093261719,grad_norm: 0.9999992961906938, iteration: 170116
loss: 1.0127424001693726,grad_norm: 0.999999131381982, iteration: 170117
loss: 1.0346325635910034,grad_norm: 0.9999989402405642, iteration: 170118
loss: 0.9848200082778931,grad_norm: 0.9710204517340939, iteration: 170119
loss: 0.9928688406944275,grad_norm: 0.9297204158909529, iteration: 170120
loss: 1.0022201538085938,grad_norm: 0.9068544114610964, iteration: 170121
loss: 1.0169559717178345,grad_norm: 0.9999991321860638, iteration: 170122
loss: 1.0347741842269897,grad_norm: 0.8257440726339791, iteration: 170123
loss: 1.0183134078979492,grad_norm: 0.9999990550347614, iteration: 170124
loss: 0.9827393889427185,grad_norm: 0.9999990953941598, iteration: 170125
loss: 1.0367729663848877,grad_norm: 0.9717693234196669, iteration: 170126
loss: 1.0249358415603638,grad_norm: 0.9999990937661011, iteration: 170127
loss: 1.0080981254577637,grad_norm: 0.8919690305308912, iteration: 170128
loss: 0.9834645390510559,grad_norm: 0.8772156308829836, iteration: 170129
loss: 1.0079691410064697,grad_norm: 0.9999990803222034, iteration: 170130
loss: 1.0191160440444946,grad_norm: 0.8968457956918741, iteration: 170131
loss: 1.134124755859375,grad_norm: 0.9999997729442011, iteration: 170132
loss: 1.0029140710830688,grad_norm: 0.9999992760225185, iteration: 170133
loss: 0.9986644387245178,grad_norm: 0.8544597892530953, iteration: 170134
loss: 1.00894296169281,grad_norm: 0.9940392217970264, iteration: 170135
loss: 1.002525806427002,grad_norm: 0.9598660321087741, iteration: 170136
loss: 0.960620641708374,grad_norm: 0.999999118495894, iteration: 170137
loss: 0.9910184741020203,grad_norm: 0.9708589909650143, iteration: 170138
loss: 0.9731854200363159,grad_norm: 0.9999990048926771, iteration: 170139
loss: 0.9867033362388611,grad_norm: 0.999999212595364, iteration: 170140
loss: 0.9914376139640808,grad_norm: 0.8561071887376427, iteration: 170141
loss: 0.9979960918426514,grad_norm: 0.9999990452152155, iteration: 170142
loss: 0.9887203574180603,grad_norm: 0.9771289010623914, iteration: 170143
loss: 0.9688012003898621,grad_norm: 0.9707229003832681, iteration: 170144
loss: 0.9680612087249756,grad_norm: 0.9999991204540416, iteration: 170145
loss: 0.9847047924995422,grad_norm: 0.999999438472234, iteration: 170146
loss: 0.9903268814086914,grad_norm: 0.9999991597562876, iteration: 170147
loss: 1.0180749893188477,grad_norm: 0.9999995223008119, iteration: 170148
loss: 0.9603045582771301,grad_norm: 0.9999991139085112, iteration: 170149
loss: 1.0345391035079956,grad_norm: 0.9999991046028385, iteration: 170150
loss: 1.0008987188339233,grad_norm: 0.9999990088974816, iteration: 170151
loss: 0.9950698614120483,grad_norm: 0.9354322386148015, iteration: 170152
loss: 0.9777927398681641,grad_norm: 0.9999990418715473, iteration: 170153
loss: 1.0056843757629395,grad_norm: 0.9999991181671553, iteration: 170154
loss: 0.976446807384491,grad_norm: 0.9999991323794392, iteration: 170155
loss: 0.9839633703231812,grad_norm: 0.9333579164074421, iteration: 170156
loss: 0.996624231338501,grad_norm: 0.9631653255963449, iteration: 170157
loss: 0.9910937547683716,grad_norm: 0.9454569722128099, iteration: 170158
loss: 0.9739224910736084,grad_norm: 0.98141224721565, iteration: 170159
loss: 0.9602925777435303,grad_norm: 0.9999990056131124, iteration: 170160
loss: 1.031708836555481,grad_norm: 0.9999991419304122, iteration: 170161
loss: 1.0020067691802979,grad_norm: 0.9999991161119696, iteration: 170162
loss: 0.9859424829483032,grad_norm: 0.9999991952147469, iteration: 170163
loss: 0.9466818571090698,grad_norm: 0.9999991006478061, iteration: 170164
loss: 1.0107665061950684,grad_norm: 0.9999993098528063, iteration: 170165
loss: 0.9990125298500061,grad_norm: 0.9157595287836688, iteration: 170166
loss: 0.9603059887886047,grad_norm: 0.9999991458763638, iteration: 170167
loss: 0.951022744178772,grad_norm: 0.9999991724684667, iteration: 170168
loss: 0.9869049191474915,grad_norm: 0.9999990863694126, iteration: 170169
loss: 1.025362253189087,grad_norm: 0.9999991629688693, iteration: 170170
loss: 1.0298136472702026,grad_norm: 0.9075218790609513, iteration: 170171
loss: 0.9889847636222839,grad_norm: 0.8913611802180161, iteration: 170172
loss: 1.023484230041504,grad_norm: 0.7875832817759779, iteration: 170173
loss: 0.9876478314399719,grad_norm: 0.9921514158199524, iteration: 170174
loss: 1.0448434352874756,grad_norm: 0.9989658872637399, iteration: 170175
loss: 1.0293309688568115,grad_norm: 0.9999993477140976, iteration: 170176
loss: 0.9730412364006042,grad_norm: 0.9600807280378201, iteration: 170177
loss: 1.0131171941757202,grad_norm: 0.7360526110456412, iteration: 170178
loss: 0.9868437647819519,grad_norm: 0.8206016774219588, iteration: 170179
loss: 0.9985606670379639,grad_norm: 0.8869779229468353, iteration: 170180
loss: 0.9982836246490479,grad_norm: 0.8801129444130554, iteration: 170181
loss: 0.9681593775749207,grad_norm: 0.9999992197200368, iteration: 170182
loss: 1.00638747215271,grad_norm: 0.9999992621944831, iteration: 170183
loss: 0.981608510017395,grad_norm: 0.7901813827385188, iteration: 170184
loss: 0.9855155348777771,grad_norm: 0.9387032545615661, iteration: 170185
loss: 0.9849575161933899,grad_norm: 0.8432234404150735, iteration: 170186
loss: 1.0082218647003174,grad_norm: 0.9999990977244656, iteration: 170187
loss: 0.9980306625366211,grad_norm: 0.9999991883281555, iteration: 170188
loss: 0.9844651818275452,grad_norm: 0.8386574844919645, iteration: 170189
loss: 0.9819092154502869,grad_norm: 0.9852515098798211, iteration: 170190
loss: 1.026131510734558,grad_norm: 0.9081860968720404, iteration: 170191
loss: 1.0547840595245361,grad_norm: 0.9999998693953998, iteration: 170192
loss: 1.000149130821228,grad_norm: 0.9999991449913954, iteration: 170193
loss: 1.014370083808899,grad_norm: 0.9381793276138793, iteration: 170194
loss: 0.9833996891975403,grad_norm: 0.9999990336553718, iteration: 170195
loss: 1.0086675882339478,grad_norm: 0.9999991676006191, iteration: 170196
loss: 0.9717088937759399,grad_norm: 0.963480437769064, iteration: 170197
loss: 1.0242235660552979,grad_norm: 0.9999992061114439, iteration: 170198
loss: 0.9625547528266907,grad_norm: 0.9999989276828842, iteration: 170199
loss: 1.0229483842849731,grad_norm: 0.999999114648977, iteration: 170200
loss: 1.013914704322815,grad_norm: 0.9999991836358283, iteration: 170201
loss: 1.0188260078430176,grad_norm: 0.9999992754260967, iteration: 170202
loss: 1.0168839693069458,grad_norm: 0.9999991119475589, iteration: 170203
loss: 1.0082447528839111,grad_norm: 0.9829399898892446, iteration: 170204
loss: 1.0359388589859009,grad_norm: 0.9999999322952569, iteration: 170205
loss: 0.9561503529548645,grad_norm: 0.967249840693321, iteration: 170206
loss: 1.0130926370620728,grad_norm: 0.9871578491853363, iteration: 170207
loss: 1.014642596244812,grad_norm: 0.9999990235905856, iteration: 170208
loss: 1.0160815715789795,grad_norm: 0.9280594937937914, iteration: 170209
loss: 1.0314137935638428,grad_norm: 0.9195067898970052, iteration: 170210
loss: 0.9856366515159607,grad_norm: 0.9999992199699388, iteration: 170211
loss: 1.006098747253418,grad_norm: 0.9999991243535314, iteration: 170212
loss: 0.9834022521972656,grad_norm: 0.9999991769651804, iteration: 170213
loss: 0.9885019659996033,grad_norm: 0.9999990250971321, iteration: 170214
loss: 0.9890788197517395,grad_norm: 0.9153873911553143, iteration: 170215
loss: 0.988857626914978,grad_norm: 0.9999990151750008, iteration: 170216
loss: 0.99464350938797,grad_norm: 0.9435672470866452, iteration: 170217
loss: 0.9739333391189575,grad_norm: 0.9735313954913707, iteration: 170218
loss: 1.0190151929855347,grad_norm: 0.9999991079520456, iteration: 170219
loss: 1.0139938592910767,grad_norm: 0.9531101436710282, iteration: 170220
loss: 1.0093666315078735,grad_norm: 0.9201575839847697, iteration: 170221
loss: 0.9885132908821106,grad_norm: 0.8577073241990198, iteration: 170222
loss: 1.0309423208236694,grad_norm: 0.9790651190091069, iteration: 170223
loss: 1.0077744722366333,grad_norm: 0.9999991343624587, iteration: 170224
loss: 0.9583768844604492,grad_norm: 0.9624477634632334, iteration: 170225
loss: 1.0719658136367798,grad_norm: 0.9999992392247651, iteration: 170226
loss: 0.987920880317688,grad_norm: 0.9999991791850851, iteration: 170227
loss: 1.0132333040237427,grad_norm: 0.9999989566134317, iteration: 170228
loss: 1.0193395614624023,grad_norm: 0.9999989924773421, iteration: 170229
loss: 0.9973999857902527,grad_norm: 0.9999991573825868, iteration: 170230
loss: 0.9747885465621948,grad_norm: 0.8843167290450167, iteration: 170231
loss: 1.0216209888458252,grad_norm: 0.9999992602670082, iteration: 170232
loss: 0.9939603209495544,grad_norm: 0.9693996456932538, iteration: 170233
loss: 1.0137020349502563,grad_norm: 0.9600745980107471, iteration: 170234
loss: 1.0315860509872437,grad_norm: 0.9999991774782891, iteration: 170235
loss: 1.0052720308303833,grad_norm: 0.9999991910039487, iteration: 170236
loss: 0.997957170009613,grad_norm: 0.8317790581989827, iteration: 170237
loss: 0.9929360747337341,grad_norm: 0.9999992439909459, iteration: 170238
loss: 0.9518150687217712,grad_norm: 0.7979627623707913, iteration: 170239
loss: 0.9746884703636169,grad_norm: 0.9203716513122505, iteration: 170240
loss: 1.0443506240844727,grad_norm: 0.9999993840746152, iteration: 170241
loss: 1.010441541671753,grad_norm: 0.9999991712708962, iteration: 170242
loss: 1.006545901298523,grad_norm: 0.9999991635968207, iteration: 170243
loss: 1.0333586931228638,grad_norm: 0.9999990434895725, iteration: 170244
loss: 1.0138471126556396,grad_norm: 0.9999990792301272, iteration: 170245
loss: 1.0083330869674683,grad_norm: 0.9999990996280481, iteration: 170246
loss: 0.9629505276679993,grad_norm: 0.8644270036238214, iteration: 170247
loss: 0.9686299562454224,grad_norm: 0.9806654194607571, iteration: 170248
loss: 1.0162315368652344,grad_norm: 0.9999989902774905, iteration: 170249
loss: 0.9891994595527649,grad_norm: 0.9999991824777581, iteration: 170250
loss: 1.0225601196289062,grad_norm: 0.9455193296070511, iteration: 170251
loss: 0.984870970249176,grad_norm: 0.9999999957170905, iteration: 170252
loss: 1.0064059495925903,grad_norm: 0.9999991106900344, iteration: 170253
loss: 0.9752523899078369,grad_norm: 0.9589242547490539, iteration: 170254
loss: 1.0579147338867188,grad_norm: 0.9999990746075559, iteration: 170255
loss: 0.9510626792907715,grad_norm: 0.8717958007880485, iteration: 170256
loss: 0.9607837796211243,grad_norm: 0.8879784941585359, iteration: 170257
loss: 1.0247749090194702,grad_norm: 0.9564992594147544, iteration: 170258
loss: 0.9665836095809937,grad_norm: 0.9999992031449265, iteration: 170259
loss: 0.9928774237632751,grad_norm: 0.8712810157829928, iteration: 170260
loss: 1.0097930431365967,grad_norm: 0.999999136664001, iteration: 170261
loss: 0.9700563549995422,grad_norm: 0.9907064565655302, iteration: 170262
loss: 1.0338019132614136,grad_norm: 0.9999992545995221, iteration: 170263
loss: 0.9852343201637268,grad_norm: 0.9306626920689244, iteration: 170264
loss: 1.0022146701812744,grad_norm: 0.9072473841946632, iteration: 170265
loss: 1.003487467765808,grad_norm: 0.9999991749625216, iteration: 170266
loss: 1.0507545471191406,grad_norm: 0.9999992359698154, iteration: 170267
loss: 1.022362232208252,grad_norm: 0.9999988782396279, iteration: 170268
loss: 1.0243195295333862,grad_norm: 0.8586129447409881, iteration: 170269
loss: 1.0145539045333862,grad_norm: 0.9999989531958254, iteration: 170270
loss: 1.000531792640686,grad_norm: 0.9862322612747838, iteration: 170271
loss: 0.9826101660728455,grad_norm: 0.999999110559441, iteration: 170272
loss: 0.9912148118019104,grad_norm: 0.999999156977149, iteration: 170273
loss: 0.9846936464309692,grad_norm: 0.9999991340732036, iteration: 170274
loss: 1.0844955444335938,grad_norm: 0.8350526095476435, iteration: 170275
loss: 1.0287959575653076,grad_norm: 0.9419212564568324, iteration: 170276
loss: 1.0038032531738281,grad_norm: 0.9702304425528915, iteration: 170277
loss: 0.9703331589698792,grad_norm: 0.8497928689473531, iteration: 170278
loss: 0.9930608868598938,grad_norm: 0.9999991744636108, iteration: 170279
loss: 0.9928070306777954,grad_norm: 0.9999992952307545, iteration: 170280
loss: 0.9590408205986023,grad_norm: 0.789853980412742, iteration: 170281
loss: 1.0084408521652222,grad_norm: 0.9999990285040542, iteration: 170282
loss: 0.997435450553894,grad_norm: 0.9820763905832787, iteration: 170283
loss: 1.011759638786316,grad_norm: 0.9970169517322358, iteration: 170284
loss: 0.9803338050842285,grad_norm: 0.8498612738224519, iteration: 170285
loss: 1.0128045082092285,grad_norm: 0.9999991952985201, iteration: 170286
loss: 0.9938786625862122,grad_norm: 0.9999991397385362, iteration: 170287
loss: 1.030139684677124,grad_norm: 0.8610030541695277, iteration: 170288
loss: 0.9538119435310364,grad_norm: 0.9478386083270288, iteration: 170289
loss: 1.0100305080413818,grad_norm: 0.884059463654474, iteration: 170290
loss: 0.9866928458213806,grad_norm: 0.9999992054483615, iteration: 170291
loss: 0.9834306240081787,grad_norm: 0.999999066935742, iteration: 170292
loss: 1.0084234476089478,grad_norm: 0.9620227256153512, iteration: 170293
loss: 1.0334587097167969,grad_norm: 0.9999991197960063, iteration: 170294
loss: 1.0212289094924927,grad_norm: 0.9730042902037885, iteration: 170295
loss: 1.0087090730667114,grad_norm: 0.9999991217046568, iteration: 170296
loss: 0.979312002658844,grad_norm: 0.9999991972001174, iteration: 170297
loss: 1.0208523273468018,grad_norm: 0.9626520147698101, iteration: 170298
loss: 1.0333963632583618,grad_norm: 0.9999990029978761, iteration: 170299
loss: 1.0280474424362183,grad_norm: 0.9999996402238516, iteration: 170300
loss: 0.9877508282661438,grad_norm: 0.9344728445188377, iteration: 170301
loss: 1.0110247135162354,grad_norm: 0.9105710167568931, iteration: 170302
loss: 0.9888108968734741,grad_norm: 0.9999991292638329, iteration: 170303
loss: 1.0534528493881226,grad_norm: 0.9559474071760702, iteration: 170304
loss: 1.1010373830795288,grad_norm: 0.999999099234812, iteration: 170305
loss: 0.9971758127212524,grad_norm: 0.9999993993244622, iteration: 170306
loss: 0.9987267851829529,grad_norm: 0.9999990382964059, iteration: 170307
loss: 1.0106247663497925,grad_norm: 0.99999891237992, iteration: 170308
loss: 0.9654517769813538,grad_norm: 0.9999989957526922, iteration: 170309
loss: 1.0069224834442139,grad_norm: 0.9999994310219782, iteration: 170310
loss: 1.0384941101074219,grad_norm: 0.9751790014936278, iteration: 170311
loss: 0.9927799105644226,grad_norm: 0.9999990910103941, iteration: 170312
loss: 0.999727725982666,grad_norm: 0.8638604856121174, iteration: 170313
loss: 1.0130839347839355,grad_norm: 0.9999991177941587, iteration: 170314
loss: 1.0311051607131958,grad_norm: 0.9999991279312483, iteration: 170315
loss: 1.0335276126861572,grad_norm: 0.9999998522999931, iteration: 170316
loss: 0.9948039650917053,grad_norm: 0.9999992802802358, iteration: 170317
loss: 0.9551970958709717,grad_norm: 0.9218091094859346, iteration: 170318
loss: 1.0019797086715698,grad_norm: 0.9999991536473528, iteration: 170319
loss: 1.0363014936447144,grad_norm: 0.9999990085088744, iteration: 170320
loss: 1.0358003377914429,grad_norm: 0.9999990400706158, iteration: 170321
loss: 1.0031813383102417,grad_norm: 0.9999998373410558, iteration: 170322
loss: 0.9914037585258484,grad_norm: 0.9999989615490544, iteration: 170323
loss: 1.0042028427124023,grad_norm: 0.9999989413574896, iteration: 170324
loss: 0.991715133190155,grad_norm: 0.9064644649087195, iteration: 170325
loss: 1.009178876876831,grad_norm: 0.9999990513613282, iteration: 170326
loss: 1.0008779764175415,grad_norm: 0.9299836078589941, iteration: 170327
loss: 1.012339472770691,grad_norm: 0.9710117482457032, iteration: 170328
loss: 1.0124082565307617,grad_norm: 0.9999989706153123, iteration: 170329
loss: 1.0070326328277588,grad_norm: 0.9944631967386879, iteration: 170330
loss: 0.9596640467643738,grad_norm: 0.999999264901153, iteration: 170331
loss: 0.998989462852478,grad_norm: 0.9999991758273515, iteration: 170332
loss: 1.0103800296783447,grad_norm: 0.8692653917609543, iteration: 170333
loss: 0.9743219614028931,grad_norm: 0.9999990652999007, iteration: 170334
loss: 0.987541675567627,grad_norm: 0.9999991081514628, iteration: 170335
loss: 0.9891347289085388,grad_norm: 0.9167515991353264, iteration: 170336
loss: 1.0141303539276123,grad_norm: 0.9999992893465354, iteration: 170337
loss: 0.9894811511039734,grad_norm: 0.9839936232427292, iteration: 170338
loss: 0.9866706132888794,grad_norm: 0.9999993208802506, iteration: 170339
loss: 1.0162211656570435,grad_norm: 0.9999991404346582, iteration: 170340
loss: 1.0054476261138916,grad_norm: 0.9999990371842262, iteration: 170341
loss: 1.0226836204528809,grad_norm: 0.9999991107351097, iteration: 170342
loss: 1.0378973484039307,grad_norm: 0.9999993522548756, iteration: 170343
loss: 1.0083683729171753,grad_norm: 0.999999162774315, iteration: 170344
loss: 1.0061836242675781,grad_norm: 0.9999991934099972, iteration: 170345
loss: 0.9886947274208069,grad_norm: 0.9840808266191401, iteration: 170346
loss: 1.0040030479431152,grad_norm: 0.8660438549390794, iteration: 170347
loss: 0.9517940282821655,grad_norm: 0.9999989563073389, iteration: 170348
loss: 1.0173320770263672,grad_norm: 0.8169107120122607, iteration: 170349
loss: 1.0476235151290894,grad_norm: 0.8430169734232597, iteration: 170350
loss: 0.9507604241371155,grad_norm: 0.9164370907754364, iteration: 170351
loss: 1.0041636228561401,grad_norm: 0.9999990322109754, iteration: 170352
loss: 0.9825345873832703,grad_norm: 0.9091397795757613, iteration: 170353
loss: 0.964139461517334,grad_norm: 0.9352136979458445, iteration: 170354
loss: 1.0258357524871826,grad_norm: 0.7731581222365685, iteration: 170355
loss: 1.0400986671447754,grad_norm: 0.999999104760188, iteration: 170356
loss: 1.2112648487091064,grad_norm: 0.9864555530277345, iteration: 170357
loss: 0.9761966466903687,grad_norm: 0.9999991244120525, iteration: 170358
loss: 1.0196232795715332,grad_norm: 0.9999991381013189, iteration: 170359
loss: 0.9719160199165344,grad_norm: 0.8939585058491257, iteration: 170360
loss: 0.9980267882347107,grad_norm: 0.9999991596775737, iteration: 170361
loss: 1.011681079864502,grad_norm: 0.8409127992441175, iteration: 170362
loss: 0.9960035681724548,grad_norm: 0.9999991072503182, iteration: 170363
loss: 1.0035468339920044,grad_norm: 0.9999990481591605, iteration: 170364
loss: 0.9733676910400391,grad_norm: 0.9999991342785356, iteration: 170365
loss: 0.9673371911048889,grad_norm: 0.8922860522281163, iteration: 170366
loss: 1.0060878992080688,grad_norm: 0.9999990318432176, iteration: 170367
loss: 0.9734923243522644,grad_norm: 0.9259988161717095, iteration: 170368
loss: 0.9724048972129822,grad_norm: 0.914265770398768, iteration: 170369
loss: 0.9923877716064453,grad_norm: 0.8555620812267595, iteration: 170370
loss: 0.9962127208709717,grad_norm: 0.9999995815639893, iteration: 170371
loss: 0.9822174906730652,grad_norm: 0.9131790291858032, iteration: 170372
loss: 0.9649715423583984,grad_norm: 0.9332612630706829, iteration: 170373
loss: 1.0077522993087769,grad_norm: 0.8771216423148775, iteration: 170374
loss: 0.9957125186920166,grad_norm: 0.8563650268450255, iteration: 170375
loss: 1.0057544708251953,grad_norm: 0.9185726992971055, iteration: 170376
loss: 1.0152348279953003,grad_norm: 0.9999991065763583, iteration: 170377
loss: 0.9908105134963989,grad_norm: 0.9920654110487279, iteration: 170378
loss: 0.9919272661209106,grad_norm: 0.9418623543861459, iteration: 170379
loss: 0.953347384929657,grad_norm: 0.9999991585867858, iteration: 170380
loss: 0.993754506111145,grad_norm: 0.9341191854467275, iteration: 170381
loss: 0.9810364246368408,grad_norm: 0.9043367686119106, iteration: 170382
loss: 1.0141932964324951,grad_norm: 0.9999993703170834, iteration: 170383
loss: 0.9733749628067017,grad_norm: 0.8738385987965488, iteration: 170384
loss: 0.9692915081977844,grad_norm: 0.9999991632699118, iteration: 170385
loss: 1.0241912603378296,grad_norm: 0.8639752727604186, iteration: 170386
loss: 1.0031694173812866,grad_norm: 0.9428376764665016, iteration: 170387
loss: 0.9852117896080017,grad_norm: 0.8748621714493879, iteration: 170388
loss: 0.965797483921051,grad_norm: 0.8301060122939233, iteration: 170389
loss: 0.9945961236953735,grad_norm: 0.9999991094372263, iteration: 170390
loss: 0.9936147928237915,grad_norm: 0.8743319713736986, iteration: 170391
loss: 1.03044593334198,grad_norm: 0.999999286515062, iteration: 170392
loss: 0.9974262118339539,grad_norm: 0.9999990951585136, iteration: 170393
loss: 1.1054754257202148,grad_norm: 0.99999961279456, iteration: 170394
loss: 0.984822154045105,grad_norm: 0.9889783411541054, iteration: 170395
loss: 0.9724041819572449,grad_norm: 0.9999993250359015, iteration: 170396
loss: 0.928391695022583,grad_norm: 0.9919549425650415, iteration: 170397
loss: 0.9970704913139343,grad_norm: 0.9999990925937885, iteration: 170398
loss: 1.0347405672073364,grad_norm: 0.9999990730066385, iteration: 170399
loss: 1.0354634523391724,grad_norm: 0.9999991890035553, iteration: 170400
loss: 0.9893175363540649,grad_norm: 0.8482307128240357, iteration: 170401
loss: 1.2826234102249146,grad_norm: 0.9999995860740675, iteration: 170402
loss: 0.9653104543685913,grad_norm: 0.9063920953916619, iteration: 170403
loss: 1.0344713926315308,grad_norm: 0.8586832971304491, iteration: 170404
loss: 0.9831722974777222,grad_norm: 0.9999991294878369, iteration: 170405
loss: 1.0110228061676025,grad_norm: 0.8532330437674025, iteration: 170406
loss: 1.0172317028045654,grad_norm: 0.9554823447609379, iteration: 170407
loss: 1.0198487043380737,grad_norm: 0.929984253015794, iteration: 170408
loss: 0.989737868309021,grad_norm: 0.9406567816389746, iteration: 170409
loss: 0.9984757304191589,grad_norm: 0.9999991998003562, iteration: 170410
loss: 1.017246127128601,grad_norm: 0.999999178081563, iteration: 170411
loss: 0.9499228000640869,grad_norm: 0.9999989421064273, iteration: 170412
loss: 0.9611747860908508,grad_norm: 0.8818922248269894, iteration: 170413
loss: 0.9855900406837463,grad_norm: 0.9712818215428709, iteration: 170414
loss: 1.0210471153259277,grad_norm: 0.9999993433337867, iteration: 170415
loss: 0.995302140712738,grad_norm: 0.8737810413808658, iteration: 170416
loss: 0.998374879360199,grad_norm: 0.9707139992830347, iteration: 170417
loss: 1.0132801532745361,grad_norm: 0.9999992145070944, iteration: 170418
loss: 1.01668119430542,grad_norm: 0.9999991572027942, iteration: 170419
loss: 0.9753913283348083,grad_norm: 0.9999991018923302, iteration: 170420
loss: 0.965156078338623,grad_norm: 0.9289955771489333, iteration: 170421
loss: 0.9864869117736816,grad_norm: 0.9999991218270265, iteration: 170422
loss: 1.0409207344055176,grad_norm: 0.9999992969076484, iteration: 170423
loss: 0.9905991554260254,grad_norm: 0.9999990831989227, iteration: 170424
loss: 0.9996374845504761,grad_norm: 0.9999990580998781, iteration: 170425
loss: 1.0362138748168945,grad_norm: 0.9999991045309751, iteration: 170426
loss: 0.9628168344497681,grad_norm: 0.9908406649939099, iteration: 170427
loss: 1.0892070531845093,grad_norm: 0.9999996512469754, iteration: 170428
loss: 0.9992985725402832,grad_norm: 0.8234939085221473, iteration: 170429
loss: 0.9917684197425842,grad_norm: 0.9728916674387591, iteration: 170430
loss: 1.0304070711135864,grad_norm: 0.8938506796308262, iteration: 170431
loss: 1.0504487752914429,grad_norm: 0.9999990093915294, iteration: 170432
loss: 1.016685128211975,grad_norm: 0.999999078868449, iteration: 170433
loss: 1.0048702955245972,grad_norm: 0.9175758225622981, iteration: 170434
loss: 1.0019623041152954,grad_norm: 0.9855641302235131, iteration: 170435
loss: 1.0466217994689941,grad_norm: 0.9999992459890921, iteration: 170436
loss: 1.0212830305099487,grad_norm: 0.9999991572473907, iteration: 170437
loss: 1.0177757740020752,grad_norm: 0.9999989372666885, iteration: 170438
loss: 0.9605065584182739,grad_norm: 0.9999992409715489, iteration: 170439
loss: 0.9987349510192871,grad_norm: 0.9897286976662597, iteration: 170440
loss: 0.9522327780723572,grad_norm: 0.8004052565942835, iteration: 170441
loss: 0.9936239123344421,grad_norm: 0.9999992303923271, iteration: 170442
loss: 0.9766721129417419,grad_norm: 0.9999989800714761, iteration: 170443
loss: 0.9863974452018738,grad_norm: 0.9999994388090625, iteration: 170444
loss: 0.9958369135856628,grad_norm: 0.9999991987334209, iteration: 170445
loss: 0.9830349087715149,grad_norm: 0.9870788524619192, iteration: 170446
loss: 0.9807150959968567,grad_norm: 0.9999993688222172, iteration: 170447
loss: 1.0029425621032715,grad_norm: 0.8408653918990254, iteration: 170448
loss: 1.030295491218567,grad_norm: 0.9144177461906247, iteration: 170449
loss: 1.0159116983413696,grad_norm: 0.9260186986050578, iteration: 170450
loss: 1.0505893230438232,grad_norm: 0.9999990360510457, iteration: 170451
loss: 0.9948604106903076,grad_norm: 0.9248837152093474, iteration: 170452
loss: 1.0480619668960571,grad_norm: 0.9999993087959772, iteration: 170453
loss: 0.9576405882835388,grad_norm: 0.9999991123455536, iteration: 170454
loss: 1.03589928150177,grad_norm: 0.9999991988567289, iteration: 170455
loss: 1.001940131187439,grad_norm: 0.9999991480414915, iteration: 170456
loss: 0.9926289319992065,grad_norm: 0.999999042018998, iteration: 170457
loss: 1.0289645195007324,grad_norm: 0.8844886683070676, iteration: 170458
loss: 0.9858752489089966,grad_norm: 0.9999996007516941, iteration: 170459
loss: 0.9753047823905945,grad_norm: 0.9146142298115322, iteration: 170460
loss: 1.0153752565383911,grad_norm: 0.8336735600963241, iteration: 170461
loss: 0.9847783446311951,grad_norm: 0.876693855046971, iteration: 170462
loss: 0.997630774974823,grad_norm: 0.8990680274396264, iteration: 170463
loss: 1.0025310516357422,grad_norm: 0.8656787617210814, iteration: 170464
loss: 1.0055867433547974,grad_norm: 0.9030589186499987, iteration: 170465
loss: 0.9464016556739807,grad_norm: 0.8873857662031657, iteration: 170466
loss: 0.9885956048965454,grad_norm: 0.9693099348142452, iteration: 170467
loss: 0.9748092889785767,grad_norm: 0.9651353998949836, iteration: 170468
loss: 1.0399129390716553,grad_norm: 0.9430768379689446, iteration: 170469
loss: 1.0389937162399292,grad_norm: 0.9999998011532573, iteration: 170470
loss: 0.9753779768943787,grad_norm: 0.9999993414488557, iteration: 170471
loss: 1.0279730558395386,grad_norm: 0.9999993524071378, iteration: 170472
loss: 0.9939248561859131,grad_norm: 0.9999990445680976, iteration: 170473
loss: 0.9885751605033875,grad_norm: 0.9999990356371243, iteration: 170474
loss: 0.9720240235328674,grad_norm: 0.9999994301778529, iteration: 170475
loss: 0.9923906922340393,grad_norm: 0.9999990895414222, iteration: 170476
loss: 1.034704327583313,grad_norm: 0.9999991821609734, iteration: 170477
loss: 1.032291054725647,grad_norm: 0.8636761348687113, iteration: 170478
loss: 1.0253511667251587,grad_norm: 0.9881096247428315, iteration: 170479
loss: 0.9645969867706299,grad_norm: 0.95285035918003, iteration: 170480
loss: 1.00313138961792,grad_norm: 0.9613758457121757, iteration: 170481
loss: 1.0024288892745972,grad_norm: 0.9999991649372902, iteration: 170482
loss: 1.0331251621246338,grad_norm: 0.9999989483993031, iteration: 170483
loss: 1.013236165046692,grad_norm: 0.8720901751953528, iteration: 170484
loss: 1.00822913646698,grad_norm: 0.7429120860985317, iteration: 170485
loss: 1.0041247606277466,grad_norm: 0.8189360951476804, iteration: 170486
loss: 0.9983518719673157,grad_norm: 0.9391054523059716, iteration: 170487
loss: 0.9656383395195007,grad_norm: 0.9999990990250698, iteration: 170488
loss: 1.0103201866149902,grad_norm: 0.9999991579055391, iteration: 170489
loss: 1.1717637777328491,grad_norm: 0.9999992414123786, iteration: 170490
loss: 0.9566075205802917,grad_norm: 0.954901695798529, iteration: 170491
loss: 0.988922655582428,grad_norm: 0.9999992170411958, iteration: 170492
loss: 1.0324069261550903,grad_norm: 0.9999992198949732, iteration: 170493
loss: 0.9975341558456421,grad_norm: 0.9999990891896557, iteration: 170494
loss: 1.0035969018936157,grad_norm: 0.984658696209474, iteration: 170495
loss: 0.9944671988487244,grad_norm: 0.9652349378487567, iteration: 170496
loss: 1.003875732421875,grad_norm: 0.9999990966979682, iteration: 170497
loss: 1.0031421184539795,grad_norm: 0.9999990691651442, iteration: 170498
loss: 1.0293720960617065,grad_norm: 0.9812774532208594, iteration: 170499
loss: 1.0023653507232666,grad_norm: 0.8370958472798545, iteration: 170500
loss: 1.0290913581848145,grad_norm: 0.9999990121616789, iteration: 170501
loss: 1.0160919427871704,grad_norm: 0.9999993098152038, iteration: 170502
loss: 0.9932713508605957,grad_norm: 0.9999991685423807, iteration: 170503
loss: 0.9990683794021606,grad_norm: 0.9999991531523098, iteration: 170504
loss: 1.0323177576065063,grad_norm: 0.9999994122928034, iteration: 170505
loss: 0.9385729432106018,grad_norm: 0.999998972935909, iteration: 170506
loss: 0.981209933757782,grad_norm: 0.9377485689822295, iteration: 170507
loss: 1.0220160484313965,grad_norm: 0.8874311066707714, iteration: 170508
loss: 0.9558848738670349,grad_norm: 0.9635804076733718, iteration: 170509
loss: 0.9979040622711182,grad_norm: 0.8357839350461741, iteration: 170510
loss: 0.9830626845359802,grad_norm: 0.9950114635791135, iteration: 170511
loss: 1.027717113494873,grad_norm: 0.8383500533256245, iteration: 170512
loss: 1.0042468309402466,grad_norm: 0.9568737629871364, iteration: 170513
loss: 1.0155274868011475,grad_norm: 0.9999991198330422, iteration: 170514
loss: 1.0042088031768799,grad_norm: 0.9999994955534355, iteration: 170515
loss: 1.004172444343567,grad_norm: 0.7579506467287881, iteration: 170516
loss: 0.9713339805603027,grad_norm: 0.9999991684478321, iteration: 170517
loss: 1.017365574836731,grad_norm: 0.9999991115148394, iteration: 170518
loss: 0.9890149235725403,grad_norm: 0.889177661941356, iteration: 170519
loss: 1.0002869367599487,grad_norm: 0.8456950033732793, iteration: 170520
loss: 1.0751038789749146,grad_norm: 0.9999991126462052, iteration: 170521
loss: 0.9956606030464172,grad_norm: 0.9982229875718368, iteration: 170522
loss: 1.0088049173355103,grad_norm: 0.9999990631044847, iteration: 170523
loss: 1.0105607509613037,grad_norm: 0.9999991380179244, iteration: 170524
loss: 0.9921706914901733,grad_norm: 0.9327363749113934, iteration: 170525
loss: 0.9940494298934937,grad_norm: 0.999999195348753, iteration: 170526
loss: 1.0118608474731445,grad_norm: 0.999999317068227, iteration: 170527
loss: 0.9978322982788086,grad_norm: 0.9999990223973275, iteration: 170528
loss: 0.9881585240364075,grad_norm: 0.8944462125107625, iteration: 170529
loss: 1.0562726259231567,grad_norm: 0.9999990920728801, iteration: 170530
loss: 1.0136538743972778,grad_norm: 0.999998963193533, iteration: 170531
loss: 1.0260590314865112,grad_norm: 0.853493261518644, iteration: 170532
loss: 1.0021693706512451,grad_norm: 0.9999990076807048, iteration: 170533
loss: 0.981330394744873,grad_norm: 0.9616905891848688, iteration: 170534
loss: 0.9797247052192688,grad_norm: 0.9999992264159484, iteration: 170535
loss: 1.0091023445129395,grad_norm: 0.9387155429406008, iteration: 170536
loss: 1.0089031457901,grad_norm: 0.9999990624534161, iteration: 170537
loss: 0.962162435054779,grad_norm: 0.9999991487784927, iteration: 170538
loss: 1.0349019765853882,grad_norm: 0.9999990610773222, iteration: 170539
loss: 1.0075277090072632,grad_norm: 0.9124288866453112, iteration: 170540
loss: 1.01457679271698,grad_norm: 0.8676048173376915, iteration: 170541
loss: 1.0262409448623657,grad_norm: 0.9999992337089814, iteration: 170542
loss: 1.0212222337722778,grad_norm: 0.999999204234948, iteration: 170543
loss: 0.9969173669815063,grad_norm: 0.9999989622077586, iteration: 170544
loss: 1.0641651153564453,grad_norm: 0.922063509651041, iteration: 170545
loss: 0.9989104270935059,grad_norm: 0.9999991216611246, iteration: 170546
loss: 1.0172008275985718,grad_norm: 0.9999989488766597, iteration: 170547
loss: 1.0285547971725464,grad_norm: 0.9999991411365496, iteration: 170548
loss: 1.0060679912567139,grad_norm: 0.9999990894961329, iteration: 170549
loss: 1.0141972303390503,grad_norm: 0.99999901684009, iteration: 170550
loss: 0.9935342669487,grad_norm: 0.9999992586285266, iteration: 170551
loss: 0.9800124764442444,grad_norm: 0.8884260806200428, iteration: 170552
loss: 1.0432652235031128,grad_norm: 0.9999990408984978, iteration: 170553
loss: 0.9802380800247192,grad_norm: 0.8364361884042539, iteration: 170554
loss: 0.9959394335746765,grad_norm: 0.9420643143497521, iteration: 170555
loss: 0.991486668586731,grad_norm: 0.9999994153141115, iteration: 170556
loss: 1.0524778366088867,grad_norm: 0.9999991530136475, iteration: 170557
loss: 1.0041561126708984,grad_norm: 0.8829856597738506, iteration: 170558
loss: 1.0009201765060425,grad_norm: 0.9999991870273678, iteration: 170559
loss: 1.0054609775543213,grad_norm: 0.9999996539786105, iteration: 170560
loss: 0.9903409481048584,grad_norm: 0.999999210880326, iteration: 170561
loss: 0.9989666938781738,grad_norm: 0.9810117537104748, iteration: 170562
loss: 1.0534889698028564,grad_norm: 0.9999990772119851, iteration: 170563
loss: 1.0807360410690308,grad_norm: 0.9999993564430337, iteration: 170564
loss: 1.0256049633026123,grad_norm: 0.8504828713027092, iteration: 170565
loss: 0.9869306087493896,grad_norm: 0.9825240069331025, iteration: 170566
loss: 0.9878212213516235,grad_norm: 0.9999990711564377, iteration: 170567
loss: 0.985163152217865,grad_norm: 0.958136900148484, iteration: 170568
loss: 1.008821964263916,grad_norm: 0.9737797946531912, iteration: 170569
loss: 1.0150171518325806,grad_norm: 0.860456573124789, iteration: 170570
loss: 0.9967889785766602,grad_norm: 0.9309215078024904, iteration: 170571
loss: 1.03164803981781,grad_norm: 0.7899337025032496, iteration: 170572
loss: 1.0301357507705688,grad_norm: 0.9999990766771778, iteration: 170573
loss: 1.0050952434539795,grad_norm: 0.9999991503058567, iteration: 170574
loss: 1.019147276878357,grad_norm: 0.8880063199220503, iteration: 170575
loss: 0.9693987965583801,grad_norm: 0.9999990590189143, iteration: 170576
loss: 0.9995455145835876,grad_norm: 0.9999991003335383, iteration: 170577
loss: 0.9757624268531799,grad_norm: 0.9565474411957178, iteration: 170578
loss: 0.9746643900871277,grad_norm: 0.9999992640552925, iteration: 170579
loss: 1.0107107162475586,grad_norm: 0.9999992512700046, iteration: 170580
loss: 0.9663346409797668,grad_norm: 0.8934209200766432, iteration: 170581
loss: 0.9727579951286316,grad_norm: 0.995852012587884, iteration: 170582
loss: 0.9571080207824707,grad_norm: 0.9999990019605398, iteration: 170583
loss: 1.031365156173706,grad_norm: 0.9999991128677421, iteration: 170584
loss: 1.0086666345596313,grad_norm: 0.9022065454320827, iteration: 170585
loss: 0.966234564781189,grad_norm: 0.7955867295977594, iteration: 170586
loss: 1.0032492876052856,grad_norm: 0.8320137283624379, iteration: 170587
loss: 1.0056242942810059,grad_norm: 0.9999990766607317, iteration: 170588
loss: 0.997443675994873,grad_norm: 0.9426394122109231, iteration: 170589
loss: 0.9993687272071838,grad_norm: 0.9916238744408177, iteration: 170590
loss: 1.002540111541748,grad_norm: 0.9068485952355626, iteration: 170591
loss: 1.0203142166137695,grad_norm: 0.9999989684976694, iteration: 170592
loss: 0.9916631579399109,grad_norm: 0.8071013933729332, iteration: 170593
loss: 1.0023605823516846,grad_norm: 0.9110789418498593, iteration: 170594
loss: 0.9698684811592102,grad_norm: 0.8832654971697561, iteration: 170595
loss: 0.9772378206253052,grad_norm: 0.9999991927348998, iteration: 170596
loss: 1.0089126825332642,grad_norm: 0.9709987538803823, iteration: 170597
loss: 0.9670307636260986,grad_norm: 0.9262327448141163, iteration: 170598
loss: 1.0035953521728516,grad_norm: 0.8816336743797013, iteration: 170599
loss: 0.9727984070777893,grad_norm: 0.9999992837479725, iteration: 170600
loss: 0.9645600914955139,grad_norm: 0.9999990815980973, iteration: 170601
loss: 1.0413676500320435,grad_norm: 0.9999991947601133, iteration: 170602
loss: 0.9794925451278687,grad_norm: 0.883560833012554, iteration: 170603
loss: 1.0173758268356323,grad_norm: 0.9999991344870485, iteration: 170604
loss: 0.9864509701728821,grad_norm: 0.8849972017337824, iteration: 170605
loss: 1.0241951942443848,grad_norm: 0.9999992743779089, iteration: 170606
loss: 1.008587121963501,grad_norm: 0.9167695451247926, iteration: 170607
loss: 1.016278862953186,grad_norm: 0.9173715889308259, iteration: 170608
loss: 0.9964454174041748,grad_norm: 0.9999991197727913, iteration: 170609
loss: 1.0280922651290894,grad_norm: 0.9817598566547798, iteration: 170610
loss: 0.97259920835495,grad_norm: 0.9999990613914086, iteration: 170611
loss: 0.993040919303894,grad_norm: 0.9239424523765857, iteration: 170612
loss: 1.051093339920044,grad_norm: 0.9999992033150169, iteration: 170613
loss: 1.0133661031723022,grad_norm: 0.9999990245760779, iteration: 170614
loss: 0.9600806832313538,grad_norm: 0.9999990989695678, iteration: 170615
loss: 0.9857807755470276,grad_norm: 0.9999990455593396, iteration: 170616
loss: 1.0332109928131104,grad_norm: 0.9999994115743257, iteration: 170617
loss: 1.0260096788406372,grad_norm: 0.9999990979659523, iteration: 170618
loss: 1.0676089525222778,grad_norm: 0.9999994435334009, iteration: 170619
loss: 1.0104647874832153,grad_norm: 0.9757598782856584, iteration: 170620
loss: 1.0041472911834717,grad_norm: 0.8744651464511698, iteration: 170621
loss: 1.0441421270370483,grad_norm: 0.9490517329711836, iteration: 170622
loss: 0.9802072048187256,grad_norm: 0.9999999917883633, iteration: 170623
loss: 1.012008547782898,grad_norm: 0.9999991350335992, iteration: 170624
loss: 1.0064849853515625,grad_norm: 0.9999990624596344, iteration: 170625
loss: 1.009848713874817,grad_norm: 0.9690228035621925, iteration: 170626
loss: 1.037026047706604,grad_norm: 0.9999995574884685, iteration: 170627
loss: 0.9997538924217224,grad_norm: 0.9439015587038079, iteration: 170628
loss: 1.0019078254699707,grad_norm: 0.9518479985220483, iteration: 170629
loss: 1.0188852548599243,grad_norm: 0.9999991174114821, iteration: 170630
loss: 0.9516297578811646,grad_norm: 0.926097784390205, iteration: 170631
loss: 1.026383399963379,grad_norm: 0.9369846781889172, iteration: 170632
loss: 0.9914866089820862,grad_norm: 0.853343390103691, iteration: 170633
loss: 0.9588130712509155,grad_norm: 0.9999999154938563, iteration: 170634
loss: 1.0117391347885132,grad_norm: 0.97149425062114, iteration: 170635
loss: 0.9701206684112549,grad_norm: 0.9092834017296033, iteration: 170636
loss: 1.0062735080718994,grad_norm: 0.9666544630009923, iteration: 170637
loss: 0.9878400564193726,grad_norm: 0.9999993627662264, iteration: 170638
loss: 1.0114562511444092,grad_norm: 0.8517982499817472, iteration: 170639
loss: 1.0546934604644775,grad_norm: 0.974900065501833, iteration: 170640
loss: 1.0179049968719482,grad_norm: 0.893760980952221, iteration: 170641
loss: 0.9867715239524841,grad_norm: 0.999999212143971, iteration: 170642
loss: 1.0143440961837769,grad_norm: 0.8639543035354527, iteration: 170643
loss: 1.0352699756622314,grad_norm: 0.999999852933555, iteration: 170644
loss: 1.0527535676956177,grad_norm: 0.9999998854777159, iteration: 170645
loss: 1.0041437149047852,grad_norm: 0.8543578007461657, iteration: 170646
loss: 0.9854974746704102,grad_norm: 0.9616707446508626, iteration: 170647
loss: 1.0181196928024292,grad_norm: 0.7967991154067647, iteration: 170648
loss: 0.9959558248519897,grad_norm: 0.9207700737301499, iteration: 170649
loss: 1.0418789386749268,grad_norm: 0.9999994329623876, iteration: 170650
loss: 1.0096778869628906,grad_norm: 0.8190788653990566, iteration: 170651
loss: 1.0117424726486206,grad_norm: 0.8648286352931286, iteration: 170652
loss: 1.0970484018325806,grad_norm: 0.9999999476327178, iteration: 170653
loss: 1.0238230228424072,grad_norm: 0.9999990575850817, iteration: 170654
loss: 0.9862900972366333,grad_norm: 0.9999990627672138, iteration: 170655
loss: 1.0142604112625122,grad_norm: 0.9688282267071094, iteration: 170656
loss: 1.0308173894882202,grad_norm: 0.9558838439300381, iteration: 170657
loss: 1.0075218677520752,grad_norm: 0.8759684971573223, iteration: 170658
loss: 0.9753723740577698,grad_norm: 0.9999991440163369, iteration: 170659
loss: 0.9730649590492249,grad_norm: 0.9999991204916874, iteration: 170660
loss: 1.0301871299743652,grad_norm: 0.9465662408132851, iteration: 170661
loss: 1.0108381509780884,grad_norm: 0.9999991524124457, iteration: 170662
loss: 1.0234956741333008,grad_norm: 0.8591318155892472, iteration: 170663
loss: 0.9738155603408813,grad_norm: 0.9572591635071688, iteration: 170664
loss: 1.0164048671722412,grad_norm: 0.9909892780157742, iteration: 170665
loss: 0.992139458656311,grad_norm: 0.8256606383757954, iteration: 170666
loss: 0.9740744233131409,grad_norm: 0.999999163409599, iteration: 170667
loss: 1.0134700536727905,grad_norm: 0.8993446756133782, iteration: 170668
loss: 1.0360751152038574,grad_norm: 0.9999992799689823, iteration: 170669
loss: 0.9818449020385742,grad_norm: 0.9999990655537674, iteration: 170670
loss: 1.019970417022705,grad_norm: 0.999999289339911, iteration: 170671
loss: 0.9900060296058655,grad_norm: 0.9999991579570477, iteration: 170672
loss: 1.025755763053894,grad_norm: 0.999999312223241, iteration: 170673
loss: 1.02181875705719,grad_norm: 0.9866367388195043, iteration: 170674
loss: 1.0252031087875366,grad_norm: 0.9655316243560788, iteration: 170675
loss: 0.9396263957023621,grad_norm: 0.9519729736763624, iteration: 170676
loss: 0.9916528463363647,grad_norm: 0.8794591689171868, iteration: 170677
loss: 1.0089142322540283,grad_norm: 0.9999991991480816, iteration: 170678
loss: 1.0067545175552368,grad_norm: 0.9999991618860657, iteration: 170679
loss: 0.9771255850791931,grad_norm: 0.9692333935905456, iteration: 170680
loss: 0.9947522282600403,grad_norm: 0.9999991552492181, iteration: 170681
loss: 1.0200037956237793,grad_norm: 0.9999993576313596, iteration: 170682
loss: 0.9873108863830566,grad_norm: 0.9999992932433912, iteration: 170683
loss: 1.0094645023345947,grad_norm: 0.9999992520222716, iteration: 170684
loss: 0.9957496523857117,grad_norm: 0.9999992431159143, iteration: 170685
loss: 1.0147929191589355,grad_norm: 0.9999991987249135, iteration: 170686
loss: 0.9760005474090576,grad_norm: 0.9159834317913047, iteration: 170687
loss: 0.9878383874893188,grad_norm: 0.9999992100819245, iteration: 170688
loss: 0.9995149374008179,grad_norm: 0.9999990225622846, iteration: 170689
loss: 0.9789181351661682,grad_norm: 0.8992383418895421, iteration: 170690
loss: 1.0324361324310303,grad_norm: 0.9999996336021966, iteration: 170691
loss: 0.9895967245101929,grad_norm: 0.9999992102504104, iteration: 170692
loss: 0.9962776303291321,grad_norm: 0.999999135859533, iteration: 170693
loss: 1.0018597841262817,grad_norm: 0.9931968536034821, iteration: 170694
loss: 1.0016369819641113,grad_norm: 0.9999990065899524, iteration: 170695
loss: 1.1429396867752075,grad_norm: 0.9999990581110652, iteration: 170696
loss: 0.9781426787376404,grad_norm: 0.9999993918638067, iteration: 170697
loss: 0.9959890246391296,grad_norm: 0.9145557043371552, iteration: 170698
loss: 0.9807725548744202,grad_norm: 0.9999992112089205, iteration: 170699
loss: 0.9953161478042603,grad_norm: 0.9721440067027534, iteration: 170700
loss: 0.993840217590332,grad_norm: 0.9062630142734484, iteration: 170701
loss: 0.9973409175872803,grad_norm: 0.9999990492641255, iteration: 170702
loss: 0.9913350939750671,grad_norm: 0.9999992007332119, iteration: 170703
loss: 1.012089729309082,grad_norm: 0.9758813670618097, iteration: 170704
loss: 1.007920742034912,grad_norm: 0.999999090195548, iteration: 170705
loss: 0.9505695104598999,grad_norm: 0.9175328627264415, iteration: 170706
loss: 0.9926720261573792,grad_norm: 0.9066575172845402, iteration: 170707
loss: 1.0700218677520752,grad_norm: 0.9999990494926991, iteration: 170708
loss: 1.0435633659362793,grad_norm: 0.9999991327933963, iteration: 170709
loss: 0.9857587814331055,grad_norm: 0.9927553347471183, iteration: 170710
loss: 1.0064648389816284,grad_norm: 0.9999992103924914, iteration: 170711
loss: 0.980527937412262,grad_norm: 0.8834501619863471, iteration: 170712
loss: 0.953449547290802,grad_norm: 0.9999991213531979, iteration: 170713
loss: 0.9925333261489868,grad_norm: 0.9228863418738633, iteration: 170714
loss: 1.041682243347168,grad_norm: 0.999999129738757, iteration: 170715
loss: 0.9780207872390747,grad_norm: 0.9999991727332967, iteration: 170716
loss: 0.980745255947113,grad_norm: 0.9999991416920624, iteration: 170717
loss: 1.0072669982910156,grad_norm: 0.9999990575523175, iteration: 170718
loss: 0.9544267654418945,grad_norm: 0.9999991039858942, iteration: 170719
loss: 1.005010962486267,grad_norm: 0.9186745138242759, iteration: 170720
loss: 1.0081225633621216,grad_norm: 0.9923570853048133, iteration: 170721
loss: 1.0493577718734741,grad_norm: 0.9838216091177372, iteration: 170722
loss: 0.9825485944747925,grad_norm: 0.9595053635123455, iteration: 170723
loss: 0.9959861040115356,grad_norm: 0.9352716716898405, iteration: 170724
loss: 1.0878536701202393,grad_norm: 0.999999188435334, iteration: 170725
loss: 1.039634346961975,grad_norm: 0.9999997280343556, iteration: 170726
loss: 0.9558547735214233,grad_norm: 0.9088536398067678, iteration: 170727
loss: 1.0105587244033813,grad_norm: 0.9999990571178586, iteration: 170728
loss: 0.9729728698730469,grad_norm: 0.889691027508362, iteration: 170729
loss: 1.0578646659851074,grad_norm: 0.9999996461915637, iteration: 170730
loss: 0.9946752786636353,grad_norm: 0.8730982369478273, iteration: 170731
loss: 0.9924288988113403,grad_norm: 0.9999991691279346, iteration: 170732
loss: 0.9737708568572998,grad_norm: 0.9626048409786893, iteration: 170733
loss: 1.021945834159851,grad_norm: 0.9999990845950365, iteration: 170734
loss: 1.0238845348358154,grad_norm: 0.9999990228338227, iteration: 170735
loss: 1.0454751253128052,grad_norm: 0.9132272151128003, iteration: 170736
loss: 1.03352952003479,grad_norm: 0.9999995514540507, iteration: 170737
loss: 1.0162622928619385,grad_norm: 0.9042263142141331, iteration: 170738
loss: 0.9927091002464294,grad_norm: 0.9999991125485909, iteration: 170739
loss: 1.0039801597595215,grad_norm: 0.9446942343835235, iteration: 170740
loss: 1.0794936418533325,grad_norm: 0.9999993974289239, iteration: 170741
loss: 0.9579663872718811,grad_norm: 0.9558903517807606, iteration: 170742
loss: 1.0024223327636719,grad_norm: 0.9999991417020473, iteration: 170743
loss: 1.0051106214523315,grad_norm: 0.7756210168306369, iteration: 170744
loss: 1.00860595703125,grad_norm: 0.8545411411516237, iteration: 170745
loss: 0.9872478246688843,grad_norm: 0.9999992059810447, iteration: 170746
loss: 0.9825025200843811,grad_norm: 0.9572734212742707, iteration: 170747
loss: 0.9922412037849426,grad_norm: 0.9999995859738986, iteration: 170748
loss: 0.961616039276123,grad_norm: 0.9056063349115592, iteration: 170749
loss: 1.0515379905700684,grad_norm: 0.9999992076272686, iteration: 170750
loss: 0.99615877866745,grad_norm: 0.9232771954083299, iteration: 170751
loss: 0.9983960390090942,grad_norm: 0.8821982080818219, iteration: 170752
loss: 0.9928385615348816,grad_norm: 0.8681738219515097, iteration: 170753
loss: 0.9900839924812317,grad_norm: 0.9999990784826048, iteration: 170754
loss: 0.9760847687721252,grad_norm: 0.9909981232048035, iteration: 170755
loss: 0.9882346391677856,grad_norm: 0.9999991883342525, iteration: 170756
loss: 1.0056536197662354,grad_norm: 0.9999990821236155, iteration: 170757
loss: 1.0169658660888672,grad_norm: 0.986812200719592, iteration: 170758
loss: 0.9806337356567383,grad_norm: 0.9119383729148003, iteration: 170759
loss: 1.0230106115341187,grad_norm: 0.9999990730747804, iteration: 170760
loss: 0.9890562295913696,grad_norm: 0.9485175701437119, iteration: 170761
loss: 1.0050510168075562,grad_norm: 0.8680370283011899, iteration: 170762
loss: 0.9843424558639526,grad_norm: 0.9999991594891974, iteration: 170763
loss: 0.9959215521812439,grad_norm: 0.958261670065296, iteration: 170764
loss: 0.9954916834831238,grad_norm: 0.8477109991663385, iteration: 170765
loss: 1.0147185325622559,grad_norm: 0.9704737976861644, iteration: 170766
loss: 0.974506676197052,grad_norm: 0.9999990177651127, iteration: 170767
loss: 0.9955469369888306,grad_norm: 0.9999990627420673, iteration: 170768
loss: 1.0518656969070435,grad_norm: 0.9999997187354881, iteration: 170769
loss: 0.9918724894523621,grad_norm: 0.9999992145818637, iteration: 170770
loss: 1.026291847229004,grad_norm: 0.9999990724558409, iteration: 170771
loss: 0.9856638312339783,grad_norm: 0.8506246423353965, iteration: 170772
loss: 1.0171735286712646,grad_norm: 0.9999991798654887, iteration: 170773
loss: 1.0257223844528198,grad_norm: 0.8967473118031615, iteration: 170774
loss: 1.0026534795761108,grad_norm: 0.999998991841943, iteration: 170775
loss: 1.0292302370071411,grad_norm: 0.9163698931317734, iteration: 170776
loss: 0.9792668223381042,grad_norm: 0.9999991111617088, iteration: 170777
loss: 0.967953622341156,grad_norm: 0.9999991238035583, iteration: 170778
loss: 1.0094801187515259,grad_norm: 0.9999990316041685, iteration: 170779
loss: 1.019320011138916,grad_norm: 0.9999990119924027, iteration: 170780
loss: 1.0130963325500488,grad_norm: 0.907689504970202, iteration: 170781
loss: 1.0273545980453491,grad_norm: 0.9999990934994278, iteration: 170782
loss: 1.0019748210906982,grad_norm: 0.9216861035288706, iteration: 170783
loss: 0.9950712323188782,grad_norm: 0.8845728447585619, iteration: 170784
loss: 0.9845603704452515,grad_norm: 0.9999990592000394, iteration: 170785
loss: 0.967546284198761,grad_norm: 0.907704494796308, iteration: 170786
loss: 0.9886046648025513,grad_norm: 0.8902932684688505, iteration: 170787
loss: 1.0081591606140137,grad_norm: 0.9999990794583309, iteration: 170788
loss: 1.008498191833496,grad_norm: 0.8614130292708483, iteration: 170789
loss: 1.005500078201294,grad_norm: 0.9110559718894141, iteration: 170790
loss: 1.0120820999145508,grad_norm: 0.9900146619291886, iteration: 170791
loss: 1.0069959163665771,grad_norm: 0.9999992040189327, iteration: 170792
loss: 1.007656216621399,grad_norm: 0.9999990380845775, iteration: 170793
loss: 1.046601414680481,grad_norm: 0.9999992928912472, iteration: 170794
loss: 0.9717113971710205,grad_norm: 0.9999991307150239, iteration: 170795
loss: 1.009122371673584,grad_norm: 0.9999989673548297, iteration: 170796
loss: 0.99786776304245,grad_norm: 0.9999989675488009, iteration: 170797
loss: 1.0298733711242676,grad_norm: 0.999999548645555, iteration: 170798
loss: 1.0604201555252075,grad_norm: 0.999999494705906, iteration: 170799
loss: 1.0052367448806763,grad_norm: 0.9885782829727779, iteration: 170800
loss: 1.0348328351974487,grad_norm: 1.0000000141382543, iteration: 170801
loss: 1.002054214477539,grad_norm: 0.9982464519970162, iteration: 170802
loss: 0.9722743630409241,grad_norm: 0.9999991196499727, iteration: 170803
loss: 1.0444912910461426,grad_norm: 0.7946319246455089, iteration: 170804
loss: 1.0168101787567139,grad_norm: 0.9999991416663347, iteration: 170805
loss: 0.9747720956802368,grad_norm: 0.8720290376319586, iteration: 170806
loss: 0.9791713356971741,grad_norm: 0.9202951170692071, iteration: 170807
loss: 0.9934573769569397,grad_norm: 0.9999992848284729, iteration: 170808
loss: 1.0002881288528442,grad_norm: 0.9999993914286898, iteration: 170809
loss: 0.99165940284729,grad_norm: 0.9999991721027545, iteration: 170810
loss: 0.990500271320343,grad_norm: 0.9999990345056367, iteration: 170811
loss: 1.0214512348175049,grad_norm: 0.9799777665049181, iteration: 170812
loss: 1.0685988664627075,grad_norm: 0.9307728158498437, iteration: 170813
loss: 1.021388053894043,grad_norm: 0.9999991988256212, iteration: 170814
loss: 1.0003128051757812,grad_norm: 0.8739873437699519, iteration: 170815
loss: 0.9962053894996643,grad_norm: 0.9999992002141628, iteration: 170816
loss: 1.0205594301223755,grad_norm: 0.8890188953307913, iteration: 170817
loss: 1.0128682851791382,grad_norm: 0.9325939266769874, iteration: 170818
loss: 0.9927086234092712,grad_norm: 0.914269005210268, iteration: 170819
loss: 0.9567088484764099,grad_norm: 0.9999991903153408, iteration: 170820
loss: 0.9810135364532471,grad_norm: 0.9999993295592366, iteration: 170821
loss: 0.9771047234535217,grad_norm: 0.999999029331288, iteration: 170822
loss: 0.9845971465110779,grad_norm: 0.8559839248268499, iteration: 170823
loss: 1.0019587278366089,grad_norm: 0.7676766185399636, iteration: 170824
loss: 0.9913387894630432,grad_norm: 0.9999991501799619, iteration: 170825
loss: 1.0356029272079468,grad_norm: 0.9999991447079075, iteration: 170826
loss: 0.9846150279045105,grad_norm: 0.9999992028129708, iteration: 170827
loss: 1.0553326606750488,grad_norm: 0.9999992657791423, iteration: 170828
loss: 0.9906623959541321,grad_norm: 0.9999993327959725, iteration: 170829
loss: 0.9983201026916504,grad_norm: 0.9999991061289031, iteration: 170830
loss: 0.9791366457939148,grad_norm: 0.9340501136658903, iteration: 170831
loss: 0.9845066070556641,grad_norm: 0.9817652471089493, iteration: 170832
loss: 0.9902271032333374,grad_norm: 0.9462700753160064, iteration: 170833
loss: 0.9759699702262878,grad_norm: 0.9999991222382727, iteration: 170834
loss: 0.9867989420890808,grad_norm: 0.9093780808427909, iteration: 170835
loss: 1.013503074645996,grad_norm: 0.9999992126702335, iteration: 170836
loss: 0.9906228184700012,grad_norm: 0.8236354377608991, iteration: 170837
loss: 0.9809980392456055,grad_norm: 0.8005037917411458, iteration: 170838
loss: 1.0154826641082764,grad_norm: 0.9999989996645563, iteration: 170839
loss: 0.9843885898590088,grad_norm: 0.909192968656344, iteration: 170840
loss: 1.0213985443115234,grad_norm: 0.920700657981693, iteration: 170841
loss: 1.1203397512435913,grad_norm: 0.9999996441335725, iteration: 170842
loss: 0.9872795939445496,grad_norm: 0.8856729631924691, iteration: 170843
loss: 0.9784248471260071,grad_norm: 0.9999991025291249, iteration: 170844
loss: 1.0023524761199951,grad_norm: 0.9199559409377973, iteration: 170845
loss: 1.006914734840393,grad_norm: 0.999999374360102, iteration: 170846
loss: 1.0010122060775757,grad_norm: 0.9999989986586901, iteration: 170847
loss: 0.9996493458747864,grad_norm: 0.9999990415972921, iteration: 170848
loss: 1.0064740180969238,grad_norm: 0.9914808942495181, iteration: 170849
loss: 1.0080362558364868,grad_norm: 0.9713003830309961, iteration: 170850
loss: 0.988734781742096,grad_norm: 0.9999990193698859, iteration: 170851
loss: 0.991973876953125,grad_norm: 0.9418087462284469, iteration: 170852
loss: 0.9952397346496582,grad_norm: 0.9999990467308815, iteration: 170853
loss: 1.0250440835952759,grad_norm: 0.9999991478783263, iteration: 170854
loss: 1.0072970390319824,grad_norm: 0.9999993302182139, iteration: 170855
loss: 1.0248279571533203,grad_norm: 0.9999996515191, iteration: 170856
loss: 1.0152416229248047,grad_norm: 0.9999991821269488, iteration: 170857
loss: 1.0100319385528564,grad_norm: 0.9544609709272933, iteration: 170858
loss: 0.9978502988815308,grad_norm: 0.9999996268442075, iteration: 170859
loss: 1.0158209800720215,grad_norm: 0.9999990027414016, iteration: 170860
loss: 1.0822759866714478,grad_norm: 0.999999405766393, iteration: 170861
loss: 0.9471369981765747,grad_norm: 0.914515339296425, iteration: 170862
loss: 0.9856546521186829,grad_norm: 0.9083473208973742, iteration: 170863
loss: 0.9868384003639221,grad_norm: 0.99999910520073, iteration: 170864
loss: 0.9974123239517212,grad_norm: 0.9999989516180715, iteration: 170865
loss: 1.0503469705581665,grad_norm: 0.9999991375117818, iteration: 170866
loss: 1.0375332832336426,grad_norm: 0.9999990707389305, iteration: 170867
loss: 0.9956321120262146,grad_norm: 0.767933213613345, iteration: 170868
loss: 0.9942834377288818,grad_norm: 0.8319631666420306, iteration: 170869
loss: 0.9876301884651184,grad_norm: 0.9999991244479915, iteration: 170870
loss: 1.0272154808044434,grad_norm: 0.9999990955946937, iteration: 170871
loss: 0.9668699502944946,grad_norm: 0.9999992043153132, iteration: 170872
loss: 0.9680799245834351,grad_norm: 0.9999991302399756, iteration: 170873
loss: 1.0197776556015015,grad_norm: 0.9288664068563809, iteration: 170874
loss: 1.0183732509613037,grad_norm: 0.9939666306199464, iteration: 170875
loss: 0.9836148619651794,grad_norm: 0.9999990831719787, iteration: 170876
loss: 0.9970822930335999,grad_norm: 0.8252497451897212, iteration: 170877
loss: 0.9957090020179749,grad_norm: 0.9999989983973679, iteration: 170878
loss: 0.9599969983100891,grad_norm: 0.9999989714092407, iteration: 170879
loss: 0.9678040146827698,grad_norm: 0.9907762871489563, iteration: 170880
loss: 0.9878622889518738,grad_norm: 0.9999993136737398, iteration: 170881
loss: 1.003896951675415,grad_norm: 0.9999991397305753, iteration: 170882
loss: 1.024788737297058,grad_norm: 0.9999992541575355, iteration: 170883
loss: 1.0111957788467407,grad_norm: 0.9999991916078121, iteration: 170884
loss: 1.018271565437317,grad_norm: 0.9999992054890229, iteration: 170885
loss: 0.9746769070625305,grad_norm: 0.9926820258611231, iteration: 170886
loss: 1.0113590955734253,grad_norm: 0.9805702861303558, iteration: 170887
loss: 1.0559154748916626,grad_norm: 0.9999991672451022, iteration: 170888
loss: 0.9690639972686768,grad_norm: 0.9999991274942474, iteration: 170889
loss: 0.9611406922340393,grad_norm: 0.9999992530621414, iteration: 170890
loss: 1.001238465309143,grad_norm: 0.9617571850109905, iteration: 170891
loss: 0.9861826300621033,grad_norm: 0.999999186782182, iteration: 170892
loss: 1.0091005563735962,grad_norm: 0.9573302397673303, iteration: 170893
loss: 1.016107439994812,grad_norm: 0.999999151704036, iteration: 170894
loss: 1.0399516820907593,grad_norm: 0.9999990346764634, iteration: 170895
loss: 1.001122236251831,grad_norm: 0.941746956383559, iteration: 170896
loss: 1.030390739440918,grad_norm: 0.9655513721412815, iteration: 170897
loss: 1.0181026458740234,grad_norm: 0.9999992331514422, iteration: 170898
loss: 0.9971458315849304,grad_norm: 0.9999991306710797, iteration: 170899
loss: 0.9819471836090088,grad_norm: 0.9858688853982597, iteration: 170900
loss: 0.942230761051178,grad_norm: 0.9999991062314234, iteration: 170901
loss: 1.0359281301498413,grad_norm: 0.905500663064484, iteration: 170902
loss: 0.9566640853881836,grad_norm: 0.9171721123810027, iteration: 170903
loss: 1.0044329166412354,grad_norm: 0.8969428343430133, iteration: 170904
loss: 1.013048529624939,grad_norm: 0.9999990483085104, iteration: 170905
loss: 1.0086778402328491,grad_norm: 0.982916651117667, iteration: 170906
loss: 0.9623393416404724,grad_norm: 0.9999991974753902, iteration: 170907
loss: 0.9642444252967834,grad_norm: 0.9723543791292745, iteration: 170908
loss: 1.0025889873504639,grad_norm: 0.8837655551480494, iteration: 170909
loss: 1.0238595008850098,grad_norm: 0.9814804541275406, iteration: 170910
loss: 0.9909806251525879,grad_norm: 0.9999991852910475, iteration: 170911
loss: 1.000899076461792,grad_norm: 0.9999991571438176, iteration: 170912
loss: 0.9521899819374084,grad_norm: 0.8569120241333911, iteration: 170913
loss: 0.9527495503425598,grad_norm: 0.9922735408061772, iteration: 170914
loss: 0.9982250332832336,grad_norm: 0.8551001793249081, iteration: 170915
loss: 1.0161259174346924,grad_norm: 0.843545705792299, iteration: 170916
loss: 0.9864814281463623,grad_norm: 0.9388239603504466, iteration: 170917
loss: 1.0018452405929565,grad_norm: 0.9999991629460683, iteration: 170918
loss: 0.9796349406242371,grad_norm: 0.9149650864094644, iteration: 170919
loss: 0.994199812412262,grad_norm: 0.9999991338253529, iteration: 170920
loss: 1.0778924226760864,grad_norm: 0.999999801188739, iteration: 170921
loss: 0.9914220571517944,grad_norm: 0.9999990330528437, iteration: 170922
loss: 1.007863998413086,grad_norm: 0.9642019344181495, iteration: 170923
loss: 1.003572940826416,grad_norm: 0.9999990436730789, iteration: 170924
loss: 0.9999408721923828,grad_norm: 0.9999992212757888, iteration: 170925
loss: 1.0185801982879639,grad_norm: 0.9659302629655372, iteration: 170926
loss: 1.0122488737106323,grad_norm: 0.9673753377762496, iteration: 170927
loss: 0.9552786946296692,grad_norm: 0.9999990346276773, iteration: 170928
loss: 0.9589688181877136,grad_norm: 0.9814348490365368, iteration: 170929
loss: 0.9912098050117493,grad_norm: 0.9149446088069592, iteration: 170930
loss: 1.0279580354690552,grad_norm: 0.9999995837359323, iteration: 170931
loss: 1.0138989686965942,grad_norm: 0.9999992263965296, iteration: 170932
loss: 0.9862597584724426,grad_norm: 0.9854180930252154, iteration: 170933
loss: 1.015250563621521,grad_norm: 0.9999991556380305, iteration: 170934
loss: 1.001889705657959,grad_norm: 0.9999991676619627, iteration: 170935
loss: 1.0222846269607544,grad_norm: 0.9036659885125271, iteration: 170936
loss: 0.9741976857185364,grad_norm: 0.9743313816137911, iteration: 170937
loss: 0.978752851486206,grad_norm: 0.7935724818351455, iteration: 170938
loss: 1.0046453475952148,grad_norm: 0.9999991243066102, iteration: 170939
loss: 1.0128039121627808,grad_norm: 0.8915301147246977, iteration: 170940
loss: 0.9905987977981567,grad_norm: 0.9999990554105009, iteration: 170941
loss: 0.9978677034378052,grad_norm: 0.9999993252212951, iteration: 170942
loss: 0.9737826585769653,grad_norm: 0.9999989938129149, iteration: 170943
loss: 1.0066251754760742,grad_norm: 0.9401191544332952, iteration: 170944
loss: 1.0845478773117065,grad_norm: 0.9999993945376946, iteration: 170945
loss: 0.9804657101631165,grad_norm: 0.9999991779606882, iteration: 170946
loss: 0.9854437112808228,grad_norm: 0.9552663822588648, iteration: 170947
loss: 0.9973787069320679,grad_norm: 0.9278803425357752, iteration: 170948
loss: 0.9837794899940491,grad_norm: 0.9999990919562963, iteration: 170949
loss: 0.9882665276527405,grad_norm: 0.9999991460454288, iteration: 170950
loss: 1.0291920900344849,grad_norm: 0.9261628410426483, iteration: 170951
loss: 1.0168167352676392,grad_norm: 0.9999989741625006, iteration: 170952
loss: 0.9637131690979004,grad_norm: 0.917285176069791, iteration: 170953
loss: 0.9994449615478516,grad_norm: 0.8517296608495337, iteration: 170954
loss: 1.0117175579071045,grad_norm: 0.9771708392833279, iteration: 170955
loss: 1.0248507261276245,grad_norm: 0.9999992426313897, iteration: 170956
loss: 0.928231418132782,grad_norm: 0.999999164565581, iteration: 170957
loss: 1.1148484945297241,grad_norm: 0.9999996582426306, iteration: 170958
loss: 0.9504008889198303,grad_norm: 0.9999991540327762, iteration: 170959
loss: 1.0356333255767822,grad_norm: 0.9008408444767024, iteration: 170960
loss: 0.9863152503967285,grad_norm: 0.9999991328130554, iteration: 170961
loss: 0.985752284526825,grad_norm: 0.9661848760748984, iteration: 170962
loss: 0.9913141131401062,grad_norm: 0.9999990364068507, iteration: 170963
loss: 1.0575205087661743,grad_norm: 0.9999991127203424, iteration: 170964
loss: 0.9717662930488586,grad_norm: 0.9581795393564368, iteration: 170965
loss: 1.119174838066101,grad_norm: 0.9999994286031099, iteration: 170966
loss: 0.9909601807594299,grad_norm: 0.8547944668030151, iteration: 170967
loss: 0.9932635426521301,grad_norm: 0.972087417620905, iteration: 170968
loss: 0.9902129173278809,grad_norm: 0.8855854907209061, iteration: 170969
loss: 1.0030159950256348,grad_norm: 0.9999992529495175, iteration: 170970
loss: 0.9927713871002197,grad_norm: 0.9999993106503517, iteration: 170971
loss: 1.0302504301071167,grad_norm: 0.9999993086182652, iteration: 170972
loss: 1.020169973373413,grad_norm: 0.9299703437429755, iteration: 170973
loss: 1.0035181045532227,grad_norm: 0.9999991420574437, iteration: 170974
loss: 0.9560739398002625,grad_norm: 0.9999989852342372, iteration: 170975
loss: 1.002312183380127,grad_norm: 0.9999989018452382, iteration: 170976
loss: 0.9863995909690857,grad_norm: 0.9999991522251589, iteration: 170977
loss: 0.9893738031387329,grad_norm: 0.999999059922453, iteration: 170978
loss: 1.0032986402511597,grad_norm: 0.8774303677462884, iteration: 170979
loss: 1.0139161348342896,grad_norm: 0.9784915558667778, iteration: 170980
loss: 0.9873936176300049,grad_norm: 0.9086852838860792, iteration: 170981
loss: 0.9663556814193726,grad_norm: 0.999999024718901, iteration: 170982
loss: 0.9735791683197021,grad_norm: 0.9863766929119845, iteration: 170983
loss: 1.0312690734863281,grad_norm: 0.8630985222100391, iteration: 170984
loss: 0.9951478838920593,grad_norm: 0.9974882146440486, iteration: 170985
loss: 1.0337809324264526,grad_norm: 0.9999990129556833, iteration: 170986
loss: 1.0435904264450073,grad_norm: 0.9151309419740398, iteration: 170987
loss: 0.9769137501716614,grad_norm: 0.8811091455201406, iteration: 170988
loss: 1.0187128782272339,grad_norm: 0.9605958526940058, iteration: 170989
loss: 1.0399558544158936,grad_norm: 0.9999991529841559, iteration: 170990
loss: 1.0025269985198975,grad_norm: 0.9999991044893389, iteration: 170991
loss: 0.9836616516113281,grad_norm: 0.9519812256602658, iteration: 170992
loss: 0.9891367554664612,grad_norm: 0.9999991423543582, iteration: 170993
loss: 1.0833762884140015,grad_norm: 0.9999993596185384, iteration: 170994
loss: 1.043184518814087,grad_norm: 0.999999068440326, iteration: 170995
loss: 1.1001534461975098,grad_norm: 0.9999991712284723, iteration: 170996
loss: 1.0277979373931885,grad_norm: 0.9746751255522247, iteration: 170997
loss: 0.9890224933624268,grad_norm: 0.9999989213232173, iteration: 170998
loss: 1.009097695350647,grad_norm: 0.9999991101366197, iteration: 170999
loss: 1.0108445882797241,grad_norm: 0.9999990800112131, iteration: 171000
loss: 0.9659166932106018,grad_norm: 0.9278408538327386, iteration: 171001
loss: 1.0339127779006958,grad_norm: 0.999999472558123, iteration: 171002
loss: 1.00029456615448,grad_norm: 0.9286291065130909, iteration: 171003
loss: 1.0139436721801758,grad_norm: 0.9999990502722804, iteration: 171004
loss: 1.0519112348556519,grad_norm: 0.9999990888940505, iteration: 171005
loss: 1.0322999954223633,grad_norm: 0.9999992796294024, iteration: 171006
loss: 1.0186392068862915,grad_norm: 0.9658910892843225, iteration: 171007
loss: 1.0013132095336914,grad_norm: 0.9999991900996055, iteration: 171008
loss: 0.994278609752655,grad_norm: 0.9999991347914896, iteration: 171009
loss: 0.9824714660644531,grad_norm: 0.9999990365368723, iteration: 171010
loss: 0.9848918914794922,grad_norm: 0.8607899731879173, iteration: 171011
loss: 0.9736124277114868,grad_norm: 0.9999991150267171, iteration: 171012
loss: 1.0067822933197021,grad_norm: 0.8628976893870365, iteration: 171013
loss: 1.0243785381317139,grad_norm: 0.9999990770855685, iteration: 171014
loss: 1.0492842197418213,grad_norm: 0.9972955916920054, iteration: 171015
loss: 1.0142159461975098,grad_norm: 0.9052742589564402, iteration: 171016
loss: 1.0017578601837158,grad_norm: 0.9922716823578008, iteration: 171017
loss: 1.0169658660888672,grad_norm: 0.8769939874354536, iteration: 171018
loss: 0.9826856255531311,grad_norm: 0.9205202460924108, iteration: 171019
loss: 0.96571946144104,grad_norm: 0.9999991421546972, iteration: 171020
loss: 1.0056058168411255,grad_norm: 0.9999990209829341, iteration: 171021
loss: 0.9565509557723999,grad_norm: 0.9999990137656254, iteration: 171022
loss: 0.9962353706359863,grad_norm: 0.9139085423096556, iteration: 171023
loss: 1.0054304599761963,grad_norm: 0.9999991667098238, iteration: 171024
loss: 1.0396648645401,grad_norm: 0.9999998790918492, iteration: 171025
loss: 1.007431149482727,grad_norm: 0.9999991029884873, iteration: 171026
loss: 0.9980604648590088,grad_norm: 0.9999989690386164, iteration: 171027
loss: 1.0045902729034424,grad_norm: 0.9999991503967535, iteration: 171028
loss: 1.0012600421905518,grad_norm: 0.8212913210911803, iteration: 171029
loss: 1.017332911491394,grad_norm: 0.9378824395653567, iteration: 171030
loss: 0.9737346172332764,grad_norm: 0.9999989383954976, iteration: 171031
loss: 0.9833754897117615,grad_norm: 0.9999989705225057, iteration: 171032
loss: 1.022044062614441,grad_norm: 0.999999300467938, iteration: 171033
loss: 1.0142513513565063,grad_norm: 0.8777374899474694, iteration: 171034
loss: 0.9436724781990051,grad_norm: 0.9999990938816535, iteration: 171035
loss: 0.9883298277854919,grad_norm: 0.9999993122711984, iteration: 171036
loss: 0.9854042530059814,grad_norm: 0.8778189672714501, iteration: 171037
loss: 1.007523775100708,grad_norm: 0.9999990725289144, iteration: 171038
loss: 0.9982607364654541,grad_norm: 0.9802856430664634, iteration: 171039
loss: 1.023702621459961,grad_norm: 0.9999991229766557, iteration: 171040
loss: 1.0213426351547241,grad_norm: 0.9999992387088097, iteration: 171041
loss: 0.9938489198684692,grad_norm: 0.9999990672414109, iteration: 171042
loss: 0.975490927696228,grad_norm: 0.9805857524247664, iteration: 171043
loss: 0.9898308515548706,grad_norm: 0.9999990105809001, iteration: 171044
loss: 0.977290153503418,grad_norm: 0.9999991562719539, iteration: 171045
loss: 0.949360191822052,grad_norm: 0.9242296980136301, iteration: 171046
loss: 0.999112606048584,grad_norm: 0.999999317128555, iteration: 171047
loss: 0.9805743098258972,grad_norm: 0.9999991071231215, iteration: 171048
loss: 1.0257291793823242,grad_norm: 0.9999995068080919, iteration: 171049
loss: 1.000186562538147,grad_norm: 0.9999990501460377, iteration: 171050
loss: 1.0046610832214355,grad_norm: 0.9999990074429126, iteration: 171051
loss: 1.0008840560913086,grad_norm: 0.9999990435418199, iteration: 171052
loss: 0.9519370794296265,grad_norm: 0.9205558121008204, iteration: 171053
loss: 0.9602851867675781,grad_norm: 0.9577435318469508, iteration: 171054
loss: 1.0278936624526978,grad_norm: 0.9854453240364475, iteration: 171055
loss: 1.0001649856567383,grad_norm: 0.9336902832702815, iteration: 171056
loss: 1.1720613241195679,grad_norm: 0.9999990770550303, iteration: 171057
loss: 1.0013498067855835,grad_norm: 0.9999991775245072, iteration: 171058
loss: 1.0133018493652344,grad_norm: 0.999999150425533, iteration: 171059
loss: 1.011643648147583,grad_norm: 0.9938484104016704, iteration: 171060
loss: 1.0087659358978271,grad_norm: 0.9999989790713427, iteration: 171061
loss: 0.9979634881019592,grad_norm: 0.9999991496537068, iteration: 171062
loss: 1.0059386491775513,grad_norm: 0.999999172235204, iteration: 171063
loss: 1.0043858289718628,grad_norm: 0.7837159937612755, iteration: 171064
loss: 1.0025967359542847,grad_norm: 0.9999992348128008, iteration: 171065
loss: 0.9676316380500793,grad_norm: 0.962444567147434, iteration: 171066
loss: 1.0227404832839966,grad_norm: 0.999999025081445, iteration: 171067
loss: 1.0071711540222168,grad_norm: 0.9920448969161383, iteration: 171068
loss: 1.0077582597732544,grad_norm: 0.8301753875534422, iteration: 171069
loss: 1.019640326499939,grad_norm: 0.9999994560212152, iteration: 171070
loss: 1.0125163793563843,grad_norm: 0.9329397547310887, iteration: 171071
loss: 0.9552760720252991,grad_norm: 0.9999991259000792, iteration: 171072
loss: 0.9706867933273315,grad_norm: 0.9038084557124995, iteration: 171073
loss: 0.9961790442466736,grad_norm: 0.9573122958769912, iteration: 171074
loss: 0.941540002822876,grad_norm: 0.9999991483774274, iteration: 171075
loss: 0.990834653377533,grad_norm: 0.9515260721671417, iteration: 171076
loss: 1.0017714500427246,grad_norm: 0.9999990060930333, iteration: 171077
loss: 1.0171220302581787,grad_norm: 0.9999990355451758, iteration: 171078
loss: 1.003279685974121,grad_norm: 0.9356883476423057, iteration: 171079
loss: 0.9884825348854065,grad_norm: 0.8544383202035286, iteration: 171080
loss: 0.9971383810043335,grad_norm: 0.9999991353382947, iteration: 171081
loss: 0.993563175201416,grad_norm: 0.9999999022740643, iteration: 171082
loss: 1.0050278902053833,grad_norm: 0.9999991320553538, iteration: 171083
loss: 0.9860767722129822,grad_norm: 0.9882536769059543, iteration: 171084
loss: 1.0061936378479004,grad_norm: 0.999999319224837, iteration: 171085
loss: 0.9888163208961487,grad_norm: 0.9999988997438878, iteration: 171086
loss: 0.954464316368103,grad_norm: 0.953068239287653, iteration: 171087
loss: 1.0061135292053223,grad_norm: 0.9999991057655303, iteration: 171088
loss: 0.9612791538238525,grad_norm: 0.999999259515331, iteration: 171089
loss: 1.2301735877990723,grad_norm: 0.9999996398529349, iteration: 171090
loss: 0.9801672101020813,grad_norm: 0.9999992871458266, iteration: 171091
loss: 0.9965413212776184,grad_norm: 0.9110565168342698, iteration: 171092
loss: 0.9939775466918945,grad_norm: 0.8756968882663895, iteration: 171093
loss: 1.0212123394012451,grad_norm: 0.9999991658728408, iteration: 171094
loss: 1.0196454524993896,grad_norm: 0.950681888809244, iteration: 171095
loss: 0.9598428010940552,grad_norm: 0.9208046157695589, iteration: 171096
loss: 1.0512248277664185,grad_norm: 0.9035195263731869, iteration: 171097
loss: 0.9914974570274353,grad_norm: 0.9343200857039407, iteration: 171098
loss: 1.0138649940490723,grad_norm: 0.8643167212893694, iteration: 171099
loss: 0.9486676454544067,grad_norm: 0.9999991221901504, iteration: 171100
loss: 1.0065944194793701,grad_norm: 0.999999127864961, iteration: 171101
loss: 0.9648678302764893,grad_norm: 0.9960140827595877, iteration: 171102
loss: 1.032503604888916,grad_norm: 0.9178639116484949, iteration: 171103
loss: 0.9995025396347046,grad_norm: 0.9999992440305291, iteration: 171104
loss: 0.9984836578369141,grad_norm: 0.9999991603843408, iteration: 171105
loss: 1.0227386951446533,grad_norm: 0.9999991288225442, iteration: 171106
loss: 1.0050216913223267,grad_norm: 0.9481735588079431, iteration: 171107
loss: 1.0120320320129395,grad_norm: 0.8965214550241059, iteration: 171108
loss: 0.9815750122070312,grad_norm: 0.9767285668447945, iteration: 171109
loss: 0.9929123520851135,grad_norm: 0.9999998666455402, iteration: 171110
loss: 0.9913378357887268,grad_norm: 0.9999991530467999, iteration: 171111
loss: 1.0427063703536987,grad_norm: 0.9999992288222249, iteration: 171112
loss: 0.9772757887840271,grad_norm: 0.9999992036623829, iteration: 171113
loss: 1.0308388471603394,grad_norm: 0.9999993633862163, iteration: 171114
loss: 1.001471757888794,grad_norm: 0.9999990245801724, iteration: 171115
loss: 1.3874242305755615,grad_norm: 0.9999995370929308, iteration: 171116
loss: 0.9834668636322021,grad_norm: 0.9999992062508284, iteration: 171117
loss: 1.0447609424591064,grad_norm: 0.8940655294361183, iteration: 171118
loss: 0.9764878749847412,grad_norm: 0.9999991873594618, iteration: 171119
loss: 0.9729418158531189,grad_norm: 0.999998990059133, iteration: 171120
loss: 1.011191487312317,grad_norm: 0.993009122929027, iteration: 171121
loss: 0.99848872423172,grad_norm: 0.9999991531081718, iteration: 171122
loss: 0.950864315032959,grad_norm: 0.9999991241940203, iteration: 171123
loss: 0.9993850588798523,grad_norm: 0.9045877198309173, iteration: 171124
loss: 1.0391182899475098,grad_norm: 0.9999989950785179, iteration: 171125
loss: 0.9824267029762268,grad_norm: 0.912925545144888, iteration: 171126
loss: 1.0313454866409302,grad_norm: 0.9999990770427468, iteration: 171127
loss: 1.0094860792160034,grad_norm: 0.9999992308771425, iteration: 171128
loss: 0.9993389248847961,grad_norm: 0.9999991433171931, iteration: 171129
loss: 1.00038480758667,grad_norm: 0.9646297720322778, iteration: 171130
loss: 0.9910564422607422,grad_norm: 0.9808514719754748, iteration: 171131
loss: 0.9837612509727478,grad_norm: 0.946690465135261, iteration: 171132
loss: 0.9904069304466248,grad_norm: 0.9999988970340183, iteration: 171133
loss: 0.9957632422447205,grad_norm: 0.8512709104846327, iteration: 171134
loss: 0.9811937212944031,grad_norm: 0.9167213021339936, iteration: 171135
loss: 1.0387080907821655,grad_norm: 0.9999990594556706, iteration: 171136
loss: 1.0306652784347534,grad_norm: 0.9999992763036001, iteration: 171137
loss: 0.9914061427116394,grad_norm: 0.9999991103718198, iteration: 171138
loss: 1.0649478435516357,grad_norm: 0.9999997445349293, iteration: 171139
loss: 1.003106951713562,grad_norm: 0.9999992002192466, iteration: 171140
loss: 0.9801283478736877,grad_norm: 0.9922696980629658, iteration: 171141
loss: 0.9802417755126953,grad_norm: 0.9999991132511271, iteration: 171142
loss: 0.9731020927429199,grad_norm: 0.9999990523284582, iteration: 171143
loss: 1.0020935535430908,grad_norm: 0.9999990700560214, iteration: 171144
loss: 1.040579080581665,grad_norm: 0.9999992306562827, iteration: 171145
loss: 1.0043152570724487,grad_norm: 0.9999991865231767, iteration: 171146
loss: 0.9865719676017761,grad_norm: 0.9999991184524972, iteration: 171147
loss: 1.0118083953857422,grad_norm: 0.9999990544887511, iteration: 171148
loss: 1.0194740295410156,grad_norm: 0.9750064961976673, iteration: 171149
loss: 1.0068414211273193,grad_norm: 0.941928211759549, iteration: 171150
loss: 0.9986551403999329,grad_norm: 0.9999990941242458, iteration: 171151
loss: 1.0520942211151123,grad_norm: 0.9814294342572553, iteration: 171152
loss: 0.9907931089401245,grad_norm: 0.9197480191590283, iteration: 171153
loss: 0.9619461894035339,grad_norm: 0.9999991943797415, iteration: 171154
loss: 0.9587736129760742,grad_norm: 0.9999991928462243, iteration: 171155
loss: 0.987272322177887,grad_norm: 0.9999991686193003, iteration: 171156
loss: 0.9802401661872864,grad_norm: 0.8019196293740276, iteration: 171157
loss: 1.0095933675765991,grad_norm: 0.9999998865118362, iteration: 171158
loss: 0.9787074327468872,grad_norm: 0.8896799014178985, iteration: 171159
loss: 0.9893421530723572,grad_norm: 0.9999991215077341, iteration: 171160
loss: 1.0378845930099487,grad_norm: 0.9999991479908276, iteration: 171161
loss: 1.0478404760360718,grad_norm: 0.999999031015297, iteration: 171162
loss: 0.9954395294189453,grad_norm: 0.9999990918254298, iteration: 171163
loss: 1.0035613775253296,grad_norm: 0.9136435861226264, iteration: 171164
loss: 1.0012218952178955,grad_norm: 0.847553753469784, iteration: 171165
loss: 1.0747737884521484,grad_norm: 0.9064358359016209, iteration: 171166
loss: 0.9988841414451599,grad_norm: 0.8778187135328559, iteration: 171167
loss: 1.0137298107147217,grad_norm: 0.9999995038410576, iteration: 171168
loss: 1.001692295074463,grad_norm: 0.9973241890891654, iteration: 171169
loss: 0.9935244917869568,grad_norm: 0.9999990787289662, iteration: 171170
loss: 1.0227288007736206,grad_norm: 0.9886837307784361, iteration: 171171
loss: 1.01785147190094,grad_norm: 0.9999995408976822, iteration: 171172
loss: 0.9950225949287415,grad_norm: 0.9839957251311563, iteration: 171173
loss: 1.0065135955810547,grad_norm: 0.9099659252542147, iteration: 171174
loss: 1.0195577144622803,grad_norm: 0.9999992314455893, iteration: 171175
loss: 1.0474389791488647,grad_norm: 0.9999991085470278, iteration: 171176
loss: 1.0731252431869507,grad_norm: 0.9999991499818822, iteration: 171177
loss: 1.0287342071533203,grad_norm: 0.9999991262093915, iteration: 171178
loss: 1.0659160614013672,grad_norm: 0.9999995091283983, iteration: 171179
loss: 0.9873611927032471,grad_norm: 0.9999994618196058, iteration: 171180
loss: 1.0383719205856323,grad_norm: 0.9999991287999067, iteration: 171181
loss: 1.0220890045166016,grad_norm: 0.928707324480007, iteration: 171182
loss: 0.9962145090103149,grad_norm: 0.9999989853880716, iteration: 171183
loss: 0.9992685317993164,grad_norm: 0.9999991063506223, iteration: 171184
loss: 0.9878235459327698,grad_norm: 0.9983606677408815, iteration: 171185
loss: 0.9709511995315552,grad_norm: 0.9601397077476925, iteration: 171186
loss: 0.9985625147819519,grad_norm: 0.964211700692451, iteration: 171187
loss: 1.057029366493225,grad_norm: 0.9999991819125027, iteration: 171188
loss: 1.0003925561904907,grad_norm: 0.9515989095080029, iteration: 171189
loss: 0.9784004092216492,grad_norm: 0.9999990938050318, iteration: 171190
loss: 1.0397660732269287,grad_norm: 0.9999988795911269, iteration: 171191
loss: 0.9837115406990051,grad_norm: 0.9999990789350197, iteration: 171192
loss: 1.0441843271255493,grad_norm: 0.9999990728062937, iteration: 171193
loss: 1.0115976333618164,grad_norm: 0.999999218530036, iteration: 171194
loss: 1.0259802341461182,grad_norm: 0.9999993485209998, iteration: 171195
loss: 0.9953479170799255,grad_norm: 0.8608079717857042, iteration: 171196
loss: 0.9649620652198792,grad_norm: 0.999999071637378, iteration: 171197
loss: 1.0243597030639648,grad_norm: 0.9999998977688024, iteration: 171198
loss: 1.0097270011901855,grad_norm: 0.999999181939608, iteration: 171199
loss: 1.0118651390075684,grad_norm: 0.9999992134227641, iteration: 171200
loss: 0.9793675541877747,grad_norm: 0.9999991782432307, iteration: 171201
loss: 0.9764339327812195,grad_norm: 0.9999990521203286, iteration: 171202
loss: 0.9886958599090576,grad_norm: 0.8856465731291723, iteration: 171203
loss: 1.0227042436599731,grad_norm: 0.8969503842427958, iteration: 171204
loss: 0.99977046251297,grad_norm: 0.9999990061731053, iteration: 171205
loss: 1.0097215175628662,grad_norm: 0.9999991230607332, iteration: 171206
loss: 0.998126208782196,grad_norm: 0.9999991398642866, iteration: 171207
loss: 1.0191909074783325,grad_norm: 0.977615782812959, iteration: 171208
loss: 1.034887671470642,grad_norm: 0.9999994464247081, iteration: 171209
loss: 1.0331618785858154,grad_norm: 0.8974653795815353, iteration: 171210
loss: 0.9895175099372864,grad_norm: 0.9999991496613191, iteration: 171211
loss: 1.0022298097610474,grad_norm: 0.9994704321169711, iteration: 171212
loss: 0.9744598865509033,grad_norm: 0.9999992194933679, iteration: 171213
loss: 1.0177057981491089,grad_norm: 0.9999991345379344, iteration: 171214
loss: 1.0419620275497437,grad_norm: 0.9999992929080654, iteration: 171215
loss: 1.003882884979248,grad_norm: 0.9959734509371228, iteration: 171216
loss: 1.0536177158355713,grad_norm: 0.937887145385845, iteration: 171217
loss: 1.0178269147872925,grad_norm: 0.9999990224045527, iteration: 171218
loss: 0.9906002283096313,grad_norm: 0.9593208514920077, iteration: 171219
loss: 1.0080478191375732,grad_norm: 0.9242068380543114, iteration: 171220
loss: 0.9868014454841614,grad_norm: 0.8555692054021491, iteration: 171221
loss: 1.0089503526687622,grad_norm: 0.8594759159926173, iteration: 171222
loss: 1.0083770751953125,grad_norm: 0.9999994711992455, iteration: 171223
loss: 1.0059199333190918,grad_norm: 0.982705310680475, iteration: 171224
loss: 1.026908278465271,grad_norm: 0.999999113800818, iteration: 171225
loss: 0.9811126589775085,grad_norm: 0.8924534784833182, iteration: 171226
loss: 0.9701361060142517,grad_norm: 0.8420206146721719, iteration: 171227
loss: 0.996576189994812,grad_norm: 0.99999915785548, iteration: 171228
loss: 0.9907009601593018,grad_norm: 0.8200332502540685, iteration: 171229
loss: 1.0336564779281616,grad_norm: 0.9999992098606639, iteration: 171230
loss: 0.9960876703262329,grad_norm: 0.9999990902300377, iteration: 171231
loss: 0.9969680905342102,grad_norm: 0.9873365292152314, iteration: 171232
loss: 1.0128436088562012,grad_norm: 0.9452749315739665, iteration: 171233
loss: 0.9950848817825317,grad_norm: 0.9999989872998758, iteration: 171234
loss: 0.9488734006881714,grad_norm: 0.9049346379426353, iteration: 171235
loss: 1.0434141159057617,grad_norm: 0.9999990607050248, iteration: 171236
loss: 0.980880856513977,grad_norm: 0.9999992569458739, iteration: 171237
loss: 0.988843560218811,grad_norm: 0.9999990626475296, iteration: 171238
loss: 0.9906204342842102,grad_norm: 0.954228158743244, iteration: 171239
loss: 1.0329912900924683,grad_norm: 0.9395924674576095, iteration: 171240
loss: 1.0172431468963623,grad_norm: 0.9999991109251365, iteration: 171241
loss: 0.9670261144638062,grad_norm: 0.9769903066134951, iteration: 171242
loss: 0.9789577126502991,grad_norm: 0.999999274188352, iteration: 171243
loss: 0.99413001537323,grad_norm: 0.9999989679164412, iteration: 171244
loss: 1.0065633058547974,grad_norm: 0.9014603043979393, iteration: 171245
loss: 1.0223153829574585,grad_norm: 0.9999990583892248, iteration: 171246
loss: 0.9846659302711487,grad_norm: 0.9999990199323583, iteration: 171247
loss: 0.9945738315582275,grad_norm: 0.9999989711652685, iteration: 171248
loss: 0.9769200086593628,grad_norm: 0.9999992455227661, iteration: 171249
loss: 0.9774872660636902,grad_norm: 0.8457144482938955, iteration: 171250
loss: 1.0040030479431152,grad_norm: 0.9999999175170343, iteration: 171251
loss: 0.9774907231330872,grad_norm: 0.9999991170420062, iteration: 171252
loss: 1.0725845098495483,grad_norm: 0.9999995157067091, iteration: 171253
loss: 1.0175925493240356,grad_norm: 0.9999991042468107, iteration: 171254
loss: 0.9901766180992126,grad_norm: 0.999999251926216, iteration: 171255
loss: 1.01275634765625,grad_norm: 0.9999991088396158, iteration: 171256
loss: 0.9913123846054077,grad_norm: 0.9182410714304285, iteration: 171257
loss: 0.9793013334274292,grad_norm: 0.999999100758887, iteration: 171258
loss: 0.9703782796859741,grad_norm: 0.9999991065067727, iteration: 171259
loss: 0.9837053418159485,grad_norm: 0.9999991999109263, iteration: 171260
loss: 1.0123095512390137,grad_norm: 0.8042272746325133, iteration: 171261
loss: 0.9563806056976318,grad_norm: 0.9215397628496005, iteration: 171262
loss: 1.0716382265090942,grad_norm: 0.9999994395880892, iteration: 171263
loss: 0.9807968735694885,grad_norm: 0.9999991308936341, iteration: 171264
loss: 1.0222820043563843,grad_norm: 0.9779815420784976, iteration: 171265
loss: 0.9876164793968201,grad_norm: 0.9999990978670129, iteration: 171266
loss: 0.9873157739639282,grad_norm: 0.9999992245078132, iteration: 171267
loss: 1.016543984413147,grad_norm: 0.9558650657883234, iteration: 171268
loss: 1.0239245891571045,grad_norm: 0.9999991435609175, iteration: 171269
loss: 0.9841485619544983,grad_norm: 0.848779885266245, iteration: 171270
loss: 0.9799155592918396,grad_norm: 0.9999991420078466, iteration: 171271
loss: 1.0165565013885498,grad_norm: 0.9999994316992021, iteration: 171272
loss: 1.0182855129241943,grad_norm: 0.999999764242334, iteration: 171273
loss: 0.9800096750259399,grad_norm: 0.9999989802578794, iteration: 171274
loss: 1.0402089357376099,grad_norm: 0.8313554976690716, iteration: 171275
loss: 1.0350033044815063,grad_norm: 0.8362807597926126, iteration: 171276
loss: 1.008721113204956,grad_norm: 0.999999032027895, iteration: 171277
loss: 0.9660505652427673,grad_norm: 0.9999991343773521, iteration: 171278
loss: 1.012716293334961,grad_norm: 0.9753784449302539, iteration: 171279
loss: 1.0070915222167969,grad_norm: 0.9867876075478158, iteration: 171280
loss: 1.007922649383545,grad_norm: 0.9999992469270989, iteration: 171281
loss: 1.215369462966919,grad_norm: 0.9999997476273907, iteration: 171282
loss: 0.9936431646347046,grad_norm: 0.9028062847283309, iteration: 171283
loss: 0.9918229579925537,grad_norm: 0.930036829573544, iteration: 171284
loss: 0.980633556842804,grad_norm: 0.999999107502982, iteration: 171285
loss: 0.9861113429069519,grad_norm: 0.7862223561199837, iteration: 171286
loss: 0.9932225942611694,grad_norm: 0.9999989946914553, iteration: 171287
loss: 0.9963666796684265,grad_norm: 0.8440244930196408, iteration: 171288
loss: 1.0279927253723145,grad_norm: 0.9627365624893476, iteration: 171289
loss: 1.0496222972869873,grad_norm: 0.9999991666855786, iteration: 171290
loss: 1.0243041515350342,grad_norm: 0.9999991145843695, iteration: 171291
loss: 0.9676793217658997,grad_norm: 0.8664870097937959, iteration: 171292
loss: 0.9875041842460632,grad_norm: 0.9999991684170855, iteration: 171293
loss: 0.9957842230796814,grad_norm: 0.9635032391853621, iteration: 171294
loss: 0.9852984547615051,grad_norm: 0.9282861773171749, iteration: 171295
loss: 1.015659213066101,grad_norm: 0.9465114086281864, iteration: 171296
loss: 1.017335295677185,grad_norm: 0.9789002273611936, iteration: 171297
loss: 1.0740710496902466,grad_norm: 0.9999991922984648, iteration: 171298
loss: 0.9999642968177795,grad_norm: 0.9999991129710922, iteration: 171299
loss: 0.9929043650627136,grad_norm: 0.9999991938540709, iteration: 171300
loss: 1.022939920425415,grad_norm: 0.9999990364659225, iteration: 171301
loss: 1.004838466644287,grad_norm: 0.9573429560699958, iteration: 171302
loss: 1.0057448148727417,grad_norm: 0.8468025677041549, iteration: 171303
loss: 0.983198881149292,grad_norm: 0.9999990278917928, iteration: 171304
loss: 0.9915169477462769,grad_norm: 0.99999918640603, iteration: 171305
loss: 1.0035854578018188,grad_norm: 0.8176218180453986, iteration: 171306
loss: 0.9768679141998291,grad_norm: 0.9999990624598392, iteration: 171307
loss: 1.0007489919662476,grad_norm: 0.9999991380066857, iteration: 171308
loss: 1.0131653547286987,grad_norm: 0.8549211675412683, iteration: 171309
loss: 1.012554407119751,grad_norm: 0.814050471602597, iteration: 171310
loss: 0.9473313689231873,grad_norm: 0.8400596450676278, iteration: 171311
loss: 1.0038807392120361,grad_norm: 0.9447954209098361, iteration: 171312
loss: 1.0336881875991821,grad_norm: 0.9242596138295422, iteration: 171313
loss: 1.0263465642929077,grad_norm: 0.9999990041088781, iteration: 171314
loss: 0.999302864074707,grad_norm: 0.9999992248642469, iteration: 171315
loss: 1.0030356645584106,grad_norm: 0.9999991730917571, iteration: 171316
loss: 1.0150017738342285,grad_norm: 0.9671803653185621, iteration: 171317
loss: 0.9957241415977478,grad_norm: 0.9999990909511179, iteration: 171318
loss: 0.9784027338027954,grad_norm: 0.880430123347642, iteration: 171319
loss: 1.115065336227417,grad_norm: 0.9999997866769693, iteration: 171320
loss: 0.9776369333267212,grad_norm: 0.8778461177000995, iteration: 171321
loss: 1.0004959106445312,grad_norm: 0.9999992845566263, iteration: 171322
loss: 1.0166656970977783,grad_norm: 0.9999990600471154, iteration: 171323
loss: 1.0153906345367432,grad_norm: 0.9999991331943257, iteration: 171324
loss: 0.9932273626327515,grad_norm: 0.9999990984060331, iteration: 171325
loss: 1.032243013381958,grad_norm: 0.9582810893941061, iteration: 171326
loss: 1.0247352123260498,grad_norm: 0.9999995888918297, iteration: 171327
loss: 1.0030120611190796,grad_norm: 0.9961591284972432, iteration: 171328
loss: 1.0017155408859253,grad_norm: 0.9999991970296199, iteration: 171329
loss: 1.0104975700378418,grad_norm: 0.9094425416146626, iteration: 171330
loss: 1.0189862251281738,grad_norm: 0.9999995448364325, iteration: 171331
loss: 1.0006937980651855,grad_norm: 0.9999991336684215, iteration: 171332
loss: 1.0125150680541992,grad_norm: 0.9999991390439479, iteration: 171333
loss: 1.0002326965332031,grad_norm: 0.9999990890583669, iteration: 171334
loss: 1.0052783489227295,grad_norm: 0.9999991667328256, iteration: 171335
loss: 1.0098322629928589,grad_norm: 0.9999993550789237, iteration: 171336
loss: 1.0430859327316284,grad_norm: 0.9999992836431382, iteration: 171337
loss: 0.9924920797348022,grad_norm: 0.8543675963984321, iteration: 171338
loss: 1.0016205310821533,grad_norm: 0.8324620744177336, iteration: 171339
loss: 1.0053455829620361,grad_norm: 0.9999991310040492, iteration: 171340
loss: 0.9769025444984436,grad_norm: 0.9654472740063804, iteration: 171341
loss: 0.9742318391799927,grad_norm: 0.8349713778929547, iteration: 171342
loss: 1.0206232070922852,grad_norm: 0.9999989991079833, iteration: 171343
loss: 0.962921142578125,grad_norm: 0.9829274899279226, iteration: 171344
loss: 0.9975566267967224,grad_norm: 0.9999990142198285, iteration: 171345
loss: 0.9824994206428528,grad_norm: 0.9608614191000164, iteration: 171346
loss: 1.0232304334640503,grad_norm: 0.9336998367749335, iteration: 171347
loss: 0.9981043934822083,grad_norm: 0.8994076228428642, iteration: 171348
loss: 1.0208238363265991,grad_norm: 0.9999991692464667, iteration: 171349
loss: 1.00200355052948,grad_norm: 0.9999990641921931, iteration: 171350
loss: 1.0113791227340698,grad_norm: 0.9999990253579586, iteration: 171351
loss: 1.0252685546875,grad_norm: 0.8963620609089309, iteration: 171352
loss: 1.0242048501968384,grad_norm: 0.9999991443835402, iteration: 171353
loss: 0.9954654574394226,grad_norm: 0.9999997472957781, iteration: 171354
loss: 1.0579901933670044,grad_norm: 0.999999049969686, iteration: 171355
loss: 1.0224952697753906,grad_norm: 0.9999992587934333, iteration: 171356
loss: 1.019572377204895,grad_norm: 0.9744304249925838, iteration: 171357
loss: 0.9879477620124817,grad_norm: 0.9999989725578561, iteration: 171358
loss: 1.0128179788589478,grad_norm: 0.9999990954996227, iteration: 171359
loss: 0.9592956304550171,grad_norm: 0.9999992308633667, iteration: 171360
loss: 0.9836294054985046,grad_norm: 0.9377842602886355, iteration: 171361
loss: 1.0222572088241577,grad_norm: 0.999998932472704, iteration: 171362
loss: 1.0174514055252075,grad_norm: 0.9999990266474647, iteration: 171363
loss: 0.9613357782363892,grad_norm: 0.9874844013572722, iteration: 171364
loss: 0.9705974459648132,grad_norm: 0.9999991096415745, iteration: 171365
loss: 1.0039010047912598,grad_norm: 0.9999989961228217, iteration: 171366
loss: 1.0406482219696045,grad_norm: 0.9999991306438596, iteration: 171367
loss: 0.9960821866989136,grad_norm: 0.9260637886840943, iteration: 171368
loss: 1.0062671899795532,grad_norm: 0.9784369469233266, iteration: 171369
loss: 1.0144010782241821,grad_norm: 0.9909227082545139, iteration: 171370
loss: 1.0040580034255981,grad_norm: 0.9999991864163948, iteration: 171371
loss: 0.9987460374832153,grad_norm: 0.9999990514001412, iteration: 171372
loss: 1.063435673713684,grad_norm: 0.9999992922218695, iteration: 171373
loss: 1.0819544792175293,grad_norm: 0.9999992081670781, iteration: 171374
loss: 1.022888422012329,grad_norm: 0.9112614764359065, iteration: 171375
loss: 0.9569137096405029,grad_norm: 0.9098208670075337, iteration: 171376
loss: 1.0713727474212646,grad_norm: 0.9999991980508688, iteration: 171377
loss: 1.0253034830093384,grad_norm: 0.9999999096445081, iteration: 171378
loss: 0.9830468893051147,grad_norm: 0.9999991755829278, iteration: 171379
loss: 0.9982699155807495,grad_norm: 0.9883730438651538, iteration: 171380
loss: 1.0135612487792969,grad_norm: 0.8178286615437456, iteration: 171381
loss: 1.0080275535583496,grad_norm: 0.9100726023414387, iteration: 171382
loss: 0.9920402765274048,grad_norm: 0.999998919196548, iteration: 171383
loss: 1.0030468702316284,grad_norm: 0.9891917597442843, iteration: 171384
loss: 1.1130741834640503,grad_norm: 0.9999992214993537, iteration: 171385
loss: 1.010411262512207,grad_norm: 0.9999995484321573, iteration: 171386
loss: 1.0152506828308105,grad_norm: 0.9705133951363947, iteration: 171387
loss: 1.0229253768920898,grad_norm: 0.9999992590168331, iteration: 171388
loss: 1.0120022296905518,grad_norm: 0.8862295724299326, iteration: 171389
loss: 1.0016653537750244,grad_norm: 0.9999991178700468, iteration: 171390
loss: 1.0042304992675781,grad_norm: 0.9999990219560488, iteration: 171391
loss: 1.1322983503341675,grad_norm: 0.9999993413775622, iteration: 171392
loss: 1.0033626556396484,grad_norm: 0.9999991530871972, iteration: 171393
loss: 1.0247974395751953,grad_norm: 0.9254266470915031, iteration: 171394
loss: 0.9696325659751892,grad_norm: 0.9999987949206974, iteration: 171395
loss: 0.9839663505554199,grad_norm: 0.9999991131091497, iteration: 171396
loss: 0.9929598569869995,grad_norm: 0.9537582470120267, iteration: 171397
loss: 1.038643717765808,grad_norm: 0.9999990323941914, iteration: 171398
loss: 0.9826588034629822,grad_norm: 0.99839534390709, iteration: 171399
loss: 1.005369782447815,grad_norm: 0.9184199211835581, iteration: 171400
loss: 1.0454598665237427,grad_norm: 0.9999991234698059, iteration: 171401
loss: 0.9830036163330078,grad_norm: 0.8396043224916202, iteration: 171402
loss: 0.9870007634162903,grad_norm: 0.99999912791167, iteration: 171403
loss: 1.005044937133789,grad_norm: 0.9999992810155348, iteration: 171404
loss: 1.020294189453125,grad_norm: 0.9935498616419087, iteration: 171405
loss: 1.0364853143692017,grad_norm: 0.9999991518303558, iteration: 171406
loss: 0.990178108215332,grad_norm: 0.9064058869504448, iteration: 171407
loss: 1.0132577419281006,grad_norm: 0.9999990577253052, iteration: 171408
loss: 1.0087230205535889,grad_norm: 0.9999991157370857, iteration: 171409
loss: 0.9706403613090515,grad_norm: 0.895715116996833, iteration: 171410
loss: 0.996281623840332,grad_norm: 0.9288406081652706, iteration: 171411
loss: 0.9930031299591064,grad_norm: 0.9646306878869515, iteration: 171412
loss: 0.9819138050079346,grad_norm: 0.8723872046208512, iteration: 171413
loss: 0.9902614951133728,grad_norm: 0.9920058080107835, iteration: 171414
loss: 1.0079160928726196,grad_norm: 0.9999990384743549, iteration: 171415
loss: 1.0135698318481445,grad_norm: 0.9614950952144148, iteration: 171416
loss: 0.996338427066803,grad_norm: 0.9999991229782027, iteration: 171417
loss: 1.060091257095337,grad_norm: 0.9999998496994423, iteration: 171418
loss: 1.0274466276168823,grad_norm: 0.999999103439465, iteration: 171419
loss: 1.0054399967193604,grad_norm: 0.9458388222426736, iteration: 171420
loss: 1.0229625701904297,grad_norm: 0.9999991190708677, iteration: 171421
loss: 0.9855650067329407,grad_norm: 0.8966786412253352, iteration: 171422
loss: 1.0206389427185059,grad_norm: 0.9999991473788356, iteration: 171423
loss: 1.0197266340255737,grad_norm: 0.977624526028125, iteration: 171424
loss: 0.9871171116828918,grad_norm: 0.9544134097185294, iteration: 171425
loss: 0.9817048907279968,grad_norm: 0.9788365934298028, iteration: 171426
loss: 0.9764581322669983,grad_norm: 0.8202576367065713, iteration: 171427
loss: 1.0024489164352417,grad_norm: 0.8756688671886955, iteration: 171428
loss: 0.9904001355171204,grad_norm: 0.9288207271201521, iteration: 171429
loss: 1.0289669036865234,grad_norm: 0.9999991372285273, iteration: 171430
loss: 0.9869864583015442,grad_norm: 0.9401776768643032, iteration: 171431
loss: 0.9961099028587341,grad_norm: 0.9184793377254244, iteration: 171432
loss: 1.0040944814682007,grad_norm: 0.9111637182180264, iteration: 171433
loss: 0.9932723641395569,grad_norm: 0.8801202572554044, iteration: 171434
loss: 1.0137991905212402,grad_norm: 0.9514153145387652, iteration: 171435
loss: 1.030717372894287,grad_norm: 0.9626103158533825, iteration: 171436
loss: 0.9793205261230469,grad_norm: 0.9218574991988056, iteration: 171437
loss: 0.9773518443107605,grad_norm: 0.9765807704800968, iteration: 171438
loss: 1.0037238597869873,grad_norm: 0.9938019598475113, iteration: 171439
loss: 1.0474328994750977,grad_norm: 0.9999994809872941, iteration: 171440
loss: 1.0088255405426025,grad_norm: 0.999999133580426, iteration: 171441
loss: 0.971836507320404,grad_norm: 0.9999992353248648, iteration: 171442
loss: 1.0104132890701294,grad_norm: 0.9619903773732513, iteration: 171443
loss: 0.9972014427185059,grad_norm: 0.9847318038788507, iteration: 171444
loss: 1.0070269107818604,grad_norm: 0.9999992656622937, iteration: 171445
loss: 0.9627889394760132,grad_norm: 0.9999991094882942, iteration: 171446
loss: 1.0193419456481934,grad_norm: 0.8740632394736825, iteration: 171447
loss: 0.9746038317680359,grad_norm: 0.9911977058197718, iteration: 171448
loss: 1.0067877769470215,grad_norm: 0.8825693169789728, iteration: 171449
loss: 1.0601836442947388,grad_norm: 0.9999996010311045, iteration: 171450
loss: 0.993794322013855,grad_norm: 0.9999991261340161, iteration: 171451
loss: 1.0000817775726318,grad_norm: 0.9710184551770465, iteration: 171452
loss: 1.0257172584533691,grad_norm: 0.9999993052644458, iteration: 171453
loss: 1.0183029174804688,grad_norm: 0.9999992896971202, iteration: 171454
loss: 0.9682968258857727,grad_norm: 0.9999989582360115, iteration: 171455
loss: 0.9910631775856018,grad_norm: 0.9999990928422622, iteration: 171456
loss: 0.9971105456352234,grad_norm: 0.9063739249943833, iteration: 171457
loss: 0.998494029045105,grad_norm: 0.8245893824304593, iteration: 171458
loss: 0.9898421168327332,grad_norm: 0.9999992211541976, iteration: 171459
loss: 0.9764163494110107,grad_norm: 0.9838365827667488, iteration: 171460
loss: 1.0885435342788696,grad_norm: 0.9999991674129168, iteration: 171461
loss: 1.0861244201660156,grad_norm: 0.9999995832803813, iteration: 171462
loss: 1.0401380062103271,grad_norm: 0.8778466577446519, iteration: 171463
loss: 0.982157826423645,grad_norm: 0.9999995944485703, iteration: 171464
loss: 0.9805140495300293,grad_norm: 0.7401769499852232, iteration: 171465
loss: 0.9843681454658508,grad_norm: 0.9999990439704478, iteration: 171466
loss: 1.0330157279968262,grad_norm: 0.9999991975500068, iteration: 171467
loss: 0.9962946176528931,grad_norm: 0.9718797846458913, iteration: 171468
loss: 0.9750752449035645,grad_norm: 0.9842379458939964, iteration: 171469
loss: 0.9989527463912964,grad_norm: 0.9999992308990244, iteration: 171470
loss: 1.008537769317627,grad_norm: 0.9999998939622766, iteration: 171471
loss: 1.013288974761963,grad_norm: 0.995146586515319, iteration: 171472
loss: 0.9934152960777283,grad_norm: 0.9635755934191779, iteration: 171473
loss: 1.0138798952102661,grad_norm: 0.9999991960175338, iteration: 171474
loss: 1.037781000137329,grad_norm: 0.9999990924325207, iteration: 171475
loss: 1.0072699785232544,grad_norm: 0.999999178319729, iteration: 171476
loss: 0.9944564700126648,grad_norm: 0.9999992998850775, iteration: 171477
loss: 1.058039903640747,grad_norm: 0.9999993970276214, iteration: 171478
loss: 0.9795870780944824,grad_norm: 0.9999992378215067, iteration: 171479
loss: 1.0222748517990112,grad_norm: 0.9999989956620419, iteration: 171480
loss: 0.9962166547775269,grad_norm: 0.9999992595104179, iteration: 171481
loss: 1.0030862092971802,grad_norm: 0.9999995934616525, iteration: 171482
loss: 1.0099821090698242,grad_norm: 0.8830495832165303, iteration: 171483
loss: 0.9898436069488525,grad_norm: 0.9999990370014792, iteration: 171484
loss: 0.978617250919342,grad_norm: 0.9999989602862295, iteration: 171485
loss: 0.9899919033050537,grad_norm: 0.8593182093596482, iteration: 171486
loss: 0.9920087456703186,grad_norm: 0.9307894484119379, iteration: 171487
loss: 1.0340012311935425,grad_norm: 0.9999990216690676, iteration: 171488
loss: 1.0015549659729004,grad_norm: 0.9999994367895962, iteration: 171489
loss: 0.9937571883201599,grad_norm: 0.9999991983970076, iteration: 171490
loss: 0.9885364770889282,grad_norm: 0.9654951206002965, iteration: 171491
loss: 1.002911925315857,grad_norm: 0.933582342961474, iteration: 171492
loss: 0.9925136566162109,grad_norm: 0.999999106756783, iteration: 171493
loss: 1.0293089151382446,grad_norm: 0.9449391827275032, iteration: 171494
loss: 0.9739140868186951,grad_norm: 0.9999991809677193, iteration: 171495
loss: 1.0125290155410767,grad_norm: 0.9999990866862848, iteration: 171496
loss: 1.0143636465072632,grad_norm: 0.9653855931420977, iteration: 171497
loss: 0.9900113940238953,grad_norm: 0.9000175608910335, iteration: 171498
loss: 1.0120631456375122,grad_norm: 0.9887855098777684, iteration: 171499
loss: 1.0208626985549927,grad_norm: 0.9999991327867013, iteration: 171500
loss: 1.057466745376587,grad_norm: 0.9999997855488597, iteration: 171501
loss: 0.9710487127304077,grad_norm: 0.8361172873021576, iteration: 171502
loss: 0.997901439666748,grad_norm: 0.9999990555592142, iteration: 171503
loss: 0.9998899698257446,grad_norm: 0.9399761516745592, iteration: 171504
loss: 0.9724246859550476,grad_norm: 0.8947997651711443, iteration: 171505
loss: 1.0000613927841187,grad_norm: 0.9999991874773942, iteration: 171506
loss: 0.9909720420837402,grad_norm: 0.9999991032320031, iteration: 171507
loss: 0.9790145754814148,grad_norm: 0.9999992213027373, iteration: 171508
loss: 0.9820369482040405,grad_norm: 0.9999996041620599, iteration: 171509
loss: 1.0446008443832397,grad_norm: 0.9999993671867298, iteration: 171510
loss: 1.0510166883468628,grad_norm: 0.8466572875510988, iteration: 171511
loss: 1.0438250303268433,grad_norm: 0.9699427983167007, iteration: 171512
loss: 1.0058566331863403,grad_norm: 0.9039991523516522, iteration: 171513
loss: 1.027890920639038,grad_norm: 0.9999989171179382, iteration: 171514
loss: 1.0007776021957397,grad_norm: 0.9999992634671431, iteration: 171515
loss: 1.076356053352356,grad_norm: 0.9999993895371063, iteration: 171516
loss: 1.0251473188400269,grad_norm: 0.9999991663334189, iteration: 171517
loss: 1.0132083892822266,grad_norm: 0.9999992089046063, iteration: 171518
loss: 0.9867480397224426,grad_norm: 0.9999992121877539, iteration: 171519
loss: 1.0306737422943115,grad_norm: 0.9999990636634557, iteration: 171520
loss: 0.9973131418228149,grad_norm: 0.9407774152147873, iteration: 171521
loss: 1.0548635721206665,grad_norm: 0.99999926998841, iteration: 171522
loss: 1.074242115020752,grad_norm: 0.9999997671598087, iteration: 171523
loss: 1.0071245431900024,grad_norm: 0.9999991134677635, iteration: 171524
loss: 0.9829235672950745,grad_norm: 0.9999991736636993, iteration: 171525
loss: 1.0047777891159058,grad_norm: 0.9753461047270687, iteration: 171526
loss: 0.9946172833442688,grad_norm: 0.9999990244186082, iteration: 171527
loss: 1.0029146671295166,grad_norm: 0.9366671024976991, iteration: 171528
loss: 1.0131678581237793,grad_norm: 0.9999991901299112, iteration: 171529
loss: 0.989335298538208,grad_norm: 0.8395639058791601, iteration: 171530
loss: 1.0069186687469482,grad_norm: 0.8593148117761749, iteration: 171531
loss: 1.0381358861923218,grad_norm: 0.9999996417080154, iteration: 171532
loss: 1.0045225620269775,grad_norm: 0.9999990856144028, iteration: 171533
loss: 0.9937760233879089,grad_norm: 0.9999994324179413, iteration: 171534
loss: 0.9965918064117432,grad_norm: 0.9528102437059959, iteration: 171535
loss: 0.9529621005058289,grad_norm: 0.8477055383339918, iteration: 171536
loss: 1.0093432664871216,grad_norm: 0.967905586248677, iteration: 171537
loss: 0.9630014896392822,grad_norm: 0.9410742601555951, iteration: 171538
loss: 0.9982313513755798,grad_norm: 0.8392425068240355, iteration: 171539
loss: 1.07124924659729,grad_norm: 0.8886265880306219, iteration: 171540
loss: 1.185530662536621,grad_norm: 0.9999999502706505, iteration: 171541
loss: 0.9819825291633606,grad_norm: 0.9751540932664032, iteration: 171542
loss: 1.0028051137924194,grad_norm: 0.999999278494767, iteration: 171543
loss: 1.096668004989624,grad_norm: 0.9999995207784794, iteration: 171544
loss: 1.031825065612793,grad_norm: 0.9118685061544636, iteration: 171545
loss: 1.0027432441711426,grad_norm: 0.9999989735306165, iteration: 171546
loss: 1.0604122877120972,grad_norm: 0.99999965924923, iteration: 171547
loss: 1.0200761556625366,grad_norm: 0.9999991188742954, iteration: 171548
loss: 1.0125737190246582,grad_norm: 0.9999992222463386, iteration: 171549
loss: 0.9772257208824158,grad_norm: 0.9151015430103494, iteration: 171550
loss: 0.9837100505828857,grad_norm: 0.9999995779850847, iteration: 171551
loss: 1.0025832653045654,grad_norm: 0.9782321154257945, iteration: 171552
loss: 1.037340760231018,grad_norm: 0.9999996590514544, iteration: 171553
loss: 1.0679458379745483,grad_norm: 0.9999997484616652, iteration: 171554
loss: 1.0409457683563232,grad_norm: 0.99999933557804, iteration: 171555
loss: 1.0207278728485107,grad_norm: 0.9999992248941327, iteration: 171556
loss: 1.0164752006530762,grad_norm: 0.9999995046413186, iteration: 171557
loss: 0.9670965075492859,grad_norm: 0.9999991634859563, iteration: 171558
loss: 0.9862841367721558,grad_norm: 0.8659409261257814, iteration: 171559
loss: 0.9925888776779175,grad_norm: 0.9999990964660619, iteration: 171560
loss: 1.0042626857757568,grad_norm: 0.9802473076586296, iteration: 171561
loss: 0.9970277547836304,grad_norm: 0.999999349750362, iteration: 171562
loss: 0.9587988257408142,grad_norm: 0.9433361826221471, iteration: 171563
loss: 0.9903517365455627,grad_norm: 0.9406469346850028, iteration: 171564
loss: 0.9824339747428894,grad_norm: 0.9999992188619924, iteration: 171565
loss: 1.0012500286102295,grad_norm: 0.9999990367681931, iteration: 171566
loss: 1.0125062465667725,grad_norm: 0.9999992466997164, iteration: 171567
loss: 1.0005831718444824,grad_norm: 0.9502295793287677, iteration: 171568
loss: 1.093281865119934,grad_norm: 0.9999994139353577, iteration: 171569
loss: 1.0211373567581177,grad_norm: 0.9999991375108566, iteration: 171570
loss: 1.0002012252807617,grad_norm: 0.9999991665218538, iteration: 171571
loss: 1.03746497631073,grad_norm: 0.999999183439903, iteration: 171572
loss: 1.0777770280838013,grad_norm: 0.9999991905686454, iteration: 171573
loss: 0.9758092164993286,grad_norm: 0.9999990685047567, iteration: 171574
loss: 1.0160987377166748,grad_norm: 0.9453645282378503, iteration: 171575
loss: 0.9744505882263184,grad_norm: 0.8942408597536335, iteration: 171576
loss: 0.9853243827819824,grad_norm: 0.8818403759645652, iteration: 171577
loss: 1.0228242874145508,grad_norm: 0.999999372620907, iteration: 171578
loss: 0.9975478053092957,grad_norm: 0.999999119535682, iteration: 171579
loss: 1.1026932001113892,grad_norm: 1.0000000389373593, iteration: 171580
loss: 1.0952378511428833,grad_norm: 0.9999997050372689, iteration: 171581
loss: 1.0691851377487183,grad_norm: 0.9999992799651062, iteration: 171582
loss: 0.9746583104133606,grad_norm: 0.9999990806624424, iteration: 171583
loss: 0.9953897595405579,grad_norm: 0.9999989658693106, iteration: 171584
loss: 1.0652737617492676,grad_norm: 0.9999992909123728, iteration: 171585
loss: 0.9757810831069946,grad_norm: 0.9999990372261818, iteration: 171586
loss: 1.117842674255371,grad_norm: 0.9999994693720589, iteration: 171587
loss: 0.9929185509681702,grad_norm: 0.8552305931979783, iteration: 171588
loss: 0.9461668133735657,grad_norm: 0.9366008398505253, iteration: 171589
loss: 0.9949979186058044,grad_norm: 0.9555762637264708, iteration: 171590
loss: 1.1047898530960083,grad_norm: 0.9935632496190584, iteration: 171591
loss: 0.9711393117904663,grad_norm: 0.9999992380575587, iteration: 171592
loss: 1.054919719696045,grad_norm: 0.9999994313828962, iteration: 171593
loss: 1.0022141933441162,grad_norm: 0.9561156852876729, iteration: 171594
loss: 1.1287505626678467,grad_norm: 0.9999998678446324, iteration: 171595
loss: 1.0056979656219482,grad_norm: 0.9999993716796353, iteration: 171596
loss: 1.0391322374343872,grad_norm: 0.7894240074682113, iteration: 171597
loss: 1.025726079940796,grad_norm: 0.999999483565249, iteration: 171598
loss: 1.0195188522338867,grad_norm: 0.9999991933701352, iteration: 171599
loss: 0.9902616739273071,grad_norm: 0.8636101637238667, iteration: 171600
loss: 1.0967391729354858,grad_norm: 0.9999991349370572, iteration: 171601
loss: 1.0181679725646973,grad_norm: 0.9999990584776853, iteration: 171602
loss: 0.9842557311058044,grad_norm: 0.9553642850517005, iteration: 171603
loss: 0.9806908369064331,grad_norm: 0.9221776949268492, iteration: 171604
loss: 0.9917004704475403,grad_norm: 0.9999991764150037, iteration: 171605
loss: 1.1726982593536377,grad_norm: 0.9999992429479317, iteration: 171606
loss: 1.0353896617889404,grad_norm: 0.9999995386585797, iteration: 171607
loss: 1.1743228435516357,grad_norm: 0.9999998761266714, iteration: 171608
loss: 1.1417537927627563,grad_norm: 0.9999998040247967, iteration: 171609
loss: 1.2967555522918701,grad_norm: 0.9999998372057882, iteration: 171610
loss: 1.0228430032730103,grad_norm: 0.9999993843221124, iteration: 171611
loss: 1.0439488887786865,grad_norm: 0.9999998679429539, iteration: 171612
loss: 1.0020917654037476,grad_norm: 0.9999994298261501, iteration: 171613
loss: 1.0929774045944214,grad_norm: 0.9999991853018494, iteration: 171614
loss: 1.0102125406265259,grad_norm: 0.9999990858497793, iteration: 171615
loss: 1.012754201889038,grad_norm: 0.9999994233129552, iteration: 171616
loss: 1.0040324926376343,grad_norm: 0.9654447378012206, iteration: 171617
loss: 1.0025888681411743,grad_norm: 0.9999990277939829, iteration: 171618
loss: 1.0078318119049072,grad_norm: 0.9999991507743393, iteration: 171619
loss: 1.1177669763565063,grad_norm: 0.9999994055430441, iteration: 171620
loss: 0.9814234972000122,grad_norm: 0.9700509443254399, iteration: 171621
loss: 1.0715608596801758,grad_norm: 0.9999998685521612, iteration: 171622
loss: 1.134047269821167,grad_norm: 0.9999998152447758, iteration: 171623
loss: 0.9934328198432922,grad_norm: 0.8679248403896301, iteration: 171624
loss: 0.9864846467971802,grad_norm: 0.9999991912237711, iteration: 171625
loss: 1.0139589309692383,grad_norm: 0.999999050412735, iteration: 171626
loss: 1.0004962682724,grad_norm: 0.8155295799218726, iteration: 171627
loss: 1.1287587881088257,grad_norm: 0.9999995938873476, iteration: 171628
loss: 1.0074594020843506,grad_norm: 0.9451010248593962, iteration: 171629
loss: 0.9996880292892456,grad_norm: 0.9999989677787556, iteration: 171630
loss: 1.0016298294067383,grad_norm: 0.9999991058913416, iteration: 171631
loss: 1.0257357358932495,grad_norm: 0.9999991090035425, iteration: 171632
loss: 1.0559190511703491,grad_norm: 0.9999995643701269, iteration: 171633
loss: 1.0400218963623047,grad_norm: 0.8839005384779529, iteration: 171634
loss: 1.075912356376648,grad_norm: 0.9999992567012169, iteration: 171635
loss: 0.9820467829704285,grad_norm: 0.9897452231220772, iteration: 171636
loss: 1.0357177257537842,grad_norm: 0.9999995367403098, iteration: 171637
loss: 0.9886537790298462,grad_norm: 0.999999711416455, iteration: 171638
loss: 1.0239155292510986,grad_norm: 0.9999994829731874, iteration: 171639
loss: 1.0234837532043457,grad_norm: 0.9999992164523657, iteration: 171640
loss: 1.020358920097351,grad_norm: 0.9999992959496095, iteration: 171641
loss: 1.0021977424621582,grad_norm: 0.9999990339911061, iteration: 171642
loss: 0.9787364602088928,grad_norm: 0.9999992896923113, iteration: 171643
loss: 1.0239567756652832,grad_norm: 0.9999990868519713, iteration: 171644
loss: 1.1120433807373047,grad_norm: 0.9999992909844493, iteration: 171645
loss: 0.9964856505393982,grad_norm: 0.9999990723306308, iteration: 171646
loss: 1.0240834951400757,grad_norm: 0.9999992455490536, iteration: 171647
loss: 1.0641309022903442,grad_norm: 0.9999995422127901, iteration: 171648
loss: 0.954650342464447,grad_norm: 0.9207349751310245, iteration: 171649
loss: 1.0374468564987183,grad_norm: 0.999999765441786, iteration: 171650
loss: 1.1693793535232544,grad_norm: 0.9999994655706188, iteration: 171651
loss: 1.029236912727356,grad_norm: 0.9999991821143212, iteration: 171652
loss: 1.0686113834381104,grad_norm: 0.9999999322779726, iteration: 171653
loss: 0.9968680143356323,grad_norm: 0.9999989707347527, iteration: 171654
loss: 1.0333150625228882,grad_norm: 0.9999994377809334, iteration: 171655
loss: 1.1263021230697632,grad_norm: 0.9999998732497802, iteration: 171656
loss: 1.000962257385254,grad_norm: 0.8617769735132066, iteration: 171657
loss: 1.0224207639694214,grad_norm: 0.9521119121425783, iteration: 171658
loss: 1.0278011560440063,grad_norm: 0.9999991150300541, iteration: 171659
loss: 1.0582637786865234,grad_norm: 0.999999656156627, iteration: 171660
loss: 1.018864631652832,grad_norm: 1.00000000387692, iteration: 171661
loss: 1.0051732063293457,grad_norm: 0.9999992066508714, iteration: 171662
loss: 1.0708491802215576,grad_norm: 0.9999994359442929, iteration: 171663
loss: 1.1059448719024658,grad_norm: 0.9999996590099854, iteration: 171664
loss: 1.014464259147644,grad_norm: 0.9999991371969016, iteration: 171665
loss: 1.0114392042160034,grad_norm: 0.9779125427294734, iteration: 171666
loss: 1.0385044813156128,grad_norm: 0.9999997570938224, iteration: 171667
loss: 1.0186984539031982,grad_norm: 0.999999086830317, iteration: 171668
loss: 1.026354193687439,grad_norm: 0.9999990787527547, iteration: 171669
loss: 0.9921155571937561,grad_norm: 0.9999990673229989, iteration: 171670
loss: 1.032235026359558,grad_norm: 0.8714293540442257, iteration: 171671
loss: 1.0447638034820557,grad_norm: 0.8772829190381631, iteration: 171672
loss: 1.027372121810913,grad_norm: 0.9999994214698427, iteration: 171673
loss: 0.9948588609695435,grad_norm: 0.9999992786944252, iteration: 171674
loss: 1.0358542203903198,grad_norm: 0.9999998324643813, iteration: 171675
loss: 1.0432237386703491,grad_norm: 0.9597285469815583, iteration: 171676
loss: 1.0172220468521118,grad_norm: 0.9264584500490597, iteration: 171677
loss: 0.998416006565094,grad_norm: 0.9999991681169388, iteration: 171678
loss: 0.9823198318481445,grad_norm: 0.9999991317137356, iteration: 171679
loss: 1.00789475440979,grad_norm: 0.9999992544346188, iteration: 171680
loss: 1.0231842994689941,grad_norm: 0.9999990996307556, iteration: 171681
loss: 1.0062735080718994,grad_norm: 0.9338103686388705, iteration: 171682
loss: 0.9889002442359924,grad_norm: 0.9999991204013051, iteration: 171683
loss: 0.9908123016357422,grad_norm: 0.9999991665927915, iteration: 171684
loss: 0.9893926382064819,grad_norm: 0.8462522133714594, iteration: 171685
loss: 0.9739201664924622,grad_norm: 0.9106248575793864, iteration: 171686
loss: 1.0516244173049927,grad_norm: 0.9999990334713522, iteration: 171687
loss: 0.9898425340652466,grad_norm: 0.9999990733983403, iteration: 171688
loss: 0.9554013609886169,grad_norm: 0.9999991430214691, iteration: 171689
loss: 0.9866583943367004,grad_norm: 0.9891711382877091, iteration: 171690
loss: 0.9907349348068237,grad_norm: 0.999999085402494, iteration: 171691
loss: 0.9805370569229126,grad_norm: 0.9999990650345136, iteration: 171692
loss: 1.032170057296753,grad_norm: 0.9999998999133415, iteration: 171693
loss: 1.019938349723816,grad_norm: 0.9999990937846229, iteration: 171694
loss: 1.0514121055603027,grad_norm: 0.9999991547694298, iteration: 171695
loss: 0.9903263449668884,grad_norm: 0.9999990873834322, iteration: 171696
loss: 1.0991069078445435,grad_norm: 0.9999998903194002, iteration: 171697
loss: 1.0011615753173828,grad_norm: 0.9999991483921779, iteration: 171698
loss: 1.0024993419647217,grad_norm: 0.789377258165071, iteration: 171699
loss: 1.013457179069519,grad_norm: 0.9493474621958466, iteration: 171700
loss: 1.024856686592102,grad_norm: 0.9815962240330118, iteration: 171701
loss: 1.0025792121887207,grad_norm: 0.8157858914653752, iteration: 171702
loss: 1.0207631587982178,grad_norm: 0.9999992120787915, iteration: 171703
loss: 1.0457203388214111,grad_norm: 0.9223381631208009, iteration: 171704
loss: 1.0257147550582886,grad_norm: 0.9999991958988516, iteration: 171705
loss: 0.9712557196617126,grad_norm: 0.9999996505079115, iteration: 171706
loss: 1.006657361984253,grad_norm: 0.999999203409139, iteration: 171707
loss: 1.0283526182174683,grad_norm: 0.8700973378245328, iteration: 171708
loss: 1.0077189207077026,grad_norm: 0.9999991920101627, iteration: 171709
loss: 0.9909273982048035,grad_norm: 0.9392751083934175, iteration: 171710
loss: 0.9944462776184082,grad_norm: 0.999998940938079, iteration: 171711
loss: 1.0400750637054443,grad_norm: 0.860988565118941, iteration: 171712
loss: 0.973837673664093,grad_norm: 0.932009691049017, iteration: 171713
loss: 0.9862009286880493,grad_norm: 0.99999910176198, iteration: 171714
loss: 1.000036358833313,grad_norm: 0.9262480159055441, iteration: 171715
loss: 1.023224949836731,grad_norm: 0.9999995782450538, iteration: 171716
loss: 1.0662569999694824,grad_norm: 0.9999996322557513, iteration: 171717
loss: 0.9783879518508911,grad_norm: 0.9913511738036105, iteration: 171718
loss: 1.0146429538726807,grad_norm: 0.9999992406340722, iteration: 171719
loss: 1.0223708152770996,grad_norm: 0.9999989975915892, iteration: 171720
loss: 1.037394642829895,grad_norm: 0.9431159842982302, iteration: 171721
loss: 0.9774297475814819,grad_norm: 0.9639594716790654, iteration: 171722
loss: 0.9762874841690063,grad_norm: 0.9230783822777975, iteration: 171723
loss: 0.9890742301940918,grad_norm: 0.8942656252927387, iteration: 171724
loss: 0.9690364003181458,grad_norm: 0.9696863866969596, iteration: 171725
loss: 0.9884085059165955,grad_norm: 0.9362307485020668, iteration: 171726
loss: 1.0001519918441772,grad_norm: 0.9999991365085736, iteration: 171727
loss: 1.0024722814559937,grad_norm: 0.9151545985629946, iteration: 171728
loss: 0.9774309992790222,grad_norm: 0.9961301193241479, iteration: 171729
loss: 0.9811589121818542,grad_norm: 0.9999990665115733, iteration: 171730
loss: 1.0056685209274292,grad_norm: 0.8813314468191088, iteration: 171731
loss: 1.0220953226089478,grad_norm: 0.8640655087298303, iteration: 171732
loss: 0.9800759553909302,grad_norm: 0.9904329842125891, iteration: 171733
loss: 0.985219419002533,grad_norm: 0.8452648148833731, iteration: 171734
loss: 1.003180742263794,grad_norm: 0.9919538313644862, iteration: 171735
loss: 1.0108410120010376,grad_norm: 0.9559963020926363, iteration: 171736
loss: 0.9966771006584167,grad_norm: 0.877255970613492, iteration: 171737
loss: 0.9708060026168823,grad_norm: 0.9999990638747718, iteration: 171738
loss: 1.0385574102401733,grad_norm: 0.9479237208500478, iteration: 171739
loss: 1.0367711782455444,grad_norm: 0.9999991327093627, iteration: 171740
loss: 0.961032509803772,grad_norm: 1.0000000630868588, iteration: 171741
loss: 1.0188276767730713,grad_norm: 0.9999991187408099, iteration: 171742
loss: 1.0085529088974,grad_norm: 0.9999991833650971, iteration: 171743
loss: 1.0139387845993042,grad_norm: 0.9999990534823542, iteration: 171744
loss: 1.0014832019805908,grad_norm: 0.9877114223122839, iteration: 171745
loss: 0.991919219493866,grad_norm: 0.9683914179680078, iteration: 171746
loss: 1.001803994178772,grad_norm: 0.9999990354604803, iteration: 171747
loss: 0.9862260818481445,grad_norm: 0.9417243432963306, iteration: 171748
loss: 0.9777067303657532,grad_norm: 0.9999991790063171, iteration: 171749
loss: 1.0235012769699097,grad_norm: 0.9823915175928081, iteration: 171750
loss: 1.0478718280792236,grad_norm: 0.9915517593097355, iteration: 171751
loss: 1.00054931640625,grad_norm: 0.9683827022485811, iteration: 171752
loss: 0.9659722447395325,grad_norm: 0.9080157915728855, iteration: 171753
loss: 1.017143726348877,grad_norm: 0.9999993645987353, iteration: 171754
loss: 0.9952196478843689,grad_norm: 0.9999991415759543, iteration: 171755
loss: 1.0034314393997192,grad_norm: 0.9999991351203916, iteration: 171756
loss: 1.0185563564300537,grad_norm: 0.9999992363136637, iteration: 171757
loss: 0.9879754781723022,grad_norm: 0.9999991193866093, iteration: 171758
loss: 0.9818961024284363,grad_norm: 0.9999989397956809, iteration: 171759
loss: 1.0030558109283447,grad_norm: 0.9999991201105382, iteration: 171760
loss: 0.9884302616119385,grad_norm: 0.9949571702363295, iteration: 171761
loss: 1.0379670858383179,grad_norm: 0.9999992744543185, iteration: 171762
loss: 1.067480206489563,grad_norm: 0.9999999020112861, iteration: 171763
loss: 0.9498487710952759,grad_norm: 0.9999989955208373, iteration: 171764
loss: 1.0070722103118896,grad_norm: 0.8580255199167608, iteration: 171765
loss: 1.0030243396759033,grad_norm: 0.948754777429306, iteration: 171766
loss: 0.9632728695869446,grad_norm: 0.9036310588483776, iteration: 171767
loss: 0.9904100894927979,grad_norm: 0.9350260449337641, iteration: 171768
loss: 1.0185939073562622,grad_norm: 0.9999990627940037, iteration: 171769
loss: 1.0380650758743286,grad_norm: 0.9400716862636084, iteration: 171770
loss: 0.9972642660140991,grad_norm: 0.9999989627288056, iteration: 171771
loss: 1.0244766473770142,grad_norm: 0.9999990160324195, iteration: 171772
loss: 1.026951551437378,grad_norm: 0.9785024218708125, iteration: 171773
loss: 1.0544359683990479,grad_norm: 0.9999994269290774, iteration: 171774
loss: 1.0304912328720093,grad_norm: 0.999999174484605, iteration: 171775
loss: 1.1709121465682983,grad_norm: 0.9999998684907472, iteration: 171776
loss: 1.0215429067611694,grad_norm: 0.8837622754642686, iteration: 171777
loss: 1.108677864074707,grad_norm: 0.9999999988669428, iteration: 171778
loss: 1.4830050468444824,grad_norm: 0.9999998443607517, iteration: 171779
loss: 1.2476638555526733,grad_norm: 0.9999999086084806, iteration: 171780
loss: 0.9874539375305176,grad_norm: 0.9999990853729558, iteration: 171781
loss: 0.9861906170845032,grad_norm: 0.9999998648091014, iteration: 171782
loss: 1.2867881059646606,grad_norm: 0.9999995237613802, iteration: 171783
loss: 1.0251024961471558,grad_norm: 0.999999011167693, iteration: 171784
loss: 0.9783616065979004,grad_norm: 0.8458521909223206, iteration: 171785
loss: 1.0853466987609863,grad_norm: 0.9999997398974624, iteration: 171786
loss: 1.0500608682632446,grad_norm: 0.9999992220178098, iteration: 171787
loss: 1.0178831815719604,grad_norm: 0.9999999243873638, iteration: 171788
loss: 1.0763322114944458,grad_norm: 0.9999993592025654, iteration: 171789
loss: 1.0478971004486084,grad_norm: 0.9999996389261459, iteration: 171790
loss: 1.17874014377594,grad_norm: 0.9999993740668793, iteration: 171791
loss: 1.054273247718811,grad_norm: 0.9999994685693064, iteration: 171792
loss: 0.9652984738349915,grad_norm: 0.9968399417328386, iteration: 171793
loss: 1.035548448562622,grad_norm: 0.9999992625494232, iteration: 171794
loss: 1.0232924222946167,grad_norm: 0.9999995639258767, iteration: 171795
loss: 1.086426854133606,grad_norm: 0.9999998627789487, iteration: 171796
loss: 1.118672251701355,grad_norm: 0.9999992328499122, iteration: 171797
loss: 1.0694432258605957,grad_norm: 0.9999995917874899, iteration: 171798
loss: 1.0546653270721436,grad_norm: 0.9999992417256536, iteration: 171799
loss: 1.005666732788086,grad_norm: 0.9999992126900135, iteration: 171800
loss: 1.0292458534240723,grad_norm: 0.9999990714657732, iteration: 171801
loss: 1.0490405559539795,grad_norm: 0.9999998797771801, iteration: 171802
loss: 1.1273248195648193,grad_norm: 0.9999994688213165, iteration: 171803
loss: 1.0997215509414673,grad_norm: 0.9999992688409409, iteration: 171804
loss: 1.089128017425537,grad_norm: 0.9999998113157789, iteration: 171805
loss: 1.0303541421890259,grad_norm: 0.9999992198143213, iteration: 171806
loss: 1.0362492799758911,grad_norm: 0.9999992129483973, iteration: 171807
loss: 1.0601192712783813,grad_norm: 0.9999994810646233, iteration: 171808
loss: 1.0280117988586426,grad_norm: 0.9999990557723337, iteration: 171809
loss: 1.005013346672058,grad_norm: 0.9999989924851862, iteration: 171810
loss: 1.011191725730896,grad_norm: 0.9999991688318239, iteration: 171811
loss: 1.0629459619522095,grad_norm: 0.9999993103364881, iteration: 171812
loss: 1.0287567377090454,grad_norm: 0.9999991310473684, iteration: 171813
loss: 1.041574239730835,grad_norm: 0.9999991803706664, iteration: 171814
loss: 1.0364283323287964,grad_norm: 0.999999120429225, iteration: 171815
loss: 0.9906713962554932,grad_norm: 0.9588188032896271, iteration: 171816
loss: 1.0840816497802734,grad_norm: 0.9999998002364782, iteration: 171817
loss: 1.0112340450286865,grad_norm: 0.9999992482451201, iteration: 171818
loss: 0.9703218340873718,grad_norm: 0.999999439326482, iteration: 171819
loss: 1.0223766565322876,grad_norm: 0.9999992197249881, iteration: 171820
loss: 1.0010056495666504,grad_norm: 0.9999992574176889, iteration: 171821
loss: 1.0084506273269653,grad_norm: 0.999999060653679, iteration: 171822
loss: 0.9957801103591919,grad_norm: 0.9999991254233617, iteration: 171823
loss: 0.9666797518730164,grad_norm: 0.9999990463157931, iteration: 171824
loss: 0.9831635355949402,grad_norm: 0.9999991495469209, iteration: 171825
loss: 0.9931148886680603,grad_norm: 0.999999182805527, iteration: 171826
loss: 1.0035721063613892,grad_norm: 0.9999990149039416, iteration: 171827
loss: 1.0504279136657715,grad_norm: 0.9999998810300168, iteration: 171828
loss: 1.0101631879806519,grad_norm: 0.9999992163992815, iteration: 171829
loss: 1.018228530883789,grad_norm: 0.9999992093879524, iteration: 171830
loss: 0.9955312609672546,grad_norm: 0.9999991829524295, iteration: 171831
loss: 0.9676620960235596,grad_norm: 0.9037493837638269, iteration: 171832
loss: 0.9888994693756104,grad_norm: 0.9999991776832106, iteration: 171833
loss: 1.1010247468948364,grad_norm: 0.9999993733299446, iteration: 171834
loss: 0.9921428561210632,grad_norm: 0.9999992524710553, iteration: 171835
loss: 1.0342442989349365,grad_norm: 0.9999992180839559, iteration: 171836
loss: 0.999330461025238,grad_norm: 0.9999990635618762, iteration: 171837
loss: 0.9944565892219543,grad_norm: 0.8978788672438083, iteration: 171838
loss: 1.0727421045303345,grad_norm: 0.9999992104628402, iteration: 171839
loss: 1.0139650106430054,grad_norm: 0.9359918748881338, iteration: 171840
loss: 0.9965288639068604,grad_norm: 0.8759562329028155, iteration: 171841
loss: 1.0153979063034058,grad_norm: 0.999999160664197, iteration: 171842
loss: 0.9732418060302734,grad_norm: 0.957864315270741, iteration: 171843
loss: 1.0088129043579102,grad_norm: 0.9999994397013825, iteration: 171844
loss: 0.9770728945732117,grad_norm: 0.9773554631962506, iteration: 171845
loss: 1.0312845706939697,grad_norm: 0.9999992695024509, iteration: 171846
loss: 0.9992727637290955,grad_norm: 0.915765596956412, iteration: 171847
loss: 0.9972564578056335,grad_norm: 0.9999997275418435, iteration: 171848
loss: 1.0107272863388062,grad_norm: 0.9999992119216863, iteration: 171849
loss: 1.0155616998672485,grad_norm: 0.9999990763565066, iteration: 171850
loss: 0.9714833498001099,grad_norm: 0.9476998551877039, iteration: 171851
loss: 0.9732416868209839,grad_norm: 0.9994812979546438, iteration: 171852
loss: 0.9742286801338196,grad_norm: 0.9999991755821056, iteration: 171853
loss: 1.0308867692947388,grad_norm: 0.9968353799310863, iteration: 171854
loss: 1.0129390954971313,grad_norm: 0.9999989030146299, iteration: 171855
loss: 1.0078142881393433,grad_norm: 0.9999990504050618, iteration: 171856
loss: 1.098339319229126,grad_norm: 0.99999921437434, iteration: 171857
loss: 1.0026867389678955,grad_norm: 0.99999914282162, iteration: 171858
loss: 0.9912475943565369,grad_norm: 0.9999990307787844, iteration: 171859
loss: 1.0019272565841675,grad_norm: 0.9650130298332901, iteration: 171860
loss: 1.0734809637069702,grad_norm: 0.9999995099530176, iteration: 171861
loss: 1.017043113708496,grad_norm: 0.8555344911867043, iteration: 171862
loss: 0.9997989535331726,grad_norm: 0.9989756207154528, iteration: 171863
loss: 0.9907460808753967,grad_norm: 0.9999992297642087, iteration: 171864
loss: 0.9920268058776855,grad_norm: 0.95118925727712, iteration: 171865
loss: 0.9541929364204407,grad_norm: 0.99999914818554, iteration: 171866
loss: 1.0264962911605835,grad_norm: 0.999999094805281, iteration: 171867
loss: 1.0094207525253296,grad_norm: 0.9999991676505603, iteration: 171868
loss: 1.0121289491653442,grad_norm: 0.9999992728246438, iteration: 171869
loss: 1.003435730934143,grad_norm: 0.999999116293178, iteration: 171870
loss: 0.999403715133667,grad_norm: 0.9999991747762144, iteration: 171871
loss: 0.9974668622016907,grad_norm: 0.9999995786543399, iteration: 171872
loss: 1.0658446550369263,grad_norm: 0.9503656412793092, iteration: 171873
loss: 1.1750823259353638,grad_norm: 0.9999996834893331, iteration: 171874
loss: 1.0027902126312256,grad_norm: 0.9177789288794042, iteration: 171875
loss: 1.0361391305923462,grad_norm: 0.9999991814413096, iteration: 171876
loss: 0.9973948001861572,grad_norm: 0.9999990849212747, iteration: 171877
loss: 1.0527441501617432,grad_norm: 0.9999995219300142, iteration: 171878
loss: 1.011000394821167,grad_norm: 0.8896327615877327, iteration: 171879
loss: 1.0065642595291138,grad_norm: 0.8485729824201876, iteration: 171880
loss: 0.9871590733528137,grad_norm: 0.9543928145932558, iteration: 171881
loss: 1.0574556589126587,grad_norm: 0.9999996752428514, iteration: 171882
loss: 1.0408631563186646,grad_norm: 0.9999991066699732, iteration: 171883
loss: 0.9946625828742981,grad_norm: 0.9019303964712528, iteration: 171884
loss: 0.9811170101165771,grad_norm: 0.9999991726884562, iteration: 171885
loss: 0.9800313115119934,grad_norm: 0.9181960920656997, iteration: 171886
loss: 0.9673386812210083,grad_norm: 0.9128742025585626, iteration: 171887
loss: 1.0060250759124756,grad_norm: 0.8970717515249949, iteration: 171888
loss: 0.9795635938644409,grad_norm: 0.8377973377773237, iteration: 171889
loss: 1.007225513458252,grad_norm: 0.9798861429095831, iteration: 171890
loss: 1.0159622430801392,grad_norm: 0.9999996529736143, iteration: 171891
loss: 0.9798585772514343,grad_norm: 0.9999990987258293, iteration: 171892
loss: 1.0297906398773193,grad_norm: 0.9999996763832361, iteration: 171893
loss: 1.024625301361084,grad_norm: 0.9999993746767467, iteration: 171894
loss: 0.9744256734848022,grad_norm: 0.9033510246148182, iteration: 171895
loss: 0.9848813414573669,grad_norm: 0.9999993355665624, iteration: 171896
loss: 1.0375282764434814,grad_norm: 0.9999992727526594, iteration: 171897
loss: 1.0226142406463623,grad_norm: 0.999999136077248, iteration: 171898
loss: 1.0033900737762451,grad_norm: 0.9307433386716778, iteration: 171899
loss: 1.0031012296676636,grad_norm: 0.8907391400469178, iteration: 171900
loss: 1.0125939846038818,grad_norm: 0.844618334704378, iteration: 171901
loss: 1.003913402557373,grad_norm: 0.9469146599378149, iteration: 171902
loss: 1.0417850017547607,grad_norm: 0.9289072738233991, iteration: 171903
loss: 1.0148566961288452,grad_norm: 0.8909591785537679, iteration: 171904
loss: 1.0484578609466553,grad_norm: 0.9999997891219801, iteration: 171905
loss: 0.9892699122428894,grad_norm: 0.9999991350009467, iteration: 171906
loss: 1.0007458925247192,grad_norm: 0.9999990574953744, iteration: 171907
loss: 1.0216010808944702,grad_norm: 0.9339109896938131, iteration: 171908
loss: 0.9964123368263245,grad_norm: 0.9999989992915008, iteration: 171909
loss: 1.0188826322555542,grad_norm: 0.9937427678036207, iteration: 171910
loss: 0.9844818115234375,grad_norm: 0.9642770687818099, iteration: 171911
loss: 1.0106629133224487,grad_norm: 0.9999992993143021, iteration: 171912
loss: 0.9639798402786255,grad_norm: 0.9999992695079576, iteration: 171913
loss: 0.9931889176368713,grad_norm: 0.9999991352969414, iteration: 171914
loss: 0.9533573985099792,grad_norm: 0.8568903366391274, iteration: 171915
loss: 0.9961585998535156,grad_norm: 0.9999992212411709, iteration: 171916
loss: 0.9825527667999268,grad_norm: 0.9999992923107996, iteration: 171917
loss: 0.9845471382141113,grad_norm: 0.9999991722950097, iteration: 171918
loss: 1.0180422067642212,grad_norm: 0.9999990897679769, iteration: 171919
loss: 1.006103515625,grad_norm: 0.9999998893950263, iteration: 171920
loss: 0.9990134835243225,grad_norm: 0.9999992493999007, iteration: 171921
loss: 1.0242825746536255,grad_norm: 0.9999994502034073, iteration: 171922
loss: 1.0104079246520996,grad_norm: 0.9999997897389281, iteration: 171923
loss: 0.9649931192398071,grad_norm: 0.9194702641046835, iteration: 171924
loss: 1.0615876913070679,grad_norm: 0.9999995515332949, iteration: 171925
loss: 0.9660836458206177,grad_norm: 0.9999990892967242, iteration: 171926
loss: 1.0181468725204468,grad_norm: 0.9999995018218774, iteration: 171927
loss: 0.9922436475753784,grad_norm: 0.9999989188629884, iteration: 171928
loss: 1.0046579837799072,grad_norm: 0.9999990245093957, iteration: 171929
loss: 0.9955031871795654,grad_norm: 0.9430861116088032, iteration: 171930
loss: 1.0099154710769653,grad_norm: 0.9999991393800086, iteration: 171931
loss: 1.0170148611068726,grad_norm: 0.9999991030882809, iteration: 171932
loss: 0.9667180776596069,grad_norm: 0.9999992235122906, iteration: 171933
loss: 1.0085740089416504,grad_norm: 0.9999989268539392, iteration: 171934
loss: 0.9922497868537903,grad_norm: 0.833585543723424, iteration: 171935
loss: 0.969843864440918,grad_norm: 0.967905386268003, iteration: 171936
loss: 1.0164262056350708,grad_norm: 0.9240723645512305, iteration: 171937
loss: 0.9927504062652588,grad_norm: 0.8477855581109146, iteration: 171938
loss: 1.0036087036132812,grad_norm: 0.999999041411033, iteration: 171939
loss: 1.020776629447937,grad_norm: 0.9078235153169437, iteration: 171940
loss: 1.0272736549377441,grad_norm: 0.9999992590026273, iteration: 171941
loss: 0.9635370969772339,grad_norm: 0.9999992102891792, iteration: 171942
loss: 0.9949326515197754,grad_norm: 0.8874585723420728, iteration: 171943
loss: 0.9951788783073425,grad_norm: 0.9774273925569726, iteration: 171944
loss: 0.9890347719192505,grad_norm: 0.9999989349145199, iteration: 171945
loss: 0.9615421891212463,grad_norm: 0.8763124911760864, iteration: 171946
loss: 0.9632525444030762,grad_norm: 0.9999990617933985, iteration: 171947
loss: 1.035010576248169,grad_norm: 0.9999992950013877, iteration: 171948
loss: 0.974369466304779,grad_norm: 0.9593078142615129, iteration: 171949
loss: 1.0162978172302246,grad_norm: 0.9999989975212408, iteration: 171950
loss: 1.017814040184021,grad_norm: 0.9999990899324572, iteration: 171951
loss: 1.0371476411819458,grad_norm: 0.9999994606286273, iteration: 171952
loss: 1.0047202110290527,grad_norm: 0.9999997119738602, iteration: 171953
loss: 1.0103507041931152,grad_norm: 0.9999994051517798, iteration: 171954
loss: 1.0188639163970947,grad_norm: 0.9559559759705539, iteration: 171955
loss: 0.9767338037490845,grad_norm: 0.9772639851907082, iteration: 171956
loss: 0.9906291365623474,grad_norm: 0.9999991994291768, iteration: 171957
loss: 0.9672124981880188,grad_norm: 0.840622233098497, iteration: 171958
loss: 1.0286506414413452,grad_norm: 0.9999992027224665, iteration: 171959
loss: 0.9953765869140625,grad_norm: 0.9515796050393869, iteration: 171960
loss: 1.0079737901687622,grad_norm: 0.9999991237696422, iteration: 171961
loss: 1.014906644821167,grad_norm: 0.99999916849947, iteration: 171962
loss: 1.0151342153549194,grad_norm: 0.999999840256791, iteration: 171963
loss: 0.986304759979248,grad_norm: 0.8550170079246814, iteration: 171964
loss: 1.0185178518295288,grad_norm: 0.9999991493847813, iteration: 171965
loss: 1.0464290380477905,grad_norm: 0.9999994297143942, iteration: 171966
loss: 0.9992966651916504,grad_norm: 0.9999990162258228, iteration: 171967
loss: 1.0292799472808838,grad_norm: 0.9999990768759569, iteration: 171968
loss: 1.1103266477584839,grad_norm: 0.9999992724585401, iteration: 171969
loss: 0.9921944737434387,grad_norm: 0.9999992686462694, iteration: 171970
loss: 1.0135163068771362,grad_norm: 0.9999990763296602, iteration: 171971
loss: 1.0106974840164185,grad_norm: 0.9999991148602447, iteration: 171972
loss: 1.008271336555481,grad_norm: 0.9039034109480258, iteration: 171973
loss: 0.9823459982872009,grad_norm: 0.9181237624086872, iteration: 171974
loss: 1.025604248046875,grad_norm: 0.9999989475498864, iteration: 171975
loss: 1.0340672731399536,grad_norm: 0.9999989745771537, iteration: 171976
loss: 0.9952471852302551,grad_norm: 0.999999032225774, iteration: 171977
loss: 1.016479253768921,grad_norm: 0.9999992624207998, iteration: 171978
loss: 1.0255305767059326,grad_norm: 0.8923995741156576, iteration: 171979
loss: 1.0094752311706543,grad_norm: 0.9939198818631222, iteration: 171980
loss: 1.0949968099594116,grad_norm: 0.9999999997803192, iteration: 171981
loss: 0.9593596458435059,grad_norm: 0.9999992730466334, iteration: 171982
loss: 1.059462547302246,grad_norm: 0.9999991839329035, iteration: 171983
loss: 0.9920900464057922,grad_norm: 0.9999990992670126, iteration: 171984
loss: 1.0254595279693604,grad_norm: 0.9999998357867711, iteration: 171985
loss: 0.9649646282196045,grad_norm: 0.9712912920467724, iteration: 171986
loss: 1.0525778532028198,grad_norm: 0.9999990077328174, iteration: 171987
loss: 1.0180455446243286,grad_norm: 0.9644804352021203, iteration: 171988
loss: 1.0402655601501465,grad_norm: 0.9999993974623091, iteration: 171989
loss: 0.9852721691131592,grad_norm: 0.999999102776618, iteration: 171990
loss: 1.01693594455719,grad_norm: 0.9999999142782128, iteration: 171991
loss: 1.030763030052185,grad_norm: 0.984443481579083, iteration: 171992
loss: 1.0739262104034424,grad_norm: 0.9999996537901715, iteration: 171993
loss: 1.0114247798919678,grad_norm: 0.9999991654884545, iteration: 171994
loss: 1.0324773788452148,grad_norm: 0.9999992860518656, iteration: 171995
loss: 1.0252790451049805,grad_norm: 0.9999990535135117, iteration: 171996
loss: 0.9949706196784973,grad_norm: 0.9230214662971676, iteration: 171997
loss: 1.0017777681350708,grad_norm: 0.9304279639459504, iteration: 171998
loss: 1.0006029605865479,grad_norm: 0.9891352397473138, iteration: 171999
loss: 1.0338616371154785,grad_norm: 0.999999490113864, iteration: 172000
loss: 1.1061846017837524,grad_norm: 0.9999996134629432, iteration: 172001
loss: 1.0578258037567139,grad_norm: 0.9999989401560566, iteration: 172002
loss: 1.0772422552108765,grad_norm: 0.9999991544952483, iteration: 172003
loss: 1.1209622621536255,grad_norm: 0.9999991638164588, iteration: 172004
loss: 1.0062110424041748,grad_norm: 0.9955316886056147, iteration: 172005
loss: 1.0920490026474,grad_norm: 0.9999993667992838, iteration: 172006
loss: 1.1167079210281372,grad_norm: 0.999999596975513, iteration: 172007
loss: 0.964541494846344,grad_norm: 0.9999991816307298, iteration: 172008
loss: 0.9981964826583862,grad_norm: 0.9999991905933394, iteration: 172009
loss: 1.1126030683517456,grad_norm: 0.9999992637428454, iteration: 172010
loss: 0.9887405633926392,grad_norm: 0.9999991027623483, iteration: 172011
loss: 0.9815027117729187,grad_norm: 0.9999990396399449, iteration: 172012
loss: 1.064369559288025,grad_norm: 0.9999997421669544, iteration: 172013
loss: 1.0170440673828125,grad_norm: 0.914815932236036, iteration: 172014
loss: 1.0441008806228638,grad_norm: 0.9999994395771842, iteration: 172015
loss: 1.0146338939666748,grad_norm: 0.9527771086565865, iteration: 172016
loss: 1.0349676609039307,grad_norm: 0.9371471064807757, iteration: 172017
loss: 0.9804158806800842,grad_norm: 0.8244302024040344, iteration: 172018
loss: 1.0347175598144531,grad_norm: 0.9999994012616235, iteration: 172019
loss: 1.0789979696273804,grad_norm: 0.9999996615404547, iteration: 172020
loss: 0.9888018369674683,grad_norm: 0.9999999030588268, iteration: 172021
loss: 0.951505184173584,grad_norm: 0.99999910837532, iteration: 172022
loss: 1.0092822313308716,grad_norm: 0.999999720712975, iteration: 172023
loss: 1.0930513143539429,grad_norm: 0.9999993993925593, iteration: 172024
loss: 1.0391379594802856,grad_norm: 0.9999990970042835, iteration: 172025
loss: 1.0421831607818604,grad_norm: 0.9999997995307063, iteration: 172026
loss: 0.9994547367095947,grad_norm: 0.9999991680723878, iteration: 172027
loss: 1.0193877220153809,grad_norm: 0.9999991878788453, iteration: 172028
loss: 1.023884892463684,grad_norm: 0.921270022700712, iteration: 172029
loss: 0.9917082786560059,grad_norm: 0.9434998096038026, iteration: 172030
loss: 1.0513949394226074,grad_norm: 0.9755349915816085, iteration: 172031
loss: 0.9897518754005432,grad_norm: 0.9358975248432422, iteration: 172032
loss: 0.9860642552375793,grad_norm: 0.999999203015138, iteration: 172033
loss: 1.0223965644836426,grad_norm: 0.899571720243543, iteration: 172034
loss: 0.9949281215667725,grad_norm: 0.8618055252762752, iteration: 172035
loss: 1.0405654907226562,grad_norm: 0.9999999115356533, iteration: 172036
loss: 0.9939736723899841,grad_norm: 0.8822490571081826, iteration: 172037
loss: 1.002551555633545,grad_norm: 0.9999991397721547, iteration: 172038
loss: 0.9641937017440796,grad_norm: 0.9999992422202851, iteration: 172039
loss: 1.0025445222854614,grad_norm: 0.998606728105079, iteration: 172040
loss: 1.0084154605865479,grad_norm: 0.933177129905219, iteration: 172041
loss: 1.0205882787704468,grad_norm: 0.9999994872490899, iteration: 172042
loss: 0.9513171911239624,grad_norm: 0.9999990092663534, iteration: 172043
loss: 1.0241185426712036,grad_norm: 0.9999993156737558, iteration: 172044
loss: 1.0721584558486938,grad_norm: 0.9999990516316657, iteration: 172045
loss: 0.9519217610359192,grad_norm: 0.9999991156966086, iteration: 172046
loss: 1.0383977890014648,grad_norm: 0.999999868575348, iteration: 172047
loss: 0.9894899129867554,grad_norm: 0.9999992588080153, iteration: 172048
loss: 1.0437520742416382,grad_norm: 0.9999997315551499, iteration: 172049
loss: 1.0803502798080444,grad_norm: 0.9999993932896016, iteration: 172050
loss: 1.0120360851287842,grad_norm: 0.9999993802221306, iteration: 172051
loss: 1.036097526550293,grad_norm: 0.8583975906343014, iteration: 172052
loss: 1.0254563093185425,grad_norm: 0.9999992114312553, iteration: 172053
loss: 0.9857643842697144,grad_norm: 0.9616715917103599, iteration: 172054
loss: 1.0023887157440186,grad_norm: 0.9093892443406206, iteration: 172055
loss: 1.067192554473877,grad_norm: 0.9999999441773271, iteration: 172056
loss: 0.9942443370819092,grad_norm: 0.9999997549585002, iteration: 172057
loss: 0.9839211106300354,grad_norm: 0.9832930134347655, iteration: 172058
loss: 0.9645622968673706,grad_norm: 0.9999992711577151, iteration: 172059
loss: 1.0769762992858887,grad_norm: 0.9999996358221424, iteration: 172060
loss: 1.0188347101211548,grad_norm: 0.927281916679093, iteration: 172061
loss: 0.96543288230896,grad_norm: 0.9999992281248408, iteration: 172062
loss: 0.9951846599578857,grad_norm: 0.9926684615674083, iteration: 172063
loss: 0.9791432619094849,grad_norm: 0.9240674885304886, iteration: 172064
loss: 1.0730971097946167,grad_norm: 0.9999997801290519, iteration: 172065
loss: 1.0349252223968506,grad_norm: 0.9999996466195763, iteration: 172066
loss: 0.9952840209007263,grad_norm: 0.9779832150285105, iteration: 172067
loss: 0.9984126091003418,grad_norm: 0.8730205113464032, iteration: 172068
loss: 1.0596554279327393,grad_norm: 0.9334276074384147, iteration: 172069
loss: 0.9847623705863953,grad_norm: 0.9999997663333992, iteration: 172070
loss: 0.987755537033081,grad_norm: 0.8760530993633433, iteration: 172071
loss: 0.988659143447876,grad_norm: 0.9999990495116879, iteration: 172072
loss: 1.031991958618164,grad_norm: 0.9999991429791606, iteration: 172073
loss: 1.0255473852157593,grad_norm: 0.9804748556776023, iteration: 172074
loss: 1.0032265186309814,grad_norm: 0.9999990632327935, iteration: 172075
loss: 0.961114227771759,grad_norm: 0.9174128566869122, iteration: 172076
loss: 1.0233665704727173,grad_norm: 0.8774840453559033, iteration: 172077
loss: 1.032190203666687,grad_norm: 0.8299405755284729, iteration: 172078
loss: 0.9817848801612854,grad_norm: 0.9120402253134912, iteration: 172079
loss: 0.9940698742866516,grad_norm: 0.9999991661770536, iteration: 172080
loss: 0.9963513612747192,grad_norm: 0.9999990964656935, iteration: 172081
loss: 1.0776548385620117,grad_norm: 0.9999993287422481, iteration: 172082
loss: 1.00899076461792,grad_norm: 0.9999997697695929, iteration: 172083
loss: 0.9540226459503174,grad_norm: 0.9999990569894173, iteration: 172084
loss: 0.9927905201911926,grad_norm: 0.9457822395052369, iteration: 172085
loss: 1.0049128532409668,grad_norm: 0.8900878231981394, iteration: 172086
loss: 0.9575640559196472,grad_norm: 0.9999992190622325, iteration: 172087
loss: 0.9773672223091125,grad_norm: 0.9679951019179086, iteration: 172088
loss: 0.984952449798584,grad_norm: 0.9999991636974006, iteration: 172089
loss: 0.9936876893043518,grad_norm: 0.9999990815836349, iteration: 172090
loss: 1.1352078914642334,grad_norm: 0.9999996249243804, iteration: 172091
loss: 0.9695954918861389,grad_norm: 0.9999990087189043, iteration: 172092
loss: 0.9998026490211487,grad_norm: 0.9999992770231781, iteration: 172093
loss: 1.010105848312378,grad_norm: 0.8964442344056416, iteration: 172094
loss: 0.9888684749603271,grad_norm: 0.9687438426749259, iteration: 172095
loss: 1.0290850400924683,grad_norm: 0.9999991431715242, iteration: 172096
loss: 1.139957070350647,grad_norm: 0.9999995848388255, iteration: 172097
loss: 0.977847158908844,grad_norm: 0.9999990828398144, iteration: 172098
loss: 1.0175997018814087,grad_norm: 0.8130243708347387, iteration: 172099
loss: 1.0215568542480469,grad_norm: 0.9999991370112356, iteration: 172100
loss: 1.010345220565796,grad_norm: 0.9999990484664384, iteration: 172101
loss: 1.0269978046417236,grad_norm: 0.9999990752009208, iteration: 172102
loss: 0.9845632910728455,grad_norm: 0.8415524775785216, iteration: 172103
loss: 1.016701579093933,grad_norm: 0.8956581285057327, iteration: 172104
loss: 0.9667425155639648,grad_norm: 0.8264702221920236, iteration: 172105
loss: 1.0656660795211792,grad_norm: 0.9999998349993269, iteration: 172106
loss: 0.9938748478889465,grad_norm: 0.9868988630841032, iteration: 172107
loss: 1.0591813325881958,grad_norm: 0.9999994797186403, iteration: 172108
loss: 1.0274654626846313,grad_norm: 0.9617600205894853, iteration: 172109
loss: 1.0126116275787354,grad_norm: 0.9816671374234301, iteration: 172110
loss: 1.0215911865234375,grad_norm: 0.9999991841590322, iteration: 172111
loss: 0.9992361068725586,grad_norm: 0.9999990014759509, iteration: 172112
loss: 1.0224202871322632,grad_norm: 0.9999999195044984, iteration: 172113
loss: 1.0275769233703613,grad_norm: 0.9999991386576179, iteration: 172114
loss: 1.0521714687347412,grad_norm: 0.9999990014486242, iteration: 172115
loss: 1.0310441255569458,grad_norm: 0.9999994845296128, iteration: 172116
loss: 1.0214091539382935,grad_norm: 0.9933482742674891, iteration: 172117
loss: 1.005247950553894,grad_norm: 0.9999991046822707, iteration: 172118
loss: 0.9676290154457092,grad_norm: 0.9818048705422756, iteration: 172119
loss: 0.996962308883667,grad_norm: 0.8897477241408748, iteration: 172120
loss: 0.9915140271186829,grad_norm: 0.9999995026172175, iteration: 172121
loss: 1.0221232175827026,grad_norm: 0.9999992226258332, iteration: 172122
loss: 1.0869778394699097,grad_norm: 0.9999998032127557, iteration: 172123
loss: 0.9783876538276672,grad_norm: 0.9999990601509899, iteration: 172124
loss: 1.0152150392532349,grad_norm: 0.9999992058300886, iteration: 172125
loss: 1.0060704946517944,grad_norm: 0.9460551340418425, iteration: 172126
loss: 0.9866642951965332,grad_norm: 0.999999016358441, iteration: 172127
loss: 0.9743825793266296,grad_norm: 0.9999991589030234, iteration: 172128
loss: 0.9913256764411926,grad_norm: 0.9999990528572191, iteration: 172129
loss: 0.9684328436851501,grad_norm: 0.9999994719027614, iteration: 172130
loss: 0.9712467193603516,grad_norm: 0.9999992046446676, iteration: 172131
loss: 1.0245656967163086,grad_norm: 0.9999992073353343, iteration: 172132
loss: 1.003953218460083,grad_norm: 0.9999993174891022, iteration: 172133
loss: 1.0250190496444702,grad_norm: 0.9999991860069183, iteration: 172134
loss: 1.0051701068878174,grad_norm: 0.8427123032782894, iteration: 172135
loss: 1.0198895931243896,grad_norm: 0.9999993554337298, iteration: 172136
loss: 1.0306601524353027,grad_norm: 0.8323204978931973, iteration: 172137
loss: 0.9794878363609314,grad_norm: 0.8779847402786991, iteration: 172138
loss: 1.0199182033538818,grad_norm: 0.999999024576667, iteration: 172139
loss: 1.080409049987793,grad_norm: 0.9999990510867466, iteration: 172140
loss: 0.9909141659736633,grad_norm: 0.9999993169126774, iteration: 172141
loss: 1.0143872499465942,grad_norm: 0.9999995927503988, iteration: 172142
loss: 0.9874563813209534,grad_norm: 0.9999992189334282, iteration: 172143
loss: 1.1368255615234375,grad_norm: 0.9999997864864859, iteration: 172144
loss: 0.978294849395752,grad_norm: 0.8873144417635606, iteration: 172145
loss: 1.0211695432662964,grad_norm: 0.999999122041509, iteration: 172146
loss: 1.0375823974609375,grad_norm: 0.9999992215517407, iteration: 172147
loss: 1.0325188636779785,grad_norm: 0.976104130355114, iteration: 172148
loss: 1.0064789056777954,grad_norm: 0.9999991883510787, iteration: 172149
loss: 1.0390836000442505,grad_norm: 0.9999989824202, iteration: 172150
loss: 1.00877845287323,grad_norm: 0.8811362907277215, iteration: 172151
loss: 1.0163969993591309,grad_norm: 0.9621377344557919, iteration: 172152
loss: 1.001886248588562,grad_norm: 0.9999991232296694, iteration: 172153
loss: 1.0247951745986938,grad_norm: 0.9999990348807185, iteration: 172154
loss: 0.988067090511322,grad_norm: 0.9999992299146754, iteration: 172155
loss: 0.9900411367416382,grad_norm: 0.9999991503580244, iteration: 172156
loss: 1.0240113735198975,grad_norm: 0.9999999028917846, iteration: 172157
loss: 0.9791052341461182,grad_norm: 0.9999990544132195, iteration: 172158
loss: 1.1978785991668701,grad_norm: 0.9999994411309606, iteration: 172159
loss: 0.9761648178100586,grad_norm: 0.9999992292400918, iteration: 172160
loss: 1.0405699014663696,grad_norm: 0.9999991437404103, iteration: 172161
loss: 0.9818109273910522,grad_norm: 0.9177170263493791, iteration: 172162
loss: 0.9784618616104126,grad_norm: 0.9999993568954502, iteration: 172163
loss: 0.9756927490234375,grad_norm: 0.8982249474825009, iteration: 172164
loss: 0.9885342121124268,grad_norm: 0.9763869640728694, iteration: 172165
loss: 1.0135318040847778,grad_norm: 0.9622801681519343, iteration: 172166
loss: 0.9793962240219116,grad_norm: 0.9561621166371115, iteration: 172167
loss: 0.9983978271484375,grad_norm: 0.966140264576167, iteration: 172168
loss: 1.0126525163650513,grad_norm: 0.9999990495069957, iteration: 172169
loss: 1.0448393821716309,grad_norm: 0.9999997404692262, iteration: 172170
loss: 1.0145549774169922,grad_norm: 0.999999602469858, iteration: 172171
loss: 0.9962270259857178,grad_norm: 0.9999995721027163, iteration: 172172
loss: 0.9891092777252197,grad_norm: 0.9302446914536888, iteration: 172173
loss: 0.9784064292907715,grad_norm: 0.9999992790672593, iteration: 172174
loss: 0.9957557320594788,grad_norm: 0.9999990895438743, iteration: 172175
loss: 1.0216953754425049,grad_norm: 0.9350981800814274, iteration: 172176
loss: 0.9946449398994446,grad_norm: 0.9999990448991523, iteration: 172177
loss: 1.013217806816101,grad_norm: 0.9999992367764974, iteration: 172178
loss: 0.9723033905029297,grad_norm: 0.930854344043489, iteration: 172179
loss: 1.0518454313278198,grad_norm: 0.9999999700179638, iteration: 172180
loss: 0.9997103214263916,grad_norm: 0.8075163895788717, iteration: 172181
loss: 1.013951063156128,grad_norm: 0.8637737085837572, iteration: 172182
loss: 0.9937574863433838,grad_norm: 0.9999991009294323, iteration: 172183
loss: 1.03500235080719,grad_norm: 0.9891476438582824, iteration: 172184
loss: 1.0379966497421265,grad_norm: 0.9999996130173113, iteration: 172185
loss: 1.0018150806427002,grad_norm: 0.9999989946395653, iteration: 172186
loss: 1.001206636428833,grad_norm: 0.9881295855447239, iteration: 172187
loss: 1.046689748764038,grad_norm: 0.9806700478071538, iteration: 172188
loss: 0.9839866757392883,grad_norm: 0.9999990131414975, iteration: 172189
loss: 0.9993817806243896,grad_norm: 0.9999989862465917, iteration: 172190
loss: 1.0089519023895264,grad_norm: 0.9999991524547874, iteration: 172191
loss: 1.0856965780258179,grad_norm: 0.9999993392841932, iteration: 172192
loss: 1.0136942863464355,grad_norm: 0.9999991993394007, iteration: 172193
loss: 0.9428683519363403,grad_norm: 0.9999990507461004, iteration: 172194
loss: 0.9848905801773071,grad_norm: 0.9918103884543403, iteration: 172195
loss: 0.993056058883667,grad_norm: 0.9999990707387617, iteration: 172196
loss: 0.9817305207252502,grad_norm: 0.9999993000458434, iteration: 172197
loss: 1.065353512763977,grad_norm: 0.9999991416470398, iteration: 172198
loss: 1.1615664958953857,grad_norm: 0.9999998502635994, iteration: 172199
loss: 0.9792758226394653,grad_norm: 0.934865974763856, iteration: 172200
loss: 1.0081923007965088,grad_norm: 0.9999994214927056, iteration: 172201
loss: 1.0644992589950562,grad_norm: 0.9999991121629106, iteration: 172202
loss: 1.0002881288528442,grad_norm: 0.9908950415450832, iteration: 172203
loss: 1.0879724025726318,grad_norm: 0.9999999236973134, iteration: 172204
loss: 1.0096213817596436,grad_norm: 0.9999992221022715, iteration: 172205
loss: 1.008454442024231,grad_norm: 0.9999992036541709, iteration: 172206
loss: 0.9830330610275269,grad_norm: 0.9015851477743674, iteration: 172207
loss: 1.122085690498352,grad_norm: 0.90224255140564, iteration: 172208
loss: 1.0200088024139404,grad_norm: 0.9864060160058169, iteration: 172209
loss: 1.044671654701233,grad_norm: 0.999999401124376, iteration: 172210
loss: 0.993069589138031,grad_norm: 0.9373169954809061, iteration: 172211
loss: 1.024011254310608,grad_norm: 0.9999990957337185, iteration: 172212
loss: 0.992523729801178,grad_norm: 0.9929123228669123, iteration: 172213
loss: 1.0215073823928833,grad_norm: 0.9656828866407535, iteration: 172214
loss: 1.0343683958053589,grad_norm: 0.9906309774040207, iteration: 172215
loss: 0.9975305795669556,grad_norm: 0.9999990681663551, iteration: 172216
loss: 0.9588279724121094,grad_norm: 0.999999061224362, iteration: 172217
loss: 1.0359861850738525,grad_norm: 0.9999995389583504, iteration: 172218
loss: 1.0151389837265015,grad_norm: 0.9999998874005779, iteration: 172219
loss: 0.9494427442550659,grad_norm: 0.9259856611869892, iteration: 172220
loss: 1.0555344820022583,grad_norm: 0.9999997633519888, iteration: 172221
loss: 1.0143537521362305,grad_norm: 0.9999994200347747, iteration: 172222
loss: 1.0075234174728394,grad_norm: 0.9999991303014356, iteration: 172223
loss: 1.0191564559936523,grad_norm: 0.9002485245492285, iteration: 172224
loss: 0.9942495226860046,grad_norm: 0.9932201695434084, iteration: 172225
loss: 1.0753118991851807,grad_norm: 0.9999998715118367, iteration: 172226
loss: 1.1043208837509155,grad_norm: 0.9999997927999724, iteration: 172227
loss: 0.9886993765830994,grad_norm: 0.9999989083047143, iteration: 172228
loss: 1.0639588832855225,grad_norm: 0.9999995202978766, iteration: 172229
loss: 0.9950926899909973,grad_norm: 0.8784078210063667, iteration: 172230
loss: 0.9824451804161072,grad_norm: 0.9192056438702966, iteration: 172231
loss: 1.0002211332321167,grad_norm: 0.9888754161555999, iteration: 172232
loss: 1.0382384061813354,grad_norm: 0.9999991678189842, iteration: 172233
loss: 0.987430989742279,grad_norm: 0.9537703153962601, iteration: 172234
loss: 0.9978930950164795,grad_norm: 0.8733377985023336, iteration: 172235
loss: 0.9807981252670288,grad_norm: 0.8927998393922714, iteration: 172236
loss: 1.0012229681015015,grad_norm: 0.9999991286181308, iteration: 172237
loss: 0.99305260181427,grad_norm: 0.9999998882859749, iteration: 172238
loss: 1.001867413520813,grad_norm: 0.8858781638786818, iteration: 172239
loss: 1.0268127918243408,grad_norm: 0.9999991970021888, iteration: 172240
loss: 1.0044106245040894,grad_norm: 0.914146364661796, iteration: 172241
loss: 0.990906298160553,grad_norm: 0.9410936401409639, iteration: 172242
loss: 1.0282261371612549,grad_norm: 0.983694647385733, iteration: 172243
loss: 1.0062553882598877,grad_norm: 0.9999991416286351, iteration: 172244
loss: 1.0163154602050781,grad_norm: 0.999999282527194, iteration: 172245
loss: 1.0179611444473267,grad_norm: 0.9136403715310867, iteration: 172246
loss: 0.9778793454170227,grad_norm: 0.9336337886650943, iteration: 172247
loss: 1.0592625141143799,grad_norm: 0.9999990781728093, iteration: 172248
loss: 1.0608432292938232,grad_norm: 0.9999991467345318, iteration: 172249
loss: 1.0386333465576172,grad_norm: 0.8672146707004709, iteration: 172250
loss: 1.1300735473632812,grad_norm: 0.9999992649636965, iteration: 172251
loss: 1.055394172668457,grad_norm: 0.9999992308692591, iteration: 172252
loss: 1.0414538383483887,grad_norm: 0.9999997857030525, iteration: 172253
loss: 0.9884369969367981,grad_norm: 0.8683919210279342, iteration: 172254
loss: 1.0051076412200928,grad_norm: 0.9999992010227126, iteration: 172255
loss: 0.9833619594573975,grad_norm: 0.9999990645369525, iteration: 172256
loss: 1.0003927946090698,grad_norm: 0.9999989803425103, iteration: 172257
loss: 1.013850212097168,grad_norm: 0.9999997739964399, iteration: 172258
loss: 1.0139669179916382,grad_norm: 0.9999991457855745, iteration: 172259
loss: 0.9887627363204956,grad_norm: 0.9660295907832295, iteration: 172260
loss: 0.9940506219863892,grad_norm: 0.9999991993961871, iteration: 172261
loss: 1.0228255987167358,grad_norm: 0.9999997989194352, iteration: 172262
loss: 1.0257235765457153,grad_norm: 0.9999990960181315, iteration: 172263
loss: 1.0293855667114258,grad_norm: 0.999999083970715, iteration: 172264
loss: 1.0198278427124023,grad_norm: 0.9999994978550687, iteration: 172265
loss: 1.035277009010315,grad_norm: 0.9999991790912043, iteration: 172266
loss: 1.0375677347183228,grad_norm: 0.9999991166559526, iteration: 172267
loss: 1.1181093454360962,grad_norm: 0.9999995775847287, iteration: 172268
loss: 0.9938055872917175,grad_norm: 0.9741873747824862, iteration: 172269
loss: 0.9424747228622437,grad_norm: 0.9733559643332632, iteration: 172270
loss: 0.9718737006187439,grad_norm: 0.9523480896153773, iteration: 172271
loss: 1.0174099206924438,grad_norm: 0.9672627713487799, iteration: 172272
loss: 1.0114721059799194,grad_norm: 0.9205146802463852, iteration: 172273
loss: 1.017594575881958,grad_norm: 0.9999991327563175, iteration: 172274
loss: 0.9832236766815186,grad_norm: 0.9203288312334894, iteration: 172275
loss: 0.9916151165962219,grad_norm: 0.9999991399326039, iteration: 172276
loss: 0.98907071352005,grad_norm: 0.9999993740735127, iteration: 172277
loss: 1.0125672817230225,grad_norm: 0.9787069550029911, iteration: 172278
loss: 0.9844342470169067,grad_norm: 0.9718708154048687, iteration: 172279
loss: 1.0089737176895142,grad_norm: 0.9336917955842245, iteration: 172280
loss: 1.0166587829589844,grad_norm: 0.9999998592912661, iteration: 172281
loss: 0.9849595427513123,grad_norm: 0.96365360446812, iteration: 172282
loss: 1.0050581693649292,grad_norm: 0.9999991703691409, iteration: 172283
loss: 0.9977028369903564,grad_norm: 0.9999990477740063, iteration: 172284
loss: 1.0002408027648926,grad_norm: 0.8680205868571444, iteration: 172285
loss: 1.0485308170318604,grad_norm: 0.9999996018992084, iteration: 172286
loss: 1.0448486804962158,grad_norm: 0.9999996434533064, iteration: 172287
loss: 0.9927368760108948,grad_norm: 0.9999992449313205, iteration: 172288
loss: 1.1787192821502686,grad_norm: 0.9999993668211566, iteration: 172289
loss: 1.0056483745574951,grad_norm: 0.9999992595131115, iteration: 172290
loss: 1.0628933906555176,grad_norm: 0.9472814575620646, iteration: 172291
loss: 1.0960261821746826,grad_norm: 0.9999998144973606, iteration: 172292
loss: 1.025224208831787,grad_norm: 0.999999055232014, iteration: 172293
loss: 1.010062575340271,grad_norm: 0.9999995349622901, iteration: 172294
loss: 1.0033166408538818,grad_norm: 0.9999991118330178, iteration: 172295
loss: 1.0288292169570923,grad_norm: 0.9799256380148831, iteration: 172296
loss: 1.079946756362915,grad_norm: 0.9999998847376679, iteration: 172297
loss: 1.153434157371521,grad_norm: 0.999999960515735, iteration: 172298
loss: 1.0823005437850952,grad_norm: 0.9999996898871433, iteration: 172299
loss: 1.0275322198867798,grad_norm: 0.9999991274803721, iteration: 172300
loss: 1.1064221858978271,grad_norm: 1.0000000145445387, iteration: 172301
loss: 1.040665626525879,grad_norm: 0.933537699229878, iteration: 172302
loss: 0.9604488611221313,grad_norm: 0.9999997260225466, iteration: 172303
loss: 1.0649863481521606,grad_norm: 0.9999993631399958, iteration: 172304
loss: 1.0310628414154053,grad_norm: 0.999999817738184, iteration: 172305
loss: 1.0111266374588013,grad_norm: 0.9791440031650842, iteration: 172306
loss: 1.0495692491531372,grad_norm: 0.9999998588159653, iteration: 172307
loss: 1.2083280086517334,grad_norm: 0.9999997884218885, iteration: 172308
loss: 1.215431809425354,grad_norm: 0.9999998728577787, iteration: 172309
loss: 1.2231053113937378,grad_norm: 0.9999998379786202, iteration: 172310
loss: 1.1512095928192139,grad_norm: 0.9999995237882602, iteration: 172311
loss: 1.0866795778274536,grad_norm: 0.9999997802544475, iteration: 172312
loss: 1.2651771306991577,grad_norm: 0.9999998418869062, iteration: 172313
loss: 1.0122921466827393,grad_norm: 0.99999972926663, iteration: 172314
loss: 1.029166579246521,grad_norm: 0.9999995999633426, iteration: 172315
loss: 0.9932585954666138,grad_norm: 0.9999991965701523, iteration: 172316
loss: 1.163281798362732,grad_norm: 0.9999996936677106, iteration: 172317
loss: 1.259625792503357,grad_norm: 0.9999998068980535, iteration: 172318
loss: 1.0278939008712769,grad_norm: 0.9999996685751419, iteration: 172319
loss: 1.0690065622329712,grad_norm: 0.9999991271313099, iteration: 172320
loss: 1.0822030305862427,grad_norm: 0.9999990665003154, iteration: 172321
loss: 1.0471748113632202,grad_norm: 0.9999998326672281, iteration: 172322
loss: 1.108374834060669,grad_norm: 0.9999991493046038, iteration: 172323
loss: 1.0151630640029907,grad_norm: 0.9999994725285594, iteration: 172324
loss: 0.9792178273200989,grad_norm: 0.9314398313529779, iteration: 172325
loss: 1.0946160554885864,grad_norm: 0.9999993100425278, iteration: 172326
loss: 0.9898768067359924,grad_norm: 0.9999991424712085, iteration: 172327
loss: 0.9549455642700195,grad_norm: 0.9999991432271665, iteration: 172328
loss: 1.009647250175476,grad_norm: 0.9251380710898419, iteration: 172329
loss: 0.9630016088485718,grad_norm: 0.9999991355078427, iteration: 172330
loss: 1.0041029453277588,grad_norm: 0.9999992404974167, iteration: 172331
loss: 1.1309542655944824,grad_norm: 0.999999843339227, iteration: 172332
loss: 0.9842957854270935,grad_norm: 0.9999990973489525, iteration: 172333
loss: 1.093034267425537,grad_norm: 0.9999995130253301, iteration: 172334
loss: 1.007352590560913,grad_norm: 0.9999990806227317, iteration: 172335
loss: 0.9695711731910706,grad_norm: 0.9348211081943445, iteration: 172336
loss: 0.9937678575515747,grad_norm: 0.8897896788407001, iteration: 172337
loss: 0.9932149052619934,grad_norm: 0.9999991786836684, iteration: 172338
loss: 1.0554429292678833,grad_norm: 0.9999996407908021, iteration: 172339
loss: 0.9433186650276184,grad_norm: 0.9999990990031888, iteration: 172340
loss: 1.0840532779693604,grad_norm: 0.9999999473241179, iteration: 172341
loss: 1.0161316394805908,grad_norm: 0.999999880047607, iteration: 172342
loss: 1.0220115184783936,grad_norm: 0.9181386256767505, iteration: 172343
loss: 0.9921066761016846,grad_norm: 0.9999990769611318, iteration: 172344
loss: 1.0071563720703125,grad_norm: 0.9999990892285462, iteration: 172345
loss: 1.0523335933685303,grad_norm: 0.9999994135321277, iteration: 172346
loss: 0.988466739654541,grad_norm: 0.9862190119696854, iteration: 172347
loss: 0.9839968681335449,grad_norm: 0.8594982370218212, iteration: 172348
loss: 1.1029127836227417,grad_norm: 0.9999995413511533, iteration: 172349
loss: 1.0987955331802368,grad_norm: 0.9999998669707306, iteration: 172350
loss: 1.0744316577911377,grad_norm: 0.9999995478554196, iteration: 172351
loss: 1.0215322971343994,grad_norm: 0.9999993471572609, iteration: 172352
loss: 1.2716329097747803,grad_norm: 0.9999999664555143, iteration: 172353
loss: 1.117346167564392,grad_norm: 0.9999999455155454, iteration: 172354
loss: 1.0210739374160767,grad_norm: 0.999999101330617, iteration: 172355
loss: 1.0246342420578003,grad_norm: 0.9999997100853375, iteration: 172356
loss: 0.9855015873908997,grad_norm: 0.9999990939973394, iteration: 172357
loss: 1.0017094612121582,grad_norm: 0.8312963340591923, iteration: 172358
loss: 0.9821571111679077,grad_norm: 0.9999991168414567, iteration: 172359
loss: 0.9831650257110596,grad_norm: 0.9999991112811374, iteration: 172360
loss: 0.9763665199279785,grad_norm: 0.9999991457537574, iteration: 172361
loss: 0.9822454452514648,grad_norm: 0.9999993034076669, iteration: 172362
loss: 1.0051281452178955,grad_norm: 0.9418012063737798, iteration: 172363
loss: 0.9711675047874451,grad_norm: 0.9265664215232027, iteration: 172364
loss: 1.0199780464172363,grad_norm: 0.9999998416122174, iteration: 172365
loss: 0.9741833209991455,grad_norm: 0.9982606766599763, iteration: 172366
loss: 1.0205172300338745,grad_norm: 0.9412167471730505, iteration: 172367
loss: 0.9834652543067932,grad_norm: 0.9999991122280222, iteration: 172368
loss: 1.0044747591018677,grad_norm: 0.9999991590864061, iteration: 172369
loss: 1.0436866283416748,grad_norm: 0.9850705534075593, iteration: 172370
loss: 1.0059796571731567,grad_norm: 0.9999992941223497, iteration: 172371
loss: 1.0454474687576294,grad_norm: 0.8729419474385628, iteration: 172372
loss: 1.0318739414215088,grad_norm: 0.9999990965148552, iteration: 172373
loss: 0.9960870146751404,grad_norm: 0.8775110429639191, iteration: 172374
loss: 0.9862577319145203,grad_norm: 0.9023588994570002, iteration: 172375
loss: 0.9949424266815186,grad_norm: 0.9999993211881364, iteration: 172376
loss: 0.9576172828674316,grad_norm: 0.9999992145107672, iteration: 172377
loss: 1.015864610671997,grad_norm: 0.9999992167199884, iteration: 172378
loss: 1.0196235179901123,grad_norm: 0.985893765571196, iteration: 172379
loss: 0.9574285745620728,grad_norm: 0.9999989802998104, iteration: 172380
loss: 1.013972520828247,grad_norm: 0.8444359262289802, iteration: 172381
loss: 0.9869495630264282,grad_norm: 0.9529905568700668, iteration: 172382
loss: 0.9920145273208618,grad_norm: 0.94155685256671, iteration: 172383
loss: 1.044252872467041,grad_norm: 0.9999993243792412, iteration: 172384
loss: 1.0104562044143677,grad_norm: 0.9999990096158407, iteration: 172385
loss: 0.9949315786361694,grad_norm: 0.8557453178306293, iteration: 172386
loss: 1.0180740356445312,grad_norm: 0.9999992086455659, iteration: 172387
loss: 0.9806686639785767,grad_norm: 0.9999990235447397, iteration: 172388
loss: 1.0688410997390747,grad_norm: 0.9999997268162547, iteration: 172389
loss: 0.9967266321182251,grad_norm: 0.9662151473011799, iteration: 172390
loss: 0.987568199634552,grad_norm: 0.9467459643880678, iteration: 172391
loss: 1.0257031917572021,grad_norm: 0.9999990905360574, iteration: 172392
loss: 0.9954248070716858,grad_norm: 0.9999991862815855, iteration: 172393
loss: 1.0060158967971802,grad_norm: 0.9839142227540265, iteration: 172394
loss: 0.9923502802848816,grad_norm: 0.9999995729133446, iteration: 172395
loss: 1.0224963426589966,grad_norm: 0.945799312481141, iteration: 172396
loss: 1.0124813318252563,grad_norm: 0.90083793512084, iteration: 172397
loss: 1.0268208980560303,grad_norm: 0.999999050613261, iteration: 172398
loss: 0.9920278191566467,grad_norm: 0.9081327713812403, iteration: 172399
loss: 1.0167745351791382,grad_norm: 0.9999991917381298, iteration: 172400
loss: 0.9998739361763,grad_norm: 0.9999989623206734, iteration: 172401
loss: 1.0197829008102417,grad_norm: 0.9999994905713434, iteration: 172402
loss: 1.032401442527771,grad_norm: 0.9999992447973005, iteration: 172403
loss: 0.9908556342124939,grad_norm: 0.9999991145263307, iteration: 172404
loss: 0.9918106198310852,grad_norm: 0.9835717189526212, iteration: 172405
loss: 1.0258313417434692,grad_norm: 0.9999995476076563, iteration: 172406
loss: 1.0025296211242676,grad_norm: 0.9999990498425645, iteration: 172407
loss: 1.0129464864730835,grad_norm: 0.7922817481954515, iteration: 172408
loss: 1.004837989807129,grad_norm: 0.9039160967321087, iteration: 172409
loss: 0.9937396049499512,grad_norm: 0.999999146066773, iteration: 172410
loss: 1.0010067224502563,grad_norm: 0.8376016708812196, iteration: 172411
loss: 1.0024129152297974,grad_norm: 0.8987703111250548, iteration: 172412
loss: 0.9679838418960571,grad_norm: 0.9999991150280942, iteration: 172413
loss: 1.0159854888916016,grad_norm: 0.9999991608468949, iteration: 172414
loss: 0.9882097244262695,grad_norm: 0.9999990552643985, iteration: 172415
loss: 0.9991414546966553,grad_norm: 0.8440261639074953, iteration: 172416
loss: 1.0201491117477417,grad_norm: 0.938983414838316, iteration: 172417
loss: 1.0198485851287842,grad_norm: 0.999998989230169, iteration: 172418
loss: 1.007075548171997,grad_norm: 0.9668219888648939, iteration: 172419
loss: 0.9702313542366028,grad_norm: 0.9999994026979592, iteration: 172420
loss: 1.0280262231826782,grad_norm: 0.9393163750664677, iteration: 172421
loss: 0.9791566729545593,grad_norm: 0.9862185331454147, iteration: 172422
loss: 1.0110681056976318,grad_norm: 0.9999992468740613, iteration: 172423
loss: 0.9904112815856934,grad_norm: 0.9726987728708888, iteration: 172424
loss: 1.110426664352417,grad_norm: 0.9999991619166853, iteration: 172425
loss: 1.0520809888839722,grad_norm: 0.9999993925605293, iteration: 172426
loss: 1.0001118183135986,grad_norm: 0.9999991674512698, iteration: 172427
loss: 0.9937206506729126,grad_norm: 0.9999991163360341, iteration: 172428
loss: 0.9975684881210327,grad_norm: 0.9158345120808157, iteration: 172429
loss: 0.9981803297996521,grad_norm: 0.8870946873647386, iteration: 172430
loss: 0.9973759651184082,grad_norm: 0.999999004464586, iteration: 172431
loss: 0.9847701787948608,grad_norm: 0.9999992724295239, iteration: 172432
loss: 1.030856728553772,grad_norm: 0.9999991508224668, iteration: 172433
loss: 0.9674409627914429,grad_norm: 0.988376019289698, iteration: 172434
loss: 1.0152919292449951,grad_norm: 0.875003778190688, iteration: 172435
loss: 0.9943264126777649,grad_norm: 0.9626590674465197, iteration: 172436
loss: 1.018243432044983,grad_norm: 0.8772018448406669, iteration: 172437
loss: 1.0339628458023071,grad_norm: 0.9999998628076511, iteration: 172438
loss: 0.9929816126823425,grad_norm: 0.9999992849696062, iteration: 172439
loss: 1.0547385215759277,grad_norm: 0.999999281881248, iteration: 172440
loss: 0.9655159115791321,grad_norm: 0.93493248924544, iteration: 172441
loss: 1.0885679721832275,grad_norm: 0.9999990571162864, iteration: 172442
loss: 1.0191553831100464,grad_norm: 0.9999991945398965, iteration: 172443
loss: 0.9785926938056946,grad_norm: 0.9999997192363119, iteration: 172444
loss: 1.020167350769043,grad_norm: 0.9999991539569681, iteration: 172445
loss: 0.9887512922286987,grad_norm: 0.9249443983550406, iteration: 172446
loss: 0.9442680478096008,grad_norm: 0.9999989613718243, iteration: 172447
loss: 0.9783417582511902,grad_norm: 0.9999990743963144, iteration: 172448
loss: 1.008482813835144,grad_norm: 0.9999992772186523, iteration: 172449
loss: 1.0156880617141724,grad_norm: 0.9206728978341621, iteration: 172450
loss: 1.000406265258789,grad_norm: 0.9999991998887252, iteration: 172451
loss: 0.9792013168334961,grad_norm: 0.9999990688635912, iteration: 172452
loss: 1.1596629619598389,grad_norm: 0.9999998623360794, iteration: 172453
loss: 0.9710599780082703,grad_norm: 0.9999990557431302, iteration: 172454
loss: 1.023654818534851,grad_norm: 0.9999993810655918, iteration: 172455
loss: 0.9885391592979431,grad_norm: 0.9999991081143379, iteration: 172456
loss: 1.0078625679016113,grad_norm: 0.999999045065245, iteration: 172457
loss: 1.036688208580017,grad_norm: 0.9999992481030315, iteration: 172458
loss: 1.0432347059249878,grad_norm: 0.9999992547923743, iteration: 172459
loss: 1.0144602060317993,grad_norm: 0.9754140252629646, iteration: 172460
loss: 0.9631338119506836,grad_norm: 0.9764703523136272, iteration: 172461
loss: 0.9895709156990051,grad_norm: 0.9015003923525895, iteration: 172462
loss: 0.9937590956687927,grad_norm: 0.9999990224388646, iteration: 172463
loss: 0.9835614562034607,grad_norm: 0.8761473187255984, iteration: 172464
loss: 1.0042181015014648,grad_norm: 0.9999990603817939, iteration: 172465
loss: 1.004110336303711,grad_norm: 0.8826774164608127, iteration: 172466
loss: 0.9926198124885559,grad_norm: 0.8469715023187167, iteration: 172467
loss: 1.008557915687561,grad_norm: 0.9999992798733238, iteration: 172468
loss: 1.0047696828842163,grad_norm: 0.9999990457331462, iteration: 172469
loss: 0.967037558555603,grad_norm: 0.9277654149300897, iteration: 172470
loss: 0.9775692820549011,grad_norm: 0.9999993289137815, iteration: 172471
loss: 0.9904108643531799,grad_norm: 0.8606471074250378, iteration: 172472
loss: 0.9812751412391663,grad_norm: 0.9999991823105684, iteration: 172473
loss: 0.986579954624176,grad_norm: 0.8685549585734285, iteration: 172474
loss: 1.0861209630966187,grad_norm: 0.999999415070768, iteration: 172475
loss: 1.0030301809310913,grad_norm: 0.999999108008874, iteration: 172476
loss: 1.0010138750076294,grad_norm: 0.9999998611302312, iteration: 172477
loss: 0.9863181710243225,grad_norm: 0.9999990332807203, iteration: 172478
loss: 0.9944373965263367,grad_norm: 0.9999992669830108, iteration: 172479
loss: 1.019014835357666,grad_norm: 0.987878622439269, iteration: 172480
loss: 0.9941684603691101,grad_norm: 0.9367574637925666, iteration: 172481
loss: 0.9895848035812378,grad_norm: 0.9999990734608363, iteration: 172482
loss: 0.9831233620643616,grad_norm: 0.8424567568796659, iteration: 172483
loss: 1.0796090364456177,grad_norm: 0.9999999549066455, iteration: 172484
loss: 0.9980512857437134,grad_norm: 0.9999995374223545, iteration: 172485
loss: 0.9285276532173157,grad_norm: 0.9071381389296955, iteration: 172486
loss: 1.0331459045410156,grad_norm: 0.9155310404724317, iteration: 172487
loss: 0.9944061636924744,grad_norm: 0.9999991426429656, iteration: 172488
loss: 1.0513297319412231,grad_norm: 0.9999991676763716, iteration: 172489
loss: 1.0096768140792847,grad_norm: 0.8558799380505601, iteration: 172490
loss: 0.9699793457984924,grad_norm: 0.9999993197032601, iteration: 172491
loss: 1.0238232612609863,grad_norm: 0.9200435590977809, iteration: 172492
loss: 0.9799715876579285,grad_norm: 0.9999990366799153, iteration: 172493
loss: 1.0612232685089111,grad_norm: 0.9999990805398319, iteration: 172494
loss: 0.9859169721603394,grad_norm: 0.9999992127047419, iteration: 172495
loss: 1.0182232856750488,grad_norm: 0.9461985015414278, iteration: 172496
loss: 1.0085591077804565,grad_norm: 0.9999992159239409, iteration: 172497
loss: 0.9804638624191284,grad_norm: 0.9605872555277086, iteration: 172498
loss: 0.9756377339363098,grad_norm: 0.9999990405435637, iteration: 172499
loss: 1.0299007892608643,grad_norm: 0.9999989088795628, iteration: 172500
loss: 1.0109848976135254,grad_norm: 0.8432762614635853, iteration: 172501
loss: 0.9926380515098572,grad_norm: 0.9953119650635319, iteration: 172502
loss: 0.9727980494499207,grad_norm: 0.9999992113926996, iteration: 172503
loss: 0.9560849666595459,grad_norm: 0.9999990813267535, iteration: 172504
loss: 1.0590078830718994,grad_norm: 0.9915216669369865, iteration: 172505
loss: 1.0325207710266113,grad_norm: 0.9999996727581215, iteration: 172506
loss: 0.9949076771736145,grad_norm: 0.9999991461840637, iteration: 172507
loss: 1.047658920288086,grad_norm: 0.9999992317488492, iteration: 172508
loss: 0.9907442927360535,grad_norm: 0.9689320908872683, iteration: 172509
loss: 1.039178729057312,grad_norm: 0.9999998225451676, iteration: 172510
loss: 1.0047413110733032,grad_norm: 0.9999990899404546, iteration: 172511
loss: 0.9964346885681152,grad_norm: 0.9999991908649327, iteration: 172512
loss: 1.0272706747055054,grad_norm: 0.9999997118846468, iteration: 172513
loss: 1.0163284540176392,grad_norm: 0.916111626870542, iteration: 172514
loss: 1.0153322219848633,grad_norm: 0.9999990948839619, iteration: 172515
loss: 1.0002588033676147,grad_norm: 0.9915773726141859, iteration: 172516
loss: 0.9711964130401611,grad_norm: 0.9999990833073565, iteration: 172517
loss: 1.0240223407745361,grad_norm: 0.9396690942278552, iteration: 172518
loss: 1.0284162759780884,grad_norm: 0.9999992036298045, iteration: 172519
loss: 1.0020208358764648,grad_norm: 0.9822636651895537, iteration: 172520
loss: 0.9695016741752625,grad_norm: 0.999998972326151, iteration: 172521
loss: 0.9697141051292419,grad_norm: 0.8463832407893358, iteration: 172522
loss: 0.9893818497657776,grad_norm: 0.8638248114649553, iteration: 172523
loss: 1.0169306993484497,grad_norm: 0.937890661321407, iteration: 172524
loss: 1.0267260074615479,grad_norm: 0.962299765398589, iteration: 172525
loss: 0.9957206845283508,grad_norm: 0.9387697812892777, iteration: 172526
loss: 1.0128835439682007,grad_norm: 0.9999991139057619, iteration: 172527
loss: 1.0690858364105225,grad_norm: 0.9024595914045831, iteration: 172528
loss: 0.9761372804641724,grad_norm: 0.9999990614497912, iteration: 172529
loss: 1.0135207176208496,grad_norm: 0.9108597767317355, iteration: 172530
loss: 1.0079731941223145,grad_norm: 0.9999990946426167, iteration: 172531
loss: 0.9385718107223511,grad_norm: 0.8515599811307359, iteration: 172532
loss: 0.9942886829376221,grad_norm: 0.9165099279711718, iteration: 172533
loss: 1.0308843851089478,grad_norm: 0.9803878268770705, iteration: 172534
loss: 1.0170949697494507,grad_norm: 0.9218384970821153, iteration: 172535
loss: 0.9882394671440125,grad_norm: 0.9999991316569868, iteration: 172536
loss: 1.0197136402130127,grad_norm: 0.9999998567621713, iteration: 172537
loss: 1.000807285308838,grad_norm: 0.9999990365719267, iteration: 172538
loss: 1.0209153890609741,grad_norm: 0.9056683002064982, iteration: 172539
loss: 0.9939191341400146,grad_norm: 0.8605602926040519, iteration: 172540
loss: 1.0118200778961182,grad_norm: 0.9999991645600198, iteration: 172541
loss: 0.9653525948524475,grad_norm: 0.9453583803435648, iteration: 172542
loss: 1.007538080215454,grad_norm: 0.9999991164126562, iteration: 172543
loss: 0.9910830855369568,grad_norm: 0.8864763076137668, iteration: 172544
loss: 1.028548240661621,grad_norm: 0.8743876205904397, iteration: 172545
loss: 1.0167309045791626,grad_norm: 0.9999990829692714, iteration: 172546
loss: 1.028800129890442,grad_norm: 0.9999990788465095, iteration: 172547
loss: 1.05341637134552,grad_norm: 0.8764383577803184, iteration: 172548
loss: 1.0084621906280518,grad_norm: 0.8448186602696293, iteration: 172549
loss: 1.0100101232528687,grad_norm: 0.9999991547104806, iteration: 172550
loss: 1.015796184539795,grad_norm: 0.9011602230856184, iteration: 172551
loss: 1.0039410591125488,grad_norm: 0.9362563261551764, iteration: 172552
loss: 0.9769521951675415,grad_norm: 0.9999990849168391, iteration: 172553
loss: 1.1345010995864868,grad_norm: 0.99999981926779, iteration: 172554
loss: 1.0084476470947266,grad_norm: 0.9999991911370205, iteration: 172555
loss: 0.9709999561309814,grad_norm: 0.9999990239859026, iteration: 172556
loss: 0.997738778591156,grad_norm: 0.9999991132732234, iteration: 172557
loss: 1.0260748863220215,grad_norm: 0.9999992125019976, iteration: 172558
loss: 1.001210331916809,grad_norm: 0.9999995072577035, iteration: 172559
loss: 0.9863605499267578,grad_norm: 0.9999992550284759, iteration: 172560
loss: 1.0366204977035522,grad_norm: 0.9341454721170876, iteration: 172561
loss: 0.9759148359298706,grad_norm: 0.819963915807587, iteration: 172562
loss: 0.9956813454627991,grad_norm: 0.9133058572742335, iteration: 172563
loss: 1.014883279800415,grad_norm: 0.9999990150293339, iteration: 172564
loss: 1.0088741779327393,grad_norm: 0.9123278709179017, iteration: 172565
loss: 1.0088714361190796,grad_norm: 0.999999078439182, iteration: 172566
loss: 1.0029749870300293,grad_norm: 0.999999173835128, iteration: 172567
loss: 1.0040955543518066,grad_norm: 0.8795414838271562, iteration: 172568
loss: 1.0082443952560425,grad_norm: 0.9999991425799776, iteration: 172569
loss: 0.9779632091522217,grad_norm: 0.9999990400380128, iteration: 172570
loss: 1.0329563617706299,grad_norm: 0.9999991784961209, iteration: 172571
loss: 0.9734665155410767,grad_norm: 0.8871370841578713, iteration: 172572
loss: 0.9935673475265503,grad_norm: 0.9999990835734786, iteration: 172573
loss: 0.9845786094665527,grad_norm: 0.9260722435572085, iteration: 172574
loss: 1.0164592266082764,grad_norm: 0.9999992240818664, iteration: 172575
loss: 0.9729880094528198,grad_norm: 0.9215035108797784, iteration: 172576
loss: 1.009204387664795,grad_norm: 0.9240537432231162, iteration: 172577
loss: 1.0212475061416626,grad_norm: 0.889166326998011, iteration: 172578
loss: 1.0199205875396729,grad_norm: 0.9493843760959347, iteration: 172579
loss: 1.0408169031143188,grad_norm: 0.999999407706419, iteration: 172580
loss: 1.0039788484573364,grad_norm: 0.9999991931592794, iteration: 172581
loss: 1.0390212535858154,grad_norm: 0.9999993327814125, iteration: 172582
loss: 1.024125099182129,grad_norm: 0.9695169208990803, iteration: 172583
loss: 1.0229071378707886,grad_norm: 0.8219664931951801, iteration: 172584
loss: 1.0123234987258911,grad_norm: 0.8668985247424377, iteration: 172585
loss: 0.9991260766983032,grad_norm: 0.9999991763159337, iteration: 172586
loss: 0.9946491718292236,grad_norm: 0.9999992016607491, iteration: 172587
loss: 1.023639440536499,grad_norm: 0.8498552547965339, iteration: 172588
loss: 0.9992375373840332,grad_norm: 0.9470265069099398, iteration: 172589
loss: 0.9843652248382568,grad_norm: 0.975326620772641, iteration: 172590
loss: 0.9954883456230164,grad_norm: 0.9999997408082284, iteration: 172591
loss: 1.0206809043884277,grad_norm: 0.949143018544926, iteration: 172592
loss: 0.9730583429336548,grad_norm: 0.9607506433427172, iteration: 172593
loss: 0.9808852672576904,grad_norm: 0.9999991889331289, iteration: 172594
loss: 1.001865029335022,grad_norm: 0.9690953378533609, iteration: 172595
loss: 0.9893307685852051,grad_norm: 0.999999147779585, iteration: 172596
loss: 1.0378484725952148,grad_norm: 0.999999593549439, iteration: 172597
loss: 1.0031180381774902,grad_norm: 0.9999997678393637, iteration: 172598
loss: 1.0025980472564697,grad_norm: 0.9999992354520119, iteration: 172599
loss: 1.177847146987915,grad_norm: 0.9999999821684283, iteration: 172600
loss: 0.9981045722961426,grad_norm: 0.806147287054627, iteration: 172601
loss: 1.0009740591049194,grad_norm: 0.9999991530337561, iteration: 172602
loss: 1.020908236503601,grad_norm: 0.9999991892676312, iteration: 172603
loss: 1.0177949666976929,grad_norm: 0.9192899341932314, iteration: 172604
loss: 0.9529276490211487,grad_norm: 0.8802149109680797, iteration: 172605
loss: 1.0042859315872192,grad_norm: 0.9971641685560964, iteration: 172606
loss: 1.0090323686599731,grad_norm: 0.9999991025320749, iteration: 172607
loss: 1.023311972618103,grad_norm: 0.9475507549323453, iteration: 172608
loss: 0.9914043545722961,grad_norm: 0.8467672762248442, iteration: 172609
loss: 1.0102037191390991,grad_norm: 0.9170161609402989, iteration: 172610
loss: 0.9966725707054138,grad_norm: 0.9999992371875217, iteration: 172611
loss: 1.0726557970046997,grad_norm: 0.9528903191421465, iteration: 172612
loss: 0.9826050400733948,grad_norm: 0.8981362122685973, iteration: 172613
loss: 0.9625656008720398,grad_norm: 0.9603550954898828, iteration: 172614
loss: 0.9952582120895386,grad_norm: 0.9999991795664065, iteration: 172615
loss: 1.017578363418579,grad_norm: 0.9999991782575632, iteration: 172616
loss: 0.9876301884651184,grad_norm: 0.9999990964268759, iteration: 172617
loss: 0.9704850912094116,grad_norm: 0.9039093634624635, iteration: 172618
loss: 0.9871607422828674,grad_norm: 0.9497100894809627, iteration: 172619
loss: 1.0839314460754395,grad_norm: 0.9999992133780804, iteration: 172620
loss: 0.9531371593475342,grad_norm: 0.8131725083055495, iteration: 172621
loss: 1.0873299837112427,grad_norm: 0.9999990558772537, iteration: 172622
loss: 1.009298324584961,grad_norm: 0.9139487225046361, iteration: 172623
loss: 1.0194604396820068,grad_norm: 0.9284814554787498, iteration: 172624
loss: 1.0027159452438354,grad_norm: 0.929944794580064, iteration: 172625
loss: 1.0155478715896606,grad_norm: 0.999999195275213, iteration: 172626
loss: 0.9824198484420776,grad_norm: 0.9434465066868768, iteration: 172627
loss: 1.0273834466934204,grad_norm: 0.9999991332185312, iteration: 172628
loss: 1.0087642669677734,grad_norm: 0.9606368393295086, iteration: 172629
loss: 0.9541066288948059,grad_norm: 0.9999990887893662, iteration: 172630
loss: 1.0174435377120972,grad_norm: 0.9191317921835331, iteration: 172631
loss: 1.0158882141113281,grad_norm: 0.7842157795922959, iteration: 172632
loss: 1.0394972562789917,grad_norm: 0.8774061010233923, iteration: 172633
loss: 0.9977514147758484,grad_norm: 0.9366028361932275, iteration: 172634
loss: 1.0068261623382568,grad_norm: 0.9805022090954283, iteration: 172635
loss: 0.9785928726196289,grad_norm: 0.9999989962655043, iteration: 172636
loss: 0.956895112991333,grad_norm: 0.898892195007607, iteration: 172637
loss: 1.0012775659561157,grad_norm: 0.9999994823021217, iteration: 172638
loss: 0.9937295317649841,grad_norm: 0.7749665480235097, iteration: 172639
loss: 0.9999792575836182,grad_norm: 0.9999988815130527, iteration: 172640
loss: 1.0101282596588135,grad_norm: 0.9999990674152743, iteration: 172641
loss: 1.0093188285827637,grad_norm: 0.9808686988255892, iteration: 172642
loss: 1.0108259916305542,grad_norm: 0.9711909794254463, iteration: 172643
loss: 1.0352782011032104,grad_norm: 0.7953330164895285, iteration: 172644
loss: 0.9877333045005798,grad_norm: 0.9895398918259966, iteration: 172645
loss: 0.975140392780304,grad_norm: 0.8979800213035075, iteration: 172646
loss: 1.0316497087478638,grad_norm: 0.9999994189160236, iteration: 172647
loss: 1.0235540866851807,grad_norm: 0.9999990883327461, iteration: 172648
loss: 1.019922137260437,grad_norm: 0.999999037641519, iteration: 172649
loss: 1.0151079893112183,grad_norm: 0.9693539062764673, iteration: 172650
loss: 0.9743714928627014,grad_norm: 0.9999990106718148, iteration: 172651
loss: 0.9982130527496338,grad_norm: 0.8224316285285924, iteration: 172652
loss: 1.0007826089859009,grad_norm: 0.9349869085847298, iteration: 172653
loss: 0.9791262745857239,grad_norm: 0.9999990983048354, iteration: 172654
loss: 1.0028256177902222,grad_norm: 0.9999990782503666, iteration: 172655
loss: 1.00589120388031,grad_norm: 0.9999991849320755, iteration: 172656
loss: 0.9724891185760498,grad_norm: 0.9427442489043606, iteration: 172657
loss: 1.01072096824646,grad_norm: 0.8971121466119382, iteration: 172658
loss: 0.9733898043632507,grad_norm: 0.7961920933498798, iteration: 172659
loss: 1.0006135702133179,grad_norm: 0.9999990589516374, iteration: 172660
loss: 1.0118029117584229,grad_norm: 0.9999994058156269, iteration: 172661
loss: 1.0540004968643188,grad_norm: 0.9999996025589478, iteration: 172662
loss: 1.003650426864624,grad_norm: 0.9999991650089255, iteration: 172663
loss: 0.9968616962432861,grad_norm: 0.9999990952358658, iteration: 172664
loss: 0.9901166558265686,grad_norm: 0.9057227721813363, iteration: 172665
loss: 1.0166882276535034,grad_norm: 0.9999990516719298, iteration: 172666
loss: 0.9905194640159607,grad_norm: 0.9999993131395996, iteration: 172667
loss: 0.9907448887825012,grad_norm: 0.8728998542408335, iteration: 172668
loss: 1.0499193668365479,grad_norm: 0.9999991290069866, iteration: 172669
loss: 0.9670726656913757,grad_norm: 0.9020142017561908, iteration: 172670
loss: 0.9949974417686462,grad_norm: 0.9999993236432291, iteration: 172671
loss: 1.033679485321045,grad_norm: 0.9708898262434864, iteration: 172672
loss: 0.9961833953857422,grad_norm: 0.9999991704751868, iteration: 172673
loss: 1.0158928632736206,grad_norm: 0.9999991069195306, iteration: 172674
loss: 0.9774463176727295,grad_norm: 0.999999072380382, iteration: 172675
loss: 1.0403281450271606,grad_norm: 0.999999186387768, iteration: 172676
loss: 0.996971070766449,grad_norm: 0.9999999662277682, iteration: 172677
loss: 1.0253528356552124,grad_norm: 0.9922348526075608, iteration: 172678
loss: 1.0401064157485962,grad_norm: 0.9999994802422141, iteration: 172679
loss: 1.016808271408081,grad_norm: 0.9999990503083176, iteration: 172680
loss: 0.9950639605522156,grad_norm: 0.9999991634136924, iteration: 172681
loss: 1.0358370542526245,grad_norm: 0.9999993490409503, iteration: 172682
loss: 0.9800237417221069,grad_norm: 0.9999990283598408, iteration: 172683
loss: 1.0144389867782593,grad_norm: 0.9999991094994123, iteration: 172684
loss: 0.9672085046768188,grad_norm: 0.882763887727366, iteration: 172685
loss: 0.9667074680328369,grad_norm: 0.9999990956940216, iteration: 172686
loss: 0.9994433522224426,grad_norm: 0.9999992605296277, iteration: 172687
loss: 1.013412356376648,grad_norm: 0.9999991611624658, iteration: 172688
loss: 1.0146276950836182,grad_norm: 0.9999993304126675, iteration: 172689
loss: 0.9804175496101379,grad_norm: 0.7351906839224984, iteration: 172690
loss: 0.9698159098625183,grad_norm: 0.9999990304839808, iteration: 172691
loss: 1.0278223752975464,grad_norm: 0.9588124867438726, iteration: 172692
loss: 1.001694917678833,grad_norm: 0.89235045974569, iteration: 172693
loss: 0.9817590117454529,grad_norm: 0.9973685135934901, iteration: 172694
loss: 0.9832524657249451,grad_norm: 0.9238416117823813, iteration: 172695
loss: 0.9967352151870728,grad_norm: 0.9291707619190358, iteration: 172696
loss: 1.0127055644989014,grad_norm: 0.9625830872728107, iteration: 172697
loss: 1.0044814348220825,grad_norm: 0.8832077064887205, iteration: 172698
loss: 1.0038472414016724,grad_norm: 0.999999175120714, iteration: 172699
loss: 1.0277619361877441,grad_norm: 0.999999182121269, iteration: 172700
loss: 1.0001511573791504,grad_norm: 0.9825836643700296, iteration: 172701
loss: 0.9853570461273193,grad_norm: 0.941869263951229, iteration: 172702
loss: 1.0127934217453003,grad_norm: 0.9021062025651506, iteration: 172703
loss: 0.986568808555603,grad_norm: 0.9999992361767045, iteration: 172704
loss: 1.0335179567337036,grad_norm: 0.9999990693123503, iteration: 172705
loss: 1.0141425132751465,grad_norm: 0.9999991270009612, iteration: 172706
loss: 0.9620630741119385,grad_norm: 0.9477606886994037, iteration: 172707
loss: 0.9939367175102234,grad_norm: 0.9999991651071141, iteration: 172708
loss: 0.9538739323616028,grad_norm: 0.9999991917064592, iteration: 172709
loss: 1.0133252143859863,grad_norm: 0.90330298973348, iteration: 172710
loss: 1.028936743736267,grad_norm: 0.9999991683271758, iteration: 172711
loss: 0.9595029950141907,grad_norm: 0.9999991206107423, iteration: 172712
loss: 0.9995550513267517,grad_norm: 0.9999991755034122, iteration: 172713
loss: 0.9711336493492126,grad_norm: 0.9999992678223312, iteration: 172714
loss: 1.0075958967208862,grad_norm: 0.9999989739248062, iteration: 172715
loss: 0.9926956295967102,grad_norm: 0.818350007284867, iteration: 172716
loss: 1.024766206741333,grad_norm: 0.9062040238777512, iteration: 172717
loss: 0.9860041737556458,grad_norm: 0.966707039034854, iteration: 172718
loss: 0.9879059791564941,grad_norm: 0.9999989345251448, iteration: 172719
loss: 1.0045461654663086,grad_norm: 0.9126689828241576, iteration: 172720
loss: 0.993428647518158,grad_norm: 0.9829983551455389, iteration: 172721
loss: 1.0104299783706665,grad_norm: 0.9999992328137245, iteration: 172722
loss: 1.0162681341171265,grad_norm: 0.8864247662592878, iteration: 172723
loss: 1.0161128044128418,grad_norm: 0.8496929784640167, iteration: 172724
loss: 0.9829367399215698,grad_norm: 0.9999991876971734, iteration: 172725
loss: 1.0224170684814453,grad_norm: 0.9999991531264761, iteration: 172726
loss: 1.0191738605499268,grad_norm: 0.8864536287921082, iteration: 172727
loss: 1.0266222953796387,grad_norm: 0.9999993038779927, iteration: 172728
loss: 1.0227532386779785,grad_norm: 0.9865005779044067, iteration: 172729
loss: 1.0008846521377563,grad_norm: 0.9350174084796833, iteration: 172730
loss: 0.9799734354019165,grad_norm: 0.9466470415970051, iteration: 172731
loss: 1.0047000646591187,grad_norm: 0.9843254256595767, iteration: 172732
loss: 1.007480263710022,grad_norm: 0.9678894582801241, iteration: 172733
loss: 0.9972421526908875,grad_norm: 0.9999993025649881, iteration: 172734
loss: 0.9866340756416321,grad_norm: 0.999999173965045, iteration: 172735
loss: 0.9712598323822021,grad_norm: 0.9101098980194812, iteration: 172736
loss: 1.03496515750885,grad_norm: 0.9626648363890885, iteration: 172737
loss: 0.9968374967575073,grad_norm: 0.9999999901804937, iteration: 172738
loss: 0.9926340579986572,grad_norm: 0.9970434477228391, iteration: 172739
loss: 1.0000338554382324,grad_norm: 0.9999992110773686, iteration: 172740
loss: 1.0018789768218994,grad_norm: 0.7980494092420083, iteration: 172741
loss: 1.0308599472045898,grad_norm: 0.9999988903918027, iteration: 172742
loss: 0.986354649066925,grad_norm: 0.9835916673540489, iteration: 172743
loss: 0.9455730319023132,grad_norm: 0.9999989917495835, iteration: 172744
loss: 1.0330653190612793,grad_norm: 0.8627465873759842, iteration: 172745
loss: 0.9686982035636902,grad_norm: 0.8905751947623941, iteration: 172746
loss: 0.9967968463897705,grad_norm: 0.9999990090897816, iteration: 172747
loss: 1.012759804725647,grad_norm: 0.9441661427348917, iteration: 172748
loss: 0.9873086810112,grad_norm: 0.8751123699140732, iteration: 172749
loss: 1.0133308172225952,grad_norm: 0.9999990843924434, iteration: 172750
loss: 0.9989721179008484,grad_norm: 0.8899332096133584, iteration: 172751
loss: 0.9979898929595947,grad_norm: 0.99999909064711, iteration: 172752
loss: 0.9848065376281738,grad_norm: 0.999999055504187, iteration: 172753
loss: 1.0221320390701294,grad_norm: 0.9999991018222707, iteration: 172754
loss: 1.037560224533081,grad_norm: 0.8766431673165712, iteration: 172755
loss: 1.0163815021514893,grad_norm: 0.9999991448961361, iteration: 172756
loss: 0.9964572787284851,grad_norm: 0.947555199009673, iteration: 172757
loss: 0.9510805010795593,grad_norm: 0.9999992943796095, iteration: 172758
loss: 0.9745767712593079,grad_norm: 0.9999991479239786, iteration: 172759
loss: 1.0429247617721558,grad_norm: 0.9999990156512519, iteration: 172760
loss: 1.0194512605667114,grad_norm: 0.9999990266289845, iteration: 172761
loss: 1.0141502618789673,grad_norm: 0.9782489973514994, iteration: 172762
loss: 1.0403069257736206,grad_norm: 0.9633317709880985, iteration: 172763
loss: 0.996396005153656,grad_norm: 0.9999996670486752, iteration: 172764
loss: 1.016222357749939,grad_norm: 0.9397018081536015, iteration: 172765
loss: 0.9903650879859924,grad_norm: 0.9999992297045753, iteration: 172766
loss: 0.9972209930419922,grad_norm: 0.9999991306127631, iteration: 172767
loss: 1.0166609287261963,grad_norm: 0.9999991034494595, iteration: 172768
loss: 1.021889567375183,grad_norm: 0.9131331707511108, iteration: 172769
loss: 1.0166912078857422,grad_norm: 0.9750577057017448, iteration: 172770
loss: 1.0014190673828125,grad_norm: 0.9999992790572901, iteration: 172771
loss: 1.0060662031173706,grad_norm: 0.9999995751408988, iteration: 172772
loss: 1.0006853342056274,grad_norm: 0.9986835633309706, iteration: 172773
loss: 0.9990086555480957,grad_norm: 0.8086504094051107, iteration: 172774
loss: 1.0179991722106934,grad_norm: 0.9319183846409194, iteration: 172775
loss: 1.0075931549072266,grad_norm: 0.9629314803515814, iteration: 172776
loss: 1.026591181755066,grad_norm: 0.9999998816286613, iteration: 172777
loss: 0.953838586807251,grad_norm: 0.9289742204483992, iteration: 172778
loss: 1.01521897315979,grad_norm: 0.9725249826262937, iteration: 172779
loss: 0.979748547077179,grad_norm: 0.9999991313287002, iteration: 172780
loss: 1.0000123977661133,grad_norm: 0.9406502340728676, iteration: 172781
loss: 0.9844741821289062,grad_norm: 0.8892713747807932, iteration: 172782
loss: 1.0235260725021362,grad_norm: 0.9999992372211326, iteration: 172783
loss: 1.0051378011703491,grad_norm: 0.9999990639395221, iteration: 172784
loss: 1.0080941915512085,grad_norm: 0.9999991129876232, iteration: 172785
loss: 0.9869933724403381,grad_norm: 0.9797149851257816, iteration: 172786
loss: 0.9961900115013123,grad_norm: 0.999999283908442, iteration: 172787
loss: 1.024713158607483,grad_norm: 0.9999992251000344, iteration: 172788
loss: 1.0145074129104614,grad_norm: 0.9263995481745, iteration: 172789
loss: 1.0168747901916504,grad_norm: 0.9999990042083514, iteration: 172790
loss: 1.010117769241333,grad_norm: 0.9999993186145024, iteration: 172791
loss: 0.9849611520767212,grad_norm: 0.9679696316493734, iteration: 172792
loss: 0.9832685589790344,grad_norm: 0.8606010016871387, iteration: 172793
loss: 0.9930707216262817,grad_norm: 0.9422979673850803, iteration: 172794
loss: 1.0712480545043945,grad_norm: 0.9999991483831498, iteration: 172795
loss: 1.0240181684494019,grad_norm: 0.9999994461046089, iteration: 172796
loss: 0.9894424676895142,grad_norm: 0.9999992230392156, iteration: 172797
loss: 1.0028109550476074,grad_norm: 0.999999173447309, iteration: 172798
loss: 1.006156325340271,grad_norm: 0.9999990632852814, iteration: 172799
loss: 1.033016562461853,grad_norm: 0.9852607323113446, iteration: 172800
loss: 1.0196698904037476,grad_norm: 0.8779767833513465, iteration: 172801
loss: 1.0140451192855835,grad_norm: 0.9999990600584516, iteration: 172802
loss: 0.9887858033180237,grad_norm: 0.9319966313318881, iteration: 172803
loss: 0.9868603348731995,grad_norm: 0.8985027573273595, iteration: 172804
loss: 0.9985141754150391,grad_norm: 0.9999994037312854, iteration: 172805
loss: 0.9763044714927673,grad_norm: 0.9999998320346064, iteration: 172806
loss: 1.0074588060379028,grad_norm: 0.9999990997128548, iteration: 172807
loss: 0.9606823921203613,grad_norm: 0.9084802470281725, iteration: 172808
loss: 1.0116238594055176,grad_norm: 0.8919571833302068, iteration: 172809
loss: 0.9942996501922607,grad_norm: 0.818853783634479, iteration: 172810
loss: 0.9822211861610413,grad_norm: 0.9292301597846119, iteration: 172811
loss: 0.9857643246650696,grad_norm: 0.9999992410082077, iteration: 172812
loss: 1.012787103652954,grad_norm: 0.781653097128179, iteration: 172813
loss: 1.0005444288253784,grad_norm: 0.8534842978313583, iteration: 172814
loss: 1.0003448724746704,grad_norm: 0.9609539886010214, iteration: 172815
loss: 0.9997556805610657,grad_norm: 0.8547521678538872, iteration: 172816
loss: 1.0065534114837646,grad_norm: 0.9999990634035933, iteration: 172817
loss: 0.9823974370956421,grad_norm: 0.9549436360711935, iteration: 172818
loss: 0.9870808124542236,grad_norm: 0.9667742574099678, iteration: 172819
loss: 0.9979711771011353,grad_norm: 0.9999991096917034, iteration: 172820
loss: 0.9832639098167419,grad_norm: 0.9614010901653774, iteration: 172821
loss: 1.0326827764511108,grad_norm: 0.9999991709845406, iteration: 172822
loss: 0.9996761083602905,grad_norm: 0.8405197673364078, iteration: 172823
loss: 1.0106432437896729,grad_norm: 0.9999992300594689, iteration: 172824
loss: 0.9857432842254639,grad_norm: 0.8483720864011973, iteration: 172825
loss: 1.0289145708084106,grad_norm: 0.8307194583211359, iteration: 172826
loss: 1.0074353218078613,grad_norm: 0.9644624651474113, iteration: 172827
loss: 0.9936404824256897,grad_norm: 0.9823510623989484, iteration: 172828
loss: 0.9936448931694031,grad_norm: 0.8817464649909438, iteration: 172829
loss: 0.9920914769172668,grad_norm: 0.9999991241136612, iteration: 172830
loss: 0.9871710538864136,grad_norm: 0.9487437646693301, iteration: 172831
loss: 0.9959949851036072,grad_norm: 0.9999990008379405, iteration: 172832
loss: 1.031473994255066,grad_norm: 0.999999308598371, iteration: 172833
loss: 1.0083810091018677,grad_norm: 0.8499794053101642, iteration: 172834
loss: 0.9988455176353455,grad_norm: 0.9288317397760031, iteration: 172835
loss: 0.9930089116096497,grad_norm: 0.8475514655161802, iteration: 172836
loss: 1.0278674364089966,grad_norm: 0.9654830820277017, iteration: 172837
loss: 0.9582241177558899,grad_norm: 0.9999992387196142, iteration: 172838
loss: 1.009611964225769,grad_norm: 0.9358541361499774, iteration: 172839
loss: 1.0441209077835083,grad_norm: 0.9999991063936549, iteration: 172840
loss: 0.9848532676696777,grad_norm: 0.7973113753988895, iteration: 172841
loss: 1.0530266761779785,grad_norm: 0.9999990334921703, iteration: 172842
loss: 1.0183799266815186,grad_norm: 0.9273493475235283, iteration: 172843
loss: 0.950639009475708,grad_norm: 0.9999990216052044, iteration: 172844
loss: 1.0134692192077637,grad_norm: 0.9999990936481872, iteration: 172845
loss: 0.9778071045875549,grad_norm: 0.9378768812953402, iteration: 172846
loss: 0.997786819934845,grad_norm: 0.9999990745988626, iteration: 172847
loss: 0.9763100147247314,grad_norm: 0.9999990213236098, iteration: 172848
loss: 0.9828850626945496,grad_norm: 0.9446220509870663, iteration: 172849
loss: 0.9625962972640991,grad_norm: 0.9993708858715242, iteration: 172850
loss: 1.000846028327942,grad_norm: 0.899564028370099, iteration: 172851
loss: 0.9874534010887146,grad_norm: 0.7929905665348111, iteration: 172852
loss: 0.9933369755744934,grad_norm: 0.8436403583338757, iteration: 172853
loss: 1.0494635105133057,grad_norm: 0.9999992403952572, iteration: 172854
loss: 0.9998372793197632,grad_norm: 0.9999991372253153, iteration: 172855
loss: 1.0116339921951294,grad_norm: 0.9999991459014307, iteration: 172856
loss: 1.0128698348999023,grad_norm: 0.9999993164067891, iteration: 172857
loss: 1.0096408128738403,grad_norm: 0.9999990290001409, iteration: 172858
loss: 0.9956892132759094,grad_norm: 0.9999991608765689, iteration: 172859
loss: 1.0048027038574219,grad_norm: 0.9999991424759948, iteration: 172860
loss: 0.9603225588798523,grad_norm: 0.9999989978716944, iteration: 172861
loss: 0.9748297929763794,grad_norm: 0.9999991125930071, iteration: 172862
loss: 0.9901167750358582,grad_norm: 0.9071831047264686, iteration: 172863
loss: 1.0033339262008667,grad_norm: 0.9999990525667237, iteration: 172864
loss: 1.0035303831100464,grad_norm: 0.9275083595606376, iteration: 172865
loss: 1.0116279125213623,grad_norm: 0.8362809122888963, iteration: 172866
loss: 1.0111180543899536,grad_norm: 0.9999990001148563, iteration: 172867
loss: 0.9967601299285889,grad_norm: 0.999999212231849, iteration: 172868
loss: 1.020845651626587,grad_norm: 0.9999991366675657, iteration: 172869
loss: 1.0324159860610962,grad_norm: 0.9999990025271402, iteration: 172870
loss: 1.0152313709259033,grad_norm: 0.9999992227199429, iteration: 172871
loss: 0.9832566976547241,grad_norm: 0.8682870153995177, iteration: 172872
loss: 0.9910787343978882,grad_norm: 0.8134847161119434, iteration: 172873
loss: 0.9869856238365173,grad_norm: 0.9999991254683109, iteration: 172874
loss: 0.9747297167778015,grad_norm: 0.9999990647585858, iteration: 172875
loss: 0.9707609415054321,grad_norm: 0.9999989630780424, iteration: 172876
loss: 1.003851294517517,grad_norm: 0.9999990083394796, iteration: 172877
loss: 0.9993287920951843,grad_norm: 0.9999990981723942, iteration: 172878
loss: 0.9793495535850525,grad_norm: 0.8984605326720655, iteration: 172879
loss: 0.9780609011650085,grad_norm: 0.9999991686188278, iteration: 172880
loss: 1.047278881072998,grad_norm: 0.9198112511275557, iteration: 172881
loss: 1.0011403560638428,grad_norm: 0.98401813371776, iteration: 172882
loss: 0.9905059933662415,grad_norm: 0.9965535513622988, iteration: 172883
loss: 1.0208652019500732,grad_norm: 0.9999996588021215, iteration: 172884
loss: 1.0182567834854126,grad_norm: 0.9091654622005484, iteration: 172885
loss: 0.99391108751297,grad_norm: 0.8955427786080943, iteration: 172886
loss: 0.9922937750816345,grad_norm: 0.9235879686847706, iteration: 172887
loss: 0.9821366667747498,grad_norm: 0.9999991519770881, iteration: 172888
loss: 0.9985909461975098,grad_norm: 0.8161941957995753, iteration: 172889
loss: 0.9881765246391296,grad_norm: 0.9510051712517025, iteration: 172890
loss: 0.9661745429039001,grad_norm: 0.9612535909990264, iteration: 172891
loss: 0.9761266708374023,grad_norm: 0.9999990134072996, iteration: 172892
loss: 0.9871333837509155,grad_norm: 0.9850602258823895, iteration: 172893
loss: 0.9728450179100037,grad_norm: 0.9999990109473949, iteration: 172894
loss: 0.9790900945663452,grad_norm: 0.9999990840374728, iteration: 172895
loss: 1.0138322114944458,grad_norm: 0.9520361280535717, iteration: 172896
loss: 0.940186083316803,grad_norm: 0.9353741855355585, iteration: 172897
loss: 0.985907793045044,grad_norm: 0.9999991205312547, iteration: 172898
loss: 0.9887619018554688,grad_norm: 0.9999991442131017, iteration: 172899
loss: 0.9815807938575745,grad_norm: 0.9999993852231994, iteration: 172900
loss: 1.0327181816101074,grad_norm: 0.9999990688212923, iteration: 172901
loss: 1.0268877744674683,grad_norm: 0.9999990421648138, iteration: 172902
loss: 1.0382206439971924,grad_norm: 0.9999992073838972, iteration: 172903
loss: 1.003381371498108,grad_norm: 0.8855306852170052, iteration: 172904
loss: 0.9883468151092529,grad_norm: 0.9999992386865776, iteration: 172905
loss: 0.9571565985679626,grad_norm: 0.9999991382551933, iteration: 172906
loss: 1.0203825235366821,grad_norm: 0.9593808807681189, iteration: 172907
loss: 0.9779621362686157,grad_norm: 0.9999990662232211, iteration: 172908
loss: 0.9842784404754639,grad_norm: 0.9730266369790262, iteration: 172909
loss: 1.0306850671768188,grad_norm: 0.9445521088234152, iteration: 172910
loss: 1.0109902620315552,grad_norm: 0.9999993615538991, iteration: 172911
loss: 0.9489540457725525,grad_norm: 0.9666957854954444, iteration: 172912
loss: 1.050027847290039,grad_norm: 0.9999998343667142, iteration: 172913
loss: 1.0538712739944458,grad_norm: 0.9999991890207536, iteration: 172914
loss: 0.9972890019416809,grad_norm: 0.9999990598303643, iteration: 172915
loss: 1.00919771194458,grad_norm: 0.9999991310055426, iteration: 172916
loss: 1.0245908498764038,grad_norm: 0.9657400998854812, iteration: 172917
loss: 1.0206198692321777,grad_norm: 0.9206990926127254, iteration: 172918
loss: 1.055841088294983,grad_norm: 0.9999992366617704, iteration: 172919
loss: 1.000670075416565,grad_norm: 0.9999992471752377, iteration: 172920
loss: 1.0062657594680786,grad_norm: 0.9527669371062191, iteration: 172921
loss: 0.994928240776062,grad_norm: 0.944100711047917, iteration: 172922
loss: 0.982204020023346,grad_norm: 0.7927760447546108, iteration: 172923
loss: 0.9556169509887695,grad_norm: 0.9999990937313405, iteration: 172924
loss: 0.9967510104179382,grad_norm: 0.8624064937037232, iteration: 172925
loss: 0.9906064867973328,grad_norm: 0.8697576987986014, iteration: 172926
loss: 0.9530845880508423,grad_norm: 0.9999991361703959, iteration: 172927
loss: 0.9835638403892517,grad_norm: 0.9999990058110413, iteration: 172928
loss: 0.9641327261924744,grad_norm: 0.9170049143309023, iteration: 172929
loss: 0.96217280626297,grad_norm: 0.9755618703551853, iteration: 172930
loss: 1.0093826055526733,grad_norm: 0.9893496598164739, iteration: 172931
loss: 1.0008430480957031,grad_norm: 0.9999990206002911, iteration: 172932
loss: 1.009328007698059,grad_norm: 0.9999990109598019, iteration: 172933
loss: 0.9924191832542419,grad_norm: 0.99999913910386, iteration: 172934
loss: 0.9834237098693848,grad_norm: 0.7626102724315498, iteration: 172935
loss: 1.013710618019104,grad_norm: 0.8017649782822251, iteration: 172936
loss: 1.0149712562561035,grad_norm: 0.9999990795332727, iteration: 172937
loss: 0.982560396194458,grad_norm: 0.9793072924680742, iteration: 172938
loss: 1.0307183265686035,grad_norm: 0.9999989044421762, iteration: 172939
loss: 1.0163298845291138,grad_norm: 0.9730988535559232, iteration: 172940
loss: 0.9596427083015442,grad_norm: 0.8589465668963261, iteration: 172941
loss: 1.0061688423156738,grad_norm: 0.999998995224774, iteration: 172942
loss: 1.0087813138961792,grad_norm: 0.9999992485488665, iteration: 172943
loss: 0.9647867679595947,grad_norm: 0.9999990269476141, iteration: 172944
loss: 0.961197555065155,grad_norm: 0.9999990998714406, iteration: 172945
loss: 0.9821174740791321,grad_norm: 0.9891733345048572, iteration: 172946
loss: 0.9700072407722473,grad_norm: 0.9841244496588255, iteration: 172947
loss: 1.042742371559143,grad_norm: 0.9999991453845186, iteration: 172948
loss: 1.0113002061843872,grad_norm: 0.9723234376422897, iteration: 172949
loss: 0.9857189059257507,grad_norm: 0.9999989932737384, iteration: 172950
loss: 0.9922555088996887,grad_norm: 0.9240567463888519, iteration: 172951
loss: 1.0046930313110352,grad_norm: 0.9999991311257419, iteration: 172952
loss: 1.0188610553741455,grad_norm: 0.8943953079252844, iteration: 172953
loss: 1.0311665534973145,grad_norm: 0.9556719341664227, iteration: 172954
loss: 1.023954153060913,grad_norm: 0.941907496399297, iteration: 172955
loss: 1.016242265701294,grad_norm: 0.8990685795273105, iteration: 172956
loss: 1.000592827796936,grad_norm: 0.9999990411103867, iteration: 172957
loss: 1.0197559595108032,grad_norm: 0.9999990557091903, iteration: 172958
loss: 0.9695953130722046,grad_norm: 0.9799698730885248, iteration: 172959
loss: 1.033979058265686,grad_norm: 0.9999992168114258, iteration: 172960
loss: 0.9928902983665466,grad_norm: 0.9999991671861453, iteration: 172961
loss: 1.018540859222412,grad_norm: 0.8652739787201924, iteration: 172962
loss: 0.9837368726730347,grad_norm: 0.9743685945334042, iteration: 172963
loss: 1.005552887916565,grad_norm: 0.9999991585996088, iteration: 172964
loss: 0.9521900415420532,grad_norm: 0.999999121333647, iteration: 172965
loss: 1.0231719017028809,grad_norm: 0.9999992038790073, iteration: 172966
loss: 0.9784249067306519,grad_norm: 0.9999991544109008, iteration: 172967
loss: 1.0110323429107666,grad_norm: 0.9999990692884047, iteration: 172968
loss: 1.0021535158157349,grad_norm: 0.9551753113090671, iteration: 172969
loss: 0.9828897714614868,grad_norm: 0.9140465746781563, iteration: 172970
loss: 1.0002351999282837,grad_norm: 0.9999991148586108, iteration: 172971
loss: 0.962615430355072,grad_norm: 0.7951933722004726, iteration: 172972
loss: 0.9947660565376282,grad_norm: 0.9567310359192845, iteration: 172973
loss: 1.0212076902389526,grad_norm: 0.9275931329175907, iteration: 172974
loss: 1.0021008253097534,grad_norm: 0.9703644150555868, iteration: 172975
loss: 1.0042774677276611,grad_norm: 0.9999992059461108, iteration: 172976
loss: 1.0118836164474487,grad_norm: 0.9071353361080955, iteration: 172977
loss: 0.9941664934158325,grad_norm: 0.8551280886788982, iteration: 172978
loss: 1.0104995965957642,grad_norm: 0.8421381510799127, iteration: 172979
loss: 0.9718567132949829,grad_norm: 0.9999992089332722, iteration: 172980
loss: 0.9985690712928772,grad_norm: 0.8283995995361428, iteration: 172981
loss: 0.9781673550605774,grad_norm: 0.8465834185560426, iteration: 172982
loss: 1.032820701599121,grad_norm: 0.9999991246969464, iteration: 172983
loss: 0.9650667309761047,grad_norm: 0.9381379032307292, iteration: 172984
loss: 0.9739754796028137,grad_norm: 0.9999991656361003, iteration: 172985
loss: 0.9760574102401733,grad_norm: 0.977197837653078, iteration: 172986
loss: 0.9998683333396912,grad_norm: 0.9999992046607588, iteration: 172987
loss: 1.0003788471221924,grad_norm: 0.821566201261055, iteration: 172988
loss: 0.9703260660171509,grad_norm: 0.9790319883967114, iteration: 172989
loss: 1.0258853435516357,grad_norm: 0.9999989507844981, iteration: 172990
loss: 1.005522608757019,grad_norm: 0.9999992646676951, iteration: 172991
loss: 1.021305799484253,grad_norm: 0.8688952589082701, iteration: 172992
loss: 0.9882946610450745,grad_norm: 0.960999472610319, iteration: 172993
loss: 0.9987816214561462,grad_norm: 0.9999991995175308, iteration: 172994
loss: 0.9825636744499207,grad_norm: 0.9999991385790948, iteration: 172995
loss: 0.9658421874046326,grad_norm: 0.9999992134311783, iteration: 172996
loss: 0.962996244430542,grad_norm: 0.9999990188921379, iteration: 172997
loss: 1.0268778800964355,grad_norm: 0.9999991841848102, iteration: 172998
loss: 0.9968281984329224,grad_norm: 0.8506180515405583, iteration: 172999
loss: 1.0215517282485962,grad_norm: 0.9687395594950268, iteration: 173000
loss: 1.0199884176254272,grad_norm: 0.9999996618251574, iteration: 173001
loss: 1.0087192058563232,grad_norm: 0.9999991037517084, iteration: 173002
loss: 1.0199891328811646,grad_norm: 0.9999990620957965, iteration: 173003
loss: 0.9891468286514282,grad_norm: 0.9999992002799495, iteration: 173004
loss: 1.0407804250717163,grad_norm: 0.9999992164201256, iteration: 173005
loss: 0.9799503087997437,grad_norm: 0.9999992845684443, iteration: 173006
loss: 1.0267964601516724,grad_norm: 0.832301364333539, iteration: 173007
loss: 1.0255844593048096,grad_norm: 0.9999990724871562, iteration: 173008
loss: 1.0027421712875366,grad_norm: 0.9613299674867197, iteration: 173009
loss: 1.0468999147415161,grad_norm: 0.9038368208939622, iteration: 173010
loss: 0.9849886894226074,grad_norm: 0.9999991240999334, iteration: 173011
loss: 1.0014078617095947,grad_norm: 0.9710520615168703, iteration: 173012
loss: 1.0179023742675781,grad_norm: 0.9999991594980436, iteration: 173013
loss: 0.9740886688232422,grad_norm: 0.9999991205613153, iteration: 173014
loss: 0.997133731842041,grad_norm: 0.9999992029934115, iteration: 173015
loss: 1.0105384588241577,grad_norm: 0.9999990718341665, iteration: 173016
loss: 1.0375616550445557,grad_norm: 0.9267096849207346, iteration: 173017
loss: 0.9980840682983398,grad_norm: 0.9999990485310947, iteration: 173018
loss: 1.0160330533981323,grad_norm: 0.9270488735561911, iteration: 173019
loss: 0.991481602191925,grad_norm: 0.839232898998609, iteration: 173020
loss: 0.9916407465934753,grad_norm: 0.9906387986159778, iteration: 173021
loss: 0.9554573893547058,grad_norm: 0.8880087869693107, iteration: 173022
loss: 0.9813773036003113,grad_norm: 0.8689520727320073, iteration: 173023
loss: 1.0157469511032104,grad_norm: 0.9999993152088588, iteration: 173024
loss: 0.9947357177734375,grad_norm: 0.9999990941842964, iteration: 173025
loss: 1.037395715713501,grad_norm: 0.9999990615856512, iteration: 173026
loss: 0.9705880880355835,grad_norm: 0.9999991713082563, iteration: 173027
loss: 0.9873672723770142,grad_norm: 0.9999991491758582, iteration: 173028
loss: 1.020529866218567,grad_norm: 0.9999994486774513, iteration: 173029
loss: 1.0037472248077393,grad_norm: 0.8956938109156417, iteration: 173030
loss: 0.9821482300758362,grad_norm: 0.9368603687806554, iteration: 173031
loss: 0.9846413135528564,grad_norm: 0.8775879072864982, iteration: 173032
loss: 1.0351505279541016,grad_norm: 0.999999245756268, iteration: 173033
loss: 1.0090267658233643,grad_norm: 0.9999990896771989, iteration: 173034
loss: 0.9676773548126221,grad_norm: 0.8512699579545274, iteration: 173035
loss: 0.9925950765609741,grad_norm: 0.9999991039618228, iteration: 173036
loss: 1.0148271322250366,grad_norm: 0.999999103841554, iteration: 173037
loss: 0.9687730669975281,grad_norm: 0.9766266455886996, iteration: 173038
loss: 0.9945510625839233,grad_norm: 0.9929074691387566, iteration: 173039
loss: 1.0137232542037964,grad_norm: 0.9904499469053951, iteration: 173040
loss: 0.9914996027946472,grad_norm: 0.8188106066698566, iteration: 173041
loss: 1.0278733968734741,grad_norm: 0.926156198850119, iteration: 173042
loss: 1.026075005531311,grad_norm: 0.9999990212681049, iteration: 173043
loss: 1.0093755722045898,grad_norm: 0.999999116693373, iteration: 173044
loss: 0.9873530268669128,grad_norm: 0.9999990110809812, iteration: 173045
loss: 1.0017424821853638,grad_norm: 0.9999990823774016, iteration: 173046
loss: 1.024277925491333,grad_norm: 0.9999993303493117, iteration: 173047
loss: 0.9348292350769043,grad_norm: 0.9738388790300937, iteration: 173048
loss: 0.9952572584152222,grad_norm: 0.8601170743207063, iteration: 173049
loss: 1.0164470672607422,grad_norm: 0.9999993052686676, iteration: 173050
loss: 1.0088776350021362,grad_norm: 0.8855438963422767, iteration: 173051
loss: 1.0340604782104492,grad_norm: 0.999999077624034, iteration: 173052
loss: 0.9892639517784119,grad_norm: 0.8813502865905286, iteration: 173053
loss: 0.9836803674697876,grad_norm: 0.9570578974504276, iteration: 173054
loss: 0.9755651950836182,grad_norm: 0.99999909990146, iteration: 173055
loss: 0.9964565634727478,grad_norm: 0.9999990960226274, iteration: 173056
loss: 0.9998456239700317,grad_norm: 0.999998968353562, iteration: 173057
loss: 1.0055210590362549,grad_norm: 0.9999991143334267, iteration: 173058
loss: 1.0356762409210205,grad_norm: 0.9999991309949106, iteration: 173059
loss: 0.9958962202072144,grad_norm: 0.9999991429431949, iteration: 173060
loss: 0.9918928146362305,grad_norm: 0.9024145444263765, iteration: 173061
loss: 1.0043483972549438,grad_norm: 0.950742946712647, iteration: 173062
loss: 1.0029072761535645,grad_norm: 0.9999989899207051, iteration: 173063
loss: 1.0126152038574219,grad_norm: 0.9845257502947766, iteration: 173064
loss: 1.0012876987457275,grad_norm: 0.7922472146447409, iteration: 173065
loss: 1.1643041372299194,grad_norm: 0.999999337183372, iteration: 173066
loss: 1.0161460638046265,grad_norm: 0.9047647205547891, iteration: 173067
loss: 0.9962431788444519,grad_norm: 0.9999990155077843, iteration: 173068
loss: 0.9877901077270508,grad_norm: 0.9999992296171499, iteration: 173069
loss: 1.0007058382034302,grad_norm: 0.9999991862750102, iteration: 173070
loss: 1.0095441341400146,grad_norm: 0.9999991158648264, iteration: 173071
loss: 1.0197505950927734,grad_norm: 0.9999992080340044, iteration: 173072
loss: 1.0260491371154785,grad_norm: 0.9999991998674018, iteration: 173073
loss: 1.019423246383667,grad_norm: 0.9999990347267348, iteration: 173074
loss: 1.0067650079727173,grad_norm: 0.9999991216999282, iteration: 173075
loss: 1.0144164562225342,grad_norm: 0.9728888884289065, iteration: 173076
loss: 1.0059839487075806,grad_norm: 0.9687500734590598, iteration: 173077
loss: 1.0094531774520874,grad_norm: 0.9999992254864827, iteration: 173078
loss: 0.9760268330574036,grad_norm: 0.9999991547057018, iteration: 173079
loss: 1.0201270580291748,grad_norm: 0.9999993359260528, iteration: 173080
loss: 0.9927571415901184,grad_norm: 0.891526225705608, iteration: 173081
loss: 0.9733154773712158,grad_norm: 0.9999991958468981, iteration: 173082
loss: 1.0157960653305054,grad_norm: 0.9480728568218304, iteration: 173083
loss: 1.0035574436187744,grad_norm: 0.7571273506194706, iteration: 173084
loss: 0.9876636862754822,grad_norm: 0.9432792287841539, iteration: 173085
loss: 1.0133380889892578,grad_norm: 0.8918638590886874, iteration: 173086
loss: 0.9802007079124451,grad_norm: 0.9999990845984199, iteration: 173087
loss: 1.0043150186538696,grad_norm: 0.8868290943355562, iteration: 173088
loss: 1.032046914100647,grad_norm: 0.9999991401103376, iteration: 173089
loss: 0.9975240230560303,grad_norm: 0.9647507580976528, iteration: 173090
loss: 0.9612307548522949,grad_norm: 0.9999991904049785, iteration: 173091
loss: 1.0063819885253906,grad_norm: 0.9889934461402577, iteration: 173092
loss: 0.9979060888290405,grad_norm: 0.7529670946549875, iteration: 173093
loss: 0.9510971307754517,grad_norm: 0.9999990954587598, iteration: 173094
loss: 1.0006606578826904,grad_norm: 0.9999992856602633, iteration: 173095
loss: 1.0021768808364868,grad_norm: 0.9691365988405218, iteration: 173096
loss: 1.0059537887573242,grad_norm: 0.8517443570604674, iteration: 173097
loss: 0.9773480296134949,grad_norm: 0.985257537936198, iteration: 173098
loss: 0.9993938207626343,grad_norm: 0.9350966765214567, iteration: 173099
loss: 1.01155424118042,grad_norm: 0.9999989019103291, iteration: 173100
loss: 1.037979245185852,grad_norm: 0.9877890218224684, iteration: 173101
loss: 0.944392204284668,grad_norm: 0.8777506406466181, iteration: 173102
loss: 1.0251407623291016,grad_norm: 0.9999997633281243, iteration: 173103
loss: 0.9875767230987549,grad_norm: 0.9999993385378447, iteration: 173104
loss: 1.02315092086792,grad_norm: 0.9999990359987192, iteration: 173105
loss: 1.0077379941940308,grad_norm: 0.9165076961610136, iteration: 173106
loss: 0.9836634993553162,grad_norm: 0.9207244597922315, iteration: 173107
loss: 0.9848925471305847,grad_norm: 0.99999912556617, iteration: 173108
loss: 0.9662804007530212,grad_norm: 0.9999991032973883, iteration: 173109
loss: 0.9951508641242981,grad_norm: 0.9999992853527843, iteration: 173110
loss: 1.0148289203643799,grad_norm: 0.872021241517249, iteration: 173111
loss: 0.9635420441627502,grad_norm: 0.9999992417622253, iteration: 173112
loss: 0.9809373617172241,grad_norm: 0.912216725613626, iteration: 173113
loss: 0.9713668823242188,grad_norm: 0.9111571196377632, iteration: 173114
loss: 0.9799938797950745,grad_norm: 0.9957355149217755, iteration: 173115
loss: 1.0076873302459717,grad_norm: 0.8642342139225887, iteration: 173116
loss: 0.9657767415046692,grad_norm: 0.9999991512693179, iteration: 173117
loss: 0.9870452880859375,grad_norm: 0.9999990768604167, iteration: 173118
loss: 1.0273375511169434,grad_norm: 0.958720950295457, iteration: 173119
loss: 0.9830614328384399,grad_norm: 0.9358835834408169, iteration: 173120
loss: 1.0130850076675415,grad_norm: 0.9999993205393235, iteration: 173121
loss: 1.0347800254821777,grad_norm: 0.9834783107889453, iteration: 173122
loss: 0.9837441444396973,grad_norm: 0.9896158310537344, iteration: 173123
loss: 0.9484604001045227,grad_norm: 0.9595486583177426, iteration: 173124
loss: 1.0273752212524414,grad_norm: 0.9999992917901144, iteration: 173125
loss: 1.048141598701477,grad_norm: 0.8762063225357116, iteration: 173126
loss: 1.1333848237991333,grad_norm: 0.9999991850006588, iteration: 173127
loss: 1.0291142463684082,grad_norm: 0.8420547632938379, iteration: 173128
loss: 1.0252482891082764,grad_norm: 0.9776565715712673, iteration: 173129
loss: 0.9786959290504456,grad_norm: 0.8919576957730918, iteration: 173130
loss: 1.0223987102508545,grad_norm: 0.8788309605809108, iteration: 173131
loss: 0.9673343896865845,grad_norm: 0.9862796201553771, iteration: 173132
loss: 0.9806826710700989,grad_norm: 0.8687632538527252, iteration: 173133
loss: 1.0325130224227905,grad_norm: 0.9999992441913654, iteration: 173134
loss: 0.9921361804008484,grad_norm: 0.88664136909112, iteration: 173135
loss: 1.0116153955459595,grad_norm: 0.9190494507383762, iteration: 173136
loss: 1.0673012733459473,grad_norm: 0.9999997017679136, iteration: 173137
loss: 1.0114762783050537,grad_norm: 0.9999990318746397, iteration: 173138
loss: 0.9858070015907288,grad_norm: 0.94176939133483, iteration: 173139
loss: 0.99118572473526,grad_norm: 0.9379104749389222, iteration: 173140
loss: 1.0083847045898438,grad_norm: 0.9999990708418918, iteration: 173141
loss: 0.9923359155654907,grad_norm: 0.9999991708381014, iteration: 173142
loss: 0.9615995287895203,grad_norm: 0.7779501982480396, iteration: 173143
loss: 0.9908468127250671,grad_norm: 0.9949602325449979, iteration: 173144
loss: 1.0169999599456787,grad_norm: 0.9018873979892574, iteration: 173145
loss: 1.001065731048584,grad_norm: 0.993949009496465, iteration: 173146
loss: 0.9815266728401184,grad_norm: 0.8548024834312705, iteration: 173147
loss: 1.0326796770095825,grad_norm: 0.9999991365261354, iteration: 173148
loss: 0.9914155602455139,grad_norm: 0.9592942769037978, iteration: 173149
loss: 0.991471529006958,grad_norm: 0.9356619539733155, iteration: 173150
loss: 0.9898765683174133,grad_norm: 0.99999906362264, iteration: 173151
loss: 1.0496612787246704,grad_norm: 0.9610291594756944, iteration: 173152
loss: 0.9836748838424683,grad_norm: 0.9617408938296165, iteration: 173153
loss: 1.0764473676681519,grad_norm: 0.999999214701002, iteration: 173154
loss: 1.024474024772644,grad_norm: 0.9999990364607647, iteration: 173155
loss: 1.1263941526412964,grad_norm: 0.998672545276205, iteration: 173156
loss: 0.9934142231941223,grad_norm: 0.9999990801113504, iteration: 173157
loss: 1.0356717109680176,grad_norm: 0.9999992816121758, iteration: 173158
loss: 0.9919990301132202,grad_norm: 0.9999992100279923, iteration: 173159
loss: 1.0007613897323608,grad_norm: 0.846524690355236, iteration: 173160
loss: 1.0878912210464478,grad_norm: 0.9999991102907186, iteration: 173161
loss: 1.0189510583877563,grad_norm: 0.99999913530764, iteration: 173162
loss: 1.0901237726211548,grad_norm: 0.9999993104365107, iteration: 173163
loss: 1.0040278434753418,grad_norm: 0.938896309122352, iteration: 173164
loss: 1.0307376384735107,grad_norm: 0.9999997512215063, iteration: 173165
loss: 1.0562939643859863,grad_norm: 0.9999994644850961, iteration: 173166
loss: 1.023470401763916,grad_norm: 0.9999991810678688, iteration: 173167
loss: 0.9850233197212219,grad_norm: 0.9699905755602439, iteration: 173168
loss: 1.0336552858352661,grad_norm: 0.9999992694178655, iteration: 173169
loss: 0.9935593008995056,grad_norm: 0.9999991794749399, iteration: 173170
loss: 1.0376620292663574,grad_norm: 0.9044751707427411, iteration: 173171
loss: 1.0230748653411865,grad_norm: 0.9999991546135112, iteration: 173172
loss: 1.0563209056854248,grad_norm: 0.999999990160825, iteration: 173173
loss: 0.9791160821914673,grad_norm: 0.9291432365497458, iteration: 173174
loss: 1.0547194480895996,grad_norm: 0.942281501044963, iteration: 173175
loss: 0.9775785207748413,grad_norm: 0.9387521904434537, iteration: 173176
loss: 1.0162290334701538,grad_norm: 0.9999993226663426, iteration: 173177
loss: 1.0121570825576782,grad_norm: 0.9999995127094948, iteration: 173178
loss: 1.0146445035934448,grad_norm: 0.9999989359219331, iteration: 173179
loss: 1.0367274284362793,grad_norm: 0.9999990378725025, iteration: 173180
loss: 1.032717227935791,grad_norm: 0.9999993345279613, iteration: 173181
loss: 1.0129225254058838,grad_norm: 0.9999992292448964, iteration: 173182
loss: 1.012709379196167,grad_norm: 0.91348144653496, iteration: 173183
loss: 1.0445181131362915,grad_norm: 0.9999991164277239, iteration: 173184
loss: 1.0123862028121948,grad_norm: 0.8345888175431343, iteration: 173185
loss: 1.007734775543213,grad_norm: 0.9144491177365819, iteration: 173186
loss: 0.9751703143119812,grad_norm: 0.9700950427837899, iteration: 173187
loss: 1.0499060153961182,grad_norm: 0.9999992534912614, iteration: 173188
loss: 1.0109386444091797,grad_norm: 0.9931128960066665, iteration: 173189
loss: 1.0601991415023804,grad_norm: 0.9999994974088888, iteration: 173190
loss: 0.9833531975746155,grad_norm: 0.8358188710116531, iteration: 173191
loss: 1.1362179517745972,grad_norm: 0.9999998157680721, iteration: 173192
loss: 0.991590678691864,grad_norm: 0.999999065652991, iteration: 173193
loss: 1.0122829675674438,grad_norm: 0.9999991027279163, iteration: 173194
loss: 0.9797075986862183,grad_norm: 0.8402461115540395, iteration: 173195
loss: 0.9947142601013184,grad_norm: 0.7961922989663552, iteration: 173196
loss: 1.0300612449645996,grad_norm: 0.9999990354757021, iteration: 173197
loss: 1.0220906734466553,grad_norm: 0.9131022797337038, iteration: 173198
loss: 0.9815397262573242,grad_norm: 0.8737198736684683, iteration: 173199
loss: 0.9761576652526855,grad_norm: 0.9000418103171574, iteration: 173200
loss: 0.9981353878974915,grad_norm: 0.9999990865069734, iteration: 173201
loss: 1.093165636062622,grad_norm: 0.9999992328146015, iteration: 173202
loss: 0.9555047750473022,grad_norm: 0.8539646381095554, iteration: 173203
loss: 0.9918744564056396,grad_norm: 0.8578175561000679, iteration: 173204
loss: 1.054579734802246,grad_norm: 0.9999991228714422, iteration: 173205
loss: 1.009202241897583,grad_norm: 0.9999993747524902, iteration: 173206
loss: 0.952363133430481,grad_norm: 0.9999990490803112, iteration: 173207
loss: 1.0066173076629639,grad_norm: 0.9852176733878019, iteration: 173208
loss: 0.9904518723487854,grad_norm: 0.9927529934392031, iteration: 173209
loss: 1.0286450386047363,grad_norm: 0.9999990785005037, iteration: 173210
loss: 1.0982474088668823,grad_norm: 0.9999990086020025, iteration: 173211
loss: 0.9614801406860352,grad_norm: 0.9052747879893113, iteration: 173212
loss: 0.9661259055137634,grad_norm: 0.9999992371125087, iteration: 173213
loss: 1.0124753713607788,grad_norm: 0.8595467553450465, iteration: 173214
loss: 1.0479438304901123,grad_norm: 0.9999994333025358, iteration: 173215
loss: 1.0188018083572388,grad_norm: 0.8579292877484218, iteration: 173216
loss: 1.017357587814331,grad_norm: 0.9999992491960971, iteration: 173217
loss: 0.9931558966636658,grad_norm: 0.9999989044997493, iteration: 173218
loss: 0.9831616282463074,grad_norm: 0.9549680855612099, iteration: 173219
loss: 1.02663254737854,grad_norm: 0.9965978479453068, iteration: 173220
loss: 0.9837191700935364,grad_norm: 0.9403584403130236, iteration: 173221
loss: 1.022972822189331,grad_norm: 0.999998985293074, iteration: 173222
loss: 1.0236817598342896,grad_norm: 0.9004088291762706, iteration: 173223
loss: 0.9838346242904663,grad_norm: 0.8605773610728329, iteration: 173224
loss: 1.0310616493225098,grad_norm: 0.9999991758299598, iteration: 173225
loss: 1.142582654953003,grad_norm: 0.9999998961348814, iteration: 173226
loss: 1.0118963718414307,grad_norm: 0.9560444399354023, iteration: 173227
loss: 1.0395854711532593,grad_norm: 0.9999991116095069, iteration: 173228
loss: 0.9525585174560547,grad_norm: 0.9999991236591885, iteration: 173229
loss: 0.9758696556091309,grad_norm: 0.9999990662838355, iteration: 173230
loss: 1.019641637802124,grad_norm: 0.8692279564022971, iteration: 173231
loss: 0.9919403195381165,grad_norm: 0.8418818810354367, iteration: 173232
loss: 0.9576407074928284,grad_norm: 0.9864668088390539, iteration: 173233
loss: 1.0008361339569092,grad_norm: 0.8947135403143696, iteration: 173234
loss: 0.9870032668113708,grad_norm: 0.9999990990033669, iteration: 173235
loss: 0.9728108644485474,grad_norm: 0.9999991359504079, iteration: 173236
loss: 0.9521684050559998,grad_norm: 0.9999992477863544, iteration: 173237
loss: 0.9613673686981201,grad_norm: 0.9999991722207784, iteration: 173238
loss: 1.0107485055923462,grad_norm: 0.9171320610880367, iteration: 173239
loss: 1.0131250619888306,grad_norm: 0.937528091038726, iteration: 173240
loss: 1.01598060131073,grad_norm: 0.9064700918782003, iteration: 173241
loss: 1.0592669248580933,grad_norm: 0.999999419627524, iteration: 173242
loss: 0.989646315574646,grad_norm: 0.8969041499710089, iteration: 173243
loss: 0.977881908416748,grad_norm: 0.8266708563068139, iteration: 173244
loss: 0.9966974854469299,grad_norm: 0.9999992019836328, iteration: 173245
loss: 0.9684149622917175,grad_norm: 0.8037071511652574, iteration: 173246
loss: 1.0058013200759888,grad_norm: 0.9517880778494914, iteration: 173247
loss: 0.9804635643959045,grad_norm: 0.9873438436220416, iteration: 173248
loss: 0.9857479333877563,grad_norm: 0.9999991792794117, iteration: 173249
loss: 0.9953381419181824,grad_norm: 0.99999921654423, iteration: 173250
loss: 1.029098629951477,grad_norm: 0.9999998186133545, iteration: 173251
loss: 0.9756980538368225,grad_norm: 0.9999997465564576, iteration: 173252
loss: 0.9929608106613159,grad_norm: 0.870652498437781, iteration: 173253
loss: 1.021510124206543,grad_norm: 0.9223254359328242, iteration: 173254
loss: 1.0159727334976196,grad_norm: 0.919901765274274, iteration: 173255
loss: 1.0090844631195068,grad_norm: 0.999999111111512, iteration: 173256
loss: 0.9642623662948608,grad_norm: 0.9335043796359298, iteration: 173257
loss: 0.993277907371521,grad_norm: 0.9421756891375376, iteration: 173258
loss: 0.987496554851532,grad_norm: 0.9999992473997903, iteration: 173259
loss: 1.0084171295166016,grad_norm: 0.9999994886173657, iteration: 173260
loss: 1.0328083038330078,grad_norm: 0.9999992802975275, iteration: 173261
loss: 1.021281361579895,grad_norm: 0.9999992489710263, iteration: 173262
loss: 0.9963392019271851,grad_norm: 0.9760477239156965, iteration: 173263
loss: 0.9949373006820679,grad_norm: 0.9980508444557135, iteration: 173264
loss: 1.008231282234192,grad_norm: 0.9839392862239729, iteration: 173265
loss: 0.9899845123291016,grad_norm: 0.9999991083348535, iteration: 173266
loss: 0.9931793212890625,grad_norm: 0.9999989823119505, iteration: 173267
loss: 0.9625701904296875,grad_norm: 0.9703225509196206, iteration: 173268
loss: 1.0265599489212036,grad_norm: 0.999999162662847, iteration: 173269
loss: 0.9937644004821777,grad_norm: 0.9999992574159953, iteration: 173270
loss: 1.021870732307434,grad_norm: 0.8804807831164553, iteration: 173271
loss: 0.9887027144432068,grad_norm: 0.9911881707560749, iteration: 173272
loss: 0.9932203888893127,grad_norm: 0.9085726031575605, iteration: 173273
loss: 0.9714850783348083,grad_norm: 0.9962066833029357, iteration: 173274
loss: 0.9884954690933228,grad_norm: 0.9999990420292661, iteration: 173275
loss: 1.0012277364730835,grad_norm: 0.9640177536231804, iteration: 173276
loss: 0.965004563331604,grad_norm: 0.9999990848668108, iteration: 173277
loss: 1.0103925466537476,grad_norm: 0.9999991751767674, iteration: 173278
loss: 0.9978016018867493,grad_norm: 0.9999991850670304, iteration: 173279
loss: 0.9950872659683228,grad_norm: 0.999999196800382, iteration: 173280
loss: 1.0031625032424927,grad_norm: 0.9999990557915167, iteration: 173281
loss: 1.0137373208999634,grad_norm: 0.8673601549134731, iteration: 173282
loss: 1.0145940780639648,grad_norm: 0.9999992953431844, iteration: 173283
loss: 0.9758224487304688,grad_norm: 0.999999096135411, iteration: 173284
loss: 0.9912381172180176,grad_norm: 0.9050927911830204, iteration: 173285
loss: 1.0174152851104736,grad_norm: 0.9999990040239587, iteration: 173286
loss: 1.0068556070327759,grad_norm: 0.9230328938759819, iteration: 173287
loss: 1.0134907960891724,grad_norm: 0.9149376496437004, iteration: 173288
loss: 1.0159423351287842,grad_norm: 0.9386549736358825, iteration: 173289
loss: 1.0134025812149048,grad_norm: 0.9999990615094784, iteration: 173290
loss: 0.9979038834571838,grad_norm: 0.9999989844534085, iteration: 173291
loss: 0.9887598156929016,grad_norm: 0.9999991778111512, iteration: 173292
loss: 1.0515590906143188,grad_norm: 0.930698784075711, iteration: 173293
loss: 0.9845864176750183,grad_norm: 0.9999989594514711, iteration: 173294
loss: 1.018171787261963,grad_norm: 0.9999989710025786, iteration: 173295
loss: 0.9808796048164368,grad_norm: 0.9999990389065906, iteration: 173296
loss: 1.0042957067489624,grad_norm: 0.9999990275784004, iteration: 173297
loss: 0.9814934134483337,grad_norm: 0.81456812974913, iteration: 173298
loss: 1.0134397745132446,grad_norm: 0.996812054114545, iteration: 173299
loss: 0.9789760708808899,grad_norm: 0.999999466146827, iteration: 173300
loss: 0.9813698530197144,grad_norm: 0.9052477439092536, iteration: 173301
loss: 1.013211965560913,grad_norm: 0.9875224897639411, iteration: 173302
loss: 0.9867696166038513,grad_norm: 0.8755422081180396, iteration: 173303
loss: 0.984713077545166,grad_norm: 0.9999991855384655, iteration: 173304
loss: 0.9838989973068237,grad_norm: 0.8617731221106447, iteration: 173305
loss: 0.9707918167114258,grad_norm: 0.9999990235177174, iteration: 173306
loss: 1.0183578729629517,grad_norm: 0.9353167404370107, iteration: 173307
loss: 0.9923502802848816,grad_norm: 0.9511337641262998, iteration: 173308
loss: 0.9666186571121216,grad_norm: 0.8823060792588254, iteration: 173309
loss: 0.994127631187439,grad_norm: 0.9999990678595722, iteration: 173310
loss: 1.0553910732269287,grad_norm: 0.9720809651206616, iteration: 173311
loss: 1.0121572017669678,grad_norm: 0.8546441330871403, iteration: 173312
loss: 1.0268677473068237,grad_norm: 0.9999991004896597, iteration: 173313
loss: 0.9654895067214966,grad_norm: 0.9999991863592168, iteration: 173314
loss: 0.9954642653465271,grad_norm: 0.844731089716039, iteration: 173315
loss: 1.0213751792907715,grad_norm: 0.9400526023328624, iteration: 173316
loss: 1.0142860412597656,grad_norm: 0.9981224841482458, iteration: 173317
loss: 0.989727795124054,grad_norm: 0.9999990032942689, iteration: 173318
loss: 1.038543462753296,grad_norm: 0.9999991625770457, iteration: 173319
loss: 1.007431983947754,grad_norm: 0.9999992968130725, iteration: 173320
loss: 0.9581940770149231,grad_norm: 0.853678032448548, iteration: 173321
loss: 0.9709141254425049,grad_norm: 0.9435800133938922, iteration: 173322
loss: 1.1068181991577148,grad_norm: 0.9999994567467821, iteration: 173323
loss: 0.9786388874053955,grad_norm: 0.8538441542177007, iteration: 173324
loss: 1.0309478044509888,grad_norm: 0.9189149861729808, iteration: 173325
loss: 0.9911172389984131,grad_norm: 0.8743088373415118, iteration: 173326
loss: 1.0078797340393066,grad_norm: 0.9999993011567492, iteration: 173327
loss: 1.0180760622024536,grad_norm: 0.9373824021376846, iteration: 173328
loss: 1.071179986000061,grad_norm: 0.9393193317960478, iteration: 173329
loss: 0.995534360408783,grad_norm: 0.9999990713029661, iteration: 173330
loss: 1.065420150756836,grad_norm: 0.9999992251701926, iteration: 173331
loss: 1.0754716396331787,grad_norm: 1.00000000334563, iteration: 173332
loss: 0.9682272672653198,grad_norm: 0.9671510447011166, iteration: 173333
loss: 0.9779776334762573,grad_norm: 0.9486468798606881, iteration: 173334
loss: 1.0153347253799438,grad_norm: 0.9999991334226682, iteration: 173335
loss: 0.9668537378311157,grad_norm: 0.9999992340238655, iteration: 173336
loss: 1.022619366645813,grad_norm: 0.9526450797365887, iteration: 173337
loss: 0.9967293739318848,grad_norm: 0.9999991714514306, iteration: 173338
loss: 0.9944730401039124,grad_norm: 0.9558194691479298, iteration: 173339
loss: 1.0275425910949707,grad_norm: 0.9251890008608322, iteration: 173340
loss: 1.128941297531128,grad_norm: 0.999999271363143, iteration: 173341
loss: 1.0205907821655273,grad_norm: 0.9999996893163167, iteration: 173342
loss: 0.9854419827461243,grad_norm: 0.9999992448612014, iteration: 173343
loss: 1.039349913597107,grad_norm: 0.9999999814462602, iteration: 173344
loss: 1.0664047002792358,grad_norm: 0.9999996224839662, iteration: 173345
loss: 1.0345386266708374,grad_norm: 0.9999991526397463, iteration: 173346
loss: 0.9711712002754211,grad_norm: 0.9574984151554772, iteration: 173347
loss: 1.0236797332763672,grad_norm: 0.9999991053366964, iteration: 173348
loss: 1.002380132675171,grad_norm: 0.999999356420268, iteration: 173349
loss: 1.0352411270141602,grad_norm: 0.8815284710924768, iteration: 173350
loss: 1.002424955368042,grad_norm: 0.9143066916237564, iteration: 173351
loss: 0.987457275390625,grad_norm: 0.8703500411544647, iteration: 173352
loss: 0.9664744734764099,grad_norm: 0.9136771299177211, iteration: 173353
loss: 0.9850394129753113,grad_norm: 0.9999993601952214, iteration: 173354
loss: 1.1117336750030518,grad_norm: 0.9999995254971142, iteration: 173355
loss: 1.144829273223877,grad_norm: 0.9999992963541312, iteration: 173356
loss: 0.9708229303359985,grad_norm: 0.9999990692745034, iteration: 173357
loss: 1.0079478025436401,grad_norm: 0.9999991410100149, iteration: 173358
loss: 0.9808509349822998,grad_norm: 0.9999989960816826, iteration: 173359
loss: 1.0104373693466187,grad_norm: 0.9999991637069573, iteration: 173360
loss: 0.9828351736068726,grad_norm: 0.8776891092216944, iteration: 173361
loss: 0.9938229918479919,grad_norm: 0.9999991406369699, iteration: 173362
loss: 0.9752568006515503,grad_norm: 0.9941538525901817, iteration: 173363
loss: 0.9974448680877686,grad_norm: 0.9999990280798693, iteration: 173364
loss: 0.9753018021583557,grad_norm: 0.9999991765133904, iteration: 173365
loss: 0.9775509238243103,grad_norm: 0.9999990509993648, iteration: 173366
loss: 0.9780917763710022,grad_norm: 0.9865202355208376, iteration: 173367
loss: 1.0013771057128906,grad_norm: 0.9999991247510303, iteration: 173368
loss: 0.9889233708381653,grad_norm: 0.8558378838890034, iteration: 173369
loss: 1.0161750316619873,grad_norm: 0.9999991850560268, iteration: 173370
loss: 1.0110065937042236,grad_norm: 0.9999989797136505, iteration: 173371
loss: 0.9895299077033997,grad_norm: 0.9025910744900416, iteration: 173372
loss: 1.0115454196929932,grad_norm: 0.9521379362329109, iteration: 173373
loss: 1.0225608348846436,grad_norm: 0.9791718686478867, iteration: 173374
loss: 1.0079199075698853,grad_norm: 0.9910531220093505, iteration: 173375
loss: 0.9405556917190552,grad_norm: 0.940246044726725, iteration: 173376
loss: 1.129676342010498,grad_norm: 1.0000000147065076, iteration: 173377
loss: 1.0084646940231323,grad_norm: 0.9535691644601852, iteration: 173378
loss: 1.0166274309158325,grad_norm: 0.9999990302147986, iteration: 173379
loss: 0.9816895723342896,grad_norm: 0.9999991099859727, iteration: 173380
loss: 0.9865536689758301,grad_norm: 0.9999990913067458, iteration: 173381
loss: 1.028625249862671,grad_norm: 0.9946084552068387, iteration: 173382
loss: 0.9857187867164612,grad_norm: 0.9555365351503962, iteration: 173383
loss: 1.0471171140670776,grad_norm: 0.9999992960807941, iteration: 173384
loss: 1.0096651315689087,grad_norm: 0.9205670088670954, iteration: 173385
loss: 0.9990320205688477,grad_norm: 0.9999991398006397, iteration: 173386
loss: 0.9973852634429932,grad_norm: 0.9999992875797968, iteration: 173387
loss: 1.0183080434799194,grad_norm: 0.9999992602856107, iteration: 173388
loss: 0.9998959302902222,grad_norm: 0.9169213985533535, iteration: 173389
loss: 0.966382622718811,grad_norm: 0.9999991235989065, iteration: 173390
loss: 1.006978988647461,grad_norm: 0.9999992441247001, iteration: 173391
loss: 1.0257542133331299,grad_norm: 0.879156752209853, iteration: 173392
loss: 0.9871613383293152,grad_norm: 0.9999991756710324, iteration: 173393
loss: 0.9711775779724121,grad_norm: 0.9999992298031473, iteration: 173394
loss: 1.0099512338638306,grad_norm: 0.9180154317282015, iteration: 173395
loss: 1.0377904176712036,grad_norm: 0.9999992857822521, iteration: 173396
loss: 0.989542543888092,grad_norm: 0.999999111413384, iteration: 173397
loss: 0.9921126961708069,grad_norm: 0.9999992725856569, iteration: 173398
loss: 1.008932113647461,grad_norm: 0.9793232680648649, iteration: 173399
loss: 1.0053982734680176,grad_norm: 0.9999990292398863, iteration: 173400
loss: 0.9993768334388733,grad_norm: 0.9999990535639472, iteration: 173401
loss: 0.9847214221954346,grad_norm: 0.9999995889130661, iteration: 173402
loss: 1.014273762702942,grad_norm: 0.9171615299821111, iteration: 173403
loss: 1.0117262601852417,grad_norm: 0.8863813362712768, iteration: 173404
loss: 0.9839908480644226,grad_norm: 0.9201515749607314, iteration: 173405
loss: 0.9583938717842102,grad_norm: 0.8833330081510171, iteration: 173406
loss: 1.0164134502410889,grad_norm: 0.9999992785966979, iteration: 173407
loss: 1.0039926767349243,grad_norm: 0.9895838823405216, iteration: 173408
loss: 1.0036181211471558,grad_norm: 0.9769679510734699, iteration: 173409
loss: 1.0088104009628296,grad_norm: 0.9884414970374579, iteration: 173410
loss: 0.9908260107040405,grad_norm: 0.9999991626715299, iteration: 173411
loss: 0.956946074962616,grad_norm: 0.9010791828660721, iteration: 173412
loss: 1.1092829704284668,grad_norm: 0.9999995838127558, iteration: 173413
loss: 0.9692326188087463,grad_norm: 0.9271414389829052, iteration: 173414
loss: 0.9848633408546448,grad_norm: 0.9999991196385101, iteration: 173415
loss: 0.9873349666595459,grad_norm: 0.9210506194188066, iteration: 173416
loss: 1.028527021408081,grad_norm: 0.99999914032061, iteration: 173417
loss: 0.9678851962089539,grad_norm: 0.9999991463253162, iteration: 173418
loss: 0.9786375761032104,grad_norm: 0.8774055515132866, iteration: 173419
loss: 1.0078988075256348,grad_norm: 0.9513710959704569, iteration: 173420
loss: 1.0333236455917358,grad_norm: 0.9999991125816812, iteration: 173421
loss: 1.024030089378357,grad_norm: 0.9999990852832833, iteration: 173422
loss: 0.984379231929779,grad_norm: 0.9058843002488636, iteration: 173423
loss: 0.9955589771270752,grad_norm: 0.99999911752542, iteration: 173424
loss: 0.9838289022445679,grad_norm: 0.9999989971359765, iteration: 173425
loss: 1.0080251693725586,grad_norm: 0.9091519091023211, iteration: 173426
loss: 0.9924753308296204,grad_norm: 0.95856675480229, iteration: 173427
loss: 0.9773003458976746,grad_norm: 0.999999061669857, iteration: 173428
loss: 0.9612801671028137,grad_norm: 0.9999991159520257, iteration: 173429
loss: 1.0135349035263062,grad_norm: 0.999999366196972, iteration: 173430
loss: 0.994439423084259,grad_norm: 0.999999019850417, iteration: 173431
loss: 1.014910340309143,grad_norm: 0.9999992905166613, iteration: 173432
loss: 1.0534071922302246,grad_norm: 0.9942784121842816, iteration: 173433
loss: 1.0104310512542725,grad_norm: 0.8367505280389772, iteration: 173434
loss: 0.9644038081169128,grad_norm: 0.9778573445730919, iteration: 173435
loss: 0.9930754899978638,grad_norm: 0.9334100660398195, iteration: 173436
loss: 1.0009757280349731,grad_norm: 0.9188466907598547, iteration: 173437
loss: 1.0254911184310913,grad_norm: 0.9999992323679217, iteration: 173438
loss: 0.9928631782531738,grad_norm: 0.9558024879830718, iteration: 173439
loss: 1.0223630666732788,grad_norm: 0.9503646322442891, iteration: 173440
loss: 1.0186711549758911,grad_norm: 0.9999989897866518, iteration: 173441
loss: 0.9996375441551208,grad_norm: 0.8609143018976032, iteration: 173442
loss: 1.0153119564056396,grad_norm: 0.9999992414006519, iteration: 173443
loss: 1.001263976097107,grad_norm: 0.9203962242414834, iteration: 173444
loss: 0.9654116034507751,grad_norm: 0.9771366851553952, iteration: 173445
loss: 0.9986850619316101,grad_norm: 0.8649877374618973, iteration: 173446
loss: 1.008415937423706,grad_norm: 0.9999991401042572, iteration: 173447
loss: 1.0113613605499268,grad_norm: 0.9397219128858663, iteration: 173448
loss: 1.027217149734497,grad_norm: 0.9999991412771111, iteration: 173449
loss: 1.0148564577102661,grad_norm: 0.9423710808348383, iteration: 173450
loss: 1.017164945602417,grad_norm: 0.8868835706278547, iteration: 173451
loss: 1.0014686584472656,grad_norm: 0.8316590314006613, iteration: 173452
loss: 1.0066057443618774,grad_norm: 0.9999991740604188, iteration: 173453
loss: 1.0461695194244385,grad_norm: 0.8586763603535971, iteration: 173454
loss: 1.0298123359680176,grad_norm: 0.9999989723032756, iteration: 173455
loss: 1.0226694345474243,grad_norm: 0.8746645120868125, iteration: 173456
loss: 1.0125796794891357,grad_norm: 0.9048603267087659, iteration: 173457
loss: 0.9987476468086243,grad_norm: 0.897488584151649, iteration: 173458
loss: 1.033735990524292,grad_norm: 0.9999990538215958, iteration: 173459
loss: 1.0334340333938599,grad_norm: 0.999999156885183, iteration: 173460
loss: 0.970578670501709,grad_norm: 0.9745707022301678, iteration: 173461
loss: 1.0061002969741821,grad_norm: 0.9447933209213509, iteration: 173462
loss: 1.019587755203247,grad_norm: 0.9999991962664202, iteration: 173463
loss: 0.9757899045944214,grad_norm: 0.999998930720941, iteration: 173464
loss: 0.9645164012908936,grad_norm: 0.9999990754217174, iteration: 173465
loss: 1.0202387571334839,grad_norm: 0.9999990793279206, iteration: 173466
loss: 1.011817455291748,grad_norm: 0.999999067189757, iteration: 173467
loss: 1.0102943181991577,grad_norm: 0.9999991046224924, iteration: 173468
loss: 1.0367228984832764,grad_norm: 0.9271389923734529, iteration: 173469
loss: 1.004990816116333,grad_norm: 0.8241629726725276, iteration: 173470
loss: 1.0255059003829956,grad_norm: 0.9999994065494051, iteration: 173471
loss: 0.9944964051246643,grad_norm: 0.9999989675257102, iteration: 173472
loss: 1.0102900266647339,grad_norm: 0.9999991279896596, iteration: 173473
loss: 0.9911667108535767,grad_norm: 0.9682081299572934, iteration: 173474
loss: 0.9667640924453735,grad_norm: 0.8938403804765027, iteration: 173475
loss: 1.00713312625885,grad_norm: 0.9999990996294182, iteration: 173476
loss: 1.0071437358856201,grad_norm: 0.9999992024311215, iteration: 173477
loss: 1.0084272623062134,grad_norm: 0.9524289126365758, iteration: 173478
loss: 0.98423171043396,grad_norm: 0.9999991455088058, iteration: 173479
loss: 1.008705735206604,grad_norm: 0.9999989410227847, iteration: 173480
loss: 1.0460691452026367,grad_norm: 0.9999989654550644, iteration: 173481
loss: 1.0021910667419434,grad_norm: 0.9418065314674473, iteration: 173482
loss: 0.9725410342216492,grad_norm: 0.9999990483562617, iteration: 173483
loss: 1.0150806903839111,grad_norm: 0.8116901462398668, iteration: 173484
loss: 1.003529667854309,grad_norm: 0.9999994501034261, iteration: 173485
loss: 1.0222232341766357,grad_norm: 0.9999990949329846, iteration: 173486
loss: 1.0112303495407104,grad_norm: 0.9999990587987627, iteration: 173487
loss: 1.0176482200622559,grad_norm: 0.9176271422522531, iteration: 173488
loss: 1.0085009336471558,grad_norm: 0.9999992091301515, iteration: 173489
loss: 0.9636363983154297,grad_norm: 0.930323739952749, iteration: 173490
loss: 0.9634450674057007,grad_norm: 0.9999990766635267, iteration: 173491
loss: 0.9832757711410522,grad_norm: 0.8821988036184354, iteration: 173492
loss: 1.027459740638733,grad_norm: 0.9817349674143324, iteration: 173493
loss: 0.9868380427360535,grad_norm: 0.8472549533810007, iteration: 173494
loss: 1.0444053411483765,grad_norm: 0.9999998402126512, iteration: 173495
loss: 1.00094735622406,grad_norm: 0.9730091490175828, iteration: 173496
loss: 0.9920392036437988,grad_norm: 0.9999991426821309, iteration: 173497
loss: 1.0489107370376587,grad_norm: 0.992182212804677, iteration: 173498
loss: 1.044235110282898,grad_norm: 0.9842093463027298, iteration: 173499
loss: 1.0156974792480469,grad_norm: 0.9520011110686893, iteration: 173500
loss: 0.9680826663970947,grad_norm: 0.898272421779162, iteration: 173501
loss: 1.0338070392608643,grad_norm: 0.9660973059909983, iteration: 173502
loss: 0.9824613928794861,grad_norm: 0.9338095246156257, iteration: 173503
loss: 0.9930601716041565,grad_norm: 0.8584010474377277, iteration: 173504
loss: 1.0133134126663208,grad_norm: 0.9208063703826119, iteration: 173505
loss: 0.9568087458610535,grad_norm: 0.9864457718905598, iteration: 173506
loss: 1.0371525287628174,grad_norm: 0.9358358457151374, iteration: 173507
loss: 0.9980804324150085,grad_norm: 0.8363471503648058, iteration: 173508
loss: 0.9864434599876404,grad_norm: 0.9999990513363428, iteration: 173509
loss: 0.9672845602035522,grad_norm: 0.9999991795543923, iteration: 173510
loss: 1.0007632970809937,grad_norm: 0.904770552484754, iteration: 173511
loss: 1.0862287282943726,grad_norm: 0.9999991505291638, iteration: 173512
loss: 1.0114331245422363,grad_norm: 0.9999990328098395, iteration: 173513
loss: 1.0144790410995483,grad_norm: 0.9740175363791016, iteration: 173514
loss: 1.009846568107605,grad_norm: 0.9999992333434653, iteration: 173515
loss: 1.027161717414856,grad_norm: 0.9966777565047664, iteration: 173516
loss: 1.0291438102722168,grad_norm: 0.9999998430052247, iteration: 173517
loss: 1.0217336416244507,grad_norm: 0.9999991459048526, iteration: 173518
loss: 1.0109530687332153,grad_norm: 0.8121019868217249, iteration: 173519
loss: 1.0013487339019775,grad_norm: 0.7777931449434021, iteration: 173520
loss: 0.9975390434265137,grad_norm: 0.9824283197579514, iteration: 173521
loss: 1.019356608390808,grad_norm: 0.9999992040963349, iteration: 173522
loss: 0.9925428628921509,grad_norm: 0.891030178782061, iteration: 173523
loss: 1.0287950038909912,grad_norm: 0.9999991791532337, iteration: 173524
loss: 0.9860497117042542,grad_norm: 0.9999991314573224, iteration: 173525
loss: 0.9604353308677673,grad_norm: 0.8939400283396225, iteration: 173526
loss: 1.0155295133590698,grad_norm: 0.9999991507431724, iteration: 173527
loss: 1.031436562538147,grad_norm: 0.9999994102973552, iteration: 173528
loss: 1.0238771438598633,grad_norm: 0.999999015207049, iteration: 173529
loss: 0.9883685111999512,grad_norm: 0.9095252456488138, iteration: 173530
loss: 0.9780292510986328,grad_norm: 0.9999991332867016, iteration: 173531
loss: 0.9938036203384399,grad_norm: 0.9965647432428741, iteration: 173532
loss: 0.9818487167358398,grad_norm: 0.9508500461113062, iteration: 173533
loss: 1.0095465183258057,grad_norm: 0.8596476641546521, iteration: 173534
loss: 1.0223889350891113,grad_norm: 0.9778356820879026, iteration: 173535
loss: 1.0349177122116089,grad_norm: 0.9999993188591253, iteration: 173536
loss: 1.02157723903656,grad_norm: 0.9999990967775313, iteration: 173537
loss: 0.9739289879798889,grad_norm: 0.9259393977768945, iteration: 173538
loss: 1.0166937112808228,grad_norm: 0.9999992489526148, iteration: 173539
loss: 0.9759876132011414,grad_norm: 0.9999460630967474, iteration: 173540
loss: 0.9998511075973511,grad_norm: 0.9999990381317662, iteration: 173541
loss: 0.9691673517227173,grad_norm: 0.8811630237007207, iteration: 173542
loss: 1.0813628435134888,grad_norm: 0.9999991628724115, iteration: 173543
loss: 0.9981392621994019,grad_norm: 0.9999991626599639, iteration: 173544
loss: 0.9991325736045837,grad_norm: 0.9999990842336729, iteration: 173545
loss: 0.9885161519050598,grad_norm: 0.831885268098659, iteration: 173546
loss: 1.0272431373596191,grad_norm: 0.9999991198788133, iteration: 173547
loss: 0.9734834432601929,grad_norm: 0.9999989957706532, iteration: 173548
loss: 1.0152379274368286,grad_norm: 0.9308786788584926, iteration: 173549
loss: 0.9919090867042542,grad_norm: 0.8692409094402505, iteration: 173550
loss: 0.9956449270248413,grad_norm: 0.9999990971599169, iteration: 173551
loss: 0.9754197001457214,grad_norm: 0.9999991255909579, iteration: 173552
loss: 1.034820556640625,grad_norm: 0.9999992766823959, iteration: 173553
loss: 1.0275825262069702,grad_norm: 0.9999991888930113, iteration: 173554
loss: 0.9806723594665527,grad_norm: 0.9999991061733924, iteration: 173555
loss: 1.0023504495620728,grad_norm: 0.9529693881878185, iteration: 173556
loss: 1.0515676736831665,grad_norm: 0.9999991449328973, iteration: 173557
loss: 0.9841335415840149,grad_norm: 0.9999989236500382, iteration: 173558
loss: 0.9837438464164734,grad_norm: 0.9457277712562917, iteration: 173559
loss: 0.9999144077301025,grad_norm: 0.9280367246373217, iteration: 173560
loss: 0.9986176490783691,grad_norm: 0.7875341332424832, iteration: 173561
loss: 1.021669864654541,grad_norm: 0.9999992120185958, iteration: 173562
loss: 1.0090866088867188,grad_norm: 0.8022909370335605, iteration: 173563
loss: 0.9894172549247742,grad_norm: 0.9327309673204954, iteration: 173564
loss: 1.112903118133545,grad_norm: 0.9999992272750785, iteration: 173565
loss: 1.011992335319519,grad_norm: 0.9999992442549994, iteration: 173566
loss: 1.0267298221588135,grad_norm: 0.9999992871920522, iteration: 173567
loss: 1.0165963172912598,grad_norm: 0.8583709369652384, iteration: 173568
loss: 0.980096697807312,grad_norm: 0.9999989680228746, iteration: 173569
loss: 1.0344160795211792,grad_norm: 0.9281545346259047, iteration: 173570
loss: 0.9829199314117432,grad_norm: 0.8197807368868122, iteration: 173571
loss: 1.0083736181259155,grad_norm: 0.8815050086881691, iteration: 173572
loss: 1.0004048347473145,grad_norm: 0.9999990644170167, iteration: 173573
loss: 1.1266751289367676,grad_norm: 0.9999993484109899, iteration: 173574
loss: 0.9643868207931519,grad_norm: 0.9551221138862422, iteration: 173575
loss: 0.9816399216651917,grad_norm: 0.9999991804856133, iteration: 173576
loss: 0.9984951615333557,grad_norm: 0.9999991789545234, iteration: 173577
loss: 1.0296249389648438,grad_norm: 0.999999101594212, iteration: 173578
loss: 1.006500482559204,grad_norm: 0.9999991907716966, iteration: 173579
loss: 0.9986451864242554,grad_norm: 0.999999120555166, iteration: 173580
loss: 1.0238914489746094,grad_norm: 0.9999992096237804, iteration: 173581
loss: 0.9845370054244995,grad_norm: 0.9712687736726884, iteration: 173582
loss: 1.00023353099823,grad_norm: 0.9055518369549713, iteration: 173583
loss: 1.014097809791565,grad_norm: 0.9999990812225701, iteration: 173584
loss: 0.9872674345970154,grad_norm: 0.9999993385557204, iteration: 173585
loss: 1.0250117778778076,grad_norm: 0.919293342736694, iteration: 173586
loss: 0.9737941026687622,grad_norm: 0.9999989075109293, iteration: 173587
loss: 0.9913612604141235,grad_norm: 0.9999992174140816, iteration: 173588
loss: 1.0141003131866455,grad_norm: 0.9606023858021436, iteration: 173589
loss: 0.9908249378204346,grad_norm: 0.9131960946453258, iteration: 173590
loss: 0.9994352459907532,grad_norm: 0.999999112538349, iteration: 173591
loss: 1.0181719064712524,grad_norm: 0.9999994439476365, iteration: 173592
loss: 0.9863470792770386,grad_norm: 0.9999990665221375, iteration: 173593
loss: 0.9788934588432312,grad_norm: 0.8648259469772805, iteration: 173594
loss: 1.359906554222107,grad_norm: 0.9999995736553308, iteration: 173595
loss: 0.9959949254989624,grad_norm: 0.9901228741238229, iteration: 173596
loss: 0.9728189706802368,grad_norm: 0.9999992358435739, iteration: 173597
loss: 1.012019395828247,grad_norm: 0.9741577751395224, iteration: 173598
loss: 1.0413072109222412,grad_norm: 0.999999037857509, iteration: 173599
loss: 0.9565761089324951,grad_norm: 0.9146892595116369, iteration: 173600
loss: 0.9853280782699585,grad_norm: 0.9541095664831615, iteration: 173601
loss: 0.9998655319213867,grad_norm: 0.9999991094793756, iteration: 173602
loss: 1.0080586671829224,grad_norm: 0.9999993961782017, iteration: 173603
loss: 0.9934439063072205,grad_norm: 0.8694123800582685, iteration: 173604
loss: 1.0462480783462524,grad_norm: 0.999999169372788, iteration: 173605
loss: 1.0011550188064575,grad_norm: 0.9999989872657464, iteration: 173606
loss: 1.0170621871948242,grad_norm: 0.9999993059243089, iteration: 173607
loss: 1.0026127099990845,grad_norm: 0.9370116930379394, iteration: 173608
loss: 1.011473536491394,grad_norm: 0.9999992224306931, iteration: 173609
loss: 1.0407073497772217,grad_norm: 0.9543474577973168, iteration: 173610
loss: 1.0114628076553345,grad_norm: 0.9784697159887829, iteration: 173611
loss: 1.0207350254058838,grad_norm: 0.9326553725293293, iteration: 173612
loss: 0.9906197786331177,grad_norm: 0.999202798996279, iteration: 173613
loss: 1.0204380750656128,grad_norm: 0.9344767216225356, iteration: 173614
loss: 0.9876149296760559,grad_norm: 0.8990815876281489, iteration: 173615
loss: 0.9840761423110962,grad_norm: 0.9343132723613401, iteration: 173616
loss: 1.014772653579712,grad_norm: 0.9999992170717972, iteration: 173617
loss: 1.048630714416504,grad_norm: 0.9999997369448378, iteration: 173618
loss: 1.0293498039245605,grad_norm: 0.9999990839423284, iteration: 173619
loss: 0.9757040739059448,grad_norm: 0.9999990762411729, iteration: 173620
loss: 0.9964039325714111,grad_norm: 0.9500861383547297, iteration: 173621
loss: 1.0046519041061401,grad_norm: 0.999772999342792, iteration: 173622
loss: 0.9886413812637329,grad_norm: 0.9166695327593968, iteration: 173623
loss: 1.0043706893920898,grad_norm: 0.9920497234535745, iteration: 173624
loss: 0.9857615828514099,grad_norm: 0.95086737636681, iteration: 173625
loss: 1.0217164754867554,grad_norm: 0.9920457202905821, iteration: 173626
loss: 1.0097918510437012,grad_norm: 0.9999991526143519, iteration: 173627
loss: 0.9713617563247681,grad_norm: 0.9851363882750711, iteration: 173628
loss: 1.003159523010254,grad_norm: 0.9999992158084235, iteration: 173629
loss: 1.0211399793624878,grad_norm: 0.9999996478953027, iteration: 173630
loss: 1.0080233812332153,grad_norm: 0.9999996081705057, iteration: 173631
loss: 1.0058568716049194,grad_norm: 0.9999991737146966, iteration: 173632
loss: 1.033323049545288,grad_norm: 0.9999991997084753, iteration: 173633
loss: 0.9809897541999817,grad_norm: 0.8517499762008591, iteration: 173634
loss: 0.9769909977912903,grad_norm: 0.9999992364285156, iteration: 173635
loss: 0.9898829460144043,grad_norm: 0.9999989729231129, iteration: 173636
loss: 0.9971335530281067,grad_norm: 0.9999989820604593, iteration: 173637
loss: 1.007730484008789,grad_norm: 0.9869059337125711, iteration: 173638
loss: 0.997212290763855,grad_norm: 0.9757222003441962, iteration: 173639
loss: 1.0448124408721924,grad_norm: 0.9999992188577849, iteration: 173640
loss: 1.0023303031921387,grad_norm: 0.9999990694643258, iteration: 173641
loss: 0.9951956868171692,grad_norm: 0.9994644247243899, iteration: 173642
loss: 0.9923481345176697,grad_norm: 0.9762262474504273, iteration: 173643
loss: 0.9988703727722168,grad_norm: 0.999998949856105, iteration: 173644
loss: 1.019431233406067,grad_norm: 0.8909321535746855, iteration: 173645
loss: 1.0001221895217896,grad_norm: 0.9999990683062658, iteration: 173646
loss: 0.9982841610908508,grad_norm: 0.9421061663812152, iteration: 173647
loss: 0.9646637439727783,grad_norm: 0.8951493964542473, iteration: 173648
loss: 0.9875872731208801,grad_norm: 0.8543289064231739, iteration: 173649
loss: 1.0280855894088745,grad_norm: 0.9999995643544505, iteration: 173650
loss: 0.9582194685935974,grad_norm: 0.999999170978678, iteration: 173651
loss: 1.0038946866989136,grad_norm: 0.8942820977363817, iteration: 173652
loss: 0.9880852699279785,grad_norm: 0.9999992948359908, iteration: 173653
loss: 1.0017101764678955,grad_norm: 0.9999992617773356, iteration: 173654
loss: 1.0125795602798462,grad_norm: 0.8783657426364115, iteration: 173655
loss: 0.9897295832633972,grad_norm: 0.8762966008981622, iteration: 173656
loss: 1.0437160730361938,grad_norm: 0.9999992713704156, iteration: 173657
loss: 0.9897308945655823,grad_norm: 0.9374079771754383, iteration: 173658
loss: 0.9928074479103088,grad_norm: 0.9999991628551417, iteration: 173659
loss: 1.0048712491989136,grad_norm: 0.9999991487290024, iteration: 173660
loss: 1.046880841255188,grad_norm: 0.9742814506388044, iteration: 173661
loss: 1.0260027647018433,grad_norm: 0.879773225375055, iteration: 173662
loss: 0.988310694694519,grad_norm: 0.8876365303198345, iteration: 173663
loss: 1.0158517360687256,grad_norm: 0.9999991349465622, iteration: 173664
loss: 0.9617749452590942,grad_norm: 0.999999313301137, iteration: 173665
loss: 1.0131915807724,grad_norm: 0.8415123560746808, iteration: 173666
loss: 0.9803169369697571,grad_norm: 0.9999991673133343, iteration: 173667
loss: 0.9951953291893005,grad_norm: 0.9999990743561513, iteration: 173668
loss: 1.0356518030166626,grad_norm: 0.9999992133774203, iteration: 173669
loss: 1.0022828578948975,grad_norm: 0.9905772421643588, iteration: 173670
loss: 0.9940019845962524,grad_norm: 0.8392558561766512, iteration: 173671
loss: 0.989439070224762,grad_norm: 0.9951364902725501, iteration: 173672
loss: 0.9910217523574829,grad_norm: 0.938555941581659, iteration: 173673
loss: 1.0093460083007812,grad_norm: 0.9999993614453107, iteration: 173674
loss: 1.0213274955749512,grad_norm: 0.9999992328137008, iteration: 173675
loss: 1.0194780826568604,grad_norm: 0.999999249613861, iteration: 173676
loss: 0.9988473653793335,grad_norm: 0.9999992377562857, iteration: 173677
loss: 1.008657455444336,grad_norm: 0.8988335859762873, iteration: 173678
loss: 0.9731736183166504,grad_norm: 0.796587796009138, iteration: 173679
loss: 1.0134286880493164,grad_norm: 0.938760634697467, iteration: 173680
loss: 1.0022433996200562,grad_norm: 0.9999990393805105, iteration: 173681
loss: 1.0667887926101685,grad_norm: 0.9696822980946301, iteration: 173682
loss: 0.9703236818313599,grad_norm: 0.9999990761764006, iteration: 173683
loss: 0.9699018001556396,grad_norm: 0.948175553908295, iteration: 173684
loss: 0.9858865141868591,grad_norm: 0.9178046010292611, iteration: 173685
loss: 1.0029261112213135,grad_norm: 0.9446886776578718, iteration: 173686
loss: 0.9917397499084473,grad_norm: 0.9999990532325049, iteration: 173687
loss: 0.9934988617897034,grad_norm: 0.8917711312892144, iteration: 173688
loss: 0.9948725700378418,grad_norm: 0.9586848415723991, iteration: 173689
loss: 1.00123131275177,grad_norm: 0.88109352006242, iteration: 173690
loss: 1.033139944076538,grad_norm: 0.9502123761655494, iteration: 173691
loss: 1.0113691091537476,grad_norm: 0.9999993138877172, iteration: 173692
loss: 0.991946280002594,grad_norm: 0.9150669786966709, iteration: 173693
loss: 1.0258017778396606,grad_norm: 0.999999748271226, iteration: 173694
loss: 0.985957145690918,grad_norm: 0.9223829154817963, iteration: 173695
loss: 1.1114587783813477,grad_norm: 0.9999991754673176, iteration: 173696
loss: 0.977020800113678,grad_norm: 0.9999991238384912, iteration: 173697
loss: 0.9792575836181641,grad_norm: 0.976558596496892, iteration: 173698
loss: 0.9781278967857361,grad_norm: 0.9157401046649852, iteration: 173699
loss: 0.9765350222587585,grad_norm: 0.9999994745124146, iteration: 173700
loss: 1.01629638671875,grad_norm: 0.8417369768251899, iteration: 173701
loss: 1.0165754556655884,grad_norm: 0.9406535076764911, iteration: 173702
loss: 1.1951286792755127,grad_norm: 0.9999995464644531, iteration: 173703
loss: 0.9483033418655396,grad_norm: 0.8729219725685929, iteration: 173704
loss: 1.0222269296646118,grad_norm: 0.9137683318347554, iteration: 173705
loss: 0.9728836417198181,grad_norm: 0.9999990924318807, iteration: 173706
loss: 1.0023910999298096,grad_norm: 0.9970720316456265, iteration: 173707
loss: 1.0035364627838135,grad_norm: 0.935335760708683, iteration: 173708
loss: 0.9835877418518066,grad_norm: 0.9999989834449667, iteration: 173709
loss: 0.9783012866973877,grad_norm: 0.8491971033686728, iteration: 173710
loss: 0.9771783351898193,grad_norm: 0.9999990111569715, iteration: 173711
loss: 0.9986245632171631,grad_norm: 0.9999991868924997, iteration: 173712
loss: 1.0086995363235474,grad_norm: 0.9445978135621695, iteration: 173713
loss: 1.0137075185775757,grad_norm: 0.9119206920532672, iteration: 173714
loss: 0.970556378364563,grad_norm: 0.9999991683859287, iteration: 173715
loss: 1.0061914920806885,grad_norm: 0.8670671405965127, iteration: 173716
loss: 0.99592125415802,grad_norm: 0.9573942516862015, iteration: 173717
loss: 1.0290765762329102,grad_norm: 0.9999998102345018, iteration: 173718
loss: 1.0064690113067627,grad_norm: 0.9999991809493316, iteration: 173719
loss: 1.0578961372375488,grad_norm: 0.9999993309879579, iteration: 173720
loss: 1.09795081615448,grad_norm: 0.9999991852127608, iteration: 173721
loss: 1.0610159635543823,grad_norm: 0.999999455685239, iteration: 173722
loss: 1.0294374227523804,grad_norm: 0.9999990270437138, iteration: 173723
loss: 0.9896253943443298,grad_norm: 0.9135577670433778, iteration: 173724
loss: 1.0185116529464722,grad_norm: 0.9806218089890822, iteration: 173725
loss: 0.9836256504058838,grad_norm: 0.999999617797744, iteration: 173726
loss: 1.1322697401046753,grad_norm: 0.999999093294525, iteration: 173727
loss: 1.01362943649292,grad_norm: 0.9862775594418081, iteration: 173728
loss: 0.9784486889839172,grad_norm: 0.9949119164131519, iteration: 173729
loss: 1.0262879133224487,grad_norm: 0.9611747068289127, iteration: 173730
loss: 0.9946670532226562,grad_norm: 0.8226475519220875, iteration: 173731
loss: 1.0304938554763794,grad_norm: 0.999999292156158, iteration: 173732
loss: 0.9920048713684082,grad_norm: 0.9999989412033174, iteration: 173733
loss: 1.0019575357437134,grad_norm: 0.8816349921613208, iteration: 173734
loss: 1.1385700702667236,grad_norm: 0.9999994948812266, iteration: 173735
loss: 0.9973978996276855,grad_norm: 0.9999992015034385, iteration: 173736
loss: 0.9967044591903687,grad_norm: 0.924422559054647, iteration: 173737
loss: 0.9745326042175293,grad_norm: 0.9999990946650985, iteration: 173738
loss: 1.0356204509735107,grad_norm: 0.9999991506748805, iteration: 173739
loss: 0.9654061198234558,grad_norm: 0.8858916842522109, iteration: 173740
loss: 0.9906588792800903,grad_norm: 0.9999990925108225, iteration: 173741
loss: 1.0237736701965332,grad_norm: 0.999999151894535, iteration: 173742
loss: 0.9958484172821045,grad_norm: 0.9776513778205725, iteration: 173743
loss: 0.9997456669807434,grad_norm: 0.999999152725305, iteration: 173744
loss: 1.0152338743209839,grad_norm: 0.999999203839888, iteration: 173745
loss: 1.0250691175460815,grad_norm: 0.9999990205858663, iteration: 173746
loss: 1.0052801370620728,grad_norm: 0.8849272657111822, iteration: 173747
loss: 1.0076733827590942,grad_norm: 0.8597094636956877, iteration: 173748
loss: 0.9666416645050049,grad_norm: 0.9999990414535991, iteration: 173749
loss: 1.0220404863357544,grad_norm: 0.9085005091597314, iteration: 173750
loss: 0.9999250173568726,grad_norm: 0.9158991603746187, iteration: 173751
loss: 1.0369845628738403,grad_norm: 0.9165503756244011, iteration: 173752
loss: 0.9765076637268066,grad_norm: 0.9473273620671094, iteration: 173753
loss: 1.0204063653945923,grad_norm: 0.8973166958880407, iteration: 173754
loss: 1.0134249925613403,grad_norm: 0.9612271063260128, iteration: 173755
loss: 1.0070585012435913,grad_norm: 0.847008267021515, iteration: 173756
loss: 0.9687874913215637,grad_norm: 0.9380528353107532, iteration: 173757
loss: 1.011664867401123,grad_norm: 0.7809226488834264, iteration: 173758
loss: 1.0130313634872437,grad_norm: 0.9999990144272326, iteration: 173759
loss: 1.0011183023452759,grad_norm: 0.999999119617362, iteration: 173760
loss: 1.0161583423614502,grad_norm: 0.9999991723489262, iteration: 173761
loss: 0.9929990768432617,grad_norm: 0.9999990373856658, iteration: 173762
loss: 1.012865424156189,grad_norm: 0.9416469250350323, iteration: 173763
loss: 0.9929826259613037,grad_norm: 0.9589802336009715, iteration: 173764
loss: 1.0207959413528442,grad_norm: 0.7770690124225993, iteration: 173765
loss: 0.9672024250030518,grad_norm: 0.9503439519923579, iteration: 173766
loss: 0.9937659502029419,grad_norm: 0.9999991760712674, iteration: 173767
loss: 0.9577580094337463,grad_norm: 0.9249062928715963, iteration: 173768
loss: 0.9986323118209839,grad_norm: 0.9999993177368337, iteration: 173769
loss: 1.0143073797225952,grad_norm: 0.9999990622896643, iteration: 173770
loss: 0.9650928974151611,grad_norm: 0.999999168788704, iteration: 173771
loss: 0.9921184182167053,grad_norm: 0.9051094255480565, iteration: 173772
loss: 1.0190032720565796,grad_norm: 0.8694932580543748, iteration: 173773
loss: 1.0162745714187622,grad_norm: 0.9999990080713438, iteration: 173774
loss: 1.0246024131774902,grad_norm: 0.9831358237346304, iteration: 173775
loss: 0.9858629107475281,grad_norm: 0.9999994616274921, iteration: 173776
loss: 1.0212589502334595,grad_norm: 0.9163978548167211, iteration: 173777
loss: 0.9983713030815125,grad_norm: 0.9999992640068622, iteration: 173778
loss: 0.9616783261299133,grad_norm: 0.999999814615765, iteration: 173779
loss: 1.0265014171600342,grad_norm: 0.8583446988507659, iteration: 173780
loss: 1.0030899047851562,grad_norm: 0.9999990186643836, iteration: 173781
loss: 0.9824593663215637,grad_norm: 0.9999990372814914, iteration: 173782
loss: 0.9699832797050476,grad_norm: 0.99061714239638, iteration: 173783
loss: 1.022379994392395,grad_norm: 0.8463702347998588, iteration: 173784
loss: 1.02498197555542,grad_norm: 0.770886822640684, iteration: 173785
loss: 1.0137178897857666,grad_norm: 0.9999991932051128, iteration: 173786
loss: 1.0639539957046509,grad_norm: 0.9999998951521611, iteration: 173787
loss: 0.9884636402130127,grad_norm: 0.9999991546572389, iteration: 173788
loss: 1.0200085639953613,grad_norm: 0.9999992195058677, iteration: 173789
loss: 1.0805158615112305,grad_norm: 0.9999992836575644, iteration: 173790
loss: 0.952894926071167,grad_norm: 0.9358129079118978, iteration: 173791
loss: 1.0137596130371094,grad_norm: 0.8876174136952871, iteration: 173792
loss: 0.9679338932037354,grad_norm: 0.8366790436839187, iteration: 173793
loss: 1.0049564838409424,grad_norm: 0.8805449961370267, iteration: 173794
loss: 1.0154961347579956,grad_norm: 0.9235362046939148, iteration: 173795
loss: 0.9958454370498657,grad_norm: 0.9999989924003286, iteration: 173796
loss: 0.9975408315658569,grad_norm: 0.9999991659883888, iteration: 173797
loss: 1.024644136428833,grad_norm: 0.7490353498805415, iteration: 173798
loss: 0.9988020658493042,grad_norm: 0.9999991583718483, iteration: 173799
loss: 0.993883490562439,grad_norm: 0.9118618862315849, iteration: 173800
loss: 1.0429595708847046,grad_norm: 0.9999996193205762, iteration: 173801
loss: 1.0226097106933594,grad_norm: 0.8656379907070146, iteration: 173802
loss: 0.9863335490226746,grad_norm: 0.9999992231683132, iteration: 173803
loss: 0.9950041770935059,grad_norm: 0.9999991368845587, iteration: 173804
loss: 1.0236921310424805,grad_norm: 0.9999990203610786, iteration: 173805
loss: 0.9761637449264526,grad_norm: 0.721422676624089, iteration: 173806
loss: 0.997793972492218,grad_norm: 0.999999205370973, iteration: 173807
loss: 1.0203357934951782,grad_norm: 0.9999996357384358, iteration: 173808
loss: 1.0105323791503906,grad_norm: 0.9999992330092725, iteration: 173809
loss: 1.0125166177749634,grad_norm: 0.9999991603519603, iteration: 173810
loss: 1.0088131427764893,grad_norm: 0.8990144666304627, iteration: 173811
loss: 1.0025379657745361,grad_norm: 0.8327112335077999, iteration: 173812
loss: 1.028382420539856,grad_norm: 0.933768332750203, iteration: 173813
loss: 1.0948617458343506,grad_norm: 0.9999998135990854, iteration: 173814
loss: 0.9675976037979126,grad_norm: 0.8235336930547903, iteration: 173815
loss: 0.9712101221084595,grad_norm: 0.9999990480274932, iteration: 173816
loss: 1.005552053451538,grad_norm: 0.9999990672494224, iteration: 173817
loss: 1.0204039812088013,grad_norm: 0.9780182115448113, iteration: 173818
loss: 1.070007085800171,grad_norm: 0.9999990287285615, iteration: 173819
loss: 1.1857541799545288,grad_norm: 0.9999997991391822, iteration: 173820
loss: 0.9929125905036926,grad_norm: 0.9999992888031906, iteration: 173821
loss: 1.086798906326294,grad_norm: 0.9999999511297494, iteration: 173822
loss: 0.9765826463699341,grad_norm: 0.8245476153578968, iteration: 173823
loss: 1.118737816810608,grad_norm: 0.9999992192209147, iteration: 173824
loss: 1.0457009077072144,grad_norm: 0.9999990714354665, iteration: 173825
loss: 1.0971115827560425,grad_norm: 0.9999991744701743, iteration: 173826
loss: 0.9972025752067566,grad_norm: 0.8991542914211915, iteration: 173827
loss: 1.0050475597381592,grad_norm: 0.9636292619650985, iteration: 173828
loss: 0.9764111042022705,grad_norm: 0.9522529726222758, iteration: 173829
loss: 0.9670014381408691,grad_norm: 0.9999992483743178, iteration: 173830
loss: 0.9661137461662292,grad_norm: 0.9296841725460783, iteration: 173831
loss: 1.0087859630584717,grad_norm: 0.999999080940018, iteration: 173832
loss: 1.0220803022384644,grad_norm: 0.9999993220844894, iteration: 173833
loss: 1.0196558237075806,grad_norm: 0.9999989904685634, iteration: 173834
loss: 0.9894970655441284,grad_norm: 0.9999990926645514, iteration: 173835
loss: 0.9971373677253723,grad_norm: 0.9999991378833879, iteration: 173836
loss: 1.0943816900253296,grad_norm: 0.9999991633939266, iteration: 173837
loss: 0.9956440329551697,grad_norm: 0.8740545979764959, iteration: 173838
loss: 1.134021520614624,grad_norm: 0.999999490328318, iteration: 173839
loss: 1.036417841911316,grad_norm: 0.9999992248535081, iteration: 173840
loss: 0.9838982224464417,grad_norm: 0.9617649550299425, iteration: 173841
loss: 0.9823548793792725,grad_norm: 0.9999991023457465, iteration: 173842
loss: 1.0118544101715088,grad_norm: 0.8354158362934091, iteration: 173843
loss: 0.9885613918304443,grad_norm: 0.9999990930438895, iteration: 173844
loss: 1.0088964700698853,grad_norm: 0.9076060388338654, iteration: 173845
loss: 0.9796328544616699,grad_norm: 0.9492329461129244, iteration: 173846
loss: 1.0066754817962646,grad_norm: 0.949344617294102, iteration: 173847
loss: 1.0417759418487549,grad_norm: 0.9999991267598574, iteration: 173848
loss: 1.0106843709945679,grad_norm: 0.9999991668137951, iteration: 173849
loss: 1.039863109588623,grad_norm: 0.9999997864154557, iteration: 173850
loss: 0.9872166514396667,grad_norm: 0.9442085692938951, iteration: 173851
loss: 1.0085853338241577,grad_norm: 0.9999990334580499, iteration: 173852
loss: 0.9712734818458557,grad_norm: 0.999999216913522, iteration: 173853
loss: 1.027324914932251,grad_norm: 0.9999990848080623, iteration: 173854
loss: 0.9964428544044495,grad_norm: 0.9999989764196526, iteration: 173855
loss: 1.013460636138916,grad_norm: 0.9999989854988768, iteration: 173856
loss: 0.9782684445381165,grad_norm: 0.8173089272319022, iteration: 173857
loss: 0.9926995635032654,grad_norm: 0.9999989339120622, iteration: 173858
loss: 0.9981278777122498,grad_norm: 0.8744977366922556, iteration: 173859
loss: 0.9909048676490784,grad_norm: 0.9999989663897269, iteration: 173860
loss: 1.0186313390731812,grad_norm: 0.8371165050633252, iteration: 173861
loss: 1.0289945602416992,grad_norm: 0.9999991187287377, iteration: 173862
loss: 1.0046157836914062,grad_norm: 0.9999990911531456, iteration: 173863
loss: 1.0081478357315063,grad_norm: 0.9999993084778277, iteration: 173864
loss: 0.9639280438423157,grad_norm: 0.8328960577758184, iteration: 173865
loss: 0.9811154007911682,grad_norm: 0.8928228451404489, iteration: 173866
loss: 1.0391193628311157,grad_norm: 0.9382167765198364, iteration: 173867
loss: 1.0008314847946167,grad_norm: 0.9999990346610064, iteration: 173868
loss: 0.979827880859375,grad_norm: 0.9999990945916398, iteration: 173869
loss: 1.0058345794677734,grad_norm: 0.8611313963945703, iteration: 173870
loss: 1.1086026430130005,grad_norm: 0.9999992449405619, iteration: 173871
loss: 0.9740325808525085,grad_norm: 0.8835498189248379, iteration: 173872
loss: 0.9983919262886047,grad_norm: 0.9999990960659522, iteration: 173873
loss: 0.9816169142723083,grad_norm: 0.9999992612471241, iteration: 173874
loss: 1.0202654600143433,grad_norm: 0.9999992429219839, iteration: 173875
loss: 0.9978935122489929,grad_norm: 0.9694790694839928, iteration: 173876
loss: 0.9934282302856445,grad_norm: 0.8406669716404981, iteration: 173877
loss: 0.9864827990531921,grad_norm: 0.9918722110904314, iteration: 173878
loss: 1.0068830251693726,grad_norm: 0.891623956619591, iteration: 173879
loss: 1.036400556564331,grad_norm: 0.9999994536270332, iteration: 173880
loss: 1.019853949546814,grad_norm: 0.9999993842021121, iteration: 173881
loss: 0.9618819355964661,grad_norm: 0.9999991808436366, iteration: 173882
loss: 0.9835992455482483,grad_norm: 0.9487422538970961, iteration: 173883
loss: 0.966361403465271,grad_norm: 0.891256898687032, iteration: 173884
loss: 1.0168685913085938,grad_norm: 0.9999991090373296, iteration: 173885
loss: 0.9858826398849487,grad_norm: 0.9808726597673452, iteration: 173886
loss: 0.9967401027679443,grad_norm: 0.9225776878830767, iteration: 173887
loss: 1.010819911956787,grad_norm: 0.9068751091561129, iteration: 173888
loss: 0.9871896505355835,grad_norm: 0.8532910065420655, iteration: 173889
loss: 1.0026965141296387,grad_norm: 0.9999992810505209, iteration: 173890
loss: 0.997225821018219,grad_norm: 0.9989001559772409, iteration: 173891
loss: 1.0037835836410522,grad_norm: 0.9999990941553809, iteration: 173892
loss: 1.0220450162887573,grad_norm: 0.999999041066915, iteration: 173893
loss: 1.0226157903671265,grad_norm: 0.9999991192157817, iteration: 173894
loss: 1.0926066637039185,grad_norm: 0.9999992681500081, iteration: 173895
loss: 1.018730878829956,grad_norm: 0.9608064111504607, iteration: 173896
loss: 1.0281068086624146,grad_norm: 0.9441662335228483, iteration: 173897
loss: 1.0094873905181885,grad_norm: 0.999999107587091, iteration: 173898
loss: 0.9668850302696228,grad_norm: 0.9695349871658131, iteration: 173899
loss: 0.9715695977210999,grad_norm: 0.9999992036205725, iteration: 173900
loss: 1.0243421792984009,grad_norm: 0.9421111961420606, iteration: 173901
loss: 1.032262921333313,grad_norm: 0.889741533773647, iteration: 173902
loss: 0.9828615784645081,grad_norm: 0.9999991053772469, iteration: 173903
loss: 1.0051027536392212,grad_norm: 0.9999990771539017, iteration: 173904
loss: 0.9964425563812256,grad_norm: 0.9999990353267595, iteration: 173905
loss: 1.1044330596923828,grad_norm: 1.0000000010422363, iteration: 173906
loss: 0.999475359916687,grad_norm: 0.9999991490413157, iteration: 173907
loss: 0.9902893900871277,grad_norm: 0.955460607186638, iteration: 173908
loss: 0.9975629448890686,grad_norm: 0.9882481080455134, iteration: 173909
loss: 0.9953977465629578,grad_norm: 0.833923229116939, iteration: 173910
loss: 0.9854062795639038,grad_norm: 0.9328377648326667, iteration: 173911
loss: 0.9708252549171448,grad_norm: 0.7831528941380295, iteration: 173912
loss: 1.084668517112732,grad_norm: 0.9999995118602303, iteration: 173913
loss: 1.0394091606140137,grad_norm: 0.8918547995011785, iteration: 173914
loss: 0.9954803586006165,grad_norm: 0.9999991396090001, iteration: 173915
loss: 1.0011616945266724,grad_norm: 0.9999992326619751, iteration: 173916
loss: 0.9899245500564575,grad_norm: 0.8502695559609138, iteration: 173917
loss: 1.0030838251113892,grad_norm: 0.9894454391078509, iteration: 173918
loss: 1.011103630065918,grad_norm: 0.8289800276375253, iteration: 173919
loss: 0.9700849056243896,grad_norm: 0.899894768387199, iteration: 173920
loss: 0.9701332449913025,grad_norm: 0.9061460514895188, iteration: 173921
loss: 1.0142529010772705,grad_norm: 0.9999993544463872, iteration: 173922
loss: 1.0173979997634888,grad_norm: 0.9999989474928981, iteration: 173923
loss: 0.9929419159889221,grad_norm: 0.9999989700127584, iteration: 173924
loss: 1.02045476436615,grad_norm: 0.9196631513156724, iteration: 173925
loss: 1.0218873023986816,grad_norm: 0.9293738785243159, iteration: 173926
loss: 0.9597035646438599,grad_norm: 0.8754052680597568, iteration: 173927
loss: 0.9668000936508179,grad_norm: 0.9455080125570982, iteration: 173928
loss: 1.072761058807373,grad_norm: 0.9999995430318032, iteration: 173929
loss: 0.9813722372055054,grad_norm: 0.9521732970751007, iteration: 173930
loss: 0.9642638564109802,grad_norm: 0.9726250887582892, iteration: 173931
loss: 0.9929386377334595,grad_norm: 0.8542795346253518, iteration: 173932
loss: 0.9465267062187195,grad_norm: 0.9999991314547181, iteration: 173933
loss: 1.006407618522644,grad_norm: 0.8833240880227397, iteration: 173934
loss: 1.0038714408874512,grad_norm: 0.9999991990667075, iteration: 173935
loss: 1.0378283262252808,grad_norm: 0.9793894399398073, iteration: 173936
loss: 0.9756994843482971,grad_norm: 0.9999990978581249, iteration: 173937
loss: 1.023121953010559,grad_norm: 0.9076486214158844, iteration: 173938
loss: 1.023629069328308,grad_norm: 0.9759584360806461, iteration: 173939
loss: 1.0104920864105225,grad_norm: 0.9183340090937913, iteration: 173940
loss: 1.0764975547790527,grad_norm: 0.9999995062542633, iteration: 173941
loss: 0.9930363297462463,grad_norm: 0.9934422317302384, iteration: 173942
loss: 1.0227892398834229,grad_norm: 0.8439874153093246, iteration: 173943
loss: 1.0429437160491943,grad_norm: 0.9999990434419097, iteration: 173944
loss: 1.0210353136062622,grad_norm: 0.9999992912708262, iteration: 173945
loss: 1.010724425315857,grad_norm: 0.9609104799950616, iteration: 173946
loss: 0.990684449672699,grad_norm: 0.9999999218205915, iteration: 173947
loss: 1.0445467233657837,grad_norm: 0.9999999222762856, iteration: 173948
loss: 1.012149691581726,grad_norm: 0.8623548122637043, iteration: 173949
loss: 1.0721019506454468,grad_norm: 0.9999991316945556, iteration: 173950
loss: 1.0226536989212036,grad_norm: 0.9999992705965622, iteration: 173951
loss: 1.0253607034683228,grad_norm: 0.9999991273644304, iteration: 173952
loss: 0.9970827698707581,grad_norm: 0.9338451871094045, iteration: 173953
loss: 1.0020614862442017,grad_norm: 0.9499470175681637, iteration: 173954
loss: 0.9767866730690002,grad_norm: 0.8461527277920959, iteration: 173955
loss: 1.0296365022659302,grad_norm: 0.8921070377903276, iteration: 173956
loss: 0.9963333606719971,grad_norm: 0.9999991380158637, iteration: 173957
loss: 0.9934356212615967,grad_norm: 0.9999990853027726, iteration: 173958
loss: 1.0139447450637817,grad_norm: 0.9999991901196716, iteration: 173959
loss: 0.9714787602424622,grad_norm: 0.8979137252672469, iteration: 173960
loss: 0.9961006045341492,grad_norm: 0.9999990462323383, iteration: 173961
loss: 1.0047261714935303,grad_norm: 0.9326215375328368, iteration: 173962
loss: 1.0192691087722778,grad_norm: 0.8481890083145953, iteration: 173963
loss: 1.0090103149414062,grad_norm: 0.9999990428401796, iteration: 173964
loss: 1.0204533338546753,grad_norm: 0.8933626745898848, iteration: 173965
loss: 1.007840633392334,grad_norm: 0.9541793150093666, iteration: 173966
loss: 1.0371487140655518,grad_norm: 0.9999993080797027, iteration: 173967
loss: 1.0210583209991455,grad_norm: 0.9999991509877864, iteration: 173968
loss: 0.9493417739868164,grad_norm: 0.9931735515123988, iteration: 173969
loss: 1.0085612535476685,grad_norm: 0.7911453794475423, iteration: 173970
loss: 1.010510802268982,grad_norm: 0.8423300437360285, iteration: 173971
loss: 1.0371075868606567,grad_norm: 1.0000000253593397, iteration: 173972
loss: 0.9911360144615173,grad_norm: 0.999999079477783, iteration: 173973
loss: 1.0083333253860474,grad_norm: 0.9999991285850252, iteration: 173974
loss: 1.0074399709701538,grad_norm: 0.9487823169780297, iteration: 173975
loss: 1.0388789176940918,grad_norm: 0.9999998175149418, iteration: 173976
loss: 0.9992121458053589,grad_norm: 0.9316512483940538, iteration: 173977
loss: 0.9787316918373108,grad_norm: 0.9999991456681492, iteration: 173978
loss: 0.9973150491714478,grad_norm: 0.9156036982784618, iteration: 173979
loss: 1.0140069723129272,grad_norm: 0.9999991666544364, iteration: 173980
loss: 0.9727134704589844,grad_norm: 0.8682412129582089, iteration: 173981
loss: 1.0174822807312012,grad_norm: 0.9999997802904727, iteration: 173982
loss: 0.9750995635986328,grad_norm: 0.9946238483638167, iteration: 173983
loss: 0.9825927019119263,grad_norm: 0.9665133913764349, iteration: 173984
loss: 0.9878799319267273,grad_norm: 0.9999996084605663, iteration: 173985
loss: 1.0219378471374512,grad_norm: 0.920075615102539, iteration: 173986
loss: 0.9994379878044128,grad_norm: 0.9999990371185169, iteration: 173987
loss: 0.9923532605171204,grad_norm: 0.9999992071656513, iteration: 173988
loss: 0.9872305989265442,grad_norm: 0.9999993938982171, iteration: 173989
loss: 1.010805368423462,grad_norm: 0.844697371146695, iteration: 173990
loss: 1.0049506425857544,grad_norm: 0.9872227944056366, iteration: 173991
loss: 0.9819444417953491,grad_norm: 0.9999992625432792, iteration: 173992
loss: 0.9898185133934021,grad_norm: 0.8105058232272568, iteration: 173993
loss: 0.9897255301475525,grad_norm: 0.9018751960242889, iteration: 173994
loss: 1.0128800868988037,grad_norm: 0.7422452621179458, iteration: 173995
loss: 1.0391558408737183,grad_norm: 0.9536426969321374, iteration: 173996
loss: 1.00217604637146,grad_norm: 0.9322437205949299, iteration: 173997
loss: 1.0063873529434204,grad_norm: 0.7858450066523944, iteration: 173998
loss: 1.0332508087158203,grad_norm: 0.9999991247895084, iteration: 173999
loss: 1.007645606994629,grad_norm: 0.8292261749765105, iteration: 174000
loss: 0.9938746094703674,grad_norm: 0.9999991767883168, iteration: 174001
loss: 0.9981526732444763,grad_norm: 0.9592078461919817, iteration: 174002
loss: 1.0190861225128174,grad_norm: 0.9325908011234695, iteration: 174003
loss: 0.9911239147186279,grad_norm: 0.9999991242139155, iteration: 174004
loss: 1.0120844841003418,grad_norm: 0.9150357654057973, iteration: 174005
loss: 0.9908329844474792,grad_norm: 0.9160028344752895, iteration: 174006
loss: 1.0417402982711792,grad_norm: 0.9702381720997935, iteration: 174007
loss: 1.0152350664138794,grad_norm: 0.9999990865651811, iteration: 174008
loss: 0.9985840320587158,grad_norm: 0.99999922872752, iteration: 174009
loss: 1.0062620639801025,grad_norm: 0.9999990193454057, iteration: 174010
loss: 1.0082296133041382,grad_norm: 0.9999990164802416, iteration: 174011
loss: 1.0022194385528564,grad_norm: 0.7834700722622429, iteration: 174012
loss: 1.028428077697754,grad_norm: 0.9999990663653664, iteration: 174013
loss: 0.9846474528312683,grad_norm: 0.8842837149806233, iteration: 174014
loss: 1.0426876544952393,grad_norm: 0.9912663143615931, iteration: 174015
loss: 1.027143955230713,grad_norm: 0.9999997481639675, iteration: 174016
loss: 1.025150179862976,grad_norm: 0.9317627987303843, iteration: 174017
loss: 0.9765099287033081,grad_norm: 0.9999990134323891, iteration: 174018
loss: 1.028093934059143,grad_norm: 0.8738465825931442, iteration: 174019
loss: 1.024861216545105,grad_norm: 0.9999992135927708, iteration: 174020
loss: 1.0407902002334595,grad_norm: 0.9849166697284517, iteration: 174021
loss: 1.0035299062728882,grad_norm: 0.8414058639459795, iteration: 174022
loss: 1.0031423568725586,grad_norm: 0.8367233316632878, iteration: 174023
loss: 0.9773883819580078,grad_norm: 0.8517090750997203, iteration: 174024
loss: 1.0127127170562744,grad_norm: 0.9999990253127254, iteration: 174025
loss: 1.0368642807006836,grad_norm: 0.9999994421057624, iteration: 174026
loss: 0.9969136714935303,grad_norm: 0.999999037952781, iteration: 174027
loss: 0.9942583441734314,grad_norm: 0.8384025003739763, iteration: 174028
loss: 0.9891583919525146,grad_norm: 0.996282592267377, iteration: 174029
loss: 0.9920490384101868,grad_norm: 0.9999991950758274, iteration: 174030
loss: 0.9956175088882446,grad_norm: 0.9999991801516683, iteration: 174031
loss: 1.002450704574585,grad_norm: 0.9999990436142095, iteration: 174032
loss: 1.0030204057693481,grad_norm: 0.9999990827171086, iteration: 174033
loss: 1.0249297618865967,grad_norm: 0.9999990947436233, iteration: 174034
loss: 1.0100409984588623,grad_norm: 0.9683972494426674, iteration: 174035
loss: 1.0791230201721191,grad_norm: 0.9999991650050795, iteration: 174036
loss: 1.0381956100463867,grad_norm: 0.9999999118813457, iteration: 174037
loss: 0.9557949900627136,grad_norm: 0.8915145072551761, iteration: 174038
loss: 0.9638765454292297,grad_norm: 0.9999990362420057, iteration: 174039
loss: 1.024581789970398,grad_norm: 0.8650090145171045, iteration: 174040
loss: 0.9825420379638672,grad_norm: 0.8912179736975349, iteration: 174041
loss: 0.999903678894043,grad_norm: 0.9999989783399365, iteration: 174042
loss: 0.9919720888137817,grad_norm: 0.9999989453274785, iteration: 174043
loss: 0.9806848168373108,grad_norm: 0.9913351900389156, iteration: 174044
loss: 1.0071301460266113,grad_norm: 0.9999990940914186, iteration: 174045
loss: 0.9755810499191284,grad_norm: 0.9999990350645018, iteration: 174046
loss: 1.0251524448394775,grad_norm: 0.9999991723849747, iteration: 174047
loss: 0.979552686214447,grad_norm: 0.9999991945112605, iteration: 174048
loss: 1.02695894241333,grad_norm: 0.9999996320897451, iteration: 174049
loss: 0.9615283608436584,grad_norm: 0.9999990333337985, iteration: 174050
loss: 0.968004584312439,grad_norm: 0.9999991481455341, iteration: 174051
loss: 1.0391700267791748,grad_norm: 0.9999990146166755, iteration: 174052
loss: 0.9945436120033264,grad_norm: 0.9999992451314538, iteration: 174053
loss: 0.9870125651359558,grad_norm: 0.9882450977288959, iteration: 174054
loss: 1.0020281076431274,grad_norm: 0.9999991148669695, iteration: 174055
loss: 0.985470175743103,grad_norm: 0.9693098743731176, iteration: 174056
loss: 0.9928281903266907,grad_norm: 0.9999989771393102, iteration: 174057
loss: 0.9839031100273132,grad_norm: 0.832859754017199, iteration: 174058
loss: 0.9878290891647339,grad_norm: 0.9999992091184415, iteration: 174059
loss: 0.9851582646369934,grad_norm: 0.999999155216791, iteration: 174060
loss: 1.0042359828948975,grad_norm: 0.9999991723685265, iteration: 174061
loss: 0.9493069648742676,grad_norm: 0.9999990842786873, iteration: 174062
loss: 1.0317203998565674,grad_norm: 0.9999997970093848, iteration: 174063
loss: 0.9866160750389099,grad_norm: 0.9999990866566149, iteration: 174064
loss: 1.0349828004837036,grad_norm: 0.9711791454703554, iteration: 174065
loss: 1.0052231550216675,grad_norm: 0.9628741628256619, iteration: 174066
loss: 0.9632605910301208,grad_norm: 0.8342941378044629, iteration: 174067
loss: 1.0184193849563599,grad_norm: 0.9707591844838164, iteration: 174068
loss: 1.0370814800262451,grad_norm: 0.9999989884088667, iteration: 174069
loss: 1.0192092657089233,grad_norm: 0.9999991774404038, iteration: 174070
loss: 0.9902804493904114,grad_norm: 0.8642462599423961, iteration: 174071
loss: 1.015973687171936,grad_norm: 0.9997898513388951, iteration: 174072
loss: 1.0101932287216187,grad_norm: 0.9999990331469331, iteration: 174073
loss: 0.9982907176017761,grad_norm: 0.999999166599871, iteration: 174074
loss: 0.9553436636924744,grad_norm: 0.9627818472653823, iteration: 174075
loss: 0.9658207893371582,grad_norm: 0.9467503602991619, iteration: 174076
loss: 0.9868871569633484,grad_norm: 0.9999990637960428, iteration: 174077
loss: 1.002424716949463,grad_norm: 0.9467049662939779, iteration: 174078
loss: 1.036161184310913,grad_norm: 0.9999993071888174, iteration: 174079
loss: 1.0084866285324097,grad_norm: 0.999999012944277, iteration: 174080
loss: 0.973961591720581,grad_norm: 0.9373900396184958, iteration: 174081
loss: 0.9894246459007263,grad_norm: 0.9999991811946038, iteration: 174082
loss: 1.021874189376831,grad_norm: 0.9316280231976608, iteration: 174083
loss: 0.9974292516708374,grad_norm: 0.9999991725691346, iteration: 174084
loss: 0.9916727542877197,grad_norm: 0.9911535445573599, iteration: 174085
loss: 1.0049703121185303,grad_norm: 0.9910741626265043, iteration: 174086
loss: 1.0075268745422363,grad_norm: 0.9999992932724797, iteration: 174087
loss: 0.9953655004501343,grad_norm: 0.9999992498700324, iteration: 174088
loss: 0.9772945642471313,grad_norm: 0.9999998967722626, iteration: 174089
loss: 1.0120422840118408,grad_norm: 0.9081360666908063, iteration: 174090
loss: 0.9839256405830383,grad_norm: 0.9362859061748677, iteration: 174091
loss: 1.0066492557525635,grad_norm: 0.9120599157743797, iteration: 174092
loss: 1.0121296644210815,grad_norm: 0.9894631941545813, iteration: 174093
loss: 0.9981328845024109,grad_norm: 0.9735962021782167, iteration: 174094
loss: 1.053105354309082,grad_norm: 0.9999995966754751, iteration: 174095
loss: 1.011073350906372,grad_norm: 0.9349403037928747, iteration: 174096
loss: 0.9843654036521912,grad_norm: 0.9266068571200737, iteration: 174097
loss: 1.0234884023666382,grad_norm: 0.9049076064330612, iteration: 174098
loss: 0.9871197938919067,grad_norm: 0.9999989942228528, iteration: 174099
loss: 1.0210938453674316,grad_norm: 0.884557801015132, iteration: 174100
loss: 0.9827803373336792,grad_norm: 0.9999991225549222, iteration: 174101
loss: 0.9884284734725952,grad_norm: 0.9744409799058084, iteration: 174102
loss: 0.9852255582809448,grad_norm: 0.9999991293661877, iteration: 174103
loss: 1.0929757356643677,grad_norm: 0.9999994815439813, iteration: 174104
loss: 1.0378203392028809,grad_norm: 0.832569475038577, iteration: 174105
loss: 1.0097146034240723,grad_norm: 0.9356157824084609, iteration: 174106
loss: 1.0161010026931763,grad_norm: 0.9650009174971825, iteration: 174107
loss: 1.014912486076355,grad_norm: 0.8879208785996898, iteration: 174108
loss: 0.9691898226737976,grad_norm: 0.9999990008368751, iteration: 174109
loss: 1.0526700019836426,grad_norm: 0.9720865051790765, iteration: 174110
loss: 1.0002843141555786,grad_norm: 0.9999992347814564, iteration: 174111
loss: 1.0147656202316284,grad_norm: 0.9999990878496955, iteration: 174112
loss: 1.0114929676055908,grad_norm: 0.9999990500514143, iteration: 174113
loss: 1.0215034484863281,grad_norm: 0.9739152657706864, iteration: 174114
loss: 1.008332371711731,grad_norm: 0.9999990308894163, iteration: 174115
loss: 1.0286390781402588,grad_norm: 0.999999251543158, iteration: 174116
loss: 1.0079309940338135,grad_norm: 0.9374234926115516, iteration: 174117
loss: 0.9506893754005432,grad_norm: 0.9999990872821073, iteration: 174118
loss: 0.9855061173439026,grad_norm: 0.8639833629644752, iteration: 174119
loss: 1.025439977645874,grad_norm: 0.9999990481223713, iteration: 174120
loss: 0.9518083930015564,grad_norm: 0.9999993830997282, iteration: 174121
loss: 0.978381872177124,grad_norm: 0.9999991196818858, iteration: 174122
loss: 1.0459332466125488,grad_norm: 0.9999991578803276, iteration: 174123
loss: 0.994741678237915,grad_norm: 0.9999990992693119, iteration: 174124
loss: 0.9879090189933777,grad_norm: 0.8446732078949295, iteration: 174125
loss: 0.9991639256477356,grad_norm: 0.9999997152945748, iteration: 174126
loss: 0.9981922507286072,grad_norm: 0.932332561164617, iteration: 174127
loss: 0.9873315691947937,grad_norm: 0.7833748998133702, iteration: 174128
loss: 1.0011131763458252,grad_norm: 0.9999991731597085, iteration: 174129
loss: 0.9860165119171143,grad_norm: 0.9911367595268011, iteration: 174130
loss: 0.9970556497573853,grad_norm: 0.9144128984518705, iteration: 174131
loss: 1.0213285684585571,grad_norm: 0.9769747300449491, iteration: 174132
loss: 1.0113592147827148,grad_norm: 0.9503816639788344, iteration: 174133
loss: 0.9973292350769043,grad_norm: 0.9999991053338814, iteration: 174134
loss: 1.0030146837234497,grad_norm: 0.8951457372275489, iteration: 174135
loss: 1.0029149055480957,grad_norm: 0.947866873323159, iteration: 174136
loss: 1.013588547706604,grad_norm: 0.9718655263037648, iteration: 174137
loss: 1.0270678997039795,grad_norm: 0.9967956144519915, iteration: 174138
loss: 0.9935062527656555,grad_norm: 0.9999991226406679, iteration: 174139
loss: 1.0065635442733765,grad_norm: 0.9546982433267062, iteration: 174140
loss: 0.9944690465927124,grad_norm: 0.9999989676516321, iteration: 174141
loss: 1.0137171745300293,grad_norm: 0.9999990573890629, iteration: 174142
loss: 1.0065901279449463,grad_norm: 0.9999990162584107, iteration: 174143
loss: 0.9689511060714722,grad_norm: 0.8869586997954129, iteration: 174144
loss: 0.962083637714386,grad_norm: 0.999999097354044, iteration: 174145
loss: 0.9929032325744629,grad_norm: 0.8866521318022031, iteration: 174146
loss: 1.0010426044464111,grad_norm: 0.9999992593874335, iteration: 174147
loss: 0.9932248592376709,grad_norm: 0.999999187720635, iteration: 174148
loss: 1.0140169858932495,grad_norm: 0.8302521328315056, iteration: 174149
loss: 1.070366621017456,grad_norm: 0.999999512269266, iteration: 174150
loss: 1.0403919219970703,grad_norm: 0.9999997949007994, iteration: 174151
loss: 0.9803869724273682,grad_norm: 0.9999992405816953, iteration: 174152
loss: 0.9989028573036194,grad_norm: 0.9581859948837314, iteration: 174153
loss: 0.9978064298629761,grad_norm: 0.9836499755050885, iteration: 174154
loss: 1.0012216567993164,grad_norm: 0.9999992084270463, iteration: 174155
loss: 0.9917600154876709,grad_norm: 0.9087276454698918, iteration: 174156
loss: 0.9980247616767883,grad_norm: 0.9287012929104242, iteration: 174157
loss: 1.0113964080810547,grad_norm: 0.9999991294041158, iteration: 174158
loss: 0.978710412979126,grad_norm: 0.9999991073311232, iteration: 174159
loss: 1.0279514789581299,grad_norm: 0.9999992579409315, iteration: 174160
loss: 0.9634487628936768,grad_norm: 0.9250862160545439, iteration: 174161
loss: 0.9674409627914429,grad_norm: 0.9999994638735656, iteration: 174162
loss: 0.9841100573539734,grad_norm: 0.8910837535811371, iteration: 174163
loss: 1.0068747997283936,grad_norm: 0.9999990723265252, iteration: 174164
loss: 0.9688263535499573,grad_norm: 0.8835942262276244, iteration: 174165
loss: 1.009831190109253,grad_norm: 0.9999991684937134, iteration: 174166
loss: 0.9934198260307312,grad_norm: 0.9999991613700753, iteration: 174167
loss: 1.077784776687622,grad_norm: 0.9999994864056, iteration: 174168
loss: 0.9804702401161194,grad_norm: 0.9677227963813287, iteration: 174169
loss: 0.989030659198761,grad_norm: 0.9999992328326164, iteration: 174170
loss: 1.0266759395599365,grad_norm: 0.986684641371289, iteration: 174171
loss: 1.0094233751296997,grad_norm: 0.9999991821962575, iteration: 174172
loss: 1.0285557508468628,grad_norm: 0.9999991301578164, iteration: 174173
loss: 0.9915391802787781,grad_norm: 0.9999991225189261, iteration: 174174
loss: 0.9929876327514648,grad_norm: 0.9986766166095267, iteration: 174175
loss: 0.9409763216972351,grad_norm: 0.9999991197494228, iteration: 174176
loss: 0.9937455058097839,grad_norm: 0.9999989436813367, iteration: 174177
loss: 0.9808923602104187,grad_norm: 0.999999151082244, iteration: 174178
loss: 1.0074881315231323,grad_norm: 0.9999991110701153, iteration: 174179
loss: 1.0528157949447632,grad_norm: 0.9999995234828714, iteration: 174180
loss: 1.0257591009140015,grad_norm: 0.9999992250703776, iteration: 174181
loss: 1.017262578010559,grad_norm: 0.9652409002585057, iteration: 174182
loss: 1.0120208263397217,grad_norm: 0.9999991175917141, iteration: 174183
loss: 0.9769706130027771,grad_norm: 0.9999990785624787, iteration: 174184
loss: 1.1062740087509155,grad_norm: 0.9999995635347465, iteration: 174185
loss: 1.0587247610092163,grad_norm: 0.9999991700473715, iteration: 174186
loss: 0.9592617154121399,grad_norm: 0.9026940135556348, iteration: 174187
loss: 0.9958614110946655,grad_norm: 0.9999990698030252, iteration: 174188
loss: 0.9867393970489502,grad_norm: 0.9646771491938657, iteration: 174189
loss: 1.0446563959121704,grad_norm: 0.8285978295138406, iteration: 174190
loss: 1.0212509632110596,grad_norm: 0.9999993622215979, iteration: 174191
loss: 0.9838500022888184,grad_norm: 0.8967169778692928, iteration: 174192
loss: 0.9680209755897522,grad_norm: 0.9782259275600383, iteration: 174193
loss: 0.9634826183319092,grad_norm: 0.9999991501327464, iteration: 174194
loss: 0.93809974193573,grad_norm: 0.9008616416396569, iteration: 174195
loss: 1.0222599506378174,grad_norm: 0.9242710848656899, iteration: 174196
loss: 1.007663369178772,grad_norm: 0.8293259878175616, iteration: 174197
loss: 1.001900315284729,grad_norm: 0.999999228269345, iteration: 174198
loss: 0.9760497212409973,grad_norm: 0.9267471353289776, iteration: 174199
loss: 0.9999538064002991,grad_norm: 0.9999991325934314, iteration: 174200
loss: 0.9857825636863708,grad_norm: 0.9999992145576151, iteration: 174201
loss: 1.0056222677230835,grad_norm: 0.9999993091764362, iteration: 174202
loss: 0.9365257620811462,grad_norm: 0.966324247880385, iteration: 174203
loss: 0.9921096563339233,grad_norm: 0.9632699704851849, iteration: 174204
loss: 1.0321425199508667,grad_norm: 0.9307085800982491, iteration: 174205
loss: 1.0183031558990479,grad_norm: 0.99999911266285, iteration: 174206
loss: 0.9800834655761719,grad_norm: 0.9999990017840681, iteration: 174207
loss: 1.00912606716156,grad_norm: 0.9512652778831165, iteration: 174208
loss: 0.9726926684379578,grad_norm: 0.9461453035902144, iteration: 174209
loss: 1.1308889389038086,grad_norm: 0.9999994863706658, iteration: 174210
loss: 1.0054187774658203,grad_norm: 0.971717872516925, iteration: 174211
loss: 0.9891243577003479,grad_norm: 0.9999993597038204, iteration: 174212
loss: 1.0031458139419556,grad_norm: 0.9999992718199211, iteration: 174213
loss: 1.015523910522461,grad_norm: 0.9194177353553096, iteration: 174214
loss: 0.9938794374465942,grad_norm: 0.9201360623676729, iteration: 174215
loss: 1.0303890705108643,grad_norm: 0.8799488280173118, iteration: 174216
loss: 0.9862358570098877,grad_norm: 0.9832233825222858, iteration: 174217
loss: 0.982566773891449,grad_norm: 0.9734988815195252, iteration: 174218
loss: 0.9962586164474487,grad_norm: 0.9999991182897022, iteration: 174219
loss: 1.041972279548645,grad_norm: 0.9999999328786905, iteration: 174220
loss: 1.0284687280654907,grad_norm: 0.9749549328708235, iteration: 174221
loss: 0.9954419732093811,grad_norm: 0.9759054195141644, iteration: 174222
loss: 0.9934220910072327,grad_norm: 0.8608509679733495, iteration: 174223
loss: 1.0459398031234741,grad_norm: 0.9886173681357279, iteration: 174224
loss: 0.9642158150672913,grad_norm: 0.9999990996951675, iteration: 174225
loss: 1.0055277347564697,grad_norm: 0.9330613586782437, iteration: 174226
loss: 1.004603624343872,grad_norm: 0.9999990204924913, iteration: 174227
loss: 1.0067938566207886,grad_norm: 0.9999991392763566, iteration: 174228
loss: 0.9954769611358643,grad_norm: 0.9999992237692581, iteration: 174229
loss: 1.000119686126709,grad_norm: 0.9553985349894835, iteration: 174230
loss: 1.0189286470413208,grad_norm: 0.999999160750883, iteration: 174231
loss: 1.0046439170837402,grad_norm: 0.9756978886174345, iteration: 174232
loss: 1.0724530220031738,grad_norm: 0.9999996574501954, iteration: 174233
loss: 0.9911156296730042,grad_norm: 0.9868764102811605, iteration: 174234
loss: 1.0000081062316895,grad_norm: 0.9999991547317988, iteration: 174235
loss: 0.9844682812690735,grad_norm: 0.9999991529487853, iteration: 174236
loss: 0.9709396958351135,grad_norm: 0.8264729577183102, iteration: 174237
loss: 1.0085234642028809,grad_norm: 0.955847432307612, iteration: 174238
loss: 0.9824135899543762,grad_norm: 0.9266542583956898, iteration: 174239
loss: 0.9620458483695984,grad_norm: 0.9999991522441415, iteration: 174240
loss: 1.032920002937317,grad_norm: 0.9999990169475427, iteration: 174241
loss: 0.9994503259658813,grad_norm: 0.9999989813965557, iteration: 174242
loss: 0.9730570316314697,grad_norm: 0.942957402622863, iteration: 174243
loss: 0.986176073551178,grad_norm: 0.9783553016829158, iteration: 174244
loss: 0.9889759421348572,grad_norm: 0.9999989568024553, iteration: 174245
loss: 0.9966416954994202,grad_norm: 0.9798298003176529, iteration: 174246
loss: 0.9865995049476624,grad_norm: 0.9999990634115964, iteration: 174247
loss: 1.033850073814392,grad_norm: 0.9999992370964643, iteration: 174248
loss: 0.966992199420929,grad_norm: 0.9999990439606872, iteration: 174249
loss: 1.0189154148101807,grad_norm: 0.8808389750431004, iteration: 174250
loss: 1.04788076877594,grad_norm: 0.932982415880564, iteration: 174251
loss: 0.9525850415229797,grad_norm: 0.9186375764535218, iteration: 174252
loss: 0.98697429895401,grad_norm: 0.9999991468558601, iteration: 174253
loss: 0.9895373582839966,grad_norm: 0.9999990920825162, iteration: 174254
loss: 0.9938346743583679,grad_norm: 0.9999989388577029, iteration: 174255
loss: 1.0022900104522705,grad_norm: 0.8854814823248092, iteration: 174256
loss: 0.9947302937507629,grad_norm: 0.9642819920654325, iteration: 174257
loss: 0.9986563920974731,grad_norm: 0.9999990895890382, iteration: 174258
loss: 1.0301361083984375,grad_norm: 0.9999990984930246, iteration: 174259
loss: 1.001370906829834,grad_norm: 0.9999992637393234, iteration: 174260
loss: 1.0016136169433594,grad_norm: 0.9999990298086585, iteration: 174261
loss: 0.9926633834838867,grad_norm: 0.9999996968069093, iteration: 174262
loss: 0.9981362819671631,grad_norm: 0.8965456794188441, iteration: 174263
loss: 0.9999064207077026,grad_norm: 0.9552992290607762, iteration: 174264
loss: 0.9934791922569275,grad_norm: 0.8839100888388898, iteration: 174265
loss: 0.9949542880058289,grad_norm: 0.9953615885996868, iteration: 174266
loss: 1.0230575799942017,grad_norm: 0.9999996388674995, iteration: 174267
loss: 0.9845510125160217,grad_norm: 0.9999990985250485, iteration: 174268
loss: 1.028128743171692,grad_norm: 0.999999172409262, iteration: 174269
loss: 1.0188502073287964,grad_norm: 0.9999995034462614, iteration: 174270
loss: 1.029659390449524,grad_norm: 0.9999992272760706, iteration: 174271
loss: 1.032586932182312,grad_norm: 0.9999991882218593, iteration: 174272
loss: 0.9726582765579224,grad_norm: 0.8855391340537846, iteration: 174273
loss: 1.0150489807128906,grad_norm: 0.9999990794905232, iteration: 174274
loss: 1.003784418106079,grad_norm: 0.9999990437405185, iteration: 174275
loss: 1.0085335969924927,grad_norm: 0.9999991731178867, iteration: 174276
loss: 0.9968760013580322,grad_norm: 0.9224965712040363, iteration: 174277
loss: 1.0038527250289917,grad_norm: 0.9463778461798172, iteration: 174278
loss: 1.003132939338684,grad_norm: 0.9999992994903153, iteration: 174279
loss: 1.0212359428405762,grad_norm: 0.9999993244774674, iteration: 174280
loss: 1.0366266965866089,grad_norm: 0.7979549294986936, iteration: 174281
loss: 1.0102734565734863,grad_norm: 0.9540067419094655, iteration: 174282
loss: 0.9833254814147949,grad_norm: 0.9340522449060467, iteration: 174283
loss: 0.9927504062652588,grad_norm: 0.9999992165535374, iteration: 174284
loss: 1.0390229225158691,grad_norm: 0.9511965426470202, iteration: 174285
loss: 0.9642467498779297,grad_norm: 0.8256261803967428, iteration: 174286
loss: 0.9806490540504456,grad_norm: 0.9604370863919427, iteration: 174287
loss: 0.9967484474182129,grad_norm: 0.9999997974764899, iteration: 174288
loss: 1.086482048034668,grad_norm: 0.9999995344772306, iteration: 174289
loss: 0.9991323351860046,grad_norm: 0.9203982660393488, iteration: 174290
loss: 0.9961605072021484,grad_norm: 0.8393950829269154, iteration: 174291
loss: 0.9835328459739685,grad_norm: 0.9536591769956402, iteration: 174292
loss: 0.9851704239845276,grad_norm: 0.8924280685693448, iteration: 174293
loss: 1.009891390800476,grad_norm: 0.9412019859593984, iteration: 174294
loss: 0.9679853320121765,grad_norm: 0.9999991509293154, iteration: 174295
loss: 0.9878453612327576,grad_norm: 0.9999989260041838, iteration: 174296
loss: 1.0325965881347656,grad_norm: 0.9999991803852323, iteration: 174297
loss: 0.9921661615371704,grad_norm: 0.9999992039411136, iteration: 174298
loss: 0.9992054104804993,grad_norm: 0.9999992605486533, iteration: 174299
loss: 0.970924437046051,grad_norm: 0.9999990603591247, iteration: 174300
loss: 0.9899477362632751,grad_norm: 0.9999992420487123, iteration: 174301
loss: 1.0112278461456299,grad_norm: 0.9999992179741939, iteration: 174302
loss: 1.0670078992843628,grad_norm: 0.9999991116737103, iteration: 174303
loss: 0.9994965195655823,grad_norm: 0.9999991524726829, iteration: 174304
loss: 1.0004993677139282,grad_norm: 0.9999992590567907, iteration: 174305
loss: 1.116858959197998,grad_norm: 0.9999992455496943, iteration: 174306
loss: 1.0355029106140137,grad_norm: 0.9999990283055118, iteration: 174307
loss: 0.9773434400558472,grad_norm: 0.8933523559036273, iteration: 174308
loss: 0.995434045791626,grad_norm: 0.999999151827458, iteration: 174309
loss: 1.008901596069336,grad_norm: 0.9199699772921719, iteration: 174310
loss: 1.0213689804077148,grad_norm: 0.932302964658633, iteration: 174311
loss: 0.9936251044273376,grad_norm: 0.9186004412911724, iteration: 174312
loss: 0.9824659824371338,grad_norm: 0.9201834495629343, iteration: 174313
loss: 0.9989339113235474,grad_norm: 0.999999048580471, iteration: 174314
loss: 0.994977593421936,grad_norm: 0.9999991107615203, iteration: 174315
loss: 0.9991629123687744,grad_norm: 0.9936466525540928, iteration: 174316
loss: 0.9880723357200623,grad_norm: 0.9999995346389124, iteration: 174317
loss: 1.037322998046875,grad_norm: 0.9999991452885377, iteration: 174318
loss: 1.0027512311935425,grad_norm: 0.9999992047968533, iteration: 174319
loss: 1.0165764093399048,grad_norm: 0.9999990509121522, iteration: 174320
loss: 0.9912119507789612,grad_norm: 0.9610461434747033, iteration: 174321
loss: 1.0019618272781372,grad_norm: 0.9999993983833876, iteration: 174322
loss: 0.9943158626556396,grad_norm: 0.907949943538309, iteration: 174323
loss: 1.0141963958740234,grad_norm: 0.9368458777257264, iteration: 174324
loss: 1.0076498985290527,grad_norm: 0.9999990896949471, iteration: 174325
loss: 1.0222654342651367,grad_norm: 0.9999992938747622, iteration: 174326
loss: 0.9665572643280029,grad_norm: 0.9999989669934943, iteration: 174327
loss: 0.9983609318733215,grad_norm: 0.9999991273198776, iteration: 174328
loss: 1.0031914710998535,grad_norm: 0.9512173762776333, iteration: 174329
loss: 0.9975516200065613,grad_norm: 0.999999076332522, iteration: 174330
loss: 0.9706287980079651,grad_norm: 0.9518454863972468, iteration: 174331
loss: 1.03449547290802,grad_norm: 0.9999992980493171, iteration: 174332
loss: 1.0389928817749023,grad_norm: 0.9999991436017653, iteration: 174333
loss: 0.9924432039260864,grad_norm: 0.999999151236616, iteration: 174334
loss: 0.9635270237922668,grad_norm: 0.9802145446221389, iteration: 174335
loss: 0.9964226484298706,grad_norm: 0.9999992212995447, iteration: 174336
loss: 0.9809878468513489,grad_norm: 0.9269583881249677, iteration: 174337
loss: 0.9995496869087219,grad_norm: 0.8558346027732235, iteration: 174338
loss: 0.955253005027771,grad_norm: 0.9423733127454181, iteration: 174339
loss: 1.0189802646636963,grad_norm: 0.9768885175115772, iteration: 174340
loss: 0.9976521134376526,grad_norm: 0.9999992587363769, iteration: 174341
loss: 0.9839335680007935,grad_norm: 0.9999989918338267, iteration: 174342
loss: 0.975565493106842,grad_norm: 0.9999989817936259, iteration: 174343
loss: 1.027596354484558,grad_norm: 0.9552175018061989, iteration: 174344
loss: 0.9512792825698853,grad_norm: 0.9492811563936325, iteration: 174345
loss: 1.0013197660446167,grad_norm: 0.9901172924237255, iteration: 174346
loss: 0.9668135046958923,grad_norm: 0.9913650403607687, iteration: 174347
loss: 1.0029151439666748,grad_norm: 0.8153829830783038, iteration: 174348
loss: 1.0393730401992798,grad_norm: 0.8860382694740557, iteration: 174349
loss: 0.9967541694641113,grad_norm: 0.999999011306877, iteration: 174350
loss: 1.0088766813278198,grad_norm: 0.8071575179244718, iteration: 174351
loss: 1.0136802196502686,grad_norm: 0.8527524940834511, iteration: 174352
loss: 0.9951080083847046,grad_norm: 0.9999991214946529, iteration: 174353
loss: 0.9699656963348389,grad_norm: 0.896511453899011, iteration: 174354
loss: 1.0258781909942627,grad_norm: 0.8521522324172499, iteration: 174355
loss: 0.9989644289016724,grad_norm: 0.9999990234478652, iteration: 174356
loss: 0.9935328960418701,grad_norm: 0.9657015958250572, iteration: 174357
loss: 0.9865102171897888,grad_norm: 0.9999991858558616, iteration: 174358
loss: 1.0350831747055054,grad_norm: 0.9999991185744778, iteration: 174359
loss: 1.0086888074874878,grad_norm: 0.9930906699942723, iteration: 174360
loss: 0.9454516768455505,grad_norm: 0.9999992548851847, iteration: 174361
loss: 0.9650522470474243,grad_norm: 0.889823855342162, iteration: 174362
loss: 0.9573957920074463,grad_norm: 0.9999990607402401, iteration: 174363
loss: 1.002488613128662,grad_norm: 0.9334407899348434, iteration: 174364
loss: 0.9980416297912598,grad_norm: 0.9127398996882762, iteration: 174365
loss: 1.009106159210205,grad_norm: 0.9999991020585645, iteration: 174366
loss: 1.0025198459625244,grad_norm: 0.9316549136099275, iteration: 174367
loss: 0.9880201816558838,grad_norm: 0.9704370732850346, iteration: 174368
loss: 0.988649308681488,grad_norm: 0.9999991372385533, iteration: 174369
loss: 1.0047829151153564,grad_norm: 0.999999145850651, iteration: 174370
loss: 1.0272752046585083,grad_norm: 0.9999992147233188, iteration: 174371
loss: 1.0312379598617554,grad_norm: 0.999999390442832, iteration: 174372
loss: 1.0023086071014404,grad_norm: 0.8644328673125994, iteration: 174373
loss: 1.0105152130126953,grad_norm: 0.8921642334182778, iteration: 174374
loss: 0.9418469667434692,grad_norm: 0.9999991819113838, iteration: 174375
loss: 1.0025063753128052,grad_norm: 0.9456683017571009, iteration: 174376
loss: 0.977462112903595,grad_norm: 0.9999992202009312, iteration: 174377
loss: 0.9854027032852173,grad_norm: 0.972400256380974, iteration: 174378
loss: 0.9665454030036926,grad_norm: 0.8974201116326098, iteration: 174379
loss: 1.0093134641647339,grad_norm: 0.8979814751354018, iteration: 174380
loss: 1.069178819656372,grad_norm: 0.825247653114372, iteration: 174381
loss: 0.9796985983848572,grad_norm: 0.9897026520038895, iteration: 174382
loss: 0.9697374701499939,grad_norm: 0.9999992114446825, iteration: 174383
loss: 1.0890600681304932,grad_norm: 0.9999998607576402, iteration: 174384
loss: 1.029516339302063,grad_norm: 0.9999990447533378, iteration: 174385
loss: 1.0021741390228271,grad_norm: 0.8393505077896841, iteration: 174386
loss: 0.995345950126648,grad_norm: 0.9833620828451001, iteration: 174387
loss: 0.978786289691925,grad_norm: 0.9490237307045627, iteration: 174388
loss: 1.0036181211471558,grad_norm: 0.9239000175015352, iteration: 174389
loss: 0.9991215467453003,grad_norm: 0.9999990046818346, iteration: 174390
loss: 1.0084154605865479,grad_norm: 0.9999990091998432, iteration: 174391
loss: 0.963639497756958,grad_norm: 0.9999990753384758, iteration: 174392
loss: 1.014399766921997,grad_norm: 0.9999995488431462, iteration: 174393
loss: 0.9972444176673889,grad_norm: 0.999998977961317, iteration: 174394
loss: 1.0306096076965332,grad_norm: 0.8390729113947046, iteration: 174395
loss: 0.9800905585289001,grad_norm: 0.9999989860350401, iteration: 174396
loss: 1.0207817554473877,grad_norm: 0.9999991864371046, iteration: 174397
loss: 0.9818311333656311,grad_norm: 0.8928717593720763, iteration: 174398
loss: 0.9852374792098999,grad_norm: 0.9634843461812501, iteration: 174399
loss: 1.0102521181106567,grad_norm: 0.999999125880383, iteration: 174400
loss: 0.9715599417686462,grad_norm: 0.9999992874811829, iteration: 174401
loss: 0.9843392372131348,grad_norm: 0.9153036161522252, iteration: 174402
loss: 1.0079275369644165,grad_norm: 0.8107872508987961, iteration: 174403
loss: 1.008447527885437,grad_norm: 0.9931653687139271, iteration: 174404
loss: 1.010604977607727,grad_norm: 0.9265761302936092, iteration: 174405
loss: 1.0249704122543335,grad_norm: 0.999999088986617, iteration: 174406
loss: 1.0116777420043945,grad_norm: 0.9205868375822454, iteration: 174407
loss: 0.984767735004425,grad_norm: 0.999999098721905, iteration: 174408
loss: 1.0231893062591553,grad_norm: 0.9744087243864639, iteration: 174409
loss: 1.0000441074371338,grad_norm: 0.9960694488233005, iteration: 174410
loss: 0.975673258304596,grad_norm: 0.9999996272853084, iteration: 174411
loss: 0.9987857937812805,grad_norm: 0.8743285964759965, iteration: 174412
loss: 1.0156182050704956,grad_norm: 0.9624427676293474, iteration: 174413
loss: 0.990414023399353,grad_norm: 0.892344801868394, iteration: 174414
loss: 0.9841576218605042,grad_norm: 0.788576603045215, iteration: 174415
loss: 0.9801947474479675,grad_norm: 0.9999991693530126, iteration: 174416
loss: 0.9802406430244446,grad_norm: 0.9999993100255565, iteration: 174417
loss: 0.9795480966567993,grad_norm: 0.999999147479295, iteration: 174418
loss: 1.0153559446334839,grad_norm: 0.8762527834332011, iteration: 174419
loss: 1.01142156124115,grad_norm: 0.904610106964425, iteration: 174420
loss: 0.9932212233543396,grad_norm: 0.9999989898769105, iteration: 174421
loss: 1.0016549825668335,grad_norm: 0.9999990670530018, iteration: 174422
loss: 1.0064603090286255,grad_norm: 0.9239512464095317, iteration: 174423
loss: 0.9923378825187683,grad_norm: 0.9999991355393388, iteration: 174424
loss: 0.9824634194374084,grad_norm: 0.9999989446431791, iteration: 174425
loss: 0.990384578704834,grad_norm: 0.9999989543853743, iteration: 174426
loss: 0.9846420884132385,grad_norm: 0.9999992456542339, iteration: 174427
loss: 1.022331714630127,grad_norm: 0.9999993347920549, iteration: 174428
loss: 0.9894269108772278,grad_norm: 0.8250225780206005, iteration: 174429
loss: 0.9914011359214783,grad_norm: 0.9999991665231998, iteration: 174430
loss: 0.9732974767684937,grad_norm: 0.9710541676524739, iteration: 174431
loss: 1.0230631828308105,grad_norm: 0.9573545419643068, iteration: 174432
loss: 0.9844135642051697,grad_norm: 0.7677450608096682, iteration: 174433
loss: 1.0182857513427734,grad_norm: 0.9786702014003913, iteration: 174434
loss: 0.9900251030921936,grad_norm: 0.9999990406320871, iteration: 174435
loss: 0.9928741455078125,grad_norm: 0.9999990816138878, iteration: 174436
loss: 0.9769681096076965,grad_norm: 0.9999992537723524, iteration: 174437
loss: 0.9817265868186951,grad_norm: 0.9999991379407844, iteration: 174438
loss: 0.9966433644294739,grad_norm: 0.9999992082062056, iteration: 174439
loss: 1.044995903968811,grad_norm: 0.999999359762174, iteration: 174440
loss: 0.9547715187072754,grad_norm: 0.9999992696975942, iteration: 174441
loss: 1.0283311605453491,grad_norm: 0.8845490779623515, iteration: 174442
loss: 0.9772056341171265,grad_norm: 0.9999991404151376, iteration: 174443
loss: 1.01042902469635,grad_norm: 0.9472913163874173, iteration: 174444
loss: 0.9930790662765503,grad_norm: 0.9558526643411521, iteration: 174445
loss: 0.9879586100578308,grad_norm: 0.9999991491240743, iteration: 174446
loss: 1.0274107456207275,grad_norm: 0.8463118119234798, iteration: 174447
loss: 1.0000888109207153,grad_norm: 0.9999991109917173, iteration: 174448
loss: 0.9863517880439758,grad_norm: 0.9524868482439504, iteration: 174449
loss: 0.974044144153595,grad_norm: 0.8078567601259256, iteration: 174450
loss: 0.994883120059967,grad_norm: 0.9999991802698732, iteration: 174451
loss: 1.0167908668518066,grad_norm: 0.9971990067297701, iteration: 174452
loss: 1.0213581323623657,grad_norm: 0.9999992583467884, iteration: 174453
loss: 0.9590597152709961,grad_norm: 0.9999990522813023, iteration: 174454
loss: 1.0163688659667969,grad_norm: 0.9888981769232537, iteration: 174455
loss: 0.9692550301551819,grad_norm: 0.8891113113453604, iteration: 174456
loss: 0.9938322305679321,grad_norm: 0.9999991414719961, iteration: 174457
loss: 1.0030694007873535,grad_norm: 0.7181074124605427, iteration: 174458
loss: 1.0069098472595215,grad_norm: 0.936916508470974, iteration: 174459
loss: 1.0056698322296143,grad_norm: 0.9075176918519421, iteration: 174460
loss: 0.9765190482139587,grad_norm: 0.9999992017931626, iteration: 174461
loss: 1.0102885961532593,grad_norm: 0.999999304646076, iteration: 174462
loss: 0.991655707359314,grad_norm: 0.9999993540692244, iteration: 174463
loss: 1.1211278438568115,grad_norm: 0.9999994643917974, iteration: 174464
loss: 1.0261861085891724,grad_norm: 0.7781655791973848, iteration: 174465
loss: 0.9903823733329773,grad_norm: 0.9652730129146114, iteration: 174466
loss: 1.0138814449310303,grad_norm: 0.9999991167523329, iteration: 174467
loss: 1.0101429224014282,grad_norm: 0.8761762056539876, iteration: 174468
loss: 0.9815065264701843,grad_norm: 0.878851715144912, iteration: 174469
loss: 1.0223355293273926,grad_norm: 0.8723612500502005, iteration: 174470
loss: 1.0037648677825928,grad_norm: 0.9999992269918119, iteration: 174471
loss: 1.0203431844711304,grad_norm: 0.9999993948643799, iteration: 174472
loss: 1.0031087398529053,grad_norm: 0.9999991328155025, iteration: 174473
loss: 0.9534350633621216,grad_norm: 0.9999990251961854, iteration: 174474
loss: 1.010705828666687,grad_norm: 0.8201034945758591, iteration: 174475
loss: 1.0269584655761719,grad_norm: 0.9929264194650023, iteration: 174476
loss: 1.0132418870925903,grad_norm: 0.9999990144585001, iteration: 174477
loss: 1.0228290557861328,grad_norm: 0.825973279638641, iteration: 174478
loss: 1.0320550203323364,grad_norm: 0.8972904368451247, iteration: 174479
loss: 1.005068063735962,grad_norm: 0.9510828394996218, iteration: 174480
loss: 0.9990060329437256,grad_norm: 0.9999990125468307, iteration: 174481
loss: 0.9730567336082458,grad_norm: 0.9999989123856444, iteration: 174482
loss: 1.0275787115097046,grad_norm: 0.9987352550798931, iteration: 174483
loss: 0.9634906649589539,grad_norm: 0.9999992466836827, iteration: 174484
loss: 0.9825447797775269,grad_norm: 0.999999172633771, iteration: 174485
loss: 0.9376811385154724,grad_norm: 0.9999991660388299, iteration: 174486
loss: 1.0094422101974487,grad_norm: 0.939084413184859, iteration: 174487
loss: 1.0234919786453247,grad_norm: 0.999999171163334, iteration: 174488
loss: 1.0159168243408203,grad_norm: 0.9654283473928811, iteration: 174489
loss: 0.9975816011428833,grad_norm: 0.9763779524431793, iteration: 174490
loss: 1.0066626071929932,grad_norm: 0.872243793474686, iteration: 174491
loss: 0.9644679427146912,grad_norm: 0.999999057160555, iteration: 174492
loss: 1.0079312324523926,grad_norm: 0.9534169397608372, iteration: 174493
loss: 1.0021567344665527,grad_norm: 0.9999990769525983, iteration: 174494
loss: 1.0285849571228027,grad_norm: 0.9999992193901788, iteration: 174495
loss: 1.0331425666809082,grad_norm: 0.9999991941301741, iteration: 174496
loss: 0.9878120422363281,grad_norm: 0.9999991653881788, iteration: 174497
loss: 0.9677709937095642,grad_norm: 0.9999991517050799, iteration: 174498
loss: 0.9975996613502502,grad_norm: 0.946041702514124, iteration: 174499
loss: 1.0255781412124634,grad_norm: 0.9524429981507084, iteration: 174500
loss: 0.9538986086845398,grad_norm: 0.9925432071265607, iteration: 174501
loss: 1.0241352319717407,grad_norm: 0.9999994073847833, iteration: 174502
loss: 1.045609712600708,grad_norm: 0.815841855678037, iteration: 174503
loss: 1.270821213722229,grad_norm: 0.9999996661229053, iteration: 174504
loss: 0.9861195683479309,grad_norm: 0.9355137499464022, iteration: 174505
loss: 1.1320526599884033,grad_norm: 0.9999992148538496, iteration: 174506
loss: 1.130820393562317,grad_norm: 0.9999994959309689, iteration: 174507
loss: 0.9801018834114075,grad_norm: 0.9999990016802072, iteration: 174508
loss: 1.0057220458984375,grad_norm: 0.9999993002050892, iteration: 174509
loss: 1.0062893629074097,grad_norm: 0.9691301168729911, iteration: 174510
loss: 0.9895864129066467,grad_norm: 0.9999991653649656, iteration: 174511
loss: 1.0028187036514282,grad_norm: 0.9999992701546869, iteration: 174512
loss: 1.005226731300354,grad_norm: 0.8631407128898213, iteration: 174513
loss: 1.028332233428955,grad_norm: 0.9608493718745057, iteration: 174514
loss: 1.010848045349121,grad_norm: 0.9999990509181172, iteration: 174515
loss: 0.997119128704071,grad_norm: 0.9999991823102604, iteration: 174516
loss: 1.0278013944625854,grad_norm: 0.9999991569952097, iteration: 174517
loss: 1.0186141729354858,grad_norm: 0.9999991281088196, iteration: 174518
loss: 0.9836450219154358,grad_norm: 0.999999189004815, iteration: 174519
loss: 1.0104373693466187,grad_norm: 0.9294178786478864, iteration: 174520
loss: 0.9791879653930664,grad_norm: 0.9880578003430358, iteration: 174521
loss: 0.9891439080238342,grad_norm: 0.9999992327314029, iteration: 174522
loss: 1.0376101732254028,grad_norm: 0.9846933958372855, iteration: 174523
loss: 1.0162346363067627,grad_norm: 0.7895549672956939, iteration: 174524
loss: 1.0516849756240845,grad_norm: 0.999999999792882, iteration: 174525
loss: 1.0034716129302979,grad_norm: 0.9999991112862582, iteration: 174526
loss: 1.0242345333099365,grad_norm: 0.9440203962281888, iteration: 174527
loss: 0.9978996515274048,grad_norm: 0.9999990290020107, iteration: 174528
loss: 0.9849861860275269,grad_norm: 0.9160304035792068, iteration: 174529
loss: 0.991700291633606,grad_norm: 0.9999991402589974, iteration: 174530
loss: 1.0613254308700562,grad_norm: 0.9999991691242068, iteration: 174531
loss: 0.991437554359436,grad_norm: 0.87649289486118, iteration: 174532
loss: 0.9966037273406982,grad_norm: 0.8923163556944428, iteration: 174533
loss: 1.0244508981704712,grad_norm: 0.9999989680193336, iteration: 174534
loss: 1.0026628971099854,grad_norm: 0.9999989534928458, iteration: 174535
loss: 0.9811045527458191,grad_norm: 0.9999990621239193, iteration: 174536
loss: 1.0171812772750854,grad_norm: 0.9999993388872609, iteration: 174537
loss: 0.9860637187957764,grad_norm: 0.9999989588419484, iteration: 174538
loss: 0.9677271842956543,grad_norm: 0.9999993636424037, iteration: 174539
loss: 0.9959433674812317,grad_norm: 0.999999391614185, iteration: 174540
loss: 0.966806948184967,grad_norm: 0.9999992268225951, iteration: 174541
loss: 1.0083825588226318,grad_norm: 0.9999991795373082, iteration: 174542
loss: 0.9823834896087646,grad_norm: 0.9928307352904271, iteration: 174543
loss: 0.9894345998764038,grad_norm: 0.8308325613328016, iteration: 174544
loss: 1.0139358043670654,grad_norm: 0.8797265993028589, iteration: 174545
loss: 1.0034613609313965,grad_norm: 0.9956437561016823, iteration: 174546
loss: 0.9709381461143494,grad_norm: 0.9063369660244213, iteration: 174547
loss: 0.9871290922164917,grad_norm: 0.9999992379470668, iteration: 174548
loss: 1.0300580263137817,grad_norm: 0.8254340724301632, iteration: 174549
loss: 1.0211191177368164,grad_norm: 0.9999992963068493, iteration: 174550
loss: 1.0407334566116333,grad_norm: 0.9956170096161772, iteration: 174551
loss: 0.9976591467857361,grad_norm: 0.9837327866277605, iteration: 174552
loss: 0.9584755301475525,grad_norm: 0.9999990697604447, iteration: 174553
loss: 0.9868766665458679,grad_norm: 0.9456642792916957, iteration: 174554
loss: 0.99367755651474,grad_norm: 0.9250779182101319, iteration: 174555
loss: 1.0173949003219604,grad_norm: 0.8558293096390189, iteration: 174556
loss: 0.9758641123771667,grad_norm: 0.9999992392985386, iteration: 174557
loss: 0.9894222617149353,grad_norm: 0.8850071696769714, iteration: 174558
loss: 0.9705842733383179,grad_norm: 0.9767042379933293, iteration: 174559
loss: 0.9931656718254089,grad_norm: 0.940590963919193, iteration: 174560
loss: 1.0169339179992676,grad_norm: 0.9451121030059753, iteration: 174561
loss: 1.033737063407898,grad_norm: 0.9999990195450645, iteration: 174562
loss: 1.0008158683776855,grad_norm: 0.974107618752246, iteration: 174563
loss: 0.9718553423881531,grad_norm: 0.9983516270636416, iteration: 174564
loss: 0.9866055846214294,grad_norm: 0.9677019497373014, iteration: 174565
loss: 1.0078868865966797,grad_norm: 0.999998972893621, iteration: 174566
loss: 1.0136709213256836,grad_norm: 0.9500110798390085, iteration: 174567
loss: 1.0042353868484497,grad_norm: 0.8351326695295925, iteration: 174568
loss: 0.9980804920196533,grad_norm: 0.9271375227635908, iteration: 174569
loss: 0.9891128540039062,grad_norm: 0.9999998151242517, iteration: 174570
loss: 0.9842093586921692,grad_norm: 0.9999991464899711, iteration: 174571
loss: 0.9939641356468201,grad_norm: 0.9999992428394882, iteration: 174572
loss: 0.989819347858429,grad_norm: 0.9999990663448941, iteration: 174573
loss: 0.9999046921730042,grad_norm: 0.9999991482691956, iteration: 174574
loss: 0.9950484037399292,grad_norm: 0.9657146517257295, iteration: 174575
loss: 0.9940235614776611,grad_norm: 0.9673810599270113, iteration: 174576
loss: 1.0257399082183838,grad_norm: 0.9999991433891577, iteration: 174577
loss: 1.0113446712493896,grad_norm: 0.9999991326887104, iteration: 174578
loss: 0.9561174511909485,grad_norm: 0.9922991557340353, iteration: 174579
loss: 0.9617411494255066,grad_norm: 0.9999991926753494, iteration: 174580
loss: 0.9947076439857483,grad_norm: 0.8100664751531228, iteration: 174581
loss: 0.9967901706695557,grad_norm: 0.9527114259124648, iteration: 174582
loss: 1.0096899271011353,grad_norm: 0.9256257249126242, iteration: 174583
loss: 0.9702735543251038,grad_norm: 0.8997678107680667, iteration: 174584
loss: 0.9950478076934814,grad_norm: 0.8618085222601759, iteration: 174585
loss: 1.0177202224731445,grad_norm: 0.9999991119242836, iteration: 174586
loss: 1.0015596151351929,grad_norm: 0.9999990701283228, iteration: 174587
loss: 0.9779990315437317,grad_norm: 0.8208910057576023, iteration: 174588
loss: 0.9900075197219849,grad_norm: 0.9117758911404251, iteration: 174589
loss: 1.0207507610321045,grad_norm: 0.9999991134454917, iteration: 174590
loss: 1.0086042881011963,grad_norm: 0.9999999263105098, iteration: 174591
loss: 1.009231448173523,grad_norm: 0.923310973967436, iteration: 174592
loss: 0.9982098340988159,grad_norm: 0.9999990727073634, iteration: 174593
loss: 0.9924135804176331,grad_norm: 0.9388622581065084, iteration: 174594
loss: 1.0446587800979614,grad_norm: 0.9593582195416005, iteration: 174595
loss: 1.0181432962417603,grad_norm: 0.9999990981247199, iteration: 174596
loss: 1.0081006288528442,grad_norm: 0.9999991843495258, iteration: 174597
loss: 0.9623304605484009,grad_norm: 0.999998947839032, iteration: 174598
loss: 0.9618827104568481,grad_norm: 0.999999140369262, iteration: 174599
loss: 0.9915047287940979,grad_norm: 0.9999990149126396, iteration: 174600
loss: 0.9949808716773987,grad_norm: 0.7843497359647795, iteration: 174601
loss: 0.9924652576446533,grad_norm: 0.9999992052307626, iteration: 174602
loss: 0.9476001858711243,grad_norm: 0.8489684610650642, iteration: 174603
loss: 0.9761695265769958,grad_norm: 0.9999992428839082, iteration: 174604
loss: 1.009816288948059,grad_norm: 0.955694825857065, iteration: 174605
loss: 0.9806004762649536,grad_norm: 0.886139599129536, iteration: 174606
loss: 1.011749505996704,grad_norm: 0.9455715074623803, iteration: 174607
loss: 0.9349706172943115,grad_norm: 0.99999914491245, iteration: 174608
loss: 0.9896876811981201,grad_norm: 0.9999990358746108, iteration: 174609
loss: 0.9789698123931885,grad_norm: 0.9999990787225647, iteration: 174610
loss: 0.9820385575294495,grad_norm: 0.9999990562335973, iteration: 174611
loss: 0.97076815366745,grad_norm: 0.8700074412730596, iteration: 174612
loss: 1.0226143598556519,grad_norm: 0.9999991305476862, iteration: 174613
loss: 1.0200059413909912,grad_norm: 0.9844634708835005, iteration: 174614
loss: 1.022660732269287,grad_norm: 0.907229487745878, iteration: 174615
loss: 1.0022573471069336,grad_norm: 0.9999991488021868, iteration: 174616
loss: 0.9957970976829529,grad_norm: 0.9999988819719614, iteration: 174617
loss: 1.0259332656860352,grad_norm: 0.9310141326140431, iteration: 174618
loss: 1.0182608366012573,grad_norm: 0.9817127582477394, iteration: 174619
loss: 1.0090852975845337,grad_norm: 0.9999991876052542, iteration: 174620
loss: 0.9751073122024536,grad_norm: 0.9999991061935392, iteration: 174621
loss: 1.0153439044952393,grad_norm: 0.9473808616660394, iteration: 174622
loss: 0.967290461063385,grad_norm: 0.9647149718050505, iteration: 174623
loss: 0.9778732061386108,grad_norm: 0.8744177322522437, iteration: 174624
loss: 1.0011000633239746,grad_norm: 0.874395165090468, iteration: 174625
loss: 0.9522680640220642,grad_norm: 0.9999991110884152, iteration: 174626
loss: 0.9788339138031006,grad_norm: 0.94076171345532, iteration: 174627
loss: 1.0177416801452637,grad_norm: 0.9999991349831618, iteration: 174628
loss: 0.981226921081543,grad_norm: 0.9177251289939805, iteration: 174629
loss: 0.9888433814048767,grad_norm: 0.9999991999834887, iteration: 174630
loss: 0.971362829208374,grad_norm: 0.9999988867321825, iteration: 174631
loss: 1.0244252681732178,grad_norm: 0.9999992458445539, iteration: 174632
loss: 1.0033771991729736,grad_norm: 0.9999990550657416, iteration: 174633
loss: 0.9902365207672119,grad_norm: 0.8706678920038694, iteration: 174634
loss: 0.9975025057792664,grad_norm: 0.8581712340278461, iteration: 174635
loss: 0.957068145275116,grad_norm: 0.9999992609419722, iteration: 174636
loss: 1.0184255838394165,grad_norm: 0.9999992003047025, iteration: 174637
loss: 0.9931136965751648,grad_norm: 0.8570313025340762, iteration: 174638
loss: 0.9702866077423096,grad_norm: 0.9038360743036153, iteration: 174639
loss: 1.01102614402771,grad_norm: 0.9999991582519768, iteration: 174640
loss: 1.0345743894577026,grad_norm: 0.9999994331703865, iteration: 174641
loss: 0.9746789932250977,grad_norm: 0.9999990853531406, iteration: 174642
loss: 0.9836541414260864,grad_norm: 0.9999992650236037, iteration: 174643
loss: 1.018833875656128,grad_norm: 0.999999227341188, iteration: 174644
loss: 1.0255279541015625,grad_norm: 0.9520227924122204, iteration: 174645
loss: 1.0014499425888062,grad_norm: 0.967139119103738, iteration: 174646
loss: 0.9964309930801392,grad_norm: 0.9999991100334727, iteration: 174647
loss: 0.9722119569778442,grad_norm: 0.7791094515785214, iteration: 174648
loss: 0.9843036532402039,grad_norm: 0.9999991024537083, iteration: 174649
loss: 0.9528123736381531,grad_norm: 0.9591510830047313, iteration: 174650
loss: 1.0275410413742065,grad_norm: 0.9999991089078799, iteration: 174651
loss: 1.0152337551116943,grad_norm: 0.8705178945960668, iteration: 174652
loss: 0.986702024936676,grad_norm: 0.9003835989621999, iteration: 174653
loss: 0.973268449306488,grad_norm: 0.8472908736690976, iteration: 174654
loss: 0.9798436760902405,grad_norm: 0.8067018595423496, iteration: 174655
loss: 1.0370352268218994,grad_norm: 0.8405139321608575, iteration: 174656
loss: 1.0338317155838013,grad_norm: 0.9068770598139648, iteration: 174657
loss: 0.9912497997283936,grad_norm: 0.9999991164586849, iteration: 174658
loss: 0.9770582318305969,grad_norm: 0.9245261856707674, iteration: 174659
loss: 0.9620147347450256,grad_norm: 0.9999990462035616, iteration: 174660
loss: 0.9411845803260803,grad_norm: 0.9999990389919811, iteration: 174661
loss: 0.9719753861427307,grad_norm: 0.8671208704165716, iteration: 174662
loss: 0.9845125079154968,grad_norm: 0.9400974349155792, iteration: 174663
loss: 1.0342003107070923,grad_norm: 0.8719704076546594, iteration: 174664
loss: 1.0164966583251953,grad_norm: 0.8768090481767946, iteration: 174665
loss: 0.9539205431938171,grad_norm: 0.989123432473181, iteration: 174666
loss: 0.9874305725097656,grad_norm: 0.8980548323189331, iteration: 174667
loss: 0.9864597320556641,grad_norm: 0.972285052701128, iteration: 174668
loss: 0.9561084508895874,grad_norm: 0.8330428939462681, iteration: 174669
loss: 0.977378785610199,grad_norm: 0.8164695260591431, iteration: 174670
loss: 0.9871018528938293,grad_norm: 0.9999989491125514, iteration: 174671
loss: 1.0286153554916382,grad_norm: 0.9999997823528493, iteration: 174672
loss: 1.0082743167877197,grad_norm: 0.9568080364279588, iteration: 174673
loss: 0.9999561309814453,grad_norm: 0.8801680497047373, iteration: 174674
loss: 1.0195398330688477,grad_norm: 0.9999991372199853, iteration: 174675
loss: 1.0182925462722778,grad_norm: 0.999999003365969, iteration: 174676
loss: 0.9896355867385864,grad_norm: 0.9999992867261036, iteration: 174677
loss: 0.995466411113739,grad_norm: 0.8839380560514418, iteration: 174678
loss: 0.9743031859397888,grad_norm: 0.9999991590163215, iteration: 174679
loss: 1.010201334953308,grad_norm: 0.9999989988436488, iteration: 174680
loss: 0.9962786436080933,grad_norm: 0.9999996860959853, iteration: 174681
loss: 1.0182021856307983,grad_norm: 0.9999993112347377, iteration: 174682
loss: 0.9867412447929382,grad_norm: 0.7939707462555411, iteration: 174683
loss: 0.9519954919815063,grad_norm: 0.9999992006146258, iteration: 174684
loss: 0.9815027713775635,grad_norm: 0.8560389751951893, iteration: 174685
loss: 1.0213173627853394,grad_norm: 0.999999279913925, iteration: 174686
loss: 0.9800301194190979,grad_norm: 0.9999998673748278, iteration: 174687
loss: 1.0124307870864868,grad_norm: 0.8398779932104485, iteration: 174688
loss: 1.0004130601882935,grad_norm: 0.971876906565431, iteration: 174689
loss: 1.0508548021316528,grad_norm: 0.9999993380007647, iteration: 174690
loss: 0.9898550510406494,grad_norm: 0.9999990176703538, iteration: 174691
loss: 0.9894563555717468,grad_norm: 0.9303817875555721, iteration: 174692
loss: 0.9876846075057983,grad_norm: 0.9350207939328158, iteration: 174693
loss: 1.0122802257537842,grad_norm: 0.9263770769042918, iteration: 174694
loss: 0.9936662316322327,grad_norm: 0.9432687228084304, iteration: 174695
loss: 1.0158292055130005,grad_norm: 0.9838548502803994, iteration: 174696
loss: 1.0196142196655273,grad_norm: 0.8736301451033693, iteration: 174697
loss: 1.0187345743179321,grad_norm: 0.9999991709103545, iteration: 174698
loss: 1.031807541847229,grad_norm: 0.9999997911294235, iteration: 174699
loss: 0.9869014620780945,grad_norm: 0.9999991404720063, iteration: 174700
loss: 1.017553448677063,grad_norm: 0.9999993079854868, iteration: 174701
loss: 0.9963394999504089,grad_norm: 0.9999992044865835, iteration: 174702
loss: 0.9739934206008911,grad_norm: 0.9999989288275576, iteration: 174703
loss: 1.018148422241211,grad_norm: 0.9210963653805139, iteration: 174704
loss: 0.9711633920669556,grad_norm: 0.9999991900960747, iteration: 174705
loss: 1.0517853498458862,grad_norm: 0.9511311656484648, iteration: 174706
loss: 0.974880576133728,grad_norm: 0.9999991782928541, iteration: 174707
loss: 0.9884492754936218,grad_norm: 0.9999999039873572, iteration: 174708
loss: 1.0072656869888306,grad_norm: 0.9999991325356801, iteration: 174709
loss: 1.0044052600860596,grad_norm: 0.9999992769585793, iteration: 174710
loss: 1.0243438482284546,grad_norm: 0.9866805630503447, iteration: 174711
loss: 0.98773193359375,grad_norm: 0.9999991858111162, iteration: 174712
loss: 0.9832731485366821,grad_norm: 0.9586702499572883, iteration: 174713
loss: 0.9985484480857849,grad_norm: 0.9999992583462902, iteration: 174714
loss: 0.9746930003166199,grad_norm: 0.9999989636728753, iteration: 174715
loss: 0.9855238199234009,grad_norm: 0.9999990582779221, iteration: 174716
loss: 1.0315834283828735,grad_norm: 0.9934113472853864, iteration: 174717
loss: 0.9631913900375366,grad_norm: 0.9587162094932008, iteration: 174718
loss: 1.0117721557617188,grad_norm: 0.9949858564430246, iteration: 174719
loss: 0.9982419013977051,grad_norm: 0.9404850898850411, iteration: 174720
loss: 1.0041476488113403,grad_norm: 0.935718298439396, iteration: 174721
loss: 1.0084500312805176,grad_norm: 0.8342467206900136, iteration: 174722
loss: 1.0065032243728638,grad_norm: 0.9999991456982009, iteration: 174723
loss: 1.000545620918274,grad_norm: 0.9894533210131757, iteration: 174724
loss: 0.98039311170578,grad_norm: 0.9182240987780702, iteration: 174725
loss: 0.9936487674713135,grad_norm: 0.8985064890711271, iteration: 174726
loss: 1.0184760093688965,grad_norm: 0.9676138740558767, iteration: 174727
loss: 0.988120436668396,grad_norm: 0.9999991618959775, iteration: 174728
loss: 1.026248574256897,grad_norm: 0.9999990961122771, iteration: 174729
loss: 1.0136396884918213,grad_norm: 0.9999991884298235, iteration: 174730
loss: 1.0060739517211914,grad_norm: 0.886058795194284, iteration: 174731
loss: 0.9704117774963379,grad_norm: 0.8859187117209086, iteration: 174732
loss: 0.9599847793579102,grad_norm: 0.9999992974096948, iteration: 174733
loss: 1.009779453277588,grad_norm: 0.935601409649137, iteration: 174734
loss: 0.9965943694114685,grad_norm: 0.9192185551562506, iteration: 174735
loss: 0.9803329706192017,grad_norm: 0.8723168677575464, iteration: 174736
loss: 0.9778305292129517,grad_norm: 0.9999990019213776, iteration: 174737
loss: 1.01054048538208,grad_norm: 0.9999990541518224, iteration: 174738
loss: 0.989594578742981,grad_norm: 0.9999990936185891, iteration: 174739
loss: 0.9888555407524109,grad_norm: 0.8147439687801942, iteration: 174740
loss: 0.9986434578895569,grad_norm: 0.9425192036628665, iteration: 174741
loss: 0.9990091323852539,grad_norm: 0.9999991810279669, iteration: 174742
loss: 0.9902642965316772,grad_norm: 0.9999995825306894, iteration: 174743
loss: 0.9860128164291382,grad_norm: 0.9999993732695956, iteration: 174744
loss: 1.0102277994155884,grad_norm: 0.9999991447428669, iteration: 174745
loss: 0.9918971657752991,grad_norm: 0.8194410089020915, iteration: 174746
loss: 0.9847849011421204,grad_norm: 0.8869649215257228, iteration: 174747
loss: 0.9898155927658081,grad_norm: 0.9999991325408066, iteration: 174748
loss: 0.9836104512214661,grad_norm: 0.9960377936971488, iteration: 174749
loss: 0.983337938785553,grad_norm: 0.9999991129623563, iteration: 174750
loss: 0.9902647137641907,grad_norm: 0.8806617796285294, iteration: 174751
loss: 0.9975316524505615,grad_norm: 0.9378097852330844, iteration: 174752
loss: 1.0032408237457275,grad_norm: 0.8400371557082489, iteration: 174753
loss: 0.9931281208992004,grad_norm: 0.9460529536558777, iteration: 174754
loss: 1.014915108680725,grad_norm: 0.9999994776682769, iteration: 174755
loss: 1.0031633377075195,grad_norm: 0.9999990475562347, iteration: 174756
loss: 1.0280011892318726,grad_norm: 0.9133943810687019, iteration: 174757
loss: 0.9853479862213135,grad_norm: 0.9300048619363863, iteration: 174758
loss: 0.9708487391471863,grad_norm: 0.8338478993410743, iteration: 174759
loss: 1.0209035873413086,grad_norm: 0.9004901731210798, iteration: 174760
loss: 0.9993489980697632,grad_norm: 0.9999990581092189, iteration: 174761
loss: 0.9939412474632263,grad_norm: 0.9878793460218778, iteration: 174762
loss: 0.9834360480308533,grad_norm: 0.9999992179509394, iteration: 174763
loss: 1.0320051908493042,grad_norm: 0.9999992551824579, iteration: 174764
loss: 1.0240697860717773,grad_norm: 0.9999994636609357, iteration: 174765
loss: 1.0153827667236328,grad_norm: 0.9048416970112043, iteration: 174766
loss: 1.026536464691162,grad_norm: 0.8359771615104628, iteration: 174767
loss: 0.9771738052368164,grad_norm: 0.9999992285136441, iteration: 174768
loss: 0.9859972596168518,grad_norm: 0.8808225373633446, iteration: 174769
loss: 1.0145957469940186,grad_norm: 0.9999991196693925, iteration: 174770
loss: 0.9929044246673584,grad_norm: 0.9999990712028438, iteration: 174771
loss: 1.0154714584350586,grad_norm: 0.8569917549499649, iteration: 174772
loss: 1.0019428730010986,grad_norm: 0.8856214020185371, iteration: 174773
loss: 0.986529529094696,grad_norm: 0.9898315376509266, iteration: 174774
loss: 1.0201059579849243,grad_norm: 0.9999991177743511, iteration: 174775
loss: 1.0295865535736084,grad_norm: 0.9883042838524637, iteration: 174776
loss: 1.0000959634780884,grad_norm: 0.9999990489728138, iteration: 174777
loss: 0.9617195129394531,grad_norm: 0.9909286315538197, iteration: 174778
loss: 0.9508805871009827,grad_norm: 0.9999991285559265, iteration: 174779
loss: 1.043661117553711,grad_norm: 0.9999995191654063, iteration: 174780
loss: 1.0362746715545654,grad_norm: 0.9260195539806418, iteration: 174781
loss: 0.9976189732551575,grad_norm: 0.9999991530297268, iteration: 174782
loss: 1.01221764087677,grad_norm: 0.9999989568434714, iteration: 174783
loss: 0.9691317677497864,grad_norm: 0.899008406335662, iteration: 174784
loss: 1.0222032070159912,grad_norm: 0.856320010433291, iteration: 174785
loss: 0.9843466877937317,grad_norm: 0.9571106804574441, iteration: 174786
loss: 0.9778579473495483,grad_norm: 0.9999992560947131, iteration: 174787
loss: 1.0234005451202393,grad_norm: 0.9999992183120388, iteration: 174788
loss: 0.9811170101165771,grad_norm: 0.9254139573869269, iteration: 174789
loss: 1.0224440097808838,grad_norm: 0.9999990159471082, iteration: 174790
loss: 1.007466435432434,grad_norm: 0.8339327684615462, iteration: 174791
loss: 1.036259412765503,grad_norm: 0.9469370214309402, iteration: 174792
loss: 1.0154294967651367,grad_norm: 0.969278721210946, iteration: 174793
loss: 0.9686000943183899,grad_norm: 0.9999991863892362, iteration: 174794
loss: 0.9803653359413147,grad_norm: 0.8484693298075655, iteration: 174795
loss: 0.9547610878944397,grad_norm: 0.8853328006005631, iteration: 174796
loss: 1.0103380680084229,grad_norm: 0.9163533342739284, iteration: 174797
loss: 0.9546905755996704,grad_norm: 0.9105209373503201, iteration: 174798
loss: 1.1264973878860474,grad_norm: 0.9999994747925234, iteration: 174799
loss: 1.0249555110931396,grad_norm: 0.9999998795289302, iteration: 174800
loss: 1.0175979137420654,grad_norm: 0.9900308390637252, iteration: 174801
loss: 0.9786438345909119,grad_norm: 0.9999990863091555, iteration: 174802
loss: 1.0244494676589966,grad_norm: 0.9034850242961207, iteration: 174803
loss: 1.0155869722366333,grad_norm: 0.999999189333937, iteration: 174804
loss: 0.9888798594474792,grad_norm: 0.9999992017075864, iteration: 174805
loss: 0.9726933240890503,grad_norm: 0.9999992444248207, iteration: 174806
loss: 1.0281589031219482,grad_norm: 0.9600700028828651, iteration: 174807
loss: 1.0126187801361084,grad_norm: 0.9723395088148421, iteration: 174808
loss: 1.0264174938201904,grad_norm: 0.9999991102768829, iteration: 174809
loss: 1.012107491493225,grad_norm: 0.9999990892151002, iteration: 174810
loss: 0.9814504384994507,grad_norm: 0.9609524841016797, iteration: 174811
loss: 1.1541539430618286,grad_norm: 0.9999998652495774, iteration: 174812
loss: 0.9789064526557922,grad_norm: 0.9646536283995346, iteration: 174813
loss: 0.9914385676383972,grad_norm: 0.9999990957648418, iteration: 174814
loss: 1.0212109088897705,grad_norm: 0.9999990865340529, iteration: 174815
loss: 0.9775574803352356,grad_norm: 0.9407584769163241, iteration: 174816
loss: 1.0003942251205444,grad_norm: 0.8723996619508677, iteration: 174817
loss: 1.016069769859314,grad_norm: 0.9999991439650194, iteration: 174818
loss: 1.0187065601348877,grad_norm: 0.9999992446902911, iteration: 174819
loss: 1.0539593696594238,grad_norm: 0.8988506485951945, iteration: 174820
loss: 1.0201700925827026,grad_norm: 0.9115083499097291, iteration: 174821
loss: 1.0344717502593994,grad_norm: 0.9999990698441786, iteration: 174822
loss: 0.9932168126106262,grad_norm: 0.9999990488268635, iteration: 174823
loss: 0.9976250529289246,grad_norm: 0.9999992987555713, iteration: 174824
loss: 1.0200234651565552,grad_norm: 0.9999992317738694, iteration: 174825
loss: 0.9631838202476501,grad_norm: 0.9999990640090913, iteration: 174826
loss: 1.0080407857894897,grad_norm: 0.9999990946548495, iteration: 174827
loss: 1.0015950202941895,grad_norm: 0.913689487570428, iteration: 174828
loss: 0.9552720189094543,grad_norm: 0.9999991336244072, iteration: 174829
loss: 1.003412127494812,grad_norm: 0.9999991394973473, iteration: 174830
loss: 0.9843298196792603,grad_norm: 0.8623886917413852, iteration: 174831
loss: 1.025032877922058,grad_norm: 0.9999991245225281, iteration: 174832
loss: 0.9949678778648376,grad_norm: 0.7806306579888194, iteration: 174833
loss: 1.0224249362945557,grad_norm: 0.9999991673997319, iteration: 174834
loss: 1.0154324769973755,grad_norm: 0.9999992282920399, iteration: 174835
loss: 1.021059513092041,grad_norm: 0.9999990579099183, iteration: 174836
loss: 1.0057779550552368,grad_norm: 0.9999990472905725, iteration: 174837
loss: 0.9521386027336121,grad_norm: 0.9999992250682435, iteration: 174838
loss: 0.9861212968826294,grad_norm: 0.9303604462598948, iteration: 174839
loss: 1.0045545101165771,grad_norm: 0.8893112410489206, iteration: 174840
loss: 0.9832037091255188,grad_norm: 0.9999991209664714, iteration: 174841
loss: 1.0217267274856567,grad_norm: 0.9999992758427171, iteration: 174842
loss: 0.9884593486785889,grad_norm: 0.9999991623045201, iteration: 174843
loss: 0.998005211353302,grad_norm: 0.9532173629817218, iteration: 174844
loss: 0.9749243855476379,grad_norm: 0.9747972479795235, iteration: 174845
loss: 0.9663649201393127,grad_norm: 0.9999991335236134, iteration: 174846
loss: 0.9654783606529236,grad_norm: 0.9999989910793241, iteration: 174847
loss: 0.9972294569015503,grad_norm: 0.8946377354800906, iteration: 174848
loss: 0.9881488680839539,grad_norm: 0.999999087863854, iteration: 174849
loss: 1.007580041885376,grad_norm: 0.9999989789205421, iteration: 174850
loss: 0.9883463382720947,grad_norm: 0.9999991557866635, iteration: 174851
loss: 1.0189579725265503,grad_norm: 0.9446313002977553, iteration: 174852
loss: 0.9990402460098267,grad_norm: 0.9828379816543602, iteration: 174853
loss: 0.9868025779724121,grad_norm: 0.904798446784629, iteration: 174854
loss: 0.9579585790634155,grad_norm: 0.9653785700916816, iteration: 174855
loss: 1.0092421770095825,grad_norm: 0.9999992463280951, iteration: 174856
loss: 1.0059444904327393,grad_norm: 0.9817929498016695, iteration: 174857
loss: 0.9548171758651733,grad_norm: 0.9999992090905104, iteration: 174858
loss: 1.0278258323669434,grad_norm: 0.9999992089709968, iteration: 174859
loss: 1.01038658618927,grad_norm: 0.9999992131992242, iteration: 174860
loss: 1.0028175115585327,grad_norm: 0.9999990208054844, iteration: 174861
loss: 1.0161008834838867,grad_norm: 0.9999991067815819, iteration: 174862
loss: 1.0192304849624634,grad_norm: 0.9999993144874655, iteration: 174863
loss: 0.9765961170196533,grad_norm: 0.9239354856312725, iteration: 174864
loss: 1.0076459646224976,grad_norm: 0.9662167207236856, iteration: 174865
loss: 0.9910542964935303,grad_norm: 0.999999133165326, iteration: 174866
loss: 0.9899418950080872,grad_norm: 0.9999991754040805, iteration: 174867
loss: 1.0124822854995728,grad_norm: 0.8687439036149771, iteration: 174868
loss: 0.9527952671051025,grad_norm: 0.9553121211719051, iteration: 174869
loss: 1.0133873224258423,grad_norm: 0.9999995342740392, iteration: 174870
loss: 1.0185050964355469,grad_norm: 0.8706383211411507, iteration: 174871
loss: 0.9618947505950928,grad_norm: 0.9999990994268153, iteration: 174872
loss: 1.0190494060516357,grad_norm: 0.9146214261185333, iteration: 174873
loss: 0.9944101572036743,grad_norm: 0.9999991182840746, iteration: 174874
loss: 1.000157117843628,grad_norm: 0.8356345609530372, iteration: 174875
loss: 1.0181139707565308,grad_norm: 0.9999993617266212, iteration: 174876
loss: 0.970849871635437,grad_norm: 0.8233994940294301, iteration: 174877
loss: 1.0093109607696533,grad_norm: 0.9999991135841888, iteration: 174878
loss: 0.9908473491668701,grad_norm: 0.9999991206106452, iteration: 174879
loss: 0.9769499897956848,grad_norm: 0.9159150095506287, iteration: 174880
loss: 0.9961320161819458,grad_norm: 0.9999991064197126, iteration: 174881
loss: 1.0089372396469116,grad_norm: 0.9822296065732519, iteration: 174882
loss: 1.002642035484314,grad_norm: 0.9459240803035871, iteration: 174883
loss: 1.0144487619400024,grad_norm: 0.9133242038617209, iteration: 174884
loss: 1.0026885271072388,grad_norm: 0.9999992073772325, iteration: 174885
loss: 0.9845498204231262,grad_norm: 0.9999990677148093, iteration: 174886
loss: 0.9878740906715393,grad_norm: 0.9862081253770976, iteration: 174887
loss: 1.0250310897827148,grad_norm: 0.9212585543195224, iteration: 174888
loss: 0.9771775007247925,grad_norm: 0.9791879213842624, iteration: 174889
loss: 0.9874281287193298,grad_norm: 0.9999991501480284, iteration: 174890
loss: 1.0137275457382202,grad_norm: 0.931087371590076, iteration: 174891
loss: 0.9751391410827637,grad_norm: 0.9481992768931038, iteration: 174892
loss: 0.9771448969841003,grad_norm: 0.8907546588427093, iteration: 174893
loss: 1.0466866493225098,grad_norm: 0.9469470805728348, iteration: 174894
loss: 0.9691529273986816,grad_norm: 0.9999992615584358, iteration: 174895
loss: 1.0163037776947021,grad_norm: 0.8704629579366747, iteration: 174896
loss: 0.9872701168060303,grad_norm: 0.8574237156827583, iteration: 174897
loss: 1.0168159008026123,grad_norm: 0.9418035535609569, iteration: 174898
loss: 1.026248812675476,grad_norm: 0.9999991354091841, iteration: 174899
loss: 1.0149962902069092,grad_norm: 0.9999991208917255, iteration: 174900
loss: 1.0151333808898926,grad_norm: 0.790010888301692, iteration: 174901
loss: 0.9855020046234131,grad_norm: 0.8523140899772286, iteration: 174902
loss: 1.0835448503494263,grad_norm: 0.9999991692882141, iteration: 174903
loss: 1.0270593166351318,grad_norm: 0.9444165862867212, iteration: 174904
loss: 1.0053597688674927,grad_norm: 0.9999989679326651, iteration: 174905
loss: 1.000847578048706,grad_norm: 0.9999996083795659, iteration: 174906
loss: 1.008493423461914,grad_norm: 0.9878392843040735, iteration: 174907
loss: 1.0253664255142212,grad_norm: 0.9999994200959275, iteration: 174908
loss: 1.002312421798706,grad_norm: 0.9759314841233817, iteration: 174909
loss: 1.0328199863433838,grad_norm: 0.9237770622721211, iteration: 174910
loss: 0.9755832552909851,grad_norm: 0.9776572109492303, iteration: 174911
loss: 1.0133484601974487,grad_norm: 0.9999990978305108, iteration: 174912
loss: 0.9949601888656616,grad_norm: 0.7876469683172962, iteration: 174913
loss: 0.9753126502037048,grad_norm: 0.9211272008418196, iteration: 174914
loss: 0.9994583129882812,grad_norm: 0.9999990158910292, iteration: 174915
loss: 0.9995987415313721,grad_norm: 0.8976399243751284, iteration: 174916
loss: 0.981179416179657,grad_norm: 0.9957789022024269, iteration: 174917
loss: 1.0400339365005493,grad_norm: 0.8693095308938368, iteration: 174918
loss: 0.9859219789505005,grad_norm: 0.9999990507943283, iteration: 174919
loss: 1.0062601566314697,grad_norm: 0.931666496067598, iteration: 174920
loss: 1.0314019918441772,grad_norm: 0.9999992207772486, iteration: 174921
loss: 1.0406208038330078,grad_norm: 0.9999992326584802, iteration: 174922
loss: 1.0105525255203247,grad_norm: 0.9999990418211007, iteration: 174923
loss: 0.9923757910728455,grad_norm: 0.9999994380836903, iteration: 174924
loss: 0.998661458492279,grad_norm: 0.9238421785478368, iteration: 174925
loss: 1.0188312530517578,grad_norm: 0.9999990265189305, iteration: 174926
loss: 1.0045138597488403,grad_norm: 0.9999991099255974, iteration: 174927
loss: 0.9688417911529541,grad_norm: 0.9826275466078848, iteration: 174928
loss: 1.0021346807479858,grad_norm: 0.7727974411464116, iteration: 174929
loss: 1.0188018083572388,grad_norm: 0.9031748048111939, iteration: 174930
loss: 0.9926642179489136,grad_norm: 0.9999992033868496, iteration: 174931
loss: 1.0170751810073853,grad_norm: 0.9836017080444264, iteration: 174932
loss: 0.9666064977645874,grad_norm: 0.9883037995765788, iteration: 174933
loss: 0.9985854029655457,grad_norm: 0.9215114860550959, iteration: 174934
loss: 0.9984002709388733,grad_norm: 0.9266827205012768, iteration: 174935
loss: 0.9675896763801575,grad_norm: 0.999998965574068, iteration: 174936
loss: 0.9971162676811218,grad_norm: 0.9999991628392831, iteration: 174937
loss: 1.0034410953521729,grad_norm: 0.9306843781300755, iteration: 174938
loss: 1.0294862985610962,grad_norm: 0.9723603781964784, iteration: 174939
loss: 0.9955740571022034,grad_norm: 0.999999243234383, iteration: 174940
loss: 1.0133616924285889,grad_norm: 0.9964426280756411, iteration: 174941
loss: 0.9835015535354614,grad_norm: 0.8468509303241514, iteration: 174942
loss: 0.9767143726348877,grad_norm: 0.999999182048777, iteration: 174943
loss: 1.055277705192566,grad_norm: 0.9746738382269359, iteration: 174944
loss: 1.0183424949645996,grad_norm: 0.9999989755238101, iteration: 174945
loss: 1.0141308307647705,grad_norm: 0.9999991146695723, iteration: 174946
loss: 0.9774894118309021,grad_norm: 0.9999998884734449, iteration: 174947
loss: 1.0126287937164307,grad_norm: 0.8441860513668065, iteration: 174948
loss: 0.9994057416915894,grad_norm: 0.9507442014978441, iteration: 174949
loss: 1.0014939308166504,grad_norm: 0.9999993120450643, iteration: 174950
loss: 1.0054373741149902,grad_norm: 0.9036516773072323, iteration: 174951
loss: 1.0157829523086548,grad_norm: 0.8575353781504034, iteration: 174952
loss: 1.0204410552978516,grad_norm: 0.9999991615112575, iteration: 174953
loss: 1.0196491479873657,grad_norm: 0.8870418056929514, iteration: 174954
loss: 1.009561538696289,grad_norm: 0.999999273662376, iteration: 174955
loss: 1.001831293106079,grad_norm: 0.9999991857584235, iteration: 174956
loss: 1.0146112442016602,grad_norm: 0.9999990896603173, iteration: 174957
loss: 0.9907805919647217,grad_norm: 0.96840290681608, iteration: 174958
loss: 0.9769824147224426,grad_norm: 0.9999991825310652, iteration: 174959
loss: 0.9733771681785583,grad_norm: 0.8167109281366881, iteration: 174960
loss: 1.0075947046279907,grad_norm: 0.9999990092898182, iteration: 174961
loss: 1.0095665454864502,grad_norm: 0.9643046612508067, iteration: 174962
loss: 1.0081442594528198,grad_norm: 0.999999296318501, iteration: 174963
loss: 1.0218740701675415,grad_norm: 0.8712859578576442, iteration: 174964
loss: 1.0374747514724731,grad_norm: 0.9999990769743825, iteration: 174965
loss: 1.0329900979995728,grad_norm: 0.9999991743987742, iteration: 174966
loss: 1.0116848945617676,grad_norm: 0.9548690483821185, iteration: 174967
loss: 1.0059367418289185,grad_norm: 0.9999990096152928, iteration: 174968
loss: 0.9533459544181824,grad_norm: 0.880206280874143, iteration: 174969
loss: 1.0067797899246216,grad_norm: 0.9999991435546941, iteration: 174970
loss: 1.0059740543365479,grad_norm: 0.9999992707741244, iteration: 174971
loss: 1.0337371826171875,grad_norm: 0.987126111270784, iteration: 174972
loss: 0.9639016389846802,grad_norm: 0.999999045689758, iteration: 174973
loss: 0.981982409954071,grad_norm: 0.9999990497631108, iteration: 174974
loss: 0.9884414672851562,grad_norm: 0.999999135250534, iteration: 174975
loss: 0.9660866856575012,grad_norm: 0.999999230770735, iteration: 174976
loss: 0.9576356410980225,grad_norm: 0.9999992441221799, iteration: 174977
loss: 1.0083365440368652,grad_norm: 0.9412850969909795, iteration: 174978
loss: 1.0412654876708984,grad_norm: 0.999999152978432, iteration: 174979
loss: 0.9927464127540588,grad_norm: 0.8394846433789759, iteration: 174980
loss: 0.9957668781280518,grad_norm: 0.99999915265711, iteration: 174981
loss: 1.0116487741470337,grad_norm: 0.9999990919398185, iteration: 174982
loss: 0.9993513226509094,grad_norm: 0.9315754238991859, iteration: 174983
loss: 1.2090493440628052,grad_norm: 0.9999998048609349, iteration: 174984
loss: 0.988192081451416,grad_norm: 0.9999991707882141, iteration: 174985
loss: 1.0093647241592407,grad_norm: 0.9288763540464618, iteration: 174986
loss: 1.0139120817184448,grad_norm: 0.8648735973022378, iteration: 174987
loss: 0.9803445339202881,grad_norm: 0.9999990524131176, iteration: 174988
loss: 0.9686540365219116,grad_norm: 0.9999992585714006, iteration: 174989
loss: 0.9756929278373718,grad_norm: 0.9725722163730806, iteration: 174990
loss: 1.0178407430648804,grad_norm: 0.8936641253338359, iteration: 174991
loss: 1.016514778137207,grad_norm: 0.9999991390550824, iteration: 174992
loss: 0.9722051024436951,grad_norm: 0.9736596578187826, iteration: 174993
loss: 1.0195038318634033,grad_norm: 0.9026955976364635, iteration: 174994
loss: 1.0310585498809814,grad_norm: 0.9999992011885512, iteration: 174995
loss: 0.9914340376853943,grad_norm: 0.968216584386844, iteration: 174996
loss: 0.9673990607261658,grad_norm: 0.9999991351570899, iteration: 174997
loss: 0.9696306586265564,grad_norm: 0.999999134306976, iteration: 174998
loss: 0.9872397780418396,grad_norm: 0.8998323046025677, iteration: 174999
loss: 0.9908839464187622,grad_norm: 0.9999998099025394, iteration: 175000
loss: 1.0342694520950317,grad_norm: 0.9999990695004513, iteration: 175001
loss: 0.9804360270500183,grad_norm: 0.9547902778091827, iteration: 175002
loss: 1.009116530418396,grad_norm: 0.9457631628641701, iteration: 175003
loss: 1.0213674306869507,grad_norm: 0.9999991774448591, iteration: 175004
loss: 1.0932796001434326,grad_norm: 0.9999996043470528, iteration: 175005
loss: 1.0686097145080566,grad_norm: 0.9999994618896154, iteration: 175006
loss: 0.9936745762825012,grad_norm: 0.8593100857013557, iteration: 175007
loss: 0.993079423904419,grad_norm: 0.8946177614594132, iteration: 175008
loss: 1.0371763706207275,grad_norm: 0.9999995969902653, iteration: 175009
loss: 1.0095112323760986,grad_norm: 0.9999990747092987, iteration: 175010
loss: 0.9784887433052063,grad_norm: 0.9999992000003686, iteration: 175011
loss: 0.999420166015625,grad_norm: 0.8528862111026951, iteration: 175012
loss: 1.000240683555603,grad_norm: 0.999999043916563, iteration: 175013
loss: 1.0610761642456055,grad_norm: 0.9999993389821974, iteration: 175014
loss: 0.9936339855194092,grad_norm: 0.9999991871660083, iteration: 175015
loss: 1.0528701543807983,grad_norm: 0.9999992050018269, iteration: 175016
loss: 0.98142409324646,grad_norm: 0.9999991577296207, iteration: 175017
loss: 0.9961946606636047,grad_norm: 0.9999992841877704, iteration: 175018
loss: 0.9807527661323547,grad_norm: 0.9205785835282464, iteration: 175019
loss: 1.0055794715881348,grad_norm: 0.9409746697776679, iteration: 175020
loss: 0.9972658753395081,grad_norm: 0.9999991184772006, iteration: 175021
loss: 0.9960647821426392,grad_norm: 0.8619849495653713, iteration: 175022
loss: 0.9821135401725769,grad_norm: 0.8605111717701157, iteration: 175023
loss: 0.9784174561500549,grad_norm: 0.9005330663197422, iteration: 175024
loss: 0.9848771095275879,grad_norm: 0.8813423674544212, iteration: 175025
loss: 0.9639346599578857,grad_norm: 0.9139895000555188, iteration: 175026
loss: 0.99894779920578,grad_norm: 0.9999991047302481, iteration: 175027
loss: 1.0262601375579834,grad_norm: 0.9748925826893694, iteration: 175028
loss: 1.0052300691604614,grad_norm: 0.999999116355922, iteration: 175029
loss: 0.9890245795249939,grad_norm: 0.9999991375843688, iteration: 175030
loss: 0.9849665760993958,grad_norm: 0.9999991154067331, iteration: 175031
loss: 1.024511694908142,grad_norm: 0.7733154513008892, iteration: 175032
loss: 0.9884468913078308,grad_norm: 0.9939572376458109, iteration: 175033
loss: 1.021165370941162,grad_norm: 0.9999998370363717, iteration: 175034
loss: 0.9813112020492554,grad_norm: 0.9999990116188247, iteration: 175035
loss: 1.0009747743606567,grad_norm: 0.9999991142262553, iteration: 175036
loss: 1.0136733055114746,grad_norm: 0.9999990204173348, iteration: 175037
loss: 1.011794924736023,grad_norm: 0.9694284417450134, iteration: 175038
loss: 1.0401533842086792,grad_norm: 0.9500102327783413, iteration: 175039
loss: 1.0260541439056396,grad_norm: 0.9999993742486821, iteration: 175040
loss: 0.9874573945999146,grad_norm: 0.9999990802616351, iteration: 175041
loss: 1.005777359008789,grad_norm: 0.9390552819175918, iteration: 175042
loss: 0.9903132319450378,grad_norm: 0.9999992505958138, iteration: 175043
loss: 0.9922040700912476,grad_norm: 0.9301354482600344, iteration: 175044
loss: 0.9534950852394104,grad_norm: 0.9999991791125336, iteration: 175045
loss: 0.9836142659187317,grad_norm: 0.9999989985383718, iteration: 175046
loss: 0.9955487251281738,grad_norm: 0.9873446694501506, iteration: 175047
loss: 1.0090668201446533,grad_norm: 0.9999992781235931, iteration: 175048
loss: 1.0027434825897217,grad_norm: 0.9999989325921833, iteration: 175049
loss: 0.9978622794151306,grad_norm: 0.9999991351814805, iteration: 175050
loss: 1.0069900751113892,grad_norm: 0.9266661704169734, iteration: 175051
loss: 1.017728567123413,grad_norm: 0.9999990161949985, iteration: 175052
loss: 0.9906558394432068,grad_norm: 0.9999994062003197, iteration: 175053
loss: 1.0386385917663574,grad_norm: 0.9202338294048188, iteration: 175054
loss: 1.0029605627059937,grad_norm: 0.9999992465640489, iteration: 175055
loss: 0.9959241151809692,grad_norm: 0.9999991846278051, iteration: 175056
loss: 1.0248550176620483,grad_norm: 0.9999991076696146, iteration: 175057
loss: 0.9846743941307068,grad_norm: 0.9999990632750589, iteration: 175058
loss: 0.9963359832763672,grad_norm: 0.9999990400953257, iteration: 175059
loss: 1.02848482131958,grad_norm: 0.9999991616136057, iteration: 175060
loss: 1.0576645135879517,grad_norm: 0.9999996505307451, iteration: 175061
loss: 1.0016907453536987,grad_norm: 0.9999990695877689, iteration: 175062
loss: 1.0127018690109253,grad_norm: 0.9390465926797523, iteration: 175063
loss: 0.9847195744514465,grad_norm: 0.9312318384061002, iteration: 175064
loss: 1.0288643836975098,grad_norm: 0.9479056574610253, iteration: 175065
loss: 1.027413010597229,grad_norm: 0.9671369874125223, iteration: 175066
loss: 1.0022659301757812,grad_norm: 0.860777890877182, iteration: 175067
loss: 0.9615891575813293,grad_norm: 0.9999991075889609, iteration: 175068
loss: 1.0186710357666016,grad_norm: 0.9731262986346088, iteration: 175069
loss: 1.026180386543274,grad_norm: 0.929894390284659, iteration: 175070
loss: 0.9959184527397156,grad_norm: 0.9999990394543915, iteration: 175071
loss: 0.9942652583122253,grad_norm: 0.9403236281546672, iteration: 175072
loss: 1.00441312789917,grad_norm: 0.8723115924833127, iteration: 175073
loss: 0.9708598852157593,grad_norm: 0.929254297507591, iteration: 175074
loss: 1.0261131525039673,grad_norm: 0.9999992964422231, iteration: 175075
loss: 0.9604580402374268,grad_norm: 0.9999992600942695, iteration: 175076
loss: 1.0300283432006836,grad_norm: 0.9999990642524957, iteration: 175077
loss: 1.0385074615478516,grad_norm: 0.9232477535021718, iteration: 175078
loss: 0.9758509993553162,grad_norm: 0.9999990251454544, iteration: 175079
loss: 1.0086877346038818,grad_norm: 0.9999989760496397, iteration: 175080
loss: 1.0086383819580078,grad_norm: 0.9497428189847704, iteration: 175081
loss: 0.995299220085144,grad_norm: 0.9085123163068878, iteration: 175082
loss: 1.0075956583023071,grad_norm: 0.959683587511602, iteration: 175083
loss: 0.965031087398529,grad_norm: 0.9999991686745162, iteration: 175084
loss: 1.004777193069458,grad_norm: 0.9607737826425634, iteration: 175085
loss: 1.0665172338485718,grad_norm: 0.9999998867218821, iteration: 175086
loss: 1.0120197534561157,grad_norm: 0.99999909445033, iteration: 175087
loss: 1.0053303241729736,grad_norm: 0.9999992716720084, iteration: 175088
loss: 1.0198140144348145,grad_norm: 0.999999072044225, iteration: 175089
loss: 1.0181242227554321,grad_norm: 0.9999993604983299, iteration: 175090
loss: 1.0048320293426514,grad_norm: 0.8517140102514081, iteration: 175091
loss: 1.0098322629928589,grad_norm: 0.9999991886753229, iteration: 175092
loss: 1.0224876403808594,grad_norm: 0.9078161152321992, iteration: 175093
loss: 0.9858914613723755,grad_norm: 0.9999992375406885, iteration: 175094
loss: 1.0170212984085083,grad_norm: 0.999999054142006, iteration: 175095
loss: 0.9875895977020264,grad_norm: 0.999999061964922, iteration: 175096
loss: 1.0061308145523071,grad_norm: 0.9603185957278776, iteration: 175097
loss: 1.0035901069641113,grad_norm: 0.9999991919318755, iteration: 175098
loss: 0.978638768196106,grad_norm: 0.9999990902085185, iteration: 175099
loss: 0.9896770715713501,grad_norm: 0.9999990253897508, iteration: 175100
loss: 0.9848529100418091,grad_norm: 0.999999214807487, iteration: 175101
loss: 0.9930700063705444,grad_norm: 0.9876462335460977, iteration: 175102
loss: 1.0227413177490234,grad_norm: 0.969743474234901, iteration: 175103
loss: 1.0167361497879028,grad_norm: 0.9999992388619671, iteration: 175104
loss: 1.1040760278701782,grad_norm: 0.9999999694810291, iteration: 175105
loss: 1.0118250846862793,grad_norm: 0.890635216597324, iteration: 175106
loss: 0.9764612913131714,grad_norm: 0.9999990524610023, iteration: 175107
loss: 1.0276020765304565,grad_norm: 0.9999998123379442, iteration: 175108
loss: 1.0113961696624756,grad_norm: 0.9999991530338027, iteration: 175109
loss: 1.0067979097366333,grad_norm: 0.9960765664616517, iteration: 175110
loss: 0.9911143183708191,grad_norm: 0.999998993158468, iteration: 175111
loss: 0.9811123013496399,grad_norm: 0.9999991260972092, iteration: 175112
loss: 1.0024126768112183,grad_norm: 0.8685673747822823, iteration: 175113
loss: 0.9977658987045288,grad_norm: 0.932111850866382, iteration: 175114
loss: 0.9927254915237427,grad_norm: 0.9806865742445701, iteration: 175115
loss: 1.0007431507110596,grad_norm: 0.9999992795423477, iteration: 175116
loss: 1.0301945209503174,grad_norm: 0.9157898770309836, iteration: 175117
loss: 1.0228443145751953,grad_norm: 0.9386297298080262, iteration: 175118
loss: 0.9814891219139099,grad_norm: 0.9999990590346632, iteration: 175119
loss: 1.0360596179962158,grad_norm: 0.8270271266362064, iteration: 175120
loss: 0.9522016048431396,grad_norm: 0.9824267375032013, iteration: 175121
loss: 1.0052813291549683,grad_norm: 0.871535049584388, iteration: 175122
loss: 0.9903589487075806,grad_norm: 0.9834748651718702, iteration: 175123
loss: 1.0349483489990234,grad_norm: 0.9999996705576142, iteration: 175124
loss: 1.0029973983764648,grad_norm: 0.9789066877976674, iteration: 175125
loss: 1.0169198513031006,grad_norm: 0.9999992480953092, iteration: 175126
loss: 1.0113595724105835,grad_norm: 0.8319294763945232, iteration: 175127
loss: 0.996616005897522,grad_norm: 0.958242505587326, iteration: 175128
loss: 0.969618558883667,grad_norm: 0.96114239737794, iteration: 175129
loss: 1.0142478942871094,grad_norm: 0.9136551529267317, iteration: 175130
loss: 0.9806676506996155,grad_norm: 0.8567644657357173, iteration: 175131
loss: 1.0041130781173706,grad_norm: 0.9999992243262247, iteration: 175132
loss: 0.984445333480835,grad_norm: 0.9999990368346388, iteration: 175133
loss: 0.9826403260231018,grad_norm: 0.9942867700882375, iteration: 175134
loss: 1.0158915519714355,grad_norm: 0.99999902436622, iteration: 175135
loss: 0.9677098393440247,grad_norm: 0.9999990051601915, iteration: 175136
loss: 1.0215703248977661,grad_norm: 0.9235572893554307, iteration: 175137
loss: 0.9824498891830444,grad_norm: 0.97102624961636, iteration: 175138
loss: 0.9848709106445312,grad_norm: 0.9703044831408576, iteration: 175139
loss: 0.977715015411377,grad_norm: 0.999998953524772, iteration: 175140
loss: 0.9782566428184509,grad_norm: 0.9999994111750645, iteration: 175141
loss: 1.016413927078247,grad_norm: 0.7927563602608153, iteration: 175142
loss: 0.9653247594833374,grad_norm: 0.9711625221426371, iteration: 175143
loss: 0.9794846177101135,grad_norm: 0.9999992037807736, iteration: 175144
loss: 1.0938398838043213,grad_norm: 0.9999994371503415, iteration: 175145
loss: 1.0328792333602905,grad_norm: 0.9999990184979874, iteration: 175146
loss: 1.0019580125808716,grad_norm: 0.9999989952434032, iteration: 175147
loss: 1.00033438205719,grad_norm: 0.977490115679691, iteration: 175148
loss: 1.0063207149505615,grad_norm: 0.9054131068072201, iteration: 175149
loss: 1.0018666982650757,grad_norm: 0.9999990120040614, iteration: 175150
loss: 0.9741189479827881,grad_norm: 0.9999991428621756, iteration: 175151
loss: 0.9791293144226074,grad_norm: 0.9656866017495177, iteration: 175152
loss: 1.0172076225280762,grad_norm: 0.9554556091227183, iteration: 175153
loss: 1.0342613458633423,grad_norm: 0.8842578723164958, iteration: 175154
loss: 0.9672995805740356,grad_norm: 0.8683490661806903, iteration: 175155
loss: 0.9940544366836548,grad_norm: 0.9456137034690149, iteration: 175156
loss: 1.0222898721694946,grad_norm: 0.9708569095694995, iteration: 175157
loss: 0.9888239502906799,grad_norm: 0.8863378596580241, iteration: 175158
loss: 0.963499128818512,grad_norm: 0.9999990244461064, iteration: 175159
loss: 0.9530540108680725,grad_norm: 0.8297800710732769, iteration: 175160
loss: 0.9601755738258362,grad_norm: 0.863003087800601, iteration: 175161
loss: 0.9696309566497803,grad_norm: 0.9999990012903126, iteration: 175162
loss: 1.0397475957870483,grad_norm: 0.9100098847353684, iteration: 175163
loss: 0.9954648017883301,grad_norm: 0.9999990876158588, iteration: 175164
loss: 0.9777995944023132,grad_norm: 0.9999991736548309, iteration: 175165
loss: 1.0039132833480835,grad_norm: 0.9092045263472817, iteration: 175166
loss: 0.9908196330070496,grad_norm: 0.9999990241312532, iteration: 175167
loss: 0.9815176129341125,grad_norm: 0.9080272588996595, iteration: 175168
loss: 1.0215446949005127,grad_norm: 0.8449079379380362, iteration: 175169
loss: 1.0079432725906372,grad_norm: 0.9999990554111198, iteration: 175170
loss: 1.008563756942749,grad_norm: 0.890549010842666, iteration: 175171
loss: 0.9935335516929626,grad_norm: 0.9999989423012081, iteration: 175172
loss: 1.0229432582855225,grad_norm: 0.9999991385556701, iteration: 175173
loss: 1.0310782194137573,grad_norm: 0.9999989997861187, iteration: 175174
loss: 1.0179439783096313,grad_norm: 0.8285214321809907, iteration: 175175
loss: 0.9904834032058716,grad_norm: 0.9435306260186379, iteration: 175176
loss: 0.9891412854194641,grad_norm: 0.9999991903159168, iteration: 175177
loss: 1.0101157426834106,grad_norm: 0.999999356020709, iteration: 175178
loss: 0.9941610097885132,grad_norm: 0.9999990552845728, iteration: 175179
loss: 0.988783597946167,grad_norm: 0.9737762394161319, iteration: 175180
loss: 1.0073130130767822,grad_norm: 0.9658034483919666, iteration: 175181
loss: 1.0200281143188477,grad_norm: 0.956058530046237, iteration: 175182
loss: 0.9861412644386292,grad_norm: 0.9999992259754483, iteration: 175183
loss: 1.0218253135681152,grad_norm: 0.8860810117731953, iteration: 175184
loss: 0.9732913970947266,grad_norm: 0.8182789396204556, iteration: 175185
loss: 1.04734468460083,grad_norm: 0.9999991260267305, iteration: 175186
loss: 0.9959101676940918,grad_norm: 0.9135636844889348, iteration: 175187
loss: 1.007332444190979,grad_norm: 0.9202319445608668, iteration: 175188
loss: 1.0160936117172241,grad_norm: 0.9999989776055287, iteration: 175189
loss: 1.0097793340682983,grad_norm: 0.9999992420723708, iteration: 175190
loss: 0.9955214262008667,grad_norm: 0.9177596095997805, iteration: 175191
loss: 1.0376298427581787,grad_norm: 0.8992235311098824, iteration: 175192
loss: 0.9890658259391785,grad_norm: 0.9589349087031714, iteration: 175193
loss: 1.0088365077972412,grad_norm: 0.9736471315567345, iteration: 175194
loss: 0.98291015625,grad_norm: 0.9498600060071144, iteration: 175195
loss: 1.023607850074768,grad_norm: 0.9999990916603886, iteration: 175196
loss: 0.9904288649559021,grad_norm: 0.9214393277045301, iteration: 175197
loss: 0.9900015592575073,grad_norm: 0.9500049526507979, iteration: 175198
loss: 0.9914731383323669,grad_norm: 0.9999993153902023, iteration: 175199
loss: 1.0057165622711182,grad_norm: 0.9999989938543149, iteration: 175200
loss: 1.0148909091949463,grad_norm: 0.9999991387948723, iteration: 175201
loss: 0.9710703492164612,grad_norm: 0.977726354528168, iteration: 175202
loss: 1.0101699829101562,grad_norm: 0.9999998528118964, iteration: 175203
loss: 1.0206087827682495,grad_norm: 0.9999993930374663, iteration: 175204
loss: 0.9959198832511902,grad_norm: 0.9999993047242616, iteration: 175205
loss: 1.021638035774231,grad_norm: 0.9999995300677768, iteration: 175206
loss: 1.023336410522461,grad_norm: 0.9999999786710041, iteration: 175207
loss: 1.0455187559127808,grad_norm: 0.9999991627630391, iteration: 175208
loss: 0.9880977869033813,grad_norm: 0.9999990034878503, iteration: 175209
loss: 0.9892027378082275,grad_norm: 0.9999989774752542, iteration: 175210
loss: 0.9844070672988892,grad_norm: 0.9999990722781406, iteration: 175211
loss: 1.0128328800201416,grad_norm: 0.9404277409652502, iteration: 175212
loss: 1.0151846408843994,grad_norm: 0.9066996927378214, iteration: 175213
loss: 1.0001674890518188,grad_norm: 0.8028777262234696, iteration: 175214
loss: 1.0097370147705078,grad_norm: 0.9999993080409075, iteration: 175215
loss: 1.0350052118301392,grad_norm: 0.9999990266447198, iteration: 175216
loss: 1.0069687366485596,grad_norm: 0.9999990648398308, iteration: 175217
loss: 1.0152806043624878,grad_norm: 0.8875927075338784, iteration: 175218
loss: 1.021153211593628,grad_norm: 0.9999990151058875, iteration: 175219
loss: 1.0037274360656738,grad_norm: 0.8874753038514965, iteration: 175220
loss: 0.954728364944458,grad_norm: 0.9999997519355056, iteration: 175221
loss: 1.025221824645996,grad_norm: 0.9022241828847707, iteration: 175222
loss: 0.9881231188774109,grad_norm: 0.9895524798984249, iteration: 175223
loss: 1.0097603797912598,grad_norm: 0.9999991285355121, iteration: 175224
loss: 0.9855806231498718,grad_norm: 0.9999990861160392, iteration: 175225
loss: 0.9820097088813782,grad_norm: 0.8195772865678933, iteration: 175226
loss: 1.0050053596496582,grad_norm: 0.9607258224797998, iteration: 175227
loss: 1.0194307565689087,grad_norm: 0.8377040568242631, iteration: 175228
loss: 1.0059739351272583,grad_norm: 0.9999989953743285, iteration: 175229
loss: 0.9788286089897156,grad_norm: 0.8165703433016747, iteration: 175230
loss: 1.0120054483413696,grad_norm: 0.9999990658296711, iteration: 175231
loss: 0.9733069539070129,grad_norm: 0.8418300790200811, iteration: 175232
loss: 0.9696741700172424,grad_norm: 0.9763793920484064, iteration: 175233
loss: 1.0214588642120361,grad_norm: 0.9999994374034673, iteration: 175234
loss: 1.0111206769943237,grad_norm: 0.9999990732906628, iteration: 175235
loss: 1.0199309587478638,grad_norm: 0.9999989545036173, iteration: 175236
loss: 0.9995341897010803,grad_norm: 0.9999991800651953, iteration: 175237
loss: 0.9718772172927856,grad_norm: 0.9691019353066886, iteration: 175238
loss: 0.9948853850364685,grad_norm: 0.9999991662254369, iteration: 175239
loss: 0.9821045994758606,grad_norm: 0.9999994215627137, iteration: 175240
loss: 0.9983174204826355,grad_norm: 0.8817471953477012, iteration: 175241
loss: 1.0097706317901611,grad_norm: 0.9942610947252707, iteration: 175242
loss: 1.0008010864257812,grad_norm: 0.8539399434110028, iteration: 175243
loss: 0.9979245066642761,grad_norm: 0.817767371360136, iteration: 175244
loss: 0.9688173532485962,grad_norm: 0.9999991437645444, iteration: 175245
loss: 0.9890787601470947,grad_norm: 0.9999990034498437, iteration: 175246
loss: 0.9821785092353821,grad_norm: 0.9370373055424781, iteration: 175247
loss: 1.002677083015442,grad_norm: 0.9829670402589595, iteration: 175248
loss: 0.9978557229042053,grad_norm: 0.9509672884665527, iteration: 175249
loss: 1.033351182937622,grad_norm: 0.9999993214267664, iteration: 175250
loss: 0.9837504029273987,grad_norm: 0.9999990237054815, iteration: 175251
loss: 1.0041481256484985,grad_norm: 0.8805067493673424, iteration: 175252
loss: 0.9932262301445007,grad_norm: 0.99999909448348, iteration: 175253
loss: 0.9755707383155823,grad_norm: 0.9999990374341747, iteration: 175254
loss: 1.032132863998413,grad_norm: 0.9977000740570798, iteration: 175255
loss: 1.0175509452819824,grad_norm: 0.9999990618480832, iteration: 175256
loss: 0.9977971911430359,grad_norm: 0.9999991141656072, iteration: 175257
loss: 1.0074514150619507,grad_norm: 0.9999991119917916, iteration: 175258
loss: 1.0548090934753418,grad_norm: 0.9999991049473712, iteration: 175259
loss: 0.9853973388671875,grad_norm: 0.9233693657036723, iteration: 175260
loss: 1.0038119554519653,grad_norm: 0.9999991330161621, iteration: 175261
loss: 1.0058494806289673,grad_norm: 0.942276807127032, iteration: 175262
loss: 1.0386103391647339,grad_norm: 0.8735714097395565, iteration: 175263
loss: 1.0938327312469482,grad_norm: 0.99999952122588, iteration: 175264
loss: 0.9979997277259827,grad_norm: 0.8700336940890134, iteration: 175265
loss: 1.0229772329330444,grad_norm: 0.8979141195737504, iteration: 175266
loss: 1.015222430229187,grad_norm: 0.8258487491772423, iteration: 175267
loss: 1.006615161895752,grad_norm: 0.8995709917135202, iteration: 175268
loss: 0.9784303307533264,grad_norm: 0.9202018472407792, iteration: 175269
loss: 1.0086712837219238,grad_norm: 0.8348353368246119, iteration: 175270
loss: 0.9756186604499817,grad_norm: 0.9855323940763111, iteration: 175271
loss: 0.9995352029800415,grad_norm: 0.999999919130435, iteration: 175272
loss: 0.9797136783599854,grad_norm: 0.999999139672778, iteration: 175273
loss: 0.9794428944587708,grad_norm: 0.999999066571665, iteration: 175274
loss: 1.0325583219528198,grad_norm: 0.9999990412673975, iteration: 175275
loss: 0.9757158160209656,grad_norm: 0.9999988905567332, iteration: 175276
loss: 1.0277458429336548,grad_norm: 0.9999996703476475, iteration: 175277
loss: 0.9827061891555786,grad_norm: 0.8700598901304332, iteration: 175278
loss: 0.9850592613220215,grad_norm: 0.9999990135523233, iteration: 175279
loss: 0.9765385389328003,grad_norm: 0.9999993075403489, iteration: 175280
loss: 0.9876283407211304,grad_norm: 0.999999137985334, iteration: 175281
loss: 0.9650148749351501,grad_norm: 0.8276507154709606, iteration: 175282
loss: 1.0178470611572266,grad_norm: 0.9404325881191083, iteration: 175283
loss: 0.9666399359703064,grad_norm: 0.9667098486051056, iteration: 175284
loss: 1.0402220487594604,grad_norm: 0.9999991597424309, iteration: 175285
loss: 0.9972921013832092,grad_norm: 0.8218991895823294, iteration: 175286
loss: 0.9835423827171326,grad_norm: 0.9089907925742339, iteration: 175287
loss: 0.9803487062454224,grad_norm: 0.999999232530474, iteration: 175288
loss: 1.0231845378875732,grad_norm: 0.9320873566751604, iteration: 175289
loss: 0.980675995349884,grad_norm: 0.9302020862746089, iteration: 175290
loss: 1.0041990280151367,grad_norm: 0.9999992009413379, iteration: 175291
loss: 0.9985569715499878,grad_norm: 0.9999991765746202, iteration: 175292
loss: 1.001315951347351,grad_norm: 0.9999991250543622, iteration: 175293
loss: 1.0318299531936646,grad_norm: 0.9999989590503892, iteration: 175294
loss: 0.9785301685333252,grad_norm: 0.952198473823805, iteration: 175295
loss: 1.0336129665374756,grad_norm: 0.9426668498402194, iteration: 175296
loss: 1.0263087749481201,grad_norm: 0.9999992175459446, iteration: 175297
loss: 0.99737149477005,grad_norm: 0.9960641109041224, iteration: 175298
loss: 1.0272327661514282,grad_norm: 0.9999991220854836, iteration: 175299
loss: 1.0047022104263306,grad_norm: 0.9999990563045406, iteration: 175300
loss: 0.9899094104766846,grad_norm: 0.9999992075440726, iteration: 175301
loss: 1.0017781257629395,grad_norm: 0.9999992043309353, iteration: 175302
loss: 1.005776286125183,grad_norm: 0.9118320728477285, iteration: 175303
loss: 1.0097938776016235,grad_norm: 0.9272006367949523, iteration: 175304
loss: 0.9901970624923706,grad_norm: 0.9241327802687472, iteration: 175305
loss: 0.9884558916091919,grad_norm: 0.8440402173211905, iteration: 175306
loss: 1.0229130983352661,grad_norm: 0.9999988755163571, iteration: 175307
loss: 0.9839676022529602,grad_norm: 0.9631256625724057, iteration: 175308
loss: 0.9997382760047913,grad_norm: 0.8780170843596278, iteration: 175309
loss: 0.999722421169281,grad_norm: 0.9593148733195469, iteration: 175310
loss: 1.0318444967269897,grad_norm: 0.7440156629781017, iteration: 175311
loss: 0.9919628500938416,grad_norm: 0.9300232197068157, iteration: 175312
loss: 0.9910524487495422,grad_norm: 0.9038670591361408, iteration: 175313
loss: 1.032889723777771,grad_norm: 0.8786519672093749, iteration: 175314
loss: 1.008656620979309,grad_norm: 0.9999992342121573, iteration: 175315
loss: 1.0131562948226929,grad_norm: 0.9999991752296291, iteration: 175316
loss: 0.9971327185630798,grad_norm: 0.9999992603649865, iteration: 175317
loss: 1.033983588218689,grad_norm: 0.9999991104991394, iteration: 175318
loss: 1.0150983333587646,grad_norm: 0.9999990139719003, iteration: 175319
loss: 1.0165938138961792,grad_norm: 0.9999992849236324, iteration: 175320
loss: 0.9682588577270508,grad_norm: 0.99999899244798, iteration: 175321
loss: 0.9528163075447083,grad_norm: 0.8846511032235864, iteration: 175322
loss: 0.965518057346344,grad_norm: 0.8900677758374718, iteration: 175323
loss: 1.0661262273788452,grad_norm: 0.9999991119540094, iteration: 175324
loss: 0.9708119630813599,grad_norm: 0.8693741388218215, iteration: 175325
loss: 0.9937381744384766,grad_norm: 0.9999991581601274, iteration: 175326
loss: 1.0253572463989258,grad_norm: 0.9999990624539399, iteration: 175327
loss: 0.9751766324043274,grad_norm: 0.9999990245389332, iteration: 175328
loss: 1.007070779800415,grad_norm: 0.9999991366215835, iteration: 175329
loss: 0.979181170463562,grad_norm: 0.9999990356980007, iteration: 175330
loss: 1.0010136365890503,grad_norm: 0.9999992070286842, iteration: 175331
loss: 1.0041320323944092,grad_norm: 0.9999993508963213, iteration: 175332
loss: 0.9772103428840637,grad_norm: 0.8368731441861186, iteration: 175333
loss: 0.9989171624183655,grad_norm: 0.9999991434003771, iteration: 175334
loss: 1.0142706632614136,grad_norm: 0.9999991701047779, iteration: 175335
loss: 1.0016357898712158,grad_norm: 0.9999992340336613, iteration: 175336
loss: 0.9985924959182739,grad_norm: 0.9999993662502924, iteration: 175337
loss: 0.9815423488616943,grad_norm: 0.999999156436983, iteration: 175338
loss: 0.9832009077072144,grad_norm: 0.9999991133965321, iteration: 175339
loss: 0.9961212873458862,grad_norm: 0.9252220686606579, iteration: 175340
loss: 1.030639886856079,grad_norm: 0.9999991447533251, iteration: 175341
loss: 1.0415912866592407,grad_norm: 0.9304544823555075, iteration: 175342
loss: 1.0082188844680786,grad_norm: 0.9999990061188, iteration: 175343
loss: 1.005949854850769,grad_norm: 0.9999992077334757, iteration: 175344
loss: 0.9884563088417053,grad_norm: 0.8615920561751536, iteration: 175345
loss: 0.9925763010978699,grad_norm: 0.9999990016132791, iteration: 175346
loss: 1.004064917564392,grad_norm: 0.9507770889294108, iteration: 175347
loss: 0.9798941016197205,grad_norm: 0.9999990261438317, iteration: 175348
loss: 1.0261937379837036,grad_norm: 0.9999991271908285, iteration: 175349
loss: 1.0079504251480103,grad_norm: 0.9180295533690054, iteration: 175350
loss: 0.9498462677001953,grad_norm: 0.8864887700700922, iteration: 175351
loss: 0.9561334252357483,grad_norm: 0.9705698043008086, iteration: 175352
loss: 1.0003010034561157,grad_norm: 0.7864833493920866, iteration: 175353
loss: 1.0058680772781372,grad_norm: 0.999999187478644, iteration: 175354
loss: 0.9996896982192993,grad_norm: 0.9999990344421288, iteration: 175355
loss: 1.030670166015625,grad_norm: 0.9999989967509233, iteration: 175356
loss: 0.9946444630622864,grad_norm: 0.975589409985096, iteration: 175357
loss: 1.006428599357605,grad_norm: 0.999999257572863, iteration: 175358
loss: 0.9880791306495667,grad_norm: 0.7981919550190398, iteration: 175359
loss: 1.033039927482605,grad_norm: 0.9999994213967441, iteration: 175360
loss: 1.0112742185592651,grad_norm: 0.9932892326537768, iteration: 175361
loss: 1.048388123512268,grad_norm: 0.9663123500746049, iteration: 175362
loss: 0.992578387260437,grad_norm: 0.9612951022831777, iteration: 175363
loss: 0.9962178468704224,grad_norm: 0.9799503615235808, iteration: 175364
loss: 0.960361897945404,grad_norm: 0.9575018140820583, iteration: 175365
loss: 1.0093532800674438,grad_norm: 0.9855120266164724, iteration: 175366
loss: 1.044175624847412,grad_norm: 0.9999997000790417, iteration: 175367
loss: 1.014938473701477,grad_norm: 0.8870865086950193, iteration: 175368
loss: 0.9970396757125854,grad_norm: 0.8277320970862513, iteration: 175369
loss: 0.9968833923339844,grad_norm: 0.9999991606225681, iteration: 175370
loss: 0.9887344241142273,grad_norm: 0.9999991204452586, iteration: 175371
loss: 1.018656849861145,grad_norm: 0.9999990924986594, iteration: 175372
loss: 1.0072044134140015,grad_norm: 0.9999993064779863, iteration: 175373
loss: 1.0108517408370972,grad_norm: 0.8783495904899546, iteration: 175374
loss: 0.9732844829559326,grad_norm: 0.99999911334115, iteration: 175375
loss: 1.0264995098114014,grad_norm: 0.9999991742448221, iteration: 175376
loss: 1.025383710861206,grad_norm: 0.9155067022509703, iteration: 175377
loss: 0.9987466335296631,grad_norm: 0.9424371212001472, iteration: 175378
loss: 1.0055701732635498,grad_norm: 0.9999991857412734, iteration: 175379
loss: 0.9916713237762451,grad_norm: 0.9999991364077967, iteration: 175380
loss: 0.9621171355247498,grad_norm: 0.9180738621421508, iteration: 175381
loss: 0.9793713092803955,grad_norm: 0.8751326658646039, iteration: 175382
loss: 1.0201241970062256,grad_norm: 0.9613185209259288, iteration: 175383
loss: 0.9719039797782898,grad_norm: 0.9396051549945382, iteration: 175384
loss: 1.0158727169036865,grad_norm: 0.9999989763085675, iteration: 175385
loss: 1.103553056716919,grad_norm: 0.9999992447283593, iteration: 175386
loss: 1.0008316040039062,grad_norm: 0.9999990433055639, iteration: 175387
loss: 1.0139344930648804,grad_norm: 0.9999990845742613, iteration: 175388
loss: 0.9932305216789246,grad_norm: 0.8838660343120507, iteration: 175389
loss: 0.9959922432899475,grad_norm: 0.9999991794636592, iteration: 175390
loss: 0.9715672731399536,grad_norm: 0.9999991779823895, iteration: 175391
loss: 0.9965380430221558,grad_norm: 0.9999992504202789, iteration: 175392
loss: 1.011436104774475,grad_norm: 0.9835539510235168, iteration: 175393
loss: 0.9869029521942139,grad_norm: 0.8436561946246826, iteration: 175394
loss: 1.0315418243408203,grad_norm: 0.9999993557643664, iteration: 175395
loss: 1.024087905883789,grad_norm: 0.9999992996028269, iteration: 175396
loss: 0.9743112325668335,grad_norm: 0.9999990982577348, iteration: 175397
loss: 1.0019561052322388,grad_norm: 0.9997483260462716, iteration: 175398
loss: 0.9921818971633911,grad_norm: 0.9999991577205454, iteration: 175399
loss: 0.9841100573539734,grad_norm: 0.8470058574513323, iteration: 175400
loss: 1.0186117887496948,grad_norm: 0.9999989924349271, iteration: 175401
loss: 1.0061254501342773,grad_norm: 0.999999171610757, iteration: 175402
loss: 0.9776278138160706,grad_norm: 0.9839329321932162, iteration: 175403
loss: 1.005936861038208,grad_norm: 0.9999991147187204, iteration: 175404
loss: 0.9914382100105286,grad_norm: 0.9999991782005708, iteration: 175405
loss: 0.9883912205696106,grad_norm: 0.8230345790045052, iteration: 175406
loss: 0.9987491965293884,grad_norm: 0.9742789505416818, iteration: 175407
loss: 0.985830545425415,grad_norm: 0.9999990889781545, iteration: 175408
loss: 0.9771021008491516,grad_norm: 0.9999991120396057, iteration: 175409
loss: 0.9633200764656067,grad_norm: 0.9999991889062761, iteration: 175410
loss: 0.9900181293487549,grad_norm: 0.999999174011207, iteration: 175411
loss: 0.9857397675514221,grad_norm: 0.9999991053752652, iteration: 175412
loss: 1.0266027450561523,grad_norm: 0.9999992748297015, iteration: 175413
loss: 0.9997739195823669,grad_norm: 0.939076494542869, iteration: 175414
loss: 0.9809747338294983,grad_norm: 0.9999990850897117, iteration: 175415
loss: 0.9739711880683899,grad_norm: 0.9265224368089386, iteration: 175416
loss: 0.9974228739738464,grad_norm: 0.9237499431184027, iteration: 175417
loss: 1.004332184791565,grad_norm: 0.9670914204083512, iteration: 175418
loss: 0.9950112700462341,grad_norm: 0.9999990226738164, iteration: 175419
loss: 1.033869743347168,grad_norm: 0.9585279281986535, iteration: 175420
loss: 0.9790871739387512,grad_norm: 0.9728040514092374, iteration: 175421
loss: 1.0106284618377686,grad_norm: 0.8628588865229595, iteration: 175422
loss: 1.0178451538085938,grad_norm: 0.9836103672354933, iteration: 175423
loss: 0.9927392601966858,grad_norm: 0.9999992678011796, iteration: 175424
loss: 0.9861016273498535,grad_norm: 0.99999900056594, iteration: 175425
loss: 0.9785308837890625,grad_norm: 0.9999989997260286, iteration: 175426
loss: 1.0331346988677979,grad_norm: 0.9999991904633627, iteration: 175427
loss: 0.9960731267929077,grad_norm: 0.9999991580670815, iteration: 175428
loss: 1.0059758424758911,grad_norm: 0.9513952971148948, iteration: 175429
loss: 0.9911118149757385,grad_norm: 0.8828627743802184, iteration: 175430
loss: 1.0049381256103516,grad_norm: 0.9999998585316296, iteration: 175431
loss: 0.970597505569458,grad_norm: 0.99999935320823, iteration: 175432
loss: 0.9621181488037109,grad_norm: 0.8512362224452857, iteration: 175433
loss: 0.9964625835418701,grad_norm: 0.9287947128945914, iteration: 175434
loss: 0.962376058101654,grad_norm: 0.9999989400476491, iteration: 175435
loss: 1.007128357887268,grad_norm: 0.8669826072606454, iteration: 175436
loss: 1.0325019359588623,grad_norm: 0.91569404303974, iteration: 175437
loss: 0.998860776424408,grad_norm: 0.8186511445464293, iteration: 175438
loss: 0.9981194734573364,grad_norm: 0.8720729537726309, iteration: 175439
loss: 0.9695814847946167,grad_norm: 0.9999991157410866, iteration: 175440
loss: 0.9465994238853455,grad_norm: 0.9991696752223591, iteration: 175441
loss: 1.0218268632888794,grad_norm: 0.9760521051806692, iteration: 175442
loss: 0.950856626033783,grad_norm: 0.999999045750472, iteration: 175443
loss: 0.9873837828636169,grad_norm: 0.9999990696569916, iteration: 175444
loss: 1.0061275959014893,grad_norm: 0.7681362571705153, iteration: 175445
loss: 1.0219076871871948,grad_norm: 0.9076160267803298, iteration: 175446
loss: 0.9640988707542419,grad_norm: 0.889725444568256, iteration: 175447
loss: 0.9957850575447083,grad_norm: 0.9610406829315294, iteration: 175448
loss: 1.0177081823349,grad_norm: 0.999999201987708, iteration: 175449
loss: 1.0222949981689453,grad_norm: 0.7972087150939141, iteration: 175450
loss: 1.0387990474700928,grad_norm: 0.999999294632606, iteration: 175451
loss: 0.9702072739601135,grad_norm: 0.8435771526801917, iteration: 175452
loss: 1.0993760824203491,grad_norm: 0.9999991695919199, iteration: 175453
loss: 1.007538914680481,grad_norm: 0.9999995965892703, iteration: 175454
loss: 1.0184929370880127,grad_norm: 0.9873620829905805, iteration: 175455
loss: 0.9764213562011719,grad_norm: 0.8735097672630434, iteration: 175456
loss: 0.9881556630134583,grad_norm: 0.9999989834135217, iteration: 175457
loss: 1.0162379741668701,grad_norm: 0.9999990673255026, iteration: 175458
loss: 0.962028443813324,grad_norm: 0.8809364493617831, iteration: 175459
loss: 0.9973517060279846,grad_norm: 0.8161033014007307, iteration: 175460
loss: 0.9695008993148804,grad_norm: 0.9999991678505178, iteration: 175461
loss: 1.063534140586853,grad_norm: 0.9999994596547157, iteration: 175462
loss: 0.9847274422645569,grad_norm: 0.9999991063781295, iteration: 175463
loss: 1.0475517511367798,grad_norm: 0.9999991387046453, iteration: 175464
loss: 1.00063955783844,grad_norm: 0.8384776127380361, iteration: 175465
loss: 0.9735601544380188,grad_norm: 0.8923165048307526, iteration: 175466
loss: 1.0012823343276978,grad_norm: 0.9999992368973775, iteration: 175467
loss: 0.9716612100601196,grad_norm: 0.8557792305739192, iteration: 175468
loss: 0.981854259967804,grad_norm: 0.999999095553027, iteration: 175469
loss: 0.9734677076339722,grad_norm: 0.9999989840072991, iteration: 175470
loss: 0.9917773604393005,grad_norm: 0.8829618693088825, iteration: 175471
loss: 1.0480130910873413,grad_norm: 0.9999990840739933, iteration: 175472
loss: 1.0169994831085205,grad_norm: 0.8945714729758286, iteration: 175473
loss: 1.0139878988265991,grad_norm: 0.9999990878643982, iteration: 175474
loss: 1.0167129039764404,grad_norm: 0.9082632625087632, iteration: 175475
loss: 0.977734386920929,grad_norm: 0.9999991532495672, iteration: 175476
loss: 0.97913658618927,grad_norm: 0.8732071948710047, iteration: 175477
loss: 1.0396164655685425,grad_norm: 0.9999989981861008, iteration: 175478
loss: 0.9947677850723267,grad_norm: 0.8821441709263587, iteration: 175479
loss: 1.002414345741272,grad_norm: 0.9113710581470391, iteration: 175480
loss: 0.9991133213043213,grad_norm: 0.9723454685618623, iteration: 175481
loss: 0.9972038269042969,grad_norm: 0.8032639269413189, iteration: 175482
loss: 1.017410397529602,grad_norm: 0.7996443535443647, iteration: 175483
loss: 1.0290099382400513,grad_norm: 0.9999992388700836, iteration: 175484
loss: 0.9907740950584412,grad_norm: 0.9999991374603284, iteration: 175485
loss: 0.9815440773963928,grad_norm: 0.9999991607230978, iteration: 175486
loss: 0.9690129160881042,grad_norm: 0.9101886431655596, iteration: 175487
loss: 1.0516455173492432,grad_norm: 0.9380461714160231, iteration: 175488
loss: 0.9877780079841614,grad_norm: 0.9999991765929143, iteration: 175489
loss: 0.9479602575302124,grad_norm: 0.9890445245529957, iteration: 175490
loss: 0.9620822072029114,grad_norm: 0.8702385564549844, iteration: 175491
loss: 0.9795032143592834,grad_norm: 0.9553563448809097, iteration: 175492
loss: 1.0017404556274414,grad_norm: 0.9999990181791697, iteration: 175493
loss: 1.0023692846298218,grad_norm: 0.9999991577566621, iteration: 175494
loss: 0.9933609962463379,grad_norm: 0.932866832513992, iteration: 175495
loss: 1.0293233394622803,grad_norm: 0.8581957910312552, iteration: 175496
loss: 1.0559682846069336,grad_norm: 0.968523716000512, iteration: 175497
loss: 1.0443814992904663,grad_norm: 0.9411632361381282, iteration: 175498
loss: 0.9892374277114868,grad_norm: 0.9999989929388622, iteration: 175499
loss: 1.0412888526916504,grad_norm: 0.9999992914740123, iteration: 175500
loss: 0.971750020980835,grad_norm: 0.9999990529582697, iteration: 175501
loss: 0.9684658646583557,grad_norm: 0.9576209330367601, iteration: 175502
loss: 1.0145024061203003,grad_norm: 0.9846919293129375, iteration: 175503
loss: 0.9715140461921692,grad_norm: 0.9999991274110805, iteration: 175504
loss: 1.1172343492507935,grad_norm: 0.9999998584903229, iteration: 175505
loss: 0.974316418170929,grad_norm: 0.817608627591393, iteration: 175506
loss: 0.9878377318382263,grad_norm: 0.9570343930452127, iteration: 175507
loss: 1.0054038763046265,grad_norm: 0.9999999586838824, iteration: 175508
loss: 1.0332363843917847,grad_norm: 0.9656833884217484, iteration: 175509
loss: 0.9804768562316895,grad_norm: 0.8631142298046798, iteration: 175510
loss: 1.007293939590454,grad_norm: 0.8999112484437207, iteration: 175511
loss: 1.0348100662231445,grad_norm: 0.9999995942128354, iteration: 175512
loss: 1.0141545534133911,grad_norm: 0.9999994124501488, iteration: 175513
loss: 0.9836274981498718,grad_norm: 0.8851765833265983, iteration: 175514
loss: 1.0113379955291748,grad_norm: 0.8962205167679631, iteration: 175515
loss: 0.9855912327766418,grad_norm: 0.8452424686476582, iteration: 175516
loss: 0.9941595792770386,grad_norm: 0.999999126371544, iteration: 175517
loss: 0.9822364449501038,grad_norm: 0.8597165828363625, iteration: 175518
loss: 1.0001410245895386,grad_norm: 0.9999992232887442, iteration: 175519
loss: 0.9716787934303284,grad_norm: 0.9999990837270822, iteration: 175520
loss: 0.9863264560699463,grad_norm: 0.9999989721358697, iteration: 175521
loss: 0.9902092218399048,grad_norm: 0.9468636132635622, iteration: 175522
loss: 0.9825114607810974,grad_norm: 0.9999989805150852, iteration: 175523
loss: 0.9524040818214417,grad_norm: 0.999999230944647, iteration: 175524
loss: 0.9840459823608398,grad_norm: 0.9999989536017696, iteration: 175525
loss: 0.9954175353050232,grad_norm: 0.9999991082512215, iteration: 175526
loss: 0.9945498108863831,grad_norm: 0.8642440891746777, iteration: 175527
loss: 0.9732568860054016,grad_norm: 0.9998644510284127, iteration: 175528
loss: 1.033803105354309,grad_norm: 0.8502460094015324, iteration: 175529
loss: 0.9950549006462097,grad_norm: 0.9069521789266621, iteration: 175530
loss: 0.977709949016571,grad_norm: 0.9499503460150942, iteration: 175531
loss: 0.9661749601364136,grad_norm: 0.9999991838896102, iteration: 175532
loss: 1.0778157711029053,grad_norm: 0.9999994570836581, iteration: 175533
loss: 0.990484893321991,grad_norm: 0.9999990968214424, iteration: 175534
loss: 1.089550256729126,grad_norm: 0.9999993025541358, iteration: 175535
loss: 1.0329002141952515,grad_norm: 0.9999999515416261, iteration: 175536
loss: 0.9929048418998718,grad_norm: 0.9999992352289501, iteration: 175537
loss: 1.0335363149642944,grad_norm: 0.9180875308187606, iteration: 175538
loss: 1.0198503732681274,grad_norm: 0.9999992217781769, iteration: 175539
loss: 1.0053002834320068,grad_norm: 0.9422752219424834, iteration: 175540
loss: 1.0276514291763306,grad_norm: 0.9999989889181328, iteration: 175541
loss: 0.9831438660621643,grad_norm: 0.9069764890552472, iteration: 175542
loss: 0.9899888038635254,grad_norm: 0.8566622357048536, iteration: 175543
loss: 0.975957989692688,grad_norm: 0.9519136865325888, iteration: 175544
loss: 1.02711021900177,grad_norm: 0.9920305917791852, iteration: 175545
loss: 1.0023081302642822,grad_norm: 0.999999263915911, iteration: 175546
loss: 0.9758055210113525,grad_norm: 0.9999991337972681, iteration: 175547
loss: 0.9958887696266174,grad_norm: 0.9999991109024805, iteration: 175548
loss: 1.0084491968154907,grad_norm: 0.9644400731140524, iteration: 175549
loss: 1.0435082912445068,grad_norm: 0.9999990879982339, iteration: 175550
loss: 1.0249552726745605,grad_norm: 0.8999712336322984, iteration: 175551
loss: 0.9441248774528503,grad_norm: 0.9999990725734386, iteration: 175552
loss: 0.999002993106842,grad_norm: 0.9382868977410175, iteration: 175553
loss: 0.9984937906265259,grad_norm: 0.9999992510784402, iteration: 175554
loss: 0.9694279432296753,grad_norm: 0.999999255629097, iteration: 175555
loss: 0.9929795861244202,grad_norm: 0.9563412627415468, iteration: 175556
loss: 1.020784854888916,grad_norm: 0.9690347282977663, iteration: 175557
loss: 1.0052270889282227,grad_norm: 0.9999993034751007, iteration: 175558
loss: 1.0009921789169312,grad_norm: 0.8255159963700865, iteration: 175559
loss: 1.0030335187911987,grad_norm: 0.8427187640430963, iteration: 175560
loss: 0.9545781016349792,grad_norm: 0.9999991327965194, iteration: 175561
loss: 1.0168392658233643,grad_norm: 0.8578144336977164, iteration: 175562
loss: 0.9996733665466309,grad_norm: 0.9999991710737222, iteration: 175563
loss: 1.022271990776062,grad_norm: 0.8268770381039252, iteration: 175564
loss: 1.0155010223388672,grad_norm: 0.9714869594300983, iteration: 175565
loss: 0.9899994730949402,grad_norm: 0.9492662041240827, iteration: 175566
loss: 0.9940563440322876,grad_norm: 0.9999990545588059, iteration: 175567
loss: 1.0021196603775024,grad_norm: 0.9999990819808372, iteration: 175568
loss: 0.9920322895050049,grad_norm: 0.9906847895949111, iteration: 175569
loss: 0.9856643676757812,grad_norm: 0.999999106800439, iteration: 175570
loss: 1.0098867416381836,grad_norm: 0.9999991295196363, iteration: 175571
loss: 1.0280512571334839,grad_norm: 0.999999363767619, iteration: 175572
loss: 1.0210001468658447,grad_norm: 0.9513990961150961, iteration: 175573
loss: 0.994147777557373,grad_norm: 0.8774679384153794, iteration: 175574
loss: 0.9925955533981323,grad_norm: 0.8472917749155046, iteration: 175575
loss: 0.978871762752533,grad_norm: 0.9999992277168193, iteration: 175576
loss: 0.9853156805038452,grad_norm: 0.9654526646458943, iteration: 175577
loss: 0.9863969087600708,grad_norm: 0.9999991541152251, iteration: 175578
loss: 1.0304640531539917,grad_norm: 0.9999992382582632, iteration: 175579
loss: 1.0062949657440186,grad_norm: 0.908457135544675, iteration: 175580
loss: 1.0057427883148193,grad_norm: 0.9999991841164888, iteration: 175581
loss: 1.0426229238510132,grad_norm: 0.9999990986622108, iteration: 175582
loss: 0.9945070743560791,grad_norm: 0.999999663411747, iteration: 175583
loss: 0.9858397841453552,grad_norm: 0.9599872287343136, iteration: 175584
loss: 0.9821078181266785,grad_norm: 0.9551029660400034, iteration: 175585
loss: 1.0207091569900513,grad_norm: 0.8810123466084399, iteration: 175586
loss: 1.0010963678359985,grad_norm: 0.8979596414313895, iteration: 175587
loss: 0.9972661733627319,grad_norm: 0.9999990263847576, iteration: 175588
loss: 0.9976210594177246,grad_norm: 0.8865184345141609, iteration: 175589
loss: 1.0759226083755493,grad_norm: 0.9999996014149803, iteration: 175590
loss: 0.9568172693252563,grad_norm: 0.8248863107095887, iteration: 175591
loss: 0.9969683885574341,grad_norm: 0.9999988953903076, iteration: 175592
loss: 0.9755088090896606,grad_norm: 0.9999991611179186, iteration: 175593
loss: 1.0086250305175781,grad_norm: 0.8778671125103745, iteration: 175594
loss: 0.9912770390510559,grad_norm: 0.9999991548136661, iteration: 175595
loss: 1.013822078704834,grad_norm: 0.9999991924063265, iteration: 175596
loss: 1.0043948888778687,grad_norm: 0.7601635654001815, iteration: 175597
loss: 1.003380298614502,grad_norm: 0.9586805458205535, iteration: 175598
loss: 0.984234094619751,grad_norm: 0.9241985655069538, iteration: 175599
loss: 1.0207427740097046,grad_norm: 0.9999991096707043, iteration: 175600
loss: 1.0200978517532349,grad_norm: 0.9841121578637934, iteration: 175601
loss: 1.0155365467071533,grad_norm: 0.9999991720645152, iteration: 175602
loss: 1.0098674297332764,grad_norm: 0.8738139892077351, iteration: 175603
loss: 0.9642335176467896,grad_norm: 0.8257618271137205, iteration: 175604
loss: 1.0021299123764038,grad_norm: 0.9999992462048634, iteration: 175605
loss: 0.9877830743789673,grad_norm: 0.9999991466049333, iteration: 175606
loss: 1.023978590965271,grad_norm: 0.9976892794063514, iteration: 175607
loss: 0.9512115120887756,grad_norm: 0.8825445593150947, iteration: 175608
loss: 1.0054068565368652,grad_norm: 0.8811306503767066, iteration: 175609
loss: 1.01483154296875,grad_norm: 0.9999992441591201, iteration: 175610
loss: 0.9729778170585632,grad_norm: 0.99999894526352, iteration: 175611
loss: 0.9644286036491394,grad_norm: 0.9999992063512738, iteration: 175612
loss: 1.0168445110321045,grad_norm: 0.9955250179384122, iteration: 175613
loss: 0.9983938336372375,grad_norm: 0.9016105490242622, iteration: 175614
loss: 0.9761625528335571,grad_norm: 0.8567564054633015, iteration: 175615
loss: 0.9809170365333557,grad_norm: 0.8994519799904563, iteration: 175616
loss: 1.019681692123413,grad_norm: 0.9999990866958713, iteration: 175617
loss: 1.0316957235336304,grad_norm: 0.9999990699080514, iteration: 175618
loss: 1.022594690322876,grad_norm: 0.9999989405852883, iteration: 175619
loss: 0.9916954040527344,grad_norm: 0.9293070229712483, iteration: 175620
loss: 0.9666123390197754,grad_norm: 0.8412890032331155, iteration: 175621
loss: 1.024369478225708,grad_norm: 0.9102377542373982, iteration: 175622
loss: 0.986312210559845,grad_norm: 0.9999991113262424, iteration: 175623
loss: 0.9975231885910034,grad_norm: 0.9999990406118201, iteration: 175624
loss: 1.0117106437683105,grad_norm: 0.9999991636907868, iteration: 175625
loss: 1.0197322368621826,grad_norm: 0.9999992168789534, iteration: 175626
loss: 1.0750447511672974,grad_norm: 0.9999998482240166, iteration: 175627
loss: 1.0053379535675049,grad_norm: 0.9999990840113294, iteration: 175628
loss: 1.0286016464233398,grad_norm: 0.9999989573433058, iteration: 175629
loss: 0.994552493095398,grad_norm: 0.8430106168824301, iteration: 175630
loss: 1.0179812908172607,grad_norm: 0.9999990988497389, iteration: 175631
loss: 1.0184332132339478,grad_norm: 0.7953351509546015, iteration: 175632
loss: 0.9952234625816345,grad_norm: 0.9999990264236471, iteration: 175633
loss: 1.01718270778656,grad_norm: 0.918825650947354, iteration: 175634
loss: 1.0210829973220825,grad_norm: 0.9763832987355409, iteration: 175635
loss: 0.9664950966835022,grad_norm: 0.9999991381450777, iteration: 175636
loss: 1.1832720041275024,grad_norm: 0.9999994512174332, iteration: 175637
loss: 1.039574384689331,grad_norm: 0.8376226719582955, iteration: 175638
loss: 1.1638695001602173,grad_norm: 0.9999999077678922, iteration: 175639
loss: 0.9960473775863647,grad_norm: 0.9155304606068353, iteration: 175640
loss: 1.0256844758987427,grad_norm: 0.9933291117802584, iteration: 175641
loss: 0.968750536441803,grad_norm: 0.999999090820978, iteration: 175642
loss: 0.9592176079750061,grad_norm: 0.9999991030008837, iteration: 175643
loss: 1.0301451683044434,grad_norm: 0.9999990584538596, iteration: 175644
loss: 1.015163540840149,grad_norm: 0.9946723855173424, iteration: 175645
loss: 0.9876074194908142,grad_norm: 0.9845297588184432, iteration: 175646
loss: 1.015806794166565,grad_norm: 0.9999996923271585, iteration: 175647
loss: 1.0152919292449951,grad_norm: 0.8270216392889659, iteration: 175648
loss: 1.060987949371338,grad_norm: 0.9999991591607842, iteration: 175649
loss: 0.9978325366973877,grad_norm: 0.9404219894441391, iteration: 175650
loss: 0.9797514081001282,grad_norm: 0.9774140008857068, iteration: 175651
loss: 1.0740761756896973,grad_norm: 0.9710671749011887, iteration: 175652
loss: 0.9946357607841492,grad_norm: 0.9999992522812817, iteration: 175653
loss: 0.9947013854980469,grad_norm: 0.9999989564206286, iteration: 175654
loss: 0.986976683139801,grad_norm: 0.9999997183247605, iteration: 175655
loss: 0.9845383763313293,grad_norm: 0.9683469459387327, iteration: 175656
loss: 0.9894972443580627,grad_norm: 0.9999995171560385, iteration: 175657
loss: 1.4274510145187378,grad_norm: 0.9999998883991129, iteration: 175658
loss: 1.0144926309585571,grad_norm: 0.9999991589084374, iteration: 175659
loss: 0.9909769892692566,grad_norm: 0.9431060784249121, iteration: 175660
loss: 0.9902570247650146,grad_norm: 0.9999990028971357, iteration: 175661
loss: 1.0017977952957153,grad_norm: 0.9394144555819041, iteration: 175662
loss: 0.9945935010910034,grad_norm: 0.999999010827566, iteration: 175663
loss: 1.0045006275177002,grad_norm: 0.9155655952889884, iteration: 175664
loss: 0.9663757681846619,grad_norm: 0.747419584820968, iteration: 175665
loss: 1.0020848512649536,grad_norm: 0.9402659141968617, iteration: 175666
loss: 1.01340651512146,grad_norm: 0.890970947475214, iteration: 175667
loss: 1.0386797189712524,grad_norm: 0.8653478938687417, iteration: 175668
loss: 0.9613045454025269,grad_norm: 0.9999990587262162, iteration: 175669
loss: 0.9880192279815674,grad_norm: 0.8736132478925978, iteration: 175670
loss: 0.948955237865448,grad_norm: 0.9999991621449523, iteration: 175671
loss: 1.0135912895202637,grad_norm: 0.9999990668976999, iteration: 175672
loss: 1.0001322031021118,grad_norm: 0.9999991234597583, iteration: 175673
loss: 0.9862509369850159,grad_norm: 0.9999990243743208, iteration: 175674
loss: 0.9980758428573608,grad_norm: 0.9999991136815912, iteration: 175675
loss: 1.0082106590270996,grad_norm: 0.9142849678617005, iteration: 175676
loss: 0.959424614906311,grad_norm: 0.9999993265019645, iteration: 175677
loss: 1.002875566482544,grad_norm: 0.9912045356747334, iteration: 175678
loss: 0.9733181595802307,grad_norm: 0.9096069103428761, iteration: 175679
loss: 0.9688202142715454,grad_norm: 0.9271637160097317, iteration: 175680
loss: 0.9856123328208923,grad_norm: 0.9999991466438931, iteration: 175681
loss: 1.023292899131775,grad_norm: 0.9999992766517893, iteration: 175682
loss: 0.9880655407905579,grad_norm: 0.8939538102473353, iteration: 175683
loss: 0.9884373545646667,grad_norm: 0.9999991103715902, iteration: 175684
loss: 1.0492805242538452,grad_norm: 0.9999992795778969, iteration: 175685
loss: 1.0102299451828003,grad_norm: 0.9999992700001618, iteration: 175686
loss: 0.9900469779968262,grad_norm: 0.8387459596440988, iteration: 175687
loss: 1.0005604028701782,grad_norm: 0.9574772628932394, iteration: 175688
loss: 1.0267084836959839,grad_norm: 0.9903420974441943, iteration: 175689
loss: 0.9931818246841431,grad_norm: 0.966238053809063, iteration: 175690
loss: 1.0150903463363647,grad_norm: 0.9393164673372051, iteration: 175691
loss: 1.0040384531021118,grad_norm: 0.8295923047905969, iteration: 175692
loss: 0.9813291430473328,grad_norm: 0.9999991677504583, iteration: 175693
loss: 1.0194240808486938,grad_norm: 0.9505579743360238, iteration: 175694
loss: 0.9808775782585144,grad_norm: 0.9999992464439464, iteration: 175695
loss: 1.0099128484725952,grad_norm: 0.9999991352343213, iteration: 175696
loss: 1.0250149965286255,grad_norm: 0.9999989426249913, iteration: 175697
loss: 1.0161733627319336,grad_norm: 0.9999998180946227, iteration: 175698
loss: 1.0264562368392944,grad_norm: 0.9999990564503866, iteration: 175699
loss: 1.0009710788726807,grad_norm: 0.9999992226820839, iteration: 175700
loss: 0.9900451898574829,grad_norm: 0.9999992107616491, iteration: 175701
loss: 0.990509569644928,grad_norm: 0.9734821171370629, iteration: 175702
loss: 0.9911407232284546,grad_norm: 0.963390369585451, iteration: 175703
loss: 0.9804027676582336,grad_norm: 0.9674708714514442, iteration: 175704
loss: 1.0079721212387085,grad_norm: 0.9999997150690861, iteration: 175705
loss: 1.0133757591247559,grad_norm: 0.9999992029477871, iteration: 175706
loss: 1.0064269304275513,grad_norm: 0.9999990682495538, iteration: 175707
loss: 1.0144922733306885,grad_norm: 0.9999990840594118, iteration: 175708
loss: 0.9871361255645752,grad_norm: 0.999999068236754, iteration: 175709
loss: 0.9563308358192444,grad_norm: 0.9999991031225823, iteration: 175710
loss: 0.9771857857704163,grad_norm: 0.9662558121991561, iteration: 175711
loss: 0.9706372618675232,grad_norm: 0.9689531746749874, iteration: 175712
loss: 1.0096096992492676,grad_norm: 0.9999991524531311, iteration: 175713
loss: 1.0236270427703857,grad_norm: 0.9999991544120389, iteration: 175714
loss: 1.0020549297332764,grad_norm: 0.8890868249870497, iteration: 175715
loss: 1.0376218557357788,grad_norm: 0.999999346481953, iteration: 175716
loss: 0.9691307544708252,grad_norm: 0.921361036860501, iteration: 175717
loss: 1.0054529905319214,grad_norm: 0.9999991031497122, iteration: 175718
loss: 1.0001007318496704,grad_norm: 0.8799530620473568, iteration: 175719
loss: 1.0337426662445068,grad_norm: 0.9424752665706058, iteration: 175720
loss: 1.0040693283081055,grad_norm: 0.8145982877148742, iteration: 175721
loss: 1.0433571338653564,grad_norm: 0.9999996440484751, iteration: 175722
loss: 0.9921466112136841,grad_norm: 0.9999991360409656, iteration: 175723
loss: 1.005689024925232,grad_norm: 0.9562217658033709, iteration: 175724
loss: 0.9722687005996704,grad_norm: 0.9999991084357411, iteration: 175725
loss: 1.0275185108184814,grad_norm: 0.7798169924588394, iteration: 175726
loss: 1.011793851852417,grad_norm: 0.8798551722666399, iteration: 175727
loss: 1.029630422592163,grad_norm: 0.9999991260451224, iteration: 175728
loss: 1.0389819145202637,grad_norm: 0.9551748781034598, iteration: 175729
loss: 0.994742751121521,grad_norm: 0.8282582235872815, iteration: 175730
loss: 1.000901222229004,grad_norm: 0.927847584276617, iteration: 175731
loss: 0.9586083889007568,grad_norm: 0.9635809164067414, iteration: 175732
loss: 1.0075606107711792,grad_norm: 0.9999989899499858, iteration: 175733
loss: 1.02694833278656,grad_norm: 0.9999993378416541, iteration: 175734
loss: 1.0093427896499634,grad_norm: 0.9999997364205881, iteration: 175735
loss: 0.9781509637832642,grad_norm: 0.9999990929453616, iteration: 175736
loss: 1.0235618352890015,grad_norm: 0.9999992325086015, iteration: 175737
loss: 0.9749898910522461,grad_norm: 0.9554511417882648, iteration: 175738
loss: 1.0240639448165894,grad_norm: 0.9999992422931823, iteration: 175739
loss: 1.0096421241760254,grad_norm: 0.9253327289793867, iteration: 175740
loss: 1.039047122001648,grad_norm: 0.9447554716416041, iteration: 175741
loss: 1.0277281999588013,grad_norm: 0.9898022291306535, iteration: 175742
loss: 1.0226882696151733,grad_norm: 0.904081913851451, iteration: 175743
loss: 0.9963359832763672,grad_norm: 0.8715697180344025, iteration: 175744
loss: 0.9884563684463501,grad_norm: 0.9999990407111712, iteration: 175745
loss: 1.0190314054489136,grad_norm: 0.9999991048059649, iteration: 175746
loss: 0.952556312084198,grad_norm: 0.9999992957489036, iteration: 175747
loss: 1.0005972385406494,grad_norm: 0.9507576799135296, iteration: 175748
loss: 0.9779576659202576,grad_norm: 0.9736230732988103, iteration: 175749
loss: 0.9976689219474792,grad_norm: 0.9999990733180414, iteration: 175750
loss: 0.9799644947052002,grad_norm: 0.9332964096850933, iteration: 175751
loss: 0.9902939200401306,grad_norm: 0.9999991012005374, iteration: 175752
loss: 0.9681606292724609,grad_norm: 0.8993007919522434, iteration: 175753
loss: 1.0296677350997925,grad_norm: 0.962005738864827, iteration: 175754
loss: 1.0107661485671997,grad_norm: 0.9999991532600285, iteration: 175755
loss: 1.0224319696426392,grad_norm: 0.9999992053630286, iteration: 175756
loss: 0.9758593440055847,grad_norm: 0.9999993226579723, iteration: 175757
loss: 0.9979488849639893,grad_norm: 0.9301924762963361, iteration: 175758
loss: 0.9595469236373901,grad_norm: 0.9999991958577067, iteration: 175759
loss: 0.9964948892593384,grad_norm: 0.9999991402102515, iteration: 175760
loss: 0.9900224208831787,grad_norm: 0.9999991041041258, iteration: 175761
loss: 0.9833638668060303,grad_norm: 0.9999989924207912, iteration: 175762
loss: 1.0185168981552124,grad_norm: 0.992493360526713, iteration: 175763
loss: 1.0815798044204712,grad_norm: 0.9999997097802388, iteration: 175764
loss: 1.0319862365722656,grad_norm: 0.999999055123122, iteration: 175765
loss: 1.0024349689483643,grad_norm: 0.9189583293237753, iteration: 175766
loss: 1.00117826461792,grad_norm: 0.9714098284917118, iteration: 175767
loss: 0.971868634223938,grad_norm: 0.9898117612178106, iteration: 175768
loss: 1.0098615884780884,grad_norm: 0.8207819202358192, iteration: 175769
loss: 0.9974904656410217,grad_norm: 0.8015553730936473, iteration: 175770
loss: 1.0074193477630615,grad_norm: 0.9288630416134123, iteration: 175771
loss: 1.0131148099899292,grad_norm: 0.9579931271992416, iteration: 175772
loss: 0.9966412782669067,grad_norm: 0.8605760745008877, iteration: 175773
loss: 0.9783040881156921,grad_norm: 0.9999995825566493, iteration: 175774
loss: 0.9834197759628296,grad_norm: 0.9717912727911362, iteration: 175775
loss: 1.0188820362091064,grad_norm: 0.9999990724349606, iteration: 175776
loss: 0.9986955523490906,grad_norm: 0.8916682749331657, iteration: 175777
loss: 0.9995495676994324,grad_norm: 0.800562864127426, iteration: 175778
loss: 0.9772156476974487,grad_norm: 0.8669565458484528, iteration: 175779
loss: 0.9994094371795654,grad_norm: 0.9999990772886781, iteration: 175780
loss: 1.001258134841919,grad_norm: 0.8952851955601835, iteration: 175781
loss: 1.0169235467910767,grad_norm: 0.9999997272797858, iteration: 175782
loss: 0.9704457521438599,grad_norm: 0.9473183181008543, iteration: 175783
loss: 1.071953535079956,grad_norm: 0.9999992525906957, iteration: 175784
loss: 1.0011134147644043,grad_norm: 0.9999990994490353, iteration: 175785
loss: 0.9947280883789062,grad_norm: 0.9999990865126018, iteration: 175786
loss: 1.0304125547409058,grad_norm: 0.8858389668957286, iteration: 175787
loss: 1.008611798286438,grad_norm: 0.9999992579765032, iteration: 175788
loss: 1.010489821434021,grad_norm: 0.9999991010676389, iteration: 175789
loss: 0.9986826777458191,grad_norm: 0.9097187906295696, iteration: 175790
loss: 1.0321705341339111,grad_norm: 0.9999991442023984, iteration: 175791
loss: 1.015163779258728,grad_norm: 0.9999991223411563, iteration: 175792
loss: 0.99056077003479,grad_norm: 0.8531867946231361, iteration: 175793
loss: 0.997467577457428,grad_norm: 0.9513876943909716, iteration: 175794
loss: 1.0236927270889282,grad_norm: 0.9494307170687776, iteration: 175795
loss: 0.9823845028877258,grad_norm: 0.999998952367896, iteration: 175796
loss: 0.9590301513671875,grad_norm: 0.901674051561994, iteration: 175797
loss: 1.0221290588378906,grad_norm: 0.9257192472678714, iteration: 175798
loss: 1.0391511917114258,grad_norm: 0.999999308221017, iteration: 175799
loss: 1.0073931217193604,grad_norm: 0.9225679470507352, iteration: 175800
loss: 0.9890904426574707,grad_norm: 0.9778434091636113, iteration: 175801
loss: 1.026517629623413,grad_norm: 0.999999075368804, iteration: 175802
loss: 0.963379979133606,grad_norm: 0.9999990800592041, iteration: 175803
loss: 1.008960247039795,grad_norm: 0.8737568833654771, iteration: 175804
loss: 1.0029468536376953,grad_norm: 0.9999992203495994, iteration: 175805
loss: 0.984894871711731,grad_norm: 0.9399253009483578, iteration: 175806
loss: 1.0172375440597534,grad_norm: 0.9999990538731479, iteration: 175807
loss: 0.9694176316261292,grad_norm: 0.8906344083953331, iteration: 175808
loss: 1.0145416259765625,grad_norm: 0.999999152636053, iteration: 175809
loss: 0.9975613355636597,grad_norm: 0.9350017566887152, iteration: 175810
loss: 1.0958365201950073,grad_norm: 0.9999999546022852, iteration: 175811
loss: 0.9488016366958618,grad_norm: 0.8842942991333124, iteration: 175812
loss: 1.006803274154663,grad_norm: 0.9999990548677699, iteration: 175813
loss: 1.0486007928848267,grad_norm: 0.9999997928760107, iteration: 175814
loss: 1.003495216369629,grad_norm: 0.8866174234908083, iteration: 175815
loss: 0.9976561069488525,grad_norm: 0.943270130307195, iteration: 175816
loss: 1.0046504735946655,grad_norm: 0.8664354636040312, iteration: 175817
loss: 1.0006103515625,grad_norm: 0.9507872057435253, iteration: 175818
loss: 0.9984502792358398,grad_norm: 0.9999990864348622, iteration: 175819
loss: 1.003473162651062,grad_norm: 0.9999990507201245, iteration: 175820
loss: 1.0196608304977417,grad_norm: 0.9992334231672181, iteration: 175821
loss: 0.9738750457763672,grad_norm: 0.9999991139522644, iteration: 175822
loss: 1.0102989673614502,grad_norm: 0.9999991834683946, iteration: 175823
loss: 0.9949608445167542,grad_norm: 0.9332073485610476, iteration: 175824
loss: 0.9903469681739807,grad_norm: 0.9999990295688926, iteration: 175825
loss: 1.0045140981674194,grad_norm: 0.9999993542563916, iteration: 175826
loss: 0.9854533672332764,grad_norm: 0.8687020271741654, iteration: 175827
loss: 0.9947150945663452,grad_norm: 0.9616961357306217, iteration: 175828
loss: 1.05545973777771,grad_norm: 0.9999996230512329, iteration: 175829
loss: 0.9677433371543884,grad_norm: 0.8680348730025155, iteration: 175830
loss: 1.0263158082962036,grad_norm: 0.9437068243318834, iteration: 175831
loss: 0.993313729763031,grad_norm: 0.910781631892072, iteration: 175832
loss: 1.0253733396530151,grad_norm: 0.9999996631618857, iteration: 175833
loss: 0.9934703707695007,grad_norm: 0.9999992178454413, iteration: 175834
loss: 0.9774399399757385,grad_norm: 0.8888183837469054, iteration: 175835
loss: 0.9885141849517822,grad_norm: 0.8891989096509931, iteration: 175836
loss: 1.0192071199417114,grad_norm: 0.9999992743353282, iteration: 175837
loss: 1.0215266942977905,grad_norm: 0.9999992436333189, iteration: 175838
loss: 1.0195822715759277,grad_norm: 0.9999990651033266, iteration: 175839
loss: 1.0215424299240112,grad_norm: 0.8924003845785661, iteration: 175840
loss: 0.9953108429908752,grad_norm: 0.9999992502637219, iteration: 175841
loss: 0.9798202514648438,grad_norm: 0.9006583746663687, iteration: 175842
loss: 1.0378164052963257,grad_norm: 0.9999997683757039, iteration: 175843
loss: 1.1468970775604248,grad_norm: 0.9999991330465474, iteration: 175844
loss: 0.9784787893295288,grad_norm: 0.8150583255908889, iteration: 175845
loss: 1.038722038269043,grad_norm: 0.9855622393669715, iteration: 175846
loss: 1.009035348892212,grad_norm: 0.999999624179456, iteration: 175847
loss: 1.0175689458847046,grad_norm: 0.9736383915082325, iteration: 175848
loss: 0.9597037434577942,grad_norm: 0.9607887363759302, iteration: 175849
loss: 1.0247430801391602,grad_norm: 0.9999991698946683, iteration: 175850
loss: 0.997148871421814,grad_norm: 0.8519341738474759, iteration: 175851
loss: 0.9811354875564575,grad_norm: 0.8243486171272774, iteration: 175852
loss: 1.0122355222702026,grad_norm: 0.9708614428678072, iteration: 175853
loss: 0.9917193055152893,grad_norm: 0.9999991922604748, iteration: 175854
loss: 1.0223337411880493,grad_norm: 0.9999992629628779, iteration: 175855
loss: 0.9950820803642273,grad_norm: 0.959859320706863, iteration: 175856
loss: 0.9775059819221497,grad_norm: 0.9999993068574973, iteration: 175857
loss: 0.9878352880477905,grad_norm: 0.9118462104706249, iteration: 175858
loss: 1.0025590658187866,grad_norm: 0.9999991932423017, iteration: 175859
loss: 1.0419535636901855,grad_norm: 0.9999998132408628, iteration: 175860
loss: 1.0088995695114136,grad_norm: 0.8292608923934524, iteration: 175861
loss: 1.0197303295135498,grad_norm: 0.8850176945780992, iteration: 175862
loss: 0.9967043399810791,grad_norm: 0.9398108832924531, iteration: 175863
loss: 0.971699595451355,grad_norm: 0.9999992468034808, iteration: 175864
loss: 1.0237523317337036,grad_norm: 0.9999990690667876, iteration: 175865
loss: 1.0187053680419922,grad_norm: 0.999999082672546, iteration: 175866
loss: 0.9873965978622437,grad_norm: 0.999999129899036, iteration: 175867
loss: 0.9593630433082581,grad_norm: 0.7902411897766956, iteration: 175868
loss: 0.9988681674003601,grad_norm: 0.9449018821164364, iteration: 175869
loss: 1.0140459537506104,grad_norm: 0.9999989740401729, iteration: 175870
loss: 1.003296136856079,grad_norm: 0.9999992133495406, iteration: 175871
loss: 0.9610703587532043,grad_norm: 0.9999992198824179, iteration: 175872
loss: 0.9879791140556335,grad_norm: 0.8423494470762175, iteration: 175873
loss: 0.9720158576965332,grad_norm: 0.999999252039607, iteration: 175874
loss: 1.0051579475402832,grad_norm: 0.8386430657586414, iteration: 175875
loss: 0.9733263850212097,grad_norm: 0.9999992895509923, iteration: 175876
loss: 1.0881717205047607,grad_norm: 0.9999991402185008, iteration: 175877
loss: 0.9838716387748718,grad_norm: 0.9999992613306941, iteration: 175878
loss: 0.9947548508644104,grad_norm: 0.9999990698903657, iteration: 175879
loss: 0.9899429082870483,grad_norm: 0.9999991636345635, iteration: 175880
loss: 0.9580731987953186,grad_norm: 0.9999991149734546, iteration: 175881
loss: 0.9802055954933167,grad_norm: 0.9621758505704443, iteration: 175882
loss: 1.0034799575805664,grad_norm: 0.9999993487052058, iteration: 175883
loss: 1.0193732976913452,grad_norm: 0.999999798753597, iteration: 175884
loss: 1.01539146900177,grad_norm: 0.9999991521065682, iteration: 175885
loss: 0.9448481202125549,grad_norm: 0.9999992806683449, iteration: 175886
loss: 1.0020920038223267,grad_norm: 0.9999990769894147, iteration: 175887
loss: 0.9606359004974365,grad_norm: 0.9326346970018725, iteration: 175888
loss: 1.0097520351409912,grad_norm: 0.9999990539668063, iteration: 175889
loss: 0.9890666604042053,grad_norm: 0.7913297223702817, iteration: 175890
loss: 1.0234521627426147,grad_norm: 0.9999989411369133, iteration: 175891
loss: 0.9898974299430847,grad_norm: 0.9999990837637578, iteration: 175892
loss: 1.0206762552261353,grad_norm: 0.9999990263212584, iteration: 175893
loss: 1.0125535726547241,grad_norm: 0.9999990015193002, iteration: 175894
loss: 0.9666303396224976,grad_norm: 0.9405632066738455, iteration: 175895
loss: 0.9940749406814575,grad_norm: 0.855217881599084, iteration: 175896
loss: 1.0005792379379272,grad_norm: 0.9762379408712852, iteration: 175897
loss: 1.0234994888305664,grad_norm: 0.9999990583415782, iteration: 175898
loss: 0.9894134402275085,grad_norm: 0.9999990688203925, iteration: 175899
loss: 0.979129433631897,grad_norm: 0.9999992783768441, iteration: 175900
loss: 1.0053305625915527,grad_norm: 0.9999989821657892, iteration: 175901
loss: 0.9781853556632996,grad_norm: 0.999999093041073, iteration: 175902
loss: 0.971287190914154,grad_norm: 0.9999991421363748, iteration: 175903
loss: 1.0084043741226196,grad_norm: 0.7864950912557072, iteration: 175904
loss: 1.0103144645690918,grad_norm: 0.9543300026970606, iteration: 175905
loss: 1.0248830318450928,grad_norm: 0.9622343650178026, iteration: 175906
loss: 0.9891895651817322,grad_norm: 0.9109551160036652, iteration: 175907
loss: 1.008020043373108,grad_norm: 0.9999991043903328, iteration: 175908
loss: 0.9685908555984497,grad_norm: 0.834928777215942, iteration: 175909
loss: 0.9781087040901184,grad_norm: 0.9247669513363386, iteration: 175910
loss: 1.0317195653915405,grad_norm: 0.9999991888676343, iteration: 175911
loss: 0.9874112010002136,grad_norm: 0.8867848030082695, iteration: 175912
loss: 1.0263012647628784,grad_norm: 0.9999992019059684, iteration: 175913
loss: 0.9738885164260864,grad_norm: 0.8170693305087073, iteration: 175914
loss: 0.953940212726593,grad_norm: 0.9999990997416343, iteration: 175915
loss: 0.9789863228797913,grad_norm: 0.9437308691916656, iteration: 175916
loss: 0.9802022576332092,grad_norm: 0.8232161804593965, iteration: 175917
loss: 0.9919962286949158,grad_norm: 0.9999991603187048, iteration: 175918
loss: 1.0389518737792969,grad_norm: 0.9523085937111478, iteration: 175919
loss: 1.029654860496521,grad_norm: 0.9999990964647648, iteration: 175920
loss: 1.0045503377914429,grad_norm: 0.9999996009226565, iteration: 175921
loss: 1.0253204107284546,grad_norm: 1.0000000473564261, iteration: 175922
loss: 1.0054751634597778,grad_norm: 0.9368738620513976, iteration: 175923
loss: 1.0068926811218262,grad_norm: 0.907891814587951, iteration: 175924
loss: 1.0013954639434814,grad_norm: 0.999999780572102, iteration: 175925
loss: 0.9960875511169434,grad_norm: 0.9307678451278506, iteration: 175926
loss: 0.9861536622047424,grad_norm: 0.9149667098459028, iteration: 175927
loss: 1.0050604343414307,grad_norm: 0.7882324027579958, iteration: 175928
loss: 1.0275558233261108,grad_norm: 0.964930982769977, iteration: 175929
loss: 1.0678728818893433,grad_norm: 0.9999996896359328, iteration: 175930
loss: 1.0173075199127197,grad_norm: 0.9999990972661216, iteration: 175931
loss: 1.055993914604187,grad_norm: 0.9999992258282118, iteration: 175932
loss: 1.0111933946609497,grad_norm: 0.9439960974244126, iteration: 175933
loss: 0.9778291583061218,grad_norm: 0.9999992962717414, iteration: 175934
loss: 1.0261778831481934,grad_norm: 0.9999989723382047, iteration: 175935
loss: 0.9898012280464172,grad_norm: 0.999999067007083, iteration: 175936
loss: 1.0033966302871704,grad_norm: 0.8699316928759662, iteration: 175937
loss: 1.0033884048461914,grad_norm: 0.9999996472232779, iteration: 175938
loss: 1.0191158056259155,grad_norm: 0.999999250342094, iteration: 175939
loss: 1.0423150062561035,grad_norm: 0.999999040089073, iteration: 175940
loss: 0.974950909614563,grad_norm: 0.9999991361332984, iteration: 175941
loss: 0.9886553287506104,grad_norm: 0.9691591763491496, iteration: 175942
loss: 0.9701579809188843,grad_norm: 0.895618054346245, iteration: 175943
loss: 1.0235893726348877,grad_norm: 0.9999992260065238, iteration: 175944
loss: 0.9973713755607605,grad_norm: 0.9178079699178822, iteration: 175945
loss: 0.946572482585907,grad_norm: 0.9622449713818629, iteration: 175946
loss: 0.9848169684410095,grad_norm: 0.9999990781379909, iteration: 175947
loss: 0.9881049394607544,grad_norm: 0.9561408456076244, iteration: 175948
loss: 0.9857852458953857,grad_norm: 0.8349193016907508, iteration: 175949
loss: 0.9978743195533752,grad_norm: 0.8092392782097974, iteration: 175950
loss: 1.0132415294647217,grad_norm: 0.8964504070198023, iteration: 175951
loss: 0.937685489654541,grad_norm: 0.9999991206713514, iteration: 175952
loss: 0.997286856174469,grad_norm: 0.891541556787373, iteration: 175953
loss: 1.000327467918396,grad_norm: 0.880749656479338, iteration: 175954
loss: 1.060489535331726,grad_norm: 0.9659966388468695, iteration: 175955
loss: 0.9707616567611694,grad_norm: 0.8806088593376087, iteration: 175956
loss: 1.0462015867233276,grad_norm: 0.9999992237641538, iteration: 175957
loss: 0.9937926530838013,grad_norm: 0.9262272592453608, iteration: 175958
loss: 1.001063346862793,grad_norm: 0.8021247153789082, iteration: 175959
loss: 0.9756295680999756,grad_norm: 0.9621196778661995, iteration: 175960
loss: 0.9893181920051575,grad_norm: 0.9601500012281611, iteration: 175961
loss: 0.9963119626045227,grad_norm: 0.9730437428475796, iteration: 175962
loss: 0.9891672730445862,grad_norm: 0.9999990888524111, iteration: 175963
loss: 1.009293794631958,grad_norm: 0.9120279633149888, iteration: 175964
loss: 1.0342152118682861,grad_norm: 0.9999992014304532, iteration: 175965
loss: 0.9733668565750122,grad_norm: 0.9999990419049756, iteration: 175966
loss: 0.9842825531959534,grad_norm: 0.9734559365027777, iteration: 175967
loss: 1.0312418937683105,grad_norm: 0.9999993296045424, iteration: 175968
loss: 1.0104197263717651,grad_norm: 0.9999994143413744, iteration: 175969
loss: 0.9902635216712952,grad_norm: 0.904454167148348, iteration: 175970
loss: 0.9956864714622498,grad_norm: 0.9388826431137444, iteration: 175971
loss: 0.9961618781089783,grad_norm: 0.9828486026259566, iteration: 175972
loss: 0.998824417591095,grad_norm: 0.9419012295387863, iteration: 175973
loss: 1.02158522605896,grad_norm: 0.999999102568283, iteration: 175974
loss: 0.9993605613708496,grad_norm: 0.9982743670458263, iteration: 175975
loss: 0.9744762182235718,grad_norm: 0.9999990595819654, iteration: 175976
loss: 0.9592827558517456,grad_norm: 0.9150925662592762, iteration: 175977
loss: 1.0164341926574707,grad_norm: 0.8628969440815738, iteration: 175978
loss: 0.9418889880180359,grad_norm: 0.9864973893480187, iteration: 175979
loss: 0.972411572933197,grad_norm: 0.8965030806701343, iteration: 175980
loss: 1.0109390020370483,grad_norm: 0.9035640490761042, iteration: 175981
loss: 1.0244241952896118,grad_norm: 0.9999992270479073, iteration: 175982
loss: 1.0343645811080933,grad_norm: 0.9999994180049665, iteration: 175983
loss: 1.0204750299453735,grad_norm: 0.9999996114504099, iteration: 175984
loss: 1.0478187799453735,grad_norm: 0.9999993578633544, iteration: 175985
loss: 0.9724336862564087,grad_norm: 0.9999990625950679, iteration: 175986
loss: 1.0169423818588257,grad_norm: 0.9999989850174757, iteration: 175987
loss: 1.0101618766784668,grad_norm: 0.885613639287867, iteration: 175988
loss: 1.0641027688980103,grad_norm: 0.9999994605561197, iteration: 175989
loss: 0.9893538951873779,grad_norm: 0.9999991726207657, iteration: 175990
loss: 1.0219758749008179,grad_norm: 0.9999991274975911, iteration: 175991
loss: 1.0108946561813354,grad_norm: 0.9999991743973831, iteration: 175992
loss: 1.00263249874115,grad_norm: 0.9130119545347453, iteration: 175993
loss: 1.0051954984664917,grad_norm: 0.8387722625008142, iteration: 175994
loss: 0.9973623752593994,grad_norm: 0.9856444165685183, iteration: 175995
loss: 1.0008881092071533,grad_norm: 0.9999990966017471, iteration: 175996
loss: 1.0082002878189087,grad_norm: 0.9999991958586378, iteration: 175997
loss: 0.9801225066184998,grad_norm: 0.9278140899260718, iteration: 175998
loss: 0.9600841403007507,grad_norm: 0.9929131060116805, iteration: 175999
loss: 0.9985005855560303,grad_norm: 0.9999991979140599, iteration: 176000
loss: 1.0627509355545044,grad_norm: 0.9999993081389571, iteration: 176001
loss: 1.0099260807037354,grad_norm: 0.9999990815981312, iteration: 176002
loss: 1.0279873609542847,grad_norm: 0.9586458048880623, iteration: 176003
loss: 0.9682374596595764,grad_norm: 0.9644029976209038, iteration: 176004
loss: 1.0236570835113525,grad_norm: 0.9999990467105102, iteration: 176005
loss: 1.0165053606033325,grad_norm: 0.9379423880699381, iteration: 176006
loss: 0.9730382561683655,grad_norm: 0.8129389287959164, iteration: 176007
loss: 1.0230679512023926,grad_norm: 0.8127719812398462, iteration: 176008
loss: 0.9993225336074829,grad_norm: 0.9999991132759912, iteration: 176009
loss: 0.9998690485954285,grad_norm: 0.9999989475114449, iteration: 176010
loss: 0.9897840619087219,grad_norm: 0.9999992984407926, iteration: 176011
loss: 1.0119720697402954,grad_norm: 0.9587001379682685, iteration: 176012
loss: 0.9751462340354919,grad_norm: 0.9702605576905765, iteration: 176013
loss: 0.978820264339447,grad_norm: 0.8859262949576113, iteration: 176014
loss: 0.9939748048782349,grad_norm: 0.999999011687059, iteration: 176015
loss: 1.0303460359573364,grad_norm: 0.9999992243780893, iteration: 176016
loss: 1.009377360343933,grad_norm: 0.9999989827268668, iteration: 176017
loss: 1.001432180404663,grad_norm: 0.8365636326126615, iteration: 176018
loss: 0.9976639747619629,grad_norm: 0.9999990978160245, iteration: 176019
loss: 1.0028616189956665,grad_norm: 0.895228246867361, iteration: 176020
loss: 0.9441404938697815,grad_norm: 0.8860093247097156, iteration: 176021
loss: 1.0198439359664917,grad_norm: 0.9171009551373982, iteration: 176022
loss: 0.9747982025146484,grad_norm: 0.9999992008893392, iteration: 176023
loss: 0.9808914065361023,grad_norm: 0.9999997489131403, iteration: 176024
loss: 1.0113708972930908,grad_norm: 0.9999991385404607, iteration: 176025
loss: 1.0002927780151367,grad_norm: 0.9859171090671018, iteration: 176026
loss: 0.9651482105255127,grad_norm: 0.9999993867711343, iteration: 176027
loss: 0.9938654899597168,grad_norm: 0.9999990776190296, iteration: 176028
loss: 1.015336513519287,grad_norm: 0.8319171673164034, iteration: 176029
loss: 1.022082805633545,grad_norm: 0.9999992913083445, iteration: 176030
loss: 1.0066853761672974,grad_norm: 0.9768173132419417, iteration: 176031
loss: 1.0250232219696045,grad_norm: 0.9093742083691512, iteration: 176032
loss: 0.9972578883171082,grad_norm: 0.9981078585403889, iteration: 176033
loss: 0.9797579646110535,grad_norm: 0.9660693771414084, iteration: 176034
loss: 0.9785997867584229,grad_norm: 0.9999990947312128, iteration: 176035
loss: 0.9827058911323547,grad_norm: 0.9758595298541969, iteration: 176036
loss: 1.0205930471420288,grad_norm: 0.9016877031396402, iteration: 176037
loss: 0.9827316403388977,grad_norm: 0.9999990754939466, iteration: 176038
loss: 0.9401031136512756,grad_norm: 0.8471276972713596, iteration: 176039
loss: 1.0038728713989258,grad_norm: 0.965300124338743, iteration: 176040
loss: 0.9972335696220398,grad_norm: 0.9999991616731909, iteration: 176041
loss: 1.0170596837997437,grad_norm: 0.9745045531670055, iteration: 176042
loss: 1.0042976140975952,grad_norm: 0.9999992133578114, iteration: 176043
loss: 1.006751298904419,grad_norm: 0.8242677577284737, iteration: 176044
loss: 0.9953153133392334,grad_norm: 0.9999990426217049, iteration: 176045
loss: 0.9974985718727112,grad_norm: 0.9943071659997678, iteration: 176046
loss: 0.9345620274543762,grad_norm: 0.9192687593164625, iteration: 176047
loss: 0.9801024794578552,grad_norm: 0.9051230170733856, iteration: 176048
loss: 1.0130640268325806,grad_norm: 0.9999993598571932, iteration: 176049
loss: 1.0169066190719604,grad_norm: 0.9644238653903004, iteration: 176050
loss: 0.9879928827285767,grad_norm: 0.999999096885387, iteration: 176051
loss: 1.0036262273788452,grad_norm: 0.9545373702311307, iteration: 176052
loss: 0.9928236603736877,grad_norm: 0.895878372344895, iteration: 176053
loss: 0.9952837228775024,grad_norm: 0.999999134072813, iteration: 176054
loss: 1.0165202617645264,grad_norm: 0.9509423928763852, iteration: 176055
loss: 0.9947775602340698,grad_norm: 0.9999989516339923, iteration: 176056
loss: 0.9665701389312744,grad_norm: 0.9999989926326097, iteration: 176057
loss: 1.0140620470046997,grad_norm: 0.8707564030607442, iteration: 176058
loss: 1.0248967409133911,grad_norm: 0.9999991406684842, iteration: 176059
loss: 1.0189876556396484,grad_norm: 0.9702152989058258, iteration: 176060
loss: 1.0100278854370117,grad_norm: 0.9999992384284665, iteration: 176061
loss: 1.0191856622695923,grad_norm: 0.8866096710470016, iteration: 176062
loss: 0.9875545501708984,grad_norm: 0.941920974058465, iteration: 176063
loss: 0.9844726324081421,grad_norm: 0.9999991815560846, iteration: 176064
loss: 1.0772161483764648,grad_norm: 0.9999990112639596, iteration: 176065
loss: 0.9757674932479858,grad_norm: 0.8144340523262328, iteration: 176066
loss: 0.9953774809837341,grad_norm: 0.9999992020819277, iteration: 176067
loss: 0.9751116037368774,grad_norm: 0.9999992665455022, iteration: 176068
loss: 1.0123001337051392,grad_norm: 0.9999992753919229, iteration: 176069
loss: 1.017878770828247,grad_norm: 0.9999991168706978, iteration: 176070
loss: 1.007238507270813,grad_norm: 0.9999989666904129, iteration: 176071
loss: 1.0222399234771729,grad_norm: 0.9753017345547855, iteration: 176072
loss: 0.9934149384498596,grad_norm: 0.99999921765703, iteration: 176073
loss: 1.0290545225143433,grad_norm: 0.9528301658698799, iteration: 176074
loss: 1.012855052947998,grad_norm: 0.8270547767009659, iteration: 176075
loss: 0.9919763207435608,grad_norm: 0.9735510499105797, iteration: 176076
loss: 0.9726592302322388,grad_norm: 0.9688065922408603, iteration: 176077
loss: 1.0100834369659424,grad_norm: 0.9759271634819376, iteration: 176078
loss: 0.9885478019714355,grad_norm: 0.999999055941076, iteration: 176079
loss: 0.9727153182029724,grad_norm: 0.9308314889163888, iteration: 176080
loss: 1.0322117805480957,grad_norm: 0.9999994761182048, iteration: 176081
loss: 0.9834475517272949,grad_norm: 0.9955638139632249, iteration: 176082
loss: 0.9782211780548096,grad_norm: 0.9011376893555095, iteration: 176083
loss: 0.9949911236763,grad_norm: 0.9999990431286171, iteration: 176084
loss: 0.9673165678977966,grad_norm: 0.8690974499761717, iteration: 176085
loss: 0.9631207585334778,grad_norm: 0.905079162816636, iteration: 176086
loss: 1.0009287595748901,grad_norm: 0.8733671885334349, iteration: 176087
loss: 0.983712911605835,grad_norm: 0.9999992498117386, iteration: 176088
loss: 0.9945226907730103,grad_norm: 0.9999991275194531, iteration: 176089
loss: 1.0122517347335815,grad_norm: 0.9999996112469102, iteration: 176090
loss: 1.0003530979156494,grad_norm: 0.999999412815604, iteration: 176091
loss: 1.0211158990859985,grad_norm: 0.9999990796996507, iteration: 176092
loss: 0.969089925289154,grad_norm: 0.9753420043301999, iteration: 176093
loss: 0.9803658723831177,grad_norm: 0.999999088456183, iteration: 176094
loss: 0.9771416187286377,grad_norm: 0.9899119720811985, iteration: 176095
loss: 1.0025975704193115,grad_norm: 0.993050216587083, iteration: 176096
loss: 0.9986001253128052,grad_norm: 0.9999989651525565, iteration: 176097
loss: 0.9864036440849304,grad_norm: 0.8492846103513341, iteration: 176098
loss: 1.0242607593536377,grad_norm: 0.9999989558209836, iteration: 176099
loss: 0.9635777473449707,grad_norm: 0.9894335349655775, iteration: 176100
loss: 1.0000771284103394,grad_norm: 0.9999991318101404, iteration: 176101
loss: 0.995231568813324,grad_norm: 0.9999990631071817, iteration: 176102
loss: 1.0202431678771973,grad_norm: 0.9999993300294959, iteration: 176103
loss: 0.9903191328048706,grad_norm: 0.9310492391241192, iteration: 176104
loss: 0.9928776025772095,grad_norm: 0.9999991321753174, iteration: 176105
loss: 1.0441187620162964,grad_norm: 0.9999992582674282, iteration: 176106
loss: 0.9987623691558838,grad_norm: 0.9999991389869904, iteration: 176107
loss: 1.027443289756775,grad_norm: 0.8988565769988361, iteration: 176108
loss: 0.9710249900817871,grad_norm: 0.9692500516232831, iteration: 176109
loss: 0.9848623871803284,grad_norm: 0.9999991832423695, iteration: 176110
loss: 0.9627726674079895,grad_norm: 0.9351113892819886, iteration: 176111
loss: 1.0073463916778564,grad_norm: 0.999999183987274, iteration: 176112
loss: 1.0044420957565308,grad_norm: 0.9794331832836074, iteration: 176113
loss: 0.9985198974609375,grad_norm: 0.8310370652579926, iteration: 176114
loss: 0.9846615791320801,grad_norm: 0.9432212771803411, iteration: 176115
loss: 1.0297068357467651,grad_norm: 0.9999991091038155, iteration: 176116
loss: 1.0263683795928955,grad_norm: 0.9999992576334557, iteration: 176117
loss: 0.9712972044944763,grad_norm: 0.999999200169325, iteration: 176118
loss: 1.001301646232605,grad_norm: 0.8618393866839319, iteration: 176119
loss: 1.1131000518798828,grad_norm: 0.9999990877405114, iteration: 176120
loss: 0.9943044781684875,grad_norm: 0.9995529267906051, iteration: 176121
loss: 0.9702109098434448,grad_norm: 0.8798702357722017, iteration: 176122
loss: 1.0819231271743774,grad_norm: 0.9999996979656033, iteration: 176123
loss: 0.9802630543708801,grad_norm: 0.9999991564113228, iteration: 176124
loss: 0.9781785011291504,grad_norm: 0.9999990341459742, iteration: 176125
loss: 0.9738076329231262,grad_norm: 0.9999991497926787, iteration: 176126
loss: 1.0147202014923096,grad_norm: 0.9802721605347218, iteration: 176127
loss: 1.0320955514907837,grad_norm: 0.9999994792169583, iteration: 176128
loss: 0.9624370336532593,grad_norm: 0.9993426191140191, iteration: 176129
loss: 1.054520606994629,grad_norm: 0.9999992089794053, iteration: 176130
loss: 0.9927525520324707,grad_norm: 0.9999991424047705, iteration: 176131
loss: 0.9760932326316833,grad_norm: 0.9600150324819245, iteration: 176132
loss: 0.999881386756897,grad_norm: 0.9999992007769775, iteration: 176133
loss: 0.9660267233848572,grad_norm: 0.8762180514341745, iteration: 176134
loss: 1.0040708780288696,grad_norm: 0.9097443588797728, iteration: 176135
loss: 1.0033636093139648,grad_norm: 0.9999992091877651, iteration: 176136
loss: 1.0245764255523682,grad_norm: 0.9393979447670419, iteration: 176137
loss: 0.9945470094680786,grad_norm: 0.9999991053721629, iteration: 176138
loss: 1.0138334035873413,grad_norm: 0.9999990576043888, iteration: 176139
loss: 0.9996817708015442,grad_norm: 0.9306337018067192, iteration: 176140
loss: 1.0374953746795654,grad_norm: 0.9999990939552481, iteration: 176141
loss: 1.00795578956604,grad_norm: 0.9634891710417269, iteration: 176142
loss: 0.9689047932624817,grad_norm: 0.9123195604072446, iteration: 176143
loss: 0.992409884929657,grad_norm: 0.9999992210628877, iteration: 176144
loss: 1.0064724683761597,grad_norm: 0.9760804975211875, iteration: 176145
loss: 0.9927231669425964,grad_norm: 0.9937024767645419, iteration: 176146
loss: 0.9751676321029663,grad_norm: 0.9999992353389449, iteration: 176147
loss: 1.0259926319122314,grad_norm: 0.9999991364206358, iteration: 176148
loss: 0.9987662434577942,grad_norm: 0.977291337496396, iteration: 176149
loss: 1.012718915939331,grad_norm: 0.999999312718949, iteration: 176150
loss: 1.0208417177200317,grad_norm: 0.9999990483338356, iteration: 176151
loss: 1.0027707815170288,grad_norm: 0.9999991956741874, iteration: 176152
loss: 0.9857488870620728,grad_norm: 0.9999991155834325, iteration: 176153
loss: 1.0185294151306152,grad_norm: 0.8643676772639586, iteration: 176154
loss: 0.9839167594909668,grad_norm: 0.9141713149688544, iteration: 176155
loss: 0.990267276763916,grad_norm: 0.9452695872192988, iteration: 176156
loss: 0.9891128540039062,grad_norm: 0.9999990591368051, iteration: 176157
loss: 0.9708380699157715,grad_norm: 0.9999991709820717, iteration: 176158
loss: 1.0248444080352783,grad_norm: 0.8587836266772947, iteration: 176159
loss: 1.0648219585418701,grad_norm: 0.9999992013351847, iteration: 176160
loss: 0.9776378870010376,grad_norm: 0.9261968057828117, iteration: 176161
loss: 1.020740032196045,grad_norm: 0.999999263770315, iteration: 176162
loss: 1.0115840435028076,grad_norm: 0.9999990563991182, iteration: 176163
loss: 1.0149562358856201,grad_norm: 0.9999990536539743, iteration: 176164
loss: 1.0211175680160522,grad_norm: 0.9999992199387753, iteration: 176165
loss: 0.9959291219711304,grad_norm: 0.9571125982170071, iteration: 176166
loss: 0.9617713689804077,grad_norm: 0.9536108368326452, iteration: 176167
loss: 0.9987106323242188,grad_norm: 0.9999991671447055, iteration: 176168
loss: 0.9925763010978699,grad_norm: 0.9999989652771076, iteration: 176169
loss: 1.0357112884521484,grad_norm: 0.9999991933847368, iteration: 176170
loss: 1.0072752237319946,grad_norm: 0.9999990795695296, iteration: 176171
loss: 1.0396666526794434,grad_norm: 0.9999990067495617, iteration: 176172
loss: 1.0118167400360107,grad_norm: 0.9999990426722913, iteration: 176173
loss: 0.9986818432807922,grad_norm: 0.9999990171790489, iteration: 176174
loss: 1.0055394172668457,grad_norm: 0.999999182572772, iteration: 176175
loss: 1.0171689987182617,grad_norm: 0.9999991238700665, iteration: 176176
loss: 1.0113165378570557,grad_norm: 0.9999991865026279, iteration: 176177
loss: 0.990118682384491,grad_norm: 0.9999989122220878, iteration: 176178
loss: 1.0330555438995361,grad_norm: 0.9999992243613616, iteration: 176179
loss: 1.0388520956039429,grad_norm: 0.9999989565121875, iteration: 176180
loss: 1.0021607875823975,grad_norm: 0.9999991585520561, iteration: 176181
loss: 1.0108611583709717,grad_norm: 0.8689264816509251, iteration: 176182
loss: 0.9856187701225281,grad_norm: 0.9146918821361101, iteration: 176183
loss: 1.0410617589950562,grad_norm: 0.971058964898216, iteration: 176184
loss: 1.0253976583480835,grad_norm: 0.9578068593483409, iteration: 176185
loss: 0.9675807356834412,grad_norm: 0.6958373619209757, iteration: 176186
loss: 1.0207507610321045,grad_norm: 0.9999989844271571, iteration: 176187
loss: 0.9919401407241821,grad_norm: 0.9271294168152142, iteration: 176188
loss: 0.9949536919593811,grad_norm: 0.9309138258574583, iteration: 176189
loss: 0.995602548122406,grad_norm: 0.8766109803633482, iteration: 176190
loss: 1.0101515054702759,grad_norm: 0.9761746730847306, iteration: 176191
loss: 0.9858068227767944,grad_norm: 0.8660086852357879, iteration: 176192
loss: 1.0383118391036987,grad_norm: 0.9999997484775004, iteration: 176193
loss: 0.9432717561721802,grad_norm: 0.9907306225525437, iteration: 176194
loss: 0.9616321921348572,grad_norm: 0.9999991109986428, iteration: 176195
loss: 1.0251306295394897,grad_norm: 0.9999996008032026, iteration: 176196
loss: 1.0015605688095093,grad_norm: 0.9999990357042357, iteration: 176197
loss: 0.9947470426559448,grad_norm: 0.9513054735574935, iteration: 176198
loss: 0.9854959845542908,grad_norm: 0.9999995543835016, iteration: 176199
loss: 1.0330003499984741,grad_norm: 0.9999990701210063, iteration: 176200
loss: 1.0104633569717407,grad_norm: 0.9160099117447912, iteration: 176201
loss: 0.9933676719665527,grad_norm: 0.9999989839258072, iteration: 176202
loss: 1.0097455978393555,grad_norm: 0.8856553932149491, iteration: 176203
loss: 0.9792558550834656,grad_norm: 0.999999015115319, iteration: 176204
loss: 1.0044618844985962,grad_norm: 0.9999991524382061, iteration: 176205
loss: 0.9675720930099487,grad_norm: 0.9111540275385779, iteration: 176206
loss: 0.9774097800254822,grad_norm: 0.9999989290551569, iteration: 176207
loss: 1.0338364839553833,grad_norm: 0.9092857905288443, iteration: 176208
loss: 0.9778279662132263,grad_norm: 0.9999990030637734, iteration: 176209
loss: 1.00584876537323,grad_norm: 0.9999993209827944, iteration: 176210
loss: 0.9933637976646423,grad_norm: 0.959321954951017, iteration: 176211
loss: 1.0265692472457886,grad_norm: 0.9095127617726082, iteration: 176212
loss: 1.0009796619415283,grad_norm: 0.921765871646968, iteration: 176213
loss: 0.9676119685173035,grad_norm: 0.9999990584569847, iteration: 176214
loss: 0.9823210835456848,grad_norm: 0.9999991155554151, iteration: 176215
loss: 0.9959269762039185,grad_norm: 0.8119240637150521, iteration: 176216
loss: 0.981792151927948,grad_norm: 0.8912690238495014, iteration: 176217
loss: 1.0043997764587402,grad_norm: 0.9999991412168416, iteration: 176218
loss: 1.0559355020523071,grad_norm: 0.8631295528398475, iteration: 176219
loss: 1.00457763671875,grad_norm: 0.8421365899686071, iteration: 176220
loss: 1.0301121473312378,grad_norm: 0.9417643634339817, iteration: 176221
loss: 1.0007858276367188,grad_norm: 0.9501225515880564, iteration: 176222
loss: 1.0260437726974487,grad_norm: 0.836992904056427, iteration: 176223
loss: 1.021830677986145,grad_norm: 0.885213552272161, iteration: 176224
loss: 1.0210648775100708,grad_norm: 0.9999992911495936, iteration: 176225
loss: 0.965546190738678,grad_norm: 0.9999992179596903, iteration: 176226
loss: 1.002622127532959,grad_norm: 0.9493295731872126, iteration: 176227
loss: 0.9535009264945984,grad_norm: 0.9999989810462707, iteration: 176228
loss: 0.9858843088150024,grad_norm: 0.9999990542571788, iteration: 176229
loss: 0.9741915464401245,grad_norm: 0.9999997688857826, iteration: 176230
loss: 1.0783940553665161,grad_norm: 0.9999995452952968, iteration: 176231
loss: 1.0082615613937378,grad_norm: 0.8164503055770939, iteration: 176232
loss: 1.078922152519226,grad_norm: 0.820624013345064, iteration: 176233
loss: 0.9582568407058716,grad_norm: 0.9999990317790186, iteration: 176234
loss: 1.0319859981536865,grad_norm: 0.999999364318455, iteration: 176235
loss: 0.9769662618637085,grad_norm: 0.9999992203301233, iteration: 176236
loss: 1.0245698690414429,grad_norm: 0.999999733558878, iteration: 176237
loss: 0.990666925907135,grad_norm: 0.9171772059326758, iteration: 176238
loss: 0.991667628288269,grad_norm: 0.9999992261674302, iteration: 176239
loss: 0.998755693435669,grad_norm: 0.9999999059940454, iteration: 176240
loss: 0.9998524785041809,grad_norm: 0.8477979573546737, iteration: 176241
loss: 1.0654184818267822,grad_norm: 0.9999991047299586, iteration: 176242
loss: 0.9939435720443726,grad_norm: 0.9999989787111249, iteration: 176243
loss: 0.9929524660110474,grad_norm: 0.9999990371309467, iteration: 176244
loss: 1.0224711894989014,grad_norm: 0.9999992363129685, iteration: 176245
loss: 0.9686053991317749,grad_norm: 0.857722485962004, iteration: 176246
loss: 1.0354865789413452,grad_norm: 0.9999991691506581, iteration: 176247
loss: 1.004848837852478,grad_norm: 0.9999993081459245, iteration: 176248
loss: 0.9735795855522156,grad_norm: 0.9417028153985817, iteration: 176249
loss: 1.001810073852539,grad_norm: 0.9999989789010345, iteration: 176250
loss: 1.0321506261825562,grad_norm: 0.9999991614658532, iteration: 176251
loss: 0.9842429757118225,grad_norm: 0.9999992224481451, iteration: 176252
loss: 0.991574227809906,grad_norm: 0.9878985139213573, iteration: 176253
loss: 1.0269620418548584,grad_norm: 0.993560145930845, iteration: 176254
loss: 1.0664664506912231,grad_norm: 0.9999995526741033, iteration: 176255
loss: 1.0036169290542603,grad_norm: 0.9999996316156549, iteration: 176256
loss: 0.9806989431381226,grad_norm: 0.9999994747137204, iteration: 176257
loss: 0.9701216220855713,grad_norm: 0.9814927801694898, iteration: 176258
loss: 0.9904457926750183,grad_norm: 0.9137099009351324, iteration: 176259
loss: 1.011196255683899,grad_norm: 0.9999991994997589, iteration: 176260
loss: 1.0119396448135376,grad_norm: 0.9277705366382961, iteration: 176261
loss: 1.0068609714508057,grad_norm: 0.9999989756879429, iteration: 176262
loss: 0.9541687965393066,grad_norm: 0.9999989913516122, iteration: 176263
loss: 0.998992919921875,grad_norm: 0.9999992401610255, iteration: 176264
loss: 1.0443074703216553,grad_norm: 0.9999998391519984, iteration: 176265
loss: 1.0170649290084839,grad_norm: 0.8839734272979666, iteration: 176266
loss: 0.9989691972732544,grad_norm: 0.9680548094732977, iteration: 176267
loss: 0.9741113185882568,grad_norm: 0.9745661944152308, iteration: 176268
loss: 1.0366390943527222,grad_norm: 0.9999991012522801, iteration: 176269
loss: 1.004714846611023,grad_norm: 0.8539259417990506, iteration: 176270
loss: 1.0338187217712402,grad_norm: 0.8547185684822826, iteration: 176271
loss: 1.0399225950241089,grad_norm: 0.9999991170515345, iteration: 176272
loss: 0.9916973114013672,grad_norm: 0.9767441792490248, iteration: 176273
loss: 1.0014209747314453,grad_norm: 0.9999991354788934, iteration: 176274
loss: 0.960913896560669,grad_norm: 0.9999991666822259, iteration: 176275
loss: 1.0229460000991821,grad_norm: 0.9999991275999598, iteration: 176276
loss: 0.9931517243385315,grad_norm: 0.9016187902720667, iteration: 176277
loss: 0.974913477897644,grad_norm: 0.8149712437778632, iteration: 176278
loss: 0.9889922738075256,grad_norm: 0.9215996115639418, iteration: 176279
loss: 0.9938582181930542,grad_norm: 0.9999994970144501, iteration: 176280
loss: 0.9654145836830139,grad_norm: 0.9999990736718725, iteration: 176281
loss: 0.9778804779052734,grad_norm: 0.9492598914864483, iteration: 176282
loss: 0.9973142743110657,grad_norm: 0.999999032290325, iteration: 176283
loss: 0.9443727731704712,grad_norm: 0.9999991096619754, iteration: 176284
loss: 0.9695869088172913,grad_norm: 0.9375154783725047, iteration: 176285
loss: 1.0029629468917847,grad_norm: 0.7448091135069256, iteration: 176286
loss: 0.9727900624275208,grad_norm: 0.9766612767551324, iteration: 176287
loss: 1.0025572776794434,grad_norm: 0.8434250935292733, iteration: 176288
loss: 0.9681426286697388,grad_norm: 0.9908183329132705, iteration: 176289
loss: 0.9539092183113098,grad_norm: 0.9999991850062213, iteration: 176290
loss: 1.0079823732376099,grad_norm: 0.9999990652413919, iteration: 176291
loss: 1.039103627204895,grad_norm: 0.9999996555167493, iteration: 176292
loss: 1.0078519582748413,grad_norm: 0.8226844214799229, iteration: 176293
loss: 1.0000046491622925,grad_norm: 0.921751179076824, iteration: 176294
loss: 1.0270501375198364,grad_norm: 0.9863089002218131, iteration: 176295
loss: 1.0111291408538818,grad_norm: 0.8715996713652447, iteration: 176296
loss: 1.0460877418518066,grad_norm: 0.9999995171737468, iteration: 176297
loss: 1.0200181007385254,grad_norm: 0.9359077906407167, iteration: 176298
loss: 0.9872573614120483,grad_norm: 0.7743546747344185, iteration: 176299
loss: 0.9932503700256348,grad_norm: 0.9500459475025448, iteration: 176300
loss: 1.0087398290634155,grad_norm: 0.8845565289752886, iteration: 176301
loss: 1.0037808418273926,grad_norm: 0.8523618591115523, iteration: 176302
loss: 1.0410239696502686,grad_norm: 0.9999992702528067, iteration: 176303
loss: 0.9987649321556091,grad_norm: 0.9999990538360272, iteration: 176304
loss: 1.009757399559021,grad_norm: 0.9857391615357292, iteration: 176305
loss: 0.9998739957809448,grad_norm: 0.9999992219798985, iteration: 176306
loss: 1.0634262561798096,grad_norm: 0.9999994219181633, iteration: 176307
loss: 1.005268931388855,grad_norm: 0.9951866995793478, iteration: 176308
loss: 1.0046802759170532,grad_norm: 0.9999991512855636, iteration: 176309
loss: 1.0391350984573364,grad_norm: 0.9907605750572221, iteration: 176310
loss: 1.0221092700958252,grad_norm: 0.999999044379129, iteration: 176311
loss: 0.9910449981689453,grad_norm: 0.9999991910761983, iteration: 176312
loss: 1.0262317657470703,grad_norm: 0.9999991243758785, iteration: 176313
loss: 0.9713083505630493,grad_norm: 0.9959034272972012, iteration: 176314
loss: 1.0144593715667725,grad_norm: 0.9999991054194347, iteration: 176315
loss: 0.9804710745811462,grad_norm: 0.9999991560515483, iteration: 176316
loss: 1.0065914392471313,grad_norm: 0.866935183834008, iteration: 176317
loss: 1.0086840391159058,grad_norm: 0.9999991320028186, iteration: 176318
loss: 1.0278704166412354,grad_norm: 0.9999990974342104, iteration: 176319
loss: 1.0011346340179443,grad_norm: 0.9999990929041362, iteration: 176320
loss: 0.9702526926994324,grad_norm: 0.9999990819110357, iteration: 176321
loss: 1.0216807126998901,grad_norm: 0.9276323369189349, iteration: 176322
loss: 1.0162113904953003,grad_norm: 0.8874038148344418, iteration: 176323
loss: 1.006082534790039,grad_norm: 0.9216248124671731, iteration: 176324
loss: 0.9892155528068542,grad_norm: 0.8341415132363128, iteration: 176325
loss: 0.9556821584701538,grad_norm: 0.9719775758015867, iteration: 176326
loss: 0.9870902895927429,grad_norm: 0.857794664060624, iteration: 176327
loss: 1.0017917156219482,grad_norm: 0.999999268279887, iteration: 176328
loss: 1.0125792026519775,grad_norm: 0.8622724257043634, iteration: 176329
loss: 1.0138992071151733,grad_norm: 0.999999892114257, iteration: 176330
loss: 1.0281468629837036,grad_norm: 0.9999995443841143, iteration: 176331
loss: 1.0693391561508179,grad_norm: 0.9999991878015165, iteration: 176332
loss: 0.9762970805168152,grad_norm: 0.8014440715158815, iteration: 176333
loss: 1.0019315481185913,grad_norm: 0.9792347747173764, iteration: 176334
loss: 0.9966685175895691,grad_norm: 0.9003285612068413, iteration: 176335
loss: 0.9892526865005493,grad_norm: 0.9178899202627707, iteration: 176336
loss: 0.9829412698745728,grad_norm: 0.9999990492190645, iteration: 176337
loss: 1.0254937410354614,grad_norm: 0.9454171819051361, iteration: 176338
loss: 0.9892293214797974,grad_norm: 0.8592838358575395, iteration: 176339
loss: 1.0226222276687622,grad_norm: 0.9999991118141435, iteration: 176340
loss: 1.0080476999282837,grad_norm: 0.900195570214928, iteration: 176341
loss: 1.0081617832183838,grad_norm: 0.9999991682166836, iteration: 176342
loss: 0.9998658299446106,grad_norm: 0.999999151802897, iteration: 176343
loss: 1.0115363597869873,grad_norm: 0.978594439902226, iteration: 176344
loss: 1.0054399967193604,grad_norm: 0.8180087921353594, iteration: 176345
loss: 0.9528460502624512,grad_norm: 0.9999990697949149, iteration: 176346
loss: 1.0142261981964111,grad_norm: 0.9654227523878433, iteration: 176347
loss: 0.9446693658828735,grad_norm: 0.999999073613608, iteration: 176348
loss: 1.0076826810836792,grad_norm: 0.8522793555599706, iteration: 176349
loss: 1.0290764570236206,grad_norm: 0.9687235196508825, iteration: 176350
loss: 0.9688138961791992,grad_norm: 0.9999989535156026, iteration: 176351
loss: 1.0027295351028442,grad_norm: 0.9999991002640491, iteration: 176352
loss: 1.0295555591583252,grad_norm: 0.9785616247406935, iteration: 176353
loss: 0.9810920357704163,grad_norm: 0.8394819872904093, iteration: 176354
loss: 0.9675177335739136,grad_norm: 0.9847038062584699, iteration: 176355
loss: 0.9735005497932434,grad_norm: 0.9675186578359636, iteration: 176356
loss: 0.986841082572937,grad_norm: 0.999999124486232, iteration: 176357
loss: 1.0430293083190918,grad_norm: 0.9999991581190424, iteration: 176358
loss: 0.997427761554718,grad_norm: 0.8749630499097193, iteration: 176359
loss: 1.0112314224243164,grad_norm: 0.9999991920610765, iteration: 176360
loss: 1.0272221565246582,grad_norm: 0.9999991650644917, iteration: 176361
loss: 0.9980887770652771,grad_norm: 0.8874347703086913, iteration: 176362
loss: 1.0040079355239868,grad_norm: 0.9999991227141446, iteration: 176363
loss: 0.9605443477630615,grad_norm: 0.816585276110763, iteration: 176364
loss: 1.0348068475723267,grad_norm: 0.9999991485712217, iteration: 176365
loss: 1.0191779136657715,grad_norm: 0.9999991499633348, iteration: 176366
loss: 0.9920980930328369,grad_norm: 0.9999999688688881, iteration: 176367
loss: 0.9947749972343445,grad_norm: 0.9999990903094016, iteration: 176368
loss: 0.9914018511772156,grad_norm: 0.962035012005194, iteration: 176369
loss: 1.010405421257019,grad_norm: 0.9999993553185228, iteration: 176370
loss: 0.9884393811225891,grad_norm: 0.9999990361782656, iteration: 176371
loss: 0.9847629070281982,grad_norm: 0.9999995218740804, iteration: 176372
loss: 1.001052737236023,grad_norm: 0.9999989440546784, iteration: 176373
loss: 0.9656967520713806,grad_norm: 0.903255350248228, iteration: 176374
loss: 0.9945750832557678,grad_norm: 0.9259332392688396, iteration: 176375
loss: 1.0367223024368286,grad_norm: 0.9005167096846899, iteration: 176376
loss: 0.9997540712356567,grad_norm: 0.878708643829967, iteration: 176377
loss: 0.9605657458305359,grad_norm: 0.8891657982992658, iteration: 176378
loss: 0.9810165166854858,grad_norm: 0.9086380352865627, iteration: 176379
loss: 0.987602710723877,grad_norm: 0.8660368665279798, iteration: 176380
loss: 0.9894885420799255,grad_norm: 0.931244728313718, iteration: 176381
loss: 1.0069941282272339,grad_norm: 0.9999992354814908, iteration: 176382
loss: 0.9682608246803284,grad_norm: 0.9999991302343437, iteration: 176383
loss: 0.9410344362258911,grad_norm: 0.9999990725200806, iteration: 176384
loss: 0.9825277328491211,grad_norm: 0.9781098783397373, iteration: 176385
loss: 0.9999754428863525,grad_norm: 0.7768016337259162, iteration: 176386
loss: 0.9520662426948547,grad_norm: 0.9999990332427124, iteration: 176387
loss: 1.0204339027404785,grad_norm: 0.9312069950789724, iteration: 176388
loss: 0.9937425255775452,grad_norm: 0.9999993737733318, iteration: 176389
loss: 0.9871037602424622,grad_norm: 0.9999993229814075, iteration: 176390
loss: 0.991838812828064,grad_norm: 0.9434384781797861, iteration: 176391
loss: 0.9784537553787231,grad_norm: 0.9999990174725875, iteration: 176392
loss: 1.0099990367889404,grad_norm: 0.8864866500233943, iteration: 176393
loss: 0.9860295653343201,grad_norm: 0.9908464709896827, iteration: 176394
loss: 1.0048751831054688,grad_norm: 0.9999990481998955, iteration: 176395
loss: 1.0478140115737915,grad_norm: 0.9999992278401066, iteration: 176396
loss: 0.9890894889831543,grad_norm: 0.9198901645558807, iteration: 176397
loss: 1.0119067430496216,grad_norm: 0.9999991325080876, iteration: 176398
loss: 1.0310041904449463,grad_norm: 0.9452840460708046, iteration: 176399
loss: 1.005271315574646,grad_norm: 0.9999991287236121, iteration: 176400
loss: 0.9911808371543884,grad_norm: 0.9999990258607347, iteration: 176401
loss: 0.9884092807769775,grad_norm: 0.9999989839409641, iteration: 176402
loss: 1.0020006895065308,grad_norm: 0.8804738628541592, iteration: 176403
loss: 0.9997704029083252,grad_norm: 0.9999990707723605, iteration: 176404
loss: 1.0289791822433472,grad_norm: 0.9850214485659364, iteration: 176405
loss: 1.022966742515564,grad_norm: 0.999999178459795, iteration: 176406
loss: 0.9880402088165283,grad_norm: 0.9999989817920035, iteration: 176407
loss: 0.9824231863021851,grad_norm: 0.9407875285945165, iteration: 176408
loss: 0.964383602142334,grad_norm: 0.7875771723425841, iteration: 176409
loss: 0.9662626385688782,grad_norm: 0.9855724390240761, iteration: 176410
loss: 0.9638746380805969,grad_norm: 0.9999993045009251, iteration: 176411
loss: 1.0366657972335815,grad_norm: 0.9999990694374876, iteration: 176412
loss: 0.9777057766914368,grad_norm: 0.9891490807378716, iteration: 176413
loss: 1.0064431428909302,grad_norm: 0.9999991096966097, iteration: 176414
loss: 1.0075544118881226,grad_norm: 0.999999120080975, iteration: 176415
loss: 1.0060619115829468,grad_norm: 0.9999993556312531, iteration: 176416
loss: 0.9812196493148804,grad_norm: 0.9999990779153146, iteration: 176417
loss: 0.973716676235199,grad_norm: 0.9999991642882174, iteration: 176418
loss: 1.0250622034072876,grad_norm: 0.9999990753064875, iteration: 176419
loss: 0.9876785278320312,grad_norm: 0.9668207202192124, iteration: 176420
loss: 0.9922937154769897,grad_norm: 0.9384836239092474, iteration: 176421
loss: 1.0131438970565796,grad_norm: 0.9733596962864548, iteration: 176422
loss: 1.0297013521194458,grad_norm: 0.969059960324483, iteration: 176423
loss: 0.9891197085380554,grad_norm: 0.9452648987550835, iteration: 176424
loss: 0.9948906898498535,grad_norm: 0.9626403227813612, iteration: 176425
loss: 0.9869950413703918,grad_norm: 0.9999991768943612, iteration: 176426
loss: 1.0062931776046753,grad_norm: 0.9045574956770697, iteration: 176427
loss: 1.011355996131897,grad_norm: 0.8221244984000444, iteration: 176428
loss: 0.9787149429321289,grad_norm: 0.8893197128600481, iteration: 176429
loss: 0.9909552335739136,grad_norm: 0.9169718578405623, iteration: 176430
loss: 1.0029035806655884,grad_norm: 0.9080129423068957, iteration: 176431
loss: 1.0064940452575684,grad_norm: 0.8827002135304187, iteration: 176432
loss: 0.9394177198410034,grad_norm: 0.999999414199021, iteration: 176433
loss: 0.9492468237876892,grad_norm: 0.9681330147525931, iteration: 176434
loss: 1.0094507932662964,grad_norm: 0.8831588450072938, iteration: 176435
loss: 1.0348503589630127,grad_norm: 0.980627682339711, iteration: 176436
loss: 1.0098410844802856,grad_norm: 0.999999040864564, iteration: 176437
loss: 0.9853573441505432,grad_norm: 0.8640604261483265, iteration: 176438
loss: 1.020875096321106,grad_norm: 0.9999991960739117, iteration: 176439
loss: 0.9979076385498047,grad_norm: 0.9062535275801463, iteration: 176440
loss: 0.9761664867401123,grad_norm: 0.9999991412097505, iteration: 176441
loss: 0.9851804971694946,grad_norm: 0.9999991314803409, iteration: 176442
loss: 0.9582619071006775,grad_norm: 0.9549922006671899, iteration: 176443
loss: 0.9784860610961914,grad_norm: 0.8756874033964932, iteration: 176444
loss: 0.9865058660507202,grad_norm: 0.9450837218006926, iteration: 176445
loss: 1.0351742506027222,grad_norm: 0.9999991038231731, iteration: 176446
loss: 1.0015994310379028,grad_norm: 0.9889276503758558, iteration: 176447
loss: 0.9988809823989868,grad_norm: 0.7507664749421956, iteration: 176448
loss: 1.019585132598877,grad_norm: 0.9506624176369588, iteration: 176449
loss: 0.9999077916145325,grad_norm: 0.8748170166013282, iteration: 176450
loss: 0.9797210693359375,grad_norm: 0.9088394767670191, iteration: 176451
loss: 1.0115339756011963,grad_norm: 0.999999154508847, iteration: 176452
loss: 1.0131944417953491,grad_norm: 0.8677392018557094, iteration: 176453
loss: 0.9952929615974426,grad_norm: 0.9999989962633116, iteration: 176454
loss: 0.9621613621711731,grad_norm: 0.8495616915356518, iteration: 176455
loss: 1.0177041292190552,grad_norm: 0.9999991616706474, iteration: 176456
loss: 1.01144540309906,grad_norm: 0.9929841167044521, iteration: 176457
loss: 0.9878457188606262,grad_norm: 0.8922454605402856, iteration: 176458
loss: 1.0128768682479858,grad_norm: 0.999999225705414, iteration: 176459
loss: 0.9969040155410767,grad_norm: 0.9441313582967727, iteration: 176460
loss: 1.016836404800415,grad_norm: 0.974155293091373, iteration: 176461
loss: 0.9529426097869873,grad_norm: 0.9999990992912122, iteration: 176462
loss: 1.0387585163116455,grad_norm: 0.8906771230339893, iteration: 176463
loss: 1.0134512186050415,grad_norm: 0.999999160563847, iteration: 176464
loss: 1.004367709159851,grad_norm: 0.9999991978108727, iteration: 176465
loss: 1.009898066520691,grad_norm: 0.9999991283389433, iteration: 176466
loss: 1.0136879682540894,grad_norm: 0.919909558889501, iteration: 176467
loss: 1.0392184257507324,grad_norm: 0.8119480014941999, iteration: 176468
loss: 0.9921587109565735,grad_norm: 0.999998957230418, iteration: 176469
loss: 1.0236783027648926,grad_norm: 0.9999991512512395, iteration: 176470
loss: 1.0281977653503418,grad_norm: 0.9220984528341959, iteration: 176471
loss: 1.0140751600265503,grad_norm: 0.9438031156693972, iteration: 176472
loss: 0.9704323410987854,grad_norm: 0.9563796130818718, iteration: 176473
loss: 1.0105878114700317,grad_norm: 0.9231717303206051, iteration: 176474
loss: 1.0166853666305542,grad_norm: 0.844180264850591, iteration: 176475
loss: 1.0039781332015991,grad_norm: 0.8731230105391476, iteration: 176476
loss: 1.0376442670822144,grad_norm: 0.9999993882152078, iteration: 176477
loss: 0.9903469681739807,grad_norm: 0.9653201548003796, iteration: 176478
loss: 0.9774782061576843,grad_norm: 0.9999990520480518, iteration: 176479
loss: 1.0161603689193726,grad_norm: 0.8495760757666391, iteration: 176480
loss: 1.000160813331604,grad_norm: 0.9490897280235787, iteration: 176481
loss: 1.0253013372421265,grad_norm: 0.7184339898481984, iteration: 176482
loss: 1.0122512578964233,grad_norm: 0.8846432768845374, iteration: 176483
loss: 1.0112409591674805,grad_norm: 0.9961026431701663, iteration: 176484
loss: 1.0096553564071655,grad_norm: 0.8154966579684712, iteration: 176485
loss: 0.9845535159111023,grad_norm: 0.9970774930223433, iteration: 176486
loss: 1.0001150369644165,grad_norm: 0.9785922013768167, iteration: 176487
loss: 1.0181362628936768,grad_norm: 0.9683112165250264, iteration: 176488
loss: 0.99949049949646,grad_norm: 0.9999991266076489, iteration: 176489
loss: 0.998576283454895,grad_norm: 0.9947518992090456, iteration: 176490
loss: 0.9933848977088928,grad_norm: 0.9545919461444773, iteration: 176491
loss: 1.0854065418243408,grad_norm: 0.9999997447979295, iteration: 176492
loss: 0.9917153716087341,grad_norm: 0.9153734084234014, iteration: 176493
loss: 1.0046080350875854,grad_norm: 0.9718968944435702, iteration: 176494
loss: 0.9839916229248047,grad_norm: 0.9119624096975294, iteration: 176495
loss: 1.0847631692886353,grad_norm: 0.9999991669749517, iteration: 176496
loss: 0.9853149056434631,grad_norm: 0.9954059512325576, iteration: 176497
loss: 1.0200834274291992,grad_norm: 0.9999991694945155, iteration: 176498
loss: 0.9677101373672485,grad_norm: 0.946646327600354, iteration: 176499
loss: 0.9843804836273193,grad_norm: 0.913810626512958, iteration: 176500
loss: 1.0231552124023438,grad_norm: 0.9999990198132722, iteration: 176501
loss: 1.0031145811080933,grad_norm: 0.9723736296892861, iteration: 176502
loss: 1.0655217170715332,grad_norm: 0.9999996068709304, iteration: 176503
loss: 1.009461522102356,grad_norm: 0.999999094739704, iteration: 176504
loss: 0.9920824766159058,grad_norm: 0.9999991322598, iteration: 176505
loss: 0.9781270027160645,grad_norm: 0.880634246185039, iteration: 176506
loss: 1.010039210319519,grad_norm: 0.9484446434584382, iteration: 176507
loss: 1.0028003454208374,grad_norm: 0.9999993086573575, iteration: 176508
loss: 0.9660743474960327,grad_norm: 0.9999990645805975, iteration: 176509
loss: 1.0031938552856445,grad_norm: 0.930916744194051, iteration: 176510
loss: 1.004660725593567,grad_norm: 0.9541031539797326, iteration: 176511
loss: 1.0019738674163818,grad_norm: 0.9999991590775433, iteration: 176512
loss: 1.0966898202896118,grad_norm: 0.9999997320395663, iteration: 176513
loss: 0.9945064783096313,grad_norm: 0.9584390278485534, iteration: 176514
loss: 0.9857069253921509,grad_norm: 0.9999992301998505, iteration: 176515
loss: 1.0563098192214966,grad_norm: 0.9999996372931479, iteration: 176516
loss: 1.047680139541626,grad_norm: 0.9999997977605016, iteration: 176517
loss: 1.0225356817245483,grad_norm: 0.9741742311486037, iteration: 176518
loss: 1.008285403251648,grad_norm: 0.9727674712148313, iteration: 176519
loss: 1.03639817237854,grad_norm: 0.9188842428423996, iteration: 176520
loss: 1.0084248781204224,grad_norm: 0.9632429326933774, iteration: 176521
loss: 1.0617457628250122,grad_norm: 0.9629496363720642, iteration: 176522
loss: 1.0027040243148804,grad_norm: 0.9999991095563046, iteration: 176523
loss: 1.0048521757125854,grad_norm: 0.9120419887741594, iteration: 176524
loss: 0.9798002243041992,grad_norm: 0.9999990327070598, iteration: 176525
loss: 1.0164240598678589,grad_norm: 0.9999990349974832, iteration: 176526
loss: 0.9727072715759277,grad_norm: 0.9292053532709726, iteration: 176527
loss: 1.016539454460144,grad_norm: 0.9999992627229726, iteration: 176528
loss: 0.984620213508606,grad_norm: 0.9079716778503558, iteration: 176529
loss: 1.0053374767303467,grad_norm: 0.8526920343659736, iteration: 176530
loss: 0.9791622757911682,grad_norm: 0.9999991288635511, iteration: 176531
loss: 1.025863766670227,grad_norm: 0.8364032301752745, iteration: 176532
loss: 0.9475375413894653,grad_norm: 0.9017485095609812, iteration: 176533
loss: 1.0331047773361206,grad_norm: 0.9999991026502318, iteration: 176534
loss: 1.0033633708953857,grad_norm: 0.8774834137821989, iteration: 176535
loss: 1.0182254314422607,grad_norm: 0.9517061313642701, iteration: 176536
loss: 0.9715143442153931,grad_norm: 0.9377044945637787, iteration: 176537
loss: 0.9830313324928284,grad_norm: 0.9121988198434233, iteration: 176538
loss: 0.9981449842453003,grad_norm: 0.7880359265484573, iteration: 176539
loss: 1.0210025310516357,grad_norm: 0.9999992439463585, iteration: 176540
loss: 0.9919191598892212,grad_norm: 0.999998985361676, iteration: 176541
loss: 1.0091133117675781,grad_norm: 0.9999991414168529, iteration: 176542
loss: 1.080286979675293,grad_norm: 0.9999993003563448, iteration: 176543
loss: 1.0059840679168701,grad_norm: 0.891310181523805, iteration: 176544
loss: 0.9817904233932495,grad_norm: 0.999999229876466, iteration: 176545
loss: 1.0015162229537964,grad_norm: 0.9999991150148297, iteration: 176546
loss: 1.0534136295318604,grad_norm: 0.9999999093160729, iteration: 176547
loss: 1.0043959617614746,grad_norm: 0.9730813101078923, iteration: 176548
loss: 1.0043095350265503,grad_norm: 0.8424738081816252, iteration: 176549
loss: 0.9740579724311829,grad_norm: 0.9999990222252394, iteration: 176550
loss: 1.017379641532898,grad_norm: 0.9892512293686936, iteration: 176551
loss: 0.9761402606964111,grad_norm: 0.9962370419567982, iteration: 176552
loss: 1.0042781829833984,grad_norm: 0.9999991811076369, iteration: 176553
loss: 1.0175215005874634,grad_norm: 0.9999991330498925, iteration: 176554
loss: 0.9761993885040283,grad_norm: 0.999999085443453, iteration: 176555
loss: 1.0536081790924072,grad_norm: 0.9999993790444226, iteration: 176556
loss: 1.0247611999511719,grad_norm: 0.9999990903309367, iteration: 176557
loss: 1.0033904314041138,grad_norm: 0.9815163131879292, iteration: 176558
loss: 0.983456552028656,grad_norm: 0.9999990992495932, iteration: 176559
loss: 0.941469669342041,grad_norm: 0.999999219214965, iteration: 176560
loss: 1.0267359018325806,grad_norm: 0.9969171813726068, iteration: 176561
loss: 0.9930850267410278,grad_norm: 0.9999991579568188, iteration: 176562
loss: 0.9760133624076843,grad_norm: 0.9999991944066388, iteration: 176563
loss: 1.0318629741668701,grad_norm: 0.999999151748577, iteration: 176564
loss: 0.9897935390472412,grad_norm: 0.999999207784186, iteration: 176565
loss: 1.0027961730957031,grad_norm: 0.9999990764593291, iteration: 176566
loss: 0.9856847524642944,grad_norm: 0.9999993162617301, iteration: 176567
loss: 0.9860420227050781,grad_norm: 0.8607742510290548, iteration: 176568
loss: 1.0762097835540771,grad_norm: 0.9999999118722194, iteration: 176569
loss: 0.9991735219955444,grad_norm: 0.9054272414190057, iteration: 176570
loss: 1.0054377317428589,grad_norm: 0.999999209688064, iteration: 176571
loss: 0.9828212261199951,grad_norm: 0.9999991467863909, iteration: 176572
loss: 1.0058488845825195,grad_norm: 0.9067086112921201, iteration: 176573
loss: 0.9619930386543274,grad_norm: 0.916642790323365, iteration: 176574
loss: 1.0090365409851074,grad_norm: 0.9999991222894808, iteration: 176575
loss: 0.9886568188667297,grad_norm: 0.8920827025541112, iteration: 176576
loss: 1.0097848176956177,grad_norm: 0.978240860696399, iteration: 176577
loss: 0.9794965386390686,grad_norm: 0.8816661349918269, iteration: 176578
loss: 1.016646385192871,grad_norm: 0.8165729970373957, iteration: 176579
loss: 0.9500632286071777,grad_norm: 0.9999991932474206, iteration: 176580
loss: 1.0032085180282593,grad_norm: 0.9999991496377483, iteration: 176581
loss: 0.9754431247711182,grad_norm: 0.9735388923762857, iteration: 176582
loss: 0.9762425422668457,grad_norm: 0.999999201679075, iteration: 176583
loss: 0.9869160056114197,grad_norm: 0.9999990146306184, iteration: 176584
loss: 1.0035048723220825,grad_norm: 0.999999051975529, iteration: 176585
loss: 1.0048270225524902,grad_norm: 0.987123245687231, iteration: 176586
loss: 1.028537392616272,grad_norm: 0.9163841059461048, iteration: 176587
loss: 1.0171090364456177,grad_norm: 0.9817865645432353, iteration: 176588
loss: 1.0024462938308716,grad_norm: 0.9520167158431355, iteration: 176589
loss: 1.034995436668396,grad_norm: 0.8850895955942165, iteration: 176590
loss: 0.9972575306892395,grad_norm: 0.9783191599324674, iteration: 176591
loss: 0.9959211349487305,grad_norm: 0.96412128429185, iteration: 176592
loss: 0.9938591718673706,grad_norm: 0.9692131634779141, iteration: 176593
loss: 0.9864277839660645,grad_norm: 0.9276710338100677, iteration: 176594
loss: 1.0960166454315186,grad_norm: 0.999999752780691, iteration: 176595
loss: 0.969855010509491,grad_norm: 0.9999991215459438, iteration: 176596
loss: 0.9954524040222168,grad_norm: 0.9999990054432228, iteration: 176597
loss: 0.999267578125,grad_norm: 0.8626161660377575, iteration: 176598
loss: 0.9884073138237,grad_norm: 0.956560186146622, iteration: 176599
loss: 1.0617822408676147,grad_norm: 0.9999990444269198, iteration: 176600
loss: 0.9549014568328857,grad_norm: 0.8718716021851014, iteration: 176601
loss: 1.0214837789535522,grad_norm: 0.8688381405748978, iteration: 176602
loss: 1.008298397064209,grad_norm: 0.9102722714845688, iteration: 176603
loss: 1.011084794998169,grad_norm: 0.9119184943142852, iteration: 176604
loss: 0.9849214553833008,grad_norm: 0.8973081588403626, iteration: 176605
loss: 0.9989864230155945,grad_norm: 0.9999990457076722, iteration: 176606
loss: 0.9745689034461975,grad_norm: 0.9999991091025353, iteration: 176607
loss: 0.997995913028717,grad_norm: 0.8843152999299233, iteration: 176608
loss: 1.0006052255630493,grad_norm: 0.9899328912337644, iteration: 176609
loss: 1.0004500150680542,grad_norm: 0.9986419751474664, iteration: 176610
loss: 1.0278834104537964,grad_norm: 0.9999991610096506, iteration: 176611
loss: 0.9830847382545471,grad_norm: 0.8953755088895619, iteration: 176612
loss: 1.008245825767517,grad_norm: 0.9805753142492919, iteration: 176613
loss: 0.9924731254577637,grad_norm: 0.9665849543910191, iteration: 176614
loss: 1.0048305988311768,grad_norm: 0.9625971695535824, iteration: 176615
loss: 1.0179575681686401,grad_norm: 0.8969111887747703, iteration: 176616
loss: 1.0148171186447144,grad_norm: 0.9363840564993499, iteration: 176617
loss: 1.0054773092269897,grad_norm: 0.9999990857605365, iteration: 176618
loss: 0.9978638887405396,grad_norm: 0.9999989609154369, iteration: 176619
loss: 1.0173301696777344,grad_norm: 0.9158600999015986, iteration: 176620
loss: 1.0285742282867432,grad_norm: 0.999999054172337, iteration: 176621
loss: 1.0404975414276123,grad_norm: 0.9999990910987642, iteration: 176622
loss: 0.9954740405082703,grad_norm: 0.999998992093451, iteration: 176623
loss: 1.0003310441970825,grad_norm: 0.9999991628749489, iteration: 176624
loss: 1.0056980848312378,grad_norm: 0.9999990961967143, iteration: 176625
loss: 0.9755275845527649,grad_norm: 0.9999990067830796, iteration: 176626
loss: 0.9841870069503784,grad_norm: 0.9713448869606072, iteration: 176627
loss: 1.0077629089355469,grad_norm: 0.9999991007906092, iteration: 176628
loss: 0.9858791828155518,grad_norm: 0.9999991800421116, iteration: 176629
loss: 0.9938493967056274,grad_norm: 0.8686017092778517, iteration: 176630
loss: 0.9861239790916443,grad_norm: 0.8955480529005162, iteration: 176631
loss: 1.0027631521224976,grad_norm: 0.7733125171852544, iteration: 176632
loss: 1.0303024053573608,grad_norm: 0.8970558926154266, iteration: 176633
loss: 1.0123437643051147,grad_norm: 0.9999992021760339, iteration: 176634
loss: 0.9849510788917542,grad_norm: 0.84610389305801, iteration: 176635
loss: 0.9917044043540955,grad_norm: 0.9999992659781213, iteration: 176636
loss: 1.020868182182312,grad_norm: 0.9999991032532782, iteration: 176637
loss: 1.012309193611145,grad_norm: 0.9999991649227433, iteration: 176638
loss: 0.9945113062858582,grad_norm: 0.8994367219119324, iteration: 176639
loss: 0.9611806869506836,grad_norm: 0.920948188364247, iteration: 176640
loss: 0.9852908849716187,grad_norm: 0.8955930302267773, iteration: 176641
loss: 0.9943463802337646,grad_norm: 0.9632300081802558, iteration: 176642
loss: 1.0112097263336182,grad_norm: 0.9999991767814708, iteration: 176643
loss: 1.0092988014221191,grad_norm: 0.999999158706179, iteration: 176644
loss: 1.0264533758163452,grad_norm: 0.9999990942005871, iteration: 176645
loss: 0.9904410243034363,grad_norm: 0.9180777405856428, iteration: 176646
loss: 1.025227665901184,grad_norm: 0.8029866743718218, iteration: 176647
loss: 1.0100640058517456,grad_norm: 0.9999993074218968, iteration: 176648
loss: 0.9861496686935425,grad_norm: 0.999999197230195, iteration: 176649
loss: 0.9772657752037048,grad_norm: 0.87604142535625, iteration: 176650
loss: 1.0287500619888306,grad_norm: 0.9999990534393026, iteration: 176651
loss: 1.0095590353012085,grad_norm: 0.9999991923300338, iteration: 176652
loss: 1.0157971382141113,grad_norm: 0.9469448389189152, iteration: 176653
loss: 1.0015757083892822,grad_norm: 0.8861495592708427, iteration: 176654
loss: 0.9779111742973328,grad_norm: 0.9999992038968504, iteration: 176655
loss: 0.9629783034324646,grad_norm: 0.9999993252028603, iteration: 176656
loss: 0.9878472685813904,grad_norm: 0.9109593753563664, iteration: 176657
loss: 0.9818823933601379,grad_norm: 0.9999990799925523, iteration: 176658
loss: 0.9984064698219299,grad_norm: 0.9999992044205748, iteration: 176659
loss: 0.9885472059249878,grad_norm: 0.9999990583371523, iteration: 176660
loss: 0.9935311675071716,grad_norm: 0.9999991598294411, iteration: 176661
loss: 0.9453614950180054,grad_norm: 0.9758683305665848, iteration: 176662
loss: 1.00680410861969,grad_norm: 0.9999990681955641, iteration: 176663
loss: 0.976708710193634,grad_norm: 0.999999059361588, iteration: 176664
loss: 0.9884268045425415,grad_norm: 0.952461512835108, iteration: 176665
loss: 1.0103131532669067,grad_norm: 0.9999992231433646, iteration: 176666
loss: 1.0332413911819458,grad_norm: 0.9999990498753825, iteration: 176667
loss: 1.0215380191802979,grad_norm: 0.9482563856985976, iteration: 176668
loss: 0.9688447713851929,grad_norm: 0.879583379878754, iteration: 176669
loss: 1.0376315116882324,grad_norm: 0.9999989731221364, iteration: 176670
loss: 1.0106959342956543,grad_norm: 0.9999989914838437, iteration: 176671
loss: 1.0116755962371826,grad_norm: 0.8881218945543289, iteration: 176672
loss: 0.9939029216766357,grad_norm: 0.999999304119922, iteration: 176673
loss: 0.9849351048469543,grad_norm: 0.8995178847582509, iteration: 176674
loss: 1.0203675031661987,grad_norm: 0.9122890981509735, iteration: 176675
loss: 0.9576294422149658,grad_norm: 0.9572773432023352, iteration: 176676
loss: 0.976509690284729,grad_norm: 0.9417916253469582, iteration: 176677
loss: 1.0351252555847168,grad_norm: 0.9999992399521755, iteration: 176678
loss: 1.0155880451202393,grad_norm: 0.999999250528752, iteration: 176679
loss: 0.9994751214981079,grad_norm: 0.8535632725867218, iteration: 176680
loss: 0.9705500602722168,grad_norm: 0.9175138276227689, iteration: 176681
loss: 0.9950563907623291,grad_norm: 0.9037783440888051, iteration: 176682
loss: 1.0046406984329224,grad_norm: 0.9999991121779821, iteration: 176683
loss: 0.9768619537353516,grad_norm: 0.9999990820179083, iteration: 176684
loss: 1.0125644207000732,grad_norm: 0.8963114566597211, iteration: 176685
loss: 1.013052225112915,grad_norm: 0.9440983661913745, iteration: 176686
loss: 0.9917765855789185,grad_norm: 0.9412431553361045, iteration: 176687
loss: 0.9891657829284668,grad_norm: 0.9300648184728805, iteration: 176688
loss: 1.0279725790023804,grad_norm: 0.9411743391736797, iteration: 176689
loss: 1.01492178440094,grad_norm: 0.9999990996328711, iteration: 176690
loss: 0.9805302619934082,grad_norm: 0.9940202061071858, iteration: 176691
loss: 0.9906853437423706,grad_norm: 0.9999990696086622, iteration: 176692
loss: 1.0515168905258179,grad_norm: 0.9999989631487487, iteration: 176693
loss: 1.037259817123413,grad_norm: 0.9999993067167247, iteration: 176694
loss: 0.9738487005233765,grad_norm: 0.9881243022929007, iteration: 176695
loss: 1.0066155195236206,grad_norm: 0.9999990937436489, iteration: 176696
loss: 0.96630859375,grad_norm: 0.9784267431711836, iteration: 176697
loss: 0.9696268439292908,grad_norm: 0.9999992093423254, iteration: 176698
loss: 0.9905270934104919,grad_norm: 0.8517430910003918, iteration: 176699
loss: 0.9992647767066956,grad_norm: 0.8374794604431686, iteration: 176700
loss: 0.9528654217720032,grad_norm: 0.8452856314566927, iteration: 176701
loss: 0.9590626955032349,grad_norm: 0.817426397072832, iteration: 176702
loss: 1.0128757953643799,grad_norm: 0.9999992932596489, iteration: 176703
loss: 0.9611374139785767,grad_norm: 0.9267576509708272, iteration: 176704
loss: 1.0255448818206787,grad_norm: 0.9096960517522295, iteration: 176705
loss: 0.9707208871841431,grad_norm: 0.9999991633249811, iteration: 176706
loss: 0.9619361758232117,grad_norm: 0.963239289970565, iteration: 176707
loss: 1.0037568807601929,grad_norm: 0.7665688769550595, iteration: 176708
loss: 1.0312068462371826,grad_norm: 0.8655715135008591, iteration: 176709
loss: 0.9862374067306519,grad_norm: 0.9397067175417034, iteration: 176710
loss: 1.0197789669036865,grad_norm: 0.8519144161063349, iteration: 176711
loss: 0.9993691444396973,grad_norm: 0.9999991131317791, iteration: 176712
loss: 1.023268699645996,grad_norm: 0.9999991796773966, iteration: 176713
loss: 1.0213847160339355,grad_norm: 0.9712704367357611, iteration: 176714
loss: 0.9984136819839478,grad_norm: 0.999999139328439, iteration: 176715
loss: 1.0564159154891968,grad_norm: 0.9999990100163255, iteration: 176716
loss: 1.0644184350967407,grad_norm: 0.9999996791568527, iteration: 176717
loss: 0.9958345293998718,grad_norm: 0.9556848691343406, iteration: 176718
loss: 1.021177053451538,grad_norm: 0.9999991075042115, iteration: 176719
loss: 0.9961156249046326,grad_norm: 0.9827229669434497, iteration: 176720
loss: 1.0026367902755737,grad_norm: 0.8836804034230142, iteration: 176721
loss: 1.027220368385315,grad_norm: 0.9999993061394281, iteration: 176722
loss: 0.9907090663909912,grad_norm: 0.7915786550741494, iteration: 176723
loss: 1.0166267156600952,grad_norm: 0.9617415674542609, iteration: 176724
loss: 0.9750403761863708,grad_norm: 0.988120837175101, iteration: 176725
loss: 0.9912348985671997,grad_norm: 0.9999991011058907, iteration: 176726
loss: 1.0090186595916748,grad_norm: 0.9999991104533971, iteration: 176727
loss: 0.9949103593826294,grad_norm: 0.999999091151568, iteration: 176728
loss: 0.9737445712089539,grad_norm: 0.9154565230289852, iteration: 176729
loss: 0.9772639870643616,grad_norm: 0.981510252260363, iteration: 176730
loss: 1.0135889053344727,grad_norm: 0.9397333764270884, iteration: 176731
loss: 1.004658579826355,grad_norm: 0.9999990551689937, iteration: 176732
loss: 1.011966586112976,grad_norm: 0.9999991916142063, iteration: 176733
loss: 0.9913969039916992,grad_norm: 0.8692517836149263, iteration: 176734
loss: 0.9994896650314331,grad_norm: 0.9999991509624608, iteration: 176735
loss: 0.9950894713401794,grad_norm: 0.9689837670372347, iteration: 176736
loss: 1.0129034519195557,grad_norm: 0.9999993040073587, iteration: 176737
loss: 0.9911330342292786,grad_norm: 0.999999049814326, iteration: 176738
loss: 1.0322643518447876,grad_norm: 0.9999991600185194, iteration: 176739
loss: 0.9821304678916931,grad_norm: 0.7816229490017276, iteration: 176740
loss: 0.9903249144554138,grad_norm: 0.9327869238757321, iteration: 176741
loss: 0.9835851192474365,grad_norm: 0.9999989925250058, iteration: 176742
loss: 0.9888097643852234,grad_norm: 0.9906169647090087, iteration: 176743
loss: 1.0056178569793701,grad_norm: 0.939950350693217, iteration: 176744
loss: 0.9481245279312134,grad_norm: 0.9999991908660457, iteration: 176745
loss: 1.0398406982421875,grad_norm: 0.939339505044728, iteration: 176746
loss: 0.9827508330345154,grad_norm: 0.8842028748173385, iteration: 176747
loss: 0.9735257625579834,grad_norm: 0.9999991077014807, iteration: 176748
loss: 1.0032356977462769,grad_norm: 0.9999992286710382, iteration: 176749
loss: 0.9950994849205017,grad_norm: 0.999999090568775, iteration: 176750
loss: 1.021337866783142,grad_norm: 0.8738699805911306, iteration: 176751
loss: 1.0255897045135498,grad_norm: 0.9688688030121578, iteration: 176752
loss: 0.9843567609786987,grad_norm: 0.9596264826004391, iteration: 176753
loss: 1.0031567811965942,grad_norm: 0.9999992013540463, iteration: 176754
loss: 1.0197802782058716,grad_norm: 0.9863124068773537, iteration: 176755
loss: 1.0456913709640503,grad_norm: 0.9999989634470371, iteration: 176756
loss: 1.0066896677017212,grad_norm: 0.8528329268659569, iteration: 176757
loss: 0.9570949673652649,grad_norm: 0.9977109106656806, iteration: 176758
loss: 0.9650903940200806,grad_norm: 0.898588207229128, iteration: 176759
loss: 0.9907404780387878,grad_norm: 0.9776778664768988, iteration: 176760
loss: 1.0138955116271973,grad_norm: 0.999999091982684, iteration: 176761
loss: 0.9914411306381226,grad_norm: 0.8454092990148011, iteration: 176762
loss: 1.0175014734268188,grad_norm: 0.9867476795775997, iteration: 176763
loss: 1.0175373554229736,grad_norm: 0.818681233516557, iteration: 176764
loss: 1.0132604837417603,grad_norm: 0.9822151402701343, iteration: 176765
loss: 1.0111697912216187,grad_norm: 0.9503182729695835, iteration: 176766
loss: 0.9725550413131714,grad_norm: 0.9999991503669873, iteration: 176767
loss: 1.0162968635559082,grad_norm: 0.9810609314099042, iteration: 176768
loss: 0.9946912527084351,grad_norm: 0.9999990998231363, iteration: 176769
loss: 0.982574462890625,grad_norm: 0.9999991329341212, iteration: 176770
loss: 1.0015912055969238,grad_norm: 0.9999989443680202, iteration: 176771
loss: 1.0413671731948853,grad_norm: 0.999999711676317, iteration: 176772
loss: 1.0055011510849,grad_norm: 0.9999991107942613, iteration: 176773
loss: 0.9664604067802429,grad_norm: 0.8566805296415867, iteration: 176774
loss: 1.0066752433776855,grad_norm: 0.9999990658999902, iteration: 176775
loss: 1.022642970085144,grad_norm: 0.968734465502097, iteration: 176776
loss: 1.0091274976730347,grad_norm: 0.849821144586615, iteration: 176777
loss: 1.0407202243804932,grad_norm: 0.9855701101543684, iteration: 176778
loss: 1.006386399269104,grad_norm: 0.999998993086077, iteration: 176779
loss: 1.001183032989502,grad_norm: 0.8283712310708318, iteration: 176780
loss: 0.9785152673721313,grad_norm: 0.9146629934515499, iteration: 176781
loss: 1.0007994174957275,grad_norm: 0.999999139025156, iteration: 176782
loss: 0.9866359233856201,grad_norm: 0.9999991365869559, iteration: 176783
loss: 0.9900830984115601,grad_norm: 0.9999991460921147, iteration: 176784
loss: 0.9835780262947083,grad_norm: 0.8641751586715434, iteration: 176785
loss: 1.0585408210754395,grad_norm: 0.9999997024002399, iteration: 176786
loss: 1.0034091472625732,grad_norm: 0.999999268852222, iteration: 176787
loss: 1.004654884338379,grad_norm: 0.9999992673267277, iteration: 176788
loss: 1.0322129726409912,grad_norm: 0.9999992826592007, iteration: 176789
loss: 0.9934415221214294,grad_norm: 0.9999990943609371, iteration: 176790
loss: 1.0751038789749146,grad_norm: 0.999999514461815, iteration: 176791
loss: 1.0227421522140503,grad_norm: 0.9078863588566078, iteration: 176792
loss: 0.9747605323791504,grad_norm: 0.9999991462910628, iteration: 176793
loss: 1.0148570537567139,grad_norm: 0.9400586177185188, iteration: 176794
loss: 1.0210354328155518,grad_norm: 0.9999990505239497, iteration: 176795
loss: 1.002455472946167,grad_norm: 0.9999990540904482, iteration: 176796
loss: 1.0174082517623901,grad_norm: 0.9999990584941427, iteration: 176797
loss: 1.015807032585144,grad_norm: 0.9999991671871006, iteration: 176798
loss: 1.0402921438217163,grad_norm: 0.9999991836102052, iteration: 176799
loss: 1.0123220682144165,grad_norm: 0.999999065925857, iteration: 176800
loss: 0.9968874454498291,grad_norm: 0.8765880141526603, iteration: 176801
loss: 0.9917613863945007,grad_norm: 0.9999990530734921, iteration: 176802
loss: 0.9899938702583313,grad_norm: 0.9999991980541605, iteration: 176803
loss: 0.9906986951828003,grad_norm: 0.9999991784246215, iteration: 176804
loss: 0.9969207644462585,grad_norm: 0.9999993679684571, iteration: 176805
loss: 1.028477430343628,grad_norm: 0.9999991595748868, iteration: 176806
loss: 1.0050922632217407,grad_norm: 0.9068910364569669, iteration: 176807
loss: 0.9918451905250549,grad_norm: 0.999999069063403, iteration: 176808
loss: 0.9592341184616089,grad_norm: 0.8924325005068877, iteration: 176809
loss: 0.9848047494888306,grad_norm: 0.9999992320707883, iteration: 176810
loss: 1.0141810178756714,grad_norm: 0.9684808603994911, iteration: 176811
loss: 1.0086969137191772,grad_norm: 0.9496252610917117, iteration: 176812
loss: 0.9985039830207825,grad_norm: 0.9869462740185873, iteration: 176813
loss: 0.9997828602790833,grad_norm: 0.9999993082391914, iteration: 176814
loss: 1.0145018100738525,grad_norm: 0.9999993187445072, iteration: 176815
loss: 1.016142725944519,grad_norm: 0.9999992027750397, iteration: 176816
loss: 1.0002232789993286,grad_norm: 0.9963648117341339, iteration: 176817
loss: 1.0022293329238892,grad_norm: 0.8961772839936042, iteration: 176818
loss: 0.999578058719635,grad_norm: 0.9999992009052339, iteration: 176819
loss: 1.0221165418624878,grad_norm: 0.9999992377000448, iteration: 176820
loss: 1.0169018507003784,grad_norm: 0.9999990343203667, iteration: 176821
loss: 0.9829574823379517,grad_norm: 0.8458773954472499, iteration: 176822
loss: 0.9706234931945801,grad_norm: 0.999999178616383, iteration: 176823
loss: 1.0126831531524658,grad_norm: 0.8438454145507361, iteration: 176824
loss: 0.9806473851203918,grad_norm: 0.7927848649302384, iteration: 176825
loss: 1.100881576538086,grad_norm: 0.9999997056044301, iteration: 176826
loss: 1.0030750036239624,grad_norm: 0.9799388152723997, iteration: 176827
loss: 1.0014716386795044,grad_norm: 0.99999903607069, iteration: 176828
loss: 1.0325623750686646,grad_norm: 0.9450575256959528, iteration: 176829
loss: 1.0363273620605469,grad_norm: 0.9563050127267883, iteration: 176830
loss: 1.009013295173645,grad_norm: 0.9999992753067933, iteration: 176831
loss: 1.023470401763916,grad_norm: 0.8153774862274298, iteration: 176832
loss: 1.0156832933425903,grad_norm: 0.9039882293440008, iteration: 176833
loss: 1.0106545686721802,grad_norm: 0.9999990660957551, iteration: 176834
loss: 1.031049370765686,grad_norm: 0.9999995801347548, iteration: 176835
loss: 1.025586724281311,grad_norm: 0.9999991575198648, iteration: 176836
loss: 1.0236440896987915,grad_norm: 0.9999992125572865, iteration: 176837
loss: 0.9629013538360596,grad_norm: 0.9758928528954576, iteration: 176838
loss: 1.0167641639709473,grad_norm: 0.9999997999573392, iteration: 176839
loss: 1.0171986818313599,grad_norm: 0.9477888762170755, iteration: 176840
loss: 1.0208063125610352,grad_norm: 0.9999992941289907, iteration: 176841
loss: 0.9806956052780151,grad_norm: 0.9999993577132316, iteration: 176842
loss: 1.0272037982940674,grad_norm: 0.9999992121039675, iteration: 176843
loss: 1.0011229515075684,grad_norm: 0.8793888803267281, iteration: 176844
loss: 0.9842925667762756,grad_norm: 0.9655119158744996, iteration: 176845
loss: 0.9964278936386108,grad_norm: 0.9587419566483836, iteration: 176846
loss: 1.0029582977294922,grad_norm: 0.8713188379172695, iteration: 176847
loss: 1.0041035413742065,grad_norm: 0.8385004420080547, iteration: 176848
loss: 0.9945210218429565,grad_norm: 0.944220238542062, iteration: 176849
loss: 0.9982945919036865,grad_norm: 0.9526540935285224, iteration: 176850
loss: 1.0104668140411377,grad_norm: 0.9328735370016052, iteration: 176851
loss: 1.0141876935958862,grad_norm: 0.9999991621294343, iteration: 176852
loss: 0.9826295375823975,grad_norm: 0.990711364332679, iteration: 176853
loss: 0.9644209742546082,grad_norm: 0.93772635236881, iteration: 176854
loss: 1.0033966302871704,grad_norm: 0.9272323371146376, iteration: 176855
loss: 1.0500519275665283,grad_norm: 0.999999151475149, iteration: 176856
loss: 0.9954327344894409,grad_norm: 0.9632599269851677, iteration: 176857
loss: 0.9787776470184326,grad_norm: 0.9999991413280197, iteration: 176858
loss: 0.9941530823707581,grad_norm: 0.9999991139492437, iteration: 176859
loss: 1.0006377696990967,grad_norm: 0.9999991351929082, iteration: 176860
loss: 0.9923400282859802,grad_norm: 0.9999991141482869, iteration: 176861
loss: 0.9877163171768188,grad_norm: 0.9603752701110126, iteration: 176862
loss: 0.974498987197876,grad_norm: 0.9262246305982405, iteration: 176863
loss: 1.0190224647521973,grad_norm: 0.9727079455690557, iteration: 176864
loss: 1.0114314556121826,grad_norm: 0.8966478005681104, iteration: 176865
loss: 0.9839233160018921,grad_norm: 0.9999990327529598, iteration: 176866
loss: 1.0332517623901367,grad_norm: 0.9405709693666264, iteration: 176867
loss: 0.9745771884918213,grad_norm: 0.9999991646085536, iteration: 176868
loss: 0.99748694896698,grad_norm: 0.8695678627873925, iteration: 176869
loss: 1.012061595916748,grad_norm: 0.9999991796397008, iteration: 176870
loss: 1.024396300315857,grad_norm: 0.9999991404281313, iteration: 176871
loss: 0.9783431887626648,grad_norm: 0.9788186404853225, iteration: 176872
loss: 1.037051796913147,grad_norm: 0.9528847348197217, iteration: 176873
loss: 1.0680540800094604,grad_norm: 0.9999991759203094, iteration: 176874
loss: 0.9496484398841858,grad_norm: 0.9934601257569127, iteration: 176875
loss: 0.9931895732879639,grad_norm: 0.9540177726692236, iteration: 176876
loss: 1.0290412902832031,grad_norm: 0.99999909877911, iteration: 176877
loss: 0.9910717606544495,grad_norm: 0.9778782499695672, iteration: 176878
loss: 0.9565784931182861,grad_norm: 0.9880735996329528, iteration: 176879
loss: 1.0214393138885498,grad_norm: 0.9999990785293413, iteration: 176880
loss: 0.9718675017356873,grad_norm: 0.9999991075793285, iteration: 176881
loss: 0.9744794368743896,grad_norm: 0.9999991140674059, iteration: 176882
loss: 0.9940540790557861,grad_norm: 0.9999993229185251, iteration: 176883
loss: 1.009076714515686,grad_norm: 0.9999994573226457, iteration: 176884
loss: 1.0124722719192505,grad_norm: 0.9999990004036606, iteration: 176885
loss: 1.0039777755737305,grad_norm: 0.9999990665063081, iteration: 176886
loss: 1.0308334827423096,grad_norm: 0.9303746753621224, iteration: 176887
loss: 1.0395879745483398,grad_norm: 0.8904699207393569, iteration: 176888
loss: 0.9905408620834351,grad_norm: 0.8664375885759388, iteration: 176889
loss: 1.0083067417144775,grad_norm: 0.9513067404373713, iteration: 176890
loss: 0.9889458417892456,grad_norm: 0.9999992283817378, iteration: 176891
loss: 1.043934941291809,grad_norm: 0.9999989459382371, iteration: 176892
loss: 1.0272424221038818,grad_norm: 0.9999992235947911, iteration: 176893
loss: 1.043143391609192,grad_norm: 0.999999855558007, iteration: 176894
loss: 1.0060491561889648,grad_norm: 0.9643184383350863, iteration: 176895
loss: 0.958239734172821,grad_norm: 0.9473387957318086, iteration: 176896
loss: 1.0121245384216309,grad_norm: 0.8283049534129985, iteration: 176897
loss: 0.9510179162025452,grad_norm: 0.9953731142923617, iteration: 176898
loss: 1.0030866861343384,grad_norm: 0.9999989911922197, iteration: 176899
loss: 0.9928915500640869,grad_norm: 0.9999990200672325, iteration: 176900
loss: 0.9893600940704346,grad_norm: 0.9999992140133421, iteration: 176901
loss: 0.9821827411651611,grad_norm: 0.9999991254184531, iteration: 176902
loss: 0.9930382370948792,grad_norm: 0.9657814576015333, iteration: 176903
loss: 1.0037250518798828,grad_norm: 0.9999991320216068, iteration: 176904
loss: 0.9946016669273376,grad_norm: 0.9999991023636569, iteration: 176905
loss: 1.01513671875,grad_norm: 0.9999992615998053, iteration: 176906
loss: 1.0161434412002563,grad_norm: 0.9415811717150372, iteration: 176907
loss: 0.9849790334701538,grad_norm: 0.8500155781957522, iteration: 176908
loss: 0.9642792344093323,grad_norm: 0.9571550613895397, iteration: 176909
loss: 0.9818857908248901,grad_norm: 0.9003507591657404, iteration: 176910
loss: 1.050455093383789,grad_norm: 0.9999991080269718, iteration: 176911
loss: 0.9859796166419983,grad_norm: 0.9999989489936038, iteration: 176912
loss: 0.9980494976043701,grad_norm: 0.999999109894982, iteration: 176913
loss: 0.9702627658843994,grad_norm: 0.8985626149682886, iteration: 176914
loss: 0.9570820927619934,grad_norm: 0.9395253423722715, iteration: 176915
loss: 1.0669264793395996,grad_norm: 0.9999991314734952, iteration: 176916
loss: 0.9539852142333984,grad_norm: 0.9999991850237661, iteration: 176917
loss: 0.978217363357544,grad_norm: 0.9999992632581309, iteration: 176918
loss: 0.9858185648918152,grad_norm: 0.9999990393983995, iteration: 176919
loss: 0.9886162281036377,grad_norm: 0.9346392621516943, iteration: 176920
loss: 0.9991025924682617,grad_norm: 0.9999991190601568, iteration: 176921
loss: 0.9952502846717834,grad_norm: 0.8883152712192153, iteration: 176922
loss: 0.9948866367340088,grad_norm: 0.9999990497920046, iteration: 176923
loss: 1.0163047313690186,grad_norm: 0.9939437966428029, iteration: 176924
loss: 1.0155562162399292,grad_norm: 0.9999993873884946, iteration: 176925
loss: 1.011850357055664,grad_norm: 0.995981665421612, iteration: 176926
loss: 0.9998963475227356,grad_norm: 0.9414885317602393, iteration: 176927
loss: 0.9900427460670471,grad_norm: 0.967047691005227, iteration: 176928
loss: 0.9815289378166199,grad_norm: 0.9999988640830696, iteration: 176929
loss: 0.9852179288864136,grad_norm: 0.9999991697415226, iteration: 176930
loss: 1.010550618171692,grad_norm: 0.8849237886078508, iteration: 176931
loss: 1.0463770627975464,grad_norm: 0.9999995686584326, iteration: 176932
loss: 1.0122356414794922,grad_norm: 0.8600250596518436, iteration: 176933
loss: 0.9722433090209961,grad_norm: 0.9080957161257198, iteration: 176934
loss: 1.04156494140625,grad_norm: 0.9999990292207207, iteration: 176935
loss: 1.014193058013916,grad_norm: 0.9999990622578822, iteration: 176936
loss: 0.9943305253982544,grad_norm: 0.9526564218133279, iteration: 176937
loss: 0.9967063069343567,grad_norm: 0.8323332149200248, iteration: 176938
loss: 0.9903192520141602,grad_norm: 0.9132912250221366, iteration: 176939
loss: 0.9900937080383301,grad_norm: 0.9999992064568832, iteration: 176940
loss: 0.9967749714851379,grad_norm: 0.8115764493888462, iteration: 176941
loss: 0.9474561214447021,grad_norm: 0.9999991638052452, iteration: 176942
loss: 0.9818501472473145,grad_norm: 0.9999993443462674, iteration: 176943
loss: 0.9955374598503113,grad_norm: 0.9439391786212884, iteration: 176944
loss: 0.9946417212486267,grad_norm: 0.9512137905128402, iteration: 176945
loss: 0.9826314449310303,grad_norm: 0.8863065706409032, iteration: 176946
loss: 1.0091078281402588,grad_norm: 0.9999991381675796, iteration: 176947
loss: 0.9753191471099854,grad_norm: 0.9321341520772122, iteration: 176948
loss: 1.0038082599639893,grad_norm: 0.7934326969985961, iteration: 176949
loss: 0.9785224795341492,grad_norm: 0.9106167791065686, iteration: 176950
loss: 0.9875068068504333,grad_norm: 0.9999991612556953, iteration: 176951
loss: 0.9613069891929626,grad_norm: 0.7886655631841726, iteration: 176952
loss: 0.9949840903282166,grad_norm: 0.9999991364536185, iteration: 176953
loss: 1.0100958347320557,grad_norm: 0.8528226416607753, iteration: 176954
loss: 0.9835792779922485,grad_norm: 0.9156658667626972, iteration: 176955
loss: 0.9320911169052124,grad_norm: 0.9910158752944621, iteration: 176956
loss: 0.9844835996627808,grad_norm: 0.8074848460055227, iteration: 176957
loss: 1.0056382417678833,grad_norm: 0.9263860773988671, iteration: 176958
loss: 1.0340886116027832,grad_norm: 0.9999991242218971, iteration: 176959
loss: 1.0083417892456055,grad_norm: 0.9381635123357646, iteration: 176960
loss: 0.976965069770813,grad_norm: 0.9999991565218855, iteration: 176961
loss: 0.9936172962188721,grad_norm: 0.9999991486545471, iteration: 176962
loss: 0.9981625080108643,grad_norm: 0.8696082355292314, iteration: 176963
loss: 0.9788094758987427,grad_norm: 0.9054609306603281, iteration: 176964
loss: 1.0029585361480713,grad_norm: 0.9999991083942772, iteration: 176965
loss: 1.0021123886108398,grad_norm: 0.9803126286027903, iteration: 176966
loss: 1.0157219171524048,grad_norm: 0.9999990291287706, iteration: 176967
loss: 0.9889373779296875,grad_norm: 0.9999991582991239, iteration: 176968
loss: 0.9950540065765381,grad_norm: 0.9999991213990305, iteration: 176969
loss: 1.0311614274978638,grad_norm: 0.9998263084088888, iteration: 176970
loss: 1.0015482902526855,grad_norm: 0.9999989872879059, iteration: 176971
loss: 1.0124740600585938,grad_norm: 0.9976410131642536, iteration: 176972
loss: 0.9765448570251465,grad_norm: 0.99999902713626, iteration: 176973
loss: 0.9843955039978027,grad_norm: 0.9999991002723361, iteration: 176974
loss: 1.0036890506744385,grad_norm: 0.9120776055947193, iteration: 176975
loss: 1.0290663242340088,grad_norm: 0.9111681590474604, iteration: 176976
loss: 1.0128567218780518,grad_norm: 0.9999990591104018, iteration: 176977
loss: 1.006184458732605,grad_norm: 0.8677311697282509, iteration: 176978
loss: 1.0091089010238647,grad_norm: 0.9999990529241958, iteration: 176979
loss: 0.9680684208869934,grad_norm: 0.9999991086491715, iteration: 176980
loss: 1.012557029724121,grad_norm: 0.9999992556018873, iteration: 176981
loss: 1.027321219444275,grad_norm: 0.9050065198823011, iteration: 176982
loss: 1.00648033618927,grad_norm: 0.9999991845033772, iteration: 176983
loss: 0.9924577474594116,grad_norm: 0.873099973882532, iteration: 176984
loss: 0.9479197263717651,grad_norm: 0.8029093324772512, iteration: 176985
loss: 0.9713177680969238,grad_norm: 0.8821496128992011, iteration: 176986
loss: 0.9937869310379028,grad_norm: 0.9999991608649795, iteration: 176987
loss: 0.9925164580345154,grad_norm: 0.9670134195834513, iteration: 176988
loss: 0.9843212962150574,grad_norm: 0.9999991100853998, iteration: 176989
loss: 0.9888750314712524,grad_norm: 0.9260514015022706, iteration: 176990
loss: 1.0047321319580078,grad_norm: 0.9999989192968425, iteration: 176991
loss: 1.0226545333862305,grad_norm: 0.9999990348647013, iteration: 176992
loss: 0.9978329539299011,grad_norm: 0.9999991343731398, iteration: 176993
loss: 0.9772992730140686,grad_norm: 0.908683131968866, iteration: 176994
loss: 0.9917285442352295,grad_norm: 0.9385027327645235, iteration: 176995
loss: 0.9823081493377686,grad_norm: 0.9999990392732969, iteration: 176996
loss: 0.9919634461402893,grad_norm: 0.7683662138503284, iteration: 176997
loss: 0.9650537967681885,grad_norm: 0.9055728873644571, iteration: 176998
loss: 0.9804264307022095,grad_norm: 0.9124864042569154, iteration: 176999
loss: 0.9954465627670288,grad_norm: 0.9999997029094342, iteration: 177000
loss: 0.9747701287269592,grad_norm: 0.999999101786088, iteration: 177001
loss: 1.0137790441513062,grad_norm: 0.9999991744984413, iteration: 177002
loss: 1.0261139869689941,grad_norm: 0.9999991392567156, iteration: 177003
loss: 1.0319972038269043,grad_norm: 0.9999990740885497, iteration: 177004
loss: 1.040503978729248,grad_norm: 0.9999991296932103, iteration: 177005
loss: 1.0063843727111816,grad_norm: 0.8554496726640608, iteration: 177006
loss: 1.0197914838790894,grad_norm: 0.9999990881837433, iteration: 177007
loss: 1.0592918395996094,grad_norm: 0.999998940369691, iteration: 177008
loss: 1.0055344104766846,grad_norm: 0.9665343239199019, iteration: 177009
loss: 0.9827871918678284,grad_norm: 0.9999989986862013, iteration: 177010
loss: 0.9914261698722839,grad_norm: 0.9999990902491118, iteration: 177011
loss: 1.01228928565979,grad_norm: 0.9570062341454626, iteration: 177012
loss: 0.9816138744354248,grad_norm: 0.958712014536179, iteration: 177013
loss: 1.0071346759796143,grad_norm: 0.9905220567443255, iteration: 177014
loss: 1.0705069303512573,grad_norm: 0.9999994454216417, iteration: 177015
loss: 0.9679384827613831,grad_norm: 0.9301701620805363, iteration: 177016
loss: 0.9733701944351196,grad_norm: 0.8166186976745223, iteration: 177017
loss: 1.0094873905181885,grad_norm: 0.8892730659217679, iteration: 177018
loss: 0.9975847601890564,grad_norm: 0.9454604768812708, iteration: 177019
loss: 0.989825427532196,grad_norm: 0.9142782646881185, iteration: 177020
loss: 1.0448805093765259,grad_norm: 0.9999991034977335, iteration: 177021
loss: 1.003842830657959,grad_norm: 0.9865373607261854, iteration: 177022
loss: 0.9900885224342346,grad_norm: 0.8995427781882677, iteration: 177023
loss: 1.084665060043335,grad_norm: 0.9999993647943408, iteration: 177024
loss: 0.9876348376274109,grad_norm: 0.9540870202954334, iteration: 177025
loss: 1.0034643411636353,grad_norm: 0.8852978719266121, iteration: 177026
loss: 1.01942777633667,grad_norm: 0.9999990810136575, iteration: 177027
loss: 0.9540865421295166,grad_norm: 0.9054680554976352, iteration: 177028
loss: 0.9977278113365173,grad_norm: 0.9184721635857878, iteration: 177029
loss: 1.0008108615875244,grad_norm: 0.999999079226028, iteration: 177030
loss: 1.0006502866744995,grad_norm: 0.9276138201954579, iteration: 177031
loss: 1.0197702646255493,grad_norm: 0.9999991422628888, iteration: 177032
loss: 1.0361016988754272,grad_norm: 0.99999947906575, iteration: 177033
loss: 0.94158935546875,grad_norm: 0.9365703327902223, iteration: 177034
loss: 0.9660921692848206,grad_norm: 0.9259020605603414, iteration: 177035
loss: 1.0040627717971802,grad_norm: 0.9055478228214783, iteration: 177036
loss: 0.9605832695960999,grad_norm: 0.9668685840359958, iteration: 177037
loss: 0.9723947048187256,grad_norm: 0.9922900005418768, iteration: 177038
loss: 1.0180327892303467,grad_norm: 0.9999998471283595, iteration: 177039
loss: 1.0042320489883423,grad_norm: 0.9988363097093361, iteration: 177040
loss: 0.9972318410873413,grad_norm: 0.9999990905388204, iteration: 177041
loss: 1.0174353122711182,grad_norm: 0.9414082134030158, iteration: 177042
loss: 0.9699288606643677,grad_norm: 0.93250875566847, iteration: 177043
loss: 1.01570463180542,grad_norm: 0.999999126091885, iteration: 177044
loss: 0.9928051829338074,grad_norm: 0.9999991887828354, iteration: 177045
loss: 0.9842007160186768,grad_norm: 0.9999991309896171, iteration: 177046
loss: 0.9994760751724243,grad_norm: 0.999999245477327, iteration: 177047
loss: 0.9963639378547668,grad_norm: 0.9999991326559765, iteration: 177048
loss: 1.0324668884277344,grad_norm: 0.9411753627294817, iteration: 177049
loss: 0.9972607493400574,grad_norm: 0.9999990667055833, iteration: 177050
loss: 1.0440428256988525,grad_norm: 0.9999992781982587, iteration: 177051
loss: 1.0099012851715088,grad_norm: 0.9918369975633095, iteration: 177052
loss: 1.0353063344955444,grad_norm: 0.9999991660523108, iteration: 177053
loss: 1.016659140586853,grad_norm: 0.9999999662287883, iteration: 177054
loss: 0.986146867275238,grad_norm: 0.9373012631297694, iteration: 177055
loss: 1.01510488986969,grad_norm: 0.9999992090072949, iteration: 177056
loss: 0.9817883372306824,grad_norm: 0.7923829630254224, iteration: 177057
loss: 0.9793794751167297,grad_norm: 0.9245217944724717, iteration: 177058
loss: 1.0202845335006714,grad_norm: 0.9999989492912859, iteration: 177059
loss: 1.0215855836868286,grad_norm: 0.9999992503735782, iteration: 177060
loss: 0.9771566987037659,grad_norm: 0.8644948948581022, iteration: 177061
loss: 0.9919527173042297,grad_norm: 0.9999989844425637, iteration: 177062
loss: 1.0120294094085693,grad_norm: 0.999999049791103, iteration: 177063
loss: 0.986362099647522,grad_norm: 0.7842083529993576, iteration: 177064
loss: 0.9552839398384094,grad_norm: 0.993926417531189, iteration: 177065
loss: 0.9962230324745178,grad_norm: 0.9718698709515144, iteration: 177066
loss: 1.0053449869155884,grad_norm: 0.8500143397964539, iteration: 177067
loss: 1.0101150274276733,grad_norm: 0.9999992439800476, iteration: 177068
loss: 1.0111876726150513,grad_norm: 0.9999993601620831, iteration: 177069
loss: 1.0006442070007324,grad_norm: 0.7866630253683896, iteration: 177070
loss: 1.01018226146698,grad_norm: 0.9853642645514162, iteration: 177071
loss: 0.9921948313713074,grad_norm: 0.8829317287774762, iteration: 177072
loss: 0.9878406524658203,grad_norm: 0.8532029495335985, iteration: 177073
loss: 0.9823567867279053,grad_norm: 0.9607661735159316, iteration: 177074
loss: 0.9763302803039551,grad_norm: 0.9056458948949923, iteration: 177075
loss: 0.9806879758834839,grad_norm: 0.9999991855390823, iteration: 177076
loss: 0.9351557493209839,grad_norm: 0.8865164884994357, iteration: 177077
loss: 0.9827540516853333,grad_norm: 0.9676116419542735, iteration: 177078
loss: 0.9924473166465759,grad_norm: 0.9999991757836523, iteration: 177079
loss: 0.9787266850471497,grad_norm: 0.8776468871581748, iteration: 177080
loss: 0.9914619326591492,grad_norm: 0.999999051255578, iteration: 177081
loss: 0.9859526753425598,grad_norm: 0.9167055580962029, iteration: 177082
loss: 0.9875392913818359,grad_norm: 0.9999991309331285, iteration: 177083
loss: 1.0349491834640503,grad_norm: 0.9060355208939038, iteration: 177084
loss: 1.0437877178192139,grad_norm: 0.9999991088133012, iteration: 177085
loss: 0.9635583758354187,grad_norm: 0.999999159985249, iteration: 177086
loss: 0.9840236902236938,grad_norm: 0.9999990858036304, iteration: 177087
loss: 0.9882606267929077,grad_norm: 0.9415033147426167, iteration: 177088
loss: 1.002180576324463,grad_norm: 0.9637677987585419, iteration: 177089
loss: 0.985209047794342,grad_norm: 0.9999991157624929, iteration: 177090
loss: 1.0118391513824463,grad_norm: 0.9195764283227519, iteration: 177091
loss: 0.9934727549552917,grad_norm: 0.9999991174654707, iteration: 177092
loss: 1.0379037857055664,grad_norm: 0.9999990969430294, iteration: 177093
loss: 0.964716374874115,grad_norm: 0.9973122826333424, iteration: 177094
loss: 0.9741564393043518,grad_norm: 0.9999991001630184, iteration: 177095
loss: 1.0106666088104248,grad_norm: 0.8577857645622535, iteration: 177096
loss: 0.9613704085350037,grad_norm: 0.7781931578344722, iteration: 177097
loss: 0.97942054271698,grad_norm: 0.9005704959509764, iteration: 177098
loss: 0.9824399352073669,grad_norm: 0.9999992016958561, iteration: 177099
loss: 0.9748961925506592,grad_norm: 0.9999991049385982, iteration: 177100
loss: 1.0064067840576172,grad_norm: 0.9225619103578451, iteration: 177101
loss: 1.0039725303649902,grad_norm: 0.8638948884739731, iteration: 177102
loss: 0.9967901706695557,grad_norm: 0.9999989198127209, iteration: 177103
loss: 0.981238842010498,grad_norm: 0.9866797244249078, iteration: 177104
loss: 1.0244054794311523,grad_norm: 0.9999990875830295, iteration: 177105
loss: 1.0185203552246094,grad_norm: 0.9105200865699073, iteration: 177106
loss: 0.9837772250175476,grad_norm: 0.9983318926130931, iteration: 177107
loss: 0.9853653907775879,grad_norm: 0.9999991171381436, iteration: 177108
loss: 1.0091756582260132,grad_norm: 0.8759662144259754, iteration: 177109
loss: 0.9783202409744263,grad_norm: 0.8942324124757394, iteration: 177110
loss: 1.0431864261627197,grad_norm: 0.9999991523653998, iteration: 177111
loss: 0.9587766528129578,grad_norm: 0.9070550425884324, iteration: 177112
loss: 0.967335045337677,grad_norm: 0.9383974999326876, iteration: 177113
loss: 1.0148409605026245,grad_norm: 0.9867159094391914, iteration: 177114
loss: 0.9901290535926819,grad_norm: 0.9999991159960095, iteration: 177115
loss: 0.9770712852478027,grad_norm: 0.9777768856567037, iteration: 177116
loss: 1.0035802125930786,grad_norm: 0.9999991070322203, iteration: 177117
loss: 1.0569061040878296,grad_norm: 0.9999995458172857, iteration: 177118
loss: 0.9894939064979553,grad_norm: 0.9999990494960936, iteration: 177119
loss: 1.1084847450256348,grad_norm: 0.9999991190479057, iteration: 177120
loss: 0.9977622628211975,grad_norm: 0.9547032544354878, iteration: 177121
loss: 1.036597490310669,grad_norm: 0.8466831255262608, iteration: 177122
loss: 1.023963212966919,grad_norm: 0.9999990917725948, iteration: 177123
loss: 0.9811542630195618,grad_norm: 0.9999990644101341, iteration: 177124
loss: 0.9766867756843567,grad_norm: 0.822620960698035, iteration: 177125
loss: 1.0175271034240723,grad_norm: 0.9999993536208017, iteration: 177126
loss: 0.9964755773544312,grad_norm: 0.9999991034535667, iteration: 177127
loss: 1.002550721168518,grad_norm: 0.8879510230175462, iteration: 177128
loss: 1.0254292488098145,grad_norm: 0.999998974665262, iteration: 177129
loss: 1.005210518836975,grad_norm: 0.9999990020898024, iteration: 177130
loss: 1.0085357427597046,grad_norm: 0.9999992305188131, iteration: 177131
loss: 1.011542558670044,grad_norm: 0.9999989663348905, iteration: 177132
loss: 0.9661609530448914,grad_norm: 0.9999990696193347, iteration: 177133
loss: 0.9979380369186401,grad_norm: 0.9088118813373439, iteration: 177134
loss: 1.0001792907714844,grad_norm: 0.999999311182699, iteration: 177135
loss: 0.9587156176567078,grad_norm: 0.9826760386429667, iteration: 177136
loss: 1.0021930932998657,grad_norm: 0.8295814582165861, iteration: 177137
loss: 0.925584077835083,grad_norm: 0.9999989533763426, iteration: 177138
loss: 0.9716124534606934,grad_norm: 0.9058719994803773, iteration: 177139
loss: 1.0305975675582886,grad_norm: 0.9999991109596188, iteration: 177140
loss: 0.9753891825675964,grad_norm: 0.9723965808541225, iteration: 177141
loss: 0.9818274974822998,grad_norm: 0.9156772594638123, iteration: 177142
loss: 0.9800248146057129,grad_norm: 0.9042827324186732, iteration: 177143
loss: 0.9934375882148743,grad_norm: 0.9999990728520025, iteration: 177144
loss: 1.0062824487686157,grad_norm: 0.9999992742873425, iteration: 177145
loss: 1.0099351406097412,grad_norm: 0.9999992874696342, iteration: 177146
loss: 0.9983089566230774,grad_norm: 0.9544788312523862, iteration: 177147
loss: 1.0238773822784424,grad_norm: 0.9999990537381267, iteration: 177148
loss: 0.9750708341598511,grad_norm: 0.9903652952613775, iteration: 177149
loss: 0.9841324687004089,grad_norm: 0.9168481204440799, iteration: 177150
loss: 0.9606995582580566,grad_norm: 0.999999001326757, iteration: 177151
loss: 1.012083888053894,grad_norm: 0.9101576866512239, iteration: 177152
loss: 0.9732517600059509,grad_norm: 0.9999990087072989, iteration: 177153
loss: 1.0082778930664062,grad_norm: 0.9999991749329263, iteration: 177154
loss: 1.0234192609786987,grad_norm: 0.9999999165532862, iteration: 177155
loss: 0.9663475751876831,grad_norm: 0.9999989901433194, iteration: 177156
loss: 0.9926264882087708,grad_norm: 0.9999997481647982, iteration: 177157
loss: 1.036444067955017,grad_norm: 0.9787782399068399, iteration: 177158
loss: 0.9993686676025391,grad_norm: 0.9699198398269755, iteration: 177159
loss: 1.001680612564087,grad_norm: 0.9049110418433557, iteration: 177160
loss: 1.0287272930145264,grad_norm: 0.9999991686476314, iteration: 177161
loss: 0.9871556758880615,grad_norm: 0.9999993356728601, iteration: 177162
loss: 0.9784086346626282,grad_norm: 0.9579625665061963, iteration: 177163
loss: 1.0184634923934937,grad_norm: 0.9650605357902854, iteration: 177164
loss: 1.0095535516738892,grad_norm: 0.999999203168348, iteration: 177165
loss: 0.986501157283783,grad_norm: 0.9999990257299779, iteration: 177166
loss: 0.9977448582649231,grad_norm: 0.9480563138849439, iteration: 177167
loss: 1.008475422859192,grad_norm: 0.9630196547881216, iteration: 177168
loss: 0.9804149270057678,grad_norm: 0.8464395143353078, iteration: 177169
loss: 1.0026625394821167,grad_norm: 0.9783396154637051, iteration: 177170
loss: 0.9911572933197021,grad_norm: 0.999999424994063, iteration: 177171
loss: 0.9949288964271545,grad_norm: 0.9999990456984531, iteration: 177172
loss: 0.9930086135864258,grad_norm: 0.8659667614965622, iteration: 177173
loss: 1.00407874584198,grad_norm: 0.9999990073363174, iteration: 177174
loss: 1.0198856592178345,grad_norm: 0.9999992297399669, iteration: 177175
loss: 1.0063248872756958,grad_norm: 0.8713476778246044, iteration: 177176
loss: 0.9930954575538635,grad_norm: 0.9238806082677591, iteration: 177177
loss: 0.9913859963417053,grad_norm: 0.7892176197109433, iteration: 177178
loss: 1.0054681301116943,grad_norm: 0.9999991905873866, iteration: 177179
loss: 0.9939143061637878,grad_norm: 0.9999993157539435, iteration: 177180
loss: 1.0012953281402588,grad_norm: 0.9547421777069302, iteration: 177181
loss: 0.9953474998474121,grad_norm: 0.9999991504736576, iteration: 177182
loss: 1.0321226119995117,grad_norm: 0.9999991256826345, iteration: 177183
loss: 1.0007102489471436,grad_norm: 0.9999991001357048, iteration: 177184
loss: 0.9801884889602661,grad_norm: 0.9425835516196491, iteration: 177185
loss: 0.9980829358100891,grad_norm: 0.9999990466254957, iteration: 177186
loss: 1.0110288858413696,grad_norm: 0.9999991345456578, iteration: 177187
loss: 0.998302698135376,grad_norm: 0.9145636718832594, iteration: 177188
loss: 1.020687460899353,grad_norm: 0.9031515819371887, iteration: 177189
loss: 1.0157328844070435,grad_norm: 0.9442090410362847, iteration: 177190
loss: 0.9607036113739014,grad_norm: 0.9638098495365226, iteration: 177191
loss: 1.0004940032958984,grad_norm: 0.8723643380513497, iteration: 177192
loss: 1.006460189819336,grad_norm: 0.9346029058359995, iteration: 177193
loss: 1.0174883604049683,grad_norm: 0.9999992694045151, iteration: 177194
loss: 1.0360705852508545,grad_norm: 0.999999084140968, iteration: 177195
loss: 0.9648543000221252,grad_norm: 0.9999992742729246, iteration: 177196
loss: 0.9737870097160339,grad_norm: 0.986635067973518, iteration: 177197
loss: 0.9945187568664551,grad_norm: 0.9438182159878051, iteration: 177198
loss: 1.0051723718643188,grad_norm: 0.7775571774176148, iteration: 177199
loss: 1.0096288919448853,grad_norm: 0.9999991010723122, iteration: 177200
loss: 0.9808631539344788,grad_norm: 0.8925314259396144, iteration: 177201
loss: 1.0507875680923462,grad_norm: 0.9999990928150996, iteration: 177202
loss: 0.9953311681747437,grad_norm: 0.9999990207100347, iteration: 177203
loss: 1.002813696861267,grad_norm: 0.9999990624527576, iteration: 177204
loss: 1.0126992464065552,grad_norm: 0.9999990995569643, iteration: 177205
loss: 0.978848397731781,grad_norm: 0.9999991164682578, iteration: 177206
loss: 0.9755613207817078,grad_norm: 0.9282371508548776, iteration: 177207
loss: 1.009128212928772,grad_norm: 0.8142040329624637, iteration: 177208
loss: 1.0032986402511597,grad_norm: 0.9999991684768936, iteration: 177209
loss: 1.0061438083648682,grad_norm: 0.9256848718243147, iteration: 177210
loss: 0.9781634211540222,grad_norm: 0.9804281838214793, iteration: 177211
loss: 1.0330686569213867,grad_norm: 0.9634674343787577, iteration: 177212
loss: 1.011555790901184,grad_norm: 0.9323010492111494, iteration: 177213
loss: 0.9979432225227356,grad_norm: 0.9999993061165527, iteration: 177214
loss: 1.0334452390670776,grad_norm: 0.9999991101437051, iteration: 177215
loss: 0.9652484655380249,grad_norm: 0.9504309821675584, iteration: 177216
loss: 1.014500617980957,grad_norm: 0.9125508993378381, iteration: 177217
loss: 0.9572038054466248,grad_norm: 0.9999991050931415, iteration: 177218
loss: 1.0200738906860352,grad_norm: 0.9390928936607087, iteration: 177219
loss: 1.0536407232284546,grad_norm: 0.985630097561437, iteration: 177220
loss: 1.0097345113754272,grad_norm: 0.9999991755297009, iteration: 177221
loss: 1.0246368646621704,grad_norm: 0.9215457672723882, iteration: 177222
loss: 1.018231749534607,grad_norm: 0.842073894818409, iteration: 177223
loss: 1.010526180267334,grad_norm: 0.9394825658860076, iteration: 177224
loss: 0.9797225594520569,grad_norm: 0.9999990277216247, iteration: 177225
loss: 0.9720619320869446,grad_norm: 0.8788603011898191, iteration: 177226
loss: 0.993205189704895,grad_norm: 0.9999990596952621, iteration: 177227
loss: 0.9908526539802551,grad_norm: 0.9999991611238483, iteration: 177228
loss: 0.9969065189361572,grad_norm: 0.9999991435944396, iteration: 177229
loss: 1.0080033540725708,grad_norm: 0.9047998117678326, iteration: 177230
loss: 0.9905757904052734,grad_norm: 0.9208366273590931, iteration: 177231
loss: 0.9852806925773621,grad_norm: 0.8692758703077764, iteration: 177232
loss: 1.0328965187072754,grad_norm: 0.9999991830253817, iteration: 177233
loss: 0.98161780834198,grad_norm: 0.9537314301484506, iteration: 177234
loss: 1.0103791952133179,grad_norm: 0.8668792222913665, iteration: 177235
loss: 0.9864940047264099,grad_norm: 0.9022611918751525, iteration: 177236
loss: 0.9750614166259766,grad_norm: 0.9999990402054488, iteration: 177237
loss: 0.9936075806617737,grad_norm: 0.9999991277307011, iteration: 177238
loss: 0.9865860939025879,grad_norm: 0.7935744211056063, iteration: 177239
loss: 0.9748733043670654,grad_norm: 0.9466974841494795, iteration: 177240
loss: 1.0258315801620483,grad_norm: 0.881845787400039, iteration: 177241
loss: 1.0243611335754395,grad_norm: 0.9151142003538214, iteration: 177242
loss: 1.0230295658111572,grad_norm: 0.9999990844699935, iteration: 177243
loss: 0.9957959651947021,grad_norm: 0.9999990786096135, iteration: 177244
loss: 1.0389832258224487,grad_norm: 0.999999075060259, iteration: 177245
loss: 1.013495922088623,grad_norm: 0.9368862047016031, iteration: 177246
loss: 0.9923603534698486,grad_norm: 0.9999990929962231, iteration: 177247
loss: 1.0483511686325073,grad_norm: 0.8043683643094278, iteration: 177248
loss: 0.9842092394828796,grad_norm: 0.8713955144719024, iteration: 177249
loss: 1.0221561193466187,grad_norm: 0.9435407179482025, iteration: 177250
loss: 1.002700924873352,grad_norm: 0.9017237474329546, iteration: 177251
loss: 1.000169277191162,grad_norm: 0.9999991539382992, iteration: 177252
loss: 1.0187715291976929,grad_norm: 0.8677790261363202, iteration: 177253
loss: 1.0413501262664795,grad_norm: 0.9999992488859728, iteration: 177254
loss: 1.0028685331344604,grad_norm: 0.9464619105632623, iteration: 177255
loss: 1.0062016248703003,grad_norm: 0.9999991600617569, iteration: 177256
loss: 1.0491307973861694,grad_norm: 0.9999995238978386, iteration: 177257
loss: 1.0046286582946777,grad_norm: 0.9438410352645424, iteration: 177258
loss: 0.9991656541824341,grad_norm: 0.9999991701864529, iteration: 177259
loss: 1.0117628574371338,grad_norm: 0.999999135105011, iteration: 177260
loss: 0.994922399520874,grad_norm: 0.8738958827778153, iteration: 177261
loss: 0.9778259992599487,grad_norm: 0.9999991486060065, iteration: 177262
loss: 0.9669079184532166,grad_norm: 0.9999990225937494, iteration: 177263
loss: 0.9720357060432434,grad_norm: 0.9332650102552015, iteration: 177264
loss: 1.0234922170639038,grad_norm: 0.9999990030048467, iteration: 177265
loss: 1.0056860446929932,grad_norm: 0.999999086520003, iteration: 177266
loss: 0.9641551375389099,grad_norm: 0.999999198235379, iteration: 177267
loss: 1.020023226737976,grad_norm: 0.9999991805069648, iteration: 177268
loss: 0.9847939014434814,grad_norm: 0.8372735859817865, iteration: 177269
loss: 1.0048657655715942,grad_norm: 0.8154606309364738, iteration: 177270
loss: 0.9771724939346313,grad_norm: 0.8911964899853478, iteration: 177271
loss: 1.0144197940826416,grad_norm: 0.9999996838653459, iteration: 177272
loss: 1.01536226272583,grad_norm: 0.9189251399723187, iteration: 177273
loss: 0.9901228547096252,grad_norm: 0.9330712055829342, iteration: 177274
loss: 0.9903250336647034,grad_norm: 0.9389737132589032, iteration: 177275
loss: 0.9443567395210266,grad_norm: 0.9620291944807549, iteration: 177276
loss: 1.0297213792800903,grad_norm: 0.9999992977203226, iteration: 177277
loss: 1.0071309804916382,grad_norm: 0.7862141679211831, iteration: 177278
loss: 1.0042047500610352,grad_norm: 0.9999991621811395, iteration: 177279
loss: 0.9754148721694946,grad_norm: 0.9596966021874196, iteration: 177280
loss: 1.0666249990463257,grad_norm: 0.9999998698429405, iteration: 177281
loss: 0.993278980255127,grad_norm: 0.7562975484096447, iteration: 177282
loss: 1.0183820724487305,grad_norm: 0.9999999687982796, iteration: 177283
loss: 1.0172673463821411,grad_norm: 0.9067158096491152, iteration: 177284
loss: 0.9927039742469788,grad_norm: 0.9999991945809229, iteration: 177285
loss: 0.986743688583374,grad_norm: 0.999999213069562, iteration: 177286
loss: 1.0229524374008179,grad_norm: 0.9153441589525356, iteration: 177287
loss: 1.0220106840133667,grad_norm: 0.9999990714785209, iteration: 177288
loss: 0.9954742789268494,grad_norm: 0.9261898321904354, iteration: 177289
loss: 1.0080246925354004,grad_norm: 0.8210247493923041, iteration: 177290
loss: 1.0307707786560059,grad_norm: 0.9152184862846531, iteration: 177291
loss: 0.9910845160484314,grad_norm: 0.9969001824914883, iteration: 177292
loss: 0.9792970418930054,grad_norm: 0.9999991586924384, iteration: 177293
loss: 0.9999570846557617,grad_norm: 0.7976736792059027, iteration: 177294
loss: 1.0417293310165405,grad_norm: 0.8850235860599208, iteration: 177295
loss: 0.9956071376800537,grad_norm: 0.981633813198551, iteration: 177296
loss: 1.0492686033248901,grad_norm: 0.9999992039441935, iteration: 177297
loss: 1.0760892629623413,grad_norm: 0.9999994339363978, iteration: 177298
loss: 0.964258074760437,grad_norm: 0.843938217149027, iteration: 177299
loss: 0.9733997583389282,grad_norm: 0.9999991274560871, iteration: 177300
loss: 1.0342798233032227,grad_norm: 0.9166712775734229, iteration: 177301
loss: 0.9717589616775513,grad_norm: 0.9881276024942129, iteration: 177302
loss: 0.9830020666122437,grad_norm: 0.9647882024482622, iteration: 177303
loss: 0.9947801828384399,grad_norm: 0.9867553128172839, iteration: 177304
loss: 0.9849595427513123,grad_norm: 0.8820866866022605, iteration: 177305
loss: 1.0001460313796997,grad_norm: 0.9999990859054343, iteration: 177306
loss: 1.0062317848205566,grad_norm: 0.9991043405181856, iteration: 177307
loss: 0.9551509618759155,grad_norm: 0.8711790114466775, iteration: 177308
loss: 0.9804559946060181,grad_norm: 0.9515349500554111, iteration: 177309
loss: 1.011648416519165,grad_norm: 0.9471941457911639, iteration: 177310
loss: 0.9936658143997192,grad_norm: 0.9999991697942796, iteration: 177311
loss: 0.9975815415382385,grad_norm: 0.8801161176651753, iteration: 177312
loss: 0.94975346326828,grad_norm: 0.9999990448681956, iteration: 177313
loss: 1.1092777252197266,grad_norm: 0.999999717230957, iteration: 177314
loss: 0.9960604906082153,grad_norm: 0.8607837419326716, iteration: 177315
loss: 1.0042766332626343,grad_norm: 0.9999990830115029, iteration: 177316
loss: 1.0179007053375244,grad_norm: 0.9999992590428762, iteration: 177317
loss: 0.992368757724762,grad_norm: 0.9999991541290231, iteration: 177318
loss: 1.101954698562622,grad_norm: 0.9999998436745953, iteration: 177319
loss: 0.9738246202468872,grad_norm: 0.9999991795024852, iteration: 177320
loss: 1.3718466758728027,grad_norm: 0.9999998269787979, iteration: 177321
loss: 0.9795568585395813,grad_norm: 0.8627674842342218, iteration: 177322
loss: 1.0079201459884644,grad_norm: 0.9749826544890259, iteration: 177323
loss: 1.0184236764907837,grad_norm: 0.9999991670628391, iteration: 177324
loss: 0.9824115633964539,grad_norm: 0.9532653018850938, iteration: 177325
loss: 0.9982224106788635,grad_norm: 0.9922499954181385, iteration: 177326
loss: 1.0484914779663086,grad_norm: 0.9999998347057085, iteration: 177327
loss: 1.0706167221069336,grad_norm: 0.9999993856228602, iteration: 177328
loss: 0.9908864498138428,grad_norm: 0.9287696958471229, iteration: 177329
loss: 1.017012357711792,grad_norm: 0.9592295129929099, iteration: 177330
loss: 1.0098161697387695,grad_norm: 0.9336503328147255, iteration: 177331
loss: 0.984319269657135,grad_norm: 0.9593742980664062, iteration: 177332
loss: 0.9702211022377014,grad_norm: 0.9999990362679336, iteration: 177333
loss: 1.0786690711975098,grad_norm: 0.9999997860348372, iteration: 177334
loss: 1.0088928937911987,grad_norm: 0.9999990616704668, iteration: 177335
loss: 0.9933831095695496,grad_norm: 0.9287434800477694, iteration: 177336
loss: 1.009511113166809,grad_norm: 0.874775765817432, iteration: 177337
loss: 1.1109273433685303,grad_norm: 0.9999992548957323, iteration: 177338
loss: 0.9878715872764587,grad_norm: 0.9999990708548766, iteration: 177339
loss: 1.1199487447738647,grad_norm: 0.9999996636567173, iteration: 177340
loss: 0.978186309337616,grad_norm: 0.9999990793214928, iteration: 177341
loss: 1.0999516248703003,grad_norm: 0.8784143620911254, iteration: 177342
loss: 0.9870958924293518,grad_norm: 0.9999990984712981, iteration: 177343
loss: 0.9533759355545044,grad_norm: 0.9590992079126991, iteration: 177344
loss: 1.1528571844100952,grad_norm: 0.9999995453925834, iteration: 177345
loss: 1.0846492052078247,grad_norm: 0.9999991839797872, iteration: 177346
loss: 1.2861109972000122,grad_norm: 0.999999645516232, iteration: 177347
loss: 1.058449387550354,grad_norm: 0.999999087210737, iteration: 177348
loss: 1.1238493919372559,grad_norm: 0.9999991513317079, iteration: 177349
loss: 1.1068788766860962,grad_norm: 0.9999998302800847, iteration: 177350
loss: 1.0928215980529785,grad_norm: 0.9999992721075401, iteration: 177351
loss: 1.146276593208313,grad_norm: 0.9999998864096682, iteration: 177352
loss: 1.0868252515792847,grad_norm: 0.9999994424480125, iteration: 177353
loss: 1.093934416770935,grad_norm: 0.999999549576158, iteration: 177354
loss: 1.5931806564331055,grad_norm: 0.999999932940674, iteration: 177355
loss: 1.0393328666687012,grad_norm: 0.999999091421958, iteration: 177356
loss: 1.2700111865997314,grad_norm: 0.999999741300714, iteration: 177357
loss: 1.5645928382873535,grad_norm: 0.9999999604836612, iteration: 177358
loss: 1.1338242292404175,grad_norm: 0.9999996404927749, iteration: 177359
loss: 1.1697479486465454,grad_norm: 0.9999994476805523, iteration: 177360
loss: 1.23477303981781,grad_norm: 0.9999997951487447, iteration: 177361
loss: 1.7021905183792114,grad_norm: 0.9999999310669565, iteration: 177362
loss: 1.085095763206482,grad_norm: 0.9999996710217948, iteration: 177363
loss: 1.2821193933486938,grad_norm: 0.9999997200086936, iteration: 177364
loss: 1.0786821842193604,grad_norm: 0.9999995224616395, iteration: 177365
loss: 1.7179383039474487,grad_norm: 0.9999996942126079, iteration: 177366
loss: 1.1983602046966553,grad_norm: 0.9999997949179659, iteration: 177367
loss: 1.1279388666152954,grad_norm: 0.9999991782100227, iteration: 177368
loss: 1.600162386894226,grad_norm: 0.9999999985140949, iteration: 177369
loss: 1.2886955738067627,grad_norm: 0.9999999473171196, iteration: 177370
loss: 1.1339823007583618,grad_norm: 0.9999992129584695, iteration: 177371
loss: 1.2672874927520752,grad_norm: 0.9999992684196003, iteration: 177372
loss: 1.3052364587783813,grad_norm: 0.9999998373986568, iteration: 177373
loss: 1.4369522333145142,grad_norm: 0.9999998281267791, iteration: 177374
loss: 1.1432298421859741,grad_norm: 0.9999995492228114, iteration: 177375
loss: 1.2265574932098389,grad_norm: 0.9999998284477017, iteration: 177376
loss: 1.0193678140640259,grad_norm: 0.9999991570386066, iteration: 177377
loss: 1.2503690719604492,grad_norm: 0.9999997968268084, iteration: 177378
loss: 1.2196096181869507,grad_norm: 0.999999758993916, iteration: 177379
loss: 0.97283536195755,grad_norm: 0.9999992814924955, iteration: 177380
loss: 1.232308030128479,grad_norm: 0.9999998395546746, iteration: 177381
loss: 1.1100400686264038,grad_norm: 0.9999992176258143, iteration: 177382
loss: 1.2431879043579102,grad_norm: 0.9999999577625095, iteration: 177383
loss: 1.0770329236984253,grad_norm: 0.9999990919981201, iteration: 177384
loss: 1.1902247667312622,grad_norm: 0.9999996969516616, iteration: 177385
loss: 1.0639184713363647,grad_norm: 0.9999992448955759, iteration: 177386
loss: 1.05866539478302,grad_norm: 0.9999993864394653, iteration: 177387
loss: 1.228232502937317,grad_norm: 0.9999998189006659, iteration: 177388
loss: 1.325002908706665,grad_norm: 1.0000000442995889, iteration: 177389
loss: 1.0253636837005615,grad_norm: 0.9999993078415045, iteration: 177390
loss: 1.0170387029647827,grad_norm: 0.999999560650825, iteration: 177391
loss: 1.1833699941635132,grad_norm: 0.9999995833887332, iteration: 177392
loss: 1.639175295829773,grad_norm: 0.999999829523394, iteration: 177393
loss: 1.0199681520462036,grad_norm: 0.9999992119677161, iteration: 177394
loss: 1.051068902015686,grad_norm: 0.9999994242998568, iteration: 177395
loss: 1.1682904958724976,grad_norm: 0.9999995496158817, iteration: 177396
loss: 1.2364802360534668,grad_norm: 0.9999996042155463, iteration: 177397
loss: 1.210841417312622,grad_norm: 0.9999996836519711, iteration: 177398
loss: 1.06364107131958,grad_norm: 0.9999999615998044, iteration: 177399
loss: 1.126764178276062,grad_norm: 1.0000000883056703, iteration: 177400
loss: 0.9527478814125061,grad_norm: 0.9999992978122976, iteration: 177401
loss: 0.9571699500083923,grad_norm: 0.9699759546518544, iteration: 177402
loss: 1.2189301252365112,grad_norm: 0.9999998612539955, iteration: 177403
loss: 1.5619183778762817,grad_norm: 0.9999995601690692, iteration: 177404
loss: 1.2244930267333984,grad_norm: 0.9999997669329179, iteration: 177405
loss: 1.0383763313293457,grad_norm: 0.9999996075587684, iteration: 177406
loss: 1.580695390701294,grad_norm: 0.999999718567746, iteration: 177407
loss: 1.0258039236068726,grad_norm: 0.9999991979636543, iteration: 177408
loss: 1.2767250537872314,grad_norm: 0.9999999923319283, iteration: 177409
loss: 1.194788932800293,grad_norm: 0.9999998272936229, iteration: 177410
loss: 1.264394998550415,grad_norm: 0.9999997523054004, iteration: 177411
loss: 1.086134672164917,grad_norm: 0.9999993276529667, iteration: 177412
loss: 1.2025574445724487,grad_norm: 0.9999999567837731, iteration: 177413
loss: 1.1057413816452026,grad_norm: 0.9999996944975732, iteration: 177414
loss: 1.0143290758132935,grad_norm: 1.0000000843754693, iteration: 177415
loss: 1.042317271232605,grad_norm: 0.9999992511460017, iteration: 177416
loss: 1.2256778478622437,grad_norm: 0.9999998153568248, iteration: 177417
loss: 1.0482361316680908,grad_norm: 0.9999998332019903, iteration: 177418
loss: 1.1675713062286377,grad_norm: 0.999999591282921, iteration: 177419
loss: 1.0880776643753052,grad_norm: 0.9999995618708555, iteration: 177420
loss: 1.2331043481826782,grad_norm: 0.9999999256208564, iteration: 177421
loss: 1.343980073928833,grad_norm: 0.9999998137912968, iteration: 177422
loss: 1.1834043264389038,grad_norm: 0.9999997951210556, iteration: 177423
loss: 1.1474145650863647,grad_norm: 0.9999998050825373, iteration: 177424
loss: 1.137402892112732,grad_norm: 0.9999995996623465, iteration: 177425
loss: 1.1284940242767334,grad_norm: 0.9999998153612902, iteration: 177426
loss: 1.1703426837921143,grad_norm: 0.9999992924888057, iteration: 177427
loss: 1.1115792989730835,grad_norm: 0.9999996529629364, iteration: 177428
loss: 1.1331366300582886,grad_norm: 0.9999994103208882, iteration: 177429
loss: 1.060408353805542,grad_norm: 0.9999993573821515, iteration: 177430
loss: 1.1265835762023926,grad_norm: 0.9999994916343116, iteration: 177431
loss: 1.2821950912475586,grad_norm: 0.9999995530066719, iteration: 177432
loss: 1.1207118034362793,grad_norm: 0.9999995252297875, iteration: 177433
loss: 1.225633144378662,grad_norm: 0.9999996113029718, iteration: 177434
loss: 1.1787904500961304,grad_norm: 0.9999997887780114, iteration: 177435
loss: 1.0514147281646729,grad_norm: 0.9999997588503172, iteration: 177436
loss: 1.2583435773849487,grad_norm: 0.9999999335850128, iteration: 177437
loss: 1.230486273765564,grad_norm: 0.999999585623759, iteration: 177438
loss: 1.0806516408920288,grad_norm: 0.9999993757300404, iteration: 177439
loss: 0.9514879584312439,grad_norm: 0.9994589939859946, iteration: 177440
loss: 1.1091746091842651,grad_norm: 0.9999992526699397, iteration: 177441
loss: 1.0414565801620483,grad_norm: 0.8214507820281766, iteration: 177442
loss: 1.2914294004440308,grad_norm: 0.9999997079137732, iteration: 177443
loss: 1.0538040399551392,grad_norm: 0.9999992440489526, iteration: 177444
loss: 1.2430297136306763,grad_norm: 0.9999997621281835, iteration: 177445
loss: 1.110106110572815,grad_norm: 0.9999994428233905, iteration: 177446
loss: 1.2259589433670044,grad_norm: 0.9999995956502157, iteration: 177447
loss: 1.4086793661117554,grad_norm: 0.9999995866479532, iteration: 177448
loss: 1.0774191617965698,grad_norm: 0.9999994400386512, iteration: 177449
loss: 1.0905474424362183,grad_norm: 0.9999991870793884, iteration: 177450
loss: 1.0341230630874634,grad_norm: 1.0000000035243468, iteration: 177451
loss: 1.0316060781478882,grad_norm: 0.9999998645798125, iteration: 177452
loss: 0.9317632913589478,grad_norm: 0.9999991953769898, iteration: 177453
loss: 1.1318833827972412,grad_norm: 0.999999981253041, iteration: 177454
loss: 1.2344180345535278,grad_norm: 0.9999996790819793, iteration: 177455
loss: 1.1301429271697998,grad_norm: 0.9999996297958532, iteration: 177456
loss: 1.271393895149231,grad_norm: 0.9999998503437778, iteration: 177457
loss: 1.0972360372543335,grad_norm: 1.0000000388120274, iteration: 177458
loss: 1.0983741283416748,grad_norm: 0.9999991946593978, iteration: 177459
loss: 1.0845904350280762,grad_norm: 0.9999992398018039, iteration: 177460
loss: 0.9813847541809082,grad_norm: 0.999999040869683, iteration: 177461
loss: 1.1197311878204346,grad_norm: 0.9999990320905345, iteration: 177462
loss: 0.9671973586082458,grad_norm: 0.9999991889099518, iteration: 177463
loss: 0.952100396156311,grad_norm: 0.999999204763372, iteration: 177464
loss: 1.0277650356292725,grad_norm: 0.9999997984246463, iteration: 177465
loss: 1.021565556526184,grad_norm: 0.9999990780073847, iteration: 177466
loss: 1.0500108003616333,grad_norm: 0.9999990107853496, iteration: 177467
loss: 1.1183637380599976,grad_norm: 0.9999995999700794, iteration: 177468
loss: 1.072061538696289,grad_norm: 0.9999997298736389, iteration: 177469
loss: 1.1000912189483643,grad_norm: 0.9999994097646795, iteration: 177470
loss: 1.156604528427124,grad_norm: 0.9999991567520349, iteration: 177471
loss: 1.2457927465438843,grad_norm: 0.9999997280089812, iteration: 177472
loss: 1.2741031646728516,grad_norm: 0.9999998649569968, iteration: 177473
loss: 1.2059407234191895,grad_norm: 0.999999917215344, iteration: 177474
loss: 1.0512148141860962,grad_norm: 0.9999995213141314, iteration: 177475
loss: 1.1584633588790894,grad_norm: 0.9999995897829215, iteration: 177476
loss: 1.0508345365524292,grad_norm: 0.986312822946689, iteration: 177477
loss: 1.060060977935791,grad_norm: 0.999999859010156, iteration: 177478
loss: 1.047042965888977,grad_norm: 0.9999997927852882, iteration: 177479
loss: 1.1057249307632446,grad_norm: 0.9999998492783905, iteration: 177480
loss: 1.030005931854248,grad_norm: 0.9999999815859862, iteration: 177481
loss: 1.2166768312454224,grad_norm: 0.9999999432046467, iteration: 177482
loss: 1.0643343925476074,grad_norm: 0.9999995855355811, iteration: 177483
loss: 0.9317086935043335,grad_norm: 0.8940799329719462, iteration: 177484
loss: 1.0678648948669434,grad_norm: 0.9999994470372479, iteration: 177485
loss: 1.1635457277297974,grad_norm: 0.9999996307154124, iteration: 177486
loss: 1.0828887224197388,grad_norm: 0.9999997285867932, iteration: 177487
loss: 1.0674313306808472,grad_norm: 0.99999909811512, iteration: 177488
loss: 1.0706634521484375,grad_norm: 0.999999185876586, iteration: 177489
loss: 1.0019134283065796,grad_norm: 0.9999992501046914, iteration: 177490
loss: 0.9898237586021423,grad_norm: 0.8590059612939682, iteration: 177491
loss: 1.0604292154312134,grad_norm: 0.9999993312607837, iteration: 177492
loss: 1.0172209739685059,grad_norm: 0.9999991976963482, iteration: 177493
loss: 1.1585774421691895,grad_norm: 0.9999999165477182, iteration: 177494
loss: 0.963569164276123,grad_norm: 0.9999990814307657, iteration: 177495
loss: 1.1884759664535522,grad_norm: 0.9999998339228822, iteration: 177496
loss: 1.0878376960754395,grad_norm: 0.9999993302045195, iteration: 177497
loss: 1.0469778776168823,grad_norm: 0.99999919032095, iteration: 177498
loss: 1.178186058998108,grad_norm: 0.9999995799204544, iteration: 177499
loss: 1.0346717834472656,grad_norm: 0.9999995977284531, iteration: 177500
loss: 0.9781709313392639,grad_norm: 0.9999990537090151, iteration: 177501
loss: 1.0705347061157227,grad_norm: 0.9999998704985895, iteration: 177502
loss: 1.0807888507843018,grad_norm: 0.9999995201421971, iteration: 177503
loss: 1.113182783126831,grad_norm: 0.9999998972234936, iteration: 177504
loss: 1.2415646314620972,grad_norm: 0.9999999520332162, iteration: 177505
loss: 1.0136909484863281,grad_norm: 0.9999996723593467, iteration: 177506
loss: 1.1244467496871948,grad_norm: 0.9999995923102163, iteration: 177507
loss: 1.075541377067566,grad_norm: 0.9999996148444593, iteration: 177508
loss: 1.1174263954162598,grad_norm: 0.9999999364337295, iteration: 177509
loss: 1.0464140176773071,grad_norm: 0.9999995597038918, iteration: 177510
loss: 1.1085466146469116,grad_norm: 0.9999996877502324, iteration: 177511
loss: 1.0038855075836182,grad_norm: 0.9999994368978901, iteration: 177512
loss: 1.1472948789596558,grad_norm: 0.9999994005770052, iteration: 177513
loss: 1.0325478315353394,grad_norm: 0.9999997754488639, iteration: 177514
loss: 1.121505618095398,grad_norm: 0.999999953398852, iteration: 177515
loss: 1.0150541067123413,grad_norm: 0.9999990440429483, iteration: 177516
loss: 1.0086112022399902,grad_norm: 0.9864522257382278, iteration: 177517
loss: 1.0011156797409058,grad_norm: 0.9999992246008496, iteration: 177518
loss: 1.043567180633545,grad_norm: 0.9999991344943037, iteration: 177519
loss: 1.0416136980056763,grad_norm: 0.9999997976259901, iteration: 177520
loss: 1.0509103536605835,grad_norm: 0.9999996962048028, iteration: 177521
loss: 1.1065200567245483,grad_norm: 0.9999994280889991, iteration: 177522
loss: 1.076635718345642,grad_norm: 0.9999997108417299, iteration: 177523
loss: 1.0169767141342163,grad_norm: 0.9999999723513553, iteration: 177524
loss: 1.0251284837722778,grad_norm: 0.9999994817218617, iteration: 177525
loss: 1.0179815292358398,grad_norm: 0.9999998895239157, iteration: 177526
loss: 1.2637248039245605,grad_norm: 1.0000000581889683, iteration: 177527
loss: 1.0088649988174438,grad_norm: 0.9503279585838336, iteration: 177528
loss: 1.0655252933502197,grad_norm: 0.9999997786933129, iteration: 177529
loss: 1.032067894935608,grad_norm: 0.9999993023276832, iteration: 177530
loss: 0.995718240737915,grad_norm: 0.9854344833248234, iteration: 177531
loss: 1.0963321924209595,grad_norm: 0.9999999356397636, iteration: 177532
loss: 1.0474106073379517,grad_norm: 0.9957600057799277, iteration: 177533
loss: 1.0552856922149658,grad_norm: 0.9999997411602192, iteration: 177534
loss: 1.0059205293655396,grad_norm: 0.8326874233196963, iteration: 177535
loss: 1.064742922782898,grad_norm: 0.9999996433445485, iteration: 177536
loss: 1.2735978364944458,grad_norm: 0.9999999994880747, iteration: 177537
loss: 1.0107225179672241,grad_norm: 0.9316935345262136, iteration: 177538
loss: 1.0274440050125122,grad_norm: 1.0000000626205725, iteration: 177539
loss: 1.1323432922363281,grad_norm: 0.9999998305511839, iteration: 177540
loss: 0.9914669394493103,grad_norm: 0.9999993329736924, iteration: 177541
loss: 0.9920384883880615,grad_norm: 0.9999991563282089, iteration: 177542
loss: 0.9892104864120483,grad_norm: 0.9615533837498458, iteration: 177543
loss: 1.0034970045089722,grad_norm: 0.9999991021092265, iteration: 177544
loss: 1.0138555765151978,grad_norm: 0.7608491298035237, iteration: 177545
loss: 0.9932246208190918,grad_norm: 0.9999991875424848, iteration: 177546
loss: 1.0145947933197021,grad_norm: 0.984493991970081, iteration: 177547
loss: 1.0692418813705444,grad_norm: 0.999999526737032, iteration: 177548
loss: 0.9772949814796448,grad_norm: 0.9999991615972429, iteration: 177549
loss: 1.0165066719055176,grad_norm: 0.8858642855253066, iteration: 177550
loss: 0.9864706993103027,grad_norm: 0.9999991306293435, iteration: 177551
loss: 1.0271720886230469,grad_norm: 0.9999994151121765, iteration: 177552
loss: 1.0626829862594604,grad_norm: 0.9999998920633083, iteration: 177553
loss: 0.9748262166976929,grad_norm: 0.9209657011433489, iteration: 177554
loss: 1.0483189821243286,grad_norm: 0.9999998347985183, iteration: 177555
loss: 1.1488093137741089,grad_norm: 0.9999994308295684, iteration: 177556
loss: 1.090651512145996,grad_norm: 1.0000000472589425, iteration: 177557
loss: 1.0292942523956299,grad_norm: 0.9815007456422474, iteration: 177558
loss: 1.0725464820861816,grad_norm: 0.9999995720644809, iteration: 177559
loss: 0.9729268550872803,grad_norm: 0.9754285403068578, iteration: 177560
loss: 1.0742144584655762,grad_norm: 0.9999999661786445, iteration: 177561
loss: 0.9985442161560059,grad_norm: 0.8792045168568463, iteration: 177562
loss: 1.1065993309020996,grad_norm: 0.9999996367731646, iteration: 177563
loss: 1.0043400526046753,grad_norm: 0.8907942515045001, iteration: 177564
loss: 1.0312705039978027,grad_norm: 0.9999991763320882, iteration: 177565
loss: 1.0369526147842407,grad_norm: 0.9999998339685351, iteration: 177566
loss: 1.0986000299453735,grad_norm: 0.9999998027835975, iteration: 177567
loss: 1.0121426582336426,grad_norm: 0.9825180753883846, iteration: 177568
loss: 1.021868348121643,grad_norm: 0.8225621007760245, iteration: 177569
loss: 1.0490972995758057,grad_norm: 0.9999999034170995, iteration: 177570
loss: 0.9721587300300598,grad_norm: 0.9616451424571919, iteration: 177571
loss: 1.0152055025100708,grad_norm: 0.9999990368636688, iteration: 177572
loss: 1.0164902210235596,grad_norm: 0.9583841519219891, iteration: 177573
loss: 1.0094845294952393,grad_norm: 0.9999990715316915, iteration: 177574
loss: 0.9884134531021118,grad_norm: 0.9428407818879495, iteration: 177575
loss: 1.0008939504623413,grad_norm: 0.9127757830257888, iteration: 177576
loss: 1.0160695314407349,grad_norm: 0.9562927917357004, iteration: 177577
loss: 1.038812518119812,grad_norm: 0.9999992922984939, iteration: 177578
loss: 1.0545845031738281,grad_norm: 0.9999993311157711, iteration: 177579
loss: 1.0338258743286133,grad_norm: 0.999999184451646, iteration: 177580
loss: 0.9669308066368103,grad_norm: 0.999998946408302, iteration: 177581
loss: 0.9943673610687256,grad_norm: 0.999999748631564, iteration: 177582
loss: 1.0097689628601074,grad_norm: 0.9832492772678116, iteration: 177583
loss: 1.1738746166229248,grad_norm: 0.9999995178761406, iteration: 177584
loss: 1.008466362953186,grad_norm: 0.9999991884981769, iteration: 177585
loss: 1.0000362396240234,grad_norm: 0.9680035089213942, iteration: 177586
loss: 1.0752986669540405,grad_norm: 0.9999991878884787, iteration: 177587
loss: 1.0390690565109253,grad_norm: 0.9999992586671451, iteration: 177588
loss: 1.0137925148010254,grad_norm: 0.9999991557805488, iteration: 177589
loss: 1.003958821296692,grad_norm: 0.9999992755127179, iteration: 177590
loss: 0.9761797785758972,grad_norm: 0.9610600123702348, iteration: 177591
loss: 1.001686453819275,grad_norm: 0.8492630937683022, iteration: 177592
loss: 0.9970051646232605,grad_norm: 0.9847288094789606, iteration: 177593
loss: 1.0114184617996216,grad_norm: 0.9999990671565057, iteration: 177594
loss: 1.0411317348480225,grad_norm: 0.9868948223600325, iteration: 177595
loss: 1.112646460533142,grad_norm: 0.9999992937088603, iteration: 177596
loss: 1.0562056303024292,grad_norm: 0.9999994184213434, iteration: 177597
loss: 0.9948045611381531,grad_norm: 0.9999991399866123, iteration: 177598
loss: 0.9902477860450745,grad_norm: 0.9999994591449122, iteration: 177599
loss: 1.0551689863204956,grad_norm: 0.9999995149941932, iteration: 177600
loss: 1.0498864650726318,grad_norm: 0.9999994451256954, iteration: 177601
loss: 1.0813467502593994,grad_norm: 0.9999993625165844, iteration: 177602
loss: 0.973273515701294,grad_norm: 0.9999995918532918, iteration: 177603
loss: 0.9720540046691895,grad_norm: 0.9516270972307888, iteration: 177604
loss: 0.971576452255249,grad_norm: 0.9999996111226803, iteration: 177605
loss: 1.0362521409988403,grad_norm: 0.9999991283934893, iteration: 177606
loss: 1.0242279767990112,grad_norm: 0.8606448135717057, iteration: 177607
loss: 1.0398181676864624,grad_norm: 0.999999213574318, iteration: 177608
loss: 1.0015472173690796,grad_norm: 0.8946763291529137, iteration: 177609
loss: 1.0044441223144531,grad_norm: 0.9999990863732592, iteration: 177610
loss: 1.0234198570251465,grad_norm: 0.9028875234878718, iteration: 177611
loss: 1.0454018115997314,grad_norm: 0.9999997148927752, iteration: 177612
loss: 0.9969810843467712,grad_norm: 0.9369406143433645, iteration: 177613
loss: 0.9775864481925964,grad_norm: 0.9628317778576241, iteration: 177614
loss: 1.0346404314041138,grad_norm: 0.9999991322293478, iteration: 177615
loss: 1.0873680114746094,grad_norm: 0.9999996843842198, iteration: 177616
loss: 1.0030252933502197,grad_norm: 0.9687043282301531, iteration: 177617
loss: 1.058232069015503,grad_norm: 0.9999992982861775, iteration: 177618
loss: 1.0017653703689575,grad_norm: 0.9999991777831306, iteration: 177619
loss: 1.0137592554092407,grad_norm: 0.9421632869103229, iteration: 177620
loss: 1.0215615034103394,grad_norm: 0.8408752714237294, iteration: 177621
loss: 1.022189736366272,grad_norm: 0.9999992125239091, iteration: 177622
loss: 1.0545735359191895,grad_norm: 0.9999992970106177, iteration: 177623
loss: 0.9909475445747375,grad_norm: 0.9999992016784508, iteration: 177624
loss: 1.0114787817001343,grad_norm: 0.9999991400415371, iteration: 177625
loss: 1.002218246459961,grad_norm: 0.9999991880913407, iteration: 177626
loss: 1.037233829498291,grad_norm: 0.8808669988172565, iteration: 177627
loss: 0.9942612051963806,grad_norm: 0.9999990113503507, iteration: 177628
loss: 1.037268877029419,grad_norm: 0.9999996698944407, iteration: 177629
loss: 1.0152541399002075,grad_norm: 0.9999990824184883, iteration: 177630
loss: 0.9892858266830444,grad_norm: 0.9580231408470572, iteration: 177631
loss: 0.9858739376068115,grad_norm: 0.9999996949006779, iteration: 177632
loss: 1.033385992050171,grad_norm: 0.9999991699529869, iteration: 177633
loss: 1.0020850896835327,grad_norm: 0.9621983423939411, iteration: 177634
loss: 0.9816731214523315,grad_norm: 0.9999990533710051, iteration: 177635
loss: 1.0004299879074097,grad_norm: 0.9874844493400646, iteration: 177636
loss: 0.9912678003311157,grad_norm: 0.8681632515968263, iteration: 177637
loss: 1.0248606204986572,grad_norm: 0.9592077968168273, iteration: 177638
loss: 1.1472834348678589,grad_norm: 0.9999999112261884, iteration: 177639
loss: 0.9765483736991882,grad_norm: 0.9999990147045196, iteration: 177640
loss: 1.0261058807373047,grad_norm: 0.9999994772429428, iteration: 177641
loss: 1.0255573987960815,grad_norm: 0.9999990974951795, iteration: 177642
loss: 0.9982953667640686,grad_norm: 0.8789340824615669, iteration: 177643
loss: 1.0033570528030396,grad_norm: 0.9890131703431367, iteration: 177644
loss: 1.0219786167144775,grad_norm: 0.8908817891347307, iteration: 177645
loss: 1.0103135108947754,grad_norm: 0.9080234100641935, iteration: 177646
loss: 0.9851399064064026,grad_norm: 0.999999153389677, iteration: 177647
loss: 1.0311626195907593,grad_norm: 0.9999989797247438, iteration: 177648
loss: 0.9915419220924377,grad_norm: 0.9999991739341649, iteration: 177649
loss: 1.041215181350708,grad_norm: 0.9942949371674248, iteration: 177650
loss: 1.0382907390594482,grad_norm: 0.9999990808633767, iteration: 177651
loss: 0.9907141327857971,grad_norm: 0.8915949342626069, iteration: 177652
loss: 1.1088696718215942,grad_norm: 0.999999641595361, iteration: 177653
loss: 1.0217338800430298,grad_norm: 0.9518205450604527, iteration: 177654
loss: 1.1163830757141113,grad_norm: 0.9999999056365805, iteration: 177655
loss: 0.980625569820404,grad_norm: 0.9757857224616523, iteration: 177656
loss: 0.9799244403839111,grad_norm: 0.9986096757411225, iteration: 177657
loss: 1.0079554319381714,grad_norm: 0.9999993612915472, iteration: 177658
loss: 1.0129485130310059,grad_norm: 0.9541085196390177, iteration: 177659
loss: 0.9795566201210022,grad_norm: 0.9999992479559268, iteration: 177660
loss: 0.9829952716827393,grad_norm: 0.8995648076457224, iteration: 177661
loss: 0.9798077940940857,grad_norm: 0.8847417704466751, iteration: 177662
loss: 0.9945785999298096,grad_norm: 0.9999991234232957, iteration: 177663
loss: 0.979005753993988,grad_norm: 0.9065844136094887, iteration: 177664
loss: 0.9863542318344116,grad_norm: 0.9999991608014314, iteration: 177665
loss: 1.0048431158065796,grad_norm: 0.8821401324069715, iteration: 177666
loss: 0.9947383999824524,grad_norm: 0.9057426667190721, iteration: 177667
loss: 1.0008716583251953,grad_norm: 0.9577933679138008, iteration: 177668
loss: 0.9619190096855164,grad_norm: 0.8922209877864842, iteration: 177669
loss: 1.012871503829956,grad_norm: 0.967720286340414, iteration: 177670
loss: 0.9962128400802612,grad_norm: 0.940681624329184, iteration: 177671
loss: 1.0528639554977417,grad_norm: 0.999999367313858, iteration: 177672
loss: 1.0018073320388794,grad_norm: 0.8751463799668202, iteration: 177673
loss: 0.9936283826828003,grad_norm: 0.9999991435209065, iteration: 177674
loss: 0.9678242206573486,grad_norm: 0.946985489425467, iteration: 177675
loss: 1.001821756362915,grad_norm: 0.9999990725844421, iteration: 177676
loss: 1.0091065168380737,grad_norm: 0.9903222103997958, iteration: 177677
loss: 0.9985077977180481,grad_norm: 0.9912987357427483, iteration: 177678
loss: 0.9839993119239807,grad_norm: 0.9999991274876853, iteration: 177679
loss: 1.0763278007507324,grad_norm: 0.9999999179356517, iteration: 177680
loss: 1.0239431858062744,grad_norm: 0.9709822072820868, iteration: 177681
loss: 1.010567545890808,grad_norm: 0.8324044555955186, iteration: 177682
loss: 1.0509319305419922,grad_norm: 1.000000118297103, iteration: 177683
loss: 0.9774767160415649,grad_norm: 0.9999990606241123, iteration: 177684
loss: 0.9931844472885132,grad_norm: 0.9999990708485039, iteration: 177685
loss: 1.0301121473312378,grad_norm: 0.989481851904852, iteration: 177686
loss: 0.9731712937355042,grad_norm: 0.999999215914528, iteration: 177687
loss: 0.9690457582473755,grad_norm: 0.9938806558080163, iteration: 177688
loss: 1.027874231338501,grad_norm: 0.9999992927239222, iteration: 177689
loss: 0.9608038663864136,grad_norm: 0.9769901926977123, iteration: 177690
loss: 1.0160737037658691,grad_norm: 0.9999991708342603, iteration: 177691
loss: 0.9864394068717957,grad_norm: 0.9793551282898726, iteration: 177692
loss: 0.9953164458274841,grad_norm: 0.9648205019095879, iteration: 177693
loss: 0.9690807461738586,grad_norm: 0.9539491950599879, iteration: 177694
loss: 0.9985173344612122,grad_norm: 0.8777170941125302, iteration: 177695
loss: 1.004843831062317,grad_norm: 0.8377064085096183, iteration: 177696
loss: 0.973454475402832,grad_norm: 0.9845006224152945, iteration: 177697
loss: 1.0304590463638306,grad_norm: 0.9999998146863874, iteration: 177698
loss: 0.9828649759292603,grad_norm: 0.9038507790538035, iteration: 177699
loss: 1.0092267990112305,grad_norm: 0.9999999312984433, iteration: 177700
loss: 1.0085465908050537,grad_norm: 0.9999993163468954, iteration: 177701
loss: 1.0425106287002563,grad_norm: 0.9999992042997716, iteration: 177702
loss: 1.001599907875061,grad_norm: 0.8890823565583946, iteration: 177703
loss: 1.0101910829544067,grad_norm: 0.9332912477577593, iteration: 177704
loss: 1.06876540184021,grad_norm: 0.7821443712279653, iteration: 177705
loss: 1.0086665153503418,grad_norm: 0.9867199245715563, iteration: 177706
loss: 1.017704963684082,grad_norm: 0.9999993850752482, iteration: 177707
loss: 1.010368824005127,grad_norm: 0.9999991648532133, iteration: 177708
loss: 0.9956315159797668,grad_norm: 0.9999989868317009, iteration: 177709
loss: 1.0145405530929565,grad_norm: 0.9509322909668693, iteration: 177710
loss: 0.9866765737533569,grad_norm: 0.9058676937821053, iteration: 177711
loss: 0.9913619756698608,grad_norm: 0.9106531589339488, iteration: 177712
loss: 1.019459843635559,grad_norm: 0.9955334294727838, iteration: 177713
loss: 0.999472439289093,grad_norm: 0.9717993038566299, iteration: 177714
loss: 1.0092129707336426,grad_norm: 0.9999990298194382, iteration: 177715
loss: 0.999495267868042,grad_norm: 0.9940759692288886, iteration: 177716
loss: 0.9920024275779724,grad_norm: 0.9999992395885772, iteration: 177717
loss: 0.9462031722068787,grad_norm: 0.9748320361141845, iteration: 177718
loss: 1.0298281908035278,grad_norm: 0.9999991922677378, iteration: 177719
loss: 0.9919815063476562,grad_norm: 0.9999989662488089, iteration: 177720
loss: 1.0278578996658325,grad_norm: 0.9999991672065134, iteration: 177721
loss: 0.9735493659973145,grad_norm: 0.9999991799576294, iteration: 177722
loss: 0.9801442623138428,grad_norm: 0.981950579714744, iteration: 177723
loss: 0.9864967465400696,grad_norm: 0.9999991485952597, iteration: 177724
loss: 1.1170223951339722,grad_norm: 0.9999997787719221, iteration: 177725
loss: 0.9800103306770325,grad_norm: 0.838231164474437, iteration: 177726
loss: 1.0142433643341064,grad_norm: 0.9441427095224211, iteration: 177727
loss: 0.9785072803497314,grad_norm: 0.9654357771365664, iteration: 177728
loss: 1.023797869682312,grad_norm: 0.9999991637712778, iteration: 177729
loss: 0.9533712863922119,grad_norm: 0.9999989608750117, iteration: 177730
loss: 1.0302926301956177,grad_norm: 0.9999998725759648, iteration: 177731
loss: 1.0475519895553589,grad_norm: 0.9999998037871489, iteration: 177732
loss: 0.9386665225028992,grad_norm: 0.9999989828912699, iteration: 177733
loss: 0.9688929915428162,grad_norm: 0.7038727939521485, iteration: 177734
loss: 0.9991085529327393,grad_norm: 0.9210451697671589, iteration: 177735
loss: 1.0278369188308716,grad_norm: 0.9999990333313377, iteration: 177736
loss: 0.9994722604751587,grad_norm: 0.8779522265750905, iteration: 177737
loss: 1.0030173063278198,grad_norm: 0.936719946540286, iteration: 177738
loss: 0.9997580051422119,grad_norm: 0.8334037032664793, iteration: 177739
loss: 0.956679105758667,grad_norm: 0.9999991705396546, iteration: 177740
loss: 1.0177440643310547,grad_norm: 0.9999992481800989, iteration: 177741
loss: 0.9992790818214417,grad_norm: 0.9999991601985508, iteration: 177742
loss: 0.9813022613525391,grad_norm: 0.9166805024355182, iteration: 177743
loss: 1.018131136894226,grad_norm: 0.8378095580868078, iteration: 177744
loss: 1.012737512588501,grad_norm: 0.8910597345378479, iteration: 177745
loss: 1.0788172483444214,grad_norm: 0.9999999497342351, iteration: 177746
loss: 0.9652670621871948,grad_norm: 0.9775155357635326, iteration: 177747
loss: 0.972684919834137,grad_norm: 0.9895407939061557, iteration: 177748
loss: 1.0019781589508057,grad_norm: 0.9999991419877834, iteration: 177749
loss: 1.0139849185943604,grad_norm: 0.8987907840696484, iteration: 177750
loss: 1.0080410242080688,grad_norm: 0.9999992185641644, iteration: 177751
loss: 1.0176305770874023,grad_norm: 0.9506611650828586, iteration: 177752
loss: 0.9619418382644653,grad_norm: 0.9999991102573389, iteration: 177753
loss: 0.9969939589500427,grad_norm: 0.9454980709123656, iteration: 177754
loss: 1.0200417041778564,grad_norm: 0.9999990594535515, iteration: 177755
loss: 0.9931861758232117,grad_norm: 0.9999992389384263, iteration: 177756
loss: 1.0924460887908936,grad_norm: 0.9999992927076089, iteration: 177757
loss: 1.0037769079208374,grad_norm: 0.9999992638053375, iteration: 177758
loss: 0.979133665561676,grad_norm: 0.9052497759492943, iteration: 177759
loss: 1.0540322065353394,grad_norm: 1.0000000511652671, iteration: 177760
loss: 1.0246978998184204,grad_norm: 0.9746742937481144, iteration: 177761
loss: 1.033811092376709,grad_norm: 0.96469436969451, iteration: 177762
loss: 1.0371613502502441,grad_norm: 0.9743748574441152, iteration: 177763
loss: 1.027999997138977,grad_norm: 0.9910116842435432, iteration: 177764
loss: 0.9635942578315735,grad_norm: 0.999999169771972, iteration: 177765
loss: 1.0023467540740967,grad_norm: 0.9999991202012026, iteration: 177766
loss: 1.0005933046340942,grad_norm: 0.8491914546999337, iteration: 177767
loss: 1.033681035041809,grad_norm: 0.9999991026284603, iteration: 177768
loss: 1.0315272808074951,grad_norm: 0.9999990057466883, iteration: 177769
loss: 1.0053153038024902,grad_norm: 0.9235709572244524, iteration: 177770
loss: 0.9891183376312256,grad_norm: 0.9999989356401084, iteration: 177771
loss: 1.019870400428772,grad_norm: 0.9327909394660482, iteration: 177772
loss: 0.9817752838134766,grad_norm: 0.9012785809184859, iteration: 177773
loss: 1.0184450149536133,grad_norm: 0.9575231859191915, iteration: 177774
loss: 0.9798153638839722,grad_norm: 0.9999989812304139, iteration: 177775
loss: 0.9986101388931274,grad_norm: 0.9999991102676784, iteration: 177776
loss: 0.9895646572113037,grad_norm: 0.9999990912641717, iteration: 177777
loss: 0.9973633885383606,grad_norm: 0.9027779249023774, iteration: 177778
loss: 0.9597352147102356,grad_norm: 0.9999991805770531, iteration: 177779
loss: 0.9942491054534912,grad_norm: 0.9999990267298465, iteration: 177780
loss: 1.0171548128128052,grad_norm: 0.9010893192091173, iteration: 177781
loss: 1.0340309143066406,grad_norm: 0.9999992648479465, iteration: 177782
loss: 1.025315523147583,grad_norm: 0.999999123096815, iteration: 177783
loss: 0.9891098141670227,grad_norm: 0.9475121358302555, iteration: 177784
loss: 0.9799224138259888,grad_norm: 0.9999991445851457, iteration: 177785
loss: 0.9700610041618347,grad_norm: 0.9617054080559505, iteration: 177786
loss: 1.0097079277038574,grad_norm: 0.8817843594789652, iteration: 177787
loss: 1.0091882944107056,grad_norm: 0.9555470708091669, iteration: 177788
loss: 0.9791392683982849,grad_norm: 0.9999991244790539, iteration: 177789
loss: 1.0039185285568237,grad_norm: 0.9572269725734759, iteration: 177790
loss: 1.0216509103775024,grad_norm: 0.9290710845992405, iteration: 177791
loss: 1.0130223035812378,grad_norm: 0.9924940259520714, iteration: 177792
loss: 1.006309986114502,grad_norm: 0.8798909850311409, iteration: 177793
loss: 0.9988712668418884,grad_norm: 0.896299943224712, iteration: 177794
loss: 0.9983346462249756,grad_norm: 0.9684288083174415, iteration: 177795
loss: 1.023932695388794,grad_norm: 0.8358559874466253, iteration: 177796
loss: 1.0279943943023682,grad_norm: 0.8325921848307669, iteration: 177797
loss: 1.0019875764846802,grad_norm: 0.9783915776858085, iteration: 177798
loss: 0.9774335622787476,grad_norm: 0.9999990867635142, iteration: 177799
loss: 0.9983188509941101,grad_norm: 0.9398512779799936, iteration: 177800
loss: 0.9665693044662476,grad_norm: 0.8994471248278866, iteration: 177801
loss: 0.9881776571273804,grad_norm: 0.9585830871616435, iteration: 177802
loss: 0.9890171885490417,grad_norm: 0.9730248557052074, iteration: 177803
loss: 1.0177477598190308,grad_norm: 0.8630662455412467, iteration: 177804
loss: 1.017806053161621,grad_norm: 0.9999991490600915, iteration: 177805
loss: 0.9755234718322754,grad_norm: 0.8961909540914262, iteration: 177806
loss: 0.9847938418388367,grad_norm: 0.9479622506835296, iteration: 177807
loss: 0.9469401240348816,grad_norm: 0.9999991425122923, iteration: 177808
loss: 1.0006117820739746,grad_norm: 0.9061471703401578, iteration: 177809
loss: 0.9426446557044983,grad_norm: 0.9696439032939176, iteration: 177810
loss: 0.9889232516288757,grad_norm: 0.8837285392750566, iteration: 177811
loss: 0.988076388835907,grad_norm: 0.8047002962117298, iteration: 177812
loss: 1.007910966873169,grad_norm: 0.8537860754963349, iteration: 177813
loss: 1.0411109924316406,grad_norm: 0.9669808936902217, iteration: 177814
loss: 1.0029137134552002,grad_norm: 0.790135250857764, iteration: 177815
loss: 0.960702657699585,grad_norm: 0.9999992167474524, iteration: 177816
loss: 0.9937412738800049,grad_norm: 0.9999992065090472, iteration: 177817
loss: 1.0102773904800415,grad_norm: 0.9761239431487224, iteration: 177818
loss: 0.990657389163971,grad_norm: 0.815644390221019, iteration: 177819
loss: 1.0147814750671387,grad_norm: 0.9600925967612538, iteration: 177820
loss: 0.9749863743782043,grad_norm: 0.8426837730289066, iteration: 177821
loss: 0.9737223982810974,grad_norm: 0.9999991100881148, iteration: 177822
loss: 0.9970059990882874,grad_norm: 0.9999993120816709, iteration: 177823
loss: 0.9975060820579529,grad_norm: 0.9790117858559365, iteration: 177824
loss: 1.0300008058547974,grad_norm: 0.9999991371663194, iteration: 177825
loss: 0.9928992390632629,grad_norm: 0.9999990460193601, iteration: 177826
loss: 0.9724883437156677,grad_norm: 0.91164688480687, iteration: 177827
loss: 0.9990796446800232,grad_norm: 0.9065525152303094, iteration: 177828
loss: 1.0135096311569214,grad_norm: 0.9999991207555654, iteration: 177829
loss: 0.9929100275039673,grad_norm: 0.9999990732081186, iteration: 177830
loss: 1.0064220428466797,grad_norm: 0.9999990538034712, iteration: 177831
loss: 0.9842683672904968,grad_norm: 0.9999991435538806, iteration: 177832
loss: 0.9754788279533386,grad_norm: 0.9999990689874467, iteration: 177833
loss: 1.0309832096099854,grad_norm: 0.8958918415005354, iteration: 177834
loss: 0.9798152446746826,grad_norm: 0.9480438312334589, iteration: 177835
loss: 0.9836806654930115,grad_norm: 0.8850724034757967, iteration: 177836
loss: 0.9994562864303589,grad_norm: 0.999999076730948, iteration: 177837
loss: 1.0025708675384521,grad_norm: 0.9999991949978481, iteration: 177838
loss: 1.005373239517212,grad_norm: 0.8049534207229557, iteration: 177839
loss: 0.9903926253318787,grad_norm: 0.9042949545356886, iteration: 177840
loss: 1.025566577911377,grad_norm: 0.9999991116489839, iteration: 177841
loss: 0.9936116337776184,grad_norm: 0.9545672596814763, iteration: 177842
loss: 0.996809184551239,grad_norm: 0.8386109211376661, iteration: 177843
loss: 1.002451777458191,grad_norm: 0.9999990748418591, iteration: 177844
loss: 0.9647372364997864,grad_norm: 0.9324748267652306, iteration: 177845
loss: 1.0145785808563232,grad_norm: 0.9241579380469402, iteration: 177846
loss: 1.0156372785568237,grad_norm: 0.9999992881628356, iteration: 177847
loss: 0.9925563335418701,grad_norm: 0.9999990217655254, iteration: 177848
loss: 1.0049983263015747,grad_norm: 0.9743472571489471, iteration: 177849
loss: 0.9952187538146973,grad_norm: 0.8312618901157854, iteration: 177850
loss: 1.0016285181045532,grad_norm: 0.9999993291848243, iteration: 177851
loss: 0.9895574450492859,grad_norm: 0.9999991159727292, iteration: 177852
loss: 1.0013447999954224,grad_norm: 0.9773262054435016, iteration: 177853
loss: 1.0098954439163208,grad_norm: 0.9703644668327736, iteration: 177854
loss: 0.9697203040122986,grad_norm: 0.8939209536502933, iteration: 177855
loss: 0.9758477210998535,grad_norm: 0.8437042008060959, iteration: 177856
loss: 0.9595584273338318,grad_norm: 0.9999990691534666, iteration: 177857
loss: 1.0173946619033813,grad_norm: 0.9999996269915434, iteration: 177858
loss: 1.0433063507080078,grad_norm: 0.9999990114750558, iteration: 177859
loss: 0.984072744846344,grad_norm: 0.8942379264962853, iteration: 177860
loss: 1.0199354887008667,grad_norm: 0.999999097539653, iteration: 177861
loss: 1.0272923707962036,grad_norm: 0.999999117729741, iteration: 177862
loss: 1.030243992805481,grad_norm: 0.9999991910293018, iteration: 177863
loss: 0.9993497729301453,grad_norm: 0.9213690329494901, iteration: 177864
loss: 1.0023856163024902,grad_norm: 0.9568615301054364, iteration: 177865
loss: 1.0103639364242554,grad_norm: 0.9999990676671272, iteration: 177866
loss: 1.012067437171936,grad_norm: 0.8788949645391788, iteration: 177867
loss: 0.9599283933639526,grad_norm: 0.977513989159049, iteration: 177868
loss: 0.9974848031997681,grad_norm: 0.9999990707148102, iteration: 177869
loss: 1.0633270740509033,grad_norm: 0.9999995969241622, iteration: 177870
loss: 0.9757946729660034,grad_norm: 0.999999106130078, iteration: 177871
loss: 1.0275002717971802,grad_norm: 0.8508229508264986, iteration: 177872
loss: 1.0245493650436401,grad_norm: 0.9999991612469723, iteration: 177873
loss: 1.0274412631988525,grad_norm: 0.9629031406082966, iteration: 177874
loss: 0.9945816993713379,grad_norm: 0.9964744545022891, iteration: 177875
loss: 1.011847972869873,grad_norm: 0.9999991661220742, iteration: 177876
loss: 1.000542402267456,grad_norm: 0.9340066765863417, iteration: 177877
loss: 1.0050771236419678,grad_norm: 0.8341724710290636, iteration: 177878
loss: 0.9538853764533997,grad_norm: 0.9563230756763703, iteration: 177879
loss: 1.0678366422653198,grad_norm: 0.9824112142660661, iteration: 177880
loss: 1.0360183715820312,grad_norm: 0.999999410692058, iteration: 177881
loss: 0.9740347862243652,grad_norm: 0.7704162742966929, iteration: 177882
loss: 0.9969070553779602,grad_norm: 0.9342295901400448, iteration: 177883
loss: 1.0228440761566162,grad_norm: 0.9942726392925719, iteration: 177884
loss: 1.0025966167449951,grad_norm: 0.8327339387246612, iteration: 177885
loss: 1.0240726470947266,grad_norm: 0.9141379647154922, iteration: 177886
loss: 1.027124047279358,grad_norm: 0.9999990664356881, iteration: 177887
loss: 1.013818383216858,grad_norm: 0.9839805141318742, iteration: 177888
loss: 1.0035669803619385,grad_norm: 0.9340029798456917, iteration: 177889
loss: 1.0247249603271484,grad_norm: 0.9999991819560548, iteration: 177890
loss: 1.0041483640670776,grad_norm: 0.9999993874590914, iteration: 177891
loss: 0.9695523977279663,grad_norm: 0.9999991694862558, iteration: 177892
loss: 1.0155948400497437,grad_norm: 0.9999990000914339, iteration: 177893
loss: 0.9667072892189026,grad_norm: 0.9891587344735435, iteration: 177894
loss: 0.9820901155471802,grad_norm: 0.8829516922476002, iteration: 177895
loss: 0.9865174293518066,grad_norm: 0.9011804559108618, iteration: 177896
loss: 1.002441167831421,grad_norm: 0.9648233382924405, iteration: 177897
loss: 1.015184760093689,grad_norm: 0.9999991455619359, iteration: 177898
loss: 1.0378679037094116,grad_norm: 0.9999992419059749, iteration: 177899
loss: 0.9558752179145813,grad_norm: 0.9999990758181706, iteration: 177900
loss: 0.959776759147644,grad_norm: 0.8969947126035226, iteration: 177901
loss: 0.9884639382362366,grad_norm: 0.9999991738579337, iteration: 177902
loss: 0.9581118822097778,grad_norm: 0.9999992074040405, iteration: 177903
loss: 0.9896138310432434,grad_norm: 0.9999990339037064, iteration: 177904
loss: 0.9979091882705688,grad_norm: 0.9999991358536475, iteration: 177905
loss: 0.9947517514228821,grad_norm: 0.9999990680324156, iteration: 177906
loss: 1.0278924703598022,grad_norm: 0.8408866130071434, iteration: 177907
loss: 1.0094910860061646,grad_norm: 0.9104535526976555, iteration: 177908
loss: 1.0310096740722656,grad_norm: 0.957558671209485, iteration: 177909
loss: 0.9934724569320679,grad_norm: 0.896209916573356, iteration: 177910
loss: 0.9885542988777161,grad_norm: 0.9999992610565642, iteration: 177911
loss: 0.9770413041114807,grad_norm: 0.8843409405989117, iteration: 177912
loss: 1.0346884727478027,grad_norm: 0.9999989993165042, iteration: 177913
loss: 0.9938741326332092,grad_norm: 0.9221549107173291, iteration: 177914
loss: 1.0145949125289917,grad_norm: 0.999999207718273, iteration: 177915
loss: 0.9565070867538452,grad_norm: 0.9973970688882684, iteration: 177916
loss: 1.028656005859375,grad_norm: 0.9794625091552683, iteration: 177917
loss: 1.006039023399353,grad_norm: 0.9829464470802995, iteration: 177918
loss: 0.9968293905258179,grad_norm: 0.9266586049611366, iteration: 177919
loss: 1.007546067237854,grad_norm: 0.9999991464702068, iteration: 177920
loss: 1.0146995782852173,grad_norm: 0.9619251878095029, iteration: 177921
loss: 1.041662335395813,grad_norm: 0.999999186904481, iteration: 177922
loss: 0.9526097178459167,grad_norm: 0.9999989664047452, iteration: 177923
loss: 0.9941233992576599,grad_norm: 0.9999992367300968, iteration: 177924
loss: 0.9886980652809143,grad_norm: 0.9438623062923471, iteration: 177925
loss: 0.9588282704353333,grad_norm: 0.999999098220568, iteration: 177926
loss: 1.0250613689422607,grad_norm: 0.9999990557313345, iteration: 177927
loss: 1.0100955963134766,grad_norm: 0.9999992211173677, iteration: 177928
loss: 0.9595129489898682,grad_norm: 0.9999990683136193, iteration: 177929
loss: 0.9772464632987976,grad_norm: 0.9999992909911496, iteration: 177930
loss: 1.0493979454040527,grad_norm: 0.9999996540476752, iteration: 177931
loss: 1.0081806182861328,grad_norm: 0.9999989886372191, iteration: 177932
loss: 1.0079841613769531,grad_norm: 0.9356475209981532, iteration: 177933
loss: 0.9799323678016663,grad_norm: 0.8807377469560232, iteration: 177934
loss: 1.0003695487976074,grad_norm: 0.9999989693938782, iteration: 177935
loss: 0.9686534404754639,grad_norm: 0.9041043939913092, iteration: 177936
loss: 1.0211464166641235,grad_norm: 0.9999992514154828, iteration: 177937
loss: 1.0446921586990356,grad_norm: 0.9760392595995079, iteration: 177938
loss: 1.1016333103179932,grad_norm: 0.9999995721032716, iteration: 177939
loss: 0.9867892265319824,grad_norm: 0.9999990354587422, iteration: 177940
loss: 1.0164389610290527,grad_norm: 0.8335276751930718, iteration: 177941
loss: 1.0334959030151367,grad_norm: 0.8620632195193506, iteration: 177942
loss: 1.0273817777633667,grad_norm: 0.9846036087317974, iteration: 177943
loss: 0.979062557220459,grad_norm: 0.9300630849917911, iteration: 177944
loss: 0.994354248046875,grad_norm: 0.976554842537932, iteration: 177945
loss: 1.064902663230896,grad_norm: 0.9613809861076161, iteration: 177946
loss: 0.9944948554039001,grad_norm: 0.9999991958009147, iteration: 177947
loss: 1.009192705154419,grad_norm: 0.9999991424372876, iteration: 177948
loss: 0.9789978265762329,grad_norm: 0.9999992683223945, iteration: 177949
loss: 0.959641695022583,grad_norm: 0.9990597871265182, iteration: 177950
loss: 1.000357985496521,grad_norm: 0.8996176399526151, iteration: 177951
loss: 0.9785448312759399,grad_norm: 0.999999155552993, iteration: 177952
loss: 1.002567172050476,grad_norm: 0.8990747234351061, iteration: 177953
loss: 1.0272401571273804,grad_norm: 0.9503590319822257, iteration: 177954
loss: 1.065230369567871,grad_norm: 0.9999996472443905, iteration: 177955
loss: 1.010358452796936,grad_norm: 0.9999990187368563, iteration: 177956
loss: 0.9914063215255737,grad_norm: 0.999999058897405, iteration: 177957
loss: 0.9871420860290527,grad_norm: 0.8612377602156182, iteration: 177958
loss: 1.0068429708480835,grad_norm: 0.929665368624067, iteration: 177959
loss: 0.9698715209960938,grad_norm: 0.8818458832836319, iteration: 177960
loss: 1.0052454471588135,grad_norm: 0.870229363384353, iteration: 177961
loss: 1.022776484489441,grad_norm: 0.7953302822936416, iteration: 177962
loss: 0.9717649221420288,grad_norm: 0.9999990698828136, iteration: 177963
loss: 0.9883098006248474,grad_norm: 0.8530907804036479, iteration: 177964
loss: 0.9610859751701355,grad_norm: 0.929969482420822, iteration: 177965
loss: 0.9769452214241028,grad_norm: 0.9442921252343219, iteration: 177966
loss: 1.0288095474243164,grad_norm: 0.9999990642372643, iteration: 177967
loss: 0.9862126708030701,grad_norm: 0.8965490612928557, iteration: 177968
loss: 1.011931300163269,grad_norm: 0.9999989792279392, iteration: 177969
loss: 0.9900262355804443,grad_norm: 0.9600375903941781, iteration: 177970
loss: 0.9907979369163513,grad_norm: 0.9642108038331639, iteration: 177971
loss: 1.0238116979599,grad_norm: 0.9999998494106038, iteration: 177972
loss: 1.0226832628250122,grad_norm: 0.9999991413603369, iteration: 177973
loss: 0.9733927249908447,grad_norm: 0.9794928437318259, iteration: 177974
loss: 1.0225337743759155,grad_norm: 0.9139017735828942, iteration: 177975
loss: 0.987879753112793,grad_norm: 0.999999242607267, iteration: 177976
loss: 1.1243031024932861,grad_norm: 0.9999992932386641, iteration: 177977
loss: 0.9936667680740356,grad_norm: 0.9999990034335828, iteration: 177978
loss: 1.0300664901733398,grad_norm: 0.8552256573283119, iteration: 177979
loss: 1.0320470333099365,grad_norm: 0.959662379336646, iteration: 177980
loss: 0.992611289024353,grad_norm: 0.8497547275718862, iteration: 177981
loss: 1.0334222316741943,grad_norm: 0.9999992454911464, iteration: 177982
loss: 1.0392909049987793,grad_norm: 0.9999992665087137, iteration: 177983
loss: 1.0099129676818848,grad_norm: 0.9999991495915966, iteration: 177984
loss: 1.0311305522918701,grad_norm: 0.9861035104918967, iteration: 177985
loss: 1.0291050672531128,grad_norm: 0.9999990590403327, iteration: 177986
loss: 0.9706093668937683,grad_norm: 0.9999993404434199, iteration: 177987
loss: 0.9832744002342224,grad_norm: 0.8306037022110355, iteration: 177988
loss: 0.9882450103759766,grad_norm: 0.9999990320109235, iteration: 177989
loss: 0.9409753680229187,grad_norm: 0.9651251300611373, iteration: 177990
loss: 0.9642284512519836,grad_norm: 0.9999990474364305, iteration: 177991
loss: 0.9976523518562317,grad_norm: 0.9929120743913314, iteration: 177992
loss: 1.0509397983551025,grad_norm: 0.9999989867643667, iteration: 177993
loss: 1.025681495666504,grad_norm: 0.9919450261389029, iteration: 177994
loss: 1.0025278329849243,grad_norm: 0.9999990519321658, iteration: 177995
loss: 0.9920629858970642,grad_norm: 0.9784369682543568, iteration: 177996
loss: 1.0266870260238647,grad_norm: 0.9999998423491795, iteration: 177997
loss: 1.0264792442321777,grad_norm: 0.9056378709753021, iteration: 177998
loss: 0.9931684732437134,grad_norm: 0.9171534694921993, iteration: 177999
loss: 0.9891645908355713,grad_norm: 0.9999990812292022, iteration: 178000
loss: 1.0329713821411133,grad_norm: 0.9999990979691977, iteration: 178001
loss: 0.9976974129676819,grad_norm: 0.8898460253723556, iteration: 178002
loss: 1.059315800666809,grad_norm: 0.9999991255561588, iteration: 178003
loss: 1.006527304649353,grad_norm: 0.8689244709852441, iteration: 178004
loss: 0.9608070254325867,grad_norm: 0.9200264288381651, iteration: 178005
loss: 0.9865856766700745,grad_norm: 0.9307287810957697, iteration: 178006
loss: 1.0007750988006592,grad_norm: 0.87710347491702, iteration: 178007
loss: 0.9868029356002808,grad_norm: 0.9647973508487835, iteration: 178008
loss: 0.9769728779792786,grad_norm: 0.9999991393389553, iteration: 178009
loss: 1.0031013488769531,grad_norm: 0.9999996720838812, iteration: 178010
loss: 1.013830542564392,grad_norm: 0.9307900020407675, iteration: 178011
loss: 0.9904478192329407,grad_norm: 0.8631833105205428, iteration: 178012
loss: 1.0316940546035767,grad_norm: 0.9425913091321914, iteration: 178013
loss: 1.0082131624221802,grad_norm: 0.9755572222252944, iteration: 178014
loss: 0.9798545241355896,grad_norm: 0.9685235251795008, iteration: 178015
loss: 1.0557222366333008,grad_norm: 0.9999990983329026, iteration: 178016
loss: 0.9904240369796753,grad_norm: 0.9999991126137608, iteration: 178017
loss: 1.0044186115264893,grad_norm: 0.9999992458885198, iteration: 178018
loss: 0.9667996764183044,grad_norm: 0.8719231330673703, iteration: 178019
loss: 1.00101900100708,grad_norm: 0.9999990796476415, iteration: 178020
loss: 1.0286027193069458,grad_norm: 0.9999997496508961, iteration: 178021
loss: 0.9893292188644409,grad_norm: 0.9999989665283736, iteration: 178022
loss: 1.006224274635315,grad_norm: 0.9999990890596168, iteration: 178023
loss: 0.9604105353355408,grad_norm: 0.9543971978160077, iteration: 178024
loss: 1.0021026134490967,grad_norm: 0.970074326999782, iteration: 178025
loss: 0.9692258238792419,grad_norm: 0.8336533381789865, iteration: 178026
loss: 1.0043020248413086,grad_norm: 0.9999989708762275, iteration: 178027
loss: 1.0073436498641968,grad_norm: 0.9999998488970353, iteration: 178028
loss: 1.0205687284469604,grad_norm: 0.9188706713710595, iteration: 178029
loss: 0.9945675730705261,grad_norm: 0.9791791074856677, iteration: 178030
loss: 1.02711021900177,grad_norm: 0.9977612723713617, iteration: 178031
loss: 0.9546542167663574,grad_norm: 0.9999990603010183, iteration: 178032
loss: 1.0212889909744263,grad_norm: 0.9244903270269358, iteration: 178033
loss: 1.026075005531311,grad_norm: 0.9716230831790469, iteration: 178034
loss: 1.009316325187683,grad_norm: 0.9999989325315015, iteration: 178035
loss: 1.017776370048523,grad_norm: 0.9999991600472529, iteration: 178036
loss: 1.0633183717727661,grad_norm: 0.9999992665882107, iteration: 178037
loss: 0.9923170804977417,grad_norm: 0.9999994518552385, iteration: 178038
loss: 0.9485389590263367,grad_norm: 0.8834200008546194, iteration: 178039
loss: 1.1003649234771729,grad_norm: 0.999999904540372, iteration: 178040
loss: 0.9985149502754211,grad_norm: 0.9999991871708985, iteration: 178041
loss: 1.0661125183105469,grad_norm: 0.9921195282563844, iteration: 178042
loss: 0.984131932258606,grad_norm: 0.9999990373577873, iteration: 178043
loss: 0.9921104907989502,grad_norm: 0.88932794161692, iteration: 178044
loss: 1.0217688083648682,grad_norm: 0.9304153848407805, iteration: 178045
loss: 1.008975863456726,grad_norm: 0.9999998792328254, iteration: 178046
loss: 0.9907916784286499,grad_norm: 0.9454663530880422, iteration: 178047
loss: 1.014706015586853,grad_norm: 0.838787106335769, iteration: 178048
loss: 0.992023766040802,grad_norm: 0.9211247222138003, iteration: 178049
loss: 1.0093086957931519,grad_norm: 0.8645472805886545, iteration: 178050
loss: 0.9905936121940613,grad_norm: 0.92219158781678, iteration: 178051
loss: 0.994313657283783,grad_norm: 0.9382599359192614, iteration: 178052
loss: 1.0643832683563232,grad_norm: 0.9467295244733583, iteration: 178053
loss: 1.1230442523956299,grad_norm: 0.9999996159755303, iteration: 178054
loss: 1.227896809577942,grad_norm: 1.0000000140799812, iteration: 178055
loss: 0.991476833820343,grad_norm: 0.9999991103819813, iteration: 178056
loss: 1.0909048318862915,grad_norm: 0.9195259104709866, iteration: 178057
loss: 1.0748183727264404,grad_norm: 0.9999998076899874, iteration: 178058
loss: 0.9716209769248962,grad_norm: 0.9913618468734708, iteration: 178059
loss: 1.070261836051941,grad_norm: 0.9154600153498651, iteration: 178060
loss: 1.0326844453811646,grad_norm: 0.9999989765045745, iteration: 178061
loss: 1.008205533027649,grad_norm: 0.9086526800613391, iteration: 178062
loss: 1.006779432296753,grad_norm: 0.6651925593103293, iteration: 178063
loss: 1.0066745281219482,grad_norm: 0.8339290784466652, iteration: 178064
loss: 1.0017590522766113,grad_norm: 0.928578714925958, iteration: 178065
loss: 1.0122885704040527,grad_norm: 0.9999991592435699, iteration: 178066
loss: 0.9697595834732056,grad_norm: 0.9629456874622656, iteration: 178067
loss: 1.042999029159546,grad_norm: 0.9999991504663895, iteration: 178068
loss: 1.028337836265564,grad_norm: 0.9999990041728691, iteration: 178069
loss: 1.009041428565979,grad_norm: 0.999999022691214, iteration: 178070
loss: 0.9557696580886841,grad_norm: 0.8770459275239922, iteration: 178071
loss: 0.9958532452583313,grad_norm: 0.9999990541921816, iteration: 178072
loss: 1.1724369525909424,grad_norm: 0.9999999881202255, iteration: 178073
loss: 1.2104854583740234,grad_norm: 1.000000014859037, iteration: 178074
loss: 0.9680663347244263,grad_norm: 0.9658074289499403, iteration: 178075
loss: 0.9784557223320007,grad_norm: 0.9999991472483918, iteration: 178076
loss: 0.9926932454109192,grad_norm: 0.999999189217867, iteration: 178077
loss: 1.0604422092437744,grad_norm: 0.9999991543553942, iteration: 178078
loss: 1.0737147331237793,grad_norm: 0.9964467531516589, iteration: 178079
loss: 0.9957958459854126,grad_norm: 0.9680152298498362, iteration: 178080
loss: 1.0059657096862793,grad_norm: 0.9999991592594182, iteration: 178081
loss: 1.010256290435791,grad_norm: 0.9999999897626369, iteration: 178082
loss: 0.9841558337211609,grad_norm: 0.9999990792653691, iteration: 178083
loss: 1.0277163982391357,grad_norm: 0.9999991129548587, iteration: 178084
loss: 1.0824815034866333,grad_norm: 1.000000001988982, iteration: 178085
loss: 1.0017917156219482,grad_norm: 0.9245636844737879, iteration: 178086
loss: 1.0382716655731201,grad_norm: 0.9999991706674871, iteration: 178087
loss: 0.9843770861625671,grad_norm: 0.9731594064254149, iteration: 178088
loss: 1.005818247795105,grad_norm: 0.9445874885166347, iteration: 178089
loss: 1.0379670858383179,grad_norm: 0.9999990038909338, iteration: 178090
loss: 0.9987832307815552,grad_norm: 0.8958316303107531, iteration: 178091
loss: 1.0109789371490479,grad_norm: 0.9999991594089617, iteration: 178092
loss: 1.0314316749572754,grad_norm: 0.9999990249843945, iteration: 178093
loss: 1.0028260946273804,grad_norm: 0.999999230470523, iteration: 178094
loss: 0.9869222044944763,grad_norm: 0.9861650690410378, iteration: 178095
loss: 1.0003702640533447,grad_norm: 0.926928589628838, iteration: 178096
loss: 0.9993517398834229,grad_norm: 0.9999993700259326, iteration: 178097
loss: 1.0129061937332153,grad_norm: 0.9999990257340773, iteration: 178098
loss: 1.060893177986145,grad_norm: 0.9999994282109366, iteration: 178099
loss: 1.0965073108673096,grad_norm: 0.99999897896987, iteration: 178100
loss: 0.9982946515083313,grad_norm: 0.9999991037236409, iteration: 178101
loss: 0.9932495951652527,grad_norm: 0.8662112260241316, iteration: 178102
loss: 1.0008184909820557,grad_norm: 0.9999992079236149, iteration: 178103
loss: 0.9768257737159729,grad_norm: 0.7559345235848531, iteration: 178104
loss: 1.0375617742538452,grad_norm: 0.9999993337750456, iteration: 178105
loss: 1.0211647748947144,grad_norm: 0.9692648240497997, iteration: 178106
loss: 1.0162731409072876,grad_norm: 0.9999989575933367, iteration: 178107
loss: 1.0387541055679321,grad_norm: 0.9565794464667601, iteration: 178108
loss: 0.9695569276809692,grad_norm: 0.7988618821257054, iteration: 178109
loss: 0.9931778907775879,grad_norm: 0.9610793917044339, iteration: 178110
loss: 0.9979427456855774,grad_norm: 0.9999991882301912, iteration: 178111
loss: 0.9853293299674988,grad_norm: 0.9999991169746428, iteration: 178112
loss: 1.0335278511047363,grad_norm: 0.9999990527412028, iteration: 178113
loss: 0.9539263248443604,grad_norm: 0.8520146008125149, iteration: 178114
loss: 0.9903166890144348,grad_norm: 0.94601632789413, iteration: 178115
loss: 1.013414978981018,grad_norm: 0.9190049860216554, iteration: 178116
loss: 1.02876877784729,grad_norm: 0.9999989657453722, iteration: 178117
loss: 1.0093914270401,grad_norm: 0.9177255083162175, iteration: 178118
loss: 1.0357218980789185,grad_norm: 0.9999995913489633, iteration: 178119
loss: 1.0007038116455078,grad_norm: 0.8981056690660034, iteration: 178120
loss: 0.9726200699806213,grad_norm: 0.9999992968075442, iteration: 178121
loss: 1.0269443988800049,grad_norm: 0.9270373785809575, iteration: 178122
loss: 0.9800194501876831,grad_norm: 0.9999991046621832, iteration: 178123
loss: 1.0269911289215088,grad_norm: 0.9999992036255015, iteration: 178124
loss: 0.9709339141845703,grad_norm: 0.9999992147611209, iteration: 178125
loss: 0.9860838055610657,grad_norm: 0.9172078843225434, iteration: 178126
loss: 0.9627495408058167,grad_norm: 0.9999991365730624, iteration: 178127
loss: 1.0192316770553589,grad_norm: 0.9999989792224598, iteration: 178128
loss: 1.0141806602478027,grad_norm: 0.9999992493806127, iteration: 178129
loss: 1.0278326272964478,grad_norm: 0.9999993348135829, iteration: 178130
loss: 0.9787643551826477,grad_norm: 0.9999991787086425, iteration: 178131
loss: 1.0155545473098755,grad_norm: 0.9999992297126089, iteration: 178132
loss: 1.0005651712417603,grad_norm: 0.9118263726023869, iteration: 178133
loss: 0.9740569591522217,grad_norm: 0.9202417374617458, iteration: 178134
loss: 1.0087890625,grad_norm: 0.9999994541890944, iteration: 178135
loss: 1.0253640413284302,grad_norm: 0.9999991798037039, iteration: 178136
loss: 0.9983299374580383,grad_norm: 0.9999990247550965, iteration: 178137
loss: 1.0103880167007446,grad_norm: 0.9401001697582113, iteration: 178138
loss: 0.9783105850219727,grad_norm: 0.9803968686956146, iteration: 178139
loss: 1.0119982957839966,grad_norm: 0.9999989904192688, iteration: 178140
loss: 1.0313533544540405,grad_norm: 0.9466833239228952, iteration: 178141
loss: 1.0035570859909058,grad_norm: 0.8565481303907215, iteration: 178142
loss: 0.9475981593132019,grad_norm: 0.9999990521591194, iteration: 178143
loss: 0.9979458451271057,grad_norm: 0.8910130817383034, iteration: 178144
loss: 1.006900668144226,grad_norm: 0.9999992121607232, iteration: 178145
loss: 1.0280762910842896,grad_norm: 0.9064259053506618, iteration: 178146
loss: 1.0110732316970825,grad_norm: 0.9212680390169494, iteration: 178147
loss: 0.9883478283882141,grad_norm: 0.9999992001278829, iteration: 178148
loss: 1.0108184814453125,grad_norm: 0.9723341462522873, iteration: 178149
loss: 0.9827149510383606,grad_norm: 0.9999993556010877, iteration: 178150
loss: 1.027296543121338,grad_norm: 0.9999993936404007, iteration: 178151
loss: 1.0326361656188965,grad_norm: 0.8620245285161734, iteration: 178152
loss: 0.9919928312301636,grad_norm: 0.9999991018744521, iteration: 178153
loss: 0.9721582531929016,grad_norm: 0.9765470642860308, iteration: 178154
loss: 1.0097086429595947,grad_norm: 0.9020854327361093, iteration: 178155
loss: 0.9946531057357788,grad_norm: 0.9650026512587212, iteration: 178156
loss: 1.0181087255477905,grad_norm: 0.9999990669403174, iteration: 178157
loss: 1.0373578071594238,grad_norm: 0.9183674929209351, iteration: 178158
loss: 1.0208938121795654,grad_norm: 0.9272962718379005, iteration: 178159
loss: 1.0006624460220337,grad_norm: 0.9026722089821185, iteration: 178160
loss: 1.028242826461792,grad_norm: 0.9999990914235745, iteration: 178161
loss: 1.0059186220169067,grad_norm: 0.9009161095335254, iteration: 178162
loss: 0.9870773553848267,grad_norm: 0.9999991684038619, iteration: 178163
loss: 0.9754261374473572,grad_norm: 0.963827651178711, iteration: 178164
loss: 1.0024675130844116,grad_norm: 0.9413904959941075, iteration: 178165
loss: 1.006540298461914,grad_norm: 0.9999991525569726, iteration: 178166
loss: 1.0047069787979126,grad_norm: 0.876682184915863, iteration: 178167
loss: 0.9892986416816711,grad_norm: 0.999999200558055, iteration: 178168
loss: 1.0245321989059448,grad_norm: 0.9999989587495628, iteration: 178169
loss: 1.027782917022705,grad_norm: 0.8835533208708577, iteration: 178170
loss: 0.9892338514328003,grad_norm: 0.9053759959374181, iteration: 178171
loss: 1.0110340118408203,grad_norm: 0.999999113244881, iteration: 178172
loss: 1.0050359964370728,grad_norm: 0.9010004728258536, iteration: 178173
loss: 0.9985558390617371,grad_norm: 0.9999989820536241, iteration: 178174
loss: 0.9813703298568726,grad_norm: 0.9999991002732177, iteration: 178175
loss: 1.0058684349060059,grad_norm: 0.8762842514667193, iteration: 178176
loss: 1.0200527906417847,grad_norm: 0.9999990697633697, iteration: 178177
loss: 0.9956694841384888,grad_norm: 0.973199193429194, iteration: 178178
loss: 1.0161528587341309,grad_norm: 0.9670417746526402, iteration: 178179
loss: 1.0200871229171753,grad_norm: 0.9999990495968157, iteration: 178180
loss: 0.9841778874397278,grad_norm: 0.9644524669192086, iteration: 178181
loss: 0.947995126247406,grad_norm: 0.9036293680292595, iteration: 178182
loss: 1.0128834247589111,grad_norm: 0.9999991187230227, iteration: 178183
loss: 1.0201271772384644,grad_norm: 0.9999992333172704, iteration: 178184
loss: 0.9569153189659119,grad_norm: 0.8874806389324946, iteration: 178185
loss: 1.0032676458358765,grad_norm: 0.8851194557295666, iteration: 178186
loss: 1.0346187353134155,grad_norm: 0.9403816040023989, iteration: 178187
loss: 1.0216554403305054,grad_norm: 0.9999991551856723, iteration: 178188
loss: 1.0123156309127808,grad_norm: 0.9311393895885859, iteration: 178189
loss: 0.9783281087875366,grad_norm: 0.9999990125064012, iteration: 178190
loss: 1.0413763523101807,grad_norm: 0.9999990822209053, iteration: 178191
loss: 0.9985175132751465,grad_norm: 0.8280452004848673, iteration: 178192
loss: 0.9764615297317505,grad_norm: 0.9999997135749161, iteration: 178193
loss: 0.9658980965614319,grad_norm: 0.891325493521106, iteration: 178194
loss: 1.0237360000610352,grad_norm: 0.8117078253100559, iteration: 178195
loss: 1.014151692390442,grad_norm: 0.9999991382170256, iteration: 178196
loss: 0.9970496296882629,grad_norm: 0.9183861262862131, iteration: 178197
loss: 0.9559454917907715,grad_norm: 0.9999991259338734, iteration: 178198
loss: 0.9880240559577942,grad_norm: 0.9999994572390419, iteration: 178199
loss: 1.1513326168060303,grad_norm: 0.9999997837346483, iteration: 178200
loss: 1.0423840284347534,grad_norm: 0.999999495409932, iteration: 178201
loss: 0.9804657101631165,grad_norm: 0.9999992612318133, iteration: 178202
loss: 1.0043381452560425,grad_norm: 0.9999990316838004, iteration: 178203
loss: 1.0166525840759277,grad_norm: 0.9999990838156031, iteration: 178204
loss: 0.9474858641624451,grad_norm: 0.841307611921999, iteration: 178205
loss: 1.016127109527588,grad_norm: 0.9999993031350429, iteration: 178206
loss: 0.9691776037216187,grad_norm: 0.999999252220667, iteration: 178207
loss: 0.9811975955963135,grad_norm: 0.9072070684077825, iteration: 178208
loss: 0.9863097667694092,grad_norm: 0.9777668264072665, iteration: 178209
loss: 0.9943847060203552,grad_norm: 0.9999990538733204, iteration: 178210
loss: 1.144075870513916,grad_norm: 0.9999997053679329, iteration: 178211
loss: 0.9948614239692688,grad_norm: 0.9999991570885727, iteration: 178212
loss: 1.0048222541809082,grad_norm: 0.9999990438248026, iteration: 178213
loss: 0.9980214238166809,grad_norm: 0.8848214719187696, iteration: 178214
loss: 0.9768532514572144,grad_norm: 0.9999992412806308, iteration: 178215
loss: 0.9884700775146484,grad_norm: 0.9999992054490581, iteration: 178216
loss: 0.98431396484375,grad_norm: 0.9349163612454721, iteration: 178217
loss: 1.0016714334487915,grad_norm: 0.9999992106314776, iteration: 178218
loss: 1.0092968940734863,grad_norm: 0.9004554271094632, iteration: 178219
loss: 1.136046290397644,grad_norm: 0.9999990560860055, iteration: 178220
loss: 0.9874781966209412,grad_norm: 0.9315971581563934, iteration: 178221
loss: 1.0856080055236816,grad_norm: 0.9999996306509367, iteration: 178222
loss: 0.9948914647102356,grad_norm: 0.8975116314310979, iteration: 178223
loss: 1.008317470550537,grad_norm: 0.9999991497200965, iteration: 178224
loss: 1.005839467048645,grad_norm: 0.9999991649219742, iteration: 178225
loss: 0.9803312420845032,grad_norm: 0.99999921178872, iteration: 178226
loss: 0.9768933653831482,grad_norm: 0.9999992628235423, iteration: 178227
loss: 1.011468529701233,grad_norm: 0.9875890746704622, iteration: 178228
loss: 1.0048896074295044,grad_norm: 0.9999991332862898, iteration: 178229
loss: 1.0113521814346313,grad_norm: 0.999999044003282, iteration: 178230
loss: 0.9458300471305847,grad_norm: 0.9429841411667265, iteration: 178231
loss: 0.994726300239563,grad_norm: 0.9999990675217173, iteration: 178232
loss: 1.0623071193695068,grad_norm: 0.9999992316863525, iteration: 178233
loss: 1.0341895818710327,grad_norm: 0.9999991255984625, iteration: 178234
loss: 1.0889989137649536,grad_norm: 0.9999998864346133, iteration: 178235
loss: 0.9673009514808655,grad_norm: 0.8521910540255357, iteration: 178236
loss: 1.309462070465088,grad_norm: 0.9999997138491112, iteration: 178237
loss: 0.9574849605560303,grad_norm: 0.9421613532005646, iteration: 178238
loss: 1.138717770576477,grad_norm: 0.9999992008717373, iteration: 178239
loss: 0.9820475578308105,grad_norm: 0.9999990974836785, iteration: 178240
loss: 1.0690711736679077,grad_norm: 0.9979703867490032, iteration: 178241
loss: 1.0491182804107666,grad_norm: 0.9999999367673538, iteration: 178242
loss: 1.002987027168274,grad_norm: 0.9999990459619684, iteration: 178243
loss: 0.9640640616416931,grad_norm: 0.9356559527196991, iteration: 178244
loss: 1.0868314504623413,grad_norm: 0.9999996274982633, iteration: 178245
loss: 1.047780990600586,grad_norm: 1.0000000618682117, iteration: 178246
loss: 0.9871306419372559,grad_norm: 0.9999990877358271, iteration: 178247
loss: 0.9467961192131042,grad_norm: 0.999999397221919, iteration: 178248
loss: 0.967579185962677,grad_norm: 0.9999990397130006, iteration: 178249
loss: 0.9945579767227173,grad_norm: 0.9575417177644938, iteration: 178250
loss: 1.0352944135665894,grad_norm: 0.8805494609193619, iteration: 178251
loss: 1.0039833784103394,grad_norm: 0.8826853902260163, iteration: 178252
loss: 1.1240825653076172,grad_norm: 0.9999998156914282, iteration: 178253
loss: 1.0256965160369873,grad_norm: 0.999999532241374, iteration: 178254
loss: 0.9903717041015625,grad_norm: 0.8654102767483849, iteration: 178255
loss: 1.0197371244430542,grad_norm: 0.9583772431595803, iteration: 178256
loss: 0.9545987844467163,grad_norm: 0.9006431964661553, iteration: 178257
loss: 1.0490570068359375,grad_norm: 0.999999747955415, iteration: 178258
loss: 0.9760311841964722,grad_norm: 0.9287662452125172, iteration: 178259
loss: 1.1496002674102783,grad_norm: 0.9999998129232891, iteration: 178260
loss: 1.0351896286010742,grad_norm: 0.9164869376682933, iteration: 178261
loss: 1.039507269859314,grad_norm: 0.9999991483335716, iteration: 178262
loss: 1.045513391494751,grad_norm: 0.9999990026635577, iteration: 178263
loss: 1.0099313259124756,grad_norm: 0.9059840109470069, iteration: 178264
loss: 1.0791188478469849,grad_norm: 0.9999996896243598, iteration: 178265
loss: 0.9735075831413269,grad_norm: 0.9999991221698156, iteration: 178266
loss: 0.985090434551239,grad_norm: 0.9999991351925144, iteration: 178267
loss: 1.0509552955627441,grad_norm: 0.9999994127763369, iteration: 178268
loss: 1.008297324180603,grad_norm: 0.9999990854596807, iteration: 178269
loss: 1.1072475910186768,grad_norm: 0.9999998893940261, iteration: 178270
loss: 1.006296992301941,grad_norm: 0.9999999322602939, iteration: 178271
loss: 0.9767699241638184,grad_norm: 0.9999989827070508, iteration: 178272
loss: 1.0366261005401611,grad_norm: 0.9999995204994002, iteration: 178273
loss: 0.988268256187439,grad_norm: 0.9547791802743012, iteration: 178274
loss: 0.9896320104598999,grad_norm: 0.9550161804124199, iteration: 178275
loss: 0.9995450377464294,grad_norm: 0.9999992050637692, iteration: 178276
loss: 0.99333655834198,grad_norm: 0.9999990604722231, iteration: 178277
loss: 0.9909212589263916,grad_norm: 0.9999991594839379, iteration: 178278
loss: 1.0263105630874634,grad_norm: 0.9999999282972248, iteration: 178279
loss: 0.9994339346885681,grad_norm: 0.9519822204234314, iteration: 178280
loss: 1.035081148147583,grad_norm: 0.821289830136006, iteration: 178281
loss: 0.985059916973114,grad_norm: 0.9999993060097393, iteration: 178282
loss: 1.0233784914016724,grad_norm: 0.9999992561462905, iteration: 178283
loss: 1.0186816453933716,grad_norm: 0.9999990494439952, iteration: 178284
loss: 0.9842736124992371,grad_norm: 0.8469143989601895, iteration: 178285
loss: 1.0009499788284302,grad_norm: 0.9938419320563355, iteration: 178286
loss: 0.9844099879264832,grad_norm: 0.9999991107485701, iteration: 178287
loss: 1.036584496498108,grad_norm: 0.942840178811105, iteration: 178288
loss: 1.0361090898513794,grad_norm: 0.9999991558823657, iteration: 178289
loss: 0.9676682353019714,grad_norm: 0.6951966080182878, iteration: 178290
loss: 1.0205589532852173,grad_norm: 0.9362502248323489, iteration: 178291
loss: 1.0065078735351562,grad_norm: 0.9999990673645711, iteration: 178292
loss: 1.0075119733810425,grad_norm: 0.8366108152951706, iteration: 178293
loss: 0.9340497851371765,grad_norm: 0.9999992432831573, iteration: 178294
loss: 1.0161292552947998,grad_norm: 0.8824964202629196, iteration: 178295
loss: 0.9859275221824646,grad_norm: 0.9999991994316721, iteration: 178296
loss: 1.0129674673080444,grad_norm: 0.9999993050751335, iteration: 178297
loss: 0.9779478907585144,grad_norm: 0.9304302794883981, iteration: 178298
loss: 1.0030882358551025,grad_norm: 0.9999991978372179, iteration: 178299
loss: 0.9997276663780212,grad_norm: 0.9999990641170973, iteration: 178300
loss: 0.9814132452011108,grad_norm: 0.963140033340922, iteration: 178301
loss: 1.0342718362808228,grad_norm: 0.9747108071188467, iteration: 178302
loss: 1.0004692077636719,grad_norm: 0.9790774183737228, iteration: 178303
loss: 1.0081901550292969,grad_norm: 0.9999989841138752, iteration: 178304
loss: 1.0527088642120361,grad_norm: 0.9999997093042162, iteration: 178305
loss: 1.0262893438339233,grad_norm: 0.9999992746291531, iteration: 178306
loss: 1.0163073539733887,grad_norm: 0.9999991794076506, iteration: 178307
loss: 1.0698063373565674,grad_norm: 0.999999063646955, iteration: 178308
loss: 0.9912481904029846,grad_norm: 0.9819866018185979, iteration: 178309
loss: 0.9912428855895996,grad_norm: 0.9212265764592861, iteration: 178310
loss: 0.9998164176940918,grad_norm: 0.9298741507226194, iteration: 178311
loss: 1.0135968923568726,grad_norm: 0.9999992617060454, iteration: 178312
loss: 1.0047147274017334,grad_norm: 0.9429434744276449, iteration: 178313
loss: 1.0182468891143799,grad_norm: 0.9999990473094763, iteration: 178314
loss: 1.008320927619934,grad_norm: 0.9999991491490134, iteration: 178315
loss: 1.0249165296554565,grad_norm: 0.9999989775646521, iteration: 178316
loss: 0.999298632144928,grad_norm: 0.9743918267068001, iteration: 178317
loss: 0.9999547600746155,grad_norm: 0.9999990876384905, iteration: 178318
loss: 1.004062294960022,grad_norm: 0.9999990237914012, iteration: 178319
loss: 1.0163335800170898,grad_norm: 0.9999990650112748, iteration: 178320
loss: 1.004327654838562,grad_norm: 0.9999988846086441, iteration: 178321
loss: 0.9969159364700317,grad_norm: 0.8671993851616252, iteration: 178322
loss: 1.0208749771118164,grad_norm: 0.9999991837695742, iteration: 178323
loss: 0.9886242151260376,grad_norm: 0.8792175813017521, iteration: 178324
loss: 0.9478944540023804,grad_norm: 0.9209885377959374, iteration: 178325
loss: 1.004179835319519,grad_norm: 0.9845498164193158, iteration: 178326
loss: 1.0005968809127808,grad_norm: 0.9928866977606281, iteration: 178327
loss: 1.0096222162246704,grad_norm: 0.9999993233816632, iteration: 178328
loss: 0.9817889928817749,grad_norm: 0.8683129765273462, iteration: 178329
loss: 1.008528470993042,grad_norm: 0.999999094747737, iteration: 178330
loss: 0.9997642040252686,grad_norm: 0.9999991245232672, iteration: 178331
loss: 1.0059198141098022,grad_norm: 0.9717764864659573, iteration: 178332
loss: 0.9745117425918579,grad_norm: 0.8948003917692967, iteration: 178333
loss: 1.068784475326538,grad_norm: 0.9999993738895213, iteration: 178334
loss: 0.9749963283538818,grad_norm: 0.9999992117762914, iteration: 178335
loss: 1.006005883216858,grad_norm: 0.9999989722532301, iteration: 178336
loss: 0.9739108085632324,grad_norm: 0.9999992024689149, iteration: 178337
loss: 1.0244053602218628,grad_norm: 0.826266799928625, iteration: 178338
loss: 0.9902076721191406,grad_norm: 0.9925141168667384, iteration: 178339
loss: 1.0290998220443726,grad_norm: 0.9999999350915331, iteration: 178340
loss: 0.9970880150794983,grad_norm: 0.9999989903260765, iteration: 178341
loss: 1.0251768827438354,grad_norm: 0.810587034401853, iteration: 178342
loss: 0.9862197637557983,grad_norm: 0.8127713060121966, iteration: 178343
loss: 0.9815211296081543,grad_norm: 0.8988862724738197, iteration: 178344
loss: 0.9746785759925842,grad_norm: 0.8667783589371224, iteration: 178345
loss: 0.9771324396133423,grad_norm: 0.9537187466484284, iteration: 178346
loss: 1.0201588869094849,grad_norm: 0.9839067418552719, iteration: 178347
loss: 0.9542908072471619,grad_norm: 0.9196642646932142, iteration: 178348
loss: 0.9919458031654358,grad_norm: 0.910333108960508, iteration: 178349
loss: 0.9717702269554138,grad_norm: 0.8528659773742648, iteration: 178350
loss: 1.0126179456710815,grad_norm: 0.9999990181501508, iteration: 178351
loss: 0.9807194471359253,grad_norm: 0.9999998000953666, iteration: 178352
loss: 1.0288634300231934,grad_norm: 0.9359340576235413, iteration: 178353
loss: 0.9973693490028381,grad_norm: 0.999999159287033, iteration: 178354
loss: 0.9786092638969421,grad_norm: 0.9999991352246046, iteration: 178355
loss: 1.0116907358169556,grad_norm: 0.9480539763711744, iteration: 178356
loss: 1.0032285451889038,grad_norm: 0.9999991429576611, iteration: 178357
loss: 0.9806919097900391,grad_norm: 0.9354649196141867, iteration: 178358
loss: 1.0199311971664429,grad_norm: 0.999999170981893, iteration: 178359
loss: 1.006791591644287,grad_norm: 0.9999990573176981, iteration: 178360
loss: 0.9949253797531128,grad_norm: 0.7523965068573506, iteration: 178361
loss: 0.9916908144950867,grad_norm: 0.9501791722968982, iteration: 178362
loss: 1.0350676774978638,grad_norm: 0.9999992268388146, iteration: 178363
loss: 0.9767903089523315,grad_norm: 0.999998977099753, iteration: 178364
loss: 1.0715489387512207,grad_norm: 0.9594319120246484, iteration: 178365
loss: 0.9930029511451721,grad_norm: 0.9349142850191456, iteration: 178366
loss: 0.9781298041343689,grad_norm: 0.9903240626520842, iteration: 178367
loss: 1.0000641345977783,grad_norm: 0.9999990035120127, iteration: 178368
loss: 0.9894773960113525,grad_norm: 0.9900315253685598, iteration: 178369
loss: 1.0326619148254395,grad_norm: 0.9307141046322823, iteration: 178370
loss: 1.017319917678833,grad_norm: 0.9011552397073039, iteration: 178371
loss: 0.9745367169380188,grad_norm: 0.9999991532849469, iteration: 178372
loss: 1.00133216381073,grad_norm: 0.9583157967916422, iteration: 178373
loss: 0.9950188398361206,grad_norm: 0.999999306384415, iteration: 178374
loss: 1.0218751430511475,grad_norm: 0.9999990208070326, iteration: 178375
loss: 1.0890570878982544,grad_norm: 0.9999993194211945, iteration: 178376
loss: 0.9856472015380859,grad_norm: 0.9502601954695329, iteration: 178377
loss: 0.9980250597000122,grad_norm: 0.9233121612593792, iteration: 178378
loss: 1.023864984512329,grad_norm: 0.9999991043649852, iteration: 178379
loss: 1.0337682962417603,grad_norm: 0.9278996885346046, iteration: 178380
loss: 0.9959058165550232,grad_norm: 0.8756257480460897, iteration: 178381
loss: 1.007035255432129,grad_norm: 0.9999994373787288, iteration: 178382
loss: 1.0088695287704468,grad_norm: 0.9999998538251071, iteration: 178383
loss: 1.0164973735809326,grad_norm: 0.9999989419556639, iteration: 178384
loss: 1.0495030879974365,grad_norm: 0.9999998683013722, iteration: 178385
loss: 1.0036967992782593,grad_norm: 0.846298064798573, iteration: 178386
loss: 1.0125402212142944,grad_norm: 0.8887005073047368, iteration: 178387
loss: 0.9802858233451843,grad_norm: 0.9832503766926688, iteration: 178388
loss: 0.9622793793678284,grad_norm: 0.9495339566953348, iteration: 178389
loss: 0.9963385462760925,grad_norm: 0.9999991753710463, iteration: 178390
loss: 1.000165343284607,grad_norm: 0.9246408703178485, iteration: 178391
loss: 0.9805165529251099,grad_norm: 0.9999991829009057, iteration: 178392
loss: 0.9814149737358093,grad_norm: 0.9645039294670695, iteration: 178393
loss: 0.9828384518623352,grad_norm: 0.8663526077922568, iteration: 178394
loss: 0.9878110885620117,grad_norm: 0.9999995423705367, iteration: 178395
loss: 1.0678743124008179,grad_norm: 0.9740279430559792, iteration: 178396
loss: 0.9935073852539062,grad_norm: 0.9999990658736678, iteration: 178397
loss: 0.9981333017349243,grad_norm: 0.9999989272966129, iteration: 178398
loss: 1.0251491069793701,grad_norm: 0.9433432287267475, iteration: 178399
loss: 1.0034550428390503,grad_norm: 0.9487256128463519, iteration: 178400
loss: 1.007063865661621,grad_norm: 0.9328936508983092, iteration: 178401
loss: 0.9696760773658752,grad_norm: 0.8575118699821852, iteration: 178402
loss: 1.0573904514312744,grad_norm: 0.999999881374066, iteration: 178403
loss: 0.9550071358680725,grad_norm: 0.8890588870539694, iteration: 178404
loss: 0.9912184476852417,grad_norm: 0.9999990180914283, iteration: 178405
loss: 1.015971302986145,grad_norm: 0.893266557777987, iteration: 178406
loss: 1.0026981830596924,grad_norm: 0.9999991905168595, iteration: 178407
loss: 1.0388765335083008,grad_norm: 0.999999014423745, iteration: 178408
loss: 1.0015077590942383,grad_norm: 0.9969347359796256, iteration: 178409
loss: 0.9893814921379089,grad_norm: 0.8900529059220994, iteration: 178410
loss: 1.0156841278076172,grad_norm: 0.9469614786812347, iteration: 178411
loss: 1.0068427324295044,grad_norm: 0.9999990525920833, iteration: 178412
loss: 1.0193790197372437,grad_norm: 0.9640977524488148, iteration: 178413
loss: 1.0058873891830444,grad_norm: 0.9999991066110693, iteration: 178414
loss: 0.9925554394721985,grad_norm: 0.9858122256271659, iteration: 178415
loss: 1.0421949625015259,grad_norm: 0.9999990789508015, iteration: 178416
loss: 0.9985889196395874,grad_norm: 0.9999990890361008, iteration: 178417
loss: 1.034712553024292,grad_norm: 0.9999997817545064, iteration: 178418
loss: 0.9923394322395325,grad_norm: 0.999999263496783, iteration: 178419
loss: 1.00534987449646,grad_norm: 0.999999068800634, iteration: 178420
loss: 0.969616174697876,grad_norm: 0.9999990658543852, iteration: 178421
loss: 1.0098556280136108,grad_norm: 0.999999269494424, iteration: 178422
loss: 0.9957287311553955,grad_norm: 0.8333169003548415, iteration: 178423
loss: 0.980957567691803,grad_norm: 0.9771627868687044, iteration: 178424
loss: 1.126396894454956,grad_norm: 0.9999997451586854, iteration: 178425
loss: 0.9941586256027222,grad_norm: 0.9112208513705753, iteration: 178426
loss: 0.9960840344429016,grad_norm: 0.8364561547660286, iteration: 178427
loss: 1.0357567071914673,grad_norm: 0.9999991317962527, iteration: 178428
loss: 1.0050867795944214,grad_norm: 0.8957048132594146, iteration: 178429
loss: 1.005042552947998,grad_norm: 0.9999990899687772, iteration: 178430
loss: 1.004825234413147,grad_norm: 0.9090935520187294, iteration: 178431
loss: 0.9850090742111206,grad_norm: 0.8484833578591432, iteration: 178432
loss: 1.0287657976150513,grad_norm: 0.9999993093642214, iteration: 178433
loss: 0.9727694392204285,grad_norm: 0.9035075963332184, iteration: 178434
loss: 0.9534834623336792,grad_norm: 0.999999052820176, iteration: 178435
loss: 0.9732770919799805,grad_norm: 0.8800506190934116, iteration: 178436
loss: 0.9505839347839355,grad_norm: 0.9999991204625145, iteration: 178437
loss: 1.0846340656280518,grad_norm: 0.9999992495886919, iteration: 178438
loss: 1.0215673446655273,grad_norm: 0.9591359173268836, iteration: 178439
loss: 1.0045875310897827,grad_norm: 0.9999995979343246, iteration: 178440
loss: 1.0034476518630981,grad_norm: 0.9999993375912642, iteration: 178441
loss: 0.9941685795783997,grad_norm: 0.8759387452495819, iteration: 178442
loss: 1.010191798210144,grad_norm: 0.8052708986532049, iteration: 178443
loss: 1.0225434303283691,grad_norm: 0.9543803834588684, iteration: 178444
loss: 1.0196471214294434,grad_norm: 0.9872885490704751, iteration: 178445
loss: 1.01411771774292,grad_norm: 0.817648053091397, iteration: 178446
loss: 0.9578391313552856,grad_norm: 0.9999996094105995, iteration: 178447
loss: 0.9739950299263,grad_norm: 0.867162771752753, iteration: 178448
loss: 0.9879318475723267,grad_norm: 0.9925017847194129, iteration: 178449
loss: 1.0472651720046997,grad_norm: 0.9999990760801505, iteration: 178450
loss: 0.9855668544769287,grad_norm: 0.9999994234427784, iteration: 178451
loss: 0.9945701360702515,grad_norm: 0.999999189343, iteration: 178452
loss: 0.9636379480361938,grad_norm: 0.9492749279043556, iteration: 178453
loss: 0.9752155542373657,grad_norm: 0.9493449329958856, iteration: 178454
loss: 0.9858952164649963,grad_norm: 0.8988204201495542, iteration: 178455
loss: 1.075539469718933,grad_norm: 0.9999999539329345, iteration: 178456
loss: 1.0069292783737183,grad_norm: 0.9999989752412503, iteration: 178457
loss: 1.0309919118881226,grad_norm: 0.99999911320559, iteration: 178458
loss: 1.0270581245422363,grad_norm: 0.9999991500045535, iteration: 178459
loss: 1.0172282457351685,grad_norm: 0.9192349120523005, iteration: 178460
loss: 0.9749882817268372,grad_norm: 0.9720579683598863, iteration: 178461
loss: 1.0052666664123535,grad_norm: 0.999999429182529, iteration: 178462
loss: 0.9670932292938232,grad_norm: 0.9164695365576712, iteration: 178463
loss: 1.0618715286254883,grad_norm: 0.9731143906842653, iteration: 178464
loss: 0.9570242166519165,grad_norm: 0.9999991137026444, iteration: 178465
loss: 1.0232408046722412,grad_norm: 0.918276730512031, iteration: 178466
loss: 0.975104808807373,grad_norm: 0.9569604313824331, iteration: 178467
loss: 1.0223865509033203,grad_norm: 0.8919147239536348, iteration: 178468
loss: 0.9401147365570068,grad_norm: 0.9999998385393516, iteration: 178469
loss: 0.9905526638031006,grad_norm: 0.9096405872208057, iteration: 178470
loss: 1.0025321245193481,grad_norm: 0.9042325330788128, iteration: 178471
loss: 1.029138207435608,grad_norm: 0.999999008171383, iteration: 178472
loss: 1.035154104232788,grad_norm: 0.9999991296015457, iteration: 178473
loss: 1.019583821296692,grad_norm: 0.9999991339816161, iteration: 178474
loss: 0.990025520324707,grad_norm: 0.9999991995654657, iteration: 178475
loss: 1.0038340091705322,grad_norm: 0.9447742207319788, iteration: 178476
loss: 0.9982083439826965,grad_norm: 0.8183707391590381, iteration: 178477
loss: 1.0015724897384644,grad_norm: 0.999999128028783, iteration: 178478
loss: 0.9942283630371094,grad_norm: 0.9999990121246695, iteration: 178479
loss: 1.00675368309021,grad_norm: 0.8292633005736448, iteration: 178480
loss: 0.9962985515594482,grad_norm: 0.9999991602730698, iteration: 178481
loss: 1.082072377204895,grad_norm: 0.9999992136457809, iteration: 178482
loss: 0.9583411812782288,grad_norm: 0.9999990718676093, iteration: 178483
loss: 0.9898346662521362,grad_norm: 0.9999990844603683, iteration: 178484
loss: 0.9847567677497864,grad_norm: 0.945143260854103, iteration: 178485
loss: 0.9727346897125244,grad_norm: 0.9999990658593825, iteration: 178486
loss: 0.9974980354309082,grad_norm: 0.9967975165925057, iteration: 178487
loss: 0.9748001098632812,grad_norm: 0.9999991458360706, iteration: 178488
loss: 0.9832979440689087,grad_norm: 0.8752044267667105, iteration: 178489
loss: 1.015558123588562,grad_norm: 0.9452595481666264, iteration: 178490
loss: 1.0133445262908936,grad_norm: 0.9876425947000843, iteration: 178491
loss: 1.0036585330963135,grad_norm: 0.9610798888867345, iteration: 178492
loss: 0.979580819606781,grad_norm: 0.9557520481600915, iteration: 178493
loss: 0.9841709733009338,grad_norm: 0.8751220109145265, iteration: 178494
loss: 0.9970665574073792,grad_norm: 0.9999991289076706, iteration: 178495
loss: 0.9929396510124207,grad_norm: 0.9999991933800909, iteration: 178496
loss: 1.011142373085022,grad_norm: 0.9999988772161489, iteration: 178497
loss: 1.0371912717819214,grad_norm: 0.9655080656315564, iteration: 178498
loss: 0.9955795407295227,grad_norm: 0.8438621408093347, iteration: 178499
loss: 0.9815120100975037,grad_norm: 0.9999990761966584, iteration: 178500
loss: 0.9961056709289551,grad_norm: 0.8909178691024872, iteration: 178501
loss: 1.0151002407073975,grad_norm: 0.9999990435931736, iteration: 178502
loss: 1.025550365447998,grad_norm: 0.999999197138061, iteration: 178503
loss: 1.0333757400512695,grad_norm: 0.9070672208164854, iteration: 178504
loss: 0.9647523760795593,grad_norm: 0.9948998570672206, iteration: 178505
loss: 1.0404776334762573,grad_norm: 0.916433904527957, iteration: 178506
loss: 1.031522274017334,grad_norm: 0.999999043506346, iteration: 178507
loss: 1.018196702003479,grad_norm: 0.9999989669565181, iteration: 178508
loss: 1.017922282218933,grad_norm: 0.9999990855712998, iteration: 178509
loss: 1.0056349039077759,grad_norm: 0.9999990533381461, iteration: 178510
loss: 1.0576471090316772,grad_norm: 0.9999991357609113, iteration: 178511
loss: 0.9923092722892761,grad_norm: 0.9999990682533177, iteration: 178512
loss: 1.0369819402694702,grad_norm: 0.9910128384261058, iteration: 178513
loss: 0.9720798134803772,grad_norm: 0.9999990573657765, iteration: 178514
loss: 0.9908803105354309,grad_norm: 0.9216809945830802, iteration: 178515
loss: 1.0153180360794067,grad_norm: 0.94670796900769, iteration: 178516
loss: 0.9503800868988037,grad_norm: 0.967741461057971, iteration: 178517
loss: 0.9844639301300049,grad_norm: 0.8516064954466116, iteration: 178518
loss: 0.9832295179367065,grad_norm: 0.9062566715541539, iteration: 178519
loss: 0.9952990412712097,grad_norm: 0.9991983051657115, iteration: 178520
loss: 0.9897975325584412,grad_norm: 0.9999992957589201, iteration: 178521
loss: 1.0368587970733643,grad_norm: 0.9133211520571712, iteration: 178522
loss: 1.0244882106781006,grad_norm: 0.9999996714112572, iteration: 178523
loss: 0.9784301519393921,grad_norm: 0.999999170474844, iteration: 178524
loss: 1.0067847967147827,grad_norm: 0.9999991217474488, iteration: 178525
loss: 0.9651995301246643,grad_norm: 0.9999990059152469, iteration: 178526
loss: 1.0500179529190063,grad_norm: 0.9999992379862572, iteration: 178527
loss: 1.0214101076126099,grad_norm: 0.9999990168221738, iteration: 178528
loss: 1.034639835357666,grad_norm: 0.999999210244916, iteration: 178529
loss: 0.9992545247077942,grad_norm: 0.9898405336487809, iteration: 178530
loss: 1.0111308097839355,grad_norm: 0.8259303554911083, iteration: 178531
loss: 0.9871063232421875,grad_norm: 0.9819303985683623, iteration: 178532
loss: 0.9712668061256409,grad_norm: 0.9769956668250844, iteration: 178533
loss: 0.9904853105545044,grad_norm: 0.9999991896868695, iteration: 178534
loss: 1.0810905694961548,grad_norm: 0.9999995027510166, iteration: 178535
loss: 1.001359462738037,grad_norm: 0.9687185186243212, iteration: 178536
loss: 1.0210553407669067,grad_norm: 0.999999055624268, iteration: 178537
loss: 1.0070112943649292,grad_norm: 0.8085846378077903, iteration: 178538
loss: 1.0091242790222168,grad_norm: 0.8193645779348878, iteration: 178539
loss: 0.9938678741455078,grad_norm: 0.9393858453566526, iteration: 178540
loss: 1.0176130533218384,grad_norm: 0.9999990811686951, iteration: 178541
loss: 1.005297064781189,grad_norm: 0.9999990803212218, iteration: 178542
loss: 0.9819980263710022,grad_norm: 0.999999030867228, iteration: 178543
loss: 1.0595773458480835,grad_norm: 0.9999994216815022, iteration: 178544
loss: 0.9937667846679688,grad_norm: 0.9331785255774921, iteration: 178545
loss: 1.0189948081970215,grad_norm: 0.99999915750366, iteration: 178546
loss: 0.9859293699264526,grad_norm: 0.8927028461240757, iteration: 178547
loss: 0.9760714173316956,grad_norm: 0.8603181284828807, iteration: 178548
loss: 1.0024815797805786,grad_norm: 0.9999993266321306, iteration: 178549
loss: 0.9817915558815002,grad_norm: 0.9050779520738805, iteration: 178550
loss: 0.9799541234970093,grad_norm: 0.9041375112216422, iteration: 178551
loss: 0.997722864151001,grad_norm: 0.9999990737040366, iteration: 178552
loss: 1.0532904863357544,grad_norm: 0.9999991152657748, iteration: 178553
loss: 0.9924416542053223,grad_norm: 0.9247948391970503, iteration: 178554
loss: 0.9971511363983154,grad_norm: 0.9999991098397603, iteration: 178555
loss: 1.056486964225769,grad_norm: 0.9999995527174743, iteration: 178556
loss: 0.9868407249450684,grad_norm: 0.9999991126926668, iteration: 178557
loss: 1.0884734392166138,grad_norm: 0.9999992847914502, iteration: 178558
loss: 1.0918210744857788,grad_norm: 0.9999993471797046, iteration: 178559
loss: 1.1385457515716553,grad_norm: 0.9999991320029844, iteration: 178560
loss: 1.0204113721847534,grad_norm: 0.9999997317370443, iteration: 178561
loss: 1.0153930187225342,grad_norm: 0.9999990764099725, iteration: 178562
loss: 0.9903014898300171,grad_norm: 0.9999990211067852, iteration: 178563
loss: 0.9780171513557434,grad_norm: 0.8220582898976301, iteration: 178564
loss: 0.998965859413147,grad_norm: 0.8154626579352473, iteration: 178565
loss: 1.0323774814605713,grad_norm: 0.999999691287702, iteration: 178566
loss: 1.0925159454345703,grad_norm: 0.9999991780716408, iteration: 178567
loss: 1.0577287673950195,grad_norm: 0.9999998325333168, iteration: 178568
loss: 1.0206265449523926,grad_norm: 0.999999161220282, iteration: 178569
loss: 0.9875102639198303,grad_norm: 0.9999991816966394, iteration: 178570
loss: 1.0155668258666992,grad_norm: 0.9775782444409523, iteration: 178571
loss: 1.0317319631576538,grad_norm: 0.9999990531087761, iteration: 178572
loss: 1.0281431674957275,grad_norm: 0.9999991682047457, iteration: 178573
loss: 1.0021400451660156,grad_norm: 0.9011851963113681, iteration: 178574
loss: 1.010909914970398,grad_norm: 0.9999991325061625, iteration: 178575
loss: 1.015392780303955,grad_norm: 0.9018947628125584, iteration: 178576
loss: 0.9852423667907715,grad_norm: 0.8989884301506742, iteration: 178577
loss: 1.016607642173767,grad_norm: 0.9999991549319827, iteration: 178578
loss: 1.0272974967956543,grad_norm: 0.9938965351671467, iteration: 178579
loss: 0.9717963337898254,grad_norm: 0.9999991158278672, iteration: 178580
loss: 1.0208886861801147,grad_norm: 0.970159931435383, iteration: 178581
loss: 1.0400141477584839,grad_norm: 0.9999992945035443, iteration: 178582
loss: 1.0063133239746094,grad_norm: 0.8574340397599258, iteration: 178583
loss: 1.022159218788147,grad_norm: 0.9438032551505066, iteration: 178584
loss: 1.0270969867706299,grad_norm: 0.7438854004715804, iteration: 178585
loss: 0.9940449595451355,grad_norm: 0.943266644704503, iteration: 178586
loss: 0.962037205696106,grad_norm: 0.9182607464058709, iteration: 178587
loss: 1.059006690979004,grad_norm: 0.9999991707961885, iteration: 178588
loss: 1.0042771100997925,grad_norm: 0.9242479416984939, iteration: 178589
loss: 1.012576937675476,grad_norm: 0.9999993076186422, iteration: 178590
loss: 0.9993099570274353,grad_norm: 0.9788584953691573, iteration: 178591
loss: 0.9750333428382874,grad_norm: 0.9655758311913084, iteration: 178592
loss: 0.9960243105888367,grad_norm: 0.9362692313780142, iteration: 178593
loss: 0.9805827140808105,grad_norm: 0.8746036197722219, iteration: 178594
loss: 1.061099886894226,grad_norm: 0.9999990089677392, iteration: 178595
loss: 0.9850865006446838,grad_norm: 0.8913604645936257, iteration: 178596
loss: 1.0012584924697876,grad_norm: 0.9999991084724796, iteration: 178597
loss: 1.0350528955459595,grad_norm: 0.9999997754445299, iteration: 178598
loss: 0.9853930473327637,grad_norm: 0.9516958016421415, iteration: 178599
loss: 1.0695141553878784,grad_norm: 1.0000000042354813, iteration: 178600
loss: 1.0102362632751465,grad_norm: 0.999999097110696, iteration: 178601
loss: 1.019203782081604,grad_norm: 0.9999990258684203, iteration: 178602
loss: 1.0484273433685303,grad_norm: 0.9999992250386114, iteration: 178603
loss: 1.1176947355270386,grad_norm: 0.9999997941851149, iteration: 178604
loss: 1.027919054031372,grad_norm: 0.9999990834090022, iteration: 178605
loss: 0.9836761951446533,grad_norm: 0.9065825200315465, iteration: 178606
loss: 1.0024933815002441,grad_norm: 0.874812581020833, iteration: 178607
loss: 1.0368127822875977,grad_norm: 0.9999996023307989, iteration: 178608
loss: 1.0452574491500854,grad_norm: 0.9801972303344341, iteration: 178609
loss: 1.052004337310791,grad_norm: 0.9999995823369157, iteration: 178610
loss: 1.204314112663269,grad_norm: 1.00000007045287, iteration: 178611
loss: 1.0950474739074707,grad_norm: 0.9999994815915253, iteration: 178612
loss: 0.9787681698799133,grad_norm: 0.9999990467285816, iteration: 178613
loss: 1.038244605064392,grad_norm: 0.9999990804570629, iteration: 178614
loss: 1.0853091478347778,grad_norm: 0.999999187927247, iteration: 178615
loss: 1.2696977853775024,grad_norm: 0.9999998272615402, iteration: 178616
loss: 1.2167730331420898,grad_norm: 0.9999997218959059, iteration: 178617
loss: 0.998059868812561,grad_norm: 0.9999991476254328, iteration: 178618
loss: 1.093196988105774,grad_norm: 0.999999378524598, iteration: 178619
loss: 1.0408945083618164,grad_norm: 0.9999996156227714, iteration: 178620
loss: 1.106693148612976,grad_norm: 0.9999990709468791, iteration: 178621
loss: 1.1632580757141113,grad_norm: 0.9999992751182976, iteration: 178622
loss: 1.061814308166504,grad_norm: 0.9814346315882619, iteration: 178623
loss: 1.1399366855621338,grad_norm: 0.9999993441138697, iteration: 178624
loss: 0.990537703037262,grad_norm: 0.9999990967527843, iteration: 178625
loss: 1.094951868057251,grad_norm: 0.999999606443225, iteration: 178626
loss: 1.0087013244628906,grad_norm: 0.9999991723328614, iteration: 178627
loss: 1.0645179748535156,grad_norm: 0.9999997265950555, iteration: 178628
loss: 0.9671645164489746,grad_norm: 0.8274467248570321, iteration: 178629
loss: 0.9998827576637268,grad_norm: 0.9999990929964385, iteration: 178630
loss: 1.0633400678634644,grad_norm: 0.999999956467076, iteration: 178631
loss: 1.150675892829895,grad_norm: 0.999999234163728, iteration: 178632
loss: 0.9981259107589722,grad_norm: 0.9999992228189416, iteration: 178633
loss: 1.0396429300308228,grad_norm: 0.9999991231511571, iteration: 178634
loss: 0.9792998433113098,grad_norm: 0.8581943552787749, iteration: 178635
loss: 1.0345399379730225,grad_norm: 0.9541876301369562, iteration: 178636
loss: 1.1260416507720947,grad_norm: 0.9999991410865436, iteration: 178637
loss: 1.022437572479248,grad_norm: 0.971340528537709, iteration: 178638
loss: 1.132405161857605,grad_norm: 0.9999993003442134, iteration: 178639
loss: 1.099592685699463,grad_norm: 0.9999998972951197, iteration: 178640
loss: 1.1503530740737915,grad_norm: 0.999999507663703, iteration: 178641
loss: 1.0481975078582764,grad_norm: 0.9999994922398222, iteration: 178642
loss: 1.0788111686706543,grad_norm: 0.9999999390073335, iteration: 178643
loss: 1.0468171834945679,grad_norm: 0.9999992065723864, iteration: 178644
loss: 1.0243096351623535,grad_norm: 0.9999992297135234, iteration: 178645
loss: 1.03709077835083,grad_norm: 0.9999993093353501, iteration: 178646
loss: 1.0335990190505981,grad_norm: 0.9999992576104373, iteration: 178647
loss: 1.0062421560287476,grad_norm: 0.9999992563291701, iteration: 178648
loss: 1.1517969369888306,grad_norm: 1.0000000039507408, iteration: 178649
loss: 1.016343116760254,grad_norm: 0.9282324783854052, iteration: 178650
loss: 0.9877593517303467,grad_norm: 0.9931169048873429, iteration: 178651
loss: 1.0211783647537231,grad_norm: 0.9999992261844896, iteration: 178652
loss: 0.9835714101791382,grad_norm: 0.9999990837032667, iteration: 178653
loss: 1.0581337213516235,grad_norm: 0.9999990885682627, iteration: 178654
loss: 1.0300172567367554,grad_norm: 0.8821980446941833, iteration: 178655
loss: 0.9967198371887207,grad_norm: 0.9999992060698042, iteration: 178656
loss: 1.0178320407867432,grad_norm: 0.9999998267307252, iteration: 178657
loss: 1.0281763076782227,grad_norm: 0.9999990739361365, iteration: 178658
loss: 1.051906704902649,grad_norm: 0.9036003036611377, iteration: 178659
loss: 1.0132622718811035,grad_norm: 0.9265921105028854, iteration: 178660
loss: 0.9729866981506348,grad_norm: 0.9102057107801844, iteration: 178661
loss: 1.0753874778747559,grad_norm: 0.9999994263122575, iteration: 178662
loss: 1.0350226163864136,grad_norm: 0.9999992692541398, iteration: 178663
loss: 1.0350816249847412,grad_norm: 0.9999992271445297, iteration: 178664
loss: 1.0384206771850586,grad_norm: 0.8270154114734931, iteration: 178665
loss: 1.1433343887329102,grad_norm: 0.9999991306551884, iteration: 178666
loss: 0.9708410501480103,grad_norm: 0.9999992192429168, iteration: 178667
loss: 1.0103760957717896,grad_norm: 0.9999993405201885, iteration: 178668
loss: 1.0167747735977173,grad_norm: 0.9999990299601996, iteration: 178669
loss: 1.0107405185699463,grad_norm: 0.9999990428445432, iteration: 178670
loss: 0.9688261151313782,grad_norm: 0.9080956498426918, iteration: 178671
loss: 0.9951848983764648,grad_norm: 0.9403815295928077, iteration: 178672
loss: 0.9936160445213318,grad_norm: 0.9999995651954015, iteration: 178673
loss: 1.0170155763626099,grad_norm: 0.9999992141494264, iteration: 178674
loss: 0.9739366769790649,grad_norm: 0.9999991184199504, iteration: 178675
loss: 1.0994495153427124,grad_norm: 0.9999997317829448, iteration: 178676
loss: 1.003340721130371,grad_norm: 0.8998226325760691, iteration: 178677
loss: 0.97159343957901,grad_norm: 0.9260114438654167, iteration: 178678
loss: 1.0240286588668823,grad_norm: 0.9420411174287499, iteration: 178679
loss: 1.0004150867462158,grad_norm: 0.9999990998487516, iteration: 178680
loss: 1.0165305137634277,grad_norm: 0.9999989545113787, iteration: 178681
loss: 1.0353254079818726,grad_norm: 0.960334112088038, iteration: 178682
loss: 0.9723654389381409,grad_norm: 0.8859826054084077, iteration: 178683
loss: 1.0318390130996704,grad_norm: 0.9999991721220746, iteration: 178684
loss: 0.995920717716217,grad_norm: 0.9999991957395188, iteration: 178685
loss: 0.9911280274391174,grad_norm: 0.9999990933992028, iteration: 178686
loss: 1.0297784805297852,grad_norm: 0.9999993379349574, iteration: 178687
loss: 1.0602686405181885,grad_norm: 0.9999991175418796, iteration: 178688
loss: 0.951779842376709,grad_norm: 0.9999991540525139, iteration: 178689
loss: 1.0039124488830566,grad_norm: 0.9302938741494403, iteration: 178690
loss: 0.9923972487449646,grad_norm: 0.9020005706422579, iteration: 178691
loss: 1.0818524360656738,grad_norm: 0.9999994202061276, iteration: 178692
loss: 0.9918692111968994,grad_norm: 0.9960273799044305, iteration: 178693
loss: 1.025291085243225,grad_norm: 0.9268893869016348, iteration: 178694
loss: 0.9573849439620972,grad_norm: 0.999999117275332, iteration: 178695
loss: 1.0177109241485596,grad_norm: 0.9277865794607685, iteration: 178696
loss: 0.9860591292381287,grad_norm: 0.9999993073992554, iteration: 178697
loss: 0.9990667700767517,grad_norm: 0.9999990872483819, iteration: 178698
loss: 1.0086407661437988,grad_norm: 0.9078823674796226, iteration: 178699
loss: 1.0092732906341553,grad_norm: 0.8989458512211522, iteration: 178700
loss: 0.9660263061523438,grad_norm: 0.9965219922636995, iteration: 178701
loss: 1.0196309089660645,grad_norm: 0.953382493531294, iteration: 178702
loss: 0.9805293083190918,grad_norm: 0.7959304662150444, iteration: 178703
loss: 1.0231119394302368,grad_norm: 0.9999991459128433, iteration: 178704
loss: 0.9890953898429871,grad_norm: 0.9999992312852438, iteration: 178705
loss: 1.0237241983413696,grad_norm: 0.9999990614110185, iteration: 178706
loss: 1.03273344039917,grad_norm: 0.9999991322923415, iteration: 178707
loss: 0.9834018349647522,grad_norm: 0.9999991425457767, iteration: 178708
loss: 1.0344395637512207,grad_norm: 0.9496792057380671, iteration: 178709
loss: 1.0120875835418701,grad_norm: 0.999091098963821, iteration: 178710
loss: 1.0177521705627441,grad_norm: 0.9999998094350585, iteration: 178711
loss: 1.0297770500183105,grad_norm: 0.9999990857862151, iteration: 178712
loss: 1.0192809104919434,grad_norm: 0.9536492004876023, iteration: 178713
loss: 1.0161590576171875,grad_norm: 0.8466654597542587, iteration: 178714
loss: 1.006407380104065,grad_norm: 0.9410624711981107, iteration: 178715
loss: 1.0156352519989014,grad_norm: 0.9999990912101012, iteration: 178716
loss: 1.0081483125686646,grad_norm: 0.9999992008697415, iteration: 178717
loss: 1.0641372203826904,grad_norm: 0.9999995837611373, iteration: 178718
loss: 1.0070595741271973,grad_norm: 0.8304640651119819, iteration: 178719
loss: 0.984218180179596,grad_norm: 0.9999989404651911, iteration: 178720
loss: 1.0167484283447266,grad_norm: 0.9999991812032893, iteration: 178721
loss: 1.0377180576324463,grad_norm: 0.9999990049941171, iteration: 178722
loss: 1.0179455280303955,grad_norm: 0.9999996764805579, iteration: 178723
loss: 1.0335193872451782,grad_norm: 0.9354071336160095, iteration: 178724
loss: 0.9968969225883484,grad_norm: 0.9999991567760793, iteration: 178725
loss: 0.9816848635673523,grad_norm: 0.999999131046412, iteration: 178726
loss: 1.0278822183609009,grad_norm: 0.9842965325748533, iteration: 178727
loss: 0.9879744052886963,grad_norm: 0.999999044954873, iteration: 178728
loss: 0.9838773012161255,grad_norm: 0.9463976422745997, iteration: 178729
loss: 0.9934549927711487,grad_norm: 0.9999991739587702, iteration: 178730
loss: 0.9906266927719116,grad_norm: 0.9212905735157292, iteration: 178731
loss: 0.9611160159111023,grad_norm: 0.9238730136510895, iteration: 178732
loss: 0.9597469568252563,grad_norm: 0.9113783021457172, iteration: 178733
loss: 0.9735997915267944,grad_norm: 0.9999991735537943, iteration: 178734
loss: 1.028897762298584,grad_norm: 0.9652218361388581, iteration: 178735
loss: 1.0023441314697266,grad_norm: 0.9953695692246666, iteration: 178736
loss: 0.942264974117279,grad_norm: 0.8843565228007364, iteration: 178737
loss: 1.062512993812561,grad_norm: 0.9489628445290774, iteration: 178738
loss: 1.013054370880127,grad_norm: 0.9701491933368425, iteration: 178739
loss: 0.9940646290779114,grad_norm: 0.8302144026994898, iteration: 178740
loss: 1.0185188055038452,grad_norm: 0.9428759403602132, iteration: 178741
loss: 1.0051567554473877,grad_norm: 0.999999091428173, iteration: 178742
loss: 1.035178542137146,grad_norm: 0.9999990541257423, iteration: 178743
loss: 0.9991584420204163,grad_norm: 0.9733094645512853, iteration: 178744
loss: 1.0286086797714233,grad_norm: 0.9962399641137253, iteration: 178745
loss: 1.0389196872711182,grad_norm: 0.9924736161739665, iteration: 178746
loss: 1.0454756021499634,grad_norm: 0.9524263431658776, iteration: 178747
loss: 1.06486177444458,grad_norm: 0.9999995641635452, iteration: 178748
loss: 1.0047030448913574,grad_norm: 0.972863161185706, iteration: 178749
loss: 1.0160629749298096,grad_norm: 0.8524691402023731, iteration: 178750
loss: 1.0148835182189941,grad_norm: 0.8392028894003681, iteration: 178751
loss: 1.01261305809021,grad_norm: 0.8351095702885651, iteration: 178752
loss: 0.9858940839767456,grad_norm: 0.9331223325654018, iteration: 178753
loss: 0.9858573079109192,grad_norm: 0.824284375008905, iteration: 178754
loss: 0.9944433569908142,grad_norm: 0.9228997182376418, iteration: 178755
loss: 0.9881362318992615,grad_norm: 0.9315651959347672, iteration: 178756
loss: 1.0061206817626953,grad_norm: 0.9999990444758354, iteration: 178757
loss: 0.9866148233413696,grad_norm: 0.9999989239692826, iteration: 178758
loss: 1.0355983972549438,grad_norm: 0.999999694743072, iteration: 178759
loss: 1.0086464881896973,grad_norm: 0.9725542671041999, iteration: 178760
loss: 0.9660057425498962,grad_norm: 0.9999990264090926, iteration: 178761
loss: 1.0682857036590576,grad_norm: 0.9999997702379608, iteration: 178762
loss: 1.0979647636413574,grad_norm: 0.9999993585387671, iteration: 178763
loss: 1.0088205337524414,grad_norm: 0.870792800645868, iteration: 178764
loss: 1.0556577444076538,grad_norm: 0.9999994322113782, iteration: 178765
loss: 1.036939024925232,grad_norm: 0.9296746943185454, iteration: 178766
loss: 0.9932911396026611,grad_norm: 0.9999990278386586, iteration: 178767
loss: 0.9750626087188721,grad_norm: 0.9999990540813881, iteration: 178768
loss: 1.010212779045105,grad_norm: 0.9999990104159202, iteration: 178769
loss: 0.9823448657989502,grad_norm: 0.8043839114010358, iteration: 178770
loss: 0.9837322235107422,grad_norm: 0.99999907379815, iteration: 178771
loss: 0.9611555337905884,grad_norm: 0.9999992038667995, iteration: 178772
loss: 1.011711835861206,grad_norm: 0.8893057594483449, iteration: 178773
loss: 0.9729744791984558,grad_norm: 0.9999991342735056, iteration: 178774
loss: 1.0260974168777466,grad_norm: 0.9500655729346685, iteration: 178775
loss: 1.0235973596572876,grad_norm: 0.9516015040210505, iteration: 178776
loss: 0.9896613359451294,grad_norm: 0.9999992206593767, iteration: 178777
loss: 1.0187777280807495,grad_norm: 0.8361967269867144, iteration: 178778
loss: 1.0100722312927246,grad_norm: 0.9859245103964284, iteration: 178779
loss: 1.0651695728302002,grad_norm: 0.9999990310125216, iteration: 178780
loss: 1.111458420753479,grad_norm: 0.9999995739880638, iteration: 178781
loss: 0.9649146199226379,grad_norm: 0.999999222586473, iteration: 178782
loss: 0.9692345857620239,grad_norm: 0.9999992990379171, iteration: 178783
loss: 0.9762334227561951,grad_norm: 0.9651750345028249, iteration: 178784
loss: 0.9836637377738953,grad_norm: 0.9900769433460745, iteration: 178785
loss: 0.9642851948738098,grad_norm: 0.9999991261672633, iteration: 178786
loss: 0.9825710654258728,grad_norm: 0.8416211640213347, iteration: 178787
loss: 0.9864411354064941,grad_norm: 0.9287535410677434, iteration: 178788
loss: 0.985984742641449,grad_norm: 0.9546856032888248, iteration: 178789
loss: 0.9883658289909363,grad_norm: 0.908772287385867, iteration: 178790
loss: 1.0361814498901367,grad_norm: 0.9999989423558786, iteration: 178791
loss: 0.983346164226532,grad_norm: 0.9871677029070345, iteration: 178792
loss: 1.0034372806549072,grad_norm: 0.8477331920575061, iteration: 178793
loss: 1.0162413120269775,grad_norm: 0.8934198961200208, iteration: 178794
loss: 1.0269529819488525,grad_norm: 0.9999990253936826, iteration: 178795
loss: 0.9956731796264648,grad_norm: 0.9700118637996672, iteration: 178796
loss: 1.0102167129516602,grad_norm: 0.9999991557810223, iteration: 178797
loss: 1.0018268823623657,grad_norm: 0.9387389845577091, iteration: 178798
loss: 1.012130856513977,grad_norm: 0.9799151460664659, iteration: 178799
loss: 1.0012568235397339,grad_norm: 0.9999991367108905, iteration: 178800
loss: 1.012886643409729,grad_norm: 0.7559883080397243, iteration: 178801
loss: 0.9878136515617371,grad_norm: 0.9712265951586238, iteration: 178802
loss: 1.0279349088668823,grad_norm: 0.9999990726883986, iteration: 178803
loss: 1.0058375597000122,grad_norm: 0.9494671217492594, iteration: 178804
loss: 0.9791773557662964,grad_norm: 0.8761337436829446, iteration: 178805
loss: 1.0034292936325073,grad_norm: 0.9416487610388957, iteration: 178806
loss: 1.0204617977142334,grad_norm: 0.9999991056636776, iteration: 178807
loss: 0.9898477792739868,grad_norm: 0.9645783522273786, iteration: 178808
loss: 1.0167661905288696,grad_norm: 0.8799973059276309, iteration: 178809
loss: 0.9737234115600586,grad_norm: 0.8908673397834617, iteration: 178810
loss: 0.9995570182800293,grad_norm: 0.9999991812495079, iteration: 178811
loss: 0.9861752390861511,grad_norm: 0.9999991121947696, iteration: 178812
loss: 0.9918901324272156,grad_norm: 0.9999989217923094, iteration: 178813
loss: 0.983776867389679,grad_norm: 0.999998985387014, iteration: 178814
loss: 1.0183403491973877,grad_norm: 0.7151062183578882, iteration: 178815
loss: 0.9506007432937622,grad_norm: 0.9299555773322982, iteration: 178816
loss: 0.9939159750938416,grad_norm: 0.9173171241647207, iteration: 178817
loss: 1.0093333721160889,grad_norm: 0.9999994640309376, iteration: 178818
loss: 0.9968199133872986,grad_norm: 0.9065848598467462, iteration: 178819
loss: 1.0217258930206299,grad_norm: 0.9563069114676286, iteration: 178820
loss: 1.017128825187683,grad_norm: 0.9842519924406987, iteration: 178821
loss: 0.9612561464309692,grad_norm: 0.9362305985782399, iteration: 178822
loss: 0.9827623963356018,grad_norm: 0.9999990065597322, iteration: 178823
loss: 1.0879026651382446,grad_norm: 0.9999994783246233, iteration: 178824
loss: 0.967354953289032,grad_norm: 0.9999991836634555, iteration: 178825
loss: 0.9602273106575012,grad_norm: 0.7898313283193448, iteration: 178826
loss: 0.9634559750556946,grad_norm: 0.8728080630089616, iteration: 178827
loss: 0.9984012842178345,grad_norm: 0.9999991573191659, iteration: 178828
loss: 0.9857730269432068,grad_norm: 0.9999995664950213, iteration: 178829
loss: 1.0175447463989258,grad_norm: 0.985324826252021, iteration: 178830
loss: 1.0368925333023071,grad_norm: 0.9048479914757472, iteration: 178831
loss: 1.0033917427062988,grad_norm: 0.9238575648231047, iteration: 178832
loss: 0.9995605945587158,grad_norm: 0.9999990851375105, iteration: 178833
loss: 0.9693686962127686,grad_norm: 0.9999989199501896, iteration: 178834
loss: 1.0225969552993774,grad_norm: 0.999999272157797, iteration: 178835
loss: 0.9882678985595703,grad_norm: 0.9999989810152983, iteration: 178836
loss: 1.0285930633544922,grad_norm: 0.9999991103187335, iteration: 178837
loss: 0.9981456995010376,grad_norm: 0.9999990526275065, iteration: 178838
loss: 0.9948961138725281,grad_norm: 0.9582330777196766, iteration: 178839
loss: 1.0099811553955078,grad_norm: 0.9999991043635482, iteration: 178840
loss: 0.9683489203453064,grad_norm: 0.9999990220963394, iteration: 178841
loss: 0.9717628359794617,grad_norm: 0.9999990464715066, iteration: 178842
loss: 1.0084729194641113,grad_norm: 0.9999991680583913, iteration: 178843
loss: 0.9547398686408997,grad_norm: 0.9999991048570475, iteration: 178844
loss: 0.9887610673904419,grad_norm: 0.9999992092811559, iteration: 178845
loss: 1.0131118297576904,grad_norm: 0.997541354588232, iteration: 178846
loss: 0.9814045429229736,grad_norm: 0.9373976689096974, iteration: 178847
loss: 1.032583475112915,grad_norm: 0.9999990125759849, iteration: 178848
loss: 0.9860968589782715,grad_norm: 0.9388863844064325, iteration: 178849
loss: 1.014504075050354,grad_norm: 0.9999991358566187, iteration: 178850
loss: 0.9928048849105835,grad_norm: 0.8501881701875892, iteration: 178851
loss: 0.9586736559867859,grad_norm: 0.8726334482483274, iteration: 178852
loss: 1.0234230756759644,grad_norm: 0.9999991759819646, iteration: 178853
loss: 1.0067038536071777,grad_norm: 0.9883666025143343, iteration: 178854
loss: 0.9671993255615234,grad_norm: 0.9999992088882336, iteration: 178855
loss: 0.9755231738090515,grad_norm: 0.9999992477653343, iteration: 178856
loss: 1.0671217441558838,grad_norm: 0.9999990620611247, iteration: 178857
loss: 1.000801920890808,grad_norm: 0.999998986736909, iteration: 178858
loss: 0.9593289494514465,grad_norm: 0.9228391792316851, iteration: 178859
loss: 1.0053807497024536,grad_norm: 0.8305093004135485, iteration: 178860
loss: 0.9674020409584045,grad_norm: 0.9999991998534806, iteration: 178861
loss: 0.9571921229362488,grad_norm: 0.9607305755083949, iteration: 178862
loss: 0.970626175403595,grad_norm: 0.9999990603406139, iteration: 178863
loss: 1.0235227346420288,grad_norm: 0.999999200864031, iteration: 178864
loss: 1.0089629888534546,grad_norm: 0.9571853835957671, iteration: 178865
loss: 1.0225433111190796,grad_norm: 0.919711814742077, iteration: 178866
loss: 1.0000299215316772,grad_norm: 0.9949161006953527, iteration: 178867
loss: 1.0724672079086304,grad_norm: 0.9999999412822882, iteration: 178868
loss: 0.9872929453849792,grad_norm: 0.9999991141014917, iteration: 178869
loss: 1.0223870277404785,grad_norm: 0.8535316789295793, iteration: 178870
loss: 0.97841876745224,grad_norm: 0.9119040176765352, iteration: 178871
loss: 1.0373575687408447,grad_norm: 0.9519730680096811, iteration: 178872
loss: 0.9761550426483154,grad_norm: 0.912748938196541, iteration: 178873
loss: 0.9852365851402283,grad_norm: 0.9999991618886221, iteration: 178874
loss: 1.006123661994934,grad_norm: 0.9999992523181453, iteration: 178875
loss: 1.0216588973999023,grad_norm: 0.9045572138483925, iteration: 178876
loss: 1.001024603843689,grad_norm: 0.9999992379589344, iteration: 178877
loss: 0.983309805393219,grad_norm: 0.8551748522487725, iteration: 178878
loss: 1.0076671838760376,grad_norm: 0.9955275750443171, iteration: 178879
loss: 0.9907146692276001,grad_norm: 0.8736617684553665, iteration: 178880
loss: 0.9959257245063782,grad_norm: 0.8442901512703062, iteration: 178881
loss: 1.027458906173706,grad_norm: 0.8834820291095348, iteration: 178882
loss: 0.9830026030540466,grad_norm: 0.9999992922709655, iteration: 178883
loss: 1.0556378364562988,grad_norm: 0.9999994012530875, iteration: 178884
loss: 0.9986896514892578,grad_norm: 0.9999992978344597, iteration: 178885
loss: 1.0740833282470703,grad_norm: 0.9999991240932857, iteration: 178886
loss: 1.0502954721450806,grad_norm: 0.9999999216363382, iteration: 178887
loss: 0.9921693205833435,grad_norm: 0.9999990030603213, iteration: 178888
loss: 0.9769278168678284,grad_norm: 0.9999990917836011, iteration: 178889
loss: 0.9900929927825928,grad_norm: 0.9999991907694539, iteration: 178890
loss: 0.9968993663787842,grad_norm: 0.9999989082757895, iteration: 178891
loss: 0.9920387864112854,grad_norm: 0.9999989605493232, iteration: 178892
loss: 1.018916130065918,grad_norm: 0.9999990501720739, iteration: 178893
loss: 1.0031322240829468,grad_norm: 0.9999994000941437, iteration: 178894
loss: 1.072522759437561,grad_norm: 0.9999992868667633, iteration: 178895
loss: 1.0073126554489136,grad_norm: 0.9999991683684366, iteration: 178896
loss: 1.0089313983917236,grad_norm: 0.9916552485045909, iteration: 178897
loss: 1.0143283605575562,grad_norm: 0.963427484604849, iteration: 178898
loss: 0.9924135804176331,grad_norm: 0.9063672462168074, iteration: 178899
loss: 0.9948270320892334,grad_norm: 0.9999997626421516, iteration: 178900
loss: 0.9850394129753113,grad_norm: 0.9999991286890096, iteration: 178901
loss: 1.0049066543579102,grad_norm: 0.999999039237318, iteration: 178902
loss: 0.9944640398025513,grad_norm: 0.9624246059629132, iteration: 178903
loss: 1.013494849205017,grad_norm: 0.9999991515885331, iteration: 178904
loss: 1.0257322788238525,grad_norm: 0.9999995764043715, iteration: 178905
loss: 0.9852138757705688,grad_norm: 0.805715365160883, iteration: 178906
loss: 1.0306702852249146,grad_norm: 0.9180292316304832, iteration: 178907
loss: 0.9669109582901001,grad_norm: 0.9926357138599481, iteration: 178908
loss: 1.0222907066345215,grad_norm: 0.9602221525073058, iteration: 178909
loss: 1.0418365001678467,grad_norm: 0.9999991931119194, iteration: 178910
loss: 0.9740679264068604,grad_norm: 0.9999991188719998, iteration: 178911
loss: 0.9963642954826355,grad_norm: 0.8866479393539785, iteration: 178912
loss: 0.9741182923316956,grad_norm: 0.9999991065170291, iteration: 178913
loss: 1.0124659538269043,grad_norm: 0.9265239406075184, iteration: 178914
loss: 1.1117504835128784,grad_norm: 0.9999992385251825, iteration: 178915
loss: 1.0048705339431763,grad_norm: 0.9999991054725448, iteration: 178916
loss: 0.979461133480072,grad_norm: 0.9987521294326994, iteration: 178917
loss: 1.048117995262146,grad_norm: 0.9999992932923218, iteration: 178918
loss: 1.0075734853744507,grad_norm: 0.9999990065822408, iteration: 178919
loss: 0.983151376247406,grad_norm: 0.9999990825767896, iteration: 178920
loss: 0.984656810760498,grad_norm: 0.999999179510212, iteration: 178921
loss: 0.9696863889694214,grad_norm: 0.9999990890660866, iteration: 178922
loss: 0.9880200624465942,grad_norm: 0.9999989701926605, iteration: 178923
loss: 1.009800910949707,grad_norm: 0.999999036462452, iteration: 178924
loss: 1.0598620176315308,grad_norm: 0.9044234311748363, iteration: 178925
loss: 0.9956952929496765,grad_norm: 0.9999990691632236, iteration: 178926
loss: 1.0216013193130493,grad_norm: 0.9737385558343798, iteration: 178927
loss: 0.9907381534576416,grad_norm: 0.8234561575413573, iteration: 178928
loss: 0.9904935956001282,grad_norm: 0.9999991776941731, iteration: 178929
loss: 1.0353796482086182,grad_norm: 0.9233294644304645, iteration: 178930
loss: 1.0045725107192993,grad_norm: 0.9095487894286206, iteration: 178931
loss: 0.9798927903175354,grad_norm: 0.9087125044208698, iteration: 178932
loss: 1.0616366863250732,grad_norm: 0.9957759404317176, iteration: 178933
loss: 1.0124398469924927,grad_norm: 0.9999992437787147, iteration: 178934
loss: 0.9874071478843689,grad_norm: 0.9999992585894157, iteration: 178935
loss: 1.010613203048706,grad_norm: 0.8391998933025945, iteration: 178936
loss: 0.9850494265556335,grad_norm: 0.9999988930822419, iteration: 178937
loss: 0.9856888651847839,grad_norm: 0.9999991519255181, iteration: 178938
loss: 0.9548740386962891,grad_norm: 0.9766183243225793, iteration: 178939
loss: 1.0113211870193481,grad_norm: 0.9999992927520289, iteration: 178940
loss: 1.014524221420288,grad_norm: 1.0000000795443607, iteration: 178941
loss: 1.0074703693389893,grad_norm: 0.9999990713704708, iteration: 178942
loss: 0.9811200499534607,grad_norm: 0.9999991777369966, iteration: 178943
loss: 0.9962648153305054,grad_norm: 0.9999990955716002, iteration: 178944
loss: 1.004500150680542,grad_norm: 0.860697301731129, iteration: 178945
loss: 1.0172086954116821,grad_norm: 0.9341067120250285, iteration: 178946
loss: 1.0046743154525757,grad_norm: 0.9999989886644002, iteration: 178947
loss: 1.0141061544418335,grad_norm: 0.9999991892774112, iteration: 178948
loss: 0.970050573348999,grad_norm: 0.9999991776088277, iteration: 178949
loss: 1.0322984457015991,grad_norm: 0.9999990331343916, iteration: 178950
loss: 1.0273330211639404,grad_norm: 0.9144630675182017, iteration: 178951
loss: 1.1321393251419067,grad_norm: 0.9999992628665202, iteration: 178952
loss: 1.0292178392410278,grad_norm: 0.9595735666133636, iteration: 178953
loss: 0.9949185252189636,grad_norm: 0.999999059623024, iteration: 178954
loss: 1.0005170106887817,grad_norm: 0.9999992359824859, iteration: 178955
loss: 1.008260726928711,grad_norm: 0.8242931928318276, iteration: 178956
loss: 1.0960290431976318,grad_norm: 0.9999992256908427, iteration: 178957
loss: 1.0298842191696167,grad_norm: 0.9999998913294926, iteration: 178958
loss: 1.0957119464874268,grad_norm: 0.9999998503799838, iteration: 178959
loss: 1.193782925605774,grad_norm: 0.9999997078881908, iteration: 178960
loss: 0.9644735455513,grad_norm: 0.9221431919188756, iteration: 178961
loss: 0.9989710450172424,grad_norm: 0.9542394126343073, iteration: 178962
loss: 0.9831224083900452,grad_norm: 0.9999991446581121, iteration: 178963
loss: 1.0391268730163574,grad_norm: 0.9799907602678802, iteration: 178964
loss: 0.9986962676048279,grad_norm: 0.9999992087419152, iteration: 178965
loss: 1.0076088905334473,grad_norm: 0.9999993411565958, iteration: 178966
loss: 1.0088083744049072,grad_norm: 0.8781277742256574, iteration: 178967
loss: 0.9866133332252502,grad_norm: 0.8464338071671235, iteration: 178968
loss: 0.948313295841217,grad_norm: 0.9999990092961805, iteration: 178969
loss: 1.0607532262802124,grad_norm: 0.9999994332890079, iteration: 178970
loss: 1.0162242650985718,grad_norm: 0.8211249617723534, iteration: 178971
loss: 0.9893606305122375,grad_norm: 0.9999991490171793, iteration: 178972
loss: 0.9726461172103882,grad_norm: 0.9999991697948089, iteration: 178973
loss: 0.9892091751098633,grad_norm: 0.9999991979500492, iteration: 178974
loss: 0.9746906161308289,grad_norm: 0.9281749071998426, iteration: 178975
loss: 1.0346382856369019,grad_norm: 0.9999991517102266, iteration: 178976
loss: 0.9896335601806641,grad_norm: 0.9511968218490304, iteration: 178977
loss: 0.9746861457824707,grad_norm: 0.9621275497055218, iteration: 178978
loss: 0.9943010210990906,grad_norm: 0.9999990834595877, iteration: 178979
loss: 1.0026532411575317,grad_norm: 0.9999999148455203, iteration: 178980
loss: 1.1292904615402222,grad_norm: 0.9999991457823787, iteration: 178981
loss: 1.1031242609024048,grad_norm: 0.9999989882975426, iteration: 178982
loss: 1.0086926221847534,grad_norm: 0.9985148510139732, iteration: 178983
loss: 0.9622656106948853,grad_norm: 0.9999990237573536, iteration: 178984
loss: 1.0562396049499512,grad_norm: 0.9951895342834614, iteration: 178985
loss: 0.9337427020072937,grad_norm: 0.960063281356815, iteration: 178986
loss: 0.9620965123176575,grad_norm: 0.9080946496177191, iteration: 178987
loss: 1.0857053995132446,grad_norm: 0.9999994591813517, iteration: 178988
loss: 1.0456081628799438,grad_norm: 0.9999990934128814, iteration: 178989
loss: 0.9759364724159241,grad_norm: 0.9633348437527959, iteration: 178990
loss: 0.977439284324646,grad_norm: 0.984695406805752, iteration: 178991
loss: 0.9810824394226074,grad_norm: 0.9999990207518267, iteration: 178992
loss: 1.0180635452270508,grad_norm: 0.8735051690657484, iteration: 178993
loss: 1.0194088220596313,grad_norm: 0.9342989930251698, iteration: 178994
loss: 1.000221848487854,grad_norm: 0.888428463856902, iteration: 178995
loss: 1.0150669813156128,grad_norm: 0.9904340886839631, iteration: 178996
loss: 1.0016958713531494,grad_norm: 0.9999992228986502, iteration: 178997
loss: 1.018032431602478,grad_norm: 0.999998970457778, iteration: 178998
loss: 1.0492336750030518,grad_norm: 0.9999999388484025, iteration: 178999
loss: 1.0192493200302124,grad_norm: 0.7599899406242073, iteration: 179000
loss: 1.0233080387115479,grad_norm: 0.9999991716166238, iteration: 179001
loss: 0.9562705159187317,grad_norm: 0.9999991248085489, iteration: 179002
loss: 1.0137748718261719,grad_norm: 0.9999997644321785, iteration: 179003
loss: 0.9692381024360657,grad_norm: 0.9999990827029901, iteration: 179004
loss: 0.9796186089515686,grad_norm: 0.8915964862451452, iteration: 179005
loss: 0.9777772426605225,grad_norm: 0.9900855261182331, iteration: 179006
loss: 0.9964654445648193,grad_norm: 0.8091024278477169, iteration: 179007
loss: 0.9966486096382141,grad_norm: 0.983309380179465, iteration: 179008
loss: 1.0314048528671265,grad_norm: 0.9999998360595647, iteration: 179009
loss: 0.9846239686012268,grad_norm: 0.8077679352092776, iteration: 179010
loss: 0.9700003862380981,grad_norm: 0.9999992393579337, iteration: 179011
loss: 1.016714096069336,grad_norm: 0.9999995213462141, iteration: 179012
loss: 0.9945312738418579,grad_norm: 0.9999990399139139, iteration: 179013
loss: 0.9892396330833435,grad_norm: 0.9999991017016638, iteration: 179014
loss: 1.0041738748550415,grad_norm: 0.9999999992975764, iteration: 179015
loss: 1.1037899255752563,grad_norm: 0.9999992133248695, iteration: 179016
loss: 0.9800338745117188,grad_norm: 0.8734313898862969, iteration: 179017
loss: 0.9839635491371155,grad_norm: 0.9999991411726602, iteration: 179018
loss: 1.0123087167739868,grad_norm: 0.8588038978102913, iteration: 179019
loss: 1.0638362169265747,grad_norm: 0.9999991602045254, iteration: 179020
loss: 1.0067487955093384,grad_norm: 0.9999991194209227, iteration: 179021
loss: 1.0027180910110474,grad_norm: 0.8370272347529343, iteration: 179022
loss: 0.9685729742050171,grad_norm: 0.9999990106332027, iteration: 179023
loss: 0.97750324010849,grad_norm: 0.907823801018566, iteration: 179024
loss: 0.9929242134094238,grad_norm: 0.9435091988103107, iteration: 179025
loss: 0.9756231904029846,grad_norm: 0.9999990048435718, iteration: 179026
loss: 1.0732923746109009,grad_norm: 0.8898465148411988, iteration: 179027
loss: 0.9773181080818176,grad_norm: 0.8840335516149089, iteration: 179028
loss: 1.0279136896133423,grad_norm: 0.9999991103553255, iteration: 179029
loss: 1.0115878582000732,grad_norm: 0.999999247832902, iteration: 179030
loss: 0.9680513143539429,grad_norm: 0.9261669504784188, iteration: 179031
loss: 1.052455186843872,grad_norm: 0.9999991573972032, iteration: 179032
loss: 1.0025115013122559,grad_norm: 0.9576933304048997, iteration: 179033
loss: 1.0169951915740967,grad_norm: 0.9999991444105822, iteration: 179034
loss: 0.9917798638343811,grad_norm: 0.9999989767996438, iteration: 179035
loss: 1.1131060123443604,grad_norm: 0.9999998211067699, iteration: 179036
loss: 1.0596650838851929,grad_norm: 0.9999992240144872, iteration: 179037
loss: 0.9882648587226868,grad_norm: 0.9999992306277657, iteration: 179038
loss: 0.9808565378189087,grad_norm: 0.9999997764552698, iteration: 179039
loss: 1.0182453393936157,grad_norm: 0.9999993339184708, iteration: 179040
loss: 1.0388813018798828,grad_norm: 0.9999993296465642, iteration: 179041
loss: 1.0090408325195312,grad_norm: 0.9868927480448135, iteration: 179042
loss: 0.9644691944122314,grad_norm: 0.9999993033636783, iteration: 179043
loss: 0.9934554100036621,grad_norm: 0.9495827342499454, iteration: 179044
loss: 1.0345643758773804,grad_norm: 0.9874808527872229, iteration: 179045
loss: 0.9835292100906372,grad_norm: 0.7513754984344598, iteration: 179046
loss: 1.0050055980682373,grad_norm: 0.9967273216931716, iteration: 179047
loss: 1.0027822256088257,grad_norm: 0.9625838686478606, iteration: 179048
loss: 0.9981501698493958,grad_norm: 0.9645204339560005, iteration: 179049
loss: 0.9849311709403992,grad_norm: 0.9328368875673523, iteration: 179050
loss: 1.0010043382644653,grad_norm: 0.9999991530797133, iteration: 179051
loss: 1.0182851552963257,grad_norm: 0.9340724889735987, iteration: 179052
loss: 0.983552873134613,grad_norm: 0.8516075928953523, iteration: 179053
loss: 0.9886617064476013,grad_norm: 0.8796819713086109, iteration: 179054
loss: 1.1265918016433716,grad_norm: 0.999999179510851, iteration: 179055
loss: 1.042078971862793,grad_norm: 0.9999996498572511, iteration: 179056
loss: 1.006252646446228,grad_norm: 0.923334902364238, iteration: 179057
loss: 1.0133339166641235,grad_norm: 0.9999991178059974, iteration: 179058
loss: 1.000990629196167,grad_norm: 0.9999991443096143, iteration: 179059
loss: 0.9894687533378601,grad_norm: 0.8891894048811929, iteration: 179060
loss: 0.9789810180664062,grad_norm: 0.9999992098047489, iteration: 179061
loss: 0.9831649661064148,grad_norm: 0.9907674199982216, iteration: 179062
loss: 0.9514882564544678,grad_norm: 0.9085824823177764, iteration: 179063
loss: 1.024649977684021,grad_norm: 0.9770705898277237, iteration: 179064
loss: 0.9811437726020813,grad_norm: 0.9999992448031239, iteration: 179065
loss: 0.986741304397583,grad_norm: 0.9999993895835011, iteration: 179066
loss: 0.9977894425392151,grad_norm: 0.9265485575178741, iteration: 179067
loss: 0.9709441661834717,grad_norm: 0.9999992301424936, iteration: 179068
loss: 1.0002918243408203,grad_norm: 0.999999242465638, iteration: 179069
loss: 1.0042080879211426,grad_norm: 0.9999990668313484, iteration: 179070
loss: 0.9808515310287476,grad_norm: 0.9309293616576222, iteration: 179071
loss: 0.9888275265693665,grad_norm: 0.9221158553250292, iteration: 179072
loss: 1.0311241149902344,grad_norm: 0.9094614991878768, iteration: 179073
loss: 1.0465843677520752,grad_norm: 0.9999989892414942, iteration: 179074
loss: 0.9810078740119934,grad_norm: 0.9499735287780097, iteration: 179075
loss: 1.0153563022613525,grad_norm: 0.9340777874315709, iteration: 179076
loss: 0.9925810098648071,grad_norm: 0.9418878974522127, iteration: 179077
loss: 0.9997684359550476,grad_norm: 0.8720962450542336, iteration: 179078
loss: 0.9999386668205261,grad_norm: 0.9240318052646583, iteration: 179079
loss: 0.9672529697418213,grad_norm: 0.8987090608103546, iteration: 179080
loss: 1.0205649137496948,grad_norm: 0.9999990813785075, iteration: 179081
loss: 1.0425959825515747,grad_norm: 0.9088568558085907, iteration: 179082
loss: 0.9962461590766907,grad_norm: 0.9999996939567679, iteration: 179083
loss: 0.9975016713142395,grad_norm: 0.9999995298150245, iteration: 179084
loss: 0.991826057434082,grad_norm: 0.8592507404963755, iteration: 179085
loss: 0.9980521202087402,grad_norm: 0.9999997616692254, iteration: 179086
loss: 1.0266318321228027,grad_norm: 0.9763651595732994, iteration: 179087
loss: 0.9742593765258789,grad_norm: 0.9693671176793068, iteration: 179088
loss: 0.9904719591140747,grad_norm: 0.9999994858622011, iteration: 179089
loss: 1.016656756401062,grad_norm: 0.9999994451112212, iteration: 179090
loss: 0.961341917514801,grad_norm: 0.9602910180477734, iteration: 179091
loss: 1.0009585618972778,grad_norm: 0.9439650000562776, iteration: 179092
loss: 0.9767777323722839,grad_norm: 0.8413439448347954, iteration: 179093
loss: 0.9925885796546936,grad_norm: 0.9083989232445321, iteration: 179094
loss: 0.9714565277099609,grad_norm: 0.851996237810412, iteration: 179095
loss: 0.9983672499656677,grad_norm: 0.9562696877073809, iteration: 179096
loss: 1.0461424589157104,grad_norm: 0.9999991129895577, iteration: 179097
loss: 1.0580918788909912,grad_norm: 0.9999999997631233, iteration: 179098
loss: 0.9891543984413147,grad_norm: 0.7326617434011004, iteration: 179099
loss: 0.9861331582069397,grad_norm: 0.8759928390534494, iteration: 179100
loss: 1.0317689180374146,grad_norm: 0.9995277917069026, iteration: 179101
loss: 0.9870944023132324,grad_norm: 0.8291099225386165, iteration: 179102
loss: 1.019668698310852,grad_norm: 0.9999990032411782, iteration: 179103
loss: 0.9884024262428284,grad_norm: 0.991243363984919, iteration: 179104
loss: 1.0510302782058716,grad_norm: 0.9999992844806154, iteration: 179105
loss: 1.0105794668197632,grad_norm: 0.8858993788807551, iteration: 179106
loss: 1.0053528547286987,grad_norm: 0.9999991760985012, iteration: 179107
loss: 1.0056394338607788,grad_norm: 0.9432061856786611, iteration: 179108
loss: 1.0265140533447266,grad_norm: 0.9999992289317574, iteration: 179109
loss: 0.9950675368309021,grad_norm: 0.9101554781011411, iteration: 179110
loss: 0.9939835667610168,grad_norm: 0.999999255121824, iteration: 179111
loss: 1.003767490386963,grad_norm: 0.9999991398043031, iteration: 179112
loss: 1.0255261659622192,grad_norm: 0.8739117051825074, iteration: 179113
loss: 0.9911144971847534,grad_norm: 0.949168695836566, iteration: 179114
loss: 1.0057642459869385,grad_norm: 0.8697911757041183, iteration: 179115
loss: 0.9950597286224365,grad_norm: 0.9293744095536121, iteration: 179116
loss: 1.0055373907089233,grad_norm: 0.9999991445536791, iteration: 179117
loss: 1.0015391111373901,grad_norm: 0.9999992609358623, iteration: 179118
loss: 1.0121890306472778,grad_norm: 0.9312497334709571, iteration: 179119
loss: 0.9925268888473511,grad_norm: 0.9917584539697477, iteration: 179120
loss: 0.939959704875946,grad_norm: 0.8172837871684832, iteration: 179121
loss: 1.0235081911087036,grad_norm: 0.9999994025928435, iteration: 179122
loss: 1.0171482563018799,grad_norm: 0.8467072069148815, iteration: 179123
loss: 1.006872534751892,grad_norm: 0.9999994424346635, iteration: 179124
loss: 1.033681869506836,grad_norm: 0.9999993099037774, iteration: 179125
loss: 0.9929190874099731,grad_norm: 0.7978640151200531, iteration: 179126
loss: 1.0211467742919922,grad_norm: 0.9999991451912636, iteration: 179127
loss: 1.023211121559143,grad_norm: 0.9999991179502897, iteration: 179128
loss: 0.9577690958976746,grad_norm: 0.8955673692733136, iteration: 179129
loss: 1.0148268938064575,grad_norm: 0.9999990968766981, iteration: 179130
loss: 1.0019128322601318,grad_norm: 0.9999991130838216, iteration: 179131
loss: 0.9804961085319519,grad_norm: 0.9999991890148587, iteration: 179132
loss: 0.9702441692352295,grad_norm: 0.9999992283598226, iteration: 179133
loss: 1.0267497301101685,grad_norm: 0.9999990986273978, iteration: 179134
loss: 1.026569128036499,grad_norm: 0.9999993431568234, iteration: 179135
loss: 1.0197149515151978,grad_norm: 0.9318729630400377, iteration: 179136
loss: 1.0141907930374146,grad_norm: 0.8339616254263184, iteration: 179137
loss: 0.9831871390342712,grad_norm: 0.999999163880254, iteration: 179138
loss: 0.9706199765205383,grad_norm: 0.9999991130853012, iteration: 179139
loss: 1.024832010269165,grad_norm: 0.9999990483873795, iteration: 179140
loss: 1.0280790328979492,grad_norm: 0.9999991639435917, iteration: 179141
loss: 0.99775230884552,grad_norm: 0.9823231079299198, iteration: 179142
loss: 1.0138763189315796,grad_norm: 0.999999077478226, iteration: 179143
loss: 1.0284783840179443,grad_norm: 0.9446734827998622, iteration: 179144
loss: 1.0293363332748413,grad_norm: 0.9537909081592643, iteration: 179145
loss: 0.9966758489608765,grad_norm: 0.9182413753007878, iteration: 179146
loss: 1.007845401763916,grad_norm: 0.937546567506868, iteration: 179147
loss: 0.9886271357536316,grad_norm: 0.9999992330450824, iteration: 179148
loss: 1.0265194177627563,grad_norm: 0.9679060847273284, iteration: 179149
loss: 1.0070488452911377,grad_norm: 0.9999990987830326, iteration: 179150
loss: 1.0130302906036377,grad_norm: 0.8665444880575313, iteration: 179151
loss: 0.9900075793266296,grad_norm: 0.8954613240534318, iteration: 179152
loss: 1.0284481048583984,grad_norm: 0.9999990101585076, iteration: 179153
loss: 0.969594419002533,grad_norm: 0.947233188324511, iteration: 179154
loss: 1.0229359865188599,grad_norm: 0.9999991592686652, iteration: 179155
loss: 0.9892342686653137,grad_norm: 0.9999991723315934, iteration: 179156
loss: 1.0108773708343506,grad_norm: 0.9999990082959489, iteration: 179157
loss: 0.9966006278991699,grad_norm: 0.9999990536204811, iteration: 179158
loss: 0.9854183197021484,grad_norm: 0.9999991137772886, iteration: 179159
loss: 0.9631872177124023,grad_norm: 0.9999991453424116, iteration: 179160
loss: 0.9837203621864319,grad_norm: 0.9191981656255941, iteration: 179161
loss: 0.9921634197235107,grad_norm: 0.9939365403028407, iteration: 179162
loss: 0.9881353974342346,grad_norm: 0.9987748712186061, iteration: 179163
loss: 1.0365115404129028,grad_norm: 0.9999991314722877, iteration: 179164
loss: 0.9794082641601562,grad_norm: 0.9744191654598455, iteration: 179165
loss: 0.9630101323127747,grad_norm: 0.999999097330935, iteration: 179166
loss: 1.01030695438385,grad_norm: 0.9999990592394503, iteration: 179167
loss: 1.0192034244537354,grad_norm: 0.829823752117138, iteration: 179168
loss: 1.0176576375961304,grad_norm: 0.9999997571273476, iteration: 179169
loss: 1.0192638635635376,grad_norm: 0.9999991829308333, iteration: 179170
loss: 1.0093417167663574,grad_norm: 0.9999992761670393, iteration: 179171
loss: 1.0464776754379272,grad_norm: 0.9728690380488384, iteration: 179172
loss: 0.96658855676651,grad_norm: 0.9999996500554629, iteration: 179173
loss: 0.9521413445472717,grad_norm: 0.9006851503697387, iteration: 179174
loss: 1.0110780000686646,grad_norm: 0.9270223087772489, iteration: 179175
loss: 0.995915412902832,grad_norm: 0.9309598405373598, iteration: 179176
loss: 1.0188913345336914,grad_norm: 0.9999992376541071, iteration: 179177
loss: 1.0310447216033936,grad_norm: 0.9200121666799028, iteration: 179178
loss: 0.969815194606781,grad_norm: 0.8374397550715585, iteration: 179179
loss: 0.9960217475891113,grad_norm: 0.9999992819251399, iteration: 179180
loss: 0.9728516936302185,grad_norm: 0.9414723548593591, iteration: 179181
loss: 0.996519923210144,grad_norm: 0.9358270738455854, iteration: 179182
loss: 0.9786139726638794,grad_norm: 0.8569167453206314, iteration: 179183
loss: 0.9929516911506653,grad_norm: 0.9561470149196761, iteration: 179184
loss: 1.0115065574645996,grad_norm: 0.843650765465229, iteration: 179185
loss: 0.9875251054763794,grad_norm: 0.9065588083569205, iteration: 179186
loss: 1.0004936456680298,grad_norm: 0.9999990500552269, iteration: 179187
loss: 1.0387135744094849,grad_norm: 0.999999327575533, iteration: 179188
loss: 1.0043696165084839,grad_norm: 0.9999991877819114, iteration: 179189
loss: 0.985463559627533,grad_norm: 0.8620037371279773, iteration: 179190
loss: 1.0190107822418213,grad_norm: 0.8492613360514154, iteration: 179191
loss: 0.9643513560295105,grad_norm: 0.9695883817094114, iteration: 179192
loss: 1.0405336618423462,grad_norm: 0.879232292123885, iteration: 179193
loss: 0.9910043478012085,grad_norm: 0.978276270637463, iteration: 179194
loss: 0.969464898109436,grad_norm: 0.9999991574131432, iteration: 179195
loss: 1.040597677230835,grad_norm: 0.9999990820315123, iteration: 179196
loss: 1.0122145414352417,grad_norm: 0.9780847342160577, iteration: 179197
loss: 1.0051735639572144,grad_norm: 0.9999990437868742, iteration: 179198
loss: 1.009953498840332,grad_norm: 0.9999990356560706, iteration: 179199
loss: 0.9864545464515686,grad_norm: 0.9033172467304528, iteration: 179200
loss: 0.9957566261291504,grad_norm: 0.9820068032549558, iteration: 179201
loss: 0.9805641174316406,grad_norm: 0.961715137009759, iteration: 179202
loss: 1.0069146156311035,grad_norm: 0.7615480948177467, iteration: 179203
loss: 0.9944062232971191,grad_norm: 0.9855398022893094, iteration: 179204
loss: 1.0014969110488892,grad_norm: 0.9186512703968477, iteration: 179205
loss: 0.996452271938324,grad_norm: 0.999999150100514, iteration: 179206
loss: 0.9938439130783081,grad_norm: 0.889663321926383, iteration: 179207
loss: 0.9870042204856873,grad_norm: 0.961188280517134, iteration: 179208
loss: 1.0073429346084595,grad_norm: 0.9369050974749672, iteration: 179209
loss: 1.0103052854537964,grad_norm: 0.9240980336627841, iteration: 179210
loss: 1.00430166721344,grad_norm: 0.9902189726701808, iteration: 179211
loss: 1.0002295970916748,grad_norm: 0.9267338560940254, iteration: 179212
loss: 0.9878258109092712,grad_norm: 0.9270266645846356, iteration: 179213
loss: 0.9939334988594055,grad_norm: 0.8966033048524037, iteration: 179214
loss: 0.9994570016860962,grad_norm: 0.9408485491478409, iteration: 179215
loss: 0.9995648860931396,grad_norm: 0.9999989568271964, iteration: 179216
loss: 0.9579864144325256,grad_norm: 0.9999990591159017, iteration: 179217
loss: 1.0112881660461426,grad_norm: 0.9999990294624178, iteration: 179218
loss: 1.0095672607421875,grad_norm: 0.9767932539664185, iteration: 179219
loss: 0.9618679285049438,grad_norm: 0.8564167663087977, iteration: 179220
loss: 1.023392915725708,grad_norm: 0.9341814383404418, iteration: 179221
loss: 1.008425235748291,grad_norm: 0.9999991902006953, iteration: 179222
loss: 0.9750113487243652,grad_norm: 0.845175899820778, iteration: 179223
loss: 0.986842155456543,grad_norm: 0.9680737404525505, iteration: 179224
loss: 0.9778285026550293,grad_norm: 0.9095727910461912, iteration: 179225
loss: 0.9635233283042908,grad_norm: 0.9509580007269655, iteration: 179226
loss: 1.018157720565796,grad_norm: 0.9999991178177335, iteration: 179227
loss: 1.0331308841705322,grad_norm: 0.9999997919549134, iteration: 179228
loss: 0.9745194315910339,grad_norm: 0.9999989963389596, iteration: 179229
loss: 0.9929755330085754,grad_norm: 0.977379375814612, iteration: 179230
loss: 1.0478993654251099,grad_norm: 0.9999991879762109, iteration: 179231
loss: 1.0625096559524536,grad_norm: 0.9999991003038935, iteration: 179232
loss: 1.025543212890625,grad_norm: 0.9999992168503228, iteration: 179233
loss: 0.9870113134384155,grad_norm: 0.8371862535708013, iteration: 179234
loss: 1.0199862718582153,grad_norm: 0.9999991023720156, iteration: 179235
loss: 0.9931459426879883,grad_norm: 0.9841310727338762, iteration: 179236
loss: 0.9872125387191772,grad_norm: 0.9706411547365003, iteration: 179237
loss: 1.0020831823349,grad_norm: 0.9438427214732001, iteration: 179238
loss: 1.0210695266723633,grad_norm: 0.9999989949015836, iteration: 179239
loss: 0.9872675538063049,grad_norm: 0.9081577252444278, iteration: 179240
loss: 0.9696889519691467,grad_norm: 0.848976109460859, iteration: 179241
loss: 1.0313082933425903,grad_norm: 0.9999995893649148, iteration: 179242
loss: 1.008790373802185,grad_norm: 0.8962044396108073, iteration: 179243
loss: 1.0032031536102295,grad_norm: 0.9999990664740048, iteration: 179244
loss: 0.9946043491363525,grad_norm: 0.854213938157809, iteration: 179245
loss: 1.027503490447998,grad_norm: 0.9999990532294998, iteration: 179246
loss: 1.0110630989074707,grad_norm: 0.9119108130345474, iteration: 179247
loss: 0.9694397449493408,grad_norm: 0.9999990925252207, iteration: 179248
loss: 1.0337014198303223,grad_norm: 0.921577246804526, iteration: 179249
loss: 1.02552330493927,grad_norm: 0.9386403116307108, iteration: 179250
loss: 0.9826260805130005,grad_norm: 0.9999989967274402, iteration: 179251
loss: 0.988857626914978,grad_norm: 0.9898318729157891, iteration: 179252
loss: 1.0159190893173218,grad_norm: 0.9999989776210375, iteration: 179253
loss: 1.0447360277175903,grad_norm: 0.9999998266041908, iteration: 179254
loss: 0.9756166338920593,grad_norm: 0.9552977733484835, iteration: 179255
loss: 0.9613083600997925,grad_norm: 0.9999990993368767, iteration: 179256
loss: 1.007700800895691,grad_norm: 0.9999990410907971, iteration: 179257
loss: 0.9719706177711487,grad_norm: 0.8750844551527931, iteration: 179258
loss: 0.9885128140449524,grad_norm: 0.9261150144391032, iteration: 179259
loss: 1.006649374961853,grad_norm: 0.9999993809244031, iteration: 179260
loss: 0.9968697428703308,grad_norm: 0.9289939279870083, iteration: 179261
loss: 1.0182639360427856,grad_norm: 0.9999992537040164, iteration: 179262
loss: 0.9921375513076782,grad_norm: 0.8473641525731438, iteration: 179263
loss: 1.0044859647750854,grad_norm: 0.9181117051819085, iteration: 179264
loss: 1.065965175628662,grad_norm: 0.999999306502755, iteration: 179265
loss: 0.9701113104820251,grad_norm: 0.8921174087226141, iteration: 179266
loss: 1.0339932441711426,grad_norm: 0.9999997764811953, iteration: 179267
loss: 1.2312822341918945,grad_norm: 0.9999999453370703, iteration: 179268
loss: 0.9935724139213562,grad_norm: 0.9612683673839597, iteration: 179269
loss: 1.026488184928894,grad_norm: 0.9999991446789127, iteration: 179270
loss: 1.0629609823226929,grad_norm: 0.9999992927352246, iteration: 179271
loss: 1.0102511644363403,grad_norm: 0.9617278362810354, iteration: 179272
loss: 1.0462828874588013,grad_norm: 0.9999998923455315, iteration: 179273
loss: 1.020853042602539,grad_norm: 0.9633212137676154, iteration: 179274
loss: 0.9730291366577148,grad_norm: 0.9999990527071879, iteration: 179275
loss: 0.9924132823944092,grad_norm: 0.8562151840485844, iteration: 179276
loss: 1.0023764371871948,grad_norm: 1.0000000217489846, iteration: 179277
loss: 0.9828945398330688,grad_norm: 0.9999990131174112, iteration: 179278
loss: 1.013451337814331,grad_norm: 0.9999991821022325, iteration: 179279
loss: 0.9549224376678467,grad_norm: 0.9801227821182034, iteration: 179280
loss: 1.0180280208587646,grad_norm: 0.9999992212074279, iteration: 179281
loss: 0.9894076585769653,grad_norm: 0.9330274464926784, iteration: 179282
loss: 1.0146825313568115,grad_norm: 0.9707030110477601, iteration: 179283
loss: 1.0107877254486084,grad_norm: 0.893506104185189, iteration: 179284
loss: 1.0220648050308228,grad_norm: 0.8617404073106133, iteration: 179285
loss: 1.0273253917694092,grad_norm: 0.9999992891339733, iteration: 179286
loss: 1.0365142822265625,grad_norm: 0.9999990729761705, iteration: 179287
loss: 1.0387901067733765,grad_norm: 0.999999949217209, iteration: 179288
loss: 1.0056923627853394,grad_norm: 0.9999990987950625, iteration: 179289
loss: 1.0310440063476562,grad_norm: 0.9661515933632149, iteration: 179290
loss: 1.0353747606277466,grad_norm: 0.9950532864952261, iteration: 179291
loss: 0.9724005460739136,grad_norm: 0.9999990892869467, iteration: 179292
loss: 1.0177273750305176,grad_norm: 0.9088626448558705, iteration: 179293
loss: 0.9725490808486938,grad_norm: 0.8994370868879606, iteration: 179294
loss: 1.0200492143630981,grad_norm: 0.9999990732741304, iteration: 179295
loss: 1.0629111528396606,grad_norm: 0.9999992430636099, iteration: 179296
loss: 0.9800986647605896,grad_norm: 0.9999989692125493, iteration: 179297
loss: 0.9842565655708313,grad_norm: 0.9214887523215283, iteration: 179298
loss: 1.0179561376571655,grad_norm: 0.8384417656780999, iteration: 179299
loss: 1.020286202430725,grad_norm: 0.8985154428807606, iteration: 179300
loss: 0.9985666871070862,grad_norm: 0.9627731502597636, iteration: 179301
loss: 0.9686577916145325,grad_norm: 0.9110614993597234, iteration: 179302
loss: 0.9894127249717712,grad_norm: 0.999999195025417, iteration: 179303
loss: 1.0133424997329712,grad_norm: 0.9999991328825953, iteration: 179304
loss: 1.112541675567627,grad_norm: 0.9999999169474819, iteration: 179305
loss: 1.0274255275726318,grad_norm: 0.9999990949639804, iteration: 179306
loss: 1.0046993494033813,grad_norm: 0.999999294782084, iteration: 179307
loss: 1.002055287361145,grad_norm: 0.9999992340996507, iteration: 179308
loss: 0.9771136045455933,grad_norm: 0.9999991518220249, iteration: 179309
loss: 1.023711085319519,grad_norm: 0.9999991656241664, iteration: 179310
loss: 0.9947133660316467,grad_norm: 0.9999991825229974, iteration: 179311
loss: 0.9870383739471436,grad_norm: 0.9999990877598124, iteration: 179312
loss: 1.0118993520736694,grad_norm: 0.939648262019763, iteration: 179313
loss: 0.9722232222557068,grad_norm: 0.9035737169403044, iteration: 179314
loss: 1.054962396621704,grad_norm: 0.9981127490845811, iteration: 179315
loss: 0.999462366104126,grad_norm: 0.9999992666579809, iteration: 179316
loss: 0.9840537309646606,grad_norm: 0.9999990998742239, iteration: 179317
loss: 0.955820620059967,grad_norm: 0.9999991419800865, iteration: 179318
loss: 1.0175861120224,grad_norm: 0.9999990298318016, iteration: 179319
loss: 1.0122970342636108,grad_norm: 0.9999996979054098, iteration: 179320
loss: 0.9889767169952393,grad_norm: 0.9633300740834895, iteration: 179321
loss: 1.0186448097229004,grad_norm: 0.9999990879143051, iteration: 179322
loss: 1.0089704990386963,grad_norm: 0.9999991425233966, iteration: 179323
loss: 0.9930157661437988,grad_norm: 0.9999991421463452, iteration: 179324
loss: 1.0244364738464355,grad_norm: 0.99999904915569, iteration: 179325
loss: 0.970325767993927,grad_norm: 0.9999991708191887, iteration: 179326
loss: 0.9767388105392456,grad_norm: 0.9678537361618738, iteration: 179327
loss: 0.9815053343772888,grad_norm: 0.9999989351008434, iteration: 179328
loss: 1.0123285055160522,grad_norm: 0.9999991747058088, iteration: 179329
loss: 1.005306601524353,grad_norm: 0.9999991438181856, iteration: 179330
loss: 1.0287139415740967,grad_norm: 0.8404244641012965, iteration: 179331
loss: 1.0025619268417358,grad_norm: 0.9715894921501679, iteration: 179332
loss: 0.9834668040275574,grad_norm: 0.8053230473883383, iteration: 179333
loss: 0.9716686606407166,grad_norm: 0.9806274914727515, iteration: 179334
loss: 1.1063615083694458,grad_norm: 0.9999991679328719, iteration: 179335
loss: 0.9898973703384399,grad_norm: 0.8969127468933891, iteration: 179336
loss: 1.0082074403762817,grad_norm: 0.9999991979414616, iteration: 179337
loss: 0.9941279888153076,grad_norm: 0.9999990081053663, iteration: 179338
loss: 0.9761096835136414,grad_norm: 0.9794421425251572, iteration: 179339
loss: 0.9426937699317932,grad_norm: 0.999999080582636, iteration: 179340
loss: 0.9794705510139465,grad_norm: 0.9421027394727376, iteration: 179341
loss: 1.0318892002105713,grad_norm: 0.9999992500686845, iteration: 179342
loss: 0.9854766726493835,grad_norm: 0.9691925793466372, iteration: 179343
loss: 0.9878619909286499,grad_norm: 0.849293850319699, iteration: 179344
loss: 0.9905825853347778,grad_norm: 0.9999992177024084, iteration: 179345
loss: 0.9771463871002197,grad_norm: 0.9802445536977376, iteration: 179346
loss: 0.9835832118988037,grad_norm: 0.9999990426438129, iteration: 179347
loss: 0.9742425084114075,grad_norm: 0.9765208940496012, iteration: 179348
loss: 1.019795298576355,grad_norm: 0.952788492418579, iteration: 179349
loss: 1.0201317071914673,grad_norm: 0.9999990755461042, iteration: 179350
loss: 0.9905110001564026,grad_norm: 0.9447404364446376, iteration: 179351
loss: 1.0313570499420166,grad_norm: 0.9999991756097717, iteration: 179352
loss: 1.013474464416504,grad_norm: 0.9999991781614115, iteration: 179353
loss: 0.994201123714447,grad_norm: 0.99999915317779, iteration: 179354
loss: 1.003385305404663,grad_norm: 0.9291446281266261, iteration: 179355
loss: 1.0164140462875366,grad_norm: 0.8719087614506771, iteration: 179356
loss: 1.0450489521026611,grad_norm: 0.8415323433695391, iteration: 179357
loss: 1.1729366779327393,grad_norm: 0.9999999122151269, iteration: 179358
loss: 0.9558306932449341,grad_norm: 0.9999992941292324, iteration: 179359
loss: 0.9645342826843262,grad_norm: 0.9999991890981345, iteration: 179360
loss: 0.995547890663147,grad_norm: 0.9474780666209073, iteration: 179361
loss: 0.9924502968788147,grad_norm: 0.9999990697160115, iteration: 179362
loss: 1.0045384168624878,grad_norm: 0.8689629225824483, iteration: 179363
loss: 1.0215307474136353,grad_norm: 0.9999992772279559, iteration: 179364
loss: 1.0403852462768555,grad_norm: 0.9999990773073, iteration: 179365
loss: 0.9812670350074768,grad_norm: 0.8356121131635686, iteration: 179366
loss: 1.0034178495407104,grad_norm: 0.9797569799784454, iteration: 179367
loss: 1.0079126358032227,grad_norm: 0.9014119100560757, iteration: 179368
loss: 1.0178295373916626,grad_norm: 0.8490172487107599, iteration: 179369
loss: 0.9541034698486328,grad_norm: 0.8472658578369662, iteration: 179370
loss: 1.0057170391082764,grad_norm: 0.9999990159752343, iteration: 179371
loss: 1.0207655429840088,grad_norm: 0.9688390826876769, iteration: 179372
loss: 0.992986261844635,grad_norm: 0.8199517345555536, iteration: 179373
loss: 0.9959155917167664,grad_norm: 0.9479414490841567, iteration: 179374
loss: 1.030795693397522,grad_norm: 0.9999998876640853, iteration: 179375
loss: 0.9889973402023315,grad_norm: 0.871641577991504, iteration: 179376
loss: 1.0233927965164185,grad_norm: 0.9677363252267439, iteration: 179377
loss: 1.0098963975906372,grad_norm: 0.9999991880937046, iteration: 179378
loss: 0.9875438809394836,grad_norm: 0.8570729145683694, iteration: 179379
loss: 1.0190414190292358,grad_norm: 0.9999990628724408, iteration: 179380
loss: 0.9997480511665344,grad_norm: 0.9999990837322635, iteration: 179381
loss: 0.967340350151062,grad_norm: 0.9999990994739334, iteration: 179382
loss: 0.9968456625938416,grad_norm: 0.9999990836408194, iteration: 179383
loss: 1.0346473455429077,grad_norm: 0.9999989927741886, iteration: 179384
loss: 1.0023044347763062,grad_norm: 0.941859438519287, iteration: 179385
loss: 0.9760538339614868,grad_norm: 0.8551590454846667, iteration: 179386
loss: 0.9565756320953369,grad_norm: 0.8719331832837435, iteration: 179387
loss: 1.026229739189148,grad_norm: 0.8718374745864903, iteration: 179388
loss: 0.9718561768531799,grad_norm: 0.9999990345481826, iteration: 179389
loss: 0.9581063985824585,grad_norm: 0.8987983703601752, iteration: 179390
loss: 1.0183125734329224,grad_norm: 0.8607082934764306, iteration: 179391
loss: 0.9847108721733093,grad_norm: 0.8542594164719596, iteration: 179392
loss: 0.9830620288848877,grad_norm: 0.8506685203813602, iteration: 179393
loss: 1.0311468839645386,grad_norm: 0.8840990573481647, iteration: 179394
loss: 1.0066602230072021,grad_norm: 0.9880736549871014, iteration: 179395
loss: 0.9923744797706604,grad_norm: 0.9598849284219589, iteration: 179396
loss: 0.974884033203125,grad_norm: 0.9999990305251667, iteration: 179397
loss: 1.0186290740966797,grad_norm: 0.8573001109946567, iteration: 179398
loss: 1.0301942825317383,grad_norm: 0.9104128419701948, iteration: 179399
loss: 1.0350029468536377,grad_norm: 0.9999992423297822, iteration: 179400
loss: 1.2023128271102905,grad_norm: 0.9999991417139019, iteration: 179401
loss: 0.9967101812362671,grad_norm: 0.8807696477984451, iteration: 179402
loss: 1.0263785123825073,grad_norm: 0.8878189938644305, iteration: 179403
loss: 1.0238869190216064,grad_norm: 0.9999993779620595, iteration: 179404
loss: 1.0342687368392944,grad_norm: 0.999999832842661, iteration: 179405
loss: 1.0065100193023682,grad_norm: 0.9450547845889631, iteration: 179406
loss: 0.9895776510238647,grad_norm: 0.8748663329644579, iteration: 179407
loss: 0.9880332946777344,grad_norm: 0.999999293469551, iteration: 179408
loss: 1.0374237298965454,grad_norm: 0.9999992108004183, iteration: 179409
loss: 0.9979901909828186,grad_norm: 0.9999989585035215, iteration: 179410
loss: 1.0046666860580444,grad_norm: 0.9854472291373307, iteration: 179411
loss: 1.0265470743179321,grad_norm: 0.9999990170348557, iteration: 179412
loss: 0.9968721866607666,grad_norm: 0.9999989523456863, iteration: 179413
loss: 1.018707513809204,grad_norm: 0.999999085918651, iteration: 179414
loss: 1.0047951936721802,grad_norm: 0.8822396931684936, iteration: 179415
loss: 0.9934099316596985,grad_norm: 0.9747464965415648, iteration: 179416
loss: 1.0484318733215332,grad_norm: 0.8183901890133392, iteration: 179417
loss: 0.9744734764099121,grad_norm: 0.8853680708456366, iteration: 179418
loss: 0.969741702079773,grad_norm: 0.8351545064964474, iteration: 179419
loss: 1.0047028064727783,grad_norm: 0.9957941038216354, iteration: 179420
loss: 1.0068376064300537,grad_norm: 0.9999990889189664, iteration: 179421
loss: 0.9630945920944214,grad_norm: 0.9762822644312384, iteration: 179422
loss: 0.9468032717704773,grad_norm: 0.9498818082451191, iteration: 179423
loss: 1.0087419748306274,grad_norm: 0.8903314363341255, iteration: 179424
loss: 0.9887928366661072,grad_norm: 0.999999110951042, iteration: 179425
loss: 0.9892002940177917,grad_norm: 0.8800262884048923, iteration: 179426
loss: 0.9753965735435486,grad_norm: 0.999999302649652, iteration: 179427
loss: 1.0069388151168823,grad_norm: 0.9999989449238099, iteration: 179428
loss: 0.9815147519111633,grad_norm: 0.9999992189995344, iteration: 179429
loss: 0.9997666478157043,grad_norm: 0.9077097914044051, iteration: 179430
loss: 0.9541636109352112,grad_norm: 0.9223453513172402, iteration: 179431
loss: 1.0076792240142822,grad_norm: 0.9999991342515747, iteration: 179432
loss: 1.0020084381103516,grad_norm: 0.9999992021158278, iteration: 179433
loss: 1.011448860168457,grad_norm: 0.8466017885475711, iteration: 179434
loss: 0.971573531627655,grad_norm: 0.9999990422934445, iteration: 179435
loss: 1.0511813163757324,grad_norm: 0.9999990683006624, iteration: 179436
loss: 1.0010265111923218,grad_norm: 0.999999075999951, iteration: 179437
loss: 0.9893620014190674,grad_norm: 0.9999995552739042, iteration: 179438
loss: 0.9983737468719482,grad_norm: 0.9999991176160353, iteration: 179439
loss: 1.005785584449768,grad_norm: 0.978822168362105, iteration: 179440
loss: 1.013321042060852,grad_norm: 0.9999991524929929, iteration: 179441
loss: 1.0059179067611694,grad_norm: 0.8672636305194037, iteration: 179442
loss: 0.9999405741691589,grad_norm: 0.897611662321363, iteration: 179443
loss: 1.0155023336410522,grad_norm: 0.9999989864008293, iteration: 179444
loss: 0.9635530710220337,grad_norm: 0.9999994059453368, iteration: 179445
loss: 1.0007795095443726,grad_norm: 0.919696526846786, iteration: 179446
loss: 1.0106796026229858,grad_norm: 0.9496368793326312, iteration: 179447
loss: 0.9944679737091064,grad_norm: 0.8431640216317672, iteration: 179448
loss: 1.0119984149932861,grad_norm: 0.9999989783921378, iteration: 179449
loss: 1.2124239206314087,grad_norm: 0.9999991460289962, iteration: 179450
loss: 1.0226517915725708,grad_norm: 0.9999991227340553, iteration: 179451
loss: 1.0770093202590942,grad_norm: 0.9999991084059067, iteration: 179452
loss: 0.9616458415985107,grad_norm: 0.8688036700307757, iteration: 179453
loss: 0.9979727268218994,grad_norm: 0.9261823714495945, iteration: 179454
loss: 0.9988414645195007,grad_norm: 0.9995293711714437, iteration: 179455
loss: 0.978728711605072,grad_norm: 0.9999990870038871, iteration: 179456
loss: 0.9731087684631348,grad_norm: 0.999999038620451, iteration: 179457
loss: 0.9869790077209473,grad_norm: 0.9052764259824827, iteration: 179458
loss: 1.0299171209335327,grad_norm: 0.9999991059242361, iteration: 179459
loss: 1.2415133714675903,grad_norm: 0.9999997901991665, iteration: 179460
loss: 1.0160466432571411,grad_norm: 0.9999992833875568, iteration: 179461
loss: 1.0196285247802734,grad_norm: 0.9999990005153562, iteration: 179462
loss: 0.9916579723358154,grad_norm: 0.952020156415542, iteration: 179463
loss: 0.9784771800041199,grad_norm: 0.9921268230516345, iteration: 179464
loss: 1.044800043106079,grad_norm: 0.910300338597676, iteration: 179465
loss: 1.0067963600158691,grad_norm: 0.7975391797380628, iteration: 179466
loss: 0.9680867791175842,grad_norm: 0.9999990078972322, iteration: 179467
loss: 0.9947963953018188,grad_norm: 0.9999989685302717, iteration: 179468
loss: 1.0272032022476196,grad_norm: 0.8677643354770171, iteration: 179469
loss: 0.9847802519798279,grad_norm: 0.9999989818515848, iteration: 179470
loss: 1.039965271949768,grad_norm: 0.9739487391473077, iteration: 179471
loss: 1.030543327331543,grad_norm: 0.8138550736318009, iteration: 179472
loss: 0.9637872576713562,grad_norm: 0.8624036879817796, iteration: 179473
loss: 0.9955152869224548,grad_norm: 0.9556756936696502, iteration: 179474
loss: 0.9772635698318481,grad_norm: 0.9999993183297987, iteration: 179475
loss: 1.0010088682174683,grad_norm: 0.9999991505095721, iteration: 179476
loss: 0.9862931370735168,grad_norm: 0.9237981945727699, iteration: 179477
loss: 0.98221755027771,grad_norm: 0.963037171988408, iteration: 179478
loss: 0.9796354174613953,grad_norm: 0.9999991149175466, iteration: 179479
loss: 0.9786546230316162,grad_norm: 0.968515311871806, iteration: 179480
loss: 0.975348174571991,grad_norm: 0.8447544725291813, iteration: 179481
loss: 1.0299408435821533,grad_norm: 0.9999992816501796, iteration: 179482
loss: 0.9865338206291199,grad_norm: 0.8740469340851871, iteration: 179483
loss: 0.9898090362548828,grad_norm: 0.9999991397445759, iteration: 179484
loss: 0.9687455296516418,grad_norm: 0.845756208712821, iteration: 179485
loss: 0.9783089756965637,grad_norm: 0.9999990522744121, iteration: 179486
loss: 1.0335229635238647,grad_norm: 0.9999991097250125, iteration: 179487
loss: 0.9868392944335938,grad_norm: 0.9999997009105291, iteration: 179488
loss: 0.9905009269714355,grad_norm: 0.987760026353739, iteration: 179489
loss: 1.0019307136535645,grad_norm: 0.9747248337869723, iteration: 179490
loss: 1.0071916580200195,grad_norm: 0.945151267027365, iteration: 179491
loss: 1.0284645557403564,grad_norm: 0.9999991781429616, iteration: 179492
loss: 1.0192396640777588,grad_norm: 0.898963581766744, iteration: 179493
loss: 1.052962064743042,grad_norm: 0.9999991873326023, iteration: 179494
loss: 1.0100951194763184,grad_norm: 0.99999921217081, iteration: 179495
loss: 1.0329188108444214,grad_norm: 0.9063603491578127, iteration: 179496
loss: 0.999119222164154,grad_norm: 0.9999990919578496, iteration: 179497
loss: 0.9688565731048584,grad_norm: 0.9002728565671138, iteration: 179498
loss: 1.0583056211471558,grad_norm: 0.9999991491034174, iteration: 179499
loss: 0.9793864488601685,grad_norm: 0.9999989872467543, iteration: 179500
loss: 1.0011112689971924,grad_norm: 0.9999991360322344, iteration: 179501
loss: 0.9973467588424683,grad_norm: 0.9999990237820533, iteration: 179502
loss: 1.0160253047943115,grad_norm: 0.9999989366092511, iteration: 179503
loss: 0.9763914346694946,grad_norm: 0.8604107744768817, iteration: 179504
loss: 0.9991816878318787,grad_norm: 0.9572198501726551, iteration: 179505
loss: 0.9760127067565918,grad_norm: 0.9999991864560488, iteration: 179506
loss: 1.0163034200668335,grad_norm: 0.8921790368949265, iteration: 179507
loss: 0.9974254369735718,grad_norm: 0.8359162333498039, iteration: 179508
loss: 1.0577778816223145,grad_norm: 0.9999994944298899, iteration: 179509
loss: 0.9912571310997009,grad_norm: 0.84360810233494, iteration: 179510
loss: 1.038360357284546,grad_norm: 0.9999990848188391, iteration: 179511
loss: 0.9841964244842529,grad_norm: 0.8488752932432694, iteration: 179512
loss: 0.9955503940582275,grad_norm: 0.9999998802948044, iteration: 179513
loss: 0.9944090843200684,grad_norm: 0.7869741603083527, iteration: 179514
loss: 1.0014046430587769,grad_norm: 0.9680406105301341, iteration: 179515
loss: 1.034644365310669,grad_norm: 0.8036549060937644, iteration: 179516
loss: 1.0211628675460815,grad_norm: 0.8589367439076178, iteration: 179517
loss: 1.0067864656448364,grad_norm: 0.9493618582359105, iteration: 179518
loss: 1.0035290718078613,grad_norm: 0.9999989470365849, iteration: 179519
loss: 1.023597240447998,grad_norm: 0.8013698584281364, iteration: 179520
loss: 1.0252373218536377,grad_norm: 0.9999989604997145, iteration: 179521
loss: 0.9858863949775696,grad_norm: 0.999999020274548, iteration: 179522
loss: 0.9998363852500916,grad_norm: 0.999060714637643, iteration: 179523
loss: 1.001701831817627,grad_norm: 0.8997297609068377, iteration: 179524
loss: 0.974420964717865,grad_norm: 0.9594554958327236, iteration: 179525
loss: 0.9881719946861267,grad_norm: 0.9344845257631658, iteration: 179526
loss: 1.0055986642837524,grad_norm: 0.917777805115486, iteration: 179527
loss: 1.018593192100525,grad_norm: 0.986567236997641, iteration: 179528
loss: 1.005800485610962,grad_norm: 0.8681310193156714, iteration: 179529
loss: 1.0182812213897705,grad_norm: 0.9306504165426133, iteration: 179530
loss: 0.9895482659339905,grad_norm: 0.8355328649702196, iteration: 179531
loss: 1.0467272996902466,grad_norm: 0.9999991978427107, iteration: 179532
loss: 1.024818778038025,grad_norm: 0.7574585744381527, iteration: 179533
loss: 1.0562678575515747,grad_norm: 0.9999992506236375, iteration: 179534
loss: 1.0167794227600098,grad_norm: 0.9998292389593406, iteration: 179535
loss: 1.02272367477417,grad_norm: 0.9543415136491654, iteration: 179536
loss: 0.9742622971534729,grad_norm: 0.9616250848647587, iteration: 179537
loss: 1.0672688484191895,grad_norm: 0.904117206122488, iteration: 179538
loss: 1.0456559658050537,grad_norm: 0.9741241502700057, iteration: 179539
loss: 1.013216495513916,grad_norm: 0.8458357542333811, iteration: 179540
loss: 0.9992820024490356,grad_norm: 0.9999991326513693, iteration: 179541
loss: 1.014006495475769,grad_norm: 0.7853444660718965, iteration: 179542
loss: 1.0599066019058228,grad_norm: 0.9999991984860996, iteration: 179543
loss: 0.9821531772613525,grad_norm: 0.833140768182162, iteration: 179544
loss: 0.9990639686584473,grad_norm: 0.9999991668277723, iteration: 179545
loss: 0.9800012707710266,grad_norm: 0.8987901505760116, iteration: 179546
loss: 1.0240932703018188,grad_norm: 0.8707100353593734, iteration: 179547
loss: 0.9670136570930481,grad_norm: 0.9861971793323011, iteration: 179548
loss: 0.9582492113113403,grad_norm: 0.9999990231298423, iteration: 179549
loss: 0.9760954976081848,grad_norm: 0.9999991501539484, iteration: 179550
loss: 1.011520266532898,grad_norm: 0.9999990145916279, iteration: 179551
loss: 0.9846038818359375,grad_norm: 0.999999198587577, iteration: 179552
loss: 1.0057380199432373,grad_norm: 0.9963876160288514, iteration: 179553
loss: 1.0148627758026123,grad_norm: 0.9999995852703725, iteration: 179554
loss: 1.0099016427993774,grad_norm: 0.9953402340195265, iteration: 179555
loss: 0.9836918115615845,grad_norm: 0.890615946737456, iteration: 179556
loss: 1.0209543704986572,grad_norm: 0.9274630563901347, iteration: 179557
loss: 0.9926572442054749,grad_norm: 0.9999991107028098, iteration: 179558
loss: 1.0053716897964478,grad_norm: 0.9197916759752269, iteration: 179559
loss: 1.1368783712387085,grad_norm: 0.999999503096889, iteration: 179560
loss: 0.9862084984779358,grad_norm: 0.9183283353323081, iteration: 179561
loss: 0.9589465856552124,grad_norm: 0.9999990857555034, iteration: 179562
loss: 0.9924289584159851,grad_norm: 0.9999990005805607, iteration: 179563
loss: 1.008841872215271,grad_norm: 0.838846891977479, iteration: 179564
loss: 1.0052075386047363,grad_norm: 0.9949291718709745, iteration: 179565
loss: 0.9936209321022034,grad_norm: 0.9326894096795918, iteration: 179566
loss: 1.024685025215149,grad_norm: 0.9639070571475288, iteration: 179567
loss: 1.0099270343780518,grad_norm: 0.9724924097963401, iteration: 179568
loss: 1.0263473987579346,grad_norm: 0.9999997888792764, iteration: 179569
loss: 0.9907729029655457,grad_norm: 0.8932101939118152, iteration: 179570
loss: 1.1081769466400146,grad_norm: 1.0000000545142487, iteration: 179571
loss: 1.0179601907730103,grad_norm: 0.9999992815110249, iteration: 179572
loss: 0.9793932437896729,grad_norm: 0.8509066750012221, iteration: 179573
loss: 1.0142641067504883,grad_norm: 0.7884301534469147, iteration: 179574
loss: 1.0262134075164795,grad_norm: 0.9536799483702608, iteration: 179575
loss: 0.9888736009597778,grad_norm: 0.8117960188567462, iteration: 179576
loss: 1.0299257040023804,grad_norm: 0.9999992487910326, iteration: 179577
loss: 1.007789969444275,grad_norm: 0.999999131573769, iteration: 179578
loss: 0.9893537759780884,grad_norm: 0.9674757724189286, iteration: 179579
loss: 0.9878138303756714,grad_norm: 0.9892653856079056, iteration: 179580
loss: 0.9960347414016724,grad_norm: 0.9888264156477921, iteration: 179581
loss: 0.967268168926239,grad_norm: 0.9999991023864572, iteration: 179582
loss: 1.0017000436782837,grad_norm: 0.9999991893250039, iteration: 179583
loss: 0.9496718049049377,grad_norm: 0.8735506633394056, iteration: 179584
loss: 0.9773654937744141,grad_norm: 0.9999990064968273, iteration: 179585
loss: 1.0020776987075806,grad_norm: 0.7690668088459603, iteration: 179586
loss: 0.9843472242355347,grad_norm: 0.9160768229515723, iteration: 179587
loss: 1.0063868761062622,grad_norm: 0.9999991092884989, iteration: 179588
loss: 0.9933369755744934,grad_norm: 0.8910697197472425, iteration: 179589
loss: 0.9968686103820801,grad_norm: 0.9999991318388013, iteration: 179590
loss: 1.0129913091659546,grad_norm: 0.9003533791837391, iteration: 179591
loss: 1.053520679473877,grad_norm: 0.9999991654632745, iteration: 179592
loss: 0.9870145916938782,grad_norm: 0.9204173298562103, iteration: 179593
loss: 1.0021377801895142,grad_norm: 0.9650502451299862, iteration: 179594
loss: 1.0189239978790283,grad_norm: 0.9999991180302416, iteration: 179595
loss: 0.9778262376785278,grad_norm: 0.9999990601603114, iteration: 179596
loss: 1.0440996885299683,grad_norm: 0.9999990939593023, iteration: 179597
loss: 0.9817614555358887,grad_norm: 0.8147197939685384, iteration: 179598
loss: 0.9875354766845703,grad_norm: 0.9999991204845364, iteration: 179599
loss: 0.9870212078094482,grad_norm: 0.9655984445724114, iteration: 179600
loss: 1.0106580257415771,grad_norm: 0.9409126470213569, iteration: 179601
loss: 1.0188637971878052,grad_norm: 0.8385015722765022, iteration: 179602
loss: 0.9878193140029907,grad_norm: 0.9023264285145168, iteration: 179603
loss: 1.026339054107666,grad_norm: 0.9999991258923538, iteration: 179604
loss: 1.0390536785125732,grad_norm: 0.8913611748644645, iteration: 179605
loss: 0.9883122444152832,grad_norm: 0.9293685037112194, iteration: 179606
loss: 0.9569555521011353,grad_norm: 0.9495342588580966, iteration: 179607
loss: 0.9867480397224426,grad_norm: 0.9513929715163673, iteration: 179608
loss: 1.0079425573349,grad_norm: 0.9084247821233316, iteration: 179609
loss: 0.9457424283027649,grad_norm: 0.9999990252732426, iteration: 179610
loss: 0.9738622307777405,grad_norm: 0.9999991804604644, iteration: 179611
loss: 1.0374997854232788,grad_norm: 0.9999990769256282, iteration: 179612
loss: 0.9888115525245667,grad_norm: 0.999999135782653, iteration: 179613
loss: 0.9879802465438843,grad_norm: 0.9645862366421433, iteration: 179614
loss: 1.0115299224853516,grad_norm: 0.9999990689872553, iteration: 179615
loss: 1.0220913887023926,grad_norm: 0.9092562100882065, iteration: 179616
loss: 0.9965615272521973,grad_norm: 0.943209650923344, iteration: 179617
loss: 1.0064443349838257,grad_norm: 0.9999991030257872, iteration: 179618
loss: 1.0037119388580322,grad_norm: 0.9881702550205025, iteration: 179619
loss: 1.0393459796905518,grad_norm: 0.956998627254752, iteration: 179620
loss: 1.01656174659729,grad_norm: 0.9999992707256699, iteration: 179621
loss: 0.9810540080070496,grad_norm: 0.9999991758610174, iteration: 179622
loss: 1.0010802745819092,grad_norm: 0.9999991988726015, iteration: 179623
loss: 0.9779700040817261,grad_norm: 0.9999990906063062, iteration: 179624
loss: 0.9823141694068909,grad_norm: 0.9433959280147003, iteration: 179625
loss: 1.0087864398956299,grad_norm: 0.9179818987022387, iteration: 179626
loss: 1.0115371942520142,grad_norm: 0.9315354584117458, iteration: 179627
loss: 1.0285437107086182,grad_norm: 0.8254603027878745, iteration: 179628
loss: 0.9988080263137817,grad_norm: 0.9500054995850075, iteration: 179629
loss: 1.003687858581543,grad_norm: 0.8703137605356135, iteration: 179630
loss: 0.985100269317627,grad_norm: 0.9999990268294767, iteration: 179631
loss: 0.9861880540847778,grad_norm: 0.9999991117294218, iteration: 179632
loss: 0.9977052211761475,grad_norm: 0.9541566257502888, iteration: 179633
loss: 0.9874472618103027,grad_norm: 0.9987867730273027, iteration: 179634
loss: 1.0246602296829224,grad_norm: 0.938049541139855, iteration: 179635
loss: 1.0265583992004395,grad_norm: 0.9638370193282135, iteration: 179636
loss: 0.9992148280143738,grad_norm: 0.999999239658297, iteration: 179637
loss: 1.0005178451538086,grad_norm: 0.9999991530291215, iteration: 179638
loss: 1.02107572555542,grad_norm: 0.9999989506758973, iteration: 179639
loss: 1.0236296653747559,grad_norm: 0.9999991472958345, iteration: 179640
loss: 0.9860774278640747,grad_norm: 0.9207028061922793, iteration: 179641
loss: 1.0070164203643799,grad_norm: 0.8802227881384475, iteration: 179642
loss: 0.9805243015289307,grad_norm: 0.9999992947684335, iteration: 179643
loss: 0.9716596007347107,grad_norm: 0.9615513790873652, iteration: 179644
loss: 0.9846093654632568,grad_norm: 0.978629691695671, iteration: 179645
loss: 1.0121439695358276,grad_norm: 0.8110886051528092, iteration: 179646
loss: 1.0285786390304565,grad_norm: 0.9999991171859769, iteration: 179647
loss: 1.0044292211532593,grad_norm: 0.9624898387605276, iteration: 179648
loss: 0.976624071598053,grad_norm: 0.96994021721297, iteration: 179649
loss: 0.9835987091064453,grad_norm: 0.889797983887705, iteration: 179650
loss: 1.024344563484192,grad_norm: 0.9999990146964285, iteration: 179651
loss: 1.0166593790054321,grad_norm: 0.9471027958628, iteration: 179652
loss: 0.9875584244728088,grad_norm: 0.8514988187589324, iteration: 179653
loss: 1.0263311862945557,grad_norm: 0.9999990586084467, iteration: 179654
loss: 0.9966544508934021,grad_norm: 0.7743352106066612, iteration: 179655
loss: 1.0013941526412964,grad_norm: 0.8857818326887806, iteration: 179656
loss: 1.0070911645889282,grad_norm: 0.9066906652229756, iteration: 179657
loss: 0.9600471258163452,grad_norm: 0.9999992377843783, iteration: 179658
loss: 1.0180408954620361,grad_norm: 0.9222051042248877, iteration: 179659
loss: 1.005659818649292,grad_norm: 0.921949695244345, iteration: 179660
loss: 1.0121257305145264,grad_norm: 0.988768227268278, iteration: 179661
loss: 1.0286296606063843,grad_norm: 0.9436229664080175, iteration: 179662
loss: 0.9992156624794006,grad_norm: 0.9258053593164124, iteration: 179663
loss: 1.0232218503952026,grad_norm: 0.9999995008653261, iteration: 179664
loss: 1.0083457231521606,grad_norm: 0.9999990493323065, iteration: 179665
loss: 0.9883707165718079,grad_norm: 0.9999992577122849, iteration: 179666
loss: 0.9965860843658447,grad_norm: 0.9054503398336564, iteration: 179667
loss: 1.0544886589050293,grad_norm: 0.9999996911367386, iteration: 179668
loss: 1.0092698335647583,grad_norm: 0.9452696118877468, iteration: 179669
loss: 1.0033396482467651,grad_norm: 0.9999990081367287, iteration: 179670
loss: 1.0003437995910645,grad_norm: 0.9999988838660647, iteration: 179671
loss: 1.010777235031128,grad_norm: 0.9999992613741817, iteration: 179672
loss: 1.0194579362869263,grad_norm: 0.9999989773070159, iteration: 179673
loss: 0.9948735237121582,grad_norm: 0.8454943976242386, iteration: 179674
loss: 0.989261269569397,grad_norm: 0.9168488362730625, iteration: 179675
loss: 1.0518362522125244,grad_norm: 0.9999991247601709, iteration: 179676
loss: 1.0159480571746826,grad_norm: 0.999999128141664, iteration: 179677
loss: 0.9988406300544739,grad_norm: 0.9621093124697232, iteration: 179678
loss: 0.972245454788208,grad_norm: 0.9033699362974703, iteration: 179679
loss: 0.9815995693206787,grad_norm: 0.9957079103368375, iteration: 179680
loss: 0.9968082308769226,grad_norm: 0.7597525957769682, iteration: 179681
loss: 0.9868427515029907,grad_norm: 0.962797933958386, iteration: 179682
loss: 0.995199978351593,grad_norm: 0.8742217475746149, iteration: 179683
loss: 0.9761726260185242,grad_norm: 0.9092350366599483, iteration: 179684
loss: 0.9952244758605957,grad_norm: 0.7996796571813103, iteration: 179685
loss: 0.9756284356117249,grad_norm: 0.9999991352515518, iteration: 179686
loss: 1.0169286727905273,grad_norm: 0.9494156796148543, iteration: 179687
loss: 0.9936550259590149,grad_norm: 0.7755011748194585, iteration: 179688
loss: 1.0130236148834229,grad_norm: 0.9999992844062922, iteration: 179689
loss: 0.9855881333351135,grad_norm: 0.9200702000194135, iteration: 179690
loss: 0.9737902283668518,grad_norm: 0.9999993415073832, iteration: 179691
loss: 0.9842942953109741,grad_norm: 0.9999991437085102, iteration: 179692
loss: 1.0080231428146362,grad_norm: 0.7583011810952509, iteration: 179693
loss: 0.9779453277587891,grad_norm: 0.9782798833361317, iteration: 179694
loss: 1.007017731666565,grad_norm: 0.9999991096942513, iteration: 179695
loss: 1.0253101587295532,grad_norm: 0.9999992800485917, iteration: 179696
loss: 1.0033518075942993,grad_norm: 0.9942166213371316, iteration: 179697
loss: 0.989235520362854,grad_norm: 0.9999989953125982, iteration: 179698
loss: 0.9984694123268127,grad_norm: 0.9999992589534372, iteration: 179699
loss: 0.9802226424217224,grad_norm: 0.9999990342573879, iteration: 179700
loss: 1.052565574645996,grad_norm: 0.9999993823556248, iteration: 179701
loss: 0.9939438700675964,grad_norm: 0.9546342878195652, iteration: 179702
loss: 1.0157791376113892,grad_norm: 0.9999991616803835, iteration: 179703
loss: 0.9967682361602783,grad_norm: 0.9999990845232383, iteration: 179704
loss: 0.9718406796455383,grad_norm: 0.9365915237535899, iteration: 179705
loss: 0.9891745448112488,grad_norm: 0.8312823014118255, iteration: 179706
loss: 0.9921027421951294,grad_norm: 0.9033927544527393, iteration: 179707
loss: 1.0178076028823853,grad_norm: 0.9680469681880021, iteration: 179708
loss: 1.0650959014892578,grad_norm: 0.9999990469542744, iteration: 179709
loss: 0.9713326096534729,grad_norm: 0.7168377749250547, iteration: 179710
loss: 1.0964064598083496,grad_norm: 0.9999996790998531, iteration: 179711
loss: 1.0330380201339722,grad_norm: 0.8404835391948756, iteration: 179712
loss: 1.120266079902649,grad_norm: 0.9999991051737119, iteration: 179713
loss: 0.9988052845001221,grad_norm: 0.9999990557585651, iteration: 179714
loss: 1.0042353868484497,grad_norm: 0.9452652320520307, iteration: 179715
loss: 1.0408751964569092,grad_norm: 0.9999991789031425, iteration: 179716
loss: 1.073177456855774,grad_norm: 0.9999991017657155, iteration: 179717
loss: 1.1295764446258545,grad_norm: 0.9999992220917713, iteration: 179718
loss: 1.0073868036270142,grad_norm: 0.8950790262303265, iteration: 179719
loss: 0.9825470447540283,grad_norm: 0.9999991064464907, iteration: 179720
loss: 1.0524688959121704,grad_norm: 0.9348623611012685, iteration: 179721
loss: 0.9923555254936218,grad_norm: 0.9150837634548865, iteration: 179722
loss: 1.1771427392959595,grad_norm: 0.9999991877376222, iteration: 179723
loss: 1.014320969581604,grad_norm: 0.9034527628981915, iteration: 179724
loss: 1.0127346515655518,grad_norm: 0.9999999152029408, iteration: 179725
loss: 1.0305582284927368,grad_norm: 0.9543252605329583, iteration: 179726
loss: 1.0040051937103271,grad_norm: 0.9999990597591918, iteration: 179727
loss: 1.0053478479385376,grad_norm: 0.8361059212424684, iteration: 179728
loss: 1.0035537481307983,grad_norm: 0.9999991257557308, iteration: 179729
loss: 1.0945600271224976,grad_norm: 0.9999994622468378, iteration: 179730
loss: 1.07440984249115,grad_norm: 0.99999986505374, iteration: 179731
loss: 0.9736539125442505,grad_norm: 0.9319633343807305, iteration: 179732
loss: 1.0175631046295166,grad_norm: 0.9493013972856965, iteration: 179733
loss: 1.0644272565841675,grad_norm: 0.9999996805083781, iteration: 179734
loss: 0.9774731397628784,grad_norm: 0.9854810450851509, iteration: 179735
loss: 1.06879460811615,grad_norm: 0.9999991924510998, iteration: 179736
loss: 1.0099241733551025,grad_norm: 0.9323929836928558, iteration: 179737
loss: 1.0524410009384155,grad_norm: 0.9999990615290333, iteration: 179738
loss: 1.0801849365234375,grad_norm: 0.9999991972617236, iteration: 179739
loss: 0.9875262975692749,grad_norm: 0.9999991118137765, iteration: 179740
loss: 0.9872127175331116,grad_norm: 0.8969714422883062, iteration: 179741
loss: 1.017095923423767,grad_norm: 0.9305575941972595, iteration: 179742
loss: 0.9684498906135559,grad_norm: 0.9999990731030394, iteration: 179743
loss: 0.9864874482154846,grad_norm: 0.9264146507761702, iteration: 179744
loss: 0.9998924136161804,grad_norm: 0.9999992837745958, iteration: 179745
loss: 0.992946445941925,grad_norm: 0.7940157066375807, iteration: 179746
loss: 1.0437639951705933,grad_norm: 0.9999992415048334, iteration: 179747
loss: 1.0327600240707397,grad_norm: 0.9333185887248141, iteration: 179748
loss: 0.9976474642753601,grad_norm: 0.9999990819394036, iteration: 179749
loss: 1.0236401557922363,grad_norm: 0.9999992226034667, iteration: 179750
loss: 0.9727490544319153,grad_norm: 0.9999990492559293, iteration: 179751
loss: 0.9952840805053711,grad_norm: 0.9999991799149505, iteration: 179752
loss: 0.9972794055938721,grad_norm: 0.9999997797994533, iteration: 179753
loss: 0.9978896975517273,grad_norm: 0.8427978587082712, iteration: 179754
loss: 0.9882623553276062,grad_norm: 0.9999992202535545, iteration: 179755
loss: 1.022189974784851,grad_norm: 0.9639650412674294, iteration: 179756
loss: 0.9728100299835205,grad_norm: 0.9739139107404241, iteration: 179757
loss: 0.9921368956565857,grad_norm: 0.9400772274577178, iteration: 179758
loss: 0.9806293845176697,grad_norm: 0.9686898750323986, iteration: 179759
loss: 1.0118428468704224,grad_norm: 0.9999990603406499, iteration: 179760
loss: 1.012627363204956,grad_norm: 0.9999990193525565, iteration: 179761
loss: 1.018395185470581,grad_norm: 0.8727754506653276, iteration: 179762
loss: 0.991452157497406,grad_norm: 0.9999992309528913, iteration: 179763
loss: 1.0428017377853394,grad_norm: 0.991880545606957, iteration: 179764
loss: 1.0028917789459229,grad_norm: 0.9805102537364974, iteration: 179765
loss: 0.9898297786712646,grad_norm: 0.9999991925198674, iteration: 179766
loss: 1.0204076766967773,grad_norm: 0.9087491920436496, iteration: 179767
loss: 1.007359504699707,grad_norm: 0.9684312626785145, iteration: 179768
loss: 1.0059412717819214,grad_norm: 0.8353278275910871, iteration: 179769
loss: 1.0452990531921387,grad_norm: 0.9999992118154651, iteration: 179770
loss: 0.9869682788848877,grad_norm: 0.8675811831546223, iteration: 179771
loss: 0.9751185774803162,grad_norm: 0.9608122430503899, iteration: 179772
loss: 0.998616635799408,grad_norm: 0.9999990767424911, iteration: 179773
loss: 0.9872998595237732,grad_norm: 0.9293672596788625, iteration: 179774
loss: 0.9957932829856873,grad_norm: 0.9293372512014668, iteration: 179775
loss: 0.9984835386276245,grad_norm: 0.8974339996441758, iteration: 179776
loss: 1.004060983657837,grad_norm: 0.8731462799591007, iteration: 179777
loss: 1.0389827489852905,grad_norm: 0.999999262262895, iteration: 179778
loss: 1.0209290981292725,grad_norm: 0.999999899518029, iteration: 179779
loss: 0.9713225364685059,grad_norm: 0.9999991360423808, iteration: 179780
loss: 1.0017709732055664,grad_norm: 0.9999990984630349, iteration: 179781
loss: 1.0490812063217163,grad_norm: 0.9999990517176827, iteration: 179782
loss: 0.9833508729934692,grad_norm: 0.8502785983364253, iteration: 179783
loss: 1.0386067628860474,grad_norm: 0.999999122252709, iteration: 179784
loss: 1.052120566368103,grad_norm: 0.9999997602454695, iteration: 179785
loss: 1.0238865613937378,grad_norm: 0.9999989653006978, iteration: 179786
loss: 0.9655711650848389,grad_norm: 0.9978270211048142, iteration: 179787
loss: 0.9681789875030518,grad_norm: 0.9990923646637881, iteration: 179788
loss: 0.9828401207923889,grad_norm: 0.8308096480124323, iteration: 179789
loss: 0.9853647947311401,grad_norm: 0.9999989603368626, iteration: 179790
loss: 0.9904778599739075,grad_norm: 0.9999989809579276, iteration: 179791
loss: 0.9925275444984436,grad_norm: 0.9352079495036656, iteration: 179792
loss: 1.0142239332199097,grad_norm: 0.9505747213074995, iteration: 179793
loss: 1.0007895231246948,grad_norm: 0.9999992587009109, iteration: 179794
loss: 1.0282872915267944,grad_norm: 0.9100398898608029, iteration: 179795
loss: 0.9703692197799683,grad_norm: 0.9999990695513944, iteration: 179796
loss: 0.9820305109024048,grad_norm: 0.9999991721662257, iteration: 179797
loss: 0.9984954595565796,grad_norm: 0.8521211833409519, iteration: 179798
loss: 0.995758056640625,grad_norm: 0.9655332845965707, iteration: 179799
loss: 1.0135453939437866,grad_norm: 0.9999991512195636, iteration: 179800
loss: 0.9765920042991638,grad_norm: 0.9328533296790963, iteration: 179801
loss: 0.9790312051773071,grad_norm: 0.9045503733064822, iteration: 179802
loss: 1.001548409461975,grad_norm: 0.9212958513947027, iteration: 179803
loss: 1.1062817573547363,grad_norm: 0.9999991262284397, iteration: 179804
loss: 0.9920287132263184,grad_norm: 0.9593199457137356, iteration: 179805
loss: 1.0214940309524536,grad_norm: 0.9592461083756254, iteration: 179806
loss: 1.033221960067749,grad_norm: 0.9999989957399941, iteration: 179807
loss: 1.0096746683120728,grad_norm: 0.9999994496596059, iteration: 179808
loss: 1.019394874572754,grad_norm: 0.9605074807777307, iteration: 179809
loss: 1.0053805112838745,grad_norm: 0.9999993563597546, iteration: 179810
loss: 1.0010346174240112,grad_norm: 0.8714514417925084, iteration: 179811
loss: 1.0077922344207764,grad_norm: 0.9999991045956546, iteration: 179812
loss: 1.0159780979156494,grad_norm: 0.8449138520639988, iteration: 179813
loss: 1.0263971090316772,grad_norm: 0.9513604655164233, iteration: 179814
loss: 0.9662774205207825,grad_norm: 0.9999991342438939, iteration: 179815
loss: 1.0439701080322266,grad_norm: 0.9256994919153011, iteration: 179816
loss: 1.0092723369598389,grad_norm: 0.9999991662697982, iteration: 179817
loss: 1.000213623046875,grad_norm: 0.9999992271813473, iteration: 179818
loss: 1.1111119985580444,grad_norm: 0.9999996366393634, iteration: 179819
loss: 0.9966968894004822,grad_norm: 0.9999990670656986, iteration: 179820
loss: 0.974964439868927,grad_norm: 0.9999991872048469, iteration: 179821
loss: 0.9531099200248718,grad_norm: 0.9444641228212309, iteration: 179822
loss: 1.0040366649627686,grad_norm: 0.9999991526196402, iteration: 179823
loss: 1.0035403966903687,grad_norm: 0.9999991136515624, iteration: 179824
loss: 0.9999415278434753,grad_norm: 0.9999990919342048, iteration: 179825
loss: 0.9457765221595764,grad_norm: 0.9999990376738248, iteration: 179826
loss: 1.0291718244552612,grad_norm: 0.9999990390855096, iteration: 179827
loss: 0.9940429329872131,grad_norm: 0.9999990676736995, iteration: 179828
loss: 1.001732349395752,grad_norm: 0.9981015936594032, iteration: 179829
loss: 1.0185521841049194,grad_norm: 0.9183633064177942, iteration: 179830
loss: 0.960781455039978,grad_norm: 0.7969099395800897, iteration: 179831
loss: 0.9656791090965271,grad_norm: 0.9122921081518627, iteration: 179832
loss: 1.0260623693466187,grad_norm: 0.993536115629413, iteration: 179833
loss: 1.0150901079177856,grad_norm: 0.9999990538412686, iteration: 179834
loss: 1.056083083152771,grad_norm: 0.9999993880828647, iteration: 179835
loss: 1.0324633121490479,grad_norm: 0.9999990361226652, iteration: 179836
loss: 0.9619103074073792,grad_norm: 0.9999991459364174, iteration: 179837
loss: 1.0112744569778442,grad_norm: 0.9732830297460302, iteration: 179838
loss: 0.9882169961929321,grad_norm: 0.9999991457312799, iteration: 179839
loss: 1.0099260807037354,grad_norm: 0.9999993243362257, iteration: 179840
loss: 0.9939330220222473,grad_norm: 0.8676180166242032, iteration: 179841
loss: 0.9915897250175476,grad_norm: 0.9999993414897531, iteration: 179842
loss: 1.0481396913528442,grad_norm: 0.8884524871727231, iteration: 179843
loss: 1.0127174854278564,grad_norm: 0.794222624158566, iteration: 179844
loss: 1.0044668912887573,grad_norm: 0.8454429760664915, iteration: 179845
loss: 1.0531365871429443,grad_norm: 0.9999993342043465, iteration: 179846
loss: 1.0073113441467285,grad_norm: 0.9999992246846403, iteration: 179847
loss: 0.9868354797363281,grad_norm: 0.9999991453898166, iteration: 179848
loss: 1.0145319700241089,grad_norm: 0.9029971893970742, iteration: 179849
loss: 1.019691824913025,grad_norm: 0.8160526613983443, iteration: 179850
loss: 1.0324349403381348,grad_norm: 0.8685580220389157, iteration: 179851
loss: 1.0182851552963257,grad_norm: 0.9897180340448006, iteration: 179852
loss: 1.0341770648956299,grad_norm: 0.9999999354696439, iteration: 179853
loss: 0.9923322200775146,grad_norm: 0.977621966608182, iteration: 179854
loss: 0.9980929493904114,grad_norm: 0.9837429605273208, iteration: 179855
loss: 0.9578613042831421,grad_norm: 0.9164370572129027, iteration: 179856
loss: 1.0235902070999146,grad_norm: 0.9840964207038095, iteration: 179857
loss: 1.034810185432434,grad_norm: 0.9999992336687135, iteration: 179858
loss: 0.999633252620697,grad_norm: 0.9999990081164188, iteration: 179859
loss: 0.9891912937164307,grad_norm: 0.9797696724101025, iteration: 179860
loss: 0.9722227454185486,grad_norm: 0.9999990929712748, iteration: 179861
loss: 0.9854655861854553,grad_norm: 0.9550296324353013, iteration: 179862
loss: 1.0011112689971924,grad_norm: 0.9898047628551286, iteration: 179863
loss: 1.003226637840271,grad_norm: 0.9070746313725597, iteration: 179864
loss: 1.0143955945968628,grad_norm: 0.9999991597068244, iteration: 179865
loss: 0.9829340577125549,grad_norm: 0.9311542777601048, iteration: 179866
loss: 1.029088020324707,grad_norm: 0.9999994133200678, iteration: 179867
loss: 1.020180106163025,grad_norm: 0.9999991209519302, iteration: 179868
loss: 0.9892651438713074,grad_norm: 0.9999990235619048, iteration: 179869
loss: 1.0099989175796509,grad_norm: 0.9999991231131995, iteration: 179870
loss: 0.9959011077880859,grad_norm: 0.9999992183284236, iteration: 179871
loss: 0.993593692779541,grad_norm: 0.9999990858325143, iteration: 179872
loss: 1.013189435005188,grad_norm: 0.9999990780093915, iteration: 179873
loss: 1.0384516716003418,grad_norm: 0.9999998084519726, iteration: 179874
loss: 1.0174840688705444,grad_norm: 0.999999225598053, iteration: 179875
loss: 1.0180069208145142,grad_norm: 0.9830987121530714, iteration: 179876
loss: 1.0270191431045532,grad_norm: 0.9904297468282642, iteration: 179877
loss: 0.9788148999214172,grad_norm: 0.9588530599318135, iteration: 179878
loss: 1.0041192770004272,grad_norm: 0.9686000872518381, iteration: 179879
loss: 0.9782083630561829,grad_norm: 0.9434602578671022, iteration: 179880
loss: 1.011885643005371,grad_norm: 0.9453618753789098, iteration: 179881
loss: 0.9861186742782593,grad_norm: 0.999999039746021, iteration: 179882
loss: 0.9732816815376282,grad_norm: 0.8677146645402469, iteration: 179883
loss: 1.0198670625686646,grad_norm: 0.999999003913454, iteration: 179884
loss: 0.9982556700706482,grad_norm: 0.899693159680482, iteration: 179885
loss: 1.0055923461914062,grad_norm: 0.9999996308083077, iteration: 179886
loss: 1.0318646430969238,grad_norm: 0.8578468913074913, iteration: 179887
loss: 1.0038233995437622,grad_norm: 0.999999224874934, iteration: 179888
loss: 0.9876759052276611,grad_norm: 0.8125481087985281, iteration: 179889
loss: 1.0089101791381836,grad_norm: 0.8203987026854944, iteration: 179890
loss: 0.9640764594078064,grad_norm: 0.9999991670846938, iteration: 179891
loss: 0.9832412600517273,grad_norm: 0.8957196981448579, iteration: 179892
loss: 1.002829909324646,grad_norm: 0.9999997555578786, iteration: 179893
loss: 0.9670977592468262,grad_norm: 0.894130168616637, iteration: 179894
loss: 1.0690245628356934,grad_norm: 0.9999994978337007, iteration: 179895
loss: 0.9767549633979797,grad_norm: 0.9723100714171913, iteration: 179896
loss: 1.0148656368255615,grad_norm: 0.7864645442120447, iteration: 179897
loss: 0.9882603287696838,grad_norm: 0.9949204871040135, iteration: 179898
loss: 1.040776014328003,grad_norm: 0.9999992174630781, iteration: 179899
loss: 0.9921137690544128,grad_norm: 0.9999988927538948, iteration: 179900
loss: 1.0067139863967896,grad_norm: 0.9999990453419085, iteration: 179901
loss: 1.1285969018936157,grad_norm: 0.9999991119801177, iteration: 179902
loss: 0.978484570980072,grad_norm: 0.9231805970567837, iteration: 179903
loss: 0.9720976948738098,grad_norm: 0.9340566783322544, iteration: 179904
loss: 0.9965425133705139,grad_norm: 0.9913693122842574, iteration: 179905
loss: 1.012186050415039,grad_norm: 0.9999991803901096, iteration: 179906
loss: 0.999334454536438,grad_norm: 0.9999991587210469, iteration: 179907
loss: 0.9637836813926697,grad_norm: 0.8492755651958166, iteration: 179908
loss: 0.9835243225097656,grad_norm: 0.9999991988706618, iteration: 179909
loss: 1.0804883241653442,grad_norm: 0.9999996071836632, iteration: 179910
loss: 0.9799396395683289,grad_norm: 0.8113968158293845, iteration: 179911
loss: 0.9987614750862122,grad_norm: 0.9999991310778736, iteration: 179912
loss: 0.9654286503791809,grad_norm: 0.9491558543027216, iteration: 179913
loss: 1.0197043418884277,grad_norm: 0.9999991672759737, iteration: 179914
loss: 1.0181972980499268,grad_norm: 0.9610587791857079, iteration: 179915
loss: 1.0287901163101196,grad_norm: 0.9999991732975342, iteration: 179916
loss: 0.9594322443008423,grad_norm: 0.999999211195086, iteration: 179917
loss: 0.9812498092651367,grad_norm: 0.9240031099333812, iteration: 179918
loss: 0.9962650537490845,grad_norm: 0.9999992212899946, iteration: 179919
loss: 1.038852334022522,grad_norm: 0.9109020244175572, iteration: 179920
loss: 0.9795641303062439,grad_norm: 0.9999992976259204, iteration: 179921
loss: 1.0025209188461304,grad_norm: 0.9999990662655917, iteration: 179922
loss: 0.9706190228462219,grad_norm: 0.9267294637743427, iteration: 179923
loss: 0.9877128005027771,grad_norm: 0.9627652606408353, iteration: 179924
loss: 0.9845132231712341,grad_norm: 0.9555788007302413, iteration: 179925
loss: 1.0625989437103271,grad_norm: 0.9133371357840704, iteration: 179926
loss: 1.021194577217102,grad_norm: 0.9999990159852101, iteration: 179927
loss: 1.0037453174591064,grad_norm: 0.9643343278975074, iteration: 179928
loss: 0.9858224987983704,grad_norm: 0.786348450779306, iteration: 179929
loss: 1.0032227039337158,grad_norm: 0.8906134247655683, iteration: 179930
loss: 1.0018845796585083,grad_norm: 0.9999993095394198, iteration: 179931
loss: 0.980505108833313,grad_norm: 0.9999991922849528, iteration: 179932
loss: 1.0176259279251099,grad_norm: 0.9999991526100024, iteration: 179933
loss: 0.9824832081794739,grad_norm: 0.8523196050406943, iteration: 179934
loss: 0.9957617521286011,grad_norm: 0.8608443689063595, iteration: 179935
loss: 1.0110588073730469,grad_norm: 0.9669149151309835, iteration: 179936
loss: 1.0148979425430298,grad_norm: 0.8187790297461902, iteration: 179937
loss: 0.9973387718200684,grad_norm: 0.9999991385920981, iteration: 179938
loss: 1.0249019861221313,grad_norm: 0.9999991311755483, iteration: 179939
loss: 1.022752046585083,grad_norm: 0.9999989326191894, iteration: 179940
loss: 1.0123132467269897,grad_norm: 0.9074503818120201, iteration: 179941
loss: 1.0193657875061035,grad_norm: 0.9644125825790739, iteration: 179942
loss: 0.9984288215637207,grad_norm: 0.8732533043302008, iteration: 179943
loss: 0.9883536696434021,grad_norm: 0.9060654228985513, iteration: 179944
loss: 0.9889467358589172,grad_norm: 0.9999989894615453, iteration: 179945
loss: 1.036320686340332,grad_norm: 0.9999990033975878, iteration: 179946
loss: 1.0132410526275635,grad_norm: 0.8922472554196873, iteration: 179947
loss: 0.9935668110847473,grad_norm: 0.9253989782574062, iteration: 179948
loss: 1.0015705823898315,grad_norm: 0.9348371970865434, iteration: 179949
loss: 0.9914560317993164,grad_norm: 0.9999990752057488, iteration: 179950
loss: 0.9768809676170349,grad_norm: 0.9937877527370818, iteration: 179951
loss: 0.9616779685020447,grad_norm: 0.8832767122673766, iteration: 179952
loss: 1.0346975326538086,grad_norm: 0.9822929023178673, iteration: 179953
loss: 1.0232746601104736,grad_norm: 0.8725778140193442, iteration: 179954
loss: 0.9926772713661194,grad_norm: 0.9777875345872536, iteration: 179955
loss: 0.9710100889205933,grad_norm: 0.9999991622358037, iteration: 179956
loss: 1.0027263164520264,grad_norm: 0.8910181909371506, iteration: 179957
loss: 0.9746527075767517,grad_norm: 0.8400257558331296, iteration: 179958
loss: 0.9971555471420288,grad_norm: 0.9999990761636471, iteration: 179959
loss: 0.9930328130722046,grad_norm: 0.9470442405502347, iteration: 179960
loss: 0.9722313284873962,grad_norm: 0.8083504065328619, iteration: 179961
loss: 1.0220590829849243,grad_norm: 0.9999998392863597, iteration: 179962
loss: 0.9836807250976562,grad_norm: 0.8651992215925614, iteration: 179963
loss: 1.0096246004104614,grad_norm: 0.9999992338480789, iteration: 179964
loss: 1.0122534036636353,grad_norm: 0.878923646524854, iteration: 179965
loss: 0.9682294726371765,grad_norm: 0.8999677050195275, iteration: 179966
loss: 1.0092341899871826,grad_norm: 0.9999990790320378, iteration: 179967
loss: 1.0130035877227783,grad_norm: 0.9331182910076383, iteration: 179968
loss: 1.019797921180725,grad_norm: 0.8842023556810998, iteration: 179969
loss: 0.975532054901123,grad_norm: 0.8901821395660005, iteration: 179970
loss: 0.989145815372467,grad_norm: 0.9816536202297077, iteration: 179971
loss: 0.982063889503479,grad_norm: 0.8854495403215263, iteration: 179972
loss: 0.99466872215271,grad_norm: 0.9405560054095152, iteration: 179973
loss: 1.0236607789993286,grad_norm: 0.9369751981823436, iteration: 179974
loss: 1.0417739152908325,grad_norm: 0.8478835834588171, iteration: 179975
loss: 0.969010591506958,grad_norm: 0.9367257827414875, iteration: 179976
loss: 0.9777929186820984,grad_norm: 0.9999990725516565, iteration: 179977
loss: 0.9736197590827942,grad_norm: 0.9999989927434492, iteration: 179978
loss: 0.9354783892631531,grad_norm: 0.8926323663153897, iteration: 179979
loss: 0.980529248714447,grad_norm: 0.9999991248650991, iteration: 179980
loss: 0.9489828944206238,grad_norm: 0.9999991464482022, iteration: 179981
loss: 0.9804258346557617,grad_norm: 0.9151706573705004, iteration: 179982
loss: 1.0072224140167236,grad_norm: 0.9144792698451, iteration: 179983
loss: 1.0236986875534058,grad_norm: 0.9696948116497398, iteration: 179984
loss: 0.9874595403671265,grad_norm: 0.8346465581523905, iteration: 179985
loss: 1.032690167427063,grad_norm: 0.7810164329349736, iteration: 179986
loss: 0.9949514269828796,grad_norm: 0.9999991416266577, iteration: 179987
loss: 1.0231188535690308,grad_norm: 0.9999990833690634, iteration: 179988
loss: 0.9957139492034912,grad_norm: 0.9135416363855177, iteration: 179989
loss: 1.0179438591003418,grad_norm: 0.8914692756975651, iteration: 179990
loss: 0.9897816777229309,grad_norm: 0.9999991256866407, iteration: 179991
loss: 1.02388596534729,grad_norm: 0.9999990555257997, iteration: 179992
loss: 0.9971579313278198,grad_norm: 0.9679973998200094, iteration: 179993
loss: 1.0143487453460693,grad_norm: 0.9621761824577663, iteration: 179994
loss: 1.0160620212554932,grad_norm: 0.9033460677862298, iteration: 179995
loss: 0.976732075214386,grad_norm: 0.9657162029474924, iteration: 179996
loss: 1.0260065793991089,grad_norm: 0.9877846689097443, iteration: 179997
loss: 1.0030230283737183,grad_norm: 0.9999990978771816, iteration: 179998
loss: 1.014248013496399,grad_norm: 0.9999991735725791, iteration: 179999
loss: 0.9995619058609009,grad_norm: 0.8900335066810913, iteration: 180000
Evaluating at step 180000
{'val': 0.994650062173605, 'test': 2.338921936175313}
loss: 1.0076971054077148,grad_norm: 0.9999990295274628, iteration: 180001
loss: 0.9985494017601013,grad_norm: 0.817741500577598, iteration: 180002
loss: 0.999731719493866,grad_norm: 0.9999991080735933, iteration: 180003
loss: 0.9809895753860474,grad_norm: 0.9648358099957687, iteration: 180004
loss: 0.9970725774765015,grad_norm: 0.9999991332410845, iteration: 180005
loss: 1.0231631994247437,grad_norm: 0.8588344721268254, iteration: 180006
loss: 0.9768651723861694,grad_norm: 0.9645734270006491, iteration: 180007
loss: 0.9803231954574585,grad_norm: 0.9993991944307915, iteration: 180008
loss: 1.0195229053497314,grad_norm: 0.9999989672133046, iteration: 180009
loss: 1.002295970916748,grad_norm: 0.9999990238929302, iteration: 180010
loss: 0.9634758830070496,grad_norm: 0.9198853595763128, iteration: 180011
loss: 0.9875620603561401,grad_norm: 0.9444075907443535, iteration: 180012
loss: 1.015445590019226,grad_norm: 0.9999992404104358, iteration: 180013
loss: 1.009851098060608,grad_norm: 0.8615989426700621, iteration: 180014
loss: 1.0671662092208862,grad_norm: 0.9999994029008175, iteration: 180015
loss: 0.9974379539489746,grad_norm: 0.9163273791294287, iteration: 180016
loss: 1.0106139183044434,grad_norm: 0.9077600949942393, iteration: 180017
loss: 0.9736979603767395,grad_norm: 0.9403908473574206, iteration: 180018
loss: 0.9737157821655273,grad_norm: 0.7435498761238687, iteration: 180019
loss: 0.9928940534591675,grad_norm: 0.9550856845738056, iteration: 180020
loss: 0.9965589046478271,grad_norm: 0.9873149636179228, iteration: 180021
loss: 0.9549577832221985,grad_norm: 0.9127773251086555, iteration: 180022
loss: 0.9913644790649414,grad_norm: 0.9999992731552321, iteration: 180023
loss: 0.9756407737731934,grad_norm: 0.8798416645873064, iteration: 180024
loss: 0.9751300811767578,grad_norm: 0.813415591700517, iteration: 180025
loss: 0.9775543212890625,grad_norm: 0.8517447581402335, iteration: 180026
loss: 1.023162603378296,grad_norm: 0.9999991495925703, iteration: 180027
loss: 0.990385115146637,grad_norm: 0.9368831833961169, iteration: 180028
loss: 0.9839749336242676,grad_norm: 0.9999991481832761, iteration: 180029
loss: 0.9857515096664429,grad_norm: 0.8881818699681197, iteration: 180030
loss: 1.0038074254989624,grad_norm: 0.9999990730820205, iteration: 180031
loss: 1.0051329135894775,grad_norm: 0.9999992208448051, iteration: 180032
loss: 1.0601606369018555,grad_norm: 0.9999991994439409, iteration: 180033
loss: 1.0169715881347656,grad_norm: 0.9999990428825489, iteration: 180034
loss: 0.9937396049499512,grad_norm: 0.8502579136305877, iteration: 180035
loss: 1.0172901153564453,grad_norm: 0.8132184258036877, iteration: 180036
loss: 0.9940109252929688,grad_norm: 0.9154689717911955, iteration: 180037
loss: 1.0166840553283691,grad_norm: 0.9666597790693581, iteration: 180038
loss: 1.0561772584915161,grad_norm: 0.9999991228651338, iteration: 180039
loss: 1.0455623865127563,grad_norm: 0.9399800687091446, iteration: 180040
loss: 0.9815593957901001,grad_norm: 0.9999993047581366, iteration: 180041
loss: 1.0100758075714111,grad_norm: 0.9999991384021314, iteration: 180042
loss: 1.0215054750442505,grad_norm: 0.9471708510311126, iteration: 180043
loss: 0.9846513867378235,grad_norm: 0.9999990615963186, iteration: 180044
loss: 1.006797432899475,grad_norm: 0.999999181281608, iteration: 180045
loss: 1.0235114097595215,grad_norm: 0.974306888457914, iteration: 180046
loss: 0.9831920266151428,grad_norm: 0.9889929210895558, iteration: 180047
loss: 1.0101784467697144,grad_norm: 0.9999990906515076, iteration: 180048
loss: 1.01334810256958,grad_norm: 0.8360051447620245, iteration: 180049
loss: 0.97304368019104,grad_norm: 0.9999990362569929, iteration: 180050
loss: 0.9773117303848267,grad_norm: 0.9999991220750288, iteration: 180051
loss: 1.029433250427246,grad_norm: 0.9999994619807784, iteration: 180052
loss: 1.033315896987915,grad_norm: 0.9643163625701197, iteration: 180053
loss: 1.0021138191223145,grad_norm: 0.960806682025218, iteration: 180054
loss: 0.9838691353797913,grad_norm: 0.9999989813300214, iteration: 180055
loss: 0.9877134561538696,grad_norm: 0.9999991747698438, iteration: 180056
loss: 0.9947729110717773,grad_norm: 0.9721597242642072, iteration: 180057
loss: 0.9960570931434631,grad_norm: 0.8542483017922056, iteration: 180058
loss: 1.0252736806869507,grad_norm: 0.9999989692535252, iteration: 180059
loss: 1.0082776546478271,grad_norm: 0.9540405589708948, iteration: 180060
loss: 0.9928632378578186,grad_norm: 0.9138505010772988, iteration: 180061
loss: 1.0137503147125244,grad_norm: 0.9999992738735192, iteration: 180062
loss: 0.9967306852340698,grad_norm: 0.8808757091630812, iteration: 180063
loss: 1.019377589225769,grad_norm: 0.9999992997012114, iteration: 180064
loss: 1.0410089492797852,grad_norm: 0.9748289415046646, iteration: 180065
loss: 1.018546462059021,grad_norm: 0.9999990453675615, iteration: 180066
loss: 1.0030008554458618,grad_norm: 0.943258749318796, iteration: 180067
loss: 0.982117772102356,grad_norm: 0.999999104201228, iteration: 180068
loss: 1.006570816040039,grad_norm: 0.9999991058061852, iteration: 180069
loss: 0.9774218201637268,grad_norm: 0.9763417566848417, iteration: 180070
loss: 0.9709834456443787,grad_norm: 0.9999998467229818, iteration: 180071
loss: 1.064054012298584,grad_norm: 0.999999193800808, iteration: 180072
loss: 1.0049593448638916,grad_norm: 0.9999990863678266, iteration: 180073
loss: 1.018602728843689,grad_norm: 0.9999989000768752, iteration: 180074
loss: 0.9881570935249329,grad_norm: 0.9170140610704938, iteration: 180075
loss: 1.0194287300109863,grad_norm: 0.9999992251411723, iteration: 180076
loss: 1.03720223903656,grad_norm: 0.9501858760986083, iteration: 180077
loss: 0.9669409990310669,grad_norm: 0.9894471269399497, iteration: 180078
loss: 0.9997702836990356,grad_norm: 0.9224503166424417, iteration: 180079
loss: 0.994536280632019,grad_norm: 0.9999989956119645, iteration: 180080
loss: 0.9955719113349915,grad_norm: 0.9963686902581508, iteration: 180081
loss: 1.0116642713546753,grad_norm: 0.9999990902323282, iteration: 180082
loss: 1.0273194313049316,grad_norm: 0.9999990889809885, iteration: 180083
loss: 1.034913420677185,grad_norm: 0.8994058914946819, iteration: 180084
loss: 0.9769884943962097,grad_norm: 0.999998971325452, iteration: 180085
loss: 0.9779847264289856,grad_norm: 0.9034765034107488, iteration: 180086
loss: 0.9508914947509766,grad_norm: 0.9798506025289001, iteration: 180087
loss: 0.9821865558624268,grad_norm: 0.901223250744303, iteration: 180088
loss: 1.0212035179138184,grad_norm: 0.9999991504736914, iteration: 180089
loss: 0.9641674160957336,grad_norm: 0.9999990392305406, iteration: 180090
loss: 0.9920687675476074,grad_norm: 0.9999991739371822, iteration: 180091
loss: 0.9813086986541748,grad_norm: 0.9813340017134496, iteration: 180092
loss: 1.0398614406585693,grad_norm: 0.9824759233668829, iteration: 180093
loss: 0.992372989654541,grad_norm: 0.9398849680684774, iteration: 180094
loss: 0.9952316284179688,grad_norm: 0.7974687309932321, iteration: 180095
loss: 0.988549530506134,grad_norm: 0.9999992091026383, iteration: 180096
loss: 1.015049934387207,grad_norm: 0.999999413205406, iteration: 180097
loss: 0.9813200235366821,grad_norm: 0.8417821919555942, iteration: 180098
loss: 1.0016010999679565,grad_norm: 0.8888878564372382, iteration: 180099
loss: 1.0200797319412231,grad_norm: 0.9999993794145857, iteration: 180100
loss: 0.9756155610084534,grad_norm: 0.8338211939422038, iteration: 180101
loss: 0.9948747158050537,grad_norm: 0.9172830743661049, iteration: 180102
loss: 1.0085580348968506,grad_norm: 0.9999991822625899, iteration: 180103
loss: 0.968926191329956,grad_norm: 0.9999991757778437, iteration: 180104
loss: 0.9695558547973633,grad_norm: 0.991038436896578, iteration: 180105
loss: 1.0325638055801392,grad_norm: 0.9175289584313723, iteration: 180106
loss: 1.0243793725967407,grad_norm: 0.9178898133761192, iteration: 180107
loss: 0.97420334815979,grad_norm: 0.8142756041468011, iteration: 180108
loss: 0.9860948920249939,grad_norm: 0.9533976325052749, iteration: 180109
loss: 1.0118657350540161,grad_norm: 0.9999994487459886, iteration: 180110
loss: 0.9852954745292664,grad_norm: 0.7998591519603845, iteration: 180111
loss: 1.0163555145263672,grad_norm: 0.9823621973188773, iteration: 180112
loss: 1.027974009513855,grad_norm: 0.9999990194906279, iteration: 180113
loss: 1.0009937286376953,grad_norm: 0.8270943615915173, iteration: 180114
loss: 0.9852427840232849,grad_norm: 0.8462374028636713, iteration: 180115
loss: 0.9775866270065308,grad_norm: 0.9999990705951245, iteration: 180116
loss: 1.0176458358764648,grad_norm: 0.9999997248669366, iteration: 180117
loss: 1.0226140022277832,grad_norm: 0.9268393737898758, iteration: 180118
loss: 1.016480565071106,grad_norm: 0.9999990323736189, iteration: 180119
loss: 0.9577438831329346,grad_norm: 0.92681183599489, iteration: 180120
loss: 0.9999585747718811,grad_norm: 0.9319573655451249, iteration: 180121
loss: 1.0150668621063232,grad_norm: 0.9862515987602793, iteration: 180122
loss: 1.0194579362869263,grad_norm: 0.9999991075874005, iteration: 180123
loss: 1.0248396396636963,grad_norm: 0.9999990531643744, iteration: 180124
loss: 0.9666288495063782,grad_norm: 0.9037413193881101, iteration: 180125
loss: 0.947533905506134,grad_norm: 0.8624244021282638, iteration: 180126
loss: 1.004365086555481,grad_norm: 0.9999989990655265, iteration: 180127
loss: 1.0151578187942505,grad_norm: 0.9833852199907304, iteration: 180128
loss: 1.010873794555664,grad_norm: 0.8883647520550234, iteration: 180129
loss: 1.0219210386276245,grad_norm: 0.9999991301112457, iteration: 180130
loss: 0.9793481826782227,grad_norm: 0.9808656716774145, iteration: 180131
loss: 1.0034801959991455,grad_norm: 0.9999991427367567, iteration: 180132
loss: 0.9889283180236816,grad_norm: 0.989954746989072, iteration: 180133
loss: 1.0260034799575806,grad_norm: 0.9999992462718776, iteration: 180134
loss: 0.9363572001457214,grad_norm: 0.9999991783970109, iteration: 180135
loss: 1.0004992485046387,grad_norm: 0.9999989918678219, iteration: 180136
loss: 0.9967755675315857,grad_norm: 0.9999989817475327, iteration: 180137
loss: 1.023327112197876,grad_norm: 0.9286413165703915, iteration: 180138
loss: 1.0279005765914917,grad_norm: 0.920403950941481, iteration: 180139
loss: 1.0120123624801636,grad_norm: 0.9999992499069594, iteration: 180140
loss: 0.954106330871582,grad_norm: 0.9380387023390327, iteration: 180141
loss: 1.0143202543258667,grad_norm: 0.9550049282495681, iteration: 180142
loss: 1.0547807216644287,grad_norm: 0.9999991817330747, iteration: 180143
loss: 1.0090954303741455,grad_norm: 0.9999992010662314, iteration: 180144
loss: 0.99014812707901,grad_norm: 0.9825882073937983, iteration: 180145
loss: 0.985800564289093,grad_norm: 0.9999999570263974, iteration: 180146
loss: 0.9837920665740967,grad_norm: 0.9999989576240421, iteration: 180147
loss: 1.0265575647354126,grad_norm: 0.9999992191247891, iteration: 180148
loss: 1.010749340057373,grad_norm: 0.9999990606943174, iteration: 180149
loss: 1.0112487077713013,grad_norm: 0.9999989753887314, iteration: 180150
loss: 0.9610171318054199,grad_norm: 0.9498642112861271, iteration: 180151
loss: 0.9722219705581665,grad_norm: 0.9954988763168827, iteration: 180152
loss: 0.9924598932266235,grad_norm: 0.9999990568739776, iteration: 180153
loss: 1.055254340171814,grad_norm: 0.9999992606296936, iteration: 180154
loss: 0.986402153968811,grad_norm: 0.8845125161915113, iteration: 180155
loss: 0.986994206905365,grad_norm: 0.9180988060748798, iteration: 180156
loss: 1.0139657258987427,grad_norm: 0.9999990059522904, iteration: 180157
loss: 0.980792224407196,grad_norm: 0.8388970398327243, iteration: 180158
loss: 1.0101771354675293,grad_norm: 0.9999991425913568, iteration: 180159
loss: 0.9890907406806946,grad_norm: 0.9765312151798976, iteration: 180160
loss: 1.0292025804519653,grad_norm: 0.933953156628654, iteration: 180161
loss: 0.964331865310669,grad_norm: 0.82236121191845, iteration: 180162
loss: 1.0230313539505005,grad_norm: 0.9999992086971158, iteration: 180163
loss: 1.0306893587112427,grad_norm: 0.9999992588498634, iteration: 180164
loss: 0.9956150650978088,grad_norm: 0.9762068716689499, iteration: 180165
loss: 1.027985692024231,grad_norm: 0.999999167326087, iteration: 180166
loss: 1.0048284530639648,grad_norm: 0.9999996202929317, iteration: 180167
loss: 1.0145206451416016,grad_norm: 0.9496872708932387, iteration: 180168
loss: 1.0047281980514526,grad_norm: 0.9999991983619421, iteration: 180169
loss: 1.0196739435195923,grad_norm: 0.9999990973751284, iteration: 180170
loss: 0.9990891814231873,grad_norm: 0.9694329945431182, iteration: 180171
loss: 1.0006994009017944,grad_norm: 0.9999991511215925, iteration: 180172
loss: 0.9905152916908264,grad_norm: 0.9999989652029008, iteration: 180173
loss: 1.010191798210144,grad_norm: 0.999999085168818, iteration: 180174
loss: 0.9996231198310852,grad_norm: 0.8154019578400884, iteration: 180175
loss: 0.9955506324768066,grad_norm: 0.9759176651284042, iteration: 180176
loss: 0.991718053817749,grad_norm: 0.9999998775135421, iteration: 180177
loss: 1.011019229888916,grad_norm: 0.901909909638075, iteration: 180178
loss: 0.9818463921546936,grad_norm: 0.9999992967800938, iteration: 180179
loss: 0.9778737425804138,grad_norm: 0.8244718181365187, iteration: 180180
loss: 1.0014361143112183,grad_norm: 0.8708307016702266, iteration: 180181
loss: 1.0199137926101685,grad_norm: 0.9999991631586553, iteration: 180182
loss: 1.02882981300354,grad_norm: 0.9653744727274151, iteration: 180183
loss: 0.9668244123458862,grad_norm: 0.9504597330070491, iteration: 180184
loss: 0.9833195805549622,grad_norm: 0.9522967277991529, iteration: 180185
loss: 1.0001333951950073,grad_norm: 0.9709482098508608, iteration: 180186
loss: 0.9693518280982971,grad_norm: 0.9976072788278088, iteration: 180187
loss: 1.007325530052185,grad_norm: 0.8362932761667327, iteration: 180188
loss: 1.015204906463623,grad_norm: 0.9999992266570309, iteration: 180189
loss: 1.0118025541305542,grad_norm: 0.9999988797124783, iteration: 180190
loss: 1.0298150777816772,grad_norm: 0.8997882332337493, iteration: 180191
loss: 1.0614391565322876,grad_norm: 0.9999995462558283, iteration: 180192
loss: 1.00657320022583,grad_norm: 0.9999992104035812, iteration: 180193
loss: 1.0067298412322998,grad_norm: 0.9999990634567787, iteration: 180194
loss: 1.0005669593811035,grad_norm: 0.9791376688252528, iteration: 180195
loss: 0.9624432325363159,grad_norm: 0.8545006260890681, iteration: 180196
loss: 0.9977133870124817,grad_norm: 0.9379559457951572, iteration: 180197
loss: 0.962696373462677,grad_norm: 0.9999993576977928, iteration: 180198
loss: 1.0055458545684814,grad_norm: 0.9282714002754794, iteration: 180199
loss: 1.002789855003357,grad_norm: 0.8453058130448967, iteration: 180200
loss: 0.9933574795722961,grad_norm: 0.9999993562252254, iteration: 180201
loss: 0.9924805760383606,grad_norm: 0.8400857815213778, iteration: 180202
loss: 0.9968493580818176,grad_norm: 0.8787479534704884, iteration: 180203
loss: 1.0062142610549927,grad_norm: 0.9999992473568985, iteration: 180204
loss: 1.0079809427261353,grad_norm: 0.9999995040763949, iteration: 180205
loss: 1.0148144960403442,grad_norm: 0.8485629860095231, iteration: 180206
loss: 1.025770902633667,grad_norm: 0.8974248298971491, iteration: 180207
loss: 1.014114499092102,grad_norm: 0.9999991829900776, iteration: 180208
loss: 0.9987338781356812,grad_norm: 0.95140277274688, iteration: 180209
loss: 0.9978805184364319,grad_norm: 0.9999990450714077, iteration: 180210
loss: 1.0077699422836304,grad_norm: 0.9591263605135198, iteration: 180211
loss: 1.0142794847488403,grad_norm: 0.9064217793457667, iteration: 180212
loss: 1.0052146911621094,grad_norm: 0.9999990338362785, iteration: 180213
loss: 0.999549150466919,grad_norm: 0.9524687890059567, iteration: 180214
loss: 1.0050607919692993,grad_norm: 0.8809983474453695, iteration: 180215
loss: 0.9827267527580261,grad_norm: 0.99999917581277, iteration: 180216
loss: 1.0013548135757446,grad_norm: 0.9236979734434394, iteration: 180217
loss: 1.0207515954971313,grad_norm: 0.9999996777999264, iteration: 180218
loss: 1.0094014406204224,grad_norm: 0.934017724267834, iteration: 180219
loss: 0.9699636697769165,grad_norm: 0.9999989951356056, iteration: 180220
loss: 0.967289388179779,grad_norm: 0.999999239766418, iteration: 180221
loss: 0.9961109161376953,grad_norm: 0.8280891675894005, iteration: 180222
loss: 1.0367999076843262,grad_norm: 0.914805980237374, iteration: 180223
loss: 0.9841837286949158,grad_norm: 0.9999990803535268, iteration: 180224
loss: 0.9914834499359131,grad_norm: 0.9999991623453238, iteration: 180225
loss: 0.9798031449317932,grad_norm: 0.9999995926228215, iteration: 180226
loss: 0.9930097460746765,grad_norm: 0.9546541958886056, iteration: 180227
loss: 0.987946093082428,grad_norm: 0.9814438952295247, iteration: 180228
loss: 0.9506359100341797,grad_norm: 0.9999991964551818, iteration: 180229
loss: 0.9933149218559265,grad_norm: 0.8395313563661746, iteration: 180230
loss: 1.005257487297058,grad_norm: 0.9999992147840587, iteration: 180231
loss: 1.0203684568405151,grad_norm: 0.8526605278119024, iteration: 180232
loss: 0.9892414808273315,grad_norm: 0.9999992215094682, iteration: 180233
loss: 1.0316543579101562,grad_norm: 0.9380845790216307, iteration: 180234
loss: 1.0051366090774536,grad_norm: 0.9999989874477253, iteration: 180235
loss: 0.9982355237007141,grad_norm: 0.9999991324084185, iteration: 180236
loss: 0.9843876957893372,grad_norm: 0.7898854360598068, iteration: 180237
loss: 1.02448308467865,grad_norm: 0.946232716348432, iteration: 180238
loss: 0.9923419952392578,grad_norm: 0.999999102556435, iteration: 180239
loss: 0.9693748950958252,grad_norm: 0.8878127808399546, iteration: 180240
loss: 0.9901880025863647,grad_norm: 0.9999991801120545, iteration: 180241
loss: 1.0357273817062378,grad_norm: 0.9999993119300402, iteration: 180242
loss: 0.9753623604774475,grad_norm: 0.901763005740553, iteration: 180243
loss: 1.022656798362732,grad_norm: 0.9506343742741091, iteration: 180244
loss: 1.042855143547058,grad_norm: 0.9999993855815051, iteration: 180245
loss: 1.0181190967559814,grad_norm: 0.9999990808913962, iteration: 180246
loss: 0.9639308452606201,grad_norm: 0.9999989358452592, iteration: 180247
loss: 1.0214664936065674,grad_norm: 0.9773461438723439, iteration: 180248
loss: 1.014259934425354,grad_norm: 0.9529925490100697, iteration: 180249
loss: 1.003578543663025,grad_norm: 0.9999990365545876, iteration: 180250
loss: 0.9979780316352844,grad_norm: 0.8212254059628734, iteration: 180251
loss: 0.9975677728652954,grad_norm: 0.9999992091864143, iteration: 180252
loss: 0.9881730675697327,grad_norm: 0.8873755322068019, iteration: 180253
loss: 0.9995117783546448,grad_norm: 0.9113624696866125, iteration: 180254
loss: 0.9836763739585876,grad_norm: 0.9563302358057182, iteration: 180255
loss: 1.0378484725952148,grad_norm: 0.9026646054814995, iteration: 180256
loss: 1.033805251121521,grad_norm: 1.0000000726034934, iteration: 180257
loss: 0.9881506562232971,grad_norm: 0.8038929141481665, iteration: 180258
loss: 0.9721969366073608,grad_norm: 0.8912657826322464, iteration: 180259
loss: 1.0167452096939087,grad_norm: 0.9521156724772788, iteration: 180260
loss: 1.0596188306808472,grad_norm: 0.999999827046466, iteration: 180261
loss: 0.9983403086662292,grad_norm: 0.8333821948946131, iteration: 180262
loss: 1.0160562992095947,grad_norm: 0.8989563335212613, iteration: 180263
loss: 1.0154690742492676,grad_norm: 0.9999991061803754, iteration: 180264
loss: 1.0106897354125977,grad_norm: 0.9999989816825853, iteration: 180265
loss: 0.985497236251831,grad_norm: 0.8984669366196292, iteration: 180266
loss: 0.990556538105011,grad_norm: 0.9757895431833872, iteration: 180267
loss: 1.0223559141159058,grad_norm: 0.9999991512302432, iteration: 180268
loss: 0.9978548884391785,grad_norm: 0.9999990774968534, iteration: 180269
loss: 1.001773476600647,grad_norm: 0.9999993732007749, iteration: 180270
loss: 0.9882484078407288,grad_norm: 0.9999988912898193, iteration: 180271
loss: 0.9985365867614746,grad_norm: 0.9042276806832142, iteration: 180272
loss: 0.9935044646263123,grad_norm: 0.9999990489920048, iteration: 180273
loss: 0.9736965298652649,grad_norm: 0.9456216852987813, iteration: 180274
loss: 1.04542875289917,grad_norm: 0.8656652532826352, iteration: 180275
loss: 1.1030993461608887,grad_norm: 0.9999997643910926, iteration: 180276
loss: 0.9989073276519775,grad_norm: 0.797002884101413, iteration: 180277
loss: 1.0224000215530396,grad_norm: 0.9976973167519607, iteration: 180278
loss: 1.024418592453003,grad_norm: 0.8860200386252576, iteration: 180279
loss: 0.9766327738761902,grad_norm: 0.9665958768055493, iteration: 180280
loss: 0.9845326542854309,grad_norm: 0.999999144446496, iteration: 180281
loss: 1.0016148090362549,grad_norm: 0.9999991814133514, iteration: 180282
loss: 0.9758455753326416,grad_norm: 0.9254373881607186, iteration: 180283
loss: 1.0002521276474,grad_norm: 0.9883225708975585, iteration: 180284
loss: 1.014638066291809,grad_norm: 0.9999996040197491, iteration: 180285
loss: 1.0015009641647339,grad_norm: 0.906066525851575, iteration: 180286
loss: 0.9692923426628113,grad_norm: 0.9586421732021608, iteration: 180287
loss: 0.9898819327354431,grad_norm: 0.9999993297775561, iteration: 180288
loss: 1.0357369184494019,grad_norm: 0.9456996213463148, iteration: 180289
loss: 0.9913734197616577,grad_norm: 0.7916799718842634, iteration: 180290
loss: 0.9996004700660706,grad_norm: 0.9999990774137745, iteration: 180291
loss: 1.0311312675476074,grad_norm: 0.9999991679891227, iteration: 180292
loss: 0.9717004299163818,grad_norm: 0.937406951515198, iteration: 180293
loss: 1.013696551322937,grad_norm: 0.9999991379061879, iteration: 180294
loss: 0.9970409274101257,grad_norm: 0.8430451827828677, iteration: 180295
loss: 1.0073604583740234,grad_norm: 0.8675145306946874, iteration: 180296
loss: 1.0056246519088745,grad_norm: 0.9641543646451864, iteration: 180297
loss: 1.0004655122756958,grad_norm: 0.9282788053817727, iteration: 180298
loss: 1.025735855102539,grad_norm: 0.9613951554047561, iteration: 180299
loss: 0.9687144160270691,grad_norm: 0.9999990603730812, iteration: 180300
loss: 1.0178104639053345,grad_norm: 0.8298777908349665, iteration: 180301
loss: 1.0161893367767334,grad_norm: 0.9473090155830138, iteration: 180302
loss: 1.0180792808532715,grad_norm: 0.8006505369718052, iteration: 180303
loss: 1.0345388650894165,grad_norm: 0.9999998854053175, iteration: 180304
loss: 1.0148833990097046,grad_norm: 0.9999990606228272, iteration: 180305
loss: 1.0008280277252197,grad_norm: 0.9999990131452362, iteration: 180306
loss: 1.0078308582305908,grad_norm: 0.9604239718002012, iteration: 180307
loss: 0.9882362484931946,grad_norm: 0.9306501938170613, iteration: 180308
loss: 0.9908626079559326,grad_norm: 0.9666775218212497, iteration: 180309
loss: 0.9870849847793579,grad_norm: 0.9999991917933129, iteration: 180310
loss: 1.063153862953186,grad_norm: 0.9999990742217206, iteration: 180311
loss: 0.9624462723731995,grad_norm: 0.9573731729550796, iteration: 180312
loss: 1.0020438432693481,grad_norm: 0.9999990612378733, iteration: 180313
loss: 1.0127962827682495,grad_norm: 0.9999990311787095, iteration: 180314
loss: 1.0166637897491455,grad_norm: 0.9082055436693459, iteration: 180315
loss: 0.992554783821106,grad_norm: 0.9903913694584415, iteration: 180316
loss: 0.9974508285522461,grad_norm: 0.9022253671057666, iteration: 180317
loss: 1.014758586883545,grad_norm: 0.9999991968582225, iteration: 180318
loss: 1.0392036437988281,grad_norm: 0.9999992630338895, iteration: 180319
loss: 1.0470017194747925,grad_norm: 0.9999991482154633, iteration: 180320
loss: 1.1035798788070679,grad_norm: 0.9999992161812017, iteration: 180321
loss: 1.0068111419677734,grad_norm: 0.9493629476850883, iteration: 180322
loss: 1.0147658586502075,grad_norm: 0.8859075064500644, iteration: 180323
loss: 0.9806230664253235,grad_norm: 0.8710502038463815, iteration: 180324
loss: 1.032422661781311,grad_norm: 0.9624566864788937, iteration: 180325
loss: 1.0042335987091064,grad_norm: 0.8290492052933017, iteration: 180326
loss: 1.035616397857666,grad_norm: 0.999999172869865, iteration: 180327
loss: 1.0204426050186157,grad_norm: 0.8162893723155098, iteration: 180328
loss: 0.9973851442337036,grad_norm: 0.9999990960833588, iteration: 180329
loss: 1.0401746034622192,grad_norm: 0.9999991504377058, iteration: 180330
loss: 1.0029501914978027,grad_norm: 0.9999991408767591, iteration: 180331
loss: 1.0340107679367065,grad_norm: 0.9999990043000334, iteration: 180332
loss: 0.9866194725036621,grad_norm: 0.9999990478852626, iteration: 180333
loss: 1.0152199268341064,grad_norm: 0.7429112505549176, iteration: 180334
loss: 0.9971926808357239,grad_norm: 0.9182235404692708, iteration: 180335
loss: 1.049396276473999,grad_norm: 0.9999998412881814, iteration: 180336
loss: 1.0002808570861816,grad_norm: 0.9999990812718617, iteration: 180337
loss: 1.0140273571014404,grad_norm: 0.8871964422472086, iteration: 180338
loss: 0.9779860973358154,grad_norm: 0.9288380608879782, iteration: 180339
loss: 0.9890035390853882,grad_norm: 0.9569494585794902, iteration: 180340
loss: 1.0158957242965698,grad_norm: 0.9529274195494237, iteration: 180341
loss: 0.9834819436073303,grad_norm: 0.8662071172180261, iteration: 180342
loss: 0.9676351547241211,grad_norm: 0.9847880408421088, iteration: 180343
loss: 1.009408712387085,grad_norm: 0.9999991343560158, iteration: 180344
loss: 0.9728025794029236,grad_norm: 0.9999992222345846, iteration: 180345
loss: 0.9605652093887329,grad_norm: 0.9999990380115876, iteration: 180346
loss: 1.0105055570602417,grad_norm: 0.9999990878347054, iteration: 180347
loss: 1.0109517574310303,grad_norm: 0.9999990342888752, iteration: 180348
loss: 1.0185216665267944,grad_norm: 0.9999991129355941, iteration: 180349
loss: 0.988142192363739,grad_norm: 0.9392421298631459, iteration: 180350
loss: 1.0197595357894897,grad_norm: 0.7980441467256225, iteration: 180351
loss: 0.9690164923667908,grad_norm: 0.9653807340269905, iteration: 180352
loss: 1.0134649276733398,grad_norm: 0.8444529882493146, iteration: 180353
loss: 0.9824692010879517,grad_norm: 0.9877818128308944, iteration: 180354
loss: 0.9874430894851685,grad_norm: 0.9999990670108438, iteration: 180355
loss: 0.9954873919487,grad_norm: 0.9999992564291649, iteration: 180356
loss: 0.999459981918335,grad_norm: 0.9999992778345878, iteration: 180357
loss: 1.010331153869629,grad_norm: 0.9333328040715428, iteration: 180358
loss: 1.006247639656067,grad_norm: 0.9664611579095952, iteration: 180359
loss: 0.9748781323432922,grad_norm: 0.9999992693230836, iteration: 180360
loss: 1.0065370798110962,grad_norm: 0.9999990932438576, iteration: 180361
loss: 0.9917799234390259,grad_norm: 0.9622617113384612, iteration: 180362
loss: 1.0068954229354858,grad_norm: 0.9999990711221056, iteration: 180363
loss: 0.96965491771698,grad_norm: 0.9999990523517434, iteration: 180364
loss: 1.0285590887069702,grad_norm: 0.8448985695814457, iteration: 180365
loss: 1.0910773277282715,grad_norm: 0.9999998552554807, iteration: 180366
loss: 0.9970926642417908,grad_norm: 0.9999991385089069, iteration: 180367
loss: 0.9513955116271973,grad_norm: 0.8699745754704863, iteration: 180368
loss: 1.0026342868804932,grad_norm: 0.9999992051425731, iteration: 180369
loss: 1.0027748346328735,grad_norm: 0.9025691719337549, iteration: 180370
loss: 0.9830639958381653,grad_norm: 0.9175211818879137, iteration: 180371
loss: 0.9774726629257202,grad_norm: 0.9999991121408159, iteration: 180372
loss: 0.9975783824920654,grad_norm: 0.9652503822015264, iteration: 180373
loss: 0.9878106117248535,grad_norm: 0.9999993312539979, iteration: 180374
loss: 0.9818150997161865,grad_norm: 0.8969038308835889, iteration: 180375
loss: 0.9900432825088501,grad_norm: 0.9999990815400502, iteration: 180376
loss: 1.0274443626403809,grad_norm: 0.9999992486952528, iteration: 180377
loss: 1.0285239219665527,grad_norm: 0.9999991422058404, iteration: 180378
loss: 0.9757875204086304,grad_norm: 0.9999990504391641, iteration: 180379
loss: 0.9736775159835815,grad_norm: 0.9808498723581873, iteration: 180380
loss: 1.0099457502365112,grad_norm: 0.9999990620447684, iteration: 180381
loss: 1.0159584283828735,grad_norm: 0.9661452510114465, iteration: 180382
loss: 0.9709882736206055,grad_norm: 0.9058980720713281, iteration: 180383
loss: 0.9761654138565063,grad_norm: 0.9196858090290764, iteration: 180384
loss: 1.0069485902786255,grad_norm: 0.9999991972457242, iteration: 180385
loss: 1.0128495693206787,grad_norm: 0.9999991493854781, iteration: 180386
loss: 1.000060796737671,grad_norm: 0.9999991004750811, iteration: 180387
loss: 1.0209461450576782,grad_norm: 0.8956349143200394, iteration: 180388
loss: 0.9914620518684387,grad_norm: 0.9999991499686587, iteration: 180389
loss: 1.0323357582092285,grad_norm: 0.8794607286029833, iteration: 180390
loss: 0.9926741123199463,grad_norm: 0.9999990809632655, iteration: 180391
loss: 1.0303230285644531,grad_norm: 0.9999991238170247, iteration: 180392
loss: 0.9861052632331848,grad_norm: 0.9999995577529855, iteration: 180393
loss: 1.0893465280532837,grad_norm: 0.9999991756099591, iteration: 180394
loss: 0.9714909195899963,grad_norm: 0.9999990641289871, iteration: 180395
loss: 1.0244590044021606,grad_norm: 0.9999990297999682, iteration: 180396
loss: 1.0072717666625977,grad_norm: 0.787254942898196, iteration: 180397
loss: 1.013019323348999,grad_norm: 0.9999995862928787, iteration: 180398
loss: 0.9807802438735962,grad_norm: 0.9999991848217276, iteration: 180399
loss: 0.9838992357254028,grad_norm: 0.8716429279582146, iteration: 180400
loss: 1.0267140865325928,grad_norm: 0.9999990394564378, iteration: 180401
loss: 0.9943299889564514,grad_norm: 0.9644424084909321, iteration: 180402
loss: 1.0221009254455566,grad_norm: 0.8306712029396572, iteration: 180403
loss: 0.9714990258216858,grad_norm: 0.9999991893618498, iteration: 180404
loss: 0.9933724403381348,grad_norm: 0.9782927451601645, iteration: 180405
loss: 0.967964231967926,grad_norm: 0.9275588318188454, iteration: 180406
loss: 0.9923208355903625,grad_norm: 0.9999990868325698, iteration: 180407
loss: 1.0381830930709839,grad_norm: 0.8983139973793557, iteration: 180408
loss: 0.9611307382583618,grad_norm: 0.8924610943552724, iteration: 180409
loss: 0.979174017906189,grad_norm: 0.8411463683604967, iteration: 180410
loss: 1.0151047706604004,grad_norm: 0.9999990663231927, iteration: 180411
loss: 0.9958405494689941,grad_norm: 0.9409124331470763, iteration: 180412
loss: 0.9896768927574158,grad_norm: 0.9999991309468288, iteration: 180413
loss: 0.9879249930381775,grad_norm: 0.9999992117578227, iteration: 180414
loss: 0.9840334057807922,grad_norm: 0.9999991410211687, iteration: 180415
loss: 0.9795219302177429,grad_norm: 0.8748902522900492, iteration: 180416
loss: 0.9854719638824463,grad_norm: 0.9656107486952262, iteration: 180417
loss: 1.0121560096740723,grad_norm: 0.9074141806186705, iteration: 180418
loss: 0.9935435652732849,grad_norm: 0.9999995531831967, iteration: 180419
loss: 0.976683497428894,grad_norm: 0.9999991803586241, iteration: 180420
loss: 1.0502156019210815,grad_norm: 0.9999996080297595, iteration: 180421
loss: 0.9633519649505615,grad_norm: 0.9212967791871302, iteration: 180422
loss: 0.9594602584838867,grad_norm: 0.9999991730833748, iteration: 180423
loss: 0.9855050444602966,grad_norm: 0.8886153172657142, iteration: 180424
loss: 1.0102671384811401,grad_norm: 0.9850970048961178, iteration: 180425
loss: 0.9757338166236877,grad_norm: 0.9999991655416012, iteration: 180426
loss: 1.0074814558029175,grad_norm: 0.9999990853091287, iteration: 180427
loss: 0.9907537698745728,grad_norm: 0.9766310623124084, iteration: 180428
loss: 1.0152950286865234,grad_norm: 0.9999990084518338, iteration: 180429
loss: 1.00034761428833,grad_norm: 0.9166731994915822, iteration: 180430
loss: 0.9978486895561218,grad_norm: 0.8374833758173441, iteration: 180431
loss: 1.0215073823928833,grad_norm: 0.9999991643828531, iteration: 180432
loss: 1.0381144285202026,grad_norm: 0.8758738722655638, iteration: 180433
loss: 1.0022791624069214,grad_norm: 0.9530576866714846, iteration: 180434
loss: 0.9741482734680176,grad_norm: 0.999999118425487, iteration: 180435
loss: 1.0363456010818481,grad_norm: 0.9344643772379372, iteration: 180436
loss: 0.9814454913139343,grad_norm: 0.9463180957391961, iteration: 180437
loss: 1.007595419883728,grad_norm: 0.9999989503702345, iteration: 180438
loss: 0.9892920255661011,grad_norm: 0.9999990593196441, iteration: 180439
loss: 1.0255374908447266,grad_norm: 0.9598638784278941, iteration: 180440
loss: 0.9730983376502991,grad_norm: 0.9510126260255726, iteration: 180441
loss: 1.0492430925369263,grad_norm: 0.9999996648061383, iteration: 180442
loss: 1.099533200263977,grad_norm: 0.9999993427681398, iteration: 180443
loss: 0.9948000311851501,grad_norm: 0.9999991641624049, iteration: 180444
loss: 1.013411521911621,grad_norm: 0.9999990055264123, iteration: 180445
loss: 0.9563313126564026,grad_norm: 0.9999991742366446, iteration: 180446
loss: 1.0057265758514404,grad_norm: 0.9263356496984747, iteration: 180447
loss: 0.9702082276344299,grad_norm: 0.9371313225429398, iteration: 180448
loss: 0.9807641506195068,grad_norm: 0.9999989959269844, iteration: 180449
loss: 0.9727981090545654,grad_norm: 0.9999992385138532, iteration: 180450
loss: 1.0103412866592407,grad_norm: 0.9999993636623311, iteration: 180451
loss: 1.0356013774871826,grad_norm: 0.995597763170822, iteration: 180452
loss: 1.0102803707122803,grad_norm: 0.9708716734706438, iteration: 180453
loss: 1.0023504495620728,grad_norm: 0.9999990508284284, iteration: 180454
loss: 0.9641798138618469,grad_norm: 0.9253726729169717, iteration: 180455
loss: 1.004753828048706,grad_norm: 0.8937779913535767, iteration: 180456
loss: 0.9898317456245422,grad_norm: 0.9626125811392651, iteration: 180457
loss: 0.9692168235778809,grad_norm: 0.9089972026210373, iteration: 180458
loss: 0.9783182740211487,grad_norm: 0.9216717299282753, iteration: 180459
loss: 1.0077050924301147,grad_norm: 0.999999248598266, iteration: 180460
loss: 0.985022783279419,grad_norm: 0.9828262779473798, iteration: 180461
loss: 1.0087898969650269,grad_norm: 0.9053227631446422, iteration: 180462
loss: 1.0123194456100464,grad_norm: 0.8766450736209987, iteration: 180463
loss: 0.9782962799072266,grad_norm: 0.8569301018603116, iteration: 180464
loss: 1.0142009258270264,grad_norm: 0.8457468727740839, iteration: 180465
loss: 0.9925881028175354,grad_norm: 0.9999992649930314, iteration: 180466
loss: 0.9871705174446106,grad_norm: 0.8339526185347871, iteration: 180467
loss: 0.9611198306083679,grad_norm: 0.9999990599766921, iteration: 180468
loss: 1.000784993171692,grad_norm: 0.9999990039467781, iteration: 180469
loss: 1.0002834796905518,grad_norm: 0.9041703256780559, iteration: 180470
loss: 0.9753932356834412,grad_norm: 0.9999991458319534, iteration: 180471
loss: 0.9582560062408447,grad_norm: 0.9947945188647839, iteration: 180472
loss: 1.0361734628677368,grad_norm: 0.9999993408854585, iteration: 180473
loss: 0.9871556758880615,grad_norm: 0.8621818251890125, iteration: 180474
loss: 1.0240994691848755,grad_norm: 0.999999068897969, iteration: 180475
loss: 0.9873332381248474,grad_norm: 0.9999991437942873, iteration: 180476
loss: 0.9840409755706787,grad_norm: 0.9443042768139768, iteration: 180477
loss: 1.143160343170166,grad_norm: 0.9999995543673651, iteration: 180478
loss: 1.1759198904037476,grad_norm: 0.9999991448695131, iteration: 180479
loss: 1.0184049606323242,grad_norm: 0.9999990193754241, iteration: 180480
loss: 1.0076481103897095,grad_norm: 0.9712822911187011, iteration: 180481
loss: 0.9949812889099121,grad_norm: 0.9999989968559801, iteration: 180482
loss: 0.949961245059967,grad_norm: 0.9400831025310251, iteration: 180483
loss: 0.9805594682693481,grad_norm: 0.9999991035535734, iteration: 180484
loss: 0.969057023525238,grad_norm: 0.9999990313809011, iteration: 180485
loss: 1.0073738098144531,grad_norm: 0.9999994173688256, iteration: 180486
loss: 0.9826939105987549,grad_norm: 0.9999992528308875, iteration: 180487
loss: 1.0077906847000122,grad_norm: 0.9999990891691998, iteration: 180488
loss: 0.998465359210968,grad_norm: 0.885136853948458, iteration: 180489
loss: 0.9888648390769958,grad_norm: 0.8671581633438199, iteration: 180490
loss: 0.9903885126113892,grad_norm: 0.9999992610436647, iteration: 180491
loss: 1.011846661567688,grad_norm: 0.9999991184057913, iteration: 180492
loss: 0.9996616244316101,grad_norm: 0.832670889020308, iteration: 180493
loss: 1.008095622062683,grad_norm: 0.9999990447711764, iteration: 180494
loss: 0.9947178959846497,grad_norm: 0.962591474747445, iteration: 180495
loss: 0.9865206480026245,grad_norm: 0.9729406687696098, iteration: 180496
loss: 0.9657203555107117,grad_norm: 0.9914623404414702, iteration: 180497
loss: 0.9896056652069092,grad_norm: 0.8413285900496138, iteration: 180498
loss: 1.021544337272644,grad_norm: 0.9999990763675193, iteration: 180499
loss: 1.0096452236175537,grad_norm: 0.8463556781107812, iteration: 180500
loss: 0.9865270853042603,grad_norm: 0.8943773503850136, iteration: 180501
loss: 0.9898127317428589,grad_norm: 0.9999990938949261, iteration: 180502
loss: 1.028106927871704,grad_norm: 0.99999966946748, iteration: 180503
loss: 0.9752300381660461,grad_norm: 0.933725849372292, iteration: 180504
loss: 1.1498793363571167,grad_norm: 0.9999990407087442, iteration: 180505
loss: 0.9710924029350281,grad_norm: 0.984701754738982, iteration: 180506
loss: 1.007968544960022,grad_norm: 0.9999990887642727, iteration: 180507
loss: 1.0117417573928833,grad_norm: 0.9999991032526392, iteration: 180508
loss: 1.026819109916687,grad_norm: 0.8799955828353367, iteration: 180509
loss: 1.0246853828430176,grad_norm: 0.9999990106528703, iteration: 180510
loss: 1.055320382118225,grad_norm: 0.9999995129447781, iteration: 180511
loss: 1.043594241142273,grad_norm: 0.9999999771154349, iteration: 180512
loss: 0.9898092150688171,grad_norm: 0.9682437496979343, iteration: 180513
loss: 0.9484990239143372,grad_norm: 0.9999992060543446, iteration: 180514
loss: 1.0918140411376953,grad_norm: 0.9999990794542519, iteration: 180515
loss: 0.9971585869789124,grad_norm: 0.9983727031902604, iteration: 180516
loss: 1.110429048538208,grad_norm: 0.9999993632464902, iteration: 180517
loss: 0.9806113243103027,grad_norm: 0.999998993238706, iteration: 180518
loss: 1.005568265914917,grad_norm: 0.9449310334748751, iteration: 180519
loss: 1.0583854913711548,grad_norm: 0.9999995941861729, iteration: 180520
loss: 1.0474276542663574,grad_norm: 0.999999497014775, iteration: 180521
loss: 1.176512360572815,grad_norm: 0.9999998105583356, iteration: 180522
loss: 1.0300166606903076,grad_norm: 0.9999991225723824, iteration: 180523
loss: 1.0284678936004639,grad_norm: 0.9999991453609394, iteration: 180524
loss: 0.9910080432891846,grad_norm: 0.9617312781168482, iteration: 180525
loss: 1.0061275959014893,grad_norm: 0.9999990986463948, iteration: 180526
loss: 0.9948313236236572,grad_norm: 0.8243395677743561, iteration: 180527
loss: 1.0402171611785889,grad_norm: 0.9821255501312346, iteration: 180528
loss: 1.1270147562026978,grad_norm: 0.9999999062239795, iteration: 180529
loss: 0.9817160367965698,grad_norm: 0.999999133057543, iteration: 180530
loss: 1.006929874420166,grad_norm: 0.9023081418447185, iteration: 180531
loss: 1.143464207649231,grad_norm: 0.9999995020365072, iteration: 180532
loss: 1.0404994487762451,grad_norm: 0.9548086354219223, iteration: 180533
loss: 0.9770830869674683,grad_norm: 0.9999988964024643, iteration: 180534
loss: 0.9432225823402405,grad_norm: 0.999999277522197, iteration: 180535
loss: 1.0512638092041016,grad_norm: 0.9999997661571077, iteration: 180536
loss: 0.9959432482719421,grad_norm: 0.999999232994809, iteration: 180537
loss: 1.0302174091339111,grad_norm: 0.9999991114725751, iteration: 180538
loss: 0.979918360710144,grad_norm: 0.9999991202504076, iteration: 180539
loss: 1.0173139572143555,grad_norm: 0.9883391678088809, iteration: 180540
loss: 1.0627497434616089,grad_norm: 0.9999999277761678, iteration: 180541
loss: 1.0239354372024536,grad_norm: 0.9754303425478769, iteration: 180542
loss: 1.0594346523284912,grad_norm: 0.9999990811468447, iteration: 180543
loss: 1.283223032951355,grad_norm: 0.9999995196930455, iteration: 180544
loss: 1.154968500137329,grad_norm: 0.9999999299983076, iteration: 180545
loss: 1.0292304754257202,grad_norm: 0.9999995237423953, iteration: 180546
loss: 1.1062040328979492,grad_norm: 0.9999993444011193, iteration: 180547
loss: 1.0197941064834595,grad_norm: 0.9493672136359986, iteration: 180548
loss: 1.3406604528427124,grad_norm: 0.9999999084703189, iteration: 180549
loss: 0.9655775427818298,grad_norm: 0.9999990885015547, iteration: 180550
loss: 1.0378544330596924,grad_norm: 0.8648582073958253, iteration: 180551
loss: 1.0016556978225708,grad_norm: 0.9225778153688602, iteration: 180552
loss: 1.0248689651489258,grad_norm: 0.99999906716816, iteration: 180553
loss: 1.0065717697143555,grad_norm: 0.7716092390279432, iteration: 180554
loss: 1.0896203517913818,grad_norm: 1.0000000230668091, iteration: 180555
loss: 1.1384550333023071,grad_norm: 0.9999997801727452, iteration: 180556
loss: 1.13599693775177,grad_norm: 0.9999998871697562, iteration: 180557
loss: 1.0125622749328613,grad_norm: 0.9999991310515621, iteration: 180558
loss: 1.064468264579773,grad_norm: 0.9999994789795956, iteration: 180559
loss: 1.130186676979065,grad_norm: 0.9999993713074957, iteration: 180560
loss: 1.046807885169983,grad_norm: 0.999999470032837, iteration: 180561
loss: 1.1083574295043945,grad_norm: 0.9999992281283981, iteration: 180562
loss: 1.0704327821731567,grad_norm: 0.9999993354526444, iteration: 180563
loss: 1.0760735273361206,grad_norm: 0.9999996278478747, iteration: 180564
loss: 0.9986506700515747,grad_norm: 0.9013411711390059, iteration: 180565
loss: 1.0023605823516846,grad_norm: 0.9539551101392493, iteration: 180566
loss: 1.0535142421722412,grad_norm: 0.9999991283217761, iteration: 180567
loss: 1.1003366708755493,grad_norm: 0.9999999760487087, iteration: 180568
loss: 1.0292686223983765,grad_norm: 0.9999999390512768, iteration: 180569
loss: 1.1340521574020386,grad_norm: 0.9999995207496026, iteration: 180570
loss: 1.055295705795288,grad_norm: 0.943658617750821, iteration: 180571
loss: 0.9679699540138245,grad_norm: 0.857467249448115, iteration: 180572
loss: 1.0054210424423218,grad_norm: 0.9759921472612401, iteration: 180573
loss: 0.9936621785163879,grad_norm: 0.999998993951772, iteration: 180574
loss: 0.9951013326644897,grad_norm: 0.9478037973186475, iteration: 180575
loss: 0.9867154359817505,grad_norm: 0.9999990635837771, iteration: 180576
loss: 1.0864973068237305,grad_norm: 0.9999996891633427, iteration: 180577
loss: 1.0389080047607422,grad_norm: 0.9999995874014073, iteration: 180578
loss: 0.9741605520248413,grad_norm: 0.999999261261145, iteration: 180579
loss: 1.0074416399002075,grad_norm: 0.9999995586240278, iteration: 180580
loss: 0.9951762557029724,grad_norm: 0.9158859517170143, iteration: 180581
loss: 1.0281225442886353,grad_norm: 0.903748821294184, iteration: 180582
loss: 1.0202131271362305,grad_norm: 0.9999990871162034, iteration: 180583
loss: 1.025399088859558,grad_norm: 0.9999996604369954, iteration: 180584
loss: 0.9944871664047241,grad_norm: 0.948122904691511, iteration: 180585
loss: 1.0018599033355713,grad_norm: 0.9217387122833303, iteration: 180586
loss: 0.9932908415794373,grad_norm: 0.9303850403828138, iteration: 180587
loss: 0.9997633695602417,grad_norm: 0.9884322442632199, iteration: 180588
loss: 1.0742429494857788,grad_norm: 0.9127386330799748, iteration: 180589
loss: 0.9991905093193054,grad_norm: 0.9691095745197494, iteration: 180590
loss: 0.984578013420105,grad_norm: 0.9512333930128627, iteration: 180591
loss: 1.0991393327713013,grad_norm: 0.9999993020494746, iteration: 180592
loss: 1.0011588335037231,grad_norm: 0.9522434058019424, iteration: 180593
loss: 0.9848053455352783,grad_norm: 0.9999990426488161, iteration: 180594
loss: 1.0356966257095337,grad_norm: 0.9920575733313555, iteration: 180595
loss: 1.0358071327209473,grad_norm: 0.9999992405986046, iteration: 180596
loss: 1.0753782987594604,grad_norm: 0.9999997904324264, iteration: 180597
loss: 0.986064612865448,grad_norm: 0.9429457245422347, iteration: 180598
loss: 1.0316179990768433,grad_norm: 0.9999990054908654, iteration: 180599
loss: 0.9839762449264526,grad_norm: 0.9659087647789832, iteration: 180600
loss: 1.067530632019043,grad_norm: 0.9999995348547828, iteration: 180601
loss: 1.05463707447052,grad_norm: 0.9999994727679313, iteration: 180602
loss: 1.0051075220108032,grad_norm: 0.8964682782015599, iteration: 180603
loss: 0.9826099872589111,grad_norm: 0.9807738943957286, iteration: 180604
loss: 0.9618541598320007,grad_norm: 0.999999164067545, iteration: 180605
loss: 1.1011073589324951,grad_norm: 0.9999996554080743, iteration: 180606
loss: 1.02553391456604,grad_norm: 0.9999989692314388, iteration: 180607
loss: 1.023112177848816,grad_norm: 0.9999990512670468, iteration: 180608
loss: 0.9698919653892517,grad_norm: 0.9618893026759634, iteration: 180609
loss: 0.9995955228805542,grad_norm: 0.9999989656800035, iteration: 180610
loss: 0.9974321722984314,grad_norm: 0.9999991746275692, iteration: 180611
loss: 1.0394169092178345,grad_norm: 0.9999998140930604, iteration: 180612
loss: 1.0115628242492676,grad_norm: 0.9999990627049833, iteration: 180613
loss: 0.9856054186820984,grad_norm: 0.9999992097945628, iteration: 180614
loss: 1.0048131942749023,grad_norm: 0.9999992532853249, iteration: 180615
loss: 1.0579391717910767,grad_norm: 0.9999993153277863, iteration: 180616
loss: 0.9997222423553467,grad_norm: 0.9999992404349501, iteration: 180617
loss: 0.9328451156616211,grad_norm: 0.9999992114524402, iteration: 180618
loss: 0.9771081805229187,grad_norm: 0.9999990602686322, iteration: 180619
loss: 1.0104937553405762,grad_norm: 0.9999990090304375, iteration: 180620
loss: 0.9747163653373718,grad_norm: 0.9999992590253681, iteration: 180621
loss: 1.066423773765564,grad_norm: 0.9999989558382302, iteration: 180622
loss: 1.0397913455963135,grad_norm: 0.999999324155919, iteration: 180623
loss: 1.0175347328186035,grad_norm: 0.8895017893353385, iteration: 180624
loss: 1.0909104347229004,grad_norm: 0.9999998121061602, iteration: 180625
loss: 1.0250365734100342,grad_norm: 0.9999992655073796, iteration: 180626
loss: 0.9670934677124023,grad_norm: 0.9589830230226599, iteration: 180627
loss: 1.0824146270751953,grad_norm: 0.9999994861034597, iteration: 180628
loss: 1.0234335660934448,grad_norm: 0.9999992130441959, iteration: 180629
loss: 1.0259042978286743,grad_norm: 0.8245086936527415, iteration: 180630
loss: 1.0167616605758667,grad_norm: 0.8987886077757734, iteration: 180631
loss: 1.0007317066192627,grad_norm: 0.9999990686589801, iteration: 180632
loss: 1.0608959197998047,grad_norm: 0.9999993599059475, iteration: 180633
loss: 1.0305932760238647,grad_norm: 0.9999991020682771, iteration: 180634
loss: 0.9946796894073486,grad_norm: 0.9901317326109315, iteration: 180635
loss: 0.9774647951126099,grad_norm: 0.9999991213361382, iteration: 180636
loss: 0.9890182614326477,grad_norm: 0.9460812966709624, iteration: 180637
loss: 0.9743946194648743,grad_norm: 0.989625979921099, iteration: 180638
loss: 0.9792680740356445,grad_norm: 0.9519892347001414, iteration: 180639
loss: 1.0023837089538574,grad_norm: 0.8925728395610444, iteration: 180640
loss: 1.0330570936203003,grad_norm: 0.9999989682421944, iteration: 180641
loss: 0.9624494314193726,grad_norm: 0.9999989711949148, iteration: 180642
loss: 0.9906291365623474,grad_norm: 0.9999994431497813, iteration: 180643
loss: 0.9969375729560852,grad_norm: 0.9999990301879479, iteration: 180644
loss: 1.0030399560928345,grad_norm: 0.9999992303842536, iteration: 180645
loss: 0.9885507225990295,grad_norm: 0.9999991189149146, iteration: 180646
loss: 0.9689396619796753,grad_norm: 0.9999993305794953, iteration: 180647
loss: 0.9967665672302246,grad_norm: 0.970044503005586, iteration: 180648
loss: 1.0144398212432861,grad_norm: 0.9067445483701657, iteration: 180649
loss: 0.9883292317390442,grad_norm: 0.9999993615278481, iteration: 180650
loss: 1.0327953100204468,grad_norm: 0.9999992276387565, iteration: 180651
loss: 1.0023387670516968,grad_norm: 0.9999991383984639, iteration: 180652
loss: 1.0042362213134766,grad_norm: 0.9999989490681512, iteration: 180653
loss: 1.0886995792388916,grad_norm: 0.9999997808520856, iteration: 180654
loss: 1.0255964994430542,grad_norm: 0.9999991394174251, iteration: 180655
loss: 0.980929970741272,grad_norm: 0.9147930623742551, iteration: 180656
loss: 0.9693790674209595,grad_norm: 0.9999992199272542, iteration: 180657
loss: 0.963641881942749,grad_norm: 0.9999989720452214, iteration: 180658
loss: 1.0008171796798706,grad_norm: 0.9451213840486374, iteration: 180659
loss: 1.0187766551971436,grad_norm: 0.9999990187903385, iteration: 180660
loss: 0.9803077578544617,grad_norm: 0.999067912659076, iteration: 180661
loss: 1.0795999765396118,grad_norm: 0.9999996846600804, iteration: 180662
loss: 1.0124189853668213,grad_norm: 0.9999992157826407, iteration: 180663
loss: 1.0095727443695068,grad_norm: 0.9999990971609576, iteration: 180664
loss: 1.1841130256652832,grad_norm: 0.9999992274585424, iteration: 180665
loss: 0.993129312992096,grad_norm: 0.9149823528304017, iteration: 180666
loss: 1.0117428302764893,grad_norm: 0.9999991181421156, iteration: 180667
loss: 1.0082311630249023,grad_norm: 0.9999991616453232, iteration: 180668
loss: 1.043865442276001,grad_norm: 0.9999995132315006, iteration: 180669
loss: 1.0132207870483398,grad_norm: 0.966040749510141, iteration: 180670
loss: 1.0018284320831299,grad_norm: 0.9999995609335867, iteration: 180671
loss: 1.0015138387680054,grad_norm: 0.9274717128430361, iteration: 180672
loss: 1.0427355766296387,grad_norm: 0.9999993840726382, iteration: 180673
loss: 1.0163960456848145,grad_norm: 0.9999993939828803, iteration: 180674
loss: 1.0232863426208496,grad_norm: 0.9944968820024614, iteration: 180675
loss: 1.0071241855621338,grad_norm: 0.9999991305647495, iteration: 180676
loss: 1.0297017097473145,grad_norm: 0.9999990734550669, iteration: 180677
loss: 1.015876293182373,grad_norm: 0.9999991858395509, iteration: 180678
loss: 0.9944407343864441,grad_norm: 0.9710453748531926, iteration: 180679
loss: 1.0243924856185913,grad_norm: 0.9202462930655506, iteration: 180680
loss: 1.0272427797317505,grad_norm: 0.9541476664920451, iteration: 180681
loss: 0.9763835668563843,grad_norm: 0.95015243591685, iteration: 180682
loss: 1.0215392112731934,grad_norm: 0.9559289491835548, iteration: 180683
loss: 1.0096263885498047,grad_norm: 0.9999990937321654, iteration: 180684
loss: 0.9795047640800476,grad_norm: 0.8784574832849233, iteration: 180685
loss: 1.0438686609268188,grad_norm: 0.9999998808657856, iteration: 180686
loss: 1.046061396598816,grad_norm: 0.9048624232065198, iteration: 180687
loss: 0.9933177828788757,grad_norm: 0.9999990137709739, iteration: 180688
loss: 0.9638499021530151,grad_norm: 0.9127502832775316, iteration: 180689
loss: 1.0532306432724,grad_norm: 0.9999991619641105, iteration: 180690
loss: 1.0242747068405151,grad_norm: 0.9999990564301959, iteration: 180691
loss: 1.0068813562393188,grad_norm: 0.999999121519108, iteration: 180692
loss: 0.9912027716636658,grad_norm: 0.9999990746454065, iteration: 180693
loss: 1.023940086364746,grad_norm: 0.999999087239292, iteration: 180694
loss: 1.0010216236114502,grad_norm: 0.9999991813366594, iteration: 180695
loss: 0.9955340623855591,grad_norm: 0.9999991115443588, iteration: 180696
loss: 1.0179983377456665,grad_norm: 0.969543250688845, iteration: 180697
loss: 1.003077507019043,grad_norm: 0.9999992640978688, iteration: 180698
loss: 1.001665472984314,grad_norm: 0.9999991720305627, iteration: 180699
loss: 1.034556269645691,grad_norm: 0.9999999709638028, iteration: 180700
loss: 1.027248740196228,grad_norm: 0.9999991596335416, iteration: 180701
loss: 1.0070284605026245,grad_norm: 0.9116242418591498, iteration: 180702
loss: 0.9869871139526367,grad_norm: 0.999999359775578, iteration: 180703
loss: 1.0028467178344727,grad_norm: 0.9932622618495349, iteration: 180704
loss: 0.980682373046875,grad_norm: 0.9483004938004914, iteration: 180705
loss: 1.0376436710357666,grad_norm: 0.9999994049018309, iteration: 180706
loss: 1.0144836902618408,grad_norm: 0.9126101102082681, iteration: 180707
loss: 0.9539823532104492,grad_norm: 0.9651011359208163, iteration: 180708
loss: 1.010923147201538,grad_norm: 0.9409715067692416, iteration: 180709
loss: 0.9606649875640869,grad_norm: 0.9600503093915849, iteration: 180710
loss: 1.0113165378570557,grad_norm: 0.9999991877756499, iteration: 180711
loss: 0.9896631240844727,grad_norm: 0.8039832618068613, iteration: 180712
loss: 0.9942925572395325,grad_norm: 0.9999990584484988, iteration: 180713
loss: 1.0012685060501099,grad_norm: 0.9999989897477394, iteration: 180714
loss: 0.9887831807136536,grad_norm: 0.9999990800213577, iteration: 180715
loss: 0.9738903641700745,grad_norm: 0.9999989482610407, iteration: 180716
loss: 1.0846730470657349,grad_norm: 0.9999995372079478, iteration: 180717
loss: 1.0202094316482544,grad_norm: 0.9999990704624153, iteration: 180718
loss: 1.0869582891464233,grad_norm: 0.9999993496555043, iteration: 180719
loss: 0.9974488019943237,grad_norm: 0.9999991098569898, iteration: 180720
loss: 1.0393502712249756,grad_norm: 0.999999276301293, iteration: 180721
loss: 1.0420467853546143,grad_norm: 0.9999992273027986, iteration: 180722
loss: 1.0181678533554077,grad_norm: 0.9999992624995486, iteration: 180723
loss: 1.0287069082260132,grad_norm: 0.9999998273553593, iteration: 180724
loss: 1.0380064249038696,grad_norm: 0.947573294157565, iteration: 180725
loss: 1.0510332584381104,grad_norm: 0.9999997533414173, iteration: 180726
loss: 1.0017919540405273,grad_norm: 0.9999990949795641, iteration: 180727
loss: 1.002703309059143,grad_norm: 0.9999990574137082, iteration: 180728
loss: 1.067421317100525,grad_norm: 0.9999992644206966, iteration: 180729
loss: 1.0414516925811768,grad_norm: 0.9999991372960664, iteration: 180730
loss: 1.0433290004730225,grad_norm: 0.9999990487198377, iteration: 180731
loss: 1.0095642805099487,grad_norm: 0.9999991424544006, iteration: 180732
loss: 1.0921202898025513,grad_norm: 0.9999994321851862, iteration: 180733
loss: 1.0374951362609863,grad_norm: 0.8808235895520901, iteration: 180734
loss: 0.9909210801124573,grad_norm: 0.9961851963903057, iteration: 180735
loss: 1.1191890239715576,grad_norm: 0.9999993341904823, iteration: 180736
loss: 1.024623990058899,grad_norm: 0.9999997825837749, iteration: 180737
loss: 1.0323647260665894,grad_norm: 0.9999992235004727, iteration: 180738
loss: 0.9916735887527466,grad_norm: 0.8312848360678724, iteration: 180739
loss: 1.004431128501892,grad_norm: 0.9999992164508258, iteration: 180740
loss: 1.0775153636932373,grad_norm: 0.9999996691082151, iteration: 180741
loss: 0.9889246225357056,grad_norm: 0.9140529006725003, iteration: 180742
loss: 1.0214463472366333,grad_norm: 0.9999991821868089, iteration: 180743
loss: 1.0027936697006226,grad_norm: 0.9221293679687249, iteration: 180744
loss: 1.0300025939941406,grad_norm: 0.9999993573178445, iteration: 180745
loss: 1.0278733968734741,grad_norm: 0.9237851790513085, iteration: 180746
loss: 1.0061898231506348,grad_norm: 0.999998996695179, iteration: 180747
loss: 0.9838355183601379,grad_norm: 0.9316825905635261, iteration: 180748
loss: 0.9781850576400757,grad_norm: 0.9999990978128376, iteration: 180749
loss: 0.9786179661750793,grad_norm: 0.9999990493850203, iteration: 180750
loss: 1.003523826599121,grad_norm: 0.9999996683058464, iteration: 180751
loss: 0.9850775003433228,grad_norm: 0.999999195923623, iteration: 180752
loss: 0.952393651008606,grad_norm: 0.9999999316388103, iteration: 180753
loss: 0.9856092929840088,grad_norm: 0.9999990628811342, iteration: 180754
loss: 1.0065948963165283,grad_norm: 0.9813759589008332, iteration: 180755
loss: 1.0178555250167847,grad_norm: 0.9999994446795545, iteration: 180756
loss: 1.016855239868164,grad_norm: 0.9971794035805565, iteration: 180757
loss: 1.021047830581665,grad_norm: 0.9999992984742009, iteration: 180758
loss: 1.0369499921798706,grad_norm: 0.9999993185350838, iteration: 180759
loss: 1.0271480083465576,grad_norm: 0.860091265423701, iteration: 180760
loss: 1.012460708618164,grad_norm: 0.999999290577289, iteration: 180761
loss: 1.062740683555603,grad_norm: 0.9999990719633144, iteration: 180762
loss: 1.0001897811889648,grad_norm: 0.914237902623699, iteration: 180763
loss: 1.0363975763320923,grad_norm: 0.999999187536789, iteration: 180764
loss: 1.1148498058319092,grad_norm: 0.9999995579149125, iteration: 180765
loss: 1.006516456604004,grad_norm: 0.9278428730981526, iteration: 180766
loss: 0.9981139302253723,grad_norm: 0.8591815297135853, iteration: 180767
loss: 0.9921518564224243,grad_norm: 0.9905370434921188, iteration: 180768
loss: 0.9977884292602539,grad_norm: 0.9774479205632175, iteration: 180769
loss: 1.0134453773498535,grad_norm: 0.999998936050671, iteration: 180770
loss: 1.0525636672973633,grad_norm: 0.9921141767780131, iteration: 180771
loss: 1.0404984951019287,grad_norm: 0.9999992794674548, iteration: 180772
loss: 1.006856083869934,grad_norm: 0.9999992252443652, iteration: 180773
loss: 1.0134671926498413,grad_norm: 0.8566355887396186, iteration: 180774
loss: 0.9730730652809143,grad_norm: 0.9742876353945419, iteration: 180775
loss: 0.9847859740257263,grad_norm: 0.935531258495266, iteration: 180776
loss: 0.9494810104370117,grad_norm: 0.9528726850690679, iteration: 180777
loss: 1.00642728805542,grad_norm: 0.9287085692214434, iteration: 180778
loss: 0.9877582788467407,grad_norm: 0.9999991479391092, iteration: 180779
loss: 0.9954742193222046,grad_norm: 0.9864889619286493, iteration: 180780
loss: 0.9757606387138367,grad_norm: 0.9999991247936236, iteration: 180781
loss: 0.9958009719848633,grad_norm: 0.9383319607281153, iteration: 180782
loss: 1.0103648900985718,grad_norm: 0.9999993044814631, iteration: 180783
loss: 1.011387586593628,grad_norm: 0.9847544328427785, iteration: 180784
loss: 1.0301358699798584,grad_norm: 0.9999991420283237, iteration: 180785
loss: 1.0123705863952637,grad_norm: 0.9999991337438067, iteration: 180786
loss: 0.999279797077179,grad_norm: 0.8782700743468418, iteration: 180787
loss: 0.97959303855896,grad_norm: 0.8624815153877893, iteration: 180788
loss: 0.9831362962722778,grad_norm: 0.995130119752829, iteration: 180789
loss: 0.994358479976654,grad_norm: 0.9414249013739866, iteration: 180790
loss: 1.0006681680679321,grad_norm: 0.9033413866731304, iteration: 180791
loss: 0.9932235479354858,grad_norm: 0.8622939498585043, iteration: 180792
loss: 1.0076907873153687,grad_norm: 0.9107269286361797, iteration: 180793
loss: 1.0365104675292969,grad_norm: 0.9999991065073831, iteration: 180794
loss: 1.0055623054504395,grad_norm: 0.9999990572660862, iteration: 180795
loss: 0.9869563579559326,grad_norm: 0.9897469781397372, iteration: 180796
loss: 0.9704065322875977,grad_norm: 0.9999991055344991, iteration: 180797
loss: 1.0201175212860107,grad_norm: 0.9999991684981738, iteration: 180798
loss: 1.019640564918518,grad_norm: 0.9999992806770094, iteration: 180799
loss: 0.968932569026947,grad_norm: 0.9999990203799872, iteration: 180800
loss: 1.030594825744629,grad_norm: 0.9064592995202335, iteration: 180801
loss: 0.9928748607635498,grad_norm: 0.9823819626179676, iteration: 180802
loss: 1.0062044858932495,grad_norm: 0.9999990522388573, iteration: 180803
loss: 1.0001221895217896,grad_norm: 0.9946184850512264, iteration: 180804
loss: 0.996358335018158,grad_norm: 0.9999991990285139, iteration: 180805
loss: 0.9996775388717651,grad_norm: 0.9999994903471613, iteration: 180806
loss: 1.1582382917404175,grad_norm: 0.9999996398851625, iteration: 180807
loss: 1.0370861291885376,grad_norm: 0.8722535410622172, iteration: 180808
loss: 1.0194108486175537,grad_norm: 0.9999995375728762, iteration: 180809
loss: 0.9936012625694275,grad_norm: 0.8577599999229036, iteration: 180810
loss: 0.9943830966949463,grad_norm: 0.9587059652282215, iteration: 180811
loss: 0.9653844833374023,grad_norm: 0.8386035516680345, iteration: 180812
loss: 0.9928231239318848,grad_norm: 0.9531024390980721, iteration: 180813
loss: 0.9835832715034485,grad_norm: 0.9999990774756502, iteration: 180814
loss: 0.980828046798706,grad_norm: 0.999999258831266, iteration: 180815
loss: 1.036138653755188,grad_norm: 0.9999992076142363, iteration: 180816
loss: 0.992417573928833,grad_norm: 0.9798051164793466, iteration: 180817
loss: 0.999591052532196,grad_norm: 0.9999992549709668, iteration: 180818
loss: 1.0141582489013672,grad_norm: 0.9523497322443621, iteration: 180819
loss: 1.0244743824005127,grad_norm: 0.9999991566597662, iteration: 180820
loss: 1.0195280313491821,grad_norm: 0.8348035550882243, iteration: 180821
loss: 1.011419415473938,grad_norm: 0.8690812859243439, iteration: 180822
loss: 1.0192927122116089,grad_norm: 0.9999991331714414, iteration: 180823
loss: 1.002790927886963,grad_norm: 0.9839639152926853, iteration: 180824
loss: 0.9771600961685181,grad_norm: 0.9716222948686946, iteration: 180825
loss: 1.023159384727478,grad_norm: 0.9785204423209393, iteration: 180826
loss: 0.9641416668891907,grad_norm: 0.9999995885388717, iteration: 180827
loss: 1.037028431892395,grad_norm: 0.9999992030075798, iteration: 180828
loss: 1.0033804178237915,grad_norm: 0.9999990805369763, iteration: 180829
loss: 0.9662006497383118,grad_norm: 0.999999044901488, iteration: 180830
loss: 1.0093498229980469,grad_norm: 0.9327250587681483, iteration: 180831
loss: 0.9798106551170349,grad_norm: 0.9999992334306288, iteration: 180832
loss: 1.0091862678527832,grad_norm: 0.9993119469971606, iteration: 180833
loss: 1.0070027112960815,grad_norm: 0.9999991410523548, iteration: 180834
loss: 1.0552400350570679,grad_norm: 0.9999989681630407, iteration: 180835
loss: 0.9750470519065857,grad_norm: 0.9999989971845455, iteration: 180836
loss: 1.0412925481796265,grad_norm: 0.9999993606085847, iteration: 180837
loss: 1.0004079341888428,grad_norm: 0.9999991235528035, iteration: 180838
loss: 1.0173176527023315,grad_norm: 0.9999991892167016, iteration: 180839
loss: 1.0028526782989502,grad_norm: 0.9749877681894854, iteration: 180840
loss: 1.0218039751052856,grad_norm: 0.8544764510333153, iteration: 180841
loss: 1.0003266334533691,grad_norm: 0.9999991697337053, iteration: 180842
loss: 0.9892869591712952,grad_norm: 0.8767244621816831, iteration: 180843
loss: 1.121561884880066,grad_norm: 0.9999995426184954, iteration: 180844
loss: 0.9907412528991699,grad_norm: 0.9741612639074481, iteration: 180845
loss: 0.9980903267860413,grad_norm: 0.9999991547763422, iteration: 180846
loss: 0.9995191693305969,grad_norm: 0.9999992363372616, iteration: 180847
loss: 1.0109449625015259,grad_norm: 0.9999989947886583, iteration: 180848
loss: 1.0040175914764404,grad_norm: 0.856133849228455, iteration: 180849
loss: 1.001634120941162,grad_norm: 0.9071439276997227, iteration: 180850
loss: 1.013407826423645,grad_norm: 0.8332056468582031, iteration: 180851
loss: 0.9963914752006531,grad_norm: 0.9611126914894473, iteration: 180852
loss: 1.0314840078353882,grad_norm: 0.9999994011626209, iteration: 180853
loss: 0.9603870511054993,grad_norm: 0.9454052900511775, iteration: 180854
loss: 0.9981763362884521,grad_norm: 0.9173995288783133, iteration: 180855
loss: 0.9985857605934143,grad_norm: 0.9886374157326411, iteration: 180856
loss: 1.025041103363037,grad_norm: 0.8746850078228857, iteration: 180857
loss: 1.0285239219665527,grad_norm: 0.99999947178633, iteration: 180858
loss: 1.1026166677474976,grad_norm: 0.9999997883770007, iteration: 180859
loss: 0.9625868797302246,grad_norm: 0.8902292516367255, iteration: 180860
loss: 1.0111573934555054,grad_norm: 0.9999998973718947, iteration: 180861
loss: 1.0064456462860107,grad_norm: 0.9999991391986347, iteration: 180862
loss: 1.0129799842834473,grad_norm: 0.9999990545913939, iteration: 180863
loss: 1.0023138523101807,grad_norm: 0.8676340922866389, iteration: 180864
loss: 0.9962977170944214,grad_norm: 0.9762734743267092, iteration: 180865
loss: 0.9943603277206421,grad_norm: 0.9871991445577181, iteration: 180866
loss: 1.0409233570098877,grad_norm: 0.9150532007745824, iteration: 180867
loss: 1.0048555135726929,grad_norm: 0.9439139563442122, iteration: 180868
loss: 1.0037325620651245,grad_norm: 0.808175611249275, iteration: 180869
loss: 1.018349528312683,grad_norm: 0.8555518824827855, iteration: 180870
loss: 1.0150879621505737,grad_norm: 0.9956975193238837, iteration: 180871
loss: 1.0295404195785522,grad_norm: 0.9999990988400038, iteration: 180872
loss: 1.0139482021331787,grad_norm: 0.9999991909682696, iteration: 180873
loss: 1.0321296453475952,grad_norm: 0.9999995960819995, iteration: 180874
loss: 1.0074397325515747,grad_norm: 0.9234088196322046, iteration: 180875
loss: 1.00436270236969,grad_norm: 0.9999989950030298, iteration: 180876
loss: 0.9775131940841675,grad_norm: 0.8775728790869388, iteration: 180877
loss: 0.939691424369812,grad_norm: 0.9999990072424336, iteration: 180878
loss: 0.9866990447044373,grad_norm: 0.9813849323819848, iteration: 180879
loss: 1.0130945444107056,grad_norm: 0.999999178087903, iteration: 180880
loss: 1.0179319381713867,grad_norm: 0.9767083102420182, iteration: 180881
loss: 1.032918095588684,grad_norm: 0.9999990581208389, iteration: 180882
loss: 1.0120142698287964,grad_norm: 0.9999991388645885, iteration: 180883
loss: 0.9573764801025391,grad_norm: 0.9999990866543197, iteration: 180884
loss: 0.9937532544136047,grad_norm: 0.9523183365045139, iteration: 180885
loss: 0.9951073527336121,grad_norm: 0.9049630210109924, iteration: 180886
loss: 1.0076934099197388,grad_norm: 0.922142227590611, iteration: 180887
loss: 0.9851074814796448,grad_norm: 0.9999991649917244, iteration: 180888
loss: 0.993492603302002,grad_norm: 0.9999991501137621, iteration: 180889
loss: 1.0206794738769531,grad_norm: 0.9700803108900238, iteration: 180890
loss: 1.0054295063018799,grad_norm: 0.8772496298985921, iteration: 180891
loss: 0.9808712601661682,grad_norm: 0.9999990192424959, iteration: 180892
loss: 0.9928991198539734,grad_norm: 0.8912571775726043, iteration: 180893
loss: 0.9944729804992676,grad_norm: 0.8157916683307173, iteration: 180894
loss: 1.0070172548294067,grad_norm: 0.9676520559739463, iteration: 180895
loss: 1.0252083539962769,grad_norm: 0.9999991788275425, iteration: 180896
loss: 1.0137591361999512,grad_norm: 0.9999993294954558, iteration: 180897
loss: 1.0092742443084717,grad_norm: 0.9999994161619714, iteration: 180898
loss: 1.0036063194274902,grad_norm: 0.9999992066542528, iteration: 180899
loss: 1.0128320455551147,grad_norm: 0.800239798636714, iteration: 180900
loss: 0.9926204681396484,grad_norm: 0.8318448428790777, iteration: 180901
loss: 1.0124355554580688,grad_norm: 0.9849938356376663, iteration: 180902
loss: 1.0353249311447144,grad_norm: 0.9999992629182696, iteration: 180903
loss: 0.9819291234016418,grad_norm: 0.9999990931601369, iteration: 180904
loss: 1.0210820436477661,grad_norm: 0.9999990591212058, iteration: 180905
loss: 1.0375381708145142,grad_norm: 0.9028184390290827, iteration: 180906
loss: 1.0194538831710815,grad_norm: 0.8322497281571131, iteration: 180907
loss: 0.9922906756401062,grad_norm: 0.9999990936794375, iteration: 180908
loss: 1.0166728496551514,grad_norm: 0.9476522347177166, iteration: 180909
loss: 1.0037480592727661,grad_norm: 0.9632236616708169, iteration: 180910
loss: 0.9900688529014587,grad_norm: 0.9126081820058202, iteration: 180911
loss: 0.9655393362045288,grad_norm: 0.969708601301196, iteration: 180912
loss: 1.0095103979110718,grad_norm: 0.9328256129496291, iteration: 180913
loss: 1.0071005821228027,grad_norm: 0.8749433334047741, iteration: 180914
loss: 0.9893829226493835,grad_norm: 0.9999990780459853, iteration: 180915
loss: 1.0036712884902954,grad_norm: 0.9999989402432325, iteration: 180916
loss: 0.9686575531959534,grad_norm: 0.999999102035568, iteration: 180917
loss: 1.026816487312317,grad_norm: 0.9058208843307368, iteration: 180918
loss: 1.019858956336975,grad_norm: 0.9153637788433876, iteration: 180919
loss: 1.0105524063110352,grad_norm: 0.9999991869121599, iteration: 180920
loss: 1.016019344329834,grad_norm: 0.862544289410181, iteration: 180921
loss: 0.980897843837738,grad_norm: 0.9745656747690955, iteration: 180922
loss: 0.9564599394798279,grad_norm: 0.9999990693574071, iteration: 180923
loss: 0.9862650632858276,grad_norm: 0.9999991679421151, iteration: 180924
loss: 0.9948086142539978,grad_norm: 0.9795780850679341, iteration: 180925
loss: 0.975274384021759,grad_norm: 0.8294496975082885, iteration: 180926
loss: 0.9869194626808167,grad_norm: 0.9999990942598763, iteration: 180927
loss: 0.9943717122077942,grad_norm: 0.999999186698925, iteration: 180928
loss: 1.0302801132202148,grad_norm: 0.9999991868309315, iteration: 180929
loss: 0.9572678804397583,grad_norm: 0.9999996254588608, iteration: 180930
loss: 0.9873877167701721,grad_norm: 0.9999991196916439, iteration: 180931
loss: 0.996714174747467,grad_norm: 0.8589916484070894, iteration: 180932
loss: 0.9683525562286377,grad_norm: 0.9999991060455147, iteration: 180933
loss: 0.980954647064209,grad_norm: 0.9599806101421885, iteration: 180934
loss: 0.9877937436103821,grad_norm: 0.9999995319482772, iteration: 180935
loss: 0.9878511428833008,grad_norm: 0.9761272299124452, iteration: 180936
loss: 0.9770095944404602,grad_norm: 0.9999990337355035, iteration: 180937
loss: 1.034338116645813,grad_norm: 0.9919289802086497, iteration: 180938
loss: 1.0241889953613281,grad_norm: 0.932048353821318, iteration: 180939
loss: 0.994272768497467,grad_norm: 0.9999992241170972, iteration: 180940
loss: 0.9982040524482727,grad_norm: 0.8623200894668832, iteration: 180941
loss: 0.9747861623764038,grad_norm: 0.9999992393727372, iteration: 180942
loss: 1.049576759338379,grad_norm: 0.9999990740427823, iteration: 180943
loss: 1.0199663639068604,grad_norm: 0.9999991605867168, iteration: 180944
loss: 0.995360791683197,grad_norm: 0.999999098248422, iteration: 180945
loss: 1.0072160959243774,grad_norm: 0.9999991256103186, iteration: 180946
loss: 0.9496983885765076,grad_norm: 0.9561375859839278, iteration: 180947
loss: 0.9897241592407227,grad_norm: 0.9542266928623251, iteration: 180948
loss: 1.000666618347168,grad_norm: 0.8602546087089811, iteration: 180949
loss: 0.9741817712783813,grad_norm: 0.9999991334302697, iteration: 180950
loss: 0.9540531039237976,grad_norm: 0.8582078626097235, iteration: 180951
loss: 0.9750325083732605,grad_norm: 0.8590713526773149, iteration: 180952
loss: 0.9942896366119385,grad_norm: 0.918080636755993, iteration: 180953
loss: 0.9824744462966919,grad_norm: 0.8903130923601463, iteration: 180954
loss: 0.9952600598335266,grad_norm: 0.9999991797925738, iteration: 180955
loss: 1.018267035484314,grad_norm: 0.9606004691732141, iteration: 180956
loss: 1.016768455505371,grad_norm: 0.9811122020762306, iteration: 180957
loss: 1.098056674003601,grad_norm: 0.9999998132406521, iteration: 180958
loss: 0.9725298285484314,grad_norm: 0.9999991880547624, iteration: 180959
loss: 0.9826413989067078,grad_norm: 0.9999989530863233, iteration: 180960
loss: 1.0406311750411987,grad_norm: 0.9991965340688613, iteration: 180961
loss: 0.9570088982582092,grad_norm: 0.9999990370850471, iteration: 180962
loss: 0.987354040145874,grad_norm: 0.9999991824325388, iteration: 180963
loss: 1.0095127820968628,grad_norm: 0.9999991487540272, iteration: 180964
loss: 1.0511454343795776,grad_norm: 0.9999995558092389, iteration: 180965
loss: 1.0110400915145874,grad_norm: 0.9999991032053912, iteration: 180966
loss: 1.0080554485321045,grad_norm: 0.999999164908629, iteration: 180967
loss: 1.046184778213501,grad_norm: 0.9999992708778284, iteration: 180968
loss: 1.0049428939819336,grad_norm: 0.9999991324155485, iteration: 180969
loss: 0.989906907081604,grad_norm: 0.9999989736328471, iteration: 180970
loss: 0.9568693041801453,grad_norm: 0.8654184411145013, iteration: 180971
loss: 0.9726701378822327,grad_norm: 0.9679184524098667, iteration: 180972
loss: 0.9847955107688904,grad_norm: 0.9104371372397208, iteration: 180973
loss: 1.0197901725769043,grad_norm: 0.9999997896980984, iteration: 180974
loss: 0.9735317230224609,grad_norm: 0.9223883355057317, iteration: 180975
loss: 0.9877970218658447,grad_norm: 0.9999991797659886, iteration: 180976
loss: 1.0000916719436646,grad_norm: 0.8465481167570651, iteration: 180977
loss: 0.998896062374115,grad_norm: 0.9999991632295518, iteration: 180978
loss: 0.9581515192985535,grad_norm: 0.9346613583205903, iteration: 180979
loss: 1.0288586616516113,grad_norm: 0.966726886456844, iteration: 180980
loss: 1.0374692678451538,grad_norm: 0.8863281661792478, iteration: 180981
loss: 0.9820637106895447,grad_norm: 0.9909037702852733, iteration: 180982
loss: 0.9801083207130432,grad_norm: 0.9999990034575038, iteration: 180983
loss: 1.0069923400878906,grad_norm: 0.9999992064175457, iteration: 180984
loss: 1.0011707544326782,grad_norm: 0.9686179546931263, iteration: 180985
loss: 0.9957947731018066,grad_norm: 0.999999364726902, iteration: 180986
loss: 1.0064692497253418,grad_norm: 0.8402462337503005, iteration: 180987
loss: 0.9845864176750183,grad_norm: 0.948753340838415, iteration: 180988
loss: 0.9647738933563232,grad_norm: 0.9999992002174395, iteration: 180989
loss: 0.9896885752677917,grad_norm: 0.9441173888035331, iteration: 180990
loss: 1.0244317054748535,grad_norm: 0.999999451286534, iteration: 180991
loss: 0.9963747262954712,grad_norm: 0.9167742830615793, iteration: 180992
loss: 0.9828831553459167,grad_norm: 0.999999152318774, iteration: 180993
loss: 0.9733335971832275,grad_norm: 0.9794504185213341, iteration: 180994
loss: 1.0147502422332764,grad_norm: 0.9154272129020623, iteration: 180995
loss: 0.9952144026756287,grad_norm: 0.846885660671022, iteration: 180996
loss: 0.9982539415359497,grad_norm: 0.9152685930785736, iteration: 180997
loss: 1.015231966972351,grad_norm: 0.9999991083916064, iteration: 180998
loss: 1.0041851997375488,grad_norm: 0.9486000815719658, iteration: 180999
loss: 0.9721843004226685,grad_norm: 0.9999995719712657, iteration: 181000
loss: 1.0213121175765991,grad_norm: 0.9645098960003335, iteration: 181001
loss: 0.9982205033302307,grad_norm: 0.9940307666451532, iteration: 181002
loss: 1.0049268007278442,grad_norm: 0.8603581431518847, iteration: 181003
loss: 0.9836824536323547,grad_norm: 0.9915083775439403, iteration: 181004
loss: 1.0118902921676636,grad_norm: 0.9999990024341755, iteration: 181005
loss: 0.9899564385414124,grad_norm: 0.9999990741947411, iteration: 181006
loss: 0.9910922646522522,grad_norm: 0.9999995680839874, iteration: 181007
loss: 1.005021333694458,grad_norm: 0.9999997283129307, iteration: 181008
loss: 0.9925652146339417,grad_norm: 0.9999990749009243, iteration: 181009
loss: 1.0519447326660156,grad_norm: 0.9460969342068568, iteration: 181010
loss: 0.9863026142120361,grad_norm: 0.9015065176899475, iteration: 181011
loss: 1.0009316205978394,grad_norm: 0.9638558045323209, iteration: 181012
loss: 0.9579444527626038,grad_norm: 0.9999992246466863, iteration: 181013
loss: 1.0211740732192993,grad_norm: 0.9999992272398853, iteration: 181014
loss: 0.9978207349777222,grad_norm: 0.9501835220294788, iteration: 181015
loss: 0.9507021903991699,grad_norm: 0.9586411219406974, iteration: 181016
loss: 1.036794662475586,grad_norm: 0.906929839423875, iteration: 181017
loss: 0.9777105450630188,grad_norm: 0.9999991410808227, iteration: 181018
loss: 1.0266754627227783,grad_norm: 0.9999991697468775, iteration: 181019
loss: 1.0120025873184204,grad_norm: 0.9999992837673705, iteration: 181020
loss: 1.0098567008972168,grad_norm: 0.8552986888458101, iteration: 181021
loss: 1.0052481889724731,grad_norm: 0.9525303446635418, iteration: 181022
loss: 1.0293467044830322,grad_norm: 0.9999992760304155, iteration: 181023
loss: 0.9901952147483826,grad_norm: 0.9572955481502428, iteration: 181024
loss: 0.9971825480461121,grad_norm: 0.9844913420414091, iteration: 181025
loss: 0.9901296496391296,grad_norm: 0.9006484625272999, iteration: 181026
loss: 1.0181262493133545,grad_norm: 0.9223003001849716, iteration: 181027
loss: 0.9655364155769348,grad_norm: 0.999999066483, iteration: 181028
loss: 0.9767234325408936,grad_norm: 0.9381667680598703, iteration: 181029
loss: 0.9979849457740784,grad_norm: 0.9003428938967819, iteration: 181030
loss: 0.9994277358055115,grad_norm: 0.9999992281745625, iteration: 181031
loss: 1.0034074783325195,grad_norm: 0.9999991087217728, iteration: 181032
loss: 1.014045238494873,grad_norm: 0.9556321611543971, iteration: 181033
loss: 1.0099174976348877,grad_norm: 0.9387500123636185, iteration: 181034
loss: 0.9809349179267883,grad_norm: 0.9999992356889966, iteration: 181035
loss: 1.0109175443649292,grad_norm: 0.9577187914789405, iteration: 181036
loss: 1.0194951295852661,grad_norm: 0.9273723774554926, iteration: 181037
loss: 0.9926913976669312,grad_norm: 0.9999989895168857, iteration: 181038
loss: 0.9879955053329468,grad_norm: 0.9999992036200724, iteration: 181039
loss: 0.997747004032135,grad_norm: 0.9263693773555411, iteration: 181040
loss: 0.9965015649795532,grad_norm: 0.8708408375191856, iteration: 181041
loss: 1.057090401649475,grad_norm: 0.9999992147279869, iteration: 181042
loss: 0.9743555784225464,grad_norm: 0.999999141747969, iteration: 181043
loss: 1.045325517654419,grad_norm: 0.9652337249131843, iteration: 181044
loss: 1.02070951461792,grad_norm: 0.9781776242481621, iteration: 181045
loss: 1.0299760103225708,grad_norm: 0.9999991477358883, iteration: 181046
loss: 1.0264209508895874,grad_norm: 0.9999992147472176, iteration: 181047
loss: 0.9907307624816895,grad_norm: 0.9999994955423701, iteration: 181048
loss: 0.9964359998703003,grad_norm: 0.999999075284515, iteration: 181049
loss: 0.9703341722488403,grad_norm: 0.9999991716423235, iteration: 181050
loss: 0.9563531279563904,grad_norm: 0.9237915348461649, iteration: 181051
loss: 1.0156495571136475,grad_norm: 0.931189235239336, iteration: 181052
loss: 1.020786166191101,grad_norm: 0.8537348179500607, iteration: 181053
loss: 0.9967063665390015,grad_norm: 0.9547735971931636, iteration: 181054
loss: 0.9820237159729004,grad_norm: 0.9792828373051796, iteration: 181055
loss: 1.0625263452529907,grad_norm: 0.9999989973159968, iteration: 181056
loss: 1.0523531436920166,grad_norm: 0.9999995873566121, iteration: 181057
loss: 1.0146160125732422,grad_norm: 0.9999990834013952, iteration: 181058
loss: 0.9758173227310181,grad_norm: 0.9999991853196323, iteration: 181059
loss: 0.991784930229187,grad_norm: 0.9999989895320461, iteration: 181060
loss: 1.0309791564941406,grad_norm: 0.924585771038765, iteration: 181061
loss: 0.9519973993301392,grad_norm: 0.9405890562027284, iteration: 181062
loss: 0.9866408705711365,grad_norm: 0.9324492219046185, iteration: 181063
loss: 0.9834507703781128,grad_norm: 0.9999991423545294, iteration: 181064
loss: 1.0113147497177124,grad_norm: 0.6933366720987689, iteration: 181065
loss: 1.0004143714904785,grad_norm: 0.8062787375398954, iteration: 181066
loss: 0.9824982285499573,grad_norm: 0.9393943595502487, iteration: 181067
loss: 0.9553573727607727,grad_norm: 0.9999991246311517, iteration: 181068
loss: 1.0055586099624634,grad_norm: 0.9999992176660433, iteration: 181069
loss: 0.9907421469688416,grad_norm: 0.9999990722368319, iteration: 181070
loss: 1.0063761472702026,grad_norm: 0.9999991876536922, iteration: 181071
loss: 0.9972823262214661,grad_norm: 0.9999993833441494, iteration: 181072
loss: 1.0079576969146729,grad_norm: 0.999999103856835, iteration: 181073
loss: 0.9789496660232544,grad_norm: 0.9086653285936485, iteration: 181074
loss: 1.0224300622940063,grad_norm: 0.9999990352797266, iteration: 181075
loss: 0.9944224953651428,grad_norm: 0.9123888894454423, iteration: 181076
loss: 0.9960663318634033,grad_norm: 0.9999990868117047, iteration: 181077
loss: 0.9748861789703369,grad_norm: 0.9999993855024237, iteration: 181078
loss: 0.9670592546463013,grad_norm: 0.9999990459812798, iteration: 181079
loss: 1.0194427967071533,grad_norm: 0.9999996512536089, iteration: 181080
loss: 0.9976446628570557,grad_norm: 0.950624575238955, iteration: 181081
loss: 1.01429283618927,grad_norm: 0.8786652683322913, iteration: 181082
loss: 0.9656577110290527,grad_norm: 0.9921980364632951, iteration: 181083
loss: 1.0271754264831543,grad_norm: 0.9999990673536091, iteration: 181084
loss: 0.987945556640625,grad_norm: 0.9999991674865022, iteration: 181085
loss: 0.9355341196060181,grad_norm: 0.9999992274408773, iteration: 181086
loss: 0.9984079599380493,grad_norm: 0.908197683373805, iteration: 181087
loss: 0.9861424565315247,grad_norm: 0.9780781933130783, iteration: 181088
loss: 1.0043045282363892,grad_norm: 0.9999991208220486, iteration: 181089
loss: 0.9987000226974487,grad_norm: 0.9999991301211208, iteration: 181090
loss: 1.0057334899902344,grad_norm: 0.9999990247500746, iteration: 181091
loss: 1.0038325786590576,grad_norm: 0.9999989538442645, iteration: 181092
loss: 0.9947027564048767,grad_norm: 0.9873561267322009, iteration: 181093
loss: 0.9675940871238708,grad_norm: 0.8249216660742342, iteration: 181094
loss: 1.0411107540130615,grad_norm: 0.9896096785031412, iteration: 181095
loss: 0.9786168336868286,grad_norm: 0.9999992618016863, iteration: 181096
loss: 1.0016956329345703,grad_norm: 0.9999991165333617, iteration: 181097
loss: 1.0091909170150757,grad_norm: 0.8893936100844744, iteration: 181098
loss: 1.0246515274047852,grad_norm: 0.9999997336753369, iteration: 181099
loss: 0.9848979115486145,grad_norm: 0.9351993024538966, iteration: 181100
loss: 0.9855932593345642,grad_norm: 0.7966786820974381, iteration: 181101
loss: 0.9801534414291382,grad_norm: 0.8423576612837773, iteration: 181102
loss: 1.0415480136871338,grad_norm: 0.9999996635294138, iteration: 181103
loss: 1.0909253358840942,grad_norm: 0.9999999849598821, iteration: 181104
loss: 1.0190215110778809,grad_norm: 0.9999991388270277, iteration: 181105
loss: 0.9967185258865356,grad_norm: 0.9652201449336313, iteration: 181106
loss: 0.9827908277511597,grad_norm: 0.9999991730904428, iteration: 181107
loss: 1.039218544960022,grad_norm: 0.9999989898648478, iteration: 181108
loss: 0.9879557490348816,grad_norm: 0.9415161881371029, iteration: 181109
loss: 0.9860529899597168,grad_norm: 0.9999990785533417, iteration: 181110
loss: 0.9961702823638916,grad_norm: 0.9668412516141272, iteration: 181111
loss: 0.9830605983734131,grad_norm: 0.9340001512083514, iteration: 181112
loss: 1.037482500076294,grad_norm: 0.999999275359554, iteration: 181113
loss: 0.9979935884475708,grad_norm: 0.9999991348772507, iteration: 181114
loss: 0.9895710349082947,grad_norm: 0.9999992681707717, iteration: 181115
loss: 1.0083811283111572,grad_norm: 0.9999990291856908, iteration: 181116
loss: 0.9901290535926819,grad_norm: 0.978724333421938, iteration: 181117
loss: 0.9906808137893677,grad_norm: 0.9999991926861823, iteration: 181118
loss: 1.0229817628860474,grad_norm: 0.9999990955832131, iteration: 181119
loss: 1.026807188987732,grad_norm: 0.9999993626112081, iteration: 181120
loss: 0.9809092283248901,grad_norm: 0.9924374480354592, iteration: 181121
loss: 1.0008635520935059,grad_norm: 0.9260997333332911, iteration: 181122
loss: 0.9928513169288635,grad_norm: 0.9999990605056167, iteration: 181123
loss: 0.9770002961158752,grad_norm: 0.999999123891659, iteration: 181124
loss: 0.9798240661621094,grad_norm: 0.999999139188337, iteration: 181125
loss: 0.9801914095878601,grad_norm: 0.8960661058895232, iteration: 181126
loss: 1.046125054359436,grad_norm: 0.9999991934550854, iteration: 181127
loss: 0.9555879831314087,grad_norm: 0.9999991620503689, iteration: 181128
loss: 0.9959306716918945,grad_norm: 0.9007938626137296, iteration: 181129
loss: 1.0097160339355469,grad_norm: 0.9999993348569103, iteration: 181130
loss: 0.9846699237823486,grad_norm: 0.8783962861584197, iteration: 181131
loss: 1.010725975036621,grad_norm: 0.999999185054576, iteration: 181132
loss: 0.9756192564964294,grad_norm: 0.8748079647355623, iteration: 181133
loss: 0.9819395542144775,grad_norm: 0.9999990705092321, iteration: 181134
loss: 1.0176100730895996,grad_norm: 0.999999160404584, iteration: 181135
loss: 1.0202603340148926,grad_norm: 0.9999990946928692, iteration: 181136
loss: 0.9997297525405884,grad_norm: 0.9999991838226077, iteration: 181137
loss: 1.0328919887542725,grad_norm: 0.9924875767183381, iteration: 181138
loss: 0.967122495174408,grad_norm: 0.7494296823905829, iteration: 181139
loss: 1.0054198503494263,grad_norm: 0.9168364981618627, iteration: 181140
loss: 0.9784423112869263,grad_norm: 0.9999992055733687, iteration: 181141
loss: 1.01503586769104,grad_norm: 0.9999992148294832, iteration: 181142
loss: 0.9901219010353088,grad_norm: 0.9999991567709337, iteration: 181143
loss: 0.9826075434684753,grad_norm: 0.9390426722046745, iteration: 181144
loss: 0.9748136401176453,grad_norm: 0.9236818810480855, iteration: 181145
loss: 0.9534241557121277,grad_norm: 0.9649424618059405, iteration: 181146
loss: 1.0029394626617432,grad_norm: 0.9999991341276417, iteration: 181147
loss: 1.011844277381897,grad_norm: 0.999999247880025, iteration: 181148
loss: 1.0112781524658203,grad_norm: 0.8514298275616433, iteration: 181149
loss: 0.9951654672622681,grad_norm: 0.9999990277240935, iteration: 181150
loss: 0.9944461584091187,grad_norm: 0.9628902770828035, iteration: 181151
loss: 1.0323975086212158,grad_norm: 0.9999991748806384, iteration: 181152
loss: 1.011816143989563,grad_norm: 0.9914621289356145, iteration: 181153
loss: 0.9456971287727356,grad_norm: 0.9999990582263494, iteration: 181154
loss: 1.018535852432251,grad_norm: 0.8665268241616978, iteration: 181155
loss: 0.9731813073158264,grad_norm: 0.9999991250738262, iteration: 181156
loss: 1.008212924003601,grad_norm: 0.890297062996016, iteration: 181157
loss: 0.9831446409225464,grad_norm: 0.9999991154486757, iteration: 181158
loss: 0.9630645513534546,grad_norm: 0.8103227843713826, iteration: 181159
loss: 0.9711359739303589,grad_norm: 0.833045776896238, iteration: 181160
loss: 1.0266129970550537,grad_norm: 0.9313775259373254, iteration: 181161
loss: 1.0256505012512207,grad_norm: 0.8852487160381698, iteration: 181162
loss: 1.0026359558105469,grad_norm: 0.9999990504044095, iteration: 181163
loss: 1.0293020009994507,grad_norm: 0.8790649696301872, iteration: 181164
loss: 1.042995572090149,grad_norm: 0.998223647807268, iteration: 181165
loss: 0.9503186941146851,grad_norm: 0.9999991278087489, iteration: 181166
loss: 0.9921154379844666,grad_norm: 0.9901302735389753, iteration: 181167
loss: 1.004310965538025,grad_norm: 0.9647402208310274, iteration: 181168
loss: 0.9664804339408875,grad_norm: 0.9999991587968904, iteration: 181169
loss: 1.0216538906097412,grad_norm: 0.9869420670181748, iteration: 181170
loss: 0.9914318323135376,grad_norm: 0.9997315272908451, iteration: 181171
loss: 0.9929835200309753,grad_norm: 0.8929193834481753, iteration: 181172
loss: 1.0142887830734253,grad_norm: 0.9086825059602172, iteration: 181173
loss: 1.0272269248962402,grad_norm: 0.9195221411523059, iteration: 181174
loss: 1.0011889934539795,grad_norm: 0.9227179443448782, iteration: 181175
loss: 0.989791214466095,grad_norm: 0.9999992486242815, iteration: 181176
loss: 1.0040971040725708,grad_norm: 0.9999990550637395, iteration: 181177
loss: 0.9969980120658875,grad_norm: 0.8242232803817396, iteration: 181178
loss: 1.08701491355896,grad_norm: 0.9999999059354279, iteration: 181179
loss: 1.0579026937484741,grad_norm: 0.9999997990364569, iteration: 181180
loss: 0.9681414365768433,grad_norm: 0.9982414322665465, iteration: 181181
loss: 1.0057436227798462,grad_norm: 0.9999989569132492, iteration: 181182
loss: 0.9997105598449707,grad_norm: 0.9999991663917048, iteration: 181183
loss: 1.0100184679031372,grad_norm: 0.9999990234629944, iteration: 181184
loss: 1.0333380699157715,grad_norm: 0.9999991712927611, iteration: 181185
loss: 0.9835423231124878,grad_norm: 0.9999992092017077, iteration: 181186
loss: 1.0090571641921997,grad_norm: 0.9952857443971065, iteration: 181187
loss: 1.016260027885437,grad_norm: 0.8860824759177596, iteration: 181188
loss: 1.0290107727050781,grad_norm: 0.9758885431744982, iteration: 181189
loss: 1.0081815719604492,grad_norm: 0.8044507729290387, iteration: 181190
loss: 0.9779608249664307,grad_norm: 0.866237096741498, iteration: 181191
loss: 0.993158757686615,grad_norm: 0.9974142712739781, iteration: 181192
loss: 0.9975663423538208,grad_norm: 0.9999990057617412, iteration: 181193
loss: 0.9942503571510315,grad_norm: 0.9612279813323666, iteration: 181194
loss: 1.0412712097167969,grad_norm: 0.9999996729025171, iteration: 181195
loss: 0.9823175668716431,grad_norm: 0.9999991506722686, iteration: 181196
loss: 1.01636803150177,grad_norm: 0.918363936923273, iteration: 181197
loss: 0.9923733472824097,grad_norm: 0.9999996921388405, iteration: 181198
loss: 0.9732115864753723,grad_norm: 0.9031232737503476, iteration: 181199
loss: 0.9781098365783691,grad_norm: 0.8579198073516645, iteration: 181200
loss: 0.9892598986625671,grad_norm: 0.9999996025519594, iteration: 181201
loss: 1.0661394596099854,grad_norm: 0.999999663047025, iteration: 181202
loss: 1.0457985401153564,grad_norm: 0.9365539618671959, iteration: 181203
loss: 0.9922849535942078,grad_norm: 0.9999989955462179, iteration: 181204
loss: 1.0173499584197998,grad_norm: 0.9999991193407028, iteration: 181205
loss: 1.0634454488754272,grad_norm: 0.9999991394169285, iteration: 181206
loss: 1.0129432678222656,grad_norm: 0.9999991027862293, iteration: 181207
loss: 1.0127668380737305,grad_norm: 0.838996215579441, iteration: 181208
loss: 1.0277315378189087,grad_norm: 0.9752466211911616, iteration: 181209
loss: 0.9970465302467346,grad_norm: 0.9306352027324957, iteration: 181210
loss: 0.9928030967712402,grad_norm: 0.966549303218456, iteration: 181211
loss: 0.9880959987640381,grad_norm: 0.9999992066326162, iteration: 181212
loss: 1.0805943012237549,grad_norm: 0.9397317623434872, iteration: 181213
loss: 1.0206702947616577,grad_norm: 0.9999989902802455, iteration: 181214
loss: 0.9909573793411255,grad_norm: 0.9118918753024517, iteration: 181215
loss: 1.0371394157409668,grad_norm: 0.9999991244964187, iteration: 181216
loss: 1.0044214725494385,grad_norm: 0.9346454276016906, iteration: 181217
loss: 0.9913727641105652,grad_norm: 0.861418579010326, iteration: 181218
loss: 0.9917989373207092,grad_norm: 0.9999991019983234, iteration: 181219
loss: 1.0287967920303345,grad_norm: 0.9999991281851284, iteration: 181220
loss: 1.008177638053894,grad_norm: 0.9339879208735395, iteration: 181221
loss: 1.0124592781066895,grad_norm: 0.9999991850414253, iteration: 181222
loss: 1.0341190099716187,grad_norm: 0.860279191607518, iteration: 181223
loss: 0.9955161809921265,grad_norm: 0.9999991129582216, iteration: 181224
loss: 1.0177839994430542,grad_norm: 0.9999991417982478, iteration: 181225
loss: 1.0569562911987305,grad_norm: 0.9999998879612192, iteration: 181226
loss: 1.1240589618682861,grad_norm: 0.9999998429354546, iteration: 181227
loss: 0.9657602310180664,grad_norm: 0.9999990743572508, iteration: 181228
loss: 1.0016956329345703,grad_norm: 0.9999990612778954, iteration: 181229
loss: 1.0129494667053223,grad_norm: 0.8797580449151045, iteration: 181230
loss: 0.9974952340126038,grad_norm: 0.974439281131832, iteration: 181231
loss: 1.0821337699890137,grad_norm: 0.9999990583535702, iteration: 181232
loss: 0.9824425578117371,grad_norm: 0.9999990336339598, iteration: 181233
loss: 0.9922572374343872,grad_norm: 0.9241952740001532, iteration: 181234
loss: 0.9855282306671143,grad_norm: 0.9999991322992522, iteration: 181235
loss: 0.9970218539237976,grad_norm: 0.9999990955305127, iteration: 181236
loss: 0.9949002265930176,grad_norm: 0.9020353673990358, iteration: 181237
loss: 1.0100260972976685,grad_norm: 0.9999991341404925, iteration: 181238
loss: 0.9568860530853271,grad_norm: 0.7519253641819471, iteration: 181239
loss: 1.0151760578155518,grad_norm: 0.9346485069074192, iteration: 181240
loss: 0.9962354302406311,grad_norm: 0.9137859502960863, iteration: 181241
loss: 0.9982749819755554,grad_norm: 0.8593081612982119, iteration: 181242
loss: 0.9852963089942932,grad_norm: 0.9999993874146963, iteration: 181243
loss: 1.0156724452972412,grad_norm: 0.8043300002684576, iteration: 181244
loss: 0.9599338173866272,grad_norm: 0.9999991940412694, iteration: 181245
loss: 0.9996131658554077,grad_norm: 0.999999115286374, iteration: 181246
loss: 1.0020705461502075,grad_norm: 0.9533285101329259, iteration: 181247
loss: 1.0267049074172974,grad_norm: 0.9999990893554106, iteration: 181248
loss: 1.0092520713806152,grad_norm: 0.8562346994771262, iteration: 181249
loss: 1.0127462148666382,grad_norm: 0.9485961014723745, iteration: 181250
loss: 1.063959002494812,grad_norm: 0.9999989331012229, iteration: 181251
loss: 0.9701492786407471,grad_norm: 0.9999991637057335, iteration: 181252
loss: 1.009782075881958,grad_norm: 0.999998876687248, iteration: 181253
loss: 0.9827835559844971,grad_norm: 0.9999990565625959, iteration: 181254
loss: 1.00438392162323,grad_norm: 0.9999990541141934, iteration: 181255
loss: 1.0145975351333618,grad_norm: 0.9133041318048967, iteration: 181256
loss: 1.0936036109924316,grad_norm: 0.999999949838973, iteration: 181257
loss: 0.975902795791626,grad_norm: 0.8918317422452323, iteration: 181258
loss: 0.9799728989601135,grad_norm: 0.9999990135056539, iteration: 181259
loss: 0.9951955676078796,grad_norm: 0.9995964990680095, iteration: 181260
loss: 0.9652448892593384,grad_norm: 0.9999992557926897, iteration: 181261
loss: 1.0045162439346313,grad_norm: 0.9190201006494505, iteration: 181262
loss: 1.0258657932281494,grad_norm: 0.9999997953581607, iteration: 181263
loss: 1.0229452848434448,grad_norm: 0.9999991907872477, iteration: 181264
loss: 1.010788083076477,grad_norm: 0.9411573909459479, iteration: 181265
loss: 1.0745326280593872,grad_norm: 0.9995606241117158, iteration: 181266
loss: 1.0217716693878174,grad_norm: 0.9999993310438318, iteration: 181267
loss: 1.022212028503418,grad_norm: 0.9999992087720568, iteration: 181268
loss: 1.0045802593231201,grad_norm: 0.9999989425557653, iteration: 181269
loss: 0.9670194983482361,grad_norm: 0.9760018174248881, iteration: 181270
loss: 1.0178196430206299,grad_norm: 0.8932811012924579, iteration: 181271
loss: 1.0270878076553345,grad_norm: 0.9999993669075682, iteration: 181272
loss: 0.9878772497177124,grad_norm: 0.9276832004492709, iteration: 181273
loss: 0.9691832065582275,grad_norm: 0.9999996351675905, iteration: 181274
loss: 1.0027326345443726,grad_norm: 0.9999989766148774, iteration: 181275
loss: 0.9723935127258301,grad_norm: 0.9999990011344865, iteration: 181276
loss: 0.9841067790985107,grad_norm: 0.9999991326821465, iteration: 181277
loss: 0.9994996786117554,grad_norm: 0.9127986654872113, iteration: 181278
loss: 0.9778500199317932,grad_norm: 0.9502263512878942, iteration: 181279
loss: 0.9710590839385986,grad_norm: 0.9532473418217992, iteration: 181280
loss: 1.0040760040283203,grad_norm: 0.898731747645525, iteration: 181281
loss: 0.9817953109741211,grad_norm: 0.9418276274308391, iteration: 181282
loss: 0.9852465391159058,grad_norm: 0.9999990249241641, iteration: 181283
loss: 1.0232051610946655,grad_norm: 0.9237229961393574, iteration: 181284
loss: 0.9925965070724487,grad_norm: 0.8605654672434147, iteration: 181285
loss: 1.0090579986572266,grad_norm: 0.9999992022513177, iteration: 181286
loss: 1.0066590309143066,grad_norm: 0.9999991205637642, iteration: 181287
loss: 0.9960799217224121,grad_norm: 0.9999991967090781, iteration: 181288
loss: 0.9819595217704773,grad_norm: 0.9669574932239895, iteration: 181289
loss: 0.9987088441848755,grad_norm: 0.9858636127444886, iteration: 181290
loss: 0.9900205135345459,grad_norm: 0.9999989502884854, iteration: 181291
loss: 0.9997548460960388,grad_norm: 0.9640731375062783, iteration: 181292
loss: 1.0060416460037231,grad_norm: 0.9999991701378266, iteration: 181293
loss: 0.9619157314300537,grad_norm: 0.9999991054226939, iteration: 181294
loss: 1.0035195350646973,grad_norm: 0.8633044128480213, iteration: 181295
loss: 1.01873779296875,grad_norm: 0.9870939533463582, iteration: 181296
loss: 1.0097203254699707,grad_norm: 0.8117537936569714, iteration: 181297
loss: 1.000278353691101,grad_norm: 0.9946181172442008, iteration: 181298
loss: 0.9894272089004517,grad_norm: 0.9904351831389661, iteration: 181299
loss: 0.9477800726890564,grad_norm: 0.992210265861698, iteration: 181300
loss: 1.012113332748413,grad_norm: 0.9999992052254019, iteration: 181301
loss: 1.0134543180465698,grad_norm: 0.9482459605880451, iteration: 181302
loss: 0.9929805397987366,grad_norm: 0.9656825209874302, iteration: 181303
loss: 1.0043554306030273,grad_norm: 0.9999991362837691, iteration: 181304
loss: 0.9828405976295471,grad_norm: 0.7283848856771762, iteration: 181305
loss: 0.9533929824829102,grad_norm: 0.9999991008848398, iteration: 181306
loss: 0.9775736331939697,grad_norm: 0.8636589585144624, iteration: 181307
loss: 0.9980987906455994,grad_norm: 0.8384821527479055, iteration: 181308
loss: 1.00090754032135,grad_norm: 0.999999260762375, iteration: 181309
loss: 0.9760851860046387,grad_norm: 0.9462680680740055, iteration: 181310
loss: 1.0121501684188843,grad_norm: 0.9405670762460224, iteration: 181311
loss: 1.004117727279663,grad_norm: 0.9999991181522719, iteration: 181312
loss: 1.0015442371368408,grad_norm: 0.9421521811735677, iteration: 181313
loss: 0.992888867855072,grad_norm: 0.9833441328871789, iteration: 181314
loss: 0.9513134360313416,grad_norm: 0.9520652520518733, iteration: 181315
loss: 0.9579148888587952,grad_norm: 0.9999991445827104, iteration: 181316
loss: 0.9898746013641357,grad_norm: 0.999999110178771, iteration: 181317
loss: 0.988028347492218,grad_norm: 0.9999990055895213, iteration: 181318
loss: 1.0266426801681519,grad_norm: 0.9332437646277447, iteration: 181319
loss: 1.0157499313354492,grad_norm: 0.9999990670106942, iteration: 181320
loss: 1.0090488195419312,grad_norm: 0.9999991596869641, iteration: 181321
loss: 1.023979902267456,grad_norm: 0.9999990107659338, iteration: 181322
loss: 1.0062884092330933,grad_norm: 0.9999990229987772, iteration: 181323
loss: 1.029888391494751,grad_norm: 0.9171402932835857, iteration: 181324
loss: 0.982782244682312,grad_norm: 0.9999991413521384, iteration: 181325
loss: 0.9985066056251526,grad_norm: 0.9999989409929523, iteration: 181326
loss: 0.9907467365264893,grad_norm: 0.8833468893139371, iteration: 181327
loss: 1.03164803981781,grad_norm: 0.9999991997750628, iteration: 181328
loss: 1.0007648468017578,grad_norm: 0.9999991300943867, iteration: 181329
loss: 1.025109887123108,grad_norm: 0.9999991921360754, iteration: 181330
loss: 1.012864112854004,grad_norm: 0.9153632695369436, iteration: 181331
loss: 0.9934796690940857,grad_norm: 0.9999992312099031, iteration: 181332
loss: 1.0116921663284302,grad_norm: 0.9999993058624774, iteration: 181333
loss: 1.037266492843628,grad_norm: 0.9418386407454012, iteration: 181334
loss: 1.0195379257202148,grad_norm: 0.9033314937289617, iteration: 181335
loss: 0.9799911975860596,grad_norm: 0.9258727624318475, iteration: 181336
loss: 1.010144829750061,grad_norm: 0.9999989116282033, iteration: 181337
loss: 0.9787079095840454,grad_norm: 0.9826660431600911, iteration: 181338
loss: 0.9478576183319092,grad_norm: 0.9415195816017126, iteration: 181339
loss: 1.0044046640396118,grad_norm: 0.999998981582248, iteration: 181340
loss: 0.9896222949028015,grad_norm: 0.999999205248266, iteration: 181341
loss: 0.9888579249382019,grad_norm: 0.9877044061047882, iteration: 181342
loss: 0.9973012804985046,grad_norm: 0.9999991404144141, iteration: 181343
loss: 0.9940860271453857,grad_norm: 0.9999991456572935, iteration: 181344
loss: 1.017831802368164,grad_norm: 0.9999991196131608, iteration: 181345
loss: 1.0289002656936646,grad_norm: 0.9999991247219211, iteration: 181346
loss: 1.0270484685897827,grad_norm: 0.9999990795663797, iteration: 181347
loss: 0.9846271276473999,grad_norm: 0.9349901292377187, iteration: 181348
loss: 0.9460785984992981,grad_norm: 0.999999231809057, iteration: 181349
loss: 1.089483618736267,grad_norm: 0.999999167257049, iteration: 181350
loss: 1.219254493713379,grad_norm: 0.999999850246359, iteration: 181351
loss: 0.9611865878105164,grad_norm: 0.9846130706699229, iteration: 181352
loss: 0.9853102564811707,grad_norm: 0.9522243366932369, iteration: 181353
loss: 0.9724388718605042,grad_norm: 0.9999990405025326, iteration: 181354
loss: 0.9716355204582214,grad_norm: 0.9310276227306679, iteration: 181355
loss: 1.013283610343933,grad_norm: 0.8432127401557954, iteration: 181356
loss: 0.9883858561515808,grad_norm: 0.8446494192523434, iteration: 181357
loss: 1.0389082431793213,grad_norm: 0.9999992060983561, iteration: 181358
loss: 1.011311650276184,grad_norm: 0.9041429941681193, iteration: 181359
loss: 0.9649240970611572,grad_norm: 0.9999991646039168, iteration: 181360
loss: 1.1045708656311035,grad_norm: 0.9999992467451588, iteration: 181361
loss: 0.9946730732917786,grad_norm: 0.9171550167315509, iteration: 181362
loss: 0.9608187675476074,grad_norm: 0.9999991019439336, iteration: 181363
loss: 0.9774275422096252,grad_norm: 0.9865923775087965, iteration: 181364
loss: 0.9931628704071045,grad_norm: 0.9999992466264808, iteration: 181365
loss: 0.9838730692863464,grad_norm: 0.9999990944574075, iteration: 181366
loss: 0.9691757559776306,grad_norm: 0.99999916556873, iteration: 181367
loss: 1.0477588176727295,grad_norm: 0.9192496106445538, iteration: 181368
loss: 0.9752914309501648,grad_norm: 0.9987656832306595, iteration: 181369
loss: 1.11104416847229,grad_norm: 0.9999991816560226, iteration: 181370
loss: 0.9771061539649963,grad_norm: 0.9243592809868654, iteration: 181371
loss: 1.013655424118042,grad_norm: 0.7824959558677854, iteration: 181372
loss: 1.0193489789962769,grad_norm: 0.9999989836365791, iteration: 181373
loss: 1.0254181623458862,grad_norm: 0.9035283999686904, iteration: 181374
loss: 0.9799574017524719,grad_norm: 0.9746740484934788, iteration: 181375
loss: 0.977831244468689,grad_norm: 0.9202225712670488, iteration: 181376
loss: 0.9998878240585327,grad_norm: 0.9803578123422663, iteration: 181377
loss: 0.9753760695457458,grad_norm: 0.9855464672185931, iteration: 181378
loss: 0.9961563348770142,grad_norm: 0.9145816189126315, iteration: 181379
loss: 0.9790859222412109,grad_norm: 0.9591871990463101, iteration: 181380
loss: 1.0130881071090698,grad_norm: 0.9999992390793608, iteration: 181381
loss: 1.0691180229187012,grad_norm: 0.9999993883467251, iteration: 181382
loss: 1.0302813053131104,grad_norm: 0.9997611538219832, iteration: 181383
loss: 1.0224653482437134,grad_norm: 0.8556483895751873, iteration: 181384
loss: 1.0132392644882202,grad_norm: 0.9999991626477747, iteration: 181385
loss: 0.9941306710243225,grad_norm: 0.9197791091888626, iteration: 181386
loss: 1.0009995698928833,grad_norm: 0.7944965302828653, iteration: 181387
loss: 1.0675772428512573,grad_norm: 0.9999992568253342, iteration: 181388
loss: 0.9747480750083923,grad_norm: 0.9999990951996826, iteration: 181389
loss: 0.981495201587677,grad_norm: 0.9999989738728342, iteration: 181390
loss: 0.9962581396102905,grad_norm: 0.999999117140394, iteration: 181391
loss: 1.0509458780288696,grad_norm: 0.9999998812540227, iteration: 181392
loss: 1.0141934156417847,grad_norm: 0.9999991522814361, iteration: 181393
loss: 0.9646440744400024,grad_norm: 0.9710151808533386, iteration: 181394
loss: 0.9862661957740784,grad_norm: 0.8600113097713605, iteration: 181395
loss: 0.9731093645095825,grad_norm: 0.999998909157074, iteration: 181396
loss: 1.0440164804458618,grad_norm: 0.9999990705302518, iteration: 181397
loss: 1.0124688148498535,grad_norm: 0.9783964826805683, iteration: 181398
loss: 0.9689493775367737,grad_norm: 0.9546289592638985, iteration: 181399
loss: 1.017653226852417,grad_norm: 0.9999991249543356, iteration: 181400
loss: 1.0260416269302368,grad_norm: 0.9999991969365901, iteration: 181401
loss: 1.023897647857666,grad_norm: 0.9679466781822399, iteration: 181402
loss: 1.0087532997131348,grad_norm: 0.9999991712210132, iteration: 181403
loss: 0.9775030612945557,grad_norm: 0.939007382346385, iteration: 181404
loss: 0.9911726117134094,grad_norm: 0.9999991296284081, iteration: 181405
loss: 1.039797067642212,grad_norm: 0.9625756205856437, iteration: 181406
loss: 0.9926296472549438,grad_norm: 0.9539452254660413, iteration: 181407
loss: 0.9889364838600159,grad_norm: 0.9999997890634615, iteration: 181408
loss: 0.9800312519073486,grad_norm: 0.9576157647135933, iteration: 181409
loss: 1.024523377418518,grad_norm: 0.9999991838870775, iteration: 181410
loss: 1.0171653032302856,grad_norm: 0.9999990636658997, iteration: 181411
loss: 1.0019214153289795,grad_norm: 0.9834053268278293, iteration: 181412
loss: 1.0013154745101929,grad_norm: 0.9999989797130694, iteration: 181413
loss: 0.9590416550636292,grad_norm: 0.8717419200260147, iteration: 181414
loss: 1.019762635231018,grad_norm: 0.956996089784235, iteration: 181415
loss: 1.0027713775634766,grad_norm: 0.9029961815445886, iteration: 181416
loss: 1.0173619985580444,grad_norm: 0.9906598529086323, iteration: 181417
loss: 0.9874685406684875,grad_norm: 0.9999991553774787, iteration: 181418
loss: 0.9944462776184082,grad_norm: 0.9191018500778283, iteration: 181419
loss: 0.998026430606842,grad_norm: 0.9999991585430278, iteration: 181420
loss: 1.0223487615585327,grad_norm: 0.8991329089259071, iteration: 181421
loss: 1.03226900100708,grad_norm: 0.987207668525459, iteration: 181422
loss: 1.0122008323669434,grad_norm: 0.999999094176768, iteration: 181423
loss: 0.9831613898277283,grad_norm: 0.994314382831571, iteration: 181424
loss: 1.0035576820373535,grad_norm: 0.9968959559669507, iteration: 181425
loss: 1.0735810995101929,grad_norm: 0.9999996931348203, iteration: 181426
loss: 1.0032074451446533,grad_norm: 0.9999993284622823, iteration: 181427
loss: 0.9770638942718506,grad_norm: 0.9999990888717525, iteration: 181428
loss: 0.9963251948356628,grad_norm: 0.938021957266118, iteration: 181429
loss: 1.0196865797042847,grad_norm: 0.9999992194266535, iteration: 181430
loss: 0.9824155569076538,grad_norm: 0.9281975226363381, iteration: 181431
loss: 1.0117301940917969,grad_norm: 0.9999992822830711, iteration: 181432
loss: 0.9319881796836853,grad_norm: 0.9999992410512839, iteration: 181433
loss: 0.989233136177063,grad_norm: 0.9285720730710513, iteration: 181434
loss: 1.0986355543136597,grad_norm: 0.9999995970735094, iteration: 181435
loss: 0.9983243942260742,grad_norm: 0.9999991587302349, iteration: 181436
loss: 1.0335007905960083,grad_norm: 0.8891116059458757, iteration: 181437
loss: 0.9978119134902954,grad_norm: 0.9999992004270059, iteration: 181438
loss: 0.9698089957237244,grad_norm: 0.988624539738167, iteration: 181439
loss: 1.0643802881240845,grad_norm: 0.9999992030220092, iteration: 181440
loss: 1.0061637163162231,grad_norm: 0.99999919836905, iteration: 181441
loss: 1.0008060932159424,grad_norm: 0.9870085220865257, iteration: 181442
loss: 1.0337976217269897,grad_norm: 0.9562742747697602, iteration: 181443
loss: 0.991354763507843,grad_norm: 0.9999989957203601, iteration: 181444
loss: 0.9824349284172058,grad_norm: 0.9729684553818793, iteration: 181445
loss: 1.028270959854126,grad_norm: 0.9999992613376246, iteration: 181446
loss: 1.050329327583313,grad_norm: 0.9999996862789925, iteration: 181447
loss: 1.0429400205612183,grad_norm: 0.985173911256576, iteration: 181448
loss: 1.018762469291687,grad_norm: 0.9474618790343788, iteration: 181449
loss: 0.9923732876777649,grad_norm: 0.9999992083315309, iteration: 181450
loss: 0.9777684807777405,grad_norm: 0.9999994360274159, iteration: 181451
loss: 0.9729751944541931,grad_norm: 0.9999989638527605, iteration: 181452
loss: 1.0135347843170166,grad_norm: 0.9999992776586066, iteration: 181453
loss: 0.9841306209564209,grad_norm: 0.9326790750578078, iteration: 181454
loss: 1.019199013710022,grad_norm: 0.9999991503094223, iteration: 181455
loss: 0.9621438384056091,grad_norm: 0.9465344280772353, iteration: 181456
loss: 1.0395795106887817,grad_norm: 0.9820822536125808, iteration: 181457
loss: 0.9986405372619629,grad_norm: 0.9559564701443539, iteration: 181458
loss: 1.0103634595870972,grad_norm: 0.9638328837305021, iteration: 181459
loss: 0.9868759512901306,grad_norm: 0.8978036245302692, iteration: 181460
loss: 1.005496621131897,grad_norm: 0.9216906006999033, iteration: 181461
loss: 0.9834201335906982,grad_norm: 0.8427895846558986, iteration: 181462
loss: 1.0014994144439697,grad_norm: 0.9532467647988292, iteration: 181463
loss: 1.0038021802902222,grad_norm: 0.9999993124897869, iteration: 181464
loss: 0.9815909266471863,grad_norm: 0.9999992371026424, iteration: 181465
loss: 1.002874493598938,grad_norm: 0.9999989443533753, iteration: 181466
loss: 1.0007041692733765,grad_norm: 0.9434671706447716, iteration: 181467
loss: 0.9901459217071533,grad_norm: 0.8631940156010578, iteration: 181468
loss: 0.9994211792945862,grad_norm: 0.9999990920900083, iteration: 181469
loss: 1.0204167366027832,grad_norm: 0.999999108738602, iteration: 181470
loss: 1.042191743850708,grad_norm: 0.9999990378809491, iteration: 181471
loss: 1.0100516080856323,grad_norm: 0.9999989390156553, iteration: 181472
loss: 1.0029741525650024,grad_norm: 0.9999991788613042, iteration: 181473
loss: 0.9847656488418579,grad_norm: 0.9999991634561862, iteration: 181474
loss: 0.9853096604347229,grad_norm: 0.9999992445680228, iteration: 181475
loss: 0.9935137033462524,grad_norm: 0.9999990323532846, iteration: 181476
loss: 0.9827224612236023,grad_norm: 0.999999145939607, iteration: 181477
loss: 0.980014443397522,grad_norm: 0.9857370287727587, iteration: 181478
loss: 1.0091551542282104,grad_norm: 0.8952050349476717, iteration: 181479
loss: 1.047219157218933,grad_norm: 0.9211533571378313, iteration: 181480
loss: 1.018696904182434,grad_norm: 0.9264886959471496, iteration: 181481
loss: 1.0152225494384766,grad_norm: 0.999999136450866, iteration: 181482
loss: 1.010501503944397,grad_norm: 0.9999991617006786, iteration: 181483
loss: 0.9996002912521362,grad_norm: 0.8157333351476614, iteration: 181484
loss: 1.015092134475708,grad_norm: 0.9999991829566843, iteration: 181485
loss: 0.9616332650184631,grad_norm: 0.8959853118737491, iteration: 181486
loss: 1.017613410949707,grad_norm: 0.9999992190283963, iteration: 181487
loss: 1.0269110202789307,grad_norm: 0.7706086444282622, iteration: 181488
loss: 0.9837177991867065,grad_norm: 0.8709164405030875, iteration: 181489
loss: 1.013653039932251,grad_norm: 0.996131452643926, iteration: 181490
loss: 1.0033341646194458,grad_norm: 0.9999991347666262, iteration: 181491
loss: 1.0226106643676758,grad_norm: 0.9999992532053684, iteration: 181492
loss: 0.9805464148521423,grad_norm: 0.906338328790797, iteration: 181493
loss: 0.9811532497406006,grad_norm: 0.9999991358635945, iteration: 181494
loss: 1.0255396366119385,grad_norm: 0.9999993036814333, iteration: 181495
loss: 0.9995071291923523,grad_norm: 0.9598776032597852, iteration: 181496
loss: 0.9819049835205078,grad_norm: 0.9489227085369529, iteration: 181497
loss: 1.0099384784698486,grad_norm: 0.9999991534172871, iteration: 181498
loss: 0.9700110554695129,grad_norm: 0.9239487104970538, iteration: 181499
loss: 1.0086655616760254,grad_norm: 0.9626320985571428, iteration: 181500
loss: 1.0124881267547607,grad_norm: 0.9938521105149654, iteration: 181501
loss: 1.0052229166030884,grad_norm: 0.9999991126119087, iteration: 181502
loss: 1.0230793952941895,grad_norm: 0.999999181105889, iteration: 181503
loss: 0.9754684567451477,grad_norm: 0.9596425914921124, iteration: 181504
loss: 1.013260841369629,grad_norm: 0.9999990108559665, iteration: 181505
loss: 1.0094187259674072,grad_norm: 0.9999992767454705, iteration: 181506
loss: 1.011336326599121,grad_norm: 0.9999994109106498, iteration: 181507
loss: 1.0430309772491455,grad_norm: 0.999999144928947, iteration: 181508
loss: 1.0388187170028687,grad_norm: 0.9999991226840735, iteration: 181509
loss: 1.0113009214401245,grad_norm: 0.9999991081062901, iteration: 181510
loss: 0.9422480463981628,grad_norm: 0.9999990387391058, iteration: 181511
loss: 0.993147611618042,grad_norm: 0.9999992288656746, iteration: 181512
loss: 0.9730408787727356,grad_norm: 0.8897636175945175, iteration: 181513
loss: 0.9741154313087463,grad_norm: 0.9999991124181942, iteration: 181514
loss: 1.0231740474700928,grad_norm: 0.9784341932431265, iteration: 181515
loss: 1.0023856163024902,grad_norm: 0.9279988768204939, iteration: 181516
loss: 1.0363162755966187,grad_norm: 0.9999991506023385, iteration: 181517
loss: 0.9971872568130493,grad_norm: 0.9999991783517145, iteration: 181518
loss: 0.9952538013458252,grad_norm: 0.8192325266295537, iteration: 181519
loss: 1.0220786333084106,grad_norm: 0.9567938851629462, iteration: 181520
loss: 0.9982913136482239,grad_norm: 0.9999990005825271, iteration: 181521
loss: 0.9853272438049316,grad_norm: 0.9544151911695687, iteration: 181522
loss: 1.0080021619796753,grad_norm: 0.8868321531550878, iteration: 181523
loss: 1.0500690937042236,grad_norm: 0.9999999508018864, iteration: 181524
loss: 1.0354464054107666,grad_norm: 0.999999196812703, iteration: 181525
loss: 0.988256573677063,grad_norm: 0.9999990818271777, iteration: 181526
loss: 1.003589153289795,grad_norm: 0.8728317809302466, iteration: 181527
loss: 1.0078226327896118,grad_norm: 0.9999992965166793, iteration: 181528
loss: 0.9923647046089172,grad_norm: 0.8969269407829478, iteration: 181529
loss: 0.9682630300521851,grad_norm: 0.9999990929825257, iteration: 181530
loss: 0.9994493126869202,grad_norm: 0.8451562820851154, iteration: 181531
loss: 1.0062744617462158,grad_norm: 0.877919079148406, iteration: 181532
loss: 1.0174139738082886,grad_norm: 0.9577369886257127, iteration: 181533
loss: 0.9893226027488708,grad_norm: 0.9705650462586762, iteration: 181534
loss: 1.020357370376587,grad_norm: 0.9929412783035727, iteration: 181535
loss: 1.0227502584457397,grad_norm: 0.999999165277639, iteration: 181536
loss: 1.0226606130599976,grad_norm: 0.9142427165802592, iteration: 181537
loss: 1.0452264547348022,grad_norm: 0.9999995072002336, iteration: 181538
loss: 1.0240905284881592,grad_norm: 0.999999122977551, iteration: 181539
loss: 1.0078791379928589,grad_norm: 0.9999991538866108, iteration: 181540
loss: 1.0399659872055054,grad_norm: 0.9999994414756868, iteration: 181541
loss: 1.0195341110229492,grad_norm: 0.9612196504795992, iteration: 181542
loss: 0.9887410998344421,grad_norm: 0.999999242599405, iteration: 181543
loss: 0.9838182330131531,grad_norm: 0.890213511543461, iteration: 181544
loss: 0.9807851910591125,grad_norm: 0.9999990206570726, iteration: 181545
loss: 0.9986158609390259,grad_norm: 0.9999991220114761, iteration: 181546
loss: 1.0004851818084717,grad_norm: 0.8255247861153732, iteration: 181547
loss: 0.9537386298179626,grad_norm: 0.9999989665820548, iteration: 181548
loss: 0.9649603366851807,grad_norm: 0.8637339141576487, iteration: 181549
loss: 1.0096004009246826,grad_norm: 0.9552660723548678, iteration: 181550
loss: 0.9944860339164734,grad_norm: 0.8811056266916167, iteration: 181551
loss: 1.0338380336761475,grad_norm: 0.9254067465439448, iteration: 181552
loss: 1.0849018096923828,grad_norm: 0.9185616675930245, iteration: 181553
loss: 0.9914411902427673,grad_norm: 0.9999991881451439, iteration: 181554
loss: 0.9626497030258179,grad_norm: 0.999998969974226, iteration: 181555
loss: 1.0023739337921143,grad_norm: 0.8530565788712088, iteration: 181556
loss: 0.953087329864502,grad_norm: 0.9768363407871735, iteration: 181557
loss: 0.982980489730835,grad_norm: 0.8557767651451454, iteration: 181558
loss: 1.0095479488372803,grad_norm: 0.9999993793269011, iteration: 181559
loss: 0.9753689765930176,grad_norm: 0.9075585209929994, iteration: 181560
loss: 0.9780533313751221,grad_norm: 0.9554541510638873, iteration: 181561
loss: 0.9712792038917542,grad_norm: 0.9423852792502617, iteration: 181562
loss: 1.0129354000091553,grad_norm: 0.985241772154253, iteration: 181563
loss: 1.0439438819885254,grad_norm: 0.9999991940526017, iteration: 181564
loss: 0.9915879964828491,grad_norm: 0.9254827303514168, iteration: 181565
loss: 1.0225861072540283,grad_norm: 0.9999998217039059, iteration: 181566
loss: 1.0141874551773071,grad_norm: 0.9999991490248914, iteration: 181567
loss: 1.0195214748382568,grad_norm: 0.9999992432988073, iteration: 181568
loss: 0.9887057542800903,grad_norm: 0.9070973242573649, iteration: 181569
loss: 0.9769874811172485,grad_norm: 0.9728520382201872, iteration: 181570
loss: 1.017154574394226,grad_norm: 0.9999991212787712, iteration: 181571
loss: 1.0330466032028198,grad_norm: 0.9314050198603525, iteration: 181572
loss: 1.008223295211792,grad_norm: 0.895422452699214, iteration: 181573
loss: 0.9980171322822571,grad_norm: 0.9999992614646154, iteration: 181574
loss: 0.974224328994751,grad_norm: 0.9999991123700975, iteration: 181575
loss: 0.987538754940033,grad_norm: 0.9435127111997708, iteration: 181576
loss: 1.001338005065918,grad_norm: 0.9186242058307663, iteration: 181577
loss: 1.0432206392288208,grad_norm: 0.9828722038733196, iteration: 181578
loss: 0.9476193785667419,grad_norm: 0.9836861760640914, iteration: 181579
loss: 0.9632105827331543,grad_norm: 0.9398808890268846, iteration: 181580
loss: 1.043033242225647,grad_norm: 0.9999992103002708, iteration: 181581
loss: 1.0095264911651611,grad_norm: 0.8199978824434456, iteration: 181582
loss: 0.9578569531440735,grad_norm: 0.8760191441407676, iteration: 181583
loss: 1.0183411836624146,grad_norm: 0.9999998702482724, iteration: 181584
loss: 0.9736097455024719,grad_norm: 0.9999990670925973, iteration: 181585
loss: 0.999200165271759,grad_norm: 0.9300911447042858, iteration: 181586
loss: 1.0134673118591309,grad_norm: 0.9999994231920913, iteration: 181587
loss: 1.0084388256072998,grad_norm: 0.8696284458648949, iteration: 181588
loss: 0.9954693913459778,grad_norm: 0.9003934045549085, iteration: 181589
loss: 0.9611140489578247,grad_norm: 0.9943189056853674, iteration: 181590
loss: 1.0053390264511108,grad_norm: 0.9999990563016852, iteration: 181591
loss: 1.0223850011825562,grad_norm: 0.9999990875203779, iteration: 181592
loss: 0.9610132575035095,grad_norm: 0.9999990355841479, iteration: 181593
loss: 1.0078281164169312,grad_norm: 0.9999989732026685, iteration: 181594
loss: 1.0024493932724,grad_norm: 0.9999991717137101, iteration: 181595
loss: 1.020216464996338,grad_norm: 0.9647084608026681, iteration: 181596
loss: 1.0231871604919434,grad_norm: 0.999999240620686, iteration: 181597
loss: 1.0149900913238525,grad_norm: 0.9170885916943459, iteration: 181598
loss: 0.9933071732521057,grad_norm: 0.9589891677085219, iteration: 181599
loss: 0.9849327802658081,grad_norm: 0.999999212153872, iteration: 181600
loss: 0.9588611721992493,grad_norm: 0.9572635028530979, iteration: 181601
loss: 1.00002920627594,grad_norm: 0.9491207038717495, iteration: 181602
loss: 1.0205191373825073,grad_norm: 0.8374416739307412, iteration: 181603
loss: 1.008940577507019,grad_norm: 0.9257722326499997, iteration: 181604
loss: 1.0206193923950195,grad_norm: 0.9999989852481314, iteration: 181605
loss: 1.0405869483947754,grad_norm: 0.9999989588313564, iteration: 181606
loss: 0.9826780557632446,grad_norm: 0.9999991480148933, iteration: 181607
loss: 1.0139316320419312,grad_norm: 0.9999991144154985, iteration: 181608
loss: 1.0252357721328735,grad_norm: 0.9999991231106423, iteration: 181609
loss: 1.0101799964904785,grad_norm: 0.8355555021641656, iteration: 181610
loss: 1.0226763486862183,grad_norm: 0.99999916238522, iteration: 181611
loss: 1.0050122737884521,grad_norm: 0.9643992452930086, iteration: 181612
loss: 0.9767208099365234,grad_norm: 0.999999133497457, iteration: 181613
loss: 1.0104987621307373,grad_norm: 0.9828431239015897, iteration: 181614
loss: 1.021785855293274,grad_norm: 0.9999992671488183, iteration: 181615
loss: 0.9991981983184814,grad_norm: 0.9765156123542225, iteration: 181616
loss: 1.0222272872924805,grad_norm: 0.9999995625185447, iteration: 181617
loss: 1.012094259262085,grad_norm: 0.9999991648465201, iteration: 181618
loss: 1.004294991493225,grad_norm: 0.9999991534234992, iteration: 181619
loss: 1.0241619348526,grad_norm: 0.950078990271318, iteration: 181620
loss: 0.999189019203186,grad_norm: 0.9477627211813633, iteration: 181621
loss: 0.9989054203033447,grad_norm: 0.8884063332970594, iteration: 181622
loss: 1.032814860343933,grad_norm: 0.9999991588575924, iteration: 181623
loss: 1.0164711475372314,grad_norm: 0.9999991329521423, iteration: 181624
loss: 1.0199625492095947,grad_norm: 0.9999993157047221, iteration: 181625
loss: 0.9898163080215454,grad_norm: 0.8578767039734361, iteration: 181626
loss: 1.0120166540145874,grad_norm: 0.9486626885831341, iteration: 181627
loss: 0.9603464007377625,grad_norm: 0.9999991688123611, iteration: 181628
loss: 0.9783487915992737,grad_norm: 0.999998956915662, iteration: 181629
loss: 0.9893887639045715,grad_norm: 0.9999990549793023, iteration: 181630
loss: 0.9893990159034729,grad_norm: 0.9999991333253937, iteration: 181631
loss: 0.9795584678649902,grad_norm: 0.8091105485383739, iteration: 181632
loss: 1.032615065574646,grad_norm: 0.9297390434252588, iteration: 181633
loss: 0.9927493333816528,grad_norm: 0.9999992016156056, iteration: 181634
loss: 0.9870142340660095,grad_norm: 0.8845967354261604, iteration: 181635
loss: 0.9782182574272156,grad_norm: 0.9999991376495186, iteration: 181636
loss: 0.9764590263366699,grad_norm: 0.9751890267108739, iteration: 181637
loss: 1.0547720193862915,grad_norm: 0.9999992012948332, iteration: 181638
loss: 1.0455560684204102,grad_norm: 0.9766346436400144, iteration: 181639
loss: 0.9637858867645264,grad_norm: 0.8964740711409028, iteration: 181640
loss: 1.0186810493469238,grad_norm: 0.8520401757821773, iteration: 181641
loss: 0.9895613789558411,grad_norm: 0.8765907989195294, iteration: 181642
loss: 1.0045408010482788,grad_norm: 0.9999994265781886, iteration: 181643
loss: 1.0066883563995361,grad_norm: 0.8247358465331736, iteration: 181644
loss: 1.0112944841384888,grad_norm: 0.9999992307879613, iteration: 181645
loss: 0.997820258140564,grad_norm: 0.9555990471158292, iteration: 181646
loss: 0.9607797265052795,grad_norm: 0.9004193872528533, iteration: 181647
loss: 0.9671886563301086,grad_norm: 0.9216709534302082, iteration: 181648
loss: 1.012384057044983,grad_norm: 0.9338400787033405, iteration: 181649
loss: 1.0368269681930542,grad_norm: 0.9999991742754615, iteration: 181650
loss: 0.9778974056243896,grad_norm: 0.999999218825484, iteration: 181651
loss: 0.9918453097343445,grad_norm: 0.9920068507719809, iteration: 181652
loss: 1.1008110046386719,grad_norm: 0.9999990230783973, iteration: 181653
loss: 0.9670407772064209,grad_norm: 0.9789242654403342, iteration: 181654
loss: 1.0153083801269531,grad_norm: 0.9822202272823666, iteration: 181655
loss: 0.9915251731872559,grad_norm: 0.9999990076970884, iteration: 181656
loss: 0.9875032305717468,grad_norm: 0.8736581548740371, iteration: 181657
loss: 0.9915742874145508,grad_norm: 0.9770269009287167, iteration: 181658
loss: 1.0153326988220215,grad_norm: 0.9999991977653884, iteration: 181659
loss: 0.9941595196723938,grad_norm: 0.753399249797194, iteration: 181660
loss: 0.9971749186515808,grad_norm: 0.9746519162334063, iteration: 181661
loss: 0.9709867835044861,grad_norm: 0.973122569954672, iteration: 181662
loss: 1.0353103876113892,grad_norm: 0.9146516480408574, iteration: 181663
loss: 1.0088069438934326,grad_norm: 0.9610245811126948, iteration: 181664
loss: 0.9861592650413513,grad_norm: 0.9319617451314256, iteration: 181665
loss: 0.9872995615005493,grad_norm: 0.9999991541731668, iteration: 181666
loss: 0.9984107613563538,grad_norm: 0.9999992763569522, iteration: 181667
loss: 1.0058722496032715,grad_norm: 0.9959961727870396, iteration: 181668
loss: 0.9353574514389038,grad_norm: 0.9999991022624041, iteration: 181669
loss: 1.0193055868148804,grad_norm: 0.9596750839431912, iteration: 181670
loss: 0.9855696558952332,grad_norm: 0.8767234316000614, iteration: 181671
loss: 1.019627332687378,grad_norm: 0.9999989596217308, iteration: 181672
loss: 0.985117495059967,grad_norm: 0.9999991357884228, iteration: 181673
loss: 1.0053315162658691,grad_norm: 0.9292942772296164, iteration: 181674
loss: 0.9955687522888184,grad_norm: 0.9999990798238088, iteration: 181675
loss: 1.0135124921798706,grad_norm: 0.9999991046301155, iteration: 181676
loss: 0.9959383010864258,grad_norm: 0.9840917021595798, iteration: 181677
loss: 1.0113662481307983,grad_norm: 0.9283260472132352, iteration: 181678
loss: 1.1311217546463013,grad_norm: 0.9999991005327787, iteration: 181679
loss: 1.0020171403884888,grad_norm: 0.9999993731167236, iteration: 181680
loss: 1.0106875896453857,grad_norm: 0.9794364031671685, iteration: 181681
loss: 0.9995756149291992,grad_norm: 0.9999990485889553, iteration: 181682
loss: 0.9992105960845947,grad_norm: 0.9836443197714919, iteration: 181683
loss: 1.0006608963012695,grad_norm: 0.9064465007362776, iteration: 181684
loss: 1.0078972578048706,grad_norm: 0.9999991533019391, iteration: 181685
loss: 1.02527916431427,grad_norm: 0.9999989283409059, iteration: 181686
loss: 1.0182234048843384,grad_norm: 0.9386465220659921, iteration: 181687
loss: 0.9600074887275696,grad_norm: 0.9343860141809326, iteration: 181688
loss: 1.0085725784301758,grad_norm: 0.9999990663079518, iteration: 181689
loss: 1.031840443611145,grad_norm: 0.999999543596228, iteration: 181690
loss: 0.9607352018356323,grad_norm: 0.999999044962035, iteration: 181691
loss: 1.0279043912887573,grad_norm: 0.9999989620220118, iteration: 181692
loss: 0.9989808201789856,grad_norm: 0.8934014082789603, iteration: 181693
loss: 1.014901876449585,grad_norm: 0.999999053075741, iteration: 181694
loss: 1.0031771659851074,grad_norm: 0.8824337770207452, iteration: 181695
loss: 1.0148202180862427,grad_norm: 0.8743026528176002, iteration: 181696
loss: 0.989936888217926,grad_norm: 0.9999990797136294, iteration: 181697
loss: 0.9846175909042358,grad_norm: 0.9892642094588144, iteration: 181698
loss: 0.9961234927177429,grad_norm: 0.8604742484429684, iteration: 181699
loss: 0.9946359395980835,grad_norm: 0.9999991386626157, iteration: 181700
loss: 0.9857158660888672,grad_norm: 0.9228272901544633, iteration: 181701
loss: 1.0101293325424194,grad_norm: 0.8768870092376615, iteration: 181702
loss: 0.9926879405975342,grad_norm: 0.9999989294357245, iteration: 181703
loss: 1.0083130598068237,grad_norm: 0.9999995565868088, iteration: 181704
loss: 1.1153756380081177,grad_norm: 0.9999990749853059, iteration: 181705
loss: 0.972527801990509,grad_norm: 0.962986897112112, iteration: 181706
loss: 1.0323314666748047,grad_norm: 0.999999110287067, iteration: 181707
loss: 0.9682706594467163,grad_norm: 0.9999991035810235, iteration: 181708
loss: 1.017511248588562,grad_norm: 0.9675299653551372, iteration: 181709
loss: 1.0033260583877563,grad_norm: 0.7914378746658447, iteration: 181710
loss: 0.9935452342033386,grad_norm: 0.9999991973214238, iteration: 181711
loss: 0.9846212267875671,grad_norm: 0.9999990972744263, iteration: 181712
loss: 0.9909389615058899,grad_norm: 0.9964373090330658, iteration: 181713
loss: 0.9659340977668762,grad_norm: 0.9351558436437437, iteration: 181714
loss: 1.006996512413025,grad_norm: 0.9999991867137993, iteration: 181715
loss: 0.9903614521026611,grad_norm: 0.9092802881417286, iteration: 181716
loss: 0.9961000680923462,grad_norm: 0.9999996689536037, iteration: 181717
loss: 1.022500991821289,grad_norm: 0.9967925462781422, iteration: 181718
loss: 0.9560036063194275,grad_norm: 0.9279111003653626, iteration: 181719
loss: 1.0230354070663452,grad_norm: 0.9999992364210136, iteration: 181720
loss: 1.0571341514587402,grad_norm: 0.9999994067709082, iteration: 181721
loss: 0.9933122396469116,grad_norm: 0.9999993688149557, iteration: 181722
loss: 0.9881452918052673,grad_norm: 0.9082243374512922, iteration: 181723
loss: 1.0167045593261719,grad_norm: 0.99999906693962, iteration: 181724
loss: 1.090457797050476,grad_norm: 0.9999999625908246, iteration: 181725
loss: 1.0102269649505615,grad_norm: 0.8411226077026734, iteration: 181726
loss: 1.0020521879196167,grad_norm: 0.9484187989769466, iteration: 181727
loss: 1.0284610986709595,grad_norm: 0.9999989424702005, iteration: 181728
loss: 1.112205982208252,grad_norm: 0.9999995348132195, iteration: 181729
loss: 1.0044249296188354,grad_norm: 0.8767562506950487, iteration: 181730
loss: 0.9801197052001953,grad_norm: 0.9621619664140126, iteration: 181731
loss: 0.9625585675239563,grad_norm: 0.9999991622291047, iteration: 181732
loss: 1.0185281038284302,grad_norm: 0.9338391672309646, iteration: 181733
loss: 1.030876636505127,grad_norm: 0.9551642369624589, iteration: 181734
loss: 1.01125168800354,grad_norm: 0.9097822187897489, iteration: 181735
loss: 0.9919826984405518,grad_norm: 0.949288528729288, iteration: 181736
loss: 1.0900241136550903,grad_norm: 0.9999996223230748, iteration: 181737
loss: 1.0268402099609375,grad_norm: 0.9999989884024999, iteration: 181738
loss: 1.0148288011550903,grad_norm: 0.8196653358209905, iteration: 181739
loss: 0.9572464227676392,grad_norm: 0.9420891088132499, iteration: 181740
loss: 0.9872880578041077,grad_norm: 0.9180816023749432, iteration: 181741
loss: 0.9746124744415283,grad_norm: 0.8520934870589993, iteration: 181742
loss: 0.9948269128799438,grad_norm: 0.9999991986576798, iteration: 181743
loss: 0.937521755695343,grad_norm: 0.9999991029205785, iteration: 181744
loss: 1.03115713596344,grad_norm: 0.9999990074861438, iteration: 181745
loss: 0.9859102964401245,grad_norm: 0.9999990361506359, iteration: 181746
loss: 1.2618542909622192,grad_norm: 0.999999127342705, iteration: 181747
loss: 1.048231601715088,grad_norm: 0.8916302862011397, iteration: 181748
loss: 1.0357170104980469,grad_norm: 0.9999991193593789, iteration: 181749
loss: 0.9827300906181335,grad_norm: 0.9999992170542661, iteration: 181750
loss: 1.0067596435546875,grad_norm: 0.999999044045481, iteration: 181751
loss: 1.0111767053604126,grad_norm: 0.9999991939799214, iteration: 181752
loss: 1.0083311796188354,grad_norm: 0.8864126639771498, iteration: 181753
loss: 0.9816062450408936,grad_norm: 0.9480060174925939, iteration: 181754
loss: 1.1584407091140747,grad_norm: 0.9999994921452706, iteration: 181755
loss: 0.976728081703186,grad_norm: 0.9999988654981272, iteration: 181756
loss: 0.9780827760696411,grad_norm: 0.9089715526936989, iteration: 181757
loss: 0.9890692234039307,grad_norm: 0.9999990362583675, iteration: 181758
loss: 0.9885571599006653,grad_norm: 0.9999995688920601, iteration: 181759
loss: 1.0201075077056885,grad_norm: 0.9999991489558302, iteration: 181760
loss: 0.9906300902366638,grad_norm: 0.999999050487372, iteration: 181761
loss: 0.9845842719078064,grad_norm: 0.9999993428537849, iteration: 181762
loss: 1.0473233461380005,grad_norm: 0.9999991987369704, iteration: 181763
loss: 1.0046778917312622,grad_norm: 0.9976773206547379, iteration: 181764
loss: 0.9898603558540344,grad_norm: 0.8068691011333209, iteration: 181765
loss: 0.997511088848114,grad_norm: 0.9909593701343178, iteration: 181766
loss: 0.9858174324035645,grad_norm: 0.9635345420122013, iteration: 181767
loss: 1.2676095962524414,grad_norm: 0.9999997709193797, iteration: 181768
loss: 1.0091902017593384,grad_norm: 0.9401050194844331, iteration: 181769
loss: 1.0044949054718018,grad_norm: 0.9999991020214414, iteration: 181770
loss: 0.9911954998970032,grad_norm: 0.9999990398748203, iteration: 181771
loss: 1.0307937860488892,grad_norm: 0.9999990873753242, iteration: 181772
loss: 1.0092235803604126,grad_norm: 0.9555897317061185, iteration: 181773
loss: 1.00946843624115,grad_norm: 0.9999990962857022, iteration: 181774
loss: 0.9898897409439087,grad_norm: 0.9999991257872153, iteration: 181775
loss: 1.0222861766815186,grad_norm: 0.8506665269805989, iteration: 181776
loss: 1.0560182332992554,grad_norm: 0.9999991230832326, iteration: 181777
loss: 0.9804853200912476,grad_norm: 0.9861320207737103, iteration: 181778
loss: 1.004014253616333,grad_norm: 0.9461309478088289, iteration: 181779
loss: 0.9892776012420654,grad_norm: 0.9999990902702416, iteration: 181780
loss: 1.0091922283172607,grad_norm: 0.999999366851736, iteration: 181781
loss: 0.9714011549949646,grad_norm: 0.9129696187338644, iteration: 181782
loss: 1.0124677419662476,grad_norm: 0.9999990613874491, iteration: 181783
loss: 1.0270946025848389,grad_norm: 0.900889310247146, iteration: 181784
loss: 1.0032716989517212,grad_norm: 0.9525469753156796, iteration: 181785
loss: 0.9405882954597473,grad_norm: 0.8561318542593783, iteration: 181786
loss: 0.9786105155944824,grad_norm: 0.9999993957361091, iteration: 181787
loss: 1.0074174404144287,grad_norm: 0.9999990653073813, iteration: 181788
loss: 0.9994058012962341,grad_norm: 0.9999990137433685, iteration: 181789
loss: 1.0390738248825073,grad_norm: 0.9403105159150742, iteration: 181790
loss: 1.0516605377197266,grad_norm: 0.9999992544427793, iteration: 181791
loss: 1.0001153945922852,grad_norm: 0.9612472119651708, iteration: 181792
loss: 0.9943192005157471,grad_norm: 0.8529902557229272, iteration: 181793
loss: 1.006003499031067,grad_norm: 0.9366124759541872, iteration: 181794
loss: 1.0213202238082886,grad_norm: 0.9999991695501859, iteration: 181795
loss: 0.9758085608482361,grad_norm: 0.8917249322125449, iteration: 181796
loss: 0.9949349761009216,grad_norm: 0.9400959756214037, iteration: 181797
loss: 1.017083764076233,grad_norm: 0.9999990334635652, iteration: 181798
loss: 0.932358980178833,grad_norm: 0.9999989976461809, iteration: 181799
loss: 1.0129051208496094,grad_norm: 0.9665121013985769, iteration: 181800
loss: 1.0259263515472412,grad_norm: 0.9999995584207239, iteration: 181801
loss: 0.968464195728302,grad_norm: 0.9999992183113352, iteration: 181802
loss: 0.9976481795310974,grad_norm: 0.9999992609951371, iteration: 181803
loss: 0.9894940853118896,grad_norm: 0.9999991942032281, iteration: 181804
loss: 1.0211946964263916,grad_norm: 0.9999992632387636, iteration: 181805
loss: 1.0333343744277954,grad_norm: 0.9999990387190851, iteration: 181806
loss: 1.286400556564331,grad_norm: 1.0000000641847944, iteration: 181807
loss: 1.0450197458267212,grad_norm: 0.9999992787067363, iteration: 181808
loss: 1.0037651062011719,grad_norm: 0.9999991606539482, iteration: 181809
loss: 1.0901094675064087,grad_norm: 0.999999218680112, iteration: 181810
loss: 0.9596037864685059,grad_norm: 0.9182680380830899, iteration: 181811
loss: 1.0784155130386353,grad_norm: 0.9999992608260276, iteration: 181812
loss: 0.9973900318145752,grad_norm: 0.9785779601884089, iteration: 181813
loss: 0.9918906092643738,grad_norm: 0.9757250750573236, iteration: 181814
loss: 0.9999136328697205,grad_norm: 0.9999997259997979, iteration: 181815
loss: 1.0641809701919556,grad_norm: 0.999999538314282, iteration: 181816
loss: 1.1259738206863403,grad_norm: 0.9999998316640593, iteration: 181817
loss: 1.0002089738845825,grad_norm: 0.9999991200676039, iteration: 181818
loss: 0.9958754777908325,grad_norm: 0.9999990530526165, iteration: 181819
loss: 1.0201553106307983,grad_norm: 0.9535691645268108, iteration: 181820
loss: 1.026239037513733,grad_norm: 0.9999990751661632, iteration: 181821
loss: 1.0395535230636597,grad_norm: 0.9999992829271908, iteration: 181822
loss: 1.0075985193252563,grad_norm: 0.9999991906187982, iteration: 181823
loss: 0.9996843338012695,grad_norm: 0.9960830127710796, iteration: 181824
loss: 1.0095555782318115,grad_norm: 0.992792913156504, iteration: 181825
loss: 0.9905593991279602,grad_norm: 0.8102166762121407, iteration: 181826
loss: 0.992146372795105,grad_norm: 0.9999990028031591, iteration: 181827
loss: 0.9966471791267395,grad_norm: 0.9585866555764109, iteration: 181828
loss: 1.000508427619934,grad_norm: 0.9999995079085585, iteration: 181829
loss: 1.0013940334320068,grad_norm: 0.842225111425832, iteration: 181830
loss: 1.0828806161880493,grad_norm: 0.9999997491604079, iteration: 181831
loss: 1.0230916738510132,grad_norm: 0.9593090746777178, iteration: 181832
loss: 1.0454875230789185,grad_norm: 0.9095343082197497, iteration: 181833
loss: 0.9767616391181946,grad_norm: 0.9193236096144319, iteration: 181834
loss: 0.9713649153709412,grad_norm: 0.8810672180497485, iteration: 181835
loss: 0.9854608178138733,grad_norm: 0.9999991419356066, iteration: 181836
loss: 0.9791858196258545,grad_norm: 0.9654385525330804, iteration: 181837
loss: 1.1126906871795654,grad_norm: 0.9999992678078277, iteration: 181838
loss: 1.0047321319580078,grad_norm: 0.9999991583229348, iteration: 181839
loss: 0.9937847256660461,grad_norm: 0.9626748869016039, iteration: 181840
loss: 0.9975801110267639,grad_norm: 0.9999991382141592, iteration: 181841
loss: 0.9921799302101135,grad_norm: 0.7927341765795701, iteration: 181842
loss: 0.9994978904724121,grad_norm: 0.9663375672633862, iteration: 181843
loss: 1.1261037588119507,grad_norm: 0.999999815971094, iteration: 181844
loss: 0.9798668026924133,grad_norm: 0.9999991051698303, iteration: 181845
loss: 1.002791166305542,grad_norm: 0.8605829440578145, iteration: 181846
loss: 1.0355592966079712,grad_norm: 0.9529622627666157, iteration: 181847
loss: 1.1523964405059814,grad_norm: 0.9999994223986908, iteration: 181848
loss: 0.9793570041656494,grad_norm: 0.9911171777793997, iteration: 181849
loss: 1.0089304447174072,grad_norm: 0.9999991408846509, iteration: 181850
loss: 0.9574680924415588,grad_norm: 0.9661845575418205, iteration: 181851
loss: 0.9930199384689331,grad_norm: 0.9999990542200747, iteration: 181852
loss: 0.9951919317245483,grad_norm: 0.9345629290018991, iteration: 181853
loss: 1.035563588142395,grad_norm: 0.999999026551507, iteration: 181854
loss: 0.9869468808174133,grad_norm: 0.9265851320378001, iteration: 181855
loss: 0.9998598098754883,grad_norm: 0.9999995487296477, iteration: 181856
loss: 0.9869667887687683,grad_norm: 0.9713136953331929, iteration: 181857
loss: 1.0636388063430786,grad_norm: 0.9999998694458172, iteration: 181858
loss: 0.9645141363143921,grad_norm: 0.9999997060822574, iteration: 181859
loss: 0.9889340400695801,grad_norm: 0.999999179110414, iteration: 181860
loss: 0.9989018440246582,grad_norm: 0.803093395283698, iteration: 181861
loss: 0.9992499947547913,grad_norm: 0.8550211990361918, iteration: 181862
loss: 1.0116395950317383,grad_norm: 0.8933844795166845, iteration: 181863
loss: 1.1337531805038452,grad_norm: 0.9999995892531911, iteration: 181864
loss: 1.0488522052764893,grad_norm: 0.9999998235329722, iteration: 181865
loss: 1.1057299375534058,grad_norm: 0.9999992742220851, iteration: 181866
loss: 1.046610951423645,grad_norm: 0.9999990432514936, iteration: 181867
loss: 1.3362163305282593,grad_norm: 0.9999996427880724, iteration: 181868
loss: 1.0011472702026367,grad_norm: 0.9999991020312473, iteration: 181869
loss: 1.0816216468811035,grad_norm: 0.935908335721428, iteration: 181870
loss: 1.1388604640960693,grad_norm: 0.9999992125952331, iteration: 181871
loss: 0.985880970954895,grad_norm: 0.9804332458200549, iteration: 181872
loss: 1.0975816249847412,grad_norm: 0.9999990052074096, iteration: 181873
loss: 1.1682958602905273,grad_norm: 0.9999996292064419, iteration: 181874
loss: 1.0761717557907104,grad_norm: 0.921761551240193, iteration: 181875
loss: 1.1432008743286133,grad_norm: 0.999999366354349, iteration: 181876
loss: 1.157641887664795,grad_norm: 0.9999999259924071, iteration: 181877
loss: 1.0285588502883911,grad_norm: 0.9999992746998312, iteration: 181878
loss: 1.0166947841644287,grad_norm: 0.9999993938514159, iteration: 181879
loss: 1.0217077732086182,grad_norm: 0.8920357007735398, iteration: 181880
loss: 1.2314753532409668,grad_norm: 0.9999998262678141, iteration: 181881
loss: 1.1052417755126953,grad_norm: 0.9999999183181572, iteration: 181882
loss: 1.0378870964050293,grad_norm: 0.9999992967507217, iteration: 181883
loss: 1.0795180797576904,grad_norm: 0.9999991154545961, iteration: 181884
loss: 1.007393479347229,grad_norm: 0.8364154997535304, iteration: 181885
loss: 0.9587007164955139,grad_norm: 0.9999993108789315, iteration: 181886
loss: 1.2396910190582275,grad_norm: 0.9999998834580274, iteration: 181887
loss: 1.030238151550293,grad_norm: 0.9999992713087194, iteration: 181888
loss: 1.20177161693573,grad_norm: 0.9999993100693979, iteration: 181889
loss: 0.9899294376373291,grad_norm: 0.9233165035754712, iteration: 181890
loss: 1.0160868167877197,grad_norm: 0.9295276857817082, iteration: 181891
loss: 1.0735567808151245,grad_norm: 0.9999996261674334, iteration: 181892
loss: 0.9818954467773438,grad_norm: 0.9168320169238959, iteration: 181893
loss: 1.0521385669708252,grad_norm: 0.999999842422958, iteration: 181894
loss: 1.029109239578247,grad_norm: 0.9999990965366337, iteration: 181895
loss: 1.0829639434814453,grad_norm: 0.9999990505322406, iteration: 181896
loss: 1.018513798713684,grad_norm: 0.9999991240490638, iteration: 181897
loss: 1.0090174674987793,grad_norm: 0.7993794348858717, iteration: 181898
loss: 1.0357829332351685,grad_norm: 0.9999996805009268, iteration: 181899
loss: 1.0609201192855835,grad_norm: 0.7909445450593023, iteration: 181900
loss: 1.0636780261993408,grad_norm: 0.999999192339678, iteration: 181901
loss: 1.1177340745925903,grad_norm: 0.9999991057310695, iteration: 181902
loss: 1.2227669954299927,grad_norm: 1.0000000071970097, iteration: 181903
loss: 0.9983242154121399,grad_norm: 0.9999990667737405, iteration: 181904
loss: 0.9888575673103333,grad_norm: 0.9999994511027687, iteration: 181905
loss: 1.1999608278274536,grad_norm: 0.9999997043457448, iteration: 181906
loss: 0.9946836829185486,grad_norm: 0.9889696485909397, iteration: 181907
loss: 1.007705569267273,grad_norm: 0.9999991698642424, iteration: 181908
loss: 0.9851130247116089,grad_norm: 0.9999991236158587, iteration: 181909
loss: 1.0389925241470337,grad_norm: 0.9999993877470437, iteration: 181910
loss: 1.0408707857131958,grad_norm: 0.999999038998648, iteration: 181911
loss: 1.1314918994903564,grad_norm: 0.9999995739896375, iteration: 181912
loss: 1.0698105096817017,grad_norm: 0.9999994948909722, iteration: 181913
loss: 1.1049742698669434,grad_norm: 0.9999991144217409, iteration: 181914
loss: 1.0819599628448486,grad_norm: 0.999999629320881, iteration: 181915
loss: 0.990053653717041,grad_norm: 0.9999991760790574, iteration: 181916
loss: 1.1037081480026245,grad_norm: 0.9999989872973097, iteration: 181917
loss: 1.0513511896133423,grad_norm: 0.9999993752991473, iteration: 181918
loss: 1.0782036781311035,grad_norm: 0.9999998267540924, iteration: 181919
loss: 1.0054367780685425,grad_norm: 0.9999991693279352, iteration: 181920
loss: 1.044126033782959,grad_norm: 0.999999244099639, iteration: 181921
loss: 1.1736407279968262,grad_norm: 0.9999998610446467, iteration: 181922
loss: 1.0045281648635864,grad_norm: 0.9968416563074693, iteration: 181923
loss: 1.0210169553756714,grad_norm: 0.999999074952302, iteration: 181924
loss: 1.168013095855713,grad_norm: 0.9999999751852745, iteration: 181925
loss: 1.1278618574142456,grad_norm: 0.9999991337294691, iteration: 181926
loss: 1.3063311576843262,grad_norm: 0.9999997573658105, iteration: 181927
loss: 1.021914005279541,grad_norm: 0.9999991721115035, iteration: 181928
loss: 1.150889277458191,grad_norm: 0.9999995451202056, iteration: 181929
loss: 1.084954857826233,grad_norm: 0.9999992481406301, iteration: 181930
loss: 1.0420217514038086,grad_norm: 0.9151457734006682, iteration: 181931
loss: 1.0200209617614746,grad_norm: 0.9999991307882236, iteration: 181932
loss: 1.0175037384033203,grad_norm: 0.9999991697129464, iteration: 181933
loss: 0.9923045039176941,grad_norm: 0.9312464241393759, iteration: 181934
loss: 1.1012123823165894,grad_norm: 0.9999995283908913, iteration: 181935
loss: 1.029490351676941,grad_norm: 0.9999992698410275, iteration: 181936
loss: 1.00664484500885,grad_norm: 0.9999991950681849, iteration: 181937
loss: 1.0150721073150635,grad_norm: 0.9999991652073886, iteration: 181938
loss: 0.9962744116783142,grad_norm: 0.9243241595407011, iteration: 181939
loss: 1.1423094272613525,grad_norm: 0.9999992062655952, iteration: 181940
loss: 1.041231393814087,grad_norm: 0.9999994298264552, iteration: 181941
loss: 1.0671429634094238,grad_norm: 0.9999997798085772, iteration: 181942
loss: 1.0026310682296753,grad_norm: 0.9999994838489364, iteration: 181943
loss: 1.056662917137146,grad_norm: 0.9999994211563257, iteration: 181944
loss: 1.0316396951675415,grad_norm: 0.9428619878704805, iteration: 181945
loss: 1.0783343315124512,grad_norm: 0.9999990956747785, iteration: 181946
loss: 0.992453932762146,grad_norm: 0.9930919263822668, iteration: 181947
loss: 1.1024821996688843,grad_norm: 0.9999996428819989, iteration: 181948
loss: 1.0437496900558472,grad_norm: 0.9999992960732973, iteration: 181949
loss: 1.0031225681304932,grad_norm: 0.9999990984664603, iteration: 181950
loss: 1.0164690017700195,grad_norm: 0.9222858706593422, iteration: 181951
loss: 0.9880242347717285,grad_norm: 0.9999992817871444, iteration: 181952
loss: 1.0182182788848877,grad_norm: 0.9999996980251853, iteration: 181953
loss: 1.028769850730896,grad_norm: 0.9999999309750541, iteration: 181954
loss: 1.068969964981079,grad_norm: 0.9999990285691472, iteration: 181955
loss: 1.0846108198165894,grad_norm: 0.9999992372069243, iteration: 181956
loss: 0.9890352487564087,grad_norm: 0.9999998780132913, iteration: 181957
loss: 1.075835108757019,grad_norm: 0.9999998195707054, iteration: 181958
loss: 1.001004934310913,grad_norm: 0.9999991776264125, iteration: 181959
loss: 1.0730901956558228,grad_norm: 0.999999413372321, iteration: 181960
loss: 1.12100350856781,grad_norm: 0.999999614131021, iteration: 181961
loss: 1.0185335874557495,grad_norm: 0.980179166767112, iteration: 181962
loss: 1.086299180984497,grad_norm: 0.9999998400442529, iteration: 181963
loss: 1.2215615510940552,grad_norm: 0.9999999103531558, iteration: 181964
loss: 1.0827951431274414,grad_norm: 0.9999993623888216, iteration: 181965
loss: 1.053187608718872,grad_norm: 0.999999568751705, iteration: 181966
loss: 1.0025254487991333,grad_norm: 1.0000000313563606, iteration: 181967
loss: 0.9830305576324463,grad_norm: 0.8585391036884891, iteration: 181968
loss: 0.9982219934463501,grad_norm: 0.9455979158976943, iteration: 181969
loss: 1.1558036804199219,grad_norm: 0.9999997145581406, iteration: 181970
loss: 1.0693392753601074,grad_norm: 0.9999996348568306, iteration: 181971
loss: 1.0813453197479248,grad_norm: 0.9999994826808646, iteration: 181972
loss: 1.0015431642532349,grad_norm: 0.9999992184335708, iteration: 181973
loss: 1.0915369987487793,grad_norm: 0.9999995932506758, iteration: 181974
loss: 1.0459692478179932,grad_norm: 0.9999996266225959, iteration: 181975
loss: 1.0711164474487305,grad_norm: 0.9999998561483292, iteration: 181976
loss: 1.0412318706512451,grad_norm: 0.9999997444402634, iteration: 181977
loss: 1.024243950843811,grad_norm: 0.9999992770071472, iteration: 181978
loss: 1.0069555044174194,grad_norm: 0.999999117081171, iteration: 181979
loss: 1.053601861000061,grad_norm: 0.9999993092814002, iteration: 181980
loss: 1.1157439947128296,grad_norm: 0.9999996758041911, iteration: 181981
loss: 1.0540897846221924,grad_norm: 0.9999996205637413, iteration: 181982
loss: 1.0837550163269043,grad_norm: 0.9999993559655886, iteration: 181983
loss: 1.069944143295288,grad_norm: 0.9999994016752645, iteration: 181984
loss: 1.1097474098205566,grad_norm: 0.9999993070302499, iteration: 181985
loss: 1.081828236579895,grad_norm: 0.9999997103248114, iteration: 181986
loss: 1.124323844909668,grad_norm: 0.999999881126048, iteration: 181987
loss: 1.1102238893508911,grad_norm: 0.9999993616753239, iteration: 181988
loss: 1.1393651962280273,grad_norm: 0.9999996029070535, iteration: 181989
loss: 0.9806589484214783,grad_norm: 0.9734540803408454, iteration: 181990
loss: 1.1395518779754639,grad_norm: 0.9999993856460593, iteration: 181991
loss: 1.1182585954666138,grad_norm: 0.9999997014953558, iteration: 181992
loss: 1.110268235206604,grad_norm: 0.9999998086161593, iteration: 181993
loss: 1.1243314743041992,grad_norm: 0.999999643877138, iteration: 181994
loss: 1.0418355464935303,grad_norm: 0.9999991870016418, iteration: 181995
loss: 1.1174074411392212,grad_norm: 0.999999960230542, iteration: 181996
loss: 1.1250046491622925,grad_norm: 0.9999995610225958, iteration: 181997
loss: 0.9934797883033752,grad_norm: 0.9581301938348744, iteration: 181998
loss: 1.0249333381652832,grad_norm: 0.9999993489675992, iteration: 181999
loss: 0.9996572136878967,grad_norm: 0.9999992830718796, iteration: 182000
loss: 1.064247965812683,grad_norm: 0.9999992243509965, iteration: 182001
loss: 1.139158844947815,grad_norm: 0.9999995307883578, iteration: 182002
loss: 1.2107456922531128,grad_norm: 0.9999998624856573, iteration: 182003
loss: 1.09420645236969,grad_norm: 0.9999998514845604, iteration: 182004
loss: 1.161216378211975,grad_norm: 0.9999995968952259, iteration: 182005
loss: 1.0182583332061768,grad_norm: 0.999999482365725, iteration: 182006
loss: 1.2311259508132935,grad_norm: 0.9999998403091628, iteration: 182007
loss: 1.1205425262451172,grad_norm: 0.9999997588501263, iteration: 182008
loss: 1.1091766357421875,grad_norm: 0.9999996982045466, iteration: 182009
loss: 1.3146705627441406,grad_norm: 0.9999995904680279, iteration: 182010
loss: 1.138874888420105,grad_norm: 0.9999996498614669, iteration: 182011
loss: 1.198014736175537,grad_norm: 0.9999998736093003, iteration: 182012
loss: 1.1356561183929443,grad_norm: 0.999999236665055, iteration: 182013
loss: 1.2745747566223145,grad_norm: 0.9999995545565311, iteration: 182014
loss: 1.0728492736816406,grad_norm: 0.9999991904078859, iteration: 182015
loss: 1.2721425294876099,grad_norm: 0.9999998111207364, iteration: 182016
loss: 1.1318628787994385,grad_norm: 0.9999999745751658, iteration: 182017
loss: 1.2073075771331787,grad_norm: 0.9999997875637032, iteration: 182018
loss: 1.1680724620819092,grad_norm: 0.999999357266163, iteration: 182019
loss: 1.1151043176651,grad_norm: 0.9999996646218926, iteration: 182020
loss: 1.0758252143859863,grad_norm: 0.9999999274859908, iteration: 182021
loss: 1.194246530532837,grad_norm: 0.9999995379477061, iteration: 182022
loss: 1.2662898302078247,grad_norm: 0.9999998293052138, iteration: 182023
loss: 1.1780128479003906,grad_norm: 0.9999997984546721, iteration: 182024
loss: 1.0980786085128784,grad_norm: 0.999999319454934, iteration: 182025
loss: 1.1282426118850708,grad_norm: 1.000000024695722, iteration: 182026
loss: 1.0499542951583862,grad_norm: 0.999999213196316, iteration: 182027
loss: 1.1569663286209106,grad_norm: 0.9999999598287053, iteration: 182028
loss: 1.260021448135376,grad_norm: 0.9999997240514519, iteration: 182029
loss: 1.1826099157333374,grad_norm: 0.9999996291821839, iteration: 182030
loss: 1.0892397165298462,grad_norm: 0.9999991135817621, iteration: 182031
loss: 1.1361521482467651,grad_norm: 0.9999998950921616, iteration: 182032
loss: 1.0330299139022827,grad_norm: 0.9999993348561605, iteration: 182033
loss: 1.0758321285247803,grad_norm: 0.9999993373374296, iteration: 182034
loss: 1.1048576831817627,grad_norm: 0.9999994730844755, iteration: 182035
loss: 1.2698454856872559,grad_norm: 0.9999999675154143, iteration: 182036
loss: 1.112694263458252,grad_norm: 0.9999995016704963, iteration: 182037
loss: 1.1467996835708618,grad_norm: 0.9999996826988711, iteration: 182038
loss: 1.065785527229309,grad_norm: 0.9999999643291906, iteration: 182039
loss: 1.0976500511169434,grad_norm: 0.9999996901493247, iteration: 182040
loss: 1.1618682146072388,grad_norm: 0.999999158745122, iteration: 182041
loss: 1.0648301839828491,grad_norm: 0.9999992501492753, iteration: 182042
loss: 1.0968286991119385,grad_norm: 0.9999992168834027, iteration: 182043
loss: 1.249687671661377,grad_norm: 0.9999999766860814, iteration: 182044
loss: 1.0205228328704834,grad_norm: 0.9999997660820898, iteration: 182045
loss: 1.0405794382095337,grad_norm: 0.9999992146720799, iteration: 182046
loss: 0.9957796931266785,grad_norm: 0.9999990614873767, iteration: 182047
loss: 1.1457641124725342,grad_norm: 0.9999996925512361, iteration: 182048
loss: 1.1476976871490479,grad_norm: 0.9999997248367298, iteration: 182049
loss: 1.0208863019943237,grad_norm: 0.9999995403782846, iteration: 182050
loss: 1.1789758205413818,grad_norm: 0.999999232505055, iteration: 182051
loss: 1.039074182510376,grad_norm: 0.9999997866165804, iteration: 182052
loss: 1.222751498222351,grad_norm: 0.9999997848170784, iteration: 182053
loss: 1.0726318359375,grad_norm: 0.9999995309247871, iteration: 182054
loss: 1.0431299209594727,grad_norm: 0.999999769526241, iteration: 182055
loss: 1.115933895111084,grad_norm: 0.9999993658190154, iteration: 182056
loss: 1.073805332183838,grad_norm: 0.9999992911477169, iteration: 182057
loss: 1.1407897472381592,grad_norm: 0.9999995159675606, iteration: 182058
loss: 1.0022915601730347,grad_norm: 0.9252688015475073, iteration: 182059
loss: 1.1804972887039185,grad_norm: 0.9999993204607619, iteration: 182060
loss: 1.0451014041900635,grad_norm: 0.9999999542822342, iteration: 182061
loss: 1.071365475654602,grad_norm: 0.9999990378065057, iteration: 182062
loss: 1.0421783924102783,grad_norm: 0.9999990547241426, iteration: 182063
loss: 1.1658625602722168,grad_norm: 0.9999999153223821, iteration: 182064
loss: 1.0085047483444214,grad_norm: 0.9999996305775415, iteration: 182065
loss: 1.0309661626815796,grad_norm: 0.890536240635559, iteration: 182066
loss: 1.0455396175384521,grad_norm: 0.999999232558152, iteration: 182067
loss: 1.0800659656524658,grad_norm: 0.9999992359080997, iteration: 182068
loss: 1.045502781867981,grad_norm: 0.9999992813057066, iteration: 182069
loss: 1.1095294952392578,grad_norm: 0.9999998985147617, iteration: 182070
loss: 0.9887133836746216,grad_norm: 0.9692103338376685, iteration: 182071
loss: 1.0206347703933716,grad_norm: 0.9999992550273029, iteration: 182072
loss: 1.0353288650512695,grad_norm: 0.9999993231507902, iteration: 182073
loss: 1.0246524810791016,grad_norm: 0.9999991332820488, iteration: 182074
loss: 1.138372540473938,grad_norm: 0.9999997106637304, iteration: 182075
loss: 1.0997955799102783,grad_norm: 0.9999998438566092, iteration: 182076
loss: 1.0296992063522339,grad_norm: 0.999999327830481, iteration: 182077
loss: 1.101431965827942,grad_norm: 0.9999993159321815, iteration: 182078
loss: 1.0318892002105713,grad_norm: 0.9999992425729258, iteration: 182079
loss: 0.9890221357345581,grad_norm: 0.8970199598992387, iteration: 182080
loss: 0.9742943644523621,grad_norm: 0.86623719402603, iteration: 182081
loss: 1.0555230379104614,grad_norm: 0.9999994810511011, iteration: 182082
loss: 0.9945507049560547,grad_norm: 0.9999990898492516, iteration: 182083
loss: 1.03385591506958,grad_norm: 0.9999990482319928, iteration: 182084
loss: 1.001033067703247,grad_norm: 0.9999989892133986, iteration: 182085
loss: 1.0950947999954224,grad_norm: 0.999999197982149, iteration: 182086
loss: 0.9503636956214905,grad_norm: 0.9999990762473406, iteration: 182087
loss: 1.0538992881774902,grad_norm: 0.9999994312077055, iteration: 182088
loss: 1.0371328592300415,grad_norm: 0.9481760956356411, iteration: 182089
loss: 1.0073823928833008,grad_norm: 0.9999991208958968, iteration: 182090
loss: 1.104325771331787,grad_norm: 0.9999997356852006, iteration: 182091
loss: 1.0437268018722534,grad_norm: 0.9999997365267379, iteration: 182092
loss: 1.0148823261260986,grad_norm: 0.9985537537467272, iteration: 182093
loss: 0.9537967443466187,grad_norm: 0.9999990966995002, iteration: 182094
loss: 0.9719973206520081,grad_norm: 0.9999992104129485, iteration: 182095
loss: 1.0406060218811035,grad_norm: 0.9855758890361833, iteration: 182096
loss: 1.0308336019515991,grad_norm: 0.9999999339801204, iteration: 182097
loss: 1.0123165845870972,grad_norm: 0.9914499749758946, iteration: 182098
loss: 1.1555638313293457,grad_norm: 0.9999995785097474, iteration: 182099
loss: 1.0170797109603882,grad_norm: 0.8597607070389738, iteration: 182100
loss: 1.006791353225708,grad_norm: 0.9999993716905816, iteration: 182101
loss: 0.9574624300003052,grad_norm: 0.872322096781704, iteration: 182102
loss: 1.0032647848129272,grad_norm: 0.9995177344374668, iteration: 182103
loss: 1.0745444297790527,grad_norm: 0.9482888431880532, iteration: 182104
loss: 0.9821419715881348,grad_norm: 0.9999992396570102, iteration: 182105
loss: 1.0306501388549805,grad_norm: 0.9999990778615536, iteration: 182106
loss: 0.9833760857582092,grad_norm: 0.9999990989908804, iteration: 182107
loss: 1.0179145336151123,grad_norm: 0.9999994445900714, iteration: 182108
loss: 0.9889881014823914,grad_norm: 0.9999991885751286, iteration: 182109
loss: 0.9487687945365906,grad_norm: 0.9999990979816955, iteration: 182110
loss: 1.0430116653442383,grad_norm: 0.9999991332926012, iteration: 182111
loss: 1.0077043771743774,grad_norm: 0.9999993155927153, iteration: 182112
loss: 1.009128451347351,grad_norm: 0.999999236019491, iteration: 182113
loss: 1.0096522569656372,grad_norm: 0.9999992312447062, iteration: 182114
loss: 1.0291599035263062,grad_norm: 0.9999993492469728, iteration: 182115
loss: 1.053639531135559,grad_norm: 0.9999998760789957, iteration: 182116
loss: 1.0382176637649536,grad_norm: 0.9999992859636836, iteration: 182117
loss: 1.0363943576812744,grad_norm: 0.9999992846753745, iteration: 182118
loss: 1.0039780139923096,grad_norm: 0.9999998489842339, iteration: 182119
loss: 0.9603606462478638,grad_norm: 0.9593026692018678, iteration: 182120
loss: 1.0438154935836792,grad_norm: 0.9999997453665835, iteration: 182121
loss: 1.2252470254898071,grad_norm: 0.9999993674003108, iteration: 182122
loss: 1.0211864709854126,grad_norm: 0.9999993916312008, iteration: 182123
loss: 0.9548420906066895,grad_norm: 0.999999083546721, iteration: 182124
loss: 1.03391432762146,grad_norm: 0.9999990911062775, iteration: 182125
loss: 1.0278161764144897,grad_norm: 0.9999998119247995, iteration: 182126
loss: 1.0995824337005615,grad_norm: 0.9999990678139785, iteration: 182127
loss: 0.9645161628723145,grad_norm: 0.8853113447429762, iteration: 182128
loss: 0.9671536087989807,grad_norm: 0.9999990829661479, iteration: 182129
loss: 0.9712349772453308,grad_norm: 0.9483444983298606, iteration: 182130
loss: 1.0765432119369507,grad_norm: 0.9999997066732595, iteration: 182131
loss: 1.0882822275161743,grad_norm: 0.9999994916026036, iteration: 182132
loss: 1.0127363204956055,grad_norm: 0.8632500518518351, iteration: 182133
loss: 1.1050559282302856,grad_norm: 0.9999995708837166, iteration: 182134
loss: 1.0711541175842285,grad_norm: 0.9999997227090645, iteration: 182135
loss: 1.0294153690338135,grad_norm: 0.9871255688773397, iteration: 182136
loss: 1.0275112390518188,grad_norm: 0.9999991184906366, iteration: 182137
loss: 1.0137943029403687,grad_norm: 0.9999991374270552, iteration: 182138
loss: 0.9919209480285645,grad_norm: 0.924957598214592, iteration: 182139
loss: 1.0970834493637085,grad_norm: 0.9999994121727142, iteration: 182140
loss: 1.0125843286514282,grad_norm: 0.812463688451758, iteration: 182141
loss: 0.9748647809028625,grad_norm: 0.8716745811663946, iteration: 182142
loss: 1.0922740697860718,grad_norm: 0.9999994858991451, iteration: 182143
loss: 1.0295143127441406,grad_norm: 0.9999997160741644, iteration: 182144
loss: 0.9756690859794617,grad_norm: 0.9651399235815377, iteration: 182145
loss: 1.0213388204574585,grad_norm: 0.9999991560333983, iteration: 182146
loss: 0.9926681518554688,grad_norm: 0.9999990419434598, iteration: 182147
loss: 0.9861180782318115,grad_norm: 0.9999996774880687, iteration: 182148
loss: 1.0101691484451294,grad_norm: 0.9999990897210574, iteration: 182149
loss: 0.9633556604385376,grad_norm: 0.8827362435296281, iteration: 182150
loss: 1.007814884185791,grad_norm: 0.9717469662490665, iteration: 182151
loss: 0.9922257661819458,grad_norm: 0.9999993060601109, iteration: 182152
loss: 1.0021142959594727,grad_norm: 0.9783552749718848, iteration: 182153
loss: 1.0025900602340698,grad_norm: 0.8924100352735659, iteration: 182154
loss: 0.9784651398658752,grad_norm: 0.7986407553936148, iteration: 182155
loss: 1.0014891624450684,grad_norm: 0.9999989409241119, iteration: 182156
loss: 0.9710031747817993,grad_norm: 0.9999990384275453, iteration: 182157
loss: 0.9864458441734314,grad_norm: 0.9183849998989183, iteration: 182158
loss: 0.9840784668922424,grad_norm: 0.999999137058173, iteration: 182159
loss: 0.9922122955322266,grad_norm: 0.8878962539756308, iteration: 182160
loss: 1.0556837320327759,grad_norm: 0.9999996411059171, iteration: 182161
loss: 1.0150742530822754,grad_norm: 0.9627194314239342, iteration: 182162
loss: 1.007205605506897,grad_norm: 0.9999991132841892, iteration: 182163
loss: 1.0773274898529053,grad_norm: 0.9999993716154287, iteration: 182164
loss: 0.985630214214325,grad_norm: 0.9999991312569375, iteration: 182165
loss: 0.9836352467536926,grad_norm: 0.863272001551307, iteration: 182166
loss: 0.9783859252929688,grad_norm: 0.9433281587471627, iteration: 182167
loss: 0.9904605746269226,grad_norm: 0.9760391015662067, iteration: 182168
loss: 0.9699918627738953,grad_norm: 0.8565000315727764, iteration: 182169
loss: 1.038420557975769,grad_norm: 0.9999990934516085, iteration: 182170
loss: 1.0030113458633423,grad_norm: 0.999999029308265, iteration: 182171
loss: 0.9858912825584412,grad_norm: 0.9625659808995121, iteration: 182172
loss: 0.9993810057640076,grad_norm: 0.9999993468722957, iteration: 182173
loss: 1.0635292530059814,grad_norm: 0.9270286374072637, iteration: 182174
loss: 0.9955953359603882,grad_norm: 0.8803505694143807, iteration: 182175
loss: 0.9753670692443848,grad_norm: 0.971460487439784, iteration: 182176
loss: 1.0205330848693848,grad_norm: 0.9999991631826644, iteration: 182177
loss: 0.9936370849609375,grad_norm: 0.8096714821434264, iteration: 182178
loss: 1.0041537284851074,grad_norm: 0.8289494425075509, iteration: 182179
loss: 1.1113276481628418,grad_norm: 0.9999990546071972, iteration: 182180
loss: 0.9954113960266113,grad_norm: 0.9870559498470705, iteration: 182181
loss: 1.003868579864502,grad_norm: 0.9999992997257615, iteration: 182182
loss: 0.9951611161231995,grad_norm: 0.8146633461122496, iteration: 182183
loss: 1.042454481124878,grad_norm: 0.9999992614598303, iteration: 182184
loss: 0.989188551902771,grad_norm: 0.8799310414650466, iteration: 182185
loss: 0.9936779737472534,grad_norm: 0.9999991808633956, iteration: 182186
loss: 1.0786223411560059,grad_norm: 0.9999999919183331, iteration: 182187
loss: 1.029943823814392,grad_norm: 0.9999997156538817, iteration: 182188
loss: 1.0136655569076538,grad_norm: 0.9171165674962214, iteration: 182189
loss: 1.052292823791504,grad_norm: 0.999999151010308, iteration: 182190
loss: 1.0371417999267578,grad_norm: 0.999999025702014, iteration: 182191
loss: 1.0431309938430786,grad_norm: 0.9999998161468203, iteration: 182192
loss: 0.9908530712127686,grad_norm: 0.9393334270813847, iteration: 182193
loss: 1.0515828132629395,grad_norm: 0.9999991617688535, iteration: 182194
loss: 1.008056879043579,grad_norm: 0.930010385917518, iteration: 182195
loss: 0.9989089965820312,grad_norm: 0.8768945721360423, iteration: 182196
loss: 0.9793762564659119,grad_norm: 0.9878108565032854, iteration: 182197
loss: 1.0172315835952759,grad_norm: 0.9532088437948589, iteration: 182198
loss: 1.0534217357635498,grad_norm: 0.9999995438038821, iteration: 182199
loss: 0.9935908913612366,grad_norm: 0.8456318378034179, iteration: 182200
loss: 1.023136854171753,grad_norm: 0.9999993362043922, iteration: 182201
loss: 1.0257471799850464,grad_norm: 0.9029064743263857, iteration: 182202
loss: 1.0164614915847778,grad_norm: 0.9999990944166912, iteration: 182203
loss: 1.0198054313659668,grad_norm: 0.9317907709077741, iteration: 182204
loss: 0.9867131114006042,grad_norm: 0.9999992214924922, iteration: 182205
loss: 0.9805613160133362,grad_norm: 0.9999990705082183, iteration: 182206
loss: 0.975444495677948,grad_norm: 0.974564851984931, iteration: 182207
loss: 0.9891435503959656,grad_norm: 0.9999989084470419, iteration: 182208
loss: 0.9721423983573914,grad_norm: 0.9079148502092317, iteration: 182209
loss: 1.0088456869125366,grad_norm: 0.9661599823802272, iteration: 182210
loss: 1.043428897857666,grad_norm: 0.9999993992802056, iteration: 182211
loss: 0.9875052571296692,grad_norm: 0.9999997593769256, iteration: 182212
loss: 0.9929938316345215,grad_norm: 0.9751083376612069, iteration: 182213
loss: 1.0764611959457397,grad_norm: 0.9999997551051968, iteration: 182214
loss: 1.00344979763031,grad_norm: 0.9999990076555961, iteration: 182215
loss: 1.0403321981430054,grad_norm: 0.9999993968037764, iteration: 182216
loss: 0.9391606450080872,grad_norm: 0.9999989850541098, iteration: 182217
loss: 1.0077866315841675,grad_norm: 0.9524016947536976, iteration: 182218
loss: 0.9737963080406189,grad_norm: 0.993346394525708, iteration: 182219
loss: 1.0181840658187866,grad_norm: 0.9999993421031702, iteration: 182220
loss: 1.0012315511703491,grad_norm: 0.9999991198245702, iteration: 182221
loss: 1.0103732347488403,grad_norm: 0.9999993044289442, iteration: 182222
loss: 0.995579719543457,grad_norm: 0.9999989618050454, iteration: 182223
loss: 1.0143882036209106,grad_norm: 0.9806328481274688, iteration: 182224
loss: 0.9743074178695679,grad_norm: 0.9999990604438987, iteration: 182225
loss: 1.0672118663787842,grad_norm: 0.9999991359596143, iteration: 182226
loss: 1.0211689472198486,grad_norm: 0.9999991703126483, iteration: 182227
loss: 1.0125256776809692,grad_norm: 0.9999998251770255, iteration: 182228
loss: 1.1267075538635254,grad_norm: 0.9999998166980865, iteration: 182229
loss: 1.0294909477233887,grad_norm: 0.976942494314547, iteration: 182230
loss: 1.0269253253936768,grad_norm: 0.9999999951898746, iteration: 182231
loss: 1.0227898359298706,grad_norm: 0.9999991997406031, iteration: 182232
loss: 0.9602203369140625,grad_norm: 0.8857274707058983, iteration: 182233
loss: 1.030690312385559,grad_norm: 0.9999991254189582, iteration: 182234
loss: 1.0360703468322754,grad_norm: 0.8944241666837881, iteration: 182235
loss: 1.1038899421691895,grad_norm: 0.9999997459243222, iteration: 182236
loss: 1.0935888290405273,grad_norm: 0.999999803784695, iteration: 182237
loss: 1.1372121572494507,grad_norm: 0.999999573557755, iteration: 182238
loss: 1.0028998851776123,grad_norm: 0.999999008942309, iteration: 182239
loss: 0.968556821346283,grad_norm: 0.9999993141508648, iteration: 182240
loss: 0.9715003371238708,grad_norm: 0.942218397589379, iteration: 182241
loss: 1.1208323240280151,grad_norm: 0.9999990246994718, iteration: 182242
loss: 1.0046125650405884,grad_norm: 0.9309067138104564, iteration: 182243
loss: 0.980856716632843,grad_norm: 0.9340607396878623, iteration: 182244
loss: 1.0103131532669067,grad_norm: 0.9999991006118483, iteration: 182245
loss: 1.0332742929458618,grad_norm: 0.9972994326977656, iteration: 182246
loss: 1.05037260055542,grad_norm: 0.9999992052952938, iteration: 182247
loss: 1.003644347190857,grad_norm: 0.8633144207284715, iteration: 182248
loss: 0.9905065894126892,grad_norm: 0.9963544559005044, iteration: 182249
loss: 1.0430737733840942,grad_norm: 0.9999992504547964, iteration: 182250
loss: 1.0060261487960815,grad_norm: 0.959105078542928, iteration: 182251
loss: 0.9716874957084656,grad_norm: 0.9999991330328702, iteration: 182252
loss: 1.0265982151031494,grad_norm: 0.8921723066233542, iteration: 182253
loss: 0.9871764183044434,grad_norm: 0.999018837901426, iteration: 182254
loss: 1.014747142791748,grad_norm: 0.9999994897245109, iteration: 182255
loss: 1.0356009006500244,grad_norm: 0.97343095602922, iteration: 182256
loss: 0.9538718461990356,grad_norm: 0.9999990842127873, iteration: 182257
loss: 0.965739369392395,grad_norm: 0.9999991748808471, iteration: 182258
loss: 1.0127755403518677,grad_norm: 0.9999990958393957, iteration: 182259
loss: 0.9860178828239441,grad_norm: 0.9754167517229574, iteration: 182260
loss: 1.0051708221435547,grad_norm: 0.9999993384279052, iteration: 182261
loss: 1.039680004119873,grad_norm: 0.9999992038880848, iteration: 182262
loss: 1.0312647819519043,grad_norm: 0.9448434794964092, iteration: 182263
loss: 1.0378867387771606,grad_norm: 0.9936185062279449, iteration: 182264
loss: 0.9915914535522461,grad_norm: 0.981961741854977, iteration: 182265
loss: 0.999750018119812,grad_norm: 0.9999991176610186, iteration: 182266
loss: 0.9945496916770935,grad_norm: 0.9999991160343807, iteration: 182267
loss: 1.002739667892456,grad_norm: 0.9999991280725867, iteration: 182268
loss: 0.9851357936859131,grad_norm: 0.9038973392436828, iteration: 182269
loss: 0.9986696243286133,grad_norm: 0.9892282848116738, iteration: 182270
loss: 1.0335192680358887,grad_norm: 0.8908481819792643, iteration: 182271
loss: 0.9629557132720947,grad_norm: 0.999999194844463, iteration: 182272
loss: 1.2025445699691772,grad_norm: 0.9999999934126989, iteration: 182273
loss: 1.0263698101043701,grad_norm: 0.9799374539850836, iteration: 182274
loss: 1.0102137327194214,grad_norm: 0.9999995907117175, iteration: 182275
loss: 1.0092796087265015,grad_norm: 0.9999992004019808, iteration: 182276
loss: 1.0070804357528687,grad_norm: 0.9999992742672408, iteration: 182277
loss: 0.9995960593223572,grad_norm: 0.9999990799579934, iteration: 182278
loss: 1.043480634689331,grad_norm: 0.9999990015591976, iteration: 182279
loss: 1.013403058052063,grad_norm: 0.9999990072279024, iteration: 182280
loss: 1.0116238594055176,grad_norm: 0.9999990962756439, iteration: 182281
loss: 0.9999611377716064,grad_norm: 0.8740165880740473, iteration: 182282
loss: 1.014735221862793,grad_norm: 0.9999991710467871, iteration: 182283
loss: 1.0012000799179077,grad_norm: 0.9999991164685608, iteration: 182284
loss: 1.0056713819503784,grad_norm: 0.9608849818547477, iteration: 182285
loss: 1.0346637964248657,grad_norm: 0.9193849780480094, iteration: 182286
loss: 0.9943578839302063,grad_norm: 0.9999991512881201, iteration: 182287
loss: 1.0752631425857544,grad_norm: 0.9999994243133162, iteration: 182288
loss: 1.0276992321014404,grad_norm: 0.9247719858828919, iteration: 182289
loss: 0.9962445497512817,grad_norm: 0.8170287050922518, iteration: 182290
loss: 1.0410630702972412,grad_norm: 0.9999990956791291, iteration: 182291
loss: 1.0044281482696533,grad_norm: 0.9563936206406302, iteration: 182292
loss: 1.108563780784607,grad_norm: 0.9999992807638319, iteration: 182293
loss: 0.9847705364227295,grad_norm: 0.9441934054776235, iteration: 182294
loss: 1.0681731700897217,grad_norm: 0.8610497967259312, iteration: 182295
loss: 0.9968931674957275,grad_norm: 0.9160338738111595, iteration: 182296
loss: 1.021189570426941,grad_norm: 0.99999918027031, iteration: 182297
loss: 1.0080971717834473,grad_norm: 0.9999991775148404, iteration: 182298
loss: 0.95745849609375,grad_norm: 0.9082193308816702, iteration: 182299
loss: 1.0066899061203003,grad_norm: 0.9999990824147668, iteration: 182300
loss: 0.991755485534668,grad_norm: 0.9383293018960209, iteration: 182301
loss: 0.9926599860191345,grad_norm: 0.927784550282395, iteration: 182302
loss: 0.9774916172027588,grad_norm: 0.9999991438647169, iteration: 182303
loss: 1.025259256362915,grad_norm: 0.9999991674334268, iteration: 182304
loss: 1.0719801187515259,grad_norm: 0.9999990197742568, iteration: 182305
loss: 0.984067440032959,grad_norm: 0.9905564551940912, iteration: 182306
loss: 1.0186383724212646,grad_norm: 0.926021682725517, iteration: 182307
loss: 1.0492854118347168,grad_norm: 0.9999992561846675, iteration: 182308
loss: 1.0100674629211426,grad_norm: 0.8735927621744723, iteration: 182309
loss: 1.0584684610366821,grad_norm: 0.9406883141622567, iteration: 182310
loss: 1.059569239616394,grad_norm: 0.9999999481053631, iteration: 182311
loss: 0.9808403253555298,grad_norm: 0.9659052082135546, iteration: 182312
loss: 0.9982261061668396,grad_norm: 0.9999989913101516, iteration: 182313
loss: 1.0066897869110107,grad_norm: 0.9999990957418489, iteration: 182314
loss: 0.9943049550056458,grad_norm: 0.9999993020012814, iteration: 182315
loss: 0.9831844568252563,grad_norm: 0.9208376657520864, iteration: 182316
loss: 0.9590499997138977,grad_norm: 0.9307172057054829, iteration: 182317
loss: 1.0730899572372437,grad_norm: 0.9999997150517463, iteration: 182318
loss: 0.9650734663009644,grad_norm: 0.9999992614575782, iteration: 182319
loss: 0.9936459064483643,grad_norm: 0.9999990028716276, iteration: 182320
loss: 0.9940007328987122,grad_norm: 0.9999990648003709, iteration: 182321
loss: 0.993205189704895,grad_norm: 0.9039544779817394, iteration: 182322
loss: 0.9916583299636841,grad_norm: 0.9391072407919916, iteration: 182323
loss: 1.0371354818344116,grad_norm: 0.9999989438564836, iteration: 182324
loss: 1.0293861627578735,grad_norm: 0.9999999247465629, iteration: 182325
loss: 1.022121787071228,grad_norm: 0.9973248198569943, iteration: 182326
loss: 0.9973190426826477,grad_norm: 0.851017904977253, iteration: 182327
loss: 0.9963017106056213,grad_norm: 0.934684280024193, iteration: 182328
loss: 1.0033890008926392,grad_norm: 0.901055045143204, iteration: 182329
loss: 1.0304104089736938,grad_norm: 0.8407473492496318, iteration: 182330
loss: 1.185337781906128,grad_norm: 0.999999268080013, iteration: 182331
loss: 0.994476318359375,grad_norm: 0.971828975774553, iteration: 182332
loss: 0.9928540587425232,grad_norm: 0.9214660475237348, iteration: 182333
loss: 0.9979967474937439,grad_norm: 0.9999994114667695, iteration: 182334
loss: 1.0068604946136475,grad_norm: 0.9158696077851671, iteration: 182335
loss: 1.0089666843414307,grad_norm: 0.9485737108316672, iteration: 182336
loss: 0.976651132106781,grad_norm: 0.8629108614091303, iteration: 182337
loss: 1.022875428199768,grad_norm: 0.9999991456194827, iteration: 182338
loss: 0.9887141585350037,grad_norm: 0.9637838107918145, iteration: 182339
loss: 1.1158989667892456,grad_norm: 0.9999993281310549, iteration: 182340
loss: 0.9686634540557861,grad_norm: 0.9999990638537946, iteration: 182341
loss: 1.0301687717437744,grad_norm: 0.9854366024898534, iteration: 182342
loss: 1.0090818405151367,grad_norm: 0.9999994485767966, iteration: 182343
loss: 1.1024830341339111,grad_norm: 0.9999992548775215, iteration: 182344
loss: 1.015898585319519,grad_norm: 0.9999990469340301, iteration: 182345
loss: 1.0028207302093506,grad_norm: 0.9999992945150523, iteration: 182346
loss: 1.0606021881103516,grad_norm: 0.9456773588462207, iteration: 182347
loss: 0.9818940758705139,grad_norm: 0.9902435434532995, iteration: 182348
loss: 1.0260441303253174,grad_norm: 0.9207359064279639, iteration: 182349
loss: 1.0019294023513794,grad_norm: 0.9999990561962365, iteration: 182350
loss: 1.02090322971344,grad_norm: 0.9999990173441009, iteration: 182351
loss: 1.0009944438934326,grad_norm: 0.8429270661699942, iteration: 182352
loss: 1.0231698751449585,grad_norm: 0.8728844053667407, iteration: 182353
loss: 0.9819826483726501,grad_norm: 0.9999990635233272, iteration: 182354
loss: 1.0156190395355225,grad_norm: 0.9999990818312132, iteration: 182355
loss: 0.9740567207336426,grad_norm: 0.7852343654286326, iteration: 182356
loss: 0.9984875321388245,grad_norm: 0.9999992176845091, iteration: 182357
loss: 0.9934908151626587,grad_norm: 0.9514891433354461, iteration: 182358
loss: 1.0183809995651245,grad_norm: 0.8783282700778015, iteration: 182359
loss: 0.9978824257850647,grad_norm: 0.7607819067378947, iteration: 182360
loss: 1.0367333889007568,grad_norm: 0.7820582067333937, iteration: 182361
loss: 1.0134199857711792,grad_norm: 0.9999989196563063, iteration: 182362
loss: 1.0027481317520142,grad_norm: 0.9349073877538318, iteration: 182363
loss: 1.0037710666656494,grad_norm: 0.9999992757760167, iteration: 182364
loss: 1.0132030248641968,grad_norm: 0.9915768638111354, iteration: 182365
loss: 1.0036523342132568,grad_norm: 0.9999990551044721, iteration: 182366
loss: 1.0190033912658691,grad_norm: 0.999999149077812, iteration: 182367
loss: 1.263484001159668,grad_norm: 0.9999995055786589, iteration: 182368
loss: 1.0066882371902466,grad_norm: 0.9999990385133415, iteration: 182369
loss: 1.0355325937271118,grad_norm: 0.9999996725375074, iteration: 182370
loss: 0.9954720735549927,grad_norm: 0.9999990935369942, iteration: 182371
loss: 1.0267592668533325,grad_norm: 0.9999994538348642, iteration: 182372
loss: 1.0350439548492432,grad_norm: 0.9999995895436451, iteration: 182373
loss: 1.0097442865371704,grad_norm: 0.9999992567743576, iteration: 182374
loss: 1.0138838291168213,grad_norm: 0.9999991548938626, iteration: 182375
loss: 1.034960150718689,grad_norm: 0.9999991885090532, iteration: 182376
loss: 1.0228538513183594,grad_norm: 0.9999992142558609, iteration: 182377
loss: 1.0007871389389038,grad_norm: 0.9999991379902579, iteration: 182378
loss: 1.0112872123718262,grad_norm: 0.9999990464364188, iteration: 182379
loss: 1.0261164903640747,grad_norm: 0.9906413098647646, iteration: 182380
loss: 0.9460285902023315,grad_norm: 0.8042601968064896, iteration: 182381
loss: 1.0387526750564575,grad_norm: 0.924616317818445, iteration: 182382
loss: 0.99717777967453,grad_norm: 0.9999992439837581, iteration: 182383
loss: 1.0242618322372437,grad_norm: 0.8992594986865752, iteration: 182384
loss: 1.0983681678771973,grad_norm: 0.9999995009978058, iteration: 182385
loss: 1.0334542989730835,grad_norm: 0.8293184709632551, iteration: 182386
loss: 1.0315247774124146,grad_norm: 0.9999994784294071, iteration: 182387
loss: 0.9854959845542908,grad_norm: 0.8628869156908396, iteration: 182388
loss: 0.9806303381919861,grad_norm: 0.9999993687655873, iteration: 182389
loss: 0.9962949156761169,grad_norm: 0.9999992128430726, iteration: 182390
loss: 1.0245345830917358,grad_norm: 0.9999989194711456, iteration: 182391
loss: 0.9804136157035828,grad_norm: 0.9999991998762968, iteration: 182392
loss: 1.0538181066513062,grad_norm: 0.9224816701446452, iteration: 182393
loss: 1.0527043342590332,grad_norm: 0.8882478270150201, iteration: 182394
loss: 0.9907237887382507,grad_norm: 0.9999990376712568, iteration: 182395
loss: 0.9914993047714233,grad_norm: 0.9999990716685542, iteration: 182396
loss: 1.0099480152130127,grad_norm: 0.9934707880776518, iteration: 182397
loss: 1.0702900886535645,grad_norm: 0.9999991436618957, iteration: 182398
loss: 0.9984280467033386,grad_norm: 0.9669079816849684, iteration: 182399
loss: 0.9614841341972351,grad_norm: 0.9447868832783047, iteration: 182400
loss: 1.037960410118103,grad_norm: 0.9245887148260132, iteration: 182401
loss: 0.9962344765663147,grad_norm: 0.9999991370665502, iteration: 182402
loss: 0.9797770977020264,grad_norm: 0.9820550557448144, iteration: 182403
loss: 0.9736568331718445,grad_norm: 0.9952055704359892, iteration: 182404
loss: 0.9864323735237122,grad_norm: 0.9999991928807977, iteration: 182405
loss: 1.0407133102416992,grad_norm: 0.9999991033357416, iteration: 182406
loss: 0.9912354946136475,grad_norm: 0.8442017512360108, iteration: 182407
loss: 0.9722703695297241,grad_norm: 0.9999990727207383, iteration: 182408
loss: 0.9853156805038452,grad_norm: 0.9381176635516485, iteration: 182409
loss: 1.0229785442352295,grad_norm: 0.9999990033355888, iteration: 182410
loss: 1.0054372549057007,grad_norm: 0.8974506407747892, iteration: 182411
loss: 0.9949201345443726,grad_norm: 0.9320726664335853, iteration: 182412
loss: 1.0297735929489136,grad_norm: 0.9561549960877908, iteration: 182413
loss: 0.9580385088920593,grad_norm: 0.9999990477516871, iteration: 182414
loss: 0.9883802533149719,grad_norm: 0.9267401466533963, iteration: 182415
loss: 1.0103379487991333,grad_norm: 0.9942730725476002, iteration: 182416
loss: 0.9723706245422363,grad_norm: 0.9855977286038107, iteration: 182417
loss: 1.0098471641540527,grad_norm: 0.9999999306075066, iteration: 182418
loss: 1.1053367853164673,grad_norm: 0.9999991883140789, iteration: 182419
loss: 0.988268256187439,grad_norm: 0.9935785472973597, iteration: 182420
loss: 0.9922204613685608,grad_norm: 0.8898751715977606, iteration: 182421
loss: 0.9889360666275024,grad_norm: 0.8885191783601022, iteration: 182422
loss: 1.0680789947509766,grad_norm: 0.9104133066398975, iteration: 182423
loss: 0.9477535486221313,grad_norm: 0.9999991501558945, iteration: 182424
loss: 0.9814479351043701,grad_norm: 0.9999994697521355, iteration: 182425
loss: 1.0245832204818726,grad_norm: 0.9999990369088878, iteration: 182426
loss: 0.9675063490867615,grad_norm: 0.8385163775379383, iteration: 182427
loss: 0.9972637295722961,grad_norm: 0.889582502279444, iteration: 182428
loss: 0.9936308264732361,grad_norm: 0.9999990533731266, iteration: 182429
loss: 0.987271785736084,grad_norm: 0.9999990686108347, iteration: 182430
loss: 0.9914864897727966,grad_norm: 0.9016622506805303, iteration: 182431
loss: 1.051812767982483,grad_norm: 0.9763348852225825, iteration: 182432
loss: 1.0658087730407715,grad_norm: 0.9436533599427518, iteration: 182433
loss: 1.0014430284500122,grad_norm: 0.9365002235173823, iteration: 182434
loss: 1.0206103324890137,grad_norm: 0.9705246901399547, iteration: 182435
loss: 1.048577070236206,grad_norm: 0.9199523707087981, iteration: 182436
loss: 1.0221478939056396,grad_norm: 0.9496368089413308, iteration: 182437
loss: 0.9959511160850525,grad_norm: 0.9999991906463868, iteration: 182438
loss: 1.0188627243041992,grad_norm: 0.9999994919880799, iteration: 182439
loss: 1.0348656177520752,grad_norm: 0.9999991635032207, iteration: 182440
loss: 1.13434898853302,grad_norm: 0.9999992394197421, iteration: 182441
loss: 1.051673412322998,grad_norm: 0.9999991076930784, iteration: 182442
loss: 0.9897794127464294,grad_norm: 0.7828553548868142, iteration: 182443
loss: 1.023229956626892,grad_norm: 0.796518768497548, iteration: 182444
loss: 1.0218504667282104,grad_norm: 0.9999992042594502, iteration: 182445
loss: 0.9999112486839294,grad_norm: 0.9999990465642657, iteration: 182446
loss: 1.0205302238464355,grad_norm: 0.9999991148421874, iteration: 182447
loss: 1.068255066871643,grad_norm: 0.9999990972271305, iteration: 182448
loss: 1.021375060081482,grad_norm: 0.9999991145303359, iteration: 182449
loss: 0.9990397095680237,grad_norm: 0.9507561436458409, iteration: 182450
loss: 0.971523106098175,grad_norm: 0.9999990500100536, iteration: 182451
loss: 1.0152919292449951,grad_norm: 0.9609836601090558, iteration: 182452
loss: 0.993134081363678,grad_norm: 0.9999996659352848, iteration: 182453
loss: 1.1163231134414673,grad_norm: 0.9999996852636539, iteration: 182454
loss: 1.027492880821228,grad_norm: 0.9999990631287407, iteration: 182455
loss: 0.9932583570480347,grad_norm: 0.9999990155378014, iteration: 182456
loss: 1.0254695415496826,grad_norm: 0.9999994127921902, iteration: 182457
loss: 1.0128569602966309,grad_norm: 0.7311580597874098, iteration: 182458
loss: 0.9932013154029846,grad_norm: 0.9999991257183393, iteration: 182459
loss: 1.0726526975631714,grad_norm: 0.9622898724084075, iteration: 182460
loss: 0.9861579537391663,grad_norm: 0.9491661356798718, iteration: 182461
loss: 0.9918938875198364,grad_norm: 0.9516719762290021, iteration: 182462
loss: 0.987395703792572,grad_norm: 0.8944942156988213, iteration: 182463
loss: 1.11905837059021,grad_norm: 0.9999996345607199, iteration: 182464
loss: 0.961209774017334,grad_norm: 0.9791102575696636, iteration: 182465
loss: 1.0031267404556274,grad_norm: 0.9999991682965014, iteration: 182466
loss: 1.011870265007019,grad_norm: 0.999999237923542, iteration: 182467
loss: 1.036895751953125,grad_norm: 0.9999998570751462, iteration: 182468
loss: 0.9775218367576599,grad_norm: 0.9999992490232391, iteration: 182469
loss: 1.0100921392440796,grad_norm: 0.9737775829252899, iteration: 182470
loss: 1.0158931016921997,grad_norm: 0.9660430481178649, iteration: 182471
loss: 0.9817754626274109,grad_norm: 0.999998984605514, iteration: 182472
loss: 1.0458685159683228,grad_norm: 0.9999993327256529, iteration: 182473
loss: 0.9772545695304871,grad_norm: 0.9630976251883175, iteration: 182474
loss: 1.0016014575958252,grad_norm: 0.983395227039066, iteration: 182475
loss: 1.0080384016036987,grad_norm: 0.9999990912665166, iteration: 182476
loss: 0.9879945516586304,grad_norm: 0.9729302230446558, iteration: 182477
loss: 1.1134124994277954,grad_norm: 0.9999996948913176, iteration: 182478
loss: 1.005222201347351,grad_norm: 0.9999993939362307, iteration: 182479
loss: 0.967425525188446,grad_norm: 0.986651464117353, iteration: 182480
loss: 1.0079689025878906,grad_norm: 0.9999989426111021, iteration: 182481
loss: 0.9786309003829956,grad_norm: 0.9999990875771115, iteration: 182482
loss: 0.980330228805542,grad_norm: 0.9999990132661216, iteration: 182483
loss: 0.9949989914894104,grad_norm: 0.9015207374642618, iteration: 182484
loss: 1.0538192987442017,grad_norm: 0.9019656959645325, iteration: 182485
loss: 0.9977466464042664,grad_norm: 0.9181104081055244, iteration: 182486
loss: 1.0107587575912476,grad_norm: 0.9353979092229003, iteration: 182487
loss: 1.0677378177642822,grad_norm: 0.9448381782527614, iteration: 182488
loss: 1.020207405090332,grad_norm: 0.9669179872633297, iteration: 182489
loss: 0.9640207886695862,grad_norm: 0.9595612232111072, iteration: 182490
loss: 0.9962630271911621,grad_norm: 0.9999992411846768, iteration: 182491
loss: 1.0162041187286377,grad_norm: 0.9999991112939711, iteration: 182492
loss: 0.9648330211639404,grad_norm: 0.9897631383251131, iteration: 182493
loss: 1.0386875867843628,grad_norm: 0.9999990867368775, iteration: 182494
loss: 0.9565601348876953,grad_norm: 0.8905298639057304, iteration: 182495
loss: 1.0297141075134277,grad_norm: 0.9351378853846032, iteration: 182496
loss: 0.949843168258667,grad_norm: 0.9458767876353947, iteration: 182497
loss: 0.9597641825675964,grad_norm: 0.9999991864448234, iteration: 182498
loss: 0.9809821248054504,grad_norm: 0.832127529887191, iteration: 182499
loss: 1.0214154720306396,grad_norm: 0.9848333004852902, iteration: 182500
loss: 1.0154998302459717,grad_norm: 0.9999995762125815, iteration: 182501
loss: 0.990578293800354,grad_norm: 0.9999989264622315, iteration: 182502
loss: 0.9948336482048035,grad_norm: 0.999999117909723, iteration: 182503
loss: 0.9731928110122681,grad_norm: 0.8570923326510169, iteration: 182504
loss: 0.9881582260131836,grad_norm: 0.9999990620667176, iteration: 182505
loss: 1.0511062145233154,grad_norm: 0.9350511909550743, iteration: 182506
loss: 1.0647622346878052,grad_norm: 0.9430059121711726, iteration: 182507
loss: 0.9780476093292236,grad_norm: 0.999999221272815, iteration: 182508
loss: 1.0290532112121582,grad_norm: 0.9999995339127511, iteration: 182509
loss: 1.0079823732376099,grad_norm: 0.9850998330070331, iteration: 182510
loss: 1.0183759927749634,grad_norm: 0.8608840506555577, iteration: 182511
loss: 0.9985813498497009,grad_norm: 0.9999992793282336, iteration: 182512
loss: 1.0062861442565918,grad_norm: 0.9400176301921843, iteration: 182513
loss: 1.038511037826538,grad_norm: 0.9436774770337449, iteration: 182514
loss: 1.0686681270599365,grad_norm: 0.9999990949421551, iteration: 182515
loss: 1.0287201404571533,grad_norm: 0.9253720982515313, iteration: 182516
loss: 1.0019758939743042,grad_norm: 0.9999992445011591, iteration: 182517
loss: 1.0102399587631226,grad_norm: 0.9999991017784958, iteration: 182518
loss: 1.0100576877593994,grad_norm: 0.9758680462496698, iteration: 182519
loss: 1.0287553071975708,grad_norm: 0.9999991521705586, iteration: 182520
loss: 1.090476155281067,grad_norm: 0.9999990737604332, iteration: 182521
loss: 1.020837426185608,grad_norm: 0.9374580688305434, iteration: 182522
loss: 1.0567150115966797,grad_norm: 0.9999992075578888, iteration: 182523
loss: 1.0119258165359497,grad_norm: 0.862806578502753, iteration: 182524
loss: 1.1279584169387817,grad_norm: 0.9999993107551234, iteration: 182525
loss: 1.0287799835205078,grad_norm: 0.9999992489171434, iteration: 182526
loss: 1.0025320053100586,grad_norm: 0.9999989741073375, iteration: 182527
loss: 1.0236455202102661,grad_norm: 0.9999990844276648, iteration: 182528
loss: 0.9980117082595825,grad_norm: 0.9471048941399101, iteration: 182529
loss: 1.0115851163864136,grad_norm: 0.9999991104120665, iteration: 182530
loss: 1.0722968578338623,grad_norm: 0.9999995482935011, iteration: 182531
loss: 1.065900206565857,grad_norm: 0.9341710487502444, iteration: 182532
loss: 0.9732799530029297,grad_norm: 0.8818436026381998, iteration: 182533
loss: 1.0101922750473022,grad_norm: 0.9223306678290182, iteration: 182534
loss: 1.079445481300354,grad_norm: 0.9999992983247968, iteration: 182535
loss: 1.0314677953720093,grad_norm: 0.9999995941109301, iteration: 182536
loss: 1.0066630840301514,grad_norm: 0.9999991373343811, iteration: 182537
loss: 1.0198991298675537,grad_norm: 0.9999990498492642, iteration: 182538
loss: 0.9857597947120667,grad_norm: 0.9255120081893188, iteration: 182539
loss: 0.9930219054222107,grad_norm: 0.9013101426855654, iteration: 182540
loss: 0.9736146330833435,grad_norm: 0.9999990463266228, iteration: 182541
loss: 0.9781025052070618,grad_norm: 0.9260257105356398, iteration: 182542
loss: 1.0045323371887207,grad_norm: 0.948520392562067, iteration: 182543
loss: 0.9735367894172668,grad_norm: 0.856280480556075, iteration: 182544
loss: 1.0524723529815674,grad_norm: 0.9724269101136005, iteration: 182545
loss: 1.0687141418457031,grad_norm: 0.9999992957225515, iteration: 182546
loss: 0.9705860018730164,grad_norm: 0.8692693363932475, iteration: 182547
loss: 1.041258692741394,grad_norm: 0.9999991525568243, iteration: 182548
loss: 0.9832620024681091,grad_norm: 0.9999991504995701, iteration: 182549
loss: 1.0851991176605225,grad_norm: 0.9999990861393275, iteration: 182550
loss: 1.006690263748169,grad_norm: 0.9681724954076955, iteration: 182551
loss: 0.9827902913093567,grad_norm: 0.9999992217322882, iteration: 182552
loss: 1.008599042892456,grad_norm: 0.9852418382144136, iteration: 182553
loss: 0.9654029011726379,grad_norm: 0.9999990228390344, iteration: 182554
loss: 1.0693527460098267,grad_norm: 0.918376836533656, iteration: 182555
loss: 0.9894900321960449,grad_norm: 0.9346037017696386, iteration: 182556
loss: 0.9815311431884766,grad_norm: 0.8771197581218279, iteration: 182557
loss: 0.9959387183189392,grad_norm: 0.9999992621088981, iteration: 182558
loss: 1.0031317472457886,grad_norm: 0.9999989758611071, iteration: 182559
loss: 0.9837560057640076,grad_norm: 0.9999998947678977, iteration: 182560
loss: 1.0359472036361694,grad_norm: 0.9907599989882334, iteration: 182561
loss: 1.0407721996307373,grad_norm: 0.9360524572312776, iteration: 182562
loss: 1.0160270929336548,grad_norm: 0.8991264378658447, iteration: 182563
loss: 0.9841662049293518,grad_norm: 0.9999990487223637, iteration: 182564
loss: 1.000217318534851,grad_norm: 0.8233832767772166, iteration: 182565
loss: 0.9859973192214966,grad_norm: 0.9999997338216531, iteration: 182566
loss: 1.0327647924423218,grad_norm: 0.9768839780844736, iteration: 182567
loss: 1.0023654699325562,grad_norm: 0.9999993484545053, iteration: 182568
loss: 0.9892581105232239,grad_norm: 0.8664287375585817, iteration: 182569
loss: 0.9633539915084839,grad_norm: 0.8515049070388959, iteration: 182570
loss: 0.9691326022148132,grad_norm: 0.9632727775239566, iteration: 182571
loss: 0.9707532525062561,grad_norm: 0.879354491003444, iteration: 182572
loss: 1.026455283164978,grad_norm: 0.9722768731780217, iteration: 182573
loss: 0.9856016635894775,grad_norm: 0.9632751078444498, iteration: 182574
loss: 0.9887313842773438,grad_norm: 0.9999990778078925, iteration: 182575
loss: 0.9937596321105957,grad_norm: 0.9317851118914102, iteration: 182576
loss: 0.9642942547798157,grad_norm: 0.9999991549886852, iteration: 182577
loss: 0.9516732096672058,grad_norm: 0.9554393083228079, iteration: 182578
loss: 0.96160888671875,grad_norm: 0.8997626506406332, iteration: 182579
loss: 1.005061388015747,grad_norm: 0.8506194037564635, iteration: 182580
loss: 0.9564892053604126,grad_norm: 0.9999992696593232, iteration: 182581
loss: 1.0121023654937744,grad_norm: 0.9112219053491761, iteration: 182582
loss: 1.0415247678756714,grad_norm: 0.9999991606263668, iteration: 182583
loss: 1.020869255065918,grad_norm: 0.9187634091041909, iteration: 182584
loss: 1.0020066499710083,grad_norm: 0.9999992633006213, iteration: 182585
loss: 1.0174371004104614,grad_norm: 0.9289387178212453, iteration: 182586
loss: 0.9910055994987488,grad_norm: 0.9999990204026536, iteration: 182587
loss: 1.0037131309509277,grad_norm: 0.9999992076358064, iteration: 182588
loss: 0.9964209198951721,grad_norm: 0.7841775737944794, iteration: 182589
loss: 1.004487156867981,grad_norm: 0.9619894722887954, iteration: 182590
loss: 0.9585784077644348,grad_norm: 0.7926368317054885, iteration: 182591
loss: 1.0151735544204712,grad_norm: 0.9999989513787034, iteration: 182592
loss: 1.0145595073699951,grad_norm: 0.9839661488669652, iteration: 182593
loss: 0.9495822191238403,grad_norm: 0.8316787013740652, iteration: 182594
loss: 0.9886976480484009,grad_norm: 0.9601088317693253, iteration: 182595
loss: 1.0239750146865845,grad_norm: 0.8125468115955125, iteration: 182596
loss: 0.9877737164497375,grad_norm: 0.8989848425751203, iteration: 182597
loss: 1.0278310775756836,grad_norm: 0.9999989995439689, iteration: 182598
loss: 1.0223788022994995,grad_norm: 0.9999989937018366, iteration: 182599
loss: 1.0005221366882324,grad_norm: 0.9999991609974139, iteration: 182600
loss: 0.9929068088531494,grad_norm: 0.8630938513604212, iteration: 182601
loss: 1.0173070430755615,grad_norm: 0.9382173006255358, iteration: 182602
loss: 1.0084558725357056,grad_norm: 0.9875573080066332, iteration: 182603
loss: 1.0228679180145264,grad_norm: 0.9999993319311761, iteration: 182604
loss: 1.0396355390548706,grad_norm: 0.9839090115504942, iteration: 182605
loss: 1.0696848630905151,grad_norm: 0.9999993751824154, iteration: 182606
loss: 0.9878277778625488,grad_norm: 0.9862493088967152, iteration: 182607
loss: 1.0079652070999146,grad_norm: 0.87747941361347, iteration: 182608
loss: 1.0469597578048706,grad_norm: 0.999999142368045, iteration: 182609
loss: 0.965314507484436,grad_norm: 0.7436824425378132, iteration: 182610
loss: 0.974598228931427,grad_norm: 0.8512042048917938, iteration: 182611
loss: 1.0179692506790161,grad_norm: 0.9999990977279269, iteration: 182612
loss: 1.011320948600769,grad_norm: 0.9999991267694901, iteration: 182613
loss: 0.9998977780342102,grad_norm: 0.9999990710780101, iteration: 182614
loss: 0.9977491497993469,grad_norm: 0.9999990489629474, iteration: 182615
loss: 0.9785447716712952,grad_norm: 0.9999991300384861, iteration: 182616
loss: 1.0069830417633057,grad_norm: 0.9999992537506829, iteration: 182617
loss: 1.0104551315307617,grad_norm: 0.936954812230903, iteration: 182618
loss: 1.0169131755828857,grad_norm: 0.9999989908113723, iteration: 182619
loss: 1.019808292388916,grad_norm: 0.7917652273436311, iteration: 182620
loss: 1.018377423286438,grad_norm: 0.9126883898626381, iteration: 182621
loss: 1.140712857246399,grad_norm: 0.8564891917275582, iteration: 182622
loss: 0.9619166254997253,grad_norm: 0.9999990273765846, iteration: 182623
loss: 0.9745175838470459,grad_norm: 0.9552988348231121, iteration: 182624
loss: 0.9718937873840332,grad_norm: 0.9723224071317065, iteration: 182625
loss: 1.0259898900985718,grad_norm: 0.9999991833854696, iteration: 182626
loss: 0.9751537442207336,grad_norm: 0.9748095341352477, iteration: 182627
loss: 1.033929705619812,grad_norm: 0.9999991556093639, iteration: 182628
loss: 0.9955844879150391,grad_norm: 0.9553174058791124, iteration: 182629
loss: 0.9946393966674805,grad_norm: 0.9999990705937583, iteration: 182630
loss: 0.9859070181846619,grad_norm: 0.999999120468433, iteration: 182631
loss: 1.07460618019104,grad_norm: 0.948158349659267, iteration: 182632
loss: 1.0365774631500244,grad_norm: 0.9944230549314556, iteration: 182633
loss: 0.9879893064498901,grad_norm: 0.999999119467268, iteration: 182634
loss: 0.9759551882743835,grad_norm: 0.9999990707775692, iteration: 182635
loss: 0.9790741205215454,grad_norm: 0.9737881169999996, iteration: 182636
loss: 1.0509788990020752,grad_norm: 0.9999991141575809, iteration: 182637
loss: 1.0266923904418945,grad_norm: 0.9451905333123826, iteration: 182638
loss: 1.0181632041931152,grad_norm: 0.9999989841397864, iteration: 182639
loss: 1.0292338132858276,grad_norm: 0.9999991578548455, iteration: 182640
loss: 1.0117161273956299,grad_norm: 0.9378741588710703, iteration: 182641
loss: 1.0089186429977417,grad_norm: 0.9197272211232533, iteration: 182642
loss: 1.000539779663086,grad_norm: 0.9999990379664816, iteration: 182643
loss: 1.0447502136230469,grad_norm: 0.9999992462970868, iteration: 182644
loss: 1.0098415613174438,grad_norm: 0.86521004419833, iteration: 182645
loss: 0.9981552958488464,grad_norm: 0.8084555752558149, iteration: 182646
loss: 0.9921488761901855,grad_norm: 0.9951188865706542, iteration: 182647
loss: 1.1331239938735962,grad_norm: 0.9999990977563799, iteration: 182648
loss: 1.0071120262145996,grad_norm: 0.9748434979254759, iteration: 182649
loss: 1.0027985572814941,grad_norm: 0.9999990960091314, iteration: 182650
loss: 0.9898046255111694,grad_norm: 0.9474771898128758, iteration: 182651
loss: 0.9717679619789124,grad_norm: 0.9999991100440934, iteration: 182652
loss: 1.0463305711746216,grad_norm: 0.9999994629018257, iteration: 182653
loss: 0.9959505796432495,grad_norm: 0.9120021255227206, iteration: 182654
loss: 0.9786180853843689,grad_norm: 0.899295563644686, iteration: 182655
loss: 1.0049993991851807,grad_norm: 0.9961073484346633, iteration: 182656
loss: 1.0399633646011353,grad_norm: 0.9812546883843174, iteration: 182657
loss: 1.0140759944915771,grad_norm: 0.999999406008303, iteration: 182658
loss: 1.019098162651062,grad_norm: 0.9414095039708469, iteration: 182659
loss: 0.9784279465675354,grad_norm: 0.8388700484691364, iteration: 182660
loss: 0.9386130571365356,grad_norm: 0.9999992384268424, iteration: 182661
loss: 0.9823762774467468,grad_norm: 0.9999993740742722, iteration: 182662
loss: 1.023959755897522,grad_norm: 0.9999997741882023, iteration: 182663
loss: 1.0105451345443726,grad_norm: 0.824617182092763, iteration: 182664
loss: 1.0064432621002197,grad_norm: 1.0000000552204937, iteration: 182665
loss: 1.005811095237732,grad_norm: 0.9999991335948564, iteration: 182666
loss: 0.9648618698120117,grad_norm: 0.9999990755168864, iteration: 182667
loss: 1.0039284229278564,grad_norm: 0.8840323380658521, iteration: 182668
loss: 1.0692414045333862,grad_norm: 0.9999993833331273, iteration: 182669
loss: 0.9848494529724121,grad_norm: 0.9427413035344304, iteration: 182670
loss: 1.0036007165908813,grad_norm: 0.9653371441220611, iteration: 182671
loss: 1.0371201038360596,grad_norm: 0.9999994052747243, iteration: 182672
loss: 1.0120785236358643,grad_norm: 0.9672199953718686, iteration: 182673
loss: 1.0052319765090942,grad_norm: 0.9999991481408298, iteration: 182674
loss: 1.0822737216949463,grad_norm: 0.9999992805144293, iteration: 182675
loss: 1.0001606941223145,grad_norm: 0.8357270008532253, iteration: 182676
loss: 0.9567632079124451,grad_norm: 0.9729796598206, iteration: 182677
loss: 1.0060194730758667,grad_norm: 0.9999991012545653, iteration: 182678
loss: 1.0584770441055298,grad_norm: 0.9183236430147641, iteration: 182679
loss: 0.9988006949424744,grad_norm: 0.9603399943022046, iteration: 182680
loss: 1.0777521133422852,grad_norm: 0.9995530157785072, iteration: 182681
loss: 0.9746693968772888,grad_norm: 0.9999993120976379, iteration: 182682
loss: 1.0590497255325317,grad_norm: 0.9999991549639001, iteration: 182683
loss: 1.005911111831665,grad_norm: 0.8679326496854727, iteration: 182684
loss: 0.9663190841674805,grad_norm: 0.9999991233700896, iteration: 182685
loss: 0.9487218856811523,grad_norm: 0.9562002426118924, iteration: 182686
loss: 1.0399366617202759,grad_norm: 0.9999997748675484, iteration: 182687
loss: 0.9895538091659546,grad_norm: 0.999999111774197, iteration: 182688
loss: 0.9853652119636536,grad_norm: 0.9912847056511602, iteration: 182689
loss: 1.016778588294983,grad_norm: 0.8825314964610598, iteration: 182690
loss: 1.0062954425811768,grad_norm: 0.9999992238592197, iteration: 182691
loss: 1.0036673545837402,grad_norm: 0.9999990267720821, iteration: 182692
loss: 1.0074701309204102,grad_norm: 0.9816069452223627, iteration: 182693
loss: 0.9803236126899719,grad_norm: 0.9707174619853303, iteration: 182694
loss: 0.9792346954345703,grad_norm: 0.8490645162129976, iteration: 182695
loss: 0.9901328682899475,grad_norm: 0.8327266580465739, iteration: 182696
loss: 1.014925479888916,grad_norm: 0.9295407797714533, iteration: 182697
loss: 1.000902533531189,grad_norm: 0.9999992183911224, iteration: 182698
loss: 1.0170754194259644,grad_norm: 0.9978230348179614, iteration: 182699
loss: 0.9945012927055359,grad_norm: 0.8940990721544358, iteration: 182700
loss: 1.0146770477294922,grad_norm: 0.8629394788385207, iteration: 182701
loss: 1.0063481330871582,grad_norm: 0.999998887261149, iteration: 182702
loss: 0.9781651496887207,grad_norm: 0.9999993286921027, iteration: 182703
loss: 0.9967033267021179,grad_norm: 0.8918797739990773, iteration: 182704
loss: 1.0984441041946411,grad_norm: 0.9999990682235848, iteration: 182705
loss: 1.0686235427856445,grad_norm: 0.9666086454639976, iteration: 182706
loss: 1.0608162879943848,grad_norm: 0.9999992784461101, iteration: 182707
loss: 1.023335337638855,grad_norm: 0.9709467834333936, iteration: 182708
loss: 1.016243577003479,grad_norm: 0.9999996747093065, iteration: 182709
loss: 1.000758171081543,grad_norm: 0.9625520208698175, iteration: 182710
loss: 1.0223491191864014,grad_norm: 0.8641811197095407, iteration: 182711
loss: 1.0061545372009277,grad_norm: 0.9098075952235495, iteration: 182712
loss: 1.0269526243209839,grad_norm: 0.8713709324242515, iteration: 182713
loss: 1.0180732011795044,grad_norm: 0.9794249390550217, iteration: 182714
loss: 1.0790932178497314,grad_norm: 0.9999994268827949, iteration: 182715
loss: 1.0146515369415283,grad_norm: 0.9999991604165345, iteration: 182716
loss: 0.9725551009178162,grad_norm: 0.8467116719043004, iteration: 182717
loss: 0.9672466516494751,grad_norm: 0.9608459347870693, iteration: 182718
loss: 1.004789113998413,grad_norm: 0.9999990727752502, iteration: 182719
loss: 0.9668899774551392,grad_norm: 0.9999990413007807, iteration: 182720
loss: 0.9891782402992249,grad_norm: 0.9234220777916244, iteration: 182721
loss: 1.0220507383346558,grad_norm: 0.9999990984895811, iteration: 182722
loss: 1.0580580234527588,grad_norm: 0.976525267712297, iteration: 182723
loss: 0.9896433353424072,grad_norm: 0.999998966715134, iteration: 182724
loss: 0.9587737917900085,grad_norm: 0.8803882501829416, iteration: 182725
loss: 1.0185374021530151,grad_norm: 0.8809652911809904, iteration: 182726
loss: 0.9909054040908813,grad_norm: 0.9999998522533414, iteration: 182727
loss: 1.0338104963302612,grad_norm: 0.9999992221413396, iteration: 182728
loss: 1.0025780200958252,grad_norm: 0.9999990430761451, iteration: 182729
loss: 0.9852681756019592,grad_norm: 0.9216470335550665, iteration: 182730
loss: 1.0227878093719482,grad_norm: 0.9999990603629688, iteration: 182731
loss: 0.9954476356506348,grad_norm: 0.9416635348398299, iteration: 182732
loss: 1.080450177192688,grad_norm: 0.9999990203065318, iteration: 182733
loss: 0.994780957698822,grad_norm: 0.9999992274864186, iteration: 182734
loss: 0.9837871193885803,grad_norm: 0.9720703391955293, iteration: 182735
loss: 1.0612761974334717,grad_norm: 0.9999997787183362, iteration: 182736
loss: 1.0040185451507568,grad_norm: 0.999999112900329, iteration: 182737
loss: 1.0378471612930298,grad_norm: 0.9660626766758355, iteration: 182738
loss: 0.9680415987968445,grad_norm: 0.9197722346405403, iteration: 182739
loss: 0.9851061701774597,grad_norm: 0.9999991710872034, iteration: 182740
loss: 1.0163674354553223,grad_norm: 0.9999992605862374, iteration: 182741
loss: 1.0275267362594604,grad_norm: 0.9403071405180736, iteration: 182742
loss: 1.0226339101791382,grad_norm: 0.9999990766475886, iteration: 182743
loss: 0.9993891716003418,grad_norm: 0.9999990412755767, iteration: 182744
loss: 1.0073953866958618,grad_norm: 0.9999991065714791, iteration: 182745
loss: 1.0153601169586182,grad_norm: 0.9999989803157393, iteration: 182746
loss: 1.0127276182174683,grad_norm: 0.9241784240796774, iteration: 182747
loss: 1.1113636493682861,grad_norm: 0.9999997677338036, iteration: 182748
loss: 0.9912194013595581,grad_norm: 0.9006240042881086, iteration: 182749
loss: 1.0909790992736816,grad_norm: 0.9999993940033236, iteration: 182750
loss: 1.0105515718460083,grad_norm: 0.9999994476180093, iteration: 182751
loss: 0.9665712714195251,grad_norm: 0.8929723208519486, iteration: 182752
loss: 1.0049569606781006,grad_norm: 0.9532794089277139, iteration: 182753
loss: 1.010625958442688,grad_norm: 0.9197315729120165, iteration: 182754
loss: 1.0947866439819336,grad_norm: 0.9999996929252305, iteration: 182755
loss: 0.9691941738128662,grad_norm: 0.9444924991850177, iteration: 182756
loss: 0.9955609440803528,grad_norm: 0.7910814257096491, iteration: 182757
loss: 0.9585129618644714,grad_norm: 0.9199318803153353, iteration: 182758
loss: 0.9535821080207825,grad_norm: 0.9999992327868003, iteration: 182759
loss: 0.9815236330032349,grad_norm: 0.9269763751000997, iteration: 182760
loss: 1.0107104778289795,grad_norm: 0.8584199876201728, iteration: 182761
loss: 1.0015650987625122,grad_norm: 0.8275800784135465, iteration: 182762
loss: 1.0561264753341675,grad_norm: 0.9999991601960871, iteration: 182763
loss: 0.9814554452896118,grad_norm: 0.9999992213852051, iteration: 182764
loss: 1.0180082321166992,grad_norm: 0.9100720401035466, iteration: 182765
loss: 0.9774516820907593,grad_norm: 0.9999991757171425, iteration: 182766
loss: 1.016670823097229,grad_norm: 0.9879570636939436, iteration: 182767
loss: 1.0029940605163574,grad_norm: 0.9463798725052275, iteration: 182768
loss: 0.9954184889793396,grad_norm: 0.9999992077309762, iteration: 182769
loss: 1.02522873878479,grad_norm: 0.9999990558304362, iteration: 182770
loss: 1.0116324424743652,grad_norm: 0.8807434956541987, iteration: 182771
loss: 1.0080223083496094,grad_norm: 0.911285872361986, iteration: 182772
loss: 1.0071766376495361,grad_norm: 0.8556773148978261, iteration: 182773
loss: 0.9919953942298889,grad_norm: 0.7394722000471704, iteration: 182774
loss: 1.0195943117141724,grad_norm: 0.8589439789700587, iteration: 182775
loss: 1.083696722984314,grad_norm: 0.9999992199168805, iteration: 182776
loss: 1.0447620153427124,grad_norm: 0.9999989915286488, iteration: 182777
loss: 1.0066406726837158,grad_norm: 0.9610408481848429, iteration: 182778
loss: 1.0423027276992798,grad_norm: 0.9999991012845275, iteration: 182779
loss: 1.0236289501190186,grad_norm: 0.8801689977653947, iteration: 182780
loss: 0.9672324657440186,grad_norm: 0.9604160907830609, iteration: 182781
loss: 0.9934301972389221,grad_norm: 0.9999991915462132, iteration: 182782
loss: 1.0241955518722534,grad_norm: 0.999999166682779, iteration: 182783
loss: 0.9935384392738342,grad_norm: 0.9999989708215189, iteration: 182784
loss: 1.0165371894836426,grad_norm: 0.8343818871033069, iteration: 182785
loss: 0.9977912306785583,grad_norm: 0.8872838661173544, iteration: 182786
loss: 0.9792463779449463,grad_norm: 0.9287748104001201, iteration: 182787
loss: 0.9617230892181396,grad_norm: 0.8343127942973059, iteration: 182788
loss: 1.0265311002731323,grad_norm: 0.8437856485223661, iteration: 182789
loss: 0.9907914400100708,grad_norm: 0.980919346628868, iteration: 182790
loss: 1.02655827999115,grad_norm: 0.99999905344807, iteration: 182791
loss: 1.087033987045288,grad_norm: 0.9999991735892365, iteration: 182792
loss: 0.9751757383346558,grad_norm: 0.9999991553414432, iteration: 182793
loss: 1.0522652864456177,grad_norm: 0.9999991997736943, iteration: 182794
loss: 0.9983677864074707,grad_norm: 0.9693358543418675, iteration: 182795
loss: 1.1263657808303833,grad_norm: 0.9999991502422182, iteration: 182796
loss: 0.9597189426422119,grad_norm: 0.9999990296452622, iteration: 182797
loss: 1.1147990226745605,grad_norm: 0.9999992362614664, iteration: 182798
loss: 0.990568220615387,grad_norm: 0.8025503107219142, iteration: 182799
loss: 1.0183500051498413,grad_norm: 0.9999992195718274, iteration: 182800
loss: 1.0327998399734497,grad_norm: 0.9676236586530143, iteration: 182801
loss: 0.9568837285041809,grad_norm: 0.9999989158830914, iteration: 182802
loss: 1.0107121467590332,grad_norm: 0.9999991779552725, iteration: 182803
loss: 1.0117955207824707,grad_norm: 0.9579890228890736, iteration: 182804
loss: 1.0270546674728394,grad_norm: 0.9354825384134831, iteration: 182805
loss: 0.9685192108154297,grad_norm: 0.8741772541543354, iteration: 182806
loss: 0.9891046285629272,grad_norm: 0.9116733557901366, iteration: 182807
loss: 0.9833071827888489,grad_norm: 0.9328082424399968, iteration: 182808
loss: 1.0110664367675781,grad_norm: 0.9281804749271249, iteration: 182809
loss: 0.9971358180046082,grad_norm: 0.9999991543096004, iteration: 182810
loss: 1.0260436534881592,grad_norm: 0.828028295591159, iteration: 182811
loss: 0.9574427604675293,grad_norm: 0.9222340504507699, iteration: 182812
loss: 0.9977557063102722,grad_norm: 0.8643861265394472, iteration: 182813
loss: 0.9899205565452576,grad_norm: 0.9151428240799642, iteration: 182814
loss: 1.0183706283569336,grad_norm: 0.999999899731308, iteration: 182815
loss: 1.0055935382843018,grad_norm: 0.9522839396214177, iteration: 182816
loss: 0.9919387102127075,grad_norm: 0.9452493230087892, iteration: 182817
loss: 1.0190659761428833,grad_norm: 0.923089466538416, iteration: 182818
loss: 0.9901063442230225,grad_norm: 0.8480266297238545, iteration: 182819
loss: 1.0255213975906372,grad_norm: 0.9163076000654007, iteration: 182820
loss: 0.9638846516609192,grad_norm: 0.88872632492067, iteration: 182821
loss: 0.9997920989990234,grad_norm: 0.8933285722747438, iteration: 182822
loss: 1.017999529838562,grad_norm: 0.9757111625222662, iteration: 182823
loss: 0.9933652877807617,grad_norm: 0.9952065140752767, iteration: 182824
loss: 1.0170655250549316,grad_norm: 0.9999989788566466, iteration: 182825
loss: 1.0250886678695679,grad_norm: 0.8185413236851087, iteration: 182826
loss: 0.9592794179916382,grad_norm: 0.9999989838555009, iteration: 182827
loss: 1.0022857189178467,grad_norm: 0.9966208755783383, iteration: 182828
loss: 1.0120935440063477,grad_norm: 0.9292622748295564, iteration: 182829
loss: 1.0127259492874146,grad_norm: 0.9999991680311232, iteration: 182830
loss: 1.0240739583969116,grad_norm: 0.9999990868142323, iteration: 182831
loss: 0.9937437772750854,grad_norm: 0.9640294937993578, iteration: 182832
loss: 1.0086009502410889,grad_norm: 0.6392899384932155, iteration: 182833
loss: 1.012883186340332,grad_norm: 0.8903483276666763, iteration: 182834
loss: 1.107478141784668,grad_norm: 0.9999991968979296, iteration: 182835
loss: 0.9821098446846008,grad_norm: 0.8744642260023359, iteration: 182836
loss: 0.9991796016693115,grad_norm: 0.9999989597399179, iteration: 182837
loss: 0.9773577451705933,grad_norm: 0.9999990918079185, iteration: 182838
loss: 0.9696623682975769,grad_norm: 0.999999179923651, iteration: 182839
loss: 1.0379208326339722,grad_norm: 0.9999990490394238, iteration: 182840
loss: 0.985263466835022,grad_norm: 0.9999993628835794, iteration: 182841
loss: 0.9633180499076843,grad_norm: 0.9562850532083532, iteration: 182842
loss: 1.005015254020691,grad_norm: 0.999999105943281, iteration: 182843
loss: 1.039336919784546,grad_norm: 0.9999991768278897, iteration: 182844
loss: 0.9794709086418152,grad_norm: 0.8084460883729744, iteration: 182845
loss: 0.9413819313049316,grad_norm: 0.9200180397774906, iteration: 182846
loss: 0.9725685119628906,grad_norm: 0.9999991907161304, iteration: 182847
loss: 0.9970389008522034,grad_norm: 0.9934745887492716, iteration: 182848
loss: 1.025503396987915,grad_norm: 0.9965198970074494, iteration: 182849
loss: 1.0238138437271118,grad_norm: 0.999999137981576, iteration: 182850
loss: 1.041775107383728,grad_norm: 0.9999991657904849, iteration: 182851
loss: 1.0523360967636108,grad_norm: 0.9999996482231158, iteration: 182852
loss: 1.0509979724884033,grad_norm: 0.9999996697951111, iteration: 182853
loss: 1.0160094499588013,grad_norm: 0.9375204922119736, iteration: 182854
loss: 1.011910319328308,grad_norm: 0.9999992727638394, iteration: 182855
loss: 1.017688274383545,grad_norm: 0.9999991155419411, iteration: 182856
loss: 0.9726987481117249,grad_norm: 0.999999111493681, iteration: 182857
loss: 0.978391170501709,grad_norm: 0.9999989539148899, iteration: 182858
loss: 1.0019501447677612,grad_norm: 0.96214783852106, iteration: 182859
loss: 1.0317788124084473,grad_norm: 0.9999990328652199, iteration: 182860
loss: 1.064346194267273,grad_norm: 0.9999991404984587, iteration: 182861
loss: 1.0299419164657593,grad_norm: 0.9765891586358535, iteration: 182862
loss: 1.0621169805526733,grad_norm: 0.9574636319654963, iteration: 182863
loss: 1.0297954082489014,grad_norm: 0.9999990495438191, iteration: 182864
loss: 1.0407512187957764,grad_norm: 0.9925334163646736, iteration: 182865
loss: 0.9851162433624268,grad_norm: 0.9999991085833068, iteration: 182866
loss: 1.00572669506073,grad_norm: 0.9999992419560095, iteration: 182867
loss: 1.012442708015442,grad_norm: 0.9999992642502745, iteration: 182868
loss: 1.0153254270553589,grad_norm: 0.9212329755395554, iteration: 182869
loss: 1.0028563737869263,grad_norm: 0.9999993110338226, iteration: 182870
loss: 0.9907577633857727,grad_norm: 0.9621371092605966, iteration: 182871
loss: 1.0559730529785156,grad_norm: 0.9999991297840403, iteration: 182872
loss: 1.067047357559204,grad_norm: 0.873981385922113, iteration: 182873
loss: 1.0452461242675781,grad_norm: 0.9867070213403961, iteration: 182874
loss: 0.9931570291519165,grad_norm: 0.9999990966227137, iteration: 182875
loss: 0.9904779195785522,grad_norm: 0.8919834650485162, iteration: 182876
loss: 0.9865735769271851,grad_norm: 0.8766647903304204, iteration: 182877
loss: 0.966264009475708,grad_norm: 0.9999991486850125, iteration: 182878
loss: 0.954734206199646,grad_norm: 0.9025787037891025, iteration: 182879
loss: 0.9789563417434692,grad_norm: 0.9999991077083418, iteration: 182880
loss: 1.0043370723724365,grad_norm: 0.999999218432751, iteration: 182881
loss: 1.0133283138275146,grad_norm: 0.9741761185882181, iteration: 182882
loss: 1.0651270151138306,grad_norm: 0.9999990671049691, iteration: 182883
loss: 1.0043187141418457,grad_norm: 0.8796725345501686, iteration: 182884
loss: 0.9762321710586548,grad_norm: 0.9925225086439259, iteration: 182885
loss: 0.9774583578109741,grad_norm: 0.9999989235394702, iteration: 182886
loss: 0.9757885336875916,grad_norm: 0.9042913212848722, iteration: 182887
loss: 1.01456618309021,grad_norm: 0.8756218668758297, iteration: 182888
loss: 0.9858225584030151,grad_norm: 0.9999990902662395, iteration: 182889
loss: 1.0304166078567505,grad_norm: 0.9999990849437109, iteration: 182890
loss: 1.0352816581726074,grad_norm: 0.9999990824970726, iteration: 182891
loss: 1.0344384908676147,grad_norm: 0.9999997553928335, iteration: 182892
loss: 1.063124656677246,grad_norm: 0.9999991526698309, iteration: 182893
loss: 0.9848618507385254,grad_norm: 0.8501909533944124, iteration: 182894
loss: 1.0034193992614746,grad_norm: 0.999999215104422, iteration: 182895
loss: 0.9654654264450073,grad_norm: 0.9999991478960857, iteration: 182896
loss: 0.9747811555862427,grad_norm: 0.9999992510377689, iteration: 182897
loss: 1.0057960748672485,grad_norm: 0.9999998200235912, iteration: 182898
loss: 1.0234490633010864,grad_norm: 0.8480645801707876, iteration: 182899
loss: 1.0514702796936035,grad_norm: 0.9999993267028603, iteration: 182900
loss: 0.9839797616004944,grad_norm: 0.9999990290321766, iteration: 182901
loss: 0.9915164709091187,grad_norm: 0.9713449403643281, iteration: 182902
loss: 1.0429375171661377,grad_norm: 0.9999993573874161, iteration: 182903
loss: 1.01939058303833,grad_norm: 0.9272841810635042, iteration: 182904
loss: 1.0462908744812012,grad_norm: 0.999999718063361, iteration: 182905
loss: 1.0808353424072266,grad_norm: 0.9693899332240196, iteration: 182906
loss: 1.0289347171783447,grad_norm: 0.9999995405535067, iteration: 182907
loss: 0.9981465339660645,grad_norm: 0.9770105153660285, iteration: 182908
loss: 1.118512511253357,grad_norm: 0.9999991771039389, iteration: 182909
loss: 0.9827471375465393,grad_norm: 0.9999990874121648, iteration: 182910
loss: 0.9798805713653564,grad_norm: 0.9999990995741668, iteration: 182911
loss: 0.9854841828346252,grad_norm: 0.972568088057929, iteration: 182912
loss: 1.0117895603179932,grad_norm: 0.9999991131733065, iteration: 182913
loss: 0.9740399122238159,grad_norm: 0.9779841944674693, iteration: 182914
loss: 0.9610803723335266,grad_norm: 0.9999990806393472, iteration: 182915
loss: 0.9995330572128296,grad_norm: 0.8938756837331362, iteration: 182916
loss: 1.01902174949646,grad_norm: 0.8446529062827838, iteration: 182917
loss: 0.9695027470588684,grad_norm: 0.8800931424245444, iteration: 182918
loss: 1.0381197929382324,grad_norm: 0.8884282381770472, iteration: 182919
loss: 1.0109481811523438,grad_norm: 0.9265319087453379, iteration: 182920
loss: 1.0008121728897095,grad_norm: 0.8369798979746875, iteration: 182921
loss: 1.0469046831130981,grad_norm: 0.90721688806913, iteration: 182922
loss: 0.9868777394294739,grad_norm: 0.9999992519675315, iteration: 182923
loss: 0.9921258687973022,grad_norm: 0.9999991339928913, iteration: 182924
loss: 0.9580861926078796,grad_norm: 0.9072940927141873, iteration: 182925
loss: 0.9815211892127991,grad_norm: 0.9999992355785676, iteration: 182926
loss: 0.9787338376045227,grad_norm: 0.9854453315172118, iteration: 182927
loss: 1.1126699447631836,grad_norm: 0.9999999478268169, iteration: 182928
loss: 1.0117831230163574,grad_norm: 0.9877142846845117, iteration: 182929
loss: 0.987399160861969,grad_norm: 0.9314692226485893, iteration: 182930
loss: 1.269484519958496,grad_norm: 0.9999998730683226, iteration: 182931
loss: 1.0703442096710205,grad_norm: 0.9999991834090272, iteration: 182932
loss: 1.1669763326644897,grad_norm: 0.9999992107044855, iteration: 182933
loss: 0.9725127220153809,grad_norm: 0.9598425125959816, iteration: 182934
loss: 0.9957090020179749,grad_norm: 0.9684895010869407, iteration: 182935
loss: 1.0029391050338745,grad_norm: 0.8833031651053624, iteration: 182936
loss: 0.9748671650886536,grad_norm: 0.9999991752782887, iteration: 182937
loss: 1.1500507593154907,grad_norm: 0.9999998538732268, iteration: 182938
loss: 1.0150731801986694,grad_norm: 0.8964458251229057, iteration: 182939
loss: 0.9934151768684387,grad_norm: 0.9303670805300843, iteration: 182940
loss: 0.9781295657157898,grad_norm: 0.9999991821607296, iteration: 182941
loss: 0.9771963357925415,grad_norm: 0.9999990811394395, iteration: 182942
loss: 0.9600982666015625,grad_norm: 0.9999992649131407, iteration: 182943
loss: 1.0055538415908813,grad_norm: 0.927355955364369, iteration: 182944
loss: 1.2332309484481812,grad_norm: 0.9999992348831547, iteration: 182945
loss: 1.2845101356506348,grad_norm: 0.9999998527645636, iteration: 182946
loss: 1.0642335414886475,grad_norm: 0.9999991974387203, iteration: 182947
loss: 1.0167267322540283,grad_norm: 0.9999990161925219, iteration: 182948
loss: 1.1755132675170898,grad_norm: 0.9999995928286459, iteration: 182949
loss: 1.230236291885376,grad_norm: 0.9999994457399319, iteration: 182950
loss: 1.3426706790924072,grad_norm: 0.9999994918164844, iteration: 182951
loss: 1.317301630973816,grad_norm: 1.0000000197359864, iteration: 182952
loss: 0.980358898639679,grad_norm: 0.9367246730030752, iteration: 182953
loss: 1.000915288925171,grad_norm: 0.9999992566637005, iteration: 182954
loss: 0.9873790740966797,grad_norm: 0.9999990959279852, iteration: 182955
loss: 1.013811707496643,grad_norm: 0.9999999234279136, iteration: 182956
loss: 0.9532011151313782,grad_norm: 0.9653892061330304, iteration: 182957
loss: 1.0725558996200562,grad_norm: 0.9999992109197626, iteration: 182958
loss: 1.0117014646530151,grad_norm: 0.9999991630920346, iteration: 182959
loss: 0.9691925048828125,grad_norm: 0.9089683481461485, iteration: 182960
loss: 0.9960168600082397,grad_norm: 0.9999992216197671, iteration: 182961
loss: 0.9868584871292114,grad_norm: 0.9201608930544989, iteration: 182962
loss: 0.9942920207977295,grad_norm: 0.9251609195030643, iteration: 182963
loss: 1.0301399230957031,grad_norm: 0.9999991014484209, iteration: 182964
loss: 1.0625814199447632,grad_norm: 1.0000000072157271, iteration: 182965
loss: 1.0038137435913086,grad_norm: 0.8313160516808017, iteration: 182966
loss: 0.9651086330413818,grad_norm: 0.8804742246838948, iteration: 182967
loss: 1.0645209550857544,grad_norm: 0.9999990415802396, iteration: 182968
loss: 1.0307731628417969,grad_norm: 0.9999993749773906, iteration: 182969
loss: 1.0979342460632324,grad_norm: 0.9999999897847613, iteration: 182970
loss: 0.959904670715332,grad_norm: 0.9790619103600284, iteration: 182971
loss: 1.058756947517395,grad_norm: 0.9999991982588746, iteration: 182972
loss: 1.0921339988708496,grad_norm: 0.9999992942799363, iteration: 182973
loss: 0.9471170902252197,grad_norm: 0.9999990071322932, iteration: 182974
loss: 1.0632219314575195,grad_norm: 0.9748058812643514, iteration: 182975
loss: 1.0362910032272339,grad_norm: 0.9952550887531842, iteration: 182976
loss: 0.9813461303710938,grad_norm: 0.9632153571306787, iteration: 182977
loss: 1.0041193962097168,grad_norm: 0.8790327701958546, iteration: 182978
loss: 1.0136772394180298,grad_norm: 0.999999195356468, iteration: 182979
loss: 0.9990115165710449,grad_norm: 0.9923231118365828, iteration: 182980
loss: 1.0358517169952393,grad_norm: 0.8453414951733182, iteration: 182981
loss: 1.0023539066314697,grad_norm: 0.9999992461080374, iteration: 182982
loss: 1.021445393562317,grad_norm: 0.86477086821944, iteration: 182983
loss: 1.0329123735427856,grad_norm: 0.9999992580354179, iteration: 182984
loss: 0.9916518330574036,grad_norm: 0.8778387171039561, iteration: 182985
loss: 1.0474762916564941,grad_norm: 0.9999990748905384, iteration: 182986
loss: 1.0009527206420898,grad_norm: 0.8728578743251515, iteration: 182987
loss: 1.0355035066604614,grad_norm: 0.978481441023316, iteration: 182988
loss: 1.0400694608688354,grad_norm: 0.999999055758472, iteration: 182989
loss: 1.0039454698562622,grad_norm: 0.8995534521156344, iteration: 182990
loss: 1.068931221961975,grad_norm: 0.999999397360698, iteration: 182991
loss: 0.9835669994354248,grad_norm: 0.9999992372915841, iteration: 182992
loss: 0.9838244915008545,grad_norm: 0.8518143919343945, iteration: 182993
loss: 1.0563139915466309,grad_norm: 0.9999990451947653, iteration: 182994
loss: 1.0173698663711548,grad_norm: 0.9999991373062667, iteration: 182995
loss: 1.0236155986785889,grad_norm: 0.9459114662898215, iteration: 182996
loss: 0.9806151390075684,grad_norm: 0.923831475726608, iteration: 182997
loss: 0.9860181212425232,grad_norm: 0.8241916039879433, iteration: 182998
loss: 1.0015935897827148,grad_norm: 0.9999991541551042, iteration: 182999
loss: 0.9664597511291504,grad_norm: 0.9999989996207319, iteration: 183000
loss: 1.0701249837875366,grad_norm: 0.9999996636320425, iteration: 183001
loss: 1.0163437128067017,grad_norm: 0.8684775375816074, iteration: 183002
loss: 0.9784439206123352,grad_norm: 0.9999991675560305, iteration: 183003
loss: 1.0312672853469849,grad_norm: 0.9999992003412521, iteration: 183004
loss: 1.0689458847045898,grad_norm: 0.9999992751158943, iteration: 183005
loss: 0.955074667930603,grad_norm: 0.9999990567305581, iteration: 183006
loss: 0.9959900975227356,grad_norm: 0.9999990846459397, iteration: 183007
loss: 1.0510445833206177,grad_norm: 0.9793430569871749, iteration: 183008
loss: 1.006001591682434,grad_norm: 0.9999992360562513, iteration: 183009
loss: 0.9431930780410767,grad_norm: 0.8471315300243953, iteration: 183010
loss: 1.0167236328125,grad_norm: 0.8720145474381265, iteration: 183011
loss: 1.0238710641860962,grad_norm: 0.9637335316013326, iteration: 183012
loss: 1.0283194780349731,grad_norm: 0.9504599735686479, iteration: 183013
loss: 0.9751824140548706,grad_norm: 0.9262456102491264, iteration: 183014
loss: 1.0232504606246948,grad_norm: 0.9999990854277059, iteration: 183015
loss: 0.9975387454032898,grad_norm: 0.9999991021584224, iteration: 183016
loss: 0.9954104423522949,grad_norm: 0.999999461397545, iteration: 183017
loss: 1.0853157043457031,grad_norm: 0.9999992727013023, iteration: 183018
loss: 1.0193963050842285,grad_norm: 0.9907822599666566, iteration: 183019
loss: 1.036383032798767,grad_norm: 0.9462680355084891, iteration: 183020
loss: 1.0526009798049927,grad_norm: 0.9999990738950922, iteration: 183021
loss: 1.0183556079864502,grad_norm: 0.9999990729922212, iteration: 183022
loss: 0.9799861311912537,grad_norm: 0.9982607229684065, iteration: 183023
loss: 1.0030256509780884,grad_norm: 0.9999992023915155, iteration: 183024
loss: 1.0079541206359863,grad_norm: 0.8463276204044322, iteration: 183025
loss: 0.9782025218009949,grad_norm: 0.9999992993228868, iteration: 183026
loss: 1.0463162660598755,grad_norm: 0.9999993559553448, iteration: 183027
loss: 1.032058596611023,grad_norm: 0.9674142821261856, iteration: 183028
loss: 0.9787892699241638,grad_norm: 0.9074070418119042, iteration: 183029
loss: 0.9964397549629211,grad_norm: 0.8691968848135387, iteration: 183030
loss: 1.0183597803115845,grad_norm: 0.9999992558829943, iteration: 183031
loss: 0.9937207698822021,grad_norm: 0.8971625948259012, iteration: 183032
loss: 1.0296686887741089,grad_norm: 0.9999992089845819, iteration: 183033
loss: 0.9788201451301575,grad_norm: 0.8878415031920328, iteration: 183034
loss: 0.9747312068939209,grad_norm: 0.9999990868713836, iteration: 183035
loss: 1.0274205207824707,grad_norm: 0.9999991990417745, iteration: 183036
loss: 0.9948559403419495,grad_norm: 0.9999991129514542, iteration: 183037
loss: 1.0087355375289917,grad_norm: 0.9999990387133646, iteration: 183038
loss: 0.9890609979629517,grad_norm: 0.9999991038894458, iteration: 183039
loss: 0.9804577827453613,grad_norm: 0.9999991821722664, iteration: 183040
loss: 0.9996704459190369,grad_norm: 0.9999991113888997, iteration: 183041
loss: 1.073617696762085,grad_norm: 0.9999999004018054, iteration: 183042
loss: 1.0270129442214966,grad_norm: 0.9999990149061263, iteration: 183043
loss: 1.0124980211257935,grad_norm: 0.9347096972545492, iteration: 183044
loss: 1.053767204284668,grad_norm: 0.9999989889446674, iteration: 183045
loss: 1.0039268732070923,grad_norm: 0.8856054781463205, iteration: 183046
loss: 1.008345127105713,grad_norm: 0.9766249981167896, iteration: 183047
loss: 1.0194618701934814,grad_norm: 0.9999994418614913, iteration: 183048
loss: 1.0037416219711304,grad_norm: 0.9999990335304182, iteration: 183049
loss: 1.0227380990982056,grad_norm: 0.9999991786935578, iteration: 183050
loss: 1.0008716583251953,grad_norm: 0.9875145207389081, iteration: 183051
loss: 1.176649570465088,grad_norm: 0.9999995224943192, iteration: 183052
loss: 1.0054116249084473,grad_norm: 0.9936009625923057, iteration: 183053
loss: 1.0247896909713745,grad_norm: 0.999999106157605, iteration: 183054
loss: 1.0158774852752686,grad_norm: 0.9999992057990116, iteration: 183055
loss: 1.0067031383514404,grad_norm: 0.9442929737898613, iteration: 183056
loss: 0.973656415939331,grad_norm: 0.9873784258999012, iteration: 183057
loss: 1.0110725164413452,grad_norm: 0.9666707927097109, iteration: 183058
loss: 0.991200864315033,grad_norm: 0.9689780653227571, iteration: 183059
loss: 1.0124361515045166,grad_norm: 0.9999991630861841, iteration: 183060
loss: 0.9910421371459961,grad_norm: 0.9323471600975096, iteration: 183061
loss: 1.029074788093567,grad_norm: 0.9240238333809173, iteration: 183062
loss: 1.0115827322006226,grad_norm: 0.9591246178169958, iteration: 183063
loss: 0.9982721209526062,grad_norm: 0.9917300297788835, iteration: 183064
loss: 1.0160188674926758,grad_norm: 0.9999997732695018, iteration: 183065
loss: 1.0029726028442383,grad_norm: 0.9999991988323065, iteration: 183066
loss: 1.002061367034912,grad_norm: 0.9266799898123246, iteration: 183067
loss: 1.002022385597229,grad_norm: 0.9999991200513452, iteration: 183068
loss: 0.9906435012817383,grad_norm: 0.8176847724430584, iteration: 183069
loss: 0.9866124987602234,grad_norm: 0.8793616916426243, iteration: 183070
loss: 0.9963068962097168,grad_norm: 0.9999992183428915, iteration: 183071
loss: 0.9778612852096558,grad_norm: 0.8716304636199776, iteration: 183072
loss: 0.986946165561676,grad_norm: 0.9672063718957449, iteration: 183073
loss: 1.0172970294952393,grad_norm: 0.9999990549762974, iteration: 183074
loss: 0.9829179644584656,grad_norm: 0.9373602878922046, iteration: 183075
loss: 1.026438593864441,grad_norm: 0.9491173217723669, iteration: 183076
loss: 0.9940032958984375,grad_norm: 0.8963885814667044, iteration: 183077
loss: 1.0279823541641235,grad_norm: 0.8901964333385325, iteration: 183078
loss: 1.0012850761413574,grad_norm: 0.9999994133415848, iteration: 183079
loss: 1.0129667520523071,grad_norm: 0.9999993079649567, iteration: 183080
loss: 1.0128984451293945,grad_norm: 0.8588778620085735, iteration: 183081
loss: 1.0075335502624512,grad_norm: 0.8918023162843529, iteration: 183082
loss: 1.054234504699707,grad_norm: 0.9999990734078485, iteration: 183083
loss: 1.0020383596420288,grad_norm: 0.9999991663726592, iteration: 183084
loss: 0.9855595827102661,grad_norm: 0.9314305128997339, iteration: 183085
loss: 0.9953491687774658,grad_norm: 0.9999990856898158, iteration: 183086
loss: 0.9591071605682373,grad_norm: 0.9622138879080718, iteration: 183087
loss: 1.051103949546814,grad_norm: 0.9999991555977165, iteration: 183088
loss: 1.0703519582748413,grad_norm: 0.9999992513457466, iteration: 183089
loss: 1.0067743062973022,grad_norm: 0.9191560231888496, iteration: 183090
loss: 1.0487850904464722,grad_norm: 0.9361391325311866, iteration: 183091
loss: 0.9881983399391174,grad_norm: 0.9999992559814468, iteration: 183092
loss: 1.0176239013671875,grad_norm: 0.9999991802518237, iteration: 183093
loss: 1.0375319719314575,grad_norm: 0.9999992600227258, iteration: 183094
loss: 1.0142468214035034,grad_norm: 0.9585195788071571, iteration: 183095
loss: 1.013108253479004,grad_norm: 0.9999991928856572, iteration: 183096
loss: 1.0166059732437134,grad_norm: 0.879847630470678, iteration: 183097
loss: 1.042884111404419,grad_norm: 0.908725844905169, iteration: 183098
loss: 0.9804072976112366,grad_norm: 0.974178666210791, iteration: 183099
loss: 1.0174802541732788,grad_norm: 0.989134596309216, iteration: 183100
loss: 1.0326327085494995,grad_norm: 0.8972106320101672, iteration: 183101
loss: 1.0221960544586182,grad_norm: 0.999999339827233, iteration: 183102
loss: 0.9889576435089111,grad_norm: 0.9359121878426097, iteration: 183103
loss: 1.0042744874954224,grad_norm: 0.8132780054977758, iteration: 183104
loss: 0.9635944366455078,grad_norm: 0.9999993891730504, iteration: 183105
loss: 0.9784952998161316,grad_norm: 0.9138100245079774, iteration: 183106
loss: 1.0280280113220215,grad_norm: 0.9292092243819803, iteration: 183107
loss: 1.0003994703292847,grad_norm: 0.8627018497412229, iteration: 183108
loss: 0.9761407971382141,grad_norm: 0.9419403316541918, iteration: 183109
loss: 0.9887755513191223,grad_norm: 0.9999990475746626, iteration: 183110
loss: 1.0394951105117798,grad_norm: 0.9999992223088151, iteration: 183111
loss: 0.9972342252731323,grad_norm: 0.9999990365969559, iteration: 183112
loss: 1.0455976724624634,grad_norm: 0.9988279899417681, iteration: 183113
loss: 1.022199034690857,grad_norm: 0.9999991728797117, iteration: 183114
loss: 0.9545407891273499,grad_norm: 0.999999045579978, iteration: 183115
loss: 0.9962232112884521,grad_norm: 0.9181120289004602, iteration: 183116
loss: 0.9580247402191162,grad_norm: 0.999999166525737, iteration: 183117
loss: 0.993219256401062,grad_norm: 0.9999990790635838, iteration: 183118
loss: 0.9796409606933594,grad_norm: 0.9999992177599726, iteration: 183119
loss: 0.9858477115631104,grad_norm: 0.9556360477979423, iteration: 183120
loss: 1.0182420015335083,grad_norm: 0.9437539728750194, iteration: 183121
loss: 0.9861149787902832,grad_norm: 0.9364977780266511, iteration: 183122
loss: 1.002373218536377,grad_norm: 0.9622868296104532, iteration: 183123
loss: 0.9775042533874512,grad_norm: 0.8726644845758402, iteration: 183124
loss: 0.9958119988441467,grad_norm: 0.9711140938101702, iteration: 183125
loss: 1.0138734579086304,grad_norm: 0.8772643474424879, iteration: 183126
loss: 1.0170685052871704,grad_norm: 0.9999990684587685, iteration: 183127
loss: 0.9940786361694336,grad_norm: 0.9286266206344713, iteration: 183128
loss: 0.9871888756752014,grad_norm: 0.8040456656988056, iteration: 183129
loss: 0.9656423926353455,grad_norm: 0.9748642574774056, iteration: 183130
loss: 0.992898166179657,grad_norm: 0.9828558604297123, iteration: 183131
loss: 1.0066536664962769,grad_norm: 0.9427186600764491, iteration: 183132
loss: 0.9836834669113159,grad_norm: 0.9412352239667009, iteration: 183133
loss: 0.9661741852760315,grad_norm: 0.9553695266966512, iteration: 183134
loss: 1.0071135759353638,grad_norm: 0.9999990830653003, iteration: 183135
loss: 1.0224826335906982,grad_norm: 0.9440745662734759, iteration: 183136
loss: 1.0028555393218994,grad_norm: 0.9999990687696824, iteration: 183137
loss: 0.9950358867645264,grad_norm: 0.9826270901740629, iteration: 183138
loss: 1.0089497566223145,grad_norm: 0.9999990699670874, iteration: 183139
loss: 1.0163044929504395,grad_norm: 0.9999991717735233, iteration: 183140
loss: 0.9887537956237793,grad_norm: 0.871811619495182, iteration: 183141
loss: 1.0108938217163086,grad_norm: 0.9568590886563594, iteration: 183142
loss: 1.0049034357070923,grad_norm: 0.9363646367021493, iteration: 183143
loss: 0.9848290085792542,grad_norm: 0.9938165011126056, iteration: 183144
loss: 1.0007890462875366,grad_norm: 0.999999148493339, iteration: 183145
loss: 0.9743645787239075,grad_norm: 0.9999990776631656, iteration: 183146
loss: 1.021018385887146,grad_norm: 0.9455966292191043, iteration: 183147
loss: 1.0091654062271118,grad_norm: 0.8285746326615343, iteration: 183148
loss: 1.0094560384750366,grad_norm: 0.9475726595304861, iteration: 183149
loss: 0.9817105531692505,grad_norm: 0.9999991140275561, iteration: 183150
loss: 1.0143773555755615,grad_norm: 0.9999989918299361, iteration: 183151
loss: 1.0443284511566162,grad_norm: 0.999999305215481, iteration: 183152
loss: 1.0188148021697998,grad_norm: 0.837277160288399, iteration: 183153
loss: 1.0127496719360352,grad_norm: 0.9999990823730324, iteration: 183154
loss: 0.9916115999221802,grad_norm: 0.9999991480129983, iteration: 183155
loss: 1.023787498474121,grad_norm: 0.9999991694792002, iteration: 183156
loss: 1.1866062879562378,grad_norm: 0.9999997162977177, iteration: 183157
loss: 1.02060866355896,grad_norm: 0.9998758678477343, iteration: 183158
loss: 1.0083705186843872,grad_norm: 0.999999107126693, iteration: 183159
loss: 0.976112425327301,grad_norm: 0.9797719422050178, iteration: 183160
loss: 0.9735275506973267,grad_norm: 0.9168657175366605, iteration: 183161
loss: 1.0032986402511597,grad_norm: 0.9999994355631114, iteration: 183162
loss: 1.043373703956604,grad_norm: 0.9999990184698035, iteration: 183163
loss: 0.9920364022254944,grad_norm: 0.9158770553261769, iteration: 183164
loss: 1.0185164213180542,grad_norm: 0.999999083053509, iteration: 183165
loss: 0.9830557703971863,grad_norm: 0.955465767605185, iteration: 183166
loss: 0.9748600125312805,grad_norm: 0.9999990886936772, iteration: 183167
loss: 1.0302191972732544,grad_norm: 0.9999992755930734, iteration: 183168
loss: 0.9995473027229309,grad_norm: 0.9999991614689732, iteration: 183169
loss: 0.9700959324836731,grad_norm: 0.9770718732706906, iteration: 183170
loss: 0.9855018258094788,grad_norm: 0.9523214494941582, iteration: 183171
loss: 0.9996557235717773,grad_norm: 0.9999991408122666, iteration: 183172
loss: 1.0110677480697632,grad_norm: 0.9999992427621374, iteration: 183173
loss: 1.027402400970459,grad_norm: 0.9999991967186308, iteration: 183174
loss: 0.9574194550514221,grad_norm: 0.9086957300918176, iteration: 183175
loss: 1.021618366241455,grad_norm: 0.9999991487802674, iteration: 183176
loss: 1.0033739805221558,grad_norm: 0.9618122424289957, iteration: 183177
loss: 0.9975906014442444,grad_norm: 0.999999001275684, iteration: 183178
loss: 0.9706950783729553,grad_norm: 0.8633869022027353, iteration: 183179
loss: 1.0091137886047363,grad_norm: 0.9999991776013512, iteration: 183180
loss: 1.0166910886764526,grad_norm: 0.9999990624692079, iteration: 183181
loss: 1.0349992513656616,grad_norm: 0.9999991550912661, iteration: 183182
loss: 0.9966335296630859,grad_norm: 0.9999992661067778, iteration: 183183
loss: 1.003293752670288,grad_norm: 0.8232822104162568, iteration: 183184
loss: 0.9822733998298645,grad_norm: 0.9999652038301308, iteration: 183185
loss: 0.9746972322463989,grad_norm: 0.9824312734061567, iteration: 183186
loss: 0.990496814250946,grad_norm: 0.9363199225998022, iteration: 183187
loss: 1.047635793685913,grad_norm: 0.9999991519331374, iteration: 183188
loss: 0.9856308102607727,grad_norm: 0.9819869412724678, iteration: 183189
loss: 1.0152971744537354,grad_norm: 0.8162551455028575, iteration: 183190
loss: 1.0207257270812988,grad_norm: 0.9799098440049663, iteration: 183191
loss: 0.9757712483406067,grad_norm: 0.9999991106851362, iteration: 183192
loss: 1.0255976915359497,grad_norm: 0.9240789024884881, iteration: 183193
loss: 0.9954409003257751,grad_norm: 0.999999476532306, iteration: 183194
loss: 0.9994304180145264,grad_norm: 0.8894056826281295, iteration: 183195
loss: 0.9862462878227234,grad_norm: 0.9990770831387372, iteration: 183196
loss: 1.00068199634552,grad_norm: 0.8746644603669999, iteration: 183197
loss: 0.9769089221954346,grad_norm: 0.8082429270170397, iteration: 183198
loss: 0.9851986169815063,grad_norm: 0.9847254795673583, iteration: 183199
loss: 0.9625426530838013,grad_norm: 0.9314930291042247, iteration: 183200
loss: 0.9892747402191162,grad_norm: 0.9999992060722295, iteration: 183201
loss: 0.9787451028823853,grad_norm: 0.9999991140538932, iteration: 183202
loss: 1.0528751611709595,grad_norm: 0.9268755080546888, iteration: 183203
loss: 0.9613959789276123,grad_norm: 0.9719657793452248, iteration: 183204
loss: 0.9537033438682556,grad_norm: 0.8929957198333175, iteration: 183205
loss: 1.0187195539474487,grad_norm: 0.9207569780797636, iteration: 183206
loss: 0.9670050144195557,grad_norm: 0.9999992911977548, iteration: 183207
loss: 1.00911545753479,grad_norm: 0.9999990444655364, iteration: 183208
loss: 0.9985151886940002,grad_norm: 0.9985684624923717, iteration: 183209
loss: 1.0150972604751587,grad_norm: 0.9275759863831373, iteration: 183210
loss: 1.0250886678695679,grad_norm: 0.9999992270156557, iteration: 183211
loss: 0.990361213684082,grad_norm: 0.867605601846066, iteration: 183212
loss: 0.9715495109558105,grad_norm: 0.8874170743763626, iteration: 183213
loss: 1.0048060417175293,grad_norm: 0.9259999017515677, iteration: 183214
loss: 1.0157626867294312,grad_norm: 0.9071357154137019, iteration: 183215
loss: 0.9972760677337646,grad_norm: 0.9527093257645325, iteration: 183216
loss: 0.9887544512748718,grad_norm: 0.9999993701122223, iteration: 183217
loss: 1.0085257291793823,grad_norm: 0.9999990582410818, iteration: 183218
loss: 0.9901258945465088,grad_norm: 0.9771075948519775, iteration: 183219
loss: 0.9909287691116333,grad_norm: 0.9999989455266609, iteration: 183220
loss: 1.0097490549087524,grad_norm: 0.9999989667629081, iteration: 183221
loss: 0.9635141491889954,grad_norm: 0.7223770666495498, iteration: 183222
loss: 1.0114332437515259,grad_norm: 0.9628548205369997, iteration: 183223
loss: 1.0426414012908936,grad_norm: 0.923832690227547, iteration: 183224
loss: 1.010030746459961,grad_norm: 0.8699301299312595, iteration: 183225
loss: 0.9869737029075623,grad_norm: 0.9846659252777051, iteration: 183226
loss: 0.9444538354873657,grad_norm: 0.9999991012544256, iteration: 183227
loss: 0.9553941488265991,grad_norm: 0.9999991673132149, iteration: 183228
loss: 0.9835230112075806,grad_norm: 0.9663732060250995, iteration: 183229
loss: 0.9987267255783081,grad_norm: 0.999999070887794, iteration: 183230
loss: 0.9827187061309814,grad_norm: 0.8379194113773513, iteration: 183231
loss: 0.9683383703231812,grad_norm: 0.8898424982882408, iteration: 183232
loss: 0.9729418754577637,grad_norm: 0.9608958094640212, iteration: 183233
loss: 0.9989790916442871,grad_norm: 0.9689002939572449, iteration: 183234
loss: 1.0179296731948853,grad_norm: 0.9842260797411432, iteration: 183235
loss: 0.9673935174942017,grad_norm: 0.9999991915406925, iteration: 183236
loss: 1.0062986612319946,grad_norm: 0.9999992856573189, iteration: 183237
loss: 1.037070393562317,grad_norm: 0.999999092785008, iteration: 183238
loss: 0.9934563040733337,grad_norm: 0.9999992078459701, iteration: 183239
loss: 0.9687038660049438,grad_norm: 0.795420104376099, iteration: 183240
loss: 0.9968348145484924,grad_norm: 0.9987236411833075, iteration: 183241
loss: 0.9805765748023987,grad_norm: 0.9999992309556105, iteration: 183242
loss: 0.9833921790122986,grad_norm: 0.8545016500039232, iteration: 183243
loss: 1.0511698722839355,grad_norm: 0.9963552527589787, iteration: 183244
loss: 0.9987602829933167,grad_norm: 0.9855237407110964, iteration: 183245
loss: 0.9533488154411316,grad_norm: 0.9999990745256105, iteration: 183246
loss: 1.0325162410736084,grad_norm: 0.9999990980489756, iteration: 183247
loss: 0.9991456270217896,grad_norm: 0.8193027692046576, iteration: 183248
loss: 0.9739755988121033,grad_norm: 0.9107418359593662, iteration: 183249
loss: 0.9936119318008423,grad_norm: 0.8491773295114647, iteration: 183250
loss: 0.9748928546905518,grad_norm: 0.9999990856537505, iteration: 183251
loss: 1.0263315439224243,grad_norm: 0.9214298680454129, iteration: 183252
loss: 1.01021409034729,grad_norm: 0.9267708591682493, iteration: 183253
loss: 0.9750701189041138,grad_norm: 0.9265674010991224, iteration: 183254
loss: 1.0063793659210205,grad_norm: 0.9999993358272654, iteration: 183255
loss: 1.0019539594650269,grad_norm: 0.9999991076148399, iteration: 183256
loss: 1.0290050506591797,grad_norm: 0.8726004007692875, iteration: 183257
loss: 1.0816121101379395,grad_norm: 0.9038111225761891, iteration: 183258
loss: 0.9853907823562622,grad_norm: 0.9999991463267522, iteration: 183259
loss: 0.9939912557601929,grad_norm: 0.8076116778438951, iteration: 183260
loss: 0.9899419546127319,grad_norm: 0.8788914314846056, iteration: 183261
loss: 1.0501214265823364,grad_norm: 0.9999994221450155, iteration: 183262
loss: 1.0141210556030273,grad_norm: 0.9999990858710863, iteration: 183263
loss: 1.0145134925842285,grad_norm: 0.9303033590179177, iteration: 183264
loss: 0.9851018190383911,grad_norm: 0.9999991092450632, iteration: 183265
loss: 1.0489294528961182,grad_norm: 0.9937833112490673, iteration: 183266
loss: 1.0527771711349487,grad_norm: 0.9126364230536217, iteration: 183267
loss: 1.0015848875045776,grad_norm: 0.8491227911226311, iteration: 183268
loss: 0.9566875696182251,grad_norm: 0.9999996094407625, iteration: 183269
loss: 1.0179390907287598,grad_norm: 0.9999995156208348, iteration: 183270
loss: 1.0371438264846802,grad_norm: 0.9999991673363791, iteration: 183271
loss: 1.0286251306533813,grad_norm: 0.9588856464319249, iteration: 183272
loss: 0.9755586385726929,grad_norm: 0.9624757699493462, iteration: 183273
loss: 0.9924852252006531,grad_norm: 0.9600456874793792, iteration: 183274
loss: 0.9670118093490601,grad_norm: 0.9999991083613066, iteration: 183275
loss: 0.9762513041496277,grad_norm: 0.9999990907116589, iteration: 183276
loss: 0.9827843904495239,grad_norm: 0.9999993357111581, iteration: 183277
loss: 1.0084561109542847,grad_norm: 0.9557639516601905, iteration: 183278
loss: 1.0031687021255493,grad_norm: 0.9715697172366864, iteration: 183279
loss: 0.9928281903266907,grad_norm: 0.9146357406675732, iteration: 183280
loss: 0.9830394983291626,grad_norm: 0.9270310206637472, iteration: 183281
loss: 1.0309032201766968,grad_norm: 0.9731317209868617, iteration: 183282
loss: 1.0150127410888672,grad_norm: 0.9720788222996141, iteration: 183283
loss: 1.0060800313949585,grad_norm: 0.9999991676838355, iteration: 183284
loss: 1.0097318887710571,grad_norm: 0.7710513925899664, iteration: 183285
loss: 0.9706541299819946,grad_norm: 0.9999990271049118, iteration: 183286
loss: 0.982170820236206,grad_norm: 0.9999991444997585, iteration: 183287
loss: 1.0082454681396484,grad_norm: 0.999999761579827, iteration: 183288
loss: 0.9844123125076294,grad_norm: 0.9999990088909423, iteration: 183289
loss: 0.9695720672607422,grad_norm: 0.9999990328287961, iteration: 183290
loss: 0.9837753176689148,grad_norm: 0.8022550069544132, iteration: 183291
loss: 0.9658792018890381,grad_norm: 0.819848055751737, iteration: 183292
loss: 1.0175946950912476,grad_norm: 0.9999989263332316, iteration: 183293
loss: 0.9497868418693542,grad_norm: 0.9491845390501695, iteration: 183294
loss: 0.9806668162345886,grad_norm: 0.9999991442306827, iteration: 183295
loss: 1.006624698638916,grad_norm: 0.8403585971542891, iteration: 183296
loss: 0.9940427541732788,grad_norm: 0.9698693823251866, iteration: 183297
loss: 1.0546990633010864,grad_norm: 0.9999995565768811, iteration: 183298
loss: 1.0042883157730103,grad_norm: 0.9386475152245213, iteration: 183299
loss: 1.023506760597229,grad_norm: 0.8438840691290543, iteration: 183300
loss: 1.0054901838302612,grad_norm: 0.9280316314934911, iteration: 183301
loss: 0.9757387042045593,grad_norm: 0.874233448203038, iteration: 183302
loss: 0.973854124546051,grad_norm: 0.9999992525730256, iteration: 183303
loss: 1.0608669519424438,grad_norm: 0.9999990396606803, iteration: 183304
loss: 0.9615861177444458,grad_norm: 0.9654548800687998, iteration: 183305
loss: 1.0089547634124756,grad_norm: 0.9488326386501696, iteration: 183306
loss: 0.9844409227371216,grad_norm: 0.8923844335397203, iteration: 183307
loss: 1.0146374702453613,grad_norm: 0.8969038438306183, iteration: 183308
loss: 1.0087672472000122,grad_norm: 0.9341814372571157, iteration: 183309
loss: 1.0078024864196777,grad_norm: 0.9999991179200728, iteration: 183310
loss: 1.0020347833633423,grad_norm: 0.9222268839819854, iteration: 183311
loss: 1.0304659605026245,grad_norm: 0.8832654178367474, iteration: 183312
loss: 1.010910153388977,grad_norm: 0.9999991598747244, iteration: 183313
loss: 0.9974752068519592,grad_norm: 0.877519280279427, iteration: 183314
loss: 1.0061599016189575,grad_norm: 0.8687478826896945, iteration: 183315
loss: 1.0124787092208862,grad_norm: 0.8129525233573826, iteration: 183316
loss: 0.9906837344169617,grad_norm: 0.9984644703369563, iteration: 183317
loss: 1.005287766456604,grad_norm: 0.9252119629403026, iteration: 183318
loss: 1.0458356142044067,grad_norm: 0.964389732463096, iteration: 183319
loss: 1.0166815519332886,grad_norm: 0.8359956211792642, iteration: 183320
loss: 0.9847832918167114,grad_norm: 0.8139398521572392, iteration: 183321
loss: 0.979016900062561,grad_norm: 0.846137775100645, iteration: 183322
loss: 1.0399448871612549,grad_norm: 0.9999992866730767, iteration: 183323
loss: 0.983653724193573,grad_norm: 0.9478374820497819, iteration: 183324
loss: 1.009261131286621,grad_norm: 0.9999989172696989, iteration: 183325
loss: 1.01742684841156,grad_norm: 0.9999991200831331, iteration: 183326
loss: 0.9609502553939819,grad_norm: 0.9999991232203153, iteration: 183327
loss: 1.0078693628311157,grad_norm: 0.8172612610209303, iteration: 183328
loss: 1.0202152729034424,grad_norm: 0.9449159693776243, iteration: 183329
loss: 1.027433156967163,grad_norm: 0.999999170714522, iteration: 183330
loss: 1.031591534614563,grad_norm: 0.9055302005314604, iteration: 183331
loss: 1.0040823221206665,grad_norm: 0.8638813205487803, iteration: 183332
loss: 1.0208311080932617,grad_norm: 0.9763301201059881, iteration: 183333
loss: 0.9947205185890198,grad_norm: 0.9999993442159646, iteration: 183334
loss: 0.9689769744873047,grad_norm: 0.9999990099963958, iteration: 183335
loss: 1.0225754976272583,grad_norm: 0.8924814395516747, iteration: 183336
loss: 0.987772524356842,grad_norm: 0.9999990275299808, iteration: 183337
loss: 0.9947599768638611,grad_norm: 0.947990122460129, iteration: 183338
loss: 1.000996470451355,grad_norm: 0.8663356862161332, iteration: 183339
loss: 0.983952522277832,grad_norm: 0.9999991024675499, iteration: 183340
loss: 0.9752247333526611,grad_norm: 0.9147569078166471, iteration: 183341
loss: 1.0089744329452515,grad_norm: 0.8163982256314668, iteration: 183342
loss: 1.0314029455184937,grad_norm: 0.9983670026965191, iteration: 183343
loss: 1.0118365287780762,grad_norm: 0.8669154623419102, iteration: 183344
loss: 0.9967978000640869,grad_norm: 0.9999991454156458, iteration: 183345
loss: 0.9803250432014465,grad_norm: 0.9611471190782169, iteration: 183346
loss: 0.9841513633728027,grad_norm: 0.8645861720039337, iteration: 183347
loss: 0.9688912034034729,grad_norm: 0.9924058297388672, iteration: 183348
loss: 1.0095285177230835,grad_norm: 0.7584047944556602, iteration: 183349
loss: 1.0023901462554932,grad_norm: 0.9792891209226154, iteration: 183350
loss: 1.0194964408874512,grad_norm: 0.9999991370919187, iteration: 183351
loss: 1.0319229364395142,grad_norm: 0.8235721421458411, iteration: 183352
loss: 0.9884563684463501,grad_norm: 0.9999991003257254, iteration: 183353
loss: 0.9906560778617859,grad_norm: 0.9999991070067417, iteration: 183354
loss: 0.9851628541946411,grad_norm: 0.9999991688932298, iteration: 183355
loss: 0.989136278629303,grad_norm: 0.9209455966618563, iteration: 183356
loss: 0.9543017745018005,grad_norm: 0.8740502043852835, iteration: 183357
loss: 0.998862624168396,grad_norm: 0.896834166185785, iteration: 183358
loss: 1.0211820602416992,grad_norm: 0.9999992615143156, iteration: 183359
loss: 0.9992260932922363,grad_norm: 0.9915477631692038, iteration: 183360
loss: 0.998119592666626,grad_norm: 0.9839338917582041, iteration: 183361
loss: 1.0120983123779297,grad_norm: 0.9999991041224955, iteration: 183362
loss: 0.982211709022522,grad_norm: 0.943323239456324, iteration: 183363
loss: 0.9793720245361328,grad_norm: 0.9999991486923652, iteration: 183364
loss: 1.0488510131835938,grad_norm: 0.9999991820621184, iteration: 183365
loss: 1.0096489191055298,grad_norm: 0.9999992012598493, iteration: 183366
loss: 1.011733889579773,grad_norm: 0.888972133188079, iteration: 183367
loss: 0.9965206384658813,grad_norm: 0.9405981278721129, iteration: 183368
loss: 0.9810395240783691,grad_norm: 0.9704889843366366, iteration: 183369
loss: 1.0192490816116333,grad_norm: 0.9906705558913758, iteration: 183370
loss: 1.0050313472747803,grad_norm: 0.9856898686188653, iteration: 183371
loss: 0.9929372072219849,grad_norm: 0.9275552226361558, iteration: 183372
loss: 0.9866365194320679,grad_norm: 0.9314360982534245, iteration: 183373
loss: 1.0016270875930786,grad_norm: 0.9999991122678695, iteration: 183374
loss: 0.9872097969055176,grad_norm: 0.9999994470480642, iteration: 183375
loss: 0.9979809522628784,grad_norm: 0.9999991121582436, iteration: 183376
loss: 0.9998793005943298,grad_norm: 0.9702610228732907, iteration: 183377
loss: 0.9907220005989075,grad_norm: 0.8905714334860028, iteration: 183378
loss: 1.0048704147338867,grad_norm: 0.9503978285040801, iteration: 183379
loss: 0.9571264982223511,grad_norm: 0.9914010318319909, iteration: 183380
loss: 0.9964684844017029,grad_norm: 0.8608186755391964, iteration: 183381
loss: 0.9785791039466858,grad_norm: 0.9255243256074575, iteration: 183382
loss: 0.9465813040733337,grad_norm: 0.9999990828892378, iteration: 183383
loss: 1.0221121311187744,grad_norm: 0.9367900536759389, iteration: 183384
loss: 1.0508465766906738,grad_norm: 0.999999477543909, iteration: 183385
loss: 1.0225809812545776,grad_norm: 0.8721640923673432, iteration: 183386
loss: 1.0355316400527954,grad_norm: 0.9059364244169036, iteration: 183387
loss: 1.042806625366211,grad_norm: 0.999999570854337, iteration: 183388
loss: 1.0303014516830444,grad_norm: 0.8529314665989602, iteration: 183389
loss: 0.9706835150718689,grad_norm: 0.9398926972104887, iteration: 183390
loss: 0.976226270198822,grad_norm: 0.9648270253327973, iteration: 183391
loss: 1.0508291721343994,grad_norm: 0.9486309355928629, iteration: 183392
loss: 1.0173975229263306,grad_norm: 0.9999990163541559, iteration: 183393
loss: 0.9922716617584229,grad_norm: 0.9999991829761865, iteration: 183394
loss: 1.0025663375854492,grad_norm: 0.9403308942780777, iteration: 183395
loss: 1.0221701860427856,grad_norm: 0.9694873191808502, iteration: 183396
loss: 0.9900655746459961,grad_norm: 0.9774584618944345, iteration: 183397
loss: 1.0267952680587769,grad_norm: 0.9999991236302888, iteration: 183398
loss: 0.9913341999053955,grad_norm: 0.999999139837441, iteration: 183399
loss: 1.0117406845092773,grad_norm: 0.9999991954753488, iteration: 183400
loss: 1.021726369857788,grad_norm: 0.9277125423907282, iteration: 183401
loss: 1.0224169492721558,grad_norm: 0.9999990969868118, iteration: 183402
loss: 0.9978165030479431,grad_norm: 0.9999991239250052, iteration: 183403
loss: 0.99875408411026,grad_norm: 0.9999990498104767, iteration: 183404
loss: 0.9947624206542969,grad_norm: 0.9999989963682833, iteration: 183405
loss: 0.9530426263809204,grad_norm: 0.8300264622380783, iteration: 183406
loss: 1.0281227827072144,grad_norm: 0.9142562130578131, iteration: 183407
loss: 0.9872392416000366,grad_norm: 0.9463854448722827, iteration: 183408
loss: 0.9555486440658569,grad_norm: 0.9999991447266712, iteration: 183409
loss: 1.0127674341201782,grad_norm: 0.9999991843565623, iteration: 183410
loss: 0.9742147326469421,grad_norm: 0.9428797296749487, iteration: 183411
loss: 0.9991307258605957,grad_norm: 0.9898740437275696, iteration: 183412
loss: 1.0214600563049316,grad_norm: 0.9999990762946488, iteration: 183413
loss: 1.0056110620498657,grad_norm: 0.9394665413024078, iteration: 183414
loss: 1.0074894428253174,grad_norm: 0.9999991618795533, iteration: 183415
loss: 1.0047639608383179,grad_norm: 0.9999992613348838, iteration: 183416
loss: 0.9917538166046143,grad_norm: 0.9521310366366558, iteration: 183417
loss: 0.9837741255760193,grad_norm: 0.9821929540426312, iteration: 183418
loss: 1.0064879655838013,grad_norm: 0.9999992055356878, iteration: 183419
loss: 0.9764159321784973,grad_norm: 0.8637179883521169, iteration: 183420
loss: 0.9971492886543274,grad_norm: 0.8881353929454, iteration: 183421
loss: 1.1047343015670776,grad_norm: 0.9999991551485091, iteration: 183422
loss: 0.9883845448493958,grad_norm: 0.8740586005576624, iteration: 183423
loss: 1.0253089666366577,grad_norm: 0.9999997673025713, iteration: 183424
loss: 1.0146877765655518,grad_norm: 0.999999334034963, iteration: 183425
loss: 1.0026053190231323,grad_norm: 0.8281114750747984, iteration: 183426
loss: 1.0111570358276367,grad_norm: 0.8735743188739066, iteration: 183427
loss: 1.0053775310516357,grad_norm: 0.9999992579518722, iteration: 183428
loss: 0.9862366318702698,grad_norm: 0.8814917241489817, iteration: 183429
loss: 1.0353659391403198,grad_norm: 0.9640310372091992, iteration: 183430
loss: 0.9958935976028442,grad_norm: 0.925104953317296, iteration: 183431
loss: 1.006766438484192,grad_norm: 0.9180279677943718, iteration: 183432
loss: 0.9657817482948303,grad_norm: 0.9999991336765707, iteration: 183433
loss: 1.0692452192306519,grad_norm: 0.9999998359256548, iteration: 183434
loss: 1.0033258199691772,grad_norm: 0.7838025654234407, iteration: 183435
loss: 1.0282878875732422,grad_norm: 0.999999196247304, iteration: 183436
loss: 1.0041691064834595,grad_norm: 0.8977743039679726, iteration: 183437
loss: 1.0000311136245728,grad_norm: 0.9999991764917747, iteration: 183438
loss: 0.979050874710083,grad_norm: 0.9999991229990662, iteration: 183439
loss: 1.0024528503417969,grad_norm: 0.9999990662337898, iteration: 183440
loss: 1.0050233602523804,grad_norm: 0.9644754876396928, iteration: 183441
loss: 0.9831755757331848,grad_norm: 0.9304531415279658, iteration: 183442
loss: 1.0105843544006348,grad_norm: 0.8998060275042792, iteration: 183443
loss: 0.9688051342964172,grad_norm: 0.8382436457278206, iteration: 183444
loss: 1.019911289215088,grad_norm: 0.9135453993347821, iteration: 183445
loss: 0.971851110458374,grad_norm: 0.8717861598308543, iteration: 183446
loss: 0.9850617051124573,grad_norm: 0.9999990368320009, iteration: 183447
loss: 1.0121064186096191,grad_norm: 0.8921270814861717, iteration: 183448
loss: 1.0105503797531128,grad_norm: 0.8925830398882109, iteration: 183449
loss: 1.0041210651397705,grad_norm: 0.9999990793320634, iteration: 183450
loss: 1.0028011798858643,grad_norm: 0.9999990731677034, iteration: 183451
loss: 0.981695830821991,grad_norm: 0.8653753928100648, iteration: 183452
loss: 0.9848994016647339,grad_norm: 0.9323007406216715, iteration: 183453
loss: 0.9838292002677917,grad_norm: 0.8509165103711206, iteration: 183454
loss: 1.0064325332641602,grad_norm: 0.9999990019630863, iteration: 183455
loss: 1.0379295349121094,grad_norm: 0.9999994303144184, iteration: 183456
loss: 1.0512511730194092,grad_norm: 0.9999990677519763, iteration: 183457
loss: 0.9930392503738403,grad_norm: 0.9999991090773488, iteration: 183458
loss: 1.0317569971084595,grad_norm: 0.9999994910098724, iteration: 183459
loss: 1.0128464698791504,grad_norm: 0.9280046437741144, iteration: 183460
loss: 0.9681752324104309,grad_norm: 0.9199699943065175, iteration: 183461
loss: 0.9886553287506104,grad_norm: 0.999999047193644, iteration: 183462
loss: 0.9699130654335022,grad_norm: 0.9859650183487702, iteration: 183463
loss: 0.9950723052024841,grad_norm: 0.8871343818633376, iteration: 183464
loss: 0.9815545678138733,grad_norm: 0.7852059916552215, iteration: 183465
loss: 1.0480067729949951,grad_norm: 0.9999992713728484, iteration: 183466
loss: 0.9739862680435181,grad_norm: 0.9765050137479369, iteration: 183467
loss: 1.002312421798706,grad_norm: 0.9999993217320607, iteration: 183468
loss: 1.0332231521606445,grad_norm: 0.8192347997998594, iteration: 183469
loss: 1.030107855796814,grad_norm: 0.9999990422452768, iteration: 183470
loss: 1.0228303670883179,grad_norm: 0.9999992702212904, iteration: 183471
loss: 1.015236735343933,grad_norm: 0.9999994089917998, iteration: 183472
loss: 0.9988049268722534,grad_norm: 0.776367781357725, iteration: 183473
loss: 1.0026448965072632,grad_norm: 0.999999162631201, iteration: 183474
loss: 0.9966481924057007,grad_norm: 0.9978282582215106, iteration: 183475
loss: 0.975825309753418,grad_norm: 0.9451931068670799, iteration: 183476
loss: 0.9858493208885193,grad_norm: 0.9326559576481952, iteration: 183477
loss: 1.001501202583313,grad_norm: 0.9663788975346898, iteration: 183478
loss: 1.0200302600860596,grad_norm: 0.8292906157551154, iteration: 183479
loss: 0.9942508339881897,grad_norm: 0.9288507694183044, iteration: 183480
loss: 0.9855538606643677,grad_norm: 0.9999989919502688, iteration: 183481
loss: 1.0117405652999878,grad_norm: 0.9999991235026365, iteration: 183482
loss: 0.9642542600631714,grad_norm: 0.8589414427527967, iteration: 183483
loss: 1.0278151035308838,grad_norm: 0.9999999207254416, iteration: 183484
loss: 1.0130388736724854,grad_norm: 0.948612217313904, iteration: 183485
loss: 1.0071431398391724,grad_norm: 0.9999991121669545, iteration: 183486
loss: 0.9922301173210144,grad_norm: 0.9999991573014944, iteration: 183487
loss: 0.9737574458122253,grad_norm: 0.8421247738237824, iteration: 183488
loss: 0.9714013338088989,grad_norm: 0.9999993446394368, iteration: 183489
loss: 0.9838979244232178,grad_norm: 0.9999990215864993, iteration: 183490
loss: 1.0021475553512573,grad_norm: 0.963978048610871, iteration: 183491
loss: 0.9916514158248901,grad_norm: 0.9015860274748665, iteration: 183492
loss: 0.98508620262146,grad_norm: 0.7981286309783874, iteration: 183493
loss: 1.004652738571167,grad_norm: 0.9149049139279206, iteration: 183494
loss: 0.9953868389129639,grad_norm: 0.9550769729041723, iteration: 183495
loss: 1.0048824548721313,grad_norm: 0.9074164072687593, iteration: 183496
loss: 1.0078957080841064,grad_norm: 0.9999993728376199, iteration: 183497
loss: 0.9810343980789185,grad_norm: 0.9133721578676351, iteration: 183498
loss: 1.0224049091339111,grad_norm: 0.972472368951894, iteration: 183499
loss: 0.9933613538742065,grad_norm: 0.9999991654243185, iteration: 183500
loss: 1.0434411764144897,grad_norm: 0.9999992529564782, iteration: 183501
loss: 0.9837357401847839,grad_norm: 0.9999990130081329, iteration: 183502
loss: 1.0098929405212402,grad_norm: 0.8575678093922406, iteration: 183503
loss: 1.0120651721954346,grad_norm: 0.9419669816558783, iteration: 183504
loss: 1.0166597366333008,grad_norm: 0.8408136964185144, iteration: 183505
loss: 0.9995736479759216,grad_norm: 0.9556415328778285, iteration: 183506
loss: 1.0331281423568726,grad_norm: 0.8451709530125726, iteration: 183507
loss: 1.007170557975769,grad_norm: 0.99999910372429, iteration: 183508
loss: 0.9906154274940491,grad_norm: 0.9106879221470183, iteration: 183509
loss: 0.984710156917572,grad_norm: 0.9999991376584041, iteration: 183510
loss: 0.9946978688240051,grad_norm: 0.9999990202829807, iteration: 183511
loss: 1.0372810363769531,grad_norm: 0.8995817174495648, iteration: 183512
loss: 0.995083212852478,grad_norm: 0.8256154333489594, iteration: 183513
loss: 1.021856427192688,grad_norm: 0.9763577811001161, iteration: 183514
loss: 0.9982084035873413,grad_norm: 0.9999992577765499, iteration: 183515
loss: 0.995670735836029,grad_norm: 0.8672930664582045, iteration: 183516
loss: 0.9955053925514221,grad_norm: 0.909553884258987, iteration: 183517
loss: 0.9570648670196533,grad_norm: 0.9999989417880232, iteration: 183518
loss: 1.0069748163223267,grad_norm: 0.8863759468402276, iteration: 183519
loss: 0.9966366291046143,grad_norm: 0.9999991265724145, iteration: 183520
loss: 0.9736908674240112,grad_norm: 0.9076074420250098, iteration: 183521
loss: 0.9968095421791077,grad_norm: 0.9999990899333164, iteration: 183522
loss: 0.9933074712753296,grad_norm: 0.9999993288043872, iteration: 183523
loss: 1.0145949125289917,grad_norm: 0.9298579502761481, iteration: 183524
loss: 1.0328890085220337,grad_norm: 0.9999991346918604, iteration: 183525
loss: 1.0221145153045654,grad_norm: 0.825469903430349, iteration: 183526
loss: 0.9765511155128479,grad_norm: 0.9222402021584672, iteration: 183527
loss: 1.034621000289917,grad_norm: 0.999999938091183, iteration: 183528
loss: 0.9849115014076233,grad_norm: 0.9999990070165619, iteration: 183529
loss: 0.9931073188781738,grad_norm: 0.9224042712471465, iteration: 183530
loss: 1.0225803852081299,grad_norm: 0.9062132757139497, iteration: 183531
loss: 0.9855753183364868,grad_norm: 0.928060772961186, iteration: 183532
loss: 0.9909910559654236,grad_norm: 0.906007164859325, iteration: 183533
loss: 1.0048179626464844,grad_norm: 0.9345970807818803, iteration: 183534
loss: 0.9887814521789551,grad_norm: 0.8746473300515256, iteration: 183535
loss: 0.9859195351600647,grad_norm: 0.8656366910425055, iteration: 183536
loss: 0.9927069544792175,grad_norm: 0.999999159371053, iteration: 183537
loss: 1.0083725452423096,grad_norm: 0.8829225227356408, iteration: 183538
loss: 1.0090625286102295,grad_norm: 0.9257462942823749, iteration: 183539
loss: 1.0413933992385864,grad_norm: 0.9999994470933566, iteration: 183540
loss: 1.0437790155410767,grad_norm: 0.9999992012819922, iteration: 183541
loss: 1.0151299238204956,grad_norm: 0.9999992075480716, iteration: 183542
loss: 0.9776510000228882,grad_norm: 0.969024724089096, iteration: 183543
loss: 1.0054587125778198,grad_norm: 0.9999992297888187, iteration: 183544
loss: 1.0077534914016724,grad_norm: 0.9277612303493743, iteration: 183545
loss: 1.0151225328445435,grad_norm: 0.878610184822991, iteration: 183546
loss: 0.9876275062561035,grad_norm: 0.929544475918252, iteration: 183547
loss: 0.9812862873077393,grad_norm: 0.9999991679723432, iteration: 183548
loss: 0.9860377907752991,grad_norm: 0.9709238046867381, iteration: 183549
loss: 1.0304768085479736,grad_norm: 0.9999991836816164, iteration: 183550
loss: 1.0088857412338257,grad_norm: 0.9999991914422456, iteration: 183551
loss: 1.0006260871887207,grad_norm: 0.9999992537176714, iteration: 183552
loss: 1.0024147033691406,grad_norm: 0.9407512682190996, iteration: 183553
loss: 0.9988086819648743,grad_norm: 0.9999992204179445, iteration: 183554
loss: 0.9815135598182678,grad_norm: 0.9536034317134684, iteration: 183555
loss: 0.9815433621406555,grad_norm: 0.9999990612477366, iteration: 183556
loss: 0.9989541172981262,grad_norm: 0.9999992617782036, iteration: 183557
loss: 0.9561552405357361,grad_norm: 0.852683374891507, iteration: 183558
loss: 1.0019760131835938,grad_norm: 0.9999991590140443, iteration: 183559
loss: 1.0052084922790527,grad_norm: 0.9231658086085469, iteration: 183560
loss: 1.0300241708755493,grad_norm: 0.8518947500973517, iteration: 183561
loss: 1.0300354957580566,grad_norm: 0.9227245051897601, iteration: 183562
loss: 0.9638848900794983,grad_norm: 0.9403177821799262, iteration: 183563
loss: 0.9775151014328003,grad_norm: 0.9245241291526474, iteration: 183564
loss: 0.9917685985565186,grad_norm: 0.9999992962622392, iteration: 183565
loss: 0.9903937578201294,grad_norm: 0.9151985013855986, iteration: 183566
loss: 1.0170897245407104,grad_norm: 0.9999991603133839, iteration: 183567
loss: 1.0178048610687256,grad_norm: 0.9234595111427587, iteration: 183568
loss: 0.9979210495948792,grad_norm: 0.8638353355539259, iteration: 183569
loss: 0.9779901504516602,grad_norm: 0.999999149284207, iteration: 183570
loss: 1.0649397373199463,grad_norm: 0.9999995751405245, iteration: 183571
loss: 1.000929355621338,grad_norm: 0.9999989844177442, iteration: 183572
loss: 1.010848879814148,grad_norm: 0.7483086667421474, iteration: 183573
loss: 1.0521305799484253,grad_norm: 0.9999996778297172, iteration: 183574
loss: 1.0083143711090088,grad_norm: 0.9999992798971443, iteration: 183575
loss: 0.9788803458213806,grad_norm: 0.8725145957608674, iteration: 183576
loss: 0.9612038731575012,grad_norm: 0.9999993330724025, iteration: 183577
loss: 1.0102989673614502,grad_norm: 0.9331117745496017, iteration: 183578
loss: 0.9904062151908875,grad_norm: 0.9999991364351689, iteration: 183579
loss: 1.0086896419525146,grad_norm: 0.9999993456843754, iteration: 183580
loss: 1.014500617980957,grad_norm: 0.953077317230972, iteration: 183581
loss: 0.995006799697876,grad_norm: 0.8276815397091428, iteration: 183582
loss: 1.0125771760940552,grad_norm: 0.9999992283534587, iteration: 183583
loss: 1.0623551607131958,grad_norm: 0.999999195649831, iteration: 183584
loss: 1.039082407951355,grad_norm: 0.9071801651057538, iteration: 183585
loss: 0.9864752888679504,grad_norm: 0.9999991380208525, iteration: 183586
loss: 1.0374234914779663,grad_norm: 0.9999991898621893, iteration: 183587
loss: 1.023794174194336,grad_norm: 0.9514023890470714, iteration: 183588
loss: 1.0463775396347046,grad_norm: 0.9098501832833119, iteration: 183589
loss: 1.0388237237930298,grad_norm: 0.9253812347269644, iteration: 183590
loss: 1.0023998022079468,grad_norm: 0.9628446136971378, iteration: 183591
loss: 0.9974875450134277,grad_norm: 0.999999145297128, iteration: 183592
loss: 0.9836778044700623,grad_norm: 0.8929801718000067, iteration: 183593
loss: 1.0246623754501343,grad_norm: 0.9559736003030845, iteration: 183594
loss: 0.9831058382987976,grad_norm: 0.9999991619463099, iteration: 183595
loss: 1.0085009336471558,grad_norm: 0.999999121338908, iteration: 183596
loss: 1.0102640390396118,grad_norm: 0.9258088822514091, iteration: 183597
loss: 1.0003342628479004,grad_norm: 0.9999991166875392, iteration: 183598
loss: 1.0003986358642578,grad_norm: 0.9475783050589739, iteration: 183599
loss: 1.0001760721206665,grad_norm: 0.9440998423462109, iteration: 183600
loss: 1.0094929933547974,grad_norm: 0.9999989621342208, iteration: 183601
loss: 0.9585199952125549,grad_norm: 0.8494857301214586, iteration: 183602
loss: 1.0032668113708496,grad_norm: 0.8926835701170925, iteration: 183603
loss: 0.9488597512245178,grad_norm: 0.9021368107059727, iteration: 183604
loss: 0.9671682715415955,grad_norm: 0.9999992071431915, iteration: 183605
loss: 0.9994426965713501,grad_norm: 0.9999993052443871, iteration: 183606
loss: 0.9942541122436523,grad_norm: 0.999999049930774, iteration: 183607
loss: 1.0108537673950195,grad_norm: 0.9999991072093105, iteration: 183608
loss: 0.9768068194389343,grad_norm: 0.9999992020047268, iteration: 183609
loss: 0.9846024513244629,grad_norm: 0.9391512068864184, iteration: 183610
loss: 0.9854044914245605,grad_norm: 0.9401429358902591, iteration: 183611
loss: 0.9726701974868774,grad_norm: 0.9999990497544554, iteration: 183612
loss: 1.011999249458313,grad_norm: 0.8107883503926138, iteration: 183613
loss: 0.9716429710388184,grad_norm: 0.9199110447645845, iteration: 183614
loss: 0.9832497835159302,grad_norm: 0.8906237461780219, iteration: 183615
loss: 1.0168724060058594,grad_norm: 0.9794190465724374, iteration: 183616
loss: 1.0335525274276733,grad_norm: 0.9999994812370611, iteration: 183617
loss: 1.0219764709472656,grad_norm: 0.9999991548054877, iteration: 183618
loss: 1.0076171159744263,grad_norm: 0.9921601470292409, iteration: 183619
loss: 0.9936665892601013,grad_norm: 0.9999989654852576, iteration: 183620
loss: 0.9830718636512756,grad_norm: 0.9192481919209244, iteration: 183621
loss: 1.0036636590957642,grad_norm: 0.866963425970106, iteration: 183622
loss: 0.988699197769165,grad_norm: 0.9736707480134051, iteration: 183623
loss: 1.0183947086334229,grad_norm: 0.9999991198342675, iteration: 183624
loss: 1.0022257566452026,grad_norm: 0.8325313877394023, iteration: 183625
loss: 0.985684871673584,grad_norm: 0.932426511720689, iteration: 183626
loss: 1.0012787580490112,grad_norm: 0.8393890644566021, iteration: 183627
loss: 1.011027216911316,grad_norm: 0.9939311461895475, iteration: 183628
loss: 0.9886451959609985,grad_norm: 0.9999991067880074, iteration: 183629
loss: 1.0330463647842407,grad_norm: 0.9999991329495169, iteration: 183630
loss: 1.0516761541366577,grad_norm: 0.9999997465040278, iteration: 183631
loss: 0.9766479730606079,grad_norm: 0.8931758780570361, iteration: 183632
loss: 0.996944785118103,grad_norm: 0.826919452441242, iteration: 183633
loss: 1.0317935943603516,grad_norm: 0.999998914803633, iteration: 183634
loss: 1.0142126083374023,grad_norm: 0.90530038174929, iteration: 183635
loss: 0.9458585381507874,grad_norm: 0.9999991234679051, iteration: 183636
loss: 0.9994158148765564,grad_norm: 0.9466207240069853, iteration: 183637
loss: 1.1175731420516968,grad_norm: 0.9999996509903634, iteration: 183638
loss: 0.984144389629364,grad_norm: 0.9204792737401801, iteration: 183639
loss: 1.0465680360794067,grad_norm: 0.9999994167315278, iteration: 183640
loss: 1.0094040632247925,grad_norm: 0.950527778126548, iteration: 183641
loss: 1.0156924724578857,grad_norm: 0.9999992258760745, iteration: 183642
loss: 1.001592993736267,grad_norm: 0.9106220442117324, iteration: 183643
loss: 0.9673869013786316,grad_norm: 0.9999991238416603, iteration: 183644
loss: 0.9765607714653015,grad_norm: 0.942505138330669, iteration: 183645
loss: 0.9515013694763184,grad_norm: 0.8790337137929234, iteration: 183646
loss: 1.0310052633285522,grad_norm: 0.9999991355709408, iteration: 183647
loss: 1.015486478805542,grad_norm: 0.960265075668286, iteration: 183648
loss: 1.0270193815231323,grad_norm: 0.8829171414834663, iteration: 183649
loss: 0.9964224100112915,grad_norm: 0.9321938625643585, iteration: 183650
loss: 1.0083187818527222,grad_norm: 0.8676668433595242, iteration: 183651
loss: 0.993188738822937,grad_norm: 0.9252057361498301, iteration: 183652
loss: 1.0432711839675903,grad_norm: 0.9999990623044196, iteration: 183653
loss: 0.9624019265174866,grad_norm: 0.9271742787257922, iteration: 183654
loss: 0.9714809060096741,grad_norm: 0.9963285545068399, iteration: 183655
loss: 1.0235066413879395,grad_norm: 0.9999993196476444, iteration: 183656
loss: 0.9994417428970337,grad_norm: 0.8721261618119338, iteration: 183657
loss: 1.029271125793457,grad_norm: 0.9067067128107614, iteration: 183658
loss: 1.0101666450500488,grad_norm: 0.9999992486932192, iteration: 183659
loss: 1.0342848300933838,grad_norm: 0.9999990869370269, iteration: 183660
loss: 1.0127317905426025,grad_norm: 0.9787824762741395, iteration: 183661
loss: 0.9635893106460571,grad_norm: 0.8533456585856871, iteration: 183662
loss: 1.0151795148849487,grad_norm: 0.9999991243622603, iteration: 183663
loss: 1.0138944387435913,grad_norm: 0.8990565530938663, iteration: 183664
loss: 0.9964434504508972,grad_norm: 0.8891224905463014, iteration: 183665
loss: 0.994526743888855,grad_norm: 0.9417547587921004, iteration: 183666
loss: 1.013355016708374,grad_norm: 0.9999989645195289, iteration: 183667
loss: 0.988120973110199,grad_norm: 0.9999990549184792, iteration: 183668
loss: 0.9843980669975281,grad_norm: 0.9138442630077004, iteration: 183669
loss: 1.1082146167755127,grad_norm: 0.9999992618289409, iteration: 183670
loss: 1.146222710609436,grad_norm: 0.9999992527134492, iteration: 183671
loss: 1.0039619207382202,grad_norm: 0.9999998809770173, iteration: 183672
loss: 1.0120304822921753,grad_norm: 0.9999991853873919, iteration: 183673
loss: 1.00872004032135,grad_norm: 0.999999318258818, iteration: 183674
loss: 1.0113979578018188,grad_norm: 0.9999990725424521, iteration: 183675
loss: 0.964749276638031,grad_norm: 0.9141899065351918, iteration: 183676
loss: 0.9835808277130127,grad_norm: 0.9999993769391218, iteration: 183677
loss: 1.0132689476013184,grad_norm: 0.999999148441499, iteration: 183678
loss: 1.0225284099578857,grad_norm: 0.9999991411541294, iteration: 183679
loss: 0.9957562685012817,grad_norm: 0.9273242168838458, iteration: 183680
loss: 0.9760922193527222,grad_norm: 0.9999992751726379, iteration: 183681
loss: 0.9665290117263794,grad_norm: 0.9999991891173443, iteration: 183682
loss: 0.97981858253479,grad_norm: 0.9333713960767108, iteration: 183683
loss: 1.0100585222244263,grad_norm: 0.9999995566229652, iteration: 183684
loss: 0.993300199508667,grad_norm: 0.9365906037728969, iteration: 183685
loss: 1.0196411609649658,grad_norm: 0.9999992987486888, iteration: 183686
loss: 1.0185644626617432,grad_norm: 0.8952336745807568, iteration: 183687
loss: 1.0116790533065796,grad_norm: 0.8023261264015715, iteration: 183688
loss: 1.003119945526123,grad_norm: 0.9476797632714723, iteration: 183689
loss: 1.007232666015625,grad_norm: 0.999999240199177, iteration: 183690
loss: 0.965460479259491,grad_norm: 0.9999990394307958, iteration: 183691
loss: 1.0150835514068604,grad_norm: 0.9999989968280952, iteration: 183692
loss: 1.0421497821807861,grad_norm: 0.8659381470132342, iteration: 183693
loss: 1.0160613059997559,grad_norm: 0.9999990788076362, iteration: 183694
loss: 1.0073888301849365,grad_norm: 0.9999991726345346, iteration: 183695
loss: 1.2419878244400024,grad_norm: 0.9999995647451558, iteration: 183696
loss: 1.0102171897888184,grad_norm: 0.9999992385678139, iteration: 183697
loss: 1.0128334760665894,grad_norm: 0.9999991236457201, iteration: 183698
loss: 1.0211700201034546,grad_norm: 0.9441720571720017, iteration: 183699
loss: 0.9866403341293335,grad_norm: 0.9798227507690707, iteration: 183700
loss: 1.0235893726348877,grad_norm: 0.9999989898683552, iteration: 183701
loss: 0.9940711259841919,grad_norm: 0.817799064344246, iteration: 183702
loss: 0.9940845370292664,grad_norm: 0.9006552964921488, iteration: 183703
loss: 0.9761590957641602,grad_norm: 0.8009902523961323, iteration: 183704
loss: 1.0100723505020142,grad_norm: 0.9342762750857464, iteration: 183705
loss: 0.9614155888557434,grad_norm: 0.9508806618205272, iteration: 183706
loss: 1.0284682512283325,grad_norm: 0.9999990721918396, iteration: 183707
loss: 1.0461221933364868,grad_norm: 0.9999989329481253, iteration: 183708
loss: 1.0033719539642334,grad_norm: 0.9999989670611498, iteration: 183709
loss: 1.009621024131775,grad_norm: 0.9495715291676873, iteration: 183710
loss: 0.9704502820968628,grad_norm: 0.9174850023187224, iteration: 183711
loss: 0.9725387096405029,grad_norm: 0.9908631278682459, iteration: 183712
loss: 0.9822002649307251,grad_norm: 0.9766713086238318, iteration: 183713
loss: 0.9667790532112122,grad_norm: 0.9441385695960512, iteration: 183714
loss: 0.9724579453468323,grad_norm: 0.9328427178059504, iteration: 183715
loss: 0.9699297547340393,grad_norm: 0.8826784218653179, iteration: 183716
loss: 1.0290576219558716,grad_norm: 0.8806535513758289, iteration: 183717
loss: 1.0086989402770996,grad_norm: 0.9999996404858666, iteration: 183718
loss: 1.025814175605774,grad_norm: 0.9285796167818688, iteration: 183719
loss: 0.9685519933700562,grad_norm: 0.9931382892200112, iteration: 183720
loss: 0.9950848817825317,grad_norm: 0.9999991089161113, iteration: 183721
loss: 1.00171959400177,grad_norm: 0.9999991785268202, iteration: 183722
loss: 0.9839150309562683,grad_norm: 0.9308370957270976, iteration: 183723
loss: 1.0572941303253174,grad_norm: 0.9999993809100769, iteration: 183724
loss: 0.990938127040863,grad_norm: 0.9502278712791249, iteration: 183725
loss: 1.0214216709136963,grad_norm: 0.9999991992824793, iteration: 183726
loss: 1.008142113685608,grad_norm: 0.9999991585943854, iteration: 183727
loss: 0.9919459819793701,grad_norm: 0.8745273489965643, iteration: 183728
loss: 1.0058835744857788,grad_norm: 0.8772905826071666, iteration: 183729
loss: 0.9980903267860413,grad_norm: 0.8760952926344509, iteration: 183730
loss: 1.0188413858413696,grad_norm: 0.9354318149975618, iteration: 183731
loss: 0.9577354788780212,grad_norm: 0.9999992010311519, iteration: 183732
loss: 0.9778679609298706,grad_norm: 0.8629196589674872, iteration: 183733
loss: 0.9920927882194519,grad_norm: 0.9999990471960051, iteration: 183734
loss: 1.022214412689209,grad_norm: 0.9999990232315094, iteration: 183735
loss: 0.9840121865272522,grad_norm: 0.9810379764766326, iteration: 183736
loss: 0.9973315596580505,grad_norm: 0.8811750199594595, iteration: 183737
loss: 0.988401472568512,grad_norm: 0.9999990541185864, iteration: 183738
loss: 0.9921694397926331,grad_norm: 0.9999991275984967, iteration: 183739
loss: 0.9829201102256775,grad_norm: 0.9005773441667223, iteration: 183740
loss: 1.0616885423660278,grad_norm: 0.9999997422351259, iteration: 183741
loss: 1.0294055938720703,grad_norm: 0.9639422422410372, iteration: 183742
loss: 1.0047627687454224,grad_norm: 0.8443487129629593, iteration: 183743
loss: 1.0197820663452148,grad_norm: 0.8650185825597373, iteration: 183744
loss: 0.9439178705215454,grad_norm: 0.9148746812292768, iteration: 183745
loss: 0.9893202185630798,grad_norm: 0.9999993121697991, iteration: 183746
loss: 0.9893860816955566,grad_norm: 0.9335392143728598, iteration: 183747
loss: 0.9954299926757812,grad_norm: 0.9999991456926205, iteration: 183748
loss: 1.0056815147399902,grad_norm: 0.8110167833984029, iteration: 183749
loss: 0.9693452715873718,grad_norm: 0.9999992392618697, iteration: 183750
loss: 1.0071063041687012,grad_norm: 0.984961888107862, iteration: 183751
loss: 1.0204070806503296,grad_norm: 0.8730213118817276, iteration: 183752
loss: 0.9788544178009033,grad_norm: 0.9999991438004061, iteration: 183753
loss: 1.0351568460464478,grad_norm: 0.7909371172669523, iteration: 183754
loss: 1.0455495119094849,grad_norm: 0.9999998544134326, iteration: 183755
loss: 0.9977707266807556,grad_norm: 0.9999991985872642, iteration: 183756
loss: 1.0649875402450562,grad_norm: 0.99999923135376, iteration: 183757
loss: 0.9518916606903076,grad_norm: 0.9999991726576994, iteration: 183758
loss: 0.9808499217033386,grad_norm: 0.9067292580804793, iteration: 183759
loss: 1.0008479356765747,grad_norm: 0.9570565462250447, iteration: 183760
loss: 1.0782185792922974,grad_norm: 0.9999999769872707, iteration: 183761
loss: 0.9840958714485168,grad_norm: 0.9999991084148244, iteration: 183762
loss: 0.9861768484115601,grad_norm: 0.9999989052491817, iteration: 183763
loss: 0.9654874801635742,grad_norm: 0.9999991498554587, iteration: 183764
loss: 0.9933412075042725,grad_norm: 0.8154600239102445, iteration: 183765
loss: 0.9963343143463135,grad_norm: 0.9999991198920608, iteration: 183766
loss: 1.4288887977600098,grad_norm: 0.9999993482081332, iteration: 183767
loss: 1.0150338411331177,grad_norm: 0.9999991709874754, iteration: 183768
loss: 1.030014991760254,grad_norm: 0.9999992740822989, iteration: 183769
loss: 1.0029443502426147,grad_norm: 0.9999989819746168, iteration: 183770
loss: 0.9864659309387207,grad_norm: 0.921448159785393, iteration: 183771
loss: 1.0243569612503052,grad_norm: 0.9999991447200188, iteration: 183772
loss: 1.0667046308517456,grad_norm: 0.9999999234832058, iteration: 183773
loss: 0.9681664705276489,grad_norm: 0.9999991263427346, iteration: 183774
loss: 0.9825212359428406,grad_norm: 0.9699022525715911, iteration: 183775
loss: 0.9921718239784241,grad_norm: 0.999999076790331, iteration: 183776
loss: 1.0077728033065796,grad_norm: 0.9999990929170538, iteration: 183777
loss: 1.0356887578964233,grad_norm: 0.8543446938426068, iteration: 183778
loss: 1.0135526657104492,grad_norm: 0.9999994804508134, iteration: 183779
loss: 1.0238735675811768,grad_norm: 0.9999991515438377, iteration: 183780
loss: 1.0003337860107422,grad_norm: 0.9999989682794265, iteration: 183781
loss: 0.9893558621406555,grad_norm: 0.9999991198465676, iteration: 183782
loss: 1.0150525569915771,grad_norm: 0.9999991091767564, iteration: 183783
loss: 0.9940082430839539,grad_norm: 0.8677603646821066, iteration: 183784
loss: 1.0193966627120972,grad_norm: 0.9274983305629706, iteration: 183785
loss: 1.0017435550689697,grad_norm: 0.9056014517199128, iteration: 183786
loss: 0.9860464930534363,grad_norm: 0.9002765918110961, iteration: 183787
loss: 1.0365533828735352,grad_norm: 0.9999991490386104, iteration: 183788
loss: 0.9819861650466919,grad_norm: 0.9999990829885618, iteration: 183789
loss: 1.0002812147140503,grad_norm: 0.9999990980048885, iteration: 183790
loss: 0.9514188766479492,grad_norm: 0.9999992236544673, iteration: 183791
loss: 0.9666998982429504,grad_norm: 0.9926702884856347, iteration: 183792
loss: 1.000354528427124,grad_norm: 0.9999991595575106, iteration: 183793
loss: 1.1597672700881958,grad_norm: 0.9999996096330641, iteration: 183794
loss: 0.974369466304779,grad_norm: 0.965923806046302, iteration: 183795
loss: 1.0333155393600464,grad_norm: 0.8708307679854502, iteration: 183796
loss: 1.0292776823043823,grad_norm: 0.9999991648504943, iteration: 183797
loss: 1.0020877122879028,grad_norm: 0.8137225979778192, iteration: 183798
loss: 0.9984204173088074,grad_norm: 0.9532128654058409, iteration: 183799
loss: 1.010250210762024,grad_norm: 0.963348263014068, iteration: 183800
loss: 1.0174272060394287,grad_norm: 0.9999990075295202, iteration: 183801
loss: 1.0277150869369507,grad_norm: 0.999999103468222, iteration: 183802
loss: 1.0275839567184448,grad_norm: 0.9483105965376312, iteration: 183803
loss: 1.160230040550232,grad_norm: 0.9999993654235352, iteration: 183804
loss: 1.0024058818817139,grad_norm: 0.9999992546733748, iteration: 183805
loss: 1.0137078762054443,grad_norm: 0.9999990395982336, iteration: 183806
loss: 0.9751861095428467,grad_norm: 0.9999998877996925, iteration: 183807
loss: 0.9872943758964539,grad_norm: 0.8108675131488509, iteration: 183808
loss: 1.0116409063339233,grad_norm: 0.999999157054084, iteration: 183809
loss: 1.1391587257385254,grad_norm: 0.99999985965338, iteration: 183810
loss: 1.0203689336776733,grad_norm: 0.9999991385620592, iteration: 183811
loss: 1.0063676834106445,grad_norm: 0.9082039144753072, iteration: 183812
loss: 1.0020049810409546,grad_norm: 0.8158874565157378, iteration: 183813
loss: 1.0201245546340942,grad_norm: 0.958702037947494, iteration: 183814
loss: 1.0004947185516357,grad_norm: 0.9999992342907081, iteration: 183815
loss: 0.9980681538581848,grad_norm: 0.9432982660445359, iteration: 183816
loss: 1.0108555555343628,grad_norm: 0.8628029505942962, iteration: 183817
loss: 0.9824679493904114,grad_norm: 0.999999342243905, iteration: 183818
loss: 1.0146379470825195,grad_norm: 0.8521063787446526, iteration: 183819
loss: 1.136793613433838,grad_norm: 0.9999995297670355, iteration: 183820
loss: 1.0221763849258423,grad_norm: 0.9999990659105047, iteration: 183821
loss: 0.9825916886329651,grad_norm: 0.8629629462701122, iteration: 183822
loss: 0.9692836999893188,grad_norm: 0.9463989297788825, iteration: 183823
loss: 0.9991410374641418,grad_norm: 0.892919141500814, iteration: 183824
loss: 1.0354652404785156,grad_norm: 0.999999566003988, iteration: 183825
loss: 0.9574432969093323,grad_norm: 0.926996938141423, iteration: 183826
loss: 0.978605329990387,grad_norm: 0.9127658703523657, iteration: 183827
loss: 0.9700064063072205,grad_norm: 0.9693545995370256, iteration: 183828
loss: 0.9865394234657288,grad_norm: 0.9999992556078664, iteration: 183829
loss: 0.984794557094574,grad_norm: 0.9999991006705893, iteration: 183830
loss: 0.9982831478118896,grad_norm: 0.9999989922252454, iteration: 183831
loss: 0.9992841482162476,grad_norm: 0.9999990226036886, iteration: 183832
loss: 1.0050448179244995,grad_norm: 0.9999991650429821, iteration: 183833
loss: 1.011112928390503,grad_norm: 0.9999990260268609, iteration: 183834
loss: 0.9863368272781372,grad_norm: 0.9889357820433966, iteration: 183835
loss: 1.0320522785186768,grad_norm: 0.9803537168431983, iteration: 183836
loss: 0.9900756478309631,grad_norm: 0.8139625500763151, iteration: 183837
loss: 1.049323558807373,grad_norm: 0.773504821272203, iteration: 183838
loss: 1.0316553115844727,grad_norm: 0.9999988816209705, iteration: 183839
loss: 1.0133914947509766,grad_norm: 0.9999996899028655, iteration: 183840
loss: 0.9943234324455261,grad_norm: 0.9366372356211267, iteration: 183841
loss: 1.0227704048156738,grad_norm: 0.9999990939651037, iteration: 183842
loss: 0.9880902767181396,grad_norm: 0.9999991127675782, iteration: 183843
loss: 0.9803250432014465,grad_norm: 0.9463423137022478, iteration: 183844
loss: 1.0085557699203491,grad_norm: 0.8186533529691662, iteration: 183845
loss: 0.9510422348976135,grad_norm: 0.9821743642673517, iteration: 183846
loss: 1.027543067932129,grad_norm: 0.9316569174173752, iteration: 183847
loss: 1.019997477531433,grad_norm: 0.9999990477382978, iteration: 183848
loss: 1.0239285230636597,grad_norm: 0.9999991255435899, iteration: 183849
loss: 0.9663190841674805,grad_norm: 0.9914294647250633, iteration: 183850
loss: 1.003193974494934,grad_norm: 0.8502495688745361, iteration: 183851
loss: 1.0465949773788452,grad_norm: 0.8458009760918761, iteration: 183852
loss: 0.9897767901420593,grad_norm: 0.9999992191263724, iteration: 183853
loss: 0.9803252220153809,grad_norm: 0.951794523159116, iteration: 183854
loss: 0.9989499449729919,grad_norm: 0.9999995558520481, iteration: 183855
loss: 0.9908965229988098,grad_norm: 0.9846199696971151, iteration: 183856
loss: 0.9848215579986572,grad_norm: 0.9696802371511113, iteration: 183857
loss: 1.0033878087997437,grad_norm: 0.9710087817289523, iteration: 183858
loss: 0.9937068819999695,grad_norm: 0.9999992659403765, iteration: 183859
loss: 0.972084105014801,grad_norm: 0.9723732374361964, iteration: 183860
loss: 0.9636430740356445,grad_norm: 0.999999220395849, iteration: 183861
loss: 1.0459116697311401,grad_norm: 0.9016445752412726, iteration: 183862
loss: 0.9829557538032532,grad_norm: 0.9696172280651324, iteration: 183863
loss: 1.0208393335342407,grad_norm: 0.9846586075292254, iteration: 183864
loss: 0.9839885830879211,grad_norm: 0.876104131812111, iteration: 183865
loss: 1.0035978555679321,grad_norm: 0.9365851365133323, iteration: 183866
loss: 1.0496169328689575,grad_norm: 0.9999996015837034, iteration: 183867
loss: 1.074041485786438,grad_norm: 0.9999992536824966, iteration: 183868
loss: 0.9701620936393738,grad_norm: 0.8855378433270776, iteration: 183869
loss: 1.0582610368728638,grad_norm: 0.999999768624691, iteration: 183870
loss: 1.031741738319397,grad_norm: 0.9999996029949526, iteration: 183871
loss: 0.9934481978416443,grad_norm: 0.9999990708477843, iteration: 183872
loss: 1.026498794555664,grad_norm: 0.9999993178603227, iteration: 183873
loss: 1.0036511421203613,grad_norm: 0.999999199301982, iteration: 183874
loss: 0.9828654527664185,grad_norm: 0.9573043819255193, iteration: 183875
loss: 0.9944002628326416,grad_norm: 0.8410399577307601, iteration: 183876
loss: 0.9964624047279358,grad_norm: 0.9999999659766045, iteration: 183877
loss: 1.0386857986450195,grad_norm: 0.999999147758137, iteration: 183878
loss: 1.018064022064209,grad_norm: 0.9279938144227632, iteration: 183879
loss: 0.9798640608787537,grad_norm: 0.9999991910098663, iteration: 183880
loss: 1.012241244316101,grad_norm: 0.8303176620515191, iteration: 183881
loss: 0.9933486580848694,grad_norm: 0.9999990797774736, iteration: 183882
loss: 0.9626402258872986,grad_norm: 0.9846746475541409, iteration: 183883
loss: 1.1135005950927734,grad_norm: 0.9999997536414176, iteration: 183884
loss: 0.9880458116531372,grad_norm: 0.9515234919884724, iteration: 183885
loss: 0.9795157313346863,grad_norm: 0.9797209907747303, iteration: 183886
loss: 1.0514079332351685,grad_norm: 0.999999567647415, iteration: 183887
loss: 1.0119811296463013,grad_norm: 0.9088084285269266, iteration: 183888
loss: 0.9955670833587646,grad_norm: 0.9999990764429961, iteration: 183889
loss: 0.9809065461158752,grad_norm: 0.8675334471581574, iteration: 183890
loss: 1.0024482011795044,grad_norm: 0.867959336275709, iteration: 183891
loss: 1.00603187084198,grad_norm: 0.9503525678988872, iteration: 183892
loss: 0.9963821172714233,grad_norm: 0.9432299987972799, iteration: 183893
loss: 1.0227259397506714,grad_norm: 0.9470238458949931, iteration: 183894
loss: 0.978283166885376,grad_norm: 0.9780086589512341, iteration: 183895
loss: 1.0173434019088745,grad_norm: 0.9999991559858592, iteration: 183896
loss: 0.984584629535675,grad_norm: 0.9999990027333675, iteration: 183897
loss: 1.0069983005523682,grad_norm: 0.8819693088392732, iteration: 183898
loss: 1.029772162437439,grad_norm: 0.7903870866679064, iteration: 183899
loss: 1.0150766372680664,grad_norm: 0.937533238666434, iteration: 183900
loss: 0.9763221740722656,grad_norm: 0.9999990556939822, iteration: 183901
loss: 0.9902116060256958,grad_norm: 0.9999992293772532, iteration: 183902
loss: 0.9855266213417053,grad_norm: 0.9858030091258194, iteration: 183903
loss: 1.0464584827423096,grad_norm: 0.9999994237697456, iteration: 183904
loss: 1.011577844619751,grad_norm: 0.9999990300410521, iteration: 183905
loss: 1.0130956172943115,grad_norm: 0.8553276878659303, iteration: 183906
loss: 0.9905300140380859,grad_norm: 0.9784355152550138, iteration: 183907
loss: 1.0306437015533447,grad_norm: 0.9999994308626946, iteration: 183908
loss: 0.9770801663398743,grad_norm: 0.9225620820540923, iteration: 183909
loss: 0.9839451909065247,grad_norm: 0.899314152504215, iteration: 183910
loss: 0.9678601622581482,grad_norm: 0.9896502448821126, iteration: 183911
loss: 0.9930216073989868,grad_norm: 0.9999991712507221, iteration: 183912
loss: 1.0347299575805664,grad_norm: 0.9999993366500803, iteration: 183913
loss: 1.0521965026855469,grad_norm: 0.9999996795929733, iteration: 183914
loss: 1.0453007221221924,grad_norm: 0.9999992479534142, iteration: 183915
loss: 0.9847206473350525,grad_norm: 0.9999990483064897, iteration: 183916
loss: 0.9731485247612,grad_norm: 0.9497961121913632, iteration: 183917
loss: 1.0259907245635986,grad_norm: 0.9363678831161195, iteration: 183918
loss: 0.9780524969100952,grad_norm: 0.8898543916475466, iteration: 183919
loss: 0.9922546148300171,grad_norm: 0.8363908994436967, iteration: 183920
loss: 1.0091471672058105,grad_norm: 0.8101954800785502, iteration: 183921
loss: 0.9863533973693848,grad_norm: 0.9242626018222115, iteration: 183922
loss: 0.9613458514213562,grad_norm: 0.9527986021735941, iteration: 183923
loss: 1.0259637832641602,grad_norm: 0.9215780465634433, iteration: 183924
loss: 0.978714644908905,grad_norm: 0.999999073332024, iteration: 183925
loss: 1.00787353515625,grad_norm: 0.9999991099721418, iteration: 183926
loss: 1.021497368812561,grad_norm: 0.9531126267044905, iteration: 183927
loss: 0.9951306581497192,grad_norm: 0.9539729763673317, iteration: 183928
loss: 1.0161476135253906,grad_norm: 0.9999990144858777, iteration: 183929
loss: 1.0218089818954468,grad_norm: 0.9843810479046728, iteration: 183930
loss: 0.9849609732627869,grad_norm: 0.9077475440928835, iteration: 183931
loss: 0.9963957667350769,grad_norm: 0.9999990726997818, iteration: 183932
loss: 1.0114878416061401,grad_norm: 0.999998973985868, iteration: 183933
loss: 1.0117733478546143,grad_norm: 0.9999994355225518, iteration: 183934
loss: 1.0204519033432007,grad_norm: 0.9999990643790758, iteration: 183935
loss: 0.9940416216850281,grad_norm: 0.9206524577982391, iteration: 183936
loss: 1.0241763591766357,grad_norm: 0.9999991022873452, iteration: 183937
loss: 0.9845646619796753,grad_norm: 0.9953189653142066, iteration: 183938
loss: 0.9937657713890076,grad_norm: 0.9999991012836676, iteration: 183939
loss: 1.0253580808639526,grad_norm: 0.9999990746999137, iteration: 183940
loss: 0.9608345031738281,grad_norm: 0.8955726628912067, iteration: 183941
loss: 1.0138250589370728,grad_norm: 0.8209327726477339, iteration: 183942
loss: 0.9884219169616699,grad_norm: 0.9657732156328575, iteration: 183943
loss: 1.008872628211975,grad_norm: 0.9999990621628594, iteration: 183944
loss: 0.9895821213722229,grad_norm: 0.9999990958185287, iteration: 183945
loss: 1.0080127716064453,grad_norm: 0.999999120310126, iteration: 183946
loss: 1.0105781555175781,grad_norm: 0.9999994150651321, iteration: 183947
loss: 1.000541090965271,grad_norm: 0.9999991921992483, iteration: 183948
loss: 1.0024906396865845,grad_norm: 0.9730002072890712, iteration: 183949
loss: 1.0780651569366455,grad_norm: 0.9999998814404056, iteration: 183950
loss: 1.00955331325531,grad_norm: 0.9891987308794125, iteration: 183951
loss: 0.9897662997245789,grad_norm: 0.9784256430987845, iteration: 183952
loss: 0.9941856861114502,grad_norm: 0.9999991638710539, iteration: 183953
loss: 1.0055397748947144,grad_norm: 0.8765363629741044, iteration: 183954
loss: 0.9799883961677551,grad_norm: 0.9055112476855105, iteration: 183955
loss: 1.0260037183761597,grad_norm: 0.999999287582404, iteration: 183956
loss: 1.004438042640686,grad_norm: 0.8975086259219544, iteration: 183957
loss: 0.9610975980758667,grad_norm: 0.8416785359486696, iteration: 183958
loss: 0.9723238348960876,grad_norm: 0.9588045662745347, iteration: 183959
loss: 0.9625399112701416,grad_norm: 0.9999991138561718, iteration: 183960
loss: 1.0082507133483887,grad_norm: 0.9999995852312908, iteration: 183961
loss: 1.0077978372573853,grad_norm: 0.9999991696734871, iteration: 183962
loss: 0.9860124588012695,grad_norm: 0.9999991159741433, iteration: 183963
loss: 0.9900712370872498,grad_norm: 0.9269223913722278, iteration: 183964
loss: 1.0054900646209717,grad_norm: 0.9277684129745979, iteration: 183965
loss: 1.0213645696640015,grad_norm: 0.9822315016820102, iteration: 183966
loss: 0.9990985989570618,grad_norm: 0.8109134590034275, iteration: 183967
loss: 1.0297306776046753,grad_norm: 0.999999373263958, iteration: 183968
loss: 0.9730402231216431,grad_norm: 0.9999992531709182, iteration: 183969
loss: 0.9746943712234497,grad_norm: 0.8989848702276558, iteration: 183970
loss: 0.9888796806335449,grad_norm: 0.9783857411295113, iteration: 183971
loss: 0.9911254644393921,grad_norm: 0.9999991759760489, iteration: 183972
loss: 1.0191930532455444,grad_norm: 0.8594609610607614, iteration: 183973
loss: 1.0056309700012207,grad_norm: 0.9749509495870943, iteration: 183974
loss: 0.9908990263938904,grad_norm: 0.8965919818266771, iteration: 183975
loss: 0.9988469481468201,grad_norm: 0.8366803589742469, iteration: 183976
loss: 1.01187002658844,grad_norm: 0.8828510302112409, iteration: 183977
loss: 1.0290113687515259,grad_norm: 0.9999999629267659, iteration: 183978
loss: 0.9832450747489929,grad_norm: 0.8717183299251656, iteration: 183979
loss: 0.9942731261253357,grad_norm: 0.8792125120906303, iteration: 183980
loss: 0.9853169918060303,grad_norm: 0.9999991599653225, iteration: 183981
loss: 0.987814724445343,grad_norm: 0.867091016393005, iteration: 183982
loss: 1.030156135559082,grad_norm: 0.8365050640324198, iteration: 183983
loss: 0.9914527535438538,grad_norm: 0.8717305866691534, iteration: 183984
loss: 1.020455241203308,grad_norm: 0.9999990996865029, iteration: 183985
loss: 0.9741954803466797,grad_norm: 0.9999992021856923, iteration: 183986
loss: 1.0223428010940552,grad_norm: 0.9999992292672234, iteration: 183987
loss: 0.9779031276702881,grad_norm: 0.9910935076866318, iteration: 183988
loss: 0.9629258513450623,grad_norm: 0.8953955718960247, iteration: 183989
loss: 1.0015950202941895,grad_norm: 0.9999990836252344, iteration: 183990
loss: 1.0011054277420044,grad_norm: 0.9506165162006353, iteration: 183991
loss: 1.0270015001296997,grad_norm: 0.9999989903468267, iteration: 183992
loss: 1.0245527029037476,grad_norm: 0.874209552442059, iteration: 183993
loss: 0.9883954524993896,grad_norm: 0.9896565503596458, iteration: 183994
loss: 1.025256633758545,grad_norm: 0.9999995639270123, iteration: 183995
loss: 0.9886364340782166,grad_norm: 0.8535540677390321, iteration: 183996
loss: 1.0453824996948242,grad_norm: 0.9999992379023807, iteration: 183997
loss: 0.9741536974906921,grad_norm: 0.9999991323225842, iteration: 183998
loss: 1.0028703212738037,grad_norm: 0.9895179197872929, iteration: 183999
loss: 1.0136384963989258,grad_norm: 0.9999992721515506, iteration: 184000
loss: 0.9841662049293518,grad_norm: 0.956561469146314, iteration: 184001
loss: 1.001686453819275,grad_norm: 0.9521022221999298, iteration: 184002
loss: 0.9970136284828186,grad_norm: 0.9002354470791432, iteration: 184003
loss: 0.9996131062507629,grad_norm: 0.9162948879159709, iteration: 184004
loss: 0.9850293397903442,grad_norm: 0.8191481727539727, iteration: 184005
loss: 0.9971859455108643,grad_norm: 0.9999992705846791, iteration: 184006
loss: 1.0029470920562744,grad_norm: 0.941519037045733, iteration: 184007
loss: 1.0459519624710083,grad_norm: 0.9160613974230644, iteration: 184008
loss: 1.0023620128631592,grad_norm: 0.9418029095503501, iteration: 184009
loss: 0.9518159627914429,grad_norm: 0.9412470166243785, iteration: 184010
loss: 0.9840269684791565,grad_norm: 0.9254875397910332, iteration: 184011
loss: 1.0249097347259521,grad_norm: 0.9999991602591246, iteration: 184012
loss: 1.0305882692337036,grad_norm: 0.8670778641929917, iteration: 184013
loss: 0.9915933012962341,grad_norm: 0.9687416433409275, iteration: 184014
loss: 0.9896366000175476,grad_norm: 0.9580956086947713, iteration: 184015
loss: 0.9939338564872742,grad_norm: 0.9999991930991556, iteration: 184016
loss: 0.9807898998260498,grad_norm: 0.916164070726192, iteration: 184017
loss: 1.0027273893356323,grad_norm: 0.931475214914342, iteration: 184018
loss: 1.024677038192749,grad_norm: 0.8107474606543635, iteration: 184019
loss: 0.9927079081535339,grad_norm: 0.7515250186510135, iteration: 184020
loss: 1.012244701385498,grad_norm: 0.9999991948113971, iteration: 184021
loss: 0.9923140406608582,grad_norm: 0.9999988643028144, iteration: 184022
loss: 0.9883996248245239,grad_norm: 0.9129338366045632, iteration: 184023
loss: 0.9951950311660767,grad_norm: 0.9347578076011545, iteration: 184024
loss: 1.0053149461746216,grad_norm: 0.9582937422474158, iteration: 184025
loss: 0.9981641173362732,grad_norm: 0.9886133115881063, iteration: 184026
loss: 1.008359670639038,grad_norm: 0.9173857457883776, iteration: 184027
loss: 1.0382150411605835,grad_norm: 0.9999992519520688, iteration: 184028
loss: 1.0085829496383667,grad_norm: 0.9999992543296743, iteration: 184029
loss: 0.9964220523834229,grad_norm: 0.9999990773502376, iteration: 184030
loss: 1.0142207145690918,grad_norm: 0.837474684663613, iteration: 184031
loss: 1.0307403802871704,grad_norm: 0.9999992766319487, iteration: 184032
loss: 0.9636744260787964,grad_norm: 0.9999990556612054, iteration: 184033
loss: 1.0082337856292725,grad_norm: 0.8863838703820087, iteration: 184034
loss: 0.9989137053489685,grad_norm: 0.9999990523040899, iteration: 184035
loss: 0.9848202466964722,grad_norm: 0.9999991218486247, iteration: 184036
loss: 0.9667476415634155,grad_norm: 0.9999993873259115, iteration: 184037
loss: 0.9930241107940674,grad_norm: 0.9999990056685419, iteration: 184038
loss: 1.0053223371505737,grad_norm: 0.9999991198568695, iteration: 184039
loss: 1.0687578916549683,grad_norm: 0.9999991609378851, iteration: 184040
loss: 1.0164390802383423,grad_norm: 0.9925221892270923, iteration: 184041
loss: 0.9753416776657104,grad_norm: 0.9999991349196155, iteration: 184042
loss: 0.9836700558662415,grad_norm: 0.9472366668423507, iteration: 184043
loss: 1.0002201795578003,grad_norm: 0.9172944155616298, iteration: 184044
loss: 0.9965351223945618,grad_norm: 0.9736272741042131, iteration: 184045
loss: 1.065068244934082,grad_norm: 0.9999991277179459, iteration: 184046
loss: 0.998385488986969,grad_norm: 0.9189587002690655, iteration: 184047
loss: 1.0134007930755615,grad_norm: 0.9960837726888444, iteration: 184048
loss: 1.0641552209854126,grad_norm: 0.9999994360014305, iteration: 184049
loss: 1.0087883472442627,grad_norm: 0.884356349059918, iteration: 184050
loss: 1.0028009414672852,grad_norm: 0.9678057925352199, iteration: 184051
loss: 0.9787343144416809,grad_norm: 0.8339822793526, iteration: 184052
loss: 1.0130712985992432,grad_norm: 0.9999992346441097, iteration: 184053
loss: 0.9920694828033447,grad_norm: 0.9999990965799009, iteration: 184054
loss: 0.9718946218490601,grad_norm: 0.9738114598413152, iteration: 184055
loss: 1.0230250358581543,grad_norm: 0.9999991562336918, iteration: 184056
loss: 0.9803351163864136,grad_norm: 0.9999990969376873, iteration: 184057
loss: 0.9909654855728149,grad_norm: 0.8732453978214927, iteration: 184058
loss: 1.02190101146698,grad_norm: 0.9612607856229199, iteration: 184059
loss: 1.0063637495040894,grad_norm: 0.9999991422340129, iteration: 184060
loss: 1.0184705257415771,grad_norm: 0.999999279694832, iteration: 184061
loss: 1.0146667957305908,grad_norm: 0.9066935695894288, iteration: 184062
loss: 0.9929350018501282,grad_norm: 0.9269104741039274, iteration: 184063
loss: 0.9874457120895386,grad_norm: 0.9920154468478108, iteration: 184064
loss: 1.0020686388015747,grad_norm: 0.9999991168145604, iteration: 184065
loss: 1.0416761636734009,grad_norm: 0.8975029037014984, iteration: 184066
loss: 0.9998845458030701,grad_norm: 0.9999991038587942, iteration: 184067
loss: 1.0350208282470703,grad_norm: 0.9999992255239155, iteration: 184068
loss: 0.9845869541168213,grad_norm: 0.7720204076852227, iteration: 184069
loss: 0.9807782173156738,grad_norm: 0.999999199554477, iteration: 184070
loss: 0.9593347907066345,grad_norm: 0.9029849634964219, iteration: 184071
loss: 1.0119049549102783,grad_norm: 0.9999990137069888, iteration: 184072
loss: 1.0160287618637085,grad_norm: 0.8697641702155198, iteration: 184073
loss: 0.9846979379653931,grad_norm: 0.9093541568741023, iteration: 184074
loss: 1.004461407661438,grad_norm: 0.9090098834548184, iteration: 184075
loss: 0.9826008677482605,grad_norm: 0.9999992085106685, iteration: 184076
loss: 0.9889671206474304,grad_norm: 0.9999990869044002, iteration: 184077
loss: 1.003621220588684,grad_norm: 0.8746500759837041, iteration: 184078
loss: 1.0014607906341553,grad_norm: 0.9999991356428554, iteration: 184079
loss: 0.9877207279205322,grad_norm: 0.9934248658541242, iteration: 184080
loss: 0.9900043606758118,grad_norm: 0.978461295575912, iteration: 184081
loss: 1.0059516429901123,grad_norm: 0.984822546474211, iteration: 184082
loss: 0.9844222664833069,grad_norm: 0.9340190829835542, iteration: 184083
loss: 1.017554759979248,grad_norm: 0.9999992202833897, iteration: 184084
loss: 0.9929672479629517,grad_norm: 0.959582066569836, iteration: 184085
loss: 0.9567058086395264,grad_norm: 0.7942080072953355, iteration: 184086
loss: 0.9627046585083008,grad_norm: 0.8932717197680793, iteration: 184087
loss: 0.9900149703025818,grad_norm: 0.9999990845945483, iteration: 184088
loss: 0.9768095016479492,grad_norm: 0.9999991831380821, iteration: 184089
loss: 0.9986740350723267,grad_norm: 0.9999995855591866, iteration: 184090
loss: 1.0140204429626465,grad_norm: 0.9999995123376635, iteration: 184091
loss: 0.9902892112731934,grad_norm: 0.9999991023374426, iteration: 184092
loss: 1.049627661705017,grad_norm: 0.9999992517140294, iteration: 184093
loss: 0.9999517798423767,grad_norm: 0.9999991399588877, iteration: 184094
loss: 0.9899861812591553,grad_norm: 0.9427306751441429, iteration: 184095
loss: 0.9705098867416382,grad_norm: 0.9906339276645626, iteration: 184096
loss: 1.0047659873962402,grad_norm: 0.9999991275231171, iteration: 184097
loss: 1.0011073350906372,grad_norm: 0.8812403354708139, iteration: 184098
loss: 1.0223803520202637,grad_norm: 0.9999994527509672, iteration: 184099
loss: 1.0122709274291992,grad_norm: 0.8907356607380198, iteration: 184100
loss: 0.99517822265625,grad_norm: 0.9453052543665305, iteration: 184101
loss: 0.992820680141449,grad_norm: 0.9283772793924315, iteration: 184102
loss: 1.0225830078125,grad_norm: 0.9999990507968834, iteration: 184103
loss: 1.02196204662323,grad_norm: 0.9999992120384552, iteration: 184104
loss: 1.0097705125808716,grad_norm: 0.9999991248559428, iteration: 184105
loss: 1.0659761428833008,grad_norm: 0.9999997073508974, iteration: 184106
loss: 0.9908633828163147,grad_norm: 0.9999992715235649, iteration: 184107
loss: 0.9853145480155945,grad_norm: 0.9005454127895668, iteration: 184108
loss: 1.0139760971069336,grad_norm: 0.983973515822076, iteration: 184109
loss: 0.9585919976234436,grad_norm: 0.8308123846827115, iteration: 184110
loss: 1.015156865119934,grad_norm: 0.9999995258392335, iteration: 184111
loss: 0.9995271563529968,grad_norm: 0.9656613736661395, iteration: 184112
loss: 1.007397174835205,grad_norm: 0.9999990883823858, iteration: 184113
loss: 1.0300320386886597,grad_norm: 0.8601886165669453, iteration: 184114
loss: 1.0120346546173096,grad_norm: 0.8157564591797138, iteration: 184115
loss: 0.9710286855697632,grad_norm: 0.9999992222764008, iteration: 184116
loss: 0.9704580307006836,grad_norm: 0.9590198229177171, iteration: 184117
loss: 1.0380456447601318,grad_norm: 0.9948067917387681, iteration: 184118
loss: 0.9922842383384705,grad_norm: 0.999999068484226, iteration: 184119
loss: 1.0428959131240845,grad_norm: 0.9665607314150139, iteration: 184120
loss: 0.9979578256607056,grad_norm: 0.9687300776679404, iteration: 184121
loss: 1.022904872894287,grad_norm: 0.8844788808113769, iteration: 184122
loss: 1.0404633283615112,grad_norm: 0.9999994435741193, iteration: 184123
loss: 1.039993166923523,grad_norm: 0.9661065783196848, iteration: 184124
loss: 0.9788211584091187,grad_norm: 0.9999990466531423, iteration: 184125
loss: 0.9918390512466431,grad_norm: 0.8721176018249448, iteration: 184126
loss: 0.9953415989875793,grad_norm: 0.9999992399659845, iteration: 184127
loss: 0.9697324633598328,grad_norm: 0.9999991098089149, iteration: 184128
loss: 0.9978047609329224,grad_norm: 0.9999991631331961, iteration: 184129
loss: 1.0110844373703003,grad_norm: 0.9999991651996609, iteration: 184130
loss: 1.0344743728637695,grad_norm: 0.9395267352834041, iteration: 184131
loss: 1.0269412994384766,grad_norm: 0.9999992569529838, iteration: 184132
loss: 1.0631974935531616,grad_norm: 0.9999994775338821, iteration: 184133
loss: 0.9811872839927673,grad_norm: 0.9999991096432649, iteration: 184134
loss: 1.020128846168518,grad_norm: 0.9999991911302826, iteration: 184135
loss: 1.0356590747833252,grad_norm: 0.9660612516857642, iteration: 184136
loss: 1.0265588760375977,grad_norm: 0.9999991542468699, iteration: 184137
loss: 1.020774006843567,grad_norm: 0.9944741685704934, iteration: 184138
loss: 0.957171618938446,grad_norm: 0.8906421472551469, iteration: 184139
loss: 1.0061671733856201,grad_norm: 0.8476721720218819, iteration: 184140
loss: 0.9924553632736206,grad_norm: 0.9999991298019397, iteration: 184141
loss: 1.0008543729782104,grad_norm: 0.9019675873009705, iteration: 184142
loss: 1.0064756870269775,grad_norm: 0.9482662766563867, iteration: 184143
loss: 1.0294677019119263,grad_norm: 0.9304402995954663, iteration: 184144
loss: 0.9905056953430176,grad_norm: 0.9357127464232347, iteration: 184145
loss: 0.9672342538833618,grad_norm: 0.9999995626499738, iteration: 184146
loss: 0.9886190891265869,grad_norm: 0.8129402820700774, iteration: 184147
loss: 1.0074015855789185,grad_norm: 0.9999991952086104, iteration: 184148
loss: 0.9641071557998657,grad_norm: 0.9138933370689631, iteration: 184149
loss: 1.0133850574493408,grad_norm: 0.9014736886591895, iteration: 184150
loss: 0.9714831113815308,grad_norm: 0.9999989781780092, iteration: 184151
loss: 0.9854840040206909,grad_norm: 0.9849802783295118, iteration: 184152
loss: 1.0109195709228516,grad_norm: 0.9703915753258493, iteration: 184153
loss: 0.9679886102676392,grad_norm: 0.8582666961793339, iteration: 184154
loss: 1.0217708349227905,grad_norm: 0.9670938889263425, iteration: 184155
loss: 0.9815555810928345,grad_norm: 0.9511088335146798, iteration: 184156
loss: 1.029687762260437,grad_norm: 0.8590094426934302, iteration: 184157
loss: 1.0402418375015259,grad_norm: 0.9432215908115538, iteration: 184158
loss: 0.9875573515892029,grad_norm: 0.9651264778705961, iteration: 184159
loss: 0.9808109402656555,grad_norm: 0.8299286955529809, iteration: 184160
loss: 0.9689012169837952,grad_norm: 0.9847040802953783, iteration: 184161
loss: 0.9857606291770935,grad_norm: 0.9999991624307247, iteration: 184162
loss: 1.0206602811813354,grad_norm: 0.9017885772138466, iteration: 184163
loss: 1.0049527883529663,grad_norm: 0.9999991637487278, iteration: 184164
loss: 0.9694885015487671,grad_norm: 0.9999989132299613, iteration: 184165
loss: 0.9891693592071533,grad_norm: 0.9495609367321243, iteration: 184166
loss: 0.9957994222640991,grad_norm: 0.9323360550529113, iteration: 184167
loss: 1.0064157247543335,grad_norm: 0.9999991655626256, iteration: 184168
loss: 1.0124815702438354,grad_norm: 0.9132670265228718, iteration: 184169
loss: 0.9983277916908264,grad_norm: 0.9793461668229139, iteration: 184170
loss: 1.007785439491272,grad_norm: 0.8653062584151658, iteration: 184171
loss: 0.9592914581298828,grad_norm: 0.9999990241873548, iteration: 184172
loss: 1.004238247871399,grad_norm: 0.915269212955331, iteration: 184173
loss: 1.014901876449585,grad_norm: 0.9926680076107968, iteration: 184174
loss: 1.0054527521133423,grad_norm: 0.8705571797831337, iteration: 184175
loss: 1.001655101776123,grad_norm: 0.9759556062665895, iteration: 184176
loss: 1.0243542194366455,grad_norm: 0.9999993685216905, iteration: 184177
loss: 0.9863965511322021,grad_norm: 0.8988817629414237, iteration: 184178
loss: 1.014432430267334,grad_norm: 0.9999994735532253, iteration: 184179
loss: 0.9710361361503601,grad_norm: 0.7772283536538438, iteration: 184180
loss: 0.9900513291358948,grad_norm: 0.9683425233751555, iteration: 184181
loss: 0.9755528569221497,grad_norm: 0.8728824686262445, iteration: 184182
loss: 1.021287441253662,grad_norm: 0.9510036066537098, iteration: 184183
loss: 1.0124486684799194,grad_norm: 0.9429774422775283, iteration: 184184
loss: 1.1134077310562134,grad_norm: 0.9999994653900808, iteration: 184185
loss: 0.9787541627883911,grad_norm: 0.8450493764542099, iteration: 184186
loss: 0.9620199203491211,grad_norm: 0.9250943524891666, iteration: 184187
loss: 1.0272510051727295,grad_norm: 0.9999993333376674, iteration: 184188
loss: 1.0211037397384644,grad_norm: 0.9999991507407271, iteration: 184189
loss: 0.9963356852531433,grad_norm: 0.9999991522833159, iteration: 184190
loss: 0.9954622387886047,grad_norm: 0.9121457112124479, iteration: 184191
loss: 0.9903943538665771,grad_norm: 0.9999990880822553, iteration: 184192
loss: 0.9997075796127319,grad_norm: 0.9999990818824842, iteration: 184193
loss: 1.0281089544296265,grad_norm: 0.99999894122973, iteration: 184194
loss: 1.0012950897216797,grad_norm: 0.9999991120341677, iteration: 184195
loss: 0.9642224311828613,grad_norm: 0.9099416422259194, iteration: 184196
loss: 1.0339051485061646,grad_norm: 0.999999708654412, iteration: 184197
loss: 0.957880973815918,grad_norm: 0.7880585479618234, iteration: 184198
loss: 1.0116277933120728,grad_norm: 0.8617681718334971, iteration: 184199
loss: 1.0002641677856445,grad_norm: 0.9999999310986932, iteration: 184200
loss: 0.9934936165809631,grad_norm: 0.7530968884672498, iteration: 184201
loss: 1.0096673965454102,grad_norm: 0.8520290864459076, iteration: 184202
loss: 1.012984037399292,grad_norm: 0.9999998302762948, iteration: 184203
loss: 1.0373402833938599,grad_norm: 0.999999669773956, iteration: 184204
loss: 1.007118821144104,grad_norm: 0.9999991490233111, iteration: 184205
loss: 1.0035306215286255,grad_norm: 0.9999990844006689, iteration: 184206
loss: 1.0410879850387573,grad_norm: 0.9999995414786133, iteration: 184207
loss: 1.0015308856964111,grad_norm: 0.9999992019944478, iteration: 184208
loss: 0.9854391813278198,grad_norm: 0.9999992224743823, iteration: 184209
loss: 1.0056443214416504,grad_norm: 0.9714605087701226, iteration: 184210
loss: 1.005707025527954,grad_norm: 0.9999990910920169, iteration: 184211
loss: 0.9750095009803772,grad_norm: 0.9494991794818417, iteration: 184212
loss: 0.9766536951065063,grad_norm: 0.8419188110190223, iteration: 184213
loss: 1.0712436437606812,grad_norm: 0.9999993663952427, iteration: 184214
loss: 1.020935297012329,grad_norm: 0.9413403717862528, iteration: 184215
loss: 1.0144404172897339,grad_norm: 0.8847881901034415, iteration: 184216
loss: 1.0490069389343262,grad_norm: 0.9807237059938969, iteration: 184217
loss: 1.017260193824768,grad_norm: 0.9999992163294626, iteration: 184218
loss: 0.9749656319618225,grad_norm: 0.9999991991890975, iteration: 184219
loss: 1.0062600374221802,grad_norm: 0.9999990623408451, iteration: 184220
loss: 1.0002270936965942,grad_norm: 0.9999991619121528, iteration: 184221
loss: 1.0959652662277222,grad_norm: 0.9999994103797499, iteration: 184222
loss: 1.0505038499832153,grad_norm: 0.9788377413702839, iteration: 184223
loss: 1.0364047288894653,grad_norm: 0.8657216141639521, iteration: 184224
loss: 0.9775965213775635,grad_norm: 0.9547125688236826, iteration: 184225
loss: 1.0008935928344727,grad_norm: 0.9247387354346037, iteration: 184226
loss: 1.0132999420166016,grad_norm: 0.9705273417918923, iteration: 184227
loss: 0.975818932056427,grad_norm: 0.8223520844478974, iteration: 184228
loss: 1.01891028881073,grad_norm: 0.8315604486067606, iteration: 184229
loss: 0.9993568062782288,grad_norm: 0.966729040947028, iteration: 184230
loss: 1.056149959564209,grad_norm: 0.9999996967807181, iteration: 184231
loss: 1.0135940313339233,grad_norm: 0.9999993460592594, iteration: 184232
loss: 0.9800426363945007,grad_norm: 0.9975439242327631, iteration: 184233
loss: 0.9587613344192505,grad_norm: 0.9999989704260296, iteration: 184234
loss: 1.0429596900939941,grad_norm: 0.9999992467353147, iteration: 184235
loss: 0.9663984775543213,grad_norm: 0.945645808908612, iteration: 184236
loss: 0.998723030090332,grad_norm: 0.9999992153321157, iteration: 184237
loss: 0.9959086775779724,grad_norm: 0.9390113563024071, iteration: 184238
loss: 0.9563770294189453,grad_norm: 0.8235665640783366, iteration: 184239
loss: 1.0231132507324219,grad_norm: 0.9999989672460355, iteration: 184240
loss: 1.0481560230255127,grad_norm: 0.9999990309826485, iteration: 184241
loss: 1.015343427658081,grad_norm: 0.9955996713861248, iteration: 184242
loss: 0.9969403147697449,grad_norm: 0.9999991493338877, iteration: 184243
loss: 1.0036265850067139,grad_norm: 0.999999253058992, iteration: 184244
loss: 0.9869645833969116,grad_norm: 0.8227014034967904, iteration: 184245
loss: 1.1129313707351685,grad_norm: 0.9999999238850094, iteration: 184246
loss: 0.9874274134635925,grad_norm: 0.9999990236296967, iteration: 184247
loss: 0.9964793920516968,grad_norm: 0.9188398581925871, iteration: 184248
loss: 0.957619845867157,grad_norm: 0.999998976030804, iteration: 184249
loss: 0.9863482117652893,grad_norm: 0.9999996128660744, iteration: 184250
loss: 1.035919427871704,grad_norm: 0.9999993417294494, iteration: 184251
loss: 0.9978996515274048,grad_norm: 0.9999990463373918, iteration: 184252
loss: 0.9797582626342773,grad_norm: 0.9999991994393777, iteration: 184253
loss: 1.049757957458496,grad_norm: 0.9999991758755195, iteration: 184254
loss: 1.0556138753890991,grad_norm: 0.9999992057189854, iteration: 184255
loss: 1.0169650316238403,grad_norm: 0.9951295600637228, iteration: 184256
loss: 1.0051308870315552,grad_norm: 0.9760595093080663, iteration: 184257
loss: 0.9957817792892456,grad_norm: 0.8240770095534868, iteration: 184258
loss: 0.9731084108352661,grad_norm: 0.8719874976787118, iteration: 184259
loss: 1.0017037391662598,grad_norm: 0.9218271932949315, iteration: 184260
loss: 0.9351003170013428,grad_norm: 0.9655362848994916, iteration: 184261
loss: 1.013587236404419,grad_norm: 0.9409684817332726, iteration: 184262
loss: 0.9706212878227234,grad_norm: 0.839541128011779, iteration: 184263
loss: 0.986482560634613,grad_norm: 0.8337957908096095, iteration: 184264
loss: 1.0074265003204346,grad_norm: 0.9999994206629129, iteration: 184265
loss: 1.0087246894836426,grad_norm: 0.9604290705922115, iteration: 184266
loss: 1.0378179550170898,grad_norm: 0.9999995104164883, iteration: 184267
loss: 1.010500192642212,grad_norm: 0.9999996784180555, iteration: 184268
loss: 0.9583069682121277,grad_norm: 0.8838800881345873, iteration: 184269
loss: 1.1007473468780518,grad_norm: 0.9999992093913845, iteration: 184270
loss: 0.9562181830406189,grad_norm: 0.9610392154604812, iteration: 184271
loss: 0.9911525845527649,grad_norm: 0.9999992649164349, iteration: 184272
loss: 0.9846836924552917,grad_norm: 0.9633793688783645, iteration: 184273
loss: 0.9930269718170166,grad_norm: 0.8862970539238461, iteration: 184274
loss: 0.9900814294815063,grad_norm: 0.9101142972081828, iteration: 184275
loss: 0.9914659261703491,grad_norm: 0.9352639845147548, iteration: 184276
loss: 1.0083112716674805,grad_norm: 0.8983985240477772, iteration: 184277
loss: 0.9757798314094543,grad_norm: 0.9999991113832619, iteration: 184278
loss: 1.024824619293213,grad_norm: 0.9999990087629871, iteration: 184279
loss: 0.9813745021820068,grad_norm: 0.7654576711244336, iteration: 184280
loss: 0.9765576720237732,grad_norm: 0.8732997881377761, iteration: 184281
loss: 1.0422873497009277,grad_norm: 0.9999997554509952, iteration: 184282
loss: 0.95851069688797,grad_norm: 0.9355079343323437, iteration: 184283
loss: 1.0085088014602661,grad_norm: 0.9318150338876008, iteration: 184284
loss: 1.0241036415100098,grad_norm: 0.8041368839237668, iteration: 184285
loss: 0.9769521355628967,grad_norm: 0.9999989607502461, iteration: 184286
loss: 1.0127002000808716,grad_norm: 0.9999990840923466, iteration: 184287
loss: 1.0226709842681885,grad_norm: 0.9559995688002072, iteration: 184288
loss: 0.9621289372444153,grad_norm: 0.9999991862810517, iteration: 184289
loss: 1.019574761390686,grad_norm: 0.9999998021912216, iteration: 184290
loss: 0.9734271168708801,grad_norm: 0.9826425217681554, iteration: 184291
loss: 0.9711546301841736,grad_norm: 0.9907764325498634, iteration: 184292
loss: 0.9840009808540344,grad_norm: 0.9999989829128193, iteration: 184293
loss: 0.97833651304245,grad_norm: 0.9268993351153879, iteration: 184294
loss: 1.0141726732254028,grad_norm: 0.9999995494905654, iteration: 184295
loss: 1.020670771598816,grad_norm: 0.9999997761861528, iteration: 184296
loss: 0.9952571988105774,grad_norm: 0.9999991981253878, iteration: 184297
loss: 0.998991847038269,grad_norm: 0.9385859611051854, iteration: 184298
loss: 1.01744544506073,grad_norm: 0.986570430669644, iteration: 184299
loss: 0.962030827999115,grad_norm: 0.9638977699643648, iteration: 184300
loss: 1.0135232210159302,grad_norm: 0.9675290613813639, iteration: 184301
loss: 1.0209283828735352,grad_norm: 0.992717668828527, iteration: 184302
loss: 0.9830010533332825,grad_norm: 0.9999990168893311, iteration: 184303
loss: 0.9905720353126526,grad_norm: 0.9220628850999489, iteration: 184304
loss: 1.009432077407837,grad_norm: 0.9999990134535344, iteration: 184305
loss: 0.957464337348938,grad_norm: 0.9999989485343806, iteration: 184306
loss: 0.9547649621963501,grad_norm: 0.8756125020248592, iteration: 184307
loss: 1.0314347743988037,grad_norm: 0.9999992800627674, iteration: 184308
loss: 0.9927915334701538,grad_norm: 0.9917507263564952, iteration: 184309
loss: 1.0015535354614258,grad_norm: 0.9563587487245412, iteration: 184310
loss: 0.9773247241973877,grad_norm: 0.8754344334374098, iteration: 184311
loss: 1.0114734172821045,grad_norm: 0.8215456006138012, iteration: 184312
loss: 1.0732964277267456,grad_norm: 0.9999990979404045, iteration: 184313
loss: 1.0120354890823364,grad_norm: 0.999999106965587, iteration: 184314
loss: 1.0153018236160278,grad_norm: 0.8879800102379406, iteration: 184315
loss: 0.9906854033470154,grad_norm: 0.9499507795884214, iteration: 184316
loss: 0.993768036365509,grad_norm: 0.9624927449699419, iteration: 184317
loss: 0.9928454160690308,grad_norm: 0.8503908293059961, iteration: 184318
loss: 0.9954012632369995,grad_norm: 0.999999169149588, iteration: 184319
loss: 0.9781860709190369,grad_norm: 0.8333833919057761, iteration: 184320
loss: 1.0061581134796143,grad_norm: 0.9999993978438676, iteration: 184321
loss: 0.9992501735687256,grad_norm: 0.9999990563906513, iteration: 184322
loss: 0.9831966161727905,grad_norm: 0.9999990483241042, iteration: 184323
loss: 0.9872679114341736,grad_norm: 0.9999990731495455, iteration: 184324
loss: 1.0006917715072632,grad_norm: 0.882906710956588, iteration: 184325
loss: 1.013073205947876,grad_norm: 0.8658406031509941, iteration: 184326
loss: 1.0053406953811646,grad_norm: 0.7901332555787601, iteration: 184327
loss: 1.007539987564087,grad_norm: 0.999999147098597, iteration: 184328
loss: 1.0004910230636597,grad_norm: 0.951788424223689, iteration: 184329
loss: 0.9287785291671753,grad_norm: 0.9246201962918568, iteration: 184330
loss: 1.0378772020339966,grad_norm: 0.9999993616960339, iteration: 184331
loss: 1.0085442066192627,grad_norm: 0.9999990162723855, iteration: 184332
loss: 0.9912607073783875,grad_norm: 0.8710156481885255, iteration: 184333
loss: 1.0011287927627563,grad_norm: 0.8309110750094266, iteration: 184334
loss: 0.9937149286270142,grad_norm: 0.9999989738286621, iteration: 184335
loss: 0.9682942628860474,grad_norm: 0.8638148432991212, iteration: 184336
loss: 0.9984740018844604,grad_norm: 0.9687707879504265, iteration: 184337
loss: 0.9897872805595398,grad_norm: 0.8151050989050365, iteration: 184338
loss: 0.9888510704040527,grad_norm: 0.9421166885041052, iteration: 184339
loss: 1.030085563659668,grad_norm: 0.9302319207103689, iteration: 184340
loss: 1.0001344680786133,grad_norm: 0.8382118872746116, iteration: 184341
loss: 1.0017483234405518,grad_norm: 0.8976425529907848, iteration: 184342
loss: 0.9927290081977844,grad_norm: 0.9999992536561365, iteration: 184343
loss: 1.0235872268676758,grad_norm: 0.9999990659011532, iteration: 184344
loss: 1.0817641019821167,grad_norm: 0.9930042288721364, iteration: 184345
loss: 0.9930318593978882,grad_norm: 0.9999992978744998, iteration: 184346
loss: 0.9726016521453857,grad_norm: 0.9999990703317758, iteration: 184347
loss: 0.9695918560028076,grad_norm: 0.8494145716235242, iteration: 184348
loss: 1.0033491849899292,grad_norm: 0.9999991125646521, iteration: 184349
loss: 1.0345863103866577,grad_norm: 0.999999009864869, iteration: 184350
loss: 1.0439170598983765,grad_norm: 0.9999996559817805, iteration: 184351
loss: 1.0125091075897217,grad_norm: 0.9999990369580688, iteration: 184352
loss: 1.0004990100860596,grad_norm: 0.8783103247901684, iteration: 184353
loss: 1.0567435026168823,grad_norm: 0.9999990751268898, iteration: 184354
loss: 1.1596263647079468,grad_norm: 0.9999993789790868, iteration: 184355
loss: 1.021382451057434,grad_norm: 0.95307358640767, iteration: 184356
loss: 1.0471807718276978,grad_norm: 0.9999990493180001, iteration: 184357
loss: 0.9641671776771545,grad_norm: 0.9787278078636504, iteration: 184358
loss: 1.0986967086791992,grad_norm: 0.931761506153198, iteration: 184359
loss: 1.094552993774414,grad_norm: 0.9564819052798814, iteration: 184360
loss: 1.030724287033081,grad_norm: 0.9999999914025491, iteration: 184361
loss: 1.0759563446044922,grad_norm: 0.9999991690219696, iteration: 184362
loss: 1.250161051750183,grad_norm: 0.9999998764102808, iteration: 184363
loss: 0.9982049465179443,grad_norm: 0.8270912056892272, iteration: 184364
loss: 1.1381548643112183,grad_norm: 0.9999990786720376, iteration: 184365
loss: 1.0907378196716309,grad_norm: 0.9999989545831165, iteration: 184366
loss: 0.9781918525695801,grad_norm: 0.8643321576729825, iteration: 184367
loss: 1.1003327369689941,grad_norm: 0.9999990543944574, iteration: 184368
loss: 1.0008140802383423,grad_norm: 0.9241465568188869, iteration: 184369
loss: 1.0254360437393188,grad_norm: 0.9247868260878311, iteration: 184370
loss: 0.9659280776977539,grad_norm: 0.9999991009327438, iteration: 184371
loss: 1.0065066814422607,grad_norm: 0.9648887447417813, iteration: 184372
loss: 1.1380670070648193,grad_norm: 0.9999991602463772, iteration: 184373
loss: 0.9791619777679443,grad_norm: 0.9999991496153223, iteration: 184374
loss: 1.2712321281433105,grad_norm: 0.9999990133609095, iteration: 184375
loss: 1.0737742185592651,grad_norm: 0.9999998159681224, iteration: 184376
loss: 1.1382107734680176,grad_norm: 0.9999998272741113, iteration: 184377
loss: 0.9526535272598267,grad_norm: 0.9076764761176503, iteration: 184378
loss: 1.1287662982940674,grad_norm: 0.999999760139201, iteration: 184379
loss: 0.9804782867431641,grad_norm: 0.9799263787331484, iteration: 184380
loss: 1.010204792022705,grad_norm: 0.9985023756710255, iteration: 184381
loss: 0.9911857843399048,grad_norm: 0.8572060373603043, iteration: 184382
loss: 0.9738981127738953,grad_norm: 0.9663339789800789, iteration: 184383
loss: 1.0430189371109009,grad_norm: 0.890881502961652, iteration: 184384
loss: 0.9981746077537537,grad_norm: 0.9999991637508826, iteration: 184385
loss: 0.9884877800941467,grad_norm: 0.9999993024155367, iteration: 184386
loss: 1.0045416355133057,grad_norm: 0.999999630666112, iteration: 184387
loss: 1.0170077085494995,grad_norm: 0.9999990019596817, iteration: 184388
loss: 1.0059943199157715,grad_norm: 0.9999990920400171, iteration: 184389
loss: 1.0104329586029053,grad_norm: 0.9121943005295159, iteration: 184390
loss: 0.9864155650138855,grad_norm: 0.9086002653807168, iteration: 184391
loss: 1.0222954750061035,grad_norm: 0.8598187833490413, iteration: 184392
loss: 1.0063914060592651,grad_norm: 0.8756074691553216, iteration: 184393
loss: 1.0280591249465942,grad_norm: 0.9688659225523719, iteration: 184394
loss: 1.0043374300003052,grad_norm: 0.9450948321099766, iteration: 184395
loss: 1.0723445415496826,grad_norm: 0.8902037926855617, iteration: 184396
loss: 0.9960594773292542,grad_norm: 0.9033342216094612, iteration: 184397
loss: 0.9898691773414612,grad_norm: 0.9508035819605998, iteration: 184398
loss: 0.9711352586746216,grad_norm: 0.8867726274758855, iteration: 184399
loss: 1.0776981115341187,grad_norm: 0.9999997726421371, iteration: 184400
loss: 1.0464705228805542,grad_norm: 0.9821332260363062, iteration: 184401
loss: 0.9852865934371948,grad_norm: 0.9352312858526582, iteration: 184402
loss: 0.9744867086410522,grad_norm: 0.9999992129805346, iteration: 184403
loss: 1.0472865104675293,grad_norm: 0.9999998584758282, iteration: 184404
loss: 1.0284501314163208,grad_norm: 0.9999994608376094, iteration: 184405
loss: 0.951041579246521,grad_norm: 0.9296648314967968, iteration: 184406
loss: 0.9989532232284546,grad_norm: 0.9197439955580133, iteration: 184407
loss: 1.050253987312317,grad_norm: 0.9999992219096535, iteration: 184408
loss: 1.0461443662643433,grad_norm: 1.0000000146925017, iteration: 184409
loss: 0.9770317077636719,grad_norm: 0.9999990709008103, iteration: 184410
loss: 1.0731550455093384,grad_norm: 0.9999990723978839, iteration: 184411
loss: 0.9874857664108276,grad_norm: 0.9999992204142327, iteration: 184412
loss: 1.0234838724136353,grad_norm: 0.9999990890300328, iteration: 184413
loss: 1.0664749145507812,grad_norm: 0.9505072501683545, iteration: 184414
loss: 0.9715756773948669,grad_norm: 0.999999092708399, iteration: 184415
loss: 1.1167227029800415,grad_norm: 0.9999992543682883, iteration: 184416
loss: 0.9963972568511963,grad_norm: 0.8602917730897331, iteration: 184417
loss: 0.9970664381980896,grad_norm: 0.9999991547828186, iteration: 184418
loss: 1.025746464729309,grad_norm: 0.9645188384679158, iteration: 184419
loss: 0.9548755288124084,grad_norm: 0.920247636344536, iteration: 184420
loss: 0.9572077393531799,grad_norm: 0.9712491458409094, iteration: 184421
loss: 0.9954659342765808,grad_norm: 0.8106280883300195, iteration: 184422
loss: 0.9870144128799438,grad_norm: 0.9999993374209779, iteration: 184423
loss: 1.0002942085266113,grad_norm: 0.9999992083960191, iteration: 184424
loss: 1.0302993059158325,grad_norm: 0.8896246945029909, iteration: 184425
loss: 0.9907442331314087,grad_norm: 0.9999994851277827, iteration: 184426
loss: 0.9871494770050049,grad_norm: 0.9471570070693431, iteration: 184427
loss: 0.9730974435806274,grad_norm: 0.9412264439460866, iteration: 184428
loss: 0.9933246374130249,grad_norm: 0.907967150977816, iteration: 184429
loss: 1.0609840154647827,grad_norm: 0.9999999196540167, iteration: 184430
loss: 0.9992440938949585,grad_norm: 0.9999990662965702, iteration: 184431
loss: 0.9677751660346985,grad_norm: 0.9999991602609208, iteration: 184432
loss: 1.019561767578125,grad_norm: 0.8921814174799452, iteration: 184433
loss: 0.9988167881965637,grad_norm: 0.9999991843298183, iteration: 184434
loss: 0.9858993291854858,grad_norm: 0.9369052881387686, iteration: 184435
loss: 0.9789469838142395,grad_norm: 0.9999990855289319, iteration: 184436
loss: 1.0228866338729858,grad_norm: 0.915456450890285, iteration: 184437
loss: 0.9920262694358826,grad_norm: 0.9999991039185785, iteration: 184438
loss: 1.001173734664917,grad_norm: 0.9999990540966457, iteration: 184439
loss: 0.9649248123168945,grad_norm: 0.8930919059160125, iteration: 184440
loss: 0.9797465801239014,grad_norm: 0.9966752589752983, iteration: 184441
loss: 0.9961795210838318,grad_norm: 0.9338863413250056, iteration: 184442
loss: 0.9973454475402832,grad_norm: 0.9999998288708157, iteration: 184443
loss: 0.9932512044906616,grad_norm: 0.999999039173304, iteration: 184444
loss: 1.041171669960022,grad_norm: 0.9999998433339186, iteration: 184445
loss: 0.9741694331169128,grad_norm: 0.9999989766974143, iteration: 184446
loss: 1.0443427562713623,grad_norm: 0.9270928762794128, iteration: 184447
loss: 0.9949120879173279,grad_norm: 0.8991353781404556, iteration: 184448
loss: 1.0112240314483643,grad_norm: 0.999999058607641, iteration: 184449
loss: 0.9798498749732971,grad_norm: 0.9999997362880203, iteration: 184450
loss: 1.0011348724365234,grad_norm: 0.8576455397048058, iteration: 184451
loss: 1.0623369216918945,grad_norm: 0.9999994002430987, iteration: 184452
loss: 1.028975486755371,grad_norm: 0.8979079069534731, iteration: 184453
loss: 0.9723044037818909,grad_norm: 0.9561476196961822, iteration: 184454
loss: 1.017214298248291,grad_norm: 0.9821485022312001, iteration: 184455
loss: 1.011404275894165,grad_norm: 0.9999991361982931, iteration: 184456
loss: 1.033652663230896,grad_norm: 0.9999994853396418, iteration: 184457
loss: 0.9922008514404297,grad_norm: 0.8930431589928839, iteration: 184458
loss: 0.9955426454544067,grad_norm: 0.999999132934696, iteration: 184459
loss: 0.9556993246078491,grad_norm: 0.999998977776481, iteration: 184460
loss: 0.9863740801811218,grad_norm: 0.9999991444746322, iteration: 184461
loss: 0.9788119792938232,grad_norm: 0.9809317044776975, iteration: 184462
loss: 1.029167890548706,grad_norm: 0.8893368372713756, iteration: 184463
loss: 0.9757539629936218,grad_norm: 0.9999990258962207, iteration: 184464
loss: 1.0278733968734741,grad_norm: 0.8321622542355276, iteration: 184465
loss: 0.9949138164520264,grad_norm: 0.9161862559560989, iteration: 184466
loss: 1.019933819770813,grad_norm: 0.9999991116908862, iteration: 184467
loss: 0.9918031096458435,grad_norm: 0.8805937397021811, iteration: 184468
loss: 0.9992550611495972,grad_norm: 0.898946102846155, iteration: 184469
loss: 1.0064060688018799,grad_norm: 0.9999990281510664, iteration: 184470
loss: 0.9911892414093018,grad_norm: 0.9820415066594727, iteration: 184471
loss: 0.9730933308601379,grad_norm: 0.9999989309744902, iteration: 184472
loss: 1.0334384441375732,grad_norm: 0.8861113481225479, iteration: 184473
loss: 1.002076268196106,grad_norm: 0.8532558601874923, iteration: 184474
loss: 0.9981523752212524,grad_norm: 0.9175253853868536, iteration: 184475
loss: 1.012279987335205,grad_norm: 0.9736938597094212, iteration: 184476
loss: 1.0114359855651855,grad_norm: 0.930388132081281, iteration: 184477
loss: 0.9928568601608276,grad_norm: 0.9999991967645638, iteration: 184478
loss: 0.9932379126548767,grad_norm: 0.9999992604559925, iteration: 184479
loss: 1.017032504081726,grad_norm: 0.9999991401099441, iteration: 184480
loss: 1.0176180601119995,grad_norm: 0.9999998124927063, iteration: 184481
loss: 1.2429488897323608,grad_norm: 0.9999994842846246, iteration: 184482
loss: 1.0221047401428223,grad_norm: 0.9999988848725561, iteration: 184483
loss: 1.0122016668319702,grad_norm: 0.9999991302588624, iteration: 184484
loss: 1.0608469247817993,grad_norm: 0.959233772274927, iteration: 184485
loss: 1.0176784992218018,grad_norm: 0.9040095558376479, iteration: 184486
loss: 0.978027880191803,grad_norm: 0.9999990590700678, iteration: 184487
loss: 1.0230997800827026,grad_norm: 0.9999991494634842, iteration: 184488
loss: 0.9439896941184998,grad_norm: 0.9999990568783825, iteration: 184489
loss: 1.1058306694030762,grad_norm: 0.999999370049468, iteration: 184490
loss: 1.0100491046905518,grad_norm: 0.9967118316298126, iteration: 184491
loss: 0.9809410572052002,grad_norm: 0.9627339894298158, iteration: 184492
loss: 0.9814103245735168,grad_norm: 0.8407866921335918, iteration: 184493
loss: 0.9963824152946472,grad_norm: 0.9999992477412771, iteration: 184494
loss: 0.9744917154312134,grad_norm: 0.999999113670589, iteration: 184495
loss: 1.0020345449447632,grad_norm: 0.9999993266486824, iteration: 184496
loss: 1.0031598806381226,grad_norm: 0.9999990806966577, iteration: 184497
loss: 1.0187374353408813,grad_norm: 0.9892682454182168, iteration: 184498
loss: 1.0230329036712646,grad_norm: 0.9999990358062987, iteration: 184499
loss: 0.9871492385864258,grad_norm: 0.9180158845646302, iteration: 184500
loss: 1.0102204084396362,grad_norm: 0.9999991834371832, iteration: 184501
loss: 0.9886043667793274,grad_norm: 0.8437798800355372, iteration: 184502
loss: 0.9999625086784363,grad_norm: 0.9124599254157592, iteration: 184503
loss: 1.0273189544677734,grad_norm: 0.9999991635001398, iteration: 184504
loss: 1.0064219236373901,grad_norm: 0.9999992099882127, iteration: 184505
loss: 0.9885736107826233,grad_norm: 0.8569530792337339, iteration: 184506
loss: 1.0089926719665527,grad_norm: 0.8859745478635018, iteration: 184507
loss: 0.9878818392753601,grad_norm: 0.999999843546681, iteration: 184508
loss: 0.992466926574707,grad_norm: 0.9292416999254226, iteration: 184509
loss: 1.026107668876648,grad_norm: 0.9999993355014777, iteration: 184510
loss: 0.9718330502510071,grad_norm: 0.9865106586460199, iteration: 184511
loss: 0.9796736836433411,grad_norm: 0.9675085647706384, iteration: 184512
loss: 1.0274608135223389,grad_norm: 0.999999055770814, iteration: 184513
loss: 1.0028433799743652,grad_norm: 0.9999990236932007, iteration: 184514
loss: 0.9981749653816223,grad_norm: 0.9999990494906268, iteration: 184515
loss: 1.0093255043029785,grad_norm: 0.9999989883127326, iteration: 184516
loss: 0.9664178490638733,grad_norm: 0.87845618153149, iteration: 184517
loss: 0.9954262375831604,grad_norm: 0.9999992012530617, iteration: 184518
loss: 0.9974420666694641,grad_norm: 0.8554830031420161, iteration: 184519
loss: 0.9985952377319336,grad_norm: 0.9256415573866674, iteration: 184520
loss: 1.0048528909683228,grad_norm: 0.9999996543406144, iteration: 184521
loss: 1.0147902965545654,grad_norm: 0.8824221376781265, iteration: 184522
loss: 1.0284422636032104,grad_norm: 0.999999073261908, iteration: 184523
loss: 1.062546730041504,grad_norm: 0.9999994669111135, iteration: 184524
loss: 0.9997273087501526,grad_norm: 0.9999991765691558, iteration: 184525
loss: 0.9658542275428772,grad_norm: 0.9319936169543471, iteration: 184526
loss: 1.0262576341629028,grad_norm: 0.9999989858521782, iteration: 184527
loss: 1.0330535173416138,grad_norm: 0.8779191584841558, iteration: 184528
loss: 0.9991495013237,grad_norm: 0.9160060395832677, iteration: 184529
loss: 1.0100406408309937,grad_norm: 0.969251156574229, iteration: 184530
loss: 0.9669380784034729,grad_norm: 0.9999992205640044, iteration: 184531
loss: 1.0640132427215576,grad_norm: 0.9999999903765656, iteration: 184532
loss: 0.9997026324272156,grad_norm: 0.8452399149406338, iteration: 184533
loss: 1.0420979261398315,grad_norm: 0.9999995131808481, iteration: 184534
loss: 1.0109683275222778,grad_norm: 0.8615127712088168, iteration: 184535
loss: 0.9662626385688782,grad_norm: 0.999999087403671, iteration: 184536
loss: 0.9836610555648804,grad_norm: 0.8936194457445102, iteration: 184537
loss: 0.9938821196556091,grad_norm: 0.9999991219198023, iteration: 184538
loss: 0.9849196076393127,grad_norm: 0.9375892592144137, iteration: 184539
loss: 0.9497292041778564,grad_norm: 0.7963390467206874, iteration: 184540
loss: 1.0007719993591309,grad_norm: 0.9682363588491458, iteration: 184541
loss: 0.988499104976654,grad_norm: 0.9648178829345841, iteration: 184542
loss: 1.0869100093841553,grad_norm: 0.9999994237168387, iteration: 184543
loss: 0.981234610080719,grad_norm: 0.9254774550470629, iteration: 184544
loss: 1.0127917528152466,grad_norm: 0.9999992991257803, iteration: 184545
loss: 1.0077064037322998,grad_norm: 0.9999990148414989, iteration: 184546
loss: 1.0905101299285889,grad_norm: 0.9999991392322669, iteration: 184547
loss: 1.0210891962051392,grad_norm: 0.9999995076013142, iteration: 184548
loss: 0.9758701920509338,grad_norm: 0.7535565343737272, iteration: 184549
loss: 1.145495057106018,grad_norm: 0.9999995695156662, iteration: 184550
loss: 1.2058751583099365,grad_norm: 0.9999998830675086, iteration: 184551
loss: 0.9857803583145142,grad_norm: 0.9999996207824977, iteration: 184552
loss: 1.1566046476364136,grad_norm: 0.9999993337954477, iteration: 184553
loss: 0.9542790055274963,grad_norm: 0.9999991118105205, iteration: 184554
loss: 1.1300128698349,grad_norm: 0.9999996321898404, iteration: 184555
loss: 1.0460865497589111,grad_norm: 0.9999997573136645, iteration: 184556
loss: 1.1391055583953857,grad_norm: 0.9999998795417597, iteration: 184557
loss: 1.011744499206543,grad_norm: 0.9765400237069004, iteration: 184558
loss: 1.039831280708313,grad_norm: 0.9999994152176778, iteration: 184559
loss: 1.064908742904663,grad_norm: 0.9391341449920336, iteration: 184560
loss: 1.0731629133224487,grad_norm: 0.8725364762124515, iteration: 184561
loss: 1.1967788934707642,grad_norm: 0.9999995830495867, iteration: 184562
loss: 1.056886911392212,grad_norm: 0.9999995428165976, iteration: 184563
loss: 1.0347357988357544,grad_norm: 0.9865056534665121, iteration: 184564
loss: 1.0905712842941284,grad_norm: 0.9999996021198043, iteration: 184565
loss: 1.0113458633422852,grad_norm: 1.0000000328569478, iteration: 184566
loss: 1.0925147533416748,grad_norm: 0.9999989725311561, iteration: 184567
loss: 1.0344425439834595,grad_norm: 0.9999998016318551, iteration: 184568
loss: 1.1307042837142944,grad_norm: 0.9999997058727773, iteration: 184569
loss: 1.0459033250808716,grad_norm: 0.9999994129283464, iteration: 184570
loss: 1.275694489479065,grad_norm: 0.9999998653361865, iteration: 184571
loss: 1.0212920904159546,grad_norm: 0.9999991444094967, iteration: 184572
loss: 1.0477350950241089,grad_norm: 0.9999991337286616, iteration: 184573
loss: 1.2990241050720215,grad_norm: 0.9999998822277981, iteration: 184574
loss: 1.0779547691345215,grad_norm: 0.9999997746328062, iteration: 184575
loss: 1.0198150873184204,grad_norm: 0.923743846988983, iteration: 184576
loss: 1.1619224548339844,grad_norm: 0.9999994261061503, iteration: 184577
loss: 1.0854865312576294,grad_norm: 0.9999998338284483, iteration: 184578
loss: 1.099097490310669,grad_norm: 0.9999999315559343, iteration: 184579
loss: 1.3110979795455933,grad_norm: 0.9999996047615728, iteration: 184580
loss: 1.2385276556015015,grad_norm: 0.9999999800981421, iteration: 184581
loss: 1.1861884593963623,grad_norm: 0.9999995715917946, iteration: 184582
loss: 1.235452651977539,grad_norm: 0.9999997521300082, iteration: 184583
loss: 1.1640815734863281,grad_norm: 0.9999998603535479, iteration: 184584
loss: 1.2403228282928467,grad_norm: 0.9999996044435602, iteration: 184585
loss: 1.116690754890442,grad_norm: 0.9999992918484407, iteration: 184586
loss: 1.0398342609405518,grad_norm: 0.9999996168387064, iteration: 184587
loss: 1.1477116346359253,grad_norm: 0.9999994379397605, iteration: 184588
loss: 1.262171745300293,grad_norm: 0.9999998264986532, iteration: 184589
loss: 1.1598331928253174,grad_norm: 0.9999993859589416, iteration: 184590
loss: 1.0820821523666382,grad_norm: 0.9999992574800524, iteration: 184591
loss: 1.2229363918304443,grad_norm: 0.9999998262095422, iteration: 184592
loss: 1.1333131790161133,grad_norm: 0.9999991755118178, iteration: 184593
loss: 1.2084994316101074,grad_norm: 0.9999995611934436, iteration: 184594
loss: 1.3290493488311768,grad_norm: 0.9999997327551188, iteration: 184595
loss: 1.1301966905593872,grad_norm: 0.9999995417127966, iteration: 184596
loss: 1.219893455505371,grad_norm: 0.999999722127109, iteration: 184597
loss: 1.3836219310760498,grad_norm: 0.9999993814612538, iteration: 184598
loss: 1.4312058687210083,grad_norm: 0.9999998789941686, iteration: 184599
loss: 1.2986136674880981,grad_norm: 0.999999742129421, iteration: 184600
loss: 1.352140188217163,grad_norm: 0.9999998592015369, iteration: 184601
loss: 1.2466248273849487,grad_norm: 0.9999999428117183, iteration: 184602
loss: 1.3765757083892822,grad_norm: 1.000000003285828, iteration: 184603
loss: 1.553174614906311,grad_norm: 0.9999998852003088, iteration: 184604
loss: 1.3827589750289917,grad_norm: 0.9999997834632941, iteration: 184605
loss: 1.2202376127243042,grad_norm: 0.9999999241044966, iteration: 184606
loss: 1.3363916873931885,grad_norm: 0.9999998437237789, iteration: 184607
loss: 1.493577241897583,grad_norm: 0.9999995898677771, iteration: 184608
loss: 1.4494788646697998,grad_norm: 0.9999997401930674, iteration: 184609
loss: 1.3227752447128296,grad_norm: 0.9999997263062188, iteration: 184610
loss: 1.5062063932418823,grad_norm: 0.9999998187819553, iteration: 184611
loss: 1.6138334274291992,grad_norm: 0.9999996556563596, iteration: 184612
loss: 1.2912956476211548,grad_norm: 0.9999998014062617, iteration: 184613
loss: 1.5250290632247925,grad_norm: 1.0000000313895718, iteration: 184614
loss: 1.6862882375717163,grad_norm: 0.9999998324176258, iteration: 184615
loss: 1.5331822633743286,grad_norm: 0.9999997706421899, iteration: 184616
loss: 1.4115467071533203,grad_norm: 0.999999772238671, iteration: 184617
loss: 1.6384763717651367,grad_norm: 0.9999997902655829, iteration: 184618
loss: 1.6085222959518433,grad_norm: 0.9999998245718711, iteration: 184619
loss: 1.6049203872680664,grad_norm: 0.9999999921176312, iteration: 184620
loss: 1.4931670427322388,grad_norm: 0.9999997139067889, iteration: 184621
loss: 1.3181143999099731,grad_norm: 0.999999507374828, iteration: 184622
loss: 1.5159847736358643,grad_norm: 0.9999998861381546, iteration: 184623
loss: 1.2669955492019653,grad_norm: 0.9999998393489512, iteration: 184624
loss: 1.4545435905456543,grad_norm: 0.9999998702170464, iteration: 184625
loss: 1.6797566413879395,grad_norm: 0.9999998568391736, iteration: 184626
loss: 1.5271987915039062,grad_norm: 1.000000033097956, iteration: 184627
loss: 1.4685940742492676,grad_norm: 0.9999999774006227, iteration: 184628
loss: 1.453019380569458,grad_norm: 0.9999997270025746, iteration: 184629
loss: 1.336290955543518,grad_norm: 0.9999998238342596, iteration: 184630
loss: 1.3401882648468018,grad_norm: 0.9999996073880524, iteration: 184631
loss: 1.6711480617523193,grad_norm: 0.999999795381707, iteration: 184632
loss: 1.589440107345581,grad_norm: 0.9999999153352662, iteration: 184633
loss: 1.6264760494232178,grad_norm: 0.9999998538741302, iteration: 184634
loss: 1.4579038619995117,grad_norm: 0.9999996726591475, iteration: 184635
loss: 1.854519009590149,grad_norm: 0.9999997757668232, iteration: 184636
loss: 1.5308945178985596,grad_norm: 0.9999994795010085, iteration: 184637
loss: 1.267151951789856,grad_norm: 0.9999994466282947, iteration: 184638
loss: 1.3198238611221313,grad_norm: 1.0000000243667548, iteration: 184639
loss: 1.4815133810043335,grad_norm: 0.999999717350688, iteration: 184640
loss: 1.4600647687911987,grad_norm: 0.9999995246399903, iteration: 184641
loss: 1.8334428071975708,grad_norm: 0.9999999217144628, iteration: 184642
loss: 1.5176852941513062,grad_norm: 0.9999998724727585, iteration: 184643
loss: 1.6382989883422852,grad_norm: 0.9999997397869225, iteration: 184644
loss: 1.6441627740859985,grad_norm: 1.000000026408418, iteration: 184645
loss: 1.4388413429260254,grad_norm: 0.999999761209646, iteration: 184646
loss: 1.4484513998031616,grad_norm: 0.99999954109076, iteration: 184647
loss: 1.5977981090545654,grad_norm: 0.999999855733727, iteration: 184648
loss: 1.5992268323898315,grad_norm: 0.9999997295171396, iteration: 184649
loss: 1.6560167074203491,grad_norm: 0.9999998658001066, iteration: 184650
loss: 1.5884547233581543,grad_norm: 1.0000000094986168, iteration: 184651
loss: 1.8279216289520264,grad_norm: 0.9999999505214142, iteration: 184652
loss: 1.6610841751098633,grad_norm: 0.9999998904813429, iteration: 184653
loss: 1.6722536087036133,grad_norm: 0.9999996596951127, iteration: 184654
loss: 1.5765525102615356,grad_norm: 0.9999999557996793, iteration: 184655
loss: 1.3076171875,grad_norm: 0.9999996961802187, iteration: 184656
loss: 1.6653389930725098,grad_norm: 0.9999997163771017, iteration: 184657
loss: 1.428575038909912,grad_norm: 0.999999835773115, iteration: 184658
loss: 1.534645915031433,grad_norm: 0.9999997946508757, iteration: 184659
loss: 1.5251387357711792,grad_norm: 0.9999998113522934, iteration: 184660
loss: 1.4541270732879639,grad_norm: 0.9999996048745029, iteration: 184661
loss: 1.3794307708740234,grad_norm: 0.9999998383781546, iteration: 184662
loss: 1.292327642440796,grad_norm: 0.999999824498763, iteration: 184663
loss: 1.5325088500976562,grad_norm: 0.9999998850601358, iteration: 184664
loss: 1.5976369380950928,grad_norm: 0.9999999554506115, iteration: 184665
loss: 1.5628485679626465,grad_norm: 0.9999997257102672, iteration: 184666
loss: 1.4278764724731445,grad_norm: 0.9999996934572613, iteration: 184667
loss: 1.4967200756072998,grad_norm: 0.9999997736511758, iteration: 184668
loss: 1.462142825126648,grad_norm: 0.9999996902041105, iteration: 184669
loss: 1.6508013010025024,grad_norm: 0.9999996550072696, iteration: 184670
loss: 1.1935712099075317,grad_norm: 0.9999994527206717, iteration: 184671
loss: 1.3701362609863281,grad_norm: 0.9999996233980987, iteration: 184672
loss: 1.4055439233779907,grad_norm: 0.9999998431811767, iteration: 184673
loss: 1.3175545930862427,grad_norm: 0.9999999189319125, iteration: 184674
loss: 1.4710427522659302,grad_norm: 1.0000000120191292, iteration: 184675
loss: 1.353137731552124,grad_norm: 1.00000004008507, iteration: 184676
loss: 1.3259143829345703,grad_norm: 0.9999995239580536, iteration: 184677
loss: 1.4592729806900024,grad_norm: 0.9999995736111188, iteration: 184678
loss: 1.3864226341247559,grad_norm: 1.0000000147596246, iteration: 184679
loss: 1.2529622316360474,grad_norm: 0.9999998756534981, iteration: 184680
loss: 1.2391812801361084,grad_norm: 0.9999997339496657, iteration: 184681
loss: 1.1692166328430176,grad_norm: 0.9999996218191949, iteration: 184682
loss: 1.2133413553237915,grad_norm: 0.9999998729935962, iteration: 184683
loss: 1.297129511833191,grad_norm: 0.9999995643554932, iteration: 184684
loss: 1.2342495918273926,grad_norm: 0.9999997887640651, iteration: 184685
loss: 1.2719147205352783,grad_norm: 0.9999996551868561, iteration: 184686
loss: 1.2894037961959839,grad_norm: 0.9999998771382149, iteration: 184687
loss: 1.3174586296081543,grad_norm: 0.9999998647924765, iteration: 184688
loss: 1.2160985469818115,grad_norm: 0.9999996518820671, iteration: 184689
loss: 1.1487493515014648,grad_norm: 0.9999994746408791, iteration: 184690
loss: 1.2897281646728516,grad_norm: 0.9999997859483901, iteration: 184691
loss: 1.2995816469192505,grad_norm: 0.9999997278703691, iteration: 184692
loss: 1.1105263233184814,grad_norm: 0.9999995384247308, iteration: 184693
loss: 1.1650161743164062,grad_norm: 0.9999998484098493, iteration: 184694
loss: 1.0911359786987305,grad_norm: 0.9999995569079291, iteration: 184695
loss: 1.1501692533493042,grad_norm: 0.9999993014335065, iteration: 184696
loss: 1.0934617519378662,grad_norm: 0.9999992838182507, iteration: 184697
loss: 1.0180983543395996,grad_norm: 0.9999990089855932, iteration: 184698
loss: 1.0892032384872437,grad_norm: 0.9999993234773082, iteration: 184699
loss: 1.1132359504699707,grad_norm: 0.9999999857274665, iteration: 184700
loss: 1.0626217126846313,grad_norm: 0.999999491073783, iteration: 184701
loss: 1.0753260850906372,grad_norm: 0.999999732974663, iteration: 184702
loss: 0.996634304523468,grad_norm: 0.999999167202666, iteration: 184703
loss: 1.141749382019043,grad_norm: 0.9999997170436525, iteration: 184704
loss: 1.0835542678833008,grad_norm: 0.999999813529164, iteration: 184705
loss: 1.0177420377731323,grad_norm: 0.9999992938791585, iteration: 184706
loss: 1.0319859981536865,grad_norm: 0.986658533923244, iteration: 184707
loss: 1.0961971282958984,grad_norm: 0.9999998659264596, iteration: 184708
loss: 1.0691792964935303,grad_norm: 0.9999995855318008, iteration: 184709
loss: 1.0430982112884521,grad_norm: 0.9999992808413698, iteration: 184710
loss: 1.0957332849502563,grad_norm: 0.9999998010775631, iteration: 184711
loss: 1.1068987846374512,grad_norm: 0.9999994663656301, iteration: 184712
loss: 0.999492347240448,grad_norm: 0.9999993300344912, iteration: 184713
loss: 1.0309265851974487,grad_norm: 0.9999991902382138, iteration: 184714
loss: 1.1379342079162598,grad_norm: 0.9999998143711823, iteration: 184715
loss: 1.08138108253479,grad_norm: 0.9009948812935833, iteration: 184716
loss: 1.01132333278656,grad_norm: 0.999999294647983, iteration: 184717
loss: 1.0342411994934082,grad_norm: 0.9999991611298524, iteration: 184718
loss: 1.0299664735794067,grad_norm: 0.9999995419720219, iteration: 184719
loss: 1.0676883459091187,grad_norm: 0.9980122620585612, iteration: 184720
loss: 1.0291565656661987,grad_norm: 0.9999999081503275, iteration: 184721
loss: 1.0722616910934448,grad_norm: 0.999999270504064, iteration: 184722
loss: 1.0196715593338013,grad_norm: 0.9999991955681977, iteration: 184723
loss: 1.0758321285247803,grad_norm: 0.9999999557701101, iteration: 184724
loss: 1.0284866094589233,grad_norm: 0.881547217110088, iteration: 184725
loss: 1.037392497062683,grad_norm: 0.9999991093472621, iteration: 184726
loss: 1.0193780660629272,grad_norm: 0.999999469624089, iteration: 184727
loss: 0.9588466286659241,grad_norm: 0.9999992592433083, iteration: 184728
loss: 0.9989637732505798,grad_norm: 0.9999991303929067, iteration: 184729
loss: 1.0256853103637695,grad_norm: 0.9999993649863119, iteration: 184730
loss: 1.0110656023025513,grad_norm: 0.9999990203434193, iteration: 184731
loss: 1.1610878705978394,grad_norm: 0.9999995354039305, iteration: 184732
loss: 1.0766152143478394,grad_norm: 0.99999946451935, iteration: 184733
loss: 1.041577696800232,grad_norm: 0.9999991725583736, iteration: 184734
loss: 1.0159786939620972,grad_norm: 0.99959524191289, iteration: 184735
loss: 1.0035295486450195,grad_norm: 0.9999992385406924, iteration: 184736
loss: 1.0108225345611572,grad_norm: 0.9999993995418348, iteration: 184737
loss: 1.0508525371551514,grad_norm: 0.9999993764857691, iteration: 184738
loss: 1.0472235679626465,grad_norm: 0.9999994406815952, iteration: 184739
loss: 0.9896660447120667,grad_norm: 0.9999992175476686, iteration: 184740
loss: 1.0371935367584229,grad_norm: 0.9999998583293767, iteration: 184741
loss: 1.0196518898010254,grad_norm: 0.9999994721747854, iteration: 184742
loss: 1.2267706394195557,grad_norm: 0.9999997020779631, iteration: 184743
loss: 1.0238016843795776,grad_norm: 0.9999991717125511, iteration: 184744
loss: 0.9760330319404602,grad_norm: 0.8631248330496735, iteration: 184745
loss: 1.1480381488800049,grad_norm: 0.9999999341812612, iteration: 184746
loss: 1.1017476320266724,grad_norm: 0.9999998767665988, iteration: 184747
loss: 0.9838908314704895,grad_norm: 0.8782256918858338, iteration: 184748
loss: 0.9746992588043213,grad_norm: 0.9999991579034011, iteration: 184749
loss: 1.0189615488052368,grad_norm: 0.9999997479496959, iteration: 184750
loss: 1.0321996212005615,grad_norm: 0.9475266313309405, iteration: 184751
loss: 1.078480839729309,grad_norm: 0.999999071991812, iteration: 184752
loss: 1.0229480266571045,grad_norm: 0.9999998555083724, iteration: 184753
loss: 1.0189129114151,grad_norm: 0.937279115927052, iteration: 184754
loss: 0.998355507850647,grad_norm: 0.848991291378337, iteration: 184755
loss: 1.056863784790039,grad_norm: 0.9999994311376742, iteration: 184756
loss: 1.0350779294967651,grad_norm: 0.9321068784512273, iteration: 184757
loss: 0.978240966796875,grad_norm: 0.9515004366960998, iteration: 184758
loss: 1.069990873336792,grad_norm: 0.9999998101393646, iteration: 184759
loss: 1.0139806270599365,grad_norm: 0.8826491290308182, iteration: 184760
loss: 1.0292880535125732,grad_norm: 0.9999993299718402, iteration: 184761
loss: 0.9743505716323853,grad_norm: 0.9999990339996446, iteration: 184762
loss: 1.0297926664352417,grad_norm: 0.9204432827891219, iteration: 184763
loss: 0.989656388759613,grad_norm: 0.8903344317593277, iteration: 184764
loss: 0.9740065336227417,grad_norm: 0.9828451351244506, iteration: 184765
loss: 0.9942179918289185,grad_norm: 0.9055523385613055, iteration: 184766
loss: 0.9932658672332764,grad_norm: 0.9999995045805118, iteration: 184767
loss: 1.009323000907898,grad_norm: 0.9999990680972788, iteration: 184768
loss: 0.9947384595870972,grad_norm: 0.9011230848234023, iteration: 184769
loss: 0.9886797666549683,grad_norm: 0.9999992867000911, iteration: 184770
loss: 1.0394623279571533,grad_norm: 0.9999997272964007, iteration: 184771
loss: 0.9768297076225281,grad_norm: 0.999999075213539, iteration: 184772
loss: 1.0028008222579956,grad_norm: 0.999999203684097, iteration: 184773
loss: 1.089077353477478,grad_norm: 0.9999992249501816, iteration: 184774
loss: 1.0306134223937988,grad_norm: 0.999999059552717, iteration: 184775
loss: 1.038584589958191,grad_norm: 0.9999994232240381, iteration: 184776
loss: 0.9439753890037537,grad_norm: 0.9999991066293036, iteration: 184777
loss: 1.0272328853607178,grad_norm: 0.9712787803626025, iteration: 184778
loss: 1.005144476890564,grad_norm: 0.9999990819681794, iteration: 184779
loss: 0.9879133701324463,grad_norm: 0.9379630591681566, iteration: 184780
loss: 0.9805994629859924,grad_norm: 0.9999990607533707, iteration: 184781
loss: 0.9909286499023438,grad_norm: 0.9474888407571, iteration: 184782
loss: 0.9807081818580627,grad_norm: 0.9021710597396246, iteration: 184783
loss: 0.9871901273727417,grad_norm: 0.804839291151041, iteration: 184784
loss: 0.9770780801773071,grad_norm: 0.9999999391972801, iteration: 184785
loss: 1.0439029932022095,grad_norm: 0.9999990953930449, iteration: 184786
loss: 1.0287599563598633,grad_norm: 0.999999070239486, iteration: 184787
loss: 1.0899040699005127,grad_norm: 0.9999992396808903, iteration: 184788
loss: 1.027590036392212,grad_norm: 0.9999998983661494, iteration: 184789
loss: 1.0077147483825684,grad_norm: 0.999999268298856, iteration: 184790
loss: 0.9860212802886963,grad_norm: 0.9999991079771365, iteration: 184791
loss: 1.0488324165344238,grad_norm: 0.9947632876039793, iteration: 184792
loss: 0.9987973570823669,grad_norm: 0.9999989888225752, iteration: 184793
loss: 1.030357837677002,grad_norm: 0.9999990194656199, iteration: 184794
loss: 1.0310739278793335,grad_norm: 0.9999991649126297, iteration: 184795
loss: 0.9831348657608032,grad_norm: 0.8750764893794052, iteration: 184796
loss: 0.9627214074134827,grad_norm: 0.8998351937397417, iteration: 184797
loss: 0.9886955618858337,grad_norm: 0.999999017614471, iteration: 184798
loss: 0.9913263916969299,grad_norm: 0.9999991627281851, iteration: 184799
loss: 1.0190459489822388,grad_norm: 0.9162932474831692, iteration: 184800
loss: 1.0054696798324585,grad_norm: 0.9190989855143855, iteration: 184801
loss: 0.9893789291381836,grad_norm: 0.9999990367751143, iteration: 184802
loss: 0.9978674650192261,grad_norm: 0.9884757938334184, iteration: 184803
loss: 1.0158039331436157,grad_norm: 0.9903464825219566, iteration: 184804
loss: 1.0115156173706055,grad_norm: 0.7977362278063808, iteration: 184805
loss: 1.0056004524230957,grad_norm: 0.8391831700024316, iteration: 184806
loss: 0.9719887971878052,grad_norm: 0.9999991906091507, iteration: 184807
loss: 0.9862520098686218,grad_norm: 0.9999990407995345, iteration: 184808
loss: 1.0283631086349487,grad_norm: 0.9394859792613578, iteration: 184809
loss: 1.0111225843429565,grad_norm: 0.9999995550091048, iteration: 184810
loss: 0.9974831938743591,grad_norm: 0.8826277723954682, iteration: 184811
loss: 1.0386377573013306,grad_norm: 0.9820637536779218, iteration: 184812
loss: 1.0014647245407104,grad_norm: 0.9411511108453977, iteration: 184813
loss: 0.9710879325866699,grad_norm: 0.9999991085285482, iteration: 184814
loss: 1.0437555313110352,grad_norm: 0.9999994893354472, iteration: 184815
loss: 0.9997038245201111,grad_norm: 0.9615469489659961, iteration: 184816
loss: 1.0179671049118042,grad_norm: 0.9872975654459365, iteration: 184817
loss: 1.0400031805038452,grad_norm: 0.9999990017374115, iteration: 184818
loss: 1.0439260005950928,grad_norm: 0.9999990282130595, iteration: 184819
loss: 1.1256787776947021,grad_norm: 0.9999993612881969, iteration: 184820
loss: 1.0230224132537842,grad_norm: 0.8305913666652838, iteration: 184821
loss: 1.0105469226837158,grad_norm: 0.9811602412061878, iteration: 184822
loss: 0.9959325194358826,grad_norm: 0.9999990793135375, iteration: 184823
loss: 1.069758415222168,grad_norm: 0.9999991488050636, iteration: 184824
loss: 1.0415549278259277,grad_norm: 0.8324110078793813, iteration: 184825
loss: 1.0186549425125122,grad_norm: 0.9297067648943204, iteration: 184826
loss: 0.9931269288063049,grad_norm: 0.8454207393166222, iteration: 184827
loss: 1.0853825807571411,grad_norm: 0.9813895490920067, iteration: 184828
loss: 0.9807032942771912,grad_norm: 0.9999990172361607, iteration: 184829
loss: 1.0925382375717163,grad_norm: 0.9999994579046987, iteration: 184830
loss: 1.0060977935791016,grad_norm: 0.9856879924601267, iteration: 184831
loss: 1.057485580444336,grad_norm: 0.9999993685949964, iteration: 184832
loss: 0.9863653182983398,grad_norm: 0.9999991912283546, iteration: 184833
loss: 0.9724522233009338,grad_norm: 0.9999992142817321, iteration: 184834
loss: 1.0150943994522095,grad_norm: 0.9999991638401204, iteration: 184835
loss: 1.0040080547332764,grad_norm: 0.9649367538485204, iteration: 184836
loss: 0.9904983639717102,grad_norm: 0.9999992406559693, iteration: 184837
loss: 1.0665901899337769,grad_norm: 0.9999992413019316, iteration: 184838
loss: 1.0156068801879883,grad_norm: 0.9909533399772923, iteration: 184839
loss: 0.9990487694740295,grad_norm: 0.9055890090542291, iteration: 184840
loss: 0.9898038506507874,grad_norm: 0.999999474515244, iteration: 184841
loss: 0.9664182066917419,grad_norm: 0.9999990521937006, iteration: 184842
loss: 1.0306508541107178,grad_norm: 0.9999992539780989, iteration: 184843
loss: 1.0263705253601074,grad_norm: 0.9058961791971035, iteration: 184844
loss: 0.9769412279129028,grad_norm: 0.9873044800780041, iteration: 184845
loss: 1.0230696201324463,grad_norm: 0.9999991594417283, iteration: 184846
loss: 1.051931381225586,grad_norm: 0.9999992731697348, iteration: 184847
loss: 1.0132986307144165,grad_norm: 0.9999990754654176, iteration: 184848
loss: 0.9946284294128418,grad_norm: 0.9462205387768927, iteration: 184849
loss: 1.015283226966858,grad_norm: 0.9946356694782085, iteration: 184850
loss: 0.9739890694618225,grad_norm: 0.9999992173875891, iteration: 184851
loss: 1.0071377754211426,grad_norm: 0.9999992348856694, iteration: 184852
loss: 0.9930390119552612,grad_norm: 0.9999992188097668, iteration: 184853
loss: 0.992336094379425,grad_norm: 0.933196091105171, iteration: 184854
loss: 1.0397207736968994,grad_norm: 0.9999991567265154, iteration: 184855
loss: 0.9960768818855286,grad_norm: 0.9999991556580878, iteration: 184856
loss: 0.9819815158843994,grad_norm: 0.9999991263787843, iteration: 184857
loss: 1.0438278913497925,grad_norm: 0.927334876297146, iteration: 184858
loss: 0.9924534559249878,grad_norm: 0.9557827900240566, iteration: 184859
loss: 0.9759650230407715,grad_norm: 0.999999209275279, iteration: 184860
loss: 0.9803534746170044,grad_norm: 0.9999991629723174, iteration: 184861
loss: 0.9570766687393188,grad_norm: 0.9164295454462701, iteration: 184862
loss: 1.0066808462142944,grad_norm: 0.962802340561959, iteration: 184863
loss: 0.9744922518730164,grad_norm: 0.8468604194438013, iteration: 184864
loss: 0.9983848929405212,grad_norm: 0.8856414026520167, iteration: 184865
loss: 0.9920361042022705,grad_norm: 0.9999997552536019, iteration: 184866
loss: 0.9740421772003174,grad_norm: 0.999999052412648, iteration: 184867
loss: 0.9865459203720093,grad_norm: 0.9623906150982717, iteration: 184868
loss: 0.9728866815567017,grad_norm: 0.9677934516783663, iteration: 184869
loss: 1.0933637619018555,grad_norm: 0.9999995094178836, iteration: 184870
loss: 1.0044690370559692,grad_norm: 0.9999991672408763, iteration: 184871
loss: 1.055689811706543,grad_norm: 0.9999991888157982, iteration: 184872
loss: 1.1118619441986084,grad_norm: 0.999999975026744, iteration: 184873
loss: 1.0345851182937622,grad_norm: 0.9999994596847017, iteration: 184874
loss: 1.0016907453536987,grad_norm: 0.9999989314345981, iteration: 184875
loss: 0.9825551509857178,grad_norm: 0.9468394760065336, iteration: 184876
loss: 1.0085289478302002,grad_norm: 0.999999125336418, iteration: 184877
loss: 0.9969542026519775,grad_norm: 0.9845723306125169, iteration: 184878
loss: 1.030515432357788,grad_norm: 0.9483184084937452, iteration: 184879
loss: 0.9920592308044434,grad_norm: 0.9999991744395336, iteration: 184880
loss: 0.9828818440437317,grad_norm: 0.915941206419703, iteration: 184881
loss: 1.0031627416610718,grad_norm: 0.9999991503895289, iteration: 184882
loss: 1.0271867513656616,grad_norm: 0.9999990403374291, iteration: 184883
loss: 1.0090025663375854,grad_norm: 0.9999991653794275, iteration: 184884
loss: 1.0243608951568604,grad_norm: 0.9156589176746877, iteration: 184885
loss: 0.9892933368682861,grad_norm: 0.99999916271428, iteration: 184886
loss: 1.019521951675415,grad_norm: 0.9375924155315043, iteration: 184887
loss: 0.9973598122596741,grad_norm: 0.9999993173910995, iteration: 184888
loss: 0.9627703428268433,grad_norm: 0.7827025083454697, iteration: 184889
loss: 0.9985311031341553,grad_norm: 0.9999990244989472, iteration: 184890
loss: 1.0937745571136475,grad_norm: 0.9999992951554737, iteration: 184891
loss: 0.9714329242706299,grad_norm: 0.9999990683568488, iteration: 184892
loss: 1.051658272743225,grad_norm: 0.9999998845530942, iteration: 184893
loss: 1.0991380214691162,grad_norm: 0.9999990953807602, iteration: 184894
loss: 0.9692193269729614,grad_norm: 0.9999990422136219, iteration: 184895
loss: 1.0202869176864624,grad_norm: 0.9999996925354899, iteration: 184896
loss: 1.0030535459518433,grad_norm: 0.9583216524155123, iteration: 184897
loss: 1.0259408950805664,grad_norm: 0.9999993032477267, iteration: 184898
loss: 0.9918574094772339,grad_norm: 0.9999989607894154, iteration: 184899
loss: 1.0357927083969116,grad_norm: 0.9999997252701228, iteration: 184900
loss: 1.1809005737304688,grad_norm: 0.9999990774311797, iteration: 184901
loss: 0.985578179359436,grad_norm: 0.9999990883315777, iteration: 184902
loss: 1.0319596529006958,grad_norm: 0.8011124620148514, iteration: 184903
loss: 1.0017426013946533,grad_norm: 0.942027398301618, iteration: 184904
loss: 1.0858131647109985,grad_norm: 0.9999992813387641, iteration: 184905
loss: 1.0227769613265991,grad_norm: 0.974178300886373, iteration: 184906
loss: 1.0428564548492432,grad_norm: 0.9999992733380115, iteration: 184907
loss: 0.9949170351028442,grad_norm: 0.9999992556675052, iteration: 184908
loss: 0.9929796457290649,grad_norm: 0.9209765462313116, iteration: 184909
loss: 0.9945846199989319,grad_norm: 0.872660227850739, iteration: 184910
loss: 1.0188696384429932,grad_norm: 0.9637582294753348, iteration: 184911
loss: 1.0631111860275269,grad_norm: 0.9285549700999072, iteration: 184912
loss: 0.9659692645072937,grad_norm: 0.9909337889043611, iteration: 184913
loss: 1.0126954317092896,grad_norm: 0.9435280409409343, iteration: 184914
loss: 0.977000892162323,grad_norm: 0.999999449278951, iteration: 184915
loss: 1.0206421613693237,grad_norm: 0.999999839786948, iteration: 184916
loss: 1.1095728874206543,grad_norm: 0.999999240857327, iteration: 184917
loss: 1.0715910196304321,grad_norm: 0.9999993539740478, iteration: 184918
loss: 1.0204179286956787,grad_norm: 0.9547493404642274, iteration: 184919
loss: 1.0141805410385132,grad_norm: 0.9999993204060081, iteration: 184920
loss: 0.9780349135398865,grad_norm: 0.9946859158561004, iteration: 184921
loss: 0.974584698677063,grad_norm: 0.9999992829929385, iteration: 184922
loss: 1.0382685661315918,grad_norm: 0.9999990526186227, iteration: 184923
loss: 0.9920645356178284,grad_norm: 0.9473907967258108, iteration: 184924
loss: 1.0100330114364624,grad_norm: 0.9999991745244279, iteration: 184925
loss: 0.9958815574645996,grad_norm: 0.9463123949868212, iteration: 184926
loss: 1.081690788269043,grad_norm: 0.9999994033646827, iteration: 184927
loss: 0.9878511428833008,grad_norm: 0.8911453049787655, iteration: 184928
loss: 0.982638418674469,grad_norm: 0.9999991299574342, iteration: 184929
loss: 0.9981572031974792,grad_norm: 0.9999991065524131, iteration: 184930
loss: 1.029166340827942,grad_norm: 0.9999991711324976, iteration: 184931
loss: 1.0481071472167969,grad_norm: 0.9999993352965343, iteration: 184932
loss: 0.9852768778800964,grad_norm: 0.9046119918362869, iteration: 184933
loss: 1.0456804037094116,grad_norm: 0.999999718786077, iteration: 184934
loss: 1.0300133228302002,grad_norm: 0.8423464004077459, iteration: 184935
loss: 1.1905124187469482,grad_norm: 0.9999995863712697, iteration: 184936
loss: 1.0002436637878418,grad_norm: 0.9670295887422707, iteration: 184937
loss: 1.0281587839126587,grad_norm: 0.9999992918329395, iteration: 184938
loss: 1.0211396217346191,grad_norm: 0.8876046283807552, iteration: 184939
loss: 1.0627435445785522,grad_norm: 0.9999991440166953, iteration: 184940
loss: 1.0134555101394653,grad_norm: 0.9687976114298273, iteration: 184941
loss: 1.0061209201812744,grad_norm: 0.9999996679864467, iteration: 184942
loss: 1.000255823135376,grad_norm: 0.8463735298407181, iteration: 184943
loss: 1.0249884128570557,grad_norm: 0.837005690817828, iteration: 184944
loss: 0.996393084526062,grad_norm: 0.894835695940109, iteration: 184945
loss: 1.0115829706192017,grad_norm: 0.9888437841087837, iteration: 184946
loss: 1.0184704065322876,grad_norm: 0.9272443470468601, iteration: 184947
loss: 0.9727942943572998,grad_norm: 0.9241479535502802, iteration: 184948
loss: 0.9722837209701538,grad_norm: 0.7778529349467597, iteration: 184949
loss: 1.0362882614135742,grad_norm: 0.9999997156263579, iteration: 184950
loss: 0.999423086643219,grad_norm: 0.9999992986229481, iteration: 184951
loss: 1.0064152479171753,grad_norm: 0.7162928557436906, iteration: 184952
loss: 1.014697790145874,grad_norm: 0.7646348478937879, iteration: 184953
loss: 1.0205583572387695,grad_norm: 0.9999993766178845, iteration: 184954
loss: 1.0361735820770264,grad_norm: 0.9999993046947664, iteration: 184955
loss: 0.987147331237793,grad_norm: 0.9452033575419087, iteration: 184956
loss: 0.9901434779167175,grad_norm: 0.9557110906911735, iteration: 184957
loss: 1.0787014961242676,grad_norm: 0.9999993419946392, iteration: 184958
loss: 1.0468456745147705,grad_norm: 0.9999996116713981, iteration: 184959
loss: 1.0378955602645874,grad_norm: 0.9999991982021799, iteration: 184960
loss: 1.0040358304977417,grad_norm: 0.9689380118800772, iteration: 184961
loss: 1.0840102434158325,grad_norm: 0.9999999905246552, iteration: 184962
loss: 1.0732675790786743,grad_norm: 0.9999996428694021, iteration: 184963
loss: 0.9998111128807068,grad_norm: 0.8921160392892885, iteration: 184964
loss: 1.1427360773086548,grad_norm: 0.9999993254150312, iteration: 184965
loss: 1.0896756649017334,grad_norm: 0.9999991248307508, iteration: 184966
loss: 1.0281705856323242,grad_norm: 0.9587019399349711, iteration: 184967
loss: 1.1319350004196167,grad_norm: 0.9999999896086507, iteration: 184968
loss: 1.0088104009628296,grad_norm: 0.9943434835349946, iteration: 184969
loss: 1.0303853750228882,grad_norm: 0.9999992240025958, iteration: 184970
loss: 1.0452940464019775,grad_norm: 0.9999990176216755, iteration: 184971
loss: 1.0531760454177856,grad_norm: 0.9999992646296602, iteration: 184972
loss: 0.9921121001243591,grad_norm: 0.9999990578695197, iteration: 184973
loss: 1.0056287050247192,grad_norm: 0.999999032465651, iteration: 184974
loss: 0.9943104386329651,grad_norm: 0.7902912526067422, iteration: 184975
loss: 1.053539514541626,grad_norm: 0.9999994348722612, iteration: 184976
loss: 1.0024330615997314,grad_norm: 0.7914715895600833, iteration: 184977
loss: 1.054065227508545,grad_norm: 0.9999992118820682, iteration: 184978
loss: 1.0699970722198486,grad_norm: 0.9166292064726578, iteration: 184979
loss: 1.0536558628082275,grad_norm: 0.9999996036804639, iteration: 184980
loss: 1.0456594228744507,grad_norm: 0.9999990144969001, iteration: 184981
loss: 0.9798029661178589,grad_norm: 0.9751183342087002, iteration: 184982
loss: 1.0049018859863281,grad_norm: 0.9999992522780123, iteration: 184983
loss: 0.9868385195732117,grad_norm: 0.9999991720518737, iteration: 184984
loss: 1.0014569759368896,grad_norm: 0.9999991017460012, iteration: 184985
loss: 0.9954870939254761,grad_norm: 0.9999992957027208, iteration: 184986
loss: 1.0034414529800415,grad_norm: 0.9999991587947957, iteration: 184987
loss: 1.0737483501434326,grad_norm: 0.969747186538003, iteration: 184988
loss: 1.015237808227539,grad_norm: 0.99999937412185, iteration: 184989
loss: 0.9556595087051392,grad_norm: 0.909651738737141, iteration: 184990
loss: 0.9939702153205872,grad_norm: 0.9999991721345868, iteration: 184991
loss: 1.0921927690505981,grad_norm: 0.9999996248151954, iteration: 184992
loss: 1.0741713047027588,grad_norm: 0.9999993120885102, iteration: 184993
loss: 1.0352959632873535,grad_norm: 0.9999993090205707, iteration: 184994
loss: 1.0501713752746582,grad_norm: 0.9999998053262006, iteration: 184995
loss: 1.0440540313720703,grad_norm: 0.9999998176452524, iteration: 184996
loss: 1.0143641233444214,grad_norm: 0.8425337436945888, iteration: 184997
loss: 0.9926887154579163,grad_norm: 0.8375915831167432, iteration: 184998
loss: 0.9877333045005798,grad_norm: 0.999999196600259, iteration: 184999
loss: 0.992325484752655,grad_norm: 0.9309881343424254, iteration: 185000
loss: 1.0134645700454712,grad_norm: 0.9999991260029856, iteration: 185001
loss: 0.9933469891548157,grad_norm: 0.9796677515279009, iteration: 185002
loss: 1.0293134450912476,grad_norm: 0.9999991186338858, iteration: 185003
loss: 1.02125883102417,grad_norm: 0.9999991783355839, iteration: 185004
loss: 0.9801427125930786,grad_norm: 0.9564234749002863, iteration: 185005
loss: 0.9930003881454468,grad_norm: 0.9598314611003157, iteration: 185006
loss: 1.0779451131820679,grad_norm: 0.9094053496406818, iteration: 185007
loss: 1.056061029434204,grad_norm: 0.9999995950239249, iteration: 185008
loss: 1.0001368522644043,grad_norm: 0.7918445863347003, iteration: 185009
loss: 0.9814847707748413,grad_norm: 0.9999988954999842, iteration: 185010
loss: 1.0096416473388672,grad_norm: 0.9078123019761627, iteration: 185011
loss: 0.9857878088951111,grad_norm: 0.9999989867712772, iteration: 185012
loss: 0.974107563495636,grad_norm: 0.8947175707218278, iteration: 185013
loss: 1.0040141344070435,grad_norm: 0.9999991626147663, iteration: 185014
loss: 0.9857423305511475,grad_norm: 0.9999989817941245, iteration: 185015
loss: 1.0278750658035278,grad_norm: 0.9894081640007136, iteration: 185016
loss: 1.029896855354309,grad_norm: 0.8667530841786131, iteration: 185017
loss: 0.9915332198143005,grad_norm: 0.8935556806349354, iteration: 185018
loss: 1.0953296422958374,grad_norm: 0.9999999192020811, iteration: 185019
loss: 1.0210622549057007,grad_norm: 0.9326669697572021, iteration: 185020
loss: 1.1257323026657104,grad_norm: 0.9999998391872914, iteration: 185021
loss: 1.0008975267410278,grad_norm: 0.9999991384367651, iteration: 185022
loss: 1.0451654195785522,grad_norm: 0.99999913220654, iteration: 185023
loss: 1.053894281387329,grad_norm: 0.9999994625239121, iteration: 185024
loss: 1.0091978311538696,grad_norm: 0.9999990183706473, iteration: 185025
loss: 1.032042384147644,grad_norm: 0.9999992419146436, iteration: 185026
loss: 1.0357486009597778,grad_norm: 0.9492271410065152, iteration: 185027
loss: 0.9928799271583557,grad_norm: 0.999999080825859, iteration: 185028
loss: 1.0954298973083496,grad_norm: 0.9533888295753946, iteration: 185029
loss: 1.0186742544174194,grad_norm: 0.9999991397524699, iteration: 185030
loss: 1.033361792564392,grad_norm: 0.9999991815034589, iteration: 185031
loss: 0.973969578742981,grad_norm: 0.8796509037914921, iteration: 185032
loss: 1.0376182794570923,grad_norm: 0.9999992901728979, iteration: 185033
loss: 1.0121251344680786,grad_norm: 0.9999991295144048, iteration: 185034
loss: 1.0223236083984375,grad_norm: 0.9999992692330344, iteration: 185035
loss: 0.9612255692481995,grad_norm: 0.8873638191740986, iteration: 185036
loss: 1.0119390487670898,grad_norm: 0.999999135418271, iteration: 185037
loss: 1.1033011674880981,grad_norm: 0.9999992680641668, iteration: 185038
loss: 1.1435091495513916,grad_norm: 0.9999991859959001, iteration: 185039
loss: 1.050065040588379,grad_norm: 0.9999994979342814, iteration: 185040
loss: 1.0033882856369019,grad_norm: 0.998255873086323, iteration: 185041
loss: 1.0037280321121216,grad_norm: 0.9369312423506465, iteration: 185042
loss: 1.0654628276824951,grad_norm: 0.9999994322850347, iteration: 185043
loss: 1.020456075668335,grad_norm: 0.9999991590496476, iteration: 185044
loss: 1.0036940574645996,grad_norm: 0.9061811836240747, iteration: 185045
loss: 1.0191320180892944,grad_norm: 0.9999990587830362, iteration: 185046
loss: 0.9950928688049316,grad_norm: 0.8876451893568585, iteration: 185047
loss: 1.0269602537155151,grad_norm: 0.9571946023069606, iteration: 185048
loss: 1.0159823894500732,grad_norm: 0.9223648062173901, iteration: 185049
loss: 0.9749042987823486,grad_norm: 0.8687232659762884, iteration: 185050
loss: 1.0050899982452393,grad_norm: 0.8508832343068413, iteration: 185051
loss: 1.016108751296997,grad_norm: 0.999999295991196, iteration: 185052
loss: 1.0301936864852905,grad_norm: 0.9999990156850482, iteration: 185053
loss: 0.992442786693573,grad_norm: 0.9999990694142749, iteration: 185054
loss: 1.0115464925765991,grad_norm: 0.9999991646479437, iteration: 185055
loss: 1.0465137958526611,grad_norm: 0.999999107992148, iteration: 185056
loss: 0.9779659509658813,grad_norm: 0.8600810495192, iteration: 185057
loss: 1.0121052265167236,grad_norm: 0.9999990527567797, iteration: 185058
loss: 0.970770537853241,grad_norm: 0.9476144370573331, iteration: 185059
loss: 0.9922109842300415,grad_norm: 0.9999992989103671, iteration: 185060
loss: 1.2595399618148804,grad_norm: 0.9999997660810868, iteration: 185061
loss: 0.9863945841789246,grad_norm: 0.9999992255404987, iteration: 185062
loss: 0.9858067035675049,grad_norm: 0.999999532561727, iteration: 185063
loss: 1.0096142292022705,grad_norm: 0.999998993583042, iteration: 185064
loss: 1.0164612531661987,grad_norm: 0.9679262518272727, iteration: 185065
loss: 0.9949573874473572,grad_norm: 0.9999990200312159, iteration: 185066
loss: 1.0104418992996216,grad_norm: 0.9999991895318733, iteration: 185067
loss: 1.0034904479980469,grad_norm: 0.9444686366159838, iteration: 185068
loss: 1.021755576133728,grad_norm: 0.9999990003972339, iteration: 185069
loss: 1.0079256296157837,grad_norm: 0.9077782548932882, iteration: 185070
loss: 1.0882806777954102,grad_norm: 0.9999999941313413, iteration: 185071
loss: 1.007505178451538,grad_norm: 0.9210178671529842, iteration: 185072
loss: 1.0102382898330688,grad_norm: 0.9999991804772649, iteration: 185073
loss: 0.9999556541442871,grad_norm: 0.9269821683234962, iteration: 185074
loss: 0.9696077704429626,grad_norm: 0.9707758447635574, iteration: 185075
loss: 1.025330662727356,grad_norm: 0.8920953862369887, iteration: 185076
loss: 0.9988918900489807,grad_norm: 0.9999990780524403, iteration: 185077
loss: 1.0078229904174805,grad_norm: 0.9999991564788596, iteration: 185078
loss: 1.0515573024749756,grad_norm: 0.9999990770106085, iteration: 185079
loss: 0.9871956706047058,grad_norm: 0.8673372705203687, iteration: 185080
loss: 1.0135585069656372,grad_norm: 0.9213367746365211, iteration: 185081
loss: 1.0103368759155273,grad_norm: 0.9999991667027746, iteration: 185082
loss: 1.1068432331085205,grad_norm: 1.0000000945447902, iteration: 185083
loss: 0.9859640598297119,grad_norm: 0.9031287014230741, iteration: 185084
loss: 0.9605211019515991,grad_norm: 0.9999991329580037, iteration: 185085
loss: 1.0761240720748901,grad_norm: 0.9999991423027649, iteration: 185086
loss: 0.989710807800293,grad_norm: 0.8826221910845922, iteration: 185087
loss: 1.0072405338287354,grad_norm: 0.9999991775566982, iteration: 185088
loss: 0.9938051700592041,grad_norm: 0.8187484397938025, iteration: 185089
loss: 1.002115249633789,grad_norm: 0.9999991651173238, iteration: 185090
loss: 0.9821701645851135,grad_norm: 0.8599123508501121, iteration: 185091
loss: 1.0381505489349365,grad_norm: 0.9999998317046527, iteration: 185092
loss: 0.9974693059921265,grad_norm: 0.9722065261985691, iteration: 185093
loss: 1.118347406387329,grad_norm: 0.9999997265922894, iteration: 185094
loss: 0.9644212126731873,grad_norm: 0.9999991568004766, iteration: 185095
loss: 0.9998518228530884,grad_norm: 0.9999997149926247, iteration: 185096
loss: 1.0081275701522827,grad_norm: 0.9999991345683655, iteration: 185097
loss: 0.9947406649589539,grad_norm: 0.9787794173709005, iteration: 185098
loss: 0.9476290941238403,grad_norm: 0.9999991376294032, iteration: 185099
loss: 1.0258064270019531,grad_norm: 0.9824467711115973, iteration: 185100
loss: 1.0233843326568604,grad_norm: 0.9446008101477673, iteration: 185101
loss: 1.020128607749939,grad_norm: 0.9999991950380641, iteration: 185102
loss: 1.0291376113891602,grad_norm: 0.9999993048757604, iteration: 185103
loss: 1.0345596075057983,grad_norm: 0.9999990234540458, iteration: 185104
loss: 1.0268081426620483,grad_norm: 0.9999993761938615, iteration: 185105
loss: 1.018326997756958,grad_norm: 0.8439065281216631, iteration: 185106
loss: 1.0231605768203735,grad_norm: 0.999998983076953, iteration: 185107
loss: 1.0249311923980713,grad_norm: 0.9179979059502444, iteration: 185108
loss: 0.9858684539794922,grad_norm: 0.9354641491678758, iteration: 185109
loss: 1.0987721681594849,grad_norm: 0.9999999302987077, iteration: 185110
loss: 0.9752593040466309,grad_norm: 0.9445862524587499, iteration: 185111
loss: 0.997563362121582,grad_norm: 0.9999992363031405, iteration: 185112
loss: 1.168521761894226,grad_norm: 0.9999996582050771, iteration: 185113
loss: 0.974677324295044,grad_norm: 0.9242508621339366, iteration: 185114
loss: 0.9942025542259216,grad_norm: 0.9765510738956744, iteration: 185115
loss: 1.0295401811599731,grad_norm: 0.999999315396611, iteration: 185116
loss: 0.9935342669487,grad_norm: 0.9429361147391487, iteration: 185117
loss: 0.9723549485206604,grad_norm: 0.9990028093494604, iteration: 185118
loss: 1.0207911729812622,grad_norm: 0.8910570586007349, iteration: 185119
loss: 1.0322293043136597,grad_norm: 0.9999992591109684, iteration: 185120
loss: 1.083699107170105,grad_norm: 0.9999999761016226, iteration: 185121
loss: 0.9929167628288269,grad_norm: 0.9999996804910478, iteration: 185122
loss: 1.021297574043274,grad_norm: 0.9999990846561059, iteration: 185123
loss: 1.0017949342727661,grad_norm: 0.9702083940166178, iteration: 185124
loss: 0.9937613010406494,grad_norm: 0.9999992846318246, iteration: 185125
loss: 1.018810749053955,grad_norm: 0.7988468705583787, iteration: 185126
loss: 0.964198648929596,grad_norm: 0.9999991046548189, iteration: 185127
loss: 1.0243542194366455,grad_norm: 0.9999991157438342, iteration: 185128
loss: 1.008763074874878,grad_norm: 0.8694062851085114, iteration: 185129
loss: 1.1836247444152832,grad_norm: 0.999999797597805, iteration: 185130
loss: 1.0610053539276123,grad_norm: 0.9999989074180695, iteration: 185131
loss: 1.0050978660583496,grad_norm: 0.9999990229071591, iteration: 185132
loss: 0.9864571690559387,grad_norm: 0.8710988528526448, iteration: 185133
loss: 1.0309873819351196,grad_norm: 0.9999991425304636, iteration: 185134
loss: 0.9759745001792908,grad_norm: 0.9754686303688517, iteration: 185135
loss: 0.9994072914123535,grad_norm: 0.986395628539826, iteration: 185136
loss: 1.007743000984192,grad_norm: 0.9999994296592297, iteration: 185137
loss: 1.0164475440979004,grad_norm: 0.9396809528488086, iteration: 185138
loss: 1.0592302083969116,grad_norm: 0.9999998931319792, iteration: 185139
loss: 0.9704792499542236,grad_norm: 0.918714452845325, iteration: 185140
loss: 1.0016008615493774,grad_norm: 0.9999992833266235, iteration: 185141
loss: 1.0312683582305908,grad_norm: 0.9566039016770941, iteration: 185142
loss: 1.0027891397476196,grad_norm: 0.971564751746573, iteration: 185143
loss: 1.04766845703125,grad_norm: 0.9999998699312448, iteration: 185144
loss: 0.9939984679222107,grad_norm: 0.9708755979377036, iteration: 185145
loss: 0.9948012828826904,grad_norm: 0.9166117558248308, iteration: 185146
loss: 0.9926109910011292,grad_norm: 0.999999077908003, iteration: 185147
loss: 1.0408765077590942,grad_norm: 0.9331028079325052, iteration: 185148
loss: 1.012913703918457,grad_norm: 0.9999990035230807, iteration: 185149
loss: 1.0066196918487549,grad_norm: 0.999999103875654, iteration: 185150
loss: 1.016402006149292,grad_norm: 0.9999998801533596, iteration: 185151
loss: 1.0249370336532593,grad_norm: 0.9999991064400164, iteration: 185152
loss: 0.9669828414916992,grad_norm: 0.8697601438918057, iteration: 185153
loss: 1.0310324430465698,grad_norm: 0.8981774179386219, iteration: 185154
loss: 0.9692109227180481,grad_norm: 0.9885463352641284, iteration: 185155
loss: 0.9907551407814026,grad_norm: 0.9999990538495465, iteration: 185156
loss: 1.02430260181427,grad_norm: 0.9999993689706926, iteration: 185157
loss: 0.997943103313446,grad_norm: 0.8325263876020103, iteration: 185158
loss: 1.0637582540512085,grad_norm: 0.9999991892693169, iteration: 185159
loss: 0.9965311884880066,grad_norm: 0.8521461429381815, iteration: 185160
loss: 1.013334035873413,grad_norm: 0.9398153454714286, iteration: 185161
loss: 1.0001765489578247,grad_norm: 0.9999989829728546, iteration: 185162
loss: 1.0102248191833496,grad_norm: 0.8412815152189945, iteration: 185163
loss: 1.0133707523345947,grad_norm: 0.999999139498736, iteration: 185164
loss: 0.9932058453559875,grad_norm: 0.9607470323653876, iteration: 185165
loss: 1.0188467502593994,grad_norm: 0.9999991673877072, iteration: 185166
loss: 1.0176101922988892,grad_norm: 0.8582956907041033, iteration: 185167
loss: 1.0403852462768555,grad_norm: 0.9999994236359845, iteration: 185168
loss: 1.05022132396698,grad_norm: 0.9999991043757736, iteration: 185169
loss: 1.050416111946106,grad_norm: 0.9999990130485479, iteration: 185170
loss: 1.030794620513916,grad_norm: 0.9745381800034674, iteration: 185171
loss: 1.0200074911117554,grad_norm: 0.9999989669620409, iteration: 185172
loss: 1.0067073106765747,grad_norm: 0.9482163861123508, iteration: 185173
loss: 1.0234216451644897,grad_norm: 0.966843741246909, iteration: 185174
loss: 1.0214976072311401,grad_norm: 0.9999991742096764, iteration: 185175
loss: 0.9777164459228516,grad_norm: 0.9999995115996282, iteration: 185176
loss: 0.9731888175010681,grad_norm: 0.999999153842543, iteration: 185177
loss: 0.9863203167915344,grad_norm: 0.9726565358829176, iteration: 185178
loss: 0.9942999482154846,grad_norm: 0.9691538969899147, iteration: 185179
loss: 0.9960193634033203,grad_norm: 0.9999990591235273, iteration: 185180
loss: 0.958432674407959,grad_norm: 0.9165520163623301, iteration: 185181
loss: 1.0684852600097656,grad_norm: 0.9692366169104555, iteration: 185182
loss: 1.034744143486023,grad_norm: 0.9999991547884326, iteration: 185183
loss: 0.951253354549408,grad_norm: 0.9999991232991017, iteration: 185184
loss: 0.975397527217865,grad_norm: 0.8157180583128, iteration: 185185
loss: 1.070747971534729,grad_norm: 0.9999999337418202, iteration: 185186
loss: 1.0122997760772705,grad_norm: 0.8904955120361879, iteration: 185187
loss: 1.000917911529541,grad_norm: 0.9999996499951014, iteration: 185188
loss: 1.0023276805877686,grad_norm: 0.9679568904174766, iteration: 185189
loss: 1.0014597177505493,grad_norm: 0.9999992488571408, iteration: 185190
loss: 1.0012791156768799,grad_norm: 0.8697691924411296, iteration: 185191
loss: 1.0150842666625977,grad_norm: 0.7803120048592697, iteration: 185192
loss: 1.0100609064102173,grad_norm: 0.8280829358199523, iteration: 185193
loss: 1.0028464794158936,grad_norm: 0.999999221815563, iteration: 185194
loss: 1.017846703529358,grad_norm: 0.9616791440214969, iteration: 185195
loss: 1.0075509548187256,grad_norm: 0.9178196379384062, iteration: 185196
loss: 1.0531951189041138,grad_norm: 0.9999993394599463, iteration: 185197
loss: 1.0101927518844604,grad_norm: 0.8890868992455166, iteration: 185198
loss: 1.0095607042312622,grad_norm: 0.9999992759945141, iteration: 185199
loss: 0.9976800680160522,grad_norm: 0.9342973462356284, iteration: 185200
loss: 0.9762938022613525,grad_norm: 0.9427199054057174, iteration: 185201
loss: 0.9569739699363708,grad_norm: 0.9310300643068877, iteration: 185202
loss: 1.0034303665161133,grad_norm: 0.8781946379740286, iteration: 185203
loss: 1.0168312788009644,grad_norm: 0.9829918383623768, iteration: 185204
loss: 1.017076015472412,grad_norm: 0.8879285321138036, iteration: 185205
loss: 0.9749221205711365,grad_norm: 0.7563156037346402, iteration: 185206
loss: 1.1237409114837646,grad_norm: 0.9999996443881332, iteration: 185207
loss: 0.9874746203422546,grad_norm: 0.9999990474920885, iteration: 185208
loss: 1.0083353519439697,grad_norm: 0.9238914389001942, iteration: 185209
loss: 1.0195910930633545,grad_norm: 0.9110673400043533, iteration: 185210
loss: 1.0246217250823975,grad_norm: 0.9999998652387657, iteration: 185211
loss: 0.9939801692962646,grad_norm: 0.9999991218934537, iteration: 185212
loss: 1.0179827213287354,grad_norm: 0.8126036270547268, iteration: 185213
loss: 1.0210157632827759,grad_norm: 0.9999990274424171, iteration: 185214
loss: 1.0229450464248657,grad_norm: 0.99999921725582, iteration: 185215
loss: 1.0363144874572754,grad_norm: 0.9999991162438486, iteration: 185216
loss: 1.0190907716751099,grad_norm: 0.9173535797058069, iteration: 185217
loss: 0.9675836563110352,grad_norm: 0.9780508055516919, iteration: 185218
loss: 0.9545524716377258,grad_norm: 0.9110059171642478, iteration: 185219
loss: 0.9785808324813843,grad_norm: 0.9858504488403288, iteration: 185220
loss: 1.011260986328125,grad_norm: 0.9999989867508674, iteration: 185221
loss: 1.043471336364746,grad_norm: 0.9999996366591637, iteration: 185222
loss: 0.9671719074249268,grad_norm: 0.9891128573037198, iteration: 185223
loss: 1.0460915565490723,grad_norm: 0.9999995718339607, iteration: 185224
loss: 0.9904163479804993,grad_norm: 0.9497781673923584, iteration: 185225
loss: 0.9835980534553528,grad_norm: 0.9999990656316594, iteration: 185226
loss: 1.0298025608062744,grad_norm: 0.9999990897149806, iteration: 185227
loss: 1.0050442218780518,grad_norm: 0.9999990783409437, iteration: 185228
loss: 0.9880338907241821,grad_norm: 0.9953143314532272, iteration: 185229
loss: 1.0380738973617554,grad_norm: 0.9999991607613491, iteration: 185230
loss: 0.9979852437973022,grad_norm: 0.9885972765927159, iteration: 185231
loss: 0.9947543740272522,grad_norm: 0.9070483450019455, iteration: 185232
loss: 1.0267398357391357,grad_norm: 0.9309630997074937, iteration: 185233
loss: 1.0177552700042725,grad_norm: 0.9999990300924615, iteration: 185234
loss: 1.0563429594039917,grad_norm: 0.9999999532926781, iteration: 185235
loss: 1.0161746740341187,grad_norm: 0.8023846107172036, iteration: 185236
loss: 1.00690758228302,grad_norm: 0.9836351473839391, iteration: 185237
loss: 1.003456950187683,grad_norm: 0.9468061605011558, iteration: 185238
loss: 1.0127766132354736,grad_norm: 0.9999990232451527, iteration: 185239
loss: 1.0287986993789673,grad_norm: 0.8013214530307351, iteration: 185240
loss: 1.0254477262496948,grad_norm: 0.9999990378119183, iteration: 185241
loss: 1.0080986022949219,grad_norm: 0.9999996780576708, iteration: 185242
loss: 0.9968701004981995,grad_norm: 0.9999989848527518, iteration: 185243
loss: 0.9951981902122498,grad_norm: 0.9999992686631368, iteration: 185244
loss: 1.00723135471344,grad_norm: 0.9999989655565007, iteration: 185245
loss: 1.0006060600280762,grad_norm: 0.9999990512313771, iteration: 185246
loss: 1.0145317316055298,grad_norm: 0.9907607683621901, iteration: 185247
loss: 1.0029981136322021,grad_norm: 0.999999211294806, iteration: 185248
loss: 1.0053818225860596,grad_norm: 0.9999992175694469, iteration: 185249
loss: 0.9902172684669495,grad_norm: 0.8813035535081924, iteration: 185250
loss: 0.9874559640884399,grad_norm: 0.9999990218923449, iteration: 185251
loss: 1.0082827806472778,grad_norm: 0.9999990894939521, iteration: 185252
loss: 0.9975863695144653,grad_norm: 0.9999991044725682, iteration: 185253
loss: 0.9825535416603088,grad_norm: 0.9999990813646739, iteration: 185254
loss: 0.9964949488639832,grad_norm: 0.9999991475186361, iteration: 185255
loss: 1.1784344911575317,grad_norm: 0.9999995422676063, iteration: 185256
loss: 1.0878928899765015,grad_norm: 0.999999529163777, iteration: 185257
loss: 1.0770498514175415,grad_norm: 0.9999992033268635, iteration: 185258
loss: 0.9955763816833496,grad_norm: 0.8604017812278338, iteration: 185259
loss: 0.9616549611091614,grad_norm: 0.9732709334121674, iteration: 185260
loss: 1.0140010118484497,grad_norm: 0.9999991478261926, iteration: 185261
loss: 1.055116891860962,grad_norm: 0.9999991565527542, iteration: 185262
loss: 0.9802292585372925,grad_norm: 0.999999146000589, iteration: 185263
loss: 0.9645342230796814,grad_norm: 0.999999155280936, iteration: 185264
loss: 1.1065917015075684,grad_norm: 0.999999153801962, iteration: 185265
loss: 1.0001676082611084,grad_norm: 0.8033574431351843, iteration: 185266
loss: 1.065329670906067,grad_norm: 0.9241850319372166, iteration: 185267
loss: 1.232670545578003,grad_norm: 0.9999995935379654, iteration: 185268
loss: 1.048022747039795,grad_norm: 0.9999995653926302, iteration: 185269
loss: 1.1236743927001953,grad_norm: 1.0000000071517754, iteration: 185270
loss: 0.9727898836135864,grad_norm: 0.8889882356948066, iteration: 185271
loss: 1.0688331127166748,grad_norm: 0.8489566129043075, iteration: 185272
loss: 1.0032795667648315,grad_norm: 0.8484733595026047, iteration: 185273
loss: 0.946226179599762,grad_norm: 0.9801030018975732, iteration: 185274
loss: 0.9906706809997559,grad_norm: 0.92717041519678, iteration: 185275
loss: 0.9955679178237915,grad_norm: 0.9945251340582549, iteration: 185276
loss: 1.0756734609603882,grad_norm: 0.9999990881339555, iteration: 185277
loss: 1.1212011575698853,grad_norm: 0.999999195815562, iteration: 185278
loss: 1.0603666305541992,grad_norm: 0.9441646799363085, iteration: 185279
loss: 1.0082628726959229,grad_norm: 0.8968506347284003, iteration: 185280
loss: 1.0188103914260864,grad_norm: 0.9999991445716019, iteration: 185281
loss: 0.9950239062309265,grad_norm: 0.9365164964308983, iteration: 185282
loss: 1.012821912765503,grad_norm: 0.8457086807312272, iteration: 185283
loss: 1.0088032484054565,grad_norm: 0.9999995890832915, iteration: 185284
loss: 1.004496455192566,grad_norm: 0.9292969512262736, iteration: 185285
loss: 1.0028167963027954,grad_norm: 0.9679146453881612, iteration: 185286
loss: 1.0336823463439941,grad_norm: 0.9214172810859504, iteration: 185287
loss: 1.0148063898086548,grad_norm: 0.9999992032586458, iteration: 185288
loss: 0.985230028629303,grad_norm: 0.9363869471076636, iteration: 185289
loss: 1.0112017393112183,grad_norm: 0.9999991773222658, iteration: 185290
loss: 0.9810777306556702,grad_norm: 0.9179916950499535, iteration: 185291
loss: 0.9612426161766052,grad_norm: 0.9999990656697961, iteration: 185292
loss: 1.0321389436721802,grad_norm: 0.9085874524297649, iteration: 185293
loss: 1.0569862127304077,grad_norm: 0.9999991004571563, iteration: 185294
loss: 0.9882024526596069,grad_norm: 0.9953497831329174, iteration: 185295
loss: 1.0497496128082275,grad_norm: 0.9999998304432924, iteration: 185296
loss: 1.0092016458511353,grad_norm: 0.958708633909505, iteration: 185297
loss: 1.0078647136688232,grad_norm: 0.9824677970349521, iteration: 185298
loss: 1.0636563301086426,grad_norm: 0.9923081832944278, iteration: 185299
loss: 0.9570233225822449,grad_norm: 0.8771216228685007, iteration: 185300
loss: 0.989998459815979,grad_norm: 0.9079809391993823, iteration: 185301
loss: 1.015058159828186,grad_norm: 0.9999990539773911, iteration: 185302
loss: 1.036149263381958,grad_norm: 0.9957641361754904, iteration: 185303
loss: 1.0270236730575562,grad_norm: 0.892928323742231, iteration: 185304
loss: 1.0163240432739258,grad_norm: 0.9999991097014883, iteration: 185305
loss: 0.9980741739273071,grad_norm: 0.8031200761247361, iteration: 185306
loss: 1.0381895303726196,grad_norm: 0.9820351523092915, iteration: 185307
loss: 1.0169084072113037,grad_norm: 0.9432440206076972, iteration: 185308
loss: 1.0001401901245117,grad_norm: 0.8039023019248246, iteration: 185309
loss: 1.0256156921386719,grad_norm: 0.8721754190508413, iteration: 185310
loss: 0.9669752717018127,grad_norm: 0.9453737896897478, iteration: 185311
loss: 1.017591953277588,grad_norm: 0.9999991413532195, iteration: 185312
loss: 1.0099971294403076,grad_norm: 0.9999994327535655, iteration: 185313
loss: 0.9337154030799866,grad_norm: 0.9999991581665858, iteration: 185314
loss: 1.0032199621200562,grad_norm: 0.9999991209396091, iteration: 185315
loss: 1.0038925409317017,grad_norm: 0.9098974379877759, iteration: 185316
loss: 1.009536862373352,grad_norm: 0.9365416410880326, iteration: 185317
loss: 1.021508812904358,grad_norm: 0.9411062609059361, iteration: 185318
loss: 1.049837350845337,grad_norm: 0.8879998757717951, iteration: 185319
loss: 1.1059986352920532,grad_norm: 0.9999990147696297, iteration: 185320
loss: 1.0270335674285889,grad_norm: 0.999999011601803, iteration: 185321
loss: 0.9872682094573975,grad_norm: 0.9999990360260345, iteration: 185322
loss: 0.9955174326896667,grad_norm: 0.999999205783024, iteration: 185323
loss: 0.9931436777114868,grad_norm: 0.9999991666306816, iteration: 185324
loss: 1.02829110622406,grad_norm: 0.8779598657973968, iteration: 185325
loss: 1.0245743989944458,grad_norm: 0.8997941230354377, iteration: 185326
loss: 1.0205254554748535,grad_norm: 0.901305287582902, iteration: 185327
loss: 0.9964208602905273,grad_norm: 0.9999995070419744, iteration: 185328
loss: 1.0078213214874268,grad_norm: 0.9261135019655721, iteration: 185329
loss: 1.0132642984390259,grad_norm: 0.9999989591119828, iteration: 185330
loss: 1.039762258529663,grad_norm: 0.9999991950313604, iteration: 185331
loss: 0.9649553298950195,grad_norm: 0.9065729659075237, iteration: 185332
loss: 1.1155706644058228,grad_norm: 0.9999996986019849, iteration: 185333
loss: 0.9792150855064392,grad_norm: 0.9783257921649396, iteration: 185334
loss: 0.9735828042030334,grad_norm: 0.945938459483112, iteration: 185335
loss: 0.9919008612632751,grad_norm: 0.999999083690925, iteration: 185336
loss: 0.986861526966095,grad_norm: 0.8556365205268878, iteration: 185337
loss: 1.001007318496704,grad_norm: 0.9999994837624312, iteration: 185338
loss: 0.9920719861984253,grad_norm: 0.9999991110345788, iteration: 185339
loss: 1.0163227319717407,grad_norm: 0.9735119489103876, iteration: 185340
loss: 1.0133103132247925,grad_norm: 0.999999198432022, iteration: 185341
loss: 1.015128254890442,grad_norm: 0.999999329851183, iteration: 185342
loss: 1.0571273565292358,grad_norm: 0.9999994572783184, iteration: 185343
loss: 1.022568702697754,grad_norm: 0.9086850356100562, iteration: 185344
loss: 1.004189133644104,grad_norm: 0.8287729084416571, iteration: 185345
loss: 0.9870298504829407,grad_norm: 0.8094652269011521, iteration: 185346
loss: 1.0306415557861328,grad_norm: 0.999998932044441, iteration: 185347
loss: 1.0044201612472534,grad_norm: 0.8578060909096442, iteration: 185348
loss: 0.989785373210907,grad_norm: 0.9999990999533486, iteration: 185349
loss: 1.0269395112991333,grad_norm: 0.9999997049614329, iteration: 185350
loss: 1.0328365564346313,grad_norm: 0.8807350230620203, iteration: 185351
loss: 0.993822455406189,grad_norm: 0.9251693561194958, iteration: 185352
loss: 0.9871863722801208,grad_norm: 0.9124163292233317, iteration: 185353
loss: 1.0044430494308472,grad_norm: 0.9325951472998819, iteration: 185354
loss: 0.9879016876220703,grad_norm: 0.9999993026152431, iteration: 185355
loss: 1.0226547718048096,grad_norm: 0.8890994795507202, iteration: 185356
loss: 0.9569475054740906,grad_norm: 0.9032454521119214, iteration: 185357
loss: 0.9983677864074707,grad_norm: 0.9999989348204019, iteration: 185358
loss: 0.9662901759147644,grad_norm: 0.862136118769122, iteration: 185359
loss: 0.998340904712677,grad_norm: 0.9999993995811931, iteration: 185360
loss: 1.1302698850631714,grad_norm: 0.9999998800230113, iteration: 185361
loss: 1.0234012603759766,grad_norm: 0.8889212866718664, iteration: 185362
loss: 1.0005857944488525,grad_norm: 0.8562007388765114, iteration: 185363
loss: 1.0097087621688843,grad_norm: 0.9999993062836737, iteration: 185364
loss: 1.0354269742965698,grad_norm: 0.8909835716777542, iteration: 185365
loss: 0.9621663689613342,grad_norm: 0.8931599692448577, iteration: 185366
loss: 1.1281883716583252,grad_norm: 0.9999990519247652, iteration: 185367
loss: 1.0228312015533447,grad_norm: 0.9999991204247002, iteration: 185368
loss: 0.997370183467865,grad_norm: 0.999998965091395, iteration: 185369
loss: 1.0374696254730225,grad_norm: 0.9992404952122316, iteration: 185370
loss: 0.9636926651000977,grad_norm: 0.9865975695263508, iteration: 185371
loss: 1.0014500617980957,grad_norm: 0.9999992127269383, iteration: 185372
loss: 1.0028430223464966,grad_norm: 0.9999989435085047, iteration: 185373
loss: 1.01189124584198,grad_norm: 0.9999992086715481, iteration: 185374
loss: 0.9660352468490601,grad_norm: 0.9461809711870721, iteration: 185375
loss: 1.0182130336761475,grad_norm: 0.8974044056395654, iteration: 185376
loss: 1.0341458320617676,grad_norm: 0.999999087788869, iteration: 185377
loss: 0.9865525364875793,grad_norm: 0.991264667169047, iteration: 185378
loss: 1.0024876594543457,grad_norm: 0.9999991055471517, iteration: 185379
loss: 0.977888286113739,grad_norm: 0.8418588454191118, iteration: 185380
loss: 0.9599052667617798,grad_norm: 0.9050399476550962, iteration: 185381
loss: 1.0333918333053589,grad_norm: 0.9621107589736189, iteration: 185382
loss: 1.0547808408737183,grad_norm: 0.9301620258452455, iteration: 185383
loss: 0.9951764345169067,grad_norm: 0.9999991585077214, iteration: 185384
loss: 0.9776564836502075,grad_norm: 0.9999990395988749, iteration: 185385
loss: 1.1478763818740845,grad_norm: 0.9999993017172064, iteration: 185386
loss: 1.006198525428772,grad_norm: 0.9671390984076305, iteration: 185387
loss: 1.0014758110046387,grad_norm: 0.99999910595919, iteration: 185388
loss: 1.0002716779708862,grad_norm: 0.9999991144431323, iteration: 185389
loss: 1.0322744846343994,grad_norm: 0.897146521324799, iteration: 185390
loss: 0.9970802664756775,grad_norm: 0.9999992548624943, iteration: 185391
loss: 1.017431616783142,grad_norm: 0.999999113985732, iteration: 185392
loss: 0.9724685549736023,grad_norm: 0.9999990850802838, iteration: 185393
loss: 0.9902723431587219,grad_norm: 0.9761254138920874, iteration: 185394
loss: 1.0091642141342163,grad_norm: 0.959312223535801, iteration: 185395
loss: 1.0215823650360107,grad_norm: 0.9771879698446865, iteration: 185396
loss: 0.9817764759063721,grad_norm: 0.9999990365836797, iteration: 185397
loss: 0.987656831741333,grad_norm: 0.9999989370622167, iteration: 185398
loss: 1.0355541706085205,grad_norm: 0.99999914101382, iteration: 185399
loss: 0.9799709320068359,grad_norm: 0.9241109814288069, iteration: 185400
loss: 0.9571871161460876,grad_norm: 0.9999991990180424, iteration: 185401
loss: 1.0169436931610107,grad_norm: 0.9476160537702762, iteration: 185402
loss: 1.0038599967956543,grad_norm: 0.9717866966660551, iteration: 185403
loss: 0.9933890700340271,grad_norm: 0.9999991608356348, iteration: 185404
loss: 0.9786242842674255,grad_norm: 0.9739644488713449, iteration: 185405
loss: 0.9782300591468811,grad_norm: 0.854153555331334, iteration: 185406
loss: 0.966285228729248,grad_norm: 0.9162958470499797, iteration: 185407
loss: 1.0092267990112305,grad_norm: 0.9999992916206513, iteration: 185408
loss: 1.0282282829284668,grad_norm: 0.9999996281736814, iteration: 185409
loss: 1.001184344291687,grad_norm: 0.966727393791993, iteration: 185410
loss: 1.000675082206726,grad_norm: 0.8888577158180747, iteration: 185411
loss: 0.9865953326225281,grad_norm: 0.9031602738536451, iteration: 185412
loss: 0.9840093851089478,grad_norm: 0.9715194392317997, iteration: 185413
loss: 1.0040011405944824,grad_norm: 0.9999991246556975, iteration: 185414
loss: 0.9969248175621033,grad_norm: 0.8466483073395766, iteration: 185415
loss: 0.9849689602851868,grad_norm: 0.9999992505506408, iteration: 185416
loss: 0.9833860993385315,grad_norm: 0.9999991198669851, iteration: 185417
loss: 1.0259722471237183,grad_norm: 0.8732724796588116, iteration: 185418
loss: 1.0240592956542969,grad_norm: 0.9163074939089917, iteration: 185419
loss: 0.9846708178520203,grad_norm: 0.728165960495034, iteration: 185420
loss: 0.9895681738853455,grad_norm: 0.9517166059948856, iteration: 185421
loss: 1.0124831199645996,grad_norm: 0.9999993367844772, iteration: 185422
loss: 0.9978775382041931,grad_norm: 0.9999991346447016, iteration: 185423
loss: 1.0237705707550049,grad_norm: 0.9999995135760565, iteration: 185424
loss: 0.9548574090003967,grad_norm: 0.8349864905452203, iteration: 185425
loss: 1.0056424140930176,grad_norm: 0.9338881037231989, iteration: 185426
loss: 0.9714338183403015,grad_norm: 0.9084598310825409, iteration: 185427
loss: 0.9994391798973083,grad_norm: 0.9999992567218798, iteration: 185428
loss: 1.090752124786377,grad_norm: 0.8587858469130134, iteration: 185429
loss: 0.980829656124115,grad_norm: 0.9316984970068308, iteration: 185430
loss: 0.9966829419136047,grad_norm: 0.8684401550465214, iteration: 185431
loss: 1.0365586280822754,grad_norm: 0.8794789078200373, iteration: 185432
loss: 1.0017826557159424,grad_norm: 0.9999990752940323, iteration: 185433
loss: 1.0266153812408447,grad_norm: 0.8871459533076881, iteration: 185434
loss: 0.9902712106704712,grad_norm: 0.9999989787239308, iteration: 185435
loss: 1.0107314586639404,grad_norm: 0.9999992361753254, iteration: 185436
loss: 1.1583937406539917,grad_norm: 0.9999997714978476, iteration: 185437
loss: 0.9745472073554993,grad_norm: 0.9869527042073948, iteration: 185438
loss: 0.9933940768241882,grad_norm: 0.9999989426258787, iteration: 185439
loss: 1.0561254024505615,grad_norm: 0.9999996731271471, iteration: 185440
loss: 0.9951761364936829,grad_norm: 0.999999178555562, iteration: 185441
loss: 1.078786849975586,grad_norm: 0.9639389524245193, iteration: 185442
loss: 1.0238521099090576,grad_norm: 0.8968695507447474, iteration: 185443
loss: 1.132393479347229,grad_norm: 0.9510466559070158, iteration: 185444
loss: 0.9938787221908569,grad_norm: 0.9864520317900901, iteration: 185445
loss: 0.9791694283485413,grad_norm: 0.9364315950959093, iteration: 185446
loss: 0.9806342720985413,grad_norm: 0.9999990888721875, iteration: 185447
loss: 0.9829772114753723,grad_norm: 0.9823398685653213, iteration: 185448
loss: 0.9993667006492615,grad_norm: 0.8359319997619201, iteration: 185449
loss: 0.9850962162017822,grad_norm: 0.9999989108267989, iteration: 185450
loss: 1.0024769306182861,grad_norm: 0.9612427979115789, iteration: 185451
loss: 0.9820951223373413,grad_norm: 0.9999991523271003, iteration: 185452
loss: 1.0013415813446045,grad_norm: 0.9160222720771202, iteration: 185453
loss: 0.9807944297790527,grad_norm: 0.8885883818279834, iteration: 185454
loss: 0.9796203970909119,grad_norm: 0.9483224896622416, iteration: 185455
loss: 1.0331300497055054,grad_norm: 0.954381523808655, iteration: 185456
loss: 0.9688780903816223,grad_norm: 0.9999990440325216, iteration: 185457
loss: 1.0117920637130737,grad_norm: 0.9999991964718169, iteration: 185458
loss: 1.0073727369308472,grad_norm: 0.9999992429063789, iteration: 185459
loss: 0.9919891357421875,grad_norm: 0.9799133414073714, iteration: 185460
loss: 1.0028568506240845,grad_norm: 0.9999990908744372, iteration: 185461
loss: 0.9497144818305969,grad_norm: 0.9994059621909407, iteration: 185462
loss: 1.0286592245101929,grad_norm: 0.8619949882400744, iteration: 185463
loss: 1.003251314163208,grad_norm: 0.9999993111295321, iteration: 185464
loss: 0.9860246181488037,grad_norm: 0.9999991044810885, iteration: 185465
loss: 1.01302170753479,grad_norm: 0.9999992268162989, iteration: 185466
loss: 0.9652341604232788,grad_norm: 0.7700912203977475, iteration: 185467
loss: 0.9869760274887085,grad_norm: 0.8829704973801833, iteration: 185468
loss: 1.07516348361969,grad_norm: 0.9999991808703959, iteration: 185469
loss: 0.9963102340698242,grad_norm: 0.8521091981607906, iteration: 185470
loss: 1.0089664459228516,grad_norm: 0.999999221893086, iteration: 185471
loss: 1.0172662734985352,grad_norm: 0.813593624987799, iteration: 185472
loss: 1.0152390003204346,grad_norm: 0.9999990601105144, iteration: 185473
loss: 0.9591602087020874,grad_norm: 0.8832389615522549, iteration: 185474
loss: 0.9762351512908936,grad_norm: 0.9999996717152908, iteration: 185475
loss: 1.0251191854476929,grad_norm: 0.9999992455964798, iteration: 185476
loss: 1.0018037557601929,grad_norm: 0.999999481080639, iteration: 185477
loss: 1.044710636138916,grad_norm: 0.9939980251009459, iteration: 185478
loss: 0.980381965637207,grad_norm: 0.9362212989606484, iteration: 185479
loss: 0.9879806041717529,grad_norm: 0.999999076807997, iteration: 185480
loss: 0.9741895794868469,grad_norm: 0.891160179229128, iteration: 185481
loss: 1.02300226688385,grad_norm: 0.9999990044740605, iteration: 185482
loss: 0.9633464217185974,grad_norm: 0.960902714058119, iteration: 185483
loss: 1.0254114866256714,grad_norm: 0.8656976480531587, iteration: 185484
loss: 0.999535083770752,grad_norm: 0.9999992544414626, iteration: 185485
loss: 0.9936937093734741,grad_norm: 0.9999992001076164, iteration: 185486
loss: 0.9691610336303711,grad_norm: 0.9397176704783069, iteration: 185487
loss: 1.0073822736740112,grad_norm: 0.9289111566656754, iteration: 185488
loss: 1.0623770952224731,grad_norm: 0.9899845780084104, iteration: 185489
loss: 1.0063896179199219,grad_norm: 0.999999618698361, iteration: 185490
loss: 0.9790287017822266,grad_norm: 0.9999990814694303, iteration: 185491
loss: 0.951326847076416,grad_norm: 0.9271758527955248, iteration: 185492
loss: 1.008493423461914,grad_norm: 0.9999991012581982, iteration: 185493
loss: 1.0095888376235962,grad_norm: 0.9999990889847528, iteration: 185494
loss: 0.9798089265823364,grad_norm: 0.8685066798409463, iteration: 185495
loss: 1.0016295909881592,grad_norm: 0.9999991387878634, iteration: 185496
loss: 0.9948367476463318,grad_norm: 0.9551787320801977, iteration: 185497
loss: 0.9935583472251892,grad_norm: 0.8677170686885669, iteration: 185498
loss: 1.012272834777832,grad_norm: 0.8021853627891693, iteration: 185499
loss: 1.0091557502746582,grad_norm: 0.9999990257967335, iteration: 185500
loss: 1.2110575437545776,grad_norm: 0.9999998777453489, iteration: 185501
loss: 0.9727705717086792,grad_norm: 0.9999991479162768, iteration: 185502
loss: 1.0076682567596436,grad_norm: 0.9999990613306078, iteration: 185503
loss: 0.9834384918212891,grad_norm: 0.983261128989689, iteration: 185504
loss: 0.9942491054534912,grad_norm: 0.9204671931445737, iteration: 185505
loss: 1.0224992036819458,grad_norm: 0.9999990928442036, iteration: 185506
loss: 0.9676278233528137,grad_norm: 0.8392523383022862, iteration: 185507
loss: 0.9678468704223633,grad_norm: 0.9999989499942072, iteration: 185508
loss: 0.9658566117286682,grad_norm: 0.9259484085876891, iteration: 185509
loss: 1.0156992673873901,grad_norm: 0.9999991312903947, iteration: 185510
loss: 1.0117146968841553,grad_norm: 0.9129872284262011, iteration: 185511
loss: 1.0039044618606567,grad_norm: 0.9999989574931003, iteration: 185512
loss: 1.0331339836120605,grad_norm: 0.9181846078994145, iteration: 185513
loss: 1.003069519996643,grad_norm: 0.9999992804557091, iteration: 185514
loss: 0.9470589756965637,grad_norm: 0.8018956096426869, iteration: 185515
loss: 1.0214122533798218,grad_norm: 0.9999991067960967, iteration: 185516
loss: 0.9835905432701111,grad_norm: 0.8708874565763488, iteration: 185517
loss: 1.0151656866073608,grad_norm: 0.9999992443344317, iteration: 185518
loss: 0.9940443634986877,grad_norm: 0.9431317512866091, iteration: 185519
loss: 1.0341722965240479,grad_norm: 0.9999999201218225, iteration: 185520
loss: 1.020756483078003,grad_norm: 0.9162720759845449, iteration: 185521
loss: 1.0978366136550903,grad_norm: 0.9397987590442896, iteration: 185522
loss: 0.9744207262992859,grad_norm: 0.8465740889752668, iteration: 185523
loss: 0.9293679594993591,grad_norm: 0.8660088557529549, iteration: 185524
loss: 0.9733643531799316,grad_norm: 0.8861866153711478, iteration: 185525
loss: 1.0067719221115112,grad_norm: 0.9564209511372361, iteration: 185526
loss: 1.0239719152450562,grad_norm: 0.9500630270324066, iteration: 185527
loss: 1.0297954082489014,grad_norm: 0.9999990260308397, iteration: 185528
loss: 1.0702943801879883,grad_norm: 0.9999991090424692, iteration: 185529
loss: 0.9789558053016663,grad_norm: 0.9999991493448371, iteration: 185530
loss: 1.0466668605804443,grad_norm: 0.9999994189230257, iteration: 185531
loss: 1.0066282749176025,grad_norm: 0.9067126219326571, iteration: 185532
loss: 1.1716887950897217,grad_norm: 0.999999702091484, iteration: 185533
loss: 1.0057073831558228,grad_norm: 0.9999990876867823, iteration: 185534
loss: 1.0060875415802002,grad_norm: 0.9046156808483825, iteration: 185535
loss: 1.105901837348938,grad_norm: 0.9999996250347425, iteration: 185536
loss: 0.9823311567306519,grad_norm: 0.9277293963331771, iteration: 185537
loss: 1.045170545578003,grad_norm: 0.999999066029137, iteration: 185538
loss: 0.9891645908355713,grad_norm: 0.9999991522533004, iteration: 185539
loss: 1.0055553913116455,grad_norm: 0.8486704709248546, iteration: 185540
loss: 1.0089695453643799,grad_norm: 0.9999990571570928, iteration: 185541
loss: 1.010745644569397,grad_norm: 0.999999256180677, iteration: 185542
loss: 0.9563444256782532,grad_norm: 0.9999990787504929, iteration: 185543
loss: 0.9992076754570007,grad_norm: 0.9999992695981336, iteration: 185544
loss: 0.9983392953872681,grad_norm: 0.9373779261199235, iteration: 185545
loss: 0.9885457158088684,grad_norm: 0.91138096307125, iteration: 185546
loss: 0.9856879115104675,grad_norm: 0.9999995584229411, iteration: 185547
loss: 1.0133428573608398,grad_norm: 0.9775778339824854, iteration: 185548
loss: 0.9867526888847351,grad_norm: 0.9279305432669505, iteration: 185549
loss: 1.0422635078430176,grad_norm: 0.8618756206963475, iteration: 185550
loss: 0.9988545179367065,grad_norm: 0.9999999689118212, iteration: 185551
loss: 1.016968846321106,grad_norm: 0.8354356274678466, iteration: 185552
loss: 0.9920367002487183,grad_norm: 0.9999991072112583, iteration: 185553
loss: 1.0405558347702026,grad_norm: 0.9043185244988197, iteration: 185554
loss: 1.025262475013733,grad_norm: 0.9210289037357337, iteration: 185555
loss: 0.9655139446258545,grad_norm: 0.9574053264442667, iteration: 185556
loss: 0.9729854464530945,grad_norm: 0.9999990394072266, iteration: 185557
loss: 0.9823698401451111,grad_norm: 0.9518112293799307, iteration: 185558
loss: 1.0437146425247192,grad_norm: 0.9999991360834332, iteration: 185559
loss: 1.0327352285385132,grad_norm: 0.9988460299488582, iteration: 185560
loss: 0.9546547532081604,grad_norm: 0.8733362934127026, iteration: 185561
loss: 1.007132887840271,grad_norm: 0.8941488472292313, iteration: 185562
loss: 0.9932207465171814,grad_norm: 0.999999069454987, iteration: 185563
loss: 1.0178515911102295,grad_norm: 0.9999994049539579, iteration: 185564
loss: 1.0357170104980469,grad_norm: 0.8499832988402062, iteration: 185565
loss: 0.9852668642997742,grad_norm: 0.9999991271722378, iteration: 185566
loss: 0.9999390840530396,grad_norm: 0.9740999612850602, iteration: 185567
loss: 1.1242491006851196,grad_norm: 0.9999990000816981, iteration: 185568
loss: 1.0086368322372437,grad_norm: 0.9999990742025141, iteration: 185569
loss: 1.0110516548156738,grad_norm: 0.9999992345285245, iteration: 185570
loss: 1.0342321395874023,grad_norm: 0.9999990635539036, iteration: 185571
loss: 0.9838545918464661,grad_norm: 0.7321608000286575, iteration: 185572
loss: 1.0006507635116577,grad_norm: 0.9999990289887833, iteration: 185573
loss: 1.009751319885254,grad_norm: 0.999999240684525, iteration: 185574
loss: 0.9921345114707947,grad_norm: 0.9999989732550971, iteration: 185575
loss: 1.017863154411316,grad_norm: 0.9999990514880233, iteration: 185576
loss: 1.0011084079742432,grad_norm: 0.9593648986044818, iteration: 185577
loss: 1.0007352828979492,grad_norm: 0.8936761160235634, iteration: 185578
loss: 1.0281941890716553,grad_norm: 0.9999993145755656, iteration: 185579
loss: 1.0394619703292847,grad_norm: 0.9121563092435777, iteration: 185580
loss: 0.9934526681900024,grad_norm: 0.8931770139020244, iteration: 185581
loss: 1.0582422018051147,grad_norm: 0.9999991253320999, iteration: 185582
loss: 1.1471052169799805,grad_norm: 0.9999995331460526, iteration: 185583
loss: 1.0430824756622314,grad_norm: 0.99999945022738, iteration: 185584
loss: 1.0248417854309082,grad_norm: 0.9999992578411556, iteration: 185585
loss: 0.9995178580284119,grad_norm: 0.9961171270229141, iteration: 185586
loss: 1.037760615348816,grad_norm: 0.9999989973576492, iteration: 185587
loss: 0.9686204195022583,grad_norm: 0.9999989660125469, iteration: 185588
loss: 0.985146701335907,grad_norm: 0.8098365494886186, iteration: 185589
loss: 0.9869682788848877,grad_norm: 0.9999991182963923, iteration: 185590
loss: 1.0157780647277832,grad_norm: 0.820025974568125, iteration: 185591
loss: 0.9753209948539734,grad_norm: 0.9999991263582539, iteration: 185592
loss: 0.9739728569984436,grad_norm: 0.9999996119183081, iteration: 185593
loss: 1.0331289768218994,grad_norm: 0.8655614493881828, iteration: 185594
loss: 0.9908776879310608,grad_norm: 0.9326163502969979, iteration: 185595
loss: 1.019905924797058,grad_norm: 0.9999992538023851, iteration: 185596
loss: 1.0155607461929321,grad_norm: 0.9999991133722567, iteration: 185597
loss: 1.008790373802185,grad_norm: 0.999999142420132, iteration: 185598
loss: 1.0258406400680542,grad_norm: 0.9974002096198312, iteration: 185599
loss: 0.9769744277000427,grad_norm: 0.9999991207947938, iteration: 185600
loss: 1.006056785583496,grad_norm: 0.9999989944964932, iteration: 185601
loss: 1.0280311107635498,grad_norm: 0.9999990491915741, iteration: 185602
loss: 1.060948371887207,grad_norm: 0.9999998699227265, iteration: 185603
loss: 1.0013048648834229,grad_norm: 0.9999990961608886, iteration: 185604
loss: 0.982971727848053,grad_norm: 0.999999042286437, iteration: 185605
loss: 0.9960536956787109,grad_norm: 0.9546546687531654, iteration: 185606
loss: 1.1285879611968994,grad_norm: 0.9999990379187235, iteration: 185607
loss: 1.0359046459197998,grad_norm: 0.965545248917603, iteration: 185608
loss: 0.9852479100227356,grad_norm: 0.9999990590782071, iteration: 185609
loss: 0.9744793176651001,grad_norm: 0.9377423975378723, iteration: 185610
loss: 1.094224452972412,grad_norm: 0.9999991582496562, iteration: 185611
loss: 1.0031931400299072,grad_norm: 0.9221704795318555, iteration: 185612
loss: 1.0204178094863892,grad_norm: 0.897918207113398, iteration: 185613
loss: 0.9773681163787842,grad_norm: 0.8663602592711964, iteration: 185614
loss: 1.0368200540542603,grad_norm: 0.9999991118398671, iteration: 185615
loss: 0.979338526725769,grad_norm: 0.9999990705892564, iteration: 185616
loss: 1.0326511859893799,grad_norm: 0.9999992948301628, iteration: 185617
loss: 0.9920257329940796,grad_norm: 0.9999992339805022, iteration: 185618
loss: 0.9795858263969421,grad_norm: 0.8083471499325909, iteration: 185619
loss: 1.0262608528137207,grad_norm: 0.9304321024282141, iteration: 185620
loss: 1.009216547012329,grad_norm: 0.8938614579780669, iteration: 185621
loss: 0.9700222015380859,grad_norm: 0.9999991540071045, iteration: 185622
loss: 1.000221610069275,grad_norm: 0.999999165888884, iteration: 185623
loss: 1.024596095085144,grad_norm: 0.9999992932211387, iteration: 185624
loss: 1.032230019569397,grad_norm: 0.9999991218030712, iteration: 185625
loss: 0.9927228689193726,grad_norm: 0.9944840925710129, iteration: 185626
loss: 1.0645620822906494,grad_norm: 0.9999996124673662, iteration: 185627
loss: 0.945644736289978,grad_norm: 0.9974945440546905, iteration: 185628
loss: 1.0388675928115845,grad_norm: 0.968349791165856, iteration: 185629
loss: 1.0020679235458374,grad_norm: 0.8542446114169802, iteration: 185630
loss: 0.9956414699554443,grad_norm: 0.9152889874992428, iteration: 185631
loss: 0.9802870154380798,grad_norm: 0.9999990597321738, iteration: 185632
loss: 1.0457065105438232,grad_norm: 0.9763333452754095, iteration: 185633
loss: 1.0012177228927612,grad_norm: 0.9586599010402085, iteration: 185634
loss: 1.0116899013519287,grad_norm: 0.9999997191483423, iteration: 185635
loss: 1.0151488780975342,grad_norm: 0.9333890121565825, iteration: 185636
loss: 1.0021363496780396,grad_norm: 0.9999990942365653, iteration: 185637
loss: 0.9661620855331421,grad_norm: 0.9163702698715006, iteration: 185638
loss: 1.0134706497192383,grad_norm: 0.8874367996720286, iteration: 185639
loss: 1.043940782546997,grad_norm: 0.9999990278566987, iteration: 185640
loss: 1.0224494934082031,grad_norm: 0.9793598997135508, iteration: 185641
loss: 1.033341646194458,grad_norm: 0.8186897626442521, iteration: 185642
loss: 1.0105421543121338,grad_norm: 0.9849782577391188, iteration: 185643
loss: 0.9921017289161682,grad_norm: 0.9999993030969389, iteration: 185644
loss: 0.9995045065879822,grad_norm: 0.9999991212939293, iteration: 185645
loss: 1.0047856569290161,grad_norm: 0.9999991050847684, iteration: 185646
loss: 1.007306456565857,grad_norm: 0.8467429406963253, iteration: 185647
loss: 1.0259275436401367,grad_norm: 0.9999993495588632, iteration: 185648
loss: 0.960538387298584,grad_norm: 0.9735138749026273, iteration: 185649
loss: 0.9873403310775757,grad_norm: 0.9588010973072065, iteration: 185650
loss: 0.9639326333999634,grad_norm: 0.9999991894143021, iteration: 185651
loss: 0.9987967610359192,grad_norm: 0.9139630457241623, iteration: 185652
loss: 0.9788316488265991,grad_norm: 0.9999991435063802, iteration: 185653
loss: 1.0058443546295166,grad_norm: 0.9999992178627624, iteration: 185654
loss: 1.0306180715560913,grad_norm: 0.9999990940039225, iteration: 185655
loss: 0.9834066033363342,grad_norm: 0.9999989089616462, iteration: 185656
loss: 1.018439769744873,grad_norm: 0.9395699706676008, iteration: 185657
loss: 0.9941629767417908,grad_norm: 0.8518965443879873, iteration: 185658
loss: 0.9662968516349792,grad_norm: 0.8983837163422117, iteration: 185659
loss: 1.0115946531295776,grad_norm: 0.9999994700893505, iteration: 185660
loss: 1.0255773067474365,grad_norm: 0.9999992155360433, iteration: 185661
loss: 1.031116008758545,grad_norm: 0.9999990751306197, iteration: 185662
loss: 0.9694519639015198,grad_norm: 0.9354798614081797, iteration: 185663
loss: 1.014492154121399,grad_norm: 0.8980292711640447, iteration: 185664
loss: 0.973910927772522,grad_norm: 0.9483177192010473, iteration: 185665
loss: 0.9791737794876099,grad_norm: 0.9187218481665197, iteration: 185666
loss: 0.9957245588302612,grad_norm: 0.8991717729985331, iteration: 185667
loss: 0.9865006804466248,grad_norm: 0.9159305966268736, iteration: 185668
loss: 0.9982378482818604,grad_norm: 0.9630060274934048, iteration: 185669
loss: 1.0255614519119263,grad_norm: 0.8655651194273885, iteration: 185670
loss: 0.9750487804412842,grad_norm: 0.9012947263266935, iteration: 185671
loss: 1.0076249837875366,grad_norm: 0.8025185749914953, iteration: 185672
loss: 0.9998674988746643,grad_norm: 0.9225171708692307, iteration: 185673
loss: 0.9979665279388428,grad_norm: 0.876090281899619, iteration: 185674
loss: 1.0201466083526611,grad_norm: 0.8850268953381942, iteration: 185675
loss: 0.98450767993927,grad_norm: 0.9999992496752782, iteration: 185676
loss: 0.9953173398971558,grad_norm: 0.9999990628303356, iteration: 185677
loss: 0.9945992231369019,grad_norm: 0.9999990020967718, iteration: 185678
loss: 1.0303950309753418,grad_norm: 0.9965346208916556, iteration: 185679
loss: 1.0185688734054565,grad_norm: 0.9999992401838856, iteration: 185680
loss: 0.9746858477592468,grad_norm: 0.9437943454855897, iteration: 185681
loss: 0.9836146235466003,grad_norm: 0.9999991501048914, iteration: 185682
loss: 0.9588433504104614,grad_norm: 0.999998968868759, iteration: 185683
loss: 1.0132991075515747,grad_norm: 0.9999991228914616, iteration: 185684
loss: 0.9711940884590149,grad_norm: 0.950570068534417, iteration: 185685
loss: 0.978267252445221,grad_norm: 0.9545171794868456, iteration: 185686
loss: 0.9808672070503235,grad_norm: 0.9999991401016944, iteration: 185687
loss: 1.0354732275009155,grad_norm: 0.9335720315619263, iteration: 185688
loss: 0.9833100438117981,grad_norm: 0.9999992128785802, iteration: 185689
loss: 0.9744791984558105,grad_norm: 0.9999989186690712, iteration: 185690
loss: 1.0201258659362793,grad_norm: 0.9999990442064871, iteration: 185691
loss: 0.9958412647247314,grad_norm: 0.9999990774848118, iteration: 185692
loss: 1.024080514907837,grad_norm: 0.9999992317532984, iteration: 185693
loss: 1.0226811170578003,grad_norm: 0.9366458813661485, iteration: 185694
loss: 0.9695167541503906,grad_norm: 0.8522123388167769, iteration: 185695
loss: 1.0368142127990723,grad_norm: 0.9999990690112482, iteration: 185696
loss: 0.9705122113227844,grad_norm: 0.9638189540749998, iteration: 185697
loss: 0.9911761283874512,grad_norm: 0.8677215064669527, iteration: 185698
loss: 1.0188874006271362,grad_norm: 0.9999997765314963, iteration: 185699
loss: 0.9925804734230042,grad_norm: 0.8138960385042769, iteration: 185700
loss: 0.9728081822395325,grad_norm: 0.9999990080294135, iteration: 185701
loss: 0.9506348371505737,grad_norm: 0.8762157800283572, iteration: 185702
loss: 1.0476945638656616,grad_norm: 0.999999809504808, iteration: 185703
loss: 1.0007374286651611,grad_norm: 0.9999991093062338, iteration: 185704
loss: 0.9858567714691162,grad_norm: 0.9822324114813477, iteration: 185705
loss: 1.0047615766525269,grad_norm: 0.9999991913615136, iteration: 185706
loss: 1.0131436586380005,grad_norm: 0.9999989836269524, iteration: 185707
loss: 1.0151435136795044,grad_norm: 0.8799922202751762, iteration: 185708
loss: 1.0017679929733276,grad_norm: 0.999999141617257, iteration: 185709
loss: 1.0253405570983887,grad_norm: 0.8547356766693623, iteration: 185710
loss: 1.0378220081329346,grad_norm: 0.9033490799597024, iteration: 185711
loss: 1.0660244226455688,grad_norm: 0.9926105338541129, iteration: 185712
loss: 1.0364737510681152,grad_norm: 0.8984120698029774, iteration: 185713
loss: 1.003298044204712,grad_norm: 0.9744988148794613, iteration: 185714
loss: 0.9655985236167908,grad_norm: 0.9664839149861905, iteration: 185715
loss: 0.9624661207199097,grad_norm: 0.9999992762639494, iteration: 185716
loss: 1.0222231149673462,grad_norm: 0.9999991786365692, iteration: 185717
loss: 1.0275427103042603,grad_norm: 0.9805320977304633, iteration: 185718
loss: 1.005194902420044,grad_norm: 0.8735441188832326, iteration: 185719
loss: 0.9907927513122559,grad_norm: 0.9999989302421053, iteration: 185720
loss: 1.0024763345718384,grad_norm: 0.9992681738276582, iteration: 185721
loss: 1.024917721748352,grad_norm: 0.8796117786258545, iteration: 185722
loss: 1.0373687744140625,grad_norm: 0.8937853465554717, iteration: 185723
loss: 1.0197811126708984,grad_norm: 0.8783702273807943, iteration: 185724
loss: 0.9928945899009705,grad_norm: 0.9638090203451819, iteration: 185725
loss: 1.0016757249832153,grad_norm: 0.9999991045122394, iteration: 185726
loss: 0.962340772151947,grad_norm: 0.9394308528526953, iteration: 185727
loss: 1.012203335762024,grad_norm: 0.9999991048170682, iteration: 185728
loss: 0.9800364375114441,grad_norm: 0.999999453159428, iteration: 185729
loss: 1.0360304117202759,grad_norm: 0.9999992238484492, iteration: 185730
loss: 0.9874829053878784,grad_norm: 0.9999989859165984, iteration: 185731
loss: 0.9986154437065125,grad_norm: 0.8547694288697986, iteration: 185732
loss: 1.0064704418182373,grad_norm: 0.8817446889155589, iteration: 185733
loss: 0.9696213603019714,grad_norm: 0.9999990354618695, iteration: 185734
loss: 0.9984351992607117,grad_norm: 0.9704183589094225, iteration: 185735
loss: 1.0204311609268188,grad_norm: 0.9007406143384941, iteration: 185736
loss: 0.9690927267074585,grad_norm: 0.9999997605570727, iteration: 185737
loss: 0.9866490364074707,grad_norm: 0.9004244400933448, iteration: 185738
loss: 0.9971539974212646,grad_norm: 0.9309798072253358, iteration: 185739
loss: 0.9660755395889282,grad_norm: 0.8924208374596281, iteration: 185740
loss: 1.0155417919158936,grad_norm: 0.9289765680228472, iteration: 185741
loss: 0.9906111359596252,grad_norm: 0.835500130161332, iteration: 185742
loss: 0.9599847197532654,grad_norm: 0.8771945945724517, iteration: 185743
loss: 1.0121655464172363,grad_norm: 0.9999991180789817, iteration: 185744
loss: 0.9880250692367554,grad_norm: 0.9999990562819759, iteration: 185745
loss: 1.004769206047058,grad_norm: 0.9320509857813754, iteration: 185746
loss: 1.0096335411071777,grad_norm: 0.9832009923951086, iteration: 185747
loss: 0.9973617196083069,grad_norm: 0.9246132460191999, iteration: 185748
loss: 1.0204930305480957,grad_norm: 0.8313950549450926, iteration: 185749
loss: 1.0075948238372803,grad_norm: 0.9431537573092665, iteration: 185750
loss: 0.9919393658638,grad_norm: 0.9999989771773827, iteration: 185751
loss: 1.0015523433685303,grad_norm: 0.9999990087559376, iteration: 185752
loss: 1.0856409072875977,grad_norm: 0.9691156239542474, iteration: 185753
loss: 0.9997109770774841,grad_norm: 0.900452215656213, iteration: 185754
loss: 0.991683840751648,grad_norm: 0.9999989794146766, iteration: 185755
loss: 0.9901207685470581,grad_norm: 0.9272114970078849, iteration: 185756
loss: 1.0166512727737427,grad_norm: 0.999999192458534, iteration: 185757
loss: 1.0131723880767822,grad_norm: 0.9999992278620504, iteration: 185758
loss: 1.0118815898895264,grad_norm: 0.8760835072023155, iteration: 185759
loss: 1.0146358013153076,grad_norm: 0.9702284730321362, iteration: 185760
loss: 0.9954215288162231,grad_norm: 0.8475715475452108, iteration: 185761
loss: 1.0067802667617798,grad_norm: 0.9122373205943138, iteration: 185762
loss: 1.0614676475524902,grad_norm: 0.9999991796321036, iteration: 185763
loss: 0.9813765287399292,grad_norm: 0.8509935916636528, iteration: 185764
loss: 0.9948635697364807,grad_norm: 0.9999991298355089, iteration: 185765
loss: 0.998309850692749,grad_norm: 0.9154887307560731, iteration: 185766
loss: 1.0070924758911133,grad_norm: 0.9999992443379516, iteration: 185767
loss: 1.01247239112854,grad_norm: 0.9791137616256964, iteration: 185768
loss: 1.0168637037277222,grad_norm: 0.9999991335898742, iteration: 185769
loss: 0.974609375,grad_norm: 0.8971523377816373, iteration: 185770
loss: 0.9631414413452148,grad_norm: 0.9999991274396832, iteration: 185771
loss: 0.9858574271202087,grad_norm: 0.8784111140944909, iteration: 185772
loss: 0.9817072153091431,grad_norm: 0.9999989968771048, iteration: 185773
loss: 0.9699913263320923,grad_norm: 0.8993978170789874, iteration: 185774
loss: 1.0152794122695923,grad_norm: 0.9938418097053417, iteration: 185775
loss: 0.9765065908432007,grad_norm: 0.9999992851519509, iteration: 185776
loss: 0.9960073232650757,grad_norm: 0.9999990576535993, iteration: 185777
loss: 1.0346251726150513,grad_norm: 0.8951064391648251, iteration: 185778
loss: 1.0007082223892212,grad_norm: 0.9354908461231904, iteration: 185779
loss: 0.9664261937141418,grad_norm: 0.9999991072715357, iteration: 185780
loss: 0.9958348870277405,grad_norm: 0.9999992069321064, iteration: 185781
loss: 1.0046676397323608,grad_norm: 0.9261681452847067, iteration: 185782
loss: 1.0337141752243042,grad_norm: 0.9523541212424989, iteration: 185783
loss: 1.0069764852523804,grad_norm: 0.8139123863295209, iteration: 185784
loss: 1.0076625347137451,grad_norm: 0.9650413425835216, iteration: 185785
loss: 1.0233854055404663,grad_norm: 0.9553654992029079, iteration: 185786
loss: 1.0307495594024658,grad_norm: 0.9999992926720479, iteration: 185787
loss: 0.9620867967605591,grad_norm: 0.912548474438657, iteration: 185788
loss: 0.9984492063522339,grad_norm: 0.9999990329636841, iteration: 185789
loss: 1.0380496978759766,grad_norm: 0.9485552928016252, iteration: 185790
loss: 0.9930628538131714,grad_norm: 0.9999991295508044, iteration: 185791
loss: 1.0009493827819824,grad_norm: 0.8855585189420261, iteration: 185792
loss: 1.020638346672058,grad_norm: 0.9999991995185638, iteration: 185793
loss: 0.9900709986686707,grad_norm: 0.9999991155078942, iteration: 185794
loss: 1.03010892868042,grad_norm: 0.9999989771315445, iteration: 185795
loss: 1.0343096256256104,grad_norm: 0.9999992298592318, iteration: 185796
loss: 0.9969578981399536,grad_norm: 0.9999990508701408, iteration: 185797
loss: 1.0184880495071411,grad_norm: 0.8773686251036664, iteration: 185798
loss: 1.0014123916625977,grad_norm: 0.9381043695477301, iteration: 185799
loss: 0.9554265141487122,grad_norm: 0.9999991279716445, iteration: 185800
loss: 0.9635072350502014,grad_norm: 0.9621518648109783, iteration: 185801
loss: 0.9479956030845642,grad_norm: 0.9999991277353079, iteration: 185802
loss: 0.9663493037223816,grad_norm: 0.8806715939451244, iteration: 185803
loss: 1.001382827758789,grad_norm: 0.8115415569099673, iteration: 185804
loss: 1.033737063407898,grad_norm: 0.9999989773937817, iteration: 185805
loss: 0.9884269833564758,grad_norm: 0.9246018387430186, iteration: 185806
loss: 0.9386831521987915,grad_norm: 0.9204515889053879, iteration: 185807
loss: 1.0173224210739136,grad_norm: 0.9547743893107963, iteration: 185808
loss: 1.047547698020935,grad_norm: 0.9999990106453625, iteration: 185809
loss: 1.0031388998031616,grad_norm: 0.9999993723025256, iteration: 185810
loss: 1.0128010511398315,grad_norm: 0.9999991302252098, iteration: 185811
loss: 0.9752785563468933,grad_norm: 0.9881501649707867, iteration: 185812
loss: 0.9926455020904541,grad_norm: 0.8832183334227529, iteration: 185813
loss: 0.9881154298782349,grad_norm: 0.8952838492899006, iteration: 185814
loss: 1.010786533355713,grad_norm: 0.9661470944699816, iteration: 185815
loss: 0.9708092212677002,grad_norm: 0.9379074092410035, iteration: 185816
loss: 0.9696824550628662,grad_norm: 0.7040636454707837, iteration: 185817
loss: 0.9777399897575378,grad_norm: 0.901267786081297, iteration: 185818
loss: 0.9783905148506165,grad_norm: 0.8853020357586066, iteration: 185819
loss: 0.9723213315010071,grad_norm: 0.9339318632550008, iteration: 185820
loss: 1.0146161317825317,grad_norm: 0.9999991344850174, iteration: 185821
loss: 1.0241328477859497,grad_norm: 0.9999991574587523, iteration: 185822
loss: 1.0046205520629883,grad_norm: 0.7955816273987938, iteration: 185823
loss: 1.0144765377044678,grad_norm: 0.8773425496367037, iteration: 185824
loss: 1.003368616104126,grad_norm: 0.9999990815916161, iteration: 185825
loss: 1.0101470947265625,grad_norm: 0.9243215637217457, iteration: 185826
loss: 1.0482544898986816,grad_norm: 0.9999990983660461, iteration: 185827
loss: 0.9811996817588806,grad_norm: 0.9713556265065114, iteration: 185828
loss: 0.9920657873153687,grad_norm: 0.9637377675218216, iteration: 185829
loss: 0.9893250465393066,grad_norm: 0.9999991592037051, iteration: 185830
loss: 1.0009214878082275,grad_norm: 0.9308094055879006, iteration: 185831
loss: 0.9568037390708923,grad_norm: 0.8874152484721811, iteration: 185832
loss: 1.0056909322738647,grad_norm: 0.9999991009621966, iteration: 185833
loss: 1.0256948471069336,grad_norm: 0.9700475932574114, iteration: 185834
loss: 1.0288267135620117,grad_norm: 0.9649452932936315, iteration: 185835
loss: 1.0590946674346924,grad_norm: 0.8435693446606155, iteration: 185836
loss: 1.0204596519470215,grad_norm: 0.9999991328294439, iteration: 185837
loss: 1.0084397792816162,grad_norm: 0.9999990856650997, iteration: 185838
loss: 1.0624226331710815,grad_norm: 0.9000426422934836, iteration: 185839
loss: 1.065596580505371,grad_norm: 0.9999996656481512, iteration: 185840
loss: 0.9842517375946045,grad_norm: 0.9999990707932883, iteration: 185841
loss: 1.0068467855453491,grad_norm: 0.9195642517115747, iteration: 185842
loss: 1.016748309135437,grad_norm: 0.9999995403764138, iteration: 185843
loss: 1.0423253774642944,grad_norm: 0.9999991043078212, iteration: 185844
loss: 1.0184264183044434,grad_norm: 0.9999991139735939, iteration: 185845
loss: 1.0166455507278442,grad_norm: 0.9689425785158827, iteration: 185846
loss: 0.9952811002731323,grad_norm: 0.9999992395607287, iteration: 185847
loss: 0.9851359128952026,grad_norm: 0.9066862783190538, iteration: 185848
loss: 0.988620400428772,grad_norm: 0.9999991237231397, iteration: 185849
loss: 1.0006306171417236,grad_norm: 0.9999991884281392, iteration: 185850
loss: 0.9796912670135498,grad_norm: 0.9048103416445501, iteration: 185851
loss: 1.0395585298538208,grad_norm: 0.9999991958963477, iteration: 185852
loss: 1.0616650581359863,grad_norm: 0.9999996190669627, iteration: 185853
loss: 0.9650421738624573,grad_norm: 0.9008410193439039, iteration: 185854
loss: 0.9697490930557251,grad_norm: 0.9697097830314402, iteration: 185855
loss: 1.0478748083114624,grad_norm: 0.7666489238965584, iteration: 185856
loss: 0.9582362771034241,grad_norm: 0.9928700383543888, iteration: 185857
loss: 1.0000214576721191,grad_norm: 0.8499899969267767, iteration: 185858
loss: 0.9932467937469482,grad_norm: 0.9613194795347497, iteration: 185859
loss: 1.0500433444976807,grad_norm: 0.9999992510166791, iteration: 185860
loss: 1.0485780239105225,grad_norm: 0.9999991813878899, iteration: 185861
loss: 0.9869788885116577,grad_norm: 0.8970344675989724, iteration: 185862
loss: 0.9893821477890015,grad_norm: 0.9005517479287329, iteration: 185863
loss: 1.0309653282165527,grad_norm: 0.9999989872534822, iteration: 185864
loss: 1.0240132808685303,grad_norm: 0.9999992952830247, iteration: 185865
loss: 1.0130876302719116,grad_norm: 0.9999991465829907, iteration: 185866
loss: 0.9999028444290161,grad_norm: 0.9730860875255127, iteration: 185867
loss: 0.9680975079536438,grad_norm: 0.9999992179183107, iteration: 185868
loss: 0.9896366000175476,grad_norm: 0.9999990483814527, iteration: 185869
loss: 0.9777856469154358,grad_norm: 0.9903699755543244, iteration: 185870
loss: 1.0157411098480225,grad_norm: 0.9999992270762298, iteration: 185871
loss: 0.9889078736305237,grad_norm: 0.987331719214007, iteration: 185872
loss: 1.0635852813720703,grad_norm: 0.9999997787459065, iteration: 185873
loss: 1.011631965637207,grad_norm: 0.9999990787462029, iteration: 185874
loss: 1.0362253189086914,grad_norm: 0.9017162224527386, iteration: 185875
loss: 1.0322740077972412,grad_norm: 0.999999007959738, iteration: 185876
loss: 0.967755138874054,grad_norm: 0.9721675204475702, iteration: 185877
loss: 1.014625906944275,grad_norm: 0.9999992413772961, iteration: 185878
loss: 0.9966800808906555,grad_norm: 0.9999990889588517, iteration: 185879
loss: 0.9824923872947693,grad_norm: 0.9999990364541671, iteration: 185880
loss: 1.0056923627853394,grad_norm: 0.9999990666792111, iteration: 185881
loss: 1.0072482824325562,grad_norm: 0.9985974905126782, iteration: 185882
loss: 1.0046452283859253,grad_norm: 0.9966636226490349, iteration: 185883
loss: 1.0044785737991333,grad_norm: 0.8934467955557295, iteration: 185884
loss: 0.9952489733695984,grad_norm: 0.9999993135888233, iteration: 185885
loss: 0.9873186349868774,grad_norm: 0.9999991394859651, iteration: 185886
loss: 1.002327799797058,grad_norm: 0.999999091013732, iteration: 185887
loss: 0.9670611619949341,grad_norm: 0.9551454380188181, iteration: 185888
loss: 0.9820033311843872,grad_norm: 0.9968558996063239, iteration: 185889
loss: 0.956510603427887,grad_norm: 0.9999990664755143, iteration: 185890
loss: 0.9997751116752625,grad_norm: 0.8524903867709013, iteration: 185891
loss: 1.0070523023605347,grad_norm: 0.9888792783525713, iteration: 185892
loss: 0.999704897403717,grad_norm: 0.9798504928497772, iteration: 185893
loss: 1.0205512046813965,grad_norm: 0.9999992931938413, iteration: 185894
loss: 1.0208710432052612,grad_norm: 0.9999998912531335, iteration: 185895
loss: 0.9833789467811584,grad_norm: 0.9339146587742058, iteration: 185896
loss: 0.9865694642066956,grad_norm: 0.9375238825428439, iteration: 185897
loss: 1.0282566547393799,grad_norm: 0.8865909389765648, iteration: 185898
loss: 0.9792567491531372,grad_norm: 0.8732574915315954, iteration: 185899
loss: 1.0473881959915161,grad_norm: 0.9999992792011146, iteration: 185900
loss: 1.0118950605392456,grad_norm: 0.9999990891028205, iteration: 185901
loss: 1.0301514863967896,grad_norm: 0.9381284842015719, iteration: 185902
loss: 1.0283957719802856,grad_norm: 0.7716469531577572, iteration: 185903
loss: 1.0248852968215942,grad_norm: 0.999999815401189, iteration: 185904
loss: 1.0631691217422485,grad_norm: 0.999999305336237, iteration: 185905
loss: 0.9946156144142151,grad_norm: 0.8717367667345048, iteration: 185906
loss: 1.0009677410125732,grad_norm: 0.9799118152497782, iteration: 185907
loss: 0.9865092039108276,grad_norm: 0.9607157082616289, iteration: 185908
loss: 0.963324785232544,grad_norm: 0.9999990115423341, iteration: 185909
loss: 1.0060529708862305,grad_norm: 0.9999989858236996, iteration: 185910
loss: 0.9905495643615723,grad_norm: 0.9451796934481259, iteration: 185911
loss: 0.9481410384178162,grad_norm: 0.9018166489657614, iteration: 185912
loss: 0.9932351112365723,grad_norm: 0.8883539496340629, iteration: 185913
loss: 1.0192131996154785,grad_norm: 0.9999991204387678, iteration: 185914
loss: 1.1167125701904297,grad_norm: 0.878159455612645, iteration: 185915
loss: 1.0001072883605957,grad_norm: 0.8933595429336905, iteration: 185916
loss: 1.0089366436004639,grad_norm: 0.9287070963285818, iteration: 185917
loss: 1.0028477907180786,grad_norm: 0.8319653510668225, iteration: 185918
loss: 1.0162627696990967,grad_norm: 0.9999990756703352, iteration: 185919
loss: 0.9973746538162231,grad_norm: 0.9135865470136567, iteration: 185920
loss: 1.0275009870529175,grad_norm: 0.8230737716841657, iteration: 185921
loss: 0.9919630289077759,grad_norm: 0.9211605149810653, iteration: 185922
loss: 1.0081125497817993,grad_norm: 0.9256456337441192, iteration: 185923
loss: 0.9615910053253174,grad_norm: 0.8738105745880396, iteration: 185924
loss: 0.9733285903930664,grad_norm: 0.9999991280910661, iteration: 185925
loss: 1.0108295679092407,grad_norm: 0.9588805817894342, iteration: 185926
loss: 1.0236849784851074,grad_norm: 0.8784678097079842, iteration: 185927
loss: 1.024459958076477,grad_norm: 0.9999991624060781, iteration: 185928
loss: 0.9895442724227905,grad_norm: 0.9521751115392496, iteration: 185929
loss: 1.011176347732544,grad_norm: 0.9999990726053095, iteration: 185930
loss: 0.9931789636611938,grad_norm: 0.9908372654649374, iteration: 185931
loss: 0.9968430399894714,grad_norm: 0.9734377352552119, iteration: 185932
loss: 0.9952029585838318,grad_norm: 0.9999991272286849, iteration: 185933
loss: 1.00215744972229,grad_norm: 0.9189639871339216, iteration: 185934
loss: 0.9784280061721802,grad_norm: 0.9965689749637201, iteration: 185935
loss: 0.9961592555046082,grad_norm: 0.8900890665604999, iteration: 185936
loss: 0.9615570902824402,grad_norm: 0.9999991825867817, iteration: 185937
loss: 0.9865857362747192,grad_norm: 0.8785221561006911, iteration: 185938
loss: 0.989048957824707,grad_norm: 0.8697006382632473, iteration: 185939
loss: 0.9807500839233398,grad_norm: 0.9336772176685719, iteration: 185940
loss: 0.985495388507843,grad_norm: 0.9650519326843554, iteration: 185941
loss: 1.0193723440170288,grad_norm: 0.982965622605939, iteration: 185942
loss: 1.0055947303771973,grad_norm: 0.9274257361111304, iteration: 185943
loss: 1.0246537923812866,grad_norm: 0.9999999738373974, iteration: 185944
loss: 1.0368353128433228,grad_norm: 0.9999991851697231, iteration: 185945
loss: 1.0110715627670288,grad_norm: 0.9999993252232052, iteration: 185946
loss: 1.037524700164795,grad_norm: 0.9999991324886406, iteration: 185947
loss: 1.0313947200775146,grad_norm: 0.8607296494444797, iteration: 185948
loss: 1.0064760446548462,grad_norm: 0.9999990669972814, iteration: 185949
loss: 1.0125722885131836,grad_norm: 0.9999992259079624, iteration: 185950
loss: 0.9965487122535706,grad_norm: 0.9999992034693767, iteration: 185951
loss: 0.9848031401634216,grad_norm: 0.8908874975185465, iteration: 185952
loss: 1.0207922458648682,grad_norm: 0.9832775579971639, iteration: 185953
loss: 0.9874883890151978,grad_norm: 0.9999992134348067, iteration: 185954
loss: 0.9965937733650208,grad_norm: 0.9204690558186737, iteration: 185955
loss: 1.016184687614441,grad_norm: 0.9889573034781035, iteration: 185956
loss: 0.9449621438980103,grad_norm: 0.9594766327627763, iteration: 185957
loss: 0.9937693476676941,grad_norm: 0.9999989978384186, iteration: 185958
loss: 1.026820421218872,grad_norm: 0.8441693299533546, iteration: 185959
loss: 0.9594101309776306,grad_norm: 0.9203622509709387, iteration: 185960
loss: 0.9950272440910339,grad_norm: 0.9605435438896835, iteration: 185961
loss: 0.9779690504074097,grad_norm: 0.859927365059634, iteration: 185962
loss: 1.015447974205017,grad_norm: 0.9999991464878805, iteration: 185963
loss: 1.0174001455307007,grad_norm: 0.9999994460030975, iteration: 185964
loss: 1.0048996210098267,grad_norm: 0.8998531184370684, iteration: 185965
loss: 1.019415259361267,grad_norm: 0.8349579950481028, iteration: 185966
loss: 0.9830243587493896,grad_norm: 0.8594301083018451, iteration: 185967
loss: 1.0032813549041748,grad_norm: 0.8240184711886844, iteration: 185968
loss: 1.0097541809082031,grad_norm: 0.8936708282850633, iteration: 185969
loss: 1.0156371593475342,grad_norm: 0.9642530987361371, iteration: 185970
loss: 0.9803993105888367,grad_norm: 0.8303962470343768, iteration: 185971
loss: 0.9970764517784119,grad_norm: 0.9629035702164562, iteration: 185972
loss: 1.0126720666885376,grad_norm: 0.9136577688768356, iteration: 185973
loss: 1.005183219909668,grad_norm: 0.9999991271493109, iteration: 185974
loss: 0.9776572585105896,grad_norm: 0.8252186456419406, iteration: 185975
loss: 1.037636160850525,grad_norm: 0.9431118059581456, iteration: 185976
loss: 1.0010759830474854,grad_norm: 0.8763305823841627, iteration: 185977
loss: 0.9867895245552063,grad_norm: 0.9923042426253965, iteration: 185978
loss: 1.0182185173034668,grad_norm: 0.9999990003360381, iteration: 185979
loss: 0.9931750297546387,grad_norm: 0.944766164495185, iteration: 185980
loss: 0.9750553369522095,grad_norm: 0.9757084970283737, iteration: 185981
loss: 0.9991739988327026,grad_norm: 0.9245434213642697, iteration: 185982
loss: 0.9842284321784973,grad_norm: 0.9999991344389635, iteration: 185983
loss: 0.9833621978759766,grad_norm: 0.9743880009703685, iteration: 185984
loss: 0.9815234541893005,grad_norm: 0.9999991155832076, iteration: 185985
loss: 1.0201478004455566,grad_norm: 0.968550164230346, iteration: 185986
loss: 0.9729008078575134,grad_norm: 0.8841165896899096, iteration: 185987
loss: 1.0010006427764893,grad_norm: 0.9677210488919015, iteration: 185988
loss: 0.9836104512214661,grad_norm: 0.9446594776296142, iteration: 185989
loss: 1.0163979530334473,grad_norm: 0.9999991431539176, iteration: 185990
loss: 0.9685205817222595,grad_norm: 0.9514616862889717, iteration: 185991
loss: 1.0065683126449585,grad_norm: 0.8851447713613202, iteration: 185992
loss: 1.0206480026245117,grad_norm: 0.9840677092146849, iteration: 185993
loss: 1.010838508605957,grad_norm: 0.9999993480845849, iteration: 185994
loss: 1.0308525562286377,grad_norm: 0.9999990610125755, iteration: 185995
loss: 0.9835934638977051,grad_norm: 0.9979873951384719, iteration: 185996
loss: 0.9896571040153503,grad_norm: 0.9999990778252549, iteration: 185997
loss: 1.0195914506912231,grad_norm: 0.8485071430473177, iteration: 185998
loss: 0.9882764220237732,grad_norm: 0.9999990938078048, iteration: 185999
loss: 0.998161792755127,grad_norm: 0.9999990050338406, iteration: 186000
loss: 0.972007691860199,grad_norm: 0.9999992195240489, iteration: 186001
loss: 1.0326135158538818,grad_norm: 0.8952601743313232, iteration: 186002
loss: 1.031147837638855,grad_norm: 0.8847948540791089, iteration: 186003
loss: 0.9902477264404297,grad_norm: 0.9999992020752412, iteration: 186004
loss: 0.9885391592979431,grad_norm: 0.7273346816883188, iteration: 186005
loss: 0.9808180928230286,grad_norm: 0.9758425226157397, iteration: 186006
loss: 1.0097852945327759,grad_norm: 0.9958638092694844, iteration: 186007
loss: 1.0386724472045898,grad_norm: 0.8654088734427718, iteration: 186008
loss: 0.9930150508880615,grad_norm: 0.999999035431887, iteration: 186009
loss: 1.00156569480896,grad_norm: 0.9999989916084341, iteration: 186010
loss: 0.9845770001411438,grad_norm: 0.9656241643018284, iteration: 186011
loss: 0.9936462640762329,grad_norm: 0.9821631931331539, iteration: 186012
loss: 1.0301865339279175,grad_norm: 0.9999991995507093, iteration: 186013
loss: 0.9819886684417725,grad_norm: 0.999999034497526, iteration: 186014
loss: 0.9742021560668945,grad_norm: 0.7608678099074658, iteration: 186015
loss: 1.003679633140564,grad_norm: 0.957642984643894, iteration: 186016
loss: 1.0436986684799194,grad_norm: 0.9920161186603177, iteration: 186017
loss: 0.9886096119880676,grad_norm: 0.9004572702049987, iteration: 186018
loss: 1.0371135473251343,grad_norm: 0.9829005437997691, iteration: 186019
loss: 0.9990062713623047,grad_norm: 0.8134194716969032, iteration: 186020
loss: 1.008819580078125,grad_norm: 0.8268026558367514, iteration: 186021
loss: 1.1177408695220947,grad_norm: 0.999999931420248, iteration: 186022
loss: 0.9961165189743042,grad_norm: 0.9774399026922886, iteration: 186023
loss: 1.0322554111480713,grad_norm: 0.9999992120831499, iteration: 186024
loss: 1.0182888507843018,grad_norm: 0.9033700283857045, iteration: 186025
loss: 0.9747207760810852,grad_norm: 0.9999990484995368, iteration: 186026
loss: 1.0221329927444458,grad_norm: 0.8441053534320072, iteration: 186027
loss: 0.9832215309143066,grad_norm: 0.999999092125109, iteration: 186028
loss: 1.034635066986084,grad_norm: 0.8886644390780213, iteration: 186029
loss: 1.033596158027649,grad_norm: 0.999998973114417, iteration: 186030
loss: 0.9968019723892212,grad_norm: 0.8527499924389456, iteration: 186031
loss: 0.9687345027923584,grad_norm: 0.980815143406944, iteration: 186032
loss: 1.1222072839736938,grad_norm: 0.9999993103819673, iteration: 186033
loss: 0.9458537697792053,grad_norm: 0.9999992003481399, iteration: 186034
loss: 0.9882535338401794,grad_norm: 0.9999992320238364, iteration: 186035
loss: 0.9928677082061768,grad_norm: 0.9649036838331368, iteration: 186036
loss: 1.022043228149414,grad_norm: 0.855978924256761, iteration: 186037
loss: 1.0094358921051025,grad_norm: 0.9999991272948623, iteration: 186038
loss: 1.0243618488311768,grad_norm: 0.8177202645630424, iteration: 186039
loss: 1.0361199378967285,grad_norm: 0.9839330326347651, iteration: 186040
loss: 0.976978600025177,grad_norm: 0.999999132086177, iteration: 186041
loss: 1.0153956413269043,grad_norm: 0.9999996690989083, iteration: 186042
loss: 0.9969498515129089,grad_norm: 0.9999990914523433, iteration: 186043
loss: 0.9956211447715759,grad_norm: 0.927149466553477, iteration: 186044
loss: 0.97919762134552,grad_norm: 0.9999990858266751, iteration: 186045
loss: 1.0035574436187744,grad_norm: 0.8422342623632458, iteration: 186046
loss: 1.0037283897399902,grad_norm: 0.9999991250775018, iteration: 186047
loss: 1.060368299484253,grad_norm: 0.999999688370025, iteration: 186048
loss: 1.004760980606079,grad_norm: 0.9753650876307852, iteration: 186049
loss: 1.035308599472046,grad_norm: 0.999999869644208, iteration: 186050
loss: 0.9897360801696777,grad_norm: 0.9282677382687489, iteration: 186051
loss: 1.03287935256958,grad_norm: 0.7954549420080866, iteration: 186052
loss: 0.9488826990127563,grad_norm: 0.9823882761829922, iteration: 186053
loss: 1.019876480102539,grad_norm: 0.8835198060126207, iteration: 186054
loss: 1.1074594259262085,grad_norm: 0.9999997882940765, iteration: 186055
loss: 1.0001015663146973,grad_norm: 0.9251228269032404, iteration: 186056
loss: 0.9652881622314453,grad_norm: 0.9733044752446147, iteration: 186057
loss: 1.0107654333114624,grad_norm: 0.8635696563611861, iteration: 186058
loss: 0.979485273361206,grad_norm: 0.9999992202786141, iteration: 186059
loss: 0.9897795915603638,grad_norm: 0.9268440589334984, iteration: 186060
loss: 0.9929900169372559,grad_norm: 0.8550058293466086, iteration: 186061
loss: 0.9764267206192017,grad_norm: 0.9367041010779058, iteration: 186062
loss: 0.998431384563446,grad_norm: 0.9999990175218375, iteration: 186063
loss: 0.9993839859962463,grad_norm: 0.9999989281912299, iteration: 186064
loss: 0.9750049710273743,grad_norm: 0.9786691131243974, iteration: 186065
loss: 0.9950525164604187,grad_norm: 0.940976208785453, iteration: 186066
loss: 1.045332431793213,grad_norm: 0.9999999004189961, iteration: 186067
loss: 1.02043616771698,grad_norm: 0.9999990918097256, iteration: 186068
loss: 1.015746831893921,grad_norm: 0.7937800615518069, iteration: 186069
loss: 1.006636619567871,grad_norm: 0.9999990336945045, iteration: 186070
loss: 1.001874566078186,grad_norm: 0.8839526200448464, iteration: 186071
loss: 0.9813025593757629,grad_norm: 0.9999990689275607, iteration: 186072
loss: 0.9861312508583069,grad_norm: 0.9999990660170227, iteration: 186073
loss: 1.0160406827926636,grad_norm: 0.8425355653834182, iteration: 186074
loss: 0.9834227561950684,grad_norm: 0.9449343412149686, iteration: 186075
loss: 0.9799070954322815,grad_norm: 0.9999990511772532, iteration: 186076
loss: 0.9678352475166321,grad_norm: 0.9378476929012487, iteration: 186077
loss: 1.0259298086166382,grad_norm: 0.8227742291759638, iteration: 186078
loss: 0.9953572154045105,grad_norm: 0.999999051827714, iteration: 186079
loss: 0.9876766800880432,grad_norm: 0.9999990763070432, iteration: 186080
loss: 1.0083541870117188,grad_norm: 0.9999991741018162, iteration: 186081
loss: 1.0120247602462769,grad_norm: 0.8059286435412397, iteration: 186082
loss: 0.9812794327735901,grad_norm: 0.8261062674622864, iteration: 186083
loss: 0.9792202711105347,grad_norm: 0.9429524051610183, iteration: 186084
loss: 1.0448086261749268,grad_norm: 0.9999990007562815, iteration: 186085
loss: 0.9993318319320679,grad_norm: 0.8984112610207268, iteration: 186086
loss: 1.0017399787902832,grad_norm: 0.9999999295573412, iteration: 186087
loss: 0.9549906849861145,grad_norm: 0.9999990273598574, iteration: 186088
loss: 0.9994259476661682,grad_norm: 0.9821983823009699, iteration: 186089
loss: 1.069003701210022,grad_norm: 0.975747692390123, iteration: 186090
loss: 1.0028586387634277,grad_norm: 0.9037730770705473, iteration: 186091
loss: 1.029905080795288,grad_norm: 0.9861851362847905, iteration: 186092
loss: 0.9985228180885315,grad_norm: 0.9843152325142581, iteration: 186093
loss: 0.9826049208641052,grad_norm: 0.917104588989988, iteration: 186094
loss: 0.9622001051902771,grad_norm: 0.9999992059657502, iteration: 186095
loss: 0.9799106121063232,grad_norm: 0.9999991602383328, iteration: 186096
loss: 1.0056922435760498,grad_norm: 0.9585337641534623, iteration: 186097
loss: 1.0021454095840454,grad_norm: 0.8036395224697231, iteration: 186098
loss: 0.987848699092865,grad_norm: 0.9770123360526606, iteration: 186099
loss: 0.9922894835472107,grad_norm: 0.8877340146386413, iteration: 186100
loss: 0.9802627563476562,grad_norm: 0.9999992250985741, iteration: 186101
loss: 0.9881237149238586,grad_norm: 0.9999991059847458, iteration: 186102
loss: 1.018654704093933,grad_norm: 0.967196590914358, iteration: 186103
loss: 0.9792455434799194,grad_norm: 0.9999997967550467, iteration: 186104
loss: 0.9944783449172974,grad_norm: 0.9999991705698799, iteration: 186105
loss: 1.0157381296157837,grad_norm: 0.9999991573185518, iteration: 186106
loss: 1.0563896894454956,grad_norm: 0.9999998161349395, iteration: 186107
loss: 1.017914891242981,grad_norm: 0.92659323509675, iteration: 186108
loss: 0.9737315773963928,grad_norm: 0.9231114440734457, iteration: 186109
loss: 0.9573073983192444,grad_norm: 0.8827955099198217, iteration: 186110
loss: 0.9706698060035706,grad_norm: 0.9999991723730624, iteration: 186111
loss: 1.0568647384643555,grad_norm: 0.9999993777942798, iteration: 186112
loss: 0.9856148362159729,grad_norm: 0.9472752145647706, iteration: 186113
loss: 1.0096805095672607,grad_norm: 0.871623824472821, iteration: 186114
loss: 0.9518802762031555,grad_norm: 0.7816746315221217, iteration: 186115
loss: 1.0202146768569946,grad_norm: 0.8587590277143591, iteration: 186116
loss: 0.9905945062637329,grad_norm: 0.81495081608123, iteration: 186117
loss: 1.0186011791229248,grad_norm: 0.9999991459850146, iteration: 186118
loss: 1.0080939531326294,grad_norm: 0.9999989748130128, iteration: 186119
loss: 1.0091570615768433,grad_norm: 0.9519539799527427, iteration: 186120
loss: 1.0886520147323608,grad_norm: 0.9999993021065908, iteration: 186121
loss: 0.9674608111381531,grad_norm: 0.9999991628516045, iteration: 186122
loss: 1.1590386629104614,grad_norm: 0.9999991188330477, iteration: 186123
loss: 1.018667459487915,grad_norm: 0.9238053758674937, iteration: 186124
loss: 1.0255736112594604,grad_norm: 0.9199715949137622, iteration: 186125
loss: 0.9995007514953613,grad_norm: 0.9999999105639388, iteration: 186126
loss: 0.9815611839294434,grad_norm: 0.9999990091089339, iteration: 186127
loss: 0.996863067150116,grad_norm: 0.9999990402076087, iteration: 186128
loss: 0.992919385433197,grad_norm: 0.9999991390506766, iteration: 186129
loss: 1.0433365106582642,grad_norm: 0.9999996915424982, iteration: 186130
loss: 0.9962199330329895,grad_norm: 0.9172133633318901, iteration: 186131
loss: 1.0274478197097778,grad_norm: 0.9999991161010533, iteration: 186132
loss: 1.046186923980713,grad_norm: 0.9999991547859717, iteration: 186133
loss: 1.0162612199783325,grad_norm: 0.9363710105840445, iteration: 186134
loss: 1.0301014184951782,grad_norm: 0.9999992001504824, iteration: 186135
loss: 1.0073153972625732,grad_norm: 0.999999174242338, iteration: 186136
loss: 1.0150582790374756,grad_norm: 0.8896310245008716, iteration: 186137
loss: 0.9893413782119751,grad_norm: 0.8980383068434205, iteration: 186138
loss: 1.13303542137146,grad_norm: 0.999999875574337, iteration: 186139
loss: 0.9819347262382507,grad_norm: 0.8535956628136442, iteration: 186140
loss: 0.9596492648124695,grad_norm: 0.8671560173387003, iteration: 186141
loss: 0.9739813804626465,grad_norm: 0.9999991025974619, iteration: 186142
loss: 1.0583950281143188,grad_norm: 0.8878161825673627, iteration: 186143
loss: 0.9683696031570435,grad_norm: 0.8859311889990635, iteration: 186144
loss: 1.023730993270874,grad_norm: 0.9829817583744317, iteration: 186145
loss: 1.0085985660552979,grad_norm: 0.9594904150857527, iteration: 186146
loss: 0.9730872511863708,grad_norm: 0.8860581799176359, iteration: 186147
loss: 0.9909207224845886,grad_norm: 0.999999917238041, iteration: 186148
loss: 1.0101680755615234,grad_norm: 0.942284070284423, iteration: 186149
loss: 1.0310174226760864,grad_norm: 0.7716435407718809, iteration: 186150
loss: 1.027645230293274,grad_norm: 0.9999990003097493, iteration: 186151
loss: 0.9595004916191101,grad_norm: 0.9952074767289182, iteration: 186152
loss: 1.0169928073883057,grad_norm: 0.9999991828345084, iteration: 186153
loss: 1.0103678703308105,grad_norm: 0.9999991720006435, iteration: 186154
loss: 1.1215626001358032,grad_norm: 0.9999994057179484, iteration: 186155
loss: 0.9690200686454773,grad_norm: 0.9574421990345146, iteration: 186156
loss: 1.0591473579406738,grad_norm: 0.9999995508715656, iteration: 186157
loss: 1.124699592590332,grad_norm: 0.9999990897113694, iteration: 186158
loss: 1.090137004852295,grad_norm: 0.9999991824426188, iteration: 186159
loss: 1.0895551443099976,grad_norm: 0.9999992316217416, iteration: 186160
loss: 0.992020845413208,grad_norm: 0.883846984683151, iteration: 186161
loss: 1.0288296937942505,grad_norm: 0.9999993014915122, iteration: 186162
loss: 0.9761646389961243,grad_norm: 0.9999991761025921, iteration: 186163
loss: 1.0041463375091553,grad_norm: 0.8745203309927252, iteration: 186164
loss: 1.0055407285690308,grad_norm: 0.9999993020986041, iteration: 186165
loss: 1.0612602233886719,grad_norm: 0.9999995331542652, iteration: 186166
loss: 0.9864757061004639,grad_norm: 0.9281937218527245, iteration: 186167
loss: 1.0149083137512207,grad_norm: 0.9999995677906202, iteration: 186168
loss: 0.9874957203865051,grad_norm: 0.8567567029461723, iteration: 186169
loss: 1.03505277633667,grad_norm: 0.9282648266238045, iteration: 186170
loss: 1.1007083654403687,grad_norm: 0.9999999229751605, iteration: 186171
loss: 0.9608287215232849,grad_norm: 0.9639206466353235, iteration: 186172
loss: 0.983677089214325,grad_norm: 0.999999121525656, iteration: 186173
loss: 1.2701447010040283,grad_norm: 0.9999996515039059, iteration: 186174
loss: 1.0255564451217651,grad_norm: 0.9396580930732131, iteration: 186175
loss: 0.9822078943252563,grad_norm: 0.9681106515060622, iteration: 186176
loss: 1.026313304901123,grad_norm: 0.9999990111955895, iteration: 186177
loss: 0.9734092354774475,grad_norm: 0.9857322561771666, iteration: 186178
loss: 1.102632761001587,grad_norm: 0.9999996014400547, iteration: 186179
loss: 0.989905595779419,grad_norm: 0.8618114384857245, iteration: 186180
loss: 0.9711329936981201,grad_norm: 0.9999990936801149, iteration: 186181
loss: 1.0810482501983643,grad_norm: 0.9999993047774117, iteration: 186182
loss: 1.316978931427002,grad_norm: 0.9999997469059195, iteration: 186183
loss: 0.9924304485321045,grad_norm: 0.99999916775462, iteration: 186184
loss: 1.0019299983978271,grad_norm: 0.8133301167300029, iteration: 186185
loss: 1.0035065412521362,grad_norm: 0.8056986413070654, iteration: 186186
loss: 1.0084294080734253,grad_norm: 0.9999991441005791, iteration: 186187
loss: 0.9512239694595337,grad_norm: 0.8729182683583057, iteration: 186188
loss: 0.9943477511405945,grad_norm: 0.9999991482245119, iteration: 186189
loss: 1.0572738647460938,grad_norm: 0.9999991434391924, iteration: 186190
loss: 0.9977586269378662,grad_norm: 0.9999991345558787, iteration: 186191
loss: 1.0523440837860107,grad_norm: 0.9999990240909731, iteration: 186192
loss: 1.0397588014602661,grad_norm: 0.9999994125431848, iteration: 186193
loss: 0.9957590103149414,grad_norm: 0.9585883037335663, iteration: 186194
loss: 1.0086065530776978,grad_norm: 0.999999114804507, iteration: 186195
loss: 1.0787400007247925,grad_norm: 0.9999991511922081, iteration: 186196
loss: 1.0000190734863281,grad_norm: 0.9999990451712513, iteration: 186197
loss: 1.011407494544983,grad_norm: 0.9371033949963856, iteration: 186198
loss: 1.0140671730041504,grad_norm: 0.9281333847315592, iteration: 186199
loss: 1.0516973733901978,grad_norm: 0.9692156847288613, iteration: 186200
loss: 1.0139737129211426,grad_norm: 0.9651987616787701, iteration: 186201
loss: 1.004843831062317,grad_norm: 0.8857844472616418, iteration: 186202
loss: 1.002084732055664,grad_norm: 0.9999997628842165, iteration: 186203
loss: 1.0925750732421875,grad_norm: 0.9999992542775203, iteration: 186204
loss: 1.0705926418304443,grad_norm: 0.9860916990878119, iteration: 186205
loss: 1.0647753477096558,grad_norm: 0.9693218092941707, iteration: 186206
loss: 1.0402847528457642,grad_norm: 0.9999997108084999, iteration: 186207
loss: 1.1797561645507812,grad_norm: 0.9999993517298474, iteration: 186208
loss: 1.016935110092163,grad_norm: 0.9999992152078345, iteration: 186209
loss: 0.9998745918273926,grad_norm: 0.9003378516590724, iteration: 186210
loss: 1.0774832963943481,grad_norm: 0.9999999125570543, iteration: 186211
loss: 0.9826563596725464,grad_norm: 0.8697054316729358, iteration: 186212
loss: 0.9891840815544128,grad_norm: 0.9999992350524017, iteration: 186213
loss: 1.0363562107086182,grad_norm: 0.8679885562122553, iteration: 186214
loss: 1.0393463373184204,grad_norm: 0.9747766642950811, iteration: 186215
loss: 0.9889824986457825,grad_norm: 0.9999992284760548, iteration: 186216
loss: 1.0122873783111572,grad_norm: 0.9856820937740542, iteration: 186217
loss: 1.0620218515396118,grad_norm: 0.99999912070649, iteration: 186218
loss: 0.9372180700302124,grad_norm: 0.9845530235460764, iteration: 186219
loss: 1.0715343952178955,grad_norm: 0.9997996623293173, iteration: 186220
loss: 0.9972167015075684,grad_norm: 0.9999991978333213, iteration: 186221
loss: 1.042171597480774,grad_norm: 0.999999932100125, iteration: 186222
loss: 1.0091513395309448,grad_norm: 0.9999994293925938, iteration: 186223
loss: 0.9927331805229187,grad_norm: 0.9851410409283057, iteration: 186224
loss: 1.1679375171661377,grad_norm: 0.9999998656574857, iteration: 186225
loss: 1.0355041027069092,grad_norm: 0.9999991506289975, iteration: 186226
loss: 0.993998110294342,grad_norm: 0.8894514935441141, iteration: 186227
loss: 1.0127366781234741,grad_norm: 0.9999991335399022, iteration: 186228
loss: 1.0564385652542114,grad_norm: 0.9999991899023452, iteration: 186229
loss: 1.027544379234314,grad_norm: 0.9999993305114693, iteration: 186230
loss: 0.9742226600646973,grad_norm: 0.8849412435714112, iteration: 186231
loss: 1.030057668685913,grad_norm: 0.8882959330297051, iteration: 186232
loss: 0.9803935289382935,grad_norm: 0.8453129115504304, iteration: 186233
loss: 1.0033642053604126,grad_norm: 0.9364829018150185, iteration: 186234
loss: 1.0070842504501343,grad_norm: 0.9999992491598457, iteration: 186235
loss: 1.0355899333953857,grad_norm: 0.9999990063450951, iteration: 186236
loss: 0.9960435628890991,grad_norm: 0.8481944008195185, iteration: 186237
loss: 1.073241114616394,grad_norm: 0.9589796061921844, iteration: 186238
loss: 0.9852214455604553,grad_norm: 0.9198619310533902, iteration: 186239
loss: 1.0475664138793945,grad_norm: 0.9999991950604356, iteration: 186240
loss: 1.0899134874343872,grad_norm: 0.999999270105418, iteration: 186241
loss: 1.0071220397949219,grad_norm: 0.9999992812004798, iteration: 186242
loss: 0.9895346760749817,grad_norm: 0.9066590690617835, iteration: 186243
loss: 1.0079561471939087,grad_norm: 0.9999990433750763, iteration: 186244
loss: 0.9622009992599487,grad_norm: 0.9492557591777985, iteration: 186245
loss: 0.9905479550361633,grad_norm: 0.9999991651677612, iteration: 186246
loss: 0.9947038888931274,grad_norm: 0.9999999509601495, iteration: 186247
loss: 1.0384633541107178,grad_norm: 0.9999998844599081, iteration: 186248
loss: 1.0087403059005737,grad_norm: 0.9530149547705834, iteration: 186249
loss: 1.0280681848526,grad_norm: 0.9999991387841886, iteration: 186250
loss: 1.0308538675308228,grad_norm: 0.9999990583283085, iteration: 186251
loss: 1.0126603841781616,grad_norm: 0.9356527362336229, iteration: 186252
loss: 0.9784818887710571,grad_norm: 0.9999991052542164, iteration: 186253
loss: 1.0166229009628296,grad_norm: 0.8731805039107495, iteration: 186254
loss: 0.9947537779808044,grad_norm: 0.9092255967844725, iteration: 186255
loss: 0.9745696783065796,grad_norm: 0.9596485636350756, iteration: 186256
loss: 1.0238767862319946,grad_norm: 0.9184547733690818, iteration: 186257
loss: 1.0008466243743896,grad_norm: 0.9832556355770483, iteration: 186258
loss: 1.0096325874328613,grad_norm: 0.9999991837793653, iteration: 186259
loss: 1.025176763534546,grad_norm: 0.9679831281674506, iteration: 186260
loss: 1.0519856214523315,grad_norm: 0.9999989357883491, iteration: 186261
loss: 0.9888859987258911,grad_norm: 0.9088109215176636, iteration: 186262
loss: 1.0110305547714233,grad_norm: 0.9092169326350747, iteration: 186263
loss: 0.9542284607887268,grad_norm: 0.9999991607567905, iteration: 186264
loss: 1.0038695335388184,grad_norm: 0.9154464370693263, iteration: 186265
loss: 0.9777920842170715,grad_norm: 0.9462474995304745, iteration: 186266
loss: 1.0108654499053955,grad_norm: 0.9999991818818339, iteration: 186267
loss: 1.0003442764282227,grad_norm: 0.9665205704945754, iteration: 186268
loss: 1.0262727737426758,grad_norm: 0.9999990649138009, iteration: 186269
loss: 1.0816336870193481,grad_norm: 0.9999993269806643, iteration: 186270
loss: 0.9734837412834167,grad_norm: 0.9937140319565024, iteration: 186271
loss: 1.0389404296875,grad_norm: 0.8363540584429732, iteration: 186272
loss: 1.017374873161316,grad_norm: 0.9999992317072467, iteration: 186273
loss: 1.1383432149887085,grad_norm: 0.9999993320695978, iteration: 186274
loss: 0.9648447632789612,grad_norm: 0.8928667322596343, iteration: 186275
loss: 1.006245493888855,grad_norm: 0.9349708045933581, iteration: 186276
loss: 1.0170187950134277,grad_norm: 0.921970452646015, iteration: 186277
loss: 1.0030981302261353,grad_norm: 0.9130990883552347, iteration: 186278
loss: 1.0091428756713867,grad_norm: 0.945102840472886, iteration: 186279
loss: 1.0630080699920654,grad_norm: 0.9999990915435816, iteration: 186280
loss: 1.0151872634887695,grad_norm: 0.9999990518287194, iteration: 186281
loss: 1.0687144994735718,grad_norm: 0.9999992476769063, iteration: 186282
loss: 1.0177193880081177,grad_norm: 0.8215256115474658, iteration: 186283
loss: 0.9515736699104309,grad_norm: 0.8925518894118498, iteration: 186284
loss: 0.9747046828269958,grad_norm: 0.9177999060017308, iteration: 186285
loss: 1.0681357383728027,grad_norm: 0.9883153797877983, iteration: 186286
loss: 1.0569372177124023,grad_norm: 0.9999990721983529, iteration: 186287
loss: 0.9994937777519226,grad_norm: 0.9599457345125494, iteration: 186288
loss: 1.0710949897766113,grad_norm: 0.9999989994489759, iteration: 186289
loss: 1.0080302953720093,grad_norm: 0.9999990714707498, iteration: 186290
loss: 1.007555365562439,grad_norm: 0.8496247282305268, iteration: 186291
loss: 1.0130412578582764,grad_norm: 0.9999990310430795, iteration: 186292
loss: 1.0365591049194336,grad_norm: 0.9244361235370773, iteration: 186293
loss: 0.9664545655250549,grad_norm: 0.9678769935281376, iteration: 186294
loss: 1.0136446952819824,grad_norm: 0.9999992789239153, iteration: 186295
loss: 0.9541571736335754,grad_norm: 0.9348548259226771, iteration: 186296
loss: 0.9988419413566589,grad_norm: 0.9999992314340942, iteration: 186297
loss: 0.9915741682052612,grad_norm: 0.9999991534413899, iteration: 186298
loss: 0.9813103675842285,grad_norm: 0.9999990737734535, iteration: 186299
loss: 1.0871853828430176,grad_norm: 0.9999991570466255, iteration: 186300
loss: 1.0173542499542236,grad_norm: 0.8304718849008914, iteration: 186301
loss: 1.009403109550476,grad_norm: 0.993112691930126, iteration: 186302
loss: 1.0059610605239868,grad_norm: 0.9999998758556117, iteration: 186303
loss: 1.0395638942718506,grad_norm: 0.9999991591691441, iteration: 186304
loss: 1.009675145149231,grad_norm: 1.0000000473258726, iteration: 186305
loss: 0.9951927661895752,grad_norm: 0.9045840544090311, iteration: 186306
loss: 0.9921846985816956,grad_norm: 0.9999991154607951, iteration: 186307
loss: 1.074852705001831,grad_norm: 0.9999996140118085, iteration: 186308
loss: 1.0238369703292847,grad_norm: 0.983841052219722, iteration: 186309
loss: 0.983259916305542,grad_norm: 0.8520630610213464, iteration: 186310
loss: 1.0323915481567383,grad_norm: 0.999999356715173, iteration: 186311
loss: 0.962330162525177,grad_norm: 0.919835956405299, iteration: 186312
loss: 0.9555651545524597,grad_norm: 0.8539824746939031, iteration: 186313
loss: 0.9844730496406555,grad_norm: 0.9783983875810739, iteration: 186314
loss: 1.0403087139129639,grad_norm: 0.9999992590424656, iteration: 186315
loss: 0.9804983139038086,grad_norm: 0.815307210709368, iteration: 186316
loss: 1.0230165719985962,grad_norm: 0.999999121326609, iteration: 186317
loss: 1.0268089771270752,grad_norm: 0.9999997522040481, iteration: 186318
loss: 0.9680818319320679,grad_norm: 0.9438773426484051, iteration: 186319
loss: 1.0744901895523071,grad_norm: 0.9999998352616349, iteration: 186320
loss: 1.004127860069275,grad_norm: 0.9999992684257093, iteration: 186321
loss: 1.0326528549194336,grad_norm: 0.9999991710130683, iteration: 186322
loss: 0.9852392077445984,grad_norm: 0.9999993212533744, iteration: 186323
loss: 1.0916293859481812,grad_norm: 0.9999992098853743, iteration: 186324
loss: 1.026278018951416,grad_norm: 0.9437171301305852, iteration: 186325
loss: 0.9789588451385498,grad_norm: 0.9999990428436354, iteration: 186326
loss: 0.966123640537262,grad_norm: 0.9999989996843537, iteration: 186327
loss: 0.9567016363143921,grad_norm: 0.9999992005052688, iteration: 186328
loss: 1.0958366394042969,grad_norm: 0.9999999599211242, iteration: 186329
loss: 0.9968684315681458,grad_norm: 0.9999991267261107, iteration: 186330
loss: 0.9939176440238953,grad_norm: 0.8925290100907481, iteration: 186331
loss: 0.9899557828903198,grad_norm: 0.999998956092437, iteration: 186332
loss: 0.9874037504196167,grad_norm: 0.9999989934699686, iteration: 186333
loss: 1.0617271661758423,grad_norm: 0.9999990592695803, iteration: 186334
loss: 0.9801413416862488,grad_norm: 0.9999991068607188, iteration: 186335
loss: 1.0497610569000244,grad_norm: 0.8992286120667421, iteration: 186336
loss: 1.0024125576019287,grad_norm: 0.999999132653244, iteration: 186337
loss: 0.9980131983757019,grad_norm: 0.9999991944621568, iteration: 186338
loss: 0.948668897151947,grad_norm: 0.999999377794286, iteration: 186339
loss: 1.0352528095245361,grad_norm: 0.84985818673913, iteration: 186340
loss: 1.0355420112609863,grad_norm: 0.9334002415519401, iteration: 186341
loss: 1.0030661821365356,grad_norm: 0.9773446394750218, iteration: 186342
loss: 1.0080078840255737,grad_norm: 0.9999989491497897, iteration: 186343
loss: 0.9806195497512817,grad_norm: 0.9999991267760396, iteration: 186344
loss: 1.0380057096481323,grad_norm: 0.9999990663998218, iteration: 186345
loss: 1.015683889389038,grad_norm: 0.9999992899161756, iteration: 186346
loss: 1.0222728252410889,grad_norm: 0.9999998499725551, iteration: 186347
loss: 1.020378828048706,grad_norm: 0.9271364295285901, iteration: 186348
loss: 0.991080641746521,grad_norm: 0.9999992022725825, iteration: 186349
loss: 1.036620020866394,grad_norm: 0.9999992702572944, iteration: 186350
loss: 1.0251271724700928,grad_norm: 0.9999993293849345, iteration: 186351
loss: 0.9956833124160767,grad_norm: 0.9822780125670044, iteration: 186352
loss: 1.068403720855713,grad_norm: 0.9999995127572632, iteration: 186353
loss: 1.029698133468628,grad_norm: 0.9999992364302233, iteration: 186354
loss: 1.0409965515136719,grad_norm: 0.9999990826638789, iteration: 186355
loss: 0.9889379143714905,grad_norm: 0.9989158562741911, iteration: 186356
loss: 1.0481563806533813,grad_norm: 0.8587256448844515, iteration: 186357
loss: 1.013426661491394,grad_norm: 0.9131376260473896, iteration: 186358
loss: 1.0590654611587524,grad_norm: 0.9999993875014385, iteration: 186359
loss: 0.9521433711051941,grad_norm: 0.9999992056221685, iteration: 186360
loss: 1.0435675382614136,grad_norm: 0.9999994941908906, iteration: 186361
loss: 1.010619878768921,grad_norm: 0.9999991326456273, iteration: 186362
loss: 1.008522868156433,grad_norm: 0.9971857991705065, iteration: 186363
loss: 1.0051871538162231,grad_norm: 0.999999779443082, iteration: 186364
loss: 1.0191508531570435,grad_norm: 0.905318084421712, iteration: 186365
loss: 0.9893301725387573,grad_norm: 0.9999990954771847, iteration: 186366
loss: 1.0019932985305786,grad_norm: 0.88536783137573, iteration: 186367
loss: 1.001931071281433,grad_norm: 0.9999989871061906, iteration: 186368
loss: 1.025111436843872,grad_norm: 0.999999250335989, iteration: 186369
loss: 1.0548818111419678,grad_norm: 0.9999996466555532, iteration: 186370
loss: 1.0154430866241455,grad_norm: 0.8613110480204802, iteration: 186371
loss: 0.9904578924179077,grad_norm: 0.9237185162135556, iteration: 186372
loss: 0.972194254398346,grad_norm: 0.9999990664542792, iteration: 186373
loss: 1.085532784461975,grad_norm: 0.9999993586678771, iteration: 186374
loss: 0.9748808741569519,grad_norm: 0.8770281025443566, iteration: 186375
loss: 1.010264277458191,grad_norm: 0.9999989590928375, iteration: 186376
loss: 1.1354566812515259,grad_norm: 0.9999996164042783, iteration: 186377
loss: 1.0049710273742676,grad_norm: 0.999999288438816, iteration: 186378
loss: 1.026878833770752,grad_norm: 0.9999994205199675, iteration: 186379
loss: 1.0293039083480835,grad_norm: 0.999999175147005, iteration: 186380
loss: 1.0308659076690674,grad_norm: 0.8579301032621536, iteration: 186381
loss: 1.030134677886963,grad_norm: 0.9999991139894182, iteration: 186382
loss: 1.0214473009109497,grad_norm: 0.9999992011228578, iteration: 186383
loss: 0.9850733280181885,grad_norm: 0.9169888795741308, iteration: 186384
loss: 1.0070102214813232,grad_norm: 0.9999989857031332, iteration: 186385
loss: 0.9939150214195251,grad_norm: 0.9999991983869916, iteration: 186386
loss: 0.9923483729362488,grad_norm: 0.9999989797703618, iteration: 186387
loss: 1.0110331773757935,grad_norm: 0.9999991063641311, iteration: 186388
loss: 1.0396077632904053,grad_norm: 0.999999462233447, iteration: 186389
loss: 1.0797897577285767,grad_norm: 0.9999992704509273, iteration: 186390
loss: 1.0703346729278564,grad_norm: 0.9999995037729748, iteration: 186391
loss: 1.0140460729599,grad_norm: 0.9201792861373013, iteration: 186392
loss: 0.9676247835159302,grad_norm: 0.9676837051590169, iteration: 186393
loss: 1.0234812498092651,grad_norm: 0.9999990823535859, iteration: 186394
loss: 0.9780805110931396,grad_norm: 0.983363323064017, iteration: 186395
loss: 1.0400820970535278,grad_norm: 0.9999994142516256, iteration: 186396
loss: 1.0029207468032837,grad_norm: 0.9999991539779072, iteration: 186397
loss: 0.9840593934059143,grad_norm: 0.9533777015417287, iteration: 186398
loss: 0.9808147549629211,grad_norm: 0.9626507309683914, iteration: 186399
loss: 1.0792124271392822,grad_norm: 0.9999994846880591, iteration: 186400
loss: 0.9897807836532593,grad_norm: 0.9999990717264468, iteration: 186401
loss: 1.0155757665634155,grad_norm: 0.9999991773120308, iteration: 186402
loss: 1.0437722206115723,grad_norm: 0.9107163327565694, iteration: 186403
loss: 1.0020266771316528,grad_norm: 0.9999992445693786, iteration: 186404
loss: 1.0007939338684082,grad_norm: 0.9104014118771279, iteration: 186405
loss: 0.9887118935585022,grad_norm: 0.9999990997423003, iteration: 186406
loss: 1.011818289756775,grad_norm: 0.9342732006799405, iteration: 186407
loss: 1.055498480796814,grad_norm: 0.9999999234493089, iteration: 186408
loss: 1.015423059463501,grad_norm: 0.9999989954263561, iteration: 186409
loss: 1.0152101516723633,grad_norm: 0.9999992231220101, iteration: 186410
loss: 1.0148143768310547,grad_norm: 0.9941475588238594, iteration: 186411
loss: 1.0190067291259766,grad_norm: 0.9921019192495429, iteration: 186412
loss: 1.0177115201950073,grad_norm: 0.9999992768428568, iteration: 186413
loss: 0.993204653263092,grad_norm: 0.9883442295531659, iteration: 186414
loss: 1.0460443496704102,grad_norm: 0.923519210677528, iteration: 186415
loss: 0.9771483540534973,grad_norm: 0.9999991129512411, iteration: 186416
loss: 1.0092811584472656,grad_norm: 0.9999992397684241, iteration: 186417
loss: 0.9775543212890625,grad_norm: 0.950684607893695, iteration: 186418
loss: 1.0307222604751587,grad_norm: 0.9999990161168356, iteration: 186419
loss: 0.9710595607757568,grad_norm: 0.9674193819471719, iteration: 186420
loss: 1.027518630027771,grad_norm: 0.856729356178886, iteration: 186421
loss: 0.987935483455658,grad_norm: 0.9999993263027053, iteration: 186422
loss: 1.0187073945999146,grad_norm: 0.9999993156831755, iteration: 186423
loss: 0.9657295942306519,grad_norm: 0.8644827723657948, iteration: 186424
loss: 0.9942046999931335,grad_norm: 0.9999991364605338, iteration: 186425
loss: 0.9820400476455688,grad_norm: 0.9999991558739467, iteration: 186426
loss: 1.0001826286315918,grad_norm: 0.9999991954822478, iteration: 186427
loss: 1.0022790431976318,grad_norm: 0.999999085307071, iteration: 186428
loss: 0.9750555157661438,grad_norm: 0.9999992699639532, iteration: 186429
loss: 0.9959986805915833,grad_norm: 0.9999991195913951, iteration: 186430
loss: 1.0475059747695923,grad_norm: 0.999999044546074, iteration: 186431
loss: 1.0093235969543457,grad_norm: 0.9999990388327472, iteration: 186432
loss: 1.0485320091247559,grad_norm: 0.9364541596315629, iteration: 186433
loss: 1.03387451171875,grad_norm: 0.9999992583548309, iteration: 186434
loss: 0.9842211604118347,grad_norm: 0.83739132386146, iteration: 186435
loss: 1.0142103433609009,grad_norm: 0.99088742711323, iteration: 186436
loss: 0.9754320979118347,grad_norm: 0.9180619426772362, iteration: 186437
loss: 1.0252786874771118,grad_norm: 0.8938730275217659, iteration: 186438
loss: 1.002172589302063,grad_norm: 0.9999991759393752, iteration: 186439
loss: 0.9744074940681458,grad_norm: 0.934639956545027, iteration: 186440
loss: 0.9890978336334229,grad_norm: 0.9164469500790917, iteration: 186441
loss: 1.0061943531036377,grad_norm: 0.9999990343423671, iteration: 186442
loss: 0.997382402420044,grad_norm: 0.9215333753077857, iteration: 186443
loss: 1.0043153762817383,grad_norm: 0.9999990319215708, iteration: 186444
loss: 1.015916347503662,grad_norm: 0.879946165086215, iteration: 186445
loss: 1.010813593864441,grad_norm: 0.999999017151976, iteration: 186446
loss: 0.9777414202690125,grad_norm: 0.9999991108905486, iteration: 186447
loss: 1.0698447227478027,grad_norm: 0.9999994456211069, iteration: 186448
loss: 0.992420494556427,grad_norm: 0.9999992639240521, iteration: 186449
loss: 0.9739830493927002,grad_norm: 0.9985136270258274, iteration: 186450
loss: 1.0214593410491943,grad_norm: 0.999999768602644, iteration: 186451
loss: 0.9871558547019958,grad_norm: 0.8840846074596411, iteration: 186452
loss: 1.0754101276397705,grad_norm: 0.999999130478771, iteration: 186453
loss: 0.9949681162834167,grad_norm: 0.922055558129617, iteration: 186454
loss: 0.9939496517181396,grad_norm: 0.9620495100807052, iteration: 186455
loss: 1.0038580894470215,grad_norm: 0.9999992038027191, iteration: 186456
loss: 0.9734798669815063,grad_norm: 0.9999989391145112, iteration: 186457
loss: 0.9580345749855042,grad_norm: 0.9717031271448145, iteration: 186458
loss: 0.96175217628479,grad_norm: 0.9735758271011842, iteration: 186459
loss: 0.9563860297203064,grad_norm: 0.8704548580343965, iteration: 186460
loss: 1.0574196577072144,grad_norm: 0.999999190761775, iteration: 186461
loss: 0.9899892807006836,grad_norm: 0.9218913839896564, iteration: 186462
loss: 1.076859712600708,grad_norm: 0.9999995719397433, iteration: 186463
loss: 1.0019367933273315,grad_norm: 0.976674431211728, iteration: 186464
loss: 1.1507898569107056,grad_norm: 0.9999998994132264, iteration: 186465
loss: 1.0114355087280273,grad_norm: 0.9040364361574641, iteration: 186466
loss: 1.193729043006897,grad_norm: 0.9999997988954277, iteration: 186467
loss: 1.1184176206588745,grad_norm: 0.9999996307627542, iteration: 186468
loss: 1.0652145147323608,grad_norm: 0.9999990843593789, iteration: 186469
loss: 1.016161322593689,grad_norm: 0.9999992258010194, iteration: 186470
loss: 1.144442081451416,grad_norm: 0.9999993595609914, iteration: 186471
loss: 0.9592761993408203,grad_norm: 0.9787196614820225, iteration: 186472
loss: 1.0210542678833008,grad_norm: 0.9535479833219954, iteration: 186473
loss: 1.0088868141174316,grad_norm: 0.9655106706526964, iteration: 186474
loss: 1.015431523323059,grad_norm: 0.9828966832151487, iteration: 186475
loss: 0.9921315312385559,grad_norm: 0.999999044040539, iteration: 186476
loss: 0.9877482056617737,grad_norm: 0.914903977331827, iteration: 186477
loss: 1.0719776153564453,grad_norm: 1.0000000127646118, iteration: 186478
loss: 1.0176608562469482,grad_norm: 0.9999994363218093, iteration: 186479
loss: 0.9909830093383789,grad_norm: 0.9648836250151085, iteration: 186480
loss: 1.016814112663269,grad_norm: 0.9999992220149723, iteration: 186481
loss: 0.9963177442550659,grad_norm: 0.9999990769327133, iteration: 186482
loss: 0.9827458262443542,grad_norm: 0.9999991339739963, iteration: 186483
loss: 0.9824051260948181,grad_norm: 0.9089006105310816, iteration: 186484
loss: 1.0705296993255615,grad_norm: 0.9999995369775432, iteration: 186485
loss: 0.9465611577033997,grad_norm: 0.9999991937399496, iteration: 186486
loss: 0.9925714731216431,grad_norm: 0.9999991760998769, iteration: 186487
loss: 0.9871456027030945,grad_norm: 0.9999992670203843, iteration: 186488
loss: 0.9678410887718201,grad_norm: 0.999999204513545, iteration: 186489
loss: 1.011692762374878,grad_norm: 0.8619304381092103, iteration: 186490
loss: 1.0421696901321411,grad_norm: 0.9999990468839693, iteration: 186491
loss: 1.048586368560791,grad_norm: 0.9999993195976611, iteration: 186492
loss: 0.9962810277938843,grad_norm: 0.8988214152779563, iteration: 186493
loss: 0.982466459274292,grad_norm: 0.8920012522650614, iteration: 186494
loss: 0.9870902299880981,grad_norm: 0.9999991902626272, iteration: 186495
loss: 1.0255013704299927,grad_norm: 0.8045623273024738, iteration: 186496
loss: 1.0032931566238403,grad_norm: 0.9999994275567581, iteration: 186497
loss: 0.9728882908821106,grad_norm: 0.8816009743442438, iteration: 186498
loss: 1.0106074810028076,grad_norm: 0.9254251324725239, iteration: 186499
loss: 1.0020396709442139,grad_norm: 0.9999991260929028, iteration: 186500
loss: 0.9784923195838928,grad_norm: 0.9999993071838015, iteration: 186501
loss: 1.0503038167953491,grad_norm: 0.9999998326422165, iteration: 186502
loss: 0.9816057682037354,grad_norm: 0.8738003848899834, iteration: 186503
loss: 1.0054080486297607,grad_norm: 0.888344375696692, iteration: 186504
loss: 1.0108153820037842,grad_norm: 0.9684073056682355, iteration: 186505
loss: 1.0125644207000732,grad_norm: 0.9999999236751984, iteration: 186506
loss: 0.9986521005630493,grad_norm: 0.874354756176319, iteration: 186507
loss: 0.9870617389678955,grad_norm: 0.9053561367354298, iteration: 186508
loss: 0.9887630343437195,grad_norm: 0.9999990169520134, iteration: 186509
loss: 0.9934237599372864,grad_norm: 0.9999990422639675, iteration: 186510
loss: 0.9883902668952942,grad_norm: 0.999999172794553, iteration: 186511
loss: 1.0054652690887451,grad_norm: 0.8674169137894382, iteration: 186512
loss: 1.018528699874878,grad_norm: 0.9135929960919075, iteration: 186513
loss: 0.9834980368614197,grad_norm: 0.9791527904011231, iteration: 186514
loss: 1.0187180042266846,grad_norm: 0.9279161330170709, iteration: 186515
loss: 0.9746328592300415,grad_norm: 0.9458211366575984, iteration: 186516
loss: 0.9835342168807983,grad_norm: 0.9443728777145983, iteration: 186517
loss: 0.9795958995819092,grad_norm: 0.8102260215893935, iteration: 186518
loss: 0.9998416304588318,grad_norm: 0.999999506936703, iteration: 186519
loss: 1.0594474077224731,grad_norm: 0.9999997921174409, iteration: 186520
loss: 0.9875369668006897,grad_norm: 0.8972807166154905, iteration: 186521
loss: 0.982471227645874,grad_norm: 0.9558335324029774, iteration: 186522
loss: 1.0038450956344604,grad_norm: 0.9999991256015766, iteration: 186523
loss: 1.0125813484191895,grad_norm: 0.9212198050505721, iteration: 186524
loss: 0.9952806234359741,grad_norm: 0.9999991194972481, iteration: 186525
loss: 0.9651238322257996,grad_norm: 0.9293765653589725, iteration: 186526
loss: 0.9925488829612732,grad_norm: 0.8697695440951614, iteration: 186527
loss: 1.0115735530853271,grad_norm: 0.9862913664059562, iteration: 186528
loss: 1.0352305173873901,grad_norm: 0.9999990957679487, iteration: 186529
loss: 1.0074114799499512,grad_norm: 0.9202936451316617, iteration: 186530
loss: 0.9998754262924194,grad_norm: 0.9725683696517624, iteration: 186531
loss: 0.9703932404518127,grad_norm: 0.9999990904349129, iteration: 186532
loss: 0.9929235577583313,grad_norm: 0.9304694387542729, iteration: 186533
loss: 0.9960211515426636,grad_norm: 0.8268930811412666, iteration: 186534
loss: 0.9927597045898438,grad_norm: 0.9999999266295362, iteration: 186535
loss: 0.985556423664093,grad_norm: 0.9999993031053451, iteration: 186536
loss: 1.010287880897522,grad_norm: 0.9999990176452875, iteration: 186537
loss: 0.9718579649925232,grad_norm: 0.9999995296630236, iteration: 186538
loss: 0.9909378886222839,grad_norm: 0.9999991721006609, iteration: 186539
loss: 0.9781429171562195,grad_norm: 0.9204251458852875, iteration: 186540
loss: 1.0068180561065674,grad_norm: 0.9999991659510724, iteration: 186541
loss: 1.0137962102890015,grad_norm: 0.9769683698261891, iteration: 186542
loss: 1.0011345148086548,grad_norm: 0.888638905144864, iteration: 186543
loss: 1.026092290878296,grad_norm: 0.9909278767519268, iteration: 186544
loss: 0.9679508209228516,grad_norm: 0.9516876750576734, iteration: 186545
loss: 1.0008584260940552,grad_norm: 0.9898089005397738, iteration: 186546
loss: 0.965043842792511,grad_norm: 0.9007642953917422, iteration: 186547
loss: 1.008033275604248,grad_norm: 0.9999989861995967, iteration: 186548
loss: 0.9818776845932007,grad_norm: 0.9733100309636525, iteration: 186549
loss: 0.9858747124671936,grad_norm: 0.9999991481810857, iteration: 186550
loss: 1.0096365213394165,grad_norm: 0.9999989879422079, iteration: 186551
loss: 1.0005179643630981,grad_norm: 0.8759809207133967, iteration: 186552
loss: 1.0153030157089233,grad_norm: 0.9026344661461119, iteration: 186553
loss: 1.0228782892227173,grad_norm: 0.9999992871840576, iteration: 186554
loss: 0.998814046382904,grad_norm: 0.8717780494235958, iteration: 186555
loss: 0.9740349054336548,grad_norm: 0.9999990011239207, iteration: 186556
loss: 0.9992629289627075,grad_norm: 0.9332654831338684, iteration: 186557
loss: 1.0135278701782227,grad_norm: 0.9572897302947453, iteration: 186558
loss: 1.0435550212860107,grad_norm: 0.9027978315026607, iteration: 186559
loss: 0.9687817692756653,grad_norm: 0.9579878873614834, iteration: 186560
loss: 1.0164949893951416,grad_norm: 0.9500629455426237, iteration: 186561
loss: 0.9763968586921692,grad_norm: 0.9999997627152484, iteration: 186562
loss: 0.9987080693244934,grad_norm: 0.9303064891805833, iteration: 186563
loss: 1.0251896381378174,grad_norm: 0.9999992654709423, iteration: 186564
loss: 0.9658068418502808,grad_norm: 0.9999991946595919, iteration: 186565
loss: 0.9736319780349731,grad_norm: 0.8379671280273516, iteration: 186566
loss: 1.0265389680862427,grad_norm: 0.9999991676458441, iteration: 186567
loss: 1.0179500579833984,grad_norm: 0.9999992330316969, iteration: 186568
loss: 1.040155291557312,grad_norm: 0.9999991555602511, iteration: 186569
loss: 0.9666001796722412,grad_norm: 0.9808246403647634, iteration: 186570
loss: 0.9828178286552429,grad_norm: 0.9649148223886269, iteration: 186571
loss: 0.9969584941864014,grad_norm: 0.803730898129958, iteration: 186572
loss: 1.0043468475341797,grad_norm: 0.9950514027889726, iteration: 186573
loss: 1.0015876293182373,grad_norm: 0.8848918722613056, iteration: 186574
loss: 0.9662043452262878,grad_norm: 0.9508812741266183, iteration: 186575
loss: 0.9895288944244385,grad_norm: 0.9999992048808162, iteration: 186576
loss: 0.9866020083427429,grad_norm: 0.9999992828271997, iteration: 186577
loss: 1.039249062538147,grad_norm: 0.772115756597174, iteration: 186578
loss: 1.0024079084396362,grad_norm: 0.8233190430216072, iteration: 186579
loss: 1.0155253410339355,grad_norm: 0.9570516766631272, iteration: 186580
loss: 1.0387119054794312,grad_norm: 0.8013811937024301, iteration: 186581
loss: 0.9765705466270447,grad_norm: 0.8680413225388237, iteration: 186582
loss: 0.9464249610900879,grad_norm: 0.8185361837795063, iteration: 186583
loss: 0.9520099759101868,grad_norm: 0.999998984913222, iteration: 186584
loss: 0.9972718358039856,grad_norm: 0.9999990442120211, iteration: 186585
loss: 0.9640437960624695,grad_norm: 0.974705261292994, iteration: 186586
loss: 1.034589171409607,grad_norm: 0.999999159740972, iteration: 186587
loss: 0.9868857860565186,grad_norm: 0.9862840431385538, iteration: 186588
loss: 1.0495792627334595,grad_norm: 0.8913982812101889, iteration: 186589
loss: 1.0081161260604858,grad_norm: 0.9893085085215112, iteration: 186590
loss: 0.9698010087013245,grad_norm: 0.8855226274502433, iteration: 186591
loss: 0.9778859615325928,grad_norm: 0.9999990551388434, iteration: 186592
loss: 1.014096975326538,grad_norm: 0.999999097152287, iteration: 186593
loss: 0.9351536631584167,grad_norm: 0.9999991972175796, iteration: 186594
loss: 1.0167776346206665,grad_norm: 0.8486618509775484, iteration: 186595
loss: 0.9947176575660706,grad_norm: 0.9874417785615406, iteration: 186596
loss: 0.992667555809021,grad_norm: 0.9999992726040535, iteration: 186597
loss: 1.016276240348816,grad_norm: 0.9999991796079953, iteration: 186598
loss: 0.9893562197685242,grad_norm: 0.9999992362173139, iteration: 186599
loss: 1.0171452760696411,grad_norm: 0.9008863611353368, iteration: 186600
loss: 0.9523947834968567,grad_norm: 0.9999990606817095, iteration: 186601
loss: 1.0397348403930664,grad_norm: 0.9999990500594484, iteration: 186602
loss: 1.0040417909622192,grad_norm: 0.9999991817033267, iteration: 186603
loss: 1.0328775644302368,grad_norm: 0.9999996711765545, iteration: 186604
loss: 1.0132660865783691,grad_norm: 0.9037069134629238, iteration: 186605
loss: 1.0190030336380005,grad_norm: 0.9794081749083551, iteration: 186606
loss: 1.031166672706604,grad_norm: 0.9999998847488779, iteration: 186607
loss: 0.9867542386054993,grad_norm: 0.9999991400422181, iteration: 186608
loss: 1.0211983919143677,grad_norm: 0.9999991346405432, iteration: 186609
loss: 0.9680681228637695,grad_norm: 0.8908863044565924, iteration: 186610
loss: 0.9925045967102051,grad_norm: 0.9999990814466546, iteration: 186611
loss: 1.008555293083191,grad_norm: 0.9999991958029145, iteration: 186612
loss: 1.0222820043563843,grad_norm: 0.9999992812294186, iteration: 186613
loss: 0.9720138907432556,grad_norm: 0.8350178635995485, iteration: 186614
loss: 1.0085124969482422,grad_norm: 0.9250421944404845, iteration: 186615
loss: 0.9914224743843079,grad_norm: 0.9622825405832194, iteration: 186616
loss: 1.0054372549057007,grad_norm: 0.9999990471549707, iteration: 186617
loss: 1.0040842294692993,grad_norm: 0.8351083933706371, iteration: 186618
loss: 1.0489521026611328,grad_norm: 0.9999997880783446, iteration: 186619
loss: 1.041307806968689,grad_norm: 0.9187681805804795, iteration: 186620
loss: 1.0156480073928833,grad_norm: 0.9999990827152017, iteration: 186621
loss: 1.0256775617599487,grad_norm: 0.9977788303196505, iteration: 186622
loss: 1.013545036315918,grad_norm: 0.9999991250424348, iteration: 186623
loss: 1.0091639757156372,grad_norm: 0.9060201933884852, iteration: 186624
loss: 1.08987295627594,grad_norm: 0.9999996590882883, iteration: 186625
loss: 1.0245877504348755,grad_norm: 0.8666442244440453, iteration: 186626
loss: 0.9918910264968872,grad_norm: 0.999999142898349, iteration: 186627
loss: 1.01982581615448,grad_norm: 0.9999993525049721, iteration: 186628
loss: 0.9850581884384155,grad_norm: 0.9999998100279107, iteration: 186629
loss: 0.9981240034103394,grad_norm: 0.7955496899935368, iteration: 186630
loss: 0.9914332032203674,grad_norm: 0.9999990930783592, iteration: 186631
loss: 1.0646796226501465,grad_norm: 0.9610356313191174, iteration: 186632
loss: 1.032792091369629,grad_norm: 0.9999992033066124, iteration: 186633
loss: 1.0015764236450195,grad_norm: 0.956755892577731, iteration: 186634
loss: 0.9970912933349609,grad_norm: 0.9136124718104779, iteration: 186635
loss: 0.961026668548584,grad_norm: 0.8162775879195596, iteration: 186636
loss: 1.0067131519317627,grad_norm: 0.9830824019778742, iteration: 186637
loss: 0.9840056300163269,grad_norm: 0.8792887996406994, iteration: 186638
loss: 1.0089147090911865,grad_norm: 0.8935855426824365, iteration: 186639
loss: 1.0392342805862427,grad_norm: 0.9117701477166424, iteration: 186640
loss: 1.0014809370040894,grad_norm: 0.996397640591012, iteration: 186641
loss: 0.9965891242027283,grad_norm: 0.9999992328210697, iteration: 186642
loss: 0.9742251634597778,grad_norm: 0.9888223340845571, iteration: 186643
loss: 1.0479220151901245,grad_norm: 0.9309226455658418, iteration: 186644
loss: 1.0045955181121826,grad_norm: 0.8916145551052406, iteration: 186645
loss: 0.9989715218544006,grad_norm: 0.980209866978325, iteration: 186646
loss: 1.0179977416992188,grad_norm: 0.9999989471849966, iteration: 186647
loss: 1.00618314743042,grad_norm: 0.8952295941763285, iteration: 186648
loss: 1.0226935148239136,grad_norm: 0.9999990458602839, iteration: 186649
loss: 0.9746789932250977,grad_norm: 0.9994715338414946, iteration: 186650
loss: 0.9695795774459839,grad_norm: 0.9999989829914442, iteration: 186651
loss: 0.98604416847229,grad_norm: 0.9014593604188728, iteration: 186652
loss: 1.0022221803665161,grad_norm: 0.9322991130061512, iteration: 186653
loss: 0.9758225083351135,grad_norm: 0.955476765557524, iteration: 186654
loss: 1.045854926109314,grad_norm: 0.9999991366831138, iteration: 186655
loss: 1.0280505418777466,grad_norm: 0.8470652321039055, iteration: 186656
loss: 1.0003595352172852,grad_norm: 0.9337572247341228, iteration: 186657
loss: 1.0085358619689941,grad_norm: 0.9714169748828085, iteration: 186658
loss: 0.9804754257202148,grad_norm: 0.9820377300938651, iteration: 186659
loss: 1.0241198539733887,grad_norm: 0.9670945496308758, iteration: 186660
loss: 0.9946919083595276,grad_norm: 0.9773664751574633, iteration: 186661
loss: 0.9694863557815552,grad_norm: 0.9999988927599176, iteration: 186662
loss: 0.9777494668960571,grad_norm: 0.9157462253720698, iteration: 186663
loss: 0.9947679042816162,grad_norm: 0.9999989649400167, iteration: 186664
loss: 1.0360660552978516,grad_norm: 0.9999991419894101, iteration: 186665
loss: 1.021464228630066,grad_norm: 0.9999991326990941, iteration: 186666
loss: 1.008764386177063,grad_norm: 0.9999991758444201, iteration: 186667
loss: 1.0284748077392578,grad_norm: 0.999999325802674, iteration: 186668
loss: 0.9674108028411865,grad_norm: 0.986583589340871, iteration: 186669
loss: 0.9974842071533203,grad_norm: 0.9957968966540525, iteration: 186670
loss: 0.9931698441505432,grad_norm: 0.9361796586869432, iteration: 186671
loss: 1.0165404081344604,grad_norm: 0.9999990823971245, iteration: 186672
loss: 1.006624698638916,grad_norm: 0.9045644094523916, iteration: 186673
loss: 1.0965088605880737,grad_norm: 0.999999481337701, iteration: 186674
loss: 0.9892958998680115,grad_norm: 0.8995476812136367, iteration: 186675
loss: 0.9727863669395447,grad_norm: 0.8214031432731221, iteration: 186676
loss: 1.028268814086914,grad_norm: 0.9999992608559057, iteration: 186677
loss: 1.013329029083252,grad_norm: 0.9534225270773464, iteration: 186678
loss: 1.0008718967437744,grad_norm: 0.9070845080838077, iteration: 186679
loss: 0.9813441038131714,grad_norm: 0.9856343451417461, iteration: 186680
loss: 0.994188666343689,grad_norm: 0.9715094422162334, iteration: 186681
loss: 1.0022692680358887,grad_norm: 0.9693917585611236, iteration: 186682
loss: 1.0862022638320923,grad_norm: 0.9999993745874539, iteration: 186683
loss: 1.0025562047958374,grad_norm: 0.7973461449371998, iteration: 186684
loss: 1.0110169649124146,grad_norm: 0.998539498113327, iteration: 186685
loss: 1.0019447803497314,grad_norm: 0.8704604841481314, iteration: 186686
loss: 0.988585352897644,grad_norm: 0.9999990907797102, iteration: 186687
loss: 1.004244327545166,grad_norm: 0.987599632930528, iteration: 186688
loss: 1.0935570001602173,grad_norm: 0.9999990763883108, iteration: 186689
loss: 0.9589852094650269,grad_norm: 0.913978301764907, iteration: 186690
loss: 1.0019336938858032,grad_norm: 0.9999990722986167, iteration: 186691
loss: 0.9734025001525879,grad_norm: 0.999999027318191, iteration: 186692
loss: 1.0423693656921387,grad_norm: 0.9999990471354565, iteration: 186693
loss: 0.9683560729026794,grad_norm: 0.9999991706691567, iteration: 186694
loss: 0.9803505539894104,grad_norm: 0.8514114491582686, iteration: 186695
loss: 0.990827739238739,grad_norm: 0.9999991353280815, iteration: 186696
loss: 0.9815645813941956,grad_norm: 0.9999991268857024, iteration: 186697
loss: 0.9954333305358887,grad_norm: 0.7712198104157426, iteration: 186698
loss: 0.9330213665962219,grad_norm: 0.9890104657936396, iteration: 186699
loss: 1.0003151893615723,grad_norm: 0.9999991035472081, iteration: 186700
loss: 0.9918239712715149,grad_norm: 0.9475397881852315, iteration: 186701
loss: 1.0050066709518433,grad_norm: 0.9350795446077287, iteration: 186702
loss: 0.9790778756141663,grad_norm: 0.967017858526127, iteration: 186703
loss: 0.9863162636756897,grad_norm: 0.9999990688251408, iteration: 186704
loss: 0.9938801527023315,grad_norm: 0.8070840553795187, iteration: 186705
loss: 0.9616671800613403,grad_norm: 0.8108995618270278, iteration: 186706
loss: 0.9872906804084778,grad_norm: 0.8196661729573501, iteration: 186707
loss: 0.9767699837684631,grad_norm: 0.9417960340582894, iteration: 186708
loss: 1.0012449026107788,grad_norm: 0.9999992362127296, iteration: 186709
loss: 0.9938189387321472,grad_norm: 0.8355579265957325, iteration: 186710
loss: 0.9971588850021362,grad_norm: 0.8770727152447131, iteration: 186711
loss: 0.9931255578994751,grad_norm: 0.8329435010967985, iteration: 186712
loss: 0.997791588306427,grad_norm: 0.8999653849315318, iteration: 186713
loss: 1.0242531299591064,grad_norm: 0.9423937495226828, iteration: 186714
loss: 1.0063270330429077,grad_norm: 0.9999990972989073, iteration: 186715
loss: 1.022361159324646,grad_norm: 0.9999990665645186, iteration: 186716
loss: 1.010060429573059,grad_norm: 0.925717071087712, iteration: 186717
loss: 1.0036661624908447,grad_norm: 0.9999991755076565, iteration: 186718
loss: 0.9914101958274841,grad_norm: 0.7810186947489883, iteration: 186719
loss: 1.0182124376296997,grad_norm: 0.8518574626436857, iteration: 186720
loss: 0.9991456270217896,grad_norm: 0.9096366680410463, iteration: 186721
loss: 0.9917283654212952,grad_norm: 0.9906581997269686, iteration: 186722
loss: 1.0036817789077759,grad_norm: 0.9999994379293451, iteration: 186723
loss: 1.013733983039856,grad_norm: 0.9999990955381428, iteration: 186724
loss: 1.0397334098815918,grad_norm: 0.9999990383554673, iteration: 186725
loss: 1.0186465978622437,grad_norm: 0.9196210007881594, iteration: 186726
loss: 1.005072832107544,grad_norm: 0.9999991376927215, iteration: 186727
loss: 1.0167423486709595,grad_norm: 0.999999522984993, iteration: 186728
loss: 1.003561019897461,grad_norm: 0.9999993738320802, iteration: 186729
loss: 1.0357850790023804,grad_norm: 0.9999995043794241, iteration: 186730
loss: 1.0058618783950806,grad_norm: 0.9999990596819192, iteration: 186731
loss: 1.0251721143722534,grad_norm: 0.9999991003011802, iteration: 186732
loss: 1.0332484245300293,grad_norm: 0.9254483859259505, iteration: 186733
loss: 0.9726232886314392,grad_norm: 0.9526757312531723, iteration: 186734
loss: 0.9907433986663818,grad_norm: 0.999998920607311, iteration: 186735
loss: 1.018754005432129,grad_norm: 0.9237574431833395, iteration: 186736
loss: 1.0008412599563599,grad_norm: 0.7715667587797175, iteration: 186737
loss: 1.0178663730621338,grad_norm: 0.988064751462811, iteration: 186738
loss: 0.9603142142295837,grad_norm: 0.8854390152105873, iteration: 186739
loss: 0.9939101934432983,grad_norm: 0.8993996973733317, iteration: 186740
loss: 1.0038856267929077,grad_norm: 0.9508173740214262, iteration: 186741
loss: 0.9820698499679565,grad_norm: 0.9999992408271253, iteration: 186742
loss: 1.0134451389312744,grad_norm: 0.923053447707786, iteration: 186743
loss: 1.0355887413024902,grad_norm: 0.99999911664584, iteration: 186744
loss: 1.0355137586593628,grad_norm: 0.9999991411160739, iteration: 186745
loss: 0.9844067096710205,grad_norm: 0.9399862766411656, iteration: 186746
loss: 1.007859468460083,grad_norm: 0.8978271008335358, iteration: 186747
loss: 1.0124820470809937,grad_norm: 0.9999991703880113, iteration: 186748
loss: 0.9859288930892944,grad_norm: 0.9288915806846447, iteration: 186749
loss: 0.9624214172363281,grad_norm: 0.9482627827887434, iteration: 186750
loss: 1.013363003730774,grad_norm: 0.8667714027400523, iteration: 186751
loss: 0.9916273951530457,grad_norm: 0.99999944690756, iteration: 186752
loss: 1.0119866132736206,grad_norm: 0.9999991749200767, iteration: 186753
loss: 1.0239076614379883,grad_norm: 0.9590702055082269, iteration: 186754
loss: 1.0079954862594604,grad_norm: 0.9471554099219978, iteration: 186755
loss: 0.9713084697723389,grad_norm: 0.9999991671319858, iteration: 186756
loss: 0.9994927048683167,grad_norm: 0.8627850494199493, iteration: 186757
loss: 1.0437577962875366,grad_norm: 0.9999992909154398, iteration: 186758
loss: 0.9758628010749817,grad_norm: 0.8153642216026005, iteration: 186759
loss: 0.9659934043884277,grad_norm: 0.9886956101579212, iteration: 186760
loss: 1.1236788034439087,grad_norm: 0.9999996535213875, iteration: 186761
loss: 1.0157690048217773,grad_norm: 0.999999596137004, iteration: 186762
loss: 1.02134108543396,grad_norm: 0.9999990889556667, iteration: 186763
loss: 1.0220507383346558,grad_norm: 0.9999991975253195, iteration: 186764
loss: 0.991012692451477,grad_norm: 0.9525890878120401, iteration: 186765
loss: 1.0283222198486328,grad_norm: 0.9428881500775597, iteration: 186766
loss: 0.9976590871810913,grad_norm: 0.9999991783589507, iteration: 186767
loss: 1.0182640552520752,grad_norm: 0.9464993591014038, iteration: 186768
loss: 0.9609047770500183,grad_norm: 0.9999997944809894, iteration: 186769
loss: 1.0313897132873535,grad_norm: 0.9299923721775909, iteration: 186770
loss: 0.9541370868682861,grad_norm: 0.8426260020466164, iteration: 186771
loss: 1.0385713577270508,grad_norm: 0.9999990990737857, iteration: 186772
loss: 1.0001630783081055,grad_norm: 0.8303791540021649, iteration: 186773
loss: 1.0282853841781616,grad_norm: 0.9614354128471936, iteration: 186774
loss: 1.002219319343567,grad_norm: 0.9704814558567941, iteration: 186775
loss: 1.0808185338974,grad_norm: 0.9999995213927438, iteration: 186776
loss: 1.006807804107666,grad_norm: 0.9965137877204818, iteration: 186777
loss: 0.9803328514099121,grad_norm: 0.8551704323850549, iteration: 186778
loss: 1.0146607160568237,grad_norm: 0.9991057412528708, iteration: 186779
loss: 1.0311338901519775,grad_norm: 0.9999999466526897, iteration: 186780
loss: 1.0199517011642456,grad_norm: 0.9999989657748809, iteration: 186781
loss: 1.0061951875686646,grad_norm: 0.9999995960736269, iteration: 186782
loss: 0.982273519039154,grad_norm: 0.9258711957359758, iteration: 186783
loss: 1.0044559240341187,grad_norm: 0.9999991915193591, iteration: 186784
loss: 1.009850263595581,grad_norm: 0.8670891642364589, iteration: 186785
loss: 0.9932063221931458,grad_norm: 0.8525589334378915, iteration: 186786
loss: 1.002443790435791,grad_norm: 0.9999989620540239, iteration: 186787
loss: 1.0203391313552856,grad_norm: 0.9607401995936939, iteration: 186788
loss: 1.0018017292022705,grad_norm: 0.9999997387032971, iteration: 186789
loss: 0.9672917127609253,grad_norm: 0.9999992018237044, iteration: 186790
loss: 0.9789886474609375,grad_norm: 0.9822058770676735, iteration: 186791
loss: 1.005807876586914,grad_norm: 0.8784330255092662, iteration: 186792
loss: 1.0242702960968018,grad_norm: 0.999999042089555, iteration: 186793
loss: 1.0201010704040527,grad_norm: 0.8026165973968691, iteration: 186794
loss: 1.0270514488220215,grad_norm: 0.8264517979213084, iteration: 186795
loss: 1.0032169818878174,grad_norm: 0.9999990625739692, iteration: 186796
loss: 1.031964659690857,grad_norm: 0.9999994621731583, iteration: 186797
loss: 1.06169593334198,grad_norm: 0.9999991554867362, iteration: 186798
loss: 0.999390184879303,grad_norm: 0.9428818202919473, iteration: 186799
loss: 1.03378427028656,grad_norm: 0.999999919073632, iteration: 186800
loss: 1.0044418573379517,grad_norm: 0.9999992213386921, iteration: 186801
loss: 0.9806912541389465,grad_norm: 0.9999989558083958, iteration: 186802
loss: 1.0330263376235962,grad_norm: 0.999999211335404, iteration: 186803
loss: 1.200838327407837,grad_norm: 0.9999996749243798, iteration: 186804
loss: 1.032637596130371,grad_norm: 0.9999999623842661, iteration: 186805
loss: 0.9668764472007751,grad_norm: 0.9999991031431629, iteration: 186806
loss: 1.0083378553390503,grad_norm: 0.9999992237822515, iteration: 186807
loss: 1.0951364040374756,grad_norm: 0.9057609375350911, iteration: 186808
loss: 0.9941787719726562,grad_norm: 0.887773786201869, iteration: 186809
loss: 0.9863083958625793,grad_norm: 0.9451009848863224, iteration: 186810
loss: 0.9727680087089539,grad_norm: 0.9999990526334464, iteration: 186811
loss: 1.0504628419876099,grad_norm: 0.8920965407883302, iteration: 186812
loss: 0.9898483157157898,grad_norm: 0.9646507439130678, iteration: 186813
loss: 1.0042393207550049,grad_norm: 0.9999992428243375, iteration: 186814
loss: 1.0286753177642822,grad_norm: 0.9999992308105514, iteration: 186815
loss: 0.9709075093269348,grad_norm: 0.9417958387934927, iteration: 186816
loss: 0.9968503713607788,grad_norm: 0.8486668927494957, iteration: 186817
loss: 1.0616096258163452,grad_norm: 0.99999995924082, iteration: 186818
loss: 1.029421091079712,grad_norm: 0.9599921860703287, iteration: 186819
loss: 1.0280686616897583,grad_norm: 0.9999992552767323, iteration: 186820
loss: 1.0116733312606812,grad_norm: 0.999999225812849, iteration: 186821
loss: 1.0201828479766846,grad_norm: 0.7789219095442816, iteration: 186822
loss: 0.98805171251297,grad_norm: 0.9999992778660601, iteration: 186823
loss: 1.0367028713226318,grad_norm: 0.9047060931229047, iteration: 186824
loss: 1.023693323135376,grad_norm: 0.899330951286786, iteration: 186825
loss: 1.0275304317474365,grad_norm: 0.9999991101638002, iteration: 186826
loss: 0.9857083559036255,grad_norm: 0.9080047459528329, iteration: 186827
loss: 1.0075734853744507,grad_norm: 0.9999990184897104, iteration: 186828
loss: 1.0022765398025513,grad_norm: 0.8408168862008114, iteration: 186829
loss: 1.0304404497146606,grad_norm: 0.8785074699886217, iteration: 186830
loss: 1.0123740434646606,grad_norm: 0.9999998061330209, iteration: 186831
loss: 0.9789754152297974,grad_norm: 0.9999990983965155, iteration: 186832
loss: 0.9863100051879883,grad_norm: 0.9955161303869163, iteration: 186833
loss: 0.9736602902412415,grad_norm: 0.9999995067699096, iteration: 186834
loss: 1.0599955320358276,grad_norm: 0.9999992539150062, iteration: 186835
loss: 0.9986677169799805,grad_norm: 0.9999990281838788, iteration: 186836
loss: 1.0126984119415283,grad_norm: 0.9999992340620835, iteration: 186837
loss: 0.9788463711738586,grad_norm: 0.7828193770256703, iteration: 186838
loss: 0.9958948493003845,grad_norm: 0.9999991718501078, iteration: 186839
loss: 0.9672866463661194,grad_norm: 0.9905960699821293, iteration: 186840
loss: 1.0521219968795776,grad_norm: 0.9999993300381367, iteration: 186841
loss: 0.9970747828483582,grad_norm: 0.9550088503618797, iteration: 186842
loss: 1.0055489540100098,grad_norm: 0.9999990202889525, iteration: 186843
loss: 0.9866938591003418,grad_norm: 0.8599718307974706, iteration: 186844
loss: 0.9939765930175781,grad_norm: 0.9133917691832302, iteration: 186845
loss: 0.9918837547302246,grad_norm: 0.9999990539265402, iteration: 186846
loss: 1.0407302379608154,grad_norm: 0.9999991325435619, iteration: 186847
loss: 0.9983647465705872,grad_norm: 0.894610119629922, iteration: 186848
loss: 1.007423758506775,grad_norm: 0.8745018683947334, iteration: 186849
loss: 0.9660486578941345,grad_norm: 0.9999992671750405, iteration: 186850
loss: 1.03251314163208,grad_norm: 0.9705278568114136, iteration: 186851
loss: 1.0148452520370483,grad_norm: 0.9999992333682518, iteration: 186852
loss: 0.987170398235321,grad_norm: 0.8381903428864548, iteration: 186853
loss: 1.017595887184143,grad_norm: 0.9999991804069982, iteration: 186854
loss: 1.0464794635772705,grad_norm: 0.9999996453207447, iteration: 186855
loss: 0.9859707951545715,grad_norm: 0.9040930109589945, iteration: 186856
loss: 1.0182287693023682,grad_norm: 0.9512471250584121, iteration: 186857
loss: 0.9997297525405884,grad_norm: 0.8730055546578811, iteration: 186858
loss: 1.0092848539352417,grad_norm: 0.9999991402225469, iteration: 186859
loss: 1.0151023864746094,grad_norm: 0.9999991016057898, iteration: 186860
loss: 1.0215742588043213,grad_norm: 0.9999991198947944, iteration: 186861
loss: 0.9903788566589355,grad_norm: 0.9601872537298117, iteration: 186862
loss: 0.9914042353630066,grad_norm: 0.9972488014058056, iteration: 186863
loss: 1.030454158782959,grad_norm: 0.9606988430307101, iteration: 186864
loss: 0.9778898358345032,grad_norm: 0.9342528040020901, iteration: 186865
loss: 1.0465638637542725,grad_norm: 0.8586017070437825, iteration: 186866
loss: 0.9479770064353943,grad_norm: 0.9257401776960779, iteration: 186867
loss: 1.0141301155090332,grad_norm: 0.8962084850442477, iteration: 186868
loss: 0.9963265657424927,grad_norm: 0.8802930631804583, iteration: 186869
loss: 1.0160155296325684,grad_norm: 0.9999992910572791, iteration: 186870
loss: 1.0255650281906128,grad_norm: 0.9594937493004315, iteration: 186871
loss: 1.0135200023651123,grad_norm: 0.9137280094178895, iteration: 186872
loss: 1.0324817895889282,grad_norm: 0.8975553510795529, iteration: 186873
loss: 1.08463716506958,grad_norm: 0.9999999912025909, iteration: 186874
loss: 1.0079337358474731,grad_norm: 0.8686040595658427, iteration: 186875
loss: 1.0205899477005005,grad_norm: 0.999999327415804, iteration: 186876
loss: 0.9913465976715088,grad_norm: 0.7898013507613947, iteration: 186877
loss: 1.0020657777786255,grad_norm: 0.8779484076438461, iteration: 186878
loss: 0.9987985491752625,grad_norm: 0.816514906113192, iteration: 186879
loss: 0.993145227432251,grad_norm: 0.9999990636339844, iteration: 186880
loss: 0.9945063591003418,grad_norm: 0.9587967852901225, iteration: 186881
loss: 0.9897183179855347,grad_norm: 0.887601819442814, iteration: 186882
loss: 0.9945631623268127,grad_norm: 0.9999991528669703, iteration: 186883
loss: 1.007975697517395,grad_norm: 0.8726995665340879, iteration: 186884
loss: 0.9894192814826965,grad_norm: 0.7880050654919067, iteration: 186885
loss: 1.0163079500198364,grad_norm: 0.9571756741045516, iteration: 186886
loss: 0.9768198728561401,grad_norm: 0.9999990822649512, iteration: 186887
loss: 1.0376688241958618,grad_norm: 0.8257036235452476, iteration: 186888
loss: 1.0085991621017456,grad_norm: 0.8699240161239766, iteration: 186889
loss: 1.0178996324539185,grad_norm: 0.9543783976643492, iteration: 186890
loss: 1.0052917003631592,grad_norm: 0.9999991592831067, iteration: 186891
loss: 1.0034611225128174,grad_norm: 0.9999991393544019, iteration: 186892
loss: 0.9772115349769592,grad_norm: 0.9909100189929028, iteration: 186893
loss: 0.967841386795044,grad_norm: 0.918653903420708, iteration: 186894
loss: 1.0121870040893555,grad_norm: 0.7810705947214863, iteration: 186895
loss: 0.986366868019104,grad_norm: 0.8989566033969937, iteration: 186896
loss: 1.0032464265823364,grad_norm: 0.9999988887476463, iteration: 186897
loss: 0.9967114925384521,grad_norm: 0.9027455123112589, iteration: 186898
loss: 0.9946870803833008,grad_norm: 0.8096103293645728, iteration: 186899
loss: 1.0204275846481323,grad_norm: 0.9999990287685727, iteration: 186900
loss: 0.9904191493988037,grad_norm: 0.9459795708486284, iteration: 186901
loss: 1.0217103958129883,grad_norm: 0.8330677390245598, iteration: 186902
loss: 0.9842729568481445,grad_norm: 0.9358709999795343, iteration: 186903
loss: 1.0129623413085938,grad_norm: 0.9999991305926651, iteration: 186904
loss: 0.9924368262290955,grad_norm: 0.9526094254650423, iteration: 186905
loss: 0.9976788759231567,grad_norm: 0.9999992183956882, iteration: 186906
loss: 0.9796216487884521,grad_norm: 0.8668994152668676, iteration: 186907
loss: 1.004568099975586,grad_norm: 0.9999992082594953, iteration: 186908
loss: 1.039405107498169,grad_norm: 0.9421789510400504, iteration: 186909
loss: 0.9711388349533081,grad_norm: 0.8951815895574857, iteration: 186910
loss: 1.0695345401763916,grad_norm: 0.9999992717194963, iteration: 186911
loss: 0.9860259294509888,grad_norm: 0.9999992636539342, iteration: 186912
loss: 1.0095462799072266,grad_norm: 0.9169778547454439, iteration: 186913
loss: 0.9831101298332214,grad_norm: 0.9488931603130569, iteration: 186914
loss: 1.0042146444320679,grad_norm: 0.9304195490346036, iteration: 186915
loss: 0.9777045845985413,grad_norm: 0.9325870314882482, iteration: 186916
loss: 0.9660884737968445,grad_norm: 0.8696344998800373, iteration: 186917
loss: 1.0002752542495728,grad_norm: 0.9489935589542139, iteration: 186918
loss: 1.0045260190963745,grad_norm: 0.9999992266019286, iteration: 186919
loss: 0.9843764305114746,grad_norm: 0.9439268824015216, iteration: 186920
loss: 0.9656311273574829,grad_norm: 0.8319374575905791, iteration: 186921
loss: 0.9989352822303772,grad_norm: 0.9999991236924632, iteration: 186922
loss: 0.9780823588371277,grad_norm: 0.8805378468957202, iteration: 186923
loss: 1.03110933303833,grad_norm: 0.9587316000357041, iteration: 186924
loss: 0.9770353436470032,grad_norm: 0.9390920470302033, iteration: 186925
loss: 1.0448530912399292,grad_norm: 0.9999992018826206, iteration: 186926
loss: 1.01266348361969,grad_norm: 0.8663334353123272, iteration: 186927
loss: 0.9825559258460999,grad_norm: 0.9503554762875298, iteration: 186928
loss: 0.9858630299568176,grad_norm: 0.9840028379825226, iteration: 186929
loss: 0.9837877154350281,grad_norm: 0.9448499977591125, iteration: 186930
loss: 0.9841150045394897,grad_norm: 0.9999993347831106, iteration: 186931
loss: 1.0069624185562134,grad_norm: 0.99999888364217, iteration: 186932
loss: 1.0411617755889893,grad_norm: 0.9468352841498147, iteration: 186933
loss: 1.012046456336975,grad_norm: 0.8624081124685953, iteration: 186934
loss: 1.0057989358901978,grad_norm: 0.9999991314951461, iteration: 186935
loss: 0.9977407455444336,grad_norm: 0.9999992135856685, iteration: 186936
loss: 0.9924495816230774,grad_norm: 0.9999991628674367, iteration: 186937
loss: 0.9858259558677673,grad_norm: 0.9999993011550401, iteration: 186938
loss: 0.9911374449729919,grad_norm: 0.9800607218108003, iteration: 186939
loss: 1.0543253421783447,grad_norm: 0.9134116343129269, iteration: 186940
loss: 0.9979998469352722,grad_norm: 0.9999990445632209, iteration: 186941
loss: 0.996447741985321,grad_norm: 0.999999276523125, iteration: 186942
loss: 1.0072802305221558,grad_norm: 0.9999991798198483, iteration: 186943
loss: 1.0010734796524048,grad_norm: 0.8791721264579194, iteration: 186944
loss: 1.0197370052337646,grad_norm: 0.99999900142012, iteration: 186945
loss: 1.0272012948989868,grad_norm: 0.9999997564418355, iteration: 186946
loss: 1.024215817451477,grad_norm: 0.9506885078865094, iteration: 186947
loss: 1.0325326919555664,grad_norm: 0.9701068367327366, iteration: 186948
loss: 1.0274410247802734,grad_norm: 0.9999989856771497, iteration: 186949
loss: 0.9892383813858032,grad_norm: 0.9334124114479309, iteration: 186950
loss: 0.9766777753829956,grad_norm: 0.9862003250648914, iteration: 186951
loss: 1.0148210525512695,grad_norm: 0.9999991787087307, iteration: 186952
loss: 0.9703927636146545,grad_norm: 0.9999989790145207, iteration: 186953
loss: 1.0029304027557373,grad_norm: 0.9999993375328692, iteration: 186954
loss: 0.9924789071083069,grad_norm: 0.9660680535514719, iteration: 186955
loss: 0.9873565435409546,grad_norm: 0.9219582195687634, iteration: 186956
loss: 1.0250118970870972,grad_norm: 0.9154205416353205, iteration: 186957
loss: 0.9810896515846252,grad_norm: 0.9999990534847525, iteration: 186958
loss: 1.058998703956604,grad_norm: 0.9999990848113073, iteration: 186959
loss: 0.9786783456802368,grad_norm: 0.940214498955221, iteration: 186960
loss: 1.0376453399658203,grad_norm: 0.9999991416192674, iteration: 186961
loss: 0.9811417460441589,grad_norm: 0.9999991487864907, iteration: 186962
loss: 1.0147920846939087,grad_norm: 0.8715078377739596, iteration: 186963
loss: 0.9938581585884094,grad_norm: 0.993415676746908, iteration: 186964
loss: 0.9806109070777893,grad_norm: 0.9999991646424912, iteration: 186965
loss: 1.0145409107208252,grad_norm: 0.8338845466332768, iteration: 186966
loss: 1.0129237174987793,grad_norm: 0.8863872593104007, iteration: 186967
loss: 1.0271031856536865,grad_norm: 0.9132287470119862, iteration: 186968
loss: 0.9583370685577393,grad_norm: 0.999999227301046, iteration: 186969
loss: 0.9887005090713501,grad_norm: 0.9644971011867188, iteration: 186970
loss: 0.962583065032959,grad_norm: 0.9510561491013345, iteration: 186971
loss: 1.0085808038711548,grad_norm: 0.8722363306460555, iteration: 186972
loss: 0.9767219424247742,grad_norm: 0.8830680632469641, iteration: 186973
loss: 0.9910346269607544,grad_norm: 0.9999993201893344, iteration: 186974
loss: 1.1029759645462036,grad_norm: 0.9999998815679261, iteration: 186975
loss: 1.0040005445480347,grad_norm: 0.9701030046459452, iteration: 186976
loss: 1.3080248832702637,grad_norm: 0.9999994083384552, iteration: 186977
loss: 1.0122559070587158,grad_norm: 0.8375907348203488, iteration: 186978
loss: 0.9887876510620117,grad_norm: 0.8109220594291979, iteration: 186979
loss: 1.0121159553527832,grad_norm: 0.9950582506149386, iteration: 186980
loss: 1.0319560766220093,grad_norm: 0.9775996210303719, iteration: 186981
loss: 0.988822340965271,grad_norm: 0.9109303103698364, iteration: 186982
loss: 0.9903290271759033,grad_norm: 0.9999990694688174, iteration: 186983
loss: 0.9891181588172913,grad_norm: 0.9853310126760131, iteration: 186984
loss: 0.9985468983650208,grad_norm: 0.9999991518949917, iteration: 186985
loss: 0.9977409839630127,grad_norm: 0.9548428353538434, iteration: 186986
loss: 0.9736331105232239,grad_norm: 0.9999992231727115, iteration: 186987
loss: 0.9773878455162048,grad_norm: 0.8138807327834803, iteration: 186988
loss: 0.9858043789863586,grad_norm: 0.9802535860021347, iteration: 186989
loss: 1.020369052886963,grad_norm: 0.9570879809192775, iteration: 186990
loss: 0.9935634732246399,grad_norm: 0.9136198261223303, iteration: 186991
loss: 0.9791132807731628,grad_norm: 0.9334270899003735, iteration: 186992
loss: 0.999247133731842,grad_norm: 0.9275167471483708, iteration: 186993
loss: 1.0171698331832886,grad_norm: 0.9999989481513618, iteration: 186994
loss: 1.0306422710418701,grad_norm: 0.8903009953182475, iteration: 186995
loss: 0.9827671051025391,grad_norm: 0.9999991495152156, iteration: 186996
loss: 1.0223276615142822,grad_norm: 0.912136020703142, iteration: 186997
loss: 0.9779882431030273,grad_norm: 0.9627828545717763, iteration: 186998
loss: 1.0119452476501465,grad_norm: 0.9738225989301933, iteration: 186999
loss: 1.0150407552719116,grad_norm: 0.9999990627146385, iteration: 187000
loss: 0.9618493914604187,grad_norm: 0.9666474788493515, iteration: 187001
loss: 0.987244188785553,grad_norm: 0.9690620052770759, iteration: 187002
loss: 1.0191007852554321,grad_norm: 0.7949398785679417, iteration: 187003
loss: 1.0291543006896973,grad_norm: 0.871018219916204, iteration: 187004
loss: 1.0211154222488403,grad_norm: 0.9767907188457279, iteration: 187005
loss: 0.990531325340271,grad_norm: 0.9999991398442138, iteration: 187006
loss: 1.0161480903625488,grad_norm: 0.9999991574153814, iteration: 187007
loss: 0.9902798533439636,grad_norm: 0.8727579793936983, iteration: 187008
loss: 0.991338849067688,grad_norm: 0.8823197043232943, iteration: 187009
loss: 1.0184016227722168,grad_norm: 0.8731223160622552, iteration: 187010
loss: 1.0084894895553589,grad_norm: 0.999999072214385, iteration: 187011
loss: 1.0046085119247437,grad_norm: 0.9999992537044813, iteration: 187012
loss: 1.0200213193893433,grad_norm: 0.9999991428093526, iteration: 187013
loss: 0.9951368570327759,grad_norm: 0.9311577476225342, iteration: 187014
loss: 1.0013880729675293,grad_norm: 0.9999990569984818, iteration: 187015
loss: 0.9863383769989014,grad_norm: 0.999999188130072, iteration: 187016
loss: 0.9604630470275879,grad_norm: 0.9999990206079707, iteration: 187017
loss: 1.0737338066101074,grad_norm: 0.9999994085247476, iteration: 187018
loss: 0.9988077282905579,grad_norm: 0.9732860778825619, iteration: 187019
loss: 0.9695848226547241,grad_norm: 0.9361038226013999, iteration: 187020
loss: 1.0235421657562256,grad_norm: 0.9183122497808149, iteration: 187021
loss: 0.984033465385437,grad_norm: 0.9422277784695947, iteration: 187022
loss: 1.0582513809204102,grad_norm: 0.9515431555914563, iteration: 187023
loss: 1.0255401134490967,grad_norm: 0.9999991133711101, iteration: 187024
loss: 0.9768620729446411,grad_norm: 0.9999991578889394, iteration: 187025
loss: 1.0062695741653442,grad_norm: 0.988860275104441, iteration: 187026
loss: 0.9797298312187195,grad_norm: 0.9999991682559336, iteration: 187027
loss: 0.9966328740119934,grad_norm: 0.9999990133415355, iteration: 187028
loss: 1.0068877935409546,grad_norm: 0.9540437633448934, iteration: 187029
loss: 1.063388705253601,grad_norm: 0.999999579180172, iteration: 187030
loss: 0.9689452648162842,grad_norm: 0.9835194558175656, iteration: 187031
loss: 0.9733273983001709,grad_norm: 0.9999991382423724, iteration: 187032
loss: 0.9983491897583008,grad_norm: 0.8693134964133952, iteration: 187033
loss: 0.9969606399536133,grad_norm: 0.9999995900956902, iteration: 187034
loss: 1.026989221572876,grad_norm: 0.8758066716242559, iteration: 187035
loss: 0.9908608198165894,grad_norm: 0.8453463938590525, iteration: 187036
loss: 1.0170304775238037,grad_norm: 0.9999991997305818, iteration: 187037
loss: 0.9733860492706299,grad_norm: 0.9337631393187596, iteration: 187038
loss: 0.9998655319213867,grad_norm: 0.8665384781261832, iteration: 187039
loss: 1.0307472944259644,grad_norm: 0.8918379533691867, iteration: 187040
loss: 0.9718303084373474,grad_norm: 0.8112141352761411, iteration: 187041
loss: 1.0158870220184326,grad_norm: 0.9468230634608062, iteration: 187042
loss: 1.0170862674713135,grad_norm: 0.9999990702086936, iteration: 187043
loss: 1.0018467903137207,grad_norm: 0.9999991250346347, iteration: 187044
loss: 0.9963706135749817,grad_norm: 0.9522616289874078, iteration: 187045
loss: 0.9907769560813904,grad_norm: 0.9033381018193328, iteration: 187046
loss: 0.9755499958992004,grad_norm: 0.9138974947298381, iteration: 187047
loss: 0.9950175285339355,grad_norm: 0.9999992602742763, iteration: 187048
loss: 0.964361310005188,grad_norm: 0.8944663341082255, iteration: 187049
loss: 0.963592529296875,grad_norm: 0.9999991025275029, iteration: 187050
loss: 1.0719692707061768,grad_norm: 0.99999928447154, iteration: 187051
loss: 0.9929039478302002,grad_norm: 0.8053302560119355, iteration: 187052
loss: 0.9838587641716003,grad_norm: 0.9999998306874602, iteration: 187053
loss: 0.9849992990493774,grad_norm: 0.8512345625411952, iteration: 187054
loss: 1.010553240776062,grad_norm: 0.7704339107973591, iteration: 187055
loss: 0.9639852046966553,grad_norm: 0.8792690652789679, iteration: 187056
loss: 1.0240709781646729,grad_norm: 0.9999990911162169, iteration: 187057
loss: 1.0135555267333984,grad_norm: 0.9972358594584061, iteration: 187058
loss: 1.0064345598220825,grad_norm: 0.9460509568021969, iteration: 187059
loss: 0.9787889122962952,grad_norm: 0.9695775905105352, iteration: 187060
loss: 1.000707983970642,grad_norm: 0.9859673249310295, iteration: 187061
loss: 1.0102758407592773,grad_norm: 0.9270642136300379, iteration: 187062
loss: 0.9486183524131775,grad_norm: 0.9785267553584865, iteration: 187063
loss: 1.0183252096176147,grad_norm: 0.9999991543541666, iteration: 187064
loss: 1.0116478204727173,grad_norm: 0.9599963959999366, iteration: 187065
loss: 0.9609014987945557,grad_norm: 0.9999993296939397, iteration: 187066
loss: 1.0227595567703247,grad_norm: 0.99999910406555, iteration: 187067
loss: 0.9824452996253967,grad_norm: 0.9999991895626106, iteration: 187068
loss: 1.0274360179901123,grad_norm: 0.8982150815833395, iteration: 187069
loss: 1.0091173648834229,grad_norm: 0.9298689663492073, iteration: 187070
loss: 0.9918951392173767,grad_norm: 0.9462358503490158, iteration: 187071
loss: 1.0047072172164917,grad_norm: 0.9387819197045143, iteration: 187072
loss: 1.0155280828475952,grad_norm: 0.9384518519215991, iteration: 187073
loss: 1.048470377922058,grad_norm: 0.9999991601567346, iteration: 187074
loss: 1.0206607580184937,grad_norm: 0.8834927685137733, iteration: 187075
loss: 0.9984704256057739,grad_norm: 0.9999992482961212, iteration: 187076
loss: 0.9840291738510132,grad_norm: 0.961034500403531, iteration: 187077
loss: 1.0409460067749023,grad_norm: 0.9999991002748193, iteration: 187078
loss: 1.0310419797897339,grad_norm: 0.9999994383457659, iteration: 187079
loss: 1.0322825908660889,grad_norm: 0.9999990319533033, iteration: 187080
loss: 0.9473634958267212,grad_norm: 0.9999991679792547, iteration: 187081
loss: 0.9843198657035828,grad_norm: 0.9999991433435687, iteration: 187082
loss: 0.9795385003089905,grad_norm: 0.9549789819633726, iteration: 187083
loss: 1.0331696271896362,grad_norm: 0.9999992635627725, iteration: 187084
loss: 1.077923059463501,grad_norm: 0.9999990925121931, iteration: 187085
loss: 0.9777343273162842,grad_norm: 0.9352830323113266, iteration: 187086
loss: 1.1646355390548706,grad_norm: 0.999999813316264, iteration: 187087
loss: 0.9870136976242065,grad_norm: 0.9214383948576362, iteration: 187088
loss: 1.015051245689392,grad_norm: 0.9387391535346551, iteration: 187089
loss: 1.001807689666748,grad_norm: 0.879397136457815, iteration: 187090
loss: 0.9979016184806824,grad_norm: 0.986028096327625, iteration: 187091
loss: 0.9851067662239075,grad_norm: 0.7833994150109036, iteration: 187092
loss: 0.9981732964515686,grad_norm: 0.9999991520532182, iteration: 187093
loss: 0.9910776615142822,grad_norm: 0.9999992859531353, iteration: 187094
loss: 1.0075515508651733,grad_norm: 0.8830935503090992, iteration: 187095
loss: 0.9933617115020752,grad_norm: 0.9337949755133088, iteration: 187096
loss: 1.0367436408996582,grad_norm: 0.9999999310015606, iteration: 187097
loss: 0.97932368516922,grad_norm: 0.9999990185153754, iteration: 187098
loss: 1.0323275327682495,grad_norm: 0.959015225147831, iteration: 187099
loss: 0.9422297477722168,grad_norm: 0.9726548094456728, iteration: 187100
loss: 1.0400750637054443,grad_norm: 0.9435841808935814, iteration: 187101
loss: 0.984557569026947,grad_norm: 0.9999991273478003, iteration: 187102
loss: 0.9674183130264282,grad_norm: 0.9999991727128594, iteration: 187103
loss: 0.9795853495597839,grad_norm: 0.9999991055317659, iteration: 187104
loss: 1.0183533430099487,grad_norm: 0.929735438345768, iteration: 187105
loss: 0.9975633025169373,grad_norm: 0.9999991212498599, iteration: 187106
loss: 1.032585620880127,grad_norm: 0.9999992473077122, iteration: 187107
loss: 1.0499732494354248,grad_norm: 0.9999999146210622, iteration: 187108
loss: 0.9968589544296265,grad_norm: 0.9208671174596341, iteration: 187109
loss: 0.9714319109916687,grad_norm: 0.9999991781029934, iteration: 187110
loss: 0.9833996891975403,grad_norm: 0.7724045214677885, iteration: 187111
loss: 1.0070337057113647,grad_norm: 0.9876928417261449, iteration: 187112
loss: 0.9995362162590027,grad_norm: 0.910154749868325, iteration: 187113
loss: 0.973529577255249,grad_norm: 0.9999990783363002, iteration: 187114
loss: 1.0289008617401123,grad_norm: 0.9999997726881088, iteration: 187115
loss: 0.995785653591156,grad_norm: 0.9999996233732994, iteration: 187116
loss: 1.027909755706787,grad_norm: 0.9999991340759378, iteration: 187117
loss: 1.0457367897033691,grad_norm: 0.8218046532531995, iteration: 187118
loss: 0.9906969666481018,grad_norm: 0.9999990047491706, iteration: 187119
loss: 1.0004956722259521,grad_norm: 0.9999991344934956, iteration: 187120
loss: 0.9965620040893555,grad_norm: 0.9999991464738183, iteration: 187121
loss: 1.0066871643066406,grad_norm: 0.9999992199542567, iteration: 187122
loss: 1.0117703676223755,grad_norm: 0.9999990024527481, iteration: 187123
loss: 0.9968600273132324,grad_norm: 0.9999990515769983, iteration: 187124
loss: 1.0015830993652344,grad_norm: 0.9999991145754277, iteration: 187125
loss: 1.0648841857910156,grad_norm: 0.9999996641517105, iteration: 187126
loss: 1.0221894979476929,grad_norm: 0.8228622956774033, iteration: 187127
loss: 1.0007506608963013,grad_norm: 0.9999991765423945, iteration: 187128
loss: 0.9897152781486511,grad_norm: 0.9047471939484696, iteration: 187129
loss: 0.9752614498138428,grad_norm: 0.8828235068774677, iteration: 187130
loss: 1.2026880979537964,grad_norm: 0.9999997113021627, iteration: 187131
loss: 0.9919959306716919,grad_norm: 0.9999995423654167, iteration: 187132
loss: 1.0093265771865845,grad_norm: 0.8688301392957153, iteration: 187133
loss: 1.076941967010498,grad_norm: 0.9999997039318912, iteration: 187134
loss: 0.9973962903022766,grad_norm: 0.9115826470013111, iteration: 187135
loss: 0.9841845035552979,grad_norm: 0.9114127504255498, iteration: 187136
loss: 1.0410407781600952,grad_norm: 0.9646561826863312, iteration: 187137
loss: 1.1492648124694824,grad_norm: 0.9999998845122454, iteration: 187138
loss: 1.1691253185272217,grad_norm: 0.9999994076282012, iteration: 187139
loss: 0.9751212000846863,grad_norm: 0.9999990692146806, iteration: 187140
loss: 0.9925287961959839,grad_norm: 0.9648912737289937, iteration: 187141
loss: 1.0357661247253418,grad_norm: 0.9999995979230635, iteration: 187142
loss: 1.0450584888458252,grad_norm: 0.9999997702574441, iteration: 187143
loss: 0.9984264969825745,grad_norm: 0.935734828446585, iteration: 187144
loss: 1.1408907175064087,grad_norm: 0.9999993381807056, iteration: 187145
loss: 1.009560465812683,grad_norm: 0.9034548396830123, iteration: 187146
loss: 1.0172886848449707,grad_norm: 0.9999990474199875, iteration: 187147
loss: 0.9676241278648376,grad_norm: 0.9718218504210383, iteration: 187148
loss: 1.0291813611984253,grad_norm: 0.9999992756191394, iteration: 187149
loss: 1.0184952020645142,grad_norm: 0.9999994588163955, iteration: 187150
loss: 0.9762253761291504,grad_norm: 0.9590883317637068, iteration: 187151
loss: 1.0064749717712402,grad_norm: 0.9999991329795267, iteration: 187152
loss: 1.0437730550765991,grad_norm: 0.9999989806888566, iteration: 187153
loss: 1.0218491554260254,grad_norm: 0.9999997586570459, iteration: 187154
loss: 1.044091820716858,grad_norm: 0.9999990573276893, iteration: 187155
loss: 0.9842623472213745,grad_norm: 0.9176199068644916, iteration: 187156
loss: 1.0203505754470825,grad_norm: 0.9835929646882284, iteration: 187157
loss: 1.1010226011276245,grad_norm: 0.9999996027550313, iteration: 187158
loss: 1.0082420110702515,grad_norm: 0.9999991625843825, iteration: 187159
loss: 1.180408000946045,grad_norm: 0.9999994633405499, iteration: 187160
loss: 1.0315293073654175,grad_norm: 0.9999992928659955, iteration: 187161
loss: 1.0104291439056396,grad_norm: 0.9999991123591859, iteration: 187162
loss: 0.9989847540855408,grad_norm: 0.9999992209225359, iteration: 187163
loss: 1.1312928199768066,grad_norm: 0.9999996675775918, iteration: 187164
loss: 1.0072156190872192,grad_norm: 0.9999993428973992, iteration: 187165
loss: 0.9878591895103455,grad_norm: 0.9228431574633319, iteration: 187166
loss: 1.0200865268707275,grad_norm: 0.9999992220271822, iteration: 187167
loss: 0.9780526757240295,grad_norm: 0.9392656215629968, iteration: 187168
loss: 1.0120525360107422,grad_norm: 0.9999992856868295, iteration: 187169
loss: 0.9799266457557678,grad_norm: 0.9218404337567401, iteration: 187170
loss: 1.0070269107818604,grad_norm: 0.999999184477285, iteration: 187171
loss: 1.0007281303405762,grad_norm: 0.9999990763197594, iteration: 187172
loss: 1.2029885053634644,grad_norm: 0.985750724662448, iteration: 187173
loss: 1.0188326835632324,grad_norm: 0.9999990891250179, iteration: 187174
loss: 1.018612265586853,grad_norm: 0.9424839128921453, iteration: 187175
loss: 1.003625512123108,grad_norm: 0.9999991480412461, iteration: 187176
loss: 0.9729294180870056,grad_norm: 0.9999990293486437, iteration: 187177
loss: 1.0361334085464478,grad_norm: 0.999999334292812, iteration: 187178
loss: 1.028773307800293,grad_norm: 0.9999992086267797, iteration: 187179
loss: 1.001599907875061,grad_norm: 0.9999990904828465, iteration: 187180
loss: 1.0006103515625,grad_norm: 0.9999998562244238, iteration: 187181
loss: 1.2232369184494019,grad_norm: 0.9999990598870029, iteration: 187182
loss: 0.9946545362472534,grad_norm: 0.9668092321843813, iteration: 187183
loss: 0.9841250777244568,grad_norm: 0.9999991748040558, iteration: 187184
loss: 1.0213791131973267,grad_norm: 0.9361779452620879, iteration: 187185
loss: 1.0477919578552246,grad_norm: 0.9999991779243091, iteration: 187186
loss: 0.9622470140457153,grad_norm: 0.88358250895133, iteration: 187187
loss: 1.1202579736709595,grad_norm: 0.9999995111079664, iteration: 187188
loss: 0.9762200713157654,grad_norm: 0.949686476857404, iteration: 187189
loss: 0.9877303838729858,grad_norm: 0.8613138424490218, iteration: 187190
loss: 0.99910569190979,grad_norm: 0.9999993833864916, iteration: 187191
loss: 1.041459560394287,grad_norm: 0.9999990708139875, iteration: 187192
loss: 1.046878457069397,grad_norm: 0.9190815978370497, iteration: 187193
loss: 1.0057926177978516,grad_norm: 0.9999990791682024, iteration: 187194
loss: 0.9963132739067078,grad_norm: 0.9418871882291362, iteration: 187195
loss: 1.0573056936264038,grad_norm: 0.9999991694892028, iteration: 187196
loss: 1.1748698949813843,grad_norm: 0.9999993540627159, iteration: 187197
loss: 1.0209640264511108,grad_norm: 0.9999990502708694, iteration: 187198
loss: 1.1934651136398315,grad_norm: 0.9999998878102077, iteration: 187199
loss: 0.995371401309967,grad_norm: 0.9999991458083235, iteration: 187200
loss: 1.0688750743865967,grad_norm: 0.9999991807682651, iteration: 187201
loss: 1.1374790668487549,grad_norm: 0.9999993065680546, iteration: 187202
loss: 1.0194133520126343,grad_norm: 0.8579255007397295, iteration: 187203
loss: 1.027618646621704,grad_norm: 0.9647173561294939, iteration: 187204
loss: 1.046622395515442,grad_norm: 0.999999415660558, iteration: 187205
loss: 0.9939410090446472,grad_norm: 0.9395112221846215, iteration: 187206
loss: 1.046684980392456,grad_norm: 0.9358018894311293, iteration: 187207
loss: 1.0128648281097412,grad_norm: 0.9999990631421576, iteration: 187208
loss: 0.9856149554252625,grad_norm: 0.870990817991236, iteration: 187209
loss: 1.0383694171905518,grad_norm: 0.9999990449252081, iteration: 187210
loss: 0.9753254055976868,grad_norm: 0.8389408784971188, iteration: 187211
loss: 1.0201003551483154,grad_norm: 0.9999995264218512, iteration: 187212
loss: 1.022965669631958,grad_norm: 0.9999990183047289, iteration: 187213
loss: 1.1149073839187622,grad_norm: 0.9999992519445977, iteration: 187214
loss: 1.022268533706665,grad_norm: 0.9999991661577525, iteration: 187215
loss: 1.0176029205322266,grad_norm: 0.9999991094017444, iteration: 187216
loss: 1.0112738609313965,grad_norm: 0.9351586540003017, iteration: 187217
loss: 0.9834567308425903,grad_norm: 0.9999994073412495, iteration: 187218
loss: 1.0095643997192383,grad_norm: 0.9999993151542572, iteration: 187219
loss: 1.007379174232483,grad_norm: 0.835405162821032, iteration: 187220
loss: 1.020318865776062,grad_norm: 0.9467971444508749, iteration: 187221
loss: 1.035007357597351,grad_norm: 0.8481382301093138, iteration: 187222
loss: 1.0323832035064697,grad_norm: 0.999999843570277, iteration: 187223
loss: 0.9989657402038574,grad_norm: 0.9999989513392801, iteration: 187224
loss: 0.9731116890907288,grad_norm: 0.9944265875063786, iteration: 187225
loss: 1.0082764625549316,grad_norm: 0.9184373771162976, iteration: 187226
loss: 1.0098340511322021,grad_norm: 0.9999991762662722, iteration: 187227
loss: 1.0129051208496094,grad_norm: 0.9999990234227538, iteration: 187228
loss: 0.9910683631896973,grad_norm: 0.9999990977259005, iteration: 187229
loss: 0.9988802075386047,grad_norm: 0.9999991503901712, iteration: 187230
loss: 0.982134997844696,grad_norm: 0.9999989574126409, iteration: 187231
loss: 0.9723020792007446,grad_norm: 0.9999991757361609, iteration: 187232
loss: 1.0293439626693726,grad_norm: 0.9999991754768629, iteration: 187233
loss: 1.0106059312820435,grad_norm: 0.9999991113828222, iteration: 187234
loss: 1.0452123880386353,grad_norm: 0.9999991204137113, iteration: 187235
loss: 1.0008350610733032,grad_norm: 0.9999991172823007, iteration: 187236
loss: 1.0061962604522705,grad_norm: 0.8672844284330272, iteration: 187237
loss: 1.0031189918518066,grad_norm: 0.9999992212284425, iteration: 187238
loss: 1.0053027868270874,grad_norm: 0.9999990926401106, iteration: 187239
loss: 0.9988440871238708,grad_norm: 0.892052110055215, iteration: 187240
loss: 1.0327526330947876,grad_norm: 0.9999991545472549, iteration: 187241
loss: 1.0730549097061157,grad_norm: 0.999999636130268, iteration: 187242
loss: 1.0084142684936523,grad_norm: 0.9114809155699203, iteration: 187243
loss: 1.032230257987976,grad_norm: 0.9999991413563992, iteration: 187244
loss: 1.0859838724136353,grad_norm: 0.9999995097639643, iteration: 187245
loss: 1.0266493558883667,grad_norm: 0.9999990104305919, iteration: 187246
loss: 1.0083526372909546,grad_norm: 0.8562645798285015, iteration: 187247
loss: 1.0344353914260864,grad_norm: 0.9999994025828327, iteration: 187248
loss: 0.9872434735298157,grad_norm: 0.9999990818094552, iteration: 187249
loss: 1.0229339599609375,grad_norm: 0.9999992333765422, iteration: 187250
loss: 0.9767519235610962,grad_norm: 0.9170561175120425, iteration: 187251
loss: 0.973445475101471,grad_norm: 0.9614940473854794, iteration: 187252
loss: 1.039693832397461,grad_norm: 0.9849898953435174, iteration: 187253
loss: 1.0081288814544678,grad_norm: 0.9999990333557524, iteration: 187254
loss: 1.1084208488464355,grad_norm: 0.9999995281987213, iteration: 187255
loss: 1.2435911893844604,grad_norm: 0.9999998796356321, iteration: 187256
loss: 1.0595842599868774,grad_norm: 0.9999991294540702, iteration: 187257
loss: 1.4405850172042847,grad_norm: 0.9999996500162024, iteration: 187258
loss: 1.298343300819397,grad_norm: 0.9999993112920126, iteration: 187259
loss: 1.7341521978378296,grad_norm: 0.999999664276847, iteration: 187260
loss: 1.1382229328155518,grad_norm: 0.9675843758117326, iteration: 187261
loss: 0.9932281374931335,grad_norm: 0.9559682299893884, iteration: 187262
loss: 1.6914271116256714,grad_norm: 0.9999997732281255, iteration: 187263
loss: 1.3382072448730469,grad_norm: 0.9999999592071395, iteration: 187264
loss: 1.4065921306610107,grad_norm: 0.9999998835125822, iteration: 187265
loss: 2.0173494815826416,grad_norm: 0.9999999228848507, iteration: 187266
loss: 1.2746068239212036,grad_norm: 0.9999992752705453, iteration: 187267
loss: 1.6780308485031128,grad_norm: 0.9999996848173184, iteration: 187268
loss: 1.4861342906951904,grad_norm: 0.9999994313789091, iteration: 187269
loss: 1.1525450944900513,grad_norm: 0.9999994519663393, iteration: 187270
loss: 1.3056801557540894,grad_norm: 0.9999999019310457, iteration: 187271
loss: 1.698106050491333,grad_norm: 0.9999998724353668, iteration: 187272
loss: 1.6462727785110474,grad_norm: 0.9999998731814361, iteration: 187273
loss: 1.1113651990890503,grad_norm: 0.9999999960549164, iteration: 187274
loss: 1.7649163007736206,grad_norm: 0.9999998713536854, iteration: 187275
loss: 1.484887957572937,grad_norm: 0.9999998869364842, iteration: 187276
loss: 1.5257103443145752,grad_norm: 0.9999999309358683, iteration: 187277
loss: 1.5858371257781982,grad_norm: 0.9999998300132662, iteration: 187278
loss: 1.7575271129608154,grad_norm: 0.9999998104634075, iteration: 187279
loss: 1.4293725490570068,grad_norm: 0.9999997742134126, iteration: 187280
loss: 1.7229210138320923,grad_norm: 0.9999998417060404, iteration: 187281
loss: 1.3778339624404907,grad_norm: 1.0000000326275933, iteration: 187282
loss: 1.5185540914535522,grad_norm: 0.999999807208853, iteration: 187283
loss: 1.7842572927474976,grad_norm: 0.9999999213326891, iteration: 187284
loss: 1.4709773063659668,grad_norm: 0.9999998591639123, iteration: 187285
loss: 1.529981017112732,grad_norm: 0.9999998316051302, iteration: 187286
loss: 1.3300700187683105,grad_norm: 0.9999998875598193, iteration: 187287
loss: 1.3051871061325073,grad_norm: 0.9999996762015761, iteration: 187288
loss: 1.2629154920578003,grad_norm: 0.9999997489348642, iteration: 187289
loss: 1.3495826721191406,grad_norm: 0.9999999450166731, iteration: 187290
loss: 1.5187644958496094,grad_norm: 0.9999997669905042, iteration: 187291
loss: 1.4475421905517578,grad_norm: 0.999999739030566, iteration: 187292
loss: 1.310204029083252,grad_norm: 0.9999997462494131, iteration: 187293
loss: 1.0969876050949097,grad_norm: 0.9999992517407524, iteration: 187294
loss: 1.303196907043457,grad_norm: 0.9999999374054278, iteration: 187295
loss: 1.5131548643112183,grad_norm: 0.9999999177922937, iteration: 187296
loss: 1.1718592643737793,grad_norm: 0.9999998677452923, iteration: 187297
loss: 1.3757550716400146,grad_norm: 0.9999999311341782, iteration: 187298
loss: 1.1945548057556152,grad_norm: 0.9999996679138038, iteration: 187299
loss: 1.2269506454467773,grad_norm: 0.9999999497250954, iteration: 187300
loss: 1.3824546337127686,grad_norm: 0.9999994821967882, iteration: 187301
loss: 1.2491086721420288,grad_norm: 0.9999997260403364, iteration: 187302
loss: 1.023297905921936,grad_norm: 0.9999998168165899, iteration: 187303
loss: 1.1709288358688354,grad_norm: 0.9999992803572583, iteration: 187304
loss: 1.2681459188461304,grad_norm: 0.9999999589339627, iteration: 187305
loss: 1.0649207830429077,grad_norm: 0.9999996928255808, iteration: 187306
loss: 1.054871678352356,grad_norm: 0.999999560186112, iteration: 187307
loss: 1.1299935579299927,grad_norm: 0.9999998259496643, iteration: 187308
loss: 1.0956766605377197,grad_norm: 0.9999995908007114, iteration: 187309
loss: 1.2017934322357178,grad_norm: 0.9999996605444134, iteration: 187310
loss: 1.0304367542266846,grad_norm: 0.9999996532630686, iteration: 187311
loss: 1.1469780206680298,grad_norm: 0.9999997277682809, iteration: 187312
loss: 1.0616847276687622,grad_norm: 0.9859741662763113, iteration: 187313
loss: 1.0704169273376465,grad_norm: 0.9842122591859042, iteration: 187314
loss: 1.20175302028656,grad_norm: 0.9999996100363523, iteration: 187315
loss: 1.0066887140274048,grad_norm: 0.9999991161300119, iteration: 187316
loss: 1.2369967699050903,grad_norm: 0.9999997223264219, iteration: 187317
loss: 1.00932776927948,grad_norm: 0.9999991994476057, iteration: 187318
loss: 1.0558730363845825,grad_norm: 0.9999991179510089, iteration: 187319
loss: 1.0830458402633667,grad_norm: 0.9869183233912725, iteration: 187320
loss: 1.3051667213439941,grad_norm: 0.9999995723213126, iteration: 187321
loss: 1.0631157159805298,grad_norm: 0.9999991087668593, iteration: 187322
loss: 1.0710052251815796,grad_norm: 0.9999992901952908, iteration: 187323
loss: 1.5152989625930786,grad_norm: 0.9999998895340875, iteration: 187324
loss: 1.0429126024246216,grad_norm: 0.9815771967759633, iteration: 187325
loss: 1.119671106338501,grad_norm: 0.9999995158360862, iteration: 187326
loss: 1.1256134510040283,grad_norm: 0.9999991572399969, iteration: 187327
loss: 1.2181533575057983,grad_norm: 0.9999993628625271, iteration: 187328
loss: 1.4573506116867065,grad_norm: 0.9999998102486289, iteration: 187329
loss: 1.2101291418075562,grad_norm: 0.9999989793227624, iteration: 187330
loss: 1.100080132484436,grad_norm: 0.9999992384475243, iteration: 187331
loss: 1.3646894693374634,grad_norm: 0.9999998435963029, iteration: 187332
loss: 1.1272844076156616,grad_norm: 0.9999993352007649, iteration: 187333
loss: 1.127502679824829,grad_norm: 0.9999994347749998, iteration: 187334
loss: 1.0390266180038452,grad_norm: 0.9999991342211634, iteration: 187335
loss: 1.5483406782150269,grad_norm: 0.9999999755193986, iteration: 187336
loss: 1.159165620803833,grad_norm: 0.9999994679167232, iteration: 187337
loss: 1.0552879571914673,grad_norm: 0.9999994967641738, iteration: 187338
loss: 1.1872347593307495,grad_norm: 0.9999995153212038, iteration: 187339
loss: 1.4599193334579468,grad_norm: 0.9999998792152183, iteration: 187340
loss: 1.0910178422927856,grad_norm: 0.9999992275005007, iteration: 187341
loss: 1.205566167831421,grad_norm: 1.000000039765984, iteration: 187342
loss: 1.1268818378448486,grad_norm: 0.9999994085511047, iteration: 187343
loss: 1.2104252576828003,grad_norm: 0.9999993851298755, iteration: 187344
loss: 1.537872314453125,grad_norm: 0.9999998157434942, iteration: 187345
loss: 1.181455373764038,grad_norm: 0.999999940011247, iteration: 187346
loss: 1.1893110275268555,grad_norm: 0.9999996475780989, iteration: 187347
loss: 1.1774606704711914,grad_norm: 0.999999928966528, iteration: 187348
loss: 1.2637020349502563,grad_norm: 0.9999997975144056, iteration: 187349
loss: 1.2550972700119019,grad_norm: 0.9999997859651087, iteration: 187350
loss: 1.3299380540847778,grad_norm: 0.999999613844961, iteration: 187351
loss: 1.1828505992889404,grad_norm: 0.9999993451394831, iteration: 187352
loss: 1.0795868635177612,grad_norm: 0.9999993216273635, iteration: 187353
loss: 1.0505279302597046,grad_norm: 0.9999997677465625, iteration: 187354
loss: 1.0669437646865845,grad_norm: 0.9999991953237076, iteration: 187355
loss: 1.1074695587158203,grad_norm: 0.9999999668253221, iteration: 187356
loss: 1.0316886901855469,grad_norm: 0.9999991939670013, iteration: 187357
loss: 1.0902317762374878,grad_norm: 0.9999998634459148, iteration: 187358
loss: 1.0868706703186035,grad_norm: 0.9999991716469727, iteration: 187359
loss: 1.0458554029464722,grad_norm: 0.999998989483222, iteration: 187360
loss: 1.5753672122955322,grad_norm: 0.999999748209292, iteration: 187361
loss: 1.1468112468719482,grad_norm: 0.9999997063190271, iteration: 187362
loss: 1.0431911945343018,grad_norm: 0.999999608652249, iteration: 187363
loss: 1.070324420928955,grad_norm: 0.9999993064265211, iteration: 187364
loss: 1.4317162036895752,grad_norm: 0.9999999206165987, iteration: 187365
loss: 1.213564395904541,grad_norm: 0.9999993986903919, iteration: 187366
loss: 1.1290971040725708,grad_norm: 0.9999999280186233, iteration: 187367
loss: 1.2145448923110962,grad_norm: 0.9999991642461192, iteration: 187368
loss: 1.3157060146331787,grad_norm: 0.9999999330041843, iteration: 187369
loss: 1.4037045240402222,grad_norm: 0.9999996185177142, iteration: 187370
loss: 1.427486538887024,grad_norm: 0.9999998496272129, iteration: 187371
loss: 1.1817458868026733,grad_norm: 0.9999995520377791, iteration: 187372
loss: 1.2701764106750488,grad_norm: 0.9999994558320617, iteration: 187373
loss: 1.2103780508041382,grad_norm: 0.9999991970641401, iteration: 187374
loss: 1.1942455768585205,grad_norm: 0.9999998514927843, iteration: 187375
loss: 1.1644110679626465,grad_norm: 0.9999992082327526, iteration: 187376
loss: 1.1762360334396362,grad_norm: 0.9999996716130157, iteration: 187377
loss: 1.3657335042953491,grad_norm: 0.9999998962539737, iteration: 187378
loss: 1.130433201789856,grad_norm: 0.9999997035518223, iteration: 187379
loss: 1.367522120475769,grad_norm: 0.9999997858643946, iteration: 187380
loss: 1.0653281211853027,grad_norm: 0.999999143487839, iteration: 187381
loss: 1.0957666635513306,grad_norm: 0.999999863341912, iteration: 187382
loss: 1.2316341400146484,grad_norm: 0.9999997570972216, iteration: 187383
loss: 1.1176191568374634,grad_norm: 0.9999998416001529, iteration: 187384
loss: 1.1703307628631592,grad_norm: 0.9999996823003828, iteration: 187385
loss: 1.1527385711669922,grad_norm: 0.9999998616582196, iteration: 187386
loss: 1.1085923910140991,grad_norm: 0.9999998804163567, iteration: 187387
loss: 1.0325247049331665,grad_norm: 0.9999991052278332, iteration: 187388
loss: 1.1413636207580566,grad_norm: 0.9999993057294635, iteration: 187389
loss: 1.2118178606033325,grad_norm: 0.9999999196788212, iteration: 187390
loss: 1.0483183860778809,grad_norm: 0.9999994907744862, iteration: 187391
loss: 1.1106328964233398,grad_norm: 0.9999999291457495, iteration: 187392
loss: 1.137266755104065,grad_norm: 0.9999996381751325, iteration: 187393
loss: 1.3751834630966187,grad_norm: 0.9999998053435275, iteration: 187394
loss: 1.2669693231582642,grad_norm: 0.999999969859287, iteration: 187395
loss: 1.27059805393219,grad_norm: 0.9999995176393867, iteration: 187396
loss: 1.2381318807601929,grad_norm: 0.9999991270978761, iteration: 187397
loss: 1.3700344562530518,grad_norm: 0.9999999322621745, iteration: 187398
loss: 1.4738500118255615,grad_norm: 0.9999999801603443, iteration: 187399
loss: 1.2140964269638062,grad_norm: 0.9999996440779036, iteration: 187400
loss: 1.1416740417480469,grad_norm: 0.9999996688076377, iteration: 187401
loss: 1.2904764413833618,grad_norm: 0.9999999450579014, iteration: 187402
loss: 1.191256046295166,grad_norm: 0.9999996887496032, iteration: 187403
loss: 1.3007065057754517,grad_norm: 0.9999999499459467, iteration: 187404
loss: 1.2589387893676758,grad_norm: 0.9999998836678523, iteration: 187405
loss: 1.2704108953475952,grad_norm: 1.0000000388196373, iteration: 187406
loss: 1.4580365419387817,grad_norm: 0.9999997433305622, iteration: 187407
loss: 1.2228249311447144,grad_norm: 0.9999995135399644, iteration: 187408
loss: 1.2965127229690552,grad_norm: 1.0000000301519103, iteration: 187409
loss: 1.274958848953247,grad_norm: 0.9999999335885816, iteration: 187410
loss: 1.089109182357788,grad_norm: 0.9999998252346155, iteration: 187411
loss: 1.4606966972351074,grad_norm: 1.0000000727961054, iteration: 187412
loss: 1.2204828262329102,grad_norm: 0.9999999252076884, iteration: 187413
loss: 1.1925737857818604,grad_norm: 0.9999999347623643, iteration: 187414
loss: 1.2357219457626343,grad_norm: 0.9999998097454585, iteration: 187415
loss: 1.277518630027771,grad_norm: 0.9999999189774055, iteration: 187416
loss: 1.2554160356521606,grad_norm: 0.9999998090196287, iteration: 187417
loss: 1.3161349296569824,grad_norm: 0.9999999684046949, iteration: 187418
loss: 1.3067171573638916,grad_norm: 0.9999999163353018, iteration: 187419
loss: 1.205928921699524,grad_norm: 0.999999271576702, iteration: 187420
loss: 1.127932071685791,grad_norm: 0.9999992778132647, iteration: 187421
loss: 1.0597573518753052,grad_norm: 0.9999998073464432, iteration: 187422
loss: 1.2489595413208008,grad_norm: 0.9999996396772972, iteration: 187423
loss: 1.1700199842453003,grad_norm: 0.999999871932307, iteration: 187424
loss: 1.088221788406372,grad_norm: 0.9999997224151552, iteration: 187425
loss: 1.1597623825073242,grad_norm: 0.9999993972037212, iteration: 187426
loss: 1.1804966926574707,grad_norm: 0.9999997317121887, iteration: 187427
loss: 1.324177622795105,grad_norm: 0.9999996690366154, iteration: 187428
loss: 1.1690765619277954,grad_norm: 0.999999745412888, iteration: 187429
loss: 1.2762051820755005,grad_norm: 0.9999996041224183, iteration: 187430
loss: 1.1850347518920898,grad_norm: 0.9999994799204273, iteration: 187431
loss: 1.1712298393249512,grad_norm: 0.9999998979657221, iteration: 187432
loss: 1.2741633653640747,grad_norm: 0.999999833338642, iteration: 187433
loss: 1.1455905437469482,grad_norm: 0.9999994695714056, iteration: 187434
loss: 1.3487794399261475,grad_norm: 1.0000000079026028, iteration: 187435
loss: 1.0157045125961304,grad_norm: 0.9965973295367023, iteration: 187436
loss: 1.332653522491455,grad_norm: 0.9999997328709889, iteration: 187437
loss: 1.2272300720214844,grad_norm: 0.9999999150649657, iteration: 187438
loss: 1.1426997184753418,grad_norm: 0.999999656115702, iteration: 187439
loss: 1.239848256111145,grad_norm: 0.9999997470209391, iteration: 187440
loss: 1.3214685916900635,grad_norm: 0.9999999695315352, iteration: 187441
loss: 1.3647841215133667,grad_norm: 0.9999999589130313, iteration: 187442
loss: 0.9712960720062256,grad_norm: 0.9999997901892923, iteration: 187443
loss: 1.3024016618728638,grad_norm: 0.9999993521829924, iteration: 187444
loss: 1.2998496294021606,grad_norm: 0.9999998092953444, iteration: 187445
loss: 1.1858947277069092,grad_norm: 0.9999995062069356, iteration: 187446
loss: 1.063894510269165,grad_norm: 0.9999992935629236, iteration: 187447
loss: 1.2883926630020142,grad_norm: 0.9999994850354101, iteration: 187448
loss: 1.193575382232666,grad_norm: 0.9999999206508529, iteration: 187449
loss: 0.9769651889801025,grad_norm: 0.9999993014283494, iteration: 187450
loss: 1.0380353927612305,grad_norm: 0.9999996569665663, iteration: 187451
loss: 1.0781800746917725,grad_norm: 0.9999996842344313, iteration: 187452
loss: 1.1254082918167114,grad_norm: 0.9999992352256984, iteration: 187453
loss: 1.1042141914367676,grad_norm: 1.0000000371350217, iteration: 187454
loss: 1.2219231128692627,grad_norm: 0.9999997641378658, iteration: 187455
loss: 1.0747525691986084,grad_norm: 0.9999993568246027, iteration: 187456
loss: 1.0492379665374756,grad_norm: 0.9999998584687365, iteration: 187457
loss: 1.1175917387008667,grad_norm: 0.9999992708848465, iteration: 187458
loss: 1.1492723226547241,grad_norm: 0.9999996268511974, iteration: 187459
loss: 1.0228298902511597,grad_norm: 0.9999991475065123, iteration: 187460
loss: 1.0857447385787964,grad_norm: 0.9999999521420115, iteration: 187461
loss: 1.0125267505645752,grad_norm: 0.9999992961925425, iteration: 187462
loss: 1.0471776723861694,grad_norm: 0.9999994343525762, iteration: 187463
loss: 1.1091575622558594,grad_norm: 0.9999995470633571, iteration: 187464
loss: 1.0164636373519897,grad_norm: 0.8757801487119834, iteration: 187465
loss: 1.0247722864151,grad_norm: 0.9999990927642132, iteration: 187466
loss: 1.1033389568328857,grad_norm: 0.9999991637676181, iteration: 187467
loss: 1.0124324560165405,grad_norm: 0.9999998351483461, iteration: 187468
loss: 1.1282341480255127,grad_norm: 0.999999360250191, iteration: 187469
loss: 0.9971747398376465,grad_norm: 0.867240713771929, iteration: 187470
loss: 1.0875732898712158,grad_norm: 0.9999997262501764, iteration: 187471
loss: 1.0347011089324951,grad_norm: 0.9999997510526991, iteration: 187472
loss: 1.052862524986267,grad_norm: 0.9999992268947562, iteration: 187473
loss: 0.9881821870803833,grad_norm: 0.999999151582649, iteration: 187474
loss: 1.001480221748352,grad_norm: 0.9999991616528487, iteration: 187475
loss: 1.0161888599395752,grad_norm: 0.8574161529718195, iteration: 187476
loss: 1.0110620260238647,grad_norm: 0.9999992673272472, iteration: 187477
loss: 1.0795435905456543,grad_norm: 0.9999997631505092, iteration: 187478
loss: 0.9532575011253357,grad_norm: 0.9906113873463371, iteration: 187479
loss: 0.9947615265846252,grad_norm: 0.9999990458967111, iteration: 187480
loss: 1.1463817358016968,grad_norm: 0.999999792131597, iteration: 187481
loss: 1.0734130144119263,grad_norm: 0.9999997489770398, iteration: 187482
loss: 0.9925987720489502,grad_norm: 0.9858715602066441, iteration: 187483
loss: 1.0312535762786865,grad_norm: 0.9999996483767927, iteration: 187484
loss: 1.0101826190948486,grad_norm: 0.9471323998780927, iteration: 187485
loss: 0.9976075291633606,grad_norm: 0.999999318873272, iteration: 187486
loss: 1.0193092823028564,grad_norm: 0.9999991183951562, iteration: 187487
loss: 0.9743902683258057,grad_norm: 0.9999990704837185, iteration: 187488
loss: 1.136947512626648,grad_norm: 0.9999998178483603, iteration: 187489
loss: 1.0423356294631958,grad_norm: 0.9999991478291372, iteration: 187490
loss: 1.0739773511886597,grad_norm: 0.9999992062169781, iteration: 187491
loss: 1.0697745084762573,grad_norm: 0.9999991393862779, iteration: 187492
loss: 1.0375791788101196,grad_norm: 0.9999997931365273, iteration: 187493
loss: 1.1577352285385132,grad_norm: 0.999999510113136, iteration: 187494
loss: 1.0315228700637817,grad_norm: 0.9999999182610477, iteration: 187495
loss: 1.0213123559951782,grad_norm: 0.958642648083857, iteration: 187496
loss: 1.0205098390579224,grad_norm: 0.9999991923826158, iteration: 187497
loss: 1.0284998416900635,grad_norm: 0.9999990286956136, iteration: 187498
loss: 1.0150066614151,grad_norm: 0.9988132798478353, iteration: 187499
loss: 1.0407739877700806,grad_norm: 0.9999994802909528, iteration: 187500
loss: 0.9910308122634888,grad_norm: 0.9999998188611475, iteration: 187501
loss: 1.0682649612426758,grad_norm: 0.999999858258777, iteration: 187502
loss: 0.9988228678703308,grad_norm: 0.999999208403804, iteration: 187503
loss: 1.0282827615737915,grad_norm: 0.9532030245700894, iteration: 187504
loss: 0.9686151146888733,grad_norm: 0.9830028316938071, iteration: 187505
loss: 0.9660031199455261,grad_norm: 0.9999990374066701, iteration: 187506
loss: 1.0196386575698853,grad_norm: 0.9999990854326044, iteration: 187507
loss: 1.1392064094543457,grad_norm: 0.9999996303246629, iteration: 187508
loss: 1.1238903999328613,grad_norm: 0.9999994710906611, iteration: 187509
loss: 1.0087082386016846,grad_norm: 0.9999993476869187, iteration: 187510
loss: 1.0302033424377441,grad_norm: 0.9739787973782116, iteration: 187511
loss: 0.989613950252533,grad_norm: 0.9999993281450128, iteration: 187512
loss: 1.0387448072433472,grad_norm: 0.9999996638346704, iteration: 187513
loss: 1.0866717100143433,grad_norm: 0.9999990087343278, iteration: 187514
loss: 1.0804181098937988,grad_norm: 0.9999993109663048, iteration: 187515
loss: 1.0069407224655151,grad_norm: 0.9999998649584876, iteration: 187516
loss: 1.0084549188613892,grad_norm: 0.9999992997725046, iteration: 187517
loss: 1.0008054971694946,grad_norm: 0.9999990870456632, iteration: 187518
loss: 0.978833019733429,grad_norm: 0.9999992819623696, iteration: 187519
loss: 1.0142097473144531,grad_norm: 0.9999992334214842, iteration: 187520
loss: 1.0258747339248657,grad_norm: 0.9145636703801121, iteration: 187521
loss: 0.9872652292251587,grad_norm: 0.9999992089405556, iteration: 187522
loss: 1.0049275159835815,grad_norm: 0.8061708725029834, iteration: 187523
loss: 0.9648117423057556,grad_norm: 0.9117749351759418, iteration: 187524
loss: 0.9644137024879456,grad_norm: 0.9999991637294038, iteration: 187525
loss: 1.0526037216186523,grad_norm: 0.9999996582386009, iteration: 187526
loss: 0.9894292950630188,grad_norm: 0.9999992662753154, iteration: 187527
loss: 1.0140928030014038,grad_norm: 0.9999989290280894, iteration: 187528
loss: 1.1028311252593994,grad_norm: 0.999999296971989, iteration: 187529
loss: 1.0380373001098633,grad_norm: 0.9999998060707096, iteration: 187530
loss: 1.0423393249511719,grad_norm: 0.9999992320595776, iteration: 187531
loss: 1.0525370836257935,grad_norm: 0.948637341788332, iteration: 187532
loss: 1.0176992416381836,grad_norm: 0.9999993219554526, iteration: 187533
loss: 1.0139830112457275,grad_norm: 0.9999992244735089, iteration: 187534
loss: 1.04922616481781,grad_norm: 0.905377481220102, iteration: 187535
loss: 1.0203053951263428,grad_norm: 0.9999998606703389, iteration: 187536
loss: 1.051710605621338,grad_norm: 0.9999991630624181, iteration: 187537
loss: 1.047929048538208,grad_norm: 0.9999991311032003, iteration: 187538
loss: 0.9869510531425476,grad_norm: 0.9999992441114288, iteration: 187539
loss: 1.0212944746017456,grad_norm: 0.9067461319312181, iteration: 187540
loss: 1.019851803779602,grad_norm: 0.8453858328191117, iteration: 187541
loss: 1.0702139139175415,grad_norm: 0.9999992158969482, iteration: 187542
loss: 0.9843127727508545,grad_norm: 0.9999992409710393, iteration: 187543
loss: 1.1090267896652222,grad_norm: 0.9999996508966332, iteration: 187544
loss: 1.0221306085586548,grad_norm: 0.9854937562592264, iteration: 187545
loss: 1.0259389877319336,grad_norm: 0.8152053856615645, iteration: 187546
loss: 1.0047804117202759,grad_norm: 0.9467589777651356, iteration: 187547
loss: 0.9825001955032349,grad_norm: 0.8455199934437364, iteration: 187548
loss: 1.0562430620193481,grad_norm: 0.9999993616905697, iteration: 187549
loss: 0.9942992329597473,grad_norm: 0.9999989982249048, iteration: 187550
loss: 0.9577599167823792,grad_norm: 0.9211198892645303, iteration: 187551
loss: 0.9788040518760681,grad_norm: 0.9999990752018149, iteration: 187552
loss: 0.9978827238082886,grad_norm: 0.9999991770971987, iteration: 187553
loss: 1.126213788986206,grad_norm: 0.9999997414384779, iteration: 187554
loss: 0.999856173992157,grad_norm: 0.9999991791980446, iteration: 187555
loss: 1.04974365234375,grad_norm: 0.9999995640214561, iteration: 187556
loss: 1.0180070400238037,grad_norm: 0.9999996866233847, iteration: 187557
loss: 0.9918239116668701,grad_norm: 0.9999993122783022, iteration: 187558
loss: 0.9924742579460144,grad_norm: 0.9846006576357813, iteration: 187559
loss: 1.0080469846725464,grad_norm: 0.9999993062321493, iteration: 187560
loss: 1.0408827066421509,grad_norm: 0.999999275458801, iteration: 187561
loss: 1.0145130157470703,grad_norm: 0.9999992369850847, iteration: 187562
loss: 1.0613338947296143,grad_norm: 0.9999991095686943, iteration: 187563
loss: 0.9945887327194214,grad_norm: 0.9999990927745861, iteration: 187564
loss: 0.9984303116798401,grad_norm: 0.9999991082530539, iteration: 187565
loss: 0.9809596538543701,grad_norm: 0.9999991266908481, iteration: 187566
loss: 0.9950166344642639,grad_norm: 0.9292915309175754, iteration: 187567
loss: 1.013157844543457,grad_norm: 0.9307448394302166, iteration: 187568
loss: 1.0474684238433838,grad_norm: 0.9806302799824104, iteration: 187569
loss: 1.0582829713821411,grad_norm: 0.99999916058749, iteration: 187570
loss: 0.9994106292724609,grad_norm: 0.9694983680495669, iteration: 187571
loss: 1.0053983926773071,grad_norm: 0.9999990595159061, iteration: 187572
loss: 0.9802225828170776,grad_norm: 0.8929367022063069, iteration: 187573
loss: 1.0023332834243774,grad_norm: 0.9791068499806354, iteration: 187574
loss: 0.9949678778648376,grad_norm: 0.783010630977836, iteration: 187575
loss: 1.017884612083435,grad_norm: 0.8220317437403158, iteration: 187576
loss: 0.9871284365653992,grad_norm: 0.999227932449597, iteration: 187577
loss: 0.9769095778465271,grad_norm: 0.9999996537847973, iteration: 187578
loss: 1.0441325902938843,grad_norm: 0.99999985681387, iteration: 187579
loss: 0.9908491969108582,grad_norm: 0.8504979339846468, iteration: 187580
loss: 1.0017786026000977,grad_norm: 0.9999993399274626, iteration: 187581
loss: 1.0270819664001465,grad_norm: 0.9901163139259127, iteration: 187582
loss: 1.0427665710449219,grad_norm: 0.999999812987282, iteration: 187583
loss: 1.0098185539245605,grad_norm: 0.8188233876358937, iteration: 187584
loss: 0.9914348721504211,grad_norm: 0.9582893898576759, iteration: 187585
loss: 1.005689263343811,grad_norm: 0.9839703071046728, iteration: 187586
loss: 1.0472655296325684,grad_norm: 0.9999998618145324, iteration: 187587
loss: 0.9623050093650818,grad_norm: 0.9999993576835955, iteration: 187588
loss: 0.9788210391998291,grad_norm: 0.9999993348004385, iteration: 187589
loss: 1.0112714767456055,grad_norm: 0.9411158957020409, iteration: 187590
loss: 0.9684514999389648,grad_norm: 0.9999990144208569, iteration: 187591
loss: 1.0136733055114746,grad_norm: 0.8993026526723062, iteration: 187592
loss: 0.9783173203468323,grad_norm: 0.9663146469297327, iteration: 187593
loss: 0.9994727373123169,grad_norm: 0.9999989411654654, iteration: 187594
loss: 1.010379672050476,grad_norm: 0.9999992030436469, iteration: 187595
loss: 1.0131076574325562,grad_norm: 0.7932209419812222, iteration: 187596
loss: 0.9770731925964355,grad_norm: 0.925030996524603, iteration: 187597
loss: 1.0287463665008545,grad_norm: 0.9868321356318756, iteration: 187598
loss: 1.0329232215881348,grad_norm: 0.999999750902315, iteration: 187599
loss: 0.9964929819107056,grad_norm: 0.9861449443953468, iteration: 187600
loss: 0.9996840357780457,grad_norm: 0.999999312457815, iteration: 187601
loss: 1.0273360013961792,grad_norm: 0.9999992024485056, iteration: 187602
loss: 1.0148590803146362,grad_norm: 0.9580940083092094, iteration: 187603
loss: 0.9988381862640381,grad_norm: 0.9999990177929454, iteration: 187604
loss: 0.9946040511131287,grad_norm: 0.8331261706573375, iteration: 187605
loss: 1.0054367780685425,grad_norm: 0.8632998727480328, iteration: 187606
loss: 0.9826273322105408,grad_norm: 0.9999991008457944, iteration: 187607
loss: 1.0809040069580078,grad_norm: 0.9999991269937445, iteration: 187608
loss: 1.0077989101409912,grad_norm: 0.9999996732355104, iteration: 187609
loss: 1.038749098777771,grad_norm: 0.7942244618543107, iteration: 187610
loss: 1.0166199207305908,grad_norm: 0.9999992861574859, iteration: 187611
loss: 1.0556697845458984,grad_norm: 0.9109378846788017, iteration: 187612
loss: 1.071407675743103,grad_norm: 0.9999991690316534, iteration: 187613
loss: 1.0620957612991333,grad_norm: 0.944859622742669, iteration: 187614
loss: 0.9444125294685364,grad_norm: 0.9999990855858474, iteration: 187615
loss: 1.0186692476272583,grad_norm: 0.9999990265067392, iteration: 187616
loss: 1.114654541015625,grad_norm: 0.9999996025216116, iteration: 187617
loss: 0.9857967495918274,grad_norm: 0.9233228467729956, iteration: 187618
loss: 1.0579853057861328,grad_norm: 0.9153523726656092, iteration: 187619
loss: 1.0090093612670898,grad_norm: 0.999999112582036, iteration: 187620
loss: 1.034340500831604,grad_norm: 0.8962096542900941, iteration: 187621
loss: 1.0000503063201904,grad_norm: 0.9999991693353381, iteration: 187622
loss: 0.9695342183113098,grad_norm: 0.8622883088256749, iteration: 187623
loss: 0.9854097962379456,grad_norm: 0.9999990841868638, iteration: 187624
loss: 0.9693678021430969,grad_norm: 0.999999169916901, iteration: 187625
loss: 1.0215609073638916,grad_norm: 0.9999996511004238, iteration: 187626
loss: 0.9649715423583984,grad_norm: 0.8387152590913333, iteration: 187627
loss: 1.0042407512664795,grad_norm: 0.8558237724928921, iteration: 187628
loss: 1.0037745237350464,grad_norm: 0.9999990793149275, iteration: 187629
loss: 1.0117956399917603,grad_norm: 0.8583434844816131, iteration: 187630
loss: 0.996917724609375,grad_norm: 0.9170846694611258, iteration: 187631
loss: 0.9689745306968689,grad_norm: 0.9258243580774284, iteration: 187632
loss: 1.0034428834915161,grad_norm: 0.9513482354854252, iteration: 187633
loss: 1.0171104669570923,grad_norm: 0.9999990361066563, iteration: 187634
loss: 1.0146089792251587,grad_norm: 0.8079337515138278, iteration: 187635
loss: 1.0240617990493774,grad_norm: 0.973576067157461, iteration: 187636
loss: 0.9825235605239868,grad_norm: 0.9999991219539844, iteration: 187637
loss: 1.0127646923065186,grad_norm: 0.9999990501408822, iteration: 187638
loss: 1.0185524225234985,grad_norm: 0.9999992132033849, iteration: 187639
loss: 1.0229183435440063,grad_norm: 0.9820014208816287, iteration: 187640
loss: 0.9732077121734619,grad_norm: 0.9248630299547846, iteration: 187641
loss: 0.9932976961135864,grad_norm: 0.9949965960265419, iteration: 187642
loss: 1.0312442779541016,grad_norm: 0.9999997424906863, iteration: 187643
loss: 0.9640831351280212,grad_norm: 0.9999990626982898, iteration: 187644
loss: 1.0274314880371094,grad_norm: 0.9999998752054964, iteration: 187645
loss: 0.9842115640640259,grad_norm: 0.96450683470836, iteration: 187646
loss: 0.9760532379150391,grad_norm: 0.9999991245234875, iteration: 187647
loss: 1.0328909158706665,grad_norm: 0.8347291959846397, iteration: 187648
loss: 0.9841166734695435,grad_norm: 0.9987797191214512, iteration: 187649
loss: 1.0349986553192139,grad_norm: 0.9999991503098219, iteration: 187650
loss: 0.9902631640434265,grad_norm: 0.9999992199940342, iteration: 187651
loss: 1.004836082458496,grad_norm: 0.9999990509208628, iteration: 187652
loss: 0.9942915439605713,grad_norm: 0.9999991414575056, iteration: 187653
loss: 0.990665078163147,grad_norm: 0.9112190333387354, iteration: 187654
loss: 0.9668375253677368,grad_norm: 0.9999990935511966, iteration: 187655
loss: 1.0043940544128418,grad_norm: 0.9999993642136539, iteration: 187656
loss: 0.9800162315368652,grad_norm: 0.9999992703766525, iteration: 187657
loss: 1.011853575706482,grad_norm: 0.9999993470175684, iteration: 187658
loss: 1.018855094909668,grad_norm: 0.8419078267536168, iteration: 187659
loss: 0.9691401720046997,grad_norm: 0.9249567928154215, iteration: 187660
loss: 1.0103058815002441,grad_norm: 0.9439847382373064, iteration: 187661
loss: 0.963040292263031,grad_norm: 0.9233114671930552, iteration: 187662
loss: 0.96063631772995,grad_norm: 0.9769163313008313, iteration: 187663
loss: 0.9798577427864075,grad_norm: 0.9999991476881008, iteration: 187664
loss: 0.9905600547790527,grad_norm: 0.9999989887954397, iteration: 187665
loss: 1.0517408847808838,grad_norm: 0.9999990447599505, iteration: 187666
loss: 0.9785035252571106,grad_norm: 0.9631128660725049, iteration: 187667
loss: 1.1080090999603271,grad_norm: 0.9999999529600926, iteration: 187668
loss: 0.9864308834075928,grad_norm: 0.9999991580898288, iteration: 187669
loss: 0.9687486886978149,grad_norm: 0.9999991365051543, iteration: 187670
loss: 0.9937597513198853,grad_norm: 0.8619051276839014, iteration: 187671
loss: 1.0145224332809448,grad_norm: 0.9380922175093057, iteration: 187672
loss: 1.0049370527267456,grad_norm: 0.8588638988757785, iteration: 187673
loss: 1.0073955059051514,grad_norm: 0.9999991961109119, iteration: 187674
loss: 0.9645816087722778,grad_norm: 0.9999991828894734, iteration: 187675
loss: 1.0176249742507935,grad_norm: 0.9271988141379173, iteration: 187676
loss: 0.9921023845672607,grad_norm: 0.9999989984888774, iteration: 187677
loss: 0.9776308536529541,grad_norm: 0.9652456655078092, iteration: 187678
loss: 1.024117112159729,grad_norm: 0.9221169097892187, iteration: 187679
loss: 0.9746131300926208,grad_norm: 0.8839545754605633, iteration: 187680
loss: 1.0285084247589111,grad_norm: 0.9999996418819961, iteration: 187681
loss: 1.0052767992019653,grad_norm: 0.9999990800851368, iteration: 187682
loss: 0.9964777231216431,grad_norm: 0.9835516088167348, iteration: 187683
loss: 1.0067001581192017,grad_norm: 0.999999003201895, iteration: 187684
loss: 0.9982780814170837,grad_norm: 0.9364653015707745, iteration: 187685
loss: 0.9954342246055603,grad_norm: 0.9607407886871944, iteration: 187686
loss: 0.980563223361969,grad_norm: 0.9578246032122646, iteration: 187687
loss: 0.9772028923034668,grad_norm: 0.9999993732769926, iteration: 187688
loss: 0.9924759864807129,grad_norm: 0.9999991362246683, iteration: 187689
loss: 1.0260467529296875,grad_norm: 0.8661696443681168, iteration: 187690
loss: 1.0087133646011353,grad_norm: 0.94374323595646, iteration: 187691
loss: 1.0092124938964844,grad_norm: 0.8644168826401518, iteration: 187692
loss: 0.9811269640922546,grad_norm: 0.9475747764141373, iteration: 187693
loss: 1.0069743394851685,grad_norm: 0.9999998814648459, iteration: 187694
loss: 0.9878910779953003,grad_norm: 0.9096072627522688, iteration: 187695
loss: 0.991474986076355,grad_norm: 0.999999505556704, iteration: 187696
loss: 0.9698469042778015,grad_norm: 0.9720201684473109, iteration: 187697
loss: 1.0070194005966187,grad_norm: 0.8702420471445224, iteration: 187698
loss: 1.0236653089523315,grad_norm: 0.9999999223378895, iteration: 187699
loss: 0.9884106516838074,grad_norm: 0.999999138561717, iteration: 187700
loss: 1.0056639909744263,grad_norm: 0.9999991697167467, iteration: 187701
loss: 1.0235631465911865,grad_norm: 0.9999991486386922, iteration: 187702
loss: 1.0125371217727661,grad_norm: 0.9999990521372294, iteration: 187703
loss: 1.0014314651489258,grad_norm: 0.897956476315425, iteration: 187704
loss: 1.0048681497573853,grad_norm: 0.9999990615838681, iteration: 187705
loss: 0.984348714351654,grad_norm: 0.98849140974397, iteration: 187706
loss: 1.0335354804992676,grad_norm: 0.9999990380683936, iteration: 187707
loss: 0.9720099568367004,grad_norm: 0.9654195615161484, iteration: 187708
loss: 0.9895082712173462,grad_norm: 0.8806082411611544, iteration: 187709
loss: 1.0030378103256226,grad_norm: 0.9829759934702067, iteration: 187710
loss: 0.9980370998382568,grad_norm: 0.9496099786968556, iteration: 187711
loss: 1.0187252759933472,grad_norm: 0.9257271255516101, iteration: 187712
loss: 0.9954091310501099,grad_norm: 0.9999991190422676, iteration: 187713
loss: 0.9859439730644226,grad_norm: 0.9999990077862214, iteration: 187714
loss: 1.017510175704956,grad_norm: 0.9689936031920172, iteration: 187715
loss: 1.0020126104354858,grad_norm: 0.9368333500035866, iteration: 187716
loss: 0.9945525527000427,grad_norm: 0.9841914197853415, iteration: 187717
loss: 0.994680643081665,grad_norm: 0.7480223539726194, iteration: 187718
loss: 1.0202161073684692,grad_norm: 0.913339121677531, iteration: 187719
loss: 1.0074527263641357,grad_norm: 0.9455722794936279, iteration: 187720
loss: 1.002682089805603,grad_norm: 0.999999052702715, iteration: 187721
loss: 0.974242091178894,grad_norm: 0.8558137945792791, iteration: 187722
loss: 1.0391764640808105,grad_norm: 0.9999991935863367, iteration: 187723
loss: 1.015065312385559,grad_norm: 0.8685946042443117, iteration: 187724
loss: 0.9899701476097107,grad_norm: 0.9258729574369039, iteration: 187725
loss: 0.999732255935669,grad_norm: 0.9999991072143728, iteration: 187726
loss: 0.9982346892356873,grad_norm: 0.9212802023319681, iteration: 187727
loss: 0.9815225005149841,grad_norm: 0.9999993160884665, iteration: 187728
loss: 1.0040392875671387,grad_norm: 0.8470996699765686, iteration: 187729
loss: 0.9946145415306091,grad_norm: 0.9884611491039403, iteration: 187730
loss: 1.038476824760437,grad_norm: 0.9999993288630156, iteration: 187731
loss: 0.9881505966186523,grad_norm: 0.9668453636422101, iteration: 187732
loss: 1.0126105546951294,grad_norm: 0.9999988868854983, iteration: 187733
loss: 0.9916258454322815,grad_norm: 0.8938053933046297, iteration: 187734
loss: 0.9695266485214233,grad_norm: 0.8627794490563835, iteration: 187735
loss: 0.979314923286438,grad_norm: 0.9999992260068469, iteration: 187736
loss: 0.9936060309410095,grad_norm: 0.8829897420874832, iteration: 187737
loss: 0.998237669467926,grad_norm: 0.9434479128316376, iteration: 187738
loss: 0.9766165018081665,grad_norm: 0.9999990870675401, iteration: 187739
loss: 1.0019065141677856,grad_norm: 0.999999160399329, iteration: 187740
loss: 0.961368203163147,grad_norm: 0.903712534323115, iteration: 187741
loss: 1.022849440574646,grad_norm: 0.9999991514379777, iteration: 187742
loss: 1.0152851343154907,grad_norm: 0.9460211980073544, iteration: 187743
loss: 1.017173409461975,grad_norm: 0.9774586430438238, iteration: 187744
loss: 1.012131690979004,grad_norm: 0.9999990418534456, iteration: 187745
loss: 0.9573401212692261,grad_norm: 0.9999993423432446, iteration: 187746
loss: 1.025766134262085,grad_norm: 0.9999989400623328, iteration: 187747
loss: 0.9923802614212036,grad_norm: 0.8552086211068768, iteration: 187748
loss: 0.9961364269256592,grad_norm: 0.8842641892002077, iteration: 187749
loss: 0.9892942905426025,grad_norm: 0.9999992182041547, iteration: 187750
loss: 0.9937588572502136,grad_norm: 0.9523476907204639, iteration: 187751
loss: 0.9924598932266235,grad_norm: 0.935703090447415, iteration: 187752
loss: 1.0063936710357666,grad_norm: 0.9441540248567674, iteration: 187753
loss: 1.058657169342041,grad_norm: 0.9999989784252538, iteration: 187754
loss: 1.0302941799163818,grad_norm: 0.9999992806273673, iteration: 187755
loss: 0.982283890247345,grad_norm: 0.9715834294835278, iteration: 187756
loss: 1.048388957977295,grad_norm: 0.9999990145310352, iteration: 187757
loss: 0.9667393565177917,grad_norm: 0.8719242220790505, iteration: 187758
loss: 0.9633041620254517,grad_norm: 0.9395518403954818, iteration: 187759
loss: 0.9812783002853394,grad_norm: 0.8403638143639476, iteration: 187760
loss: 0.9858930110931396,grad_norm: 0.8426231041122476, iteration: 187761
loss: 1.0203578472137451,grad_norm: 0.9999996087090002, iteration: 187762
loss: 1.0115965604782104,grad_norm: 0.8562657388045856, iteration: 187763
loss: 1.0062475204467773,grad_norm: 0.9793187992337729, iteration: 187764
loss: 0.9984778761863708,grad_norm: 0.8090124315705608, iteration: 187765
loss: 0.9897746443748474,grad_norm: 0.9999991418613317, iteration: 187766
loss: 1.034850835800171,grad_norm: 0.9999997528867874, iteration: 187767
loss: 1.0294822454452515,grad_norm: 0.9999990769466568, iteration: 187768
loss: 0.9801193475723267,grad_norm: 0.8710595038950669, iteration: 187769
loss: 0.9920459389686584,grad_norm: 0.9401939305673195, iteration: 187770
loss: 1.0079131126403809,grad_norm: 0.9999991807637021, iteration: 187771
loss: 0.9837413430213928,grad_norm: 0.9999997417307959, iteration: 187772
loss: 1.0204310417175293,grad_norm: 0.879141782977072, iteration: 187773
loss: 1.0026789903640747,grad_norm: 0.8968965714382544, iteration: 187774
loss: 0.9866403937339783,grad_norm: 0.9612546224264825, iteration: 187775
loss: 1.0374550819396973,grad_norm: 0.9485564568446512, iteration: 187776
loss: 1.066247820854187,grad_norm: 0.9999989603936001, iteration: 187777
loss: 1.0116522312164307,grad_norm: 0.9999994198787899, iteration: 187778
loss: 0.9991785287857056,grad_norm: 0.9999991034283061, iteration: 187779
loss: 1.0121878385543823,grad_norm: 0.9458801167970039, iteration: 187780
loss: 1.0002095699310303,grad_norm: 0.9999991122599653, iteration: 187781
loss: 0.9920862913131714,grad_norm: 0.9999991899191599, iteration: 187782
loss: 1.0405211448669434,grad_norm: 0.9999995857165582, iteration: 187783
loss: 1.2414700984954834,grad_norm: 1.0000000021274182, iteration: 187784
loss: 0.9762455821037292,grad_norm: 0.8348046254592535, iteration: 187785
loss: 0.9669914245605469,grad_norm: 0.8926720252331861, iteration: 187786
loss: 1.011698603630066,grad_norm: 0.9999990255994354, iteration: 187787
loss: 0.9834745526313782,grad_norm: 0.7891559377782097, iteration: 187788
loss: 1.103607177734375,grad_norm: 0.9999991145502949, iteration: 187789
loss: 1.0825400352478027,grad_norm: 0.9999999322063614, iteration: 187790
loss: 1.0370945930480957,grad_norm: 0.9999999448917142, iteration: 187791
loss: 1.0052247047424316,grad_norm: 0.7952379937501439, iteration: 187792
loss: 1.0135860443115234,grad_norm: 0.8424864709829935, iteration: 187793
loss: 1.102871060371399,grad_norm: 0.9999992166324211, iteration: 187794
loss: 1.0240557193756104,grad_norm: 0.999999656767127, iteration: 187795
loss: 0.9958796501159668,grad_norm: 0.9449299942068987, iteration: 187796
loss: 1.000691294670105,grad_norm: 0.9495526695792377, iteration: 187797
loss: 0.995566725730896,grad_norm: 0.9999990357091529, iteration: 187798
loss: 0.9728198647499084,grad_norm: 0.9580925123944847, iteration: 187799
loss: 0.9809779524803162,grad_norm: 0.7093750443437915, iteration: 187800
loss: 0.9950120449066162,grad_norm: 0.999999005751438, iteration: 187801
loss: 0.9950197339057922,grad_norm: 0.999999205080036, iteration: 187802
loss: 0.9996041655540466,grad_norm: 0.7645636389978101, iteration: 187803
loss: 0.9717798233032227,grad_norm: 0.8115101000871657, iteration: 187804
loss: 1.021830677986145,grad_norm: 0.9999991705771004, iteration: 187805
loss: 1.0050725936889648,grad_norm: 0.9999990201503789, iteration: 187806
loss: 1.0051041841506958,grad_norm: 0.8695312324286678, iteration: 187807
loss: 0.980279803276062,grad_norm: 0.9490015909412043, iteration: 187808
loss: 1.0315773487091064,grad_norm: 0.9999992552265562, iteration: 187809
loss: 1.0147678852081299,grad_norm: 0.899661794711492, iteration: 187810
loss: 1.1329021453857422,grad_norm: 0.9999993163109097, iteration: 187811
loss: 1.006941556930542,grad_norm: 0.9999991589621594, iteration: 187812
loss: 1.0022244453430176,grad_norm: 0.8185170992987678, iteration: 187813
loss: 1.0189605951309204,grad_norm: 0.9999992517272921, iteration: 187814
loss: 1.0354348421096802,grad_norm: 0.9758301399184187, iteration: 187815
loss: 1.0175238847732544,grad_norm: 0.9960441626911782, iteration: 187816
loss: 0.9801384210586548,grad_norm: 0.8971121600163932, iteration: 187817
loss: 1.006563425064087,grad_norm: 0.728449160980795, iteration: 187818
loss: 0.99216228723526,grad_norm: 0.866882417023684, iteration: 187819
loss: 1.0042659044265747,grad_norm: 0.9999990378338037, iteration: 187820
loss: 0.9921712279319763,grad_norm: 0.9075320315414869, iteration: 187821
loss: 0.9932854771614075,grad_norm: 0.9999999480776838, iteration: 187822
loss: 0.9689521789550781,grad_norm: 0.9495870606733146, iteration: 187823
loss: 0.9729171991348267,grad_norm: 0.9506165938602021, iteration: 187824
loss: 1.046768307685852,grad_norm: 0.9300108567289828, iteration: 187825
loss: 1.0120899677276611,grad_norm: 0.9999991561339492, iteration: 187826
loss: 0.9800788164138794,grad_norm: 0.9117904902309065, iteration: 187827
loss: 1.03324556350708,grad_norm: 0.9999990782512418, iteration: 187828
loss: 0.9848148822784424,grad_norm: 0.8531618239077855, iteration: 187829
loss: 1.0236279964447021,grad_norm: 0.999999202405353, iteration: 187830
loss: 1.0062530040740967,grad_norm: 0.9480417247787344, iteration: 187831
loss: 0.9380580186843872,grad_norm: 0.8791432550605985, iteration: 187832
loss: 0.9889450669288635,grad_norm: 0.9999991955539801, iteration: 187833
loss: 1.0068691968917847,grad_norm: 0.9714733209174371, iteration: 187834
loss: 0.9769596457481384,grad_norm: 0.9211679360979528, iteration: 187835
loss: 0.985970675945282,grad_norm: 0.9646110082207088, iteration: 187836
loss: 1.0196731090545654,grad_norm: 0.9722123721967248, iteration: 187837
loss: 1.025647521018982,grad_norm: 0.868025594960485, iteration: 187838
loss: 1.0071007013320923,grad_norm: 0.9216930984098133, iteration: 187839
loss: 0.9948418140411377,grad_norm: 0.9999990024578962, iteration: 187840
loss: 0.9917458891868591,grad_norm: 0.9333467442242958, iteration: 187841
loss: 0.9897395372390747,grad_norm: 0.8663491561606675, iteration: 187842
loss: 1.0038294792175293,grad_norm: 0.7886588879585721, iteration: 187843
loss: 1.1597539186477661,grad_norm: 0.9999999712078886, iteration: 187844
loss: 1.027142882347107,grad_norm: 0.9999993480876771, iteration: 187845
loss: 0.997413158416748,grad_norm: 0.9607568032476801, iteration: 187846
loss: 0.9595110416412354,grad_norm: 0.9999991008061027, iteration: 187847
loss: 1.0227934122085571,grad_norm: 0.9999992918433028, iteration: 187848
loss: 0.992948055267334,grad_norm: 0.9999991904828435, iteration: 187849
loss: 1.0017414093017578,grad_norm: 0.8964990482074833, iteration: 187850
loss: 0.9812423586845398,grad_norm: 0.8960880388093745, iteration: 187851
loss: 0.9840775728225708,grad_norm: 0.9999990673354336, iteration: 187852
loss: 1.00697660446167,grad_norm: 0.9999993010636632, iteration: 187853
loss: 0.9747812151908875,grad_norm: 0.8498131196510915, iteration: 187854
loss: 1.0276093482971191,grad_norm: 0.9999992861393596, iteration: 187855
loss: 1.0077180862426758,grad_norm: 0.7925004295158019, iteration: 187856
loss: 1.059570074081421,grad_norm: 0.9514905769124318, iteration: 187857
loss: 0.9912688136100769,grad_norm: 0.9999991438711486, iteration: 187858
loss: 1.004332184791565,grad_norm: 0.9278894704448573, iteration: 187859
loss: 1.008836030960083,grad_norm: 0.999999069307829, iteration: 187860
loss: 1.0503777265548706,grad_norm: 0.9762766842240668, iteration: 187861
loss: 0.9873998761177063,grad_norm: 0.9999991784513677, iteration: 187862
loss: 1.0459678173065186,grad_norm: 0.9999991200415893, iteration: 187863
loss: 0.9310905933380127,grad_norm: 0.9386026425304983, iteration: 187864
loss: 0.9880312085151672,grad_norm: 0.9524569076919677, iteration: 187865
loss: 1.0256367921829224,grad_norm: 0.9999992429362103, iteration: 187866
loss: 0.9816420078277588,grad_norm: 0.836506442127276, iteration: 187867
loss: 1.0468380451202393,grad_norm: 0.9999998215654865, iteration: 187868
loss: 0.9898847937583923,grad_norm: 0.999999122281461, iteration: 187869
loss: 1.1751668453216553,grad_norm: 0.9999995897445366, iteration: 187870
loss: 1.1481866836547852,grad_norm: 0.9999992795685179, iteration: 187871
loss: 1.0394517183303833,grad_norm: 0.9999991022258236, iteration: 187872
loss: 0.9613049030303955,grad_norm: 0.957327006497378, iteration: 187873
loss: 1.0286163091659546,grad_norm: 0.9999996656109332, iteration: 187874
loss: 0.9670501351356506,grad_norm: 0.8215341370041128, iteration: 187875
loss: 1.0059372186660767,grad_norm: 0.8887124673460821, iteration: 187876
loss: 0.9918918609619141,grad_norm: 0.9920244753875673, iteration: 187877
loss: 1.0110414028167725,grad_norm: 0.9999990118978836, iteration: 187878
loss: 0.9909706711769104,grad_norm: 0.9653554671591073, iteration: 187879
loss: 0.9987151026725769,grad_norm: 0.999999616424766, iteration: 187880
loss: 0.9836767911911011,grad_norm: 0.8782775441573871, iteration: 187881
loss: 0.9938862919807434,grad_norm: 0.9999992477731672, iteration: 187882
loss: 1.0388147830963135,grad_norm: 0.9168534647664728, iteration: 187883
loss: 1.0133167505264282,grad_norm: 0.9999990951772144, iteration: 187884
loss: 1.006182312965393,grad_norm: 0.8390899945159436, iteration: 187885
loss: 0.9984679222106934,grad_norm: 1.0000000547069885, iteration: 187886
loss: 1.0427334308624268,grad_norm: 0.9999999805948733, iteration: 187887
loss: 1.015807032585144,grad_norm: 0.9999991392257617, iteration: 187888
loss: 0.9767983555793762,grad_norm: 0.9533122925816486, iteration: 187889
loss: 0.9948650002479553,grad_norm: 0.843494840944188, iteration: 187890
loss: 0.956413209438324,grad_norm: 0.9540912561281966, iteration: 187891
loss: 0.9693064093589783,grad_norm: 0.99999909916806, iteration: 187892
loss: 1.0111985206604004,grad_norm: 0.9999989885766425, iteration: 187893
loss: 1.024457335472107,grad_norm: 0.9999990577316628, iteration: 187894
loss: 0.9956724643707275,grad_norm: 0.9422013519639187, iteration: 187895
loss: 1.0345731973648071,grad_norm: 0.9999991227311679, iteration: 187896
loss: 1.0011792182922363,grad_norm: 0.9999996644856819, iteration: 187897
loss: 0.9995291829109192,grad_norm: 0.9860904632686817, iteration: 187898
loss: 0.9952756762504578,grad_norm: 0.7694583682799809, iteration: 187899
loss: 1.0688650608062744,grad_norm: 0.836514198123742, iteration: 187900
loss: 1.0133744478225708,grad_norm: 0.9999997922896713, iteration: 187901
loss: 1.0410640239715576,grad_norm: 0.8887593028256524, iteration: 187902
loss: 1.0148159265518188,grad_norm: 0.9999990649292348, iteration: 187903
loss: 0.9931433200836182,grad_norm: 0.9609063461382543, iteration: 187904
loss: 0.9622709155082703,grad_norm: 0.8697511419642691, iteration: 187905
loss: 1.02595853805542,grad_norm: 0.9999989738562406, iteration: 187906
loss: 1.0249613523483276,grad_norm: 0.9999993223115579, iteration: 187907
loss: 0.9467821717262268,grad_norm: 0.8080263413049172, iteration: 187908
loss: 1.168354868888855,grad_norm: 0.9999990402618628, iteration: 187909
loss: 0.9463288187980652,grad_norm: 0.9500807589727149, iteration: 187910
loss: 1.029172658920288,grad_norm: 0.9360241700862862, iteration: 187911
loss: 1.0150667428970337,grad_norm: 0.9213821593593083, iteration: 187912
loss: 1.0041649341583252,grad_norm: 0.9978912763696561, iteration: 187913
loss: 1.028799057006836,grad_norm: 0.9418103527033973, iteration: 187914
loss: 0.9858949184417725,grad_norm: 0.9415983564433846, iteration: 187915
loss: 0.9844409823417664,grad_norm: 0.8968520440849984, iteration: 187916
loss: 1.0195273160934448,grad_norm: 0.9999991122627042, iteration: 187917
loss: 1.0136009454727173,grad_norm: 0.9999991722430274, iteration: 187918
loss: 0.9712356328964233,grad_norm: 0.9615743572512678, iteration: 187919
loss: 1.0317058563232422,grad_norm: 0.9999991769759632, iteration: 187920
loss: 1.0166860818862915,grad_norm: 0.9999992308096016, iteration: 187921
loss: 0.9968681931495667,grad_norm: 0.9499245650181452, iteration: 187922
loss: 1.0683326721191406,grad_norm: 0.9999991287400934, iteration: 187923
loss: 1.020284652709961,grad_norm: 0.9392799015904235, iteration: 187924
loss: 0.991563081741333,grad_norm: 0.9999994986627483, iteration: 187925
loss: 0.999336302280426,grad_norm: 0.8219634315506775, iteration: 187926
loss: 0.9914833307266235,grad_norm: 0.9224915638756132, iteration: 187927
loss: 0.9603340029716492,grad_norm: 0.9243797904806591, iteration: 187928
loss: 0.9754786491394043,grad_norm: 0.9999990453276189, iteration: 187929
loss: 1.0443943738937378,grad_norm: 0.9999991076027314, iteration: 187930
loss: 0.9755678176879883,grad_norm: 0.9537246332307143, iteration: 187931
loss: 1.0108872652053833,grad_norm: 0.8465045691793497, iteration: 187932
loss: 0.9867550730705261,grad_norm: 0.9494352586403393, iteration: 187933
loss: 1.0733121633529663,grad_norm: 0.9999991516278612, iteration: 187934
loss: 1.012107253074646,grad_norm: 0.9999995116493136, iteration: 187935
loss: 1.0057873725891113,grad_norm: 0.99999905738023, iteration: 187936
loss: 1.00184166431427,grad_norm: 0.9999991661387255, iteration: 187937
loss: 1.014459252357483,grad_norm: 0.8251979707894821, iteration: 187938
loss: 1.0098811388015747,grad_norm: 0.968297120836104, iteration: 187939
loss: 0.994979202747345,grad_norm: 0.9826266924643207, iteration: 187940
loss: 1.0045878887176514,grad_norm: 0.8130869002582832, iteration: 187941
loss: 1.018828272819519,grad_norm: 0.9999991505810508, iteration: 187942
loss: 0.9855715036392212,grad_norm: 0.9999991240047866, iteration: 187943
loss: 0.9945045709609985,grad_norm: 0.9907399058403011, iteration: 187944
loss: 0.9945246577262878,grad_norm: 0.9537271362954592, iteration: 187945
loss: 1.0009140968322754,grad_norm: 0.9999990043558684, iteration: 187946
loss: 0.9753640294075012,grad_norm: 0.7902323131438882, iteration: 187947
loss: 0.97511887550354,grad_norm: 0.9999992286983274, iteration: 187948
loss: 0.9967085123062134,grad_norm: 0.9318551540807766, iteration: 187949
loss: 1.0009621381759644,grad_norm: 0.8324268525646797, iteration: 187950
loss: 1.0091074705123901,grad_norm: 0.9999997808012351, iteration: 187951
loss: 1.0020183324813843,grad_norm: 0.9999990595778554, iteration: 187952
loss: 1.118031620979309,grad_norm: 0.9999991609480965, iteration: 187953
loss: 1.027878999710083,grad_norm: 0.882806849190161, iteration: 187954
loss: 1.0183175802230835,grad_norm: 0.8439850299240232, iteration: 187955
loss: 1.0749739408493042,grad_norm: 0.9999991883252713, iteration: 187956
loss: 1.0179232358932495,grad_norm: 0.9999990913678664, iteration: 187957
loss: 0.9859758615493774,grad_norm: 0.9999990045916899, iteration: 187958
loss: 1.0165175199508667,grad_norm: 0.9999990288193153, iteration: 187959
loss: 0.9923173785209656,grad_norm: 0.99999897378267, iteration: 187960
loss: 0.9869097471237183,grad_norm: 0.9999990452390064, iteration: 187961
loss: 0.9883978962898254,grad_norm: 0.8498239890160002, iteration: 187962
loss: 0.9680879712104797,grad_norm: 0.9999992123509985, iteration: 187963
loss: 1.0103135108947754,grad_norm: 0.8809321931849512, iteration: 187964
loss: 1.0502734184265137,grad_norm: 0.9999998866310917, iteration: 187965
loss: 0.9905964136123657,grad_norm: 0.9999991691146035, iteration: 187966
loss: 0.9846752285957336,grad_norm: 0.9759433906310742, iteration: 187967
loss: 0.997763454914093,grad_norm: 0.9999991704472091, iteration: 187968
loss: 1.1346385478973389,grad_norm: 0.9999990774806193, iteration: 187969
loss: 1.0359771251678467,grad_norm: 0.9999992429483036, iteration: 187970
loss: 0.9707993268966675,grad_norm: 0.8664443224637721, iteration: 187971
loss: 0.9700579047203064,grad_norm: 0.953499920243874, iteration: 187972
loss: 1.042893409729004,grad_norm: 0.999999266341887, iteration: 187973
loss: 0.976497232913971,grad_norm: 0.8922031558980325, iteration: 187974
loss: 1.0675327777862549,grad_norm: 0.9821739485846742, iteration: 187975
loss: 0.9946427345275879,grad_norm: 0.935120606614961, iteration: 187976
loss: 0.9904367923736572,grad_norm: 0.9842304881894052, iteration: 187977
loss: 1.0213117599487305,grad_norm: 0.9999991688621539, iteration: 187978
loss: 1.003943681716919,grad_norm: 0.9999992113723866, iteration: 187979
loss: 1.0017399787902832,grad_norm: 0.8965300682097648, iteration: 187980
loss: 1.0087276697158813,grad_norm: 0.9175526876905429, iteration: 187981
loss: 1.0258502960205078,grad_norm: 0.9999990831429533, iteration: 187982
loss: 1.1148313283920288,grad_norm: 0.9999993869621493, iteration: 187983
loss: 0.9981715083122253,grad_norm: 0.9999991350234423, iteration: 187984
loss: 0.9833298921585083,grad_norm: 0.9999989802733165, iteration: 187985
loss: 0.9871162176132202,grad_norm: 0.7906176629127867, iteration: 187986
loss: 0.9991263747215271,grad_norm: 0.999999108720492, iteration: 187987
loss: 0.9722101092338562,grad_norm: 0.9633112677819214, iteration: 187988
loss: 1.0034452676773071,grad_norm: 0.9215093549180805, iteration: 187989
loss: 0.9892794489860535,grad_norm: 0.9999991108072381, iteration: 187990
loss: 0.9831823110580444,grad_norm: 0.981697008825339, iteration: 187991
loss: 1.0165822505950928,grad_norm: 0.9999990046172309, iteration: 187992
loss: 1.005981683731079,grad_norm: 0.785843123198683, iteration: 187993
loss: 1.038898229598999,grad_norm: 0.9999992031200893, iteration: 187994
loss: 1.0210319757461548,grad_norm: 0.9989266316055241, iteration: 187995
loss: 1.0046679973602295,grad_norm: 0.9915326361814264, iteration: 187996
loss: 1.0164958238601685,grad_norm: 0.8666675825599135, iteration: 187997
loss: 1.029054045677185,grad_norm: 0.9506784040502115, iteration: 187998
loss: 1.0004173517227173,grad_norm: 0.9251168331078946, iteration: 187999
loss: 0.9786109328269958,grad_norm: 0.7969879251302918, iteration: 188000
loss: 0.9665383100509644,grad_norm: 0.9999990846127407, iteration: 188001
loss: 1.0079281330108643,grad_norm: 0.9257130995007001, iteration: 188002
loss: 0.997790515422821,grad_norm: 0.9999997530405815, iteration: 188003
loss: 1.020335078239441,grad_norm: 0.9811524241540124, iteration: 188004
loss: 0.9933250546455383,grad_norm: 0.8931755884971033, iteration: 188005
loss: 1.0229483842849731,grad_norm: 0.9999991004185647, iteration: 188006
loss: 0.9933050274848938,grad_norm: 0.8869532507102936, iteration: 188007
loss: 0.9792728424072266,grad_norm: 0.9999992434273796, iteration: 188008
loss: 1.2039694786071777,grad_norm: 0.9999992928566994, iteration: 188009
loss: 0.9999451041221619,grad_norm: 0.9673244551112554, iteration: 188010
loss: 0.9905498027801514,grad_norm: 0.8669832679069689, iteration: 188011
loss: 0.9905946850776672,grad_norm: 0.9999992309154471, iteration: 188012
loss: 1.0262930393218994,grad_norm: 0.8783172303442914, iteration: 188013
loss: 0.9849058389663696,grad_norm: 0.9426822696278506, iteration: 188014
loss: 1.0264980792999268,grad_norm: 0.903263249297515, iteration: 188015
loss: 1.0124253034591675,grad_norm: 0.9999989898299603, iteration: 188016
loss: 0.9884724020957947,grad_norm: 0.8583902784706643, iteration: 188017
loss: 0.9809805750846863,grad_norm: 0.8732748754326042, iteration: 188018
loss: 1.0265936851501465,grad_norm: 0.9053177800813111, iteration: 188019
loss: 1.0001204013824463,grad_norm: 0.9999992106776313, iteration: 188020
loss: 0.9749844670295715,grad_norm: 0.8870225201287962, iteration: 188021
loss: 1.0081502199172974,grad_norm: 0.9905680360353791, iteration: 188022
loss: 1.0443000793457031,grad_norm: 0.9999991419891, iteration: 188023
loss: 1.0288288593292236,grad_norm: 0.8601893033371484, iteration: 188024
loss: 1.0271066427230835,grad_norm: 0.9611709892234911, iteration: 188025
loss: 0.992839515209198,grad_norm: 0.9319523717696157, iteration: 188026
loss: 1.0440629720687866,grad_norm: 0.9999991708385245, iteration: 188027
loss: 1.000212550163269,grad_norm: 0.8946648813673531, iteration: 188028
loss: 1.1000566482543945,grad_norm: 0.9999999627947936, iteration: 188029
loss: 0.9890773296356201,grad_norm: 0.9999990351378092, iteration: 188030
loss: 0.992518424987793,grad_norm: 0.9999991295041132, iteration: 188031
loss: 1.010388731956482,grad_norm: 0.9999991164307138, iteration: 188032
loss: 0.9789565801620483,grad_norm: 0.8633762532405382, iteration: 188033
loss: 1.0060194730758667,grad_norm: 0.9717489446536969, iteration: 188034
loss: 0.9776616096496582,grad_norm: 0.9215298034266071, iteration: 188035
loss: 0.9982834458351135,grad_norm: 0.9999991206084508, iteration: 188036
loss: 0.999522864818573,grad_norm: 0.9999990799025419, iteration: 188037
loss: 1.0075452327728271,grad_norm: 0.9999993702709845, iteration: 188038
loss: 1.0363209247589111,grad_norm: 0.999999132332738, iteration: 188039
loss: 0.9817714095115662,grad_norm: 0.8023866749380302, iteration: 188040
loss: 1.027410864830017,grad_norm: 0.9999990108057175, iteration: 188041
loss: 1.0173531770706177,grad_norm: 0.9783064657690548, iteration: 188042
loss: 1.0204403400421143,grad_norm: 0.8343261461915762, iteration: 188043
loss: 1.0462080240249634,grad_norm: 0.951592472460502, iteration: 188044
loss: 0.9935630559921265,grad_norm: 0.9999989930255444, iteration: 188045
loss: 0.9929221272468567,grad_norm: 0.9999989267650855, iteration: 188046
loss: 1.0117809772491455,grad_norm: 0.9729226876236967, iteration: 188047
loss: 1.0099643468856812,grad_norm: 0.9835497572066716, iteration: 188048
loss: 0.9658499956130981,grad_norm: 0.9999992294872487, iteration: 188049
loss: 1.0017414093017578,grad_norm: 0.9644312144807333, iteration: 188050
loss: 1.008411169052124,grad_norm: 0.9999992444544009, iteration: 188051
loss: 0.9500203132629395,grad_norm: 0.9566315376400311, iteration: 188052
loss: 0.977043092250824,grad_norm: 0.9001269049282445, iteration: 188053
loss: 1.006488561630249,grad_norm: 0.8847896070728355, iteration: 188054
loss: 0.9993624091148376,grad_norm: 0.9999991709108603, iteration: 188055
loss: 1.051882028579712,grad_norm: 0.9743894983416633, iteration: 188056
loss: 1.0113178491592407,grad_norm: 0.9999991095658918, iteration: 188057
loss: 1.0288984775543213,grad_norm: 0.9910608281398461, iteration: 188058
loss: 1.020102620124817,grad_norm: 0.9016268117150905, iteration: 188059
loss: 0.9916397929191589,grad_norm: 0.9630694841995421, iteration: 188060
loss: 0.999652087688446,grad_norm: 0.9999990672307878, iteration: 188061
loss: 1.0439921617507935,grad_norm: 0.9999992526744192, iteration: 188062
loss: 1.0247673988342285,grad_norm: 0.7848743547173255, iteration: 188063
loss: 0.983635425567627,grad_norm: 0.8486047489467111, iteration: 188064
loss: 1.0269783735275269,grad_norm: 0.999999020707243, iteration: 188065
loss: 1.0033901929855347,grad_norm: 0.9788753789797113, iteration: 188066
loss: 0.9966950416564941,grad_norm: 0.9999996924597068, iteration: 188067
loss: 1.0091320276260376,grad_norm: 0.9999991685759262, iteration: 188068
loss: 1.027190089225769,grad_norm: 0.9999991154752143, iteration: 188069
loss: 0.9942971467971802,grad_norm: 0.9999990631429433, iteration: 188070
loss: 1.0139636993408203,grad_norm: 0.9083291284933034, iteration: 188071
loss: 1.0138133764266968,grad_norm: 0.9410853501850942, iteration: 188072
loss: 0.9953367710113525,grad_norm: 0.9853049719014428, iteration: 188073
loss: 0.9972470998764038,grad_norm: 0.9015785189908506, iteration: 188074
loss: 1.0025005340576172,grad_norm: 0.9999991174534422, iteration: 188075
loss: 0.9878365993499756,grad_norm: 0.9425745239298994, iteration: 188076
loss: 1.0092267990112305,grad_norm: 0.9463378985326527, iteration: 188077
loss: 0.9911894798278809,grad_norm: 0.9999992457143106, iteration: 188078
loss: 1.0093401670455933,grad_norm: 0.9681192812328899, iteration: 188079
loss: 1.0005686283111572,grad_norm: 0.9265123328296422, iteration: 188080
loss: 1.0293910503387451,grad_norm: 0.9999995428988091, iteration: 188081
loss: 0.9719897508621216,grad_norm: 0.9113415389388243, iteration: 188082
loss: 0.9901312589645386,grad_norm: 0.9999990497856742, iteration: 188083
loss: 1.0909829139709473,grad_norm: 0.9999993411824151, iteration: 188084
loss: 1.0101131200790405,grad_norm: 0.9748714990298266, iteration: 188085
loss: 1.0207886695861816,grad_norm: 0.8127348361663671, iteration: 188086
loss: 1.0123008489608765,grad_norm: 0.9852590129248415, iteration: 188087
loss: 1.0075854063034058,grad_norm: 0.9981263427988509, iteration: 188088
loss: 1.0805743932724,grad_norm: 0.9999992278027646, iteration: 188089
loss: 0.955298125743866,grad_norm: 0.9999991916561093, iteration: 188090
loss: 0.9687119126319885,grad_norm: 0.9999990520862352, iteration: 188091
loss: 1.0051699876785278,grad_norm: 0.9999990848833803, iteration: 188092
loss: 0.9981075525283813,grad_norm: 0.9999991209807917, iteration: 188093
loss: 0.9836109280586243,grad_norm: 0.9999990848764415, iteration: 188094
loss: 0.9943286180496216,grad_norm: 0.9999991643990082, iteration: 188095
loss: 1.0050209760665894,grad_norm: 0.9999992139925811, iteration: 188096
loss: 0.9623236656188965,grad_norm: 0.8177783918315625, iteration: 188097
loss: 0.9647497534751892,grad_norm: 0.9999990627000795, iteration: 188098
loss: 0.9624267816543579,grad_norm: 0.927795849708991, iteration: 188099
loss: 0.9924730658531189,grad_norm: 0.9662068376011744, iteration: 188100
loss: 0.9911170601844788,grad_norm: 0.9803734430572548, iteration: 188101
loss: 0.9807959198951721,grad_norm: 0.9999989821410937, iteration: 188102
loss: 0.9801591038703918,grad_norm: 0.8752405758759135, iteration: 188103
loss: 0.9873730540275574,grad_norm: 0.9999989997724454, iteration: 188104
loss: 0.9854446649551392,grad_norm: 0.9698156974047638, iteration: 188105
loss: 1.0069159269332886,grad_norm: 0.9321402246821354, iteration: 188106
loss: 1.104161262512207,grad_norm: 0.99999908508898, iteration: 188107
loss: 0.9841267466545105,grad_norm: 0.7884207060187368, iteration: 188108
loss: 0.9905937910079956,grad_norm: 0.9999991467135301, iteration: 188109
loss: 1.0257529020309448,grad_norm: 0.9524959373048509, iteration: 188110
loss: 1.0036174058914185,grad_norm: 0.9319849963586511, iteration: 188111
loss: 0.9727341532707214,grad_norm: 0.9211254368057321, iteration: 188112
loss: 1.002078652381897,grad_norm: 0.8217692183904085, iteration: 188113
loss: 0.9915209412574768,grad_norm: 0.9188596752380764, iteration: 188114
loss: 1.01241934299469,grad_norm: 0.7846965836154387, iteration: 188115
loss: 1.0026763677597046,grad_norm: 0.9895726024561777, iteration: 188116
loss: 0.9890954494476318,grad_norm: 0.7899088535266982, iteration: 188117
loss: 1.0134669542312622,grad_norm: 0.864941679266852, iteration: 188118
loss: 1.0407795906066895,grad_norm: 0.9999991046452565, iteration: 188119
loss: 1.0309836864471436,grad_norm: 0.8341577524966992, iteration: 188120
loss: 0.995826005935669,grad_norm: 0.9428198428849669, iteration: 188121
loss: 0.985909104347229,grad_norm: 0.9155748646284328, iteration: 188122
loss: 0.9502594470977783,grad_norm: 0.9773261742484106, iteration: 188123
loss: 0.9873911142349243,grad_norm: 0.8400384893870989, iteration: 188124
loss: 1.021715760231018,grad_norm: 0.9999999439541243, iteration: 188125
loss: 1.0021430253982544,grad_norm: 0.8594516801471059, iteration: 188126
loss: 1.020432710647583,grad_norm: 0.9999998640441555, iteration: 188127
loss: 1.0067377090454102,grad_norm: 0.9999990150111521, iteration: 188128
loss: 1.0170847177505493,grad_norm: 0.9999991828024168, iteration: 188129
loss: 1.0277650356292725,grad_norm: 0.9999994759023554, iteration: 188130
loss: 1.0113539695739746,grad_norm: 0.9116319842927751, iteration: 188131
loss: 1.012871503829956,grad_norm: 0.9999990411142105, iteration: 188132
loss: 0.990227997303009,grad_norm: 0.8904304281466345, iteration: 188133
loss: 0.9956784248352051,grad_norm: 0.970508119011381, iteration: 188134
loss: 1.0205564498901367,grad_norm: 0.9999998278965593, iteration: 188135
loss: 1.0030015707015991,grad_norm: 0.7590892212460961, iteration: 188136
loss: 0.9783856868743896,grad_norm: 0.8672698196968271, iteration: 188137
loss: 0.9876797795295715,grad_norm: 0.9999990454932866, iteration: 188138
loss: 1.0548158884048462,grad_norm: 0.9999994852481796, iteration: 188139
loss: 1.0285314321517944,grad_norm: 0.9883942670947459, iteration: 188140
loss: 0.9829068779945374,grad_norm: 0.9102939583686082, iteration: 188141
loss: 0.9963755011558533,grad_norm: 0.9999992292082839, iteration: 188142
loss: 0.9859424829483032,grad_norm: 0.9166695073020867, iteration: 188143
loss: 0.986666202545166,grad_norm: 0.9999990828722652, iteration: 188144
loss: 1.066776990890503,grad_norm: 0.9999990632451721, iteration: 188145
loss: 1.0012128353118896,grad_norm: 0.8620377881366297, iteration: 188146
loss: 1.0015594959259033,grad_norm: 0.914606353116677, iteration: 188147
loss: 0.9719944596290588,grad_norm: 0.935826165524569, iteration: 188148
loss: 0.9777753353118896,grad_norm: 0.9762664886010533, iteration: 188149
loss: 0.9871941208839417,grad_norm: 0.9999992627441352, iteration: 188150
loss: 1.0020191669464111,grad_norm: 0.9366274288214772, iteration: 188151
loss: 0.9760655760765076,grad_norm: 0.9999991604724848, iteration: 188152
loss: 0.9863737225532532,grad_norm: 0.9168273753959736, iteration: 188153
loss: 1.013104796409607,grad_norm: 0.9296406879561638, iteration: 188154
loss: 0.9895209074020386,grad_norm: 0.881519443425678, iteration: 188155
loss: 1.001297950744629,grad_norm: 0.9999990844210759, iteration: 188156
loss: 1.0033053159713745,grad_norm: 0.9700928369328933, iteration: 188157
loss: 0.9957484602928162,grad_norm: 0.9999992132126306, iteration: 188158
loss: 0.9656184911727905,grad_norm: 0.999999110428272, iteration: 188159
loss: 1.0060603618621826,grad_norm: 0.9603172959853618, iteration: 188160
loss: 1.0111215114593506,grad_norm: 0.7577794665782275, iteration: 188161
loss: 0.9926112294197083,grad_norm: 0.9780712739734467, iteration: 188162
loss: 1.048555850982666,grad_norm: 0.8816848606272559, iteration: 188163
loss: 0.9814055562019348,grad_norm: 0.8867652626371112, iteration: 188164
loss: 0.9709042906761169,grad_norm: 0.9999989837429653, iteration: 188165
loss: 0.9803745150566101,grad_norm: 0.9472320130955387, iteration: 188166
loss: 1.0359299182891846,grad_norm: 0.9190480506989109, iteration: 188167
loss: 1.0194203853607178,grad_norm: 0.9999991548160518, iteration: 188168
loss: 1.0136878490447998,grad_norm: 0.9999991953153221, iteration: 188169
loss: 1.0075162649154663,grad_norm: 0.9529430585563388, iteration: 188170
loss: 1.0142854452133179,grad_norm: 0.9927730499920131, iteration: 188171
loss: 0.9802384972572327,grad_norm: 0.8058776456090269, iteration: 188172
loss: 0.9504109025001526,grad_norm: 0.9999990969234221, iteration: 188173
loss: 0.9951301217079163,grad_norm: 0.9999996765180538, iteration: 188174
loss: 1.004648208618164,grad_norm: 0.9999990062178157, iteration: 188175
loss: 1.0060473680496216,grad_norm: 0.999999169397171, iteration: 188176
loss: 0.9888260960578918,grad_norm: 0.9739226670857586, iteration: 188177
loss: 0.98734050989151,grad_norm: 0.9888300553405667, iteration: 188178
loss: 1.0038201808929443,grad_norm: 0.999999408691481, iteration: 188179
loss: 1.0173238515853882,grad_norm: 0.9245306228360113, iteration: 188180
loss: 1.0052073001861572,grad_norm: 0.9686535043752913, iteration: 188181
loss: 0.9905222058296204,grad_norm: 0.945123014710271, iteration: 188182
loss: 0.998188853263855,grad_norm: 0.8697623227580383, iteration: 188183
loss: 0.9687800407409668,grad_norm: 0.7704120867303038, iteration: 188184
loss: 1.0886305570602417,grad_norm: 0.9683578029413547, iteration: 188185
loss: 1.012618064880371,grad_norm: 0.9999991114054508, iteration: 188186
loss: 0.946491003036499,grad_norm: 0.9999991501691142, iteration: 188187
loss: 0.9661130309104919,grad_norm: 0.999999227104781, iteration: 188188
loss: 0.9654921889305115,grad_norm: 0.9999990083744831, iteration: 188189
loss: 1.0148383378982544,grad_norm: 0.6857544860288421, iteration: 188190
loss: 0.9887993931770325,grad_norm: 0.9172468918247938, iteration: 188191
loss: 1.0140577554702759,grad_norm: 0.9373487511432205, iteration: 188192
loss: 0.9830459952354431,grad_norm: 0.9810121122806142, iteration: 188193
loss: 1.0091111660003662,grad_norm: 0.8948148955281031, iteration: 188194
loss: 1.0231481790542603,grad_norm: 0.7964071030478729, iteration: 188195
loss: 1.0023127794265747,grad_norm: 0.9626390271929682, iteration: 188196
loss: 0.9683996438980103,grad_norm: 0.9334494609393612, iteration: 188197
loss: 1.0201572179794312,grad_norm: 0.9237138032347781, iteration: 188198
loss: 0.9762888550758362,grad_norm: 0.8942075265761745, iteration: 188199
loss: 1.0132108926773071,grad_norm: 0.999999196801479, iteration: 188200
loss: 0.9875180721282959,grad_norm: 0.9996512205923646, iteration: 188201
loss: 1.0414202213287354,grad_norm: 0.999999347007531, iteration: 188202
loss: 1.0025678873062134,grad_norm: 0.9999991395086787, iteration: 188203
loss: 1.0033365488052368,grad_norm: 0.8606426914848216, iteration: 188204
loss: 0.9622208476066589,grad_norm: 0.9325075688176079, iteration: 188205
loss: 1.0026605129241943,grad_norm: 0.9546846406910225, iteration: 188206
loss: 1.0176159143447876,grad_norm: 0.9999990723119125, iteration: 188207
loss: 0.9917027354240417,grad_norm: 0.9999991091204056, iteration: 188208
loss: 0.9796233177185059,grad_norm: 0.9228683742427464, iteration: 188209
loss: 1.0149874687194824,grad_norm: 0.9291460093891802, iteration: 188210
loss: 1.0065299272537231,grad_norm: 0.9426766999847683, iteration: 188211
loss: 0.9799168705940247,grad_norm: 0.9130368231936069, iteration: 188212
loss: 0.9974648356437683,grad_norm: 0.8996184698242138, iteration: 188213
loss: 1.0169585943222046,grad_norm: 0.7671774228291239, iteration: 188214
loss: 1.0285671949386597,grad_norm: 0.9999991654350505, iteration: 188215
loss: 0.9899656772613525,grad_norm: 0.9275244958776125, iteration: 188216
loss: 1.0220990180969238,grad_norm: 0.9374119369132117, iteration: 188217
loss: 0.9958731532096863,grad_norm: 0.8732834473264028, iteration: 188218
loss: 1.0170189142227173,grad_norm: 0.9497659546674716, iteration: 188219
loss: 1.0256741046905518,grad_norm: 0.9489769117085852, iteration: 188220
loss: 1.0573748350143433,grad_norm: 0.9693876819159851, iteration: 188221
loss: 0.9594476819038391,grad_norm: 0.9999990988649129, iteration: 188222
loss: 0.9942659139633179,grad_norm: 0.9276726830090151, iteration: 188223
loss: 1.0168706178665161,grad_norm: 0.9227106985635898, iteration: 188224
loss: 1.1195281744003296,grad_norm: 0.9999999242930254, iteration: 188225
loss: 1.0093581676483154,grad_norm: 0.8622117478739112, iteration: 188226
loss: 1.0034143924713135,grad_norm: 0.9378320538206298, iteration: 188227
loss: 1.0016611814498901,grad_norm: 0.9232784513328472, iteration: 188228
loss: 0.9897002577781677,grad_norm: 0.9999990566093109, iteration: 188229
loss: 0.9951725006103516,grad_norm: 0.9999991234401088, iteration: 188230
loss: 1.0180550813674927,grad_norm: 0.9397565003229332, iteration: 188231
loss: 0.9922747611999512,grad_norm: 0.9559814342792282, iteration: 188232
loss: 0.998525083065033,grad_norm: 0.9999991162086873, iteration: 188233
loss: 1.0661988258361816,grad_norm: 0.9999990486703759, iteration: 188234
loss: 1.070963740348816,grad_norm: 0.9999993294773593, iteration: 188235
loss: 0.9898051023483276,grad_norm: 0.995285183932813, iteration: 188236
loss: 0.9824903011322021,grad_norm: 0.8264837552402788, iteration: 188237
loss: 0.9850266575813293,grad_norm: 0.9041069761306879, iteration: 188238
loss: 1.0028538703918457,grad_norm: 0.9999989820430719, iteration: 188239
loss: 0.9983485341072083,grad_norm: 0.9427588160837309, iteration: 188240
loss: 1.0008074045181274,grad_norm: 0.9131960205931702, iteration: 188241
loss: 1.0175505876541138,grad_norm: 0.9483702647632083, iteration: 188242
loss: 1.0114517211914062,grad_norm: 0.9999989867731315, iteration: 188243
loss: 1.027469277381897,grad_norm: 0.8328083353349429, iteration: 188244
loss: 0.9469530582427979,grad_norm: 0.9999991755390105, iteration: 188245
loss: 0.9702338576316833,grad_norm: 0.9440567701421059, iteration: 188246
loss: 0.9858824014663696,grad_norm: 0.8454414041704057, iteration: 188247
loss: 1.0106626749038696,grad_norm: 0.961771560144151, iteration: 188248
loss: 1.014876127243042,grad_norm: 0.994364495656388, iteration: 188249
loss: 1.0561566352844238,grad_norm: 0.9999992956342133, iteration: 188250
loss: 1.0040102005004883,grad_norm: 0.8389766244785131, iteration: 188251
loss: 1.0139108896255493,grad_norm: 0.8471563137898881, iteration: 188252
loss: 0.9794938564300537,grad_norm: 0.9346683631625677, iteration: 188253
loss: 0.9621118307113647,grad_norm: 0.9083893480753839, iteration: 188254
loss: 1.131229043006897,grad_norm: 0.9999991060243355, iteration: 188255
loss: 1.084030270576477,grad_norm: 0.980209984789643, iteration: 188256
loss: 1.039862036705017,grad_norm: 0.9999998111414495, iteration: 188257
loss: 0.9856936931610107,grad_norm: 0.999999200502474, iteration: 188258
loss: 1.0155994892120361,grad_norm: 0.9999991734877588, iteration: 188259
loss: 0.992769718170166,grad_norm: 0.9193712787654675, iteration: 188260
loss: 1.0019073486328125,grad_norm: 0.999999001991752, iteration: 188261
loss: 1.0210881233215332,grad_norm: 0.915655680543276, iteration: 188262
loss: 0.9902012348175049,grad_norm: 0.9704926343471058, iteration: 188263
loss: 1.0219117403030396,grad_norm: 0.9478468321050761, iteration: 188264
loss: 0.9938246011734009,grad_norm: 0.9188102592455175, iteration: 188265
loss: 1.0506230592727661,grad_norm: 0.9999993416712852, iteration: 188266
loss: 0.9541382789611816,grad_norm: 0.9968387760741433, iteration: 188267
loss: 1.0355044603347778,grad_norm: 0.8713182941327594, iteration: 188268
loss: 0.9726479053497314,grad_norm: 0.9999992694276509, iteration: 188269
loss: 0.9953106641769409,grad_norm: 0.9433055092670599, iteration: 188270
loss: 0.9799860119819641,grad_norm: 0.9790827074129056, iteration: 188271
loss: 1.0130022764205933,grad_norm: 0.9999992762842013, iteration: 188272
loss: 0.989465594291687,grad_norm: 0.8299884608401991, iteration: 188273
loss: 0.9734095335006714,grad_norm: 0.9999990863266442, iteration: 188274
loss: 0.9946531653404236,grad_norm: 0.980700076358615, iteration: 188275
loss: 1.0072506666183472,grad_norm: 0.9188162833683003, iteration: 188276
loss: 0.9821661710739136,grad_norm: 0.8656516922152493, iteration: 188277
loss: 1.1462990045547485,grad_norm: 0.9999998181815676, iteration: 188278
loss: 1.0422526597976685,grad_norm: 0.9999994503076948, iteration: 188279
loss: 1.027062177658081,grad_norm: 0.9999991381369221, iteration: 188280
loss: 0.9968445301055908,grad_norm: 0.999999177039519, iteration: 188281
loss: 1.0143396854400635,grad_norm: 0.8849727585976077, iteration: 188282
loss: 1.0033066272735596,grad_norm: 0.9999992176926059, iteration: 188283
loss: 0.9884464740753174,grad_norm: 0.9508583034316382, iteration: 188284
loss: 1.0053271055221558,grad_norm: 0.9230408746540408, iteration: 188285
loss: 0.9947496056556702,grad_norm: 0.999999058151418, iteration: 188286
loss: 0.9735832810401917,grad_norm: 0.9999991885755553, iteration: 188287
loss: 1.000205636024475,grad_norm: 0.9999990143549038, iteration: 188288
loss: 1.053483486175537,grad_norm: 0.8813034853851571, iteration: 188289
loss: 1.0109909772872925,grad_norm: 0.8159281651151713, iteration: 188290
loss: 0.9663832187652588,grad_norm: 0.9744013638834701, iteration: 188291
loss: 0.9647720456123352,grad_norm: 0.9198375715611252, iteration: 188292
loss: 0.9639909267425537,grad_norm: 0.9946165629599877, iteration: 188293
loss: 1.0121755599975586,grad_norm: 0.9999992094304714, iteration: 188294
loss: 1.0192673206329346,grad_norm: 0.999999178021415, iteration: 188295
loss: 0.9954860806465149,grad_norm: 0.9506111603947145, iteration: 188296
loss: 1.0180847644805908,grad_norm: 0.8792037042302512, iteration: 188297
loss: 1.0134743452072144,grad_norm: 0.8767962470757651, iteration: 188298
loss: 1.02704656124115,grad_norm: 0.9999991242564568, iteration: 188299
loss: 1.0349383354187012,grad_norm: 0.8967834342406059, iteration: 188300
loss: 1.0529042482376099,grad_norm: 0.9009206451059704, iteration: 188301
loss: 0.9887402653694153,grad_norm: 0.999551528445752, iteration: 188302
loss: 0.981834888458252,grad_norm: 0.9999991970624952, iteration: 188303
loss: 1.0159119367599487,grad_norm: 0.8512782621563254, iteration: 188304
loss: 1.0337814092636108,grad_norm: 0.8011261041094753, iteration: 188305
loss: 0.9914477467536926,grad_norm: 0.9999992220707455, iteration: 188306
loss: 1.1783952713012695,grad_norm: 0.9999998879411739, iteration: 188307
loss: 0.9910522103309631,grad_norm: 0.9605198799825027, iteration: 188308
loss: 1.0078006982803345,grad_norm: 0.9999991921181369, iteration: 188309
loss: 0.9770039319992065,grad_norm: 0.8262191060493184, iteration: 188310
loss: 0.9879233837127686,grad_norm: 0.8201048505124143, iteration: 188311
loss: 0.9745022654533386,grad_norm: 0.9999991881426236, iteration: 188312
loss: 0.9960604310035706,grad_norm: 0.9017655725563373, iteration: 188313
loss: 0.9817325472831726,grad_norm: 0.9999991782753895, iteration: 188314
loss: 0.9967400431632996,grad_norm: 0.9999990253141186, iteration: 188315
loss: 0.9808250069618225,grad_norm: 0.8310743542088623, iteration: 188316
loss: 0.9758960008621216,grad_norm: 0.9999990315206941, iteration: 188317
loss: 0.9835372567176819,grad_norm: 0.8568230430683049, iteration: 188318
loss: 1.0299173593521118,grad_norm: 0.9999990478440776, iteration: 188319
loss: 1.005140781402588,grad_norm: 0.9009603452321295, iteration: 188320
loss: 0.9748181700706482,grad_norm: 0.9999990310344602, iteration: 188321
loss: 1.0027841329574585,grad_norm: 0.9580637064138673, iteration: 188322
loss: 0.9858730435371399,grad_norm: 0.9999990944036898, iteration: 188323
loss: 1.0014501810073853,grad_norm: 0.9648597612737462, iteration: 188324
loss: 1.0140297412872314,grad_norm: 0.8895452528137784, iteration: 188325
loss: 1.070651650428772,grad_norm: 0.9999991034768443, iteration: 188326
loss: 1.0303200483322144,grad_norm: 0.9923652407038047, iteration: 188327
loss: 1.0189769268035889,grad_norm: 0.9385842215355533, iteration: 188328
loss: 1.049221158027649,grad_norm: 0.9949973715958869, iteration: 188329
loss: 1.0040737390518188,grad_norm: 0.9197412454451684, iteration: 188330
loss: 0.9990428686141968,grad_norm: 0.9702540610540702, iteration: 188331
loss: 0.9813305735588074,grad_norm: 0.9947773441279448, iteration: 188332
loss: 0.9837028980255127,grad_norm: 0.8931595537902164, iteration: 188333
loss: 1.0293985605239868,grad_norm: 0.8850201417993165, iteration: 188334
loss: 0.9464455842971802,grad_norm: 0.9069910532000237, iteration: 188335
loss: 1.0112143754959106,grad_norm: 0.8124300792915773, iteration: 188336
loss: 1.0496858358383179,grad_norm: 0.9999991423133112, iteration: 188337
loss: 0.9843885898590088,grad_norm: 0.9999989882246835, iteration: 188338
loss: 1.021613359451294,grad_norm: 0.9999994886005199, iteration: 188339
loss: 1.0829277038574219,grad_norm: 0.9999996081532374, iteration: 188340
loss: 0.9909465909004211,grad_norm: 0.9501735403512285, iteration: 188341
loss: 0.9842921495437622,grad_norm: 0.8160807224205475, iteration: 188342
loss: 1.026046872138977,grad_norm: 0.7295768227573751, iteration: 188343
loss: 1.0227233171463013,grad_norm: 0.807299034303927, iteration: 188344
loss: 1.0690388679504395,grad_norm: 0.9999991542888994, iteration: 188345
loss: 0.9562986493110657,grad_norm: 0.9787808276308385, iteration: 188346
loss: 1.0538710355758667,grad_norm: 0.9999992168952668, iteration: 188347
loss: 1.016159176826477,grad_norm: 0.9999991523919536, iteration: 188348
loss: 1.0131447315216064,grad_norm: 0.8631116043393462, iteration: 188349
loss: 1.0267988443374634,grad_norm: 0.9999992932547314, iteration: 188350
loss: 1.0024412870407104,grad_norm: 0.9999989477227564, iteration: 188351
loss: 0.9774461984634399,grad_norm: 0.9999994771768796, iteration: 188352
loss: 1.0269932746887207,grad_norm: 0.9487445962325622, iteration: 188353
loss: 0.9946185946464539,grad_norm: 0.8175792956723763, iteration: 188354
loss: 0.9953818321228027,grad_norm: 0.9405503867119015, iteration: 188355
loss: 1.019667625427246,grad_norm: 0.9999992384331643, iteration: 188356
loss: 1.0172985792160034,grad_norm: 0.9999989521195698, iteration: 188357
loss: 1.0150216817855835,grad_norm: 0.9999999091987646, iteration: 188358
loss: 1.002103328704834,grad_norm: 0.9556933606866385, iteration: 188359
loss: 1.0052766799926758,grad_norm: 0.8863264072457704, iteration: 188360
loss: 0.9808835983276367,grad_norm: 0.9999990553461834, iteration: 188361
loss: 1.0071264505386353,grad_norm: 0.9999990457173761, iteration: 188362
loss: 1.0080417394638062,grad_norm: 0.9935742427708163, iteration: 188363
loss: 1.011000156402588,grad_norm: 0.9999992651653572, iteration: 188364
loss: 1.036788821220398,grad_norm: 0.8530989614895645, iteration: 188365
loss: 0.9946173429489136,grad_norm: 0.9603088069314322, iteration: 188366
loss: 1.0296627283096313,grad_norm: 0.9999991337036018, iteration: 188367
loss: 0.9790083765983582,grad_norm: 0.8272747707493814, iteration: 188368
loss: 1.014115571975708,grad_norm: 0.9574370586510147, iteration: 188369
loss: 1.0135442018508911,grad_norm: 0.8793075425713494, iteration: 188370
loss: 1.0208423137664795,grad_norm: 0.974285980062494, iteration: 188371
loss: 1.0026447772979736,grad_norm: 0.9999990971780519, iteration: 188372
loss: 1.0182856321334839,grad_norm: 0.9999991957794724, iteration: 188373
loss: 1.0306040048599243,grad_norm: 0.9999990656314061, iteration: 188374
loss: 1.0135703086853027,grad_norm: 0.9999990365613706, iteration: 188375
loss: 1.139059066772461,grad_norm: 0.9999991813345771, iteration: 188376
loss: 0.9964997172355652,grad_norm: 0.8811678048393958, iteration: 188377
loss: 0.9596887826919556,grad_norm: 0.9999998302991867, iteration: 188378
loss: 0.9901501536369324,grad_norm: 0.8683154771856628, iteration: 188379
loss: 1.0143660306930542,grad_norm: 0.9210403164161695, iteration: 188380
loss: 1.0350757837295532,grad_norm: 0.9999999472080988, iteration: 188381
loss: 0.9600182771682739,grad_norm: 0.9287737605195582, iteration: 188382
loss: 1.057275652885437,grad_norm: 0.999999899035734, iteration: 188383
loss: 1.0582221746444702,grad_norm: 0.9999990179310698, iteration: 188384
loss: 1.0288095474243164,grad_norm: 0.9999991846902598, iteration: 188385
loss: 1.0104199647903442,grad_norm: 0.9999991867596795, iteration: 188386
loss: 1.0393990278244019,grad_norm: 0.9999992356120345, iteration: 188387
loss: 0.947289228439331,grad_norm: 0.9999990650342412, iteration: 188388
loss: 0.9738176465034485,grad_norm: 0.9564492132193766, iteration: 188389
loss: 0.9943616390228271,grad_norm: 0.9999997508941713, iteration: 188390
loss: 0.9788655638694763,grad_norm: 0.90680299117835, iteration: 188391
loss: 1.0218734741210938,grad_norm: 0.9907432560725753, iteration: 188392
loss: 0.978440523147583,grad_norm: 0.9796570412134639, iteration: 188393
loss: 1.008928894996643,grad_norm: 0.999999028596669, iteration: 188394
loss: 1.027106761932373,grad_norm: 0.8747183604295656, iteration: 188395
loss: 1.0017741918563843,grad_norm: 0.8461571544828203, iteration: 188396
loss: 1.0105667114257812,grad_norm: 0.9999990739701173, iteration: 188397
loss: 0.9996077418327332,grad_norm: 0.8286152607194623, iteration: 188398
loss: 0.9966669678688049,grad_norm: 0.9999997274860277, iteration: 188399
loss: 0.9719613790512085,grad_norm: 0.9219995485818188, iteration: 188400
loss: 0.9978368878364563,grad_norm: 0.9999991428035337, iteration: 188401
loss: 0.992647647857666,grad_norm: 0.9999990860456223, iteration: 188402
loss: 1.0054118633270264,grad_norm: 0.9447872473264528, iteration: 188403
loss: 0.9948810338973999,grad_norm: 0.9999990323350215, iteration: 188404
loss: 1.0075061321258545,grad_norm: 0.9939113360377133, iteration: 188405
loss: 0.9905676245689392,grad_norm: 0.954035249394212, iteration: 188406
loss: 1.0171092748641968,grad_norm: 0.9999990436291772, iteration: 188407
loss: 0.9728726744651794,grad_norm: 0.9999990939977382, iteration: 188408
loss: 0.9916099905967712,grad_norm: 0.8650282747655772, iteration: 188409
loss: 1.0015995502471924,grad_norm: 0.9999991752518735, iteration: 188410
loss: 0.9902348518371582,grad_norm: 0.9355101145697684, iteration: 188411
loss: 1.0291959047317505,grad_norm: 0.981547115114566, iteration: 188412
loss: 0.9628590941429138,grad_norm: 0.9312017389638982, iteration: 188413
loss: 1.0535686016082764,grad_norm: 0.9271556086614426, iteration: 188414
loss: 1.027881145477295,grad_norm: 0.9999999115934525, iteration: 188415
loss: 0.9989704489707947,grad_norm: 0.8679859187857689, iteration: 188416
loss: 1.0233571529388428,grad_norm: 0.9999992515429311, iteration: 188417
loss: 1.0305451154708862,grad_norm: 0.9999994115454804, iteration: 188418
loss: 1.017812967300415,grad_norm: 0.9999991158652998, iteration: 188419
loss: 1.0192900896072388,grad_norm: 0.999999787644669, iteration: 188420
loss: 1.0193721055984497,grad_norm: 0.983862498221877, iteration: 188421
loss: 0.9739338755607605,grad_norm: 0.9999993487405562, iteration: 188422
loss: 0.9975126385688782,grad_norm: 0.9650260263499183, iteration: 188423
loss: 1.148667573928833,grad_norm: 0.9999993449511276, iteration: 188424
loss: 1.0700526237487793,grad_norm: 0.9999995583015214, iteration: 188425
loss: 0.9989703893661499,grad_norm: 0.8838570134787226, iteration: 188426
loss: 1.041584849357605,grad_norm: 0.9999997781009932, iteration: 188427
loss: 0.97279292345047,grad_norm: 0.722216402519293, iteration: 188428
loss: 0.9931014776229858,grad_norm: 0.9365413179073199, iteration: 188429
loss: 0.9901173114776611,grad_norm: 0.9483424924356051, iteration: 188430
loss: 1.0178232192993164,grad_norm: 0.9999992705748133, iteration: 188431
loss: 1.0242815017700195,grad_norm: 0.9999991187841712, iteration: 188432
loss: 1.009581446647644,grad_norm: 0.9999998843090411, iteration: 188433
loss: 1.011412501335144,grad_norm: 0.9999990154038016, iteration: 188434
loss: 1.002943992614746,grad_norm: 0.7629134200009096, iteration: 188435
loss: 0.9760667681694031,grad_norm: 0.947527485906866, iteration: 188436
loss: 0.9862656593322754,grad_norm: 0.9999990459681429, iteration: 188437
loss: 1.0094441175460815,grad_norm: 0.919544156364972, iteration: 188438
loss: 0.9736767411231995,grad_norm: 0.8547117370046099, iteration: 188439
loss: 1.0058120489120483,grad_norm: 0.9163960500473715, iteration: 188440
loss: 1.015031337738037,grad_norm: 0.9999991906858781, iteration: 188441
loss: 0.9965673089027405,grad_norm: 0.925126594543897, iteration: 188442
loss: 0.9956182837486267,grad_norm: 0.943896883697875, iteration: 188443
loss: 1.0082772970199585,grad_norm: 0.9247129292895503, iteration: 188444
loss: 0.9870820045471191,grad_norm: 0.8804437648583615, iteration: 188445
loss: 1.0902572870254517,grad_norm: 0.9999992129444375, iteration: 188446
loss: 1.0253312587738037,grad_norm: 0.999999131775791, iteration: 188447
loss: 0.9712900519371033,grad_norm: 0.9999992692545407, iteration: 188448
loss: 0.9978806972503662,grad_norm: 0.9999990363214692, iteration: 188449
loss: 0.987551212310791,grad_norm: 0.8143193242987793, iteration: 188450
loss: 1.0126287937164307,grad_norm: 0.9999992026007776, iteration: 188451
loss: 0.9903315901756287,grad_norm: 0.9999989544075896, iteration: 188452
loss: 1.0303832292556763,grad_norm: 0.999999217687745, iteration: 188453
loss: 1.0201064348220825,grad_norm: 0.8220208721126706, iteration: 188454
loss: 1.0274139642715454,grad_norm: 0.9999990838750037, iteration: 188455
loss: 0.9883942604064941,grad_norm: 0.9999991439819588, iteration: 188456
loss: 0.967752993106842,grad_norm: 0.9633659002550121, iteration: 188457
loss: 0.9993292689323425,grad_norm: 0.999998998587061, iteration: 188458
loss: 1.01462984085083,grad_norm: 0.999999037752321, iteration: 188459
loss: 1.0208696126937866,grad_norm: 0.8436972869929997, iteration: 188460
loss: 0.9929381012916565,grad_norm: 0.9592115165639741, iteration: 188461
loss: 1.0303341150283813,grad_norm: 0.8998716261964225, iteration: 188462
loss: 1.017964482307434,grad_norm: 0.9999991867424315, iteration: 188463
loss: 0.9892987608909607,grad_norm: 0.99999902859368, iteration: 188464
loss: 1.0074419975280762,grad_norm: 0.7968510569541155, iteration: 188465
loss: 1.0097296237945557,grad_norm: 0.948079275983134, iteration: 188466
loss: 1.007764458656311,grad_norm: 0.8792805275034936, iteration: 188467
loss: 0.9879423379898071,grad_norm: 0.8647672343546282, iteration: 188468
loss: 1.0178531408309937,grad_norm: 0.9156234308390854, iteration: 188469
loss: 0.966992199420929,grad_norm: 0.9999995935872966, iteration: 188470
loss: 0.9840562343597412,grad_norm: 0.9425592102927906, iteration: 188471
loss: 0.9686934351921082,grad_norm: 0.9999991510498526, iteration: 188472
loss: 0.9455040097236633,grad_norm: 0.846444616354104, iteration: 188473
loss: 0.9963527917861938,grad_norm: 0.9898286340020105, iteration: 188474
loss: 1.02011239528656,grad_norm: 0.9218734772615922, iteration: 188475
loss: 1.0315189361572266,grad_norm: 0.999999391524278, iteration: 188476
loss: 1.031252145767212,grad_norm: 0.999184397458243, iteration: 188477
loss: 1.0087405443191528,grad_norm: 0.9999990660615611, iteration: 188478
loss: 0.9935083985328674,grad_norm: 0.948932843294653, iteration: 188479
loss: 0.9663636684417725,grad_norm: 0.9999990570508872, iteration: 188480
loss: 1.0005518198013306,grad_norm: 0.9739571572936797, iteration: 188481
loss: 0.9954195022583008,grad_norm: 0.9484086153301953, iteration: 188482
loss: 0.9692079424858093,grad_norm: 0.953048286828887, iteration: 188483
loss: 1.0544553995132446,grad_norm: 0.99999981890978, iteration: 188484
loss: 1.0231913328170776,grad_norm: 0.9999990502348318, iteration: 188485
loss: 1.0387996435165405,grad_norm: 0.7948027871794289, iteration: 188486
loss: 1.0487239360809326,grad_norm: 0.9999990937642128, iteration: 188487
loss: 0.9971100091934204,grad_norm: 0.8603584704858919, iteration: 188488
loss: 0.9848194122314453,grad_norm: 0.8914977373461915, iteration: 188489
loss: 1.0001336336135864,grad_norm: 0.93884257846587, iteration: 188490
loss: 1.0001202821731567,grad_norm: 0.9642010780074846, iteration: 188491
loss: 0.9502933621406555,grad_norm: 0.8896581852088439, iteration: 188492
loss: 0.9790725111961365,grad_norm: 0.9999992088228102, iteration: 188493
loss: 0.9708911180496216,grad_norm: 0.9999989857807892, iteration: 188494
loss: 1.0144189596176147,grad_norm: 0.999999718638424, iteration: 188495
loss: 0.9531112909317017,grad_norm: 0.9560764125944362, iteration: 188496
loss: 1.009109377861023,grad_norm: 0.9265840984990884, iteration: 188497
loss: 0.9947136640548706,grad_norm: 0.8823823375460124, iteration: 188498
loss: 0.983653724193573,grad_norm: 0.9999990849386395, iteration: 188499
loss: 0.9989773631095886,grad_norm: 0.9999991616990915, iteration: 188500
loss: 0.9807926416397095,grad_norm: 0.9999991844641138, iteration: 188501
loss: 1.013990044593811,grad_norm: 0.9999992689831458, iteration: 188502
loss: 1.0196621417999268,grad_norm: 0.9630931937363434, iteration: 188503
loss: 0.9938898682594299,grad_norm: 0.9999991557983458, iteration: 188504
loss: 1.0098456144332886,grad_norm: 0.9999992198568866, iteration: 188505
loss: 0.9577627778053284,grad_norm: 0.8891716470800276, iteration: 188506
loss: 1.033888816833496,grad_norm: 0.9999996212484176, iteration: 188507
loss: 1.030609130859375,grad_norm: 0.9999991942735321, iteration: 188508
loss: 0.9613394141197205,grad_norm: 0.9500559650853664, iteration: 188509
loss: 1.002646565437317,grad_norm: 0.9999994027338098, iteration: 188510
loss: 1.0611155033111572,grad_norm: 0.9999998733252725, iteration: 188511
loss: 0.9804222583770752,grad_norm: 0.8900948067739748, iteration: 188512
loss: 1.0256074666976929,grad_norm: 0.9999992684826318, iteration: 188513
loss: 0.9665406942367554,grad_norm: 0.8746980954456407, iteration: 188514
loss: 0.9931311011314392,grad_norm: 0.9999991113342365, iteration: 188515
loss: 0.9684881567955017,grad_norm: 0.9999989553813858, iteration: 188516
loss: 1.1054707765579224,grad_norm: 0.9725098218891162, iteration: 188517
loss: 0.980218231678009,grad_norm: 0.9224268322875613, iteration: 188518
loss: 1.0034323930740356,grad_norm: 0.9999990698793964, iteration: 188519
loss: 0.9859070181846619,grad_norm: 0.9999990917826395, iteration: 188520
loss: 0.9683965444564819,grad_norm: 0.9999990648286878, iteration: 188521
loss: 1.024100661277771,grad_norm: 0.7976264970935051, iteration: 188522
loss: 1.0148495435714722,grad_norm: 0.9999992754188242, iteration: 188523
loss: 1.0107972621917725,grad_norm: 0.9915477172612938, iteration: 188524
loss: 0.9843055009841919,grad_norm: 0.9553793949386543, iteration: 188525
loss: 1.0443998575210571,grad_norm: 0.999999157576454, iteration: 188526
loss: 0.994337797164917,grad_norm: 0.9993991562442058, iteration: 188527
loss: 0.9599379897117615,grad_norm: 0.8476251477904359, iteration: 188528
loss: 0.9710243344306946,grad_norm: 0.8182765350783625, iteration: 188529
loss: 1.0097771883010864,grad_norm: 0.9999991762715554, iteration: 188530
loss: 0.9558785557746887,grad_norm: 0.9126288667633675, iteration: 188531
loss: 1.0283390283584595,grad_norm: 0.9999990943831122, iteration: 188532
loss: 1.209114909172058,grad_norm: 0.9999997493993096, iteration: 188533
loss: 1.0608763694763184,grad_norm: 0.9999994038333455, iteration: 188534
loss: 1.1337981224060059,grad_norm: 0.9999998748948515, iteration: 188535
loss: 0.9891908764839172,grad_norm: 0.999999121005697, iteration: 188536
loss: 1.0256600379943848,grad_norm: 0.9999997886390293, iteration: 188537
loss: 0.9771981835365295,grad_norm: 0.7804930694251534, iteration: 188538
loss: 1.028342366218567,grad_norm: 0.9999991171426015, iteration: 188539
loss: 0.9933695793151855,grad_norm: 0.879008548345612, iteration: 188540
loss: 0.9960318803787231,grad_norm: 0.8610763793938662, iteration: 188541
loss: 1.0217682123184204,grad_norm: 0.9272760567256689, iteration: 188542
loss: 1.0115028619766235,grad_norm: 0.9877315804841765, iteration: 188543
loss: 0.9796948432922363,grad_norm: 0.9743438497295477, iteration: 188544
loss: 1.0183062553405762,grad_norm: 0.8763522311731012, iteration: 188545
loss: 1.003444790840149,grad_norm: 0.9327733468196275, iteration: 188546
loss: 1.0735706090927124,grad_norm: 0.9999992489038659, iteration: 188547
loss: 1.0231270790100098,grad_norm: 0.9999990712323749, iteration: 188548
loss: 1.0046226978302002,grad_norm: 0.8057702120217468, iteration: 188549
loss: 0.9712157249450684,grad_norm: 0.9999990173088876, iteration: 188550
loss: 1.0129863023757935,grad_norm: 0.9403732619528355, iteration: 188551
loss: 0.9946681261062622,grad_norm: 0.9999991017439948, iteration: 188552
loss: 1.0338419675827026,grad_norm: 0.9244101087216291, iteration: 188553
loss: 1.0193425416946411,grad_norm: 0.99999936771239, iteration: 188554
loss: 0.9877013564109802,grad_norm: 0.8622351226537015, iteration: 188555
loss: 0.9956744909286499,grad_norm: 0.9999990435059875, iteration: 188556
loss: 0.966987669467926,grad_norm: 0.9000180778925028, iteration: 188557
loss: 1.0038137435913086,grad_norm: 0.9999991993208378, iteration: 188558
loss: 1.0691663026809692,grad_norm: 0.9999991045039633, iteration: 188559
loss: 1.0029205083847046,grad_norm: 0.9444969827663385, iteration: 188560
loss: 1.028754711151123,grad_norm: 0.9999993829473747, iteration: 188561
loss: 1.0342849493026733,grad_norm: 0.9314798967144026, iteration: 188562
loss: 1.0147565603256226,grad_norm: 0.9113649469827104, iteration: 188563
loss: 1.051079273223877,grad_norm: 0.96477165760957, iteration: 188564
loss: 1.01594877243042,grad_norm: 0.9641999534387442, iteration: 188565
loss: 1.106094479560852,grad_norm: 0.9999990331116755, iteration: 188566
loss: 1.001394510269165,grad_norm: 0.9999999500336305, iteration: 188567
loss: 1.0028321743011475,grad_norm: 0.9999990768502601, iteration: 188568
loss: 1.016618251800537,grad_norm: 0.9327537408814198, iteration: 188569
loss: 1.0985050201416016,grad_norm: 0.9999998468698494, iteration: 188570
loss: 1.0041152238845825,grad_norm: 0.9540541201179855, iteration: 188571
loss: 1.0138866901397705,grad_norm: 0.9669246245422914, iteration: 188572
loss: 1.0229241847991943,grad_norm: 0.9999992412049659, iteration: 188573
loss: 1.0128028392791748,grad_norm: 0.9621799607711042, iteration: 188574
loss: 1.0260213613510132,grad_norm: 0.9999991010980396, iteration: 188575
loss: 1.0214227437973022,grad_norm: 0.81357236357244, iteration: 188576
loss: 0.9731873273849487,grad_norm: 0.9999991432359294, iteration: 188577
loss: 1.0197752714157104,grad_norm: 0.9999991109220683, iteration: 188578
loss: 1.025468111038208,grad_norm: 0.9999991856029304, iteration: 188579
loss: 1.012640118598938,grad_norm: 0.9999991478195127, iteration: 188580
loss: 1.011771559715271,grad_norm: 0.8851041556136853, iteration: 188581
loss: 1.01113760471344,grad_norm: 0.9999990644664133, iteration: 188582
loss: 1.0290348529815674,grad_norm: 0.9999992791132314, iteration: 188583
loss: 0.9750822186470032,grad_norm: 0.9999990907985485, iteration: 188584
loss: 1.019904613494873,grad_norm: 0.9894193432572926, iteration: 188585
loss: 0.9803792238235474,grad_norm: 0.8677636803940165, iteration: 188586
loss: 0.9986914992332458,grad_norm: 0.9081288519877376, iteration: 188587
loss: 1.0156395435333252,grad_norm: 0.9999990585519026, iteration: 188588
loss: 0.946929395198822,grad_norm: 0.8188738487062658, iteration: 188589
loss: 1.0290933847427368,grad_norm: 0.9999990869938552, iteration: 188590
loss: 0.9907128810882568,grad_norm: 0.9226729148535388, iteration: 188591
loss: 0.9658870697021484,grad_norm: 0.9966863960061074, iteration: 188592
loss: 0.9890172481536865,grad_norm: 0.8927744261474744, iteration: 188593
loss: 0.9951599836349487,grad_norm: 0.9999990217368294, iteration: 188594
loss: 1.0089163780212402,grad_norm: 0.860011807388783, iteration: 188595
loss: 1.0017991065979004,grad_norm: 0.8498059396274588, iteration: 188596
loss: 1.014724612236023,grad_norm: 0.9999991452845566, iteration: 188597
loss: 1.0326528549194336,grad_norm: 0.9999991311833694, iteration: 188598
loss: 1.012965440750122,grad_norm: 0.8461836574183561, iteration: 188599
loss: 1.0420416593551636,grad_norm: 0.9999993683316358, iteration: 188600
loss: 0.9660009145736694,grad_norm: 0.8676974406375717, iteration: 188601
loss: 1.0118263959884644,grad_norm: 0.8417006099121146, iteration: 188602
loss: 1.0271048545837402,grad_norm: 0.9086320986675028, iteration: 188603
loss: 0.9780058264732361,grad_norm: 0.7566014997691878, iteration: 188604
loss: 0.9489336013793945,grad_norm: 0.9999991275686423, iteration: 188605
loss: 1.012001872062683,grad_norm: 0.99999922748373, iteration: 188606
loss: 1.0230176448822021,grad_norm: 0.9655442505108965, iteration: 188607
loss: 1.0046846866607666,grad_norm: 0.9999989556555895, iteration: 188608
loss: 0.9989489316940308,grad_norm: 0.917562411859494, iteration: 188609
loss: 0.9812941551208496,grad_norm: 0.9999990834065561, iteration: 188610
loss: 0.9970726370811462,grad_norm: 0.9999994448810943, iteration: 188611
loss: 0.9706952571868896,grad_norm: 0.9623831034938906, iteration: 188612
loss: 1.0108236074447632,grad_norm: 0.9999990010348454, iteration: 188613
loss: 1.0220106840133667,grad_norm: 0.9031080219294537, iteration: 188614
loss: 0.9904555082321167,grad_norm: 0.8714671897222422, iteration: 188615
loss: 0.996779203414917,grad_norm: 0.8282183390391948, iteration: 188616
loss: 0.9985435009002686,grad_norm: 0.9905606342834261, iteration: 188617
loss: 1.00663423538208,grad_norm: 0.9981773716890129, iteration: 188618
loss: 1.0064934492111206,grad_norm: 0.9733623090042892, iteration: 188619
loss: 1.0113340616226196,grad_norm: 0.999999148138337, iteration: 188620
loss: 1.0091273784637451,grad_norm: 0.9999990618560429, iteration: 188621
loss: 1.020843744277954,grad_norm: 0.9999991895151623, iteration: 188622
loss: 0.996289074420929,grad_norm: 0.9999992042278846, iteration: 188623
loss: 0.9735907912254333,grad_norm: 0.8947530943173588, iteration: 188624
loss: 1.0070345401763916,grad_norm: 0.9999991340381083, iteration: 188625
loss: 1.0132478475570679,grad_norm: 0.8669984505800206, iteration: 188626
loss: 0.946832537651062,grad_norm: 0.8792546409228468, iteration: 188627
loss: 1.001313328742981,grad_norm: 0.7796234835976524, iteration: 188628
loss: 0.9748113751411438,grad_norm: 0.8577317541290019, iteration: 188629
loss: 1.0135334730148315,grad_norm: 0.9490816667773028, iteration: 188630
loss: 0.9501184225082397,grad_norm: 0.8715293291581001, iteration: 188631
loss: 1.0227848291397095,grad_norm: 0.9999994303235715, iteration: 188632
loss: 1.0759754180908203,grad_norm: 0.9999990263690095, iteration: 188633
loss: 0.978816032409668,grad_norm: 0.8935795611561796, iteration: 188634
loss: 0.9611995220184326,grad_norm: 0.9133843945474316, iteration: 188635
loss: 0.966813325881958,grad_norm: 0.9999992482333204, iteration: 188636
loss: 1.0024099349975586,grad_norm: 0.9381603298052837, iteration: 188637
loss: 1.0182162523269653,grad_norm: 0.9611612173995427, iteration: 188638
loss: 0.9695238471031189,grad_norm: 0.9999992235249499, iteration: 188639
loss: 1.00126051902771,grad_norm: 0.9999990030434864, iteration: 188640
loss: 0.9556865692138672,grad_norm: 0.9999991559908596, iteration: 188641
loss: 1.0471138954162598,grad_norm: 0.9999997010540246, iteration: 188642
loss: 0.9876076579093933,grad_norm: 0.9434056511637747, iteration: 188643
loss: 1.0227359533309937,grad_norm: 0.999999344333846, iteration: 188644
loss: 0.9774016737937927,grad_norm: 0.9753910198202833, iteration: 188645
loss: 0.9874788522720337,grad_norm: 0.973809311065145, iteration: 188646
loss: 0.9989578723907471,grad_norm: 0.9512268782945179, iteration: 188647
loss: 0.9940178990364075,grad_norm: 0.9999991548882222, iteration: 188648
loss: 0.9371830821037292,grad_norm: 0.9773365770187973, iteration: 188649
loss: 0.9938393235206604,grad_norm: 0.999999169875199, iteration: 188650
loss: 1.0307211875915527,grad_norm: 0.9999990926303938, iteration: 188651
loss: 0.9882711172103882,grad_norm: 0.9999990240094797, iteration: 188652
loss: 1.0163415670394897,grad_norm: 0.999999136474446, iteration: 188653
loss: 0.9711927175521851,grad_norm: 0.8368340220216817, iteration: 188654
loss: 1.0251878499984741,grad_norm: 0.8096594472030347, iteration: 188655
loss: 0.9811134934425354,grad_norm: 0.9512193490790749, iteration: 188656
loss: 0.9675164222717285,grad_norm: 0.9169155875589325, iteration: 188657
loss: 1.009898066520691,grad_norm: 0.8030898959563765, iteration: 188658
loss: 1.0172137022018433,grad_norm: 0.9088279298921041, iteration: 188659
loss: 0.9696827530860901,grad_norm: 0.9352165903643727, iteration: 188660
loss: 1.0297800302505493,grad_norm: 0.9999989488194321, iteration: 188661
loss: 0.9554910659790039,grad_norm: 0.9137932495558964, iteration: 188662
loss: 0.9652751684188843,grad_norm: 0.9185895526020128, iteration: 188663
loss: 0.9562303423881531,grad_norm: 0.9999990897254586, iteration: 188664
loss: 0.988838791847229,grad_norm: 0.8931058814863047, iteration: 188665
loss: 0.974621593952179,grad_norm: 0.9999990310563845, iteration: 188666
loss: 1.005815029144287,grad_norm: 0.8806197860164982, iteration: 188667
loss: 0.9984824657440186,grad_norm: 0.8039737701139826, iteration: 188668
loss: 1.0306326150894165,grad_norm: 0.9155184837051293, iteration: 188669
loss: 1.0369102954864502,grad_norm: 0.8531343832185938, iteration: 188670
loss: 1.0249295234680176,grad_norm: 0.9999993455035987, iteration: 188671
loss: 1.0028140544891357,grad_norm: 0.8878404398514397, iteration: 188672
loss: 0.9841426014900208,grad_norm: 0.9999990738385142, iteration: 188673
loss: 0.9882594347000122,grad_norm: 0.9999992778891247, iteration: 188674
loss: 0.9988551139831543,grad_norm: 0.999999025817534, iteration: 188675
loss: 1.006879210472107,grad_norm: 0.902093571211086, iteration: 188676
loss: 0.9900036454200745,grad_norm: 0.8899294925987506, iteration: 188677
loss: 0.9670661687850952,grad_norm: 0.9999990929701396, iteration: 188678
loss: 1.0274684429168701,grad_norm: 0.8326227614300266, iteration: 188679
loss: 1.0401431322097778,grad_norm: 0.9823632334009129, iteration: 188680
loss: 0.9875403046607971,grad_norm: 0.9999990837902978, iteration: 188681
loss: 1.0192562341690063,grad_norm: 0.9978187454568458, iteration: 188682
loss: 0.9948453903198242,grad_norm: 0.8790618736084592, iteration: 188683
loss: 1.0053820610046387,grad_norm: 0.9999990692989088, iteration: 188684
loss: 0.9979078769683838,grad_norm: 0.9999990289846673, iteration: 188685
loss: 1.00091552734375,grad_norm: 0.8616322176741401, iteration: 188686
loss: 1.0063775777816772,grad_norm: 0.9600224077016885, iteration: 188687
loss: 1.0043447017669678,grad_norm: 0.9999990809788533, iteration: 188688
loss: 1.0067719221115112,grad_norm: 0.9226340347050043, iteration: 188689
loss: 1.2241435050964355,grad_norm: 0.9999990842702672, iteration: 188690
loss: 0.9990624785423279,grad_norm: 0.9136685594485724, iteration: 188691
loss: 0.9814652800559998,grad_norm: 0.9589662464900297, iteration: 188692
loss: 0.9762192368507385,grad_norm: 0.8775539650689194, iteration: 188693
loss: 1.0260244607925415,grad_norm: 0.9999991664541173, iteration: 188694
loss: 0.9678248167037964,grad_norm: 0.9429714340645139, iteration: 188695
loss: 1.0059412717819214,grad_norm: 0.9999991856673295, iteration: 188696
loss: 1.0012990236282349,grad_norm: 0.9915153086078976, iteration: 188697
loss: 0.9651131629943848,grad_norm: 0.9479942600998775, iteration: 188698
loss: 1.012200117111206,grad_norm: 0.9999990328138416, iteration: 188699
loss: 0.9539254903793335,grad_norm: 0.9999992825636244, iteration: 188700
loss: 0.9739139676094055,grad_norm: 0.8470479009754709, iteration: 188701
loss: 1.0141221284866333,grad_norm: 0.9014172910408808, iteration: 188702
loss: 1.000357747077942,grad_norm: 0.9041967293485029, iteration: 188703
loss: 0.9732323288917542,grad_norm: 0.8003931894060903, iteration: 188704
loss: 0.9752598404884338,grad_norm: 0.8661603684282188, iteration: 188705
loss: 0.9990610480308533,grad_norm: 0.9999997838941097, iteration: 188706
loss: 1.012084722518921,grad_norm: 0.999999282483396, iteration: 188707
loss: 1.0138671398162842,grad_norm: 0.9999990647833404, iteration: 188708
loss: 0.9855278134346008,grad_norm: 0.931881474293181, iteration: 188709
loss: 1.0288499593734741,grad_norm: 0.9999992328955033, iteration: 188710
loss: 0.9786969423294067,grad_norm: 0.8130876032883405, iteration: 188711
loss: 1.057423710823059,grad_norm: 0.9999997492285682, iteration: 188712
loss: 0.9734842777252197,grad_norm: 0.9218960836176302, iteration: 188713
loss: 0.9730141162872314,grad_norm: 0.9914005538075972, iteration: 188714
loss: 1.0025756359100342,grad_norm: 0.9999998340761681, iteration: 188715
loss: 0.9943616986274719,grad_norm: 0.9080579126189064, iteration: 188716
loss: 0.9837170243263245,grad_norm: 0.9999991210399839, iteration: 188717
loss: 0.9839092493057251,grad_norm: 0.9586479429541637, iteration: 188718
loss: 0.9793746471405029,grad_norm: 0.8900186703113515, iteration: 188719
loss: 0.9914546012878418,grad_norm: 0.7761314319022845, iteration: 188720
loss: 0.9805074334144592,grad_norm: 0.9999991272125106, iteration: 188721
loss: 0.9728403687477112,grad_norm: 0.9999989639216065, iteration: 188722
loss: 0.9959012269973755,grad_norm: 0.999999120280986, iteration: 188723
loss: 0.9830572605133057,grad_norm: 0.9455705955970456, iteration: 188724
loss: 0.9963740706443787,grad_norm: 0.9024236601990969, iteration: 188725
loss: 0.9947811961174011,grad_norm: 0.9931124403586465, iteration: 188726
loss: 1.0277488231658936,grad_norm: 0.9973176676924577, iteration: 188727
loss: 0.9670879244804382,grad_norm: 0.9999991814572061, iteration: 188728
loss: 1.0112030506134033,grad_norm: 0.9894673414780545, iteration: 188729
loss: 1.0165001153945923,grad_norm: 0.7343983155043265, iteration: 188730
loss: 1.0254347324371338,grad_norm: 0.9999992259757603, iteration: 188731
loss: 0.9478294253349304,grad_norm: 0.9896970967132794, iteration: 188732
loss: 0.9563319087028503,grad_norm: 0.8804720411752825, iteration: 188733
loss: 0.9993938207626343,grad_norm: 0.9982879628098458, iteration: 188734
loss: 0.9918221831321716,grad_norm: 0.9688501573859046, iteration: 188735
loss: 0.9800792932510376,grad_norm: 0.9698498771919665, iteration: 188736
loss: 0.9932228922843933,grad_norm: 0.9694886312245964, iteration: 188737
loss: 0.9794303178787231,grad_norm: 0.8306761014507051, iteration: 188738
loss: 0.9865812659263611,grad_norm: 0.9999990762443678, iteration: 188739
loss: 0.9917213916778564,grad_norm: 0.9591956292485998, iteration: 188740
loss: 1.0253329277038574,grad_norm: 0.8547957761375088, iteration: 188741
loss: 0.9919719099998474,grad_norm: 0.9999990127022685, iteration: 188742
loss: 0.9816421270370483,grad_norm: 0.92730866154544, iteration: 188743
loss: 0.9882377982139587,grad_norm: 0.9999991902212265, iteration: 188744
loss: 0.9742132425308228,grad_norm: 0.9999989746095704, iteration: 188745
loss: 0.9727031588554382,grad_norm: 0.9999990593072371, iteration: 188746
loss: 1.0104750394821167,grad_norm: 0.999999127532953, iteration: 188747
loss: 0.9838414788246155,grad_norm: 0.9999991360297207, iteration: 188748
loss: 1.0076576471328735,grad_norm: 0.8407069153505471, iteration: 188749
loss: 0.9728177189826965,grad_norm: 0.9336372062484689, iteration: 188750
loss: 0.9839340448379517,grad_norm: 0.7850141890898547, iteration: 188751
loss: 1.0047030448913574,grad_norm: 0.9975221989012224, iteration: 188752
loss: 1.0336960554122925,grad_norm: 0.8827441906221114, iteration: 188753
loss: 1.0071172714233398,grad_norm: 0.9283953839129111, iteration: 188754
loss: 0.9881439805030823,grad_norm: 0.9745974193981983, iteration: 188755
loss: 1.044708251953125,grad_norm: 0.9999991563097809, iteration: 188756
loss: 1.0143277645111084,grad_norm: 0.9999991698203518, iteration: 188757
loss: 0.9946028590202332,grad_norm: 0.9999990322227421, iteration: 188758
loss: 0.962664008140564,grad_norm: 0.9695166721604805, iteration: 188759
loss: 1.0353672504425049,grad_norm: 0.8436675773852007, iteration: 188760
loss: 1.0224881172180176,grad_norm: 0.9341271833482374, iteration: 188761
loss: 0.9740384817123413,grad_norm: 0.9497364502331626, iteration: 188762
loss: 1.0159556865692139,grad_norm: 0.9999992487763566, iteration: 188763
loss: 0.932809054851532,grad_norm: 0.9999991809579946, iteration: 188764
loss: 0.9646551609039307,grad_norm: 0.94544427010362, iteration: 188765
loss: 0.9947410225868225,grad_norm: 0.9747734835088873, iteration: 188766
loss: 1.01638662815094,grad_norm: 0.9999990403449933, iteration: 188767
loss: 1.0192188024520874,grad_norm: 0.8121394764184604, iteration: 188768
loss: 1.0077263116836548,grad_norm: 0.9080504677544081, iteration: 188769
loss: 1.0230969190597534,grad_norm: 0.9999991520720775, iteration: 188770
loss: 0.9877418875694275,grad_norm: 0.9716632895471989, iteration: 188771
loss: 1.0064481496810913,grad_norm: 0.9999992912247249, iteration: 188772
loss: 0.9701940417289734,grad_norm: 0.9619864658382625, iteration: 188773
loss: 0.9977244138717651,grad_norm: 0.9999991475934243, iteration: 188774
loss: 1.0567641258239746,grad_norm: 0.9999999148943073, iteration: 188775
loss: 0.9648270010948181,grad_norm: 0.9999991454031888, iteration: 188776
loss: 1.0348960161209106,grad_norm: 0.9999990803512406, iteration: 188777
loss: 0.9935093522071838,grad_norm: 0.9158912821756258, iteration: 188778
loss: 0.9540704488754272,grad_norm: 0.8877731041465731, iteration: 188779
loss: 0.9947739243507385,grad_norm: 0.9735204600113522, iteration: 188780
loss: 1.013522744178772,grad_norm: 0.8995716031442211, iteration: 188781
loss: 1.020967721939087,grad_norm: 0.9999990419347236, iteration: 188782
loss: 0.9533756375312805,grad_norm: 0.9999991912347412, iteration: 188783
loss: 1.1085970401763916,grad_norm: 0.9999994181216691, iteration: 188784
loss: 1.0196465253829956,grad_norm: 0.9999992884453947, iteration: 188785
loss: 0.9848642945289612,grad_norm: 0.9582395091670114, iteration: 188786
loss: 1.0444543361663818,grad_norm: 0.9999991453842288, iteration: 188787
loss: 0.9783190488815308,grad_norm: 0.9999993185470413, iteration: 188788
loss: 1.0128051042556763,grad_norm: 0.9999989673283407, iteration: 188789
loss: 0.9566707611083984,grad_norm: 0.9396209752270598, iteration: 188790
loss: 0.9912417531013489,grad_norm: 0.9011527320002548, iteration: 188791
loss: 0.999426007270813,grad_norm: 0.7856540544768589, iteration: 188792
loss: 1.0208414793014526,grad_norm: 0.9262180758137369, iteration: 188793
loss: 0.9664767980575562,grad_norm: 0.9999991249495286, iteration: 188794
loss: 0.982679009437561,grad_norm: 0.8917679946956362, iteration: 188795
loss: 0.9871341586112976,grad_norm: 0.9327753941709084, iteration: 188796
loss: 1.0295995473861694,grad_norm: 0.9021131495922285, iteration: 188797
loss: 0.9904009103775024,grad_norm: 0.9273761485557116, iteration: 188798
loss: 0.9906800389289856,grad_norm: 0.9999996370514936, iteration: 188799
loss: 1.006263017654419,grad_norm: 0.9967842575885771, iteration: 188800
loss: 0.9905730485916138,grad_norm: 0.9999990821920935, iteration: 188801
loss: 1.0671815872192383,grad_norm: 0.9999990186135082, iteration: 188802
loss: 1.0159214735031128,grad_norm: 0.9999991403055251, iteration: 188803
loss: 0.9914718866348267,grad_norm: 0.9999990106995056, iteration: 188804
loss: 1.0413539409637451,grad_norm: 0.999999181849873, iteration: 188805
loss: 1.0023573637008667,grad_norm: 0.9168394924523383, iteration: 188806
loss: 1.0008844137191772,grad_norm: 0.9879979289839345, iteration: 188807
loss: 1.0032094717025757,grad_norm: 0.9654852306689466, iteration: 188808
loss: 0.9645909667015076,grad_norm: 0.9020028207788074, iteration: 188809
loss: 1.019490361213684,grad_norm: 0.8977881245445815, iteration: 188810
loss: 0.9937477707862854,grad_norm: 0.9902660885583983, iteration: 188811
loss: 1.1476832628250122,grad_norm: 0.9999994516032865, iteration: 188812
loss: 0.9948304891586304,grad_norm: 0.9095401181103627, iteration: 188813
loss: 1.0365781784057617,grad_norm: 0.9456122929297461, iteration: 188814
loss: 0.9852758049964905,grad_norm: 0.8368208769471126, iteration: 188815
loss: 1.0043601989746094,grad_norm: 0.9456693639133701, iteration: 188816
loss: 1.0099726915359497,grad_norm: 0.9000983527589153, iteration: 188817
loss: 1.0644874572753906,grad_norm: 0.9999992202363465, iteration: 188818
loss: 1.0275648832321167,grad_norm: 0.9999992754329204, iteration: 188819
loss: 1.0097554922103882,grad_norm: 0.9980647967627395, iteration: 188820
loss: 0.9915469884872437,grad_norm: 0.9999991032143364, iteration: 188821
loss: 0.979554295539856,grad_norm: 0.9999994442032719, iteration: 188822
loss: 1.0021950006484985,grad_norm: 0.999999368371772, iteration: 188823
loss: 0.978666365146637,grad_norm: 0.8959663931474479, iteration: 188824
loss: 1.033808946609497,grad_norm: 0.9939145427604913, iteration: 188825
loss: 1.0001096725463867,grad_norm: 0.8500006339757947, iteration: 188826
loss: 0.9632585644721985,grad_norm: 0.9340765078076606, iteration: 188827
loss: 1.0163295269012451,grad_norm: 0.8956075904299707, iteration: 188828
loss: 1.0022205114364624,grad_norm: 0.8921838555953411, iteration: 188829
loss: 1.0417317152023315,grad_norm: 0.9999991143979096, iteration: 188830
loss: 1.0139085054397583,grad_norm: 0.9999996266127409, iteration: 188831
loss: 0.9684221744537354,grad_norm: 0.8068641812257189, iteration: 188832
loss: 0.9759190678596497,grad_norm: 0.999999094384558, iteration: 188833
loss: 1.0534162521362305,grad_norm: 0.9999992804574412, iteration: 188834
loss: 0.9881219267845154,grad_norm: 0.9943807642863011, iteration: 188835
loss: 1.0250880718231201,grad_norm: 0.8424818895471693, iteration: 188836
loss: 0.9883925318717957,grad_norm: 0.897997559729228, iteration: 188837
loss: 0.9885284900665283,grad_norm: 0.9999994560766003, iteration: 188838
loss: 0.996138334274292,grad_norm: 0.9999996105616608, iteration: 188839
loss: 0.9896589517593384,grad_norm: 0.9999990842427154, iteration: 188840
loss: 1.0047444105148315,grad_norm: 0.9999990905552593, iteration: 188841
loss: 1.050357699394226,grad_norm: 1.0000000683897903, iteration: 188842
loss: 1.0027014017105103,grad_norm: 0.8944824705442228, iteration: 188843
loss: 0.9900539517402649,grad_norm: 0.9999991667630539, iteration: 188844
loss: 0.9891219735145569,grad_norm: 0.9999991890348141, iteration: 188845
loss: 1.0154210329055786,grad_norm: 0.9410543699538882, iteration: 188846
loss: 0.9708070158958435,grad_norm: 0.9999992326325506, iteration: 188847
loss: 1.002439260482788,grad_norm: 0.9999993617436977, iteration: 188848
loss: 0.982679545879364,grad_norm: 0.9999990124145823, iteration: 188849
loss: 0.9960324764251709,grad_norm: 0.8787527738037505, iteration: 188850
loss: 0.9509755969047546,grad_norm: 0.999029403947768, iteration: 188851
loss: 1.002462387084961,grad_norm: 0.9999991950077248, iteration: 188852
loss: 1.0490227937698364,grad_norm: 0.9472602891608268, iteration: 188853
loss: 0.9582033157348633,grad_norm: 0.923732700817018, iteration: 188854
loss: 0.9738193154335022,grad_norm: 0.9999990774079622, iteration: 188855
loss: 1.0200552940368652,grad_norm: 0.9594458424460711, iteration: 188856
loss: 0.9736514091491699,grad_norm: 0.9259499193353263, iteration: 188857
loss: 0.9887890815734863,grad_norm: 0.9999991519644023, iteration: 188858
loss: 1.0019718408584595,grad_norm: 0.9999992537089943, iteration: 188859
loss: 0.9788450002670288,grad_norm: 0.9999993238648641, iteration: 188860
loss: 0.9896119832992554,grad_norm: 0.9999991048076917, iteration: 188861
loss: 0.9858442544937134,grad_norm: 0.8770324683565367, iteration: 188862
loss: 0.9913827180862427,grad_norm: 0.9934553946804123, iteration: 188863
loss: 0.9832196235656738,grad_norm: 0.9999995711893709, iteration: 188864
loss: 0.9684847593307495,grad_norm: 0.9999991046880838, iteration: 188865
loss: 0.999655544757843,grad_norm: 0.9999992086758823, iteration: 188866
loss: 0.9973968267440796,grad_norm: 0.8254518153704206, iteration: 188867
loss: 0.9996669292449951,grad_norm: 0.999999164161769, iteration: 188868
loss: 0.993522047996521,grad_norm: 0.9999990879112036, iteration: 188869
loss: 1.0193817615509033,grad_norm: 0.961476488664437, iteration: 188870
loss: 0.983272910118103,grad_norm: 0.9999991250313021, iteration: 188871
loss: 1.014483094215393,grad_norm: 0.9999990494214671, iteration: 188872
loss: 1.0047045946121216,grad_norm: 0.8632098501466846, iteration: 188873
loss: 1.0234932899475098,grad_norm: 0.9999991593562441, iteration: 188874
loss: 1.0206862688064575,grad_norm: 0.9589159056488458, iteration: 188875
loss: 1.0099292993545532,grad_norm: 0.999998948560341, iteration: 188876
loss: 0.9903762936592102,grad_norm: 0.9165382708908195, iteration: 188877
loss: 0.99775630235672,grad_norm: 0.8582485185528739, iteration: 188878
loss: 0.98467618227005,grad_norm: 0.9999991502753067, iteration: 188879
loss: 0.9849964380264282,grad_norm: 0.9694548754733722, iteration: 188880
loss: 0.9727228879928589,grad_norm: 0.8952172672905523, iteration: 188881
loss: 0.9929340481758118,grad_norm: 0.9999989801020299, iteration: 188882
loss: 1.0033040046691895,grad_norm: 0.7901845012679684, iteration: 188883
loss: 0.9701352119445801,grad_norm: 0.9999991314134393, iteration: 188884
loss: 0.9972419142723083,grad_norm: 0.9999992449953405, iteration: 188885
loss: 1.0474588871002197,grad_norm: 0.8505604903157823, iteration: 188886
loss: 1.0570869445800781,grad_norm: 0.9680084559935235, iteration: 188887
loss: 1.0163432359695435,grad_norm: 0.9999991048003039, iteration: 188888
loss: 1.110083818435669,grad_norm: 0.9999996443564053, iteration: 188889
loss: 0.9672330617904663,grad_norm: 0.8412960369474348, iteration: 188890
loss: 1.0379213094711304,grad_norm: 0.8475412119881012, iteration: 188891
loss: 0.9931897521018982,grad_norm: 0.9138285314442383, iteration: 188892
loss: 1.0371853113174438,grad_norm: 0.8490255205373785, iteration: 188893
loss: 1.0262224674224854,grad_norm: 0.9999991090577555, iteration: 188894
loss: 0.9765684604644775,grad_norm: 0.8714016072345723, iteration: 188895
loss: 0.9993454813957214,grad_norm: 0.97802369430354, iteration: 188896
loss: 1.0010032653808594,grad_norm: 0.9901458655316778, iteration: 188897
loss: 0.9726933836936951,grad_norm: 0.9999990676079211, iteration: 188898
loss: 1.0076698064804077,grad_norm: 0.9999991374311856, iteration: 188899
loss: 1.002454161643982,grad_norm: 0.9999991687045341, iteration: 188900
loss: 1.0280665159225464,grad_norm: 0.994117382425563, iteration: 188901
loss: 1.0503487586975098,grad_norm: 0.9841987201814669, iteration: 188902
loss: 0.950732409954071,grad_norm: 0.9999990690331301, iteration: 188903
loss: 0.9908335208892822,grad_norm: 0.9133315030239602, iteration: 188904
loss: 1.006996512413025,grad_norm: 0.9432901289282107, iteration: 188905
loss: 0.9976083040237427,grad_norm: 0.9999991028281346, iteration: 188906
loss: 1.067363977432251,grad_norm: 0.9999995836573541, iteration: 188907
loss: 1.0214557647705078,grad_norm: 0.9825215209213534, iteration: 188908
loss: 1.0036510229110718,grad_norm: 0.9999993634738603, iteration: 188909
loss: 1.0227972269058228,grad_norm: 0.9805535752797349, iteration: 188910
loss: 1.0377248525619507,grad_norm: 0.9999993476769219, iteration: 188911
loss: 1.021417260169983,grad_norm: 0.999999296244285, iteration: 188912
loss: 1.0788954496383667,grad_norm: 0.9999993836764236, iteration: 188913
loss: 0.9846488833427429,grad_norm: 0.9883867148826525, iteration: 188914
loss: 1.0249830484390259,grad_norm: 0.9999989165805435, iteration: 188915
loss: 0.9618228673934937,grad_norm: 0.9775615597914044, iteration: 188916
loss: 1.0239914655685425,grad_norm: 0.9999991881041381, iteration: 188917
loss: 0.9838005900382996,grad_norm: 0.8484103725468947, iteration: 188918
loss: 0.9839439988136292,grad_norm: 0.9117138161547581, iteration: 188919
loss: 0.9948156476020813,grad_norm: 0.8851968181312604, iteration: 188920
loss: 1.0195374488830566,grad_norm: 0.8061882145552988, iteration: 188921
loss: 0.9904913902282715,grad_norm: 0.8960553994950652, iteration: 188922
loss: 0.9734129309654236,grad_norm: 0.8713830774938209, iteration: 188923
loss: 1.0018198490142822,grad_norm: 0.8381614321715228, iteration: 188924
loss: 1.0535463094711304,grad_norm: 0.999999156986027, iteration: 188925
loss: 1.0012372732162476,grad_norm: 0.8749654732424809, iteration: 188926
loss: 1.049850344657898,grad_norm: 0.9999989101920935, iteration: 188927
loss: 1.0167382955551147,grad_norm: 0.9999991614882804, iteration: 188928
loss: 0.9964255094528198,grad_norm: 0.9924225225522906, iteration: 188929
loss: 1.0321462154388428,grad_norm: 0.9999990304590851, iteration: 188930
loss: 0.98697829246521,grad_norm: 0.9999991801790108, iteration: 188931
loss: 0.9713178873062134,grad_norm: 0.8918149739434006, iteration: 188932
loss: 0.9844110012054443,grad_norm: 0.8285139377025368, iteration: 188933
loss: 1.0050312280654907,grad_norm: 0.999999453054769, iteration: 188934
loss: 0.9908487796783447,grad_norm: 0.9999990879471083, iteration: 188935
loss: 1.0173803567886353,grad_norm: 0.8966613376742292, iteration: 188936
loss: 0.9872859120368958,grad_norm: 0.9999990840726827, iteration: 188937
loss: 0.9926167130470276,grad_norm: 0.9926948063378901, iteration: 188938
loss: 0.9983172416687012,grad_norm: 0.822734400643766, iteration: 188939
loss: 1.0021182298660278,grad_norm: 0.8995097794057191, iteration: 188940
loss: 0.9824913144111633,grad_norm: 0.9661662778082315, iteration: 188941
loss: 1.0123176574707031,grad_norm: 0.7617829041425408, iteration: 188942
loss: 1.000826120376587,grad_norm: 0.999999069542233, iteration: 188943
loss: 0.9519662261009216,grad_norm: 0.9179860825957963, iteration: 188944
loss: 1.0102839469909668,grad_norm: 0.9999990908892079, iteration: 188945
loss: 0.9666404128074646,grad_norm: 0.9501528997113993, iteration: 188946
loss: 1.0005533695220947,grad_norm: 0.8866280816482026, iteration: 188947
loss: 0.9949638843536377,grad_norm: 0.9999990599831742, iteration: 188948
loss: 0.9801857471466064,grad_norm: 0.9999994054754965, iteration: 188949
loss: 1.0192193984985352,grad_norm: 0.8249223368910873, iteration: 188950
loss: 1.0034807920455933,grad_norm: 0.9828057257099737, iteration: 188951
loss: 0.969061017036438,grad_norm: 0.9397362220087718, iteration: 188952
loss: 0.9883332848548889,grad_norm: 0.9704591838858606, iteration: 188953
loss: 1.061091423034668,grad_norm: 0.9999997492746837, iteration: 188954
loss: 1.0197577476501465,grad_norm: 0.9329577699287614, iteration: 188955
loss: 0.9813138246536255,grad_norm: 0.9031140777961221, iteration: 188956
loss: 0.9476604461669922,grad_norm: 0.9999992441861916, iteration: 188957
loss: 1.0183061361312866,grad_norm: 0.9825796438762052, iteration: 188958
loss: 1.0340087413787842,grad_norm: 0.9999995128884017, iteration: 188959
loss: 0.9784225821495056,grad_norm: 0.9692173059024628, iteration: 188960
loss: 1.0324753522872925,grad_norm: 0.999999094037719, iteration: 188961
loss: 0.970241904258728,grad_norm: 0.9719114228576213, iteration: 188962
loss: 0.9924253225326538,grad_norm: 0.8390092812816196, iteration: 188963
loss: 1.0098624229431152,grad_norm: 0.9999991936164877, iteration: 188964
loss: 1.0144052505493164,grad_norm: 0.999998970905244, iteration: 188965
loss: 0.9777651429176331,grad_norm: 0.9999991497602099, iteration: 188966
loss: 0.9927635788917542,grad_norm: 0.8931800822356812, iteration: 188967
loss: 0.9792093634605408,grad_norm: 0.8373433729022794, iteration: 188968
loss: 1.0049282312393188,grad_norm: 0.9895503958944557, iteration: 188969
loss: 0.9877805709838867,grad_norm: 0.8566165778730426, iteration: 188970
loss: 1.0153504610061646,grad_norm: 0.9999990107768076, iteration: 188971
loss: 1.0060629844665527,grad_norm: 0.9204812353304165, iteration: 188972
loss: 1.0004695653915405,grad_norm: 0.9999989546911698, iteration: 188973
loss: 0.9800444841384888,grad_norm: 0.8909642778519895, iteration: 188974
loss: 1.0731717348098755,grad_norm: 0.9999993844910577, iteration: 188975
loss: 1.0192315578460693,grad_norm: 0.8869058394704318, iteration: 188976
loss: 0.9974729418754578,grad_norm: 0.9999991426083069, iteration: 188977
loss: 1.070786476135254,grad_norm: 0.9987749030907271, iteration: 188978
loss: 0.9328967928886414,grad_norm: 0.9721084066633149, iteration: 188979
loss: 1.0538907051086426,grad_norm: 0.9999996453054483, iteration: 188980
loss: 0.9902010560035706,grad_norm: 0.9619841261739803, iteration: 188981
loss: 1.0210622549057007,grad_norm: 0.9552580904819219, iteration: 188982
loss: 0.9964709281921387,grad_norm: 0.9999990884137111, iteration: 188983
loss: 0.9988253712654114,grad_norm: 0.9335045222171301, iteration: 188984
loss: 1.0127311944961548,grad_norm: 0.9999998572193352, iteration: 188985
loss: 1.0018221139907837,grad_norm: 0.9999989560806578, iteration: 188986
loss: 0.9605545401573181,grad_norm: 0.9491107962209957, iteration: 188987
loss: 1.0394411087036133,grad_norm: 0.9719659162656035, iteration: 188988
loss: 0.9999120831489563,grad_norm: 0.9999991512154881, iteration: 188989
loss: 1.0268588066101074,grad_norm: 0.999999392912141, iteration: 188990
loss: 0.9595157504081726,grad_norm: 0.9999994478280481, iteration: 188991
loss: 1.0328114032745361,grad_norm: 0.9999991953769529, iteration: 188992
loss: 1.0233972072601318,grad_norm: 0.8979544535223271, iteration: 188993
loss: 1.035477876663208,grad_norm: 0.9999994787590247, iteration: 188994
loss: 0.9980632066726685,grad_norm: 0.9548768561352056, iteration: 188995
loss: 0.9975142478942871,grad_norm: 0.9999990360018407, iteration: 188996
loss: 0.9623174071311951,grad_norm: 0.9999990836527779, iteration: 188997
loss: 0.9755750894546509,grad_norm: 0.9569573840746545, iteration: 188998
loss: 0.9919262528419495,grad_norm: 0.9999990137503945, iteration: 188999
loss: 1.0245498418807983,grad_norm: 0.9999996811807872, iteration: 189000
loss: 1.0054465532302856,grad_norm: 0.9974969121513858, iteration: 189001
loss: 1.010063648223877,grad_norm: 0.9755251125629002, iteration: 189002
loss: 1.0021278858184814,grad_norm: 0.9999990996339342, iteration: 189003
loss: 0.9869881868362427,grad_norm: 0.8471752380887778, iteration: 189004
loss: 0.9887197017669678,grad_norm: 0.9999991695459725, iteration: 189005
loss: 1.010549545288086,grad_norm: 0.9147627147594034, iteration: 189006
loss: 1.0129191875457764,grad_norm: 0.9437317547200166, iteration: 189007
loss: 1.0050690174102783,grad_norm: 0.9754634377446173, iteration: 189008
loss: 1.046715259552002,grad_norm: 0.9999992494350962, iteration: 189009
loss: 0.9949750900268555,grad_norm: 0.8817134147771681, iteration: 189010
loss: 1.011819839477539,grad_norm: 0.9999992015439912, iteration: 189011
loss: 1.0017889738082886,grad_norm: 0.8306603222149159, iteration: 189012
loss: 1.028275728225708,grad_norm: 0.8677902494698383, iteration: 189013
loss: 1.0019044876098633,grad_norm: 0.8777268386963122, iteration: 189014
loss: 1.0060540437698364,grad_norm: 0.9999990680946875, iteration: 189015
loss: 1.0261110067367554,grad_norm: 0.9999990946387092, iteration: 189016
loss: 0.9857460856437683,grad_norm: 0.999999215396438, iteration: 189017
loss: 0.9784241914749146,grad_norm: 0.8750632204884965, iteration: 189018
loss: 1.017712950706482,grad_norm: 0.9108880621797739, iteration: 189019
loss: 0.9821712374687195,grad_norm: 0.8212717539694433, iteration: 189020
loss: 1.011150598526001,grad_norm: 0.9999992855655533, iteration: 189021
loss: 1.0093172788619995,grad_norm: 0.9999991372861886, iteration: 189022
loss: 1.0031238794326782,grad_norm: 0.9999989469556252, iteration: 189023
loss: 1.0065243244171143,grad_norm: 0.9999999236628767, iteration: 189024
loss: 1.010524034500122,grad_norm: 0.8364879985702431, iteration: 189025
loss: 1.009678602218628,grad_norm: 0.9999991384538249, iteration: 189026
loss: 0.9799554347991943,grad_norm: 0.9999991858042044, iteration: 189027
loss: 1.0267621278762817,grad_norm: 0.9999990725883885, iteration: 189028
loss: 1.028944492340088,grad_norm: 0.9999991568819354, iteration: 189029
loss: 1.0355262756347656,grad_norm: 0.9999990694892918, iteration: 189030
loss: 1.0178438425064087,grad_norm: 0.8855781911981141, iteration: 189031
loss: 0.9917284846305847,grad_norm: 0.9999991707606787, iteration: 189032
loss: 1.072615146636963,grad_norm: 0.9999990853220143, iteration: 189033
loss: 1.0048562288284302,grad_norm: 0.9999994933940116, iteration: 189034
loss: 0.989573061466217,grad_norm: 0.9949305924717202, iteration: 189035
loss: 0.9709625244140625,grad_norm: 0.9999991615479152, iteration: 189036
loss: 0.9433925747871399,grad_norm: 0.921683999145788, iteration: 189037
loss: 0.9871074557304382,grad_norm: 0.9999992085673924, iteration: 189038
loss: 0.9841690063476562,grad_norm: 0.9024101383807301, iteration: 189039
loss: 1.0166007280349731,grad_norm: 0.9292852252275846, iteration: 189040
loss: 1.0285910367965698,grad_norm: 0.9999991118766439, iteration: 189041
loss: 1.0129650831222534,grad_norm: 0.9460444816055565, iteration: 189042
loss: 0.9566109776496887,grad_norm: 0.9415328883641796, iteration: 189043
loss: 0.9613258242607117,grad_norm: 0.9999991464811682, iteration: 189044
loss: 0.9932281970977783,grad_norm: 0.9014295105633506, iteration: 189045
loss: 1.0341475009918213,grad_norm: 0.9999992319582249, iteration: 189046
loss: 0.9915972948074341,grad_norm: 0.7871939567973695, iteration: 189047
loss: 1.0306671857833862,grad_norm: 0.9999992269561692, iteration: 189048
loss: 1.021165370941162,grad_norm: 0.8507580510960497, iteration: 189049
loss: 0.9847265481948853,grad_norm: 0.9999998129271757, iteration: 189050
loss: 0.9853301644325256,grad_norm: 0.9999990881289974, iteration: 189051
loss: 1.03926682472229,grad_norm: 0.999999297100789, iteration: 189052
loss: 1.0109145641326904,grad_norm: 0.9176410332436015, iteration: 189053
loss: 0.9448923468589783,grad_norm: 0.9226586609144392, iteration: 189054
loss: 0.9676051735877991,grad_norm: 0.9483526090205479, iteration: 189055
loss: 0.9903157949447632,grad_norm: 0.9226071944968525, iteration: 189056
loss: 0.9816538691520691,grad_norm: 0.9999992227363138, iteration: 189057
loss: 1.0181900262832642,grad_norm: 0.999999089758001, iteration: 189058
loss: 1.0340490341186523,grad_norm: 0.9755733752806933, iteration: 189059
loss: 0.9907242059707642,grad_norm: 0.9599020204203412, iteration: 189060
loss: 1.02332603931427,grad_norm: 0.9604366504578025, iteration: 189061
loss: 1.024698257446289,grad_norm: 0.9719740185382238, iteration: 189062
loss: 1.0296952724456787,grad_norm: 0.8793440621091984, iteration: 189063
loss: 0.9908072352409363,grad_norm: 0.99999900439152, iteration: 189064
loss: 0.9961351752281189,grad_norm: 0.9373438455717719, iteration: 189065
loss: 0.9526882171630859,grad_norm: 0.9570225277041169, iteration: 189066
loss: 1.0277042388916016,grad_norm: 0.9999990069772349, iteration: 189067
loss: 0.9911849498748779,grad_norm: 0.9736012226211066, iteration: 189068
loss: 1.019665241241455,grad_norm: 0.9688820054602987, iteration: 189069
loss: 1.0145924091339111,grad_norm: 0.9202030076013995, iteration: 189070
loss: 1.0187475681304932,grad_norm: 0.9999991985657827, iteration: 189071
loss: 1.0400526523590088,grad_norm: 0.950007350668357, iteration: 189072
loss: 0.9998123645782471,grad_norm: 0.9678454366888025, iteration: 189073
loss: 1.0217682123184204,grad_norm: 0.9999992086601046, iteration: 189074
loss: 0.9682507514953613,grad_norm: 0.8868403518141534, iteration: 189075
loss: 1.0057445764541626,grad_norm: 0.8754163695863141, iteration: 189076
loss: 0.9927160739898682,grad_norm: 0.9999992430463968, iteration: 189077
loss: 1.0156733989715576,grad_norm: 0.8796261109012635, iteration: 189078
loss: 1.0761423110961914,grad_norm: 0.9999990691689596, iteration: 189079
loss: 0.9691864848136902,grad_norm: 0.9999990102383982, iteration: 189080
loss: 0.9984683394432068,grad_norm: 0.999999051795718, iteration: 189081
loss: 0.9987969994544983,grad_norm: 0.9038784504218457, iteration: 189082
loss: 0.9836713671684265,grad_norm: 0.9999998185077976, iteration: 189083
loss: 0.9733274579048157,grad_norm: 0.8864367683743766, iteration: 189084
loss: 1.0463759899139404,grad_norm: 0.8272523594884972, iteration: 189085
loss: 1.022262454032898,grad_norm: 0.9999992625601212, iteration: 189086
loss: 0.9858606457710266,grad_norm: 0.9999990052309268, iteration: 189087
loss: 0.9615252614021301,grad_norm: 0.8300026736941358, iteration: 189088
loss: 1.0057332515716553,grad_norm: 0.8524490855196118, iteration: 189089
loss: 0.9762184023857117,grad_norm: 0.9480640844523465, iteration: 189090
loss: 1.0072520971298218,grad_norm: 0.9999992897836015, iteration: 189091
loss: 0.9884920716285706,grad_norm: 0.8722374056199927, iteration: 189092
loss: 0.9783467650413513,grad_norm: 0.953862181264669, iteration: 189093
loss: 1.0039249658584595,grad_norm: 0.83224205564554, iteration: 189094
loss: 1.0014901161193848,grad_norm: 0.9999991452180886, iteration: 189095
loss: 0.9856995344161987,grad_norm: 0.8652529429886779, iteration: 189096
loss: 0.9591798782348633,grad_norm: 0.9505243481567642, iteration: 189097
loss: 1.0189467668533325,grad_norm: 0.9483412027468748, iteration: 189098
loss: 1.039087176322937,grad_norm: 0.9999990338098436, iteration: 189099
loss: 1.0088934898376465,grad_norm: 0.9999991691135142, iteration: 189100
loss: 0.9764720797538757,grad_norm: 0.9999990493404814, iteration: 189101
loss: 1.02590811252594,grad_norm: 0.9999996251842823, iteration: 189102
loss: 0.9744327664375305,grad_norm: 0.8059487009048051, iteration: 189103
loss: 1.0207794904708862,grad_norm: 0.9999996673161965, iteration: 189104
loss: 1.0113950967788696,grad_norm: 0.9999990030190119, iteration: 189105
loss: 1.0086472034454346,grad_norm: 0.9999991456071309, iteration: 189106
loss: 1.0164179801940918,grad_norm: 0.9750276766807837, iteration: 189107
loss: 0.9822063446044922,grad_norm: 0.9999990300326204, iteration: 189108
loss: 1.0173336267471313,grad_norm: 0.9999991799328202, iteration: 189109
loss: 0.9869669675827026,grad_norm: 0.974317398309852, iteration: 189110
loss: 0.9967020153999329,grad_norm: 0.9931968348039784, iteration: 189111
loss: 0.9777500033378601,grad_norm: 0.9999991470277471, iteration: 189112
loss: 1.006248116493225,grad_norm: 0.9999991518517377, iteration: 189113
loss: 0.9995936751365662,grad_norm: 0.9999990849448033, iteration: 189114
loss: 0.976790726184845,grad_norm: 0.9658075143717572, iteration: 189115
loss: 0.9826652407646179,grad_norm: 0.9800851596483386, iteration: 189116
loss: 1.003791332244873,grad_norm: 0.9999991473217317, iteration: 189117
loss: 1.020207405090332,grad_norm: 0.9999999230390799, iteration: 189118
loss: 1.0124166011810303,grad_norm: 0.9999989867023062, iteration: 189119
loss: 0.9848090410232544,grad_norm: 0.9999990501561732, iteration: 189120
loss: 0.9997835755348206,grad_norm: 0.795902688141182, iteration: 189121
loss: 0.9728647470474243,grad_norm: 0.9999990658725783, iteration: 189122
loss: 0.9717216491699219,grad_norm: 0.7320046404338756, iteration: 189123
loss: 0.9958975911140442,grad_norm: 0.9999991504456779, iteration: 189124
loss: 1.0565139055252075,grad_norm: 0.9999999526053547, iteration: 189125
loss: 0.9694812297821045,grad_norm: 0.9999991354224113, iteration: 189126
loss: 1.092028260231018,grad_norm: 0.877763905816633, iteration: 189127
loss: 0.9948354959487915,grad_norm: 0.9999991740048337, iteration: 189128
loss: 1.0062599182128906,grad_norm: 0.8841413678687979, iteration: 189129
loss: 1.0314033031463623,grad_norm: 0.8826228352817755, iteration: 189130
loss: 1.0076440572738647,grad_norm: 0.9999992295818032, iteration: 189131
loss: 1.0133939981460571,grad_norm: 0.9218519687667016, iteration: 189132
loss: 0.9824946522712708,grad_norm: 0.9999990470575119, iteration: 189133
loss: 1.0084422826766968,grad_norm: 0.9891602718742287, iteration: 189134
loss: 1.007839322090149,grad_norm: 0.9999991645680848, iteration: 189135
loss: 1.0336943864822388,grad_norm: 0.7782473449530328, iteration: 189136
loss: 1.0205192565917969,grad_norm: 0.9999994642913179, iteration: 189137
loss: 0.9712644815444946,grad_norm: 0.9999992145158249, iteration: 189138
loss: 1.0074883699417114,grad_norm: 0.888753751939243, iteration: 189139
loss: 0.9991546869277954,grad_norm: 0.9111477687802574, iteration: 189140
loss: 1.0094681978225708,grad_norm: 0.9999991759551342, iteration: 189141
loss: 0.9904321432113647,grad_norm: 0.8992469734723025, iteration: 189142
loss: 1.0018489360809326,grad_norm: 0.9651442627240543, iteration: 189143
loss: 1.0157232284545898,grad_norm: 0.8332293017599205, iteration: 189144
loss: 1.0411932468414307,grad_norm: 0.9999992923922634, iteration: 189145
loss: 1.0012590885162354,grad_norm: 0.9999990174777396, iteration: 189146
loss: 1.026991605758667,grad_norm: 0.9999995312895933, iteration: 189147
loss: 0.9908523559570312,grad_norm: 0.9999989890871767, iteration: 189148
loss: 0.9745925068855286,grad_norm: 0.9689474951476805, iteration: 189149
loss: 1.0457403659820557,grad_norm: 0.9541552058586182, iteration: 189150
loss: 1.0353153944015503,grad_norm: 0.8823126156373051, iteration: 189151
loss: 1.0304672718048096,grad_norm: 0.9999992235525872, iteration: 189152
loss: 0.9936631321907043,grad_norm: 0.9548966108843336, iteration: 189153
loss: 1.0231106281280518,grad_norm: 0.9999991991329326, iteration: 189154
loss: 0.992326021194458,grad_norm: 0.9999992037362231, iteration: 189155
loss: 0.9650678038597107,grad_norm: 0.9999991062246064, iteration: 189156
loss: 0.9813447594642639,grad_norm: 0.9426768775117754, iteration: 189157
loss: 1.0151861906051636,grad_norm: 0.999999719028642, iteration: 189158
loss: 0.9977412819862366,grad_norm: 0.7999413185502222, iteration: 189159
loss: 1.048966884613037,grad_norm: 0.9999990644521983, iteration: 189160
loss: 1.0118629932403564,grad_norm: 0.8890426278048222, iteration: 189161
loss: 1.0088696479797363,grad_norm: 0.8969446667412847, iteration: 189162
loss: 1.0217496156692505,grad_norm: 0.9999994026430045, iteration: 189163
loss: 0.9698837995529175,grad_norm: 0.8497482272147291, iteration: 189164
loss: 1.003345251083374,grad_norm: 0.9696429287948738, iteration: 189165
loss: 1.009540319442749,grad_norm: 0.8138346726833121, iteration: 189166
loss: 1.00489342212677,grad_norm: 0.8791823927301189, iteration: 189167
loss: 0.9941840767860413,grad_norm: 0.9064314625451771, iteration: 189168
loss: 1.015494465827942,grad_norm: 0.859115184739308, iteration: 189169
loss: 1.0170352458953857,grad_norm: 0.957692577972204, iteration: 189170
loss: 0.9984030723571777,grad_norm: 0.9999990261833833, iteration: 189171
loss: 1.0075196027755737,grad_norm: 0.9999992050092888, iteration: 189172
loss: 1.0388867855072021,grad_norm: 0.9999992250531051, iteration: 189173
loss: 0.9826889634132385,grad_norm: 0.844023819126658, iteration: 189174
loss: 1.0000687837600708,grad_norm: 0.8257432033632065, iteration: 189175
loss: 0.9942141175270081,grad_norm: 0.9999990058187955, iteration: 189176
loss: 0.9785735607147217,grad_norm: 0.8670649539946744, iteration: 189177
loss: 0.9840381741523743,grad_norm: 0.9999991664905853, iteration: 189178
loss: 1.0182520151138306,grad_norm: 0.9999992158746165, iteration: 189179
loss: 1.0030474662780762,grad_norm: 0.9514259359371078, iteration: 189180
loss: 1.025533676147461,grad_norm: 0.9999992806762723, iteration: 189181
loss: 0.9858722686767578,grad_norm: 0.8254439661859436, iteration: 189182
loss: 1.0257043838500977,grad_norm: 0.9999989947122745, iteration: 189183
loss: 0.98097825050354,grad_norm: 0.9999990739700697, iteration: 189184
loss: 0.9877866506576538,grad_norm: 0.807994333547492, iteration: 189185
loss: 1.0008368492126465,grad_norm: 0.8293334155203096, iteration: 189186
loss: 0.9933705925941467,grad_norm: 0.9779205691899767, iteration: 189187
loss: 0.9843279123306274,grad_norm: 0.8887460806258927, iteration: 189188
loss: 0.9886806607246399,grad_norm: 0.89555980079502, iteration: 189189
loss: 0.9885439872741699,grad_norm: 0.9591773300922222, iteration: 189190
loss: 1.0168033838272095,grad_norm: 0.9000474494884494, iteration: 189191
loss: 0.9887727499008179,grad_norm: 0.974191130997817, iteration: 189192
loss: 0.9944750070571899,grad_norm: 0.9999992135127223, iteration: 189193
loss: 0.9700289964675903,grad_norm: 0.9466214654792116, iteration: 189194
loss: 1.0186398029327393,grad_norm: 0.9819411619330916, iteration: 189195
loss: 0.9578230381011963,grad_norm: 0.9999989507016791, iteration: 189196
loss: 0.9904513359069824,grad_norm: 0.9999992777616848, iteration: 189197
loss: 1.009390115737915,grad_norm: 0.8210706306149613, iteration: 189198
loss: 1.0009493827819824,grad_norm: 0.9285302682833215, iteration: 189199
loss: 1.0085012912750244,grad_norm: 0.9999991367525958, iteration: 189200
loss: 0.9796099066734314,grad_norm: 0.9999990774274201, iteration: 189201
loss: 1.0157952308654785,grad_norm: 0.9999991692317419, iteration: 189202
loss: 0.9810689091682434,grad_norm: 0.8358128627927952, iteration: 189203
loss: 1.049323320388794,grad_norm: 0.9999993042258661, iteration: 189204
loss: 1.0201271772384644,grad_norm: 0.7907648894640249, iteration: 189205
loss: 1.008002758026123,grad_norm: 0.8189518652102835, iteration: 189206
loss: 1.021968126296997,grad_norm: 0.8795217339719311, iteration: 189207
loss: 1.024169921875,grad_norm: 1.0000000008397922, iteration: 189208
loss: 1.0226737260818481,grad_norm: 0.9294441198199673, iteration: 189209
loss: 1.0026376247406006,grad_norm: 0.8456275913007448, iteration: 189210
loss: 1.0235254764556885,grad_norm: 0.9999989222386995, iteration: 189211
loss: 1.0012199878692627,grad_norm: 0.9999988595786088, iteration: 189212
loss: 1.0007373094558716,grad_norm: 0.9999991771244086, iteration: 189213
loss: 1.0384235382080078,grad_norm: 0.9999991050648114, iteration: 189214
loss: 1.030992865562439,grad_norm: 0.9999994611228225, iteration: 189215
loss: 0.9674199819564819,grad_norm: 0.8945224899524336, iteration: 189216
loss: 1.02981698513031,grad_norm: 0.9137039664025565, iteration: 189217
loss: 1.0204250812530518,grad_norm: 0.9865347857666916, iteration: 189218
loss: 1.0115267038345337,grad_norm: 0.8937862505781337, iteration: 189219
loss: 1.0110303163528442,grad_norm: 0.9916925451717495, iteration: 189220
loss: 0.9650542736053467,grad_norm: 0.9999990388805038, iteration: 189221
loss: 1.002386450767517,grad_norm: 0.8670026733680009, iteration: 189222
loss: 1.0010484457015991,grad_norm: 0.9103812924954938, iteration: 189223
loss: 1.0225633382797241,grad_norm: 0.9956417866003832, iteration: 189224
loss: 0.9897971153259277,grad_norm: 0.9999990457750436, iteration: 189225
loss: 1.005959153175354,grad_norm: 0.8381714322254836, iteration: 189226
loss: 0.9895447492599487,grad_norm: 0.8889606752651035, iteration: 189227
loss: 1.0297987461090088,grad_norm: 0.7865663756118918, iteration: 189228
loss: 0.9925917387008667,grad_norm: 0.8438543579798081, iteration: 189229
loss: 1.0074255466461182,grad_norm: 0.9894598672549799, iteration: 189230
loss: 1.0087172985076904,grad_norm: 0.9603440347045255, iteration: 189231
loss: 1.0127274990081787,grad_norm: 0.9822885637512536, iteration: 189232
loss: 1.0018739700317383,grad_norm: 0.9992235740874512, iteration: 189233
loss: 1.007576584815979,grad_norm: 0.8498651574355034, iteration: 189234
loss: 0.9939366579055786,grad_norm: 0.9451463258444605, iteration: 189235
loss: 1.0828908681869507,grad_norm: 0.9999998725079262, iteration: 189236
loss: 0.9751377701759338,grad_norm: 0.9999991293980581, iteration: 189237
loss: 0.9798434972763062,grad_norm: 0.9999990131545395, iteration: 189238
loss: 0.9711984992027283,grad_norm: 0.8625333998964769, iteration: 189239
loss: 1.016647458076477,grad_norm: 0.9503888558178409, iteration: 189240
loss: 1.0011169910430908,grad_norm: 0.9999990784031862, iteration: 189241
loss: 1.026180624961853,grad_norm: 0.8515062346090226, iteration: 189242
loss: 0.9954932332038879,grad_norm: 0.9999991826474561, iteration: 189243
loss: 0.9850041270256042,grad_norm: 0.813046643825985, iteration: 189244
loss: 1.0031040906906128,grad_norm: 0.999999044805582, iteration: 189245
loss: 1.0261527299880981,grad_norm: 0.999999405569585, iteration: 189246
loss: 0.9973797798156738,grad_norm: 0.9077017631210493, iteration: 189247
loss: 0.9785113334655762,grad_norm: 0.9024710953000562, iteration: 189248
loss: 1.0304211378097534,grad_norm: 0.9999993011031145, iteration: 189249
loss: 0.9556088447570801,grad_norm: 0.999999060550777, iteration: 189250
loss: 0.9870016574859619,grad_norm: 0.790334831239794, iteration: 189251
loss: 1.0075312852859497,grad_norm: 0.9999990732485884, iteration: 189252
loss: 0.983364462852478,grad_norm: 0.9093465137275275, iteration: 189253
loss: 0.9951338171958923,grad_norm: 0.9999991526864919, iteration: 189254
loss: 1.036022424697876,grad_norm: 0.8447189210695902, iteration: 189255
loss: 0.9865832924842834,grad_norm: 0.9999992174101657, iteration: 189256
loss: 0.9901394248008728,grad_norm: 0.8340291011958559, iteration: 189257
loss: 0.9844022989273071,grad_norm: 0.9534564532325527, iteration: 189258
loss: 1.085305094718933,grad_norm: 0.929670939858523, iteration: 189259
loss: 1.0210623741149902,grad_norm: 0.7598244527649721, iteration: 189260
loss: 1.094408631324768,grad_norm: 0.967635271809286, iteration: 189261
loss: 1.0265172719955444,grad_norm: 0.918021834738062, iteration: 189262
loss: 1.0128204822540283,grad_norm: 0.8641104402927814, iteration: 189263
loss: 1.0031884908676147,grad_norm: 0.9868999580036671, iteration: 189264
loss: 1.0003479719161987,grad_norm: 0.999999174450324, iteration: 189265
loss: 1.1158866882324219,grad_norm: 0.9176160922795327, iteration: 189266
loss: 1.0144261121749878,grad_norm: 0.9350932826388191, iteration: 189267
loss: 1.0136407613754272,grad_norm: 0.9999993077033971, iteration: 189268
loss: 1.0024107694625854,grad_norm: 0.9999991368706186, iteration: 189269
loss: 0.9853264689445496,grad_norm: 0.9999991309628521, iteration: 189270
loss: 1.0805902481079102,grad_norm: 0.9999990703188824, iteration: 189271
loss: 1.037455677986145,grad_norm: 0.9999991890360944, iteration: 189272
loss: 1.059524655342102,grad_norm: 0.9999992330900043, iteration: 189273
loss: 1.013396143913269,grad_norm: 0.9576027864058989, iteration: 189274
loss: 1.0139362812042236,grad_norm: 0.9675955642299922, iteration: 189275
loss: 1.002073884010315,grad_norm: 0.9204856542033212, iteration: 189276
loss: 0.9857560992240906,grad_norm: 0.920523672064404, iteration: 189277
loss: 1.0269691944122314,grad_norm: 0.9999991611879403, iteration: 189278
loss: 0.9824512004852295,grad_norm: 0.9999992571374389, iteration: 189279
loss: 0.9929919242858887,grad_norm: 0.9466753049945263, iteration: 189280
loss: 0.9998162388801575,grad_norm: 0.7719246791468314, iteration: 189281
loss: 1.0047556161880493,grad_norm: 0.8844873839717102, iteration: 189282
loss: 1.0410090684890747,grad_norm: 0.9999993523196289, iteration: 189283
loss: 1.0053197145462036,grad_norm: 0.999999191903846, iteration: 189284
loss: 0.9756256937980652,grad_norm: 0.9999990895930382, iteration: 189285
loss: 0.988880455493927,grad_norm: 0.9999990141468117, iteration: 189286
loss: 1.0085079669952393,grad_norm: 0.9999995352751263, iteration: 189287
loss: 0.9890604019165039,grad_norm: 0.8802429960223274, iteration: 189288
loss: 1.015411615371704,grad_norm: 0.9999991237436249, iteration: 189289
loss: 1.0432208776474,grad_norm: 0.8712835680166419, iteration: 189290
loss: 0.9876428246498108,grad_norm: 0.8246023764858414, iteration: 189291
loss: 1.0407838821411133,grad_norm: 0.9999998181166738, iteration: 189292
loss: 1.0196706056594849,grad_norm: 0.8519032715801943, iteration: 189293
loss: 1.000786542892456,grad_norm: 0.9999992613908595, iteration: 189294
loss: 0.9765084385871887,grad_norm: 0.9999990941827557, iteration: 189295
loss: 1.006436824798584,grad_norm: 0.9999991744591055, iteration: 189296
loss: 1.0233064889907837,grad_norm: 0.9999993336262979, iteration: 189297
loss: 1.0301650762557983,grad_norm: 0.9999992576747884, iteration: 189298
loss: 0.9732202887535095,grad_norm: 0.9999991115393063, iteration: 189299
loss: 1.0081065893173218,grad_norm: 0.8674874013906746, iteration: 189300
loss: 0.9714368581771851,grad_norm: 0.8465839207972918, iteration: 189301
loss: 1.0136107206344604,grad_norm: 0.9204431954976493, iteration: 189302
loss: 1.0139241218566895,grad_norm: 0.8207238263066047, iteration: 189303
loss: 1.016864538192749,grad_norm: 0.9999990833658272, iteration: 189304
loss: 0.9879153966903687,grad_norm: 0.9298762320141453, iteration: 189305
loss: 1.0061235427856445,grad_norm: 0.9999991441308284, iteration: 189306
loss: 0.9810164570808411,grad_norm: 0.9661986143165239, iteration: 189307
loss: 0.9827669262886047,grad_norm: 0.9495404345508084, iteration: 189308
loss: 0.9834211468696594,grad_norm: 0.9999990733997287, iteration: 189309
loss: 1.0252354145050049,grad_norm: 0.9999990728495409, iteration: 189310
loss: 1.0045650005340576,grad_norm: 0.9399926020854678, iteration: 189311
loss: 1.0109882354736328,grad_norm: 0.9999992039671256, iteration: 189312
loss: 1.0255556106567383,grad_norm: 0.9999990922094402, iteration: 189313
loss: 0.9918352961540222,grad_norm: 0.95873152211481, iteration: 189314
loss: 0.9761031866073608,grad_norm: 0.8455287862185691, iteration: 189315
loss: 0.9878212213516235,grad_norm: 0.9999991713265578, iteration: 189316
loss: 1.0217887163162231,grad_norm: 0.9209966122722195, iteration: 189317
loss: 1.0281308889389038,grad_norm: 0.9223062708985267, iteration: 189318
loss: 1.0222417116165161,grad_norm: 0.9999992180776673, iteration: 189319
loss: 0.9821220636367798,grad_norm: 0.9999989878315889, iteration: 189320
loss: 0.987034261226654,grad_norm: 0.9999990898907783, iteration: 189321
loss: 0.9599277973175049,grad_norm: 0.9999992593024791, iteration: 189322
loss: 1.0119518041610718,grad_norm: 0.9067666101320919, iteration: 189323
loss: 0.9814298152923584,grad_norm: 0.9999990423196272, iteration: 189324
loss: 1.0008622407913208,grad_norm: 0.9999990147743512, iteration: 189325
loss: 0.9759120345115662,grad_norm: 0.9443872371326307, iteration: 189326
loss: 1.0177377462387085,grad_norm: 0.8980724911783384, iteration: 189327
loss: 1.02226722240448,grad_norm: 0.9999991304943587, iteration: 189328
loss: 1.0050734281539917,grad_norm: 0.9999993906760362, iteration: 189329
loss: 1.0528490543365479,grad_norm: 0.9999993508407473, iteration: 189330
loss: 0.9899959564208984,grad_norm: 0.9999992487509578, iteration: 189331
loss: 0.9984035491943359,grad_norm: 0.9660091481204128, iteration: 189332
loss: 1.0139076709747314,grad_norm: 0.8971238672948402, iteration: 189333
loss: 0.9873045086860657,grad_norm: 0.8760029443085002, iteration: 189334
loss: 1.011324405670166,grad_norm: 0.9169101509278758, iteration: 189335
loss: 1.0029648542404175,grad_norm: 0.8809901935923783, iteration: 189336
loss: 1.0321301221847534,grad_norm: 0.7283600268176686, iteration: 189337
loss: 1.0012437105178833,grad_norm: 0.9138947451678362, iteration: 189338
loss: 1.0103402137756348,grad_norm: 0.9532494832322498, iteration: 189339
loss: 1.0322123765945435,grad_norm: 0.9999991820287282, iteration: 189340
loss: 0.9861027598381042,grad_norm: 0.8147974065085429, iteration: 189341
loss: 0.9815393090248108,grad_norm: 0.9791486508331213, iteration: 189342
loss: 0.972827672958374,grad_norm: 0.9999991739348266, iteration: 189343
loss: 0.9579852819442749,grad_norm: 0.9300419038633537, iteration: 189344
loss: 0.9893485903739929,grad_norm: 0.9999991411872436, iteration: 189345
loss: 0.9945186972618103,grad_norm: 0.9340327015247446, iteration: 189346
loss: 1.0115315914154053,grad_norm: 0.9428627069838363, iteration: 189347
loss: 0.991627037525177,grad_norm: 0.9228036220360278, iteration: 189348
loss: 0.967277467250824,grad_norm: 0.9999991868574984, iteration: 189349
loss: 0.9691911935806274,grad_norm: 0.9283717021550156, iteration: 189350
loss: 0.9469361901283264,grad_norm: 0.9999992847179566, iteration: 189351
loss: 1.0161402225494385,grad_norm: 0.9999990305748213, iteration: 189352
loss: 1.007755994796753,grad_norm: 0.9896928637290874, iteration: 189353
loss: 1.0350760221481323,grad_norm: 0.9999991270898383, iteration: 189354
loss: 0.9536643624305725,grad_norm: 0.9830578603638658, iteration: 189355
loss: 0.9851243495941162,grad_norm: 0.9999991853237405, iteration: 189356
loss: 0.9854015111923218,grad_norm: 0.8773643805795126, iteration: 189357
loss: 1.0100595951080322,grad_norm: 0.9999991583506459, iteration: 189358
loss: 0.9832199215888977,grad_norm: 0.9126662404813954, iteration: 189359
loss: 0.9962887167930603,grad_norm: 0.9999991142559688, iteration: 189360
loss: 0.998729944229126,grad_norm: 0.9999997112619018, iteration: 189361
loss: 1.0468182563781738,grad_norm: 0.9999992106206405, iteration: 189362
loss: 1.014267086982727,grad_norm: 0.9797709753044185, iteration: 189363
loss: 1.0085020065307617,grad_norm: 0.9986540261182094, iteration: 189364
loss: 1.0176777839660645,grad_norm: 0.9016883310103043, iteration: 189365
loss: 1.0094274282455444,grad_norm: 0.9999990339037732, iteration: 189366
loss: 1.0172195434570312,grad_norm: 0.9999990269814749, iteration: 189367
loss: 1.001969337463379,grad_norm: 0.9479197062363423, iteration: 189368
loss: 0.9829109311103821,grad_norm: 0.8680988480374606, iteration: 189369
loss: 1.071093201637268,grad_norm: 0.9999991212086939, iteration: 189370
loss: 1.0023210048675537,grad_norm: 0.9999993438241711, iteration: 189371
loss: 1.0246646404266357,grad_norm: 0.7633740313200614, iteration: 189372
loss: 0.9901654720306396,grad_norm: 0.9343888371556043, iteration: 189373
loss: 1.0060596466064453,grad_norm: 0.9038409271623692, iteration: 189374
loss: 0.966949462890625,grad_norm: 0.9710472366511939, iteration: 189375
loss: 0.983485996723175,grad_norm: 0.9999990271304632, iteration: 189376
loss: 0.9896150231361389,grad_norm: 0.9422522321587143, iteration: 189377
loss: 1.015551209449768,grad_norm: 0.8632166987861262, iteration: 189378
loss: 0.9736950993537903,grad_norm: 0.9999990485271649, iteration: 189379
loss: 0.9586381316184998,grad_norm: 0.9861283148727056, iteration: 189380
loss: 0.9801446199417114,grad_norm: 0.8245085613055938, iteration: 189381
loss: 1.012790322303772,grad_norm: 0.9999997251437638, iteration: 189382
loss: 1.0048712491989136,grad_norm: 0.9464270048497528, iteration: 189383
loss: 1.0117851495742798,grad_norm: 0.9628216386303247, iteration: 189384
loss: 1.0029633045196533,grad_norm: 0.9380371918494076, iteration: 189385
loss: 1.0093441009521484,grad_norm: 0.989743913677828, iteration: 189386
loss: 1.0426216125488281,grad_norm: 0.9999991532861259, iteration: 189387
loss: 0.9946566820144653,grad_norm: 0.9943433049839734, iteration: 189388
loss: 1.0004574060440063,grad_norm: 0.8859494830722567, iteration: 189389
loss: 0.9957330822944641,grad_norm: 0.8717990577070164, iteration: 189390
loss: 1.006668210029602,grad_norm: 0.9999992409723194, iteration: 189391
loss: 0.9970572590827942,grad_norm: 0.9999990534356388, iteration: 189392
loss: 0.9691832661628723,grad_norm: 0.9550501693078011, iteration: 189393
loss: 0.984956681728363,grad_norm: 0.9176855641744939, iteration: 189394
loss: 1.0089365243911743,grad_norm: 0.9634813684077917, iteration: 189395
loss: 1.0021215677261353,grad_norm: 0.9626472687870794, iteration: 189396
loss: 0.959024965763092,grad_norm: 0.99999909968232, iteration: 189397
loss: 0.995930016040802,grad_norm: 0.9980404144992789, iteration: 189398
loss: 0.9593282341957092,grad_norm: 0.9999991659744936, iteration: 189399
loss: 0.9839010238647461,grad_norm: 0.9999990865058851, iteration: 189400
loss: 0.9818881750106812,grad_norm: 0.9999989975283758, iteration: 189401
loss: 1.0077086687088013,grad_norm: 0.813888721555496, iteration: 189402
loss: 0.9549060463905334,grad_norm: 0.999999091695098, iteration: 189403
loss: 1.0246903896331787,grad_norm: 0.9999991227586265, iteration: 189404
loss: 0.9601163268089294,grad_norm: 0.9999993332192253, iteration: 189405
loss: 0.972978949546814,grad_norm: 0.862857406758089, iteration: 189406
loss: 0.9714105725288391,grad_norm: 0.8245489353110066, iteration: 189407
loss: 1.0255649089813232,grad_norm: 0.8727001902966002, iteration: 189408
loss: 1.0043898820877075,grad_norm: 0.9275743260141305, iteration: 189409
loss: 1.0062873363494873,grad_norm: 0.8612326240663898, iteration: 189410
loss: 1.0259740352630615,grad_norm: 0.901786830789768, iteration: 189411
loss: 1.033515453338623,grad_norm: 0.9206815036938178, iteration: 189412
loss: 0.9910240173339844,grad_norm: 0.9722437275710181, iteration: 189413
loss: 0.9656811952590942,grad_norm: 0.9999990510365518, iteration: 189414
loss: 1.1381876468658447,grad_norm: 0.9999993796551792, iteration: 189415
loss: 0.9838963150978088,grad_norm: 0.8295916145443524, iteration: 189416
loss: 1.0289483070373535,grad_norm: 0.8322028477486509, iteration: 189417
loss: 1.0050503015518188,grad_norm: 0.7909289419383472, iteration: 189418
loss: 1.0524660348892212,grad_norm: 0.9259876072197647, iteration: 189419
loss: 0.9952075481414795,grad_norm: 0.9999991962587068, iteration: 189420
loss: 0.970362663269043,grad_norm: 0.9999990784532327, iteration: 189421
loss: 1.0101945400238037,grad_norm: 0.9999991546770897, iteration: 189422
loss: 1.0277615785598755,grad_norm: 0.991087681701182, iteration: 189423
loss: 0.9966732859611511,grad_norm: 0.9999990111779327, iteration: 189424
loss: 1.057826280593872,grad_norm: 0.9721688717364431, iteration: 189425
loss: 1.095721960067749,grad_norm: 0.9999994250771741, iteration: 189426
loss: 0.9923044443130493,grad_norm: 0.9701690035138026, iteration: 189427
loss: 0.9959896802902222,grad_norm: 0.999998964460298, iteration: 189428
loss: 0.9753670692443848,grad_norm: 0.7707147720125802, iteration: 189429
loss: 1.0259861946105957,grad_norm: 0.8682348438985207, iteration: 189430
loss: 1.0045037269592285,grad_norm: 0.9999990596621801, iteration: 189431
loss: 0.9672012329101562,grad_norm: 0.8592376310231274, iteration: 189432
loss: 1.0063204765319824,grad_norm: 0.8549073791847881, iteration: 189433
loss: 0.9715867042541504,grad_norm: 0.9999990036442339, iteration: 189434
loss: 1.0330255031585693,grad_norm: 0.9864101129094622, iteration: 189435
loss: 0.9905629754066467,grad_norm: 0.9999993047739069, iteration: 189436
loss: 1.0559301376342773,grad_norm: 0.999999244546887, iteration: 189437
loss: 1.0194789171218872,grad_norm: 0.9889637404460838, iteration: 189438
loss: 0.9750898480415344,grad_norm: 0.9826251572871624, iteration: 189439
loss: 1.0034157037734985,grad_norm: 0.9279966783931888, iteration: 189440
loss: 1.1196188926696777,grad_norm: 0.9999991676293395, iteration: 189441
loss: 0.985560953617096,grad_norm: 0.9999989600730173, iteration: 189442
loss: 1.0179470777511597,grad_norm: 0.8996551029784883, iteration: 189443
loss: 0.9556320309638977,grad_norm: 0.9999991749861964, iteration: 189444
loss: 0.9767801761627197,grad_norm: 0.967912557941577, iteration: 189445
loss: 0.9911423921585083,grad_norm: 0.9026953390550152, iteration: 189446
loss: 0.9629511833190918,grad_norm: 0.9615848643663645, iteration: 189447
loss: 1.034448504447937,grad_norm: 0.9999995338753239, iteration: 189448
loss: 0.975735068321228,grad_norm: 0.8958021125472477, iteration: 189449
loss: 1.0007225275039673,grad_norm: 0.999999195529619, iteration: 189450
loss: 0.9471637606620789,grad_norm: 0.9888067169587076, iteration: 189451
loss: 1.0274572372436523,grad_norm: 0.8638530943106095, iteration: 189452
loss: 1.102953314781189,grad_norm: 0.9999994879306001, iteration: 189453
loss: 1.0032824277877808,grad_norm: 0.9755322394356758, iteration: 189454
loss: 0.9912247061729431,grad_norm: 0.9999990095846617, iteration: 189455
loss: 1.0316147804260254,grad_norm: 0.9999991474062736, iteration: 189456
loss: 1.0681965351104736,grad_norm: 0.999999308894873, iteration: 189457
loss: 1.0079642534255981,grad_norm: 0.999066160080399, iteration: 189458
loss: 1.0760879516601562,grad_norm: 0.9999998555310498, iteration: 189459
loss: 1.0339347124099731,grad_norm: 0.9999991328196931, iteration: 189460
loss: 1.021629810333252,grad_norm: 0.9999992640033352, iteration: 189461
loss: 0.9733911752700806,grad_norm: 0.8029586980897361, iteration: 189462
loss: 0.9703471064567566,grad_norm: 0.9331845581055317, iteration: 189463
loss: 0.9967348575592041,grad_norm: 0.9999989808087827, iteration: 189464
loss: 0.9837303161621094,grad_norm: 0.8909391635293001, iteration: 189465
loss: 1.0026506185531616,grad_norm: 0.8656789614790773, iteration: 189466
loss: 1.023138403892517,grad_norm: 0.9999990976967507, iteration: 189467
loss: 0.9991511702537537,grad_norm: 0.9951605997997328, iteration: 189468
loss: 0.9921602010726929,grad_norm: 0.8934329295253672, iteration: 189469
loss: 0.9848672151565552,grad_norm: 0.8870305197947056, iteration: 189470
loss: 0.9862267374992371,grad_norm: 0.9999991289174118, iteration: 189471
loss: 1.020290732383728,grad_norm: 0.9999990719306808, iteration: 189472
loss: 1.003280520439148,grad_norm: 0.9999993248720741, iteration: 189473
loss: 1.0491695404052734,grad_norm: 0.9999990246293275, iteration: 189474
loss: 1.0451087951660156,grad_norm: 0.9999991762545988, iteration: 189475
loss: 0.9830588102340698,grad_norm: 0.9999991036308576, iteration: 189476
loss: 0.9702269434928894,grad_norm: 0.9999990168537177, iteration: 189477
loss: 1.0172226428985596,grad_norm: 0.9999992385523633, iteration: 189478
loss: 1.0700657367706299,grad_norm: 0.9999996024505022, iteration: 189479
loss: 1.0236319303512573,grad_norm: 0.9894058816820532, iteration: 189480
loss: 1.0483497381210327,grad_norm: 0.9999999201113922, iteration: 189481
loss: 1.0151482820510864,grad_norm: 0.9999989645691072, iteration: 189482
loss: 1.019059658050537,grad_norm: 0.9999990727963662, iteration: 189483
loss: 0.974943220615387,grad_norm: 0.9554956709783348, iteration: 189484
loss: 1.002381443977356,grad_norm: 0.9999991914130637, iteration: 189485
loss: 0.9609649777412415,grad_norm: 0.9746639852778127, iteration: 189486
loss: 0.9848079085350037,grad_norm: 0.9783832589917985, iteration: 189487
loss: 0.9907902479171753,grad_norm: 0.9999991418469308, iteration: 189488
loss: 0.9921532869338989,grad_norm: 0.9999991367472781, iteration: 189489
loss: 1.1259567737579346,grad_norm: 0.9999993780651981, iteration: 189490
loss: 1.0484873056411743,grad_norm: 0.9999992515858236, iteration: 189491
loss: 0.9830654263496399,grad_norm: 0.9769662582379974, iteration: 189492
loss: 1.0366071462631226,grad_norm: 0.8761437695560755, iteration: 189493
loss: 1.0229053497314453,grad_norm: 0.993762896975316, iteration: 189494
loss: 1.0345386266708374,grad_norm: 0.9999993879015414, iteration: 189495
loss: 1.0140438079833984,grad_norm: 0.7933191557571695, iteration: 189496
loss: 0.9944453239440918,grad_norm: 0.9999992008098316, iteration: 189497
loss: 1.025614619255066,grad_norm: 0.8657747966322141, iteration: 189498
loss: 0.990630030632019,grad_norm: 0.962073898721657, iteration: 189499
loss: 0.9945425391197205,grad_norm: 0.9999991048998819, iteration: 189500
loss: 0.9610036015510559,grad_norm: 0.8920709276268843, iteration: 189501
loss: 1.0324764251708984,grad_norm: 0.9999990398325427, iteration: 189502
loss: 1.0224487781524658,grad_norm: 0.9999991211294667, iteration: 189503
loss: 1.273491621017456,grad_norm: 0.9999993359239536, iteration: 189504
loss: 1.02010977268219,grad_norm: 0.7339096175295104, iteration: 189505
loss: 1.0324745178222656,grad_norm: 0.9999993656576972, iteration: 189506
loss: 1.0105468034744263,grad_norm: 0.9999991357738334, iteration: 189507
loss: 1.0011470317840576,grad_norm: 0.8981755770068515, iteration: 189508
loss: 1.0076560974121094,grad_norm: 0.9999991551727541, iteration: 189509
loss: 1.01222562789917,grad_norm: 0.9999990475118142, iteration: 189510
loss: 1.005372405052185,grad_norm: 0.9999989618202625, iteration: 189511
loss: 1.0432043075561523,grad_norm: 0.9999991549730755, iteration: 189512
loss: 1.082302451133728,grad_norm: 0.9999993774666903, iteration: 189513
loss: 1.0230538845062256,grad_norm: 0.9999990540281747, iteration: 189514
loss: 0.9947268962860107,grad_norm: 0.9999991098273658, iteration: 189515
loss: 1.0166040658950806,grad_norm: 0.9999991520012584, iteration: 189516
loss: 1.0192610025405884,grad_norm: 0.9999993028100793, iteration: 189517
loss: 0.9878527522087097,grad_norm: 0.9999990628077613, iteration: 189518
loss: 1.1380971670150757,grad_norm: 0.999999316891137, iteration: 189519
loss: 0.9900778532028198,grad_norm: 0.9999996417347708, iteration: 189520
loss: 0.9873883724212646,grad_norm: 0.9102366564627966, iteration: 189521
loss: 1.0225270986557007,grad_norm: 0.9999993017450289, iteration: 189522
loss: 1.0037236213684082,grad_norm: 0.9999990900375888, iteration: 189523
loss: 0.9879431128501892,grad_norm: 0.927883351974778, iteration: 189524
loss: 1.0810960531234741,grad_norm: 0.999999822652723, iteration: 189525
loss: 0.987519383430481,grad_norm: 0.9864020101859686, iteration: 189526
loss: 0.9838064908981323,grad_norm: 0.9999990139864794, iteration: 189527
loss: 1.033672571182251,grad_norm: 0.999999173616899, iteration: 189528
loss: 1.0475798845291138,grad_norm: 0.99999904408669, iteration: 189529
loss: 0.9840667247772217,grad_norm: 0.8494020645568499, iteration: 189530
loss: 0.9974883198738098,grad_norm: 0.9936674639613833, iteration: 189531
loss: 1.0118169784545898,grad_norm: 0.9042703467322065, iteration: 189532
loss: 1.0133700370788574,grad_norm: 0.9086360681197777, iteration: 189533
loss: 1.0534063577651978,grad_norm: 0.9999997666013167, iteration: 189534
loss: 0.9900150895118713,grad_norm: 0.8451562974160308, iteration: 189535
loss: 1.0648936033248901,grad_norm: 0.9999991771840966, iteration: 189536
loss: 1.006907343864441,grad_norm: 0.9999993451631574, iteration: 189537
loss: 1.0061849355697632,grad_norm: 0.9785683786545845, iteration: 189538
loss: 1.0148491859436035,grad_norm: 0.9590028851679246, iteration: 189539
loss: 1.033205509185791,grad_norm: 0.9999991727786449, iteration: 189540
loss: 1.068349838256836,grad_norm: 0.9999991630292502, iteration: 189541
loss: 0.9825528264045715,grad_norm: 0.9999989820224886, iteration: 189542
loss: 1.0304620265960693,grad_norm: 0.9072463041688785, iteration: 189543
loss: 1.03770112991333,grad_norm: 0.9999989918189485, iteration: 189544
loss: 0.9606995582580566,grad_norm: 0.9484196685870726, iteration: 189545
loss: 1.0204001665115356,grad_norm: 0.9640148387859003, iteration: 189546
loss: 1.0212278366088867,grad_norm: 0.9097478287537603, iteration: 189547
loss: 1.0029510259628296,grad_norm: 0.9664261773206533, iteration: 189548
loss: 1.0060371160507202,grad_norm: 0.9999990782184679, iteration: 189549
loss: 0.9755071997642517,grad_norm: 0.9081463335845438, iteration: 189550
loss: 0.9926122426986694,grad_norm: 0.9216581101454401, iteration: 189551
loss: 1.0089666843414307,grad_norm: 0.9999991801341822, iteration: 189552
loss: 0.9817437529563904,grad_norm: 0.999999123423072, iteration: 189553
loss: 1.0357813835144043,grad_norm: 0.8952369505645092, iteration: 189554
loss: 1.0214207172393799,grad_norm: 0.9278479019632029, iteration: 189555
loss: 0.9840537905693054,grad_norm: 0.992114238964856, iteration: 189556
loss: 0.9749728441238403,grad_norm: 0.9414791559266381, iteration: 189557
loss: 0.9977468252182007,grad_norm: 0.9067402156415699, iteration: 189558
loss: 1.009894847869873,grad_norm: 0.9703322135319146, iteration: 189559
loss: 1.0109132528305054,grad_norm: 0.9999990527072556, iteration: 189560
loss: 0.9873485565185547,grad_norm: 0.9999989276998926, iteration: 189561
loss: 1.0074137449264526,grad_norm: 0.9999990897107489, iteration: 189562
loss: 1.0179933309555054,grad_norm: 0.9698614937373597, iteration: 189563
loss: 0.9978623390197754,grad_norm: 0.9999990070961798, iteration: 189564
loss: 0.995069146156311,grad_norm: 0.827839572035939, iteration: 189565
loss: 1.0224967002868652,grad_norm: 0.9096002989802886, iteration: 189566
loss: 1.004528284072876,grad_norm: 0.9545580921040214, iteration: 189567
loss: 0.9749245047569275,grad_norm: 0.9966116244210197, iteration: 189568
loss: 1.0228900909423828,grad_norm: 0.9798653139878146, iteration: 189569
loss: 0.9973621964454651,grad_norm: 0.9990940839954826, iteration: 189570
loss: 1.0019941329956055,grad_norm: 0.9560866676740557, iteration: 189571
loss: 0.9842116832733154,grad_norm: 0.8366586860550087, iteration: 189572
loss: 1.0077160596847534,grad_norm: 0.9999992319198592, iteration: 189573
loss: 1.0318422317504883,grad_norm: 0.9350032736852951, iteration: 189574
loss: 0.9888234734535217,grad_norm: 0.9999990235776477, iteration: 189575
loss: 1.0016919374465942,grad_norm: 0.9070058399744403, iteration: 189576
loss: 0.969426691532135,grad_norm: 0.9734045816657408, iteration: 189577
loss: 1.0475112199783325,grad_norm: 0.9999990848621234, iteration: 189578
loss: 1.0179740190505981,grad_norm: 0.9566982338213713, iteration: 189579
loss: 0.9740266799926758,grad_norm: 0.9999991003549769, iteration: 189580
loss: 1.0244877338409424,grad_norm: 0.9298105109823696, iteration: 189581
loss: 0.9908403158187866,grad_norm: 0.832949430494036, iteration: 189582
loss: 1.0040332078933716,grad_norm: 0.9573989016355507, iteration: 189583
loss: 0.9972731471061707,grad_norm: 0.9052215653186327, iteration: 189584
loss: 1.0169578790664673,grad_norm: 0.9306757164891227, iteration: 189585
loss: 0.9965524077415466,grad_norm: 0.8484458001828318, iteration: 189586
loss: 1.0346893072128296,grad_norm: 0.8920294602487507, iteration: 189587
loss: 0.9895216226577759,grad_norm: 0.9999990380289826, iteration: 189588
loss: 1.0208823680877686,grad_norm: 0.9025511084909043, iteration: 189589
loss: 0.990397036075592,grad_norm: 0.8681341631575298, iteration: 189590
loss: 1.0166528224945068,grad_norm: 0.9180126218902419, iteration: 189591
loss: 1.0161844491958618,grad_norm: 0.9214242284248224, iteration: 189592
loss: 1.0227227210998535,grad_norm: 0.8796936882794307, iteration: 189593
loss: 0.9871516823768616,grad_norm: 0.8716352341444888, iteration: 189594
loss: 1.0029629468917847,grad_norm: 0.999999076671473, iteration: 189595
loss: 0.9927922487258911,grad_norm: 0.9999991182167215, iteration: 189596
loss: 1.0063005685806274,grad_norm: 0.9999991260128376, iteration: 189597
loss: 0.9919273853302002,grad_norm: 0.9609339086371675, iteration: 189598
loss: 1.0144027471542358,grad_norm: 0.9128195503138632, iteration: 189599
loss: 1.005671501159668,grad_norm: 0.9999991178274514, iteration: 189600
loss: 0.9878370761871338,grad_norm: 0.909861064091144, iteration: 189601
loss: 1.0280643701553345,grad_norm: 0.9999992036327249, iteration: 189602
loss: 1.0010534524917603,grad_norm: 0.9852159548753505, iteration: 189603
loss: 0.9730212092399597,grad_norm: 0.9999991988519662, iteration: 189604
loss: 1.0252915620803833,grad_norm: 0.9999991779615777, iteration: 189605
loss: 1.0350264310836792,grad_norm: 0.9999990775075308, iteration: 189606
loss: 1.0020653009414673,grad_norm: 0.9084218168141682, iteration: 189607
loss: 0.9554192423820496,grad_norm: 0.9089467919135851, iteration: 189608
loss: 1.0214686393737793,grad_norm: 0.999999857955946, iteration: 189609
loss: 0.99652099609375,grad_norm: 0.9999990813904984, iteration: 189610
loss: 1.0159624814987183,grad_norm: 0.9733391441123559, iteration: 189611
loss: 0.955089807510376,grad_norm: 0.9767812504835651, iteration: 189612
loss: 0.9929578304290771,grad_norm: 0.981877251095976, iteration: 189613
loss: 1.0156878232955933,grad_norm: 0.8510136982723728, iteration: 189614
loss: 1.0158616304397583,grad_norm: 0.845508224971773, iteration: 189615
loss: 1.0271005630493164,grad_norm: 0.8741349711877484, iteration: 189616
loss: 1.0169062614440918,grad_norm: 0.9999990725525909, iteration: 189617
loss: 0.9923568367958069,grad_norm: 0.8888010046336979, iteration: 189618
loss: 0.994352400302887,grad_norm: 0.959865220088546, iteration: 189619
loss: 1.004111409187317,grad_norm: 0.9052903861902751, iteration: 189620
loss: 0.968614935874939,grad_norm: 0.949870073887871, iteration: 189621
loss: 0.9925664067268372,grad_norm: 0.7825476591920056, iteration: 189622
loss: 1.035739779472351,grad_norm: 0.999999279445221, iteration: 189623
loss: 1.001344919204712,grad_norm: 0.8589729665022654, iteration: 189624
loss: 1.0033522844314575,grad_norm: 0.9616153077295895, iteration: 189625
loss: 1.0223498344421387,grad_norm: 0.9787468830769382, iteration: 189626
loss: 0.9957024455070496,grad_norm: 0.9999991542344973, iteration: 189627
loss: 1.0333951711654663,grad_norm: 0.8956779610853691, iteration: 189628
loss: 1.0038479566574097,grad_norm: 0.9207855596131457, iteration: 189629
loss: 0.9860205054283142,grad_norm: 0.9306146776604398, iteration: 189630
loss: 1.0096641778945923,grad_norm: 0.9999990879283914, iteration: 189631
loss: 0.9994421005249023,grad_norm: 0.6437711020048926, iteration: 189632
loss: 1.0097941160202026,grad_norm: 0.8759162937793602, iteration: 189633
loss: 0.9952593445777893,grad_norm: 0.9999991623968417, iteration: 189634
loss: 1.0215753316879272,grad_norm: 0.9999991268368619, iteration: 189635
loss: 0.9902527928352356,grad_norm: 0.9999990403424175, iteration: 189636
loss: 0.9898374676704407,grad_norm: 0.9903310752745271, iteration: 189637
loss: 0.9859448075294495,grad_norm: 0.8225818074358451, iteration: 189638
loss: 0.9471487402915955,grad_norm: 0.9942823842037768, iteration: 189639
loss: 1.0424312353134155,grad_norm: 0.8401079310355836, iteration: 189640
loss: 1.1008189916610718,grad_norm: 0.9999996035995465, iteration: 189641
loss: 1.0185084342956543,grad_norm: 0.9179873339125059, iteration: 189642
loss: 1.0028250217437744,grad_norm: 0.9640254122665793, iteration: 189643
loss: 0.9713459014892578,grad_norm: 0.9573735319234273, iteration: 189644
loss: 1.0046250820159912,grad_norm: 0.8673205883156854, iteration: 189645
loss: 1.0982165336608887,grad_norm: 0.9999998929617163, iteration: 189646
loss: 1.0075806379318237,grad_norm: 0.9999991600345663, iteration: 189647
loss: 1.0110201835632324,grad_norm: 0.8245371823942281, iteration: 189648
loss: 1.0078034400939941,grad_norm: 0.9999991349937508, iteration: 189649
loss: 0.9972648620605469,grad_norm: 0.8907729326586782, iteration: 189650
loss: 1.0061579942703247,grad_norm: 0.8771160861706963, iteration: 189651
loss: 0.9882630705833435,grad_norm: 0.999999049523158, iteration: 189652
loss: 0.9990708231925964,grad_norm: 0.9599187328041252, iteration: 189653
loss: 1.0006623268127441,grad_norm: 0.9582477259597026, iteration: 189654
loss: 1.0171293020248413,grad_norm: 0.9817691059911843, iteration: 189655
loss: 0.9752696752548218,grad_norm: 0.9999990550261307, iteration: 189656
loss: 1.0339068174362183,grad_norm: 0.9999992133203651, iteration: 189657
loss: 0.9824612140655518,grad_norm: 0.9999991624872913, iteration: 189658
loss: 0.948628306388855,grad_norm: 0.9715331912693751, iteration: 189659
loss: 1.0241928100585938,grad_norm: 0.9999990499402284, iteration: 189660
loss: 0.9992404580116272,grad_norm: 0.8605130170068913, iteration: 189661
loss: 0.9832687377929688,grad_norm: 0.9999990215033084, iteration: 189662
loss: 0.9878528714179993,grad_norm: 0.9280025100634283, iteration: 189663
loss: 1.015707015991211,grad_norm: 0.9701415003200575, iteration: 189664
loss: 1.0273009538650513,grad_norm: 0.9999989823518183, iteration: 189665
loss: 1.0054879188537598,grad_norm: 0.9065616932065211, iteration: 189666
loss: 1.0028584003448486,grad_norm: 0.9999991387373914, iteration: 189667
loss: 0.9772660732269287,grad_norm: 0.9447849482769392, iteration: 189668
loss: 0.9945671558380127,grad_norm: 0.964377449634788, iteration: 189669
loss: 0.9954647421836853,grad_norm: 0.9999991102634082, iteration: 189670
loss: 1.0104594230651855,grad_norm: 0.9999990575978561, iteration: 189671
loss: 0.9844290018081665,grad_norm: 0.9303895952546392, iteration: 189672
loss: 1.0322341918945312,grad_norm: 0.999999119563175, iteration: 189673
loss: 1.0066319704055786,grad_norm: 0.8836409385611176, iteration: 189674
loss: 1.0229167938232422,grad_norm: 0.8969535004063474, iteration: 189675
loss: 0.9719845056533813,grad_norm: 0.9305887839555087, iteration: 189676
loss: 1.002530813217163,grad_norm: 0.8310278031654724, iteration: 189677
loss: 1.044043779373169,grad_norm: 0.9999996382946882, iteration: 189678
loss: 1.0302410125732422,grad_norm: 0.9999992103384454, iteration: 189679
loss: 0.9821383357048035,grad_norm: 0.96417775742034, iteration: 189680
loss: 0.9762317538261414,grad_norm: 0.9999991541502455, iteration: 189681
loss: 0.9930428266525269,grad_norm: 0.999999114849857, iteration: 189682
loss: 1.0890066623687744,grad_norm: 0.8918411776303247, iteration: 189683
loss: 1.0142170190811157,grad_norm: 0.9999992722401676, iteration: 189684
loss: 0.9844047427177429,grad_norm: 0.8433063902248646, iteration: 189685
loss: 1.0026696920394897,grad_norm: 0.8775157160956392, iteration: 189686
loss: 0.9845364093780518,grad_norm: 0.9999991312342849, iteration: 189687
loss: 1.013914942741394,grad_norm: 0.9695766497518962, iteration: 189688
loss: 1.0165547132492065,grad_norm: 0.9962510571976516, iteration: 189689
loss: 1.0248141288757324,grad_norm: 0.9041704907687959, iteration: 189690
loss: 0.9960172176361084,grad_norm: 0.9207471615026697, iteration: 189691
loss: 1.0469127893447876,grad_norm: 0.8545789054882806, iteration: 189692
loss: 1.0214152336120605,grad_norm: 0.9999991511864436, iteration: 189693
loss: 0.9718269109725952,grad_norm: 0.9999991222216085, iteration: 189694
loss: 0.9720741510391235,grad_norm: 0.8421585229823398, iteration: 189695
loss: 1.011195421218872,grad_norm: 0.9655370731066352, iteration: 189696
loss: 1.0190281867980957,grad_norm: 0.9285025196332733, iteration: 189697
loss: 0.9940182566642761,grad_norm: 0.997674727918464, iteration: 189698
loss: 1.0060690641403198,grad_norm: 0.9977355558508422, iteration: 189699
loss: 0.9849813580513,grad_norm: 0.8634196418217399, iteration: 189700
loss: 0.9983915686607361,grad_norm: 0.9820258243151373, iteration: 189701
loss: 1.0080726146697998,grad_norm: 0.9698770858704344, iteration: 189702
loss: 0.9857251644134521,grad_norm: 0.7259816968242938, iteration: 189703
loss: 1.035977840423584,grad_norm: 0.999999048330499, iteration: 189704
loss: 1.0076371431350708,grad_norm: 0.8258333494768848, iteration: 189705
loss: 0.9887545108795166,grad_norm: 0.8355825378784305, iteration: 189706
loss: 0.9992634654045105,grad_norm: 0.9247864017540474, iteration: 189707
loss: 0.9964686036109924,grad_norm: 0.9882299058396193, iteration: 189708
loss: 1.027174711227417,grad_norm: 0.988350953393363, iteration: 189709
loss: 1.010467767715454,grad_norm: 0.9999995093008399, iteration: 189710
loss: 0.9872804284095764,grad_norm: 0.9999991296799002, iteration: 189711
loss: 1.0176455974578857,grad_norm: 0.970816156438941, iteration: 189712
loss: 1.0139613151550293,grad_norm: 0.8442822393237533, iteration: 189713
loss: 1.00498628616333,grad_norm: 0.9999989801780251, iteration: 189714
loss: 0.9634287357330322,grad_norm: 0.8891170469585186, iteration: 189715
loss: 1.0018908977508545,grad_norm: 0.944654187283608, iteration: 189716
loss: 1.0142104625701904,grad_norm: 0.8808305540997334, iteration: 189717
loss: 0.9945720434188843,grad_norm: 0.9699928447685388, iteration: 189718
loss: 0.9944865703582764,grad_norm: 0.9999990642422426, iteration: 189719
loss: 0.9702850580215454,grad_norm: 0.8515088453933916, iteration: 189720
loss: 0.9830625653266907,grad_norm: 0.8665136390806731, iteration: 189721
loss: 1.0183441638946533,grad_norm: 0.9925927224528879, iteration: 189722
loss: 0.9824426174163818,grad_norm: 0.9999990297995386, iteration: 189723
loss: 1.0314137935638428,grad_norm: 0.999999085630768, iteration: 189724
loss: 1.0019601583480835,grad_norm: 0.9930807433092829, iteration: 189725
loss: 1.0123916864395142,grad_norm: 0.988341430843933, iteration: 189726
loss: 0.9680346846580505,grad_norm: 0.9911130397917645, iteration: 189727
loss: 1.0010662078857422,grad_norm: 0.9999990747449863, iteration: 189728
loss: 0.996604323387146,grad_norm: 0.9999991079619925, iteration: 189729
loss: 0.9870420098304749,grad_norm: 0.9734760281826736, iteration: 189730
loss: 0.9923720359802246,grad_norm: 0.919970669582213, iteration: 189731
loss: 1.0152719020843506,grad_norm: 0.8671768478987038, iteration: 189732
loss: 0.9917004704475403,grad_norm: 0.9999991406701598, iteration: 189733
loss: 0.9952216744422913,grad_norm: 0.999999070754969, iteration: 189734
loss: 1.0182899236679077,grad_norm: 0.9095412235684561, iteration: 189735
loss: 0.990719735622406,grad_norm: 0.9049871540944959, iteration: 189736
loss: 1.0042606592178345,grad_norm: 0.9475096708472704, iteration: 189737
loss: 1.0222407579421997,grad_norm: 0.9754694045953421, iteration: 189738
loss: 1.0060739517211914,grad_norm: 0.9999991718601566, iteration: 189739
loss: 0.9963326454162598,grad_norm: 0.9755659535819996, iteration: 189740
loss: 1.0027605295181274,grad_norm: 0.9999992153895217, iteration: 189741
loss: 0.99908447265625,grad_norm: 0.9999990649744716, iteration: 189742
loss: 1.0064929723739624,grad_norm: 0.9256249789476374, iteration: 189743
loss: 0.9979360699653625,grad_norm: 0.9901702601767794, iteration: 189744
loss: 1.0181832313537598,grad_norm: 0.8554136019627001, iteration: 189745
loss: 0.9713311195373535,grad_norm: 0.9578918695866462, iteration: 189746
loss: 1.0497844219207764,grad_norm: 0.8240053447430953, iteration: 189747
loss: 1.0161203145980835,grad_norm: 0.9791151346930647, iteration: 189748
loss: 0.9961509108543396,grad_norm: 0.8833368403136528, iteration: 189749
loss: 1.0118753910064697,grad_norm: 0.855438526104584, iteration: 189750
loss: 1.0507464408874512,grad_norm: 0.9999989719645247, iteration: 189751
loss: 1.058495044708252,grad_norm: 0.9488283599494625, iteration: 189752
loss: 0.9724215865135193,grad_norm: 0.8534715986441976, iteration: 189753
loss: 0.9525639414787292,grad_norm: 0.9710189747110474, iteration: 189754
loss: 0.9997791647911072,grad_norm: 0.8344459810234313, iteration: 189755
loss: 1.0338315963745117,grad_norm: 0.9766008545139617, iteration: 189756
loss: 0.987034022808075,grad_norm: 0.9886877896928583, iteration: 189757
loss: 1.0530811548233032,grad_norm: 0.8949370851079432, iteration: 189758
loss: 0.9678006768226624,grad_norm: 0.9999992108869233, iteration: 189759
loss: 0.9921731948852539,grad_norm: 0.9061578647394515, iteration: 189760
loss: 1.0045344829559326,grad_norm: 0.9999991892548704, iteration: 189761
loss: 1.0018043518066406,grad_norm: 0.9999991810004333, iteration: 189762
loss: 1.026087760925293,grad_norm: 0.999998948805753, iteration: 189763
loss: 1.0013039112091064,grad_norm: 0.8987989190542474, iteration: 189764
loss: 0.9733988046646118,grad_norm: 0.9244916816461358, iteration: 189765
loss: 1.011470913887024,grad_norm: 0.9645992953915247, iteration: 189766
loss: 0.977993905544281,grad_norm: 0.9999992870556761, iteration: 189767
loss: 1.0113677978515625,grad_norm: 0.9999990218292271, iteration: 189768
loss: 0.9656276702880859,grad_norm: 0.9999992077199201, iteration: 189769
loss: 0.986661970615387,grad_norm: 0.9975532097556196, iteration: 189770
loss: 0.9780895709991455,grad_norm: 0.9756684261440742, iteration: 189771
loss: 1.0017598867416382,grad_norm: 0.7793905489234951, iteration: 189772
loss: 0.9850310683250427,grad_norm: 0.896608560518655, iteration: 189773
loss: 1.0024315118789673,grad_norm: 0.855883558666288, iteration: 189774
loss: 1.0243829488754272,grad_norm: 0.9999992154169693, iteration: 189775
loss: 0.9991135001182556,grad_norm: 0.8675464656601999, iteration: 189776
loss: 0.9585725665092468,grad_norm: 0.9646861247840094, iteration: 189777
loss: 1.0092583894729614,grad_norm: 0.9999991354746252, iteration: 189778
loss: 0.99640953540802,grad_norm: 0.9033427620682445, iteration: 189779
loss: 0.9986451864242554,grad_norm: 0.9749198932780888, iteration: 189780
loss: 0.9960816502571106,grad_norm: 0.9999991400754675, iteration: 189781
loss: 1.1336642503738403,grad_norm: 0.9999994049530666, iteration: 189782
loss: 1.0095881223678589,grad_norm: 0.9999992414012999, iteration: 189783
loss: 0.9917558431625366,grad_norm: 0.8707188956041594, iteration: 189784
loss: 0.9917211532592773,grad_norm: 0.9800095069889381, iteration: 189785
loss: 0.9792299270629883,grad_norm: 0.9999992619547151, iteration: 189786
loss: 0.9619262218475342,grad_norm: 0.9999991377887802, iteration: 189787
loss: 1.0011165142059326,grad_norm: 0.9999991745150251, iteration: 189788
loss: 0.9948218464851379,grad_norm: 0.9999991347664903, iteration: 189789
loss: 1.0299427509307861,grad_norm: 0.9999989692612985, iteration: 189790
loss: 1.01799738407135,grad_norm: 0.9999989785007467, iteration: 189791
loss: 1.1701900959014893,grad_norm: 0.999999541322019, iteration: 189792
loss: 0.9689802527427673,grad_norm: 0.9706997641635304, iteration: 189793
loss: 1.0367611646652222,grad_norm: 0.8441936513952306, iteration: 189794
loss: 1.0150196552276611,grad_norm: 0.9999990693773562, iteration: 189795
loss: 1.0214183330535889,grad_norm: 0.9999995791955999, iteration: 189796
loss: 0.9884474873542786,grad_norm: 0.8447594078824784, iteration: 189797
loss: 1.027533769607544,grad_norm: 0.9999991548072469, iteration: 189798
loss: 1.0374029874801636,grad_norm: 0.9999998427336204, iteration: 189799
loss: 1.017859935760498,grad_norm: 0.8182685200573924, iteration: 189800
loss: 1.0080664157867432,grad_norm: 0.8051885864894514, iteration: 189801
loss: 1.0016251802444458,grad_norm: 0.919348576273669, iteration: 189802
loss: 0.9911883473396301,grad_norm: 0.9999990903358973, iteration: 189803
loss: 1.0132776498794556,grad_norm: 0.8526702843219881, iteration: 189804
loss: 1.0332142114639282,grad_norm: 0.8474196594127332, iteration: 189805
loss: 0.9948410987854004,grad_norm: 0.9014678860870763, iteration: 189806
loss: 0.9763914942741394,grad_norm: 0.9211193905452333, iteration: 189807
loss: 1.014045000076294,grad_norm: 0.9276848102956131, iteration: 189808
loss: 1.0311771631240845,grad_norm: 0.8083516462967866, iteration: 189809
loss: 1.0230766534805298,grad_norm: 0.9169953838393506, iteration: 189810
loss: 0.9913991093635559,grad_norm: 0.9999990843760745, iteration: 189811
loss: 1.0028377771377563,grad_norm: 0.878269628372604, iteration: 189812
loss: 1.1440880298614502,grad_norm: 0.9999998208454087, iteration: 189813
loss: 0.9978097677230835,grad_norm: 0.8841977038881851, iteration: 189814
loss: 1.0002262592315674,grad_norm: 0.808834396137829, iteration: 189815
loss: 1.0564188957214355,grad_norm: 0.9999991969882868, iteration: 189816
loss: 1.0400339365005493,grad_norm: 0.9252405875012519, iteration: 189817
loss: 1.0147982835769653,grad_norm: 0.9999991666058117, iteration: 189818
loss: 0.9638891220092773,grad_norm: 0.8792081829521223, iteration: 189819
loss: 1.0066442489624023,grad_norm: 0.9999991114306069, iteration: 189820
loss: 0.9901543855667114,grad_norm: 0.9999991244791542, iteration: 189821
loss: 1.0303322076797485,grad_norm: 0.9898878443721291, iteration: 189822
loss: 1.0096278190612793,grad_norm: 0.9999993103933063, iteration: 189823
loss: 0.9745733141899109,grad_norm: 0.9865505371692601, iteration: 189824
loss: 1.0216342210769653,grad_norm: 0.9999998029794313, iteration: 189825
loss: 0.9503623843193054,grad_norm: 0.9192679040396624, iteration: 189826
loss: 0.9861913323402405,grad_norm: 0.9955968103334399, iteration: 189827
loss: 1.0004674196243286,grad_norm: 0.9999990267549501, iteration: 189828
loss: 1.0151766538619995,grad_norm: 0.9999991842894802, iteration: 189829
loss: 0.9993289709091187,grad_norm: 0.8515753248096809, iteration: 189830
loss: 0.9978060722351074,grad_norm: 0.9944615354096189, iteration: 189831
loss: 1.0065877437591553,grad_norm: 0.9999988874966511, iteration: 189832
loss: 1.0236252546310425,grad_norm: 0.9999992149685297, iteration: 189833
loss: 1.0650466680526733,grad_norm: 0.9250826059358404, iteration: 189834
loss: 0.9698895812034607,grad_norm: 0.852245167065632, iteration: 189835
loss: 1.1311143636703491,grad_norm: 0.999999827408241, iteration: 189836
loss: 1.000239610671997,grad_norm: 0.9999989739607678, iteration: 189837
loss: 0.975495457649231,grad_norm: 0.9149862126250945, iteration: 189838
loss: 1.0722543001174927,grad_norm: 0.9999991236451424, iteration: 189839
loss: 1.0158259868621826,grad_norm: 0.9999998707307484, iteration: 189840
loss: 1.0028319358825684,grad_norm: 0.9999989784331575, iteration: 189841
loss: 1.268284559249878,grad_norm: 1.0000000460695688, iteration: 189842
loss: 0.988888680934906,grad_norm: 0.9999989690144606, iteration: 189843
loss: 1.0095081329345703,grad_norm: 0.915045099243262, iteration: 189844
loss: 1.1548594236373901,grad_norm: 0.9999990580339416, iteration: 189845
loss: 1.0365452766418457,grad_norm: 0.9999991140193555, iteration: 189846
loss: 1.0092759132385254,grad_norm: 0.9318091703748635, iteration: 189847
loss: 0.9872635006904602,grad_norm: 0.999999539140421, iteration: 189848
loss: 0.9914606213569641,grad_norm: 0.9999989854796043, iteration: 189849
loss: 1.0208616256713867,grad_norm: 0.9999998394111516, iteration: 189850
loss: 1.0704354047775269,grad_norm: 0.9999994685683622, iteration: 189851
loss: 0.9832658171653748,grad_norm: 0.9999990758247941, iteration: 189852
loss: 0.9962394833564758,grad_norm: 0.999999289402669, iteration: 189853
loss: 0.9985949993133545,grad_norm: 0.8324677241262688, iteration: 189854
loss: 0.9986038208007812,grad_norm: 0.9999993091665769, iteration: 189855
loss: 1.0229136943817139,grad_norm: 0.7532671009285469, iteration: 189856
loss: 1.1093602180480957,grad_norm: 0.9999999979972355, iteration: 189857
loss: 1.1937742233276367,grad_norm: 0.9999993129939737, iteration: 189858
loss: 0.990869402885437,grad_norm: 0.8703911421612838, iteration: 189859
loss: 1.0975611209869385,grad_norm: 0.9999990565011314, iteration: 189860
loss: 0.9965483546257019,grad_norm: 0.9999991338257547, iteration: 189861
loss: 1.0548386573791504,grad_norm: 0.9381961303354516, iteration: 189862
loss: 1.0807008743286133,grad_norm: 0.7821074872134907, iteration: 189863
loss: 1.0345309972763062,grad_norm: 0.9999990549493573, iteration: 189864
loss: 1.0433573722839355,grad_norm: 0.99999983635874, iteration: 189865
loss: 1.0006513595581055,grad_norm: 0.9140859957893709, iteration: 189866
loss: 1.030057668685913,grad_norm: 0.9713067397581112, iteration: 189867
loss: 0.9880113005638123,grad_norm: 0.9999989819133, iteration: 189868
loss: 1.061772346496582,grad_norm: 0.8696698825679264, iteration: 189869
loss: 0.9951712489128113,grad_norm: 0.9999991157884844, iteration: 189870
loss: 1.0372107028961182,grad_norm: 0.9125444093566902, iteration: 189871
loss: 1.0020126104354858,grad_norm: 0.8696011063005483, iteration: 189872
loss: 1.0128066539764404,grad_norm: 0.9999992380818786, iteration: 189873
loss: 1.0277756452560425,grad_norm: 0.9005088060689984, iteration: 189874
loss: 1.0087968111038208,grad_norm: 0.8347761877055082, iteration: 189875
loss: 1.0114567279815674,grad_norm: 0.9862890589458057, iteration: 189876
loss: 1.0168529748916626,grad_norm: 0.999999176957631, iteration: 189877
loss: 1.0265889167785645,grad_norm: 0.999999101457237, iteration: 189878
loss: 1.012984275817871,grad_norm: 0.9999992751628328, iteration: 189879
loss: 0.9792338609695435,grad_norm: 0.8832527019527111, iteration: 189880
loss: 1.029694676399231,grad_norm: 0.9999991123188822, iteration: 189881
loss: 1.0576368570327759,grad_norm: 0.9999996220502991, iteration: 189882
loss: 1.010841727256775,grad_norm: 0.9976660199332655, iteration: 189883
loss: 0.9612838625907898,grad_norm: 0.9324416420932791, iteration: 189884
loss: 0.9801514744758606,grad_norm: 0.999999208400082, iteration: 189885
loss: 1.001815676689148,grad_norm: 0.8418038724648295, iteration: 189886
loss: 0.9928991198539734,grad_norm: 0.87996557108679, iteration: 189887
loss: 0.9969676733016968,grad_norm: 0.9138414343752849, iteration: 189888
loss: 0.9970848560333252,grad_norm: 0.8840877308654027, iteration: 189889
loss: 1.0000349283218384,grad_norm: 0.9999990855417169, iteration: 189890
loss: 1.1884647607803345,grad_norm: 0.9999997107144187, iteration: 189891
loss: 1.0247464179992676,grad_norm: 0.999999297625277, iteration: 189892
loss: 1.0378928184509277,grad_norm: 1.0000000144664527, iteration: 189893
loss: 0.9784247279167175,grad_norm: 0.9999990545563271, iteration: 189894
loss: 1.0001397132873535,grad_norm: 0.9999989922270985, iteration: 189895
loss: 1.030583381652832,grad_norm: 0.937404674474154, iteration: 189896
loss: 1.0154832601547241,grad_norm: 0.9257173420951798, iteration: 189897
loss: 0.983472466468811,grad_norm: 0.9999991367794704, iteration: 189898
loss: 1.0085606575012207,grad_norm: 0.9999991727013289, iteration: 189899
loss: 1.0821654796600342,grad_norm: 0.9999999224496293, iteration: 189900
loss: 0.975342869758606,grad_norm: 0.9588500730936367, iteration: 189901
loss: 0.9634357690811157,grad_norm: 0.8324341955051922, iteration: 189902
loss: 0.9994807243347168,grad_norm: 0.9999993950442367, iteration: 189903
loss: 0.993000328540802,grad_norm: 0.9999990528677091, iteration: 189904
loss: 1.0044186115264893,grad_norm: 0.8224711553274551, iteration: 189905
loss: 0.9992110133171082,grad_norm: 0.999999194431643, iteration: 189906
loss: 1.0060973167419434,grad_norm: 0.9999990205664921, iteration: 189907
loss: 1.0475634336471558,grad_norm: 0.9999993502526641, iteration: 189908
loss: 0.977591872215271,grad_norm: 0.7994985673229924, iteration: 189909
loss: 0.9835494756698608,grad_norm: 0.999999184888802, iteration: 189910
loss: 1.0203355550765991,grad_norm: 0.9999991082579925, iteration: 189911
loss: 0.9851677417755127,grad_norm: 0.8598205711878685, iteration: 189912
loss: 0.9858918786048889,grad_norm: 0.9794023401306112, iteration: 189913
loss: 1.0309455394744873,grad_norm: 0.9999990631120008, iteration: 189914
loss: 1.0233135223388672,grad_norm: 0.9999993517535767, iteration: 189915
loss: 1.04297935962677,grad_norm: 0.9999997217002632, iteration: 189916
loss: 0.9553792476654053,grad_norm: 0.8991723306521693, iteration: 189917
loss: 1.0087549686431885,grad_norm: 0.99999917545076, iteration: 189918
loss: 0.9684922099113464,grad_norm: 0.999999270716555, iteration: 189919
loss: 1.0299394130706787,grad_norm: 0.9249376075128255, iteration: 189920
loss: 1.001159906387329,grad_norm: 0.999999552363658, iteration: 189921
loss: 1.0886743068695068,grad_norm: 0.9999995698303049, iteration: 189922
loss: 1.0452032089233398,grad_norm: 0.999999063250163, iteration: 189923
loss: 1.0139820575714111,grad_norm: 0.999999038184379, iteration: 189924
loss: 1.0446211099624634,grad_norm: 0.9999993197901087, iteration: 189925
loss: 0.9936731457710266,grad_norm: 0.9182616569651806, iteration: 189926
loss: 1.0178470611572266,grad_norm: 0.9373190644500071, iteration: 189927
loss: 1.0476622581481934,grad_norm: 0.9099878471135185, iteration: 189928
loss: 1.0551478862762451,grad_norm: 0.999999838473921, iteration: 189929
loss: 1.0338196754455566,grad_norm: 0.9779212850502331, iteration: 189930
loss: 1.0515215396881104,grad_norm: 0.9999997571074192, iteration: 189931
loss: 1.169547200202942,grad_norm: 0.9999998245002648, iteration: 189932
loss: 1.1856361627578735,grad_norm: 0.9999999204836487, iteration: 189933
loss: 1.0944191217422485,grad_norm: 0.9999997524369401, iteration: 189934
loss: 0.9993516206741333,grad_norm: 0.9999999492413144, iteration: 189935
loss: 1.0103237628936768,grad_norm: 0.9999990772506073, iteration: 189936
loss: 1.0868953466415405,grad_norm: 0.9999999161943322, iteration: 189937
loss: 1.071060299873352,grad_norm: 0.9999993768126056, iteration: 189938
loss: 1.0608024597167969,grad_norm: 0.9999999004543679, iteration: 189939
loss: 1.0969356298446655,grad_norm: 0.999999310584522, iteration: 189940
loss: 1.0742700099945068,grad_norm: 0.9999995016149688, iteration: 189941
loss: 1.1194133758544922,grad_norm: 0.9999997485142001, iteration: 189942
loss: 1.115261435508728,grad_norm: 0.9999995136123946, iteration: 189943
loss: 1.115691065788269,grad_norm: 0.9999998199639348, iteration: 189944
loss: 1.1322609186172485,grad_norm: 0.9999999208866842, iteration: 189945
loss: 1.137509822845459,grad_norm: 0.9999998433133996, iteration: 189946
loss: 1.0331919193267822,grad_norm: 0.9999991694552485, iteration: 189947
loss: 1.1217039823532104,grad_norm: 0.9999998297476581, iteration: 189948
loss: 0.9935751557350159,grad_norm: 0.9999996083139686, iteration: 189949
loss: 1.0105879306793213,grad_norm: 0.9999990647751972, iteration: 189950
loss: 1.0184032917022705,grad_norm: 0.9999998234733304, iteration: 189951
loss: 1.0561506748199463,grad_norm: 0.9999999759871532, iteration: 189952
loss: 1.0958441495895386,grad_norm: 0.9999998165415017, iteration: 189953
loss: 1.0306931734085083,grad_norm: 0.9999993404533286, iteration: 189954
loss: 1.0987141132354736,grad_norm: 0.9999995787357167, iteration: 189955
loss: 1.1615289449691772,grad_norm: 0.999999825906068, iteration: 189956
loss: 1.1631230115890503,grad_norm: 0.9999996585543343, iteration: 189957
loss: 1.068649172782898,grad_norm: 0.9999999122117864, iteration: 189958
loss: 1.1345943212509155,grad_norm: 0.999999523471181, iteration: 189959
loss: 1.1095935106277466,grad_norm: 0.9999998321388649, iteration: 189960
loss: 1.0441093444824219,grad_norm: 0.9999999045736505, iteration: 189961
loss: 1.0390876531600952,grad_norm: 0.9999999680827143, iteration: 189962
loss: 1.019432783126831,grad_norm: 0.9999993084235811, iteration: 189963
loss: 1.0771323442459106,grad_norm: 0.999999433273352, iteration: 189964
loss: 1.1410610675811768,grad_norm: 0.999999900457651, iteration: 189965
loss: 1.0060597658157349,grad_norm: 0.9999992548114462, iteration: 189966
loss: 1.049678921699524,grad_norm: 1.0000000703646887, iteration: 189967
loss: 1.016845464706421,grad_norm: 0.9999991765069266, iteration: 189968
loss: 0.9710594415664673,grad_norm: 0.999999198403602, iteration: 189969
loss: 1.0000736713409424,grad_norm: 0.9999992438882044, iteration: 189970
loss: 1.049881100654602,grad_norm: 0.9999991544799298, iteration: 189971
loss: 1.0486758947372437,grad_norm: 0.9999992673775849, iteration: 189972
loss: 1.0434881448745728,grad_norm: 0.9980539302135949, iteration: 189973
loss: 0.9628030061721802,grad_norm: 0.9999991033702751, iteration: 189974
loss: 1.0234686136245728,grad_norm: 0.9999992167003147, iteration: 189975
loss: 1.1025389432907104,grad_norm: 0.9999996458664883, iteration: 189976
loss: 1.0164287090301514,grad_norm: 0.9999993289931989, iteration: 189977
loss: 1.0324116945266724,grad_norm: 0.9999992836951189, iteration: 189978
loss: 0.99515300989151,grad_norm: 0.9269255443410663, iteration: 189979
loss: 0.9854958057403564,grad_norm: 0.9999990974319624, iteration: 189980
loss: 1.0108106136322021,grad_norm: 0.9999992117872712, iteration: 189981
loss: 0.9554438591003418,grad_norm: 0.8973627064991101, iteration: 189982
loss: 1.0045167207717896,grad_norm: 0.9999990627208999, iteration: 189983
loss: 1.0168378353118896,grad_norm: 0.9999991006911055, iteration: 189984
loss: 1.1159946918487549,grad_norm: 0.9999991155590267, iteration: 189985
loss: 1.0023291110992432,grad_norm: 0.9999991585731782, iteration: 189986
loss: 1.0654816627502441,grad_norm: 0.9999990794864405, iteration: 189987
loss: 1.027685523033142,grad_norm: 0.9999990842856055, iteration: 189988
loss: 1.0235552787780762,grad_norm: 0.9999998708379783, iteration: 189989
loss: 1.0329288244247437,grad_norm: 0.9999989778930702, iteration: 189990
loss: 0.9869858026504517,grad_norm: 0.8989303587085071, iteration: 189991
loss: 1.0139240026474,grad_norm: 0.9999994368380454, iteration: 189992
loss: 0.9991418719291687,grad_norm: 0.9999998817457266, iteration: 189993
loss: 1.1412012577056885,grad_norm: 0.999999446295012, iteration: 189994
loss: 0.9669894576072693,grad_norm: 0.8018432483660631, iteration: 189995
loss: 1.0755068063735962,grad_norm: 0.9999999491731442, iteration: 189996
loss: 1.1351896524429321,grad_norm: 0.9999999923145773, iteration: 189997
loss: 1.0820749998092651,grad_norm: 0.9999993552854999, iteration: 189998
loss: 0.9926336407661438,grad_norm: 0.9955567605079878, iteration: 189999
loss: 1.179612636566162,grad_norm: 0.9999998192072124, iteration: 190000
Evaluating at step 190000
{'val': 1.0056385435163975, 'test': 2.4404697303954785}
loss: 1.0875341892242432,grad_norm: 0.9999993069529464, iteration: 190001
loss: 1.0868922472000122,grad_norm: 0.9385150924370339, iteration: 190002
loss: 1.1549007892608643,grad_norm: 0.9999997684232476, iteration: 190003
loss: 1.070602536201477,grad_norm: 0.9999991708524476, iteration: 190004
loss: 0.9946543574333191,grad_norm: 0.9999990693190292, iteration: 190005
loss: 0.975638210773468,grad_norm: 0.9999988986502665, iteration: 190006
loss: 1.0035134553909302,grad_norm: 0.9099538402381263, iteration: 190007
loss: 0.9742828011512756,grad_norm: 0.999999596601262, iteration: 190008
loss: 1.1164528131484985,grad_norm: 0.9999997462507939, iteration: 190009
loss: 0.9941622018814087,grad_norm: 0.9890263978274448, iteration: 190010
loss: 1.0670279264450073,grad_norm: 0.9999991185301473, iteration: 190011
loss: 1.0868641138076782,grad_norm: 0.9999998683523977, iteration: 190012
loss: 1.101076364517212,grad_norm: 0.9999994471539162, iteration: 190013
loss: 1.018505334854126,grad_norm: 0.9999990217081137, iteration: 190014
loss: 0.9817065000534058,grad_norm: 0.9999992795409421, iteration: 190015
loss: 1.1264115571975708,grad_norm: 0.9999996578036849, iteration: 190016
loss: 1.014289379119873,grad_norm: 0.9019631644274947, iteration: 190017
loss: 1.0058292150497437,grad_norm: 0.9999996867143093, iteration: 190018
loss: 0.9814446568489075,grad_norm: 0.9097032265473416, iteration: 190019
loss: 0.9795649647712708,grad_norm: 0.9999989877503407, iteration: 190020
loss: 1.01125967502594,grad_norm: 0.9999992257995314, iteration: 190021
loss: 1.0302425622940063,grad_norm: 0.9999996040429523, iteration: 190022
loss: 1.0046418905258179,grad_norm: 0.9999998366885174, iteration: 190023
loss: 1.0500869750976562,grad_norm: 0.999999881314821, iteration: 190024
loss: 1.0319958925247192,grad_norm: 0.9686831796170148, iteration: 190025
loss: 0.9698589444160461,grad_norm: 0.9999992565805277, iteration: 190026
loss: 1.0316730737686157,grad_norm: 0.9589958143013326, iteration: 190027
loss: 1.0540704727172852,grad_norm: 0.9999991455131105, iteration: 190028
loss: 1.0437442064285278,grad_norm: 0.9999997252390141, iteration: 190029
loss: 1.0120404958724976,grad_norm: 0.9999992877571265, iteration: 190030
loss: 1.1324598789215088,grad_norm: 0.9999998707890404, iteration: 190031
loss: 1.061043381690979,grad_norm: 0.9999997546513875, iteration: 190032
loss: 0.9842409491539001,grad_norm: 0.9746862643255939, iteration: 190033
loss: 0.9850108623504639,grad_norm: 0.9975911972458184, iteration: 190034
loss: 1.038493037223816,grad_norm: 0.9999991393259706, iteration: 190035
loss: 0.9765759110450745,grad_norm: 0.9999992457268927, iteration: 190036
loss: 1.050171971321106,grad_norm: 0.9999996707509021, iteration: 190037
loss: 0.9848837852478027,grad_norm: 0.999999118600467, iteration: 190038
loss: 1.0409214496612549,grad_norm: 0.9999993418428149, iteration: 190039
loss: 1.0406936407089233,grad_norm: 0.9999998613398516, iteration: 190040
loss: 1.0699622631072998,grad_norm: 0.99999974615761, iteration: 190041
loss: 0.9958518743515015,grad_norm: 0.9999990588961513, iteration: 190042
loss: 0.9824128150939941,grad_norm: 0.9999989779498073, iteration: 190043
loss: 1.0278525352478027,grad_norm: 0.9136144817920561, iteration: 190044
loss: 1.039991021156311,grad_norm: 0.99999907582714, iteration: 190045
loss: 1.0258362293243408,grad_norm: 0.9999994764538294, iteration: 190046
loss: 1.0140202045440674,grad_norm: 0.9999991688617114, iteration: 190047
loss: 0.9921886920928955,grad_norm: 0.9999991091448743, iteration: 190048
loss: 0.9966152906417847,grad_norm: 0.9999991594827364, iteration: 190049
loss: 1.043184757232666,grad_norm: 0.9999998928751548, iteration: 190050
loss: 0.996275007724762,grad_norm: 0.9999993834924845, iteration: 190051
loss: 1.0032497644424438,grad_norm: 0.9216987890742374, iteration: 190052
loss: 0.97532719373703,grad_norm: 0.9642275919151393, iteration: 190053
loss: 1.0123379230499268,grad_norm: 0.8558680627873185, iteration: 190054
loss: 0.9808082580566406,grad_norm: 0.8180620447225261, iteration: 190055
loss: 1.0379364490509033,grad_norm: 0.9999997185136592, iteration: 190056
loss: 1.0040562152862549,grad_norm: 0.9999990848864067, iteration: 190057
loss: 1.0385081768035889,grad_norm: 0.9999997612587888, iteration: 190058
loss: 1.002256155014038,grad_norm: 0.9874152058042566, iteration: 190059
loss: 1.0234150886535645,grad_norm: 0.9999993090555973, iteration: 190060
loss: 1.0188534259796143,grad_norm: 0.9485920529913024, iteration: 190061
loss: 0.9651234745979309,grad_norm: 0.9999989839164154, iteration: 190062
loss: 1.0356719493865967,grad_norm: 0.9999992361139859, iteration: 190063
loss: 1.0030497312545776,grad_norm: 0.9999991896403129, iteration: 190064
loss: 0.9867249727249146,grad_norm: 0.9999991292396754, iteration: 190065
loss: 1.00384521484375,grad_norm: 0.9999999368239485, iteration: 190066
loss: 1.0119993686676025,grad_norm: 0.9647080584718513, iteration: 190067
loss: 0.9861647486686707,grad_norm: 0.9999991055007607, iteration: 190068
loss: 0.9918214082717896,grad_norm: 0.8852722231459441, iteration: 190069
loss: 1.0839176177978516,grad_norm: 0.9999993079003385, iteration: 190070
loss: 1.0737786293029785,grad_norm: 0.9999994650922293, iteration: 190071
loss: 1.010055661201477,grad_norm: 0.956098496226678, iteration: 190072
loss: 1.0939146280288696,grad_norm: 0.9999998857053005, iteration: 190073
loss: 1.0083471536636353,grad_norm: 0.8814210864873262, iteration: 190074
loss: 1.0152323246002197,grad_norm: 0.9173559357952918, iteration: 190075
loss: 0.9969045519828796,grad_norm: 0.999999232009434, iteration: 190076
loss: 1.0131827592849731,grad_norm: 0.9354690180126395, iteration: 190077
loss: 0.9940608143806458,grad_norm: 0.9999991539019175, iteration: 190078
loss: 1.0766392946243286,grad_norm: 0.999999336181813, iteration: 190079
loss: 0.9966624975204468,grad_norm: 0.8436696897883709, iteration: 190080
loss: 1.0159497261047363,grad_norm: 0.999999172747953, iteration: 190081
loss: 0.9981404542922974,grad_norm: 0.999999127998212, iteration: 190082
loss: 1.0192140340805054,grad_norm: 0.999999125527619, iteration: 190083
loss: 1.0405137538909912,grad_norm: 0.9999994768531866, iteration: 190084
loss: 1.0106537342071533,grad_norm: 0.9999990303713233, iteration: 190085
loss: 0.9818274974822998,grad_norm: 0.8777952939072569, iteration: 190086
loss: 1.030248999595642,grad_norm: 0.9999997725086275, iteration: 190087
loss: 1.0040003061294556,grad_norm: 0.9046143614098048, iteration: 190088
loss: 1.0394644737243652,grad_norm: 0.9999994514590574, iteration: 190089
loss: 0.9881669878959656,grad_norm: 0.9413889972604386, iteration: 190090
loss: 0.9650253653526306,grad_norm: 0.9999992470668678, iteration: 190091
loss: 0.9621149897575378,grad_norm: 0.983313845099205, iteration: 190092
loss: 1.0185294151306152,grad_norm: 0.9999991252275595, iteration: 190093
loss: 1.0155894756317139,grad_norm: 0.9999994622380272, iteration: 190094
loss: 0.9751757383346558,grad_norm: 0.9999996652209772, iteration: 190095
loss: 1.034649133682251,grad_norm: 0.9999993209962383, iteration: 190096
loss: 1.0131741762161255,grad_norm: 0.9999991174935386, iteration: 190097
loss: 1.0828957557678223,grad_norm: 0.9348589891522537, iteration: 190098
loss: 0.9937685132026672,grad_norm: 0.9999990054549122, iteration: 190099
loss: 1.0254703760147095,grad_norm: 0.9999991104828669, iteration: 190100
loss: 0.9863643646240234,grad_norm: 0.9831276189184561, iteration: 190101
loss: 1.0183568000793457,grad_norm: 0.9999999421776398, iteration: 190102
loss: 0.9918798804283142,grad_norm: 0.9999996223356991, iteration: 190103
loss: 1.0309828519821167,grad_norm: 0.8765420143514598, iteration: 190104
loss: 0.9969494938850403,grad_norm: 0.8506909765314365, iteration: 190105
loss: 1.0201302766799927,grad_norm: 0.9999998577235204, iteration: 190106
loss: 1.0087884664535522,grad_norm: 0.9999991867811412, iteration: 190107
loss: 1.0150153636932373,grad_norm: 0.8573409627389076, iteration: 190108
loss: 1.0091301202774048,grad_norm: 0.957612671523934, iteration: 190109
loss: 0.9763821959495544,grad_norm: 0.9999993051434681, iteration: 190110
loss: 1.0331789255142212,grad_norm: 0.9969662667149086, iteration: 190111
loss: 0.9884374141693115,grad_norm: 0.9999990336568747, iteration: 190112
loss: 1.055071473121643,grad_norm: 0.9933890517957567, iteration: 190113
loss: 0.9860978722572327,grad_norm: 0.9855642382298216, iteration: 190114
loss: 0.9901369214057922,grad_norm: 0.8781021008742705, iteration: 190115
loss: 1.0024607181549072,grad_norm: 0.8961656617997058, iteration: 190116
loss: 1.0203142166137695,grad_norm: 0.8602687381941788, iteration: 190117
loss: 0.977193295955658,grad_norm: 0.9865851611961618, iteration: 190118
loss: 1.0170167684555054,grad_norm: 0.9999989958655092, iteration: 190119
loss: 1.01569402217865,grad_norm: 0.9205081734644612, iteration: 190120
loss: 1.0092960596084595,grad_norm: 0.9999992709534803, iteration: 190121
loss: 1.0186506509780884,grad_norm: 0.9646688594981838, iteration: 190122
loss: 1.0209804773330688,grad_norm: 0.9999995440034183, iteration: 190123
loss: 1.0079905986785889,grad_norm: 0.971651935183644, iteration: 190124
loss: 1.0283950567245483,grad_norm: 0.9489641465142277, iteration: 190125
loss: 1.0082772970199585,grad_norm: 0.9999996771198854, iteration: 190126
loss: 1.0127350091934204,grad_norm: 0.8768542458276123, iteration: 190127
loss: 1.0334274768829346,grad_norm: 0.8107044157220117, iteration: 190128
loss: 0.9749262928962708,grad_norm: 0.8463200688749245, iteration: 190129
loss: 0.9819340109825134,grad_norm: 0.9999995991445351, iteration: 190130
loss: 0.9866952300071716,grad_norm: 0.8960109095439789, iteration: 190131
loss: 0.9843102097511292,grad_norm: 0.9089126715248849, iteration: 190132
loss: 1.0286548137664795,grad_norm: 0.9999996450600824, iteration: 190133
loss: 1.0076061487197876,grad_norm: 0.999999257147119, iteration: 190134
loss: 0.9814693331718445,grad_norm: 0.8344815287167452, iteration: 190135
loss: 0.9638857245445251,grad_norm: 0.8919229361611809, iteration: 190136
loss: 1.0149199962615967,grad_norm: 0.8359918689537482, iteration: 190137
loss: 1.0311295986175537,grad_norm: 0.9999991486749871, iteration: 190138
loss: 1.039138674736023,grad_norm: 0.999999177309204, iteration: 190139
loss: 0.9833714962005615,grad_norm: 0.9999993854978038, iteration: 190140
loss: 0.9718275666236877,grad_norm: 0.9999990361887097, iteration: 190141
loss: 0.9716068506240845,grad_norm: 0.8186504500611929, iteration: 190142
loss: 0.9714474081993103,grad_norm: 0.9321678925028098, iteration: 190143
loss: 0.9984211325645447,grad_norm: 0.9999991972962501, iteration: 190144
loss: 0.9861046075820923,grad_norm: 0.8579883570199025, iteration: 190145
loss: 1.0332660675048828,grad_norm: 0.9999991529103057, iteration: 190146
loss: 1.0276298522949219,grad_norm: 0.9999990971297961, iteration: 190147
loss: 1.0519770383834839,grad_norm: 1.000000031228601, iteration: 190148
loss: 0.9812915325164795,grad_norm: 0.9999991699018315, iteration: 190149
loss: 0.9830647706985474,grad_norm: 0.9098519453965898, iteration: 190150
loss: 1.0390937328338623,grad_norm: 0.9999997368856551, iteration: 190151
loss: 1.0052368640899658,grad_norm: 0.9999989841368205, iteration: 190152
loss: 1.0178261995315552,grad_norm: 0.94896766228458, iteration: 190153
loss: 1.0668957233428955,grad_norm: 0.9999993603967411, iteration: 190154
loss: 1.0105063915252686,grad_norm: 0.9999991723695374, iteration: 190155
loss: 1.002692461013794,grad_norm: 0.9771315067526789, iteration: 190156
loss: 1.0055960416793823,grad_norm: 0.931194964000496, iteration: 190157
loss: 0.969697892665863,grad_norm: 0.9149429898536993, iteration: 190158
loss: 0.985992968082428,grad_norm: 0.9999997625145025, iteration: 190159
loss: 0.9982749223709106,grad_norm: 0.9343744962223625, iteration: 190160
loss: 1.090833067893982,grad_norm: 0.9999993148804303, iteration: 190161
loss: 1.005639910697937,grad_norm: 0.999999208664977, iteration: 190162
loss: 0.9932777881622314,grad_norm: 0.8376879071164496, iteration: 190163
loss: 1.0148088932037354,grad_norm: 0.9999990177441902, iteration: 190164
loss: 1.015702724456787,grad_norm: 0.9658150475041479, iteration: 190165
loss: 0.9963881373405457,grad_norm: 0.9999991968943246, iteration: 190166
loss: 1.0035513639450073,grad_norm: 0.9999996134138242, iteration: 190167
loss: 1.00580632686615,grad_norm: 0.9999993275338996, iteration: 190168
loss: 1.0126781463623047,grad_norm: 0.9999996609230188, iteration: 190169
loss: 1.0078070163726807,grad_norm: 0.999999216600665, iteration: 190170
loss: 0.9841325283050537,grad_norm: 0.9262500399682623, iteration: 190171
loss: 0.9879817366600037,grad_norm: 0.9999991455136267, iteration: 190172
loss: 0.9782366752624512,grad_norm: 0.9495700604797719, iteration: 190173
loss: 0.9711247682571411,grad_norm: 0.9999990796597511, iteration: 190174
loss: 1.0107961893081665,grad_norm: 0.9999990268872089, iteration: 190175
loss: 0.9957958459854126,grad_norm: 0.949058480059434, iteration: 190176
loss: 1.0009926557540894,grad_norm: 0.9302634665113451, iteration: 190177
loss: 1.0218805074691772,grad_norm: 0.9999990623355333, iteration: 190178
loss: 1.0027353763580322,grad_norm: 0.9999990477389467, iteration: 190179
loss: 0.9770336747169495,grad_norm: 0.9999991341189681, iteration: 190180
loss: 1.1747889518737793,grad_norm: 0.9999996573691416, iteration: 190181
loss: 1.0110151767730713,grad_norm: 0.9783681211300228, iteration: 190182
loss: 0.9986259937286377,grad_norm: 0.8808228458062781, iteration: 190183
loss: 1.0426106452941895,grad_norm: 0.9999992910087988, iteration: 190184
loss: 1.0135855674743652,grad_norm: 0.852118398622086, iteration: 190185
loss: 1.04625403881073,grad_norm: 0.9999992289077528, iteration: 190186
loss: 1.0391182899475098,grad_norm: 0.9999991301942499, iteration: 190187
loss: 0.9875730276107788,grad_norm: 0.9007787719580501, iteration: 190188
loss: 0.9788526892662048,grad_norm: 0.9999991546865313, iteration: 190189
loss: 1.009029746055603,grad_norm: 0.8999407697016885, iteration: 190190
loss: 0.9695923924446106,grad_norm: 0.9999990234572866, iteration: 190191
loss: 1.0032987594604492,grad_norm: 0.9781050626357762, iteration: 190192
loss: 1.0460498332977295,grad_norm: 0.9999995050782139, iteration: 190193
loss: 1.0671747922897339,grad_norm: 0.9999993600433159, iteration: 190194
loss: 1.036515712738037,grad_norm: 0.9999999540801738, iteration: 190195
loss: 0.9590200781822205,grad_norm: 0.9999995268982951, iteration: 190196
loss: 0.9906783103942871,grad_norm: 0.9706495091599379, iteration: 190197
loss: 1.019309401512146,grad_norm: 0.883860658268439, iteration: 190198
loss: 1.012535572052002,grad_norm: 0.9999992701995346, iteration: 190199
loss: 1.0172382593154907,grad_norm: 0.9582820340172076, iteration: 190200
loss: 1.0726876258850098,grad_norm: 0.9999998606822101, iteration: 190201
loss: 1.0210261344909668,grad_norm: 0.9999990722785512, iteration: 190202
loss: 0.9723489880561829,grad_norm: 0.9999990568776398, iteration: 190203
loss: 1.0220564603805542,grad_norm: 0.8825827401314776, iteration: 190204
loss: 1.0121866464614868,grad_norm: 0.9999991737856074, iteration: 190205
loss: 1.015696406364441,grad_norm: 0.9999995590066284, iteration: 190206
loss: 1.0074737071990967,grad_norm: 0.9999990158461161, iteration: 190207
loss: 1.0045876502990723,grad_norm: 0.9999991602772876, iteration: 190208
loss: 1.0207347869873047,grad_norm: 0.9999992853529818, iteration: 190209
loss: 1.0081663131713867,grad_norm: 0.9999991689032683, iteration: 190210
loss: 1.0046157836914062,grad_norm: 0.999999292105581, iteration: 190211
loss: 0.9685306549072266,grad_norm: 0.9023831945665135, iteration: 190212
loss: 1.0130906105041504,grad_norm: 0.9999996256914354, iteration: 190213
loss: 1.0309746265411377,grad_norm: 0.8727411860694819, iteration: 190214
loss: 0.9681587219238281,grad_norm: 0.9355741724605185, iteration: 190215
loss: 0.9744685292243958,grad_norm: 0.9693396675221321, iteration: 190216
loss: 1.0428521633148193,grad_norm: 0.9999998291989318, iteration: 190217
loss: 0.9918923377990723,grad_norm: 0.9999991727826288, iteration: 190218
loss: 0.9999502897262573,grad_norm: 0.8792610892279054, iteration: 190219
loss: 0.9754762649536133,grad_norm: 0.9288283957573448, iteration: 190220
loss: 1.0012744665145874,grad_norm: 0.9685619161992246, iteration: 190221
loss: 0.9814379215240479,grad_norm: 0.978986042855494, iteration: 190222
loss: 1.0023945569992065,grad_norm: 0.9999990382940198, iteration: 190223
loss: 0.989743173122406,grad_norm: 0.9999990241467637, iteration: 190224
loss: 1.0519740581512451,grad_norm: 0.9999997263603435, iteration: 190225
loss: 1.0011852979660034,grad_norm: 0.8523421805438065, iteration: 190226
loss: 1.0363237857818604,grad_norm: 0.9999993810010723, iteration: 190227
loss: 1.0139431953430176,grad_norm: 0.9999991761864022, iteration: 190228
loss: 1.0362868309020996,grad_norm: 0.9405520165317107, iteration: 190229
loss: 0.9568020701408386,grad_norm: 0.9999990536207449, iteration: 190230
loss: 0.9741986393928528,grad_norm: 0.9708790901496691, iteration: 190231
loss: 0.9877306222915649,grad_norm: 0.9999993105419277, iteration: 190232
loss: 0.9762630462646484,grad_norm: 0.884967450974821, iteration: 190233
loss: 1.017998218536377,grad_norm: 0.9999990926862505, iteration: 190234
loss: 1.01882004737854,grad_norm: 0.9999991169079026, iteration: 190235
loss: 1.019224762916565,grad_norm: 0.9999992728950209, iteration: 190236
loss: 1.0106298923492432,grad_norm: 0.9999994893036421, iteration: 190237
loss: 0.9987413883209229,grad_norm: 0.9999991438652434, iteration: 190238
loss: 0.9806387424468994,grad_norm: 0.9199032675253938, iteration: 190239
loss: 1.058240532875061,grad_norm: 0.9999990413532847, iteration: 190240
loss: 1.00803804397583,grad_norm: 0.8534341160273611, iteration: 190241
loss: 1.0028327703475952,grad_norm: 0.999999920671017, iteration: 190242
loss: 1.0254305601119995,grad_norm: 0.9540894899655875, iteration: 190243
loss: 1.0117833614349365,grad_norm: 0.999999079031936, iteration: 190244
loss: 1.0924944877624512,grad_norm: 0.9999992728438702, iteration: 190245
loss: 0.9796282649040222,grad_norm: 0.9999990734866459, iteration: 190246
loss: 0.9930052757263184,grad_norm: 0.999999195289246, iteration: 190247
loss: 0.99587482213974,grad_norm: 0.9199947231595286, iteration: 190248
loss: 1.0315215587615967,grad_norm: 0.9999991330271142, iteration: 190249
loss: 0.9591596722602844,grad_norm: 0.9720051960198471, iteration: 190250
loss: 1.081040620803833,grad_norm: 0.999999211220616, iteration: 190251
loss: 1.012569785118103,grad_norm: 0.8321068682609194, iteration: 190252
loss: 1.0148202180862427,grad_norm: 0.9999992146465925, iteration: 190253
loss: 1.0263029336929321,grad_norm: 0.9571810872482239, iteration: 190254
loss: 1.020546555519104,grad_norm: 0.999999249241729, iteration: 190255
loss: 0.9768351912498474,grad_norm: 0.9953788418091861, iteration: 190256
loss: 0.9815232157707214,grad_norm: 0.8984792265355713, iteration: 190257
loss: 1.006606936454773,grad_norm: 0.8489819392363394, iteration: 190258
loss: 1.007704734802246,grad_norm: 0.9999990397035895, iteration: 190259
loss: 1.0098135471343994,grad_norm: 0.9999990973044061, iteration: 190260
loss: 1.0142666101455688,grad_norm: 0.9131315408980455, iteration: 190261
loss: 0.9947347640991211,grad_norm: 0.9999990410785613, iteration: 190262
loss: 1.0028928518295288,grad_norm: 0.9999989689050715, iteration: 190263
loss: 1.0039435625076294,grad_norm: 0.8553376029003852, iteration: 190264
loss: 1.0198134183883667,grad_norm: 0.9999992058015095, iteration: 190265
loss: 0.998390793800354,grad_norm: 0.9999991115091582, iteration: 190266
loss: 0.9912495017051697,grad_norm: 0.999999094974117, iteration: 190267
loss: 0.9616840481758118,grad_norm: 0.9974095392591418, iteration: 190268
loss: 1.0766546726226807,grad_norm: 0.8998229570361026, iteration: 190269
loss: 1.0478829145431519,grad_norm: 0.9999992820689265, iteration: 190270
loss: 1.0093272924423218,grad_norm: 0.9999991109617392, iteration: 190271
loss: 1.0181381702423096,grad_norm: 0.9999994374901929, iteration: 190272
loss: 0.9795365929603577,grad_norm: 0.9816579121265538, iteration: 190273
loss: 0.9907087087631226,grad_norm: 0.9999995199802174, iteration: 190274
loss: 1.0072705745697021,grad_norm: 0.9302003187866382, iteration: 190275
loss: 0.9900461435317993,grad_norm: 0.9999993440208296, iteration: 190276
loss: 1.0057804584503174,grad_norm: 0.9999991079040166, iteration: 190277
loss: 0.9987990260124207,grad_norm: 0.9999996465654866, iteration: 190278
loss: 1.0186821222305298,grad_norm: 0.9115614881602689, iteration: 190279
loss: 1.0053483247756958,grad_norm: 0.9999996976137149, iteration: 190280
loss: 1.0605756044387817,grad_norm: 0.9999990741174516, iteration: 190281
loss: 0.9680079221725464,grad_norm: 0.8799712842820223, iteration: 190282
loss: 0.9758561849594116,grad_norm: 0.9999992181783838, iteration: 190283
loss: 1.025294542312622,grad_norm: 0.9999990325739302, iteration: 190284
loss: 0.9830501079559326,grad_norm: 0.999999121872486, iteration: 190285
loss: 0.9957959055900574,grad_norm: 0.9371254677097948, iteration: 190286
loss: 1.013763427734375,grad_norm: 0.999999149775247, iteration: 190287
loss: 0.9990930557250977,grad_norm: 0.9999991752019918, iteration: 190288
loss: 0.9863617420196533,grad_norm: 0.9340748345780252, iteration: 190289
loss: 1.049783706665039,grad_norm: 0.9999999548705812, iteration: 190290
loss: 1.0138163566589355,grad_norm: 0.9485233240576632, iteration: 190291
loss: 0.9724118113517761,grad_norm: 0.9999987485995129, iteration: 190292
loss: 1.0181595087051392,grad_norm: 0.9999990573090286, iteration: 190293
loss: 0.9979223012924194,grad_norm: 0.792275758998095, iteration: 190294
loss: 1.008460521697998,grad_norm: 0.9754688447156341, iteration: 190295
loss: 1.0336171388626099,grad_norm: 0.9999998934258291, iteration: 190296
loss: 1.000781774520874,grad_norm: 0.9999991581755285, iteration: 190297
loss: 1.0011557340621948,grad_norm: 0.9351934791191515, iteration: 190298
loss: 1.0474439859390259,grad_norm: 0.9999992208368303, iteration: 190299
loss: 0.9968205690383911,grad_norm: 0.9476326396505288, iteration: 190300
loss: 0.9818521738052368,grad_norm: 0.9999990921918163, iteration: 190301
loss: 1.026469349861145,grad_norm: 0.7884415243422913, iteration: 190302
loss: 1.0762360095977783,grad_norm: 0.999999368984381, iteration: 190303
loss: 0.9826382398605347,grad_norm: 0.9999991325158349, iteration: 190304
loss: 1.0089739561080933,grad_norm: 0.9052120527213291, iteration: 190305
loss: 1.0202807188034058,grad_norm: 0.6780803304580932, iteration: 190306
loss: 0.9796468019485474,grad_norm: 0.9935536978799231, iteration: 190307
loss: 0.9713285565376282,grad_norm: 0.9999990909333333, iteration: 190308
loss: 1.0123776197433472,grad_norm: 0.9688966261383015, iteration: 190309
loss: 1.1765296459197998,grad_norm: 0.9999994814228664, iteration: 190310
loss: 1.0001767873764038,grad_norm: 0.8631334454418256, iteration: 190311
loss: 1.0043537616729736,grad_norm: 0.999999397112776, iteration: 190312
loss: 1.018427848815918,grad_norm: 0.999999171670896, iteration: 190313
loss: 0.9845249652862549,grad_norm: 0.9383103397837652, iteration: 190314
loss: 1.02213454246521,grad_norm: 0.9999994644378556, iteration: 190315
loss: 1.0202898979187012,grad_norm: 0.9999992688543519, iteration: 190316
loss: 1.0660918951034546,grad_norm: 0.9999993357671815, iteration: 190317
loss: 0.9830762147903442,grad_norm: 0.7018588800235602, iteration: 190318
loss: 1.049433708190918,grad_norm: 0.999999367694963, iteration: 190319
loss: 0.9520496726036072,grad_norm: 0.9999997007122295, iteration: 190320
loss: 0.9923097491264343,grad_norm: 0.9999996401942975, iteration: 190321
loss: 1.018193006515503,grad_norm: 0.9999993225793763, iteration: 190322
loss: 0.9908761978149414,grad_norm: 0.9999989993782378, iteration: 190323
loss: 1.0199413299560547,grad_norm: 0.9999991826702154, iteration: 190324
loss: 0.9874195456504822,grad_norm: 0.9999992074169343, iteration: 190325
loss: 0.9972057342529297,grad_norm: 0.999999070070927, iteration: 190326
loss: 0.97440505027771,grad_norm: 0.9999991200479061, iteration: 190327
loss: 0.9834826588630676,grad_norm: 0.9528625484447371, iteration: 190328
loss: 1.0353939533233643,grad_norm: 0.9806069838173922, iteration: 190329
loss: 0.980255126953125,grad_norm: 0.9999991118149492, iteration: 190330
loss: 0.9521759152412415,grad_norm: 0.974827041751254, iteration: 190331
loss: 1.0229824781417847,grad_norm: 0.999998965998876, iteration: 190332
loss: 1.049803376197815,grad_norm: 0.9999998126771504, iteration: 190333
loss: 1.039788842201233,grad_norm: 0.9124645493755213, iteration: 190334
loss: 1.171311378479004,grad_norm: 1.000000037820628, iteration: 190335
loss: 0.9865555763244629,grad_norm: 0.9270651276189406, iteration: 190336
loss: 0.9811805486679077,grad_norm: 0.8214786807286077, iteration: 190337
loss: 1.0323926210403442,grad_norm: 0.9651639860064908, iteration: 190338
loss: 0.9893415570259094,grad_norm: 0.7876659508915508, iteration: 190339
loss: 1.075573205947876,grad_norm: 0.9999991142527755, iteration: 190340
loss: 1.0648783445358276,grad_norm: 0.9999999736958147, iteration: 190341
loss: 1.0498077869415283,grad_norm: 0.9999993817059103, iteration: 190342
loss: 1.2238924503326416,grad_norm: 0.9999999191246406, iteration: 190343
loss: 1.0023859739303589,grad_norm: 0.999999297702131, iteration: 190344
loss: 1.1006877422332764,grad_norm: 0.9999995416077349, iteration: 190345
loss: 1.1913942098617554,grad_norm: 0.9999992423229317, iteration: 190346
loss: 1.0468571186065674,grad_norm: 0.9999998232470427, iteration: 190347
loss: 1.2800014019012451,grad_norm: 0.9999999547444929, iteration: 190348
loss: 1.1289106607437134,grad_norm: 0.9999992702371838, iteration: 190349
loss: 1.0029668807983398,grad_norm: 0.9999991268048762, iteration: 190350
loss: 1.1041014194488525,grad_norm: 0.9999998607383097, iteration: 190351
loss: 1.2380038499832153,grad_norm: 0.9999991236227957, iteration: 190352
loss: 1.1024523973464966,grad_norm: 0.9999997767526867, iteration: 190353
loss: 1.1266870498657227,grad_norm: 0.9999999730192153, iteration: 190354
loss: 1.3202050924301147,grad_norm: 0.9999998891778811, iteration: 190355
loss: 1.040362000465393,grad_norm: 0.9999991707923911, iteration: 190356
loss: 1.1189550161361694,grad_norm: 0.9999998239669485, iteration: 190357
loss: 1.1063477993011475,grad_norm: 0.999999107143872, iteration: 190358
loss: 0.9473384618759155,grad_norm: 0.9999991697249515, iteration: 190359
loss: 1.0101006031036377,grad_norm: 0.9999991718345377, iteration: 190360
loss: 1.0351676940917969,grad_norm: 0.9999998222190812, iteration: 190361
loss: 1.0529701709747314,grad_norm: 0.9999993060540325, iteration: 190362
loss: 0.9819560647010803,grad_norm: 0.9329063790778013, iteration: 190363
loss: 1.0647413730621338,grad_norm: 0.9999990946401943, iteration: 190364
loss: 1.0162078142166138,grad_norm: 0.9999991378889889, iteration: 190365
loss: 1.0268397331237793,grad_norm: 0.9999990532579713, iteration: 190366
loss: 1.0174850225448608,grad_norm: 0.9999994550754343, iteration: 190367
loss: 1.067044973373413,grad_norm: 0.9999994707729982, iteration: 190368
loss: 0.969317615032196,grad_norm: 0.9999990951085258, iteration: 190369
loss: 0.9773799180984497,grad_norm: 0.99999898322465, iteration: 190370
loss: 1.036544919013977,grad_norm: 0.9999997193836611, iteration: 190371
loss: 1.0583387613296509,grad_norm: 0.9999999455273515, iteration: 190372
loss: 0.9924262166023254,grad_norm: 0.9655043367403735, iteration: 190373
loss: 1.0239843130111694,grad_norm: 0.9999997499786513, iteration: 190374
loss: 1.0692789554595947,grad_norm: 0.9999997633292541, iteration: 190375
loss: 0.9623391032218933,grad_norm: 0.9576556131597914, iteration: 190376
loss: 0.9756136536598206,grad_norm: 0.9157372198050973, iteration: 190377
loss: 1.0547301769256592,grad_norm: 0.9999996183410306, iteration: 190378
loss: 0.990410566329956,grad_norm: 0.9999993773627905, iteration: 190379
loss: 1.031678318977356,grad_norm: 0.9999991896245544, iteration: 190380
loss: 1.0079401731491089,grad_norm: 0.9999990643655149, iteration: 190381
loss: 1.0813695192337036,grad_norm: 1.000000029620279, iteration: 190382
loss: 1.003348469734192,grad_norm: 0.9999990806728182, iteration: 190383
loss: 1.1221609115600586,grad_norm: 0.9999997419877956, iteration: 190384
loss: 1.001583218574524,grad_norm: 0.9999990679366024, iteration: 190385
loss: 1.0405933856964111,grad_norm: 0.8855148469903362, iteration: 190386
loss: 1.0089197158813477,grad_norm: 0.9999991227894972, iteration: 190387
loss: 1.0196222066879272,grad_norm: 0.9999993472513171, iteration: 190388
loss: 0.9848583936691284,grad_norm: 0.8971181971439995, iteration: 190389
loss: 1.0383186340332031,grad_norm: 0.9999990464727495, iteration: 190390
loss: 1.0727289915084839,grad_norm: 0.9999992693368239, iteration: 190391
loss: 1.017195224761963,grad_norm: 0.9999998472857431, iteration: 190392
loss: 1.0065126419067383,grad_norm: 0.8977573174493999, iteration: 190393
loss: 0.9697432518005371,grad_norm: 0.9023588322027793, iteration: 190394
loss: 1.0083287954330444,grad_norm: 0.9999992332100011, iteration: 190395
loss: 1.0235035419464111,grad_norm: 0.9999990410407458, iteration: 190396
loss: 1.0100855827331543,grad_norm: 0.8162418810485688, iteration: 190397
loss: 0.9835143089294434,grad_norm: 0.9999992016054918, iteration: 190398
loss: 1.0209407806396484,grad_norm: 0.9324056048952212, iteration: 190399
loss: 1.0167068243026733,grad_norm: 0.9999994267402166, iteration: 190400
loss: 0.9975880980491638,grad_norm: 0.9426377552109786, iteration: 190401
loss: 1.0105832815170288,grad_norm: 0.877465814339253, iteration: 190402
loss: 1.023476243019104,grad_norm: 0.9999992897443745, iteration: 190403
loss: 1.0486124753952026,grad_norm: 0.9999991618597532, iteration: 190404
loss: 0.9773235321044922,grad_norm: 0.8969606653849999, iteration: 190405
loss: 1.0609725713729858,grad_norm: 0.9999990916030909, iteration: 190406
loss: 1.057085394859314,grad_norm: 0.9999993946885504, iteration: 190407
loss: 0.9840240478515625,grad_norm: 0.99999998088347, iteration: 190408
loss: 1.0157172679901123,grad_norm: 0.9999996620862143, iteration: 190409
loss: 0.9920466542243958,grad_norm: 0.7768697473611595, iteration: 190410
loss: 0.9838123321533203,grad_norm: 0.9035267172142147, iteration: 190411
loss: 0.9940078258514404,grad_norm: 0.9999989932378507, iteration: 190412
loss: 0.9893160462379456,grad_norm: 0.9999993332805995, iteration: 190413
loss: 1.0020549297332764,grad_norm: 0.9999990735849437, iteration: 190414
loss: 1.008834958076477,grad_norm: 0.9999992811667421, iteration: 190415
loss: 0.9733555316925049,grad_norm: 0.7741506395841584, iteration: 190416
loss: 1.0133445262908936,grad_norm: 0.9899829107010678, iteration: 190417
loss: 1.016626238822937,grad_norm: 0.9999990380160335, iteration: 190418
loss: 1.005587100982666,grad_norm: 0.9999989893135205, iteration: 190419
loss: 1.005442500114441,grad_norm: 0.9815383346286412, iteration: 190420
loss: 1.0481932163238525,grad_norm: 0.999999770542002, iteration: 190421
loss: 1.013979196548462,grad_norm: 0.8370783817432488, iteration: 190422
loss: 0.9865549802780151,grad_norm: 0.9999992409942225, iteration: 190423
loss: 0.9867393374443054,grad_norm: 0.9999989762306314, iteration: 190424
loss: 1.0316323041915894,grad_norm: 0.9706760436438066, iteration: 190425
loss: 0.9793083071708679,grad_norm: 0.9999991065060646, iteration: 190426
loss: 1.036149501800537,grad_norm: 0.9999993041734485, iteration: 190427
loss: 1.0129954814910889,grad_norm: 0.999999282951848, iteration: 190428
loss: 1.013788104057312,grad_norm: 0.9256905797454108, iteration: 190429
loss: 1.0119105577468872,grad_norm: 0.963130700785411, iteration: 190430
loss: 1.0034711360931396,grad_norm: 0.9999990081421187, iteration: 190431
loss: 1.0255190134048462,grad_norm: 0.8558102607440411, iteration: 190432
loss: 0.9967842698097229,grad_norm: 0.8171422732274239, iteration: 190433
loss: 0.9607448577880859,grad_norm: 0.8833694397512396, iteration: 190434
loss: 1.0493342876434326,grad_norm: 0.9999990792716293, iteration: 190435
loss: 0.9949765205383301,grad_norm: 0.8114215209127252, iteration: 190436
loss: 0.9838554859161377,grad_norm: 0.8693275662219183, iteration: 190437
loss: 1.0351910591125488,grad_norm: 0.9366175157091693, iteration: 190438
loss: 0.9284404516220093,grad_norm: 0.9999989896321371, iteration: 190439
loss: 0.9727680087089539,grad_norm: 0.9163569334086286, iteration: 190440
loss: 1.0076868534088135,grad_norm: 0.9893334279020853, iteration: 190441
loss: 0.9825770854949951,grad_norm: 0.9999992620250178, iteration: 190442
loss: 1.0274782180786133,grad_norm: 0.9453835250869617, iteration: 190443
loss: 1.0637869834899902,grad_norm: 0.9999993156725061, iteration: 190444
loss: 0.9900273084640503,grad_norm: 0.9999991764406605, iteration: 190445
loss: 0.9963260293006897,grad_norm: 0.9999991504706739, iteration: 190446
loss: 0.9841817021369934,grad_norm: 0.9637828685021516, iteration: 190447
loss: 1.0114184617996216,grad_norm: 0.9403168784801209, iteration: 190448
loss: 1.0038752555847168,grad_norm: 0.9441139558480873, iteration: 190449
loss: 0.983069896697998,grad_norm: 0.9466928399463495, iteration: 190450
loss: 1.0026217699050903,grad_norm: 0.8363836447047363, iteration: 190451
loss: 1.000681757926941,grad_norm: 0.9029963087926326, iteration: 190452
loss: 0.988213837146759,grad_norm: 0.8494315800931228, iteration: 190453
loss: 0.9927753806114197,grad_norm: 0.987380337150649, iteration: 190454
loss: 1.023632287979126,grad_norm: 0.9571052613580251, iteration: 190455
loss: 0.9999327659606934,grad_norm: 0.9999990743489057, iteration: 190456
loss: 0.9517101049423218,grad_norm: 0.8823464903237317, iteration: 190457
loss: 1.0029819011688232,grad_norm: 0.9999989802974443, iteration: 190458
loss: 0.9914127588272095,grad_norm: 0.9999989674182448, iteration: 190459
loss: 0.9584770798683167,grad_norm: 0.8807214093517602, iteration: 190460
loss: 1.0054666996002197,grad_norm: 0.9999993008188733, iteration: 190461
loss: 0.9614017605781555,grad_norm: 0.8731245748073844, iteration: 190462
loss: 0.9867302179336548,grad_norm: 0.8355201967418425, iteration: 190463
loss: 1.104488492012024,grad_norm: 0.999999211250412, iteration: 190464
loss: 1.0013772249221802,grad_norm: 0.9999992103485799, iteration: 190465
loss: 0.9839062094688416,grad_norm: 0.8332568871768186, iteration: 190466
loss: 0.9624407291412354,grad_norm: 0.9999996505102351, iteration: 190467
loss: 1.0693373680114746,grad_norm: 0.9999991636642563, iteration: 190468
loss: 0.9741437435150146,grad_norm: 0.990770174612046, iteration: 190469
loss: 0.9702242016792297,grad_norm: 0.9761005416798934, iteration: 190470
loss: 1.0400444269180298,grad_norm: 0.9515057370047754, iteration: 190471
loss: 1.0166972875595093,grad_norm: 0.9705773782967873, iteration: 190472
loss: 0.9883033037185669,grad_norm: 0.9999992939876803, iteration: 190473
loss: 1.0046616792678833,grad_norm: 0.9999990610013492, iteration: 190474
loss: 0.9988656044006348,grad_norm: 0.9999991301665583, iteration: 190475
loss: 0.9903570413589478,grad_norm: 0.9982258030514168, iteration: 190476
loss: 1.0016087293624878,grad_norm: 0.9999993266125863, iteration: 190477
loss: 0.9816902875900269,grad_norm: 0.9999993182338004, iteration: 190478
loss: 0.9869326949119568,grad_norm: 0.8322926323783524, iteration: 190479
loss: 0.9973235130310059,grad_norm: 0.9999992149784329, iteration: 190480
loss: 1.001461148262024,grad_norm: 0.9921205767114992, iteration: 190481
loss: 0.9948235154151917,grad_norm: 0.999999619698539, iteration: 190482
loss: 0.9626979231834412,grad_norm: 0.8899755317535347, iteration: 190483
loss: 0.9724553227424622,grad_norm: 0.9999991104720098, iteration: 190484
loss: 0.9963223338127136,grad_norm: 0.9999997895729745, iteration: 190485
loss: 0.9534657597541809,grad_norm: 0.9185665826789049, iteration: 190486
loss: 1.0159727334976196,grad_norm: 0.9999995543659623, iteration: 190487
loss: 0.976891815662384,grad_norm: 0.9999991499210535, iteration: 190488
loss: 1.014859676361084,grad_norm: 0.9374977431542898, iteration: 190489
loss: 1.003998041152954,grad_norm: 0.8985841638989125, iteration: 190490
loss: 1.0463111400604248,grad_norm: 0.968486459115604, iteration: 190491
loss: 1.007765293121338,grad_norm: 0.9722481752538459, iteration: 190492
loss: 1.0207078456878662,grad_norm: 0.9373261641678967, iteration: 190493
loss: 1.0185260772705078,grad_norm: 0.999999145952791, iteration: 190494
loss: 1.031747579574585,grad_norm: 0.868104989189594, iteration: 190495
loss: 1.0384551286697388,grad_norm: 0.9999992182368846, iteration: 190496
loss: 0.9789121747016907,grad_norm: 0.9999989041851447, iteration: 190497
loss: 1.077863097190857,grad_norm: 0.928403131488682, iteration: 190498
loss: 1.017511010169983,grad_norm: 0.9751031425595726, iteration: 190499
loss: 0.9680523872375488,grad_norm: 0.9999991970629418, iteration: 190500
loss: 0.9874182343482971,grad_norm: 0.9999992878793028, iteration: 190501
loss: 1.000042200088501,grad_norm: 0.9844070581433076, iteration: 190502
loss: 1.0084513425827026,grad_norm: 0.9999994603621304, iteration: 190503
loss: 0.9818379878997803,grad_norm: 0.8876310104813374, iteration: 190504
loss: 0.9746461510658264,grad_norm: 0.9999991894407567, iteration: 190505
loss: 0.9938929080963135,grad_norm: 0.7790458076465392, iteration: 190506
loss: 1.029731273651123,grad_norm: 0.9999994170319212, iteration: 190507
loss: 0.9628869295120239,grad_norm: 0.9999995553630929, iteration: 190508
loss: 0.9999857544898987,grad_norm: 0.8669283401426322, iteration: 190509
loss: 1.012904405593872,grad_norm: 0.9503162780648565, iteration: 190510
loss: 0.9926394820213318,grad_norm: 0.9999990862127983, iteration: 190511
loss: 0.9692624807357788,grad_norm: 0.9999991650447504, iteration: 190512
loss: 1.0239776372909546,grad_norm: 0.999999130421796, iteration: 190513
loss: 0.9845300316810608,grad_norm: 0.9999993584118603, iteration: 190514
loss: 1.0077922344207764,grad_norm: 0.9769817972376116, iteration: 190515
loss: 0.9970894455909729,grad_norm: 0.9429674417489186, iteration: 190516
loss: 0.9984572529792786,grad_norm: 0.9235649186830381, iteration: 190517
loss: 0.995473325252533,grad_norm: 0.9999990309750041, iteration: 190518
loss: 0.9607337117195129,grad_norm: 0.9999990724118116, iteration: 190519
loss: 1.0303739309310913,grad_norm: 0.8759895438961213, iteration: 190520
loss: 1.0288724899291992,grad_norm: 0.9999990963122256, iteration: 190521
loss: 0.9706198573112488,grad_norm: 0.921804854790035, iteration: 190522
loss: 0.9798389077186584,grad_norm: 0.914036934032537, iteration: 190523
loss: 1.0117744207382202,grad_norm: 0.9999990193215788, iteration: 190524
loss: 1.0242265462875366,grad_norm: 0.8000544260156797, iteration: 190525
loss: 1.050795555114746,grad_norm: 0.9999997150146137, iteration: 190526
loss: 1.0157418251037598,grad_norm: 0.8739040385831733, iteration: 190527
loss: 1.024750828742981,grad_norm: 0.9999989973793729, iteration: 190528
loss: 0.9987039566040039,grad_norm: 0.9999994564142426, iteration: 190529
loss: 1.014125943183899,grad_norm: 0.9264950792677947, iteration: 190530
loss: 0.9995220303535461,grad_norm: 0.9690075647266341, iteration: 190531
loss: 0.9828777313232422,grad_norm: 0.9999990714492104, iteration: 190532
loss: 0.9691835641860962,grad_norm: 0.9999991337737291, iteration: 190533
loss: 0.9890454411506653,grad_norm: 0.9999990868214873, iteration: 190534
loss: 0.9710975885391235,grad_norm: 0.9617141481180674, iteration: 190535
loss: 0.9810201525688171,grad_norm: 0.99999920392459, iteration: 190536
loss: 0.9970823526382446,grad_norm: 0.9999992271054767, iteration: 190537
loss: 0.9974731206893921,grad_norm: 0.9981052259311523, iteration: 190538
loss: 0.9863383173942566,grad_norm: 0.8743185241108598, iteration: 190539
loss: 1.0181831121444702,grad_norm: 0.9999990102233228, iteration: 190540
loss: 0.9656825661659241,grad_norm: 0.9999990385073361, iteration: 190541
loss: 1.0062235593795776,grad_norm: 0.9388897472742537, iteration: 190542
loss: 1.039818286895752,grad_norm: 0.9727171568657211, iteration: 190543
loss: 1.1009448766708374,grad_norm: 0.9999998035858242, iteration: 190544
loss: 1.0236135721206665,grad_norm: 0.9999990747360433, iteration: 190545
loss: 0.9724324345588684,grad_norm: 0.9999989756058373, iteration: 190546
loss: 1.0020899772644043,grad_norm: 0.9999991953585805, iteration: 190547
loss: 1.0111439228057861,grad_norm: 0.9999990980296877, iteration: 190548
loss: 0.9827502965927124,grad_norm: 0.9591646941121209, iteration: 190549
loss: 0.982758641242981,grad_norm: 0.999999055477141, iteration: 190550
loss: 0.9713398814201355,grad_norm: 0.8783487323529761, iteration: 190551
loss: 0.9977383613586426,grad_norm: 0.9999990460879072, iteration: 190552
loss: 0.9811952710151672,grad_norm: 0.8919253874658285, iteration: 190553
loss: 0.9985353350639343,grad_norm: 0.9582588710194461, iteration: 190554
loss: 0.994114875793457,grad_norm: 0.9538521161906284, iteration: 190555
loss: 0.9892593026161194,grad_norm: 0.9230399835490684, iteration: 190556
loss: 0.9821877479553223,grad_norm: 0.9999990778140706, iteration: 190557
loss: 1.000440001487732,grad_norm: 0.9999993056486686, iteration: 190558
loss: 1.0502748489379883,grad_norm: 0.9999991044604449, iteration: 190559
loss: 1.002213716506958,grad_norm: 0.914935545164987, iteration: 190560
loss: 1.0002621412277222,grad_norm: 0.9140661048679871, iteration: 190561
loss: 0.9972376823425293,grad_norm: 0.7893913814577942, iteration: 190562
loss: 1.0413157939910889,grad_norm: 0.9999989791322255, iteration: 190563
loss: 1.061848759651184,grad_norm: 0.9999995211636118, iteration: 190564
loss: 0.9999679923057556,grad_norm: 0.9999991386391927, iteration: 190565
loss: 0.9741109609603882,grad_norm: 0.9999992762966577, iteration: 190566
loss: 1.0409588813781738,grad_norm: 0.9999991489708205, iteration: 190567
loss: 0.9850628972053528,grad_norm: 0.8775866813829042, iteration: 190568
loss: 0.9458697438240051,grad_norm: 0.9198469642467325, iteration: 190569
loss: 1.0449104309082031,grad_norm: 0.9999996406041255, iteration: 190570
loss: 1.0069395303726196,grad_norm: 0.8440484597567925, iteration: 190571
loss: 0.9973090887069702,grad_norm: 0.8737318331514367, iteration: 190572
loss: 0.9894681572914124,grad_norm: 0.9999990388473137, iteration: 190573
loss: 1.0147570371627808,grad_norm: 0.9999996958974879, iteration: 190574
loss: 0.9893962144851685,grad_norm: 0.9138905312782677, iteration: 190575
loss: 1.094017744064331,grad_norm: 0.9999994791162391, iteration: 190576
loss: 0.9670167565345764,grad_norm: 0.8282682882361103, iteration: 190577
loss: 0.985389769077301,grad_norm: 0.9043956940472966, iteration: 190578
loss: 0.9921590685844421,grad_norm: 0.8109475754870663, iteration: 190579
loss: 1.007826566696167,grad_norm: 0.9049721342821223, iteration: 190580
loss: 1.0281505584716797,grad_norm: 0.8026927053554421, iteration: 190581
loss: 0.9913108348846436,grad_norm: 0.9613125309491716, iteration: 190582
loss: 0.9976528882980347,grad_norm: 0.9301646579780908, iteration: 190583
loss: 1.005408763885498,grad_norm: 0.8914618899202708, iteration: 190584
loss: 0.9476999640464783,grad_norm: 0.949297011424027, iteration: 190585
loss: 0.9905485510826111,grad_norm: 0.8033366751521588, iteration: 190586
loss: 1.053296446800232,grad_norm: 0.9999993257703794, iteration: 190587
loss: 0.9924753308296204,grad_norm: 0.9792046286838566, iteration: 190588
loss: 1.020314335823059,grad_norm: 0.9999989976583165, iteration: 190589
loss: 0.9893000721931458,grad_norm: 0.9999991853309518, iteration: 190590
loss: 1.0020252466201782,grad_norm: 0.9631630000481424, iteration: 190591
loss: 1.0161679983139038,grad_norm: 0.9999991503245275, iteration: 190592
loss: 0.9871687889099121,grad_norm: 0.9999994048023074, iteration: 190593
loss: 1.0178892612457275,grad_norm: 0.9267951821022203, iteration: 190594
loss: 0.9813621044158936,grad_norm: 0.9999991137544411, iteration: 190595
loss: 0.9393326640129089,grad_norm: 0.8699122205208911, iteration: 190596
loss: 0.9860551357269287,grad_norm: 0.9241707999137694, iteration: 190597
loss: 0.9648366570472717,grad_norm: 0.999999102428295, iteration: 190598
loss: 0.9952330589294434,grad_norm: 0.985130197885944, iteration: 190599
loss: 1.00074303150177,grad_norm: 0.9999989955858539, iteration: 190600
loss: 1.0277307033538818,grad_norm: 0.8382805648410946, iteration: 190601
loss: 1.0095232725143433,grad_norm: 0.9999990778518271, iteration: 190602
loss: 0.9643890261650085,grad_norm: 0.9999990879837901, iteration: 190603
loss: 0.9812377095222473,grad_norm: 0.9299297182076436, iteration: 190604
loss: 0.9967978596687317,grad_norm: 0.8982373458552122, iteration: 190605
loss: 1.0135395526885986,grad_norm: 0.9690892401453766, iteration: 190606
loss: 1.0054875612258911,grad_norm: 0.9999990448340998, iteration: 190607
loss: 1.0237258672714233,grad_norm: 0.914439355578409, iteration: 190608
loss: 0.9957555532455444,grad_norm: 0.902708494199994, iteration: 190609
loss: 1.025326132774353,grad_norm: 0.9999991763063901, iteration: 190610
loss: 1.0031145811080933,grad_norm: 0.9131750224881315, iteration: 190611
loss: 0.9850547313690186,grad_norm: 0.9999990699743424, iteration: 190612
loss: 1.0102771520614624,grad_norm: 0.9999992844014514, iteration: 190613
loss: 1.0337498188018799,grad_norm: 0.9999998340283432, iteration: 190614
loss: 1.0059638023376465,grad_norm: 0.9758430308304931, iteration: 190615
loss: 0.9837868213653564,grad_norm: 0.9999990585706745, iteration: 190616
loss: 1.0017447471618652,grad_norm: 0.999999113822449, iteration: 190617
loss: 1.0066343545913696,grad_norm: 0.9999990692690572, iteration: 190618
loss: 0.9727684259414673,grad_norm: 0.9596649848807538, iteration: 190619
loss: 0.9522303938865662,grad_norm: 0.9999991429818821, iteration: 190620
loss: 0.998696506023407,grad_norm: 0.9999991194099899, iteration: 190621
loss: 1.0104018449783325,grad_norm: 0.999999073579954, iteration: 190622
loss: 1.0107773542404175,grad_norm: 0.99999896127116, iteration: 190623
loss: 1.0487546920776367,grad_norm: 0.9999990750007464, iteration: 190624
loss: 1.0186283588409424,grad_norm: 0.9999990760259851, iteration: 190625
loss: 0.986548125743866,grad_norm: 0.9999991973902524, iteration: 190626
loss: 1.0305883884429932,grad_norm: 0.9509415895413623, iteration: 190627
loss: 0.9966776967048645,grad_norm: 0.9221098712247182, iteration: 190628
loss: 0.9765396118164062,grad_norm: 0.9999990115189431, iteration: 190629
loss: 0.9797778725624084,grad_norm: 0.9575925237586174, iteration: 190630
loss: 0.9636291265487671,grad_norm: 0.7705602818820595, iteration: 190631
loss: 0.9357057213783264,grad_norm: 0.9368677132137646, iteration: 190632
loss: 1.0164878368377686,grad_norm: 0.9496680521876123, iteration: 190633
loss: 0.9851348996162415,grad_norm: 0.9999990223558096, iteration: 190634
loss: 1.0202088356018066,grad_norm: 0.9999992382688389, iteration: 190635
loss: 1.0509390830993652,grad_norm: 0.9999997933571133, iteration: 190636
loss: 1.040537714958191,grad_norm: 0.9999995719627005, iteration: 190637
loss: 0.9820215106010437,grad_norm: 0.7868827932617608, iteration: 190638
loss: 1.0260963439941406,grad_norm: 0.999999283349748, iteration: 190639
loss: 1.0281555652618408,grad_norm: 0.9250352013928325, iteration: 190640
loss: 0.9905656576156616,grad_norm: 0.9357074422003879, iteration: 190641
loss: 1.0462446212768555,grad_norm: 0.99361837644405, iteration: 190642
loss: 0.9922911524772644,grad_norm: 0.9999989803979361, iteration: 190643
loss: 0.9767131209373474,grad_norm: 0.9999996608752535, iteration: 190644
loss: 1.0020064115524292,grad_norm: 0.7453624154546797, iteration: 190645
loss: 1.0005466938018799,grad_norm: 0.9999992753414346, iteration: 190646
loss: 0.9896312952041626,grad_norm: 0.8181947950186197, iteration: 190647
loss: 0.9982783794403076,grad_norm: 0.9104937012138306, iteration: 190648
loss: 1.0058493614196777,grad_norm: 0.9391997369460517, iteration: 190649
loss: 0.9726503491401672,grad_norm: 0.9999990511124501, iteration: 190650
loss: 1.0098494291305542,grad_norm: 0.9227408742126376, iteration: 190651
loss: 1.014900803565979,grad_norm: 0.9753702703541297, iteration: 190652
loss: 1.0212284326553345,grad_norm: 0.9999991834278338, iteration: 190653
loss: 1.0155729055404663,grad_norm: 0.9999991919491119, iteration: 190654
loss: 1.0204741954803467,grad_norm: 0.9999991818169015, iteration: 190655
loss: 1.008023977279663,grad_norm: 0.8509560129983982, iteration: 190656
loss: 0.9861119985580444,grad_norm: 0.9810661876301382, iteration: 190657
loss: 1.0380582809448242,grad_norm: 0.8347176115390128, iteration: 190658
loss: 1.0053032636642456,grad_norm: 0.9999990777974046, iteration: 190659
loss: 1.0002955198287964,grad_norm: 0.9999990848461792, iteration: 190660
loss: 0.9358242154121399,grad_norm: 0.9999991688242821, iteration: 190661
loss: 1.011857032775879,grad_norm: 0.9577628997847636, iteration: 190662
loss: 0.9629787802696228,grad_norm: 0.955457564731666, iteration: 190663
loss: 0.9698706269264221,grad_norm: 0.9295596918794471, iteration: 190664
loss: 0.9978190064430237,grad_norm: 0.9999990398291146, iteration: 190665
loss: 0.9901710748672485,grad_norm: 0.8919663414795908, iteration: 190666
loss: 1.0693904161453247,grad_norm: 0.9999989643333597, iteration: 190667
loss: 0.9832496643066406,grad_norm: 0.9999991117073197, iteration: 190668
loss: 1.0047986507415771,grad_norm: 0.9999994504531488, iteration: 190669
loss: 0.9679901599884033,grad_norm: 0.9029150309410265, iteration: 190670
loss: 1.0035932064056396,grad_norm: 0.990078772076236, iteration: 190671
loss: 0.9561795592308044,grad_norm: 0.9554372666115767, iteration: 190672
loss: 1.0300394296646118,grad_norm: 0.9999991118097713, iteration: 190673
loss: 0.983626663684845,grad_norm: 0.8956753276580166, iteration: 190674
loss: 0.9952986836433411,grad_norm: 0.8547104555540671, iteration: 190675
loss: 0.9829502105712891,grad_norm: 0.9991947147050244, iteration: 190676
loss: 1.0830111503601074,grad_norm: 0.9999992759785887, iteration: 190677
loss: 1.0314645767211914,grad_norm: 0.8987283764292098, iteration: 190678
loss: 0.9695402383804321,grad_norm: 0.9999990551488658, iteration: 190679
loss: 1.0114223957061768,grad_norm: 0.9249101314820143, iteration: 190680
loss: 1.014950156211853,grad_norm: 0.8762860101167034, iteration: 190681
loss: 1.138150691986084,grad_norm: 0.9999997027466532, iteration: 190682
loss: 1.025586485862732,grad_norm: 0.8870485817175683, iteration: 190683
loss: 1.0033292770385742,grad_norm: 0.9999989962185588, iteration: 190684
loss: 0.9842517971992493,grad_norm: 0.8694454054196591, iteration: 190685
loss: 0.9611490368843079,grad_norm: 0.8395049551745777, iteration: 190686
loss: 0.9672132134437561,grad_norm: 0.9986015787541833, iteration: 190687
loss: 1.015390157699585,grad_norm: 0.9999998624522837, iteration: 190688
loss: 1.0442546606063843,grad_norm: 0.9999992285071199, iteration: 190689
loss: 0.9762616157531738,grad_norm: 0.8321877204603427, iteration: 190690
loss: 0.9711095094680786,grad_norm: 0.999999079489109, iteration: 190691
loss: 0.9705058336257935,grad_norm: 0.8412085032925937, iteration: 190692
loss: 0.971570611000061,grad_norm: 0.9476727217299746, iteration: 190693
loss: 1.0423554182052612,grad_norm: 0.9379259449146794, iteration: 190694
loss: 0.9929479360580444,grad_norm: 0.9999991645111176, iteration: 190695
loss: 0.9653359055519104,grad_norm: 0.9999990906027013, iteration: 190696
loss: 0.979636013507843,grad_norm: 0.8854408004741401, iteration: 190697
loss: 1.00795316696167,grad_norm: 0.9089764220095503, iteration: 190698
loss: 0.9962020516395569,grad_norm: 0.9352533677153011, iteration: 190699
loss: 1.0025488138198853,grad_norm: 0.987969168014564, iteration: 190700
loss: 1.0253602266311646,grad_norm: 0.8008831155110825, iteration: 190701
loss: 1.019229769706726,grad_norm: 0.9999990245605954, iteration: 190702
loss: 0.9422262907028198,grad_norm: 0.9999997860505668, iteration: 190703
loss: 0.9876765012741089,grad_norm: 0.9999994332914492, iteration: 190704
loss: 1.003998041152954,grad_norm: 0.8507910019660837, iteration: 190705
loss: 0.9995825290679932,grad_norm: 0.9999992267624344, iteration: 190706
loss: 0.9973133206367493,grad_norm: 0.8532499308786022, iteration: 190707
loss: 1.009891390800476,grad_norm: 0.9999997325323668, iteration: 190708
loss: 1.0122493505477905,grad_norm: 0.9999991916278402, iteration: 190709
loss: 0.9968606233596802,grad_norm: 0.9247643496154376, iteration: 190710
loss: 1.0264387130737305,grad_norm: 0.9363596804291333, iteration: 190711
loss: 1.0350985527038574,grad_norm: 0.9060582222008092, iteration: 190712
loss: 1.0250673294067383,grad_norm: 0.9243828879081231, iteration: 190713
loss: 1.01524019241333,grad_norm: 0.8231776662957673, iteration: 190714
loss: 1.016672968864441,grad_norm: 0.9999991351420168, iteration: 190715
loss: 1.0414924621582031,grad_norm: 0.9999991013927735, iteration: 190716
loss: 0.9766455888748169,grad_norm: 0.9999995372466123, iteration: 190717
loss: 1.0284053087234497,grad_norm: 0.8301893960404164, iteration: 190718
loss: 0.9902976751327515,grad_norm: 0.9876094164048775, iteration: 190719
loss: 0.9876826405525208,grad_norm: 0.9150513188335745, iteration: 190720
loss: 1.0137910842895508,grad_norm: 0.999999202310311, iteration: 190721
loss: 1.0080175399780273,grad_norm: 0.9999991474358947, iteration: 190722
loss: 1.0109649896621704,grad_norm: 0.9131861825081327, iteration: 190723
loss: 0.9858248829841614,grad_norm: 0.9999990061835367, iteration: 190724
loss: 0.9806034564971924,grad_norm: 0.9871365238320191, iteration: 190725
loss: 0.9922909736633301,grad_norm: 0.9309250899635783, iteration: 190726
loss: 1.0231763124465942,grad_norm: 0.9999992440495216, iteration: 190727
loss: 1.029800295829773,grad_norm: 0.9513479258244192, iteration: 190728
loss: 0.995691180229187,grad_norm: 0.999999084235604, iteration: 190729
loss: 0.9967370629310608,grad_norm: 0.9999992293894864, iteration: 190730
loss: 1.0000536441802979,grad_norm: 0.9999991970365968, iteration: 190731
loss: 0.9772825241088867,grad_norm: 0.8666911359046413, iteration: 190732
loss: 0.9959848523139954,grad_norm: 0.7121976041759933, iteration: 190733
loss: 0.9721055030822754,grad_norm: 0.9790160439532317, iteration: 190734
loss: 1.006082534790039,grad_norm: 0.8719617739759038, iteration: 190735
loss: 1.0563660860061646,grad_norm: 0.9999993841130234, iteration: 190736
loss: 0.994080126285553,grad_norm: 0.99999907903879, iteration: 190737
loss: 0.9731913805007935,grad_norm: 0.9045822484603598, iteration: 190738
loss: 1.0163160562515259,grad_norm: 0.8370956885424449, iteration: 190739
loss: 0.9894186854362488,grad_norm: 0.9999991037036302, iteration: 190740
loss: 0.9997214078903198,grad_norm: 0.8669400860630939, iteration: 190741
loss: 0.9903143048286438,grad_norm: 0.9999990709192459, iteration: 190742
loss: 0.9951185584068298,grad_norm: 0.9059382169102708, iteration: 190743
loss: 0.9905014038085938,grad_norm: 0.8492198854138261, iteration: 190744
loss: 0.9924402832984924,grad_norm: 0.8334603688686781, iteration: 190745
loss: 0.9939565658569336,grad_norm: 0.9999991515940702, iteration: 190746
loss: 0.993798553943634,grad_norm: 0.9999989700434025, iteration: 190747
loss: 0.999758780002594,grad_norm: 0.8844943700362166, iteration: 190748
loss: 1.0065208673477173,grad_norm: 0.9999990693957685, iteration: 190749
loss: 1.0148380994796753,grad_norm: 0.8679583744993602, iteration: 190750
loss: 1.0259335041046143,grad_norm: 0.850680010407836, iteration: 190751
loss: 1.0225263833999634,grad_norm: 0.9699062321031424, iteration: 190752
loss: 0.9868771433830261,grad_norm: 0.9999998709256672, iteration: 190753
loss: 1.0109630823135376,grad_norm: 0.9999990417717493, iteration: 190754
loss: 1.0140330791473389,grad_norm: 0.9999990986237892, iteration: 190755
loss: 1.033491611480713,grad_norm: 0.9999991054831444, iteration: 190756
loss: 0.9788737893104553,grad_norm: 0.8738492610170392, iteration: 190757
loss: 0.9591491222381592,grad_norm: 0.9497042643598023, iteration: 190758
loss: 0.9929459095001221,grad_norm: 0.9999990826682563, iteration: 190759
loss: 1.0104656219482422,grad_norm: 0.9999991897168542, iteration: 190760
loss: 1.0265699625015259,grad_norm: 0.8748859275187022, iteration: 190761
loss: 1.0055382251739502,grad_norm: 0.9999990691977181, iteration: 190762
loss: 0.9656252861022949,grad_norm: 0.8408150877139844, iteration: 190763
loss: 0.9793975353240967,grad_norm: 0.999999070783613, iteration: 190764
loss: 1.0219956636428833,grad_norm: 0.9966503172866252, iteration: 190765
loss: 1.0429880619049072,grad_norm: 0.9999998538845195, iteration: 190766
loss: 0.967612624168396,grad_norm: 0.9656785007190486, iteration: 190767
loss: 1.0124189853668213,grad_norm: 0.860609481899336, iteration: 190768
loss: 0.999530553817749,grad_norm: 0.9999991397724086, iteration: 190769
loss: 1.0183383226394653,grad_norm: 0.892260911940247, iteration: 190770
loss: 1.0398753881454468,grad_norm: 0.9999999589213976, iteration: 190771
loss: 1.007463812828064,grad_norm: 0.8174627573799624, iteration: 190772
loss: 1.0984753370285034,grad_norm: 0.9075799455814416, iteration: 190773
loss: 0.9786034226417542,grad_norm: 0.8777209961832363, iteration: 190774
loss: 0.9838286638259888,grad_norm: 0.9999991955466301, iteration: 190775
loss: 1.0288749933242798,grad_norm: 0.9542221997032365, iteration: 190776
loss: 1.0092469453811646,grad_norm: 0.9999988825561265, iteration: 190777
loss: 0.9965050220489502,grad_norm: 0.9999997091477387, iteration: 190778
loss: 1.0003960132598877,grad_norm: 0.9999991249640702, iteration: 190779
loss: 0.9498832821846008,grad_norm: 0.9999992693420393, iteration: 190780
loss: 1.0095374584197998,grad_norm: 0.9999992680215832, iteration: 190781
loss: 1.0171085596084595,grad_norm: 0.9377175346903293, iteration: 190782
loss: 1.002447485923767,grad_norm: 0.9020621917272311, iteration: 190783
loss: 1.0157510042190552,grad_norm: 0.9999991557361747, iteration: 190784
loss: 0.9884034991264343,grad_norm: 0.9999991382727258, iteration: 190785
loss: 0.9723073244094849,grad_norm: 0.9958905543648041, iteration: 190786
loss: 1.0005035400390625,grad_norm: 0.9275827947271716, iteration: 190787
loss: 1.0567567348480225,grad_norm: 0.9999996590515104, iteration: 190788
loss: 1.00857675075531,grad_norm: 0.9626929492611449, iteration: 190789
loss: 1.0260018110275269,grad_norm: 0.9326319788929951, iteration: 190790
loss: 1.0325089693069458,grad_norm: 0.9999997447245518, iteration: 190791
loss: 1.0226589441299438,grad_norm: 0.9999993244337595, iteration: 190792
loss: 1.016337513923645,grad_norm: 0.9999992213902911, iteration: 190793
loss: 1.0400227308273315,grad_norm: 0.9999991365963975, iteration: 190794
loss: 0.9893496036529541,grad_norm: 0.9064775412825112, iteration: 190795
loss: 0.9934367537498474,grad_norm: 0.9999991574697428, iteration: 190796
loss: 1.140637993812561,grad_norm: 0.9999994322331814, iteration: 190797
loss: 0.9878033399581909,grad_norm: 0.8967196665071603, iteration: 190798
loss: 1.039445161819458,grad_norm: 0.9999997339770401, iteration: 190799
loss: 1.1407161951065063,grad_norm: 0.9999997534781133, iteration: 190800
loss: 0.9914861917495728,grad_norm: 0.896072116710149, iteration: 190801
loss: 1.0053433179855347,grad_norm: 0.9999989732319676, iteration: 190802
loss: 1.0016322135925293,grad_norm: 0.9999992559374736, iteration: 190803
loss: 0.9984627366065979,grad_norm: 0.9999996951907206, iteration: 190804
loss: 1.024085521697998,grad_norm: 0.9999996826755297, iteration: 190805
loss: 1.0311232805252075,grad_norm: 0.999999990459678, iteration: 190806
loss: 1.0018079280853271,grad_norm: 0.8963221276285797, iteration: 190807
loss: 1.0026795864105225,grad_norm: 0.9999994771099091, iteration: 190808
loss: 1.0227149724960327,grad_norm: 0.9414138298872323, iteration: 190809
loss: 1.013504981994629,grad_norm: 0.999999056464933, iteration: 190810
loss: 0.9935780763626099,grad_norm: 0.9999991524858175, iteration: 190811
loss: 1.0285509824752808,grad_norm: 0.9999990750342307, iteration: 190812
loss: 1.0532196760177612,grad_norm: 0.9999991103560409, iteration: 190813
loss: 1.0149800777435303,grad_norm: 0.8850530876411902, iteration: 190814
loss: 0.9925020337104797,grad_norm: 0.9908024357503602, iteration: 190815
loss: 0.9796284437179565,grad_norm: 0.9999992222596517, iteration: 190816
loss: 0.9536972641944885,grad_norm: 0.9999992045482909, iteration: 190817
loss: 1.0035887956619263,grad_norm: 0.862399861803219, iteration: 190818
loss: 1.0154147148132324,grad_norm: 0.9999991143802727, iteration: 190819
loss: 0.9833044409751892,grad_norm: 0.9999992649432236, iteration: 190820
loss: 0.9906978011131287,grad_norm: 0.8443422323492897, iteration: 190821
loss: 0.9892839193344116,grad_norm: 0.8218478579316276, iteration: 190822
loss: 0.9938653707504272,grad_norm: 0.8574998386162235, iteration: 190823
loss: 1.0063248872756958,grad_norm: 0.9999990745471983, iteration: 190824
loss: 0.9688591361045837,grad_norm: 0.9999989445945127, iteration: 190825
loss: 0.9871503710746765,grad_norm: 0.9878190347949085, iteration: 190826
loss: 0.978570282459259,grad_norm: 0.9999992022598282, iteration: 190827
loss: 0.9900585412979126,grad_norm: 0.9999991515448569, iteration: 190828
loss: 0.987983763217926,grad_norm: 0.8964341235521622, iteration: 190829
loss: 0.9839290976524353,grad_norm: 0.9999998916235889, iteration: 190830
loss: 1.0302633047103882,grad_norm: 0.987578658297526, iteration: 190831
loss: 0.9853194355964661,grad_norm: 0.9016729620484244, iteration: 190832
loss: 1.0173180103302002,grad_norm: 0.99999897016135, iteration: 190833
loss: 1.013801097869873,grad_norm: 0.9999992955079674, iteration: 190834
loss: 0.9621384739875793,grad_norm: 0.8918266009171967, iteration: 190835
loss: 0.9962846040725708,grad_norm: 0.9999990755431424, iteration: 190836
loss: 1.0050784349441528,grad_norm: 0.8565300179754938, iteration: 190837
loss: 1.0217432975769043,grad_norm: 0.8470421309346385, iteration: 190838
loss: 1.010685920715332,grad_norm: 0.8654681497906864, iteration: 190839
loss: 0.9683380722999573,grad_norm: 0.9200611916208558, iteration: 190840
loss: 0.9990726113319397,grad_norm: 0.9999992426128836, iteration: 190841
loss: 1.0158625841140747,grad_norm: 0.816453729794997, iteration: 190842
loss: 0.9935458302497864,grad_norm: 0.9689226750072605, iteration: 190843
loss: 0.9970656633377075,grad_norm: 0.9999998409603831, iteration: 190844
loss: 1.0210609436035156,grad_norm: 0.9481612573148658, iteration: 190845
loss: 0.9487471580505371,grad_norm: 0.9999991402398348, iteration: 190846
loss: 0.9996674060821533,grad_norm: 0.9791605157183051, iteration: 190847
loss: 0.9996972680091858,grad_norm: 0.9578173643906533, iteration: 190848
loss: 1.0313886404037476,grad_norm: 0.9618845544592356, iteration: 190849
loss: 0.9773696064949036,grad_norm: 0.999999085748848, iteration: 190850
loss: 0.9785060882568359,grad_norm: 0.9999990994807031, iteration: 190851
loss: 0.9932860136032104,grad_norm: 0.9999991153627498, iteration: 190852
loss: 0.9963895082473755,grad_norm: 0.8767352129135645, iteration: 190853
loss: 1.0200453996658325,grad_norm: 0.9689501172253205, iteration: 190854
loss: 0.9945461750030518,grad_norm: 0.9999989984487058, iteration: 190855
loss: 0.9700794219970703,grad_norm: 0.8370238435437052, iteration: 190856
loss: 1.0142210721969604,grad_norm: 0.7921407263890483, iteration: 190857
loss: 1.0304419994354248,grad_norm: 0.9999989192755068, iteration: 190858
loss: 1.005083441734314,grad_norm: 0.9999990143201806, iteration: 190859
loss: 0.9980847239494324,grad_norm: 0.7830344186676785, iteration: 190860
loss: 0.9700129628181458,grad_norm: 0.9443739539429273, iteration: 190861
loss: 1.0571380853652954,grad_norm: 0.9999995676771587, iteration: 190862
loss: 1.0114021301269531,grad_norm: 0.9999990559388143, iteration: 190863
loss: 0.9583906531333923,grad_norm: 0.9887471179038474, iteration: 190864
loss: 0.9999921917915344,grad_norm: 0.9999990105078791, iteration: 190865
loss: 0.9899061322212219,grad_norm: 0.9679040860578838, iteration: 190866
loss: 1.004266619682312,grad_norm: 0.9999991951374056, iteration: 190867
loss: 1.0190682411193848,grad_norm: 0.9704554079877283, iteration: 190868
loss: 1.031955361366272,grad_norm: 0.9999999481130476, iteration: 190869
loss: 1.043817162513733,grad_norm: 0.9793603068037187, iteration: 190870
loss: 1.0256662368774414,grad_norm: 0.9999992302784553, iteration: 190871
loss: 1.0190192461013794,grad_norm: 0.9999995309845219, iteration: 190872
loss: 1.02273690700531,grad_norm: 0.8738495139040021, iteration: 190873
loss: 1.0017629861831665,grad_norm: 0.9196751898721677, iteration: 190874
loss: 0.9852555394172668,grad_norm: 0.9902173262225576, iteration: 190875
loss: 0.9933320879936218,grad_norm: 0.9242201980274767, iteration: 190876
loss: 0.9426566958427429,grad_norm: 0.8664219859997235, iteration: 190877
loss: 0.9951315522193909,grad_norm: 0.8341857690792969, iteration: 190878
loss: 1.0329746007919312,grad_norm: 0.8816621680824862, iteration: 190879
loss: 1.0331255197525024,grad_norm: 0.9999992509920591, iteration: 190880
loss: 0.9661309719085693,grad_norm: 0.8838441319313567, iteration: 190881
loss: 0.9897651076316833,grad_norm: 0.907979275021928, iteration: 190882
loss: 1.0209156274795532,grad_norm: 0.8509376888673833, iteration: 190883
loss: 0.9844391942024231,grad_norm: 0.9999991471592105, iteration: 190884
loss: 1.025072455406189,grad_norm: 0.9999991782160209, iteration: 190885
loss: 0.9925020337104797,grad_norm: 0.8805772498653623, iteration: 190886
loss: 1.0131458044052124,grad_norm: 0.9999992000173115, iteration: 190887
loss: 1.0240135192871094,grad_norm: 0.9697093331028214, iteration: 190888
loss: 0.9856350421905518,grad_norm: 0.7221843268981955, iteration: 190889
loss: 1.0151548385620117,grad_norm: 0.9999995865075408, iteration: 190890
loss: 0.9738075733184814,grad_norm: 0.9253375696425803, iteration: 190891
loss: 1.0137438774108887,grad_norm: 0.8982317285151248, iteration: 190892
loss: 0.9938472509384155,grad_norm: 0.9999991311060713, iteration: 190893
loss: 0.9855747818946838,grad_norm: 0.9422987489026103, iteration: 190894
loss: 1.003085732460022,grad_norm: 0.9219295495341292, iteration: 190895
loss: 0.9986925721168518,grad_norm: 0.9223798782914925, iteration: 190896
loss: 1.0178301334381104,grad_norm: 0.9999992046172949, iteration: 190897
loss: 1.024827241897583,grad_norm: 0.9999993855424657, iteration: 190898
loss: 0.9835332036018372,grad_norm: 0.9999989583510523, iteration: 190899
loss: 1.018511414527893,grad_norm: 0.9999993078926235, iteration: 190900
loss: 1.0018993616104126,grad_norm: 0.9999989917661853, iteration: 190901
loss: 0.9454118609428406,grad_norm: 0.9999991434193957, iteration: 190902
loss: 1.0096490383148193,grad_norm: 0.9999990644209438, iteration: 190903
loss: 0.9766015410423279,grad_norm: 0.9418290175005449, iteration: 190904
loss: 1.001452922821045,grad_norm: 0.9999990840408949, iteration: 190905
loss: 0.9890092015266418,grad_norm: 0.9557519176219516, iteration: 190906
loss: 0.9852624535560608,grad_norm: 0.8515644126261336, iteration: 190907
loss: 0.9663239121437073,grad_norm: 0.9999991495991721, iteration: 190908
loss: 1.0342233180999756,grad_norm: 0.9916587716430378, iteration: 190909
loss: 0.9735517501831055,grad_norm: 0.9113947187099264, iteration: 190910
loss: 1.0576817989349365,grad_norm: 0.9999996156131289, iteration: 190911
loss: 0.9847773909568787,grad_norm: 0.923825381467182, iteration: 190912
loss: 0.992441713809967,grad_norm: 0.9631301426336969, iteration: 190913
loss: 1.0100657939910889,grad_norm: 0.9533829829724725, iteration: 190914
loss: 1.0102107524871826,grad_norm: 0.9380287501627945, iteration: 190915
loss: 1.0458345413208008,grad_norm: 0.999998975479033, iteration: 190916
loss: 1.0186021327972412,grad_norm: 0.9444543909210746, iteration: 190917
loss: 1.000669002532959,grad_norm: 0.9263546060214622, iteration: 190918
loss: 0.97639000415802,grad_norm: 0.9210833343717413, iteration: 190919
loss: 1.0322357416152954,grad_norm: 0.9999992072571235, iteration: 190920
loss: 1.029126524925232,grad_norm: 0.999999099780195, iteration: 190921
loss: 1.0049619674682617,grad_norm: 0.8578228189996102, iteration: 190922
loss: 0.9923083782196045,grad_norm: 0.9148241018833304, iteration: 190923
loss: 0.9948452711105347,grad_norm: 0.9999991104452397, iteration: 190924
loss: 1.0010031461715698,grad_norm: 0.8639152466834518, iteration: 190925
loss: 0.9645723700523376,grad_norm: 0.8074680639226536, iteration: 190926
loss: 0.9682608246803284,grad_norm: 0.9999992148541554, iteration: 190927
loss: 1.0245354175567627,grad_norm: 0.9999992440995187, iteration: 190928
loss: 1.0277658700942993,grad_norm: 0.9999991135089843, iteration: 190929
loss: 0.9977723956108093,grad_norm: 0.8995279889683823, iteration: 190930
loss: 0.997288703918457,grad_norm: 0.7793256335093772, iteration: 190931
loss: 1.0098729133605957,grad_norm: 0.9999991499123302, iteration: 190932
loss: 1.031297206878662,grad_norm: 0.9999996895417332, iteration: 190933
loss: 0.997567355632782,grad_norm: 0.8309329083910749, iteration: 190934
loss: 0.9643953442573547,grad_norm: 0.9999990332605765, iteration: 190935
loss: 1.0081217288970947,grad_norm: 0.9883558270191187, iteration: 190936
loss: 1.0103915929794312,grad_norm: 0.9464406940428426, iteration: 190937
loss: 1.016891360282898,grad_norm: 0.9025994261850058, iteration: 190938
loss: 1.025283932685852,grad_norm: 0.9091211566810282, iteration: 190939
loss: 0.978251039981842,grad_norm: 0.9807083124269648, iteration: 190940
loss: 1.0653501749038696,grad_norm: 0.9999992107598185, iteration: 190941
loss: 0.9710659980773926,grad_norm: 0.8981057261015156, iteration: 190942
loss: 0.9666926860809326,grad_norm: 0.8574050604047663, iteration: 190943
loss: 1.0225807428359985,grad_norm: 0.966942844981907, iteration: 190944
loss: 0.9456610679626465,grad_norm: 0.9311742893837537, iteration: 190945
loss: 0.9776895642280579,grad_norm: 0.9706301504489839, iteration: 190946
loss: 0.9937559962272644,grad_norm: 0.9999992308341276, iteration: 190947
loss: 0.9882478713989258,grad_norm: 0.8866518146149842, iteration: 190948
loss: 0.9686682224273682,grad_norm: 0.8427500108958886, iteration: 190949
loss: 0.9684886932373047,grad_norm: 0.9999990405007134, iteration: 190950
loss: 1.0172713994979858,grad_norm: 0.999999326650946, iteration: 190951
loss: 0.9997338652610779,grad_norm: 0.9999991167741246, iteration: 190952
loss: 0.9748558402061462,grad_norm: 0.8499420168135529, iteration: 190953
loss: 0.996607780456543,grad_norm: 0.9126600103795495, iteration: 190954
loss: 0.9766031503677368,grad_norm: 0.9999992003852556, iteration: 190955
loss: 0.9928947687149048,grad_norm: 0.7676027311928512, iteration: 190956
loss: 0.9993366003036499,grad_norm: 0.9999991328639652, iteration: 190957
loss: 0.9915288686752319,grad_norm: 0.8310781960503179, iteration: 190958
loss: 0.9937580823898315,grad_norm: 0.9999991379568173, iteration: 190959
loss: 1.0700916051864624,grad_norm: 0.9999998534979625, iteration: 190960
loss: 0.9725756645202637,grad_norm: 0.9965218139135201, iteration: 190961
loss: 1.012161135673523,grad_norm: 0.9999991457745334, iteration: 190962
loss: 0.9583409428596497,grad_norm: 0.9999991787893078, iteration: 190963
loss: 1.0241851806640625,grad_norm: 0.7963896785729134, iteration: 190964
loss: 1.008061170578003,grad_norm: 0.8981194637077735, iteration: 190965
loss: 0.9556395411491394,grad_norm: 0.9999992366999191, iteration: 190966
loss: 1.0472965240478516,grad_norm: 0.9045529097496197, iteration: 190967
loss: 1.030126929283142,grad_norm: 0.9313336650584466, iteration: 190968
loss: 0.975487470626831,grad_norm: 0.9956449092355512, iteration: 190969
loss: 1.0171464681625366,grad_norm: 0.9999992547020222, iteration: 190970
loss: 1.0292567014694214,grad_norm: 0.9999990917631004, iteration: 190971
loss: 0.9705866575241089,grad_norm: 0.9473354607824575, iteration: 190972
loss: 1.0008931159973145,grad_norm: 0.7942643976381438, iteration: 190973
loss: 0.9570538997650146,grad_norm: 0.8850230802048581, iteration: 190974
loss: 0.9806264042854309,grad_norm: 0.8268425087605014, iteration: 190975
loss: 0.9755334854125977,grad_norm: 0.8915491797486413, iteration: 190976
loss: 0.9677716493606567,grad_norm: 0.9780778114375709, iteration: 190977
loss: 1.016019344329834,grad_norm: 0.9751300516948944, iteration: 190978
loss: 1.0138087272644043,grad_norm: 0.9999991099918685, iteration: 190979
loss: 0.9852510094642639,grad_norm: 0.9999988922727922, iteration: 190980
loss: 0.9644919037818909,grad_norm: 0.9999991361870013, iteration: 190981
loss: 0.9811643958091736,grad_norm: 0.8796854823391619, iteration: 190982
loss: 0.9709106683731079,grad_norm: 0.9218029238086413, iteration: 190983
loss: 0.9611091017723083,grad_norm: 0.9999991084645778, iteration: 190984
loss: 0.95866459608078,grad_norm: 0.9059725828374378, iteration: 190985
loss: 0.9799838066101074,grad_norm: 0.9999991758848848, iteration: 190986
loss: 0.9949969053268433,grad_norm: 0.9006061970630601, iteration: 190987
loss: 1.0211176872253418,grad_norm: 0.9999992478304773, iteration: 190988
loss: 1.0345509052276611,grad_norm: 0.8520553141841587, iteration: 190989
loss: 1.0041221380233765,grad_norm: 0.9999990022918506, iteration: 190990
loss: 1.0203235149383545,grad_norm: 0.9999998869771636, iteration: 190991
loss: 0.9935186505317688,grad_norm: 0.854918689573774, iteration: 190992
loss: 1.0237475633621216,grad_norm: 0.8753682597629873, iteration: 190993
loss: 1.0254499912261963,grad_norm: 0.9210036612157024, iteration: 190994
loss: 0.9804669618606567,grad_norm: 0.9316219405774854, iteration: 190995
loss: 0.9456833600997925,grad_norm: 0.9105549668966484, iteration: 190996
loss: 1.0355385541915894,grad_norm: 0.9999991838821795, iteration: 190997
loss: 1.0330049991607666,grad_norm: 0.9999992809541827, iteration: 190998
loss: 1.0164233446121216,grad_norm: 0.9999992006942257, iteration: 190999
loss: 0.9632478356361389,grad_norm: 0.9999991679200044, iteration: 191000
loss: 1.0610606670379639,grad_norm: 0.9999999353775729, iteration: 191001
loss: 0.9740369915962219,grad_norm: 0.9656399564198119, iteration: 191002
loss: 1.002217173576355,grad_norm: 0.9452851164837008, iteration: 191003
loss: 1.0241270065307617,grad_norm: 0.9999990602448664, iteration: 191004
loss: 1.0735340118408203,grad_norm: 0.9999999351699722, iteration: 191005
loss: 0.9682086706161499,grad_norm: 0.9999988445014654, iteration: 191006
loss: 0.9821011424064636,grad_norm: 0.8745128877634541, iteration: 191007
loss: 1.0164800882339478,grad_norm: 0.999999080508885, iteration: 191008
loss: 1.0988205671310425,grad_norm: 0.9999993247958379, iteration: 191009
loss: 0.9895492792129517,grad_norm: 0.9999993412499486, iteration: 191010
loss: 1.203553557395935,grad_norm: 0.9999993434752551, iteration: 191011
loss: 1.060935139656067,grad_norm: 0.9999990604750592, iteration: 191012
loss: 0.9968447685241699,grad_norm: 0.8689090555519954, iteration: 191013
loss: 1.021493673324585,grad_norm: 0.9999992359801968, iteration: 191014
loss: 0.9832392334938049,grad_norm: 0.9999992001729541, iteration: 191015
loss: 1.0498950481414795,grad_norm: 0.9999992182344515, iteration: 191016
loss: 1.0314594507217407,grad_norm: 0.9999992336886552, iteration: 191017
loss: 1.117262363433838,grad_norm: 0.9999997581325162, iteration: 191018
loss: 1.047935128211975,grad_norm: 0.9999990537488945, iteration: 191019
loss: 1.069663405418396,grad_norm: 0.9130525171836529, iteration: 191020
loss: 0.9545834064483643,grad_norm: 0.9999992008442681, iteration: 191021
loss: 1.0211296081542969,grad_norm: 0.8808680259049069, iteration: 191022
loss: 1.2251592874526978,grad_norm: 0.9999992490198738, iteration: 191023
loss: 0.9965684413909912,grad_norm: 0.8366334382515612, iteration: 191024
loss: 0.9949876666069031,grad_norm: 0.9999990206021564, iteration: 191025
loss: 0.9924783110618591,grad_norm: 0.8238783171542003, iteration: 191026
loss: 1.0821985006332397,grad_norm: 0.9999999561391271, iteration: 191027
loss: 1.0084251165390015,grad_norm: 0.9313622489010154, iteration: 191028
loss: 1.1352379322052002,grad_norm: 0.9999994961001286, iteration: 191029
loss: 0.9703308343887329,grad_norm: 0.9999991791817617, iteration: 191030
loss: 1.0996015071868896,grad_norm: 0.9999991526968255, iteration: 191031
loss: 1.0978567600250244,grad_norm: 0.999999937127308, iteration: 191032
loss: 1.132727861404419,grad_norm: 0.999999917671548, iteration: 191033
loss: 1.0450905561447144,grad_norm: 0.9999994622614633, iteration: 191034
loss: 1.0901669263839722,grad_norm: 0.9999995692427085, iteration: 191035
loss: 1.0077916383743286,grad_norm: 0.9631964367998229, iteration: 191036
loss: 0.9797691106796265,grad_norm: 0.9684410493902609, iteration: 191037
loss: 1.2766629457473755,grad_norm: 0.999999903707061, iteration: 191038
loss: 1.1695070266723633,grad_norm: 0.999999640429748, iteration: 191039
loss: 1.0046812295913696,grad_norm: 0.8221147766737955, iteration: 191040
loss: 1.289309024810791,grad_norm: 0.9999994945734602, iteration: 191041
loss: 1.0375851392745972,grad_norm: 0.9999991075354105, iteration: 191042
loss: 1.0598788261413574,grad_norm: 1.000000135947269, iteration: 191043
loss: 1.0016676187515259,grad_norm: 0.901573138700925, iteration: 191044
loss: 1.0048717260360718,grad_norm: 0.9111125648129474, iteration: 191045
loss: 1.0179765224456787,grad_norm: 0.9600336805976021, iteration: 191046
loss: 1.0084617137908936,grad_norm: 0.9883301489821389, iteration: 191047
loss: 1.0405837297439575,grad_norm: 0.999999601802503, iteration: 191048
loss: 1.0491024255752563,grad_norm: 0.9999992048708489, iteration: 191049
loss: 1.044098973274231,grad_norm: 0.9999989761353411, iteration: 191050
loss: 1.0321226119995117,grad_norm: 0.999999046695861, iteration: 191051
loss: 1.0865416526794434,grad_norm: 0.9999997400254372, iteration: 191052
loss: 0.9777113795280457,grad_norm: 0.8890290831992528, iteration: 191053
loss: 1.0550156831741333,grad_norm: 0.9999998979472582, iteration: 191054
loss: 1.0326087474822998,grad_norm: 0.7935068868100195, iteration: 191055
loss: 0.9752679467201233,grad_norm: 0.9999990368634325, iteration: 191056
loss: 1.0141112804412842,grad_norm: 0.771027121074334, iteration: 191057
loss: 1.0370452404022217,grad_norm: 0.9999994634970962, iteration: 191058
loss: 1.0181572437286377,grad_norm: 0.9999991916667539, iteration: 191059
loss: 0.9783552885055542,grad_norm: 0.9999992502238887, iteration: 191060
loss: 1.038953185081482,grad_norm: 0.9565112272282109, iteration: 191061
loss: 1.0106381177902222,grad_norm: 0.9999997550360267, iteration: 191062
loss: 1.0460104942321777,grad_norm: 0.99999966524107, iteration: 191063
loss: 1.0078924894332886,grad_norm: 0.9663388807183109, iteration: 191064
loss: 1.020501732826233,grad_norm: 0.8955177486575103, iteration: 191065
loss: 0.9760074019432068,grad_norm: 0.9206518108936291, iteration: 191066
loss: 0.9995055198669434,grad_norm: 0.9729942074486091, iteration: 191067
loss: 0.9986438155174255,grad_norm: 0.9999991624757909, iteration: 191068
loss: 1.0173335075378418,grad_norm: 0.9999990622519089, iteration: 191069
loss: 1.0111758708953857,grad_norm: 0.9999994894473998, iteration: 191070
loss: 0.9921512007713318,grad_norm: 0.8918747791354393, iteration: 191071
loss: 1.0018548965454102,grad_norm: 0.9999995613464125, iteration: 191072
loss: 1.0521682500839233,grad_norm: 0.9999995906293533, iteration: 191073
loss: 1.0360041856765747,grad_norm: 0.9999994813990246, iteration: 191074
loss: 1.0363101959228516,grad_norm: 0.9999991549208407, iteration: 191075
loss: 0.9876753687858582,grad_norm: 0.8292329485939501, iteration: 191076
loss: 1.007044792175293,grad_norm: 0.9999991001500402, iteration: 191077
loss: 1.0425962209701538,grad_norm: 0.9999992959542898, iteration: 191078
loss: 0.9873486757278442,grad_norm: 0.9086274926522856, iteration: 191079
loss: 1.03294837474823,grad_norm: 0.9999997607439259, iteration: 191080
loss: 1.0143852233886719,grad_norm: 0.9999991282206712, iteration: 191081
loss: 1.0199569463729858,grad_norm: 0.9999995075245759, iteration: 191082
loss: 0.9856292009353638,grad_norm: 0.999999249880436, iteration: 191083
loss: 1.0738335847854614,grad_norm: 0.9999996109249093, iteration: 191084
loss: 1.0405933856964111,grad_norm: 0.9999996540852223, iteration: 191085
loss: 0.9807620048522949,grad_norm: 0.9954665375826355, iteration: 191086
loss: 0.9898025989532471,grad_norm: 0.999999458228718, iteration: 191087
loss: 1.2007287740707397,grad_norm: 0.9999996476199108, iteration: 191088
loss: 0.9872163534164429,grad_norm: 0.8389849736032262, iteration: 191089
loss: 0.9850336313247681,grad_norm: 0.9497886835716143, iteration: 191090
loss: 1.0336555242538452,grad_norm: 0.919700186225679, iteration: 191091
loss: 1.004770278930664,grad_norm: 0.9878399594586084, iteration: 191092
loss: 1.0448483228683472,grad_norm: 0.9999990875926013, iteration: 191093
loss: 0.9984686374664307,grad_norm: 0.8770854667899795, iteration: 191094
loss: 0.989457905292511,grad_norm: 0.9126753109588893, iteration: 191095
loss: 0.966884195804596,grad_norm: 0.9999990793736405, iteration: 191096
loss: 1.023589849472046,grad_norm: 0.9999992022741492, iteration: 191097
loss: 1.0176126956939697,grad_norm: 0.9999994086206739, iteration: 191098
loss: 1.0071299076080322,grad_norm: 0.999999086172196, iteration: 191099
loss: 1.0239752531051636,grad_norm: 0.9999993119481517, iteration: 191100
loss: 1.0003834962844849,grad_norm: 0.9999993991960547, iteration: 191101
loss: 1.0363425016403198,grad_norm: 0.9999993398499218, iteration: 191102
loss: 1.0123176574707031,grad_norm: 0.9999990494565213, iteration: 191103
loss: 0.9416775107383728,grad_norm: 0.9999990734654447, iteration: 191104
loss: 1.013720989227295,grad_norm: 0.8722330876978865, iteration: 191105
loss: 1.0688163042068481,grad_norm: 0.9999997630063368, iteration: 191106
loss: 1.0000178813934326,grad_norm: 0.9999991981626559, iteration: 191107
loss: 1.0272057056427002,grad_norm: 0.9431826173575905, iteration: 191108
loss: 0.9831921458244324,grad_norm: 0.9999991551971161, iteration: 191109
loss: 0.9601412415504456,grad_norm: 0.999999152732268, iteration: 191110
loss: 1.009146809577942,grad_norm: 0.9999993200682148, iteration: 191111
loss: 0.9741789698600769,grad_norm: 0.9999991713125261, iteration: 191112
loss: 1.0094225406646729,grad_norm: 0.9999990397964114, iteration: 191113
loss: 0.9972714185714722,grad_norm: 0.9161125939029998, iteration: 191114
loss: 0.967261791229248,grad_norm: 0.9384274258691411, iteration: 191115
loss: 1.01494562625885,grad_norm: 0.9999992148877905, iteration: 191116
loss: 0.9902578592300415,grad_norm: 0.7835506121984195, iteration: 191117
loss: 1.0228327512741089,grad_norm: 0.9999997242049155, iteration: 191118
loss: 1.0067247152328491,grad_norm: 0.8493671374459378, iteration: 191119
loss: 1.051335096359253,grad_norm: 1.000000002433788, iteration: 191120
loss: 1.014362096786499,grad_norm: 0.9005553145753327, iteration: 191121
loss: 1.0205918550491333,grad_norm: 0.9999992173174876, iteration: 191122
loss: 0.9791924357414246,grad_norm: 0.9999992206635461, iteration: 191123
loss: 1.0156724452972412,grad_norm: 0.8732544855724117, iteration: 191124
loss: 1.0232049226760864,grad_norm: 0.912254513953999, iteration: 191125
loss: 0.9843122959136963,grad_norm: 0.9717803648714339, iteration: 191126
loss: 1.0191570520401,grad_norm: 0.8216633564999539, iteration: 191127
loss: 0.9789595603942871,grad_norm: 0.9999994136392814, iteration: 191128
loss: 0.9793466925621033,grad_norm: 0.9759470276979824, iteration: 191129
loss: 0.9829250574111938,grad_norm: 0.95172110437656, iteration: 191130
loss: 1.1390923261642456,grad_norm: 0.9999990566186097, iteration: 191131
loss: 1.0046360492706299,grad_norm: 0.9164706952014523, iteration: 191132
loss: 0.9784777164459229,grad_norm: 0.9999990822170048, iteration: 191133
loss: 1.0183520317077637,grad_norm: 0.8493690978222626, iteration: 191134
loss: 1.0164490938186646,grad_norm: 0.9999990992215964, iteration: 191135
loss: 0.999123752117157,grad_norm: 0.9999991736736845, iteration: 191136
loss: 0.9926279187202454,grad_norm: 0.8969337425923031, iteration: 191137
loss: 1.000701665878296,grad_norm: 0.9919274328578457, iteration: 191138
loss: 1.0126086473464966,grad_norm: 0.8565872901517078, iteration: 191139
loss: 1.002851963043213,grad_norm: 0.9504724539330319, iteration: 191140
loss: 1.012406349182129,grad_norm: 0.8429901502878149, iteration: 191141
loss: 1.0155587196350098,grad_norm: 0.9999991068591556, iteration: 191142
loss: 0.9977678060531616,grad_norm: 0.9999990814380981, iteration: 191143
loss: 1.0357123613357544,grad_norm: 0.9999997982774108, iteration: 191144
loss: 1.1104284524917603,grad_norm: 0.9999990516682723, iteration: 191145
loss: 0.995105504989624,grad_norm: 0.9656337274862032, iteration: 191146
loss: 1.0329043865203857,grad_norm: 0.9999990385574209, iteration: 191147
loss: 1.0346744060516357,grad_norm: 0.999999464695719, iteration: 191148
loss: 0.9683781266212463,grad_norm: 0.8854470788644391, iteration: 191149
loss: 0.9931102991104126,grad_norm: 0.9999989460169647, iteration: 191150
loss: 1.0180317163467407,grad_norm: 0.9082997125156717, iteration: 191151
loss: 1.0164586305618286,grad_norm: 0.8761629686240513, iteration: 191152
loss: 1.0453009605407715,grad_norm: 0.8476661532332177, iteration: 191153
loss: 0.9953463077545166,grad_norm: 0.9013968136618482, iteration: 191154
loss: 0.9954459071159363,grad_norm: 0.9999989872419419, iteration: 191155
loss: 1.0339637994766235,grad_norm: 0.9999992216552921, iteration: 191156
loss: 1.03995943069458,grad_norm: 0.999999921534178, iteration: 191157
loss: 0.9772962331771851,grad_norm: 0.8651818712833566, iteration: 191158
loss: 1.0178778171539307,grad_norm: 0.9776457527678009, iteration: 191159
loss: 1.0293039083480835,grad_norm: 0.8267166935953838, iteration: 191160
loss: 1.1638414859771729,grad_norm: 0.9999993887595654, iteration: 191161
loss: 0.9922111630439758,grad_norm: 0.9999992829105362, iteration: 191162
loss: 1.0386500358581543,grad_norm: 0.8971157213401022, iteration: 191163
loss: 1.040634274482727,grad_norm: 0.8782057533578941, iteration: 191164
loss: 0.9835733771324158,grad_norm: 0.9999990716680199, iteration: 191165
loss: 1.0254052877426147,grad_norm: 0.9038455644547629, iteration: 191166
loss: 0.9944384098052979,grad_norm: 0.9355403278700837, iteration: 191167
loss: 1.0222212076187134,grad_norm: 0.888073231419476, iteration: 191168
loss: 1.0114344358444214,grad_norm: 1.0000000044621176, iteration: 191169
loss: 1.0955824851989746,grad_norm: 0.9999994626697628, iteration: 191170
loss: 1.030642032623291,grad_norm: 0.9999991899338774, iteration: 191171
loss: 0.989389955997467,grad_norm: 0.8341511721704684, iteration: 191172
loss: 1.0411068201065063,grad_norm: 0.9999991858403926, iteration: 191173
loss: 1.0372053384780884,grad_norm: 0.9999992299741687, iteration: 191174
loss: 0.9887951612472534,grad_norm: 0.7660268499722793, iteration: 191175
loss: 1.0317355394363403,grad_norm: 0.9999996607925438, iteration: 191176
loss: 1.0061818361282349,grad_norm: 0.9999991388930881, iteration: 191177
loss: 1.0007165670394897,grad_norm: 0.9120622518788727, iteration: 191178
loss: 1.0219111442565918,grad_norm: 0.9999992973944111, iteration: 191179
loss: 1.0206507444381714,grad_norm: 0.9703301536000051, iteration: 191180
loss: 0.9799641370773315,grad_norm: 0.9960392376087162, iteration: 191181
loss: 1.0074807405471802,grad_norm: 0.8777821794182534, iteration: 191182
loss: 1.0137425661087036,grad_norm: 0.8087609659019439, iteration: 191183
loss: 1.0628955364227295,grad_norm: 0.9999992203278091, iteration: 191184
loss: 0.9917246699333191,grad_norm: 0.9999991083725591, iteration: 191185
loss: 1.0236175060272217,grad_norm: 0.9947615497258822, iteration: 191186
loss: 1.0728156566619873,grad_norm: 0.9999999917682546, iteration: 191187
loss: 0.9989810585975647,grad_norm: 0.8750420224236728, iteration: 191188
loss: 1.0410405397415161,grad_norm: 0.7635528527003915, iteration: 191189
loss: 0.9789937138557434,grad_norm: 0.9885462923568517, iteration: 191190
loss: 1.0114257335662842,grad_norm: 0.766619457284816, iteration: 191191
loss: 0.9916564226150513,grad_norm: 0.9254017546187454, iteration: 191192
loss: 1.0064340829849243,grad_norm: 0.9616532255148971, iteration: 191193
loss: 0.9998438954353333,grad_norm: 0.9999996577787255, iteration: 191194
loss: 1.023161768913269,grad_norm: 0.999999945091278, iteration: 191195
loss: 0.9747097492218018,grad_norm: 0.8372726917292036, iteration: 191196
loss: 0.9925165772438049,grad_norm: 0.9914533794787582, iteration: 191197
loss: 0.9792592525482178,grad_norm: 0.7412777930359054, iteration: 191198
loss: 0.9784225225448608,grad_norm: 0.9999992135511244, iteration: 191199
loss: 1.0192488431930542,grad_norm: 1.000000008645796, iteration: 191200
loss: 0.9853174686431885,grad_norm: 0.8789101085872604, iteration: 191201
loss: 1.037641167640686,grad_norm: 0.9999992053350465, iteration: 191202
loss: 0.9735414981842041,grad_norm: 0.9851099225967819, iteration: 191203
loss: 0.9846515655517578,grad_norm: 0.9528383792896213, iteration: 191204
loss: 1.0216107368469238,grad_norm: 0.827127324102938, iteration: 191205
loss: 1.0268405675888062,grad_norm: 0.9999991412388811, iteration: 191206
loss: 1.0161277055740356,grad_norm: 0.8876489517062122, iteration: 191207
loss: 1.0169146060943604,grad_norm: 0.9999990053311995, iteration: 191208
loss: 0.9661685228347778,grad_norm: 0.9999992318956955, iteration: 191209
loss: 0.9771804213523865,grad_norm: 0.9999988745108067, iteration: 191210
loss: 1.0052436590194702,grad_norm: 0.9999998308015405, iteration: 191211
loss: 0.9923915266990662,grad_norm: 0.9999989924489912, iteration: 191212
loss: 1.0256034135818481,grad_norm: 0.9999990124729032, iteration: 191213
loss: 0.9842857718467712,grad_norm: 0.9999990862337498, iteration: 191214
loss: 1.0004830360412598,grad_norm: 0.8329996069978604, iteration: 191215
loss: 1.0074399709701538,grad_norm: 0.952171730714193, iteration: 191216
loss: 0.9630809426307678,grad_norm: 0.9492035035265308, iteration: 191217
loss: 1.0132246017456055,grad_norm: 0.9530014470471366, iteration: 191218
loss: 0.9873846173286438,grad_norm: 0.9999994121222658, iteration: 191219
loss: 0.9936972260475159,grad_norm: 0.841301868051087, iteration: 191220
loss: 1.003512978553772,grad_norm: 0.8223692539693694, iteration: 191221
loss: 1.000010371208191,grad_norm: 0.9812306433986998, iteration: 191222
loss: 0.9765532612800598,grad_norm: 0.848453545945678, iteration: 191223
loss: 1.0188252925872803,grad_norm: 0.8419367478700759, iteration: 191224
loss: 1.0176197290420532,grad_norm: 0.9999990646957159, iteration: 191225
loss: 1.0246894359588623,grad_norm: 0.9975331785266106, iteration: 191226
loss: 0.9921188950538635,grad_norm: 0.7757011919276324, iteration: 191227
loss: 0.9828304648399353,grad_norm: 0.9999992173235145, iteration: 191228
loss: 0.9893715381622314,grad_norm: 0.9999990909215425, iteration: 191229
loss: 0.9779188632965088,grad_norm: 0.967780833514534, iteration: 191230
loss: 0.9927552938461304,grad_norm: 0.9999991423585881, iteration: 191231
loss: 1.004804253578186,grad_norm: 0.9999990308113316, iteration: 191232
loss: 1.0124744176864624,grad_norm: 0.9534075922503812, iteration: 191233
loss: 1.0010645389556885,grad_norm: 0.9976496595859458, iteration: 191234
loss: 0.984740138053894,grad_norm: 0.9668141182453361, iteration: 191235
loss: 1.003469467163086,grad_norm: 0.9999988975343482, iteration: 191236
loss: 0.9851892590522766,grad_norm: 0.9999993774304641, iteration: 191237
loss: 0.9760743975639343,grad_norm: 0.9083436286231886, iteration: 191238
loss: 1.0197478532791138,grad_norm: 0.9227157766976458, iteration: 191239
loss: 1.0305888652801514,grad_norm: 0.9999990333387444, iteration: 191240
loss: 0.9633346199989319,grad_norm: 0.9976615129573129, iteration: 191241
loss: 0.9914779663085938,grad_norm: 0.9999989724586547, iteration: 191242
loss: 0.9774325489997864,grad_norm: 0.9999991553524289, iteration: 191243
loss: 1.106767177581787,grad_norm: 0.9999991209233383, iteration: 191244
loss: 1.0190036296844482,grad_norm: 0.7553061606973863, iteration: 191245
loss: 1.0399178266525269,grad_norm: 0.9378859887781971, iteration: 191246
loss: 1.020912528038025,grad_norm: 0.9478393160790213, iteration: 191247
loss: 1.0475913286209106,grad_norm: 0.9597635643081546, iteration: 191248
loss: 1.009906530380249,grad_norm: 0.9999990497336384, iteration: 191249
loss: 0.9734141826629639,grad_norm: 0.999999265644309, iteration: 191250
loss: 1.0451873540878296,grad_norm: 0.9999991662355863, iteration: 191251
loss: 0.9869444370269775,grad_norm: 0.9316810956521536, iteration: 191252
loss: 0.9644905924797058,grad_norm: 0.9999990975032054, iteration: 191253
loss: 0.9995945692062378,grad_norm: 0.9754529780315948, iteration: 191254
loss: 0.9997183084487915,grad_norm: 0.9037949857831026, iteration: 191255
loss: 1.0428653955459595,grad_norm: 0.999999178184103, iteration: 191256
loss: 1.0019410848617554,grad_norm: 0.9999990787754974, iteration: 191257
loss: 1.0102746486663818,grad_norm: 0.9999992174981938, iteration: 191258
loss: 1.0367640256881714,grad_norm: 0.9999994216781258, iteration: 191259
loss: 1.0126703977584839,grad_norm: 0.9999994613131613, iteration: 191260
loss: 0.9860993027687073,grad_norm: 0.9826458463665593, iteration: 191261
loss: 1.014419436454773,grad_norm: 0.9999990750632843, iteration: 191262
loss: 1.0430948734283447,grad_norm: 0.9999995129156164, iteration: 191263
loss: 1.0143260955810547,grad_norm: 0.9999992545806193, iteration: 191264
loss: 1.0238642692565918,grad_norm: 0.8774806613633122, iteration: 191265
loss: 1.051179051399231,grad_norm: 0.9999990641910296, iteration: 191266
loss: 0.9986007213592529,grad_norm: 0.9999993613809429, iteration: 191267
loss: 1.0212938785552979,grad_norm: 0.8077705427417, iteration: 191268
loss: 1.0064278841018677,grad_norm: 0.9999564556717936, iteration: 191269
loss: 1.0358834266662598,grad_norm: 0.9999990904067892, iteration: 191270
loss: 1.0549437999725342,grad_norm: 0.999999669804191, iteration: 191271
loss: 1.024193286895752,grad_norm: 0.9999991880701711, iteration: 191272
loss: 0.9697157144546509,grad_norm: 0.9999991723493494, iteration: 191273
loss: 0.9806197285652161,grad_norm: 0.9561573740889588, iteration: 191274
loss: 0.9964054822921753,grad_norm: 0.9999991142673461, iteration: 191275
loss: 0.9977020025253296,grad_norm: 0.999999191038157, iteration: 191276
loss: 0.9973321557044983,grad_norm: 0.871220694320496, iteration: 191277
loss: 1.0148433446884155,grad_norm: 0.9999989648869657, iteration: 191278
loss: 1.0499558448791504,grad_norm: 0.999999184672519, iteration: 191279
loss: 1.0041844844818115,grad_norm: 0.8827909884064862, iteration: 191280
loss: 0.9761888384819031,grad_norm: 0.8439039900893491, iteration: 191281
loss: 1.048264980316162,grad_norm: 0.9999995768489174, iteration: 191282
loss: 1.0001130104064941,grad_norm: 0.8742752528129554, iteration: 191283
loss: 1.004264235496521,grad_norm: 0.812479452990563, iteration: 191284
loss: 1.021284818649292,grad_norm: 0.9571798545604533, iteration: 191285
loss: 1.0447190999984741,grad_norm: 0.9999990253030067, iteration: 191286
loss: 1.0303397178649902,grad_norm: 0.8945183325763985, iteration: 191287
loss: 1.0113707780838013,grad_norm: 0.9762844783587306, iteration: 191288
loss: 1.007091760635376,grad_norm: 0.8961531339186275, iteration: 191289
loss: 1.0001347064971924,grad_norm: 0.9507506287345491, iteration: 191290
loss: 0.9813039898872375,grad_norm: 0.8635289965259867, iteration: 191291
loss: 0.9857566356658936,grad_norm: 0.9354697173979533, iteration: 191292
loss: 0.9755910038948059,grad_norm: 0.8788835681418868, iteration: 191293
loss: 0.9976277947425842,grad_norm: 0.9806550023779461, iteration: 191294
loss: 1.0256719589233398,grad_norm: 0.989999496487593, iteration: 191295
loss: 0.9926910400390625,grad_norm: 0.9999992373580664, iteration: 191296
loss: 1.0053659677505493,grad_norm: 0.8772100052025505, iteration: 191297
loss: 0.9920811057090759,grad_norm: 0.9999991671593201, iteration: 191298
loss: 0.9759756326675415,grad_norm: 0.9999997883001344, iteration: 191299
loss: 0.9928554892539978,grad_norm: 0.9537632016431782, iteration: 191300
loss: 1.0027180910110474,grad_norm: 0.9999991094676876, iteration: 191301
loss: 1.0048952102661133,grad_norm: 0.9999991543507039, iteration: 191302
loss: 1.0157190561294556,grad_norm: 0.9109634715940559, iteration: 191303
loss: 0.9965800046920776,grad_norm: 0.9334589972550796, iteration: 191304
loss: 1.0023088455200195,grad_norm: 0.9920606183123535, iteration: 191305
loss: 0.9832029938697815,grad_norm: 0.771720868308004, iteration: 191306
loss: 0.9970977306365967,grad_norm: 0.9549711208289061, iteration: 191307
loss: 0.9898617267608643,grad_norm: 0.8328735777003973, iteration: 191308
loss: 1.0362999439239502,grad_norm: 0.9999993668340119, iteration: 191309
loss: 1.0111944675445557,grad_norm: 0.8967035213820969, iteration: 191310
loss: 0.9973036646842957,grad_norm: 0.8548485220102474, iteration: 191311
loss: 1.0137040615081787,grad_norm: 0.9650165380934612, iteration: 191312
loss: 1.0122950077056885,grad_norm: 0.9999990654336935, iteration: 191313
loss: 1.3417659997940063,grad_norm: 0.9999993916895219, iteration: 191314
loss: 0.9791944026947021,grad_norm: 0.8477516911977812, iteration: 191315
loss: 0.9989666938781738,grad_norm: 0.8057478044858938, iteration: 191316
loss: 0.9687384963035583,grad_norm: 0.999999213264083, iteration: 191317
loss: 1.008616328239441,grad_norm: 0.968392891051948, iteration: 191318
loss: 1.0102019309997559,grad_norm: 0.9253300679020361, iteration: 191319
loss: 0.9636083245277405,grad_norm: 0.9879952972696706, iteration: 191320
loss: 1.0233502388000488,grad_norm: 0.8299196771057994, iteration: 191321
loss: 0.9781423211097717,grad_norm: 0.9999991344998651, iteration: 191322
loss: 0.9809623956680298,grad_norm: 0.9059331829086891, iteration: 191323
loss: 0.9665492177009583,grad_norm: 0.7944567157432595, iteration: 191324
loss: 1.0121148824691772,grad_norm: 0.9999991423802194, iteration: 191325
loss: 1.010303020477295,grad_norm: 0.9999991064291426, iteration: 191326
loss: 0.9664164185523987,grad_norm: 0.9399973313945481, iteration: 191327
loss: 1.0701979398727417,grad_norm: 0.9999998152611752, iteration: 191328
loss: 1.0834821462631226,grad_norm: 0.8472908085195654, iteration: 191329
loss: 0.9515885710716248,grad_norm: 0.9999991394193535, iteration: 191330
loss: 0.9971677660942078,grad_norm: 0.849336577651099, iteration: 191331
loss: 1.0016571283340454,grad_norm: 0.9999998437337395, iteration: 191332
loss: 0.9829939007759094,grad_norm: 0.9385901624305435, iteration: 191333
loss: 1.0260553359985352,grad_norm: 0.996455207472395, iteration: 191334
loss: 1.0052359104156494,grad_norm: 0.9999993828579189, iteration: 191335
loss: 0.9742162227630615,grad_norm: 0.9128844067856026, iteration: 191336
loss: 0.9753887057304382,grad_norm: 0.9234453838838108, iteration: 191337
loss: 1.0148899555206299,grad_norm: 0.9600726363053659, iteration: 191338
loss: 0.9730525612831116,grad_norm: 0.9999991757849566, iteration: 191339
loss: 1.0364832878112793,grad_norm: 0.9999990684201436, iteration: 191340
loss: 0.9907086491584778,grad_norm: 0.8713694629751534, iteration: 191341
loss: 1.0201257467269897,grad_norm: 0.999999030358331, iteration: 191342
loss: 1.011594533920288,grad_norm: 0.906568858378522, iteration: 191343
loss: 1.069563627243042,grad_norm: 0.9999990487916952, iteration: 191344
loss: 0.965600311756134,grad_norm: 0.9918991699048264, iteration: 191345
loss: 0.9963741302490234,grad_norm: 0.999998987446151, iteration: 191346
loss: 1.0040841102600098,grad_norm: 0.8287348363900781, iteration: 191347
loss: 1.0010660886764526,grad_norm: 0.9999990060665991, iteration: 191348
loss: 0.9861404299736023,grad_norm: 0.9712733146897126, iteration: 191349
loss: 0.9747133851051331,grad_norm: 0.9526401984457182, iteration: 191350
loss: 1.0080788135528564,grad_norm: 0.9999996430102387, iteration: 191351
loss: 1.043203592300415,grad_norm: 0.9999989667495971, iteration: 191352
loss: 0.9964617490768433,grad_norm: 0.9957371305727223, iteration: 191353
loss: 0.9762091636657715,grad_norm: 0.9999991045467445, iteration: 191354
loss: 1.0486754179000854,grad_norm: 0.999999928220303, iteration: 191355
loss: 1.0251787900924683,grad_norm: 0.9999990862646212, iteration: 191356
loss: 1.0405861139297485,grad_norm: 0.9971660154786293, iteration: 191357
loss: 1.1841232776641846,grad_norm: 0.999999910925218, iteration: 191358
loss: 1.1053794622421265,grad_norm: 0.9999996872140957, iteration: 191359
loss: 1.0188459157943726,grad_norm: 0.9999999399704085, iteration: 191360
loss: 0.9900535941123962,grad_norm: 0.9416713238027042, iteration: 191361
loss: 1.1272242069244385,grad_norm: 0.9999997414641011, iteration: 191362
loss: 0.9901289939880371,grad_norm: 0.9999990110857215, iteration: 191363
loss: 1.0085113048553467,grad_norm: 0.8317019079768585, iteration: 191364
loss: 1.0177421569824219,grad_norm: 0.9999992522003736, iteration: 191365
loss: 1.022514820098877,grad_norm: 0.7870078650588552, iteration: 191366
loss: 0.9547001123428345,grad_norm: 0.9253843095030453, iteration: 191367
loss: 1.0039483308792114,grad_norm: 0.9747520511468358, iteration: 191368
loss: 1.0026130676269531,grad_norm: 0.8521355415279479, iteration: 191369
loss: 1.0000025033950806,grad_norm: 0.9366332267782402, iteration: 191370
loss: 1.0224131345748901,grad_norm: 0.9999991077079323, iteration: 191371
loss: 1.018515706062317,grad_norm: 0.999999009692919, iteration: 191372
loss: 0.9994151592254639,grad_norm: 0.8052185506619135, iteration: 191373
loss: 0.9987583756446838,grad_norm: 0.9778423108733864, iteration: 191374
loss: 1.0266724824905396,grad_norm: 0.9999991022793349, iteration: 191375
loss: 0.9716097116470337,grad_norm: 0.9999996105582706, iteration: 191376
loss: 1.0143426656723022,grad_norm: 0.9999990343675039, iteration: 191377
loss: 1.0120645761489868,grad_norm: 0.9807234712383973, iteration: 191378
loss: 1.028354287147522,grad_norm: 0.9477249012057153, iteration: 191379
loss: 0.9843176603317261,grad_norm: 0.9999998398180261, iteration: 191380
loss: 0.9718148112297058,grad_norm: 0.933196479462768, iteration: 191381
loss: 1.0338746309280396,grad_norm: 0.894112217107967, iteration: 191382
loss: 1.0171653032302856,grad_norm: 0.7962935670408158, iteration: 191383
loss: 1.0137497186660767,grad_norm: 0.9999992599667108, iteration: 191384
loss: 1.0278801918029785,grad_norm: 0.9999991628067088, iteration: 191385
loss: 0.9578202962875366,grad_norm: 0.9999990102411105, iteration: 191386
loss: 1.0292376279830933,grad_norm: 0.806168205643556, iteration: 191387
loss: 1.009057879447937,grad_norm: 0.9999992785952854, iteration: 191388
loss: 1.013301134109497,grad_norm: 0.9999991481694404, iteration: 191389
loss: 1.022421956062317,grad_norm: 0.9108245841447279, iteration: 191390
loss: 1.005588412284851,grad_norm: 0.944530247071366, iteration: 191391
loss: 1.068306565284729,grad_norm: 0.9999990319680965, iteration: 191392
loss: 1.0484939813613892,grad_norm: 0.9999992641076131, iteration: 191393
loss: 1.011281967163086,grad_norm: 0.9905480958506103, iteration: 191394
loss: 0.9785107374191284,grad_norm: 0.9766122603361983, iteration: 191395
loss: 0.9971078038215637,grad_norm: 0.9999991041541748, iteration: 191396
loss: 1.0181487798690796,grad_norm: 0.909099034890986, iteration: 191397
loss: 1.0019068717956543,grad_norm: 0.9999991291797247, iteration: 191398
loss: 1.0037696361541748,grad_norm: 0.9627154792368648, iteration: 191399
loss: 1.0119733810424805,grad_norm: 0.9999990913673806, iteration: 191400
loss: 0.9976909756660461,grad_norm: 0.9999991517595909, iteration: 191401
loss: 1.0046144723892212,grad_norm: 0.9999990842147257, iteration: 191402
loss: 1.0150597095489502,grad_norm: 0.9999990637141117, iteration: 191403
loss: 1.025823950767517,grad_norm: 0.9999994701929019, iteration: 191404
loss: 1.0155534744262695,grad_norm: 0.9999998175371395, iteration: 191405
loss: 1.020128846168518,grad_norm: 0.8512349508553683, iteration: 191406
loss: 1.0089914798736572,grad_norm: 0.9670777633142996, iteration: 191407
loss: 0.9685980677604675,grad_norm: 0.8703374137187323, iteration: 191408
loss: 1.0066314935684204,grad_norm: 0.8706160752508036, iteration: 191409
loss: 0.9758722186088562,grad_norm: 0.9659298719847189, iteration: 191410
loss: 1.0264744758605957,grad_norm: 0.9098339536290609, iteration: 191411
loss: 0.9789865016937256,grad_norm: 0.8563844554920166, iteration: 191412
loss: 1.0122891664505005,grad_norm: 0.9999990905822997, iteration: 191413
loss: 1.011254906654358,grad_norm: 0.9341887701927755, iteration: 191414
loss: 0.9947828054428101,grad_norm: 0.779581944704235, iteration: 191415
loss: 0.9878836870193481,grad_norm: 0.9627069864908716, iteration: 191416
loss: 1.0197464227676392,grad_norm: 0.8872486883685532, iteration: 191417
loss: 1.0115116834640503,grad_norm: 0.8564420233372708, iteration: 191418
loss: 1.0054693222045898,grad_norm: 0.9171993859909586, iteration: 191419
loss: 1.0172308683395386,grad_norm: 0.9999990539092678, iteration: 191420
loss: 1.0179331302642822,grad_norm: 0.8585744588575558, iteration: 191421
loss: 1.0154914855957031,grad_norm: 0.976126802741785, iteration: 191422
loss: 1.0161808729171753,grad_norm: 0.9999989538623096, iteration: 191423
loss: 0.9795275926589966,grad_norm: 0.905571795494069, iteration: 191424
loss: 0.9916996955871582,grad_norm: 0.891695303831909, iteration: 191425
loss: 1.008566975593567,grad_norm: 0.8878833212019387, iteration: 191426
loss: 0.9973823428153992,grad_norm: 0.7977125383734667, iteration: 191427
loss: 0.995739758014679,grad_norm: 0.9999990148137563, iteration: 191428
loss: 0.9842846393585205,grad_norm: 0.9371893028867183, iteration: 191429
loss: 1.0154528617858887,grad_norm: 0.8644374554277131, iteration: 191430
loss: 1.037757158279419,grad_norm: 0.9999993120695936, iteration: 191431
loss: 1.0369844436645508,grad_norm: 0.9816768670404312, iteration: 191432
loss: 1.0003713369369507,grad_norm: 0.9999989821842757, iteration: 191433
loss: 1.0186829566955566,grad_norm: 0.9487670090862472, iteration: 191434
loss: 0.9936358332633972,grad_norm: 0.9921218542256479, iteration: 191435
loss: 1.0215398073196411,grad_norm: 0.7333981990125605, iteration: 191436
loss: 0.9879997372627258,grad_norm: 0.8409820776862684, iteration: 191437
loss: 0.9743502736091614,grad_norm: 0.9999989600778973, iteration: 191438
loss: 0.9796442985534668,grad_norm: 0.9999990095395216, iteration: 191439
loss: 0.9892096519470215,grad_norm: 0.8831469819598885, iteration: 191440
loss: 0.9795047044754028,grad_norm: 0.9419673147979989, iteration: 191441
loss: 1.0100785493850708,grad_norm: 0.9576491052324753, iteration: 191442
loss: 1.1802324056625366,grad_norm: 0.99999925427472, iteration: 191443
loss: 1.0265573263168335,grad_norm: 0.9951033129891362, iteration: 191444
loss: 0.9866425395011902,grad_norm: 0.9208772956310024, iteration: 191445
loss: 0.9901560544967651,grad_norm: 0.999999126576851, iteration: 191446
loss: 0.9837837815284729,grad_norm: 0.9999990467384537, iteration: 191447
loss: 1.0428751707077026,grad_norm: 0.9851569008347566, iteration: 191448
loss: 0.9541060328483582,grad_norm: 0.9549654615862267, iteration: 191449
loss: 1.0226465463638306,grad_norm: 0.9999990449501976, iteration: 191450
loss: 0.9899890422821045,grad_norm: 0.9999991183876069, iteration: 191451
loss: 1.009710431098938,grad_norm: 0.9740391039128811, iteration: 191452
loss: 0.9914563894271851,grad_norm: 0.8694076008813999, iteration: 191453
loss: 1.0221569538116455,grad_norm: 0.9999990721237664, iteration: 191454
loss: 1.0017590522766113,grad_norm: 0.9999992672503435, iteration: 191455
loss: 1.0017486810684204,grad_norm: 0.8496466254082268, iteration: 191456
loss: 1.0065479278564453,grad_norm: 0.9999989784850928, iteration: 191457
loss: 0.9782112240791321,grad_norm: 0.9999991449218427, iteration: 191458
loss: 1.0393762588500977,grad_norm: 0.9999992002704094, iteration: 191459
loss: 1.0959999561309814,grad_norm: 0.943772932146951, iteration: 191460
loss: 1.0320549011230469,grad_norm: 0.9764535768931158, iteration: 191461
loss: 1.0071662664413452,grad_norm: 0.8401684439440694, iteration: 191462
loss: 0.9750474691390991,grad_norm: 0.8469203100617201, iteration: 191463
loss: 0.9861027002334595,grad_norm: 0.9999992128525844, iteration: 191464
loss: 1.0401244163513184,grad_norm: 0.863854056541643, iteration: 191465
loss: 0.9430387616157532,grad_norm: 0.9999990774717515, iteration: 191466
loss: 1.053257942199707,grad_norm: 0.9716712853742963, iteration: 191467
loss: 1.0374599695205688,grad_norm: 0.8069311685919399, iteration: 191468
loss: 0.9588186740875244,grad_norm: 0.9999990589833119, iteration: 191469
loss: 0.9727739691734314,grad_norm: 0.8915354307095092, iteration: 191470
loss: 1.1428437232971191,grad_norm: 0.999999583210458, iteration: 191471
loss: 0.9726114273071289,grad_norm: 0.8469269858543108, iteration: 191472
loss: 1.0150508880615234,grad_norm: 0.865198037882314, iteration: 191473
loss: 0.9914482235908508,grad_norm: 0.8843462576623115, iteration: 191474
loss: 0.9884048700332642,grad_norm: 0.9999992321682818, iteration: 191475
loss: 1.012725591659546,grad_norm: 0.9316555888908237, iteration: 191476
loss: 0.9790929555892944,grad_norm: 0.9999991669383281, iteration: 191477
loss: 1.0143101215362549,grad_norm: 0.8856725776061379, iteration: 191478
loss: 1.0076093673706055,grad_norm: 0.9999991768487986, iteration: 191479
loss: 0.9680977463722229,grad_norm: 0.9863282819282776, iteration: 191480
loss: 1.044840931892395,grad_norm: 0.9999991600759941, iteration: 191481
loss: 0.9864374399185181,grad_norm: 0.9812775687378279, iteration: 191482
loss: 0.9924983382225037,grad_norm: 0.9601414989211112, iteration: 191483
loss: 0.9640169143676758,grad_norm: 0.857726021646902, iteration: 191484
loss: 1.0606412887573242,grad_norm: 0.999999871786145, iteration: 191485
loss: 1.0338616371154785,grad_norm: 0.9382607427566309, iteration: 191486
loss: 1.0035237073898315,grad_norm: 0.9571444397788833, iteration: 191487
loss: 0.9728982448577881,grad_norm: 0.8887874846521677, iteration: 191488
loss: 0.9794241189956665,grad_norm: 0.9999990418984203, iteration: 191489
loss: 1.002306342124939,grad_norm: 0.9999990827776539, iteration: 191490
loss: 1.068042278289795,grad_norm: 0.9999991795997577, iteration: 191491
loss: 0.9823811650276184,grad_norm: 0.9354414628733907, iteration: 191492
loss: 0.9860568046569824,grad_norm: 0.9606206959982326, iteration: 191493
loss: 0.9956977963447571,grad_norm: 0.8228597357132511, iteration: 191494
loss: 1.018135905265808,grad_norm: 0.9993436007166999, iteration: 191495
loss: 1.0000790357589722,grad_norm: 0.8442625862122466, iteration: 191496
loss: 0.9979082942008972,grad_norm: 0.833460855073652, iteration: 191497
loss: 0.9943164587020874,grad_norm: 0.9584691434892525, iteration: 191498
loss: 0.970706582069397,grad_norm: 0.941758893897015, iteration: 191499
loss: 0.958218514919281,grad_norm: 0.9999990613649599, iteration: 191500
loss: 0.9700713157653809,grad_norm: 0.9694740133709029, iteration: 191501
loss: 1.0448898077011108,grad_norm: 0.9999998495852617, iteration: 191502
loss: 1.0116177797317505,grad_norm: 0.9841768002028424, iteration: 191503
loss: 1.0065574645996094,grad_norm: 0.9999991249257613, iteration: 191504
loss: 1.037964940071106,grad_norm: 0.9999990559927361, iteration: 191505
loss: 0.9927382469177246,grad_norm: 0.9999989247642129, iteration: 191506
loss: 1.0013036727905273,grad_norm: 0.873959711326163, iteration: 191507
loss: 0.9928131699562073,grad_norm: 0.9394932459418727, iteration: 191508
loss: 1.0212767124176025,grad_norm: 0.999999037659341, iteration: 191509
loss: 1.0286537408828735,grad_norm: 0.9999990875067324, iteration: 191510
loss: 0.9770306348800659,grad_norm: 0.9999992566238675, iteration: 191511
loss: 1.0342330932617188,grad_norm: 0.8948619064749133, iteration: 191512
loss: 0.987065851688385,grad_norm: 0.9999998091480432, iteration: 191513
loss: 1.1135122776031494,grad_norm: 0.9999992854477995, iteration: 191514
loss: 1.0002760887145996,grad_norm: 0.8934743843321838, iteration: 191515
loss: 0.9687570333480835,grad_norm: 0.7972177198051923, iteration: 191516
loss: 1.1893033981323242,grad_norm: 0.9999991407476486, iteration: 191517
loss: 0.9908213019371033,grad_norm: 0.9340168566286747, iteration: 191518
loss: 1.0339387655258179,grad_norm: 0.8481478497477211, iteration: 191519
loss: 1.0114326477050781,grad_norm: 0.9999991757397776, iteration: 191520
loss: 1.0498775243759155,grad_norm: 0.9039895931555759, iteration: 191521
loss: 0.9670835733413696,grad_norm: 0.8980361614293612, iteration: 191522
loss: 1.0285227298736572,grad_norm: 0.9510306045671113, iteration: 191523
loss: 1.0222768783569336,grad_norm: 0.9999989181128182, iteration: 191524
loss: 1.1858059167861938,grad_norm: 0.9999998646023157, iteration: 191525
loss: 0.9957651495933533,grad_norm: 0.9308120622488871, iteration: 191526
loss: 1.027112364768982,grad_norm: 0.9240234349078493, iteration: 191527
loss: 0.9741865396499634,grad_norm: 0.9981259173096259, iteration: 191528
loss: 1.0574597120285034,grad_norm: 0.9161514430723211, iteration: 191529
loss: 1.0254063606262207,grad_norm: 0.9211453665908156, iteration: 191530
loss: 0.9822021126747131,grad_norm: 0.9999990976114733, iteration: 191531
loss: 0.9982317090034485,grad_norm: 0.9999992426206766, iteration: 191532
loss: 1.0295445919036865,grad_norm: 0.9426735492976379, iteration: 191533
loss: 1.0269242525100708,grad_norm: 0.9999989103729021, iteration: 191534
loss: 1.0399644374847412,grad_norm: 0.9999994201015818, iteration: 191535
loss: 0.9757877588272095,grad_norm: 0.9999989967516849, iteration: 191536
loss: 1.0008127689361572,grad_norm: 0.9999989344431903, iteration: 191537
loss: 1.0140092372894287,grad_norm: 0.9934887262338188, iteration: 191538
loss: 1.0219258069992065,grad_norm: 0.9999991149060757, iteration: 191539
loss: 0.9919354915618896,grad_norm: 0.9567950379034013, iteration: 191540
loss: 0.980741560459137,grad_norm: 0.9163863251578601, iteration: 191541
loss: 0.9798175692558289,grad_norm: 0.9539878266615041, iteration: 191542
loss: 1.0037336349487305,grad_norm: 0.9999992092200443, iteration: 191543
loss: 1.0008090734481812,grad_norm: 0.9999998571127828, iteration: 191544
loss: 1.0146543979644775,grad_norm: 0.9959992441469062, iteration: 191545
loss: 0.9823651313781738,grad_norm: 0.9999991168414162, iteration: 191546
loss: 1.0302950143814087,grad_norm: 0.9948509621032737, iteration: 191547
loss: 1.0064984560012817,grad_norm: 0.9730432754446589, iteration: 191548
loss: 0.9929555654525757,grad_norm: 0.9999991520701319, iteration: 191549
loss: 0.9825289249420166,grad_norm: 0.999999039524334, iteration: 191550
loss: 1.0740593671798706,grad_norm: 0.9684059866128677, iteration: 191551
loss: 0.9732139110565186,grad_norm: 0.9999997730800755, iteration: 191552
loss: 1.0007158517837524,grad_norm: 0.9999991776638574, iteration: 191553
loss: 1.0005837678909302,grad_norm: 0.9999991144051837, iteration: 191554
loss: 0.9888847470283508,grad_norm: 0.8873305249079921, iteration: 191555
loss: 0.975042998790741,grad_norm: 0.8047398228581961, iteration: 191556
loss: 1.0716196298599243,grad_norm: 0.9213899099498715, iteration: 191557
loss: 1.0102031230926514,grad_norm: 0.9774541403361803, iteration: 191558
loss: 0.9619223475456238,grad_norm: 0.9999993321117906, iteration: 191559
loss: 1.0082874298095703,grad_norm: 0.8632050285554899, iteration: 191560
loss: 0.9812331199645996,grad_norm: 0.8724135926382748, iteration: 191561
loss: 1.0058300495147705,grad_norm: 0.9238420771806288, iteration: 191562
loss: 1.0095216035842896,grad_norm: 0.9999992134379277, iteration: 191563
loss: 0.9965102076530457,grad_norm: 0.9702095415796047, iteration: 191564
loss: 1.0185749530792236,grad_norm: 0.9933675415663673, iteration: 191565
loss: 1.00382661819458,grad_norm: 0.9999991244619539, iteration: 191566
loss: 1.0065937042236328,grad_norm: 0.8226722889926271, iteration: 191567
loss: 0.9877683520317078,grad_norm: 0.9781399585471355, iteration: 191568
loss: 0.9873744249343872,grad_norm: 0.8293593848650086, iteration: 191569
loss: 0.9509524703025818,grad_norm: 0.9999991599951207, iteration: 191570
loss: 1.0110551118850708,grad_norm: 0.7918831065209114, iteration: 191571
loss: 0.9896364808082581,grad_norm: 0.9999991057392132, iteration: 191572
loss: 1.0192409753799438,grad_norm: 0.8772759162334048, iteration: 191573
loss: 1.0185028314590454,grad_norm: 0.999999182193344, iteration: 191574
loss: 0.9896525144577026,grad_norm: 0.9999992650166367, iteration: 191575
loss: 1.021078109741211,grad_norm: 0.9999991213838791, iteration: 191576
loss: 1.0237951278686523,grad_norm: 0.9810835633109227, iteration: 191577
loss: 1.1211891174316406,grad_norm: 0.9999998487149, iteration: 191578
loss: 0.9811158776283264,grad_norm: 0.9999991066795616, iteration: 191579
loss: 0.9833344221115112,grad_norm: 0.9999990599507435, iteration: 191580
loss: 1.0395166873931885,grad_norm: 0.8969621384174684, iteration: 191581
loss: 1.0177459716796875,grad_norm: 0.8095772926437745, iteration: 191582
loss: 0.9965362548828125,grad_norm: 0.9093715613260478, iteration: 191583
loss: 0.9650053977966309,grad_norm: 0.9302783133490974, iteration: 191584
loss: 1.0171289443969727,grad_norm: 0.9074346912594342, iteration: 191585
loss: 1.022755742073059,grad_norm: 0.9999992250941357, iteration: 191586
loss: 1.0023748874664307,grad_norm: 0.9999990860742698, iteration: 191587
loss: 0.9797959327697754,grad_norm: 0.9999990391387816, iteration: 191588
loss: 0.9612404108047485,grad_norm: 0.8297598465597048, iteration: 191589
loss: 0.9661418199539185,grad_norm: 0.9031749324606908, iteration: 191590
loss: 0.9910039901733398,grad_norm: 0.9999991627191365, iteration: 191591
loss: 1.0077930688858032,grad_norm: 0.9999990295410213, iteration: 191592
loss: 1.023024082183838,grad_norm: 0.9999996565113349, iteration: 191593
loss: 0.9821377396583557,grad_norm: 0.8690930416878635, iteration: 191594
loss: 1.0299559831619263,grad_norm: 0.9338255422837006, iteration: 191595
loss: 1.0076836347579956,grad_norm: 0.9952395198159176, iteration: 191596
loss: 1.0213778018951416,grad_norm: 0.9999993319121949, iteration: 191597
loss: 1.134956955909729,grad_norm: 0.9999992596065197, iteration: 191598
loss: 0.9759557247161865,grad_norm: 0.8867573057287483, iteration: 191599
loss: 1.0117098093032837,grad_norm: 0.8680340376455229, iteration: 191600
loss: 1.0350779294967651,grad_norm: 0.9999997945587425, iteration: 191601
loss: 1.01022469997406,grad_norm: 0.8963736774695505, iteration: 191602
loss: 0.9838970899581909,grad_norm: 0.9530023479667913, iteration: 191603
loss: 1.0102583169937134,grad_norm: 0.9999991210506414, iteration: 191604
loss: 0.9781897664070129,grad_norm: 0.9999991107802422, iteration: 191605
loss: 1.0028876066207886,grad_norm: 0.9999990909686222, iteration: 191606
loss: 0.9664644598960876,grad_norm: 0.938490898639687, iteration: 191607
loss: 1.0042848587036133,grad_norm: 0.9254321452758968, iteration: 191608
loss: 0.9954133033752441,grad_norm: 0.7898231201662123, iteration: 191609
loss: 1.0169198513031006,grad_norm: 0.999999254619241, iteration: 191610
loss: 1.0299478769302368,grad_norm: 0.9136652880082163, iteration: 191611
loss: 1.1360827684402466,grad_norm: 0.9999990428397753, iteration: 191612
loss: 0.9729059338569641,grad_norm: 0.9618282283880644, iteration: 191613
loss: 1.01021409034729,grad_norm: 0.9999990288096354, iteration: 191614
loss: 1.1729415655136108,grad_norm: 0.9999997012891939, iteration: 191615
loss: 1.0207575559616089,grad_norm: 0.9999990926799255, iteration: 191616
loss: 1.0132724046707153,grad_norm: 0.9999994588056778, iteration: 191617
loss: 0.9990480542182922,grad_norm: 0.9999992223218178, iteration: 191618
loss: 0.9796954393386841,grad_norm: 0.9999991704060244, iteration: 191619
loss: 0.9944638013839722,grad_norm: 0.9999990904201143, iteration: 191620
loss: 1.033782958984375,grad_norm: 0.9359759520006345, iteration: 191621
loss: 0.9962601661682129,grad_norm: 0.999999100133746, iteration: 191622
loss: 1.068740725517273,grad_norm: 0.9999995338838554, iteration: 191623
loss: 1.1588448286056519,grad_norm: 0.999999862696239, iteration: 191624
loss: 1.0213631391525269,grad_norm: 0.9999990601562474, iteration: 191625
loss: 1.0884045362472534,grad_norm: 0.9999997715550508, iteration: 191626
loss: 1.0159306526184082,grad_norm: 0.9999994586860532, iteration: 191627
loss: 1.0327943563461304,grad_norm: 0.9999995198773726, iteration: 191628
loss: 1.026342511177063,grad_norm: 0.9999997027476462, iteration: 191629
loss: 0.9840279221534729,grad_norm: 0.9999992248047254, iteration: 191630
loss: 0.9883131384849548,grad_norm: 0.973233562435718, iteration: 191631
loss: 1.010745644569397,grad_norm: 0.9506252340273008, iteration: 191632
loss: 0.9583367705345154,grad_norm: 0.960917461574884, iteration: 191633
loss: 1.048624873161316,grad_norm: 0.9331151514045919, iteration: 191634
loss: 0.9974057078361511,grad_norm: 0.9747211462814918, iteration: 191635
loss: 0.9946138262748718,grad_norm: 0.8845597471877148, iteration: 191636
loss: 1.019524335861206,grad_norm: 0.9674381358719695, iteration: 191637
loss: 0.9996968507766724,grad_norm: 0.9454006961640408, iteration: 191638
loss: 1.0078926086425781,grad_norm: 0.9999992952326727, iteration: 191639
loss: 0.9928523302078247,grad_norm: 0.9667418214020985, iteration: 191640
loss: 0.9947113394737244,grad_norm: 0.9999991703025315, iteration: 191641
loss: 0.9828621745109558,grad_norm: 0.9999991700975182, iteration: 191642
loss: 1.0229395627975464,grad_norm: 0.8500854538745203, iteration: 191643
loss: 1.0041344165802002,grad_norm: 0.9999990780388855, iteration: 191644
loss: 0.9747138619422913,grad_norm: 0.918485628630913, iteration: 191645
loss: 0.9599773287773132,grad_norm: 0.9728989659319479, iteration: 191646
loss: 1.0294849872589111,grad_norm: 0.9999996652861753, iteration: 191647
loss: 0.9897409677505493,grad_norm: 0.9999992994474645, iteration: 191648
loss: 1.014486312866211,grad_norm: 0.9999990404718752, iteration: 191649
loss: 1.0201977491378784,grad_norm: 0.9427802750604646, iteration: 191650
loss: 1.0153865814208984,grad_norm: 0.8344044579210039, iteration: 191651
loss: 1.0093165636062622,grad_norm: 0.9999991835272808, iteration: 191652
loss: 1.0122783184051514,grad_norm: 0.9999999503542822, iteration: 191653
loss: 1.0178978443145752,grad_norm: 0.9999997942641593, iteration: 191654
loss: 0.9843965172767639,grad_norm: 0.9991651710747265, iteration: 191655
loss: 1.0657044649124146,grad_norm: 0.999999056785842, iteration: 191656
loss: 1.06910240650177,grad_norm: 0.9583239542566451, iteration: 191657
loss: 0.9845678806304932,grad_norm: 0.9999990880602689, iteration: 191658
loss: 1.030726432800293,grad_norm: 0.8674907047781425, iteration: 191659
loss: 1.0164835453033447,grad_norm: 0.9999994623464025, iteration: 191660
loss: 0.9989506006240845,grad_norm: 0.9999991148016297, iteration: 191661
loss: 1.0005348920822144,grad_norm: 0.9130599829744708, iteration: 191662
loss: 1.000852108001709,grad_norm: 0.8904918433663499, iteration: 191663
loss: 1.0189887285232544,grad_norm: 0.9999991521087432, iteration: 191664
loss: 1.0294936895370483,grad_norm: 0.8109785841251277, iteration: 191665
loss: 1.0176870822906494,grad_norm: 0.9999988971649322, iteration: 191666
loss: 1.0337237119674683,grad_norm: 0.8619080394551364, iteration: 191667
loss: 1.0702366828918457,grad_norm: 0.9999996153042796, iteration: 191668
loss: 1.025477409362793,grad_norm: 0.9999998483032512, iteration: 191669
loss: 1.0081889629364014,grad_norm: 0.999999179317748, iteration: 191670
loss: 0.9942441582679749,grad_norm: 0.9907022417014231, iteration: 191671
loss: 1.0322229862213135,grad_norm: 0.9999993036121, iteration: 191672
loss: 1.00032639503479,grad_norm: 0.860818847070916, iteration: 191673
loss: 1.0282319784164429,grad_norm: 0.9999993236428709, iteration: 191674
loss: 0.9797943830490112,grad_norm: 0.7683768794251262, iteration: 191675
loss: 0.9854297041893005,grad_norm: 0.922104781159391, iteration: 191676
loss: 0.9893397688865662,grad_norm: 0.9561279626047353, iteration: 191677
loss: 0.9880895018577576,grad_norm: 0.9999990556247866, iteration: 191678
loss: 0.9908129572868347,grad_norm: 0.9999995800645701, iteration: 191679
loss: 1.0410171747207642,grad_norm: 0.999999986100524, iteration: 191680
loss: 1.0298881530761719,grad_norm: 0.8130818934516932, iteration: 191681
loss: 1.1792395114898682,grad_norm: 0.9999996216684948, iteration: 191682
loss: 0.9820366501808167,grad_norm: 0.9999992345569187, iteration: 191683
loss: 0.9982185959815979,grad_norm: 0.8860707093789949, iteration: 191684
loss: 0.9814554452896118,grad_norm: 0.999999070202114, iteration: 191685
loss: 1.0234602689743042,grad_norm: 0.9999997849549822, iteration: 191686
loss: 0.9919416308403015,grad_norm: 0.9999989761952386, iteration: 191687
loss: 1.0459985733032227,grad_norm: 0.9999990797181607, iteration: 191688
loss: 1.0898863077163696,grad_norm: 0.9999997324458886, iteration: 191689
loss: 0.9710659384727478,grad_norm: 0.7959300984715538, iteration: 191690
loss: 0.9944955706596375,grad_norm: 0.8743728828028943, iteration: 191691
loss: 0.9680827856063843,grad_norm: 0.9818325823581943, iteration: 191692
loss: 0.9378498196601868,grad_norm: 0.8053872968752368, iteration: 191693
loss: 0.9580678343772888,grad_norm: 0.9591594698723604, iteration: 191694
loss: 1.031852126121521,grad_norm: 0.9999991094282316, iteration: 191695
loss: 1.021925687789917,grad_norm: 0.9766944295109192, iteration: 191696
loss: 0.986859917640686,grad_norm: 0.9426512905502842, iteration: 191697
loss: 1.0213100910186768,grad_norm: 0.9999993593293647, iteration: 191698
loss: 0.9844902157783508,grad_norm: 0.9296209947386577, iteration: 191699
loss: 1.1471437215805054,grad_norm: 0.9982094153074158, iteration: 191700
loss: 1.324233889579773,grad_norm: 0.9999995641424155, iteration: 191701
loss: 1.100722074508667,grad_norm: 0.9999991041815989, iteration: 191702
loss: 1.010898470878601,grad_norm: 0.9999990451147333, iteration: 191703
loss: 1.1511460542678833,grad_norm: 0.9999993439293205, iteration: 191704
loss: 1.121904730796814,grad_norm: 0.9999993123924206, iteration: 191705
loss: 1.1287187337875366,grad_norm: 0.9999992986415877, iteration: 191706
loss: 1.1199191808700562,grad_norm: 0.9999993130300042, iteration: 191707
loss: 0.9780445098876953,grad_norm: 0.9498933304395781, iteration: 191708
loss: 1.1334997415542603,grad_norm: 0.9999990876355277, iteration: 191709
loss: 1.2563554048538208,grad_norm: 0.9999994718915172, iteration: 191710
loss: 0.9842201471328735,grad_norm: 0.9291306884529802, iteration: 191711
loss: 1.0273244380950928,grad_norm: 0.9999998638524807, iteration: 191712
loss: 1.1513185501098633,grad_norm: 0.9999995337149211, iteration: 191713
loss: 1.058151364326477,grad_norm: 0.9999994557853473, iteration: 191714
loss: 1.0292681455612183,grad_norm: 0.9999994782020903, iteration: 191715
loss: 1.1126147508621216,grad_norm: 0.9999997675996223, iteration: 191716
loss: 1.055361032485962,grad_norm: 0.9999991840376221, iteration: 191717
loss: 1.083125352859497,grad_norm: 1.0000000295619054, iteration: 191718
loss: 1.0066165924072266,grad_norm: 0.9999991787169803, iteration: 191719
loss: 1.0633496046066284,grad_norm: 0.9999991385110693, iteration: 191720
loss: 1.0066622495651245,grad_norm: 0.8830900207033008, iteration: 191721
loss: 1.0016323328018188,grad_norm: 0.9999991782086801, iteration: 191722
loss: 0.9873390793800354,grad_norm: 0.8236741176161763, iteration: 191723
loss: 1.043131709098816,grad_norm: 0.9999992815874185, iteration: 191724
loss: 1.0099308490753174,grad_norm: 0.8048486251420321, iteration: 191725
loss: 1.13531494140625,grad_norm: 0.9999996016112582, iteration: 191726
loss: 0.9815416932106018,grad_norm: 0.9769217773210152, iteration: 191727
loss: 0.9854418039321899,grad_norm: 0.9999990936220554, iteration: 191728
loss: 1.0647085905075073,grad_norm: 0.9999999330508319, iteration: 191729
loss: 1.0145167112350464,grad_norm: 0.999999247562118, iteration: 191730
loss: 0.984131395816803,grad_norm: 0.9764359898697161, iteration: 191731
loss: 1.0005122423171997,grad_norm: 0.9986102016427439, iteration: 191732
loss: 1.0923715829849243,grad_norm: 0.999999888341006, iteration: 191733
loss: 1.0069522857666016,grad_norm: 0.7885619310128541, iteration: 191734
loss: 1.004077434539795,grad_norm: 0.9999992199840765, iteration: 191735
loss: 0.9859197735786438,grad_norm: 0.7860756539446604, iteration: 191736
loss: 1.0129553079605103,grad_norm: 0.7956677774891701, iteration: 191737
loss: 1.021817922592163,grad_norm: 0.989332805382411, iteration: 191738
loss: 0.9878373146057129,grad_norm: 0.8756079514906128, iteration: 191739
loss: 0.9895979762077332,grad_norm: 0.9999993454539539, iteration: 191740
loss: 1.0247385501861572,grad_norm: 0.9999994913227701, iteration: 191741
loss: 1.0236080884933472,grad_norm: 0.9999994836616496, iteration: 191742
loss: 0.9779285192489624,grad_norm: 0.8620844546491404, iteration: 191743
loss: 0.9305284023284912,grad_norm: 0.862474054762685, iteration: 191744
loss: 1.0084749460220337,grad_norm: 0.7657336740923769, iteration: 191745
loss: 0.968070924282074,grad_norm: 0.841115451656968, iteration: 191746
loss: 1.0196270942687988,grad_norm: 0.9074096297670279, iteration: 191747
loss: 1.0717966556549072,grad_norm: 0.999999895602756, iteration: 191748
loss: 1.0937623977661133,grad_norm: 0.9999998515841682, iteration: 191749
loss: 1.0255550146102905,grad_norm: 0.9999993072830861, iteration: 191750
loss: 0.9729977250099182,grad_norm: 0.999999188793051, iteration: 191751
loss: 0.9729756116867065,grad_norm: 0.9999991363019589, iteration: 191752
loss: 1.0263416767120361,grad_norm: 0.9999992182452985, iteration: 191753
loss: 1.008337378501892,grad_norm: 0.9277527135836815, iteration: 191754
loss: 1.0153987407684326,grad_norm: 0.7812277977103993, iteration: 191755
loss: 0.9944010376930237,grad_norm: 0.8768367671796695, iteration: 191756
loss: 1.0260268449783325,grad_norm: 0.9670461543884016, iteration: 191757
loss: 0.9752030372619629,grad_norm: 0.907809962319187, iteration: 191758
loss: 0.9876930117607117,grad_norm: 0.99999899536845, iteration: 191759
loss: 0.9902855753898621,grad_norm: 0.9199573561169504, iteration: 191760
loss: 1.0182709693908691,grad_norm: 0.8957151914433528, iteration: 191761
loss: 0.952812910079956,grad_norm: 0.8323949849266502, iteration: 191762
loss: 1.0171757936477661,grad_norm: 0.9999990508671933, iteration: 191763
loss: 1.0132949352264404,grad_norm: 0.9999989253820629, iteration: 191764
loss: 1.0170071125030518,grad_norm: 0.8361423165453156, iteration: 191765
loss: 1.029862642288208,grad_norm: 0.9717392240556965, iteration: 191766
loss: 0.9767822623252869,grad_norm: 0.9999990985212827, iteration: 191767
loss: 0.9686629176139832,grad_norm: 0.8362049688597146, iteration: 191768
loss: 1.112151026725769,grad_norm: 0.9999998821744861, iteration: 191769
loss: 1.2951687574386597,grad_norm: 0.9999998528318979, iteration: 191770
loss: 0.9928869605064392,grad_norm: 0.9999990658726956, iteration: 191771
loss: 1.0104116201400757,grad_norm: 0.9925548831965068, iteration: 191772
loss: 0.9656643271446228,grad_norm: 0.9999991245451781, iteration: 191773
loss: 0.957974910736084,grad_norm: 0.9999991802422126, iteration: 191774
loss: 1.1503374576568604,grad_norm: 0.9929817901257575, iteration: 191775
loss: 1.0854582786560059,grad_norm: 0.9999993627958762, iteration: 191776
loss: 1.00993013381958,grad_norm: 0.9194504483573044, iteration: 191777
loss: 1.017332673072815,grad_norm: 0.9999993195029748, iteration: 191778
loss: 0.9737629294395447,grad_norm: 0.8770057440076587, iteration: 191779
loss: 0.9968591928482056,grad_norm: 0.7781313670530599, iteration: 191780
loss: 0.9855215549468994,grad_norm: 0.9999991216798249, iteration: 191781
loss: 0.9754867553710938,grad_norm: 0.8656962992051267, iteration: 191782
loss: 0.9870827794075012,grad_norm: 0.9300242976863673, iteration: 191783
loss: 1.0436683893203735,grad_norm: 0.999999528929871, iteration: 191784
loss: 1.0795931816101074,grad_norm: 0.999999384367744, iteration: 191785
loss: 1.077156662940979,grad_norm: 1.000000039222205, iteration: 191786
loss: 1.0309072732925415,grad_norm: 0.9999995014160624, iteration: 191787
loss: 0.9946861267089844,grad_norm: 0.9144161019350766, iteration: 191788
loss: 1.0062018632888794,grad_norm: 0.9698078509785997, iteration: 191789
loss: 1.0516098737716675,grad_norm: 0.999999489210138, iteration: 191790
loss: 1.00133216381073,grad_norm: 0.8524871723582856, iteration: 191791
loss: 1.0275908708572388,grad_norm: 0.8773398793070606, iteration: 191792
loss: 0.9550779461860657,grad_norm: 0.9936142313928327, iteration: 191793
loss: 1.0030938386917114,grad_norm: 0.8375103508777277, iteration: 191794
loss: 0.9618287086486816,grad_norm: 0.8764291067984203, iteration: 191795
loss: 0.9874568581581116,grad_norm: 0.9999996746467291, iteration: 191796
loss: 1.0366814136505127,grad_norm: 0.9999990204202777, iteration: 191797
loss: 1.0399432182312012,grad_norm: 0.9999993827480647, iteration: 191798
loss: 0.9898788332939148,grad_norm: 0.828382903587959, iteration: 191799
loss: 0.9853907227516174,grad_norm: 0.8180624586482835, iteration: 191800
loss: 1.0089489221572876,grad_norm: 0.9850893310498295, iteration: 191801
loss: 0.9896444082260132,grad_norm: 0.9999989673121927, iteration: 191802
loss: 0.9747734665870667,grad_norm: 0.9999991053408608, iteration: 191803
loss: 1.0383663177490234,grad_norm: 0.8758878843718206, iteration: 191804
loss: 0.9903797507286072,grad_norm: 0.9018941223976208, iteration: 191805
loss: 1.0028444528579712,grad_norm: 0.859135925371298, iteration: 191806
loss: 0.9882840514183044,grad_norm: 0.9999991646115519, iteration: 191807
loss: 0.99176424741745,grad_norm: 0.9343485368022932, iteration: 191808
loss: 0.9535764455795288,grad_norm: 0.8894601976654875, iteration: 191809
loss: 1.0033427476882935,grad_norm: 0.7636161203059408, iteration: 191810
loss: 1.040785789489746,grad_norm: 0.9999993232005971, iteration: 191811
loss: 1.015528678894043,grad_norm: 0.999999080542927, iteration: 191812
loss: 0.9859735369682312,grad_norm: 0.9999991014443467, iteration: 191813
loss: 0.9827520847320557,grad_norm: 0.8932385755542439, iteration: 191814
loss: 1.146501898765564,grad_norm: 0.9999993776800361, iteration: 191815
loss: 0.9963728785514832,grad_norm: 0.9999990979795662, iteration: 191816
loss: 1.0215649604797363,grad_norm: 0.8707850059966284, iteration: 191817
loss: 1.008152723312378,grad_norm: 0.9999992390218353, iteration: 191818
loss: 1.0325390100479126,grad_norm: 0.9999992213286699, iteration: 191819
loss: 0.9704458713531494,grad_norm: 0.9999996324917068, iteration: 191820
loss: 0.9889371991157532,grad_norm: 0.9999991844301058, iteration: 191821
loss: 1.0132594108581543,grad_norm: 0.7655568087742779, iteration: 191822
loss: 1.0060237646102905,grad_norm: 0.7467637982028856, iteration: 191823
loss: 0.9977777600288391,grad_norm: 0.9445212619156584, iteration: 191824
loss: 0.9939541220664978,grad_norm: 0.9494543549954156, iteration: 191825
loss: 1.0349559783935547,grad_norm: 0.9999990757128763, iteration: 191826
loss: 1.0310192108154297,grad_norm: 0.9999991891506569, iteration: 191827
loss: 0.9897148013114929,grad_norm: 0.9999993742348242, iteration: 191828
loss: 1.014858365058899,grad_norm: 0.9999990804145881, iteration: 191829
loss: 0.9891960024833679,grad_norm: 0.9885612371158841, iteration: 191830
loss: 0.9810444116592407,grad_norm: 0.9724072253290963, iteration: 191831
loss: 0.987266480922699,grad_norm: 0.9999993141361753, iteration: 191832
loss: 0.988086998462677,grad_norm: 0.8692086227351035, iteration: 191833
loss: 1.0224965810775757,grad_norm: 0.8730390744271749, iteration: 191834
loss: 1.0246226787567139,grad_norm: 0.999999057198124, iteration: 191835
loss: 1.0197426080703735,grad_norm: 0.9832445984178392, iteration: 191836
loss: 0.98757004737854,grad_norm: 0.9999992046169077, iteration: 191837
loss: 1.0163549184799194,grad_norm: 0.7962837001778432, iteration: 191838
loss: 1.006192684173584,grad_norm: 0.9999991570628715, iteration: 191839
loss: 1.0469211339950562,grad_norm: 0.9999995139861677, iteration: 191840
loss: 0.9952797889709473,grad_norm: 0.9490698767941724, iteration: 191841
loss: 1.000183343887329,grad_norm: 0.999999200882704, iteration: 191842
loss: 0.963171660900116,grad_norm: 0.999999062200553, iteration: 191843
loss: 1.0361392498016357,grad_norm: 0.9000116442265562, iteration: 191844
loss: 0.9843658804893494,grad_norm: 0.9065601692995899, iteration: 191845
loss: 0.9961204528808594,grad_norm: 0.7629971351967845, iteration: 191846
loss: 1.0128737688064575,grad_norm: 0.9999989683379674, iteration: 191847
loss: 0.9964261054992676,grad_norm: 0.8815064144744912, iteration: 191848
loss: 0.9917194843292236,grad_norm: 0.9730505628333008, iteration: 191849
loss: 1.0270391702651978,grad_norm: 0.8642768754817588, iteration: 191850
loss: 1.0330369472503662,grad_norm: 0.9999990277538047, iteration: 191851
loss: 1.017861247062683,grad_norm: 0.8613674989111598, iteration: 191852
loss: 1.0573559999465942,grad_norm: 0.9999998237209917, iteration: 191853
loss: 0.9572036862373352,grad_norm: 0.9999992006120992, iteration: 191854
loss: 0.992034912109375,grad_norm: 0.9999988812143138, iteration: 191855
loss: 0.9935162663459778,grad_norm: 0.96150943675172, iteration: 191856
loss: 1.0009392499923706,grad_norm: 0.9999992559177022, iteration: 191857
loss: 1.0228503942489624,grad_norm: 0.9999992443953155, iteration: 191858
loss: 0.9746195673942566,grad_norm: 0.9237109613002463, iteration: 191859
loss: 1.0114827156066895,grad_norm: 0.9999990486187379, iteration: 191860
loss: 0.9956693649291992,grad_norm: 0.978565271379688, iteration: 191861
loss: 1.0098527669906616,grad_norm: 0.8552609286555827, iteration: 191862
loss: 1.0099899768829346,grad_norm: 0.9898628694440863, iteration: 191863
loss: 1.0399056673049927,grad_norm: 0.9320291998235113, iteration: 191864
loss: 0.9975036978721619,grad_norm: 0.7666529623148512, iteration: 191865
loss: 0.9977964162826538,grad_norm: 0.9870324546293816, iteration: 191866
loss: 1.0084221363067627,grad_norm: 0.8123483656427403, iteration: 191867
loss: 1.057848572731018,grad_norm: 0.9999997849720814, iteration: 191868
loss: 1.0360534191131592,grad_norm: 0.8824844582857819, iteration: 191869
loss: 1.0298384428024292,grad_norm: 0.9546112402598776, iteration: 191870
loss: 1.0419938564300537,grad_norm: 0.9999990068377923, iteration: 191871
loss: 1.0222357511520386,grad_norm: 0.9999989999493119, iteration: 191872
loss: 1.026138186454773,grad_norm: 0.9912906581543202, iteration: 191873
loss: 0.9681127667427063,grad_norm: 0.9858215766568966, iteration: 191874
loss: 1.0075336694717407,grad_norm: 0.9918140512043508, iteration: 191875
loss: 0.9973188638687134,grad_norm: 0.9730316622762004, iteration: 191876
loss: 1.0192688703536987,grad_norm: 0.8054622243765662, iteration: 191877
loss: 1.061835527420044,grad_norm: 0.9999992389005674, iteration: 191878
loss: 0.998812735080719,grad_norm: 0.888549573112803, iteration: 191879
loss: 1.0225625038146973,grad_norm: 0.9999991421859257, iteration: 191880
loss: 1.191826581954956,grad_norm: 0.9999992443152602, iteration: 191881
loss: 0.9600019454956055,grad_norm: 0.9222767047890843, iteration: 191882
loss: 0.9901592135429382,grad_norm: 0.9120888397676274, iteration: 191883
loss: 1.045137882232666,grad_norm: 0.9999992553232211, iteration: 191884
loss: 1.0042394399642944,grad_norm: 0.999999008451281, iteration: 191885
loss: 1.0323151350021362,grad_norm: 0.9999996003574295, iteration: 191886
loss: 1.2267826795578003,grad_norm: 0.9999998839446279, iteration: 191887
loss: 0.9638599753379822,grad_norm: 0.999999296803325, iteration: 191888
loss: 1.0342743396759033,grad_norm: 0.9641627073807408, iteration: 191889
loss: 1.0200250148773193,grad_norm: 0.9999991051253535, iteration: 191890
loss: 0.9768865704536438,grad_norm: 0.9999994640721268, iteration: 191891
loss: 0.9955772161483765,grad_norm: 0.8632688802021393, iteration: 191892
loss: 1.1012248992919922,grad_norm: 0.9999996408510509, iteration: 191893
loss: 1.0162001848220825,grad_norm: 0.9999992404236577, iteration: 191894
loss: 0.9822713732719421,grad_norm: 0.9999994568202872, iteration: 191895
loss: 0.9591498374938965,grad_norm: 0.9703212462042259, iteration: 191896
loss: 0.9914016723632812,grad_norm: 0.9741418682285178, iteration: 191897
loss: 0.9820672273635864,grad_norm: 0.9999990010995239, iteration: 191898
loss: 0.9960176348686218,grad_norm: 0.8553844238648827, iteration: 191899
loss: 1.019476294517517,grad_norm: 0.8303416433094544, iteration: 191900
loss: 1.0041288137435913,grad_norm: 0.9394015763798841, iteration: 191901
loss: 0.9804016351699829,grad_norm: 0.9999988936030978, iteration: 191902
loss: 1.039718747138977,grad_norm: 0.9995713710078563, iteration: 191903
loss: 0.9760857224464417,grad_norm: 0.9999990891457864, iteration: 191904
loss: 1.0295687913894653,grad_norm: 0.978670891900686, iteration: 191905
loss: 1.0022794008255005,grad_norm: 0.8407637870682902, iteration: 191906
loss: 0.9886944890022278,grad_norm: 0.853178588958223, iteration: 191907
loss: 0.9912298321723938,grad_norm: 0.9995907440523801, iteration: 191908
loss: 1.019139051437378,grad_norm: 0.9999992310939873, iteration: 191909
loss: 0.9768342971801758,grad_norm: 0.999999047153591, iteration: 191910
loss: 1.0075654983520508,grad_norm: 0.977275155874669, iteration: 191911
loss: 0.9650571346282959,grad_norm: 0.932989404512608, iteration: 191912
loss: 1.022328495979309,grad_norm: 0.9686785843993065, iteration: 191913
loss: 1.0476858615875244,grad_norm: 0.9999992268603978, iteration: 191914
loss: 1.0289065837860107,grad_norm: 0.9999992984654823, iteration: 191915
loss: 0.9798429608345032,grad_norm: 0.8451131708533888, iteration: 191916
loss: 1.0127661228179932,grad_norm: 0.8979431467595532, iteration: 191917
loss: 1.0107078552246094,grad_norm: 0.9999992851291933, iteration: 191918
loss: 1.0066262483596802,grad_norm: 0.8339555786224744, iteration: 191919
loss: 1.031130075454712,grad_norm: 0.999999017996061, iteration: 191920
loss: 1.1299329996109009,grad_norm: 0.999999404572068, iteration: 191921
loss: 0.9957967400550842,grad_norm: 0.99999946577644, iteration: 191922
loss: 0.9527168273925781,grad_norm: 0.9312061625182345, iteration: 191923
loss: 1.0160176753997803,grad_norm: 0.8830644338586803, iteration: 191924
loss: 0.9927741289138794,grad_norm: 0.7746412285801405, iteration: 191925
loss: 0.9783228635787964,grad_norm: 0.9287089698317904, iteration: 191926
loss: 1.1510425806045532,grad_norm: 0.9999999221610629, iteration: 191927
loss: 0.9908708333969116,grad_norm: 0.8743272230713726, iteration: 191928
loss: 1.0020771026611328,grad_norm: 0.999999228357919, iteration: 191929
loss: 1.0146658420562744,grad_norm: 0.9811927093776168, iteration: 191930
loss: 1.0235741138458252,grad_norm: 0.999999687596302, iteration: 191931
loss: 1.032584547996521,grad_norm: 0.9224304972139917, iteration: 191932
loss: 1.0115853548049927,grad_norm: 0.9999991485286113, iteration: 191933
loss: 1.05439293384552,grad_norm: 0.9999993072678007, iteration: 191934
loss: 1.0346506834030151,grad_norm: 0.9999991834930406, iteration: 191935
loss: 0.9934327602386475,grad_norm: 0.9999992025551757, iteration: 191936
loss: 1.0875523090362549,grad_norm: 0.9999997494850761, iteration: 191937
loss: 1.0342371463775635,grad_norm: 0.9999990483458192, iteration: 191938
loss: 0.9804655909538269,grad_norm: 0.8784238613099381, iteration: 191939
loss: 0.9862731099128723,grad_norm: 0.9864222129105835, iteration: 191940
loss: 1.0318177938461304,grad_norm: 0.9568574962809295, iteration: 191941
loss: 1.0587800741195679,grad_norm: 0.9999998704066521, iteration: 191942
loss: 0.99662184715271,grad_norm: 0.9950814860518166, iteration: 191943
loss: 0.9638626575469971,grad_norm: 0.886865977670222, iteration: 191944
loss: 0.9772830009460449,grad_norm: 0.9999989316102158, iteration: 191945
loss: 0.9781720638275146,grad_norm: 0.999999149376714, iteration: 191946
loss: 0.9815229177474976,grad_norm: 0.852646161693599, iteration: 191947
loss: 0.9830781817436218,grad_norm: 0.9547387500023167, iteration: 191948
loss: 0.9764894843101501,grad_norm: 0.8371732332368715, iteration: 191949
loss: 0.9723232388496399,grad_norm: 0.9226165881129451, iteration: 191950
loss: 1.0477776527404785,grad_norm: 0.9075375768337004, iteration: 191951
loss: 0.9903084635734558,grad_norm: 0.9999988970318909, iteration: 191952
loss: 1.0125936269760132,grad_norm: 0.9999990253203827, iteration: 191953
loss: 0.9887686371803284,grad_norm: 0.9999991627375124, iteration: 191954
loss: 0.9883268475532532,grad_norm: 0.999999138681745, iteration: 191955
loss: 0.9838765263557434,grad_norm: 0.9999991592817367, iteration: 191956
loss: 0.985830545425415,grad_norm: 0.9999990967751925, iteration: 191957
loss: 0.9823431968688965,grad_norm: 0.9999991033496664, iteration: 191958
loss: 1.0195648670196533,grad_norm: 0.9999990001451792, iteration: 191959
loss: 0.9985640645027161,grad_norm: 0.9999996473935772, iteration: 191960
loss: 0.9796043634414673,grad_norm: 0.9838091686264702, iteration: 191961
loss: 1.0127626657485962,grad_norm: 0.9359129383482059, iteration: 191962
loss: 0.9689578413963318,grad_norm: 0.999999165991925, iteration: 191963
loss: 0.9924545884132385,grad_norm: 0.9999992478069301, iteration: 191964
loss: 1.057137370109558,grad_norm: 0.9999991203612156, iteration: 191965
loss: 1.0368858575820923,grad_norm: 0.9999990433266216, iteration: 191966
loss: 0.9890638589859009,grad_norm: 0.9465184351292807, iteration: 191967
loss: 0.9689269661903381,grad_norm: 0.9464389469530712, iteration: 191968
loss: 1.024121642112732,grad_norm: 0.9999991487443062, iteration: 191969
loss: 1.0042656660079956,grad_norm: 0.9519296978517031, iteration: 191970
loss: 1.0077576637268066,grad_norm: 0.9999991360861695, iteration: 191971
loss: 0.9537493586540222,grad_norm: 0.8455967240581929, iteration: 191972
loss: 0.9935427308082581,grad_norm: 0.9999991321655927, iteration: 191973
loss: 0.9560723900794983,grad_norm: 0.9601843153897457, iteration: 191974
loss: 1.0500805377960205,grad_norm: 0.9999991416263438, iteration: 191975
loss: 1.025456428527832,grad_norm: 0.9999990888420641, iteration: 191976
loss: 1.0014958381652832,grad_norm: 0.9145303336898696, iteration: 191977
loss: 1.023409128189087,grad_norm: 0.9461125935770102, iteration: 191978
loss: 1.0409953594207764,grad_norm: 0.9999990227450851, iteration: 191979
loss: 0.976098358631134,grad_norm: 0.9428635049966517, iteration: 191980
loss: 1.0143060684204102,grad_norm: 0.9999989689590902, iteration: 191981
loss: 1.0301545858383179,grad_norm: 0.9999992922061186, iteration: 191982
loss: 0.9871081709861755,grad_norm: 0.9999990958080668, iteration: 191983
loss: 1.0346590280532837,grad_norm: 0.9999998618571722, iteration: 191984
loss: 1.0581907033920288,grad_norm: 0.9792689294078919, iteration: 191985
loss: 1.0144740343093872,grad_norm: 0.7964719706670189, iteration: 191986
loss: 0.9873508810997009,grad_norm: 0.9806505633005886, iteration: 191987
loss: 1.034461498260498,grad_norm: 0.9999991847667612, iteration: 191988
loss: 1.0138615369796753,grad_norm: 0.9999992022667947, iteration: 191989
loss: 1.036217451095581,grad_norm: 0.9999990369496943, iteration: 191990
loss: 1.0789086818695068,grad_norm: 0.9999991001601499, iteration: 191991
loss: 1.0255813598632812,grad_norm: 0.9590688219252614, iteration: 191992
loss: 1.0333635807037354,grad_norm: 0.9999992942397543, iteration: 191993
loss: 1.0056357383728027,grad_norm: 0.9999990815687436, iteration: 191994
loss: 0.9892985820770264,grad_norm: 0.9373040098886554, iteration: 191995
loss: 0.9485346078872681,grad_norm: 0.898376315322589, iteration: 191996
loss: 0.9858676195144653,grad_norm: 0.9999992295071101, iteration: 191997
loss: 0.9593247175216675,grad_norm: 0.878383304063087, iteration: 191998
loss: 0.9803162813186646,grad_norm: 0.9999989184588812, iteration: 191999
loss: 1.0321816205978394,grad_norm: 0.999998936316902, iteration: 192000
loss: 1.0200929641723633,grad_norm: 0.9558366757613216, iteration: 192001
loss: 1.0131102800369263,grad_norm: 0.9999991157547596, iteration: 192002
loss: 1.0085270404815674,grad_norm: 0.9999990092141454, iteration: 192003
loss: 0.9870657920837402,grad_norm: 0.9999991199932274, iteration: 192004
loss: 0.9876107573509216,grad_norm: 0.9999991275817807, iteration: 192005
loss: 1.0187735557556152,grad_norm: 0.8209966648481017, iteration: 192006
loss: 1.007124662399292,grad_norm: 0.9999991475574451, iteration: 192007
loss: 1.0090360641479492,grad_norm: 0.9999989964181336, iteration: 192008
loss: 0.9713351726531982,grad_norm: 0.9099732848857157, iteration: 192009
loss: 1.0193439722061157,grad_norm: 0.9212274843399585, iteration: 192010
loss: 0.9828882217407227,grad_norm: 0.9872592284678477, iteration: 192011
loss: 0.9955149292945862,grad_norm: 0.8734504853977004, iteration: 192012
loss: 0.9658005833625793,grad_norm: 0.9999990964577001, iteration: 192013
loss: 0.9400853514671326,grad_norm: 0.9999991651758126, iteration: 192014
loss: 1.020820140838623,grad_norm: 0.9347165466486416, iteration: 192015
loss: 1.0183653831481934,grad_norm: 0.9999989862799985, iteration: 192016
loss: 0.9920800924301147,grad_norm: 0.9791737173834578, iteration: 192017
loss: 1.0047459602355957,grad_norm: 0.9999990929214051, iteration: 192018
loss: 1.0045970678329468,grad_norm: 0.7782827756761859, iteration: 192019
loss: 0.9822191596031189,grad_norm: 0.9999991094301173, iteration: 192020
loss: 1.002432942390442,grad_norm: 0.9561088298270685, iteration: 192021
loss: 1.0166596174240112,grad_norm: 0.9999991470646253, iteration: 192022
loss: 0.9870348572731018,grad_norm: 0.8781531047120168, iteration: 192023
loss: 0.9799526333808899,grad_norm: 0.9999990919123892, iteration: 192024
loss: 0.9904176592826843,grad_norm: 0.8396503696363009, iteration: 192025
loss: 0.9944661855697632,grad_norm: 0.9999991778751157, iteration: 192026
loss: 0.9847368001937866,grad_norm: 0.9551679464877374, iteration: 192027
loss: 1.002672553062439,grad_norm: 0.9999991160558556, iteration: 192028
loss: 1.0115710496902466,grad_norm: 0.817897002123977, iteration: 192029
loss: 1.0101433992385864,grad_norm: 0.8416193074255045, iteration: 192030
loss: 1.0021228790283203,grad_norm: 0.975631572421181, iteration: 192031
loss: 0.9838689565658569,grad_norm: 0.9999990574449876, iteration: 192032
loss: 0.9683884382247925,grad_norm: 0.9833687077372812, iteration: 192033
loss: 0.9729109406471252,grad_norm: 0.9153329135931055, iteration: 192034
loss: 1.0163946151733398,grad_norm: 0.9999994219142185, iteration: 192035
loss: 1.0031421184539795,grad_norm: 0.877250668571219, iteration: 192036
loss: 1.0046544075012207,grad_norm: 0.9698337268647493, iteration: 192037
loss: 1.0268945693969727,grad_norm: 0.9229112572548607, iteration: 192038
loss: 1.0025205612182617,grad_norm: 0.9363539509005633, iteration: 192039
loss: 1.0096142292022705,grad_norm: 0.9999991776048818, iteration: 192040
loss: 1.0227153301239014,grad_norm: 0.8090903007039527, iteration: 192041
loss: 1.0216622352600098,grad_norm: 0.9999990421235507, iteration: 192042
loss: 1.0232526063919067,grad_norm: 0.9787042527671985, iteration: 192043
loss: 0.9899928569793701,grad_norm: 0.9306923898122094, iteration: 192044
loss: 0.9690154194831848,grad_norm: 0.9805249432655985, iteration: 192045
loss: 1.009940266609192,grad_norm: 0.8976045689978894, iteration: 192046
loss: 0.9701278209686279,grad_norm: 0.8710005249403624, iteration: 192047
loss: 1.0105524063110352,grad_norm: 0.9557806819053207, iteration: 192048
loss: 1.0106441974639893,grad_norm: 0.9593844311656277, iteration: 192049
loss: 1.001526951789856,grad_norm: 0.9669273482329691, iteration: 192050
loss: 0.9537126421928406,grad_norm: 0.9877983802687159, iteration: 192051
loss: 1.0601146221160889,grad_norm: 0.9999990874414427, iteration: 192052
loss: 1.027578353881836,grad_norm: 0.8689387949124248, iteration: 192053
loss: 0.9790322184562683,grad_norm: 0.8877614410026006, iteration: 192054
loss: 1.2543628215789795,grad_norm: 0.9999996120206208, iteration: 192055
loss: 0.9735957980155945,grad_norm: 0.9999991774011967, iteration: 192056
loss: 0.9917268753051758,grad_norm: 0.8017892214304401, iteration: 192057
loss: 0.968681812286377,grad_norm: 0.8811789348473895, iteration: 192058
loss: 1.00240159034729,grad_norm: 0.9999990828007125, iteration: 192059
loss: 1.0167802572250366,grad_norm: 0.9999998655932688, iteration: 192060
loss: 0.9971866011619568,grad_norm: 0.9999990754412466, iteration: 192061
loss: 0.9995171427726746,grad_norm: 0.9999991906726469, iteration: 192062
loss: 0.954002320766449,grad_norm: 0.999999059700471, iteration: 192063
loss: 1.0119305849075317,grad_norm: 0.9999991875337522, iteration: 192064
loss: 1.0542055368423462,grad_norm: 0.9999992655279963, iteration: 192065
loss: 1.0299404859542847,grad_norm: 0.9999991097195501, iteration: 192066
loss: 0.9770215749740601,grad_norm: 0.9227698148836897, iteration: 192067
loss: 0.9698305130004883,grad_norm: 0.8912848246336557, iteration: 192068
loss: 1.033082127571106,grad_norm: 0.9999993997900231, iteration: 192069
loss: 0.993523120880127,grad_norm: 0.8642133944931578, iteration: 192070
loss: 1.0590803623199463,grad_norm: 0.9778855151436872, iteration: 192071
loss: 1.0309453010559082,grad_norm: 0.9999990850128011, iteration: 192072
loss: 1.0026782751083374,grad_norm: 0.8862543893684326, iteration: 192073
loss: 1.028999924659729,grad_norm: 0.9999990011842129, iteration: 192074
loss: 0.9851748943328857,grad_norm: 0.9409951648115022, iteration: 192075
loss: 0.9983634352684021,grad_norm: 0.8771053203703432, iteration: 192076
loss: 0.9756761789321899,grad_norm: 0.9578401863667165, iteration: 192077
loss: 0.9841180443763733,grad_norm: 0.9999990863275554, iteration: 192078
loss: 1.0200825929641724,grad_norm: 0.9999991365391433, iteration: 192079
loss: 1.0160568952560425,grad_norm: 0.8905404168410688, iteration: 192080
loss: 1.0081621408462524,grad_norm: 0.9999992757040602, iteration: 192081
loss: 0.9805257320404053,grad_norm: 0.999999134905438, iteration: 192082
loss: 1.0424295663833618,grad_norm: 0.9999999427954174, iteration: 192083
loss: 0.985899806022644,grad_norm: 0.9999989113185399, iteration: 192084
loss: 0.9728226065635681,grad_norm: 0.9224625973087136, iteration: 192085
loss: 1.028499960899353,grad_norm: 0.8996639908652887, iteration: 192086
loss: 1.0597960948944092,grad_norm: 0.9999992082008723, iteration: 192087
loss: 0.9837846755981445,grad_norm: 0.9999997303339063, iteration: 192088
loss: 0.969014048576355,grad_norm: 0.942327387338283, iteration: 192089
loss: 1.0258233547210693,grad_norm: 0.9803442981020254, iteration: 192090
loss: 1.0110825300216675,grad_norm: 0.9538937726972054, iteration: 192091
loss: 1.0141891241073608,grad_norm: 0.9999992138712327, iteration: 192092
loss: 0.9957085251808167,grad_norm: 0.8672159518106243, iteration: 192093
loss: 0.9555222392082214,grad_norm: 0.8925376996228738, iteration: 192094
loss: 1.0152243375778198,grad_norm: 0.9729449765800547, iteration: 192095
loss: 1.0488982200622559,grad_norm: 0.9999991248459661, iteration: 192096
loss: 1.0028871297836304,grad_norm: 0.999999286791736, iteration: 192097
loss: 1.0240719318389893,grad_norm: 0.9999990510577125, iteration: 192098
loss: 0.9949464201927185,grad_norm: 0.9124279340953485, iteration: 192099
loss: 1.1450786590576172,grad_norm: 0.9999994501525425, iteration: 192100
loss: 0.9869680404663086,grad_norm: 0.873966332775958, iteration: 192101
loss: 0.997336745262146,grad_norm: 0.9999993255878467, iteration: 192102
loss: 1.0083187818527222,grad_norm: 0.9999989622944635, iteration: 192103
loss: 1.1006499528884888,grad_norm: 0.9999992774301847, iteration: 192104
loss: 0.9875935316085815,grad_norm: 0.9999992503633094, iteration: 192105
loss: 0.9926212430000305,grad_norm: 0.9305425895637414, iteration: 192106
loss: 0.9743635058403015,grad_norm: 0.8943038674414002, iteration: 192107
loss: 1.0255473852157593,grad_norm: 0.9635856797714973, iteration: 192108
loss: 0.9803459644317627,grad_norm: 0.9469638991984571, iteration: 192109
loss: 1.000229835510254,grad_norm: 0.9421081075779665, iteration: 192110
loss: 1.0005888938903809,grad_norm: 0.9999990938223342, iteration: 192111
loss: 1.0312137603759766,grad_norm: 0.9999998934442288, iteration: 192112
loss: 0.9932599663734436,grad_norm: 0.8246837595844267, iteration: 192113
loss: 1.0682520866394043,grad_norm: 0.886122468689957, iteration: 192114
loss: 0.9638127088546753,grad_norm: 0.8039741434724649, iteration: 192115
loss: 1.0346022844314575,grad_norm: 0.999999234563999, iteration: 192116
loss: 1.004359245300293,grad_norm: 0.9491048861079364, iteration: 192117
loss: 0.9855853319168091,grad_norm: 0.9999992601584139, iteration: 192118
loss: 0.9997996687889099,grad_norm: 0.8839713205514796, iteration: 192119
loss: 1.074947476387024,grad_norm: 0.9999997370798521, iteration: 192120
loss: 1.0341448783874512,grad_norm: 0.9999991116500185, iteration: 192121
loss: 1.000776767730713,grad_norm: 0.8220062386649568, iteration: 192122
loss: 1.081477403640747,grad_norm: 0.8290331784522329, iteration: 192123
loss: 1.0348728895187378,grad_norm: 0.9243579830549934, iteration: 192124
loss: 1.0084704160690308,grad_norm: 0.9549899375968061, iteration: 192125
loss: 0.9793241620063782,grad_norm: 0.9999991310536415, iteration: 192126
loss: 1.0305489301681519,grad_norm: 0.8337141224159059, iteration: 192127
loss: 0.9881768822669983,grad_norm: 0.9999990075708238, iteration: 192128
loss: 0.9756759405136108,grad_norm: 0.7964850277498208, iteration: 192129
loss: 1.0136620998382568,grad_norm: 0.9985551124254682, iteration: 192130
loss: 0.9974193572998047,grad_norm: 0.999999094101081, iteration: 192131
loss: 1.0327355861663818,grad_norm: 0.9465542770513065, iteration: 192132
loss: 1.0044153928756714,grad_norm: 0.9999991698941967, iteration: 192133
loss: 1.0054829120635986,grad_norm: 0.866053382832843, iteration: 192134
loss: 0.969014585018158,grad_norm: 0.9734052467423365, iteration: 192135
loss: 0.9765345454216003,grad_norm: 0.9138805772823838, iteration: 192136
loss: 0.9795453548431396,grad_norm: 0.999998993891642, iteration: 192137
loss: 1.0705784559249878,grad_norm: 0.9999993175686335, iteration: 192138
loss: 0.9737020134925842,grad_norm: 0.9082630080581651, iteration: 192139
loss: 0.9745819568634033,grad_norm: 0.8875835394914561, iteration: 192140
loss: 0.9572559595108032,grad_norm: 0.9999989961498589, iteration: 192141
loss: 1.0045411586761475,grad_norm: 0.9977199264602701, iteration: 192142
loss: 1.0220565795898438,grad_norm: 0.999998942416703, iteration: 192143
loss: 0.9952452182769775,grad_norm: 0.9552534458232372, iteration: 192144
loss: 0.9991266131401062,grad_norm: 0.8074881740108082, iteration: 192145
loss: 0.9644106030464172,grad_norm: 0.9999992456456949, iteration: 192146
loss: 1.0028653144836426,grad_norm: 0.8991767971792813, iteration: 192147
loss: 0.9906328320503235,grad_norm: 0.788373887680796, iteration: 192148
loss: 0.9658007025718689,grad_norm: 0.9089252828190156, iteration: 192149
loss: 1.0004554986953735,grad_norm: 0.9999991493033017, iteration: 192150
loss: 0.9605581760406494,grad_norm: 0.9999990966883442, iteration: 192151
loss: 0.9906914234161377,grad_norm: 0.9999991675154117, iteration: 192152
loss: 1.0348907709121704,grad_norm: 0.9999991335692717, iteration: 192153
loss: 1.0042723417282104,grad_norm: 0.9999991756790512, iteration: 192154
loss: 0.9693434834480286,grad_norm: 0.8544150781454529, iteration: 192155
loss: 1.0038243532180786,grad_norm: 0.8597275615381468, iteration: 192156
loss: 1.006598949432373,grad_norm: 0.9504666199666811, iteration: 192157
loss: 0.99550461769104,grad_norm: 0.9999991547348047, iteration: 192158
loss: 1.0270758867263794,grad_norm: 0.841925926932015, iteration: 192159
loss: 1.005940556526184,grad_norm: 0.9999990197662101, iteration: 192160
loss: 0.991934597492218,grad_norm: 0.9543862960120717, iteration: 192161
loss: 0.9987947344779968,grad_norm: 0.9999993021736252, iteration: 192162
loss: 1.0109074115753174,grad_norm: 0.9999993881331348, iteration: 192163
loss: 0.9862658381462097,grad_norm: 0.8723459342425407, iteration: 192164
loss: 1.0216525793075562,grad_norm: 0.9019848336718463, iteration: 192165
loss: 1.0026377439498901,grad_norm: 0.9999990984276508, iteration: 192166
loss: 1.0045807361602783,grad_norm: 0.9655438788093503, iteration: 192167
loss: 1.0347435474395752,grad_norm: 0.9999990818787499, iteration: 192168
loss: 1.0261722803115845,grad_norm: 0.9752160013759296, iteration: 192169
loss: 0.9769518971443176,grad_norm: 0.9195949264579324, iteration: 192170
loss: 0.9823132753372192,grad_norm: 0.9999990589332666, iteration: 192171
loss: 1.0006595849990845,grad_norm: 0.9999990102571084, iteration: 192172
loss: 1.0026803016662598,grad_norm: 0.9999996966184954, iteration: 192173
loss: 1.006571888923645,grad_norm: 0.999999137264528, iteration: 192174
loss: 1.0084412097930908,grad_norm: 0.8638844489549288, iteration: 192175
loss: 1.018562912940979,grad_norm: 0.9551191610001748, iteration: 192176
loss: 0.9772219061851501,grad_norm: 0.99999899716827, iteration: 192177
loss: 0.9825410842895508,grad_norm: 0.9999991457209741, iteration: 192178
loss: 1.0777039527893066,grad_norm: 0.9999992435132108, iteration: 192179
loss: 1.0173701047897339,grad_norm: 0.9426810482231817, iteration: 192180
loss: 1.030672550201416,grad_norm: 0.9042797616124588, iteration: 192181
loss: 1.0174332857131958,grad_norm: 0.9999990713576792, iteration: 192182
loss: 1.0244802236557007,grad_norm: 0.9999992411187905, iteration: 192183
loss: 0.9688594937324524,grad_norm: 0.966621869004519, iteration: 192184
loss: 1.0132369995117188,grad_norm: 0.8174117136161638, iteration: 192185
loss: 1.0083614587783813,grad_norm: 0.9999992379131891, iteration: 192186
loss: 0.9986199736595154,grad_norm: 0.9429563863128158, iteration: 192187
loss: 1.0073217153549194,grad_norm: 0.946779195679303, iteration: 192188
loss: 1.0083485841751099,grad_norm: 0.8904604776179535, iteration: 192189
loss: 1.0144989490509033,grad_norm: 0.7959533954965646, iteration: 192190
loss: 0.9903322458267212,grad_norm: 0.9127925556860494, iteration: 192191
loss: 0.9823328256607056,grad_norm: 0.999999154409738, iteration: 192192
loss: 0.9907381534576416,grad_norm: 0.9999996494902712, iteration: 192193
loss: 0.9965746998786926,grad_norm: 0.9999992958439958, iteration: 192194
loss: 1.0266577005386353,grad_norm: 0.9999998799773835, iteration: 192195
loss: 1.0207931995391846,grad_norm: 0.9999990614460381, iteration: 192196
loss: 1.0106912851333618,grad_norm: 0.9999991415900175, iteration: 192197
loss: 0.9866818785667419,grad_norm: 0.8663750267077543, iteration: 192198
loss: 1.0817108154296875,grad_norm: 0.9999990903440519, iteration: 192199
loss: 0.9953545331954956,grad_norm: 0.9999993168604143, iteration: 192200
loss: 0.9794071316719055,grad_norm: 0.9999990033022009, iteration: 192201
loss: 0.9656425714492798,grad_norm: 0.9999991459945764, iteration: 192202
loss: 1.0208015441894531,grad_norm: 0.999999036578122, iteration: 192203
loss: 1.013710618019104,grad_norm: 0.8719921898409873, iteration: 192204
loss: 0.9963605403900146,grad_norm: 0.996657132381718, iteration: 192205
loss: 1.1262307167053223,grad_norm: 0.9999997579897049, iteration: 192206
loss: 1.0065580606460571,grad_norm: 0.837414322454083, iteration: 192207
loss: 0.9835832715034485,grad_norm: 0.8643188007470342, iteration: 192208
loss: 0.9682841897010803,grad_norm: 0.9649195851541024, iteration: 192209
loss: 0.9943166375160217,grad_norm: 0.9999992037902078, iteration: 192210
loss: 1.0498026609420776,grad_norm: 0.9999991264869919, iteration: 192211
loss: 1.0150784254074097,grad_norm: 0.9999998414344204, iteration: 192212
loss: 1.0071951150894165,grad_norm: 0.9507303128484741, iteration: 192213
loss: 0.9945139288902283,grad_norm: 0.8853791469401652, iteration: 192214
loss: 1.0124714374542236,grad_norm: 0.9999993311384155, iteration: 192215
loss: 0.995707631111145,grad_norm: 0.9999990356794968, iteration: 192216
loss: 1.013123869895935,grad_norm: 0.9849594790934891, iteration: 192217
loss: 1.0225569009780884,grad_norm: 0.9214949446219199, iteration: 192218
loss: 0.9763334393501282,grad_norm: 0.975939030874561, iteration: 192219
loss: 0.9856119155883789,grad_norm: 0.9967512060458757, iteration: 192220
loss: 1.0107369422912598,grad_norm: 0.9999991622543317, iteration: 192221
loss: 1.004059910774231,grad_norm: 0.9058403829060568, iteration: 192222
loss: 1.0283830165863037,grad_norm: 0.9999990925453071, iteration: 192223
loss: 1.0250015258789062,grad_norm: 0.8425759604999574, iteration: 192224
loss: 1.008012294769287,grad_norm: 0.788692364865102, iteration: 192225
loss: 1.0374256372451782,grad_norm: 0.9999990859895855, iteration: 192226
loss: 1.0385239124298096,grad_norm: 0.9999992214682745, iteration: 192227
loss: 0.9992639422416687,grad_norm: 0.9999991710131307, iteration: 192228
loss: 1.0214943885803223,grad_norm: 0.9999991150830425, iteration: 192229
loss: 1.0562875270843506,grad_norm: 0.9999996271657995, iteration: 192230
loss: 0.9786396026611328,grad_norm: 0.8303603177584931, iteration: 192231
loss: 0.9978234171867371,grad_norm: 0.9999991757582125, iteration: 192232
loss: 0.9921765923500061,grad_norm: 0.999999645419915, iteration: 192233
loss: 1.0103086233139038,grad_norm: 0.9999992940880704, iteration: 192234
loss: 0.9759321212768555,grad_norm: 0.9890689017154836, iteration: 192235
loss: 0.9912686944007874,grad_norm: 0.9999991692497794, iteration: 192236
loss: 1.0100373029708862,grad_norm: 0.9379268347720574, iteration: 192237
loss: 0.9967042803764343,grad_norm: 0.9353264252444309, iteration: 192238
loss: 1.002492070198059,grad_norm: 0.999999118839345, iteration: 192239
loss: 0.986238420009613,grad_norm: 0.9571522616389387, iteration: 192240
loss: 0.9203866124153137,grad_norm: 0.9602011037104488, iteration: 192241
loss: 0.9797510504722595,grad_norm: 0.9999989812449425, iteration: 192242
loss: 1.0787595510482788,grad_norm: 0.9999993191276095, iteration: 192243
loss: 0.9733066558837891,grad_norm: 0.9957501441793902, iteration: 192244
loss: 0.9657055735588074,grad_norm: 0.9807279938162371, iteration: 192245
loss: 1.0109976530075073,grad_norm: 0.9999991219322036, iteration: 192246
loss: 0.9999772310256958,grad_norm: 0.9999993801635839, iteration: 192247
loss: 0.9702627658843994,grad_norm: 0.9999989543195862, iteration: 192248
loss: 1.1012885570526123,grad_norm: 0.9999993784012486, iteration: 192249
loss: 0.9864172339439392,grad_norm: 0.9473124582804641, iteration: 192250
loss: 1.0196971893310547,grad_norm: 0.9110574974398651, iteration: 192251
loss: 1.0220234394073486,grad_norm: 0.9999991299433473, iteration: 192252
loss: 1.013016700744629,grad_norm: 0.9314520451349189, iteration: 192253
loss: 0.9823277592658997,grad_norm: 0.999999016858053, iteration: 192254
loss: 1.010193943977356,grad_norm: 0.9999994893480905, iteration: 192255
loss: 0.9572993516921997,grad_norm: 0.9999992395920104, iteration: 192256
loss: 0.9387041926383972,grad_norm: 0.8410991108893014, iteration: 192257
loss: 1.0292844772338867,grad_norm: 0.9999990601266485, iteration: 192258
loss: 1.014548897743225,grad_norm: 0.9179673256160824, iteration: 192259
loss: 0.9939530491828918,grad_norm: 0.999999056840465, iteration: 192260
loss: 1.0160526037216187,grad_norm: 0.8224267112161787, iteration: 192261
loss: 1.0034995079040527,grad_norm: 0.9999993478054285, iteration: 192262
loss: 1.0227290391921997,grad_norm: 0.9999990275353792, iteration: 192263
loss: 0.98744136095047,grad_norm: 0.9999990904811744, iteration: 192264
loss: 0.9767148494720459,grad_norm: 0.999999125773602, iteration: 192265
loss: 0.9848662614822388,grad_norm: 0.9999990923338974, iteration: 192266
loss: 1.0220084190368652,grad_norm: 0.9999990007779695, iteration: 192267
loss: 0.9786607623100281,grad_norm: 0.9984497653574755, iteration: 192268
loss: 1.0319600105285645,grad_norm: 0.9999992688578322, iteration: 192269
loss: 0.9653351306915283,grad_norm: 0.9999989525370893, iteration: 192270
loss: 1.0401889085769653,grad_norm: 0.9583906665868801, iteration: 192271
loss: 1.0264391899108887,grad_norm: 0.98005668524831, iteration: 192272
loss: 1.0022557973861694,grad_norm: 0.8883840706081115, iteration: 192273
loss: 0.972615122795105,grad_norm: 0.9999989891774524, iteration: 192274
loss: 1.0096269845962524,grad_norm: 0.9999995369658973, iteration: 192275
loss: 0.9978553056716919,grad_norm: 0.999999165487676, iteration: 192276
loss: 0.9984447956085205,grad_norm: 0.9999990225208497, iteration: 192277
loss: 0.9834061861038208,grad_norm: 0.999998963381186, iteration: 192278
loss: 0.9960375428199768,grad_norm: 0.9757382367448943, iteration: 192279
loss: 0.9878512620925903,grad_norm: 0.999999138828207, iteration: 192280
loss: 1.0305393934249878,grad_norm: 0.9999992323906874, iteration: 192281
loss: 1.0007236003875732,grad_norm: 0.9999989949766175, iteration: 192282
loss: 0.9919111728668213,grad_norm: 0.8335768632074931, iteration: 192283
loss: 1.030336618423462,grad_norm: 0.9999990768885235, iteration: 192284
loss: 1.0215915441513062,grad_norm: 0.9999991155950138, iteration: 192285
loss: 0.9921181797981262,grad_norm: 0.9873000171854571, iteration: 192286
loss: 1.030351996421814,grad_norm: 0.8686465381643037, iteration: 192287
loss: 0.9820324182510376,grad_norm: 0.9484589660283947, iteration: 192288
loss: 1.0361019372940063,grad_norm: 0.9999996655689979, iteration: 192289
loss: 0.9923850297927856,grad_norm: 0.8356430460955759, iteration: 192290
loss: 1.047638177871704,grad_norm: 0.9999996998997545, iteration: 192291
loss: 0.9530207514762878,grad_norm: 0.9010960505876121, iteration: 192292
loss: 0.9639344811439514,grad_norm: 0.9999991404150645, iteration: 192293
loss: 1.020210862159729,grad_norm: 0.9999991502116967, iteration: 192294
loss: 1.0196624994277954,grad_norm: 0.9999990443489764, iteration: 192295
loss: 1.004710078239441,grad_norm: 0.9999990351525189, iteration: 192296
loss: 1.0078344345092773,grad_norm: 0.9999991522296297, iteration: 192297
loss: 0.9975337982177734,grad_norm: 0.9062213061627695, iteration: 192298
loss: 1.021833062171936,grad_norm: 0.9708526828684868, iteration: 192299
loss: 1.031632423400879,grad_norm: 0.9999992024945998, iteration: 192300
loss: 0.9877864718437195,grad_norm: 0.9210932704999404, iteration: 192301
loss: 0.9612924456596375,grad_norm: 0.9999991003415817, iteration: 192302
loss: 1.0364022254943848,grad_norm: 0.8844135680982637, iteration: 192303
loss: 1.0128237009048462,grad_norm: 0.9068819959358466, iteration: 192304
loss: 1.0057438611984253,grad_norm: 0.9999991083515118, iteration: 192305
loss: 0.9927613735198975,grad_norm: 0.8744405412144085, iteration: 192306
loss: 1.023330807685852,grad_norm: 0.9999991331796309, iteration: 192307
loss: 1.0202422142028809,grad_norm: 0.9270089128384962, iteration: 192308
loss: 1.0269609689712524,grad_norm: 0.9453754637772974, iteration: 192309
loss: 0.9668517708778381,grad_norm: 0.9321047346482692, iteration: 192310
loss: 0.9791171550750732,grad_norm: 0.9493495590584502, iteration: 192311
loss: 0.9884699583053589,grad_norm: 0.9980753849960994, iteration: 192312
loss: 0.9926918745040894,grad_norm: 0.9454728529385389, iteration: 192313
loss: 1.0220050811767578,grad_norm: 0.9999990984168028, iteration: 192314
loss: 1.037742257118225,grad_norm: 0.999999157024635, iteration: 192315
loss: 0.9928176999092102,grad_norm: 0.9546879998100445, iteration: 192316
loss: 0.9476499557495117,grad_norm: 0.8935286555762222, iteration: 192317
loss: 0.9717360138893127,grad_norm: 0.9862964040431814, iteration: 192318
loss: 0.9995763897895813,grad_norm: 0.8353675552935451, iteration: 192319
loss: 1.022842288017273,grad_norm: 0.9999990174242007, iteration: 192320
loss: 0.9685944318771362,grad_norm: 0.8813091344468823, iteration: 192321
loss: 1.0049357414245605,grad_norm: 0.9999992281651503, iteration: 192322
loss: 0.9865532517433167,grad_norm: 0.9999994443494675, iteration: 192323
loss: 1.0182173252105713,grad_norm: 0.9999992427445249, iteration: 192324
loss: 1.0279008150100708,grad_norm: 1.000000028841317, iteration: 192325
loss: 1.010630488395691,grad_norm: 0.6887378038930647, iteration: 192326
loss: 0.9688334465026855,grad_norm: 0.9999991659012152, iteration: 192327
loss: 1.1026804447174072,grad_norm: 0.8883358526390545, iteration: 192328
loss: 1.0353466272354126,grad_norm: 0.8568832685020006, iteration: 192329
loss: 0.9981274604797363,grad_norm: 0.9999992504634, iteration: 192330
loss: 0.9935042262077332,grad_norm: 0.8940259693416382, iteration: 192331
loss: 0.9765746593475342,grad_norm: 0.9780525008309149, iteration: 192332
loss: 0.9905006289482117,grad_norm: 0.9329082747748211, iteration: 192333
loss: 1.0921798944473267,grad_norm: 0.999999895641302, iteration: 192334
loss: 0.9640636444091797,grad_norm: 0.9999993128203529, iteration: 192335
loss: 0.9792377352714539,grad_norm: 0.9999996075265953, iteration: 192336
loss: 1.023284912109375,grad_norm: 0.972554788029048, iteration: 192337
loss: 0.9954970479011536,grad_norm: 0.9999991501560638, iteration: 192338
loss: 0.9695611000061035,grad_norm: 0.9999991088911055, iteration: 192339
loss: 1.0118404626846313,grad_norm: 0.828600869286848, iteration: 192340
loss: 1.0492658615112305,grad_norm: 0.9909517755306675, iteration: 192341
loss: 0.9832233190536499,grad_norm: 0.8706080064631155, iteration: 192342
loss: 1.0531851053237915,grad_norm: 0.9999992531106676, iteration: 192343
loss: 1.0175925493240356,grad_norm: 0.9777040254533678, iteration: 192344
loss: 0.9872209429740906,grad_norm: 0.9872384785396993, iteration: 192345
loss: 0.9827485084533691,grad_norm: 0.8740720776525214, iteration: 192346
loss: 1.016142725944519,grad_norm: 0.9999999329107623, iteration: 192347
loss: 0.9959349036216736,grad_norm: 0.7569934860057409, iteration: 192348
loss: 1.0083087682724,grad_norm: 0.9719252643909088, iteration: 192349
loss: 1.0015193223953247,grad_norm: 0.9816323701659908, iteration: 192350
loss: 1.0067732334136963,grad_norm: 0.9999990948882783, iteration: 192351
loss: 1.022674560546875,grad_norm: 0.8372291523923432, iteration: 192352
loss: 0.9983887076377869,grad_norm: 0.9999991739627825, iteration: 192353
loss: 1.0036321878433228,grad_norm: 0.9708870539698349, iteration: 192354
loss: 1.053523302078247,grad_norm: 0.999999183319853, iteration: 192355
loss: 0.9856505393981934,grad_norm: 0.7995486045855149, iteration: 192356
loss: 1.0270296335220337,grad_norm: 0.9067200251525669, iteration: 192357
loss: 1.0268139839172363,grad_norm: 0.9267864722015348, iteration: 192358
loss: 1.0024807453155518,grad_norm: 0.9999991762172487, iteration: 192359
loss: 1.0265793800354004,grad_norm: 0.9101746299960874, iteration: 192360
loss: 0.9866716265678406,grad_norm: 0.9999991062614263, iteration: 192361
loss: 1.0573490858078003,grad_norm: 0.9999991578361234, iteration: 192362
loss: 0.9844857454299927,grad_norm: 0.8649260116611188, iteration: 192363
loss: 1.0213861465454102,grad_norm: 0.8188187439146755, iteration: 192364
loss: 0.9999887347221375,grad_norm: 0.9999991969078107, iteration: 192365
loss: 0.9462386965751648,grad_norm: 0.9277918893523333, iteration: 192366
loss: 0.9976357817649841,grad_norm: 0.9999993070214264, iteration: 192367
loss: 0.9683728814125061,grad_norm: 0.9861568302328148, iteration: 192368
loss: 1.019261360168457,grad_norm: 0.9415932749614999, iteration: 192369
loss: 1.0238083600997925,grad_norm: 0.8089229284354652, iteration: 192370
loss: 0.9904844164848328,grad_norm: 0.8169009412807149, iteration: 192371
loss: 1.0351243019104004,grad_norm: 0.9999992980466121, iteration: 192372
loss: 0.9514838457107544,grad_norm: 0.9478460904962124, iteration: 192373
loss: 1.0373022556304932,grad_norm: 0.9999992699445363, iteration: 192374
loss: 0.96284419298172,grad_norm: 0.8623244696050395, iteration: 192375
loss: 0.9994670152664185,grad_norm: 0.9027046801112913, iteration: 192376
loss: 0.9970625638961792,grad_norm: 0.9999990958287256, iteration: 192377
loss: 0.9860289096832275,grad_norm: 0.9840110723967862, iteration: 192378
loss: 1.084454894065857,grad_norm: 0.9524459096131029, iteration: 192379
loss: 1.007634162902832,grad_norm: 0.8788406337804235, iteration: 192380
loss: 0.9777925610542297,grad_norm: 0.999921758740738, iteration: 192381
loss: 0.9950137138366699,grad_norm: 0.9999991013544701, iteration: 192382
loss: 1.0287399291992188,grad_norm: 0.9999991720068575, iteration: 192383
loss: 1.0302778482437134,grad_norm: 0.9993420578090708, iteration: 192384
loss: 0.9534891843795776,grad_norm: 0.9999992070683903, iteration: 192385
loss: 1.0060875415802002,grad_norm: 0.7990620749425368, iteration: 192386
loss: 0.9883720874786377,grad_norm: 0.9999990628862034, iteration: 192387
loss: 0.9762298464775085,grad_norm: 0.8167269023908126, iteration: 192388
loss: 0.9802929162979126,grad_norm: 0.8838648568530255, iteration: 192389
loss: 1.0724759101867676,grad_norm: 0.9999990670449538, iteration: 192390
loss: 0.9835059642791748,grad_norm: 0.9796565663714143, iteration: 192391
loss: 0.9796087145805359,grad_norm: 0.9999991049029486, iteration: 192392
loss: 1.0235487222671509,grad_norm: 0.9464162880514166, iteration: 192393
loss: 1.1734564304351807,grad_norm: 0.9999993177279786, iteration: 192394
loss: 1.0971503257751465,grad_norm: 0.999999931872047, iteration: 192395
loss: 0.9982685446739197,grad_norm: 0.9999990582144868, iteration: 192396
loss: 1.0210212469100952,grad_norm: 0.897319893998391, iteration: 192397
loss: 0.9912325739860535,grad_norm: 0.9999989815464184, iteration: 192398
loss: 0.9895128011703491,grad_norm: 0.8412906783916324, iteration: 192399
loss: 1.0002933740615845,grad_norm: 0.8905099551013287, iteration: 192400
loss: 1.0060728788375854,grad_norm: 0.9001031925353679, iteration: 192401
loss: 1.042750358581543,grad_norm: 0.999999084052406, iteration: 192402
loss: 1.0043470859527588,grad_norm: 0.9999992212792711, iteration: 192403
loss: 0.9864022135734558,grad_norm: 0.9999991816519778, iteration: 192404
loss: 1.0347633361816406,grad_norm: 0.9688946593080303, iteration: 192405
loss: 1.0816946029663086,grad_norm: 0.9999993068410519, iteration: 192406
loss: 0.9888744950294495,grad_norm: 0.9999990792040956, iteration: 192407
loss: 1.0259150266647339,grad_norm: 0.9999990755701843, iteration: 192408
loss: 1.0018855333328247,grad_norm: 0.9999992539142537, iteration: 192409
loss: 0.9761818647384644,grad_norm: 0.9999991909456086, iteration: 192410
loss: 0.9940496683120728,grad_norm: 0.9999990794459996, iteration: 192411
loss: 0.9863199591636658,grad_norm: 0.8955339189192074, iteration: 192412
loss: 1.0011944770812988,grad_norm: 0.9999991569539128, iteration: 192413
loss: 1.0186488628387451,grad_norm: 0.9999998553960632, iteration: 192414
loss: 0.9945265650749207,grad_norm: 0.9302874262057678, iteration: 192415
loss: 1.0373952388763428,grad_norm: 0.9999990315341635, iteration: 192416
loss: 0.9963579773902893,grad_norm: 0.944474313730259, iteration: 192417
loss: 1.0025782585144043,grad_norm: 0.8638689650019967, iteration: 192418
loss: 0.9820795059204102,grad_norm: 0.9028298450645827, iteration: 192419
loss: 1.0052285194396973,grad_norm: 0.8742463835610568, iteration: 192420
loss: 1.0047943592071533,grad_norm: 0.999999227593255, iteration: 192421
loss: 0.9867944717407227,grad_norm: 0.9174312241792419, iteration: 192422
loss: 1.026066541671753,grad_norm: 0.9999991702766183, iteration: 192423
loss: 0.9910217523574829,grad_norm: 0.9850668714701264, iteration: 192424
loss: 1.0098298788070679,grad_norm: 0.8737705331918288, iteration: 192425
loss: 1.008278489112854,grad_norm: 0.8998111058165817, iteration: 192426
loss: 1.0152477025985718,grad_norm: 0.9999990988268032, iteration: 192427
loss: 1.0253119468688965,grad_norm: 0.9999990606047948, iteration: 192428
loss: 0.9965988993644714,grad_norm: 0.953774674953596, iteration: 192429
loss: 1.0142371654510498,grad_norm: 0.9514449530932649, iteration: 192430
loss: 0.9759693145751953,grad_norm: 0.9999991077724858, iteration: 192431
loss: 1.0690771341323853,grad_norm: 0.9999990166481818, iteration: 192432
loss: 1.0197690725326538,grad_norm: 0.954083261504823, iteration: 192433
loss: 1.048189401626587,grad_norm: 0.9999989894026166, iteration: 192434
loss: 0.9863294959068298,grad_norm: 0.9999992000356913, iteration: 192435
loss: 1.0241297483444214,grad_norm: 0.8506334397254596, iteration: 192436
loss: 1.0037835836410522,grad_norm: 0.9999991032512981, iteration: 192437
loss: 0.9927077293395996,grad_norm: 0.9999989813081148, iteration: 192438
loss: 0.974766194820404,grad_norm: 0.9999990908374126, iteration: 192439
loss: 0.9722248315811157,grad_norm: 0.9999992192747063, iteration: 192440
loss: 1.0000689029693604,grad_norm: 0.9446152204108351, iteration: 192441
loss: 1.0262559652328491,grad_norm: 0.8284394556798738, iteration: 192442
loss: 0.9847171306610107,grad_norm: 0.9999991752244163, iteration: 192443
loss: 0.9977824091911316,grad_norm: 0.9228058744810684, iteration: 192444
loss: 0.993188202381134,grad_norm: 0.9618542029085214, iteration: 192445
loss: 1.0780954360961914,grad_norm: 0.7872220391514727, iteration: 192446
loss: 1.1359187364578247,grad_norm: 0.9999997794035295, iteration: 192447
loss: 0.9881144762039185,grad_norm: 0.8979865331828416, iteration: 192448
loss: 1.0036773681640625,grad_norm: 0.9498667905087944, iteration: 192449
loss: 0.9970947504043579,grad_norm: 0.9124255462747902, iteration: 192450
loss: 0.9825613498687744,grad_norm: 0.9643258676673885, iteration: 192451
loss: 1.0240037441253662,grad_norm: 0.9999991781984818, iteration: 192452
loss: 0.9805512428283691,grad_norm: 0.937078683380645, iteration: 192453
loss: 1.041358470916748,grad_norm: 0.9999992439185909, iteration: 192454
loss: 1.0071830749511719,grad_norm: 0.9999992513236488, iteration: 192455
loss: 1.0215791463851929,grad_norm: 0.836757306139138, iteration: 192456
loss: 1.0055267810821533,grad_norm: 0.9999990086928013, iteration: 192457
loss: 1.007494568824768,grad_norm: 0.9523834259811541, iteration: 192458
loss: 0.9724829196929932,grad_norm: 0.9175393341745385, iteration: 192459
loss: 1.0247230529785156,grad_norm: 0.9999992832828182, iteration: 192460
loss: 1.0094647407531738,grad_norm: 0.9999992947680159, iteration: 192461
loss: 1.020983338356018,grad_norm: 0.9427759228750444, iteration: 192462
loss: 0.9872071743011475,grad_norm: 0.8166706010139504, iteration: 192463
loss: 0.9789520502090454,grad_norm: 0.9548127036506168, iteration: 192464
loss: 0.9930607676506042,grad_norm: 0.9261664627291791, iteration: 192465
loss: 0.9784759283065796,grad_norm: 0.9999989917855966, iteration: 192466
loss: 1.0138475894927979,grad_norm: 0.9999992310030517, iteration: 192467
loss: 1.013084053993225,grad_norm: 0.9533883862959259, iteration: 192468
loss: 0.990676760673523,grad_norm: 0.9999993503030704, iteration: 192469
loss: 1.0025838613510132,grad_norm: 0.8836844011735664, iteration: 192470
loss: 0.9992102384567261,grad_norm: 0.9999992250564146, iteration: 192471
loss: 1.0406681299209595,grad_norm: 0.9194021426271476, iteration: 192472
loss: 0.9830604791641235,grad_norm: 0.9920471471982075, iteration: 192473
loss: 0.9706801772117615,grad_norm: 0.8051574811757688, iteration: 192474
loss: 1.0661585330963135,grad_norm: 0.999999603330753, iteration: 192475
loss: 0.9634291529655457,grad_norm: 0.999999164679307, iteration: 192476
loss: 1.024516224861145,grad_norm: 0.9636835668853767, iteration: 192477
loss: 0.9965712428092957,grad_norm: 0.9619185389463798, iteration: 192478
loss: 0.9803329706192017,grad_norm: 0.8781528375037885, iteration: 192479
loss: 0.9889239072799683,grad_norm: 0.9999990681954802, iteration: 192480
loss: 0.9892120957374573,grad_norm: 0.9371211125633234, iteration: 192481
loss: 0.991023063659668,grad_norm: 0.8446766984573572, iteration: 192482
loss: 1.01669442653656,grad_norm: 0.8487203989921136, iteration: 192483
loss: 1.0164005756378174,grad_norm: 0.9165741902724415, iteration: 192484
loss: 0.9780500531196594,grad_norm: 0.9999991349040326, iteration: 192485
loss: 1.017029047012329,grad_norm: 0.9501155142302907, iteration: 192486
loss: 0.9744357466697693,grad_norm: 0.9999990607886056, iteration: 192487
loss: 1.0070894956588745,grad_norm: 0.9658366602937217, iteration: 192488
loss: 1.0395262241363525,grad_norm: 0.9417652183588014, iteration: 192489
loss: 0.9559071063995361,grad_norm: 0.7599894032225031, iteration: 192490
loss: 0.9803454875946045,grad_norm: 0.9999991184366727, iteration: 192491
loss: 1.0100576877593994,grad_norm: 0.9860015304289264, iteration: 192492
loss: 1.0132094621658325,grad_norm: 0.8393750481173504, iteration: 192493
loss: 0.9816030263900757,grad_norm: 0.9999989602318975, iteration: 192494
loss: 1.039322018623352,grad_norm: 0.9453773289705149, iteration: 192495
loss: 1.0174411535263062,grad_norm: 0.9997920720790094, iteration: 192496
loss: 0.9957848191261292,grad_norm: 0.9999990261177844, iteration: 192497
loss: 0.9830057621002197,grad_norm: 0.9999992315867738, iteration: 192498
loss: 1.0068347454071045,grad_norm: 0.9999990571158041, iteration: 192499
loss: 0.956748366355896,grad_norm: 0.999999133907296, iteration: 192500
loss: 0.9919244050979614,grad_norm: 0.9999991348031427, iteration: 192501
loss: 1.0010056495666504,grad_norm: 0.9878792295009309, iteration: 192502
loss: 1.0212870836257935,grad_norm: 0.999999087746787, iteration: 192503
loss: 1.0226646661758423,grad_norm: 0.9999991172361076, iteration: 192504
loss: 0.9959710240364075,grad_norm: 0.9121180436329518, iteration: 192505
loss: 1.0244437456130981,grad_norm: 0.9999991805482479, iteration: 192506
loss: 0.9971843957901001,grad_norm: 0.999999335935103, iteration: 192507
loss: 0.9928626418113708,grad_norm: 0.9999991560283731, iteration: 192508
loss: 0.9880756735801697,grad_norm: 0.8546706509504626, iteration: 192509
loss: 1.0131347179412842,grad_norm: 0.9999999098678848, iteration: 192510
loss: 1.1377345323562622,grad_norm: 0.9999995350300905, iteration: 192511
loss: 1.0068985223770142,grad_norm: 0.9999991294041071, iteration: 192512
loss: 0.997098445892334,grad_norm: 0.9306702430863251, iteration: 192513
loss: 0.9951860308647156,grad_norm: 0.9999990413710622, iteration: 192514
loss: 0.9789297580718994,grad_norm: 0.9650757943616689, iteration: 192515
loss: 0.9814703464508057,grad_norm: 0.9999990988332212, iteration: 192516
loss: 0.9727575778961182,grad_norm: 0.9999993289079844, iteration: 192517
loss: 0.9811418652534485,grad_norm: 0.8640651847956584, iteration: 192518
loss: 1.0129145383834839,grad_norm: 0.9999990210625849, iteration: 192519
loss: 0.9861584305763245,grad_norm: 0.9907261777258126, iteration: 192520
loss: 0.9627373218536377,grad_norm: 0.9999991435406476, iteration: 192521
loss: 0.9699728488922119,grad_norm: 0.8183933475664037, iteration: 192522
loss: 1.0140273571014404,grad_norm: 0.8479438452476371, iteration: 192523
loss: 1.0265520811080933,grad_norm: 0.999999369840981, iteration: 192524
loss: 1.042149305343628,grad_norm: 0.9999992542427131, iteration: 192525
loss: 0.9911127686500549,grad_norm: 0.9999990453548275, iteration: 192526
loss: 0.958185613155365,grad_norm: 0.9576695992619084, iteration: 192527
loss: 0.9575207829475403,grad_norm: 0.8353126532771891, iteration: 192528
loss: 0.9907745122909546,grad_norm: 0.9999992356799873, iteration: 192529
loss: 1.014139175415039,grad_norm: 0.9999990498135212, iteration: 192530
loss: 1.0314348936080933,grad_norm: 0.8321809774892961, iteration: 192531
loss: 0.9882916212081909,grad_norm: 0.9148545133634991, iteration: 192532
loss: 1.047123908996582,grad_norm: 0.9999999449249627, iteration: 192533
loss: 0.9950200915336609,grad_norm: 0.8416007278103087, iteration: 192534
loss: 1.0252946615219116,grad_norm: 0.802574469321574, iteration: 192535
loss: 1.0190354585647583,grad_norm: 0.8181237894873798, iteration: 192536
loss: 1.0168379545211792,grad_norm: 0.9999990404337901, iteration: 192537
loss: 1.0572375059127808,grad_norm: 0.9327968274473756, iteration: 192538
loss: 1.1112028360366821,grad_norm: 0.9999990909255646, iteration: 192539
loss: 1.006726861000061,grad_norm: 0.999999258534429, iteration: 192540
loss: 0.9969378709793091,grad_norm: 0.9999990891617775, iteration: 192541
loss: 0.9436115026473999,grad_norm: 0.9999990495088604, iteration: 192542
loss: 0.9972122311592102,grad_norm: 0.999999138138202, iteration: 192543
loss: 1.0296698808670044,grad_norm: 0.9999993495896257, iteration: 192544
loss: 0.9798523783683777,grad_norm: 0.7377731437048762, iteration: 192545
loss: 1.049361228942871,grad_norm: 0.9999994613018207, iteration: 192546
loss: 0.989013671875,grad_norm: 0.8666241186322893, iteration: 192547
loss: 0.9747841358184814,grad_norm: 0.8140481728427903, iteration: 192548
loss: 0.9614960551261902,grad_norm: 0.9840067976697798, iteration: 192549
loss: 0.988945722579956,grad_norm: 0.9219406578089602, iteration: 192550
loss: 1.0153948068618774,grad_norm: 0.9999991056026633, iteration: 192551
loss: 1.0012165307998657,grad_norm: 0.8565087601751478, iteration: 192552
loss: 1.0316044092178345,grad_norm: 0.9999997291266951, iteration: 192553
loss: 1.0046744346618652,grad_norm: 0.9444922824917912, iteration: 192554
loss: 0.9959664940834045,grad_norm: 0.8756441095968067, iteration: 192555
loss: 1.0394043922424316,grad_norm: 0.9919379722250953, iteration: 192556
loss: 0.9840966463088989,grad_norm: 0.890308805145725, iteration: 192557
loss: 1.0165073871612549,grad_norm: 0.9999990466036085, iteration: 192558
loss: 1.0045356750488281,grad_norm: 0.9999990851830795, iteration: 192559
loss: 0.995764434337616,grad_norm: 0.9999990724157901, iteration: 192560
loss: 0.9669288396835327,grad_norm: 0.9052055253783637, iteration: 192561
loss: 0.9994776248931885,grad_norm: 0.9999992939351569, iteration: 192562
loss: 1.0296369791030884,grad_norm: 0.9316265260132037, iteration: 192563
loss: 1.0086349248886108,grad_norm: 0.9999991380402738, iteration: 192564
loss: 0.9980555176734924,grad_norm: 0.9340747412403326, iteration: 192565
loss: 1.0202312469482422,grad_norm: 0.7930792847337377, iteration: 192566
loss: 1.009161353111267,grad_norm: 0.9999990332049952, iteration: 192567
loss: 0.9726481437683105,grad_norm: 0.9999988821488471, iteration: 192568
loss: 1.0086028575897217,grad_norm: 0.9999992294382223, iteration: 192569
loss: 1.0335901975631714,grad_norm: 0.7989693045009982, iteration: 192570
loss: 0.9949291348457336,grad_norm: 0.9078150234815047, iteration: 192571
loss: 1.0186476707458496,grad_norm: 0.9999991555546109, iteration: 192572
loss: 1.0294592380523682,grad_norm: 0.9999993095599111, iteration: 192573
loss: 1.0202221870422363,grad_norm: 0.8538008913991884, iteration: 192574
loss: 1.0025893449783325,grad_norm: 0.9361669789662258, iteration: 192575
loss: 1.0308690071105957,grad_norm: 0.9000443835953004, iteration: 192576
loss: 1.0097194910049438,grad_norm: 0.9247163069491451, iteration: 192577
loss: 1.0192737579345703,grad_norm: 0.9999991147248181, iteration: 192578
loss: 0.9919286370277405,grad_norm: 0.9999992307391361, iteration: 192579
loss: 0.9605140089988708,grad_norm: 0.8769019036464634, iteration: 192580
loss: 0.9710201621055603,grad_norm: 0.9999993631074552, iteration: 192581
loss: 1.0317533016204834,grad_norm: 0.9999993660948243, iteration: 192582
loss: 1.0374408960342407,grad_norm: 0.9999995618372884, iteration: 192583
loss: 1.0202443599700928,grad_norm: 0.8749429521450384, iteration: 192584
loss: 0.9823757410049438,grad_norm: 0.9999990273036571, iteration: 192585
loss: 1.000449776649475,grad_norm: 0.999999897861826, iteration: 192586
loss: 0.969512403011322,grad_norm: 0.8314159126641992, iteration: 192587
loss: 0.9668482542037964,grad_norm: 0.999999398575664, iteration: 192588
loss: 1.0114173889160156,grad_norm: 0.7532618196221439, iteration: 192589
loss: 1.0050599575042725,grad_norm: 0.9567711217573626, iteration: 192590
loss: 0.9511783123016357,grad_norm: 0.9999991108845657, iteration: 192591
loss: 0.9965816736221313,grad_norm: 0.9170088983520888, iteration: 192592
loss: 0.9569200277328491,grad_norm: 0.7972571624268467, iteration: 192593
loss: 0.9761990308761597,grad_norm: 0.8922754882723457, iteration: 192594
loss: 0.9932233691215515,grad_norm: 0.9999990906115344, iteration: 192595
loss: 1.0527006387710571,grad_norm: 0.999999245448234, iteration: 192596
loss: 0.99656742811203,grad_norm: 0.9999991115042963, iteration: 192597
loss: 1.0255087614059448,grad_norm: 0.9999993369196216, iteration: 192598
loss: 0.9887452721595764,grad_norm: 0.9075838619532802, iteration: 192599
loss: 1.0192015171051025,grad_norm: 0.9999996597751295, iteration: 192600
loss: 0.9816378951072693,grad_norm: 0.9999991075698996, iteration: 192601
loss: 0.966131865978241,grad_norm: 0.8343615893078461, iteration: 192602
loss: 0.9808636903762817,grad_norm: 0.953568588216031, iteration: 192603
loss: 1.0140190124511719,grad_norm: 0.8986903684592517, iteration: 192604
loss: 0.9911069273948669,grad_norm: 0.8794192020322643, iteration: 192605
loss: 1.0295697450637817,grad_norm: 0.999999265710539, iteration: 192606
loss: 1.0048856735229492,grad_norm: 0.9999994689526969, iteration: 192607
loss: 0.9865292906761169,grad_norm: 0.9999992191061455, iteration: 192608
loss: 1.0281956195831299,grad_norm: 0.9369927321191471, iteration: 192609
loss: 0.9690160155296326,grad_norm: 0.9930306864532713, iteration: 192610
loss: 0.9886900186538696,grad_norm: 0.9152842316432573, iteration: 192611
loss: 0.986417293548584,grad_norm: 0.999999096482605, iteration: 192612
loss: 0.9683324098587036,grad_norm: 0.9495036302187059, iteration: 192613
loss: 0.9855496883392334,grad_norm: 0.963226754222383, iteration: 192614
loss: 0.9966127276420593,grad_norm: 0.9780575305999702, iteration: 192615
loss: 0.9649526476860046,grad_norm: 0.8960194220634241, iteration: 192616
loss: 0.9836575388908386,grad_norm: 0.8533073641115206, iteration: 192617
loss: 0.9822891354560852,grad_norm: 0.9650868925450125, iteration: 192618
loss: 1.0137486457824707,grad_norm: 0.9971056416739068, iteration: 192619
loss: 1.0248719453811646,grad_norm: 0.7713405969880616, iteration: 192620
loss: 0.9967955350875854,grad_norm: 0.9999990818009841, iteration: 192621
loss: 1.0132840871810913,grad_norm: 0.999999769219517, iteration: 192622
loss: 1.0086956024169922,grad_norm: 0.9999998985947364, iteration: 192623
loss: 1.042289137840271,grad_norm: 0.9999991924173285, iteration: 192624
loss: 0.9812310934066772,grad_norm: 0.8778356395373846, iteration: 192625
loss: 0.9873886704444885,grad_norm: 0.9834116423205426, iteration: 192626
loss: 1.0240846872329712,grad_norm: 0.9784798670559305, iteration: 192627
loss: 0.9745004773139954,grad_norm: 0.927986169537731, iteration: 192628
loss: 1.0374242067337036,grad_norm: 0.9006335117488827, iteration: 192629
loss: 1.0744305849075317,grad_norm: 0.9999992475252815, iteration: 192630
loss: 1.0270191431045532,grad_norm: 0.8590263104814273, iteration: 192631
loss: 1.0091954469680786,grad_norm: 0.9407491484049824, iteration: 192632
loss: 1.0165843963623047,grad_norm: 0.9999992170791908, iteration: 192633
loss: 0.9929577112197876,grad_norm: 0.9999992021209958, iteration: 192634
loss: 1.0051692724227905,grad_norm: 0.8021786274415511, iteration: 192635
loss: 1.0241817235946655,grad_norm: 0.999999270126326, iteration: 192636
loss: 1.0066791772842407,grad_norm: 0.9999991757497693, iteration: 192637
loss: 1.035004734992981,grad_norm: 0.9999990027986315, iteration: 192638
loss: 1.0104455947875977,grad_norm: 0.86591726603317, iteration: 192639
loss: 1.000747799873352,grad_norm: 0.9999992526833156, iteration: 192640
loss: 0.9890934824943542,grad_norm: 0.9506533807694608, iteration: 192641
loss: 1.010433316230774,grad_norm: 0.9348559208158465, iteration: 192642
loss: 0.9954254627227783,grad_norm: 0.9999989283717263, iteration: 192643
loss: 1.0127407312393188,grad_norm: 0.9232421424311488, iteration: 192644
loss: 1.003578782081604,grad_norm: 0.8816507892481968, iteration: 192645
loss: 0.9793379306793213,grad_norm: 0.9999991615044239, iteration: 192646
loss: 0.984451174736023,grad_norm: 0.8872812951453701, iteration: 192647
loss: 1.0473891496658325,grad_norm: 0.9539417064511864, iteration: 192648
loss: 0.9993083477020264,grad_norm: 0.9999993217789458, iteration: 192649
loss: 1.0064302682876587,grad_norm: 0.9452635449775166, iteration: 192650
loss: 0.9922578930854797,grad_norm: 0.9465375796251811, iteration: 192651
loss: 0.9833149313926697,grad_norm: 0.894606463947918, iteration: 192652
loss: 0.9927724599838257,grad_norm: 0.9999991727081196, iteration: 192653
loss: 1.053451418876648,grad_norm: 0.9999991173861227, iteration: 192654
loss: 0.9968894124031067,grad_norm: 0.900914119440772, iteration: 192655
loss: 0.9357178211212158,grad_norm: 0.9999993521685494, iteration: 192656
loss: 1.0006860494613647,grad_norm: 0.9892983050381547, iteration: 192657
loss: 0.9757139682769775,grad_norm: 0.8894082579385617, iteration: 192658
loss: 1.0060116052627563,grad_norm: 0.9812733314980197, iteration: 192659
loss: 1.000199794769287,grad_norm: 0.8948854325460341, iteration: 192660
loss: 0.9891215562820435,grad_norm: 0.939841019749945, iteration: 192661
loss: 1.0010805130004883,grad_norm: 0.874926891566085, iteration: 192662
loss: 0.9816681742668152,grad_norm: 0.9356221830896928, iteration: 192663
loss: 1.0199174880981445,grad_norm: 0.8599903698431574, iteration: 192664
loss: 1.0176596641540527,grad_norm: 0.9773892679122911, iteration: 192665
loss: 0.9971533417701721,grad_norm: 0.9999988931212664, iteration: 192666
loss: 0.993533194065094,grad_norm: 0.91169135431069, iteration: 192667
loss: 0.9855151772499084,grad_norm: 0.9979782452977917, iteration: 192668
loss: 0.9770575761795044,grad_norm: 0.9999990647518497, iteration: 192669
loss: 0.9943148493766785,grad_norm: 0.8924439863364739, iteration: 192670
loss: 1.0354598760604858,grad_norm: 0.8762743665336229, iteration: 192671
loss: 0.9902458786964417,grad_norm: 0.8816439423259629, iteration: 192672
loss: 1.013721227645874,grad_norm: 0.9999991550120417, iteration: 192673
loss: 1.012427806854248,grad_norm: 0.9999991356796917, iteration: 192674
loss: 0.9642776846885681,grad_norm: 0.8435620318066663, iteration: 192675
loss: 1.0008176565170288,grad_norm: 0.9999993802702468, iteration: 192676
loss: 1.0302914381027222,grad_norm: 0.9749465643437529, iteration: 192677
loss: 1.039841651916504,grad_norm: 0.999998989709143, iteration: 192678
loss: 1.0173892974853516,grad_norm: 0.999998964149566, iteration: 192679
loss: 1.1067184209823608,grad_norm: 0.9999998136335109, iteration: 192680
loss: 1.0048550367355347,grad_norm: 0.9999991406516752, iteration: 192681
loss: 1.0431143045425415,grad_norm: 0.9999992149884337, iteration: 192682
loss: 1.0438389778137207,grad_norm: 0.9999990393956483, iteration: 192683
loss: 1.0046701431274414,grad_norm: 0.964888678307175, iteration: 192684
loss: 1.0240812301635742,grad_norm: 0.9999997393689792, iteration: 192685
loss: 0.991763174533844,grad_norm: 0.9198323655871431, iteration: 192686
loss: 1.0081462860107422,grad_norm: 0.9440759286758746, iteration: 192687
loss: 1.0174189805984497,grad_norm: 0.9241083160779083, iteration: 192688
loss: 1.0000821352005005,grad_norm: 0.9652556948401072, iteration: 192689
loss: 1.0312702655792236,grad_norm: 0.9223266640144239, iteration: 192690
loss: 0.999025821685791,grad_norm: 0.8311097433947215, iteration: 192691
loss: 1.0127789974212646,grad_norm: 0.8346399923793749, iteration: 192692
loss: 0.9868845343589783,grad_norm: 0.8510936991728651, iteration: 192693
loss: 1.0106343030929565,grad_norm: 0.9999992904349403, iteration: 192694
loss: 0.9915029406547546,grad_norm: 0.8942283527160705, iteration: 192695
loss: 0.9667221307754517,grad_norm: 0.9528641968056765, iteration: 192696
loss: 0.9950547218322754,grad_norm: 0.8814290010316597, iteration: 192697
loss: 1.0396784543991089,grad_norm: 0.9999993080929314, iteration: 192698
loss: 0.9865899682044983,grad_norm: 0.9999990818400063, iteration: 192699
loss: 1.005607008934021,grad_norm: 0.9999991549533666, iteration: 192700
loss: 1.0006966590881348,grad_norm: 0.999999070159787, iteration: 192701
loss: 1.0225238800048828,grad_norm: 0.9375839381728804, iteration: 192702
loss: 1.0222922563552856,grad_norm: 0.9239966203664159, iteration: 192703
loss: 1.0429593324661255,grad_norm: 0.7687368235857025, iteration: 192704
loss: 1.0095813274383545,grad_norm: 0.9999990264062142, iteration: 192705
loss: 1.0185794830322266,grad_norm: 0.9999991814987266, iteration: 192706
loss: 1.0039280652999878,grad_norm: 0.8927470218865518, iteration: 192707
loss: 1.0275828838348389,grad_norm: 0.9999991424784184, iteration: 192708
loss: 0.9955877065658569,grad_norm: 0.9999994441379315, iteration: 192709
loss: 1.0261688232421875,grad_norm: 0.8480751458803926, iteration: 192710
loss: 0.9637160301208496,grad_norm: 0.9999990971643068, iteration: 192711
loss: 0.9735943078994751,grad_norm: 0.9909945339155645, iteration: 192712
loss: 1.0030771493911743,grad_norm: 0.9999990533115819, iteration: 192713
loss: 0.9960097670555115,grad_norm: 0.9999991462442305, iteration: 192714
loss: 0.9752632975578308,grad_norm: 0.999999273125843, iteration: 192715
loss: 1.0080876350402832,grad_norm: 0.8438404407993154, iteration: 192716
loss: 1.0227999687194824,grad_norm: 0.9999991894794695, iteration: 192717
loss: 1.0123378038406372,grad_norm: 0.9341005096042357, iteration: 192718
loss: 1.0048662424087524,grad_norm: 0.99999910429003, iteration: 192719
loss: 0.9932920932769775,grad_norm: 0.9999990655526605, iteration: 192720
loss: 0.9866589307785034,grad_norm: 0.8579602237017352, iteration: 192721
loss: 1.0217417478561401,grad_norm: 0.9999994073826798, iteration: 192722
loss: 1.01032555103302,grad_norm: 0.8513134245955952, iteration: 192723
loss: 1.025452971458435,grad_norm: 0.9628018266180915, iteration: 192724
loss: 0.9199328422546387,grad_norm: 0.8957715644623231, iteration: 192725
loss: 0.9698948264122009,grad_norm: 0.9658541243957152, iteration: 192726
loss: 1.0043398141860962,grad_norm: 0.9539638326291501, iteration: 192727
loss: 1.002264380455017,grad_norm: 0.8991907341382498, iteration: 192728
loss: 0.9970489144325256,grad_norm: 0.8792867345835065, iteration: 192729
loss: 0.9878627061843872,grad_norm: 0.8322566650766748, iteration: 192730
loss: 0.9718472361564636,grad_norm: 0.9999990867843446, iteration: 192731
loss: 0.9861540794372559,grad_norm: 0.7178028618688659, iteration: 192732
loss: 0.9941627383232117,grad_norm: 0.9999990059717323, iteration: 192733
loss: 0.9874396324157715,grad_norm: 0.9999991680287766, iteration: 192734
loss: 1.0145678520202637,grad_norm: 0.9999989834447336, iteration: 192735
loss: 0.9564553499221802,grad_norm: 0.9999990304656071, iteration: 192736
loss: 0.9676799178123474,grad_norm: 0.9852894087044659, iteration: 192737
loss: 1.0159446001052856,grad_norm: 0.9999992016605387, iteration: 192738
loss: 1.0010700225830078,grad_norm: 0.7731556199006473, iteration: 192739
loss: 0.9924664497375488,grad_norm: 0.9850634333192042, iteration: 192740
loss: 1.0037721395492554,grad_norm: 0.8849307227878758, iteration: 192741
loss: 0.9876112937927246,grad_norm: 0.9933348218439886, iteration: 192742
loss: 1.0053608417510986,grad_norm: 0.9745881961847472, iteration: 192743
loss: 1.0062075853347778,grad_norm: 0.892435698858801, iteration: 192744
loss: 1.0143935680389404,grad_norm: 0.9999990917649555, iteration: 192745
loss: 0.9921734929084778,grad_norm: 0.816175046334495, iteration: 192746
loss: 1.0028965473175049,grad_norm: 0.9311954860923705, iteration: 192747
loss: 1.0085694789886475,grad_norm: 0.9999995284102619, iteration: 192748
loss: 0.9983906149864197,grad_norm: 0.9999990755903473, iteration: 192749
loss: 0.9958022236824036,grad_norm: 0.8252300692397869, iteration: 192750
loss: 0.9843929409980774,grad_norm: 0.9139013539710779, iteration: 192751
loss: 1.0368397235870361,grad_norm: 0.9999992605486767, iteration: 192752
loss: 1.0217006206512451,grad_norm: 0.9999990438739549, iteration: 192753
loss: 0.9531936645507812,grad_norm: 0.9999990519038326, iteration: 192754
loss: 1.0317308902740479,grad_norm: 0.9788912894630568, iteration: 192755
loss: 0.9919395446777344,grad_norm: 0.9223689841806941, iteration: 192756
loss: 0.9731859564781189,grad_norm: 0.9130393403878688, iteration: 192757
loss: 1.008233666419983,grad_norm: 0.9999990541397971, iteration: 192758
loss: 0.9820680022239685,grad_norm: 0.9999990249088321, iteration: 192759
loss: 0.9843528866767883,grad_norm: 0.9999991403419428, iteration: 192760
loss: 0.9732828736305237,grad_norm: 0.9272617205048879, iteration: 192761
loss: 0.9920012950897217,grad_norm: 0.9999995944348956, iteration: 192762
loss: 1.0209590196609497,grad_norm: 0.8397528832143619, iteration: 192763
loss: 0.9935856461524963,grad_norm: 0.8734110245351255, iteration: 192764
loss: 1.0055404901504517,grad_norm: 0.8760433949219578, iteration: 192765
loss: 1.017241358757019,grad_norm: 0.968927405235831, iteration: 192766
loss: 1.0048519372940063,grad_norm: 0.9999991752666336, iteration: 192767
loss: 1.0320740938186646,grad_norm: 0.9999989543090114, iteration: 192768
loss: 0.982090175151825,grad_norm: 0.9999992641650308, iteration: 192769
loss: 0.983431339263916,grad_norm: 0.8853536412987952, iteration: 192770
loss: 1.0111732482910156,grad_norm: 0.9999992605051405, iteration: 192771
loss: 0.9426941871643066,grad_norm: 0.9999991117048345, iteration: 192772
loss: 1.0189249515533447,grad_norm: 0.8518232036240134, iteration: 192773
loss: 0.9956337809562683,grad_norm: 0.8984035619832711, iteration: 192774
loss: 0.9969311356544495,grad_norm: 0.9520158504265777, iteration: 192775
loss: 0.9830933213233948,grad_norm: 0.9872131739426827, iteration: 192776
loss: 0.9803804159164429,grad_norm: 0.9178384383440948, iteration: 192777
loss: 0.9879708886146545,grad_norm: 0.9999991214467802, iteration: 192778
loss: 1.015579342842102,grad_norm: 0.9999992630754163, iteration: 192779
loss: 0.988749623298645,grad_norm: 0.9999990513501149, iteration: 192780
loss: 0.9724400639533997,grad_norm: 0.9336049291013193, iteration: 192781
loss: 1.0211536884307861,grad_norm: 0.9999990554555539, iteration: 192782
loss: 0.9733866453170776,grad_norm: 0.9999990762227281, iteration: 192783
loss: 1.0014585256576538,grad_norm: 0.87966265838697, iteration: 192784
loss: 0.9859540462493896,grad_norm: 0.8396756513299317, iteration: 192785
loss: 0.9954736232757568,grad_norm: 0.881473866498568, iteration: 192786
loss: 0.9594952464103699,grad_norm: 0.9999990746894803, iteration: 192787
loss: 1.0069297552108765,grad_norm: 0.9393881431919778, iteration: 192788
loss: 1.004257321357727,grad_norm: 0.7280838158963496, iteration: 192789
loss: 1.0222840309143066,grad_norm: 0.950155612153081, iteration: 192790
loss: 0.9919652938842773,grad_norm: 0.9999991159206425, iteration: 192791
loss: 0.9927781820297241,grad_norm: 0.99999909339891, iteration: 192792
loss: 0.9954620599746704,grad_norm: 0.9143531384814835, iteration: 192793
loss: 0.9721132516860962,grad_norm: 0.9173944034911026, iteration: 192794
loss: 0.96979820728302,grad_norm: 0.9355958153844199, iteration: 192795
loss: 0.9915850162506104,grad_norm: 0.8980710341309553, iteration: 192796
loss: 1.0240613222122192,grad_norm: 0.8809865857844824, iteration: 192797
loss: 1.034088373184204,grad_norm: 0.9999991600269796, iteration: 192798
loss: 1.034314513206482,grad_norm: 0.9999993563368625, iteration: 192799
loss: 0.9942330718040466,grad_norm: 0.7626646662791505, iteration: 192800
loss: 1.0223616361618042,grad_norm: 0.9278473350069734, iteration: 192801
loss: 0.9966533780097961,grad_norm: 0.9999989968832655, iteration: 192802
loss: 0.9566991925239563,grad_norm: 0.999998913117127, iteration: 192803
loss: 1.032446026802063,grad_norm: 0.9270227224229143, iteration: 192804
loss: 0.9736312031745911,grad_norm: 0.9699312271162971, iteration: 192805
loss: 0.9829326272010803,grad_norm: 0.9999990709349907, iteration: 192806
loss: 1.0168557167053223,grad_norm: 0.9999989243734847, iteration: 192807
loss: 0.967150092124939,grad_norm: 0.9999991498099714, iteration: 192808
loss: 0.9703465700149536,grad_norm: 0.999999470575598, iteration: 192809
loss: 0.9667155146598816,grad_norm: 0.9202662755489676, iteration: 192810
loss: 1.0336800813674927,grad_norm: 0.9999997736434677, iteration: 192811
loss: 1.0267287492752075,grad_norm: 0.8740708016020103, iteration: 192812
loss: 0.9920084476470947,grad_norm: 0.9230327478616388, iteration: 192813
loss: 1.0158637762069702,grad_norm: 0.80294777909729, iteration: 192814
loss: 1.0012056827545166,grad_norm: 0.8822439941020143, iteration: 192815
loss: 0.9963091611862183,grad_norm: 0.8956874070233558, iteration: 192816
loss: 0.9906054139137268,grad_norm: 0.9999991439677192, iteration: 192817
loss: 1.0311440229415894,grad_norm: 0.9704506886939056, iteration: 192818
loss: 0.9872733354568481,grad_norm: 0.902491170466211, iteration: 192819
loss: 1.0386046171188354,grad_norm: 0.8840452301040497, iteration: 192820
loss: 0.9966527819633484,grad_norm: 0.9867119909454038, iteration: 192821
loss: 0.9985414743423462,grad_norm: 0.9888413898932584, iteration: 192822
loss: 1.0050917863845825,grad_norm: 0.9999991066074395, iteration: 192823
loss: 1.0164458751678467,grad_norm: 0.9999993729877868, iteration: 192824
loss: 0.997170627117157,grad_norm: 0.9433368006053235, iteration: 192825
loss: 1.012404441833496,grad_norm: 0.793765069102113, iteration: 192826
loss: 0.9913523197174072,grad_norm: 0.8986202218565643, iteration: 192827
loss: 0.9770733714103699,grad_norm: 0.9999992181265625, iteration: 192828
loss: 0.9663153886795044,grad_norm: 0.9213390378964379, iteration: 192829
loss: 1.0102354288101196,grad_norm: 0.8599334550048127, iteration: 192830
loss: 0.984358549118042,grad_norm: 0.9999994521222946, iteration: 192831
loss: 0.9836230278015137,grad_norm: 0.8367251736833039, iteration: 192832
loss: 1.0178132057189941,grad_norm: 0.9999992232596546, iteration: 192833
loss: 0.9951705932617188,grad_norm: 0.8418042250192015, iteration: 192834
loss: 1.0142384767532349,grad_norm: 0.9999991530332604, iteration: 192835
loss: 1.0308762788772583,grad_norm: 0.9091199584257266, iteration: 192836
loss: 0.9754208922386169,grad_norm: 0.9288542032528823, iteration: 192837
loss: 1.0247149467468262,grad_norm: 0.9999990919460259, iteration: 192838
loss: 0.9650717973709106,grad_norm: 0.9999990228077358, iteration: 192839
loss: 0.9940463304519653,grad_norm: 0.9239204599324463, iteration: 192840
loss: 1.041866421699524,grad_norm: 0.95820762999048, iteration: 192841
loss: 1.0271856784820557,grad_norm: 0.8596169642407543, iteration: 192842
loss: 0.9529368281364441,grad_norm: 0.9999990826867065, iteration: 192843
loss: 0.9710909724235535,grad_norm: 0.9198270947939573, iteration: 192844
loss: 1.029849648475647,grad_norm: 0.9999990351533273, iteration: 192845
loss: 0.9775820970535278,grad_norm: 0.9999998687509362, iteration: 192846
loss: 1.0230190753936768,grad_norm: 0.8243054507295537, iteration: 192847
loss: 0.9966362118721008,grad_norm: 0.9307842880391598, iteration: 192848
loss: 1.0135356187820435,grad_norm: 0.974123141214107, iteration: 192849
loss: 1.0367896556854248,grad_norm: 0.9999991522089465, iteration: 192850
loss: 1.0099529027938843,grad_norm: 0.9999992037228785, iteration: 192851
loss: 0.9796706438064575,grad_norm: 0.9999991393525723, iteration: 192852
loss: 0.9916125535964966,grad_norm: 0.8901786208036476, iteration: 192853
loss: 0.9656776189804077,grad_norm: 0.9999989846625242, iteration: 192854
loss: 0.9967000484466553,grad_norm: 0.9118328146443412, iteration: 192855
loss: 0.9744514226913452,grad_norm: 0.9999991111794309, iteration: 192856
loss: 0.9806175231933594,grad_norm: 0.9995514355080222, iteration: 192857
loss: 0.9919363260269165,grad_norm: 0.9143329013712856, iteration: 192858
loss: 0.9724758267402649,grad_norm: 0.8916194927154696, iteration: 192859
loss: 0.9988546371459961,grad_norm: 0.9999990717262307, iteration: 192860
loss: 0.9869513511657715,grad_norm: 0.9742059500276058, iteration: 192861
loss: 0.9780053496360779,grad_norm: 0.9999991132913515, iteration: 192862
loss: 1.04145085811615,grad_norm: 0.9798906220340761, iteration: 192863
loss: 1.055416464805603,grad_norm: 0.9999995774237864, iteration: 192864
loss: 1.0193332433700562,grad_norm: 0.9796324027281799, iteration: 192865
loss: 0.9739561676979065,grad_norm: 0.905358646246404, iteration: 192866
loss: 0.9969857931137085,grad_norm: 0.9999990950647842, iteration: 192867
loss: 0.981446385383606,grad_norm: 0.8973324626552047, iteration: 192868
loss: 1.0030527114868164,grad_norm: 0.9999990379066567, iteration: 192869
loss: 0.9876075983047485,grad_norm: 0.9999990306501938, iteration: 192870
loss: 0.9873755574226379,grad_norm: 0.9999994802467069, iteration: 192871
loss: 0.9721229672431946,grad_norm: 0.9684133636693241, iteration: 192872
loss: 0.9966799020767212,grad_norm: 0.9721659969432945, iteration: 192873
loss: 0.9878142476081848,grad_norm: 0.7919594734230143, iteration: 192874
loss: 0.9653962850570679,grad_norm: 0.9468342765985661, iteration: 192875
loss: 1.0211083889007568,grad_norm: 0.9999991833972718, iteration: 192876
loss: 0.9608170986175537,grad_norm: 0.9825939934542903, iteration: 192877
loss: 1.0307739973068237,grad_norm: 0.9327336935235988, iteration: 192878
loss: 1.0028678178787231,grad_norm: 0.9001745256576238, iteration: 192879
loss: 1.000842571258545,grad_norm: 0.9607267145755136, iteration: 192880
loss: 1.1077176332473755,grad_norm: 0.9999994898413893, iteration: 192881
loss: 1.0051363706588745,grad_norm: 0.9999992851428688, iteration: 192882
loss: 0.9892696738243103,grad_norm: 0.9999992923012203, iteration: 192883
loss: 0.9954738020896912,grad_norm: 0.9904002102659663, iteration: 192884
loss: 0.9945428371429443,grad_norm: 0.9999990329446173, iteration: 192885
loss: 1.0050510168075562,grad_norm: 0.9669493276988049, iteration: 192886
loss: 0.9833551049232483,grad_norm: 0.9999990561348112, iteration: 192887
loss: 1.0045146942138672,grad_norm: 0.9999994196975156, iteration: 192888
loss: 0.9865790009498596,grad_norm: 0.9999990493651283, iteration: 192889
loss: 1.002464771270752,grad_norm: 0.9647108840715684, iteration: 192890
loss: 0.9852705001831055,grad_norm: 0.9130049208061967, iteration: 192891
loss: 0.9641167521476746,grad_norm: 0.9470135382868816, iteration: 192892
loss: 1.0119950771331787,grad_norm: 0.9999990913959101, iteration: 192893
loss: 0.9857731461524963,grad_norm: 0.9999990253503435, iteration: 192894
loss: 0.9667800068855286,grad_norm: 0.9911238723593924, iteration: 192895
loss: 1.0052796602249146,grad_norm: 0.8699908295856886, iteration: 192896
loss: 0.9561977982521057,grad_norm: 0.9999990901851957, iteration: 192897
loss: 0.9831516146659851,grad_norm: 0.9383867036934649, iteration: 192898
loss: 1.0095618963241577,grad_norm: 0.9578809564790688, iteration: 192899
loss: 0.9902801513671875,grad_norm: 0.9999993518933126, iteration: 192900
loss: 0.9954043030738831,grad_norm: 0.865703130941333, iteration: 192901
loss: 0.9621362686157227,grad_norm: 0.9999990561854861, iteration: 192902
loss: 1.0168308019638062,grad_norm: 0.9999990161770885, iteration: 192903
loss: 1.0136542320251465,grad_norm: 0.7771966049817285, iteration: 192904
loss: 1.0264027118682861,grad_norm: 0.9999992871554187, iteration: 192905
loss: 1.0257740020751953,grad_norm: 0.7643211933635347, iteration: 192906
loss: 1.0155096054077148,grad_norm: 0.8680234939230608, iteration: 192907
loss: 0.9890879988670349,grad_norm: 0.9999991750366999, iteration: 192908
loss: 0.9548189043998718,grad_norm: 0.9619929070335561, iteration: 192909
loss: 1.006845235824585,grad_norm: 0.9415239952961262, iteration: 192910
loss: 1.020820140838623,grad_norm: 0.7588867816909477, iteration: 192911
loss: 0.963140070438385,grad_norm: 0.9781351959861413, iteration: 192912
loss: 0.967042863368988,grad_norm: 0.974271934679262, iteration: 192913
loss: 1.0190871953964233,grad_norm: 0.9727871327983793, iteration: 192914
loss: 0.9722601771354675,grad_norm: 0.8821831423229993, iteration: 192915
loss: 0.976939857006073,grad_norm: 0.8913258402758395, iteration: 192916
loss: 0.9869982600212097,grad_norm: 0.9912083586934315, iteration: 192917
loss: 0.9926439523696899,grad_norm: 0.8892621401157045, iteration: 192918
loss: 1.0159516334533691,grad_norm: 0.939268824482778, iteration: 192919
loss: 1.0274113416671753,grad_norm: 0.9845016492359939, iteration: 192920
loss: 0.9797060489654541,grad_norm: 0.9807385477388106, iteration: 192921
loss: 0.9855714440345764,grad_norm: 0.8823723186661406, iteration: 192922
loss: 0.9760743975639343,grad_norm: 0.9264495881460945, iteration: 192923
loss: 0.9978288412094116,grad_norm: 0.9999990759582075, iteration: 192924
loss: 0.991125762462616,grad_norm: 0.9092339414880529, iteration: 192925
loss: 1.012886881828308,grad_norm: 0.9999990864698786, iteration: 192926
loss: 1.038360357284546,grad_norm: 0.9999992766124522, iteration: 192927
loss: 0.9926669001579285,grad_norm: 0.9999992472517771, iteration: 192928
loss: 1.004646897315979,grad_norm: 0.9575192649277097, iteration: 192929
loss: 1.0133740901947021,grad_norm: 0.9020953315048429, iteration: 192930
loss: 0.9509671330451965,grad_norm: 0.9702511156563585, iteration: 192931
loss: 1.0035001039505005,grad_norm: 0.9053782002790588, iteration: 192932
loss: 0.9863981008529663,grad_norm: 0.9177847986344055, iteration: 192933
loss: 1.0095336437225342,grad_norm: 0.999999014931624, iteration: 192934
loss: 1.0257402658462524,grad_norm: 0.9999990052289542, iteration: 192935
loss: 1.014832854270935,grad_norm: 0.9999991420348133, iteration: 192936
loss: 0.9854657053947449,grad_norm: 0.9953268295524514, iteration: 192937
loss: 0.9620903134346008,grad_norm: 0.9999992818561861, iteration: 192938
loss: 1.0042427778244019,grad_norm: 0.9999989472997043, iteration: 192939
loss: 1.025753378868103,grad_norm: 0.9999991726876636, iteration: 192940
loss: 1.0185714960098267,grad_norm: 0.9195210854552538, iteration: 192941
loss: 0.9973341226577759,grad_norm: 0.8592843580296706, iteration: 192942
loss: 0.9584865570068359,grad_norm: 0.8096508251452382, iteration: 192943
loss: 1.031299114227295,grad_norm: 0.999999128661374, iteration: 192944
loss: 0.9747251868247986,grad_norm: 0.9999991465206012, iteration: 192945
loss: 1.0518020391464233,grad_norm: 0.999999027826036, iteration: 192946
loss: 0.9991417527198792,grad_norm: 0.8312962886853288, iteration: 192947
loss: 0.9905931353569031,grad_norm: 0.9815398230109303, iteration: 192948
loss: 0.9902971982955933,grad_norm: 0.9999989799123693, iteration: 192949
loss: 0.9990032911300659,grad_norm: 0.8529873036880591, iteration: 192950
loss: 0.9935183525085449,grad_norm: 0.9543249416761573, iteration: 192951
loss: 1.0028151273727417,grad_norm: 0.9137773574280623, iteration: 192952
loss: 1.0065592527389526,grad_norm: 0.9861809731990515, iteration: 192953
loss: 0.9844409823417664,grad_norm: 0.9079510831480285, iteration: 192954
loss: 0.9892312288284302,grad_norm: 0.9999992787124605, iteration: 192955
loss: 0.9801350831985474,grad_norm: 0.8786772323313261, iteration: 192956
loss: 1.0059031248092651,grad_norm: 0.9999991412720467, iteration: 192957
loss: 0.986739456653595,grad_norm: 0.823097890396715, iteration: 192958
loss: 1.030434489250183,grad_norm: 0.9999992160134722, iteration: 192959
loss: 1.0200446844100952,grad_norm: 0.9999990563879839, iteration: 192960
loss: 0.9492218494415283,grad_norm: 0.9999997808952209, iteration: 192961
loss: 1.0310156345367432,grad_norm: 0.9999992591564508, iteration: 192962
loss: 0.9949184060096741,grad_norm: 0.8233999042118378, iteration: 192963
loss: 0.9884169101715088,grad_norm: 0.9999992145353052, iteration: 192964
loss: 0.9775070548057556,grad_norm: 0.8851562171568602, iteration: 192965
loss: 1.0087740421295166,grad_norm: 0.9576319491945281, iteration: 192966
loss: 1.0189071893692017,grad_norm: 0.9999991017579026, iteration: 192967
loss: 0.9462416172027588,grad_norm: 0.8953424627246367, iteration: 192968
loss: 1.0460823774337769,grad_norm: 0.9999998536108653, iteration: 192969
loss: 0.9913346171379089,grad_norm: 0.8827816459958414, iteration: 192970
loss: 0.996285080909729,grad_norm: 0.9671615589066346, iteration: 192971
loss: 1.0164248943328857,grad_norm: 0.9074557475429967, iteration: 192972
loss: 0.9671103358268738,grad_norm: 0.9803885104897448, iteration: 192973
loss: 1.0284793376922607,grad_norm: 0.9999992167167902, iteration: 192974
loss: 1.016761302947998,grad_norm: 0.9999991251679333, iteration: 192975
loss: 0.9707932472229004,grad_norm: 0.9999989891750548, iteration: 192976
loss: 0.9983865022659302,grad_norm: 0.9122778633159608, iteration: 192977
loss: 0.9997645616531372,grad_norm: 0.9053935363524834, iteration: 192978
loss: 1.0074176788330078,grad_norm: 0.9999990348536333, iteration: 192979
loss: 0.966622531414032,grad_norm: 0.9572958171018118, iteration: 192980
loss: 0.9913999438285828,grad_norm: 0.9336842422812164, iteration: 192981
loss: 0.9948499202728271,grad_norm: 0.9910717446485247, iteration: 192982
loss: 0.986736536026001,grad_norm: 0.9803425112819399, iteration: 192983
loss: 0.9870993494987488,grad_norm: 0.9560965160880753, iteration: 192984
loss: 1.0285496711730957,grad_norm: 0.7643759011924356, iteration: 192985
loss: 0.9980621337890625,grad_norm: 0.9845906060266034, iteration: 192986
loss: 1.0073271989822388,grad_norm: 0.9999992263227071, iteration: 192987
loss: 0.9995259642601013,grad_norm: 0.9999990710798521, iteration: 192988
loss: 0.9913877844810486,grad_norm: 0.8086918874169252, iteration: 192989
loss: 1.0079115629196167,grad_norm: 0.8936576582843353, iteration: 192990
loss: 0.9957084655761719,grad_norm: 0.9999991254224202, iteration: 192991
loss: 0.9431332349777222,grad_norm: 0.9593634371213213, iteration: 192992
loss: 1.008980631828308,grad_norm: 0.9078054673753396, iteration: 192993
loss: 1.0136669874191284,grad_norm: 0.9182330004231569, iteration: 192994
loss: 0.9495330452919006,grad_norm: 0.9999990809742142, iteration: 192995
loss: 0.9845665097236633,grad_norm: 0.9999989386532663, iteration: 192996
loss: 0.9990102052688599,grad_norm: 0.9999990379127145, iteration: 192997
loss: 0.97733473777771,grad_norm: 0.9101610887489128, iteration: 192998
loss: 1.008399486541748,grad_norm: 0.9999992129359243, iteration: 192999
loss: 0.9793046712875366,grad_norm: 0.9187052318396012, iteration: 193000
loss: 1.0038200616836548,grad_norm: 0.9999992335515732, iteration: 193001
loss: 1.017199993133545,grad_norm: 0.9587055729116615, iteration: 193002
loss: 1.035825490951538,grad_norm: 0.9999992259382258, iteration: 193003
loss: 0.9911623001098633,grad_norm: 0.9999991332350067, iteration: 193004
loss: 0.9621837139129639,grad_norm: 0.9999990606534136, iteration: 193005
loss: 1.0117157697677612,grad_norm: 0.9999990783228495, iteration: 193006
loss: 0.9848140478134155,grad_norm: 0.9706430885163411, iteration: 193007
loss: 0.9860636591911316,grad_norm: 0.9999991940200562, iteration: 193008
loss: 0.9926558136940002,grad_norm: 0.9999992326403813, iteration: 193009
loss: 0.9653180241584778,grad_norm: 0.8984284443084789, iteration: 193010
loss: 0.992660641670227,grad_norm: 0.9999990579117086, iteration: 193011
loss: 0.9861183166503906,grad_norm: 0.9492965745704525, iteration: 193012
loss: 0.9727433919906616,grad_norm: 0.9999990065297887, iteration: 193013
loss: 1.0001049041748047,grad_norm: 0.9114518639469165, iteration: 193014
loss: 0.9793693423271179,grad_norm: 0.999683600504944, iteration: 193015
loss: 0.9897401928901672,grad_norm: 0.9999992443144555, iteration: 193016
loss: 0.9849452972412109,grad_norm: 0.9999990088216273, iteration: 193017
loss: 0.9900302886962891,grad_norm: 0.8175795848056374, iteration: 193018
loss: 1.0105658769607544,grad_norm: 0.7867451186952154, iteration: 193019
loss: 0.991072952747345,grad_norm: 0.9999993167617037, iteration: 193020
loss: 0.9896844029426575,grad_norm: 0.9536686772260752, iteration: 193021
loss: 0.9821630120277405,grad_norm: 0.9548931207492791, iteration: 193022
loss: 0.9643732309341431,grad_norm: 0.9999992130275859, iteration: 193023
loss: 1.0019009113311768,grad_norm: 0.8970747398317234, iteration: 193024
loss: 0.9928052425384521,grad_norm: 0.9804352587831922, iteration: 193025
loss: 0.9962546825408936,grad_norm: 0.8945306953117147, iteration: 193026
loss: 0.9932389855384827,grad_norm: 0.9999991918072273, iteration: 193027
loss: 0.9664042592048645,grad_norm: 0.9999989103218514, iteration: 193028
loss: 0.9917299747467041,grad_norm: 0.8179446983514101, iteration: 193029
loss: 1.0048326253890991,grad_norm: 0.9999990858175036, iteration: 193030
loss: 1.0032098293304443,grad_norm: 1.0000000166534369, iteration: 193031
loss: 1.0404764413833618,grad_norm: 0.9828098276649296, iteration: 193032
loss: 1.0100971460342407,grad_norm: 0.9999991362302981, iteration: 193033
loss: 1.0031349658966064,grad_norm: 0.8653157493501751, iteration: 193034
loss: 1.0251874923706055,grad_norm: 0.9910729993699585, iteration: 193035
loss: 0.9562179446220398,grad_norm: 0.9065023494360746, iteration: 193036
loss: 1.0266214609146118,grad_norm: 0.9999989425012858, iteration: 193037
loss: 1.0449116230010986,grad_norm: 0.9240643125355891, iteration: 193038
loss: 1.0085396766662598,grad_norm: 0.8690393076534841, iteration: 193039
loss: 1.0180257558822632,grad_norm: 0.99999910295743, iteration: 193040
loss: 0.9728121757507324,grad_norm: 0.9493368816968751, iteration: 193041
loss: 0.9719411134719849,grad_norm: 0.8539393736077963, iteration: 193042
loss: 1.0054322481155396,grad_norm: 0.9184975609426818, iteration: 193043
loss: 1.015377163887024,grad_norm: 0.9999990994851021, iteration: 193044
loss: 1.013249158859253,grad_norm: 0.9999991840096853, iteration: 193045
loss: 1.0016148090362549,grad_norm: 0.9999992381081414, iteration: 193046
loss: 0.989328920841217,grad_norm: 0.8854222941180734, iteration: 193047
loss: 1.0227422714233398,grad_norm: 0.9534251646393498, iteration: 193048
loss: 1.0012292861938477,grad_norm: 0.9999990651707784, iteration: 193049
loss: 0.9971907734870911,grad_norm: 0.8880950780371623, iteration: 193050
loss: 0.9781619310379028,grad_norm: 0.9448291191754403, iteration: 193051
loss: 1.0139837265014648,grad_norm: 0.999999517150985, iteration: 193052
loss: 1.0274983644485474,grad_norm: 0.8046031326719543, iteration: 193053
loss: 1.0210245847702026,grad_norm: 0.9423183565312432, iteration: 193054
loss: 0.9933974146842957,grad_norm: 0.8600469013607742, iteration: 193055
loss: 0.9651617407798767,grad_norm: 0.9999991390053891, iteration: 193056
loss: 1.0113986730575562,grad_norm: 0.9999990803911029, iteration: 193057
loss: 1.0025259256362915,grad_norm: 0.9999993643002907, iteration: 193058
loss: 1.009782314300537,grad_norm: 0.9108874301125036, iteration: 193059
loss: 1.0146564245224,grad_norm: 0.9491004351778143, iteration: 193060
loss: 0.9954631328582764,grad_norm: 0.9999990857261317, iteration: 193061
loss: 0.9882197380065918,grad_norm: 0.9424132490926336, iteration: 193062
loss: 0.9921553730964661,grad_norm: 0.9534129045997779, iteration: 193063
loss: 1.0008854866027832,grad_norm: 0.999999025496573, iteration: 193064
loss: 0.9752331376075745,grad_norm: 0.9314497008637387, iteration: 193065
loss: 1.0135077238082886,grad_norm: 0.950318073415951, iteration: 193066
loss: 1.0119383335113525,grad_norm: 0.9867511883753757, iteration: 193067
loss: 1.0138760805130005,grad_norm: 0.9311583755150276, iteration: 193068
loss: 1.0324538946151733,grad_norm: 0.9999989073206419, iteration: 193069
loss: 0.9907686114311218,grad_norm: 0.8523751521818143, iteration: 193070
loss: 0.996141791343689,grad_norm: 0.9951650224554517, iteration: 193071
loss: 1.0462641716003418,grad_norm: 0.9999991397746486, iteration: 193072
loss: 0.9741365313529968,grad_norm: 0.9999990606836937, iteration: 193073
loss: 0.996121346950531,grad_norm: 0.8832702924419736, iteration: 193074
loss: 0.9998366832733154,grad_norm: 0.9999998063764075, iteration: 193075
loss: 0.9994816780090332,grad_norm: 0.8456883306803961, iteration: 193076
loss: 1.0088435411453247,grad_norm: 0.9954786992984735, iteration: 193077
loss: 1.0296401977539062,grad_norm: 0.8126016734208826, iteration: 193078
loss: 0.9764036536216736,grad_norm: 0.8775796957119274, iteration: 193079
loss: 0.9930191040039062,grad_norm: 0.9036079100207519, iteration: 193080
loss: 0.9976783990859985,grad_norm: 0.8286530900037957, iteration: 193081
loss: 0.9977899789810181,grad_norm: 0.9080371714417567, iteration: 193082
loss: 0.957196056842804,grad_norm: 0.9234951913191771, iteration: 193083
loss: 1.032784104347229,grad_norm: 0.9479317879539498, iteration: 193084
loss: 0.9810325503349304,grad_norm: 0.9999993043859904, iteration: 193085
loss: 1.0181465148925781,grad_norm: 0.8645609439967498, iteration: 193086
loss: 0.9888668656349182,grad_norm: 0.7043862425241877, iteration: 193087
loss: 1.0191622972488403,grad_norm: 0.9139601648981382, iteration: 193088
loss: 1.028273105621338,grad_norm: 0.9999991976281742, iteration: 193089
loss: 1.0208796262741089,grad_norm: 0.9639259814028339, iteration: 193090
loss: 1.0213383436203003,grad_norm: 0.9389672038857784, iteration: 193091
loss: 0.9930223226547241,grad_norm: 0.8556347340411519, iteration: 193092
loss: 1.0648045539855957,grad_norm: 0.999999378677842, iteration: 193093
loss: 1.0399447679519653,grad_norm: 0.9999991022902798, iteration: 193094
loss: 1.0306016206741333,grad_norm: 0.9999990835395168, iteration: 193095
loss: 0.977676272392273,grad_norm: 0.9011977780237631, iteration: 193096
loss: 0.9722824096679688,grad_norm: 0.999999903666979, iteration: 193097
loss: 0.9975436329841614,grad_norm: 0.9333259832348273, iteration: 193098
loss: 1.009705662727356,grad_norm: 0.8537344772136016, iteration: 193099
loss: 1.0031318664550781,grad_norm: 0.9999989748517523, iteration: 193100
loss: 1.000120997428894,grad_norm: 0.9085061229444531, iteration: 193101
loss: 0.9961739182472229,grad_norm: 0.9627302899892941, iteration: 193102
loss: 0.9974574446678162,grad_norm: 0.9693962361503046, iteration: 193103
loss: 0.9990593194961548,grad_norm: 0.9883213632686435, iteration: 193104
loss: 0.9703519940376282,grad_norm: 0.9999990430222249, iteration: 193105
loss: 1.0150320529937744,grad_norm: 0.9999992330184625, iteration: 193106
loss: 0.9676718711853027,grad_norm: 0.7733953065276205, iteration: 193107
loss: 1.00005304813385,grad_norm: 0.9999990252622306, iteration: 193108
loss: 0.9642577171325684,grad_norm: 0.9094634081114932, iteration: 193109
loss: 1.0543051958084106,grad_norm: 0.9999995236024607, iteration: 193110
loss: 0.9818281531333923,grad_norm: 0.99999929437385, iteration: 193111
loss: 0.9606612324714661,grad_norm: 0.9585057959101092, iteration: 193112
loss: 1.0194145441055298,grad_norm: 0.9999991247004542, iteration: 193113
loss: 1.016003966331482,grad_norm: 0.9999990041797133, iteration: 193114
loss: 1.0153453350067139,grad_norm: 0.9999990292741463, iteration: 193115
loss: 1.0233384370803833,grad_norm: 0.9487135544004672, iteration: 193116
loss: 1.0300315618515015,grad_norm: 0.9999995512603157, iteration: 193117
loss: 1.0160731077194214,grad_norm: 0.992031647249858, iteration: 193118
loss: 0.961216926574707,grad_norm: 0.9000192690533857, iteration: 193119
loss: 0.9965749382972717,grad_norm: 0.8544649483607624, iteration: 193120
loss: 0.9969083666801453,grad_norm: 0.9999990769360054, iteration: 193121
loss: 0.97691810131073,grad_norm: 0.9226348467913956, iteration: 193122
loss: 1.0196809768676758,grad_norm: 0.9999991031112424, iteration: 193123
loss: 0.9786126613616943,grad_norm: 0.9999994400893599, iteration: 193124
loss: 1.0441186428070068,grad_norm: 0.9999997986718323, iteration: 193125
loss: 0.9881294965744019,grad_norm: 0.9968549416996179, iteration: 193126
loss: 1.0215356349945068,grad_norm: 0.8874078770545469, iteration: 193127
loss: 0.9852942824363708,grad_norm: 0.8794694421816559, iteration: 193128
loss: 0.9935891628265381,grad_norm: 0.7531416431377321, iteration: 193129
loss: 1.0238337516784668,grad_norm: 0.9999997016275473, iteration: 193130
loss: 0.9687179327011108,grad_norm: 0.7889761057907895, iteration: 193131
loss: 0.9503469467163086,grad_norm: 0.9154433607129188, iteration: 193132
loss: 1.0160242319107056,grad_norm: 0.8877786858337074, iteration: 193133
loss: 1.000641942024231,grad_norm: 0.9248802428771945, iteration: 193134
loss: 0.9971157312393188,grad_norm: 0.888010633490473, iteration: 193135
loss: 0.9879210591316223,grad_norm: 0.9174415620134757, iteration: 193136
loss: 1.0034204721450806,grad_norm: 0.8926504056446299, iteration: 193137
loss: 0.9804836511611938,grad_norm: 0.9760284188192893, iteration: 193138
loss: 1.0141364336013794,grad_norm: 0.7755148850683733, iteration: 193139
loss: 1.0000215768814087,grad_norm: 0.907433573292674, iteration: 193140
loss: 1.0054066181182861,grad_norm: 0.9999991507948114, iteration: 193141
loss: 0.9877208471298218,grad_norm: 0.9999991202432618, iteration: 193142
loss: 0.9967026710510254,grad_norm: 0.9999992231362295, iteration: 193143
loss: 1.0123053789138794,grad_norm: 0.999998990664569, iteration: 193144
loss: 1.0462552309036255,grad_norm: 0.9902003334764801, iteration: 193145
loss: 1.0027164220809937,grad_norm: 0.900588675011503, iteration: 193146
loss: 1.0164976119995117,grad_norm: 0.9207134666276536, iteration: 193147
loss: 1.0157907009124756,grad_norm: 0.9999997181618806, iteration: 193148
loss: 1.0011496543884277,grad_norm: 0.9999991030166375, iteration: 193149
loss: 0.9907382130622864,grad_norm: 0.8820172967904019, iteration: 193150
loss: 0.9936509728431702,grad_norm: 0.99999915496403, iteration: 193151
loss: 1.0113743543624878,grad_norm: 0.9540586647513958, iteration: 193152
loss: 0.9612300395965576,grad_norm: 0.8102211527342256, iteration: 193153
loss: 1.038547158241272,grad_norm: 0.9999998114629037, iteration: 193154
loss: 0.9965730309486389,grad_norm: 0.8304034344669702, iteration: 193155
loss: 1.007717490196228,grad_norm: 0.9999992009199303, iteration: 193156
loss: 1.0222047567367554,grad_norm: 0.958055307782243, iteration: 193157
loss: 1.0251734256744385,grad_norm: 0.9455027591297912, iteration: 193158
loss: 1.0077199935913086,grad_norm: 0.9999991423298575, iteration: 193159
loss: 0.9955933094024658,grad_norm: 0.9107922700832751, iteration: 193160
loss: 1.0285780429840088,grad_norm: 0.9999992684088209, iteration: 193161
loss: 1.0005818605422974,grad_norm: 0.9999991376577482, iteration: 193162
loss: 0.9968931078910828,grad_norm: 0.9213219651234449, iteration: 193163
loss: 0.9851139783859253,grad_norm: 0.9517998071170634, iteration: 193164
loss: 0.9768062829971313,grad_norm: 0.842168869287607, iteration: 193165
loss: 0.9812667965888977,grad_norm: 0.9779531766992586, iteration: 193166
loss: 1.0281776189804077,grad_norm: 0.999999111971732, iteration: 193167
loss: 1.023069977760315,grad_norm: 0.9845644302851551, iteration: 193168
loss: 0.9769359827041626,grad_norm: 0.9064322904399218, iteration: 193169
loss: 0.9856832027435303,grad_norm: 0.9999991074837318, iteration: 193170
loss: 0.9966939091682434,grad_norm: 0.9999991185933753, iteration: 193171
loss: 0.9969369769096375,grad_norm: 0.8243344507092826, iteration: 193172
loss: 1.0212945938110352,grad_norm: 0.9999992090955837, iteration: 193173
loss: 1.0164523124694824,grad_norm: 0.9545742603210725, iteration: 193174
loss: 0.9837131500244141,grad_norm: 0.9999990533480204, iteration: 193175
loss: 1.0148229598999023,grad_norm: 0.9999991490607757, iteration: 193176
loss: 0.9938927292823792,grad_norm: 0.7273786631016902, iteration: 193177
loss: 0.981778085231781,grad_norm: 0.972393486484264, iteration: 193178
loss: 0.9789153933525085,grad_norm: 0.9999990529244426, iteration: 193179
loss: 0.9990331530570984,grad_norm: 0.7935067683666287, iteration: 193180
loss: 1.0375657081604004,grad_norm: 0.9999991437322254, iteration: 193181
loss: 1.0126042366027832,grad_norm: 0.8499739168842353, iteration: 193182
loss: 0.967511773109436,grad_norm: 0.9999991433185882, iteration: 193183
loss: 1.0376464128494263,grad_norm: 0.9999991236459035, iteration: 193184
loss: 1.0202293395996094,grad_norm: 0.872609327047185, iteration: 193185
loss: 1.0105541944503784,grad_norm: 0.7886738945845626, iteration: 193186
loss: 0.9956831932067871,grad_norm: 0.8931949537044637, iteration: 193187
loss: 0.9610759615898132,grad_norm: 0.8619492405123469, iteration: 193188
loss: 0.976166844367981,grad_norm: 0.9999991446582522, iteration: 193189
loss: 0.9896817207336426,grad_norm: 0.7419159192266579, iteration: 193190
loss: 0.9893715381622314,grad_norm: 0.8551897714640254, iteration: 193191
loss: 1.0162242650985718,grad_norm: 0.8874091281478292, iteration: 193192
loss: 0.9898837804794312,grad_norm: 0.999999210127405, iteration: 193193
loss: 1.0037727355957031,grad_norm: 0.9772372419808887, iteration: 193194
loss: 0.9783855080604553,grad_norm: 0.9033169877472157, iteration: 193195
loss: 0.9845238327980042,grad_norm: 0.9999990254762153, iteration: 193196
loss: 0.9952178597450256,grad_norm: 0.9612929509298591, iteration: 193197
loss: 0.995854377746582,grad_norm: 0.9999990542895638, iteration: 193198
loss: 1.004252552986145,grad_norm: 0.9999992849401487, iteration: 193199
loss: 0.950438380241394,grad_norm: 0.8818296750313657, iteration: 193200
loss: 1.001692771911621,grad_norm: 0.8336647888922504, iteration: 193201
loss: 0.9693065285682678,grad_norm: 0.999998994182375, iteration: 193202
loss: 0.9597710967063904,grad_norm: 0.8994530741262984, iteration: 193203
loss: 1.0786864757537842,grad_norm: 0.9948187182509797, iteration: 193204
loss: 1.0008130073547363,grad_norm: 0.9999990744512754, iteration: 193205
loss: 1.0200954675674438,grad_norm: 0.9999991551326615, iteration: 193206
loss: 0.9981256127357483,grad_norm: 0.9999990687165039, iteration: 193207
loss: 1.012791395187378,grad_norm: 0.9060144397953105, iteration: 193208
loss: 0.9961190819740295,grad_norm: 0.9582457670946511, iteration: 193209
loss: 0.9944884181022644,grad_norm: 0.9387415772441458, iteration: 193210
loss: 1.010692834854126,grad_norm: 0.8002910435823816, iteration: 193211
loss: 0.9588003754615784,grad_norm: 0.9999992153784264, iteration: 193212
loss: 0.9800359010696411,grad_norm: 0.9999991981246814, iteration: 193213
loss: 0.9921674728393555,grad_norm: 0.9826587549786892, iteration: 193214
loss: 1.05260169506073,grad_norm: 0.9389622943090852, iteration: 193215
loss: 0.9598471522331238,grad_norm: 0.9550170217805887, iteration: 193216
loss: 0.979886531829834,grad_norm: 0.9999991877519364, iteration: 193217
loss: 1.0193895101547241,grad_norm: 0.89001881512612, iteration: 193218
loss: 0.980608344078064,grad_norm: 0.8437110815374597, iteration: 193219
loss: 0.9978787899017334,grad_norm: 0.8794397441274492, iteration: 193220
loss: 1.013694167137146,grad_norm: 0.9999995639960875, iteration: 193221
loss: 0.9882652759552002,grad_norm: 0.979921753780213, iteration: 193222
loss: 0.9844325184822083,grad_norm: 0.9999990796192818, iteration: 193223
loss: 0.9967963099479675,grad_norm: 0.9999992608927579, iteration: 193224
loss: 0.999627947807312,grad_norm: 0.9149564071983147, iteration: 193225
loss: 1.0116839408874512,grad_norm: 0.9999990172531364, iteration: 193226
loss: 1.022666573524475,grad_norm: 0.9999991790627256, iteration: 193227
loss: 1.00527024269104,grad_norm: 0.9999994487541422, iteration: 193228
loss: 1.0109635591506958,grad_norm: 0.8974179851973225, iteration: 193229
loss: 0.996749758720398,grad_norm: 0.7571092305225667, iteration: 193230
loss: 1.0235505104064941,grad_norm: 0.9191795037218184, iteration: 193231
loss: 1.031416416168213,grad_norm: 0.999999517748701, iteration: 193232
loss: 0.9651521444320679,grad_norm: 0.8151156631118447, iteration: 193233
loss: 0.9611272215843201,grad_norm: 0.9182125388783601, iteration: 193234
loss: 0.981698215007782,grad_norm: 0.8522973269023972, iteration: 193235
loss: 0.9810187220573425,grad_norm: 0.8666465945248506, iteration: 193236
loss: 1.0570693016052246,grad_norm: 0.9999989843019346, iteration: 193237
loss: 1.044025182723999,grad_norm: 0.9999991949031878, iteration: 193238
loss: 1.1945260763168335,grad_norm: 0.9999990147296297, iteration: 193239
loss: 1.0250235795974731,grad_norm: 0.999999407307477, iteration: 193240
loss: 1.0250792503356934,grad_norm: 0.9999990269278487, iteration: 193241
loss: 1.0824241638183594,grad_norm: 0.9999993245263091, iteration: 193242
loss: 0.9883416295051575,grad_norm: 0.9999990719717683, iteration: 193243
loss: 1.0366536378860474,grad_norm: 0.9999990880539515, iteration: 193244
loss: 0.988018274307251,grad_norm: 0.8416295506722183, iteration: 193245
loss: 0.9552510976791382,grad_norm: 0.7977965365243522, iteration: 193246
loss: 1.0140998363494873,grad_norm: 0.9999999452248182, iteration: 193247
loss: 1.0769251585006714,grad_norm: 0.9999999326107774, iteration: 193248
loss: 1.0389844179153442,grad_norm: 0.9764033849129131, iteration: 193249
loss: 0.946041464805603,grad_norm: 0.999999062269031, iteration: 193250
loss: 1.0229718685150146,grad_norm: 0.999999061067206, iteration: 193251
loss: 1.0013800859451294,grad_norm: 0.9999991214297668, iteration: 193252
loss: 1.0256032943725586,grad_norm: 0.9639434254310655, iteration: 193253
loss: 0.9681357145309448,grad_norm: 0.9999990171900293, iteration: 193254
loss: 0.9776250720024109,grad_norm: 0.9999991845440046, iteration: 193255
loss: 0.9176654815673828,grad_norm: 0.9694255253821852, iteration: 193256
loss: 0.9755488634109497,grad_norm: 0.9271044076527617, iteration: 193257
loss: 0.9684066772460938,grad_norm: 0.993259283009884, iteration: 193258
loss: 1.1896401643753052,grad_norm: 0.999999178209024, iteration: 193259
loss: 0.9807013869285583,grad_norm: 0.999999193734507, iteration: 193260
loss: 1.0339990854263306,grad_norm: 0.9733167278129231, iteration: 193261
loss: 0.9695816040039062,grad_norm: 0.9447129288608503, iteration: 193262
loss: 1.0198147296905518,grad_norm: 0.9645521038991703, iteration: 193263
loss: 1.0262125730514526,grad_norm: 0.99999904575606, iteration: 193264
loss: 1.0936394929885864,grad_norm: 0.9999994401139488, iteration: 193265
loss: 1.0122060775756836,grad_norm: 0.8576664345055324, iteration: 193266
loss: 0.9815087914466858,grad_norm: 0.8719029333250453, iteration: 193267
loss: 1.0377812385559082,grad_norm: 0.8875614265198917, iteration: 193268
loss: 1.0083348751068115,grad_norm: 0.96499763412582, iteration: 193269
loss: 0.94754558801651,grad_norm: 0.9255908362728075, iteration: 193270
loss: 0.987802267074585,grad_norm: 0.9887793392652336, iteration: 193271
loss: 0.9807553887367249,grad_norm: 0.8750106318331082, iteration: 193272
loss: 1.0152400732040405,grad_norm: 0.8388730517622457, iteration: 193273
loss: 1.0425537824630737,grad_norm: 0.9999993339744306, iteration: 193274
loss: 1.0245789289474487,grad_norm: 0.9014056460615366, iteration: 193275
loss: 1.0223438739776611,grad_norm: 0.9831746609717715, iteration: 193276
loss: 1.0359580516815186,grad_norm: 0.9999993632214114, iteration: 193277
loss: 1.0094008445739746,grad_norm: 0.9074305424925524, iteration: 193278
loss: 0.9987419843673706,grad_norm: 0.9999994167867395, iteration: 193279
loss: 1.0240038633346558,grad_norm: 0.8085554822574301, iteration: 193280
loss: 0.99322909116745,grad_norm: 0.999999213648242, iteration: 193281
loss: 1.0139743089675903,grad_norm: 0.9999994695678018, iteration: 193282
loss: 0.9992379546165466,grad_norm: 0.9695602545840657, iteration: 193283
loss: 0.9876497387886047,grad_norm: 0.8636337217355745, iteration: 193284
loss: 0.9950754046440125,grad_norm: 0.9999990795722081, iteration: 193285
loss: 1.0207397937774658,grad_norm: 0.8044402124832976, iteration: 193286
loss: 1.0028640031814575,grad_norm: 0.9159377063732888, iteration: 193287
loss: 1.0211554765701294,grad_norm: 0.9883101008395165, iteration: 193288
loss: 1.0424346923828125,grad_norm: 0.9484408432440127, iteration: 193289
loss: 1.0193647146224976,grad_norm: 0.8481247403522977, iteration: 193290
loss: 1.0176278352737427,grad_norm: 0.9999992930076449, iteration: 193291
loss: 0.9719356298446655,grad_norm: 0.9999990577537065, iteration: 193292
loss: 0.9943100810050964,grad_norm: 0.9258334943004821, iteration: 193293
loss: 0.9885637760162354,grad_norm: 0.9999990527334142, iteration: 193294
loss: 0.984427809715271,grad_norm: 0.9467173427375012, iteration: 193295
loss: 1.003221869468689,grad_norm: 0.9999991441885209, iteration: 193296
loss: 0.9804654121398926,grad_norm: 0.9999991548474907, iteration: 193297
loss: 1.072393536567688,grad_norm: 0.9110938030306278, iteration: 193298
loss: 1.0289784669876099,grad_norm: 0.9999991884675882, iteration: 193299
loss: 1.1159236431121826,grad_norm: 0.9999998121069947, iteration: 193300
loss: 1.0255496501922607,grad_norm: 0.9999992911572229, iteration: 193301
loss: 0.9933381676673889,grad_norm: 0.8290597757717592, iteration: 193302
loss: 0.9743287563323975,grad_norm: 0.9351776269436208, iteration: 193303
loss: 0.9971798062324524,grad_norm: 0.7590169926846404, iteration: 193304
loss: 1.0159459114074707,grad_norm: 0.9948546475891057, iteration: 193305
loss: 0.9739717245101929,grad_norm: 0.8477112038306743, iteration: 193306
loss: 0.9862662553787231,grad_norm: 0.880810075496374, iteration: 193307
loss: 1.0345090627670288,grad_norm: 0.9999993367949567, iteration: 193308
loss: 1.0262316465377808,grad_norm: 0.9575247694408795, iteration: 193309
loss: 0.977608323097229,grad_norm: 0.9589323731121663, iteration: 193310
loss: 0.9593147039413452,grad_norm: 0.807454839479979, iteration: 193311
loss: 1.0114470720291138,grad_norm: 0.8958987728988682, iteration: 193312
loss: 0.9814403057098389,grad_norm: 0.8395490693184923, iteration: 193313
loss: 0.9761849045753479,grad_norm: 0.8919319859460576, iteration: 193314
loss: 1.0174098014831543,grad_norm: 0.9999991677596948, iteration: 193315
loss: 1.020325779914856,grad_norm: 0.9999994238403684, iteration: 193316
loss: 1.0549098253250122,grad_norm: 0.8861478761619112, iteration: 193317
loss: 0.9833642840385437,grad_norm: 0.7888435254367129, iteration: 193318
loss: 1.0083887577056885,grad_norm: 0.9999991818194546, iteration: 193319
loss: 0.9859183430671692,grad_norm: 0.9274351453022932, iteration: 193320
loss: 0.997527003288269,grad_norm: 0.9999991893094151, iteration: 193321
loss: 1.0020900964736938,grad_norm: 0.9547127453976342, iteration: 193322
loss: 1.0198286771774292,grad_norm: 0.911126662560302, iteration: 193323
loss: 0.9822924733161926,grad_norm: 0.9999997727424021, iteration: 193324
loss: 1.0003186464309692,grad_norm: 0.9492005838144412, iteration: 193325
loss: 0.9799087047576904,grad_norm: 0.9999997733409072, iteration: 193326
loss: 1.0040462017059326,grad_norm: 0.8958173701992131, iteration: 193327
loss: 0.9454367160797119,grad_norm: 0.8756322166117653, iteration: 193328
loss: 0.994949996471405,grad_norm: 0.8705207284902684, iteration: 193329
loss: 1.022997260093689,grad_norm: 0.9317790937785694, iteration: 193330
loss: 1.0204969644546509,grad_norm: 0.9730891009840507, iteration: 193331
loss: 1.0625033378601074,grad_norm: 0.9999997653739252, iteration: 193332
loss: 1.007628321647644,grad_norm: 0.9999993604297811, iteration: 193333
loss: 1.0235259532928467,grad_norm: 0.9999992779721337, iteration: 193334
loss: 0.9889672994613647,grad_norm: 0.9747955862779929, iteration: 193335
loss: 0.9513150453567505,grad_norm: 0.9999991362539059, iteration: 193336
loss: 1.0072664022445679,grad_norm: 0.9741272752654857, iteration: 193337
loss: 1.0176706314086914,grad_norm: 0.899027540192691, iteration: 193338
loss: 0.9780288934707642,grad_norm: 0.9999990325157561, iteration: 193339
loss: 1.0056703090667725,grad_norm: 0.9945135483341808, iteration: 193340
loss: 1.116855502128601,grad_norm: 0.9999991859390206, iteration: 193341
loss: 0.9568994641304016,grad_norm: 0.9143293161545522, iteration: 193342
loss: 0.9692909717559814,grad_norm: 0.9486039529072694, iteration: 193343
loss: 0.9686177968978882,grad_norm: 0.9999992421017564, iteration: 193344
loss: 1.0055105686187744,grad_norm: 0.9999998784797131, iteration: 193345
loss: 1.009964942932129,grad_norm: 0.8506357927718892, iteration: 193346
loss: 1.0438657999038696,grad_norm: 0.9152060021306134, iteration: 193347
loss: 1.0040034055709839,grad_norm: 0.8394976499087261, iteration: 193348
loss: 0.9932783246040344,grad_norm: 0.9999996096483251, iteration: 193349
loss: 0.9755228757858276,grad_norm: 0.8233882118238898, iteration: 193350
loss: 1.0189380645751953,grad_norm: 0.9999991313085638, iteration: 193351
loss: 0.9894220232963562,grad_norm: 0.9999990604523296, iteration: 193352
loss: 0.9845260381698608,grad_norm: 0.9999989898912583, iteration: 193353
loss: 0.9669631123542786,grad_norm: 0.9999989645195623, iteration: 193354
loss: 0.9961541891098022,grad_norm: 0.9509706518085788, iteration: 193355
loss: 0.9766566157341003,grad_norm: 0.9479822217353923, iteration: 193356
loss: 1.0187615156173706,grad_norm: 0.9077062935955854, iteration: 193357
loss: 0.9952281713485718,grad_norm: 0.9905096771499123, iteration: 193358
loss: 0.983807384967804,grad_norm: 0.917436004180485, iteration: 193359
loss: 0.9658392071723938,grad_norm: 0.8610270059953459, iteration: 193360
loss: 1.0070247650146484,grad_norm: 0.9999991062090811, iteration: 193361
loss: 0.9910820722579956,grad_norm: 0.9999993007932811, iteration: 193362
loss: 1.015665888786316,grad_norm: 0.8503155241378726, iteration: 193363
loss: 1.0094128847122192,grad_norm: 0.8725034606613699, iteration: 193364
loss: 0.9871012568473816,grad_norm: 0.9999999577805437, iteration: 193365
loss: 0.9890773296356201,grad_norm: 0.9999994692532431, iteration: 193366
loss: 1.004057765007019,grad_norm: 0.9999990236748946, iteration: 193367
loss: 1.0010735988616943,grad_norm: 0.8828686389628448, iteration: 193368
loss: 0.9701270461082458,grad_norm: 0.9553060293026823, iteration: 193369
loss: 0.9888190627098083,grad_norm: 0.9999991243898058, iteration: 193370
loss: 0.9961155652999878,grad_norm: 0.9522282941986845, iteration: 193371
loss: 1.0214229822158813,grad_norm: 0.8595128667496726, iteration: 193372
loss: 1.0243866443634033,grad_norm: 0.9999991269451886, iteration: 193373
loss: 0.9907024502754211,grad_norm: 0.9643818555399987, iteration: 193374
loss: 1.0197495222091675,grad_norm: 0.9796382772057465, iteration: 193375
loss: 1.011478304862976,grad_norm: 0.99999925495962, iteration: 193376
loss: 1.0787557363510132,grad_norm: 0.999999153593842, iteration: 193377
loss: 1.0120633840560913,grad_norm: 0.9999991656139637, iteration: 193378
loss: 1.004033088684082,grad_norm: 0.9294489221973447, iteration: 193379
loss: 1.0192022323608398,grad_norm: 0.9965473328666791, iteration: 193380
loss: 1.0163570642471313,grad_norm: 0.8830123001758708, iteration: 193381
loss: 1.0218851566314697,grad_norm: 0.9999999720462485, iteration: 193382
loss: 1.000718593597412,grad_norm: 0.8680505488149796, iteration: 193383
loss: 0.990360677242279,grad_norm: 0.9999991026406815, iteration: 193384
loss: 1.0031837224960327,grad_norm: 0.8802899144640611, iteration: 193385
loss: 1.0029922723770142,grad_norm: 0.9670149515070996, iteration: 193386
loss: 0.9814474582672119,grad_norm: 0.9087702907953913, iteration: 193387
loss: 0.9866816997528076,grad_norm: 0.9609958571805434, iteration: 193388
loss: 1.0148121118545532,grad_norm: 0.8237635357475454, iteration: 193389
loss: 0.975460946559906,grad_norm: 0.9999992556664273, iteration: 193390
loss: 1.0081013441085815,grad_norm: 0.9999992793927139, iteration: 193391
loss: 1.046260118484497,grad_norm: 0.9999992584354113, iteration: 193392
loss: 1.019525170326233,grad_norm: 0.9962868976347615, iteration: 193393
loss: 0.9874704480171204,grad_norm: 0.7333597222349788, iteration: 193394
loss: 0.9900062084197998,grad_norm: 0.9999998863839112, iteration: 193395
loss: 0.9827921390533447,grad_norm: 0.9999991606915708, iteration: 193396
loss: 1.0498892068862915,grad_norm: 0.8831670166723702, iteration: 193397
loss: 1.0021709203720093,grad_norm: 0.834864879474452, iteration: 193398
loss: 1.012001395225525,grad_norm: 0.9695285092284429, iteration: 193399
loss: 0.9955121874809265,grad_norm: 0.9999992294531478, iteration: 193400
loss: 1.0058445930480957,grad_norm: 0.8235852742620822, iteration: 193401
loss: 1.0133776664733887,grad_norm: 0.8777168326981786, iteration: 193402
loss: 0.9863518476486206,grad_norm: 0.907367769899217, iteration: 193403
loss: 0.9879370331764221,grad_norm: 0.8817650836248668, iteration: 193404
loss: 0.9927492737770081,grad_norm: 0.9999991873773908, iteration: 193405
loss: 1.0162876844406128,grad_norm: 0.999999102892599, iteration: 193406
loss: 1.003698706626892,grad_norm: 0.9556151170489549, iteration: 193407
loss: 0.9754844903945923,grad_norm: 0.8818400435752431, iteration: 193408
loss: 0.9739491939544678,grad_norm: 0.8392634454184726, iteration: 193409
loss: 1.0043716430664062,grad_norm: 0.8093586211269519, iteration: 193410
loss: 0.9894523024559021,grad_norm: 0.8913882024911358, iteration: 193411
loss: 0.9999481439590454,grad_norm: 0.8768358819604898, iteration: 193412
loss: 0.9450973272323608,grad_norm: 0.9738674955921046, iteration: 193413
loss: 1.0036579370498657,grad_norm: 0.9311207770648581, iteration: 193414
loss: 1.0251612663269043,grad_norm: 0.8215965052370364, iteration: 193415
loss: 1.008503794670105,grad_norm: 0.9999992378365609, iteration: 193416
loss: 0.9780986309051514,grad_norm: 0.952237190990805, iteration: 193417
loss: 1.002981424331665,grad_norm: 0.8286639797735331, iteration: 193418
loss: 1.0210132598876953,grad_norm: 0.8859306602287803, iteration: 193419
loss: 1.003960371017456,grad_norm: 0.999999818379115, iteration: 193420
loss: 0.9729921221733093,grad_norm: 0.9999990087990369, iteration: 193421
loss: 0.9939664602279663,grad_norm: 0.9557540561186103, iteration: 193422
loss: 1.0250169038772583,grad_norm: 0.8852481513598824, iteration: 193423
loss: 0.9897729158401489,grad_norm: 0.9486174794376879, iteration: 193424
loss: 1.0324455499649048,grad_norm: 0.9999990747563069, iteration: 193425
loss: 0.9970029592514038,grad_norm: 0.9181599580778036, iteration: 193426
loss: 1.029375672340393,grad_norm: 0.9999991030066104, iteration: 193427
loss: 1.0396418571472168,grad_norm: 0.9493435834998732, iteration: 193428
loss: 1.064336895942688,grad_norm: 0.999999567639768, iteration: 193429
loss: 1.0087600946426392,grad_norm: 0.9758254689301665, iteration: 193430
loss: 1.1398152112960815,grad_norm: 0.9999999810035254, iteration: 193431
loss: 1.0243605375289917,grad_norm: 0.9999991176752538, iteration: 193432
loss: 1.0123740434646606,grad_norm: 0.9355316481887287, iteration: 193433
loss: 1.0381289720535278,grad_norm: 0.9781657967268783, iteration: 193434
loss: 0.9611513018608093,grad_norm: 0.9999993477928445, iteration: 193435
loss: 0.9808826446533203,grad_norm: 0.921649440075079, iteration: 193436
loss: 1.0619847774505615,grad_norm: 0.9574761973478616, iteration: 193437
loss: 0.988530695438385,grad_norm: 0.9999991863625387, iteration: 193438
loss: 1.0140974521636963,grad_norm: 0.9999990811415385, iteration: 193439
loss: 0.9826878309249878,grad_norm: 0.7512721681716354, iteration: 193440
loss: 1.0093693733215332,grad_norm: 0.7978834704517775, iteration: 193441
loss: 1.0113327503204346,grad_norm: 0.899621051260842, iteration: 193442
loss: 0.9933223724365234,grad_norm: 0.9645277375266335, iteration: 193443
loss: 1.034623384475708,grad_norm: 0.931955111325466, iteration: 193444
loss: 1.0282403230667114,grad_norm: 0.923225162469853, iteration: 193445
loss: 0.9994516372680664,grad_norm: 0.9454900670196463, iteration: 193446
loss: 0.9725017547607422,grad_norm: 0.9506343994845192, iteration: 193447
loss: 1.016706943511963,grad_norm: 0.9999990650724561, iteration: 193448
loss: 1.0147141218185425,grad_norm: 0.8563845690644636, iteration: 193449
loss: 1.0497409105300903,grad_norm: 0.9039551649607032, iteration: 193450
loss: 0.9916184544563293,grad_norm: 0.9999991129902268, iteration: 193451
loss: 0.994236171245575,grad_norm: 0.8516898535876148, iteration: 193452
loss: 1.0173468589782715,grad_norm: 0.9067177087521184, iteration: 193453
loss: 1.0059466361999512,grad_norm: 0.883292960582912, iteration: 193454
loss: 0.9976882338523865,grad_norm: 0.9999999144989591, iteration: 193455
loss: 0.9581027030944824,grad_norm: 0.9999989889867295, iteration: 193456
loss: 0.9738715291023254,grad_norm: 0.9597157626761031, iteration: 193457
loss: 1.0086554288864136,grad_norm: 0.9321675702648426, iteration: 193458
loss: 0.9906253218650818,grad_norm: 0.9371721822068236, iteration: 193459
loss: 1.0335949659347534,grad_norm: 0.970805946740936, iteration: 193460
loss: 1.0010101795196533,grad_norm: 0.9999991659507708, iteration: 193461
loss: 1.0927062034606934,grad_norm: 0.9999998777786508, iteration: 193462
loss: 0.9553555846214294,grad_norm: 0.9999991103871311, iteration: 193463
loss: 1.028125286102295,grad_norm: 0.9280722888442626, iteration: 193464
loss: 1.0747528076171875,grad_norm: 0.9999997016415023, iteration: 193465
loss: 0.9847816824913025,grad_norm: 0.9999991969356997, iteration: 193466
loss: 1.0062053203582764,grad_norm: 0.8642917466791262, iteration: 193467
loss: 0.9883669018745422,grad_norm: 0.9999991648051441, iteration: 193468
loss: 0.9838173389434814,grad_norm: 0.8623870122618746, iteration: 193469
loss: 1.054892897605896,grad_norm: 0.999998991358161, iteration: 193470
loss: 1.0035371780395508,grad_norm: 0.9999990511408147, iteration: 193471
loss: 1.0032296180725098,grad_norm: 0.9990556966473256, iteration: 193472
loss: 0.9726953506469727,grad_norm: 0.906414764412376, iteration: 193473
loss: 0.9924564361572266,grad_norm: 0.9972969425353239, iteration: 193474
loss: 1.0108164548873901,grad_norm: 0.9999990158740736, iteration: 193475
loss: 1.0136381387710571,grad_norm: 0.827803322081186, iteration: 193476
loss: 1.0289726257324219,grad_norm: 0.9999990579349524, iteration: 193477
loss: 1.0112762451171875,grad_norm: 0.7811636533921201, iteration: 193478
loss: 1.0078436136245728,grad_norm: 0.9999990997201799, iteration: 193479
loss: 1.1562955379486084,grad_norm: 0.9999994763894424, iteration: 193480
loss: 1.0224432945251465,grad_norm: 0.9489458594422647, iteration: 193481
loss: 1.0068957805633545,grad_norm: 0.8676683567980947, iteration: 193482
loss: 1.0486400127410889,grad_norm: 0.9999994277739787, iteration: 193483
loss: 0.986411452293396,grad_norm: 0.9787982675134416, iteration: 193484
loss: 0.9986660480499268,grad_norm: 0.8205551177519581, iteration: 193485
loss: 1.0035136938095093,grad_norm: 0.9999997169330216, iteration: 193486
loss: 1.0165959596633911,grad_norm: 0.9690559177838902, iteration: 193487
loss: 0.9780314564704895,grad_norm: 0.7866633340322489, iteration: 193488
loss: 0.9938243627548218,grad_norm: 0.9196797556161517, iteration: 193489
loss: 1.0751378536224365,grad_norm: 0.8665495208995407, iteration: 193490
loss: 1.246904730796814,grad_norm: 0.9999993775356539, iteration: 193491
loss: 1.0227733850479126,grad_norm: 0.8389524955965663, iteration: 193492
loss: 1.0315483808517456,grad_norm: 0.9999993044525983, iteration: 193493
loss: 1.1611000299453735,grad_norm: 0.9999998598651403, iteration: 193494
loss: 1.0151675939559937,grad_norm: 0.9999990629226787, iteration: 193495
loss: 0.9888720512390137,grad_norm: 0.9093814051290732, iteration: 193496
loss: 1.0543228387832642,grad_norm: 0.9999991012190278, iteration: 193497
loss: 1.0022666454315186,grad_norm: 0.9999991121373171, iteration: 193498
loss: 1.0411241054534912,grad_norm: 0.8610327063593465, iteration: 193499
loss: 0.9817648530006409,grad_norm: 0.9085556121751247, iteration: 193500
loss: 0.9843674898147583,grad_norm: 0.9999991374225385, iteration: 193501
loss: 1.0133275985717773,grad_norm: 0.9999990314091821, iteration: 193502
loss: 1.054990291595459,grad_norm: 0.9126179870549438, iteration: 193503
loss: 1.0322004556655884,grad_norm: 0.7405770293040936, iteration: 193504
loss: 0.9682599306106567,grad_norm: 0.9186341695193246, iteration: 193505
loss: 0.9565566182136536,grad_norm: 0.8843484973560568, iteration: 193506
loss: 0.9641870260238647,grad_norm: 0.9757530968792928, iteration: 193507
loss: 1.000822901725769,grad_norm: 0.9999991863035961, iteration: 193508
loss: 0.9908477663993835,grad_norm: 0.99999951125229, iteration: 193509
loss: 0.978632390499115,grad_norm: 0.8138642044770271, iteration: 193510
loss: 0.9967146515846252,grad_norm: 0.9105114979710325, iteration: 193511
loss: 0.9717878699302673,grad_norm: 0.9999990819498091, iteration: 193512
loss: 0.9851051568984985,grad_norm: 0.9999990806955373, iteration: 193513
loss: 1.0895755290985107,grad_norm: 0.9999993363253943, iteration: 193514
loss: 1.2169030904769897,grad_norm: 0.9999995854670367, iteration: 193515
loss: 1.0241392850875854,grad_norm: 0.9999995260311455, iteration: 193516
loss: 1.0800843238830566,grad_norm: 0.9999994935043873, iteration: 193517
loss: 0.9839745163917542,grad_norm: 0.9263312429464278, iteration: 193518
loss: 1.0253283977508545,grad_norm: 0.7662744309860532, iteration: 193519
loss: 1.038076400756836,grad_norm: 0.9057845580769506, iteration: 193520
loss: 0.9823402166366577,grad_norm: 0.9999991295610823, iteration: 193521
loss: 0.9859899282455444,grad_norm: 0.9905382554340982, iteration: 193522
loss: 0.9947162866592407,grad_norm: 0.9084628877028683, iteration: 193523
loss: 0.9802462458610535,grad_norm: 0.995462066691151, iteration: 193524
loss: 1.060986042022705,grad_norm: 0.9999992415216173, iteration: 193525
loss: 0.9781907200813293,grad_norm: 0.9999990449630045, iteration: 193526
loss: 0.9941070079803467,grad_norm: 0.9999990515565246, iteration: 193527
loss: 1.0329148769378662,grad_norm: 0.842965225122826, iteration: 193528
loss: 1.0021181106567383,grad_norm: 0.9372596175463013, iteration: 193529
loss: 0.997230052947998,grad_norm: 0.9999991242306051, iteration: 193530
loss: 1.1354758739471436,grad_norm: 0.9999993569053905, iteration: 193531
loss: 0.9805851578712463,grad_norm: 0.74330538915278, iteration: 193532
loss: 1.003717064857483,grad_norm: 0.8569561249152184, iteration: 193533
loss: 0.9920410513877869,grad_norm: 0.9387226179777022, iteration: 193534
loss: 1.0146396160125732,grad_norm: 0.9999998098401581, iteration: 193535
loss: 1.0278265476226807,grad_norm: 0.9999991083317812, iteration: 193536
loss: 1.0715755224227905,grad_norm: 0.9999990297450572, iteration: 193537
loss: 1.0072089433670044,grad_norm: 0.9999990818233917, iteration: 193538
loss: 0.9827551245689392,grad_norm: 0.8466209356989471, iteration: 193539
loss: 0.9855344295501709,grad_norm: 0.7779618847124729, iteration: 193540
loss: 0.999560534954071,grad_norm: 0.9999995047238209, iteration: 193541
loss: 1.055738091468811,grad_norm: 0.9999992604664831, iteration: 193542
loss: 0.9989607930183411,grad_norm: 0.753629703715429, iteration: 193543
loss: 1.1461507081985474,grad_norm: 0.999999510614751, iteration: 193544
loss: 0.988139271736145,grad_norm: 0.9604699740985706, iteration: 193545
loss: 0.9986039400100708,grad_norm: 0.9999991990252058, iteration: 193546
loss: 1.0025839805603027,grad_norm: 0.9728491158522289, iteration: 193547
loss: 1.028584599494934,grad_norm: 0.9398123886380828, iteration: 193548
loss: 1.0786852836608887,grad_norm: 0.999999772661156, iteration: 193549
loss: 1.0270318984985352,grad_norm: 0.9395328004362646, iteration: 193550
loss: 1.017816424369812,grad_norm: 0.948112685588765, iteration: 193551
loss: 1.0338366031646729,grad_norm: 0.999999061478819, iteration: 193552
loss: 1.0110514163970947,grad_norm: 0.9274189319098021, iteration: 193553
loss: 1.0143260955810547,grad_norm: 0.9373097223296752, iteration: 193554
loss: 1.0973975658416748,grad_norm: 0.9999994517918613, iteration: 193555
loss: 1.0332112312316895,grad_norm: 0.8925594069829783, iteration: 193556
loss: 1.0207027196884155,grad_norm: 0.9883620200172316, iteration: 193557
loss: 1.0687841176986694,grad_norm: 0.9999990516003083, iteration: 193558
loss: 1.0149872303009033,grad_norm: 0.9999992322642925, iteration: 193559
loss: 0.9924666881561279,grad_norm: 0.8043553318285297, iteration: 193560
loss: 1.015175223350525,grad_norm: 0.9902821291005283, iteration: 193561
loss: 1.014603614807129,grad_norm: 0.9623266570194084, iteration: 193562
loss: 1.018336534500122,grad_norm: 0.8731334780381798, iteration: 193563
loss: 1.0352174043655396,grad_norm: 0.900294178413934, iteration: 193564
loss: 1.0113680362701416,grad_norm: 0.9999991618328468, iteration: 193565
loss: 1.0315991640090942,grad_norm: 0.9999991949149726, iteration: 193566
loss: 1.0202101469039917,grad_norm: 0.9999991677422859, iteration: 193567
loss: 1.0023033618927002,grad_norm: 0.9852746459521624, iteration: 193568
loss: 1.0062981843948364,grad_norm: 0.9999991357692763, iteration: 193569
loss: 1.0343499183654785,grad_norm: 0.9290893552977683, iteration: 193570
loss: 1.0446163415908813,grad_norm: 0.7547124935030322, iteration: 193571
loss: 1.050613284111023,grad_norm: 0.9304798066980067, iteration: 193572
loss: 0.9903889894485474,grad_norm: 0.9999992924902922, iteration: 193573
loss: 0.9667877554893494,grad_norm: 0.9931046613890877, iteration: 193574
loss: 1.02511465549469,grad_norm: 0.9375240277722368, iteration: 193575
loss: 0.9786203503608704,grad_norm: 0.999999462388724, iteration: 193576
loss: 1.0231496095657349,grad_norm: 0.9999991583833546, iteration: 193577
loss: 0.9834188222885132,grad_norm: 0.9999991425441792, iteration: 193578
loss: 0.9664226174354553,grad_norm: 0.8930744330002329, iteration: 193579
loss: 0.9816702008247375,grad_norm: 0.7526099841631885, iteration: 193580
loss: 1.0158607959747314,grad_norm: 0.847166802027242, iteration: 193581
loss: 1.020137906074524,grad_norm: 0.9999991979568092, iteration: 193582
loss: 0.9766317009925842,grad_norm: 0.999999120623528, iteration: 193583
loss: 0.9979339838027954,grad_norm: 0.8316454179619244, iteration: 193584
loss: 1.1839791536331177,grad_norm: 0.9999995074402391, iteration: 193585
loss: 1.0110468864440918,grad_norm: 0.9636067673526539, iteration: 193586
loss: 1.0072181224822998,grad_norm: 0.9999990722853295, iteration: 193587
loss: 1.0041810274124146,grad_norm: 0.927704428522494, iteration: 193588
loss: 0.9635423421859741,grad_norm: 0.9999990888599546, iteration: 193589
loss: 1.0375910997390747,grad_norm: 0.9936572191998998, iteration: 193590
loss: 1.0258036851882935,grad_norm: 0.9533089519043063, iteration: 193591
loss: 1.0544812679290771,grad_norm: 0.9999992880859776, iteration: 193592
loss: 0.9982811212539673,grad_norm: 0.9206423556549509, iteration: 193593
loss: 0.9937410950660706,grad_norm: 0.9999989287337124, iteration: 193594
loss: 1.0334348678588867,grad_norm: 0.9227346294341299, iteration: 193595
loss: 1.0250308513641357,grad_norm: 0.999999136701833, iteration: 193596
loss: 1.0343328714370728,grad_norm: 0.9999992276661268, iteration: 193597
loss: 0.9998102188110352,grad_norm: 0.9999991845893945, iteration: 193598
loss: 1.0082371234893799,grad_norm: 0.8914892383393067, iteration: 193599
loss: 1.0013916492462158,grad_norm: 0.9999993568530956, iteration: 193600
loss: 1.1086068153381348,grad_norm: 0.9999997638213965, iteration: 193601
loss: 1.111554741859436,grad_norm: 0.9999995039779, iteration: 193602
loss: 0.9850383400917053,grad_norm: 0.968558131983064, iteration: 193603
loss: 1.0126731395721436,grad_norm: 0.8432301233083813, iteration: 193604
loss: 1.0031400918960571,grad_norm: 0.9999996584431853, iteration: 193605
loss: 1.0115866661071777,grad_norm: 0.8294561672508572, iteration: 193606
loss: 1.006737232208252,grad_norm: 0.9999993667660588, iteration: 193607
loss: 0.9985678791999817,grad_norm: 0.9905307661081231, iteration: 193608
loss: 1.0262703895568848,grad_norm: 0.9217114298790711, iteration: 193609
loss: 1.030813217163086,grad_norm: 0.9999993151164801, iteration: 193610
loss: 0.9985480904579163,grad_norm: 0.902032137352051, iteration: 193611
loss: 0.9840126633644104,grad_norm: 0.9999990269001829, iteration: 193612
loss: 1.0090246200561523,grad_norm: 0.9559266694174182, iteration: 193613
loss: 1.021127700805664,grad_norm: 0.9999994426026771, iteration: 193614
loss: 0.9750078916549683,grad_norm: 0.9452514229338788, iteration: 193615
loss: 1.0007100105285645,grad_norm: 0.9999992454017962, iteration: 193616
loss: 1.0331132411956787,grad_norm: 0.8814144584967333, iteration: 193617
loss: 1.0174181461334229,grad_norm: 0.8840474761936238, iteration: 193618
loss: 0.9809836149215698,grad_norm: 0.9999990498203487, iteration: 193619
loss: 1.1632020473480225,grad_norm: 1.0000000567225729, iteration: 193620
loss: 1.0159817934036255,grad_norm: 0.9836231496763947, iteration: 193621
loss: 0.9983040690422058,grad_norm: 0.8449607309383661, iteration: 193622
loss: 0.9972279071807861,grad_norm: 0.916559194024365, iteration: 193623
loss: 1.06351637840271,grad_norm: 0.9999995145770241, iteration: 193624
loss: 1.011345624923706,grad_norm: 0.9540152314990022, iteration: 193625
loss: 0.9767287969589233,grad_norm: 0.9473936987084913, iteration: 193626
loss: 0.9843076467514038,grad_norm: 0.860378966927828, iteration: 193627
loss: 1.1194729804992676,grad_norm: 0.9999996466220908, iteration: 193628
loss: 0.9917023777961731,grad_norm: 0.9999991020943615, iteration: 193629
loss: 1.0117650032043457,grad_norm: 0.9229025348314265, iteration: 193630
loss: 1.0342552661895752,grad_norm: 0.8624145996620345, iteration: 193631
loss: 0.9936765432357788,grad_norm: 0.9203774098054488, iteration: 193632
loss: 1.0075592994689941,grad_norm: 0.9999990883200105, iteration: 193633
loss: 1.0410528182983398,grad_norm: 0.9999998502678592, iteration: 193634
loss: 1.0136017799377441,grad_norm: 0.9610804112135946, iteration: 193635
loss: 1.0007892847061157,grad_norm: 0.9491005228563595, iteration: 193636
loss: 1.0203380584716797,grad_norm: 0.9999991421467274, iteration: 193637
loss: 0.9867028594017029,grad_norm: 0.9407555317062484, iteration: 193638
loss: 1.0218548774719238,grad_norm: 0.8426840217134925, iteration: 193639
loss: 1.0140221118927002,grad_norm: 0.859611358751637, iteration: 193640
loss: 1.0060087442398071,grad_norm: 0.8171702492567942, iteration: 193641
loss: 0.9743849635124207,grad_norm: 0.8391404708365922, iteration: 193642
loss: 0.9947882294654846,grad_norm: 0.9878673679420814, iteration: 193643
loss: 0.9913406372070312,grad_norm: 0.9816299208172922, iteration: 193644
loss: 0.9636979699134827,grad_norm: 0.9748578294883558, iteration: 193645
loss: 1.0515596866607666,grad_norm: 0.959938866460899, iteration: 193646
loss: 1.0092262029647827,grad_norm: 0.9143550148793701, iteration: 193647
loss: 1.001551866531372,grad_norm: 0.9482158627917862, iteration: 193648
loss: 1.0021660327911377,grad_norm: 0.9657381251261672, iteration: 193649
loss: 1.0601478815078735,grad_norm: 0.933183306166265, iteration: 193650
loss: 1.009840726852417,grad_norm: 0.9999992083237897, iteration: 193651
loss: 1.017525315284729,grad_norm: 0.9999996492204863, iteration: 193652
loss: 0.9861652255058289,grad_norm: 0.9954306277117675, iteration: 193653
loss: 0.96002197265625,grad_norm: 0.8951933925796951, iteration: 193654
loss: 0.9934070110321045,grad_norm: 0.9999991134344988, iteration: 193655
loss: 1.0623517036437988,grad_norm: 0.9999990126516606, iteration: 193656
loss: 0.9890718460083008,grad_norm: 0.9999989319300118, iteration: 193657
loss: 1.02576744556427,grad_norm: 0.9999992032214908, iteration: 193658
loss: 1.0069172382354736,grad_norm: 0.9999992489209637, iteration: 193659
loss: 1.0417977571487427,grad_norm: 0.9999991721386506, iteration: 193660
loss: 0.9957099556922913,grad_norm: 0.915489534633673, iteration: 193661
loss: 0.9908507466316223,grad_norm: 0.9999992244668102, iteration: 193662
loss: 1.0143142938613892,grad_norm: 0.9215663138489758, iteration: 193663
loss: 0.9880104064941406,grad_norm: 0.9999990144980745, iteration: 193664
loss: 1.010787010192871,grad_norm: 0.9965846660339198, iteration: 193665
loss: 1.0423847436904907,grad_norm: 0.827880961471211, iteration: 193666
loss: 1.0351892709732056,grad_norm: 0.9999991370961374, iteration: 193667
loss: 0.985072910785675,grad_norm: 0.9262288424914925, iteration: 193668
loss: 1.0260188579559326,grad_norm: 0.9999992448501926, iteration: 193669
loss: 0.9876401424407959,grad_norm: 0.9017798754554099, iteration: 193670
loss: 0.9855759739875793,grad_norm: 0.8654069903665409, iteration: 193671
loss: 0.9707968831062317,grad_norm: 0.9410225350970821, iteration: 193672
loss: 1.0199226140975952,grad_norm: 0.9514760607921766, iteration: 193673
loss: 1.018198847770691,grad_norm: 0.8865024436372653, iteration: 193674
loss: 1.00492525100708,grad_norm: 0.850131580371287, iteration: 193675
loss: 0.9976763725280762,grad_norm: 0.999999218595357, iteration: 193676
loss: 0.9791008830070496,grad_norm: 0.8911125926091403, iteration: 193677
loss: 1.116995930671692,grad_norm: 0.9999993057237497, iteration: 193678
loss: 1.0562512874603271,grad_norm: 0.7706178713219813, iteration: 193679
loss: 1.0162768363952637,grad_norm: 0.9733439513758987, iteration: 193680
loss: 1.0226736068725586,grad_norm: 0.9999991318228613, iteration: 193681
loss: 0.9863383173942566,grad_norm: 0.879757627311418, iteration: 193682
loss: 0.9739078283309937,grad_norm: 0.9621103287445181, iteration: 193683
loss: 1.0204499959945679,grad_norm: 0.8652821008280193, iteration: 193684
loss: 1.0027198791503906,grad_norm: 0.9999990489555547, iteration: 193685
loss: 0.9867492914199829,grad_norm: 0.854886019384365, iteration: 193686
loss: 1.0266951322555542,grad_norm: 0.9999991426480066, iteration: 193687
loss: 1.0099079608917236,grad_norm: 0.9816894151108582, iteration: 193688
loss: 1.0142124891281128,grad_norm: 0.9999990007580534, iteration: 193689
loss: 1.024316668510437,grad_norm: 0.8263515481892786, iteration: 193690
loss: 0.9773783087730408,grad_norm: 0.9365362407968434, iteration: 193691
loss: 1.0243057012557983,grad_norm: 0.9999990581691999, iteration: 193692
loss: 1.0205280780792236,grad_norm: 0.8702388426050128, iteration: 193693
loss: 0.9827744364738464,grad_norm: 0.8742085274599598, iteration: 193694
loss: 0.9989922046661377,grad_norm: 0.9999995604237191, iteration: 193695
loss: 1.0033159255981445,grad_norm: 0.865208850455773, iteration: 193696
loss: 1.0308003425598145,grad_norm: 0.9999997837688908, iteration: 193697
loss: 1.0169767141342163,grad_norm: 0.9999991810292402, iteration: 193698
loss: 1.015442967414856,grad_norm: 0.9650139076766129, iteration: 193699
loss: 1.000689148902893,grad_norm: 0.999999130636151, iteration: 193700
loss: 1.003718614578247,grad_norm: 0.9957201030561793, iteration: 193701
loss: 0.9658439755439758,grad_norm: 0.7820577532402847, iteration: 193702
loss: 1.0865631103515625,grad_norm: 0.8953946960336245, iteration: 193703
loss: 0.9787969589233398,grad_norm: 0.9999991445972353, iteration: 193704
loss: 1.012707233428955,grad_norm: 0.9615194519808583, iteration: 193705
loss: 0.9649341106414795,grad_norm: 0.9113439377651904, iteration: 193706
loss: 1.0807420015335083,grad_norm: 0.999999895694803, iteration: 193707
loss: 0.9831271767616272,grad_norm: 0.9999990767747975, iteration: 193708
loss: 1.0142813920974731,grad_norm: 0.8218390726631638, iteration: 193709
loss: 1.002601981163025,grad_norm: 0.9999994580990932, iteration: 193710
loss: 1.0886977910995483,grad_norm: 0.9999992081570969, iteration: 193711
loss: 1.0403690338134766,grad_norm: 0.9587927481064998, iteration: 193712
loss: 1.002482295036316,grad_norm: 0.8585974372825529, iteration: 193713
loss: 1.003237009048462,grad_norm: 0.8637813956767468, iteration: 193714
loss: 1.0071382522583008,grad_norm: 0.9999990706026947, iteration: 193715
loss: 1.0112922191619873,grad_norm: 0.9054056810754643, iteration: 193716
loss: 1.003055214881897,grad_norm: 0.9999990812550539, iteration: 193717
loss: 0.9876433610916138,grad_norm: 0.9657545760004622, iteration: 193718
loss: 0.9921033978462219,grad_norm: 0.9999991901496377, iteration: 193719
loss: 1.0049090385437012,grad_norm: 0.9667947299047485, iteration: 193720
loss: 0.9972756505012512,grad_norm: 0.9999991344952794, iteration: 193721
loss: 0.9967610836029053,grad_norm: 0.7944367271437298, iteration: 193722
loss: 1.0212541818618774,grad_norm: 0.7387218764897007, iteration: 193723
loss: 0.9763854742050171,grad_norm: 0.9999990414559036, iteration: 193724
loss: 0.9915226697921753,grad_norm: 0.8434540964209251, iteration: 193725
loss: 1.0030912160873413,grad_norm: 0.9110405254177815, iteration: 193726
loss: 1.0221964120864868,grad_norm: 0.9999989961664053, iteration: 193727
loss: 0.9817097783088684,grad_norm: 0.9498080612285902, iteration: 193728
loss: 0.9991328716278076,grad_norm: 0.9999990708441757, iteration: 193729
loss: 0.9809563159942627,grad_norm: 0.9999991488551133, iteration: 193730
loss: 1.0131944417953491,grad_norm: 0.883369114817214, iteration: 193731
loss: 1.0362564325332642,grad_norm: 0.999999130586313, iteration: 193732
loss: 0.9682570099830627,grad_norm: 0.9999991805135223, iteration: 193733
loss: 0.9685748219490051,grad_norm: 0.9999990508261594, iteration: 193734
loss: 0.9667887687683105,grad_norm: 0.9783296771091088, iteration: 193735
loss: 1.0161346197128296,grad_norm: 0.9634223466215027, iteration: 193736
loss: 0.9777225255966187,grad_norm: 0.8680038251833878, iteration: 193737
loss: 1.008238673210144,grad_norm: 0.9999996758120194, iteration: 193738
loss: 1.0030773878097534,grad_norm: 0.8650010704824782, iteration: 193739
loss: 1.0065126419067383,grad_norm: 0.9364372705776521, iteration: 193740
loss: 1.0058804750442505,grad_norm: 0.9999991615166215, iteration: 193741
loss: 0.9762281179428101,grad_norm: 0.9999990813430588, iteration: 193742
loss: 0.9923731088638306,grad_norm: 0.9773585710946538, iteration: 193743
loss: 1.02604079246521,grad_norm: 0.9999991447515747, iteration: 193744
loss: 0.99761563539505,grad_norm: 0.9999993096286829, iteration: 193745
loss: 1.004022479057312,grad_norm: 0.9999991330713516, iteration: 193746
loss: 1.0260828733444214,grad_norm: 0.9529979214325298, iteration: 193747
loss: 0.9980342388153076,grad_norm: 0.9229922144571222, iteration: 193748
loss: 0.9801344275474548,grad_norm: 0.972640828134571, iteration: 193749
loss: 1.0066035985946655,grad_norm: 0.8865126184307531, iteration: 193750
loss: 1.0672338008880615,grad_norm: 0.9999990541608071, iteration: 193751
loss: 1.014905571937561,grad_norm: 0.9743917431920643, iteration: 193752
loss: 0.9957635998725891,grad_norm: 0.8907791990782755, iteration: 193753
loss: 0.9733173251152039,grad_norm: 0.9077493924049909, iteration: 193754
loss: 1.0441266298294067,grad_norm: 0.9748238215945785, iteration: 193755
loss: 0.9969662427902222,grad_norm: 0.9999991746916468, iteration: 193756
loss: 0.9765051007270813,grad_norm: 0.9144635310430735, iteration: 193757
loss: 0.998367428779602,grad_norm: 0.921140211839752, iteration: 193758
loss: 1.018388271331787,grad_norm: 0.9852570532397921, iteration: 193759
loss: 0.9824233055114746,grad_norm: 0.9610590841106587, iteration: 193760
loss: 1.1092348098754883,grad_norm: 0.9999996264889931, iteration: 193761
loss: 1.027376413345337,grad_norm: 0.9999992602705106, iteration: 193762
loss: 1.0177295207977295,grad_norm: 0.893993787821388, iteration: 193763
loss: 1.0051922798156738,grad_norm: 0.9942680891529373, iteration: 193764
loss: 1.0197808742523193,grad_norm: 0.9891182401102963, iteration: 193765
loss: 1.0916200876235962,grad_norm: 0.9594363691577894, iteration: 193766
loss: 0.9869105815887451,grad_norm: 0.9950488743690409, iteration: 193767
loss: 1.0023455619812012,grad_norm: 0.9999992517535614, iteration: 193768
loss: 0.992567777633667,grad_norm: 0.857131155814271, iteration: 193769
loss: 1.0122203826904297,grad_norm: 0.9270570083390254, iteration: 193770
loss: 0.9900092482566833,grad_norm: 0.9470116655902651, iteration: 193771
loss: 0.9425124526023865,grad_norm: 0.9708945658872746, iteration: 193772
loss: 0.9793410301208496,grad_norm: 0.9999991269049394, iteration: 193773
loss: 1.0237687826156616,grad_norm: 0.994439025954294, iteration: 193774
loss: 0.9886690974235535,grad_norm: 0.9597461378319062, iteration: 193775
loss: 0.9694363474845886,grad_norm: 0.778969908548392, iteration: 193776
loss: 1.003273367881775,grad_norm: 0.9556455988033866, iteration: 193777
loss: 0.9879457950592041,grad_norm: 0.9171998971690668, iteration: 193778
loss: 0.9970962405204773,grad_norm: 0.991051347574965, iteration: 193779
loss: 0.9963249564170837,grad_norm: 0.9999990157695694, iteration: 193780
loss: 1.0077227354049683,grad_norm: 0.8694827433785671, iteration: 193781
loss: 1.0084524154663086,grad_norm: 0.9768249026845038, iteration: 193782
loss: 0.9950730800628662,grad_norm: 0.9999990156231572, iteration: 193783
loss: 0.948712170124054,grad_norm: 0.8444163587699478, iteration: 193784
loss: 0.9885239005088806,grad_norm: 0.9522135187086705, iteration: 193785
loss: 1.0146821737289429,grad_norm: 0.9065770361951152, iteration: 193786
loss: 1.0160037279129028,grad_norm: 0.793062286163586, iteration: 193787
loss: 1.0420986413955688,grad_norm: 0.9999990986104913, iteration: 193788
loss: 0.9499478340148926,grad_norm: 0.8114940072153897, iteration: 193789
loss: 1.0012147426605225,grad_norm: 0.8682568943976866, iteration: 193790
loss: 1.1108051538467407,grad_norm: 0.9999997794468763, iteration: 193791
loss: 0.9991292953491211,grad_norm: 0.8818460500702756, iteration: 193792
loss: 1.0168218612670898,grad_norm: 0.9313329061418514, iteration: 193793
loss: 1.0101457834243774,grad_norm: 0.9999990921688056, iteration: 193794
loss: 0.9637066125869751,grad_norm: 0.8889014188198832, iteration: 193795
loss: 0.9788839221000671,grad_norm: 0.9736986760745769, iteration: 193796
loss: 1.0499839782714844,grad_norm: 0.9505569627988492, iteration: 193797
loss: 1.0052471160888672,grad_norm: 0.99999909368576, iteration: 193798
loss: 1.0440655946731567,grad_norm: 0.9999991949568219, iteration: 193799
loss: 1.0202716588974,grad_norm: 0.8917254010966053, iteration: 193800
loss: 0.9980903267860413,grad_norm: 0.9999991244514372, iteration: 193801
loss: 1.0228581428527832,grad_norm: 0.9491503583814189, iteration: 193802
loss: 1.0063852071762085,grad_norm: 0.9999990939218165, iteration: 193803
loss: 1.0398255586624146,grad_norm: 0.9979037884607111, iteration: 193804
loss: 0.9677541255950928,grad_norm: 0.9544909641310214, iteration: 193805
loss: 0.9710304141044617,grad_norm: 0.7631492098851451, iteration: 193806
loss: 0.9951743483543396,grad_norm: 0.9430373253406021, iteration: 193807
loss: 1.0152606964111328,grad_norm: 0.8762011874402101, iteration: 193808
loss: 1.0186173915863037,grad_norm: 0.8003636135113612, iteration: 193809
loss: 1.0095083713531494,grad_norm: 0.999999400801823, iteration: 193810
loss: 0.977827250957489,grad_norm: 0.9999990123847133, iteration: 193811
loss: 0.9879017472267151,grad_norm: 0.9999991081869393, iteration: 193812
loss: 0.9718868136405945,grad_norm: 0.9999991273481975, iteration: 193813
loss: 0.9828332662582397,grad_norm: 0.8936442246765225, iteration: 193814
loss: 1.0054134130477905,grad_norm: 0.9960152879954165, iteration: 193815
loss: 1.0054422616958618,grad_norm: 0.9710618198678928, iteration: 193816
loss: 0.978406548500061,grad_norm: 0.9878038717963303, iteration: 193817
loss: 1.0014482736587524,grad_norm: 0.9999990375755288, iteration: 193818
loss: 1.0470497608184814,grad_norm: 0.9999990520408433, iteration: 193819
loss: 1.0055580139160156,grad_norm: 0.950932897744876, iteration: 193820
loss: 0.9938926100730896,grad_norm: 0.9065817384253996, iteration: 193821
loss: 1.036685585975647,grad_norm: 0.9999999410780283, iteration: 193822
loss: 1.029123067855835,grad_norm: 0.9999991137780261, iteration: 193823
loss: 1.001044750213623,grad_norm: 0.7705855312402561, iteration: 193824
loss: 1.0024006366729736,grad_norm: 0.998288837871831, iteration: 193825
loss: 0.9971573352813721,grad_norm: 0.9999990649846706, iteration: 193826
loss: 1.0570865869522095,grad_norm: 0.9999990658980832, iteration: 193827
loss: 0.9983838796615601,grad_norm: 0.9999990899380895, iteration: 193828
loss: 1.0108853578567505,grad_norm: 0.9178528879764991, iteration: 193829
loss: 1.01719069480896,grad_norm: 0.9999992483656448, iteration: 193830
loss: 1.0138319730758667,grad_norm: 0.9999990681093958, iteration: 193831
loss: 0.9589152932167053,grad_norm: 0.8625452295950996, iteration: 193832
loss: 0.9892193078994751,grad_norm: 0.9999992424447898, iteration: 193833
loss: 0.992102324962616,grad_norm: 0.8866284676124915, iteration: 193834
loss: 1.0256457328796387,grad_norm: 0.9997446510625321, iteration: 193835
loss: 1.0187104940414429,grad_norm: 0.880477305215659, iteration: 193836
loss: 1.0321013927459717,grad_norm: 0.947590778305541, iteration: 193837
loss: 1.0360115766525269,grad_norm: 0.9999990371996705, iteration: 193838
loss: 0.9838772416114807,grad_norm: 0.7453406993983985, iteration: 193839
loss: 0.9939408302307129,grad_norm: 0.7638618578709117, iteration: 193840
loss: 1.0065302848815918,grad_norm: 0.8980808709531166, iteration: 193841
loss: 0.9888446927070618,grad_norm: 0.8939714496175722, iteration: 193842
loss: 1.0108786821365356,grad_norm: 0.999998976661956, iteration: 193843
loss: 0.9992019534111023,grad_norm: 0.9385733474372435, iteration: 193844
loss: 0.99576336145401,grad_norm: 0.7849472468583484, iteration: 193845
loss: 1.003908395767212,grad_norm: 0.992107876470578, iteration: 193846
loss: 0.981924831867218,grad_norm: 0.8687104088572821, iteration: 193847
loss: 1.020281195640564,grad_norm: 0.9999990522052676, iteration: 193848
loss: 1.029326319694519,grad_norm: 0.9999995057852856, iteration: 193849
loss: 1.0209219455718994,grad_norm: 0.9999991900345482, iteration: 193850
loss: 1.0188648700714111,grad_norm: 0.7186956311570118, iteration: 193851
loss: 1.0124245882034302,grad_norm: 0.9999990343909387, iteration: 193852
loss: 0.951203465461731,grad_norm: 0.8950283806044004, iteration: 193853
loss: 1.0420440435409546,grad_norm: 0.9273639229154347, iteration: 193854
loss: 1.0187667608261108,grad_norm: 0.8562787120076769, iteration: 193855
loss: 1.0068187713623047,grad_norm: 0.9999991075399641, iteration: 193856
loss: 0.9806776642799377,grad_norm: 0.9999990559186953, iteration: 193857
loss: 1.0170283317565918,grad_norm: 0.7528483540668679, iteration: 193858
loss: 0.9961841106414795,grad_norm: 0.9999989455340278, iteration: 193859
loss: 0.9861593842506409,grad_norm: 0.8957587926076175, iteration: 193860
loss: 1.0349247455596924,grad_norm: 0.9171554326230377, iteration: 193861
loss: 0.9900357723236084,grad_norm: 0.9999991552588667, iteration: 193862
loss: 1.009521245956421,grad_norm: 0.9999992129529617, iteration: 193863
loss: 1.0235944986343384,grad_norm: 0.9999997205386926, iteration: 193864
loss: 0.9839332103729248,grad_norm: 0.9969408870062488, iteration: 193865
loss: 0.9997860193252563,grad_norm: 0.9041611388681612, iteration: 193866
loss: 0.9904083609580994,grad_norm: 0.9999991251554422, iteration: 193867
loss: 1.010002851486206,grad_norm: 0.8132620380113993, iteration: 193868
loss: 0.9507068991661072,grad_norm: 0.9294372479782104, iteration: 193869
loss: 0.9644748568534851,grad_norm: 0.9999989981377378, iteration: 193870
loss: 0.9706469178199768,grad_norm: 0.991879074042447, iteration: 193871
loss: 1.008044719696045,grad_norm: 0.8844736930443635, iteration: 193872
loss: 1.0396541357040405,grad_norm: 0.9892034423958243, iteration: 193873
loss: 1.0109373331069946,grad_norm: 0.9999990469740677, iteration: 193874
loss: 1.011775255203247,grad_norm: 0.9738420112852837, iteration: 193875
loss: 0.9743675589561462,grad_norm: 0.9560635830024256, iteration: 193876
loss: 0.9889029860496521,grad_norm: 0.9999991315169463, iteration: 193877
loss: 1.016554594039917,grad_norm: 0.9230196913085271, iteration: 193878
loss: 0.9945617318153381,grad_norm: 0.9030479134617702, iteration: 193879
loss: 1.0059878826141357,grad_norm: 0.7980382795140499, iteration: 193880
loss: 1.008246660232544,grad_norm: 0.91518217180951, iteration: 193881
loss: 0.9979336857795715,grad_norm: 0.9999991802283129, iteration: 193882
loss: 0.9520344734191895,grad_norm: 0.9999991403745032, iteration: 193883
loss: 1.056547999382019,grad_norm: 0.9999997001470465, iteration: 193884
loss: 0.9665538668632507,grad_norm: 0.9999989020777533, iteration: 193885
loss: 1.0255858898162842,grad_norm: 0.9538751289196634, iteration: 193886
loss: 0.9854747653007507,grad_norm: 0.984389041585174, iteration: 193887
loss: 1.0055642127990723,grad_norm: 0.9459086177121035, iteration: 193888
loss: 1.0007903575897217,grad_norm: 0.9782903005905087, iteration: 193889
loss: 1.0096614360809326,grad_norm: 0.9502111551605692, iteration: 193890
loss: 0.9984889626502991,grad_norm: 0.9999990461897853, iteration: 193891
loss: 0.9847121834754944,grad_norm: 0.999998993686048, iteration: 193892
loss: 0.986972451210022,grad_norm: 0.9877790265746001, iteration: 193893
loss: 1.0773848295211792,grad_norm: 0.885616413885134, iteration: 193894
loss: 0.9920070767402649,grad_norm: 0.9999990397784277, iteration: 193895
loss: 0.978485107421875,grad_norm: 0.9999990775539597, iteration: 193896
loss: 1.0017458200454712,grad_norm: 0.9999990378037105, iteration: 193897
loss: 0.9992219805717468,grad_norm: 0.8865501281865562, iteration: 193898
loss: 0.9821863174438477,grad_norm: 0.8747998686045336, iteration: 193899
loss: 0.9919624924659729,grad_norm: 0.9637748580115205, iteration: 193900
loss: 0.9893935322761536,grad_norm: 0.9999991664434423, iteration: 193901
loss: 0.9847687482833862,grad_norm: 0.9251989228887454, iteration: 193902
loss: 1.0101282596588135,grad_norm: 0.7973145004817861, iteration: 193903
loss: 0.9919846057891846,grad_norm: 0.9042030441997208, iteration: 193904
loss: 0.9774165153503418,grad_norm: 0.917689962021552, iteration: 193905
loss: 1.007896900177002,grad_norm: 0.9957255639144876, iteration: 193906
loss: 1.0347617864608765,grad_norm: 0.9345239467871188, iteration: 193907
loss: 0.9952208399772644,grad_norm: 0.9340459088837104, iteration: 193908
loss: 1.0577093362808228,grad_norm: 0.8591089068749046, iteration: 193909
loss: 0.9617368578910828,grad_norm: 0.9029404594613379, iteration: 193910
loss: 0.9826083183288574,grad_norm: 0.9999990867636629, iteration: 193911
loss: 1.0428650379180908,grad_norm: 0.9999991418084624, iteration: 193912
loss: 1.0084469318389893,grad_norm: 0.9999992077733199, iteration: 193913
loss: 0.9725348949432373,grad_norm: 0.9246109875371351, iteration: 193914
loss: 1.0158730745315552,grad_norm: 0.836587696303145, iteration: 193915
loss: 0.9988808631896973,grad_norm: 0.9999990890385385, iteration: 193916
loss: 0.9808793663978577,grad_norm: 0.9121564816233081, iteration: 193917
loss: 1.0204639434814453,grad_norm: 0.9999991487804364, iteration: 193918
loss: 1.0296790599822998,grad_norm: 0.9530852838002891, iteration: 193919
loss: 1.0228461027145386,grad_norm: 0.999999167703075, iteration: 193920
loss: 1.0020204782485962,grad_norm: 0.9435069070495147, iteration: 193921
loss: 0.999934196472168,grad_norm: 0.9999991053398143, iteration: 193922
loss: 1.042189359664917,grad_norm: 0.8735913989654115, iteration: 193923
loss: 0.9852759838104248,grad_norm: 0.9616276983316262, iteration: 193924
loss: 1.0104799270629883,grad_norm: 0.8672830889860178, iteration: 193925
loss: 1.0107425451278687,grad_norm: 0.969062239266417, iteration: 193926
loss: 0.9983203411102295,grad_norm: 0.9668449774095278, iteration: 193927
loss: 0.9823021292686462,grad_norm: 0.9321335126249928, iteration: 193928
loss: 1.00925874710083,grad_norm: 0.9337642314340463, iteration: 193929
loss: 1.0453566312789917,grad_norm: 0.8515398246590425, iteration: 193930
loss: 0.969168484210968,grad_norm: 0.8285903815999183, iteration: 193931
loss: 1.026987910270691,grad_norm: 0.8770768008983588, iteration: 193932
loss: 0.9721671938896179,grad_norm: 0.9999994418897403, iteration: 193933
loss: 1.0350871086120605,grad_norm: 0.9999997986111544, iteration: 193934
loss: 1.1251215934753418,grad_norm: 0.999999354244657, iteration: 193935
loss: 0.9920098185539246,grad_norm: 0.8885101052747897, iteration: 193936
loss: 0.9737182855606079,grad_norm: 0.6885247959747769, iteration: 193937
loss: 0.9762218594551086,grad_norm: 0.9738193308529008, iteration: 193938
loss: 0.9491897821426392,grad_norm: 0.999999021776545, iteration: 193939
loss: 0.9906839728355408,grad_norm: 0.9999990894756324, iteration: 193940
loss: 0.9879466891288757,grad_norm: 0.8862169060116908, iteration: 193941
loss: 0.988980770111084,grad_norm: 0.9999992727687361, iteration: 193942
loss: 1.0030007362365723,grad_norm: 0.9999992966328871, iteration: 193943
loss: 0.9892610907554626,grad_norm: 0.9389722262646427, iteration: 193944
loss: 1.02659273147583,grad_norm: 0.9999990821764335, iteration: 193945
loss: 0.9653582572937012,grad_norm: 0.9999991814913338, iteration: 193946
loss: 1.0051720142364502,grad_norm: 0.9582475645940665, iteration: 193947
loss: 1.0127569437026978,grad_norm: 0.9359435146610142, iteration: 193948
loss: 1.0756148099899292,grad_norm: 0.9999990272262327, iteration: 193949
loss: 0.9982690215110779,grad_norm: 0.8949099960838794, iteration: 193950
loss: 1.000139594078064,grad_norm: 0.9999991011880355, iteration: 193951
loss: 1.010844111442566,grad_norm: 0.8202191232523985, iteration: 193952
loss: 1.0089590549468994,grad_norm: 0.8959807046514953, iteration: 193953
loss: 0.9880415201187134,grad_norm: 0.9999991162194032, iteration: 193954
loss: 1.002020239830017,grad_norm: 0.9790687239878164, iteration: 193955
loss: 0.9956348538398743,grad_norm: 0.904439890260565, iteration: 193956
loss: 0.9778366684913635,grad_norm: 0.8071645956012243, iteration: 193957
loss: 1.000571370124817,grad_norm: 0.8146966458378392, iteration: 193958
loss: 0.9623039364814758,grad_norm: 0.9076783545496044, iteration: 193959
loss: 1.009523630142212,grad_norm: 0.9214220564958951, iteration: 193960
loss: 0.9851899147033691,grad_norm: 0.9999988921474714, iteration: 193961
loss: 0.9797086119651794,grad_norm: 0.9999989244546648, iteration: 193962
loss: 1.0333633422851562,grad_norm: 0.9999991353255947, iteration: 193963
loss: 1.0015884637832642,grad_norm: 0.999999119736421, iteration: 193964
loss: 0.9931811690330505,grad_norm: 0.9999990810897978, iteration: 193965
loss: 1.0230947732925415,grad_norm: 0.9999991107151541, iteration: 193966
loss: 1.036036491394043,grad_norm: 0.9999994264910017, iteration: 193967
loss: 1.0256264209747314,grad_norm: 0.9130473311715049, iteration: 193968
loss: 0.9700368046760559,grad_norm: 0.999999099996428, iteration: 193969
loss: 1.031976342201233,grad_norm: 0.9689857172861684, iteration: 193970
loss: 0.9648425579071045,grad_norm: 0.9152766228329929, iteration: 193971
loss: 1.014413833618164,grad_norm: 0.9793941426492743, iteration: 193972
loss: 1.0203217267990112,grad_norm: 0.9488438737766074, iteration: 193973
loss: 0.9828004837036133,grad_norm: 0.8476869982871813, iteration: 193974
loss: 0.9993710517883301,grad_norm: 0.9999997830602354, iteration: 193975
loss: 1.0308789014816284,grad_norm: 0.999999575654017, iteration: 193976
loss: 1.0120292901992798,grad_norm: 0.99999905065666, iteration: 193977
loss: 1.0000420808792114,grad_norm: 0.8636446339807085, iteration: 193978
loss: 1.0300451517105103,grad_norm: 0.9159630024103904, iteration: 193979
loss: 0.9793415665626526,grad_norm: 0.9999991555905967, iteration: 193980
loss: 1.0067346096038818,grad_norm: 0.7842979170420574, iteration: 193981
loss: 0.9567099213600159,grad_norm: 0.9999995308715637, iteration: 193982
loss: 1.0165846347808838,grad_norm: 0.9999991392237045, iteration: 193983
loss: 1.0034006834030151,grad_norm: 0.9999990619889783, iteration: 193984
loss: 1.0244008302688599,grad_norm: 0.8279055683900942, iteration: 193985
loss: 0.9857754111289978,grad_norm: 0.7862457058809748, iteration: 193986
loss: 1.1975715160369873,grad_norm: 0.9999994784438784, iteration: 193987
loss: 0.9931586384773254,grad_norm: 0.850756582437624, iteration: 193988
loss: 1.015029788017273,grad_norm: 0.9999990673516631, iteration: 193989
loss: 0.9932776689529419,grad_norm: 0.999999055342473, iteration: 193990
loss: 0.9840345978736877,grad_norm: 0.9270533437402079, iteration: 193991
loss: 0.9973932504653931,grad_norm: 0.7520722570637022, iteration: 193992
loss: 1.0062050819396973,grad_norm: 0.9999991451661823, iteration: 193993
loss: 1.0452486276626587,grad_norm: 0.9392247628874744, iteration: 193994
loss: 1.0541590452194214,grad_norm: 0.9254369267521318, iteration: 193995
loss: 0.9883493781089783,grad_norm: 0.9999991305211098, iteration: 193996
loss: 0.9838581085205078,grad_norm: 0.8766051746841013, iteration: 193997
loss: 1.007240891456604,grad_norm: 0.9118305938582387, iteration: 193998
loss: 0.9930439591407776,grad_norm: 0.8239017954104011, iteration: 193999
loss: 1.004298210144043,grad_norm: 0.9359426281517544, iteration: 194000
loss: 1.0028653144836426,grad_norm: 0.8990945746212289, iteration: 194001
loss: 0.984089732170105,grad_norm: 0.9999990019171784, iteration: 194002
loss: 0.9924483895301819,grad_norm: 0.9999992468323197, iteration: 194003
loss: 1.0008363723754883,grad_norm: 0.9999989313218669, iteration: 194004
loss: 1.0090769529342651,grad_norm: 0.9164666483587587, iteration: 194005
loss: 1.0190787315368652,grad_norm: 0.9571363122406494, iteration: 194006
loss: 1.0616064071655273,grad_norm: 0.9999991101642484, iteration: 194007
loss: 0.9959895610809326,grad_norm: 0.7675562751506966, iteration: 194008
loss: 0.9832103252410889,grad_norm: 0.863947755007166, iteration: 194009
loss: 1.0457112789154053,grad_norm: 0.9999991993309565, iteration: 194010
loss: 0.9828089475631714,grad_norm: 0.9999996195201214, iteration: 194011
loss: 1.0339328050613403,grad_norm: 0.9999991528552188, iteration: 194012
loss: 1.0361846685409546,grad_norm: 0.8666078306655406, iteration: 194013
loss: 1.0282392501831055,grad_norm: 0.9999993148949898, iteration: 194014
loss: 1.0280052423477173,grad_norm: 0.9327840903548176, iteration: 194015
loss: 1.089681625366211,grad_norm: 0.9352064327232592, iteration: 194016
loss: 0.9788700342178345,grad_norm: 0.999999018593168, iteration: 194017
loss: 0.9835829734802246,grad_norm: 0.9378545205444875, iteration: 194018
loss: 0.9827290177345276,grad_norm: 0.9999991569904795, iteration: 194019
loss: 1.0134295225143433,grad_norm: 0.9247697807571447, iteration: 194020
loss: 1.0303865671157837,grad_norm: 0.8831682909798015, iteration: 194021
loss: 1.0387694835662842,grad_norm: 0.9999990262357944, iteration: 194022
loss: 1.0415643453598022,grad_norm: 0.9999990879365511, iteration: 194023
loss: 1.009196400642395,grad_norm: 0.7963544768138238, iteration: 194024
loss: 0.9766980409622192,grad_norm: 0.8252656511127173, iteration: 194025
loss: 0.9827268123626709,grad_norm: 0.8508631989518752, iteration: 194026
loss: 1.0065901279449463,grad_norm: 0.9192254580191489, iteration: 194027
loss: 1.0195342302322388,grad_norm: 0.9999990959641987, iteration: 194028
loss: 0.9967365264892578,grad_norm: 0.9071014778321487, iteration: 194029
loss: 0.9822263121604919,grad_norm: 0.9999990686269796, iteration: 194030
loss: 1.0117268562316895,grad_norm: 0.9999990744672705, iteration: 194031
loss: 1.023191213607788,grad_norm: 0.9999997921657768, iteration: 194032
loss: 1.003350019454956,grad_norm: 0.9250126911449732, iteration: 194033
loss: 1.0451098680496216,grad_norm: 0.9859478650811785, iteration: 194034
loss: 1.027573823928833,grad_norm: 0.9999994501490161, iteration: 194035
loss: 0.9962344765663147,grad_norm: 0.9450890655008894, iteration: 194036
loss: 0.9905937910079956,grad_norm: 0.9999990558836422, iteration: 194037
loss: 1.0054008960723877,grad_norm: 0.898210350282767, iteration: 194038
loss: 0.994417130947113,grad_norm: 0.920278936557109, iteration: 194039
loss: 0.980391800403595,grad_norm: 0.951957731902826, iteration: 194040
loss: 1.036075234413147,grad_norm: 0.9999990894661909, iteration: 194041
loss: 0.9439405798912048,grad_norm: 0.9999991825034052, iteration: 194042
loss: 0.969020426273346,grad_norm: 0.9999992223156662, iteration: 194043
loss: 0.9834340214729309,grad_norm: 0.9785444975441092, iteration: 194044
loss: 0.9685665369033813,grad_norm: 0.8100840334939508, iteration: 194045
loss: 0.9651454091072083,grad_norm: 0.999999223885677, iteration: 194046
loss: 1.0188100337982178,grad_norm: 0.8295958220773139, iteration: 194047
loss: 0.9778782725334167,grad_norm: 0.8146031086229606, iteration: 194048
loss: 1.0196479558944702,grad_norm: 0.9999989182473882, iteration: 194049
loss: 1.012878179550171,grad_norm: 0.9999992836745046, iteration: 194050
loss: 1.0389901399612427,grad_norm: 0.9999993792669952, iteration: 194051
loss: 0.9878144264221191,grad_norm: 0.9488578006690394, iteration: 194052
loss: 1.0128222703933716,grad_norm: 0.8898765016013801, iteration: 194053
loss: 1.0149070024490356,grad_norm: 0.796520540652264, iteration: 194054
loss: 0.9575633406639099,grad_norm: 0.916393277154993, iteration: 194055
loss: 0.9785745143890381,grad_norm: 0.875991656414297, iteration: 194056
loss: 1.0128027200698853,grad_norm: 0.8538709938807476, iteration: 194057
loss: 1.0141671895980835,grad_norm: 0.8312068239861828, iteration: 194058
loss: 1.0152852535247803,grad_norm: 0.9999991594456068, iteration: 194059
loss: 0.9838284850120544,grad_norm: 0.950647857988831, iteration: 194060
loss: 0.9750014543533325,grad_norm: 0.9642048020613202, iteration: 194061
loss: 1.005116581916809,grad_norm: 0.9999990569605018, iteration: 194062
loss: 1.0049220323562622,grad_norm: 0.9999991328590151, iteration: 194063
loss: 1.0482908487319946,grad_norm: 0.9999995301731234, iteration: 194064
loss: 0.9999985098838806,grad_norm: 0.883549585336639, iteration: 194065
loss: 0.9889518618583679,grad_norm: 0.87779771159896, iteration: 194066
loss: 0.9907605051994324,grad_norm: 0.917897216973364, iteration: 194067
loss: 0.9598885178565979,grad_norm: 0.8522656570577751, iteration: 194068
loss: 1.0021060705184937,grad_norm: 0.9933622909179759, iteration: 194069
loss: 0.941996693611145,grad_norm: 0.849999547861322, iteration: 194070
loss: 1.0460587739944458,grad_norm: 0.8604304084577976, iteration: 194071
loss: 1.0098767280578613,grad_norm: 0.8623102185095747, iteration: 194072
loss: 0.9938418865203857,grad_norm: 0.9963601578609819, iteration: 194073
loss: 1.2117416858673096,grad_norm: 0.9999998429379672, iteration: 194074
loss: 0.983288049697876,grad_norm: 0.9999991381364387, iteration: 194075
loss: 1.027660846710205,grad_norm: 0.995485802597422, iteration: 194076
loss: 1.0026699304580688,grad_norm: 0.9417973279525592, iteration: 194077
loss: 0.9981443285942078,grad_norm: 0.8914653188367142, iteration: 194078
loss: 0.9945918321609497,grad_norm: 0.9691305831955473, iteration: 194079
loss: 0.9837101697921753,grad_norm: 0.9843185027843872, iteration: 194080
loss: 1.0091699361801147,grad_norm: 0.9999992387084932, iteration: 194081
loss: 1.0316193103790283,grad_norm: 0.992685314236416, iteration: 194082
loss: 1.0025516748428345,grad_norm: 0.8838696020986497, iteration: 194083
loss: 0.9900628328323364,grad_norm: 0.9103946898605095, iteration: 194084
loss: 1.054033637046814,grad_norm: 0.9999995548137232, iteration: 194085
loss: 1.0599626302719116,grad_norm: 0.9297365836017946, iteration: 194086
loss: 1.0644047260284424,grad_norm: 0.999999190395223, iteration: 194087
loss: 1.0075981616973877,grad_norm: 0.9999991939499545, iteration: 194088
loss: 1.013549566268921,grad_norm: 0.9400154243862889, iteration: 194089
loss: 1.0051835775375366,grad_norm: 0.9277227065084578, iteration: 194090
loss: 0.9878426194190979,grad_norm: 0.9070286850338153, iteration: 194091
loss: 0.9635487794876099,grad_norm: 0.9999992075527319, iteration: 194092
loss: 0.9749683141708374,grad_norm: 0.8928575084963282, iteration: 194093
loss: 0.9734782576560974,grad_norm: 0.8949650200511536, iteration: 194094
loss: 1.0023688077926636,grad_norm: 0.9844915037677409, iteration: 194095
loss: 1.0737593173980713,grad_norm: 0.9999993807898594, iteration: 194096
loss: 1.0354993343353271,grad_norm: 0.7628683253982034, iteration: 194097
loss: 1.000046730041504,grad_norm: 0.9098325229810365, iteration: 194098
loss: 0.9476256370544434,grad_norm: 0.9019146375812389, iteration: 194099
loss: 1.006055474281311,grad_norm: 0.9920446612215855, iteration: 194100
loss: 1.0013678073883057,grad_norm: 0.89820614059408, iteration: 194101
loss: 1.0008381605148315,grad_norm: 0.9999991614520509, iteration: 194102
loss: 1.0167487859725952,grad_norm: 0.9999997418551431, iteration: 194103
loss: 1.0167514085769653,grad_norm: 0.9097442445561427, iteration: 194104
loss: 1.044600009918213,grad_norm: 0.9999993323288076, iteration: 194105
loss: 0.9473896026611328,grad_norm: 0.999999184936808, iteration: 194106
loss: 0.9823530316352844,grad_norm: 0.9439565328739062, iteration: 194107
loss: 1.0190049409866333,grad_norm: 0.9999992805368523, iteration: 194108
loss: 1.0353580713272095,grad_norm: 0.9999992934193852, iteration: 194109
loss: 1.0340826511383057,grad_norm: 0.9999990869936605, iteration: 194110
loss: 1.0192029476165771,grad_norm: 0.8114327458581215, iteration: 194111
loss: 1.0037360191345215,grad_norm: 0.9999991616601871, iteration: 194112
loss: 0.9670172333717346,grad_norm: 0.9999997399760239, iteration: 194113
loss: 1.0152003765106201,grad_norm: 0.8809170104375393, iteration: 194114
loss: 1.006164789199829,grad_norm: 0.9667910950980279, iteration: 194115
loss: 0.9994184374809265,grad_norm: 0.9999990539848445, iteration: 194116
loss: 0.991653323173523,grad_norm: 0.9856427976279083, iteration: 194117
loss: 0.9784579277038574,grad_norm: 0.9230504203211893, iteration: 194118
loss: 1.0229852199554443,grad_norm: 0.9085928734712411, iteration: 194119
loss: 1.0116920471191406,grad_norm: 0.8530174447254152, iteration: 194120
loss: 1.0157091617584229,grad_norm: 0.999999070271413, iteration: 194121
loss: 0.9912351369857788,grad_norm: 0.9836589303523791, iteration: 194122
loss: 1.038374900817871,grad_norm: 0.9999989773589985, iteration: 194123
loss: 1.0266221761703491,grad_norm: 0.7963034807014124, iteration: 194124
loss: 0.9733471870422363,grad_norm: 0.962017482624567, iteration: 194125
loss: 0.9982051253318787,grad_norm: 0.9405749725630017, iteration: 194126
loss: 0.9926336407661438,grad_norm: 0.9999991396159925, iteration: 194127
loss: 0.9916414022445679,grad_norm: 0.9536099679635318, iteration: 194128
loss: 1.0541951656341553,grad_norm: 0.9999989706683237, iteration: 194129
loss: 0.9801479578018188,grad_norm: 0.9999991478976846, iteration: 194130
loss: 0.9581823945045471,grad_norm: 0.9992542130075911, iteration: 194131
loss: 0.9846811890602112,grad_norm: 0.9999991012086, iteration: 194132
loss: 0.9947821497917175,grad_norm: 0.9739235931425301, iteration: 194133
loss: 0.9999814033508301,grad_norm: 0.9999989980169834, iteration: 194134
loss: 1.023901104927063,grad_norm: 0.9526464482245769, iteration: 194135
loss: 0.9559157490730286,grad_norm: 0.9999990590327611, iteration: 194136
loss: 0.9780689477920532,grad_norm: 0.8722042556852245, iteration: 194137
loss: 1.0834479331970215,grad_norm: 0.9999993597407983, iteration: 194138
loss: 0.9973577260971069,grad_norm: 0.9201929013446525, iteration: 194139
loss: 0.9672285914421082,grad_norm: 0.9327175250809483, iteration: 194140
loss: 0.947809100151062,grad_norm: 0.999998960403408, iteration: 194141
loss: 1.0578322410583496,grad_norm: 0.8262242467788439, iteration: 194142
loss: 0.9654539823532104,grad_norm: 0.9999992762240641, iteration: 194143
loss: 0.9881181120872498,grad_norm: 0.9447242688439977, iteration: 194144
loss: 1.007604956626892,grad_norm: 0.8512566606067115, iteration: 194145
loss: 1.022814393043518,grad_norm: 0.9899423923097889, iteration: 194146
loss: 1.018947958946228,grad_norm: 0.7890966665560717, iteration: 194147
loss: 0.9999430179595947,grad_norm: 0.9999990751387554, iteration: 194148
loss: 1.0236189365386963,grad_norm: 0.8277749108905039, iteration: 194149
loss: 0.9969810247421265,grad_norm: 0.9234378086244189, iteration: 194150
loss: 1.0088632106781006,grad_norm: 0.9012268488985385, iteration: 194151
loss: 0.9888191819190979,grad_norm: 0.9999992113405859, iteration: 194152
loss: 1.001184344291687,grad_norm: 0.999999232928481, iteration: 194153
loss: 0.9880096316337585,grad_norm: 0.9999990595658637, iteration: 194154
loss: 1.004485011100769,grad_norm: 0.8134031580924233, iteration: 194155
loss: 1.020332932472229,grad_norm: 0.9898106721689008, iteration: 194156
loss: 0.9749631881713867,grad_norm: 0.9111325709003867, iteration: 194157
loss: 0.9884071946144104,grad_norm: 0.9999990471562077, iteration: 194158
loss: 1.035054087638855,grad_norm: 0.9421979550795376, iteration: 194159
loss: 1.0045368671417236,grad_norm: 0.9790418862474289, iteration: 194160
loss: 0.9513638615608215,grad_norm: 0.805250373671566, iteration: 194161
loss: 0.9826943874359131,grad_norm: 0.8838935102330919, iteration: 194162
loss: 1.0001591444015503,grad_norm: 0.9999989842222974, iteration: 194163
loss: 0.9701336622238159,grad_norm: 0.9885733691441021, iteration: 194164
loss: 0.9843717813491821,grad_norm: 0.9117191953936788, iteration: 194165
loss: 1.0182427167892456,grad_norm: 0.9793529263349384, iteration: 194166
loss: 0.9964801073074341,grad_norm: 0.9999990513668126, iteration: 194167
loss: 0.986137330532074,grad_norm: 0.9999991423332569, iteration: 194168
loss: 0.9644713997840881,grad_norm: 0.939890035132482, iteration: 194169
loss: 1.0087692737579346,grad_norm: 0.8913812705485376, iteration: 194170
loss: 1.0516356229782104,grad_norm: 0.9033531520344652, iteration: 194171
loss: 1.019288420677185,grad_norm: 0.8969754691208879, iteration: 194172
loss: 0.9887695908546448,grad_norm: 0.9999991085399038, iteration: 194173
loss: 0.9823629260063171,grad_norm: 0.9125991664096432, iteration: 194174
loss: 0.9686382412910461,grad_norm: 0.9604451214627306, iteration: 194175
loss: 1.0523700714111328,grad_norm: 0.9999994900702954, iteration: 194176
loss: 0.953879177570343,grad_norm: 0.9999991175788074, iteration: 194177
loss: 1.01707923412323,grad_norm: 0.8822196005862857, iteration: 194178
loss: 1.0060101747512817,grad_norm: 0.9486913845300571, iteration: 194179
loss: 0.9756631255149841,grad_norm: 0.8061159529578756, iteration: 194180
loss: 0.9903274178504944,grad_norm: 0.8111074413413879, iteration: 194181
loss: 1.0040456056594849,grad_norm: 0.9999989721182747, iteration: 194182
loss: 1.0146154165267944,grad_norm: 0.8652063002940003, iteration: 194183
loss: 1.0307502746582031,grad_norm: 0.9999990615361366, iteration: 194184
loss: 1.006555199623108,grad_norm: 0.9065141961542409, iteration: 194185
loss: 1.037152647972107,grad_norm: 0.9416607674683236, iteration: 194186
loss: 0.9955169558525085,grad_norm: 0.8816801198676678, iteration: 194187
loss: 0.9733604788780212,grad_norm: 0.9216077746949379, iteration: 194188
loss: 1.0034137964248657,grad_norm: 0.8939281353732543, iteration: 194189
loss: 0.9945815801620483,grad_norm: 0.9999991073663916, iteration: 194190
loss: 0.9882405996322632,grad_norm: 0.8050396869961312, iteration: 194191
loss: 1.0140910148620605,grad_norm: 0.9999993299751476, iteration: 194192
loss: 0.9839655160903931,grad_norm: 0.9999992662946001, iteration: 194193
loss: 0.9934689402580261,grad_norm: 0.9999990064982429, iteration: 194194
loss: 1.000962257385254,grad_norm: 0.9999989767645794, iteration: 194195
loss: 1.1983505487442017,grad_norm: 0.9999998065987447, iteration: 194196
loss: 1.0003712177276611,grad_norm: 0.9760575959129479, iteration: 194197
loss: 1.0254713296890259,grad_norm: 0.9999992419584713, iteration: 194198
loss: 1.0227330923080444,grad_norm: 0.8103424296263196, iteration: 194199
loss: 1.017795443534851,grad_norm: 0.9999992465950596, iteration: 194200
loss: 1.083711862564087,grad_norm: 0.9999995213857183, iteration: 194201
loss: 0.9913341403007507,grad_norm: 0.99999906085906, iteration: 194202
loss: 1.034407377243042,grad_norm: 0.9071699370466922, iteration: 194203
loss: 1.030116081237793,grad_norm: 0.9999990837507111, iteration: 194204
loss: 1.017957091331482,grad_norm: 0.9999990836732345, iteration: 194205
loss: 0.9676619172096252,grad_norm: 0.9390394412830062, iteration: 194206
loss: 0.9711145162582397,grad_norm: 0.8829784532249664, iteration: 194207
loss: 0.9908179640769958,grad_norm: 0.9999997663236824, iteration: 194208
loss: 0.990673840045929,grad_norm: 0.9052504767107681, iteration: 194209
loss: 0.9837448000907898,grad_norm: 0.9615756080010633, iteration: 194210
loss: 1.0119128227233887,grad_norm: 0.9999990099640965, iteration: 194211
loss: 0.9732359051704407,grad_norm: 0.9999991535778536, iteration: 194212
loss: 1.0211877822875977,grad_norm: 0.9999992270648735, iteration: 194213
loss: 1.022896409034729,grad_norm: 0.9999991094746077, iteration: 194214
loss: 1.0498517751693726,grad_norm: 0.9177059111043836, iteration: 194215
loss: 1.0037376880645752,grad_norm: 0.8779947675376634, iteration: 194216
loss: 1.000934362411499,grad_norm: 0.8218180651357472, iteration: 194217
loss: 1.018399715423584,grad_norm: 0.9357403106838273, iteration: 194218
loss: 1.000058889389038,grad_norm: 0.8709811522121639, iteration: 194219
loss: 0.9977284669876099,grad_norm: 0.8245373691728894, iteration: 194220
loss: 0.9773023128509521,grad_norm: 0.9999994761774762, iteration: 194221
loss: 0.9742906093597412,grad_norm: 0.8388129739835416, iteration: 194222
loss: 1.0281286239624023,grad_norm: 0.9999992790498214, iteration: 194223
loss: 0.9935609698295593,grad_norm: 0.9999994431767446, iteration: 194224
loss: 0.9801493287086487,grad_norm: 0.9350925825429552, iteration: 194225
loss: 0.9767882823944092,grad_norm: 0.7635462467852334, iteration: 194226
loss: 1.0412566661834717,grad_norm: 0.8706880811350053, iteration: 194227
loss: 1.0217888355255127,grad_norm: 0.9999991995964088, iteration: 194228
loss: 0.9909906387329102,grad_norm: 0.9522055018668748, iteration: 194229
loss: 1.201624870300293,grad_norm: 0.9999999055372442, iteration: 194230
loss: 1.0115817785263062,grad_norm: 0.9346559510591698, iteration: 194231
loss: 1.088191032409668,grad_norm: 0.9999997030033164, iteration: 194232
loss: 0.9984124302864075,grad_norm: 0.8568180899542782, iteration: 194233
loss: 1.010638952255249,grad_norm: 0.9214692247629566, iteration: 194234
loss: 1.0005594491958618,grad_norm: 0.9020868399712949, iteration: 194235
loss: 0.9719266295433044,grad_norm: 0.9999991145087738, iteration: 194236
loss: 1.0290677547454834,grad_norm: 0.9999991050254612, iteration: 194237
loss: 1.0380072593688965,grad_norm: 0.9999998838156183, iteration: 194238
loss: 1.002530813217163,grad_norm: 0.8313573950792076, iteration: 194239
loss: 1.010598063468933,grad_norm: 0.9999998569486492, iteration: 194240
loss: 0.979626476764679,grad_norm: 0.8031860757619687, iteration: 194241
loss: 0.9976213574409485,grad_norm: 0.999999018278842, iteration: 194242
loss: 0.9889850616455078,grad_norm: 0.834259578469589, iteration: 194243
loss: 1.1319581270217896,grad_norm: 0.9999998511933802, iteration: 194244
loss: 0.9783018231391907,grad_norm: 0.8950415106433945, iteration: 194245
loss: 0.9974470138549805,grad_norm: 0.999999316744361, iteration: 194246
loss: 1.0349372625350952,grad_norm: 0.9999995110346027, iteration: 194247
loss: 0.9954923987388611,grad_norm: 0.8400490722023747, iteration: 194248
loss: 0.9752147197723389,grad_norm: 0.8752289180133157, iteration: 194249
loss: 1.0246384143829346,grad_norm: 0.9999991648204978, iteration: 194250
loss: 0.9936079382896423,grad_norm: 0.8433259200388885, iteration: 194251
loss: 0.9933200478553772,grad_norm: 0.988970547054478, iteration: 194252
loss: 1.042099952697754,grad_norm: 0.9884910555971365, iteration: 194253
loss: 1.017974615097046,grad_norm: 0.9253826712024588, iteration: 194254
loss: 0.9929880499839783,grad_norm: 0.9375573931797678, iteration: 194255
loss: 0.9941099286079407,grad_norm: 0.9423849491774946, iteration: 194256
loss: 0.9914975166320801,grad_norm: 0.9581123201377558, iteration: 194257
loss: 1.0229392051696777,grad_norm: 0.9999991041275543, iteration: 194258
loss: 0.9770349860191345,grad_norm: 0.9736123718214185, iteration: 194259
loss: 0.9878070950508118,grad_norm: 0.9661709340892332, iteration: 194260
loss: 1.0146269798278809,grad_norm: 0.8852234004851844, iteration: 194261
loss: 0.9897248148918152,grad_norm: 0.87295739029569, iteration: 194262
loss: 0.9925872087478638,grad_norm: 0.9999990464833738, iteration: 194263
loss: 1.0581945180892944,grad_norm: 0.9999997031959742, iteration: 194264
loss: 0.9972610473632812,grad_norm: 0.9999997397456778, iteration: 194265
loss: 0.9935885071754456,grad_norm: 0.9999995365551688, iteration: 194266
loss: 1.028037428855896,grad_norm: 0.8477692581447576, iteration: 194267
loss: 1.0069324970245361,grad_norm: 0.999999210617355, iteration: 194268
loss: 0.9775164127349854,grad_norm: 0.9999991185521254, iteration: 194269
loss: 0.9695374965667725,grad_norm: 0.9999991686851174, iteration: 194270
loss: 1.0182108879089355,grad_norm: 0.9491459400197733, iteration: 194271
loss: 1.0086331367492676,grad_norm: 0.9999992172346165, iteration: 194272
loss: 1.0488548278808594,grad_norm: 0.9999998773599337, iteration: 194273
loss: 1.0009875297546387,grad_norm: 0.8541088072881589, iteration: 194274
loss: 1.0635477304458618,grad_norm: 0.9999992113182115, iteration: 194275
loss: 0.9910577535629272,grad_norm: 0.9807369249091256, iteration: 194276
loss: 1.029619574546814,grad_norm: 0.9999989929791693, iteration: 194277
loss: 1.0160983800888062,grad_norm: 0.9074307962128281, iteration: 194278
loss: 1.0030509233474731,grad_norm: 0.9999991739618052, iteration: 194279
loss: 1.0225530862808228,grad_norm: 0.8483749992328325, iteration: 194280
loss: 0.9858918190002441,grad_norm: 0.9642384861383418, iteration: 194281
loss: 1.0138981342315674,grad_norm: 0.9999993363722401, iteration: 194282
loss: 1.0024574995040894,grad_norm: 0.9999992711591592, iteration: 194283
loss: 0.9827424883842468,grad_norm: 0.9310718917748348, iteration: 194284
loss: 1.0273387432098389,grad_norm: 0.9480238317356793, iteration: 194285
loss: 0.9883309602737427,grad_norm: 0.9999993008774628, iteration: 194286
loss: 0.9829967617988586,grad_norm: 0.9326329680046771, iteration: 194287
loss: 0.9862127900123596,grad_norm: 0.9950278860564354, iteration: 194288
loss: 1.0455636978149414,grad_norm: 0.9305961881174525, iteration: 194289
loss: 1.0114673376083374,grad_norm: 0.9312563319832478, iteration: 194290
loss: 1.0156277418136597,grad_norm: 0.999999366993752, iteration: 194291
loss: 0.9851793050765991,grad_norm: 0.8517222372236919, iteration: 194292
loss: 1.0451078414916992,grad_norm: 0.9999998151044754, iteration: 194293
loss: 0.9850773811340332,grad_norm: 0.892284227831318, iteration: 194294
loss: 0.9891181588172913,grad_norm: 0.8026379546946821, iteration: 194295
loss: 0.9751183986663818,grad_norm: 0.9999991743299529, iteration: 194296
loss: 1.039689302444458,grad_norm: 0.9999991924732727, iteration: 194297
loss: 1.0240331888198853,grad_norm: 0.959408258913409, iteration: 194298
loss: 1.0004630088806152,grad_norm: 0.8801132777247886, iteration: 194299
loss: 1.009152889251709,grad_norm: 0.9590668703928615, iteration: 194300
loss: 1.0049322843551636,grad_norm: 0.8061910063804961, iteration: 194301
loss: 0.9866061806678772,grad_norm: 0.9999989339594765, iteration: 194302
loss: 1.0102252960205078,grad_norm: 0.9698026326231233, iteration: 194303
loss: 1.0052812099456787,grad_norm: 0.8622590725554886, iteration: 194304
loss: 0.9935674071311951,grad_norm: 0.9999990671844093, iteration: 194305
loss: 0.9841683506965637,grad_norm: 0.8801854606747437, iteration: 194306
loss: 1.022621989250183,grad_norm: 0.8862122468102478, iteration: 194307
loss: 0.9751342535018921,grad_norm: 0.9999994346492309, iteration: 194308
loss: 1.0188533067703247,grad_norm: 0.8616426033424904, iteration: 194309
loss: 1.015474796295166,grad_norm: 0.8226571415092725, iteration: 194310
loss: 1.010995626449585,grad_norm: 0.944597634558535, iteration: 194311
loss: 1.009034276008606,grad_norm: 0.9999990562080547, iteration: 194312
loss: 1.0000402927398682,grad_norm: 0.9999998583255486, iteration: 194313
loss: 1.019618034362793,grad_norm: 0.8484329179106131, iteration: 194314
loss: 1.028457522392273,grad_norm: 0.8988761852626946, iteration: 194315
loss: 1.0012037754058838,grad_norm: 0.9123588865130836, iteration: 194316
loss: 1.0480161905288696,grad_norm: 0.9999994214648482, iteration: 194317
loss: 0.9938479065895081,grad_norm: 0.9999995003153821, iteration: 194318
loss: 1.0136317014694214,grad_norm: 0.8060347365502118, iteration: 194319
loss: 0.9915786981582642,grad_norm: 0.9419515198455007, iteration: 194320
loss: 0.9970247745513916,grad_norm: 0.8836725610249132, iteration: 194321
loss: 1.0165069103240967,grad_norm: 0.9999993048259997, iteration: 194322
loss: 1.0028960704803467,grad_norm: 0.8402736783947496, iteration: 194323
loss: 1.0049171447753906,grad_norm: 0.8770949202740904, iteration: 194324
loss: 1.0576350688934326,grad_norm: 0.999999104904132, iteration: 194325
loss: 1.0226131677627563,grad_norm: 0.9999991654084713, iteration: 194326
loss: 1.0095088481903076,grad_norm: 0.999999106387336, iteration: 194327
loss: 1.031840205192566,grad_norm: 0.9503331359778682, iteration: 194328
loss: 1.027336835861206,grad_norm: 0.9999991513296345, iteration: 194329
loss: 0.982931911945343,grad_norm: 0.9558069576078976, iteration: 194330
loss: 1.0362327098846436,grad_norm: 0.9999993922123833, iteration: 194331
loss: 0.996534526348114,grad_norm: 0.9530793561352191, iteration: 194332
loss: 1.01657235622406,grad_norm: 0.9999995322924502, iteration: 194333
loss: 1.0509980916976929,grad_norm: 0.9591525560050448, iteration: 194334
loss: 0.9739399552345276,grad_norm: 0.9999995400880952, iteration: 194335
loss: 1.0336109399795532,grad_norm: 0.9999993788610093, iteration: 194336
loss: 0.975864827632904,grad_norm: 0.9999991868150089, iteration: 194337
loss: 0.9925152659416199,grad_norm: 0.9705973628578096, iteration: 194338
loss: 0.9876419305801392,grad_norm: 0.9437081990981977, iteration: 194339
loss: 0.9926006197929382,grad_norm: 0.8165481315801643, iteration: 194340
loss: 1.0331251621246338,grad_norm: 0.780698086368818, iteration: 194341
loss: 1.0155417919158936,grad_norm: 0.8143333702364436, iteration: 194342
loss: 0.9482008218765259,grad_norm: 0.9942086995328832, iteration: 194343
loss: 0.9647191166877747,grad_norm: 0.999999262234061, iteration: 194344
loss: 0.9711988568305969,grad_norm: 0.9263338940769928, iteration: 194345
loss: 0.9935912489891052,grad_norm: 0.9605167017230842, iteration: 194346
loss: 1.0394214391708374,grad_norm: 0.9999994567888785, iteration: 194347
loss: 1.0242969989776611,grad_norm: 0.9999990578995156, iteration: 194348
loss: 0.9690298438072205,grad_norm: 0.9999989915477534, iteration: 194349
loss: 1.0084400177001953,grad_norm: 0.9485133819850438, iteration: 194350
loss: 0.986997127532959,grad_norm: 0.9999990752888953, iteration: 194351
loss: 0.9923732280731201,grad_norm: 0.9999990602158517, iteration: 194352
loss: 1.014238715171814,grad_norm: 0.9999999914374987, iteration: 194353
loss: 0.9987096786499023,grad_norm: 0.9999991862510366, iteration: 194354
loss: 0.9928324818611145,grad_norm: 0.9122196053166887, iteration: 194355
loss: 1.0221959352493286,grad_norm: 0.9193741946371625, iteration: 194356
loss: 0.9968211650848389,grad_norm: 0.8776487124438528, iteration: 194357
loss: 1.050206184387207,grad_norm: 0.9999994426955394, iteration: 194358
loss: 0.9816860556602478,grad_norm: 0.9999989991785704, iteration: 194359
loss: 1.0322587490081787,grad_norm: 0.9999995308269174, iteration: 194360
loss: 0.9954150915145874,grad_norm: 0.9999989703640655, iteration: 194361
loss: 1.0081794261932373,grad_norm: 0.9532690053261473, iteration: 194362
loss: 0.9691393375396729,grad_norm: 0.9028073692635478, iteration: 194363
loss: 1.022376537322998,grad_norm: 0.9999991275543433, iteration: 194364
loss: 1.004935622215271,grad_norm: 0.999999039358515, iteration: 194365
loss: 1.007378339767456,grad_norm: 0.8896971119558693, iteration: 194366
loss: 1.0088021755218506,grad_norm: 0.9509440817907603, iteration: 194367
loss: 0.9720527529716492,grad_norm: 0.9999991757886137, iteration: 194368
loss: 1.0241570472717285,grad_norm: 0.9999991572353173, iteration: 194369
loss: 0.9924538135528564,grad_norm: 0.933055658424377, iteration: 194370
loss: 0.9953779578208923,grad_norm: 0.9999989921607783, iteration: 194371
loss: 1.0033763647079468,grad_norm: 0.9999992932387466, iteration: 194372
loss: 1.0068447589874268,grad_norm: 0.9536214836676474, iteration: 194373
loss: 1.0419992208480835,grad_norm: 0.999999266969748, iteration: 194374
loss: 1.0080164670944214,grad_norm: 0.9107192355782527, iteration: 194375
loss: 1.0189236402511597,grad_norm: 0.9999989333931408, iteration: 194376
loss: 0.9731388688087463,grad_norm: 0.8989685956993589, iteration: 194377
loss: 1.0176372528076172,grad_norm: 0.9263198765485272, iteration: 194378
loss: 1.0074999332427979,grad_norm: 0.9999993231116174, iteration: 194379
loss: 1.0010507106781006,grad_norm: 0.925855343073654, iteration: 194380
loss: 0.9560519456863403,grad_norm: 0.9999991748052469, iteration: 194381
loss: 1.0004310607910156,grad_norm: 0.8687868182858416, iteration: 194382
loss: 1.0084120035171509,grad_norm: 0.9539942945671239, iteration: 194383
loss: 1.0065306425094604,grad_norm: 0.9999993087991484, iteration: 194384
loss: 1.0148309469223022,grad_norm: 0.8747177939191224, iteration: 194385
loss: 1.0048604011535645,grad_norm: 0.9059909041984697, iteration: 194386
loss: 1.1784435510635376,grad_norm: 0.9999991355108617, iteration: 194387
loss: 0.985007643699646,grad_norm: 0.7908778354342694, iteration: 194388
loss: 1.0047433376312256,grad_norm: 0.9307931131307469, iteration: 194389
loss: 1.0365259647369385,grad_norm: 0.9293717359280684, iteration: 194390
loss: 0.9979824423789978,grad_norm: 0.9818145233937307, iteration: 194391
loss: 0.9618330597877502,grad_norm: 0.9999990446863981, iteration: 194392
loss: 1.023833155632019,grad_norm: 0.9999998588885615, iteration: 194393
loss: 0.9619028568267822,grad_norm: 0.9026733569827656, iteration: 194394
loss: 1.0065195560455322,grad_norm: 0.9999991330256479, iteration: 194395
loss: 0.9683936238288879,grad_norm: 0.9999991305366321, iteration: 194396
loss: 0.9729030132293701,grad_norm: 0.9432464145026797, iteration: 194397
loss: 0.9946216940879822,grad_norm: 0.820381380817218, iteration: 194398
loss: 1.062173843383789,grad_norm: 0.9999998981686933, iteration: 194399
loss: 1.0075715780258179,grad_norm: 0.944664557937742, iteration: 194400
loss: 1.0126280784606934,grad_norm: 0.8343060444417862, iteration: 194401
loss: 1.0229439735412598,grad_norm: 0.8776469544477045, iteration: 194402
loss: 1.0439410209655762,grad_norm: 0.9999991772434922, iteration: 194403
loss: 0.9767979383468628,grad_norm: 0.7889549018452752, iteration: 194404
loss: 1.0175291299819946,grad_norm: 0.981207161058537, iteration: 194405
loss: 0.987617552280426,grad_norm: 0.8470140568820156, iteration: 194406
loss: 1.0228110551834106,grad_norm: 0.9999995917596701, iteration: 194407
loss: 1.0062364339828491,grad_norm: 0.9999991068232382, iteration: 194408
loss: 0.9813534021377563,grad_norm: 0.9999991505723496, iteration: 194409
loss: 1.0473942756652832,grad_norm: 0.9999993518234034, iteration: 194410
loss: 1.0138508081436157,grad_norm: 0.9881890425723863, iteration: 194411
loss: 1.0129107236862183,grad_norm: 0.9630108803092431, iteration: 194412
loss: 1.0051467418670654,grad_norm: 0.9999990228447879, iteration: 194413
loss: 0.9761198163032532,grad_norm: 0.8988828438973703, iteration: 194414
loss: 1.0285913944244385,grad_norm: 0.8024540399476308, iteration: 194415
loss: 0.9873418211936951,grad_norm: 0.8778966611417072, iteration: 194416
loss: 0.9752731323242188,grad_norm: 0.9488991479696901, iteration: 194417
loss: 1.008223056793213,grad_norm: 0.989014011825112, iteration: 194418
loss: 0.9910791516304016,grad_norm: 0.9239249267514191, iteration: 194419
loss: 1.009966254234314,grad_norm: 0.9999990324322282, iteration: 194420
loss: 1.00886869430542,grad_norm: 0.9439812125422876, iteration: 194421
loss: 0.9790346622467041,grad_norm: 0.9307767757223079, iteration: 194422
loss: 0.9982627630233765,grad_norm: 0.9999799527346905, iteration: 194423
loss: 0.975003719329834,grad_norm: 0.9320709013788282, iteration: 194424
loss: 0.9804940819740295,grad_norm: 0.9049206318810228, iteration: 194425
loss: 0.9937652349472046,grad_norm: 0.9999991198100965, iteration: 194426
loss: 0.9479208588600159,grad_norm: 0.9434208753584219, iteration: 194427
loss: 0.9777901768684387,grad_norm: 0.9999992153768047, iteration: 194428
loss: 0.9687047004699707,grad_norm: 0.8557179180965818, iteration: 194429
loss: 1.0231138467788696,grad_norm: 0.9999992895466794, iteration: 194430
loss: 0.9951547980308533,grad_norm: 0.8431650357221089, iteration: 194431
loss: 0.9736032485961914,grad_norm: 0.9478646263451254, iteration: 194432
loss: 1.1010364294052124,grad_norm: 0.999999352825582, iteration: 194433
loss: 0.9954010248184204,grad_norm: 0.8318048282389612, iteration: 194434
loss: 0.9754287600517273,grad_norm: 0.8132862829167886, iteration: 194435
loss: 0.9766983389854431,grad_norm: 0.9331322882491846, iteration: 194436
loss: 1.0669111013412476,grad_norm: 0.9999998927062923, iteration: 194437
loss: 1.0131996870040894,grad_norm: 0.9243314140741752, iteration: 194438
loss: 0.9808319807052612,grad_norm: 0.9999993094814379, iteration: 194439
loss: 0.9984428882598877,grad_norm: 0.903861349669827, iteration: 194440
loss: 1.0262649059295654,grad_norm: 0.9999990581652968, iteration: 194441
loss: 0.9795674085617065,grad_norm: 0.9999990781336809, iteration: 194442
loss: 1.0560377836227417,grad_norm: 0.999999011521694, iteration: 194443
loss: 0.9989138841629028,grad_norm: 0.9708338591121902, iteration: 194444
loss: 0.9853073358535767,grad_norm: 0.9611517341034231, iteration: 194445
loss: 0.9854627847671509,grad_norm: 0.999999093243748, iteration: 194446
loss: 0.9994926452636719,grad_norm: 0.9162113135045804, iteration: 194447
loss: 1.011641263961792,grad_norm: 0.9999990271963116, iteration: 194448
loss: 0.9991427063941956,grad_norm: 0.854495149088131, iteration: 194449
loss: 0.9697450995445251,grad_norm: 0.9646167949236362, iteration: 194450
loss: 0.9648509621620178,grad_norm: 0.9701235528835603, iteration: 194451
loss: 0.9961851239204407,grad_norm: 0.9657794398920595, iteration: 194452
loss: 0.9660552144050598,grad_norm: 0.963629865162623, iteration: 194453
loss: 1.0114519596099854,grad_norm: 0.9722361027519428, iteration: 194454
loss: 1.0261133909225464,grad_norm: 0.9999989992942232, iteration: 194455
loss: 0.9991331696510315,grad_norm: 0.9404690441026331, iteration: 194456
loss: 1.1007585525512695,grad_norm: 0.9999993085166317, iteration: 194457
loss: 1.004902720451355,grad_norm: 0.9999991470047729, iteration: 194458
loss: 1.0009469985961914,grad_norm: 0.9159889915269009, iteration: 194459
loss: 0.9773547053337097,grad_norm: 0.9937518964990653, iteration: 194460
loss: 0.9678008556365967,grad_norm: 0.8591367682419058, iteration: 194461
loss: 1.0324980020523071,grad_norm: 0.9999990998007258, iteration: 194462
loss: 1.090849757194519,grad_norm: 0.9999991248114196, iteration: 194463
loss: 1.026667833328247,grad_norm: 0.9428255663834991, iteration: 194464
loss: 0.9947774410247803,grad_norm: 0.9999990691751096, iteration: 194465
loss: 0.9794492721557617,grad_norm: 0.9999996366262166, iteration: 194466
loss: 1.0066125392913818,grad_norm: 0.8556624064874383, iteration: 194467
loss: 1.0030016899108887,grad_norm: 0.9727674578711115, iteration: 194468
loss: 0.987260639667511,grad_norm: 0.9505045774009304, iteration: 194469
loss: 1.013648509979248,grad_norm: 0.941644086693353, iteration: 194470
loss: 1.030373454093933,grad_norm: 0.8738632457880166, iteration: 194471
loss: 0.9955236315727234,grad_norm: 0.9111572967785209, iteration: 194472
loss: 0.9516652226448059,grad_norm: 0.9999989967637152, iteration: 194473
loss: 1.0250426530838013,grad_norm: 0.999999080110997, iteration: 194474
loss: 0.9776238799095154,grad_norm: 0.9411173337967873, iteration: 194475
loss: 0.99465411901474,grad_norm: 0.8228616490831743, iteration: 194476
loss: 1.0138983726501465,grad_norm: 0.9530095132023024, iteration: 194477
loss: 0.9597935080528259,grad_norm: 0.9999997870192766, iteration: 194478
loss: 0.9909504055976868,grad_norm: 0.9999990708272596, iteration: 194479
loss: 1.0097626447677612,grad_norm: 0.9926701189741167, iteration: 194480
loss: 0.9708331823348999,grad_norm: 0.9999990523723233, iteration: 194481
loss: 0.9952223300933838,grad_norm: 0.8418142845975725, iteration: 194482
loss: 0.9818389415740967,grad_norm: 0.9999991925696219, iteration: 194483
loss: 1.0087968111038208,grad_norm: 0.7747523766122987, iteration: 194484
loss: 0.9884535670280457,grad_norm: 0.9999991193231655, iteration: 194485
loss: 0.9854863882064819,grad_norm: 0.9999990807608161, iteration: 194486
loss: 1.0122195482254028,grad_norm: 0.9999992459451241, iteration: 194487
loss: 1.0326799154281616,grad_norm: 0.8853808692414907, iteration: 194488
loss: 1.004989743232727,grad_norm: 0.8991875928913133, iteration: 194489
loss: 1.0239531993865967,grad_norm: 0.999999274895218, iteration: 194490
loss: 1.0131150484085083,grad_norm: 0.9273549265484765, iteration: 194491
loss: 1.042840600013733,grad_norm: 0.9999994326015318, iteration: 194492
loss: 1.0143470764160156,grad_norm: 0.9327378169213842, iteration: 194493
loss: 1.004407286643982,grad_norm: 0.9274674956044475, iteration: 194494
loss: 0.9789836406707764,grad_norm: 0.9999991670330027, iteration: 194495
loss: 1.0129199028015137,grad_norm: 0.9952739988876671, iteration: 194496
loss: 0.9984450340270996,grad_norm: 0.9999993127082889, iteration: 194497
loss: 1.0282200574874878,grad_norm: 0.9999994030089703, iteration: 194498
loss: 1.0018765926361084,grad_norm: 0.9318901295979601, iteration: 194499
loss: 0.9871631264686584,grad_norm: 0.8158062877079745, iteration: 194500
loss: 1.02577805519104,grad_norm: 0.8180664636911295, iteration: 194501
loss: 0.975031852722168,grad_norm: 0.9999991191236026, iteration: 194502
loss: 1.007535696029663,grad_norm: 0.9576686908182277, iteration: 194503
loss: 1.0001220703125,grad_norm: 0.9999990828869173, iteration: 194504
loss: 1.0117186307907104,grad_norm: 0.9447607748816119, iteration: 194505
loss: 0.9853015542030334,grad_norm: 0.999999019666035, iteration: 194506
loss: 0.9943662881851196,grad_norm: 0.9614928692681385, iteration: 194507
loss: 0.991769015789032,grad_norm: 0.9999991089393458, iteration: 194508
loss: 1.0231825113296509,grad_norm: 0.9999991706428963, iteration: 194509
loss: 1.1261111497879028,grad_norm: 0.9999994131521306, iteration: 194510
loss: 0.9765769243240356,grad_norm: 0.9999990185125092, iteration: 194511
loss: 1.005814790725708,grad_norm: 0.9334621326973229, iteration: 194512
loss: 1.0077260732650757,grad_norm: 0.9591841689032968, iteration: 194513
loss: 0.9983950257301331,grad_norm: 0.961928934580431, iteration: 194514
loss: 0.9854013919830322,grad_norm: 0.949064662163535, iteration: 194515
loss: 1.0342868566513062,grad_norm: 0.9999990193889453, iteration: 194516
loss: 1.0290946960449219,grad_norm: 0.9608668471755772, iteration: 194517
loss: 0.9940802454948425,grad_norm: 0.7852607814836298, iteration: 194518
loss: 1.0546478033065796,grad_norm: 0.9999992961425487, iteration: 194519
loss: 1.0078366994857788,grad_norm: 0.9509417303654613, iteration: 194520
loss: 0.9925718903541565,grad_norm: 0.9999992343686717, iteration: 194521
loss: 0.989896833896637,grad_norm: 0.9999991477917676, iteration: 194522
loss: 0.9989408254623413,grad_norm: 0.9999991281734879, iteration: 194523
loss: 1.0202897787094116,grad_norm: 0.8808183567449549, iteration: 194524
loss: 0.9779140949249268,grad_norm: 0.8591908617075648, iteration: 194525
loss: 0.9930370450019836,grad_norm: 0.9999991764566696, iteration: 194526
loss: 0.9951702952384949,grad_norm: 0.781311490642136, iteration: 194527
loss: 1.091873288154602,grad_norm: 0.9999995075952612, iteration: 194528
loss: 0.9556760191917419,grad_norm: 0.9850006551677127, iteration: 194529
loss: 1.0078563690185547,grad_norm: 0.9999989834194738, iteration: 194530
loss: 0.9972127079963684,grad_norm: 0.9852544474116632, iteration: 194531
loss: 1.0179256200790405,grad_norm: 0.8881167543743257, iteration: 194532
loss: 1.0167776346206665,grad_norm: 0.8308327655216412, iteration: 194533
loss: 1.0179497003555298,grad_norm: 0.9999991440775936, iteration: 194534
loss: 1.0035194158554077,grad_norm: 0.9601295213808905, iteration: 194535
loss: 1.0200660228729248,grad_norm: 0.9999990216594312, iteration: 194536
loss: 0.9823000431060791,grad_norm: 0.9139087632758703, iteration: 194537
loss: 1.2072142362594604,grad_norm: 0.9999997468643351, iteration: 194538
loss: 0.9714652299880981,grad_norm: 0.9666884259390525, iteration: 194539
loss: 0.9751414656639099,grad_norm: 0.9675817329698627, iteration: 194540
loss: 1.0883300304412842,grad_norm: 0.9999990802388652, iteration: 194541
loss: 1.0075668096542358,grad_norm: 0.9999991732824575, iteration: 194542
loss: 0.9774706959724426,grad_norm: 0.9740084800322787, iteration: 194543
loss: 0.9739053249359131,grad_norm: 0.9999991983886678, iteration: 194544
loss: 1.0309826135635376,grad_norm: 0.9999991005883321, iteration: 194545
loss: 0.9784994721412659,grad_norm: 0.9999990415134468, iteration: 194546
loss: 0.9724100828170776,grad_norm: 0.9479379173114929, iteration: 194547
loss: 1.0012710094451904,grad_norm: 0.9532795637733086, iteration: 194548
loss: 1.1072951555252075,grad_norm: 0.999999537893565, iteration: 194549
loss: 1.0059281587600708,grad_norm: 0.9999991193360142, iteration: 194550
loss: 0.9620360136032104,grad_norm: 0.9999991137766079, iteration: 194551
loss: 1.0127359628677368,grad_norm: 0.9999990502274356, iteration: 194552
loss: 1.0259578227996826,grad_norm: 0.9999994467158326, iteration: 194553
loss: 1.022644281387329,grad_norm: 0.9715906712378728, iteration: 194554
loss: 1.1841613054275513,grad_norm: 0.9999994354035361, iteration: 194555
loss: 1.0559556484222412,grad_norm: 0.9999990736335372, iteration: 194556
loss: 1.121667504310608,grad_norm: 0.9999991227401216, iteration: 194557
loss: 0.9556775093078613,grad_norm: 0.9228047745137143, iteration: 194558
loss: 1.0003196001052856,grad_norm: 0.9999998229457876, iteration: 194559
loss: 1.0760319232940674,grad_norm: 0.9939403268885437, iteration: 194560
loss: 0.9906567931175232,grad_norm: 0.9999993565191918, iteration: 194561
loss: 0.9504572749137878,grad_norm: 0.9380613590425017, iteration: 194562
loss: 0.9782807230949402,grad_norm: 0.9599815410924937, iteration: 194563
loss: 1.0292294025421143,grad_norm: 0.8188269895387577, iteration: 194564
loss: 1.0116629600524902,grad_norm: 0.9239324234609406, iteration: 194565
loss: 1.0689295530319214,grad_norm: 0.9999989776269713, iteration: 194566
loss: 1.0997847318649292,grad_norm: 0.9999999461240291, iteration: 194567
loss: 0.9837803244590759,grad_norm: 0.9999990227646339, iteration: 194568
loss: 0.989955723285675,grad_norm: 0.9999991368528515, iteration: 194569
loss: 1.014906644821167,grad_norm: 0.8024339410809934, iteration: 194570
loss: 1.0105090141296387,grad_norm: 0.9999996085648157, iteration: 194571
loss: 1.0113029479980469,grad_norm: 0.9999991707517311, iteration: 194572
loss: 1.0673421621322632,grad_norm: 0.999999205469907, iteration: 194573
loss: 0.9989936947822571,grad_norm: 0.9999992834821447, iteration: 194574
loss: 0.9984617233276367,grad_norm: 0.999999853887555, iteration: 194575
loss: 1.140140175819397,grad_norm: 0.999999411861946, iteration: 194576
loss: 0.9879080057144165,grad_norm: 0.9606678910453026, iteration: 194577
loss: 0.9941707849502563,grad_norm: 0.8946363359665147, iteration: 194578
loss: 1.0756449699401855,grad_norm: 0.9999991708718846, iteration: 194579
loss: 1.003659963607788,grad_norm: 0.9087394338811431, iteration: 194580
loss: 1.0093766450881958,grad_norm: 0.9999990417216522, iteration: 194581
loss: 1.0340602397918701,grad_norm: 0.9999990336781173, iteration: 194582
loss: 1.0400621891021729,grad_norm: 0.719133281700013, iteration: 194583
loss: 1.0391923189163208,grad_norm: 0.99999917894732, iteration: 194584
loss: 1.0414267778396606,grad_norm: 0.9999993222638744, iteration: 194585
loss: 1.0308399200439453,grad_norm: 0.8827128152528056, iteration: 194586
loss: 0.9904630780220032,grad_norm: 0.9322385292565681, iteration: 194587
loss: 1.037985920906067,grad_norm: 0.9999993504188475, iteration: 194588
loss: 1.0099753141403198,grad_norm: 0.8783341341939631, iteration: 194589
loss: 1.0592873096466064,grad_norm: 0.9999990754707047, iteration: 194590
loss: 0.9899420142173767,grad_norm: 0.909183658756886, iteration: 194591
loss: 1.0484426021575928,grad_norm: 0.9999991681695651, iteration: 194592
loss: 1.0214706659317017,grad_norm: 0.8888065753398359, iteration: 194593
loss: 1.1590137481689453,grad_norm: 0.9999994755522726, iteration: 194594
loss: 1.0175132751464844,grad_norm: 0.9999991701322346, iteration: 194595
loss: 1.0066384077072144,grad_norm: 0.9999990567679755, iteration: 194596
loss: 0.9860880374908447,grad_norm: 0.980821140929362, iteration: 194597
loss: 0.979127049446106,grad_norm: 0.9282420786458291, iteration: 194598
loss: 0.984979510307312,grad_norm: 0.8556829490849986, iteration: 194599
loss: 0.9986019134521484,grad_norm: 0.9999993657546915, iteration: 194600
loss: 0.9905169010162354,grad_norm: 0.9235740058654484, iteration: 194601
loss: 0.9970653653144836,grad_norm: 0.9929987123508406, iteration: 194602
loss: 1.0108661651611328,grad_norm: 0.9999992579289041, iteration: 194603
loss: 0.9803521037101746,grad_norm: 0.9999993567699192, iteration: 194604
loss: 0.9748534560203552,grad_norm: 0.8448246297497904, iteration: 194605
loss: 0.9698752164840698,grad_norm: 0.8492043062334225, iteration: 194606
loss: 0.9725756645202637,grad_norm: 0.9999994353137047, iteration: 194607
loss: 0.9738819003105164,grad_norm: 0.8896851285560765, iteration: 194608
loss: 0.9885092377662659,grad_norm: 0.9999991858874291, iteration: 194609
loss: 0.9788826107978821,grad_norm: 0.9135461136541738, iteration: 194610
loss: 1.0621592998504639,grad_norm: 0.9999999434542172, iteration: 194611
loss: 1.0515788793563843,grad_norm: 0.9999997685849455, iteration: 194612
loss: 0.9644215106964111,grad_norm: 0.9999989124750412, iteration: 194613
loss: 1.065443515777588,grad_norm: 0.9842903850823058, iteration: 194614
loss: 1.011995792388916,grad_norm: 0.9249796673630111, iteration: 194615
loss: 0.9581041932106018,grad_norm: 0.9736632225595965, iteration: 194616
loss: 0.9861621856689453,grad_norm: 0.9999990982437695, iteration: 194617
loss: 0.9955872297286987,grad_norm: 0.8452951536003792, iteration: 194618
loss: 0.9977683424949646,grad_norm: 0.8816415193303355, iteration: 194619
loss: 0.9507765173912048,grad_norm: 0.9389262022347529, iteration: 194620
loss: 0.9520519375801086,grad_norm: 0.99999936833312, iteration: 194621
loss: 1.1237860918045044,grad_norm: 0.9999999230227223, iteration: 194622
loss: 0.9947119951248169,grad_norm: 0.9301042995256138, iteration: 194623
loss: 1.0608927011489868,grad_norm: 0.9999995257607115, iteration: 194624
loss: 0.9804168343544006,grad_norm: 0.9714477662946993, iteration: 194625
loss: 1.0019134283065796,grad_norm: 0.8639531105059488, iteration: 194626
loss: 0.9847865700721741,grad_norm: 0.9301964188766374, iteration: 194627
loss: 1.0210509300231934,grad_norm: 0.9937118469839696, iteration: 194628
loss: 0.9546127319335938,grad_norm: 0.9807235555714392, iteration: 194629
loss: 0.9757806062698364,grad_norm: 0.9999992028329199, iteration: 194630
loss: 0.9951217770576477,grad_norm: 0.7928596768420108, iteration: 194631
loss: 0.9807953238487244,grad_norm: 0.8794820040941299, iteration: 194632
loss: 1.0572413206100464,grad_norm: 0.9999995658626, iteration: 194633
loss: 0.9780376553535461,grad_norm: 0.9999992314079943, iteration: 194634
loss: 1.0103733539581299,grad_norm: 0.9999991250520791, iteration: 194635
loss: 1.0063880681991577,grad_norm: 0.9999989701304463, iteration: 194636
loss: 0.9983269572257996,grad_norm: 0.9999991431898776, iteration: 194637
loss: 1.1136069297790527,grad_norm: 0.9760648065116502, iteration: 194638
loss: 1.0372188091278076,grad_norm: 0.9999990714427428, iteration: 194639
loss: 1.0026965141296387,grad_norm: 0.9808252526529601, iteration: 194640
loss: 1.0243089199066162,grad_norm: 0.9999991328192114, iteration: 194641
loss: 0.9932470917701721,grad_norm: 0.8790321791427789, iteration: 194642
loss: 1.034551978111267,grad_norm: 0.9999990953935012, iteration: 194643
loss: 0.9382320642471313,grad_norm: 0.8958983934087771, iteration: 194644
loss: 0.9984362125396729,grad_norm: 0.8074340856039972, iteration: 194645
loss: 1.1047917604446411,grad_norm: 0.9999991620814259, iteration: 194646
loss: 0.9442352652549744,grad_norm: 0.8325207101644618, iteration: 194647
loss: 1.0166246891021729,grad_norm: 0.8308070580837846, iteration: 194648
loss: 1.0022532939910889,grad_norm: 0.9026362352479586, iteration: 194649
loss: 0.9979261755943298,grad_norm: 0.9315819012770219, iteration: 194650
loss: 1.0057653188705444,grad_norm: 0.999999206972142, iteration: 194651
loss: 1.017736554145813,grad_norm: 0.8331943184665906, iteration: 194652
loss: 0.993471622467041,grad_norm: 0.9570636630497201, iteration: 194653
loss: 1.0217777490615845,grad_norm: 0.8108461865129933, iteration: 194654
loss: 1.006176233291626,grad_norm: 0.9999991139690939, iteration: 194655
loss: 0.979967474937439,grad_norm: 0.9999990318935054, iteration: 194656
loss: 1.0637474060058594,grad_norm: 0.904786751753607, iteration: 194657
loss: 1.0325031280517578,grad_norm: 0.999999075024263, iteration: 194658
loss: 0.9865227937698364,grad_norm: 0.9475992563238467, iteration: 194659
loss: 0.9985633492469788,grad_norm: 0.8843002968179352, iteration: 194660
loss: 0.9896573424339294,grad_norm: 0.9999991768514547, iteration: 194661
loss: 0.9857068061828613,grad_norm: 0.9999990717341715, iteration: 194662
loss: 0.9746534824371338,grad_norm: 0.9045523015229475, iteration: 194663
loss: 1.0391895771026611,grad_norm: 0.9999989074243479, iteration: 194664
loss: 1.0212666988372803,grad_norm: 0.8988658632751124, iteration: 194665
loss: 0.9799553751945496,grad_norm: 0.8260230916271987, iteration: 194666
loss: 0.9562125205993652,grad_norm: 0.9999990263416347, iteration: 194667
loss: 0.9961928725242615,grad_norm: 0.8026094965288773, iteration: 194668
loss: 1.022968053817749,grad_norm: 0.9999992196558172, iteration: 194669
loss: 0.967268705368042,grad_norm: 0.9641094212687836, iteration: 194670
loss: 1.0290342569351196,grad_norm: 0.9999991848708094, iteration: 194671
loss: 1.0303053855895996,grad_norm: 0.9999991614784816, iteration: 194672
loss: 1.0089854001998901,grad_norm: 0.999998851111777, iteration: 194673
loss: 1.0370550155639648,grad_norm: 0.9999992761081052, iteration: 194674
loss: 1.0133625268936157,grad_norm: 0.9999992449822466, iteration: 194675
loss: 0.9862354397773743,grad_norm: 0.9595648711940663, iteration: 194676
loss: 1.0297749042510986,grad_norm: 0.9548941720743281, iteration: 194677
loss: 0.9734238386154175,grad_norm: 0.9121203503720997, iteration: 194678
loss: 1.0215015411376953,grad_norm: 0.9999993251185755, iteration: 194679
loss: 0.9917365312576294,grad_norm: 0.8914240287874856, iteration: 194680
loss: 1.0150395631790161,grad_norm: 0.9999991763346436, iteration: 194681
loss: 0.9944347739219666,grad_norm: 0.8017169021646595, iteration: 194682
loss: 1.0389573574066162,grad_norm: 0.9999992795781462, iteration: 194683
loss: 0.9991685152053833,grad_norm: 0.8180237825308893, iteration: 194684
loss: 1.0368601083755493,grad_norm: 0.9999989862532472, iteration: 194685
loss: 1.0283640623092651,grad_norm: 0.9478907141693419, iteration: 194686
loss: 1.0227258205413818,grad_norm: 0.9999990645214885, iteration: 194687
loss: 0.9980652928352356,grad_norm: 0.9474964798343626, iteration: 194688
loss: 0.9871450662612915,grad_norm: 0.9999992143935575, iteration: 194689
loss: 0.9827389121055603,grad_norm: 0.97258747722501, iteration: 194690
loss: 0.9879197478294373,grad_norm: 0.7902836155404139, iteration: 194691
loss: 1.0056493282318115,grad_norm: 0.9180813386146847, iteration: 194692
loss: 0.9999316334724426,grad_norm: 0.9837989326968077, iteration: 194693
loss: 1.0215506553649902,grad_norm: 0.9999990453689962, iteration: 194694
loss: 1.0470826625823975,grad_norm: 0.8873368606146066, iteration: 194695
loss: 1.0011268854141235,grad_norm: 0.9999990024379997, iteration: 194696
loss: 1.0043764114379883,grad_norm: 0.8185188797366021, iteration: 194697
loss: 0.981484055519104,grad_norm: 0.9999990357486797, iteration: 194698
loss: 0.9814978241920471,grad_norm: 0.999999166655428, iteration: 194699
loss: 1.0162081718444824,grad_norm: 0.999999142018325, iteration: 194700
loss: 0.9538449645042419,grad_norm: 0.8033953522381748, iteration: 194701
loss: 1.0549973249435425,grad_norm: 0.9999992325458504, iteration: 194702
loss: 1.0178714990615845,grad_norm: 0.9248562986819733, iteration: 194703
loss: 1.0235486030578613,grad_norm: 0.8568712319592708, iteration: 194704
loss: 1.0520493984222412,grad_norm: 0.9761126090895096, iteration: 194705
loss: 1.0134825706481934,grad_norm: 0.9729476114221988, iteration: 194706
loss: 0.9704063534736633,grad_norm: 0.9999991355960531, iteration: 194707
loss: 0.9388042092323303,grad_norm: 0.9999989999188048, iteration: 194708
loss: 0.9836485385894775,grad_norm: 0.9589131732380017, iteration: 194709
loss: 1.000402569770813,grad_norm: 0.9999992362426554, iteration: 194710
loss: 0.9950488209724426,grad_norm: 0.8820142645502592, iteration: 194711
loss: 1.1468411684036255,grad_norm: 0.9999998217712278, iteration: 194712
loss: 1.0242507457733154,grad_norm: 0.8075766023829863, iteration: 194713
loss: 1.0340667963027954,grad_norm: 0.9999994940258893, iteration: 194714
loss: 1.0294761657714844,grad_norm: 0.9999998454685158, iteration: 194715
loss: 0.9950737953186035,grad_norm: 0.999999151872402, iteration: 194716
loss: 0.9672114849090576,grad_norm: 0.9108744802450235, iteration: 194717
loss: 0.9883807897567749,grad_norm: 0.8571559296315348, iteration: 194718
loss: 1.0019680261611938,grad_norm: 0.9999991938809587, iteration: 194719
loss: 1.001485824584961,grad_norm: 0.9999990443182414, iteration: 194720
loss: 1.0345274209976196,grad_norm: 0.9896489489834187, iteration: 194721
loss: 1.0123379230499268,grad_norm: 0.9999991223534483, iteration: 194722
loss: 0.9986540079116821,grad_norm: 0.9141674378599056, iteration: 194723
loss: 0.9592687487602234,grad_norm: 0.9260354911479008, iteration: 194724
loss: 0.9880204200744629,grad_norm: 0.999999051963892, iteration: 194725
loss: 1.003190279006958,grad_norm: 0.999999357721903, iteration: 194726
loss: 0.9899928569793701,grad_norm: 0.9999992473535919, iteration: 194727
loss: 1.0507144927978516,grad_norm: 0.9999998902829901, iteration: 194728
loss: 1.0338821411132812,grad_norm: 0.8835506308897786, iteration: 194729
loss: 1.0161412954330444,grad_norm: 0.9146644240696044, iteration: 194730
loss: 1.0001684427261353,grad_norm: 0.9080167003108252, iteration: 194731
loss: 0.9718111753463745,grad_norm: 0.8652700791148523, iteration: 194732
loss: 1.036999225616455,grad_norm: 0.9386486774531919, iteration: 194733
loss: 1.0166828632354736,grad_norm: 0.9999989794345664, iteration: 194734
loss: 1.0336289405822754,grad_norm: 0.9999999740113615, iteration: 194735
loss: 1.010087013244629,grad_norm: 0.9999991583853896, iteration: 194736
loss: 0.9703612327575684,grad_norm: 0.8331357211399062, iteration: 194737
loss: 0.9896082878112793,grad_norm: 0.9649775364927118, iteration: 194738
loss: 1.010046124458313,grad_norm: 0.9091067908477414, iteration: 194739
loss: 0.9907983541488647,grad_norm: 0.9429628277887091, iteration: 194740
loss: 1.0533928871154785,grad_norm: 0.9999997815399077, iteration: 194741
loss: 1.0056778192520142,grad_norm: 0.9999993577407863, iteration: 194742
loss: 1.0349440574645996,grad_norm: 0.9999991035268334, iteration: 194743
loss: 0.967568576335907,grad_norm: 0.8787818349459189, iteration: 194744
loss: 1.0029361248016357,grad_norm: 0.9665909219004615, iteration: 194745
loss: 1.025683879852295,grad_norm: 0.8831577331275843, iteration: 194746
loss: 1.0041511058807373,grad_norm: 0.998123633811603, iteration: 194747
loss: 0.9986371397972107,grad_norm: 0.9999990882283902, iteration: 194748
loss: 1.0600695610046387,grad_norm: 0.999998952402077, iteration: 194749
loss: 1.001174807548523,grad_norm: 0.9563021562676639, iteration: 194750
loss: 0.9832988381385803,grad_norm: 0.9097624763622998, iteration: 194751
loss: 1.044734239578247,grad_norm: 0.9196135444936853, iteration: 194752
loss: 1.0339452028274536,grad_norm: 0.9999991162263335, iteration: 194753
loss: 1.0336592197418213,grad_norm: 0.9967285124795405, iteration: 194754
loss: 0.9546774625778198,grad_norm: 0.8687749154954441, iteration: 194755
loss: 0.9719793796539307,grad_norm: 0.9999991074192973, iteration: 194756
loss: 0.9686688780784607,grad_norm: 0.9924714343386415, iteration: 194757
loss: 1.0085294246673584,grad_norm: 0.9999992167954396, iteration: 194758
loss: 1.0098789930343628,grad_norm: 0.8556828968483884, iteration: 194759
loss: 0.9966700673103333,grad_norm: 0.8269234080910651, iteration: 194760
loss: 0.9923468232154846,grad_norm: 0.9999991820592617, iteration: 194761
loss: 1.0107041597366333,grad_norm: 0.9999990960582862, iteration: 194762
loss: 0.9935398697853088,grad_norm: 0.9865544263878014, iteration: 194763
loss: 0.9762558341026306,grad_norm: 0.9441828361095398, iteration: 194764
loss: 0.994825005531311,grad_norm: 0.858504813283793, iteration: 194765
loss: 1.0422505140304565,grad_norm: 0.9999990478567865, iteration: 194766
loss: 1.0199569463729858,grad_norm: 0.9759354973122197, iteration: 194767
loss: 1.0286216735839844,grad_norm: 0.844074736849873, iteration: 194768
loss: 0.982481837272644,grad_norm: 0.9999990325684067, iteration: 194769
loss: 1.0778601169586182,grad_norm: 0.9532500733377691, iteration: 194770
loss: 0.9751010537147522,grad_norm: 0.9999989723933266, iteration: 194771
loss: 0.9781086444854736,grad_norm: 0.8533778910152, iteration: 194772
loss: 0.9867206811904907,grad_norm: 0.9986790366179434, iteration: 194773
loss: 1.0898667573928833,grad_norm: 0.9562927496793396, iteration: 194774
loss: 1.018633246421814,grad_norm: 0.9450327332997099, iteration: 194775
loss: 1.1786143779754639,grad_norm: 0.9999996820313656, iteration: 194776
loss: 1.052538275718689,grad_norm: 0.9999994667268693, iteration: 194777
loss: 1.0270884037017822,grad_norm: 0.9999996437035453, iteration: 194778
loss: 1.0356526374816895,grad_norm: 0.8719992370836055, iteration: 194779
loss: 1.1615575551986694,grad_norm: 0.9999999132030506, iteration: 194780
loss: 1.1435950994491577,grad_norm: 0.9999990586541991, iteration: 194781
loss: 1.0047050714492798,grad_norm: 0.7538084584775064, iteration: 194782
loss: 1.0151793956756592,grad_norm: 0.9741243780713132, iteration: 194783
loss: 0.9599902033805847,grad_norm: 0.8199550374150705, iteration: 194784
loss: 0.9918564558029175,grad_norm: 0.9460914916671186, iteration: 194785
loss: 1.0687443017959595,grad_norm: 0.9727101574515635, iteration: 194786
loss: 0.9586222767829895,grad_norm: 0.9999991377509804, iteration: 194787
loss: 1.0105106830596924,grad_norm: 0.9948359599230002, iteration: 194788
loss: 1.0590349435806274,grad_norm: 0.7970696544162222, iteration: 194789
loss: 1.0222396850585938,grad_norm: 0.9999991089743132, iteration: 194790
loss: 1.0872153043746948,grad_norm: 0.9999990855770842, iteration: 194791
loss: 1.0639854669570923,grad_norm: 0.9999995348283445, iteration: 194792
loss: 1.0648845434188843,grad_norm: 0.999999472024654, iteration: 194793
loss: 0.9658704400062561,grad_norm: 0.9917750951783851, iteration: 194794
loss: 1.0788487195968628,grad_norm: 0.9999991787888525, iteration: 194795
loss: 0.9592861533164978,grad_norm: 0.9999990994680774, iteration: 194796
loss: 1.0506194829940796,grad_norm: 0.9153211474013938, iteration: 194797
loss: 0.9898937940597534,grad_norm: 0.8561087644484281, iteration: 194798
loss: 0.9913804531097412,grad_norm: 0.9999991842582091, iteration: 194799
loss: 1.0163540840148926,grad_norm: 0.9578728660840607, iteration: 194800
loss: 0.9903352856636047,grad_norm: 0.9999991989840823, iteration: 194801
loss: 1.003732681274414,grad_norm: 0.9372030351090364, iteration: 194802
loss: 1.008270502090454,grad_norm: 0.7996233351677774, iteration: 194803
loss: 0.9964887499809265,grad_norm: 0.9999991701300623, iteration: 194804
loss: 0.9915826320648193,grad_norm: 0.9608766686577177, iteration: 194805
loss: 1.0442960262298584,grad_norm: 0.999999208883089, iteration: 194806
loss: 0.957866907119751,grad_norm: 0.9982608212735485, iteration: 194807
loss: 0.9670417308807373,grad_norm: 0.9147871271824773, iteration: 194808
loss: 0.9751030802726746,grad_norm: 0.9999990504205615, iteration: 194809
loss: 0.9989579916000366,grad_norm: 0.8489351506249995, iteration: 194810
loss: 1.0165451765060425,grad_norm: 0.9586890822299462, iteration: 194811
loss: 0.9877890348434448,grad_norm: 0.9999998463751866, iteration: 194812
loss: 0.9861826300621033,grad_norm: 0.8628394741579006, iteration: 194813
loss: 1.01454496383667,grad_norm: 0.9634207502399073, iteration: 194814
loss: 0.976959764957428,grad_norm: 0.9668023591662378, iteration: 194815
loss: 0.9771450161933899,grad_norm: 0.8770383896043, iteration: 194816
loss: 0.9680014848709106,grad_norm: 0.9999993037509034, iteration: 194817
loss: 1.0178614854812622,grad_norm: 0.9517131760725062, iteration: 194818
loss: 1.0194036960601807,grad_norm: 0.940914605250032, iteration: 194819
loss: 0.9981503486633301,grad_norm: 0.9999991293331613, iteration: 194820
loss: 1.0776339769363403,grad_norm: 0.9999993920342866, iteration: 194821
loss: 0.9847085475921631,grad_norm: 0.999999527775754, iteration: 194822
loss: 1.0142186880111694,grad_norm: 0.9999991770232818, iteration: 194823
loss: 1.0196658372879028,grad_norm: 0.8583829587527535, iteration: 194824
loss: 1.0041881799697876,grad_norm: 0.999999163188256, iteration: 194825
loss: 1.0306276082992554,grad_norm: 0.9999990538691287, iteration: 194826
loss: 1.0038549900054932,grad_norm: 0.9999990101399145, iteration: 194827
loss: 0.9904231429100037,grad_norm: 0.899486129390751, iteration: 194828
loss: 0.9775908589363098,grad_norm: 0.884228694741841, iteration: 194829
loss: 0.9930024743080139,grad_norm: 0.9487884278104002, iteration: 194830
loss: 1.0300105810165405,grad_norm: 0.9999991040925074, iteration: 194831
loss: 0.9754908084869385,grad_norm: 0.8457144018196825, iteration: 194832
loss: 0.9904619455337524,grad_norm: 0.9999991018088111, iteration: 194833
loss: 1.0356452465057373,grad_norm: 0.9999992267873902, iteration: 194834
loss: 1.0205187797546387,grad_norm: 0.9062389175021853, iteration: 194835
loss: 1.0125930309295654,grad_norm: 0.999999079736493, iteration: 194836
loss: 1.0058015584945679,grad_norm: 0.9999990393596129, iteration: 194837
loss: 1.0421442985534668,grad_norm: 0.9999991935786816, iteration: 194838
loss: 1.0357245206832886,grad_norm: 0.9639560668011264, iteration: 194839
loss: 1.006442666053772,grad_norm: 0.9999991093567502, iteration: 194840
loss: 0.9710500836372375,grad_norm: 0.9999991484860522, iteration: 194841
loss: 1.03026282787323,grad_norm: 0.9999991789383054, iteration: 194842
loss: 1.00507390499115,grad_norm: 0.7812700480374539, iteration: 194843
loss: 0.953942596912384,grad_norm: 0.999999115622281, iteration: 194844
loss: 1.02122962474823,grad_norm: 0.9999991670780193, iteration: 194845
loss: 1.0000720024108887,grad_norm: 0.9985081800039012, iteration: 194846
loss: 1.009413719177246,grad_norm: 0.9614646447345726, iteration: 194847
loss: 0.9968806505203247,grad_norm: 0.9664705855978141, iteration: 194848
loss: 0.9727070927619934,grad_norm: 0.9422005524394123, iteration: 194849
loss: 1.0216196775436401,grad_norm: 0.9192414173646021, iteration: 194850
loss: 1.0116537809371948,grad_norm: 0.9999992155205071, iteration: 194851
loss: 1.0090463161468506,grad_norm: 0.9999991105742073, iteration: 194852
loss: 0.9891360402107239,grad_norm: 0.8795599924732141, iteration: 194853
loss: 0.9991549849510193,grad_norm: 0.8921300394963971, iteration: 194854
loss: 1.0076974630355835,grad_norm: 0.8942798913458706, iteration: 194855
loss: 1.0326653718948364,grad_norm: 0.9999991609980367, iteration: 194856
loss: 1.0208498239517212,grad_norm: 0.9999990524056892, iteration: 194857
loss: 0.9949544072151184,grad_norm: 0.976125750733528, iteration: 194858
loss: 0.9972296357154846,grad_norm: 0.9999989864758407, iteration: 194859
loss: 1.0003483295440674,grad_norm: 0.9999991065580695, iteration: 194860
loss: 1.0444461107254028,grad_norm: 0.9999989679517811, iteration: 194861
loss: 1.006292700767517,grad_norm: 0.9999990380582561, iteration: 194862
loss: 0.9603269100189209,grad_norm: 0.999998845247703, iteration: 194863
loss: 1.0133436918258667,grad_norm: 0.9397090105812508, iteration: 194864
loss: 0.9944506287574768,grad_norm: 0.9999990961539078, iteration: 194865
loss: 1.0010018348693848,grad_norm: 0.9950959609254699, iteration: 194866
loss: 0.9848718047142029,grad_norm: 0.9552128316274658, iteration: 194867
loss: 1.0000066757202148,grad_norm: 0.8812448065568087, iteration: 194868
loss: 1.032761812210083,grad_norm: 0.9077671870602669, iteration: 194869
loss: 0.9989503026008606,grad_norm: 0.9999990467421647, iteration: 194870
loss: 1.003153920173645,grad_norm: 0.90301327142382, iteration: 194871
loss: 1.0293738842010498,grad_norm: 0.9096863823491278, iteration: 194872
loss: 0.9993788599967957,grad_norm: 0.9999991782468168, iteration: 194873
loss: 0.9884623885154724,grad_norm: 0.9421298057096305, iteration: 194874
loss: 0.9993209838867188,grad_norm: 0.9790739763579275, iteration: 194875
loss: 0.9871169328689575,grad_norm: 0.994562421313294, iteration: 194876
loss: 0.9658119678497314,grad_norm: 0.8935195191636067, iteration: 194877
loss: 0.9993310570716858,grad_norm: 0.8059120181261854, iteration: 194878
loss: 0.9840402007102966,grad_norm: 0.9609375984446409, iteration: 194879
loss: 0.9670090675354004,grad_norm: 0.9054013066675934, iteration: 194880
loss: 1.0412811040878296,grad_norm: 1.0000000402880413, iteration: 194881
loss: 1.0329937934875488,grad_norm: 0.9999995740539337, iteration: 194882
loss: 0.9995284676551819,grad_norm: 0.9430756401492032, iteration: 194883
loss: 1.0641509294509888,grad_norm: 0.9999997031036716, iteration: 194884
loss: 1.0013893842697144,grad_norm: 0.9999991067973644, iteration: 194885
loss: 1.0007268190383911,grad_norm: 0.9999991719294303, iteration: 194886
loss: 1.0174263715744019,grad_norm: 0.9999993610966634, iteration: 194887
loss: 0.991729736328125,grad_norm: 0.9339099571566187, iteration: 194888
loss: 1.011154055595398,grad_norm: 0.888288121194564, iteration: 194889
loss: 0.9884383678436279,grad_norm: 0.9999995970758218, iteration: 194890
loss: 1.0249674320220947,grad_norm: 0.9999991252186061, iteration: 194891
loss: 0.9993682503700256,grad_norm: 0.9999990270453081, iteration: 194892
loss: 1.0485132932662964,grad_norm: 0.9999998966470289, iteration: 194893
loss: 1.070050835609436,grad_norm: 0.999999207925577, iteration: 194894
loss: 1.0274535417556763,grad_norm: 0.9499759587951768, iteration: 194895
loss: 1.0485972166061401,grad_norm: 0.9999991114150556, iteration: 194896
loss: 1.0426297187805176,grad_norm: 0.9999998241318168, iteration: 194897
loss: 0.9723754525184631,grad_norm: 0.9239295506448855, iteration: 194898
loss: 0.9799797534942627,grad_norm: 0.8852172878759516, iteration: 194899
loss: 1.011454463005066,grad_norm: 0.912180941907663, iteration: 194900
loss: 1.0054391622543335,grad_norm: 0.8655556336290273, iteration: 194901
loss: 1.037521243095398,grad_norm: 0.9999991974729733, iteration: 194902
loss: 1.0387229919433594,grad_norm: 0.9999991755028427, iteration: 194903
loss: 1.0846467018127441,grad_norm: 0.8282958440394541, iteration: 194904
loss: 0.9811830520629883,grad_norm: 0.9125185494947243, iteration: 194905
loss: 0.975659966468811,grad_norm: 0.8548857819601637, iteration: 194906
loss: 0.994478702545166,grad_norm: 1.0000000223796166, iteration: 194907
loss: 1.0133116245269775,grad_norm: 0.9999994029298727, iteration: 194908
loss: 1.2183823585510254,grad_norm: 0.9999992264113233, iteration: 194909
loss: 0.960901141166687,grad_norm: 0.9524491032008159, iteration: 194910
loss: 1.214540719985962,grad_norm: 0.9999993689341513, iteration: 194911
loss: 1.0807311534881592,grad_norm: 0.9999994589918448, iteration: 194912
loss: 1.102190375328064,grad_norm: 0.9999991209122601, iteration: 194913
loss: 1.0010963678359985,grad_norm: 0.9461580551015063, iteration: 194914
loss: 1.004604458808899,grad_norm: 0.9999994278764504, iteration: 194915
loss: 1.0372016429901123,grad_norm: 0.7946662459137993, iteration: 194916
loss: 0.9524163603782654,grad_norm: 0.9999989883255616, iteration: 194917
loss: 0.9982584714889526,grad_norm: 0.9333421275837969, iteration: 194918
loss: 0.9975258111953735,grad_norm: 0.8046963598593591, iteration: 194919
loss: 1.045480489730835,grad_norm: 0.9999993138012186, iteration: 194920
loss: 1.1241302490234375,grad_norm: 0.9999993316135429, iteration: 194921
loss: 0.9537346959114075,grad_norm: 0.9308130873921663, iteration: 194922
loss: 1.0156264305114746,grad_norm: 0.9999991396206618, iteration: 194923
loss: 1.0794469118118286,grad_norm: 0.9999993135864197, iteration: 194924
loss: 0.9893725514411926,grad_norm: 0.9999992342156845, iteration: 194925
loss: 1.0040205717086792,grad_norm: 0.9999991240062158, iteration: 194926
loss: 1.0605409145355225,grad_norm: 0.9999994222981995, iteration: 194927
loss: 1.1099610328674316,grad_norm: 0.9999992638758767, iteration: 194928
loss: 1.0195140838623047,grad_norm: 0.8288336027225088, iteration: 194929
loss: 1.0482213497161865,grad_norm: 0.999999029907627, iteration: 194930
loss: 1.0399930477142334,grad_norm: 0.9999992462937436, iteration: 194931
loss: 1.113884449005127,grad_norm: 0.9999990969909287, iteration: 194932
loss: 1.1167031526565552,grad_norm: 0.9999997287335218, iteration: 194933
loss: 0.9935526251792908,grad_norm: 0.9338212805996234, iteration: 194934
loss: 1.1429760456085205,grad_norm: 0.9999993412441813, iteration: 194935
loss: 0.9964801669120789,grad_norm: 0.8750439459600052, iteration: 194936
loss: 0.9953352212905884,grad_norm: 0.9665047423661064, iteration: 194937
loss: 1.0369688272476196,grad_norm: 0.8938656095553008, iteration: 194938
loss: 1.034536600112915,grad_norm: 0.8649376590015361, iteration: 194939
loss: 1.4564354419708252,grad_norm: 0.999999887132711, iteration: 194940
loss: 1.0342464447021484,grad_norm: 0.9999992090392132, iteration: 194941
loss: 1.0303364992141724,grad_norm: 0.8160431230103639, iteration: 194942
loss: 0.9655245542526245,grad_norm: 0.9409419695935054, iteration: 194943
loss: 1.2104920148849487,grad_norm: 0.9999994725967866, iteration: 194944
loss: 1.0137525796890259,grad_norm: 0.9999996070290874, iteration: 194945
loss: 1.0464420318603516,grad_norm: 0.9999997826133177, iteration: 194946
loss: 0.9863295555114746,grad_norm: 0.9999992646792246, iteration: 194947
loss: 1.0171122550964355,grad_norm: 0.9999991386521077, iteration: 194948
loss: 1.0010085105895996,grad_norm: 0.9999990509117438, iteration: 194949
loss: 1.1326686143875122,grad_norm: 0.9999999437260582, iteration: 194950
loss: 1.0611960887908936,grad_norm: 0.999999347212389, iteration: 194951
loss: 0.9780853986740112,grad_norm: 0.9999990806296305, iteration: 194952
loss: 1.0103744268417358,grad_norm: 0.9288819481593972, iteration: 194953
loss: 0.9985440969467163,grad_norm: 0.8297959359864403, iteration: 194954
loss: 0.9762275815010071,grad_norm: 0.9581471272435996, iteration: 194955
loss: 1.057260513305664,grad_norm: 0.9821132379171778, iteration: 194956
loss: 0.9776643514633179,grad_norm: 0.999999006033789, iteration: 194957
loss: 1.0052170753479004,grad_norm: 0.9999998212968968, iteration: 194958
loss: 0.9879902601242065,grad_norm: 0.9999991693382672, iteration: 194959
loss: 1.3327641487121582,grad_norm: 0.9999996894672489, iteration: 194960
loss: 0.9880592226982117,grad_norm: 0.8244051060669987, iteration: 194961
loss: 0.9726988673210144,grad_norm: 0.9999990997899373, iteration: 194962
loss: 0.9744563698768616,grad_norm: 0.9393977111539119, iteration: 194963
loss: 1.0618202686309814,grad_norm: 0.999999454334578, iteration: 194964
loss: 0.9877400994300842,grad_norm: 0.9746154911975412, iteration: 194965
loss: 1.1967387199401855,grad_norm: 0.9999992476947129, iteration: 194966
loss: 0.9746585488319397,grad_norm: 0.9999992186965506, iteration: 194967
loss: 1.0037122964859009,grad_norm: 0.8450430189451282, iteration: 194968
loss: 1.0190601348876953,grad_norm: 0.9083361371961741, iteration: 194969
loss: 1.1517856121063232,grad_norm: 0.9999999145221288, iteration: 194970
loss: 1.0067846775054932,grad_norm: 0.8786882555439472, iteration: 194971
loss: 1.0049490928649902,grad_norm: 0.9999992334610874, iteration: 194972
loss: 1.0621846914291382,grad_norm: 0.9999997750483035, iteration: 194973
loss: 1.000595211982727,grad_norm: 0.9881534908830892, iteration: 194974
loss: 1.0024547576904297,grad_norm: 0.9999991605015219, iteration: 194975
loss: 1.0285793542861938,grad_norm: 0.9520395027570447, iteration: 194976
loss: 1.0376795530319214,grad_norm: 0.9548847117155195, iteration: 194977
loss: 1.0978726148605347,grad_norm: 0.999999179594136, iteration: 194978
loss: 0.9825060367584229,grad_norm: 0.7297808401708958, iteration: 194979
loss: 0.9923175573348999,grad_norm: 0.9999993113557593, iteration: 194980
loss: 0.9997877478599548,grad_norm: 0.9999993853192192, iteration: 194981
loss: 1.0371432304382324,grad_norm: 0.9999992747714754, iteration: 194982
loss: 1.0811156034469604,grad_norm: 0.7346461485270196, iteration: 194983
loss: 0.9995682239532471,grad_norm: 0.9999991099157084, iteration: 194984
loss: 1.1952883005142212,grad_norm: 0.9999991773507453, iteration: 194985
loss: 1.0086835622787476,grad_norm: 0.9807938224004995, iteration: 194986
loss: 1.1432690620422363,grad_norm: 0.999999473377045, iteration: 194987
loss: 1.032684564590454,grad_norm: 0.9999993267311743, iteration: 194988
loss: 1.0615588426589966,grad_norm: 1.000000010462581, iteration: 194989
loss: 0.9958091378211975,grad_norm: 0.8632082055320328, iteration: 194990
loss: 1.0289838314056396,grad_norm: 0.9635997037003773, iteration: 194991
loss: 0.9985793232917786,grad_norm: 0.9999991519929532, iteration: 194992
loss: 1.0207123756408691,grad_norm: 0.9999991376674796, iteration: 194993
loss: 1.0266149044036865,grad_norm: 0.9999990798524878, iteration: 194994
loss: 1.1107717752456665,grad_norm: 0.9999998673828017, iteration: 194995
loss: 1.0450252294540405,grad_norm: 0.9999993916312109, iteration: 194996
loss: 0.9978864192962646,grad_norm: 0.9040590286320729, iteration: 194997
loss: 1.0035409927368164,grad_norm: 0.9682298739459023, iteration: 194998
loss: 1.036070704460144,grad_norm: 0.9999994818998957, iteration: 194999
loss: 1.0459985733032227,grad_norm: 0.9999999227733692, iteration: 195000
loss: 1.1027557849884033,grad_norm: 0.99999926696268, iteration: 195001
loss: 1.0101934671401978,grad_norm: 0.8571029337916748, iteration: 195002
loss: 1.1414954662322998,grad_norm: 0.9999992323987325, iteration: 195003
loss: 1.0233798027038574,grad_norm: 0.9999995770085409, iteration: 195004
loss: 0.977989137172699,grad_norm: 0.9999991025990317, iteration: 195005
loss: 1.1036407947540283,grad_norm: 0.9999990890861342, iteration: 195006
loss: 1.004537582397461,grad_norm: 0.999999335808233, iteration: 195007
loss: 1.0162053108215332,grad_norm: 0.9578313403905803, iteration: 195008
loss: 0.9819395542144775,grad_norm: 0.8053113959154673, iteration: 195009
loss: 1.038390040397644,grad_norm: 0.9999995720191106, iteration: 195010
loss: 1.089130163192749,grad_norm: 0.9999990946928453, iteration: 195011
loss: 1.0316380262374878,grad_norm: 0.9742744694849088, iteration: 195012
loss: 1.0973443984985352,grad_norm: 0.9999997993051666, iteration: 195013
loss: 1.0158249139785767,grad_norm: 1.0000000858992024, iteration: 195014
loss: 0.9964476227760315,grad_norm: 0.9999997658515571, iteration: 195015
loss: 1.0219099521636963,grad_norm: 0.9999993084118073, iteration: 195016
loss: 1.046425223350525,grad_norm: 0.9999996444671755, iteration: 195017
loss: 1.0137568712234497,grad_norm: 0.9783662262950116, iteration: 195018
loss: 0.989266037940979,grad_norm: 0.8489270114711117, iteration: 195019
loss: 1.1295347213745117,grad_norm: 0.999999231354698, iteration: 195020
loss: 0.9744619727134705,grad_norm: 0.8410178066547045, iteration: 195021
loss: 1.0084218978881836,grad_norm: 0.9999993951741977, iteration: 195022
loss: 0.9973983764648438,grad_norm: 0.9886786115425397, iteration: 195023
loss: 1.0588260889053345,grad_norm: 0.9996843234277563, iteration: 195024
loss: 1.0152490139007568,grad_norm: 0.9268880529659319, iteration: 195025
loss: 1.0641038417816162,grad_norm: 0.8394900527562218, iteration: 195026
loss: 1.0247437953948975,grad_norm: 0.9681485156303504, iteration: 195027
loss: 1.2312895059585571,grad_norm: 0.9999998532992226, iteration: 195028
loss: 1.0193525552749634,grad_norm: 0.8856090227371354, iteration: 195029
loss: 0.9982485771179199,grad_norm: 0.9999991424293477, iteration: 195030
loss: 0.9990742802619934,grad_norm: 0.8957418990769905, iteration: 195031
loss: 1.022196650505066,grad_norm: 0.9999999368941568, iteration: 195032
loss: 1.0022947788238525,grad_norm: 0.9512402923277673, iteration: 195033
loss: 1.0002005100250244,grad_norm: 0.9091227274885666, iteration: 195034
loss: 0.9987677931785583,grad_norm: 0.9999992657871374, iteration: 195035
loss: 1.0415016412734985,grad_norm: 0.9999990790803771, iteration: 195036
loss: 1.0535959005355835,grad_norm: 0.9999995899142331, iteration: 195037
loss: 0.9965859651565552,grad_norm: 0.8221087565927865, iteration: 195038
loss: 1.005104899406433,grad_norm: 0.8164773528920299, iteration: 195039
loss: 1.1876457929611206,grad_norm: 0.9999995393615032, iteration: 195040
loss: 1.0683457851409912,grad_norm: 0.9999991123309088, iteration: 195041
loss: 1.0514947175979614,grad_norm: 0.9970546961436672, iteration: 195042
loss: 1.0177373886108398,grad_norm: 0.9999991632918263, iteration: 195043
loss: 1.0892934799194336,grad_norm: 0.9999991260903993, iteration: 195044
loss: 1.0376029014587402,grad_norm: 0.9999991255212379, iteration: 195045
loss: 0.9594001173973083,grad_norm: 0.9999992817190336, iteration: 195046
loss: 1.0451775789260864,grad_norm: 0.999999374017126, iteration: 195047
loss: 1.0275112390518188,grad_norm: 0.9999991625904256, iteration: 195048
loss: 1.0216495990753174,grad_norm: 0.9999993836960082, iteration: 195049
loss: 1.0601997375488281,grad_norm: 0.9999991548213109, iteration: 195050
loss: 1.0056357383728027,grad_norm: 0.9972832977839541, iteration: 195051
loss: 1.0008952617645264,grad_norm: 0.9999991602663305, iteration: 195052
loss: 0.9927722215652466,grad_norm: 0.8650992635464558, iteration: 195053
loss: 1.0702816247940063,grad_norm: 0.999999110769438, iteration: 195054
loss: 1.0188547372817993,grad_norm: 0.9999990941493175, iteration: 195055
loss: 0.9757431745529175,grad_norm: 0.9999992064345082, iteration: 195056
loss: 1.0065056085586548,grad_norm: 0.8408368988895726, iteration: 195057
loss: 1.0031644105911255,grad_norm: 0.9999990864641652, iteration: 195058
loss: 1.0084863901138306,grad_norm: 0.9275186550817838, iteration: 195059
loss: 1.046921730041504,grad_norm: 0.9648067654389193, iteration: 195060
loss: 1.1046332120895386,grad_norm: 0.9999996776816511, iteration: 195061
loss: 1.2545204162597656,grad_norm: 0.9999999904699512, iteration: 195062
loss: 0.9649412035942078,grad_norm: 0.9578144884884449, iteration: 195063
loss: 1.0206298828125,grad_norm: 0.9999994272188956, iteration: 195064
loss: 0.9501746892929077,grad_norm: 0.9999993617089306, iteration: 195065
loss: 0.9904742240905762,grad_norm: 0.9999991926362718, iteration: 195066
loss: 1.0050925016403198,grad_norm: 0.999999187498462, iteration: 195067
loss: 1.0368200540542603,grad_norm: 0.9999999947015736, iteration: 195068
loss: 1.0322086811065674,grad_norm: 0.9324226535517973, iteration: 195069
loss: 0.9909968376159668,grad_norm: 0.9999991458304326, iteration: 195070
loss: 0.9984880685806274,grad_norm: 0.9999994411291626, iteration: 195071
loss: 1.073803186416626,grad_norm: 0.9605808592686953, iteration: 195072
loss: 1.0944814682006836,grad_norm: 0.9999992500320722, iteration: 195073
loss: 1.0081568956375122,grad_norm: 0.9273053235777705, iteration: 195074
loss: 1.0271259546279907,grad_norm: 0.9999990886373435, iteration: 195075
loss: 1.0625879764556885,grad_norm: 0.9999993629572559, iteration: 195076
loss: 1.0108689069747925,grad_norm: 0.9999990345396406, iteration: 195077
loss: 1.0583375692367554,grad_norm: 0.9999992856046935, iteration: 195078
loss: 1.0087686777114868,grad_norm: 0.9999996588978068, iteration: 195079
loss: 1.0112125873565674,grad_norm: 0.960707213450508, iteration: 195080
loss: 1.0041207075119019,grad_norm: 0.999999481518541, iteration: 195081
loss: 0.9725587368011475,grad_norm: 0.8581724836758535, iteration: 195082
loss: 1.0082151889801025,grad_norm: 0.9999991984344927, iteration: 195083
loss: 1.0246977806091309,grad_norm: 0.9999995440912363, iteration: 195084
loss: 1.2326337099075317,grad_norm: 0.9999998963299334, iteration: 195085
loss: 1.0090017318725586,grad_norm: 0.9999990656446129, iteration: 195086
loss: 1.0241801738739014,grad_norm: 0.9999990521786698, iteration: 195087
loss: 0.9759411215782166,grad_norm: 0.9393627124983793, iteration: 195088
loss: 1.0030708312988281,grad_norm: 0.9542826252350495, iteration: 195089
loss: 1.0126596689224243,grad_norm: 0.953686972337645, iteration: 195090
loss: 0.962935209274292,grad_norm: 0.8350945202248579, iteration: 195091
loss: 1.0139596462249756,grad_norm: 0.7728881887884463, iteration: 195092
loss: 1.0271917581558228,grad_norm: 0.9999991769273239, iteration: 195093
loss: 0.9964520335197449,grad_norm: 0.9695474202695383, iteration: 195094
loss: 1.0385719537734985,grad_norm: 0.9999996772519453, iteration: 195095
loss: 1.1002920866012573,grad_norm: 0.9999992859166418, iteration: 195096
loss: 1.0910029411315918,grad_norm: 0.9999992162467168, iteration: 195097
loss: 0.9771688580513,grad_norm: 0.818237565554562, iteration: 195098
loss: 0.9795039296150208,grad_norm: 0.9236304033382321, iteration: 195099
loss: 1.0039066076278687,grad_norm: 0.8615418454104342, iteration: 195100
loss: 1.01718270778656,grad_norm: 0.9408553434308066, iteration: 195101
loss: 0.9722055792808533,grad_norm: 0.9999992298132274, iteration: 195102
loss: 0.98073410987854,grad_norm: 0.999999170478138, iteration: 195103
loss: 1.0228723287582397,grad_norm: 0.9469259398025781, iteration: 195104
loss: 0.9793545007705688,grad_norm: 0.9999991134158137, iteration: 195105
loss: 0.9975087642669678,grad_norm: 0.9999993197466024, iteration: 195106
loss: 1.0741270780563354,grad_norm: 0.999999440686885, iteration: 195107
loss: 1.0016568899154663,grad_norm: 0.8040451349816793, iteration: 195108
loss: 1.052631139755249,grad_norm: 0.8967196349578206, iteration: 195109
loss: 0.995601236820221,grad_norm: 0.9453397093963624, iteration: 195110
loss: 1.0027458667755127,grad_norm: 0.9273959968547602, iteration: 195111
loss: 0.9831998944282532,grad_norm: 0.9999990194996493, iteration: 195112
loss: 1.015671968460083,grad_norm: 0.9999991670831342, iteration: 195113
loss: 1.0209182500839233,grad_norm: 0.9812026361078152, iteration: 195114
loss: 1.1361101865768433,grad_norm: 0.9999997360707328, iteration: 195115
loss: 1.0039618015289307,grad_norm: 0.864417633651187, iteration: 195116
loss: 1.0004888772964478,grad_norm: 0.9589521387273462, iteration: 195117
loss: 0.9910767674446106,grad_norm: 0.8134454698218782, iteration: 195118
loss: 0.9961176514625549,grad_norm: 0.7937819583781582, iteration: 195119
loss: 0.9918860793113708,grad_norm: 0.8897412303734263, iteration: 195120
loss: 1.0036059617996216,grad_norm: 0.9883053619321112, iteration: 195121
loss: 1.0948610305786133,grad_norm: 0.9999998155941704, iteration: 195122
loss: 1.0239217281341553,grad_norm: 0.886436740682546, iteration: 195123
loss: 0.9975351095199585,grad_norm: 0.8269668595673902, iteration: 195124
loss: 0.9916937947273254,grad_norm: 0.9371152470033313, iteration: 195125
loss: 0.995649516582489,grad_norm: 0.7554504809887328, iteration: 195126
loss: 1.0087063312530518,grad_norm: 0.9604461401370243, iteration: 195127
loss: 1.0211161375045776,grad_norm: 0.9999993969901391, iteration: 195128
loss: 0.9790486693382263,grad_norm: 0.9748885901676358, iteration: 195129
loss: 1.3517063856124878,grad_norm: 0.9999993302288335, iteration: 195130
loss: 1.1302884817123413,grad_norm: 0.9999996465437404, iteration: 195131
loss: 0.9848095774650574,grad_norm: 0.8703513548220944, iteration: 195132
loss: 1.0198055505752563,grad_norm: 0.9258546678800821, iteration: 195133
loss: 0.9731504917144775,grad_norm: 0.759683493269066, iteration: 195134
loss: 1.0089116096496582,grad_norm: 0.9999992772355979, iteration: 195135
loss: 0.9816165566444397,grad_norm: 0.9999992250055588, iteration: 195136
loss: 0.9974237680435181,grad_norm: 0.9999990684248589, iteration: 195137
loss: 0.9892720580101013,grad_norm: 0.9852995371174671, iteration: 195138
loss: 1.0414425134658813,grad_norm: 0.9999991993139998, iteration: 195139
loss: 1.0090771913528442,grad_norm: 0.9999992239048886, iteration: 195140
loss: 0.9663925170898438,grad_norm: 0.9240924955158341, iteration: 195141
loss: 0.9510160684585571,grad_norm: 0.892169384794355, iteration: 195142
loss: 0.9720600247383118,grad_norm: 0.9929959388989215, iteration: 195143
loss: 0.9933590888977051,grad_norm: 0.9999990558652397, iteration: 195144
loss: 0.9890031218528748,grad_norm: 0.9329443116967597, iteration: 195145
loss: 0.9631463885307312,grad_norm: 0.9629894777619515, iteration: 195146
loss: 0.9731531739234924,grad_norm: 0.914256417938888, iteration: 195147
loss: 0.9960366487503052,grad_norm: 0.9999990522484441, iteration: 195148
loss: 0.9933233261108398,grad_norm: 0.9999992791023236, iteration: 195149
loss: 1.0138037204742432,grad_norm: 0.9999990314496306, iteration: 195150
loss: 1.1130602359771729,grad_norm: 0.999999360934489, iteration: 195151
loss: 1.2015053033828735,grad_norm: 0.9999998289931601, iteration: 195152
loss: 1.0648760795593262,grad_norm: 0.9141384120408673, iteration: 195153
loss: 1.0311717987060547,grad_norm: 0.951231338112257, iteration: 195154
loss: 0.9698302149772644,grad_norm: 0.88609412153095, iteration: 195155
loss: 0.9745329022407532,grad_norm: 0.9999989199031978, iteration: 195156
loss: 1.1815156936645508,grad_norm: 0.99999962754286, iteration: 195157
loss: 0.9687464833259583,grad_norm: 0.829412979322857, iteration: 195158
loss: 0.9903267025947571,grad_norm: 0.8831770151462182, iteration: 195159
loss: 1.0578817129135132,grad_norm: 0.9999995912612069, iteration: 195160
loss: 1.0263947248458862,grad_norm: 0.9754709270208252, iteration: 195161
loss: 0.9816704392433167,grad_norm: 0.9624951667609544, iteration: 195162
loss: 1.0033001899719238,grad_norm: 0.9999991603174386, iteration: 195163
loss: 1.1265852451324463,grad_norm: 0.9999990583873589, iteration: 195164
loss: 0.9884152412414551,grad_norm: 0.9999990127677227, iteration: 195165
loss: 0.9621261358261108,grad_norm: 0.9995538211635036, iteration: 195166
loss: 1.1869200468063354,grad_norm: 0.9999998338892074, iteration: 195167
loss: 0.9799767136573792,grad_norm: 0.9999991519941532, iteration: 195168
loss: 1.0100886821746826,grad_norm: 0.9999998361083227, iteration: 195169
loss: 0.9758961200714111,grad_norm: 0.8842686884151933, iteration: 195170
loss: 1.0516843795776367,grad_norm: 0.9999996346503263, iteration: 195171
loss: 1.0933741331100464,grad_norm: 0.9999998679987617, iteration: 195172
loss: 1.0572642087936401,grad_norm: 0.999999004934214, iteration: 195173
loss: 1.033139705657959,grad_norm: 0.9999991612498802, iteration: 195174
loss: 1.0433387756347656,grad_norm: 0.9999994944918313, iteration: 195175
loss: 1.0179641246795654,grad_norm: 0.9685819692768267, iteration: 195176
loss: 1.0798439979553223,grad_norm: 0.9999993540238173, iteration: 195177
loss: 1.0147786140441895,grad_norm: 0.9999993059909874, iteration: 195178
loss: 1.0227991342544556,grad_norm: 0.9999991732904772, iteration: 195179
loss: 0.9782767295837402,grad_norm: 0.9141948047959336, iteration: 195180
loss: 1.1374775171279907,grad_norm: 0.9999992342433475, iteration: 195181
loss: 1.0262272357940674,grad_norm: 0.9631152876590878, iteration: 195182
loss: 1.0005959272384644,grad_norm: 0.9605212487577732, iteration: 195183
loss: 1.0092852115631104,grad_norm: 0.9999991185124856, iteration: 195184
loss: 1.0312129259109497,grad_norm: 0.9999990925472949, iteration: 195185
loss: 0.9580953121185303,grad_norm: 0.999999169565865, iteration: 195186
loss: 1.0609004497528076,grad_norm: 0.999999157213088, iteration: 195187
loss: 1.014288306236267,grad_norm: 0.8594640715854724, iteration: 195188
loss: 1.0362052917480469,grad_norm: 0.9999993330897253, iteration: 195189
loss: 1.0949846506118774,grad_norm: 0.9999996558761487, iteration: 195190
loss: 0.9864227175712585,grad_norm: 0.9999990164830637, iteration: 195191
loss: 1.0433464050292969,grad_norm: 0.9960704003591626, iteration: 195192
loss: 0.9871417284011841,grad_norm: 0.9999991369779394, iteration: 195193
loss: 1.0004810094833374,grad_norm: 0.9232418649108615, iteration: 195194
loss: 1.0945454835891724,grad_norm: 0.9999998826916008, iteration: 195195
loss: 0.9637662768363953,grad_norm: 0.8775049921852264, iteration: 195196
loss: 1.0683684349060059,grad_norm: 0.919424638759034, iteration: 195197
loss: 0.989910364151001,grad_norm: 0.7828562640936865, iteration: 195198
loss: 0.9978676438331604,grad_norm: 0.9347074121835979, iteration: 195199
loss: 1.0629724264144897,grad_norm: 0.9999991847537109, iteration: 195200
loss: 1.0650752782821655,grad_norm: 0.9999999529381608, iteration: 195201
loss: 1.0366933345794678,grad_norm: 0.9999992311667616, iteration: 195202
loss: 1.1354697942733765,grad_norm: 0.9999999453067013, iteration: 195203
loss: 0.9836015105247498,grad_norm: 0.9157247205235536, iteration: 195204
loss: 1.076646327972412,grad_norm: 0.9999996131986829, iteration: 195205
loss: 1.0297377109527588,grad_norm: 0.9999995939523947, iteration: 195206
loss: 1.1109811067581177,grad_norm: 0.9999995566027771, iteration: 195207
loss: 1.0223362445831299,grad_norm: 0.9616832232977571, iteration: 195208
loss: 1.0315005779266357,grad_norm: 0.9999992314372969, iteration: 195209
loss: 1.0126038789749146,grad_norm: 0.9999997228543203, iteration: 195210
loss: 0.9784616231918335,grad_norm: 0.9347752419877677, iteration: 195211
loss: 1.070557951927185,grad_norm: 0.9999996838278015, iteration: 195212
loss: 1.0631479024887085,grad_norm: 0.9967098742300418, iteration: 195213
loss: 0.9983283877372742,grad_norm: 0.9999990185450922, iteration: 195214
loss: 1.0627774000167847,grad_norm: 0.9999995022079342, iteration: 195215
loss: 1.0899851322174072,grad_norm: 0.9999997754040939, iteration: 195216
loss: 1.0930514335632324,grad_norm: 0.9999993989886851, iteration: 195217
loss: 0.9747446179389954,grad_norm: 0.8966922934148328, iteration: 195218
loss: 0.977234959602356,grad_norm: 0.9999990914711413, iteration: 195219
loss: 1.0072658061981201,grad_norm: 0.9999991595071875, iteration: 195220
loss: 1.0390087366104126,grad_norm: 0.9999998053399893, iteration: 195221
loss: 1.0410635471343994,grad_norm: 0.9999998940870566, iteration: 195222
loss: 1.0453609228134155,grad_norm: 0.9999992201956696, iteration: 195223
loss: 1.0630887746810913,grad_norm: 0.9999992463516123, iteration: 195224
loss: 1.007328748703003,grad_norm: 0.9999995616329893, iteration: 195225
loss: 0.9934390783309937,grad_norm: 0.9999991161527252, iteration: 195226
loss: 1.0102773904800415,grad_norm: 0.9440211460473648, iteration: 195227
loss: 0.9829411506652832,grad_norm: 0.8897633774048878, iteration: 195228
loss: 1.036254644393921,grad_norm: 0.9999990692271056, iteration: 195229
loss: 1.016457438468933,grad_norm: 0.8154693096702367, iteration: 195230
loss: 1.102599024772644,grad_norm: 0.9999992836377867, iteration: 195231
loss: 1.0273211002349854,grad_norm: 0.9999992564686899, iteration: 195232
loss: 1.0093557834625244,grad_norm: 0.9999994424489468, iteration: 195233
loss: 1.0015511512756348,grad_norm: 0.9999990502469804, iteration: 195234
loss: 0.9758569598197937,grad_norm: 0.9332607184708959, iteration: 195235
loss: 1.0248253345489502,grad_norm: 0.8279929106012224, iteration: 195236
loss: 1.1053597927093506,grad_norm: 0.9999994044222813, iteration: 195237
loss: 1.0018975734710693,grad_norm: 0.9179491834770034, iteration: 195238
loss: 0.997836172580719,grad_norm: 0.9999993264430301, iteration: 195239
loss: 0.9740164875984192,grad_norm: 0.780901339407866, iteration: 195240
loss: 0.983306884765625,grad_norm: 0.9744386805319526, iteration: 195241
loss: 1.0211515426635742,grad_norm: 0.7761870335086333, iteration: 195242
loss: 1.0003478527069092,grad_norm: 0.9287932514278415, iteration: 195243
loss: 0.9793919324874878,grad_norm: 0.9717271116349587, iteration: 195244
loss: 1.0074433088302612,grad_norm: 0.9618016949398707, iteration: 195245
loss: 0.9770752191543579,grad_norm: 0.9999990937963599, iteration: 195246
loss: 1.0319222211837769,grad_norm: 0.9999991409072594, iteration: 195247
loss: 1.1680570840835571,grad_norm: 0.9999994403740512, iteration: 195248
loss: 1.2307424545288086,grad_norm: 0.9999996377282134, iteration: 195249
loss: 1.0006492137908936,grad_norm: 0.9884822115736457, iteration: 195250
loss: 1.0658555030822754,grad_norm: 0.999999124885955, iteration: 195251
loss: 1.010932445526123,grad_norm: 0.8560191950553562, iteration: 195252
loss: 1.0498813390731812,grad_norm: 0.99999985900221, iteration: 195253
loss: 1.0236632823944092,grad_norm: 0.9529584673955376, iteration: 195254
loss: 1.0023372173309326,grad_norm: 0.9999991369901642, iteration: 195255
loss: 1.0021926164627075,grad_norm: 0.8960881906636096, iteration: 195256
loss: 0.9681802988052368,grad_norm: 0.9613052408148731, iteration: 195257
loss: 1.0031077861785889,grad_norm: 0.9559260187376168, iteration: 195258
loss: 1.0036331415176392,grad_norm: 0.9999992367978554, iteration: 195259
loss: 0.9789004325866699,grad_norm: 0.977054935616412, iteration: 195260
loss: 1.0010077953338623,grad_norm: 0.9393534197163126, iteration: 195261
loss: 1.0211493968963623,grad_norm: 0.9999990826496082, iteration: 195262
loss: 0.9677239060401917,grad_norm: 0.9130352757282298, iteration: 195263
loss: 1.0078368186950684,grad_norm: 0.999998994126906, iteration: 195264
loss: 1.0158218145370483,grad_norm: 0.9999993142953649, iteration: 195265
loss: 1.0131417512893677,grad_norm: 0.8509283358267102, iteration: 195266
loss: 0.990309476852417,grad_norm: 0.9999992891226285, iteration: 195267
loss: 0.979935884475708,grad_norm: 0.9408937833933988, iteration: 195268
loss: 1.0154519081115723,grad_norm: 0.9999992097127116, iteration: 195269
loss: 1.0620577335357666,grad_norm: 0.999999130934437, iteration: 195270
loss: 1.0114372968673706,grad_norm: 0.959391728705038, iteration: 195271
loss: 0.9919663071632385,grad_norm: 0.9999990936987959, iteration: 195272
loss: 1.0442157983779907,grad_norm: 0.94427929968574, iteration: 195273
loss: 1.0343306064605713,grad_norm: 0.9999991221457752, iteration: 195274
loss: 1.0000828504562378,grad_norm: 0.9999988678826232, iteration: 195275
loss: 1.0167707204818726,grad_norm: 0.9999990766150184, iteration: 195276
loss: 1.1307930946350098,grad_norm: 0.9999998648107162, iteration: 195277
loss: 1.0149677991867065,grad_norm: 0.999999069377742, iteration: 195278
loss: 1.007339358329773,grad_norm: 0.9999993464915302, iteration: 195279
loss: 1.0217849016189575,grad_norm: 0.8855766960495971, iteration: 195280
loss: 0.9989960193634033,grad_norm: 0.999999239144245, iteration: 195281
loss: 0.9827038049697876,grad_norm: 0.9790424071739251, iteration: 195282
loss: 1.0056369304656982,grad_norm: 0.9309799790478565, iteration: 195283
loss: 1.003238320350647,grad_norm: 0.8295263362741613, iteration: 195284
loss: 1.0059099197387695,grad_norm: 0.9975423048703309, iteration: 195285
loss: 0.9803451299667358,grad_norm: 0.819439543389002, iteration: 195286
loss: 1.042153239250183,grad_norm: 0.9040935614247438, iteration: 195287
loss: 0.9908928871154785,grad_norm: 0.8731700043897523, iteration: 195288
loss: 1.0378354787826538,grad_norm: 0.9999997588096838, iteration: 195289
loss: 0.9940217733383179,grad_norm: 0.9999992456755407, iteration: 195290
loss: 0.9890848994255066,grad_norm: 0.9999992374442019, iteration: 195291
loss: 1.011283278465271,grad_norm: 0.8602425743388381, iteration: 195292
loss: 1.0144031047821045,grad_norm: 0.9999992600599219, iteration: 195293
loss: 0.9733948111534119,grad_norm: 0.9999991608542428, iteration: 195294
loss: 1.0174624919891357,grad_norm: 0.8404702547309967, iteration: 195295
loss: 1.043562412261963,grad_norm: 0.9903489074009573, iteration: 195296
loss: 1.0061196088790894,grad_norm: 0.999998976716855, iteration: 195297
loss: 1.0179164409637451,grad_norm: 0.9999992743213271, iteration: 195298
loss: 0.957912027835846,grad_norm: 0.9999991154259328, iteration: 195299
loss: 0.9772487282752991,grad_norm: 0.9341855950725018, iteration: 195300
loss: 1.0080727338790894,grad_norm: 0.8908937734527311, iteration: 195301
loss: 1.0536640882492065,grad_norm: 0.9999991405143991, iteration: 195302
loss: 1.0234057903289795,grad_norm: 0.8526384446159951, iteration: 195303
loss: 1.0069146156311035,grad_norm: 0.9041384030461476, iteration: 195304
loss: 1.0334117412567139,grad_norm: 0.9999993530658332, iteration: 195305
loss: 1.0217784643173218,grad_norm: 0.8679854484196858, iteration: 195306
loss: 1.0676195621490479,grad_norm: 0.9999989123144336, iteration: 195307
loss: 1.0489245653152466,grad_norm: 0.9999995442326308, iteration: 195308
loss: 1.0410912036895752,grad_norm: 0.9999990929444486, iteration: 195309
loss: 1.0234746932983398,grad_norm: 0.9999990194405162, iteration: 195310
loss: 1.1501468420028687,grad_norm: 0.999999443924161, iteration: 195311
loss: 0.9590917229652405,grad_norm: 0.9999991142959235, iteration: 195312
loss: 0.990563690662384,grad_norm: 0.9193566933466756, iteration: 195313
loss: 1.0306692123413086,grad_norm: 0.8931824392320624, iteration: 195314
loss: 1.0005446672439575,grad_norm: 0.9999991031870866, iteration: 195315
loss: 0.9758586287498474,grad_norm: 0.8819056835925846, iteration: 195316
loss: 1.1449817419052124,grad_norm: 0.9999992861055353, iteration: 195317
loss: 1.116356611251831,grad_norm: 0.9999991199589804, iteration: 195318
loss: 1.0030186176300049,grad_norm: 0.9999990505253217, iteration: 195319
loss: 1.0189504623413086,grad_norm: 0.9999991226051412, iteration: 195320
loss: 0.9750822186470032,grad_norm: 0.8457617736842965, iteration: 195321
loss: 0.9974403977394104,grad_norm: 0.9999991104061681, iteration: 195322
loss: 1.0032752752304077,grad_norm: 0.9999991640092065, iteration: 195323
loss: 0.9962674975395203,grad_norm: 0.8187189247761066, iteration: 195324
loss: 0.9778923392295837,grad_norm: 0.8821004702210553, iteration: 195325
loss: 1.0730247497558594,grad_norm: 0.9999991705729467, iteration: 195326
loss: 0.9785022735595703,grad_norm: 0.999999177149777, iteration: 195327
loss: 1.0210168361663818,grad_norm: 0.9999994104422478, iteration: 195328
loss: 1.0056254863739014,grad_norm: 0.9999990777717539, iteration: 195329
loss: 1.018616795539856,grad_norm: 0.9769371105239183, iteration: 195330
loss: 1.0280840396881104,grad_norm: 0.9977205697224714, iteration: 195331
loss: 0.9607155919075012,grad_norm: 0.9757756521319552, iteration: 195332
loss: 0.9988377094268799,grad_norm: 0.8342695548603439, iteration: 195333
loss: 0.9930006861686707,grad_norm: 0.8435153963491796, iteration: 195334
loss: 1.0336894989013672,grad_norm: 0.9999990514272168, iteration: 195335
loss: 0.9696642756462097,grad_norm: 0.8234192961653845, iteration: 195336
loss: 1.021809697151184,grad_norm: 0.9999993197479397, iteration: 195337
loss: 0.9934492111206055,grad_norm: 0.9999990619615651, iteration: 195338
loss: 1.0308752059936523,grad_norm: 0.9999991823859435, iteration: 195339
loss: 0.9493900537490845,grad_norm: 0.9999991606321725, iteration: 195340
loss: 0.962130069732666,grad_norm: 0.9698664050144172, iteration: 195341
loss: 1.028329610824585,grad_norm: 0.999999269615913, iteration: 195342
loss: 0.9987221956253052,grad_norm: 0.9999991060379824, iteration: 195343
loss: 0.9951685667037964,grad_norm: 0.9749924898880516, iteration: 195344
loss: 1.0346373319625854,grad_norm: 0.9999996039439254, iteration: 195345
loss: 1.016941785812378,grad_norm: 0.9999992121188416, iteration: 195346
loss: 1.003006935119629,grad_norm: 0.9999989902496838, iteration: 195347
loss: 0.9874936938285828,grad_norm: 0.9999991341986623, iteration: 195348
loss: 0.9715994596481323,grad_norm: 0.9999990408121793, iteration: 195349
loss: 1.0929814577102661,grad_norm: 0.999999785500446, iteration: 195350
loss: 1.0100438594818115,grad_norm: 0.9000030680804275, iteration: 195351
loss: 0.9933323860168457,grad_norm: 0.8806120255358173, iteration: 195352
loss: 1.0097143650054932,grad_norm: 0.9999991102672199, iteration: 195353
loss: 0.9740980267524719,grad_norm: 0.9999991838918896, iteration: 195354
loss: 0.9874398708343506,grad_norm: 0.9754227890078847, iteration: 195355
loss: 1.1183212995529175,grad_norm: 0.9999998523974096, iteration: 195356
loss: 1.0436676740646362,grad_norm: 0.9999991147625978, iteration: 195357
loss: 0.9570396542549133,grad_norm: 0.9999992235225819, iteration: 195358
loss: 1.0293164253234863,grad_norm: 0.9999996555056173, iteration: 195359
loss: 1.1033998727798462,grad_norm: 0.9999991050286541, iteration: 195360
loss: 0.9966268539428711,grad_norm: 0.9999990865539051, iteration: 195361
loss: 0.9897695779800415,grad_norm: 0.9343477668071191, iteration: 195362
loss: 1.029787302017212,grad_norm: 0.9734346515864452, iteration: 195363
loss: 1.0171501636505127,grad_norm: 0.9999998479784031, iteration: 195364
loss: 1.003684401512146,grad_norm: 0.9999992669026008, iteration: 195365
loss: 1.0456544160842896,grad_norm: 0.9999994556405635, iteration: 195366
loss: 0.9899302124977112,grad_norm: 0.9999991155527421, iteration: 195367
loss: 0.9766842126846313,grad_norm: 0.7924071432502783, iteration: 195368
loss: 1.087841272354126,grad_norm: 0.9999997014568833, iteration: 195369
loss: 0.9661163091659546,grad_norm: 0.86876338025977, iteration: 195370
loss: 0.9976863861083984,grad_norm: 0.989096693311124, iteration: 195371
loss: 1.0129342079162598,grad_norm: 0.9999996747139166, iteration: 195372
loss: 1.0403327941894531,grad_norm: 0.7715946798097453, iteration: 195373
loss: 0.991725742816925,grad_norm: 0.9999991839848993, iteration: 195374
loss: 0.9767898917198181,grad_norm: 0.9376046730571033, iteration: 195375
loss: 0.9934291243553162,grad_norm: 0.9862989273753998, iteration: 195376
loss: 1.0229225158691406,grad_norm: 0.9901789744310918, iteration: 195377
loss: 0.9962818622589111,grad_norm: 0.8098827081248762, iteration: 195378
loss: 1.0282846689224243,grad_norm: 0.9720295700709349, iteration: 195379
loss: 0.9828345775604248,grad_norm: 0.8266040005165924, iteration: 195380
loss: 0.9814733862876892,grad_norm: 0.9999991154349401, iteration: 195381
loss: 0.9650418162345886,grad_norm: 0.8102469019924479, iteration: 195382
loss: 1.0496673583984375,grad_norm: 0.9999991049528405, iteration: 195383
loss: 1.0144845247268677,grad_norm: 0.8429016222686663, iteration: 195384
loss: 1.0147873163223267,grad_norm: 0.9999991543972432, iteration: 195385
loss: 1.0239804983139038,grad_norm: 0.9999991731599717, iteration: 195386
loss: 0.9831587672233582,grad_norm: 0.9999991867257759, iteration: 195387
loss: 1.0181862115859985,grad_norm: 0.9445677333241456, iteration: 195388
loss: 1.008440613746643,grad_norm: 0.7402956173245641, iteration: 195389
loss: 1.0149637460708618,grad_norm: 0.814210325177375, iteration: 195390
loss: 1.0440762042999268,grad_norm: 0.9999992158379012, iteration: 195391
loss: 0.993061363697052,grad_norm: 0.9999992439613979, iteration: 195392
loss: 1.1003572940826416,grad_norm: 0.999999959872595, iteration: 195393
loss: 1.0089424848556519,grad_norm: 0.9999991258675284, iteration: 195394
loss: 1.0281574726104736,grad_norm: 0.9999992170247788, iteration: 195395
loss: 0.9753865003585815,grad_norm: 0.9999991408387584, iteration: 195396
loss: 0.9762917160987854,grad_norm: 0.999998938996516, iteration: 195397
loss: 1.0199354887008667,grad_norm: 0.9999992444031446, iteration: 195398
loss: 0.9995437860488892,grad_norm: 0.9999991446718588, iteration: 195399
loss: 0.9951269626617432,grad_norm: 0.9999991493753565, iteration: 195400
loss: 0.996882975101471,grad_norm: 0.999999332984412, iteration: 195401
loss: 1.040147066116333,grad_norm: 0.9999994222750972, iteration: 195402
loss: 1.0066335201263428,grad_norm: 0.9999990109023658, iteration: 195403
loss: 1.0957705974578857,grad_norm: 0.983960439852636, iteration: 195404
loss: 1.1163221597671509,grad_norm: 0.9999998725236763, iteration: 195405
loss: 1.0174494981765747,grad_norm: 0.9999989993566122, iteration: 195406
loss: 1.2040878534317017,grad_norm: 0.9999998063837152, iteration: 195407
loss: 1.0098333358764648,grad_norm: 0.9999991085860575, iteration: 195408
loss: 0.9723541140556335,grad_norm: 0.9999992391836611, iteration: 195409
loss: 1.1516033411026,grad_norm: 0.9657079148112939, iteration: 195410
loss: 1.089535117149353,grad_norm: 0.9999998284268435, iteration: 195411
loss: 1.0099788904190063,grad_norm: 0.999999185911622, iteration: 195412
loss: 1.1347988843917847,grad_norm: 0.9999992293015764, iteration: 195413
loss: 1.0117957592010498,grad_norm: 0.999999107625963, iteration: 195414
loss: 1.0180047750473022,grad_norm: 0.999999695537094, iteration: 195415
loss: 0.9960746765136719,grad_norm: 0.9903363961349861, iteration: 195416
loss: 1.3638979196548462,grad_norm: 0.999999703029398, iteration: 195417
loss: 1.3203233480453491,grad_norm: 0.9999997888530432, iteration: 195418
loss: 1.0753147602081299,grad_norm: 0.9705565745171613, iteration: 195419
loss: 1.1349188089370728,grad_norm: 0.9999992452040237, iteration: 195420
loss: 1.0713937282562256,grad_norm: 0.9999996634529826, iteration: 195421
loss: 1.0289629697799683,grad_norm: 0.9110542396089565, iteration: 195422
loss: 1.1111690998077393,grad_norm: 0.9999989932908602, iteration: 195423
loss: 1.1231532096862793,grad_norm: 0.9999993079890735, iteration: 195424
loss: 1.3015459775924683,grad_norm: 0.9999994588549914, iteration: 195425
loss: 1.1237077713012695,grad_norm: 0.9999991345386954, iteration: 195426
loss: 1.1062726974487305,grad_norm: 0.9999991877681241, iteration: 195427
loss: 1.4381684064865112,grad_norm: 0.9999997999105218, iteration: 195428
loss: 1.1210397481918335,grad_norm: 0.9999994083906515, iteration: 195429
loss: 1.0771358013153076,grad_norm: 1.0000001047409501, iteration: 195430
loss: 1.0576858520507812,grad_norm: 0.9999989705801383, iteration: 195431
loss: 1.3272147178649902,grad_norm: 0.9999999728768931, iteration: 195432
loss: 1.1588958501815796,grad_norm: 0.9999997815157082, iteration: 195433
loss: 1.0086772441864014,grad_norm: 0.9999990827399106, iteration: 195434
loss: 1.0246464014053345,grad_norm: 0.9687660031449398, iteration: 195435
loss: 1.1484826803207397,grad_norm: 0.9999992037216373, iteration: 195436
loss: 1.1481688022613525,grad_norm: 1.0000000190561817, iteration: 195437
loss: 1.0776137113571167,grad_norm: 0.9999991442879189, iteration: 195438
loss: 1.0035550594329834,grad_norm: 0.9148365691536045, iteration: 195439
loss: 1.0487889051437378,grad_norm: 0.9956597112866564, iteration: 195440
loss: 1.0417360067367554,grad_norm: 0.9999993246753304, iteration: 195441
loss: 1.0559656620025635,grad_norm: 0.9999997307935168, iteration: 195442
loss: 1.055802822113037,grad_norm: 0.9999991340140447, iteration: 195443
loss: 1.0069319009780884,grad_norm: 0.9999994081548449, iteration: 195444
loss: 1.0544761419296265,grad_norm: 0.9999993589695542, iteration: 195445
loss: 1.156052827835083,grad_norm: 0.999999324161744, iteration: 195446
loss: 0.9992839097976685,grad_norm: 0.9999992289183247, iteration: 195447
loss: 0.9978287816047668,grad_norm: 0.9999992570072184, iteration: 195448
loss: 1.5921026468276978,grad_norm: 0.9999999647920333, iteration: 195449
loss: 1.021465539932251,grad_norm: 0.9999995630034848, iteration: 195450
loss: 1.1124786138534546,grad_norm: 0.9999995156229042, iteration: 195451
loss: 1.1049747467041016,grad_norm: 0.9999993758522375, iteration: 195452
loss: 1.2398672103881836,grad_norm: 0.9999997565281906, iteration: 195453
loss: 0.9836624264717102,grad_norm: 0.999999086648625, iteration: 195454
loss: 0.9988211393356323,grad_norm: 0.9999998398750631, iteration: 195455
loss: 0.9834259152412415,grad_norm: 0.9860699065435493, iteration: 195456
loss: 1.178011178970337,grad_norm: 0.9999997710364964, iteration: 195457
loss: 1.0292867422103882,grad_norm: 0.9999992669932379, iteration: 195458
loss: 1.0395276546478271,grad_norm: 0.9999992655129524, iteration: 195459
loss: 1.0246074199676514,grad_norm: 0.9999993399788516, iteration: 195460
loss: 1.0112688541412354,grad_norm: 0.9747761239510833, iteration: 195461
loss: 1.0009962320327759,grad_norm: 0.8952258686138767, iteration: 195462
loss: 1.0009247064590454,grad_norm: 0.9193666181971676, iteration: 195463
loss: 1.0331735610961914,grad_norm: 0.9999990854561152, iteration: 195464
loss: 1.0311176776885986,grad_norm: 0.9660043816637243, iteration: 195465
loss: 1.0234870910644531,grad_norm: 0.9999994701237847, iteration: 195466
loss: 1.0580939054489136,grad_norm: 0.9999990613325694, iteration: 195467
loss: 1.035407543182373,grad_norm: 0.9999993075531273, iteration: 195468
loss: 1.094594120979309,grad_norm: 0.999999629222242, iteration: 195469
loss: 0.9851168394088745,grad_norm: 0.9923490000167995, iteration: 195470
loss: 0.9952651262283325,grad_norm: 0.8960334102692065, iteration: 195471
loss: 1.0143991708755493,grad_norm: 0.7973500046044305, iteration: 195472
loss: 1.0397121906280518,grad_norm: 0.9999997052709172, iteration: 195473
loss: 1.0205063819885254,grad_norm: 0.8709439395380634, iteration: 195474
loss: 0.9831956624984741,grad_norm: 0.8426722352864893, iteration: 195475
loss: 1.0085102319717407,grad_norm: 0.9578393936267421, iteration: 195476
loss: 1.1439273357391357,grad_norm: 0.9999995356811283, iteration: 195477
loss: 1.0404958724975586,grad_norm: 0.9999999077907531, iteration: 195478
loss: 0.9501572251319885,grad_norm: 0.8047751134239155, iteration: 195479
loss: 1.165286898612976,grad_norm: 0.9999999277047793, iteration: 195480
loss: 1.1079202890396118,grad_norm: 0.9999999418817305, iteration: 195481
loss: 1.0728765726089478,grad_norm: 0.9999992155390216, iteration: 195482
loss: 1.0208566188812256,grad_norm: 0.9762610577050667, iteration: 195483
loss: 1.0927679538726807,grad_norm: 0.9999990997826492, iteration: 195484
loss: 1.123305320739746,grad_norm: 0.9999992819315947, iteration: 195485
loss: 1.1028863191604614,grad_norm: 0.9999991908256675, iteration: 195486
loss: 1.0143576860427856,grad_norm: 0.9554211968368996, iteration: 195487
loss: 1.3079761266708374,grad_norm: 0.9999999912776634, iteration: 195488
loss: 1.3214062452316284,grad_norm: 0.9999997536331936, iteration: 195489
loss: 1.0864149332046509,grad_norm: 0.9999997236894113, iteration: 195490
loss: 1.2234057188034058,grad_norm: 0.9999993820019738, iteration: 195491
loss: 1.3500064611434937,grad_norm: 0.9999997299506885, iteration: 195492
loss: 1.063805341720581,grad_norm: 0.9999997646939636, iteration: 195493
loss: 1.0535657405853271,grad_norm: 0.999999275827712, iteration: 195494
loss: 1.1558589935302734,grad_norm: 0.9999998188398315, iteration: 195495
loss: 1.1407665014266968,grad_norm: 0.9999998779048667, iteration: 195496
loss: 1.0151686668395996,grad_norm: 0.9544208071059929, iteration: 195497
loss: 1.1491752862930298,grad_norm: 0.9999998357392388, iteration: 195498
loss: 1.2110017538070679,grad_norm: 0.9999993697885762, iteration: 195499
loss: 1.0356547832489014,grad_norm: 0.9707171311125484, iteration: 195500
loss: 0.9960190057754517,grad_norm: 0.9999991624403417, iteration: 195501
loss: 1.3158347606658936,grad_norm: 0.9999999233910465, iteration: 195502
loss: 1.1657030582427979,grad_norm: 0.9999995177563806, iteration: 195503
loss: 1.023992896080017,grad_norm: 0.8736858995272074, iteration: 195504
loss: 1.1689788103103638,grad_norm: 0.9999999865308113, iteration: 195505
loss: 1.0911649465560913,grad_norm: 0.9999996093971021, iteration: 195506
loss: 1.030015468597412,grad_norm: 0.906196489869631, iteration: 195507
loss: 1.522329568862915,grad_norm: 0.9999998849581978, iteration: 195508
loss: 0.9880736470222473,grad_norm: 0.99999918893511, iteration: 195509
loss: 1.2799253463745117,grad_norm: 0.9999998485407938, iteration: 195510
loss: 1.1491737365722656,grad_norm: 0.9999997108668083, iteration: 195511
loss: 1.043395757675171,grad_norm: 0.9999990932013807, iteration: 195512
loss: 0.995952308177948,grad_norm: 0.9999997170047237, iteration: 195513
loss: 0.998936653137207,grad_norm: 0.9799216003186898, iteration: 195514
loss: 1.071221113204956,grad_norm: 0.9999994834633251, iteration: 195515
loss: 1.3161468505859375,grad_norm: 0.9999995109731216, iteration: 195516
loss: 1.1637808084487915,grad_norm: 0.9999995683927632, iteration: 195517
loss: 1.1654446125030518,grad_norm: 0.9999995072697603, iteration: 195518
loss: 0.9935383796691895,grad_norm: 0.9809371741383242, iteration: 195519
loss: 1.0128177404403687,grad_norm: 0.9994173199953579, iteration: 195520
loss: 0.9576241374015808,grad_norm: 0.9999991231670815, iteration: 195521
loss: 1.056381106376648,grad_norm: 0.999998890797079, iteration: 195522
loss: 1.099491000175476,grad_norm: 0.9716458699461378, iteration: 195523
loss: 1.0375220775604248,grad_norm: 0.9999991654155397, iteration: 195524
loss: 1.1338353157043457,grad_norm: 0.999999986239919, iteration: 195525
loss: 1.192333459854126,grad_norm: 0.9999994256012135, iteration: 195526
loss: 1.047664761543274,grad_norm: 0.8905624345954546, iteration: 195527
loss: 1.0285975933074951,grad_norm: 0.9672316071079612, iteration: 195528
loss: 0.9982752799987793,grad_norm: 0.9999991017458257, iteration: 195529
loss: 0.9798823595046997,grad_norm: 0.8364635801714907, iteration: 195530
loss: 0.963584840297699,grad_norm: 0.9999994652289212, iteration: 195531
loss: 1.222694993019104,grad_norm: 0.9999997444305049, iteration: 195532
loss: 0.9902075529098511,grad_norm: 0.9265950875484787, iteration: 195533
loss: 1.2024880647659302,grad_norm: 0.9999992761743262, iteration: 195534
loss: 1.0577421188354492,grad_norm: 0.8918738897571381, iteration: 195535
loss: 1.079469084739685,grad_norm: 0.9094098928329878, iteration: 195536
loss: 1.051171898841858,grad_norm: 0.9999996139036768, iteration: 195537
loss: 1.0803886651992798,grad_norm: 0.9999990879072401, iteration: 195538
loss: 0.9809514284133911,grad_norm: 0.8069876913984465, iteration: 195539
loss: 1.0062642097473145,grad_norm: 0.9999990401415717, iteration: 195540
loss: 1.0317195653915405,grad_norm: 0.9999996766148302, iteration: 195541
loss: 1.112502932548523,grad_norm: 1.0000000305381258, iteration: 195542
loss: 0.9865477681159973,grad_norm: 0.9999992497340612, iteration: 195543
loss: 1.1198543310165405,grad_norm: 0.9999994306923922, iteration: 195544
loss: 1.004609227180481,grad_norm: 0.9045563343862076, iteration: 195545
loss: 1.0651578903198242,grad_norm: 0.9999993030683044, iteration: 195546
loss: 1.0935014486312866,grad_norm: 0.9999997683791377, iteration: 195547
loss: 0.9786075949668884,grad_norm: 0.9323655647465864, iteration: 195548
loss: 0.9917543530464172,grad_norm: 0.9999992089100567, iteration: 195549
loss: 0.9987040162086487,grad_norm: 0.784318932976205, iteration: 195550
loss: 1.0638004541397095,grad_norm: 0.9999992429752618, iteration: 195551
loss: 0.9663413166999817,grad_norm: 0.8603371027178359, iteration: 195552
loss: 0.9846423864364624,grad_norm: 0.9733212209277424, iteration: 195553
loss: 1.087568759918213,grad_norm: 0.9999994309963255, iteration: 195554
loss: 1.0078356266021729,grad_norm: 0.9347591789358122, iteration: 195555
loss: 1.069082498550415,grad_norm: 0.9999992222739471, iteration: 195556
loss: 1.00253164768219,grad_norm: 0.9999990418050558, iteration: 195557
loss: 0.9504706859588623,grad_norm: 0.8836400548441219, iteration: 195558
loss: 0.9593880772590637,grad_norm: 0.9999992059077509, iteration: 195559
loss: 0.9807106256484985,grad_norm: 0.9503051879597258, iteration: 195560
loss: 0.988386332988739,grad_norm: 0.9999992119601419, iteration: 195561
loss: 1.0098108053207397,grad_norm: 0.8630995181206528, iteration: 195562
loss: 1.0123023986816406,grad_norm: 0.9999990086773608, iteration: 195563
loss: 0.9964768886566162,grad_norm: 0.804750637405404, iteration: 195564
loss: 0.9677892923355103,grad_norm: 0.9471915180996341, iteration: 195565
loss: 0.9963535070419312,grad_norm: 0.9999992434006181, iteration: 195566
loss: 0.9874154925346375,grad_norm: 0.847703392619247, iteration: 195567
loss: 1.0131371021270752,grad_norm: 0.9673840289271802, iteration: 195568
loss: 1.0502955913543701,grad_norm: 0.9173367513218453, iteration: 195569
loss: 1.0614992380142212,grad_norm: 0.9999992577188797, iteration: 195570
loss: 0.9804795980453491,grad_norm: 0.8803283063769606, iteration: 195571
loss: 0.9923310875892639,grad_norm: 0.9999991477357861, iteration: 195572
loss: 0.9652661085128784,grad_norm: 0.9999991458518289, iteration: 195573
loss: 0.9964908957481384,grad_norm: 0.9904217799199947, iteration: 195574
loss: 0.9565934538841248,grad_norm: 0.7674218978334497, iteration: 195575
loss: 1.0007798671722412,grad_norm: 0.9999991421003585, iteration: 195576
loss: 1.0300416946411133,grad_norm: 0.9999996028808258, iteration: 195577
loss: 1.0315625667572021,grad_norm: 0.9550758383727231, iteration: 195578
loss: 1.0290637016296387,grad_norm: 0.9999992085225116, iteration: 195579
loss: 0.999522864818573,grad_norm: 0.9984306746884105, iteration: 195580
loss: 1.0104135274887085,grad_norm: 0.8989280791376965, iteration: 195581
loss: 0.9528778195381165,grad_norm: 0.8754378554766369, iteration: 195582
loss: 1.0299415588378906,grad_norm: 0.9980890557502585, iteration: 195583
loss: 1.003594994544983,grad_norm: 0.9999993389682653, iteration: 195584
loss: 0.9717994928359985,grad_norm: 0.9999990330416411, iteration: 195585
loss: 1.050124168395996,grad_norm: 0.9999991540998292, iteration: 195586
loss: 1.0125476121902466,grad_norm: 0.9999994949471602, iteration: 195587
loss: 1.01237154006958,grad_norm: 0.999999228701125, iteration: 195588
loss: 1.0356072187423706,grad_norm: 0.9999991041902457, iteration: 195589
loss: 0.9627004265785217,grad_norm: 0.9363885520690429, iteration: 195590
loss: 1.0987919569015503,grad_norm: 0.912324251617239, iteration: 195591
loss: 1.031516432762146,grad_norm: 0.999999559929199, iteration: 195592
loss: 1.0235282182693481,grad_norm: 0.9999991428829607, iteration: 195593
loss: 1.0011115074157715,grad_norm: 0.9075155573226252, iteration: 195594
loss: 1.007156252861023,grad_norm: 0.9999993690369061, iteration: 195595
loss: 1.0680516958236694,grad_norm: 0.9096531564092127, iteration: 195596
loss: 0.9982802867889404,grad_norm: 0.9736595673026659, iteration: 195597
loss: 0.9839060306549072,grad_norm: 0.7284285925731162, iteration: 195598
loss: 0.9819853901863098,grad_norm: 0.9559085878423031, iteration: 195599
loss: 1.0164740085601807,grad_norm: 0.999999031084981, iteration: 195600
loss: 0.9839770197868347,grad_norm: 0.999999393707457, iteration: 195601
loss: 1.0066778659820557,grad_norm: 0.7599860660931762, iteration: 195602
loss: 0.9852092266082764,grad_norm: 0.9999991826645188, iteration: 195603
loss: 0.9301191568374634,grad_norm: 0.9018848530625656, iteration: 195604
loss: 0.9908053278923035,grad_norm: 0.9521446817687744, iteration: 195605
loss: 0.9777886867523193,grad_norm: 0.959916629517076, iteration: 195606
loss: 1.1075067520141602,grad_norm: 0.9616391053421389, iteration: 195607
loss: 0.9927486181259155,grad_norm: 0.9367372010612378, iteration: 195608
loss: 0.9791717529296875,grad_norm: 0.9578921583516713, iteration: 195609
loss: 0.9525692462921143,grad_norm: 0.9848923085067947, iteration: 195610
loss: 0.9995133876800537,grad_norm: 0.9145660810892835, iteration: 195611
loss: 1.01458740234375,grad_norm: 0.9999990587073514, iteration: 195612
loss: 1.0040349960327148,grad_norm: 0.9175845293506334, iteration: 195613
loss: 1.0475475788116455,grad_norm: 0.9999990560635338, iteration: 195614
loss: 0.9821103811264038,grad_norm: 0.9253999369295869, iteration: 195615
loss: 1.0097154378890991,grad_norm: 0.9026569641514428, iteration: 195616
loss: 1.0546684265136719,grad_norm: 0.9904436509091374, iteration: 195617
loss: 1.000268816947937,grad_norm: 0.9999992294957714, iteration: 195618
loss: 1.0688862800598145,grad_norm: 0.9418942654626818, iteration: 195619
loss: 1.020378828048706,grad_norm: 0.9574864500037955, iteration: 195620
loss: 0.9888761043548584,grad_norm: 0.8642542268190426, iteration: 195621
loss: 0.9602224826812744,grad_norm: 0.998993528497604, iteration: 195622
loss: 1.0005912780761719,grad_norm: 0.9999989969068254, iteration: 195623
loss: 0.9999982118606567,grad_norm: 0.9256842859575077, iteration: 195624
loss: 1.0013102293014526,grad_norm: 0.8908611481111918, iteration: 195625
loss: 0.9950243234634399,grad_norm: 0.8924475236385891, iteration: 195626
loss: 0.9907775521278381,grad_norm: 0.999999184343508, iteration: 195627
loss: 1.0037221908569336,grad_norm: 0.9999991067021247, iteration: 195628
loss: 1.000892162322998,grad_norm: 0.9999990707603372, iteration: 195629
loss: 1.011272668838501,grad_norm: 0.9999991546803891, iteration: 195630
loss: 1.027378797531128,grad_norm: 0.9600318506018094, iteration: 195631
loss: 1.0317484140396118,grad_norm: 0.9999990401937587, iteration: 195632
loss: 1.0134973526000977,grad_norm: 0.9679870775827031, iteration: 195633
loss: 1.0153300762176514,grad_norm: 0.9999990055293079, iteration: 195634
loss: 1.0834124088287354,grad_norm: 0.9999998417057717, iteration: 195635
loss: 1.0027515888214111,grad_norm: 0.9999991902804021, iteration: 195636
loss: 1.0148463249206543,grad_norm: 0.9999991489066735, iteration: 195637
loss: 1.0288891792297363,grad_norm: 0.9999991732898132, iteration: 195638
loss: 1.0003104209899902,grad_norm: 0.999999121346995, iteration: 195639
loss: 1.0082815885543823,grad_norm: 0.999999252582777, iteration: 195640
loss: 1.0276994705200195,grad_norm: 0.9999992637469169, iteration: 195641
loss: 0.977701723575592,grad_norm: 0.8837860891114966, iteration: 195642
loss: 1.0001096725463867,grad_norm: 0.9535767383698203, iteration: 195643
loss: 1.0195459127426147,grad_norm: 0.8036894923321082, iteration: 195644
loss: 1.096285343170166,grad_norm: 0.9999999486535734, iteration: 195645
loss: 0.9872086048126221,grad_norm: 0.9533488482216724, iteration: 195646
loss: 1.014521598815918,grad_norm: 0.9999995821238459, iteration: 195647
loss: 1.0036312341690063,grad_norm: 0.9999992997407962, iteration: 195648
loss: 1.0381906032562256,grad_norm: 0.8453530272645163, iteration: 195649
loss: 1.1343234777450562,grad_norm: 0.999999134471547, iteration: 195650
loss: 0.9900288581848145,grad_norm: 0.9744173747350146, iteration: 195651
loss: 0.9995561838150024,grad_norm: 0.9918556680114774, iteration: 195652
loss: 0.9933581948280334,grad_norm: 0.9728479585042459, iteration: 195653
loss: 1.061471939086914,grad_norm: 0.8549248106370516, iteration: 195654
loss: 0.9488967061042786,grad_norm: 0.9999992281401143, iteration: 195655
loss: 1.0238356590270996,grad_norm: 0.8981500516386058, iteration: 195656
loss: 1.0085500478744507,grad_norm: 0.9999991428423076, iteration: 195657
loss: 1.0090947151184082,grad_norm: 0.9999989588165723, iteration: 195658
loss: 1.015031099319458,grad_norm: 0.9999993342407943, iteration: 195659
loss: 1.0349562168121338,grad_norm: 0.9999995987585129, iteration: 195660
loss: 1.0002332925796509,grad_norm: 0.9399744579596071, iteration: 195661
loss: 0.9894246459007263,grad_norm: 0.9999991644608037, iteration: 195662
loss: 0.9492325782775879,grad_norm: 0.8546973677382386, iteration: 195663
loss: 1.0739264488220215,grad_norm: 0.8108093324000809, iteration: 195664
loss: 1.0251747369766235,grad_norm: 0.9540384048205844, iteration: 195665
loss: 1.020063042640686,grad_norm: 0.91537167889035, iteration: 195666
loss: 1.0085577964782715,grad_norm: 0.9999994117272148, iteration: 195667
loss: 0.9761847853660583,grad_norm: 0.9799986968281502, iteration: 195668
loss: 1.0030608177185059,grad_norm: 0.9999998708197692, iteration: 195669
loss: 1.0568081140518188,grad_norm: 0.9999991196705795, iteration: 195670
loss: 0.967457115650177,grad_norm: 0.9999990678079389, iteration: 195671
loss: 1.1405833959579468,grad_norm: 0.9999998945316411, iteration: 195672
loss: 0.9927144050598145,grad_norm: 0.8925026434919672, iteration: 195673
loss: 1.0031946897506714,grad_norm: 0.9730378714305963, iteration: 195674
loss: 1.0381808280944824,grad_norm: 0.9999993327590512, iteration: 195675
loss: 1.021436333656311,grad_norm: 0.8980443634540979, iteration: 195676
loss: 0.9561166763305664,grad_norm: 0.7039047206592519, iteration: 195677
loss: 1.0077016353607178,grad_norm: 0.846990958404555, iteration: 195678
loss: 0.9877608418464661,grad_norm: 0.9999991712102942, iteration: 195679
loss: 1.01207435131073,grad_norm: 0.9999991408164005, iteration: 195680
loss: 1.0167003870010376,grad_norm: 0.8276089902634272, iteration: 195681
loss: 1.0098347663879395,grad_norm: 0.9999989416479573, iteration: 195682
loss: 0.952218234539032,grad_norm: 0.8301910496341305, iteration: 195683
loss: 1.013552188873291,grad_norm: 0.9999990925010306, iteration: 195684
loss: 1.0245095491409302,grad_norm: 0.8438207435480385, iteration: 195685
loss: 0.9935111403465271,grad_norm: 0.9999991566171075, iteration: 195686
loss: 1.0757393836975098,grad_norm: 0.9999996278894927, iteration: 195687
loss: 0.9607109427452087,grad_norm: 0.9999990403636652, iteration: 195688
loss: 1.0297110080718994,grad_norm: 0.9999990142170143, iteration: 195689
loss: 0.9965507984161377,grad_norm: 0.9999992020835698, iteration: 195690
loss: 1.0031059980392456,grad_norm: 0.7643926104777012, iteration: 195691
loss: 1.0237621068954468,grad_norm: 0.9999994481907196, iteration: 195692
loss: 1.0740680694580078,grad_norm: 0.9999996266620524, iteration: 195693
loss: 0.9944201707839966,grad_norm: 0.7916710072599161, iteration: 195694
loss: 0.9957132935523987,grad_norm: 0.7704218344252323, iteration: 195695
loss: 1.0205073356628418,grad_norm: 0.9999990646595283, iteration: 195696
loss: 1.0165842771530151,grad_norm: 0.9999991437782116, iteration: 195697
loss: 1.0018826723098755,grad_norm: 0.7987220480740996, iteration: 195698
loss: 1.0142136812210083,grad_norm: 0.8443680091286995, iteration: 195699
loss: 1.0137104988098145,grad_norm: 0.8478186669843694, iteration: 195700
loss: 0.9683225750923157,grad_norm: 0.9999992029079078, iteration: 195701
loss: 1.010201096534729,grad_norm: 0.998486422685604, iteration: 195702
loss: 0.9970109462738037,grad_norm: 0.7458197052411902, iteration: 195703
loss: 0.9967836141586304,grad_norm: 0.987569035973437, iteration: 195704
loss: 0.9947524666786194,grad_norm: 0.9299411676789913, iteration: 195705
loss: 1.0001115798950195,grad_norm: 0.9999990650534625, iteration: 195706
loss: 0.9433982372283936,grad_norm: 0.831605249385602, iteration: 195707
loss: 1.0066626071929932,grad_norm: 0.9999991396266441, iteration: 195708
loss: 1.0042433738708496,grad_norm: 0.8179448133946896, iteration: 195709
loss: 0.9892774224281311,grad_norm: 0.7444584755377319, iteration: 195710
loss: 1.0179554224014282,grad_norm: 0.9697505828086418, iteration: 195711
loss: 1.0021084547042847,grad_norm: 0.9301772896989843, iteration: 195712
loss: 0.9844468235969543,grad_norm: 0.9999990440218218, iteration: 195713
loss: 0.9904783368110657,grad_norm: 0.9533363272851272, iteration: 195714
loss: 0.9731731414794922,grad_norm: 0.790242003447117, iteration: 195715
loss: 1.1269892454147339,grad_norm: 0.9999995572619877, iteration: 195716
loss: 1.0456444025039673,grad_norm: 0.9379472049148717, iteration: 195717
loss: 0.9760888814926147,grad_norm: 0.9771238465846984, iteration: 195718
loss: 1.0198287963867188,grad_norm: 0.9999990669010689, iteration: 195719
loss: 1.0281426906585693,grad_norm: 0.9291790650192492, iteration: 195720
loss: 1.0664838552474976,grad_norm: 0.9999997906539986, iteration: 195721
loss: 1.1332885026931763,grad_norm: 0.999999340032128, iteration: 195722
loss: 1.081329107284546,grad_norm: 0.999999889828557, iteration: 195723
loss: 1.0609503984451294,grad_norm: 0.9999992446859055, iteration: 195724
loss: 1.1884970664978027,grad_norm: 0.9999995539946879, iteration: 195725
loss: 0.975387454032898,grad_norm: 0.9999990974878561, iteration: 195726
loss: 0.9665684700012207,grad_norm: 0.9256418014627048, iteration: 195727
loss: 0.9949002265930176,grad_norm: 0.9999991073616962, iteration: 195728
loss: 0.9872064590454102,grad_norm: 0.8236222785052613, iteration: 195729
loss: 0.9798513054847717,grad_norm: 0.9661146971976444, iteration: 195730
loss: 1.0725783109664917,grad_norm: 0.999999254784295, iteration: 195731
loss: 0.9925036430358887,grad_norm: 0.799828770293025, iteration: 195732
loss: 1.011764645576477,grad_norm: 0.9498649676851115, iteration: 195733
loss: 1.0091904401779175,grad_norm: 0.9999992227265933, iteration: 195734
loss: 1.05954909324646,grad_norm: 0.9999994331776794, iteration: 195735
loss: 0.9791622757911682,grad_norm: 0.8806107094073076, iteration: 195736
loss: 1.0153940916061401,grad_norm: 0.9999992736150441, iteration: 195737
loss: 1.0587681531906128,grad_norm: 0.898044312004721, iteration: 195738
loss: 1.0241384506225586,grad_norm: 0.9999991148490224, iteration: 195739
loss: 1.0994738340377808,grad_norm: 0.999999141096643, iteration: 195740
loss: 1.002443790435791,grad_norm: 0.943067961195585, iteration: 195741
loss: 0.9955137968063354,grad_norm: 0.8440813257244543, iteration: 195742
loss: 1.0593948364257812,grad_norm: 0.9999996983698362, iteration: 195743
loss: 1.0463097095489502,grad_norm: 0.9040281398887653, iteration: 195744
loss: 1.036217212677002,grad_norm: 0.9999991586664543, iteration: 195745
loss: 1.1904895305633545,grad_norm: 0.9999999368316767, iteration: 195746
loss: 1.082045078277588,grad_norm: 0.9999998203344949, iteration: 195747
loss: 0.9597631692886353,grad_norm: 0.99999911689295, iteration: 195748
loss: 1.0042502880096436,grad_norm: 0.9999990641690257, iteration: 195749
loss: 1.0238465070724487,grad_norm: 1.0000000261111106, iteration: 195750
loss: 1.0893077850341797,grad_norm: 0.9621845901167305, iteration: 195751
loss: 0.9762823581695557,grad_norm: 0.9242606099242621, iteration: 195752
loss: 0.9536556005477905,grad_norm: 0.9748953716335579, iteration: 195753
loss: 0.9764978885650635,grad_norm: 0.9761698692250763, iteration: 195754
loss: 1.0181344747543335,grad_norm: 0.9100642511171759, iteration: 195755
loss: 1.1344069242477417,grad_norm: 0.9999993646665268, iteration: 195756
loss: 1.0097514390945435,grad_norm: 0.9226497547647188, iteration: 195757
loss: 1.0294570922851562,grad_norm: 0.9999990272476926, iteration: 195758
loss: 0.9821485280990601,grad_norm: 0.8385454164752535, iteration: 195759
loss: 1.1208438873291016,grad_norm: 0.9999995473859692, iteration: 195760
loss: 1.0680700540542603,grad_norm: 0.999999141994635, iteration: 195761
loss: 1.000256896018982,grad_norm: 0.9054511928092003, iteration: 195762
loss: 0.9796580076217651,grad_norm: 0.8604223342300775, iteration: 195763
loss: 1.007536768913269,grad_norm: 0.8810716138403624, iteration: 195764
loss: 1.1300593614578247,grad_norm: 0.9999993359025179, iteration: 195765
loss: 1.0270882844924927,grad_norm: 0.9437219891883192, iteration: 195766
loss: 1.038645625114441,grad_norm: 0.9999992117723953, iteration: 195767
loss: 1.0504710674285889,grad_norm: 0.9999992794725795, iteration: 195768
loss: 0.9983553290367126,grad_norm: 0.8624400379854039, iteration: 195769
loss: 1.0203150510787964,grad_norm: 0.836557569973556, iteration: 195770
loss: 0.9775562286376953,grad_norm: 0.9493378638661718, iteration: 195771
loss: 0.997148871421814,grad_norm: 0.9999990920134254, iteration: 195772
loss: 1.0264099836349487,grad_norm: 0.9999990938710142, iteration: 195773
loss: 1.0176173448562622,grad_norm: 0.8800535544566124, iteration: 195774
loss: 0.9871744513511658,grad_norm: 0.9181451795195249, iteration: 195775
loss: 1.0166089534759521,grad_norm: 0.8687395430741328, iteration: 195776
loss: 1.0195969343185425,grad_norm: 0.9999990690393081, iteration: 195777
loss: 1.0496686697006226,grad_norm: 0.9999991588919974, iteration: 195778
loss: 1.0214344263076782,grad_norm: 0.9999990633730512, iteration: 195779
loss: 1.1012729406356812,grad_norm: 0.9999997036464887, iteration: 195780
loss: 1.3236984014511108,grad_norm: 0.9999996112550378, iteration: 195781
loss: 0.9778861403465271,grad_norm: 0.9999989530091303, iteration: 195782
loss: 1.1909620761871338,grad_norm: 0.9999995424570531, iteration: 195783
loss: 1.0079790353775024,grad_norm: 0.9999991840948352, iteration: 195784
loss: 1.0201679468154907,grad_norm: 0.9999994197513087, iteration: 195785
loss: 1.0262866020202637,grad_norm: 0.9999992866070435, iteration: 195786
loss: 1.0047358274459839,grad_norm: 0.9999990324221503, iteration: 195787
loss: 1.0787431001663208,grad_norm: 0.9999998210591292, iteration: 195788
loss: 1.0360032320022583,grad_norm: 0.9999991907455152, iteration: 195789
loss: 1.0215195417404175,grad_norm: 0.9999994179227252, iteration: 195790
loss: 1.0121439695358276,grad_norm: 0.9999995653238194, iteration: 195791
loss: 1.0171215534210205,grad_norm: 0.9999991301152182, iteration: 195792
loss: 0.9883813858032227,grad_norm: 0.8656412279523817, iteration: 195793
loss: 0.9921882748603821,grad_norm: 0.9849659322351211, iteration: 195794
loss: 1.0386264324188232,grad_norm: 0.9999993690510735, iteration: 195795
loss: 0.991561233997345,grad_norm: 0.9955117103818332, iteration: 195796
loss: 0.98206627368927,grad_norm: 0.9080522743881503, iteration: 195797
loss: 1.0669792890548706,grad_norm: 0.9999993374782259, iteration: 195798
loss: 1.0053743124008179,grad_norm: 0.9714047470323672, iteration: 195799
loss: 1.015837550163269,grad_norm: 0.797864814241451, iteration: 195800
loss: 1.0480459928512573,grad_norm: 0.9678371918145693, iteration: 195801
loss: 0.9824846982955933,grad_norm: 0.9999991152029719, iteration: 195802
loss: 0.9845319390296936,grad_norm: 0.7901787354789483, iteration: 195803
loss: 0.9868685007095337,grad_norm: 0.9592376876750516, iteration: 195804
loss: 1.0210769176483154,grad_norm: 0.9999994498336892, iteration: 195805
loss: 0.9948501586914062,grad_norm: 0.9364494453418379, iteration: 195806
loss: 1.0765748023986816,grad_norm: 0.9999991730256627, iteration: 195807
loss: 1.0081942081451416,grad_norm: 0.8290405536939757, iteration: 195808
loss: 0.9922770857810974,grad_norm: 0.999999178794149, iteration: 195809
loss: 0.9859670996665955,grad_norm: 0.9591963707417346, iteration: 195810
loss: 1.0091408491134644,grad_norm: 0.9125689911393463, iteration: 195811
loss: 1.053795576095581,grad_norm: 0.9999993589063191, iteration: 195812
loss: 1.014873743057251,grad_norm: 0.9957235854267332, iteration: 195813
loss: 1.0087114572525024,grad_norm: 0.9999992558769191, iteration: 195814
loss: 0.9854363799095154,grad_norm: 0.9999992036492384, iteration: 195815
loss: 1.007196307182312,grad_norm: 0.9733870722694374, iteration: 195816
loss: 0.9839831590652466,grad_norm: 0.9847927310612806, iteration: 195817
loss: 1.0121538639068604,grad_norm: 0.9999995241676874, iteration: 195818
loss: 1.023100733757019,grad_norm: 0.9999989705142873, iteration: 195819
loss: 1.0343189239501953,grad_norm: 0.8337354844653313, iteration: 195820
loss: 1.0197299718856812,grad_norm: 0.886111676669329, iteration: 195821
loss: 1.0273057222366333,grad_norm: 0.9778922331994665, iteration: 195822
loss: 0.9504331946372986,grad_norm: 0.8936892346393034, iteration: 195823
loss: 1.017000436782837,grad_norm: 0.9999991146204401, iteration: 195824
loss: 1.0100617408752441,grad_norm: 0.947788865755773, iteration: 195825
loss: 0.9895761013031006,grad_norm: 0.9999994618456595, iteration: 195826
loss: 1.0060093402862549,grad_norm: 0.9119829701559838, iteration: 195827
loss: 0.9575271606445312,grad_norm: 0.8870211333788373, iteration: 195828
loss: 1.0643303394317627,grad_norm: 0.9342250364767692, iteration: 195829
loss: 0.9855175614356995,grad_norm: 0.9999991373297182, iteration: 195830
loss: 0.9902302622795105,grad_norm: 0.9999992034680018, iteration: 195831
loss: 1.0613070726394653,grad_norm: 0.8797237392509192, iteration: 195832
loss: 0.9972548484802246,grad_norm: 0.8895230727337831, iteration: 195833
loss: 1.0155102014541626,grad_norm: 0.9999990781271796, iteration: 195834
loss: 0.9970656037330627,grad_norm: 0.9598084534186822, iteration: 195835
loss: 1.0478291511535645,grad_norm: 0.9999991795654373, iteration: 195836
loss: 1.0507043600082397,grad_norm: 0.9999990930628497, iteration: 195837
loss: 0.9958608746528625,grad_norm: 0.9114558888675974, iteration: 195838
loss: 0.9947472214698792,grad_norm: 0.9999990210968476, iteration: 195839
loss: 0.9874353408813477,grad_norm: 0.9999998888320091, iteration: 195840
loss: 1.0294736623764038,grad_norm: 0.9999991899976816, iteration: 195841
loss: 0.9963284730911255,grad_norm: 0.9999991768688128, iteration: 195842
loss: 1.0386313199996948,grad_norm: 0.9999990839468994, iteration: 195843
loss: 1.0071475505828857,grad_norm: 0.9336958376611637, iteration: 195844
loss: 1.056533694267273,grad_norm: 0.9999990828692427, iteration: 195845
loss: 0.9978187680244446,grad_norm: 0.9457469592632124, iteration: 195846
loss: 0.9800347685813904,grad_norm: 0.9703651110729091, iteration: 195847
loss: 1.028403401374817,grad_norm: 0.999999202666606, iteration: 195848
loss: 1.013999342918396,grad_norm: 0.8839072156037265, iteration: 195849
loss: 0.9737012982368469,grad_norm: 0.9999991270265949, iteration: 195850
loss: 0.9624449014663696,grad_norm: 0.7372915484263877, iteration: 195851
loss: 0.9926180839538574,grad_norm: 0.979286523928972, iteration: 195852
loss: 1.005104422569275,grad_norm: 0.9999991772859753, iteration: 195853
loss: 1.007220983505249,grad_norm: 0.9999990631498518, iteration: 195854
loss: 1.1025043725967407,grad_norm: 0.9999996679031956, iteration: 195855
loss: 1.0153220891952515,grad_norm: 0.9999990504308298, iteration: 195856
loss: 1.0067405700683594,grad_norm: 0.9800348755510924, iteration: 195857
loss: 0.9447651505470276,grad_norm: 0.9395531629028364, iteration: 195858
loss: 1.077608346939087,grad_norm: 0.9999992815840337, iteration: 195859
loss: 1.1077516078948975,grad_norm: 0.923248818931561, iteration: 195860
loss: 1.0320277214050293,grad_norm: 0.9336761104778012, iteration: 195861
loss: 0.9571430087089539,grad_norm: 0.8652797009474782, iteration: 195862
loss: 0.9941415190696716,grad_norm: 0.7592776168571682, iteration: 195863
loss: 1.0719949007034302,grad_norm: 0.9999991884469878, iteration: 195864
loss: 1.0682704448699951,grad_norm: 0.999999379392235, iteration: 195865
loss: 1.0510834455490112,grad_norm: 0.9999994514314507, iteration: 195866
loss: 1.0453604459762573,grad_norm: 0.8623604895040877, iteration: 195867
loss: 1.0013329982757568,grad_norm: 0.7623972823456298, iteration: 195868
loss: 1.010132074356079,grad_norm: 0.9462325986462355, iteration: 195869
loss: 1.007059097290039,grad_norm: 0.8644653644501527, iteration: 195870
loss: 1.20071542263031,grad_norm: 0.9999993186674798, iteration: 195871
loss: 0.9673321843147278,grad_norm: 0.9329677009662323, iteration: 195872
loss: 1.0988638401031494,grad_norm: 0.9999992863819823, iteration: 195873
loss: 1.0582858324050903,grad_norm: 1.000000151973723, iteration: 195874
loss: 1.2079079151153564,grad_norm: 0.9999997683385075, iteration: 195875
loss: 1.0211116075515747,grad_norm: 0.8885538001684832, iteration: 195876
loss: 1.015928864479065,grad_norm: 0.8938522683718154, iteration: 195877
loss: 1.0008286237716675,grad_norm: 0.88561239852834, iteration: 195878
loss: 1.173116683959961,grad_norm: 0.9999993856720627, iteration: 195879
loss: 1.031245231628418,grad_norm: 0.888232169343104, iteration: 195880
loss: 1.0056324005126953,grad_norm: 0.9999990739821979, iteration: 195881
loss: 1.1182234287261963,grad_norm: 0.9999996922053742, iteration: 195882
loss: 1.1745972633361816,grad_norm: 0.999999395168947, iteration: 195883
loss: 0.9886834621429443,grad_norm: 0.9999991070295444, iteration: 195884
loss: 1.0022213459014893,grad_norm: 0.9706941729978121, iteration: 195885
loss: 1.0224841833114624,grad_norm: 0.8622945856177225, iteration: 195886
loss: 1.0092648267745972,grad_norm: 0.8387018277571952, iteration: 195887
loss: 1.0373059511184692,grad_norm: 0.9999998222983605, iteration: 195888
loss: 1.0159968137741089,grad_norm: 0.999999127027906, iteration: 195889
loss: 1.0565967559814453,grad_norm: 0.867861404095968, iteration: 195890
loss: 1.0160919427871704,grad_norm: 0.9999991085429134, iteration: 195891
loss: 1.0352811813354492,grad_norm: 0.9999990957353758, iteration: 195892
loss: 1.0460416078567505,grad_norm: 0.9999994918171087, iteration: 195893
loss: 1.0733567476272583,grad_norm: 0.9999995540814562, iteration: 195894
loss: 1.0081055164337158,grad_norm: 0.9999991708454339, iteration: 195895
loss: 1.063917875289917,grad_norm: 0.9999995429726859, iteration: 195896
loss: 1.080836296081543,grad_norm: 0.9999990581024502, iteration: 195897
loss: 1.0380074977874756,grad_norm: 0.9999998238043863, iteration: 195898
loss: 1.103341817855835,grad_norm: 0.9999998040370833, iteration: 195899
loss: 0.9794268608093262,grad_norm: 0.9999990771667994, iteration: 195900
loss: 1.0139175653457642,grad_norm: 0.8598127633143021, iteration: 195901
loss: 1.0313291549682617,grad_norm: 0.9999991539354119, iteration: 195902
loss: 1.1505004167556763,grad_norm: 0.99999948098284, iteration: 195903
loss: 1.047669768333435,grad_norm: 0.9999993671397119, iteration: 195904
loss: 1.0023373365402222,grad_norm: 0.9999991062049818, iteration: 195905
loss: 0.9913491010665894,grad_norm: 0.9999992365737765, iteration: 195906
loss: 0.9905897378921509,grad_norm: 0.9312619987036039, iteration: 195907
loss: 1.043814778327942,grad_norm: 0.999999246950654, iteration: 195908
loss: 1.0227038860321045,grad_norm: 0.9999991026795095, iteration: 195909
loss: 1.0301433801651,grad_norm: 0.9999992423209041, iteration: 195910
loss: 1.0441075563430786,grad_norm: 0.9999990144602344, iteration: 195911
loss: 1.0947524309158325,grad_norm: 0.9999990954754316, iteration: 195912
loss: 1.0128235816955566,grad_norm: 0.9999991169967576, iteration: 195913
loss: 1.0197783708572388,grad_norm: 0.9999993142073588, iteration: 195914
loss: 0.9634947776794434,grad_norm: 0.9724613194642173, iteration: 195915
loss: 1.0552928447723389,grad_norm: 0.9999994083001323, iteration: 195916
loss: 1.0686841011047363,grad_norm: 0.9999992217037917, iteration: 195917
loss: 1.0417277812957764,grad_norm: 0.9999993122349075, iteration: 195918
loss: 1.0296072959899902,grad_norm: 0.9999990813056403, iteration: 195919
loss: 1.0307189226150513,grad_norm: 0.8841234595871552, iteration: 195920
loss: 0.9922617673873901,grad_norm: 0.9999990925001888, iteration: 195921
loss: 1.0088108777999878,grad_norm: 0.8653359156756598, iteration: 195922
loss: 1.0259705781936646,grad_norm: 0.9999991152199682, iteration: 195923
loss: 1.0204726457595825,grad_norm: 0.9999991723315101, iteration: 195924
loss: 0.9797852635383606,grad_norm: 0.9232959024275542, iteration: 195925
loss: 1.1119003295898438,grad_norm: 0.9999994965729566, iteration: 195926
loss: 1.187088131904602,grad_norm: 0.9999992200057223, iteration: 195927
loss: 1.0194848775863647,grad_norm: 0.9999994752460654, iteration: 195928
loss: 1.0861904621124268,grad_norm: 0.9999990414684109, iteration: 195929
loss: 1.1083707809448242,grad_norm: 0.9999998835999938, iteration: 195930
loss: 0.9791886806488037,grad_norm: 0.9999990001021491, iteration: 195931
loss: 1.013500690460205,grad_norm: 0.9988912285082948, iteration: 195932
loss: 1.0616012811660767,grad_norm: 0.999999555756952, iteration: 195933
loss: 0.9971200823783875,grad_norm: 0.9999998428016055, iteration: 195934
loss: 1.069257140159607,grad_norm: 0.9018045429929885, iteration: 195935
loss: 1.0194953680038452,grad_norm: 0.9999992503173009, iteration: 195936
loss: 1.0153502225875854,grad_norm: 0.9999990215604659, iteration: 195937
loss: 1.0149095058441162,grad_norm: 0.8780075984511775, iteration: 195938
loss: 1.031272053718567,grad_norm: 0.9999992478619018, iteration: 195939
loss: 0.9994144439697266,grad_norm: 0.841062159826383, iteration: 195940
loss: 1.1040303707122803,grad_norm: 0.9999992280499186, iteration: 195941
loss: 1.0585546493530273,grad_norm: 0.9999996197276972, iteration: 195942
loss: 0.986579418182373,grad_norm: 0.9827909135450684, iteration: 195943
loss: 0.9778903126716614,grad_norm: 0.812903497247176, iteration: 195944
loss: 1.0764225721359253,grad_norm: 0.9999997820950116, iteration: 195945
loss: 1.1051956415176392,grad_norm: 0.9999995225980144, iteration: 195946
loss: 0.9963332414627075,grad_norm: 0.9880240329212399, iteration: 195947
loss: 1.0323166847229004,grad_norm: 0.8879984776383192, iteration: 195948
loss: 1.0044995546340942,grad_norm: 0.9999989311092974, iteration: 195949
loss: 0.9798890948295593,grad_norm: 0.8157341215389162, iteration: 195950
loss: 1.0188069343566895,grad_norm: 0.999999018010994, iteration: 195951
loss: 1.026113748550415,grad_norm: 0.9999994899443313, iteration: 195952
loss: 0.9961286187171936,grad_norm: 0.9999990836219945, iteration: 195953
loss: 1.1383147239685059,grad_norm: 0.9999991901407491, iteration: 195954
loss: 0.9957777261734009,grad_norm: 0.79589700163122, iteration: 195955
loss: 1.0090832710266113,grad_norm: 0.9236545109418594, iteration: 195956
loss: 0.9607830047607422,grad_norm: 0.9504401067901597, iteration: 195957
loss: 1.0048222541809082,grad_norm: 0.9999992785009552, iteration: 195958
loss: 1.0075124502182007,grad_norm: 0.9999992998844868, iteration: 195959
loss: 0.985884428024292,grad_norm: 0.8974556968999943, iteration: 195960
loss: 1.090494990348816,grad_norm: 0.9447025903929439, iteration: 195961
loss: 1.0392999649047852,grad_norm: 0.9999990868876244, iteration: 195962
loss: 1.0029828548431396,grad_norm: 0.8333560544033379, iteration: 195963
loss: 1.0522090196609497,grad_norm: 0.9999991971151869, iteration: 195964
loss: 1.0221916437149048,grad_norm: 0.9999991174178138, iteration: 195965
loss: 1.0286346673965454,grad_norm: 0.9999997999164565, iteration: 195966
loss: 0.9832841753959656,grad_norm: 0.9999990902529607, iteration: 195967
loss: 1.0184714794158936,grad_norm: 0.9362191613652142, iteration: 195968
loss: 1.0077064037322998,grad_norm: 0.9999991262126562, iteration: 195969
loss: 1.0696271657943726,grad_norm: 0.9073936963917513, iteration: 195970
loss: 1.007840871810913,grad_norm: 0.9707449653774001, iteration: 195971
loss: 1.070325255393982,grad_norm: 0.9999995701758335, iteration: 195972
loss: 0.9945284128189087,grad_norm: 0.8906937419318739, iteration: 195973
loss: 1.0049186944961548,grad_norm: 0.9999991142515708, iteration: 195974
loss: 1.0523271560668945,grad_norm: 0.9999991123757891, iteration: 195975
loss: 1.0157440900802612,grad_norm: 0.977195995230803, iteration: 195976
loss: 0.9971576929092407,grad_norm: 0.9999991741982497, iteration: 195977
loss: 1.0172276496887207,grad_norm: 0.9999991735899946, iteration: 195978
loss: 1.0617773532867432,grad_norm: 0.9999999643893338, iteration: 195979
loss: 1.0607892274856567,grad_norm: 0.999999300902394, iteration: 195980
loss: 1.007297396659851,grad_norm: 0.9999992410302292, iteration: 195981
loss: 1.0454171895980835,grad_norm: 0.9999994748422785, iteration: 195982
loss: 1.0351883172988892,grad_norm: 0.999999230703103, iteration: 195983
loss: 0.9873464703559875,grad_norm: 0.9999996772345605, iteration: 195984
loss: 1.1718028783798218,grad_norm: 0.9999994241312082, iteration: 195985
loss: 1.07108473777771,grad_norm: 0.9999993610198412, iteration: 195986
loss: 1.0309948921203613,grad_norm: 0.9999991755145519, iteration: 195987
loss: 1.016653299331665,grad_norm: 0.9585708820583694, iteration: 195988
loss: 0.9920021891593933,grad_norm: 0.9999990783697915, iteration: 195989
loss: 0.9910311102867126,grad_norm: 0.8743914918115113, iteration: 195990
loss: 0.9864994883537292,grad_norm: 0.9999998947602086, iteration: 195991
loss: 1.0102308988571167,grad_norm: 0.9999991732668295, iteration: 195992
loss: 0.9782477617263794,grad_norm: 0.9999990386692645, iteration: 195993
loss: 0.9996387362480164,grad_norm: 0.9999989646797999, iteration: 195994
loss: 0.9927809834480286,grad_norm: 0.9751092937263177, iteration: 195995
loss: 0.950610339641571,grad_norm: 0.9999990813150867, iteration: 195996
loss: 0.9676337242126465,grad_norm: 0.8604346378109219, iteration: 195997
loss: 1.0197924375534058,grad_norm: 0.9414710191677211, iteration: 195998
loss: 1.1178443431854248,grad_norm: 0.9999996904671256, iteration: 195999
loss: 1.0400903224945068,grad_norm: 0.999999894218972, iteration: 196000
loss: 1.0698471069335938,grad_norm: 0.9999995000815741, iteration: 196001
loss: 1.0537710189819336,grad_norm: 0.9999990690265327, iteration: 196002
loss: 0.9972755312919617,grad_norm: 0.9015196503689815, iteration: 196003
loss: 0.978668749332428,grad_norm: 0.8186145308430723, iteration: 196004
loss: 0.9977631568908691,grad_norm: 0.9999993324769952, iteration: 196005
loss: 1.0355619192123413,grad_norm: 0.9999991808488219, iteration: 196006
loss: 1.083122730255127,grad_norm: 0.999999381749888, iteration: 196007
loss: 0.9902944564819336,grad_norm: 0.9230059501997901, iteration: 196008
loss: 0.9928635954856873,grad_norm: 0.9183731899872082, iteration: 196009
loss: 1.0462383031845093,grad_norm: 0.9999998334210368, iteration: 196010
loss: 0.9925493597984314,grad_norm: 0.8499261288652018, iteration: 196011
loss: 1.0219402313232422,grad_norm: 0.9999991260020977, iteration: 196012
loss: 0.9733368754386902,grad_norm: 0.9999989726663652, iteration: 196013
loss: 1.1980326175689697,grad_norm: 0.9999995473882131, iteration: 196014
loss: 1.0062487125396729,grad_norm: 0.8624716687463628, iteration: 196015
loss: 0.9886320233345032,grad_norm: 0.8896080188081433, iteration: 196016
loss: 0.994379460811615,grad_norm: 0.9950003899445107, iteration: 196017
loss: 1.0172793865203857,grad_norm: 0.9103048270796356, iteration: 196018
loss: 0.983653724193573,grad_norm: 0.9485697894493612, iteration: 196019
loss: 1.0095889568328857,grad_norm: 0.9830044962688648, iteration: 196020
loss: 1.1428600549697876,grad_norm: 0.9999994345875624, iteration: 196021
loss: 1.0115833282470703,grad_norm: 0.956565808950857, iteration: 196022
loss: 0.9897271394729614,grad_norm: 0.9999990421996442, iteration: 196023
loss: 1.0637646913528442,grad_norm: 0.9999992379644406, iteration: 196024
loss: 1.161297082901001,grad_norm: 0.9999998853191246, iteration: 196025
loss: 0.9950112104415894,grad_norm: 0.913986835548658, iteration: 196026
loss: 1.0463968515396118,grad_norm: 0.9999999387856332, iteration: 196027
loss: 0.9768723845481873,grad_norm: 0.9332506692332594, iteration: 196028
loss: 1.015194058418274,grad_norm: 0.9625499786936274, iteration: 196029
loss: 1.2872730493545532,grad_norm: 0.9999991976768953, iteration: 196030
loss: 1.0273077487945557,grad_norm: 0.8662176457031675, iteration: 196031
loss: 1.1801533699035645,grad_norm: 0.9999993135718805, iteration: 196032
loss: 1.1172791719436646,grad_norm: 0.9999992354921291, iteration: 196033
loss: 1.0682464838027954,grad_norm: 0.9999993233328023, iteration: 196034
loss: 0.9471279382705688,grad_norm: 0.999999151395149, iteration: 196035
loss: 1.0122586488723755,grad_norm: 0.9999990809285509, iteration: 196036
loss: 1.0614888668060303,grad_norm: 0.9999999071941419, iteration: 196037
loss: 1.012358546257019,grad_norm: 0.9999990760838464, iteration: 196038
loss: 0.9871512055397034,grad_norm: 0.9788294224192989, iteration: 196039
loss: 0.976344883441925,grad_norm: 0.9768712687141291, iteration: 196040
loss: 1.0397042036056519,grad_norm: 0.9999992063029498, iteration: 196041
loss: 1.0149483680725098,grad_norm: 0.9717895053330837, iteration: 196042
loss: 1.0329999923706055,grad_norm: 0.9999993009347455, iteration: 196043
loss: 1.0049313306808472,grad_norm: 0.8845864123639616, iteration: 196044
loss: 1.0121833086013794,grad_norm: 0.9499642693287612, iteration: 196045
loss: 1.0176349878311157,grad_norm: 0.8643338226638353, iteration: 196046
loss: 1.043252944946289,grad_norm: 0.8893934141637919, iteration: 196047
loss: 1.0815494060516357,grad_norm: 0.9999998362874409, iteration: 196048
loss: 1.0144035816192627,grad_norm: 0.9999991517146783, iteration: 196049
loss: 1.0481128692626953,grad_norm: 0.9999996275766476, iteration: 196050
loss: 1.0506579875946045,grad_norm: 0.8484664701560618, iteration: 196051
loss: 1.0227168798446655,grad_norm: 0.9999990076571474, iteration: 196052
loss: 0.9760634303092957,grad_norm: 0.902234045745787, iteration: 196053
loss: 0.9862065315246582,grad_norm: 0.9999993614555475, iteration: 196054
loss: 1.0315500497817993,grad_norm: 0.9999992543081133, iteration: 196055
loss: 1.0406302213668823,grad_norm: 0.9999990806754057, iteration: 196056
loss: 1.0063611268997192,grad_norm: 0.9999989178212093, iteration: 196057
loss: 1.0162320137023926,grad_norm: 0.9999992239315499, iteration: 196058
loss: 1.0155913829803467,grad_norm: 0.999999275719358, iteration: 196059
loss: 1.1239780187606812,grad_norm: 0.9999997996838497, iteration: 196060
loss: 1.0240051746368408,grad_norm: 0.9178261553234759, iteration: 196061
loss: 0.9881947636604309,grad_norm: 0.9999999276131243, iteration: 196062
loss: 1.0258500576019287,grad_norm: 0.9813522008131963, iteration: 196063
loss: 1.0018165111541748,grad_norm: 0.9993403487333159, iteration: 196064
loss: 1.0839502811431885,grad_norm: 0.999999079135707, iteration: 196065
loss: 0.973392128944397,grad_norm: 0.8252182094418093, iteration: 196066
loss: 1.03153395652771,grad_norm: 0.8701705218237196, iteration: 196067
loss: 1.0105009078979492,grad_norm: 0.9695166395137148, iteration: 196068
loss: 1.0158270597457886,grad_norm: 0.9092187070258687, iteration: 196069
loss: 1.0396777391433716,grad_norm: 0.9999999235376986, iteration: 196070
loss: 1.1472911834716797,grad_norm: 0.9999998934500254, iteration: 196071
loss: 0.9627193808555603,grad_norm: 0.999999156997668, iteration: 196072
loss: 0.9981840252876282,grad_norm: 0.9316148521635838, iteration: 196073
loss: 1.1674129962921143,grad_norm: 0.9999999037066347, iteration: 196074
loss: 1.0506209135055542,grad_norm: 0.9999994151619763, iteration: 196075
loss: 1.0127952098846436,grad_norm: 0.9999998581256653, iteration: 196076
loss: 1.0191110372543335,grad_norm: 0.9999991740124475, iteration: 196077
loss: 0.9998998045921326,grad_norm: 0.8764358712814295, iteration: 196078
loss: 1.0430729389190674,grad_norm: 0.9999997753052334, iteration: 196079
loss: 0.9978169202804565,grad_norm: 0.8120664362877268, iteration: 196080
loss: 1.0062764883041382,grad_norm: 0.9999993314952949, iteration: 196081
loss: 0.9994639754295349,grad_norm: 0.9999991763855912, iteration: 196082
loss: 1.0043954849243164,grad_norm: 0.9999991929807791, iteration: 196083
loss: 0.9928346872329712,grad_norm: 0.9999990703913791, iteration: 196084
loss: 1.1034703254699707,grad_norm: 0.9999998275275611, iteration: 196085
loss: 1.002580165863037,grad_norm: 0.9999995712809047, iteration: 196086
loss: 1.0385451316833496,grad_norm: 0.9999992771689699, iteration: 196087
loss: 1.022471308708191,grad_norm: 0.9138074440533734, iteration: 196088
loss: 0.9924280643463135,grad_norm: 0.9999996840360286, iteration: 196089
loss: 1.0414104461669922,grad_norm: 0.9999999044968815, iteration: 196090
loss: 1.0068318843841553,grad_norm: 0.9999995087072332, iteration: 196091
loss: 0.992590069770813,grad_norm: 0.9605581327834357, iteration: 196092
loss: 0.9909716248512268,grad_norm: 0.9999990386703068, iteration: 196093
loss: 1.0147695541381836,grad_norm: 0.9999989009231419, iteration: 196094
loss: 1.0068880319595337,grad_norm: 0.9999999718098415, iteration: 196095
loss: 1.0161073207855225,grad_norm: 0.9999991461776996, iteration: 196096
loss: 0.9954550266265869,grad_norm: 0.8917069728829735, iteration: 196097
loss: 0.975841224193573,grad_norm: 0.960374680392794, iteration: 196098
loss: 1.0165681838989258,grad_norm: 0.7863386785702412, iteration: 196099
loss: 1.004330039024353,grad_norm: 0.9432987354312183, iteration: 196100
loss: 1.0147985219955444,grad_norm: 0.9330523274441076, iteration: 196101
loss: 0.9826473593711853,grad_norm: 0.9223931025928915, iteration: 196102
loss: 1.005196452140808,grad_norm: 0.9213313010320617, iteration: 196103
loss: 1.12429678440094,grad_norm: 0.9999996319489246, iteration: 196104
loss: 1.0028696060180664,grad_norm: 0.9999995267618232, iteration: 196105
loss: 1.0296021699905396,grad_norm: 0.8547773678139224, iteration: 196106
loss: 1.0087816715240479,grad_norm: 0.8837086663972251, iteration: 196107
loss: 0.9760818481445312,grad_norm: 0.9530276746474148, iteration: 196108
loss: 0.9801715612411499,grad_norm: 0.9040308353610221, iteration: 196109
loss: 1.0545331239700317,grad_norm: 0.9999994236062687, iteration: 196110
loss: 1.129599690437317,grad_norm: 0.9712180957306127, iteration: 196111
loss: 1.0014123916625977,grad_norm: 0.9999991315748052, iteration: 196112
loss: 0.9807524085044861,grad_norm: 0.8564843449085914, iteration: 196113
loss: 0.993547260761261,grad_norm: 0.9676718848216348, iteration: 196114
loss: 0.9843769669532776,grad_norm: 0.8910023560085886, iteration: 196115
loss: 0.9719389081001282,grad_norm: 0.8310356905827545, iteration: 196116
loss: 1.0081183910369873,grad_norm: 0.9241451784787227, iteration: 196117
loss: 1.0248337984085083,grad_norm: 0.8563491339092104, iteration: 196118
loss: 1.0161024332046509,grad_norm: 0.8033284906882906, iteration: 196119
loss: 0.995573878288269,grad_norm: 0.9219619959674742, iteration: 196120
loss: 0.9781938791275024,grad_norm: 0.9418430557969624, iteration: 196121
loss: 1.0130358934402466,grad_norm: 0.9634100262674392, iteration: 196122
loss: 1.0316734313964844,grad_norm: 0.9843057393375354, iteration: 196123
loss: 0.9809314012527466,grad_norm: 0.925807784418827, iteration: 196124
loss: 0.9917530417442322,grad_norm: 0.7483816289107119, iteration: 196125
loss: 1.0132105350494385,grad_norm: 0.9990748879571472, iteration: 196126
loss: 1.004550814628601,grad_norm: 0.9999991235367608, iteration: 196127
loss: 0.9760597348213196,grad_norm: 0.6967692990004902, iteration: 196128
loss: 1.0478328466415405,grad_norm: 0.9999991403806066, iteration: 196129
loss: 1.016976237297058,grad_norm: 0.9622886992013484, iteration: 196130
loss: 1.0284637212753296,grad_norm: 0.985576522338133, iteration: 196131
loss: 0.9976171255111694,grad_norm: 0.9229279243110885, iteration: 196132
loss: 0.9767972230911255,grad_norm: 0.9782422429688041, iteration: 196133
loss: 0.9622145891189575,grad_norm: 0.9865755982799855, iteration: 196134
loss: 1.0347622632980347,grad_norm: 0.999999253985237, iteration: 196135
loss: 0.9708623886108398,grad_norm: 0.8487779871157496, iteration: 196136
loss: 1.0156234502792358,grad_norm: 0.999999124221811, iteration: 196137
loss: 0.9766840934753418,grad_norm: 0.999999082948946, iteration: 196138
loss: 1.055066704750061,grad_norm: 0.8905625100692901, iteration: 196139
loss: 1.0067108869552612,grad_norm: 0.954043583594819, iteration: 196140
loss: 0.9649438261985779,grad_norm: 0.9999990000389215, iteration: 196141
loss: 1.00701105594635,grad_norm: 0.9999991926302068, iteration: 196142
loss: 1.0714623928070068,grad_norm: 0.8911196404218429, iteration: 196143
loss: 1.082738995552063,grad_norm: 0.9999996254977876, iteration: 196144
loss: 0.9956165552139282,grad_norm: 0.9079744747615479, iteration: 196145
loss: 1.0409624576568604,grad_norm: 0.8021121585115469, iteration: 196146
loss: 1.0098614692687988,grad_norm: 0.8669406685089049, iteration: 196147
loss: 0.9968823194503784,grad_norm: 0.9413841435868038, iteration: 196148
loss: 0.9895186424255371,grad_norm: 0.8703497813227559, iteration: 196149
loss: 1.0007723569869995,grad_norm: 0.9128910337964263, iteration: 196150
loss: 0.964852511882782,grad_norm: 0.9999991171096563, iteration: 196151
loss: 1.0111815929412842,grad_norm: 0.8538336025307444, iteration: 196152
loss: 0.9957144856452942,grad_norm: 0.9472904827523545, iteration: 196153
loss: 1.012355923652649,grad_norm: 0.9799691350082448, iteration: 196154
loss: 0.9734436869621277,grad_norm: 0.9506492216774329, iteration: 196155
loss: 0.9654540419578552,grad_norm: 0.9645425957630533, iteration: 196156
loss: 1.005855917930603,grad_norm: 0.9876108766980981, iteration: 196157
loss: 1.0045642852783203,grad_norm: 0.938626028475899, iteration: 196158
loss: 1.0582985877990723,grad_norm: 0.9999989852146853, iteration: 196159
loss: 1.0054981708526611,grad_norm: 0.8799482523869083, iteration: 196160
loss: 1.0464421510696411,grad_norm: 0.9999994996575972, iteration: 196161
loss: 1.021547555923462,grad_norm: 0.9999992008368417, iteration: 196162
loss: 1.010896921157837,grad_norm: 0.9756164247650553, iteration: 196163
loss: 1.0158487558364868,grad_norm: 0.8707838898754946, iteration: 196164
loss: 1.0012595653533936,grad_norm: 0.9185995514114649, iteration: 196165
loss: 1.021191954612732,grad_norm: 0.8489930388910408, iteration: 196166
loss: 0.9678504467010498,grad_norm: 0.9999991533434208, iteration: 196167
loss: 1.0266228914260864,grad_norm: 0.9999994248507821, iteration: 196168
loss: 1.021114706993103,grad_norm: 0.9999991503370905, iteration: 196169
loss: 1.0191506147384644,grad_norm: 0.9486822188694137, iteration: 196170
loss: 1.0067603588104248,grad_norm: 0.9598290486472598, iteration: 196171
loss: 1.0252209901809692,grad_norm: 0.999999333551224, iteration: 196172
loss: 0.9790863990783691,grad_norm: 0.999999012958469, iteration: 196173
loss: 1.0030933618545532,grad_norm: 0.9999992050742912, iteration: 196174
loss: 1.0095757246017456,grad_norm: 0.9999989348441687, iteration: 196175
loss: 0.9939436316490173,grad_norm: 0.9195439599708353, iteration: 196176
loss: 1.032190203666687,grad_norm: 0.9543880093494747, iteration: 196177
loss: 0.9510797262191772,grad_norm: 0.9045226796935276, iteration: 196178
loss: 1.0197067260742188,grad_norm: 0.8902564361139013, iteration: 196179
loss: 0.9994109869003296,grad_norm: 0.8796173641055148, iteration: 196180
loss: 1.0096371173858643,grad_norm: 0.9511249404168682, iteration: 196181
loss: 1.0097942352294922,grad_norm: 0.9388528349735338, iteration: 196182
loss: 0.9799981713294983,grad_norm: 0.9999992177683544, iteration: 196183
loss: 1.0187867879867554,grad_norm: 0.9409845139873912, iteration: 196184
loss: 1.0115211009979248,grad_norm: 0.9320867412975478, iteration: 196185
loss: 0.9909449815750122,grad_norm: 0.9999991612093801, iteration: 196186
loss: 0.985454261302948,grad_norm: 0.8936929467306339, iteration: 196187
loss: 1.0037318468093872,grad_norm: 0.9999991101215251, iteration: 196188
loss: 1.009917140007019,grad_norm: 0.9999993124416778, iteration: 196189
loss: 0.9699484705924988,grad_norm: 0.8836900056869689, iteration: 196190
loss: 1.0132863521575928,grad_norm: 0.9999990768464669, iteration: 196191
loss: 0.9972033500671387,grad_norm: 0.9999990038500549, iteration: 196192
loss: 0.998230516910553,grad_norm: 0.98882444384007, iteration: 196193
loss: 0.9887850284576416,grad_norm: 0.8962981129739308, iteration: 196194
loss: 1.0041680335998535,grad_norm: 0.9919140387877048, iteration: 196195
loss: 1.000722050666809,grad_norm: 0.8065741970992377, iteration: 196196
loss: 0.9894211888313293,grad_norm: 0.9040452023826957, iteration: 196197
loss: 0.9972498416900635,grad_norm: 0.9999989939948679, iteration: 196198
loss: 1.0333706140518188,grad_norm: 0.9999989588758093, iteration: 196199
loss: 0.9747222661972046,grad_norm: 0.9999990955014412, iteration: 196200
loss: 1.008944034576416,grad_norm: 0.9999991103630631, iteration: 196201
loss: 1.005584478378296,grad_norm: 0.7270732505079275, iteration: 196202
loss: 0.9624068140983582,grad_norm: 0.9999990440934465, iteration: 196203
loss: 1.009692907333374,grad_norm: 0.9999998587366985, iteration: 196204
loss: 0.9821183085441589,grad_norm: 0.9999992375863496, iteration: 196205
loss: 0.9833035469055176,grad_norm: 0.8460737783451774, iteration: 196206
loss: 0.9798056483268738,grad_norm: 0.9999992772469687, iteration: 196207
loss: 0.9983343482017517,grad_norm: 0.9999992485813334, iteration: 196208
loss: 1.0275461673736572,grad_norm: 0.988568771754346, iteration: 196209
loss: 1.0215752124786377,grad_norm: 0.9479291565589183, iteration: 196210
loss: 0.9769130945205688,grad_norm: 0.8934693158880365, iteration: 196211
loss: 1.0116430521011353,grad_norm: 0.9999997572917245, iteration: 196212
loss: 0.9911115765571594,grad_norm: 0.7484856534694759, iteration: 196213
loss: 0.9793545007705688,grad_norm: 0.9999991000335988, iteration: 196214
loss: 1.0242315530776978,grad_norm: 0.9831032447756098, iteration: 196215
loss: 1.023704171180725,grad_norm: 0.9999992866520563, iteration: 196216
loss: 0.9659313559532166,grad_norm: 0.9416245161114699, iteration: 196217
loss: 0.9674500823020935,grad_norm: 0.9282257143974562, iteration: 196218
loss: 1.0466153621673584,grad_norm: 0.8466650283416078, iteration: 196219
loss: 0.990351676940918,grad_norm: 0.9519014240424777, iteration: 196220
loss: 1.0537266731262207,grad_norm: 0.9142197957158786, iteration: 196221
loss: 1.0411101579666138,grad_norm: 0.9046786582907285, iteration: 196222
loss: 1.0045795440673828,grad_norm: 0.9999991315250756, iteration: 196223
loss: 0.9807173013687134,grad_norm: 0.9710141654163318, iteration: 196224
loss: 1.0068144798278809,grad_norm: 0.9999991450913552, iteration: 196225
loss: 1.0066930055618286,grad_norm: 0.9187186733522665, iteration: 196226
loss: 0.9919225573539734,grad_norm: 0.9637202993759557, iteration: 196227
loss: 1.0120527744293213,grad_norm: 0.8871633046451508, iteration: 196228
loss: 0.9945761561393738,grad_norm: 0.9687594315331076, iteration: 196229
loss: 1.0100152492523193,grad_norm: 0.9999992342576766, iteration: 196230
loss: 1.0170917510986328,grad_norm: 0.893509818069421, iteration: 196231
loss: 0.9871487021446228,grad_norm: 0.8439934149934354, iteration: 196232
loss: 1.0104854106903076,grad_norm: 0.8637386863676438, iteration: 196233
loss: 1.0119644403457642,grad_norm: 0.9999994715018141, iteration: 196234
loss: 1.09644615650177,grad_norm: 0.9999998031464581, iteration: 196235
loss: 0.9911971092224121,grad_norm: 0.7672773725782098, iteration: 196236
loss: 0.999972939491272,grad_norm: 0.9999991550247239, iteration: 196237
loss: 1.0758095979690552,grad_norm: 0.999999722523136, iteration: 196238
loss: 1.0228261947631836,grad_norm: 0.9534924921584531, iteration: 196239
loss: 0.9745325446128845,grad_norm: 0.9996828864652, iteration: 196240
loss: 0.9962471127510071,grad_norm: 0.8760604073119493, iteration: 196241
loss: 1.0272984504699707,grad_norm: 0.9999994474212084, iteration: 196242
loss: 1.0178008079528809,grad_norm: 0.9999994586956469, iteration: 196243
loss: 1.022493839263916,grad_norm: 0.9999990175680971, iteration: 196244
loss: 1.0016669034957886,grad_norm: 0.9999992973749637, iteration: 196245
loss: 1.0262386798858643,grad_norm: 0.8627028938979786, iteration: 196246
loss: 1.0111857652664185,grad_norm: 0.9999991149920895, iteration: 196247
loss: 0.9886569380760193,grad_norm: 0.9760596301403959, iteration: 196248
loss: 1.0325957536697388,grad_norm: 0.9489133023805607, iteration: 196249
loss: 1.0153014659881592,grad_norm: 0.9999993099118032, iteration: 196250
loss: 1.058652400970459,grad_norm: 0.9999993685090312, iteration: 196251
loss: 0.9826847314834595,grad_norm: 0.8668744125590296, iteration: 196252
loss: 1.0079424381256104,grad_norm: 0.9284017649532276, iteration: 196253
loss: 1.0367006063461304,grad_norm: 0.9017662939483141, iteration: 196254
loss: 1.031302809715271,grad_norm: 0.9999991521127097, iteration: 196255
loss: 1.0112189054489136,grad_norm: 0.9999992018336625, iteration: 196256
loss: 0.977891206741333,grad_norm: 0.8540197715020977, iteration: 196257
loss: 0.9697180390357971,grad_norm: 0.937251052058701, iteration: 196258
loss: 1.0319520235061646,grad_norm: 0.9999990564553135, iteration: 196259
loss: 1.0222210884094238,grad_norm: 0.7952482363767243, iteration: 196260
loss: 0.9874774217605591,grad_norm: 0.9418404132551982, iteration: 196261
loss: 1.0023038387298584,grad_norm: 0.9734780997525941, iteration: 196262
loss: 1.0175808668136597,grad_norm: 0.8784700043289538, iteration: 196263
loss: 0.9880469441413879,grad_norm: 0.939103537605619, iteration: 196264
loss: 0.9975407719612122,grad_norm: 0.7212381826818859, iteration: 196265
loss: 1.020695447921753,grad_norm: 0.9999996710156595, iteration: 196266
loss: 1.0085505247116089,grad_norm: 0.9783084320336561, iteration: 196267
loss: 0.9831091165542603,grad_norm: 0.8895256008081142, iteration: 196268
loss: 0.9648810029029846,grad_norm: 0.8730510183156278, iteration: 196269
loss: 1.0282658338546753,grad_norm: 0.9922576591026677, iteration: 196270
loss: 1.000304102897644,grad_norm: 0.9952520955640229, iteration: 196271
loss: 1.0316803455352783,grad_norm: 0.9999991487707438, iteration: 196272
loss: 1.0235559940338135,grad_norm: 0.906187630225301, iteration: 196273
loss: 0.9690185189247131,grad_norm: 0.9999991191559627, iteration: 196274
loss: 1.0238887071609497,grad_norm: 0.9831373796421983, iteration: 196275
loss: 1.0064584016799927,grad_norm: 0.9996637878257749, iteration: 196276
loss: 1.0685564279556274,grad_norm: 0.99999910711798, iteration: 196277
loss: 1.0404667854309082,grad_norm: 0.9999995165107814, iteration: 196278
loss: 1.0084476470947266,grad_norm: 0.8962143997670816, iteration: 196279
loss: 1.117882251739502,grad_norm: 0.9999991588465996, iteration: 196280
loss: 1.0054608583450317,grad_norm: 0.9999992525358082, iteration: 196281
loss: 1.0625780820846558,grad_norm: 0.9999994461329667, iteration: 196282
loss: 0.9951527714729309,grad_norm: 0.9999990562902393, iteration: 196283
loss: 0.9814498424530029,grad_norm: 0.8458320222997947, iteration: 196284
loss: 1.0113470554351807,grad_norm: 0.8546093847253386, iteration: 196285
loss: 0.9886307716369629,grad_norm: 0.8388892773086455, iteration: 196286
loss: 0.9699122905731201,grad_norm: 0.9689176055438445, iteration: 196287
loss: 1.1054272651672363,grad_norm: 0.9999991619842533, iteration: 196288
loss: 1.0025826692581177,grad_norm: 0.9460389716990935, iteration: 196289
loss: 1.0202760696411133,grad_norm: 0.9999990605407313, iteration: 196290
loss: 1.0033141374588013,grad_norm: 0.9145689730751911, iteration: 196291
loss: 1.0006109476089478,grad_norm: 0.8825988166306974, iteration: 196292
loss: 0.9959897994995117,grad_norm: 0.921578823730038, iteration: 196293
loss: 1.0008069276809692,grad_norm: 0.9610707645201264, iteration: 196294
loss: 1.0041431188583374,grad_norm: 0.9673357886848718, iteration: 196295
loss: 0.948805570602417,grad_norm: 0.8261489869032241, iteration: 196296
loss: 0.9831140637397766,grad_norm: 0.9999991215659586, iteration: 196297
loss: 0.9977346062660217,grad_norm: 0.8080559212465697, iteration: 196298
loss: 1.00294828414917,grad_norm: 0.9999992526443006, iteration: 196299
loss: 0.9789591431617737,grad_norm: 0.9721251327443876, iteration: 196300
loss: 0.9927387237548828,grad_norm: 0.8430463592924315, iteration: 196301
loss: 0.9927814602851868,grad_norm: 0.9999989906356002, iteration: 196302
loss: 0.9618486762046814,grad_norm: 0.8927564845761073, iteration: 196303
loss: 0.9638276696205139,grad_norm: 0.83658034959417, iteration: 196304
loss: 1.0031076669692993,grad_norm: 0.9999993690005351, iteration: 196305
loss: 1.011867642402649,grad_norm: 0.9907210594817342, iteration: 196306
loss: 1.0121840238571167,grad_norm: 0.9597347135392532, iteration: 196307
loss: 1.0061498880386353,grad_norm: 0.9999995497012379, iteration: 196308
loss: 0.9675291776657104,grad_norm: 0.9368019040155918, iteration: 196309
loss: 1.021136999130249,grad_norm: 0.9999991283923934, iteration: 196310
loss: 0.966395378112793,grad_norm: 0.8504588876922785, iteration: 196311
loss: 0.9721284508705139,grad_norm: 0.9999990294969936, iteration: 196312
loss: 1.0377191305160522,grad_norm: 0.9999989953663253, iteration: 196313
loss: 1.0180282592773438,grad_norm: 0.9999990490532255, iteration: 196314
loss: 1.0368009805679321,grad_norm: 0.9036719343382501, iteration: 196315
loss: 1.0404891967773438,grad_norm: 0.9999997041799065, iteration: 196316
loss: 0.9813632369041443,grad_norm: 0.9974863324122372, iteration: 196317
loss: 1.013695478439331,grad_norm: 0.9999991745608574, iteration: 196318
loss: 1.083117961883545,grad_norm: 0.9999999329615511, iteration: 196319
loss: 1.003555178642273,grad_norm: 0.8762492338251006, iteration: 196320
loss: 0.9869776964187622,grad_norm: 0.956917319206019, iteration: 196321
loss: 1.0507373809814453,grad_norm: 0.9999991192409025, iteration: 196322
loss: 1.001069188117981,grad_norm: 0.9999990182196246, iteration: 196323
loss: 1.0027309656143188,grad_norm: 0.9999990082883781, iteration: 196324
loss: 1.1721247434616089,grad_norm: 0.999999925252798, iteration: 196325
loss: 1.0272632837295532,grad_norm: 0.999999172109994, iteration: 196326
loss: 1.032599687576294,grad_norm: 0.7698378253879593, iteration: 196327
loss: 0.9903560280799866,grad_norm: 0.9041346175966304, iteration: 196328
loss: 1.1004600524902344,grad_norm: 0.9999997896978119, iteration: 196329
loss: 1.0220192670822144,grad_norm: 0.9999991592307791, iteration: 196330
loss: 1.0352885723114014,grad_norm: 0.9999990568826395, iteration: 196331
loss: 1.0257036685943604,grad_norm: 0.9999991748240341, iteration: 196332
loss: 1.0372956991195679,grad_norm: 0.966404642038633, iteration: 196333
loss: 1.0522578954696655,grad_norm: 0.9999992228725475, iteration: 196334
loss: 1.0051896572113037,grad_norm: 0.9999990887352556, iteration: 196335
loss: 0.9721478223800659,grad_norm: 0.9019160128349524, iteration: 196336
loss: 0.9585321545600891,grad_norm: 0.9999991525815718, iteration: 196337
loss: 1.0263402462005615,grad_norm: 0.8168158937410719, iteration: 196338
loss: 0.9969782829284668,grad_norm: 0.999998993537897, iteration: 196339
loss: 1.0021165609359741,grad_norm: 0.8480158731093658, iteration: 196340
loss: 0.9752779006958008,grad_norm: 0.9259090565630428, iteration: 196341
loss: 0.9847807288169861,grad_norm: 0.9999990328338287, iteration: 196342
loss: 0.966217577457428,grad_norm: 0.8664642317940229, iteration: 196343
loss: 0.9834692478179932,grad_norm: 0.9824075260449392, iteration: 196344
loss: 1.0102334022521973,grad_norm: 0.9639329576123353, iteration: 196345
loss: 1.0412323474884033,grad_norm: 0.9999988818031333, iteration: 196346
loss: 1.0600212812423706,grad_norm: 0.900648681651333, iteration: 196347
loss: 0.9788234829902649,grad_norm: 0.920248398976703, iteration: 196348
loss: 0.9880552291870117,grad_norm: 0.944668600489483, iteration: 196349
loss: 0.9986998438835144,grad_norm: 0.9089196591035734, iteration: 196350
loss: 1.0250595808029175,grad_norm: 0.8433537950914827, iteration: 196351
loss: 1.023506760597229,grad_norm: 0.8522037557421375, iteration: 196352
loss: 0.9845028519630432,grad_norm: 0.9999992427325892, iteration: 196353
loss: 1.0079208612442017,grad_norm: 0.9999990483769683, iteration: 196354
loss: 0.978045642375946,grad_norm: 0.8663504545458794, iteration: 196355
loss: 0.9866039156913757,grad_norm: 0.895369438121867, iteration: 196356
loss: 1.0461229085922241,grad_norm: 0.7399579663539262, iteration: 196357
loss: 1.0051692724227905,grad_norm: 0.9999991317731122, iteration: 196358
loss: 0.9684697985649109,grad_norm: 0.9999991469385379, iteration: 196359
loss: 0.9941588044166565,grad_norm: 0.9999991613970337, iteration: 196360
loss: 1.0100152492523193,grad_norm: 0.9886134702126224, iteration: 196361
loss: 0.9986392855644226,grad_norm: 0.859751111317381, iteration: 196362
loss: 1.0185598134994507,grad_norm: 0.8882043194455074, iteration: 196363
loss: 1.0305041074752808,grad_norm: 0.9008608448048335, iteration: 196364
loss: 0.9802720546722412,grad_norm: 0.9999990318993613, iteration: 196365
loss: 0.9924697279930115,grad_norm: 0.8515011867184368, iteration: 196366
loss: 0.9981395602226257,grad_norm: 0.9698556237931946, iteration: 196367
loss: 0.9827151298522949,grad_norm: 0.9999992178269835, iteration: 196368
loss: 0.9636284708976746,grad_norm: 0.759709020711683, iteration: 196369
loss: 1.0300401449203491,grad_norm: 0.9999991649314952, iteration: 196370
loss: 0.9953122735023499,grad_norm: 0.9999990766791977, iteration: 196371
loss: 0.9571340680122375,grad_norm: 0.8933087219484914, iteration: 196372
loss: 1.0054962635040283,grad_norm: 0.8323982385976478, iteration: 196373
loss: 1.0039364099502563,grad_norm: 0.9228470389493397, iteration: 196374
loss: 0.9913920760154724,grad_norm: 0.8721737946317585, iteration: 196375
loss: 1.03986656665802,grad_norm: 0.9634236057403064, iteration: 196376
loss: 1.0093837976455688,grad_norm: 0.9999990532315939, iteration: 196377
loss: 1.018232822418213,grad_norm: 0.9061739102752714, iteration: 196378
loss: 1.0273568630218506,grad_norm: 0.9518892158750132, iteration: 196379
loss: 0.990393340587616,grad_norm: 0.9999995262511499, iteration: 196380
loss: 1.0255779027938843,grad_norm: 0.8559181518328103, iteration: 196381
loss: 1.024039626121521,grad_norm: 0.9999990842561297, iteration: 196382
loss: 0.9819283485412598,grad_norm: 0.9823882133929385, iteration: 196383
loss: 0.9620508551597595,grad_norm: 0.9999993480509501, iteration: 196384
loss: 1.0309102535247803,grad_norm: 0.8662750623714871, iteration: 196385
loss: 0.9838213920593262,grad_norm: 0.9999991521282316, iteration: 196386
loss: 1.029075026512146,grad_norm: 0.9999991030907863, iteration: 196387
loss: 0.9993647336959839,grad_norm: 0.8513835980968917, iteration: 196388
loss: 1.004594326019287,grad_norm: 0.9530166674387763, iteration: 196389
loss: 1.0031815767288208,grad_norm: 0.8810242753656743, iteration: 196390
loss: 1.0204929113388062,grad_norm: 0.9999990689938792, iteration: 196391
loss: 0.9877467155456543,grad_norm: 0.9999992011213626, iteration: 196392
loss: 1.0162322521209717,grad_norm: 0.8564161880891169, iteration: 196393
loss: 1.034474492073059,grad_norm: 0.9999998553142441, iteration: 196394
loss: 1.027055025100708,grad_norm: 0.9662327138434386, iteration: 196395
loss: 0.9771633744239807,grad_norm: 0.9547642477410591, iteration: 196396
loss: 0.9951971173286438,grad_norm: 0.9197133909639319, iteration: 196397
loss: 1.0265772342681885,grad_norm: 0.8953354391767439, iteration: 196398
loss: 1.0149508714675903,grad_norm: 0.8843230100161136, iteration: 196399
loss: 0.9888312220573425,grad_norm: 0.9999991170504651, iteration: 196400
loss: 0.9663273692131042,grad_norm: 0.9608665426014945, iteration: 196401
loss: 1.007399559020996,grad_norm: 0.926553516776869, iteration: 196402
loss: 1.0068511962890625,grad_norm: 0.9270092127486051, iteration: 196403
loss: 1.0136040449142456,grad_norm: 0.9508019486176005, iteration: 196404
loss: 0.9629978537559509,grad_norm: 0.910792067562592, iteration: 196405
loss: 1.0044543743133545,grad_norm: 0.9999991179312998, iteration: 196406
loss: 1.0023939609527588,grad_norm: 0.8881938006892547, iteration: 196407
loss: 0.9917293190956116,grad_norm: 0.9999992236471524, iteration: 196408
loss: 0.9858046174049377,grad_norm: 0.8071904488056623, iteration: 196409
loss: 1.025420069694519,grad_norm: 0.991881348751896, iteration: 196410
loss: 0.9833155274391174,grad_norm: 0.999485054188022, iteration: 196411
loss: 1.0094172954559326,grad_norm: 0.8310169507527464, iteration: 196412
loss: 1.0270947217941284,grad_norm: 0.9326476298431107, iteration: 196413
loss: 1.0422548055648804,grad_norm: 0.9999991158941393, iteration: 196414
loss: 1.015368103981018,grad_norm: 0.9752863747479129, iteration: 196415
loss: 1.023921012878418,grad_norm: 0.9999990555919632, iteration: 196416
loss: 0.9972788095474243,grad_norm: 0.8573631545958238, iteration: 196417
loss: 0.9992445707321167,grad_norm: 0.9509902726642496, iteration: 196418
loss: 0.9695565700531006,grad_norm: 0.9345404981682751, iteration: 196419
loss: 0.9897247552871704,grad_norm: 0.9044657417108966, iteration: 196420
loss: 1.0169848203659058,grad_norm: 0.9222765857050771, iteration: 196421
loss: 0.9791879653930664,grad_norm: 0.8357723235869603, iteration: 196422
loss: 0.9876134991645813,grad_norm: 0.9863026333614258, iteration: 196423
loss: 0.9652035236358643,grad_norm: 0.9999992752849726, iteration: 196424
loss: 0.9674743413925171,grad_norm: 0.9741388811064177, iteration: 196425
loss: 0.9956853985786438,grad_norm: 0.9832096404843997, iteration: 196426
loss: 1.016205906867981,grad_norm: 0.999999245458006, iteration: 196427
loss: 0.982111930847168,grad_norm: 0.7794429215587112, iteration: 196428
loss: 0.9918985962867737,grad_norm: 0.7978188495931221, iteration: 196429
loss: 1.0118553638458252,grad_norm: 0.9999991893627865, iteration: 196430
loss: 1.0195419788360596,grad_norm: 0.98054374933669, iteration: 196431
loss: 1.0233793258666992,grad_norm: 0.9999992006991092, iteration: 196432
loss: 0.9559094905853271,grad_norm: 0.8631738470720285, iteration: 196433
loss: 1.0012966394424438,grad_norm: 0.8942120321947432, iteration: 196434
loss: 0.9731818437576294,grad_norm: 0.9784961634911448, iteration: 196435
loss: 1.0087810754776,grad_norm: 0.899571586785702, iteration: 196436
loss: 1.008223056793213,grad_norm: 0.9999990541555621, iteration: 196437
loss: 0.9818098545074463,grad_norm: 0.9400306892750128, iteration: 196438
loss: 1.0154049396514893,grad_norm: 0.9999997632227197, iteration: 196439
loss: 0.9900341629981995,grad_norm: 0.9488106759581876, iteration: 196440
loss: 1.0173120498657227,grad_norm: 0.9233608220706365, iteration: 196441
loss: 0.995598554611206,grad_norm: 0.8777002099958316, iteration: 196442
loss: 1.006483554840088,grad_norm: 0.8727632840532026, iteration: 196443
loss: 1.0274161100387573,grad_norm: 0.8927649542546666, iteration: 196444
loss: 0.9546126127243042,grad_norm: 0.9999990193841649, iteration: 196445
loss: 0.9810022115707397,grad_norm: 0.9999989755201196, iteration: 196446
loss: 0.992378294467926,grad_norm: 0.9999992589839666, iteration: 196447
loss: 0.983726978302002,grad_norm: 0.879627781838978, iteration: 196448
loss: 0.9470716118812561,grad_norm: 0.9999989913922693, iteration: 196449
loss: 1.0206619501113892,grad_norm: 0.81160436380774, iteration: 196450
loss: 0.9988182187080383,grad_norm: 0.8397310505870748, iteration: 196451
loss: 0.9781662821769714,grad_norm: 0.9377746052062, iteration: 196452
loss: 0.9967076182365417,grad_norm: 0.8073178328896798, iteration: 196453
loss: 0.9818949103355408,grad_norm: 0.9855879630831157, iteration: 196454
loss: 1.0100988149642944,grad_norm: 0.8305040172384308, iteration: 196455
loss: 1.0185717344284058,grad_norm: 0.9999990515487316, iteration: 196456
loss: 1.001835584640503,grad_norm: 0.9619028543839685, iteration: 196457
loss: 1.0264638662338257,grad_norm: 0.9195158352563518, iteration: 196458
loss: 1.0161036252975464,grad_norm: 0.9999991103393613, iteration: 196459
loss: 0.9823039174079895,grad_norm: 0.9662025856548275, iteration: 196460
loss: 0.99469393491745,grad_norm: 0.7796508306994974, iteration: 196461
loss: 1.0120054483413696,grad_norm: 0.9999990237413239, iteration: 196462
loss: 1.0175398588180542,grad_norm: 0.9999999730197733, iteration: 196463
loss: 0.9788365364074707,grad_norm: 0.9231415755045372, iteration: 196464
loss: 0.9917479157447815,grad_norm: 0.7198838392109509, iteration: 196465
loss: 0.9989953637123108,grad_norm: 0.9999992257495086, iteration: 196466
loss: 1.0292812585830688,grad_norm: 0.8268103681881395, iteration: 196467
loss: 1.0055681467056274,grad_norm: 0.8348382376508621, iteration: 196468
loss: 0.9882344603538513,grad_norm: 0.9473670591463356, iteration: 196469
loss: 0.9913886189460754,grad_norm: 0.9999991106630715, iteration: 196470
loss: 1.0424669981002808,grad_norm: 0.8671371174449948, iteration: 196471
loss: 0.9689828157424927,grad_norm: 0.9999990681645753, iteration: 196472
loss: 1.0172481536865234,grad_norm: 0.9999989654226548, iteration: 196473
loss: 0.9609300494194031,grad_norm: 0.8206709297904277, iteration: 196474
loss: 1.0020571947097778,grad_norm: 0.9496775199163072, iteration: 196475
loss: 1.0828120708465576,grad_norm: 0.9999993402999414, iteration: 196476
loss: 1.0283414125442505,grad_norm: 0.9999990839532702, iteration: 196477
loss: 1.0077513456344604,grad_norm: 0.9934208722209976, iteration: 196478
loss: 1.0403876304626465,grad_norm: 0.9999990641194305, iteration: 196479
loss: 0.9648706912994385,grad_norm: 0.8448198470476209, iteration: 196480
loss: 0.9510115385055542,grad_norm: 0.8766487903086271, iteration: 196481
loss: 1.0101962089538574,grad_norm: 0.9999991471770144, iteration: 196482
loss: 1.0126724243164062,grad_norm: 0.9887619907158107, iteration: 196483
loss: 1.017870545387268,grad_norm: 0.9123944357154504, iteration: 196484
loss: 1.0063670873641968,grad_norm: 0.9999991333023001, iteration: 196485
loss: 1.008228063583374,grad_norm: 0.8533689958046242, iteration: 196486
loss: 1.000539779663086,grad_norm: 0.999999113458197, iteration: 196487
loss: 1.0123244524002075,grad_norm: 0.8779907906974067, iteration: 196488
loss: 1.0028690099716187,grad_norm: 0.9999991123534958, iteration: 196489
loss: 0.9915457963943481,grad_norm: 0.9999991540967997, iteration: 196490
loss: 0.9678905010223389,grad_norm: 0.9999991314912181, iteration: 196491
loss: 1.0237681865692139,grad_norm: 0.851810329979759, iteration: 196492
loss: 1.0068711042404175,grad_norm: 0.9171076677316584, iteration: 196493
loss: 1.0182849168777466,grad_norm: 0.9999989338322953, iteration: 196494
loss: 1.0218597650527954,grad_norm: 0.9933551640640148, iteration: 196495
loss: 1.0246930122375488,grad_norm: 0.9670118330781888, iteration: 196496
loss: 0.9777244925498962,grad_norm: 0.8536084307289626, iteration: 196497
loss: 1.0175281763076782,grad_norm: 0.9377021429941558, iteration: 196498
loss: 0.9516892433166504,grad_norm: 0.8784500633817819, iteration: 196499
loss: 1.0154969692230225,grad_norm: 0.8293386437572858, iteration: 196500
loss: 0.986666202545166,grad_norm: 0.783899235177519, iteration: 196501
loss: 0.988698422908783,grad_norm: 0.9238056996591424, iteration: 196502
loss: 0.9792795777320862,grad_norm: 0.9746277155735528, iteration: 196503
loss: 0.9515253901481628,grad_norm: 0.9316862215940632, iteration: 196504
loss: 0.9546753168106079,grad_norm: 0.9010457606709712, iteration: 196505
loss: 0.941326916217804,grad_norm: 0.9504133812776435, iteration: 196506
loss: 1.024350643157959,grad_norm: 0.9285675455150917, iteration: 196507
loss: 0.991527795791626,grad_norm: 0.8670497237297551, iteration: 196508
loss: 1.0468343496322632,grad_norm: 0.9999991284533012, iteration: 196509
loss: 1.0106991529464722,grad_norm: 0.9999990948556976, iteration: 196510
loss: 1.023105502128601,grad_norm: 0.9999993683707701, iteration: 196511
loss: 0.9803516864776611,grad_norm: 0.9426018222776723, iteration: 196512
loss: 0.966181755065918,grad_norm: 0.9999990131085256, iteration: 196513
loss: 0.9982764720916748,grad_norm: 0.9999989914186309, iteration: 196514
loss: 1.0142024755477905,grad_norm: 0.893578476068584, iteration: 196515
loss: 0.9879083633422852,grad_norm: 0.9071442600740115, iteration: 196516
loss: 1.0187172889709473,grad_norm: 0.9999996371821972, iteration: 196517
loss: 1.0951790809631348,grad_norm: 0.97935174822923, iteration: 196518
loss: 1.00920832157135,grad_norm: 0.999999483181238, iteration: 196519
loss: 1.0165679454803467,grad_norm: 0.9999990133314247, iteration: 196520
loss: 1.0408387184143066,grad_norm: 0.9999991457219265, iteration: 196521
loss: 1.0500540733337402,grad_norm: 0.9885752378270831, iteration: 196522
loss: 1.004065752029419,grad_norm: 0.8474302318246029, iteration: 196523
loss: 1.005168080329895,grad_norm: 0.8443593194025754, iteration: 196524
loss: 0.986482560634613,grad_norm: 0.9999990944887402, iteration: 196525
loss: 1.0059432983398438,grad_norm: 0.8005173209639855, iteration: 196526
loss: 1.0070061683654785,grad_norm: 0.884640159996189, iteration: 196527
loss: 0.9843356013298035,grad_norm: 0.8744776973187179, iteration: 196528
loss: 0.9977904558181763,grad_norm: 0.9504047594764283, iteration: 196529
loss: 1.0230458974838257,grad_norm: 0.8845379421266308, iteration: 196530
loss: 0.9855713844299316,grad_norm: 0.9999991423900285, iteration: 196531
loss: 0.9520847797393799,grad_norm: 0.9121837380686714, iteration: 196532
loss: 1.0587208271026611,grad_norm: 0.9999994909204437, iteration: 196533
loss: 1.0101895332336426,grad_norm: 0.9584680525424533, iteration: 196534
loss: 0.9892995357513428,grad_norm: 0.9999991684570188, iteration: 196535
loss: 1.0050352811813354,grad_norm: 0.9999990466638005, iteration: 196536
loss: 0.9797070026397705,grad_norm: 0.853823211377384, iteration: 196537
loss: 1.0359457731246948,grad_norm: 0.9999991063248568, iteration: 196538
loss: 0.9650722742080688,grad_norm: 0.9999990424813193, iteration: 196539
loss: 0.9915017485618591,grad_norm: 0.9999991363272973, iteration: 196540
loss: 0.9494402408599854,grad_norm: 0.9504584526784492, iteration: 196541
loss: 0.9681731462478638,grad_norm: 0.8987350768444036, iteration: 196542
loss: 1.004847764968872,grad_norm: 0.9125034099435866, iteration: 196543
loss: 0.9885149598121643,grad_norm: 0.9251942061153486, iteration: 196544
loss: 0.9993127584457397,grad_norm: 0.9365884856566131, iteration: 196545
loss: 0.9899739623069763,grad_norm: 0.999999156180164, iteration: 196546
loss: 1.0346643924713135,grad_norm: 0.9999997415089358, iteration: 196547
loss: 0.9561331868171692,grad_norm: 0.948204563971743, iteration: 196548
loss: 1.0344183444976807,grad_norm: 0.9510791560794051, iteration: 196549
loss: 0.9759763479232788,grad_norm: 0.9999989283342554, iteration: 196550
loss: 0.9971025586128235,grad_norm: 0.9563171356946104, iteration: 196551
loss: 0.9759169220924377,grad_norm: 0.8927580753275411, iteration: 196552
loss: 1.0217617750167847,grad_norm: 0.9999990815593934, iteration: 196553
loss: 0.9810152649879456,grad_norm: 0.9946438091985947, iteration: 196554
loss: 1.0058408975601196,grad_norm: 0.9188152612405255, iteration: 196555
loss: 1.0032349824905396,grad_norm: 0.871059266527573, iteration: 196556
loss: 0.9933737516403198,grad_norm: 0.877030348458833, iteration: 196557
loss: 1.0451728105545044,grad_norm: 0.9493790847302385, iteration: 196558
loss: 1.0268125534057617,grad_norm: 0.8518853688324246, iteration: 196559
loss: 0.9690796136856079,grad_norm: 0.9175764994648639, iteration: 196560
loss: 1.0010937452316284,grad_norm: 0.8812518297445553, iteration: 196561
loss: 1.0086474418640137,grad_norm: 0.8199437868058977, iteration: 196562
loss: 0.9809185266494751,grad_norm: 0.9713949924319032, iteration: 196563
loss: 0.9535741209983826,grad_norm: 0.9286813011304742, iteration: 196564
loss: 0.9624456763267517,grad_norm: 0.9999992160919592, iteration: 196565
loss: 0.9961601495742798,grad_norm: 0.968605679812267, iteration: 196566
loss: 0.9652233123779297,grad_norm: 0.9045289476489792, iteration: 196567
loss: 0.9933717846870422,grad_norm: 0.8938982257129515, iteration: 196568
loss: 0.9832257628440857,grad_norm: 0.9042392977127712, iteration: 196569
loss: 0.9948573708534241,grad_norm: 0.9999990498512665, iteration: 196570
loss: 0.9812102317810059,grad_norm: 0.9999992513886355, iteration: 196571
loss: 0.9705342650413513,grad_norm: 0.9801981673380152, iteration: 196572
loss: 1.0202982425689697,grad_norm: 0.9801738449293821, iteration: 196573
loss: 1.013114333152771,grad_norm: 0.9879481017151304, iteration: 196574
loss: 0.9736102819442749,grad_norm: 0.7931913499680815, iteration: 196575
loss: 1.0137202739715576,grad_norm: 0.8111205016598908, iteration: 196576
loss: 0.985410749912262,grad_norm: 0.9784617526438804, iteration: 196577
loss: 1.0335485935211182,grad_norm: 0.9999991028482106, iteration: 196578
loss: 0.986349880695343,grad_norm: 0.9366345014677852, iteration: 196579
loss: 1.0112732648849487,grad_norm: 0.9999990780548691, iteration: 196580
loss: 1.0204682350158691,grad_norm: 0.9999991479503824, iteration: 196581
loss: 0.9910041093826294,grad_norm: 0.9999991838717446, iteration: 196582
loss: 1.0044974088668823,grad_norm: 0.8899375731311436, iteration: 196583
loss: 0.9767897725105286,grad_norm: 0.8701589890836244, iteration: 196584
loss: 0.9854264259338379,grad_norm: 0.8198549167008132, iteration: 196585
loss: 0.9692409038543701,grad_norm: 0.9963501361176197, iteration: 196586
loss: 1.0156341791152954,grad_norm: 0.9999992482381571, iteration: 196587
loss: 0.9858524203300476,grad_norm: 0.9999996486393034, iteration: 196588
loss: 0.9944394826889038,grad_norm: 0.978228288727909, iteration: 196589
loss: 1.0206273794174194,grad_norm: 0.9999994002993103, iteration: 196590
loss: 1.0242964029312134,grad_norm: 0.8303940987785177, iteration: 196591
loss: 0.988153338432312,grad_norm: 0.9999987764625756, iteration: 196592
loss: 1.0511341094970703,grad_norm: 0.9999990942264236, iteration: 196593
loss: 1.0163630247116089,grad_norm: 0.9026249062657377, iteration: 196594
loss: 0.9712120890617371,grad_norm: 0.9339147239959296, iteration: 196595
loss: 1.019147276878357,grad_norm: 0.994840658646225, iteration: 196596
loss: 1.0135958194732666,grad_norm: 0.9560549389061109, iteration: 196597
loss: 1.0585386753082275,grad_norm: 0.9999997327872944, iteration: 196598
loss: 0.9683281183242798,grad_norm: 0.9999991596839568, iteration: 196599
loss: 0.9851069450378418,grad_norm: 0.9999996189165697, iteration: 196600
loss: 0.9747253060340881,grad_norm: 0.8340704302552766, iteration: 196601
loss: 1.0016542673110962,grad_norm: 0.8277444638943644, iteration: 196602
loss: 1.0320192575454712,grad_norm: 0.8985299898472714, iteration: 196603
loss: 0.9802719950675964,grad_norm: 0.8335686267993547, iteration: 196604
loss: 1.0182149410247803,grad_norm: 0.9999990246997047, iteration: 196605
loss: 0.9907392263412476,grad_norm: 0.9428097210623342, iteration: 196606
loss: 0.9911924600601196,grad_norm: 0.9999992312265715, iteration: 196607
loss: 0.9717216491699219,grad_norm: 0.9999993014947071, iteration: 196608
loss: 0.9730871915817261,grad_norm: 0.9999991033494988, iteration: 196609
loss: 0.9807968139648438,grad_norm: 0.9999991372397773, iteration: 196610
loss: 1.0134590864181519,grad_norm: 0.9171076294669339, iteration: 196611
loss: 1.002547264099121,grad_norm: 0.9999992469776411, iteration: 196612
loss: 1.0084009170532227,grad_norm: 0.9999992584117078, iteration: 196613
loss: 1.0003957748413086,grad_norm: 0.8043325728319125, iteration: 196614
loss: 0.9651113748550415,grad_norm: 0.9999990480963313, iteration: 196615
loss: 1.002685785293579,grad_norm: 0.999999050059, iteration: 196616
loss: 1.0318286418914795,grad_norm: 0.999998986181757, iteration: 196617
loss: 0.9855455160140991,grad_norm: 0.999999108045749, iteration: 196618
loss: 0.9839158058166504,grad_norm: 0.8695983251190649, iteration: 196619
loss: 1.0569899082183838,grad_norm: 0.9999998881690194, iteration: 196620
loss: 0.9991955161094666,grad_norm: 0.9999992804676231, iteration: 196621
loss: 1.0116217136383057,grad_norm: 0.9999990658721168, iteration: 196622
loss: 1.0196322202682495,grad_norm: 0.8528922953722906, iteration: 196623
loss: 0.9759825468063354,grad_norm: 0.9406389158855484, iteration: 196624
loss: 0.966002881526947,grad_norm: 0.7873372219676703, iteration: 196625
loss: 1.0120892524719238,grad_norm: 0.906149149798856, iteration: 196626
loss: 1.0302034616470337,grad_norm: 0.9999991243407286, iteration: 196627
loss: 1.013649582862854,grad_norm: 0.885178543952312, iteration: 196628
loss: 1.0725144147872925,grad_norm: 0.9999994763397577, iteration: 196629
loss: 0.9831253886222839,grad_norm: 0.8842678495174661, iteration: 196630
loss: 1.1090916395187378,grad_norm: 0.999999940626675, iteration: 196631
loss: 1.0081926584243774,grad_norm: 0.897192613540182, iteration: 196632
loss: 1.0025956630706787,grad_norm: 0.9169424027828389, iteration: 196633
loss: 0.9816356897354126,grad_norm: 0.999999155798169, iteration: 196634
loss: 0.9670435190200806,grad_norm: 0.9443178239126075, iteration: 196635
loss: 1.0069767236709595,grad_norm: 0.8908057894598606, iteration: 196636
loss: 0.99643874168396,grad_norm: 0.897949902580987, iteration: 196637
loss: 1.0298100709915161,grad_norm: 0.9999990953197676, iteration: 196638
loss: 0.9723684787750244,grad_norm: 0.8827857204040598, iteration: 196639
loss: 1.0881011486053467,grad_norm: 0.9999996606779449, iteration: 196640
loss: 1.0079313516616821,grad_norm: 0.8664677181208966, iteration: 196641
loss: 0.9909504055976868,grad_norm: 0.9813424208670247, iteration: 196642
loss: 1.0159823894500732,grad_norm: 0.9999991143418175, iteration: 196643
loss: 0.9941871762275696,grad_norm: 0.9477001784728228, iteration: 196644
loss: 1.0275360345840454,grad_norm: 0.9097528990758404, iteration: 196645
loss: 0.9676016569137573,grad_norm: 0.914195087103581, iteration: 196646
loss: 1.0074882507324219,grad_norm: 0.9999991053417466, iteration: 196647
loss: 1.000830888748169,grad_norm: 0.9999991505359261, iteration: 196648
loss: 1.0559567213058472,grad_norm: 0.9999997856656483, iteration: 196649
loss: 1.0028440952301025,grad_norm: 0.999999108538913, iteration: 196650
loss: 1.0174973011016846,grad_norm: 0.8986232540015846, iteration: 196651
loss: 0.9973256587982178,grad_norm: 0.8910214262349642, iteration: 196652
loss: 0.9952097535133362,grad_norm: 0.8526445057452988, iteration: 196653
loss: 0.9657668471336365,grad_norm: 0.999999130276585, iteration: 196654
loss: 1.018000841140747,grad_norm: 0.917013637723708, iteration: 196655
loss: 1.0168919563293457,grad_norm: 0.9815300700136406, iteration: 196656
loss: 0.9725605845451355,grad_norm: 0.905719717344509, iteration: 196657
loss: 1.015254020690918,grad_norm: 0.9999995839623175, iteration: 196658
loss: 1.0880903005599976,grad_norm: 0.9999999187230744, iteration: 196659
loss: 1.014618158340454,grad_norm: 0.9417066619414473, iteration: 196660
loss: 1.0247002840042114,grad_norm: 0.999999058266255, iteration: 196661
loss: 1.0054160356521606,grad_norm: 0.9999990020170256, iteration: 196662
loss: 1.0052133798599243,grad_norm: 0.999998995590262, iteration: 196663
loss: 1.0068001747131348,grad_norm: 0.9534205398767837, iteration: 196664
loss: 1.0061904191970825,grad_norm: 0.9940707021522793, iteration: 196665
loss: 0.9562661647796631,grad_norm: 0.9999991587253173, iteration: 196666
loss: 1.0021942853927612,grad_norm: 0.7596786416523986, iteration: 196667
loss: 0.9928331971168518,grad_norm: 0.9999992038099814, iteration: 196668
loss: 0.9999818205833435,grad_norm: 0.835325210287228, iteration: 196669
loss: 1.0132510662078857,grad_norm: 0.9999991352685088, iteration: 196670
loss: 0.9634097814559937,grad_norm: 0.8162152175604874, iteration: 196671
loss: 1.0441337823867798,grad_norm: 0.8912269198895042, iteration: 196672
loss: 1.008922815322876,grad_norm: 0.6786776797234966, iteration: 196673
loss: 0.9924072027206421,grad_norm: 0.8736062813951453, iteration: 196674
loss: 0.9919836521148682,grad_norm: 0.7084523372188585, iteration: 196675
loss: 0.9934933185577393,grad_norm: 0.8504453270292253, iteration: 196676
loss: 0.9706704616546631,grad_norm: 0.9194599008379524, iteration: 196677
loss: 1.021716833114624,grad_norm: 0.9146965363839334, iteration: 196678
loss: 0.9997045993804932,grad_norm: 0.9145657023085626, iteration: 196679
loss: 0.9781820178031921,grad_norm: 0.999999248966713, iteration: 196680
loss: 1.0273346900939941,grad_norm: 0.8266594490695608, iteration: 196681
loss: 1.047859787940979,grad_norm: 0.9999991845842667, iteration: 196682
loss: 1.0327359437942505,grad_norm: 0.9999990038264427, iteration: 196683
loss: 1.0186234712600708,grad_norm: 0.9128315326581018, iteration: 196684
loss: 0.945464551448822,grad_norm: 0.9999990249553861, iteration: 196685
loss: 1.0159574747085571,grad_norm: 0.9849898612030502, iteration: 196686
loss: 1.0289169549942017,grad_norm: 0.9999990895345161, iteration: 196687
loss: 0.992686927318573,grad_norm: 0.8763242940976017, iteration: 196688
loss: 1.036639928817749,grad_norm: 0.941784195189814, iteration: 196689
loss: 1.0400218963623047,grad_norm: 0.9999991883144258, iteration: 196690
loss: 0.9707376956939697,grad_norm: 0.9969656001649756, iteration: 196691
loss: 0.9811856150627136,grad_norm: 0.9642926323193997, iteration: 196692
loss: 0.9934911727905273,grad_norm: 0.8920000488452985, iteration: 196693
loss: 1.0129715204238892,grad_norm: 0.9999990631398108, iteration: 196694
loss: 0.9974695444107056,grad_norm: 0.9999991419177656, iteration: 196695
loss: 1.0574100017547607,grad_norm: 0.9999999781434208, iteration: 196696
loss: 1.05348801612854,grad_norm: 0.9996581746566717, iteration: 196697
loss: 1.0286839008331299,grad_norm: 0.7090784776405427, iteration: 196698
loss: 1.013831377029419,grad_norm: 0.9333092108793988, iteration: 196699
loss: 1.0274258852005005,grad_norm: 0.989411482480585, iteration: 196700
loss: 1.0064854621887207,grad_norm: 0.9975350216373211, iteration: 196701
loss: 1.0701870918273926,grad_norm: 0.999999175786546, iteration: 196702
loss: 1.0168983936309814,grad_norm: 0.9999993732150184, iteration: 196703
loss: 0.9874469637870789,grad_norm: 0.9999991319122907, iteration: 196704
loss: 1.045671820640564,grad_norm: 0.8586883429750288, iteration: 196705
loss: 1.01211678981781,grad_norm: 0.8225449153188318, iteration: 196706
loss: 0.9871321320533752,grad_norm: 0.9200611539401337, iteration: 196707
loss: 1.0264610052108765,grad_norm: 0.9821733776039347, iteration: 196708
loss: 0.9784990549087524,grad_norm: 0.811178637039416, iteration: 196709
loss: 1.1603481769561768,grad_norm: 0.999999524249194, iteration: 196710
loss: 1.0307506322860718,grad_norm: 0.9342895278286435, iteration: 196711
loss: 1.042331337928772,grad_norm: 0.9014319449561581, iteration: 196712
loss: 0.9695285558700562,grad_norm: 0.8740891306469466, iteration: 196713
loss: 1.0419819355010986,grad_norm: 0.99999954259673, iteration: 196714
loss: 1.0183907747268677,grad_norm: 0.9777409175100513, iteration: 196715
loss: 1.0401015281677246,grad_norm: 0.9999992224082025, iteration: 196716
loss: 0.969736635684967,grad_norm: 0.9999990132990094, iteration: 196717
loss: 1.0039626359939575,grad_norm: 0.9999996021100979, iteration: 196718
loss: 1.0278043746948242,grad_norm: 0.999999894771389, iteration: 196719
loss: 0.9935616254806519,grad_norm: 0.9002420525265921, iteration: 196720
loss: 0.9830518364906311,grad_norm: 0.839342397425873, iteration: 196721
loss: 0.9936724305152893,grad_norm: 0.9799494813828413, iteration: 196722
loss: 0.991043746471405,grad_norm: 0.9999995593983428, iteration: 196723
loss: 0.9923797845840454,grad_norm: 0.9998293481114595, iteration: 196724
loss: 0.9839642643928528,grad_norm: 0.9999990156107684, iteration: 196725
loss: 1.0123800039291382,grad_norm: 0.87317536870559, iteration: 196726
loss: 0.999586820602417,grad_norm: 0.963362952471086, iteration: 196727
loss: 0.9924841523170471,grad_norm: 0.8851324684786082, iteration: 196728
loss: 1.0125226974487305,grad_norm: 0.9542669375281935, iteration: 196729
loss: 1.008712887763977,grad_norm: 0.8864796131415049, iteration: 196730
loss: 0.9997831583023071,grad_norm: 0.7981692621915891, iteration: 196731
loss: 1.0154434442520142,grad_norm: 0.9999989397902607, iteration: 196732
loss: 1.0673987865447998,grad_norm: 0.9999178178419965, iteration: 196733
loss: 1.0582939386367798,grad_norm: 0.9999997496360519, iteration: 196734
loss: 0.9736583232879639,grad_norm: 0.9733604523581761, iteration: 196735
loss: 0.9731113314628601,grad_norm: 0.9223565346461855, iteration: 196736
loss: 1.0122944116592407,grad_norm: 0.8453630676151581, iteration: 196737
loss: 0.9964560866355896,grad_norm: 0.9910591787658943, iteration: 196738
loss: 0.9873725771903992,grad_norm: 0.9676280876443164, iteration: 196739
loss: 0.9972192049026489,grad_norm: 0.9999992948623385, iteration: 196740
loss: 1.0417016744613647,grad_norm: 0.9248621340764794, iteration: 196741
loss: 1.0333119630813599,grad_norm: 0.8934842371865194, iteration: 196742
loss: 0.9995934963226318,grad_norm: 0.8345440969606424, iteration: 196743
loss: 1.0000616312026978,grad_norm: 0.9999994048498746, iteration: 196744
loss: 1.0446165800094604,grad_norm: 0.9999991223975445, iteration: 196745
loss: 0.9764549136161804,grad_norm: 0.8485409183411471, iteration: 196746
loss: 1.004901647567749,grad_norm: 0.9999990567256776, iteration: 196747
loss: 0.9666157960891724,grad_norm: 0.9204652954253063, iteration: 196748
loss: 1.0309628248214722,grad_norm: 0.9999992273494529, iteration: 196749
loss: 1.001133680343628,grad_norm: 0.7733469049768978, iteration: 196750
loss: 0.9814633131027222,grad_norm: 0.9999994967193989, iteration: 196751
loss: 0.9873417019844055,grad_norm: 0.9999992693405263, iteration: 196752
loss: 1.0657991170883179,grad_norm: 0.9999990571076922, iteration: 196753
loss: 0.9869188070297241,grad_norm: 0.8690974447036501, iteration: 196754
loss: 1.0113095045089722,grad_norm: 0.8710096958996755, iteration: 196755
loss: 1.0040647983551025,grad_norm: 0.9999990583717621, iteration: 196756
loss: 1.0188629627227783,grad_norm: 0.9999995920348793, iteration: 196757
loss: 1.1500523090362549,grad_norm: 0.9999998382381101, iteration: 196758
loss: 1.0429894924163818,grad_norm: 0.9999992370840035, iteration: 196759
loss: 0.9492160081863403,grad_norm: 0.9683332909447468, iteration: 196760
loss: 0.9909192323684692,grad_norm: 0.9457152457354393, iteration: 196761
loss: 0.9997580647468567,grad_norm: 0.8396421455466035, iteration: 196762
loss: 0.9804897904396057,grad_norm: 0.9595489213744255, iteration: 196763
loss: 0.9532880187034607,grad_norm: 0.8957562115239985, iteration: 196764
loss: 1.0039621591567993,grad_norm: 0.949321381576453, iteration: 196765
loss: 1.0185084342956543,grad_norm: 0.9999993356250743, iteration: 196766
loss: 1.0087378025054932,grad_norm: 0.7752775360006178, iteration: 196767
loss: 0.982109546661377,grad_norm: 0.7927620754993062, iteration: 196768
loss: 1.0014405250549316,grad_norm: 0.9969486088775392, iteration: 196769
loss: 1.0897547006607056,grad_norm: 0.9999993880874252, iteration: 196770
loss: 0.9914332032203674,grad_norm: 0.9203845008501903, iteration: 196771
loss: 1.0359668731689453,grad_norm: 0.999999503107165, iteration: 196772
loss: 1.0296931266784668,grad_norm: 0.8852943468437543, iteration: 196773
loss: 1.0556896924972534,grad_norm: 0.9999989895446003, iteration: 196774
loss: 0.9928439855575562,grad_norm: 0.8647031846166974, iteration: 196775
loss: 1.0169841051101685,grad_norm: 0.9999990449173918, iteration: 196776
loss: 1.0145142078399658,grad_norm: 0.9999990764848642, iteration: 196777
loss: 0.9873231053352356,grad_norm: 0.9999991214387548, iteration: 196778
loss: 1.00399649143219,grad_norm: 0.9643112018274949, iteration: 196779
loss: 0.974871039390564,grad_norm: 0.94438303071006, iteration: 196780
loss: 1.0022834539413452,grad_norm: 0.9999994697942993, iteration: 196781
loss: 0.9789521098136902,grad_norm: 0.9999990964075266, iteration: 196782
loss: 1.0068930387496948,grad_norm: 0.9389249474271159, iteration: 196783
loss: 1.026881217956543,grad_norm: 0.9867050893204744, iteration: 196784
loss: 0.9934923648834229,grad_norm: 0.8081007056509918, iteration: 196785
loss: 1.0377731323242188,grad_norm: 0.9572460822976714, iteration: 196786
loss: 0.9874094128608704,grad_norm: 0.9617011333066311, iteration: 196787
loss: 0.9701633453369141,grad_norm: 0.8496795547555168, iteration: 196788
loss: 1.0041179656982422,grad_norm: 0.9999990388658161, iteration: 196789
loss: 0.9885239005088806,grad_norm: 0.958766563297888, iteration: 196790
loss: 1.0229549407958984,grad_norm: 0.9999990713818131, iteration: 196791
loss: 0.962192177772522,grad_norm: 0.9999991128167315, iteration: 196792
loss: 1.028640866279602,grad_norm: 0.9999989889625215, iteration: 196793
loss: 0.977077066898346,grad_norm: 0.9638947138401012, iteration: 196794
loss: 1.043003797531128,grad_norm: 0.9999998392359277, iteration: 196795
loss: 1.0126599073410034,grad_norm: 0.9999989260722648, iteration: 196796
loss: 0.9557231664657593,grad_norm: 0.9999989946655247, iteration: 196797
loss: 1.028178095817566,grad_norm: 0.9159214192356866, iteration: 196798
loss: 0.9913340210914612,grad_norm: 0.8771847462061501, iteration: 196799
loss: 1.0180213451385498,grad_norm: 0.9999991422135045, iteration: 196800
loss: 0.9648247957229614,grad_norm: 0.9999991427644452, iteration: 196801
loss: 0.9985737800598145,grad_norm: 0.9161099721195662, iteration: 196802
loss: 0.9932184219360352,grad_norm: 0.9999991515657402, iteration: 196803
loss: 0.9904252290725708,grad_norm: 0.9974371834515089, iteration: 196804
loss: 0.9933569431304932,grad_norm: 0.9999991674511078, iteration: 196805
loss: 0.9892352223396301,grad_norm: 0.9670236606349387, iteration: 196806
loss: 1.0307058095932007,grad_norm: 0.9999990811437919, iteration: 196807
loss: 0.9716274738311768,grad_norm: 0.7504494048819369, iteration: 196808
loss: 0.9991777539253235,grad_norm: 0.9921831843091534, iteration: 196809
loss: 0.9824656248092651,grad_norm: 0.886465910610086, iteration: 196810
loss: 0.977222204208374,grad_norm: 0.8171027291181726, iteration: 196811
loss: 0.9907917976379395,grad_norm: 0.9646770338664873, iteration: 196812
loss: 1.0147541761398315,grad_norm: 0.9208785628540523, iteration: 196813
loss: 1.026281476020813,grad_norm: 0.9999996494019375, iteration: 196814
loss: 1.0416814088821411,grad_norm: 0.9999991695291104, iteration: 196815
loss: 1.0027703046798706,grad_norm: 0.8534824926249006, iteration: 196816
loss: 1.0222975015640259,grad_norm: 0.8771876128605078, iteration: 196817
loss: 1.039406657218933,grad_norm: 0.9999998524101529, iteration: 196818
loss: 0.9945343136787415,grad_norm: 0.9168964860366268, iteration: 196819
loss: 1.008543848991394,grad_norm: 0.9999993085795351, iteration: 196820
loss: 1.0216012001037598,grad_norm: 0.9999991920288935, iteration: 196821
loss: 0.9829822182655334,grad_norm: 0.9783030816832666, iteration: 196822
loss: 1.0179523229599,grad_norm: 0.999999536806782, iteration: 196823
loss: 0.9863778352737427,grad_norm: 0.9607884306201825, iteration: 196824
loss: 0.9966903328895569,grad_norm: 0.9460404651895776, iteration: 196825
loss: 0.9895539283752441,grad_norm: 0.9085253364945217, iteration: 196826
loss: 0.9948632717132568,grad_norm: 0.9999990632175212, iteration: 196827
loss: 0.9660108089447021,grad_norm: 0.9788291675705993, iteration: 196828
loss: 0.9906547665596008,grad_norm: 0.9828348352443416, iteration: 196829
loss: 0.9703236222267151,grad_norm: 0.9999992460348034, iteration: 196830
loss: 1.0152416229248047,grad_norm: 0.9377327854001728, iteration: 196831
loss: 1.0138213634490967,grad_norm: 0.7361240833234871, iteration: 196832
loss: 1.0257149934768677,grad_norm: 0.9999998675544557, iteration: 196833
loss: 0.9877796173095703,grad_norm: 0.8698609417875834, iteration: 196834
loss: 0.988718569278717,grad_norm: 0.9704916002187048, iteration: 196835
loss: 1.0155420303344727,grad_norm: 0.7834401123217896, iteration: 196836
loss: 1.0105037689208984,grad_norm: 0.8007883055918853, iteration: 196837
loss: 1.0257372856140137,grad_norm: 0.9428455687130787, iteration: 196838
loss: 1.0148457288742065,grad_norm: 0.9999990878671552, iteration: 196839
loss: 0.9574940204620361,grad_norm: 0.8703309250718827, iteration: 196840
loss: 0.9991945028305054,grad_norm: 0.8173888014584423, iteration: 196841
loss: 0.9845278263092041,grad_norm: 0.942141393379867, iteration: 196842
loss: 1.0060577392578125,grad_norm: 0.8373227326700691, iteration: 196843
loss: 0.9999744892120361,grad_norm: 0.8479500621542071, iteration: 196844
loss: 1.0475670099258423,grad_norm: 0.999999239147615, iteration: 196845
loss: 1.0949853658676147,grad_norm: 0.9999992642411313, iteration: 196846
loss: 0.9810702204704285,grad_norm: 0.877435476654329, iteration: 196847
loss: 1.007196068763733,grad_norm: 0.99999910030998, iteration: 196848
loss: 1.00596284866333,grad_norm: 0.9981337826518586, iteration: 196849
loss: 1.0108585357666016,grad_norm: 0.6858503621388844, iteration: 196850
loss: 1.0122319459915161,grad_norm: 0.8819792980034578, iteration: 196851
loss: 0.9799835681915283,grad_norm: 0.9481601880062278, iteration: 196852
loss: 1.0309910774230957,grad_norm: 0.9999996844454305, iteration: 196853
loss: 0.9915345907211304,grad_norm: 0.8292615866775083, iteration: 196854
loss: 1.0539171695709229,grad_norm: 0.999999452439005, iteration: 196855
loss: 1.0025297403335571,grad_norm: 0.9999991894243309, iteration: 196856
loss: 0.9856874346733093,grad_norm: 0.999999338190732, iteration: 196857
loss: 1.0405062437057495,grad_norm: 0.9913182427901931, iteration: 196858
loss: 1.0532317161560059,grad_norm: 0.9103886101458143, iteration: 196859
loss: 1.0674928426742554,grad_norm: 0.9999997088500931, iteration: 196860
loss: 0.9899151921272278,grad_norm: 0.9999989841427044, iteration: 196861
loss: 1.0821841955184937,grad_norm: 0.9999997628646999, iteration: 196862
loss: 0.9962713122367859,grad_norm: 0.9999990832652438, iteration: 196863
loss: 1.0029785633087158,grad_norm: 0.9999991467524821, iteration: 196864
loss: 0.993242084980011,grad_norm: 0.9999995934822802, iteration: 196865
loss: 0.9984228014945984,grad_norm: 0.8392311220192347, iteration: 196866
loss: 1.002841830253601,grad_norm: 0.945374666627638, iteration: 196867
loss: 0.9972323179244995,grad_norm: 0.8977933531374751, iteration: 196868
loss: 1.015519618988037,grad_norm: 0.9589755583214378, iteration: 196869
loss: 0.9851008057594299,grad_norm: 0.9817141767329574, iteration: 196870
loss: 1.0373302698135376,grad_norm: 0.8779010567856644, iteration: 196871
loss: 1.0511451959609985,grad_norm: 0.9999999413559083, iteration: 196872
loss: 0.9966041445732117,grad_norm: 0.9999990227248776, iteration: 196873
loss: 0.9924859404563904,grad_norm: 0.999998969030662, iteration: 196874
loss: 0.9971188306808472,grad_norm: 0.9999991236304592, iteration: 196875
loss: 1.0291146039962769,grad_norm: 0.9999998867485852, iteration: 196876
loss: 1.0082950592041016,grad_norm: 0.9999994916255035, iteration: 196877
loss: 0.9825963377952576,grad_norm: 0.9999990739706853, iteration: 196878
loss: 0.9691520929336548,grad_norm: 0.9999990745072257, iteration: 196879
loss: 0.9984716773033142,grad_norm: 0.9999990880195753, iteration: 196880
loss: 0.9888211488723755,grad_norm: 0.905603013295315, iteration: 196881
loss: 0.9701072573661804,grad_norm: 0.9351800179541796, iteration: 196882
loss: 1.0278993844985962,grad_norm: 0.9999990638083398, iteration: 196883
loss: 1.0134438276290894,grad_norm: 0.9340144993086588, iteration: 196884
loss: 1.0137287378311157,grad_norm: 0.8921919419035376, iteration: 196885
loss: 1.0178407430648804,grad_norm: 0.9939959918511295, iteration: 196886
loss: 0.9809316396713257,grad_norm: 0.7711900476897406, iteration: 196887
loss: 1.0274829864501953,grad_norm: 0.9999999020407168, iteration: 196888
loss: 1.0410516262054443,grad_norm: 0.8756631622986056, iteration: 196889
loss: 1.0067845582962036,grad_norm: 0.8799298254491681, iteration: 196890
loss: 1.0002485513687134,grad_norm: 0.9999992151488392, iteration: 196891
loss: 1.000725269317627,grad_norm: 0.9999990593178286, iteration: 196892
loss: 0.9886320233345032,grad_norm: 0.9061504553353154, iteration: 196893
loss: 0.9896605610847473,grad_norm: 0.9999991320679404, iteration: 196894
loss: 0.9888275861740112,grad_norm: 0.9999991587247221, iteration: 196895
loss: 1.0305719375610352,grad_norm: 0.9999990720356194, iteration: 196896
loss: 0.9835692048072815,grad_norm: 0.8301819675158482, iteration: 196897
loss: 1.0012274980545044,grad_norm: 0.9999990480783603, iteration: 196898
loss: 1.008307933807373,grad_norm: 0.8016357902139992, iteration: 196899
loss: 0.9877431988716125,grad_norm: 0.99999912718707, iteration: 196900
loss: 0.9874643683433533,grad_norm: 0.826363832599399, iteration: 196901
loss: 1.0464917421340942,grad_norm: 0.9999991349316268, iteration: 196902
loss: 0.9632925391197205,grad_norm: 0.8979991994672523, iteration: 196903
loss: 0.9785358309745789,grad_norm: 0.9490694941709691, iteration: 196904
loss: 1.045905590057373,grad_norm: 0.999998940467471, iteration: 196905
loss: 1.0396379232406616,grad_norm: 0.967832559575987, iteration: 196906
loss: 0.9904656410217285,grad_norm: 0.9999991454174965, iteration: 196907
loss: 0.971625030040741,grad_norm: 0.9799936813508321, iteration: 196908
loss: 0.9691064357757568,grad_norm: 0.9999991043875412, iteration: 196909
loss: 1.0265628099441528,grad_norm: 0.9944185606757112, iteration: 196910
loss: 0.981796145439148,grad_norm: 0.9730717345943486, iteration: 196911
loss: 0.9990151524543762,grad_norm: 0.9999992943723599, iteration: 196912
loss: 0.9692139625549316,grad_norm: 0.9404393538321262, iteration: 196913
loss: 0.9991862177848816,grad_norm: 0.9936064400304845, iteration: 196914
loss: 1.0167268514633179,grad_norm: 0.8092094644703194, iteration: 196915
loss: 0.999398946762085,grad_norm: 0.9587585744766004, iteration: 196916
loss: 1.0132890939712524,grad_norm: 0.8129780520840464, iteration: 196917
loss: 1.0251986980438232,grad_norm: 0.9294782157398574, iteration: 196918
loss: 0.9971495866775513,grad_norm: 0.9999990257250048, iteration: 196919
loss: 1.0463353395462036,grad_norm: 0.9999990842623634, iteration: 196920
loss: 1.015648365020752,grad_norm: 0.961588806664844, iteration: 196921
loss: 1.0360301733016968,grad_norm: 0.9458240183000235, iteration: 196922
loss: 0.9752764701843262,grad_norm: 0.875823663309678, iteration: 196923
loss: 1.002159595489502,grad_norm: 0.9999990066855979, iteration: 196924
loss: 0.99443519115448,grad_norm: 0.9999992334735395, iteration: 196925
loss: 1.0069561004638672,grad_norm: 0.9999991619080375, iteration: 196926
loss: 1.014607548713684,grad_norm: 0.9999991588058391, iteration: 196927
loss: 0.963549792766571,grad_norm: 0.9901399601815045, iteration: 196928
loss: 1.0067315101623535,grad_norm: 0.93995962768155, iteration: 196929
loss: 1.0041821002960205,grad_norm: 0.9640838531809278, iteration: 196930
loss: 0.9834297895431519,grad_norm: 0.8520680970171242, iteration: 196931
loss: 1.0196514129638672,grad_norm: 0.9579563461079501, iteration: 196932
loss: 0.9935069680213928,grad_norm: 0.8959853626863495, iteration: 196933
loss: 0.985707700252533,grad_norm: 0.9039339346952014, iteration: 196934
loss: 1.0068061351776123,grad_norm: 0.9999990345451442, iteration: 196935
loss: 1.0291918516159058,grad_norm: 0.9999994779374716, iteration: 196936
loss: 0.9955653548240662,grad_norm: 0.9307773848639406, iteration: 196937
loss: 1.048785924911499,grad_norm: 0.9999991607923142, iteration: 196938
loss: 1.0299896001815796,grad_norm: 0.9999991999548041, iteration: 196939
loss: 1.0123710632324219,grad_norm: 0.9295169483938607, iteration: 196940
loss: 0.983353853225708,grad_norm: 0.9999990963668967, iteration: 196941
loss: 0.9877108335494995,grad_norm: 0.8326889657517146, iteration: 196942
loss: 0.9861066937446594,grad_norm: 0.8738316515825074, iteration: 196943
loss: 1.0069639682769775,grad_norm: 0.9553751801294053, iteration: 196944
loss: 1.0216323137283325,grad_norm: 0.9999993252409557, iteration: 196945
loss: 0.9895589351654053,grad_norm: 0.9458280328706348, iteration: 196946
loss: 0.9775862693786621,grad_norm: 0.97315034809537, iteration: 196947
loss: 1.0078002214431763,grad_norm: 0.9999991943673864, iteration: 196948
loss: 1.0043644905090332,grad_norm: 0.8654619380714668, iteration: 196949
loss: 1.0191426277160645,grad_norm: 0.99999915587313, iteration: 196950
loss: 0.9874136447906494,grad_norm: 0.9999990959081583, iteration: 196951
loss: 0.9570073485374451,grad_norm: 0.9610710560988517, iteration: 196952
loss: 0.9932265877723694,grad_norm: 0.999998975927217, iteration: 196953
loss: 1.0078858137130737,grad_norm: 0.9999990191906398, iteration: 196954
loss: 1.0376863479614258,grad_norm: 0.9999994904843605, iteration: 196955
loss: 1.0219027996063232,grad_norm: 0.9999991054921744, iteration: 196956
loss: 1.0253894329071045,grad_norm: 0.9935349058377722, iteration: 196957
loss: 0.9779654145240784,grad_norm: 0.9307752496579306, iteration: 196958
loss: 0.9893773198127747,grad_norm: 0.9412813207188095, iteration: 196959
loss: 1.029755711555481,grad_norm: 0.8102771888560877, iteration: 196960
loss: 1.0213760137557983,grad_norm: 0.8343935192566209, iteration: 196961
loss: 0.9704171419143677,grad_norm: 0.8732778286841417, iteration: 196962
loss: 0.9890797138214111,grad_norm: 0.8993876178432484, iteration: 196963
loss: 1.0173667669296265,grad_norm: 0.9999991876713229, iteration: 196964
loss: 1.1034024953842163,grad_norm: 1.0000000327551546, iteration: 196965
loss: 1.0039236545562744,grad_norm: 0.9999990410793913, iteration: 196966
loss: 0.9933372735977173,grad_norm: 0.9999992911608548, iteration: 196967
loss: 1.0158442258834839,grad_norm: 0.8635431885839545, iteration: 196968
loss: 1.0259854793548584,grad_norm: 0.9999996118166675, iteration: 196969
loss: 1.055793046951294,grad_norm: 0.9999993790663766, iteration: 196970
loss: 1.0331237316131592,grad_norm: 0.7864186503500926, iteration: 196971
loss: 1.0085419416427612,grad_norm: 0.8762807477335983, iteration: 196972
loss: 0.9985284209251404,grad_norm: 0.9999991976551577, iteration: 196973
loss: 1.0399729013442993,grad_norm: 0.9999992764156189, iteration: 196974
loss: 1.005551815032959,grad_norm: 0.8659294891339366, iteration: 196975
loss: 1.0003327131271362,grad_norm: 0.9999994022271523, iteration: 196976
loss: 0.9768878817558289,grad_norm: 0.8698984301911469, iteration: 196977
loss: 1.0170979499816895,grad_norm: 0.7842393182901564, iteration: 196978
loss: 1.0255235433578491,grad_norm: 0.9999990543999459, iteration: 196979
loss: 1.004981279373169,grad_norm: 0.9899099636604133, iteration: 196980
loss: 1.0045970678329468,grad_norm: 0.999999101712739, iteration: 196981
loss: 1.0294551849365234,grad_norm: 0.9999991832127793, iteration: 196982
loss: 1.0206466913223267,grad_norm: 0.8528955459017749, iteration: 196983
loss: 0.9808940291404724,grad_norm: 0.7236261107306432, iteration: 196984
loss: 1.0897072553634644,grad_norm: 0.9999991559732031, iteration: 196985
loss: 1.0061635971069336,grad_norm: 0.9999992638859918, iteration: 196986
loss: 1.0017534494400024,grad_norm: 0.8623794334164814, iteration: 196987
loss: 0.9773049354553223,grad_norm: 0.9221184758899904, iteration: 196988
loss: 0.9913249015808105,grad_norm: 0.9999991053654332, iteration: 196989
loss: 1.0077353715896606,grad_norm: 0.9550345062989315, iteration: 196990
loss: 1.1033161878585815,grad_norm: 0.8912715964267114, iteration: 196991
loss: 1.0394420623779297,grad_norm: 0.9999990715971998, iteration: 196992
loss: 1.0107301473617554,grad_norm: 0.9999991009621696, iteration: 196993
loss: 1.0178169012069702,grad_norm: 0.9385288024922169, iteration: 196994
loss: 0.9981383085250854,grad_norm: 0.9999996812156102, iteration: 196995
loss: 1.026534080505371,grad_norm: 0.9999991771605848, iteration: 196996
loss: 0.9707609415054321,grad_norm: 0.924194183934406, iteration: 196997
loss: 0.998357355594635,grad_norm: 0.8929100510956933, iteration: 196998
loss: 0.977085530757904,grad_norm: 0.8405718672480836, iteration: 196999
loss: 0.9733347296714783,grad_norm: 0.8833046066679061, iteration: 197000
loss: 1.0132651329040527,grad_norm: 0.9999989898898635, iteration: 197001
loss: 0.9707043170928955,grad_norm: 0.9999991438210462, iteration: 197002
loss: 1.0376129150390625,grad_norm: 0.9999994932025629, iteration: 197003
loss: 0.9898154735565186,grad_norm: 0.8949953003317213, iteration: 197004
loss: 1.002945065498352,grad_norm: 0.9999991955765242, iteration: 197005
loss: 0.9929425716400146,grad_norm: 0.9290907907246684, iteration: 197006
loss: 1.0243076086044312,grad_norm: 0.9170775016067891, iteration: 197007
loss: 1.0171542167663574,grad_norm: 0.9999991057113274, iteration: 197008
loss: 1.029375672340393,grad_norm: 0.9999990436729548, iteration: 197009
loss: 1.009905457496643,grad_norm: 0.8530605115539865, iteration: 197010
loss: 0.9705943465232849,grad_norm: 0.9999990647504259, iteration: 197011
loss: 0.9688838124275208,grad_norm: 0.9882739352570807, iteration: 197012
loss: 1.0180257558822632,grad_norm: 0.8122115556791438, iteration: 197013
loss: 0.9925894737243652,grad_norm: 0.9999990994474908, iteration: 197014
loss: 0.997353732585907,grad_norm: 0.9860321843775257, iteration: 197015
loss: 0.9729766249656677,grad_norm: 0.9999990826090261, iteration: 197016
loss: 1.0676896572113037,grad_norm: 0.9999992111406685, iteration: 197017
loss: 1.0112372636795044,grad_norm: 0.8535782335864335, iteration: 197018
loss: 1.0139065980911255,grad_norm: 0.9908082077365695, iteration: 197019
loss: 0.9856047630310059,grad_norm: 0.9999991761785738, iteration: 197020
loss: 0.9919719099998474,grad_norm: 0.8245007249123328, iteration: 197021
loss: 1.0099297761917114,grad_norm: 0.9327940825860281, iteration: 197022
loss: 1.0348196029663086,grad_norm: 0.7861701201254531, iteration: 197023
loss: 1.0028212070465088,grad_norm: 0.9999991035418293, iteration: 197024
loss: 0.9622693657875061,grad_norm: 0.9999991560043653, iteration: 197025
loss: 0.9948621392250061,grad_norm: 0.9889295354651981, iteration: 197026
loss: 0.9804033041000366,grad_norm: 0.9999991604920957, iteration: 197027
loss: 1.0071314573287964,grad_norm: 0.9197317583359078, iteration: 197028
loss: 0.9953635931015015,grad_norm: 0.8995806248864129, iteration: 197029
loss: 0.9928821325302124,grad_norm: 0.9035094542320925, iteration: 197030
loss: 0.972968578338623,grad_norm: 0.9362336010160572, iteration: 197031
loss: 0.955079972743988,grad_norm: 0.9999990054885307, iteration: 197032
loss: 0.9733522534370422,grad_norm: 0.9141429288907709, iteration: 197033
loss: 0.9943884611129761,grad_norm: 0.9410405861738296, iteration: 197034
loss: 1.0952296257019043,grad_norm: 0.9999999281536179, iteration: 197035
loss: 1.001406192779541,grad_norm: 0.9169680866582299, iteration: 197036
loss: 0.9865512251853943,grad_norm: 0.9030061871066953, iteration: 197037
loss: 1.0191255807876587,grad_norm: 0.892528723479366, iteration: 197038
loss: 1.031643271446228,grad_norm: 0.9349121814035954, iteration: 197039
loss: 0.9748806953430176,grad_norm: 0.9869862632182211, iteration: 197040
loss: 0.9816248416900635,grad_norm: 0.9999990478784733, iteration: 197041
loss: 1.0078073740005493,grad_norm: 0.9999996524223023, iteration: 197042
loss: 1.002981424331665,grad_norm: 0.9999990536327812, iteration: 197043
loss: 0.96199631690979,grad_norm: 0.9999990115588056, iteration: 197044
loss: 1.0011399984359741,grad_norm: 0.7595707669852247, iteration: 197045
loss: 1.0294982194900513,grad_norm: 0.9999998143504774, iteration: 197046
loss: 0.9960076212882996,grad_norm: 0.9999991550215529, iteration: 197047
loss: 1.0200284719467163,grad_norm: 0.7993329265893088, iteration: 197048
loss: 0.9875128269195557,grad_norm: 0.9999990084595085, iteration: 197049
loss: 1.0228971242904663,grad_norm: 0.9999990296539574, iteration: 197050
loss: 1.0267735719680786,grad_norm: 0.8872368067013285, iteration: 197051
loss: 1.0225471258163452,grad_norm: 0.9075771316366626, iteration: 197052
loss: 1.0519232749938965,grad_norm: 0.9999994421900406, iteration: 197053
loss: 1.034152626991272,grad_norm: 0.9858398908726362, iteration: 197054
loss: 0.9677963852882385,grad_norm: 0.7799945477065255, iteration: 197055
loss: 0.9959222674369812,grad_norm: 0.8933062931213814, iteration: 197056
loss: 1.007434368133545,grad_norm: 0.9999991192890801, iteration: 197057
loss: 1.004966378211975,grad_norm: 0.9999990745046843, iteration: 197058
loss: 1.0082060098648071,grad_norm: 0.9970728227949084, iteration: 197059
loss: 1.0102120637893677,grad_norm: 0.999999243178776, iteration: 197060
loss: 1.066304326057434,grad_norm: 0.999999592059085, iteration: 197061
loss: 0.9929215312004089,grad_norm: 0.9999993498041789, iteration: 197062
loss: 1.0442582368850708,grad_norm: 0.8366926567908815, iteration: 197063
loss: 0.9956145286560059,grad_norm: 0.8673520409014539, iteration: 197064
loss: 1.0196492671966553,grad_norm: 0.9999991632235896, iteration: 197065
loss: 1.038551688194275,grad_norm: 0.9782633133075415, iteration: 197066
loss: 1.019618272781372,grad_norm: 0.8614566604119647, iteration: 197067
loss: 1.020792841911316,grad_norm: 0.9999989686293074, iteration: 197068
loss: 1.0331693887710571,grad_norm: 0.9999992570226168, iteration: 197069
loss: 0.9870542883872986,grad_norm: 0.9999992007691277, iteration: 197070
loss: 1.0073128938674927,grad_norm: 0.9999991995545681, iteration: 197071
loss: 1.0012694597244263,grad_norm: 0.9999991669161759, iteration: 197072
loss: 0.9822301864624023,grad_norm: 0.9182265840712738, iteration: 197073
loss: 1.0226423740386963,grad_norm: 0.9959343618459593, iteration: 197074
loss: 0.9768937230110168,grad_norm: 0.9221333829200334, iteration: 197075
loss: 1.0335617065429688,grad_norm: 0.9999993245480316, iteration: 197076
loss: 0.9984835982322693,grad_norm: 0.9999990956234991, iteration: 197077
loss: 0.9640729427337646,grad_norm: 0.9010156065481929, iteration: 197078
loss: 1.0250312089920044,grad_norm: 0.9898043873021771, iteration: 197079
loss: 1.0318297147750854,grad_norm: 0.9365975541404665, iteration: 197080
loss: 0.9811961054801941,grad_norm: 0.9999991059470557, iteration: 197081
loss: 1.0396130084991455,grad_norm: 0.9999990899450972, iteration: 197082
loss: 0.9855517745018005,grad_norm: 0.8161410641854533, iteration: 197083
loss: 0.9613834619522095,grad_norm: 0.8479860886423368, iteration: 197084
loss: 0.965149462223053,grad_norm: 0.9999991312252291, iteration: 197085
loss: 1.0067154169082642,grad_norm: 0.8360138375389587, iteration: 197086
loss: 0.9686689376831055,grad_norm: 0.8424353384167899, iteration: 197087
loss: 1.0075864791870117,grad_norm: 0.9999990893096082, iteration: 197088
loss: 0.9839130640029907,grad_norm: 0.9999989541150812, iteration: 197089
loss: 0.987456202507019,grad_norm: 0.9999996236373924, iteration: 197090
loss: 1.0309447050094604,grad_norm: 0.9999991049309395, iteration: 197091
loss: 0.999850869178772,grad_norm: 0.9659950629362285, iteration: 197092
loss: 0.9825592041015625,grad_norm: 0.9794670552891465, iteration: 197093
loss: 0.9938101172447205,grad_norm: 0.8869692084913872, iteration: 197094
loss: 0.9639649391174316,grad_norm: 0.9714450755238035, iteration: 197095
loss: 0.9956583976745605,grad_norm: 0.7017493623829861, iteration: 197096
loss: 1.0077760219573975,grad_norm: 0.9238639382398834, iteration: 197097
loss: 1.0091378688812256,grad_norm: 0.9999991093997199, iteration: 197098
loss: 0.9858791828155518,grad_norm: 0.8864767944973077, iteration: 197099
loss: 1.0099884271621704,grad_norm: 0.9999990838490762, iteration: 197100
loss: 0.989589273929596,grad_norm: 0.9977174320213292, iteration: 197101
loss: 0.9807108640670776,grad_norm: 0.9793404538177978, iteration: 197102
loss: 0.9640053510665894,grad_norm: 0.8978991545042116, iteration: 197103
loss: 0.996380627155304,grad_norm: 0.987359682834305, iteration: 197104
loss: 0.9766145944595337,grad_norm: 0.713955758405761, iteration: 197105
loss: 1.0556542873382568,grad_norm: 0.9999993695240541, iteration: 197106
loss: 0.9882149696350098,grad_norm: 0.8124362647087442, iteration: 197107
loss: 1.004570722579956,grad_norm: 0.8519886653536298, iteration: 197108
loss: 0.9991216659545898,grad_norm: 0.9249665531058228, iteration: 197109
loss: 1.01188063621521,grad_norm: 0.9743027800563735, iteration: 197110
loss: 1.0086946487426758,grad_norm: 0.8896338907501966, iteration: 197111
loss: 1.2513391971588135,grad_norm: 0.9999996773141892, iteration: 197112
loss: 0.9778096675872803,grad_norm: 0.9280591305168977, iteration: 197113
loss: 1.000050663948059,grad_norm: 0.754438690831447, iteration: 197114
loss: 0.994287371635437,grad_norm: 0.999999533453581, iteration: 197115
loss: 1.0065298080444336,grad_norm: 0.9646160674654657, iteration: 197116
loss: 1.0822330713272095,grad_norm: 0.9981517299976004, iteration: 197117
loss: 1.0206915140151978,grad_norm: 0.8232809802062725, iteration: 197118
loss: 1.0353237390518188,grad_norm: 0.9999995266725223, iteration: 197119
loss: 0.9961640238761902,grad_norm: 0.7621820657005973, iteration: 197120
loss: 1.0263736248016357,grad_norm: 0.8929739748300881, iteration: 197121
loss: 0.9926373958587646,grad_norm: 0.9999991164327684, iteration: 197122
loss: 0.9941875338554382,grad_norm: 0.9848108187853214, iteration: 197123
loss: 0.9914677739143372,grad_norm: 0.9214143995730785, iteration: 197124
loss: 0.9899824857711792,grad_norm: 0.9999990791463103, iteration: 197125
loss: 1.0108537673950195,grad_norm: 0.8577280424505457, iteration: 197126
loss: 0.9865232706069946,grad_norm: 0.999999130476605, iteration: 197127
loss: 1.012497901916504,grad_norm: 0.8443641461037157, iteration: 197128
loss: 1.004895806312561,grad_norm: 0.8339684354494962, iteration: 197129
loss: 0.9701645970344543,grad_norm: 0.9117100621380155, iteration: 197130
loss: 1.0155322551727295,grad_norm: 0.83875558247887, iteration: 197131
loss: 0.9982923269271851,grad_norm: 0.9999998431238972, iteration: 197132
loss: 1.0138142108917236,grad_norm: 0.9431000590563007, iteration: 197133
loss: 0.9799047112464905,grad_norm: 0.8682077587883247, iteration: 197134
loss: 1.023153305053711,grad_norm: 0.9999990307082592, iteration: 197135
loss: 1.011378288269043,grad_norm: 0.9999991229032641, iteration: 197136
loss: 1.0142089128494263,grad_norm: 0.8850362271422991, iteration: 197137
loss: 0.9887254238128662,grad_norm: 0.8875831275855011, iteration: 197138
loss: 0.942590057849884,grad_norm: 0.9999990577306545, iteration: 197139
loss: 1.0349057912826538,grad_norm: 0.9938108470616926, iteration: 197140
loss: 0.9862508773803711,grad_norm: 0.9049650008461914, iteration: 197141
loss: 1.0143765211105347,grad_norm: 0.9999990804297834, iteration: 197142
loss: 0.974016547203064,grad_norm: 0.9999989901583957, iteration: 197143
loss: 0.9649254083633423,grad_norm: 0.8727120466795001, iteration: 197144
loss: 1.0380280017852783,grad_norm: 0.8373481079023408, iteration: 197145
loss: 0.9997332096099854,grad_norm: 0.9409462339891808, iteration: 197146
loss: 0.9620484709739685,grad_norm: 0.901174073257162, iteration: 197147
loss: 0.9953174591064453,grad_norm: 0.8154674376940139, iteration: 197148
loss: 1.0157562494277954,grad_norm: 0.9999990026400406, iteration: 197149
loss: 0.9937837719917297,grad_norm: 0.9054397264031167, iteration: 197150
loss: 0.9841337203979492,grad_norm: 0.9336593878825328, iteration: 197151
loss: 0.9565786719322205,grad_norm: 0.9324472788516888, iteration: 197152
loss: 0.9875717163085938,grad_norm: 0.9999990442475726, iteration: 197153
loss: 1.0158692598342896,grad_norm: 0.9999991587783724, iteration: 197154
loss: 1.0121066570281982,grad_norm: 0.9999989971046173, iteration: 197155
loss: 0.9712154269218445,grad_norm: 0.9999988631629014, iteration: 197156
loss: 1.0220026969909668,grad_norm: 0.9999989816960275, iteration: 197157
loss: 1.0166751146316528,grad_norm: 0.9028107785983687, iteration: 197158
loss: 0.9848330616950989,grad_norm: 0.9999990912236506, iteration: 197159
loss: 1.0178577899932861,grad_norm: 0.999999180595315, iteration: 197160
loss: 1.0738364458084106,grad_norm: 0.999999213423863, iteration: 197161
loss: 1.0207135677337646,grad_norm: 0.9999991223241186, iteration: 197162
loss: 1.0258806943893433,grad_norm: 0.8448397085888107, iteration: 197163
loss: 0.9784913659095764,grad_norm: 0.9999990564871949, iteration: 197164
loss: 1.0591504573822021,grad_norm: 0.999999887059845, iteration: 197165
loss: 1.0136123895645142,grad_norm: 0.999999004271723, iteration: 197166
loss: 1.0276168584823608,grad_norm: 0.9999992904112001, iteration: 197167
loss: 1.0111674070358276,grad_norm: 0.9999991441324908, iteration: 197168
loss: 1.0085256099700928,grad_norm: 0.9999991749669392, iteration: 197169
loss: 1.0098848342895508,grad_norm: 0.9999996424276041, iteration: 197170
loss: 0.9872356653213501,grad_norm: 0.9412069747197509, iteration: 197171
loss: 0.9960438013076782,grad_norm: 0.9888100388482829, iteration: 197172
loss: 0.9811747670173645,grad_norm: 0.9773405365146894, iteration: 197173
loss: 1.0122770071029663,grad_norm: 0.8888808235411471, iteration: 197174
loss: 1.0146771669387817,grad_norm: 0.9999991524350031, iteration: 197175
loss: 0.9588664174079895,grad_norm: 0.9981260443205829, iteration: 197176
loss: 1.014898419380188,grad_norm: 0.999999056205606, iteration: 197177
loss: 1.0163017511367798,grad_norm: 0.80014171012926, iteration: 197178
loss: 1.0138167142868042,grad_norm: 0.9150588611891843, iteration: 197179
loss: 0.9620696306228638,grad_norm: 0.9999991586999146, iteration: 197180
loss: 1.0170929431915283,grad_norm: 0.894808746321328, iteration: 197181
loss: 0.9998260736465454,grad_norm: 0.9131365827579866, iteration: 197182
loss: 0.989916205406189,grad_norm: 0.9344217778736484, iteration: 197183
loss: 0.9886302947998047,grad_norm: 0.9309311108726742, iteration: 197184
loss: 1.025750756263733,grad_norm: 0.9999998637951559, iteration: 197185
loss: 0.9986534714698792,grad_norm: 0.9078575385746536, iteration: 197186
loss: 1.0136667490005493,grad_norm: 0.9783956828490525, iteration: 197187
loss: 0.9899776577949524,grad_norm: 0.84657584850674, iteration: 197188
loss: 1.008992314338684,grad_norm: 0.9999990961748397, iteration: 197189
loss: 1.0033948421478271,grad_norm: 0.9999990589319935, iteration: 197190
loss: 1.0054700374603271,grad_norm: 0.9828307130293492, iteration: 197191
loss: 1.0034996271133423,grad_norm: 0.8715217994131311, iteration: 197192
loss: 0.9489468932151794,grad_norm: 0.806186284429823, iteration: 197193
loss: 0.9670025110244751,grad_norm: 0.9999992223670952, iteration: 197194
loss: 1.016689419746399,grad_norm: 0.7772816951408424, iteration: 197195
loss: 1.002510666847229,grad_norm: 0.8644808757802773, iteration: 197196
loss: 0.9915289878845215,grad_norm: 0.9999990002391468, iteration: 197197
loss: 0.9919697642326355,grad_norm: 0.9910531748429932, iteration: 197198
loss: 1.0271854400634766,grad_norm: 0.8229737671480659, iteration: 197199
loss: 1.0148097276687622,grad_norm: 0.9999991482908356, iteration: 197200
loss: 0.9805078506469727,grad_norm: 0.8584161224849859, iteration: 197201
loss: 0.9959720969200134,grad_norm: 0.9999991276655805, iteration: 197202
loss: 1.0057851076126099,grad_norm: 0.9831265113312796, iteration: 197203
loss: 0.9916148781776428,grad_norm: 0.9999991063186754, iteration: 197204
loss: 0.99763423204422,grad_norm: 0.9999992370961444, iteration: 197205
loss: 0.9937973022460938,grad_norm: 0.8485064652047583, iteration: 197206
loss: 0.9826793074607849,grad_norm: 0.8610693882730468, iteration: 197207
loss: 0.980361819267273,grad_norm: 0.9999991613287054, iteration: 197208
loss: 0.990073561668396,grad_norm: 0.9990790165950034, iteration: 197209
loss: 1.0134702920913696,grad_norm: 0.9885818877248191, iteration: 197210
loss: 1.021350622177124,grad_norm: 0.9999991386053709, iteration: 197211
loss: 1.0045448541641235,grad_norm: 0.9733702187552091, iteration: 197212
loss: 0.9602007269859314,grad_norm: 0.8969009156872153, iteration: 197213
loss: 0.9927751421928406,grad_norm: 0.9176312621757738, iteration: 197214
loss: 0.9760620594024658,grad_norm: 0.9000389758773458, iteration: 197215
loss: 1.0140613317489624,grad_norm: 0.8990842839530846, iteration: 197216
loss: 0.9951941967010498,grad_norm: 0.918954016459797, iteration: 197217
loss: 1.0105106830596924,grad_norm: 0.944498555075077, iteration: 197218
loss: 1.025565505027771,grad_norm: 0.9999990823845465, iteration: 197219
loss: 0.9884492754936218,grad_norm: 0.9789703806930679, iteration: 197220
loss: 0.9741265177726746,grad_norm: 0.8689721125419427, iteration: 197221
loss: 1.034043550491333,grad_norm: 0.9702610818680624, iteration: 197222
loss: 1.002742052078247,grad_norm: 0.9295038863386316, iteration: 197223
loss: 1.0097308158874512,grad_norm: 0.9999990298978925, iteration: 197224
loss: 0.9791721105575562,grad_norm: 0.946316864811853, iteration: 197225
loss: 0.9426599144935608,grad_norm: 0.9999991103494971, iteration: 197226
loss: 0.9737030863761902,grad_norm: 0.8190075923967266, iteration: 197227
loss: 0.9987645745277405,grad_norm: 0.8208140076343459, iteration: 197228
loss: 1.001706600189209,grad_norm: 0.9999991421462224, iteration: 197229
loss: 0.9821759462356567,grad_norm: 0.8993417467112708, iteration: 197230
loss: 0.9854781627655029,grad_norm: 0.8967216219541768, iteration: 197231
loss: 0.9994878172874451,grad_norm: 0.9187159066210399, iteration: 197232
loss: 0.9957196116447449,grad_norm: 0.9790557205470094, iteration: 197233
loss: 0.997986376285553,grad_norm: 0.9999991278176796, iteration: 197234
loss: 1.021713137626648,grad_norm: 0.8995070530281084, iteration: 197235
loss: 1.0153380632400513,grad_norm: 0.9804664406617913, iteration: 197236
loss: 0.9752035737037659,grad_norm: 0.9290365975452044, iteration: 197237
loss: 1.0039697885513306,grad_norm: 0.9999989631069759, iteration: 197238
loss: 1.061174988746643,grad_norm: 0.8902449188646335, iteration: 197239
loss: 0.9906008839607239,grad_norm: 0.9596117391734255, iteration: 197240
loss: 1.0417057275772095,grad_norm: 0.9999994638819332, iteration: 197241
loss: 1.0118954181671143,grad_norm: 0.9602388397416343, iteration: 197242
loss: 1.004124641418457,grad_norm: 0.7631037893393573, iteration: 197243
loss: 1.0031871795654297,grad_norm: 0.9108814659794804, iteration: 197244
loss: 1.0200756788253784,grad_norm: 0.8661110219420578, iteration: 197245
loss: 0.9589133858680725,grad_norm: 0.8850067453335395, iteration: 197246
loss: 1.0569511651992798,grad_norm: 0.9999993863366807, iteration: 197247
loss: 0.9523399472236633,grad_norm: 0.8497558871005308, iteration: 197248
loss: 0.9700169563293457,grad_norm: 0.9999988971779966, iteration: 197249
loss: 0.9769895672798157,grad_norm: 0.9138876349033128, iteration: 197250
loss: 1.023811936378479,grad_norm: 0.9524373726120031, iteration: 197251
loss: 0.9702306389808655,grad_norm: 0.999999018085002, iteration: 197252
loss: 0.9760031700134277,grad_norm: 0.9999990647202956, iteration: 197253
loss: 0.9920221567153931,grad_norm: 0.9243923417511394, iteration: 197254
loss: 0.9944654107093811,grad_norm: 0.8597652300929389, iteration: 197255
loss: 1.0035115480422974,grad_norm: 0.9478936373715501, iteration: 197256
loss: 1.0212153196334839,grad_norm: 0.9999990913570797, iteration: 197257
loss: 0.9932754635810852,grad_norm: 0.911297267153877, iteration: 197258
loss: 1.0133713483810425,grad_norm: 0.9038633907305161, iteration: 197259
loss: 0.9947605133056641,grad_norm: 0.9482511476144508, iteration: 197260
loss: 0.978965163230896,grad_norm: 0.9329476369647218, iteration: 197261
loss: 1.007050633430481,grad_norm: 0.9999990979932295, iteration: 197262
loss: 0.9682105779647827,grad_norm: 0.8840286584393183, iteration: 197263
loss: 0.9694182872772217,grad_norm: 0.844307931661505, iteration: 197264
loss: 1.0274296998977661,grad_norm: 0.9745365795736606, iteration: 197265
loss: 1.014580249786377,grad_norm: 0.7908716642744857, iteration: 197266
loss: 1.1184508800506592,grad_norm: 0.9999989835735347, iteration: 197267
loss: 0.9523300528526306,grad_norm: 0.8149486598731784, iteration: 197268
loss: 0.9705594778060913,grad_norm: 0.9999991127016359, iteration: 197269
loss: 0.9908667206764221,grad_norm: 0.8793843873642601, iteration: 197270
loss: 1.0090718269348145,grad_norm: 0.8849705649704053, iteration: 197271
loss: 0.9942479133605957,grad_norm: 0.9504069571982607, iteration: 197272
loss: 1.0136791467666626,grad_norm: 0.9481542732825067, iteration: 197273
loss: 1.0027339458465576,grad_norm: 0.9803939326696097, iteration: 197274
loss: 1.05600905418396,grad_norm: 0.9999995588361579, iteration: 197275
loss: 0.9739115238189697,grad_norm: 0.9221643781137245, iteration: 197276
loss: 0.9972922205924988,grad_norm: 0.9944825694167715, iteration: 197277
loss: 0.9892213940620422,grad_norm: 0.9535234652050003, iteration: 197278
loss: 0.9644186496734619,grad_norm: 0.9999992013385012, iteration: 197279
loss: 1.0252610445022583,grad_norm: 0.9851757100538149, iteration: 197280
loss: 1.0247268676757812,grad_norm: 0.9999990464523513, iteration: 197281
loss: 0.9870684742927551,grad_norm: 0.9475075908083969, iteration: 197282
loss: 1.0170222520828247,grad_norm: 0.9364303725139671, iteration: 197283
loss: 0.9823418259620667,grad_norm: 0.9422361854385006, iteration: 197284
loss: 0.9972735643386841,grad_norm: 0.9302025545671588, iteration: 197285
loss: 1.1147410869598389,grad_norm: 0.99999945963156, iteration: 197286
loss: 1.0169867277145386,grad_norm: 0.9717649631610826, iteration: 197287
loss: 1.0019758939743042,grad_norm: 0.9964718050232134, iteration: 197288
loss: 1.0227017402648926,grad_norm: 0.9999989993770448, iteration: 197289
loss: 0.9979853630065918,grad_norm: 0.9994977579534671, iteration: 197290
loss: 1.0429731607437134,grad_norm: 0.8460550958101564, iteration: 197291
loss: 1.0227941274642944,grad_norm: 0.9999989357761319, iteration: 197292
loss: 0.9932831525802612,grad_norm: 0.9352161820006927, iteration: 197293
loss: 0.9713462591171265,grad_norm: 0.9999990069535029, iteration: 197294
loss: 0.9676647186279297,grad_norm: 0.8894085786832141, iteration: 197295
loss: 0.9915065169334412,grad_norm: 0.9431976276623036, iteration: 197296
loss: 1.0118759870529175,grad_norm: 0.9486151886811063, iteration: 197297
loss: 1.018537163734436,grad_norm: 0.9215884922966582, iteration: 197298
loss: 0.9838621020317078,grad_norm: 0.9629429539025853, iteration: 197299
loss: 0.978951096534729,grad_norm: 0.9999989800055312, iteration: 197300
loss: 1.0094125270843506,grad_norm: 0.8873377426680263, iteration: 197301
loss: 0.9773432612419128,grad_norm: 0.8633390246718535, iteration: 197302
loss: 1.016829013824463,grad_norm: 0.9493644727231471, iteration: 197303
loss: 0.9460664987564087,grad_norm: 0.9999990095634652, iteration: 197304
loss: 0.9925902485847473,grad_norm: 0.9146948857006062, iteration: 197305
loss: 0.9800208210945129,grad_norm: 0.9143144800930925, iteration: 197306
loss: 0.9915169477462769,grad_norm: 0.8874121082198778, iteration: 197307
loss: 0.9848805665969849,grad_norm: 0.9999990826415962, iteration: 197308
loss: 0.9804727435112,grad_norm: 0.8876769778022201, iteration: 197309
loss: 0.9842762351036072,grad_norm: 0.9052296619345434, iteration: 197310
loss: 0.9443872570991516,grad_norm: 0.9999995041926139, iteration: 197311
loss: 1.0060160160064697,grad_norm: 0.876860901804178, iteration: 197312
loss: 0.9804802536964417,grad_norm: 0.9530720066272323, iteration: 197313
loss: 0.9869953393936157,grad_norm: 0.9986750882390087, iteration: 197314
loss: 0.9852750301361084,grad_norm: 0.8170589402612852, iteration: 197315
loss: 1.021325945854187,grad_norm: 0.9446301402830513, iteration: 197316
loss: 1.0572314262390137,grad_norm: 0.999999085720027, iteration: 197317
loss: 0.9948557615280151,grad_norm: 0.8996075207621371, iteration: 197318
loss: 0.9825661182403564,grad_norm: 0.8781113749928139, iteration: 197319
loss: 0.9664005637168884,grad_norm: 0.9179775511902134, iteration: 197320
loss: 1.015905499458313,grad_norm: 0.9244202724017263, iteration: 197321
loss: 1.0217119455337524,grad_norm: 0.8073630417078703, iteration: 197322
loss: 1.00929594039917,grad_norm: 0.9999989935829777, iteration: 197323
loss: 0.9903030395507812,grad_norm: 0.993706690849724, iteration: 197324
loss: 1.0391826629638672,grad_norm: 0.9016601986452618, iteration: 197325
loss: 0.97071373462677,grad_norm: 0.9018978899342045, iteration: 197326
loss: 0.9974766373634338,grad_norm: 0.9999991905015755, iteration: 197327
loss: 0.9856976866722107,grad_norm: 0.9999992026100796, iteration: 197328
loss: 0.9731477499008179,grad_norm: 0.9640676506347848, iteration: 197329
loss: 0.99727863073349,grad_norm: 0.9999990854441558, iteration: 197330
loss: 0.9831691980361938,grad_norm: 0.9999990236371151, iteration: 197331
loss: 1.0161762237548828,grad_norm: 0.8659899917251277, iteration: 197332
loss: 0.9827179908752441,grad_norm: 0.8410914008474647, iteration: 197333
loss: 1.0212881565093994,grad_norm: 0.9118505033460526, iteration: 197334
loss: 0.9827259182929993,grad_norm: 0.7710377476311316, iteration: 197335
loss: 1.0072259902954102,grad_norm: 0.8928911741518882, iteration: 197336
loss: 0.991011917591095,grad_norm: 0.9236941973627745, iteration: 197337
loss: 0.9949923157691956,grad_norm: 0.8946584028621196, iteration: 197338
loss: 0.9840932488441467,grad_norm: 0.97407286079436, iteration: 197339
loss: 0.9796987771987915,grad_norm: 0.9999991218176006, iteration: 197340
loss: 1.0038788318634033,grad_norm: 0.9509161184483386, iteration: 197341
loss: 0.9692495465278625,grad_norm: 0.9421502497306957, iteration: 197342
loss: 1.0082809925079346,grad_norm: 0.8363973481830436, iteration: 197343
loss: 0.93210768699646,grad_norm: 0.8687527506594072, iteration: 197344
loss: 0.998680055141449,grad_norm: 0.9999992179298377, iteration: 197345
loss: 1.0441491603851318,grad_norm: 0.9999991534718486, iteration: 197346
loss: 0.9913299083709717,grad_norm: 0.9999993348814755, iteration: 197347
loss: 1.0234359502792358,grad_norm: 0.9999990242917604, iteration: 197348
loss: 1.0320453643798828,grad_norm: 0.9999989712070438, iteration: 197349
loss: 0.9931367635726929,grad_norm: 0.8322513857871086, iteration: 197350
loss: 1.0521830320358276,grad_norm: 0.8931134666273257, iteration: 197351
loss: 0.9778487086296082,grad_norm: 0.9278521366311632, iteration: 197352
loss: 0.9943974018096924,grad_norm: 0.9783689394069192, iteration: 197353
loss: 1.001785159111023,grad_norm: 0.9927570893581457, iteration: 197354
loss: 1.0080691576004028,grad_norm: 0.831619605981914, iteration: 197355
loss: 0.9813335537910461,grad_norm: 0.9999995576916085, iteration: 197356
loss: 0.9777457118034363,grad_norm: 0.942690654276599, iteration: 197357
loss: 1.00236177444458,grad_norm: 0.9117055623106728, iteration: 197358
loss: 0.9761531949043274,grad_norm: 0.9999991472840691, iteration: 197359
loss: 1.0133728981018066,grad_norm: 0.9786311953038049, iteration: 197360
loss: 0.9945552945137024,grad_norm: 0.8728959476039002, iteration: 197361
loss: 0.9980602860450745,grad_norm: 0.9999990807935178, iteration: 197362
loss: 1.0530422925949097,grad_norm: 0.92331537116365, iteration: 197363
loss: 0.9976374506950378,grad_norm: 0.7376360630563337, iteration: 197364
loss: 0.9995458722114563,grad_norm: 0.8557270507733054, iteration: 197365
loss: 0.959433376789093,grad_norm: 0.8290185864073962, iteration: 197366
loss: 1.0435471534729004,grad_norm: 0.9149266966680109, iteration: 197367
loss: 1.0111294984817505,grad_norm: 0.9999990736466942, iteration: 197368
loss: 0.9624342918395996,grad_norm: 0.9763376175557583, iteration: 197369
loss: 0.9779103994369507,grad_norm: 0.9999991594327802, iteration: 197370
loss: 1.135596752166748,grad_norm: 0.9999992708985519, iteration: 197371
loss: 1.0297726392745972,grad_norm: 0.8157005648523121, iteration: 197372
loss: 1.0248440504074097,grad_norm: 0.9563855617084349, iteration: 197373
loss: 1.0001318454742432,grad_norm: 0.9999991349185643, iteration: 197374
loss: 0.9834033250808716,grad_norm: 0.99999909176193, iteration: 197375
loss: 1.0183947086334229,grad_norm: 0.9999989449043594, iteration: 197376
loss: 1.0145214796066284,grad_norm: 0.9534231713344575, iteration: 197377
loss: 1.0034486055374146,grad_norm: 0.9381198258679926, iteration: 197378
loss: 1.0390331745147705,grad_norm: 0.9999990570218853, iteration: 197379
loss: 1.0357542037963867,grad_norm: 0.9999996188963216, iteration: 197380
loss: 1.122961401939392,grad_norm: 0.9999991499095021, iteration: 197381
loss: 1.0826482772827148,grad_norm: 0.9999997012584795, iteration: 197382
loss: 0.9841307401657104,grad_norm: 0.999999273170681, iteration: 197383
loss: 0.9915552735328674,grad_norm: 0.8871377264392324, iteration: 197384
loss: 1.054911494255066,grad_norm: 0.9999990712138485, iteration: 197385
loss: 1.0477535724639893,grad_norm: 0.9999992978914415, iteration: 197386
loss: 0.9630863666534424,grad_norm: 0.9999991408995728, iteration: 197387
loss: 1.0113978385925293,grad_norm: 0.9999990751408384, iteration: 197388
loss: 1.0053751468658447,grad_norm: 0.8637581601706915, iteration: 197389
loss: 0.9612988829612732,grad_norm: 0.7671867657165607, iteration: 197390
loss: 0.9809325933456421,grad_norm: 0.9424033874411344, iteration: 197391
loss: 1.0001227855682373,grad_norm: 0.9684850848244347, iteration: 197392
loss: 1.0034741163253784,grad_norm: 0.8658080762213736, iteration: 197393
loss: 1.015427827835083,grad_norm: 0.8900733676953317, iteration: 197394
loss: 0.985107421875,grad_norm: 0.9504242644597616, iteration: 197395
loss: 1.0187830924987793,grad_norm: 0.999999063880112, iteration: 197396
loss: 0.9973456859588623,grad_norm: 0.7864471628681164, iteration: 197397
loss: 1.032717227935791,grad_norm: 0.8279158266385404, iteration: 197398
loss: 0.9796958565711975,grad_norm: 0.9393400645828237, iteration: 197399
loss: 0.9931554794311523,grad_norm: 0.778639488222302, iteration: 197400
loss: 0.983834981918335,grad_norm: 0.9388295908318148, iteration: 197401
loss: 0.966343343257904,grad_norm: 0.8705600458361413, iteration: 197402
loss: 1.007401466369629,grad_norm: 0.9378199475349395, iteration: 197403
loss: 1.0347832441329956,grad_norm: 0.9999994787722385, iteration: 197404
loss: 0.9488780498504639,grad_norm: 0.9793701855204106, iteration: 197405
loss: 0.95627361536026,grad_norm: 0.8795225275986756, iteration: 197406
loss: 0.9547936320304871,grad_norm: 0.8881698634304669, iteration: 197407
loss: 0.9849528670310974,grad_norm: 0.9207604749672839, iteration: 197408
loss: 1.136714220046997,grad_norm: 0.9999997243966575, iteration: 197409
loss: 0.9694499373435974,grad_norm: 0.9638182245393334, iteration: 197410
loss: 0.9882452487945557,grad_norm: 0.9999991694741476, iteration: 197411
loss: 0.98809814453125,grad_norm: 0.9708906025786559, iteration: 197412
loss: 0.9726470112800598,grad_norm: 0.8378352936966419, iteration: 197413
loss: 0.9836596250534058,grad_norm: 0.9999989638940568, iteration: 197414
loss: 1.0022356510162354,grad_norm: 0.7892931862355803, iteration: 197415
loss: 0.9942335486412048,grad_norm: 0.8991365536432623, iteration: 197416
loss: 1.0235960483551025,grad_norm: 0.7673294548129342, iteration: 197417
loss: 1.0770388841629028,grad_norm: 0.9508176131805806, iteration: 197418
loss: 0.9803828001022339,grad_norm: 0.9999994712707143, iteration: 197419
loss: 1.0002737045288086,grad_norm: 0.9331040358689516, iteration: 197420
loss: 1.0840675830841064,grad_norm: 0.9999995041650123, iteration: 197421
loss: 0.9778497219085693,grad_norm: 0.8944085179681333, iteration: 197422
loss: 1.0023881196975708,grad_norm: 0.8570872873037532, iteration: 197423
loss: 1.0116198062896729,grad_norm: 0.9805877278343659, iteration: 197424
loss: 0.9901783466339111,grad_norm: 0.9999991155081044, iteration: 197425
loss: 0.9921650886535645,grad_norm: 0.9670572383919878, iteration: 197426
loss: 1.0490092039108276,grad_norm: 0.9999993289038808, iteration: 197427
loss: 0.9993407726287842,grad_norm: 0.8913672210267692, iteration: 197428
loss: 0.9697531461715698,grad_norm: 0.9452222507647069, iteration: 197429
loss: 0.9685477614402771,grad_norm: 0.9891156050470292, iteration: 197430
loss: 1.0200475454330444,grad_norm: 0.9999992027244287, iteration: 197431
loss: 0.9741314053535461,grad_norm: 0.8420070318033687, iteration: 197432
loss: 1.0003889799118042,grad_norm: 0.9661439915677109, iteration: 197433
loss: 0.9716010093688965,grad_norm: 0.7895837047012852, iteration: 197434
loss: 0.9956403970718384,grad_norm: 0.9999991964897492, iteration: 197435
loss: 1.077041506767273,grad_norm: 0.9999994257055651, iteration: 197436
loss: 1.010650396347046,grad_norm: 0.9999990932047892, iteration: 197437
loss: 1.0009595155715942,grad_norm: 0.8750728994386067, iteration: 197438
loss: 1.0751866102218628,grad_norm: 0.9925538117403354, iteration: 197439
loss: 0.9851387143135071,grad_norm: 0.9999990855485593, iteration: 197440
loss: 0.983668327331543,grad_norm: 0.807783467482376, iteration: 197441
loss: 1.0678825378417969,grad_norm: 0.9999992457703913, iteration: 197442
loss: 1.0233263969421387,grad_norm: 0.8881461093062984, iteration: 197443
loss: 0.9593380093574524,grad_norm: 0.9999992039448515, iteration: 197444
loss: 1.0347633361816406,grad_norm: 0.9999992807758044, iteration: 197445
loss: 0.9939019083976746,grad_norm: 0.9895764934089433, iteration: 197446
loss: 1.0262216329574585,grad_norm: 0.9999998205549477, iteration: 197447
loss: 0.9647428393363953,grad_norm: 0.8286547936802465, iteration: 197448
loss: 1.0080844163894653,grad_norm: 0.9999992251024922, iteration: 197449
loss: 1.0304893255233765,grad_norm: 0.7927714915509754, iteration: 197450
loss: 0.9853619337081909,grad_norm: 0.9351464236736793, iteration: 197451
loss: 0.9771891832351685,grad_norm: 0.9990872985372106, iteration: 197452
loss: 0.9686530828475952,grad_norm: 0.8945422402941706, iteration: 197453
loss: 1.0247260332107544,grad_norm: 0.9023187659679976, iteration: 197454
loss: 0.96992427110672,grad_norm: 0.8680102498391594, iteration: 197455
loss: 0.9857956171035767,grad_norm: 0.9068518763099355, iteration: 197456
loss: 1.0428516864776611,grad_norm: 0.99999901423926, iteration: 197457
loss: 0.9917880296707153,grad_norm: 0.8836070816830982, iteration: 197458
loss: 0.990875244140625,grad_norm: 0.8769483040390886, iteration: 197459
loss: 0.9774360656738281,grad_norm: 0.9999991234855755, iteration: 197460
loss: 1.0909373760223389,grad_norm: 0.9999997374178552, iteration: 197461
loss: 1.0011097192764282,grad_norm: 0.9454477289112068, iteration: 197462
loss: 1.008020043373108,grad_norm: 0.9410078676073167, iteration: 197463
loss: 1.0134960412979126,grad_norm: 0.9999989681082093, iteration: 197464
loss: 0.9866539835929871,grad_norm: 0.8229232014527303, iteration: 197465
loss: 1.01101815700531,grad_norm: 0.9999997778714794, iteration: 197466
loss: 0.9977980256080627,grad_norm: 0.9999992075741255, iteration: 197467
loss: 0.9855039715766907,grad_norm: 0.9999989827842636, iteration: 197468
loss: 0.9978475570678711,grad_norm: 0.8220960326287495, iteration: 197469
loss: 0.9598492980003357,grad_norm: 0.8205023317309255, iteration: 197470
loss: 0.9971139430999756,grad_norm: 0.8482865617190984, iteration: 197471
loss: 1.0170190334320068,grad_norm: 0.8761130380558281, iteration: 197472
loss: 0.9707208275794983,grad_norm: 0.9999991314183039, iteration: 197473
loss: 1.1634209156036377,grad_norm: 0.9999993878844994, iteration: 197474
loss: 0.9942235946655273,grad_norm: 0.8724848251257233, iteration: 197475
loss: 0.9874093532562256,grad_norm: 0.9026522793283632, iteration: 197476
loss: 0.9858884215354919,grad_norm: 0.9999991754244218, iteration: 197477
loss: 1.004368543624878,grad_norm: 0.9999990708671117, iteration: 197478
loss: 0.9799020290374756,grad_norm: 0.9999991076367895, iteration: 197479
loss: 1.0111651420593262,grad_norm: 0.9780564428230115, iteration: 197480
loss: 1.003548502922058,grad_norm: 0.9999990028032203, iteration: 197481
loss: 1.0100414752960205,grad_norm: 0.9999991454674212, iteration: 197482
loss: 0.9464457631111145,grad_norm: 0.9321221074490398, iteration: 197483
loss: 1.003191590309143,grad_norm: 0.870434975376024, iteration: 197484
loss: 0.9974095821380615,grad_norm: 0.8902160621801077, iteration: 197485
loss: 0.9946414232254028,grad_norm: 0.9776227748259794, iteration: 197486
loss: 0.9693784713745117,grad_norm: 0.9999991234015887, iteration: 197487
loss: 0.9865740537643433,grad_norm: 0.9999989359614171, iteration: 197488
loss: 1.0133427381515503,grad_norm: 0.9332859822534195, iteration: 197489
loss: 0.9977052211761475,grad_norm: 0.9999992853840098, iteration: 197490
loss: 1.0048168897628784,grad_norm: 0.9999990645534758, iteration: 197491
loss: 0.9906068444252014,grad_norm: 0.9999992867188072, iteration: 197492
loss: 1.075671672821045,grad_norm: 0.9999991593877857, iteration: 197493
loss: 0.9990376234054565,grad_norm: 0.999999026068678, iteration: 197494
loss: 1.0110195875167847,grad_norm: 0.9999988861204347, iteration: 197495
loss: 0.9986437559127808,grad_norm: 0.9512030790454521, iteration: 197496
loss: 0.9895079731941223,grad_norm: 0.8896138804063016, iteration: 197497
loss: 1.0466214418411255,grad_norm: 0.9999998157077167, iteration: 197498
loss: 1.0169481039047241,grad_norm: 0.9999991347767108, iteration: 197499
loss: 1.0311956405639648,grad_norm: 0.8748848627824778, iteration: 197500
loss: 1.0351427793502808,grad_norm: 0.911874187066997, iteration: 197501
loss: 1.0025492906570435,grad_norm: 0.9507454536101577, iteration: 197502
loss: 0.9983510971069336,grad_norm: 0.9437786475634622, iteration: 197503
loss: 0.9817640781402588,grad_norm: 0.9999991620694944, iteration: 197504
loss: 1.0260491371154785,grad_norm: 0.9999989990593005, iteration: 197505
loss: 0.9932429194450378,grad_norm: 0.9334941231277337, iteration: 197506
loss: 0.9887751340866089,grad_norm: 0.9166364648079307, iteration: 197507
loss: 0.9927051067352295,grad_norm: 0.963274319229497, iteration: 197508
loss: 1.011696219444275,grad_norm: 0.999999515390098, iteration: 197509
loss: 1.1534082889556885,grad_norm: 0.9999996498344588, iteration: 197510
loss: 0.999932050704956,grad_norm: 0.9999991451669303, iteration: 197511
loss: 1.0301239490509033,grad_norm: 0.8818261172821064, iteration: 197512
loss: 1.0471564531326294,grad_norm: 0.9999990628139541, iteration: 197513
loss: 1.0118612051010132,grad_norm: 0.9792609773456655, iteration: 197514
loss: 1.0426534414291382,grad_norm: 0.9999992555219411, iteration: 197515
loss: 0.9768311977386475,grad_norm: 0.9233436660329516, iteration: 197516
loss: 1.0222132205963135,grad_norm: 0.9665371664028121, iteration: 197517
loss: 1.0354316234588623,grad_norm: 0.9999990331146993, iteration: 197518
loss: 0.9987418055534363,grad_norm: 0.8516365872517998, iteration: 197519
loss: 0.9575679898262024,grad_norm: 0.9087834165126923, iteration: 197520
loss: 1.015366792678833,grad_norm: 0.8442040874690776, iteration: 197521
loss: 1.0330016613006592,grad_norm: 0.9999990694342924, iteration: 197522
loss: 0.9995230436325073,grad_norm: 0.9999990547546174, iteration: 197523
loss: 0.9963697791099548,grad_norm: 0.8883919330514232, iteration: 197524
loss: 0.9899266958236694,grad_norm: 0.9999990849580804, iteration: 197525
loss: 1.0344711542129517,grad_norm: 0.7573000113907663, iteration: 197526
loss: 1.004996418952942,grad_norm: 0.9403152064833649, iteration: 197527
loss: 0.9824498891830444,grad_norm: 0.9215201305349272, iteration: 197528
loss: 1.0055567026138306,grad_norm: 0.8903284915541394, iteration: 197529
loss: 0.970163881778717,grad_norm: 0.9702647413549168, iteration: 197530
loss: 0.9674952030181885,grad_norm: 0.9265938978515579, iteration: 197531
loss: 1.01897132396698,grad_norm: 0.870899121018418, iteration: 197532
loss: 0.9615180492401123,grad_norm: 0.9021375044488734, iteration: 197533
loss: 1.0142605304718018,grad_norm: 0.9999998562083942, iteration: 197534
loss: 0.9922613501548767,grad_norm: 0.9747067628932663, iteration: 197535
loss: 1.0524075031280518,grad_norm: 0.9999991308250273, iteration: 197536
loss: 0.9874850511550903,grad_norm: 0.8932754033797191, iteration: 197537
loss: 1.026626467704773,grad_norm: 0.8577550486419724, iteration: 197538
loss: 1.0145372152328491,grad_norm: 0.999999175595635, iteration: 197539
loss: 0.9982396364212036,grad_norm: 0.9999990720192891, iteration: 197540
loss: 0.9849717020988464,grad_norm: 0.999999116514817, iteration: 197541
loss: 1.0013422966003418,grad_norm: 0.8833781394005215, iteration: 197542
loss: 0.9721799492835999,grad_norm: 0.9999990321562413, iteration: 197543
loss: 1.0696851015090942,grad_norm: 0.9999991181471956, iteration: 197544
loss: 1.0047526359558105,grad_norm: 0.9999990356397921, iteration: 197545
loss: 0.9797777533531189,grad_norm: 0.9999992487018984, iteration: 197546
loss: 0.9765036106109619,grad_norm: 0.9118782213526782, iteration: 197547
loss: 1.277518630027771,grad_norm: 0.9999998611873603, iteration: 197548
loss: 0.9830648899078369,grad_norm: 0.9408870162142068, iteration: 197549
loss: 1.0156103372573853,grad_norm: 0.9999996762800911, iteration: 197550
loss: 0.9994311928749084,grad_norm: 0.8892635427027914, iteration: 197551
loss: 1.0071046352386475,grad_norm: 0.9999997702150409, iteration: 197552
loss: 0.9826450943946838,grad_norm: 0.9881832606403145, iteration: 197553
loss: 1.0203129053115845,grad_norm: 0.9786628229995827, iteration: 197554
loss: 0.9894388914108276,grad_norm: 0.9999990277629538, iteration: 197555
loss: 1.1717170476913452,grad_norm: 0.887757940427142, iteration: 197556
loss: 1.082593560218811,grad_norm: 0.9936236568438189, iteration: 197557
loss: 1.0631123781204224,grad_norm: 0.9999999682789181, iteration: 197558
loss: 0.9775952100753784,grad_norm: 0.9906460474452308, iteration: 197559
loss: 1.1156376600265503,grad_norm: 0.9999994235042345, iteration: 197560
loss: 0.9869795441627502,grad_norm: 0.9999991221969877, iteration: 197561
loss: 0.974434494972229,grad_norm: 0.9225269871751544, iteration: 197562
loss: 1.0132732391357422,grad_norm: 0.9666517177314439, iteration: 197563
loss: 0.9785085320472717,grad_norm: 0.9999990975524368, iteration: 197564
loss: 1.0255448818206787,grad_norm: 0.9590671870741191, iteration: 197565
loss: 1.148328423500061,grad_norm: 0.9999991740156216, iteration: 197566
loss: 1.0210779905319214,grad_norm: 0.9393491334738077, iteration: 197567
loss: 1.079223871231079,grad_norm: 0.9999998646355837, iteration: 197568
loss: 1.0105870962142944,grad_norm: 0.9712167868655404, iteration: 197569
loss: 1.0145031213760376,grad_norm: 0.9999992656719583, iteration: 197570
loss: 1.015528917312622,grad_norm: 0.9423221248110248, iteration: 197571
loss: 1.0057814121246338,grad_norm: 0.8039147994093343, iteration: 197572
loss: 1.0591387748718262,grad_norm: 0.9999994745599304, iteration: 197573
loss: 1.0862598419189453,grad_norm: 0.999999841271208, iteration: 197574
loss: 1.029872179031372,grad_norm: 0.8960361508926251, iteration: 197575
loss: 1.0262761116027832,grad_norm: 0.9999990676482537, iteration: 197576
loss: 1.0348997116088867,grad_norm: 0.9999991452342145, iteration: 197577
loss: 1.1663875579833984,grad_norm: 0.9999999642694495, iteration: 197578
loss: 0.9991711974143982,grad_norm: 0.885802447450867, iteration: 197579
loss: 1.1757397651672363,grad_norm: 0.9999992846999872, iteration: 197580
loss: 1.0048093795776367,grad_norm: 0.9999991923082747, iteration: 197581
loss: 1.0141304731369019,grad_norm: 0.9999993422331065, iteration: 197582
loss: 1.0115872621536255,grad_norm: 0.999999847293863, iteration: 197583
loss: 1.0160080194473267,grad_norm: 0.8401997490039115, iteration: 197584
loss: 1.0340628623962402,grad_norm: 0.9726547075801804, iteration: 197585
loss: 0.9963456988334656,grad_norm: 0.9433782685365552, iteration: 197586
loss: 1.015821099281311,grad_norm: 0.979676377277069, iteration: 197587
loss: 0.9844698905944824,grad_norm: 0.9999991363739138, iteration: 197588
loss: 0.9697872400283813,grad_norm: 0.999999064416797, iteration: 197589
loss: 1.009393572807312,grad_norm: 0.9955185854764357, iteration: 197590
loss: 0.9921150803565979,grad_norm: 0.9588565275888887, iteration: 197591
loss: 1.1709429025650024,grad_norm: 0.9999993701141537, iteration: 197592
loss: 1.0448167324066162,grad_norm: 0.9999990872601, iteration: 197593
loss: 1.0550471544265747,grad_norm: 0.958534484066097, iteration: 197594
loss: 1.0680041313171387,grad_norm: 0.9999994186695292, iteration: 197595
loss: 1.0248963832855225,grad_norm: 0.9999993156329187, iteration: 197596
loss: 1.0050677061080933,grad_norm: 0.9999998391671651, iteration: 197597
loss: 1.0061407089233398,grad_norm: 0.9999990700465997, iteration: 197598
loss: 1.0084401369094849,grad_norm: 0.999999059333133, iteration: 197599
loss: 1.015526533126831,grad_norm: 0.999999678993914, iteration: 197600
loss: 0.9703608751296997,grad_norm: 0.9999991795755154, iteration: 197601
loss: 1.0170888900756836,grad_norm: 0.9999990227278185, iteration: 197602
loss: 0.9872152209281921,grad_norm: 0.9999994205444497, iteration: 197603
loss: 0.9622692465782166,grad_norm: 0.999999000151733, iteration: 197604
loss: 0.9918479323387146,grad_norm: 0.9783522343928096, iteration: 197605
loss: 1.0621302127838135,grad_norm: 0.9999994450415167, iteration: 197606
loss: 0.9966928958892822,grad_norm: 0.9999989878111856, iteration: 197607
loss: 0.9823137521743774,grad_norm: 0.9999996676080181, iteration: 197608
loss: 0.9529609084129333,grad_norm: 0.9354853394380224, iteration: 197609
loss: 1.0217384099960327,grad_norm: 0.9186346311333734, iteration: 197610
loss: 0.9991841316223145,grad_norm: 0.9999991076204693, iteration: 197611
loss: 1.0094571113586426,grad_norm: 0.9937186809610693, iteration: 197612
loss: 1.0125352144241333,grad_norm: 0.9999991184837836, iteration: 197613
loss: 0.994403064250946,grad_norm: 0.9999991240950971, iteration: 197614
loss: 0.9801306128501892,grad_norm: 0.8277653257575376, iteration: 197615
loss: 1.0066710710525513,grad_norm: 0.9999993790756739, iteration: 197616
loss: 0.9766616821289062,grad_norm: 0.9339267699995253, iteration: 197617
loss: 1.0438129901885986,grad_norm: 0.9999991488923996, iteration: 197618
loss: 0.9901078343391418,grad_norm: 0.9999991923534128, iteration: 197619
loss: 0.9936655163764954,grad_norm: 0.7942105958152128, iteration: 197620
loss: 0.9889504313468933,grad_norm: 0.8498054685065759, iteration: 197621
loss: 0.9859092235565186,grad_norm: 0.9999990202816271, iteration: 197622
loss: 1.0104244947433472,grad_norm: 0.873683788421844, iteration: 197623
loss: 0.9861491322517395,grad_norm: 0.9645511067183647, iteration: 197624
loss: 1.0008455514907837,grad_norm: 0.9999990007655724, iteration: 197625
loss: 1.03937828540802,grad_norm: 0.9999990656383665, iteration: 197626
loss: 1.0215333700180054,grad_norm: 0.8477359944509288, iteration: 197627
loss: 1.0096702575683594,grad_norm: 0.8494361936928047, iteration: 197628
loss: 0.9883477687835693,grad_norm: 0.9999989931922495, iteration: 197629
loss: 1.0461831092834473,grad_norm: 0.9999991259471579, iteration: 197630
loss: 0.9931970238685608,grad_norm: 0.8931412665911218, iteration: 197631
loss: 1.0093268156051636,grad_norm: 0.9445231393234411, iteration: 197632
loss: 1.0198490619659424,grad_norm: 0.8647932172547377, iteration: 197633
loss: 1.0089813470840454,grad_norm: 0.9003655144793854, iteration: 197634
loss: 0.9940507411956787,grad_norm: 0.7017731322805922, iteration: 197635
loss: 1.028294324874878,grad_norm: 0.9154882697580828, iteration: 197636
loss: 0.9685959219932556,grad_norm: 0.8229268959803613, iteration: 197637
loss: 1.0022146701812744,grad_norm: 0.953715035874562, iteration: 197638
loss: 0.9410014748573303,grad_norm: 0.8868902076719544, iteration: 197639
loss: 1.0066100358963013,grad_norm: 0.9999992736456242, iteration: 197640
loss: 0.9768560528755188,grad_norm: 0.8540302869551664, iteration: 197641
loss: 1.0043267011642456,grad_norm: 0.9977764746769001, iteration: 197642
loss: 1.0009112358093262,grad_norm: 0.8752052244113374, iteration: 197643
loss: 1.0028936862945557,grad_norm: 0.9999992008641736, iteration: 197644
loss: 0.979022204875946,grad_norm: 0.9999991348196609, iteration: 197645
loss: 0.9918102622032166,grad_norm: 0.9999993423706421, iteration: 197646
loss: 0.971498966217041,grad_norm: 0.9082100451353878, iteration: 197647
loss: 1.0046665668487549,grad_norm: 0.8667274281645067, iteration: 197648
loss: 1.0275036096572876,grad_norm: 0.913572007208918, iteration: 197649
loss: 1.0102410316467285,grad_norm: 0.987670547086732, iteration: 197650
loss: 0.9764009714126587,grad_norm: 0.9196900386175745, iteration: 197651
loss: 0.9756832122802734,grad_norm: 0.9884344535631013, iteration: 197652
loss: 1.0036522150039673,grad_norm: 0.8658715851180218, iteration: 197653
loss: 0.9999799132347107,grad_norm: 0.8720344526481296, iteration: 197654
loss: 0.9981323480606079,grad_norm: 0.8503134654952639, iteration: 197655
loss: 0.9727543592453003,grad_norm: 0.9162062829865535, iteration: 197656
loss: 0.9990594983100891,grad_norm: 0.8841590340694518, iteration: 197657
loss: 1.038194179534912,grad_norm: 0.9889682429625722, iteration: 197658
loss: 0.9971247911453247,grad_norm: 0.9999992225870117, iteration: 197659
loss: 1.004644513130188,grad_norm: 0.9999990799915827, iteration: 197660
loss: 1.0580512285232544,grad_norm: 0.9999994664826647, iteration: 197661
loss: 0.986515998840332,grad_norm: 0.9824947829554578, iteration: 197662
loss: 0.9843146800994873,grad_norm: 0.7671003905185643, iteration: 197663
loss: 0.9925444722175598,grad_norm: 0.8352252613827287, iteration: 197664
loss: 1.097801923751831,grad_norm: 0.9999995097265444, iteration: 197665
loss: 1.0017896890640259,grad_norm: 0.9999990500675578, iteration: 197666
loss: 0.9864128828048706,grad_norm: 0.9999990976395857, iteration: 197667
loss: 1.0002299547195435,grad_norm: 0.9209659159486133, iteration: 197668
loss: 1.0002946853637695,grad_norm: 0.9915187395692084, iteration: 197669
loss: 1.0003407001495361,grad_norm: 0.9180903761136971, iteration: 197670
loss: 0.9860516786575317,grad_norm: 0.8779470101577486, iteration: 197671
loss: 1.0034772157669067,grad_norm: 0.9999992438833492, iteration: 197672
loss: 0.9928138256072998,grad_norm: 0.9999989748009043, iteration: 197673
loss: 1.0422438383102417,grad_norm: 0.9643241011252572, iteration: 197674
loss: 0.9867963194847107,grad_norm: 0.9762569205863568, iteration: 197675
loss: 0.9894874691963196,grad_norm: 0.812238893682306, iteration: 197676
loss: 0.9962592124938965,grad_norm: 0.9999990373623538, iteration: 197677
loss: 0.9705123901367188,grad_norm: 0.9721014438993947, iteration: 197678
loss: 1.0087850093841553,grad_norm: 0.9127400423860208, iteration: 197679
loss: 0.9573907852172852,grad_norm: 0.999999114288988, iteration: 197680
loss: 0.9980496168136597,grad_norm: 0.999999054079481, iteration: 197681
loss: 0.9926767945289612,grad_norm: 0.8233374925664514, iteration: 197682
loss: 0.9843021035194397,grad_norm: 0.9999989309004224, iteration: 197683
loss: 0.9735134243965149,grad_norm: 0.9461981179887731, iteration: 197684
loss: 0.9967420101165771,grad_norm: 0.9999991906326415, iteration: 197685
loss: 0.9718478322029114,grad_norm: 0.8954220674634855, iteration: 197686
loss: 0.9955019950866699,grad_norm: 0.8404229849300141, iteration: 197687
loss: 0.9808201789855957,grad_norm: 0.8193686617686807, iteration: 197688
loss: 1.0022848844528198,grad_norm: 0.9544673367236199, iteration: 197689
loss: 0.9777141213417053,grad_norm: 0.9899215265736567, iteration: 197690
loss: 1.0150710344314575,grad_norm: 0.9294650239411373, iteration: 197691
loss: 1.0136940479278564,grad_norm: 0.9479909731592312, iteration: 197692
loss: 1.0090566873550415,grad_norm: 0.7944522450545316, iteration: 197693
loss: 1.041762113571167,grad_norm: 0.9391115800525202, iteration: 197694
loss: 0.9954712986946106,grad_norm: 0.8722912748478341, iteration: 197695
loss: 1.2048401832580566,grad_norm: 0.9999998474958135, iteration: 197696
loss: 1.0033111572265625,grad_norm: 0.9999991068468278, iteration: 197697
loss: 1.0847042798995972,grad_norm: 0.9999995306285994, iteration: 197698
loss: 0.986042320728302,grad_norm: 0.9197914312963074, iteration: 197699
loss: 0.9884227514266968,grad_norm: 0.9999994639779686, iteration: 197700
loss: 1.0453479290008545,grad_norm: 0.9999993982094336, iteration: 197701
loss: 1.003049373626709,grad_norm: 0.9692924823036786, iteration: 197702
loss: 1.0029326677322388,grad_norm: 0.9011618570040332, iteration: 197703
loss: 1.0038731098175049,grad_norm: 0.9192302814462241, iteration: 197704
loss: 1.0034010410308838,grad_norm: 0.9999993984049741, iteration: 197705
loss: 1.0182174444198608,grad_norm: 0.9999991214259292, iteration: 197706
loss: 0.9700633883476257,grad_norm: 0.8058661559503503, iteration: 197707
loss: 0.9916459918022156,grad_norm: 0.9831516681317602, iteration: 197708
loss: 1.0036216974258423,grad_norm: 0.9999989872338833, iteration: 197709
loss: 0.9678795337677002,grad_norm: 0.9691810626122344, iteration: 197710
loss: 1.0440146923065186,grad_norm: 0.9999994540022475, iteration: 197711
loss: 1.0351238250732422,grad_norm: 0.9052401495271376, iteration: 197712
loss: 1.0052013397216797,grad_norm: 0.8419887967047043, iteration: 197713
loss: 0.9821213483810425,grad_norm: 0.999999156151011, iteration: 197714
loss: 0.9997415542602539,grad_norm: 0.8755359617396741, iteration: 197715
loss: 1.0121630430221558,grad_norm: 0.9999993171203023, iteration: 197716
loss: 0.9575739502906799,grad_norm: 0.8089615146188313, iteration: 197717
loss: 1.0200344324111938,grad_norm: 0.9999989707414682, iteration: 197718
loss: 1.041486144065857,grad_norm: 0.9999990696580217, iteration: 197719
loss: 0.9982855319976807,grad_norm: 0.9999990446079589, iteration: 197720
loss: 0.9713680744171143,grad_norm: 0.9708150196738693, iteration: 197721
loss: 0.9853214621543884,grad_norm: 0.7934836208080425, iteration: 197722
loss: 1.0162734985351562,grad_norm: 0.9999993244820781, iteration: 197723
loss: 0.9756395816802979,grad_norm: 0.9999989561825855, iteration: 197724
loss: 1.0002864599227905,grad_norm: 0.9790691322101702, iteration: 197725
loss: 1.0472363233566284,grad_norm: 0.9999990000386718, iteration: 197726
loss: 1.051796793937683,grad_norm: 0.9999991781446791, iteration: 197727
loss: 0.9603050351142883,grad_norm: 0.9429372779195797, iteration: 197728
loss: 0.9945101737976074,grad_norm: 0.9999993063238909, iteration: 197729
loss: 1.035141944885254,grad_norm: 0.9582849910480021, iteration: 197730
loss: 1.0182524919509888,grad_norm: 0.9999992426277542, iteration: 197731
loss: 1.0180634260177612,grad_norm: 0.999999235965474, iteration: 197732
loss: 1.086905598640442,grad_norm: 0.9999990020055892, iteration: 197733
loss: 1.0110018253326416,grad_norm: 0.8218784080932282, iteration: 197734
loss: 0.9956578612327576,grad_norm: 0.9026732947475148, iteration: 197735
loss: 0.9862544536590576,grad_norm: 0.9999990035734628, iteration: 197736
loss: 1.0343683958053589,grad_norm: 0.9882864516195722, iteration: 197737
loss: 1.0066672563552856,grad_norm: 0.9785362470696151, iteration: 197738
loss: 1.024746060371399,grad_norm: 0.9999991407192849, iteration: 197739
loss: 1.0080124139785767,grad_norm: 0.9999991482409631, iteration: 197740
loss: 1.003820776939392,grad_norm: 0.8965375869554212, iteration: 197741
loss: 0.9868181943893433,grad_norm: 0.8176976188379298, iteration: 197742
loss: 1.0271457433700562,grad_norm: 0.999999195540349, iteration: 197743
loss: 0.9857118129730225,grad_norm: 0.8374774210602242, iteration: 197744
loss: 1.028694987297058,grad_norm: 0.99999930153801, iteration: 197745
loss: 0.979056715965271,grad_norm: 0.9460289473915368, iteration: 197746
loss: 1.028594970703125,grad_norm: 0.9118425897279326, iteration: 197747
loss: 1.0301932096481323,grad_norm: 0.9999990350026172, iteration: 197748
loss: 0.9953275918960571,grad_norm: 0.985213795657089, iteration: 197749
loss: 1.0155413150787354,grad_norm: 0.9999993967982518, iteration: 197750
loss: 1.0061261653900146,grad_norm: 0.9999996630810085, iteration: 197751
loss: 1.0411007404327393,grad_norm: 0.9999998352074342, iteration: 197752
loss: 0.9761328101158142,grad_norm: 0.9972940946059122, iteration: 197753
loss: 0.9908814430236816,grad_norm: 0.9576536583286626, iteration: 197754
loss: 0.9795883893966675,grad_norm: 0.999999217823192, iteration: 197755
loss: 1.0021164417266846,grad_norm: 0.8964689641941828, iteration: 197756
loss: 0.977964460849762,grad_norm: 0.9752494657231817, iteration: 197757
loss: 1.0325803756713867,grad_norm: 0.9999992349020587, iteration: 197758
loss: 1.0218040943145752,grad_norm: 0.9999997178929219, iteration: 197759
loss: 1.0611244440078735,grad_norm: 0.8350069237709139, iteration: 197760
loss: 1.0083622932434082,grad_norm: 0.9578807752059353, iteration: 197761
loss: 1.0083292722702026,grad_norm: 0.8388520051253544, iteration: 197762
loss: 0.978223979473114,grad_norm: 0.9999991336744718, iteration: 197763
loss: 0.991733193397522,grad_norm: 0.9999991627649025, iteration: 197764
loss: 0.982879102230072,grad_norm: 0.9999990912477724, iteration: 197765
loss: 1.0159947872161865,grad_norm: 0.8571621786861858, iteration: 197766
loss: 0.9954119324684143,grad_norm: 0.9314503508689672, iteration: 197767
loss: 1.012449026107788,grad_norm: 0.9999994766284901, iteration: 197768
loss: 1.0621286630630493,grad_norm: 0.9999996204820798, iteration: 197769
loss: 0.9814634919166565,grad_norm: 0.8572325433546, iteration: 197770
loss: 1.0019643306732178,grad_norm: 0.9432362116604438, iteration: 197771
loss: 1.0763444900512695,grad_norm: 0.9999994059407749, iteration: 197772
loss: 0.9758557081222534,grad_norm: 0.820488710495407, iteration: 197773
loss: 1.0283842086791992,grad_norm: 0.9510751828939181, iteration: 197774
loss: 1.0050020217895508,grad_norm: 0.9309012865922845, iteration: 197775
loss: 0.9954157471656799,grad_norm: 0.8644559119971182, iteration: 197776
loss: 1.0011693239212036,grad_norm: 0.8686475730880532, iteration: 197777
loss: 1.011225700378418,grad_norm: 0.8640272968957112, iteration: 197778
loss: 1.011120319366455,grad_norm: 0.9999996450339846, iteration: 197779
loss: 0.985886812210083,grad_norm: 0.9999990648527913, iteration: 197780
loss: 1.0005242824554443,grad_norm: 0.9615839446490825, iteration: 197781
loss: 0.9931024312973022,grad_norm: 0.8920381446170306, iteration: 197782
loss: 0.9947039484977722,grad_norm: 0.7692994302228615, iteration: 197783
loss: 1.0123990774154663,grad_norm: 0.9999997045013694, iteration: 197784
loss: 1.0943080186843872,grad_norm: 0.9999995448456339, iteration: 197785
loss: 1.012988567352295,grad_norm: 0.9902331532199848, iteration: 197786
loss: 0.9873212575912476,grad_norm: 0.9184060411657281, iteration: 197787
loss: 0.9613829851150513,grad_norm: 0.8908569246575201, iteration: 197788
loss: 1.009995937347412,grad_norm: 0.9760939286208388, iteration: 197789
loss: 0.9646886587142944,grad_norm: 0.9615735490186208, iteration: 197790
loss: 1.0049043893814087,grad_norm: 0.9622487056078372, iteration: 197791
loss: 1.0074787139892578,grad_norm: 0.9999991225273922, iteration: 197792
loss: 1.049668312072754,grad_norm: 0.999998947205633, iteration: 197793
loss: 1.0017164945602417,grad_norm: 0.8774607756347655, iteration: 197794
loss: 1.0096638202667236,grad_norm: 0.9930584135730411, iteration: 197795
loss: 0.9780407547950745,grad_norm: 0.9999992048313469, iteration: 197796
loss: 0.9977259635925293,grad_norm: 0.9612053659367962, iteration: 197797
loss: 1.00999116897583,grad_norm: 0.9999994303544761, iteration: 197798
loss: 1.0048164129257202,grad_norm: 0.9774306395024727, iteration: 197799
loss: 1.0062146186828613,grad_norm: 0.9999990843956427, iteration: 197800
loss: 1.0532381534576416,grad_norm: 0.9999993775268878, iteration: 197801
loss: 0.9899933338165283,grad_norm: 0.906871087356896, iteration: 197802
loss: 0.9806686043739319,grad_norm: 0.9999990821416809, iteration: 197803
loss: 1.0791720151901245,grad_norm: 0.99999908619815, iteration: 197804
loss: 0.9745481014251709,grad_norm: 0.9505454319548167, iteration: 197805
loss: 0.9728487730026245,grad_norm: 0.9606024522479525, iteration: 197806
loss: 0.9909478425979614,grad_norm: 0.9999991494160441, iteration: 197807
loss: 1.0012818574905396,grad_norm: 0.7726515260674363, iteration: 197808
loss: 1.0223864316940308,grad_norm: 0.9848501889291693, iteration: 197809
loss: 1.0033892393112183,grad_norm: 0.748755241425687, iteration: 197810
loss: 1.0000035762786865,grad_norm: 0.9132217194850053, iteration: 197811
loss: 1.0467536449432373,grad_norm: 0.8804233478007715, iteration: 197812
loss: 1.0081593990325928,grad_norm: 0.9608253548062718, iteration: 197813
loss: 1.0264211893081665,grad_norm: 0.9999991061353249, iteration: 197814
loss: 0.987421989440918,grad_norm: 0.9118456794263814, iteration: 197815
loss: 1.0309185981750488,grad_norm: 0.9999989524561232, iteration: 197816
loss: 1.0264946222305298,grad_norm: 0.999999181763086, iteration: 197817
loss: 1.035874366760254,grad_norm: 0.9088079832954291, iteration: 197818
loss: 0.9956764578819275,grad_norm: 0.9116826277190698, iteration: 197819
loss: 1.031848430633545,grad_norm: 0.9993925596850735, iteration: 197820
loss: 1.0090807676315308,grad_norm: 0.999999164447672, iteration: 197821
loss: 1.0560286045074463,grad_norm: 0.9999992386794846, iteration: 197822
loss: 1.0455937385559082,grad_norm: 0.9999992301762796, iteration: 197823
loss: 0.9931656122207642,grad_norm: 0.9317020457736784, iteration: 197824
loss: 0.9932736158370972,grad_norm: 0.8057724139462392, iteration: 197825
loss: 0.9915561079978943,grad_norm: 0.9999991250370294, iteration: 197826
loss: 1.0363320112228394,grad_norm: 0.9999997423691362, iteration: 197827
loss: 1.0440912246704102,grad_norm: 0.9681933553985279, iteration: 197828
loss: 0.9772666692733765,grad_norm: 0.9287404769499289, iteration: 197829
loss: 1.0115045309066772,grad_norm: 0.9999990459666532, iteration: 197830
loss: 1.0012527704238892,grad_norm: 0.999999306799321, iteration: 197831
loss: 0.9928975105285645,grad_norm: 0.8724332862954512, iteration: 197832
loss: 0.9938027262687683,grad_norm: 0.9793679100673054, iteration: 197833
loss: 0.9967049360275269,grad_norm: 0.9936288473993427, iteration: 197834
loss: 1.014489769935608,grad_norm: 0.999998957046624, iteration: 197835
loss: 0.954630970954895,grad_norm: 0.9999991939898066, iteration: 197836
loss: 0.9893847703933716,grad_norm: 0.9037965947653461, iteration: 197837
loss: 1.0020850896835327,grad_norm: 0.9999989586061285, iteration: 197838
loss: 0.9927600622177124,grad_norm: 0.8914402276262761, iteration: 197839
loss: 1.0056232213974,grad_norm: 0.9999991244608949, iteration: 197840
loss: 1.020065426826477,grad_norm: 0.9999992346614663, iteration: 197841
loss: 0.981207013130188,grad_norm: 0.9999991107345194, iteration: 197842
loss: 1.0054322481155396,grad_norm: 0.9999991604606451, iteration: 197843
loss: 0.9729169607162476,grad_norm: 0.9999991825235666, iteration: 197844
loss: 1.0341354608535767,grad_norm: 0.8877969922304825, iteration: 197845
loss: 0.9682949781417847,grad_norm: 0.9999991932728413, iteration: 197846
loss: 0.9816351532936096,grad_norm: 0.9999990202390293, iteration: 197847
loss: 0.9910266399383545,grad_norm: 0.8882989984008376, iteration: 197848
loss: 0.9897253513336182,grad_norm: 0.8830714856105902, iteration: 197849
loss: 1.0042073726654053,grad_norm: 0.999999127420299, iteration: 197850
loss: 1.0267544984817505,grad_norm: 0.9198091717640959, iteration: 197851
loss: 0.9831864237785339,grad_norm: 0.9999992325344425, iteration: 197852
loss: 0.9818083643913269,grad_norm: 0.8357744767259636, iteration: 197853
loss: 1.0262256860733032,grad_norm: 0.8561407586362446, iteration: 197854
loss: 1.0050699710845947,grad_norm: 0.9531442923960453, iteration: 197855
loss: 1.0222543478012085,grad_norm: 0.9060468077915631, iteration: 197856
loss: 1.0463694334030151,grad_norm: 0.9150936708716657, iteration: 197857
loss: 1.0107790231704712,grad_norm: 0.9999991836826637, iteration: 197858
loss: 0.9908164143562317,grad_norm: 0.8962630199707853, iteration: 197859
loss: 1.0267839431762695,grad_norm: 0.834138234656105, iteration: 197860
loss: 0.9807208180427551,grad_norm: 0.9999990736213609, iteration: 197861
loss: 1.0268831253051758,grad_norm: 0.9999998745448447, iteration: 197862
loss: 1.005202054977417,grad_norm: 0.9980509979701699, iteration: 197863
loss: 0.9816869497299194,grad_norm: 0.9339577675880282, iteration: 197864
loss: 0.9941397309303284,grad_norm: 0.9396706746466629, iteration: 197865
loss: 0.9592782258987427,grad_norm: 0.9970873898701718, iteration: 197866
loss: 1.0338032245635986,grad_norm: 0.9344101802069639, iteration: 197867
loss: 1.0205810070037842,grad_norm: 0.9999992186819128, iteration: 197868
loss: 0.9686222076416016,grad_norm: 0.9020159172495514, iteration: 197869
loss: 1.0067976713180542,grad_norm: 0.8256720492434411, iteration: 197870
loss: 1.0008599758148193,grad_norm: 0.9999991524101038, iteration: 197871
loss: 1.0090062618255615,grad_norm: 0.8632063011602871, iteration: 197872
loss: 0.9837263226509094,grad_norm: 0.9530475777567089, iteration: 197873
loss: 0.9748338460922241,grad_norm: 0.9999989491960575, iteration: 197874
loss: 1.0270507335662842,grad_norm: 0.8578598878197835, iteration: 197875
loss: 0.9673076868057251,grad_norm: 0.9999991125529202, iteration: 197876
loss: 0.9959174394607544,grad_norm: 0.9520475675120142, iteration: 197877
loss: 0.9914294481277466,grad_norm: 0.9999991518279452, iteration: 197878
loss: 0.9685049653053284,grad_norm: 0.9344231423350745, iteration: 197879
loss: 1.0119248628616333,grad_norm: 0.8626417858906349, iteration: 197880
loss: 1.0462886095046997,grad_norm: 0.9999992189662317, iteration: 197881
loss: 1.014750599861145,grad_norm: 0.9999989942459344, iteration: 197882
loss: 0.9774378538131714,grad_norm: 0.9282211685008439, iteration: 197883
loss: 1.0103790760040283,grad_norm: 0.999999053284746, iteration: 197884
loss: 1.1631213426589966,grad_norm: 0.9999998612322736, iteration: 197885
loss: 1.0323514938354492,grad_norm: 0.9999990931729033, iteration: 197886
loss: 0.9722757935523987,grad_norm: 0.9830511809969009, iteration: 197887
loss: 0.985120415687561,grad_norm: 0.9379231241832964, iteration: 197888
loss: 1.0066725015640259,grad_norm: 0.8875760169008925, iteration: 197889
loss: 0.9824890494346619,grad_norm: 0.9999992504999976, iteration: 197890
loss: 1.0337520837783813,grad_norm: 0.999999857249416, iteration: 197891
loss: 0.9875317811965942,grad_norm: 0.8875529398926086, iteration: 197892
loss: 1.009181022644043,grad_norm: 0.9999992450340635, iteration: 197893
loss: 0.975749671459198,grad_norm: 0.9999991152384875, iteration: 197894
loss: 1.0154657363891602,grad_norm: 0.99999974596445, iteration: 197895
loss: 0.9989038109779358,grad_norm: 0.84772866831259, iteration: 197896
loss: 0.9876564741134644,grad_norm: 0.9999992725764021, iteration: 197897
loss: 1.0030354261398315,grad_norm: 0.9854435744729366, iteration: 197898
loss: 0.9746692776679993,grad_norm: 0.9999991432947524, iteration: 197899
loss: 0.9853324890136719,grad_norm: 0.9750774138591839, iteration: 197900
loss: 1.0048400163650513,grad_norm: 0.9101490199697284, iteration: 197901
loss: 0.9639139771461487,grad_norm: 0.9999990070604478, iteration: 197902
loss: 0.9886829257011414,grad_norm: 0.9999990592294553, iteration: 197903
loss: 1.0056332349777222,grad_norm: 0.999998985864556, iteration: 197904
loss: 1.0443317890167236,grad_norm: 0.9591383373997096, iteration: 197905
loss: 0.9850077629089355,grad_norm: 0.999999250548419, iteration: 197906
loss: 1.0273878574371338,grad_norm: 0.9999992039306912, iteration: 197907
loss: 0.9885675311088562,grad_norm: 0.899132596272012, iteration: 197908
loss: 1.163578748703003,grad_norm: 0.9999991971701571, iteration: 197909
loss: 1.0371731519699097,grad_norm: 0.9202512784825294, iteration: 197910
loss: 1.0110077857971191,grad_norm: 0.7517144891412171, iteration: 197911
loss: 1.0732980966567993,grad_norm: 0.9974377084524046, iteration: 197912
loss: 0.9923579692840576,grad_norm: 0.9999990430411922, iteration: 197913
loss: 1.009604811668396,grad_norm: 0.9999991230736306, iteration: 197914
loss: 0.9914360642433167,grad_norm: 0.8315788826007293, iteration: 197915
loss: 0.9981262683868408,grad_norm: 0.9745588211116543, iteration: 197916
loss: 1.052998423576355,grad_norm: 0.9999999293433061, iteration: 197917
loss: 1.0200674533843994,grad_norm: 0.9999990383978619, iteration: 197918
loss: 1.01675283908844,grad_norm: 0.8290066738448676, iteration: 197919
loss: 1.0190343856811523,grad_norm: 0.9999992262723536, iteration: 197920
loss: 1.0009511709213257,grad_norm: 0.9999991849800048, iteration: 197921
loss: 0.9829842448234558,grad_norm: 0.8350543553832342, iteration: 197922
loss: 0.9860071539878845,grad_norm: 0.8499471128495416, iteration: 197923
loss: 1.0386083126068115,grad_norm: 0.9999997190370535, iteration: 197924
loss: 1.1187361478805542,grad_norm: 0.9999997059967978, iteration: 197925
loss: 1.0165057182312012,grad_norm: 0.9999998124577788, iteration: 197926
loss: 0.9919331073760986,grad_norm: 0.7632957042573513, iteration: 197927
loss: 1.0276992321014404,grad_norm: 0.9999991687339134, iteration: 197928
loss: 1.027345895767212,grad_norm: 0.9697688714752378, iteration: 197929
loss: 0.9848726391792297,grad_norm: 0.9999991474158245, iteration: 197930
loss: 1.0072485208511353,grad_norm: 0.9691225485194205, iteration: 197931
loss: 1.0002388954162598,grad_norm: 0.9999991146903904, iteration: 197932
loss: 1.0231748819351196,grad_norm: 0.82480585730116, iteration: 197933
loss: 1.0169700384140015,grad_norm: 0.9850310726065782, iteration: 197934
loss: 0.9870956540107727,grad_norm: 0.8930782859965897, iteration: 197935
loss: 1.0269670486450195,grad_norm: 0.95613675852005, iteration: 197936
loss: 0.9714094400405884,grad_norm: 0.8062060283112544, iteration: 197937
loss: 0.9940775632858276,grad_norm: 0.8493676851311569, iteration: 197938
loss: 1.0537830591201782,grad_norm: 0.9999992458673533, iteration: 197939
loss: 1.0468025207519531,grad_norm: 0.9373821074471367, iteration: 197940
loss: 1.0340989828109741,grad_norm: 0.9999997338499393, iteration: 197941
loss: 1.0071035623550415,grad_norm: 0.9999992737977181, iteration: 197942
loss: 1.0701147317886353,grad_norm: 0.999999079915127, iteration: 197943
loss: 0.9853543043136597,grad_norm: 0.8983392586858905, iteration: 197944
loss: 1.0102713108062744,grad_norm: 0.9152355791942179, iteration: 197945
loss: 0.9973616003990173,grad_norm: 0.8247479343796366, iteration: 197946
loss: 0.9920638203620911,grad_norm: 0.9999991306512086, iteration: 197947
loss: 1.0269817113876343,grad_norm: 0.9999991076940788, iteration: 197948
loss: 1.0017982721328735,grad_norm: 0.999999203867345, iteration: 197949
loss: 0.9956004023551941,grad_norm: 0.999999961582777, iteration: 197950
loss: 0.9934278130531311,grad_norm: 0.8798027227007467, iteration: 197951
loss: 0.9450170397758484,grad_norm: 0.8945178179474659, iteration: 197952
loss: 1.0226917266845703,grad_norm: 0.9088632621721909, iteration: 197953
loss: 1.0164117813110352,grad_norm: 0.9999990739043113, iteration: 197954
loss: 0.9623338580131531,grad_norm: 0.9661642178085588, iteration: 197955
loss: 0.98967045545578,grad_norm: 0.9526376625496509, iteration: 197956
loss: 1.0014365911483765,grad_norm: 0.9476546771749574, iteration: 197957
loss: 1.137411117553711,grad_norm: 0.9999997790550574, iteration: 197958
loss: 1.0126640796661377,grad_norm: 0.945879146663651, iteration: 197959
loss: 0.9991920590400696,grad_norm: 0.9924273122524492, iteration: 197960
loss: 1.0201939344406128,grad_norm: 0.9999992394465799, iteration: 197961
loss: 1.0278178453445435,grad_norm: 0.8244320290348691, iteration: 197962
loss: 0.9746493697166443,grad_norm: 0.8148775667558824, iteration: 197963
loss: 0.989233136177063,grad_norm: 0.8881968306034335, iteration: 197964
loss: 0.9727110266685486,grad_norm: 0.9309672555917474, iteration: 197965
loss: 0.9915603399276733,grad_norm: 0.9999990239837132, iteration: 197966
loss: 0.9925506711006165,grad_norm: 0.8812519349011672, iteration: 197967
loss: 0.9518896341323853,grad_norm: 0.9999990983773775, iteration: 197968
loss: 0.9450856447219849,grad_norm: 0.9096183642249349, iteration: 197969
loss: 0.9882869720458984,grad_norm: 0.9999992674373135, iteration: 197970
loss: 0.9573788642883301,grad_norm: 0.9399523554075545, iteration: 197971
loss: 0.9649893641471863,grad_norm: 0.9505969132980574, iteration: 197972
loss: 1.030962586402893,grad_norm: 0.9999997037939826, iteration: 197973
loss: 1.0214792490005493,grad_norm: 0.9999991349430709, iteration: 197974
loss: 1.0300194025039673,grad_norm: 0.8886492778143209, iteration: 197975
loss: 1.012274146080017,grad_norm: 0.8773950624861387, iteration: 197976
loss: 0.9926539659500122,grad_norm: 0.8850696904500798, iteration: 197977
loss: 1.0429660081863403,grad_norm: 0.9999991536005755, iteration: 197978
loss: 1.0040349960327148,grad_norm: 0.820162203023701, iteration: 197979
loss: 0.9834240674972534,grad_norm: 0.9999990113676843, iteration: 197980
loss: 1.0592596530914307,grad_norm: 0.9999990871621911, iteration: 197981
loss: 1.4327528476715088,grad_norm: 0.9999999804386512, iteration: 197982
loss: 1.0119402408599854,grad_norm: 0.9999990997678946, iteration: 197983
loss: 1.0177432298660278,grad_norm: 0.9999989991012204, iteration: 197984
loss: 0.9666443467140198,grad_norm: 0.9999992933396127, iteration: 197985
loss: 1.0266131162643433,grad_norm: 0.8677813931632764, iteration: 197986
loss: 0.9798722267150879,grad_norm: 0.86236596705202, iteration: 197987
loss: 1.0506644248962402,grad_norm: 0.9999990218074553, iteration: 197988
loss: 1.0646765232086182,grad_norm: 0.9999991727304207, iteration: 197989
loss: 1.000259518623352,grad_norm: 0.8493774774559106, iteration: 197990
loss: 1.0310975313186646,grad_norm: 0.9651196195140048, iteration: 197991
loss: 1.1598995923995972,grad_norm: 0.9999995675373372, iteration: 197992
loss: 1.0220634937286377,grad_norm: 0.8387175282090923, iteration: 197993
loss: 1.0074968338012695,grad_norm: 0.9999990700134783, iteration: 197994
loss: 1.0010802745819092,grad_norm: 0.8918765633053122, iteration: 197995
loss: 0.9987918138504028,grad_norm: 0.9999993058895009, iteration: 197996
loss: 1.0343537330627441,grad_norm: 0.9655271101345824, iteration: 197997
loss: 1.0343419313430786,grad_norm: 0.9999992473042107, iteration: 197998
loss: 0.9930427670478821,grad_norm: 0.9999990873593242, iteration: 197999
loss: 0.9757459759712219,grad_norm: 0.8694686762560411, iteration: 198000
loss: 0.9926349520683289,grad_norm: 0.9074381525977626, iteration: 198001
loss: 1.0749398469924927,grad_norm: 0.9999991332370461, iteration: 198002
loss: 1.0149040222167969,grad_norm: 0.9999992064908342, iteration: 198003
loss: 1.0275551080703735,grad_norm: 0.999999468704218, iteration: 198004
loss: 1.0121278762817383,grad_norm: 0.8994847180624425, iteration: 198005
loss: 0.9983910322189331,grad_norm: 0.9077783596237053, iteration: 198006
loss: 0.9762489199638367,grad_norm: 0.9308562459558295, iteration: 198007
loss: 1.029673457145691,grad_norm: 0.9999998572870212, iteration: 198008
loss: 0.9677664637565613,grad_norm: 0.9598392146926854, iteration: 198009
loss: 1.0408111810684204,grad_norm: 0.8345446348641818, iteration: 198010
loss: 0.9864049553871155,grad_norm: 0.9435444120873696, iteration: 198011
loss: 0.9952686429023743,grad_norm: 0.9999994348742179, iteration: 198012
loss: 0.9910724759101868,grad_norm: 0.8175346959041242, iteration: 198013
loss: 1.0161738395690918,grad_norm: 0.9999992101703769, iteration: 198014
loss: 1.0659188032150269,grad_norm: 0.8119555144133026, iteration: 198015
loss: 0.9967490434646606,grad_norm: 0.829781274269342, iteration: 198016
loss: 1.032772421836853,grad_norm: 0.9165303293991617, iteration: 198017
loss: 0.9715629816055298,grad_norm: 0.9565384650928264, iteration: 198018
loss: 1.108484148979187,grad_norm: 0.9999990616406232, iteration: 198019
loss: 1.0236173868179321,grad_norm: 0.9941806273986912, iteration: 198020
loss: 0.9993578195571899,grad_norm: 0.9604336266882038, iteration: 198021
loss: 0.9816784858703613,grad_norm: 0.999999206076019, iteration: 198022
loss: 1.0964137315750122,grad_norm: 0.9999995625392483, iteration: 198023
loss: 1.008856177330017,grad_norm: 0.9169078396531433, iteration: 198024
loss: 1.0509153604507446,grad_norm: 0.9352849137924119, iteration: 198025
loss: 0.9834036827087402,grad_norm: 0.8540704159141329, iteration: 198026
loss: 1.0017104148864746,grad_norm: 0.9999990973538649, iteration: 198027
loss: 0.9992617964744568,grad_norm: 0.999999024373818, iteration: 198028
loss: 1.0421303510665894,grad_norm: 0.9607907551656225, iteration: 198029
loss: 0.9876556992530823,grad_norm: 0.9032679230792791, iteration: 198030
loss: 1.0086729526519775,grad_norm: 0.9999993310090665, iteration: 198031
loss: 1.0485657453536987,grad_norm: 0.9999992607246073, iteration: 198032
loss: 0.9545624852180481,grad_norm: 0.9262634141861994, iteration: 198033
loss: 1.006877064704895,grad_norm: 0.9999990365777948, iteration: 198034
loss: 1.001955509185791,grad_norm: 0.9067663686389528, iteration: 198035
loss: 0.9777535796165466,grad_norm: 0.8317256692173253, iteration: 198036
loss: 1.0109076499938965,grad_norm: 0.9114267109304048, iteration: 198037
loss: 1.02501380443573,grad_norm: 0.9597805351573578, iteration: 198038
loss: 1.057268500328064,grad_norm: 0.9411926742899738, iteration: 198039
loss: 0.9439892768859863,grad_norm: 0.8985720537315377, iteration: 198040
loss: 0.9958690404891968,grad_norm: 0.8482991023155108, iteration: 198041
loss: 1.0085560083389282,grad_norm: 0.8765123446838156, iteration: 198042
loss: 1.0199769735336304,grad_norm: 0.9999990781282151, iteration: 198043
loss: 0.9651558995246887,grad_norm: 0.8124272237094188, iteration: 198044
loss: 1.058955430984497,grad_norm: 0.9999992516043881, iteration: 198045
loss: 1.0115134716033936,grad_norm: 0.9999992124254166, iteration: 198046
loss: 1.0240005254745483,grad_norm: 0.9916997027676582, iteration: 198047
loss: 1.0286200046539307,grad_norm: 0.9265657347729612, iteration: 198048
loss: 0.9626637101173401,grad_norm: 0.8338579728069171, iteration: 198049
loss: 0.9711494445800781,grad_norm: 0.9446273386134503, iteration: 198050
loss: 1.0215238332748413,grad_norm: 0.9999990922223245, iteration: 198051
loss: 0.9789363145828247,grad_norm: 0.879654687666884, iteration: 198052
loss: 1.020272970199585,grad_norm: 0.9999991538644719, iteration: 198053
loss: 0.9559629559516907,grad_norm: 0.8874410291181726, iteration: 198054
loss: 0.9891418218612671,grad_norm: 0.9295500831197566, iteration: 198055
loss: 1.034730076789856,grad_norm: 0.880898772387225, iteration: 198056
loss: 0.9682849049568176,grad_norm: 0.9233225074367043, iteration: 198057
loss: 1.0066672563552856,grad_norm: 0.8731866199909211, iteration: 198058
loss: 0.9821693897247314,grad_norm: 0.9028392520096374, iteration: 198059
loss: 0.9939671158790588,grad_norm: 0.8781925044407413, iteration: 198060
loss: 1.0175471305847168,grad_norm: 0.9999990112889823, iteration: 198061
loss: 1.0393164157867432,grad_norm: 0.9587585895873569, iteration: 198062
loss: 0.9810523986816406,grad_norm: 0.9368447396006913, iteration: 198063
loss: 1.029637336730957,grad_norm: 0.9999994962810452, iteration: 198064
loss: 0.9987694621086121,grad_norm: 0.9669841264847, iteration: 198065
loss: 0.9977901577949524,grad_norm: 0.9999990507182015, iteration: 198066
loss: 1.0131137371063232,grad_norm: 0.8587232204252657, iteration: 198067
loss: 1.0611802339553833,grad_norm: 0.9999991327528834, iteration: 198068
loss: 0.9923609495162964,grad_norm: 0.9999991728821016, iteration: 198069
loss: 1.1011606454849243,grad_norm: 0.9999998129855054, iteration: 198070
loss: 0.9689521193504333,grad_norm: 0.9670243058943669, iteration: 198071
loss: 0.9807711243629456,grad_norm: 0.9999990137103129, iteration: 198072
loss: 1.079176902770996,grad_norm: 0.9999995918339755, iteration: 198073
loss: 1.0341265201568604,grad_norm: 0.8526394220279192, iteration: 198074
loss: 0.9771164655685425,grad_norm: 0.9999992037395882, iteration: 198075
loss: 1.0008383989334106,grad_norm: 0.9537587790388472, iteration: 198076
loss: 1.0207581520080566,grad_norm: 0.8566973518020466, iteration: 198077
loss: 1.003572940826416,grad_norm: 0.9999990156261312, iteration: 198078
loss: 0.9909138083457947,grad_norm: 0.9024497307033442, iteration: 198079
loss: 1.0124484300613403,grad_norm: 0.977298290336058, iteration: 198080
loss: 1.0531545877456665,grad_norm: 0.9999998070451129, iteration: 198081
loss: 1.0003468990325928,grad_norm: 0.9999992412237823, iteration: 198082
loss: 1.0123889446258545,grad_norm: 0.866934800847273, iteration: 198083
loss: 1.0012681484222412,grad_norm: 0.84399292312543, iteration: 198084
loss: 1.033157467842102,grad_norm: 0.9999992063815217, iteration: 198085
loss: 0.9848081469535828,grad_norm: 0.9999994423808358, iteration: 198086
loss: 1.0035614967346191,grad_norm: 0.959311312996498, iteration: 198087
loss: 0.9781233072280884,grad_norm: 0.9082047577522993, iteration: 198088
loss: 0.9557459950447083,grad_norm: 0.96137131154665, iteration: 198089
loss: 0.9921649098396301,grad_norm: 0.9651626164054512, iteration: 198090
loss: 0.9977587461471558,grad_norm: 0.7948383676110975, iteration: 198091
loss: 1.008067011833191,grad_norm: 0.9999998150705575, iteration: 198092
loss: 1.0821877717971802,grad_norm: 0.9999993406721536, iteration: 198093
loss: 1.0138589143753052,grad_norm: 0.9691832139305772, iteration: 198094
loss: 1.0262842178344727,grad_norm: 0.9504919807532727, iteration: 198095
loss: 1.1108404397964478,grad_norm: 0.9999998224520971, iteration: 198096
loss: 0.9867367148399353,grad_norm: 0.9999991228109543, iteration: 198097
loss: 0.9902799725532532,grad_norm: 0.90249264884662, iteration: 198098
loss: 1.027949571609497,grad_norm: 0.9753227344425757, iteration: 198099
loss: 1.0553370714187622,grad_norm: 0.9214799733223858, iteration: 198100
loss: 1.0588001012802124,grad_norm: 0.9999995796945443, iteration: 198101
loss: 1.0239945650100708,grad_norm: 0.9999990998483125, iteration: 198102
loss: 1.0106886625289917,grad_norm: 0.9280919324916649, iteration: 198103
loss: 1.0330475568771362,grad_norm: 0.9999995975667196, iteration: 198104
loss: 1.012018084526062,grad_norm: 0.933708287742429, iteration: 198105
loss: 0.9873061776161194,grad_norm: 0.8502449770379383, iteration: 198106
loss: 0.9811211228370667,grad_norm: 0.9188802901530742, iteration: 198107
loss: 1.0770095586776733,grad_norm: 0.999999589461129, iteration: 198108
loss: 1.0236307382583618,grad_norm: 0.9014319609970761, iteration: 198109
loss: 1.0000752210617065,grad_norm: 0.9004669202086698, iteration: 198110
loss: 0.9807888865470886,grad_norm: 0.9999991848311224, iteration: 198111
loss: 0.9805400967597961,grad_norm: 0.9885560796189723, iteration: 198112
loss: 1.016238808631897,grad_norm: 0.999999065403581, iteration: 198113
loss: 1.0304896831512451,grad_norm: 0.9892646619328455, iteration: 198114
loss: 0.9539206027984619,grad_norm: 0.999999298861926, iteration: 198115
loss: 1.035948634147644,grad_norm: 0.9999992174760127, iteration: 198116
loss: 0.9961493015289307,grad_norm: 0.9227926609351151, iteration: 198117
loss: 1.016067624092102,grad_norm: 0.9999990972203537, iteration: 198118
loss: 0.9824963808059692,grad_norm: 0.9999990105409506, iteration: 198119
loss: 1.0131813287734985,grad_norm: 0.9446968872895776, iteration: 198120
loss: 1.017180323600769,grad_norm: 0.9726074085295645, iteration: 198121
loss: 1.0467418432235718,grad_norm: 0.9999992334634223, iteration: 198122
loss: 0.9640024304389954,grad_norm: 0.9999991745416187, iteration: 198123
loss: 0.9804282188415527,grad_norm: 0.9999992792346278, iteration: 198124
loss: 0.9998828172683716,grad_norm: 0.999999052029415, iteration: 198125
loss: 0.9546318054199219,grad_norm: 0.9888116978667963, iteration: 198126
loss: 0.9880496263504028,grad_norm: 0.9817863142839555, iteration: 198127
loss: 1.0166847705841064,grad_norm: 0.9999993081058455, iteration: 198128
loss: 0.9919154644012451,grad_norm: 0.999999075220077, iteration: 198129
loss: 1.0433059930801392,grad_norm: 0.9999992495467044, iteration: 198130
loss: 0.96842360496521,grad_norm: 0.8752155712244034, iteration: 198131
loss: 0.9751670956611633,grad_norm: 0.9851679119655107, iteration: 198132
loss: 0.9720529913902283,grad_norm: 0.8487907432013169, iteration: 198133
loss: 0.9479629993438721,grad_norm: 0.9702837051153013, iteration: 198134
loss: 1.0057157278060913,grad_norm: 0.9999991240510011, iteration: 198135
loss: 1.0022779703140259,grad_norm: 0.9999990427025218, iteration: 198136
loss: 0.9882168173789978,grad_norm: 0.8731358647175058, iteration: 198137
loss: 1.004029393196106,grad_norm: 0.8827884732804008, iteration: 198138
loss: 0.9801049828529358,grad_norm: 0.7975242006214875, iteration: 198139
loss: 1.0299086570739746,grad_norm: 0.9999997359732216, iteration: 198140
loss: 0.9942349195480347,grad_norm: 0.8080280449146442, iteration: 198141
loss: 0.9938488006591797,grad_norm: 0.9999991969234164, iteration: 198142
loss: 1.0222362279891968,grad_norm: 0.9999991588645218, iteration: 198143
loss: 1.0039124488830566,grad_norm: 0.9211117856274973, iteration: 198144
loss: 1.0487326383590698,grad_norm: 0.9999991135985493, iteration: 198145
loss: 1.0852166414260864,grad_norm: 0.9999991316682698, iteration: 198146
loss: 1.0059138536453247,grad_norm: 0.9999991609043136, iteration: 198147
loss: 0.9769617319107056,grad_norm: 0.8365903916688479, iteration: 198148
loss: 1.0129399299621582,grad_norm: 0.9999993122690066, iteration: 198149
loss: 0.9987718462944031,grad_norm: 0.9999990402796143, iteration: 198150
loss: 1.0140693187713623,grad_norm: 0.8616584815270444, iteration: 198151
loss: 0.98922199010849,grad_norm: 0.8610631720471794, iteration: 198152
loss: 0.9672338962554932,grad_norm: 0.9999991865939268, iteration: 198153
loss: 0.988817036151886,grad_norm: 0.9999991983304558, iteration: 198154
loss: 0.9777130484580994,grad_norm: 0.9999990908355115, iteration: 198155
loss: 0.9798423647880554,grad_norm: 0.952274427851898, iteration: 198156
loss: 1.0056676864624023,grad_norm: 0.9446961819276188, iteration: 198157
loss: 1.0037493705749512,grad_norm: 0.9813249176481358, iteration: 198158
loss: 1.0067754983901978,grad_norm: 0.7581078633576201, iteration: 198159
loss: 1.0870610475540161,grad_norm: 0.9999993650270853, iteration: 198160
loss: 0.9815233945846558,grad_norm: 0.908887614667879, iteration: 198161
loss: 0.9919204115867615,grad_norm: 0.8406886285320949, iteration: 198162
loss: 1.002903699874878,grad_norm: 0.8391583582316263, iteration: 198163
loss: 0.9809529781341553,grad_norm: 0.8895181733825817, iteration: 198164
loss: 1.0053651332855225,grad_norm: 0.9248540291767899, iteration: 198165
loss: 1.036301851272583,grad_norm: 0.9999991887467342, iteration: 198166
loss: 0.9956470727920532,grad_norm: 0.9999990339416428, iteration: 198167
loss: 0.9941912889480591,grad_norm: 0.8998983291000274, iteration: 198168
loss: 1.0367690324783325,grad_norm: 0.9999990844931281, iteration: 198169
loss: 1.0103954076766968,grad_norm: 0.8868936672500464, iteration: 198170
loss: 1.0210164785385132,grad_norm: 0.8353403812062808, iteration: 198171
loss: 0.9787218570709229,grad_norm: 0.8099772817768649, iteration: 198172
loss: 1.0105289220809937,grad_norm: 0.9999990438464971, iteration: 198173
loss: 0.9497073292732239,grad_norm: 0.9856295189998815, iteration: 198174
loss: 0.99629807472229,grad_norm: 0.9120661574090342, iteration: 198175
loss: 1.1414629220962524,grad_norm: 0.9999993774284583, iteration: 198176
loss: 0.9756513833999634,grad_norm: 0.923757810153224, iteration: 198177
loss: 1.0143827199935913,grad_norm: 0.9781799249960528, iteration: 198178
loss: 1.0126991271972656,grad_norm: 0.8295316906205488, iteration: 198179
loss: 0.996312141418457,grad_norm: 0.9506890114204016, iteration: 198180
loss: 1.0031970739364624,grad_norm: 0.9999990294301838, iteration: 198181
loss: 0.9775545597076416,grad_norm: 0.9355717550324538, iteration: 198182
loss: 1.009006142616272,grad_norm: 0.9575814059181188, iteration: 198183
loss: 1.0163100957870483,grad_norm: 0.8793262020017167, iteration: 198184
loss: 0.9843207597732544,grad_norm: 0.9999990726695952, iteration: 198185
loss: 0.9972425103187561,grad_norm: 0.7993456130230133, iteration: 198186
loss: 1.0294898748397827,grad_norm: 0.8739406049274717, iteration: 198187
loss: 0.9956569075584412,grad_norm: 0.9044512405531003, iteration: 198188
loss: 1.035211205482483,grad_norm: 0.9999990664803395, iteration: 198189
loss: 1.0171419382095337,grad_norm: 0.9999991742452792, iteration: 198190
loss: 1.0267019271850586,grad_norm: 0.9999991262440984, iteration: 198191
loss: 1.026370882987976,grad_norm: 0.9999989848515775, iteration: 198192
loss: 0.9633603692054749,grad_norm: 0.9478459463941864, iteration: 198193
loss: 1.0276001691818237,grad_norm: 0.9999992738269999, iteration: 198194
loss: 0.9764225482940674,grad_norm: 0.9999994223347762, iteration: 198195
loss: 1.0214455127716064,grad_norm: 0.9633338445389702, iteration: 198196
loss: 0.9748201370239258,grad_norm: 0.8679032751319277, iteration: 198197
loss: 1.0021356344223022,grad_norm: 0.9999992111280264, iteration: 198198
loss: 0.9849782586097717,grad_norm: 0.9360352627310018, iteration: 198199
loss: 1.0493193864822388,grad_norm: 0.999999202813509, iteration: 198200
loss: 1.009995698928833,grad_norm: 0.9278208685210082, iteration: 198201
loss: 0.9502499103546143,grad_norm: 0.9820081916464976, iteration: 198202
loss: 0.9860720038414001,grad_norm: 0.99999920812332, iteration: 198203
loss: 0.9958246946334839,grad_norm: 0.9222059991858397, iteration: 198204
loss: 1.0131293535232544,grad_norm: 0.9999991039923668, iteration: 198205
loss: 0.9970209002494812,grad_norm: 0.9065474569936028, iteration: 198206
loss: 0.9825912714004517,grad_norm: 0.8183706699377838, iteration: 198207
loss: 1.0191620588302612,grad_norm: 0.7951263683841283, iteration: 198208
loss: 0.9918226003646851,grad_norm: 0.9402682855383795, iteration: 198209
loss: 1.011543869972229,grad_norm: 0.9475505971454015, iteration: 198210
loss: 1.0303990840911865,grad_norm: 0.9999995856928527, iteration: 198211
loss: 1.0094406604766846,grad_norm: 0.9999991259574272, iteration: 198212
loss: 1.003320574760437,grad_norm: 0.9999991726537676, iteration: 198213
loss: 1.0137375593185425,grad_norm: 0.8582711704589012, iteration: 198214
loss: 1.0184556245803833,grad_norm: 0.9713699148417045, iteration: 198215
loss: 0.9751985669136047,grad_norm: 0.9999992344516473, iteration: 198216
loss: 0.9957301616668701,grad_norm: 0.974211709711096, iteration: 198217
loss: 1.0118366479873657,grad_norm: 0.9554770440477895, iteration: 198218
loss: 1.010353922843933,grad_norm: 0.9999995019122453, iteration: 198219
loss: 1.0005736351013184,grad_norm: 0.9999990977787929, iteration: 198220
loss: 1.0011425018310547,grad_norm: 0.8429585360441012, iteration: 198221
loss: 1.024280309677124,grad_norm: 0.9999994598194617, iteration: 198222
loss: 1.03165602684021,grad_norm: 0.9999998124683573, iteration: 198223
loss: 1.0086749792099,grad_norm: 0.9999990724839083, iteration: 198224
loss: 0.9760326147079468,grad_norm: 0.8965250539800064, iteration: 198225
loss: 1.022085189819336,grad_norm: 0.9999991917098385, iteration: 198226
loss: 0.9936163425445557,grad_norm: 0.7589303543008633, iteration: 198227
loss: 0.9893611669540405,grad_norm: 0.9999992725513601, iteration: 198228
loss: 1.0312772989273071,grad_norm: 0.9999991746718417, iteration: 198229
loss: 0.966626763343811,grad_norm: 0.9999995071489328, iteration: 198230
loss: 1.0565381050109863,grad_norm: 0.9181225815385523, iteration: 198231
loss: 1.0132931470870972,grad_norm: 0.8458270755906566, iteration: 198232
loss: 1.0270113945007324,grad_norm: 0.9999992753469739, iteration: 198233
loss: 1.0256340503692627,grad_norm: 0.999999551519001, iteration: 198234
loss: 1.0825691223144531,grad_norm: 0.9999992021349641, iteration: 198235
loss: 1.0424842834472656,grad_norm: 0.9999996433369208, iteration: 198236
loss: 0.9981351494789124,grad_norm: 0.8811586828242574, iteration: 198237
loss: 0.9764049053192139,grad_norm: 0.8923982759470118, iteration: 198238
loss: 0.9491878747940063,grad_norm: 0.8510863426002175, iteration: 198239
loss: 0.9972018003463745,grad_norm: 0.8584479425838948, iteration: 198240
loss: 0.9885639548301697,grad_norm: 0.9999990685074476, iteration: 198241
loss: 0.9928886890411377,grad_norm: 0.8939816576675584, iteration: 198242
loss: 1.0007617473602295,grad_norm: 0.9565822998517739, iteration: 198243
loss: 1.0286897420883179,grad_norm: 0.9657635384056241, iteration: 198244
loss: 1.0014805793762207,grad_norm: 0.8306809865845388, iteration: 198245
loss: 0.9588579535484314,grad_norm: 0.9999991486253962, iteration: 198246
loss: 1.0009351968765259,grad_norm: 0.9999990987386823, iteration: 198247
loss: 1.025597095489502,grad_norm: 0.9999989689303609, iteration: 198248
loss: 0.9939637780189514,grad_norm: 0.9426513305639098, iteration: 198249
loss: 1.0180238485336304,grad_norm: 0.8660084402337099, iteration: 198250
loss: 1.0108048915863037,grad_norm: 0.9803417703078715, iteration: 198251
loss: 1.0085397958755493,grad_norm: 0.78484116608667, iteration: 198252
loss: 0.9985581040382385,grad_norm: 0.9999990638080147, iteration: 198253
loss: 1.009555697441101,grad_norm: 0.830926497034116, iteration: 198254
loss: 1.0020614862442017,grad_norm: 0.8996099569144297, iteration: 198255
loss: 1.0217046737670898,grad_norm: 0.9999990918176843, iteration: 198256
loss: 1.0219556093215942,grad_norm: 0.9522822347994205, iteration: 198257
loss: 1.0013949871063232,grad_norm: 0.7762337729371832, iteration: 198258
loss: 1.0059314966201782,grad_norm: 0.9622537149218662, iteration: 198259
loss: 0.9697467684745789,grad_norm: 0.9999990495297149, iteration: 198260
loss: 0.9829561114311218,grad_norm: 0.8462098996340673, iteration: 198261
loss: 0.9793201684951782,grad_norm: 0.7670221941041335, iteration: 198262
loss: 0.9614651203155518,grad_norm: 0.9999992235974109, iteration: 198263
loss: 1.018736720085144,grad_norm: 0.9999989524675142, iteration: 198264
loss: 1.011422872543335,grad_norm: 0.9465366557611785, iteration: 198265
loss: 0.9971222281455994,grad_norm: 0.9461638145175592, iteration: 198266
loss: 0.9958499073982239,grad_norm: 0.9848449246135036, iteration: 198267
loss: 0.996904730796814,grad_norm: 0.8914275208932647, iteration: 198268
loss: 0.9842033982276917,grad_norm: 0.984444442159575, iteration: 198269
loss: 0.9477834701538086,grad_norm: 0.881637862265613, iteration: 198270
loss: 0.9606893062591553,grad_norm: 0.999998990789001, iteration: 198271
loss: 0.9692833423614502,grad_norm: 0.874947609283988, iteration: 198272
loss: 1.0323668718338013,grad_norm: 0.999999135259683, iteration: 198273
loss: 1.0132642984390259,grad_norm: 0.9999991512388876, iteration: 198274
loss: 1.0055744647979736,grad_norm: 0.999999186318329, iteration: 198275
loss: 0.9805554151535034,grad_norm: 0.9415642502745516, iteration: 198276
loss: 1.0052622556686401,grad_norm: 0.9999992615153176, iteration: 198277
loss: 1.0051158666610718,grad_norm: 0.999999216698646, iteration: 198278
loss: 1.015199065208435,grad_norm: 0.9999992540117877, iteration: 198279
loss: 0.9668863415718079,grad_norm: 0.9303684298253808, iteration: 198280
loss: 1.0032951831817627,grad_norm: 0.9999992627905074, iteration: 198281
loss: 1.0454055070877075,grad_norm: 0.9999993072889374, iteration: 198282
loss: 1.0242265462875366,grad_norm: 0.9999991856627103, iteration: 198283
loss: 1.0014878511428833,grad_norm: 0.9999991872258156, iteration: 198284
loss: 1.0120210647583008,grad_norm: 0.9058715070690047, iteration: 198285
loss: 0.9574916362762451,grad_norm: 0.99999916586794, iteration: 198286
loss: 1.032956838607788,grad_norm: 0.9999993202365306, iteration: 198287
loss: 1.001849889755249,grad_norm: 0.9999992945775759, iteration: 198288
loss: 1.036417007446289,grad_norm: 0.9999994111229694, iteration: 198289
loss: 1.0605578422546387,grad_norm: 0.9999993196240241, iteration: 198290
loss: 0.9591222405433655,grad_norm: 0.9999990128148132, iteration: 198291
loss: 0.9616702795028687,grad_norm: 0.9335201793665786, iteration: 198292
loss: 0.965143084526062,grad_norm: 0.9490798860278092, iteration: 198293
loss: 0.9866697192192078,grad_norm: 0.9102310744734488, iteration: 198294
loss: 1.023101806640625,grad_norm: 0.999998987787995, iteration: 198295
loss: 1.0039628744125366,grad_norm: 0.9999991551172279, iteration: 198296
loss: 0.9626796841621399,grad_norm: 0.9732802781056881, iteration: 198297
loss: 0.9795160889625549,grad_norm: 0.9311518998958249, iteration: 198298
loss: 0.9919976592063904,grad_norm: 0.9700504567604389, iteration: 198299
loss: 0.9828025102615356,grad_norm: 0.9705770061129868, iteration: 198300
loss: 0.97157222032547,grad_norm: 0.937169205523968, iteration: 198301
loss: 0.9845279455184937,grad_norm: 0.9051045756750178, iteration: 198302
loss: 1.0179747343063354,grad_norm: 0.9999991880519908, iteration: 198303
loss: 0.9756258726119995,grad_norm: 0.9999990024231138, iteration: 198304
loss: 0.9911822080612183,grad_norm: 0.9999989686466952, iteration: 198305
loss: 0.9877761006355286,grad_norm: 0.9905045424687249, iteration: 198306
loss: 0.9659457802772522,grad_norm: 0.8786003062405691, iteration: 198307
loss: 0.994602620601654,grad_norm: 0.999999181227171, iteration: 198308
loss: 1.0316331386566162,grad_norm: 0.9999991108030436, iteration: 198309
loss: 1.001354455947876,grad_norm: 0.9999990809760115, iteration: 198310
loss: 0.9646749496459961,grad_norm: 0.9999990425346156, iteration: 198311
loss: 1.0074107646942139,grad_norm: 0.9999991602524434, iteration: 198312
loss: 0.9654948115348816,grad_norm: 0.9234488482839592, iteration: 198313
loss: 1.0195386409759521,grad_norm: 0.9999991980383095, iteration: 198314
loss: 1.021724820137024,grad_norm: 0.9999990121717316, iteration: 198315
loss: 1.02222740650177,grad_norm: 0.9999993561157117, iteration: 198316
loss: 1.0117299556732178,grad_norm: 0.715170146166948, iteration: 198317
loss: 1.0078868865966797,grad_norm: 0.9999991258010716, iteration: 198318
loss: 0.9779129028320312,grad_norm: 0.9598038138403127, iteration: 198319
loss: 0.9846175312995911,grad_norm: 0.9999990744994727, iteration: 198320
loss: 1.0005911588668823,grad_norm: 0.9999994441347462, iteration: 198321
loss: 1.0281312465667725,grad_norm: 0.7564897735501296, iteration: 198322
loss: 0.9523734450340271,grad_norm: 0.9242545470224506, iteration: 198323
loss: 0.9846323132514954,grad_norm: 0.850699828042521, iteration: 198324
loss: 0.9673774838447571,grad_norm: 0.8508897544895815, iteration: 198325
loss: 1.01337468624115,grad_norm: 0.861237461579592, iteration: 198326
loss: 1.0351020097732544,grad_norm: 0.8630527853844091, iteration: 198327
loss: 0.9933637976646423,grad_norm: 0.734450394757699, iteration: 198328
loss: 1.0061304569244385,grad_norm: 0.9999990624654502, iteration: 198329
loss: 1.0289099216461182,grad_norm: 0.8181525085278837, iteration: 198330
loss: 0.9722627997398376,grad_norm: 0.9575944134481669, iteration: 198331
loss: 1.0029481649398804,grad_norm: 0.9999992922638403, iteration: 198332
loss: 0.9963136911392212,grad_norm: 0.9733062225427465, iteration: 198333
loss: 1.0092421770095825,grad_norm: 0.932866619448169, iteration: 198334
loss: 1.0096436738967896,grad_norm: 0.9999990756350849, iteration: 198335
loss: 0.9721972942352295,grad_norm: 0.8638848753604491, iteration: 198336
loss: 0.9636234045028687,grad_norm: 0.7018943204225033, iteration: 198337
loss: 1.0059258937835693,grad_norm: 0.9293858103104589, iteration: 198338
loss: 0.9460800290107727,grad_norm: 0.744497419372717, iteration: 198339
loss: 0.979187548160553,grad_norm: 0.8307830766313179, iteration: 198340
loss: 1.0036287307739258,grad_norm: 0.848883096976549, iteration: 198341
loss: 1.0338314771652222,grad_norm: 0.915329749703932, iteration: 198342
loss: 1.0070900917053223,grad_norm: 0.9315472440133473, iteration: 198343
loss: 0.9728707671165466,grad_norm: 0.9265942339252605, iteration: 198344
loss: 1.0047916173934937,grad_norm: 0.9110113305973252, iteration: 198345
loss: 0.9985356330871582,grad_norm: 0.9631300200105414, iteration: 198346
loss: 1.01040518283844,grad_norm: 0.8850408474651877, iteration: 198347
loss: 1.0001015663146973,grad_norm: 0.9999989589937479, iteration: 198348
loss: 0.992798924446106,grad_norm: 0.8923794191360465, iteration: 198349
loss: 1.003705620765686,grad_norm: 0.9294364075638545, iteration: 198350
loss: 1.080275297164917,grad_norm: 0.999999193267491, iteration: 198351
loss: 1.0056884288787842,grad_norm: 0.9999993753362778, iteration: 198352
loss: 1.0034935474395752,grad_norm: 0.9999991029065377, iteration: 198353
loss: 1.038209080696106,grad_norm: 0.9999993901606752, iteration: 198354
loss: 1.0264122486114502,grad_norm: 0.9914820782504513, iteration: 198355
loss: 0.9950377941131592,grad_norm: 0.9216995230239474, iteration: 198356
loss: 0.9877005815505981,grad_norm: 0.8500335968106467, iteration: 198357
loss: 0.9880320429801941,grad_norm: 0.9999990441973133, iteration: 198358
loss: 1.0098222494125366,grad_norm: 0.9710569124789676, iteration: 198359
loss: 1.016080379486084,grad_norm: 0.9999990818987673, iteration: 198360
loss: 1.0461300611495972,grad_norm: 0.8883171709576799, iteration: 198361
loss: 1.0143866539001465,grad_norm: 0.9999991135174626, iteration: 198362
loss: 0.9765658974647522,grad_norm: 0.9137873047898026, iteration: 198363
loss: 0.9988372921943665,grad_norm: 0.8757789326611807, iteration: 198364
loss: 1.0112541913986206,grad_norm: 0.9318295537303039, iteration: 198365
loss: 1.0246942043304443,grad_norm: 0.7783718097937371, iteration: 198366
loss: 0.9741138815879822,grad_norm: 0.9999990798348803, iteration: 198367
loss: 0.9731383323669434,grad_norm: 0.8525448173098267, iteration: 198368
loss: 0.9905392527580261,grad_norm: 0.91457105478347, iteration: 198369
loss: 0.9731244444847107,grad_norm: 0.9522961325206865, iteration: 198370
loss: 1.0034561157226562,grad_norm: 0.9672986906189157, iteration: 198371
loss: 1.0024592876434326,grad_norm: 0.9999991715516166, iteration: 198372
loss: 1.015798807144165,grad_norm: 0.9466773195276561, iteration: 198373
loss: 1.0157674551010132,grad_norm: 0.7702608494310685, iteration: 198374
loss: 0.975014328956604,grad_norm: 0.877377579324295, iteration: 198375
loss: 0.9952605366706848,grad_norm: 0.7947037402549694, iteration: 198376
loss: 1.0110468864440918,grad_norm: 0.8808733280002576, iteration: 198377
loss: 1.015535831451416,grad_norm: 0.9999991772163107, iteration: 198378
loss: 1.00169837474823,grad_norm: 0.9720982655427489, iteration: 198379
loss: 1.012180209159851,grad_norm: 0.9999991734742651, iteration: 198380
loss: 1.005717396736145,grad_norm: 0.9999994792252146, iteration: 198381
loss: 1.0049268007278442,grad_norm: 0.9663501548309581, iteration: 198382
loss: 1.0187723636627197,grad_norm: 0.7681723177733781, iteration: 198383
loss: 1.0069081783294678,grad_norm: 0.9342137875913245, iteration: 198384
loss: 1.0472701787948608,grad_norm: 0.9999992630791631, iteration: 198385
loss: 0.9816461205482483,grad_norm: 0.8941425917633085, iteration: 198386
loss: 1.0290932655334473,grad_norm: 0.9999991075355242, iteration: 198387
loss: 0.9837031960487366,grad_norm: 0.976889967367028, iteration: 198388
loss: 0.9865549206733704,grad_norm: 0.9109075865623404, iteration: 198389
loss: 1.0153886079788208,grad_norm: 0.874459758914967, iteration: 198390
loss: 0.9670371413230896,grad_norm: 0.8882530958706579, iteration: 198391
loss: 1.0437490940093994,grad_norm: 0.999999601344633, iteration: 198392
loss: 0.9956350922584534,grad_norm: 0.999999390727235, iteration: 198393
loss: 0.9938469529151917,grad_norm: 0.9310972780628541, iteration: 198394
loss: 1.0165132284164429,grad_norm: 0.9811316096800394, iteration: 198395
loss: 1.0054080486297607,grad_norm: 0.9875901695198804, iteration: 198396
loss: 0.9979549646377563,grad_norm: 0.9975030179727884, iteration: 198397
loss: 0.9829999804496765,grad_norm: 0.91662978005591, iteration: 198398
loss: 1.0045280456542969,grad_norm: 0.8673759826601923, iteration: 198399
loss: 0.999237596988678,grad_norm: 0.9746859046195541, iteration: 198400
loss: 0.9695674180984497,grad_norm: 0.9390529111855666, iteration: 198401
loss: 0.9834071397781372,grad_norm: 0.7534536528255251, iteration: 198402
loss: 1.0132147073745728,grad_norm: 0.84145226439088, iteration: 198403
loss: 0.9850748181343079,grad_norm: 0.8295723141416995, iteration: 198404
loss: 0.973601758480072,grad_norm: 0.9999990114546407, iteration: 198405
loss: 0.9913437962532043,grad_norm: 0.932671237505242, iteration: 198406
loss: 0.983802080154419,grad_norm: 0.9999991737701471, iteration: 198407
loss: 1.0001174211502075,grad_norm: 0.9999991986906307, iteration: 198408
loss: 1.011284351348877,grad_norm: 0.9029259278210414, iteration: 198409
loss: 1.023594617843628,grad_norm: 0.9999992064089175, iteration: 198410
loss: 1.0023996829986572,grad_norm: 0.9453402306918773, iteration: 198411
loss: 0.9831365346908569,grad_norm: 0.8330375115451364, iteration: 198412
loss: 1.0676683187484741,grad_norm: 0.9999991670572772, iteration: 198413
loss: 1.016396164894104,grad_norm: 0.8815799434973336, iteration: 198414
loss: 1.0159989595413208,grad_norm: 0.8171015410991705, iteration: 198415
loss: 0.9842349290847778,grad_norm: 0.9561510160557628, iteration: 198416
loss: 0.9971277117729187,grad_norm: 0.949907252138639, iteration: 198417
loss: 1.002497911453247,grad_norm: 0.9999991232927669, iteration: 198418
loss: 0.9977814555168152,grad_norm: 0.999999881288653, iteration: 198419
loss: 0.9712637066841125,grad_norm: 0.9174345011949445, iteration: 198420
loss: 1.0137571096420288,grad_norm: 0.9999992871833037, iteration: 198421
loss: 0.9891552925109863,grad_norm: 0.9999991370508717, iteration: 198422
loss: 1.0089365243911743,grad_norm: 0.8458258733053748, iteration: 198423
loss: 0.9762596487998962,grad_norm: 0.9999990501721859, iteration: 198424
loss: 1.0068440437316895,grad_norm: 0.955246833080749, iteration: 198425
loss: 0.9746509194374084,grad_norm: 0.9999990164812341, iteration: 198426
loss: 1.0284028053283691,grad_norm: 0.9159835811747187, iteration: 198427
loss: 0.9934878349304199,grad_norm: 0.8281065117867171, iteration: 198428
loss: 0.9944906234741211,grad_norm: 0.9999992299705224, iteration: 198429
loss: 0.9722919464111328,grad_norm: 0.9186548565702992, iteration: 198430
loss: 0.9685195088386536,grad_norm: 0.9999991313964947, iteration: 198431
loss: 1.0676590204238892,grad_norm: 0.9999994964279748, iteration: 198432
loss: 1.0247347354888916,grad_norm: 0.9999990823150663, iteration: 198433
loss: 1.0309504270553589,grad_norm: 0.9999990049144353, iteration: 198434
loss: 0.9864418506622314,grad_norm: 0.913059100621543, iteration: 198435
loss: 1.0176687240600586,grad_norm: 0.7126584668018383, iteration: 198436
loss: 1.02475106716156,grad_norm: 0.9999991162520512, iteration: 198437
loss: 0.9842933416366577,grad_norm: 0.9999990389566039, iteration: 198438
loss: 1.0013552904129028,grad_norm: 0.9129844452963692, iteration: 198439
loss: 1.0268298387527466,grad_norm: 0.8927264156600575, iteration: 198440
loss: 0.9788522124290466,grad_norm: 0.9999992320122354, iteration: 198441
loss: 0.9580965638160706,grad_norm: 0.9298268026469941, iteration: 198442
loss: 0.9929148554801941,grad_norm: 0.9690030079236609, iteration: 198443
loss: 1.006066918373108,grad_norm: 0.8843192969838933, iteration: 198444
loss: 0.9972735047340393,grad_norm: 0.9735827814424343, iteration: 198445
loss: 0.9920733571052551,grad_norm: 0.9164968187441864, iteration: 198446
loss: 0.9570325016975403,grad_norm: 0.8518015450433158, iteration: 198447
loss: 0.9828466176986694,grad_norm: 0.9789659849084231, iteration: 198448
loss: 1.0306909084320068,grad_norm: 0.8802860971831509, iteration: 198449
loss: 1.0258452892303467,grad_norm: 0.9846490508376883, iteration: 198450
loss: 0.9704626202583313,grad_norm: 0.9250756188685528, iteration: 198451
loss: 1.0081095695495605,grad_norm: 0.9774537083772352, iteration: 198452
loss: 0.9985977411270142,grad_norm: 0.9999990426220248, iteration: 198453
loss: 0.9810265898704529,grad_norm: 0.9999990074341539, iteration: 198454
loss: 0.9955937266349792,grad_norm: 0.9999991237012072, iteration: 198455
loss: 1.0037440061569214,grad_norm: 0.8951869106359865, iteration: 198456
loss: 1.0101025104522705,grad_norm: 0.8992162783195818, iteration: 198457
loss: 0.9781809449195862,grad_norm: 0.9999995979208045, iteration: 198458
loss: 1.0092055797576904,grad_norm: 0.9854689714125063, iteration: 198459
loss: 1.0278632640838623,grad_norm: 0.8756040342435136, iteration: 198460
loss: 0.9863401055335999,grad_norm: 0.8544534978561478, iteration: 198461
loss: 0.9590222239494324,grad_norm: 0.9381285640436097, iteration: 198462
loss: 1.032469391822815,grad_norm: 0.9757443883753779, iteration: 198463
loss: 1.0183780193328857,grad_norm: 0.9999990669198217, iteration: 198464
loss: 1.0581570863723755,grad_norm: 1.0000000495020696, iteration: 198465
loss: 1.0348528623580933,grad_norm: 0.9999992954087183, iteration: 198466
loss: 1.0217363834381104,grad_norm: 0.9012750008994977, iteration: 198467
loss: 0.985431969165802,grad_norm: 0.9971489665372693, iteration: 198468
loss: 0.9848214983940125,grad_norm: 0.9071534510379722, iteration: 198469
loss: 0.9907042980194092,grad_norm: 0.8482176258525492, iteration: 198470
loss: 0.9820826053619385,grad_norm: 0.8993268636466037, iteration: 198471
loss: 1.023980736732483,grad_norm: 0.9415425143757552, iteration: 198472
loss: 0.9967882633209229,grad_norm: 0.9999996840237048, iteration: 198473
loss: 1.0170882940292358,grad_norm: 0.8758367125289945, iteration: 198474
loss: 1.0132594108581543,grad_norm: 0.9999990874815238, iteration: 198475
loss: 1.068706750869751,grad_norm: 0.9477240435244613, iteration: 198476
loss: 0.9629867076873779,grad_norm: 0.9024171736556135, iteration: 198477
loss: 0.9890828132629395,grad_norm: 0.9562083461320598, iteration: 198478
loss: 1.011948585510254,grad_norm: 0.9331557025655719, iteration: 198479
loss: 1.0081455707550049,grad_norm: 0.8608579699168534, iteration: 198480
loss: 0.9871380925178528,grad_norm: 0.999999348323912, iteration: 198481
loss: 1.0154261589050293,grad_norm: 0.8834728432249689, iteration: 198482
loss: 1.0208942890167236,grad_norm: 0.953962640064248, iteration: 198483
loss: 1.009945034980774,grad_norm: 0.9999995209692741, iteration: 198484
loss: 0.9763983488082886,grad_norm: 0.9999991450337202, iteration: 198485
loss: 0.9799637794494629,grad_norm: 0.9325927697220939, iteration: 198486
loss: 0.9702419638633728,grad_norm: 0.9999990085928965, iteration: 198487
loss: 0.9916273951530457,grad_norm: 0.9999992390611415, iteration: 198488
loss: 0.9910706281661987,grad_norm: 0.9115519000243884, iteration: 198489
loss: 1.0100387334823608,grad_norm: 0.9999989211275917, iteration: 198490
loss: 0.9665129780769348,grad_norm: 0.8673387521260204, iteration: 198491
loss: 1.0237349271774292,grad_norm: 0.999999032727682, iteration: 198492
loss: 0.983231782913208,grad_norm: 0.9999990839589648, iteration: 198493
loss: 1.032161831855774,grad_norm: 0.9999990466043733, iteration: 198494
loss: 1.0099220275878906,grad_norm: 0.7986938137936854, iteration: 198495
loss: 1.0036001205444336,grad_norm: 0.9999990897200554, iteration: 198496
loss: 0.9953625798225403,grad_norm: 0.999999116096943, iteration: 198497
loss: 0.959502637386322,grad_norm: 0.9731109602238436, iteration: 198498
loss: 0.957428514957428,grad_norm: 0.857176861305054, iteration: 198499
loss: 0.9880810379981995,grad_norm: 0.9999991182498347, iteration: 198500
loss: 0.9946519136428833,grad_norm: 0.9999991195082362, iteration: 198501
loss: 1.014044165611267,grad_norm: 0.9483716177953345, iteration: 198502
loss: 1.000088095664978,grad_norm: 0.8812238059458998, iteration: 198503
loss: 1.0945484638214111,grad_norm: 0.9999991129308157, iteration: 198504
loss: 0.9716001749038696,grad_norm: 0.8821910134617419, iteration: 198505
loss: 1.0400025844573975,grad_norm: 0.8236970499914655, iteration: 198506
loss: 1.0106087923049927,grad_norm: 0.9999992794869965, iteration: 198507
loss: 0.9743116497993469,grad_norm: 0.9999991203221417, iteration: 198508
loss: 1.0111466646194458,grad_norm: 0.9889776436409984, iteration: 198509
loss: 0.9989029765129089,grad_norm: 0.9787599820155943, iteration: 198510
loss: 1.0124034881591797,grad_norm: 0.9999991043525402, iteration: 198511
loss: 1.0187066793441772,grad_norm: 0.9999990067822986, iteration: 198512
loss: 1.0041652917861938,grad_norm: 0.9233902215817777, iteration: 198513
loss: 0.9976054430007935,grad_norm: 0.9999993763972083, iteration: 198514
loss: 1.0511146783828735,grad_norm: 0.9999999042534436, iteration: 198515
loss: 0.9957718849182129,grad_norm: 0.9999990921155619, iteration: 198516
loss: 1.006244421005249,grad_norm: 0.9541833250338377, iteration: 198517
loss: 1.0062224864959717,grad_norm: 0.9461225573831092, iteration: 198518
loss: 0.9872150421142578,grad_norm: 0.8237647194049592, iteration: 198519
loss: 0.9828760027885437,grad_norm: 0.7834693270849146, iteration: 198520
loss: 1.0190625190734863,grad_norm: 0.9219644076191638, iteration: 198521
loss: 1.0028129816055298,grad_norm: 0.8255395659805745, iteration: 198522
loss: 0.9992751479148865,grad_norm: 0.9505785721406074, iteration: 198523
loss: 1.0239449739456177,grad_norm: 0.9999991831935385, iteration: 198524
loss: 1.0142379999160767,grad_norm: 0.999999113466422, iteration: 198525
loss: 0.9670955538749695,grad_norm: 0.9999989680840441, iteration: 198526
loss: 1.0200332403182983,grad_norm: 0.9999990004554816, iteration: 198527
loss: 0.9800028800964355,grad_norm: 0.9999989996457844, iteration: 198528
loss: 1.0075794458389282,grad_norm: 0.9999989931696522, iteration: 198529
loss: 0.9952517151832581,grad_norm: 0.8868865049394946, iteration: 198530
loss: 0.9919062256813049,grad_norm: 0.8960449930285347, iteration: 198531
loss: 1.0001487731933594,grad_norm: 0.9999992056192498, iteration: 198532
loss: 0.969936728477478,grad_norm: 0.8643247358338684, iteration: 198533
loss: 1.0003323554992676,grad_norm: 0.999999351184981, iteration: 198534
loss: 1.0181596279144287,grad_norm: 0.9999990965550104, iteration: 198535
loss: 0.9940395951271057,grad_norm: 0.8958268594911808, iteration: 198536
loss: 0.9657630324363708,grad_norm: 0.9999991721264663, iteration: 198537
loss: 1.0055742263793945,grad_norm: 0.9999993648853729, iteration: 198538
loss: 1.0083415508270264,grad_norm: 0.8391018216761043, iteration: 198539
loss: 0.9724307656288147,grad_norm: 0.8751707371502916, iteration: 198540
loss: 1.0129389762878418,grad_norm: 0.9999989333197047, iteration: 198541
loss: 1.0057278871536255,grad_norm: 0.9834471902321685, iteration: 198542
loss: 0.9922643899917603,grad_norm: 0.9521160929867962, iteration: 198543
loss: 1.0001922845840454,grad_norm: 0.8500941268325394, iteration: 198544
loss: 0.9909869432449341,grad_norm: 0.9999991282629539, iteration: 198545
loss: 0.9906516671180725,grad_norm: 0.8842296145899243, iteration: 198546
loss: 1.0143699645996094,grad_norm: 0.9999991142775865, iteration: 198547
loss: 0.9955148100852966,grad_norm: 0.999999005193014, iteration: 198548
loss: 1.0106827020645142,grad_norm: 0.9999990703561713, iteration: 198549
loss: 1.0001384019851685,grad_norm: 0.999999269293178, iteration: 198550
loss: 0.9806361198425293,grad_norm: 0.9999992384572292, iteration: 198551
loss: 0.9914414286613464,grad_norm: 0.9086156656053338, iteration: 198552
loss: 1.0277209281921387,grad_norm: 0.9999990684515471, iteration: 198553
loss: 1.0147435665130615,grad_norm: 0.9999990639391575, iteration: 198554
loss: 1.020743727684021,grad_norm: 0.7623212562710292, iteration: 198555
loss: 0.9718178510665894,grad_norm: 0.8128571752253169, iteration: 198556
loss: 1.0066779851913452,grad_norm: 0.99877556751915, iteration: 198557
loss: 1.0676461458206177,grad_norm: 0.8895633193024179, iteration: 198558
loss: 0.9775171279907227,grad_norm: 0.9999990289331955, iteration: 198559
loss: 0.9986768364906311,grad_norm: 0.9366794436929823, iteration: 198560
loss: 0.9623743295669556,grad_norm: 0.9999990828691803, iteration: 198561
loss: 1.0164756774902344,grad_norm: 0.9043806125598922, iteration: 198562
loss: 1.0024536848068237,grad_norm: 0.9999994434218363, iteration: 198563
loss: 0.9742228984832764,grad_norm: 0.8666891467888281, iteration: 198564
loss: 0.9734987616539001,grad_norm: 0.8200270678873557, iteration: 198565
loss: 0.982502818107605,grad_norm: 0.9680894382641837, iteration: 198566
loss: 0.9801484942436218,grad_norm: 0.901904226996163, iteration: 198567
loss: 0.9939886927604675,grad_norm: 0.9999991805352152, iteration: 198568
loss: 1.025812029838562,grad_norm: 0.980133031004067, iteration: 198569
loss: 0.9687435626983643,grad_norm: 0.8483243025150403, iteration: 198570
loss: 1.0348381996154785,grad_norm: 0.9945588165518341, iteration: 198571
loss: 0.9944917559623718,grad_norm: 0.8804771192678913, iteration: 198572
loss: 0.9928662180900574,grad_norm: 0.9392232392671034, iteration: 198573
loss: 0.9983270764350891,grad_norm: 0.8686139215287805, iteration: 198574
loss: 1.0362610816955566,grad_norm: 0.9999991741560305, iteration: 198575
loss: 0.9863836169242859,grad_norm: 0.8727221816083027, iteration: 198576
loss: 0.9854706525802612,grad_norm: 0.9999990719816096, iteration: 198577
loss: 0.9736586213111877,grad_norm: 0.9999991029887402, iteration: 198578
loss: 1.0385606288909912,grad_norm: 0.999998896725858, iteration: 198579
loss: 1.021375060081482,grad_norm: 0.8713822803493446, iteration: 198580
loss: 1.0521997213363647,grad_norm: 0.9999990317767551, iteration: 198581
loss: 0.9722945094108582,grad_norm: 0.9999992468306385, iteration: 198582
loss: 0.9927999377250671,grad_norm: 0.8211672719341493, iteration: 198583
loss: 1.027395248413086,grad_norm: 0.9146768917758128, iteration: 198584
loss: 0.9737057089805603,grad_norm: 0.8560518257369649, iteration: 198585
loss: 1.0310394763946533,grad_norm: 0.9999990989555755, iteration: 198586
loss: 1.013827919960022,grad_norm: 0.8077641003755414, iteration: 198587
loss: 0.9894956350326538,grad_norm: 0.9877615555493073, iteration: 198588
loss: 1.0164496898651123,grad_norm: 0.9744456491475288, iteration: 198589
loss: 1.00399911403656,grad_norm: 0.9999992232209645, iteration: 198590
loss: 1.038543462753296,grad_norm: 0.9999992781733036, iteration: 198591
loss: 1.0085192918777466,grad_norm: 0.8158000843173048, iteration: 198592
loss: 1.0095278024673462,grad_norm: 0.7934273708043816, iteration: 198593
loss: 1.0500210523605347,grad_norm: 0.9123426316821246, iteration: 198594
loss: 0.961119532585144,grad_norm: 0.9226569615304886, iteration: 198595
loss: 0.9915452599525452,grad_norm: 0.9999990089708883, iteration: 198596
loss: 1.0326048135757446,grad_norm: 0.9999999702639553, iteration: 198597
loss: 0.9788076877593994,grad_norm: 0.9999993575830247, iteration: 198598
loss: 0.9994533061981201,grad_norm: 0.8222729059196766, iteration: 198599
loss: 1.009077787399292,grad_norm: 0.959672276331758, iteration: 198600
loss: 1.0209881067276,grad_norm: 0.9517166583653995, iteration: 198601
loss: 1.0401735305786133,grad_norm: 0.9679371569947668, iteration: 198602
loss: 0.9892702698707581,grad_norm: 0.9999989808193102, iteration: 198603
loss: 0.9902929663658142,grad_norm: 0.8637361474663525, iteration: 198604
loss: 0.999542236328125,grad_norm: 0.9999991744501975, iteration: 198605
loss: 0.945141077041626,grad_norm: 0.884717059588794, iteration: 198606
loss: 1.0270568132400513,grad_norm: 0.854930002573393, iteration: 198607
loss: 1.0261656045913696,grad_norm: 0.833773962308013, iteration: 198608
loss: 1.0227891206741333,grad_norm: 0.7675230365722803, iteration: 198609
loss: 1.0181788206100464,grad_norm: 0.876309932308002, iteration: 198610
loss: 1.0048904418945312,grad_norm: 0.8019318201806261, iteration: 198611
loss: 0.97920823097229,grad_norm: 0.9441275224226269, iteration: 198612
loss: 1.0310354232788086,grad_norm: 0.9999991031829732, iteration: 198613
loss: 1.0277259349822998,grad_norm: 0.9423036845807776, iteration: 198614
loss: 1.007444977760315,grad_norm: 0.8356925732620413, iteration: 198615
loss: 1.036076545715332,grad_norm: 0.8848589953931217, iteration: 198616
loss: 1.1118649244308472,grad_norm: 0.9999998515915678, iteration: 198617
loss: 1.0196490287780762,grad_norm: 0.9808387483567415, iteration: 198618
loss: 1.0172291994094849,grad_norm: 0.9999995205203794, iteration: 198619
loss: 0.9966418743133545,grad_norm: 0.9999989519338573, iteration: 198620
loss: 0.9696211814880371,grad_norm: 0.9236390120541803, iteration: 198621
loss: 1.0234694480895996,grad_norm: 0.9999991540588613, iteration: 198622
loss: 1.031200647354126,grad_norm: 0.9999990468194867, iteration: 198623
loss: 0.9854596853256226,grad_norm: 0.9249182248617793, iteration: 198624
loss: 1.031267762184143,grad_norm: 0.999998915371323, iteration: 198625
loss: 1.0160276889801025,grad_norm: 0.9242469306962691, iteration: 198626
loss: 0.9965572357177734,grad_norm: 0.9900669147862903, iteration: 198627
loss: 0.9593960642814636,grad_norm: 0.9470322207803419, iteration: 198628
loss: 0.9826195240020752,grad_norm: 0.8749430487873833, iteration: 198629
loss: 0.973142147064209,grad_norm: 0.9999993403272978, iteration: 198630
loss: 0.9990994334220886,grad_norm: 0.9990286738732673, iteration: 198631
loss: 1.0033910274505615,grad_norm: 0.9999991072370414, iteration: 198632
loss: 1.048550009727478,grad_norm: 0.999999205011055, iteration: 198633
loss: 1.0182232856750488,grad_norm: 0.9999995186527035, iteration: 198634
loss: 0.9898407459259033,grad_norm: 0.847488639682061, iteration: 198635
loss: 0.9995335340499878,grad_norm: 0.703603379715464, iteration: 198636
loss: 0.9949302077293396,grad_norm: 0.9241462988417999, iteration: 198637
loss: 1.0151373147964478,grad_norm: 0.8826484915491065, iteration: 198638
loss: 0.9920969009399414,grad_norm: 0.9644532532478632, iteration: 198639
loss: 1.0101995468139648,grad_norm: 0.7870165793684994, iteration: 198640
loss: 0.9717855453491211,grad_norm: 0.9364705055310262, iteration: 198641
loss: 1.0792113542556763,grad_norm: 0.9531338907121373, iteration: 198642
loss: 1.0416038036346436,grad_norm: 0.9999993412566888, iteration: 198643
loss: 0.9785030484199524,grad_norm: 0.9999991726564288, iteration: 198644
loss: 0.9769174456596375,grad_norm: 0.9999990975596622, iteration: 198645
loss: 1.0480968952178955,grad_norm: 0.999999144164879, iteration: 198646
loss: 0.9856281280517578,grad_norm: 0.9745196727522409, iteration: 198647
loss: 0.9541597962379456,grad_norm: 0.9999990265299569, iteration: 198648
loss: 0.9717283248901367,grad_norm: 0.940929291469072, iteration: 198649
loss: 0.986746609210968,grad_norm: 0.9999992706123548, iteration: 198650
loss: 1.0128087997436523,grad_norm: 0.9999989506704363, iteration: 198651
loss: 1.0294405221939087,grad_norm: 0.8523882124467745, iteration: 198652
loss: 0.9749868512153625,grad_norm: 0.9754746232038435, iteration: 198653
loss: 1.0106981992721558,grad_norm: 0.9999997365502804, iteration: 198654
loss: 1.0073601007461548,grad_norm: 0.9999989967242908, iteration: 198655
loss: 1.0047043561935425,grad_norm: 0.8533737513605135, iteration: 198656
loss: 0.9783996939659119,grad_norm: 0.9999992052807637, iteration: 198657
loss: 0.9939873218536377,grad_norm: 0.93556791205393, iteration: 198658
loss: 1.0065768957138062,grad_norm: 0.9999990499795038, iteration: 198659
loss: 1.0358699560165405,grad_norm: 0.9999991728981773, iteration: 198660
loss: 1.001663327217102,grad_norm: 0.8698373424134999, iteration: 198661
loss: 1.0231136083602905,grad_norm: 0.9999990308959356, iteration: 198662
loss: 1.0209304094314575,grad_norm: 0.9117587792129893, iteration: 198663
loss: 1.0292688608169556,grad_norm: 0.9635581913024279, iteration: 198664
loss: 0.9889899492263794,grad_norm: 0.9101232449438019, iteration: 198665
loss: 0.9975884556770325,grad_norm: 0.9999990733854744, iteration: 198666
loss: 0.9732353687286377,grad_norm: 0.8400727837244067, iteration: 198667
loss: 1.0301978588104248,grad_norm: 0.9999991637023184, iteration: 198668
loss: 1.0068480968475342,grad_norm: 0.9119945324953932, iteration: 198669
loss: 0.9763628840446472,grad_norm: 0.9066814962566218, iteration: 198670
loss: 0.9961109757423401,grad_norm: 0.9360337930714773, iteration: 198671
loss: 1.019181251525879,grad_norm: 0.8828033322904881, iteration: 198672
loss: 0.9799280762672424,grad_norm: 0.7740642497238464, iteration: 198673
loss: 0.9692907929420471,grad_norm: 0.8088167905281289, iteration: 198674
loss: 0.9925435185432434,grad_norm: 0.8609261299117893, iteration: 198675
loss: 1.0888181924819946,grad_norm: 0.9273943518328011, iteration: 198676
loss: 1.0070056915283203,grad_norm: 0.999999138874711, iteration: 198677
loss: 0.9901815056800842,grad_norm: 0.7994000461427724, iteration: 198678
loss: 0.9870093464851379,grad_norm: 0.8589301218620942, iteration: 198679
loss: 1.0198633670806885,grad_norm: 0.845467339418742, iteration: 198680
loss: 1.0150909423828125,grad_norm: 0.9999992650844605, iteration: 198681
loss: 0.9784208536148071,grad_norm: 0.8868391773796096, iteration: 198682
loss: 0.9915586113929749,grad_norm: 0.9289805626271036, iteration: 198683
loss: 1.0190273523330688,grad_norm: 0.9337795767356432, iteration: 198684
loss: 0.9955796599388123,grad_norm: 0.9274368734862736, iteration: 198685
loss: 0.9970269799232483,grad_norm: 0.9417033867094357, iteration: 198686
loss: 1.0040233135223389,grad_norm: 0.9954848759845457, iteration: 198687
loss: 0.9984046220779419,grad_norm: 0.9999993431659929, iteration: 198688
loss: 0.9681059122085571,grad_norm: 0.9999990351350004, iteration: 198689
loss: 0.9977084398269653,grad_norm: 0.9576205715809208, iteration: 198690
loss: 1.0299909114837646,grad_norm: 0.9999992719129389, iteration: 198691
loss: 1.0263408422470093,grad_norm: 0.9585656368658635, iteration: 198692
loss: 1.047167420387268,grad_norm: 0.9999993399102521, iteration: 198693
loss: 1.0818878412246704,grad_norm: 0.9999995213723516, iteration: 198694
loss: 0.9948635101318359,grad_norm: 0.895103754538399, iteration: 198695
loss: 0.9866725206375122,grad_norm: 0.9715290105576324, iteration: 198696
loss: 0.956605076789856,grad_norm: 0.8906003575211758, iteration: 198697
loss: 1.018088698387146,grad_norm: 0.9999991571644745, iteration: 198698
loss: 1.0002095699310303,grad_norm: 0.9478921242316409, iteration: 198699
loss: 0.9908322095870972,grad_norm: 0.9586396030275933, iteration: 198700
loss: 1.0033353567123413,grad_norm: 0.9999989653983583, iteration: 198701
loss: 0.9906439185142517,grad_norm: 0.8906289683444651, iteration: 198702
loss: 1.0011764764785767,grad_norm: 0.999999119645337, iteration: 198703
loss: 1.0021203756332397,grad_norm: 0.8826261543981564, iteration: 198704
loss: 1.003749132156372,grad_norm: 0.9411782584572306, iteration: 198705
loss: 1.0046539306640625,grad_norm: 0.7703726987957633, iteration: 198706
loss: 0.9751459956169128,grad_norm: 0.8972990158495014, iteration: 198707
loss: 1.0184242725372314,grad_norm: 0.999999056408779, iteration: 198708
loss: 1.0776442289352417,grad_norm: 0.9999992055316832, iteration: 198709
loss: 0.9913268685340881,grad_norm: 0.896585387977745, iteration: 198710
loss: 0.9960410594940186,grad_norm: 0.99999901566549, iteration: 198711
loss: 1.0189306735992432,grad_norm: 0.999998942448173, iteration: 198712
loss: 1.0281325578689575,grad_norm: 0.9999990769571449, iteration: 198713
loss: 0.9993602633476257,grad_norm: 0.8996134658727464, iteration: 198714
loss: 1.0555651187896729,grad_norm: 0.9999994436482776, iteration: 198715
loss: 0.9983304142951965,grad_norm: 0.8232350307404378, iteration: 198716
loss: 0.9916898608207703,grad_norm: 0.7819693753611249, iteration: 198717
loss: 0.9852107763290405,grad_norm: 0.9999990580916801, iteration: 198718
loss: 1.0098910331726074,grad_norm: 0.8324137698414723, iteration: 198719
loss: 0.9996752738952637,grad_norm: 0.793883224784121, iteration: 198720
loss: 0.9981217384338379,grad_norm: 0.8586768601652089, iteration: 198721
loss: 1.0246973037719727,grad_norm: 0.9999991909648802, iteration: 198722
loss: 1.0115092992782593,grad_norm: 0.8929047572408523, iteration: 198723
loss: 1.0054821968078613,grad_norm: 0.999998988266784, iteration: 198724
loss: 1.0058621168136597,grad_norm: 0.9999990369854945, iteration: 198725
loss: 1.0207501649856567,grad_norm: 0.9760394361890358, iteration: 198726
loss: 1.0632120370864868,grad_norm: 0.9999997279115378, iteration: 198727
loss: 0.9813663959503174,grad_norm: 0.9999992623821433, iteration: 198728
loss: 1.0016510486602783,grad_norm: 0.8535818641294334, iteration: 198729
loss: 0.948461651802063,grad_norm: 0.9618881477742144, iteration: 198730
loss: 0.9847872257232666,grad_norm: 0.8318397925490529, iteration: 198731
loss: 0.9983031749725342,grad_norm: 0.7473220572363075, iteration: 198732
loss: 0.9894404411315918,grad_norm: 0.9999991671027153, iteration: 198733
loss: 0.9820953011512756,grad_norm: 0.917505322020354, iteration: 198734
loss: 1.0175566673278809,grad_norm: 0.840236741499504, iteration: 198735
loss: 1.0012083053588867,grad_norm: 0.9315744041829599, iteration: 198736
loss: 1.0324513912200928,grad_norm: 0.8662348211115733, iteration: 198737
loss: 0.9867563843727112,grad_norm: 0.9999990633867079, iteration: 198738
loss: 1.0817855596542358,grad_norm: 0.999999290416227, iteration: 198739
loss: 0.9955556988716125,grad_norm: 0.7595704087665307, iteration: 198740
loss: 1.0152112245559692,grad_norm: 0.9714514888428732, iteration: 198741
loss: 1.0072574615478516,grad_norm: 0.999999521566192, iteration: 198742
loss: 0.9671812653541565,grad_norm: 0.8911624222592703, iteration: 198743
loss: 1.0129334926605225,grad_norm: 0.9900293863169904, iteration: 198744
loss: 1.0335086584091187,grad_norm: 0.9999992284440026, iteration: 198745
loss: 1.004286527633667,grad_norm: 0.9470329509774751, iteration: 198746
loss: 1.0619992017745972,grad_norm: 0.9999992831066533, iteration: 198747
loss: 1.011366844177246,grad_norm: 0.9122374945623104, iteration: 198748
loss: 0.9969705939292908,grad_norm: 0.9999989843661358, iteration: 198749
loss: 0.9971129894256592,grad_norm: 0.9999989500073774, iteration: 198750
loss: 0.974822998046875,grad_norm: 0.9099421540833288, iteration: 198751
loss: 1.0306180715560913,grad_norm: 0.9999992175570833, iteration: 198752
loss: 1.0152579545974731,grad_norm: 0.8937845533274821, iteration: 198753
loss: 1.0150189399719238,grad_norm: 0.9509583990361657, iteration: 198754
loss: 1.026968240737915,grad_norm: 0.7697480322821452, iteration: 198755
loss: 1.0055131912231445,grad_norm: 0.9583441752548645, iteration: 198756
loss: 0.9804236888885498,grad_norm: 0.9999988959485373, iteration: 198757
loss: 1.0054532289505005,grad_norm: 0.8992331681101859, iteration: 198758
loss: 0.9731581211090088,grad_norm: 0.8999627548071993, iteration: 198759
loss: 1.0250341892242432,grad_norm: 0.9999991481047168, iteration: 198760
loss: 1.0196653604507446,grad_norm: 0.7930760929384919, iteration: 198761
loss: 1.0006705522537231,grad_norm: 0.9801559924089389, iteration: 198762
loss: 0.973121166229248,grad_norm: 0.8327504425363612, iteration: 198763
loss: 0.9622122645378113,grad_norm: 0.9820946437632142, iteration: 198764
loss: 0.9745498895645142,grad_norm: 0.941705202745406, iteration: 198765
loss: 1.0185511112213135,grad_norm: 0.9999993811364734, iteration: 198766
loss: 1.008704662322998,grad_norm: 0.9999992635738512, iteration: 198767
loss: 1.0217366218566895,grad_norm: 0.920171780092352, iteration: 198768
loss: 0.9806626439094543,grad_norm: 0.9999989829295194, iteration: 198769
loss: 0.9751688241958618,grad_norm: 0.999999028970697, iteration: 198770
loss: 1.0145442485809326,grad_norm: 0.9056210807510985, iteration: 198771
loss: 1.0098495483398438,grad_norm: 0.9331065625994897, iteration: 198772
loss: 0.9952058792114258,grad_norm: 0.9574152409500849, iteration: 198773
loss: 0.943179726600647,grad_norm: 0.9963690149633528, iteration: 198774
loss: 0.9637551307678223,grad_norm: 0.9311924482560161, iteration: 198775
loss: 1.0007909536361694,grad_norm: 0.7555775304640387, iteration: 198776
loss: 0.9628884792327881,grad_norm: 0.8417246448163604, iteration: 198777
loss: 0.9639601707458496,grad_norm: 0.8563986821959815, iteration: 198778
loss: 0.9954213500022888,grad_norm: 0.999999013101284, iteration: 198779
loss: 1.0194000005722046,grad_norm: 0.8772877775416612, iteration: 198780
loss: 0.9791004061698914,grad_norm: 0.9416263243050933, iteration: 198781
loss: 1.0817183256149292,grad_norm: 0.9999991194475953, iteration: 198782
loss: 1.0167536735534668,grad_norm: 0.8632625947167251, iteration: 198783
loss: 1.0065884590148926,grad_norm: 0.9398111613608306, iteration: 198784
loss: 0.9868745803833008,grad_norm: 0.9999992195999432, iteration: 198785
loss: 0.9602882266044617,grad_norm: 0.9999991434114177, iteration: 198786
loss: 0.9754818081855774,grad_norm: 0.8455073032121246, iteration: 198787
loss: 0.978706955909729,grad_norm: 0.8458649894336708, iteration: 198788
loss: 0.9948272705078125,grad_norm: 0.970304777207107, iteration: 198789
loss: 1.0093625783920288,grad_norm: 0.8445678859029422, iteration: 198790
loss: 1.0308820009231567,grad_norm: 0.9999995482325038, iteration: 198791
loss: 1.017802357673645,grad_norm: 0.9454171312337992, iteration: 198792
loss: 1.0622066259384155,grad_norm: 0.7745751717086442, iteration: 198793
loss: 1.0196343660354614,grad_norm: 0.9219872966695272, iteration: 198794
loss: 1.0156595706939697,grad_norm: 0.9630802835463491, iteration: 198795
loss: 1.0258510112762451,grad_norm: 0.9999997784434381, iteration: 198796
loss: 0.995372474193573,grad_norm: 0.9999991046321735, iteration: 198797
loss: 0.9748971462249756,grad_norm: 0.97787299566691, iteration: 198798
loss: 1.0065754652023315,grad_norm: 0.8166969011021279, iteration: 198799
loss: 1.0287920236587524,grad_norm: 0.8965891097500238, iteration: 198800
loss: 0.9697818756103516,grad_norm: 0.9999991141065657, iteration: 198801
loss: 1.0069810152053833,grad_norm: 0.8240851595915345, iteration: 198802
loss: 1.0099481344223022,grad_norm: 0.8361818160207738, iteration: 198803
loss: 0.9659294486045837,grad_norm: 0.8996773321563509, iteration: 198804
loss: 1.0171817541122437,grad_norm: 0.9999992399819824, iteration: 198805
loss: 0.9720496535301208,grad_norm: 0.9999990650167946, iteration: 198806
loss: 0.9801422357559204,grad_norm: 0.9999998553593079, iteration: 198807
loss: 0.979663610458374,grad_norm: 0.9999990584477413, iteration: 198808
loss: 0.9498888254165649,grad_norm: 0.9322664146545749, iteration: 198809
loss: 0.9707307815551758,grad_norm: 0.9176015461908962, iteration: 198810
loss: 1.014060616493225,grad_norm: 0.8940232746110338, iteration: 198811
loss: 0.985340416431427,grad_norm: 0.8081973893522798, iteration: 198812
loss: 0.9585986137390137,grad_norm: 0.9385645019008249, iteration: 198813
loss: 0.9764770865440369,grad_norm: 0.8912583112347431, iteration: 198814
loss: 0.9704616665840149,grad_norm: 0.9792796872771675, iteration: 198815
loss: 1.0399253368377686,grad_norm: 0.999999254597753, iteration: 198816
loss: 1.015474796295166,grad_norm: 0.9999989414998257, iteration: 198817
loss: 0.9874022603034973,grad_norm: 0.9999992394304243, iteration: 198818
loss: 0.9765529632568359,grad_norm: 0.9721072748336896, iteration: 198819
loss: 0.985682487487793,grad_norm: 0.8226845014772644, iteration: 198820
loss: 1.0178098678588867,grad_norm: 0.8918560017093663, iteration: 198821
loss: 1.0015394687652588,grad_norm: 0.8276609442440264, iteration: 198822
loss: 0.9820035099983215,grad_norm: 0.9999991610456421, iteration: 198823
loss: 0.9695556163787842,grad_norm: 0.9855391392582791, iteration: 198824
loss: 0.9759722352027893,grad_norm: 0.8842593884159533, iteration: 198825
loss: 0.9981038570404053,grad_norm: 0.9198060235241673, iteration: 198826
loss: 1.006246566772461,grad_norm: 0.9999991506034288, iteration: 198827
loss: 0.9972826242446899,grad_norm: 0.8583415859807885, iteration: 198828
loss: 1.0318105220794678,grad_norm: 0.8798639633073071, iteration: 198829
loss: 0.9601645469665527,grad_norm: 0.9999991096268258, iteration: 198830
loss: 0.9872880578041077,grad_norm: 0.8089576025353644, iteration: 198831
loss: 0.9935453534126282,grad_norm: 0.9999991284611404, iteration: 198832
loss: 1.0114268064498901,grad_norm: 0.9663709612405517, iteration: 198833
loss: 1.0315815210342407,grad_norm: 0.999998988473903, iteration: 198834
loss: 1.0378912687301636,grad_norm: 0.9999998610161857, iteration: 198835
loss: 0.9909331798553467,grad_norm: 0.9999991952382283, iteration: 198836
loss: 1.0035653114318848,grad_norm: 0.9999991127449271, iteration: 198837
loss: 0.9922326803207397,grad_norm: 0.934348550881758, iteration: 198838
loss: 1.0236735343933105,grad_norm: 0.9999990606612873, iteration: 198839
loss: 1.0031788349151611,grad_norm: 0.9999991443080645, iteration: 198840
loss: 0.9962500929832458,grad_norm: 0.870289729454813, iteration: 198841
loss: 0.9992666840553284,grad_norm: 0.8486065143340555, iteration: 198842
loss: 0.9989262223243713,grad_norm: 0.8359174067063527, iteration: 198843
loss: 0.972771942615509,grad_norm: 0.999999165746039, iteration: 198844
loss: 1.0216680765151978,grad_norm: 0.9999994994922822, iteration: 198845
loss: 0.9967184662818909,grad_norm: 0.8854522064304383, iteration: 198846
loss: 0.9677876234054565,grad_norm: 0.9999990158192881, iteration: 198847
loss: 1.0241423845291138,grad_norm: 0.9999990472750344, iteration: 198848
loss: 1.0031483173370361,grad_norm: 0.8885003821227432, iteration: 198849
loss: 0.9974310398101807,grad_norm: 0.9347815541143737, iteration: 198850
loss: 0.9733525514602661,grad_norm: 0.9999991792370477, iteration: 198851
loss: 0.9986732602119446,grad_norm: 0.8781911182898522, iteration: 198852
loss: 1.026206135749817,grad_norm: 0.9681499705214577, iteration: 198853
loss: 1.0502053499221802,grad_norm: 0.99999992403906, iteration: 198854
loss: 0.9901549816131592,grad_norm: 0.8910136790551002, iteration: 198855
loss: 1.0045766830444336,grad_norm: 0.9999991656632528, iteration: 198856
loss: 1.007081151008606,grad_norm: 0.9999997065992671, iteration: 198857
loss: 1.1105252504348755,grad_norm: 0.9999999522659566, iteration: 198858
loss: 0.982313334941864,grad_norm: 0.9125257563014555, iteration: 198859
loss: 1.0059459209442139,grad_norm: 0.8999669933043909, iteration: 198860
loss: 0.995736837387085,grad_norm: 0.9999989693610347, iteration: 198861
loss: 1.0108795166015625,grad_norm: 0.9999991768833802, iteration: 198862
loss: 1.0770463943481445,grad_norm: 0.9999998698902808, iteration: 198863
loss: 0.9920402765274048,grad_norm: 0.9999989777428905, iteration: 198864
loss: 1.010918140411377,grad_norm: 0.7677231130862325, iteration: 198865
loss: 1.0027399063110352,grad_norm: 0.9551544515276831, iteration: 198866
loss: 1.0237854719161987,grad_norm: 0.8847460416022075, iteration: 198867
loss: 1.0714598894119263,grad_norm: 0.9999991350372036, iteration: 198868
loss: 1.3678823709487915,grad_norm: 0.9999994718694033, iteration: 198869
loss: 1.0073474645614624,grad_norm: 0.9579610108455102, iteration: 198870
loss: 1.0669987201690674,grad_norm: 0.9999997836779122, iteration: 198871
loss: 1.1178497076034546,grad_norm: 0.9999996102480819, iteration: 198872
loss: 1.0157475471496582,grad_norm: 0.9594478901885806, iteration: 198873
loss: 0.9711401462554932,grad_norm: 0.9999991705030264, iteration: 198874
loss: 0.9557439684867859,grad_norm: 0.9188265950762514, iteration: 198875
loss: 0.9682821035385132,grad_norm: 0.9999990875884675, iteration: 198876
loss: 0.9985413551330566,grad_norm: 0.9792992516312983, iteration: 198877
loss: 1.0480351448059082,grad_norm: 0.9954249420581922, iteration: 198878
loss: 1.0215935707092285,grad_norm: 0.9503757023509021, iteration: 198879
loss: 1.0251719951629639,grad_norm: 0.9999991870961991, iteration: 198880
loss: 1.14810311794281,grad_norm: 0.9999997815653392, iteration: 198881
loss: 1.0119006633758545,grad_norm: 0.9381937751031547, iteration: 198882
loss: 1.0633207559585571,grad_norm: 0.9999991871292541, iteration: 198883
loss: 1.0066633224487305,grad_norm: 0.8212981600983932, iteration: 198884
loss: 0.9847859144210815,grad_norm: 0.9999994376260083, iteration: 198885
loss: 0.990454375743866,grad_norm: 0.8828437436367762, iteration: 198886
loss: 0.9946368336677551,grad_norm: 0.9999999472841513, iteration: 198887
loss: 1.0547338724136353,grad_norm: 0.9999993690095825, iteration: 198888
loss: 1.0361475944519043,grad_norm: 0.9383415543137548, iteration: 198889
loss: 1.0158642530441284,grad_norm: 0.9260758065955199, iteration: 198890
loss: 0.9876249432563782,grad_norm: 0.9999990592754342, iteration: 198891
loss: 0.9634712934494019,grad_norm: 0.9999991908654526, iteration: 198892
loss: 1.0195387601852417,grad_norm: 0.9648417120517678, iteration: 198893
loss: 0.9771828055381775,grad_norm: 0.971700613758764, iteration: 198894
loss: 1.0172994136810303,grad_norm: 0.9999992940898783, iteration: 198895
loss: 0.9967789649963379,grad_norm: 0.8894177851028281, iteration: 198896
loss: 0.9701489210128784,grad_norm: 0.9999991337442683, iteration: 198897
loss: 0.9678767323493958,grad_norm: 0.9580979948046117, iteration: 198898
loss: 0.97279953956604,grad_norm: 0.9999991688239633, iteration: 198899
loss: 1.0341986417770386,grad_norm: 0.8352549829539251, iteration: 198900
loss: 1.0168346166610718,grad_norm: 0.9567025379973195, iteration: 198901
loss: 0.9682966470718384,grad_norm: 0.934535436874531, iteration: 198902
loss: 0.9792912602424622,grad_norm: 0.9999990238880545, iteration: 198903
loss: 0.97164386510849,grad_norm: 0.9515137907954694, iteration: 198904
loss: 1.0251942873001099,grad_norm: 0.9999999090563133, iteration: 198905
loss: 0.9898152351379395,grad_norm: 0.9674513143426129, iteration: 198906
loss: 0.9851908683776855,grad_norm: 0.9999995325633625, iteration: 198907
loss: 0.9903165102005005,grad_norm: 0.8942984739205547, iteration: 198908
loss: 0.981829822063446,grad_norm: 0.8667168076342883, iteration: 198909
loss: 1.0137100219726562,grad_norm: 0.9999990252067589, iteration: 198910
loss: 0.9918712377548218,grad_norm: 0.9999992587898134, iteration: 198911
loss: 0.9995987415313721,grad_norm: 0.9013824607008759, iteration: 198912
loss: 1.005172610282898,grad_norm: 0.999999584006218, iteration: 198913
loss: 1.015051007270813,grad_norm: 0.9999990826328757, iteration: 198914
loss: 0.977452278137207,grad_norm: 0.9999993830760846, iteration: 198915
loss: 1.0009801387786865,grad_norm: 0.8707014145955535, iteration: 198916
loss: 1.01596999168396,grad_norm: 0.9999994287912097, iteration: 198917
loss: 0.9386119246482849,grad_norm: 0.890069165895022, iteration: 198918
loss: 0.9961487650871277,grad_norm: 0.9999991212615934, iteration: 198919
loss: 1.0448271036148071,grad_norm: 0.9999990410708256, iteration: 198920
loss: 0.9923887252807617,grad_norm: 0.9387847313842558, iteration: 198921
loss: 1.0158361196517944,grad_norm: 0.9198271477991244, iteration: 198922
loss: 0.9990192651748657,grad_norm: 0.8654123842536005, iteration: 198923
loss: 1.0598938465118408,grad_norm: 0.9999993972651203, iteration: 198924
loss: 0.9940946102142334,grad_norm: 0.9999991642346727, iteration: 198925
loss: 1.0090246200561523,grad_norm: 0.8332694060919277, iteration: 198926
loss: 1.1660315990447998,grad_norm: 1.0000000041389507, iteration: 198927
loss: 0.9760000705718994,grad_norm: 0.9064199720173075, iteration: 198928
loss: 1.0144424438476562,grad_norm: 0.7587734575947646, iteration: 198929
loss: 0.9744712114334106,grad_norm: 0.9128256177659482, iteration: 198930
loss: 1.0103148221969604,grad_norm: 0.9618915239330241, iteration: 198931
loss: 0.9929752349853516,grad_norm: 0.8360399737225491, iteration: 198932
loss: 0.9776553511619568,grad_norm: 0.999999098520661, iteration: 198933
loss: 1.014456033706665,grad_norm: 0.8236022499341817, iteration: 198934
loss: 1.0285940170288086,grad_norm: 0.9257565696747876, iteration: 198935
loss: 1.0120697021484375,grad_norm: 0.9129454835276426, iteration: 198936
loss: 1.0087424516677856,grad_norm: 0.9999990648693452, iteration: 198937
loss: 0.9864370822906494,grad_norm: 0.948756924764779, iteration: 198938
loss: 1.044276475906372,grad_norm: 0.9999991996902505, iteration: 198939
loss: 0.9492762684822083,grad_norm: 0.9762734247492252, iteration: 198940
loss: 0.963517963886261,grad_norm: 0.8774413872054015, iteration: 198941
loss: 1.004555583000183,grad_norm: 0.9999991481706018, iteration: 198942
loss: 1.0116227865219116,grad_norm: 0.8691352303572432, iteration: 198943
loss: 1.0193449258804321,grad_norm: 0.9999990945607758, iteration: 198944
loss: 0.9960243701934814,grad_norm: 0.8568922194313267, iteration: 198945
loss: 0.998268187046051,grad_norm: 0.9007725880049815, iteration: 198946
loss: 1.0589923858642578,grad_norm: 0.9999996237825566, iteration: 198947
loss: 1.0294524431228638,grad_norm: 0.9217478588667982, iteration: 198948
loss: 0.9807846546173096,grad_norm: 0.9999999088143047, iteration: 198949
loss: 1.0319031476974487,grad_norm: 0.9310311781452629, iteration: 198950
loss: 1.128074049949646,grad_norm: 0.9682438164259077, iteration: 198951
loss: 1.005460500717163,grad_norm: 0.9999993126986132, iteration: 198952
loss: 0.9542037844657898,grad_norm: 0.9720649545684672, iteration: 198953
loss: 1.004227638244629,grad_norm: 0.9373292491653541, iteration: 198954
loss: 1.0628249645233154,grad_norm: 0.9999998606668553, iteration: 198955
loss: 1.1405905485153198,grad_norm: 0.999999945861189, iteration: 198956
loss: 0.9946237802505493,grad_norm: 0.9177630860807583, iteration: 198957
loss: 0.9738303422927856,grad_norm: 0.9999991296608559, iteration: 198958
loss: 1.1697807312011719,grad_norm: 0.9646853091162875, iteration: 198959
loss: 0.9958711862564087,grad_norm: 0.9493868668320983, iteration: 198960
loss: 1.1371750831604004,grad_norm: 0.9999994052510341, iteration: 198961
loss: 1.0784789323806763,grad_norm: 0.9999991900359815, iteration: 198962
loss: 0.9582878947257996,grad_norm: 0.993257997976736, iteration: 198963
loss: 1.3269380331039429,grad_norm: 0.9999993837799297, iteration: 198964
loss: 1.004860758781433,grad_norm: 0.9381802752390953, iteration: 198965
loss: 0.985572099685669,grad_norm: 0.9795780219846693, iteration: 198966
loss: 1.0296987295150757,grad_norm: 0.827260915535772, iteration: 198967
loss: 1.043062686920166,grad_norm: 0.9894681125453326, iteration: 198968
loss: 1.0491477251052856,grad_norm: 0.9999994551116664, iteration: 198969
loss: 0.9866299629211426,grad_norm: 0.9587939121425356, iteration: 198970
loss: 1.107603669166565,grad_norm: 0.9999990422205918, iteration: 198971
loss: 1.1127437353134155,grad_norm: 0.9999998934034419, iteration: 198972
loss: 1.1967796087265015,grad_norm: 0.9999998306898369, iteration: 198973
loss: 1.1088216304779053,grad_norm: 0.9999991931627287, iteration: 198974
loss: 1.1341147422790527,grad_norm: 0.9999992693216849, iteration: 198975
loss: 1.0464506149291992,grad_norm: 0.9797535960892837, iteration: 198976
loss: 1.119365930557251,grad_norm: 0.9999993375995114, iteration: 198977
loss: 1.1884868144989014,grad_norm: 0.9999992993661306, iteration: 198978
loss: 0.9983031153678894,grad_norm: 0.9999989915625096, iteration: 198979
loss: 1.2579740285873413,grad_norm: 1.0000000018702297, iteration: 198980
loss: 1.0775617361068726,grad_norm: 0.9999993495164942, iteration: 198981
loss: 1.1567920446395874,grad_norm: 0.9999991024135694, iteration: 198982
loss: 1.083290934562683,grad_norm: 0.9999993334442586, iteration: 198983
loss: 1.1251392364501953,grad_norm: 0.9999996852935326, iteration: 198984
loss: 1.16746985912323,grad_norm: 0.9999998131706963, iteration: 198985
loss: 1.0998233556747437,grad_norm: 0.9999990473810177, iteration: 198986
loss: 1.0978069305419922,grad_norm: 0.9999991791908919, iteration: 198987
loss: 1.2169718742370605,grad_norm: 0.9999998156443433, iteration: 198988
loss: 1.073400855064392,grad_norm: 0.9999995835927306, iteration: 198989
loss: 1.0918514728546143,grad_norm: 0.999999264089379, iteration: 198990
loss: 1.0321565866470337,grad_norm: 0.9999994027543232, iteration: 198991
loss: 1.0566104650497437,grad_norm: 0.9999994305776538, iteration: 198992
loss: 1.1271966695785522,grad_norm: 0.9999998399686205, iteration: 198993
loss: 1.1302522420883179,grad_norm: 0.9999997464029333, iteration: 198994
loss: 0.9941345453262329,grad_norm: 0.9999991795258433, iteration: 198995
loss: 1.0424209833145142,grad_norm: 1.0000000413565278, iteration: 198996
loss: 1.027788519859314,grad_norm: 0.9999991815531065, iteration: 198997
loss: 1.038365125656128,grad_norm: 0.9999990565509811, iteration: 198998
loss: 1.0333820581436157,grad_norm: 0.9999991389750386, iteration: 198999
loss: 0.9642518162727356,grad_norm: 0.9475196846677439, iteration: 199000
loss: 0.9688913822174072,grad_norm: 0.8885496146631129, iteration: 199001
loss: 1.0510120391845703,grad_norm: 0.9999990948607346, iteration: 199002
loss: 0.9945606589317322,grad_norm: 0.9869714356088298, iteration: 199003
loss: 0.986945629119873,grad_norm: 0.974094832877385, iteration: 199004
loss: 0.9921030402183533,grad_norm: 0.9414928592968488, iteration: 199005
loss: 0.9931724667549133,grad_norm: 0.9999991009912833, iteration: 199006
loss: 0.9529957175254822,grad_norm: 0.9533357198609017, iteration: 199007
loss: 1.0830447673797607,grad_norm: 0.9999994028060549, iteration: 199008
loss: 1.0211397409439087,grad_norm: 0.9997476844623904, iteration: 199009
loss: 1.058937668800354,grad_norm: 0.999999784653078, iteration: 199010
loss: 0.9913934469223022,grad_norm: 0.9999995735377432, iteration: 199011
loss: 0.9836030602455139,grad_norm: 0.9999990311432857, iteration: 199012
loss: 0.9581732749938965,grad_norm: 0.9999991415292742, iteration: 199013
loss: 0.9954752326011658,grad_norm: 0.9999990230915362, iteration: 199014
loss: 1.0223485231399536,grad_norm: 0.905847425556701, iteration: 199015
loss: 1.0490789413452148,grad_norm: 0.9024700790218132, iteration: 199016
loss: 0.9945964217185974,grad_norm: 0.9762096998354778, iteration: 199017
loss: 1.0099821090698242,grad_norm: 0.9817886696132214, iteration: 199018
loss: 1.0172557830810547,grad_norm: 0.9999991383152196, iteration: 199019
loss: 1.0311870574951172,grad_norm: 0.90785647833409, iteration: 199020
loss: 1.0053105354309082,grad_norm: 0.9999991483429044, iteration: 199021
loss: 1.0182769298553467,grad_norm: 0.9999991421813837, iteration: 199022
loss: 1.0185050964355469,grad_norm: 0.8282318909577188, iteration: 199023
loss: 1.007117748260498,grad_norm: 0.9047336288182211, iteration: 199024
loss: 1.082190990447998,grad_norm: 0.9999998049772736, iteration: 199025
loss: 0.9933594465255737,grad_norm: 0.999999067643212, iteration: 199026
loss: 1.0187381505966187,grad_norm: 0.9309623124030643, iteration: 199027
loss: 1.0910924673080444,grad_norm: 0.999999421427993, iteration: 199028
loss: 1.0004130601882935,grad_norm: 0.8963410694844763, iteration: 199029
loss: 0.9821032285690308,grad_norm: 0.8028958928092325, iteration: 199030
loss: 1.0131176710128784,grad_norm: 0.9570399920880477, iteration: 199031
loss: 1.0065895318984985,grad_norm: 0.8864222629150752, iteration: 199032
loss: 0.9792372584342957,grad_norm: 0.9999990706661479, iteration: 199033
loss: 1.0606602430343628,grad_norm: 0.9317646031111737, iteration: 199034
loss: 1.0130690336227417,grad_norm: 0.8261980226966367, iteration: 199035
loss: 1.0878846645355225,grad_norm: 0.9999999071906632, iteration: 199036
loss: 1.1107808351516724,grad_norm: 0.9999996723873567, iteration: 199037
loss: 1.0120867490768433,grad_norm: 0.9999993385076055, iteration: 199038
loss: 0.9566837549209595,grad_norm: 0.9270807574398472, iteration: 199039
loss: 1.021996021270752,grad_norm: 0.9999991697063552, iteration: 199040
loss: 0.9884452819824219,grad_norm: 0.9279768089063112, iteration: 199041
loss: 1.0292725563049316,grad_norm: 0.9999997903679202, iteration: 199042
loss: 1.0076664686203003,grad_norm: 0.9791602135892225, iteration: 199043
loss: 1.0819852352142334,grad_norm: 0.9999998412772525, iteration: 199044
loss: 1.0271931886672974,grad_norm: 0.999999972756086, iteration: 199045
loss: 0.979773223400116,grad_norm: 0.9999997164670231, iteration: 199046
loss: 1.0048060417175293,grad_norm: 0.8971759407022789, iteration: 199047
loss: 0.9704105854034424,grad_norm: 0.9358936595575656, iteration: 199048
loss: 1.0004161596298218,grad_norm: 0.9999990593731601, iteration: 199049
loss: 1.0639482736587524,grad_norm: 0.9999990644375976, iteration: 199050
loss: 1.2412301301956177,grad_norm: 0.9999997813979531, iteration: 199051
loss: 0.9759258031845093,grad_norm: 0.9999991218677805, iteration: 199052
loss: 0.9650174379348755,grad_norm: 0.9451444880395689, iteration: 199053
loss: 1.0555669069290161,grad_norm: 0.9999999159344728, iteration: 199054
loss: 0.9825820326805115,grad_norm: 0.8757865887824685, iteration: 199055
loss: 1.1296310424804688,grad_norm: 0.9999996859819916, iteration: 199056
loss: 1.0528594255447388,grad_norm: 0.9999999763174441, iteration: 199057
loss: 1.0052742958068848,grad_norm: 0.8580222398000159, iteration: 199058
loss: 0.9670279026031494,grad_norm: 0.9578280972243362, iteration: 199059
loss: 1.034270167350769,grad_norm: 0.9999993947130661, iteration: 199060
loss: 0.9891637563705444,grad_norm: 0.9999990704271041, iteration: 199061
loss: 1.1246626377105713,grad_norm: 0.9999990384991463, iteration: 199062
loss: 1.0645924806594849,grad_norm: 0.9999997333310096, iteration: 199063
loss: 0.9891712665557861,grad_norm: 0.9999992871750268, iteration: 199064
loss: 0.9951735734939575,grad_norm: 0.9999991774051553, iteration: 199065
loss: 1.049598217010498,grad_norm: 0.9999991933192581, iteration: 199066
loss: 1.100821852684021,grad_norm: 0.999999118089108, iteration: 199067
loss: 0.9668962359428406,grad_norm: 0.9718099282541556, iteration: 199068
loss: 1.114018201828003,grad_norm: 0.9999998167624544, iteration: 199069
loss: 0.9864966869354248,grad_norm: 0.8884658124484692, iteration: 199070
loss: 0.975179135799408,grad_norm: 0.9447964668799401, iteration: 199071
loss: 1.1293641328811646,grad_norm: 0.9999997757269581, iteration: 199072
loss: 1.0336917638778687,grad_norm: 0.9999989295023569, iteration: 199073
loss: 1.030251383781433,grad_norm: 0.999998991394665, iteration: 199074
loss: 0.9776843190193176,grad_norm: 0.9999994903031437, iteration: 199075
loss: 0.9847549796104431,grad_norm: 0.9694487046841777, iteration: 199076
loss: 0.9719409346580505,grad_norm: 0.9999990535293459, iteration: 199077
loss: 0.9965352416038513,grad_norm: 0.9999990807260483, iteration: 199078
loss: 1.0322566032409668,grad_norm: 0.999999199391506, iteration: 199079
loss: 0.9948760867118835,grad_norm: 0.9999997728687549, iteration: 199080
loss: 0.9922381639480591,grad_norm: 0.827249209897344, iteration: 199081
loss: 1.0798838138580322,grad_norm: 0.999999417271074, iteration: 199082
loss: 1.0021098852157593,grad_norm: 0.9999991537363572, iteration: 199083
loss: 0.9634481072425842,grad_norm: 1.0000000074345146, iteration: 199084
loss: 0.9983078241348267,grad_norm: 0.8683100122999886, iteration: 199085
loss: 0.9950580596923828,grad_norm: 0.99999914216702, iteration: 199086
loss: 0.9580143094062805,grad_norm: 0.9648280260624444, iteration: 199087
loss: 1.005303978919983,grad_norm: 0.9999991025280065, iteration: 199088
loss: 0.9650663733482361,grad_norm: 0.9999992449985728, iteration: 199089
loss: 0.9788755178451538,grad_norm: 0.8575798315842129, iteration: 199090
loss: 0.9676627516746521,grad_norm: 0.9999991819247889, iteration: 199091
loss: 0.9970211386680603,grad_norm: 0.9999990325547625, iteration: 199092
loss: 1.0053397417068481,grad_norm: 0.9999995038566984, iteration: 199093
loss: 0.9959973096847534,grad_norm: 0.9999990231877607, iteration: 199094
loss: 0.9798039793968201,grad_norm: 0.86697879293717, iteration: 199095
loss: 0.9910891056060791,grad_norm: 0.9999991629241449, iteration: 199096
loss: 1.0417283773422241,grad_norm: 0.9836731846244474, iteration: 199097
loss: 1.113836407661438,grad_norm: 0.9999991441712682, iteration: 199098
loss: 1.0138142108917236,grad_norm: 0.9662849611600094, iteration: 199099
loss: 0.9971662163734436,grad_norm: 0.9553301270923864, iteration: 199100
loss: 1.1201441287994385,grad_norm: 0.9999991532191673, iteration: 199101
loss: 1.0211384296417236,grad_norm: 0.9999990459733107, iteration: 199102
loss: 1.009705901145935,grad_norm: 0.8990740609865111, iteration: 199103
loss: 1.0024181604385376,grad_norm: 0.9999992553446708, iteration: 199104
loss: 0.944361686706543,grad_norm: 0.9610360805484313, iteration: 199105
loss: 1.0459988117218018,grad_norm: 0.9999991725743873, iteration: 199106
loss: 0.9999551177024841,grad_norm: 0.9999989915317145, iteration: 199107
loss: 1.0534721612930298,grad_norm: 0.9999992819449262, iteration: 199108
loss: 1.130300760269165,grad_norm: 0.9999993324141423, iteration: 199109
loss: 1.0157040357589722,grad_norm: 0.9875415069574641, iteration: 199110
loss: 0.9610140919685364,grad_norm: 0.9021989063530935, iteration: 199111
loss: 1.1229745149612427,grad_norm: 0.9999990251661265, iteration: 199112
loss: 0.9975487589836121,grad_norm: 0.9999991306659507, iteration: 199113
loss: 1.0300323963165283,grad_norm: 0.8444275899290924, iteration: 199114
loss: 1.0378347635269165,grad_norm: 0.9999990645108651, iteration: 199115
loss: 0.9894194006919861,grad_norm: 0.9953356943551434, iteration: 199116
loss: 0.9958149790763855,grad_norm: 0.9999990982525891, iteration: 199117
loss: 1.002328634262085,grad_norm: 0.9999996680969278, iteration: 199118
loss: 1.0035196542739868,grad_norm: 0.8810991425910218, iteration: 199119
loss: 1.0584574937820435,grad_norm: 0.9999996406926529, iteration: 199120
loss: 0.9614908695220947,grad_norm: 0.9999993234893966, iteration: 199121
loss: 0.9353505969047546,grad_norm: 0.9999992113432783, iteration: 199122
loss: 1.1737191677093506,grad_norm: 0.9999995955881708, iteration: 199123
loss: 1.0171011686325073,grad_norm: 0.999999040790237, iteration: 199124
loss: 1.117531657218933,grad_norm: 0.9999990539824897, iteration: 199125
loss: 0.9683828949928284,grad_norm: 0.849893808840118, iteration: 199126
loss: 0.9958986639976501,grad_norm: 0.9999989857675786, iteration: 199127
loss: 1.130010962486267,grad_norm: 0.9999996242074879, iteration: 199128
loss: 0.9743390679359436,grad_norm: 0.9060900296960758, iteration: 199129
loss: 0.9724767208099365,grad_norm: 0.923466692480956, iteration: 199130
loss: 1.020334243774414,grad_norm: 0.9344615331426641, iteration: 199131
loss: 0.9394153356552124,grad_norm: 0.9331322052445076, iteration: 199132
loss: 1.0090110301971436,grad_norm: 0.9999992774927521, iteration: 199133
loss: 1.1112029552459717,grad_norm: 0.9999990990007951, iteration: 199134
loss: 0.9895169138908386,grad_norm: 0.9482687262270273, iteration: 199135
loss: 0.9710808992385864,grad_norm: 0.9999989828806609, iteration: 199136
loss: 0.9976367354393005,grad_norm: 0.9448891893116291, iteration: 199137
loss: 0.9773964285850525,grad_norm: 0.9205796167666745, iteration: 199138
loss: 1.085266351699829,grad_norm: 0.9181544456887418, iteration: 199139
loss: 1.0672355890274048,grad_norm: 0.9999991197334624, iteration: 199140
loss: 0.9896078109741211,grad_norm: 0.9999992274726307, iteration: 199141
loss: 1.023755431175232,grad_norm: 0.999999110571842, iteration: 199142
loss: 0.961936891078949,grad_norm: 0.999999159812033, iteration: 199143
loss: 0.9570515155792236,grad_norm: 0.7846185460396976, iteration: 199144
loss: 1.022341012954712,grad_norm: 0.8381790159953377, iteration: 199145
loss: 1.0047681331634521,grad_norm: 0.9999990101557272, iteration: 199146
loss: 1.002623438835144,grad_norm: 0.9999991170553839, iteration: 199147
loss: 0.997955322265625,grad_norm: 0.8595687613846967, iteration: 199148
loss: 1.0117260217666626,grad_norm: 0.9631145791866161, iteration: 199149
loss: 1.0608303546905518,grad_norm: 0.9999994675461517, iteration: 199150
loss: 0.952508807182312,grad_norm: 0.972737049656974, iteration: 199151
loss: 1.2299989461898804,grad_norm: 0.9999993153512867, iteration: 199152
loss: 1.033555269241333,grad_norm: 0.9999992349654739, iteration: 199153
loss: 0.9875053763389587,grad_norm: 0.9565182721462495, iteration: 199154
loss: 1.0191335678100586,grad_norm: 0.9999992458616836, iteration: 199155
loss: 0.9979480504989624,grad_norm: 0.8165407343815756, iteration: 199156
loss: 1.0621178150177002,grad_norm: 0.9999999628965149, iteration: 199157
loss: 1.0193370580673218,grad_norm: 0.9115922525191326, iteration: 199158
loss: 1.0819014310836792,grad_norm: 0.9999990889697878, iteration: 199159
loss: 0.9951404929161072,grad_norm: 0.9378248337323838, iteration: 199160
loss: 1.0069355964660645,grad_norm: 0.9999907639389293, iteration: 199161
loss: 1.015335202217102,grad_norm: 1.0000000834155423, iteration: 199162
loss: 0.9804846048355103,grad_norm: 0.9035110782119262, iteration: 199163
loss: 1.0003688335418701,grad_norm: 0.8901769710455667, iteration: 199164
loss: 1.025765299797058,grad_norm: 0.9999991982397903, iteration: 199165
loss: 0.9988555908203125,grad_norm: 0.8894855046891719, iteration: 199166
loss: 0.9924902319908142,grad_norm: 0.8776733179746967, iteration: 199167
loss: 1.0050079822540283,grad_norm: 0.9792757657990689, iteration: 199168
loss: 1.0059010982513428,grad_norm: 0.8895653330211196, iteration: 199169
loss: 1.0004643201828003,grad_norm: 0.882779223595464, iteration: 199170
loss: 0.977853000164032,grad_norm: 0.9999990465016504, iteration: 199171
loss: 1.0391491651535034,grad_norm: 0.9999990120793296, iteration: 199172
loss: 1.0364927053451538,grad_norm: 0.9999996623366891, iteration: 199173
loss: 0.9619312882423401,grad_norm: 0.8998574826322014, iteration: 199174
loss: 0.9803661108016968,grad_norm: 0.9375358542196859, iteration: 199175
loss: 0.9694022536277771,grad_norm: 0.9999990370420031, iteration: 199176
loss: 1.019235610961914,grad_norm: 0.7650674851270625, iteration: 199177
loss: 1.021667718887329,grad_norm: 0.9082905683311869, iteration: 199178
loss: 1.009229302406311,grad_norm: 0.9999991910511339, iteration: 199179
loss: 1.0013338327407837,grad_norm: 0.7532846628238229, iteration: 199180
loss: 0.9876812696456909,grad_norm: 0.9999991985992065, iteration: 199181
loss: 1.0056606531143188,grad_norm: 0.8940249428452831, iteration: 199182
loss: 1.036936640739441,grad_norm: 0.7929906081688914, iteration: 199183
loss: 1.0726397037506104,grad_norm: 0.9999998342854873, iteration: 199184
loss: 0.9944058060646057,grad_norm: 0.8700404804434686, iteration: 199185
loss: 0.9502315521240234,grad_norm: 0.8872398003078997, iteration: 199186
loss: 1.000982403755188,grad_norm: 0.9408252154001684, iteration: 199187
loss: 1.012020468711853,grad_norm: 0.9999993812971567, iteration: 199188
loss: 1.0161713361740112,grad_norm: 0.9999990682225386, iteration: 199189
loss: 1.0244662761688232,grad_norm: 0.9912048992504835, iteration: 199190
loss: 1.0638258457183838,grad_norm: 0.9999997746464492, iteration: 199191
loss: 1.0033849477767944,grad_norm: 0.854565918848176, iteration: 199192
loss: 0.9714920520782471,grad_norm: 0.9672692208167314, iteration: 199193
loss: 1.0061309337615967,grad_norm: 0.800705415550705, iteration: 199194
loss: 1.0049514770507812,grad_norm: 0.9999990945938544, iteration: 199195
loss: 0.9787026047706604,grad_norm: 0.9999991044549524, iteration: 199196
loss: 0.9808729290962219,grad_norm: 0.9180095761059616, iteration: 199197
loss: 1.0109128952026367,grad_norm: 0.9999994361016413, iteration: 199198
loss: 1.0513865947723389,grad_norm: 0.9999995777470831, iteration: 199199
loss: 1.032682180404663,grad_norm: 0.9999990270288422, iteration: 199200
loss: 1.0086880922317505,grad_norm: 0.9999991140868499, iteration: 199201
loss: 1.0082422494888306,grad_norm: 0.9999990931188731, iteration: 199202
loss: 1.027571201324463,grad_norm: 0.9999991176135997, iteration: 199203
loss: 0.9840080142021179,grad_norm: 0.8005105797064993, iteration: 199204
loss: 1.013542652130127,grad_norm: 0.99999960339295, iteration: 199205
loss: 1.0008357763290405,grad_norm: 0.9999992005363253, iteration: 199206
loss: 0.9852553606033325,grad_norm: 0.8413749988884788, iteration: 199207
loss: 0.9702809453010559,grad_norm: 0.9999989692861425, iteration: 199208
loss: 0.9476996660232544,grad_norm: 0.9999991228795447, iteration: 199209
loss: 0.9837013483047485,grad_norm: 0.970091877345559, iteration: 199210
loss: 0.9917159080505371,grad_norm: 0.8525610995452615, iteration: 199211
loss: 1.0364198684692383,grad_norm: 0.9440612759390475, iteration: 199212
loss: 0.9938279390335083,grad_norm: 0.9999992303053566, iteration: 199213
loss: 1.0172531604766846,grad_norm: 0.9177755341401752, iteration: 199214
loss: 1.0012165307998657,grad_norm: 0.9149773304822804, iteration: 199215
loss: 0.9884840846061707,grad_norm: 0.885193064260869, iteration: 199216
loss: 0.9869120121002197,grad_norm: 0.9829655572531033, iteration: 199217
loss: 1.0215903520584106,grad_norm: 0.9999989704237086, iteration: 199218
loss: 0.9545516967773438,grad_norm: 0.977456922735564, iteration: 199219
loss: 0.9971932768821716,grad_norm: 0.8489386993649948, iteration: 199220
loss: 1.0171977281570435,grad_norm: 0.900385112972246, iteration: 199221
loss: 1.0100539922714233,grad_norm: 0.9999994388633013, iteration: 199222
loss: 1.01360285282135,grad_norm: 0.9897911449565873, iteration: 199223
loss: 1.0415855646133423,grad_norm: 0.9779409354284768, iteration: 199224
loss: 1.0132883787155151,grad_norm: 0.9232889408227184, iteration: 199225
loss: 1.0152896642684937,grad_norm: 0.9936097884017269, iteration: 199226
loss: 0.9860800504684448,grad_norm: 0.9649099911183828, iteration: 199227
loss: 1.0057231187820435,grad_norm: 0.999999088339053, iteration: 199228
loss: 0.9901014566421509,grad_norm: 0.9999994211734723, iteration: 199229
loss: 0.9859008193016052,grad_norm: 0.9812346670242853, iteration: 199230
loss: 1.0148911476135254,grad_norm: 0.9960153419599957, iteration: 199231
loss: 0.9743045568466187,grad_norm: 0.9999990765555073, iteration: 199232
loss: 0.9820901155471802,grad_norm: 0.9999993030912468, iteration: 199233
loss: 0.9847630262374878,grad_norm: 0.9909467334682588, iteration: 199234
loss: 0.9832879900932312,grad_norm: 0.9287619298658686, iteration: 199235
loss: 1.0391227006912231,grad_norm: 0.906797040003458, iteration: 199236
loss: 0.975322961807251,grad_norm: 0.9099083799019831, iteration: 199237
loss: 0.9730767607688904,grad_norm: 0.9391836407711325, iteration: 199238
loss: 0.9971089363098145,grad_norm: 0.9442609929658476, iteration: 199239
loss: 1.083220362663269,grad_norm: 0.9999990599270019, iteration: 199240
loss: 0.9754328727722168,grad_norm: 0.9405603271809956, iteration: 199241
loss: 1.0193203687667847,grad_norm: 0.9026195413011681, iteration: 199242
loss: 1.0007835626602173,grad_norm: 0.852446361158008, iteration: 199243
loss: 1.0257673263549805,grad_norm: 0.9999991600171607, iteration: 199244
loss: 1.0065734386444092,grad_norm: 0.9999992547404842, iteration: 199245
loss: 1.0032342672348022,grad_norm: 0.8347693316354586, iteration: 199246
loss: 1.048218011856079,grad_norm: 0.99999957475286, iteration: 199247
loss: 1.0675599575042725,grad_norm: 0.9999992421129231, iteration: 199248
loss: 1.0219886302947998,grad_norm: 0.9011801767282744, iteration: 199249
loss: 0.9953941106796265,grad_norm: 0.9502588388258186, iteration: 199250
loss: 1.0360112190246582,grad_norm: 0.9592028695544198, iteration: 199251
loss: 0.9947776198387146,grad_norm: 0.9237428134420548, iteration: 199252
loss: 1.0107365846633911,grad_norm: 0.9708042943269319, iteration: 199253
loss: 1.0344445705413818,grad_norm: 0.9999992207651479, iteration: 199254
loss: 1.0407676696777344,grad_norm: 0.9999990245846534, iteration: 199255
loss: 0.9802290797233582,grad_norm: 0.9618754618680201, iteration: 199256
loss: 1.0373613834381104,grad_norm: 0.9999992053064518, iteration: 199257
loss: 1.0367313623428345,grad_norm: 0.9999991140996145, iteration: 199258
loss: 1.0288666486740112,grad_norm: 0.9999991672680609, iteration: 199259
loss: 1.0183054208755493,grad_norm: 0.9999994032970887, iteration: 199260
loss: 1.0029592514038086,grad_norm: 0.9239215517303659, iteration: 199261
loss: 1.0035849809646606,grad_norm: 0.8727240550595486, iteration: 199262
loss: 0.9932036399841309,grad_norm: 0.9802441949379922, iteration: 199263
loss: 1.002245306968689,grad_norm: 0.9999992401053115, iteration: 199264
loss: 0.9833351969718933,grad_norm: 0.9751572173719244, iteration: 199265
loss: 0.9949135184288025,grad_norm: 0.9999993223453709, iteration: 199266
loss: 1.0781642198562622,grad_norm: 0.9482371477844871, iteration: 199267
loss: 0.9953652024269104,grad_norm: 0.9001288324348606, iteration: 199268
loss: 1.0066916942596436,grad_norm: 0.9999991669207903, iteration: 199269
loss: 1.0115129947662354,grad_norm: 0.890274135620434, iteration: 199270
loss: 1.0405653715133667,grad_norm: 0.9999994462418109, iteration: 199271
loss: 0.9685407280921936,grad_norm: 0.900984873006861, iteration: 199272
loss: 1.0184435844421387,grad_norm: 0.9999991299028473, iteration: 199273
loss: 1.0396004915237427,grad_norm: 0.7401997256367377, iteration: 199274
loss: 0.999373733997345,grad_norm: 0.9569535679949587, iteration: 199275
loss: 1.015458583831787,grad_norm: 0.99999902898814, iteration: 199276
loss: 0.9930419325828552,grad_norm: 0.8449250883109793, iteration: 199277
loss: 0.9790562391281128,grad_norm: 0.91626207025393, iteration: 199278
loss: 1.0621682405471802,grad_norm: 0.9396929758170712, iteration: 199279
loss: 1.0004615783691406,grad_norm: 0.9124161952776304, iteration: 199280
loss: 1.0297681093215942,grad_norm: 0.9999990629852513, iteration: 199281
loss: 0.9697240591049194,grad_norm: 0.999999064637349, iteration: 199282
loss: 0.9917314052581787,grad_norm: 0.8798885935765266, iteration: 199283
loss: 0.9845691919326782,grad_norm: 0.9726992208289628, iteration: 199284
loss: 1.0133541822433472,grad_norm: 0.8131467006771644, iteration: 199285
loss: 0.9738967418670654,grad_norm: 0.9999996133888921, iteration: 199286
loss: 1.0185251235961914,grad_norm: 0.9999998544401557, iteration: 199287
loss: 0.9872605204582214,grad_norm: 0.9999990185900117, iteration: 199288
loss: 0.9897403120994568,grad_norm: 0.9999990965532822, iteration: 199289
loss: 1.0133821964263916,grad_norm: 0.9999991121775983, iteration: 199290
loss: 1.052955150604248,grad_norm: 0.9999995791251902, iteration: 199291
loss: 0.9943522214889526,grad_norm: 0.9531944802651694, iteration: 199292
loss: 1.0118345022201538,grad_norm: 0.9734270581632966, iteration: 199293
loss: 0.9755738973617554,grad_norm: 0.9552741424922685, iteration: 199294
loss: 1.0168434381484985,grad_norm: 0.9015015578741382, iteration: 199295
loss: 1.0020020008087158,grad_norm: 0.9999993919573725, iteration: 199296
loss: 0.9867479801177979,grad_norm: 0.8194735583393183, iteration: 199297
loss: 1.019531011581421,grad_norm: 0.9162421225069763, iteration: 199298
loss: 1.0128865242004395,grad_norm: 0.9999990148795217, iteration: 199299
loss: 0.9936709403991699,grad_norm: 0.8289270743277583, iteration: 199300
loss: 0.984383761882782,grad_norm: 0.8749823071494139, iteration: 199301
loss: 0.9936970472335815,grad_norm: 0.8884830225699666, iteration: 199302
loss: 1.0016231536865234,grad_norm: 0.9251496750909578, iteration: 199303
loss: 0.9915041327476501,grad_norm: 0.9670076299273462, iteration: 199304
loss: 1.0149074792861938,grad_norm: 0.7797971086340668, iteration: 199305
loss: 0.9842864871025085,grad_norm: 0.9732829688037835, iteration: 199306
loss: 1.089691400527954,grad_norm: 0.8367490879236789, iteration: 199307
loss: 1.0031527280807495,grad_norm: 0.8866433210160363, iteration: 199308
loss: 1.0150612592697144,grad_norm: 0.8256557786443349, iteration: 199309
loss: 1.002998948097229,grad_norm: 0.930161401119881, iteration: 199310
loss: 0.9957200884819031,grad_norm: 0.9350546278581923, iteration: 199311
loss: 0.9802480936050415,grad_norm: 0.9766187176041605, iteration: 199312
loss: 1.035401701927185,grad_norm: 0.9999993464240339, iteration: 199313
loss: 0.9702544212341309,grad_norm: 0.952350091796756, iteration: 199314
loss: 0.9795844554901123,grad_norm: 0.9562974522568264, iteration: 199315
loss: 0.9766802191734314,grad_norm: 0.9032919692094548, iteration: 199316
loss: 1.0764245986938477,grad_norm: 0.9999997964717949, iteration: 199317
loss: 0.994550347328186,grad_norm: 0.927678923162517, iteration: 199318
loss: 1.0433721542358398,grad_norm: 0.9999992631959408, iteration: 199319
loss: 1.043910264968872,grad_norm: 0.9999990916309299, iteration: 199320
loss: 1.0165425539016724,grad_norm: 0.9743867944542304, iteration: 199321
loss: 0.9593067169189453,grad_norm: 0.84350211085583, iteration: 199322
loss: 0.9927850365638733,grad_norm: 0.8005773760928317, iteration: 199323
loss: 1.0314940214157104,grad_norm: 0.9999995506414009, iteration: 199324
loss: 1.0188732147216797,grad_norm: 0.9999997883664564, iteration: 199325
loss: 1.0191915035247803,grad_norm: 0.9552300901479576, iteration: 199326
loss: 1.0387924909591675,grad_norm: 0.9999990596003318, iteration: 199327
loss: 1.0179564952850342,grad_norm: 0.9999996017334961, iteration: 199328
loss: 1.0052876472473145,grad_norm: 0.9573723306713567, iteration: 199329
loss: 1.0145514011383057,grad_norm: 0.9999990231957205, iteration: 199330
loss: 1.0271276235580444,grad_norm: 0.9999991514931724, iteration: 199331
loss: 1.0134570598602295,grad_norm: 0.8754323054000526, iteration: 199332
loss: 1.0042386054992676,grad_norm: 0.9121302947436448, iteration: 199333
loss: 0.9748406410217285,grad_norm: 0.9479932085704975, iteration: 199334
loss: 1.0137991905212402,grad_norm: 0.9256392136358573, iteration: 199335
loss: 1.0071252584457397,grad_norm: 0.9559566339128274, iteration: 199336
loss: 0.9992769360542297,grad_norm: 0.9999990932312047, iteration: 199337
loss: 0.981426477432251,grad_norm: 0.9999990402710955, iteration: 199338
loss: 1.0073167085647583,grad_norm: 0.9999992471003762, iteration: 199339
loss: 1.0094621181488037,grad_norm: 0.9999990762340588, iteration: 199340
loss: 1.0226627588272095,grad_norm: 0.9999993793468056, iteration: 199341
loss: 0.976548969745636,grad_norm: 0.8568986458658694, iteration: 199342
loss: 0.977669894695282,grad_norm: 0.8741903339338235, iteration: 199343
loss: 1.0176125764846802,grad_norm: 0.9999996200496833, iteration: 199344
loss: 0.9946454763412476,grad_norm: 0.9869052316785392, iteration: 199345
loss: 1.0056782960891724,grad_norm: 0.9999993352109092, iteration: 199346
loss: 1.0271248817443848,grad_norm: 0.9762528362043532, iteration: 199347
loss: 0.9568566679954529,grad_norm: 0.9147823891296982, iteration: 199348
loss: 0.9778282642364502,grad_norm: 0.8497758819491452, iteration: 199349
loss: 0.9947249889373779,grad_norm: 0.9999991516643406, iteration: 199350
loss: 1.0655639171600342,grad_norm: 0.9999991417673298, iteration: 199351
loss: 1.0205016136169434,grad_norm: 0.835265053576222, iteration: 199352
loss: 1.0328655242919922,grad_norm: 0.9999991424932386, iteration: 199353
loss: 1.0061959028244019,grad_norm: 0.8834756877752808, iteration: 199354
loss: 1.0056262016296387,grad_norm: 0.8927356810573035, iteration: 199355
loss: 1.0088781118392944,grad_norm: 0.8131228969701122, iteration: 199356
loss: 0.9529417157173157,grad_norm: 0.9999990174930208, iteration: 199357
loss: 1.0265706777572632,grad_norm: 0.9798107114792309, iteration: 199358
loss: 1.023791790008545,grad_norm: 0.8770206522293145, iteration: 199359
loss: 0.9890052676200867,grad_norm: 0.9671957174060827, iteration: 199360
loss: 0.9897497892379761,grad_norm: 0.9616911492270633, iteration: 199361
loss: 1.0172662734985352,grad_norm: 0.9999993951705494, iteration: 199362
loss: 1.0732252597808838,grad_norm: 0.8707327411674401, iteration: 199363
loss: 1.0172263383865356,grad_norm: 0.9999991768038584, iteration: 199364
loss: 0.9946067333221436,grad_norm: 0.8331071206162859, iteration: 199365
loss: 0.9891229271888733,grad_norm: 0.9999997632894326, iteration: 199366
loss: 1.0106635093688965,grad_norm: 0.9033735370028227, iteration: 199367
loss: 0.9906709790229797,grad_norm: 0.887203733320059, iteration: 199368
loss: 0.983261227607727,grad_norm: 0.9682685541207544, iteration: 199369
loss: 0.9479048252105713,grad_norm: 0.9633874271755594, iteration: 199370
loss: 0.9709632992744446,grad_norm: 0.9638800642977168, iteration: 199371
loss: 0.9634493589401245,grad_norm: 0.9584547591048825, iteration: 199372
loss: 1.024245262145996,grad_norm: 0.894476902292956, iteration: 199373
loss: 1.0615196228027344,grad_norm: 0.9452389594432975, iteration: 199374
loss: 1.0057151317596436,grad_norm: 0.999999216186593, iteration: 199375
loss: 0.979365348815918,grad_norm: 0.9999989786430795, iteration: 199376
loss: 1.0043399333953857,grad_norm: 0.85085031406575, iteration: 199377
loss: 0.9860245585441589,grad_norm: 0.8796445020095999, iteration: 199378
loss: 1.010420322418213,grad_norm: 0.99999893710164, iteration: 199379
loss: 1.0159950256347656,grad_norm: 0.9189417627112537, iteration: 199380
loss: 1.039157748222351,grad_norm: 0.9999989932696218, iteration: 199381
loss: 0.9705096483230591,grad_norm: 0.9999991584341669, iteration: 199382
loss: 0.9774896502494812,grad_norm: 0.9061886807800962, iteration: 199383
loss: 0.9926154017448425,grad_norm: 0.9198769634930256, iteration: 199384
loss: 1.0295538902282715,grad_norm: 0.7809410512633311, iteration: 199385
loss: 1.0002142190933228,grad_norm: 0.8112468111716613, iteration: 199386
loss: 1.0082303285598755,grad_norm: 0.9999996807445188, iteration: 199387
loss: 0.9661821126937866,grad_norm: 0.9999991207231558, iteration: 199388
loss: 0.995537281036377,grad_norm: 0.8913849104959201, iteration: 199389
loss: 1.0191174745559692,grad_norm: 0.9999993872157658, iteration: 199390
loss: 0.9525116682052612,grad_norm: 0.9999991861132654, iteration: 199391
loss: 1.0360742807388306,grad_norm: 0.811509875129415, iteration: 199392
loss: 0.9862919449806213,grad_norm: 0.838590019733831, iteration: 199393
loss: 0.9809311032295227,grad_norm: 0.8880055293675017, iteration: 199394
loss: 1.0094871520996094,grad_norm: 0.8857284941712633, iteration: 199395
loss: 1.0081905126571655,grad_norm: 0.9163593947581038, iteration: 199396
loss: 0.9971643090248108,grad_norm: 0.9999999209282787, iteration: 199397
loss: 1.0032987594604492,grad_norm: 0.841494435544336, iteration: 199398
loss: 0.9564505815505981,grad_norm: 0.8658070596721736, iteration: 199399
loss: 0.9566022157669067,grad_norm: 0.9335613433442796, iteration: 199400
loss: 1.018786907196045,grad_norm: 0.9999992000215259, iteration: 199401
loss: 1.0269299745559692,grad_norm: 0.8297476491010394, iteration: 199402
loss: 0.992009162902832,grad_norm: 0.8306158464535052, iteration: 199403
loss: 1.016975998878479,grad_norm: 0.9999997490176894, iteration: 199404
loss: 0.9969137907028198,grad_norm: 0.8048168806345463, iteration: 199405
loss: 1.0406527519226074,grad_norm: 0.9999991786384991, iteration: 199406
loss: 1.0093132257461548,grad_norm: 0.9738272128900141, iteration: 199407
loss: 0.9710513949394226,grad_norm: 0.9611773202131929, iteration: 199408
loss: 1.0083662271499634,grad_norm: 0.9159272999765365, iteration: 199409
loss: 1.0365030765533447,grad_norm: 0.8760821817650861, iteration: 199410
loss: 1.0985218286514282,grad_norm: 0.9999993267264368, iteration: 199411
loss: 1.0100127458572388,grad_norm: 0.8285461406141913, iteration: 199412
loss: 1.0233757495880127,grad_norm: 0.9783306660365684, iteration: 199413
loss: 1.024715781211853,grad_norm: 0.9999991120714231, iteration: 199414
loss: 1.0083223581314087,grad_norm: 0.8026340791287254, iteration: 199415
loss: 0.9840324521064758,grad_norm: 0.9999992054695606, iteration: 199416
loss: 1.022767424583435,grad_norm: 0.999999141548982, iteration: 199417
loss: 0.9962600469589233,grad_norm: 0.9498782656910507, iteration: 199418
loss: 1.0034934282302856,grad_norm: 0.8611889802027525, iteration: 199419
loss: 1.0503336191177368,grad_norm: 0.999999096784027, iteration: 199420
loss: 1.0204702615737915,grad_norm: 0.9999992484026747, iteration: 199421
loss: 1.0315760374069214,grad_norm: 0.9999999382034499, iteration: 199422
loss: 0.9737419486045837,grad_norm: 0.9323589142362165, iteration: 199423
loss: 1.0422612428665161,grad_norm: 0.9939803815505854, iteration: 199424
loss: 1.044011116027832,grad_norm: 0.9999995750753389, iteration: 199425
loss: 1.0103622674942017,grad_norm: 0.9999990433495946, iteration: 199426
loss: 0.9982897639274597,grad_norm: 0.999999483881955, iteration: 199427
loss: 0.9894282817840576,grad_norm: 0.78350517524758, iteration: 199428
loss: 1.0190995931625366,grad_norm: 0.8609968734643844, iteration: 199429
loss: 1.0238043069839478,grad_norm: 0.904522692261456, iteration: 199430
loss: 0.9959253668785095,grad_norm: 0.8873342753901224, iteration: 199431
loss: 1.0191071033477783,grad_norm: 0.9999994536129672, iteration: 199432
loss: 1.0262593030929565,grad_norm: 0.9999991678878578, iteration: 199433
loss: 1.0089070796966553,grad_norm: 0.9999990033816938, iteration: 199434
loss: 1.0299797058105469,grad_norm: 0.9963996492481416, iteration: 199435
loss: 0.9570322632789612,grad_norm: 0.9999991693383536, iteration: 199436
loss: 1.0765249729156494,grad_norm: 0.9999990499647794, iteration: 199437
loss: 0.9873476028442383,grad_norm: 0.8123493293131986, iteration: 199438
loss: 0.9806452989578247,grad_norm: 0.9495778507238125, iteration: 199439
loss: 0.9892886877059937,grad_norm: 0.9999991147942424, iteration: 199440
loss: 0.9681932926177979,grad_norm: 0.7899254030827726, iteration: 199441
loss: 1.0442593097686768,grad_norm: 0.9292215148970908, iteration: 199442
loss: 0.9849005937576294,grad_norm: 0.9999990546371215, iteration: 199443
loss: 0.9891515374183655,grad_norm: 0.9999991618772109, iteration: 199444
loss: 1.0216344594955444,grad_norm: 0.914774235051599, iteration: 199445
loss: 1.0378706455230713,grad_norm: 0.9583933304659455, iteration: 199446
loss: 0.9937122464179993,grad_norm: 0.9492359496512577, iteration: 199447
loss: 1.0209276676177979,grad_norm: 0.9963780475595756, iteration: 199448
loss: 1.028778314590454,grad_norm: 0.87959462891607, iteration: 199449
loss: 1.0419925451278687,grad_norm: 0.9761974567674989, iteration: 199450
loss: 1.0332000255584717,grad_norm: 0.9999995955410613, iteration: 199451
loss: 0.989589273929596,grad_norm: 0.949343886397164, iteration: 199452
loss: 1.008696436882019,grad_norm: 0.9019928894169531, iteration: 199453
loss: 0.9967589378356934,grad_norm: 0.9361938904435586, iteration: 199454
loss: 1.000770926475525,grad_norm: 0.868003102283094, iteration: 199455
loss: 1.004421591758728,grad_norm: 0.8845523480098916, iteration: 199456
loss: 0.9789286851882935,grad_norm: 0.9999992273029555, iteration: 199457
loss: 1.008675217628479,grad_norm: 0.9999993926160656, iteration: 199458
loss: 0.9794038534164429,grad_norm: 0.9999994387656412, iteration: 199459
loss: 1.0108124017715454,grad_norm: 0.9353622085891388, iteration: 199460
loss: 1.0095415115356445,grad_norm: 0.9999992118715461, iteration: 199461
loss: 1.0340766906738281,grad_norm: 0.9111076577488199, iteration: 199462
loss: 1.010903239250183,grad_norm: 0.9999992525867631, iteration: 199463
loss: 0.9946058392524719,grad_norm: 0.9082009290300859, iteration: 199464
loss: 1.004594326019287,grad_norm: 0.7704960733474735, iteration: 199465
loss: 1.0707560777664185,grad_norm: 0.9999991288042102, iteration: 199466
loss: 0.9982436895370483,grad_norm: 0.8042947368522384, iteration: 199467
loss: 0.9876247048377991,grad_norm: 0.9514107936648547, iteration: 199468
loss: 0.9921842813491821,grad_norm: 0.9918263086970182, iteration: 199469
loss: 0.989205539226532,grad_norm: 0.9999990889366492, iteration: 199470
loss: 1.0001744031906128,grad_norm: 0.9999990830175353, iteration: 199471
loss: 1.023913025856018,grad_norm: 0.9999990906971675, iteration: 199472
loss: 0.9928863644599915,grad_norm: 0.9927584106232068, iteration: 199473
loss: 0.9735100865364075,grad_norm: 0.9999996689704217, iteration: 199474
loss: 1.0323352813720703,grad_norm: 0.9999993967408043, iteration: 199475
loss: 1.00491464138031,grad_norm: 0.7187111588192379, iteration: 199476
loss: 0.985741913318634,grad_norm: 0.9999991221502289, iteration: 199477
loss: 0.9750745892524719,grad_norm: 0.9909714225948548, iteration: 199478
loss: 0.9845319986343384,grad_norm: 0.8845951251818813, iteration: 199479
loss: 1.0120915174484253,grad_norm: 0.9536904256929646, iteration: 199480
loss: 1.0026613473892212,grad_norm: 0.8213622450002432, iteration: 199481
loss: 1.0270237922668457,grad_norm: 0.9835179287647113, iteration: 199482
loss: 1.037951111793518,grad_norm: 0.9999991897898, iteration: 199483
loss: 0.9848838448524475,grad_norm: 0.9999991386166458, iteration: 199484
loss: 0.998930037021637,grad_norm: 0.9999991437269263, iteration: 199485
loss: 1.0259346961975098,grad_norm: 0.9852455133905561, iteration: 199486
loss: 1.004185438156128,grad_norm: 0.9999991720748426, iteration: 199487
loss: 1.0176624059677124,grad_norm: 0.9999991101521456, iteration: 199488
loss: 1.0205843448638916,grad_norm: 0.9999991655023746, iteration: 199489
loss: 0.9955552816390991,grad_norm: 0.9999991191907365, iteration: 199490
loss: 1.0227340459823608,grad_norm: 0.999999045412336, iteration: 199491
loss: 1.008870244026184,grad_norm: 0.9999991084302193, iteration: 199492
loss: 0.9706253409385681,grad_norm: 0.9054414011744574, iteration: 199493
loss: 1.0108225345611572,grad_norm: 0.9363927747112849, iteration: 199494
loss: 1.02322518825531,grad_norm: 0.9999990556576541, iteration: 199495
loss: 1.0447458028793335,grad_norm: 0.999999044205891, iteration: 199496
loss: 1.0200790166854858,grad_norm: 0.9999994047031328, iteration: 199497
loss: 1.0034888982772827,grad_norm: 0.999998968561803, iteration: 199498
loss: 1.0325837135314941,grad_norm: 0.9999991708994829, iteration: 199499
loss: 1.0114531517028809,grad_norm: 0.965343012397121, iteration: 199500
loss: 0.9527903199195862,grad_norm: 0.8690479324558236, iteration: 199501
loss: 0.9858844876289368,grad_norm: 0.7743143953882429, iteration: 199502
loss: 0.9919726848602295,grad_norm: 0.906226680344806, iteration: 199503
loss: 0.954277753829956,grad_norm: 0.9837383083033346, iteration: 199504
loss: 0.998524010181427,grad_norm: 0.9033994215995957, iteration: 199505
loss: 1.0149519443511963,grad_norm: 0.7969344880219787, iteration: 199506
loss: 0.9707897901535034,grad_norm: 0.7879032487740316, iteration: 199507
loss: 1.047513484954834,grad_norm: 0.9999991390838775, iteration: 199508
loss: 0.9994821548461914,grad_norm: 0.9999991869090129, iteration: 199509
loss: 1.0257011651992798,grad_norm: 0.9999989355995799, iteration: 199510
loss: 0.9698967933654785,grad_norm: 0.8781610945810862, iteration: 199511
loss: 0.9828386902809143,grad_norm: 0.941653586542856, iteration: 199512
loss: 0.9978030323982239,grad_norm: 0.8550309184393956, iteration: 199513
loss: 1.0238127708435059,grad_norm: 0.9567043359765707, iteration: 199514
loss: 0.9849216938018799,grad_norm: 0.9424720170004257, iteration: 199515
loss: 1.0011050701141357,grad_norm: 0.9498707646123952, iteration: 199516
loss: 0.9999532699584961,grad_norm: 0.9999991105939701, iteration: 199517
loss: 1.0024347305297852,grad_norm: 0.9999989166185069, iteration: 199518
loss: 1.002485990524292,grad_norm: 0.9999990760711659, iteration: 199519
loss: 1.017284631729126,grad_norm: 0.9999991813410863, iteration: 199520
loss: 0.9926562905311584,grad_norm: 0.8399165549560321, iteration: 199521
loss: 0.9998670220375061,grad_norm: 0.9583195767754189, iteration: 199522
loss: 1.0142568349838257,grad_norm: 0.9999990537125335, iteration: 199523
loss: 1.0235415697097778,grad_norm: 0.9909557451236539, iteration: 199524
loss: 1.0098457336425781,grad_norm: 0.9385794691888176, iteration: 199525
loss: 1.0132668018341064,grad_norm: 0.8070525027909824, iteration: 199526
loss: 0.9942387342453003,grad_norm: 0.9256872009203386, iteration: 199527
loss: 1.0581940412521362,grad_norm: 0.999999180789731, iteration: 199528
loss: 1.0325127840042114,grad_norm: 0.9073937526252353, iteration: 199529
loss: 1.0080393552780151,grad_norm: 0.9999992620674908, iteration: 199530
loss: 0.9607475996017456,grad_norm: 0.8741536877052469, iteration: 199531
loss: 0.9747576117515564,grad_norm: 0.9433535179887909, iteration: 199532
loss: 1.0329054594039917,grad_norm: 0.9681598462032968, iteration: 199533
loss: 0.9787520170211792,grad_norm: 0.8709713522196584, iteration: 199534
loss: 0.9641878008842468,grad_norm: 0.9601960730689029, iteration: 199535
loss: 0.9960671663284302,grad_norm: 0.8025274468824127, iteration: 199536
loss: 0.997528076171875,grad_norm: 0.8881165906105029, iteration: 199537
loss: 1.0047956705093384,grad_norm: 0.8908012201398742, iteration: 199538
loss: 1.0102344751358032,grad_norm: 0.7457426451712623, iteration: 199539
loss: 0.9840627312660217,grad_norm: 0.8649185354682776, iteration: 199540
loss: 0.9959056377410889,grad_norm: 0.8324356293752527, iteration: 199541
loss: 1.0118716955184937,grad_norm: 0.9999991433413392, iteration: 199542
loss: 1.0016709566116333,grad_norm: 0.9056659768038368, iteration: 199543
loss: 0.9641379714012146,grad_norm: 0.9769458889545842, iteration: 199544
loss: 1.0057977437973022,grad_norm: 0.965901847267811, iteration: 199545
loss: 0.9992110729217529,grad_norm: 0.9999990116578014, iteration: 199546
loss: 0.9896997809410095,grad_norm: 0.999999166707305, iteration: 199547
loss: 0.9883631467819214,grad_norm: 0.9864603747591454, iteration: 199548
loss: 1.0211362838745117,grad_norm: 0.9305627921413835, iteration: 199549
loss: 1.0053677558898926,grad_norm: 0.8444254959544631, iteration: 199550
loss: 0.9518135190010071,grad_norm: 0.873487964360936, iteration: 199551
loss: 0.9709619283676147,grad_norm: 0.9999991827093961, iteration: 199552
loss: 1.0188074111938477,grad_norm: 0.9999991063624394, iteration: 199553
loss: 1.022716999053955,grad_norm: 0.9541558579218448, iteration: 199554
loss: 1.0020664930343628,grad_norm: 0.9999992522292427, iteration: 199555
loss: 1.0124948024749756,grad_norm: 0.9453600895298914, iteration: 199556
loss: 0.9867435097694397,grad_norm: 0.9217437036224436, iteration: 199557
loss: 1.006536602973938,grad_norm: 0.9796468058606599, iteration: 199558
loss: 1.0251237154006958,grad_norm: 0.9509670240988853, iteration: 199559
loss: 1.0741851329803467,grad_norm: 0.999999315291531, iteration: 199560
loss: 0.9994692802429199,grad_norm: 0.9999993745953951, iteration: 199561
loss: 0.9622952342033386,grad_norm: 0.8550952347324291, iteration: 199562
loss: 1.0327184200286865,grad_norm: 0.9999991638045275, iteration: 199563
loss: 1.0409451723098755,grad_norm: 0.9999997508671484, iteration: 199564
loss: 0.9984567761421204,grad_norm: 0.8257198086739422, iteration: 199565
loss: 1.0082557201385498,grad_norm: 0.979711191752122, iteration: 199566
loss: 0.9815425872802734,grad_norm: 0.9276965902567148, iteration: 199567
loss: 0.9801956415176392,grad_norm: 0.8958386429692392, iteration: 199568
loss: 1.0538994073867798,grad_norm: 0.8993428705125162, iteration: 199569
loss: 1.010284662246704,grad_norm: 0.9999990762352885, iteration: 199570
loss: 1.028847098350525,grad_norm: 0.862720435733234, iteration: 199571
loss: 1.008039951324463,grad_norm: 0.9162267362452334, iteration: 199572
loss: 1.0315238237380981,grad_norm: 0.999999572920861, iteration: 199573
loss: 0.9801437854766846,grad_norm: 0.9738961273982409, iteration: 199574
loss: 1.0264499187469482,grad_norm: 0.9999990363967863, iteration: 199575
loss: 0.9770706295967102,grad_norm: 0.8756107829404746, iteration: 199576
loss: 1.0049303770065308,grad_norm: 0.7915933177120519, iteration: 199577
loss: 1.0367467403411865,grad_norm: 0.8521110957325146, iteration: 199578
loss: 1.106878399848938,grad_norm: 0.9999993624783744, iteration: 199579
loss: 1.0266094207763672,grad_norm: 0.8911607011936375, iteration: 199580
loss: 0.9957404136657715,grad_norm: 0.9962847010821018, iteration: 199581
loss: 0.9666134119033813,grad_norm: 0.962485953066028, iteration: 199582
loss: 0.9866805672645569,grad_norm: 0.9999990103221978, iteration: 199583
loss: 1.0004266500473022,grad_norm: 0.8476079879682473, iteration: 199584
loss: 1.0073599815368652,grad_norm: 0.9999997172614452, iteration: 199585
loss: 0.9702556133270264,grad_norm: 0.8530845828545679, iteration: 199586
loss: 1.0350779294967651,grad_norm: 0.9359449588065261, iteration: 199587
loss: 1.0108917951583862,grad_norm: 0.9999997369340592, iteration: 199588
loss: 1.0077413320541382,grad_norm: 0.9104503675138574, iteration: 199589
loss: 1.0061442852020264,grad_norm: 0.8864768768889167, iteration: 199590
loss: 1.0493041276931763,grad_norm: 0.9999991575082764, iteration: 199591
loss: 0.9900712370872498,grad_norm: 0.8281446497849987, iteration: 199592
loss: 1.0249810218811035,grad_norm: 0.9593430310401101, iteration: 199593
loss: 1.0153449773788452,grad_norm: 0.9999990858402115, iteration: 199594
loss: 0.9970016479492188,grad_norm: 0.8920392071598674, iteration: 199595
loss: 1.0096778869628906,grad_norm: 0.9267914385901925, iteration: 199596
loss: 1.0172388553619385,grad_norm: 0.8168252512310965, iteration: 199597
loss: 1.0483033657073975,grad_norm: 0.9999991706994195, iteration: 199598
loss: 0.9406262040138245,grad_norm: 0.9556081942291729, iteration: 199599
loss: 1.0089843273162842,grad_norm: 0.9188050111524886, iteration: 199600
loss: 1.0089234113693237,grad_norm: 0.9999993698414653, iteration: 199601
loss: 1.024551510810852,grad_norm: 0.9094601310969115, iteration: 199602
loss: 0.9867729544639587,grad_norm: 0.9734656504790963, iteration: 199603
loss: 0.9910470247268677,grad_norm: 0.9999991174258926, iteration: 199604
loss: 1.0055389404296875,grad_norm: 0.9999991524779395, iteration: 199605
loss: 1.0266281366348267,grad_norm: 0.9999989347704991, iteration: 199606
loss: 1.0134024620056152,grad_norm: 0.8013699788333266, iteration: 199607
loss: 0.9893327951431274,grad_norm: 0.9007609016823852, iteration: 199608
loss: 1.0186102390289307,grad_norm: 0.8483007482155251, iteration: 199609
loss: 1.0104327201843262,grad_norm: 0.9999992258566293, iteration: 199610
loss: 0.9973230361938477,grad_norm: 0.9999990247763019, iteration: 199611
loss: 1.0209530591964722,grad_norm: 0.9999992488593085, iteration: 199612
loss: 0.9971623420715332,grad_norm: 0.9871738111634982, iteration: 199613
loss: 0.9740588068962097,grad_norm: 0.9999991551007847, iteration: 199614
loss: 0.9493972063064575,grad_norm: 0.9659709380534509, iteration: 199615
loss: 1.0102965831756592,grad_norm: 0.9661306766237177, iteration: 199616
loss: 0.9701808094978333,grad_norm: 0.9669658327820619, iteration: 199617
loss: 1.035475492477417,grad_norm: 0.999999146652924, iteration: 199618
loss: 0.9801644682884216,grad_norm: 0.9540385210573624, iteration: 199619
loss: 1.0371347665786743,grad_norm: 0.999999341065588, iteration: 199620
loss: 1.031794786453247,grad_norm: 0.8273905043550498, iteration: 199621
loss: 1.0063766241073608,grad_norm: 0.9733452874855517, iteration: 199622
loss: 1.0124216079711914,grad_norm: 0.9226857917097878, iteration: 199623
loss: 1.0267466306686401,grad_norm: 0.9999991895371114, iteration: 199624
loss: 0.9485271573066711,grad_norm: 0.9404320220077036, iteration: 199625
loss: 0.9982243776321411,grad_norm: 0.7752060665789012, iteration: 199626
loss: 0.9962221384048462,grad_norm: 0.9141789968641274, iteration: 199627
loss: 1.007189154624939,grad_norm: 0.9999992478405684, iteration: 199628
loss: 0.9774854779243469,grad_norm: 0.8191364392319124, iteration: 199629
loss: 0.98128342628479,grad_norm: 0.9316779478678031, iteration: 199630
loss: 0.9840164184570312,grad_norm: 0.8146171298771708, iteration: 199631
loss: 1.0179554224014282,grad_norm: 0.9710793411972279, iteration: 199632
loss: 0.9903296232223511,grad_norm: 0.8019521321062304, iteration: 199633
loss: 1.0403249263763428,grad_norm: 0.8852475786951046, iteration: 199634
loss: 1.0068501234054565,grad_norm: 0.999999120443699, iteration: 199635
loss: 0.9957112669944763,grad_norm: 0.9897127899385236, iteration: 199636
loss: 0.9624876976013184,grad_norm: 0.9999990302596188, iteration: 199637
loss: 0.9818091988563538,grad_norm: 0.893272614946673, iteration: 199638
loss: 1.0005059242248535,grad_norm: 0.9999991649287572, iteration: 199639
loss: 1.0097661018371582,grad_norm: 0.9999992199765667, iteration: 199640
loss: 1.009279727935791,grad_norm: 0.9431654842234615, iteration: 199641
loss: 1.0058449506759644,grad_norm: 0.9999989872523688, iteration: 199642
loss: 1.0006221532821655,grad_norm: 0.9999991255117897, iteration: 199643
loss: 1.0164474248886108,grad_norm: 0.8920921503729965, iteration: 199644
loss: 1.0186333656311035,grad_norm: 0.9088142243765751, iteration: 199645
loss: 0.9892721772193909,grad_norm: 0.9999990509753197, iteration: 199646
loss: 0.9879202842712402,grad_norm: 0.9999992196495544, iteration: 199647
loss: 0.9937425851821899,grad_norm: 0.9999993290598724, iteration: 199648
loss: 1.0163182020187378,grad_norm: 0.8908508564995943, iteration: 199649
loss: 0.9985917806625366,grad_norm: 0.8928807935240436, iteration: 199650
loss: 0.9909929633140564,grad_norm: 0.9999993341898715, iteration: 199651
loss: 1.0002015829086304,grad_norm: 0.9999990364541879, iteration: 199652
loss: 0.9843156337738037,grad_norm: 0.8545914141351965, iteration: 199653
loss: 1.019679307937622,grad_norm: 0.9925389839218709, iteration: 199654
loss: 1.0210621356964111,grad_norm: 0.9723824153863934, iteration: 199655
loss: 0.9894254207611084,grad_norm: 0.9360957735142686, iteration: 199656
loss: 0.9896349906921387,grad_norm: 0.9999990466511348, iteration: 199657
loss: 1.005484938621521,grad_norm: 0.9999990980624892, iteration: 199658
loss: 0.9847347736358643,grad_norm: 0.9478424337498235, iteration: 199659
loss: 0.9943631291389465,grad_norm: 0.9303227385968219, iteration: 199660
loss: 0.9808443188667297,grad_norm: 0.9150567736408542, iteration: 199661
loss: 1.0160170793533325,grad_norm: 0.999999133568973, iteration: 199662
loss: 0.9821426272392273,grad_norm: 0.9258662358595697, iteration: 199663
loss: 1.0006219148635864,grad_norm: 0.9999997006633793, iteration: 199664
loss: 0.9974551200866699,grad_norm: 0.9647151079893118, iteration: 199665
loss: 1.0235484838485718,grad_norm: 0.9999990728902327, iteration: 199666
loss: 1.013684868812561,grad_norm: 0.9256239127537722, iteration: 199667
loss: 0.9438806176185608,grad_norm: 0.999998988066526, iteration: 199668
loss: 0.9763427972793579,grad_norm: 0.8876184294875206, iteration: 199669
loss: 0.9677379727363586,grad_norm: 0.9673007088201716, iteration: 199670
loss: 1.017783284187317,grad_norm: 0.9999991905931213, iteration: 199671
loss: 1.0101863145828247,grad_norm: 0.9999991362029558, iteration: 199672
loss: 0.967911422252655,grad_norm: 0.8568888134623383, iteration: 199673
loss: 0.9720029234886169,grad_norm: 0.8324162151662501, iteration: 199674
loss: 1.0326968431472778,grad_norm: 0.8674222656781965, iteration: 199675
loss: 1.0071792602539062,grad_norm: 0.9999991936880005, iteration: 199676
loss: 1.0412944555282593,grad_norm: 0.9999992095416802, iteration: 199677
loss: 0.976373553276062,grad_norm: 0.8759998706603921, iteration: 199678
loss: 1.0438841581344604,grad_norm: 0.9999991775502783, iteration: 199679
loss: 0.9789142608642578,grad_norm: 0.8511611411118611, iteration: 199680
loss: 1.01532781124115,grad_norm: 0.949625105376433, iteration: 199681
loss: 0.9788874983787537,grad_norm: 0.7414174848362346, iteration: 199682
loss: 0.981902539730072,grad_norm: 0.9999991767112445, iteration: 199683
loss: 0.9931179285049438,grad_norm: 0.9871219225723658, iteration: 199684
loss: 0.9937459826469421,grad_norm: 0.9999992197555108, iteration: 199685
loss: 1.0831977128982544,grad_norm: 0.9999992968195488, iteration: 199686
loss: 1.0079666376113892,grad_norm: 0.999999010925764, iteration: 199687
loss: 1.0071793794631958,grad_norm: 0.9874589687560237, iteration: 199688
loss: 1.0159623622894287,grad_norm: 0.9999992251053003, iteration: 199689
loss: 1.0089730024337769,grad_norm: 0.9194544916282764, iteration: 199690
loss: 1.0407577753067017,grad_norm: 0.9999998348616714, iteration: 199691
loss: 0.998883068561554,grad_norm: 0.9999991734227635, iteration: 199692
loss: 1.0169274806976318,grad_norm: 0.8951980927827201, iteration: 199693
loss: 0.975135862827301,grad_norm: 0.987860513814229, iteration: 199694
loss: 1.0117607116699219,grad_norm: 0.8305226118244577, iteration: 199695
loss: 1.0441927909851074,grad_norm: 0.8194595810085626, iteration: 199696
loss: 1.035568356513977,grad_norm: 0.9093709986327131, iteration: 199697
loss: 1.0087080001831055,grad_norm: 0.8756880311310569, iteration: 199698
loss: 1.00407874584198,grad_norm: 0.9462899855047145, iteration: 199699
loss: 1.0121217966079712,grad_norm: 0.8483820769912088, iteration: 199700
loss: 1.0000500679016113,grad_norm: 0.9999990698086326, iteration: 199701
loss: 0.9675811529159546,grad_norm: 0.9999993035163102, iteration: 199702
loss: 1.0411683320999146,grad_norm: 0.9999992838355529, iteration: 199703
loss: 1.0139352083206177,grad_norm: 0.7613278972729373, iteration: 199704
loss: 0.9662954807281494,grad_norm: 0.9896126249484121, iteration: 199705
loss: 0.979719877243042,grad_norm: 0.999999225120741, iteration: 199706
loss: 0.9889495372772217,grad_norm: 0.9999995139390948, iteration: 199707
loss: 1.006672739982605,grad_norm: 0.999999037383582, iteration: 199708
loss: 0.9676731824874878,grad_norm: 0.8929813602629315, iteration: 199709
loss: 0.9992146492004395,grad_norm: 0.9999992173147114, iteration: 199710
loss: 1.026185393333435,grad_norm: 0.999999055018092, iteration: 199711
loss: 0.9654029607772827,grad_norm: 0.888371095899285, iteration: 199712
loss: 0.9459994435310364,grad_norm: 0.9999990092693427, iteration: 199713
loss: 1.0315611362457275,grad_norm: 0.9488800561605234, iteration: 199714
loss: 1.0261386632919312,grad_norm: 0.9298451998972923, iteration: 199715
loss: 1.0145752429962158,grad_norm: 0.9084193059271809, iteration: 199716
loss: 1.0302445888519287,grad_norm: 0.9090149722707467, iteration: 199717
loss: 1.0325231552124023,grad_norm: 0.9318647650626808, iteration: 199718
loss: 0.9908784627914429,grad_norm: 0.8151343646646252, iteration: 199719
loss: 1.0530245304107666,grad_norm: 0.9409708534443573, iteration: 199720
loss: 1.0055301189422607,grad_norm: 0.9999990717476089, iteration: 199721
loss: 1.008986473083496,grad_norm: 0.8956858903271967, iteration: 199722
loss: 1.014692783355713,grad_norm: 0.999998932792024, iteration: 199723
loss: 1.0021774768829346,grad_norm: 0.9274278426947421, iteration: 199724
loss: 1.0184489488601685,grad_norm: 0.917283169846686, iteration: 199725
loss: 1.00923490524292,grad_norm: 0.8840161176746296, iteration: 199726
loss: 0.9698078632354736,grad_norm: 0.9999990102457512, iteration: 199727
loss: 1.0184047222137451,grad_norm: 0.9999990286875722, iteration: 199728
loss: 0.9741657376289368,grad_norm: 0.9999991449206276, iteration: 199729
loss: 1.0203112363815308,grad_norm: 0.8886316778004428, iteration: 199730
loss: 0.9741243124008179,grad_norm: 0.9133567480912946, iteration: 199731
loss: 0.9830363988876343,grad_norm: 0.9408914742834515, iteration: 199732
loss: 1.0250357389450073,grad_norm: 0.9999989512734728, iteration: 199733
loss: 0.9759686589241028,grad_norm: 0.9999997886086829, iteration: 199734
loss: 1.0017303228378296,grad_norm: 0.9512061913384617, iteration: 199735
loss: 1.0336195230484009,grad_norm: 0.9999992318424531, iteration: 199736
loss: 1.0081617832183838,grad_norm: 0.9999998144891183, iteration: 199737
loss: 1.0315093994140625,grad_norm: 0.9999992017754263, iteration: 199738
loss: 1.017186164855957,grad_norm: 0.9730268372328005, iteration: 199739
loss: 0.9994718432426453,grad_norm: 0.9999989542456067, iteration: 199740
loss: 0.994804859161377,grad_norm: 0.8604247153376512, iteration: 199741
loss: 1.0178768634796143,grad_norm: 0.95096167040204, iteration: 199742
loss: 0.9846644997596741,grad_norm: 0.982921734050962, iteration: 199743
loss: 1.009609341621399,grad_norm: 0.9999991847779209, iteration: 199744
loss: 0.9763634204864502,grad_norm: 0.887580226151123, iteration: 199745
loss: 0.959674060344696,grad_norm: 0.8850422477247717, iteration: 199746
loss: 1.0218793153762817,grad_norm: 0.9681842559837199, iteration: 199747
loss: 0.9884535670280457,grad_norm: 0.9999992025542531, iteration: 199748
loss: 1.0195821523666382,grad_norm: 0.9236632838954447, iteration: 199749
loss: 1.0388450622558594,grad_norm: 0.9999992255203263, iteration: 199750
loss: 0.986287534236908,grad_norm: 0.9381983398050242, iteration: 199751
loss: 1.0040425062179565,grad_norm: 0.8494981461529979, iteration: 199752
loss: 0.9437211751937866,grad_norm: 0.9999990021691497, iteration: 199753
loss: 1.0174508094787598,grad_norm: 0.8690489185053845, iteration: 199754
loss: 1.0763866901397705,grad_norm: 0.9999991022374933, iteration: 199755
loss: 1.041867733001709,grad_norm: 0.951352699919418, iteration: 199756
loss: 1.0254194736480713,grad_norm: 0.961762694421828, iteration: 199757
loss: 0.9674978256225586,grad_norm: 0.8806533129041307, iteration: 199758
loss: 0.9369286894798279,grad_norm: 0.9999991416899958, iteration: 199759
loss: 0.972253143787384,grad_norm: 0.9999992927923377, iteration: 199760
loss: 1.0043704509735107,grad_norm: 0.9525143442040305, iteration: 199761
loss: 0.9786942005157471,grad_norm: 0.9559587347057261, iteration: 199762
loss: 0.9892737865447998,grad_norm: 0.9999990249094053, iteration: 199763
loss: 0.9725908041000366,grad_norm: 0.8209297136901015, iteration: 199764
loss: 1.0170625448226929,grad_norm: 0.9680137610358434, iteration: 199765
loss: 1.0023332834243774,grad_norm: 0.8546711497835372, iteration: 199766
loss: 0.9712991118431091,grad_norm: 0.9999992122016107, iteration: 199767
loss: 1.0347939729690552,grad_norm: 0.9999991303360583, iteration: 199768
loss: 0.9740849733352661,grad_norm: 0.9717655863646749, iteration: 199769
loss: 1.021574854850769,grad_norm: 0.9132539293801004, iteration: 199770
loss: 1.0125917196273804,grad_norm: 0.8412514414264197, iteration: 199771
loss: 0.9984803795814514,grad_norm: 0.9999990989408352, iteration: 199772
loss: 1.0271886587142944,grad_norm: 0.9999992484787101, iteration: 199773
loss: 0.9887365102767944,grad_norm: 0.9999990003306601, iteration: 199774
loss: 1.0007009506225586,grad_norm: 0.8696244647374656, iteration: 199775
loss: 0.9700055718421936,grad_norm: 0.9999991778792343, iteration: 199776
loss: 1.0120731592178345,grad_norm: 0.8450164750134109, iteration: 199777
loss: 0.9815822839736938,grad_norm: 0.8065549040079226, iteration: 199778
loss: 0.9829665422439575,grad_norm: 0.8227792935320474, iteration: 199779
loss: 1.002058982849121,grad_norm: 0.9741388747004186, iteration: 199780
loss: 0.9679538011550903,grad_norm: 0.9999990773804228, iteration: 199781
loss: 0.992810845375061,grad_norm: 0.9999992509762144, iteration: 199782
loss: 1.0069979429244995,grad_norm: 0.869927559221709, iteration: 199783
loss: 1.0205211639404297,grad_norm: 0.9438227567130538, iteration: 199784
loss: 1.0005005598068237,grad_norm: 0.8460158202621983, iteration: 199785
loss: 1.0157222747802734,grad_norm: 0.8939419120376357, iteration: 199786
loss: 0.9953375458717346,grad_norm: 0.9999991138122093, iteration: 199787
loss: 1.0255688428878784,grad_norm: 0.9999990495861406, iteration: 199788
loss: 0.9722545742988586,grad_norm: 0.9999992088387695, iteration: 199789
loss: 0.9524268507957458,grad_norm: 0.8719556480707125, iteration: 199790
loss: 1.081214427947998,grad_norm: 0.95128659106565, iteration: 199791
loss: 1.0083343982696533,grad_norm: 0.9999990743909483, iteration: 199792
loss: 0.9907735586166382,grad_norm: 0.7416015641612352, iteration: 199793
loss: 1.0016909837722778,grad_norm: 0.9999995430838823, iteration: 199794
loss: 1.0368340015411377,grad_norm: 0.9484563232721701, iteration: 199795
loss: 1.0005311965942383,grad_norm: 0.9999990182324515, iteration: 199796
loss: 0.9547112584114075,grad_norm: 0.9343418799073852, iteration: 199797
loss: 0.9607184529304504,grad_norm: 0.8148216492613604, iteration: 199798
loss: 0.9885905385017395,grad_norm: 0.9960911894137091, iteration: 199799
loss: 1.0398120880126953,grad_norm: 0.9415541789203344, iteration: 199800
loss: 1.0334951877593994,grad_norm: 0.999999135712977, iteration: 199801
loss: 0.9981703162193298,grad_norm: 0.9675278731593784, iteration: 199802
loss: 0.9994806051254272,grad_norm: 0.9071606672110024, iteration: 199803
loss: 1.0128419399261475,grad_norm: 0.9999993258360873, iteration: 199804
loss: 0.9999721050262451,grad_norm: 0.999999132079397, iteration: 199805
loss: 0.9797985553741455,grad_norm: 0.9999990112492616, iteration: 199806
loss: 1.0107805728912354,grad_norm: 0.9256986080979731, iteration: 199807
loss: 1.0097354650497437,grad_norm: 0.9235403308672444, iteration: 199808
loss: 1.0069719552993774,grad_norm: 0.9999992274516433, iteration: 199809
loss: 0.9904094934463501,grad_norm: 0.9999991185938061, iteration: 199810
loss: 0.9950950741767883,grad_norm: 0.9528443778997688, iteration: 199811
loss: 1.0053682327270508,grad_norm: 0.9178804706242948, iteration: 199812
loss: 0.9946826696395874,grad_norm: 0.9073017251254801, iteration: 199813
loss: 1.0186841487884521,grad_norm: 0.9130090737469255, iteration: 199814
loss: 0.9842750430107117,grad_norm: 0.9151035066333327, iteration: 199815
loss: 0.950284481048584,grad_norm: 0.9595090508439268, iteration: 199816
loss: 1.0358290672302246,grad_norm: 0.999999130821025, iteration: 199817
loss: 1.0182619094848633,grad_norm: 0.9999990704232953, iteration: 199818
loss: 0.997965931892395,grad_norm: 0.9839883176701252, iteration: 199819
loss: 0.9810425639152527,grad_norm: 0.8977389055833466, iteration: 199820
loss: 0.9847731590270996,grad_norm: 0.9853578815573347, iteration: 199821
loss: 0.9478410482406616,grad_norm: 0.9351303695451337, iteration: 199822
loss: 1.0041016340255737,grad_norm: 0.8823636889375508, iteration: 199823
loss: 1.0328125953674316,grad_norm: 0.999999057726417, iteration: 199824
loss: 0.9981341361999512,grad_norm: 0.8466236146842269, iteration: 199825
loss: 1.0274239778518677,grad_norm: 0.9468284571453441, iteration: 199826
loss: 1.0143784284591675,grad_norm: 0.9999992824218477, iteration: 199827
loss: 1.025572657585144,grad_norm: 0.9999996875003764, iteration: 199828
loss: 1.00492525100708,grad_norm: 0.9999991253089324, iteration: 199829
loss: 1.0708390474319458,grad_norm: 0.9199294825434772, iteration: 199830
loss: 0.9441115856170654,grad_norm: 0.9999990992126376, iteration: 199831
loss: 0.9802249073982239,grad_norm: 0.9736691442778236, iteration: 199832
loss: 0.9630711078643799,grad_norm: 0.9999990278312825, iteration: 199833
loss: 1.0742995738983154,grad_norm: 0.9999996121106397, iteration: 199834
loss: 0.9451870918273926,grad_norm: 0.8612871877575625, iteration: 199835
loss: 0.9946892261505127,grad_norm: 0.9999993336930095, iteration: 199836
loss: 1.000228762626648,grad_norm: 0.9999990827244511, iteration: 199837
loss: 1.0405986309051514,grad_norm: 0.999999036654523, iteration: 199838
loss: 1.01016366481781,grad_norm: 0.8991154423699591, iteration: 199839
loss: 0.9859035015106201,grad_norm: 0.7597256765814604, iteration: 199840
loss: 0.9887988567352295,grad_norm: 0.9999990532573116, iteration: 199841
loss: 0.9958882927894592,grad_norm: 0.9999998432543795, iteration: 199842
loss: 1.0181148052215576,grad_norm: 0.9796549508691514, iteration: 199843
loss: 1.0099033117294312,grad_norm: 0.8454199969589201, iteration: 199844
loss: 0.9742431640625,grad_norm: 0.9999990790325249, iteration: 199845
loss: 0.99366295337677,grad_norm: 0.9235539079769022, iteration: 199846
loss: 0.9618909955024719,grad_norm: 0.9604720326595784, iteration: 199847
loss: 1.0332787036895752,grad_norm: 0.859408962549869, iteration: 199848
loss: 0.9622252583503723,grad_norm: 0.9999991975414511, iteration: 199849
loss: 1.0726231336593628,grad_norm: 0.9156243220940514, iteration: 199850
loss: 0.9812614917755127,grad_norm: 0.9999991011805618, iteration: 199851
loss: 0.9966909289360046,grad_norm: 0.9388384116686037, iteration: 199852
loss: 0.9780961871147156,grad_norm: 0.8903935433544684, iteration: 199853
loss: 1.0077234506607056,grad_norm: 0.9999995374759104, iteration: 199854
loss: 0.9831132292747498,grad_norm: 0.9999992655506678, iteration: 199855
loss: 1.0199296474456787,grad_norm: 0.9999991850131504, iteration: 199856
loss: 1.0130641460418701,grad_norm: 0.9264073504095343, iteration: 199857
loss: 1.0013539791107178,grad_norm: 0.9999991301821283, iteration: 199858
loss: 1.0267374515533447,grad_norm: 0.811829518813872, iteration: 199859
loss: 1.0287504196166992,grad_norm: 0.8362291848354133, iteration: 199860
loss: 0.9845613837242126,grad_norm: 0.9997611527360307, iteration: 199861
loss: 1.0938431024551392,grad_norm: 0.9999992410212254, iteration: 199862
loss: 1.0008820295333862,grad_norm: 0.9999999216799733, iteration: 199863
loss: 0.9913493990898132,grad_norm: 0.8702899430705269, iteration: 199864
loss: 1.008854627609253,grad_norm: 0.9999990728638223, iteration: 199865
loss: 1.010886311531067,grad_norm: 0.9999991727223317, iteration: 199866
loss: 1.0180449485778809,grad_norm: 0.9628004270550309, iteration: 199867
loss: 1.0273452997207642,grad_norm: 0.979683197791348, iteration: 199868
loss: 0.9971825480461121,grad_norm: 0.8969202622929272, iteration: 199869
loss: 1.0158578157424927,grad_norm: 0.9251041886199347, iteration: 199870
loss: 0.9914119839668274,grad_norm: 0.9610302783611313, iteration: 199871
loss: 1.0015959739685059,grad_norm: 0.9647000484442458, iteration: 199872
loss: 1.027393102645874,grad_norm: 0.8345414415172904, iteration: 199873
loss: 1.0601962804794312,grad_norm: 0.9999992665129298, iteration: 199874
loss: 1.023768663406372,grad_norm: 0.9999991013424517, iteration: 199875
loss: 0.967657208442688,grad_norm: 0.8887938312020651, iteration: 199876
loss: 0.9742778539657593,grad_norm: 0.9507398514512181, iteration: 199877
loss: 0.9971899390220642,grad_norm: 0.990718791860185, iteration: 199878
loss: 0.9994142651557922,grad_norm: 0.9553606063337771, iteration: 199879
loss: 1.0005555152893066,grad_norm: 0.8746358784967899, iteration: 199880
loss: 1.015201210975647,grad_norm: 0.9999989998736664, iteration: 199881
loss: 0.97123122215271,grad_norm: 0.9198062289523481, iteration: 199882
loss: 0.990060031414032,grad_norm: 0.9999991347342496, iteration: 199883
loss: 1.017537236213684,grad_norm: 0.9704390063347235, iteration: 199884
loss: 1.0563554763793945,grad_norm: 0.9999992218944307, iteration: 199885
loss: 0.9924848079681396,grad_norm: 0.9924573755122301, iteration: 199886
loss: 0.9815822839736938,grad_norm: 0.9808227791244295, iteration: 199887
loss: 1.0169845819473267,grad_norm: 0.9753727715504424, iteration: 199888
loss: 1.0056108236312866,grad_norm: 0.8268015995042137, iteration: 199889
loss: 1.0119330883026123,grad_norm: 0.9396972041780881, iteration: 199890
loss: 1.043969988822937,grad_norm: 0.999999079156825, iteration: 199891
loss: 0.9702010154724121,grad_norm: 0.8867228512878996, iteration: 199892
loss: 0.9509022831916809,grad_norm: 0.9999989763742057, iteration: 199893
loss: 0.9900845289230347,grad_norm: 0.9999990825091987, iteration: 199894
loss: 0.9757874011993408,grad_norm: 0.999999177547139, iteration: 199895
loss: 0.9819538593292236,grad_norm: 0.931631103924434, iteration: 199896
loss: 1.0443083047866821,grad_norm: 0.9999991644875528, iteration: 199897
loss: 1.023384690284729,grad_norm: 0.9549187209263674, iteration: 199898
loss: 1.0054038763046265,grad_norm: 0.9999992392233297, iteration: 199899
loss: 1.0383507013320923,grad_norm: 0.9999990265763156, iteration: 199900
loss: 1.0297099351882935,grad_norm: 0.9999991259472354, iteration: 199901
loss: 1.0135161876678467,grad_norm: 0.9501837249262922, iteration: 199902
loss: 1.0201820135116577,grad_norm: 0.9999990591462953, iteration: 199903
loss: 0.9759727120399475,grad_norm: 0.9389457859030904, iteration: 199904
loss: 1.026815414428711,grad_norm: 0.9999991180431604, iteration: 199905
loss: 1.0412623882293701,grad_norm: 0.99999914600669, iteration: 199906
loss: 0.9795949459075928,grad_norm: 0.8398547241276578, iteration: 199907
loss: 0.9647851586341858,grad_norm: 0.9999991395062204, iteration: 199908
loss: 1.0701643228530884,grad_norm: 0.999999749343362, iteration: 199909
loss: 0.9980977773666382,grad_norm: 0.8796555663371898, iteration: 199910
loss: 1.04585862159729,grad_norm: 0.9386945830040015, iteration: 199911
loss: 1.006862998008728,grad_norm: 0.9760226717014127, iteration: 199912
loss: 1.0128530263900757,grad_norm: 0.9440442653108556, iteration: 199913
loss: 1.001172423362732,grad_norm: 0.9395362065502821, iteration: 199914
loss: 1.0182713270187378,grad_norm: 0.9385516402238797, iteration: 199915
loss: 1.014163851737976,grad_norm: 0.9999990535867932, iteration: 199916
loss: 1.0056309700012207,grad_norm: 0.8701458607144472, iteration: 199917
loss: 1.0129165649414062,grad_norm: 0.9999989495600007, iteration: 199918
loss: 1.0991970300674438,grad_norm: 0.9999992486308992, iteration: 199919
loss: 0.9991970658302307,grad_norm: 0.9152735443428454, iteration: 199920
loss: 0.9783236980438232,grad_norm: 0.8880394599646031, iteration: 199921
loss: 0.9785771369934082,grad_norm: 0.8392182350622888, iteration: 199922
loss: 0.9916802048683167,grad_norm: 0.9931966393903843, iteration: 199923
loss: 1.0203273296356201,grad_norm: 0.9177641721636595, iteration: 199924
loss: 0.9984247088432312,grad_norm: 0.8714630183211138, iteration: 199925
loss: 0.9558412432670593,grad_norm: 0.9999989627372434, iteration: 199926
loss: 0.9912123084068298,grad_norm: 0.9204576190107214, iteration: 199927
loss: 1.05311119556427,grad_norm: 0.9999998709487155, iteration: 199928
loss: 0.9602148532867432,grad_norm: 0.8854465346321116, iteration: 199929
loss: 1.0207387208938599,grad_norm: 0.8085592329461536, iteration: 199930
loss: 1.0042730569839478,grad_norm: 0.9999991552441765, iteration: 199931
loss: 1.0121577978134155,grad_norm: 0.931971957115229, iteration: 199932
loss: 0.9993972778320312,grad_norm: 0.999999026260119, iteration: 199933
loss: 0.9870719313621521,grad_norm: 0.9267353018291536, iteration: 199934
loss: 0.9972853064537048,grad_norm: 0.9999992750599213, iteration: 199935
loss: 1.0181537866592407,grad_norm: 0.935338672011987, iteration: 199936
loss: 1.0069580078125,grad_norm: 0.9024618861503606, iteration: 199937
loss: 1.0185264348983765,grad_norm: 0.9858524685542601, iteration: 199938
loss: 1.017562747001648,grad_norm: 0.9999991502958258, iteration: 199939
loss: 1.0081303119659424,grad_norm: 0.9999991047295678, iteration: 199940
loss: 1.0092216730117798,grad_norm: 0.9999991865821757, iteration: 199941
loss: 1.0011212825775146,grad_norm: 0.9999989975802036, iteration: 199942
loss: 1.006575107574463,grad_norm: 0.9228519776158212, iteration: 199943
loss: 0.9713165163993835,grad_norm: 0.8490503937789651, iteration: 199944
loss: 1.018000602722168,grad_norm: 0.9897899067922855, iteration: 199945
loss: 0.9786296486854553,grad_norm: 0.7852681691415279, iteration: 199946
loss: 1.0339869260787964,grad_norm: 0.9999992046050079, iteration: 199947
loss: 1.001194715499878,grad_norm: 0.8273400537955715, iteration: 199948
loss: 1.012285828590393,grad_norm: 0.9999992290770477, iteration: 199949
loss: 0.9816840887069702,grad_norm: 0.9779419331059973, iteration: 199950
loss: 1.0182902812957764,grad_norm: 0.9686288813250269, iteration: 199951
loss: 1.007047414779663,grad_norm: 0.9999993936747145, iteration: 199952
loss: 0.9701110124588013,grad_norm: 0.9917192295198091, iteration: 199953
loss: 0.9843700528144836,grad_norm: 0.9303812747782225, iteration: 199954
loss: 0.9930534362792969,grad_norm: 0.898013155363469, iteration: 199955
loss: 1.0114935636520386,grad_norm: 0.8944558003549982, iteration: 199956
loss: 1.0032374858856201,grad_norm: 0.9688784697387851, iteration: 199957
loss: 1.0233434438705444,grad_norm: 0.9999991377972126, iteration: 199958
loss: 0.990675151348114,grad_norm: 0.9846771887326796, iteration: 199959
loss: 1.020176887512207,grad_norm: 0.8445918365052683, iteration: 199960
loss: 1.0131994485855103,grad_norm: 0.9999990559593661, iteration: 199961
loss: 0.9860901832580566,grad_norm: 0.9803164067124271, iteration: 199962
loss: 1.0256681442260742,grad_norm: 0.943104274638931, iteration: 199963
loss: 0.9740074872970581,grad_norm: 0.9235943622981148, iteration: 199964
loss: 0.9949221611022949,grad_norm: 0.9999990899802536, iteration: 199965
loss: 0.985320508480072,grad_norm: 0.9096738211123072, iteration: 199966
loss: 1.0039774179458618,grad_norm: 0.8886748442363144, iteration: 199967
loss: 0.9869064092636108,grad_norm: 0.9618651676770024, iteration: 199968
loss: 1.072561264038086,grad_norm: 0.999999153764874, iteration: 199969
loss: 0.9778932929039001,grad_norm: 0.8964767939774344, iteration: 199970
loss: 1.0263152122497559,grad_norm: 0.8352444738170142, iteration: 199971
loss: 0.99271559715271,grad_norm: 0.8894934565170318, iteration: 199972
loss: 1.063636302947998,grad_norm: 0.9999998067683065, iteration: 199973
loss: 0.9834965467453003,grad_norm: 0.9182908009921518, iteration: 199974
loss: 1.0117886066436768,grad_norm: 0.9999989563483601, iteration: 199975
loss: 1.0058666467666626,grad_norm: 0.7771789026643615, iteration: 199976
loss: 1.011714220046997,grad_norm: 0.9999990690246361, iteration: 199977
loss: 0.9862514138221741,grad_norm: 0.9999991151604883, iteration: 199978
loss: 1.0118303298950195,grad_norm: 0.9534675390417024, iteration: 199979
loss: 1.0022543668746948,grad_norm: 0.999999069312896, iteration: 199980
loss: 1.0082690715789795,grad_norm: 0.9117887845380823, iteration: 199981
loss: 1.0079935789108276,grad_norm: 0.9999992003832584, iteration: 199982
loss: 0.996776819229126,grad_norm: 0.9999990656246746, iteration: 199983
loss: 1.010259747505188,grad_norm: 0.8711983838220493, iteration: 199984
loss: 0.9970611333847046,grad_norm: 0.9999992436342374, iteration: 199985
loss: 0.9520248770713806,grad_norm: 0.8347776925091416, iteration: 199986
loss: 1.0037287473678589,grad_norm: 0.9449947873970812, iteration: 199987
loss: 0.9929128885269165,grad_norm: 0.8692341212334914, iteration: 199988
loss: 0.9923533797264099,grad_norm: 0.9541648315501935, iteration: 199989
loss: 0.997732937335968,grad_norm: 0.9891603868610632, iteration: 199990
loss: 1.0107344388961792,grad_norm: 0.9999990950870823, iteration: 199991
loss: 1.0220359563827515,grad_norm: 0.9999992093004784, iteration: 199992
loss: 1.0446250438690186,grad_norm: 0.9501865360191392, iteration: 199993
loss: 0.9952412247657776,grad_norm: 0.9999992067071022, iteration: 199994
loss: 0.9834545254707336,grad_norm: 0.8637274450562364, iteration: 199995
loss: 1.0169821977615356,grad_norm: 0.9999990685358928, iteration: 199996
loss: 1.0005179643630981,grad_norm: 0.9227483660984551, iteration: 199997
loss: 0.9953048229217529,grad_norm: 0.9999990672733008, iteration: 199998
loss: 0.9939897656440735,grad_norm: 0.9324986443204043, iteration: 199999
loss: 1.0153281688690186,grad_norm: 0.9999991411833821, iteration: 200000
Evaluating at step 200000
{'val': 0.9950829781591892, 'test': 2.8598075362440114}
loss: 0.9881678223609924,grad_norm: 0.8458632068929399, iteration: 200001
loss: 1.0158305168151855,grad_norm: 0.8120199125080962, iteration: 200002
loss: 1.0019992589950562,grad_norm: 0.8121961074909343, iteration: 200003
loss: 0.9686780571937561,grad_norm: 0.9048571581195856, iteration: 200004
loss: 1.0111428499221802,grad_norm: 0.9999993664232307, iteration: 200005
loss: 0.9812672138214111,grad_norm: 0.8894074625602247, iteration: 200006
loss: 0.9987307190895081,grad_norm: 0.9999990486780631, iteration: 200007
loss: 1.0180588960647583,grad_norm: 0.9999991196260427, iteration: 200008
loss: 1.0091029405593872,grad_norm: 0.990120462465952, iteration: 200009
loss: 0.979119598865509,grad_norm: 0.8271662494274598, iteration: 200010
loss: 0.998691976070404,grad_norm: 0.9999989663105688, iteration: 200011
loss: 1.0402014255523682,grad_norm: 0.9999990730694415, iteration: 200012
loss: 0.9758571982383728,grad_norm: 0.8457639545218605, iteration: 200013
loss: 1.0443408489227295,grad_norm: 0.9999992091837133, iteration: 200014
loss: 0.9804762005805969,grad_norm: 0.8422751858391805, iteration: 200015
loss: 1.0165120363235474,grad_norm: 0.9999990551303234, iteration: 200016
loss: 0.9967260360717773,grad_norm: 0.7712120347107737, iteration: 200017
loss: 0.9939559102058411,grad_norm: 0.9126352574078758, iteration: 200018
loss: 0.9977226853370667,grad_norm: 0.876413112884725, iteration: 200019
loss: 1.023271918296814,grad_norm: 0.8116145257615457, iteration: 200020
loss: 1.0135498046875,grad_norm: 0.8367154364425353, iteration: 200021
loss: 1.024699091911316,grad_norm: 0.9999991296886855, iteration: 200022
loss: 0.9697715640068054,grad_norm: 0.8111470017330653, iteration: 200023
loss: 1.0362452268600464,grad_norm: 0.9411537273070464, iteration: 200024
loss: 1.014042854309082,grad_norm: 0.9448942161997393, iteration: 200025
loss: 1.038521647453308,grad_norm: 0.9478427246728445, iteration: 200026
loss: 0.9668587446212769,grad_norm: 0.9999990664674976, iteration: 200027
loss: 0.9998304843902588,grad_norm: 0.8853474121627053, iteration: 200028
loss: 1.0060150623321533,grad_norm: 0.9882986973312361, iteration: 200029
loss: 1.0220727920532227,grad_norm: 0.881382758977164, iteration: 200030
loss: 1.0040276050567627,grad_norm: 0.8251112661595958, iteration: 200031
loss: 1.0102882385253906,grad_norm: 0.9176696008543238, iteration: 200032
loss: 0.9563723802566528,grad_norm: 0.8536291324385042, iteration: 200033
loss: 0.9739404916763306,grad_norm: 0.8850770381429619, iteration: 200034
loss: 1.0302273035049438,grad_norm: 0.9999990027654769, iteration: 200035
loss: 0.9919899106025696,grad_norm: 0.9510176432505787, iteration: 200036
loss: 1.0083991289138794,grad_norm: 0.8555108963485285, iteration: 200037
loss: 1.007214903831482,grad_norm: 0.8433088490530002, iteration: 200038
loss: 0.9730395078659058,grad_norm: 0.9999990971070774, iteration: 200039
loss: 0.9886130690574646,grad_norm: 0.926600025177985, iteration: 200040
loss: 0.9956719875335693,grad_norm: 0.9999992629661113, iteration: 200041
loss: 0.9770786762237549,grad_norm: 0.7818368076245541, iteration: 200042
loss: 1.008604884147644,grad_norm: 0.86397621528627, iteration: 200043
loss: 1.0250673294067383,grad_norm: 0.9075796980978592, iteration: 200044
loss: 0.9940442442893982,grad_norm: 0.9685327035761024, iteration: 200045
loss: 0.9892582893371582,grad_norm: 0.9999989181079169, iteration: 200046
loss: 0.9873666167259216,grad_norm: 0.9203689888827187, iteration: 200047
loss: 0.9720168709754944,grad_norm: 0.9999990914440408, iteration: 200048
loss: 1.058103322982788,grad_norm: 0.9999991717803596, iteration: 200049
loss: 1.0130513906478882,grad_norm: 0.999999022007889, iteration: 200050
loss: 0.9756143093109131,grad_norm: 0.9999990309697284, iteration: 200051
loss: 0.9736387133598328,grad_norm: 0.8545819397740119, iteration: 200052
loss: 0.9626045227050781,grad_norm: 0.999999100949136, iteration: 200053
loss: 1.0035494565963745,grad_norm: 0.9999990013977962, iteration: 200054
loss: 0.9890866279602051,grad_norm: 0.9195554483476661, iteration: 200055
loss: 0.9666815400123596,grad_norm: 0.8729723560072064, iteration: 200056
loss: 0.9889078140258789,grad_norm: 0.8217829411620403, iteration: 200057
loss: 0.9759616255760193,grad_norm: 0.8860646548320497, iteration: 200058
loss: 1.0047845840454102,grad_norm: 0.9947926515717652, iteration: 200059
loss: 1.0064858198165894,grad_norm: 0.8245570358293409, iteration: 200060
loss: 1.0089222192764282,grad_norm: 0.848017215489806, iteration: 200061
loss: 1.0144984722137451,grad_norm: 0.9331110470237111, iteration: 200062
loss: 0.9887285828590393,grad_norm: 0.92395365908618, iteration: 200063
loss: 0.9933733344078064,grad_norm: 0.8459714604984508, iteration: 200064
loss: 1.0307060480117798,grad_norm: 0.8654685398542824, iteration: 200065
loss: 0.9990508556365967,grad_norm: 0.8365308778616498, iteration: 200066
loss: 1.0217945575714111,grad_norm: 0.9430001553365854, iteration: 200067
loss: 0.9704493284225464,grad_norm: 0.9814325665516892, iteration: 200068
loss: 0.9744812250137329,grad_norm: 0.9374081259651836, iteration: 200069
loss: 1.0053369998931885,grad_norm: 0.8720127554416616, iteration: 200070
loss: 1.0203720331192017,grad_norm: 0.9999994701050646, iteration: 200071
loss: 1.0519766807556152,grad_norm: 0.9999996444636936, iteration: 200072
loss: 0.9970079660415649,grad_norm: 0.8092474831481964, iteration: 200073
loss: 1.0394279956817627,grad_norm: 0.9999991657512858, iteration: 200074
loss: 1.0095365047454834,grad_norm: 0.9832876216150676, iteration: 200075
loss: 0.9979697465896606,grad_norm: 0.9821294786629188, iteration: 200076
loss: 1.038831353187561,grad_norm: 0.9999991641563079, iteration: 200077
loss: 0.9745050072669983,grad_norm: 0.9343307966680034, iteration: 200078
loss: 0.9963083863258362,grad_norm: 0.9222096850043641, iteration: 200079
loss: 0.9824216961860657,grad_norm: 0.99999904015641, iteration: 200080
loss: 1.0144143104553223,grad_norm: 0.9999989702439012, iteration: 200081
loss: 0.9853299856185913,grad_norm: 0.9980833151408198, iteration: 200082
loss: 0.9848973155021667,grad_norm: 0.99999900946391, iteration: 200083
loss: 1.0101367235183716,grad_norm: 0.9999992138635114, iteration: 200084
loss: 1.0119495391845703,grad_norm: 0.8920360171655989, iteration: 200085
loss: 0.9970740675926208,grad_norm: 0.9101453461142112, iteration: 200086
loss: 1.0346684455871582,grad_norm: 0.9569954907666903, iteration: 200087
loss: 1.0549806356430054,grad_norm: 0.9029021695735843, iteration: 200088
loss: 1.0061523914337158,grad_norm: 0.885331919705866, iteration: 200089
loss: 0.9878023862838745,grad_norm: 0.999999182273421, iteration: 200090
loss: 1.0224213600158691,grad_norm: 0.9151429786512753, iteration: 200091
loss: 1.0927952527999878,grad_norm: 0.9999995013532349, iteration: 200092
loss: 0.9515759348869324,grad_norm: 0.9241525299876155, iteration: 200093
loss: 0.9900619983673096,grad_norm: 0.9707738080147762, iteration: 200094
loss: 0.9912675619125366,grad_norm: 0.9999992311468396, iteration: 200095
loss: 0.9899744987487793,grad_norm: 0.9627502748196013, iteration: 200096
loss: 0.9788166880607605,grad_norm: 0.9251780581153848, iteration: 200097
loss: 1.015512228012085,grad_norm: 0.9999990442566207, iteration: 200098
loss: 1.0297099351882935,grad_norm: 0.8971286249179625, iteration: 200099
loss: 0.9933943748474121,grad_norm: 0.8846014007592601, iteration: 200100
loss: 1.0121443271636963,grad_norm: 0.9203970813165192, iteration: 200101
loss: 0.9979172945022583,grad_norm: 0.9999991875795373, iteration: 200102
loss: 1.0455211400985718,grad_norm: 0.7799910683624547, iteration: 200103
loss: 1.0174423456192017,grad_norm: 0.9999991195201209, iteration: 200104
loss: 0.9791660308837891,grad_norm: 0.9999988801006441, iteration: 200105
loss: 1.0031988620758057,grad_norm: 0.9999996795119037, iteration: 200106
loss: 1.0015370845794678,grad_norm: 0.7853404205145265, iteration: 200107
loss: 1.0048261880874634,grad_norm: 0.999999068144535, iteration: 200108
loss: 0.9895645976066589,grad_norm: 0.7769333374834242, iteration: 200109
loss: 0.9762802124023438,grad_norm: 0.9999991625982073, iteration: 200110
loss: 0.9954054355621338,grad_norm: 0.9395412871371734, iteration: 200111
loss: 1.001808524131775,grad_norm: 0.952257808197538, iteration: 200112
loss: 0.9897728562355042,grad_norm: 0.9999994429067988, iteration: 200113
loss: 0.950868546962738,grad_norm: 0.9999991170904147, iteration: 200114
loss: 1.0079247951507568,grad_norm: 0.9846659547133269, iteration: 200115
loss: 1.026038646697998,grad_norm: 0.9999997085701559, iteration: 200116
loss: 1.063015341758728,grad_norm: 0.9999997953264799, iteration: 200117
loss: 1.0173250436782837,grad_norm: 0.9102843220983372, iteration: 200118
loss: 0.9940362572669983,grad_norm: 0.9755176680066859, iteration: 200119
loss: 1.007229208946228,grad_norm: 0.9176303019100865, iteration: 200120
loss: 1.0114606618881226,grad_norm: 0.9748000284807764, iteration: 200121
loss: 0.9656404256820679,grad_norm: 0.9999990197549521, iteration: 200122
loss: 0.9751665592193604,grad_norm: 0.9576799538362313, iteration: 200123
loss: 0.9871309399604797,grad_norm: 0.8438423801089782, iteration: 200124
loss: 1.0107091665267944,grad_norm: 0.9444364179791402, iteration: 200125
loss: 1.0195724964141846,grad_norm: 0.8710319987039571, iteration: 200126
loss: 0.9936261177062988,grad_norm: 0.9949066344139428, iteration: 200127
loss: 1.0160208940505981,grad_norm: 0.9999991027023905, iteration: 200128
loss: 1.0005689859390259,grad_norm: 0.883768183551926, iteration: 200129
loss: 1.0151063203811646,grad_norm: 0.9999990249746484, iteration: 200130
loss: 1.0161659717559814,grad_norm: 0.9263522636558952, iteration: 200131
loss: 0.9919952154159546,grad_norm: 0.8411300687804489, iteration: 200132
loss: 1.0633364915847778,grad_norm: 0.9999990783693997, iteration: 200133
loss: 1.0433026552200317,grad_norm: 0.9804045227404319, iteration: 200134
loss: 0.979697585105896,grad_norm: 0.9999995541416645, iteration: 200135
loss: 1.0224179029464722,grad_norm: 0.9999991657075417, iteration: 200136
loss: 1.028942346572876,grad_norm: 0.9999990907050454, iteration: 200137
loss: 0.9850311875343323,grad_norm: 0.9490115118878935, iteration: 200138
loss: 1.0009747743606567,grad_norm: 0.9999991513810906, iteration: 200139
loss: 1.045082688331604,grad_norm: 0.7610654710100606, iteration: 200140
loss: 0.9688605070114136,grad_norm: 0.8656844366937982, iteration: 200141
loss: 1.0278748273849487,grad_norm: 0.9999992783287112, iteration: 200142
loss: 1.0225428342819214,grad_norm: 0.9999995116936813, iteration: 200143
loss: 1.0124284029006958,grad_norm: 0.9695691752958411, iteration: 200144
loss: 1.0070050954818726,grad_norm: 0.8928645091475182, iteration: 200145
loss: 0.98189777135849,grad_norm: 0.8946221846286986, iteration: 200146
loss: 0.9971075057983398,grad_norm: 0.9459990309811184, iteration: 200147
loss: 1.0134048461914062,grad_norm: 0.999999420377779, iteration: 200148
loss: 0.9878658056259155,grad_norm: 0.9999991005563652, iteration: 200149
loss: 0.9920356869697571,grad_norm: 0.8336114822621965, iteration: 200150
loss: 1.01059889793396,grad_norm: 0.9999990519693501, iteration: 200151
loss: 0.9987884759902954,grad_norm: 0.897975296755843, iteration: 200152
loss: 1.0250924825668335,grad_norm: 0.9642777283815781, iteration: 200153
loss: 1.0095634460449219,grad_norm: 0.864020283067229, iteration: 200154
loss: 1.0144826173782349,grad_norm: 0.999999453185454, iteration: 200155
loss: 1.0066486597061157,grad_norm: 0.7835065198198135, iteration: 200156
loss: 0.9891898036003113,grad_norm: 0.9202440008337252, iteration: 200157
loss: 1.0179944038391113,grad_norm: 0.999999283617931, iteration: 200158
loss: 0.9902979135513306,grad_norm: 0.8929373448796126, iteration: 200159
loss: 0.9339557886123657,grad_norm: 0.9513985982702677, iteration: 200160
loss: 1.0306947231292725,grad_norm: 0.9999992616313779, iteration: 200161
loss: 0.9699566960334778,grad_norm: 0.867577804898063, iteration: 200162
loss: 0.9761736989021301,grad_norm: 0.9999992657165062, iteration: 200163
loss: 1.024094820022583,grad_norm: 0.9999991485185636, iteration: 200164
loss: 1.0186041593551636,grad_norm: 0.9999990342895525, iteration: 200165
loss: 1.0344191789627075,grad_norm: 0.9999990546310189, iteration: 200166
loss: 0.965600848197937,grad_norm: 0.9999989635697099, iteration: 200167
loss: 0.9923667311668396,grad_norm: 0.9304496300676607, iteration: 200168
loss: 0.9862900376319885,grad_norm: 0.8365542599812187, iteration: 200169
loss: 1.0118027925491333,grad_norm: 0.9999990353335722, iteration: 200170
loss: 1.0100858211517334,grad_norm: 0.9999990126686517, iteration: 200171
loss: 1.0037182569503784,grad_norm: 0.9422454432764564, iteration: 200172
loss: 1.0033619403839111,grad_norm: 0.9999990345178651, iteration: 200173
loss: 1.0361770391464233,grad_norm: 0.9999997026546201, iteration: 200174
loss: 1.0137803554534912,grad_norm: 0.9566454330558302, iteration: 200175
loss: 0.9668983817100525,grad_norm: 0.9999991803868624, iteration: 200176
loss: 1.0057731866836548,grad_norm: 0.9103701971558066, iteration: 200177
loss: 0.9901436567306519,grad_norm: 0.9999990830751023, iteration: 200178
loss: 1.0105817317962646,grad_norm: 0.9811668110291831, iteration: 200179
loss: 1.048100471496582,grad_norm: 0.9999995477963038, iteration: 200180
loss: 0.9894561767578125,grad_norm: 0.9322373921860357, iteration: 200181
loss: 0.9881864786148071,grad_norm: 0.8136978658157795, iteration: 200182
loss: 1.0218850374221802,grad_norm: 0.8806161556648583, iteration: 200183
loss: 1.0296396017074585,grad_norm: 0.9999990430973021, iteration: 200184
loss: 0.990828275680542,grad_norm: 0.848392740677588, iteration: 200185
loss: 0.971461296081543,grad_norm: 0.9676246753582748, iteration: 200186
loss: 1.0332750082015991,grad_norm: 0.8837349407746311, iteration: 200187
loss: 1.0005661249160767,grad_norm: 0.9658464091920562, iteration: 200188
loss: 1.0268795490264893,grad_norm: 0.7925082338258058, iteration: 200189
loss: 1.0020643472671509,grad_norm: 0.9350914384201086, iteration: 200190
loss: 0.9603531956672668,grad_norm: 0.8852551172014912, iteration: 200191
loss: 1.0375359058380127,grad_norm: 0.9999993744435953, iteration: 200192
loss: 0.9957050681114197,grad_norm: 0.9509543094757725, iteration: 200193
loss: 0.998799741268158,grad_norm: 0.8779113675164448, iteration: 200194
loss: 0.9925040006637573,grad_norm: 0.8894986994971585, iteration: 200195
loss: 0.9807592034339905,grad_norm: 0.9275619151419154, iteration: 200196
loss: 1.029770016670227,grad_norm: 0.8966025084752944, iteration: 200197
loss: 1.0001755952835083,grad_norm: 0.7509754069691219, iteration: 200198
loss: 1.0241755247116089,grad_norm: 0.9999990540001106, iteration: 200199
loss: 0.9786182641983032,grad_norm: 0.9738852240387987, iteration: 200200
loss: 0.9809958338737488,grad_norm: 0.9658194161458769, iteration: 200201
loss: 0.9839792847633362,grad_norm: 0.9999990979954395, iteration: 200202
loss: 1.0311919450759888,grad_norm: 0.9729097241063058, iteration: 200203
loss: 1.006042718887329,grad_norm: 0.9686952958269045, iteration: 200204
loss: 1.006371021270752,grad_norm: 0.9999990509312087, iteration: 200205
loss: 1.0344901084899902,grad_norm: 0.9999993522627874, iteration: 200206
loss: 0.9817097783088684,grad_norm: 0.9999990460889208, iteration: 200207
loss: 1.0049505233764648,grad_norm: 0.7626600947940758, iteration: 200208
loss: 0.9963235259056091,grad_norm: 0.9999991980674284, iteration: 200209
loss: 0.9864521622657776,grad_norm: 0.9348781224669722, iteration: 200210
loss: 1.0785070657730103,grad_norm: 0.9999999964268184, iteration: 200211
loss: 1.0166434049606323,grad_norm: 0.9555648304830078, iteration: 200212
loss: 0.9828509092330933,grad_norm: 0.8242365555389704, iteration: 200213
loss: 1.0112022161483765,grad_norm: 0.9999991772459564, iteration: 200214
loss: 0.9807988405227661,grad_norm: 0.9460127513684895, iteration: 200215
loss: 0.9805180430412292,grad_norm: 0.9999993505565985, iteration: 200216
loss: 0.9895533919334412,grad_norm: 0.8762305229913762, iteration: 200217
loss: 0.9783386588096619,grad_norm: 0.9999990134103943, iteration: 200218
loss: 1.0112980604171753,grad_norm: 0.8909080854711484, iteration: 200219
loss: 0.9824679493904114,grad_norm: 0.9080407886346721, iteration: 200220
loss: 0.9662016034126282,grad_norm: 0.8954262069022223, iteration: 200221
loss: 0.9656726121902466,grad_norm: 0.8693956635969395, iteration: 200222
loss: 0.9848668575286865,grad_norm: 0.8450937373966898, iteration: 200223
loss: 1.0443848371505737,grad_norm: 0.9999997126101781, iteration: 200224
loss: 1.011601448059082,grad_norm: 0.9999996817522638, iteration: 200225
loss: 1.0246857404708862,grad_norm: 0.8948126206045186, iteration: 200226
loss: 0.999174952507019,grad_norm: 0.9999994276618541, iteration: 200227
loss: 0.9894005656242371,grad_norm: 0.988637695418487, iteration: 200228
loss: 1.0127075910568237,grad_norm: 0.9057657012448, iteration: 200229
loss: 0.9994137287139893,grad_norm: 0.9999990068519719, iteration: 200230
loss: 0.9365248084068298,grad_norm: 0.862892162342127, iteration: 200231
loss: 0.9947394132614136,grad_norm: 0.8726711736216723, iteration: 200232
loss: 1.0236984491348267,grad_norm: 0.9063253640422899, iteration: 200233
loss: 1.03798246383667,grad_norm: 0.9999991902664391, iteration: 200234
loss: 0.9968323707580566,grad_norm: 0.8762017321694039, iteration: 200235
loss: 0.9843075275421143,grad_norm: 0.9999990752958346, iteration: 200236
loss: 1.0265703201293945,grad_norm: 0.8381706745317544, iteration: 200237
loss: 0.9950504302978516,grad_norm: 0.8848079720990858, iteration: 200238
loss: 1.042665719985962,grad_norm: 0.8126292783446669, iteration: 200239
loss: 0.9498081207275391,grad_norm: 0.9999991664483356, iteration: 200240
loss: 0.9955466985702515,grad_norm: 0.9999992096160415, iteration: 200241
loss: 1.028766393661499,grad_norm: 0.7273049977687949, iteration: 200242
loss: 1.0119538307189941,grad_norm: 0.999999217870405, iteration: 200243
loss: 1.0007762908935547,grad_norm: 0.9208600506420411, iteration: 200244
loss: 0.993303656578064,grad_norm: 0.8713586233732489, iteration: 200245
loss: 1.006459355354309,grad_norm: 0.8916205349270541, iteration: 200246
loss: 1.0104782581329346,grad_norm: 0.8920976819113777, iteration: 200247
loss: 1.0001022815704346,grad_norm: 0.999998949007893, iteration: 200248
loss: 0.9906519055366516,grad_norm: 0.9547096732615178, iteration: 200249
loss: 1.002230167388916,grad_norm: 0.9379458346577672, iteration: 200250
loss: 0.9833801984786987,grad_norm: 0.8524617043863768, iteration: 200251
loss: 0.9794194102287292,grad_norm: 0.923916838386292, iteration: 200252
loss: 1.0318738222122192,grad_norm: 0.9562329906576718, iteration: 200253
loss: 1.0285035371780396,grad_norm: 0.9141516869239493, iteration: 200254
loss: 1.0241363048553467,grad_norm: 0.9999992281328046, iteration: 200255
loss: 1.0278669595718384,grad_norm: 0.8419246591982642, iteration: 200256
loss: 0.9642672538757324,grad_norm: 0.9999990913204078, iteration: 200257
loss: 0.9773812890052795,grad_norm: 0.8605037061961681, iteration: 200258
loss: 0.9951606392860413,grad_norm: 0.9427331689117042, iteration: 200259
loss: 0.982627809047699,grad_norm: 0.9999995221447118, iteration: 200260
loss: 0.9967525601387024,grad_norm: 0.9716053095192799, iteration: 200261
loss: 0.9808024764060974,grad_norm: 0.9999990550616263, iteration: 200262
loss: 1.0421817302703857,grad_norm: 0.8749490372502114, iteration: 200263
loss: 1.0081653594970703,grad_norm: 0.8301420582402822, iteration: 200264
loss: 1.0073872804641724,grad_norm: 0.9107995638009762, iteration: 200265
loss: 1.000188946723938,grad_norm: 0.8264886496562949, iteration: 200266
loss: 0.9983782172203064,grad_norm: 0.9834475022795486, iteration: 200267
loss: 0.9854661822319031,grad_norm: 0.989780045659936, iteration: 200268
loss: 0.9967966675758362,grad_norm: 0.9999991053371415, iteration: 200269
loss: 1.0261825323104858,grad_norm: 0.9163104807398915, iteration: 200270
loss: 0.984350323677063,grad_norm: 0.9999993639439643, iteration: 200271
loss: 1.013786792755127,grad_norm: 0.9789527688168744, iteration: 200272
loss: 1.0275518894195557,grad_norm: 0.9396359828226919, iteration: 200273
loss: 0.9942310452461243,grad_norm: 0.9744193767425666, iteration: 200274
loss: 0.9927971959114075,grad_norm: 0.9452699912589781, iteration: 200275
loss: 0.9868201613426208,grad_norm: 0.9999991118705978, iteration: 200276
loss: 0.9962396025657654,grad_norm: 0.9999990170797323, iteration: 200277
loss: 0.9981359243392944,grad_norm: 0.97766710245809, iteration: 200278
loss: 0.9869289398193359,grad_norm: 0.9999989922544119, iteration: 200279
loss: 1.0370304584503174,grad_norm: 0.9999991551233848, iteration: 200280
loss: 0.9951837062835693,grad_norm: 0.977340370033459, iteration: 200281
loss: 0.9856809973716736,grad_norm: 0.8259477605187763, iteration: 200282
loss: 0.9914248585700989,grad_norm: 0.8353285723002477, iteration: 200283
loss: 0.9916576147079468,grad_norm: 0.9554173195507702, iteration: 200284
loss: 0.9931217432022095,grad_norm: 0.9999990336713126, iteration: 200285
loss: 0.9818723797798157,grad_norm: 0.8685542378542541, iteration: 200286
loss: 0.9839685559272766,grad_norm: 0.9647967306651811, iteration: 200287
loss: 0.9593419432640076,grad_norm: 0.8679879474930235, iteration: 200288
loss: 1.010872483253479,grad_norm: 0.7638859336963555, iteration: 200289
loss: 0.9694502353668213,grad_norm: 0.9999991690317223, iteration: 200290
loss: 1.0033848285675049,grad_norm: 0.9999990784490597, iteration: 200291
loss: 0.9629632234573364,grad_norm: 0.9999992618657401, iteration: 200292
loss: 0.9768127202987671,grad_norm: 0.9216787080468285, iteration: 200293
loss: 0.9730604290962219,grad_norm: 0.8132577605302052, iteration: 200294
loss: 1.0139421224594116,grad_norm: 0.9616159029959443, iteration: 200295
loss: 1.0365214347839355,grad_norm: 0.9999991542409341, iteration: 200296
loss: 0.9810795187950134,grad_norm: 0.9867997433782558, iteration: 200297
loss: 0.9948250651359558,grad_norm: 0.9646728679863985, iteration: 200298
loss: 1.0004910230636597,grad_norm: 0.9447377855686359, iteration: 200299
loss: 0.9496482610702515,grad_norm: 0.8038274729186933, iteration: 200300
loss: 1.0434013605117798,grad_norm: 0.9999989745599392, iteration: 200301
loss: 0.9496679902076721,grad_norm: 0.8734288044133255, iteration: 200302
loss: 1.0390504598617554,grad_norm: 0.9354749989363663, iteration: 200303
loss: 1.030091643333435,grad_norm: 0.9421560178919526, iteration: 200304
loss: 0.9706786870956421,grad_norm: 0.9999989412767202, iteration: 200305
loss: 1.0101239681243896,grad_norm: 0.9142594514893946, iteration: 200306
loss: 1.0263539552688599,grad_norm: 0.8766750813335514, iteration: 200307
loss: 0.9446519613265991,grad_norm: 0.8511249306498209, iteration: 200308
loss: 1.1876916885375977,grad_norm: 0.9999997837016992, iteration: 200309
loss: 1.035152554512024,grad_norm: 0.9999990110811866, iteration: 200310
loss: 1.0069537162780762,grad_norm: 0.846392425284186, iteration: 200311
loss: 1.0063865184783936,grad_norm: 0.9999990251662239, iteration: 200312
loss: 0.999557614326477,grad_norm: 0.9999990854555866, iteration: 200313
loss: 1.004861831665039,grad_norm: 0.9107771980577108, iteration: 200314
loss: 1.0257086753845215,grad_norm: 0.9999991587334593, iteration: 200315
loss: 1.0269018411636353,grad_norm: 0.9999990642576554, iteration: 200316
loss: 0.983079731464386,grad_norm: 0.8746330351911512, iteration: 200317
loss: 0.9853435158729553,grad_norm: 0.9999998899257582, iteration: 200318
loss: 1.0202077627182007,grad_norm: 0.9999990650444819, iteration: 200319
loss: 0.9857310056686401,grad_norm: 0.8338003127402551, iteration: 200320
loss: 1.0082340240478516,grad_norm: 0.885504899796386, iteration: 200321
loss: 1.0128200054168701,grad_norm: 0.9999990626073193, iteration: 200322
loss: 0.9691742658615112,grad_norm: 0.9999990162625828, iteration: 200323
loss: 0.9917435646057129,grad_norm: 0.9841654427314072, iteration: 200324
loss: 0.981741189956665,grad_norm: 0.9221507947821884, iteration: 200325
loss: 1.0287292003631592,grad_norm: 0.969840237151458, iteration: 200326
loss: 1.0167477130889893,grad_norm: 0.999999136274775, iteration: 200327
loss: 0.9784421920776367,grad_norm: 0.9999991536820444, iteration: 200328
loss: 1.0279513597488403,grad_norm: 0.9999991141682312, iteration: 200329
loss: 0.9906795024871826,grad_norm: 0.9989683087519943, iteration: 200330
loss: 1.0276997089385986,grad_norm: 0.7767651277122201, iteration: 200331
loss: 0.9956831336021423,grad_norm: 0.7144790856155633, iteration: 200332
loss: 0.992379367351532,grad_norm: 0.9999991909950798, iteration: 200333
loss: 1.0274001359939575,grad_norm: 0.9400033022425036, iteration: 200334
loss: 0.9754895567893982,grad_norm: 0.9674743050830423, iteration: 200335
loss: 1.0217705965042114,grad_norm: 0.9259325567868529, iteration: 200336
loss: 1.0054686069488525,grad_norm: 0.9406163568591449, iteration: 200337
loss: 0.9838525652885437,grad_norm: 0.9999990143463067, iteration: 200338
loss: 1.0164304971694946,grad_norm: 0.916619558944268, iteration: 200339
loss: 0.9850011467933655,grad_norm: 0.815362135928251, iteration: 200340
loss: 0.9905882477760315,grad_norm: 0.8566667787712259, iteration: 200341
loss: 1.0116146802902222,grad_norm: 0.9999991561873082, iteration: 200342
loss: 1.0031025409698486,grad_norm: 0.8905805594838979, iteration: 200343
loss: 0.9764971137046814,grad_norm: 0.9999990636893632, iteration: 200344
loss: 0.9998104572296143,grad_norm: 0.8551040005615947, iteration: 200345
loss: 1.033539891242981,grad_norm: 0.9999999634852121, iteration: 200346
loss: 0.9959102869033813,grad_norm: 0.9576825626142266, iteration: 200347
loss: 0.9993050694465637,grad_norm: 0.9736393503245827, iteration: 200348
loss: 1.039994239807129,grad_norm: 0.9999990514522674, iteration: 200349
loss: 0.9995138049125671,grad_norm: 0.8745097160034352, iteration: 200350
loss: 1.0068219900131226,grad_norm: 0.9394306732236122, iteration: 200351
loss: 1.0166808366775513,grad_norm: 0.99999903482653, iteration: 200352
loss: 1.0154112577438354,grad_norm: 0.9410808461915623, iteration: 200353
loss: 0.9992993474006653,grad_norm: 0.9523672723016164, iteration: 200354
loss: 0.9925891160964966,grad_norm: 0.9999991269813132, iteration: 200355
loss: 0.9764182567596436,grad_norm: 0.9999990872397886, iteration: 200356
loss: 0.9933022260665894,grad_norm: 0.9106169136242983, iteration: 200357
loss: 1.024408221244812,grad_norm: 0.8488693186316437, iteration: 200358
loss: 1.018812656402588,grad_norm: 0.9999992250007249, iteration: 200359
loss: 0.993130087852478,grad_norm: 0.7912638854313571, iteration: 200360
loss: 1.0036216974258423,grad_norm: 0.7717820032519538, iteration: 200361
loss: 1.0054001808166504,grad_norm: 0.9840177912861982, iteration: 200362
loss: 1.0142767429351807,grad_norm: 0.9416797949977413, iteration: 200363
loss: 0.9884189367294312,grad_norm: 0.9999995751741223, iteration: 200364
loss: 0.959499716758728,grad_norm: 0.9007760220174014, iteration: 200365
loss: 1.0044788122177124,grad_norm: 0.959923588264944, iteration: 200366
loss: 0.9479125738143921,grad_norm: 0.8564021424938467, iteration: 200367
loss: 0.9953567385673523,grad_norm: 0.9044078384291735, iteration: 200368
loss: 0.9881961345672607,grad_norm: 0.9999990747177002, iteration: 200369
loss: 1.017472505569458,grad_norm: 0.9999991857556626, iteration: 200370
loss: 0.9588248133659363,grad_norm: 0.9999991609463778, iteration: 200371
loss: 0.9597193598747253,grad_norm: 0.9999990135172735, iteration: 200372
loss: 0.9973328709602356,grad_norm: 0.910597259406932, iteration: 200373
loss: 1.011574625968933,grad_norm: 0.82446097792779, iteration: 200374
loss: 1.0004191398620605,grad_norm: 0.7899475267072961, iteration: 200375
loss: 1.0005087852478027,grad_norm: 0.8584753850587757, iteration: 200376
loss: 1.0139864683151245,grad_norm: 0.8469127004314008, iteration: 200377
loss: 0.9951357841491699,grad_norm: 0.9999991248185999, iteration: 200378
loss: 0.9975152015686035,grad_norm: 0.8377905761194477, iteration: 200379
loss: 0.9960110187530518,grad_norm: 0.9750452631492136, iteration: 200380
loss: 0.9853750467300415,grad_norm: 0.9999991874850378, iteration: 200381
loss: 1.130034327507019,grad_norm: 0.9999994296758024, iteration: 200382
loss: 1.0203863382339478,grad_norm: 0.8545214767098553, iteration: 200383
loss: 0.9961445927619934,grad_norm: 0.9786797747184304, iteration: 200384
loss: 1.0158873796463013,grad_norm: 0.9999991966398178, iteration: 200385
loss: 0.9921020269393921,grad_norm: 0.9577654725322282, iteration: 200386
loss: 1.001803994178772,grad_norm: 0.9053179562531385, iteration: 200387
loss: 1.0177195072174072,grad_norm: 0.9999990864947276, iteration: 200388
loss: 1.0229110717773438,grad_norm: 0.9999991133738013, iteration: 200389
loss: 0.9944154620170593,grad_norm: 0.8094133311547744, iteration: 200390
loss: 1.0281803607940674,grad_norm: 0.9999995164634555, iteration: 200391
loss: 1.0119415521621704,grad_norm: 0.9319004100350455, iteration: 200392
loss: 1.0390260219573975,grad_norm: 0.8210345674958531, iteration: 200393
loss: 0.9807801842689514,grad_norm: 0.8030902995559016, iteration: 200394
loss: 0.9893168210983276,grad_norm: 0.9999991630476847, iteration: 200395
loss: 1.0089914798736572,grad_norm: 0.9089872221904429, iteration: 200396
loss: 1.0014991760253906,grad_norm: 0.9293487290025748, iteration: 200397
loss: 0.9947575330734253,grad_norm: 0.9097628080579098, iteration: 200398
loss: 1.0261799097061157,grad_norm: 0.9999991170844205, iteration: 200399
loss: 1.0140087604522705,grad_norm: 0.9999998460668155, iteration: 200400
loss: 0.9850001931190491,grad_norm: 0.9018219997356123, iteration: 200401
loss: 1.0267852544784546,grad_norm: 0.9349039752653352, iteration: 200402
loss: 0.9677163362503052,grad_norm: 0.999998996892824, iteration: 200403
loss: 0.9850530624389648,grad_norm: 0.890188940228459, iteration: 200404
loss: 1.0713564157485962,grad_norm: 0.9321131807237015, iteration: 200405
loss: 0.9848587512969971,grad_norm: 0.9999991113002076, iteration: 200406
loss: 0.9950830936431885,grad_norm: 0.9999990898832154, iteration: 200407
loss: 0.9946523308753967,grad_norm: 0.9999989770116384, iteration: 200408
loss: 0.9845705628395081,grad_norm: 0.8505904633102302, iteration: 200409
loss: 1.038879632949829,grad_norm: 0.9739861072334731, iteration: 200410
loss: 0.9927383065223694,grad_norm: 0.8131028463667385, iteration: 200411
loss: 0.999433159828186,grad_norm: 0.9999995429394117, iteration: 200412
loss: 1.0209839344024658,grad_norm: 0.9999992882174817, iteration: 200413
loss: 1.0093200206756592,grad_norm: 0.9999992289996487, iteration: 200414
loss: 1.0418883562088013,grad_norm: 0.9999999006464557, iteration: 200415
loss: 0.9967060685157776,grad_norm: 0.9867797336663727, iteration: 200416
loss: 0.9834474921226501,grad_norm: 0.9999992799211137, iteration: 200417
loss: 0.9615382552146912,grad_norm: 0.9999991623434444, iteration: 200418
loss: 1.0644919872283936,grad_norm: 0.999999562042694, iteration: 200419
loss: 0.9988906383514404,grad_norm: 0.8791670406199487, iteration: 200420
loss: 1.0048797130584717,grad_norm: 0.9999992794394438, iteration: 200421
loss: 0.9813051819801331,grad_norm: 0.9135740569796174, iteration: 200422
loss: 1.0172346830368042,grad_norm: 0.8706101773601413, iteration: 200423
loss: 0.9832949638366699,grad_norm: 0.9939919432074646, iteration: 200424
loss: 1.0098422765731812,grad_norm: 0.9999989434721565, iteration: 200425
loss: 0.989246666431427,grad_norm: 0.8245760070018461, iteration: 200426
loss: 1.0010331869125366,grad_norm: 0.9999992038903598, iteration: 200427
loss: 1.0232166051864624,grad_norm: 0.8258069721283798, iteration: 200428
loss: 1.0188722610473633,grad_norm: 0.999999654793853, iteration: 200429
loss: 1.008867621421814,grad_norm: 0.9534917091848095, iteration: 200430
loss: 0.9901402592658997,grad_norm: 0.9851022856597754, iteration: 200431
loss: 0.9863234758377075,grad_norm: 0.9408186526044493, iteration: 200432
loss: 0.9760507345199585,grad_norm: 0.9705565993609805, iteration: 200433
loss: 1.017327904701233,grad_norm: 0.9999991082770016, iteration: 200434
loss: 0.9832585453987122,grad_norm: 0.9783837714442793, iteration: 200435
loss: 0.9946714639663696,grad_norm: 0.8150713141244248, iteration: 200436
loss: 1.0139994621276855,grad_norm: 0.8734249544035361, iteration: 200437
loss: 1.0335649251937866,grad_norm: 0.8841048283335955, iteration: 200438
loss: 1.0050461292266846,grad_norm: 0.9641631187662391, iteration: 200439
loss: 1.0405786037445068,grad_norm: 0.9514720251806736, iteration: 200440
loss: 0.9986001253128052,grad_norm: 0.9698481889246121, iteration: 200441
loss: 0.9878835678100586,grad_norm: 0.9999992209604912, iteration: 200442
loss: 1.0079914331436157,grad_norm: 0.8328594611622981, iteration: 200443
loss: 1.0052213668823242,grad_norm: 0.9477443634247685, iteration: 200444
loss: 0.9909242391586304,grad_norm: 0.9004046828238751, iteration: 200445
loss: 1.0049865245819092,grad_norm: 0.9667841914992925, iteration: 200446
loss: 0.9938015937805176,grad_norm: 0.9041083427817631, iteration: 200447
loss: 0.9580206871032715,grad_norm: 0.9999990115617052, iteration: 200448
loss: 1.0555481910705566,grad_norm: 0.9999997328199681, iteration: 200449
loss: 0.9774966835975647,grad_norm: 0.9999991252231102, iteration: 200450
loss: 1.0018998384475708,grad_norm: 0.9999990445878869, iteration: 200451
loss: 0.9882727265357971,grad_norm: 0.9999990280775578, iteration: 200452
loss: 1.005149245262146,grad_norm: 0.9999991165175144, iteration: 200453
loss: 0.9964073896408081,grad_norm: 0.9474954888032663, iteration: 200454
loss: 0.989585816860199,grad_norm: 0.9638939228599203, iteration: 200455
loss: 1.014628529548645,grad_norm: 0.9999998489256093, iteration: 200456
loss: 1.0083411931991577,grad_norm: 0.9999990066293967, iteration: 200457
loss: 1.0115569829940796,grad_norm: 0.8239610864814314, iteration: 200458
loss: 1.0011425018310547,grad_norm: 0.9999989446255357, iteration: 200459
loss: 1.022226333618164,grad_norm: 0.9999991711054068, iteration: 200460
loss: 0.9936378598213196,grad_norm: 0.9441462766294443, iteration: 200461
loss: 0.9805288910865784,grad_norm: 0.9196850348368435, iteration: 200462
loss: 1.0218303203582764,grad_norm: 0.9606280515355182, iteration: 200463
loss: 1.0314234495162964,grad_norm: 0.9999993484256011, iteration: 200464
loss: 0.988649845123291,grad_norm: 0.9999992842972905, iteration: 200465
loss: 1.0003522634506226,grad_norm: 0.9562806025634918, iteration: 200466
loss: 0.9725161194801331,grad_norm: 0.9755918210586328, iteration: 200467
loss: 1.0057860612869263,grad_norm: 0.8182271708010501, iteration: 200468
loss: 1.0045866966247559,grad_norm: 0.8074956288227442, iteration: 200469
loss: 1.0181457996368408,grad_norm: 0.896518984379748, iteration: 200470
loss: 1.0167769193649292,grad_norm: 0.9999998427345188, iteration: 200471
loss: 0.997295081615448,grad_norm: 0.9999990373692234, iteration: 200472
loss: 0.9891220331192017,grad_norm: 0.9999990916436097, iteration: 200473
loss: 0.9976098537445068,grad_norm: 0.9999989843558454, iteration: 200474
loss: 0.9898919463157654,grad_norm: 0.9030034976495838, iteration: 200475
loss: 0.9858165979385376,grad_norm: 0.8707515360415826, iteration: 200476
loss: 1.0029748678207397,grad_norm: 0.9999991121976577, iteration: 200477
loss: 0.9855241179466248,grad_norm: 0.9196319217060278, iteration: 200478
loss: 1.1418006420135498,grad_norm: 0.9999994501684317, iteration: 200479
loss: 1.009456992149353,grad_norm: 0.9999996363922281, iteration: 200480
loss: 1.0020604133605957,grad_norm: 0.7062264234751229, iteration: 200481
loss: 1.0052897930145264,grad_norm: 0.8909389253322123, iteration: 200482
loss: 1.0460762977600098,grad_norm: 0.9999992039795863, iteration: 200483
loss: 1.0008268356323242,grad_norm: 0.9999991985091484, iteration: 200484
loss: 1.0367660522460938,grad_norm: 0.9999992529195404, iteration: 200485
loss: 1.0411162376403809,grad_norm: 0.9999993302199415, iteration: 200486
loss: 1.1190286874771118,grad_norm: 0.999999992326322, iteration: 200487
loss: 1.0269194841384888,grad_norm: 0.9999995017478807, iteration: 200488
loss: 1.0342670679092407,grad_norm: 0.999999675681784, iteration: 200489
loss: 1.0400806665420532,grad_norm: 0.9999997290147806, iteration: 200490
loss: 1.1447113752365112,grad_norm: 0.9999995776058622, iteration: 200491
loss: 1.1540532112121582,grad_norm: 0.9999993926113669, iteration: 200492
loss: 1.0927454233169556,grad_norm: 0.9999997721174533, iteration: 200493
loss: 0.9967131018638611,grad_norm: 0.9999990703693566, iteration: 200494
loss: 1.041443943977356,grad_norm: 0.9999993131396764, iteration: 200495
loss: 1.2121232748031616,grad_norm: 0.9999997712450953, iteration: 200496
loss: 1.0462876558303833,grad_norm: 0.9999994737592259, iteration: 200497
loss: 1.0718135833740234,grad_norm: 0.9999992557351133, iteration: 200498
loss: 1.1235555410385132,grad_norm: 0.9999996418125839, iteration: 200499
loss: 1.078930377960205,grad_norm: 0.9999993674177411, iteration: 200500
loss: 1.165628433227539,grad_norm: 0.9999991861977643, iteration: 200501
loss: 1.137378454208374,grad_norm: 0.9999995249035063, iteration: 200502
loss: 1.1057063341140747,grad_norm: 0.9999992128074029, iteration: 200503
loss: 1.168843150138855,grad_norm: 0.9999992231667774, iteration: 200504
loss: 1.067231297492981,grad_norm: 0.9999997394732483, iteration: 200505
loss: 1.1193126440048218,grad_norm: 0.9999992620256224, iteration: 200506
loss: 1.0747853517532349,grad_norm: 0.9999993994748849, iteration: 200507
loss: 1.0715597867965698,grad_norm: 0.9999992663734202, iteration: 200508
loss: 1.0906201601028442,grad_norm: 0.9999995468571979, iteration: 200509
loss: 1.0614049434661865,grad_norm: 0.9999990460824669, iteration: 200510
loss: 1.078495740890503,grad_norm: 0.999999189032814, iteration: 200511
loss: 1.0926254987716675,grad_norm: 0.9999992064928824, iteration: 200512
loss: 1.2787196636199951,grad_norm: 0.9999998555314927, iteration: 200513
loss: 1.0393016338348389,grad_norm: 0.9876369598040998, iteration: 200514
loss: 1.0376343727111816,grad_norm: 0.9999997813873279, iteration: 200515
loss: 1.0755125284194946,grad_norm: 0.9999995571883454, iteration: 200516
loss: 1.041528582572937,grad_norm: 0.9466622733414929, iteration: 200517
loss: 1.2073220014572144,grad_norm: 0.9999997455204339, iteration: 200518
loss: 1.2287824153900146,grad_norm: 0.9999993550587838, iteration: 200519
loss: 1.0685676336288452,grad_norm: 0.9999991351436139, iteration: 200520
loss: 1.080570936203003,grad_norm: 0.9999997261397648, iteration: 200521
loss: 1.1564806699752808,grad_norm: 1.000000083420559, iteration: 200522
loss: 1.0088120698928833,grad_norm: 0.9999990656742568, iteration: 200523
loss: 1.1557331085205078,grad_norm: 0.9999997738459351, iteration: 200524
loss: 1.047913670539856,grad_norm: 0.9999990741142999, iteration: 200525
loss: 1.143599033355713,grad_norm: 0.9999994666541911, iteration: 200526
loss: 1.0453599691390991,grad_norm: 0.8516959592319644, iteration: 200527
loss: 1.054350733757019,grad_norm: 0.999999183472082, iteration: 200528
loss: 1.1012845039367676,grad_norm: 0.99999926182683, iteration: 200529
loss: 1.0619674921035767,grad_norm: 0.9564619238510613, iteration: 200530
loss: 1.071925401687622,grad_norm: 0.9999990834727206, iteration: 200531
loss: 1.0595030784606934,grad_norm: 0.9999991220565412, iteration: 200532
loss: 1.089554786682129,grad_norm: 0.9999991878579002, iteration: 200533
loss: 0.9837586283683777,grad_norm: 0.9796708586646405, iteration: 200534
loss: 1.031558871269226,grad_norm: 0.9999995549269367, iteration: 200535
loss: 1.0712125301361084,grad_norm: 0.9999993412092838, iteration: 200536
loss: 1.0670511722564697,grad_norm: 0.9999991540896521, iteration: 200537
loss: 1.0487722158432007,grad_norm: 0.9999994721550295, iteration: 200538
loss: 1.0621213912963867,grad_norm: 0.9247159517670965, iteration: 200539
loss: 1.0458014011383057,grad_norm: 0.9999991176836486, iteration: 200540
loss: 1.0915372371673584,grad_norm: 0.9999992877572156, iteration: 200541
loss: 1.0325664281845093,grad_norm: 0.9999992713519733, iteration: 200542
loss: 1.1162760257720947,grad_norm: 0.9999992958649283, iteration: 200543
loss: 1.0569138526916504,grad_norm: 0.9794163414099244, iteration: 200544
loss: 1.0540670156478882,grad_norm: 0.8380931142267726, iteration: 200545
loss: 1.101402759552002,grad_norm: 0.999999837720701, iteration: 200546
loss: 1.0765231847763062,grad_norm: 0.8766653046679721, iteration: 200547
loss: 1.052652359008789,grad_norm: 0.894303175776744, iteration: 200548
loss: 1.1304858922958374,grad_norm: 0.999999194840984, iteration: 200549
loss: 1.0642826557159424,grad_norm: 0.9999990713502556, iteration: 200550
loss: 1.0224980115890503,grad_norm: 0.8586296190661199, iteration: 200551
loss: 1.0911481380462646,grad_norm: 0.998901241372729, iteration: 200552
loss: 1.0853736400604248,grad_norm: 0.9999998948490018, iteration: 200553
loss: 1.0958797931671143,grad_norm: 0.9999995201798728, iteration: 200554
loss: 1.0730937719345093,grad_norm: 0.9999991548707566, iteration: 200555
loss: 1.0486798286437988,grad_norm: 0.9999991559560328, iteration: 200556
loss: 1.0498683452606201,grad_norm: 0.9999992140822971, iteration: 200557
loss: 1.0621932744979858,grad_norm: 0.9999992696957467, iteration: 200558
loss: 1.1075234413146973,grad_norm: 0.9999999354309272, iteration: 200559
loss: 1.0140994787216187,grad_norm: 0.9999990173751341, iteration: 200560
loss: 1.1320884227752686,grad_norm: 0.9999991539034478, iteration: 200561
loss: 1.0631555318832397,grad_norm: 0.9999996255284649, iteration: 200562
loss: 1.0439682006835938,grad_norm: 0.9296587202479267, iteration: 200563
loss: 1.0731555223464966,grad_norm: 0.9498380232206945, iteration: 200564
loss: 1.0235917568206787,grad_norm: 0.9999991890276448, iteration: 200565
loss: 1.02655827999115,grad_norm: 0.9538440242668216, iteration: 200566
loss: 1.0491544008255005,grad_norm: 0.9999991633598138, iteration: 200567
loss: 1.0068144798278809,grad_norm: 0.9999991938682109, iteration: 200568
loss: 1.0689777135849,grad_norm: 0.9999997392279966, iteration: 200569
loss: 1.0312352180480957,grad_norm: 0.9999992477569303, iteration: 200570
loss: 1.0702073574066162,grad_norm: 0.9999991796880446, iteration: 200571
loss: 1.0236742496490479,grad_norm: 0.9999990711578932, iteration: 200572
loss: 1.0458033084869385,grad_norm: 0.9044248692982803, iteration: 200573
loss: 1.1382051706314087,grad_norm: 1.0000000289655515, iteration: 200574
loss: 1.033300757408142,grad_norm: 0.9999992793631343, iteration: 200575
loss: 1.0640789270401,grad_norm: 0.9999992750747542, iteration: 200576
loss: 1.0718823671340942,grad_norm: 0.9999997893785284, iteration: 200577
loss: 1.033389925956726,grad_norm: 0.9999989714899917, iteration: 200578
loss: 1.08931303024292,grad_norm: 0.9999995895791335, iteration: 200579
loss: 0.9981324076652527,grad_norm: 0.9999990338962063, iteration: 200580
loss: 1.024153470993042,grad_norm: 0.9999991665503121, iteration: 200581
loss: 1.0417121648788452,grad_norm: 0.9999989786196382, iteration: 200582
loss: 1.1022615432739258,grad_norm: 0.9321058912118819, iteration: 200583
loss: 1.070367455482483,grad_norm: 0.9999991895015562, iteration: 200584
loss: 1.3299956321716309,grad_norm: 0.9999996984635618, iteration: 200585
loss: 1.0198122262954712,grad_norm: 0.8448559524110975, iteration: 200586
loss: 1.051322102546692,grad_norm: 0.8539337759912516, iteration: 200587
loss: 1.058175802230835,grad_norm: 0.9240240507056289, iteration: 200588
loss: 0.9742286801338196,grad_norm: 0.9999990611943032, iteration: 200589
loss: 1.0948553085327148,grad_norm: 0.9999994153462023, iteration: 200590
loss: 1.0078396797180176,grad_norm: 0.999999289941761, iteration: 200591
loss: 1.0045629739761353,grad_norm: 0.9748047088283949, iteration: 200592
loss: 0.9953576326370239,grad_norm: 0.9999990715137922, iteration: 200593
loss: 1.1324913501739502,grad_norm: 0.9999996780279776, iteration: 200594
loss: 1.0120676755905151,grad_norm: 0.872020112901413, iteration: 200595
loss: 1.0194675922393799,grad_norm: 0.9999992648120267, iteration: 200596
loss: 1.105871319770813,grad_norm: 0.9999997575943017, iteration: 200597
loss: 1.0768147706985474,grad_norm: 0.9958024349100928, iteration: 200598
loss: 1.1975764036178589,grad_norm: 0.9999993949746058, iteration: 200599
loss: 1.148980975151062,grad_norm: 0.999999631718389, iteration: 200600
loss: 0.9790987372398376,grad_norm: 0.919959849819799, iteration: 200601
loss: 1.0312535762786865,grad_norm: 0.9133715857444619, iteration: 200602
loss: 0.9914108514785767,grad_norm: 0.9999995028711145, iteration: 200603
loss: 1.0116676092147827,grad_norm: 0.999999037779265, iteration: 200604
loss: 1.053133249282837,grad_norm: 0.9999999428012547, iteration: 200605
loss: 0.978651225566864,grad_norm: 0.8539381059906174, iteration: 200606
loss: 0.999767005443573,grad_norm: 0.9999990562940956, iteration: 200607
loss: 1.1095073223114014,grad_norm: 0.9999997488689397, iteration: 200608
loss: 1.0138310194015503,grad_norm: 0.999999071974914, iteration: 200609
loss: 1.0131402015686035,grad_norm: 0.9999992140007918, iteration: 200610
loss: 1.140593409538269,grad_norm: 0.9999996666504489, iteration: 200611
loss: 1.0353364944458008,grad_norm: 0.9999991160596795, iteration: 200612
loss: 1.0379749536514282,grad_norm: 0.9999990441983108, iteration: 200613
loss: 1.0240873098373413,grad_norm: 0.9942767003210414, iteration: 200614
loss: 1.1033272743225098,grad_norm: 0.9999992697810561, iteration: 200615
loss: 1.0214182138442993,grad_norm: 0.9675666621116593, iteration: 200616
loss: 0.9911073446273804,grad_norm: 0.9999992398893105, iteration: 200617
loss: 1.0416696071624756,grad_norm: 0.9999995071783745, iteration: 200618
loss: 0.9840161204338074,grad_norm: 0.9999991200904237, iteration: 200619
loss: 1.1083818674087524,grad_norm: 0.9999995359265546, iteration: 200620
loss: 1.01749849319458,grad_norm: 0.9999993429680065, iteration: 200621
loss: 0.9526975750923157,grad_norm: 0.9475609539548389, iteration: 200622
loss: 1.0652202367782593,grad_norm: 0.9999993452327073, iteration: 200623
loss: 1.0143587589263916,grad_norm: 0.9999991557183909, iteration: 200624
loss: 0.9967098236083984,grad_norm: 0.9718751745304882, iteration: 200625
loss: 1.0631967782974243,grad_norm: 0.9999996810010028, iteration: 200626
loss: 0.995394766330719,grad_norm: 0.9999992119876118, iteration: 200627
loss: 0.9337025880813599,grad_norm: 0.935660391433194, iteration: 200628
loss: 1.0092968940734863,grad_norm: 0.9999998572016456, iteration: 200629
loss: 1.0362945795059204,grad_norm: 0.9999990237343586, iteration: 200630
loss: 1.0342473983764648,grad_norm: 0.9999991784305771, iteration: 200631
loss: 1.023903727531433,grad_norm: 0.9772394114766725, iteration: 200632
loss: 1.0116803646087646,grad_norm: 0.9510356368530879, iteration: 200633
loss: 0.9993467926979065,grad_norm: 0.9831074094001377, iteration: 200634
loss: 0.9808218479156494,grad_norm: 0.9999991950713547, iteration: 200635
loss: 0.9992864727973938,grad_norm: 0.9999992876522439, iteration: 200636
loss: 1.007330298423767,grad_norm: 0.9492206537911989, iteration: 200637
loss: 1.0168472528457642,grad_norm: 0.9940713673042658, iteration: 200638
loss: 1.0292466878890991,grad_norm: 0.9999992331748792, iteration: 200639
loss: 0.9662854075431824,grad_norm: 0.9999992880139567, iteration: 200640
loss: 1.011004090309143,grad_norm: 0.8540119833210985, iteration: 200641
loss: 1.0254191160202026,grad_norm: 0.9999990905845921, iteration: 200642
loss: 1.0243511199951172,grad_norm: 0.9999992498219481, iteration: 200643
loss: 1.063104271888733,grad_norm: 0.9999993159961521, iteration: 200644
loss: 1.0041694641113281,grad_norm: 0.8880502179146231, iteration: 200645
loss: 1.0211188793182373,grad_norm: 0.9999994935076919, iteration: 200646
loss: 0.9694021940231323,grad_norm: 0.9999992172770967, iteration: 200647
loss: 1.0288124084472656,grad_norm: 0.9174201934481007, iteration: 200648
loss: 0.9996250867843628,grad_norm: 0.9999992544427921, iteration: 200649
loss: 1.046589732170105,grad_norm: 0.976897576454144, iteration: 200650
loss: 0.9847835898399353,grad_norm: 0.9480142106593127, iteration: 200651
loss: 0.9839562773704529,grad_norm: 0.9999992451795574, iteration: 200652
loss: 1.0047268867492676,grad_norm: 0.9052021382518032, iteration: 200653
loss: 1.0360603332519531,grad_norm: 0.9999990993543734, iteration: 200654
loss: 1.0066704750061035,grad_norm: 0.963825085340599, iteration: 200655
loss: 1.0110124349594116,grad_norm: 0.9999996562764634, iteration: 200656
loss: 0.9643995761871338,grad_norm: 0.8948280309632607, iteration: 200657
loss: 1.0610507726669312,grad_norm: 0.999999056286341, iteration: 200658
loss: 1.0267651081085205,grad_norm: 0.9999993965697672, iteration: 200659
loss: 1.0204699039459229,grad_norm: 0.9999990860548102, iteration: 200660
loss: 1.0055705308914185,grad_norm: 0.9999994646547904, iteration: 200661
loss: 1.0739686489105225,grad_norm: 0.9999997251832802, iteration: 200662
loss: 0.9990212321281433,grad_norm: 0.9999994853360034, iteration: 200663
loss: 1.071679949760437,grad_norm: 0.999998975269999, iteration: 200664
loss: 1.0195136070251465,grad_norm: 0.9606804107040421, iteration: 200665
loss: 1.0708518028259277,grad_norm: 0.9999996186445871, iteration: 200666
loss: 1.0908448696136475,grad_norm: 0.9999998744804155, iteration: 200667
loss: 1.1062716245651245,grad_norm: 1.0000000615710951, iteration: 200668
loss: 1.089644432067871,grad_norm: 0.9999991743960607, iteration: 200669
loss: 1.0299816131591797,grad_norm: 0.9864901853559617, iteration: 200670
loss: 1.066433310508728,grad_norm: 0.9999991497509588, iteration: 200671
loss: 1.003241777420044,grad_norm: 0.9359768082269929, iteration: 200672
loss: 1.010102391242981,grad_norm: 0.9999990174314658, iteration: 200673
loss: 0.9989128112792969,grad_norm: 0.8927807836926194, iteration: 200674
loss: 1.0075700283050537,grad_norm: 0.98812850103736, iteration: 200675
loss: 1.0162482261657715,grad_norm: 0.9252281830605348, iteration: 200676
loss: 1.0030711889266968,grad_norm: 0.9999994687706889, iteration: 200677
loss: 0.9916332364082336,grad_norm: 0.9999991503976705, iteration: 200678
loss: 1.0387816429138184,grad_norm: 0.9999998709071275, iteration: 200679
loss: 1.042616605758667,grad_norm: 0.9999996338900594, iteration: 200680
loss: 0.9686065316200256,grad_norm: 0.9211732112695339, iteration: 200681
loss: 0.9960668087005615,grad_norm: 0.9270084799612798, iteration: 200682
loss: 1.050479769706726,grad_norm: 0.9999994963521265, iteration: 200683
loss: 0.9979876279830933,grad_norm: 0.9082658667827483, iteration: 200684
loss: 0.997532844543457,grad_norm: 0.9999992167045682, iteration: 200685
loss: 1.0020883083343506,grad_norm: 0.9315411085272577, iteration: 200686
loss: 1.001890778541565,grad_norm: 0.918551989516062, iteration: 200687
loss: 0.982011079788208,grad_norm: 0.9999994711686695, iteration: 200688
loss: 1.027626633644104,grad_norm: 0.9999995553953512, iteration: 200689
loss: 1.1199579238891602,grad_norm: 0.9999991562990449, iteration: 200690
loss: 0.9917007684707642,grad_norm: 0.9999991100006127, iteration: 200691
loss: 0.9919978380203247,grad_norm: 0.8067639021950367, iteration: 200692
loss: 1.0417397022247314,grad_norm: 0.9999993138483418, iteration: 200693
loss: 1.0760599374771118,grad_norm: 0.9999991377794298, iteration: 200694
loss: 1.0119227170944214,grad_norm: 0.9999996199967962, iteration: 200695
loss: 0.9543368220329285,grad_norm: 0.9928373456023634, iteration: 200696
loss: 1.0282212495803833,grad_norm: 0.9999990515513484, iteration: 200697
loss: 0.9986116290092468,grad_norm: 0.9999992447186377, iteration: 200698
loss: 1.022063970565796,grad_norm: 0.8709460236933336, iteration: 200699
loss: 1.037182092666626,grad_norm: 0.8732017208173735, iteration: 200700
loss: 1.0012868642807007,grad_norm: 0.999999176058728, iteration: 200701
loss: 1.0244985818862915,grad_norm: 0.9735766767199798, iteration: 200702
loss: 1.0253939628601074,grad_norm: 0.999999186343273, iteration: 200703
loss: 1.0079386234283447,grad_norm: 0.8975837935028598, iteration: 200704
loss: 1.0299373865127563,grad_norm: 0.9999993825925224, iteration: 200705
loss: 0.9974348545074463,grad_norm: 0.9999996762202153, iteration: 200706
loss: 1.037773847579956,grad_norm: 0.9999996776847817, iteration: 200707
loss: 1.0162266492843628,grad_norm: 0.9999992676760165, iteration: 200708
loss: 0.9902058243751526,grad_norm: 0.9692314028661315, iteration: 200709
loss: 1.0421377420425415,grad_norm: 0.9999990594636248, iteration: 200710
loss: 1.1021114587783813,grad_norm: 0.9999999192617386, iteration: 200711
loss: 1.0078682899475098,grad_norm: 0.9999995049323113, iteration: 200712
loss: 1.0310966968536377,grad_norm: 0.958924306393497, iteration: 200713
loss: 1.0467702150344849,grad_norm: 0.9168240035075427, iteration: 200714
loss: 1.0537514686584473,grad_norm: 0.9999994187483643, iteration: 200715
loss: 0.9816830158233643,grad_norm: 0.8670480365281452, iteration: 200716
loss: 0.9436467289924622,grad_norm: 0.9999991177031422, iteration: 200717
loss: 0.9735567569732666,grad_norm: 0.8500198580424008, iteration: 200718
loss: 1.007910132408142,grad_norm: 0.9999990800628848, iteration: 200719
loss: 0.9928932785987854,grad_norm: 0.7754609287595343, iteration: 200720
loss: 1.0437883138656616,grad_norm: 0.9999998518048981, iteration: 200721
loss: 1.0068058967590332,grad_norm: 0.9172013598466175, iteration: 200722
loss: 1.002444863319397,grad_norm: 0.986916436072814, iteration: 200723
loss: 0.9700048565864563,grad_norm: 0.9999990842561934, iteration: 200724
loss: 1.028373122215271,grad_norm: 0.9999997724925875, iteration: 200725
loss: 1.0240095853805542,grad_norm: 0.9999999262403748, iteration: 200726
loss: 1.024173378944397,grad_norm: 0.999999152022502, iteration: 200727
loss: 1.094282865524292,grad_norm: 0.9999991912830554, iteration: 200728
loss: 0.9822145104408264,grad_norm: 0.8288450681785289, iteration: 200729
loss: 1.0167744159698486,grad_norm: 0.9999992033771561, iteration: 200730
loss: 1.0549378395080566,grad_norm: 0.9999998476094708, iteration: 200731
loss: 1.0247784852981567,grad_norm: 0.9786999556058575, iteration: 200732
loss: 0.9996786713600159,grad_norm: 0.9999989478635664, iteration: 200733
loss: 1.0473458766937256,grad_norm: 0.9999991347919694, iteration: 200734
loss: 0.9663944840431213,grad_norm: 0.9999992133602693, iteration: 200735
loss: 1.0288047790527344,grad_norm: 0.9246829295601207, iteration: 200736
loss: 1.0445895195007324,grad_norm: 0.9999990289550381, iteration: 200737
loss: 0.9982098937034607,grad_norm: 0.9903971950848597, iteration: 200738
loss: 0.992582380771637,grad_norm: 0.9999989408367459, iteration: 200739
loss: 1.0641143321990967,grad_norm: 0.9999990555396482, iteration: 200740
loss: 0.9676491618156433,grad_norm: 0.9371256096769284, iteration: 200741
loss: 1.028582215309143,grad_norm: 0.9999991417178609, iteration: 200742
loss: 1.0247036218643188,grad_norm: 0.9993008489190668, iteration: 200743
loss: 1.0050486326217651,grad_norm: 0.7608602175753019, iteration: 200744
loss: 0.9863296747207642,grad_norm: 0.8644549899540386, iteration: 200745
loss: 1.015072226524353,grad_norm: 0.9999992389584392, iteration: 200746
loss: 0.9982302188873291,grad_norm: 0.8863169809246935, iteration: 200747
loss: 0.9985413551330566,grad_norm: 0.9447460422334626, iteration: 200748
loss: 0.994972825050354,grad_norm: 0.9999995654955073, iteration: 200749
loss: 0.998468816280365,grad_norm: 0.9999992263060397, iteration: 200750
loss: 1.025100827217102,grad_norm: 0.9999990226264184, iteration: 200751
loss: 0.9961004853248596,grad_norm: 0.9349626037523023, iteration: 200752
loss: 0.9855645298957825,grad_norm: 0.9159115616487209, iteration: 200753
loss: 1.01768958568573,grad_norm: 0.9476193376113027, iteration: 200754
loss: 1.008111596107483,grad_norm: 0.9373443667246414, iteration: 200755
loss: 1.039643406867981,grad_norm: 0.9999990572340115, iteration: 200756
loss: 1.01689612865448,grad_norm: 0.9365709364177552, iteration: 200757
loss: 0.9827646613121033,grad_norm: 0.9999992613638169, iteration: 200758
loss: 1.0406519174575806,grad_norm: 0.8955790460927456, iteration: 200759
loss: 0.9699249267578125,grad_norm: 0.9999998577804707, iteration: 200760
loss: 0.9858609437942505,grad_norm: 0.9999997849587545, iteration: 200761
loss: 0.9972915649414062,grad_norm: 0.9387192479382722, iteration: 200762
loss: 1.0035109519958496,grad_norm: 0.9999991831568855, iteration: 200763
loss: 1.0112751722335815,grad_norm: 0.9999990208988307, iteration: 200764
loss: 1.0238620042800903,grad_norm: 0.846369148029682, iteration: 200765
loss: 0.9759520292282104,grad_norm: 0.9999992452962573, iteration: 200766
loss: 1.0029054880142212,grad_norm: 0.9728546622743263, iteration: 200767
loss: 1.036277413368225,grad_norm: 0.9999990734903057, iteration: 200768
loss: 1.0507107973098755,grad_norm: 0.9293890188125659, iteration: 200769
loss: 1.0114223957061768,grad_norm: 0.9999992110822972, iteration: 200770
loss: 1.0017567873001099,grad_norm: 0.9999992011817724, iteration: 200771
loss: 0.9871095418930054,grad_norm: 0.976646797113223, iteration: 200772
loss: 1.0013197660446167,grad_norm: 0.9330245292281044, iteration: 200773
loss: 0.9929059743881226,grad_norm: 0.999998975741752, iteration: 200774
loss: 1.0312654972076416,grad_norm: 0.9886978239909693, iteration: 200775
loss: 0.9876554608345032,grad_norm: 0.8731383889826778, iteration: 200776
loss: 1.006568431854248,grad_norm: 0.9999992336166229, iteration: 200777
loss: 1.068200945854187,grad_norm: 0.9091832654026468, iteration: 200778
loss: 1.0152462720870972,grad_norm: 0.9698676412084988, iteration: 200779
loss: 1.008473515510559,grad_norm: 0.9369342200752516, iteration: 200780
loss: 0.9974008202552795,grad_norm: 0.999999201470618, iteration: 200781
loss: 0.9570268392562866,grad_norm: 0.825457481560113, iteration: 200782
loss: 1.032045841217041,grad_norm: 0.955868798214067, iteration: 200783
loss: 0.9905415177345276,grad_norm: 0.9222506747156781, iteration: 200784
loss: 1.0047751665115356,grad_norm: 0.8669727636897755, iteration: 200785
loss: 1.0139895677566528,grad_norm: 0.9999999349166429, iteration: 200786
loss: 1.0066927671432495,grad_norm: 0.9999991367974431, iteration: 200787
loss: 1.0200098752975464,grad_norm: 0.9072951459457672, iteration: 200788
loss: 1.0254788398742676,grad_norm: 0.9999990402220025, iteration: 200789
loss: 0.9552172422409058,grad_norm: 0.9999990681101929, iteration: 200790
loss: 0.9985592365264893,grad_norm: 0.9999995883170342, iteration: 200791
loss: 0.9952998757362366,grad_norm: 0.8175120870745909, iteration: 200792
loss: 0.9830543398857117,grad_norm: 0.9475861405252143, iteration: 200793
loss: 0.9968727231025696,grad_norm: 0.890359999100272, iteration: 200794
loss: 0.998515248298645,grad_norm: 0.9748031638792152, iteration: 200795
loss: 0.9999409914016724,grad_norm: 0.9999989800655361, iteration: 200796
loss: 0.9827870726585388,grad_norm: 0.9509680280252378, iteration: 200797
loss: 0.997294545173645,grad_norm: 0.9999991456272564, iteration: 200798
loss: 1.0013644695281982,grad_norm: 0.9348577589033719, iteration: 200799
loss: 1.0065449476242065,grad_norm: 0.9999994527798414, iteration: 200800
loss: 0.99246746301651,grad_norm: 0.9311725405910805, iteration: 200801
loss: 1.0002250671386719,grad_norm: 0.9124849111521285, iteration: 200802
loss: 0.9738415479660034,grad_norm: 0.8114195522793857, iteration: 200803
loss: 1.012553334236145,grad_norm: 0.970842171946403, iteration: 200804
loss: 1.019551396369934,grad_norm: 0.9999989685195761, iteration: 200805
loss: 1.004225254058838,grad_norm: 0.9999991605035978, iteration: 200806
loss: 1.0538320541381836,grad_norm: 0.99999935996565, iteration: 200807
loss: 1.0097078084945679,grad_norm: 0.9086949346048189, iteration: 200808
loss: 1.1366757154464722,grad_norm: 0.9999999218228761, iteration: 200809
loss: 1.0337070226669312,grad_norm: 0.999999446505465, iteration: 200810
loss: 0.989409863948822,grad_norm: 0.874074724291465, iteration: 200811
loss: 1.0086063146591187,grad_norm: 0.847765064149208, iteration: 200812
loss: 1.1182883977890015,grad_norm: 0.9999999870601486, iteration: 200813
loss: 0.9492334723472595,grad_norm: 0.9488775753984144, iteration: 200814
loss: 1.3016550540924072,grad_norm: 0.999999499512716, iteration: 200815
loss: 1.0665379762649536,grad_norm: 0.99999925577641, iteration: 200816
loss: 1.0271549224853516,grad_norm: 0.891002770127777, iteration: 200817
loss: 0.989065945148468,grad_norm: 0.8747305215891592, iteration: 200818
loss: 1.3477776050567627,grad_norm: 0.9999997384693199, iteration: 200819
loss: 0.9777379035949707,grad_norm: 0.8983245343510802, iteration: 200820
loss: 1.0073031187057495,grad_norm: 0.8537751099886871, iteration: 200821
loss: 0.9952291250228882,grad_norm: 0.941807764635371, iteration: 200822
loss: 1.0066207647323608,grad_norm: 0.9999992044783463, iteration: 200823
loss: 1.0037877559661865,grad_norm: 0.9999991519113692, iteration: 200824
loss: 1.0048085451126099,grad_norm: 0.8432977976038577, iteration: 200825
loss: 1.0173797607421875,grad_norm: 0.9999990841567359, iteration: 200826
loss: 1.0492032766342163,grad_norm: 0.9999992083131732, iteration: 200827
loss: 0.9943833351135254,grad_norm: 0.8792406258675213, iteration: 200828
loss: 1.0550962686538696,grad_norm: 0.9999990950108381, iteration: 200829
loss: 0.9840491414070129,grad_norm: 0.9810636607711816, iteration: 200830
loss: 1.031974196434021,grad_norm: 0.8804282641939928, iteration: 200831
loss: 1.0289194583892822,grad_norm: 0.9999993194915018, iteration: 200832
loss: 1.0220705270767212,grad_norm: 0.9041949747528334, iteration: 200833
loss: 0.9726131558418274,grad_norm: 0.9999991630935389, iteration: 200834
loss: 1.0087401866912842,grad_norm: 0.9999997396829431, iteration: 200835
loss: 1.0173488855361938,grad_norm: 0.9999990750281144, iteration: 200836
loss: 1.0476264953613281,grad_norm: 0.999999405371965, iteration: 200837
loss: 1.009364366531372,grad_norm: 0.99999924443511, iteration: 200838
loss: 0.9871113896369934,grad_norm: 0.900768827833611, iteration: 200839
loss: 0.9976991415023804,grad_norm: 0.9206221382071444, iteration: 200840
loss: 1.0257595777511597,grad_norm: 0.9999999687019386, iteration: 200841
loss: 1.0396004915237427,grad_norm: 0.9516788887546096, iteration: 200842
loss: 1.0131889581680298,grad_norm: 0.9999989752144244, iteration: 200843
loss: 0.9919328093528748,grad_norm: 0.9999991620661808, iteration: 200844
loss: 0.9940273761749268,grad_norm: 0.9573087302882063, iteration: 200845
loss: 1.0356186628341675,grad_norm: 0.999999621137821, iteration: 200846
loss: 0.9835466146469116,grad_norm: 0.8565257728488456, iteration: 200847
loss: 1.0472832918167114,grad_norm: 0.8880367133860756, iteration: 200848
loss: 0.9956579208374023,grad_norm: 0.9264988967728451, iteration: 200849
loss: 1.1226999759674072,grad_norm: 0.9999993995579418, iteration: 200850
loss: 1.0775147676467896,grad_norm: 0.8220778731634968, iteration: 200851
loss: 0.9957261085510254,grad_norm: 0.9999990449627111, iteration: 200852
loss: 0.939845860004425,grad_norm: 0.9999990557368508, iteration: 200853
loss: 1.011654257774353,grad_norm: 0.999999892023795, iteration: 200854
loss: 1.0736109018325806,grad_norm: 0.9999994615326163, iteration: 200855
loss: 1.032947301864624,grad_norm: 0.9999998468448335, iteration: 200856
loss: 0.961482584476471,grad_norm: 0.9999992906245312, iteration: 200857
loss: 1.0055274963378906,grad_norm: 0.9483575063416871, iteration: 200858
loss: 1.0243308544158936,grad_norm: 0.8947458655993625, iteration: 200859
loss: 1.0101127624511719,grad_norm: 0.9999990850057293, iteration: 200860
loss: 1.010764241218567,grad_norm: 0.7796699331694855, iteration: 200861
loss: 0.9969657063484192,grad_norm: 0.9999990410689283, iteration: 200862
loss: 1.0268217325210571,grad_norm: 0.8867626190199998, iteration: 200863
loss: 0.9594354629516602,grad_norm: 0.9999991474529051, iteration: 200864
loss: 1.0265930891036987,grad_norm: 0.9999990085894749, iteration: 200865
loss: 0.9782761335372925,grad_norm: 0.9013504528399131, iteration: 200866
loss: 0.9990344047546387,grad_norm: 0.9999992383265585, iteration: 200867
loss: 1.0585672855377197,grad_norm: 0.9999991005160955, iteration: 200868
loss: 0.9966793656349182,grad_norm: 0.999998998305451, iteration: 200869
loss: 1.070331335067749,grad_norm: 0.9999991656923419, iteration: 200870
loss: 0.9576011300086975,grad_norm: 0.9988544085038517, iteration: 200871
loss: 0.9843772649765015,grad_norm: 0.99999917879915, iteration: 200872
loss: 1.013969898223877,grad_norm: 0.9141361882499071, iteration: 200873
loss: 0.9958569407463074,grad_norm: 0.9999991780445684, iteration: 200874
loss: 1.0114076137542725,grad_norm: 0.9525211820137657, iteration: 200875
loss: 0.9923794865608215,grad_norm: 0.9999990282893411, iteration: 200876
loss: 0.9944872260093689,grad_norm: 0.9414756660933297, iteration: 200877
loss: 1.017203450202942,grad_norm: 0.999999589671768, iteration: 200878
loss: 1.012160301208496,grad_norm: 0.9999990052829401, iteration: 200879
loss: 0.9869919419288635,grad_norm: 0.9178380146508164, iteration: 200880
loss: 1.0690079927444458,grad_norm: 0.9999993466638799, iteration: 200881
loss: 0.975440263748169,grad_norm: 0.9999991905750644, iteration: 200882
loss: 0.9842261075973511,grad_norm: 0.9999991498168725, iteration: 200883
loss: 0.9877649545669556,grad_norm: 0.9999992698482822, iteration: 200884
loss: 1.0126962661743164,grad_norm: 0.9773455883669007, iteration: 200885
loss: 0.9782960414886475,grad_norm: 0.9508725710460372, iteration: 200886
loss: 0.9877621531486511,grad_norm: 0.9999992285127296, iteration: 200887
loss: 0.9978384375572205,grad_norm: 0.8374476137063516, iteration: 200888
loss: 1.011789083480835,grad_norm: 0.9999990055790358, iteration: 200889
loss: 1.0227197408676147,grad_norm: 0.9204166848142289, iteration: 200890
loss: 0.9778807163238525,grad_norm: 0.9999990951054413, iteration: 200891
loss: 0.9788641333580017,grad_norm: 0.980105977708611, iteration: 200892
loss: 0.9995240569114685,grad_norm: 0.9999991143290222, iteration: 200893
loss: 1.0312447547912598,grad_norm: 0.8786965293457557, iteration: 200894
loss: 1.1366013288497925,grad_norm: 0.9421409937849122, iteration: 200895
loss: 1.0121186971664429,grad_norm: 0.8516191287166847, iteration: 200896
loss: 1.0109690427780151,grad_norm: 0.8615872793336486, iteration: 200897
loss: 0.9927544593811035,grad_norm: 0.9999990213969294, iteration: 200898
loss: 0.9763351678848267,grad_norm: 0.8519648005729338, iteration: 200899
loss: 1.0165364742279053,grad_norm: 0.9462178986966526, iteration: 200900
loss: 1.0285756587982178,grad_norm: 0.9723888847341036, iteration: 200901
loss: 1.0015774965286255,grad_norm: 0.861295453628764, iteration: 200902
loss: 0.9845335483551025,grad_norm: 0.8940582100805606, iteration: 200903
loss: 0.9782115817070007,grad_norm: 0.8692502668434129, iteration: 200904
loss: 1.0505262613296509,grad_norm: 0.9971712847958767, iteration: 200905
loss: 1.041905403137207,grad_norm: 0.999999840535436, iteration: 200906
loss: 0.9974198937416077,grad_norm: 0.9518402512209939, iteration: 200907
loss: 1.0238546133041382,grad_norm: 0.9300767660945006, iteration: 200908
loss: 1.0245524644851685,grad_norm: 0.915859282840567, iteration: 200909
loss: 1.0611628293991089,grad_norm: 0.9999991190002824, iteration: 200910
loss: 1.016554355621338,grad_norm: 0.8170696213966269, iteration: 200911
loss: 0.9509615898132324,grad_norm: 0.9999991117545098, iteration: 200912
loss: 0.9623405933380127,grad_norm: 0.9999990535814268, iteration: 200913
loss: 0.9689393043518066,grad_norm: 0.8213175893075207, iteration: 200914
loss: 1.013398289680481,grad_norm: 0.9614933062016404, iteration: 200915
loss: 0.9817994236946106,grad_norm: 0.9999990307411845, iteration: 200916
loss: 0.9912002682685852,grad_norm: 0.9325416213499342, iteration: 200917
loss: 0.9978654980659485,grad_norm: 0.968302490754667, iteration: 200918
loss: 1.0012885332107544,grad_norm: 0.9162877489773521, iteration: 200919
loss: 1.0292441844940186,grad_norm: 0.9999991552167459, iteration: 200920
loss: 1.0407497882843018,grad_norm: 0.9751082689586329, iteration: 200921
loss: 0.9960253834724426,grad_norm: 0.869271583200651, iteration: 200922
loss: 1.0181775093078613,grad_norm: 0.9842539206081465, iteration: 200923
loss: 1.0424960851669312,grad_norm: 0.999999121847056, iteration: 200924
loss: 1.0016205310821533,grad_norm: 0.9999999529806782, iteration: 200925
loss: 0.9857372641563416,grad_norm: 0.8811420632838771, iteration: 200926
loss: 1.0226614475250244,grad_norm: 0.9553877640895542, iteration: 200927
loss: 0.9900376796722412,grad_norm: 0.9271105688238155, iteration: 200928
loss: 0.9984087944030762,grad_norm: 0.9554644521800322, iteration: 200929
loss: 1.1448254585266113,grad_norm: 0.9999998483788634, iteration: 200930
loss: 0.9719637632369995,grad_norm: 0.9858128228740046, iteration: 200931
loss: 1.027298092842102,grad_norm: 0.9999994850114103, iteration: 200932
loss: 1.1349687576293945,grad_norm: 0.9999992395644679, iteration: 200933
loss: 0.9553929567337036,grad_norm: 0.926291687523621, iteration: 200934
loss: 0.9926331639289856,grad_norm: 0.9999992233720202, iteration: 200935
loss: 1.0837236642837524,grad_norm: 0.9586094468188555, iteration: 200936
loss: 0.9971848726272583,grad_norm: 0.8609748397952691, iteration: 200937
loss: 0.9877229928970337,grad_norm: 0.9999992213263977, iteration: 200938
loss: 1.0266894102096558,grad_norm: 0.8401055283237189, iteration: 200939
loss: 1.005121111869812,grad_norm: 0.8204920543739755, iteration: 200940
loss: 1.046542763710022,grad_norm: 0.9999998578238566, iteration: 200941
loss: 1.028898000717163,grad_norm: 0.9999995328918246, iteration: 200942
loss: 0.9787024855613708,grad_norm: 0.8236167433625237, iteration: 200943
loss: 0.9771652817726135,grad_norm: 0.8922211463422693, iteration: 200944
loss: 1.0245115756988525,grad_norm: 0.9971611318637664, iteration: 200945
loss: 1.0186387300491333,grad_norm: 0.9999991782072103, iteration: 200946
loss: 1.1569859981536865,grad_norm: 0.999999247700099, iteration: 200947
loss: 0.9953970313072205,grad_norm: 0.9434976454625122, iteration: 200948
loss: 0.9945724606513977,grad_norm: 0.8310078091652062, iteration: 200949
loss: 1.0254946947097778,grad_norm: 0.9999992247194941, iteration: 200950
loss: 0.9807735681533813,grad_norm: 0.9999991989965662, iteration: 200951
loss: 0.9916881322860718,grad_norm: 0.9061835014400009, iteration: 200952
loss: 0.9670689702033997,grad_norm: 0.8909088558553447, iteration: 200953
loss: 1.0154290199279785,grad_norm: 0.9999991163961223, iteration: 200954
loss: 0.9550526142120361,grad_norm: 0.9886430804945902, iteration: 200955
loss: 0.9761360883712769,grad_norm: 0.9856153509065272, iteration: 200956
loss: 1.0342541933059692,grad_norm: 0.8073697739061662, iteration: 200957
loss: 0.9814586639404297,grad_norm: 0.8233835853852391, iteration: 200958
loss: 1.0168464183807373,grad_norm: 0.8663733305012085, iteration: 200959
loss: 1.1220184564590454,grad_norm: 0.9999992204315289, iteration: 200960
loss: 0.998723030090332,grad_norm: 0.9827500029725608, iteration: 200961
loss: 0.9992156624794006,grad_norm: 0.9666004503928979, iteration: 200962
loss: 1.0159118175506592,grad_norm: 0.8888234994050463, iteration: 200963
loss: 1.0027066469192505,grad_norm: 0.9432392978127578, iteration: 200964
loss: 1.0006012916564941,grad_norm: 0.9999991416471741, iteration: 200965
loss: 0.9702510237693787,grad_norm: 0.9160353422471124, iteration: 200966
loss: 1.0650966167449951,grad_norm: 0.9999993149582088, iteration: 200967
loss: 0.9647367596626282,grad_norm: 0.999999219640333, iteration: 200968
loss: 1.011872410774231,grad_norm: 0.9679112405257465, iteration: 200969
loss: 1.0134098529815674,grad_norm: 0.9005899576551244, iteration: 200970
loss: 0.9586977958679199,grad_norm: 0.9736185527836793, iteration: 200971
loss: 1.0951814651489258,grad_norm: 0.9999996987528323, iteration: 200972
loss: 0.9668110013008118,grad_norm: 0.9052482976790387, iteration: 200973
loss: 1.0233759880065918,grad_norm: 0.9675039394879945, iteration: 200974
loss: 0.9793910980224609,grad_norm: 0.9560832006977367, iteration: 200975
loss: 1.0398014783859253,grad_norm: 0.8969429731136181, iteration: 200976
loss: 1.0837482213974,grad_norm: 0.9999999483390904, iteration: 200977
loss: 0.9782988429069519,grad_norm: 0.9660377302932364, iteration: 200978
loss: 1.005467176437378,grad_norm: 0.9999990392699446, iteration: 200979
loss: 0.9937304258346558,grad_norm: 0.8999551771712399, iteration: 200980
loss: 0.999866247177124,grad_norm: 0.9999993313749957, iteration: 200981
loss: 0.9698253273963928,grad_norm: 0.8814897299963786, iteration: 200982
loss: 1.0016709566116333,grad_norm: 0.993885799213679, iteration: 200983
loss: 1.312172293663025,grad_norm: 0.9999999195797513, iteration: 200984
loss: 1.1576675176620483,grad_norm: 0.9999993610900532, iteration: 200985
loss: 1.0071181058883667,grad_norm: 0.999998951922527, iteration: 200986
loss: 0.9684336185455322,grad_norm: 0.8581250010490037, iteration: 200987
loss: 1.0096698999404907,grad_norm: 0.9705509133102364, iteration: 200988
loss: 1.0312178134918213,grad_norm: 0.9999993047337048, iteration: 200989
loss: 0.9828017354011536,grad_norm: 0.999999008713983, iteration: 200990
loss: 1.1116869449615479,grad_norm: 0.9999994934885942, iteration: 200991
loss: 1.1259346008300781,grad_norm: 0.9999995148113977, iteration: 200992
loss: 0.9789431095123291,grad_norm: 0.8977667379433653, iteration: 200993
loss: 1.001698613166809,grad_norm: 0.8141435604759263, iteration: 200994
loss: 0.9917924404144287,grad_norm: 0.930423798488537, iteration: 200995
loss: 1.092448115348816,grad_norm: 0.9999991669292053, iteration: 200996
loss: 1.0748071670532227,grad_norm: 0.9999993155606662, iteration: 200997
loss: 1.0477800369262695,grad_norm: 0.9999990036368658, iteration: 200998
loss: 0.9938132166862488,grad_norm: 0.9102787162114487, iteration: 200999
loss: 0.9699578285217285,grad_norm: 0.919260747846619, iteration: 201000
loss: 1.0337926149368286,grad_norm: 0.9003656880575134, iteration: 201001
loss: 0.9731717705726624,grad_norm: 0.9999994642169978, iteration: 201002
loss: 1.0450729131698608,grad_norm: 0.9999996933014837, iteration: 201003
loss: 1.3827745914459229,grad_norm: 0.9999999191111416, iteration: 201004
loss: 1.0019539594650269,grad_norm: 0.9999993367512456, iteration: 201005
loss: 0.9877786636352539,grad_norm: 0.8955416471377344, iteration: 201006
loss: 1.008346676826477,grad_norm: 0.9999989411170767, iteration: 201007
loss: 1.0106918811798096,grad_norm: 0.9999991316554562, iteration: 201008
loss: 0.9732308983802795,grad_norm: 0.9999999266375754, iteration: 201009
loss: 1.0192707777023315,grad_norm: 0.9999992015359568, iteration: 201010
loss: 1.079963207244873,grad_norm: 0.9999996244864885, iteration: 201011
loss: 1.0177937746047974,grad_norm: 0.8300218349942698, iteration: 201012
loss: 1.0316027402877808,grad_norm: 0.9715837769916503, iteration: 201013
loss: 0.9880112409591675,grad_norm: 0.8763758370284768, iteration: 201014
loss: 0.9920570850372314,grad_norm: 0.7841534379942486, iteration: 201015
loss: 1.02046537399292,grad_norm: 0.9999993185282663, iteration: 201016
loss: 1.0527660846710205,grad_norm: 0.9999991091684103, iteration: 201017
loss: 0.9989856481552124,grad_norm: 0.9999990505974349, iteration: 201018
loss: 1.069655418395996,grad_norm: 0.9999991231640304, iteration: 201019
loss: 0.993154764175415,grad_norm: 0.9999989586120359, iteration: 201020
loss: 1.0184407234191895,grad_norm: 0.8638042531041273, iteration: 201021
loss: 1.0009467601776123,grad_norm: 0.9999990989882831, iteration: 201022
loss: 0.9435812830924988,grad_norm: 0.9999991037988112, iteration: 201023
loss: 0.9943533539772034,grad_norm: 0.9910388730554708, iteration: 201024
loss: 1.0230849981307983,grad_norm: 0.9918466314116844, iteration: 201025
loss: 0.9887746572494507,grad_norm: 0.9507427512994449, iteration: 201026
loss: 0.9923624396324158,grad_norm: 0.9999991043210609, iteration: 201027
loss: 1.005171537399292,grad_norm: 0.9819353034453077, iteration: 201028
loss: 0.9759045243263245,grad_norm: 0.9485780178354366, iteration: 201029
loss: 0.978904664516449,grad_norm: 0.999999219143219, iteration: 201030
loss: 1.0015454292297363,grad_norm: 0.8885012384161626, iteration: 201031
loss: 1.0908643007278442,grad_norm: 0.9744327984586504, iteration: 201032
loss: 1.0002504587173462,grad_norm: 0.9581118998428888, iteration: 201033
loss: 0.9655748605728149,grad_norm: 0.9262159793168786, iteration: 201034
loss: 1.0482697486877441,grad_norm: 0.9999992161297673, iteration: 201035
loss: 0.9716830253601074,grad_norm: 0.9989187185447796, iteration: 201036
loss: 1.0324864387512207,grad_norm: 0.9999994291415407, iteration: 201037
loss: 1.0156749486923218,grad_norm: 0.8285174835264077, iteration: 201038
loss: 0.9945931434631348,grad_norm: 0.8384226222271741, iteration: 201039
loss: 0.9993073344230652,grad_norm: 0.87768535051542, iteration: 201040
loss: 1.0021718740463257,grad_norm: 0.8893878203564547, iteration: 201041
loss: 1.0280317068099976,grad_norm: 0.8517984472873178, iteration: 201042
loss: 0.9676955938339233,grad_norm: 0.8667545435030135, iteration: 201043
loss: 1.0345765352249146,grad_norm: 0.8986136205152638, iteration: 201044
loss: 1.0018073320388794,grad_norm: 0.8159003466815898, iteration: 201045
loss: 1.05886709690094,grad_norm: 0.9999994378329833, iteration: 201046
loss: 1.0141494274139404,grad_norm: 0.9999994697015432, iteration: 201047
loss: 1.0169098377227783,grad_norm: 0.8722306391299238, iteration: 201048
loss: 1.0179598331451416,grad_norm: 0.9999992104776091, iteration: 201049
loss: 0.9982807636260986,grad_norm: 0.8716850341140338, iteration: 201050
loss: 0.9915433526039124,grad_norm: 0.7900139703806942, iteration: 201051
loss: 0.994945764541626,grad_norm: 0.8627288796306614, iteration: 201052
loss: 1.0083531141281128,grad_norm: 0.9999996221314149, iteration: 201053
loss: 1.0276752710342407,grad_norm: 0.9999990772522638, iteration: 201054
loss: 0.9515842795372009,grad_norm: 0.8121260989413082, iteration: 201055
loss: 1.0073387622833252,grad_norm: 0.8236516760523335, iteration: 201056
loss: 1.036900281906128,grad_norm: 0.9999989957872727, iteration: 201057
loss: 1.1106727123260498,grad_norm: 0.999999022884108, iteration: 201058
loss: 1.0150564908981323,grad_norm: 0.9265932274712175, iteration: 201059
loss: 0.9899013638496399,grad_norm: 0.9397979360618341, iteration: 201060
loss: 1.0256696939468384,grad_norm: 0.8906958630453957, iteration: 201061
loss: 1.0417512655258179,grad_norm: 0.9999996432325002, iteration: 201062
loss: 1.0125949382781982,grad_norm: 0.7675094219074285, iteration: 201063
loss: 0.9712511897087097,grad_norm: 0.9999989239095299, iteration: 201064
loss: 1.0378164052963257,grad_norm: 0.999999022107915, iteration: 201065
loss: 0.963103711605072,grad_norm: 0.9999991144092212, iteration: 201066
loss: 0.9836920499801636,grad_norm: 0.9083807336634245, iteration: 201067
loss: 0.9671134352684021,grad_norm: 0.9064509244432769, iteration: 201068
loss: 0.9973437786102295,grad_norm: 0.9201328576289042, iteration: 201069
loss: 0.9887306094169617,grad_norm: 0.9200504801373771, iteration: 201070
loss: 1.032971978187561,grad_norm: 0.9999994643298679, iteration: 201071
loss: 1.0052978992462158,grad_norm: 0.9999994255249364, iteration: 201072
loss: 0.9857920408248901,grad_norm: 0.9121114555635788, iteration: 201073
loss: 1.0103623867034912,grad_norm: 0.9999999004018745, iteration: 201074
loss: 1.0010910034179688,grad_norm: 0.8615076177934682, iteration: 201075
loss: 1.0249249935150146,grad_norm: 0.9999996270534676, iteration: 201076
loss: 1.0388578176498413,grad_norm: 0.9999991381805289, iteration: 201077
loss: 1.0156688690185547,grad_norm: 0.8049905431960881, iteration: 201078
loss: 0.9707823991775513,grad_norm: 0.9999990293112563, iteration: 201079
loss: 1.018261194229126,grad_norm: 0.999999260507464, iteration: 201080
loss: 1.0181705951690674,grad_norm: 0.9999991987948618, iteration: 201081
loss: 1.013198733329773,grad_norm: 0.9207112182541902, iteration: 201082
loss: 0.9919283390045166,grad_norm: 0.9113759207575387, iteration: 201083
loss: 1.021394968032837,grad_norm: 0.9999990419393626, iteration: 201084
loss: 1.0235611200332642,grad_norm: 0.9999990583841714, iteration: 201085
loss: 0.9945504069328308,grad_norm: 0.9999991648936668, iteration: 201086
loss: 0.9591819643974304,grad_norm: 0.8928025856403933, iteration: 201087
loss: 1.0977652072906494,grad_norm: 1.0000000326816092, iteration: 201088
loss: 1.013725996017456,grad_norm: 0.9999990728573542, iteration: 201089
loss: 0.9927971363067627,grad_norm: 0.9622069633441136, iteration: 201090
loss: 0.989271879196167,grad_norm: 0.9999991556349799, iteration: 201091
loss: 1.0269927978515625,grad_norm: 0.9062372612452471, iteration: 201092
loss: 0.9942906498908997,grad_norm: 0.9999989992709803, iteration: 201093
loss: 0.9875423908233643,grad_norm: 0.8679517978785589, iteration: 201094
loss: 1.0099151134490967,grad_norm: 0.9999991693900427, iteration: 201095
loss: 1.0189568996429443,grad_norm: 0.9999991112933816, iteration: 201096
loss: 1.0268157720565796,grad_norm: 0.8103742644600731, iteration: 201097
loss: 0.9925538301467896,grad_norm: 0.8307987440658564, iteration: 201098
loss: 1.0104584693908691,grad_norm: 0.889091683601554, iteration: 201099
loss: 1.0313196182250977,grad_norm: 0.8953574839746242, iteration: 201100
loss: 1.0023465156555176,grad_norm: 0.9999990496388644, iteration: 201101
loss: 0.9820684194564819,grad_norm: 0.9999994598023022, iteration: 201102
loss: 1.0388656854629517,grad_norm: 0.9999997615500796, iteration: 201103
loss: 1.0196681022644043,grad_norm: 0.9456551932608678, iteration: 201104
loss: 0.9869888424873352,grad_norm: 0.7445655081631841, iteration: 201105
loss: 1.0122205018997192,grad_norm: 0.9999991592662568, iteration: 201106
loss: 1.0248292684555054,grad_norm: 0.9999991342698465, iteration: 201107
loss: 0.9945597052574158,grad_norm: 0.9999995201626034, iteration: 201108
loss: 0.9875777959823608,grad_norm: 0.9999991596282387, iteration: 201109
loss: 0.9851491451263428,grad_norm: 0.9999991759052602, iteration: 201110
loss: 0.952580988407135,grad_norm: 0.8807942627279829, iteration: 201111
loss: 1.0030875205993652,grad_norm: 0.9999999309033107, iteration: 201112
loss: 0.9764056205749512,grad_norm: 0.9679522590084202, iteration: 201113
loss: 1.054418921470642,grad_norm: 0.9999995994723642, iteration: 201114
loss: 0.9651610255241394,grad_norm: 0.9999991053220048, iteration: 201115
loss: 1.0033031702041626,grad_norm: 0.929718704983934, iteration: 201116
loss: 0.9830470085144043,grad_norm: 0.9737895432967827, iteration: 201117
loss: 0.9956602454185486,grad_norm: 0.9454589980165012, iteration: 201118
loss: 0.9809785485267639,grad_norm: 0.8823832036557824, iteration: 201119
loss: 0.9954324960708618,grad_norm: 0.999999060191264, iteration: 201120
loss: 1.0509167909622192,grad_norm: 0.9999991371931145, iteration: 201121
loss: 0.9851168990135193,grad_norm: 0.7325081765632814, iteration: 201122
loss: 1.0270984172821045,grad_norm: 0.9999990484612409, iteration: 201123
loss: 1.0441797971725464,grad_norm: 0.9999991572097597, iteration: 201124
loss: 1.0362828969955444,grad_norm: 0.9999993509883549, iteration: 201125
loss: 0.9981208443641663,grad_norm: 0.9999992206380173, iteration: 201126
loss: 1.0332932472229004,grad_norm: 0.9506854854995599, iteration: 201127
loss: 0.9830777645111084,grad_norm: 0.9945471359892836, iteration: 201128
loss: 0.993412435054779,grad_norm: 0.8698713855229382, iteration: 201129
loss: 1.0096412897109985,grad_norm: 0.8327718182769243, iteration: 201130
loss: 1.0005300045013428,grad_norm: 0.9999990876088862, iteration: 201131
loss: 0.9854307174682617,grad_norm: 0.97316266449615, iteration: 201132
loss: 1.0087798833847046,grad_norm: 0.9496666039387374, iteration: 201133
loss: 1.0110470056533813,grad_norm: 0.7607546200355936, iteration: 201134
loss: 1.0264238119125366,grad_norm: 0.9468977330812258, iteration: 201135
loss: 1.0312341451644897,grad_norm: 0.9999998813167111, iteration: 201136
loss: 0.9923768043518066,grad_norm: 0.850234092931764, iteration: 201137
loss: 1.1229383945465088,grad_norm: 0.9999998713107715, iteration: 201138
loss: 1.0210990905761719,grad_norm: 0.9508839219925622, iteration: 201139
loss: 0.9875909090042114,grad_norm: 0.9685767414259956, iteration: 201140
loss: 0.9943676590919495,grad_norm: 0.9999990824865056, iteration: 201141
loss: 1.0305465459823608,grad_norm: 0.9999997991156246, iteration: 201142
loss: 1.0051196813583374,grad_norm: 0.9999990461324064, iteration: 201143
loss: 1.0260064601898193,grad_norm: 0.9613350585453834, iteration: 201144
loss: 0.9995244741439819,grad_norm: 0.9693744458013666, iteration: 201145
loss: 0.987467348575592,grad_norm: 0.9999991466608927, iteration: 201146
loss: 1.0363901853561401,grad_norm: 0.9998881857090067, iteration: 201147
loss: 1.0355918407440186,grad_norm: 0.9999990094541464, iteration: 201148
loss: 1.0144522190093994,grad_norm: 0.8275329374140948, iteration: 201149
loss: 1.008548378944397,grad_norm: 0.9307818468902707, iteration: 201150
loss: 0.973462700843811,grad_norm: 0.9999990476667862, iteration: 201151
loss: 1.0627825260162354,grad_norm: 0.9999993249062973, iteration: 201152
loss: 0.9967525601387024,grad_norm: 0.9745286746157675, iteration: 201153
loss: 1.1033095121383667,grad_norm: 0.9999995155019195, iteration: 201154
loss: 1.0108717679977417,grad_norm: 0.9999995791134461, iteration: 201155
loss: 1.0020195245742798,grad_norm: 0.9999991045731694, iteration: 201156
loss: 1.0125634670257568,grad_norm: 0.9999998708893669, iteration: 201157
loss: 1.033318042755127,grad_norm: 0.9999991689161728, iteration: 201158
loss: 1.0119398832321167,grad_norm: 0.9678819543683732, iteration: 201159
loss: 1.0235966444015503,grad_norm: 0.9999990378264212, iteration: 201160
loss: 0.979485273361206,grad_norm: 0.9999991341819694, iteration: 201161
loss: 0.9776684641838074,grad_norm: 0.8395335132845636, iteration: 201162
loss: 0.9850258231163025,grad_norm: 0.8927035340855471, iteration: 201163
loss: 0.9861254096031189,grad_norm: 0.9999993069577804, iteration: 201164
loss: 0.9943026900291443,grad_norm: 0.9238462296839023, iteration: 201165
loss: 1.0370330810546875,grad_norm: 0.9688709583861455, iteration: 201166
loss: 1.0409282445907593,grad_norm: 0.9431130316921109, iteration: 201167
loss: 1.0122389793395996,grad_norm: 0.8728733310057046, iteration: 201168
loss: 1.0457091331481934,grad_norm: 0.9934418759199197, iteration: 201169
loss: 0.9651026129722595,grad_norm: 0.952317547864351, iteration: 201170
loss: 0.9943404197692871,grad_norm: 0.8888472409052258, iteration: 201171
loss: 1.037297010421753,grad_norm: 0.999999106210783, iteration: 201172
loss: 0.9893556237220764,grad_norm: 0.9999997327652426, iteration: 201173
loss: 1.022274374961853,grad_norm: 0.9999995239649426, iteration: 201174
loss: 1.0119988918304443,grad_norm: 0.9340412813780306, iteration: 201175
loss: 1.0626254081726074,grad_norm: 0.9999994403649518, iteration: 201176
loss: 0.9927840232849121,grad_norm: 0.9999996312675815, iteration: 201177
loss: 1.116194486618042,grad_norm: 0.9999997493095402, iteration: 201178
loss: 1.006300926208496,grad_norm: 0.9887281029115418, iteration: 201179
loss: 1.0081825256347656,grad_norm: 0.9864649671757124, iteration: 201180
loss: 0.9907718896865845,grad_norm: 0.9999990788883483, iteration: 201181
loss: 0.9884926676750183,grad_norm: 0.9011218630679937, iteration: 201182
loss: 1.0294054746627808,grad_norm: 0.9999991406050607, iteration: 201183
loss: 1.065237283706665,grad_norm: 0.9999993785170138, iteration: 201184
loss: 1.091291069984436,grad_norm: 0.9999995337423259, iteration: 201185
loss: 0.9915427565574646,grad_norm: 0.9054049831446856, iteration: 201186
loss: 1.0119438171386719,grad_norm: 0.9999992058867622, iteration: 201187
loss: 0.9905660152435303,grad_norm: 0.9569294793558175, iteration: 201188
loss: 0.9557145237922668,grad_norm: 0.9999992165344374, iteration: 201189
loss: 1.016724705696106,grad_norm: 0.8826910385222579, iteration: 201190
loss: 0.9861924648284912,grad_norm: 0.9382686624649652, iteration: 201191
loss: 0.998482346534729,grad_norm: 0.7784274901141988, iteration: 201192
loss: 1.0213936567306519,grad_norm: 0.999999333328215, iteration: 201193
loss: 1.020481824874878,grad_norm: 0.9818427851441435, iteration: 201194
loss: 1.0030269622802734,grad_norm: 0.8434063526631262, iteration: 201195
loss: 1.060144305229187,grad_norm: 0.9999996990425093, iteration: 201196
loss: 1.0122517347335815,grad_norm: 0.8582414620354969, iteration: 201197
loss: 1.0310600996017456,grad_norm: 0.8414670336239959, iteration: 201198
loss: 1.0826427936553955,grad_norm: 0.9999996794820688, iteration: 201199
loss: 1.0162280797958374,grad_norm: 0.9999991889239629, iteration: 201200
loss: 0.9927383661270142,grad_norm: 0.7764481637003546, iteration: 201201
loss: 1.0096901655197144,grad_norm: 0.999999250703207, iteration: 201202
loss: 1.0146087408065796,grad_norm: 0.9999990569514047, iteration: 201203
loss: 0.9903603196144104,grad_norm: 0.9175804494978349, iteration: 201204
loss: 0.9912964701652527,grad_norm: 0.9970033186190251, iteration: 201205
loss: 1.0032178163528442,grad_norm: 0.9778354026186975, iteration: 201206
loss: 0.9569752216339111,grad_norm: 0.9057795124042058, iteration: 201207
loss: 0.9974134564399719,grad_norm: 0.940252359226756, iteration: 201208
loss: 1.0192639827728271,grad_norm: 0.8642818409056614, iteration: 201209
loss: 1.1328786611557007,grad_norm: 0.9999993856312945, iteration: 201210
loss: 1.0331066846847534,grad_norm: 0.9770228849050113, iteration: 201211
loss: 1.0118353366851807,grad_norm: 0.9999992573285603, iteration: 201212
loss: 1.0004687309265137,grad_norm: 0.8482926004911467, iteration: 201213
loss: 1.0118416547775269,grad_norm: 0.9440548063420504, iteration: 201214
loss: 1.0091906785964966,grad_norm: 0.9999994485701966, iteration: 201215
loss: 0.9703920483589172,grad_norm: 0.9279532610263383, iteration: 201216
loss: 1.0895068645477295,grad_norm: 0.9999999143524958, iteration: 201217
loss: 0.9938730597496033,grad_norm: 0.9999995683061228, iteration: 201218
loss: 1.0072729587554932,grad_norm: 0.999999106652156, iteration: 201219
loss: 1.0062572956085205,grad_norm: 0.8583468725132022, iteration: 201220
loss: 0.9828203320503235,grad_norm: 0.9999990946507482, iteration: 201221
loss: 0.9640331268310547,grad_norm: 0.8628948592732285, iteration: 201222
loss: 1.015723705291748,grad_norm: 0.8943773093869161, iteration: 201223
loss: 1.0066055059432983,grad_norm: 0.8416098550050495, iteration: 201224
loss: 0.9934002757072449,grad_norm: 0.9640791055554943, iteration: 201225
loss: 1.0183320045471191,grad_norm: 0.999999674157983, iteration: 201226
loss: 1.0218162536621094,grad_norm: 0.999999048366614, iteration: 201227
loss: 0.994404673576355,grad_norm: 0.9123921914795298, iteration: 201228
loss: 0.9909023642539978,grad_norm: 0.8637019801530829, iteration: 201229
loss: 0.97547447681427,grad_norm: 0.8938280802389187, iteration: 201230
loss: 1.0339909791946411,grad_norm: 0.9999989751093318, iteration: 201231
loss: 1.0106148719787598,grad_norm: 0.9999991371542477, iteration: 201232
loss: 0.971371591091156,grad_norm: 0.77555478629161, iteration: 201233
loss: 0.9593051671981812,grad_norm: 0.8561543740108891, iteration: 201234
loss: 1.0331039428710938,grad_norm: 0.9999989914746387, iteration: 201235
loss: 0.9916013479232788,grad_norm: 0.9999992979824153, iteration: 201236
loss: 0.9890109300613403,grad_norm: 0.9923750402269651, iteration: 201237
loss: 1.005173921585083,grad_norm: 0.8287209475977848, iteration: 201238
loss: 1.0290255546569824,grad_norm: 0.9999991192532338, iteration: 201239
loss: 0.9974679946899414,grad_norm: 0.9999991586971512, iteration: 201240
loss: 1.0210444927215576,grad_norm: 0.9999990364853588, iteration: 201241
loss: 0.9876070618629456,grad_norm: 0.8132486527087568, iteration: 201242
loss: 1.014746904373169,grad_norm: 0.9459242072336955, iteration: 201243
loss: 0.987274169921875,grad_norm: 0.9999992889798159, iteration: 201244
loss: 0.9623275399208069,grad_norm: 0.968070255163608, iteration: 201245
loss: 0.9968744516372681,grad_norm: 0.9595863903279664, iteration: 201246
loss: 0.9735344052314758,grad_norm: 0.932449855486931, iteration: 201247
loss: 0.9675233960151672,grad_norm: 0.9999989481697115, iteration: 201248
loss: 1.0127032995224,grad_norm: 0.9999990801374691, iteration: 201249
loss: 0.991542637348175,grad_norm: 0.9341052779987065, iteration: 201250
loss: 0.9866224527359009,grad_norm: 0.9999991531763007, iteration: 201251
loss: 1.0577000379562378,grad_norm: 0.999999404157869, iteration: 201252
loss: 1.0455107688903809,grad_norm: 0.9999993526527682, iteration: 201253
loss: 0.9628472328186035,grad_norm: 0.9398475355870125, iteration: 201254
loss: 1.065616250038147,grad_norm: 0.9999994998012542, iteration: 201255
loss: 1.0557780265808105,grad_norm: 0.9999990711922367, iteration: 201256
loss: 1.0234675407409668,grad_norm: 0.9999992347551546, iteration: 201257
loss: 1.0044732093811035,grad_norm: 0.999999135598116, iteration: 201258
loss: 0.9983944296836853,grad_norm: 0.9757077210269889, iteration: 201259
loss: 1.0599943399429321,grad_norm: 0.9999992326937002, iteration: 201260
loss: 0.9541801810264587,grad_norm: 0.9302509492255557, iteration: 201261
loss: 1.0009164810180664,grad_norm: 1.0000000191075453, iteration: 201262
loss: 1.0167503356933594,grad_norm: 0.9819761847385636, iteration: 201263
loss: 0.9691737294197083,grad_norm: 0.9999992363142352, iteration: 201264
loss: 0.9856478571891785,grad_norm: 0.7865344648667405, iteration: 201265
loss: 1.003731608390808,grad_norm: 0.9801856205379028, iteration: 201266
loss: 1.0070335865020752,grad_norm: 0.9128142951299669, iteration: 201267
loss: 1.0197830200195312,grad_norm: 0.9106382831645397, iteration: 201268
loss: 1.019192099571228,grad_norm: 0.9999990127749349, iteration: 201269
loss: 0.9835981130599976,grad_norm: 0.9999991474644432, iteration: 201270
loss: 0.964931845664978,grad_norm: 0.9999990253991352, iteration: 201271
loss: 1.0072211027145386,grad_norm: 0.8748430385618807, iteration: 201272
loss: 1.0199775695800781,grad_norm: 0.935734352072552, iteration: 201273
loss: 1.0199917554855347,grad_norm: 0.9860368275834247, iteration: 201274
loss: 0.9995637536048889,grad_norm: 0.9999992481700029, iteration: 201275
loss: 1.0091910362243652,grad_norm: 0.9827397138699149, iteration: 201276
loss: 1.00053071975708,grad_norm: 0.8576345544067643, iteration: 201277
loss: 0.981647789478302,grad_norm: 0.8943083966582498, iteration: 201278
loss: 0.9932577610015869,grad_norm: 0.9294067931543668, iteration: 201279
loss: 1.0084317922592163,grad_norm: 0.9579344251866718, iteration: 201280
loss: 1.0184860229492188,grad_norm: 0.8919425020345786, iteration: 201281
loss: 1.011110544204712,grad_norm: 0.9720643020904911, iteration: 201282
loss: 0.96207594871521,grad_norm: 0.8981942524732217, iteration: 201283
loss: 0.9708179831504822,grad_norm: 0.9668167181325539, iteration: 201284
loss: 0.9645478129386902,grad_norm: 0.8936714152721325, iteration: 201285
loss: 1.0078538656234741,grad_norm: 0.9291917261025298, iteration: 201286
loss: 0.9986278414726257,grad_norm: 0.999999349362009, iteration: 201287
loss: 1.004219651222229,grad_norm: 0.9308565760972664, iteration: 201288
loss: 0.9858922362327576,grad_norm: 0.9821811892042507, iteration: 201289
loss: 0.9868083596229553,grad_norm: 0.9999992907615065, iteration: 201290
loss: 0.9896143078804016,grad_norm: 0.7956813708912798, iteration: 201291
loss: 0.9954747557640076,grad_norm: 0.9999991822534992, iteration: 201292
loss: 1.0531961917877197,grad_norm: 0.9999994568565086, iteration: 201293
loss: 1.0118385553359985,grad_norm: 0.8385029967052439, iteration: 201294
loss: 1.0404229164123535,grad_norm: 0.9752566742147083, iteration: 201295
loss: 0.9927987456321716,grad_norm: 0.8620942658436548, iteration: 201296
loss: 1.0915902853012085,grad_norm: 0.9999993066022879, iteration: 201297
loss: 1.02113676071167,grad_norm: 0.9999990984889231, iteration: 201298
loss: 1.0150090456008911,grad_norm: 0.8801592118970621, iteration: 201299
loss: 0.9923567771911621,grad_norm: 0.9565845241193079, iteration: 201300
loss: 1.0022825002670288,grad_norm: 0.9989377281929942, iteration: 201301
loss: 1.0001674890518188,grad_norm: 0.8104695463675082, iteration: 201302
loss: 1.0047216415405273,grad_norm: 0.9999990662570735, iteration: 201303
loss: 1.0066375732421875,grad_norm: 0.9999990476050514, iteration: 201304
loss: 1.013468623161316,grad_norm: 0.9024146541705844, iteration: 201305
loss: 1.014825463294983,grad_norm: 0.899752179920162, iteration: 201306
loss: 0.9892101287841797,grad_norm: 0.8176407203321222, iteration: 201307
loss: 0.9873344302177429,grad_norm: 0.7861990827496682, iteration: 201308
loss: 1.006000280380249,grad_norm: 0.9999993294544994, iteration: 201309
loss: 1.0440603494644165,grad_norm: 0.8813782586103406, iteration: 201310
loss: 1.0156078338623047,grad_norm: 0.9999989659783842, iteration: 201311
loss: 0.9778409600257874,grad_norm: 0.9520181426236026, iteration: 201312
loss: 0.9969086647033691,grad_norm: 0.854888638203154, iteration: 201313
loss: 1.005996584892273,grad_norm: 0.8879362093026382, iteration: 201314
loss: 1.0920900106430054,grad_norm: 0.9999992381334432, iteration: 201315
loss: 1.0996311902999878,grad_norm: 0.9999993501673674, iteration: 201316
loss: 1.0124255418777466,grad_norm: 0.9334422988141936, iteration: 201317
loss: 1.0433193445205688,grad_norm: 1.000000035663422, iteration: 201318
loss: 1.024257779121399,grad_norm: 0.9999989215373019, iteration: 201319
loss: 0.9952106475830078,grad_norm: 0.9999990673934246, iteration: 201320
loss: 1.1071324348449707,grad_norm: 0.9999992262723124, iteration: 201321
loss: 1.03648841381073,grad_norm: 0.8829141237420095, iteration: 201322
loss: 0.9818286299705505,grad_norm: 0.8261998581045874, iteration: 201323
loss: 0.9804558753967285,grad_norm: 0.9234608093793509, iteration: 201324
loss: 1.00486421585083,grad_norm: 0.9999998849065204, iteration: 201325
loss: 1.024340033531189,grad_norm: 0.8581719136251814, iteration: 201326
loss: 0.9586078524589539,grad_norm: 0.7897406205990154, iteration: 201327
loss: 0.9782821536064148,grad_norm: 0.9477575507507745, iteration: 201328
loss: 0.9955035448074341,grad_norm: 0.7809162212932678, iteration: 201329
loss: 1.0262588262557983,grad_norm: 0.9999991726715937, iteration: 201330
loss: 1.0086205005645752,grad_norm: 0.9630968741792524, iteration: 201331
loss: 1.0168346166610718,grad_norm: 0.999999245492417, iteration: 201332
loss: 0.9842467308044434,grad_norm: 0.9129614358002567, iteration: 201333
loss: 0.998037576675415,grad_norm: 0.8390951290816767, iteration: 201334
loss: 1.008897304534912,grad_norm: 0.9820481615520525, iteration: 201335
loss: 0.9720163941383362,grad_norm: 0.9310454069128145, iteration: 201336
loss: 0.9889698624610901,grad_norm: 0.9999990494225593, iteration: 201337
loss: 0.9993941187858582,grad_norm: 0.8409483140523528, iteration: 201338
loss: 0.9676803350448608,grad_norm: 0.9353036354865173, iteration: 201339
loss: 1.0143324136734009,grad_norm: 0.9583254841557577, iteration: 201340
loss: 0.9997862577438354,grad_norm: 0.9999991804497402, iteration: 201341
loss: 0.9974292516708374,grad_norm: 0.8105845101425765, iteration: 201342
loss: 1.0210665464401245,grad_norm: 0.906935950288235, iteration: 201343
loss: 1.007939338684082,grad_norm: 0.9999990955525684, iteration: 201344
loss: 1.0482720136642456,grad_norm: 0.9999993835209284, iteration: 201345
loss: 1.0125502347946167,grad_norm: 0.8314557862238137, iteration: 201346
loss: 1.013999104499817,grad_norm: 0.9999989668099862, iteration: 201347
loss: 0.9309714436531067,grad_norm: 0.9999990346378942, iteration: 201348
loss: 0.9795462489128113,grad_norm: 0.9025388140129974, iteration: 201349
loss: 1.0165719985961914,grad_norm: 0.9999990612499849, iteration: 201350
loss: 1.0242780447006226,grad_norm: 0.9119927044544589, iteration: 201351
loss: 0.9775123596191406,grad_norm: 0.8662999148270505, iteration: 201352
loss: 0.9820404052734375,grad_norm: 0.9999990218089018, iteration: 201353
loss: 1.0138874053955078,grad_norm: 0.9199244150885066, iteration: 201354
loss: 1.0165938138961792,grad_norm: 0.9148943346011085, iteration: 201355
loss: 0.9616972804069519,grad_norm: 0.999999032042142, iteration: 201356
loss: 1.0005236864089966,grad_norm: 0.9999991643576441, iteration: 201357
loss: 1.0808218717575073,grad_norm: 0.9999996515747898, iteration: 201358
loss: 1.0092829465866089,grad_norm: 0.8381463648692039, iteration: 201359
loss: 0.9804984331130981,grad_norm: 0.999999166265107, iteration: 201360
loss: 1.00014066696167,grad_norm: 0.9999998831992866, iteration: 201361
loss: 1.193414330482483,grad_norm: 0.9999996790679642, iteration: 201362
loss: 0.9847056269645691,grad_norm: 0.8660790634694461, iteration: 201363
loss: 0.9962666034698486,grad_norm: 0.980669191239157, iteration: 201364
loss: 0.983029305934906,grad_norm: 0.9180330920651735, iteration: 201365
loss: 1.0122398138046265,grad_norm: 0.994325564741961, iteration: 201366
loss: 0.999615490436554,grad_norm: 0.9780329257280245, iteration: 201367
loss: 1.0398050546646118,grad_norm: 0.9999992405956162, iteration: 201368
loss: 1.027948260307312,grad_norm: 0.9767071253429466, iteration: 201369
loss: 0.9779412150382996,grad_norm: 0.9999993319399662, iteration: 201370
loss: 1.0141966342926025,grad_norm: 0.9656055809164389, iteration: 201371
loss: 0.96486496925354,grad_norm: 0.8912618932545935, iteration: 201372
loss: 1.020148515701294,grad_norm: 0.9999990931226054, iteration: 201373
loss: 1.010248064994812,grad_norm: 0.8763539909503368, iteration: 201374
loss: 0.9845380783081055,grad_norm: 0.848153777633761, iteration: 201375
loss: 1.0062388181686401,grad_norm: 0.8968710847380708, iteration: 201376
loss: 0.9737687706947327,grad_norm: 0.998428861655777, iteration: 201377
loss: 1.0300323963165283,grad_norm: 0.9835119657649358, iteration: 201378
loss: 1.02073073387146,grad_norm: 0.9999991812351651, iteration: 201379
loss: 1.009621500968933,grad_norm: 0.9999992152671422, iteration: 201380
loss: 0.994422972202301,grad_norm: 0.8443465051625723, iteration: 201381
loss: 1.0260199308395386,grad_norm: 0.877629937716528, iteration: 201382
loss: 1.0998293161392212,grad_norm: 0.9999992582770856, iteration: 201383
loss: 1.0052953958511353,grad_norm: 0.9999991396762259, iteration: 201384
loss: 1.0182325839996338,grad_norm: 0.9999995752366743, iteration: 201385
loss: 0.9778453707695007,grad_norm: 0.9999991469962457, iteration: 201386
loss: 1.0138968229293823,grad_norm: 0.9999989835713312, iteration: 201387
loss: 1.0170114040374756,grad_norm: 0.9046040417116131, iteration: 201388
loss: 1.0151643753051758,grad_norm: 0.9183898109203474, iteration: 201389
loss: 0.9884169101715088,grad_norm: 0.9999990726511508, iteration: 201390
loss: 1.0098066329956055,grad_norm: 0.9999990326900862, iteration: 201391
loss: 1.001378059387207,grad_norm: 0.9313336755050049, iteration: 201392
loss: 1.0035784244537354,grad_norm: 0.8850208149636467, iteration: 201393
loss: 1.0101654529571533,grad_norm: 0.9000394947248428, iteration: 201394
loss: 1.0146158933639526,grad_norm: 0.9999992664922556, iteration: 201395
loss: 0.9755351543426514,grad_norm: 0.9999995376752734, iteration: 201396
loss: 1.0381938219070435,grad_norm: 0.8936669844874897, iteration: 201397
loss: 0.9954177737236023,grad_norm: 0.8758832224454929, iteration: 201398
loss: 0.9703550934791565,grad_norm: 0.999999024444076, iteration: 201399
loss: 1.0090047121047974,grad_norm: 0.9225689167921661, iteration: 201400
loss: 1.0216989517211914,grad_norm: 0.7751738789845213, iteration: 201401
loss: 1.0006890296936035,grad_norm: 0.9604238428878848, iteration: 201402
loss: 0.9826800227165222,grad_norm: 0.9999990822993223, iteration: 201403
loss: 1.0142520666122437,grad_norm: 0.9999991948980622, iteration: 201404
loss: 1.0430023670196533,grad_norm: 0.9999996228188002, iteration: 201405
loss: 0.9674674868583679,grad_norm: 0.8638437749928847, iteration: 201406
loss: 1.0299856662750244,grad_norm: 0.9162710336689831, iteration: 201407
loss: 1.000514268875122,grad_norm: 0.8989335695189644, iteration: 201408
loss: 1.011008858680725,grad_norm: 0.9999992790074469, iteration: 201409
loss: 0.9663718342781067,grad_norm: 0.8715736476133825, iteration: 201410
loss: 0.9790836572647095,grad_norm: 0.8667679236854483, iteration: 201411
loss: 1.0166723728179932,grad_norm: 0.9821244917181123, iteration: 201412
loss: 1.018393635749817,grad_norm: 0.9224403818138143, iteration: 201413
loss: 0.9776453375816345,grad_norm: 0.9999992318398556, iteration: 201414
loss: 1.0377641916275024,grad_norm: 0.8402713459874193, iteration: 201415
loss: 1.0038326978683472,grad_norm: 0.9999991451648075, iteration: 201416
loss: 0.9584282636642456,grad_norm: 0.8646433429989228, iteration: 201417
loss: 0.9682613015174866,grad_norm: 0.8996116595461227, iteration: 201418
loss: 0.9939529299736023,grad_norm: 0.9404901445249914, iteration: 201419
loss: 1.010633945465088,grad_norm: 0.8001807780749652, iteration: 201420
loss: 1.0095723867416382,grad_norm: 0.9999990201258031, iteration: 201421
loss: 0.9908919930458069,grad_norm: 0.9328668329169347, iteration: 201422
loss: 1.0255390405654907,grad_norm: 0.9999992860637115, iteration: 201423
loss: 1.0795540809631348,grad_norm: 0.9999996992331998, iteration: 201424
loss: 1.005547046661377,grad_norm: 0.8699201023353278, iteration: 201425
loss: 0.9795069098472595,grad_norm: 0.803120256052265, iteration: 201426
loss: 0.9673972129821777,grad_norm: 0.8777983773944645, iteration: 201427
loss: 0.9834732413291931,grad_norm: 0.9999991461080775, iteration: 201428
loss: 1.0164073705673218,grad_norm: 0.9150261612099184, iteration: 201429
loss: 1.0216587781906128,grad_norm: 0.9999992081099349, iteration: 201430
loss: 0.9915930032730103,grad_norm: 0.8560126458508962, iteration: 201431
loss: 0.9853023290634155,grad_norm: 0.8699342567154758, iteration: 201432
loss: 0.963759183883667,grad_norm: 0.9744388478195951, iteration: 201433
loss: 0.974219560623169,grad_norm: 0.8672377867312572, iteration: 201434
loss: 0.9503543376922607,grad_norm: 0.9999991409683793, iteration: 201435
loss: 1.0348583459854126,grad_norm: 0.9999990986778575, iteration: 201436
loss: 1.0105053186416626,grad_norm: 0.9999997256574034, iteration: 201437
loss: 0.9972612857818604,grad_norm: 0.8527510725830958, iteration: 201438
loss: 0.9844456315040588,grad_norm: 0.965148941620088, iteration: 201439
loss: 0.9779865741729736,grad_norm: 0.8585634089204293, iteration: 201440
loss: 1.0069471597671509,grad_norm: 0.9999990396402108, iteration: 201441
loss: 1.1359531879425049,grad_norm: 0.9999992039413272, iteration: 201442
loss: 0.9999750256538391,grad_norm: 0.8656188872657372, iteration: 201443
loss: 1.0175130367279053,grad_norm: 0.8924577051934552, iteration: 201444
loss: 1.0232839584350586,grad_norm: 0.8601342376133249, iteration: 201445
loss: 0.9868005514144897,grad_norm: 0.9999991723545812, iteration: 201446
loss: 1.0241237878799438,grad_norm: 0.8060022173122346, iteration: 201447
loss: 1.0084954500198364,grad_norm: 0.8111690616866136, iteration: 201448
loss: 1.1273833513259888,grad_norm: 0.999999856434056, iteration: 201449
loss: 0.9940717220306396,grad_norm: 0.9647225983771209, iteration: 201450
loss: 1.0043587684631348,grad_norm: 0.9353589520949279, iteration: 201451
loss: 0.9653363823890686,grad_norm: 0.9018814501070558, iteration: 201452
loss: 1.005412220954895,grad_norm: 0.9132399150230056, iteration: 201453
loss: 0.9577810168266296,grad_norm: 0.9561888947093918, iteration: 201454
loss: 0.9685733914375305,grad_norm: 0.7770066740049736, iteration: 201455
loss: 1.0709468126296997,grad_norm: 0.9999994834752417, iteration: 201456
loss: 1.0548102855682373,grad_norm: 0.9999991429551319, iteration: 201457
loss: 1.0394015312194824,grad_norm: 0.8844794471705142, iteration: 201458
loss: 0.9679503440856934,grad_norm: 0.9275214773983663, iteration: 201459
loss: 0.9786226153373718,grad_norm: 0.9663259819069507, iteration: 201460
loss: 0.9950186610221863,grad_norm: 0.9999990888232546, iteration: 201461
loss: 1.1351182460784912,grad_norm: 0.9999996222273769, iteration: 201462
loss: 0.9895903468132019,grad_norm: 0.9498721707868379, iteration: 201463
loss: 1.017490267753601,grad_norm: 0.9253342862664894, iteration: 201464
loss: 1.0104490518569946,grad_norm: 0.9999990356505153, iteration: 201465
loss: 1.0253812074661255,grad_norm: 0.9349654491237251, iteration: 201466
loss: 1.009040117263794,grad_norm: 0.9999991491175365, iteration: 201467
loss: 1.026557445526123,grad_norm: 0.999999179339478, iteration: 201468
loss: 0.997986912727356,grad_norm: 0.9999990876753247, iteration: 201469
loss: 1.006992220878601,grad_norm: 0.9999991066697651, iteration: 201470
loss: 1.0458937883377075,grad_norm: 0.9918715919566916, iteration: 201471
loss: 1.0174551010131836,grad_norm: 0.8960051443040183, iteration: 201472
loss: 0.9955013394355774,grad_norm: 0.716621235043162, iteration: 201473
loss: 1.031821846961975,grad_norm: 0.9750818608537165, iteration: 201474
loss: 0.9939398765563965,grad_norm: 0.9999994107752644, iteration: 201475
loss: 0.9847683310508728,grad_norm: 0.8798699472608906, iteration: 201476
loss: 0.9822030663490295,grad_norm: 0.9999990823481429, iteration: 201477
loss: 1.0064013004302979,grad_norm: 0.9299068757773145, iteration: 201478
loss: 0.986409604549408,grad_norm: 0.9999990543233851, iteration: 201479
loss: 1.0368897914886475,grad_norm: 0.9762736127239475, iteration: 201480
loss: 1.028847336769104,grad_norm: 0.9999990474488245, iteration: 201481
loss: 1.043351650238037,grad_norm: 0.7981300517699712, iteration: 201482
loss: 0.989255964756012,grad_norm: 0.9067851884839219, iteration: 201483
loss: 1.0207445621490479,grad_norm: 0.9999993564170632, iteration: 201484
loss: 1.0032705068588257,grad_norm: 0.8780827976516665, iteration: 201485
loss: 0.9786536693572998,grad_norm: 0.999999230891401, iteration: 201486
loss: 1.0059287548065186,grad_norm: 0.9999991547224714, iteration: 201487
loss: 1.0397125482559204,grad_norm: 0.999999185943222, iteration: 201488
loss: 1.0162731409072876,grad_norm: 0.9132080623668934, iteration: 201489
loss: 0.9842323064804077,grad_norm: 0.9999989210524731, iteration: 201490
loss: 1.071786642074585,grad_norm: 0.9366286134762173, iteration: 201491
loss: 1.0000004768371582,grad_norm: 0.9999990509097755, iteration: 201492
loss: 0.9986116290092468,grad_norm: 0.9105714488327086, iteration: 201493
loss: 1.00125253200531,grad_norm: 0.8996353003537247, iteration: 201494
loss: 0.9907108545303345,grad_norm: 0.9947258056344079, iteration: 201495
loss: 0.9674859642982483,grad_norm: 0.9999991884028174, iteration: 201496
loss: 0.9943157434463501,grad_norm: 0.9999990796921134, iteration: 201497
loss: 1.0755873918533325,grad_norm: 0.9999994544605176, iteration: 201498
loss: 1.0154355764389038,grad_norm: 0.9158053908877224, iteration: 201499
loss: 0.9954403042793274,grad_norm: 0.9999989576048941, iteration: 201500
loss: 1.0091009140014648,grad_norm: 0.8361867983178628, iteration: 201501
loss: 1.0034235715866089,grad_norm: 0.8606987659343174, iteration: 201502
loss: 1.013145089149475,grad_norm: 0.9999990562306829, iteration: 201503
loss: 0.9967933893203735,grad_norm: 0.9999989389329413, iteration: 201504
loss: 0.9699613451957703,grad_norm: 0.8546039171972806, iteration: 201505
loss: 1.0044379234313965,grad_norm: 0.6591166461024325, iteration: 201506
loss: 0.9979695677757263,grad_norm: 0.8169090544313973, iteration: 201507
loss: 1.0039829015731812,grad_norm: 0.8564169354014572, iteration: 201508
loss: 1.0486232042312622,grad_norm: 0.9171333090278082, iteration: 201509
loss: 0.9597638249397278,grad_norm: 0.8768218925528314, iteration: 201510
loss: 0.9759140610694885,grad_norm: 0.7664695224244541, iteration: 201511
loss: 0.9956944584846497,grad_norm: 0.999999183424439, iteration: 201512
loss: 0.9964356422424316,grad_norm: 0.9999990790798482, iteration: 201513
loss: 0.9847424030303955,grad_norm: 0.9999990427613362, iteration: 201514
loss: 0.9883064031600952,grad_norm: 0.9373568716286436, iteration: 201515
loss: 1.0245475769042969,grad_norm: 0.8715629522948959, iteration: 201516
loss: 1.0102459192276,grad_norm: 0.842938162362736, iteration: 201517
loss: 1.0269834995269775,grad_norm: 0.9999992284803431, iteration: 201518
loss: 1.002219796180725,grad_norm: 0.9999990679576325, iteration: 201519
loss: 1.0112287998199463,grad_norm: 0.8636763439547802, iteration: 201520
loss: 0.9925650358200073,grad_norm: 0.8987699888772478, iteration: 201521
loss: 1.0103812217712402,grad_norm: 0.9999990631878289, iteration: 201522
loss: 1.0310442447662354,grad_norm: 0.9999991239630335, iteration: 201523
loss: 1.0836938619613647,grad_norm: 0.9898881615523631, iteration: 201524
loss: 0.9876474738121033,grad_norm: 0.9121070269545857, iteration: 201525
loss: 1.2141400575637817,grad_norm: 0.9999993961322914, iteration: 201526
loss: 0.9533607363700867,grad_norm: 0.8953364442131673, iteration: 201527
loss: 0.9726885557174683,grad_norm: 0.9999992424346802, iteration: 201528
loss: 0.9808403849601746,grad_norm: 0.9999990576900287, iteration: 201529
loss: 1.0244370698928833,grad_norm: 0.9999989578426987, iteration: 201530
loss: 1.011093258857727,grad_norm: 0.9999992537110546, iteration: 201531
loss: 1.1012083292007446,grad_norm: 0.9999996081045054, iteration: 201532
loss: 0.9845433235168457,grad_norm: 0.9648824253250752, iteration: 201533
loss: 0.9702314138412476,grad_norm: 0.8342470553592449, iteration: 201534
loss: 1.072732925415039,grad_norm: 0.9999998619704472, iteration: 201535
loss: 1.006005883216858,grad_norm: 0.9999992548658563, iteration: 201536
loss: 0.9902358651161194,grad_norm: 0.9466799751543958, iteration: 201537
loss: 0.994709849357605,grad_norm: 0.925351989762996, iteration: 201538
loss: 0.9503441452980042,grad_norm: 0.8826698022204781, iteration: 201539
loss: 1.0168952941894531,grad_norm: 0.9999991755727967, iteration: 201540
loss: 0.9537222385406494,grad_norm: 0.9685216680816807, iteration: 201541
loss: 1.0054699182510376,grad_norm: 0.9999992359442083, iteration: 201542
loss: 0.9870613217353821,grad_norm: 0.9999994097777319, iteration: 201543
loss: 1.0003119707107544,grad_norm: 0.970961415107522, iteration: 201544
loss: 0.9677536487579346,grad_norm: 0.9239827870292538, iteration: 201545
loss: 1.0173730850219727,grad_norm: 0.9999992686786298, iteration: 201546
loss: 0.9679123759269714,grad_norm: 0.9999990924251024, iteration: 201547
loss: 0.9786468148231506,grad_norm: 0.9890403140992501, iteration: 201548
loss: 0.999619722366333,grad_norm: 0.9999990783330064, iteration: 201549
loss: 0.9700828790664673,grad_norm: 0.9999990517472821, iteration: 201550
loss: 0.9999816417694092,grad_norm: 0.9007502378219963, iteration: 201551
loss: 0.9980109333992004,grad_norm: 0.998928895967228, iteration: 201552
loss: 1.0007470846176147,grad_norm: 0.9999989925063391, iteration: 201553
loss: 0.9634374976158142,grad_norm: 0.8297371955978864, iteration: 201554
loss: 1.0141303539276123,grad_norm: 0.9999990759210097, iteration: 201555
loss: 0.9809043407440186,grad_norm: 0.9999991387818533, iteration: 201556
loss: 1.0037851333618164,grad_norm: 0.9999990870790642, iteration: 201557
loss: 0.9935574531555176,grad_norm: 0.9999996663468935, iteration: 201558
loss: 1.071370244026184,grad_norm: 0.9681580447404148, iteration: 201559
loss: 0.991325855255127,grad_norm: 0.8982929079981253, iteration: 201560
loss: 1.0522360801696777,grad_norm: 0.9999996370607585, iteration: 201561
loss: 0.9977465867996216,grad_norm: 0.9999990634188837, iteration: 201562
loss: 0.9586133360862732,grad_norm: 0.9757072404257935, iteration: 201563
loss: 1.0001444816589355,grad_norm: 0.9744444044051015, iteration: 201564
loss: 0.9943938255310059,grad_norm: 0.8752250825986759, iteration: 201565
loss: 1.0027103424072266,grad_norm: 0.9285872207022328, iteration: 201566
loss: 0.9853518605232239,grad_norm: 0.9999990638837979, iteration: 201567
loss: 1.051986575126648,grad_norm: 0.9999990805174406, iteration: 201568
loss: 1.0098254680633545,grad_norm: 0.9246009140098959, iteration: 201569
loss: 0.9897671341896057,grad_norm: 0.9463507191382358, iteration: 201570
loss: 1.037075161933899,grad_norm: 0.9078343881666779, iteration: 201571
loss: 0.9980932474136353,grad_norm: 0.8493759156140285, iteration: 201572
loss: 1.02719247341156,grad_norm: 0.928810659498271, iteration: 201573
loss: 1.0142393112182617,grad_norm: 0.9999991837856431, iteration: 201574
loss: 1.0221554040908813,grad_norm: 0.85319976036394, iteration: 201575
loss: 0.980164110660553,grad_norm: 0.8986140861125005, iteration: 201576
loss: 0.9876471757888794,grad_norm: 0.9089330246525545, iteration: 201577
loss: 0.988660454750061,grad_norm: 0.9999990428760345, iteration: 201578
loss: 0.9713118076324463,grad_norm: 0.9399965709824362, iteration: 201579
loss: 1.0352989435195923,grad_norm: 0.9999991630108283, iteration: 201580
loss: 0.9746923446655273,grad_norm: 0.9967744108137434, iteration: 201581
loss: 1.0018244981765747,grad_norm: 0.8950947545187454, iteration: 201582
loss: 1.0273994207382202,grad_norm: 0.771846866307929, iteration: 201583
loss: 1.0006906986236572,grad_norm: 0.9345795301617826, iteration: 201584
loss: 0.9842167496681213,grad_norm: 0.9841825458513815, iteration: 201585
loss: 1.0014210939407349,grad_norm: 0.9409166046268188, iteration: 201586
loss: 1.073518991470337,grad_norm: 0.9999990596750626, iteration: 201587
loss: 1.009935975074768,grad_norm: 0.9999992957679156, iteration: 201588
loss: 1.0079065561294556,grad_norm: 0.9999990078330611, iteration: 201589
loss: 1.0136884450912476,grad_norm: 0.9190535361703261, iteration: 201590
loss: 1.0279533863067627,grad_norm: 0.9999990877240278, iteration: 201591
loss: 0.9780029654502869,grad_norm: 0.8597235668520766, iteration: 201592
loss: 1.072889804840088,grad_norm: 0.9999997777703264, iteration: 201593
loss: 1.010157585144043,grad_norm: 0.9163139338851325, iteration: 201594
loss: 0.9737125635147095,grad_norm: 0.9983256413228592, iteration: 201595
loss: 1.0651826858520508,grad_norm: 0.9999996282980804, iteration: 201596
loss: 1.0003057718276978,grad_norm: 0.9999990589648751, iteration: 201597
loss: 1.0098637342453003,grad_norm: 0.9999997588878125, iteration: 201598
loss: 1.0545142889022827,grad_norm: 0.8979644072690024, iteration: 201599
loss: 0.9909263849258423,grad_norm: 0.9999991458587965, iteration: 201600
loss: 1.0037927627563477,grad_norm: 0.9999989148740209, iteration: 201601
loss: 1.0399494171142578,grad_norm: 0.9179500092836413, iteration: 201602
loss: 0.9903299808502197,grad_norm: 0.9999990333710602, iteration: 201603
loss: 1.0093392133712769,grad_norm: 0.9999990959232301, iteration: 201604
loss: 1.030841588973999,grad_norm: 0.9999991281423682, iteration: 201605
loss: 0.9874356389045715,grad_norm: 0.8847015010590071, iteration: 201606
loss: 1.0245345830917358,grad_norm: 0.9757746033678911, iteration: 201607
loss: 0.9688467979431152,grad_norm: 0.9999989931197559, iteration: 201608
loss: 0.9961854219436646,grad_norm: 0.9999991270227993, iteration: 201609
loss: 0.9739168286323547,grad_norm: 0.8918298412320331, iteration: 201610
loss: 0.9884669780731201,grad_norm: 0.9999990278683489, iteration: 201611
loss: 0.9907568097114563,grad_norm: 0.9345848067406028, iteration: 201612
loss: 1.024957299232483,grad_norm: 0.9226094205080784, iteration: 201613
loss: 1.0593329668045044,grad_norm: 0.999999422792181, iteration: 201614
loss: 1.0167442560195923,grad_norm: 0.8325491464138215, iteration: 201615
loss: 1.0046353340148926,grad_norm: 0.9999991013213205, iteration: 201616
loss: 1.0036269426345825,grad_norm: 0.8884333416608563, iteration: 201617
loss: 0.9964888095855713,grad_norm: 0.9634802024660183, iteration: 201618
loss: 0.9977530241012573,grad_norm: 0.9999990773944586, iteration: 201619
loss: 1.0125768184661865,grad_norm: 0.9983664706354767, iteration: 201620
loss: 1.0548913478851318,grad_norm: 0.9999991913686731, iteration: 201621
loss: 1.0996944904327393,grad_norm: 0.9999990061516582, iteration: 201622
loss: 1.1044127941131592,grad_norm: 0.9999999429280365, iteration: 201623
loss: 1.0157550573349,grad_norm: 0.9861111535666729, iteration: 201624
loss: 0.9831163883209229,grad_norm: 0.7738506667857271, iteration: 201625
loss: 1.0059537887573242,grad_norm: 0.9999991315829749, iteration: 201626
loss: 1.0150504112243652,grad_norm: 0.9999991392960648, iteration: 201627
loss: 0.9803770184516907,grad_norm: 0.8917979441378423, iteration: 201628
loss: 0.9815877079963684,grad_norm: 0.8958743878939375, iteration: 201629
loss: 0.9983399510383606,grad_norm: 0.8404858064008283, iteration: 201630
loss: 0.9908871054649353,grad_norm: 0.895151518551199, iteration: 201631
loss: 1.0020467042922974,grad_norm: 0.9999997020812155, iteration: 201632
loss: 0.9993177056312561,grad_norm: 0.7977196495565556, iteration: 201633
loss: 1.0127147436141968,grad_norm: 0.9999991941782805, iteration: 201634
loss: 1.006891131401062,grad_norm: 0.9095135472521848, iteration: 201635
loss: 1.0095598697662354,grad_norm: 0.9999991286221209, iteration: 201636
loss: 1.0420551300048828,grad_norm: 0.9999994858681418, iteration: 201637
loss: 0.9886317849159241,grad_norm: 0.9999994240673131, iteration: 201638
loss: 1.0035487413406372,grad_norm: 0.999999260545171, iteration: 201639
loss: 1.063019871711731,grad_norm: 0.9999998221135596, iteration: 201640
loss: 1.0555157661437988,grad_norm: 0.8763530985041197, iteration: 201641
loss: 0.9753244519233704,grad_norm: 0.9999990578379538, iteration: 201642
loss: 1.0169223546981812,grad_norm: 0.9966207603482246, iteration: 201643
loss: 0.9980767965316772,grad_norm: 0.8706628641351167, iteration: 201644
loss: 1.0379575490951538,grad_norm: 0.9999995478681806, iteration: 201645
loss: 0.9785186648368835,grad_norm: 0.861498764093482, iteration: 201646
loss: 0.9998807907104492,grad_norm: 0.9999999792382108, iteration: 201647
loss: 0.9876216053962708,grad_norm: 0.9999990467516858, iteration: 201648
loss: 0.990283191204071,grad_norm: 0.8526711501620874, iteration: 201649
loss: 1.0855132341384888,grad_norm: 0.9999991391532227, iteration: 201650
loss: 1.0165399312973022,grad_norm: 0.9999991551913782, iteration: 201651
loss: 1.0514765977859497,grad_norm: 0.9999990387729257, iteration: 201652
loss: 1.0995292663574219,grad_norm: 0.9999991470250831, iteration: 201653
loss: 1.1338099241256714,grad_norm: 0.9999997955595151, iteration: 201654
loss: 1.0464874505996704,grad_norm: 0.9999991189459285, iteration: 201655
loss: 0.9984890222549438,grad_norm: 0.7467302539134786, iteration: 201656
loss: 1.0271110534667969,grad_norm: 0.9519516698347852, iteration: 201657
loss: 0.987895131111145,grad_norm: 0.9487345115127065, iteration: 201658
loss: 1.0597318410873413,grad_norm: 0.9999992178765078, iteration: 201659
loss: 1.019805908203125,grad_norm: 0.9999991398664536, iteration: 201660
loss: 1.0487881898880005,grad_norm: 0.8705599739462274, iteration: 201661
loss: 1.008148193359375,grad_norm: 0.9999991168720473, iteration: 201662
loss: 1.011048674583435,grad_norm: 0.9999998169169365, iteration: 201663
loss: 0.9905959367752075,grad_norm: 0.9999995948649788, iteration: 201664
loss: 1.0389089584350586,grad_norm: 0.9999996248145582, iteration: 201665
loss: 1.037093997001648,grad_norm: 0.9999997385424996, iteration: 201666
loss: 1.006332278251648,grad_norm: 0.9946310275115283, iteration: 201667
loss: 1.0392305850982666,grad_norm: 0.9999991591221612, iteration: 201668
loss: 1.01663339138031,grad_norm: 0.9312437941686638, iteration: 201669
loss: 1.043716549873352,grad_norm: 0.9999998710697826, iteration: 201670
loss: 0.9655004143714905,grad_norm: 0.9163624498978807, iteration: 201671
loss: 0.9974124431610107,grad_norm: 0.8594965692900889, iteration: 201672
loss: 1.0495140552520752,grad_norm: 0.9999991362622817, iteration: 201673
loss: 1.052195429801941,grad_norm: 0.9945612767684349, iteration: 201674
loss: 1.0393667221069336,grad_norm: 0.9999991150080417, iteration: 201675
loss: 1.0308328866958618,grad_norm: 0.9999991504738199, iteration: 201676
loss: 1.022195816040039,grad_norm: 0.7102111252717969, iteration: 201677
loss: 1.0008957386016846,grad_norm: 0.9287040945328002, iteration: 201678
loss: 1.0077321529388428,grad_norm: 0.7851150701154936, iteration: 201679
loss: 1.0003340244293213,grad_norm: 0.99999923492032, iteration: 201680
loss: 0.9993047118186951,grad_norm: 0.8276908821118297, iteration: 201681
loss: 1.0590068101882935,grad_norm: 0.999999175624449, iteration: 201682
loss: 0.9728347659111023,grad_norm: 0.9999991752757023, iteration: 201683
loss: 1.0216329097747803,grad_norm: 0.8606766112724727, iteration: 201684
loss: 0.9841921925544739,grad_norm: 0.9684245898396177, iteration: 201685
loss: 1.120256781578064,grad_norm: 0.9999999092824, iteration: 201686
loss: 0.9752273559570312,grad_norm: 0.9999993691885816, iteration: 201687
loss: 1.0078206062316895,grad_norm: 0.7955789340044118, iteration: 201688
loss: 1.041711449623108,grad_norm: 0.9242817035934554, iteration: 201689
loss: 0.9994602203369141,grad_norm: 0.8849739239520127, iteration: 201690
loss: 0.9608326554298401,grad_norm: 0.8756507513330216, iteration: 201691
loss: 0.9901340007781982,grad_norm: 0.8406510862878197, iteration: 201692
loss: 1.0617412328720093,grad_norm: 0.848513923443043, iteration: 201693
loss: 1.0158299207687378,grad_norm: 0.9630141513950343, iteration: 201694
loss: 1.023144006729126,grad_norm: 0.8612973106843999, iteration: 201695
loss: 1.0182204246520996,grad_norm: 0.8635749104144778, iteration: 201696
loss: 0.9740957021713257,grad_norm: 0.8641927110875549, iteration: 201697
loss: 1.0051586627960205,grad_norm: 0.8781525371608887, iteration: 201698
loss: 1.016669511795044,grad_norm: 0.8701595461330606, iteration: 201699
loss: 1.0098388195037842,grad_norm: 0.9657848265510345, iteration: 201700
loss: 1.0088343620300293,grad_norm: 0.9999990259736575, iteration: 201701
loss: 1.0461894273757935,grad_norm: 0.9999992780977044, iteration: 201702
loss: 0.9811738729476929,grad_norm: 0.8840694058918673, iteration: 201703
loss: 1.006858229637146,grad_norm: 0.9999991157218913, iteration: 201704
loss: 0.9850538372993469,grad_norm: 0.979259501849893, iteration: 201705
loss: 1.0068100690841675,grad_norm: 0.9917270278488353, iteration: 201706
loss: 0.9818742275238037,grad_norm: 0.9999991336154482, iteration: 201707
loss: 0.9890591502189636,grad_norm: 0.8423272850427636, iteration: 201708
loss: 0.9976009130477905,grad_norm: 0.9999993742867613, iteration: 201709
loss: 1.013181209564209,grad_norm: 0.9943084325102977, iteration: 201710
loss: 1.0213090181350708,grad_norm: 0.8841536099106764, iteration: 201711
loss: 1.0196175575256348,grad_norm: 0.9999990621188048, iteration: 201712
loss: 0.9789038896560669,grad_norm: 0.8986175024798242, iteration: 201713
loss: 1.0097613334655762,grad_norm: 0.9467257806905643, iteration: 201714
loss: 0.9701811075210571,grad_norm: 0.8794712603830201, iteration: 201715
loss: 0.978607177734375,grad_norm: 0.9038270752080888, iteration: 201716
loss: 0.9706306457519531,grad_norm: 0.999999274944456, iteration: 201717
loss: 1.0020216703414917,grad_norm: 0.9999991123385854, iteration: 201718
loss: 1.0367258787155151,grad_norm: 0.999999526323724, iteration: 201719
loss: 1.0071063041687012,grad_norm: 0.8324948697492374, iteration: 201720
loss: 0.9702130556106567,grad_norm: 0.8895631750564906, iteration: 201721
loss: 1.0253074169158936,grad_norm: 0.9999990580659327, iteration: 201722
loss: 1.0065675973892212,grad_norm: 0.8673712965056025, iteration: 201723
loss: 1.0171905755996704,grad_norm: 0.9999997696312395, iteration: 201724
loss: 0.9870344996452332,grad_norm: 0.9999991019727933, iteration: 201725
loss: 0.9807373285293579,grad_norm: 0.9046708026440847, iteration: 201726
loss: 1.050511360168457,grad_norm: 0.999999212560492, iteration: 201727
loss: 0.9585490822792053,grad_norm: 0.9886183947496385, iteration: 201728
loss: 1.138132095336914,grad_norm: 0.9999993555762821, iteration: 201729
loss: 0.9819034934043884,grad_norm: 0.8284462650142795, iteration: 201730
loss: 1.0235110521316528,grad_norm: 0.9543755061058846, iteration: 201731
loss: 1.0260167121887207,grad_norm: 0.9999992769357237, iteration: 201732
loss: 1.0106760263442993,grad_norm: 0.8861765736899966, iteration: 201733
loss: 0.9852319359779358,grad_norm: 0.9409081100891594, iteration: 201734
loss: 1.0525652170181274,grad_norm: 0.897557246319341, iteration: 201735
loss: 0.9837927222251892,grad_norm: 0.9704762782472052, iteration: 201736
loss: 0.9584767818450928,grad_norm: 0.9245874154164828, iteration: 201737
loss: 1.0110366344451904,grad_norm: 0.8703617669803975, iteration: 201738
loss: 1.0249751806259155,grad_norm: 0.9999998301347778, iteration: 201739
loss: 1.0142933130264282,grad_norm: 0.9320330871759133, iteration: 201740
loss: 0.9976300597190857,grad_norm: 0.8781450346128971, iteration: 201741
loss: 1.0509247779846191,grad_norm: 0.9999994596440488, iteration: 201742
loss: 1.0275263786315918,grad_norm: 0.9999991350127858, iteration: 201743
loss: 0.9971667528152466,grad_norm: 0.9720700915246739, iteration: 201744
loss: 1.0249370336532593,grad_norm: 0.9999990571487447, iteration: 201745
loss: 1.027611255645752,grad_norm: 0.9413311297970125, iteration: 201746
loss: 1.0127750635147095,grad_norm: 0.966164386817129, iteration: 201747
loss: 1.0136888027191162,grad_norm: 0.8006151475202399, iteration: 201748
loss: 1.0071897506713867,grad_norm: 0.993172328659011, iteration: 201749
loss: 0.9962615966796875,grad_norm: 0.9136267538659669, iteration: 201750
loss: 1.0416439771652222,grad_norm: 0.8240151140048073, iteration: 201751
loss: 1.0394024848937988,grad_norm: 0.993803204607856, iteration: 201752
loss: 1.050516128540039,grad_norm: 0.9999997720578507, iteration: 201753
loss: 1.0178664922714233,grad_norm: 0.9840021746140601, iteration: 201754
loss: 1.0740840435028076,grad_norm: 0.9875834946410958, iteration: 201755
loss: 1.1562309265136719,grad_norm: 0.9999999633077551, iteration: 201756
loss: 1.017429232597351,grad_norm: 0.841779334608722, iteration: 201757
loss: 1.011345386505127,grad_norm: 0.9999992702667996, iteration: 201758
loss: 1.0030709505081177,grad_norm: 0.9407606734725433, iteration: 201759
loss: 0.967644453048706,grad_norm: 0.9390448933566846, iteration: 201760
loss: 0.9969407916069031,grad_norm: 0.9999989358349366, iteration: 201761
loss: 0.9903044104576111,grad_norm: 0.850490182921412, iteration: 201762
loss: 1.0553388595581055,grad_norm: 0.97294455203988, iteration: 201763
loss: 0.9779747724533081,grad_norm: 0.9999990880930794, iteration: 201764
loss: 1.0085710287094116,grad_norm: 0.9999992456382882, iteration: 201765
loss: 0.9503729343414307,grad_norm: 0.9115033257066023, iteration: 201766
loss: 1.066192626953125,grad_norm: 0.8540108873793912, iteration: 201767
loss: 0.9872643351554871,grad_norm: 0.9999992589557822, iteration: 201768
loss: 1.008158564567566,grad_norm: 0.9441060041082052, iteration: 201769
loss: 1.0096666812896729,grad_norm: 0.7552115304320229, iteration: 201770
loss: 0.9770336747169495,grad_norm: 0.7753169902334899, iteration: 201771
loss: 0.9901743531227112,grad_norm: 0.686111106492718, iteration: 201772
loss: 0.9821885228157043,grad_norm: 0.9999990524805219, iteration: 201773
loss: 1.02245032787323,grad_norm: 0.9999990795981981, iteration: 201774
loss: 1.0219478607177734,grad_norm: 0.9999992819239166, iteration: 201775
loss: 1.0257604122161865,grad_norm: 0.9093112009572466, iteration: 201776
loss: 1.1095677614212036,grad_norm: 0.9999997918551023, iteration: 201777
loss: 0.9521209597587585,grad_norm: 0.9811878955616905, iteration: 201778
loss: 0.9854605197906494,grad_norm: 0.9572056059685161, iteration: 201779
loss: 1.0066778659820557,grad_norm: 0.8692479608820218, iteration: 201780
loss: 1.0076464414596558,grad_norm: 0.9999992457563773, iteration: 201781
loss: 0.9790539741516113,grad_norm: 0.8231036195892794, iteration: 201782
loss: 1.0080547332763672,grad_norm: 0.9996655165980627, iteration: 201783
loss: 1.0006123781204224,grad_norm: 0.9400219629141959, iteration: 201784
loss: 1.0011889934539795,grad_norm: 0.9999992079891307, iteration: 201785
loss: 1.008236050605774,grad_norm: 0.982152611085392, iteration: 201786
loss: 1.0311583280563354,grad_norm: 0.9999992026127218, iteration: 201787
loss: 0.975993812084198,grad_norm: 0.867709145865834, iteration: 201788
loss: 0.996099054813385,grad_norm: 0.9646020447036733, iteration: 201789
loss: 1.00821053981781,grad_norm: 0.9999992388787539, iteration: 201790
loss: 1.001268744468689,grad_norm: 0.9999992082280506, iteration: 201791
loss: 0.9838501811027527,grad_norm: 0.9238752198147118, iteration: 201792
loss: 0.9798774123191833,grad_norm: 0.9511349915534039, iteration: 201793
loss: 0.9560273289680481,grad_norm: 0.8732249413717335, iteration: 201794
loss: 0.9924355745315552,grad_norm: 0.9999108131693691, iteration: 201795
loss: 1.0079801082611084,grad_norm: 0.8607946768797696, iteration: 201796
loss: 1.0070722103118896,grad_norm: 0.8725750494068202, iteration: 201797
loss: 1.061113953590393,grad_norm: 0.9999991626202628, iteration: 201798
loss: 1.0006706714630127,grad_norm: 0.9645157708405271, iteration: 201799
loss: 1.055554747581482,grad_norm: 0.999999784851993, iteration: 201800
loss: 0.9678640365600586,grad_norm: 0.9991680372577135, iteration: 201801
loss: 0.9626768827438354,grad_norm: 0.9999993723629274, iteration: 201802
loss: 0.9599597454071045,grad_norm: 0.9074701615255327, iteration: 201803
loss: 0.9743444919586182,grad_norm: 0.8898560748653883, iteration: 201804
loss: 0.9723732471466064,grad_norm: 0.9999991784883783, iteration: 201805
loss: 1.0224339962005615,grad_norm: 0.7993907672211433, iteration: 201806
loss: 1.0178273916244507,grad_norm: 0.8596295863198293, iteration: 201807
loss: 1.0203969478607178,grad_norm: 0.9999994815850236, iteration: 201808
loss: 1.002837061882019,grad_norm: 0.8905435366834878, iteration: 201809
loss: 0.9940152764320374,grad_norm: 0.9833485668539586, iteration: 201810
loss: 1.035488247871399,grad_norm: 0.9269818159186005, iteration: 201811
loss: 0.9859166145324707,grad_norm: 0.9999991610687795, iteration: 201812
loss: 0.9897162914276123,grad_norm: 0.9999992719291645, iteration: 201813
loss: 0.9956907629966736,grad_norm: 0.9025588378295796, iteration: 201814
loss: 1.0404398441314697,grad_norm: 0.8211586990513716, iteration: 201815
loss: 0.9952671527862549,grad_norm: 0.9392884281766296, iteration: 201816
loss: 0.999533474445343,grad_norm: 0.9733898896247287, iteration: 201817
loss: 1.029334545135498,grad_norm: 0.9597253306248044, iteration: 201818
loss: 1.030226707458496,grad_norm: 0.9236341006102264, iteration: 201819
loss: 1.0443875789642334,grad_norm: 0.9999993938510332, iteration: 201820
loss: 1.005063533782959,grad_norm: 0.8994149820578037, iteration: 201821
loss: 0.9893397092819214,grad_norm: 0.8838097042519126, iteration: 201822
loss: 1.038338541984558,grad_norm: 0.9999989641951276, iteration: 201823
loss: 0.9977005124092102,grad_norm: 0.9458410952532376, iteration: 201824
loss: 1.0218082666397095,grad_norm: 0.8754310354545396, iteration: 201825
loss: 0.975501298904419,grad_norm: 0.8505154674719583, iteration: 201826
loss: 0.9921872615814209,grad_norm: 0.9949080795992568, iteration: 201827
loss: 1.0199559926986694,grad_norm: 0.8711907607986085, iteration: 201828
loss: 1.0529483556747437,grad_norm: 0.8872494838830568, iteration: 201829
loss: 0.9992054104804993,grad_norm: 0.999999546184581, iteration: 201830
loss: 0.9884730577468872,grad_norm: 0.9740547629311241, iteration: 201831
loss: 1.0119785070419312,grad_norm: 0.8824924048422511, iteration: 201832
loss: 0.9763177037239075,grad_norm: 0.8948062495520974, iteration: 201833
loss: 1.0079652070999146,grad_norm: 0.8616118165867097, iteration: 201834
loss: 0.9834107160568237,grad_norm: 0.8400595621153446, iteration: 201835
loss: 0.9702098965644836,grad_norm: 0.9323804550880294, iteration: 201836
loss: 0.9938898086547852,grad_norm: 0.8625457835422964, iteration: 201837
loss: 1.0864452123641968,grad_norm: 0.9999993182867731, iteration: 201838
loss: 1.009472370147705,grad_norm: 0.9999991788044731, iteration: 201839
loss: 1.0282139778137207,grad_norm: 0.8396924591009729, iteration: 201840
loss: 1.0067203044891357,grad_norm: 0.9197099157472617, iteration: 201841
loss: 1.018980622291565,grad_norm: 0.9195533553123667, iteration: 201842
loss: 1.0114446878433228,grad_norm: 0.9999999345403806, iteration: 201843
loss: 1.0224360227584839,grad_norm: 0.9999993083890815, iteration: 201844
loss: 1.0222749710083008,grad_norm: 0.9999999114243724, iteration: 201845
loss: 1.0133466720581055,grad_norm: 0.9999997186309468, iteration: 201846
loss: 0.9950570464134216,grad_norm: 0.8400272796879071, iteration: 201847
loss: 1.0136877298355103,grad_norm: 0.999999155762606, iteration: 201848
loss: 1.0120539665222168,grad_norm: 0.9999990999710541, iteration: 201849
loss: 1.0088917016983032,grad_norm: 0.9999990295797306, iteration: 201850
loss: 1.0149697065353394,grad_norm: 0.9999992795797848, iteration: 201851
loss: 0.9829264879226685,grad_norm: 0.8924849890979275, iteration: 201852
loss: 1.0110570192337036,grad_norm: 0.9392168753167001, iteration: 201853
loss: 1.0433661937713623,grad_norm: 0.9999998679453865, iteration: 201854
loss: 0.9990676641464233,grad_norm: 0.9999992025290659, iteration: 201855
loss: 0.9740846753120422,grad_norm: 0.9952978377541553, iteration: 201856
loss: 1.007362961769104,grad_norm: 0.9745845805035647, iteration: 201857
loss: 1.0426702499389648,grad_norm: 0.8428022323758954, iteration: 201858
loss: 0.9954768419265747,grad_norm: 0.915506385673714, iteration: 201859
loss: 1.0303654670715332,grad_norm: 0.9613328276802067, iteration: 201860
loss: 0.9942739009857178,grad_norm: 0.8218786357902559, iteration: 201861
loss: 1.034619927406311,grad_norm: 0.9999992691067168, iteration: 201862
loss: 1.0136927366256714,grad_norm: 0.9637098618689395, iteration: 201863
loss: 0.9961503744125366,grad_norm: 0.9296854401612973, iteration: 201864
loss: 0.9958839416503906,grad_norm: 0.8256857496975905, iteration: 201865
loss: 0.9834688305854797,grad_norm: 0.9266736112646069, iteration: 201866
loss: 1.0137871503829956,grad_norm: 0.9999991708240913, iteration: 201867
loss: 1.0079281330108643,grad_norm: 0.9999997351429492, iteration: 201868
loss: 1.0304813385009766,grad_norm: 0.9339862714653318, iteration: 201869
loss: 1.00983726978302,grad_norm: 0.9999990964299411, iteration: 201870
loss: 0.9713174104690552,grad_norm: 0.9890769403171281, iteration: 201871
loss: 1.1090182065963745,grad_norm: 0.9999991152490072, iteration: 201872
loss: 0.979745090007782,grad_norm: 0.9999998705702356, iteration: 201873
loss: 1.1182045936584473,grad_norm: 0.9222129264007743, iteration: 201874
loss: 1.0209254026412964,grad_norm: 0.9999992513929632, iteration: 201875
loss: 1.0325819253921509,grad_norm: 0.9999989951421091, iteration: 201876
loss: 1.0109429359436035,grad_norm: 0.8578017685532962, iteration: 201877
loss: 0.9946200847625732,grad_norm: 0.8591161013213691, iteration: 201878
loss: 1.0241445302963257,grad_norm: 0.836281485491536, iteration: 201879
loss: 1.0263183116912842,grad_norm: 0.9999990405705572, iteration: 201880
loss: 1.0256544351577759,grad_norm: 0.9999990725699464, iteration: 201881
loss: 0.9800012111663818,grad_norm: 0.7840305959978174, iteration: 201882
loss: 1.0163315534591675,grad_norm: 0.9999995298618344, iteration: 201883
loss: 0.9661082625389099,grad_norm: 0.886908484639888, iteration: 201884
loss: 1.1044894456863403,grad_norm: 0.9999992804204558, iteration: 201885
loss: 1.0141736268997192,grad_norm: 0.8713584458258783, iteration: 201886
loss: 1.0473582744598389,grad_norm: 0.9657830920491246, iteration: 201887
loss: 1.0051418542861938,grad_norm: 0.9489184772882631, iteration: 201888
loss: 0.9886090755462646,grad_norm: 0.778551071311752, iteration: 201889
loss: 1.041137456893921,grad_norm: 0.9999991907488283, iteration: 201890
loss: 1.0057979822158813,grad_norm: 0.9272055352438252, iteration: 201891
loss: 1.0130045413970947,grad_norm: 0.8247993660065644, iteration: 201892
loss: 0.9949151277542114,grad_norm: 0.9596275772860796, iteration: 201893
loss: 1.025680422782898,grad_norm: 0.9414089412292608, iteration: 201894
loss: 0.9662019610404968,grad_norm: 0.999999021542253, iteration: 201895
loss: 1.0472451448440552,grad_norm: 0.9349721436510854, iteration: 201896
loss: 1.0324852466583252,grad_norm: 0.8977424789005441, iteration: 201897
loss: 0.9733555316925049,grad_norm: 0.9999991307598308, iteration: 201898
loss: 1.1069718599319458,grad_norm: 0.9999993597348497, iteration: 201899
loss: 1.038710594177246,grad_norm: 0.9999990803439549, iteration: 201900
loss: 1.0315850973129272,grad_norm: 0.9455140035089119, iteration: 201901
loss: 0.9868623614311218,grad_norm: 0.9335477010583986, iteration: 201902
loss: 1.0503917932510376,grad_norm: 0.9999990829137729, iteration: 201903
loss: 0.9524776339530945,grad_norm: 0.8938725374607797, iteration: 201904
loss: 0.9899811744689941,grad_norm: 0.9347117049043807, iteration: 201905
loss: 0.9595381021499634,grad_norm: 0.8457908005471231, iteration: 201906
loss: 1.00774085521698,grad_norm: 0.9213066270266584, iteration: 201907
loss: 0.9974279999732971,grad_norm: 0.9999991112942169, iteration: 201908
loss: 0.969591498374939,grad_norm: 0.9228762346585364, iteration: 201909
loss: 1.0324592590332031,grad_norm: 0.9990113755164655, iteration: 201910
loss: 1.0495069026947021,grad_norm: 0.9538422012267923, iteration: 201911
loss: 1.0270026922225952,grad_norm: 0.999999106442906, iteration: 201912
loss: 1.0240271091461182,grad_norm: 1.000000069834558, iteration: 201913
loss: 1.2812299728393555,grad_norm: 0.9999997505036552, iteration: 201914
loss: 1.0051120519638062,grad_norm: 0.9999990642924511, iteration: 201915
loss: 1.0274128913879395,grad_norm: 0.889182131037838, iteration: 201916
loss: 0.9582605957984924,grad_norm: 0.82331016247223, iteration: 201917
loss: 0.9702680706977844,grad_norm: 0.9999990410659807, iteration: 201918
loss: 1.0451992750167847,grad_norm: 0.9999994157276657, iteration: 201919
loss: 1.0365262031555176,grad_norm: 0.9999991364635649, iteration: 201920
loss: 0.9506188631057739,grad_norm: 0.9315656314425542, iteration: 201921
loss: 0.9770165085792542,grad_norm: 0.89289750083895, iteration: 201922
loss: 1.0033012628555298,grad_norm: 0.8164563172981756, iteration: 201923
loss: 1.000207781791687,grad_norm: 0.9999990366802104, iteration: 201924
loss: 1.0010161399841309,grad_norm: 0.9999991235359644, iteration: 201925
loss: 1.016843557357788,grad_norm: 0.825968299014925, iteration: 201926
loss: 1.0203793048858643,grad_norm: 0.9999991134352297, iteration: 201927
loss: 1.0130059719085693,grad_norm: 0.9999991453465051, iteration: 201928
loss: 1.0640771389007568,grad_norm: 0.9999989543087149, iteration: 201929
loss: 0.9861351847648621,grad_norm: 0.8374697568044399, iteration: 201930
loss: 0.99101722240448,grad_norm: 0.9163858210685132, iteration: 201931
loss: 0.9756546020507812,grad_norm: 0.9891687089677552, iteration: 201932
loss: 0.9829190969467163,grad_norm: 0.9999992318200728, iteration: 201933
loss: 0.998011589050293,grad_norm: 0.9127529572586506, iteration: 201934
loss: 0.9768638014793396,grad_norm: 0.9999997174655172, iteration: 201935
loss: 1.0016690492630005,grad_norm: 0.9664009095579355, iteration: 201936
loss: 0.9862013459205627,grad_norm: 0.9999992828384292, iteration: 201937
loss: 1.1136113405227661,grad_norm: 0.9812948862244892, iteration: 201938
loss: 0.9852145910263062,grad_norm: 0.9717954575367042, iteration: 201939
loss: 1.0078725814819336,grad_norm: 0.99999911998979, iteration: 201940
loss: 0.9863904118537903,grad_norm: 0.9796178389421843, iteration: 201941
loss: 1.037237524986267,grad_norm: 0.9337239832069517, iteration: 201942
loss: 1.0120313167572021,grad_norm: 0.9999991741423425, iteration: 201943
loss: 1.007359266281128,grad_norm: 0.9535448889697372, iteration: 201944
loss: 0.9551394581794739,grad_norm: 0.8328625566800909, iteration: 201945
loss: 0.9519587159156799,grad_norm: 0.7221388479082798, iteration: 201946
loss: 0.9816416501998901,grad_norm: 0.999999559625742, iteration: 201947
loss: 1.0173085927963257,grad_norm: 0.9903325972899051, iteration: 201948
loss: 1.056322455406189,grad_norm: 0.9748516209490721, iteration: 201949
loss: 1.0310580730438232,grad_norm: 0.8617709697411379, iteration: 201950
loss: 1.2910093069076538,grad_norm: 0.9999999122443555, iteration: 201951
loss: 0.9759838581085205,grad_norm: 0.9735997406907231, iteration: 201952
loss: 0.9516145586967468,grad_norm: 0.9999990258120235, iteration: 201953
loss: 1.0426974296569824,grad_norm: 0.8612467476964257, iteration: 201954
loss: 0.9899848103523254,grad_norm: 0.9727040023554115, iteration: 201955
loss: 1.0071446895599365,grad_norm: 0.9289854800742523, iteration: 201956
loss: 1.0256036520004272,grad_norm: 0.8857565714327049, iteration: 201957
loss: 1.0034571886062622,grad_norm: 0.9685855032191207, iteration: 201958
loss: 1.0394824743270874,grad_norm: 0.8955753376949162, iteration: 201959
loss: 0.9997654557228088,grad_norm: 0.8762253359066344, iteration: 201960
loss: 1.0228056907653809,grad_norm: 0.9999992415159231, iteration: 201961
loss: 1.0413566827774048,grad_norm: 0.9681415883596503, iteration: 201962
loss: 0.9786661267280579,grad_norm: 0.91898852209075, iteration: 201963
loss: 1.0182409286499023,grad_norm: 0.9999990465911018, iteration: 201964
loss: 0.9621998071670532,grad_norm: 0.9371533795982078, iteration: 201965
loss: 0.9646909832954407,grad_norm: 0.9241271257332117, iteration: 201966
loss: 0.9820799231529236,grad_norm: 0.8308075413962288, iteration: 201967
loss: 0.9942158460617065,grad_norm: 0.8875813377374149, iteration: 201968
loss: 1.0223565101623535,grad_norm: 0.945233021701976, iteration: 201969
loss: 1.0418012142181396,grad_norm: 0.9999990392725331, iteration: 201970
loss: 0.9908945560455322,grad_norm: 0.9710387978595996, iteration: 201971
loss: 0.986966073513031,grad_norm: 0.9999989635964043, iteration: 201972
loss: 1.0307872295379639,grad_norm: 0.9407373055294929, iteration: 201973
loss: 0.9975301623344421,grad_norm: 0.8808315009667721, iteration: 201974
loss: 1.0003925561904907,grad_norm: 0.9999993946258942, iteration: 201975
loss: 1.0086054801940918,grad_norm: 0.9250971410003317, iteration: 201976
loss: 1.0127159357070923,grad_norm: 0.9296023895359344, iteration: 201977
loss: 1.0039374828338623,grad_norm: 0.8631097266114429, iteration: 201978
loss: 0.9639076590538025,grad_norm: 0.9999991908689159, iteration: 201979
loss: 1.0178169012069702,grad_norm: 0.9999995657200229, iteration: 201980
loss: 1.0252244472503662,grad_norm: 0.9999990593114717, iteration: 201981
loss: 1.013013243675232,grad_norm: 0.9999991635555181, iteration: 201982
loss: 0.9957050681114197,grad_norm: 0.9310120599895858, iteration: 201983
loss: 0.9896024465560913,grad_norm: 0.9083774976232422, iteration: 201984
loss: 0.9952138066291809,grad_norm: 0.9999991710159069, iteration: 201985
loss: 0.988085150718689,grad_norm: 0.9195333310080396, iteration: 201986
loss: 1.0241713523864746,grad_norm: 0.8779156663325844, iteration: 201987
loss: 1.0360548496246338,grad_norm: 0.9999990745385641, iteration: 201988
loss: 1.0135912895202637,grad_norm: 0.8764214192645972, iteration: 201989
loss: 1.0097250938415527,grad_norm: 0.9999992571520276, iteration: 201990
loss: 1.021127700805664,grad_norm: 0.8394105079414407, iteration: 201991
loss: 1.0260885953903198,grad_norm: 0.8721248618139265, iteration: 201992
loss: 1.0362235307693481,grad_norm: 0.8176011613110774, iteration: 201993
loss: 1.0336229801177979,grad_norm: 0.9999989138819582, iteration: 201994
loss: 0.9799630641937256,grad_norm: 0.9999991298539038, iteration: 201995
loss: 0.9988943338394165,grad_norm: 0.9999991302811916, iteration: 201996
loss: 1.0005414485931396,grad_norm: 0.9999992007733238, iteration: 201997
loss: 1.0189329385757446,grad_norm: 0.7835794777567252, iteration: 201998
loss: 1.0624178647994995,grad_norm: 0.8286833626615083, iteration: 201999
loss: 1.0271345376968384,grad_norm: 0.9999995918281304, iteration: 202000
loss: 1.0013704299926758,grad_norm: 0.9999991219933937, iteration: 202001
loss: 0.9900645613670349,grad_norm: 0.9407513954424929, iteration: 202002
loss: 0.9453341364860535,grad_norm: 0.9999989644853992, iteration: 202003
loss: 0.9996291995048523,grad_norm: 0.9914711317410135, iteration: 202004
loss: 1.0286377668380737,grad_norm: 0.9999991609186031, iteration: 202005
loss: 0.9835968017578125,grad_norm: 0.9999989707925703, iteration: 202006
loss: 1.0063459873199463,grad_norm: 0.8795548797570891, iteration: 202007
loss: 1.0424269437789917,grad_norm: 0.9999991103442016, iteration: 202008
loss: 0.9816248416900635,grad_norm: 0.9999992353255506, iteration: 202009
loss: 0.9975456595420837,grad_norm: 0.9357532993207202, iteration: 202010
loss: 1.0072470903396606,grad_norm: 0.9999990147093489, iteration: 202011
loss: 0.9435228109359741,grad_norm: 0.9999990988268807, iteration: 202012
loss: 1.0521047115325928,grad_norm: 0.9999995485401338, iteration: 202013
loss: 0.9804429411888123,grad_norm: 0.8446007325293325, iteration: 202014
loss: 0.995302140712738,grad_norm: 0.9999990152073908, iteration: 202015
loss: 0.9641098380088806,grad_norm: 0.9841664195804807, iteration: 202016
loss: 1.0143882036209106,grad_norm: 0.9608636168198487, iteration: 202017
loss: 0.9894424080848694,grad_norm: 0.7832257832188605, iteration: 202018
loss: 0.9869351387023926,grad_norm: 0.8052256912518541, iteration: 202019
loss: 0.9828745722770691,grad_norm: 0.9228520282055026, iteration: 202020
loss: 0.9921367764472961,grad_norm: 0.9999991707821352, iteration: 202021
loss: 0.9484497904777527,grad_norm: 0.9999996043422009, iteration: 202022
loss: 1.0421509742736816,grad_norm: 0.9999996286028096, iteration: 202023
loss: 1.0136288404464722,grad_norm: 0.90821798661701, iteration: 202024
loss: 1.0057895183563232,grad_norm: 0.9999993301422178, iteration: 202025
loss: 1.0176656246185303,grad_norm: 0.9999990223243128, iteration: 202026
loss: 1.0606266260147095,grad_norm: 0.9999991564815671, iteration: 202027
loss: 0.9830261468887329,grad_norm: 0.9999989668302837, iteration: 202028
loss: 1.0694092512130737,grad_norm: 0.9718793297544747, iteration: 202029
loss: 0.9885392785072327,grad_norm: 0.8836259004653285, iteration: 202030
loss: 1.0312118530273438,grad_norm: 0.9977692859035152, iteration: 202031
loss: 1.0203526020050049,grad_norm: 0.9999995054881566, iteration: 202032
loss: 1.0111455917358398,grad_norm: 0.9999991386066784, iteration: 202033
loss: 0.9783817529678345,grad_norm: 0.9177840443752602, iteration: 202034
loss: 1.0186882019042969,grad_norm: 0.9951997939615522, iteration: 202035
loss: 1.010435700416565,grad_norm: 0.9999997402713987, iteration: 202036
loss: 0.9894458055496216,grad_norm: 0.8833391774765652, iteration: 202037
loss: 1.0023157596588135,grad_norm: 0.9999990256989363, iteration: 202038
loss: 1.0024652481079102,grad_norm: 0.9999991345803441, iteration: 202039
loss: 0.9686233997344971,grad_norm: 0.9873682334967947, iteration: 202040
loss: 1.0134491920471191,grad_norm: 0.921485702425571, iteration: 202041
loss: 1.0265262126922607,grad_norm: 0.9780354926517137, iteration: 202042
loss: 1.0126557350158691,grad_norm: 0.80775521635373, iteration: 202043
loss: 1.0296697616577148,grad_norm: 0.9999995212106916, iteration: 202044
loss: 1.041648030281067,grad_norm: 0.915038023395763, iteration: 202045
loss: 1.0009067058563232,grad_norm: 0.9471639341968312, iteration: 202046
loss: 0.9916591644287109,grad_norm: 0.9419813800342554, iteration: 202047
loss: 0.977109968662262,grad_norm: 0.9720434470959344, iteration: 202048
loss: 1.0433014631271362,grad_norm: 0.99999964992994, iteration: 202049
loss: 1.0003732442855835,grad_norm: 0.9062890106588507, iteration: 202050
loss: 0.9641352295875549,grad_norm: 0.9999992424566275, iteration: 202051
loss: 1.0106606483459473,grad_norm: 0.999999139397301, iteration: 202052
loss: 0.9926522970199585,grad_norm: 0.9479639350341288, iteration: 202053
loss: 0.9968428611755371,grad_norm: 0.745328087902622, iteration: 202054
loss: 0.9852524399757385,grad_norm: 0.8338225062257992, iteration: 202055
loss: 0.9970307946205139,grad_norm: 0.9999990007880557, iteration: 202056
loss: 1.0297287702560425,grad_norm: 0.9999990495204742, iteration: 202057
loss: 1.0378954410552979,grad_norm: 0.999999081124577, iteration: 202058
loss: 1.0034207105636597,grad_norm: 0.9414190168268095, iteration: 202059
loss: 0.9762597680091858,grad_norm: 0.9954753934943401, iteration: 202060
loss: 0.984610915184021,grad_norm: 0.9999994549980004, iteration: 202061
loss: 1.0164803266525269,grad_norm: 0.9999990895569858, iteration: 202062
loss: 1.0120692253112793,grad_norm: 0.9999990270357133, iteration: 202063
loss: 1.0260884761810303,grad_norm: 0.9597586405314781, iteration: 202064
loss: 1.029495358467102,grad_norm: 0.8734958654183819, iteration: 202065
loss: 0.9875882863998413,grad_norm: 0.9053319017517879, iteration: 202066
loss: 1.0047614574432373,grad_norm: 0.8596981264983771, iteration: 202067
loss: 1.005954623222351,grad_norm: 0.9999991553986359, iteration: 202068
loss: 1.0476858615875244,grad_norm: 0.9999989717475156, iteration: 202069
loss: 1.000216007232666,grad_norm: 0.8920669881064375, iteration: 202070
loss: 0.9839330315589905,grad_norm: 0.9999992029968074, iteration: 202071
loss: 0.9962692260742188,grad_norm: 0.9999990953737216, iteration: 202072
loss: 1.0084898471832275,grad_norm: 0.9144267824137834, iteration: 202073
loss: 0.9862217307090759,grad_norm: 0.8826597228098181, iteration: 202074
loss: 1.0162005424499512,grad_norm: 0.976371614330096, iteration: 202075
loss: 0.9997510313987732,grad_norm: 0.9847044557614306, iteration: 202076
loss: 0.9873650074005127,grad_norm: 0.9542904283277173, iteration: 202077
loss: 0.9779133796691895,grad_norm: 0.9202759501099974, iteration: 202078
loss: 1.033729076385498,grad_norm: 0.9999990239155975, iteration: 202079
loss: 1.0126577615737915,grad_norm: 0.8429836817700597, iteration: 202080
loss: 1.0189517736434937,grad_norm: 0.876183158449501, iteration: 202081
loss: 0.9859148263931274,grad_norm: 0.9563287956157378, iteration: 202082
loss: 0.9605449438095093,grad_norm: 0.8520075386817675, iteration: 202083
loss: 0.9778757691383362,grad_norm: 0.8853607543894887, iteration: 202084
loss: 1.0179635286331177,grad_norm: 0.8933641896404636, iteration: 202085
loss: 1.0136216878890991,grad_norm: 0.9999990082438471, iteration: 202086
loss: 0.9856495261192322,grad_norm: 0.8390777270763736, iteration: 202087
loss: 1.003234624862671,grad_norm: 0.7900234447125959, iteration: 202088
loss: 0.9934279322624207,grad_norm: 0.9901234008885791, iteration: 202089
loss: 0.9889165759086609,grad_norm: 0.6921439635023088, iteration: 202090
loss: 1.0632071495056152,grad_norm: 0.9230313079049561, iteration: 202091
loss: 1.0098440647125244,grad_norm: 0.8569650107272392, iteration: 202092
loss: 1.0382648706436157,grad_norm: 0.999999208429826, iteration: 202093
loss: 0.9906945824623108,grad_norm: 0.8406426647565338, iteration: 202094
loss: 0.9948040246963501,grad_norm: 0.8716690839069929, iteration: 202095
loss: 0.9785823225975037,grad_norm: 0.9614884156361608, iteration: 202096
loss: 1.0375462770462036,grad_norm: 0.9999990163019244, iteration: 202097
loss: 0.9616562128067017,grad_norm: 0.943064707045433, iteration: 202098
loss: 1.0026049613952637,grad_norm: 0.9999990798353072, iteration: 202099
loss: 1.0224393606185913,grad_norm: 0.9984668499710587, iteration: 202100
loss: 0.994127631187439,grad_norm: 0.9917501090920536, iteration: 202101
loss: 0.9573695659637451,grad_norm: 0.989330419066263, iteration: 202102
loss: 1.0282748937606812,grad_norm: 0.9292148581925248, iteration: 202103
loss: 1.024476408958435,grad_norm: 0.948486295520923, iteration: 202104
loss: 1.053530216217041,grad_norm: 0.9999995221894485, iteration: 202105
loss: 0.9610934257507324,grad_norm: 0.9999992088169795, iteration: 202106
loss: 1.0240724086761475,grad_norm: 0.9999990423621189, iteration: 202107
loss: 1.0035978555679321,grad_norm: 0.9269942776888208, iteration: 202108
loss: 0.9910137057304382,grad_norm: 0.9692269084984012, iteration: 202109
loss: 1.024747371673584,grad_norm: 0.9214650783066018, iteration: 202110
loss: 0.9809388518333435,grad_norm: 0.9027970394264687, iteration: 202111
loss: 0.9946878552436829,grad_norm: 0.932914467663857, iteration: 202112
loss: 0.9928812384605408,grad_norm: 0.9999990567608555, iteration: 202113
loss: 1.0138968229293823,grad_norm: 0.9989956469576449, iteration: 202114
loss: 0.966829776763916,grad_norm: 0.8954246001898835, iteration: 202115
loss: 1.063106894493103,grad_norm: 0.9999992503175342, iteration: 202116
loss: 0.9863339066505432,grad_norm: 0.9151956440986079, iteration: 202117
loss: 0.9971531629562378,grad_norm: 0.9202448121768458, iteration: 202118
loss: 0.9684012532234192,grad_norm: 0.7039357355634667, iteration: 202119
loss: 0.9947452545166016,grad_norm: 0.9999991449450628, iteration: 202120
loss: 1.008839726448059,grad_norm: 0.9999991924371924, iteration: 202121
loss: 0.9624002575874329,grad_norm: 0.8714247360156022, iteration: 202122
loss: 0.9560674428939819,grad_norm: 0.9999991092851697, iteration: 202123
loss: 1.020969033241272,grad_norm: 0.9999991072312084, iteration: 202124
loss: 1.0098586082458496,grad_norm: 0.89392164392673, iteration: 202125
loss: 1.0180968046188354,grad_norm: 0.9522627201024867, iteration: 202126
loss: 1.02690589427948,grad_norm: 0.8343797862195564, iteration: 202127
loss: 1.0033173561096191,grad_norm: 0.8937723764556076, iteration: 202128
loss: 0.9897786378860474,grad_norm: 0.999999015071608, iteration: 202129
loss: 1.0048103332519531,grad_norm: 0.8891699571783452, iteration: 202130
loss: 0.9952602982521057,grad_norm: 0.8541241108791098, iteration: 202131
loss: 1.0076884031295776,grad_norm: 0.9856235460953742, iteration: 202132
loss: 0.989128053188324,grad_norm: 0.9130268185801855, iteration: 202133
loss: 0.9865341186523438,grad_norm: 0.95096873866476, iteration: 202134
loss: 0.9711621403694153,grad_norm: 0.9054721870363283, iteration: 202135
loss: 1.1064515113830566,grad_norm: 0.9999996278281944, iteration: 202136
loss: 0.9891989827156067,grad_norm: 0.8564650848055315, iteration: 202137
loss: 0.9915816187858582,grad_norm: 0.8486531690799562, iteration: 202138
loss: 1.0141769647598267,grad_norm: 0.8040427980234663, iteration: 202139
loss: 1.0316145420074463,grad_norm: 0.999999188082554, iteration: 202140
loss: 1.0375287532806396,grad_norm: 0.9999991545867086, iteration: 202141
loss: 0.9958972930908203,grad_norm: 0.9463135738979909, iteration: 202142
loss: 0.9930350184440613,grad_norm: 0.9999991464268541, iteration: 202143
loss: 1.0344384908676147,grad_norm: 0.8630553352115092, iteration: 202144
loss: 0.9819008111953735,grad_norm: 0.883540671921708, iteration: 202145
loss: 0.991858959197998,grad_norm: 0.9999990244618167, iteration: 202146
loss: 0.9922428131103516,grad_norm: 0.9052054314276379, iteration: 202147
loss: 1.0408272743225098,grad_norm: 0.9999991753321797, iteration: 202148
loss: 1.005458950996399,grad_norm: 0.8959665881990347, iteration: 202149
loss: 1.0302104949951172,grad_norm: 0.9983030397475915, iteration: 202150
loss: 1.028883457183838,grad_norm: 0.9999991370875958, iteration: 202151
loss: 1.019551396369934,grad_norm: 0.8608449447495725, iteration: 202152
loss: 1.0452229976654053,grad_norm: 0.9492945140760287, iteration: 202153
loss: 0.9845391511917114,grad_norm: 0.9999991717725097, iteration: 202154
loss: 0.9966514110565186,grad_norm: 0.9999990960249243, iteration: 202155
loss: 0.9898037314414978,grad_norm: 0.6979070694520242, iteration: 202156
loss: 0.9738566279411316,grad_norm: 0.9295113230102336, iteration: 202157
loss: 1.0005073547363281,grad_norm: 0.9102510729371328, iteration: 202158
loss: 0.9726635813713074,grad_norm: 0.8894769638615528, iteration: 202159
loss: 1.0126243829727173,grad_norm: 0.9999991347373046, iteration: 202160
loss: 0.9999403953552246,grad_norm: 0.9999991523652494, iteration: 202161
loss: 0.9918904304504395,grad_norm: 0.9551551673650303, iteration: 202162
loss: 1.0713034868240356,grad_norm: 0.8771132323844989, iteration: 202163
loss: 1.0003412961959839,grad_norm: 0.9312865115494008, iteration: 202164
loss: 1.0268269777297974,grad_norm: 0.9999998344348058, iteration: 202165
loss: 0.9982990622520447,grad_norm: 0.9434743995803003, iteration: 202166
loss: 0.9754277467727661,grad_norm: 0.8903150651911008, iteration: 202167
loss: 0.9955558180809021,grad_norm: 0.9474629112628785, iteration: 202168
loss: 0.9982113242149353,grad_norm: 0.9590053667478382, iteration: 202169
loss: 0.9904463291168213,grad_norm: 0.9999991832574104, iteration: 202170
loss: 0.9776697754859924,grad_norm: 0.9466483869054964, iteration: 202171
loss: 1.0107717514038086,grad_norm: 0.8640822307870615, iteration: 202172
loss: 1.0255986452102661,grad_norm: 0.9838136417044656, iteration: 202173
loss: 0.9969046711921692,grad_norm: 0.882994683862819, iteration: 202174
loss: 0.9897940754890442,grad_norm: 0.9048790021177471, iteration: 202175
loss: 0.9923814535140991,grad_norm: 0.8608218010898284, iteration: 202176
loss: 0.9808182120323181,grad_norm: 0.999999022741614, iteration: 202177
loss: 0.9813344478607178,grad_norm: 0.9999989560664243, iteration: 202178
loss: 1.0203109979629517,grad_norm: 0.8717945182604404, iteration: 202179
loss: 1.0329846143722534,grad_norm: 0.9999990322364514, iteration: 202180
loss: 1.0086146593093872,grad_norm: 0.8129069662000077, iteration: 202181
loss: 1.01710844039917,grad_norm: 0.8356507943901853, iteration: 202182
loss: 1.002669095993042,grad_norm: 0.987593124859645, iteration: 202183
loss: 0.9886912703514099,grad_norm: 0.8760546899993005, iteration: 202184
loss: 0.977424144744873,grad_norm: 0.8615471876818931, iteration: 202185
loss: 1.029396891593933,grad_norm: 0.9239449115511569, iteration: 202186
loss: 0.9833687543869019,grad_norm: 0.9386815075603056, iteration: 202187
loss: 0.984947144985199,grad_norm: 0.7195200073399753, iteration: 202188
loss: 1.0466848611831665,grad_norm: 0.9999991779202717, iteration: 202189
loss: 1.0016542673110962,grad_norm: 0.9999991023346899, iteration: 202190
loss: 0.9983286261558533,grad_norm: 0.999999160371791, iteration: 202191
loss: 1.0102635622024536,grad_norm: 0.8778587340898854, iteration: 202192
loss: 0.9945327639579773,grad_norm: 0.8689579759292683, iteration: 202193
loss: 0.9869331121444702,grad_norm: 0.8604764030361522, iteration: 202194
loss: 1.0000616312026978,grad_norm: 0.7868633884031613, iteration: 202195
loss: 1.0156922340393066,grad_norm: 0.9999989611042335, iteration: 202196
loss: 1.0268501043319702,grad_norm: 0.8730030602882869, iteration: 202197
loss: 0.9935740828514099,grad_norm: 0.9943439428353126, iteration: 202198
loss: 0.9856945276260376,grad_norm: 0.8898104993574188, iteration: 202199
loss: 0.9837518930435181,grad_norm: 0.8885887461778568, iteration: 202200
loss: 1.0038071870803833,grad_norm: 0.9023306167688402, iteration: 202201
loss: 0.9522318840026855,grad_norm: 0.9328242864366566, iteration: 202202
loss: 1.000991702079773,grad_norm: 0.8997599718907202, iteration: 202203
loss: 0.9748013019561768,grad_norm: 0.9999991218073712, iteration: 202204
loss: 0.9667782187461853,grad_norm: 0.7880728511017578, iteration: 202205
loss: 1.0218877792358398,grad_norm: 0.9564601484996644, iteration: 202206
loss: 1.02616548538208,grad_norm: 0.9999991629380343, iteration: 202207
loss: 0.9936944246292114,grad_norm: 0.7785989335902728, iteration: 202208
loss: 1.02388334274292,grad_norm: 0.9135783916104752, iteration: 202209
loss: 0.9853906035423279,grad_norm: 0.8486956062038709, iteration: 202210
loss: 0.9965183138847351,grad_norm: 0.9999990822877413, iteration: 202211
loss: 0.9961802363395691,grad_norm: 0.8524899833483228, iteration: 202212
loss: 1.0109318494796753,grad_norm: 0.9727186046744302, iteration: 202213
loss: 1.01875901222229,grad_norm: 0.9649624074080544, iteration: 202214
loss: 0.9987820982933044,grad_norm: 0.9936644847948879, iteration: 202215
loss: 1.0188761949539185,grad_norm: 0.9790919335183509, iteration: 202216
loss: 0.9840616583824158,grad_norm: 0.9262867852624057, iteration: 202217
loss: 0.9919037222862244,grad_norm: 0.9755523722586694, iteration: 202218
loss: 0.9871297478675842,grad_norm: 0.9999991632086509, iteration: 202219
loss: 0.9861116409301758,grad_norm: 0.9999991497543911, iteration: 202220
loss: 0.9978587031364441,grad_norm: 0.9999990794194082, iteration: 202221
loss: 0.9952890276908875,grad_norm: 0.9999990600108808, iteration: 202222
loss: 1.051533579826355,grad_norm: 0.9999991541396226, iteration: 202223
loss: 0.9854824542999268,grad_norm: 0.9302438434156017, iteration: 202224
loss: 0.9560063481330872,grad_norm: 0.9269737613836727, iteration: 202225
loss: 0.9696178436279297,grad_norm: 0.9025291550459037, iteration: 202226
loss: 1.0191296339035034,grad_norm: 0.9249203608622653, iteration: 202227
loss: 0.9797728657722473,grad_norm: 0.9441665704054518, iteration: 202228
loss: 0.9523775577545166,grad_norm: 0.8950984374510367, iteration: 202229
loss: 1.0274704694747925,grad_norm: 0.9420543786668113, iteration: 202230
loss: 0.9728806614875793,grad_norm: 0.9486590874545928, iteration: 202231
loss: 0.9948422312736511,grad_norm: 0.9664809425035082, iteration: 202232
loss: 0.9852527976036072,grad_norm: 0.9926702132042585, iteration: 202233
loss: 0.9933043718338013,grad_norm: 0.9999992510493194, iteration: 202234
loss: 1.0118200778961182,grad_norm: 0.9195074991086577, iteration: 202235
loss: 1.0108014345169067,grad_norm: 0.9999990624318427, iteration: 202236
loss: 1.060185432434082,grad_norm: 0.9999995984841705, iteration: 202237
loss: 0.973324716091156,grad_norm: 0.9309094031408446, iteration: 202238
loss: 1.0083613395690918,grad_norm: 0.9784915917377474, iteration: 202239
loss: 0.9611143469810486,grad_norm: 0.887360012654305, iteration: 202240
loss: 0.9905564785003662,grad_norm: 0.8595318867212326, iteration: 202241
loss: 1.0010173320770264,grad_norm: 0.7464036258641387, iteration: 202242
loss: 1.0025101900100708,grad_norm: 0.9284201454374609, iteration: 202243
loss: 1.010555624961853,grad_norm: 0.7841086765929105, iteration: 202244
loss: 1.017765760421753,grad_norm: 0.9999999403037837, iteration: 202245
loss: 0.9927235841751099,grad_norm: 0.9999990819649138, iteration: 202246
loss: 1.026049256324768,grad_norm: 0.8697814448153436, iteration: 202247
loss: 1.0157009363174438,grad_norm: 0.9806226861514223, iteration: 202248
loss: 0.9933416843414307,grad_norm: 0.8378857951175799, iteration: 202249
loss: 1.0317697525024414,grad_norm: 0.8904776275608443, iteration: 202250
loss: 0.99630206823349,grad_norm: 0.9999989740850895, iteration: 202251
loss: 0.9713879227638245,grad_norm: 0.9258296062419222, iteration: 202252
loss: 0.9810364842414856,grad_norm: 0.9708851208265503, iteration: 202253
loss: 1.004130244255066,grad_norm: 0.8160646704553756, iteration: 202254
loss: 0.9666866660118103,grad_norm: 0.8165214485616923, iteration: 202255
loss: 1.0213645696640015,grad_norm: 0.9832072690814216, iteration: 202256
loss: 1.0309561491012573,grad_norm: 0.8272481645396517, iteration: 202257
loss: 0.9969989657402039,grad_norm: 0.9123511276694954, iteration: 202258
loss: 0.9898272156715393,grad_norm: 0.9999993858298628, iteration: 202259
loss: 0.9751093983650208,grad_norm: 0.9602941604660297, iteration: 202260
loss: 1.0085430145263672,grad_norm: 0.932272477284248, iteration: 202261
loss: 1.0061924457550049,grad_norm: 0.9602850146006574, iteration: 202262
loss: 1.0235624313354492,grad_norm: 0.7733817061703173, iteration: 202263
loss: 1.01230788230896,grad_norm: 0.9999991297194705, iteration: 202264
loss: 0.9805732369422913,grad_norm: 0.918094418489969, iteration: 202265
loss: 1.0029600858688354,grad_norm: 0.9999990160480289, iteration: 202266
loss: 0.9921318292617798,grad_norm: 0.8390612431456067, iteration: 202267
loss: 1.01142156124115,grad_norm: 0.999999053857557, iteration: 202268
loss: 0.9960190653800964,grad_norm: 0.9999991829640698, iteration: 202269
loss: 1.021836280822754,grad_norm: 0.9141091611736233, iteration: 202270
loss: 1.0258293151855469,grad_norm: 0.999999075918487, iteration: 202271
loss: 0.9936728477478027,grad_norm: 0.9999992029628825, iteration: 202272
loss: 1.0310111045837402,grad_norm: 0.8727902319722652, iteration: 202273
loss: 1.0134224891662598,grad_norm: 0.99999926511682, iteration: 202274
loss: 1.0140892267227173,grad_norm: 0.8868363939456996, iteration: 202275
loss: 1.0080004930496216,grad_norm: 0.999999245819334, iteration: 202276
loss: 1.0178602933883667,grad_norm: 0.9999992234692722, iteration: 202277
loss: 0.9619337320327759,grad_norm: 0.9999991833766231, iteration: 202278
loss: 0.984799325466156,grad_norm: 0.8250952876782442, iteration: 202279
loss: 1.0173739194869995,grad_norm: 0.9488467244374412, iteration: 202280
loss: 0.9854975938796997,grad_norm: 0.8319161286447231, iteration: 202281
loss: 0.9963779449462891,grad_norm: 0.8163367164758598, iteration: 202282
loss: 0.9819009900093079,grad_norm: 0.8194393023442921, iteration: 202283
loss: 1.012746810913086,grad_norm: 0.892520478958068, iteration: 202284
loss: 0.9764530658721924,grad_norm: 0.9136109177455811, iteration: 202285
loss: 1.0051536560058594,grad_norm: 0.9999990573532612, iteration: 202286
loss: 0.9835048317909241,grad_norm: 0.9069563243620988, iteration: 202287
loss: 0.9922024607658386,grad_norm: 0.7750312245522717, iteration: 202288
loss: 1.0411314964294434,grad_norm: 0.9999990690961225, iteration: 202289
loss: 0.9533718228340149,grad_norm: 0.9912323139247577, iteration: 202290
loss: 0.9753220677375793,grad_norm: 0.9999990092664501, iteration: 202291
loss: 0.992509126663208,grad_norm: 0.9999990737443627, iteration: 202292
loss: 0.9974033832550049,grad_norm: 0.964058985130707, iteration: 202293
loss: 1.005964994430542,grad_norm: 0.8290495565286763, iteration: 202294
loss: 1.0075360536575317,grad_norm: 0.9416545767038818, iteration: 202295
loss: 1.0369685888290405,grad_norm: 0.9906681615134723, iteration: 202296
loss: 1.0165328979492188,grad_norm: 0.8085660394192535, iteration: 202297
loss: 0.9809856414794922,grad_norm: 0.9999990995541264, iteration: 202298
loss: 1.00812566280365,grad_norm: 0.9999990912324456, iteration: 202299
loss: 0.998082160949707,grad_norm: 0.9999991758385172, iteration: 202300
loss: 0.9728151559829712,grad_norm: 0.7258903501212441, iteration: 202301
loss: 0.9562109112739563,grad_norm: 0.8473599895325676, iteration: 202302
loss: 1.016127347946167,grad_norm: 0.9969340021055277, iteration: 202303
loss: 0.9752228856086731,grad_norm: 0.8636407561814392, iteration: 202304
loss: 0.9954033493995667,grad_norm: 0.9212870448027908, iteration: 202305
loss: 0.9586300253868103,grad_norm: 0.9200662131768953, iteration: 202306
loss: 0.9744127988815308,grad_norm: 0.9999989994557621, iteration: 202307
loss: 0.9757143259048462,grad_norm: 0.9371695475907165, iteration: 202308
loss: 1.01603102684021,grad_norm: 0.9827404484813067, iteration: 202309
loss: 1.0531330108642578,grad_norm: 0.9999991196366612, iteration: 202310
loss: 0.9855987429618835,grad_norm: 0.9620786383106112, iteration: 202311
loss: 1.0444892644882202,grad_norm: 0.999999920397568, iteration: 202312
loss: 0.9997424483299255,grad_norm: 0.999998984768592, iteration: 202313
loss: 0.9711108207702637,grad_norm: 0.999999116777235, iteration: 202314
loss: 0.9561477899551392,grad_norm: 0.8975633325156397, iteration: 202315
loss: 1.0291517972946167,grad_norm: 0.999999074176466, iteration: 202316
loss: 1.0232559442520142,grad_norm: 0.9999996143365723, iteration: 202317
loss: 0.99176424741745,grad_norm: 0.9399164672723982, iteration: 202318
loss: 1.0187673568725586,grad_norm: 1.0000000182234743, iteration: 202319
loss: 0.9892387986183167,grad_norm: 0.9999989720786534, iteration: 202320
loss: 0.9503701329231262,grad_norm: 0.9440528988134634, iteration: 202321
loss: 1.0035605430603027,grad_norm: 0.9888198100350339, iteration: 202322
loss: 1.0756890773773193,grad_norm: 0.9999990856726264, iteration: 202323
loss: 0.9719809889793396,grad_norm: 0.8036363597197664, iteration: 202324
loss: 1.016526222229004,grad_norm: 0.9999990912560349, iteration: 202325
loss: 1.0239708423614502,grad_norm: 0.9373384795297894, iteration: 202326
loss: 1.0018171072006226,grad_norm: 0.9999989064492544, iteration: 202327
loss: 1.0362259149551392,grad_norm: 0.999998968863405, iteration: 202328
loss: 1.0278719663619995,grad_norm: 0.999999123809259, iteration: 202329
loss: 1.0197092294692993,grad_norm: 0.9999997864088216, iteration: 202330
loss: 1.0291763544082642,grad_norm: 0.8809673161946717, iteration: 202331
loss: 1.0229620933532715,grad_norm: 0.9590040029419448, iteration: 202332
loss: 1.0137745141983032,grad_norm: 0.9999991938479499, iteration: 202333
loss: 0.9988254308700562,grad_norm: 0.9674011428770664, iteration: 202334
loss: 1.0204086303710938,grad_norm: 0.9999994177226669, iteration: 202335
loss: 1.0188202857971191,grad_norm: 0.99999932499113, iteration: 202336
loss: 1.0120929479599,grad_norm: 0.8924371717075017, iteration: 202337
loss: 0.9834893941879272,grad_norm: 0.8405867219840775, iteration: 202338
loss: 1.0041497945785522,grad_norm: 0.9999994460043654, iteration: 202339
loss: 1.0631773471832275,grad_norm: 0.8466948148700577, iteration: 202340
loss: 1.0306373834609985,grad_norm: 1.000000012682099, iteration: 202341
loss: 1.035327672958374,grad_norm: 0.9999998417792262, iteration: 202342
loss: 0.986075222492218,grad_norm: 0.9999992213172908, iteration: 202343
loss: 1.002865195274353,grad_norm: 0.9473966295552133, iteration: 202344
loss: 1.0189528465270996,grad_norm: 0.9515993542745687, iteration: 202345
loss: 1.0269793272018433,grad_norm: 0.9213983956656268, iteration: 202346
loss: 0.9981958866119385,grad_norm: 0.9999990135011378, iteration: 202347
loss: 0.9959582686424255,grad_norm: 0.9999991344949448, iteration: 202348
loss: 1.0075777769088745,grad_norm: 0.8937966859000229, iteration: 202349
loss: 0.9696807265281677,grad_norm: 0.863091796252542, iteration: 202350
loss: 0.9813515543937683,grad_norm: 0.999999135785144, iteration: 202351
loss: 1.030197024345398,grad_norm: 0.9536315698076128, iteration: 202352
loss: 0.9651350378990173,grad_norm: 0.999999120043902, iteration: 202353
loss: 1.0126510858535767,grad_norm: 0.8438212416590396, iteration: 202354
loss: 1.0129024982452393,grad_norm: 0.915280704736453, iteration: 202355
loss: 1.0186944007873535,grad_norm: 0.8192284143046431, iteration: 202356
loss: 0.9652167558670044,grad_norm: 0.9344747983537627, iteration: 202357
loss: 0.9937514066696167,grad_norm: 0.9196438978529752, iteration: 202358
loss: 0.9878075122833252,grad_norm: 0.9421635091500477, iteration: 202359
loss: 1.0017606019973755,grad_norm: 0.9950105497616047, iteration: 202360
loss: 1.012040376663208,grad_norm: 0.9193496300921078, iteration: 202361
loss: 0.9756110906600952,grad_norm: 0.9999990793274693, iteration: 202362
loss: 1.0618104934692383,grad_norm: 0.9999992816646515, iteration: 202363
loss: 1.0212301015853882,grad_norm: 0.8901386784504166, iteration: 202364
loss: 1.0204780101776123,grad_norm: 0.9183325107406777, iteration: 202365
loss: 0.9816998839378357,grad_norm: 0.8784384199786701, iteration: 202366
loss: 1.0168123245239258,grad_norm: 0.9999990091521379, iteration: 202367
loss: 1.0462366342544556,grad_norm: 0.8448336778614621, iteration: 202368
loss: 0.9963254332542419,grad_norm: 0.9565971051108662, iteration: 202369
loss: 0.9718024730682373,grad_norm: 0.9999991273920558, iteration: 202370
loss: 1.0190914869308472,grad_norm: 0.9999991161331259, iteration: 202371
loss: 0.987885594367981,grad_norm: 0.9100622664291002, iteration: 202372
loss: 0.9949886202812195,grad_norm: 0.9999993496356524, iteration: 202373
loss: 1.010358214378357,grad_norm: 0.9999994802135367, iteration: 202374
loss: 0.9683961868286133,grad_norm: 0.999999089905403, iteration: 202375
loss: 1.0116528272628784,grad_norm: 0.9999991573503092, iteration: 202376
loss: 1.0254428386688232,grad_norm: 0.9018619219023819, iteration: 202377
loss: 1.0368927717208862,grad_norm: 0.999999902489522, iteration: 202378
loss: 1.003686547279358,grad_norm: 0.9099147591205103, iteration: 202379
loss: 0.9783161878585815,grad_norm: 0.8777614869271194, iteration: 202380
loss: 0.9784717559814453,grad_norm: 0.9999990047738023, iteration: 202381
loss: 1.0212273597717285,grad_norm: 0.9999990922330683, iteration: 202382
loss: 1.0045549869537354,grad_norm: 0.8779471901095411, iteration: 202383
loss: 1.017037272453308,grad_norm: 0.9999989834458736, iteration: 202384
loss: 0.9870197772979736,grad_norm: 0.9311672705207373, iteration: 202385
loss: 1.0300185680389404,grad_norm: 0.9999992537418345, iteration: 202386
loss: 1.0216648578643799,grad_norm: 0.9038870706177905, iteration: 202387
loss: 1.0307339429855347,grad_norm: 0.8979064680415649, iteration: 202388
loss: 1.0005556344985962,grad_norm: 0.8965498987033435, iteration: 202389
loss: 1.0034786462783813,grad_norm: 0.9999991418978796, iteration: 202390
loss: 0.9812580943107605,grad_norm: 0.9979253445992884, iteration: 202391
loss: 0.9854602217674255,grad_norm: 0.8899177544722255, iteration: 202392
loss: 1.0022060871124268,grad_norm: 0.9042315657487974, iteration: 202393
loss: 0.9781104326248169,grad_norm: 0.9943500407056861, iteration: 202394
loss: 1.0020134449005127,grad_norm: 0.9999993309450255, iteration: 202395
loss: 1.0115288496017456,grad_norm: 0.9491856685439491, iteration: 202396
loss: 1.0223865509033203,grad_norm: 0.9999990714665595, iteration: 202397
loss: 0.9760039448738098,grad_norm: 0.8750111196011662, iteration: 202398
loss: 1.0292232036590576,grad_norm: 0.854564435821374, iteration: 202399
loss: 1.0575920343399048,grad_norm: 0.9999992699240046, iteration: 202400
loss: 1.0027234554290771,grad_norm: 0.8538446006405743, iteration: 202401
loss: 1.0154149532318115,grad_norm: 0.8640280334565785, iteration: 202402
loss: 0.9863691926002502,grad_norm: 0.9848017146551946, iteration: 202403
loss: 1.0190566778182983,grad_norm: 0.9442335524405023, iteration: 202404
loss: 1.0297629833221436,grad_norm: 0.9370103359005517, iteration: 202405
loss: 1.008440613746643,grad_norm: 0.9999993392962457, iteration: 202406
loss: 0.9903144240379333,grad_norm: 0.9999989251080151, iteration: 202407
loss: 0.9912444353103638,grad_norm: 0.8728832323873543, iteration: 202408
loss: 1.016748309135437,grad_norm: 0.953021030689031, iteration: 202409
loss: 1.0336058139801025,grad_norm: 0.8492192082545813, iteration: 202410
loss: 1.0049484968185425,grad_norm: 0.8242790555462464, iteration: 202411
loss: 0.9986399412155151,grad_norm: 0.9829419237925106, iteration: 202412
loss: 0.999552845954895,grad_norm: 0.9838674281942588, iteration: 202413
loss: 1.0300278663635254,grad_norm: 0.7968579539959554, iteration: 202414
loss: 1.0115561485290527,grad_norm: 0.999999064637181, iteration: 202415
loss: 1.0083822011947632,grad_norm: 0.9999993634150381, iteration: 202416
loss: 0.9746705889701843,grad_norm: 0.9999991649268487, iteration: 202417
loss: 0.951032817363739,grad_norm: 0.999999100870438, iteration: 202418
loss: 1.0151728391647339,grad_norm: 0.8601949973615757, iteration: 202419
loss: 0.9949163794517517,grad_norm: 0.9999992927927631, iteration: 202420
loss: 1.0330870151519775,grad_norm: 0.9999990863396508, iteration: 202421
loss: 1.0454668998718262,grad_norm: 0.9999993195628066, iteration: 202422
loss: 1.000899314880371,grad_norm: 0.9552457164697468, iteration: 202423
loss: 1.0098884105682373,grad_norm: 0.8616265297070196, iteration: 202424
loss: 1.0396099090576172,grad_norm: 0.9515796088140474, iteration: 202425
loss: 0.9955821633338928,grad_norm: 0.9999991001755871, iteration: 202426
loss: 1.0031237602233887,grad_norm: 0.9999990681512089, iteration: 202427
loss: 1.0012718439102173,grad_norm: 0.7924118759007, iteration: 202428
loss: 0.9794986248016357,grad_norm: 0.999999152317868, iteration: 202429
loss: 1.0009039640426636,grad_norm: 0.9999991246171622, iteration: 202430
loss: 0.9898806214332581,grad_norm: 0.9023121822965212, iteration: 202431
loss: 1.0454667806625366,grad_norm: 0.8311924511191238, iteration: 202432
loss: 1.0160820484161377,grad_norm: 0.9999991770038613, iteration: 202433
loss: 1.023094892501831,grad_norm: 0.9999995034333139, iteration: 202434
loss: 1.0008517503738403,grad_norm: 0.9999992051052312, iteration: 202435
loss: 0.9896030426025391,grad_norm: 0.8938035313217697, iteration: 202436
loss: 0.9943461418151855,grad_norm: 0.9104009350266706, iteration: 202437
loss: 0.9847501516342163,grad_norm: 0.9999990841176049, iteration: 202438
loss: 1.0138921737670898,grad_norm: 0.8244979510110866, iteration: 202439
loss: 0.9528764486312866,grad_norm: 0.999999032674, iteration: 202440
loss: 0.9719343781471252,grad_norm: 0.9460550463453499, iteration: 202441
loss: 1.0007950067520142,grad_norm: 0.8961493323168247, iteration: 202442
loss: 0.9798049926757812,grad_norm: 0.9857988017056303, iteration: 202443
loss: 0.9966586232185364,grad_norm: 0.9015870064179644, iteration: 202444
loss: 0.9782382845878601,grad_norm: 0.8366123906582359, iteration: 202445
loss: 1.000002145767212,grad_norm: 0.9775350691806258, iteration: 202446
loss: 0.9762688875198364,grad_norm: 0.9994973206597497, iteration: 202447
loss: 0.9869382977485657,grad_norm: 0.7934321222916497, iteration: 202448
loss: 1.0043545961380005,grad_norm: 0.9999990956661593, iteration: 202449
loss: 1.0393463373184204,grad_norm: 0.8294918637956351, iteration: 202450
loss: 1.0619431734085083,grad_norm: 0.9999995630858953, iteration: 202451
loss: 0.9504954218864441,grad_norm: 0.9573599475573945, iteration: 202452
loss: 0.9859848022460938,grad_norm: 0.8599240811842568, iteration: 202453
loss: 1.0288996696472168,grad_norm: 0.9963907966011076, iteration: 202454
loss: 1.0546371936798096,grad_norm: 0.9999996747270519, iteration: 202455
loss: 0.9931509494781494,grad_norm: 0.9999991087467979, iteration: 202456
loss: 1.0082308053970337,grad_norm: 0.9999991478586164, iteration: 202457
loss: 0.9977438449859619,grad_norm: 0.992516890347021, iteration: 202458
loss: 1.0144978761672974,grad_norm: 0.8885127367959776, iteration: 202459
loss: 1.0040457248687744,grad_norm: 0.8657747174581781, iteration: 202460
loss: 0.9996416568756104,grad_norm: 0.8953531765655426, iteration: 202461
loss: 1.0424038171768188,grad_norm: 0.9999992387423897, iteration: 202462
loss: 0.9773736596107483,grad_norm: 0.9999990635438561, iteration: 202463
loss: 1.0071525573730469,grad_norm: 0.9999989901856658, iteration: 202464
loss: 1.0246303081512451,grad_norm: 0.9999992180664435, iteration: 202465
loss: 0.9667567610740662,grad_norm: 0.9999992296558021, iteration: 202466
loss: 1.0400643348693848,grad_norm: 0.999999195719123, iteration: 202467
loss: 0.9725832939147949,grad_norm: 0.9999990335880272, iteration: 202468
loss: 0.9915845990180969,grad_norm: 0.954457084577604, iteration: 202469
loss: 0.9956255555152893,grad_norm: 0.9999990305879751, iteration: 202470
loss: 0.9847878217697144,grad_norm: 0.9999990762809814, iteration: 202471
loss: 1.0120075941085815,grad_norm: 0.9449705965424996, iteration: 202472
loss: 1.031156063079834,grad_norm: 0.8836788197403832, iteration: 202473
loss: 0.9915547370910645,grad_norm: 0.9999995275092719, iteration: 202474
loss: 0.9994338750839233,grad_norm: 0.7948477885871541, iteration: 202475
loss: 1.0106884241104126,grad_norm: 0.9515208435212521, iteration: 202476
loss: 1.017398715019226,grad_norm: 0.9999991043385634, iteration: 202477
loss: 1.0274306535720825,grad_norm: 0.9999991450415276, iteration: 202478
loss: 0.9774397611618042,grad_norm: 0.9999990962361219, iteration: 202479
loss: 1.0279492139816284,grad_norm: 0.9999997281645635, iteration: 202480
loss: 1.0057525634765625,grad_norm: 0.9999992486393275, iteration: 202481
loss: 0.9939625859260559,grad_norm: 0.8557212498744973, iteration: 202482
loss: 1.0147054195404053,grad_norm: 0.9999992930681715, iteration: 202483
loss: 0.9926585555076599,grad_norm: 0.9044839155109442, iteration: 202484
loss: 1.0931071043014526,grad_norm: 0.999999201347082, iteration: 202485
loss: 0.9707674384117126,grad_norm: 0.8326636815493258, iteration: 202486
loss: 1.0150408744812012,grad_norm: 0.9418638019608132, iteration: 202487
loss: 1.0129820108413696,grad_norm: 0.9999991545143678, iteration: 202488
loss: 1.0326781272888184,grad_norm: 0.9999990920004322, iteration: 202489
loss: 1.012757658958435,grad_norm: 0.9433598670379395, iteration: 202490
loss: 0.9897789359092712,grad_norm: 0.8804882748300799, iteration: 202491
loss: 1.068696141242981,grad_norm: 0.9999990126215624, iteration: 202492
loss: 1.018679141998291,grad_norm: 1.0000000154649944, iteration: 202493
loss: 1.0199685096740723,grad_norm: 0.9999995470480341, iteration: 202494
loss: 0.982459306716919,grad_norm: 0.9969852853998333, iteration: 202495
loss: 0.998633623123169,grad_norm: 0.8658508732204961, iteration: 202496
loss: 1.0042870044708252,grad_norm: 0.9504462408272775, iteration: 202497
loss: 1.0218639373779297,grad_norm: 0.9999992315211426, iteration: 202498
loss: 1.0213937759399414,grad_norm: 0.999999322760445, iteration: 202499
loss: 0.9912558794021606,grad_norm: 0.9999990228065875, iteration: 202500
loss: 1.010050654411316,grad_norm: 0.999999788882519, iteration: 202501
loss: 1.0216628313064575,grad_norm: 0.9687662087378177, iteration: 202502
loss: 1.001691222190857,grad_norm: 0.9999990940126429, iteration: 202503
loss: 1.0158604383468628,grad_norm: 0.8975723452205179, iteration: 202504
loss: 1.0167204141616821,grad_norm: 0.8329640245617425, iteration: 202505
loss: 0.988081157207489,grad_norm: 0.9258516364705811, iteration: 202506
loss: 1.0203200578689575,grad_norm: 0.9999991619180211, iteration: 202507
loss: 0.9864377975463867,grad_norm: 0.8060413009375659, iteration: 202508
loss: 0.96860671043396,grad_norm: 0.9999991065877025, iteration: 202509
loss: 1.0092463493347168,grad_norm: 0.9708712343548614, iteration: 202510
loss: 1.058477759361267,grad_norm: 0.9999991524941115, iteration: 202511
loss: 1.0090205669403076,grad_norm: 0.9727814259829305, iteration: 202512
loss: 0.9620487689971924,grad_norm: 0.9018224886056694, iteration: 202513
loss: 0.9915786981582642,grad_norm: 0.9173471755807835, iteration: 202514
loss: 0.9964609146118164,grad_norm: 0.8295719714002994, iteration: 202515
loss: 1.0301401615142822,grad_norm: 0.9320568014501557, iteration: 202516
loss: 0.9952328205108643,grad_norm: 0.999999168546412, iteration: 202517
loss: 1.0303890705108643,grad_norm: 0.9999991689741419, iteration: 202518
loss: 1.0159952640533447,grad_norm: 0.9999992109248971, iteration: 202519
loss: 1.0021765232086182,grad_norm: 0.9999989798288376, iteration: 202520
loss: 1.0234423875808716,grad_norm: 0.8923865817966082, iteration: 202521
loss: 0.980732798576355,grad_norm: 0.999999049574192, iteration: 202522
loss: 0.9963067770004272,grad_norm: 0.9999990921447267, iteration: 202523
loss: 0.9958011507987976,grad_norm: 0.9033909717786952, iteration: 202524
loss: 0.9856393337249756,grad_norm: 0.858304503959895, iteration: 202525
loss: 0.9955219030380249,grad_norm: 0.9550823523597911, iteration: 202526
loss: 0.9999191164970398,grad_norm: 0.9999991939112063, iteration: 202527
loss: 1.0002626180648804,grad_norm: 0.9523306639980005, iteration: 202528
loss: 0.9994168877601624,grad_norm: 0.8903066460822538, iteration: 202529
loss: 1.0175321102142334,grad_norm: 0.9937303338897066, iteration: 202530
loss: 1.0175130367279053,grad_norm: 0.999999249039015, iteration: 202531
loss: 1.0134917497634888,grad_norm: 0.9301874419401627, iteration: 202532
loss: 0.9994492530822754,grad_norm: 0.85951642632365, iteration: 202533
loss: 1.0027648210525513,grad_norm: 0.999999001899488, iteration: 202534
loss: 1.0035685300827026,grad_norm: 0.9563185560824993, iteration: 202535
loss: 1.0337257385253906,grad_norm: 0.9999992753478116, iteration: 202536
loss: 0.9978889226913452,grad_norm: 0.853072073475369, iteration: 202537
loss: 1.032939076423645,grad_norm: 0.926000171376914, iteration: 202538
loss: 0.9896519780158997,grad_norm: 0.9458637669542075, iteration: 202539
loss: 0.986591100692749,grad_norm: 0.999999256759421, iteration: 202540
loss: 1.0291967391967773,grad_norm: 0.9999989601174205, iteration: 202541
loss: 0.9857461452484131,grad_norm: 0.903785564618268, iteration: 202542
loss: 0.9919058680534363,grad_norm: 0.956516032728917, iteration: 202543
loss: 1.0086264610290527,grad_norm: 0.9874356205262629, iteration: 202544
loss: 1.008191704750061,grad_norm: 0.772634265198698, iteration: 202545
loss: 1.0159664154052734,grad_norm: 0.9290436575445379, iteration: 202546
loss: 1.04481041431427,grad_norm: 0.999999623039103, iteration: 202547
loss: 1.005785584449768,grad_norm: 0.9999993252377444, iteration: 202548
loss: 0.9683804512023926,grad_norm: 0.8754515775199244, iteration: 202549
loss: 0.9967378377914429,grad_norm: 0.9151393889221751, iteration: 202550
loss: 1.0191086530685425,grad_norm: 0.9999992416496775, iteration: 202551
loss: 0.9836543202400208,grad_norm: 0.9999992260095678, iteration: 202552
loss: 1.010677456855774,grad_norm: 0.9431109812258704, iteration: 202553
loss: 0.9588568210601807,grad_norm: 0.8362299631816996, iteration: 202554
loss: 1.019934058189392,grad_norm: 0.9380151244759513, iteration: 202555
loss: 1.002058982849121,grad_norm: 0.9371191586038071, iteration: 202556
loss: 1.1916532516479492,grad_norm: 0.999999731810017, iteration: 202557
loss: 0.9924973249435425,grad_norm: 0.8060076768206558, iteration: 202558
loss: 0.9976485371589661,grad_norm: 0.9765369689274341, iteration: 202559
loss: 1.0328787565231323,grad_norm: 0.7965439588951333, iteration: 202560
loss: 0.9800763726234436,grad_norm: 0.8858370192490823, iteration: 202561
loss: 0.9934207201004028,grad_norm: 0.999999879624581, iteration: 202562
loss: 0.9773560762405396,grad_norm: 0.8344584592709055, iteration: 202563
loss: 0.9975230097770691,grad_norm: 0.9999991897812484, iteration: 202564
loss: 0.9919285774230957,grad_norm: 0.9999993124342234, iteration: 202565
loss: 1.0058739185333252,grad_norm: 0.8609822646450522, iteration: 202566
loss: 1.0120962858200073,grad_norm: 0.8081077262120981, iteration: 202567
loss: 1.003217101097107,grad_norm: 0.9999992583376209, iteration: 202568
loss: 1.0177860260009766,grad_norm: 0.9595570468526341, iteration: 202569
loss: 0.9752098321914673,grad_norm: 0.9392060171251811, iteration: 202570
loss: 0.9933087825775146,grad_norm: 0.9206083373555689, iteration: 202571
loss: 1.0581274032592773,grad_norm: 1.0000000074239044, iteration: 202572
loss: 0.9755138754844666,grad_norm: 0.9999991683987857, iteration: 202573
loss: 0.9635566473007202,grad_norm: 0.9657654801770699, iteration: 202574
loss: 1.0324243307113647,grad_norm: 0.9983805681231703, iteration: 202575
loss: 1.0252820253372192,grad_norm: 0.8277724714214462, iteration: 202576
loss: 1.0197452306747437,grad_norm: 0.9999991777011384, iteration: 202577
loss: 1.0051554441452026,grad_norm: 0.9741217589279285, iteration: 202578
loss: 0.9933168888092041,grad_norm: 0.9999996498206646, iteration: 202579
loss: 0.9850945472717285,grad_norm: 0.9403370994592904, iteration: 202580
loss: 1.0166693925857544,grad_norm: 0.9234480093502939, iteration: 202581
loss: 1.024398922920227,grad_norm: 0.9840001423528089, iteration: 202582
loss: 0.9953077435493469,grad_norm: 0.9794580154276477, iteration: 202583
loss: 1.0068702697753906,grad_norm: 0.9553746382116546, iteration: 202584
loss: 1.0839149951934814,grad_norm: 0.9999999997072773, iteration: 202585
loss: 1.0136229991912842,grad_norm: 0.8916136861802132, iteration: 202586
loss: 0.9905580282211304,grad_norm: 0.9999991879816097, iteration: 202587
loss: 0.9969833493232727,grad_norm: 0.9427955329809404, iteration: 202588
loss: 0.995032548904419,grad_norm: 0.9999991552058434, iteration: 202589
loss: 0.9960671067237854,grad_norm: 0.8646610209004703, iteration: 202590
loss: 0.9680135250091553,grad_norm: 0.9187540015569137, iteration: 202591
loss: 1.001451849937439,grad_norm: 0.9999996893274583, iteration: 202592
loss: 0.9836506247520447,grad_norm: 0.999999070103638, iteration: 202593
loss: 1.0244396924972534,grad_norm: 0.9321335232206128, iteration: 202594
loss: 1.0143566131591797,grad_norm: 0.8824550056149713, iteration: 202595
loss: 1.0053942203521729,grad_norm: 0.9999990477453046, iteration: 202596
loss: 1.0053479671478271,grad_norm: 0.9025763035612003, iteration: 202597
loss: 0.9978996515274048,grad_norm: 0.9999989541370018, iteration: 202598
loss: 0.9857794642448425,grad_norm: 0.990719364294172, iteration: 202599
loss: 0.9876699447631836,grad_norm: 0.944015282472531, iteration: 202600
loss: 1.0238533020019531,grad_norm: 0.9999992257410366, iteration: 202601
loss: 1.0084220170974731,grad_norm: 0.9999991223472798, iteration: 202602
loss: 1.0087599754333496,grad_norm: 0.9999990880632521, iteration: 202603
loss: 0.9938579797744751,grad_norm: 0.9999990959812299, iteration: 202604
loss: 1.0156956911087036,grad_norm: 0.8347590016992542, iteration: 202605
loss: 1.000859022140503,grad_norm: 0.9938138703418027, iteration: 202606
loss: 1.0421028137207031,grad_norm: 0.9999992565808816, iteration: 202607
loss: 0.9982274174690247,grad_norm: 0.9999998164587123, iteration: 202608
loss: 0.99126797914505,grad_norm: 0.9999997753766893, iteration: 202609
loss: 0.9796102643013,grad_norm: 0.8054422242624278, iteration: 202610
loss: 1.001391887664795,grad_norm: 0.9620357806225867, iteration: 202611
loss: 0.9802369475364685,grad_norm: 0.9222581753596973, iteration: 202612
loss: 0.9907997846603394,grad_norm: 0.9999989938202006, iteration: 202613
loss: 0.9740778207778931,grad_norm: 0.9999997124267794, iteration: 202614
loss: 0.9818614721298218,grad_norm: 0.9765009174316953, iteration: 202615
loss: 0.9988933801651001,grad_norm: 0.9999989580826886, iteration: 202616
loss: 1.0150502920150757,grad_norm: 0.9999991534277141, iteration: 202617
loss: 1.0307353734970093,grad_norm: 0.914966983228212, iteration: 202618
loss: 1.0194900035858154,grad_norm: 0.7615073840132677, iteration: 202619
loss: 0.9717807769775391,grad_norm: 0.9999997639292532, iteration: 202620
loss: 1.0223630666732788,grad_norm: 0.9999998133169108, iteration: 202621
loss: 1.0092397928237915,grad_norm: 0.9946104552086851, iteration: 202622
loss: 1.0287433862686157,grad_norm: 0.9999990030158601, iteration: 202623
loss: 1.0128883123397827,grad_norm: 0.9980604728879349, iteration: 202624
loss: 0.9898640513420105,grad_norm: 0.9880485892778259, iteration: 202625
loss: 0.9773383736610413,grad_norm: 0.999999151849188, iteration: 202626
loss: 1.0126323699951172,grad_norm: 0.8493651496458675, iteration: 202627
loss: 0.9985103607177734,grad_norm: 0.958640054193716, iteration: 202628
loss: 0.9939748644828796,grad_norm: 0.8856534529911354, iteration: 202629
loss: 0.9983921051025391,grad_norm: 0.8403106962349028, iteration: 202630
loss: 1.0191173553466797,grad_norm: 0.7871482506960903, iteration: 202631
loss: 1.0268549919128418,grad_norm: 0.9999991696197326, iteration: 202632
loss: 0.9924470782279968,grad_norm: 0.9999991989783266, iteration: 202633
loss: 1.0057966709136963,grad_norm: 0.8987541949987237, iteration: 202634
loss: 0.9876710772514343,grad_norm: 0.999999154053757, iteration: 202635
loss: 1.1109544038772583,grad_norm: 0.9999997902349699, iteration: 202636
loss: 1.0420806407928467,grad_norm: 0.9999995763375469, iteration: 202637
loss: 0.9897295832633972,grad_norm: 0.9535575366729009, iteration: 202638
loss: 0.970180332660675,grad_norm: 0.9999992096502504, iteration: 202639
loss: 1.0505430698394775,grad_norm: 0.9999993144081277, iteration: 202640
loss: 0.9788482785224915,grad_norm: 0.8624390197576636, iteration: 202641
loss: 0.9948764443397522,grad_norm: 0.9999992376209718, iteration: 202642
loss: 0.9911304712295532,grad_norm: 0.8778826417319369, iteration: 202643
loss: 1.010292410850525,grad_norm: 0.9999992528073608, iteration: 202644
loss: 1.0472157001495361,grad_norm: 0.9999990836013529, iteration: 202645
loss: 1.009799838066101,grad_norm: 0.9752248832655093, iteration: 202646
loss: 0.9717256426811218,grad_norm: 0.9443248555469661, iteration: 202647
loss: 1.0348533391952515,grad_norm: 0.9242565679019676, iteration: 202648
loss: 0.972461998462677,grad_norm: 0.9999991952744919, iteration: 202649
loss: 0.9944065809249878,grad_norm: 0.9344496216168182, iteration: 202650
loss: 0.9886692762374878,grad_norm: 0.9616987413862482, iteration: 202651
loss: 1.0418951511383057,grad_norm: 0.999999712147775, iteration: 202652
loss: 1.0209455490112305,grad_norm: 0.9999990370069722, iteration: 202653
loss: 1.0088716745376587,grad_norm: 0.8909523177887421, iteration: 202654
loss: 1.0398439168930054,grad_norm: 0.885086355975609, iteration: 202655
loss: 0.9760603904724121,grad_norm: 0.9999991790406735, iteration: 202656
loss: 0.9930110573768616,grad_norm: 0.9168667257609046, iteration: 202657
loss: 1.023516058921814,grad_norm: 0.999999211894628, iteration: 202658
loss: 1.0027501583099365,grad_norm: 0.97826328095409, iteration: 202659
loss: 1.007439374923706,grad_norm: 0.9999991122663955, iteration: 202660
loss: 0.97834712266922,grad_norm: 0.8802540446654423, iteration: 202661
loss: 1.0291235446929932,grad_norm: 0.99999924856455, iteration: 202662
loss: 1.0011717081069946,grad_norm: 0.9999989715899736, iteration: 202663
loss: 1.0468581914901733,grad_norm: 0.9999992777085392, iteration: 202664
loss: 0.9870222210884094,grad_norm: 0.9999996713631039, iteration: 202665
loss: 0.9630235433578491,grad_norm: 0.9999996399737235, iteration: 202666
loss: 1.0217820405960083,grad_norm: 0.9243610271578432, iteration: 202667
loss: 0.9803564548492432,grad_norm: 0.967783693985647, iteration: 202668
loss: 0.9795503616333008,grad_norm: 0.9630650227646586, iteration: 202669
loss: 1.0136644840240479,grad_norm: 0.9999990297339294, iteration: 202670
loss: 1.0031108856201172,grad_norm: 0.99999991042745, iteration: 202671
loss: 0.9689928889274597,grad_norm: 0.8931980837116191, iteration: 202672
loss: 1.0031284093856812,grad_norm: 0.932131134118475, iteration: 202673
loss: 1.0021917819976807,grad_norm: 0.9064935269406188, iteration: 202674
loss: 1.0050561428070068,grad_norm: 0.9999990410142514, iteration: 202675
loss: 0.9987179040908813,grad_norm: 0.9689481025996417, iteration: 202676
loss: 0.9954358339309692,grad_norm: 0.9999992225273344, iteration: 202677
loss: 0.9703948497772217,grad_norm: 0.9999990648256507, iteration: 202678
loss: 0.9897729158401489,grad_norm: 0.8639860947173512, iteration: 202679
loss: 0.9715268015861511,grad_norm: 0.8184528559916933, iteration: 202680
loss: 0.9941214323043823,grad_norm: 0.90518801760908, iteration: 202681
loss: 0.9536623954772949,grad_norm: 0.936864491553001, iteration: 202682
loss: 0.9983611702919006,grad_norm: 0.9813588089024046, iteration: 202683
loss: 0.9901483654975891,grad_norm: 0.9999992419696959, iteration: 202684
loss: 0.9997555613517761,grad_norm: 0.8385503753603522, iteration: 202685
loss: 1.0238761901855469,grad_norm: 0.9404874433301783, iteration: 202686
loss: 1.013741374015808,grad_norm: 0.9999998833677854, iteration: 202687
loss: 0.9998607039451599,grad_norm: 0.9999999119721846, iteration: 202688
loss: 0.978368878364563,grad_norm: 0.9999998467351103, iteration: 202689
loss: 0.9915813207626343,grad_norm: 0.9999996997528645, iteration: 202690
loss: 1.0180178880691528,grad_norm: 0.8368199333028884, iteration: 202691
loss: 1.0049004554748535,grad_norm: 0.7644078437052584, iteration: 202692
loss: 1.0056897401809692,grad_norm: 0.9450369895049063, iteration: 202693
loss: 0.9883255362510681,grad_norm: 0.9999991256927647, iteration: 202694
loss: 1.0190987586975098,grad_norm: 0.9999990276790653, iteration: 202695
loss: 1.0124191045761108,grad_norm: 0.8609097008649391, iteration: 202696
loss: 0.9903737902641296,grad_norm: 0.8618900419894345, iteration: 202697
loss: 0.9736603498458862,grad_norm: 0.9999990616101505, iteration: 202698
loss: 1.0114620923995972,grad_norm: 0.9999991670219417, iteration: 202699
loss: 1.035988688468933,grad_norm: 0.8635277111291108, iteration: 202700
loss: 0.983210563659668,grad_norm: 0.9230295818270018, iteration: 202701
loss: 1.0443086624145508,grad_norm: 0.9999997928445338, iteration: 202702
loss: 0.9901946783065796,grad_norm: 0.9999992562541483, iteration: 202703
loss: 1.032815933227539,grad_norm: 0.9999991719249712, iteration: 202704
loss: 1.0615030527114868,grad_norm: 0.9999991100601452, iteration: 202705
loss: 0.9912768602371216,grad_norm: 0.9999990567928613, iteration: 202706
loss: 1.0079210996627808,grad_norm: 0.9999991620515538, iteration: 202707
loss: 1.0156553983688354,grad_norm: 0.9999995253758891, iteration: 202708
loss: 0.99148029088974,grad_norm: 0.9999989755154839, iteration: 202709
loss: 0.974132239818573,grad_norm: 0.9999991433245125, iteration: 202710
loss: 0.9875636696815491,grad_norm: 0.9999990273363458, iteration: 202711
loss: 0.9871942400932312,grad_norm: 0.8409285858338482, iteration: 202712
loss: 1.0160737037658691,grad_norm: 0.9999991197665367, iteration: 202713
loss: 1.0044397115707397,grad_norm: 0.9999996540448443, iteration: 202714
loss: 1.0125147104263306,grad_norm: 0.9044698352606405, iteration: 202715
loss: 0.990899384021759,grad_norm: 0.8313450416179659, iteration: 202716
loss: 0.9586520791053772,grad_norm: 0.8801093256994553, iteration: 202717
loss: 1.0341566801071167,grad_norm: 0.8553375943771545, iteration: 202718
loss: 0.9990460872650146,grad_norm: 0.8724463717014697, iteration: 202719
loss: 1.0178260803222656,grad_norm: 0.9455109482756413, iteration: 202720
loss: 1.00584876537323,grad_norm: 0.86694702287931, iteration: 202721
loss: 1.0290162563323975,grad_norm: 0.9999991297500189, iteration: 202722
loss: 1.0125558376312256,grad_norm: 0.999999364839354, iteration: 202723
loss: 1.001002550125122,grad_norm: 0.9999991269538314, iteration: 202724
loss: 1.0264339447021484,grad_norm: 0.9101436266428053, iteration: 202725
loss: 1.0023919343948364,grad_norm: 0.9091990389356884, iteration: 202726
loss: 1.0106788873672485,grad_norm: 0.9999990768686476, iteration: 202727
loss: 0.9786573648452759,grad_norm: 0.8500828101456204, iteration: 202728
loss: 1.0507127046585083,grad_norm: 0.9999990305091137, iteration: 202729
loss: 0.9733903408050537,grad_norm: 0.9948932426262637, iteration: 202730
loss: 1.0696789026260376,grad_norm: 0.9999993294807367, iteration: 202731
loss: 1.0223631858825684,grad_norm: 0.9999998857910805, iteration: 202732
loss: 1.0007555484771729,grad_norm: 0.9527841210113678, iteration: 202733
loss: 0.9814306497573853,grad_norm: 0.9513570634293956, iteration: 202734
loss: 0.9694355130195618,grad_norm: 0.9824932405605094, iteration: 202735
loss: 1.0964058637619019,grad_norm: 0.9999997224068302, iteration: 202736
loss: 0.9909331798553467,grad_norm: 0.9423118295042298, iteration: 202737
loss: 0.9921655058860779,grad_norm: 0.8962159036469454, iteration: 202738
loss: 1.0700558423995972,grad_norm: 0.9999990342580731, iteration: 202739
loss: 0.9934737682342529,grad_norm: 0.956570543246843, iteration: 202740
loss: 1.0087641477584839,grad_norm: 0.8380651775646158, iteration: 202741
loss: 1.0295757055282593,grad_norm: 0.9999993925473882, iteration: 202742
loss: 1.1317318677902222,grad_norm: 0.9999993044906957, iteration: 202743
loss: 1.0183488130569458,grad_norm: 0.920545153210845, iteration: 202744
loss: 1.0495718717575073,grad_norm: 0.9999990708021277, iteration: 202745
loss: 1.0153286457061768,grad_norm: 0.9999990977981087, iteration: 202746
loss: 1.0145035982131958,grad_norm: 0.8042070457077458, iteration: 202747
loss: 0.9807528853416443,grad_norm: 0.9690661339639519, iteration: 202748
loss: 1.1033153533935547,grad_norm: 0.9999992575540011, iteration: 202749
loss: 0.9975882172584534,grad_norm: 0.845848241129321, iteration: 202750
loss: 0.9651408195495605,grad_norm: 0.9808756725642598, iteration: 202751
loss: 0.997833251953125,grad_norm: 0.9999989119416893, iteration: 202752
loss: 0.9901168346405029,grad_norm: 0.9947910444099288, iteration: 202753
loss: 0.9849385023117065,grad_norm: 0.857387223521868, iteration: 202754
loss: 1.003265142440796,grad_norm: 0.9643378222191105, iteration: 202755
loss: 0.9690347909927368,grad_norm: 0.9999991839317067, iteration: 202756
loss: 0.9896112084388733,grad_norm: 0.9513887027549939, iteration: 202757
loss: 0.9704115390777588,grad_norm: 0.8843651085279147, iteration: 202758
loss: 1.0250554084777832,grad_norm: 0.8906288732772889, iteration: 202759
loss: 0.9953077435493469,grad_norm: 0.9999990655311782, iteration: 202760
loss: 1.0189186334609985,grad_norm: 0.9327090643763182, iteration: 202761
loss: 0.99274080991745,grad_norm: 0.8269567846744947, iteration: 202762
loss: 1.0084726810455322,grad_norm: 0.7949685608320356, iteration: 202763
loss: 1.0204439163208008,grad_norm: 0.9999996455570502, iteration: 202764
loss: 1.0122227668762207,grad_norm: 0.9379181476432986, iteration: 202765
loss: 0.9600069522857666,grad_norm: 0.9367163493978454, iteration: 202766
loss: 1.0188398361206055,grad_norm: 1.0000000084935199, iteration: 202767
loss: 1.0125190019607544,grad_norm: 0.9146507827803024, iteration: 202768
loss: 0.9715582132339478,grad_norm: 0.9442131655813949, iteration: 202769
loss: 1.0748251676559448,grad_norm: 0.9491616126262001, iteration: 202770
loss: 1.0027812719345093,grad_norm: 0.999999080512035, iteration: 202771
loss: 0.9742958545684814,grad_norm: 0.7723082976465111, iteration: 202772
loss: 1.0330878496170044,grad_norm: 0.9726929450867098, iteration: 202773
loss: 0.9831783175468445,grad_norm: 0.9422427733745242, iteration: 202774
loss: 1.0256085395812988,grad_norm: 0.999999047352253, iteration: 202775
loss: 0.9786219000816345,grad_norm: 0.9999993345847811, iteration: 202776
loss: 0.9590091109275818,grad_norm: 0.9453058764078008, iteration: 202777
loss: 1.0506534576416016,grad_norm: 0.9999993338852688, iteration: 202778
loss: 1.0644645690917969,grad_norm: 0.9999996365884564, iteration: 202779
loss: 1.0035409927368164,grad_norm: 0.9999991360144619, iteration: 202780
loss: 1.0221028327941895,grad_norm: 0.9999992275845323, iteration: 202781
loss: 1.0353318452835083,grad_norm: 0.8846393068954078, iteration: 202782
loss: 0.9947124123573303,grad_norm: 0.8197588693519493, iteration: 202783
loss: 0.9879052639007568,grad_norm: 0.7921818320544927, iteration: 202784
loss: 1.0113898515701294,grad_norm: 0.9999991175851782, iteration: 202785
loss: 1.012580156326294,grad_norm: 0.9999991731553288, iteration: 202786
loss: 0.9848329424858093,grad_norm: 0.9296086525085309, iteration: 202787
loss: 1.002126693725586,grad_norm: 0.9040182979683568, iteration: 202788
loss: 0.9893133640289307,grad_norm: 0.913649559857145, iteration: 202789
loss: 1.0371145009994507,grad_norm: 0.9999990850326788, iteration: 202790
loss: 1.016353964805603,grad_norm: 0.8994982690335435, iteration: 202791
loss: 0.980368971824646,grad_norm: 0.8787289899384934, iteration: 202792
loss: 0.9819477796554565,grad_norm: 0.9999990183769514, iteration: 202793
loss: 1.1054133176803589,grad_norm: 0.9386484947958634, iteration: 202794
loss: 0.9860612750053406,grad_norm: 0.9406473192796344, iteration: 202795
loss: 1.012790560722351,grad_norm: 0.9998022296865581, iteration: 202796
loss: 1.0124908685684204,grad_norm: 0.9999991420208907, iteration: 202797
loss: 0.9589556455612183,grad_norm: 0.988920235477782, iteration: 202798
loss: 0.9974935054779053,grad_norm: 0.9999991180919776, iteration: 202799
loss: 1.0052423477172852,grad_norm: 0.8433420091715935, iteration: 202800
loss: 1.0030442476272583,grad_norm: 0.9112522109653792, iteration: 202801
loss: 0.9727973341941833,grad_norm: 0.9999993361479436, iteration: 202802
loss: 1.0279054641723633,grad_norm: 0.9791874051474755, iteration: 202803
loss: 1.0141830444335938,grad_norm: 0.9999991169951329, iteration: 202804
loss: 0.9858320951461792,grad_norm: 0.8310129671988459, iteration: 202805
loss: 1.0009480714797974,grad_norm: 0.9999989432924108, iteration: 202806
loss: 0.9785709381103516,grad_norm: 0.8466439477381764, iteration: 202807
loss: 1.0531777143478394,grad_norm: 0.9999990782330831, iteration: 202808
loss: 0.9858148694038391,grad_norm: 0.9307565724082105, iteration: 202809
loss: 0.9847183227539062,grad_norm: 0.9999990055130364, iteration: 202810
loss: 1.0104159116744995,grad_norm: 0.9479375510371593, iteration: 202811
loss: 0.9890576004981995,grad_norm: 0.87354518007876, iteration: 202812
loss: 0.9957584142684937,grad_norm: 0.999999051045535, iteration: 202813
loss: 1.0384854078292847,grad_norm: 0.9999991196122995, iteration: 202814
loss: 1.0244628190994263,grad_norm: 0.9999991079901945, iteration: 202815
loss: 1.023419737815857,grad_norm: 0.935899900098747, iteration: 202816
loss: 1.0136111974716187,grad_norm: 0.9999991171170487, iteration: 202817
loss: 0.9954557418823242,grad_norm: 0.8734911184204788, iteration: 202818
loss: 0.9929094314575195,grad_norm: 0.9999995466316987, iteration: 202819
loss: 1.018242597579956,grad_norm: 0.8945596184005212, iteration: 202820
loss: 0.9969465136528015,grad_norm: 0.9127799965493749, iteration: 202821
loss: 0.99399733543396,grad_norm: 0.9999990482298035, iteration: 202822
loss: 0.9885461330413818,grad_norm: 0.9999992638758174, iteration: 202823
loss: 1.035225749015808,grad_norm: 0.9062873412853361, iteration: 202824
loss: 1.012241244316101,grad_norm: 0.9999993072309772, iteration: 202825
loss: 0.9851094484329224,grad_norm: 0.957990078855829, iteration: 202826
loss: 0.9965043663978577,grad_norm: 0.9485463036584769, iteration: 202827
loss: 1.0225516557693481,grad_norm: 0.8232010105570453, iteration: 202828
loss: 1.0060389041900635,grad_norm: 0.9999992114489036, iteration: 202829
loss: 1.0236952304840088,grad_norm: 0.9999994463637104, iteration: 202830
loss: 0.9858341217041016,grad_norm: 0.8422152701679171, iteration: 202831
loss: 1.0075809955596924,grad_norm: 0.9499959144928974, iteration: 202832
loss: 0.9836780428886414,grad_norm: 0.8257880729192575, iteration: 202833
loss: 1.0207366943359375,grad_norm: 0.9942576152854207, iteration: 202834
loss: 1.01146399974823,grad_norm: 0.9999990590017025, iteration: 202835
loss: 1.030625581741333,grad_norm: 0.7733672774056515, iteration: 202836
loss: 1.0139172077178955,grad_norm: 0.9620614349688198, iteration: 202837
loss: 1.0226298570632935,grad_norm: 0.9866777682493627, iteration: 202838
loss: 0.9606392979621887,grad_norm: 0.9999992146924918, iteration: 202839
loss: 0.9881759285926819,grad_norm: 0.9723782238628748, iteration: 202840
loss: 0.9875780344009399,grad_norm: 0.999999044520245, iteration: 202841
loss: 0.9877697229385376,grad_norm: 0.8493783203754759, iteration: 202842
loss: 0.9780101180076599,grad_norm: 0.905560479882236, iteration: 202843
loss: 0.9554727077484131,grad_norm: 0.7989913205154349, iteration: 202844
loss: 0.9980568289756775,grad_norm: 0.8230045116332383, iteration: 202845
loss: 1.065022349357605,grad_norm: 0.99999913754682, iteration: 202846
loss: 0.9952385425567627,grad_norm: 0.942500213459606, iteration: 202847
loss: 1.1177843809127808,grad_norm: 0.999999517250809, iteration: 202848
loss: 0.9709213376045227,grad_norm: 0.9999990426781196, iteration: 202849
loss: 1.009504795074463,grad_norm: 0.9471457906854985, iteration: 202850
loss: 1.0237009525299072,grad_norm: 0.9999992576326323, iteration: 202851
loss: 0.9966764450073242,grad_norm: 0.9380207966195414, iteration: 202852
loss: 0.9640437960624695,grad_norm: 0.9999992155272165, iteration: 202853
loss: 1.012966275215149,grad_norm: 0.999999173665075, iteration: 202854
loss: 1.0050022602081299,grad_norm: 0.9453505795424045, iteration: 202855
loss: 0.9581984281539917,grad_norm: 0.9999989640241335, iteration: 202856
loss: 0.9974793791770935,grad_norm: 0.8759575331805136, iteration: 202857
loss: 1.0301871299743652,grad_norm: 0.9999994783960917, iteration: 202858
loss: 1.0115011930465698,grad_norm: 0.9999991985483042, iteration: 202859
loss: 1.023280143737793,grad_norm: 0.8924394748800173, iteration: 202860
loss: 1.0413588285446167,grad_norm: 0.8653351732497097, iteration: 202861
loss: 1.0082473754882812,grad_norm: 0.9999991306616018, iteration: 202862
loss: 0.9845007658004761,grad_norm: 0.928208726417342, iteration: 202863
loss: 0.9891752004623413,grad_norm: 0.8382612926569122, iteration: 202864
loss: 1.0115091800689697,grad_norm: 0.9999992681302466, iteration: 202865
loss: 1.0407129526138306,grad_norm: 0.7596403688773319, iteration: 202866
loss: 1.0000081062316895,grad_norm: 0.9999991418289539, iteration: 202867
loss: 1.0077800750732422,grad_norm: 0.9776969072068855, iteration: 202868
loss: 0.9971210360527039,grad_norm: 0.9378613495363362, iteration: 202869
loss: 0.9964876770973206,grad_norm: 0.9745042786694745, iteration: 202870
loss: 0.9682634472846985,grad_norm: 0.9092129706581722, iteration: 202871
loss: 1.018936038017273,grad_norm: 0.9544466866388127, iteration: 202872
loss: 1.0166205167770386,grad_norm: 0.8463866264468177, iteration: 202873
loss: 0.9637574553489685,grad_norm: 0.9254722336648316, iteration: 202874
loss: 1.0296872854232788,grad_norm: 0.9999990961775022, iteration: 202875
loss: 1.0146790742874146,grad_norm: 0.9189415556614917, iteration: 202876
loss: 1.0066784620285034,grad_norm: 0.9209818715299225, iteration: 202877
loss: 0.9788873791694641,grad_norm: 0.9999999040355035, iteration: 202878
loss: 0.9863658547401428,grad_norm: 0.9999990178097234, iteration: 202879
loss: 0.9716792106628418,grad_norm: 0.8243951557130669, iteration: 202880
loss: 0.9752286672592163,grad_norm: 0.9999991771354272, iteration: 202881
loss: 0.9666650891304016,grad_norm: 0.8355480791731128, iteration: 202882
loss: 0.9879522919654846,grad_norm: 0.9535147475911147, iteration: 202883
loss: 0.9971883893013,grad_norm: 0.8249721467053913, iteration: 202884
loss: 0.9979844689369202,grad_norm: 0.9999991485595277, iteration: 202885
loss: 1.0193097591400146,grad_norm: 0.9999991285391971, iteration: 202886
loss: 1.0030289888381958,grad_norm: 0.8514328267940966, iteration: 202887
loss: 0.9917683601379395,grad_norm: 0.9798067646628729, iteration: 202888
loss: 1.0313935279846191,grad_norm: 0.8627306839525676, iteration: 202889
loss: 0.9676290154457092,grad_norm: 0.9999991022976455, iteration: 202890
loss: 0.9814449548721313,grad_norm: 0.9999991518751584, iteration: 202891
loss: 1.0147086381912231,grad_norm: 0.999999964219573, iteration: 202892
loss: 1.0100774765014648,grad_norm: 0.9999994060022303, iteration: 202893
loss: 1.0070322751998901,grad_norm: 0.8473773217031205, iteration: 202894
loss: 1.0139700174331665,grad_norm: 0.8950023928284561, iteration: 202895
loss: 1.0269865989685059,grad_norm: 0.9547637191850796, iteration: 202896
loss: 1.0162218809127808,grad_norm: 0.9238406109253012, iteration: 202897
loss: 0.9575356245040894,grad_norm: 0.9999991656909861, iteration: 202898
loss: 0.9393388628959656,grad_norm: 0.9603573332983403, iteration: 202899
loss: 0.9739748239517212,grad_norm: 0.9716543568133885, iteration: 202900
loss: 0.9533318877220154,grad_norm: 0.9999989949611383, iteration: 202901
loss: 0.9927102327346802,grad_norm: 0.9058678938318501, iteration: 202902
loss: 0.9831207394599915,grad_norm: 0.9429377388269357, iteration: 202903
loss: 1.0316147804260254,grad_norm: 0.999999039773044, iteration: 202904
loss: 1.0062470436096191,grad_norm: 0.9254642126322024, iteration: 202905
loss: 1.0647739171981812,grad_norm: 0.9999998048646064, iteration: 202906
loss: 1.0034384727478027,grad_norm: 0.9954926051114839, iteration: 202907
loss: 1.0211182832717896,grad_norm: 0.999999103512674, iteration: 202908
loss: 0.9947476387023926,grad_norm: 0.999999766339603, iteration: 202909
loss: 0.9886859655380249,grad_norm: 0.9313905752892382, iteration: 202910
loss: 0.9538683891296387,grad_norm: 0.9999992547474159, iteration: 202911
loss: 0.9962213039398193,grad_norm: 0.9999994392019027, iteration: 202912
loss: 0.9608283042907715,grad_norm: 0.9699276689613339, iteration: 202913
loss: 1.014445424079895,grad_norm: 0.8452190242319297, iteration: 202914
loss: 0.9865071177482605,grad_norm: 0.8253025775419945, iteration: 202915
loss: 1.0738886594772339,grad_norm: 0.9999994072158813, iteration: 202916
loss: 1.0170515775680542,grad_norm: 0.9999991614732235, iteration: 202917
loss: 0.9801509380340576,grad_norm: 0.9124236111651647, iteration: 202918
loss: 0.9955412745475769,grad_norm: 0.9617725600359346, iteration: 202919
loss: 1.0127003192901611,grad_norm: 0.8259569901192994, iteration: 202920
loss: 1.0012229681015015,grad_norm: 0.9962206121260245, iteration: 202921
loss: 1.0012845993041992,grad_norm: 0.9999991211511075, iteration: 202922
loss: 0.9940154552459717,grad_norm: 0.8718724368991263, iteration: 202923
loss: 1.0381901264190674,grad_norm: 0.9999992215083399, iteration: 202924
loss: 1.0133790969848633,grad_norm: 0.9238889681433332, iteration: 202925
loss: 0.9689801335334778,grad_norm: 0.9999991259365436, iteration: 202926
loss: 1.0600619316101074,grad_norm: 0.9999994503730698, iteration: 202927
loss: 1.0430761575698853,grad_norm: 0.9999990941432433, iteration: 202928
loss: 0.9690627455711365,grad_norm: 0.7819277030068402, iteration: 202929
loss: 0.9748293161392212,grad_norm: 0.9843720316140011, iteration: 202930
loss: 1.0574235916137695,grad_norm: 0.9999992688077488, iteration: 202931
loss: 1.0059410333633423,grad_norm: 0.8095942512428755, iteration: 202932
loss: 1.0337600708007812,grad_norm: 0.9821686795491001, iteration: 202933
loss: 1.0217974185943604,grad_norm: 0.9999994937771166, iteration: 202934
loss: 0.9570101499557495,grad_norm: 0.9850890331611417, iteration: 202935
loss: 0.9857426285743713,grad_norm: 0.9189665912323207, iteration: 202936
loss: 0.9999593496322632,grad_norm: 0.999999035908427, iteration: 202937
loss: 0.9798739552497864,grad_norm: 0.9999991375230056, iteration: 202938
loss: 0.9478495717048645,grad_norm: 0.9999989682515436, iteration: 202939
loss: 1.0157880783081055,grad_norm: 0.9999993886440216, iteration: 202940
loss: 0.9951384663581848,grad_norm: 0.9596356521540854, iteration: 202941
loss: 1.0474224090576172,grad_norm: 0.9646467201777864, iteration: 202942
loss: 0.9903611540794373,grad_norm: 0.9692860979788761, iteration: 202943
loss: 1.0228917598724365,grad_norm: 0.9999989606017895, iteration: 202944
loss: 1.0212459564208984,grad_norm: 0.9576279108452193, iteration: 202945
loss: 1.0190030336380005,grad_norm: 0.9941272629329246, iteration: 202946
loss: 1.0056138038635254,grad_norm: 0.9329189170478593, iteration: 202947
loss: 1.0086947679519653,grad_norm: 0.9999990091231334, iteration: 202948
loss: 0.9952579140663147,grad_norm: 0.9709915243485044, iteration: 202949
loss: 0.9977623224258423,grad_norm: 0.9557711804456859, iteration: 202950
loss: 0.9885439276695251,grad_norm: 0.8850661312367085, iteration: 202951
loss: 0.9844006299972534,grad_norm: 0.8758647388507811, iteration: 202952
loss: 0.9963362216949463,grad_norm: 0.9921463350117569, iteration: 202953
loss: 1.1230599880218506,grad_norm: 0.9999990969415581, iteration: 202954
loss: 1.012437343597412,grad_norm: 0.9691860375564115, iteration: 202955
loss: 1.0055850744247437,grad_norm: 0.9999996859513025, iteration: 202956
loss: 1.013727068901062,grad_norm: 0.9999990246824245, iteration: 202957
loss: 1.0144517421722412,grad_norm: 0.9999991393375618, iteration: 202958
loss: 0.9970687627792358,grad_norm: 0.8224578159231886, iteration: 202959
loss: 1.0047781467437744,grad_norm: 0.9070726863342242, iteration: 202960
loss: 1.0377439260482788,grad_norm: 0.9999990535052368, iteration: 202961
loss: 1.0623438358306885,grad_norm: 0.9791779102759098, iteration: 202962
loss: 1.012036681175232,grad_norm: 0.9999993076774659, iteration: 202963
loss: 1.0029184818267822,grad_norm: 0.9119313977469276, iteration: 202964
loss: 1.0317490100860596,grad_norm: 0.9222579623063542, iteration: 202965
loss: 1.020111083984375,grad_norm: 0.9999990543919223, iteration: 202966
loss: 1.0264756679534912,grad_norm: 0.9899341354252087, iteration: 202967
loss: 0.9877965450286865,grad_norm: 0.8892619192342922, iteration: 202968
loss: 1.0531331300735474,grad_norm: 0.999999276745665, iteration: 202969
loss: 0.9861465692520142,grad_norm: 0.9232868379986651, iteration: 202970
loss: 1.0093519687652588,grad_norm: 0.8702507094876566, iteration: 202971
loss: 1.0456557273864746,grad_norm: 0.8019061619659593, iteration: 202972
loss: 1.0435645580291748,grad_norm: 0.9999996519542981, iteration: 202973
loss: 1.00480318069458,grad_norm: 0.8332947029024917, iteration: 202974
loss: 1.0135078430175781,grad_norm: 0.9999992135197775, iteration: 202975
loss: 0.9849478006362915,grad_norm: 0.9196810209711174, iteration: 202976
loss: 0.9659278392791748,grad_norm: 0.9999991913801778, iteration: 202977
loss: 0.9792797565460205,grad_norm: 0.8950495753890348, iteration: 202978
loss: 1.0161807537078857,grad_norm: 0.9999992237865772, iteration: 202979
loss: 1.0505707263946533,grad_norm: 0.9999992584718777, iteration: 202980
loss: 0.9983027577400208,grad_norm: 0.8185282207666338, iteration: 202981
loss: 0.9620808362960815,grad_norm: 0.862569753944781, iteration: 202982
loss: 1.000931978225708,grad_norm: 0.9334704778309731, iteration: 202983
loss: 0.9856976270675659,grad_norm: 0.999999545392709, iteration: 202984
loss: 0.9844598770141602,grad_norm: 0.9053430691904827, iteration: 202985
loss: 1.0249122381210327,grad_norm: 0.9809017841694281, iteration: 202986
loss: 0.9940624833106995,grad_norm: 0.9999992250552059, iteration: 202987
loss: 1.0119823217391968,grad_norm: 0.9999994082651698, iteration: 202988
loss: 0.9781208038330078,grad_norm: 0.9278805724139202, iteration: 202989
loss: 0.9787298440933228,grad_norm: 0.9855792912257222, iteration: 202990
loss: 1.0168178081512451,grad_norm: 0.7893137285439332, iteration: 202991
loss: 1.040095329284668,grad_norm: 0.9999990938569182, iteration: 202992
loss: 1.0994234085083008,grad_norm: 0.9999993166162249, iteration: 202993
loss: 0.9717332124710083,grad_norm: 0.9133202372152662, iteration: 202994
loss: 0.9844627976417542,grad_norm: 0.9078898254031698, iteration: 202995
loss: 1.0146257877349854,grad_norm: 0.9880265417895502, iteration: 202996
loss: 1.0119163990020752,grad_norm: 0.9325019106869912, iteration: 202997
loss: 0.9782333970069885,grad_norm: 0.9726654316771243, iteration: 202998
loss: 0.9793470501899719,grad_norm: 0.9999991144107355, iteration: 202999
loss: 0.9926055669784546,grad_norm: 0.8421102985607422, iteration: 203000
loss: 0.9994452595710754,grad_norm: 0.9874797013664302, iteration: 203001
loss: 1.0056331157684326,grad_norm: 0.9999993113321509, iteration: 203002
loss: 1.0158758163452148,grad_norm: 0.8061458050311427, iteration: 203003
loss: 0.985953688621521,grad_norm: 0.8417121290772785, iteration: 203004
loss: 1.0076884031295776,grad_norm: 0.9565384575327446, iteration: 203005
loss: 1.0179044008255005,grad_norm: 0.999999090903553, iteration: 203006
loss: 1.0044286251068115,grad_norm: 0.9398756430699804, iteration: 203007
loss: 1.0249725580215454,grad_norm: 0.9999990481174584, iteration: 203008
loss: 1.017604112625122,grad_norm: 0.9282645476653613, iteration: 203009
loss: 1.0120694637298584,grad_norm: 0.9564018863552097, iteration: 203010
loss: 0.999489426612854,grad_norm: 0.9285319293228479, iteration: 203011
loss: 1.0420362949371338,grad_norm: 0.8530041237838168, iteration: 203012
loss: 0.9908629655838013,grad_norm: 0.8111389664500361, iteration: 203013
loss: 0.9990893602371216,grad_norm: 0.9999991949249791, iteration: 203014
loss: 1.0000805854797363,grad_norm: 0.8293255316712395, iteration: 203015
loss: 1.001129150390625,grad_norm: 0.9528116496968054, iteration: 203016
loss: 0.9965639710426331,grad_norm: 0.9999997182736197, iteration: 203017
loss: 1.00665283203125,grad_norm: 0.8852432706506462, iteration: 203018
loss: 1.0106624364852905,grad_norm: 0.8866465486383126, iteration: 203019
loss: 1.028324842453003,grad_norm: 0.9999993245701587, iteration: 203020
loss: 0.9851409792900085,grad_norm: 0.8933634154622746, iteration: 203021
loss: 0.9975515007972717,grad_norm: 0.7987512780284829, iteration: 203022
loss: 1.0019599199295044,grad_norm: 0.8390560923501252, iteration: 203023
loss: 1.0159499645233154,grad_norm: 0.9999991023328485, iteration: 203024
loss: 0.9760901927947998,grad_norm: 0.8359416147909309, iteration: 203025
loss: 1.0262196063995361,grad_norm: 0.8705005638858035, iteration: 203026
loss: 0.9397426843643188,grad_norm: 0.9999992054006682, iteration: 203027
loss: 1.0085039138793945,grad_norm: 0.9999990949647375, iteration: 203028
loss: 1.0046237707138062,grad_norm: 0.9685538040787489, iteration: 203029
loss: 1.076582670211792,grad_norm: 0.9999996480342509, iteration: 203030
loss: 1.0090959072113037,grad_norm: 0.9999992222959043, iteration: 203031
loss: 0.9798660278320312,grad_norm: 0.7694073845218092, iteration: 203032
loss: 0.9730144739151001,grad_norm: 0.9515270914738326, iteration: 203033
loss: 1.0962504148483276,grad_norm: 0.9999995253990465, iteration: 203034
loss: 1.0439906120300293,grad_norm: 0.9999990959795171, iteration: 203035
loss: 1.0002951622009277,grad_norm: 0.9131253630465045, iteration: 203036
loss: 0.9540995955467224,grad_norm: 0.826570727279692, iteration: 203037
loss: 1.0240181684494019,grad_norm: 0.8615118636974076, iteration: 203038
loss: 0.9714812636375427,grad_norm: 0.9818414110414176, iteration: 203039
loss: 1.0108762979507446,grad_norm: 0.8571580357986037, iteration: 203040
loss: 0.9925011992454529,grad_norm: 0.8415758063921858, iteration: 203041
loss: 1.0012892484664917,grad_norm: 0.99999911161003, iteration: 203042
loss: 1.065130352973938,grad_norm: 0.9590909972214741, iteration: 203043
loss: 0.9680616855621338,grad_norm: 0.8498751301124612, iteration: 203044
loss: 0.9919325113296509,grad_norm: 0.9663409779669135, iteration: 203045
loss: 0.9953439831733704,grad_norm: 0.965370079314706, iteration: 203046
loss: 1.004889965057373,grad_norm: 0.7755140370100053, iteration: 203047
loss: 0.996508777141571,grad_norm: 0.9999991280234749, iteration: 203048
loss: 1.0971102714538574,grad_norm: 0.9999992451669879, iteration: 203049
loss: 1.0003787279129028,grad_norm: 0.999999189043571, iteration: 203050
loss: 1.0502053499221802,grad_norm: 0.9999993169769837, iteration: 203051
loss: 1.017632246017456,grad_norm: 0.9999993052107911, iteration: 203052
loss: 1.0383802652359009,grad_norm: 0.9067890656558435, iteration: 203053
loss: 0.9887144565582275,grad_norm: 0.9999990303405168, iteration: 203054
loss: 0.9919121861457825,grad_norm: 0.999999140442464, iteration: 203055
loss: 1.0180065631866455,grad_norm: 0.8718237989705667, iteration: 203056
loss: 0.9903133511543274,grad_norm: 0.8226459494551637, iteration: 203057
loss: 1.0028278827667236,grad_norm: 0.9999990027683048, iteration: 203058
loss: 1.0106210708618164,grad_norm: 0.8183181564604024, iteration: 203059
loss: 1.0007586479187012,grad_norm: 0.9414326698779917, iteration: 203060
loss: 1.0141258239746094,grad_norm: 0.9999991094415476, iteration: 203061
loss: 1.0019563436508179,grad_norm: 0.9999990535364269, iteration: 203062
loss: 1.0282810926437378,grad_norm: 0.9999991020023561, iteration: 203063
loss: 0.9968957304954529,grad_norm: 0.8987788207068428, iteration: 203064
loss: 1.0212509632110596,grad_norm: 0.9999991649775173, iteration: 203065
loss: 1.0134663581848145,grad_norm: 0.9883942174557331, iteration: 203066
loss: 0.9678075909614563,grad_norm: 0.8472743241936335, iteration: 203067
loss: 1.0330449342727661,grad_norm: 0.9999990491820905, iteration: 203068
loss: 0.9896912574768066,grad_norm: 0.9999991527993185, iteration: 203069
loss: 0.9824922680854797,grad_norm: 0.9999992588045797, iteration: 203070
loss: 0.9824452996253967,grad_norm: 0.9999991230722063, iteration: 203071
loss: 0.9765186905860901,grad_norm: 0.999999255204153, iteration: 203072
loss: 0.9756420850753784,grad_norm: 0.9278779642846494, iteration: 203073
loss: 0.9872263073921204,grad_norm: 0.8403278161653222, iteration: 203074
loss: 1.006181001663208,grad_norm: 0.9448276202911757, iteration: 203075
loss: 0.996684193611145,grad_norm: 0.999999152514457, iteration: 203076
loss: 1.0194993019104004,grad_norm: 0.999999106100974, iteration: 203077
loss: 1.0488934516906738,grad_norm: 1.000000002876143, iteration: 203078
loss: 0.9847577810287476,grad_norm: 0.9999991437361188, iteration: 203079
loss: 0.9984970092773438,grad_norm: 0.8505219475992274, iteration: 203080
loss: 1.022835373878479,grad_norm: 0.9691956663082217, iteration: 203081
loss: 0.9788358807563782,grad_norm: 0.9106518467882394, iteration: 203082
loss: 0.9969232082366943,grad_norm: 0.9038067214223832, iteration: 203083
loss: 0.9781307578086853,grad_norm: 0.8024048561114656, iteration: 203084
loss: 0.9936532974243164,grad_norm: 0.8263828015474598, iteration: 203085
loss: 1.0120570659637451,grad_norm: 0.9999992691257817, iteration: 203086
loss: 0.9961097836494446,grad_norm: 0.9999991280508581, iteration: 203087
loss: 0.9712045788764954,grad_norm: 0.8148090365002651, iteration: 203088
loss: 0.9793266654014587,grad_norm: 0.869221941183829, iteration: 203089
loss: 1.0187132358551025,grad_norm: 0.9343291885228467, iteration: 203090
loss: 0.9833460450172424,grad_norm: 0.9999989360413076, iteration: 203091
loss: 0.9692782759666443,grad_norm: 0.9999991618551516, iteration: 203092
loss: 1.0024824142456055,grad_norm: 0.9296885569056356, iteration: 203093
loss: 1.0202878713607788,grad_norm: 0.9999990588917741, iteration: 203094
loss: 1.006908655166626,grad_norm: 0.9999990430688103, iteration: 203095
loss: 1.0562689304351807,grad_norm: 0.9999993109646418, iteration: 203096
loss: 1.036689281463623,grad_norm: 0.9469203965475171, iteration: 203097
loss: 1.0294123888015747,grad_norm: 0.9999990492975533, iteration: 203098
loss: 1.0012648105621338,grad_norm: 0.9707258310128419, iteration: 203099
loss: 1.0047301054000854,grad_norm: 0.9437347317063002, iteration: 203100
loss: 1.010520577430725,grad_norm: 0.9999990658166334, iteration: 203101
loss: 0.9793499708175659,grad_norm: 0.9539484260948811, iteration: 203102
loss: 1.0034563541412354,grad_norm: 0.9999992117022048, iteration: 203103
loss: 0.9907606840133667,grad_norm: 0.7828332405478304, iteration: 203104
loss: 1.00904381275177,grad_norm: 0.9999992340205226, iteration: 203105
loss: 1.0167324542999268,grad_norm: 0.9272577267150379, iteration: 203106
loss: 1.0063519477844238,grad_norm: 0.9999990905616415, iteration: 203107
loss: 1.0030241012573242,grad_norm: 0.9811314785412387, iteration: 203108
loss: 1.002219557762146,grad_norm: 0.9999992035444335, iteration: 203109
loss: 1.0128148794174194,grad_norm: 0.9890088961449699, iteration: 203110
loss: 1.0197728872299194,grad_norm: 0.9111320581896443, iteration: 203111
loss: 1.0157808065414429,grad_norm: 0.9999992800436757, iteration: 203112
loss: 0.9972296953201294,grad_norm: 0.876640333781691, iteration: 203113
loss: 1.025396466255188,grad_norm: 0.9999991356382666, iteration: 203114
loss: 1.0002219676971436,grad_norm: 0.9381228484307451, iteration: 203115
loss: 1.0006648302078247,grad_norm: 0.9999990516302846, iteration: 203116
loss: 1.0464140176773071,grad_norm: 0.9999994652560891, iteration: 203117
loss: 0.9925209283828735,grad_norm: 0.999999122966192, iteration: 203118
loss: 1.0138094425201416,grad_norm: 0.9942627506776078, iteration: 203119
loss: 0.9441541433334351,grad_norm: 0.9182821597233141, iteration: 203120
loss: 0.9966589212417603,grad_norm: 0.9398993412724468, iteration: 203121
loss: 1.0068135261535645,grad_norm: 0.9721659718687045, iteration: 203122
loss: 0.9510660171508789,grad_norm: 0.8719237574144267, iteration: 203123
loss: 0.979597270488739,grad_norm: 0.984141066180671, iteration: 203124
loss: 1.0706785917282104,grad_norm: 0.9999991645190558, iteration: 203125
loss: 0.9877287149429321,grad_norm: 0.9999989209724491, iteration: 203126
loss: 0.9798716306686401,grad_norm: 0.7900471675587541, iteration: 203127
loss: 1.0071310997009277,grad_norm: 0.9999990876039005, iteration: 203128
loss: 1.0089025497436523,grad_norm: 0.9999990374994323, iteration: 203129
loss: 1.0039736032485962,grad_norm: 0.9999991200274229, iteration: 203130
loss: 1.0008732080459595,grad_norm: 0.9999989669611281, iteration: 203131
loss: 1.0194923877716064,grad_norm: 0.8636739358803966, iteration: 203132
loss: 1.001563310623169,grad_norm: 0.9665670633417974, iteration: 203133
loss: 1.013730525970459,grad_norm: 0.9812122612717562, iteration: 203134
loss: 1.0155714750289917,grad_norm: 0.8020168047303899, iteration: 203135
loss: 1.057542085647583,grad_norm: 0.9999995683971469, iteration: 203136
loss: 0.9729518890380859,grad_norm: 0.9999993168243616, iteration: 203137
loss: 0.9952695369720459,grad_norm: 0.9035681232607569, iteration: 203138
loss: 1.005448341369629,grad_norm: 0.9737405042645004, iteration: 203139
loss: 0.9540947675704956,grad_norm: 0.9046123430799529, iteration: 203140
loss: 0.9955997467041016,grad_norm: 0.8087842048036615, iteration: 203141
loss: 0.9448303580284119,grad_norm: 0.9738582633983858, iteration: 203142
loss: 1.0136620998382568,grad_norm: 0.994495132937037, iteration: 203143
loss: 0.9986513257026672,grad_norm: 0.9999994656123297, iteration: 203144
loss: 1.0136700868606567,grad_norm: 0.8243742331532374, iteration: 203145
loss: 0.9807794690132141,grad_norm: 0.9380346066001484, iteration: 203146
loss: 0.9835224151611328,grad_norm: 0.9999990679971822, iteration: 203147
loss: 1.0114034414291382,grad_norm: 0.8334513552129166, iteration: 203148
loss: 1.0070946216583252,grad_norm: 0.9745140725237451, iteration: 203149
loss: 1.0240070819854736,grad_norm: 0.9999997432144555, iteration: 203150
loss: 1.016228199005127,grad_norm: 0.9999992903877292, iteration: 203151
loss: 0.9835809469223022,grad_norm: 0.999999141847413, iteration: 203152
loss: 1.0073293447494507,grad_norm: 0.7912054938184524, iteration: 203153
loss: 1.0504131317138672,grad_norm: 0.9999991780836881, iteration: 203154
loss: 1.0037410259246826,grad_norm: 0.9999989954770718, iteration: 203155
loss: 1.0070863962173462,grad_norm: 0.8570183924475449, iteration: 203156
loss: 0.9913675785064697,grad_norm: 0.8688610989984866, iteration: 203157
loss: 1.0336296558380127,grad_norm: 0.9999992375833742, iteration: 203158
loss: 0.9986789226531982,grad_norm: 0.8780158655696496, iteration: 203159
loss: 1.0275129079818726,grad_norm: 0.9999989738220095, iteration: 203160
loss: 0.9831169843673706,grad_norm: 0.9999993485598373, iteration: 203161
loss: 0.9680679440498352,grad_norm: 0.9917655473234986, iteration: 203162
loss: 0.9642707705497742,grad_norm: 0.9747913877025477, iteration: 203163
loss: 0.979759693145752,grad_norm: 0.9999989908879007, iteration: 203164
loss: 0.9967689514160156,grad_norm: 0.9868544365691543, iteration: 203165
loss: 0.9874014258384705,grad_norm: 0.9210445359141732, iteration: 203166
loss: 1.0129330158233643,grad_norm: 0.8773211683577483, iteration: 203167
loss: 1.0167618989944458,grad_norm: 0.9713662496423038, iteration: 203168
loss: 0.9853156208992004,grad_norm: 0.9999991584846478, iteration: 203169
loss: 1.0133459568023682,grad_norm: 0.9999992509102453, iteration: 203170
loss: 0.9995716214179993,grad_norm: 0.9947672865898406, iteration: 203171
loss: 0.9846059679985046,grad_norm: 0.9253646533877351, iteration: 203172
loss: 1.0328787565231323,grad_norm: 0.9999997013105287, iteration: 203173
loss: 1.0111445188522339,grad_norm: 0.9999990628881478, iteration: 203174
loss: 0.9863893389701843,grad_norm: 0.9180806083088215, iteration: 203175
loss: 1.0275890827178955,grad_norm: 0.9764097435178806, iteration: 203176
loss: 1.0108319520950317,grad_norm: 0.9999996856531296, iteration: 203177
loss: 0.9551698565483093,grad_norm: 0.9999991229129395, iteration: 203178
loss: 0.9821112155914307,grad_norm: 0.9999990273592309, iteration: 203179
loss: 0.968349039554596,grad_norm: 0.9283955537194729, iteration: 203180
loss: 1.0206378698349,grad_norm: 0.8799652373858737, iteration: 203181
loss: 1.0987989902496338,grad_norm: 0.9999995432167453, iteration: 203182
loss: 0.9670534133911133,grad_norm: 0.9295256020620792, iteration: 203183
loss: 0.9836373329162598,grad_norm: 0.9999993221532641, iteration: 203184
loss: 1.0194367170333862,grad_norm: 0.9017294157914462, iteration: 203185
loss: 0.9741877913475037,grad_norm: 0.8707108150960311, iteration: 203186
loss: 1.0230156183242798,grad_norm: 0.9999991131237812, iteration: 203187
loss: 1.0144219398498535,grad_norm: 0.8698138002958236, iteration: 203188
loss: 0.9765233397483826,grad_norm: 0.8163929038475001, iteration: 203189
loss: 0.9797739386558533,grad_norm: 0.9999991885413773, iteration: 203190
loss: 1.0002310276031494,grad_norm: 0.9956112725840162, iteration: 203191
loss: 1.005859375,grad_norm: 0.9999992167801302, iteration: 203192
loss: 1.0282591581344604,grad_norm: 0.8975327673390966, iteration: 203193
loss: 1.0316046476364136,grad_norm: 0.8715521580056552, iteration: 203194
loss: 0.9710063934326172,grad_norm: 0.9787169281187176, iteration: 203195
loss: 0.991776704788208,grad_norm: 0.8285837007482237, iteration: 203196
loss: 1.007538080215454,grad_norm: 0.8930643172365361, iteration: 203197
loss: 0.9640668630599976,grad_norm: 0.9999990226216141, iteration: 203198
loss: 1.0137156248092651,grad_norm: 0.9105805454861655, iteration: 203199
loss: 1.0519769191741943,grad_norm: 0.9999995271526335, iteration: 203200
loss: 1.032211422920227,grad_norm: 0.9999990060289855, iteration: 203201
loss: 1.0053560733795166,grad_norm: 0.9999992126619575, iteration: 203202
loss: 1.0232666730880737,grad_norm: 0.9999990190674501, iteration: 203203
loss: 0.9988133311271667,grad_norm: 0.9902878233490932, iteration: 203204
loss: 0.997038722038269,grad_norm: 0.9999998447600438, iteration: 203205
loss: 1.0490881204605103,grad_norm: 0.9120567815036159, iteration: 203206
loss: 0.9667725563049316,grad_norm: 0.999999043217, iteration: 203207
loss: 1.0264331102371216,grad_norm: 0.9127954065138919, iteration: 203208
loss: 1.0049232244491577,grad_norm: 0.9999991956145163, iteration: 203209
loss: 1.0129661560058594,grad_norm: 0.9999989116326978, iteration: 203210
loss: 1.0314074754714966,grad_norm: 0.8074038149581475, iteration: 203211
loss: 1.0052920579910278,grad_norm: 0.7914101766376169, iteration: 203212
loss: 0.9887824654579163,grad_norm: 0.9288409051980329, iteration: 203213
loss: 0.9936437606811523,grad_norm: 0.909090436246636, iteration: 203214
loss: 0.9810872673988342,grad_norm: 0.8918788954633253, iteration: 203215
loss: 0.9460093975067139,grad_norm: 0.9971285662952796, iteration: 203216
loss: 0.9919469952583313,grad_norm: 0.9790308459717002, iteration: 203217
loss: 1.0152798891067505,grad_norm: 0.7273443570358175, iteration: 203218
loss: 1.0215227603912354,grad_norm: 0.832321623141816, iteration: 203219
loss: 0.9791561961174011,grad_norm: 0.8570577291566606, iteration: 203220
loss: 1.0193346738815308,grad_norm: 0.9280686571281574, iteration: 203221
loss: 1.0002148151397705,grad_norm: 0.7493577581255894, iteration: 203222
loss: 1.0362801551818848,grad_norm: 0.9999990967876099, iteration: 203223
loss: 1.0126019716262817,grad_norm: 0.9999992096160352, iteration: 203224
loss: 1.027889609336853,grad_norm: 0.8438996769261943, iteration: 203225
loss: 0.9980466961860657,grad_norm: 0.8600527376272091, iteration: 203226
loss: 1.0361906290054321,grad_norm: 0.999999162709197, iteration: 203227
loss: 0.9482359886169434,grad_norm: 0.9999990513023636, iteration: 203228
loss: 1.024125337600708,grad_norm: 0.9999991451738509, iteration: 203229
loss: 0.9859610199928284,grad_norm: 0.9076251407174234, iteration: 203230
loss: 1.0177099704742432,grad_norm: 0.8957736169511908, iteration: 203231
loss: 0.9986556768417358,grad_norm: 0.9999991760656638, iteration: 203232
loss: 0.9905162453651428,grad_norm: 0.9999991755865649, iteration: 203233
loss: 0.9706816673278809,grad_norm: 0.993965540933226, iteration: 203234
loss: 1.0183237791061401,grad_norm: 0.8961483589301155, iteration: 203235
loss: 1.0293043851852417,grad_norm: 0.9999991918769846, iteration: 203236
loss: 1.0230754613876343,grad_norm: 0.9999989627623808, iteration: 203237
loss: 1.0291587114334106,grad_norm: 0.9245432197843713, iteration: 203238
loss: 1.023266315460205,grad_norm: 0.9999992708043569, iteration: 203239
loss: 1.0000393390655518,grad_norm: 0.778665809884011, iteration: 203240
loss: 1.0212475061416626,grad_norm: 0.9999991082726369, iteration: 203241
loss: 0.9999195337295532,grad_norm: 0.9628074993383365, iteration: 203242
loss: 1.0089595317840576,grad_norm: 0.999999175939144, iteration: 203243
loss: 1.0245352983474731,grad_norm: 0.8845095379288178, iteration: 203244
loss: 0.972149670124054,grad_norm: 0.9777936965596821, iteration: 203245
loss: 1.0215898752212524,grad_norm: 0.9565379046586878, iteration: 203246
loss: 0.9464638829231262,grad_norm: 0.9077468934919981, iteration: 203247
loss: 0.9845094084739685,grad_norm: 0.8217283687904198, iteration: 203248
loss: 0.9633678793907166,grad_norm: 0.9930353417704783, iteration: 203249
loss: 1.046459674835205,grad_norm: 0.9999991717784404, iteration: 203250
loss: 0.99991375207901,grad_norm: 0.8963244864262878, iteration: 203251
loss: 0.9675480127334595,grad_norm: 0.9999991750772037, iteration: 203252
loss: 1.020499348640442,grad_norm: 0.8059379705417817, iteration: 203253
loss: 0.9940184354782104,grad_norm: 0.8884849576294602, iteration: 203254
loss: 1.013712763786316,grad_norm: 0.872674157058939, iteration: 203255
loss: 1.0319474935531616,grad_norm: 0.974996409156881, iteration: 203256
loss: 1.0130044221878052,grad_norm: 0.8697021314601919, iteration: 203257
loss: 1.0011812448501587,grad_norm: 0.9240298320785643, iteration: 203258
loss: 0.9925660490989685,grad_norm: 0.8375862892441179, iteration: 203259
loss: 1.0166566371917725,grad_norm: 0.9423950050893379, iteration: 203260
loss: 1.0175963640213013,grad_norm: 0.999999067384324, iteration: 203261
loss: 1.0193605422973633,grad_norm: 0.9999993401547411, iteration: 203262
loss: 0.9842402338981628,grad_norm: 0.9067575410009612, iteration: 203263
loss: 0.9976947903633118,grad_norm: 0.8788096997944964, iteration: 203264
loss: 1.0198549032211304,grad_norm: 0.8289658794809556, iteration: 203265
loss: 0.997634768486023,grad_norm: 0.8626433549160357, iteration: 203266
loss: 1.002873182296753,grad_norm: 0.9704027400517757, iteration: 203267
loss: 1.0036921501159668,grad_norm: 0.9999990918010649, iteration: 203268
loss: 1.0087312459945679,grad_norm: 0.9132809123797389, iteration: 203269
loss: 0.9931966066360474,grad_norm: 0.8385748250833408, iteration: 203270
loss: 0.9808984398841858,grad_norm: 0.9413354701009896, iteration: 203271
loss: 1.0014472007751465,grad_norm: 0.9327450966508507, iteration: 203272
loss: 1.018551230430603,grad_norm: 0.8864995369060519, iteration: 203273
loss: 1.0251742601394653,grad_norm: 0.9870416053845368, iteration: 203274
loss: 1.0202008485794067,grad_norm: 0.8906920549753284, iteration: 203275
loss: 1.0577728748321533,grad_norm: 0.9999995993075389, iteration: 203276
loss: 0.9970468282699585,grad_norm: 0.9999996973612161, iteration: 203277
loss: 0.98204505443573,grad_norm: 0.7985668864353342, iteration: 203278
loss: 0.9947510361671448,grad_norm: 0.8400669220072897, iteration: 203279
loss: 0.9697014689445496,grad_norm: 0.9999989505770132, iteration: 203280
loss: 1.0030241012573242,grad_norm: 0.8044113961922893, iteration: 203281
loss: 0.999323844909668,grad_norm: 0.9294132224002851, iteration: 203282
loss: 1.0151585340499878,grad_norm: 0.9999989213950335, iteration: 203283
loss: 0.985312283039093,grad_norm: 0.9266653190084628, iteration: 203284
loss: 1.00456964969635,grad_norm: 0.9999991095882514, iteration: 203285
loss: 0.9831625819206238,grad_norm: 0.7766103628036588, iteration: 203286
loss: 0.9923989772796631,grad_norm: 0.8640745458070558, iteration: 203287
loss: 0.9892631769180298,grad_norm: 0.9999991515610631, iteration: 203288
loss: 1.0097196102142334,grad_norm: 0.9261685834032725, iteration: 203289
loss: 0.9985578656196594,grad_norm: 0.8972641168685556, iteration: 203290
loss: 0.9782600402832031,grad_norm: 0.8538438397680446, iteration: 203291
loss: 1.0374919176101685,grad_norm: 0.99999994942276, iteration: 203292
loss: 1.0060316324234009,grad_norm: 0.9209325902867584, iteration: 203293
loss: 0.9654735326766968,grad_norm: 0.86911131731169, iteration: 203294
loss: 1.018012285232544,grad_norm: 0.9999991402470215, iteration: 203295
loss: 0.9908691644668579,grad_norm: 0.9999990842612889, iteration: 203296
loss: 0.9973658919334412,grad_norm: 0.9999991544676687, iteration: 203297
loss: 0.9996461272239685,grad_norm: 0.9999991117315915, iteration: 203298
loss: 1.0370254516601562,grad_norm: 0.9999990971965776, iteration: 203299
loss: 1.0143972635269165,grad_norm: 0.9999991081222688, iteration: 203300
loss: 1.0224751234054565,grad_norm: 0.9999993830552019, iteration: 203301
loss: 1.0320935249328613,grad_norm: 0.9999998647392274, iteration: 203302
loss: 1.0211375951766968,grad_norm: 0.7809946225550659, iteration: 203303
loss: 0.9977982044219971,grad_norm: 0.9999991336243133, iteration: 203304
loss: 0.9843905568122864,grad_norm: 0.9495321037981359, iteration: 203305
loss: 0.9968510866165161,grad_norm: 0.9999990711208937, iteration: 203306
loss: 0.9879432916641235,grad_norm: 0.9462112641561382, iteration: 203307
loss: 1.076825499534607,grad_norm: 0.9999994444504758, iteration: 203308
loss: 1.014513373374939,grad_norm: 0.880637592908579, iteration: 203309
loss: 0.9792832136154175,grad_norm: 0.9482162444791252, iteration: 203310
loss: 0.9925063252449036,grad_norm: 0.9494808618123595, iteration: 203311
loss: 0.9876354932785034,grad_norm: 0.9470412072926948, iteration: 203312
loss: 1.0080101490020752,grad_norm: 0.8540798707221557, iteration: 203313
loss: 1.00338613986969,grad_norm: 0.7870535491315823, iteration: 203314
loss: 1.0399991273880005,grad_norm: 0.9999996340342822, iteration: 203315
loss: 0.9819572567939758,grad_norm: 0.9999996629424683, iteration: 203316
loss: 0.978284478187561,grad_norm: 0.9773989241666616, iteration: 203317
loss: 1.0006239414215088,grad_norm: 0.9999991130102442, iteration: 203318
loss: 1.019976019859314,grad_norm: 0.8696565837914092, iteration: 203319
loss: 0.9949690103530884,grad_norm: 0.9999990553324273, iteration: 203320
loss: 1.0104855298995972,grad_norm: 0.9999990922647192, iteration: 203321
loss: 1.026774525642395,grad_norm: 0.8926095536543857, iteration: 203322
loss: 0.9885475635528564,grad_norm: 0.9261283395284868, iteration: 203323
loss: 0.9860280156135559,grad_norm: 0.9721742686639946, iteration: 203324
loss: 0.9746574759483337,grad_norm: 0.8439569224333243, iteration: 203325
loss: 1.0607270002365112,grad_norm: 0.9999997071497102, iteration: 203326
loss: 1.0290693044662476,grad_norm: 0.8153166823384019, iteration: 203327
loss: 0.9775083661079407,grad_norm: 0.9071730718504236, iteration: 203328
loss: 0.9682261347770691,grad_norm: 0.9999991094135883, iteration: 203329
loss: 0.9858123064041138,grad_norm: 0.9999989174601905, iteration: 203330
loss: 0.9969280958175659,grad_norm: 0.8147288140416296, iteration: 203331
loss: 1.0306049585342407,grad_norm: 0.9745670230128624, iteration: 203332
loss: 1.0248591899871826,grad_norm: 0.9361705996182117, iteration: 203333
loss: 1.0353434085845947,grad_norm: 0.9999993653005254, iteration: 203334
loss: 1.0020991563796997,grad_norm: 0.9999991134432964, iteration: 203335
loss: 0.9572501182556152,grad_norm: 0.9300603095435319, iteration: 203336
loss: 1.0134518146514893,grad_norm: 0.8595760728321855, iteration: 203337
loss: 1.0188605785369873,grad_norm: 0.9912483348108432, iteration: 203338
loss: 0.9892826676368713,grad_norm: 0.9279913597808056, iteration: 203339
loss: 0.9464830160140991,grad_norm: 0.9999991630087202, iteration: 203340
loss: 1.0278856754302979,grad_norm: 0.8673392764199799, iteration: 203341
loss: 0.9989431500434875,grad_norm: 0.9116841531504601, iteration: 203342
loss: 0.9943543076515198,grad_norm: 0.9999989789867023, iteration: 203343
loss: 0.9780935049057007,grad_norm: 0.9428502025945777, iteration: 203344
loss: 1.0041840076446533,grad_norm: 0.9635964050726803, iteration: 203345
loss: 0.982248067855835,grad_norm: 0.7719358139908187, iteration: 203346
loss: 0.9975560307502747,grad_norm: 0.9637632863669795, iteration: 203347
loss: 1.0233598947525024,grad_norm: 0.9647963038747007, iteration: 203348
loss: 1.0216078758239746,grad_norm: 0.9999991571511908, iteration: 203349
loss: 1.0177747011184692,grad_norm: 0.9999991790440688, iteration: 203350
loss: 0.9799625873565674,grad_norm: 0.7348201969054675, iteration: 203351
loss: 1.0243582725524902,grad_norm: 0.8995898442606585, iteration: 203352
loss: 0.9865884184837341,grad_norm: 0.9999990008971384, iteration: 203353
loss: 0.997382640838623,grad_norm: 0.8430910953336593, iteration: 203354
loss: 1.0450228452682495,grad_norm: 0.9960792653079464, iteration: 203355
loss: 1.00790536403656,grad_norm: 0.9999990828242725, iteration: 203356
loss: 0.9611303210258484,grad_norm: 0.9999988851695161, iteration: 203357
loss: 1.0050562620162964,grad_norm: 0.9757808659160306, iteration: 203358
loss: 0.9863817691802979,grad_norm: 0.9185392258907736, iteration: 203359
loss: 0.9853323101997375,grad_norm: 0.9437682836590201, iteration: 203360
loss: 0.9581224918365479,grad_norm: 0.9706842365316748, iteration: 203361
loss: 1.0118863582611084,grad_norm: 0.9999992762919931, iteration: 203362
loss: 1.002590298652649,grad_norm: 0.999999081847077, iteration: 203363
loss: 1.0150269269943237,grad_norm: 0.9928807713721263, iteration: 203364
loss: 1.0068447589874268,grad_norm: 0.9999991594547477, iteration: 203365
loss: 1.0122393369674683,grad_norm: 0.8062808170983674, iteration: 203366
loss: 1.0163521766662598,grad_norm: 0.9428162759029998, iteration: 203367
loss: 1.0055997371673584,grad_norm: 0.9999990276170644, iteration: 203368
loss: 0.9648118019104004,grad_norm: 0.8622077429153326, iteration: 203369
loss: 0.9974722266197205,grad_norm: 0.8922044514822421, iteration: 203370
loss: 1.0294148921966553,grad_norm: 0.9159887469933973, iteration: 203371
loss: 1.032748818397522,grad_norm: 0.9999993285933324, iteration: 203372
loss: 0.9964693784713745,grad_norm: 0.8112371095262668, iteration: 203373
loss: 0.9894302487373352,grad_norm: 0.8931935603183478, iteration: 203374
loss: 0.993844747543335,grad_norm: 0.9425815470076793, iteration: 203375
loss: 0.9848906993865967,grad_norm: 0.9999991345811668, iteration: 203376
loss: 1.0094094276428223,grad_norm: 0.8060478165211796, iteration: 203377
loss: 1.0116063356399536,grad_norm: 0.9999991590160745, iteration: 203378
loss: 0.9858356714248657,grad_norm: 0.999999114529834, iteration: 203379
loss: 0.9690507054328918,grad_norm: 0.999172880679249, iteration: 203380
loss: 0.9720131158828735,grad_norm: 0.9999990458033516, iteration: 203381
loss: 1.0201711654663086,grad_norm: 0.9999990161667418, iteration: 203382
loss: 1.0013185739517212,grad_norm: 0.8814865608362555, iteration: 203383
loss: 0.9760841131210327,grad_norm: 0.8803943982689482, iteration: 203384
loss: 1.0149544477462769,grad_norm: 0.8706553094119369, iteration: 203385
loss: 1.0139870643615723,grad_norm: 0.8463163616474807, iteration: 203386
loss: 1.0361000299453735,grad_norm: 0.8905441124348091, iteration: 203387
loss: 0.9982943534851074,grad_norm: 0.968196172040647, iteration: 203388
loss: 0.9931344389915466,grad_norm: 0.9999992192766521, iteration: 203389
loss: 1.0008238554000854,grad_norm: 0.8251128102448388, iteration: 203390
loss: 0.9912336468696594,grad_norm: 0.8981173580124802, iteration: 203391
loss: 0.9904319047927856,grad_norm: 0.9112644367381244, iteration: 203392
loss: 0.9579323530197144,grad_norm: 0.9079613993430441, iteration: 203393
loss: 0.9961999654769897,grad_norm: 0.8693059345784846, iteration: 203394
loss: 1.0007315874099731,grad_norm: 0.9642850414192126, iteration: 203395
loss: 0.9818131327629089,grad_norm: 0.9307357700130174, iteration: 203396
loss: 0.996981680393219,grad_norm: 0.9830575622916022, iteration: 203397
loss: 0.9868593215942383,grad_norm: 0.9999992276945089, iteration: 203398
loss: 1.0436509847640991,grad_norm: 0.9068747894752741, iteration: 203399
loss: 0.9993917942047119,grad_norm: 0.99517177266871, iteration: 203400
loss: 0.9666401147842407,grad_norm: 0.7414013224932052, iteration: 203401
loss: 1.0022249221801758,grad_norm: 0.9825209794017732, iteration: 203402
loss: 0.9941985011100769,grad_norm: 0.9999993301283111, iteration: 203403
loss: 1.0199164152145386,grad_norm: 0.9999990939301768, iteration: 203404
loss: 0.9744216203689575,grad_norm: 0.9924048289064158, iteration: 203405
loss: 1.0051745176315308,grad_norm: 0.9923842165648414, iteration: 203406
loss: 0.9865482449531555,grad_norm: 0.9502583113948138, iteration: 203407
loss: 1.0062386989593506,grad_norm: 0.8925355412301886, iteration: 203408
loss: 1.0368614196777344,grad_norm: 0.9194015752818423, iteration: 203409
loss: 0.9864898920059204,grad_norm: 0.9006149270105199, iteration: 203410
loss: 1.036987066268921,grad_norm: 0.840017269381269, iteration: 203411
loss: 0.9797676205635071,grad_norm: 0.845992344218417, iteration: 203412
loss: 0.9840959310531616,grad_norm: 0.99999914244224, iteration: 203413
loss: 1.0351473093032837,grad_norm: 0.9237001205553714, iteration: 203414
loss: 0.9945521950721741,grad_norm: 0.8679375963040276, iteration: 203415
loss: 1.0159350633621216,grad_norm: 0.8543910541250952, iteration: 203416
loss: 0.9963023662567139,grad_norm: 0.8610973641810731, iteration: 203417
loss: 1.0244629383087158,grad_norm: 0.9999991987215013, iteration: 203418
loss: 1.0073951482772827,grad_norm: 0.8343459905982223, iteration: 203419
loss: 0.9928485155105591,grad_norm: 0.9062382180200494, iteration: 203420
loss: 1.0042591094970703,grad_norm: 0.9999997767392103, iteration: 203421
loss: 0.9909336566925049,grad_norm: 0.8527380865205595, iteration: 203422
loss: 1.0283440351486206,grad_norm: 0.9999991185415372, iteration: 203423
loss: 0.9967167973518372,grad_norm: 0.8485136362971266, iteration: 203424
loss: 0.9894256591796875,grad_norm: 0.9013434515784717, iteration: 203425
loss: 1.0130664110183716,grad_norm: 0.9390149436231344, iteration: 203426
loss: 0.9897648692131042,grad_norm: 0.9999990326174009, iteration: 203427
loss: 0.9955500364303589,grad_norm: 0.9264011281190648, iteration: 203428
loss: 1.0031852722167969,grad_norm: 0.8352892167533397, iteration: 203429
loss: 0.9980546236038208,grad_norm: 0.9284925667048127, iteration: 203430
loss: 1.0312622785568237,grad_norm: 0.9855027578970658, iteration: 203431
loss: 1.0096808671951294,grad_norm: 0.8469693178001092, iteration: 203432
loss: 0.9806636571884155,grad_norm: 0.9999990632794566, iteration: 203433
loss: 1.0138707160949707,grad_norm: 0.8459969046696254, iteration: 203434
loss: 0.9971281290054321,grad_norm: 0.8569339863503231, iteration: 203435
loss: 1.0114799737930298,grad_norm: 0.8112491719669322, iteration: 203436
loss: 1.001259446144104,grad_norm: 0.8440961457613724, iteration: 203437
loss: 1.0347760915756226,grad_norm: 0.9999991922789689, iteration: 203438
loss: 1.018389344215393,grad_norm: 0.9328421592919551, iteration: 203439
loss: 0.994387149810791,grad_norm: 0.9939813914999908, iteration: 203440
loss: 0.9662277698516846,grad_norm: 0.9084651526397274, iteration: 203441
loss: 1.0158404111862183,grad_norm: 0.8797151380334897, iteration: 203442
loss: 0.9938598871231079,grad_norm: 0.9999995464207505, iteration: 203443
loss: 1.0071210861206055,grad_norm: 0.7986085980487134, iteration: 203444
loss: 1.0110862255096436,grad_norm: 0.9750761098519041, iteration: 203445
loss: 1.0028748512268066,grad_norm: 0.8695627282193152, iteration: 203446
loss: 1.0394647121429443,grad_norm: 0.9999991734992065, iteration: 203447
loss: 1.0318834781646729,grad_norm: 0.9999990868170088, iteration: 203448
loss: 0.97017502784729,grad_norm: 0.8367630789885857, iteration: 203449
loss: 1.0093554258346558,grad_norm: 0.9999995898356889, iteration: 203450
loss: 0.9927937388420105,grad_norm: 0.8346901249970964, iteration: 203451
loss: 1.020698070526123,grad_norm: 0.9999989785731405, iteration: 203452
loss: 1.0082453489303589,grad_norm: 0.9999991848508878, iteration: 203453
loss: 0.9971311092376709,grad_norm: 0.799736871813055, iteration: 203454
loss: 1.0116338729858398,grad_norm: 0.8061916938833136, iteration: 203455
loss: 1.004634141921997,grad_norm: 0.9230488597033204, iteration: 203456
loss: 0.9863001704216003,grad_norm: 0.9999990800459361, iteration: 203457
loss: 1.013353943824768,grad_norm: 0.9999991974666597, iteration: 203458
loss: 1.0070652961730957,grad_norm: 0.9999990993704934, iteration: 203459
loss: 0.9843578338623047,grad_norm: 0.8739353328103506, iteration: 203460
loss: 1.0054409503936768,grad_norm: 0.879213357332383, iteration: 203461
loss: 1.0514748096466064,grad_norm: 0.9581338124500703, iteration: 203462
loss: 0.9660289883613586,grad_norm: 0.8579750696447657, iteration: 203463
loss: 1.0118614435195923,grad_norm: 0.9999989749862407, iteration: 203464
loss: 0.9976294636726379,grad_norm: 0.7632733660437089, iteration: 203465
loss: 1.0117424726486206,grad_norm: 0.8222033120423875, iteration: 203466
loss: 0.9918771982192993,grad_norm: 0.935568436989785, iteration: 203467
loss: 0.9902228116989136,grad_norm: 0.949657627318314, iteration: 203468
loss: 0.9821045398712158,grad_norm: 0.999999031846247, iteration: 203469
loss: 1.0006552934646606,grad_norm: 0.831103479167908, iteration: 203470
loss: 0.9666452407836914,grad_norm: 0.9271210323901461, iteration: 203471
loss: 1.002458930015564,grad_norm: 0.9999990308018873, iteration: 203472
loss: 0.9848735332489014,grad_norm: 0.9999991658927697, iteration: 203473
loss: 0.9799100160598755,grad_norm: 0.9124559945157003, iteration: 203474
loss: 1.0192934274673462,grad_norm: 0.9999991778683787, iteration: 203475
loss: 1.0144706964492798,grad_norm: 0.8650821232910592, iteration: 203476
loss: 1.0123748779296875,grad_norm: 0.8274834171845351, iteration: 203477
loss: 0.9814030528068542,grad_norm: 0.9999990546696915, iteration: 203478
loss: 0.9604576230049133,grad_norm: 0.9979764353497913, iteration: 203479
loss: 1.0125377178192139,grad_norm: 0.9999991916980777, iteration: 203480
loss: 1.014333724975586,grad_norm: 0.9999989842989094, iteration: 203481
loss: 1.047891616821289,grad_norm: 0.9363368212417701, iteration: 203482
loss: 1.0839134454727173,grad_norm: 0.9211271460320689, iteration: 203483
loss: 0.9714910387992859,grad_norm: 0.9366950913006702, iteration: 203484
loss: 1.0098118782043457,grad_norm: 0.9999990678166848, iteration: 203485
loss: 0.9538823366165161,grad_norm: 0.9477853811256056, iteration: 203486
loss: 1.0050286054611206,grad_norm: 0.8548080526173537, iteration: 203487
loss: 0.9814659953117371,grad_norm: 0.8252508572657098, iteration: 203488
loss: 0.9918879270553589,grad_norm: 0.9362470775924708, iteration: 203489
loss: 1.021386981010437,grad_norm: 0.8986884644457158, iteration: 203490
loss: 0.9723076224327087,grad_norm: 0.8504497707267963, iteration: 203491
loss: 1.0066536664962769,grad_norm: 0.9723608247424441, iteration: 203492
loss: 1.0083403587341309,grad_norm: 0.9691370119435285, iteration: 203493
loss: 0.9948090314865112,grad_norm: 0.8335030051329784, iteration: 203494
loss: 0.9889865517616272,grad_norm: 0.9999990510680944, iteration: 203495
loss: 0.9858430624008179,grad_norm: 0.8613501196030767, iteration: 203496
loss: 1.0048445463180542,grad_norm: 0.8455523988395106, iteration: 203497
loss: 1.0093178749084473,grad_norm: 0.8450365869010238, iteration: 203498
loss: 1.0027936697006226,grad_norm: 0.9711612112832028, iteration: 203499
loss: 0.9810733199119568,grad_norm: 0.9999992166098486, iteration: 203500
loss: 1.0051754713058472,grad_norm: 0.9514065975334628, iteration: 203501
loss: 0.968505322933197,grad_norm: 0.9999991839015395, iteration: 203502
loss: 0.9654578566551208,grad_norm: 0.9999991104724877, iteration: 203503
loss: 0.9909265637397766,grad_norm: 0.9999990841021422, iteration: 203504
loss: 1.0204285383224487,grad_norm: 0.8023383195963525, iteration: 203505
loss: 0.9978273510932922,grad_norm: 0.9723137157854539, iteration: 203506
loss: 1.0467385053634644,grad_norm: 0.8482811923736938, iteration: 203507
loss: 1.005340576171875,grad_norm: 0.9975546517313475, iteration: 203508
loss: 0.9772074818611145,grad_norm: 0.9999990612869738, iteration: 203509
loss: 1.0267798900604248,grad_norm: 0.9999993709606004, iteration: 203510
loss: 0.9611587524414062,grad_norm: 0.9121027024937325, iteration: 203511
loss: 0.9650962352752686,grad_norm: 0.8086908946997283, iteration: 203512
loss: 1.0203310251235962,grad_norm: 0.9999990637701379, iteration: 203513
loss: 0.9908426403999329,grad_norm: 0.7746365710511437, iteration: 203514
loss: 0.9970377683639526,grad_norm: 0.939501138016183, iteration: 203515
loss: 1.0060436725616455,grad_norm: 0.8577159176465027, iteration: 203516
loss: 0.9885345697402954,grad_norm: 0.9999990733391847, iteration: 203517
loss: 1.038986086845398,grad_norm: 0.9999991216777575, iteration: 203518
loss: 1.0098087787628174,grad_norm: 0.8949735224970967, iteration: 203519
loss: 1.0130573511123657,grad_norm: 0.993741844192256, iteration: 203520
loss: 0.9723079800605774,grad_norm: 0.8346036152717379, iteration: 203521
loss: 1.0171061754226685,grad_norm: 0.9651222163750494, iteration: 203522
loss: 0.9944412112236023,grad_norm: 0.8781709027298864, iteration: 203523
loss: 1.0103046894073486,grad_norm: 0.854493102058556, iteration: 203524
loss: 0.9977930784225464,grad_norm: 0.999999298510021, iteration: 203525
loss: 0.9905117750167847,grad_norm: 0.999999057120366, iteration: 203526
loss: 0.9940959215164185,grad_norm: 0.9999990913449993, iteration: 203527
loss: 0.9776516556739807,grad_norm: 0.9999991843905394, iteration: 203528
loss: 1.0359487533569336,grad_norm: 0.9999991874209329, iteration: 203529
loss: 1.0217350721359253,grad_norm: 0.9999989462808211, iteration: 203530
loss: 1.0390149354934692,grad_norm: 0.8066970198711692, iteration: 203531
loss: 0.9419529438018799,grad_norm: 0.881100585147336, iteration: 203532
loss: 0.9466467499732971,grad_norm: 0.9999990674656329, iteration: 203533
loss: 0.9901544451713562,grad_norm: 0.9999991546075099, iteration: 203534
loss: 0.9840174317359924,grad_norm: 0.8695056700628465, iteration: 203535
loss: 1.0043485164642334,grad_norm: 0.8487458671613521, iteration: 203536
loss: 1.0002208948135376,grad_norm: 0.8978972658753228, iteration: 203537
loss: 1.0251063108444214,grad_norm: 0.946768667473689, iteration: 203538
loss: 0.9998269081115723,grad_norm: 0.9999990951533447, iteration: 203539
loss: 0.975690484046936,grad_norm: 0.9999988674195708, iteration: 203540
loss: 0.9532517790794373,grad_norm: 0.8557274235935459, iteration: 203541
loss: 0.9606000185012817,grad_norm: 0.9227615841979736, iteration: 203542
loss: 0.9793264865875244,grad_norm: 0.9999991511566881, iteration: 203543
loss: 1.001063346862793,grad_norm: 0.794043441105875, iteration: 203544
loss: 1.002538800239563,grad_norm: 0.9237567296260927, iteration: 203545
loss: 1.011440634727478,grad_norm: 0.9534580176585824, iteration: 203546
loss: 0.9667211771011353,grad_norm: 0.7717105751411233, iteration: 203547
loss: 1.0097260475158691,grad_norm: 0.9999991049337882, iteration: 203548
loss: 0.954781174659729,grad_norm: 0.9999989556030537, iteration: 203549
loss: 1.0061556100845337,grad_norm: 0.9229322005514657, iteration: 203550
loss: 1.0064650774002075,grad_norm: 0.9999992548280318, iteration: 203551
loss: 1.0224288702011108,grad_norm: 0.999999225426816, iteration: 203552
loss: 1.0057315826416016,grad_norm: 0.9999998002853978, iteration: 203553
loss: 0.9732034802436829,grad_norm: 0.9999993745109466, iteration: 203554
loss: 1.0065839290618896,grad_norm: 0.8870810290936126, iteration: 203555
loss: 1.0303093194961548,grad_norm: 0.9873256565807241, iteration: 203556
loss: 1.0000851154327393,grad_norm: 0.9999988839541123, iteration: 203557
loss: 1.0149555206298828,grad_norm: 0.8471333135547348, iteration: 203558
loss: 0.9834069609642029,grad_norm: 0.9697199002745343, iteration: 203559
loss: 1.0126118659973145,grad_norm: 0.9999991128901969, iteration: 203560
loss: 0.9933549761772156,grad_norm: 0.9525338279960234, iteration: 203561
loss: 1.0059863328933716,grad_norm: 0.9999992304264914, iteration: 203562
loss: 0.9493340253829956,grad_norm: 0.957285594568724, iteration: 203563
loss: 1.0010204315185547,grad_norm: 0.9999991024485252, iteration: 203564
loss: 1.0249710083007812,grad_norm: 0.9999991240890524, iteration: 203565
loss: 1.0026439428329468,grad_norm: 0.9763425952393715, iteration: 203566
loss: 0.9697080254554749,grad_norm: 0.9377712785068387, iteration: 203567
loss: 1.0021324157714844,grad_norm: 0.9677160298333318, iteration: 203568
loss: 0.9872843623161316,grad_norm: 0.9294115321050547, iteration: 203569
loss: 1.0083726644515991,grad_norm: 0.9999991363980929, iteration: 203570
loss: 0.9592813849449158,grad_norm: 0.9999993185580588, iteration: 203571
loss: 0.9892985224723816,grad_norm: 0.8119625829677944, iteration: 203572
loss: 0.9944721460342407,grad_norm: 0.9482093759086817, iteration: 203573
loss: 0.9938967823982239,grad_norm: 0.9003609553533045, iteration: 203574
loss: 0.9889359474182129,grad_norm: 0.9551783895349224, iteration: 203575
loss: 0.975181519985199,grad_norm: 0.936050632203626, iteration: 203576
loss: 0.9956775903701782,grad_norm: 0.9999990930267552, iteration: 203577
loss: 0.9925089478492737,grad_norm: 0.959496290882163, iteration: 203578
loss: 0.9778308868408203,grad_norm: 0.9007881970374025, iteration: 203579
loss: 1.0281689167022705,grad_norm: 0.9999990458806911, iteration: 203580
loss: 0.9899433255195618,grad_norm: 0.9435027008750974, iteration: 203581
loss: 0.9681127071380615,grad_norm: 0.8805668996166771, iteration: 203582
loss: 0.9783763289451599,grad_norm: 0.8245649625657082, iteration: 203583
loss: 0.9831964373588562,grad_norm: 0.90734367546224, iteration: 203584
loss: 1.004051685333252,grad_norm: 0.9999997670503146, iteration: 203585
loss: 0.9649052619934082,grad_norm: 0.9999992122150093, iteration: 203586
loss: 0.9954168200492859,grad_norm: 0.937150213597111, iteration: 203587
loss: 0.9958491921424866,grad_norm: 0.8581738561195463, iteration: 203588
loss: 1.0414868593215942,grad_norm: 0.9999994471115788, iteration: 203589
loss: 1.0590656995773315,grad_norm: 0.9999997091630647, iteration: 203590
loss: 1.035884141921997,grad_norm: 0.9474568343904021, iteration: 203591
loss: 0.9733842015266418,grad_norm: 0.999999269319043, iteration: 203592
loss: 1.007374882698059,grad_norm: 0.9999996674579441, iteration: 203593
loss: 0.985683262348175,grad_norm: 0.858547380266196, iteration: 203594
loss: 1.027021050453186,grad_norm: 0.9999990148410991, iteration: 203595
loss: 0.9983338117599487,grad_norm: 0.8614820863751723, iteration: 203596
loss: 1.0369945764541626,grad_norm: 0.9780351853353914, iteration: 203597
loss: 0.9983173608779907,grad_norm: 0.9999991428847601, iteration: 203598
loss: 0.9836101531982422,grad_norm: 0.9998822347276455, iteration: 203599
loss: 1.0244346857070923,grad_norm: 0.913216517451471, iteration: 203600
loss: 0.9811437726020813,grad_norm: 0.9205334037210678, iteration: 203601
loss: 1.0024100542068481,grad_norm: 0.9951697665730121, iteration: 203602
loss: 1.0074297189712524,grad_norm: 0.9999990372196479, iteration: 203603
loss: 0.9975653886795044,grad_norm: 0.8503515851070302, iteration: 203604
loss: 1.0009913444519043,grad_norm: 0.8330973317840658, iteration: 203605
loss: 0.9792160987854004,grad_norm: 0.9028696143903266, iteration: 203606
loss: 1.008906602859497,grad_norm: 0.9999990295640115, iteration: 203607
loss: 1.008427619934082,grad_norm: 0.8509354296279075, iteration: 203608
loss: 0.9842159152030945,grad_norm: 0.9716972059295108, iteration: 203609
loss: 0.9593885540962219,grad_norm: 0.7947078756989344, iteration: 203610
loss: 1.0110242366790771,grad_norm: 0.9999991045800829, iteration: 203611
loss: 0.985429584980011,grad_norm: 0.9999990792934172, iteration: 203612
loss: 0.9718352556228638,grad_norm: 0.8328567863390308, iteration: 203613
loss: 0.9838058352470398,grad_norm: 0.9999991423384421, iteration: 203614
loss: 1.0260443687438965,grad_norm: 0.8486543727935622, iteration: 203615
loss: 1.011292815208435,grad_norm: 0.9843058460420054, iteration: 203616
loss: 0.9905360341072083,grad_norm: 0.9999991277944655, iteration: 203617
loss: 1.0810487270355225,grad_norm: 0.9999990310800553, iteration: 203618
loss: 0.9913070797920227,grad_norm: 0.83896818195818, iteration: 203619
loss: 0.9737507700920105,grad_norm: 0.9999991718636031, iteration: 203620
loss: 0.990241289138794,grad_norm: 0.9419436947896189, iteration: 203621
loss: 0.9966820478439331,grad_norm: 0.8975826064698595, iteration: 203622
loss: 1.0106316804885864,grad_norm: 0.9999990736752812, iteration: 203623
loss: 1.004328966140747,grad_norm: 0.768201103609574, iteration: 203624
loss: 0.9995704889297485,grad_norm: 0.9999990500987862, iteration: 203625
loss: 0.9654052257537842,grad_norm: 0.9601556446884979, iteration: 203626
loss: 0.9721565246582031,grad_norm: 0.9460273650217579, iteration: 203627
loss: 1.0420873165130615,grad_norm: 0.999999252400109, iteration: 203628
loss: 1.0120220184326172,grad_norm: 0.9999991792412743, iteration: 203629
loss: 1.0328660011291504,grad_norm: 0.9247032318664593, iteration: 203630
loss: 1.0044430494308472,grad_norm: 0.8693262169389299, iteration: 203631
loss: 0.9745094180107117,grad_norm: 0.9803062374446785, iteration: 203632
loss: 1.0100091695785522,grad_norm: 0.9999991474662002, iteration: 203633
loss: 0.9717624187469482,grad_norm: 0.8814632359300955, iteration: 203634
loss: 1.0261272192001343,grad_norm: 0.893213172785057, iteration: 203635
loss: 0.9785405993461609,grad_norm: 0.8481376216428524, iteration: 203636
loss: 1.0137979984283447,grad_norm: 0.8968856979204276, iteration: 203637
loss: 0.9827360510826111,grad_norm: 0.9999990666121955, iteration: 203638
loss: 1.0386449098587036,grad_norm: 0.9814952598857646, iteration: 203639
loss: 1.0363961458206177,grad_norm: 0.8737942323270902, iteration: 203640
loss: 1.0029141902923584,grad_norm: 0.9999991642350303, iteration: 203641
loss: 1.0151066780090332,grad_norm: 0.9999999147010443, iteration: 203642
loss: 1.0193748474121094,grad_norm: 0.9933519860224811, iteration: 203643
loss: 1.0953272581100464,grad_norm: 0.9999994953656778, iteration: 203644
loss: 1.035836935043335,grad_norm: 0.9038799556824492, iteration: 203645
loss: 0.9635593295097351,grad_norm: 0.9999990805368331, iteration: 203646
loss: 0.9848867654800415,grad_norm: 0.9966665962951464, iteration: 203647
loss: 0.97829669713974,grad_norm: 0.999999046479114, iteration: 203648
loss: 0.9738671183586121,grad_norm: 0.8767622167402478, iteration: 203649
loss: 1.0336205959320068,grad_norm: 0.7021804806366744, iteration: 203650
loss: 1.0140419006347656,grad_norm: 0.9639922363763647, iteration: 203651
loss: 1.0134508609771729,grad_norm: 0.8635013236678659, iteration: 203652
loss: 0.9685466289520264,grad_norm: 0.8939419236967616, iteration: 203653
loss: 0.9928673505783081,grad_norm: 0.9999990351531016, iteration: 203654
loss: 0.9701807498931885,grad_norm: 0.8221368522716395, iteration: 203655
loss: 0.9887749552726746,grad_norm: 0.8935054440179735, iteration: 203656
loss: 1.0022552013397217,grad_norm: 0.9999990789743229, iteration: 203657
loss: 1.016418695449829,grad_norm: 0.999999210375173, iteration: 203658
loss: 0.9628332257270813,grad_norm: 0.9999994907710091, iteration: 203659
loss: 1.0138009786605835,grad_norm: 0.912579574777108, iteration: 203660
loss: 0.9914517998695374,grad_norm: 0.9999991214316021, iteration: 203661
loss: 0.9836851954460144,grad_norm: 0.9045534894113324, iteration: 203662
loss: 0.972599446773529,grad_norm: 0.9009596105295206, iteration: 203663
loss: 0.9748839735984802,grad_norm: 0.9154294614044204, iteration: 203664
loss: 0.9594887495040894,grad_norm: 0.9999990328549279, iteration: 203665
loss: 0.995715856552124,grad_norm: 0.8658801172558829, iteration: 203666
loss: 0.9798246622085571,grad_norm: 0.8593360806695081, iteration: 203667
loss: 0.9835112690925598,grad_norm: 0.9999991782572815, iteration: 203668
loss: 1.0377088785171509,grad_norm: 0.9438587746900438, iteration: 203669
loss: 0.9680536985397339,grad_norm: 0.8840014820806004, iteration: 203670
loss: 1.0051134824752808,grad_norm: 0.9999990148464004, iteration: 203671
loss: 1.0201525688171387,grad_norm: 0.9795614967519621, iteration: 203672
loss: 0.9933006763458252,grad_norm: 0.9999990479989935, iteration: 203673
loss: 1.010443091392517,grad_norm: 0.826931978860979, iteration: 203674
loss: 1.0103510618209839,grad_norm: 0.8277430197864091, iteration: 203675
loss: 0.9982359409332275,grad_norm: 0.9999990891373749, iteration: 203676
loss: 0.9524226188659668,grad_norm: 0.9097499283419973, iteration: 203677
loss: 1.0058552026748657,grad_norm: 0.9999991213284698, iteration: 203678
loss: 0.969413161277771,grad_norm: 0.9999991200336326, iteration: 203679
loss: 1.0239261388778687,grad_norm: 0.9495864300434497, iteration: 203680
loss: 0.9857367873191833,grad_norm: 0.9643267830847085, iteration: 203681
loss: 0.9931299686431885,grad_norm: 0.9999991474722665, iteration: 203682
loss: 0.9526561498641968,grad_norm: 0.8992166203284565, iteration: 203683
loss: 0.9929101467132568,grad_norm: 0.9216918205719579, iteration: 203684
loss: 1.0080784559249878,grad_norm: 0.8223670394625443, iteration: 203685
loss: 1.0110303163528442,grad_norm: 0.8095868928370934, iteration: 203686
loss: 0.9879602193832397,grad_norm: 0.8990659872404829, iteration: 203687
loss: 1.0080711841583252,grad_norm: 0.8393659822867582, iteration: 203688
loss: 1.0030113458633423,grad_norm: 0.8005576711574199, iteration: 203689
loss: 1.0273462533950806,grad_norm: 0.9564318458420533, iteration: 203690
loss: 1.0165448188781738,grad_norm: 0.999999160139913, iteration: 203691
loss: 0.9666630029678345,grad_norm: 0.9792566004684689, iteration: 203692
loss: 0.9857362508773804,grad_norm: 0.9988825375089797, iteration: 203693
loss: 0.9822758436203003,grad_norm: 0.9454410309855102, iteration: 203694
loss: 1.0100263357162476,grad_norm: 0.9740355213434785, iteration: 203695
loss: 1.0121568441390991,grad_norm: 0.9999992428696666, iteration: 203696
loss: 1.0141092538833618,grad_norm: 0.9149241928277603, iteration: 203697
loss: 0.9897797703742981,grad_norm: 0.9999991048127457, iteration: 203698
loss: 0.9475448727607727,grad_norm: 0.9567717437470451, iteration: 203699
loss: 1.0003232955932617,grad_norm: 0.9717758480348538, iteration: 203700
loss: 1.0205382108688354,grad_norm: 0.999999097109065, iteration: 203701
loss: 0.9952965974807739,grad_norm: 0.9471765570295633, iteration: 203702
loss: 1.0887401103973389,grad_norm: 0.9999991960818656, iteration: 203703
loss: 0.9694806337356567,grad_norm: 0.9999991644657317, iteration: 203704
loss: 0.9820231199264526,grad_norm: 0.8476443894768435, iteration: 203705
loss: 1.0216178894042969,grad_norm: 0.9999990938995142, iteration: 203706
loss: 1.0228863954544067,grad_norm: 0.9264148201228363, iteration: 203707
loss: 0.954659640789032,grad_norm: 0.9223173410612342, iteration: 203708
loss: 0.9806976318359375,grad_norm: 0.9183396015779243, iteration: 203709
loss: 1.0016722679138184,grad_norm: 0.9187129088438319, iteration: 203710
loss: 0.9943339824676514,grad_norm: 0.9999991728924337, iteration: 203711
loss: 0.9868248105049133,grad_norm: 0.9633781591506275, iteration: 203712
loss: 1.0117852687835693,grad_norm: 0.9540736240972265, iteration: 203713
loss: 0.9690952301025391,grad_norm: 0.9421845874259023, iteration: 203714
loss: 1.0171177387237549,grad_norm: 0.9192694814556067, iteration: 203715
loss: 1.0030591487884521,grad_norm: 0.9438684579148551, iteration: 203716
loss: 0.9623476266860962,grad_norm: 0.9999990023462549, iteration: 203717
loss: 0.992418646812439,grad_norm: 0.9174380325290739, iteration: 203718
loss: 0.9951773285865784,grad_norm: 0.8783606859839502, iteration: 203719
loss: 1.017652988433838,grad_norm: 0.9375974092407843, iteration: 203720
loss: 0.9931957125663757,grad_norm: 0.930089367356931, iteration: 203721
loss: 0.9947676062583923,grad_norm: 0.9269569090753744, iteration: 203722
loss: 1.0452200174331665,grad_norm: 0.9342219314566965, iteration: 203723
loss: 1.0076298713684082,grad_norm: 0.9029687294647695, iteration: 203724
loss: 0.9971053004264832,grad_norm: 0.9999991017034294, iteration: 203725
loss: 1.0065128803253174,grad_norm: 0.8959555190214854, iteration: 203726
loss: 0.9974600672721863,grad_norm: 0.9999990021059563, iteration: 203727
loss: 1.0182757377624512,grad_norm: 0.9999991134646097, iteration: 203728
loss: 0.9651620984077454,grad_norm: 0.8273386741825269, iteration: 203729
loss: 1.0529019832611084,grad_norm: 0.9999991642382132, iteration: 203730
loss: 1.007946491241455,grad_norm: 0.9999991162610861, iteration: 203731
loss: 1.0148119926452637,grad_norm: 0.9029824375423373, iteration: 203732
loss: 0.9807960987091064,grad_norm: 0.8822304868509813, iteration: 203733
loss: 0.9892837405204773,grad_norm: 0.9999992254430241, iteration: 203734
loss: 0.9954372644424438,grad_norm: 0.9951436825456478, iteration: 203735
loss: 1.0220539569854736,grad_norm: 0.9999990650126712, iteration: 203736
loss: 0.9857707619667053,grad_norm: 0.9999991497192722, iteration: 203737
loss: 0.9585742950439453,grad_norm: 0.9782032222440268, iteration: 203738
loss: 1.0140119791030884,grad_norm: 0.9838276683080768, iteration: 203739
loss: 1.003538966178894,grad_norm: 0.9999990181658112, iteration: 203740
loss: 1.0083521604537964,grad_norm: 0.9999993487111338, iteration: 203741
loss: 0.9581241607666016,grad_norm: 0.9999991339609544, iteration: 203742
loss: 0.9864763021469116,grad_norm: 0.8240576951638179, iteration: 203743
loss: 1.0054033994674683,grad_norm: 0.8741308316627914, iteration: 203744
loss: 1.0200423002243042,grad_norm: 0.9999991627285428, iteration: 203745
loss: 1.0060151815414429,grad_norm: 0.88787718845256, iteration: 203746
loss: 1.0108286142349243,grad_norm: 0.9705763432550573, iteration: 203747
loss: 1.0027159452438354,grad_norm: 0.9240898798528698, iteration: 203748
loss: 1.0188541412353516,grad_norm: 0.8986967975364749, iteration: 203749
loss: 0.9901990294456482,grad_norm: 0.9122956150330596, iteration: 203750
loss: 1.0419217348098755,grad_norm: 0.99999989253068, iteration: 203751
loss: 0.9928628206253052,grad_norm: 0.9122210219090392, iteration: 203752
loss: 1.0162869691848755,grad_norm: 0.9999991664340544, iteration: 203753
loss: 0.9893765449523926,grad_norm: 0.9059708795277548, iteration: 203754
loss: 0.9895802140235901,grad_norm: 0.9609990671274506, iteration: 203755
loss: 0.9899929761886597,grad_norm: 0.8323941701840837, iteration: 203756
loss: 1.0012924671173096,grad_norm: 0.90792574263372, iteration: 203757
loss: 0.9591647386550903,grad_norm: 0.8868608415171331, iteration: 203758
loss: 0.9952716827392578,grad_norm: 0.8407311093830735, iteration: 203759
loss: 1.0312845706939697,grad_norm: 0.8407124169789755, iteration: 203760
loss: 1.001429796218872,grad_norm: 0.9999992283353819, iteration: 203761
loss: 0.9631132483482361,grad_norm: 0.8152501711679769, iteration: 203762
loss: 0.9906183481216431,grad_norm: 0.9999990428973021, iteration: 203763
loss: 0.9572747349739075,grad_norm: 0.9883011925115143, iteration: 203764
loss: 1.0316119194030762,grad_norm: 0.9430723840176222, iteration: 203765
loss: 1.005959391593933,grad_norm: 0.907407938764924, iteration: 203766
loss: 0.933014988899231,grad_norm: 0.9999991298830513, iteration: 203767
loss: 1.011684536933899,grad_norm: 0.9479448760555177, iteration: 203768
loss: 0.9853397011756897,grad_norm: 0.999999223973512, iteration: 203769
loss: 1.0020638704299927,grad_norm: 0.9328851352080971, iteration: 203770
loss: 0.9924420714378357,grad_norm: 0.9999990765802558, iteration: 203771
loss: 1.0473785400390625,grad_norm: 0.959718899014209, iteration: 203772
loss: 0.9853772521018982,grad_norm: 0.9450621002330545, iteration: 203773
loss: 0.9959356784820557,grad_norm: 0.9833119468369586, iteration: 203774
loss: 0.9976058006286621,grad_norm: 0.8588124035502233, iteration: 203775
loss: 1.01462984085083,grad_norm: 0.7420084633721529, iteration: 203776
loss: 1.0151546001434326,grad_norm: 0.9901915193978521, iteration: 203777
loss: 1.0205672979354858,grad_norm: 0.814863634918861, iteration: 203778
loss: 1.015893578529358,grad_norm: 0.9930629599044066, iteration: 203779
loss: 1.018795371055603,grad_norm: 0.9999992037425663, iteration: 203780
loss: 0.9944934248924255,grad_norm: 0.9329927179791382, iteration: 203781
loss: 0.9917024374008179,grad_norm: 0.9999990200935512, iteration: 203782
loss: 1.0150601863861084,grad_norm: 0.9999993469723996, iteration: 203783
loss: 1.0203065872192383,grad_norm: 0.8586135471909914, iteration: 203784
loss: 1.0320886373519897,grad_norm: 0.9999994174055985, iteration: 203785
loss: 0.9924007058143616,grad_norm: 0.8434086405478352, iteration: 203786
loss: 0.9851129651069641,grad_norm: 0.838239833403111, iteration: 203787
loss: 1.013371467590332,grad_norm: 0.9069992564990919, iteration: 203788
loss: 0.9952276945114136,grad_norm: 0.9736437018699801, iteration: 203789
loss: 0.9827613830566406,grad_norm: 0.9999990182667898, iteration: 203790
loss: 0.9958552122116089,grad_norm: 0.9999991138827277, iteration: 203791
loss: 0.9653835892677307,grad_norm: 0.8631495252261585, iteration: 203792
loss: 0.976942241191864,grad_norm: 0.8509539161427561, iteration: 203793
loss: 1.012786865234375,grad_norm: 0.9999993009729202, iteration: 203794
loss: 0.9999588131904602,grad_norm: 0.9305876965654604, iteration: 203795
loss: 0.9433431029319763,grad_norm: 0.9999992209217116, iteration: 203796
loss: 1.0307079553604126,grad_norm: 0.9281982062007696, iteration: 203797
loss: 0.9496602416038513,grad_norm: 0.8824345386117564, iteration: 203798
loss: 1.0212496519088745,grad_norm: 0.9999990270083965, iteration: 203799
loss: 0.9922754168510437,grad_norm: 0.9854926535307503, iteration: 203800
loss: 0.9905080199241638,grad_norm: 0.99999904521679, iteration: 203801
loss: 0.9853690266609192,grad_norm: 0.9999991159121419, iteration: 203802
loss: 1.0196706056594849,grad_norm: 0.9097505215727693, iteration: 203803
loss: 0.9678075313568115,grad_norm: 0.8455429855745052, iteration: 203804
loss: 1.0245100259780884,grad_norm: 0.9999996159835712, iteration: 203805
loss: 0.9962783455848694,grad_norm: 0.9004609528127742, iteration: 203806
loss: 1.0032036304473877,grad_norm: 0.9999990982225043, iteration: 203807
loss: 0.9937398433685303,grad_norm: 0.9999992534581459, iteration: 203808
loss: 1.1626754999160767,grad_norm: 0.9999996754181538, iteration: 203809
loss: 0.9773761034011841,grad_norm: 0.8559599619761826, iteration: 203810
loss: 0.99421626329422,grad_norm: 0.7978651568295826, iteration: 203811
loss: 0.984025776386261,grad_norm: 0.926712630327891, iteration: 203812
loss: 0.9880600571632385,grad_norm: 0.9492583353722438, iteration: 203813
loss: 0.9728344082832336,grad_norm: 0.9999989241303358, iteration: 203814
loss: 0.9207596182823181,grad_norm: 0.9999989106899035, iteration: 203815
loss: 0.9844778180122375,grad_norm: 0.9359356743327529, iteration: 203816
loss: 0.9580428600311279,grad_norm: 0.8570229981654967, iteration: 203817
loss: 0.9873239398002625,grad_norm: 0.9999991484796736, iteration: 203818
loss: 0.9989889860153198,grad_norm: 0.9925604250135868, iteration: 203819
loss: 1.012226939201355,grad_norm: 0.9999995955088713, iteration: 203820
loss: 0.9886799454689026,grad_norm: 0.9941817945292191, iteration: 203821
loss: 1.0103318691253662,grad_norm: 0.9322385415974495, iteration: 203822
loss: 1.02046799659729,grad_norm: 0.9999991359452725, iteration: 203823
loss: 1.0222598314285278,grad_norm: 0.9999991335782414, iteration: 203824
loss: 0.9884107112884521,grad_norm: 0.9558610979382169, iteration: 203825
loss: 0.9794314503669739,grad_norm: 0.9109358450408661, iteration: 203826
loss: 0.9846475124359131,grad_norm: 0.999999735926392, iteration: 203827
loss: 0.9976766705513,grad_norm: 0.9891828378175291, iteration: 203828
loss: 1.0373204946517944,grad_norm: 0.9999991837190552, iteration: 203829
loss: 0.9928043484687805,grad_norm: 0.9999995997348408, iteration: 203830
loss: 1.0416150093078613,grad_norm: 0.9879147793375223, iteration: 203831
loss: 0.9505873322486877,grad_norm: 0.9999990029502, iteration: 203832
loss: 0.9377783536911011,grad_norm: 0.9999992479181099, iteration: 203833
loss: 0.9946501851081848,grad_norm: 0.9150960241120455, iteration: 203834
loss: 0.9932272434234619,grad_norm: 0.9999989730249412, iteration: 203835
loss: 1.044264316558838,grad_norm: 0.9545705099295609, iteration: 203836
loss: 1.0110982656478882,grad_norm: 0.9304518775989746, iteration: 203837
loss: 1.0022773742675781,grad_norm: 0.9999990645937126, iteration: 203838
loss: 1.076485514640808,grad_norm: 0.9999989712880378, iteration: 203839
loss: 1.013055443763733,grad_norm: 0.9650377761517256, iteration: 203840
loss: 1.0122199058532715,grad_norm: 0.9125604994810215, iteration: 203841
loss: 0.9828041195869446,grad_norm: 0.9999989881788179, iteration: 203842
loss: 1.0151841640472412,grad_norm: 0.8710029938617626, iteration: 203843
loss: 0.9931728839874268,grad_norm: 0.8602758646670624, iteration: 203844
loss: 0.9836265444755554,grad_norm: 0.870751294643149, iteration: 203845
loss: 0.9941756725311279,grad_norm: 0.9999991178592977, iteration: 203846
loss: 1.0190434455871582,grad_norm: 0.9999998747920729, iteration: 203847
loss: 1.0063613653182983,grad_norm: 0.8817481663199898, iteration: 203848
loss: 1.0246984958648682,grad_norm: 0.9845515807524902, iteration: 203849
loss: 1.0156359672546387,grad_norm: 0.960309370985199, iteration: 203850
loss: 0.9631214141845703,grad_norm: 0.9884117720889897, iteration: 203851
loss: 0.9410274624824524,grad_norm: 0.9397784282586921, iteration: 203852
loss: 1.057619571685791,grad_norm: 0.9999991075514079, iteration: 203853
loss: 1.0200872421264648,grad_norm: 0.9162211831356035, iteration: 203854
loss: 0.9795255661010742,grad_norm: 0.8592268903011998, iteration: 203855
loss: 1.0122838020324707,grad_norm: 0.9274461980897606, iteration: 203856
loss: 1.0005450248718262,grad_norm: 0.9541631205344246, iteration: 203857
loss: 0.985862135887146,grad_norm: 0.768983367433175, iteration: 203858
loss: 0.9671133160591125,grad_norm: 0.9999991435008433, iteration: 203859
loss: 1.0164110660552979,grad_norm: 0.9999991644598774, iteration: 203860
loss: 1.0700269937515259,grad_norm: 0.9999998327565379, iteration: 203861
loss: 1.0137683153152466,grad_norm: 0.8410407884558471, iteration: 203862
loss: 1.0286898612976074,grad_norm: 0.8042582914549232, iteration: 203863
loss: 1.0014578104019165,grad_norm: 0.9705317229746315, iteration: 203864
loss: 1.0418720245361328,grad_norm: 0.8853053010546905, iteration: 203865
loss: 1.0312221050262451,grad_norm: 0.9999989100514223, iteration: 203866
loss: 0.9701098203659058,grad_norm: 0.9999993288795467, iteration: 203867
loss: 0.9646581411361694,grad_norm: 0.83352988136769, iteration: 203868
loss: 1.0078679323196411,grad_norm: 0.8164119241126863, iteration: 203869
loss: 0.9910121560096741,grad_norm: 0.8839884904589529, iteration: 203870
loss: 0.9728708267211914,grad_norm: 0.8253646138278742, iteration: 203871
loss: 0.9785761833190918,grad_norm: 0.9905143360719095, iteration: 203872
loss: 0.9957005977630615,grad_norm: 0.8401805647008682, iteration: 203873
loss: 1.0010000467300415,grad_norm: 0.8093126864663955, iteration: 203874
loss: 1.0293591022491455,grad_norm: 0.9999991842021891, iteration: 203875
loss: 1.0243662595748901,grad_norm: 0.8264622168980873, iteration: 203876
loss: 1.0001311302185059,grad_norm: 0.9013930529254283, iteration: 203877
loss: 1.0070748329162598,grad_norm: 0.8469300858789057, iteration: 203878
loss: 1.0568199157714844,grad_norm: 0.906862712341442, iteration: 203879
loss: 1.0103179216384888,grad_norm: 0.9999992885201323, iteration: 203880
loss: 0.9835038781166077,grad_norm: 0.9666317585446955, iteration: 203881
loss: 0.9875593781471252,grad_norm: 0.8907857588059913, iteration: 203882
loss: 1.00521981716156,grad_norm: 0.9999990380522213, iteration: 203883
loss: 1.0349454879760742,grad_norm: 0.999999004482977, iteration: 203884
loss: 1.0019999742507935,grad_norm: 0.976236162266381, iteration: 203885
loss: 1.0060733556747437,grad_norm: 0.7494081784422245, iteration: 203886
loss: 0.9987378716468811,grad_norm: 0.9150286983226313, iteration: 203887
loss: 1.0148975849151611,grad_norm: 0.874991422715204, iteration: 203888
loss: 1.023647427558899,grad_norm: 0.7886000193712838, iteration: 203889
loss: 1.0373886823654175,grad_norm: 0.9999991542261069, iteration: 203890
loss: 0.9675230979919434,grad_norm: 0.9862199808458915, iteration: 203891
loss: 1.0172754526138306,grad_norm: 0.9156305021048112, iteration: 203892
loss: 1.0246092081069946,grad_norm: 0.9999990330494855, iteration: 203893
loss: 0.9659204483032227,grad_norm: 0.9999992006478297, iteration: 203894
loss: 1.0280530452728271,grad_norm: 0.9999990849990383, iteration: 203895
loss: 0.9856250882148743,grad_norm: 0.8662590575538335, iteration: 203896
loss: 0.9878175854682922,grad_norm: 0.8233871890169915, iteration: 203897
loss: 1.030137300491333,grad_norm: 0.9315295747757404, iteration: 203898
loss: 1.0369747877120972,grad_norm: 0.9999992312039576, iteration: 203899
loss: 1.0271577835083008,grad_norm: 0.9409853641523721, iteration: 203900
loss: 0.973381519317627,grad_norm: 0.879674786005763, iteration: 203901
loss: 1.0091911554336548,grad_norm: 0.9999989088539538, iteration: 203902
loss: 1.0254193544387817,grad_norm: 0.8779602026266956, iteration: 203903
loss: 1.0196701288223267,grad_norm: 0.8531813578519887, iteration: 203904
loss: 0.9759848117828369,grad_norm: 0.8743905744072575, iteration: 203905
loss: 0.9909447431564331,grad_norm: 0.9319385041265229, iteration: 203906
loss: 0.9981279969215393,grad_norm: 0.8842342603267502, iteration: 203907
loss: 1.0092730522155762,grad_norm: 0.900258521070077, iteration: 203908
loss: 1.0089727640151978,grad_norm: 0.9615358594394071, iteration: 203909
loss: 1.022685170173645,grad_norm: 0.9472824460174686, iteration: 203910
loss: 0.9954917430877686,grad_norm: 0.9068195238750211, iteration: 203911
loss: 0.9920070767402649,grad_norm: 0.9405506796559279, iteration: 203912
loss: 0.9844335317611694,grad_norm: 0.9867431283434142, iteration: 203913
loss: 0.9956819415092468,grad_norm: 0.8283140625282268, iteration: 203914
loss: 0.9878755807876587,grad_norm: 0.9110819791251914, iteration: 203915
loss: 1.0144325494766235,grad_norm: 0.8145713717190567, iteration: 203916
loss: 0.9825258851051331,grad_norm: 0.9999991251638614, iteration: 203917
loss: 1.0135700702667236,grad_norm: 0.9878905486665329, iteration: 203918
loss: 1.0288257598876953,grad_norm: 0.9374674507587978, iteration: 203919
loss: 0.9989569187164307,grad_norm: 0.9247377420561373, iteration: 203920
loss: 1.0063343048095703,grad_norm: 0.9747158350980575, iteration: 203921
loss: 0.9590564966201782,grad_norm: 0.8310486245632258, iteration: 203922
loss: 0.9878073334693909,grad_norm: 0.9999989479696008, iteration: 203923
loss: 0.9947728514671326,grad_norm: 0.9999992756617343, iteration: 203924
loss: 1.024282693862915,grad_norm: 0.9999992404725266, iteration: 203925
loss: 0.9904031157493591,grad_norm: 0.8013745406142276, iteration: 203926
loss: 1.01753830909729,grad_norm: 0.9885170115596761, iteration: 203927
loss: 1.0044426918029785,grad_norm: 0.9999991976455528, iteration: 203928
loss: 0.9804062247276306,grad_norm: 0.8166527216329188, iteration: 203929
loss: 0.9987233877182007,grad_norm: 0.8921380281625695, iteration: 203930
loss: 1.0138906240463257,grad_norm: 0.8659856714480918, iteration: 203931
loss: 1.0304421186447144,grad_norm: 0.9999991607867572, iteration: 203932
loss: 0.98758465051651,grad_norm: 0.9999990504405032, iteration: 203933
loss: 0.9803146719932556,grad_norm: 0.8756872488587312, iteration: 203934
loss: 0.9962655305862427,grad_norm: 0.8918308666032875, iteration: 203935
loss: 0.9767886996269226,grad_norm: 0.9843248515234622, iteration: 203936
loss: 0.9945037364959717,grad_norm: 0.8617237242449233, iteration: 203937
loss: 1.025993824005127,grad_norm: 0.8179621317936148, iteration: 203938
loss: 1.0299724340438843,grad_norm: 0.9999991176097577, iteration: 203939
loss: 1.0396565198898315,grad_norm: 0.9322221220693975, iteration: 203940
loss: 0.9924827218055725,grad_norm: 0.9450092494388496, iteration: 203941
loss: 0.9947762489318848,grad_norm: 0.9494651593607881, iteration: 203942
loss: 0.9942934513092041,grad_norm: 0.8613820815271654, iteration: 203943
loss: 0.9910303354263306,grad_norm: 0.9999990304938574, iteration: 203944
loss: 0.9920432567596436,grad_norm: 0.8313328847364598, iteration: 203945
loss: 0.9860216975212097,grad_norm: 0.999999185707472, iteration: 203946
loss: 0.9845292568206787,grad_norm: 0.8283463581240671, iteration: 203947
loss: 0.9940097332000732,grad_norm: 0.9999991931656458, iteration: 203948
loss: 1.0102319717407227,grad_norm: 0.9017484851005562, iteration: 203949
loss: 0.9892745614051819,grad_norm: 0.9999989975364137, iteration: 203950
loss: 0.9777656197547913,grad_norm: 0.8479343874935672, iteration: 203951
loss: 1.0319880247116089,grad_norm: 0.999999044219122, iteration: 203952
loss: 1.0135462284088135,grad_norm: 0.9032947268129229, iteration: 203953
loss: 1.0069786310195923,grad_norm: 0.9389336473038642, iteration: 203954
loss: 0.9841436743736267,grad_norm: 0.8872156882998063, iteration: 203955
loss: 0.9843034744262695,grad_norm: 0.9187231347743426, iteration: 203956
loss: 1.0187814235687256,grad_norm: 0.999999233746016, iteration: 203957
loss: 1.0151002407073975,grad_norm: 0.9600931703985837, iteration: 203958
loss: 0.9710258841514587,grad_norm: 0.918743553038541, iteration: 203959
loss: 0.9950889945030212,grad_norm: 0.983182834898048, iteration: 203960
loss: 1.003896951675415,grad_norm: 0.9999990931763048, iteration: 203961
loss: 1.0274620056152344,grad_norm: 0.9999992615431945, iteration: 203962
loss: 1.0374900102615356,grad_norm: 0.9532302557648453, iteration: 203963
loss: 1.0131797790527344,grad_norm: 0.865960531227814, iteration: 203964
loss: 1.0342708826065063,grad_norm: 0.9999990530056903, iteration: 203965
loss: 1.0093467235565186,grad_norm: 0.9999992187407128, iteration: 203966
loss: 1.0180411338806152,grad_norm: 0.9999989730061245, iteration: 203967
loss: 1.025566577911377,grad_norm: 0.9999990883426304, iteration: 203968
loss: 0.9956387281417847,grad_norm: 0.7839596031434808, iteration: 203969
loss: 1.0064584016799927,grad_norm: 0.8674056624343098, iteration: 203970
loss: 0.9698776006698608,grad_norm: 0.9999989237301901, iteration: 203971
loss: 1.0092365741729736,grad_norm: 0.8741891642462417, iteration: 203972
loss: 0.9730682373046875,grad_norm: 0.8920546960551113, iteration: 203973
loss: 1.0075135231018066,grad_norm: 0.9999990765567439, iteration: 203974
loss: 0.9856430292129517,grad_norm: 0.8485723960205446, iteration: 203975
loss: 1.05146324634552,grad_norm: 0.9999994419606262, iteration: 203976
loss: 0.9710847735404968,grad_norm: 0.9600693289130873, iteration: 203977
loss: 1.0220612287521362,grad_norm: 0.7844918361375971, iteration: 203978
loss: 1.0129077434539795,grad_norm: 0.7463345856220565, iteration: 203979
loss: 0.975158154964447,grad_norm: 0.9999990164430664, iteration: 203980
loss: 1.0308510065078735,grad_norm: 0.9163020465490929, iteration: 203981
loss: 1.0114139318466187,grad_norm: 0.9999991615836921, iteration: 203982
loss: 1.0119115114212036,grad_norm: 0.9390688252644086, iteration: 203983
loss: 0.9781708121299744,grad_norm: 0.964087025299361, iteration: 203984
loss: 0.9923798441886902,grad_norm: 0.968955109974836, iteration: 203985
loss: 1.0038433074951172,grad_norm: 0.9013134531397782, iteration: 203986
loss: 1.0006937980651855,grad_norm: 0.8011617466874604, iteration: 203987
loss: 0.9571215510368347,grad_norm: 0.9999992992943633, iteration: 203988
loss: 1.014638900756836,grad_norm: 0.8267883775848012, iteration: 203989
loss: 0.96779865026474,grad_norm: 0.9999989993051592, iteration: 203990
loss: 1.01335871219635,grad_norm: 0.8767623508280783, iteration: 203991
loss: 0.9784094095230103,grad_norm: 0.835954267444268, iteration: 203992
loss: 1.0043452978134155,grad_norm: 0.9540706412269538, iteration: 203993
loss: 1.0225579738616943,grad_norm: 0.9275643979033199, iteration: 203994
loss: 1.0098190307617188,grad_norm: 0.9999991856987089, iteration: 203995
loss: 0.9913123250007629,grad_norm: 0.9652547900946088, iteration: 203996
loss: 1.0261729955673218,grad_norm: 0.9999989445356163, iteration: 203997
loss: 1.001918077468872,grad_norm: 0.9417216172331816, iteration: 203998
loss: 1.011845588684082,grad_norm: 0.889962042163845, iteration: 203999
loss: 0.9846702218055725,grad_norm: 0.9999991803279247, iteration: 204000
loss: 0.9700639843940735,grad_norm: 0.9999990995399781, iteration: 204001
loss: 0.9476951956748962,grad_norm: 0.863993298062983, iteration: 204002
loss: 1.0011777877807617,grad_norm: 0.7201436273251064, iteration: 204003
loss: 1.0013821125030518,grad_norm: 0.8883919311690534, iteration: 204004
loss: 0.9889923930168152,grad_norm: 0.9641102823331303, iteration: 204005
loss: 0.9805415868759155,grad_norm: 0.9848195308834504, iteration: 204006
loss: 1.0058717727661133,grad_norm: 0.9812668041064325, iteration: 204007
loss: 1.0487295389175415,grad_norm: 0.9999991696388144, iteration: 204008
loss: 1.0261770486831665,grad_norm: 0.9999990626856045, iteration: 204009
loss: 1.0579041242599487,grad_norm: 0.9999990547413459, iteration: 204010
loss: 1.009331226348877,grad_norm: 0.9999990935202098, iteration: 204011
loss: 1.0105687379837036,grad_norm: 0.8253810162714953, iteration: 204012
loss: 0.9666338562965393,grad_norm: 0.9999989621550431, iteration: 204013
loss: 1.0286773443222046,grad_norm: 0.9794604629720338, iteration: 204014
loss: 1.009620189666748,grad_norm: 0.8804968680818218, iteration: 204015
loss: 0.9901659488677979,grad_norm: 0.9999990160915185, iteration: 204016
loss: 1.010635495185852,grad_norm: 0.8633454923246399, iteration: 204017
loss: 0.9916931390762329,grad_norm: 0.9999990176490183, iteration: 204018
loss: 1.0146771669387817,grad_norm: 0.917813738994852, iteration: 204019
loss: 0.9780265688896179,grad_norm: 0.9999991295511745, iteration: 204020
loss: 1.0049422979354858,grad_norm: 0.999999195829784, iteration: 204021
loss: 1.029815912246704,grad_norm: 0.9999989805729838, iteration: 204022
loss: 1.0212664604187012,grad_norm: 0.9214069220856043, iteration: 204023
loss: 0.9899818897247314,grad_norm: 0.9240362966323138, iteration: 204024
loss: 0.9880061745643616,grad_norm: 0.9999990742304162, iteration: 204025
loss: 0.9903598427772522,grad_norm: 0.9504591027161449, iteration: 204026
loss: 0.9964959025382996,grad_norm: 0.9156086724874002, iteration: 204027
loss: 0.9685825109481812,grad_norm: 0.9359905007687899, iteration: 204028
loss: 1.0093019008636475,grad_norm: 0.9407952013270742, iteration: 204029
loss: 0.978762686252594,grad_norm: 0.8835178677678466, iteration: 204030
loss: 1.0123218297958374,grad_norm: 0.8112774556874185, iteration: 204031
loss: 0.9987040162086487,grad_norm: 0.9783377163580664, iteration: 204032
loss: 0.9928613305091858,grad_norm: 0.9999990618575337, iteration: 204033
loss: 0.9787507057189941,grad_norm: 0.957588272817692, iteration: 204034
loss: 0.9917436242103577,grad_norm: 0.8760238410679105, iteration: 204035
loss: 1.0199321508407593,grad_norm: 0.7957398426940059, iteration: 204036
loss: 0.9890397191047668,grad_norm: 0.8636361642724705, iteration: 204037
loss: 0.9917727112770081,grad_norm: 0.9921078719269899, iteration: 204038
loss: 0.9937034845352173,grad_norm: 0.9436652911382389, iteration: 204039
loss: 0.9984044432640076,grad_norm: 0.8787088646749525, iteration: 204040
loss: 1.0072938203811646,grad_norm: 0.99999928917513, iteration: 204041
loss: 1.0041254758834839,grad_norm: 0.9551862621817953, iteration: 204042
loss: 0.9866797924041748,grad_norm: 0.999999428170065, iteration: 204043
loss: 0.9784601330757141,grad_norm: 0.9251149748507285, iteration: 204044
loss: 1.0215517282485962,grad_norm: 0.9489219806589992, iteration: 204045
loss: 1.008668303489685,grad_norm: 0.9027695399705984, iteration: 204046
loss: 1.004569411277771,grad_norm: 0.9431014296039613, iteration: 204047
loss: 0.9957425594329834,grad_norm: 0.8239622494602554, iteration: 204048
loss: 0.9997574687004089,grad_norm: 0.8288391860774467, iteration: 204049
loss: 1.0055460929870605,grad_norm: 0.9999991625599862, iteration: 204050
loss: 0.9861371517181396,grad_norm: 0.8442444203466563, iteration: 204051
loss: 1.0342739820480347,grad_norm: 0.9999991271899524, iteration: 204052
loss: 0.9941745400428772,grad_norm: 0.899111462357091, iteration: 204053
loss: 1.016052007675171,grad_norm: 0.9999993555901555, iteration: 204054
loss: 1.016034722328186,grad_norm: 0.9005038706255828, iteration: 204055
loss: 1.0152639150619507,grad_norm: 0.9999991279859604, iteration: 204056
loss: 1.0136990547180176,grad_norm: 0.999999104215184, iteration: 204057
loss: 1.0879292488098145,grad_norm: 0.9999996266794433, iteration: 204058
loss: 0.9494093656539917,grad_norm: 0.9999991022563579, iteration: 204059
loss: 0.9558989405632019,grad_norm: 0.9523836389825636, iteration: 204060
loss: 1.0600368976593018,grad_norm: 0.9999991869293269, iteration: 204061
loss: 1.0941094160079956,grad_norm: 0.9999991490813055, iteration: 204062
loss: 1.0145341157913208,grad_norm: 0.9999991033810386, iteration: 204063
loss: 0.9585670232772827,grad_norm: 0.9999990413116274, iteration: 204064
loss: 0.9803301095962524,grad_norm: 0.7592037814222189, iteration: 204065
loss: 1.0015842914581299,grad_norm: 0.9999991213405995, iteration: 204066
loss: 1.0141643285751343,grad_norm: 0.8692668189188063, iteration: 204067
loss: 0.9509294629096985,grad_norm: 0.9157272624377358, iteration: 204068
loss: 1.0032751560211182,grad_norm: 0.9999991566506179, iteration: 204069
loss: 1.0157489776611328,grad_norm: 0.9927058164488876, iteration: 204070
loss: 0.9587774872779846,grad_norm: 0.8871293528118526, iteration: 204071
loss: 1.0567846298217773,grad_norm: 0.9999998654623545, iteration: 204072
loss: 1.015155553817749,grad_norm: 0.8537467937283887, iteration: 204073
loss: 0.9943522810935974,grad_norm: 0.9937362593306325, iteration: 204074
loss: 1.0026947259902954,grad_norm: 0.8576226083124959, iteration: 204075
loss: 1.0205693244934082,grad_norm: 0.9999990613406077, iteration: 204076
loss: 0.9714232087135315,grad_norm: 0.8861943237927168, iteration: 204077
loss: 0.9662796258926392,grad_norm: 0.8315647460077272, iteration: 204078
loss: 0.9702693223953247,grad_norm: 0.8614217800394134, iteration: 204079
loss: 1.0228683948516846,grad_norm: 0.9144129129292184, iteration: 204080
loss: 0.9913402199745178,grad_norm: 0.8712040547923153, iteration: 204081
loss: 0.9660406708717346,grad_norm: 0.999999114139493, iteration: 204082
loss: 0.9819369316101074,grad_norm: 0.9451252314586162, iteration: 204083
loss: 0.983898937702179,grad_norm: 0.9999991957725322, iteration: 204084
loss: 1.0204898118972778,grad_norm: 0.9999990659462774, iteration: 204085
loss: 1.009407639503479,grad_norm: 0.999999204048381, iteration: 204086
loss: 0.9912257194519043,grad_norm: 0.8861787650745208, iteration: 204087
loss: 1.0542492866516113,grad_norm: 0.9999992392338718, iteration: 204088
loss: 1.023756504058838,grad_norm: 0.8139158892169784, iteration: 204089
loss: 0.996431827545166,grad_norm: 0.9999998905176725, iteration: 204090
loss: 1.0098282098770142,grad_norm: 0.9793394795002454, iteration: 204091
loss: 0.9777476787567139,grad_norm: 0.8508890069396063, iteration: 204092
loss: 1.006761074066162,grad_norm: 0.9999990416887979, iteration: 204093
loss: 0.9837737083435059,grad_norm: 0.872825133315839, iteration: 204094
loss: 0.9969275593757629,grad_norm: 0.9760763753597463, iteration: 204095
loss: 1.0090978145599365,grad_norm: 0.9999992705418815, iteration: 204096
loss: 1.0154765844345093,grad_norm: 0.9999998967319603, iteration: 204097
loss: 1.0101077556610107,grad_norm: 0.9999990950137979, iteration: 204098
loss: 1.0385123491287231,grad_norm: 0.9999991474196208, iteration: 204099
loss: 1.0126945972442627,grad_norm: 0.9508523590721434, iteration: 204100
loss: 0.9950536489486694,grad_norm: 0.9454295057479959, iteration: 204101
loss: 0.9742118716239929,grad_norm: 0.9999991739409333, iteration: 204102
loss: 1.0257664918899536,grad_norm: 0.9999995768617268, iteration: 204103
loss: 0.979545533657074,grad_norm: 0.9559644533472005, iteration: 204104
loss: 1.006595492362976,grad_norm: 0.9231831655247982, iteration: 204105
loss: 0.9719151854515076,grad_norm: 0.698641936901744, iteration: 204106
loss: 1.0149437189102173,grad_norm: 0.9999998367563664, iteration: 204107
loss: 1.0146929025650024,grad_norm: 0.8558485267392276, iteration: 204108
loss: 1.014917016029358,grad_norm: 0.9999989586790543, iteration: 204109
loss: 0.9892337918281555,grad_norm: 0.9999990698954787, iteration: 204110
loss: 0.9918296337127686,grad_norm: 0.9715775359412389, iteration: 204111
loss: 0.9722936749458313,grad_norm: 0.8053158995384783, iteration: 204112
loss: 1.019748330116272,grad_norm: 0.9999993832247734, iteration: 204113
loss: 0.9955198764801025,grad_norm: 0.9875965849596632, iteration: 204114
loss: 0.9893016815185547,grad_norm: 0.9929199365124807, iteration: 204115
loss: 1.0536472797393799,grad_norm: 0.9999991844806888, iteration: 204116
loss: 0.9341728091239929,grad_norm: 0.999999221158984, iteration: 204117
loss: 0.9974406957626343,grad_norm: 0.859507253610017, iteration: 204118
loss: 0.9774009585380554,grad_norm: 0.9999993269396393, iteration: 204119
loss: 1.0421537160873413,grad_norm: 0.796811773183842, iteration: 204120
loss: 1.001643180847168,grad_norm: 0.9587878152799703, iteration: 204121
loss: 0.9698431491851807,grad_norm: 0.8232467959600557, iteration: 204122
loss: 0.9419010281562805,grad_norm: 0.999999107157659, iteration: 204123
loss: 1.0394312143325806,grad_norm: 0.9999992376198, iteration: 204124
loss: 0.9905261993408203,grad_norm: 0.8972294962457844, iteration: 204125
loss: 1.0143747329711914,grad_norm: 0.7532744471790289, iteration: 204126
loss: 1.0031781196594238,grad_norm: 0.9052891000861829, iteration: 204127
loss: 0.9904892444610596,grad_norm: 0.9873285980387784, iteration: 204128
loss: 1.0004035234451294,grad_norm: 0.7721490700652724, iteration: 204129
loss: 1.0031962394714355,grad_norm: 0.8519396663935973, iteration: 204130
loss: 1.001370906829834,grad_norm: 0.9999991141128071, iteration: 204131
loss: 1.0192445516586304,grad_norm: 0.999999022787661, iteration: 204132
loss: 0.9952718615531921,grad_norm: 0.7682485294740946, iteration: 204133
loss: 1.124629259109497,grad_norm: 0.9999991553647709, iteration: 204134
loss: 1.0197179317474365,grad_norm: 0.9999992227458717, iteration: 204135
loss: 0.9842708110809326,grad_norm: 0.8773653230710298, iteration: 204136
loss: 0.9886048436164856,grad_norm: 0.8608005361563686, iteration: 204137
loss: 0.9994104504585266,grad_norm: 0.9662619230506015, iteration: 204138
loss: 1.0143232345581055,grad_norm: 0.999999384773775, iteration: 204139
loss: 0.9669237732887268,grad_norm: 0.9847586485542725, iteration: 204140
loss: 0.9873030185699463,grad_norm: 0.964952616344351, iteration: 204141
loss: 1.024690866470337,grad_norm: 0.9999991534573509, iteration: 204142
loss: 1.0132695436477661,grad_norm: 0.9999999946680679, iteration: 204143
loss: 0.9756333231925964,grad_norm: 0.885793274220541, iteration: 204144
loss: 1.0045185089111328,grad_norm: 0.9999991246178604, iteration: 204145
loss: 0.9817403554916382,grad_norm: 0.9346767906703765, iteration: 204146
loss: 0.9902746677398682,grad_norm: 0.9999991538281275, iteration: 204147
loss: 1.015243411064148,grad_norm: 0.9999991571183787, iteration: 204148
loss: 1.0020148754119873,grad_norm: 0.999999155734482, iteration: 204149
loss: 0.9835092425346375,grad_norm: 0.9809751544444849, iteration: 204150
loss: 1.0144044160842896,grad_norm: 0.8665602652210178, iteration: 204151
loss: 1.0179084539413452,grad_norm: 0.9999992937336353, iteration: 204152
loss: 0.982421875,grad_norm: 0.9999990325413108, iteration: 204153
loss: 0.9985376596450806,grad_norm: 0.99999905618921, iteration: 204154
loss: 1.010148048400879,grad_norm: 0.9999991714632602, iteration: 204155
loss: 0.9960530400276184,grad_norm: 0.8404509997508276, iteration: 204156
loss: 0.9964151382446289,grad_norm: 0.9145740760611754, iteration: 204157
loss: 0.9875741004943848,grad_norm: 0.999999150884242, iteration: 204158
loss: 1.0084367990493774,grad_norm: 0.9674161634081055, iteration: 204159
loss: 0.9929960370063782,grad_norm: 0.9445149681421497, iteration: 204160
loss: 0.9816005229949951,grad_norm: 0.999999208541927, iteration: 204161
loss: 0.9812840223312378,grad_norm: 0.9811952842339329, iteration: 204162
loss: 1.0097155570983887,grad_norm: 0.8737270287625899, iteration: 204163
loss: 1.024530291557312,grad_norm: 0.9999991296571904, iteration: 204164
loss: 0.9649322032928467,grad_norm: 0.9789047351295951, iteration: 204165
loss: 1.0015113353729248,grad_norm: 0.9999991724057202, iteration: 204166
loss: 0.9903156757354736,grad_norm: 0.978964275292363, iteration: 204167
loss: 1.0369765758514404,grad_norm: 0.9999992247402228, iteration: 204168
loss: 1.0345667600631714,grad_norm: 0.9990132955718661, iteration: 204169
loss: 0.983385443687439,grad_norm: 0.819646544197673, iteration: 204170
loss: 0.9473781585693359,grad_norm: 0.9999990853867082, iteration: 204171
loss: 0.9755906462669373,grad_norm: 0.9195038969712732, iteration: 204172
loss: 1.000473141670227,grad_norm: 0.9999991817494259, iteration: 204173
loss: 0.9855504631996155,grad_norm: 0.9314237613965934, iteration: 204174
loss: 0.9951723217964172,grad_norm: 0.8999139331353873, iteration: 204175
loss: 1.0365736484527588,grad_norm: 0.9738922968219225, iteration: 204176
loss: 0.960517406463623,grad_norm: 0.9999990430126039, iteration: 204177
loss: 0.9848513603210449,grad_norm: 0.9999992056813384, iteration: 204178
loss: 0.9836034178733826,grad_norm: 0.9999991570727688, iteration: 204179
loss: 0.9804651737213135,grad_norm: 0.9562522179181513, iteration: 204180
loss: 1.0157173871994019,grad_norm: 0.9834797060765288, iteration: 204181
loss: 0.9658003449440002,grad_norm: 0.9539550658044574, iteration: 204182
loss: 1.0129084587097168,grad_norm: 0.9477456757772504, iteration: 204183
loss: 1.033890724182129,grad_norm: 0.9832852095269902, iteration: 204184
loss: 0.9721605181694031,grad_norm: 0.9999990557871571, iteration: 204185
loss: 1.0260175466537476,grad_norm: 0.9999996416821472, iteration: 204186
loss: 1.0029722452163696,grad_norm: 0.9442445416055149, iteration: 204187
loss: 0.9508392214775085,grad_norm: 0.905504575606224, iteration: 204188
loss: 1.0394511222839355,grad_norm: 0.7846409416982343, iteration: 204189
loss: 1.0104358196258545,grad_norm: 0.912573690381722, iteration: 204190
loss: 0.9421969056129456,grad_norm: 0.9225054624094394, iteration: 204191
loss: 1.0331178903579712,grad_norm: 0.9563253470927965, iteration: 204192
loss: 0.993964433670044,grad_norm: 0.7770474475284735, iteration: 204193
loss: 1.0047153234481812,grad_norm: 0.9999996570200236, iteration: 204194
loss: 1.022440791130066,grad_norm: 0.9130850626025692, iteration: 204195
loss: 1.02289879322052,grad_norm: 0.873592722575977, iteration: 204196
loss: 0.9862208366394043,grad_norm: 0.9787959913098698, iteration: 204197
loss: 1.0078800916671753,grad_norm: 0.9999991347269661, iteration: 204198
loss: 1.0586199760437012,grad_norm: 0.9999992970787411, iteration: 204199
loss: 1.0167224407196045,grad_norm: 0.7796169443054535, iteration: 204200
loss: 0.9656485319137573,grad_norm: 0.9442126029778174, iteration: 204201
loss: 1.0219022035598755,grad_norm: 0.9186852030356617, iteration: 204202
loss: 0.9883910417556763,grad_norm: 0.8711125083445237, iteration: 204203
loss: 0.9933474659919739,grad_norm: 0.8768519893871759, iteration: 204204
loss: 0.9992308616638184,grad_norm: 0.9999991681362376, iteration: 204205
loss: 0.9895168542861938,grad_norm: 0.9913610484924893, iteration: 204206
loss: 1.0061396360397339,grad_norm: 0.9230770889681073, iteration: 204207
loss: 1.0124499797821045,grad_norm: 0.7738543092334746, iteration: 204208
loss: 1.003962516784668,grad_norm: 0.9999994908537501, iteration: 204209
loss: 0.9894177913665771,grad_norm: 0.8779158724076987, iteration: 204210
loss: 1.0214300155639648,grad_norm: 0.9999993573189171, iteration: 204211
loss: 0.9788044691085815,grad_norm: 0.7418701665846213, iteration: 204212
loss: 1.0040243864059448,grad_norm: 0.9555607774136087, iteration: 204213
loss: 0.9825859665870667,grad_norm: 0.8623192154257461, iteration: 204214
loss: 1.0001819133758545,grad_norm: 0.9300670254808174, iteration: 204215
loss: 1.08120596408844,grad_norm: 0.9999996956385853, iteration: 204216
loss: 0.9743584394454956,grad_norm: 0.9999992307220573, iteration: 204217
loss: 0.9787185788154602,grad_norm: 0.8241107706839171, iteration: 204218
loss: 1.026618242263794,grad_norm: 0.9999999281404837, iteration: 204219
loss: 0.995293915271759,grad_norm: 0.8623172818679675, iteration: 204220
loss: 1.00192391872406,grad_norm: 0.952792939770539, iteration: 204221
loss: 1.004550814628601,grad_norm: 0.8078060404372777, iteration: 204222
loss: 1.0074900388717651,grad_norm: 0.9621237716152246, iteration: 204223
loss: 0.970014214515686,grad_norm: 0.851893160647302, iteration: 204224
loss: 1.0003541707992554,grad_norm: 0.9491182538342086, iteration: 204225
loss: 1.0350953340530396,grad_norm: 0.9935734445782638, iteration: 204226
loss: 0.9670939445495605,grad_norm: 0.9341154632242311, iteration: 204227
loss: 1.0083420276641846,grad_norm: 0.8226097556626998, iteration: 204228
loss: 0.9794422388076782,grad_norm: 0.9777597365569975, iteration: 204229
loss: 1.0219676494598389,grad_norm: 0.907732154013419, iteration: 204230
loss: 1.0151985883712769,grad_norm: 0.9861707716992409, iteration: 204231
loss: 1.0101678371429443,grad_norm: 0.8230060143655561, iteration: 204232
loss: 0.9813411235809326,grad_norm: 0.9999991606392697, iteration: 204233
loss: 0.9939280152320862,grad_norm: 0.8694057104440988, iteration: 204234
loss: 1.0350686311721802,grad_norm: 0.999999419509577, iteration: 204235
loss: 1.0825023651123047,grad_norm: 0.9999998787379639, iteration: 204236
loss: 1.0160940885543823,grad_norm: 0.9999990230170032, iteration: 204237
loss: 0.9606260061264038,grad_norm: 0.9608981975162666, iteration: 204238
loss: 0.9874381422996521,grad_norm: 0.999999217248704, iteration: 204239
loss: 0.9983043074607849,grad_norm: 0.9041867561319509, iteration: 204240
loss: 1.0091028213500977,grad_norm: 0.8563765819698236, iteration: 204241
loss: 1.0040603876113892,grad_norm: 0.9485363005919213, iteration: 204242
loss: 1.008965015411377,grad_norm: 0.9976973814051617, iteration: 204243
loss: 1.02074134349823,grad_norm: 0.8003840958437884, iteration: 204244
loss: 1.0369051694869995,grad_norm: 0.9999991825270154, iteration: 204245
loss: 1.004655122756958,grad_norm: 0.9999990739552043, iteration: 204246
loss: 1.0032031536102295,grad_norm: 0.9285460206593505, iteration: 204247
loss: 0.9791233539581299,grad_norm: 0.9999990987947476, iteration: 204248
loss: 1.0142844915390015,grad_norm: 0.9999991575986495, iteration: 204249
loss: 0.9583114981651306,grad_norm: 0.899150067376171, iteration: 204250
loss: 1.07368803024292,grad_norm: 0.9999991238783585, iteration: 204251
loss: 0.994096577167511,grad_norm: 0.8066800720523101, iteration: 204252
loss: 1.0377311706542969,grad_norm: 0.9999991342626028, iteration: 204253
loss: 1.035595417022705,grad_norm: 0.8459766113538675, iteration: 204254
loss: 1.0005189180374146,grad_norm: 0.7943353399602777, iteration: 204255
loss: 0.986253023147583,grad_norm: 0.999999167646523, iteration: 204256
loss: 1.0130972862243652,grad_norm: 0.9999993162719373, iteration: 204257
loss: 0.9685567617416382,grad_norm: 0.999999176594249, iteration: 204258
loss: 1.0151851177215576,grad_norm: 0.8579183999560196, iteration: 204259
loss: 0.9868248701095581,grad_norm: 0.9999996292623002, iteration: 204260
loss: 1.0451315641403198,grad_norm: 0.9827844410874462, iteration: 204261
loss: 1.0125198364257812,grad_norm: 0.7938953758831254, iteration: 204262
loss: 0.975867748260498,grad_norm: 0.9999757726156152, iteration: 204263
loss: 1.0763185024261475,grad_norm: 0.9999996468600039, iteration: 204264
loss: 1.0274038314819336,grad_norm: 0.9511387344788403, iteration: 204265
loss: 1.0506309270858765,grad_norm: 0.9999991039243284, iteration: 204266
loss: 1.023154854774475,grad_norm: 0.9999999089893121, iteration: 204267
loss: 0.997473955154419,grad_norm: 0.9999993500139516, iteration: 204268
loss: 0.9970889687538147,grad_norm: 0.9999995103421166, iteration: 204269
loss: 1.0384782552719116,grad_norm: 0.9999990327519965, iteration: 204270
loss: 1.0045289993286133,grad_norm: 0.9214724239800088, iteration: 204271
loss: 0.986238420009613,grad_norm: 0.9999991417125469, iteration: 204272
loss: 0.985907793045044,grad_norm: 0.9389008872572137, iteration: 204273
loss: 0.9981701374053955,grad_norm: 0.8192402291272456, iteration: 204274
loss: 1.0114372968673706,grad_norm: 0.919139220633571, iteration: 204275
loss: 1.003445029258728,grad_norm: 0.9999992580502054, iteration: 204276
loss: 1.0679922103881836,grad_norm: 0.9999993138295599, iteration: 204277
loss: 1.0247689485549927,grad_norm: 0.8817993348347961, iteration: 204278
loss: 0.9821635484695435,grad_norm: 0.9220183808721006, iteration: 204279
loss: 1.0275249481201172,grad_norm: 0.7864684426609633, iteration: 204280
loss: 1.0128364562988281,grad_norm: 0.9218939413912582, iteration: 204281
loss: 0.9973241090774536,grad_norm: 0.9859962159251087, iteration: 204282
loss: 1.031031608581543,grad_norm: 0.9999990222860328, iteration: 204283
loss: 0.9990897178649902,grad_norm: 0.9999989641437653, iteration: 204284
loss: 0.9660506248474121,grad_norm: 0.9560755152867172, iteration: 204285
loss: 1.015813946723938,grad_norm: 0.9999991649871488, iteration: 204286
loss: 1.0068788528442383,grad_norm: 0.9573053511255089, iteration: 204287
loss: 1.0186372995376587,grad_norm: 0.8699221091883961, iteration: 204288
loss: 0.9930076003074646,grad_norm: 0.9167279267148063, iteration: 204289
loss: 1.0140827894210815,grad_norm: 0.9999990197630382, iteration: 204290
loss: 0.9762343764305115,grad_norm: 0.9843752213624208, iteration: 204291
loss: 0.9681707620620728,grad_norm: 0.9339683081585292, iteration: 204292
loss: 1.101053237915039,grad_norm: 0.9999997829772872, iteration: 204293
loss: 1.005867838859558,grad_norm: 0.9510529699092523, iteration: 204294
loss: 0.9893805980682373,grad_norm: 0.9999989748164093, iteration: 204295
loss: 0.9562956094741821,grad_norm: 0.999999189426099, iteration: 204296
loss: 0.9911133050918579,grad_norm: 0.9999990554963485, iteration: 204297
loss: 1.0188161134719849,grad_norm: 0.8748217408109933, iteration: 204298
loss: 1.0061488151550293,grad_norm: 0.9999990903561237, iteration: 204299
loss: 0.9679403901100159,grad_norm: 0.8805394951605632, iteration: 204300
loss: 0.9740463495254517,grad_norm: 0.8662058308514, iteration: 204301
loss: 0.9980963468551636,grad_norm: 0.9179611445502247, iteration: 204302
loss: 0.9778174161911011,grad_norm: 0.9161318581370028, iteration: 204303
loss: 1.0037245750427246,grad_norm: 0.9999991938650699, iteration: 204304
loss: 0.9893535375595093,grad_norm: 0.8508994344239673, iteration: 204305
loss: 0.9865395426750183,grad_norm: 0.9999995011592859, iteration: 204306
loss: 0.9682988524436951,grad_norm: 0.9999990865652212, iteration: 204307
loss: 1.0097194910049438,grad_norm: 0.9024999981486294, iteration: 204308
loss: 0.9921942949295044,grad_norm: 0.9560369908000521, iteration: 204309
loss: 0.9861702919006348,grad_norm: 0.8570518689935704, iteration: 204310
loss: 0.9715969562530518,grad_norm: 0.8078844390805443, iteration: 204311
loss: 1.0375685691833496,grad_norm: 0.9999991913613698, iteration: 204312
loss: 1.0215942859649658,grad_norm: 0.9583302163741159, iteration: 204313
loss: 0.96505206823349,grad_norm: 0.9999990680415854, iteration: 204314
loss: 0.982177734375,grad_norm: 0.9999990507677347, iteration: 204315
loss: 1.0190328359603882,grad_norm: 0.7181540297478571, iteration: 204316
loss: 0.98316490650177,grad_norm: 0.9999990166464536, iteration: 204317
loss: 0.9558724761009216,grad_norm: 0.99172373546482, iteration: 204318
loss: 0.9978591203689575,grad_norm: 0.9999990258141177, iteration: 204319
loss: 1.0330644845962524,grad_norm: 0.9999998790647634, iteration: 204320
loss: 1.012656569480896,grad_norm: 0.8323931146610344, iteration: 204321
loss: 0.9609257578849792,grad_norm: 0.8381043537651477, iteration: 204322
loss: 1.0223489999771118,grad_norm: 0.9999989851473413, iteration: 204323
loss: 0.9996100068092346,grad_norm: 0.8971386514769739, iteration: 204324
loss: 0.9748967885971069,grad_norm: 0.8845849669346695, iteration: 204325
loss: 0.963132381439209,grad_norm: 0.8949550110927593, iteration: 204326
loss: 1.0751891136169434,grad_norm: 0.9999992314341822, iteration: 204327
loss: 1.033807396888733,grad_norm: 0.9200437372708993, iteration: 204328
loss: 1.0038522481918335,grad_norm: 0.9999991046575845, iteration: 204329
loss: 1.0405757427215576,grad_norm: 0.9999991487428151, iteration: 204330
loss: 1.0091240406036377,grad_norm: 0.8279375604555288, iteration: 204331
loss: 1.012058138847351,grad_norm: 0.9999992716178695, iteration: 204332
loss: 0.9839423894882202,grad_norm: 0.946392657410778, iteration: 204333
loss: 0.9980380535125732,grad_norm: 0.9258493908210338, iteration: 204334
loss: 1.011701226234436,grad_norm: 0.8688435838623294, iteration: 204335
loss: 0.96967613697052,grad_norm: 0.846356111871193, iteration: 204336
loss: 1.1713179349899292,grad_norm: 0.9999998954191803, iteration: 204337
loss: 1.035606026649475,grad_norm: 0.9999999636922727, iteration: 204338
loss: 0.9932093024253845,grad_norm: 0.8805038299142148, iteration: 204339
loss: 1.0053496360778809,grad_norm: 0.8443830723300999, iteration: 204340
loss: 1.040022373199463,grad_norm: 0.9535681543345115, iteration: 204341
loss: 1.0081732273101807,grad_norm: 0.9792597289282201, iteration: 204342
loss: 1.0051528215408325,grad_norm: 0.9999991498019029, iteration: 204343
loss: 1.0093774795532227,grad_norm: 0.959369673339242, iteration: 204344
loss: 1.0935062170028687,grad_norm: 0.9999999299769348, iteration: 204345
loss: 0.9738346934318542,grad_norm: 0.8293169862451671, iteration: 204346
loss: 1.017255425453186,grad_norm: 0.9999990834516527, iteration: 204347
loss: 1.0185669660568237,grad_norm: 0.7954846699422045, iteration: 204348
loss: 1.0963000059127808,grad_norm: 0.9999993793216247, iteration: 204349
loss: 0.9669134020805359,grad_norm: 0.9999991606784754, iteration: 204350
loss: 0.9998546242713928,grad_norm: 0.9999994131028275, iteration: 204351
loss: 0.9980091452598572,grad_norm: 0.8915366600654498, iteration: 204352
loss: 1.0126374959945679,grad_norm: 0.9999992445211757, iteration: 204353
loss: 0.9851382970809937,grad_norm: 0.8877460209961706, iteration: 204354
loss: 0.9727810621261597,grad_norm: 0.9999992194991754, iteration: 204355
loss: 0.9916440844535828,grad_norm: 0.9135711201745793, iteration: 204356
loss: 0.975885272026062,grad_norm: 0.9999993913037958, iteration: 204357
loss: 0.9737828969955444,grad_norm: 0.9999990326810293, iteration: 204358
loss: 0.9697065949440002,grad_norm: 0.999999195100712, iteration: 204359
loss: 1.045191764831543,grad_norm: 0.9999993237814297, iteration: 204360
loss: 1.0042064189910889,grad_norm: 0.9999989838804074, iteration: 204361
loss: 0.9804689288139343,grad_norm: 0.9999991430337022, iteration: 204362
loss: 1.0124623775482178,grad_norm: 0.9999998279853539, iteration: 204363
loss: 0.9605904817581177,grad_norm: 0.9999999098816094, iteration: 204364
loss: 1.0139052867889404,grad_norm: 0.9999990781743173, iteration: 204365
loss: 0.9981680512428284,grad_norm: 0.8958415852857915, iteration: 204366
loss: 1.0811972618103027,grad_norm: 0.9999997765675586, iteration: 204367
loss: 1.0151084661483765,grad_norm: 0.9999991407826342, iteration: 204368
loss: 1.0261454582214355,grad_norm: 0.9999990915057719, iteration: 204369
loss: 1.014154076576233,grad_norm: 0.9043151438886272, iteration: 204370
loss: 1.0762577056884766,grad_norm: 1.0000000238919062, iteration: 204371
loss: 1.0315961837768555,grad_norm: 0.9294289868722722, iteration: 204372
loss: 0.9825794100761414,grad_norm: 0.9999989810331311, iteration: 204373
loss: 0.9478626251220703,grad_norm: 0.870269039761635, iteration: 204374
loss: 0.9796770811080933,grad_norm: 0.9647401571689393, iteration: 204375
loss: 1.067204236984253,grad_norm: 0.9999998649229341, iteration: 204376
loss: 0.9759766459465027,grad_norm: 0.99999909635446, iteration: 204377
loss: 1.0059733390808105,grad_norm: 0.8702471321927554, iteration: 204378
loss: 1.0056486129760742,grad_norm: 0.8503411457066581, iteration: 204379
loss: 0.9926957488059998,grad_norm: 0.9999990450576064, iteration: 204380
loss: 1.0259724855422974,grad_norm: 0.9999991414013301, iteration: 204381
loss: 1.0132867097854614,grad_norm: 0.9999991210226916, iteration: 204382
loss: 0.9767281413078308,grad_norm: 0.9243623260903412, iteration: 204383
loss: 1.1976450681686401,grad_norm: 0.9999996154717263, iteration: 204384
loss: 1.057765245437622,grad_norm: 0.8527773583885434, iteration: 204385
loss: 1.0177193880081177,grad_norm: 0.999999516370539, iteration: 204386
loss: 1.0470712184906006,grad_norm: 0.9827582772504204, iteration: 204387
loss: 1.0262545347213745,grad_norm: 0.8111831287677995, iteration: 204388
loss: 1.0382295846939087,grad_norm: 0.8847719977352224, iteration: 204389
loss: 1.0967497825622559,grad_norm: 0.999999683182576, iteration: 204390
loss: 1.0233497619628906,grad_norm: 0.9999990751057625, iteration: 204391
loss: 1.0153380632400513,grad_norm: 0.999999326179206, iteration: 204392
loss: 0.9863322973251343,grad_norm: 0.8385676380940099, iteration: 204393
loss: 0.9620221257209778,grad_norm: 0.9999993653952397, iteration: 204394
loss: 0.9807634949684143,grad_norm: 0.9058997538708488, iteration: 204395
loss: 1.0182167291641235,grad_norm: 0.9999991178790504, iteration: 204396
loss: 1.025933861732483,grad_norm: 0.9920380340548571, iteration: 204397
loss: 0.9879682660102844,grad_norm: 0.8269731115255323, iteration: 204398
loss: 1.0306874513626099,grad_norm: 0.868350307709688, iteration: 204399
loss: 0.9923423528671265,grad_norm: 0.9441995407559914, iteration: 204400
loss: 1.003053069114685,grad_norm: 0.8337391439785407, iteration: 204401
loss: 1.0314546823501587,grad_norm: 0.9999991881563788, iteration: 204402
loss: 0.9793446063995361,grad_norm: 0.9999992311797977, iteration: 204403
loss: 1.027217984199524,grad_norm: 0.9999991703575936, iteration: 204404
loss: 0.9881563186645508,grad_norm: 0.8947321674780926, iteration: 204405
loss: 0.9972267746925354,grad_norm: 0.7985529945868902, iteration: 204406
loss: 0.9843354821205139,grad_norm: 0.9649567916038507, iteration: 204407
loss: 1.0166045427322388,grad_norm: 0.9999990622416338, iteration: 204408
loss: 0.9768739938735962,grad_norm: 0.9999990391641066, iteration: 204409
loss: 1.0251797437667847,grad_norm: 0.9999997226030026, iteration: 204410
loss: 0.9502366185188293,grad_norm: 0.8844777039557274, iteration: 204411
loss: 1.2324280738830566,grad_norm: 0.9999999015590503, iteration: 204412
loss: 1.0006208419799805,grad_norm: 0.9999990972466534, iteration: 204413
loss: 0.99517422914505,grad_norm: 0.9289758394467438, iteration: 204414
loss: 0.9969444870948792,grad_norm: 0.999998988474774, iteration: 204415
loss: 0.9996449947357178,grad_norm: 0.9533062090878054, iteration: 204416
loss: 1.037063717842102,grad_norm: 0.9563298285624905, iteration: 204417
loss: 0.9560644030570984,grad_norm: 0.8610167866153418, iteration: 204418
loss: 1.010277509689331,grad_norm: 0.9471983632534886, iteration: 204419
loss: 0.9954545497894287,grad_norm: 0.9138576189960914, iteration: 204420
loss: 0.9660415649414062,grad_norm: 0.9778340350269621, iteration: 204421
loss: 1.1485865116119385,grad_norm: 0.9999990826554298, iteration: 204422
loss: 1.0075300931930542,grad_norm: 0.9999991917718053, iteration: 204423
loss: 1.008284330368042,grad_norm: 0.751961527019432, iteration: 204424
loss: 1.0071547031402588,grad_norm: 0.9481384933230024, iteration: 204425
loss: 1.1083085536956787,grad_norm: 0.9999996703138212, iteration: 204426
loss: 0.9844371676445007,grad_norm: 0.8013267353030992, iteration: 204427
loss: 1.0419501066207886,grad_norm: 0.7659129023944697, iteration: 204428
loss: 0.995270311832428,grad_norm: 0.9785793252415693, iteration: 204429
loss: 1.0259405374526978,grad_norm: 0.999999217117929, iteration: 204430
loss: 0.9794335961341858,grad_norm: 0.9999990701219758, iteration: 204431
loss: 1.0270848274230957,grad_norm: 0.9999990975860076, iteration: 204432
loss: 0.9817075133323669,grad_norm: 0.862160316073429, iteration: 204433
loss: 1.060732126235962,grad_norm: 0.9999991510309637, iteration: 204434
loss: 1.0244182348251343,grad_norm: 0.9692481224263855, iteration: 204435
loss: 0.9321522116661072,grad_norm: 0.939038629258364, iteration: 204436
loss: 0.9427558779716492,grad_norm: 0.9643011229928261, iteration: 204437
loss: 1.0141793489456177,grad_norm: 0.9999991796901869, iteration: 204438
loss: 1.0291293859481812,grad_norm: 0.8343962981353106, iteration: 204439
loss: 1.0481324195861816,grad_norm: 0.9999994621275269, iteration: 204440
loss: 1.0090699195861816,grad_norm: 0.8854489542968547, iteration: 204441
loss: 1.0221391916275024,grad_norm: 0.9999996043176259, iteration: 204442
loss: 0.9773235321044922,grad_norm: 0.9186986149220656, iteration: 204443
loss: 1.0103075504302979,grad_norm: 0.9511939652364713, iteration: 204444
loss: 1.0187585353851318,grad_norm: 0.9462027790914542, iteration: 204445
loss: 1.0146459341049194,grad_norm: 0.99999902615327, iteration: 204446
loss: 0.9786952137947083,grad_norm: 0.9429104716376867, iteration: 204447
loss: 0.96624356508255,grad_norm: 0.9433184263335694, iteration: 204448
loss: 0.9850337505340576,grad_norm: 0.8543697820302883, iteration: 204449
loss: 1.2440953254699707,grad_norm: 0.9999997807321215, iteration: 204450
loss: 1.057961344718933,grad_norm: 0.9999992252543244, iteration: 204451
loss: 0.99424147605896,grad_norm: 0.9999991181899351, iteration: 204452
loss: 1.03960382938385,grad_norm: 0.9999991035188758, iteration: 204453
loss: 1.0518014430999756,grad_norm: 0.9999997318149558, iteration: 204454
loss: 1.017050862312317,grad_norm: 0.9990066979006104, iteration: 204455
loss: 1.0130163431167603,grad_norm: 0.8910254464878868, iteration: 204456
loss: 1.032325029373169,grad_norm: 0.9374998541002709, iteration: 204457
loss: 0.961715817451477,grad_norm: 0.8509327236589428, iteration: 204458
loss: 1.029313564300537,grad_norm: 0.94808959728634, iteration: 204459
loss: 1.0110105276107788,grad_norm: 0.767905579876255, iteration: 204460
loss: 0.9601109623908997,grad_norm: 0.9999991385609995, iteration: 204461
loss: 1.0382091999053955,grad_norm: 0.9753708047283508, iteration: 204462
loss: 0.9904630780220032,grad_norm: 0.9999988892934876, iteration: 204463
loss: 1.079925775527954,grad_norm: 0.9999996856482768, iteration: 204464
loss: 0.9886360764503479,grad_norm: 0.9999991361162686, iteration: 204465
loss: 1.0291919708251953,grad_norm: 0.9999993422618446, iteration: 204466
loss: 1.0111043453216553,grad_norm: 0.8949405684856266, iteration: 204467
loss: 1.069738507270813,grad_norm: 0.9999997469944257, iteration: 204468
loss: 1.0192251205444336,grad_norm: 0.9739709834521375, iteration: 204469
loss: 0.9826034903526306,grad_norm: 0.8807860368746159, iteration: 204470
loss: 1.0082266330718994,grad_norm: 0.9999990942030101, iteration: 204471
loss: 0.9994352459907532,grad_norm: 0.9999990557988115, iteration: 204472
loss: 0.9834768772125244,grad_norm: 0.9999991243860624, iteration: 204473
loss: 1.0017544031143188,grad_norm: 0.9810088447333974, iteration: 204474
loss: 1.0084974765777588,grad_norm: 0.9999989297116963, iteration: 204475
loss: 1.0218861103057861,grad_norm: 0.9999994784554025, iteration: 204476
loss: 1.0006554126739502,grad_norm: 0.9999993319057433, iteration: 204477
loss: 0.9663663506507874,grad_norm: 0.8982270913764369, iteration: 204478
loss: 0.9938659071922302,grad_norm: 0.9266018646549208, iteration: 204479
loss: 1.0989960432052612,grad_norm: 0.9999998473403324, iteration: 204480
loss: 1.013563632965088,grad_norm: 0.9564783390904797, iteration: 204481
loss: 0.9780035018920898,grad_norm: 0.9999991719965103, iteration: 204482
loss: 1.0066169500350952,grad_norm: 0.9999998980286947, iteration: 204483
loss: 1.0439993143081665,grad_norm: 0.9999990771733255, iteration: 204484
loss: 0.9989694356918335,grad_norm: 0.8843346298578486, iteration: 204485
loss: 1.0214970111846924,grad_norm: 0.99999893961616, iteration: 204486
loss: 1.4279475212097168,grad_norm: 0.9999996901819941, iteration: 204487
loss: 1.0332964658737183,grad_norm: 0.9999990384273029, iteration: 204488
loss: 1.0288325548171997,grad_norm: 0.8180972480433965, iteration: 204489
loss: 1.0032368898391724,grad_norm: 0.9999992315944171, iteration: 204490
loss: 0.9975738525390625,grad_norm: 0.9999989056860636, iteration: 204491
loss: 1.0150268077850342,grad_norm: 0.9999990625328178, iteration: 204492
loss: 1.0515434741973877,grad_norm: 0.8936532321653632, iteration: 204493
loss: 0.9923720955848694,grad_norm: 0.9999991763646313, iteration: 204494
loss: 1.0154082775115967,grad_norm: 0.9999991005856128, iteration: 204495
loss: 0.9679864048957825,grad_norm: 0.8816498491438247, iteration: 204496
loss: 1.035852074623108,grad_norm: 0.9914456221295216, iteration: 204497
loss: 0.9872240424156189,grad_norm: 0.8738324502640804, iteration: 204498
loss: 1.0267969369888306,grad_norm: 0.9999990088333987, iteration: 204499
loss: 0.9959027767181396,grad_norm: 0.8864274786563445, iteration: 204500
loss: 1.009262204170227,grad_norm: 0.9999994723536414, iteration: 204501
loss: 0.9718010425567627,grad_norm: 0.99999897191165, iteration: 204502
loss: 0.9789406657218933,grad_norm: 0.928009186340228, iteration: 204503
loss: 1.0311203002929688,grad_norm: 0.9999992142561269, iteration: 204504
loss: 1.0474216938018799,grad_norm: 0.9999995315367176, iteration: 204505
loss: 1.035555362701416,grad_norm: 0.9999989946660316, iteration: 204506
loss: 1.0104475021362305,grad_norm: 0.9999990287309649, iteration: 204507
loss: 0.9985997080802917,grad_norm: 0.9420477298577227, iteration: 204508
loss: 1.0020831823349,grad_norm: 0.9200716892284075, iteration: 204509
loss: 1.0028947591781616,grad_norm: 0.8981365817962011, iteration: 204510
loss: 1.0696362257003784,grad_norm: 0.9999992575820228, iteration: 204511
loss: 1.0099674463272095,grad_norm: 0.9999993541255441, iteration: 204512
loss: 1.022841215133667,grad_norm: 0.9999989745198121, iteration: 204513
loss: 1.023664951324463,grad_norm: 0.9043924232249061, iteration: 204514
loss: 1.0069056749343872,grad_norm: 0.9999989582479649, iteration: 204515
loss: 1.0337262153625488,grad_norm: 0.9347533233642054, iteration: 204516
loss: 0.973288357257843,grad_norm: 0.9999990733661526, iteration: 204517
loss: 1.0050599575042725,grad_norm: 0.9187709062917312, iteration: 204518
loss: 1.0324159860610962,grad_norm: 0.9999992624419176, iteration: 204519
loss: 1.0371136665344238,grad_norm: 0.8145632202011992, iteration: 204520
loss: 0.9941605925559998,grad_norm: 0.9999992127520105, iteration: 204521
loss: 1.0466617345809937,grad_norm: 0.99999916778056, iteration: 204522
loss: 1.060356616973877,grad_norm: 0.9999999481658587, iteration: 204523
loss: 1.0036526918411255,grad_norm: 0.9999990221801257, iteration: 204524
loss: 1.0198338031768799,grad_norm: 0.9999999879871001, iteration: 204525
loss: 0.9902743101119995,grad_norm: 0.9999991474973653, iteration: 204526
loss: 1.0643444061279297,grad_norm: 0.9999998409217068, iteration: 204527
loss: 1.0113351345062256,grad_norm: 0.9999995130463136, iteration: 204528
loss: 0.9702430367469788,grad_norm: 0.9413512949973404, iteration: 204529
loss: 0.994965672492981,grad_norm: 0.915263509755504, iteration: 204530
loss: 0.9789528250694275,grad_norm: 0.9935578949391823, iteration: 204531
loss: 0.9893488883972168,grad_norm: 0.9999991452083985, iteration: 204532
loss: 0.9898248314857483,grad_norm: 0.9999993643252466, iteration: 204533
loss: 1.0501898527145386,grad_norm: 0.9999992232691758, iteration: 204534
loss: 0.9475270509719849,grad_norm: 0.8568637697965051, iteration: 204535
loss: 1.0082067251205444,grad_norm: 0.9999998242046171, iteration: 204536
loss: 0.9742754697799683,grad_norm: 0.9208702062543273, iteration: 204537
loss: 1.017088532447815,grad_norm: 0.9225195831867461, iteration: 204538
loss: 1.0134083032608032,grad_norm: 0.9999997395080694, iteration: 204539
loss: 0.9582768082618713,grad_norm: 0.9012640327553055, iteration: 204540
loss: 0.9979271292686462,grad_norm: 0.9999990345947823, iteration: 204541
loss: 0.9936575293540955,grad_norm: 0.7822561875766446, iteration: 204542
loss: 1.0324838161468506,grad_norm: 0.9353579491710144, iteration: 204543
loss: 1.0008370876312256,grad_norm: 0.8840002259776816, iteration: 204544
loss: 1.009016990661621,grad_norm: 0.9075001187544302, iteration: 204545
loss: 0.9981294870376587,grad_norm: 0.9999993567889792, iteration: 204546
loss: 0.9894312024116516,grad_norm: 0.8367154724566815, iteration: 204547
loss: 1.0140658617019653,grad_norm: 0.9046247126304628, iteration: 204548
loss: 1.033785343170166,grad_norm: 0.9999991072334679, iteration: 204549
loss: 1.0043467283248901,grad_norm: 0.9999991806622275, iteration: 204550
loss: 0.995620608329773,grad_norm: 0.9531262460817164, iteration: 204551
loss: 0.9798532724380493,grad_norm: 0.9811259447981847, iteration: 204552
loss: 1.0362461805343628,grad_norm: 0.9999990578587705, iteration: 204553
loss: 0.9724559783935547,grad_norm: 0.8207909553770183, iteration: 204554
loss: 1.0406545400619507,grad_norm: 0.9999999777537599, iteration: 204555
loss: 0.991337776184082,grad_norm: 0.9875851680333061, iteration: 204556
loss: 1.027039885520935,grad_norm: 0.9999991717325066, iteration: 204557
loss: 1.0051352977752686,grad_norm: 0.8649290128692917, iteration: 204558
loss: 1.0032190084457397,grad_norm: 0.9206690010049519, iteration: 204559
loss: 0.9897094368934631,grad_norm: 0.8992709263406332, iteration: 204560
loss: 1.0089125633239746,grad_norm: 0.9999991896911202, iteration: 204561
loss: 0.9957301616668701,grad_norm: 0.9999990867973202, iteration: 204562
loss: 1.0184894800186157,grad_norm: 0.9999989931396979, iteration: 204563
loss: 1.0243545770645142,grad_norm: 0.9875017873493519, iteration: 204564
loss: 0.9969750642776489,grad_norm: 0.917866980554918, iteration: 204565
loss: 1.0178942680358887,grad_norm: 0.7865208193092319, iteration: 204566
loss: 1.002311110496521,grad_norm: 0.9999991563238645, iteration: 204567
loss: 0.9814648628234863,grad_norm: 0.9576784472354078, iteration: 204568
loss: 1.0147321224212646,grad_norm: 0.7972304910827375, iteration: 204569
loss: 1.0007610321044922,grad_norm: 0.8205001901951888, iteration: 204570
loss: 0.9831937551498413,grad_norm: 0.9979442949662404, iteration: 204571
loss: 0.9702410697937012,grad_norm: 0.9999990240949255, iteration: 204572
loss: 1.0235469341278076,grad_norm: 0.9999996897261518, iteration: 204573
loss: 1.032095193862915,grad_norm: 0.9999995688683712, iteration: 204574
loss: 0.9762149453163147,grad_norm: 0.8894985541566419, iteration: 204575
loss: 1.0143566131591797,grad_norm: 0.9999992456863495, iteration: 204576
loss: 1.0407260656356812,grad_norm: 0.9476534382545923, iteration: 204577
loss: 0.9887147545814514,grad_norm: 0.9465108340511078, iteration: 204578
loss: 0.9750237464904785,grad_norm: 0.8676328188330287, iteration: 204579
loss: 0.9868443608283997,grad_norm: 0.9999991536801521, iteration: 204580
loss: 0.9600673913955688,grad_norm: 0.9999991549376429, iteration: 204581
loss: 0.9608943462371826,grad_norm: 0.9999990247484746, iteration: 204582
loss: 1.0126749277114868,grad_norm: 0.8514744280122535, iteration: 204583
loss: 1.019543170928955,grad_norm: 0.9999990101425674, iteration: 204584
loss: 1.022687315940857,grad_norm: 0.9761447487471432, iteration: 204585
loss: 1.001981496810913,grad_norm: 0.8140256634072359, iteration: 204586
loss: 0.9758707284927368,grad_norm: 0.9699032031473048, iteration: 204587
loss: 0.9882562160491943,grad_norm: 0.8028270391735106, iteration: 204588
loss: 1.046231746673584,grad_norm: 0.9999990643586322, iteration: 204589
loss: 1.0377987623214722,grad_norm: 0.999999211889014, iteration: 204590
loss: 0.9971818327903748,grad_norm: 0.9999990707417106, iteration: 204591
loss: 1.0378105640411377,grad_norm: 0.8520187623688751, iteration: 204592
loss: 1.0018428564071655,grad_norm: 0.9987471680423765, iteration: 204593
loss: 1.0156059265136719,grad_norm: 0.8705041040609923, iteration: 204594
loss: 0.9938898682594299,grad_norm: 0.9999992419066273, iteration: 204595
loss: 0.974419355392456,grad_norm: 0.8341820837620361, iteration: 204596
loss: 1.0104731321334839,grad_norm: 0.8650220888085677, iteration: 204597
loss: 1.019759178161621,grad_norm: 0.9875880072309543, iteration: 204598
loss: 1.0250089168548584,grad_norm: 0.9215246802059941, iteration: 204599
loss: 0.9970827698707581,grad_norm: 0.9741589601046124, iteration: 204600
loss: 0.9968939423561096,grad_norm: 0.9999990838203394, iteration: 204601
loss: 1.0034042596817017,grad_norm: 0.9999991254777633, iteration: 204602
loss: 0.9864219427108765,grad_norm: 0.8565858038146302, iteration: 204603
loss: 0.9979514479637146,grad_norm: 0.9999990253459489, iteration: 204604
loss: 0.9643329381942749,grad_norm: 0.9999990338385599, iteration: 204605
loss: 1.0037657022476196,grad_norm: 0.8596221837957417, iteration: 204606
loss: 1.026878833770752,grad_norm: 0.9821865639322279, iteration: 204607
loss: 0.9872324466705322,grad_norm: 0.8966200095967096, iteration: 204608
loss: 0.9998530745506287,grad_norm: 0.9145675591729329, iteration: 204609
loss: 1.019277811050415,grad_norm: 0.8406073854518618, iteration: 204610
loss: 0.9892182946205139,grad_norm: 0.8846438178140513, iteration: 204611
loss: 0.990317702293396,grad_norm: 0.9999991144191045, iteration: 204612
loss: 0.9983881115913391,grad_norm: 0.8379051832408978, iteration: 204613
loss: 1.0166958570480347,grad_norm: 0.8354119408335929, iteration: 204614
loss: 1.069060206413269,grad_norm: 0.9865902686192632, iteration: 204615
loss: 0.9883241057395935,grad_norm: 0.8467731035158119, iteration: 204616
loss: 0.9884158968925476,grad_norm: 0.9801755230021925, iteration: 204617
loss: 1.0407687425613403,grad_norm: 1.000000004829114, iteration: 204618
loss: 1.0047328472137451,grad_norm: 0.999998891324992, iteration: 204619
loss: 1.0662314891815186,grad_norm: 0.999999734381452, iteration: 204620
loss: 0.9680816531181335,grad_norm: 0.8733736861709179, iteration: 204621
loss: 0.9932230114936829,grad_norm: 0.8818993579965543, iteration: 204622
loss: 0.9894909858703613,grad_norm: 0.811237982217985, iteration: 204623
loss: 1.0420701503753662,grad_norm: 0.99999960634172, iteration: 204624
loss: 0.9745553135871887,grad_norm: 0.7590144908244518, iteration: 204625
loss: 0.9941222667694092,grad_norm: 0.8639141189869047, iteration: 204626
loss: 0.977519690990448,grad_norm: 0.9999990029308959, iteration: 204627
loss: 0.9584137201309204,grad_norm: 0.9672959551044049, iteration: 204628
loss: 1.0369006395339966,grad_norm: 0.9877579774588423, iteration: 204629
loss: 1.0144734382629395,grad_norm: 0.9999992665124138, iteration: 204630
loss: 1.0012390613555908,grad_norm: 0.9999990978020015, iteration: 204631
loss: 1.0001391172409058,grad_norm: 0.9621325046881796, iteration: 204632
loss: 1.0788856744766235,grad_norm: 0.9999990780913399, iteration: 204633
loss: 0.9689871668815613,grad_norm: 0.9766794324521412, iteration: 204634
loss: 1.016829490661621,grad_norm: 0.8636510145019748, iteration: 204635
loss: 1.0558233261108398,grad_norm: 0.9999997233262078, iteration: 204636
loss: 1.0145044326782227,grad_norm: 0.9999991709958981, iteration: 204637
loss: 0.9969844222068787,grad_norm: 0.9999992097340508, iteration: 204638
loss: 1.0168262720108032,grad_norm: 0.9411117935444875, iteration: 204639
loss: 1.0215065479278564,grad_norm: 0.9999992035700493, iteration: 204640
loss: 1.0095818042755127,grad_norm: 0.8413466144495888, iteration: 204641
loss: 0.9836841821670532,grad_norm: 0.8550153443361477, iteration: 204642
loss: 1.0480440855026245,grad_norm: 0.9169427335968241, iteration: 204643
loss: 1.020440936088562,grad_norm: 0.9679387282645853, iteration: 204644
loss: 1.0407202243804932,grad_norm: 0.999999166720821, iteration: 204645
loss: 1.0203912258148193,grad_norm: 0.9999990350890718, iteration: 204646
loss: 0.9809483885765076,grad_norm: 0.9383412097665687, iteration: 204647
loss: 0.9431477189064026,grad_norm: 0.991621477072779, iteration: 204648
loss: 0.9791663885116577,grad_norm: 0.9436230601725223, iteration: 204649
loss: 1.00310218334198,grad_norm: 0.8877033006829926, iteration: 204650
loss: 0.9984014630317688,grad_norm: 0.8113151485564261, iteration: 204651
loss: 1.0033127069473267,grad_norm: 0.8176208452440561, iteration: 204652
loss: 0.9746912121772766,grad_norm: 0.9121596948648872, iteration: 204653
loss: 0.9563280940055847,grad_norm: 0.8013056735094571, iteration: 204654
loss: 0.9901670217514038,grad_norm: 0.9706333063726712, iteration: 204655
loss: 1.0097767114639282,grad_norm: 0.9999991278574372, iteration: 204656
loss: 0.9724450707435608,grad_norm: 0.9662102605245315, iteration: 204657
loss: 1.0185655355453491,grad_norm: 0.7864791327661053, iteration: 204658
loss: 1.0259904861450195,grad_norm: 0.9999990300457569, iteration: 204659
loss: 0.9806380271911621,grad_norm: 0.8274895968680464, iteration: 204660
loss: 0.9853433966636658,grad_norm: 0.7898948858037799, iteration: 204661
loss: 1.0300594568252563,grad_norm: 0.9999992393084155, iteration: 204662
loss: 1.0179141759872437,grad_norm: 0.9999991050575291, iteration: 204663
loss: 0.9940707087516785,grad_norm: 0.9458551604102737, iteration: 204664
loss: 1.0237764120101929,grad_norm: 0.7413609539567079, iteration: 204665
loss: 0.9628830552101135,grad_norm: 0.9999992095658211, iteration: 204666
loss: 0.9972655177116394,grad_norm: 0.9999990565080705, iteration: 204667
loss: 0.9612554907798767,grad_norm: 0.9999990975816805, iteration: 204668
loss: 0.9981921911239624,grad_norm: 0.9306577154224553, iteration: 204669
loss: 0.9856962561607361,grad_norm: 0.8746499569964915, iteration: 204670
loss: 0.994056224822998,grad_norm: 0.8743311175514717, iteration: 204671
loss: 1.019507646560669,grad_norm: 0.9498862611691407, iteration: 204672
loss: 0.980184018611908,grad_norm: 0.9999991285601941, iteration: 204673
loss: 0.9671676754951477,grad_norm: 0.9372536782690292, iteration: 204674
loss: 0.9998483061790466,grad_norm: 0.9790489285048788, iteration: 204675
loss: 1.0511102676391602,grad_norm: 0.9999991172042185, iteration: 204676
loss: 0.9949306845664978,grad_norm: 0.9999991995907858, iteration: 204677
loss: 1.0159541368484497,grad_norm: 0.9999991920773951, iteration: 204678
loss: 1.0040456056594849,grad_norm: 0.9999990057952886, iteration: 204679
loss: 1.0005590915679932,grad_norm: 0.7681427856439177, iteration: 204680
loss: 0.9791771173477173,grad_norm: 0.9273428691901919, iteration: 204681
loss: 1.0029269456863403,grad_norm: 0.9999990222645534, iteration: 204682
loss: 1.0035347938537598,grad_norm: 0.9999990611406046, iteration: 204683
loss: 0.9946016073226929,grad_norm: 0.8468844086739071, iteration: 204684
loss: 0.9711394309997559,grad_norm: 0.9999991799391605, iteration: 204685
loss: 0.9495229125022888,grad_norm: 0.9515377620264375, iteration: 204686
loss: 0.9970694780349731,grad_norm: 0.9999993632907487, iteration: 204687
loss: 1.003571629524231,grad_norm: 0.9999992237881269, iteration: 204688
loss: 1.0243065357208252,grad_norm: 0.9020844484909533, iteration: 204689
loss: 1.0210191011428833,grad_norm: 0.9834082081049044, iteration: 204690
loss: 0.9919392466545105,grad_norm: 0.9800484431538694, iteration: 204691
loss: 0.9900597333908081,grad_norm: 0.9999995418373462, iteration: 204692
loss: 1.006643533706665,grad_norm: 0.9999992533149425, iteration: 204693
loss: 0.9676222205162048,grad_norm: 0.8201039982567115, iteration: 204694
loss: 1.0142290592193604,grad_norm: 0.9048977841531958, iteration: 204695
loss: 0.9929935932159424,grad_norm: 0.7566350829633863, iteration: 204696
loss: 0.9875791072845459,grad_norm: 0.9114566252741992, iteration: 204697
loss: 0.9949736595153809,grad_norm: 0.896310445477184, iteration: 204698
loss: 0.9870116710662842,grad_norm: 0.9363113518618307, iteration: 204699
loss: 0.9871676564216614,grad_norm: 0.9234603572296437, iteration: 204700
loss: 0.9844766855239868,grad_norm: 0.9999990920786113, iteration: 204701
loss: 0.9952260255813599,grad_norm: 0.9999990162235207, iteration: 204702
loss: 0.9791319370269775,grad_norm: 0.9999991842162385, iteration: 204703
loss: 0.9912348389625549,grad_norm: 0.8532780807093685, iteration: 204704
loss: 0.9988183379173279,grad_norm: 0.9999991312639569, iteration: 204705
loss: 0.9968442916870117,grad_norm: 0.9999992438542861, iteration: 204706
loss: 0.9894806742668152,grad_norm: 0.9502387316641767, iteration: 204707
loss: 1.015212893486023,grad_norm: 0.8136625556446452, iteration: 204708
loss: 1.000083565711975,grad_norm: 0.9999990308596881, iteration: 204709
loss: 1.0165883302688599,grad_norm: 0.9598520854485513, iteration: 204710
loss: 1.0329453945159912,grad_norm: 0.9999992545640467, iteration: 204711
loss: 0.9774704575538635,grad_norm: 0.9999990825751465, iteration: 204712
loss: 0.9888877272605896,grad_norm: 0.9495893606995937, iteration: 204713
loss: 1.0342011451721191,grad_norm: 0.7920664622947057, iteration: 204714
loss: 1.0363527536392212,grad_norm: 0.9929056191768626, iteration: 204715
loss: 1.0145515203475952,grad_norm: 0.8807040536222848, iteration: 204716
loss: 0.9964135885238647,grad_norm: 0.9999992899880265, iteration: 204717
loss: 0.9823005199432373,grad_norm: 0.8446303002968636, iteration: 204718
loss: 1.0210775136947632,grad_norm: 0.9605447896114944, iteration: 204719
loss: 1.0071196556091309,grad_norm: 0.862200128668784, iteration: 204720
loss: 1.0050755739212036,grad_norm: 0.7275944598481139, iteration: 204721
loss: 0.9886842370033264,grad_norm: 0.9423454534392842, iteration: 204722
loss: 1.0098857879638672,grad_norm: 0.8671538346338812, iteration: 204723
loss: 1.0113525390625,grad_norm: 0.9999991863784548, iteration: 204724
loss: 0.9837554693222046,grad_norm: 0.9999991181430599, iteration: 204725
loss: 1.0166044235229492,grad_norm: 0.8257754614126509, iteration: 204726
loss: 0.9828652739524841,grad_norm: 0.9031092750468789, iteration: 204727
loss: 1.0723627805709839,grad_norm: 0.9999991231324826, iteration: 204728
loss: 0.9743436574935913,grad_norm: 0.9999990930192972, iteration: 204729
loss: 0.9865388870239258,grad_norm: 0.9715382195735892, iteration: 204730
loss: 0.9907820224761963,grad_norm: 0.9999991894307582, iteration: 204731
loss: 0.9827350378036499,grad_norm: 0.8648947763181841, iteration: 204732
loss: 1.0298484563827515,grad_norm: 0.8976039836976625, iteration: 204733
loss: 1.0117346048355103,grad_norm: 0.9644417235045525, iteration: 204734
loss: 1.0080195665359497,grad_norm: 0.9999992788622115, iteration: 204735
loss: 0.9961527585983276,grad_norm: 0.912276370015612, iteration: 204736
loss: 0.9648932814598083,grad_norm: 0.9401194047137759, iteration: 204737
loss: 0.9897035360336304,grad_norm: 0.9999992675151299, iteration: 204738
loss: 1.018175721168518,grad_norm: 0.9430876490474427, iteration: 204739
loss: 1.0184744596481323,grad_norm: 0.8843257680094015, iteration: 204740
loss: 1.0337855815887451,grad_norm: 0.9999990986616428, iteration: 204741
loss: 1.0226022005081177,grad_norm: 0.8612609933438823, iteration: 204742
loss: 0.9905871748924255,grad_norm: 0.9999990218228659, iteration: 204743
loss: 1.0081583261489868,grad_norm: 0.8237125301362083, iteration: 204744
loss: 1.0018342733383179,grad_norm: 0.9189260936340765, iteration: 204745
loss: 1.0041362047195435,grad_norm: 0.9647258268152459, iteration: 204746
loss: 1.0370465517044067,grad_norm: 0.8857539968761352, iteration: 204747
loss: 0.9843355417251587,grad_norm: 0.9272972515816286, iteration: 204748
loss: 1.0130990743637085,grad_norm: 0.8334924198504715, iteration: 204749
loss: 1.020917296409607,grad_norm: 0.731280896917914, iteration: 204750
loss: 0.9945318102836609,grad_norm: 0.9999990969872803, iteration: 204751
loss: 1.0176821947097778,grad_norm: 0.9999995902492708, iteration: 204752
loss: 1.0030491352081299,grad_norm: 0.911952862500267, iteration: 204753
loss: 1.0279357433319092,grad_norm: 0.8549377653719349, iteration: 204754
loss: 0.9969053268432617,grad_norm: 0.9835302868441395, iteration: 204755
loss: 1.0224097967147827,grad_norm: 0.8521570725600454, iteration: 204756
loss: 0.9880049824714661,grad_norm: 0.9999990584178442, iteration: 204757
loss: 0.9959795475006104,grad_norm: 0.9163215202418986, iteration: 204758
loss: 1.0198971033096313,grad_norm: 0.8995788174677898, iteration: 204759
loss: 1.032983422279358,grad_norm: 0.9693580858376246, iteration: 204760
loss: 0.9735455513000488,grad_norm: 0.9999993221336235, iteration: 204761
loss: 0.9812196493148804,grad_norm: 0.9999991829670215, iteration: 204762
loss: 0.9506747722625732,grad_norm: 0.9999990811343042, iteration: 204763
loss: 1.0195143222808838,grad_norm: 0.7183644783346916, iteration: 204764
loss: 1.0139498710632324,grad_norm: 0.9999990258784135, iteration: 204765
loss: 0.964590311050415,grad_norm: 0.9008841220697147, iteration: 204766
loss: 0.9760934710502625,grad_norm: 0.9999989024368159, iteration: 204767
loss: 0.9803677797317505,grad_norm: 0.975639078812714, iteration: 204768
loss: 0.9501747488975525,grad_norm: 0.9296857922034647, iteration: 204769
loss: 0.9893020987510681,grad_norm: 0.9147024532273743, iteration: 204770
loss: 1.0069317817687988,grad_norm: 0.8804465280310874, iteration: 204771
loss: 1.0262244939804077,grad_norm: 0.9124767456039231, iteration: 204772
loss: 0.9830365777015686,grad_norm: 0.8533244715415733, iteration: 204773
loss: 0.9788816571235657,grad_norm: 0.9077159140493513, iteration: 204774
loss: 1.0284672975540161,grad_norm: 0.7635329587499199, iteration: 204775
loss: 1.001112699508667,grad_norm: 0.9735293826390524, iteration: 204776
loss: 1.007054328918457,grad_norm: 0.8845912078850016, iteration: 204777
loss: 0.9997780323028564,grad_norm: 0.9180320269202277, iteration: 204778
loss: 1.0256483554840088,grad_norm: 0.8431099959751781, iteration: 204779
loss: 0.9784063696861267,grad_norm: 0.9999992616650164, iteration: 204780
loss: 0.9756225943565369,grad_norm: 0.999999066883252, iteration: 204781
loss: 0.9822655320167542,grad_norm: 0.8066461168752572, iteration: 204782
loss: 1.0018588304519653,grad_norm: 0.9999994048419, iteration: 204783
loss: 1.0186046361923218,grad_norm: 0.9999990496024038, iteration: 204784
loss: 1.0179693698883057,grad_norm: 0.9544060772104748, iteration: 204785
loss: 1.0546432733535767,grad_norm: 0.960968218895989, iteration: 204786
loss: 0.9548346400260925,grad_norm: 0.9152009715768408, iteration: 204787
loss: 1.0003552436828613,grad_norm: 0.9320329089829175, iteration: 204788
loss: 1.0248810052871704,grad_norm: 0.9999991095030406, iteration: 204789
loss: 1.123436689376831,grad_norm: 0.9999991679046546, iteration: 204790
loss: 0.9980356097221375,grad_norm: 0.999999222184996, iteration: 204791
loss: 0.9634201526641846,grad_norm: 0.9115120043806041, iteration: 204792
loss: 1.0026240348815918,grad_norm: 0.8760255625115962, iteration: 204793
loss: 0.9653016924858093,grad_norm: 0.9999990600839608, iteration: 204794
loss: 0.9907606244087219,grad_norm: 0.942060972443583, iteration: 204795
loss: 0.975067138671875,grad_norm: 0.9387242908840444, iteration: 204796
loss: 0.9946916699409485,grad_norm: 0.8585731803593044, iteration: 204797
loss: 1.0092989206314087,grad_norm: 0.9999994187474383, iteration: 204798
loss: 1.0773669481277466,grad_norm: 0.9999992379712881, iteration: 204799
loss: 1.0590780973434448,grad_norm: 0.9999991525286017, iteration: 204800
loss: 1.004461646080017,grad_norm: 0.9999990906586802, iteration: 204801
loss: 1.0228071212768555,grad_norm: 0.9999989461395997, iteration: 204802
loss: 0.9773841500282288,grad_norm: 0.9999991554190053, iteration: 204803
loss: 1.0021241903305054,grad_norm: 0.7988248991640596, iteration: 204804
loss: 0.9983064532279968,grad_norm: 0.9496543628458728, iteration: 204805
loss: 1.0384451150894165,grad_norm: 0.999999545800589, iteration: 204806
loss: 0.9782354831695557,grad_norm: 0.9999990860143887, iteration: 204807
loss: 0.987291693687439,grad_norm: 0.9999995299225938, iteration: 204808
loss: 1.0230082273483276,grad_norm: 0.9267916189323838, iteration: 204809
loss: 0.9579747319221497,grad_norm: 0.9999990592221928, iteration: 204810
loss: 1.0189464092254639,grad_norm: 0.9999990355408624, iteration: 204811
loss: 1.0383275747299194,grad_norm: 0.999999090337441, iteration: 204812
loss: 1.0191586017608643,grad_norm: 0.8645059828897528, iteration: 204813
loss: 1.0105185508728027,grad_norm: 0.9999991933714178, iteration: 204814
loss: 0.9915424585342407,grad_norm: 0.9196291854813924, iteration: 204815
loss: 0.9870132207870483,grad_norm: 0.9809543683452207, iteration: 204816
loss: 0.962436318397522,grad_norm: 0.884490100079228, iteration: 204817
loss: 1.0569701194763184,grad_norm: 0.9999990424968618, iteration: 204818
loss: 0.9861380457878113,grad_norm: 0.9455733947384013, iteration: 204819
loss: 0.9743066430091858,grad_norm: 0.8804623576160786, iteration: 204820
loss: 1.0017341375350952,grad_norm: 0.8909838482371405, iteration: 204821
loss: 1.0005149841308594,grad_norm: 0.9999991130127907, iteration: 204822
loss: 1.0337218046188354,grad_norm: 0.9045363423539157, iteration: 204823
loss: 1.028375506401062,grad_norm: 0.9925179280588153, iteration: 204824
loss: 1.0180741548538208,grad_norm: 0.9999992504761647, iteration: 204825
loss: 1.0035954713821411,grad_norm: 0.9999991448981133, iteration: 204826
loss: 0.989474892616272,grad_norm: 0.9999991493347936, iteration: 204827
loss: 1.0076372623443604,grad_norm: 0.9999992659216523, iteration: 204828
loss: 0.9875746369361877,grad_norm: 0.9870063807787474, iteration: 204829
loss: 0.971497654914856,grad_norm: 0.9043013690942222, iteration: 204830
loss: 0.9822199940681458,grad_norm: 0.9511142577759998, iteration: 204831
loss: 0.9615285992622375,grad_norm: 0.9071096163795686, iteration: 204832
loss: 0.982048511505127,grad_norm: 0.8239531643940391, iteration: 204833
loss: 0.9780754446983337,grad_norm: 0.9353075318657993, iteration: 204834
loss: 1.0036576986312866,grad_norm: 0.9973848123459449, iteration: 204835
loss: 1.0062572956085205,grad_norm: 0.9999991789502808, iteration: 204836
loss: 0.9977771043777466,grad_norm: 0.8315052077508323, iteration: 204837
loss: 1.0055623054504395,grad_norm: 0.9999990058243272, iteration: 204838
loss: 0.9643367528915405,grad_norm: 0.9442932687121056, iteration: 204839
loss: 0.9912530183792114,grad_norm: 0.9039078047349618, iteration: 204840
loss: 1.033827304840088,grad_norm: 0.9999993648675901, iteration: 204841
loss: 0.988544225692749,grad_norm: 0.9623643938690171, iteration: 204842
loss: 1.0176491737365723,grad_norm: 0.9786866565337179, iteration: 204843
loss: 0.9973774552345276,grad_norm: 0.8611786791742434, iteration: 204844
loss: 1.0154643058776855,grad_norm: 0.9999992387187056, iteration: 204845
loss: 0.9778890013694763,grad_norm: 0.8292908353941411, iteration: 204846
loss: 1.0193284749984741,grad_norm: 0.8934341753933983, iteration: 204847
loss: 1.0102463960647583,grad_norm: 0.9633736450863889, iteration: 204848
loss: 0.9856085181236267,grad_norm: 0.999999105960292, iteration: 204849
loss: 1.022571325302124,grad_norm: 0.9999992082435692, iteration: 204850
loss: 1.0213727951049805,grad_norm: 0.9999991009102589, iteration: 204851
loss: 0.9945877194404602,grad_norm: 0.9999990150090885, iteration: 204852
loss: 1.0271704196929932,grad_norm: 0.8647799941885809, iteration: 204853
loss: 1.0004262924194336,grad_norm: 0.9999991038251111, iteration: 204854
loss: 0.9874034523963928,grad_norm: 0.950918539921622, iteration: 204855
loss: 0.9799969792366028,grad_norm: 0.999999129565215, iteration: 204856
loss: 0.9946654438972473,grad_norm: 0.9999991468427009, iteration: 204857
loss: 1.0156368017196655,grad_norm: 0.84131224875018, iteration: 204858
loss: 1.0206177234649658,grad_norm: 0.9999991019626566, iteration: 204859
loss: 1.0216208696365356,grad_norm: 0.999999322251684, iteration: 204860
loss: 0.9981600046157837,grad_norm: 0.9999991472607462, iteration: 204861
loss: 1.0193736553192139,grad_norm: 0.988472920746693, iteration: 204862
loss: 1.0215526819229126,grad_norm: 0.8429662611647528, iteration: 204863
loss: 0.947529137134552,grad_norm: 0.9258595716871193, iteration: 204864
loss: 1.031914472579956,grad_norm: 0.9999991725838958, iteration: 204865
loss: 0.9748702645301819,grad_norm: 0.9408723080978063, iteration: 204866
loss: 0.9753696322441101,grad_norm: 0.999999059116606, iteration: 204867
loss: 0.9777964949607849,grad_norm: 0.9999992742437361, iteration: 204868
loss: 1.0217517614364624,grad_norm: 0.8294797897961804, iteration: 204869
loss: 1.0138300657272339,grad_norm: 0.9999990323525522, iteration: 204870
loss: 1.0113805532455444,grad_norm: 0.8317954451581547, iteration: 204871
loss: 1.0341428518295288,grad_norm: 0.878383536932504, iteration: 204872
loss: 0.9522649049758911,grad_norm: 0.9106739716716201, iteration: 204873
loss: 1.0287688970565796,grad_norm: 0.9453108354708152, iteration: 204874
loss: 1.0081647634506226,grad_norm: 0.9413078589548634, iteration: 204875
loss: 0.9932428002357483,grad_norm: 0.9351168996289384, iteration: 204876
loss: 0.9737573266029358,grad_norm: 0.8416779735624165, iteration: 204877
loss: 1.0055872201919556,grad_norm: 0.9944968589946334, iteration: 204878
loss: 0.986700713634491,grad_norm: 0.995286121292443, iteration: 204879
loss: 1.0042307376861572,grad_norm: 0.9916831833390783, iteration: 204880
loss: 1.017039179801941,grad_norm: 0.969190723404138, iteration: 204881
loss: 0.9876399040222168,grad_norm: 0.8511506473695656, iteration: 204882
loss: 0.9910488724708557,grad_norm: 0.9999992222794237, iteration: 204883
loss: 0.9606772065162659,grad_norm: 0.9999993083271961, iteration: 204884
loss: 1.0017553567886353,grad_norm: 0.8694141501136387, iteration: 204885
loss: 1.000105857849121,grad_norm: 0.999999847274688, iteration: 204886
loss: 0.9905624389648438,grad_norm: 0.9486665000441415, iteration: 204887
loss: 1.0067517757415771,grad_norm: 0.9999997106804314, iteration: 204888
loss: 1.0174481868743896,grad_norm: 0.9999992072699291, iteration: 204889
loss: 1.0003973245620728,grad_norm: 0.9999990956112024, iteration: 204890
loss: 1.0660321712493896,grad_norm: 0.9999996929292556, iteration: 204891
loss: 1.0361878871917725,grad_norm: 0.9999996497278247, iteration: 204892
loss: 1.029281735420227,grad_norm: 0.8945317356389149, iteration: 204893
loss: 0.9681665897369385,grad_norm: 0.9999990250760964, iteration: 204894
loss: 0.99432373046875,grad_norm: 0.9491713030513422, iteration: 204895
loss: 0.9970393180847168,grad_norm: 0.9999992490530438, iteration: 204896
loss: 0.9567733407020569,grad_norm: 0.9957724301207059, iteration: 204897
loss: 0.9865055680274963,grad_norm: 0.9686366101209517, iteration: 204898
loss: 0.963424563407898,grad_norm: 0.9779999985988659, iteration: 204899
loss: 0.9769515991210938,grad_norm: 0.9044921774346933, iteration: 204900
loss: 0.9983307123184204,grad_norm: 0.9154710645627083, iteration: 204901
loss: 1.0101953744888306,grad_norm: 0.9659238989285608, iteration: 204902
loss: 0.9826199412345886,grad_norm: 0.8702666306910171, iteration: 204903
loss: 1.015781044960022,grad_norm: 0.931260465677838, iteration: 204904
loss: 1.0294114351272583,grad_norm: 0.9999991714223813, iteration: 204905
loss: 0.9559484720230103,grad_norm: 0.9999989740940387, iteration: 204906
loss: 0.9661945700645447,grad_norm: 0.9838826933506232, iteration: 204907
loss: 1.045670747756958,grad_norm: 0.9999992152506957, iteration: 204908
loss: 0.9849924445152283,grad_norm: 0.9002347099910435, iteration: 204909
loss: 0.988569974899292,grad_norm: 0.9999989900938624, iteration: 204910
loss: 1.0083001852035522,grad_norm: 0.9878586854361463, iteration: 204911
loss: 1.0117641687393188,grad_norm: 0.9415657016448592, iteration: 204912
loss: 1.027991771697998,grad_norm: 0.9482446061446305, iteration: 204913
loss: 0.9914706349372864,grad_norm: 0.8644335576700766, iteration: 204914
loss: 1.013066053390503,grad_norm: 0.9999990463581734, iteration: 204915
loss: 0.9778711199760437,grad_norm: 0.9779354462384144, iteration: 204916
loss: 1.0439907312393188,grad_norm: 0.9999991744830906, iteration: 204917
loss: 0.9897913932800293,grad_norm: 0.9864508337320814, iteration: 204918
loss: 1.0167551040649414,grad_norm: 0.9999990976256413, iteration: 204919
loss: 0.9805142283439636,grad_norm: 0.9999992432387622, iteration: 204920
loss: 1.0359539985656738,grad_norm: 0.9999991402605659, iteration: 204921
loss: 1.0168559551239014,grad_norm: 0.9999990643866531, iteration: 204922
loss: 0.9988974928855896,grad_norm: 0.9999990683573062, iteration: 204923
loss: 0.9668560028076172,grad_norm: 0.954006237597664, iteration: 204924
loss: 1.0208778381347656,grad_norm: 0.9999990798205081, iteration: 204925
loss: 0.9660862684249878,grad_norm: 0.7856931598210064, iteration: 204926
loss: 1.0076870918273926,grad_norm: 0.9864737572197169, iteration: 204927
loss: 0.9906753301620483,grad_norm: 0.9999989545487213, iteration: 204928
loss: 0.9930265545845032,grad_norm: 0.9217678800995837, iteration: 204929
loss: 1.0904955863952637,grad_norm: 0.9554337090956903, iteration: 204930
loss: 0.9884808659553528,grad_norm: 0.999999922535625, iteration: 204931
loss: 1.0123639106750488,grad_norm: 0.9999990213118253, iteration: 204932
loss: 0.9737696051597595,grad_norm: 0.8582464393827333, iteration: 204933
loss: 1.028757095336914,grad_norm: 0.9034458384440015, iteration: 204934
loss: 0.9896160960197449,grad_norm: 0.9675153812242919, iteration: 204935
loss: 1.0463322401046753,grad_norm: 0.8422742073232067, iteration: 204936
loss: 0.9932632446289062,grad_norm: 0.845236028862795, iteration: 204937
loss: 1.0340831279754639,grad_norm: 0.9999992493704962, iteration: 204938
loss: 1.0547206401824951,grad_norm: 0.9035275999810246, iteration: 204939
loss: 1.0024402141571045,grad_norm: 0.9563765342253424, iteration: 204940
loss: 0.9968990087509155,grad_norm: 0.9999991515529281, iteration: 204941
loss: 1.0143076181411743,grad_norm: 0.7958302829070664, iteration: 204942
loss: 0.9753835797309875,grad_norm: 0.9633542674205958, iteration: 204943
loss: 1.0238544940948486,grad_norm: 0.9999993445185563, iteration: 204944
loss: 0.9784913063049316,grad_norm: 0.9015543896477842, iteration: 204945
loss: 0.9590341448783875,grad_norm: 0.9127080462554636, iteration: 204946
loss: 0.9959814548492432,grad_norm: 0.9999991535731838, iteration: 204947
loss: 0.9870159029960632,grad_norm: 0.7940085698344064, iteration: 204948
loss: 1.0209115743637085,grad_norm: 0.9999990205537098, iteration: 204949
loss: 1.0730072259902954,grad_norm: 0.9999996284262724, iteration: 204950
loss: 1.0370465517044067,grad_norm: 0.9999991426324276, iteration: 204951
loss: 0.9738578200340271,grad_norm: 0.8722971623516005, iteration: 204952
loss: 1.0399340391159058,grad_norm: 0.9999991418473522, iteration: 204953
loss: 1.000694990158081,grad_norm: 0.9999997447309957, iteration: 204954
loss: 0.9818564057350159,grad_norm: 0.8708294856733411, iteration: 204955
loss: 0.9951975345611572,grad_norm: 0.8775163201471013, iteration: 204956
loss: 1.0404338836669922,grad_norm: 0.9172553106246049, iteration: 204957
loss: 0.9969342350959778,grad_norm: 0.9999990543732667, iteration: 204958
loss: 1.0250815153121948,grad_norm: 0.9049635188897794, iteration: 204959
loss: 0.9943567514419556,grad_norm: 0.9463832483467269, iteration: 204960
loss: 0.9842879772186279,grad_norm: 0.9156438189644663, iteration: 204961
loss: 0.9926919937133789,grad_norm: 0.8266144819351514, iteration: 204962
loss: 0.9899623394012451,grad_norm: 0.8437807954354124, iteration: 204963
loss: 1.0051370859146118,grad_norm: 0.9999990577190964, iteration: 204964
loss: 0.9618710875511169,grad_norm: 0.7240071269452112, iteration: 204965
loss: 0.9964191913604736,grad_norm: 0.9005951617359945, iteration: 204966
loss: 0.9774847030639648,grad_norm: 0.8542914310771145, iteration: 204967
loss: 1.0115700960159302,grad_norm: 0.9999991624863388, iteration: 204968
loss: 0.995876133441925,grad_norm: 0.9265125364028473, iteration: 204969
loss: 1.038262128829956,grad_norm: 0.9647506867254496, iteration: 204970
loss: 1.0384559631347656,grad_norm: 0.9999991786584695, iteration: 204971
loss: 0.9900467395782471,grad_norm: 0.964360087096875, iteration: 204972
loss: 0.9800618886947632,grad_norm: 0.9999991296339512, iteration: 204973
loss: 1.0402147769927979,grad_norm: 0.9999992386036456, iteration: 204974
loss: 1.0219146013259888,grad_norm: 0.9999992995413175, iteration: 204975
loss: 1.0034363269805908,grad_norm: 0.9598621176889132, iteration: 204976
loss: 1.0051201581954956,grad_norm: 0.9729422626016839, iteration: 204977
loss: 0.9924436211585999,grad_norm: 0.8821220979342851, iteration: 204978
loss: 1.0027421712875366,grad_norm: 0.9999991330743545, iteration: 204979
loss: 0.9837051033973694,grad_norm: 0.8699444012214148, iteration: 204980
loss: 0.973697304725647,grad_norm: 0.9999992239692846, iteration: 204981
loss: 1.029034972190857,grad_norm: 0.9999998600760311, iteration: 204982
loss: 1.003025770187378,grad_norm: 0.9515558970651644, iteration: 204983
loss: 0.9790487289428711,grad_norm: 0.8542977220890376, iteration: 204984
loss: 1.021276831626892,grad_norm: 0.8580340558932422, iteration: 204985
loss: 0.9788615703582764,grad_norm: 0.9330375307325329, iteration: 204986
loss: 1.0165448188781738,grad_norm: 0.9999991255010131, iteration: 204987
loss: 1.0303229093551636,grad_norm: 0.8357286192962476, iteration: 204988
loss: 0.9929267168045044,grad_norm: 0.844814398154754, iteration: 204989
loss: 1.0232617855072021,grad_norm: 0.8606949870078254, iteration: 204990
loss: 1.0335451364517212,grad_norm: 0.9999999671365517, iteration: 204991
loss: 1.0011675357818604,grad_norm: 0.931804704620598, iteration: 204992
loss: 0.9933673739433289,grad_norm: 0.999998952687728, iteration: 204993
loss: 1.0497066974639893,grad_norm: 0.975684370363008, iteration: 204994
loss: 0.9799240231513977,grad_norm: 0.8084350048154607, iteration: 204995
loss: 0.9890457391738892,grad_norm: 0.9797447017224269, iteration: 204996
loss: 0.9977971315383911,grad_norm: 0.8469907876127659, iteration: 204997
loss: 1.0469763278961182,grad_norm: 0.9487128467144916, iteration: 204998
loss: 0.9883227944374084,grad_norm: 0.8732831299797339, iteration: 204999
loss: 1.006847858428955,grad_norm: 0.950638613172721, iteration: 205000
loss: 1.0185235738754272,grad_norm: 0.8452571025675576, iteration: 205001
loss: 0.9914165139198303,grad_norm: 0.86615363609086, iteration: 205002
loss: 1.0242456197738647,grad_norm: 0.8412389239144268, iteration: 205003
loss: 0.9870787262916565,grad_norm: 0.9810128649236233, iteration: 205004
loss: 0.9934424757957458,grad_norm: 0.9999991883902879, iteration: 205005
loss: 0.9870523810386658,grad_norm: 0.9999991728686185, iteration: 205006
loss: 0.9697248935699463,grad_norm: 0.945818409191038, iteration: 205007
loss: 1.0191160440444946,grad_norm: 0.8914290117659855, iteration: 205008
loss: 1.0160975456237793,grad_norm: 0.9253761525314808, iteration: 205009
loss: 1.0025960206985474,grad_norm: 0.8271263218505601, iteration: 205010
loss: 1.0030570030212402,grad_norm: 0.9373437427664079, iteration: 205011
loss: 1.0171257257461548,grad_norm: 0.9999991835052977, iteration: 205012
loss: 1.0078561305999756,grad_norm: 0.9999990748847468, iteration: 205013
loss: 1.0126123428344727,grad_norm: 0.8324321033029473, iteration: 205014
loss: 1.0120580196380615,grad_norm: 0.8947023064020205, iteration: 205015
loss: 0.9852147102355957,grad_norm: 0.9999992058611672, iteration: 205016
loss: 1.0100085735321045,grad_norm: 0.9999991107275816, iteration: 205017
loss: 0.9752528667449951,grad_norm: 0.9999991639134364, iteration: 205018
loss: 0.9953514337539673,grad_norm: 0.9999991733227035, iteration: 205019
loss: 0.9930523037910461,grad_norm: 0.9999992260848256, iteration: 205020
loss: 1.0617722272872925,grad_norm: 0.9999998994011609, iteration: 205021
loss: 0.9715076088905334,grad_norm: 0.8392668648024967, iteration: 205022
loss: 1.0059159994125366,grad_norm: 0.9999994511348944, iteration: 205023
loss: 0.9781140685081482,grad_norm: 0.9948140988422898, iteration: 205024
loss: 1.0115082263946533,grad_norm: 0.8313483648051454, iteration: 205025
loss: 0.9860530495643616,grad_norm: 0.9999992745971521, iteration: 205026
loss: 1.0121034383773804,grad_norm: 0.9999996000960293, iteration: 205027
loss: 0.988108217716217,grad_norm: 0.8909576241615627, iteration: 205028
loss: 0.9836270809173584,grad_norm: 0.9999989854492256, iteration: 205029
loss: 0.9916335940361023,grad_norm: 0.9554927559348924, iteration: 205030
loss: 1.0478789806365967,grad_norm: 0.9999993278777184, iteration: 205031
loss: 0.9519686698913574,grad_norm: 0.9999990461800377, iteration: 205032
loss: 0.9845551252365112,grad_norm: 0.9426948464558381, iteration: 205033
loss: 0.9975371956825256,grad_norm: 0.8336289797193914, iteration: 205034
loss: 0.9538423418998718,grad_norm: 0.8966321870908881, iteration: 205035
loss: 1.005719542503357,grad_norm: 0.912978003150764, iteration: 205036
loss: 1.0063964128494263,grad_norm: 0.9999992405397343, iteration: 205037
loss: 0.9803939461708069,grad_norm: 0.9999990711460556, iteration: 205038
loss: 1.0490753650665283,grad_norm: 0.9999989946185281, iteration: 205039
loss: 0.9905189275741577,grad_norm: 0.9485711680740515, iteration: 205040
loss: 0.9967753887176514,grad_norm: 0.987177112676658, iteration: 205041
loss: 0.9802236557006836,grad_norm: 0.9999992020389012, iteration: 205042
loss: 0.9858834743499756,grad_norm: 0.9372089374938357, iteration: 205043
loss: 0.9845262765884399,grad_norm: 0.8553370838095681, iteration: 205044
loss: 1.0241684913635254,grad_norm: 0.9640487889082249, iteration: 205045
loss: 0.9982196092605591,grad_norm: 0.8906275902297986, iteration: 205046
loss: 0.9690671563148499,grad_norm: 0.8694691184294282, iteration: 205047
loss: 1.1064274311065674,grad_norm: 0.9999995858365026, iteration: 205048
loss: 0.9795939922332764,grad_norm: 0.9098853943804671, iteration: 205049
loss: 0.99164879322052,grad_norm: 0.999999012151704, iteration: 205050
loss: 0.9911039471626282,grad_norm: 0.999999008808474, iteration: 205051
loss: 1.0161155462265015,grad_norm: 0.9999991514785164, iteration: 205052
loss: 0.9931815266609192,grad_norm: 0.9999989064846158, iteration: 205053
loss: 1.0043461322784424,grad_norm: 0.8788924497853684, iteration: 205054
loss: 1.0090935230255127,grad_norm: 0.8939297999206787, iteration: 205055
loss: 1.0150760412216187,grad_norm: 0.9999992813048358, iteration: 205056
loss: 0.9992009401321411,grad_norm: 0.8063372145174101, iteration: 205057
loss: 1.0379828214645386,grad_norm: 0.9999991509524127, iteration: 205058
loss: 1.0212903022766113,grad_norm: 0.9282586633713726, iteration: 205059
loss: 1.031224012374878,grad_norm: 0.9999991450806202, iteration: 205060
loss: 0.9692387580871582,grad_norm: 0.8874940230623682, iteration: 205061
loss: 1.0441670417785645,grad_norm: 0.908530274930403, iteration: 205062
loss: 1.0101161003112793,grad_norm: 0.9999997404580739, iteration: 205063
loss: 0.9980776309967041,grad_norm: 0.9999991304107478, iteration: 205064
loss: 1.0088425874710083,grad_norm: 0.9999991218488025, iteration: 205065
loss: 1.0044044256210327,grad_norm: 0.999999166010146, iteration: 205066
loss: 1.015031099319458,grad_norm: 0.9999995427079623, iteration: 205067
loss: 0.9859279990196228,grad_norm: 0.9313377173109237, iteration: 205068
loss: 1.0342977046966553,grad_norm: 0.9999991355277202, iteration: 205069
loss: 1.0149004459381104,grad_norm: 0.8855379492173864, iteration: 205070
loss: 1.002363681793213,grad_norm: 0.7519286274356436, iteration: 205071
loss: 0.9606310725212097,grad_norm: 0.798181000116175, iteration: 205072
loss: 1.0517842769622803,grad_norm: 0.9999998084174392, iteration: 205073
loss: 1.0011402368545532,grad_norm: 0.9546623330935963, iteration: 205074
loss: 0.9914782047271729,grad_norm: 0.884561848639807, iteration: 205075
loss: 1.004629135131836,grad_norm: 0.9255370796876854, iteration: 205076
loss: 0.9699652194976807,grad_norm: 0.9391242388032499, iteration: 205077
loss: 1.0641672611236572,grad_norm: 0.9999999159585277, iteration: 205078
loss: 1.001281976699829,grad_norm: 0.9999993330039076, iteration: 205079
loss: 1.0066368579864502,grad_norm: 0.851807400810129, iteration: 205080
loss: 1.1217771768569946,grad_norm: 0.9999994103021022, iteration: 205081
loss: 0.9964540600776672,grad_norm: 0.8787792447626479, iteration: 205082
loss: 1.0055887699127197,grad_norm: 0.9550185619239697, iteration: 205083
loss: 1.0063632726669312,grad_norm: 0.9309819914430246, iteration: 205084
loss: 0.9773266315460205,grad_norm: 0.8727722612767119, iteration: 205085
loss: 0.9434977173805237,grad_norm: 0.9458491138031876, iteration: 205086
loss: 0.9762266278266907,grad_norm: 0.8592504681669306, iteration: 205087
loss: 1.024844765663147,grad_norm: 0.9999990505943577, iteration: 205088
loss: 0.9515357613563538,grad_norm: 0.9999991773208533, iteration: 205089
loss: 0.9937438368797302,grad_norm: 0.854880413699972, iteration: 205090
loss: 0.9708545207977295,grad_norm: 0.850685252498612, iteration: 205091
loss: 1.0629992485046387,grad_norm: 0.9999992062017368, iteration: 205092
loss: 1.0361188650131226,grad_norm: 0.8377903176465367, iteration: 205093
loss: 1.0170986652374268,grad_norm: 0.9619593888890905, iteration: 205094
loss: 1.0065611600875854,grad_norm: 0.9988583719426519, iteration: 205095
loss: 0.9813803434371948,grad_norm: 0.838267566502049, iteration: 205096
loss: 1.031794548034668,grad_norm: 0.9049371056642513, iteration: 205097
loss: 1.0055791139602661,grad_norm: 0.9999992009236163, iteration: 205098
loss: 0.9971489310264587,grad_norm: 0.9999990534274403, iteration: 205099
loss: 0.9497177004814148,grad_norm: 0.9073864833298173, iteration: 205100
loss: 0.9690606594085693,grad_norm: 0.9999992052194341, iteration: 205101
loss: 0.9906448721885681,grad_norm: 0.9999993753623617, iteration: 205102
loss: 1.003959059715271,grad_norm: 0.8904367224761635, iteration: 205103
loss: 1.0211998224258423,grad_norm: 0.999999232519035, iteration: 205104
loss: 1.0307669639587402,grad_norm: 0.9999990992537353, iteration: 205105
loss: 1.096828818321228,grad_norm: 0.9999999489242676, iteration: 205106
loss: 1.199968934059143,grad_norm: 0.9999992901692873, iteration: 205107
loss: 1.0297660827636719,grad_norm: 0.9999992503104207, iteration: 205108
loss: 1.0424742698669434,grad_norm: 0.9999992016158197, iteration: 205109
loss: 0.9926449656486511,grad_norm: 0.9169842813542127, iteration: 205110
loss: 1.0376067161560059,grad_norm: 0.9334040421039284, iteration: 205111
loss: 0.978948175907135,grad_norm: 0.8788929989984543, iteration: 205112
loss: 1.0267503261566162,grad_norm: 0.9999992201081456, iteration: 205113
loss: 1.0460134744644165,grad_norm: 0.9525071448181716, iteration: 205114
loss: 1.009860634803772,grad_norm: 0.9143857215362923, iteration: 205115
loss: 1.0292214155197144,grad_norm: 0.7784331128282534, iteration: 205116
loss: 0.9719060659408569,grad_norm: 0.9287808273430809, iteration: 205117
loss: 1.0191575288772583,grad_norm: 0.9999990944914137, iteration: 205118
loss: 1.003460168838501,grad_norm: 0.9999990727968278, iteration: 205119
loss: 0.9635382294654846,grad_norm: 0.9999991633697778, iteration: 205120
loss: 1.1445928812026978,grad_norm: 0.9999992295700156, iteration: 205121
loss: 1.0308154821395874,grad_norm: 0.906117964451468, iteration: 205122
loss: 0.9941158294677734,grad_norm: 0.9999989868994186, iteration: 205123
loss: 1.024795413017273,grad_norm: 0.8704422180206551, iteration: 205124
loss: 1.0170308351516724,grad_norm: 0.8583985490835678, iteration: 205125
loss: 1.0075470209121704,grad_norm: 0.9125737567602628, iteration: 205126
loss: 0.9729393124580383,grad_norm: 0.9268022930604336, iteration: 205127
loss: 1.0058351755142212,grad_norm: 0.9731845974030637, iteration: 205128
loss: 1.0463143587112427,grad_norm: 0.8553024877318306, iteration: 205129
loss: 0.9779113531112671,grad_norm: 0.9807164945923272, iteration: 205130
loss: 0.9463540315628052,grad_norm: 0.9438151551060702, iteration: 205131
loss: 1.0327306985855103,grad_norm: 0.9999995879979947, iteration: 205132
loss: 0.988945484161377,grad_norm: 0.7901442936570053, iteration: 205133
loss: 1.009812355041504,grad_norm: 0.8781846645885371, iteration: 205134
loss: 0.9854934215545654,grad_norm: 0.8814762343738299, iteration: 205135
loss: 1.0416284799575806,grad_norm: 0.9999992440433059, iteration: 205136
loss: 0.9767557382583618,grad_norm: 0.9999994060199471, iteration: 205137
loss: 0.9805178046226501,grad_norm: 0.9209608648462085, iteration: 205138
loss: 1.0107303857803345,grad_norm: 0.7993109218285194, iteration: 205139
loss: 1.0246965885162354,grad_norm: 0.8985319919975243, iteration: 205140
loss: 0.9840781092643738,grad_norm: 0.9724222041343972, iteration: 205141
loss: 1.0066440105438232,grad_norm: 0.9999993119334225, iteration: 205142
loss: 0.9968730211257935,grad_norm: 0.8532272195422406, iteration: 205143
loss: 0.9871050119400024,grad_norm: 0.930096187401457, iteration: 205144
loss: 0.9760318398475647,grad_norm: 0.9948524269175565, iteration: 205145
loss: 1.0196146965026855,grad_norm: 0.9614384425042208, iteration: 205146
loss: 0.9904242753982544,grad_norm: 0.9999990739966496, iteration: 205147
loss: 1.0082305669784546,grad_norm: 0.8463162934477284, iteration: 205148
loss: 0.9694495797157288,grad_norm: 0.9999991154409722, iteration: 205149
loss: 0.9743191003799438,grad_norm: 0.9294802247852703, iteration: 205150
loss: 1.0156646966934204,grad_norm: 0.939653784784758, iteration: 205151
loss: 1.0669559240341187,grad_norm: 0.9999997926470349, iteration: 205152
loss: 0.9868717193603516,grad_norm: 0.9832279194359842, iteration: 205153
loss: 1.0345250368118286,grad_norm: 0.9999993164758499, iteration: 205154
loss: 0.9707643389701843,grad_norm: 0.9338940661682885, iteration: 205155
loss: 0.9922557473182678,grad_norm: 0.9999992842438485, iteration: 205156
loss: 1.0156760215759277,grad_norm: 0.9999997869630745, iteration: 205157
loss: 1.01615571975708,grad_norm: 0.9936423833532722, iteration: 205158
loss: 0.999776303768158,grad_norm: 0.9999992318715527, iteration: 205159
loss: 0.9972890019416809,grad_norm: 0.9709922588662689, iteration: 205160
loss: 1.013710618019104,grad_norm: 0.9356940490112128, iteration: 205161
loss: 0.9705711603164673,grad_norm: 0.9385442772830176, iteration: 205162
loss: 1.0231237411499023,grad_norm: 0.8273632613876419, iteration: 205163
loss: 0.9747401475906372,grad_norm: 0.925875551466779, iteration: 205164
loss: 1.0352909564971924,grad_norm: 0.999998997170121, iteration: 205165
loss: 0.9888933300971985,grad_norm: 0.8948320349736519, iteration: 205166
loss: 1.0010607242584229,grad_norm: 0.9710318267669514, iteration: 205167
loss: 0.9959608316421509,grad_norm: 0.9237660589215773, iteration: 205168
loss: 1.0121350288391113,grad_norm: 0.9999989364061496, iteration: 205169
loss: 0.9837679862976074,grad_norm: 0.8168594349490719, iteration: 205170
loss: 1.0044881105422974,grad_norm: 0.999999843610315, iteration: 205171
loss: 1.0109971761703491,grad_norm: 0.8957622952642923, iteration: 205172
loss: 0.9850368499755859,grad_norm: 0.9999994801466993, iteration: 205173
loss: 1.0465706586837769,grad_norm: 0.9999995628759862, iteration: 205174
loss: 1.031325101852417,grad_norm: 0.9853618639407953, iteration: 205175
loss: 0.9919018745422363,grad_norm: 0.999999026644943, iteration: 205176
loss: 1.0198856592178345,grad_norm: 0.9999991199963754, iteration: 205177
loss: 0.9794104099273682,grad_norm: 0.9729820658295059, iteration: 205178
loss: 1.0229709148406982,grad_norm: 0.9999990789541439, iteration: 205179
loss: 1.0147536993026733,grad_norm: 0.9702868972954474, iteration: 205180
loss: 0.999852180480957,grad_norm: 0.9999990379111838, iteration: 205181
loss: 1.0165785551071167,grad_norm: 0.8101602494622513, iteration: 205182
loss: 0.9888965487480164,grad_norm: 0.873014700651017, iteration: 205183
loss: 1.0267603397369385,grad_norm: 0.9489121650029034, iteration: 205184
loss: 0.9987984299659729,grad_norm: 0.8224805149947206, iteration: 205185
loss: 0.9851593375205994,grad_norm: 0.8799007568579086, iteration: 205186
loss: 0.9736995100975037,grad_norm: 0.9904828400504436, iteration: 205187
loss: 1.0016571283340454,grad_norm: 0.8427778282849742, iteration: 205188
loss: 1.0301146507263184,grad_norm: 0.9999989716510324, iteration: 205189
loss: 1.1246957778930664,grad_norm: 0.9999991732706107, iteration: 205190
loss: 1.011618971824646,grad_norm: 0.8855516461135117, iteration: 205191
loss: 0.9855983257293701,grad_norm: 0.9778549637731933, iteration: 205192
loss: 1.0247621536254883,grad_norm: 0.9406765621524343, iteration: 205193
loss: 0.9452991485595703,grad_norm: 0.8283124393864149, iteration: 205194
loss: 0.9507219195365906,grad_norm: 0.9771028844165297, iteration: 205195
loss: 1.0191164016723633,grad_norm: 0.9667966562224145, iteration: 205196
loss: 1.0285922288894653,grad_norm: 0.9135674401994516, iteration: 205197
loss: 1.0203579664230347,grad_norm: 0.9541833364868861, iteration: 205198
loss: 0.9736201167106628,grad_norm: 0.8780702728171884, iteration: 205199
loss: 1.0042600631713867,grad_norm: 0.9428691959120248, iteration: 205200
loss: 0.9759567379951477,grad_norm: 0.9999991112068177, iteration: 205201
loss: 0.9847445487976074,grad_norm: 0.9230224183744072, iteration: 205202
loss: 1.0274749994277954,grad_norm: 0.9999991978910115, iteration: 205203
loss: 1.0102530717849731,grad_norm: 0.9999995638896266, iteration: 205204
loss: 0.9861780405044556,grad_norm: 0.899290581250973, iteration: 205205
loss: 0.983008086681366,grad_norm: 0.9482842523185073, iteration: 205206
loss: 0.9898303151130676,grad_norm: 0.9711008493244461, iteration: 205207
loss: 1.0234501361846924,grad_norm: 0.9555203061511138, iteration: 205208
loss: 0.9748595952987671,grad_norm: 0.8729734482280678, iteration: 205209
loss: 0.9611793756484985,grad_norm: 0.9999990116670144, iteration: 205210
loss: 0.9938014149665833,grad_norm: 0.8938525051717343, iteration: 205211
loss: 0.9781454801559448,grad_norm: 0.9254769310758253, iteration: 205212
loss: 1.018455147743225,grad_norm: 0.9999991770804889, iteration: 205213
loss: 0.9716549515724182,grad_norm: 0.9870783484094265, iteration: 205214
loss: 1.0115554332733154,grad_norm: 0.9361492299590016, iteration: 205215
loss: 1.028723955154419,grad_norm: 0.9750263839089323, iteration: 205216
loss: 1.010422706604004,grad_norm: 0.9999990788381646, iteration: 205217
loss: 1.008713722229004,grad_norm: 0.8892556302721172, iteration: 205218
loss: 0.9963343143463135,grad_norm: 0.9342543170019343, iteration: 205219
loss: 0.9693196415901184,grad_norm: 0.8874738276374117, iteration: 205220
loss: 0.9822583198547363,grad_norm: 0.8136109842138703, iteration: 205221
loss: 1.0332238674163818,grad_norm: 0.934796042703304, iteration: 205222
loss: 0.972863495349884,grad_norm: 0.9988860053670833, iteration: 205223
loss: 1.0101197957992554,grad_norm: 0.9999991266434207, iteration: 205224
loss: 0.995273232460022,grad_norm: 0.9865248420700136, iteration: 205225
loss: 1.0518901348114014,grad_norm: 0.9999992622327649, iteration: 205226
loss: 1.0336558818817139,grad_norm: 0.8636252038777157, iteration: 205227
loss: 1.0008610486984253,grad_norm: 0.9146326171217429, iteration: 205228
loss: 1.0138115882873535,grad_norm: 0.9924240778276012, iteration: 205229
loss: 0.9760012626647949,grad_norm: 0.8463588956958307, iteration: 205230
loss: 1.018010139465332,grad_norm: 0.8530993733368727, iteration: 205231
loss: 1.081130862236023,grad_norm: 0.999999186351394, iteration: 205232
loss: 1.0425995588302612,grad_norm: 0.877164688528693, iteration: 205233
loss: 1.0130616426467896,grad_norm: 0.9999992269254281, iteration: 205234
loss: 0.9915708303451538,grad_norm: 0.8548352009884563, iteration: 205235
loss: 1.0044102668762207,grad_norm: 0.999998966381966, iteration: 205236
loss: 0.9951725006103516,grad_norm: 0.9999989168324208, iteration: 205237
loss: 1.0905872583389282,grad_norm: 0.9999998271452495, iteration: 205238
loss: 0.9944906830787659,grad_norm: 0.9999990291553741, iteration: 205239
loss: 1.0127452611923218,grad_norm: 0.9999993373459521, iteration: 205240
loss: 0.9993404150009155,grad_norm: 0.9999990737929089, iteration: 205241
loss: 0.9989932179450989,grad_norm: 0.9999991331505589, iteration: 205242
loss: 1.0796343088150024,grad_norm: 0.9999998193768609, iteration: 205243
loss: 1.0157077312469482,grad_norm: 0.9999991587617394, iteration: 205244
loss: 0.983618438243866,grad_norm: 0.8349585062979786, iteration: 205245
loss: 0.9561476707458496,grad_norm: 0.9999990970486484, iteration: 205246
loss: 1.0622608661651611,grad_norm: 0.9999999110807215, iteration: 205247
loss: 1.0277717113494873,grad_norm: 0.9999992190999798, iteration: 205248
loss: 1.1802170276641846,grad_norm: 1.0000000420421868, iteration: 205249
loss: 1.1967209577560425,grad_norm: 0.9999992369986115, iteration: 205250
loss: 1.0702592134475708,grad_norm: 0.999999166435531, iteration: 205251
loss: 1.0174834728240967,grad_norm: 0.9075842460150109, iteration: 205252
loss: 0.9734788537025452,grad_norm: 0.8205350736633577, iteration: 205253
loss: 0.9923466444015503,grad_norm: 0.8864948956300638, iteration: 205254
loss: 1.0716460943222046,grad_norm: 0.9999993207815139, iteration: 205255
loss: 0.9812209606170654,grad_norm: 0.9999998926496289, iteration: 205256
loss: 0.9877740740776062,grad_norm: 0.8991204546552921, iteration: 205257
loss: 1.0124897956848145,grad_norm: 0.862222885141685, iteration: 205258
loss: 0.9970786571502686,grad_norm: 0.9152269823583336, iteration: 205259
loss: 0.9999558329582214,grad_norm: 0.8073837256657209, iteration: 205260
loss: 0.9867770075798035,grad_norm: 0.8397487974449088, iteration: 205261
loss: 0.9896830916404724,grad_norm: 0.9567096987337037, iteration: 205262
loss: 0.9306009411811829,grad_norm: 0.9773388693038205, iteration: 205263
loss: 0.9959709644317627,grad_norm: 0.968147244054813, iteration: 205264
loss: 1.0057660341262817,grad_norm: 0.9999990905294982, iteration: 205265
loss: 0.9629703760147095,grad_norm: 0.840822750178919, iteration: 205266
loss: 1.0461212396621704,grad_norm: 0.9014079608900551, iteration: 205267
loss: 0.9516577124595642,grad_norm: 0.9999992043797494, iteration: 205268
loss: 1.0028295516967773,grad_norm: 0.8133128067608887, iteration: 205269
loss: 1.0305272340774536,grad_norm: 0.8414455086097888, iteration: 205270
loss: 0.9658894538879395,grad_norm: 0.913390592526898, iteration: 205271
loss: 1.021443486213684,grad_norm: 0.8232936678562308, iteration: 205272
loss: 0.9770675897598267,grad_norm: 0.9999989174888004, iteration: 205273
loss: 0.9781531691551208,grad_norm: 0.8256774442844546, iteration: 205274
loss: 1.0004911422729492,grad_norm: 0.7068273863145208, iteration: 205275
loss: 0.952009379863739,grad_norm: 0.977149131257386, iteration: 205276
loss: 0.9914528131484985,grad_norm: 0.9999996519930151, iteration: 205277
loss: 0.9801921844482422,grad_norm: 0.9999992040452883, iteration: 205278
loss: 0.9816519618034363,grad_norm: 0.9999992276045764, iteration: 205279
loss: 0.962394118309021,grad_norm: 0.9113828555041579, iteration: 205280
loss: 1.0023847818374634,grad_norm: 0.999999258161954, iteration: 205281
loss: 1.005773901939392,grad_norm: 0.9999993984546743, iteration: 205282
loss: 0.9815092086791992,grad_norm: 0.8465365244478052, iteration: 205283
loss: 1.0211409330368042,grad_norm: 0.9999990540961278, iteration: 205284
loss: 0.9912876486778259,grad_norm: 0.9013822535218521, iteration: 205285
loss: 0.9943499565124512,grad_norm: 0.8966684956297142, iteration: 205286
loss: 1.0079230070114136,grad_norm: 0.815030404162332, iteration: 205287
loss: 1.0112143754959106,grad_norm: 0.999999115746389, iteration: 205288
loss: 0.9975393414497375,grad_norm: 0.8576340061458397, iteration: 205289
loss: 1.018248200416565,grad_norm: 0.8118871270472771, iteration: 205290
loss: 0.975543737411499,grad_norm: 0.8321890701420263, iteration: 205291
loss: 1.007260799407959,grad_norm: 0.9196238417675509, iteration: 205292
loss: 1.0158275365829468,grad_norm: 0.9480130713400449, iteration: 205293
loss: 0.9953776001930237,grad_norm: 0.9999991594525185, iteration: 205294
loss: 1.0319297313690186,grad_norm: 0.9713986895887596, iteration: 205295
loss: 1.0095878839492798,grad_norm: 0.809117932408554, iteration: 205296
loss: 1.01106858253479,grad_norm: 0.9131675876311619, iteration: 205297
loss: 0.9761308431625366,grad_norm: 0.8190965750227974, iteration: 205298
loss: 0.9655122756958008,grad_norm: 0.9999990441855052, iteration: 205299
loss: 0.9993396997451782,grad_norm: 0.9516859447127399, iteration: 205300
loss: 1.0110136270523071,grad_norm: 0.9999989435238306, iteration: 205301
loss: 1.0123834609985352,grad_norm: 0.9006652275452878, iteration: 205302
loss: 1.003290057182312,grad_norm: 0.9054067524919515, iteration: 205303
loss: 0.9994542002677917,grad_norm: 0.9999990932977839, iteration: 205304
loss: 0.9983816146850586,grad_norm: 0.8710934072939845, iteration: 205305
loss: 0.9953170418739319,grad_norm: 0.9411148898740223, iteration: 205306
loss: 0.9930142760276794,grad_norm: 0.7695619113635976, iteration: 205307
loss: 0.9991026520729065,grad_norm: 0.8809949776573072, iteration: 205308
loss: 1.016717553138733,grad_norm: 0.8854357175555184, iteration: 205309
loss: 0.9933159947395325,grad_norm: 0.974422921178616, iteration: 205310
loss: 1.0276085138320923,grad_norm: 0.882611067677327, iteration: 205311
loss: 0.9789374470710754,grad_norm: 0.9999989947627673, iteration: 205312
loss: 0.9995000958442688,grad_norm: 0.9435958878526286, iteration: 205313
loss: 1.011060357093811,grad_norm: 0.9999991626645311, iteration: 205314
loss: 0.9324267506599426,grad_norm: 0.8939112326558957, iteration: 205315
loss: 0.9852578639984131,grad_norm: 0.9725780143644901, iteration: 205316
loss: 0.9674122929573059,grad_norm: 0.8511871210074413, iteration: 205317
loss: 0.9446446299552917,grad_norm: 0.9096276008427933, iteration: 205318
loss: 1.0000594854354858,grad_norm: 0.9999991207974159, iteration: 205319
loss: 0.9906030297279358,grad_norm: 0.9999991708851308, iteration: 205320
loss: 1.0060268640518188,grad_norm: 0.9999992369689773, iteration: 205321
loss: 1.007175326347351,grad_norm: 0.9999991249995852, iteration: 205322
loss: 1.0177257061004639,grad_norm: 0.915976867601493, iteration: 205323
loss: 1.0049018859863281,grad_norm: 0.7532631890429812, iteration: 205324
loss: 0.9550440907478333,grad_norm: 0.8794926242159139, iteration: 205325
loss: 0.9909495711326599,grad_norm: 0.9999991622972987, iteration: 205326
loss: 0.9641799926757812,grad_norm: 0.9999989956644414, iteration: 205327
loss: 0.99653160572052,grad_norm: 0.7755946326980115, iteration: 205328
loss: 0.9994007349014282,grad_norm: 0.9999991850020826, iteration: 205329
loss: 1.0423009395599365,grad_norm: 0.9996473858627107, iteration: 205330
loss: 0.9966862201690674,grad_norm: 0.9751349306873919, iteration: 205331
loss: 1.0317126512527466,grad_norm: 0.943051746108307, iteration: 205332
loss: 0.9896683096885681,grad_norm: 0.9999990728953072, iteration: 205333
loss: 1.0013233423233032,grad_norm: 0.9297318790605336, iteration: 205334
loss: 0.9974595904350281,grad_norm: 0.9097283891074147, iteration: 205335
loss: 1.002370834350586,grad_norm: 0.9999990532347862, iteration: 205336
loss: 1.0715738534927368,grad_norm: 0.9999993112158527, iteration: 205337
loss: 1.0174192190170288,grad_norm: 0.865590135371923, iteration: 205338
loss: 0.9672313928604126,grad_norm: 0.9844932213465247, iteration: 205339
loss: 1.0123517513275146,grad_norm: 0.9195189200447896, iteration: 205340
loss: 0.9558017253875732,grad_norm: 0.9271990803610766, iteration: 205341
loss: 0.9628086090087891,grad_norm: 0.8285051918899891, iteration: 205342
loss: 1.0111178159713745,grad_norm: 0.760148070157549, iteration: 205343
loss: 0.9975855946540833,grad_norm: 0.8537733141295951, iteration: 205344
loss: 0.9699653387069702,grad_norm: 0.9063564005332039, iteration: 205345
loss: 1.0330830812454224,grad_norm: 0.9947360898852539, iteration: 205346
loss: 0.9989833235740662,grad_norm: 0.9800496166586441, iteration: 205347
loss: 0.9813875555992126,grad_norm: 0.7896464019933654, iteration: 205348
loss: 1.0176385641098022,grad_norm: 0.907013346481166, iteration: 205349
loss: 0.9749918580055237,grad_norm: 0.8731640751431097, iteration: 205350
loss: 0.9971331357955933,grad_norm: 0.9999992880344305, iteration: 205351
loss: 0.9591742753982544,grad_norm: 0.7660914580942273, iteration: 205352
loss: 1.0182912349700928,grad_norm: 0.9999996723012479, iteration: 205353
loss: 0.974682629108429,grad_norm: 0.8087667390357166, iteration: 205354
loss: 0.984699010848999,grad_norm: 0.7587135527683201, iteration: 205355
loss: 1.0059483051300049,grad_norm: 0.9508406755469874, iteration: 205356
loss: 1.0093684196472168,grad_norm: 0.8648120052525631, iteration: 205357
loss: 1.0346044301986694,grad_norm: 0.9999994695475437, iteration: 205358
loss: 0.9706418514251709,grad_norm: 0.9319822232593747, iteration: 205359
loss: 1.0194652080535889,grad_norm: 0.999999169165943, iteration: 205360
loss: 1.020448088645935,grad_norm: 0.8780136691661059, iteration: 205361
loss: 0.9794750213623047,grad_norm: 0.8994052365671948, iteration: 205362
loss: 0.9892587661743164,grad_norm: 0.8124237374195951, iteration: 205363
loss: 0.986030101776123,grad_norm: 0.8016464119242954, iteration: 205364
loss: 0.9877475500106812,grad_norm: 0.9999990725088428, iteration: 205365
loss: 1.006869912147522,grad_norm: 0.9999990320683374, iteration: 205366
loss: 1.0109145641326904,grad_norm: 0.9999991858666303, iteration: 205367
loss: 0.9859797358512878,grad_norm: 0.8345520321155652, iteration: 205368
loss: 1.0212376117706299,grad_norm: 0.9999990524846405, iteration: 205369
loss: 1.011481761932373,grad_norm: 0.9900342924231563, iteration: 205370
loss: 1.0005065202713013,grad_norm: 0.9713698880188921, iteration: 205371
loss: 0.9788672924041748,grad_norm: 0.9734812938269074, iteration: 205372
loss: 1.0487483739852905,grad_norm: 0.7987322536217781, iteration: 205373
loss: 0.9867842197418213,grad_norm: 0.9396621690741802, iteration: 205374
loss: 0.963425874710083,grad_norm: 0.8304506866766935, iteration: 205375
loss: 1.0257835388183594,grad_norm: 0.9999989645939621, iteration: 205376
loss: 0.9721158742904663,grad_norm: 0.9999990316020368, iteration: 205377
loss: 1.0009592771530151,grad_norm: 0.9127837700726245, iteration: 205378
loss: 1.007751703262329,grad_norm: 0.9999990959788705, iteration: 205379
loss: 0.996422290802002,grad_norm: 0.9999998728269526, iteration: 205380
loss: 1.00871741771698,grad_norm: 0.9999990665576146, iteration: 205381
loss: 1.0318015813827515,grad_norm: 0.9711241155184805, iteration: 205382
loss: 1.0316190719604492,grad_norm: 0.9999991742029245, iteration: 205383
loss: 1.021213412284851,grad_norm: 0.9999990257892467, iteration: 205384
loss: 1.005048394203186,grad_norm: 0.9028657518454956, iteration: 205385
loss: 1.0058083534240723,grad_norm: 0.8551224540476082, iteration: 205386
loss: 1.0146369934082031,grad_norm: 0.9999994934176101, iteration: 205387
loss: 1.0279968976974487,grad_norm: 0.9389394488381952, iteration: 205388
loss: 0.9957331418991089,grad_norm: 0.9999992005647437, iteration: 205389
loss: 0.9768435955047607,grad_norm: 0.9677715251291388, iteration: 205390
loss: 0.9675940871238708,grad_norm: 0.985644264604724, iteration: 205391
loss: 1.030415415763855,grad_norm: 0.9999990418572594, iteration: 205392
loss: 1.008778691291809,grad_norm: 0.984925955358409, iteration: 205393
loss: 0.9839327335357666,grad_norm: 0.7719646374043884, iteration: 205394
loss: 0.9974384903907776,grad_norm: 0.8935231379234392, iteration: 205395
loss: 1.04697585105896,grad_norm: 0.8558751390286011, iteration: 205396
loss: 0.9913898706436157,grad_norm: 0.9999989221138855, iteration: 205397
loss: 0.9690143465995789,grad_norm: 0.9999991531552441, iteration: 205398
loss: 0.9803955554962158,grad_norm: 0.9999991305404249, iteration: 205399
loss: 1.0047264099121094,grad_norm: 0.9332369851524387, iteration: 205400
loss: 1.0138612985610962,grad_norm: 0.9999989529610397, iteration: 205401
loss: 0.9861588478088379,grad_norm: 0.8445788310201746, iteration: 205402
loss: 0.9890561103820801,grad_norm: 0.9999994733590089, iteration: 205403
loss: 1.0612057447433472,grad_norm: 0.9999991923425409, iteration: 205404
loss: 0.9901015162467957,grad_norm: 0.9999991743724359, iteration: 205405
loss: 1.0224279165267944,grad_norm: 0.9999991130629408, iteration: 205406
loss: 1.0124751329421997,grad_norm: 0.9430072122459181, iteration: 205407
loss: 1.0002031326293945,grad_norm: 0.999999212660404, iteration: 205408
loss: 1.0251868963241577,grad_norm: 0.8414217380094292, iteration: 205409
loss: 1.0337926149368286,grad_norm: 0.9372946202271631, iteration: 205410
loss: 0.9845842123031616,grad_norm: 0.8046107219347558, iteration: 205411
loss: 1.0105243921279907,grad_norm: 0.8021528105240128, iteration: 205412
loss: 1.0035825967788696,grad_norm: 0.8807753786454183, iteration: 205413
loss: 1.0280709266662598,grad_norm: 0.999998992660803, iteration: 205414
loss: 1.0251963138580322,grad_norm: 0.9591374390598638, iteration: 205415
loss: 0.9897370338439941,grad_norm: 0.99999908536561, iteration: 205416
loss: 0.9523605108261108,grad_norm: 0.9999991158645521, iteration: 205417
loss: 0.9913667440414429,grad_norm: 0.9254778610803032, iteration: 205418
loss: 1.1538276672363281,grad_norm: 0.9999993448540131, iteration: 205419
loss: 0.9918050765991211,grad_norm: 0.8196357069673025, iteration: 205420
loss: 0.9828056693077087,grad_norm: 0.9999990943515568, iteration: 205421
loss: 1.0371737480163574,grad_norm: 0.8951259042426492, iteration: 205422
loss: 1.006762146949768,grad_norm: 0.9807501517956199, iteration: 205423
loss: 0.9961742758750916,grad_norm: 0.9999989218537642, iteration: 205424
loss: 0.9654567241668701,grad_norm: 0.9999993049455242, iteration: 205425
loss: 1.033738136291504,grad_norm: 0.8787307625978346, iteration: 205426
loss: 1.004610538482666,grad_norm: 0.9999993138968319, iteration: 205427
loss: 1.0053722858428955,grad_norm: 0.9586129133649127, iteration: 205428
loss: 1.0065661668777466,grad_norm: 0.9782441396106759, iteration: 205429
loss: 1.0239619016647339,grad_norm: 0.9999996782015257, iteration: 205430
loss: 1.013001799583435,grad_norm: 0.7795627780879516, iteration: 205431
loss: 0.9881162047386169,grad_norm: 0.8732364629084142, iteration: 205432
loss: 1.0047569274902344,grad_norm: 0.8991180898879177, iteration: 205433
loss: 0.9808838367462158,grad_norm: 0.9999990189036474, iteration: 205434
loss: 1.0112860202789307,grad_norm: 0.8972850471640703, iteration: 205435
loss: 0.9507195949554443,grad_norm: 0.897614834395151, iteration: 205436
loss: 1.0147113800048828,grad_norm: 0.9247338185151607, iteration: 205437
loss: 1.0783541202545166,grad_norm: 0.9821191153441027, iteration: 205438
loss: 1.0120855569839478,grad_norm: 0.9932672167454855, iteration: 205439
loss: 0.9903793334960938,grad_norm: 0.9989517594635924, iteration: 205440
loss: 1.02295982837677,grad_norm: 0.8942579062205852, iteration: 205441
loss: 0.9837177395820618,grad_norm: 0.6630468687285175, iteration: 205442
loss: 0.9863599538803101,grad_norm: 0.8846441419546034, iteration: 205443
loss: 1.0140990018844604,grad_norm: 0.933487593643462, iteration: 205444
loss: 0.9758842587471008,grad_norm: 0.9999989910798525, iteration: 205445
loss: 1.016982913017273,grad_norm: 0.9999991558987523, iteration: 205446
loss: 0.9793516993522644,grad_norm: 0.9999990597886549, iteration: 205447
loss: 1.0206736326217651,grad_norm: 0.9999992215601474, iteration: 205448
loss: 1.0118820667266846,grad_norm: 0.9999990592596605, iteration: 205449
loss: 1.019827127456665,grad_norm: 0.9999991023716189, iteration: 205450
loss: 1.0152660608291626,grad_norm: 0.9999991021131597, iteration: 205451
loss: 1.0021334886550903,grad_norm: 0.8171696559264164, iteration: 205452
loss: 0.9784483909606934,grad_norm: 0.9999990420789505, iteration: 205453
loss: 1.0016038417816162,grad_norm: 0.9617778267047772, iteration: 205454
loss: 1.0405912399291992,grad_norm: 0.8508818970292488, iteration: 205455
loss: 1.0508465766906738,grad_norm: 0.9999993371138998, iteration: 205456
loss: 1.0352880954742432,grad_norm: 0.862351680489776, iteration: 205457
loss: 1.0203213691711426,grad_norm: 0.9999996399568871, iteration: 205458
loss: 1.0406852960586548,grad_norm: 0.9999989652669837, iteration: 205459
loss: 1.030029296875,grad_norm: 0.9999990795928771, iteration: 205460
loss: 1.0336573123931885,grad_norm: 0.9999990889533811, iteration: 205461
loss: 1.0073928833007812,grad_norm: 0.9999992402401277, iteration: 205462
loss: 0.9908083081245422,grad_norm: 0.9140954295431714, iteration: 205463
loss: 1.0234615802764893,grad_norm: 0.9999991024021481, iteration: 205464
loss: 0.978651762008667,grad_norm: 0.9999991491101917, iteration: 205465
loss: 1.002200722694397,grad_norm: 0.8126546294770601, iteration: 205466
loss: 0.9736971259117126,grad_norm: 0.9999990418285826, iteration: 205467
loss: 1.007920265197754,grad_norm: 0.999999183689423, iteration: 205468
loss: 1.011622428894043,grad_norm: 0.760485327313335, iteration: 205469
loss: 0.9899230599403381,grad_norm: 0.9999991694189063, iteration: 205470
loss: 0.9824044108390808,grad_norm: 0.9999991967325661, iteration: 205471
loss: 1.0148072242736816,grad_norm: 0.7691795341042036, iteration: 205472
loss: 0.9895800352096558,grad_norm: 0.9999991009173921, iteration: 205473
loss: 1.0235825777053833,grad_norm: 0.9999991196154742, iteration: 205474
loss: 1.0090267658233643,grad_norm: 0.8905926908197844, iteration: 205475
loss: 1.0067538022994995,grad_norm: 0.9006309290044313, iteration: 205476
loss: 1.0305997133255005,grad_norm: 0.9999991149275151, iteration: 205477
loss: 0.9823833703994751,grad_norm: 0.8256886827738236, iteration: 205478
loss: 1.039404034614563,grad_norm: 0.8008666504701296, iteration: 205479
loss: 1.0216898918151855,grad_norm: 0.9999991927266236, iteration: 205480
loss: 1.0248303413391113,grad_norm: 0.9999995633943412, iteration: 205481
loss: 1.025535225868225,grad_norm: 0.9326886736634099, iteration: 205482
loss: 1.0053457021713257,grad_norm: 0.7447134084481257, iteration: 205483
loss: 1.008284330368042,grad_norm: 0.9551455018237995, iteration: 205484
loss: 1.0231467485427856,grad_norm: 0.8020182609929578, iteration: 205485
loss: 0.9780302047729492,grad_norm: 0.9662462201987476, iteration: 205486
loss: 0.96442711353302,grad_norm: 0.9076159476044686, iteration: 205487
loss: 0.9894182682037354,grad_norm: 0.9999990878438875, iteration: 205488
loss: 0.9837473034858704,grad_norm: 0.9700446817884077, iteration: 205489
loss: 1.0050891637802124,grad_norm: 0.9394535881224093, iteration: 205490
loss: 1.0065194368362427,grad_norm: 0.8582044056104856, iteration: 205491
loss: 0.9982691407203674,grad_norm: 0.9999990462581517, iteration: 205492
loss: 1.0114260911941528,grad_norm: 0.8576422583685834, iteration: 205493
loss: 0.997262716293335,grad_norm: 0.9161688308531413, iteration: 205494
loss: 0.9894979000091553,grad_norm: 0.999999068377692, iteration: 205495
loss: 0.965690553188324,grad_norm: 0.8355558582768824, iteration: 205496
loss: 1.0192989110946655,grad_norm: 0.884781894108569, iteration: 205497
loss: 1.0131216049194336,grad_norm: 0.9999999356424132, iteration: 205498
loss: 0.989406406879425,grad_norm: 0.7940956942708755, iteration: 205499
loss: 0.9673459529876709,grad_norm: 0.9346765882408272, iteration: 205500
loss: 1.0071995258331299,grad_norm: 0.8476638092422993, iteration: 205501
loss: 0.997136116027832,grad_norm: 0.9999993367184342, iteration: 205502
loss: 1.017834186553955,grad_norm: 0.9999991940578223, iteration: 205503
loss: 1.0345698595046997,grad_norm: 0.9999990768546659, iteration: 205504
loss: 0.9625714421272278,grad_norm: 0.9543571019849025, iteration: 205505
loss: 1.0293749570846558,grad_norm: 0.9999995571014711, iteration: 205506
loss: 0.9957406520843506,grad_norm: 0.9999991456118981, iteration: 205507
loss: 1.0294877290725708,grad_norm: 0.9902217329207601, iteration: 205508
loss: 0.9688055515289307,grad_norm: 0.9274948326615176, iteration: 205509
loss: 0.9773925542831421,grad_norm: 0.8490857447158631, iteration: 205510
loss: 0.9905298948287964,grad_norm: 0.7870256797125198, iteration: 205511
loss: 1.0343924760818481,grad_norm: 0.9736687192572261, iteration: 205512
loss: 1.0767133235931396,grad_norm: 0.9999994647749415, iteration: 205513
loss: 1.0017956495285034,grad_norm: 0.9999997122302897, iteration: 205514
loss: 1.0042412281036377,grad_norm: 0.8009669695751296, iteration: 205515
loss: 1.0127769708633423,grad_norm: 0.9999992171459168, iteration: 205516
loss: 0.9734604358673096,grad_norm: 0.8590472538489203, iteration: 205517
loss: 0.9867420196533203,grad_norm: 0.8918670005864954, iteration: 205518
loss: 1.0367414951324463,grad_norm: 0.9999990250114458, iteration: 205519
loss: 0.998071014881134,grad_norm: 0.9775025462293593, iteration: 205520
loss: 0.9860742688179016,grad_norm: 0.858057248992579, iteration: 205521
loss: 1.0080868005752563,grad_norm: 0.9481904989106522, iteration: 205522
loss: 0.9704614281654358,grad_norm: 0.8922877503544163, iteration: 205523
loss: 0.9567006826400757,grad_norm: 0.9070355468842098, iteration: 205524
loss: 0.9893775582313538,grad_norm: 0.9999992101159068, iteration: 205525
loss: 0.963084876537323,grad_norm: 0.7808441515282836, iteration: 205526
loss: 1.0134178400039673,grad_norm: 0.9547650070602081, iteration: 205527
loss: 0.9635713696479797,grad_norm: 0.9999990692786196, iteration: 205528
loss: 0.9645189642906189,grad_norm: 0.8637485605766761, iteration: 205529
loss: 0.9927800297737122,grad_norm: 0.9587159508774047, iteration: 205530
loss: 1.0094102621078491,grad_norm: 0.9913924291542672, iteration: 205531
loss: 1.0417420864105225,grad_norm: 0.8747736207035756, iteration: 205532
loss: 1.1087473630905151,grad_norm: 0.9999991150080447, iteration: 205533
loss: 1.0086594820022583,grad_norm: 0.9999990569879569, iteration: 205534
loss: 1.0040380954742432,grad_norm: 0.8901456332391852, iteration: 205535
loss: 1.0133870840072632,grad_norm: 0.8651262965826435, iteration: 205536
loss: 1.0105571746826172,grad_norm: 0.8730662950107206, iteration: 205537
loss: 1.0378862619400024,grad_norm: 0.9999990895552575, iteration: 205538
loss: 0.9738507866859436,grad_norm: 0.9999991391241779, iteration: 205539
loss: 0.9650198221206665,grad_norm: 0.999999538740069, iteration: 205540
loss: 1.0069748163223267,grad_norm: 0.9857785578708625, iteration: 205541
loss: 0.9566797614097595,grad_norm: 0.9462140041519006, iteration: 205542
loss: 0.9793086051940918,grad_norm: 0.8587463469016008, iteration: 205543
loss: 1.083064317703247,grad_norm: 0.9999991853250448, iteration: 205544
loss: 1.0158894062042236,grad_norm: 0.9999990473631627, iteration: 205545
loss: 0.9957571029663086,grad_norm: 0.9999992099640358, iteration: 205546
loss: 1.0124363899230957,grad_norm: 0.9488975210070536, iteration: 205547
loss: 0.9951068162918091,grad_norm: 0.999999143824947, iteration: 205548
loss: 1.0029165744781494,grad_norm: 0.9073687931208548, iteration: 205549
loss: 0.9939762353897095,grad_norm: 0.9926727707457436, iteration: 205550
loss: 0.9935491681098938,grad_norm: 0.9999992473172155, iteration: 205551
loss: 1.0191055536270142,grad_norm: 0.9524714899537834, iteration: 205552
loss: 1.0326803922653198,grad_norm: 0.964276417747061, iteration: 205553
loss: 0.976878821849823,grad_norm: 0.859834981536703, iteration: 205554
loss: 0.9910591840744019,grad_norm: 0.8529369489934405, iteration: 205555
loss: 1.0009865760803223,grad_norm: 0.7837072606015252, iteration: 205556
loss: 1.0034236907958984,grad_norm: 0.8016149266049403, iteration: 205557
loss: 1.0081381797790527,grad_norm: 0.9999991016911666, iteration: 205558
loss: 0.9973480701446533,grad_norm: 0.9028385207706223, iteration: 205559
loss: 1.0267330408096313,grad_norm: 0.9999992171512248, iteration: 205560
loss: 0.9613174200057983,grad_norm: 0.9999990551272343, iteration: 205561
loss: 0.9839966297149658,grad_norm: 0.9999991450841552, iteration: 205562
loss: 0.9958544373512268,grad_norm: 0.9999991544806544, iteration: 205563
loss: 0.990419328212738,grad_norm: 0.9255607223137517, iteration: 205564
loss: 1.024082899093628,grad_norm: 0.9999989658369078, iteration: 205565
loss: 1.0117815732955933,grad_norm: 0.8438391225488291, iteration: 205566
loss: 0.9836074113845825,grad_norm: 0.9015326576970613, iteration: 205567
loss: 0.993425190448761,grad_norm: 0.8565596475940814, iteration: 205568
loss: 0.9907139539718628,grad_norm: 0.9238192050503172, iteration: 205569
loss: 0.9912410378456116,grad_norm: 0.9267525728129263, iteration: 205570
loss: 0.9633787274360657,grad_norm: 0.9603093492433239, iteration: 205571
loss: 1.0796796083450317,grad_norm: 0.9999997197428446, iteration: 205572
loss: 0.9973311424255371,grad_norm: 0.9471192453087672, iteration: 205573
loss: 0.9855188727378845,grad_norm: 0.9158419594062737, iteration: 205574
loss: 1.0274077653884888,grad_norm: 0.9999994929807092, iteration: 205575
loss: 0.9724149703979492,grad_norm: 0.7971186883641832, iteration: 205576
loss: 1.0193679332733154,grad_norm: 0.8589126752295752, iteration: 205577
loss: 0.9966973662376404,grad_norm: 0.7798439332248016, iteration: 205578
loss: 0.9622853398323059,grad_norm: 0.8124618821185143, iteration: 205579
loss: 0.99692702293396,grad_norm: 0.9999991350102155, iteration: 205580
loss: 0.9700658917427063,grad_norm: 0.9999990144944336, iteration: 205581
loss: 1.069294810295105,grad_norm: 0.8835112569853006, iteration: 205582
loss: 1.0423215627670288,grad_norm: 0.9999994149243604, iteration: 205583
loss: 1.0187424421310425,grad_norm: 0.999999451250853, iteration: 205584
loss: 1.0189937353134155,grad_norm: 0.999999091936015, iteration: 205585
loss: 0.9828896522521973,grad_norm: 0.93696380343846, iteration: 205586
loss: 0.9855263829231262,grad_norm: 0.9686200206746712, iteration: 205587
loss: 0.9810327887535095,grad_norm: 0.9344190390541076, iteration: 205588
loss: 0.9651704430580139,grad_norm: 0.9999991090436053, iteration: 205589
loss: 1.029189944267273,grad_norm: 0.9900425653688479, iteration: 205590
loss: 0.9950414896011353,grad_norm: 0.9999991133313877, iteration: 205591
loss: 0.9907451272010803,grad_norm: 0.9999990212118022, iteration: 205592
loss: 1.0043233633041382,grad_norm: 0.905394850634707, iteration: 205593
loss: 1.0241302251815796,grad_norm: 0.7505211550221785, iteration: 205594
loss: 0.9817277789115906,grad_norm: 0.9999992017164457, iteration: 205595
loss: 0.9783639907836914,grad_norm: 0.9953948675227349, iteration: 205596
loss: 0.9606019258499146,grad_norm: 0.8447627802521679, iteration: 205597
loss: 1.0276134014129639,grad_norm: 0.8596096776028311, iteration: 205598
loss: 0.9753686189651489,grad_norm: 0.9999991119843452, iteration: 205599
loss: 1.018290638923645,grad_norm: 0.8486673927471211, iteration: 205600
loss: 0.9749288558959961,grad_norm: 0.8347191462292026, iteration: 205601
loss: 1.0244569778442383,grad_norm: 0.8376210049912548, iteration: 205602
loss: 0.9992445111274719,grad_norm: 0.9820172624774466, iteration: 205603
loss: 0.981026828289032,grad_norm: 0.999999032450824, iteration: 205604
loss: 0.9863706231117249,grad_norm: 0.9969277018568865, iteration: 205605
loss: 1.014381766319275,grad_norm: 0.9004681608052142, iteration: 205606
loss: 0.9918167591094971,grad_norm: 0.9999991519037291, iteration: 205607
loss: 0.9931901097297668,grad_norm: 0.9253830306236535, iteration: 205608
loss: 0.9879096746444702,grad_norm: 0.8262480122255494, iteration: 205609
loss: 0.965545117855072,grad_norm: 0.999999801918837, iteration: 205610
loss: 0.9949119091033936,grad_norm: 0.8465310349044812, iteration: 205611
loss: 0.992330014705658,grad_norm: 0.9970892507570389, iteration: 205612
loss: 1.0484025478363037,grad_norm: 0.9068690871949384, iteration: 205613
loss: 0.987818717956543,grad_norm: 0.8920649824180233, iteration: 205614
loss: 1.0213541984558105,grad_norm: 0.9999992576331355, iteration: 205615
loss: 1.0168737173080444,grad_norm: 0.9309900724044894, iteration: 205616
loss: 1.0105388164520264,grad_norm: 0.8554037143752425, iteration: 205617
loss: 1.0825923681259155,grad_norm: 0.999999809040764, iteration: 205618
loss: 0.994579553604126,grad_norm: 0.9999990784768082, iteration: 205619
loss: 0.9913389086723328,grad_norm: 0.9999988697607715, iteration: 205620
loss: 1.000533103942871,grad_norm: 0.9131811999273072, iteration: 205621
loss: 1.0386816263198853,grad_norm: 0.9393738957661067, iteration: 205622
loss: 0.9767164587974548,grad_norm: 0.9433081993887301, iteration: 205623
loss: 0.9669243097305298,grad_norm: 0.8893526432549058, iteration: 205624
loss: 1.0124413967132568,grad_norm: 0.9389589105066413, iteration: 205625
loss: 0.9884236454963684,grad_norm: 0.8541947636873146, iteration: 205626
loss: 0.9944918751716614,grad_norm: 0.9999991937323331, iteration: 205627
loss: 1.0146987438201904,grad_norm: 0.9999991490932642, iteration: 205628
loss: 1.1605807542800903,grad_norm: 0.9999991883357514, iteration: 205629
loss: 0.9609370827674866,grad_norm: 0.8128935262443978, iteration: 205630
loss: 1.0430781841278076,grad_norm: 0.9999991774500072, iteration: 205631
loss: 1.004114031791687,grad_norm: 0.9999991273783451, iteration: 205632
loss: 1.030719518661499,grad_norm: 0.9020279140667822, iteration: 205633
loss: 1.0010650157928467,grad_norm: 0.8870276286811835, iteration: 205634
loss: 0.9794690012931824,grad_norm: 0.9945262098115005, iteration: 205635
loss: 0.9750070571899414,grad_norm: 0.8214458719972414, iteration: 205636
loss: 1.018183708190918,grad_norm: 0.8684147466433834, iteration: 205637
loss: 1.0179859399795532,grad_norm: 0.941548901266901, iteration: 205638
loss: 1.0392210483551025,grad_norm: 0.9999995154775922, iteration: 205639
loss: 0.9994884133338928,grad_norm: 0.909866779795309, iteration: 205640
loss: 1.0105904340744019,grad_norm: 0.9999990932130122, iteration: 205641
loss: 1.0276612043380737,grad_norm: 0.9999992092424445, iteration: 205642
loss: 1.0312551259994507,grad_norm: 0.82067835819087, iteration: 205643
loss: 0.9698309302330017,grad_norm: 0.9452290883127075, iteration: 205644
loss: 1.0217608213424683,grad_norm: 0.8638541240026253, iteration: 205645
loss: 1.0029181241989136,grad_norm: 0.8204786626129945, iteration: 205646
loss: 1.0026307106018066,grad_norm: 0.9999995320577103, iteration: 205647
loss: 1.0176186561584473,grad_norm: 0.9999991473193133, iteration: 205648
loss: 0.9975605607032776,grad_norm: 0.9999991590172876, iteration: 205649
loss: 0.9837617874145508,grad_norm: 0.8689121127350353, iteration: 205650
loss: 1.0069403648376465,grad_norm: 0.9180358734954701, iteration: 205651
loss: 0.9809236526489258,grad_norm: 0.955411525718825, iteration: 205652
loss: 1.0214729309082031,grad_norm: 0.9999992084471075, iteration: 205653
loss: 0.9841140508651733,grad_norm: 0.8695658190840739, iteration: 205654
loss: 0.9795239567756653,grad_norm: 0.828658189783722, iteration: 205655
loss: 1.0079132318496704,grad_norm: 0.9353365708241498, iteration: 205656
loss: 0.9725609421730042,grad_norm: 0.9115220792066748, iteration: 205657
loss: 1.0069911479949951,grad_norm: 0.999999147393121, iteration: 205658
loss: 1.0933935642242432,grad_norm: 0.940933583362476, iteration: 205659
loss: 0.9782529473304749,grad_norm: 0.9999990134602296, iteration: 205660
loss: 1.0145270824432373,grad_norm: 0.9271581923410728, iteration: 205661
loss: 1.0144704580307007,grad_norm: 0.9999993224620661, iteration: 205662
loss: 0.9937537908554077,grad_norm: 0.8941240397699145, iteration: 205663
loss: 1.0207284688949585,grad_norm: 0.9999993112835429, iteration: 205664
loss: 0.9914481043815613,grad_norm: 0.999999280171058, iteration: 205665
loss: 1.0215134620666504,grad_norm: 0.9999991651010542, iteration: 205666
loss: 1.0101256370544434,grad_norm: 0.894761802293451, iteration: 205667
loss: 0.9752916097640991,grad_norm: 0.9415559938094693, iteration: 205668
loss: 1.0526567697525024,grad_norm: 0.9999993926661627, iteration: 205669
loss: 1.0005762577056885,grad_norm: 0.9835207479723224, iteration: 205670
loss: 0.9912815690040588,grad_norm: 0.780715319925032, iteration: 205671
loss: 1.0062919855117798,grad_norm: 0.9267177998367169, iteration: 205672
loss: 1.0127967596054077,grad_norm: 0.9999990975975217, iteration: 205673
loss: 1.021329641342163,grad_norm: 0.8287795481083645, iteration: 205674
loss: 1.0060529708862305,grad_norm: 0.9181074139653534, iteration: 205675
loss: 1.008802056312561,grad_norm: 0.9999991535411904, iteration: 205676
loss: 1.0210483074188232,grad_norm: 0.9999996255680739, iteration: 205677
loss: 1.0217111110687256,grad_norm: 0.9194972350715556, iteration: 205678
loss: 1.0488160848617554,grad_norm: 0.9654339831309462, iteration: 205679
loss: 1.0597482919692993,grad_norm: 0.9999992346051144, iteration: 205680
loss: 1.0071083307266235,grad_norm: 0.8701271698333334, iteration: 205681
loss: 0.9897438287734985,grad_norm: 0.9426168264096191, iteration: 205682
loss: 0.9911969900131226,grad_norm: 0.9881351846773246, iteration: 205683
loss: 1.0128456354141235,grad_norm: 0.9534085229808178, iteration: 205684
loss: 0.9950142502784729,grad_norm: 0.813573967311978, iteration: 205685
loss: 0.9706760048866272,grad_norm: 0.9759235685689728, iteration: 205686
loss: 1.0027326345443726,grad_norm: 0.9999992719614981, iteration: 205687
loss: 1.0119082927703857,grad_norm: 0.9999991942867322, iteration: 205688
loss: 1.0215576887130737,grad_norm: 0.9999995064577143, iteration: 205689
loss: 1.007020354270935,grad_norm: 0.999999130732943, iteration: 205690
loss: 0.9673418402671814,grad_norm: 0.9044246505099176, iteration: 205691
loss: 0.9980831742286682,grad_norm: 0.9999991814913619, iteration: 205692
loss: 0.942807137966156,grad_norm: 0.9999994343707028, iteration: 205693
loss: 0.9963455200195312,grad_norm: 0.9787392580575177, iteration: 205694
loss: 0.9996012449264526,grad_norm: 0.8309139461488195, iteration: 205695
loss: 0.9749278426170349,grad_norm: 0.9121706007048588, iteration: 205696
loss: 1.0127687454223633,grad_norm: 0.9559624706114789, iteration: 205697
loss: 1.074974536895752,grad_norm: 0.8347301529173222, iteration: 205698
loss: 0.989909291267395,grad_norm: 0.9481272194144933, iteration: 205699
loss: 0.970771312713623,grad_norm: 0.9999990985694901, iteration: 205700
loss: 1.005989670753479,grad_norm: 0.9999990680265989, iteration: 205701
loss: 1.0220261812210083,grad_norm: 0.9604950862054139, iteration: 205702
loss: 1.0047527551651,grad_norm: 0.9426382226775516, iteration: 205703
loss: 0.9932516813278198,grad_norm: 0.9101832431578873, iteration: 205704
loss: 0.9775785803794861,grad_norm: 0.9314263135628041, iteration: 205705
loss: 1.000288724899292,grad_norm: 0.9999991090026097, iteration: 205706
loss: 0.9878944754600525,grad_norm: 0.9099448156752706, iteration: 205707
loss: 0.9719657301902771,grad_norm: 0.9999990050220453, iteration: 205708
loss: 1.0054881572723389,grad_norm: 0.9932624348787957, iteration: 205709
loss: 0.9923850893974304,grad_norm: 0.8934255695072307, iteration: 205710
loss: 1.0952627658843994,grad_norm: 0.999999705124699, iteration: 205711
loss: 1.016360878944397,grad_norm: 0.9999991967087627, iteration: 205712
loss: 0.9958497881889343,grad_norm: 0.8353759027452184, iteration: 205713
loss: 0.9511739611625671,grad_norm: 0.8694488266086285, iteration: 205714
loss: 0.9694159626960754,grad_norm: 0.9785250873832209, iteration: 205715
loss: 0.988376259803772,grad_norm: 0.9999993123049329, iteration: 205716
loss: 1.048483967781067,grad_norm: 0.9917712740775059, iteration: 205717
loss: 0.975134015083313,grad_norm: 0.9609309813547918, iteration: 205718
loss: 1.0031336545944214,grad_norm: 0.8653856833571872, iteration: 205719
loss: 0.9962012767791748,grad_norm: 0.9667542054316732, iteration: 205720
loss: 0.9983084797859192,grad_norm: 0.8885415453109183, iteration: 205721
loss: 1.0064889192581177,grad_norm: 0.9526160577302639, iteration: 205722
loss: 0.9848538637161255,grad_norm: 0.899684110754634, iteration: 205723
loss: 1.0247448682785034,grad_norm: 0.8377640838775868, iteration: 205724
loss: 0.9773234128952026,grad_norm: 0.9418593700708492, iteration: 205725
loss: 1.024874210357666,grad_norm: 0.9999991981292445, iteration: 205726
loss: 0.999901294708252,grad_norm: 0.9068463536312449, iteration: 205727
loss: 0.9598552584648132,grad_norm: 0.9293142338356607, iteration: 205728
loss: 1.0420522689819336,grad_norm: 0.9740979482414531, iteration: 205729
loss: 1.0107427835464478,grad_norm: 0.968519621038617, iteration: 205730
loss: 1.0544072389602661,grad_norm: 0.9999996295010977, iteration: 205731
loss: 1.0183314085006714,grad_norm: 0.9540032933611303, iteration: 205732
loss: 1.0161654949188232,grad_norm: 0.9999990339169094, iteration: 205733
loss: 1.0246986150741577,grad_norm: 0.9999989228999914, iteration: 205734
loss: 0.9675592184066772,grad_norm: 0.8025228798935581, iteration: 205735
loss: 0.9817305207252502,grad_norm: 0.9999990557392623, iteration: 205736
loss: 1.0338417291641235,grad_norm: 0.9050861282758905, iteration: 205737
loss: 1.0378007888793945,grad_norm: 0.8284405448997638, iteration: 205738
loss: 1.0422905683517456,grad_norm: 0.9999995349800596, iteration: 205739
loss: 1.0300384759902954,grad_norm: 0.9234976427651346, iteration: 205740
loss: 0.9671898484230042,grad_norm: 0.9999989206652695, iteration: 205741
loss: 0.9824413657188416,grad_norm: 0.8424248718883071, iteration: 205742
loss: 1.0184811353683472,grad_norm: 0.9907007869941787, iteration: 205743
loss: 0.997285008430481,grad_norm: 0.9844538892108664, iteration: 205744
loss: 1.00652277469635,grad_norm: 0.9999991899270838, iteration: 205745
loss: 1.0126022100448608,grad_norm: 0.8773172929825862, iteration: 205746
loss: 1.0145777463912964,grad_norm: 0.8156899639239958, iteration: 205747
loss: 1.007706642150879,grad_norm: 0.9028955845404809, iteration: 205748
loss: 1.0102598667144775,grad_norm: 0.8398606596714484, iteration: 205749
loss: 0.9916123151779175,grad_norm: 0.9798449921779846, iteration: 205750
loss: 0.9700785279273987,grad_norm: 0.9278807354834796, iteration: 205751
loss: 1.0248897075653076,grad_norm: 0.8201235375532163, iteration: 205752
loss: 1.0003918409347534,grad_norm: 0.9999990141168744, iteration: 205753
loss: 1.0089725255966187,grad_norm: 0.8728021604791129, iteration: 205754
loss: 1.0286073684692383,grad_norm: 0.9999990163061134, iteration: 205755
loss: 1.0110279321670532,grad_norm: 0.9865847293232738, iteration: 205756
loss: 1.027264952659607,grad_norm: 0.999999061575729, iteration: 205757
loss: 0.9853916764259338,grad_norm: 0.858741767416775, iteration: 205758
loss: 1.0283161401748657,grad_norm: 0.89622696784069, iteration: 205759
loss: 1.0293408632278442,grad_norm: 0.8424254422666112, iteration: 205760
loss: 0.986073911190033,grad_norm: 0.9999990914062417, iteration: 205761
loss: 1.0200330018997192,grad_norm: 0.9243870391673906, iteration: 205762
loss: 1.0110000371932983,grad_norm: 0.764886001848901, iteration: 205763
loss: 1.0121731758117676,grad_norm: 0.9689356873515712, iteration: 205764
loss: 1.0115200281143188,grad_norm: 0.9999991006378007, iteration: 205765
loss: 1.0232726335525513,grad_norm: 0.960360090804316, iteration: 205766
loss: 1.0425782203674316,grad_norm: 0.9999996763198341, iteration: 205767
loss: 0.9799667596817017,grad_norm: 0.9999990739637278, iteration: 205768
loss: 0.9872967004776001,grad_norm: 0.9982316429662687, iteration: 205769
loss: 1.0038120746612549,grad_norm: 0.8390765314399404, iteration: 205770
loss: 1.0038338899612427,grad_norm: 0.9999991048936806, iteration: 205771
loss: 1.029041051864624,grad_norm: 0.9999991704948595, iteration: 205772
loss: 0.9854889512062073,grad_norm: 0.9103916887259359, iteration: 205773
loss: 0.9665092825889587,grad_norm: 0.9960672152635957, iteration: 205774
loss: 1.006224513053894,grad_norm: 0.9999990664294477, iteration: 205775
loss: 1.0180426836013794,grad_norm: 0.8661079556802368, iteration: 205776
loss: 0.9762386679649353,grad_norm: 0.8019572686871822, iteration: 205777
loss: 1.010619878768921,grad_norm: 0.9402103021439375, iteration: 205778
loss: 1.0046981573104858,grad_norm: 0.8043097898760588, iteration: 205779
loss: 1.0220606327056885,grad_norm: 0.9146120610893342, iteration: 205780
loss: 0.9734125137329102,grad_norm: 0.7837925596187189, iteration: 205781
loss: 0.9897578358650208,grad_norm: 0.9013150784357339, iteration: 205782
loss: 0.9763858914375305,grad_norm: 0.8926045149235893, iteration: 205783
loss: 1.000381350517273,grad_norm: 0.826860568361246, iteration: 205784
loss: 0.9680629968643188,grad_norm: 0.9234505072395488, iteration: 205785
loss: 0.9896019697189331,grad_norm: 0.9888753167505473, iteration: 205786
loss: 1.0158427953720093,grad_norm: 0.8347734442013508, iteration: 205787
loss: 1.0122414827346802,grad_norm: 0.8343506934772446, iteration: 205788
loss: 0.9900892376899719,grad_norm: 0.9822519579074794, iteration: 205789
loss: 0.9876018166542053,grad_norm: 0.9918905420453921, iteration: 205790
loss: 0.9703097939491272,grad_norm: 0.9468052527405495, iteration: 205791
loss: 0.9968386888504028,grad_norm: 0.8249367969337514, iteration: 205792
loss: 1.013528823852539,grad_norm: 0.8804758172746863, iteration: 205793
loss: 0.9836601614952087,grad_norm: 0.8589705926030939, iteration: 205794
loss: 1.0207760334014893,grad_norm: 0.8939318049141111, iteration: 205795
loss: 1.0105911493301392,grad_norm: 0.9590834708694717, iteration: 205796
loss: 0.9725067615509033,grad_norm: 0.9999992832354122, iteration: 205797
loss: 0.9972421526908875,grad_norm: 0.9996871550952148, iteration: 205798
loss: 0.9562071561813354,grad_norm: 0.999999145425976, iteration: 205799
loss: 0.9945762157440186,grad_norm: 0.8867215505411576, iteration: 205800
loss: 1.0295624732971191,grad_norm: 0.9999991131338498, iteration: 205801
loss: 1.005508303642273,grad_norm: 0.9292201593107546, iteration: 205802
loss: 1.0595327615737915,grad_norm: 0.9999998169878247, iteration: 205803
loss: 0.9938486218452454,grad_norm: 0.8362288992559156, iteration: 205804
loss: 1.004137396812439,grad_norm: 0.9999991209154576, iteration: 205805
loss: 0.9828319549560547,grad_norm: 0.994272886064279, iteration: 205806
loss: 1.002206563949585,grad_norm: 0.862568656963074, iteration: 205807
loss: 0.9723639488220215,grad_norm: 0.9999989214694082, iteration: 205808
loss: 0.9684361219406128,grad_norm: 0.9616814839083581, iteration: 205809
loss: 0.9619695544242859,grad_norm: 0.9010003255676646, iteration: 205810
loss: 1.0005154609680176,grad_norm: 0.9174050632195475, iteration: 205811
loss: 1.0078767538070679,grad_norm: 0.9398785357448874, iteration: 205812
loss: 1.0176255702972412,grad_norm: 0.7974825383683293, iteration: 205813
loss: 1.0204675197601318,grad_norm: 0.9999991172234781, iteration: 205814
loss: 0.9835880994796753,grad_norm: 0.8520297053632778, iteration: 205815
loss: 0.971541166305542,grad_norm: 0.9999990450017461, iteration: 205816
loss: 1.014491081237793,grad_norm: 0.9999989656555367, iteration: 205817
loss: 1.070914626121521,grad_norm: 0.8989015833397771, iteration: 205818
loss: 0.9599699974060059,grad_norm: 0.9999989957917069, iteration: 205819
loss: 0.9712113738059998,grad_norm: 0.9920403334500779, iteration: 205820
loss: 1.0205680131912231,grad_norm: 0.8515977195082814, iteration: 205821
loss: 1.0182644128799438,grad_norm: 0.9999992139840085, iteration: 205822
loss: 0.981428861618042,grad_norm: 0.8122553354370053, iteration: 205823
loss: 1.0084017515182495,grad_norm: 0.8904286612246498, iteration: 205824
loss: 1.0032837390899658,grad_norm: 0.9999998760813489, iteration: 205825
loss: 1.0050128698349,grad_norm: 0.9999991779876151, iteration: 205826
loss: 1.0171258449554443,grad_norm: 0.8706440189551916, iteration: 205827
loss: 0.9926825165748596,grad_norm: 0.9563920644023143, iteration: 205828
loss: 0.9904618263244629,grad_norm: 0.966020224733325, iteration: 205829
loss: 1.0041333436965942,grad_norm: 0.9999998132226674, iteration: 205830
loss: 0.9984341859817505,grad_norm: 0.7624179638662156, iteration: 205831
loss: 0.984279990196228,grad_norm: 0.7707641255739438, iteration: 205832
loss: 0.9834774136543274,grad_norm: 0.9999989841972796, iteration: 205833
loss: 0.9816173315048218,grad_norm: 0.8207904037897074, iteration: 205834
loss: 0.9558942317962646,grad_norm: 0.9234941429137742, iteration: 205835
loss: 0.9794868230819702,grad_norm: 0.9558636072445941, iteration: 205836
loss: 0.9915841221809387,grad_norm: 0.7969885804756963, iteration: 205837
loss: 1.0206469297409058,grad_norm: 0.9795154720958221, iteration: 205838
loss: 1.0186314582824707,grad_norm: 0.9401106472334056, iteration: 205839
loss: 1.0271522998809814,grad_norm: 0.843551751560261, iteration: 205840
loss: 1.0164275169372559,grad_norm: 0.9999990134259802, iteration: 205841
loss: 0.9840524792671204,grad_norm: 0.9044743718873228, iteration: 205842
loss: 1.0027462244033813,grad_norm: 0.9999990638853508, iteration: 205843
loss: 0.9812272787094116,grad_norm: 0.8415762481933786, iteration: 205844
loss: 1.0299986600875854,grad_norm: 0.939080208490292, iteration: 205845
loss: 1.0124666690826416,grad_norm: 0.9399054402659923, iteration: 205846
loss: 1.0016628503799438,grad_norm: 0.8762761862996811, iteration: 205847
loss: 0.996667742729187,grad_norm: 0.8089176491319101, iteration: 205848
loss: 0.9863672852516174,grad_norm: 0.9999992822494422, iteration: 205849
loss: 0.9953557848930359,grad_norm: 0.8201506467560286, iteration: 205850
loss: 1.0136622190475464,grad_norm: 0.9778079023690738, iteration: 205851
loss: 0.9940972924232483,grad_norm: 0.9007796334209187, iteration: 205852
loss: 1.010994553565979,grad_norm: 0.854335211722695, iteration: 205853
loss: 1.0188606977462769,grad_norm: 0.8797150504454938, iteration: 205854
loss: 0.9917033314704895,grad_norm: 0.9999990202516675, iteration: 205855
loss: 1.0129367113113403,grad_norm: 0.9122319704219459, iteration: 205856
loss: 0.9772540926933289,grad_norm: 0.9697374799742935, iteration: 205857
loss: 1.0167251825332642,grad_norm: 0.9136703205757973, iteration: 205858
loss: 0.9881964921951294,grad_norm: 0.9555687103354379, iteration: 205859
loss: 0.9979063868522644,grad_norm: 0.8355430618355179, iteration: 205860
loss: 0.9965788722038269,grad_norm: 0.7941289591723495, iteration: 205861
loss: 0.9933686852455139,grad_norm: 0.9999992634478712, iteration: 205862
loss: 1.047633171081543,grad_norm: 0.9999993308058593, iteration: 205863
loss: 1.0070536136627197,grad_norm: 0.9999991468356224, iteration: 205864
loss: 1.0017075538635254,grad_norm: 0.9999991508911843, iteration: 205865
loss: 1.0069397687911987,grad_norm: 0.999998971478677, iteration: 205866
loss: 0.9824237823486328,grad_norm: 0.834462012345951, iteration: 205867
loss: 0.9718908071517944,grad_norm: 0.9999992974823451, iteration: 205868
loss: 1.0188090801239014,grad_norm: 0.9960394839334866, iteration: 205869
loss: 0.9877870082855225,grad_norm: 0.9674432680340519, iteration: 205870
loss: 0.9716217517852783,grad_norm: 0.9199607568791835, iteration: 205871
loss: 0.9776426553726196,grad_norm: 0.9999990771134107, iteration: 205872
loss: 0.9872469902038574,grad_norm: 0.7212895263089221, iteration: 205873
loss: 1.0098601579666138,grad_norm: 0.9999992046966768, iteration: 205874
loss: 1.0077394247055054,grad_norm: 0.9405893478462103, iteration: 205875
loss: 0.994354248046875,grad_norm: 0.9999991449222141, iteration: 205876
loss: 0.9761423468589783,grad_norm: 0.9199154524133071, iteration: 205877
loss: 0.9949288368225098,grad_norm: 0.9999990069070235, iteration: 205878
loss: 0.9950152635574341,grad_norm: 0.9985464876110016, iteration: 205879
loss: 1.0050028562545776,grad_norm: 0.853941915189692, iteration: 205880
loss: 1.0189933776855469,grad_norm: 0.8705254787262361, iteration: 205881
loss: 0.9864459037780762,grad_norm: 0.9999990474609589, iteration: 205882
loss: 0.9778097867965698,grad_norm: 0.9691724442977251, iteration: 205883
loss: 0.9900277853012085,grad_norm: 0.9999991690758376, iteration: 205884
loss: 1.0077086687088013,grad_norm: 0.9999992033845978, iteration: 205885
loss: 1.024487853050232,grad_norm: 0.9999990218383179, iteration: 205886
loss: 1.0184422731399536,grad_norm: 0.8990901939680653, iteration: 205887
loss: 1.024544596672058,grad_norm: 0.9100349758417314, iteration: 205888
loss: 1.037499189376831,grad_norm: 0.9999993790875855, iteration: 205889
loss: 1.0019991397857666,grad_norm: 0.972007016504462, iteration: 205890
loss: 1.0049458742141724,grad_norm: 0.999999129939888, iteration: 205891
loss: 1.0051084756851196,grad_norm: 0.9999993697034163, iteration: 205892
loss: 1.009689450263977,grad_norm: 0.999999093245342, iteration: 205893
loss: 1.0059653520584106,grad_norm: 0.787079596658536, iteration: 205894
loss: 0.9709588289260864,grad_norm: 0.9506746280594092, iteration: 205895
loss: 1.0045677423477173,grad_norm: 0.8064969112938186, iteration: 205896
loss: 1.0172494649887085,grad_norm: 0.871451614962638, iteration: 205897
loss: 1.0053960084915161,grad_norm: 0.9999991577413379, iteration: 205898
loss: 1.0365066528320312,grad_norm: 0.9999992378427934, iteration: 205899
loss: 0.9664316773414612,grad_norm: 0.8839422116511977, iteration: 205900
loss: 0.9835904836654663,grad_norm: 0.8748234597937584, iteration: 205901
loss: 0.9776462316513062,grad_norm: 0.8295399982335802, iteration: 205902
loss: 0.9879280924797058,grad_norm: 0.9999990731242333, iteration: 205903
loss: 0.9575192332267761,grad_norm: 0.8320026395418374, iteration: 205904
loss: 1.0200841426849365,grad_norm: 0.999999053148153, iteration: 205905
loss: 1.130629301071167,grad_norm: 0.9999999618998028, iteration: 205906
loss: 0.9796690940856934,grad_norm: 0.9999991564691076, iteration: 205907
loss: 1.0001224279403687,grad_norm: 0.8343503468856521, iteration: 205908
loss: 1.2028465270996094,grad_norm: 0.9999998627016913, iteration: 205909
loss: 0.9999462366104126,grad_norm: 0.8637711698714177, iteration: 205910
loss: 1.0122133493423462,grad_norm: 0.8735401578284914, iteration: 205911
loss: 1.0719125270843506,grad_norm: 1.0000000542077323, iteration: 205912
loss: 0.9962909817695618,grad_norm: 0.9566731199577679, iteration: 205913
loss: 0.9903101325035095,grad_norm: 0.8156500824697231, iteration: 205914
loss: 1.2524981498718262,grad_norm: 0.9999995938057515, iteration: 205915
loss: 1.0490692853927612,grad_norm: 0.9707238609757505, iteration: 205916
loss: 1.1322146654129028,grad_norm: 0.9999997854016376, iteration: 205917
loss: 1.0459928512573242,grad_norm: 0.9550494220156404, iteration: 205918
loss: 1.0039093494415283,grad_norm: 0.9999992786229411, iteration: 205919
loss: 0.9787770509719849,grad_norm: 0.9999990549637132, iteration: 205920
loss: 0.9782440066337585,grad_norm: 0.9307997239423651, iteration: 205921
loss: 0.9924127459526062,grad_norm: 0.9941692739689689, iteration: 205922
loss: 1.0279512405395508,grad_norm: 0.9999996389764034, iteration: 205923
loss: 1.0205574035644531,grad_norm: 0.9840807361005806, iteration: 205924
loss: 1.017423391342163,grad_norm: 0.9448043961762833, iteration: 205925
loss: 1.0077725648880005,grad_norm: 0.9999990290541952, iteration: 205926
loss: 0.995274007320404,grad_norm: 0.9999989444740378, iteration: 205927
loss: 0.9655652642250061,grad_norm: 0.9643768689099143, iteration: 205928
loss: 0.9678508639335632,grad_norm: 0.9999990244793828, iteration: 205929
loss: 0.9903913140296936,grad_norm: 0.9999991521963494, iteration: 205930
loss: 1.027236819267273,grad_norm: 0.8750043750416271, iteration: 205931
loss: 0.9956508874893188,grad_norm: 0.868116673597262, iteration: 205932
loss: 1.0025309324264526,grad_norm: 0.9999989086754536, iteration: 205933
loss: 0.983128011226654,grad_norm: 0.8523104510887891, iteration: 205934
loss: 0.9780024290084839,grad_norm: 0.8979872218333564, iteration: 205935
loss: 0.9706202745437622,grad_norm: 0.9408055897514519, iteration: 205936
loss: 0.9875279664993286,grad_norm: 0.9931255987406007, iteration: 205937
loss: 1.0043277740478516,grad_norm: 0.9307179551392825, iteration: 205938
loss: 0.9908730983734131,grad_norm: 0.9999991951922766, iteration: 205939
loss: 1.0197540521621704,grad_norm: 0.999999007933127, iteration: 205940
loss: 1.059960126876831,grad_norm: 0.9999990849190661, iteration: 205941
loss: 1.011459231376648,grad_norm: 0.9052600209209615, iteration: 205942
loss: 0.9890141487121582,grad_norm: 0.9862196660101017, iteration: 205943
loss: 1.0020793676376343,grad_norm: 0.9999991816746642, iteration: 205944
loss: 1.0052369832992554,grad_norm: 0.99999904640161, iteration: 205945
loss: 1.0228694677352905,grad_norm: 0.9528018366746919, iteration: 205946
loss: 1.0032711029052734,grad_norm: 0.999999096501975, iteration: 205947
loss: 1.0161051750183105,grad_norm: 0.920247220891064, iteration: 205948
loss: 1.0086005926132202,grad_norm: 0.9286409184156185, iteration: 205949
loss: 1.0097384452819824,grad_norm: 0.9999992416905116, iteration: 205950
loss: 1.0424590110778809,grad_norm: 0.9999992708853719, iteration: 205951
loss: 1.0143816471099854,grad_norm: 0.9369031280873901, iteration: 205952
loss: 1.0230448246002197,grad_norm: 0.9999995893213006, iteration: 205953
loss: 1.0014830827713013,grad_norm: 0.9058601418438922, iteration: 205954
loss: 1.0762959718704224,grad_norm: 0.9937912910867529, iteration: 205955
loss: 1.000203251838684,grad_norm: 0.9503421686033026, iteration: 205956
loss: 0.9967526197433472,grad_norm: 0.9999990370352354, iteration: 205957
loss: 1.0033901929855347,grad_norm: 0.8761614892638613, iteration: 205958
loss: 0.9900370240211487,grad_norm: 0.8746372027693489, iteration: 205959
loss: 0.9941778182983398,grad_norm: 0.8665977979992773, iteration: 205960
loss: 1.0335272550582886,grad_norm: 0.9100182781965845, iteration: 205961
loss: 1.0062016248703003,grad_norm: 0.9866295653485613, iteration: 205962
loss: 1.0779110193252563,grad_norm: 0.9225582402184032, iteration: 205963
loss: 1.020372748374939,grad_norm: 0.8062220789850747, iteration: 205964
loss: 1.03394615650177,grad_norm: 0.8221059133760105, iteration: 205965
loss: 0.9701329469680786,grad_norm: 0.8283393756161582, iteration: 205966
loss: 1.0190792083740234,grad_norm: 0.9999990991950966, iteration: 205967
loss: 0.9994261264801025,grad_norm: 0.9999990884903908, iteration: 205968
loss: 0.9962888360023499,grad_norm: 0.8493622641196424, iteration: 205969
loss: 1.0357487201690674,grad_norm: 0.9999991570051555, iteration: 205970
loss: 1.0047065019607544,grad_norm: 0.9999991277632188, iteration: 205971
loss: 0.998919665813446,grad_norm: 0.7808582097296418, iteration: 205972
loss: 1.0083755254745483,grad_norm: 0.9999990732384826, iteration: 205973
loss: 1.0238806009292603,grad_norm: 0.9999992044062166, iteration: 205974
loss: 0.9986678957939148,grad_norm: 0.999999087270838, iteration: 205975
loss: 0.9758188724517822,grad_norm: 0.882787076330867, iteration: 205976
loss: 0.9956619143486023,grad_norm: 0.9073417296610448, iteration: 205977
loss: 0.976484477519989,grad_norm: 0.8970166565799439, iteration: 205978
loss: 1.0061408281326294,grad_norm: 0.9011508519354665, iteration: 205979
loss: 1.0092493295669556,grad_norm: 0.9999991196836673, iteration: 205980
loss: 1.029044270515442,grad_norm: 0.7861713595078708, iteration: 205981
loss: 1.0046112537384033,grad_norm: 0.9999990846561954, iteration: 205982
loss: 1.0395454168319702,grad_norm: 0.8591974082508307, iteration: 205983
loss: 1.012655258178711,grad_norm: 0.8215025549903371, iteration: 205984
loss: 1.0102647542953491,grad_norm: 0.9640686004171942, iteration: 205985
loss: 1.1358661651611328,grad_norm: 0.9228207739777398, iteration: 205986
loss: 0.9820461273193359,grad_norm: 0.7649991480229718, iteration: 205987
loss: 0.9765365123748779,grad_norm: 0.8462572400507287, iteration: 205988
loss: 1.0097804069519043,grad_norm: 0.9999990316439482, iteration: 205989
loss: 0.9764352440834045,grad_norm: 0.9999990529018205, iteration: 205990
loss: 0.9684215784072876,grad_norm: 0.9999992168902092, iteration: 205991
loss: 0.9675539135932922,grad_norm: 0.9673150638146719, iteration: 205992
loss: 0.9927181005477905,grad_norm: 0.7883758509989437, iteration: 205993
loss: 1.0029268264770508,grad_norm: 0.8051384835372006, iteration: 205994
loss: 1.0056413412094116,grad_norm: 0.9669747238620269, iteration: 205995
loss: 1.0139787197113037,grad_norm: 0.9999992175469353, iteration: 205996
loss: 1.0034583806991577,grad_norm: 0.9999993160030637, iteration: 205997
loss: 0.9685831069946289,grad_norm: 0.7978868669915095, iteration: 205998
loss: 0.9972847700119019,grad_norm: 0.9999990564685678, iteration: 205999
loss: 0.9903781414031982,grad_norm: 0.7831108448308607, iteration: 206000
loss: 0.9903779625892639,grad_norm: 0.7829092343006263, iteration: 206001
loss: 1.0230190753936768,grad_norm: 0.922681101549614, iteration: 206002
loss: 1.0061628818511963,grad_norm: 0.927054933985481, iteration: 206003
loss: 0.9981908202171326,grad_norm: 0.9999991002832284, iteration: 206004
loss: 1.0124845504760742,grad_norm: 0.8651303804483095, iteration: 206005
loss: 1.0031907558441162,grad_norm: 0.9999992957671173, iteration: 206006
loss: 1.0192265510559082,grad_norm: 0.9999990466565083, iteration: 206007
loss: 0.9996095299720764,grad_norm: 0.9156643073206682, iteration: 206008
loss: 1.0014234781265259,grad_norm: 0.8586390095184407, iteration: 206009
loss: 1.0285241603851318,grad_norm: 0.9999990467045465, iteration: 206010
loss: 1.0131028890609741,grad_norm: 0.9212721352593066, iteration: 206011
loss: 0.9985266923904419,grad_norm: 0.8466704002442724, iteration: 206012
loss: 1.0159786939620972,grad_norm: 0.8323330700615675, iteration: 206013
loss: 1.0167510509490967,grad_norm: 0.90749609550582, iteration: 206014
loss: 1.030331015586853,grad_norm: 0.7766542386061371, iteration: 206015
loss: 1.034393548965454,grad_norm: 0.9738728773349278, iteration: 206016
loss: 0.9825273752212524,grad_norm: 0.9046346429473874, iteration: 206017
loss: 0.9740573167800903,grad_norm: 0.9935780442017227, iteration: 206018
loss: 1.0381979942321777,grad_norm: 0.999999057347642, iteration: 206019
loss: 0.9871045351028442,grad_norm: 0.8852470030068342, iteration: 206020
loss: 1.0094882249832153,grad_norm: 0.999999139585488, iteration: 206021
loss: 1.0289006233215332,grad_norm: 0.9999990548361574, iteration: 206022
loss: 0.9892311096191406,grad_norm: 0.9592283119225975, iteration: 206023
loss: 1.0091502666473389,grad_norm: 0.9999991755408414, iteration: 206024
loss: 0.9970082640647888,grad_norm: 0.9999991238560151, iteration: 206025
loss: 0.9964022040367126,grad_norm: 0.9999990297194977, iteration: 206026
loss: 1.009229063987732,grad_norm: 0.9999991057063413, iteration: 206027
loss: 1.0292290449142456,grad_norm: 0.8825996339381413, iteration: 206028
loss: 1.0170538425445557,grad_norm: 0.8129409688642019, iteration: 206029
loss: 1.0238926410675049,grad_norm: 0.9740688322901885, iteration: 206030
loss: 1.00925612449646,grad_norm: 0.9346241790618003, iteration: 206031
loss: 0.9953358173370361,grad_norm: 0.9999992990591953, iteration: 206032
loss: 0.9957241415977478,grad_norm: 0.8833878581735091, iteration: 206033
loss: 1.0066337585449219,grad_norm: 0.9344297116881544, iteration: 206034
loss: 1.033085584640503,grad_norm: 0.9999991790522389, iteration: 206035
loss: 1.0100643634796143,grad_norm: 0.9278769786522169, iteration: 206036
loss: 0.9675685167312622,grad_norm: 0.9557825059613734, iteration: 206037
loss: 0.9944757223129272,grad_norm: 0.7992240311802304, iteration: 206038
loss: 1.0251266956329346,grad_norm: 0.8642860670537402, iteration: 206039
loss: 0.9534319639205933,grad_norm: 0.9999990559200949, iteration: 206040
loss: 1.0564625263214111,grad_norm: 0.9999991145897343, iteration: 206041
loss: 0.972004234790802,grad_norm: 0.8375105687182459, iteration: 206042
loss: 1.0047430992126465,grad_norm: 0.968093949156277, iteration: 206043
loss: 0.9899532794952393,grad_norm: 0.9191448275646971, iteration: 206044
loss: 0.9735803008079529,grad_norm: 0.7573827039941492, iteration: 206045
loss: 0.9946632981300354,grad_norm: 0.7889550284027748, iteration: 206046
loss: 0.971486508846283,grad_norm: 0.9999990351289508, iteration: 206047
loss: 0.9783121943473816,grad_norm: 0.9593317719395348, iteration: 206048
loss: 0.9807929396629333,grad_norm: 0.9999990927626968, iteration: 206049
loss: 1.0167583227157593,grad_norm: 0.9999993108963352, iteration: 206050
loss: 0.9934795498847961,grad_norm: 0.9846504107941172, iteration: 206051
loss: 0.9641225337982178,grad_norm: 0.8995225616719387, iteration: 206052
loss: 0.971855103969574,grad_norm: 0.8654403537970551, iteration: 206053
loss: 1.0051286220550537,grad_norm: 0.9999994816271223, iteration: 206054
loss: 0.9653447270393372,grad_norm: 0.8936874145693932, iteration: 206055
loss: 0.990608811378479,grad_norm: 0.8679754796235565, iteration: 206056
loss: 0.9680158495903015,grad_norm: 0.9885032338774995, iteration: 206057
loss: 0.989447832107544,grad_norm: 0.8787993242530315, iteration: 206058
loss: 0.9815987944602966,grad_norm: 0.999999186566819, iteration: 206059
loss: 1.0518741607666016,grad_norm: 0.9999990724156085, iteration: 206060
loss: 0.9946296811103821,grad_norm: 0.74925161456102, iteration: 206061
loss: 0.9877581000328064,grad_norm: 0.9999990863742565, iteration: 206062
loss: 0.9874816536903381,grad_norm: 0.943611588642181, iteration: 206063
loss: 1.001940369606018,grad_norm: 0.999999396549912, iteration: 206064
loss: 0.9910295009613037,grad_norm: 0.847682881707662, iteration: 206065
loss: 1.0127513408660889,grad_norm: 0.9846306305190005, iteration: 206066
loss: 0.9653970003128052,grad_norm: 0.8999980241929223, iteration: 206067
loss: 1.0325332880020142,grad_norm: 0.8938456152753177, iteration: 206068
loss: 0.9997185468673706,grad_norm: 0.9999991485341876, iteration: 206069
loss: 1.0198969841003418,grad_norm: 0.9831940429260575, iteration: 206070
loss: 1.0114152431488037,grad_norm: 0.9622373594821555, iteration: 206071
loss: 0.9937363862991333,grad_norm: 0.8286770923283303, iteration: 206072
loss: 0.987152099609375,grad_norm: 0.8953201888922719, iteration: 206073
loss: 0.9894285202026367,grad_norm: 0.8405005586537382, iteration: 206074
loss: 1.0220084190368652,grad_norm: 0.9466940239076082, iteration: 206075
loss: 1.0057159662246704,grad_norm: 0.9248494047098608, iteration: 206076
loss: 0.985475480556488,grad_norm: 0.9999998665246714, iteration: 206077
loss: 1.0466606616973877,grad_norm: 0.999999943538899, iteration: 206078
loss: 1.0058526992797852,grad_norm: 0.8904627666679702, iteration: 206079
loss: 0.9904921054840088,grad_norm: 0.9934038165207107, iteration: 206080
loss: 1.0276447534561157,grad_norm: 0.8940436014582543, iteration: 206081
loss: 1.0217831134796143,grad_norm: 0.9999991917229378, iteration: 206082
loss: 1.0020021200180054,grad_norm: 0.7444130322216173, iteration: 206083
loss: 1.0031715631484985,grad_norm: 0.9230587094901397, iteration: 206084
loss: 1.0459413528442383,grad_norm: 0.9835039637364379, iteration: 206085
loss: 0.9966312050819397,grad_norm: 0.9924098455090892, iteration: 206086
loss: 0.9659781455993652,grad_norm: 0.9742405288551718, iteration: 206087
loss: 0.9778355360031128,grad_norm: 0.999999062690908, iteration: 206088
loss: 0.9838908314704895,grad_norm: 0.9999991594322702, iteration: 206089
loss: 1.0134845972061157,grad_norm: 0.9122067118082977, iteration: 206090
loss: 0.9665815830230713,grad_norm: 0.999999328667966, iteration: 206091
loss: 1.0110039710998535,grad_norm: 0.8596456368116009, iteration: 206092
loss: 0.9788975119590759,grad_norm: 0.8514450249058886, iteration: 206093
loss: 1.0107747316360474,grad_norm: 0.913012175087985, iteration: 206094
loss: 1.016047716140747,grad_norm: 0.9315008266465618, iteration: 206095
loss: 1.001768708229065,grad_norm: 0.9934789513013975, iteration: 206096
loss: 0.9649543166160583,grad_norm: 0.7941930569946845, iteration: 206097
loss: 0.9818477034568787,grad_norm: 0.9999992120735802, iteration: 206098
loss: 0.9729489088058472,grad_norm: 0.8732889328644932, iteration: 206099
loss: 0.992643415927887,grad_norm: 0.7734169231269911, iteration: 206100
loss: 1.0254992246627808,grad_norm: 0.7922537943098111, iteration: 206101
loss: 1.004141092300415,grad_norm: 0.9999990016289528, iteration: 206102
loss: 0.973579466342926,grad_norm: 0.9308631945197585, iteration: 206103
loss: 0.9499788284301758,grad_norm: 0.8681328046553528, iteration: 206104
loss: 0.991000235080719,grad_norm: 0.9009023652502188, iteration: 206105
loss: 1.0190757513046265,grad_norm: 0.9999990681305155, iteration: 206106
loss: 0.9864970445632935,grad_norm: 0.9402010029779337, iteration: 206107
loss: 0.9971327185630798,grad_norm: 0.9999990639096998, iteration: 206108
loss: 0.9967848658561707,grad_norm: 0.829455056444473, iteration: 206109
loss: 0.9518221616744995,grad_norm: 0.9999990466335626, iteration: 206110
loss: 0.9999993443489075,grad_norm: 0.9999990584504549, iteration: 206111
loss: 0.9999759793281555,grad_norm: 0.99999905600285, iteration: 206112
loss: 1.0075290203094482,grad_norm: 0.9391529056361883, iteration: 206113
loss: 1.0025300979614258,grad_norm: 0.9999990898248635, iteration: 206114
loss: 0.9861032366752625,grad_norm: 0.85741850574097, iteration: 206115
loss: 0.9956514835357666,grad_norm: 0.870740007687444, iteration: 206116
loss: 0.9826641082763672,grad_norm: 0.941080678524484, iteration: 206117
loss: 1.0070512294769287,grad_norm: 0.9999990955655157, iteration: 206118
loss: 0.9941791296005249,grad_norm: 0.9430818348389358, iteration: 206119
loss: 0.9756247401237488,grad_norm: 0.9255565110931062, iteration: 206120
loss: 0.9804026484489441,grad_norm: 0.9999995355430176, iteration: 206121
loss: 0.9992688894271851,grad_norm: 0.9999991564956643, iteration: 206122
loss: 1.0105574131011963,grad_norm: 0.9999989820069668, iteration: 206123
loss: 1.0233560800552368,grad_norm: 0.9280421735099864, iteration: 206124
loss: 1.1122041940689087,grad_norm: 0.9999995482681225, iteration: 206125
loss: 1.022441029548645,grad_norm: 0.8533662223089604, iteration: 206126
loss: 0.9644724726676941,grad_norm: 0.9274808273852648, iteration: 206127
loss: 0.982911229133606,grad_norm: 0.9999992699025778, iteration: 206128
loss: 0.9828307628631592,grad_norm: 0.9999991306753466, iteration: 206129
loss: 1.1201832294464111,grad_norm: 0.9999998504757229, iteration: 206130
loss: 1.0195279121398926,grad_norm: 0.9999990298209064, iteration: 206131
loss: 1.0166696310043335,grad_norm: 0.9425819846892582, iteration: 206132
loss: 1.0092546939849854,grad_norm: 0.8470734623564329, iteration: 206133
loss: 0.9899001717567444,grad_norm: 0.9999992019645619, iteration: 206134
loss: 0.9543924927711487,grad_norm: 0.9302134806544488, iteration: 206135
loss: 0.9918966293334961,grad_norm: 0.9999992017238951, iteration: 206136
loss: 1.03903067111969,grad_norm: 0.864494089267609, iteration: 206137
loss: 1.0141067504882812,grad_norm: 0.9083228099426633, iteration: 206138
loss: 0.9608343243598938,grad_norm: 0.857549461787738, iteration: 206139
loss: 0.98597651720047,grad_norm: 0.9999989864476191, iteration: 206140
loss: 1.0056359767913818,grad_norm: 0.9912719748104399, iteration: 206141
loss: 1.0302952527999878,grad_norm: 0.8755427763036608, iteration: 206142
loss: 0.9698464870452881,grad_norm: 0.9191163740170087, iteration: 206143
loss: 1.0112911462783813,grad_norm: 0.9999992219720252, iteration: 206144
loss: 0.9881953001022339,grad_norm: 0.8461583941906701, iteration: 206145
loss: 1.0082722902297974,grad_norm: 0.8770604085916304, iteration: 206146
loss: 1.0568329095840454,grad_norm: 0.958939597145473, iteration: 206147
loss: 1.0027599334716797,grad_norm: 0.9999991564983474, iteration: 206148
loss: 0.9920789003372192,grad_norm: 0.9743398717019652, iteration: 206149
loss: 0.9806711673736572,grad_norm: 0.9999990327036247, iteration: 206150
loss: 1.0014878511428833,grad_norm: 0.9999995096451313, iteration: 206151
loss: 0.9865924715995789,grad_norm: 0.8052837093021384, iteration: 206152
loss: 0.9653903245925903,grad_norm: 0.8818840419224, iteration: 206153
loss: 0.9541754722595215,grad_norm: 0.9999991141827607, iteration: 206154
loss: 1.0218003988265991,grad_norm: 0.9999992237971426, iteration: 206155
loss: 1.0219653844833374,grad_norm: 0.7995743545579581, iteration: 206156
loss: 0.9944921731948853,grad_norm: 0.9999989986109, iteration: 206157
loss: 1.0113856792449951,grad_norm: 0.9999990762050082, iteration: 206158
loss: 1.0140212774276733,grad_norm: 0.9999990885542795, iteration: 206159
loss: 0.9714112877845764,grad_norm: 0.9286315693187915, iteration: 206160
loss: 1.0250004529953003,grad_norm: 0.9999992422986789, iteration: 206161
loss: 1.0146235227584839,grad_norm: 0.9999989657553261, iteration: 206162
loss: 1.0257337093353271,grad_norm: 0.9999996445994838, iteration: 206163
loss: 0.9994156956672668,grad_norm: 0.9664715325603539, iteration: 206164
loss: 0.994900643825531,grad_norm: 0.999998922458916, iteration: 206165
loss: 1.0219552516937256,grad_norm: 0.9019915232440364, iteration: 206166
loss: 1.0040048360824585,grad_norm: 0.8878151216742154, iteration: 206167
loss: 1.0254350900650024,grad_norm: 0.8587925545949084, iteration: 206168
loss: 1.0135964155197144,grad_norm: 0.8404842443604372, iteration: 206169
loss: 0.9958484768867493,grad_norm: 0.7976659854113364, iteration: 206170
loss: 1.0234496593475342,grad_norm: 0.9660894674211101, iteration: 206171
loss: 1.028651475906372,grad_norm: 0.7916275656004852, iteration: 206172
loss: 0.9859862327575684,grad_norm: 0.9922097331979459, iteration: 206173
loss: 0.9947720766067505,grad_norm: 0.9999990814318416, iteration: 206174
loss: 1.005484938621521,grad_norm: 0.9999991126471596, iteration: 206175
loss: 1.121637225151062,grad_norm: 0.999999849813549, iteration: 206176
loss: 1.0025001764297485,grad_norm: 0.999999137381386, iteration: 206177
loss: 0.982602596282959,grad_norm: 0.8716658744726058, iteration: 206178
loss: 1.0171102285385132,grad_norm: 0.951219463447376, iteration: 206179
loss: 0.9990589022636414,grad_norm: 0.8543630527092492, iteration: 206180
loss: 0.9664773941040039,grad_norm: 0.7860947814699023, iteration: 206181
loss: 0.9857298731803894,grad_norm: 0.8547909685814191, iteration: 206182
loss: 1.034087061882019,grad_norm: 0.9999996018168592, iteration: 206183
loss: 0.9867625832557678,grad_norm: 0.9999990952755797, iteration: 206184
loss: 1.0147268772125244,grad_norm: 0.9999991384070033, iteration: 206185
loss: 0.9856639504432678,grad_norm: 0.9728480139644938, iteration: 206186
loss: 1.043807029724121,grad_norm: 0.9999991427372706, iteration: 206187
loss: 1.009788990020752,grad_norm: 0.8953507467958611, iteration: 206188
loss: 1.0021272897720337,grad_norm: 0.9999991126780288, iteration: 206189
loss: 0.979012131690979,grad_norm: 0.8733657232094268, iteration: 206190
loss: 1.0042577981948853,grad_norm: 0.9639970489402502, iteration: 206191
loss: 1.0224982500076294,grad_norm: 0.9864381227986411, iteration: 206192
loss: 0.988369345664978,grad_norm: 0.9446877543113672, iteration: 206193
loss: 0.9949031472206116,grad_norm: 0.9765724970571582, iteration: 206194
loss: 0.9929527640342712,grad_norm: 0.9999994262751021, iteration: 206195
loss: 1.0131964683532715,grad_norm: 0.9999991516562066, iteration: 206196
loss: 1.0363695621490479,grad_norm: 0.9280853536040529, iteration: 206197
loss: 0.9968175292015076,grad_norm: 0.9999991724651273, iteration: 206198
loss: 0.9826294183731079,grad_norm: 0.9999993661522156, iteration: 206199
loss: 0.9991704225540161,grad_norm: 0.9999991773235066, iteration: 206200
loss: 1.0027391910552979,grad_norm: 0.9803584001930153, iteration: 206201
loss: 1.0132027864456177,grad_norm: 0.9891433879671413, iteration: 206202
loss: 1.0919681787490845,grad_norm: 0.9999997113078362, iteration: 206203
loss: 0.9615616798400879,grad_norm: 0.7667564714964047, iteration: 206204
loss: 1.0086568593978882,grad_norm: 0.980958768837282, iteration: 206205
loss: 1.0325660705566406,grad_norm: 0.9999990811592022, iteration: 206206
loss: 0.9901955723762512,grad_norm: 0.9687702958146689, iteration: 206207
loss: 0.9569420218467712,grad_norm: 0.9999991913763252, iteration: 206208
loss: 0.9924893379211426,grad_norm: 0.8923326981961328, iteration: 206209
loss: 0.9999125599861145,grad_norm: 0.9999991305947058, iteration: 206210
loss: 1.050835371017456,grad_norm: 0.9235650619190403, iteration: 206211
loss: 1.0388263463974,grad_norm: 0.99999988081065, iteration: 206212
loss: 0.9842278361320496,grad_norm: 0.9999990610605963, iteration: 206213
loss: 1.0105342864990234,grad_norm: 0.9571269295562581, iteration: 206214
loss: 1.0100932121276855,grad_norm: 0.9517962156779998, iteration: 206215
loss: 1.0440703630447388,grad_norm: 0.9999990699802858, iteration: 206216
loss: 1.0092815160751343,grad_norm: 0.9930791822373644, iteration: 206217
loss: 1.042206048965454,grad_norm: 0.9088530169162063, iteration: 206218
loss: 1.0721418857574463,grad_norm: 0.9999992532783715, iteration: 206219
loss: 1.0266053676605225,grad_norm: 0.8844532437511327, iteration: 206220
loss: 1.003792405128479,grad_norm: 0.8870441985416444, iteration: 206221
loss: 0.9898605346679688,grad_norm: 0.9416750714647245, iteration: 206222
loss: 1.0159502029418945,grad_norm: 0.8036188208053021, iteration: 206223
loss: 0.9855644702911377,grad_norm: 0.9999991649650971, iteration: 206224
loss: 0.9559926390647888,grad_norm: 0.9829053832529371, iteration: 206225
loss: 0.9677507877349854,grad_norm: 0.838275177412097, iteration: 206226
loss: 1.0126135349273682,grad_norm: 0.927327408300172, iteration: 206227
loss: 1.012824535369873,grad_norm: 0.9999989994580252, iteration: 206228
loss: 1.002466082572937,grad_norm: 0.9197121216299339, iteration: 206229
loss: 1.0080407857894897,grad_norm: 0.8628149497629612, iteration: 206230
loss: 0.9878497123718262,grad_norm: 0.9782303028097976, iteration: 206231
loss: 0.9922754168510437,grad_norm: 0.9999991159708701, iteration: 206232
loss: 0.9878227710723877,grad_norm: 0.8872052984287873, iteration: 206233
loss: 0.9783115386962891,grad_norm: 0.8580857138542288, iteration: 206234
loss: 1.006050944328308,grad_norm: 0.9999989928742932, iteration: 206235
loss: 0.9993100762367249,grad_norm: 0.9238365602101598, iteration: 206236
loss: 0.9529063105583191,grad_norm: 0.9225839587804069, iteration: 206237
loss: 1.022406816482544,grad_norm: 0.9999993597950406, iteration: 206238
loss: 0.9855979084968567,grad_norm: 0.9195870715793539, iteration: 206239
loss: 0.9875540137290955,grad_norm: 0.84914793643811, iteration: 206240
loss: 1.0201109647750854,grad_norm: 0.9999988715323435, iteration: 206241
loss: 1.007997989654541,grad_norm: 0.9999992084992777, iteration: 206242
loss: 0.9880549907684326,grad_norm: 0.8571748382430744, iteration: 206243
loss: 1.0098333358764648,grad_norm: 0.9329459041899464, iteration: 206244
loss: 1.01822030544281,grad_norm: 0.964428469792422, iteration: 206245
loss: 1.0031465291976929,grad_norm: 0.9050496692349743, iteration: 206246
loss: 1.009755253791809,grad_norm: 0.7927677744758981, iteration: 206247
loss: 1.05035400390625,grad_norm: 0.9999992676950336, iteration: 206248
loss: 1.002402663230896,grad_norm: 0.8975235838423409, iteration: 206249
loss: 0.9935712218284607,grad_norm: 0.9231855800106088, iteration: 206250
loss: 1.01827073097229,grad_norm: 0.91922082676982, iteration: 206251
loss: 0.968891978263855,grad_norm: 0.9320947133504752, iteration: 206252
loss: 1.0059765577316284,grad_norm: 0.888204247731597, iteration: 206253
loss: 1.013382911682129,grad_norm: 0.930011382127833, iteration: 206254
loss: 0.9834936261177063,grad_norm: 0.9999991673671875, iteration: 206255
loss: 1.0107247829437256,grad_norm: 0.9999990760531653, iteration: 206256
loss: 1.0048261880874634,grad_norm: 0.9999992539199616, iteration: 206257
loss: 0.9501461386680603,grad_norm: 0.8416915939285269, iteration: 206258
loss: 0.9557760953903198,grad_norm: 0.9205421951009279, iteration: 206259
loss: 1.0141172409057617,grad_norm: 0.9438783214396783, iteration: 206260
loss: 0.9931214451789856,grad_norm: 0.8144629445859892, iteration: 206261
loss: 0.9790575504302979,grad_norm: 0.9585505512506256, iteration: 206262
loss: 1.0260413885116577,grad_norm: 0.8704596296471403, iteration: 206263
loss: 1.0014362335205078,grad_norm: 0.8821261568977709, iteration: 206264
loss: 0.9908620715141296,grad_norm: 0.9310646070497706, iteration: 206265
loss: 0.9967241287231445,grad_norm: 0.8932463038113241, iteration: 206266
loss: 0.9792539477348328,grad_norm: 0.9248352928270152, iteration: 206267
loss: 1.0204120874404907,grad_norm: 0.999999200307856, iteration: 206268
loss: 1.0041066408157349,grad_norm: 0.7829597714972198, iteration: 206269
loss: 1.0294764041900635,grad_norm: 0.9999990345473794, iteration: 206270
loss: 1.0101350545883179,grad_norm: 0.9999992978462449, iteration: 206271
loss: 0.970135509967804,grad_norm: 0.8060676922827733, iteration: 206272
loss: 0.9946971535682678,grad_norm: 0.9999990224369291, iteration: 206273
loss: 0.9957231283187866,grad_norm: 0.9999991137702153, iteration: 206274
loss: 0.9749794602394104,grad_norm: 0.9436427540343749, iteration: 206275
loss: 1.0049993991851807,grad_norm: 0.999998940266023, iteration: 206276
loss: 1.0215082168579102,grad_norm: 0.9226682331639737, iteration: 206277
loss: 1.0284572839736938,grad_norm: 0.7940037469742839, iteration: 206278
loss: 0.9886372685432434,grad_norm: 0.9999992000347155, iteration: 206279
loss: 1.0013078451156616,grad_norm: 0.8400711960434942, iteration: 206280
loss: 0.9981802701950073,grad_norm: 0.8595408746353408, iteration: 206281
loss: 1.0195170640945435,grad_norm: 0.9999990703802661, iteration: 206282
loss: 1.023269534111023,grad_norm: 0.9999993557343471, iteration: 206283
loss: 1.0011329650878906,grad_norm: 0.9892712961042797, iteration: 206284
loss: 1.0205543041229248,grad_norm: 0.9850376171316823, iteration: 206285
loss: 1.011390209197998,grad_norm: 0.9999991024629626, iteration: 206286
loss: 0.9908881187438965,grad_norm: 0.9999990187140172, iteration: 206287
loss: 0.9963181614875793,grad_norm: 0.9322468565468897, iteration: 206288
loss: 1.0061218738555908,grad_norm: 0.9999994111754394, iteration: 206289
loss: 0.9933224320411682,grad_norm: 0.9937310240784121, iteration: 206290
loss: 1.021356225013733,grad_norm: 0.9999991846569998, iteration: 206291
loss: 0.9552715420722961,grad_norm: 0.8977142810743425, iteration: 206292
loss: 1.0086159706115723,grad_norm: 0.9999994862715543, iteration: 206293
loss: 1.0008556842803955,grad_norm: 0.898467869682101, iteration: 206294
loss: 1.0363459587097168,grad_norm: 0.9999994851212477, iteration: 206295
loss: 0.9838950037956238,grad_norm: 0.9221313209281545, iteration: 206296
loss: 1.03694748878479,grad_norm: 0.9999990968700672, iteration: 206297
loss: 0.967149555683136,grad_norm: 0.9196395976028592, iteration: 206298
loss: 0.9956641793251038,grad_norm: 0.999999057310247, iteration: 206299
loss: 0.9962316751480103,grad_norm: 0.9999992503658058, iteration: 206300
loss: 0.9938423037528992,grad_norm: 0.9525300490114944, iteration: 206301
loss: 0.9914916157722473,grad_norm: 0.8134991660772317, iteration: 206302
loss: 0.9823764562606812,grad_norm: 0.9996179461417297, iteration: 206303
loss: 0.978607177734375,grad_norm: 0.9999989519151921, iteration: 206304
loss: 1.0215229988098145,grad_norm: 0.9999989917212571, iteration: 206305
loss: 1.01786208152771,grad_norm: 0.9999990910512206, iteration: 206306
loss: 0.9877049326896667,grad_norm: 0.9063007617757709, iteration: 206307
loss: 1.0004874467849731,grad_norm: 0.9306230754189424, iteration: 206308
loss: 0.977551281452179,grad_norm: 0.917824381644046, iteration: 206309
loss: 1.0238161087036133,grad_norm: 0.9999993290968247, iteration: 206310
loss: 1.0412585735321045,grad_norm: 0.9999992671118338, iteration: 206311
loss: 0.9993500709533691,grad_norm: 0.9999991095381815, iteration: 206312
loss: 0.9698481559753418,grad_norm: 0.9999989316972379, iteration: 206313
loss: 1.0414167642593384,grad_norm: 0.9767844222335474, iteration: 206314
loss: 1.003258228302002,grad_norm: 0.8876770048334514, iteration: 206315
loss: 1.0042476654052734,grad_norm: 0.9999991557448308, iteration: 206316
loss: 0.9684510827064514,grad_norm: 0.8106052383078964, iteration: 206317
loss: 1.0202902555465698,grad_norm: 0.8812383259661536, iteration: 206318
loss: 1.0088831186294556,grad_norm: 0.880683395502973, iteration: 206319
loss: 0.953254222869873,grad_norm: 0.9999990082699485, iteration: 206320
loss: 1.0181556940078735,grad_norm: 0.8891150481791585, iteration: 206321
loss: 0.9927328824996948,grad_norm: 0.97795263377605, iteration: 206322
loss: 0.9646440744400024,grad_norm: 0.9586611416712967, iteration: 206323
loss: 0.9934704303741455,grad_norm: 0.9793107033109612, iteration: 206324
loss: 1.0226554870605469,grad_norm: 0.999999248809744, iteration: 206325
loss: 0.9627256393432617,grad_norm: 0.8392206639632973, iteration: 206326
loss: 0.991455614566803,grad_norm: 0.9155960494788953, iteration: 206327
loss: 1.0151118040084839,grad_norm: 0.8797719385722821, iteration: 206328
loss: 0.9840160012245178,grad_norm: 0.9999991379535499, iteration: 206329
loss: 0.9926598072052002,grad_norm: 0.9037645323881648, iteration: 206330
loss: 1.0138068199157715,grad_norm: 0.840911304415617, iteration: 206331
loss: 1.0393885374069214,grad_norm: 0.9818176068983051, iteration: 206332
loss: 1.0198711156845093,grad_norm: 0.9999990237381124, iteration: 206333
loss: 1.028933048248291,grad_norm: 0.9999991016632843, iteration: 206334
loss: 1.018198847770691,grad_norm: 0.9402605570923089, iteration: 206335
loss: 1.0529022216796875,grad_norm: 0.9999993349369529, iteration: 206336
loss: 1.033474326133728,grad_norm: 0.8679636078797571, iteration: 206337
loss: 0.9866662621498108,grad_norm: 0.8970891006086485, iteration: 206338
loss: 0.9968615770339966,grad_norm: 0.8774439038806549, iteration: 206339
loss: 1.0417765378952026,grad_norm: 0.9999990363402587, iteration: 206340
loss: 1.0149348974227905,grad_norm: 0.9122948782005815, iteration: 206341
loss: 0.9797363877296448,grad_norm: 0.9999990494938688, iteration: 206342
loss: 0.9974943399429321,grad_norm: 0.8892469223448275, iteration: 206343
loss: 1.086648941040039,grad_norm: 0.9999998324812747, iteration: 206344
loss: 1.0216916799545288,grad_norm: 0.9999992914713215, iteration: 206345
loss: 1.0167359113693237,grad_norm: 0.9999990579034799, iteration: 206346
loss: 1.0164754390716553,grad_norm: 0.8495096379550924, iteration: 206347
loss: 1.0080598592758179,grad_norm: 0.9923844312814263, iteration: 206348
loss: 1.0008409023284912,grad_norm: 0.9999991912709689, iteration: 206349
loss: 0.9618328213691711,grad_norm: 0.9345281461302, iteration: 206350
loss: 0.9644941687583923,grad_norm: 0.9906739221292048, iteration: 206351
loss: 1.0098599195480347,grad_norm: 0.9999991883630387, iteration: 206352
loss: 1.0284830331802368,grad_norm: 0.9999991277625462, iteration: 206353
loss: 0.9730057716369629,grad_norm: 0.8151149399178073, iteration: 206354
loss: 1.0290749073028564,grad_norm: 0.9999991884281217, iteration: 206355
loss: 0.9888778924942017,grad_norm: 0.8510020080860691, iteration: 206356
loss: 1.00216805934906,grad_norm: 0.9108384996865135, iteration: 206357
loss: 1.025981068611145,grad_norm: 0.9999992939258552, iteration: 206358
loss: 1.014264464378357,grad_norm: 0.9931348625485596, iteration: 206359
loss: 1.0062263011932373,grad_norm: 0.9522621717768921, iteration: 206360
loss: 1.0183849334716797,grad_norm: 0.9027693588107326, iteration: 206361
loss: 1.0299161672592163,grad_norm: 0.9999992948299292, iteration: 206362
loss: 1.0096442699432373,grad_norm: 0.9800121389830637, iteration: 206363
loss: 0.9902278780937195,grad_norm: 0.970969119066773, iteration: 206364
loss: 1.0968841314315796,grad_norm: 0.9999993123068135, iteration: 206365
loss: 0.9500977396965027,grad_norm: 0.9708459557455282, iteration: 206366
loss: 0.9974105954170227,grad_norm: 0.7758132324312701, iteration: 206367
loss: 1.0225481986999512,grad_norm: 0.8045101822574255, iteration: 206368
loss: 0.9857242107391357,grad_norm: 0.9434402484015574, iteration: 206369
loss: 1.0217230319976807,grad_norm: 0.9431916630526286, iteration: 206370
loss: 0.9919126629829407,grad_norm: 0.9999992197934875, iteration: 206371
loss: 1.0528364181518555,grad_norm: 0.9590339448016906, iteration: 206372
loss: 0.976558268070221,grad_norm: 0.9212441474898287, iteration: 206373
loss: 0.9549039602279663,grad_norm: 0.8736228312185951, iteration: 206374
loss: 0.9633916020393372,grad_norm: 0.8851565643660423, iteration: 206375
loss: 0.9948785901069641,grad_norm: 0.8944831313218872, iteration: 206376
loss: 1.0034668445587158,grad_norm: 0.7845920728415784, iteration: 206377
loss: 0.9889808297157288,grad_norm: 0.9890537452018693, iteration: 206378
loss: 1.0017750263214111,grad_norm: 0.9411370900045342, iteration: 206379
loss: 1.013291358947754,grad_norm: 0.881745173005789, iteration: 206380
loss: 1.0203341245651245,grad_norm: 0.9348847289701783, iteration: 206381
loss: 1.0010120868682861,grad_norm: 0.904218201170878, iteration: 206382
loss: 0.9945113658905029,grad_norm: 0.9999991934573156, iteration: 206383
loss: 0.9832819104194641,grad_norm: 0.9999991195360534, iteration: 206384
loss: 0.9903875589370728,grad_norm: 0.9999990908847042, iteration: 206385
loss: 0.9984313249588013,grad_norm: 0.9999991341591399, iteration: 206386
loss: 0.9600174427032471,grad_norm: 0.9938572685071437, iteration: 206387
loss: 0.9807227253913879,grad_norm: 0.9999991529841339, iteration: 206388
loss: 1.0024762153625488,grad_norm: 0.9999992771552241, iteration: 206389
loss: 0.9759843349456787,grad_norm: 0.9999991834985816, iteration: 206390
loss: 1.0274803638458252,grad_norm: 0.8444381106812482, iteration: 206391
loss: 1.0000083446502686,grad_norm: 0.9406666516672687, iteration: 206392
loss: 0.999515950679779,grad_norm: 0.9372786856341812, iteration: 206393
loss: 1.0118076801300049,grad_norm: 0.9393757633641533, iteration: 206394
loss: 0.9980688095092773,grad_norm: 0.9185011977187619, iteration: 206395
loss: 1.0017539262771606,grad_norm: 0.8274177464103925, iteration: 206396
loss: 1.0134053230285645,grad_norm: 0.9692022350145245, iteration: 206397
loss: 1.0057698488235474,grad_norm: 0.904435554499978, iteration: 206398
loss: 0.9822144508361816,grad_norm: 0.8762029339849261, iteration: 206399
loss: 0.9890691637992859,grad_norm: 0.9999991984790185, iteration: 206400
loss: 0.9972202777862549,grad_norm: 0.9193369613330313, iteration: 206401
loss: 0.9888343811035156,grad_norm: 0.9999991081280662, iteration: 206402
loss: 1.0060733556747437,grad_norm: 0.9999990924730952, iteration: 206403
loss: 0.9874405860900879,grad_norm: 0.9574020669386427, iteration: 206404
loss: 1.0245240926742554,grad_norm: 0.7958081365751064, iteration: 206405
loss: 1.00087571144104,grad_norm: 0.9999989947003718, iteration: 206406
loss: 1.001975178718567,grad_norm: 0.9402245952022008, iteration: 206407
loss: 0.9664901494979858,grad_norm: 0.9317979269473254, iteration: 206408
loss: 0.9971797466278076,grad_norm: 0.8708220375212585, iteration: 206409
loss: 1.0093945264816284,grad_norm: 0.8409674119669103, iteration: 206410
loss: 0.9837468266487122,grad_norm: 0.99999909138646, iteration: 206411
loss: 1.0810669660568237,grad_norm: 0.9999990887076973, iteration: 206412
loss: 1.0045804977416992,grad_norm: 0.8521846476860572, iteration: 206413
loss: 0.9646857976913452,grad_norm: 0.9999991151582447, iteration: 206414
loss: 0.9975598454475403,grad_norm: 0.9525555184395555, iteration: 206415
loss: 0.9766406416893005,grad_norm: 0.9999991471610915, iteration: 206416
loss: 1.0271114110946655,grad_norm: 0.99999915165988, iteration: 206417
loss: 1.0273308753967285,grad_norm: 0.9999991803649578, iteration: 206418
loss: 1.0398831367492676,grad_norm: 0.9416042002163063, iteration: 206419
loss: 0.9871379137039185,grad_norm: 0.8281257239210297, iteration: 206420
loss: 1.0150768756866455,grad_norm: 0.777305841208483, iteration: 206421
loss: 0.9767376184463501,grad_norm: 0.9999989808798834, iteration: 206422
loss: 1.0516393184661865,grad_norm: 0.8851763732989232, iteration: 206423
loss: 1.0235313177108765,grad_norm: 0.8138745228251508, iteration: 206424
loss: 1.0254110097885132,grad_norm: 0.9999993487877317, iteration: 206425
loss: 0.9914165735244751,grad_norm: 0.9748697684517733, iteration: 206426
loss: 1.0029925107955933,grad_norm: 0.9999990566009183, iteration: 206427
loss: 1.031990885734558,grad_norm: 0.9999992188821235, iteration: 206428
loss: 0.9941182136535645,grad_norm: 0.9999991383225186, iteration: 206429
loss: 1.0072746276855469,grad_norm: 0.967385258802108, iteration: 206430
loss: 0.9811061024665833,grad_norm: 0.8970529086727995, iteration: 206431
loss: 1.0134921073913574,grad_norm: 0.9999991790247836, iteration: 206432
loss: 1.0819588899612427,grad_norm: 0.9999992499007608, iteration: 206433
loss: 0.9713206887245178,grad_norm: 0.8005816135343335, iteration: 206434
loss: 0.9883337616920471,grad_norm: 0.9999990610028142, iteration: 206435
loss: 0.9786731004714966,grad_norm: 0.9701860703560922, iteration: 206436
loss: 1.0260136127471924,grad_norm: 0.8732614262386883, iteration: 206437
loss: 1.0081567764282227,grad_norm: 0.8338734395071001, iteration: 206438
loss: 0.9922783970832825,grad_norm: 0.9999991140917488, iteration: 206439
loss: 1.0123471021652222,grad_norm: 0.9999996465697839, iteration: 206440
loss: 0.9694622159004211,grad_norm: 0.9083661800794524, iteration: 206441
loss: 1.012229323387146,grad_norm: 0.8230994265410505, iteration: 206442
loss: 1.041481375694275,grad_norm: 0.9581563909365353, iteration: 206443
loss: 1.0206058025360107,grad_norm: 0.9999991243720975, iteration: 206444
loss: 1.0307490825653076,grad_norm: 0.9999991263918775, iteration: 206445
loss: 0.9898192286491394,grad_norm: 0.9999990752585808, iteration: 206446
loss: 1.0227707624435425,grad_norm: 0.9713816943213694, iteration: 206447
loss: 0.995551586151123,grad_norm: 0.9885168112165813, iteration: 206448
loss: 1.0081396102905273,grad_norm: 0.9999990936313821, iteration: 206449
loss: 0.9982571005821228,grad_norm: 0.9219458527503693, iteration: 206450
loss: 0.9704164266586304,grad_norm: 0.9510694073121555, iteration: 206451
loss: 1.0326372385025024,grad_norm: 0.9568045465467939, iteration: 206452
loss: 0.9655216336250305,grad_norm: 0.9002076990733008, iteration: 206453
loss: 1.0026029348373413,grad_norm: 0.8392365801154291, iteration: 206454
loss: 0.9723368883132935,grad_norm: 0.8594856855901002, iteration: 206455
loss: 1.0115610361099243,grad_norm: 0.8727538077862226, iteration: 206456
loss: 0.9653364419937134,grad_norm: 0.9999991192783655, iteration: 206457
loss: 1.021333932876587,grad_norm: 0.9999992063597429, iteration: 206458
loss: 0.9645238518714905,grad_norm: 0.9999990591176777, iteration: 206459
loss: 0.9725395441055298,grad_norm: 0.9838458143952115, iteration: 206460
loss: 1.0121674537658691,grad_norm: 0.9090866615636932, iteration: 206461
loss: 1.0053727626800537,grad_norm: 0.9999991409671605, iteration: 206462
loss: 1.023844599723816,grad_norm: 0.8534184970168748, iteration: 206463
loss: 1.0267268419265747,grad_norm: 0.8467072258210693, iteration: 206464
loss: 0.9643673896789551,grad_norm: 0.9339997228434395, iteration: 206465
loss: 0.9985364675521851,grad_norm: 0.9820425985981718, iteration: 206466
loss: 0.9754999279975891,grad_norm: 0.9999998507365556, iteration: 206467
loss: 0.9934722185134888,grad_norm: 0.8919216820908316, iteration: 206468
loss: 1.000759243965149,grad_norm: 0.8259665851563275, iteration: 206469
loss: 1.005440592765808,grad_norm: 0.8929999553860963, iteration: 206470
loss: 1.051164150238037,grad_norm: 0.9999995081137987, iteration: 206471
loss: 1.0125948190689087,grad_norm: 0.945331761808085, iteration: 206472
loss: 1.0279512405395508,grad_norm: 0.999999044918663, iteration: 206473
loss: 1.0038316249847412,grad_norm: 0.9794760527949744, iteration: 206474
loss: 1.018263339996338,grad_norm: 0.9999990790413475, iteration: 206475
loss: 1.0144964456558228,grad_norm: 0.9581745522044455, iteration: 206476
loss: 1.0308465957641602,grad_norm: 0.8971029204795057, iteration: 206477
loss: 1.0069470405578613,grad_norm: 0.7875191833992424, iteration: 206478
loss: 1.0124658346176147,grad_norm: 0.9368919334620455, iteration: 206479
loss: 1.0355143547058105,grad_norm: 0.9999990939815181, iteration: 206480
loss: 0.9761975407600403,grad_norm: 0.8145892543353884, iteration: 206481
loss: 0.9759383797645569,grad_norm: 0.9941851379542582, iteration: 206482
loss: 1.1347122192382812,grad_norm: 0.9999998598834229, iteration: 206483
loss: 0.9965184330940247,grad_norm: 0.9999999522894231, iteration: 206484
loss: 1.0385005474090576,grad_norm: 0.9450966040193494, iteration: 206485
loss: 0.9834821224212646,grad_norm: 0.993363792066386, iteration: 206486
loss: 0.9862285852432251,grad_norm: 0.9999989933584814, iteration: 206487
loss: 1.0182970762252808,grad_norm: 0.993194784322338, iteration: 206488
loss: 0.9851502180099487,grad_norm: 0.9907052342526508, iteration: 206489
loss: 1.02863347530365,grad_norm: 0.9787222613168871, iteration: 206490
loss: 1.0137412548065186,grad_norm: 0.9085500698746342, iteration: 206491
loss: 1.050158977508545,grad_norm: 0.997466532996887, iteration: 206492
loss: 1.0103005170822144,grad_norm: 0.9552504551131497, iteration: 206493
loss: 1.0314160585403442,grad_norm: 0.9591147367557885, iteration: 206494
loss: 1.0120137929916382,grad_norm: 0.9536048255799343, iteration: 206495
loss: 0.9862521886825562,grad_norm: 0.9134435029826147, iteration: 206496
loss: 0.9887266755104065,grad_norm: 0.9999992257390264, iteration: 206497
loss: 1.0466421842575073,grad_norm: 0.9999998913852302, iteration: 206498
loss: 1.0083266496658325,grad_norm: 0.9870252612981966, iteration: 206499
loss: 1.0081084966659546,grad_norm: 0.9999990891705043, iteration: 206500
loss: 0.9932001233100891,grad_norm: 0.9394788987271891, iteration: 206501
loss: 1.0241615772247314,grad_norm: 0.890884690555013, iteration: 206502
loss: 1.0565881729125977,grad_norm: 0.9999997606744181, iteration: 206503
loss: 1.0196701288223267,grad_norm: 0.942468414623748, iteration: 206504
loss: 0.9817952513694763,grad_norm: 0.9555029957232785, iteration: 206505
loss: 0.9727001190185547,grad_norm: 0.9076874345522862, iteration: 206506
loss: 1.0101830959320068,grad_norm: 0.9999991990207641, iteration: 206507
loss: 1.0666993856430054,grad_norm: 0.9703138786572981, iteration: 206508
loss: 1.001060962677002,grad_norm: 0.9999996511215572, iteration: 206509
loss: 1.0149521827697754,grad_norm: 0.709745557775047, iteration: 206510
loss: 1.029689908027649,grad_norm: 0.8811601316173634, iteration: 206511
loss: 0.9807040095329285,grad_norm: 0.9999991055518522, iteration: 206512
loss: 1.02759850025177,grad_norm: 0.9999992689477367, iteration: 206513
loss: 1.0323861837387085,grad_norm: 0.9640143815168026, iteration: 206514
loss: 0.978443443775177,grad_norm: 0.8778842763940773, iteration: 206515
loss: 0.9969772100448608,grad_norm: 0.9999991803271766, iteration: 206516
loss: 1.026168704032898,grad_norm: 0.912184818961582, iteration: 206517
loss: 0.9982239603996277,grad_norm: 0.8406668836434419, iteration: 206518
loss: 0.9646713137626648,grad_norm: 0.8824702082075208, iteration: 206519
loss: 1.045399785041809,grad_norm: 0.9399397470745207, iteration: 206520
loss: 1.0117861032485962,grad_norm: 0.8695419463129298, iteration: 206521
loss: 0.9687131643295288,grad_norm: 0.9257350913705379, iteration: 206522
loss: 1.0045857429504395,grad_norm: 0.9244960793451872, iteration: 206523
loss: 0.977820873260498,grad_norm: 0.894271934484603, iteration: 206524
loss: 1.0225460529327393,grad_norm: 0.9999991767597922, iteration: 206525
loss: 0.9857074618339539,grad_norm: 0.9999992199799009, iteration: 206526
loss: 0.9892182946205139,grad_norm: 0.908286488285462, iteration: 206527
loss: 1.0090603828430176,grad_norm: 0.9160279704839619, iteration: 206528
loss: 1.0176161527633667,grad_norm: 0.9999990540627646, iteration: 206529
loss: 1.0188316106796265,grad_norm: 0.9999990561580733, iteration: 206530
loss: 1.06074059009552,grad_norm: 0.999999334561544, iteration: 206531
loss: 0.9872075915336609,grad_norm: 0.8408498109580312, iteration: 206532
loss: 1.0050244331359863,grad_norm: 0.9999991504784297, iteration: 206533
loss: 1.0112615823745728,grad_norm: 0.9983055515090598, iteration: 206534
loss: 1.0317444801330566,grad_norm: 0.9999996343343333, iteration: 206535
loss: 1.048581838607788,grad_norm: 0.9999990483245477, iteration: 206536
loss: 1.0256929397583008,grad_norm: 0.9829699522109244, iteration: 206537
loss: 1.0057628154754639,grad_norm: 0.999999144940836, iteration: 206538
loss: 1.0158090591430664,grad_norm: 0.9901939319815858, iteration: 206539
loss: 0.9816724061965942,grad_norm: 0.8051011764503107, iteration: 206540
loss: 1.0342050790786743,grad_norm: 0.99999931792509, iteration: 206541
loss: 0.9729006290435791,grad_norm: 0.8750271780040402, iteration: 206542
loss: 0.9563840627670288,grad_norm: 0.912099874708795, iteration: 206543
loss: 0.9823718667030334,grad_norm: 0.9999992881558813, iteration: 206544
loss: 0.9975070357322693,grad_norm: 0.799649181022024, iteration: 206545
loss: 0.9713020920753479,grad_norm: 0.9999989708008487, iteration: 206546
loss: 1.0900986194610596,grad_norm: 0.9999996386352279, iteration: 206547
loss: 1.022040843963623,grad_norm: 0.9898652523607425, iteration: 206548
loss: 1.1031328439712524,grad_norm: 0.9999997423543502, iteration: 206549
loss: 1.0851086378097534,grad_norm: 0.9707002696248053, iteration: 206550
loss: 1.029396653175354,grad_norm: 0.8745603630923795, iteration: 206551
loss: 0.9917594790458679,grad_norm: 0.9773329227747398, iteration: 206552
loss: 0.9904128909111023,grad_norm: 0.7958162092874416, iteration: 206553
loss: 0.9495571255683899,grad_norm: 0.8733124925717002, iteration: 206554
loss: 0.9705244302749634,grad_norm: 0.8441887383483735, iteration: 206555
loss: 0.9987377524375916,grad_norm: 0.9999991379258273, iteration: 206556
loss: 0.9760345220565796,grad_norm: 0.9992883517238694, iteration: 206557
loss: 1.0882190465927124,grad_norm: 0.9999991685168811, iteration: 206558
loss: 1.3148202896118164,grad_norm: 0.9999995407165873, iteration: 206559
loss: 1.1727458238601685,grad_norm: 0.9999995034966361, iteration: 206560
loss: 1.0541610717773438,grad_norm: 0.9999993655039973, iteration: 206561
loss: 1.0106537342071533,grad_norm: 0.8364722525634262, iteration: 206562
loss: 0.9702451825141907,grad_norm: 0.9837272687532777, iteration: 206563
loss: 1.00287926197052,grad_norm: 0.9999990203986351, iteration: 206564
loss: 0.9988153576850891,grad_norm: 0.8119055626362259, iteration: 206565
loss: 1.009056806564331,grad_norm: 0.9999995230820349, iteration: 206566
loss: 1.0738447904586792,grad_norm: 0.9999996782233951, iteration: 206567
loss: 1.0838161706924438,grad_norm: 0.9999995575799953, iteration: 206568
loss: 1.0262577533721924,grad_norm: 0.9999992407189497, iteration: 206569
loss: 1.0206955671310425,grad_norm: 0.9999993247137676, iteration: 206570
loss: 1.0324949026107788,grad_norm: 0.9999991955633214, iteration: 206571
loss: 1.007552981376648,grad_norm: 0.9999989828471652, iteration: 206572
loss: 1.0355331897735596,grad_norm: 0.9999995316239488, iteration: 206573
loss: 1.0100659132003784,grad_norm: 0.9999991795073992, iteration: 206574
loss: 0.9909651279449463,grad_norm: 0.9352750465736567, iteration: 206575
loss: 1.052662968635559,grad_norm: 0.9405758441613006, iteration: 206576
loss: 1.0403996706008911,grad_norm: 0.9999991242292929, iteration: 206577
loss: 1.0069754123687744,grad_norm: 0.9999989318058249, iteration: 206578
loss: 0.9946156144142151,grad_norm: 0.9849520885653046, iteration: 206579
loss: 1.048490285873413,grad_norm: 0.9999991570788506, iteration: 206580
loss: 1.0572720766067505,grad_norm: 0.9999997370896616, iteration: 206581
loss: 1.0395101308822632,grad_norm: 0.978126938797806, iteration: 206582
loss: 0.9723293781280518,grad_norm: 0.9633703844372117, iteration: 206583
loss: 1.009092092514038,grad_norm: 0.9999992338169834, iteration: 206584
loss: 0.9985270500183105,grad_norm: 0.9999992248666066, iteration: 206585
loss: 0.9938632845878601,grad_norm: 0.964137315455469, iteration: 206586
loss: 1.1026350259780884,grad_norm: 0.9999997110860629, iteration: 206587
loss: 1.0179346799850464,grad_norm: 0.9999993434455504, iteration: 206588
loss: 0.962177574634552,grad_norm: 0.8648123150190938, iteration: 206589
loss: 1.018203616142273,grad_norm: 0.9655895200632736, iteration: 206590
loss: 1.0130493640899658,grad_norm: 0.9423373598058551, iteration: 206591
loss: 1.009461760520935,grad_norm: 0.804947074720431, iteration: 206592
loss: 1.0316016674041748,grad_norm: 0.9999989613072766, iteration: 206593
loss: 1.06376314163208,grad_norm: 0.976766129866683, iteration: 206594
loss: 1.0214191675186157,grad_norm: 0.9999989838897119, iteration: 206595
loss: 1.0075533390045166,grad_norm: 0.9578226166336862, iteration: 206596
loss: 1.004353642463684,grad_norm: 0.9130482110501968, iteration: 206597
loss: 0.9844785928726196,grad_norm: 0.9305053034336306, iteration: 206598
loss: 0.9793403744697571,grad_norm: 0.9999991560021915, iteration: 206599
loss: 1.0050890445709229,grad_norm: 0.9931093799886733, iteration: 206600
loss: 1.0289620161056519,grad_norm: 0.9999991451854203, iteration: 206601
loss: 1.0031641721725464,grad_norm: 0.9085635230941757, iteration: 206602
loss: 0.9899796843528748,grad_norm: 0.7653209840851574, iteration: 206603
loss: 1.0159180164337158,grad_norm: 0.8423094533580336, iteration: 206604
loss: 0.9853467345237732,grad_norm: 0.9999992610974496, iteration: 206605
loss: 0.9704175591468811,grad_norm: 0.9505480929185899, iteration: 206606
loss: 0.9975264072418213,grad_norm: 0.9999990570005673, iteration: 206607
loss: 1.0366188287734985,grad_norm: 0.9934664290266731, iteration: 206608
loss: 0.9894008040428162,grad_norm: 0.9692375959427424, iteration: 206609
loss: 1.011677861213684,grad_norm: 0.8630616431756504, iteration: 206610
loss: 0.997322678565979,grad_norm: 0.8843869106667905, iteration: 206611
loss: 1.0143423080444336,grad_norm: 0.8357070024149376, iteration: 206612
loss: 1.0252408981323242,grad_norm: 0.7566672635565627, iteration: 206613
loss: 0.9798428416252136,grad_norm: 0.6939264093740726, iteration: 206614
loss: 0.999151885509491,grad_norm: 0.9486140735532639, iteration: 206615
loss: 0.9816508293151855,grad_norm: 0.9200635217269665, iteration: 206616
loss: 1.0089112520217896,grad_norm: 0.8613695828757355, iteration: 206617
loss: 0.9771707653999329,grad_norm: 0.8505509685215468, iteration: 206618
loss: 1.008341670036316,grad_norm: 0.9587281383928236, iteration: 206619
loss: 0.9996946454048157,grad_norm: 0.9999990626721869, iteration: 206620
loss: 0.9931486248970032,grad_norm: 0.9861288514815609, iteration: 206621
loss: 1.0162022113800049,grad_norm: 0.999999591257328, iteration: 206622
loss: 0.9925529360771179,grad_norm: 0.9020275470252215, iteration: 206623
loss: 1.0167163610458374,grad_norm: 0.9226807587438174, iteration: 206624
loss: 0.9614847898483276,grad_norm: 0.999999172643174, iteration: 206625
loss: 0.9955682158470154,grad_norm: 0.9999993260710035, iteration: 206626
loss: 0.9568629860877991,grad_norm: 0.9999994898799248, iteration: 206627
loss: 1.009321928024292,grad_norm: 0.9999992580121854, iteration: 206628
loss: 1.0321564674377441,grad_norm: 0.9999994186748544, iteration: 206629
loss: 1.0165565013885498,grad_norm: 0.9550973111994949, iteration: 206630
loss: 0.9814826846122742,grad_norm: 0.999999771999635, iteration: 206631
loss: 1.0481441020965576,grad_norm: 0.999999229464926, iteration: 206632
loss: 1.0196692943572998,grad_norm: 0.8197590687749169, iteration: 206633
loss: 0.9719641208648682,grad_norm: 0.7698399914307734, iteration: 206634
loss: 0.9904373288154602,grad_norm: 0.8902029729281722, iteration: 206635
loss: 0.9967358708381653,grad_norm: 0.9999998034378752, iteration: 206636
loss: 0.9899607300758362,grad_norm: 0.8893521100186051, iteration: 206637
loss: 1.010219931602478,grad_norm: 0.999999069754338, iteration: 206638
loss: 1.0626710653305054,grad_norm: 0.999999124501152, iteration: 206639
loss: 1.034432053565979,grad_norm: 0.897613453813303, iteration: 206640
loss: 0.9803996086120605,grad_norm: 0.9773020097060544, iteration: 206641
loss: 0.9958544969558716,grad_norm: 0.8586741898521913, iteration: 206642
loss: 0.9671331644058228,grad_norm: 0.9999989704078394, iteration: 206643
loss: 1.0820802450180054,grad_norm: 0.9999993122003241, iteration: 206644
loss: 1.0324710607528687,grad_norm: 0.9999998902744818, iteration: 206645
loss: 1.04096257686615,grad_norm: 0.9999996560419115, iteration: 206646
loss: 0.988560676574707,grad_norm: 0.9999990896075392, iteration: 206647
loss: 1.0290032625198364,grad_norm: 0.9733229619715466, iteration: 206648
loss: 1.0043154954910278,grad_norm: 0.8215538976821295, iteration: 206649
loss: 1.018890380859375,grad_norm: 0.8866875630222798, iteration: 206650
loss: 1.0228157043457031,grad_norm: 0.9999992782690501, iteration: 206651
loss: 1.0441789627075195,grad_norm: 0.959807419401659, iteration: 206652
loss: 0.9810742139816284,grad_norm: 0.7801156390357736, iteration: 206653
loss: 1.014840841293335,grad_norm: 0.9294352320897141, iteration: 206654
loss: 1.000538945198059,grad_norm: 0.9144816377609281, iteration: 206655
loss: 0.9931795001029968,grad_norm: 0.8045122200396282, iteration: 206656
loss: 1.0279384851455688,grad_norm: 0.9183986207359531, iteration: 206657
loss: 1.0031780004501343,grad_norm: 0.9999990920753311, iteration: 206658
loss: 0.9631190299987793,grad_norm: 0.9655351375570715, iteration: 206659
loss: 1.014216423034668,grad_norm: 0.9999991384622834, iteration: 206660
loss: 1.0249879360198975,grad_norm: 0.9767777750758068, iteration: 206661
loss: 0.978967547416687,grad_norm: 0.9999991596674037, iteration: 206662
loss: 0.9866865277290344,grad_norm: 0.8482867339595732, iteration: 206663
loss: 0.9651892781257629,grad_norm: 0.9226626406299234, iteration: 206664
loss: 0.9639134407043457,grad_norm: 0.9489394887737012, iteration: 206665
loss: 1.0116572380065918,grad_norm: 0.9370702244719125, iteration: 206666
loss: 1.0245544910430908,grad_norm: 0.999999329439644, iteration: 206667
loss: 0.9650174379348755,grad_norm: 0.9630748270491769, iteration: 206668
loss: 0.9920846223831177,grad_norm: 0.8978184690297903, iteration: 206669
loss: 0.9614103436470032,grad_norm: 0.9015220869771251, iteration: 206670
loss: 1.0130122900009155,grad_norm: 0.9999992818680125, iteration: 206671
loss: 0.9780802130699158,grad_norm: 0.9572041729556502, iteration: 206672
loss: 1.0185672044754028,grad_norm: 0.9999990328249762, iteration: 206673
loss: 0.9328311085700989,grad_norm: 0.9999989928230333, iteration: 206674
loss: 0.9841441512107849,grad_norm: 0.9999989759213602, iteration: 206675
loss: 0.9780420660972595,grad_norm: 0.9999990622047714, iteration: 206676
loss: 1.0135915279388428,grad_norm: 0.9999990973594487, iteration: 206677
loss: 1.0019819736480713,grad_norm: 0.9924907939391905, iteration: 206678
loss: 1.0074172019958496,grad_norm: 0.999999197534028, iteration: 206679
loss: 1.016649603843689,grad_norm: 0.7884119260958159, iteration: 206680
loss: 1.0268837213516235,grad_norm: 0.9715415628640648, iteration: 206681
loss: 1.009116768836975,grad_norm: 0.9043420981295016, iteration: 206682
loss: 1.0567777156829834,grad_norm: 0.9999993557881636, iteration: 206683
loss: 1.0328701734542847,grad_norm: 0.999999234770841, iteration: 206684
loss: 0.990506112575531,grad_norm: 0.940521555629991, iteration: 206685
loss: 0.9962566494941711,grad_norm: 0.9999991346441736, iteration: 206686
loss: 1.023366928100586,grad_norm: 0.9507333970948717, iteration: 206687
loss: 0.9687157869338989,grad_norm: 0.8602263607356786, iteration: 206688
loss: 0.9947435259819031,grad_norm: 0.9999991671173386, iteration: 206689
loss: 1.0831687450408936,grad_norm: 0.999999916604304, iteration: 206690
loss: 0.9952751398086548,grad_norm: 0.8393265905653385, iteration: 206691
loss: 0.998849630355835,grad_norm: 0.9324310762444725, iteration: 206692
loss: 0.9872626066207886,grad_norm: 0.8412224320355122, iteration: 206693
loss: 1.0182502269744873,grad_norm: 0.9107530680371119, iteration: 206694
loss: 0.9589422345161438,grad_norm: 0.9999990784781363, iteration: 206695
loss: 1.0176323652267456,grad_norm: 0.9999994206661943, iteration: 206696
loss: 1.0167638063430786,grad_norm: 0.9999991626442212, iteration: 206697
loss: 1.0052788257598877,grad_norm: 0.9793819364548438, iteration: 206698
loss: 0.9915749430656433,grad_norm: 0.9999994292178936, iteration: 206699
loss: 1.0025559663772583,grad_norm: 0.8469281352852934, iteration: 206700
loss: 0.9946311116218567,grad_norm: 0.9999991957496603, iteration: 206701
loss: 1.0672122240066528,grad_norm: 0.9999991428603594, iteration: 206702
loss: 0.9752764701843262,grad_norm: 0.9473194171541507, iteration: 206703
loss: 1.017008900642395,grad_norm: 0.9999990748393722, iteration: 206704
loss: 0.9921762347221375,grad_norm: 0.8830514032876743, iteration: 206705
loss: 1.1084916591644287,grad_norm: 0.9999995667533782, iteration: 206706
loss: 0.9907112717628479,grad_norm: 0.9999991235655472, iteration: 206707
loss: 1.0077275037765503,grad_norm: 0.999999603955129, iteration: 206708
loss: 1.0739283561706543,grad_norm: 0.9999997699881195, iteration: 206709
loss: 0.9852985143661499,grad_norm: 0.9431632349599222, iteration: 206710
loss: 0.9892405271530151,grad_norm: 0.9638710818536285, iteration: 206711
loss: 0.991968035697937,grad_norm: 0.9253664868059565, iteration: 206712
loss: 1.0037490129470825,grad_norm: 0.9290294727504651, iteration: 206713
loss: 1.026134729385376,grad_norm: 0.9999990845508093, iteration: 206714
loss: 1.0109411478042603,grad_norm: 0.9999991354903531, iteration: 206715
loss: 0.975831151008606,grad_norm: 0.9999995637659429, iteration: 206716
loss: 0.9793977737426758,grad_norm: 0.8453884497720319, iteration: 206717
loss: 1.0109807252883911,grad_norm: 0.9999990876617961, iteration: 206718
loss: 1.0027670860290527,grad_norm: 0.8445899208601407, iteration: 206719
loss: 0.9685375690460205,grad_norm: 0.7689749828423705, iteration: 206720
loss: 0.9723877310752869,grad_norm: 0.9999991124812987, iteration: 206721
loss: 1.0032795667648315,grad_norm: 0.9999992907334982, iteration: 206722
loss: 0.9732790589332581,grad_norm: 0.8089276657958684, iteration: 206723
loss: 1.0096583366394043,grad_norm: 0.8363156235317076, iteration: 206724
loss: 1.063031554222107,grad_norm: 0.9999992855085565, iteration: 206725
loss: 1.062633752822876,grad_norm: 0.9999996957837466, iteration: 206726
loss: 1.0045047998428345,grad_norm: 0.9614746592914653, iteration: 206727
loss: 1.0159960985183716,grad_norm: 0.9999990190188046, iteration: 206728
loss: 1.0149110555648804,grad_norm: 0.8924557636190689, iteration: 206729
loss: 0.9964502453804016,grad_norm: 0.9999990420923417, iteration: 206730
loss: 0.985612690448761,grad_norm: 0.963880410820484, iteration: 206731
loss: 0.9881464838981628,grad_norm: 0.9227605308908507, iteration: 206732
loss: 1.0343331098556519,grad_norm: 0.9999995821300955, iteration: 206733
loss: 1.0162689685821533,grad_norm: 0.99999960552738, iteration: 206734
loss: 0.9892765879631042,grad_norm: 0.907801564381933, iteration: 206735
loss: 1.0319435596466064,grad_norm: 0.7866644858355328, iteration: 206736
loss: 0.9796889424324036,grad_norm: 0.9999996284722185, iteration: 206737
loss: 0.9874907732009888,grad_norm: 0.9999991030587081, iteration: 206738
loss: 1.025709867477417,grad_norm: 0.9999992713111991, iteration: 206739
loss: 0.9599389433860779,grad_norm: 0.8839445404660881, iteration: 206740
loss: 1.006562352180481,grad_norm: 0.9999991849630557, iteration: 206741
loss: 1.2158581018447876,grad_norm: 0.9999999799247269, iteration: 206742
loss: 0.9758411049842834,grad_norm: 0.9999991317638962, iteration: 206743
loss: 1.0033512115478516,grad_norm: 0.8225749350650751, iteration: 206744
loss: 0.984407901763916,grad_norm: 0.9389340491158389, iteration: 206745
loss: 1.0154153108596802,grad_norm: 0.8712523269300749, iteration: 206746
loss: 1.0894392728805542,grad_norm: 0.9999989467370075, iteration: 206747
loss: 1.0094189643859863,grad_norm: 0.8995043350913574, iteration: 206748
loss: 0.9596391320228577,grad_norm: 0.9152485702259834, iteration: 206749
loss: 1.01494300365448,grad_norm: 0.6888548671463449, iteration: 206750
loss: 1.0304640531539917,grad_norm: 0.9999992005610921, iteration: 206751
loss: 0.9854837656021118,grad_norm: 0.9999992350798974, iteration: 206752
loss: 0.986719012260437,grad_norm: 0.9241740682768912, iteration: 206753
loss: 1.0204813480377197,grad_norm: 0.9999993017887396, iteration: 206754
loss: 0.958814799785614,grad_norm: 0.9999990682159219, iteration: 206755
loss: 0.9868255257606506,grad_norm: 0.9001264810495988, iteration: 206756
loss: 0.9878734946250916,grad_norm: 0.9999990588135095, iteration: 206757
loss: 1.0609999895095825,grad_norm: 0.9999999238982048, iteration: 206758
loss: 0.9659512639045715,grad_norm: 0.9999991631646737, iteration: 206759
loss: 1.0063343048095703,grad_norm: 0.9999990460742331, iteration: 206760
loss: 1.081641435623169,grad_norm: 0.9678701700748756, iteration: 206761
loss: 0.9977534413337708,grad_norm: 0.7754091447887306, iteration: 206762
loss: 1.0487314462661743,grad_norm: 0.8829779832229707, iteration: 206763
loss: 1.029406189918518,grad_norm: 0.9999997547691661, iteration: 206764
loss: 0.9620137214660645,grad_norm: 0.9550118047670431, iteration: 206765
loss: 1.010731816291809,grad_norm: 0.9608104018884241, iteration: 206766
loss: 0.9986057877540588,grad_norm: 0.9999990611799111, iteration: 206767
loss: 0.9803105592727661,grad_norm: 0.9899534217532844, iteration: 206768
loss: 1.0268120765686035,grad_norm: 0.9999991461525947, iteration: 206769
loss: 1.001968502998352,grad_norm: 0.9999992046447348, iteration: 206770
loss: 0.9495359659194946,grad_norm: 0.999999173737256, iteration: 206771
loss: 1.049622893333435,grad_norm: 0.9223417202591783, iteration: 206772
loss: 1.0122482776641846,grad_norm: 0.9999991058582175, iteration: 206773
loss: 0.9764646887779236,grad_norm: 0.9792723798255889, iteration: 206774
loss: 1.010988712310791,grad_norm: 0.9999993088732602, iteration: 206775
loss: 1.015478253364563,grad_norm: 0.9999990564508486, iteration: 206776
loss: 0.9801810383796692,grad_norm: 0.9476172891345196, iteration: 206777
loss: 0.9931477308273315,grad_norm: 0.9999990965783291, iteration: 206778
loss: 0.9973107576370239,grad_norm: 0.8842024563747211, iteration: 206779
loss: 1.0018486976623535,grad_norm: 0.9999992397379874, iteration: 206780
loss: 1.0176643133163452,grad_norm: 0.8999544041062367, iteration: 206781
loss: 0.9773263335227966,grad_norm: 0.9941528130913058, iteration: 206782
loss: 1.021759033203125,grad_norm: 0.9999991412576749, iteration: 206783
loss: 1.049605369567871,grad_norm: 0.9999995927120113, iteration: 206784
loss: 1.0346800088882446,grad_norm: 0.9999994449560546, iteration: 206785
loss: 1.0244330167770386,grad_norm: 0.999999122237303, iteration: 206786
loss: 0.988005518913269,grad_norm: 0.961392789979257, iteration: 206787
loss: 0.9709159135818481,grad_norm: 0.8016694412424485, iteration: 206788
loss: 1.0225998163223267,grad_norm: 0.8241142803236096, iteration: 206789
loss: 0.9994357228279114,grad_norm: 0.8829567771071795, iteration: 206790
loss: 0.9933053851127625,grad_norm: 0.9014477818809561, iteration: 206791
loss: 1.0080269575119019,grad_norm: 0.999999153084644, iteration: 206792
loss: 1.0103306770324707,grad_norm: 0.8845220415659542, iteration: 206793
loss: 1.0080281496047974,grad_norm: 0.9999990652920216, iteration: 206794
loss: 1.0177019834518433,grad_norm: 0.82957668555268, iteration: 206795
loss: 1.013620138168335,grad_norm: 0.8985983294325717, iteration: 206796
loss: 1.0130611658096313,grad_norm: 0.9900659479944843, iteration: 206797
loss: 0.9717519283294678,grad_norm: 0.9999990929899376, iteration: 206798
loss: 1.1011760234832764,grad_norm: 0.9999996743704372, iteration: 206799
loss: 1.008050799369812,grad_norm: 0.9999989547717751, iteration: 206800
loss: 0.9879841208457947,grad_norm: 0.853199110498332, iteration: 206801
loss: 1.0143824815750122,grad_norm: 0.9999992576279615, iteration: 206802
loss: 0.9924452304840088,grad_norm: 0.7896378493473355, iteration: 206803
loss: 0.9557573199272156,grad_norm: 0.989745002390692, iteration: 206804
loss: 0.9964341521263123,grad_norm: 0.8806823994666448, iteration: 206805
loss: 1.0238661766052246,grad_norm: 0.9999990943773169, iteration: 206806
loss: 0.9934866428375244,grad_norm: 0.8847540852656269, iteration: 206807
loss: 0.9666063189506531,grad_norm: 0.9999991502842811, iteration: 206808
loss: 1.0278619527816772,grad_norm: 0.9999989340850134, iteration: 206809
loss: 1.092484951019287,grad_norm: 0.999999039996676, iteration: 206810
loss: 0.9837546944618225,grad_norm: 0.9271999049541291, iteration: 206811
loss: 0.9914629459381104,grad_norm: 0.8805680569140766, iteration: 206812
loss: 1.0030851364135742,grad_norm: 0.9999990220490308, iteration: 206813
loss: 1.0005347728729248,grad_norm: 0.8011446013201002, iteration: 206814
loss: 1.0167045593261719,grad_norm: 0.9999991880411119, iteration: 206815
loss: 0.9893653392791748,grad_norm: 0.989588331966991, iteration: 206816
loss: 1.0294145345687866,grad_norm: 0.9255638432033555, iteration: 206817
loss: 0.980195164680481,grad_norm: 0.999999323576331, iteration: 206818
loss: 1.0306490659713745,grad_norm: 0.9999995514003798, iteration: 206819
loss: 0.9894245266914368,grad_norm: 0.9333622946406934, iteration: 206820
loss: 1.0247926712036133,grad_norm: 0.9999990057176719, iteration: 206821
loss: 1.0074152946472168,grad_norm: 0.9017518706539949, iteration: 206822
loss: 0.9782450199127197,grad_norm: 0.9999991712538899, iteration: 206823
loss: 0.999411404132843,grad_norm: 0.9379249515527955, iteration: 206824
loss: 0.961320161819458,grad_norm: 0.8340959950193312, iteration: 206825
loss: 1.0104451179504395,grad_norm: 0.9407584991815128, iteration: 206826
loss: 0.9944329261779785,grad_norm: 0.8927465098555909, iteration: 206827
loss: 1.0090396404266357,grad_norm: 0.9999991325878685, iteration: 206828
loss: 0.9975175857543945,grad_norm: 0.8259454253462694, iteration: 206829
loss: 0.9871415495872498,grad_norm: 0.9069876096199292, iteration: 206830
loss: 1.002048134803772,grad_norm: 0.999999074138124, iteration: 206831
loss: 0.9882012605667114,grad_norm: 0.9999990217340234, iteration: 206832
loss: 1.0070310831069946,grad_norm: 0.7564781182894044, iteration: 206833
loss: 0.9972544312477112,grad_norm: 0.9999990804650664, iteration: 206834
loss: 1.017999529838562,grad_norm: 0.9999990259938452, iteration: 206835
loss: 0.9994798898696899,grad_norm: 0.9999991605668145, iteration: 206836
loss: 0.9864292144775391,grad_norm: 0.9149477119950299, iteration: 206837
loss: 0.9930109977722168,grad_norm: 0.9999991382119091, iteration: 206838
loss: 1.0037249326705933,grad_norm: 0.8855938878532288, iteration: 206839
loss: 0.9752269983291626,grad_norm: 0.8476074696504682, iteration: 206840
loss: 0.9869630932807922,grad_norm: 0.8480365537886956, iteration: 206841
loss: 0.9912447333335876,grad_norm: 0.8807220956281465, iteration: 206842
loss: 0.9854572415351868,grad_norm: 0.9029407849695219, iteration: 206843
loss: 0.9951484203338623,grad_norm: 0.9540606690143274, iteration: 206844
loss: 0.9959763884544373,grad_norm: 0.9999991712804095, iteration: 206845
loss: 0.9884200692176819,grad_norm: 0.8376700660444928, iteration: 206846
loss: 1.0136736631393433,grad_norm: 0.999999077825266, iteration: 206847
loss: 0.9853150844573975,grad_norm: 0.999999079099274, iteration: 206848
loss: 1.0304369926452637,grad_norm: 0.9999991670126401, iteration: 206849
loss: 1.0552195310592651,grad_norm: 0.9798625936649108, iteration: 206850
loss: 0.9660161733627319,grad_norm: 0.9839940562930957, iteration: 206851
loss: 0.966960072517395,grad_norm: 0.9999991402606402, iteration: 206852
loss: 1.0260658264160156,grad_norm: 0.94046895325812, iteration: 206853
loss: 1.0061671733856201,grad_norm: 0.9864947307992777, iteration: 206854
loss: 1.0001275539398193,grad_norm: 0.9999991007381748, iteration: 206855
loss: 0.9582961797714233,grad_norm: 0.9830202091601304, iteration: 206856
loss: 1.0060843229293823,grad_norm: 0.8076513646259132, iteration: 206857
loss: 1.0164071321487427,grad_norm: 0.9999991074440243, iteration: 206858
loss: 0.9964902997016907,grad_norm: 0.8827576338843828, iteration: 206859
loss: 0.9891435503959656,grad_norm: 0.9999991967789317, iteration: 206860
loss: 0.978361964225769,grad_norm: 0.9158432517676925, iteration: 206861
loss: 0.9774134159088135,grad_norm: 0.8606187334823439, iteration: 206862
loss: 1.049665093421936,grad_norm: 0.999999617640563, iteration: 206863
loss: 0.9995883703231812,grad_norm: 0.9136577950636217, iteration: 206864
loss: 1.036930799484253,grad_norm: 0.9617462525744558, iteration: 206865
loss: 0.987277626991272,grad_norm: 0.8380720593418266, iteration: 206866
loss: 1.0008186101913452,grad_norm: 0.8727874522189256, iteration: 206867
loss: 1.000632405281067,grad_norm: 0.9999991823820159, iteration: 206868
loss: 0.988521158695221,grad_norm: 0.9722372722370428, iteration: 206869
loss: 0.9795780777931213,grad_norm: 0.9999992011267147, iteration: 206870
loss: 0.9871334433555603,grad_norm: 0.999999286867901, iteration: 206871
loss: 0.978413999080658,grad_norm: 0.967425468651545, iteration: 206872
loss: 0.9948602318763733,grad_norm: 0.9252487630353293, iteration: 206873
loss: 0.9946579337120056,grad_norm: 0.7771975455180239, iteration: 206874
loss: 1.015175461769104,grad_norm: 0.9252421485548218, iteration: 206875
loss: 0.9732842445373535,grad_norm: 0.8612325274685153, iteration: 206876
loss: 0.9676523804664612,grad_norm: 0.9999990270612649, iteration: 206877
loss: 0.970407247543335,grad_norm: 0.9682817166411042, iteration: 206878
loss: 1.0182507038116455,grad_norm: 0.9999992597584697, iteration: 206879
loss: 0.9941815733909607,grad_norm: 0.9593453227196269, iteration: 206880
loss: 1.0053274631500244,grad_norm: 0.8154991660094142, iteration: 206881
loss: 0.9623674154281616,grad_norm: 0.8846102250440344, iteration: 206882
loss: 1.0136116743087769,grad_norm: 0.987853865820829, iteration: 206883
loss: 1.0023123025894165,grad_norm: 0.9347863915577885, iteration: 206884
loss: 1.0529125928878784,grad_norm: 0.849914000496863, iteration: 206885
loss: 1.006624698638916,grad_norm: 0.7838276908067362, iteration: 206886
loss: 0.9656899571418762,grad_norm: 0.9499090603356221, iteration: 206887
loss: 0.9530278444290161,grad_norm: 0.9999991972142682, iteration: 206888
loss: 0.9860048890113831,grad_norm: 0.9107005339468193, iteration: 206889
loss: 1.0471540689468384,grad_norm: 1.0000000119123378, iteration: 206890
loss: 0.9799715280532837,grad_norm: 0.861340768060986, iteration: 206891
loss: 0.9818642735481262,grad_norm: 0.9656401177282746, iteration: 206892
loss: 1.0063230991363525,grad_norm: 0.9563094967438698, iteration: 206893
loss: 0.9957842826843262,grad_norm: 0.8779574649915699, iteration: 206894
loss: 0.9962142705917358,grad_norm: 0.9342929739920471, iteration: 206895
loss: 0.9676092863082886,grad_norm: 0.9105098029855544, iteration: 206896
loss: 0.9958212375640869,grad_norm: 0.9999990524192632, iteration: 206897
loss: 1.0119597911834717,grad_norm: 0.9999990963560348, iteration: 206898
loss: 1.0449621677398682,grad_norm: 0.9804926068458887, iteration: 206899
loss: 1.019044280052185,grad_norm: 0.8337518332324682, iteration: 206900
loss: 0.9821895360946655,grad_norm: 0.8247686806323152, iteration: 206901
loss: 0.9947236776351929,grad_norm: 0.9999991233940048, iteration: 206902
loss: 0.9932109117507935,grad_norm: 0.9999990869807903, iteration: 206903
loss: 0.9844739437103271,grad_norm: 0.9150444462736372, iteration: 206904
loss: 1.0330382585525513,grad_norm: 0.8743001071709328, iteration: 206905
loss: 0.9894920587539673,grad_norm: 0.9999989787157179, iteration: 206906
loss: 0.9802102446556091,grad_norm: 0.7722584964079058, iteration: 206907
loss: 0.9901030659675598,grad_norm: 0.9999992522426958, iteration: 206908
loss: 0.9977647662162781,grad_norm: 0.9355246000657803, iteration: 206909
loss: 0.9993547797203064,grad_norm: 0.8331451105901125, iteration: 206910
loss: 0.9853109121322632,grad_norm: 0.961664848494195, iteration: 206911
loss: 1.0201289653778076,grad_norm: 0.8252411872223242, iteration: 206912
loss: 0.9627789258956909,grad_norm: 0.7423032250271404, iteration: 206913
loss: 0.9414350390434265,grad_norm: 0.8576034214674669, iteration: 206914
loss: 1.0799052715301514,grad_norm: 0.9999991525917852, iteration: 206915
loss: 0.9867410659790039,grad_norm: 0.9999990078605814, iteration: 206916
loss: 0.9865943789482117,grad_norm: 0.9153548197165422, iteration: 206917
loss: 1.0190014839172363,grad_norm: 0.9252918945922682, iteration: 206918
loss: 1.0137125253677368,grad_norm: 0.9999989604191845, iteration: 206919
loss: 0.981232762336731,grad_norm: 0.8812216782456502, iteration: 206920
loss: 0.9542415142059326,grad_norm: 0.999999107664072, iteration: 206921
loss: 1.0189155340194702,grad_norm: 0.9483480852203866, iteration: 206922
loss: 1.010646939277649,grad_norm: 0.9999995687035803, iteration: 206923
loss: 1.0295839309692383,grad_norm: 0.855435072471886, iteration: 206924
loss: 1.2613091468811035,grad_norm: 0.9999996370247458, iteration: 206925
loss: 1.017077922821045,grad_norm: 0.9999992361819202, iteration: 206926
loss: 0.9966118335723877,grad_norm: 0.9999993296864187, iteration: 206927
loss: 1.0441863536834717,grad_norm: 0.9398463201286561, iteration: 206928
loss: 0.9789806008338928,grad_norm: 0.7141266545999843, iteration: 206929
loss: 0.9815125465393066,grad_norm: 0.8803190109586841, iteration: 206930
loss: 0.9947821497917175,grad_norm: 0.999999356110479, iteration: 206931
loss: 0.9948366284370422,grad_norm: 0.9999993369149158, iteration: 206932
loss: 0.9988098740577698,grad_norm: 0.8383852627263667, iteration: 206933
loss: 1.0358271598815918,grad_norm: 0.9999990634909665, iteration: 206934
loss: 1.1990618705749512,grad_norm: 0.9999999866425907, iteration: 206935
loss: 0.991407036781311,grad_norm: 0.9345811605493688, iteration: 206936
loss: 0.9946104288101196,grad_norm: 0.9685438371920366, iteration: 206937
loss: 1.0145927667617798,grad_norm: 0.9999992470512805, iteration: 206938
loss: 1.0494561195373535,grad_norm: 0.9999991839049742, iteration: 206939
loss: 1.1808470487594604,grad_norm: 0.9999996985087779, iteration: 206940
loss: 1.0191397666931152,grad_norm: 0.999999715946473, iteration: 206941
loss: 1.0254496335983276,grad_norm: 0.96956224153867, iteration: 206942
loss: 1.2092859745025635,grad_norm: 0.9999999116445643, iteration: 206943
loss: 0.9769172072410583,grad_norm: 0.9947555308792778, iteration: 206944
loss: 0.9851248860359192,grad_norm: 0.8777368558753553, iteration: 206945
loss: 1.0528229475021362,grad_norm: 0.9999995356378925, iteration: 206946
loss: 0.9853922128677368,grad_norm: 0.9999991211398683, iteration: 206947
loss: 0.9946382641792297,grad_norm: 0.9774157184825086, iteration: 206948
loss: 1.0039801597595215,grad_norm: 0.8382156572471999, iteration: 206949
loss: 1.01651132106781,grad_norm: 0.790581845751704, iteration: 206950
loss: 0.9877084493637085,grad_norm: 0.8504474142912415, iteration: 206951
loss: 1.005551815032959,grad_norm: 0.9474776542708412, iteration: 206952
loss: 0.9661634564399719,grad_norm: 0.9335453006581748, iteration: 206953
loss: 1.0083808898925781,grad_norm: 0.9225531525112728, iteration: 206954
loss: 1.0511056184768677,grad_norm: 0.9999990730379337, iteration: 206955
loss: 1.0216234922409058,grad_norm: 0.8741440856956096, iteration: 206956
loss: 0.9779013395309448,grad_norm: 0.9999992935175952, iteration: 206957
loss: 0.970109760761261,grad_norm: 0.8029007734822926, iteration: 206958
loss: 0.9706426858901978,grad_norm: 0.8917173186273304, iteration: 206959
loss: 0.9784591197967529,grad_norm: 0.8935054671479745, iteration: 206960
loss: 1.0090454816818237,grad_norm: 0.9999990145500521, iteration: 206961
loss: 0.9641809463500977,grad_norm: 0.8172216193829153, iteration: 206962
loss: 1.0224257707595825,grad_norm: 0.9516245242474558, iteration: 206963
loss: 1.007607340812683,grad_norm: 0.9914987754191618, iteration: 206964
loss: 0.9703797101974487,grad_norm: 0.8652560878124852, iteration: 206965
loss: 1.0055477619171143,grad_norm: 0.9999989310477594, iteration: 206966
loss: 0.9913309216499329,grad_norm: 0.9170307767793763, iteration: 206967
loss: 0.9985898733139038,grad_norm: 0.8889425248160236, iteration: 206968
loss: 1.0132153034210205,grad_norm: 0.881751695029752, iteration: 206969
loss: 0.9921489357948303,grad_norm: 0.7775200976117322, iteration: 206970
loss: 0.9789865612983704,grad_norm: 0.9999992300729933, iteration: 206971
loss: 1.005946397781372,grad_norm: 0.8428625220382834, iteration: 206972
loss: 1.011634111404419,grad_norm: 0.8863984042090897, iteration: 206973
loss: 0.995429515838623,grad_norm: 0.9999995854979731, iteration: 206974
loss: 0.9832258224487305,grad_norm: 0.9999990277069545, iteration: 206975
loss: 0.9959104657173157,grad_norm: 0.9383616722724579, iteration: 206976
loss: 1.0047783851623535,grad_norm: 0.9999990443187416, iteration: 206977
loss: 1.0320415496826172,grad_norm: 0.9050791399276003, iteration: 206978
loss: 0.9838760495185852,grad_norm: 0.895388321182636, iteration: 206979
loss: 1.001865267753601,grad_norm: 0.999999180198271, iteration: 206980
loss: 1.0324763059616089,grad_norm: 0.9123311195595468, iteration: 206981
loss: 0.978135883808136,grad_norm: 0.999999091463255, iteration: 206982
loss: 1.0071982145309448,grad_norm: 0.9120981881769931, iteration: 206983
loss: 0.9821019768714905,grad_norm: 0.831305087987512, iteration: 206984
loss: 1.0580322742462158,grad_norm: 0.9999995979632574, iteration: 206985
loss: 1.04763662815094,grad_norm: 0.9999992110219398, iteration: 206986
loss: 1.2288079261779785,grad_norm: 0.9999999695489814, iteration: 206987
loss: 1.0117541551589966,grad_norm: 0.9999996412354866, iteration: 206988
loss: 1.008233904838562,grad_norm: 0.9217240290921626, iteration: 206989
loss: 1.010245442390442,grad_norm: 0.8828209652410695, iteration: 206990
loss: 1.073508858680725,grad_norm: 0.999999841652988, iteration: 206991
loss: 0.9969077706336975,grad_norm: 0.9999991586873308, iteration: 206992
loss: 0.9358556270599365,grad_norm: 0.9842464861817048, iteration: 206993
loss: 0.9867834448814392,grad_norm: 0.9999990456234493, iteration: 206994
loss: 1.0043237209320068,grad_norm: 0.9737897342561209, iteration: 206995
loss: 0.9882211089134216,grad_norm: 0.9999991961987157, iteration: 206996
loss: 1.0628752708435059,grad_norm: 0.9999991669941182, iteration: 206997
loss: 1.0441089868545532,grad_norm: 0.9999990207201417, iteration: 206998
loss: 1.0184146165847778,grad_norm: 0.9040682521623514, iteration: 206999
loss: 0.9968874454498291,grad_norm: 0.9162851497129748, iteration: 207000
loss: 1.0201538801193237,grad_norm: 0.9357204835354471, iteration: 207001
loss: 0.9678986668586731,grad_norm: 0.9916638555707055, iteration: 207002
loss: 1.015909194946289,grad_norm: 0.9999990997168373, iteration: 207003
loss: 1.0021398067474365,grad_norm: 0.9726820365118629, iteration: 207004
loss: 0.9346237778663635,grad_norm: 0.9873630168931922, iteration: 207005
loss: 1.0183402299880981,grad_norm: 0.9999999149117403, iteration: 207006
loss: 1.0529788732528687,grad_norm: 0.9999991338418379, iteration: 207007
loss: 1.0268462896347046,grad_norm: 0.9564437881323452, iteration: 207008
loss: 1.059244155883789,grad_norm: 0.9999999705195349, iteration: 207009
loss: 1.14800226688385,grad_norm: 0.9999996310068539, iteration: 207010
loss: 1.1636825799942017,grad_norm: 0.9999994524809911, iteration: 207011
loss: 0.97263503074646,grad_norm: 0.9115461529353119, iteration: 207012
loss: 1.2075806856155396,grad_norm: 0.9999998696368423, iteration: 207013
loss: 1.0103813409805298,grad_norm: 0.903767005693784, iteration: 207014
loss: 0.9929860830307007,grad_norm: 0.9107622423403244, iteration: 207015
loss: 1.025651454925537,grad_norm: 0.9999992758948416, iteration: 207016
loss: 1.0272630453109741,grad_norm: 0.9589817798117343, iteration: 207017
loss: 1.0361838340759277,grad_norm: 0.9999990875022788, iteration: 207018
loss: 0.9972705841064453,grad_norm: 0.9999992033582717, iteration: 207019
loss: 1.0163053274154663,grad_norm: 0.8155068238153754, iteration: 207020
loss: 0.9911504983901978,grad_norm: 0.9662940960826105, iteration: 207021
loss: 1.0285903215408325,grad_norm: 0.9787674201171026, iteration: 207022
loss: 1.0500421524047852,grad_norm: 1.0000000134344733, iteration: 207023
loss: 1.0087553262710571,grad_norm: 0.9999990426482225, iteration: 207024
loss: 1.3280956745147705,grad_norm: 0.999999654368582, iteration: 207025
loss: 0.9897729158401489,grad_norm: 0.9999991713166366, iteration: 207026
loss: 1.0138849020004272,grad_norm: 0.9999991077856702, iteration: 207027
loss: 1.0428413152694702,grad_norm: 0.9999992942976623, iteration: 207028
loss: 1.037686824798584,grad_norm: 0.8785015837488687, iteration: 207029
loss: 1.0431125164031982,grad_norm: 0.9869984310828508, iteration: 207030
loss: 0.9786490797996521,grad_norm: 0.8811475880012559, iteration: 207031
loss: 1.0099737644195557,grad_norm: 0.9878942260809334, iteration: 207032
loss: 0.9992835521697998,grad_norm: 0.9999990640263234, iteration: 207033
loss: 0.9845779538154602,grad_norm: 0.854963672298794, iteration: 207034
loss: 1.024976372718811,grad_norm: 0.9999991395298323, iteration: 207035
loss: 0.9983956813812256,grad_norm: 0.9638618017098264, iteration: 207036
loss: 1.0445703268051147,grad_norm: 0.999999821781883, iteration: 207037
loss: 0.997386634349823,grad_norm: 0.9999992071675154, iteration: 207038
loss: 1.0192047357559204,grad_norm: 0.9999990816558597, iteration: 207039
loss: 0.9966893196105957,grad_norm: 0.967966676036571, iteration: 207040
loss: 1.0020581483840942,grad_norm: 0.9363934163578204, iteration: 207041
loss: 0.9978662133216858,grad_norm: 0.999999130120042, iteration: 207042
loss: 1.220650315284729,grad_norm: 0.9999993328396279, iteration: 207043
loss: 1.0736281871795654,grad_norm: 0.9999991957245994, iteration: 207044
loss: 0.9792557954788208,grad_norm: 0.9999992057959944, iteration: 207045
loss: 1.051681637763977,grad_norm: 0.9999997825631234, iteration: 207046
loss: 1.037465214729309,grad_norm: 0.8953135023435708, iteration: 207047
loss: 0.9716935753822327,grad_norm: 0.9895159033440211, iteration: 207048
loss: 1.0071954727172852,grad_norm: 0.9999990158168834, iteration: 207049
loss: 0.9889405369758606,grad_norm: 0.9999995984614665, iteration: 207050
loss: 1.0454134941101074,grad_norm: 0.8488570717003234, iteration: 207051
loss: 1.0412919521331787,grad_norm: 0.8762817805763088, iteration: 207052
loss: 0.9896778464317322,grad_norm: 0.9999993106766549, iteration: 207053
loss: 1.040211796760559,grad_norm: 0.9317031766619835, iteration: 207054
loss: 0.9762521386146545,grad_norm: 0.859042751131566, iteration: 207055
loss: 1.0207239389419556,grad_norm: 0.9999990692393045, iteration: 207056
loss: 1.012854814529419,grad_norm: 0.9598471724567478, iteration: 207057
loss: 0.9980377554893494,grad_norm: 0.999999231254637, iteration: 207058
loss: 0.9863004684448242,grad_norm: 0.9999993585557807, iteration: 207059
loss: 1.002511739730835,grad_norm: 0.8753999811265113, iteration: 207060
loss: 1.0169990062713623,grad_norm: 0.9981846441527898, iteration: 207061
loss: 0.9687418937683105,grad_norm: 0.9999990636137894, iteration: 207062
loss: 1.0334758758544922,grad_norm: 0.9999995433684591, iteration: 207063
loss: 1.0039819478988647,grad_norm: 0.9461983085536996, iteration: 207064
loss: 0.9985936284065247,grad_norm: 0.9581817073581029, iteration: 207065
loss: 1.002160906791687,grad_norm: 0.9999990924804408, iteration: 207066
loss: 1.008245825767517,grad_norm: 0.9315020316857008, iteration: 207067
loss: 1.0152055025100708,grad_norm: 0.999999209238073, iteration: 207068
loss: 1.0161190032958984,grad_norm: 0.9999991392366664, iteration: 207069
loss: 1.0071892738342285,grad_norm: 0.8521437845664963, iteration: 207070
loss: 0.9941920638084412,grad_norm: 0.9797490501350684, iteration: 207071
loss: 0.9925422072410583,grad_norm: 0.975775988345653, iteration: 207072
loss: 0.9894624948501587,grad_norm: 0.8873523175960459, iteration: 207073
loss: 1.0083221197128296,grad_norm: 0.9999996360099056, iteration: 207074
loss: 1.0228739976882935,grad_norm: 0.9204987900131296, iteration: 207075
loss: 1.0325807332992554,grad_norm: 0.9999989617051069, iteration: 207076
loss: 0.9713960886001587,grad_norm: 0.8766444816683083, iteration: 207077
loss: 0.9696075916290283,grad_norm: 0.8368207037037875, iteration: 207078
loss: 0.9979729652404785,grad_norm: 0.9999991317568128, iteration: 207079
loss: 0.9843758940696716,grad_norm: 0.9648729017505437, iteration: 207080
loss: 0.9929424524307251,grad_norm: 0.8971752704881883, iteration: 207081
loss: 1.0309056043624878,grad_norm: 0.9370610952005882, iteration: 207082
loss: 0.9989190697669983,grad_norm: 0.982919465070905, iteration: 207083
loss: 0.9919251799583435,grad_norm: 0.914346232278501, iteration: 207084
loss: 1.0443720817565918,grad_norm: 0.9999992013763526, iteration: 207085
loss: 0.9959368109703064,grad_norm: 0.9999992333908182, iteration: 207086
loss: 0.9600430727005005,grad_norm: 0.9071781553168503, iteration: 207087
loss: 0.9989507794380188,grad_norm: 0.9999992371587149, iteration: 207088
loss: 0.9865983724594116,grad_norm: 0.9500268948980466, iteration: 207089
loss: 0.9831642508506775,grad_norm: 0.8372379591635758, iteration: 207090
loss: 0.9490614533424377,grad_norm: 0.9559581539667183, iteration: 207091
loss: 1.0152337551116943,grad_norm: 0.9190947010109406, iteration: 207092
loss: 0.9962442517280579,grad_norm: 0.966649715756457, iteration: 207093
loss: 0.9827938079833984,grad_norm: 0.9999991345051229, iteration: 207094
loss: 1.0006990432739258,grad_norm: 0.8938484985276595, iteration: 207095
loss: 0.9940701723098755,grad_norm: 0.9999993357490674, iteration: 207096
loss: 1.0485748052597046,grad_norm: 0.999999118614828, iteration: 207097
loss: 1.0058032274246216,grad_norm: 0.9999994306096311, iteration: 207098
loss: 1.024527907371521,grad_norm: 0.9565514331106131, iteration: 207099
loss: 0.9552407264709473,grad_norm: 0.8772362577917028, iteration: 207100
loss: 1.0006673336029053,grad_norm: 0.9059355272535855, iteration: 207101
loss: 0.999698281288147,grad_norm: 0.9999992137502232, iteration: 207102
loss: 0.9970253705978394,grad_norm: 0.999999197807361, iteration: 207103
loss: 0.9718477129936218,grad_norm: 0.9999989904155591, iteration: 207104
loss: 0.9955399632453918,grad_norm: 0.9999990788355266, iteration: 207105
loss: 1.017331600189209,grad_norm: 0.8883305199562064, iteration: 207106
loss: 0.9917565584182739,grad_norm: 0.9423336773934745, iteration: 207107
loss: 0.9965723752975464,grad_norm: 0.9528703219453972, iteration: 207108
loss: 1.0302633047103882,grad_norm: 0.9999996543993215, iteration: 207109
loss: 1.0061911344528198,grad_norm: 0.9999995798079526, iteration: 207110
loss: 0.9819338917732239,grad_norm: 0.9114071808430685, iteration: 207111
loss: 0.9896283745765686,grad_norm: 0.8218843828830383, iteration: 207112
loss: 1.0168777704238892,grad_norm: 0.999999165633335, iteration: 207113
loss: 0.986652135848999,grad_norm: 0.9999990387973812, iteration: 207114
loss: 1.0048565864562988,grad_norm: 0.842035613182811, iteration: 207115
loss: 0.9762597680091858,grad_norm: 0.9999992186070776, iteration: 207116
loss: 0.999871015548706,grad_norm: 0.7441741572151005, iteration: 207117
loss: 0.987001895904541,grad_norm: 0.8418784304314851, iteration: 207118
loss: 1.0243538618087769,grad_norm: 0.9999990650355357, iteration: 207119
loss: 0.972964882850647,grad_norm: 0.9973547674890204, iteration: 207120
loss: 0.9965262413024902,grad_norm: 0.9999991307906296, iteration: 207121
loss: 1.0359408855438232,grad_norm: 0.8990144040180111, iteration: 207122
loss: 0.974712073802948,grad_norm: 0.9074977418018508, iteration: 207123
loss: 1.0077025890350342,grad_norm: 0.9007942105484279, iteration: 207124
loss: 1.0203042030334473,grad_norm: 0.9452988251342633, iteration: 207125
loss: 1.0170197486877441,grad_norm: 0.9999992843607126, iteration: 207126
loss: 0.9647253751754761,grad_norm: 0.9406912802810031, iteration: 207127
loss: 1.0053445100784302,grad_norm: 0.9471736313500697, iteration: 207128
loss: 1.0113465785980225,grad_norm: 0.999999588083029, iteration: 207129
loss: 0.9877474308013916,grad_norm: 0.9946121751352457, iteration: 207130
loss: 1.0200490951538086,grad_norm: 0.992659672277479, iteration: 207131
loss: 0.98419189453125,grad_norm: 0.9999990330780623, iteration: 207132
loss: 1.037892460823059,grad_norm: 0.8319202037831892, iteration: 207133
loss: 1.0410853624343872,grad_norm: 0.9999990534065376, iteration: 207134
loss: 1.0011136531829834,grad_norm: 0.9661793461144988, iteration: 207135
loss: 1.01374351978302,grad_norm: 0.9999990213329638, iteration: 207136
loss: 1.0079102516174316,grad_norm: 0.7962062231519542, iteration: 207137
loss: 1.040358543395996,grad_norm: 0.9571591613796657, iteration: 207138
loss: 1.026394009590149,grad_norm: 0.9043108541110131, iteration: 207139
loss: 1.058971881866455,grad_norm: 0.9999999382992352, iteration: 207140
loss: 0.9629266262054443,grad_norm: 0.9999996227648246, iteration: 207141
loss: 1.0091543197631836,grad_norm: 0.9999995292751487, iteration: 207142
loss: 0.9974609613418579,grad_norm: 0.9999990601226246, iteration: 207143
loss: 0.9555866718292236,grad_norm: 0.8283256996112256, iteration: 207144
loss: 1.0034621953964233,grad_norm: 0.7570185052122081, iteration: 207145
loss: 1.0025674104690552,grad_norm: 0.8611993274379491, iteration: 207146
loss: 0.9925026893615723,grad_norm: 0.9999989898186223, iteration: 207147
loss: 1.0327563285827637,grad_norm: 0.999999173059005, iteration: 207148
loss: 1.0108230113983154,grad_norm: 0.8109895476150764, iteration: 207149
loss: 0.957426905632019,grad_norm: 0.8799267311756181, iteration: 207150
loss: 1.0158495903015137,grad_norm: 0.9999990388258608, iteration: 207151
loss: 1.0080654621124268,grad_norm: 0.947850319928886, iteration: 207152
loss: 0.9740400314331055,grad_norm: 0.9999990460653975, iteration: 207153
loss: 1.0267608165740967,grad_norm: 0.9166252243698514, iteration: 207154
loss: 1.0280588865280151,grad_norm: 0.9646094667581515, iteration: 207155
loss: 1.1149804592132568,grad_norm: 0.9999997240153518, iteration: 207156
loss: 0.9901657104492188,grad_norm: 0.9673778435502542, iteration: 207157
loss: 0.9908735156059265,grad_norm: 0.9821063814471513, iteration: 207158
loss: 0.9955127835273743,grad_norm: 0.9115044335843877, iteration: 207159
loss: 1.0170347690582275,grad_norm: 0.810629255948086, iteration: 207160
loss: 0.9872975945472717,grad_norm: 0.9465654176622965, iteration: 207161
loss: 0.9534755945205688,grad_norm: 0.9841512137543733, iteration: 207162
loss: 1.0310492515563965,grad_norm: 0.9819769219735245, iteration: 207163
loss: 0.9939639568328857,grad_norm: 0.8779570967933958, iteration: 207164
loss: 1.0116801261901855,grad_norm: 0.8068725233498528, iteration: 207165
loss: 0.982591986656189,grad_norm: 0.9537250858611794, iteration: 207166
loss: 1.0153146982192993,grad_norm: 0.9429494215614824, iteration: 207167
loss: 1.0002391338348389,grad_norm: 0.9410676020426878, iteration: 207168
loss: 0.9819069504737854,grad_norm: 0.8670441453994946, iteration: 207169
loss: 1.0111055374145508,grad_norm: 0.999999414711663, iteration: 207170
loss: 1.0106967687606812,grad_norm: 0.9999992553650654, iteration: 207171
loss: 1.0237621068954468,grad_norm: 0.9999991154626738, iteration: 207172
loss: 1.0169621706008911,grad_norm: 0.902041388840829, iteration: 207173
loss: 1.0046216249465942,grad_norm: 0.9900242533591089, iteration: 207174
loss: 1.0118550062179565,grad_norm: 0.9320746081411769, iteration: 207175
loss: 1.0123310089111328,grad_norm: 0.9999990426262335, iteration: 207176
loss: 1.0092804431915283,grad_norm: 0.9999995651481541, iteration: 207177
loss: 0.9817823171615601,grad_norm: 0.999999035888182, iteration: 207178
loss: 1.0044255256652832,grad_norm: 0.9999994315386924, iteration: 207179
loss: 1.0448609590530396,grad_norm: 0.8650142209252893, iteration: 207180
loss: 0.9713448882102966,grad_norm: 0.9999990873016875, iteration: 207181
loss: 1.024062991142273,grad_norm: 0.9999991009525266, iteration: 207182
loss: 1.0175639390945435,grad_norm: 0.7586615697369126, iteration: 207183
loss: 1.047788143157959,grad_norm: 0.9999991739749995, iteration: 207184
loss: 1.003533959388733,grad_norm: 0.9569935865820157, iteration: 207185
loss: 1.0381405353546143,grad_norm: 0.9999991905147682, iteration: 207186
loss: 0.9978541135787964,grad_norm: 0.9190550467328781, iteration: 207187
loss: 0.9910297989845276,grad_norm: 0.8311184119044293, iteration: 207188
loss: 1.0111992359161377,grad_norm: 0.8791458860424436, iteration: 207189
loss: 0.9750906229019165,grad_norm: 0.78694760105081, iteration: 207190
loss: 0.9710729122161865,grad_norm: 0.9460480286896318, iteration: 207191
loss: 0.981533408164978,grad_norm: 0.9999992417014709, iteration: 207192
loss: 1.0481431484222412,grad_norm: 0.9468957599283402, iteration: 207193
loss: 1.0165455341339111,grad_norm: 0.999999407826696, iteration: 207194
loss: 0.9972636103630066,grad_norm: 0.9999990863258639, iteration: 207195
loss: 0.9976930618286133,grad_norm: 0.9608634777203958, iteration: 207196
loss: 1.0092295408248901,grad_norm: 0.9999993694258563, iteration: 207197
loss: 1.009594202041626,grad_norm: 0.9891943435954881, iteration: 207198
loss: 0.9481140375137329,grad_norm: 0.8572794644438182, iteration: 207199
loss: 1.0748555660247803,grad_norm: 0.9999998927905014, iteration: 207200
loss: 1.0072585344314575,grad_norm: 0.9999990698141574, iteration: 207201
loss: 1.0020936727523804,grad_norm: 0.9285172067136095, iteration: 207202
loss: 1.011155605316162,grad_norm: 0.8475838916336899, iteration: 207203
loss: 1.0092562437057495,grad_norm: 0.883092828917672, iteration: 207204
loss: 0.9526591897010803,grad_norm: 0.963077759492435, iteration: 207205
loss: 1.079282283782959,grad_norm: 0.9999991670590493, iteration: 207206
loss: 0.9491575360298157,grad_norm: 0.9999995701582216, iteration: 207207
loss: 0.9941439032554626,grad_norm: 0.985048373728328, iteration: 207208
loss: 1.03797447681427,grad_norm: 0.9999990659436638, iteration: 207209
loss: 1.0410503149032593,grad_norm: 0.9999990264087417, iteration: 207210
loss: 1.0789004564285278,grad_norm: 0.999999567138849, iteration: 207211
loss: 1.007481336593628,grad_norm: 0.9916085841444382, iteration: 207212
loss: 0.9803638458251953,grad_norm: 0.9674605524586823, iteration: 207213
loss: 0.9936214089393616,grad_norm: 0.7546390719161602, iteration: 207214
loss: 1.0008437633514404,grad_norm: 0.854451847109918, iteration: 207215
loss: 1.0264999866485596,grad_norm: 0.9999991039785562, iteration: 207216
loss: 1.060737133026123,grad_norm: 0.9999993160509266, iteration: 207217
loss: 1.054830551147461,grad_norm: 0.9999991248315955, iteration: 207218
loss: 1.0275579690933228,grad_norm: 0.8975662632040301, iteration: 207219
loss: 0.9870585203170776,grad_norm: 0.964019055524427, iteration: 207220
loss: 1.0249583721160889,grad_norm: 0.9948156122010627, iteration: 207221
loss: 0.9751841425895691,grad_norm: 0.894622171262973, iteration: 207222
loss: 0.9963452816009521,grad_norm: 0.9309857415226767, iteration: 207223
loss: 1.0191868543624878,grad_norm: 0.8766542779559843, iteration: 207224
loss: 1.1000069379806519,grad_norm: 0.9999999359539606, iteration: 207225
loss: 0.9880998730659485,grad_norm: 0.975309195853749, iteration: 207226
loss: 1.029043197631836,grad_norm: 0.7430325923035552, iteration: 207227
loss: 1.0507464408874512,grad_norm: 0.9999993464051561, iteration: 207228
loss: 1.0011425018310547,grad_norm: 0.9394429249669108, iteration: 207229
loss: 1.017696738243103,grad_norm: 0.9999991342594939, iteration: 207230
loss: 1.0012626647949219,grad_norm: 0.7906243852707376, iteration: 207231
loss: 0.9991675019264221,grad_norm: 0.9792269414334492, iteration: 207232
loss: 1.0105394124984741,grad_norm: 0.8034175910017112, iteration: 207233
loss: 0.9910115003585815,grad_norm: 0.9999991578157268, iteration: 207234
loss: 1.0244009494781494,grad_norm: 0.9999990693101054, iteration: 207235
loss: 1.0266958475112915,grad_norm: 0.999999124807967, iteration: 207236
loss: 0.9745696783065796,grad_norm: 0.9738845372254471, iteration: 207237
loss: 1.0125638246536255,grad_norm: 0.7985330156579609, iteration: 207238
loss: 1.010332465171814,grad_norm: 0.907249530121398, iteration: 207239
loss: 0.9795011281967163,grad_norm: 0.9920860000521794, iteration: 207240
loss: 1.0325671434402466,grad_norm: 0.9999994287580636, iteration: 207241
loss: 0.9962561130523682,grad_norm: 0.9999990828323498, iteration: 207242
loss: 1.0256826877593994,grad_norm: 0.9999994308236142, iteration: 207243
loss: 1.0187668800354004,grad_norm: 0.8928713236434156, iteration: 207244
loss: 1.0424450635910034,grad_norm: 0.9999996922445795, iteration: 207245
loss: 1.0081579685211182,grad_norm: 0.8003210943610546, iteration: 207246
loss: 1.0196201801300049,grad_norm: 0.9999991192655686, iteration: 207247
loss: 0.9666647911071777,grad_norm: 0.9999990474774861, iteration: 207248
loss: 1.0126579999923706,grad_norm: 0.8117753952578746, iteration: 207249
loss: 1.0172545909881592,grad_norm: 0.9999992845593786, iteration: 207250
loss: 0.9831995964050293,grad_norm: 0.9999991975618303, iteration: 207251
loss: 1.0023598670959473,grad_norm: 0.9999990979264953, iteration: 207252
loss: 1.0315009355545044,grad_norm: 0.9941006419222966, iteration: 207253
loss: 1.012636661529541,grad_norm: 0.9544632607260225, iteration: 207254
loss: 0.9866601228713989,grad_norm: 0.9999991567531747, iteration: 207255
loss: 1.0061804056167603,grad_norm: 0.9703973197892936, iteration: 207256
loss: 1.0108028650283813,grad_norm: 0.8955897806869316, iteration: 207257
loss: 1.013079285621643,grad_norm: 0.9196267647149826, iteration: 207258
loss: 0.9835723042488098,grad_norm: 0.8575600759412344, iteration: 207259
loss: 1.0217868089675903,grad_norm: 0.9999991332696396, iteration: 207260
loss: 1.1045764684677124,grad_norm: 0.9999996936490408, iteration: 207261
loss: 0.9547242522239685,grad_norm: 0.9305149428923326, iteration: 207262
loss: 1.1182235479354858,grad_norm: 0.9999997942948267, iteration: 207263
loss: 0.9980466961860657,grad_norm: 0.9999998388785948, iteration: 207264
loss: 0.977539598941803,grad_norm: 0.8412782930759057, iteration: 207265
loss: 1.0314974784851074,grad_norm: 0.9946196095437088, iteration: 207266
loss: 1.0167607069015503,grad_norm: 0.9504950864982881, iteration: 207267
loss: 0.9570792317390442,grad_norm: 0.9729563230430941, iteration: 207268
loss: 0.9862663745880127,grad_norm: 0.9576657954213705, iteration: 207269
loss: 1.0118552446365356,grad_norm: 0.9999991252491671, iteration: 207270
loss: 1.0188502073287964,grad_norm: 0.916194556634611, iteration: 207271
loss: 0.9741009473800659,grad_norm: 0.9911984667056619, iteration: 207272
loss: 0.983432948589325,grad_norm: 0.8971620817167241, iteration: 207273
loss: 0.995765209197998,grad_norm: 0.8565617304359959, iteration: 207274
loss: 1.0229195356369019,grad_norm: 0.9571009468673304, iteration: 207275
loss: 0.9804054498672485,grad_norm: 0.9552123346114981, iteration: 207276
loss: 0.9957454204559326,grad_norm: 0.8485328350619807, iteration: 207277
loss: 0.9851816892623901,grad_norm: 0.9370242118364983, iteration: 207278
loss: 0.9833611845970154,grad_norm: 0.7649473421239362, iteration: 207279
loss: 1.0506243705749512,grad_norm: 0.9999993015452583, iteration: 207280
loss: 1.0388178825378418,grad_norm: 0.9858894160478108, iteration: 207281
loss: 0.9817161560058594,grad_norm: 0.9999990592663787, iteration: 207282
loss: 1.002447485923767,grad_norm: 0.8753159486685386, iteration: 207283
loss: 1.005176305770874,grad_norm: 0.8256743544091842, iteration: 207284
loss: 1.0049489736557007,grad_norm: 0.9999994409432138, iteration: 207285
loss: 0.9679831266403198,grad_norm: 0.893392279305482, iteration: 207286
loss: 1.053098201751709,grad_norm: 0.9999990568556013, iteration: 207287
loss: 1.0039165019989014,grad_norm: 0.9664470873512765, iteration: 207288
loss: 1.0219788551330566,grad_norm: 0.9223377379918145, iteration: 207289
loss: 1.036320447921753,grad_norm: 0.999999179405545, iteration: 207290
loss: 0.9727676510810852,grad_norm: 0.99999894647748, iteration: 207291
loss: 1.0112608671188354,grad_norm: 0.8820055364525078, iteration: 207292
loss: 1.1376696825027466,grad_norm: 0.9999994514144087, iteration: 207293
loss: 1.0356931686401367,grad_norm: 0.9999992735010728, iteration: 207294
loss: 1.0090315341949463,grad_norm: 0.999999134195451, iteration: 207295
loss: 1.0065006017684937,grad_norm: 0.8429093705627221, iteration: 207296
loss: 1.0169936418533325,grad_norm: 0.8560974224738399, iteration: 207297
loss: 0.9885481595993042,grad_norm: 0.9999991783814983, iteration: 207298
loss: 0.9915156960487366,grad_norm: 0.844851944672646, iteration: 207299
loss: 0.9925488233566284,grad_norm: 0.95625089994533, iteration: 207300
loss: 0.9589404463768005,grad_norm: 0.8679457886428289, iteration: 207301
loss: 1.0426121950149536,grad_norm: 0.9999994215130776, iteration: 207302
loss: 0.971244215965271,grad_norm: 0.8823162746013994, iteration: 207303
loss: 1.007477045059204,grad_norm: 0.9524347292369157, iteration: 207304
loss: 0.9969726800918579,grad_norm: 0.9999991514586251, iteration: 207305
loss: 1.1159968376159668,grad_norm: 0.9999991800178547, iteration: 207306
loss: 1.0183230638504028,grad_norm: 0.9338081107319375, iteration: 207307
loss: 0.9971438050270081,grad_norm: 0.7029769965718253, iteration: 207308
loss: 1.0203499794006348,grad_norm: 0.9999997288311505, iteration: 207309
loss: 1.0031920671463013,grad_norm: 0.963720156423996, iteration: 207310
loss: 1.0293234586715698,grad_norm: 0.9999990770290137, iteration: 207311
loss: 1.025874376296997,grad_norm: 0.9999992172343358, iteration: 207312
loss: 1.0574686527252197,grad_norm: 0.9433238338153983, iteration: 207313
loss: 1.040017008781433,grad_norm: 0.9999990839202452, iteration: 207314
loss: 1.0428290367126465,grad_norm: 0.9819303629581944, iteration: 207315
loss: 1.029226303100586,grad_norm: 0.9996546390066141, iteration: 207316
loss: 1.0129332542419434,grad_norm: 0.9223703218692808, iteration: 207317
loss: 1.0293025970458984,grad_norm: 0.9928377104445909, iteration: 207318
loss: 0.9901615977287292,grad_norm: 0.9999990988807244, iteration: 207319
loss: 1.0208951234817505,grad_norm: 0.8459260268721437, iteration: 207320
loss: 0.9938065409660339,grad_norm: 0.9415282692236759, iteration: 207321
loss: 1.0013551712036133,grad_norm: 0.9999995951146731, iteration: 207322
loss: 0.9800506830215454,grad_norm: 0.7791075230096761, iteration: 207323
loss: 0.9710464477539062,grad_norm: 0.8646705784661659, iteration: 207324
loss: 0.9752798080444336,grad_norm: 0.8578174833528871, iteration: 207325
loss: 0.962041974067688,grad_norm: 0.9256224405120138, iteration: 207326
loss: 0.9976910948753357,grad_norm: 0.945746784005612, iteration: 207327
loss: 1.0113803148269653,grad_norm: 0.9878984536874355, iteration: 207328
loss: 1.039415717124939,grad_norm: 0.9542248040517318, iteration: 207329
loss: 0.9872651100158691,grad_norm: 0.9836335329879788, iteration: 207330
loss: 1.0212178230285645,grad_norm: 0.9999999060272794, iteration: 207331
loss: 0.9960463643074036,grad_norm: 0.9999994725907426, iteration: 207332
loss: 0.9772927761077881,grad_norm: 0.9999990212632646, iteration: 207333
loss: 0.9738929271697998,grad_norm: 0.9999992150727738, iteration: 207334
loss: 0.9727637767791748,grad_norm: 0.9557112916985934, iteration: 207335
loss: 0.9680389165878296,grad_norm: 0.9999990405570356, iteration: 207336
loss: 1.0202698707580566,grad_norm: 0.7860164019931362, iteration: 207337
loss: 1.0118532180786133,grad_norm: 0.7225158272235788, iteration: 207338
loss: 0.9688149094581604,grad_norm: 0.9999994877114894, iteration: 207339
loss: 1.1047919988632202,grad_norm: 0.9999993423021305, iteration: 207340
loss: 1.008085012435913,grad_norm: 0.9675063247171501, iteration: 207341
loss: 1.0419120788574219,grad_norm: 0.974038622473776, iteration: 207342
loss: 0.9874091148376465,grad_norm: 0.8386591766343237, iteration: 207343
loss: 1.0023497343063354,grad_norm: 0.9546629181906152, iteration: 207344
loss: 1.0239522457122803,grad_norm: 0.883210917247987, iteration: 207345
loss: 1.0011745691299438,grad_norm: 0.80321303640124, iteration: 207346
loss: 1.0392656326293945,grad_norm: 0.9999990925682426, iteration: 207347
loss: 0.9943158030509949,grad_norm: 0.7007841141116953, iteration: 207348
loss: 1.0117316246032715,grad_norm: 0.911177297081174, iteration: 207349
loss: 0.9958369731903076,grad_norm: 0.8178024782571509, iteration: 207350
loss: 0.9951046109199524,grad_norm: 0.8658751615934009, iteration: 207351
loss: 0.9644973278045654,grad_norm: 0.9999990297206968, iteration: 207352
loss: 1.0113236904144287,grad_norm: 1.000000024870042, iteration: 207353
loss: 1.00482177734375,grad_norm: 0.9426117642648076, iteration: 207354
loss: 0.9801770448684692,grad_norm: 0.8232829723006246, iteration: 207355
loss: 1.04237699508667,grad_norm: 0.9999995980164448, iteration: 207356
loss: 1.0177152156829834,grad_norm: 0.9999989842343568, iteration: 207357
loss: 0.9969644546508789,grad_norm: 0.886857513751487, iteration: 207358
loss: 1.011389970779419,grad_norm: 0.8517558417310799, iteration: 207359
loss: 0.9996367692947388,grad_norm: 0.8481385928169768, iteration: 207360
loss: 1.0088329315185547,grad_norm: 0.9982858783484637, iteration: 207361
loss: 0.9832584857940674,grad_norm: 0.9999992031073973, iteration: 207362
loss: 0.9759439826011658,grad_norm: 0.9999992876580384, iteration: 207363
loss: 1.0067291259765625,grad_norm: 0.9999991097601629, iteration: 207364
loss: 0.9897157549858093,grad_norm: 0.9918112435992756, iteration: 207365
loss: 1.0918309688568115,grad_norm: 0.999999211951978, iteration: 207366
loss: 1.0258102416992188,grad_norm: 0.9565710024610868, iteration: 207367
loss: 1.0142234563827515,grad_norm: 0.9890104691949722, iteration: 207368
loss: 0.9550151824951172,grad_norm: 0.9999992022952309, iteration: 207369
loss: 1.0126768350601196,grad_norm: 0.9700833063168679, iteration: 207370
loss: 1.0026289224624634,grad_norm: 0.9999991037208134, iteration: 207371
loss: 1.0288760662078857,grad_norm: 0.8478693176871439, iteration: 207372
loss: 1.03104829788208,grad_norm: 0.9999991023084388, iteration: 207373
loss: 0.9705412983894348,grad_norm: 0.9721936201688364, iteration: 207374
loss: 0.999101996421814,grad_norm: 0.8958493500733257, iteration: 207375
loss: 0.989649772644043,grad_norm: 0.9109008306429803, iteration: 207376
loss: 1.0151886940002441,grad_norm: 0.9759537146205347, iteration: 207377
loss: 1.0194920301437378,grad_norm: 0.8967263329604214, iteration: 207378
loss: 0.9882386326789856,grad_norm: 0.9999991133517167, iteration: 207379
loss: 1.0156711339950562,grad_norm: 0.9976977761197936, iteration: 207380
loss: 0.9510204195976257,grad_norm: 0.9121826983541566, iteration: 207381
loss: 0.9925490617752075,grad_norm: 0.9999993747642397, iteration: 207382
loss: 1.017095923423767,grad_norm: 0.9654756501955687, iteration: 207383
loss: 0.9957072138786316,grad_norm: 0.923341417606464, iteration: 207384
loss: 0.9870845079421997,grad_norm: 0.9169073590635026, iteration: 207385
loss: 0.9708083868026733,grad_norm: 0.8892993138365415, iteration: 207386
loss: 1.009962797164917,grad_norm: 0.9089905168286019, iteration: 207387
loss: 1.0186251401901245,grad_norm: 0.8935087426452406, iteration: 207388
loss: 0.9688808917999268,grad_norm: 0.9999989534341823, iteration: 207389
loss: 0.9728406667709351,grad_norm: 0.9423472352993352, iteration: 207390
loss: 0.990462064743042,grad_norm: 0.9999989679222901, iteration: 207391
loss: 0.9620503783226013,grad_norm: 0.7995476013875694, iteration: 207392
loss: 1.0440926551818848,grad_norm: 0.9319899094075212, iteration: 207393
loss: 0.9862610697746277,grad_norm: 0.9999990436190677, iteration: 207394
loss: 1.0027358531951904,grad_norm: 0.9999990526795435, iteration: 207395
loss: 1.0130733251571655,grad_norm: 0.9999992054586351, iteration: 207396
loss: 1.0118738412857056,grad_norm: 0.9040518796544996, iteration: 207397
loss: 1.059302806854248,grad_norm: 0.8658697262994265, iteration: 207398
loss: 1.017772912979126,grad_norm: 0.9999991100970823, iteration: 207399
loss: 0.9585759043693542,grad_norm: 0.865592029743758, iteration: 207400
loss: 0.9425954222679138,grad_norm: 0.8559958514255248, iteration: 207401
loss: 1.0090008974075317,grad_norm: 0.9999991607351467, iteration: 207402
loss: 1.0137232542037964,grad_norm: 0.7928998451197949, iteration: 207403
loss: 1.103040099143982,grad_norm: 0.9999990663711785, iteration: 207404
loss: 0.9990155100822449,grad_norm: 0.999999088708814, iteration: 207405
loss: 0.9523335695266724,grad_norm: 0.9999989457399537, iteration: 207406
loss: 0.9973575472831726,grad_norm: 0.9999990609692474, iteration: 207407
loss: 1.0159204006195068,grad_norm: 0.8982353722063088, iteration: 207408
loss: 1.1307214498519897,grad_norm: 0.9999992737610744, iteration: 207409
loss: 0.9589166045188904,grad_norm: 0.9999990736329627, iteration: 207410
loss: 1.010608434677124,grad_norm: 0.9999989771392833, iteration: 207411
loss: 1.0291924476623535,grad_norm: 0.8779515354865747, iteration: 207412
loss: 1.0200012922286987,grad_norm: 0.8224901888724767, iteration: 207413
loss: 0.962671160697937,grad_norm: 0.9999993075658619, iteration: 207414
loss: 1.013113021850586,grad_norm: 0.8983550940018553, iteration: 207415
loss: 1.0335516929626465,grad_norm: 0.9999991329924659, iteration: 207416
loss: 0.9711089134216309,grad_norm: 0.8921606167941575, iteration: 207417
loss: 0.9740265011787415,grad_norm: 0.9274629652618939, iteration: 207418
loss: 0.9922120571136475,grad_norm: 0.8809463813715253, iteration: 207419
loss: 1.0251590013504028,grad_norm: 0.9999993905575472, iteration: 207420
loss: 1.0396915674209595,grad_norm: 0.8322966449726605, iteration: 207421
loss: 1.0228737592697144,grad_norm: 0.9999989593021512, iteration: 207422
loss: 1.017316460609436,grad_norm: 0.9515786041061733, iteration: 207423
loss: 1.0094130039215088,grad_norm: 0.8975264127011237, iteration: 207424
loss: 0.9859422445297241,grad_norm: 0.9260243669017916, iteration: 207425
loss: 0.9768722653388977,grad_norm: 0.906983432040973, iteration: 207426
loss: 1.034294605255127,grad_norm: 0.9999991415192406, iteration: 207427
loss: 1.0268840789794922,grad_norm: 0.9655329605030174, iteration: 207428
loss: 0.987226665019989,grad_norm: 0.9194976620312987, iteration: 207429
loss: 0.9824393391609192,grad_norm: 0.815042146148037, iteration: 207430
loss: 0.9801363945007324,grad_norm: 0.9882505488875655, iteration: 207431
loss: 1.0147473812103271,grad_norm: 0.9250026291994913, iteration: 207432
loss: 1.0040366649627686,grad_norm: 0.8887296287745483, iteration: 207433
loss: 1.0372928380966187,grad_norm: 0.9999997185869236, iteration: 207434
loss: 1.0218396186828613,grad_norm: 0.904879263048666, iteration: 207435
loss: 1.01631498336792,grad_norm: 0.9999991534188712, iteration: 207436
loss: 0.9966860413551331,grad_norm: 0.9999990392421867, iteration: 207437
loss: 1.0008457899093628,grad_norm: 0.9523598586257151, iteration: 207438
loss: 0.9607441425323486,grad_norm: 0.9999992242102118, iteration: 207439
loss: 0.996878445148468,grad_norm: 0.9999989300995515, iteration: 207440
loss: 1.024048924446106,grad_norm: 0.9999990723022298, iteration: 207441
loss: 1.0025988817214966,grad_norm: 0.8738970334646549, iteration: 207442
loss: 0.9852174520492554,grad_norm: 0.9999992288817309, iteration: 207443
loss: 1.0243065357208252,grad_norm: 0.9999993121977154, iteration: 207444
loss: 0.9618134498596191,grad_norm: 0.9999990558281561, iteration: 207445
loss: 1.0076626539230347,grad_norm: 0.8497699896360115, iteration: 207446
loss: 0.9867658615112305,grad_norm: 0.8215402117178684, iteration: 207447
loss: 0.9956099987030029,grad_norm: 0.7786689486885257, iteration: 207448
loss: 0.9926754236221313,grad_norm: 0.9999990277521293, iteration: 207449
loss: 0.9958530068397522,grad_norm: 0.8334093468222977, iteration: 207450
loss: 1.0006556510925293,grad_norm: 0.9763728498294476, iteration: 207451
loss: 1.0148390531539917,grad_norm: 0.981939546146727, iteration: 207452
loss: 1.17392098903656,grad_norm: 1.0000000814133758, iteration: 207453
loss: 0.9338391423225403,grad_norm: 0.8352099523943316, iteration: 207454
loss: 1.0105068683624268,grad_norm: 0.9518581416066958, iteration: 207455
loss: 0.9857165813446045,grad_norm: 0.8748369583136971, iteration: 207456
loss: 0.9876519441604614,grad_norm: 0.999999099899686, iteration: 207457
loss: 0.9656563997268677,grad_norm: 0.9999990719697586, iteration: 207458
loss: 0.9847724437713623,grad_norm: 0.9999991824556925, iteration: 207459
loss: 1.0348509550094604,grad_norm: 0.9795411656915741, iteration: 207460
loss: 1.0072450637817383,grad_norm: 0.9977659230165188, iteration: 207461
loss: 0.9903209805488586,grad_norm: 0.9999989778860003, iteration: 207462
loss: 1.0067120790481567,grad_norm: 0.7806781538870478, iteration: 207463
loss: 1.0443642139434814,grad_norm: 0.9992941242128524, iteration: 207464
loss: 1.0176119804382324,grad_norm: 0.9241501975673885, iteration: 207465
loss: 1.0052216053009033,grad_norm: 0.9999991420829788, iteration: 207466
loss: 1.001963496208191,grad_norm: 0.9044437883440871, iteration: 207467
loss: 0.9931605458259583,grad_norm: 0.999999290619045, iteration: 207468
loss: 1.030224323272705,grad_norm: 0.9347334009402034, iteration: 207469
loss: 1.0306437015533447,grad_norm: 0.9737359727039148, iteration: 207470
loss: 0.997859001159668,grad_norm: 0.9113509296413916, iteration: 207471
loss: 0.9933695793151855,grad_norm: 0.9999994493328472, iteration: 207472
loss: 1.027203917503357,grad_norm: 0.9734656995922557, iteration: 207473
loss: 1.0000743865966797,grad_norm: 0.8492800583860695, iteration: 207474
loss: 1.077139139175415,grad_norm: 0.999999141106691, iteration: 207475
loss: 1.0038138628005981,grad_norm: 0.7278662263051703, iteration: 207476
loss: 0.9962365627288818,grad_norm: 0.9932859809248868, iteration: 207477
loss: 0.994245707988739,grad_norm: 0.9999990368612427, iteration: 207478
loss: 1.0067460536956787,grad_norm: 0.9180214216405705, iteration: 207479
loss: 1.0336101055145264,grad_norm: 0.9662766552189594, iteration: 207480
loss: 1.0199415683746338,grad_norm: 0.9999990023103438, iteration: 207481
loss: 1.0506194829940796,grad_norm: 0.9999991026187984, iteration: 207482
loss: 1.0227824449539185,grad_norm: 0.8747246258017534, iteration: 207483
loss: 1.0073477029800415,grad_norm: 0.9999904687265446, iteration: 207484
loss: 0.9873433113098145,grad_norm: 0.9327729969252556, iteration: 207485
loss: 0.9840837717056274,grad_norm: 0.8675840757198883, iteration: 207486
loss: 1.0042897462844849,grad_norm: 0.9802863040667148, iteration: 207487
loss: 1.0030783414840698,grad_norm: 0.9014296580029372, iteration: 207488
loss: 0.9807513356208801,grad_norm: 0.8220714006503528, iteration: 207489
loss: 1.0064672231674194,grad_norm: 0.9227867495214905, iteration: 207490
loss: 1.0313595533370972,grad_norm: 0.999999125393176, iteration: 207491
loss: 0.963526725769043,grad_norm: 0.8630873852422956, iteration: 207492
loss: 0.984926700592041,grad_norm: 0.8232862085631947, iteration: 207493
loss: 0.9985143542289734,grad_norm: 0.7797663190436769, iteration: 207494
loss: 0.9559204578399658,grad_norm: 0.8454285167536205, iteration: 207495
loss: 0.9616114497184753,grad_norm: 0.933065309336947, iteration: 207496
loss: 0.9805297255516052,grad_norm: 0.8135869048881388, iteration: 207497
loss: 1.010886311531067,grad_norm: 0.9316272182281743, iteration: 207498
loss: 1.0102492570877075,grad_norm: 0.9826988887969234, iteration: 207499
loss: 0.9634428024291992,grad_norm: 0.9999989645000984, iteration: 207500
loss: 0.9886857867240906,grad_norm: 0.9999994160393411, iteration: 207501
loss: 1.0068798065185547,grad_norm: 0.9791325702969166, iteration: 207502
loss: 1.1079202890396118,grad_norm: 0.9999999117777485, iteration: 207503
loss: 0.9849873185157776,grad_norm: 0.9999990186211628, iteration: 207504
loss: 0.9804423451423645,grad_norm: 0.8997142612073843, iteration: 207505
loss: 1.00898277759552,grad_norm: 0.8564882077173572, iteration: 207506
loss: 0.9977771639823914,grad_norm: 0.7791062177586389, iteration: 207507
loss: 1.0949517488479614,grad_norm: 0.8086819902494183, iteration: 207508
loss: 1.0073857307434082,grad_norm: 0.999952660379132, iteration: 207509
loss: 0.9810662865638733,grad_norm: 0.9999991891146355, iteration: 207510
loss: 1.0174169540405273,grad_norm: 0.8405284651247087, iteration: 207511
loss: 1.039384126663208,grad_norm: 0.7753066386043648, iteration: 207512
loss: 1.0645256042480469,grad_norm: 0.9999991245624059, iteration: 207513
loss: 0.9946382641792297,grad_norm: 0.800502594221545, iteration: 207514
loss: 0.9983035922050476,grad_norm: 0.9795604927178084, iteration: 207515
loss: 1.1091551780700684,grad_norm: 0.9999993298478507, iteration: 207516
loss: 1.001602053642273,grad_norm: 0.8875050420057036, iteration: 207517
loss: 1.01447331905365,grad_norm: 0.944059547283975, iteration: 207518
loss: 1.0172436237335205,grad_norm: 0.9311682903888905, iteration: 207519
loss: 0.9634392857551575,grad_norm: 0.9655126217820906, iteration: 207520
loss: 1.0260968208312988,grad_norm: 0.8930326842916813, iteration: 207521
loss: 0.9928658604621887,grad_norm: 0.9263342711758938, iteration: 207522
loss: 1.0020639896392822,grad_norm: 0.999999211969122, iteration: 207523
loss: 0.9579697251319885,grad_norm: 0.9351063707464933, iteration: 207524
loss: 1.0192339420318604,grad_norm: 0.9999996079950121, iteration: 207525
loss: 0.9794156551361084,grad_norm: 0.9101549743694066, iteration: 207526
loss: 0.9761540293693542,grad_norm: 0.9080824557781528, iteration: 207527
loss: 1.011254906654358,grad_norm: 0.7705803491174583, iteration: 207528
loss: 1.0483078956604004,grad_norm: 0.8242279178845016, iteration: 207529
loss: 1.0194733142852783,grad_norm: 0.9999991095573096, iteration: 207530
loss: 0.9943035244941711,grad_norm: 0.9181729542242923, iteration: 207531
loss: 0.9975299835205078,grad_norm: 0.9198559724943619, iteration: 207532
loss: 0.9604789018630981,grad_norm: 0.9077073289157543, iteration: 207533
loss: 1.03246009349823,grad_norm: 0.9999992109776763, iteration: 207534
loss: 1.0162997245788574,grad_norm: 0.9371138942037026, iteration: 207535
loss: 1.0681065320968628,grad_norm: 0.8820301741345254, iteration: 207536
loss: 1.0702929496765137,grad_norm: 0.9948829640780231, iteration: 207537
loss: 1.0888426303863525,grad_norm: 0.9517687935990429, iteration: 207538
loss: 0.9765952825546265,grad_norm: 0.9259830727005547, iteration: 207539
loss: 1.0072673559188843,grad_norm: 0.9769617294057857, iteration: 207540
loss: 0.9584675431251526,grad_norm: 0.9238354518809566, iteration: 207541
loss: 1.133240818977356,grad_norm: 0.9999991324606871, iteration: 207542
loss: 0.9785842895507812,grad_norm: 0.8885992800025343, iteration: 207543
loss: 1.0149424076080322,grad_norm: 0.907670544427005, iteration: 207544
loss: 0.9656370282173157,grad_norm: 0.9999990435768966, iteration: 207545
loss: 0.975806474685669,grad_norm: 0.999999279240544, iteration: 207546
loss: 1.0054147243499756,grad_norm: 0.974757240577988, iteration: 207547
loss: 1.0070079565048218,grad_norm: 0.9556095566546298, iteration: 207548
loss: 1.0138143301010132,grad_norm: 0.9999993647358649, iteration: 207549
loss: 0.9897698760032654,grad_norm: 0.7734610486334402, iteration: 207550
loss: 0.965690016746521,grad_norm: 0.9999990776096451, iteration: 207551
loss: 0.9702298045158386,grad_norm: 0.8567898044618365, iteration: 207552
loss: 0.9829761981964111,grad_norm: 0.8686483868692964, iteration: 207553
loss: 1.0904784202575684,grad_norm: 0.9999991410249828, iteration: 207554
loss: 0.9424452781677246,grad_norm: 0.9999989763623404, iteration: 207555
loss: 1.020435094833374,grad_norm: 0.981371190780738, iteration: 207556
loss: 1.020858883857727,grad_norm: 0.995948479264634, iteration: 207557
loss: 0.9869920611381531,grad_norm: 0.9999990541515703, iteration: 207558
loss: 1.0028129816055298,grad_norm: 0.8650668585122189, iteration: 207559
loss: 1.032307744026184,grad_norm: 0.9999993377122923, iteration: 207560
loss: 0.9910646080970764,grad_norm: 0.9999991007412777, iteration: 207561
loss: 0.9971867203712463,grad_norm: 0.8533663475173192, iteration: 207562
loss: 1.067152976989746,grad_norm: 0.9999991424270505, iteration: 207563
loss: 1.0393848419189453,grad_norm: 0.886798802490115, iteration: 207564
loss: 1.0371904373168945,grad_norm: 0.9999991679792427, iteration: 207565
loss: 0.9561465382575989,grad_norm: 0.9999988730177635, iteration: 207566
loss: 1.0030449628829956,grad_norm: 0.8524427097407112, iteration: 207567
loss: 1.0044275522232056,grad_norm: 0.9999997345864308, iteration: 207568
loss: 0.9922557473182678,grad_norm: 0.9625277428990852, iteration: 207569
loss: 1.0122056007385254,grad_norm: 0.9480268391576406, iteration: 207570
loss: 0.9553026556968689,grad_norm: 0.8940971042168946, iteration: 207571
loss: 0.9601500630378723,grad_norm: 0.9028804637902831, iteration: 207572
loss: 1.007782220840454,grad_norm: 0.900405786337357, iteration: 207573
loss: 1.0249778032302856,grad_norm: 0.9905471580392983, iteration: 207574
loss: 0.9451063275337219,grad_norm: 0.9232057745077988, iteration: 207575
loss: 1.0490671396255493,grad_norm: 0.8873217284250028, iteration: 207576
loss: 0.9983475804328918,grad_norm: 0.8979693235420126, iteration: 207577
loss: 1.0987311601638794,grad_norm: 0.9999995113986664, iteration: 207578
loss: 1.022987961769104,grad_norm: 0.99999987264744, iteration: 207579
loss: 0.9862450361251831,grad_norm: 0.8161691930315459, iteration: 207580
loss: 0.9824535846710205,grad_norm: 0.9641061735689738, iteration: 207581
loss: 1.0063283443450928,grad_norm: 0.9947206213298827, iteration: 207582
loss: 1.0605714321136475,grad_norm: 0.9234135115468449, iteration: 207583
loss: 0.9645904302597046,grad_norm: 0.9999990085578018, iteration: 207584
loss: 0.9830029010772705,grad_norm: 0.9999992371259752, iteration: 207585
loss: 0.9989084005355835,grad_norm: 0.9954322571553199, iteration: 207586
loss: 0.993577778339386,grad_norm: 0.9559121274217052, iteration: 207587
loss: 0.9813586473464966,grad_norm: 0.8884044051648804, iteration: 207588
loss: 1.0156294107437134,grad_norm: 0.8763459454608212, iteration: 207589
loss: 1.0478029251098633,grad_norm: 0.9711635097822289, iteration: 207590
loss: 0.9603397250175476,grad_norm: 0.9373879612664913, iteration: 207591
loss: 0.9853821992874146,grad_norm: 0.8801981898457863, iteration: 207592
loss: 1.0353771448135376,grad_norm: 0.8144831959665189, iteration: 207593
loss: 1.0408568382263184,grad_norm: 0.9999990549360751, iteration: 207594
loss: 1.0458508729934692,grad_norm: 0.9999990599341214, iteration: 207595
loss: 1.031822681427002,grad_norm: 0.8072232301929176, iteration: 207596
loss: 1.0260885953903198,grad_norm: 0.9999990043048077, iteration: 207597
loss: 0.9857596158981323,grad_norm: 0.9999992203103193, iteration: 207598
loss: 1.0224111080169678,grad_norm: 0.8703697584408145, iteration: 207599
loss: 0.9627904295921326,grad_norm: 0.9999990424875556, iteration: 207600
loss: 1.0369924306869507,grad_norm: 0.9999990655504278, iteration: 207601
loss: 1.053674578666687,grad_norm: 0.9999991600674467, iteration: 207602
loss: 0.9946324229240417,grad_norm: 0.9999989462464416, iteration: 207603
loss: 1.002087950706482,grad_norm: 0.8536108936633565, iteration: 207604
loss: 0.9825564622879028,grad_norm: 0.8820714915426228, iteration: 207605
loss: 1.1096549034118652,grad_norm: 1.0000001058546903, iteration: 207606
loss: 0.9826015830039978,grad_norm: 0.8929539175625986, iteration: 207607
loss: 0.997105062007904,grad_norm: 0.8680621097521197, iteration: 207608
loss: 0.9503694772720337,grad_norm: 0.99999905110586, iteration: 207609
loss: 0.9684419631958008,grad_norm: 0.8498999088113376, iteration: 207610
loss: 1.0192309617996216,grad_norm: 0.8459164499363832, iteration: 207611
loss: 1.03638756275177,grad_norm: 0.7979757551127353, iteration: 207612
loss: 1.014021396636963,grad_norm: 0.959050088852724, iteration: 207613
loss: 1.0064252614974976,grad_norm: 0.8038483809919971, iteration: 207614
loss: 1.027262568473816,grad_norm: 0.9603339332296281, iteration: 207615
loss: 0.9795641899108887,grad_norm: 0.9423217839499534, iteration: 207616
loss: 1.0028091669082642,grad_norm: 0.8537339874945955, iteration: 207617
loss: 0.9902359843254089,grad_norm: 0.9999997776305916, iteration: 207618
loss: 0.9938147068023682,grad_norm: 0.9431762360077129, iteration: 207619
loss: 1.0166363716125488,grad_norm: 0.8571351355022045, iteration: 207620
loss: 0.9936972856521606,grad_norm: 0.7323542892540092, iteration: 207621
loss: 1.0209146738052368,grad_norm: 0.9999992914284697, iteration: 207622
loss: 0.97109055519104,grad_norm: 0.8283854120632856, iteration: 207623
loss: 1.076035976409912,grad_norm: 0.9999991727246519, iteration: 207624
loss: 1.003861665725708,grad_norm: 0.8318095396068375, iteration: 207625
loss: 0.9968792796134949,grad_norm: 0.8783349554814817, iteration: 207626
loss: 0.9931465983390808,grad_norm: 0.9838326365559993, iteration: 207627
loss: 0.9984009265899658,grad_norm: 0.959317316343893, iteration: 207628
loss: 1.0297809839248657,grad_norm: 0.9999993519385129, iteration: 207629
loss: 1.0080510377883911,grad_norm: 0.9524761020558247, iteration: 207630
loss: 0.9876013398170471,grad_norm: 0.9497839851638341, iteration: 207631
loss: 1.0255649089813232,grad_norm: 0.8166002344143453, iteration: 207632
loss: 1.034714937210083,grad_norm: 0.9999991009858193, iteration: 207633
loss: 0.9876552820205688,grad_norm: 0.9766766986550792, iteration: 207634
loss: 1.0209177732467651,grad_norm: 0.9547163130997347, iteration: 207635
loss: 0.9854317307472229,grad_norm: 0.8931319475778622, iteration: 207636
loss: 0.9938123226165771,grad_norm: 0.9294022054595321, iteration: 207637
loss: 1.0022270679473877,grad_norm: 0.8958496167492577, iteration: 207638
loss: 0.9939853549003601,grad_norm: 0.824186404977782, iteration: 207639
loss: 1.003918170928955,grad_norm: 0.8101727217465537, iteration: 207640
loss: 0.983154833316803,grad_norm: 0.8748962611784157, iteration: 207641
loss: 0.9933938980102539,grad_norm: 0.9999989253584636, iteration: 207642
loss: 0.9806024432182312,grad_norm: 0.9377051688618382, iteration: 207643
loss: 0.9806793332099915,grad_norm: 0.8765034855057339, iteration: 207644
loss: 1.0291165113449097,grad_norm: 0.8875603197724017, iteration: 207645
loss: 0.9867975115776062,grad_norm: 0.9026747036656377, iteration: 207646
loss: 0.9734705090522766,grad_norm: 0.820281105139878, iteration: 207647
loss: 0.9894861578941345,grad_norm: 0.8705169292523288, iteration: 207648
loss: 0.9844266176223755,grad_norm: 0.9041833011077094, iteration: 207649
loss: 1.004774808883667,grad_norm: 0.9999990376843516, iteration: 207650
loss: 1.0196762084960938,grad_norm: 0.9779261290936679, iteration: 207651
loss: 1.0195989608764648,grad_norm: 0.9012088791222146, iteration: 207652
loss: 0.9970323443412781,grad_norm: 0.7603355077664923, iteration: 207653
loss: 0.9866023659706116,grad_norm: 0.9313943823678853, iteration: 207654
loss: 1.0029675960540771,grad_norm: 0.8610902069852211, iteration: 207655
loss: 0.9910439848899841,grad_norm: 0.8359326982733455, iteration: 207656
loss: 1.0317145586013794,grad_norm: 0.8819152251682865, iteration: 207657
loss: 0.9937798380851746,grad_norm: 0.9999990344489545, iteration: 207658
loss: 1.0464105606079102,grad_norm: 0.9910219913852338, iteration: 207659
loss: 0.9922938346862793,grad_norm: 0.9109108009557108, iteration: 207660
loss: 1.0047528743743896,grad_norm: 0.999999202452785, iteration: 207661
loss: 1.0116984844207764,grad_norm: 0.9218722507438865, iteration: 207662
loss: 1.0225831270217896,grad_norm: 0.8942136475484184, iteration: 207663
loss: 0.9858671426773071,grad_norm: 0.7989399477674534, iteration: 207664
loss: 0.9978764057159424,grad_norm: 0.9999991936785073, iteration: 207665
loss: 1.0130363702774048,grad_norm: 0.9473807041381923, iteration: 207666
loss: 0.9805483222007751,grad_norm: 0.8417614158285766, iteration: 207667
loss: 1.0141494274139404,grad_norm: 0.9999990064860934, iteration: 207668
loss: 0.9612369537353516,grad_norm: 0.9218435572744957, iteration: 207669
loss: 1.0318763256072998,grad_norm: 0.8848115306512032, iteration: 207670
loss: 0.9986268877983093,grad_norm: 0.8791554383223692, iteration: 207671
loss: 0.9782353639602661,grad_norm: 0.9300804443523145, iteration: 207672
loss: 1.00021231174469,grad_norm: 0.8685894069982666, iteration: 207673
loss: 0.9785527586936951,grad_norm: 0.9999990921359463, iteration: 207674
loss: 1.007462739944458,grad_norm: 0.9999990846213691, iteration: 207675
loss: 1.038381814956665,grad_norm: 0.9999995133555875, iteration: 207676
loss: 0.9748789072036743,grad_norm: 0.9297703780705758, iteration: 207677
loss: 1.0053107738494873,grad_norm: 0.9730486674053485, iteration: 207678
loss: 0.9934081435203552,grad_norm: 0.8790214885375311, iteration: 207679
loss: 0.9988730549812317,grad_norm: 0.9370127746458333, iteration: 207680
loss: 1.0271297693252563,grad_norm: 0.9999997872141267, iteration: 207681
loss: 0.9791048169136047,grad_norm: 0.9655488709137771, iteration: 207682
loss: 1.0280561447143555,grad_norm: 0.9151779657749631, iteration: 207683
loss: 1.0072295665740967,grad_norm: 0.8820995113823219, iteration: 207684
loss: 0.9961049556732178,grad_norm: 0.819290511125158, iteration: 207685
loss: 1.0178452730178833,grad_norm: 0.9999992261883522, iteration: 207686
loss: 0.9880171418190002,grad_norm: 0.9999992366623295, iteration: 207687
loss: 0.9962527751922607,grad_norm: 0.9999992405607664, iteration: 207688
loss: 0.9890232682228088,grad_norm: 0.9191494825640565, iteration: 207689
loss: 1.0373588800430298,grad_norm: 0.8731216975951283, iteration: 207690
loss: 1.0064871311187744,grad_norm: 0.9455393999311548, iteration: 207691
loss: 1.0001269578933716,grad_norm: 0.9330490367128269, iteration: 207692
loss: 1.0028010606765747,grad_norm: 0.9039285594301794, iteration: 207693
loss: 1.013748288154602,grad_norm: 0.8390950668690713, iteration: 207694
loss: 1.0652186870574951,grad_norm: 0.9999998468209559, iteration: 207695
loss: 1.0174198150634766,grad_norm: 0.9538251774416419, iteration: 207696
loss: 0.9815807342529297,grad_norm: 0.9999991926171478, iteration: 207697
loss: 1.0210391283035278,grad_norm: 0.8693405086927837, iteration: 207698
loss: 1.000478982925415,grad_norm: 0.9999991224315743, iteration: 207699
loss: 1.1100785732269287,grad_norm: 0.9999998880658799, iteration: 207700
loss: 1.0452231168746948,grad_norm: 0.9598984267104277, iteration: 207701
loss: 0.9720703363418579,grad_norm: 0.8736058936038378, iteration: 207702
loss: 1.0142585039138794,grad_norm: 0.9923111681549949, iteration: 207703
loss: 0.9858754277229309,grad_norm: 0.8968001561420791, iteration: 207704
loss: 1.0418949127197266,grad_norm: 0.978512200756448, iteration: 207705
loss: 1.0743945837020874,grad_norm: 0.999999663752052, iteration: 207706
loss: 1.018763780593872,grad_norm: 0.9021652337345067, iteration: 207707
loss: 1.0385040044784546,grad_norm: 0.9999993561306508, iteration: 207708
loss: 0.9966775178909302,grad_norm: 0.9197211373981325, iteration: 207709
loss: 0.9872276782989502,grad_norm: 0.9934442706474786, iteration: 207710
loss: 1.04559326171875,grad_norm: 0.8349546070330621, iteration: 207711
loss: 1.0616014003753662,grad_norm: 0.9999997129561995, iteration: 207712
loss: 1.0301578044891357,grad_norm: 0.8832773948610837, iteration: 207713
loss: 1.0115063190460205,grad_norm: 0.99999901156344, iteration: 207714
loss: 1.008887529373169,grad_norm: 0.8511788324910321, iteration: 207715
loss: 1.033797264099121,grad_norm: 0.9725529143317732, iteration: 207716
loss: 1.0351089239120483,grad_norm: 0.8573758498948766, iteration: 207717
loss: 0.9569997191429138,grad_norm: 0.999999080692085, iteration: 207718
loss: 1.1265485286712646,grad_norm: 0.9999999156334335, iteration: 207719
loss: 1.0381296873092651,grad_norm: 0.883679356163295, iteration: 207720
loss: 1.0089600086212158,grad_norm: 0.8191198906961072, iteration: 207721
loss: 0.9972423911094666,grad_norm: 0.9999990935509816, iteration: 207722
loss: 1.0015090703964233,grad_norm: 0.9250147851569017, iteration: 207723
loss: 0.9965836405754089,grad_norm: 0.8604350602787787, iteration: 207724
loss: 1.0221190452575684,grad_norm: 0.9999990437495981, iteration: 207725
loss: 1.0209177732467651,grad_norm: 0.9097034921771835, iteration: 207726
loss: 0.9825895428657532,grad_norm: 0.8481753059444628, iteration: 207727
loss: 0.9953011274337769,grad_norm: 0.9999992014188446, iteration: 207728
loss: 0.9692433476448059,grad_norm: 0.9935957590631033, iteration: 207729
loss: 0.9944339394569397,grad_norm: 0.9999990439130719, iteration: 207730
loss: 1.0270414352416992,grad_norm: 0.9262902338848695, iteration: 207731
loss: 1.0094630718231201,grad_norm: 0.9999992383835172, iteration: 207732
loss: 1.0291589498519897,grad_norm: 0.9999991740572028, iteration: 207733
loss: 0.9923396110534668,grad_norm: 0.9125027981646996, iteration: 207734
loss: 0.9872739315032959,grad_norm: 0.9634931755890951, iteration: 207735
loss: 1.0435609817504883,grad_norm: 0.9999996981757026, iteration: 207736
loss: 0.9918261170387268,grad_norm: 0.9999991172766611, iteration: 207737
loss: 0.9927798509597778,grad_norm: 0.9999991944141048, iteration: 207738
loss: 1.01688551902771,grad_norm: 0.9504019227462347, iteration: 207739
loss: 0.9953004121780396,grad_norm: 0.922298047077579, iteration: 207740
loss: 0.9944591522216797,grad_norm: 0.8322540403884483, iteration: 207741
loss: 1.0233821868896484,grad_norm: 0.8349778307743135, iteration: 207742
loss: 1.0377305746078491,grad_norm: 0.9999989520887418, iteration: 207743
loss: 0.9621289372444153,grad_norm: 0.9999990496432897, iteration: 207744
loss: 0.9957707524299622,grad_norm: 0.9999989171452751, iteration: 207745
loss: 1.0015250444412231,grad_norm: 0.9999991655503981, iteration: 207746
loss: 0.9833345413208008,grad_norm: 0.8525039070036636, iteration: 207747
loss: 0.9914435744285583,grad_norm: 0.7920634683305627, iteration: 207748
loss: 1.0018155574798584,grad_norm: 0.9197764312204676, iteration: 207749
loss: 0.9905380606651306,grad_norm: 0.8051236764997329, iteration: 207750
loss: 1.0219179391860962,grad_norm: 0.999998948863519, iteration: 207751
loss: 0.9979605078697205,grad_norm: 0.9999991567728157, iteration: 207752
loss: 1.0197434425354004,grad_norm: 0.9999992582605273, iteration: 207753
loss: 1.00690495967865,grad_norm: 0.9999991195915825, iteration: 207754
loss: 0.9511523842811584,grad_norm: 0.864591933553369, iteration: 207755
loss: 0.9950775504112244,grad_norm: 0.9943474631117131, iteration: 207756
loss: 1.0493144989013672,grad_norm: 0.9999991431184666, iteration: 207757
loss: 1.0027730464935303,grad_norm: 0.9762689544082191, iteration: 207758
loss: 1.0117100477218628,grad_norm: 0.8128547604894263, iteration: 207759
loss: 0.9937611818313599,grad_norm: 0.9999992094418386, iteration: 207760
loss: 1.0708602666854858,grad_norm: 0.9999991221479932, iteration: 207761
loss: 1.0377832651138306,grad_norm: 0.9999994030220966, iteration: 207762
loss: 1.005095362663269,grad_norm: 0.9999991681781587, iteration: 207763
loss: 0.9690880179405212,grad_norm: 0.866989290353155, iteration: 207764
loss: 1.0090185403823853,grad_norm: 0.9826919172067018, iteration: 207765
loss: 0.9915176630020142,grad_norm: 0.999999236830048, iteration: 207766
loss: 1.0193626880645752,grad_norm: 0.9661403952767947, iteration: 207767
loss: 1.0202109813690186,grad_norm: 0.9966217016616948, iteration: 207768
loss: 1.0068329572677612,grad_norm: 0.7227782795701714, iteration: 207769
loss: 0.9742997884750366,grad_norm: 0.924904928057715, iteration: 207770
loss: 1.0031299591064453,grad_norm: 0.8624556895147245, iteration: 207771
loss: 0.9747500419616699,grad_norm: 0.8203528715592896, iteration: 207772
loss: 1.0140701532363892,grad_norm: 0.8460362168042288, iteration: 207773
loss: 1.0190120935440063,grad_norm: 0.8346626905690773, iteration: 207774
loss: 1.0243593454360962,grad_norm: 0.9651222323397382, iteration: 207775
loss: 1.0476897954940796,grad_norm: 0.9999991401108299, iteration: 207776
loss: 1.1532641649246216,grad_norm: 0.9999999160872294, iteration: 207777
loss: 1.0217918157577515,grad_norm: 0.9036073697169418, iteration: 207778
loss: 1.083715796470642,grad_norm: 0.9999994642193433, iteration: 207779
loss: 1.0252329111099243,grad_norm: 0.9157442274872666, iteration: 207780
loss: 1.0340416431427002,grad_norm: 0.9999990548424372, iteration: 207781
loss: 0.9822245240211487,grad_norm: 0.9999990449186213, iteration: 207782
loss: 0.993367075920105,grad_norm: 0.9102243977335014, iteration: 207783
loss: 1.0203098058700562,grad_norm: 0.9007885328088289, iteration: 207784
loss: 1.0002572536468506,grad_norm: 0.8533919954407774, iteration: 207785
loss: 1.0315455198287964,grad_norm: 0.9999996632564715, iteration: 207786
loss: 0.9984745979309082,grad_norm: 0.9999992317503505, iteration: 207787
loss: 1.0082060098648071,grad_norm: 0.9502303265032637, iteration: 207788
loss: 0.9987578392028809,grad_norm: 0.9509388773486884, iteration: 207789
loss: 1.0112030506134033,grad_norm: 0.9999989641901759, iteration: 207790
loss: 0.9826004505157471,grad_norm: 0.8138107038770938, iteration: 207791
loss: 1.0141202211380005,grad_norm: 0.8678943119221983, iteration: 207792
loss: 1.0171630382537842,grad_norm: 0.9999990147847779, iteration: 207793
loss: 1.0021344423294067,grad_norm: 0.9999993328066065, iteration: 207794
loss: 1.0178940296173096,grad_norm: 0.9915929583009918, iteration: 207795
loss: 1.017160177230835,grad_norm: 0.9889002165638873, iteration: 207796
loss: 1.0088139772415161,grad_norm: 0.999999090194952, iteration: 207797
loss: 1.0121692419052124,grad_norm: 0.8888506553979418, iteration: 207798
loss: 1.0258986949920654,grad_norm: 0.8997676911871004, iteration: 207799
loss: 1.0389320850372314,grad_norm: 0.9999996594293243, iteration: 207800
loss: 1.0335516929626465,grad_norm: 0.9516686228649437, iteration: 207801
loss: 0.9985791444778442,grad_norm: 0.8894842974983039, iteration: 207802
loss: 0.999125599861145,grad_norm: 0.8007131936907085, iteration: 207803
loss: 0.9708645939826965,grad_norm: 0.845200986720723, iteration: 207804
loss: 0.9692935943603516,grad_norm: 0.9999990015556808, iteration: 207805
loss: 1.0017200708389282,grad_norm: 0.9426088327518966, iteration: 207806
loss: 0.981134295463562,grad_norm: 0.9639027454268517, iteration: 207807
loss: 1.0136983394622803,grad_norm: 0.8964748923453559, iteration: 207808
loss: 0.9880797863006592,grad_norm: 0.9999991455645579, iteration: 207809
loss: 1.0047647953033447,grad_norm: 0.9545129201320871, iteration: 207810
loss: 0.978256344795227,grad_norm: 0.9529613950470399, iteration: 207811
loss: 0.9917052388191223,grad_norm: 0.9999991566667826, iteration: 207812
loss: 0.9998714327812195,grad_norm: 0.8983602707738595, iteration: 207813
loss: 0.9923394918441772,grad_norm: 0.9717441895656045, iteration: 207814
loss: 0.9871025681495667,grad_norm: 0.9999990739007324, iteration: 207815
loss: 0.9806217551231384,grad_norm: 0.8600967427215336, iteration: 207816
loss: 0.9915227293968201,grad_norm: 0.9999989840532891, iteration: 207817
loss: 0.9834079742431641,grad_norm: 0.8590369822986069, iteration: 207818
loss: 0.9739559292793274,grad_norm: 0.9999991900790112, iteration: 207819
loss: 0.9824212789535522,grad_norm: 0.9497256887844704, iteration: 207820
loss: 0.9996000528335571,grad_norm: 0.8551448244761547, iteration: 207821
loss: 0.9905152320861816,grad_norm: 0.7643369146657949, iteration: 207822
loss: 0.992161750793457,grad_norm: 0.9663748395588599, iteration: 207823
loss: 1.0028882026672363,grad_norm: 0.9176523587262677, iteration: 207824
loss: 0.9938269257545471,grad_norm: 0.9789162422046505, iteration: 207825
loss: 1.0462703704833984,grad_norm: 0.9904565769314729, iteration: 207826
loss: 1.0206387042999268,grad_norm: 0.9704680223321561, iteration: 207827
loss: 1.2086774110794067,grad_norm: 0.9999998875348054, iteration: 207828
loss: 1.0077040195465088,grad_norm: 0.8127675101054133, iteration: 207829
loss: 1.038625717163086,grad_norm: 0.9709958026104518, iteration: 207830
loss: 0.9874315857887268,grad_norm: 0.9535701602469859, iteration: 207831
loss: 1.0106825828552246,grad_norm: 0.7445112909652712, iteration: 207832
loss: 0.9819141030311584,grad_norm: 0.839079056004891, iteration: 207833
loss: 0.9983844757080078,grad_norm: 0.93151898442335, iteration: 207834
loss: 1.0136072635650635,grad_norm: 0.9999990766672642, iteration: 207835
loss: 1.0006979703903198,grad_norm: 0.9807882212891603, iteration: 207836
loss: 0.948009729385376,grad_norm: 0.980569193183492, iteration: 207837
loss: 0.9762097597122192,grad_norm: 0.999998959332831, iteration: 207838
loss: 0.9898103475570679,grad_norm: 0.9565812723380948, iteration: 207839
loss: 1.025415062904358,grad_norm: 0.8269327437409207, iteration: 207840
loss: 0.989359974861145,grad_norm: 0.8182632219758919, iteration: 207841
loss: 0.9846315383911133,grad_norm: 0.8502791970010141, iteration: 207842
loss: 0.9521573781967163,grad_norm: 0.9206977201094043, iteration: 207843
loss: 1.00639009475708,grad_norm: 0.8829165099920198, iteration: 207844
loss: 0.9919827580451965,grad_norm: 0.9999990625507842, iteration: 207845
loss: 1.0019941329956055,grad_norm: 0.999999171565134, iteration: 207846
loss: 0.9910025000572205,grad_norm: 0.8674004981974266, iteration: 207847
loss: 1.0053610801696777,grad_norm: 0.8497617640413211, iteration: 207848
loss: 1.0298879146575928,grad_norm: 0.8248838880041457, iteration: 207849
loss: 0.9622770547866821,grad_norm: 0.975031319170165, iteration: 207850
loss: 0.9591395854949951,grad_norm: 0.9999991624015163, iteration: 207851
loss: 0.9947447776794434,grad_norm: 0.8563885304075746, iteration: 207852
loss: 1.0173161029815674,grad_norm: 0.9999991656913421, iteration: 207853
loss: 0.9930791258811951,grad_norm: 0.8453242883358059, iteration: 207854
loss: 0.9970486760139465,grad_norm: 0.9722619959081806, iteration: 207855
loss: 0.9856334328651428,grad_norm: 0.7959176141001472, iteration: 207856
loss: 0.9626662731170654,grad_norm: 0.9999990352075313, iteration: 207857
loss: 1.0016270875930786,grad_norm: 0.9999990649416604, iteration: 207858
loss: 1.0221744775772095,grad_norm: 0.7869760363208539, iteration: 207859
loss: 1.0035847425460815,grad_norm: 0.9999990915679309, iteration: 207860
loss: 1.0154162645339966,grad_norm: 0.9999990880730074, iteration: 207861
loss: 0.9914005398750305,grad_norm: 0.9999991172084023, iteration: 207862
loss: 0.9869747757911682,grad_norm: 0.9999990606484077, iteration: 207863
loss: 0.9972962141036987,grad_norm: 0.8484020510341688, iteration: 207864
loss: 0.9972590208053589,grad_norm: 0.861630344709557, iteration: 207865
loss: 0.9669599533081055,grad_norm: 0.8859305400618659, iteration: 207866
loss: 1.0186872482299805,grad_norm: 0.8916255518219546, iteration: 207867
loss: 1.0151565074920654,grad_norm: 0.8004519715802969, iteration: 207868
loss: 1.0447012186050415,grad_norm: 0.8482705701412119, iteration: 207869
loss: 1.0275171995162964,grad_norm: 0.8957813995193031, iteration: 207870
loss: 0.9834039211273193,grad_norm: 0.8357406032922148, iteration: 207871
loss: 1.0023620128631592,grad_norm: 0.8684875363198309, iteration: 207872
loss: 1.0190964937210083,grad_norm: 0.9999990740561194, iteration: 207873
loss: 0.9978278279304504,grad_norm: 0.9999991647031938, iteration: 207874
loss: 1.0085151195526123,grad_norm: 0.9999989606937242, iteration: 207875
loss: 0.9704933762550354,grad_norm: 0.9221261100122077, iteration: 207876
loss: 1.0782355070114136,grad_norm: 0.9999990517908807, iteration: 207877
loss: 1.0034855604171753,grad_norm: 0.897771111388572, iteration: 207878
loss: 0.9554538130760193,grad_norm: 0.9999992157897264, iteration: 207879
loss: 0.990256667137146,grad_norm: 0.8713528847064607, iteration: 207880
loss: 0.9963165521621704,grad_norm: 0.999999238649331, iteration: 207881
loss: 1.0281450748443604,grad_norm: 0.9504021576500999, iteration: 207882
loss: 1.0214149951934814,grad_norm: 0.9999991770090604, iteration: 207883
loss: 1.0227153301239014,grad_norm: 0.9999990806088204, iteration: 207884
loss: 1.059158205986023,grad_norm: 0.90666838763665, iteration: 207885
loss: 0.990658700466156,grad_norm: 0.8127456335507866, iteration: 207886
loss: 1.0137499570846558,grad_norm: 0.9402968674389081, iteration: 207887
loss: 1.0143686532974243,grad_norm: 0.9030495374461879, iteration: 207888
loss: 0.9764302968978882,grad_norm: 0.9999990520572397, iteration: 207889
loss: 1.0129472017288208,grad_norm: 0.8709990793382352, iteration: 207890
loss: 0.9950012564659119,grad_norm: 0.9432633359029876, iteration: 207891
loss: 0.9749992489814758,grad_norm: 0.8223536443891972, iteration: 207892
loss: 1.0101374387741089,grad_norm: 0.9999991229114292, iteration: 207893
loss: 0.9682950973510742,grad_norm: 0.9999990350097518, iteration: 207894
loss: 0.9769347906112671,grad_norm: 0.7802763086603677, iteration: 207895
loss: 0.9629483222961426,grad_norm: 0.9999990376016996, iteration: 207896
loss: 1.0122201442718506,grad_norm: 0.9548310397031742, iteration: 207897
loss: 1.0222169160842896,grad_norm: 0.9999989708069743, iteration: 207898
loss: 1.0290688276290894,grad_norm: 0.9122241736468614, iteration: 207899
loss: 0.979021430015564,grad_norm: 0.9999991158835712, iteration: 207900
loss: 1.046340823173523,grad_norm: 0.9075058262949994, iteration: 207901
loss: 0.9873335361480713,grad_norm: 0.8746128079835043, iteration: 207902
loss: 1.036630392074585,grad_norm: 0.999999256670565, iteration: 207903
loss: 0.964133620262146,grad_norm: 0.8886753773760699, iteration: 207904
loss: 0.9824416637420654,grad_norm: 0.9999991085228533, iteration: 207905
loss: 0.9694850444793701,grad_norm: 0.9999991068768185, iteration: 207906
loss: 0.9993676543235779,grad_norm: 0.8938920909675413, iteration: 207907
loss: 1.0195987224578857,grad_norm: 0.9999991819363753, iteration: 207908
loss: 0.989190936088562,grad_norm: 0.900628618747469, iteration: 207909
loss: 1.0411325693130493,grad_norm: 0.999999123455875, iteration: 207910
loss: 1.0115934610366821,grad_norm: 0.8604561745701244, iteration: 207911
loss: 0.9870409965515137,grad_norm: 0.9999990919320239, iteration: 207912
loss: 0.9949772357940674,grad_norm: 0.8940515676929334, iteration: 207913
loss: 1.0208282470703125,grad_norm: 0.9999990917987259, iteration: 207914
loss: 0.9734500646591187,grad_norm: 0.8742580186836654, iteration: 207915
loss: 1.021013617515564,grad_norm: 0.982220521471445, iteration: 207916
loss: 1.0128509998321533,grad_norm: 0.9257652807429582, iteration: 207917
loss: 1.005237340927124,grad_norm: 0.9914605034384887, iteration: 207918
loss: 1.015578031539917,grad_norm: 0.9999990915709628, iteration: 207919
loss: 0.9981777667999268,grad_norm: 0.9999991558666801, iteration: 207920
loss: 0.9975866675376892,grad_norm: 0.8478739372899102, iteration: 207921
loss: 0.9885711669921875,grad_norm: 0.8077678130071518, iteration: 207922
loss: 0.9936837553977966,grad_norm: 0.8223347469797638, iteration: 207923
loss: 0.9879782795906067,grad_norm: 0.9999990393849028, iteration: 207924
loss: 1.0393954515457153,grad_norm: 0.9345360192620558, iteration: 207925
loss: 1.012221336364746,grad_norm: 0.9999996323782073, iteration: 207926
loss: 0.9837480187416077,grad_norm: 0.8877966641638723, iteration: 207927
loss: 1.09345543384552,grad_norm: 0.9999993924060749, iteration: 207928
loss: 1.0838805437088013,grad_norm: 0.9999995297055962, iteration: 207929
loss: 1.026506781578064,grad_norm: 0.999999010073052, iteration: 207930
loss: 0.975564181804657,grad_norm: 0.8888290178907221, iteration: 207931
loss: 0.996189534664154,grad_norm: 0.8563455584544634, iteration: 207932
loss: 1.004964828491211,grad_norm: 0.8727556428353226, iteration: 207933
loss: 0.9839771389961243,grad_norm: 0.9445136373531223, iteration: 207934
loss: 1.016338586807251,grad_norm: 0.9112718254131148, iteration: 207935
loss: 0.9694833755493164,grad_norm: 0.7713794995354365, iteration: 207936
loss: 1.0083379745483398,grad_norm: 0.9999991544088378, iteration: 207937
loss: 1.0229450464248657,grad_norm: 0.947362145203015, iteration: 207938
loss: 1.0859174728393555,grad_norm: 0.9999994792256514, iteration: 207939
loss: 0.9864954352378845,grad_norm: 0.9733738090933876, iteration: 207940
loss: 1.0223582983016968,grad_norm: 0.9999990632705497, iteration: 207941
loss: 1.01426362991333,grad_norm: 0.9378545119105248, iteration: 207942
loss: 1.0288587808609009,grad_norm: 0.9013568684715961, iteration: 207943
loss: 1.0271615982055664,grad_norm: 0.8301378761739696, iteration: 207944
loss: 1.0081225633621216,grad_norm: 0.8434929668109515, iteration: 207945
loss: 0.9936046600341797,grad_norm: 0.9737698980089853, iteration: 207946
loss: 0.9771748185157776,grad_norm: 0.9999990625522762, iteration: 207947
loss: 0.9992313385009766,grad_norm: 0.9999991674925897, iteration: 207948
loss: 1.0009937286376953,grad_norm: 0.999999019350296, iteration: 207949
loss: 0.9913110136985779,grad_norm: 0.8551941288851697, iteration: 207950
loss: 1.003280758857727,grad_norm: 0.9354551338474162, iteration: 207951
loss: 1.0096355676651,grad_norm: 0.9999991058490667, iteration: 207952
loss: 0.9949631094932556,grad_norm: 0.7988426621159433, iteration: 207953
loss: 1.0159804821014404,grad_norm: 0.8368406132609606, iteration: 207954
loss: 1.000552773475647,grad_norm: 0.8373140549523642, iteration: 207955
loss: 0.9956822395324707,grad_norm: 0.9999991909755362, iteration: 207956
loss: 0.9675716161727905,grad_norm: 0.9999992471354614, iteration: 207957
loss: 1.0119831562042236,grad_norm: 0.938824981370837, iteration: 207958
loss: 1.015832543373108,grad_norm: 0.9742529321352893, iteration: 207959
loss: 0.9999687075614929,grad_norm: 0.7686328094165424, iteration: 207960
loss: 1.0153037309646606,grad_norm: 0.9999991362660158, iteration: 207961
loss: 1.0397300720214844,grad_norm: 0.9212547660667368, iteration: 207962
loss: 1.007784128189087,grad_norm: 0.9381306594908715, iteration: 207963
loss: 1.0034419298171997,grad_norm: 0.8841206208195145, iteration: 207964
loss: 1.0076022148132324,grad_norm: 0.7565331799983452, iteration: 207965
loss: 0.9832720756530762,grad_norm: 0.8807754384140467, iteration: 207966
loss: 0.989046573638916,grad_norm: 0.9999991113209501, iteration: 207967
loss: 0.9905462265014648,grad_norm: 0.8989638290854626, iteration: 207968
loss: 0.9835084676742554,grad_norm: 0.8243638919133408, iteration: 207969
loss: 1.0021418333053589,grad_norm: 0.844775151677676, iteration: 207970
loss: 1.010233998298645,grad_norm: 0.8799967398900226, iteration: 207971
loss: 0.9987378716468811,grad_norm: 0.9066999502129629, iteration: 207972
loss: 1.0195963382720947,grad_norm: 0.9780009625766878, iteration: 207973
loss: 0.990203320980072,grad_norm: 0.9999991277924535, iteration: 207974
loss: 0.9934051036834717,grad_norm: 0.9999992324557149, iteration: 207975
loss: 0.9946992993354797,grad_norm: 0.8364663560613086, iteration: 207976
loss: 1.0374869108200073,grad_norm: 0.999999742796022, iteration: 207977
loss: 1.010157585144043,grad_norm: 0.883551489123563, iteration: 207978
loss: 1.017264723777771,grad_norm: 0.9999990934369614, iteration: 207979
loss: 1.0107522010803223,grad_norm: 0.9618220066153702, iteration: 207980
loss: 1.0055454969406128,grad_norm: 0.8319748223400402, iteration: 207981
loss: 1.0298750400543213,grad_norm: 0.990328038478403, iteration: 207982
loss: 1.0114411115646362,grad_norm: 0.9599371312344713, iteration: 207983
loss: 1.0281753540039062,grad_norm: 0.9675607832492843, iteration: 207984
loss: 0.998301088809967,grad_norm: 0.9999989490097617, iteration: 207985
loss: 1.00093674659729,grad_norm: 0.9731991660855949, iteration: 207986
loss: 0.9601986408233643,grad_norm: 0.9323039156870295, iteration: 207987
loss: 0.9793745875358582,grad_norm: 0.9416654090618398, iteration: 207988
loss: 1.015446662902832,grad_norm: 0.9999989181984913, iteration: 207989
loss: 0.9866869449615479,grad_norm: 0.8401512827661106, iteration: 207990
loss: 0.9868224859237671,grad_norm: 0.999999171442281, iteration: 207991
loss: 1.002293586730957,grad_norm: 0.9668016658573205, iteration: 207992
loss: 1.0279061794281006,grad_norm: 0.8410094259709132, iteration: 207993
loss: 0.9853125810623169,grad_norm: 0.9736677799214494, iteration: 207994
loss: 0.9988290071487427,grad_norm: 0.8978312603147183, iteration: 207995
loss: 1.0031718015670776,grad_norm: 0.9967636841793859, iteration: 207996
loss: 1.0011305809020996,grad_norm: 0.9999990602095014, iteration: 207997
loss: 0.9713684916496277,grad_norm: 0.9505637514915769, iteration: 207998
loss: 0.9995932579040527,grad_norm: 0.9999991393007777, iteration: 207999
loss: 1.0193991661071777,grad_norm: 0.9999990849786591, iteration: 208000
loss: 0.977702260017395,grad_norm: 0.8769829024893228, iteration: 208001
loss: 1.0010536909103394,grad_norm: 0.8708572829576864, iteration: 208002
loss: 1.0069104433059692,grad_norm: 0.99338840641453, iteration: 208003
loss: 1.0359885692596436,grad_norm: 0.9210656037094368, iteration: 208004
loss: 1.0113459825515747,grad_norm: 0.8552733740061033, iteration: 208005
loss: 1.0416189432144165,grad_norm: 0.9344590654637558, iteration: 208006
loss: 0.9892444610595703,grad_norm: 0.8721656541385745, iteration: 208007
loss: 0.9799991846084595,grad_norm: 0.9176497406614027, iteration: 208008
loss: 1.0244784355163574,grad_norm: 0.9999991022920022, iteration: 208009
loss: 0.9870971441268921,grad_norm: 0.999999094302719, iteration: 208010
loss: 0.9811475872993469,grad_norm: 0.8574112151501633, iteration: 208011
loss: 0.9932497143745422,grad_norm: 0.92995241140376, iteration: 208012
loss: 1.01089346408844,grad_norm: 0.999999277885156, iteration: 208013
loss: 0.9761940240859985,grad_norm: 0.9508274218081463, iteration: 208014
loss: 0.9764611721038818,grad_norm: 0.8042981406887437, iteration: 208015
loss: 0.9679723381996155,grad_norm: 0.9579993213873532, iteration: 208016
loss: 1.0436276197433472,grad_norm: 0.9999990222513869, iteration: 208017
loss: 1.0171204805374146,grad_norm: 0.9999992311816984, iteration: 208018
loss: 0.9966667890548706,grad_norm: 0.99999912364973, iteration: 208019
loss: 0.9985231161117554,grad_norm: 0.8250914104090297, iteration: 208020
loss: 1.0624370574951172,grad_norm: 0.918465462278059, iteration: 208021
loss: 1.0172243118286133,grad_norm: 0.9051798705440733, iteration: 208022
loss: 0.9666032791137695,grad_norm: 0.8528638989091153, iteration: 208023
loss: 0.9788239002227783,grad_norm: 0.9999992115853026, iteration: 208024
loss: 0.987191379070282,grad_norm: 0.7955035366259868, iteration: 208025
loss: 0.990117609500885,grad_norm: 0.8730432303674206, iteration: 208026
loss: 1.0570026636123657,grad_norm: 0.8918680129347298, iteration: 208027
loss: 1.0229402780532837,grad_norm: 0.8076545734827476, iteration: 208028
loss: 1.022740125656128,grad_norm: 0.9999991765688938, iteration: 208029
loss: 0.9951213002204895,grad_norm: 0.9999993673597698, iteration: 208030
loss: 1.0187190771102905,grad_norm: 0.9058508098783752, iteration: 208031
loss: 0.9807994961738586,grad_norm: 0.8606309719190541, iteration: 208032
loss: 1.0333569049835205,grad_norm: 0.999999285721027, iteration: 208033
loss: 0.995036244392395,grad_norm: 0.9999992163831275, iteration: 208034
loss: 1.0270490646362305,grad_norm: 0.8800952819938285, iteration: 208035
loss: 0.9863927364349365,grad_norm: 0.9999992780999292, iteration: 208036
loss: 0.9740538001060486,grad_norm: 0.8437774002259405, iteration: 208037
loss: 1.0067964792251587,grad_norm: 0.8913355426947277, iteration: 208038
loss: 0.9839913845062256,grad_norm: 0.9425892161404035, iteration: 208039
loss: 1.011034607887268,grad_norm: 0.9681050520801939, iteration: 208040
loss: 0.9877370595932007,grad_norm: 0.999999249355342, iteration: 208041
loss: 1.0383124351501465,grad_norm: 0.9999989654109995, iteration: 208042
loss: 0.9976198077201843,grad_norm: 0.9999990543200599, iteration: 208043
loss: 1.095893383026123,grad_norm: 0.9999993139742087, iteration: 208044
loss: 1.0085557699203491,grad_norm: 0.9060297356167768, iteration: 208045
loss: 0.9722752571105957,grad_norm: 0.8859400400984372, iteration: 208046
loss: 1.0144741535186768,grad_norm: 0.999999132860261, iteration: 208047
loss: 1.0377483367919922,grad_norm: 0.89474678802613, iteration: 208048
loss: 1.0606071949005127,grad_norm: 0.9999993000144372, iteration: 208049
loss: 0.9666432738304138,grad_norm: 0.9856645710926932, iteration: 208050
loss: 1.0233244895935059,grad_norm: 0.9999991314342913, iteration: 208051
loss: 0.9981151819229126,grad_norm: 0.9999990647618242, iteration: 208052
loss: 1.0053067207336426,grad_norm: 0.9999989714411363, iteration: 208053
loss: 0.948561429977417,grad_norm: 0.9060912514059205, iteration: 208054
loss: 1.0241526365280151,grad_norm: 0.7953770759316905, iteration: 208055
loss: 1.008367896080017,grad_norm: 0.9930963188937629, iteration: 208056
loss: 0.9757149815559387,grad_norm: 0.9545490507304658, iteration: 208057
loss: 0.9800494313240051,grad_norm: 0.8770175845648093, iteration: 208058
loss: 0.9872690439224243,grad_norm: 0.7665115806871076, iteration: 208059
loss: 1.0117579698562622,grad_norm: 0.9999991053984072, iteration: 208060
loss: 0.9956364631652832,grad_norm: 0.9161958438898443, iteration: 208061
loss: 1.0197330713272095,grad_norm: 0.8276202105258976, iteration: 208062
loss: 0.9781389236450195,grad_norm: 0.7751785357566058, iteration: 208063
loss: 0.975936233997345,grad_norm: 0.9049796350081529, iteration: 208064
loss: 1.0631803274154663,grad_norm: 0.999999416630502, iteration: 208065
loss: 0.975674569606781,grad_norm: 0.9734007843313481, iteration: 208066
loss: 1.027768850326538,grad_norm: 0.8954720343496793, iteration: 208067
loss: 0.9629635214805603,grad_norm: 0.9999993055335071, iteration: 208068
loss: 0.9915260076522827,grad_norm: 0.886779073434807, iteration: 208069
loss: 1.0219111442565918,grad_norm: 0.8759821248569187, iteration: 208070
loss: 1.0213812589645386,grad_norm: 0.9999994563190409, iteration: 208071
loss: 0.964749276638031,grad_norm: 0.9999990511151767, iteration: 208072
loss: 0.993430495262146,grad_norm: 0.9999991151550697, iteration: 208073
loss: 1.0672515630722046,grad_norm: 0.9863939130528435, iteration: 208074
loss: 0.9983976483345032,grad_norm: 0.8345380881145537, iteration: 208075
loss: 0.9917314052581787,grad_norm: 0.9999992107398571, iteration: 208076
loss: 1.032482385635376,grad_norm: 0.8908126984190878, iteration: 208077
loss: 0.9857897162437439,grad_norm: 0.905845967342326, iteration: 208078
loss: 0.9989587664604187,grad_norm: 0.9054347886371998, iteration: 208079
loss: 1.0058178901672363,grad_norm: 0.9999989480466904, iteration: 208080
loss: 0.9757193326950073,grad_norm: 0.991060472304719, iteration: 208081
loss: 0.9858178496360779,grad_norm: 0.9999990714762925, iteration: 208082
loss: 0.9948605895042419,grad_norm: 0.9999989649215805, iteration: 208083
loss: 0.963026225566864,grad_norm: 0.7878007826872012, iteration: 208084
loss: 1.0037959814071655,grad_norm: 0.8763772232398666, iteration: 208085
loss: 1.0064294338226318,grad_norm: 0.8826224604557298, iteration: 208086
loss: 0.9862657785415649,grad_norm: 0.8999361346930829, iteration: 208087
loss: 0.9801622629165649,grad_norm: 0.933736600861298, iteration: 208088
loss: 1.0145426988601685,grad_norm: 0.8143703932134131, iteration: 208089
loss: 1.0181622505187988,grad_norm: 0.9604199285191442, iteration: 208090
loss: 1.0047980546951294,grad_norm: 0.9999993109143741, iteration: 208091
loss: 0.9858691692352295,grad_norm: 0.8979799950324087, iteration: 208092
loss: 1.018944263458252,grad_norm: 0.849267433929204, iteration: 208093
loss: 1.0502372980117798,grad_norm: 0.9896986710653856, iteration: 208094
loss: 0.9943042993545532,grad_norm: 0.9999997975389464, iteration: 208095
loss: 0.9810360670089722,grad_norm: 0.9999990794758103, iteration: 208096
loss: 1.0120398998260498,grad_norm: 0.99999939432086, iteration: 208097
loss: 0.9821297526359558,grad_norm: 0.9999992027337613, iteration: 208098
loss: 1.0033365488052368,grad_norm: 0.8487771942677338, iteration: 208099
loss: 0.9984486699104309,grad_norm: 0.9620414795368037, iteration: 208100
loss: 0.9936181306838989,grad_norm: 0.9999992667668316, iteration: 208101
loss: 1.005980134010315,grad_norm: 0.9373336268403648, iteration: 208102
loss: 0.9980369806289673,grad_norm: 0.9540729917886308, iteration: 208103
loss: 0.9785927534103394,grad_norm: 0.9987141111102416, iteration: 208104
loss: 0.9757983684539795,grad_norm: 0.9999991495004914, iteration: 208105
loss: 1.0053075551986694,grad_norm: 0.8859450655070037, iteration: 208106
loss: 1.0100877285003662,grad_norm: 0.9164225540507257, iteration: 208107
loss: 0.98952317237854,grad_norm: 0.8696418415259933, iteration: 208108
loss: 0.9833919405937195,grad_norm: 0.8296582591446413, iteration: 208109
loss: 1.021653175354004,grad_norm: 0.7898757762100361, iteration: 208110
loss: 0.9786549806594849,grad_norm: 0.9566545749719758, iteration: 208111
loss: 1.0683186054229736,grad_norm: 0.9895692297248054, iteration: 208112
loss: 0.9719731211662292,grad_norm: 0.9225979573116257, iteration: 208113
loss: 0.9670053720474243,grad_norm: 0.9999991962527353, iteration: 208114
loss: 1.014742374420166,grad_norm: 0.999999142624148, iteration: 208115
loss: 0.9696512222290039,grad_norm: 0.9999992231164753, iteration: 208116
loss: 0.9861950874328613,grad_norm: 0.9999989935664643, iteration: 208117
loss: 1.0184341669082642,grad_norm: 0.8968535547312888, iteration: 208118
loss: 0.9837682843208313,grad_norm: 0.916675782532468, iteration: 208119
loss: 1.0083740949630737,grad_norm: 0.9999990675017757, iteration: 208120
loss: 0.9830042719841003,grad_norm: 0.999999077312269, iteration: 208121
loss: 1.0130283832550049,grad_norm: 0.8166708563697642, iteration: 208122
loss: 0.9504873752593994,grad_norm: 0.9999990914233743, iteration: 208123
loss: 1.0645158290863037,grad_norm: 0.9999991351723738, iteration: 208124
loss: 1.0119991302490234,grad_norm: 0.8605583924352188, iteration: 208125
loss: 1.0195480585098267,grad_norm: 0.9999998188085716, iteration: 208126
loss: 0.9848048686981201,grad_norm: 0.8309963628425256, iteration: 208127
loss: 1.0612852573394775,grad_norm: 0.9999998549633811, iteration: 208128
loss: 0.9936915040016174,grad_norm: 0.9164475823456437, iteration: 208129
loss: 1.0045969486236572,grad_norm: 0.9999990231348768, iteration: 208130
loss: 1.0270642042160034,grad_norm: 0.999999156416613, iteration: 208131
loss: 1.0175224542617798,grad_norm: 0.9999990336862234, iteration: 208132
loss: 1.0188398361206055,grad_norm: 0.9999997490961895, iteration: 208133
loss: 0.9978242516517639,grad_norm: 0.9886354833594483, iteration: 208134
loss: 0.9726740717887878,grad_norm: 0.9355108399856019, iteration: 208135
loss: 1.0562324523925781,grad_norm: 0.9999995517206648, iteration: 208136
loss: 1.0735961198806763,grad_norm: 0.8930125406306005, iteration: 208137
loss: 1.0766900777816772,grad_norm: 0.9999995312220388, iteration: 208138
loss: 1.1373475790023804,grad_norm: 0.9999993355078178, iteration: 208139
loss: 1.0002849102020264,grad_norm: 0.8659379061110473, iteration: 208140
loss: 1.0200284719467163,grad_norm: 0.9464568403555353, iteration: 208141
loss: 0.9658475518226624,grad_norm: 0.9999991987580388, iteration: 208142
loss: 1.02023184299469,grad_norm: 0.999999073067322, iteration: 208143
loss: 0.9784755110740662,grad_norm: 0.871244660797025, iteration: 208144
loss: 1.0130642652511597,grad_norm: 0.933151793414724, iteration: 208145
loss: 0.9838525652885437,grad_norm: 0.9999990895236271, iteration: 208146
loss: 1.016442060470581,grad_norm: 0.9470719213104185, iteration: 208147
loss: 1.0172357559204102,grad_norm: 0.9999991891260689, iteration: 208148
loss: 0.9396717548370361,grad_norm: 0.999999076500217, iteration: 208149
loss: 1.0041332244873047,grad_norm: 0.9999992177916488, iteration: 208150
loss: 1.0029878616333008,grad_norm: 0.999999881490773, iteration: 208151
loss: 1.0033643245697021,grad_norm: 0.9999992374488835, iteration: 208152
loss: 1.011826753616333,grad_norm: 0.9999998558852298, iteration: 208153
loss: 0.9587297439575195,grad_norm: 0.9999991211288167, iteration: 208154
loss: 0.9742934107780457,grad_norm: 0.9999991855003617, iteration: 208155
loss: 1.1120551824569702,grad_norm: 0.987087787643831, iteration: 208156
loss: 1.081215262413025,grad_norm: 0.9999994669944315, iteration: 208157
loss: 1.0344502925872803,grad_norm: 0.8817398407547061, iteration: 208158
loss: 1.1205861568450928,grad_norm: 0.9999991894679225, iteration: 208159
loss: 1.168311595916748,grad_norm: 0.9299453685586551, iteration: 208160
loss: 1.0281240940093994,grad_norm: 0.99999916452944, iteration: 208161
loss: 1.272509217262268,grad_norm: 0.9999996867310041, iteration: 208162
loss: 1.0235414505004883,grad_norm: 0.9999991497487116, iteration: 208163
loss: 1.064072847366333,grad_norm: 0.9572894138866758, iteration: 208164
loss: 1.076996922492981,grad_norm: 0.9999990312347785, iteration: 208165
loss: 0.9855306148529053,grad_norm: 0.831630575431002, iteration: 208166
loss: 1.172933578491211,grad_norm: 0.9999995385723335, iteration: 208167
loss: 1.1628203392028809,grad_norm: 0.9999994104799556, iteration: 208168
loss: 1.0150272846221924,grad_norm: 0.9999990144373381, iteration: 208169
loss: 1.0999253988265991,grad_norm: 0.9999999329529151, iteration: 208170
loss: 1.0085914134979248,grad_norm: 0.970307700790123, iteration: 208171
loss: 0.9719070196151733,grad_norm: 0.9032072204495291, iteration: 208172
loss: 1.0416228771209717,grad_norm: 0.9999992689684094, iteration: 208173
loss: 1.0157853364944458,grad_norm: 0.9044895956608453, iteration: 208174
loss: 1.0528420209884644,grad_norm: 0.9560873118405039, iteration: 208175
loss: 0.9841002225875854,grad_norm: 0.9999991515928562, iteration: 208176
loss: 1.0142570734024048,grad_norm: 0.9743319917061135, iteration: 208177
loss: 1.0241870880126953,grad_norm: 0.9999992179265536, iteration: 208178
loss: 0.9883574843406677,grad_norm: 0.999999160066088, iteration: 208179
loss: 0.9922246932983398,grad_norm: 0.8922718082010455, iteration: 208180
loss: 1.0520870685577393,grad_norm: 0.9208912432842631, iteration: 208181
loss: 1.0409845113754272,grad_norm: 0.9721738138570337, iteration: 208182
loss: 0.9976832270622253,grad_norm: 0.9222509509034682, iteration: 208183
loss: 1.0147839784622192,grad_norm: 0.9443676759323291, iteration: 208184
loss: 1.16073477268219,grad_norm: 0.9999990375510197, iteration: 208185
loss: 1.0001050233840942,grad_norm: 0.8650982154318063, iteration: 208186
loss: 1.0858049392700195,grad_norm: 0.9999994079173351, iteration: 208187
loss: 0.976089596748352,grad_norm: 0.9999990230255792, iteration: 208188
loss: 1.0390390157699585,grad_norm: 0.9999990069068853, iteration: 208189
loss: 1.0114701986312866,grad_norm: 0.9999989483153169, iteration: 208190
loss: 0.9875791668891907,grad_norm: 0.9028242232246741, iteration: 208191
loss: 1.0554620027542114,grad_norm: 0.9999991399822222, iteration: 208192
loss: 0.987887978553772,grad_norm: 0.7969764628480699, iteration: 208193
loss: 0.9896069169044495,grad_norm: 0.9999990670946812, iteration: 208194
loss: 1.091897964477539,grad_norm: 0.9999990242435662, iteration: 208195
loss: 0.9806811213493347,grad_norm: 0.9999989768322736, iteration: 208196
loss: 0.9928034543991089,grad_norm: 0.9999991708214576, iteration: 208197
loss: 1.0230138301849365,grad_norm: 0.9999991413351007, iteration: 208198
loss: 1.0504504442214966,grad_norm: 0.9999992556647649, iteration: 208199
loss: 0.9969959855079651,grad_norm: 0.9321532990366923, iteration: 208200
loss: 1.073712944984436,grad_norm: 0.9999997727307507, iteration: 208201
loss: 1.079350471496582,grad_norm: 0.9999992644079738, iteration: 208202
loss: 1.0016778707504272,grad_norm: 0.8729837594597263, iteration: 208203
loss: 0.9470232129096985,grad_norm: 0.9999992377220391, iteration: 208204
loss: 0.9932941794395447,grad_norm: 0.8581760393685344, iteration: 208205
loss: 1.0034624338150024,grad_norm: 0.9999992372823467, iteration: 208206
loss: 1.0204907655715942,grad_norm: 0.999393217365509, iteration: 208207
loss: 0.9991303086280823,grad_norm: 0.9999993313909611, iteration: 208208
loss: 1.021484136581421,grad_norm: 0.9999990378666931, iteration: 208209
loss: 1.0141761302947998,grad_norm: 0.941523443904017, iteration: 208210
loss: 0.9970201849937439,grad_norm: 0.9999991860771389, iteration: 208211
loss: 1.0012233257293701,grad_norm: 0.9999990888464267, iteration: 208212
loss: 0.9811134338378906,grad_norm: 0.8150038780611847, iteration: 208213
loss: 1.0149003267288208,grad_norm: 0.9329988929013172, iteration: 208214
loss: 0.9789714217185974,grad_norm: 0.8557344875288245, iteration: 208215
loss: 0.968326985836029,grad_norm: 0.9908325871058712, iteration: 208216
loss: 0.9701075553894043,grad_norm: 0.9341693654976354, iteration: 208217
loss: 1.0274275541305542,grad_norm: 0.8722635727712342, iteration: 208218
loss: 1.0051790475845337,grad_norm: 0.9999999412699995, iteration: 208219
loss: 0.9793336987495422,grad_norm: 0.9253094647482693, iteration: 208220
loss: 1.0186575651168823,grad_norm: 0.9348552805848527, iteration: 208221
loss: 0.9806543588638306,grad_norm: 0.9999991078774494, iteration: 208222
loss: 1.0056376457214355,grad_norm: 0.948047928230931, iteration: 208223
loss: 0.9947155117988586,grad_norm: 0.9999998939175959, iteration: 208224
loss: 0.9676724076271057,grad_norm: 0.9999991432099283, iteration: 208225
loss: 1.0414743423461914,grad_norm: 0.9999997681937836, iteration: 208226
loss: 1.0269122123718262,grad_norm: 0.875431981786185, iteration: 208227
loss: 1.0493396520614624,grad_norm: 0.9999990341009592, iteration: 208228
loss: 0.9926131367683411,grad_norm: 0.9999990244203537, iteration: 208229
loss: 0.9941073060035706,grad_norm: 0.9108714772516, iteration: 208230
loss: 1.0444378852844238,grad_norm: 0.898655341546484, iteration: 208231
loss: 1.0313310623168945,grad_norm: 0.999999110485814, iteration: 208232
loss: 1.0347168445587158,grad_norm: 0.952618732717002, iteration: 208233
loss: 0.9856235980987549,grad_norm: 0.9632329581328586, iteration: 208234
loss: 1.0000516176223755,grad_norm: 0.9999991780285631, iteration: 208235
loss: 1.0083696842193604,grad_norm: 0.8582885968596996, iteration: 208236
loss: 1.002286434173584,grad_norm: 0.9256683701926557, iteration: 208237
loss: 0.9867746233940125,grad_norm: 0.9999990108916272, iteration: 208238
loss: 0.9851188659667969,grad_norm: 0.8892061486643554, iteration: 208239
loss: 0.9613991379737854,grad_norm: 0.9999990022303069, iteration: 208240
loss: 1.013149380683899,grad_norm: 0.9695927726397053, iteration: 208241
loss: 0.9984997510910034,grad_norm: 0.8468382935772566, iteration: 208242
loss: 0.9823328852653503,grad_norm: 0.7644956121077834, iteration: 208243
loss: 0.9879930019378662,grad_norm: 0.9999991693429214, iteration: 208244
loss: 1.0219494104385376,grad_norm: 0.9999991254681435, iteration: 208245
loss: 0.9930070042610168,grad_norm: 0.9999990066259904, iteration: 208246
loss: 0.9866597652435303,grad_norm: 0.9264434096352632, iteration: 208247
loss: 0.9786458611488342,grad_norm: 0.8898054815898363, iteration: 208248
loss: 0.9712156057357788,grad_norm: 0.991892479270984, iteration: 208249
loss: 1.0106160640716553,grad_norm: 0.9630691052504147, iteration: 208250
loss: 0.9829310178756714,grad_norm: 0.9999993353872624, iteration: 208251
loss: 0.9654867053031921,grad_norm: 0.7866162141399228, iteration: 208252
loss: 0.9972656965255737,grad_norm: 0.9999991444997514, iteration: 208253
loss: 0.9736519455909729,grad_norm: 0.8337305179775037, iteration: 208254
loss: 1.020505428314209,grad_norm: 0.8934382013594174, iteration: 208255
loss: 0.9911035895347595,grad_norm: 0.985744271779602, iteration: 208256
loss: 1.1743477582931519,grad_norm: 0.9999992176137408, iteration: 208257
loss: 1.0371346473693848,grad_norm: 0.9898760662429328, iteration: 208258
loss: 0.9759047031402588,grad_norm: 0.9101975890005323, iteration: 208259
loss: 1.0130387544631958,grad_norm: 0.9999989760681821, iteration: 208260
loss: 0.9742717146873474,grad_norm: 0.8726116210582311, iteration: 208261
loss: 1.0187309980392456,grad_norm: 0.9999989645450217, iteration: 208262
loss: 1.0451246500015259,grad_norm: 0.9660148797237166, iteration: 208263
loss: 0.979698896408081,grad_norm: 0.9999991048664767, iteration: 208264
loss: 0.9781297445297241,grad_norm: 0.9147430981337034, iteration: 208265
loss: 0.9681571125984192,grad_norm: 0.9342744385669545, iteration: 208266
loss: 1.0338828563690186,grad_norm: 0.9999993437153061, iteration: 208267
loss: 0.955881655216217,grad_norm: 0.9394273162200982, iteration: 208268
loss: 0.9952152967453003,grad_norm: 0.7544087347049744, iteration: 208269
loss: 0.9784657955169678,grad_norm: 0.999205798923924, iteration: 208270
loss: 1.0291430950164795,grad_norm: 0.9999990218121707, iteration: 208271
loss: 1.0181978940963745,grad_norm: 0.948860864667402, iteration: 208272
loss: 1.0036710500717163,grad_norm: 0.9586737369614201, iteration: 208273
loss: 1.0185474157333374,grad_norm: 0.9999992840203513, iteration: 208274
loss: 1.0164560079574585,grad_norm: 0.9999990565544139, iteration: 208275
loss: 0.9936419129371643,grad_norm: 0.9999991285147777, iteration: 208276
loss: 1.0678596496582031,grad_norm: 0.999999073727954, iteration: 208277
loss: 0.9789792895317078,grad_norm: 0.8262521039857219, iteration: 208278
loss: 1.0130091905593872,grad_norm: 0.883021512758939, iteration: 208279
loss: 1.0451098680496216,grad_norm: 0.7997411004585212, iteration: 208280
loss: 1.0155855417251587,grad_norm: 0.9055681195402023, iteration: 208281
loss: 1.0300707817077637,grad_norm: 0.7683045822499764, iteration: 208282
loss: 1.0092862844467163,grad_norm: 0.9480985865681452, iteration: 208283
loss: 0.9773568511009216,grad_norm: 0.944012690138892, iteration: 208284
loss: 1.0262084007263184,grad_norm: 0.98158710526099, iteration: 208285
loss: 1.0069620609283447,grad_norm: 0.8931624764627287, iteration: 208286
loss: 1.1308553218841553,grad_norm: 0.9999993624774033, iteration: 208287
loss: 1.047071933746338,grad_norm: 0.9999993503482907, iteration: 208288
loss: 0.991413950920105,grad_norm: 0.7665135346809975, iteration: 208289
loss: 1.0387846231460571,grad_norm: 0.9999992751404462, iteration: 208290
loss: 1.0065207481384277,grad_norm: 0.9526311024491302, iteration: 208291
loss: 1.0048736333847046,grad_norm: 0.9047842317257238, iteration: 208292
loss: 1.013451337814331,grad_norm: 0.909123389663582, iteration: 208293
loss: 0.9925178289413452,grad_norm: 0.9579560171606977, iteration: 208294
loss: 0.997990608215332,grad_norm: 0.9999992338176819, iteration: 208295
loss: 0.9961534738540649,grad_norm: 0.9435167227904395, iteration: 208296
loss: 1.015645980834961,grad_norm: 0.9632355956091633, iteration: 208297
loss: 0.9888219237327576,grad_norm: 0.9862280305996936, iteration: 208298
loss: 1.0257326364517212,grad_norm: 0.9225722159372638, iteration: 208299
loss: 0.9972757697105408,grad_norm: 0.8376455507307048, iteration: 208300
loss: 0.9833717942237854,grad_norm: 0.9999991969012952, iteration: 208301
loss: 0.9836081266403198,grad_norm: 0.8217526899796102, iteration: 208302
loss: 1.002112865447998,grad_norm: 0.9999991453586047, iteration: 208303
loss: 1.0189054012298584,grad_norm: 0.9306334253214213, iteration: 208304
loss: 0.9749346971511841,grad_norm: 0.9379798778743229, iteration: 208305
loss: 1.0196514129638672,grad_norm: 0.8853558651746825, iteration: 208306
loss: 1.0390769243240356,grad_norm: 0.9999989975018544, iteration: 208307
loss: 0.9908642172813416,grad_norm: 0.9740079175106573, iteration: 208308
loss: 1.0047485828399658,grad_norm: 0.8957478908700527, iteration: 208309
loss: 0.9975734949111938,grad_norm: 0.9999992112158036, iteration: 208310
loss: 0.983024001121521,grad_norm: 0.8366692755902573, iteration: 208311
loss: 1.0242891311645508,grad_norm: 0.9258990122771426, iteration: 208312
loss: 0.9935892820358276,grad_norm: 0.9989514551884169, iteration: 208313
loss: 0.9927215576171875,grad_norm: 0.9790215552732005, iteration: 208314
loss: 0.9997644424438477,grad_norm: 0.9483668630083423, iteration: 208315
loss: 0.9927331805229187,grad_norm: 0.8401111010764197, iteration: 208316
loss: 1.0135389566421509,grad_norm: 0.909113398778269, iteration: 208317
loss: 0.9935932159423828,grad_norm: 0.9999991958748523, iteration: 208318
loss: 1.0296045541763306,grad_norm: 0.9326520162151072, iteration: 208319
loss: 1.0344572067260742,grad_norm: 0.8866600054825027, iteration: 208320
loss: 0.9747780561447144,grad_norm: 0.8633599070152076, iteration: 208321
loss: 0.9785282015800476,grad_norm: 0.9656749572268911, iteration: 208322
loss: 0.9664372801780701,grad_norm: 0.9999990845381361, iteration: 208323
loss: 1.0106338262557983,grad_norm: 0.999999034334325, iteration: 208324
loss: 1.0063742399215698,grad_norm: 0.999999148144566, iteration: 208325
loss: 1.005460500717163,grad_norm: 0.9197359332593719, iteration: 208326
loss: 0.9853256940841675,grad_norm: 0.9763719430317578, iteration: 208327
loss: 1.0271615982055664,grad_norm: 0.7621180228428986, iteration: 208328
loss: 1.000656247138977,grad_norm: 0.8183029555648118, iteration: 208329
loss: 1.0348484516143799,grad_norm: 0.8753145426631025, iteration: 208330
loss: 0.9656323790550232,grad_norm: 0.9919382751450551, iteration: 208331
loss: 1.0025495290756226,grad_norm: 0.9999990148768657, iteration: 208332
loss: 0.9991776943206787,grad_norm: 0.9999998077759407, iteration: 208333
loss: 1.0243301391601562,grad_norm: 0.9999992778646374, iteration: 208334
loss: 0.9897339940071106,grad_norm: 0.9474834161258489, iteration: 208335
loss: 0.9968283772468567,grad_norm: 0.9298345076443852, iteration: 208336
loss: 0.9919439554214478,grad_norm: 0.81044004073239, iteration: 208337
loss: 1.0166006088256836,grad_norm: 0.9999991805948646, iteration: 208338
loss: 0.9903619289398193,grad_norm: 0.9999990444964336, iteration: 208339
loss: 0.9922071695327759,grad_norm: 0.9999990633858454, iteration: 208340
loss: 1.0261878967285156,grad_norm: 0.9453869880814718, iteration: 208341
loss: 1.02802574634552,grad_norm: 0.9999994388077479, iteration: 208342
loss: 0.988254964351654,grad_norm: 0.952795819187901, iteration: 208343
loss: 1.0022581815719604,grad_norm: 0.8218325386914164, iteration: 208344
loss: 1.0155699253082275,grad_norm: 0.8854973162760236, iteration: 208345
loss: 0.9783589839935303,grad_norm: 0.890810250646002, iteration: 208346
loss: 0.9729156494140625,grad_norm: 0.9968803519214252, iteration: 208347
loss: 0.9685638546943665,grad_norm: 0.7940484294207478, iteration: 208348
loss: 1.0278162956237793,grad_norm: 0.9634276440835985, iteration: 208349
loss: 1.018739938735962,grad_norm: 0.9999993145157468, iteration: 208350
loss: 1.0155569314956665,grad_norm: 0.9361620400947257, iteration: 208351
loss: 0.999798595905304,grad_norm: 0.9999992487498678, iteration: 208352
loss: 0.9973863363265991,grad_norm: 0.9890052167612238, iteration: 208353
loss: 0.9981259703636169,grad_norm: 0.9918569766963953, iteration: 208354
loss: 1.012640357017517,grad_norm: 0.9417595789363573, iteration: 208355
loss: 0.9843624234199524,grad_norm: 0.9447048281456261, iteration: 208356
loss: 0.9837515354156494,grad_norm: 0.871955495338807, iteration: 208357
loss: 1.0263259410858154,grad_norm: 0.9609346411205987, iteration: 208358
loss: 1.0517005920410156,grad_norm: 0.9261066550161774, iteration: 208359
loss: 1.0003875494003296,grad_norm: 0.9696838292979507, iteration: 208360
loss: 0.9978582262992859,grad_norm: 0.9864013680484099, iteration: 208361
loss: 0.9627929329872131,grad_norm: 0.9738091463692149, iteration: 208362
loss: 1.023400902748108,grad_norm: 0.9999989737304138, iteration: 208363
loss: 1.032796025276184,grad_norm: 0.8636546369906658, iteration: 208364
loss: 0.9924168586730957,grad_norm: 0.8827433268432072, iteration: 208365
loss: 0.9566057324409485,grad_norm: 0.9999991660731203, iteration: 208366
loss: 0.9976903796195984,grad_norm: 0.808748883706852, iteration: 208367
loss: 0.9893304705619812,grad_norm: 0.9535429564363604, iteration: 208368
loss: 1.0121309757232666,grad_norm: 0.9999991558687594, iteration: 208369
loss: 0.9932911992073059,grad_norm: 0.9664980592256414, iteration: 208370
loss: 1.0069012641906738,grad_norm: 0.9999989857519238, iteration: 208371
loss: 0.967681884765625,grad_norm: 0.9898893988631652, iteration: 208372
loss: 0.9983221888542175,grad_norm: 0.9156990987094273, iteration: 208373
loss: 1.0204541683197021,grad_norm: 0.9999997530784789, iteration: 208374
loss: 0.976042628288269,grad_norm: 0.9999990203325361, iteration: 208375
loss: 1.0307241678237915,grad_norm: 0.9370710715022929, iteration: 208376
loss: 0.9712789058685303,grad_norm: 0.809046427563214, iteration: 208377
loss: 0.9408524632453918,grad_norm: 0.934336007796183, iteration: 208378
loss: 1.00901198387146,grad_norm: 0.9999990665251117, iteration: 208379
loss: 0.9880722165107727,grad_norm: 0.9999992085533169, iteration: 208380
loss: 1.0022505521774292,grad_norm: 0.9961038930696956, iteration: 208381
loss: 1.0285356044769287,grad_norm: 0.9269370344530107, iteration: 208382
loss: 1.0157458782196045,grad_norm: 0.972431612595116, iteration: 208383
loss: 0.988152801990509,grad_norm: 0.9305014782301878, iteration: 208384
loss: 0.9849116802215576,grad_norm: 0.9018160610958215, iteration: 208385
loss: 1.0016893148422241,grad_norm: 0.999998989233211, iteration: 208386
loss: 1.0043275356292725,grad_norm: 0.7480812124898801, iteration: 208387
loss: 0.9684439897537231,grad_norm: 0.8092927912983061, iteration: 208388
loss: 0.9680511951446533,grad_norm: 0.8988415103059555, iteration: 208389
loss: 0.9813182353973389,grad_norm: 0.9999991941591013, iteration: 208390
loss: 1.0022859573364258,grad_norm: 0.9999990561705402, iteration: 208391
loss: 0.9792128205299377,grad_norm: 0.9886103207017721, iteration: 208392
loss: 1.0227208137512207,grad_norm: 0.8865133401536597, iteration: 208393
loss: 1.0185699462890625,grad_norm: 0.8415057609662175, iteration: 208394
loss: 1.0452150106430054,grad_norm: 0.9999991661488354, iteration: 208395
loss: 0.9602479934692383,grad_norm: 0.9636944137975774, iteration: 208396
loss: 0.9782956838607788,grad_norm: 0.9999991117420898, iteration: 208397
loss: 1.0041013956069946,grad_norm: 0.8530736392484265, iteration: 208398
loss: 1.0048916339874268,grad_norm: 0.9999989853670442, iteration: 208399
loss: 1.098095417022705,grad_norm: 0.9999991639252033, iteration: 208400
loss: 1.085105538368225,grad_norm: 0.9999996374733587, iteration: 208401
loss: 1.0025161504745483,grad_norm: 0.9974868376104081, iteration: 208402
loss: 1.0003790855407715,grad_norm: 0.8417162768072426, iteration: 208403
loss: 1.0020040273666382,grad_norm: 0.8900623336449011, iteration: 208404
loss: 1.0136961936950684,grad_norm: 0.9999991137150448, iteration: 208405
loss: 0.9881958961486816,grad_norm: 0.9999990793581589, iteration: 208406
loss: 1.0102603435516357,grad_norm: 0.8611429924953464, iteration: 208407
loss: 0.9910404086112976,grad_norm: 0.9999991795744965, iteration: 208408
loss: 0.9937471747398376,grad_norm: 0.9146238895204182, iteration: 208409
loss: 1.00909423828125,grad_norm: 0.9403210367714019, iteration: 208410
loss: 1.0113325119018555,grad_norm: 0.999999124685739, iteration: 208411
loss: 1.010925054550171,grad_norm: 0.8731019373076848, iteration: 208412
loss: 1.0122076272964478,grad_norm: 0.8288797317158749, iteration: 208413
loss: 0.9712821841239929,grad_norm: 0.999999175251449, iteration: 208414
loss: 0.9828181862831116,grad_norm: 0.8031544899954215, iteration: 208415
loss: 0.9783594608306885,grad_norm: 0.8778069022810022, iteration: 208416
loss: 0.9937018156051636,grad_norm: 0.8911086032522779, iteration: 208417
loss: 1.0199013948440552,grad_norm: 0.9999991134638703, iteration: 208418
loss: 0.9781908392906189,grad_norm: 0.9999992118165022, iteration: 208419
loss: 0.9928139448165894,grad_norm: 0.9145372333912725, iteration: 208420
loss: 1.0023316144943237,grad_norm: 0.9999991515638587, iteration: 208421
loss: 0.9882121682167053,grad_norm: 0.9999991472091369, iteration: 208422
loss: 1.0309005975723267,grad_norm: 0.9999992048560125, iteration: 208423
loss: 0.997070848941803,grad_norm: 0.9999991978785269, iteration: 208424
loss: 0.9712949991226196,grad_norm: 0.9013738838836481, iteration: 208425
loss: 1.0314750671386719,grad_norm: 0.9999992316933262, iteration: 208426
loss: 1.0006320476531982,grad_norm: 0.8682218245278798, iteration: 208427
loss: 1.035032868385315,grad_norm: 0.9999991932186053, iteration: 208428
loss: 1.0034149885177612,grad_norm: 0.999999132625775, iteration: 208429
loss: 0.9753676056861877,grad_norm: 0.8986772731413938, iteration: 208430
loss: 1.0660394430160522,grad_norm: 0.8677423252954072, iteration: 208431
loss: 1.01095712184906,grad_norm: 0.9233769202276768, iteration: 208432
loss: 1.0620452165603638,grad_norm: 0.9172983565175067, iteration: 208433
loss: 1.0521645545959473,grad_norm: 0.9999998898346837, iteration: 208434
loss: 0.9985883235931396,grad_norm: 0.8375992695889597, iteration: 208435
loss: 0.9825825691223145,grad_norm: 0.9999991571861815, iteration: 208436
loss: 0.9628878235816956,grad_norm: 0.9433290702429962, iteration: 208437
loss: 1.0073062181472778,grad_norm: 0.9067434405305085, iteration: 208438
loss: 1.030772089958191,grad_norm: 0.9999990289891862, iteration: 208439
loss: 1.0278894901275635,grad_norm: 0.8752918584173216, iteration: 208440
loss: 1.012132167816162,grad_norm: 0.8184982965175435, iteration: 208441
loss: 1.0448627471923828,grad_norm: 0.850339862785206, iteration: 208442
loss: 0.999308705329895,grad_norm: 0.9999992257616234, iteration: 208443
loss: 0.9864938259124756,grad_norm: 0.9999992229751513, iteration: 208444
loss: 1.028273582458496,grad_norm: 0.9999990908847138, iteration: 208445
loss: 1.0062743425369263,grad_norm: 0.9999989726259334, iteration: 208446
loss: 1.0416232347488403,grad_norm: 0.9413244137033745, iteration: 208447
loss: 0.9901501536369324,grad_norm: 0.9559938577125237, iteration: 208448
loss: 0.9964028000831604,grad_norm: 0.7658226800353224, iteration: 208449
loss: 1.0052992105484009,grad_norm: 0.9261701486521224, iteration: 208450
loss: 0.9823854565620422,grad_norm: 0.8237125234986656, iteration: 208451
loss: 1.005942940711975,grad_norm: 0.8924694085831777, iteration: 208452
loss: 1.0249738693237305,grad_norm: 0.9999992490326669, iteration: 208453
loss: 1.0106213092803955,grad_norm: 0.971510177420662, iteration: 208454
loss: 1.0063512325286865,grad_norm: 0.9937249718461917, iteration: 208455
loss: 0.9760411977767944,grad_norm: 0.9834385782992076, iteration: 208456
loss: 0.9953593611717224,grad_norm: 0.8570571654147824, iteration: 208457
loss: 0.9952351450920105,grad_norm: 0.9003748664778546, iteration: 208458
loss: 1.0289472341537476,grad_norm: 0.999999109125436, iteration: 208459
loss: 1.0047478675842285,grad_norm: 0.9999992323419148, iteration: 208460
loss: 1.0351585149765015,grad_norm: 0.8357958333939971, iteration: 208461
loss: 0.9891698360443115,grad_norm: 0.9611394134374089, iteration: 208462
loss: 1.0425223112106323,grad_norm: 0.9999990605080944, iteration: 208463
loss: 0.9933323264122009,grad_norm: 0.8333199434179229, iteration: 208464
loss: 1.0110901594161987,grad_norm: 0.999999554098336, iteration: 208465
loss: 1.0018845796585083,grad_norm: 0.9910597805128051, iteration: 208466
loss: 1.006797432899475,grad_norm: 0.9999990233943441, iteration: 208467
loss: 1.0210331678390503,grad_norm: 0.9627134238570968, iteration: 208468
loss: 1.0015860795974731,grad_norm: 0.9344508543991578, iteration: 208469
loss: 1.0293582677841187,grad_norm: 0.7670399380460334, iteration: 208470
loss: 1.0017797946929932,grad_norm: 0.999999188224114, iteration: 208471
loss: 1.0072596073150635,grad_norm: 0.9262870212514859, iteration: 208472
loss: 1.0477265119552612,grad_norm: 0.9999992020695154, iteration: 208473
loss: 1.008719801902771,grad_norm: 0.9312285383067321, iteration: 208474
loss: 0.9941098093986511,grad_norm: 0.8332088154381194, iteration: 208475
loss: 1.0034929513931274,grad_norm: 0.8376429280570787, iteration: 208476
loss: 1.0070242881774902,grad_norm: 0.793467473903264, iteration: 208477
loss: 1.0016776323318481,grad_norm: 0.999999016723789, iteration: 208478
loss: 1.023682951927185,grad_norm: 0.9999997720917766, iteration: 208479
loss: 1.0111113786697388,grad_norm: 0.8945627458438284, iteration: 208480
loss: 0.9535630345344543,grad_norm: 0.9023270718099431, iteration: 208481
loss: 1.003326177597046,grad_norm: 0.8409594816036099, iteration: 208482
loss: 1.0350526571273804,grad_norm: 0.9840768837540788, iteration: 208483
loss: 1.0076167583465576,grad_norm: 0.9912040978554769, iteration: 208484
loss: 0.9791207313537598,grad_norm: 0.9048206993968304, iteration: 208485
loss: 0.984722912311554,grad_norm: 0.9085834838309205, iteration: 208486
loss: 0.9898030161857605,grad_norm: 0.8382393486953711, iteration: 208487
loss: 1.026680588722229,grad_norm: 0.9999996906183048, iteration: 208488
loss: 1.022911548614502,grad_norm: 0.9999995352173132, iteration: 208489
loss: 1.0068426132202148,grad_norm: 0.9999991729508126, iteration: 208490
loss: 0.9850549697875977,grad_norm: 0.8420254330363844, iteration: 208491
loss: 1.0174733400344849,grad_norm: 0.9237754920225608, iteration: 208492
loss: 1.0062185525894165,grad_norm: 0.9999991344161613, iteration: 208493
loss: 1.0074678659439087,grad_norm: 0.9337024130259222, iteration: 208494
loss: 0.9945380687713623,grad_norm: 0.8832522691786098, iteration: 208495
loss: 1.0489706993103027,grad_norm: 0.9999993109743116, iteration: 208496
loss: 1.0193783044815063,grad_norm: 0.9999992019549702, iteration: 208497
loss: 1.0118571519851685,grad_norm: 0.8866684237205099, iteration: 208498
loss: 1.0217188596725464,grad_norm: 0.9999991509098519, iteration: 208499
loss: 1.0851722955703735,grad_norm: 0.9781141476266467, iteration: 208500
loss: 1.0600087642669678,grad_norm: 0.8692336837501424, iteration: 208501
loss: 1.006486177444458,grad_norm: 0.9999999454993196, iteration: 208502
loss: 1.0068349838256836,grad_norm: 0.9926176785453101, iteration: 208503
loss: 0.99363112449646,grad_norm: 0.9119097391975615, iteration: 208504
loss: 0.9883618950843811,grad_norm: 0.9352277162471088, iteration: 208505
loss: 0.9613297581672668,grad_norm: 0.910661644920766, iteration: 208506
loss: 0.9652377367019653,grad_norm: 0.9483252950154704, iteration: 208507
loss: 1.0807828903198242,grad_norm: 0.969026297424124, iteration: 208508
loss: 1.1970831155776978,grad_norm: 0.885183823380974, iteration: 208509
loss: 1.0044496059417725,grad_norm: 0.7991275374932857, iteration: 208510
loss: 1.0319167375564575,grad_norm: 0.999998909769448, iteration: 208511
loss: 0.9919142723083496,grad_norm: 0.9999990709280114, iteration: 208512
loss: 1.0255942344665527,grad_norm: 0.9300652034738923, iteration: 208513
loss: 0.9773085713386536,grad_norm: 0.8837134744509773, iteration: 208514
loss: 1.0289437770843506,grad_norm: 0.8933573536119772, iteration: 208515
loss: 0.9900752902030945,grad_norm: 0.8387857782062731, iteration: 208516
loss: 1.0073744058609009,grad_norm: 0.9999991194618699, iteration: 208517
loss: 1.065919280052185,grad_norm: 0.9999991705738172, iteration: 208518
loss: 0.9891071319580078,grad_norm: 0.8367612992741704, iteration: 208519
loss: 1.0196568965911865,grad_norm: 0.9999998197400837, iteration: 208520
loss: 0.9897747039794922,grad_norm: 0.9999991798423431, iteration: 208521
loss: 1.0052043199539185,grad_norm: 0.9515305937988469, iteration: 208522
loss: 1.0030877590179443,grad_norm: 0.9999995642588088, iteration: 208523
loss: 1.1353527307510376,grad_norm: 0.9998094029667454, iteration: 208524
loss: 1.055526852607727,grad_norm: 0.9104028712330093, iteration: 208525
loss: 1.0350250005722046,grad_norm: 0.9440213265724421, iteration: 208526
loss: 0.9682494401931763,grad_norm: 0.9027707523238203, iteration: 208527
loss: 0.9874494671821594,grad_norm: 0.9058025136603487, iteration: 208528
loss: 0.9995324611663818,grad_norm: 0.9760040205624633, iteration: 208529
loss: 0.9923364520072937,grad_norm: 0.8751897686492528, iteration: 208530
loss: 1.0581848621368408,grad_norm: 0.9092957578410331, iteration: 208531
loss: 1.0074018239974976,grad_norm: 0.8212568976489982, iteration: 208532
loss: 1.0050321817398071,grad_norm: 0.9999990233762795, iteration: 208533
loss: 0.9835975170135498,grad_norm: 0.8784498872593927, iteration: 208534
loss: 1.0037083625793457,grad_norm: 0.9999991078491502, iteration: 208535
loss: 1.0854865312576294,grad_norm: 0.9999991558274486, iteration: 208536
loss: 1.0703312158584595,grad_norm: 0.8739356014520624, iteration: 208537
loss: 1.1245845556259155,grad_norm: 0.9999997721628857, iteration: 208538
loss: 1.0367008447647095,grad_norm: 0.9999990153987217, iteration: 208539
loss: 1.1558432579040527,grad_norm: 0.9999990638747687, iteration: 208540
loss: 1.0287164449691772,grad_norm: 0.8353372358642095, iteration: 208541
loss: 1.0079270601272583,grad_norm: 0.81465166538851, iteration: 208542
loss: 1.0232658386230469,grad_norm: 0.9792525018721154, iteration: 208543
loss: 0.9984970092773438,grad_norm: 0.9517588139108428, iteration: 208544
loss: 0.9964097738265991,grad_norm: 0.7934880272808751, iteration: 208545
loss: 1.0739344358444214,grad_norm: 0.999999132793286, iteration: 208546
loss: 0.9690553545951843,grad_norm: 0.8089231287630205, iteration: 208547
loss: 0.9801003932952881,grad_norm: 0.9999999859167369, iteration: 208548
loss: 0.9986184239387512,grad_norm: 0.9741864603468903, iteration: 208549
loss: 0.9999049305915833,grad_norm: 0.7954446610426484, iteration: 208550
loss: 1.0349475145339966,grad_norm: 0.8902165889424185, iteration: 208551
loss: 1.0204540491104126,grad_norm: 0.999999091925351, iteration: 208552
loss: 0.999199390411377,grad_norm: 0.9080714841453446, iteration: 208553
loss: 0.9869153499603271,grad_norm: 0.9999994947615619, iteration: 208554
loss: 1.0903534889221191,grad_norm: 0.9999992721624096, iteration: 208555
loss: 0.9976912140846252,grad_norm: 0.9999991631446549, iteration: 208556
loss: 1.0172713994979858,grad_norm: 0.9710476469905309, iteration: 208557
loss: 0.9819556474685669,grad_norm: 0.9999991382051884, iteration: 208558
loss: 1.1068756580352783,grad_norm: 0.9999999274676816, iteration: 208559
loss: 1.0236773490905762,grad_norm: 0.9961399848670865, iteration: 208560
loss: 1.0724807977676392,grad_norm: 0.9999993987228019, iteration: 208561
loss: 0.9750317931175232,grad_norm: 0.9999993080418791, iteration: 208562
loss: 0.9890725612640381,grad_norm: 0.9581405937690491, iteration: 208563
loss: 1.0546387434005737,grad_norm: 0.9999994007327041, iteration: 208564
loss: 1.0033622980117798,grad_norm: 0.9306331374682216, iteration: 208565
loss: 1.0022926330566406,grad_norm: 0.9999991396866694, iteration: 208566
loss: 0.9820908308029175,grad_norm: 0.8262924945070381, iteration: 208567
loss: 1.0455844402313232,grad_norm: 0.9598366335292884, iteration: 208568
loss: 0.9663331508636475,grad_norm: 0.9999991033912565, iteration: 208569
loss: 1.0060352087020874,grad_norm: 0.999999669755472, iteration: 208570
loss: 1.0131317377090454,grad_norm: 0.9038654366621544, iteration: 208571
loss: 0.9983727335929871,grad_norm: 0.9999990216691994, iteration: 208572
loss: 1.00089430809021,grad_norm: 0.9999991247684629, iteration: 208573
loss: 0.997731626033783,grad_norm: 0.8589204619336233, iteration: 208574
loss: 0.9836646318435669,grad_norm: 0.8903346095898979, iteration: 208575
loss: 0.9950088262557983,grad_norm: 0.9541541140539613, iteration: 208576
loss: 1.016342043876648,grad_norm: 0.9645692330006466, iteration: 208577
loss: 0.9653382897377014,grad_norm: 0.9770050782241059, iteration: 208578
loss: 1.0967522859573364,grad_norm: 0.9999998198915894, iteration: 208579
loss: 1.0149232149124146,grad_norm: 0.903521803081778, iteration: 208580
loss: 1.0315179824829102,grad_norm: 0.999999981738775, iteration: 208581
loss: 1.0314137935638428,grad_norm: 0.9999997931354389, iteration: 208582
loss: 1.004543662071228,grad_norm: 0.8594048254628054, iteration: 208583
loss: 1.0799245834350586,grad_norm: 0.9953125316443849, iteration: 208584
loss: 1.0900171995162964,grad_norm: 0.99999985983857, iteration: 208585
loss: 0.9733362197875977,grad_norm: 0.8876525070872484, iteration: 208586
loss: 0.9809626340866089,grad_norm: 0.9015193525677506, iteration: 208587
loss: 1.0066626071929932,grad_norm: 0.9681098997693095, iteration: 208588
loss: 1.0066879987716675,grad_norm: 0.9760620414352618, iteration: 208589
loss: 1.0975865125656128,grad_norm: 0.9999992836435039, iteration: 208590
loss: 1.0349130630493164,grad_norm: 0.9999991104052544, iteration: 208591
loss: 1.0636931657791138,grad_norm: 0.9999990196488713, iteration: 208592
loss: 1.0329022407531738,grad_norm: 0.9093829681743638, iteration: 208593
loss: 0.9888640642166138,grad_norm: 0.8427614820239393, iteration: 208594
loss: 0.9943912029266357,grad_norm: 0.9999990923820828, iteration: 208595
loss: 1.0220880508422852,grad_norm: 0.9263046613157342, iteration: 208596
loss: 0.9963973164558411,grad_norm: 0.8214700469693837, iteration: 208597
loss: 1.0026354789733887,grad_norm: 0.9931936717196079, iteration: 208598
loss: 0.9696340560913086,grad_norm: 0.9999995985459563, iteration: 208599
loss: 0.9902766346931458,grad_norm: 0.8752792537590866, iteration: 208600
loss: 1.0013047456741333,grad_norm: 0.8789734096886502, iteration: 208601
loss: 0.9785499572753906,grad_norm: 0.9844549816582628, iteration: 208602
loss: 1.0204724073410034,grad_norm: 0.8953150737553035, iteration: 208603
loss: 1.0609549283981323,grad_norm: 0.9999991918001874, iteration: 208604
loss: 0.9816191792488098,grad_norm: 0.9020259855901347, iteration: 208605
loss: 1.0090720653533936,grad_norm: 0.9999991802236553, iteration: 208606
loss: 0.9999709129333496,grad_norm: 0.9999989772891608, iteration: 208607
loss: 0.9878512620925903,grad_norm: 0.9601221041495894, iteration: 208608
loss: 1.0136536359786987,grad_norm: 0.8804623824492112, iteration: 208609
loss: 1.026719570159912,grad_norm: 0.9120840091343481, iteration: 208610
loss: 0.9879817962646484,grad_norm: 0.9999994259842013, iteration: 208611
loss: 1.0177178382873535,grad_norm: 0.8541097656201152, iteration: 208612
loss: 1.1045225858688354,grad_norm: 0.9715500416895495, iteration: 208613
loss: 1.0509873628616333,grad_norm: 0.9999992198633739, iteration: 208614
loss: 1.042893648147583,grad_norm: 0.999999984395725, iteration: 208615
loss: 1.0058802366256714,grad_norm: 0.9999992332025801, iteration: 208616
loss: 0.9912084937095642,grad_norm: 0.9999990260975595, iteration: 208617
loss: 1.0219687223434448,grad_norm: 0.95958537522731, iteration: 208618
loss: 1.0216909646987915,grad_norm: 0.8785281695810189, iteration: 208619
loss: 1.0021083354949951,grad_norm: 0.9545889047048833, iteration: 208620
loss: 0.9979984164237976,grad_norm: 0.8837588405723736, iteration: 208621
loss: 0.9863225221633911,grad_norm: 0.9999995288242661, iteration: 208622
loss: 0.9762749075889587,grad_norm: 0.9160406104044019, iteration: 208623
loss: 0.9799106121063232,grad_norm: 0.9668623002868724, iteration: 208624
loss: 0.989474892616272,grad_norm: 0.9162666222119994, iteration: 208625
loss: 1.0406378507614136,grad_norm: 0.9528445459745589, iteration: 208626
loss: 0.9628084301948547,grad_norm: 0.9999991489991483, iteration: 208627
loss: 1.1304343938827515,grad_norm: 0.9999995923774324, iteration: 208628
loss: 1.01639723777771,grad_norm: 0.9999998641986071, iteration: 208629
loss: 1.0197900533676147,grad_norm: 0.89369493525686, iteration: 208630
loss: 1.012064814567566,grad_norm: 0.843150132939015, iteration: 208631
loss: 1.0015021562576294,grad_norm: 0.9999990092545563, iteration: 208632
loss: 1.02090585231781,grad_norm: 0.9289591906330473, iteration: 208633
loss: 0.9702702760696411,grad_norm: 0.8916098234395246, iteration: 208634
loss: 0.9998597502708435,grad_norm: 0.8750138868952809, iteration: 208635
loss: 0.9526560306549072,grad_norm: 0.8876441791350634, iteration: 208636
loss: 0.9965877532958984,grad_norm: 0.9507220721155669, iteration: 208637
loss: 0.9670867919921875,grad_norm: 0.9999992800222712, iteration: 208638
loss: 0.9918887615203857,grad_norm: 0.928817567138587, iteration: 208639
loss: 1.092236042022705,grad_norm: 0.9999998818258325, iteration: 208640
loss: 0.954694390296936,grad_norm: 0.9802534130568764, iteration: 208641
loss: 1.0053744316101074,grad_norm: 0.9999990622215299, iteration: 208642
loss: 0.9789323210716248,grad_norm: 0.7906003668872394, iteration: 208643
loss: 1.016790509223938,grad_norm: 0.9642164457771606, iteration: 208644
loss: 0.9840245246887207,grad_norm: 0.9839522952337969, iteration: 208645
loss: 1.0944973230361938,grad_norm: 0.9999991715487699, iteration: 208646
loss: 1.063401460647583,grad_norm: 0.9687820397934347, iteration: 208647
loss: 0.9918938279151917,grad_norm: 0.9925593953086935, iteration: 208648
loss: 1.0106397867202759,grad_norm: 0.9999989925096167, iteration: 208649
loss: 1.026298999786377,grad_norm: 0.9623095664502124, iteration: 208650
loss: 0.9941314458847046,grad_norm: 0.8575382938427135, iteration: 208651
loss: 0.9925611615180969,grad_norm: 0.8940672425877576, iteration: 208652
loss: 1.0297452211380005,grad_norm: 0.9999994596593396, iteration: 208653
loss: 1.0282866954803467,grad_norm: 0.8065306411235927, iteration: 208654
loss: 1.0040407180786133,grad_norm: 0.7892238949411418, iteration: 208655
loss: 0.9687638878822327,grad_norm: 0.9689358014723942, iteration: 208656
loss: 0.9748060703277588,grad_norm: 0.7751940871429972, iteration: 208657
loss: 1.0200964212417603,grad_norm: 0.896830410351617, iteration: 208658
loss: 0.9872905015945435,grad_norm: 0.8609074980181524, iteration: 208659
loss: 1.0601136684417725,grad_norm: 0.9999991335831436, iteration: 208660
loss: 0.9881174564361572,grad_norm: 0.9742354522574249, iteration: 208661
loss: 1.018466830253601,grad_norm: 0.9999992898515438, iteration: 208662
loss: 1.135298728942871,grad_norm: 0.9999993179458665, iteration: 208663
loss: 1.0244897603988647,grad_norm: 0.8088060976135626, iteration: 208664
loss: 0.9986348748207092,grad_norm: 0.7880252494553367, iteration: 208665
loss: 1.0262243747711182,grad_norm: 0.9999990269064342, iteration: 208666
loss: 0.9628370404243469,grad_norm: 0.9999990982019124, iteration: 208667
loss: 0.9995120763778687,grad_norm: 0.8977843014140044, iteration: 208668
loss: 1.0093761682510376,grad_norm: 0.8931730520071618, iteration: 208669
loss: 0.9809627532958984,grad_norm: 0.937350712634014, iteration: 208670
loss: 0.9712648391723633,grad_norm: 0.9999990690119759, iteration: 208671
loss: 0.9924218654632568,grad_norm: 0.8861060483289009, iteration: 208672
loss: 0.9636183977127075,grad_norm: 0.9352133473339563, iteration: 208673
loss: 1.0567017793655396,grad_norm: 0.9724374651208559, iteration: 208674
loss: 0.990429699420929,grad_norm: 0.9999991910719164, iteration: 208675
loss: 0.9618578553199768,grad_norm: 0.9999989980729193, iteration: 208676
loss: 1.0645027160644531,grad_norm: 0.9047230499682726, iteration: 208677
loss: 1.0470753908157349,grad_norm: 0.9372087435352423, iteration: 208678
loss: 1.0013110637664795,grad_norm: 0.9999989766898556, iteration: 208679
loss: 0.9584676623344421,grad_norm: 0.9047548971132989, iteration: 208680
loss: 0.989499568939209,grad_norm: 0.9999991108073851, iteration: 208681
loss: 0.969719648361206,grad_norm: 0.9999991734810468, iteration: 208682
loss: 1.0351330041885376,grad_norm: 1.0000000355055456, iteration: 208683
loss: 0.9922263026237488,grad_norm: 0.9999990867153816, iteration: 208684
loss: 0.9965255856513977,grad_norm: 0.8499120872542925, iteration: 208685
loss: 1.0110076665878296,grad_norm: 0.8567189203595028, iteration: 208686
loss: 0.9982427954673767,grad_norm: 0.9755515645273013, iteration: 208687
loss: 0.9369847774505615,grad_norm: 0.9999990608458297, iteration: 208688
loss: 1.023040771484375,grad_norm: 0.8393780922963364, iteration: 208689
loss: 1.011907935142517,grad_norm: 0.8573776581316581, iteration: 208690
loss: 0.9831944108009338,grad_norm: 0.9073958960577911, iteration: 208691
loss: 0.9964753985404968,grad_norm: 0.8687442085897883, iteration: 208692
loss: 0.9822943210601807,grad_norm: 0.9709486067647447, iteration: 208693
loss: 0.9811562895774841,grad_norm: 0.967758509597365, iteration: 208694
loss: 1.083615779876709,grad_norm: 0.999999052605413, iteration: 208695
loss: 0.9705149531364441,grad_norm: 0.8360454048109137, iteration: 208696
loss: 0.9754037261009216,grad_norm: 0.9999992009485238, iteration: 208697
loss: 1.0028977394104004,grad_norm: 0.8614495552155012, iteration: 208698
loss: 1.0153199434280396,grad_norm: 0.8384776087820139, iteration: 208699
loss: 1.0053893327713013,grad_norm: 0.9999999211448053, iteration: 208700
loss: 1.0804462432861328,grad_norm: 0.9999999016515703, iteration: 208701
loss: 1.0261038541793823,grad_norm: 0.9999991026175327, iteration: 208702
loss: 0.9202873110771179,grad_norm: 0.9999990367740148, iteration: 208703
loss: 1.015690803527832,grad_norm: 0.9747035802145866, iteration: 208704
loss: 1.0318655967712402,grad_norm: 0.9795092322668195, iteration: 208705
loss: 1.0207773447036743,grad_norm: 0.9240171849486236, iteration: 208706
loss: 1.0620942115783691,grad_norm: 0.9999991197337454, iteration: 208707
loss: 1.0181918144226074,grad_norm: 0.9999992629332016, iteration: 208708
loss: 0.9992994666099548,grad_norm: 0.9479966178333031, iteration: 208709
loss: 0.9765132665634155,grad_norm: 0.9999990522116725, iteration: 208710
loss: 1.0250601768493652,grad_norm: 0.9042038717628244, iteration: 208711
loss: 0.9528412222862244,grad_norm: 0.9741190950561042, iteration: 208712
loss: 1.1030395030975342,grad_norm: 0.9999998430530042, iteration: 208713
loss: 1.023921012878418,grad_norm: 0.9999992496882265, iteration: 208714
loss: 0.9967880845069885,grad_norm: 0.9999991686867858, iteration: 208715
loss: 0.9901320338249207,grad_norm: 0.9048023276317061, iteration: 208716
loss: 0.9802847504615784,grad_norm: 0.9530815866977745, iteration: 208717
loss: 0.9734896421432495,grad_norm: 0.9999997155030685, iteration: 208718
loss: 0.968390941619873,grad_norm: 0.9215337015266685, iteration: 208719
loss: 0.9895592331886292,grad_norm: 0.8720981680794697, iteration: 208720
loss: 0.9976163506507874,grad_norm: 0.99999901114338, iteration: 208721
loss: 0.9729648232460022,grad_norm: 0.942647942900844, iteration: 208722
loss: 0.9909457564353943,grad_norm: 0.864276697349982, iteration: 208723
loss: 0.9998162388801575,grad_norm: 0.9999990950451169, iteration: 208724
loss: 1.2185347080230713,grad_norm: 0.9999992786791562, iteration: 208725
loss: 0.9923977255821228,grad_norm: 0.9466330316242167, iteration: 208726
loss: 1.1392015218734741,grad_norm: 0.9999992637358242, iteration: 208727
loss: 1.2130752801895142,grad_norm: 0.9999999027766467, iteration: 208728
loss: 1.0676140785217285,grad_norm: 0.8465410205788895, iteration: 208729
loss: 0.99367356300354,grad_norm: 0.9349341971785582, iteration: 208730
loss: 1.1689982414245605,grad_norm: 0.999999117182237, iteration: 208731
loss: 1.2808902263641357,grad_norm: 0.99999970591097, iteration: 208732
loss: 1.0792112350463867,grad_norm: 0.9269543827676884, iteration: 208733
loss: 1.2485262155532837,grad_norm: 0.9999998800352783, iteration: 208734
loss: 1.072250247001648,grad_norm: 0.894992855217449, iteration: 208735
loss: 1.3965812921524048,grad_norm: 0.9999999615800297, iteration: 208736
loss: 1.2445175647735596,grad_norm: 0.9999993236173095, iteration: 208737
loss: 1.2377123832702637,grad_norm: 0.9999993249563449, iteration: 208738
loss: 1.1325629949569702,grad_norm: 0.9999989771064615, iteration: 208739
loss: 1.0941908359527588,grad_norm: 0.9999995384213504, iteration: 208740
loss: 1.0361979007720947,grad_norm: 0.999999063779587, iteration: 208741
loss: 0.9736203551292419,grad_norm: 0.891415131946842, iteration: 208742
loss: 1.0250561237335205,grad_norm: 0.999999264926918, iteration: 208743
loss: 1.0124062299728394,grad_norm: 0.9307295595390969, iteration: 208744
loss: 1.0646289587020874,grad_norm: 0.9999991076515462, iteration: 208745
loss: 1.129392147064209,grad_norm: 0.9999997629487717, iteration: 208746
loss: 0.9917902946472168,grad_norm: 0.9637667909716071, iteration: 208747
loss: 1.046952724456787,grad_norm: 0.9999992643086081, iteration: 208748
loss: 1.106158971786499,grad_norm: 0.9999991713874824, iteration: 208749
loss: 1.0987522602081299,grad_norm: 0.9999991785781855, iteration: 208750
loss: 1.0130877494812012,grad_norm: 0.8716309415159332, iteration: 208751
loss: 1.025452971458435,grad_norm: 0.9008584770852661, iteration: 208752
loss: 1.1703630685806274,grad_norm: 0.9999996700845614, iteration: 208753
loss: 1.1300361156463623,grad_norm: 0.9999993801011909, iteration: 208754
loss: 1.151940941810608,grad_norm: 0.9999992855803065, iteration: 208755
loss: 1.0799273252487183,grad_norm: 0.9999997771168606, iteration: 208756
loss: 1.1112865209579468,grad_norm: 0.9999998511010694, iteration: 208757
loss: 0.9764289259910583,grad_norm: 0.9999992299789867, iteration: 208758
loss: 1.0891193151474,grad_norm: 0.9999997878365198, iteration: 208759
loss: 1.0447934865951538,grad_norm: 0.9999993939912216, iteration: 208760
loss: 1.0720771551132202,grad_norm: 0.9999992935361751, iteration: 208761
loss: 1.0699249505996704,grad_norm: 0.9999992075114771, iteration: 208762
loss: 1.1605043411254883,grad_norm: 0.999999777777975, iteration: 208763
loss: 1.017913818359375,grad_norm: 0.9999991376423619, iteration: 208764
loss: 1.0744390487670898,grad_norm: 0.9999996687591685, iteration: 208765
loss: 1.0655546188354492,grad_norm: 0.9999998491761856, iteration: 208766
loss: 1.075110673904419,grad_norm: 0.9999999912656328, iteration: 208767
loss: 1.0634428262710571,grad_norm: 0.9999995625483444, iteration: 208768
loss: 1.0545402765274048,grad_norm: 0.9999992728365736, iteration: 208769
loss: 1.0390233993530273,grad_norm: 0.9745861074152173, iteration: 208770
loss: 1.0115174055099487,grad_norm: 0.8670984749359109, iteration: 208771
loss: 1.0545097589492798,grad_norm: 0.9999990812024261, iteration: 208772
loss: 0.9835206270217896,grad_norm: 0.9999992435246108, iteration: 208773
loss: 1.0307756662368774,grad_norm: 0.9999997099117149, iteration: 208774
loss: 1.1116209030151367,grad_norm: 0.9999997181344811, iteration: 208775
loss: 1.0037436485290527,grad_norm: 0.9999991175896452, iteration: 208776
loss: 1.0455083847045898,grad_norm: 0.9762474426540623, iteration: 208777
loss: 1.026397466659546,grad_norm: 0.9999994824149998, iteration: 208778
loss: 1.0075422525405884,grad_norm: 0.8343870925342457, iteration: 208779
loss: 1.0009312629699707,grad_norm: 0.9999993209526014, iteration: 208780
loss: 0.9991313219070435,grad_norm: 0.9999992862677559, iteration: 208781
loss: 0.9633290767669678,grad_norm: 0.9999990845295945, iteration: 208782
loss: 1.0147418975830078,grad_norm: 0.9215687857083097, iteration: 208783
loss: 1.0099085569381714,grad_norm: 0.9999993593103376, iteration: 208784
loss: 1.0186967849731445,grad_norm: 0.8965224919610266, iteration: 208785
loss: 1.0383254289627075,grad_norm: 0.9754926152598115, iteration: 208786
loss: 0.9935778379440308,grad_norm: 0.9999993109776966, iteration: 208787
loss: 1.0067288875579834,grad_norm: 0.9864708148931766, iteration: 208788
loss: 1.04759681224823,grad_norm: 0.9999992665982532, iteration: 208789
loss: 0.9885463118553162,grad_norm: 0.8804341371994815, iteration: 208790
loss: 0.9733418226242065,grad_norm: 0.8918610620135737, iteration: 208791
loss: 0.9998981952667236,grad_norm: 0.789012318468566, iteration: 208792
loss: 1.0222009420394897,grad_norm: 0.8596227272621996, iteration: 208793
loss: 0.9965568780899048,grad_norm: 0.9999990673196406, iteration: 208794
loss: 1.0005221366882324,grad_norm: 0.8876044250923472, iteration: 208795
loss: 1.0200320482254028,grad_norm: 0.9999992270257352, iteration: 208796
loss: 0.9817594289779663,grad_norm: 0.8215569808678078, iteration: 208797
loss: 1.0067613124847412,grad_norm: 0.9792402620315849, iteration: 208798
loss: 1.0284433364868164,grad_norm: 0.9999991947653925, iteration: 208799
loss: 1.0463171005249023,grad_norm: 0.999999144728142, iteration: 208800
loss: 1.0323890447616577,grad_norm: 0.9987338752704811, iteration: 208801
loss: 1.0042539834976196,grad_norm: 0.9999994599217, iteration: 208802
loss: 1.1400383710861206,grad_norm: 0.9999996932775459, iteration: 208803
loss: 0.9746353626251221,grad_norm: 0.920778685194859, iteration: 208804
loss: 0.9660224914550781,grad_norm: 0.9999990721131735, iteration: 208805
loss: 1.080427885055542,grad_norm: 0.9999992351862739, iteration: 208806
loss: 1.0538661479949951,grad_norm: 0.8974083867053382, iteration: 208807
loss: 0.9630445241928101,grad_norm: 0.9999991144467231, iteration: 208808
loss: 0.9797936677932739,grad_norm: 0.8431202857562501, iteration: 208809
loss: 1.0133721828460693,grad_norm: 0.9900260814007974, iteration: 208810
loss: 0.9858929514884949,grad_norm: 0.7119837044207509, iteration: 208811
loss: 0.9870785474777222,grad_norm: 0.9971873727363155, iteration: 208812
loss: 0.97407466173172,grad_norm: 0.9999991142718185, iteration: 208813
loss: 1.0125505924224854,grad_norm: 0.9999991300555514, iteration: 208814
loss: 1.0237122774124146,grad_norm: 0.9091932341976084, iteration: 208815
loss: 0.9842187166213989,grad_norm: 0.836386216935391, iteration: 208816
loss: 1.0321197509765625,grad_norm: 0.9999990378813175, iteration: 208817
loss: 0.9942824244499207,grad_norm: 0.9999992283416694, iteration: 208818
loss: 1.0093601942062378,grad_norm: 0.9999993019636405, iteration: 208819
loss: 0.9666539430618286,grad_norm: 0.9999994789465195, iteration: 208820
loss: 1.0007710456848145,grad_norm: 0.7940993385174512, iteration: 208821
loss: 0.9865122437477112,grad_norm: 0.9999992734579455, iteration: 208822
loss: 0.9850444197654724,grad_norm: 0.9999990190982834, iteration: 208823
loss: 1.0146872997283936,grad_norm: 0.956243980312483, iteration: 208824
loss: 1.007856011390686,grad_norm: 0.8433395421758264, iteration: 208825
loss: 1.0762107372283936,grad_norm: 0.9603244863847239, iteration: 208826
loss: 1.0054422616958618,grad_norm: 0.9999991050634706, iteration: 208827
loss: 1.0113023519515991,grad_norm: 0.9909107268377204, iteration: 208828
loss: 0.9877834916114807,grad_norm: 0.8277976670501055, iteration: 208829
loss: 0.9792988300323486,grad_norm: 0.7906763769367623, iteration: 208830
loss: 0.9755038022994995,grad_norm: 0.9029922979249069, iteration: 208831
loss: 0.9869329333305359,grad_norm: 0.890128408627084, iteration: 208832
loss: 1.101179838180542,grad_norm: 0.9999991903368299, iteration: 208833
loss: 0.9731159210205078,grad_norm: 0.8313199286656677, iteration: 208834
loss: 1.001862645149231,grad_norm: 0.7953869492481243, iteration: 208835
loss: 1.0010478496551514,grad_norm: 0.8236561234895559, iteration: 208836
loss: 1.0008569955825806,grad_norm: 0.9576376266884519, iteration: 208837
loss: 0.9777930378913879,grad_norm: 0.9373219387576662, iteration: 208838
loss: 0.974288284778595,grad_norm: 0.8300056804523057, iteration: 208839
loss: 0.9669355154037476,grad_norm: 0.8875056009237586, iteration: 208840
loss: 1.0047144889831543,grad_norm: 0.8245712744646267, iteration: 208841
loss: 1.09575617313385,grad_norm: 0.9999992940927014, iteration: 208842
loss: 1.0126713514328003,grad_norm: 0.8319212908993096, iteration: 208843
loss: 1.0053215026855469,grad_norm: 0.9853415320467329, iteration: 208844
loss: 1.0013314485549927,grad_norm: 0.8825942902985666, iteration: 208845
loss: 0.9673174619674683,grad_norm: 0.9999999375374862, iteration: 208846
loss: 0.9998297691345215,grad_norm: 0.9883783679806052, iteration: 208847
loss: 1.092498540878296,grad_norm: 0.9269878106985927, iteration: 208848
loss: 1.0052478313446045,grad_norm: 0.9086250422224272, iteration: 208849
loss: 0.996345579624176,grad_norm: 0.7567221849792016, iteration: 208850
loss: 0.9751791954040527,grad_norm: 0.8608755616830995, iteration: 208851
loss: 1.0004246234893799,grad_norm: 0.8816158964775401, iteration: 208852
loss: 0.9758428335189819,grad_norm: 0.9999997477578028, iteration: 208853
loss: 1.0006749629974365,grad_norm: 0.9999992479967521, iteration: 208854
loss: 0.9610828757286072,grad_norm: 0.9999989886750607, iteration: 208855
loss: 1.019410252571106,grad_norm: 0.9325519157721921, iteration: 208856
loss: 0.9972213506698608,grad_norm: 0.9999991981029152, iteration: 208857
loss: 0.9945685267448425,grad_norm: 0.9255461612815757, iteration: 208858
loss: 1.0288881063461304,grad_norm: 0.9999994543191202, iteration: 208859
loss: 0.9900773167610168,grad_norm: 0.8404532513676343, iteration: 208860
loss: 0.9671063423156738,grad_norm: 0.9999989788227086, iteration: 208861
loss: 0.9908153414726257,grad_norm: 0.9999991018707475, iteration: 208862
loss: 1.0316081047058105,grad_norm: 0.9618464388021883, iteration: 208863
loss: 0.9860890507698059,grad_norm: 0.7292023269289636, iteration: 208864
loss: 1.027405858039856,grad_norm: 0.9999991533467052, iteration: 208865
loss: 1.0170680284500122,grad_norm: 0.9999997444915972, iteration: 208866
loss: 0.9842209815979004,grad_norm: 0.9224338051356545, iteration: 208867
loss: 1.0163313150405884,grad_norm: 0.8142329042787918, iteration: 208868
loss: 1.0194607973098755,grad_norm: 0.9061141062890511, iteration: 208869
loss: 1.0956168174743652,grad_norm: 0.9999992872365029, iteration: 208870
loss: 0.9767911434173584,grad_norm: 0.9961486217772774, iteration: 208871
loss: 0.9757459759712219,grad_norm: 0.8808652772411452, iteration: 208872
loss: 0.9934712648391724,grad_norm: 0.9155714450298575, iteration: 208873
loss: 0.9772623181343079,grad_norm: 0.9999994310843766, iteration: 208874
loss: 1.0228959321975708,grad_norm: 0.9664649359810927, iteration: 208875
loss: 1.0175007581710815,grad_norm: 0.9034731723156276, iteration: 208876
loss: 0.9886882901191711,grad_norm: 0.9348244464763422, iteration: 208877
loss: 1.055972933769226,grad_norm: 0.9999992200856024, iteration: 208878
loss: 0.9846397638320923,grad_norm: 0.8240747624798728, iteration: 208879
loss: 0.984571635723114,grad_norm: 0.9999990879118917, iteration: 208880
loss: 1.0144152641296387,grad_norm: 0.999999316023153, iteration: 208881
loss: 0.998909592628479,grad_norm: 0.9304655165541081, iteration: 208882
loss: 0.9869825839996338,grad_norm: 0.8558946341161433, iteration: 208883
loss: 1.019444227218628,grad_norm: 0.999999207748841, iteration: 208884
loss: 1.0022867918014526,grad_norm: 0.8616091038707756, iteration: 208885
loss: 1.0302685499191284,grad_norm: 0.8363855910796459, iteration: 208886
loss: 1.0137882232666016,grad_norm: 0.8684098463941182, iteration: 208887
loss: 0.9930505156517029,grad_norm: 0.9309260348719037, iteration: 208888
loss: 1.0064464807510376,grad_norm: 0.8568476319706411, iteration: 208889
loss: 0.9770761132240295,grad_norm: 0.9539628143973562, iteration: 208890
loss: 1.0097265243530273,grad_norm: 0.9610298347971339, iteration: 208891
loss: 0.9993844628334045,grad_norm: 0.9082805214129569, iteration: 208892
loss: 0.9851823449134827,grad_norm: 0.936550969256277, iteration: 208893
loss: 1.0105994939804077,grad_norm: 0.9999991531980293, iteration: 208894
loss: 0.9883180260658264,grad_norm: 0.8418426378041303, iteration: 208895
loss: 0.9901809096336365,grad_norm: 0.9999994030667824, iteration: 208896
loss: 0.9974538683891296,grad_norm: 0.9802315642881543, iteration: 208897
loss: 1.0064715147018433,grad_norm: 0.9098952027210442, iteration: 208898
loss: 0.9739152789115906,grad_norm: 0.9999991034614185, iteration: 208899
loss: 1.0260475873947144,grad_norm: 0.9039813419249092, iteration: 208900
loss: 1.0074604749679565,grad_norm: 0.9999990614988865, iteration: 208901
loss: 0.9683977961540222,grad_norm: 0.9879884624006616, iteration: 208902
loss: 1.0014331340789795,grad_norm: 0.8828698474207802, iteration: 208903
loss: 1.018893837928772,grad_norm: 0.999999046605077, iteration: 208904
loss: 1.0117741823196411,grad_norm: 0.9675581982170597, iteration: 208905
loss: 1.000051736831665,grad_norm: 0.8230254536475888, iteration: 208906
loss: 0.9516410827636719,grad_norm: 0.9192748091509962, iteration: 208907
loss: 1.0183966159820557,grad_norm: 0.9999989800632809, iteration: 208908
loss: 0.9947850704193115,grad_norm: 0.9999990583596914, iteration: 208909
loss: 1.016653060913086,grad_norm: 0.7042770740206733, iteration: 208910
loss: 0.9810430407524109,grad_norm: 0.9999990982482608, iteration: 208911
loss: 1.0300461053848267,grad_norm: 0.9999997339136004, iteration: 208912
loss: 1.027117371559143,grad_norm: 0.9325350274139356, iteration: 208913
loss: 1.020202398300171,grad_norm: 0.8963839242650004, iteration: 208914
loss: 0.9817430973052979,grad_norm: 0.9999990794681872, iteration: 208915
loss: 1.0343726873397827,grad_norm: 0.9999993356149725, iteration: 208916
loss: 1.0233790874481201,grad_norm: 0.9999996277576294, iteration: 208917
loss: 0.994266927242279,grad_norm: 0.9999991238854502, iteration: 208918
loss: 0.9991058111190796,grad_norm: 0.9999993740304798, iteration: 208919
loss: 1.073813796043396,grad_norm: 0.9999993983179924, iteration: 208920
loss: 1.0543489456176758,grad_norm: 0.9496819534021537, iteration: 208921
loss: 1.0302760601043701,grad_norm: 0.9455513594103728, iteration: 208922
loss: 0.9897215366363525,grad_norm: 0.9861416310657298, iteration: 208923
loss: 0.9686742424964905,grad_norm: 0.9999990349388158, iteration: 208924
loss: 0.978827953338623,grad_norm: 0.9584426394020246, iteration: 208925
loss: 0.9687238931655884,grad_norm: 0.9340147165467345, iteration: 208926
loss: 1.0194125175476074,grad_norm: 0.8956257526734509, iteration: 208927
loss: 0.9747620224952698,grad_norm: 0.929631358373058, iteration: 208928
loss: 1.1509417295455933,grad_norm: 0.9999997654497529, iteration: 208929
loss: 0.9756088852882385,grad_norm: 0.9099627651934706, iteration: 208930
loss: 0.9947155117988586,grad_norm: 0.8919453733802384, iteration: 208931
loss: 0.9636849761009216,grad_norm: 0.8687356843799504, iteration: 208932
loss: 1.039265751838684,grad_norm: 0.9719092158204051, iteration: 208933
loss: 0.9940164685249329,grad_norm: 0.9897251605098892, iteration: 208934
loss: 1.0070316791534424,grad_norm: 0.9999990447516783, iteration: 208935
loss: 0.9880222678184509,grad_norm: 0.9196955278116317, iteration: 208936
loss: 1.0141998529434204,grad_norm: 0.9153036289248565, iteration: 208937
loss: 1.030361533164978,grad_norm: 0.9657833577433944, iteration: 208938
loss: 1.0363643169403076,grad_norm: 0.9083735716244219, iteration: 208939
loss: 1.028109073638916,grad_norm: 0.8834206890379763, iteration: 208940
loss: 1.005118489265442,grad_norm: 0.9107526738033934, iteration: 208941
loss: 0.9784014821052551,grad_norm: 0.9999998345129371, iteration: 208942
loss: 1.0070734024047852,grad_norm: 0.9999990696314077, iteration: 208943
loss: 1.0100860595703125,grad_norm: 0.9752619442783074, iteration: 208944
loss: 1.0045117139816284,grad_norm: 0.9932427418036572, iteration: 208945
loss: 0.9899139404296875,grad_norm: 0.8189386944753765, iteration: 208946
loss: 0.9782771468162537,grad_norm: 0.9804603634207814, iteration: 208947
loss: 0.9745416641235352,grad_norm: 0.882261294185277, iteration: 208948
loss: 0.9968780875205994,grad_norm: 0.7321946705334108, iteration: 208949
loss: 0.9783324003219604,grad_norm: 0.9999990682876485, iteration: 208950
loss: 0.9744324088096619,grad_norm: 0.9999990370357601, iteration: 208951
loss: 1.0258351564407349,grad_norm: 0.9999995716681336, iteration: 208952
loss: 1.0364285707473755,grad_norm: 1.000000023205751, iteration: 208953
loss: 1.007721185684204,grad_norm: 0.9999991381095262, iteration: 208954
loss: 1.0008572340011597,grad_norm: 0.8931345491141284, iteration: 208955
loss: 0.9746733903884888,grad_norm: 0.9999990373870349, iteration: 208956
loss: 0.9938478469848633,grad_norm: 0.9602728010117383, iteration: 208957
loss: 1.0045692920684814,grad_norm: 0.775606434928366, iteration: 208958
loss: 1.0031688213348389,grad_norm: 0.9999992139799087, iteration: 208959
loss: 1.0081431865692139,grad_norm: 0.9999990790504589, iteration: 208960
loss: 1.015291452407837,grad_norm: 0.8686318610129536, iteration: 208961
loss: 1.025122880935669,grad_norm: 0.9999991512181857, iteration: 208962
loss: 0.9673441052436829,grad_norm: 0.9279750712474488, iteration: 208963
loss: 1.0385141372680664,grad_norm: 0.9999990838228814, iteration: 208964
loss: 0.9965258240699768,grad_norm: 0.9758548943672433, iteration: 208965
loss: 1.0458006858825684,grad_norm: 0.946080018208469, iteration: 208966
loss: 1.0064351558685303,grad_norm: 0.8333579451724408, iteration: 208967
loss: 0.9952409267425537,grad_norm: 0.9999990116275326, iteration: 208968
loss: 0.9894649386405945,grad_norm: 0.9999991432784383, iteration: 208969
loss: 0.9757069945335388,grad_norm: 0.8680142004089038, iteration: 208970
loss: 0.9852876663208008,grad_norm: 0.8501321796839236, iteration: 208971
loss: 1.0410081148147583,grad_norm: 0.9170642982629853, iteration: 208972
loss: 0.9778279662132263,grad_norm: 0.9999998410590605, iteration: 208973
loss: 0.9763936400413513,grad_norm: 0.9939223054104408, iteration: 208974
loss: 0.9895467162132263,grad_norm: 0.9999991080569232, iteration: 208975
loss: 0.9836217761039734,grad_norm: 0.9983622706428523, iteration: 208976
loss: 0.9952364563941956,grad_norm: 0.9164592813364023, iteration: 208977
loss: 0.984422504901886,grad_norm: 0.8130847951229774, iteration: 208978
loss: 1.010945200920105,grad_norm: 0.9530190897768067, iteration: 208979
loss: 1.034659504890442,grad_norm: 0.9999991226178965, iteration: 208980
loss: 0.9975511431694031,grad_norm: 0.9999991970466343, iteration: 208981
loss: 1.0250215530395508,grad_norm: 0.9999991777266898, iteration: 208982
loss: 0.9951016902923584,grad_norm: 0.8763629212809817, iteration: 208983
loss: 0.9850588440895081,grad_norm: 0.9999992147608534, iteration: 208984
loss: 1.0092686414718628,grad_norm: 0.9999990560321126, iteration: 208985
loss: 1.0207499265670776,grad_norm: 0.9224062152944431, iteration: 208986
loss: 0.9954213500022888,grad_norm: 0.8558219470808285, iteration: 208987
loss: 0.9940474629402161,grad_norm: 0.9999992416966222, iteration: 208988
loss: 1.0202608108520508,grad_norm: 0.9999992212663397, iteration: 208989
loss: 1.0213333368301392,grad_norm: 0.9999991207744054, iteration: 208990
loss: 1.0044424533843994,grad_norm: 0.9999990769274074, iteration: 208991
loss: 1.0169172286987305,grad_norm: 0.9422729298101937, iteration: 208992
loss: 0.9983471035957336,grad_norm: 0.9999992670316662, iteration: 208993
loss: 1.043520212173462,grad_norm: 0.9999992263834275, iteration: 208994
loss: 1.0069912672042847,grad_norm: 0.9788735244858195, iteration: 208995
loss: 0.9732188582420349,grad_norm: 0.9362846475678028, iteration: 208996
loss: 0.96494460105896,grad_norm: 0.9446943305883778, iteration: 208997
loss: 0.996486246585846,grad_norm: 0.8700685005550401, iteration: 208998
loss: 0.9837361574172974,grad_norm: 0.9999991800157383, iteration: 208999
loss: 1.007738709449768,grad_norm: 0.9999992077698588, iteration: 209000
loss: 0.9675548076629639,grad_norm: 0.9636791062186838, iteration: 209001
loss: 1.0125789642333984,grad_norm: 0.8785444562305195, iteration: 209002
loss: 0.9862141013145447,grad_norm: 0.9999992449856544, iteration: 209003
loss: 0.9965594410896301,grad_norm: 0.9999990998338585, iteration: 209004
loss: 1.037268877029419,grad_norm: 0.9909748809887373, iteration: 209005
loss: 0.9821560978889465,grad_norm: 0.99999917759247, iteration: 209006
loss: 0.9705742597579956,grad_norm: 0.9999991659214478, iteration: 209007
loss: 1.0008234977722168,grad_norm: 0.8731842824786804, iteration: 209008
loss: 1.0160475969314575,grad_norm: 0.9544850517826035, iteration: 209009
loss: 1.0062909126281738,grad_norm: 0.9999990952851917, iteration: 209010
loss: 1.0031458139419556,grad_norm: 0.8820866527568857, iteration: 209011
loss: 1.0066173076629639,grad_norm: 0.9218177776077815, iteration: 209012
loss: 0.9877594113349915,grad_norm: 0.9139687284049673, iteration: 209013
loss: 0.9784368276596069,grad_norm: 0.9366432622738942, iteration: 209014
loss: 1.001118779182434,grad_norm: 0.8635971807296648, iteration: 209015
loss: 0.975559651851654,grad_norm: 0.9258367361047044, iteration: 209016
loss: 1.014805555343628,grad_norm: 0.7437203932477928, iteration: 209017
loss: 0.980163037776947,grad_norm: 0.8969652312620451, iteration: 209018
loss: 0.9928159117698669,grad_norm: 0.9999991339519013, iteration: 209019
loss: 1.0270594358444214,grad_norm: 0.996758753223464, iteration: 209020
loss: 1.0185788869857788,grad_norm: 0.8318526173822482, iteration: 209021
loss: 1.0030839443206787,grad_norm: 0.9968131071982719, iteration: 209022
loss: 1.0003597736358643,grad_norm: 0.8682978028489983, iteration: 209023
loss: 1.006978154182434,grad_norm: 0.9932490273183016, iteration: 209024
loss: 1.0170522928237915,grad_norm: 0.9241736048999312, iteration: 209025
loss: 1.018314003944397,grad_norm: 0.9999992645531246, iteration: 209026
loss: 0.9866728782653809,grad_norm: 0.9666201643826305, iteration: 209027
loss: 1.0243017673492432,grad_norm: 0.9999992266685614, iteration: 209028
loss: 0.9635260701179504,grad_norm: 0.879366195553713, iteration: 209029
loss: 1.0198006629943848,grad_norm: 0.8478547395608979, iteration: 209030
loss: 1.0033844709396362,grad_norm: 0.8948974195716608, iteration: 209031
loss: 1.020985722541809,grad_norm: 0.9354933050272983, iteration: 209032
loss: 1.0131118297576904,grad_norm: 0.9999991108702305, iteration: 209033
loss: 0.9905505776405334,grad_norm: 0.9192883850845703, iteration: 209034
loss: 0.9967320561408997,grad_norm: 0.9999990176784783, iteration: 209035
loss: 0.9947274923324585,grad_norm: 0.9981985950197607, iteration: 209036
loss: 0.9804628491401672,grad_norm: 0.8308491055178391, iteration: 209037
loss: 1.008874535560608,grad_norm: 0.9385814352955928, iteration: 209038
loss: 1.0239859819412231,grad_norm: 0.99999904489245, iteration: 209039
loss: 1.0271557569503784,grad_norm: 0.9226550526127089, iteration: 209040
loss: 0.9819005727767944,grad_norm: 0.9999991901713553, iteration: 209041
loss: 1.0027170181274414,grad_norm: 0.9884354272377626, iteration: 209042
loss: 1.0269891023635864,grad_norm: 0.9999991296815981, iteration: 209043
loss: 0.9913550615310669,grad_norm: 0.9999989995570243, iteration: 209044
loss: 0.9845660328865051,grad_norm: 0.8918557354060695, iteration: 209045
loss: 0.965918242931366,grad_norm: 0.8004503666887067, iteration: 209046
loss: 1.00328528881073,grad_norm: 0.8325772247512355, iteration: 209047
loss: 1.0087883472442627,grad_norm: 0.9808571348399714, iteration: 209048
loss: 0.9764922261238098,grad_norm: 0.9339474254374746, iteration: 209049
loss: 1.0043859481811523,grad_norm: 0.9016698367867328, iteration: 209050
loss: 0.9816654324531555,grad_norm: 0.9999989773494141, iteration: 209051
loss: 0.9761029481887817,grad_norm: 0.9999991603215024, iteration: 209052
loss: 1.0088274478912354,grad_norm: 0.9736528023892768, iteration: 209053
loss: 0.9799807071685791,grad_norm: 0.9999992583837908, iteration: 209054
loss: 1.0302369594573975,grad_norm: 0.971685360618913, iteration: 209055
loss: 0.9823300242424011,grad_norm: 0.9669581707946611, iteration: 209056
loss: 0.98743736743927,grad_norm: 0.893956235678103, iteration: 209057
loss: 0.9711447954177856,grad_norm: 0.933831204518446, iteration: 209058
loss: 0.9834024310112,grad_norm: 0.8953714188830131, iteration: 209059
loss: 0.9898092150688171,grad_norm: 0.8358798555795093, iteration: 209060
loss: 0.9678065180778503,grad_norm: 0.9682538543179549, iteration: 209061
loss: 1.012357473373413,grad_norm: 0.9999991573624352, iteration: 209062
loss: 1.0422565937042236,grad_norm: 0.999999181330132, iteration: 209063
loss: 0.9849344491958618,grad_norm: 0.9999989559633155, iteration: 209064
loss: 0.9923955798149109,grad_norm: 0.8878771461917017, iteration: 209065
loss: 1.0373679399490356,grad_norm: 0.8461035766524786, iteration: 209066
loss: 0.9771736264228821,grad_norm: 0.9525332100327525, iteration: 209067
loss: 1.0496081113815308,grad_norm: 0.9999992420145248, iteration: 209068
loss: 1.0347362756729126,grad_norm: 0.758945506142154, iteration: 209069
loss: 0.9869475364685059,grad_norm: 0.9999992267565134, iteration: 209070
loss: 0.9917974472045898,grad_norm: 0.8895491321519937, iteration: 209071
loss: 0.9999348521232605,grad_norm: 0.999999104018782, iteration: 209072
loss: 1.0246487855911255,grad_norm: 0.8381309684157237, iteration: 209073
loss: 1.0270987749099731,grad_norm: 0.979746035034301, iteration: 209074
loss: 0.9647669792175293,grad_norm: 0.829723658688188, iteration: 209075
loss: 0.9841278195381165,grad_norm: 0.8441221911082859, iteration: 209076
loss: 1.049260139465332,grad_norm: 1.0000000102760818, iteration: 209077
loss: 1.0133967399597168,grad_norm: 0.9999997488469006, iteration: 209078
loss: 0.9955600500106812,grad_norm: 0.9999991511328035, iteration: 209079
loss: 0.9854943156242371,grad_norm: 0.9261461096327273, iteration: 209080
loss: 0.9975523352622986,grad_norm: 0.9600190836389542, iteration: 209081
loss: 1.0099124908447266,grad_norm: 0.983059059763847, iteration: 209082
loss: 1.0166354179382324,grad_norm: 0.9999991644993516, iteration: 209083
loss: 1.0220187902450562,grad_norm: 0.9088092956733765, iteration: 209084
loss: 1.0082383155822754,grad_norm: 0.8986831768180489, iteration: 209085
loss: 1.0090186595916748,grad_norm: 0.7576753601871405, iteration: 209086
loss: 1.0014431476593018,grad_norm: 0.8834023279858074, iteration: 209087
loss: 1.0218759775161743,grad_norm: 0.999999334450924, iteration: 209088
loss: 1.013216257095337,grad_norm: 0.9306017255392628, iteration: 209089
loss: 0.980293869972229,grad_norm: 0.8342242839420325, iteration: 209090
loss: 0.9935766458511353,grad_norm: 0.9999990170913895, iteration: 209091
loss: 0.9715175628662109,grad_norm: 0.9844647261096461, iteration: 209092
loss: 1.0103199481964111,grad_norm: 0.8235393162378376, iteration: 209093
loss: 0.9944252967834473,grad_norm: 0.8666190492753438, iteration: 209094
loss: 0.9977471828460693,grad_norm: 0.9999989586773481, iteration: 209095
loss: 1.0327166318893433,grad_norm: 0.9999990192238293, iteration: 209096
loss: 0.9569538831710815,grad_norm: 0.8875697300417534, iteration: 209097
loss: 1.0191587209701538,grad_norm: 0.9115064015642725, iteration: 209098
loss: 1.0403635501861572,grad_norm: 0.925689683973473, iteration: 209099
loss: 0.993205726146698,grad_norm: 0.9999991296908817, iteration: 209100
loss: 0.9758566617965698,grad_norm: 0.8814722261647627, iteration: 209101
loss: 0.9992355704307556,grad_norm: 0.9124539732319267, iteration: 209102
loss: 0.9837908744812012,grad_norm: 0.9863644422467942, iteration: 209103
loss: 1.0048969984054565,grad_norm: 0.732624834399091, iteration: 209104
loss: 0.999880313873291,grad_norm: 0.9999996845908521, iteration: 209105
loss: 0.989005446434021,grad_norm: 0.8616537614718696, iteration: 209106
loss: 1.0043084621429443,grad_norm: 0.9048343227070218, iteration: 209107
loss: 0.956795871257782,grad_norm: 0.7748159854064979, iteration: 209108
loss: 0.9982038736343384,grad_norm: 0.9705729947129751, iteration: 209109
loss: 0.958358645439148,grad_norm: 0.999999152084038, iteration: 209110
loss: 1.021968960762024,grad_norm: 0.9284086931723812, iteration: 209111
loss: 0.9987013339996338,grad_norm: 0.8983954620116115, iteration: 209112
loss: 1.0017714500427246,grad_norm: 0.999999225702299, iteration: 209113
loss: 1.0078742504119873,grad_norm: 0.9999993143375991, iteration: 209114
loss: 1.0359840393066406,grad_norm: 0.9999990159734201, iteration: 209115
loss: 0.9781279563903809,grad_norm: 0.866488860221134, iteration: 209116
loss: 1.007955551147461,grad_norm: 0.8776460076631281, iteration: 209117
loss: 1.0087981224060059,grad_norm: 0.999999117407387, iteration: 209118
loss: 0.9891263842582703,grad_norm: 0.9999989976854811, iteration: 209119
loss: 0.9985145330429077,grad_norm: 0.8703542714426168, iteration: 209120
loss: 0.982633113861084,grad_norm: 0.9999993039498792, iteration: 209121
loss: 1.026978611946106,grad_norm: 0.920974232526279, iteration: 209122
loss: 0.978848934173584,grad_norm: 0.9999992226522846, iteration: 209123
loss: 1.0010595321655273,grad_norm: 0.7801111641700464, iteration: 209124
loss: 1.02639901638031,grad_norm: 0.9999992488284446, iteration: 209125
loss: 1.0603337287902832,grad_norm: 0.8598436275535012, iteration: 209126
loss: 1.0346568822860718,grad_norm: 0.8976544626793654, iteration: 209127
loss: 1.0087087154388428,grad_norm: 0.7872732874201885, iteration: 209128
loss: 0.9829592704772949,grad_norm: 0.9588729519102583, iteration: 209129
loss: 0.9723400473594666,grad_norm: 0.9730174130101609, iteration: 209130
loss: 0.9733385443687439,grad_norm: 0.9410069246776454, iteration: 209131
loss: 0.9836869835853577,grad_norm: 0.9601075116247862, iteration: 209132
loss: 1.0195906162261963,grad_norm: 0.9999993625327378, iteration: 209133
loss: 0.9927937388420105,grad_norm: 0.8293689740776122, iteration: 209134
loss: 1.0003626346588135,grad_norm: 0.9087790436608747, iteration: 209135
loss: 0.9698068499565125,grad_norm: 0.9490720580357375, iteration: 209136
loss: 0.9829344749450684,grad_norm: 0.9712482821486118, iteration: 209137
loss: 1.0344657897949219,grad_norm: 0.8105439889615231, iteration: 209138
loss: 1.02139151096344,grad_norm: 0.9835761745337708, iteration: 209139
loss: 1.0138757228851318,grad_norm: 0.9999999375136592, iteration: 209140
loss: 0.983194887638092,grad_norm: 0.9905779532039592, iteration: 209141
loss: 0.9970818758010864,grad_norm: 0.9476020581395928, iteration: 209142
loss: 1.014880657196045,grad_norm: 0.9491235856445733, iteration: 209143
loss: 0.9980830550193787,grad_norm: 0.9999991449275147, iteration: 209144
loss: 0.9716061353683472,grad_norm: 0.8134128680771312, iteration: 209145
loss: 0.9780392050743103,grad_norm: 0.9999990725632812, iteration: 209146
loss: 1.0109843015670776,grad_norm: 0.7419071847520465, iteration: 209147
loss: 0.9859247803688049,grad_norm: 0.9922037581916371, iteration: 209148
loss: 1.007864236831665,grad_norm: 0.9279850792295957, iteration: 209149
loss: 0.9906163215637207,grad_norm: 0.9999990067522558, iteration: 209150
loss: 0.9906684756278992,grad_norm: 0.9327677964557811, iteration: 209151
loss: 1.0878968238830566,grad_norm: 0.9999992395638594, iteration: 209152
loss: 1.0182971954345703,grad_norm: 0.8881141963899526, iteration: 209153
loss: 0.987540066242218,grad_norm: 0.8942382836674231, iteration: 209154
loss: 1.00809907913208,grad_norm: 0.877394650908193, iteration: 209155
loss: 1.0418412685394287,grad_norm: 0.9999994048446671, iteration: 209156
loss: 1.0152164697647095,grad_norm: 0.9999992723359594, iteration: 209157
loss: 0.9953739047050476,grad_norm: 0.950397411471096, iteration: 209158
loss: 0.963153600692749,grad_norm: 0.9999990803024571, iteration: 209159
loss: 0.9819088578224182,grad_norm: 0.9742915819824838, iteration: 209160
loss: 1.0066308975219727,grad_norm: 0.8884931716356483, iteration: 209161
loss: 0.9855825901031494,grad_norm: 0.8992338304167692, iteration: 209162
loss: 1.0175031423568726,grad_norm: 0.9616018249479834, iteration: 209163
loss: 1.0042237043380737,grad_norm: 0.9999990055083423, iteration: 209164
loss: 1.0363502502441406,grad_norm: 0.9999990759499982, iteration: 209165
loss: 0.9873039126396179,grad_norm: 0.9999991733747557, iteration: 209166
loss: 0.9674467444419861,grad_norm: 0.9678723179918671, iteration: 209167
loss: 0.9955503940582275,grad_norm: 0.9983884340544694, iteration: 209168
loss: 1.0257434844970703,grad_norm: 0.8504448223003243, iteration: 209169
loss: 1.0181005001068115,grad_norm: 0.976570966289575, iteration: 209170
loss: 0.9856756329536438,grad_norm: 0.8996221450822166, iteration: 209171
loss: 1.0295073986053467,grad_norm: 0.999999275629358, iteration: 209172
loss: 1.0210373401641846,grad_norm: 0.9999990290629862, iteration: 209173
loss: 0.9957709908485413,grad_norm: 0.8424148108896766, iteration: 209174
loss: 0.9963871240615845,grad_norm: 0.8607647756591426, iteration: 209175
loss: 1.0022468566894531,grad_norm: 0.9746831466367009, iteration: 209176
loss: 1.001104712486267,grad_norm: 0.8655756891954941, iteration: 209177
loss: 1.0028293132781982,grad_norm: 0.9999991922064929, iteration: 209178
loss: 1.0051461458206177,grad_norm: 0.8984413721477322, iteration: 209179
loss: 0.9624082446098328,grad_norm: 0.938265050892352, iteration: 209180
loss: 0.9943589568138123,grad_norm: 0.8689272295087468, iteration: 209181
loss: 1.0095953941345215,grad_norm: 0.8286962842640154, iteration: 209182
loss: 1.0070388317108154,grad_norm: 0.9221392623716849, iteration: 209183
loss: 0.9955449104309082,grad_norm: 0.9106187570872738, iteration: 209184
loss: 0.9846034646034241,grad_norm: 0.9313136049024863, iteration: 209185
loss: 0.9717774987220764,grad_norm: 0.999999193762291, iteration: 209186
loss: 1.0082439184188843,grad_norm: 0.99999921221877, iteration: 209187
loss: 1.013038992881775,grad_norm: 0.9897163172810044, iteration: 209188
loss: 1.0189036130905151,grad_norm: 0.8443600678844363, iteration: 209189
loss: 1.014583706855774,grad_norm: 0.9999989992859445, iteration: 209190
loss: 1.015406847000122,grad_norm: 0.9999990309505212, iteration: 209191
loss: 0.9688644409179688,grad_norm: 0.9360593925738607, iteration: 209192
loss: 0.9844004511833191,grad_norm: 0.9459543940002005, iteration: 209193
loss: 0.980660617351532,grad_norm: 0.9238782050014289, iteration: 209194
loss: 0.9921606183052063,grad_norm: 0.9999991251208566, iteration: 209195
loss: 0.9763675928115845,grad_norm: 0.9999991082939195, iteration: 209196
loss: 0.9720611572265625,grad_norm: 0.9516182292257392, iteration: 209197
loss: 1.0131677389144897,grad_norm: 0.9999992019219606, iteration: 209198
loss: 1.0053892135620117,grad_norm: 0.9999990661882194, iteration: 209199
loss: 0.9944694638252258,grad_norm: 0.773095187708136, iteration: 209200
loss: 1.044523000717163,grad_norm: 0.9357549001905783, iteration: 209201
loss: 0.9897540807723999,grad_norm: 0.9999991476037563, iteration: 209202
loss: 0.9954339861869812,grad_norm: 0.9999991490579826, iteration: 209203
loss: 0.974546492099762,grad_norm: 0.9363539758851765, iteration: 209204
loss: 1.047602653503418,grad_norm: 0.9999990662170869, iteration: 209205
loss: 0.9946804046630859,grad_norm: 0.8321989396315542, iteration: 209206
loss: 0.964601457118988,grad_norm: 0.9207118769748381, iteration: 209207
loss: 0.9847022294998169,grad_norm: 0.8923009581745692, iteration: 209208
loss: 0.9528176188468933,grad_norm: 0.9423324188916523, iteration: 209209
loss: 0.9629068970680237,grad_norm: 0.9999991572122954, iteration: 209210
loss: 1.0029733180999756,grad_norm: 0.825672405969682, iteration: 209211
loss: 0.9558749198913574,grad_norm: 0.9999990692637772, iteration: 209212
loss: 0.9810800552368164,grad_norm: 0.7545762827117652, iteration: 209213
loss: 1.0288875102996826,grad_norm: 0.9999991098462735, iteration: 209214
loss: 0.9790812134742737,grad_norm: 0.9999990718657675, iteration: 209215
loss: 0.9706661105155945,grad_norm: 0.9252654111052635, iteration: 209216
loss: 0.9565150737762451,grad_norm: 0.9487416129344983, iteration: 209217
loss: 1.0154942274093628,grad_norm: 0.9312474971028343, iteration: 209218
loss: 1.0147902965545654,grad_norm: 0.9432839442641813, iteration: 209219
loss: 0.9998703598976135,grad_norm: 0.9999991470329184, iteration: 209220
loss: 0.9640306234359741,grad_norm: 0.9217989228630771, iteration: 209221
loss: 0.9856340289115906,grad_norm: 0.8752157792896783, iteration: 209222
loss: 1.0181059837341309,grad_norm: 0.8357593769678945, iteration: 209223
loss: 1.0409889221191406,grad_norm: 0.9349370450969423, iteration: 209224
loss: 1.0038833618164062,grad_norm: 0.8303428398972461, iteration: 209225
loss: 0.9990652799606323,grad_norm: 0.9246567497972641, iteration: 209226
loss: 1.0199671983718872,grad_norm: 0.9999991538459666, iteration: 209227
loss: 1.0116891860961914,grad_norm: 0.9999999217486809, iteration: 209228
loss: 0.9891425967216492,grad_norm: 0.9309556021760124, iteration: 209229
loss: 1.0011354684829712,grad_norm: 0.9710504867828581, iteration: 209230
loss: 0.976561963558197,grad_norm: 0.9999991622974965, iteration: 209231
loss: 1.0074458122253418,grad_norm: 0.8984085407383905, iteration: 209232
loss: 1.0016015768051147,grad_norm: 0.9154374865195982, iteration: 209233
loss: 0.9093717336654663,grad_norm: 0.9154934883459646, iteration: 209234
loss: 0.9687694311141968,grad_norm: 0.9284760120779051, iteration: 209235
loss: 0.9827505946159363,grad_norm: 0.9999991645911191, iteration: 209236
loss: 1.0252785682678223,grad_norm: 0.751344651014075, iteration: 209237
loss: 0.9773091077804565,grad_norm: 0.8751647154505405, iteration: 209238
loss: 1.0090113878250122,grad_norm: 0.9999988968697152, iteration: 209239
loss: 1.0015034675598145,grad_norm: 0.879225659326355, iteration: 209240
loss: 1.0195863246917725,grad_norm: 0.9264313707880484, iteration: 209241
loss: 1.0262633562088013,grad_norm: 0.9999991488120085, iteration: 209242
loss: 1.0263224840164185,grad_norm: 0.9999991153757715, iteration: 209243
loss: 1.007259726524353,grad_norm: 0.9288057796772187, iteration: 209244
loss: 0.9901907444000244,grad_norm: 0.9999990724172071, iteration: 209245
loss: 1.00812828540802,grad_norm: 0.8826741620757127, iteration: 209246
loss: 1.0286210775375366,grad_norm: 0.8371868995437545, iteration: 209247
loss: 1.036765456199646,grad_norm: 0.9788898905888431, iteration: 209248
loss: 0.9560765027999878,grad_norm: 0.943260800621757, iteration: 209249
loss: 1.0169349908828735,grad_norm: 0.9999991045633064, iteration: 209250
loss: 1.020335078239441,grad_norm: 0.8442966926419195, iteration: 209251
loss: 0.9852045774459839,grad_norm: 0.8627621059244754, iteration: 209252
loss: 1.0450533628463745,grad_norm: 0.999999088460342, iteration: 209253
loss: 0.9788767695426941,grad_norm: 0.9377312421341983, iteration: 209254
loss: 1.005020260810852,grad_norm: 0.8816507997123464, iteration: 209255
loss: 0.9720528721809387,grad_norm: 0.8584271606311051, iteration: 209256
loss: 0.9940658211708069,grad_norm: 0.8228482527640779, iteration: 209257
loss: 1.0255749225616455,grad_norm: 0.8243512971632008, iteration: 209258
loss: 1.003910779953003,grad_norm: 0.9536767106512802, iteration: 209259
loss: 1.016831398010254,grad_norm: 0.9999990497754426, iteration: 209260
loss: 0.9877966046333313,grad_norm: 0.999999057452345, iteration: 209261
loss: 1.0271636247634888,grad_norm: 0.9999991429347125, iteration: 209262
loss: 1.0007634162902832,grad_norm: 0.7350956805380989, iteration: 209263
loss: 1.0020722150802612,grad_norm: 0.9548708810343219, iteration: 209264
loss: 1.019681453704834,grad_norm: 0.9144990085084624, iteration: 209265
loss: 0.9980642795562744,grad_norm: 0.8259860160940884, iteration: 209266
loss: 0.9884476661682129,grad_norm: 0.9160465986786561, iteration: 209267
loss: 0.9815292358398438,grad_norm: 0.8850077170300187, iteration: 209268
loss: 0.9904366135597229,grad_norm: 0.999999318530523, iteration: 209269
loss: 0.9762169122695923,grad_norm: 0.8931462374998772, iteration: 209270
loss: 1.0319364070892334,grad_norm: 0.9999991246020393, iteration: 209271
loss: 0.9568489193916321,grad_norm: 0.8578813560862697, iteration: 209272
loss: 1.0135893821716309,grad_norm: 0.9372770867904718, iteration: 209273
loss: 0.9998367428779602,grad_norm: 0.9999991347110391, iteration: 209274
loss: 0.95555579662323,grad_norm: 0.9015368931745115, iteration: 209275
loss: 0.9491804242134094,grad_norm: 0.8726959264854026, iteration: 209276
loss: 1.0087214708328247,grad_norm: 0.8560145234085332, iteration: 209277
loss: 1.0176881551742554,grad_norm: 0.9999989682085386, iteration: 209278
loss: 0.9641305208206177,grad_norm: 0.9925370468503175, iteration: 209279
loss: 1.0304017066955566,grad_norm: 0.9999990731141694, iteration: 209280
loss: 0.9941705465316772,grad_norm: 0.9418438375404227, iteration: 209281
loss: 1.0038856267929077,grad_norm: 0.7553212505681115, iteration: 209282
loss: 1.040473222732544,grad_norm: 0.9657525083821189, iteration: 209283
loss: 0.99526047706604,grad_norm: 0.9999991030483002, iteration: 209284
loss: 0.9963580965995789,grad_norm: 0.9999989772980614, iteration: 209285
loss: 0.9997817873954773,grad_norm: 0.9999990583634683, iteration: 209286
loss: 0.9998674988746643,grad_norm: 0.9296957435768298, iteration: 209287
loss: 0.9816147089004517,grad_norm: 0.8570591545701467, iteration: 209288
loss: 1.0188343524932861,grad_norm: 0.9999990429557557, iteration: 209289
loss: 0.9923532605171204,grad_norm: 0.815692049663746, iteration: 209290
loss: 1.0060322284698486,grad_norm: 0.8071040890912436, iteration: 209291
loss: 0.9903163909912109,grad_norm: 0.999999187095703, iteration: 209292
loss: 0.9782233834266663,grad_norm: 0.9911082524779864, iteration: 209293
loss: 1.0289828777313232,grad_norm: 0.7372059334845623, iteration: 209294
loss: 0.9981673955917358,grad_norm: 0.9502505823231876, iteration: 209295
loss: 0.9795840382575989,grad_norm: 0.9870700194905276, iteration: 209296
loss: 0.9763978719711304,grad_norm: 0.9468824562651504, iteration: 209297
loss: 1.000847578048706,grad_norm: 0.8185687805982919, iteration: 209298
loss: 1.0207247734069824,grad_norm: 0.9500321742334519, iteration: 209299
loss: 1.0022715330123901,grad_norm: 0.9394411514849491, iteration: 209300
loss: 1.0207446813583374,grad_norm: 0.9999989651085965, iteration: 209301
loss: 1.0074666738510132,grad_norm: 0.9872459439979829, iteration: 209302
loss: 0.9803402423858643,grad_norm: 0.9463740686772861, iteration: 209303
loss: 1.0507265329360962,grad_norm: 0.9293035250841116, iteration: 209304
loss: 0.9905750751495361,grad_norm: 0.8117195941484893, iteration: 209305
loss: 0.9934782385826111,grad_norm: 0.8671282047212218, iteration: 209306
loss: 1.0074596405029297,grad_norm: 0.9875896864103426, iteration: 209307
loss: 1.0026594400405884,grad_norm: 0.9999991303326572, iteration: 209308
loss: 0.989518404006958,grad_norm: 0.9999990296783581, iteration: 209309
loss: 0.9922483563423157,grad_norm: 0.7944487093589091, iteration: 209310
loss: 1.0157841444015503,grad_norm: 0.9078993818015724, iteration: 209311
loss: 1.0122687816619873,grad_norm: 0.9810869894645002, iteration: 209312
loss: 1.0277304649353027,grad_norm: 0.9999999251530037, iteration: 209313
loss: 0.9850868582725525,grad_norm: 0.9516568931500946, iteration: 209314
loss: 1.0307178497314453,grad_norm: 0.999998889966561, iteration: 209315
loss: 1.0236395597457886,grad_norm: 0.9300335931847379, iteration: 209316
loss: 0.9746195673942566,grad_norm: 0.9519807640089817, iteration: 209317
loss: 0.9861845970153809,grad_norm: 0.8298024238866949, iteration: 209318
loss: 1.105974793434143,grad_norm: 0.9999996547342659, iteration: 209319
loss: 1.0039420127868652,grad_norm: 0.9999995908076459, iteration: 209320
loss: 0.9932554960250854,grad_norm: 0.9999991941543458, iteration: 209321
loss: 1.0182386636734009,grad_norm: 0.999999233447919, iteration: 209322
loss: 1.0223091840744019,grad_norm: 0.8191066158946912, iteration: 209323
loss: 1.0272228717803955,grad_norm: 0.9138366967624929, iteration: 209324
loss: 0.9953837394714355,grad_norm: 0.9238819431003675, iteration: 209325
loss: 1.0285120010375977,grad_norm: 0.9999991754127605, iteration: 209326
loss: 1.0258617401123047,grad_norm: 0.9999994953487682, iteration: 209327
loss: 1.0273674726486206,grad_norm: 0.9999994388083818, iteration: 209328
loss: 0.9929839968681335,grad_norm: 0.9665331947244274, iteration: 209329
loss: 1.0115077495574951,grad_norm: 0.7882515038682694, iteration: 209330
loss: 1.0111706256866455,grad_norm: 0.9722216555030844, iteration: 209331
loss: 1.0040334463119507,grad_norm: 0.8052256429458851, iteration: 209332
loss: 1.0428836345672607,grad_norm: 0.9999991442703042, iteration: 209333
loss: 0.9911426901817322,grad_norm: 0.7839583367493724, iteration: 209334
loss: 1.0217374563217163,grad_norm: 0.9999991527120877, iteration: 209335
loss: 0.9796879887580872,grad_norm: 0.9017757029847141, iteration: 209336
loss: 1.0338214635849,grad_norm: 0.8348220343776516, iteration: 209337
loss: 1.0052348375320435,grad_norm: 0.8693527905230416, iteration: 209338
loss: 1.0097501277923584,grad_norm: 0.9999990503934952, iteration: 209339
loss: 0.9941508769989014,grad_norm: 0.9999990609509474, iteration: 209340
loss: 1.0198040008544922,grad_norm: 0.7964637557407562, iteration: 209341
loss: 1.0059814453125,grad_norm: 0.8227205661864301, iteration: 209342
loss: 0.9965041875839233,grad_norm: 0.9999996761182751, iteration: 209343
loss: 0.9879536628723145,grad_norm: 0.7909487160071014, iteration: 209344
loss: 1.025908350944519,grad_norm: 0.8290836059503807, iteration: 209345
loss: 1.0361990928649902,grad_norm: 0.9999990303689578, iteration: 209346
loss: 1.0404213666915894,grad_norm: 0.9999990559494236, iteration: 209347
loss: 1.0103739500045776,grad_norm: 0.9999989943795795, iteration: 209348
loss: 1.0205985307693481,grad_norm: 0.9044002050433156, iteration: 209349
loss: 0.9810123443603516,grad_norm: 0.9980855073281445, iteration: 209350
loss: 0.9629898071289062,grad_norm: 0.9999991092354147, iteration: 209351
loss: 0.9712338447570801,grad_norm: 0.917329118877107, iteration: 209352
loss: 1.0456434488296509,grad_norm: 0.9154258355738526, iteration: 209353
loss: 1.0146514177322388,grad_norm: 0.8845550359915498, iteration: 209354
loss: 0.9722375273704529,grad_norm: 0.8916366394442651, iteration: 209355
loss: 1.0121310949325562,grad_norm: 0.99999919071303, iteration: 209356
loss: 1.0459072589874268,grad_norm: 0.9598574287960318, iteration: 209357
loss: 0.9568411707878113,grad_norm: 0.8173889266556831, iteration: 209358
loss: 1.0128854513168335,grad_norm: 0.9999990826153388, iteration: 209359
loss: 0.9783114790916443,grad_norm: 0.9409668481979736, iteration: 209360
loss: 1.0210058689117432,grad_norm: 0.8282547896870814, iteration: 209361
loss: 0.9789273738861084,grad_norm: 0.959591580700146, iteration: 209362
loss: 1.09913170337677,grad_norm: 0.9999997060597812, iteration: 209363
loss: 0.9958052635192871,grad_norm: 0.8580336502490047, iteration: 209364
loss: 1.0298070907592773,grad_norm: 0.9999992921780894, iteration: 209365
loss: 0.9881709218025208,grad_norm: 0.9999992643524118, iteration: 209366
loss: 1.0214900970458984,grad_norm: 0.9999996432999038, iteration: 209367
loss: 0.9849980473518372,grad_norm: 0.9584388026845757, iteration: 209368
loss: 0.9998929500579834,grad_norm: 0.9999992627256016, iteration: 209369
loss: 0.9958842396736145,grad_norm: 0.9999990839885906, iteration: 209370
loss: 0.9680010080337524,grad_norm: 0.9238687862406173, iteration: 209371
loss: 0.985692024230957,grad_norm: 0.9605915921714815, iteration: 209372
loss: 1.0410704612731934,grad_norm: 0.9999989368428085, iteration: 209373
loss: 0.9978837370872498,grad_norm: 0.9455415348341273, iteration: 209374
loss: 0.9736651182174683,grad_norm: 0.9999992577390647, iteration: 209375
loss: 0.9499030113220215,grad_norm: 0.937348335881804, iteration: 209376
loss: 0.9974817633628845,grad_norm: 0.9999989572452371, iteration: 209377
loss: 0.9869680404663086,grad_norm: 0.9061260805607154, iteration: 209378
loss: 0.9772882461547852,grad_norm: 0.9999991291265353, iteration: 209379
loss: 0.989793598651886,grad_norm: 0.9999990902512177, iteration: 209380
loss: 0.9890891313552856,grad_norm: 0.8186566768561017, iteration: 209381
loss: 1.0033429861068726,grad_norm: 0.9696248455873685, iteration: 209382
loss: 0.9747991561889648,grad_norm: 0.8859669363518425, iteration: 209383
loss: 0.9626243710517883,grad_norm: 0.9999991308000462, iteration: 209384
loss: 1.0189722776412964,grad_norm: 0.9700918624346169, iteration: 209385
loss: 1.034500002861023,grad_norm: 0.9999989304786784, iteration: 209386
loss: 1.0200674533843994,grad_norm: 0.9999989965913075, iteration: 209387
loss: 1.0232807397842407,grad_norm: 0.7888957849842426, iteration: 209388
loss: 1.0404118299484253,grad_norm: 0.9999996102341283, iteration: 209389
loss: 1.0133494138717651,grad_norm: 0.9999991647514872, iteration: 209390
loss: 1.014288306236267,grad_norm: 0.795097186151271, iteration: 209391
loss: 1.0365118980407715,grad_norm: 0.9999996953382282, iteration: 209392
loss: 0.9702346920967102,grad_norm: 0.9342126784239966, iteration: 209393
loss: 1.0021162033081055,grad_norm: 0.9842717946541756, iteration: 209394
loss: 0.9785836935043335,grad_norm: 0.8407951293247202, iteration: 209395
loss: 0.999172031879425,grad_norm: 0.9999991447542875, iteration: 209396
loss: 0.9781123995780945,grad_norm: 0.9630121831031714, iteration: 209397
loss: 1.013572096824646,grad_norm: 0.8633394592433004, iteration: 209398
loss: 1.0160146951675415,grad_norm: 0.9642946950521307, iteration: 209399
loss: 0.9961543083190918,grad_norm: 0.9999991583629348, iteration: 209400
loss: 1.007212519645691,grad_norm: 0.9335395726841657, iteration: 209401
loss: 1.003244161605835,grad_norm: 0.9632572542772141, iteration: 209402
loss: 1.0213459730148315,grad_norm: 0.8663625939380768, iteration: 209403
loss: 1.0029879808425903,grad_norm: 0.9999991312010629, iteration: 209404
loss: 1.012153148651123,grad_norm: 0.9295266496057947, iteration: 209405
loss: 1.0208479166030884,grad_norm: 0.9389763155218567, iteration: 209406
loss: 0.9607822895050049,grad_norm: 0.9999990036448902, iteration: 209407
loss: 0.9988937377929688,grad_norm: 0.9999991787418278, iteration: 209408
loss: 0.9931656718254089,grad_norm: 0.8652029600149561, iteration: 209409
loss: 1.0135736465454102,grad_norm: 0.7208891768182515, iteration: 209410
loss: 1.0140128135681152,grad_norm: 0.9999991267549585, iteration: 209411
loss: 1.0262115001678467,grad_norm: 0.8533794280656786, iteration: 209412
loss: 1.040022611618042,grad_norm: 0.9999991691995622, iteration: 209413
loss: 1.0536848306655884,grad_norm: 0.8795825896564085, iteration: 209414
loss: 0.9906303286552429,grad_norm: 0.8798682826235142, iteration: 209415
loss: 0.9875063896179199,grad_norm: 0.9999992160868773, iteration: 209416
loss: 0.9590468406677246,grad_norm: 0.8839113496382017, iteration: 209417
loss: 1.0084245204925537,grad_norm: 0.8849914031282646, iteration: 209418
loss: 0.9975138902664185,grad_norm: 0.9824269734674186, iteration: 209419
loss: 0.9761021733283997,grad_norm: 0.939964514229393, iteration: 209420
loss: 1.0366829633712769,grad_norm: 0.958445795479792, iteration: 209421
loss: 0.9956482648849487,grad_norm: 0.9910411639297084, iteration: 209422
loss: 1.0002976655960083,grad_norm: 0.892171866900625, iteration: 209423
loss: 1.0047565698623657,grad_norm: 0.909650079942583, iteration: 209424
loss: 1.0234228372573853,grad_norm: 0.9999990956060754, iteration: 209425
loss: 0.9969311356544495,grad_norm: 0.8610858098048948, iteration: 209426
loss: 1.0194594860076904,grad_norm: 0.9200715160451869, iteration: 209427
loss: 1.016971468925476,grad_norm: 0.8295770188067585, iteration: 209428
loss: 0.9846014380455017,grad_norm: 0.8427492889773186, iteration: 209429
loss: 1.0135223865509033,grad_norm: 0.9999990557473737, iteration: 209430
loss: 0.980572521686554,grad_norm: 0.9872525716037572, iteration: 209431
loss: 0.9761212468147278,grad_norm: 0.999999176426988, iteration: 209432
loss: 0.9991597533226013,grad_norm: 0.9198299152187565, iteration: 209433
loss: 0.9736196398735046,grad_norm: 0.9098390834230401, iteration: 209434
loss: 0.9919815063476562,grad_norm: 0.9999990607000323, iteration: 209435
loss: 1.001366138458252,grad_norm: 0.8547378024234863, iteration: 209436
loss: 0.9818088412284851,grad_norm: 0.9875139699510065, iteration: 209437
loss: 1.0390467643737793,grad_norm: 0.9999992682488021, iteration: 209438
loss: 1.022570252418518,grad_norm: 0.9999992130432488, iteration: 209439
loss: 0.9862313866615295,grad_norm: 0.9939609078775931, iteration: 209440
loss: 1.0267070531845093,grad_norm: 0.9788469206083159, iteration: 209441
loss: 0.977724015712738,grad_norm: 0.9999991820740501, iteration: 209442
loss: 1.1003837585449219,grad_norm: 0.9930810853626431, iteration: 209443
loss: 0.9972755908966064,grad_norm: 0.7666110517019501, iteration: 209444
loss: 1.0033655166625977,grad_norm: 0.9066796910879625, iteration: 209445
loss: 1.0252879858016968,grad_norm: 0.9999992502836847, iteration: 209446
loss: 0.9917512536048889,grad_norm: 0.8325875867159584, iteration: 209447
loss: 0.9860706925392151,grad_norm: 0.8933127431977443, iteration: 209448
loss: 0.9599527716636658,grad_norm: 0.9680971591474784, iteration: 209449
loss: 0.9689237475395203,grad_norm: 0.9999992236547294, iteration: 209450
loss: 0.9809117913246155,grad_norm: 0.9999992250617347, iteration: 209451
loss: 0.9686017036437988,grad_norm: 0.9448398411414539, iteration: 209452
loss: 1.0257484912872314,grad_norm: 0.9999991795800349, iteration: 209453
loss: 0.9680280089378357,grad_norm: 0.9999990642057557, iteration: 209454
loss: 1.0090643167495728,grad_norm: 0.9999991312458254, iteration: 209455
loss: 1.1155930757522583,grad_norm: 0.9999999555673262, iteration: 209456
loss: 0.9729481339454651,grad_norm: 0.9999991868428542, iteration: 209457
loss: 0.997425377368927,grad_norm: 0.9999991836295118, iteration: 209458
loss: 0.961061418056488,grad_norm: 0.7246360719579347, iteration: 209459
loss: 1.0164361000061035,grad_norm: 0.9973108794298366, iteration: 209460
loss: 1.0239365100860596,grad_norm: 0.9959344391857424, iteration: 209461
loss: 1.0474425554275513,grad_norm: 0.9999991363688641, iteration: 209462
loss: 1.1468809843063354,grad_norm: 0.999999875349103, iteration: 209463
loss: 1.0057427883148193,grad_norm: 0.8926729165685477, iteration: 209464
loss: 0.9805706739425659,grad_norm: 0.9999993507895107, iteration: 209465
loss: 0.9943107962608337,grad_norm: 0.8490445812361226, iteration: 209466
loss: 1.1613576412200928,grad_norm: 0.9999994958941777, iteration: 209467
loss: 1.0221081972122192,grad_norm: 0.8961088541077141, iteration: 209468
loss: 1.16327965259552,grad_norm: 0.9999991392246055, iteration: 209469
loss: 0.9847965240478516,grad_norm: 0.86243588662054, iteration: 209470
loss: 1.0946383476257324,grad_norm: 0.9999989359034976, iteration: 209471
loss: 0.9992430806159973,grad_norm: 0.9999989442796632, iteration: 209472
loss: 1.067712664604187,grad_norm: 0.9999994761177663, iteration: 209473
loss: 1.0411030054092407,grad_norm: 0.9999990101322007, iteration: 209474
loss: 0.9765936732292175,grad_norm: 0.9999990793784234, iteration: 209475
loss: 1.0108821392059326,grad_norm: 0.8362695940197478, iteration: 209476
loss: 1.011615514755249,grad_norm: 0.9728956724628206, iteration: 209477
loss: 0.9815937876701355,grad_norm: 0.8442719717497342, iteration: 209478
loss: 1.0252877473831177,grad_norm: 0.9592110038781183, iteration: 209479
loss: 1.0795425176620483,grad_norm: 0.9999991907014791, iteration: 209480
loss: 1.0688960552215576,grad_norm: 0.9999998417880582, iteration: 209481
loss: 1.0785235166549683,grad_norm: 0.9999992691229135, iteration: 209482
loss: 0.9985559582710266,grad_norm: 0.9117817516308042, iteration: 209483
loss: 1.0163443088531494,grad_norm: 0.8716810729019728, iteration: 209484
loss: 0.9871283173561096,grad_norm: 0.9190546301239485, iteration: 209485
loss: 0.9581142067909241,grad_norm: 0.9386502572619144, iteration: 209486
loss: 1.0331212282180786,grad_norm: 0.999999148142407, iteration: 209487
loss: 0.9961124658584595,grad_norm: 0.8286876772097205, iteration: 209488
loss: 0.9943075776100159,grad_norm: 0.9471205172076425, iteration: 209489
loss: 1.0402601957321167,grad_norm: 0.9999992086081455, iteration: 209490
loss: 0.9849850535392761,grad_norm: 0.8823478721304517, iteration: 209491
loss: 1.0032089948654175,grad_norm: 0.9255622920197796, iteration: 209492
loss: 0.9547680616378784,grad_norm: 0.9999990468804838, iteration: 209493
loss: 1.0885424613952637,grad_norm: 0.9999998071678738, iteration: 209494
loss: 1.0543428659439087,grad_norm: 0.9999995592861739, iteration: 209495
loss: 1.1069246530532837,grad_norm: 0.9999992098334072, iteration: 209496
loss: 0.9967679381370544,grad_norm: 0.9999991518174679, iteration: 209497
loss: 1.0411409139633179,grad_norm: 0.9999992607111404, iteration: 209498
loss: 1.0173510313034058,grad_norm: 0.9407153210258855, iteration: 209499
loss: 0.9935522675514221,grad_norm: 0.9999992599694228, iteration: 209500
loss: 0.9831112623214722,grad_norm: 0.999999278235016, iteration: 209501
loss: 0.9813210368156433,grad_norm: 0.9999991194034819, iteration: 209502
loss: 0.9939126968383789,grad_norm: 0.9999991471554336, iteration: 209503
loss: 1.0279275178909302,grad_norm: 0.999999156295033, iteration: 209504
loss: 1.11754310131073,grad_norm: 0.9999994511218878, iteration: 209505
loss: 1.0612925291061401,grad_norm: 0.9999998149078535, iteration: 209506
loss: 1.0062679052352905,grad_norm: 0.9999991982007627, iteration: 209507
loss: 1.0149338245391846,grad_norm: 0.9999997382298088, iteration: 209508
loss: 1.0089575052261353,grad_norm: 0.8392816871067567, iteration: 209509
loss: 1.0322773456573486,grad_norm: 0.8537147866536, iteration: 209510
loss: 1.011555552482605,grad_norm: 0.9367558677674668, iteration: 209511
loss: 0.9907832741737366,grad_norm: 0.7626286638001816, iteration: 209512
loss: 1.0115447044372559,grad_norm: 0.9999994616367054, iteration: 209513
loss: 1.1125340461730957,grad_norm: 0.999999394975092, iteration: 209514
loss: 0.9769114851951599,grad_norm: 0.9999990693622821, iteration: 209515
loss: 0.9795770645141602,grad_norm: 0.8370951686394188, iteration: 209516
loss: 1.0306429862976074,grad_norm: 0.999999367010306, iteration: 209517
loss: 0.984182596206665,grad_norm: 0.9075344855662525, iteration: 209518
loss: 1.0264852046966553,grad_norm: 0.9999989943112801, iteration: 209519
loss: 1.0039787292480469,grad_norm: 0.8877963447043653, iteration: 209520
loss: 0.9842715263366699,grad_norm: 0.8473895980226368, iteration: 209521
loss: 1.0127136707305908,grad_norm: 0.9999992465119562, iteration: 209522
loss: 0.9991165399551392,grad_norm: 0.9999993194287727, iteration: 209523
loss: 0.9976349472999573,grad_norm: 0.9999992711735322, iteration: 209524
loss: 0.9960638880729675,grad_norm: 0.9999996636165679, iteration: 209525
loss: 1.024056315422058,grad_norm: 0.99999945595122, iteration: 209526
loss: 1.051342487335205,grad_norm: 0.9999998408554042, iteration: 209527
loss: 0.983609676361084,grad_norm: 0.8170503951127213, iteration: 209528
loss: 1.0061713457107544,grad_norm: 0.9999990210144735, iteration: 209529
loss: 0.9913010597229004,grad_norm: 0.999999070335759, iteration: 209530
loss: 1.0309808254241943,grad_norm: 0.9999990460502097, iteration: 209531
loss: 1.0096936225891113,grad_norm: 0.9556445760648272, iteration: 209532
loss: 1.0308700799942017,grad_norm: 0.9999992604126562, iteration: 209533
loss: 0.9940363168716431,grad_norm: 0.9999991477704889, iteration: 209534
loss: 1.0183923244476318,grad_norm: 0.9945712925368216, iteration: 209535
loss: 0.9919883012771606,grad_norm: 0.9999992163035296, iteration: 209536
loss: 1.126784086227417,grad_norm: 0.9999994462406611, iteration: 209537
loss: 0.9689307808876038,grad_norm: 0.999998979684036, iteration: 209538
loss: 1.007812261581421,grad_norm: 0.9999991265635921, iteration: 209539
loss: 0.9790359139442444,grad_norm: 0.900167385542613, iteration: 209540
loss: 1.0721561908721924,grad_norm: 0.9999992177554256, iteration: 209541
loss: 0.9623229503631592,grad_norm: 0.9999991877127448, iteration: 209542
loss: 1.1383531093597412,grad_norm: 0.9999994113343167, iteration: 209543
loss: 1.0141726732254028,grad_norm: 0.9999991638400731, iteration: 209544
loss: 1.0359255075454712,grad_norm: 0.9698953670956812, iteration: 209545
loss: 0.9850453734397888,grad_norm: 0.9708048311256309, iteration: 209546
loss: 1.0230238437652588,grad_norm: 0.9999992647356235, iteration: 209547
loss: 0.9994043707847595,grad_norm: 0.8970312058672278, iteration: 209548
loss: 0.9975059628486633,grad_norm: 0.8247597948364653, iteration: 209549
loss: 0.9874114394187927,grad_norm: 0.9999991362939971, iteration: 209550
loss: 1.0054144859313965,grad_norm: 0.908830710966165, iteration: 209551
loss: 1.0245872735977173,grad_norm: 0.9999990630031282, iteration: 209552
loss: 0.9810735583305359,grad_norm: 0.9999991030483782, iteration: 209553
loss: 1.0131114721298218,grad_norm: 0.924991151075823, iteration: 209554
loss: 0.9837942123413086,grad_norm: 0.9999992520623385, iteration: 209555
loss: 1.037402868270874,grad_norm: 0.999999178350074, iteration: 209556
loss: 0.9764954447746277,grad_norm: 0.96497848664193, iteration: 209557
loss: 1.0471998453140259,grad_norm: 0.9999991224973197, iteration: 209558
loss: 1.0417959690093994,grad_norm: 0.9999990106752149, iteration: 209559
loss: 1.0235432386398315,grad_norm: 0.9576672734687207, iteration: 209560
loss: 0.9833891987800598,grad_norm: 0.8179532931965867, iteration: 209561
loss: 1.039475917816162,grad_norm: 0.772184251467557, iteration: 209562
loss: 0.987030565738678,grad_norm: 0.9975294065394917, iteration: 209563
loss: 1.0179160833358765,grad_norm: 0.9999990277325779, iteration: 209564
loss: 1.0158755779266357,grad_norm: 0.935588305631308, iteration: 209565
loss: 1.0430182218551636,grad_norm: 0.9999998807691507, iteration: 209566
loss: 0.979343056678772,grad_norm: 0.8905472122879872, iteration: 209567
loss: 0.9759281277656555,grad_norm: 0.9562173396703466, iteration: 209568
loss: 0.9884583950042725,grad_norm: 0.9431230972597142, iteration: 209569
loss: 0.9767985343933105,grad_norm: 0.9999998306751983, iteration: 209570
loss: 1.0374269485473633,grad_norm: 0.9070392644949865, iteration: 209571
loss: 1.0188337564468384,grad_norm: 0.9358745787684539, iteration: 209572
loss: 0.9739908576011658,grad_norm: 0.8547935271141797, iteration: 209573
loss: 1.0342241525650024,grad_norm: 0.8268367873207928, iteration: 209574
loss: 1.0285285711288452,grad_norm: 0.8380173196907158, iteration: 209575
loss: 1.0026377439498901,grad_norm: 0.9684411851946549, iteration: 209576
loss: 0.987801194190979,grad_norm: 0.9999991212677907, iteration: 209577
loss: 1.0029141902923584,grad_norm: 0.9999991639465442, iteration: 209578
loss: 1.0080398321151733,grad_norm: 0.9999989817446154, iteration: 209579
loss: 1.0015467405319214,grad_norm: 0.9224203329895639, iteration: 209580
loss: 1.0279561281204224,grad_norm: 0.9999991230517202, iteration: 209581
loss: 1.0631181001663208,grad_norm: 0.99999909675001, iteration: 209582
loss: 0.9917111992835999,grad_norm: 0.9297465262358268, iteration: 209583
loss: 1.0244760513305664,grad_norm: 0.9999997174963007, iteration: 209584
loss: 0.9892547726631165,grad_norm: 0.9945516320739608, iteration: 209585
loss: 1.0352683067321777,grad_norm: 0.8298094398998458, iteration: 209586
loss: 1.0805540084838867,grad_norm: 0.9999994732379833, iteration: 209587
loss: 1.054366111755371,grad_norm: 0.9999991508050102, iteration: 209588
loss: 1.0014846324920654,grad_norm: 0.8007472648307085, iteration: 209589
loss: 1.0054537057876587,grad_norm: 0.999999133231531, iteration: 209590
loss: 0.9962654113769531,grad_norm: 0.9999992395804188, iteration: 209591
loss: 0.9814802408218384,grad_norm: 0.9999999620254465, iteration: 209592
loss: 1.019973635673523,grad_norm: 0.8940509572432751, iteration: 209593
loss: 1.0004627704620361,grad_norm: 0.999999100346422, iteration: 209594
loss: 1.0156927108764648,grad_norm: 0.9953918342669577, iteration: 209595
loss: 0.9729198217391968,grad_norm: 0.9999990267740296, iteration: 209596
loss: 0.973749041557312,grad_norm: 0.8603385908776059, iteration: 209597
loss: 0.9764037132263184,grad_norm: 0.999999114019225, iteration: 209598
loss: 1.002569317817688,grad_norm: 0.9742171516951019, iteration: 209599
loss: 0.9928109049797058,grad_norm: 0.9741176544562768, iteration: 209600
loss: 0.9864168763160706,grad_norm: 0.9372462859410086, iteration: 209601
loss: 1.1312154531478882,grad_norm: 0.9713732513060112, iteration: 209602
loss: 1.0214225053787231,grad_norm: 0.8741197694321379, iteration: 209603
loss: 0.9815071225166321,grad_norm: 0.9721815282765465, iteration: 209604
loss: 1.0073078870773315,grad_norm: 0.9610807304995518, iteration: 209605
loss: 1.021249532699585,grad_norm: 0.9268441000058548, iteration: 209606
loss: 0.9773332476615906,grad_norm: 0.899456558253869, iteration: 209607
loss: 0.9892118573188782,grad_norm: 0.9999990770182027, iteration: 209608
loss: 1.0114775896072388,grad_norm: 0.6999258626926319, iteration: 209609
loss: 1.0228415727615356,grad_norm: 0.9383389415072731, iteration: 209610
loss: 0.9964753985404968,grad_norm: 0.8520990448011222, iteration: 209611
loss: 1.0380446910858154,grad_norm: 0.9999993357593028, iteration: 209612
loss: 0.9580013155937195,grad_norm: 0.9999989986641246, iteration: 209613
loss: 0.9777620434761047,grad_norm: 0.9144640922444376, iteration: 209614
loss: 1.0075678825378418,grad_norm: 0.8162521157308367, iteration: 209615
loss: 1.0152194499969482,grad_norm: 0.9741500253155772, iteration: 209616
loss: 0.960912823677063,grad_norm: 0.8883156598338036, iteration: 209617
loss: 1.0195164680480957,grad_norm: 0.8305804835220809, iteration: 209618
loss: 1.026518702507019,grad_norm: 0.9741857116243162, iteration: 209619
loss: 0.9733335375785828,grad_norm: 0.9999992457858128, iteration: 209620
loss: 0.9682056903839111,grad_norm: 0.945196071574764, iteration: 209621
loss: 0.9978079199790955,grad_norm: 0.8639331131405487, iteration: 209622
loss: 0.9967172145843506,grad_norm: 0.9999990031014045, iteration: 209623
loss: 1.0107721090316772,grad_norm: 0.9887634750956867, iteration: 209624
loss: 0.9689110517501831,grad_norm: 0.9999990977170987, iteration: 209625
loss: 1.0243178606033325,grad_norm: 0.9999994723572745, iteration: 209626
loss: 0.9959074258804321,grad_norm: 0.9999992579939252, iteration: 209627
loss: 0.9697651267051697,grad_norm: 0.9999991450412116, iteration: 209628
loss: 1.0078085660934448,grad_norm: 0.9999991672385755, iteration: 209629
loss: 0.9879360795021057,grad_norm: 0.8285975750901404, iteration: 209630
loss: 0.980138897895813,grad_norm: 0.9999992281363885, iteration: 209631
loss: 1.041761040687561,grad_norm: 0.9999990424249063, iteration: 209632
loss: 0.9582618474960327,grad_norm: 0.9270481057706185, iteration: 209633
loss: 1.0148310661315918,grad_norm: 0.9999990385128926, iteration: 209634
loss: 0.9800777435302734,grad_norm: 0.9652725894750377, iteration: 209635
loss: 1.028812289237976,grad_norm: 0.999999213501045, iteration: 209636
loss: 0.9877046942710876,grad_norm: 0.9999991068386671, iteration: 209637
loss: 0.9924719929695129,grad_norm: 0.9999990376455742, iteration: 209638
loss: 0.9841553568840027,grad_norm: 0.7772917433779095, iteration: 209639
loss: 1.006032109260559,grad_norm: 0.9673285082763976, iteration: 209640
loss: 1.0155154466629028,grad_norm: 0.8627035887569388, iteration: 209641
loss: 0.9937950968742371,grad_norm: 0.9999998173856144, iteration: 209642
loss: 0.9907781481742859,grad_norm: 0.8494732457408508, iteration: 209643
loss: 0.9893836379051208,grad_norm: 0.9356752893259164, iteration: 209644
loss: 0.9824277758598328,grad_norm: 0.9603772349841695, iteration: 209645
loss: 1.0136208534240723,grad_norm: 0.8733000239795656, iteration: 209646
loss: 1.0414131879806519,grad_norm: 0.8735811225472211, iteration: 209647
loss: 0.9547274708747864,grad_norm: 0.834608461890592, iteration: 209648
loss: 1.0050325393676758,grad_norm: 0.9999990865432822, iteration: 209649
loss: 1.053017497062683,grad_norm: 0.9217096729705607, iteration: 209650
loss: 0.9967754483222961,grad_norm: 0.9999992753450581, iteration: 209651
loss: 1.0603395700454712,grad_norm: 0.8735160261107201, iteration: 209652
loss: 1.0326802730560303,grad_norm: 0.9417231549642415, iteration: 209653
loss: 0.9916740655899048,grad_norm: 0.9999991142816166, iteration: 209654
loss: 1.0355690717697144,grad_norm: 0.9493536555987873, iteration: 209655
loss: 0.9958202242851257,grad_norm: 1.0000000198619514, iteration: 209656
loss: 1.0052897930145264,grad_norm: 1.0000000027401892, iteration: 209657
loss: 1.0297335386276245,grad_norm: 0.8384364764705071, iteration: 209658
loss: 0.996303141117096,grad_norm: 0.9999995558316568, iteration: 209659
loss: 0.9841070771217346,grad_norm: 0.913991910889937, iteration: 209660
loss: 0.9982765913009644,grad_norm: 0.9999990319969273, iteration: 209661
loss: 1.0026437044143677,grad_norm: 0.96038943084639, iteration: 209662
loss: 0.9438992142677307,grad_norm: 0.8527231934689091, iteration: 209663
loss: 0.9996225833892822,grad_norm: 0.9233790041150629, iteration: 209664
loss: 1.0192633867263794,grad_norm: 0.9999991717239002, iteration: 209665
loss: 0.9936352968215942,grad_norm: 0.999999281602866, iteration: 209666
loss: 1.0389286279678345,grad_norm: 0.9874770993646962, iteration: 209667
loss: 0.970665454864502,grad_norm: 0.9035401928167957, iteration: 209668
loss: 1.0413672924041748,grad_norm: 0.836327604623098, iteration: 209669
loss: 1.0246410369873047,grad_norm: 0.8831839053181603, iteration: 209670
loss: 0.9625467658042908,grad_norm: 0.9848487448402525, iteration: 209671
loss: 1.0612993240356445,grad_norm: 0.9949114668005173, iteration: 209672
loss: 0.9810232520103455,grad_norm: 0.9999990418837575, iteration: 209673
loss: 0.9985746741294861,grad_norm: 0.9099914942893531, iteration: 209674
loss: 0.9860753417015076,grad_norm: 0.951638872143781, iteration: 209675
loss: 0.9854987859725952,grad_norm: 0.9999990883876221, iteration: 209676
loss: 1.0939944982528687,grad_norm: 0.9999995775900709, iteration: 209677
loss: 1.0017831325531006,grad_norm: 0.9999998793196296, iteration: 209678
loss: 0.9839609861373901,grad_norm: 0.8613079936456847, iteration: 209679
loss: 1.0496492385864258,grad_norm: 0.9999990283225039, iteration: 209680
loss: 0.9986463189125061,grad_norm: 0.9660572320761317, iteration: 209681
loss: 1.0022023916244507,grad_norm: 0.9999993558939656, iteration: 209682
loss: 1.0039297342300415,grad_norm: 0.9999990282002871, iteration: 209683
loss: 1.0251842737197876,grad_norm: 0.9999993243463227, iteration: 209684
loss: 1.026696801185608,grad_norm: 0.9999992715130012, iteration: 209685
loss: 0.9797230362892151,grad_norm: 0.9127716173010059, iteration: 209686
loss: 1.0006870031356812,grad_norm: 0.9999990983966051, iteration: 209687
loss: 1.0246756076812744,grad_norm: 0.9999989846835391, iteration: 209688
loss: 1.0079069137573242,grad_norm: 0.9284719940838724, iteration: 209689
loss: 0.9985741376876831,grad_norm: 0.9193023441566889, iteration: 209690
loss: 1.027036428451538,grad_norm: 0.9101929398028915, iteration: 209691
loss: 0.9895157217979431,grad_norm: 0.7781490006888877, iteration: 209692
loss: 0.9962556958198547,grad_norm: 0.9636811059364409, iteration: 209693
loss: 0.9758132696151733,grad_norm: 0.9999990743735098, iteration: 209694
loss: 1.007875919342041,grad_norm: 0.7841413306333486, iteration: 209695
loss: 1.01069974899292,grad_norm: 0.999491521149776, iteration: 209696
loss: 1.0375016927719116,grad_norm: 0.9999991026554631, iteration: 209697
loss: 1.0103154182434082,grad_norm: 0.9999993834335854, iteration: 209698
loss: 1.00105881690979,grad_norm: 0.9999991535384528, iteration: 209699
loss: 0.990516722202301,grad_norm: 0.9186496366089446, iteration: 209700
loss: 1.012073278427124,grad_norm: 0.9999990614276987, iteration: 209701
loss: 1.0254967212677002,grad_norm: 0.9528464630808892, iteration: 209702
loss: 1.009598970413208,grad_norm: 0.9480022953158015, iteration: 209703
loss: 0.9811140298843384,grad_norm: 0.9019074443749932, iteration: 209704
loss: 0.969577431678772,grad_norm: 0.9045806390653671, iteration: 209705
loss: 1.006208896636963,grad_norm: 0.9666116720842844, iteration: 209706
loss: 1.0122599601745605,grad_norm: 0.9107934115032001, iteration: 209707
loss: 0.9850975871086121,grad_norm: 0.742857314707514, iteration: 209708
loss: 1.0248054265975952,grad_norm: 0.9999992013691646, iteration: 209709
loss: 0.9492040872573853,grad_norm: 0.9293793492508515, iteration: 209710
loss: 1.0575499534606934,grad_norm: 0.9999989968387117, iteration: 209711
loss: 1.0603916645050049,grad_norm: 0.9999991577930197, iteration: 209712
loss: 1.0187169313430786,grad_norm: 0.8768515049983504, iteration: 209713
loss: 1.006022334098816,grad_norm: 0.9196105179603061, iteration: 209714
loss: 0.996071457862854,grad_norm: 0.9402941926711214, iteration: 209715
loss: 1.0000578165054321,grad_norm: 0.999999207885535, iteration: 209716
loss: 0.9853859543800354,grad_norm: 0.8267762310428222, iteration: 209717
loss: 1.018252968788147,grad_norm: 0.8181945899891202, iteration: 209718
loss: 0.9855079054832458,grad_norm: 0.9835702625265822, iteration: 209719
loss: 0.993088960647583,grad_norm: 0.9999991791913739, iteration: 209720
loss: 1.0191837549209595,grad_norm: 0.9578786621667911, iteration: 209721
loss: 1.0141435861587524,grad_norm: 0.9765035931145027, iteration: 209722
loss: 0.9949043989181519,grad_norm: 0.9628748510975599, iteration: 209723
loss: 0.9750989079475403,grad_norm: 0.8529013208631523, iteration: 209724
loss: 1.0032340288162231,grad_norm: 0.7923438559050695, iteration: 209725
loss: 0.9824539422988892,grad_norm: 0.8765993010104092, iteration: 209726
loss: 1.0459222793579102,grad_norm: 0.9934647944381918, iteration: 209727
loss: 0.9674744009971619,grad_norm: 0.8022243872739374, iteration: 209728
loss: 1.0237239599227905,grad_norm: 0.9999992008291569, iteration: 209729
loss: 1.0128215551376343,grad_norm: 0.9999990608413146, iteration: 209730
loss: 0.9866533875465393,grad_norm: 0.9091192035824126, iteration: 209731
loss: 0.9663069844245911,grad_norm: 0.873581088677538, iteration: 209732
loss: 1.0284851789474487,grad_norm: 0.9087169090720805, iteration: 209733
loss: 0.962399423122406,grad_norm: 0.9566323699907784, iteration: 209734
loss: 1.008299708366394,grad_norm: 0.9999990072088921, iteration: 209735
loss: 0.9874973893165588,grad_norm: 0.9999990096303131, iteration: 209736
loss: 1.019210934638977,grad_norm: 0.926187886906365, iteration: 209737
loss: 1.0080609321594238,grad_norm: 0.8183735279747782, iteration: 209738
loss: 1.0094579458236694,grad_norm: 0.9574369199163733, iteration: 209739
loss: 1.0260004997253418,grad_norm: 0.9293838227956669, iteration: 209740
loss: 0.9977645874023438,grad_norm: 0.9999992056087865, iteration: 209741
loss: 1.0074434280395508,grad_norm: 0.9091161807536576, iteration: 209742
loss: 0.9743643403053284,grad_norm: 0.9999989987077841, iteration: 209743
loss: 0.9793323278427124,grad_norm: 0.9325090871239617, iteration: 209744
loss: 0.9896077513694763,grad_norm: 0.9750746886453253, iteration: 209745
loss: 1.005253553390503,grad_norm: 0.9999990934296876, iteration: 209746
loss: 1.0048879384994507,grad_norm: 0.9089537015369255, iteration: 209747
loss: 0.9606060981750488,grad_norm: 0.9738342028731687, iteration: 209748
loss: 1.008744716644287,grad_norm: 0.9754875855035112, iteration: 209749
loss: 1.027594804763794,grad_norm: 0.880196427744966, iteration: 209750
loss: 1.0119980573654175,grad_norm: 0.9999993174739322, iteration: 209751
loss: 0.9949432611465454,grad_norm: 0.9999990757646428, iteration: 209752
loss: 1.0239348411560059,grad_norm: 0.9213302180125544, iteration: 209753
loss: 1.0152400732040405,grad_norm: 0.9255028626356189, iteration: 209754
loss: 1.0001206398010254,grad_norm: 0.999999128437632, iteration: 209755
loss: 1.0173920392990112,grad_norm: 0.9753030571629321, iteration: 209756
loss: 1.0270750522613525,grad_norm: 0.8867475130041974, iteration: 209757
loss: 1.0128816366195679,grad_norm: 0.9999992947608907, iteration: 209758
loss: 0.9898054599761963,grad_norm: 0.9745493832353572, iteration: 209759
loss: 0.9773157238960266,grad_norm: 0.7432532336165696, iteration: 209760
loss: 1.0226216316223145,grad_norm: 0.9220947594547131, iteration: 209761
loss: 0.9938069581985474,grad_norm: 0.9999991863465443, iteration: 209762
loss: 1.0050990581512451,grad_norm: 0.8913017346874188, iteration: 209763
loss: 1.016595721244812,grad_norm: 0.7848309936698238, iteration: 209764
loss: 1.035736322402954,grad_norm: 0.9220426552454407, iteration: 209765
loss: 0.9674434661865234,grad_norm: 0.9282090952246069, iteration: 209766
loss: 1.0148588418960571,grad_norm: 0.9027764638017871, iteration: 209767
loss: 1.03424870967865,grad_norm: 0.9654670645026378, iteration: 209768
loss: 0.991614580154419,grad_norm: 0.9999992489057259, iteration: 209769
loss: 0.999398410320282,grad_norm: 0.9999988775853218, iteration: 209770
loss: 1.011906623840332,grad_norm: 0.9999992335573409, iteration: 209771
loss: 0.9499026536941528,grad_norm: 0.9999991504980877, iteration: 209772
loss: 1.009706735610962,grad_norm: 0.9999994433146319, iteration: 209773
loss: 1.011093258857727,grad_norm: 0.9020351976532548, iteration: 209774
loss: 1.0191638469696045,grad_norm: 0.878791485982008, iteration: 209775
loss: 0.9728419780731201,grad_norm: 0.9007918276162977, iteration: 209776
loss: 1.0010881423950195,grad_norm: 0.849414912668192, iteration: 209777
loss: 0.9819715023040771,grad_norm: 0.8308782808773246, iteration: 209778
loss: 1.0130023956298828,grad_norm: 0.9186510946607831, iteration: 209779
loss: 0.9942559003829956,grad_norm: 0.999999090416092, iteration: 209780
loss: 0.9633450508117676,grad_norm: 0.9504918960869043, iteration: 209781
loss: 1.013384222984314,grad_norm: 0.9999990120047464, iteration: 209782
loss: 1.0146398544311523,grad_norm: 0.871784426512253, iteration: 209783
loss: 1.01735258102417,grad_norm: 0.9422437218780884, iteration: 209784
loss: 0.9913737773895264,grad_norm: 0.9999996183539107, iteration: 209785
loss: 0.990699052810669,grad_norm: 0.9580272229289927, iteration: 209786
loss: 1.0062450170516968,grad_norm: 0.999999118237306, iteration: 209787
loss: 0.9697078466415405,grad_norm: 0.8822948622953233, iteration: 209788
loss: 1.0002042055130005,grad_norm: 0.8172497974897301, iteration: 209789
loss: 1.0312964916229248,grad_norm: 0.8950889845973289, iteration: 209790
loss: 0.9977537989616394,grad_norm: 0.7705014087806664, iteration: 209791
loss: 1.0114514827728271,grad_norm: 0.9628731979834367, iteration: 209792
loss: 1.0929806232452393,grad_norm: 0.999999813737794, iteration: 209793
loss: 0.9681122303009033,grad_norm: 0.9857462410355412, iteration: 209794
loss: 1.0213894844055176,grad_norm: 0.9736069116802943, iteration: 209795
loss: 1.0166488885879517,grad_norm: 0.9072055076012412, iteration: 209796
loss: 0.954947829246521,grad_norm: 0.9999990711142908, iteration: 209797
loss: 1.0044937133789062,grad_norm: 0.8976508594484542, iteration: 209798
loss: 0.9908971190452576,grad_norm: 0.9575608822003768, iteration: 209799
loss: 0.9965555667877197,grad_norm: 0.9832418101090372, iteration: 209800
loss: 0.998677670955658,grad_norm: 0.7836439031985768, iteration: 209801
loss: 0.9768197536468506,grad_norm: 0.999999164819309, iteration: 209802
loss: 1.030562162399292,grad_norm: 0.8874902330661437, iteration: 209803
loss: 0.9649317860603333,grad_norm: 0.9999992338820065, iteration: 209804
loss: 0.9850088953971863,grad_norm: 0.9999991229446383, iteration: 209805
loss: 0.9772154688835144,grad_norm: 0.9623262856710164, iteration: 209806
loss: 1.0175451040267944,grad_norm: 0.8800862999499911, iteration: 209807
loss: 1.0061224699020386,grad_norm: 0.9999993384643976, iteration: 209808
loss: 1.0425595045089722,grad_norm: 0.9999995750326685, iteration: 209809
loss: 0.9748157858848572,grad_norm: 0.9999990694861284, iteration: 209810
loss: 1.002363681793213,grad_norm: 0.9999990768817828, iteration: 209811
loss: 1.0848007202148438,grad_norm: 0.9999993209516315, iteration: 209812
loss: 0.9658908247947693,grad_norm: 0.8660058070397394, iteration: 209813
loss: 1.0836262702941895,grad_norm: 0.9561007687511985, iteration: 209814
loss: 1.0296471118927002,grad_norm: 0.9999991692322813, iteration: 209815
loss: 0.98165363073349,grad_norm: 0.9999992778054274, iteration: 209816
loss: 1.0151607990264893,grad_norm: 0.9958164791091179, iteration: 209817
loss: 1.0035995244979858,grad_norm: 0.9999991728672498, iteration: 209818
loss: 0.9990604519844055,grad_norm: 0.9034112089224273, iteration: 209819
loss: 1.0143730640411377,grad_norm: 0.7927522546297989, iteration: 209820
loss: 1.0450572967529297,grad_norm: 0.999999155054607, iteration: 209821
loss: 0.990384578704834,grad_norm: 0.9999992198053839, iteration: 209822
loss: 1.0422908067703247,grad_norm: 0.999999022058314, iteration: 209823
loss: 0.9825543165206909,grad_norm: 0.8916177705842832, iteration: 209824
loss: 1.0228629112243652,grad_norm: 0.9146321922472381, iteration: 209825
loss: 1.0053943395614624,grad_norm: 0.9999990326716037, iteration: 209826
loss: 1.0270724296569824,grad_norm: 0.8628425668646439, iteration: 209827
loss: 1.0132960081100464,grad_norm: 0.7585757232880076, iteration: 209828
loss: 0.9687631726264954,grad_norm: 0.8898018506733876, iteration: 209829
loss: 1.0007717609405518,grad_norm: 0.9203824775497973, iteration: 209830
loss: 1.0243194103240967,grad_norm: 0.9561219785169045, iteration: 209831
loss: 1.035332202911377,grad_norm: 0.9999996901714849, iteration: 209832
loss: 1.0024621486663818,grad_norm: 0.8655301343606747, iteration: 209833
loss: 0.9787322878837585,grad_norm: 0.9999990892072202, iteration: 209834
loss: 1.000433325767517,grad_norm: 0.8456302461517462, iteration: 209835
loss: 0.9636783599853516,grad_norm: 0.8324219045858092, iteration: 209836
loss: 1.081744909286499,grad_norm: 0.999999575590038, iteration: 209837
loss: 0.9962138533592224,grad_norm: 0.9999990558218799, iteration: 209838
loss: 1.0381481647491455,grad_norm: 0.8524361908213154, iteration: 209839
loss: 0.9868150949478149,grad_norm: 0.8558348520841567, iteration: 209840
loss: 0.959622859954834,grad_norm: 0.9890346695549828, iteration: 209841
loss: 0.9913825988769531,grad_norm: 0.9999990573608933, iteration: 209842
loss: 1.0420749187469482,grad_norm: 0.9999992632349407, iteration: 209843
loss: 1.0424402952194214,grad_norm: 0.9999998932512218, iteration: 209844
loss: 1.0226404666900635,grad_norm: 0.951268913363592, iteration: 209845
loss: 1.0389370918273926,grad_norm: 0.9881032315103794, iteration: 209846
loss: 0.9918763637542725,grad_norm: 0.9847250926199235, iteration: 209847
loss: 1.0001581907272339,grad_norm: 0.8796850033825656, iteration: 209848
loss: 0.9888548254966736,grad_norm: 0.9581314605399566, iteration: 209849
loss: 0.9615742564201355,grad_norm: 0.8428837447294745, iteration: 209850
loss: 0.9761983752250671,grad_norm: 0.982967737885643, iteration: 209851
loss: 1.0151468515396118,grad_norm: 0.999999213252927, iteration: 209852
loss: 0.972410261631012,grad_norm: 0.9999992613747339, iteration: 209853
loss: 0.9997945427894592,grad_norm: 0.9999989329017951, iteration: 209854
loss: 1.025927186012268,grad_norm: 0.9999990951459978, iteration: 209855
loss: 1.0235906839370728,grad_norm: 0.9256339094996444, iteration: 209856
loss: 1.028334140777588,grad_norm: 0.8880175628337347, iteration: 209857
loss: 0.9956226348876953,grad_norm: 0.9625813762630353, iteration: 209858
loss: 0.987251341342926,grad_norm: 0.9108957020441448, iteration: 209859
loss: 1.0294348001480103,grad_norm: 0.9505426690824104, iteration: 209860
loss: 1.0036060810089111,grad_norm: 0.9131794570975964, iteration: 209861
loss: 1.0162748098373413,grad_norm: 0.9999992508455343, iteration: 209862
loss: 0.9627529382705688,grad_norm: 0.8767601727001828, iteration: 209863
loss: 0.9873175024986267,grad_norm: 0.9999990833965916, iteration: 209864
loss: 1.0318843126296997,grad_norm: 0.9222504053895546, iteration: 209865
loss: 0.9938021898269653,grad_norm: 0.9299897289563176, iteration: 209866
loss: 1.027333378791809,grad_norm: 0.951371997753718, iteration: 209867
loss: 1.0091419219970703,grad_norm: 0.9027509831652821, iteration: 209868
loss: 0.9679908156394958,grad_norm: 0.9999990314422307, iteration: 209869
loss: 0.9991844892501831,grad_norm: 0.9999991210940145, iteration: 209870
loss: 0.9941272735595703,grad_norm: 0.999999106365296, iteration: 209871
loss: 1.0159916877746582,grad_norm: 0.7940215971431998, iteration: 209872
loss: 0.9991588592529297,grad_norm: 0.9889950812548115, iteration: 209873
loss: 1.1593842506408691,grad_norm: 0.9999990787810636, iteration: 209874
loss: 1.0039561986923218,grad_norm: 0.9374062640950961, iteration: 209875
loss: 1.0044703483581543,grad_norm: 0.8797396387101801, iteration: 209876
loss: 1.0327332019805908,grad_norm: 0.9999992485767414, iteration: 209877
loss: 1.0055930614471436,grad_norm: 0.8988956620136772, iteration: 209878
loss: 0.9856933355331421,grad_norm: 0.9002460466765859, iteration: 209879
loss: 1.02892005443573,grad_norm: 0.9254242628334229, iteration: 209880
loss: 1.0236356258392334,grad_norm: 0.9351912955116851, iteration: 209881
loss: 1.1591633558273315,grad_norm: 0.9999990919507317, iteration: 209882
loss: 1.0800056457519531,grad_norm: 0.9999997138585918, iteration: 209883
loss: 1.023627519607544,grad_norm: 0.896415601545231, iteration: 209884
loss: 0.9859400987625122,grad_norm: 0.9761814874020616, iteration: 209885
loss: 0.9947962164878845,grad_norm: 0.8719346108992104, iteration: 209886
loss: 0.9918512105941772,grad_norm: 0.8697621522483441, iteration: 209887
loss: 1.004622220993042,grad_norm: 0.9999991596920896, iteration: 209888
loss: 1.003339171409607,grad_norm: 0.9196126796709182, iteration: 209889
loss: 1.0071550607681274,grad_norm: 0.946487190080152, iteration: 209890
loss: 0.9752573370933533,grad_norm: 0.999999087988369, iteration: 209891
loss: 1.0347710847854614,grad_norm: 0.8308276420231614, iteration: 209892
loss: 0.9601556062698364,grad_norm: 0.805554006743903, iteration: 209893
loss: 0.9776101112365723,grad_norm: 0.9999990509771415, iteration: 209894
loss: 0.9680282473564148,grad_norm: 0.9999997390963873, iteration: 209895
loss: 0.9694519639015198,grad_norm: 0.9330311645117708, iteration: 209896
loss: 1.0355488061904907,grad_norm: 0.9179079662547538, iteration: 209897
loss: 0.9774753451347351,grad_norm: 0.903128545075814, iteration: 209898
loss: 0.9545189738273621,grad_norm: 0.9999990879080453, iteration: 209899
loss: 1.0044081211090088,grad_norm: 0.9474710630413515, iteration: 209900
loss: 0.9744951725006104,grad_norm: 0.9562565098864255, iteration: 209901
loss: 1.012097716331482,grad_norm: 0.9342117817857832, iteration: 209902
loss: 1.0621973276138306,grad_norm: 0.999999127858244, iteration: 209903
loss: 0.9647793173789978,grad_norm: 0.9999992268012687, iteration: 209904
loss: 0.9926975965499878,grad_norm: 0.7724624605365529, iteration: 209905
loss: 1.0655220746994019,grad_norm: 0.9999994147928554, iteration: 209906
loss: 0.9814245700836182,grad_norm: 0.8203695456966398, iteration: 209907
loss: 0.9626043438911438,grad_norm: 0.8818426618403504, iteration: 209908
loss: 1.0357439517974854,grad_norm: 0.8860350905073971, iteration: 209909
loss: 0.9742245674133301,grad_norm: 0.999999035666139, iteration: 209910
loss: 0.9722212553024292,grad_norm: 0.8343111274837858, iteration: 209911
loss: 1.0147873163223267,grad_norm: 0.8648070575369654, iteration: 209912
loss: 1.0121136903762817,grad_norm: 0.9999991241363209, iteration: 209913
loss: 1.0017249584197998,grad_norm: 0.9603718232740048, iteration: 209914
loss: 0.9836184978485107,grad_norm: 0.9648537180162047, iteration: 209915
loss: 0.978176474571228,grad_norm: 0.7885477786219055, iteration: 209916
loss: 1.004110336303711,grad_norm: 0.8263962436164854, iteration: 209917
loss: 0.9785844683647156,grad_norm: 0.9999992373998409, iteration: 209918
loss: 0.9835567474365234,grad_norm: 0.9632884961513603, iteration: 209919
loss: 1.0152159929275513,grad_norm: 0.9999990795812792, iteration: 209920
loss: 1.0453009605407715,grad_norm: 0.9999990461654287, iteration: 209921
loss: 1.002640962600708,grad_norm: 0.916663904257627, iteration: 209922
loss: 0.9863293170928955,grad_norm: 0.8379387345425369, iteration: 209923
loss: 1.0112571716308594,grad_norm: 0.913309304445637, iteration: 209924
loss: 0.9827302098274231,grad_norm: 0.9510555894122534, iteration: 209925
loss: 1.0143052339553833,grad_norm: 0.9999992173917671, iteration: 209926
loss: 1.0023339986801147,grad_norm: 0.9015579596192745, iteration: 209927
loss: 1.0061136484146118,grad_norm: 0.914758758227808, iteration: 209928
loss: 1.005519986152649,grad_norm: 0.8768510620821942, iteration: 209929
loss: 1.0252103805541992,grad_norm: 0.9564419248518744, iteration: 209930
loss: 1.0029131174087524,grad_norm: 0.9686059266278721, iteration: 209931
loss: 0.9753270745277405,grad_norm: 0.8837896868649353, iteration: 209932
loss: 1.0905221700668335,grad_norm: 0.9999996761323287, iteration: 209933
loss: 0.9578162431716919,grad_norm: 0.9999990742386513, iteration: 209934
loss: 0.9804114103317261,grad_norm: 0.9199619757964456, iteration: 209935
loss: 0.9828287363052368,grad_norm: 0.9999991514923267, iteration: 209936
loss: 1.0207107067108154,grad_norm: 0.9330485266200478, iteration: 209937
loss: 0.9950917959213257,grad_norm: 0.9999990796399499, iteration: 209938
loss: 0.9827273488044739,grad_norm: 0.9999990520328179, iteration: 209939
loss: 0.9793338179588318,grad_norm: 0.9999991437418827, iteration: 209940
loss: 0.9603492617607117,grad_norm: 0.9689315177288045, iteration: 209941
loss: 0.9864071607589722,grad_norm: 0.9522046980876607, iteration: 209942
loss: 0.9469972252845764,grad_norm: 0.9881677124563757, iteration: 209943
loss: 0.9993500113487244,grad_norm: 0.8601984388592728, iteration: 209944
loss: 0.9912128448486328,grad_norm: 0.9550990158967437, iteration: 209945
loss: 0.9916948080062866,grad_norm: 0.796741787325736, iteration: 209946
loss: 1.0118536949157715,grad_norm: 0.9999991058077319, iteration: 209947
loss: 0.992533266544342,grad_norm: 0.9028113559615359, iteration: 209948
loss: 0.9791443347930908,grad_norm: 0.980868584587379, iteration: 209949
loss: 1.0144543647766113,grad_norm: 0.999999224069724, iteration: 209950
loss: 1.033810019493103,grad_norm: 0.9573413690818219, iteration: 209951
loss: 0.9835342168807983,grad_norm: 0.9746485453531378, iteration: 209952
loss: 1.005971908569336,grad_norm: 0.8211516709448263, iteration: 209953
loss: 0.982272744178772,grad_norm: 0.9999991549173264, iteration: 209954
loss: 1.075631856918335,grad_norm: 0.9999990269060948, iteration: 209955
loss: 1.0148320198059082,grad_norm: 0.9183825410053492, iteration: 209956
loss: 1.0223915576934814,grad_norm: 0.9999991323706447, iteration: 209957
loss: 1.0227729082107544,grad_norm: 0.9999993049767605, iteration: 209958
loss: 1.0008023977279663,grad_norm: 0.9210544695989512, iteration: 209959
loss: 1.0348130464553833,grad_norm: 0.9265745342754003, iteration: 209960
loss: 0.9994583129882812,grad_norm: 0.9999990058439999, iteration: 209961
loss: 1.034906029701233,grad_norm: 0.9999995334775562, iteration: 209962
loss: 0.9805761575698853,grad_norm: 0.9999991914801869, iteration: 209963
loss: 0.9850596785545349,grad_norm: 0.945409973642452, iteration: 209964
loss: 0.9544704556465149,grad_norm: 0.7935695892581467, iteration: 209965
loss: 0.9934703707695007,grad_norm: 0.8516118413726294, iteration: 209966
loss: 0.9682980179786682,grad_norm: 0.9398967144534665, iteration: 209967
loss: 1.0238215923309326,grad_norm: 0.8399706216805475, iteration: 209968
loss: 0.985377848148346,grad_norm: 0.9894605541263064, iteration: 209969
loss: 0.9838710427284241,grad_norm: 0.9999990516829383, iteration: 209970
loss: 1.042867660522461,grad_norm: 0.9275780691423182, iteration: 209971
loss: 0.9442242383956909,grad_norm: 0.8038802400249906, iteration: 209972
loss: 0.9996697902679443,grad_norm: 0.9999997522410177, iteration: 209973
loss: 0.9856610894203186,grad_norm: 0.8167251394302993, iteration: 209974
loss: 1.0099483728408813,grad_norm: 0.8736122550763642, iteration: 209975
loss: 0.9992017149925232,grad_norm: 0.9999990038619169, iteration: 209976
loss: 1.0062229633331299,grad_norm: 0.9999989841127367, iteration: 209977
loss: 1.0233653783798218,grad_norm: 0.9999997745558379, iteration: 209978
loss: 0.996256947517395,grad_norm: 0.9999990171832009, iteration: 209979
loss: 0.9804167151451111,grad_norm: 0.9293261875349984, iteration: 209980
loss: 1.0581263303756714,grad_norm: 0.9999991044180618, iteration: 209981
loss: 1.0098077058792114,grad_norm: 0.9322659522599971, iteration: 209982
loss: 1.010278344154358,grad_norm: 0.9999996062342884, iteration: 209983
loss: 1.0100593566894531,grad_norm: 0.8841073293150488, iteration: 209984
loss: 0.9844818711280823,grad_norm: 0.8519349410356889, iteration: 209985
loss: 1.0073155164718628,grad_norm: 0.9324146877925391, iteration: 209986
loss: 0.9973500967025757,grad_norm: 0.8580199891918685, iteration: 209987
loss: 1.0001943111419678,grad_norm: 0.84116072558948, iteration: 209988
loss: 1.019556999206543,grad_norm: 0.9836816347141848, iteration: 209989
loss: 1.0254937410354614,grad_norm: 0.891355884602551, iteration: 209990
loss: 0.9945412278175354,grad_norm: 0.8460883271245498, iteration: 209991
loss: 1.019963264465332,grad_norm: 0.9999991775454987, iteration: 209992
loss: 0.9730449914932251,grad_norm: 0.9722383926552401, iteration: 209993
loss: 1.0171735286712646,grad_norm: 0.9999990507238693, iteration: 209994
loss: 0.9629562497138977,grad_norm: 0.999998973920426, iteration: 209995
loss: 1.013683795928955,grad_norm: 0.8395078620946438, iteration: 209996
loss: 1.0261310338974,grad_norm: 0.9600382931909718, iteration: 209997
loss: 1.0192638635635376,grad_norm: 0.9999993158576717, iteration: 209998
loss: 0.9795846343040466,grad_norm: 0.8713774399577054, iteration: 209999
loss: 0.9737627506256104,grad_norm: 0.9035787711762786, iteration: 210000
Evaluating at step 210000
{'val': 0.9954665638506413, 'test': 2.708375833285883}
loss: 0.9930655360221863,grad_norm: 0.9999991691374328, iteration: 210001
loss: 1.017580509185791,grad_norm: 0.8207897181295946, iteration: 210002
loss: 0.9762744903564453,grad_norm: 0.9557692432999599, iteration: 210003
loss: 1.0046825408935547,grad_norm: 0.840071003713417, iteration: 210004
loss: 1.002772569656372,grad_norm: 0.9758681401344222, iteration: 210005
loss: 0.9567747712135315,grad_norm: 0.7987359743061853, iteration: 210006
loss: 0.9900431632995605,grad_norm: 0.9463920219190225, iteration: 210007
loss: 1.0069267749786377,grad_norm: 0.895733058362816, iteration: 210008
loss: 0.9534041881561279,grad_norm: 0.7882134771030544, iteration: 210009
loss: 1.0179263353347778,grad_norm: 0.9999990819592396, iteration: 210010
loss: 0.9752939343452454,grad_norm: 0.9194731133904933, iteration: 210011
loss: 0.9960514307022095,grad_norm: 0.9492022120940812, iteration: 210012
loss: 1.012658953666687,grad_norm: 0.9999991249992446, iteration: 210013
loss: 1.0202304124832153,grad_norm: 0.9999991233725116, iteration: 210014
loss: 1.0152335166931152,grad_norm: 0.9357221960688069, iteration: 210015
loss: 1.0597268342971802,grad_norm: 0.9999999801158613, iteration: 210016
loss: 1.0195542573928833,grad_norm: 0.9002833690246216, iteration: 210017
loss: 0.996373176574707,grad_norm: 0.9070050284289137, iteration: 210018
loss: 1.0176897048950195,grad_norm: 0.8914713639048653, iteration: 210019
loss: 0.9957315325737,grad_norm: 0.9351886160822794, iteration: 210020
loss: 0.9612458348274231,grad_norm: 0.9101412366963151, iteration: 210021
loss: 1.0520621538162231,grad_norm: 0.9999992114484898, iteration: 210022
loss: 0.9623624682426453,grad_norm: 0.9999990592923224, iteration: 210023
loss: 1.058524489402771,grad_norm: 0.9506588954086912, iteration: 210024
loss: 0.9888584613800049,grad_norm: 0.9999991648391008, iteration: 210025
loss: 0.9732239246368408,grad_norm: 0.8928436245319512, iteration: 210026
loss: 1.0148414373397827,grad_norm: 0.9999989817078712, iteration: 210027
loss: 0.9970189929008484,grad_norm: 0.9491408743232415, iteration: 210028
loss: 1.0505568981170654,grad_norm: 0.9569389309290428, iteration: 210029
loss: 1.0798805952072144,grad_norm: 0.9999993564632711, iteration: 210030
loss: 0.9675964117050171,grad_norm: 0.9999990791325913, iteration: 210031
loss: 0.9765666127204895,grad_norm: 0.8451644181041031, iteration: 210032
loss: 1.0078374147415161,grad_norm: 0.9729309824844131, iteration: 210033
loss: 1.0218536853790283,grad_norm: 0.9999991007596843, iteration: 210034
loss: 1.0082218647003174,grad_norm: 0.9058082106376127, iteration: 210035
loss: 1.0132502317428589,grad_norm: 0.9999997321758219, iteration: 210036
loss: 1.0308845043182373,grad_norm: 0.9999989834918951, iteration: 210037
loss: 0.9894455671310425,grad_norm: 0.9308301822414856, iteration: 210038
loss: 0.9912166595458984,grad_norm: 0.9216425943170787, iteration: 210039
loss: 1.0537539720535278,grad_norm: 0.9893566642849293, iteration: 210040
loss: 1.0033903121948242,grad_norm: 0.8738999139436054, iteration: 210041
loss: 1.0260871648788452,grad_norm: 0.999999077645595, iteration: 210042
loss: 0.9780890345573425,grad_norm: 0.939886811288804, iteration: 210043
loss: 0.9999496340751648,grad_norm: 0.9999990194562267, iteration: 210044
loss: 1.0268784761428833,grad_norm: 0.9999990481338071, iteration: 210045
loss: 0.9907399415969849,grad_norm: 0.9999990797711092, iteration: 210046
loss: 0.9584630727767944,grad_norm: 0.9023271590222002, iteration: 210047
loss: 0.9556888341903687,grad_norm: 0.9999991742413903, iteration: 210048
loss: 1.0009673833847046,grad_norm: 0.97336267192394, iteration: 210049
loss: 1.0115222930908203,grad_norm: 0.9395497726034225, iteration: 210050
loss: 0.9921735525131226,grad_norm: 0.9609108788292843, iteration: 210051
loss: 0.982615053653717,grad_norm: 0.9534974811184973, iteration: 210052
loss: 1.0205756425857544,grad_norm: 0.9999991309354446, iteration: 210053
loss: 1.0429799556732178,grad_norm: 0.999999442950949, iteration: 210054
loss: 1.000068187713623,grad_norm: 0.993498865055808, iteration: 210055
loss: 1.0157206058502197,grad_norm: 0.8959606605413064, iteration: 210056
loss: 0.9930486083030701,grad_norm: 0.9442784846879695, iteration: 210057
loss: 1.0300302505493164,grad_norm: 0.8870719910708105, iteration: 210058
loss: 0.9910314679145813,grad_norm: 0.9946412225425724, iteration: 210059
loss: 1.0188043117523193,grad_norm: 0.8138148204573762, iteration: 210060
loss: 0.9691265225410461,grad_norm: 0.7987058678097043, iteration: 210061
loss: 1.010060429573059,grad_norm: 0.8117631289325387, iteration: 210062
loss: 1.0325835943222046,grad_norm: 0.9786246445967126, iteration: 210063
loss: 1.0203001499176025,grad_norm: 0.9999991683934302, iteration: 210064
loss: 0.9858583211898804,grad_norm: 0.9692088250760832, iteration: 210065
loss: 0.9892355799674988,grad_norm: 0.9212981733122052, iteration: 210066
loss: 0.9762375354766846,grad_norm: 0.9999989863155023, iteration: 210067
loss: 0.9757242798805237,grad_norm: 0.999999309184788, iteration: 210068
loss: 0.983008086681366,grad_norm: 0.999999113247262, iteration: 210069
loss: 0.9738419055938721,grad_norm: 0.8700203592119659, iteration: 210070
loss: 1.0086863040924072,grad_norm: 0.9999992215220305, iteration: 210071
loss: 0.986433744430542,grad_norm: 0.8051185949765303, iteration: 210072
loss: 0.9623328447341919,grad_norm: 0.8513105417808676, iteration: 210073
loss: 1.0139652490615845,grad_norm: 0.9061932513508318, iteration: 210074
loss: 0.9947895407676697,grad_norm: 0.7261489366443248, iteration: 210075
loss: 0.966032862663269,grad_norm: 0.9999992253891375, iteration: 210076
loss: 1.0030301809310913,grad_norm: 0.9999991295437133, iteration: 210077
loss: 1.0075244903564453,grad_norm: 0.9999989875625978, iteration: 210078
loss: 1.0164834260940552,grad_norm: 0.9999990636747662, iteration: 210079
loss: 1.0491688251495361,grad_norm: 0.9999990426765292, iteration: 210080
loss: 0.9921174049377441,grad_norm: 0.9029139983577268, iteration: 210081
loss: 1.0626505613327026,grad_norm: 0.9999998861319964, iteration: 210082
loss: 0.9819680452346802,grad_norm: 0.8526611594270667, iteration: 210083
loss: 0.9727818965911865,grad_norm: 0.9372115667277494, iteration: 210084
loss: 1.0106488466262817,grad_norm: 0.9999991851489463, iteration: 210085
loss: 0.9736016988754272,grad_norm: 0.7970285650913312, iteration: 210086
loss: 1.0293389558792114,grad_norm: 0.836971070018606, iteration: 210087
loss: 0.9698012471199036,grad_norm: 0.9844758518858189, iteration: 210088
loss: 1.0673291683197021,grad_norm: 0.8257298203535, iteration: 210089
loss: 0.9919937252998352,grad_norm: 0.9999991565298497, iteration: 210090
loss: 1.012824535369873,grad_norm: 0.8491051860537595, iteration: 210091
loss: 1.0081861019134521,grad_norm: 0.9858397399766246, iteration: 210092
loss: 1.0168217420578003,grad_norm: 0.9273408174883672, iteration: 210093
loss: 0.9972627758979797,grad_norm: 0.9897108628759046, iteration: 210094
loss: 1.0029722452163696,grad_norm: 0.9665573420413202, iteration: 210095
loss: 0.9692021608352661,grad_norm: 0.7677153831211238, iteration: 210096
loss: 0.9923615455627441,grad_norm: 0.9042841194000928, iteration: 210097
loss: 0.9590861201286316,grad_norm: 0.9628614748341083, iteration: 210098
loss: 1.0007094144821167,grad_norm: 0.9999991161250742, iteration: 210099
loss: 0.9961382150650024,grad_norm: 0.999999098454431, iteration: 210100
loss: 0.9803748726844788,grad_norm: 0.8838151469457898, iteration: 210101
loss: 0.9939776062965393,grad_norm: 0.9999990352728875, iteration: 210102
loss: 1.010743260383606,grad_norm: 0.9605970594977824, iteration: 210103
loss: 1.013848066329956,grad_norm: 0.9280954259266155, iteration: 210104
loss: 0.9852951169013977,grad_norm: 0.9159913004193427, iteration: 210105
loss: 0.9669819474220276,grad_norm: 0.7887942515565466, iteration: 210106
loss: 1.0356695652008057,grad_norm: 0.9999996652041798, iteration: 210107
loss: 0.9717878699302673,grad_norm: 0.8589793499434032, iteration: 210108
loss: 0.9645522832870483,grad_norm: 0.7893408042227207, iteration: 210109
loss: 0.9803259968757629,grad_norm: 0.863510577514664, iteration: 210110
loss: 0.9959036707878113,grad_norm: 0.8169200355513837, iteration: 210111
loss: 0.9781030416488647,grad_norm: 0.9978483289286842, iteration: 210112
loss: 0.9996611475944519,grad_norm: 0.9999990741559638, iteration: 210113
loss: 1.0189228057861328,grad_norm: 0.9256753079025429, iteration: 210114
loss: 1.1627013683319092,grad_norm: 0.9999995491140984, iteration: 210115
loss: 0.9893298745155334,grad_norm: 0.9268987220964308, iteration: 210116
loss: 1.0193105936050415,grad_norm: 0.999999005043951, iteration: 210117
loss: 0.9637687802314758,grad_norm: 0.8787805912711852, iteration: 210118
loss: 1.0272349119186401,grad_norm: 0.9999992195682749, iteration: 210119
loss: 1.0412287712097168,grad_norm: 0.9999995933818923, iteration: 210120
loss: 0.9578313231468201,grad_norm: 0.9999990140280485, iteration: 210121
loss: 0.9996092319488525,grad_norm: 0.9999991406045141, iteration: 210122
loss: 0.9565500020980835,grad_norm: 0.9999992944571551, iteration: 210123
loss: 0.9701602458953857,grad_norm: 0.9203757794367342, iteration: 210124
loss: 0.9805862307548523,grad_norm: 0.916638715602339, iteration: 210125
loss: 0.9922840595245361,grad_norm: 0.8526291848324745, iteration: 210126
loss: 1.032851219177246,grad_norm: 0.9999992676016798, iteration: 210127
loss: 1.0355209112167358,grad_norm: 0.9153853522224855, iteration: 210128
loss: 1.0144065618515015,grad_norm: 0.8712980499755654, iteration: 210129
loss: 0.9866527318954468,grad_norm: 0.9999990823940724, iteration: 210130
loss: 1.0141569375991821,grad_norm: 0.9999992448103705, iteration: 210131
loss: 1.009819507598877,grad_norm: 0.9486937039967936, iteration: 210132
loss: 0.9528493285179138,grad_norm: 0.9393143555542012, iteration: 210133
loss: 0.9990091323852539,grad_norm: 0.9427088010986341, iteration: 210134
loss: 0.967862069606781,grad_norm: 0.9576577565412737, iteration: 210135
loss: 0.9581332802772522,grad_norm: 0.9771644751189923, iteration: 210136
loss: 1.0095784664154053,grad_norm: 0.9999989921883088, iteration: 210137
loss: 1.0083885192871094,grad_norm: 0.9793394650737608, iteration: 210138
loss: 1.0484848022460938,grad_norm: 0.9429389813021618, iteration: 210139
loss: 1.0144809484481812,grad_norm: 0.9897483701876014, iteration: 210140
loss: 1.0112805366516113,grad_norm: 0.9186592793141444, iteration: 210141
loss: 1.0461372137069702,grad_norm: 0.9780647604463132, iteration: 210142
loss: 0.9935274720191956,grad_norm: 0.9808369629447437, iteration: 210143
loss: 0.9994695782661438,grad_norm: 0.8952876544624188, iteration: 210144
loss: 1.0127649307250977,grad_norm: 0.7986267730251826, iteration: 210145
loss: 0.9862892031669617,grad_norm: 0.9821188639101243, iteration: 210146
loss: 1.0139719247817993,grad_norm: 0.9515629149895425, iteration: 210147
loss: 1.0094159841537476,grad_norm: 0.8329480871201042, iteration: 210148
loss: 1.0442713499069214,grad_norm: 0.9999991110709708, iteration: 210149
loss: 1.0207866430282593,grad_norm: 0.9877233121484292, iteration: 210150
loss: 1.0105541944503784,grad_norm: 0.9943454481452261, iteration: 210151
loss: 0.9979625344276428,grad_norm: 0.9388800652963621, iteration: 210152
loss: 1.0106245279312134,grad_norm: 0.9999990399480575, iteration: 210153
loss: 1.045007348060608,grad_norm: 0.9999993776517194, iteration: 210154
loss: 1.0021159648895264,grad_norm: 0.9999992364718261, iteration: 210155
loss: 1.0307395458221436,grad_norm: 0.9999990508750166, iteration: 210156
loss: 1.032842993736267,grad_norm: 0.9481628517599722, iteration: 210157
loss: 0.98555988073349,grad_norm: 0.9999990950492383, iteration: 210158
loss: 0.9893946647644043,grad_norm: 0.894917038139006, iteration: 210159
loss: 1.0320097208023071,grad_norm: 0.9999990871735647, iteration: 210160
loss: 0.9971504211425781,grad_norm: 0.9765523625644972, iteration: 210161
loss: 0.975040853023529,grad_norm: 0.8081652072863331, iteration: 210162
loss: 0.9920535087585449,grad_norm: 0.8541992958751504, iteration: 210163
loss: 1.0242356061935425,grad_norm: 0.9999992271933887, iteration: 210164
loss: 1.007881760597229,grad_norm: 0.9775958858348434, iteration: 210165
loss: 0.980074405670166,grad_norm: 0.8479183151563948, iteration: 210166
loss: 1.067753553390503,grad_norm: 0.9999991054181181, iteration: 210167
loss: 0.9992705583572388,grad_norm: 0.9090125380929457, iteration: 210168
loss: 0.9895012378692627,grad_norm: 0.9999991111193548, iteration: 210169
loss: 1.0086615085601807,grad_norm: 0.9999991278365432, iteration: 210170
loss: 1.023969292640686,grad_norm: 0.8073022938689709, iteration: 210171
loss: 0.9705798625946045,grad_norm: 0.8664571214920009, iteration: 210172
loss: 1.0106152296066284,grad_norm: 0.9999995280199537, iteration: 210173
loss: 0.9970332384109497,grad_norm: 0.9999991138555063, iteration: 210174
loss: 1.0324989557266235,grad_norm: 0.94338818225182, iteration: 210175
loss: 1.0293669700622559,grad_norm: 0.9999992642092724, iteration: 210176
loss: 0.9748103618621826,grad_norm: 0.9204717051089002, iteration: 210177
loss: 1.0167381763458252,grad_norm: 0.8987447365647083, iteration: 210178
loss: 1.0188661813735962,grad_norm: 0.9999991963074681, iteration: 210179
loss: 0.9918022155761719,grad_norm: 0.8912276971129104, iteration: 210180
loss: 1.170579433441162,grad_norm: 0.9999992524334296, iteration: 210181
loss: 1.045581340789795,grad_norm: 0.999999344310055, iteration: 210182
loss: 1.0231609344482422,grad_norm: 0.9597812181592974, iteration: 210183
loss: 1.0020887851715088,grad_norm: 0.9999991470839072, iteration: 210184
loss: 1.0820404291152954,grad_norm: 0.9999991484790712, iteration: 210185
loss: 1.0261050462722778,grad_norm: 0.8753682909106215, iteration: 210186
loss: 1.1350563764572144,grad_norm: 0.9999997297047637, iteration: 210187
loss: 0.968859076499939,grad_norm: 0.9038789189329657, iteration: 210188
loss: 1.0037226676940918,grad_norm: 0.8496615383831913, iteration: 210189
loss: 1.0304230451583862,grad_norm: 0.9245456564073976, iteration: 210190
loss: 1.0910131931304932,grad_norm: 0.9999994472698446, iteration: 210191
loss: 0.975723922252655,grad_norm: 0.9999991729285257, iteration: 210192
loss: 1.0982415676116943,grad_norm: 0.9999998138157066, iteration: 210193
loss: 1.1324458122253418,grad_norm: 0.9999989253519058, iteration: 210194
loss: 1.0275148153305054,grad_norm: 0.9999999871937977, iteration: 210195
loss: 1.0028986930847168,grad_norm: 0.9999990812336521, iteration: 210196
loss: 1.0202380418777466,grad_norm: 0.9999991026492707, iteration: 210197
loss: 1.0051628351211548,grad_norm: 0.8486093021163839, iteration: 210198
loss: 1.0993962287902832,grad_norm: 0.9999990592450266, iteration: 210199
loss: 1.0438990592956543,grad_norm: 0.8120095830390073, iteration: 210200
loss: 0.9841387271881104,grad_norm: 0.8215747964881436, iteration: 210201
loss: 0.9966822862625122,grad_norm: 0.9706411998695625, iteration: 210202
loss: 1.0027661323547363,grad_norm: 0.9999992863565875, iteration: 210203
loss: 1.0356359481811523,grad_norm: 0.9121313245453606, iteration: 210204
loss: 1.015113115310669,grad_norm: 0.9999994894650259, iteration: 210205
loss: 1.001585602760315,grad_norm: 0.8026714618950995, iteration: 210206
loss: 0.9869615435600281,grad_norm: 0.8773640684611801, iteration: 210207
loss: 0.998484194278717,grad_norm: 0.9395932157979197, iteration: 210208
loss: 0.9997206330299377,grad_norm: 0.9603043052953767, iteration: 210209
loss: 1.0039972066879272,grad_norm: 0.9999989949588608, iteration: 210210
loss: 0.9875656962394714,grad_norm: 0.9863589622571893, iteration: 210211
loss: 1.1353635787963867,grad_norm: 0.9999999823547311, iteration: 210212
loss: 1.0114384889602661,grad_norm: 0.8262432202595095, iteration: 210213
loss: 1.0094850063323975,grad_norm: 0.935831893448943, iteration: 210214
loss: 1.058243989944458,grad_norm: 0.9999991339683675, iteration: 210215
loss: 1.1581097841262817,grad_norm: 0.999999158047186, iteration: 210216
loss: 1.0115585327148438,grad_norm: 0.8397951291892044, iteration: 210217
loss: 1.1045477390289307,grad_norm: 0.9999991434106725, iteration: 210218
loss: 1.024759292602539,grad_norm: 0.7590279176185482, iteration: 210219
loss: 1.0361371040344238,grad_norm: 0.9999998056148354, iteration: 210220
loss: 1.0450395345687866,grad_norm: 0.9999991140406456, iteration: 210221
loss: 1.0011061429977417,grad_norm: 0.9999993122115635, iteration: 210222
loss: 0.983502209186554,grad_norm: 0.7906983019165273, iteration: 210223
loss: 1.0093908309936523,grad_norm: 0.9999991540528389, iteration: 210224
loss: 0.9860987067222595,grad_norm: 0.9999993072324453, iteration: 210225
loss: 1.053186058998108,grad_norm: 0.9583922461551845, iteration: 210226
loss: 0.981360673904419,grad_norm: 0.9378148001544064, iteration: 210227
loss: 1.0779392719268799,grad_norm: 0.9573223689447784, iteration: 210228
loss: 1.0089519023895264,grad_norm: 0.8343916611225769, iteration: 210229
loss: 0.9813910722732544,grad_norm: 0.8443368194960538, iteration: 210230
loss: 1.0132189989089966,grad_norm: 0.9999992191795988, iteration: 210231
loss: 1.298102617263794,grad_norm: 0.9999995603353328, iteration: 210232
loss: 0.9651600122451782,grad_norm: 0.9018159724033131, iteration: 210233
loss: 0.9584402441978455,grad_norm: 0.9359728818445527, iteration: 210234
loss: 1.0077803134918213,grad_norm: 0.9447592391708535, iteration: 210235
loss: 1.0084869861602783,grad_norm: 0.8931764395756324, iteration: 210236
loss: 1.0044469833374023,grad_norm: 0.9315133184626212, iteration: 210237
loss: 1.00484299659729,grad_norm: 0.8863508532550874, iteration: 210238
loss: 1.0619114637374878,grad_norm: 0.9988788598849091, iteration: 210239
loss: 0.950279951095581,grad_norm: 0.9837242589066036, iteration: 210240
loss: 0.9966718554496765,grad_norm: 0.999999264323825, iteration: 210241
loss: 0.9776241779327393,grad_norm: 0.9999991583936372, iteration: 210242
loss: 0.9897478818893433,grad_norm: 0.8342533658103042, iteration: 210243
loss: 1.0116173028945923,grad_norm: 0.9980552409956028, iteration: 210244
loss: 1.009903907775879,grad_norm: 0.8152668813171138, iteration: 210245
loss: 1.0390660762786865,grad_norm: 0.9999991695743164, iteration: 210246
loss: 0.9829016923904419,grad_norm: 0.813180281515541, iteration: 210247
loss: 1.001973032951355,grad_norm: 0.9875757560281044, iteration: 210248
loss: 1.0164605379104614,grad_norm: 0.9999990761578085, iteration: 210249
loss: 1.0202759504318237,grad_norm: 0.9227469055588088, iteration: 210250
loss: 1.0371135473251343,grad_norm: 0.9999989454369508, iteration: 210251
loss: 1.0264297723770142,grad_norm: 0.9999996027923924, iteration: 210252
loss: 1.0079002380371094,grad_norm: 0.9951922177429128, iteration: 210253
loss: 1.002716064453125,grad_norm: 0.9999991391656855, iteration: 210254
loss: 1.0142772197723389,grad_norm: 0.9425005485749847, iteration: 210255
loss: 1.0229120254516602,grad_norm: 0.9693301053115244, iteration: 210256
loss: 1.0831917524337769,grad_norm: 0.9948379786619059, iteration: 210257
loss: 1.0651565790176392,grad_norm: 0.9999999760667302, iteration: 210258
loss: 0.9978774785995483,grad_norm: 0.8846056716923376, iteration: 210259
loss: 1.0579767227172852,grad_norm: 0.9999991505695377, iteration: 210260
loss: 0.9836528897285461,grad_norm: 0.9147814119190197, iteration: 210261
loss: 0.9943174719810486,grad_norm: 0.9999991047921571, iteration: 210262
loss: 1.035420298576355,grad_norm: 0.9999990536512886, iteration: 210263
loss: 0.9815069437026978,grad_norm: 0.8758548349826888, iteration: 210264
loss: 0.9754482507705688,grad_norm: 0.8968196675664043, iteration: 210265
loss: 1.0091251134872437,grad_norm: 0.9999990450844961, iteration: 210266
loss: 1.0190491676330566,grad_norm: 0.9999991601733029, iteration: 210267
loss: 0.9872485399246216,grad_norm: 0.9954695637358697, iteration: 210268
loss: 1.0414625406265259,grad_norm: 0.9999989809888491, iteration: 210269
loss: 0.9769026041030884,grad_norm: 0.914493695256892, iteration: 210270
loss: 0.9786102175712585,grad_norm: 0.9715368835965471, iteration: 210271
loss: 1.0094870328903198,grad_norm: 0.9999996198423184, iteration: 210272
loss: 1.0530232191085815,grad_norm: 0.8834979493963941, iteration: 210273
loss: 0.9821831583976746,grad_norm: 0.8844206663991948, iteration: 210274
loss: 1.0054941177368164,grad_norm: 0.9999989653131575, iteration: 210275
loss: 1.0283902883529663,grad_norm: 0.9999989146834142, iteration: 210276
loss: 0.9972304105758667,grad_norm: 0.9999991512517481, iteration: 210277
loss: 0.9977176189422607,grad_norm: 0.806897527025775, iteration: 210278
loss: 0.9778352975845337,grad_norm: 0.9827914529507462, iteration: 210279
loss: 1.0044268369674683,grad_norm: 0.9426012872956997, iteration: 210280
loss: 0.9934402704238892,grad_norm: 0.967370818404985, iteration: 210281
loss: 0.9844953417778015,grad_norm: 0.9496179736919325, iteration: 210282
loss: 1.0152912139892578,grad_norm: 0.9999992646190525, iteration: 210283
loss: 1.0413192510604858,grad_norm: 0.816027910412446, iteration: 210284
loss: 1.0121607780456543,grad_norm: 0.8507127168264565, iteration: 210285
loss: 0.9919387698173523,grad_norm: 0.9815864908163422, iteration: 210286
loss: 0.9845683574676514,grad_norm: 0.9999990954568618, iteration: 210287
loss: 1.0070244073867798,grad_norm: 0.9999996412641916, iteration: 210288
loss: 1.0227454900741577,grad_norm: 0.8445619388214627, iteration: 210289
loss: 0.9874534010887146,grad_norm: 0.9246994771228196, iteration: 210290
loss: 1.0135143995285034,grad_norm: 0.7838807793739864, iteration: 210291
loss: 1.0049070119857788,grad_norm: 0.8317020765506119, iteration: 210292
loss: 1.0259323120117188,grad_norm: 0.9999991100676878, iteration: 210293
loss: 1.0594565868377686,grad_norm: 0.918921968670479, iteration: 210294
loss: 0.9803268313407898,grad_norm: 0.8237435788882316, iteration: 210295
loss: 1.0014598369598389,grad_norm: 0.9999991866618485, iteration: 210296
loss: 0.9964693784713745,grad_norm: 0.737147545463578, iteration: 210297
loss: 0.9978009462356567,grad_norm: 0.9482167238631578, iteration: 210298
loss: 0.9996938109397888,grad_norm: 0.9999992960296775, iteration: 210299
loss: 0.9805278182029724,grad_norm: 0.8738059820949925, iteration: 210300
loss: 0.9945631623268127,grad_norm: 0.8579925492500511, iteration: 210301
loss: 1.0044342279434204,grad_norm: 0.9321518996525534, iteration: 210302
loss: 0.9984163641929626,grad_norm: 0.9110921011543097, iteration: 210303
loss: 1.0082337856292725,grad_norm: 0.8227575239887708, iteration: 210304
loss: 0.9986645579338074,grad_norm: 0.895231553434184, iteration: 210305
loss: 0.9795628786087036,grad_norm: 0.9999991943801525, iteration: 210306
loss: 1.022454023361206,grad_norm: 0.8784567505686419, iteration: 210307
loss: 1.0099153518676758,grad_norm: 0.8947032185617553, iteration: 210308
loss: 1.0180389881134033,grad_norm: 0.999999059022049, iteration: 210309
loss: 1.0055625438690186,grad_norm: 0.9999989646797828, iteration: 210310
loss: 1.0219708681106567,grad_norm: 0.8463805045264833, iteration: 210311
loss: 0.9905254244804382,grad_norm: 0.848005765772656, iteration: 210312
loss: 0.9666165709495544,grad_norm: 0.954872016045492, iteration: 210313
loss: 0.9793148040771484,grad_norm: 0.9858374692251823, iteration: 210314
loss: 0.9771757125854492,grad_norm: 0.9969827684225065, iteration: 210315
loss: 1.0316230058670044,grad_norm: 0.8157276261501135, iteration: 210316
loss: 1.019498586654663,grad_norm: 0.9999992027986389, iteration: 210317
loss: 0.9929558038711548,grad_norm: 0.9999993838078418, iteration: 210318
loss: 1.0224093198776245,grad_norm: 0.9382975824968021, iteration: 210319
loss: 1.0176297426223755,grad_norm: 0.9999989692422564, iteration: 210320
loss: 1.0011489391326904,grad_norm: 0.999998986694593, iteration: 210321
loss: 1.0096628665924072,grad_norm: 0.8604185673449403, iteration: 210322
loss: 0.9821831583976746,grad_norm: 0.9999997809083934, iteration: 210323
loss: 0.966833770275116,grad_norm: 0.8555919209073078, iteration: 210324
loss: 1.0041714906692505,grad_norm: 0.9999991058459116, iteration: 210325
loss: 1.0127301216125488,grad_norm: 0.8318339244056413, iteration: 210326
loss: 1.012302279472351,grad_norm: 0.9999991280833246, iteration: 210327
loss: 1.056241512298584,grad_norm: 1.0000000587104172, iteration: 210328
loss: 0.9971379637718201,grad_norm: 0.9999990470501376, iteration: 210329
loss: 1.0130634307861328,grad_norm: 0.8984351913964064, iteration: 210330
loss: 1.005706787109375,grad_norm: 0.858370311851513, iteration: 210331
loss: 1.0168167352676392,grad_norm: 0.9999991218833147, iteration: 210332
loss: 0.9992738962173462,grad_norm: 0.7322340979340982, iteration: 210333
loss: 0.9930996894836426,grad_norm: 0.9980927467598023, iteration: 210334
loss: 1.0360513925552368,grad_norm: 0.9999994351875198, iteration: 210335
loss: 1.0043649673461914,grad_norm: 0.7677593170015297, iteration: 210336
loss: 1.0011054277420044,grad_norm: 0.999999182759996, iteration: 210337
loss: 1.0247176885604858,grad_norm: 0.9999989644460449, iteration: 210338
loss: 0.9840728640556335,grad_norm: 0.9999989144735678, iteration: 210339
loss: 0.9912756681442261,grad_norm: 0.9999992068562993, iteration: 210340
loss: 0.9459447264671326,grad_norm: 0.9155923830443403, iteration: 210341
loss: 0.9637457132339478,grad_norm: 0.7624894003733073, iteration: 210342
loss: 1.0657298564910889,grad_norm: 0.9999999542408025, iteration: 210343
loss: 0.9862509965896606,grad_norm: 0.8758671934024641, iteration: 210344
loss: 1.0161967277526855,grad_norm: 0.8973884823683416, iteration: 210345
loss: 1.0018079280853271,grad_norm: 0.9924073347295486, iteration: 210346
loss: 1.0346301794052124,grad_norm: 0.9172303316358636, iteration: 210347
loss: 1.1341661214828491,grad_norm: 0.9999992283131303, iteration: 210348
loss: 1.0075775384902954,grad_norm: 0.9999991531424922, iteration: 210349
loss: 1.0138254165649414,grad_norm: 0.8762178053191847, iteration: 210350
loss: 0.9798067212104797,grad_norm: 0.9196008784279545, iteration: 210351
loss: 0.9929444193840027,grad_norm: 0.9461847381322989, iteration: 210352
loss: 1.004509449005127,grad_norm: 0.9879277199902139, iteration: 210353
loss: 1.0148512125015259,grad_norm: 0.8665517299743926, iteration: 210354
loss: 1.0058554410934448,grad_norm: 0.9999990692562917, iteration: 210355
loss: 0.9830970168113708,grad_norm: 0.9999991393306868, iteration: 210356
loss: 0.9949365258216858,grad_norm: 0.9973471178781079, iteration: 210357
loss: 0.9763433337211609,grad_norm: 0.9999991626304138, iteration: 210358
loss: 0.9991723895072937,grad_norm: 0.8094962348004312, iteration: 210359
loss: 1.0083651542663574,grad_norm: 0.920375321759936, iteration: 210360
loss: 1.013086199760437,grad_norm: 0.9754695739413879, iteration: 210361
loss: 0.9684581160545349,grad_norm: 0.9597016192416791, iteration: 210362
loss: 0.9916642308235168,grad_norm: 0.898168547813669, iteration: 210363
loss: 0.9856835007667542,grad_norm: 0.917542124203834, iteration: 210364
loss: 0.9977726340293884,grad_norm: 0.8818152887694886, iteration: 210365
loss: 0.9742603302001953,grad_norm: 0.9999991812452751, iteration: 210366
loss: 0.9816329479217529,grad_norm: 0.8731246104136137, iteration: 210367
loss: 0.9875380396842957,grad_norm: 0.9999990814427752, iteration: 210368
loss: 1.0031886100769043,grad_norm: 0.9999990582145662, iteration: 210369
loss: 0.9612517356872559,grad_norm: 0.8866203366063706, iteration: 210370
loss: 1.014785647392273,grad_norm: 0.8579397344269625, iteration: 210371
loss: 0.9891144633293152,grad_norm: 0.7770930924996681, iteration: 210372
loss: 1.0407241582870483,grad_norm: 0.9018758725937204, iteration: 210373
loss: 1.0235868692398071,grad_norm: 0.9999990470869908, iteration: 210374
loss: 0.990598738193512,grad_norm: 0.8062168108835067, iteration: 210375
loss: 1.013516902923584,grad_norm: 0.7892749324776116, iteration: 210376
loss: 0.9923754930496216,grad_norm: 0.9999992115611318, iteration: 210377
loss: 0.9794606566429138,grad_norm: 0.980993613257173, iteration: 210378
loss: 0.994013249874115,grad_norm: 0.9254348142797327, iteration: 210379
loss: 0.988145649433136,grad_norm: 0.9120246141038865, iteration: 210380
loss: 0.998801052570343,grad_norm: 0.9999991323265877, iteration: 210381
loss: 0.9597891569137573,grad_norm: 0.8265575286561533, iteration: 210382
loss: 0.9641836881637573,grad_norm: 0.9999993055170995, iteration: 210383
loss: 1.0031806230545044,grad_norm: 0.99999990921378, iteration: 210384
loss: 0.9915041923522949,grad_norm: 0.8261695935982026, iteration: 210385
loss: 1.0015143156051636,grad_norm: 0.9999991925286482, iteration: 210386
loss: 1.0262857675552368,grad_norm: 0.9999990733139231, iteration: 210387
loss: 1.0310304164886475,grad_norm: 0.9457728485819019, iteration: 210388
loss: 1.0180741548538208,grad_norm: 0.9999990401395024, iteration: 210389
loss: 1.0132466554641724,grad_norm: 0.9851285349833084, iteration: 210390
loss: 1.0060620307922363,grad_norm: 0.999999294840867, iteration: 210391
loss: 0.9988721013069153,grad_norm: 0.9378574107491662, iteration: 210392
loss: 0.980938196182251,grad_norm: 0.9999991929907419, iteration: 210393
loss: 1.0243250131607056,grad_norm: 0.9999989705411012, iteration: 210394
loss: 1.0722647905349731,grad_norm: 0.9999990716730229, iteration: 210395
loss: 0.9767464399337769,grad_norm: 0.9999994312656293, iteration: 210396
loss: 1.0091453790664673,grad_norm: 0.9041849051132133, iteration: 210397
loss: 0.9954624176025391,grad_norm: 0.9083540967637541, iteration: 210398
loss: 0.9761859178543091,grad_norm: 0.9714163756939722, iteration: 210399
loss: 0.9779353737831116,grad_norm: 0.8149283960299067, iteration: 210400
loss: 1.0033458471298218,grad_norm: 0.9844294655620573, iteration: 210401
loss: 1.0041862726211548,grad_norm: 0.9999989605843085, iteration: 210402
loss: 1.0007203817367554,grad_norm: 0.9470407392674817, iteration: 210403
loss: 0.9960190057754517,grad_norm: 0.9491595135597853, iteration: 210404
loss: 1.0281263589859009,grad_norm: 0.9043665932029961, iteration: 210405
loss: 1.026477336883545,grad_norm: 0.915242477505442, iteration: 210406
loss: 0.980238676071167,grad_norm: 0.8137429136839719, iteration: 210407
loss: 1.019904375076294,grad_norm: 0.9716338465862147, iteration: 210408
loss: 1.023693561553955,grad_norm: 0.8693910144425674, iteration: 210409
loss: 0.9896481037139893,grad_norm: 0.7976735031526421, iteration: 210410
loss: 1.0061700344085693,grad_norm: 0.9999990117662941, iteration: 210411
loss: 0.997669517993927,grad_norm: 0.7962763057103959, iteration: 210412
loss: 0.9822001457214355,grad_norm: 0.9999991616362329, iteration: 210413
loss: 0.987615168094635,grad_norm: 0.9832015684872737, iteration: 210414
loss: 0.9896818995475769,grad_norm: 0.8462534905052097, iteration: 210415
loss: 1.0228445529937744,grad_norm: 0.9129320946663279, iteration: 210416
loss: 0.9993523955345154,grad_norm: 0.9153141581307143, iteration: 210417
loss: 1.0071510076522827,grad_norm: 0.8516849897255527, iteration: 210418
loss: 0.9651872515678406,grad_norm: 0.9239550202310872, iteration: 210419
loss: 1.0018514394760132,grad_norm: 0.8180933240985554, iteration: 210420
loss: 0.9869106411933899,grad_norm: 0.9415071247707968, iteration: 210421
loss: 1.012527585029602,grad_norm: 0.9999991904124103, iteration: 210422
loss: 1.0088411569595337,grad_norm: 0.9999990465388305, iteration: 210423
loss: 0.9774399995803833,grad_norm: 0.9999991535089849, iteration: 210424
loss: 1.0036653280258179,grad_norm: 0.9999990159761067, iteration: 210425
loss: 0.9913842678070068,grad_norm: 0.9999991968392049, iteration: 210426
loss: 0.9856310486793518,grad_norm: 0.9154943600006779, iteration: 210427
loss: 1.023873209953308,grad_norm: 0.8354916240053557, iteration: 210428
loss: 0.9876032471656799,grad_norm: 0.8475422670273515, iteration: 210429
loss: 0.9681839346885681,grad_norm: 0.9999991871361061, iteration: 210430
loss: 0.9998995661735535,grad_norm: 0.9225611820736792, iteration: 210431
loss: 1.031468391418457,grad_norm: 0.9999997971438355, iteration: 210432
loss: 0.9975295662879944,grad_norm: 0.844696388399124, iteration: 210433
loss: 1.0137896537780762,grad_norm: 0.8860428882209466, iteration: 210434
loss: 0.9639752507209778,grad_norm: 0.7774448700118359, iteration: 210435
loss: 0.9352902770042419,grad_norm: 0.9418950602306857, iteration: 210436
loss: 0.9818967580795288,grad_norm: 0.989106624777396, iteration: 210437
loss: 0.9648693203926086,grad_norm: 0.7437072587841247, iteration: 210438
loss: 1.1972341537475586,grad_norm: 0.9999997605791322, iteration: 210439
loss: 0.9804132580757141,grad_norm: 0.7935218396742237, iteration: 210440
loss: 0.9744073152542114,grad_norm: 0.9999991785975203, iteration: 210441
loss: 0.9922451972961426,grad_norm: 0.8494220553353319, iteration: 210442
loss: 1.0000990629196167,grad_norm: 0.9999990554983469, iteration: 210443
loss: 0.978617250919342,grad_norm: 0.8801472845184023, iteration: 210444
loss: 0.9858154654502869,grad_norm: 0.9999992141700711, iteration: 210445
loss: 1.0422106981277466,grad_norm: 0.9753444255072621, iteration: 210446
loss: 1.0146310329437256,grad_norm: 0.7667233639866028, iteration: 210447
loss: 1.0017493963241577,grad_norm: 0.8272449672789027, iteration: 210448
loss: 1.0743627548217773,grad_norm: 0.9999989983032553, iteration: 210449
loss: 0.9614710211753845,grad_norm: 0.9391749627021472, iteration: 210450
loss: 1.089857578277588,grad_norm: 0.9999990511791861, iteration: 210451
loss: 0.9719882607460022,grad_norm: 0.7488229625582679, iteration: 210452
loss: 0.997846782207489,grad_norm: 0.9999992970242643, iteration: 210453
loss: 0.925096869468689,grad_norm: 0.999999027616279, iteration: 210454
loss: 1.0231261253356934,grad_norm: 0.999999173534491, iteration: 210455
loss: 1.0349843502044678,grad_norm: 0.9999998363539909, iteration: 210456
loss: 0.9506897926330566,grad_norm: 0.9999990307430503, iteration: 210457
loss: 0.9827983975410461,grad_norm: 0.9999991233859866, iteration: 210458
loss: 0.9998562932014465,grad_norm: 0.9178763200737705, iteration: 210459
loss: 1.0068269968032837,grad_norm: 0.9999990703922382, iteration: 210460
loss: 1.0117603540420532,grad_norm: 0.9811216219506786, iteration: 210461
loss: 0.987235426902771,grad_norm: 0.9922662700811662, iteration: 210462
loss: 1.0263805389404297,grad_norm: 0.9538926240123325, iteration: 210463
loss: 1.2596397399902344,grad_norm: 0.9999993118838721, iteration: 210464
loss: 1.0203125476837158,grad_norm: 0.9999993062984709, iteration: 210465
loss: 0.9716715216636658,grad_norm: 0.8444985607807795, iteration: 210466
loss: 1.3742047548294067,grad_norm: 0.9999994134191322, iteration: 210467
loss: 1.002648115158081,grad_norm: 0.9999991987694872, iteration: 210468
loss: 1.0065548419952393,grad_norm: 0.9999990411740846, iteration: 210469
loss: 1.0739988088607788,grad_norm: 0.9999990912932117, iteration: 210470
loss: 0.9869152307510376,grad_norm: 0.9999989948477018, iteration: 210471
loss: 0.9907839894294739,grad_norm: 0.9101206461683801, iteration: 210472
loss: 1.280747890472412,grad_norm: 0.999999625039761, iteration: 210473
loss: 0.9925096035003662,grad_norm: 0.9890927793032748, iteration: 210474
loss: 0.9921934604644775,grad_norm: 0.999999254324001, iteration: 210475
loss: 1.0021864175796509,grad_norm: 0.9123738787452859, iteration: 210476
loss: 0.9994402527809143,grad_norm: 0.9999991518112142, iteration: 210477
loss: 1.0464824438095093,grad_norm: 0.9999992930184751, iteration: 210478
loss: 1.0760198831558228,grad_norm: 0.9999993224017333, iteration: 210479
loss: 0.9510153532028198,grad_norm: 0.9999990974133921, iteration: 210480
loss: 1.0232312679290771,grad_norm: 0.9999991120411555, iteration: 210481
loss: 1.1939350366592407,grad_norm: 0.9999995244115343, iteration: 210482
loss: 0.9690764546394348,grad_norm: 0.9231441680113259, iteration: 210483
loss: 1.007398247718811,grad_norm: 0.9257365111032003, iteration: 210484
loss: 1.1253994703292847,grad_norm: 0.9999996097112739, iteration: 210485
loss: 0.9679497480392456,grad_norm: 0.8694799796941598, iteration: 210486
loss: 1.2293767929077148,grad_norm: 0.9999999269522174, iteration: 210487
loss: 1.0142602920532227,grad_norm: 0.9246966780846232, iteration: 210488
loss: 1.0023547410964966,grad_norm: 0.9615195678542896, iteration: 210489
loss: 1.1023051738739014,grad_norm: 0.9999992802772453, iteration: 210490
loss: 1.008133053779602,grad_norm: 0.9999992377036704, iteration: 210491
loss: 0.9589651823043823,grad_norm: 0.8978790843261295, iteration: 210492
loss: 0.9890381097793579,grad_norm: 0.9999989686857981, iteration: 210493
loss: 0.9929168224334717,grad_norm: 0.9694513984608194, iteration: 210494
loss: 1.0519875288009644,grad_norm: 0.999999286557628, iteration: 210495
loss: 0.9866185784339905,grad_norm: 0.8451859301072326, iteration: 210496
loss: 1.0009924173355103,grad_norm: 0.9999990814846854, iteration: 210497
loss: 1.0712896585464478,grad_norm: 0.9999997834946334, iteration: 210498
loss: 0.9508811831474304,grad_norm: 0.9969767075500323, iteration: 210499
loss: 0.9849079251289368,grad_norm: 0.9999991315412724, iteration: 210500
loss: 1.1002610921859741,grad_norm: 0.9999993859774037, iteration: 210501
loss: 1.2024284601211548,grad_norm: 0.99999956968118, iteration: 210502
loss: 1.0272057056427002,grad_norm: 0.9999991122253429, iteration: 210503
loss: 1.2678170204162598,grad_norm: 0.999999595938392, iteration: 210504
loss: 0.989499032497406,grad_norm: 0.9621261458883081, iteration: 210505
loss: 0.9675901532173157,grad_norm: 0.9490416206017828, iteration: 210506
loss: 1.0575166940689087,grad_norm: 0.9999998872325788, iteration: 210507
loss: 1.2684441804885864,grad_norm: 0.9999998831445753, iteration: 210508
loss: 1.344672679901123,grad_norm: 0.9999992583131155, iteration: 210509
loss: 1.3252134323120117,grad_norm: 0.9999995578610957, iteration: 210510
loss: 1.0634716749191284,grad_norm: 0.9999997003057537, iteration: 210511
loss: 1.4828945398330688,grad_norm: 0.9999995565181401, iteration: 210512
loss: 1.007136344909668,grad_norm: 0.9030028619395101, iteration: 210513
loss: 1.3301243782043457,grad_norm: 0.999999469427471, iteration: 210514
loss: 1.3890445232391357,grad_norm: 0.9999996889747, iteration: 210515
loss: 1.2628943920135498,grad_norm: 0.9999994694909351, iteration: 210516
loss: 1.2940268516540527,grad_norm: 0.9999992496413651, iteration: 210517
loss: 1.407887578010559,grad_norm: 0.9999996195943129, iteration: 210518
loss: 1.080970287322998,grad_norm: 1.0000000521022991, iteration: 210519
loss: 1.0123698711395264,grad_norm: 0.9762882559155729, iteration: 210520
loss: 1.0227794647216797,grad_norm: 0.9999990679525242, iteration: 210521
loss: 1.0057936906814575,grad_norm: 0.9258468727004847, iteration: 210522
loss: 1.054510235786438,grad_norm: 0.9999992561702209, iteration: 210523
loss: 1.4102637767791748,grad_norm: 0.9999995783029811, iteration: 210524
loss: 1.033230185508728,grad_norm: 0.9999997673793675, iteration: 210525
loss: 1.0009896755218506,grad_norm: 0.9999991817360996, iteration: 210526
loss: 1.0290535688400269,grad_norm: 0.9999993837905482, iteration: 210527
loss: 1.0554450750350952,grad_norm: 0.999999124528, iteration: 210528
loss: 1.0211089849472046,grad_norm: 0.9109266358468725, iteration: 210529
loss: 1.0135647058486938,grad_norm: 0.9582325014002662, iteration: 210530
loss: 1.0235402584075928,grad_norm: 0.8689085040747477, iteration: 210531
loss: 1.2064356803894043,grad_norm: 0.999999784021842, iteration: 210532
loss: 0.9937946200370789,grad_norm: 0.8460282022125769, iteration: 210533
loss: 1.0283788442611694,grad_norm: 0.9907067929503462, iteration: 210534
loss: 1.1154934167861938,grad_norm: 0.9999998482154876, iteration: 210535
loss: 1.0458619594573975,grad_norm: 0.9999993523965208, iteration: 210536
loss: 0.9902123212814331,grad_norm: 0.7056742440252638, iteration: 210537
loss: 1.056183934211731,grad_norm: 0.9999997442328162, iteration: 210538
loss: 1.015031099319458,grad_norm: 0.8292022092102206, iteration: 210539
loss: 0.9942524433135986,grad_norm: 0.9999991129095513, iteration: 210540
loss: 0.9921562075614929,grad_norm: 0.8264777243590448, iteration: 210541
loss: 1.0232919454574585,grad_norm: 0.9549094683073088, iteration: 210542
loss: 1.0044199228286743,grad_norm: 0.9999991466874651, iteration: 210543
loss: 1.1509250402450562,grad_norm: 0.9999992576022579, iteration: 210544
loss: 0.9998552203178406,grad_norm: 0.9824349796002024, iteration: 210545
loss: 1.255703330039978,grad_norm: 0.9999994235835086, iteration: 210546
loss: 0.9757912158966064,grad_norm: 0.9999989752201216, iteration: 210547
loss: 1.2514268159866333,grad_norm: 0.9999993063843307, iteration: 210548
loss: 1.2108209133148193,grad_norm: 0.9999994666785551, iteration: 210549
loss: 1.0865492820739746,grad_norm: 0.9999991239863635, iteration: 210550
loss: 1.0019534826278687,grad_norm: 0.9477342072579569, iteration: 210551
loss: 1.110504388809204,grad_norm: 0.9999998805600305, iteration: 210552
loss: 1.026071548461914,grad_norm: 0.9999994324518108, iteration: 210553
loss: 0.9752923250198364,grad_norm: 0.9999991102785211, iteration: 210554
loss: 0.9787116050720215,grad_norm: 0.9443686735862982, iteration: 210555
loss: 1.1655226945877075,grad_norm: 0.9999993471499808, iteration: 210556
loss: 1.0147778987884521,grad_norm: 0.8869938971671948, iteration: 210557
loss: 0.9899821877479553,grad_norm: 0.9999990425267683, iteration: 210558
loss: 1.1485466957092285,grad_norm: 0.9999997691942569, iteration: 210559
loss: 1.0212446451187134,grad_norm: 0.9999992843936677, iteration: 210560
loss: 1.008121132850647,grad_norm: 0.999999181252878, iteration: 210561
loss: 1.2622326612472534,grad_norm: 0.9999999064478743, iteration: 210562
loss: 0.9682183861732483,grad_norm: 0.9999993310198754, iteration: 210563
loss: 1.0642122030258179,grad_norm: 0.9999991448496088, iteration: 210564
loss: 0.9854700565338135,grad_norm: 0.9999989874488217, iteration: 210565
loss: 0.9906719326972961,grad_norm: 0.8682574813322957, iteration: 210566
loss: 0.9539018869400024,grad_norm: 0.960757592049891, iteration: 210567
loss: 1.0232449769973755,grad_norm: 0.9999993332054569, iteration: 210568
loss: 1.004575490951538,grad_norm: 0.9999990995677857, iteration: 210569
loss: 1.1013875007629395,grad_norm: 0.9999992374797294, iteration: 210570
loss: 1.034957766532898,grad_norm: 0.9999990761671198, iteration: 210571
loss: 1.0502433776855469,grad_norm: 0.999999325063975, iteration: 210572
loss: 0.9647639989852905,grad_norm: 0.9999990094400873, iteration: 210573
loss: 0.9989343285560608,grad_norm: 0.9999991673077915, iteration: 210574
loss: 1.053378939628601,grad_norm: 0.9999996688603988, iteration: 210575
loss: 1.0011422634124756,grad_norm: 0.9999989794231122, iteration: 210576
loss: 0.9618031978607178,grad_norm: 0.8600698508631304, iteration: 210577
loss: 1.1065988540649414,grad_norm: 0.9999992337112684, iteration: 210578
loss: 1.0369863510131836,grad_norm: 0.9999991824333763, iteration: 210579
loss: 1.0097612142562866,grad_norm: 0.9560827878633952, iteration: 210580
loss: 1.0112414360046387,grad_norm: 0.9999996785705186, iteration: 210581
loss: 0.9829942584037781,grad_norm: 0.945256680375312, iteration: 210582
loss: 1.0124930143356323,grad_norm: 0.8061266322456067, iteration: 210583
loss: 0.9917396306991577,grad_norm: 0.9999990638088938, iteration: 210584
loss: 0.9918838739395142,grad_norm: 0.9999993608689078, iteration: 210585
loss: 0.9828255772590637,grad_norm: 0.977953143896235, iteration: 210586
loss: 0.9695116281509399,grad_norm: 0.999999491448997, iteration: 210587
loss: 1.0161106586456299,grad_norm: 0.9999994225024388, iteration: 210588
loss: 0.9812626242637634,grad_norm: 0.9999991131471474, iteration: 210589
loss: 1.032509684562683,grad_norm: 0.9999995217346745, iteration: 210590
loss: 0.988878071308136,grad_norm: 0.9999991852550234, iteration: 210591
loss: 1.0345768928527832,grad_norm: 0.9999992264277409, iteration: 210592
loss: 1.0023548603057861,grad_norm: 0.9999994316623619, iteration: 210593
loss: 0.9706552028656006,grad_norm: 0.8185621981371652, iteration: 210594
loss: 0.9931873083114624,grad_norm: 0.9999990480031757, iteration: 210595
loss: 1.004329800605774,grad_norm: 0.9999991890646361, iteration: 210596
loss: 0.9872056841850281,grad_norm: 0.9645397324442964, iteration: 210597
loss: 1.0530263185501099,grad_norm: 0.9999988647934406, iteration: 210598
loss: 0.9825697541236877,grad_norm: 0.9999991554197561, iteration: 210599
loss: 1.0310345888137817,grad_norm: 0.999999660646712, iteration: 210600
loss: 1.004521131515503,grad_norm: 0.9006013627468958, iteration: 210601
loss: 1.0338786840438843,grad_norm: 0.9400955565202852, iteration: 210602
loss: 0.9908921718597412,grad_norm: 0.8900448824060152, iteration: 210603
loss: 1.0229493379592896,grad_norm: 0.9999990583085445, iteration: 210604
loss: 0.9866098761558533,grad_norm: 0.9342102153308125, iteration: 210605
loss: 0.9702292084693909,grad_norm: 0.862049716091499, iteration: 210606
loss: 0.9503692984580994,grad_norm: 0.9532244145571926, iteration: 210607
loss: 0.9990984797477722,grad_norm: 0.9999990696206573, iteration: 210608
loss: 1.0088263750076294,grad_norm: 0.9999990703343231, iteration: 210609
loss: 1.0421990156173706,grad_norm: 0.9999991783075896, iteration: 210610
loss: 1.074570894241333,grad_norm: 0.9999992611971095, iteration: 210611
loss: 1.00217604637146,grad_norm: 0.9028304005269856, iteration: 210612
loss: 1.0152060985565186,grad_norm: 0.9210815497976933, iteration: 210613
loss: 1.0335263013839722,grad_norm: 0.9999991261321605, iteration: 210614
loss: 1.0198744535446167,grad_norm: 0.9537829383404826, iteration: 210615
loss: 1.0188449621200562,grad_norm: 0.9504052779868668, iteration: 210616
loss: 1.1822686195373535,grad_norm: 0.9999998812363734, iteration: 210617
loss: 1.0566215515136719,grad_norm: 0.9881798028597061, iteration: 210618
loss: 0.9987947940826416,grad_norm: 0.9834192908390382, iteration: 210619
loss: 1.0040404796600342,grad_norm: 0.999998991036066, iteration: 210620
loss: 1.0396310091018677,grad_norm: 0.8857686306131306, iteration: 210621
loss: 1.0250188112258911,grad_norm: 0.9631727008841307, iteration: 210622
loss: 0.9875288009643555,grad_norm: 0.9510958844945031, iteration: 210623
loss: 1.0132259130477905,grad_norm: 0.9999991596224121, iteration: 210624
loss: 1.0142453908920288,grad_norm: 0.8103157495527695, iteration: 210625
loss: 1.1506547927856445,grad_norm: 0.9999996754372087, iteration: 210626
loss: 0.9981997013092041,grad_norm: 0.8923491666818878, iteration: 210627
loss: 1.0222344398498535,grad_norm: 0.9999992617727268, iteration: 210628
loss: 0.9916895627975464,grad_norm: 0.9999989343949066, iteration: 210629
loss: 1.0105332136154175,grad_norm: 0.9847163080673685, iteration: 210630
loss: 1.0188761949539185,grad_norm: 0.9999991422751763, iteration: 210631
loss: 1.0043718814849854,grad_norm: 0.9999990931778279, iteration: 210632
loss: 1.0006269216537476,grad_norm: 0.717339458562343, iteration: 210633
loss: 0.9903888702392578,grad_norm: 0.9999990816267232, iteration: 210634
loss: 1.0854204893112183,grad_norm: 0.9999991266781016, iteration: 210635
loss: 1.0115634202957153,grad_norm: 0.9999990570613272, iteration: 210636
loss: 1.0227570533752441,grad_norm: 0.9999992737486505, iteration: 210637
loss: 1.0754308700561523,grad_norm: 0.9999991869929019, iteration: 210638
loss: 1.0584263801574707,grad_norm: 0.9999996854094075, iteration: 210639
loss: 0.9856653213500977,grad_norm: 0.9736578799496294, iteration: 210640
loss: 1.0774891376495361,grad_norm: 0.9627129788208366, iteration: 210641
loss: 0.9918450117111206,grad_norm: 0.9999990281083521, iteration: 210642
loss: 1.0472900867462158,grad_norm: 0.9999990265842833, iteration: 210643
loss: 1.1275264024734497,grad_norm: 0.9999995763949019, iteration: 210644
loss: 1.0144578218460083,grad_norm: 0.9999991334944687, iteration: 210645
loss: 1.0051331520080566,grad_norm: 0.8272378943319559, iteration: 210646
loss: 0.9827796816825867,grad_norm: 0.9999991563243863, iteration: 210647
loss: 0.9985293745994568,grad_norm: 0.9999996683159503, iteration: 210648
loss: 0.9757305383682251,grad_norm: 0.9414073995896665, iteration: 210649
loss: 1.0061990022659302,grad_norm: 0.9967553270041768, iteration: 210650
loss: 1.0150809288024902,grad_norm: 0.9999994905640707, iteration: 210651
loss: 0.9809596538543701,grad_norm: 0.9999991529542105, iteration: 210652
loss: 0.9936341047286987,grad_norm: 0.9130958731050661, iteration: 210653
loss: 1.003035068511963,grad_norm: 0.9999992187440967, iteration: 210654
loss: 1.069373369216919,grad_norm: 0.9654053370244573, iteration: 210655
loss: 0.9993082284927368,grad_norm: 0.949976758426411, iteration: 210656
loss: 0.9701400995254517,grad_norm: 0.8678243509708929, iteration: 210657
loss: 1.0334844589233398,grad_norm: 0.8826124361301783, iteration: 210658
loss: 1.0150192975997925,grad_norm: 0.999999163932806, iteration: 210659
loss: 1.0207654237747192,grad_norm: 0.9999992155295175, iteration: 210660
loss: 1.0174864530563354,grad_norm: 0.938262048245031, iteration: 210661
loss: 0.9991951584815979,grad_norm: 0.8465796954356015, iteration: 210662
loss: 1.0225132703781128,grad_norm: 0.9999989690269645, iteration: 210663
loss: 1.0069122314453125,grad_norm: 0.8350325795811697, iteration: 210664
loss: 0.9967788457870483,grad_norm: 0.8028295324753815, iteration: 210665
loss: 0.9788910150527954,grad_norm: 0.9300583179115669, iteration: 210666
loss: 0.9805505871772766,grad_norm: 0.9999991137424687, iteration: 210667
loss: 1.0431478023529053,grad_norm: 0.9999995620810624, iteration: 210668
loss: 0.9920071363449097,grad_norm: 0.8582599547371623, iteration: 210669
loss: 1.0127955675125122,grad_norm: 0.8884482085221879, iteration: 210670
loss: 0.9812000393867493,grad_norm: 0.9285246187154671, iteration: 210671
loss: 0.9877036213874817,grad_norm: 0.9999999509166618, iteration: 210672
loss: 1.0039585828781128,grad_norm: 0.8331554807163667, iteration: 210673
loss: 0.9861820340156555,grad_norm: 0.99999892055835, iteration: 210674
loss: 0.9836686849594116,grad_norm: 0.9457213360309349, iteration: 210675
loss: 1.022408127784729,grad_norm: 0.7994383731883193, iteration: 210676
loss: 1.002021312713623,grad_norm: 0.9555842409421859, iteration: 210677
loss: 0.9916568994522095,grad_norm: 0.9908057055315074, iteration: 210678
loss: 1.0130809545516968,grad_norm: 0.9646392905448674, iteration: 210679
loss: 1.003922939300537,grad_norm: 0.911224937482748, iteration: 210680
loss: 0.9777708053588867,grad_norm: 0.9010945305725813, iteration: 210681
loss: 1.0308724641799927,grad_norm: 0.9999990477221793, iteration: 210682
loss: 0.9586479067802429,grad_norm: 0.9999991443633901, iteration: 210683
loss: 0.9945347309112549,grad_norm: 0.8483806336898557, iteration: 210684
loss: 0.9905145168304443,grad_norm: 0.9107781697508477, iteration: 210685
loss: 1.0023961067199707,grad_norm: 0.9999992147202568, iteration: 210686
loss: 1.0235246419906616,grad_norm: 0.9409767981566296, iteration: 210687
loss: 1.014778733253479,grad_norm: 0.9961300362494288, iteration: 210688
loss: 1.0828803777694702,grad_norm: 0.9999996066808332, iteration: 210689
loss: 0.9670022130012512,grad_norm: 0.9273344925239093, iteration: 210690
loss: 0.9740044474601746,grad_norm: 0.9999990362114951, iteration: 210691
loss: 0.9820901155471802,grad_norm: 0.9999992922768833, iteration: 210692
loss: 0.9803089499473572,grad_norm: 0.999999094896196, iteration: 210693
loss: 0.9814766645431519,grad_norm: 0.8741047243644815, iteration: 210694
loss: 1.0978219509124756,grad_norm: 0.9999990776349542, iteration: 210695
loss: 0.9930511713027954,grad_norm: 0.79207166251594, iteration: 210696
loss: 0.9758768677711487,grad_norm: 0.8893834143335162, iteration: 210697
loss: 1.0018213987350464,grad_norm: 0.8749935591903871, iteration: 210698
loss: 1.02780282497406,grad_norm: 0.9655614400029906, iteration: 210699
loss: 0.9859353303909302,grad_norm: 0.8725102071296957, iteration: 210700
loss: 1.0268644094467163,grad_norm: 0.9798762810811178, iteration: 210701
loss: 0.9835389256477356,grad_norm: 0.8871136125000201, iteration: 210702
loss: 1.0029308795928955,grad_norm: 0.7920739676443056, iteration: 210703
loss: 1.0072112083435059,grad_norm: 0.9532117604949879, iteration: 210704
loss: 0.9875985383987427,grad_norm: 0.9999990283847586, iteration: 210705
loss: 0.9871461987495422,grad_norm: 0.8913553846573053, iteration: 210706
loss: 1.0426496267318726,grad_norm: 0.9999993838908506, iteration: 210707
loss: 1.0322775840759277,grad_norm: 0.9999991132434829, iteration: 210708
loss: 1.0011640787124634,grad_norm: 0.8321067333852041, iteration: 210709
loss: 1.0123939514160156,grad_norm: 0.9999990965401755, iteration: 210710
loss: 1.0267863273620605,grad_norm: 0.9999990207156321, iteration: 210711
loss: 0.9986574053764343,grad_norm: 0.9416520367680292, iteration: 210712
loss: 0.9975696206092834,grad_norm: 0.9999990764998619, iteration: 210713
loss: 1.0271486043930054,grad_norm: 0.8651503289449556, iteration: 210714
loss: 1.228255271911621,grad_norm: 0.9999991395503429, iteration: 210715
loss: 1.0086097717285156,grad_norm: 0.8589539268198447, iteration: 210716
loss: 0.9889390468597412,grad_norm: 0.8843875889695814, iteration: 210717
loss: 0.9673181176185608,grad_norm: 0.9487755825451963, iteration: 210718
loss: 1.027725338935852,grad_norm: 0.9999992729646695, iteration: 210719
loss: 0.9969784021377563,grad_norm: 0.9275347945655846, iteration: 210720
loss: 0.9727303981781006,grad_norm: 0.9248917002669033, iteration: 210721
loss: 1.0002524852752686,grad_norm: 0.9019604585308009, iteration: 210722
loss: 1.017022728919983,grad_norm: 0.9999991871940953, iteration: 210723
loss: 0.9980370402336121,grad_norm: 0.9999991974040191, iteration: 210724
loss: 0.9565936923027039,grad_norm: 0.9999990604787503, iteration: 210725
loss: 1.004705548286438,grad_norm: 0.7020061502220517, iteration: 210726
loss: 0.9608674049377441,grad_norm: 0.883209893581577, iteration: 210727
loss: 1.0130027532577515,grad_norm: 0.9363289970711607, iteration: 210728
loss: 1.0387957096099854,grad_norm: 0.9999990816177244, iteration: 210729
loss: 0.975703239440918,grad_norm: 0.9233369923378243, iteration: 210730
loss: 1.028436541557312,grad_norm: 0.94574318046548, iteration: 210731
loss: 0.9847018122673035,grad_norm: 0.7935404288603785, iteration: 210732
loss: 1.0096232891082764,grad_norm: 0.9999993878133214, iteration: 210733
loss: 1.0083398818969727,grad_norm: 0.8369132218710323, iteration: 210734
loss: 0.9836744070053101,grad_norm: 0.9903519079571218, iteration: 210735
loss: 1.0490078926086426,grad_norm: 0.9999992189845072, iteration: 210736
loss: 0.9965901970863342,grad_norm: 0.9010256590684924, iteration: 210737
loss: 0.9445396065711975,grad_norm: 0.9012773613646484, iteration: 210738
loss: 1.0213847160339355,grad_norm: 0.8263096832196506, iteration: 210739
loss: 0.9764680862426758,grad_norm: 0.9059991751175708, iteration: 210740
loss: 0.9944884181022644,grad_norm: 0.8247758725386249, iteration: 210741
loss: 0.981814980506897,grad_norm: 0.9316035268440429, iteration: 210742
loss: 1.008456826210022,grad_norm: 0.8670208115307494, iteration: 210743
loss: 1.0025862455368042,grad_norm: 0.9999991052688123, iteration: 210744
loss: 0.9899630546569824,grad_norm: 0.9575270853060108, iteration: 210745
loss: 0.9886237382888794,grad_norm: 0.9004250803329563, iteration: 210746
loss: 1.0070195198059082,grad_norm: 0.9007064777804747, iteration: 210747
loss: 1.0260056257247925,grad_norm: 0.9999991324241848, iteration: 210748
loss: 0.977777361869812,grad_norm: 0.9018162712056476, iteration: 210749
loss: 0.9708543419837952,grad_norm: 0.8680736991719821, iteration: 210750
loss: 1.0315132141113281,grad_norm: 0.9999991317138817, iteration: 210751
loss: 0.9764810800552368,grad_norm: 0.8526268431153583, iteration: 210752
loss: 0.9963656663894653,grad_norm: 0.7940802439823911, iteration: 210753
loss: 0.9972377419471741,grad_norm: 0.9999990275938089, iteration: 210754
loss: 0.9954668283462524,grad_norm: 0.9964378620422942, iteration: 210755
loss: 1.0017632246017456,grad_norm: 0.9999992834953365, iteration: 210756
loss: 1.0150457620620728,grad_norm: 0.8611304931279976, iteration: 210757
loss: 0.9954847693443298,grad_norm: 0.7398911367685957, iteration: 210758
loss: 0.9976640939712524,grad_norm: 0.9262433713449681, iteration: 210759
loss: 1.0048149824142456,grad_norm: 0.9734610333704812, iteration: 210760
loss: 0.974696934223175,grad_norm: 0.7868935636396179, iteration: 210761
loss: 1.0099716186523438,grad_norm: 0.9999992362916458, iteration: 210762
loss: 1.0037881135940552,grad_norm: 0.9117444007783468, iteration: 210763
loss: 0.9852203726768494,grad_norm: 0.8600896348195183, iteration: 210764
loss: 1.00096595287323,grad_norm: 0.932859870451086, iteration: 210765
loss: 0.9916043281555176,grad_norm: 0.7340747839742346, iteration: 210766
loss: 1.000740885734558,grad_norm: 0.9288710380029387, iteration: 210767
loss: 0.9874231815338135,grad_norm: 0.9929731933410101, iteration: 210768
loss: 0.9610166549682617,grad_norm: 0.9663767921448112, iteration: 210769
loss: 1.0255815982818604,grad_norm: 0.8590640959544281, iteration: 210770
loss: 1.0135527849197388,grad_norm: 0.9615340528099625, iteration: 210771
loss: 0.9852058291435242,grad_norm: 0.9155167425577753, iteration: 210772
loss: 0.9961676597595215,grad_norm: 0.9103391251834445, iteration: 210773
loss: 0.9950153231620789,grad_norm: 0.9399594939181334, iteration: 210774
loss: 1.019967794418335,grad_norm: 0.945176454925976, iteration: 210775
loss: 0.9630972743034363,grad_norm: 0.9834341198961811, iteration: 210776
loss: 1.0066922903060913,grad_norm: 0.9033919519526773, iteration: 210777
loss: 0.9797987341880798,grad_norm: 0.9629309516540555, iteration: 210778
loss: 1.0334100723266602,grad_norm: 0.9999991905438077, iteration: 210779
loss: 0.9958815574645996,grad_norm: 0.7970738163695886, iteration: 210780
loss: 1.0059993267059326,grad_norm: 0.8549585578149014, iteration: 210781
loss: 0.9856107831001282,grad_norm: 0.9999990681274646, iteration: 210782
loss: 0.9948577880859375,grad_norm: 0.9999994433419704, iteration: 210783
loss: 1.0022200345993042,grad_norm: 0.9431008048599722, iteration: 210784
loss: 1.0276485681533813,grad_norm: 0.999999171599465, iteration: 210785
loss: 1.0220268964767456,grad_norm: 0.9750489843170207, iteration: 210786
loss: 0.9910860061645508,grad_norm: 0.999999061650193, iteration: 210787
loss: 0.9575784802436829,grad_norm: 0.8517085743338385, iteration: 210788
loss: 0.9564829468727112,grad_norm: 0.9224219151705917, iteration: 210789
loss: 0.9931720495223999,grad_norm: 0.9584118473400216, iteration: 210790
loss: 1.0206553936004639,grad_norm: 0.9999991025658589, iteration: 210791
loss: 1.0408252477645874,grad_norm: 0.8828920827337219, iteration: 210792
loss: 0.9981908202171326,grad_norm: 0.9999991498123827, iteration: 210793
loss: 1.0605065822601318,grad_norm: 0.9999991297910705, iteration: 210794
loss: 1.077143669128418,grad_norm: 0.9999997791019344, iteration: 210795
loss: 1.0045825242996216,grad_norm: 0.9999991619820704, iteration: 210796
loss: 0.9715719819068909,grad_norm: 0.9999990123079877, iteration: 210797
loss: 1.1438713073730469,grad_norm: 0.9999991421327914, iteration: 210798
loss: 1.0128194093704224,grad_norm: 0.9721180033087948, iteration: 210799
loss: 0.998574435710907,grad_norm: 0.8139779462395685, iteration: 210800
loss: 0.9986832737922668,grad_norm: 0.9999991509116393, iteration: 210801
loss: 0.9891992807388306,grad_norm: 0.9606843297431266, iteration: 210802
loss: 0.9882801175117493,grad_norm: 0.9788981382255644, iteration: 210803
loss: 1.0127861499786377,grad_norm: 0.9999990726441343, iteration: 210804
loss: 1.0242419242858887,grad_norm: 0.980358950788566, iteration: 210805
loss: 0.9832583069801331,grad_norm: 0.8852438890904114, iteration: 210806
loss: 1.0319796800613403,grad_norm: 0.9999996617965703, iteration: 210807
loss: 1.0146534442901611,grad_norm: 0.9274155110666755, iteration: 210808
loss: 1.0043193101882935,grad_norm: 0.8596106892463791, iteration: 210809
loss: 0.9975360631942749,grad_norm: 0.9999991837581623, iteration: 210810
loss: 1.0004911422729492,grad_norm: 0.9221879150131538, iteration: 210811
loss: 0.9834726452827454,grad_norm: 0.9201081268351671, iteration: 210812
loss: 0.9898143410682678,grad_norm: 0.8974334653764657, iteration: 210813
loss: 0.9607356786727905,grad_norm: 0.8142279720086119, iteration: 210814
loss: 1.0105857849121094,grad_norm: 0.9999991547876982, iteration: 210815
loss: 0.9961947202682495,grad_norm: 0.978444259060719, iteration: 210816
loss: 1.048838496208191,grad_norm: 0.9032428650666048, iteration: 210817
loss: 0.999549388885498,grad_norm: 0.8354663463025489, iteration: 210818
loss: 1.0044612884521484,grad_norm: 0.9999990943946659, iteration: 210819
loss: 1.03147554397583,grad_norm: 0.9999997563972194, iteration: 210820
loss: 0.9903638958930969,grad_norm: 0.8376792933042249, iteration: 210821
loss: 0.9873697757720947,grad_norm: 0.9999991331335325, iteration: 210822
loss: 1.0077723264694214,grad_norm: 0.9999991644183889, iteration: 210823
loss: 0.9991580247879028,grad_norm: 0.9999991158860014, iteration: 210824
loss: 1.0067089796066284,grad_norm: 0.9396714608666783, iteration: 210825
loss: 1.0150047540664673,grad_norm: 0.7957219065330554, iteration: 210826
loss: 1.0036520957946777,grad_norm: 0.7919863479690201, iteration: 210827
loss: 1.0069026947021484,grad_norm: 0.9573968873159903, iteration: 210828
loss: 1.0313551425933838,grad_norm: 0.9958559570685699, iteration: 210829
loss: 1.056956171989441,grad_norm: 0.845703065631648, iteration: 210830
loss: 1.0011786222457886,grad_norm: 0.9772073016340479, iteration: 210831
loss: 0.9883249402046204,grad_norm: 0.9333525476076254, iteration: 210832
loss: 1.005845546722412,grad_norm: 0.9504407452588556, iteration: 210833
loss: 0.9729400277137756,grad_norm: 0.9999991201404479, iteration: 210834
loss: 0.9629601240158081,grad_norm: 0.999999093776609, iteration: 210835
loss: 1.0237632989883423,grad_norm: 0.9999990196293438, iteration: 210836
loss: 0.97813481092453,grad_norm: 0.9450229543689191, iteration: 210837
loss: 0.9932844638824463,grad_norm: 0.99999892219679, iteration: 210838
loss: 0.99556964635849,grad_norm: 0.8495917811163285, iteration: 210839
loss: 0.9838042855262756,grad_norm: 0.9260050260106766, iteration: 210840
loss: 0.9972237348556519,grad_norm: 0.8548964651641642, iteration: 210841
loss: 0.9634225368499756,grad_norm: 0.9469924960503826, iteration: 210842
loss: 1.0244957208633423,grad_norm: 0.9999989857950548, iteration: 210843
loss: 0.9906864166259766,grad_norm: 0.999999172280582, iteration: 210844
loss: 0.9632157683372498,grad_norm: 0.7372360724321211, iteration: 210845
loss: 0.9915162920951843,grad_norm: 0.8465311624558761, iteration: 210846
loss: 1.0686473846435547,grad_norm: 0.9999992139637476, iteration: 210847
loss: 0.9900757074356079,grad_norm: 0.9897635169600746, iteration: 210848
loss: 1.0116713047027588,grad_norm: 0.9999990583446386, iteration: 210849
loss: 0.9712134003639221,grad_norm: 0.8684526489609682, iteration: 210850
loss: 1.00796639919281,grad_norm: 0.9780761473493715, iteration: 210851
loss: 0.9921358823776245,grad_norm: 0.9999989574741298, iteration: 210852
loss: 1.0168967247009277,grad_norm: 0.9999991064688356, iteration: 210853
loss: 0.9821085333824158,grad_norm: 0.9583267699508807, iteration: 210854
loss: 0.9902679324150085,grad_norm: 0.9938384539606443, iteration: 210855
loss: 0.9787482619285583,grad_norm: 0.999999170746657, iteration: 210856
loss: 1.0570943355560303,grad_norm: 0.9999995518312225, iteration: 210857
loss: 0.998202919960022,grad_norm: 0.999999593005877, iteration: 210858
loss: 0.9992837905883789,grad_norm: 0.8404991179621346, iteration: 210859
loss: 0.9689258337020874,grad_norm: 0.9892008219326304, iteration: 210860
loss: 1.090543508529663,grad_norm: 0.9852278497887985, iteration: 210861
loss: 1.0060285329818726,grad_norm: 0.9133000904226432, iteration: 210862
loss: 1.0100866556167603,grad_norm: 0.8127900445625202, iteration: 210863
loss: 0.998637855052948,grad_norm: 0.9413623534912822, iteration: 210864
loss: 1.0110692977905273,grad_norm: 0.9999991483168381, iteration: 210865
loss: 1.0219600200653076,grad_norm: 0.935639146564373, iteration: 210866
loss: 1.0128052234649658,grad_norm: 0.8524098341172289, iteration: 210867
loss: 0.9829450249671936,grad_norm: 0.8640696617139473, iteration: 210868
loss: 0.9590358734130859,grad_norm: 0.9223219180308202, iteration: 210869
loss: 0.9571051597595215,grad_norm: 0.9304954823358218, iteration: 210870
loss: 1.0114656686782837,grad_norm: 0.725419399472193, iteration: 210871
loss: 0.9921689629554749,grad_norm: 0.9999990966257201, iteration: 210872
loss: 0.9831281900405884,grad_norm: 0.9324943483524895, iteration: 210873
loss: 0.9842104911804199,grad_norm: 0.8994114244235026, iteration: 210874
loss: 0.9711509943008423,grad_norm: 0.9999990776973467, iteration: 210875
loss: 0.9997992515563965,grad_norm: 0.9744123736312208, iteration: 210876
loss: 0.9849193692207336,grad_norm: 0.8550577614137803, iteration: 210877
loss: 0.9844040870666504,grad_norm: 0.9999990474399169, iteration: 210878
loss: 0.9854329824447632,grad_norm: 0.8356030719937185, iteration: 210879
loss: 0.9898204803466797,grad_norm: 0.8417141666500391, iteration: 210880
loss: 0.9921073317527771,grad_norm: 0.903784597388117, iteration: 210881
loss: 1.0202906131744385,grad_norm: 0.9999995149737875, iteration: 210882
loss: 1.0081660747528076,grad_norm: 0.8094458638915035, iteration: 210883
loss: 0.9944853782653809,grad_norm: 0.9469758756636113, iteration: 210884
loss: 1.0333584547042847,grad_norm: 0.9691641210239831, iteration: 210885
loss: 1.008905053138733,grad_norm: 0.9993979028971204, iteration: 210886
loss: 0.9987978339195251,grad_norm: 0.8268458137711655, iteration: 210887
loss: 0.996924638748169,grad_norm: 0.9286184136549778, iteration: 210888
loss: 0.9967678785324097,grad_norm: 0.9604134588966478, iteration: 210889
loss: 1.0072392225265503,grad_norm: 0.985673734129244, iteration: 210890
loss: 1.0199556350708008,grad_norm: 0.9999992100022717, iteration: 210891
loss: 0.9717961549758911,grad_norm: 0.9999990340115024, iteration: 210892
loss: 1.0183768272399902,grad_norm: 0.8697281306080419, iteration: 210893
loss: 0.9748669266700745,grad_norm: 0.9260570710941057, iteration: 210894
loss: 0.9765986800193787,grad_norm: 0.9999990768136304, iteration: 210895
loss: 0.9715021848678589,grad_norm: 0.9999990504578828, iteration: 210896
loss: 1.0436360836029053,grad_norm: 0.8900625544508921, iteration: 210897
loss: 0.989799439907074,grad_norm: 0.9136762918030339, iteration: 210898
loss: 0.9737789034843445,grad_norm: 0.9522289831598861, iteration: 210899
loss: 1.031766414642334,grad_norm: 0.9999996233506028, iteration: 210900
loss: 0.9757866263389587,grad_norm: 0.752983350310805, iteration: 210901
loss: 0.9526935815811157,grad_norm: 0.9450866603219273, iteration: 210902
loss: 0.9592863917350769,grad_norm: 0.9155982812941074, iteration: 210903
loss: 0.9750556945800781,grad_norm: 0.9999991702155233, iteration: 210904
loss: 0.9983239769935608,grad_norm: 0.9999990922515962, iteration: 210905
loss: 0.9804008603096008,grad_norm: 0.874302087151634, iteration: 210906
loss: 1.007785677909851,grad_norm: 0.9999995304368163, iteration: 210907
loss: 1.0082921981811523,grad_norm: 0.9986169252678377, iteration: 210908
loss: 1.0331542491912842,grad_norm: 0.9999990782793263, iteration: 210909
loss: 0.9792914390563965,grad_norm: 0.9999990942444509, iteration: 210910
loss: 0.9642473459243774,grad_norm: 0.952244515173064, iteration: 210911
loss: 1.0420979261398315,grad_norm: 0.9999992228884272, iteration: 210912
loss: 1.0183115005493164,grad_norm: 0.9999993754620053, iteration: 210913
loss: 1.0463223457336426,grad_norm: 0.9596767902003507, iteration: 210914
loss: 1.006080150604248,grad_norm: 0.9999990043517814, iteration: 210915
loss: 1.0362064838409424,grad_norm: 0.8683328878854288, iteration: 210916
loss: 0.9994045495986938,grad_norm: 0.9999990285881254, iteration: 210917
loss: 1.0356252193450928,grad_norm: 0.9999992727270157, iteration: 210918
loss: 1.0035945177078247,grad_norm: 0.8189323133403752, iteration: 210919
loss: 0.9866265654563904,grad_norm: 0.9744125339513614, iteration: 210920
loss: 1.0143479108810425,grad_norm: 0.9999990013956076, iteration: 210921
loss: 0.9908220767974854,grad_norm: 0.9436070617019388, iteration: 210922
loss: 1.0371637344360352,grad_norm: 0.9999991823231053, iteration: 210923
loss: 1.017370343208313,grad_norm: 0.9256839263147003, iteration: 210924
loss: 1.0012375116348267,grad_norm: 0.9999995281750491, iteration: 210925
loss: 1.0027978420257568,grad_norm: 0.9999989993294094, iteration: 210926
loss: 1.0166054964065552,grad_norm: 0.9660694699625367, iteration: 210927
loss: 1.0373473167419434,grad_norm: 0.9999998197513902, iteration: 210928
loss: 1.0004842281341553,grad_norm: 0.9694020689144273, iteration: 210929
loss: 1.0171027183532715,grad_norm: 0.9904948576068664, iteration: 210930
loss: 1.0325639247894287,grad_norm: 0.9999990033223498, iteration: 210931
loss: 0.9997785091400146,grad_norm: 0.905431165174372, iteration: 210932
loss: 0.9723614454269409,grad_norm: 0.9999990934671721, iteration: 210933
loss: 0.9732279777526855,grad_norm: 0.879832708229148, iteration: 210934
loss: 1.03640878200531,grad_norm: 0.8357540223708069, iteration: 210935
loss: 0.9663028717041016,grad_norm: 0.875169825174837, iteration: 210936
loss: 1.001070261001587,grad_norm: 0.8239029515097163, iteration: 210937
loss: 1.009845495223999,grad_norm: 0.8628475679451938, iteration: 210938
loss: 0.9889695048332214,grad_norm: 0.9542998541589854, iteration: 210939
loss: 0.980165421962738,grad_norm: 0.9999991438664048, iteration: 210940
loss: 0.9983913898468018,grad_norm: 0.910101048101862, iteration: 210941
loss: 1.0023058652877808,grad_norm: 0.8589513211621381, iteration: 210942
loss: 0.9771983027458191,grad_norm: 0.8802120778033887, iteration: 210943
loss: 0.9918555617332458,grad_norm: 0.7424101585785907, iteration: 210944
loss: 1.0083515644073486,grad_norm: 0.9999991820586915, iteration: 210945
loss: 1.0673154592514038,grad_norm: 1.0000000444317687, iteration: 210946
loss: 1.034591555595398,grad_norm: 0.9999990843833247, iteration: 210947
loss: 1.0563386678695679,grad_norm: 0.999999176684029, iteration: 210948
loss: 0.9501059651374817,grad_norm: 0.8381370155071155, iteration: 210949
loss: 0.9964287877082825,grad_norm: 0.8120693872878534, iteration: 210950
loss: 0.9870853424072266,grad_norm: 0.9999991530143233, iteration: 210951
loss: 0.9898543357849121,grad_norm: 0.9439000657591827, iteration: 210952
loss: 1.0282070636749268,grad_norm: 0.9142751415257169, iteration: 210953
loss: 0.9935734272003174,grad_norm: 0.9817328819503379, iteration: 210954
loss: 1.0212267637252808,grad_norm: 0.8457236722379948, iteration: 210955
loss: 0.9947041869163513,grad_norm: 0.8290879257831649, iteration: 210956
loss: 0.9909641742706299,grad_norm: 0.9730666731527667, iteration: 210957
loss: 0.9797210693359375,grad_norm: 0.8852890005085847, iteration: 210958
loss: 0.9874861240386963,grad_norm: 0.9993076385784551, iteration: 210959
loss: 1.0167717933654785,grad_norm: 0.999999243813267, iteration: 210960
loss: 0.9936815500259399,grad_norm: 0.8547389083485607, iteration: 210961
loss: 0.9870984554290771,grad_norm: 0.9999991010284296, iteration: 210962
loss: 1.026124119758606,grad_norm: 0.917270028010237, iteration: 210963
loss: 1.0160293579101562,grad_norm: 0.9818700909834799, iteration: 210964
loss: 0.957842230796814,grad_norm: 0.9999990944418545, iteration: 210965
loss: 1.0061378479003906,grad_norm: 0.9270654414154731, iteration: 210966
loss: 0.9950609803199768,grad_norm: 0.9299267768397866, iteration: 210967
loss: 1.0547327995300293,grad_norm: 0.9999994662728452, iteration: 210968
loss: 1.0170786380767822,grad_norm: 0.8653531007067629, iteration: 210969
loss: 1.0024991035461426,grad_norm: 0.9999990370643895, iteration: 210970
loss: 0.9755859971046448,grad_norm: 0.840933080059082, iteration: 210971
loss: 1.020138144493103,grad_norm: 0.9719121288904797, iteration: 210972
loss: 0.963844895362854,grad_norm: 0.9072830506429369, iteration: 210973
loss: 0.9937889575958252,grad_norm: 0.8886612665076333, iteration: 210974
loss: 1.0317223072052002,grad_norm: 0.9822010223930859, iteration: 210975
loss: 0.9749835729598999,grad_norm: 0.8965256022456904, iteration: 210976
loss: 0.9867527484893799,grad_norm: 0.9999991202876175, iteration: 210977
loss: 0.9789792895317078,grad_norm: 0.9999990558776076, iteration: 210978
loss: 0.9728314280509949,grad_norm: 0.9698573428139565, iteration: 210979
loss: 0.9857096076011658,grad_norm: 0.9999990220404807, iteration: 210980
loss: 1.0034507513046265,grad_norm: 0.8140178553026348, iteration: 210981
loss: 0.9796950221061707,grad_norm: 0.9560035790537406, iteration: 210982
loss: 0.9879021644592285,grad_norm: 0.8966684782143988, iteration: 210983
loss: 0.990718424320221,grad_norm: 0.9999994755040571, iteration: 210984
loss: 1.027353048324585,grad_norm: 0.9999993574518443, iteration: 210985
loss: 0.9880714416503906,grad_norm: 0.8230241252665, iteration: 210986
loss: 1.0176711082458496,grad_norm: 0.999999589643945, iteration: 210987
loss: 1.0112643241882324,grad_norm: 0.9999991610470415, iteration: 210988
loss: 0.9999170899391174,grad_norm: 0.9073080165429827, iteration: 210989
loss: 1.0071024894714355,grad_norm: 0.9999990080712994, iteration: 210990
loss: 1.027481198310852,grad_norm: 0.9999990889442374, iteration: 210991
loss: 0.9788985848426819,grad_norm: 0.8001510011381937, iteration: 210992
loss: 1.00052809715271,grad_norm: 0.8744325182331194, iteration: 210993
loss: 0.9645641446113586,grad_norm: 0.8518861714855773, iteration: 210994
loss: 0.9708954095840454,grad_norm: 0.9999990290940608, iteration: 210995
loss: 0.9951090216636658,grad_norm: 0.8968302310126307, iteration: 210996
loss: 0.9959020614624023,grad_norm: 0.8683897007775485, iteration: 210997
loss: 0.9743961095809937,grad_norm: 0.9999991923405056, iteration: 210998
loss: 1.018729329109192,grad_norm: 0.8726746281969708, iteration: 210999
loss: 1.0147498846054077,grad_norm: 0.804225976780701, iteration: 211000
loss: 0.9976053237915039,grad_norm: 0.9999992883286912, iteration: 211001
loss: 0.9891805052757263,grad_norm: 0.8169383916497246, iteration: 211002
loss: 0.9799787998199463,grad_norm: 0.999999418816285, iteration: 211003
loss: 0.9876565933227539,grad_norm: 0.9780670641115871, iteration: 211004
loss: 1.0785175561904907,grad_norm: 0.9999998618303003, iteration: 211005
loss: 0.9712410569190979,grad_norm: 0.8603128477251688, iteration: 211006
loss: 0.9866988658905029,grad_norm: 0.9014704765475081, iteration: 211007
loss: 0.9880553483963013,grad_norm: 0.8924819865009699, iteration: 211008
loss: 1.0807206630706787,grad_norm: 0.9999991294793257, iteration: 211009
loss: 0.9838230609893799,grad_norm: 0.9999991355024868, iteration: 211010
loss: 0.9697177410125732,grad_norm: 0.8808715629327766, iteration: 211011
loss: 1.0118218660354614,grad_norm: 0.8757374520722515, iteration: 211012
loss: 0.9941495656967163,grad_norm: 0.8772170193260054, iteration: 211013
loss: 1.0323007106781006,grad_norm: 0.9799473312978023, iteration: 211014
loss: 1.0368883609771729,grad_norm: 0.9999993226679768, iteration: 211015
loss: 1.0167902708053589,grad_norm: 0.810659504452713, iteration: 211016
loss: 0.9994668364524841,grad_norm: 0.9671908969510749, iteration: 211017
loss: 1.0141658782958984,grad_norm: 0.882646041495836, iteration: 211018
loss: 0.9963899850845337,grad_norm: 0.9204007181688385, iteration: 211019
loss: 0.9965059161186218,grad_norm: 0.88433322168861, iteration: 211020
loss: 1.0533119440078735,grad_norm: 0.9999991358643316, iteration: 211021
loss: 0.9959923624992371,grad_norm: 0.95382455760625, iteration: 211022
loss: 0.9891525506973267,grad_norm: 0.9184080998448093, iteration: 211023
loss: 1.0339436531066895,grad_norm: 0.9999989630848737, iteration: 211024
loss: 1.0134562253952026,grad_norm: 0.8535026116925493, iteration: 211025
loss: 0.9807242155075073,grad_norm: 0.9999993522672836, iteration: 211026
loss: 0.9674580693244934,grad_norm: 0.8647745992172655, iteration: 211027
loss: 1.0227696895599365,grad_norm: 0.9999992169693832, iteration: 211028
loss: 1.021217942237854,grad_norm: 0.9855747003857103, iteration: 211029
loss: 0.9895693063735962,grad_norm: 0.9734686807301388, iteration: 211030
loss: 1.0064456462860107,grad_norm: 0.999999115475886, iteration: 211031
loss: 0.9901551604270935,grad_norm: 0.9907414765646814, iteration: 211032
loss: 0.9630956053733826,grad_norm: 0.9999992170334111, iteration: 211033
loss: 0.9660772085189819,grad_norm: 0.8664929762934538, iteration: 211034
loss: 0.9841662049293518,grad_norm: 0.9936577842989797, iteration: 211035
loss: 0.9832667112350464,grad_norm: 0.9999991361564347, iteration: 211036
loss: 1.0019570589065552,grad_norm: 0.9037421866912071, iteration: 211037
loss: 1.0100224018096924,grad_norm: 0.9115325109326298, iteration: 211038
loss: 1.0137579441070557,grad_norm: 0.9842123007560132, iteration: 211039
loss: 1.0324962139129639,grad_norm: 0.8574310971773875, iteration: 211040
loss: 0.9883233308792114,grad_norm: 0.9763092287343219, iteration: 211041
loss: 1.016870379447937,grad_norm: 0.9999989752519282, iteration: 211042
loss: 1.0078192949295044,grad_norm: 0.7911789080601132, iteration: 211043
loss: 0.9828833341598511,grad_norm: 0.9306798554140576, iteration: 211044
loss: 0.9977478981018066,grad_norm: 0.8181588653030643, iteration: 211045
loss: 1.0041911602020264,grad_norm: 0.9546611240923396, iteration: 211046
loss: 0.9904451966285706,grad_norm: 0.7714000247511956, iteration: 211047
loss: 0.9851803183555603,grad_norm: 0.9999989304486241, iteration: 211048
loss: 1.0541787147521973,grad_norm: 0.9454786297120319, iteration: 211049
loss: 0.9877141714096069,grad_norm: 0.9999993510139253, iteration: 211050
loss: 1.0064125061035156,grad_norm: 0.9035106630698914, iteration: 211051
loss: 0.9857566356658936,grad_norm: 0.9999991228641015, iteration: 211052
loss: 1.0369981527328491,grad_norm: 0.9246250356526573, iteration: 211053
loss: 1.0312604904174805,grad_norm: 0.8396227331579729, iteration: 211054
loss: 1.003202199935913,grad_norm: 0.99999923407964, iteration: 211055
loss: 0.9914575815200806,grad_norm: 0.8921601444038124, iteration: 211056
loss: 0.9985179901123047,grad_norm: 0.9631918602686003, iteration: 211057
loss: 0.997707724571228,grad_norm: 0.9999990140513207, iteration: 211058
loss: 0.9979099035263062,grad_norm: 0.9120858469026267, iteration: 211059
loss: 0.993782639503479,grad_norm: 0.9999991536003806, iteration: 211060
loss: 1.0279300212860107,grad_norm: 0.9451645198829863, iteration: 211061
loss: 1.0268157720565796,grad_norm: 0.9999997136145932, iteration: 211062
loss: 1.0230066776275635,grad_norm: 0.9999991113892331, iteration: 211063
loss: 1.0128251314163208,grad_norm: 0.9178992503705661, iteration: 211064
loss: 1.006640911102295,grad_norm: 0.858441739819288, iteration: 211065
loss: 1.002070426940918,grad_norm: 0.8842895148169887, iteration: 211066
loss: 0.9864976406097412,grad_norm: 0.896545668742358, iteration: 211067
loss: 1.0235694646835327,grad_norm: 0.7948864475004314, iteration: 211068
loss: 0.9709315896034241,grad_norm: 0.932240118734324, iteration: 211069
loss: 1.0986682176589966,grad_norm: 0.9999992145555093, iteration: 211070
loss: 0.9940486550331116,grad_norm: 0.7531822774493537, iteration: 211071
loss: 1.0236903429031372,grad_norm: 0.9706866349785902, iteration: 211072
loss: 1.0148494243621826,grad_norm: 0.9560360313636513, iteration: 211073
loss: 0.9710760712623596,grad_norm: 0.8260073928599012, iteration: 211074
loss: 0.9920905828475952,grad_norm: 0.9768967254686711, iteration: 211075
loss: 1.0196170806884766,grad_norm: 0.8967040178999673, iteration: 211076
loss: 1.0261800289154053,grad_norm: 0.7713598179896513, iteration: 211077
loss: 1.0183796882629395,grad_norm: 0.8501669914788208, iteration: 211078
loss: 0.9569221138954163,grad_norm: 0.9128909228027785, iteration: 211079
loss: 0.9814704656600952,grad_norm: 0.9114114703790417, iteration: 211080
loss: 0.9724559783935547,grad_norm: 0.9223480984817026, iteration: 211081
loss: 1.0338298082351685,grad_norm: 0.8361680232010796, iteration: 211082
loss: 1.0078234672546387,grad_norm: 0.9999991891530533, iteration: 211083
loss: 0.9811327457427979,grad_norm: 0.8935223450673317, iteration: 211084
loss: 0.9572265148162842,grad_norm: 0.9525890007083962, iteration: 211085
loss: 1.0202364921569824,grad_norm: 0.9103526033329188, iteration: 211086
loss: 0.991244375705719,grad_norm: 0.8929116681653105, iteration: 211087
loss: 1.0042788982391357,grad_norm: 0.9999991974983536, iteration: 211088
loss: 1.011522650718689,grad_norm: 0.8960408838138408, iteration: 211089
loss: 1.0331321954727173,grad_norm: 0.9705155965862932, iteration: 211090
loss: 1.1401914358139038,grad_norm: 0.9999990277781781, iteration: 211091
loss: 1.0111061334609985,grad_norm: 0.9999991593019969, iteration: 211092
loss: 1.011069893836975,grad_norm: 0.9351394010337504, iteration: 211093
loss: 1.0392876863479614,grad_norm: 0.9448335182383029, iteration: 211094
loss: 0.981084406375885,grad_norm: 0.9984191087258001, iteration: 211095
loss: 1.003056287765503,grad_norm: 0.909934663737394, iteration: 211096
loss: 1.0382254123687744,grad_norm: 0.9999999575638487, iteration: 211097
loss: 1.0073126554489136,grad_norm: 0.9396449350896483, iteration: 211098
loss: 1.0391672849655151,grad_norm: 0.8081224353007195, iteration: 211099
loss: 0.9403393864631653,grad_norm: 0.985629139298775, iteration: 211100
loss: 0.9763543009757996,grad_norm: 0.8605109834618792, iteration: 211101
loss: 0.9772377610206604,grad_norm: 0.8833561893368608, iteration: 211102
loss: 1.0397465229034424,grad_norm: 0.9999994256160268, iteration: 211103
loss: 1.0210570096969604,grad_norm: 0.99999900949841, iteration: 211104
loss: 1.0367836952209473,grad_norm: 0.951125993467792, iteration: 211105
loss: 1.0172672271728516,grad_norm: 0.9999991497745796, iteration: 211106
loss: 0.9866994023323059,grad_norm: 0.7956016304692183, iteration: 211107
loss: 1.105947732925415,grad_norm: 0.9999997343168012, iteration: 211108
loss: 1.025206446647644,grad_norm: 0.8246684324505532, iteration: 211109
loss: 1.02560555934906,grad_norm: 0.9999990981445503, iteration: 211110
loss: 1.0033079385757446,grad_norm: 0.9999989945048773, iteration: 211111
loss: 0.9967053532600403,grad_norm: 0.8909257268687992, iteration: 211112
loss: 1.0207728147506714,grad_norm: 0.9999991190244807, iteration: 211113
loss: 1.041770100593567,grad_norm: 0.9373752783718523, iteration: 211114
loss: 0.979550302028656,grad_norm: 0.8424160813972676, iteration: 211115
loss: 0.9665035009384155,grad_norm: 0.9837203448413767, iteration: 211116
loss: 0.9817187190055847,grad_norm: 0.855411236114043, iteration: 211117
loss: 1.013123869895935,grad_norm: 0.9999992397690182, iteration: 211118
loss: 1.043269157409668,grad_norm: 0.9999993214696359, iteration: 211119
loss: 0.9707542061805725,grad_norm: 0.7208710436015987, iteration: 211120
loss: 0.9990462064743042,grad_norm: 0.9391877877102737, iteration: 211121
loss: 0.9862655401229858,grad_norm: 0.9999990454380494, iteration: 211122
loss: 0.9977704286575317,grad_norm: 0.8795528752694003, iteration: 211123
loss: 1.0266155004501343,grad_norm: 0.948689133368299, iteration: 211124
loss: 1.0367112159729004,grad_norm: 0.9999994464629975, iteration: 211125
loss: 1.027239203453064,grad_norm: 0.8045063535563729, iteration: 211126
loss: 0.9859157204627991,grad_norm: 0.9999990778720912, iteration: 211127
loss: 0.9970310926437378,grad_norm: 0.7995075121675053, iteration: 211128
loss: 1.053185224533081,grad_norm: 0.9837052737284961, iteration: 211129
loss: 0.9822074770927429,grad_norm: 0.9999990515797385, iteration: 211130
loss: 0.9671289920806885,grad_norm: 0.9999991501328848, iteration: 211131
loss: 1.0264939069747925,grad_norm: 0.8590736901375526, iteration: 211132
loss: 0.9692716002464294,grad_norm: 0.9999998423740787, iteration: 211133
loss: 0.9982815384864807,grad_norm: 0.9999992075069648, iteration: 211134
loss: 0.9632426500320435,grad_norm: 0.9382081747223141, iteration: 211135
loss: 1.0116456747055054,grad_norm: 0.9507914998629555, iteration: 211136
loss: 0.9904059767723083,grad_norm: 0.9999991250011543, iteration: 211137
loss: 1.0050896406173706,grad_norm: 0.9017271449235095, iteration: 211138
loss: 0.9886561036109924,grad_norm: 0.8262921615782675, iteration: 211139
loss: 1.02878737449646,grad_norm: 0.9999991230058806, iteration: 211140
loss: 1.0042545795440674,grad_norm: 0.9999989270920068, iteration: 211141
loss: 1.0099081993103027,grad_norm: 0.999998950065257, iteration: 211142
loss: 1.0001364946365356,grad_norm: 0.9716648946793633, iteration: 211143
loss: 0.967197835445404,grad_norm: 0.9691882542055129, iteration: 211144
loss: 1.0217725038528442,grad_norm: 0.9999992164961446, iteration: 211145
loss: 0.9951337575912476,grad_norm: 0.9999992627336727, iteration: 211146
loss: 1.0263482332229614,grad_norm: 0.9430975135371817, iteration: 211147
loss: 0.9585980176925659,grad_norm: 0.9255814516098653, iteration: 211148
loss: 0.9972053170204163,grad_norm: 0.9999994795616974, iteration: 211149
loss: 0.9922245740890503,grad_norm: 0.8135750700440894, iteration: 211150
loss: 0.9647267460823059,grad_norm: 0.9999992546181021, iteration: 211151
loss: 1.0055636167526245,grad_norm: 0.9999991429843503, iteration: 211152
loss: 0.9910638332366943,grad_norm: 0.9999990102660131, iteration: 211153
loss: 0.977908730506897,grad_norm: 0.8686100646294003, iteration: 211154
loss: 0.9495378732681274,grad_norm: 0.9999990793134595, iteration: 211155
loss: 0.9712291955947876,grad_norm: 0.9999991410877552, iteration: 211156
loss: 0.971379816532135,grad_norm: 0.7978020269082124, iteration: 211157
loss: 1.0108784437179565,grad_norm: 0.9999990499156541, iteration: 211158
loss: 1.0177284479141235,grad_norm: 0.8087887225514886, iteration: 211159
loss: 0.9912360906600952,grad_norm: 0.9999991912927102, iteration: 211160
loss: 1.0197124481201172,grad_norm: 0.8224843872056149, iteration: 211161
loss: 1.024530053138733,grad_norm: 0.9423718328447876, iteration: 211162
loss: 0.9881564974784851,grad_norm: 0.9406456057408378, iteration: 211163
loss: 1.0238518714904785,grad_norm: 0.9502205072768359, iteration: 211164
loss: 1.0100657939910889,grad_norm: 0.808359038702586, iteration: 211165
loss: 0.9745744466781616,grad_norm: 0.8917238196124091, iteration: 211166
loss: 0.98430997133255,grad_norm: 0.9114223562666022, iteration: 211167
loss: 0.9817069172859192,grad_norm: 0.8153117608043132, iteration: 211168
loss: 1.0166112184524536,grad_norm: 0.9129003339405647, iteration: 211169
loss: 1.0130091905593872,grad_norm: 0.9926726916590014, iteration: 211170
loss: 1.0095863342285156,grad_norm: 0.967315296856863, iteration: 211171
loss: 1.0118952989578247,grad_norm: 0.8965813151552826, iteration: 211172
loss: 1.031853199005127,grad_norm: 0.8708513985063631, iteration: 211173
loss: 0.988779604434967,grad_norm: 0.9999990249472547, iteration: 211174
loss: 1.059302568435669,grad_norm: 0.9999992180389876, iteration: 211175
loss: 0.9927158355712891,grad_norm: 0.8847784244632161, iteration: 211176
loss: 1.0409153699874878,grad_norm: 0.9058671998275285, iteration: 211177
loss: 0.9997066855430603,grad_norm: 0.9048128422061995, iteration: 211178
loss: 0.9486649036407471,grad_norm: 0.8727918803483387, iteration: 211179
loss: 1.000045657157898,grad_norm: 0.9999991870972629, iteration: 211180
loss: 1.0116894245147705,grad_norm: 0.9999991059732833, iteration: 211181
loss: 0.9978443384170532,grad_norm: 0.8211700869447786, iteration: 211182
loss: 1.0007210969924927,grad_norm: 0.9041410643751385, iteration: 211183
loss: 1.0111563205718994,grad_norm: 0.9093725407097978, iteration: 211184
loss: 1.0043673515319824,grad_norm: 0.7254582179563531, iteration: 211185
loss: 1.0184707641601562,grad_norm: 0.9327820313203464, iteration: 211186
loss: 0.9867811799049377,grad_norm: 0.9999991800733629, iteration: 211187
loss: 0.9862014651298523,grad_norm: 0.9870790050269785, iteration: 211188
loss: 0.9766433835029602,grad_norm: 0.8071133261486838, iteration: 211189
loss: 0.9986052513122559,grad_norm: 0.9999988487979579, iteration: 211190
loss: 0.9779841899871826,grad_norm: 0.9999992588415776, iteration: 211191
loss: 0.9990183115005493,grad_norm: 0.9405023353733886, iteration: 211192
loss: 0.9975599050521851,grad_norm: 0.9999990453943763, iteration: 211193
loss: 1.0137617588043213,grad_norm: 0.9999990656340709, iteration: 211194
loss: 0.9944233894348145,grad_norm: 0.8741489868141431, iteration: 211195
loss: 0.9903494119644165,grad_norm: 0.9081439307054084, iteration: 211196
loss: 1.0093013048171997,grad_norm: 0.8990588625578451, iteration: 211197
loss: 1.029043436050415,grad_norm: 0.9999991549255066, iteration: 211198
loss: 1.0118557214736938,grad_norm: 0.9999990529551089, iteration: 211199
loss: 0.9912787079811096,grad_norm: 0.9169059384370606, iteration: 211200
loss: 1.0202723741531372,grad_norm: 0.9450498924968539, iteration: 211201
loss: 1.0109448432922363,grad_norm: 0.9987542267147931, iteration: 211202
loss: 1.0347002744674683,grad_norm: 0.8941470596292622, iteration: 211203
loss: 1.0119391679763794,grad_norm: 0.9351374169233205, iteration: 211204
loss: 0.9996773600578308,grad_norm: 0.7263873628201113, iteration: 211205
loss: 0.9992045760154724,grad_norm: 0.9731629226722258, iteration: 211206
loss: 0.971046507358551,grad_norm: 0.997581476117421, iteration: 211207
loss: 1.03266179561615,grad_norm: 0.9755723108474861, iteration: 211208
loss: 0.9776775240898132,grad_norm: 0.9746887800032128, iteration: 211209
loss: 1.0363816022872925,grad_norm: 0.984044147816632, iteration: 211210
loss: 1.0220903158187866,grad_norm: 0.9632174547413266, iteration: 211211
loss: 1.004437804222107,grad_norm: 0.8327423773561385, iteration: 211212
loss: 1.0184208154678345,grad_norm: 0.9735167479590021, iteration: 211213
loss: 1.0013840198516846,grad_norm: 0.9999990679195493, iteration: 211214
loss: 1.0072380304336548,grad_norm: 0.9999991512875824, iteration: 211215
loss: 1.0232001543045044,grad_norm: 0.7329128044608699, iteration: 211216
loss: 1.0002557039260864,grad_norm: 0.9462170168084634, iteration: 211217
loss: 0.9931577444076538,grad_norm: 0.8785790387057012, iteration: 211218
loss: 0.9871120452880859,grad_norm: 0.9110580584209135, iteration: 211219
loss: 0.9677141904830933,grad_norm: 0.8864435864692141, iteration: 211220
loss: 0.9777119159698486,grad_norm: 0.9999991943329374, iteration: 211221
loss: 1.0026293992996216,grad_norm: 0.999999065532578, iteration: 211222
loss: 1.0166398286819458,grad_norm: 0.9253444331477598, iteration: 211223
loss: 0.9789988398551941,grad_norm: 0.9932039279778792, iteration: 211224
loss: 1.016606092453003,grad_norm: 0.8569676398877566, iteration: 211225
loss: 1.0191868543624878,grad_norm: 0.922910304411263, iteration: 211226
loss: 1.0619357824325562,grad_norm: 0.9804075311913277, iteration: 211227
loss: 0.9787428975105286,grad_norm: 0.9999991503864194, iteration: 211228
loss: 1.0040509700775146,grad_norm: 0.9999990666572551, iteration: 211229
loss: 1.0318865776062012,grad_norm: 0.9661303667650303, iteration: 211230
loss: 0.9916213154792786,grad_norm: 0.999998994162698, iteration: 211231
loss: 0.9978581070899963,grad_norm: 0.9954311281253977, iteration: 211232
loss: 0.9789578914642334,grad_norm: 0.9999991244802774, iteration: 211233
loss: 0.994807779788971,grad_norm: 0.9540250229908699, iteration: 211234
loss: 0.9505849480628967,grad_norm: 0.9999991695031146, iteration: 211235
loss: 1.005013346672058,grad_norm: 0.8898476917002026, iteration: 211236
loss: 1.038250207901001,grad_norm: 0.9999999229909141, iteration: 211237
loss: 1.0178158283233643,grad_norm: 0.9959520191475172, iteration: 211238
loss: 0.9540120363235474,grad_norm: 0.9703650297569845, iteration: 211239
loss: 1.0018726587295532,grad_norm: 0.8513006450355715, iteration: 211240
loss: 1.0178053379058838,grad_norm: 0.999999103416775, iteration: 211241
loss: 0.9834281206130981,grad_norm: 0.9571094572450853, iteration: 211242
loss: 1.033149003982544,grad_norm: 0.86095326773662, iteration: 211243
loss: 0.9684732556343079,grad_norm: 0.8940716840387983, iteration: 211244
loss: 1.0262020826339722,grad_norm: 0.7734218687495698, iteration: 211245
loss: 1.001900315284729,grad_norm: 0.8767321522494896, iteration: 211246
loss: 1.020833969116211,grad_norm: 0.8581278359905088, iteration: 211247
loss: 0.9811817407608032,grad_norm: 0.9571623590599775, iteration: 211248
loss: 1.0232497453689575,grad_norm: 0.9136845882870318, iteration: 211249
loss: 1.0166337490081787,grad_norm: 0.887183115448606, iteration: 211250
loss: 1.0052348375320435,grad_norm: 0.9294290554623993, iteration: 211251
loss: 1.038710355758667,grad_norm: 0.9999997557097766, iteration: 211252
loss: 1.018454670906067,grad_norm: 0.837849789719774, iteration: 211253
loss: 0.9901282787322998,grad_norm: 0.928655573344113, iteration: 211254
loss: 1.0541657209396362,grad_norm: 0.9988996538810078, iteration: 211255
loss: 0.9834520816802979,grad_norm: 0.9874775352759086, iteration: 211256
loss: 0.997941255569458,grad_norm: 0.7811479253924246, iteration: 211257
loss: 1.0063505172729492,grad_norm: 0.999999179042672, iteration: 211258
loss: 0.9932185411453247,grad_norm: 0.9999991170445582, iteration: 211259
loss: 0.993042528629303,grad_norm: 0.9999991820504744, iteration: 211260
loss: 0.9885563850402832,grad_norm: 0.9127894908741782, iteration: 211261
loss: 0.997056782245636,grad_norm: 0.9999991020484958, iteration: 211262
loss: 0.9895651936531067,grad_norm: 0.9705029978583268, iteration: 211263
loss: 0.9742198586463928,grad_norm: 0.8846250620386569, iteration: 211264
loss: 1.0063555240631104,grad_norm: 0.8746988394514627, iteration: 211265
loss: 0.9969605803489685,grad_norm: 0.9518535785000848, iteration: 211266
loss: 1.0017012357711792,grad_norm: 0.9999991308897014, iteration: 211267
loss: 1.021621584892273,grad_norm: 0.9999991388006849, iteration: 211268
loss: 1.019200086593628,grad_norm: 0.9999991335495111, iteration: 211269
loss: 0.988817036151886,grad_norm: 0.9999990722541368, iteration: 211270
loss: 0.9719357490539551,grad_norm: 0.9999989502378278, iteration: 211271
loss: 0.983367919921875,grad_norm: 0.999999070095786, iteration: 211272
loss: 1.021284818649292,grad_norm: 0.9999989511349686, iteration: 211273
loss: 1.000769019126892,grad_norm: 0.8007572734283589, iteration: 211274
loss: 0.9957567453384399,grad_norm: 0.9999991489794899, iteration: 211275
loss: 0.971373438835144,grad_norm: 0.9199234020031785, iteration: 211276
loss: 0.9637523293495178,grad_norm: 0.9999990323240295, iteration: 211277
loss: 0.9707072377204895,grad_norm: 0.8929979495073262, iteration: 211278
loss: 0.9688884019851685,grad_norm: 0.8828399160709103, iteration: 211279
loss: 0.9969524145126343,grad_norm: 0.886545224767147, iteration: 211280
loss: 0.9594791531562805,grad_norm: 0.9075020694211043, iteration: 211281
loss: 1.0601454973220825,grad_norm: 0.9997898394348912, iteration: 211282
loss: 1.0150023698806763,grad_norm: 0.7891684824967867, iteration: 211283
loss: 1.0623000860214233,grad_norm: 0.9999991871464474, iteration: 211284
loss: 0.9801231622695923,grad_norm: 0.9109895765851252, iteration: 211285
loss: 0.9603116512298584,grad_norm: 0.9390491544742987, iteration: 211286
loss: 1.0209057331085205,grad_norm: 0.9999991781485262, iteration: 211287
loss: 1.0020663738250732,grad_norm: 0.9781085603744322, iteration: 211288
loss: 0.9700602889060974,grad_norm: 0.838445427458422, iteration: 211289
loss: 0.9596454501152039,grad_norm: 0.9923333636533435, iteration: 211290
loss: 1.0013402700424194,grad_norm: 0.8848469911700947, iteration: 211291
loss: 0.9676694273948669,grad_norm: 0.9289266291921356, iteration: 211292
loss: 1.0130528211593628,grad_norm: 0.9999990640751232, iteration: 211293
loss: 0.9885751008987427,grad_norm: 0.7874554429441636, iteration: 211294
loss: 0.9791715741157532,grad_norm: 0.9999991767495189, iteration: 211295
loss: 0.9819033741950989,grad_norm: 0.8553477166359459, iteration: 211296
loss: 0.9844772815704346,grad_norm: 0.834171228937669, iteration: 211297
loss: 0.990288496017456,grad_norm: 0.9394027929654651, iteration: 211298
loss: 0.9961712956428528,grad_norm: 0.7898362308727365, iteration: 211299
loss: 1.0189898014068604,grad_norm: 0.9999991721514985, iteration: 211300
loss: 0.9855848550796509,grad_norm: 0.7811305476852751, iteration: 211301
loss: 1.0105493068695068,grad_norm: 0.9999991405722468, iteration: 211302
loss: 0.9997175335884094,grad_norm: 0.9336894547731396, iteration: 211303
loss: 1.0177114009857178,grad_norm: 0.9999990693788429, iteration: 211304
loss: 0.9624031186103821,grad_norm: 0.9131034751359303, iteration: 211305
loss: 0.9663669466972351,grad_norm: 0.9095847924477655, iteration: 211306
loss: 1.0245698690414429,grad_norm: 0.9553165145375204, iteration: 211307
loss: 1.0169631242752075,grad_norm: 0.9237987219576081, iteration: 211308
loss: 1.0014840364456177,grad_norm: 0.8877665682836403, iteration: 211309
loss: 1.0019283294677734,grad_norm: 0.8008758916021601, iteration: 211310
loss: 1.042249083518982,grad_norm: 0.9862406804987506, iteration: 211311
loss: 1.013979434967041,grad_norm: 0.8876220579131857, iteration: 211312
loss: 1.004137635231018,grad_norm: 0.7770828251140688, iteration: 211313
loss: 1.001197338104248,grad_norm: 0.9428892032380385, iteration: 211314
loss: 0.9701917767524719,grad_norm: 0.8488460585413982, iteration: 211315
loss: 1.013152003288269,grad_norm: 0.8809518621899275, iteration: 211316
loss: 1.0212945938110352,grad_norm: 0.8424500187444849, iteration: 211317
loss: 1.0134819746017456,grad_norm: 0.9999990428677484, iteration: 211318
loss: 1.0064165592193604,grad_norm: 0.9243564899215513, iteration: 211319
loss: 1.0019218921661377,grad_norm: 0.9974667446152694, iteration: 211320
loss: 1.0161734819412231,grad_norm: 0.876792692015615, iteration: 211321
loss: 0.9576268196105957,grad_norm: 0.8927701753332484, iteration: 211322
loss: 1.0306856632232666,grad_norm: 0.8305471718710813, iteration: 211323
loss: 0.9915855526924133,grad_norm: 0.9999990004980185, iteration: 211324
loss: 0.9970084428787231,grad_norm: 0.9999989516229449, iteration: 211325
loss: 1.0341112613677979,grad_norm: 0.9999990380356415, iteration: 211326
loss: 0.9992546439170837,grad_norm: 0.9999990588044332, iteration: 211327
loss: 1.0016871690750122,grad_norm: 0.8949178541051355, iteration: 211328
loss: 1.0260275602340698,grad_norm: 0.9999991594644556, iteration: 211329
loss: 0.998064398765564,grad_norm: 0.7985331941468465, iteration: 211330
loss: 0.9884553551673889,grad_norm: 0.9801120719014473, iteration: 211331
loss: 0.9950824975967407,grad_norm: 0.9999990753644399, iteration: 211332
loss: 1.004960298538208,grad_norm: 0.9146972069559476, iteration: 211333
loss: 1.037964105606079,grad_norm: 0.9999996045853157, iteration: 211334
loss: 1.0150099992752075,grad_norm: 0.9412926960659921, iteration: 211335
loss: 0.9613674879074097,grad_norm: 0.8691608675760552, iteration: 211336
loss: 1.0313596725463867,grad_norm: 1.0000000002117604, iteration: 211337
loss: 0.9873654842376709,grad_norm: 0.9056058500853896, iteration: 211338
loss: 0.9832659959793091,grad_norm: 0.9999990983495951, iteration: 211339
loss: 0.9868266582489014,grad_norm: 0.8972571996198253, iteration: 211340
loss: 0.9959621429443359,grad_norm: 0.8848519826517687, iteration: 211341
loss: 0.9570015072822571,grad_norm: 0.8905362964704373, iteration: 211342
loss: 0.9447106122970581,grad_norm: 0.8149835131167621, iteration: 211343
loss: 0.999649703502655,grad_norm: 0.9183044541786952, iteration: 211344
loss: 0.9654414653778076,grad_norm: 0.9999991086280108, iteration: 211345
loss: 0.9992735385894775,grad_norm: 0.9226200614983913, iteration: 211346
loss: 0.9788432717323303,grad_norm: 0.883157164720427, iteration: 211347
loss: 0.9816160798072815,grad_norm: 0.9801335071562071, iteration: 211348
loss: 1.058652400970459,grad_norm: 0.8025046772734744, iteration: 211349
loss: 0.9929482936859131,grad_norm: 0.9999990684014345, iteration: 211350
loss: 1.0206184387207031,grad_norm: 0.832646945721156, iteration: 211351
loss: 0.9995180368423462,grad_norm: 0.9999992195968788, iteration: 211352
loss: 0.965474545955658,grad_norm: 0.8618943598351646, iteration: 211353
loss: 0.9877166748046875,grad_norm: 0.8179518577933959, iteration: 211354
loss: 0.9946575164794922,grad_norm: 0.9820353622803041, iteration: 211355
loss: 1.0003246068954468,grad_norm: 0.922689838404281, iteration: 211356
loss: 1.324684977531433,grad_norm: 0.999999225151568, iteration: 211357
loss: 0.992057740688324,grad_norm: 0.8541822372791371, iteration: 211358
loss: 0.9933323860168457,grad_norm: 0.8105553864029944, iteration: 211359
loss: 1.0124034881591797,grad_norm: 0.9999990243272776, iteration: 211360
loss: 0.9981964230537415,grad_norm: 0.9999991494171488, iteration: 211361
loss: 0.9688220024108887,grad_norm: 0.8704248534369794, iteration: 211362
loss: 1.0607972145080566,grad_norm: 0.788623222560272, iteration: 211363
loss: 0.966313362121582,grad_norm: 0.9999991964560551, iteration: 211364
loss: 1.0300207138061523,grad_norm: 0.7940178131006768, iteration: 211365
loss: 1.0254743099212646,grad_norm: 0.9595688642367555, iteration: 211366
loss: 1.0210212469100952,grad_norm: 0.8077067612333191, iteration: 211367
loss: 1.0270072221755981,grad_norm: 0.9075641972803384, iteration: 211368
loss: 0.9444800615310669,grad_norm: 0.944406528104369, iteration: 211369
loss: 1.0188512802124023,grad_norm: 0.7661993582824967, iteration: 211370
loss: 0.9920414090156555,grad_norm: 0.8375238263308961, iteration: 211371
loss: 0.9888117909431458,grad_norm: 0.9999990370123891, iteration: 211372
loss: 1.0352530479431152,grad_norm: 0.9999992808469439, iteration: 211373
loss: 1.0007435083389282,grad_norm: 0.8081124002798606, iteration: 211374
loss: 0.9987762570381165,grad_norm: 0.9297267670025233, iteration: 211375
loss: 1.0179086923599243,grad_norm: 0.7483482985413166, iteration: 211376
loss: 1.020248293876648,grad_norm: 0.91556361313363, iteration: 211377
loss: 0.987888753414154,grad_norm: 0.9101997526601346, iteration: 211378
loss: 0.9453414082527161,grad_norm: 0.9999989418907743, iteration: 211379
loss: 0.9824680685997009,grad_norm: 0.9036436259192465, iteration: 211380
loss: 1.0208041667938232,grad_norm: 0.8699851284072916, iteration: 211381
loss: 1.0315254926681519,grad_norm: 0.9999990749331789, iteration: 211382
loss: 0.9827492237091064,grad_norm: 0.8219451391534428, iteration: 211383
loss: 1.0157204866409302,grad_norm: 0.9774173592394977, iteration: 211384
loss: 0.9950100183486938,grad_norm: 0.9391029025530742, iteration: 211385
loss: 0.9711084961891174,grad_norm: 0.9089202825715832, iteration: 211386
loss: 0.9867302775382996,grad_norm: 0.9081127628513835, iteration: 211387
loss: 1.0341615676879883,grad_norm: 0.8957597355460933, iteration: 211388
loss: 0.9852780103683472,grad_norm: 0.7059518252676991, iteration: 211389
loss: 0.9756345748901367,grad_norm: 0.9309186270728337, iteration: 211390
loss: 1.032068133354187,grad_norm: 0.9905326597110589, iteration: 211391
loss: 1.0063836574554443,grad_norm: 0.9773293172537875, iteration: 211392
loss: 0.9706025719642639,grad_norm: 0.8752710657551179, iteration: 211393
loss: 1.0389213562011719,grad_norm: 0.9581200334028086, iteration: 211394
loss: 1.0044186115264893,grad_norm: 0.8542334808789381, iteration: 211395
loss: 0.9805111885070801,grad_norm: 0.9479976302831673, iteration: 211396
loss: 1.012500286102295,grad_norm: 0.9999991305589324, iteration: 211397
loss: 1.0111263990402222,grad_norm: 0.9236938290139629, iteration: 211398
loss: 1.0233919620513916,grad_norm: 0.9961439639106859, iteration: 211399
loss: 1.0003986358642578,grad_norm: 0.9999997999583949, iteration: 211400
loss: 0.9951821565628052,grad_norm: 0.8993397158003315, iteration: 211401
loss: 0.9993242025375366,grad_norm: 0.9999991765337003, iteration: 211402
loss: 1.0032132863998413,grad_norm: 0.9049265529135428, iteration: 211403
loss: 0.9778701066970825,grad_norm: 0.8696151200809626, iteration: 211404
loss: 0.9576399326324463,grad_norm: 0.9999992870134533, iteration: 211405
loss: 1.0061397552490234,grad_norm: 0.9999991323076064, iteration: 211406
loss: 1.0120785236358643,grad_norm: 0.9621047648983094, iteration: 211407
loss: 0.9853521585464478,grad_norm: 0.8987013078673675, iteration: 211408
loss: 0.983525276184082,grad_norm: 0.9659118461731835, iteration: 211409
loss: 0.9981158971786499,grad_norm: 0.9386113434535369, iteration: 211410
loss: 0.9642515778541565,grad_norm: 0.9999989983560199, iteration: 211411
loss: 0.9954149723052979,grad_norm: 0.9377309617405605, iteration: 211412
loss: 1.0206485986709595,grad_norm: 0.7553470160597466, iteration: 211413
loss: 1.025659203529358,grad_norm: 0.7813474906486596, iteration: 211414
loss: 1.0095610618591309,grad_norm: 0.7596002201030747, iteration: 211415
loss: 0.9924660325050354,grad_norm: 0.8029512959260936, iteration: 211416
loss: 1.0056796073913574,grad_norm: 0.7841733490072721, iteration: 211417
loss: 0.9580643177032471,grad_norm: 0.8409011135941169, iteration: 211418
loss: 0.9778846502304077,grad_norm: 0.9971437841165476, iteration: 211419
loss: 0.9703709483146667,grad_norm: 0.8740072396919207, iteration: 211420
loss: 1.0026216506958008,grad_norm: 0.8866919787859308, iteration: 211421
loss: 0.9750300049781799,grad_norm: 0.95388668090106, iteration: 211422
loss: 0.9862159490585327,grad_norm: 0.9999991672656285, iteration: 211423
loss: 1.00389564037323,grad_norm: 0.9301285993894687, iteration: 211424
loss: 0.9614071249961853,grad_norm: 0.9999990574551251, iteration: 211425
loss: 1.010844111442566,grad_norm: 0.951477937790893, iteration: 211426
loss: 0.9624074101448059,grad_norm: 0.9807135220017579, iteration: 211427
loss: 1.0088512897491455,grad_norm: 0.8925236468913867, iteration: 211428
loss: 1.0116331577301025,grad_norm: 0.9999990336629666, iteration: 211429
loss: 0.995567262172699,grad_norm: 0.9220199330320014, iteration: 211430
loss: 1.0022830963134766,grad_norm: 0.9116694945353316, iteration: 211431
loss: 0.9859595894813538,grad_norm: 0.9814482031281963, iteration: 211432
loss: 0.9629616141319275,grad_norm: 0.8755900825957104, iteration: 211433
loss: 0.9694006443023682,grad_norm: 0.7232044951318571, iteration: 211434
loss: 1.012242317199707,grad_norm: 0.9999990937131288, iteration: 211435
loss: 0.9680624008178711,grad_norm: 0.9816837867596021, iteration: 211436
loss: 1.0083339214324951,grad_norm: 0.9999994456197228, iteration: 211437
loss: 1.0106357336044312,grad_norm: 0.912483038819028, iteration: 211438
loss: 0.9724969267845154,grad_norm: 0.9999991007278801, iteration: 211439
loss: 0.9714625477790833,grad_norm: 0.8604964243425745, iteration: 211440
loss: 0.9907885193824768,grad_norm: 0.9999997272427295, iteration: 211441
loss: 0.9811185598373413,grad_norm: 0.9820227852334893, iteration: 211442
loss: 1.0370820760726929,grad_norm: 0.9907275098574648, iteration: 211443
loss: 1.006482720375061,grad_norm: 0.9999991607101002, iteration: 211444
loss: 1.006986379623413,grad_norm: 0.8515875479985479, iteration: 211445
loss: 1.0231823921203613,grad_norm: 0.9269809979809793, iteration: 211446
loss: 0.980858564376831,grad_norm: 0.8929109958285806, iteration: 211447
loss: 1.0251359939575195,grad_norm: 0.9289859661155416, iteration: 211448
loss: 1.043950080871582,grad_norm: 0.9999998280961284, iteration: 211449
loss: 0.9810386896133423,grad_norm: 0.9999991853489613, iteration: 211450
loss: 1.0178972482681274,grad_norm: 0.9103134268899586, iteration: 211451
loss: 1.040169596672058,grad_norm: 0.9445190425800829, iteration: 211452
loss: 1.012603759765625,grad_norm: 0.887813424645005, iteration: 211453
loss: 0.9348369836807251,grad_norm: 0.9999991524054196, iteration: 211454
loss: 0.9907552003860474,grad_norm: 0.9211760000647673, iteration: 211455
loss: 0.9935064315795898,grad_norm: 0.9999991922524392, iteration: 211456
loss: 0.97856205701828,grad_norm: 0.9999991522565865, iteration: 211457
loss: 0.9707865715026855,grad_norm: 0.919214084164687, iteration: 211458
loss: 0.9810131192207336,grad_norm: 0.7129088999476683, iteration: 211459
loss: 0.9867751598358154,grad_norm: 0.7240512498651211, iteration: 211460
loss: 0.9674719572067261,grad_norm: 0.8270496428339159, iteration: 211461
loss: 0.9939048290252686,grad_norm: 0.9432538344372088, iteration: 211462
loss: 1.0221205949783325,grad_norm: 0.9999991830344499, iteration: 211463
loss: 0.9757741093635559,grad_norm: 0.9292134013708662, iteration: 211464
loss: 0.994109034538269,grad_norm: 0.8758812929913916, iteration: 211465
loss: 0.9898996353149414,grad_norm: 0.957180923316056, iteration: 211466
loss: 0.9790011048316956,grad_norm: 0.8989335729455116, iteration: 211467
loss: 1.0327831506729126,grad_norm: 0.9999991631905824, iteration: 211468
loss: 1.03264582157135,grad_norm: 0.9534668839939376, iteration: 211469
loss: 1.0362896919250488,grad_norm: 0.9999991041372128, iteration: 211470
loss: 1.0014281272888184,grad_norm: 0.8140155024521162, iteration: 211471
loss: 0.9923291802406311,grad_norm: 0.995669912861002, iteration: 211472
loss: 0.9661593437194824,grad_norm: 0.9999989943123085, iteration: 211473
loss: 0.9689432382583618,grad_norm: 0.9302992786329078, iteration: 211474
loss: 1.029980182647705,grad_norm: 0.9045600563600567, iteration: 211475
loss: 0.9778323769569397,grad_norm: 0.9719214042839581, iteration: 211476
loss: 0.9629927277565002,grad_norm: 0.795288158329275, iteration: 211477
loss: 0.9583723545074463,grad_norm: 0.9999990489866885, iteration: 211478
loss: 0.9855982065200806,grad_norm: 0.9615807605344228, iteration: 211479
loss: 1.0136747360229492,grad_norm: 0.7333330955433922, iteration: 211480
loss: 1.0104931592941284,grad_norm: 0.8420344298992978, iteration: 211481
loss: 0.9595772624015808,grad_norm: 0.9999990179143098, iteration: 211482
loss: 1.0109727382659912,grad_norm: 0.9999990851752929, iteration: 211483
loss: 1.0206542015075684,grad_norm: 0.8288654945825866, iteration: 211484
loss: 1.0130469799041748,grad_norm: 0.9999990611525313, iteration: 211485
loss: 0.9975181818008423,grad_norm: 0.7889525023981929, iteration: 211486
loss: 1.0482271909713745,grad_norm: 0.9947575488648444, iteration: 211487
loss: 1.0016793012619019,grad_norm: 0.8427630376957362, iteration: 211488
loss: 1.031112790107727,grad_norm: 0.999999125219163, iteration: 211489
loss: 0.9800270199775696,grad_norm: 0.999998973456594, iteration: 211490
loss: 0.9889142513275146,grad_norm: 0.904199454097341, iteration: 211491
loss: 1.007041096687317,grad_norm: 0.8465208003700724, iteration: 211492
loss: 0.9622406363487244,grad_norm: 0.9380393248704251, iteration: 211493
loss: 0.9791392087936401,grad_norm: 0.9999992362766994, iteration: 211494
loss: 0.9802982807159424,grad_norm: 0.9393735305044477, iteration: 211495
loss: 0.9892442226409912,grad_norm: 0.8360594666033729, iteration: 211496
loss: 0.9453577995300293,grad_norm: 0.999999169476943, iteration: 211497
loss: 1.0624072551727295,grad_norm: 0.9999992053847913, iteration: 211498
loss: 1.0006903409957886,grad_norm: 0.9520814693885049, iteration: 211499
loss: 0.9865949749946594,grad_norm: 0.9712447972311511, iteration: 211500
loss: 0.9623978734016418,grad_norm: 0.982736069779261, iteration: 211501
loss: 1.0040063858032227,grad_norm: 0.9999990961861686, iteration: 211502
loss: 0.9728673696517944,grad_norm: 0.9999990668190435, iteration: 211503
loss: 1.015223503112793,grad_norm: 0.8504301115408296, iteration: 211504
loss: 1.0051276683807373,grad_norm: 0.9999990064503155, iteration: 211505
loss: 0.9746986627578735,grad_norm: 0.9999990654166142, iteration: 211506
loss: 1.0125535726547241,grad_norm: 0.9630930314320207, iteration: 211507
loss: 0.9668254256248474,grad_norm: 0.9006867251225033, iteration: 211508
loss: 0.9893031716346741,grad_norm: 0.7740130627130019, iteration: 211509
loss: 1.0088214874267578,grad_norm: 0.943652902618454, iteration: 211510
loss: 1.0433822870254517,grad_norm: 0.9459075730366204, iteration: 211511
loss: 1.0134880542755127,grad_norm: 0.9223446847149926, iteration: 211512
loss: 0.9754330515861511,grad_norm: 0.9999995417990849, iteration: 211513
loss: 0.9933199882507324,grad_norm: 0.9659007033350954, iteration: 211514
loss: 1.0270785093307495,grad_norm: 0.9999990164158178, iteration: 211515
loss: 1.0400514602661133,grad_norm: 0.9021440753986006, iteration: 211516
loss: 1.0563075542449951,grad_norm: 0.9503868807303942, iteration: 211517
loss: 0.9787881374359131,grad_norm: 0.9999989688826257, iteration: 211518
loss: 1.0794377326965332,grad_norm: 0.9999997238031564, iteration: 211519
loss: 1.014449119567871,grad_norm: 0.9999996614529378, iteration: 211520
loss: 0.9545013308525085,grad_norm: 0.918240724845633, iteration: 211521
loss: 1.0290945768356323,grad_norm: 0.9999991752890451, iteration: 211522
loss: 1.0101932287216187,grad_norm: 0.9007644663030103, iteration: 211523
loss: 0.9502992630004883,grad_norm: 0.900329771405234, iteration: 211524
loss: 0.9471162557601929,grad_norm: 0.9757297882895082, iteration: 211525
loss: 0.964016854763031,grad_norm: 0.9999992990644954, iteration: 211526
loss: 0.9835498332977295,grad_norm: 0.899405778042311, iteration: 211527
loss: 0.9878548979759216,grad_norm: 0.9407186485238521, iteration: 211528
loss: 1.03602135181427,grad_norm: 0.9592340189691296, iteration: 211529
loss: 0.9466006755828857,grad_norm: 0.9594633744921697, iteration: 211530
loss: 0.9709969162940979,grad_norm: 0.9551968343002855, iteration: 211531
loss: 0.9877721071243286,grad_norm: 0.9933048483549823, iteration: 211532
loss: 1.0010669231414795,grad_norm: 0.8753179146803093, iteration: 211533
loss: 0.9797717928886414,grad_norm: 0.8748810474481592, iteration: 211534
loss: 1.0399932861328125,grad_norm: 0.9232229465691068, iteration: 211535
loss: 0.9907342195510864,grad_norm: 0.9999992110576732, iteration: 211536
loss: 1.0450129508972168,grad_norm: 0.8882585227386454, iteration: 211537
loss: 0.9834911227226257,grad_norm: 0.9385307920891366, iteration: 211538
loss: 1.012807846069336,grad_norm: 0.8845421444060517, iteration: 211539
loss: 0.9980806112289429,grad_norm: 0.8565178410884737, iteration: 211540
loss: 1.0251784324645996,grad_norm: 0.9999990019259193, iteration: 211541
loss: 1.1172890663146973,grad_norm: 0.9999990341227314, iteration: 211542
loss: 1.0433295965194702,grad_norm: 0.8451886138987835, iteration: 211543
loss: 1.0391126871109009,grad_norm: 0.9999990384381325, iteration: 211544
loss: 0.9731310606002808,grad_norm: 0.9999990047277432, iteration: 211545
loss: 0.9751197099685669,grad_norm: 0.9383902344499039, iteration: 211546
loss: 1.00252366065979,grad_norm: 0.9214217257093599, iteration: 211547
loss: 1.0006382465362549,grad_norm: 0.9999990270370078, iteration: 211548
loss: 0.96547532081604,grad_norm: 0.7573856735777026, iteration: 211549
loss: 1.0338563919067383,grad_norm: 0.9999999663644513, iteration: 211550
loss: 0.9884577393531799,grad_norm: 0.8804482018815082, iteration: 211551
loss: 1.0147595405578613,grad_norm: 0.7866874762580222, iteration: 211552
loss: 0.9859921336174011,grad_norm: 0.9212033620486931, iteration: 211553
loss: 1.0162456035614014,grad_norm: 0.9584695332829039, iteration: 211554
loss: 0.9768269658088684,grad_norm: 0.8423649249373018, iteration: 211555
loss: 1.0278133153915405,grad_norm: 0.9999992477406928, iteration: 211556
loss: 1.0605359077453613,grad_norm: 0.8686579960496257, iteration: 211557
loss: 0.9675993323326111,grad_norm: 0.9044159118496973, iteration: 211558
loss: 1.0026295185089111,grad_norm: 0.8651469617774236, iteration: 211559
loss: 0.9791408181190491,grad_norm: 0.964724042088913, iteration: 211560
loss: 1.0011513233184814,grad_norm: 0.958194371979428, iteration: 211561
loss: 1.0036753416061401,grad_norm: 0.8458400382945341, iteration: 211562
loss: 0.9886348247528076,grad_norm: 0.8570377192733131, iteration: 211563
loss: 1.0104752779006958,grad_norm: 0.9999992088832209, iteration: 211564
loss: 0.9988628625869751,grad_norm: 0.880493958777848, iteration: 211565
loss: 1.003976583480835,grad_norm: 0.8906844101608259, iteration: 211566
loss: 1.0050320625305176,grad_norm: 0.9540377595090351, iteration: 211567
loss: 1.0013152360916138,grad_norm: 0.7973613676001069, iteration: 211568
loss: 0.9834966063499451,grad_norm: 0.8409681552094255, iteration: 211569
loss: 1.0122712850570679,grad_norm: 0.8098086208289584, iteration: 211570
loss: 1.0266599655151367,grad_norm: 0.8368763674069092, iteration: 211571
loss: 0.9877945780754089,grad_norm: 0.999999122856919, iteration: 211572
loss: 1.002462387084961,grad_norm: 0.99999913025633, iteration: 211573
loss: 0.9693503975868225,grad_norm: 0.9206953250948154, iteration: 211574
loss: 1.0028862953186035,grad_norm: 0.940379044462451, iteration: 211575
loss: 0.9933385252952576,grad_norm: 0.9999990968301833, iteration: 211576
loss: 0.9726495742797852,grad_norm: 0.8379068502944215, iteration: 211577
loss: 1.0015463829040527,grad_norm: 0.8814039386455924, iteration: 211578
loss: 0.9814051985740662,grad_norm: 0.8382756381081884, iteration: 211579
loss: 0.9873868227005005,grad_norm: 0.8022389303815962, iteration: 211580
loss: 0.9843537211418152,grad_norm: 0.8402659314443328, iteration: 211581
loss: 1.0167584419250488,grad_norm: 0.9878791493355886, iteration: 211582
loss: 0.963570237159729,grad_norm: 0.9999991431316426, iteration: 211583
loss: 0.9971989393234253,grad_norm: 0.9140187897252015, iteration: 211584
loss: 0.9394534230232239,grad_norm: 0.9999991360318417, iteration: 211585
loss: 1.0094120502471924,grad_norm: 0.7894606539904883, iteration: 211586
loss: 0.991533637046814,grad_norm: 0.9695850968942911, iteration: 211587
loss: 0.9788573980331421,grad_norm: 0.9810227506520924, iteration: 211588
loss: 1.0048140287399292,grad_norm: 0.8732361458421344, iteration: 211589
loss: 0.9933774471282959,grad_norm: 0.9077757234800266, iteration: 211590
loss: 1.0194333791732788,grad_norm: 0.9872809531680202, iteration: 211591
loss: 1.007802128791809,grad_norm: 0.9999993064310814, iteration: 211592
loss: 0.9990915656089783,grad_norm: 0.7182895312636808, iteration: 211593
loss: 0.9859845638275146,grad_norm: 0.9426727843386689, iteration: 211594
loss: 0.9641892910003662,grad_norm: 0.9422156314360767, iteration: 211595
loss: 0.9953494071960449,grad_norm: 0.910352395683098, iteration: 211596
loss: 1.0067917108535767,grad_norm: 0.8107918245420298, iteration: 211597
loss: 1.0178488492965698,grad_norm: 0.9788991238111547, iteration: 211598
loss: 1.0103405714035034,grad_norm: 0.8413199507575775, iteration: 211599
loss: 0.9970900416374207,grad_norm: 0.8637712131468752, iteration: 211600
loss: 0.9743513464927673,grad_norm: 0.9618865107298066, iteration: 211601
loss: 0.9940376877784729,grad_norm: 0.9204826739508918, iteration: 211602
loss: 0.9784312844276428,grad_norm: 0.8074057589672985, iteration: 211603
loss: 1.010486125946045,grad_norm: 0.9999989224268329, iteration: 211604
loss: 1.0005115270614624,grad_norm: 0.7436575935360852, iteration: 211605
loss: 0.9931759238243103,grad_norm: 0.7672271101576599, iteration: 211606
loss: 1.0212312936782837,grad_norm: 0.9330461214916737, iteration: 211607
loss: 1.0162019729614258,grad_norm: 0.90485856928991, iteration: 211608
loss: 0.9890223741531372,grad_norm: 0.7926893046067903, iteration: 211609
loss: 1.0583826303482056,grad_norm: 0.9920073305655899, iteration: 211610
loss: 0.9922658205032349,grad_norm: 0.9334781972060573, iteration: 211611
loss: 0.9917561411857605,grad_norm: 0.9999995100272198, iteration: 211612
loss: 0.992144763469696,grad_norm: 0.9229125678206868, iteration: 211613
loss: 0.985427975654602,grad_norm: 0.9999991371329546, iteration: 211614
loss: 0.9796902537345886,grad_norm: 0.9956320292048214, iteration: 211615
loss: 0.9913948178291321,grad_norm: 0.878557872360507, iteration: 211616
loss: 1.005143642425537,grad_norm: 0.9138213547195235, iteration: 211617
loss: 1.005855917930603,grad_norm: 0.8571329162559466, iteration: 211618
loss: 1.0158255100250244,grad_norm: 0.9339977704596982, iteration: 211619
loss: 1.0369445085525513,grad_norm: 0.9999991299660662, iteration: 211620
loss: 1.0157750844955444,grad_norm: 0.6955571075068544, iteration: 211621
loss: 0.9657474756240845,grad_norm: 0.747276982527659, iteration: 211622
loss: 0.9970817565917969,grad_norm: 0.9998677433941906, iteration: 211623
loss: 0.977428674697876,grad_norm: 0.9720888260471978, iteration: 211624
loss: 1.0046042203903198,grad_norm: 0.7630031263815805, iteration: 211625
loss: 1.0180033445358276,grad_norm: 0.787600434792877, iteration: 211626
loss: 1.002903938293457,grad_norm: 0.8995256556461169, iteration: 211627
loss: 0.990091860294342,grad_norm: 0.7475620804225593, iteration: 211628
loss: 0.9743956327438354,grad_norm: 0.9883170653785047, iteration: 211629
loss: 0.9788646101951599,grad_norm: 0.9999991196093455, iteration: 211630
loss: 0.9857181906700134,grad_norm: 0.7591922326079145, iteration: 211631
loss: 0.9840465784072876,grad_norm: 0.8365638285338616, iteration: 211632
loss: 1.003757357597351,grad_norm: 0.9999992133945934, iteration: 211633
loss: 0.9713855981826782,grad_norm: 0.9112946793729366, iteration: 211634
loss: 1.0106595754623413,grad_norm: 0.8507059618280854, iteration: 211635
loss: 0.989365816116333,grad_norm: 0.9999991246822976, iteration: 211636
loss: 0.9962788224220276,grad_norm: 0.7682268870110333, iteration: 211637
loss: 0.9535630345344543,grad_norm: 0.9820149143349834, iteration: 211638
loss: 1.012831687927246,grad_norm: 0.999999461580935, iteration: 211639
loss: 1.0395759344100952,grad_norm: 0.9999990351343944, iteration: 211640
loss: 1.047739863395691,grad_norm: 0.9999992497023599, iteration: 211641
loss: 1.0346782207489014,grad_norm: 0.8665367756020418, iteration: 211642
loss: 1.0048593282699585,grad_norm: 0.9795756304015646, iteration: 211643
loss: 1.0064188241958618,grad_norm: 0.9999991606589244, iteration: 211644
loss: 0.9703106880187988,grad_norm: 0.9730625255424523, iteration: 211645
loss: 1.025536298751831,grad_norm: 0.9573014317979586, iteration: 211646
loss: 1.010029673576355,grad_norm: 0.9999990764909622, iteration: 211647
loss: 1.0219720602035522,grad_norm: 0.9999993245498597, iteration: 211648
loss: 0.9878346920013428,grad_norm: 0.9230949260139398, iteration: 211649
loss: 1.035103440284729,grad_norm: 0.9256074754397853, iteration: 211650
loss: 0.9680321216583252,grad_norm: 0.9206132989920601, iteration: 211651
loss: 1.0320430994033813,grad_norm: 0.99999916110632, iteration: 211652
loss: 1.0199055671691895,grad_norm: 0.8716084695744749, iteration: 211653
loss: 1.0427782535552979,grad_norm: 0.805583531425646, iteration: 211654
loss: 0.9694059491157532,grad_norm: 0.8686031279723754, iteration: 211655
loss: 0.9984927177429199,grad_norm: 0.745101868231227, iteration: 211656
loss: 1.0150777101516724,grad_norm: 0.9133698127665347, iteration: 211657
loss: 1.020017385482788,grad_norm: 0.9999992603643016, iteration: 211658
loss: 1.0276445150375366,grad_norm: 0.9707127625057845, iteration: 211659
loss: 0.9716989398002625,grad_norm: 0.8850369465220016, iteration: 211660
loss: 1.0169641971588135,grad_norm: 0.8308364024065741, iteration: 211661
loss: 1.0001575946807861,grad_norm: 0.7752852759668898, iteration: 211662
loss: 0.9621160626411438,grad_norm: 0.7625941880097346, iteration: 211663
loss: 0.99406498670578,grad_norm: 0.9593889919863422, iteration: 211664
loss: 1.010247826576233,grad_norm: 0.9999991998424025, iteration: 211665
loss: 0.9728397130966187,grad_norm: 0.7843755382991244, iteration: 211666
loss: 0.9765451550483704,grad_norm: 0.9999990596778114, iteration: 211667
loss: 0.9815482497215271,grad_norm: 0.9999990231111843, iteration: 211668
loss: 1.001218557357788,grad_norm: 0.9260404386185626, iteration: 211669
loss: 1.029686689376831,grad_norm: 0.9999991283402972, iteration: 211670
loss: 0.9855902791023254,grad_norm: 0.9181172179277214, iteration: 211671
loss: 0.9765861630439758,grad_norm: 0.9999991695668825, iteration: 211672
loss: 1.0185894966125488,grad_norm: 0.8343902961329853, iteration: 211673
loss: 1.031026840209961,grad_norm: 0.9999991424154357, iteration: 211674
loss: 0.983657717704773,grad_norm: 0.8642239724906012, iteration: 211675
loss: 0.9811196327209473,grad_norm: 0.9999990840503175, iteration: 211676
loss: 1.0109606981277466,grad_norm: 0.8846264004157965, iteration: 211677
loss: 1.0138795375823975,grad_norm: 0.9565789498366453, iteration: 211678
loss: 1.0178444385528564,grad_norm: 0.9999990656220891, iteration: 211679
loss: 0.9566059112548828,grad_norm: 0.8684997248877168, iteration: 211680
loss: 1.006608247756958,grad_norm: 0.7899373965845027, iteration: 211681
loss: 1.0049835443496704,grad_norm: 0.8301669134168932, iteration: 211682
loss: 1.0038999319076538,grad_norm: 0.8177130069950748, iteration: 211683
loss: 0.9628453254699707,grad_norm: 0.8682975917332838, iteration: 211684
loss: 0.9811185598373413,grad_norm: 0.8827178961457134, iteration: 211685
loss: 1.0410072803497314,grad_norm: 0.9786677471904865, iteration: 211686
loss: 1.0256601572036743,grad_norm: 0.9999991942080538, iteration: 211687
loss: 1.0129050016403198,grad_norm: 0.9999992048648922, iteration: 211688
loss: 1.0235600471496582,grad_norm: 0.9999990687831972, iteration: 211689
loss: 1.0010430812835693,grad_norm: 0.9549955667774249, iteration: 211690
loss: 0.9784970879554749,grad_norm: 0.9999989702788615, iteration: 211691
loss: 1.0382541418075562,grad_norm: 0.9562618043047106, iteration: 211692
loss: 1.0408776998519897,grad_norm: 0.9031524709811755, iteration: 211693
loss: 0.9666774868965149,grad_norm: 0.7710567449489032, iteration: 211694
loss: 0.976362407207489,grad_norm: 0.8762824614830709, iteration: 211695
loss: 1.0263468027114868,grad_norm: 0.9999991422744022, iteration: 211696
loss: 1.0036392211914062,grad_norm: 0.9999991517284802, iteration: 211697
loss: 0.9948871731758118,grad_norm: 0.9999990346317835, iteration: 211698
loss: 0.9715462327003479,grad_norm: 0.9947730087145192, iteration: 211699
loss: 1.0089157819747925,grad_norm: 0.7815960004615046, iteration: 211700
loss: 0.985914945602417,grad_norm: 0.8906164218908422, iteration: 211701
loss: 0.9732705354690552,grad_norm: 0.9999990834717128, iteration: 211702
loss: 1.0285826921463013,grad_norm: 0.9568966876913539, iteration: 211703
loss: 0.956066906452179,grad_norm: 0.9206909806374021, iteration: 211704
loss: 1.0008759498596191,grad_norm: 0.9554259762833671, iteration: 211705
loss: 0.9805829524993896,grad_norm: 0.9999992798492353, iteration: 211706
loss: 1.03114914894104,grad_norm: 0.9252481866544178, iteration: 211707
loss: 0.9850621223449707,grad_norm: 0.9445801223332831, iteration: 211708
loss: 0.9907549619674683,grad_norm: 0.9999991245862937, iteration: 211709
loss: 0.9825989603996277,grad_norm: 0.8887133111324405, iteration: 211710
loss: 1.0518661737442017,grad_norm: 0.9206698988084254, iteration: 211711
loss: 0.9900724291801453,grad_norm: 0.9999989992852293, iteration: 211712
loss: 1.0336732864379883,grad_norm: 0.9999991807186338, iteration: 211713
loss: 0.9697613716125488,grad_norm: 0.9515392726953183, iteration: 211714
loss: 1.061516523361206,grad_norm: 0.9999994916923265, iteration: 211715
loss: 0.9719277024269104,grad_norm: 0.8147999300957466, iteration: 211716
loss: 1.0050855875015259,grad_norm: 0.8355580973446547, iteration: 211717
loss: 0.980985701084137,grad_norm: 0.9164880615626473, iteration: 211718
loss: 1.0179708003997803,grad_norm: 0.8301157819722355, iteration: 211719
loss: 1.081596851348877,grad_norm: 0.9999998038181124, iteration: 211720
loss: 1.0254055261611938,grad_norm: 0.9742079024413822, iteration: 211721
loss: 0.9999987483024597,grad_norm: 0.971853179911989, iteration: 211722
loss: 0.9888612627983093,grad_norm: 0.9069862112065784, iteration: 211723
loss: 1.0140970945358276,grad_norm: 0.9348915069795138, iteration: 211724
loss: 1.007597804069519,grad_norm: 0.8858571767278096, iteration: 211725
loss: 1.007348895072937,grad_norm: 0.8847070074975265, iteration: 211726
loss: 0.9689393639564514,grad_norm: 0.999999178549012, iteration: 211727
loss: 0.9861127138137817,grad_norm: 0.9919356253488951, iteration: 211728
loss: 0.957710862159729,grad_norm: 0.9340578218915634, iteration: 211729
loss: 0.976728618144989,grad_norm: 0.8653127300897185, iteration: 211730
loss: 1.0120216608047485,grad_norm: 0.8976708575154231, iteration: 211731
loss: 1.0052980184555054,grad_norm: 0.9081318180900849, iteration: 211732
loss: 0.9682249426841736,grad_norm: 0.8300760252067813, iteration: 211733
loss: 1.0296343564987183,grad_norm: 0.9999999040952099, iteration: 211734
loss: 1.0077072381973267,grad_norm: 0.9999991747747522, iteration: 211735
loss: 0.9739064574241638,grad_norm: 0.9014768246127692, iteration: 211736
loss: 1.010138988494873,grad_norm: 0.9999992147105109, iteration: 211737
loss: 1.091615915298462,grad_norm: 0.9999993482338355, iteration: 211738
loss: 1.0105665922164917,grad_norm: 0.9400876333319905, iteration: 211739
loss: 0.996235728263855,grad_norm: 0.9755821734568104, iteration: 211740
loss: 0.9469035267829895,grad_norm: 0.9999990607537496, iteration: 211741
loss: 1.0192068815231323,grad_norm: 0.9126332022101096, iteration: 211742
loss: 0.9674137830734253,grad_norm: 0.9055850589209017, iteration: 211743
loss: 1.0085489749908447,grad_norm: 0.8892424860172529, iteration: 211744
loss: 0.9861685633659363,grad_norm: 0.9999991265118158, iteration: 211745
loss: 0.9653868675231934,grad_norm: 0.8794604004553807, iteration: 211746
loss: 0.9671581983566284,grad_norm: 0.990089484476383, iteration: 211747
loss: 0.9830672740936279,grad_norm: 0.9999991982795161, iteration: 211748
loss: 1.0322396755218506,grad_norm: 0.9348045166171416, iteration: 211749
loss: 1.0069507360458374,grad_norm: 0.9424152721963723, iteration: 211750
loss: 0.9948387145996094,grad_norm: 0.9048655085295315, iteration: 211751
loss: 0.9802668690681458,grad_norm: 0.9999991607960886, iteration: 211752
loss: 1.021340012550354,grad_norm: 0.999999715913074, iteration: 211753
loss: 1.0010806322097778,grad_norm: 0.7077370563649478, iteration: 211754
loss: 1.0062352418899536,grad_norm: 0.9999990389711169, iteration: 211755
loss: 1.022763729095459,grad_norm: 0.9999991798090283, iteration: 211756
loss: 0.9398283362388611,grad_norm: 0.9999991612911412, iteration: 211757
loss: 0.9749871492385864,grad_norm: 0.9253632021786504, iteration: 211758
loss: 1.0138976573944092,grad_norm: 0.9999989613826922, iteration: 211759
loss: 1.0083481073379517,grad_norm: 0.9200933816651887, iteration: 211760
loss: 1.0026345252990723,grad_norm: 0.8279163993783715, iteration: 211761
loss: 0.9725500345230103,grad_norm: 0.85652058345849, iteration: 211762
loss: 1.0469584465026855,grad_norm: 0.999999035404927, iteration: 211763
loss: 0.983981192111969,grad_norm: 0.9589439156309633, iteration: 211764
loss: 1.0321918725967407,grad_norm: 0.8681935459886587, iteration: 211765
loss: 1.0169531106948853,grad_norm: 0.9999994819662423, iteration: 211766
loss: 1.010937213897705,grad_norm: 0.9999990088938151, iteration: 211767
loss: 1.0089011192321777,grad_norm: 0.8558220396711596, iteration: 211768
loss: 1.0536476373672485,grad_norm: 0.999999096604361, iteration: 211769
loss: 0.9283117055892944,grad_norm: 0.9999991110954286, iteration: 211770
loss: 0.9909957051277161,grad_norm: 0.8138378673267855, iteration: 211771
loss: 1.0095031261444092,grad_norm: 0.8692389060718672, iteration: 211772
loss: 1.0089184045791626,grad_norm: 0.9999989664980368, iteration: 211773
loss: 0.9844643473625183,grad_norm: 0.9329380408505582, iteration: 211774
loss: 0.9886854290962219,grad_norm: 0.8153388904135828, iteration: 211775
loss: 1.0021710395812988,grad_norm: 0.9068548708967155, iteration: 211776
loss: 0.977205753326416,grad_norm: 0.9360201499606238, iteration: 211777
loss: 1.0289949178695679,grad_norm: 0.9082893848754804, iteration: 211778
loss: 0.997524619102478,grad_norm: 0.9999991477121689, iteration: 211779
loss: 1.0170283317565918,grad_norm: 0.9999990925117792, iteration: 211780
loss: 1.0072410106658936,grad_norm: 0.8446308158868704, iteration: 211781
loss: 0.9719340801239014,grad_norm: 0.8640457301374443, iteration: 211782
loss: 0.9935442805290222,grad_norm: 0.9999992717682749, iteration: 211783
loss: 1.0302038192749023,grad_norm: 0.9999991797163007, iteration: 211784
loss: 1.0343114137649536,grad_norm: 0.9999991940042415, iteration: 211785
loss: 0.9906014800071716,grad_norm: 0.999999052248695, iteration: 211786
loss: 1.031622052192688,grad_norm: 0.999998947468594, iteration: 211787
loss: 0.9608842730522156,grad_norm: 0.9692676188978562, iteration: 211788
loss: 0.9608630537986755,grad_norm: 0.9999997034887812, iteration: 211789
loss: 0.9955902099609375,grad_norm: 0.9999991875362565, iteration: 211790
loss: 1.0011252164840698,grad_norm: 0.9999991814709668, iteration: 211791
loss: 0.9984439611434937,grad_norm: 0.7223731580590399, iteration: 211792
loss: 1.0044310092926025,grad_norm: 0.9999991518439705, iteration: 211793
loss: 1.0154308080673218,grad_norm: 0.772118649238603, iteration: 211794
loss: 1.0840057134628296,grad_norm: 0.9999991943339855, iteration: 211795
loss: 1.0057978630065918,grad_norm: 0.8936940840057777, iteration: 211796
loss: 1.0531834363937378,grad_norm: 0.9999997159878219, iteration: 211797
loss: 0.9930448532104492,grad_norm: 0.9542365169439944, iteration: 211798
loss: 1.0227081775665283,grad_norm: 0.8555130465314662, iteration: 211799
loss: 1.001860499382019,grad_norm: 0.9779675903796401, iteration: 211800
loss: 0.9922332763671875,grad_norm: 0.880587273042914, iteration: 211801
loss: 1.0360996723175049,grad_norm: 0.8102093765203201, iteration: 211802
loss: 1.0222861766815186,grad_norm: 0.9369243392885142, iteration: 211803
loss: 0.990578830242157,grad_norm: 0.9615318670794083, iteration: 211804
loss: 1.0245112180709839,grad_norm: 0.9999999396049222, iteration: 211805
loss: 0.9992473125457764,grad_norm: 0.8425492122940665, iteration: 211806
loss: 0.9720109105110168,grad_norm: 0.9017208374377123, iteration: 211807
loss: 0.9772778749465942,grad_norm: 0.8962789897658249, iteration: 211808
loss: 1.0094718933105469,grad_norm: 0.92464433303207, iteration: 211809
loss: 0.9942383170127869,grad_norm: 0.9999991790521914, iteration: 211810
loss: 1.0490809679031372,grad_norm: 0.999999130338856, iteration: 211811
loss: 0.9836820960044861,grad_norm: 0.9999997617213331, iteration: 211812
loss: 1.000948429107666,grad_norm: 0.9999991602590924, iteration: 211813
loss: 1.0188673734664917,grad_norm: 0.9999991416585875, iteration: 211814
loss: 0.9989298582077026,grad_norm: 0.7710772264185202, iteration: 211815
loss: 1.0154578685760498,grad_norm: 0.9999989252077013, iteration: 211816
loss: 1.0138379335403442,grad_norm: 0.8230980140659067, iteration: 211817
loss: 0.9923329949378967,grad_norm: 0.9026241284117725, iteration: 211818
loss: 1.0073060989379883,grad_norm: 0.9999989706538602, iteration: 211819
loss: 1.0809903144836426,grad_norm: 0.9999992092087581, iteration: 211820
loss: 1.03191339969635,grad_norm: 0.9999992396639525, iteration: 211821
loss: 1.0479564666748047,grad_norm: 0.9999991673756465, iteration: 211822
loss: 1.0171747207641602,grad_norm: 0.9456812282483393, iteration: 211823
loss: 0.9956457614898682,grad_norm: 0.8955957942667601, iteration: 211824
loss: 0.9811825156211853,grad_norm: 0.9417799103681429, iteration: 211825
loss: 0.9873685240745544,grad_norm: 0.9999991683438736, iteration: 211826
loss: 1.0147684812545776,grad_norm: 0.9999997673715828, iteration: 211827
loss: 1.0180792808532715,grad_norm: 0.9690460926682587, iteration: 211828
loss: 1.039417028427124,grad_norm: 0.8414195749168004, iteration: 211829
loss: 1.0047260522842407,grad_norm: 0.9999992019154588, iteration: 211830
loss: 1.0205875635147095,grad_norm: 0.9561420167062932, iteration: 211831
loss: 0.9750914573669434,grad_norm: 0.929993234394425, iteration: 211832
loss: 0.9993011355400085,grad_norm: 0.8935515684455397, iteration: 211833
loss: 0.9565457105636597,grad_norm: 0.9999990106227422, iteration: 211834
loss: 1.0094484090805054,grad_norm: 0.8282458312709046, iteration: 211835
loss: 0.9936054348945618,grad_norm: 0.9024871266630153, iteration: 211836
loss: 0.9961357116699219,grad_norm: 0.9692844159705901, iteration: 211837
loss: 1.0109511613845825,grad_norm: 0.9602270350049485, iteration: 211838
loss: 0.9635964035987854,grad_norm: 0.9999992259639394, iteration: 211839
loss: 1.0459282398223877,grad_norm: 0.7333223236940498, iteration: 211840
loss: 0.9867590665817261,grad_norm: 0.8373792844724054, iteration: 211841
loss: 1.0534619092941284,grad_norm: 0.9999992342535219, iteration: 211842
loss: 1.016269564628601,grad_norm: 0.9999994588079717, iteration: 211843
loss: 1.0076544284820557,grad_norm: 0.9999994707279439, iteration: 211844
loss: 1.0021613836288452,grad_norm: 0.9999990672928161, iteration: 211845
loss: 1.008085012435913,grad_norm: 0.824372330128949, iteration: 211846
loss: 1.0127623081207275,grad_norm: 0.9395248324892042, iteration: 211847
loss: 1.0245592594146729,grad_norm: 0.9999990887139675, iteration: 211848
loss: 1.019677758216858,grad_norm: 0.9999992547441208, iteration: 211849
loss: 1.0149040222167969,grad_norm: 0.9999991412004001, iteration: 211850
loss: 0.9783976674079895,grad_norm: 0.9807302444071849, iteration: 211851
loss: 1.016859531402588,grad_norm: 0.987195967391838, iteration: 211852
loss: 1.0188440084457397,grad_norm: 0.8216459152430959, iteration: 211853
loss: 0.9836495518684387,grad_norm: 0.8948848162416232, iteration: 211854
loss: 0.978829026222229,grad_norm: 0.9999991187019163, iteration: 211855
loss: 1.0292741060256958,grad_norm: 0.7500320658407915, iteration: 211856
loss: 0.9863249063491821,grad_norm: 0.9268747927105381, iteration: 211857
loss: 1.0168594121932983,grad_norm: 0.999999194286205, iteration: 211858
loss: 1.0078506469726562,grad_norm: 0.7732481129409522, iteration: 211859
loss: 1.0090605020523071,grad_norm: 0.9999994791943233, iteration: 211860
loss: 0.9807895421981812,grad_norm: 0.9048128744214294, iteration: 211861
loss: 1.0014970302581787,grad_norm: 0.999999747629302, iteration: 211862
loss: 0.986292839050293,grad_norm: 0.9999991972695218, iteration: 211863
loss: 0.9700684547424316,grad_norm: 0.9999991679377875, iteration: 211864
loss: 0.9826272130012512,grad_norm: 0.9856252587909954, iteration: 211865
loss: 1.007508397102356,grad_norm: 0.896924563286208, iteration: 211866
loss: 1.0272347927093506,grad_norm: 0.9610788390523639, iteration: 211867
loss: 0.9656625986099243,grad_norm: 0.9999989053314037, iteration: 211868
loss: 0.9856150150299072,grad_norm: 0.8812794187074028, iteration: 211869
loss: 0.9963889122009277,grad_norm: 0.9514084722418545, iteration: 211870
loss: 0.9982925653457642,grad_norm: 0.9634523647306524, iteration: 211871
loss: 0.9955499768257141,grad_norm: 0.8982035709260833, iteration: 211872
loss: 0.9855005145072937,grad_norm: 0.8669328134269747, iteration: 211873
loss: 1.0053362846374512,grad_norm: 0.9375415354120846, iteration: 211874
loss: 1.004900574684143,grad_norm: 0.9999990746622905, iteration: 211875
loss: 1.0303188562393188,grad_norm: 0.8437563823674925, iteration: 211876
loss: 0.9642286896705627,grad_norm: 0.845318489014185, iteration: 211877
loss: 0.9954540729522705,grad_norm: 0.8807809310557999, iteration: 211878
loss: 0.9931980967521667,grad_norm: 0.9999990257129087, iteration: 211879
loss: 0.9800320267677307,grad_norm: 0.9999990610116184, iteration: 211880
loss: 0.9631277322769165,grad_norm: 0.9999996744782585, iteration: 211881
loss: 1.0315361022949219,grad_norm: 0.9149787489035297, iteration: 211882
loss: 0.9953557848930359,grad_norm: 0.9293947534489985, iteration: 211883
loss: 1.0087826251983643,grad_norm: 0.8442185535131926, iteration: 211884
loss: 0.978024423122406,grad_norm: 0.9999991550110828, iteration: 211885
loss: 1.0113807916641235,grad_norm: 0.9999990992654115, iteration: 211886
loss: 0.9966293573379517,grad_norm: 0.9387723610763857, iteration: 211887
loss: 0.9963839650154114,grad_norm: 0.9341643973442931, iteration: 211888
loss: 0.976608157157898,grad_norm: 0.9999992311315713, iteration: 211889
loss: 0.9438472986221313,grad_norm: 0.9116885388164542, iteration: 211890
loss: 1.0032771825790405,grad_norm: 0.9999991536146233, iteration: 211891
loss: 0.9928248524665833,grad_norm: 0.9984454626706777, iteration: 211892
loss: 1.0141969919204712,grad_norm: 0.9425035561026364, iteration: 211893
loss: 1.0143316984176636,grad_norm: 0.9153046087056707, iteration: 211894
loss: 0.9783565998077393,grad_norm: 0.99999911502025, iteration: 211895
loss: 1.0401593446731567,grad_norm: 0.868550611410222, iteration: 211896
loss: 0.998065710067749,grad_norm: 0.8601722806181366, iteration: 211897
loss: 1.0060526132583618,grad_norm: 0.9741723254864424, iteration: 211898
loss: 0.9995977282524109,grad_norm: 0.9237047257198755, iteration: 211899
loss: 1.029273509979248,grad_norm: 0.9999990471214377, iteration: 211900
loss: 0.9945158958435059,grad_norm: 0.8834255194568881, iteration: 211901
loss: 0.9913255572319031,grad_norm: 0.9673981320675274, iteration: 211902
loss: 1.0122443437576294,grad_norm: 0.9820274263624681, iteration: 211903
loss: 1.0000356435775757,grad_norm: 0.8232318625301798, iteration: 211904
loss: 1.0155830383300781,grad_norm: 0.999999117313012, iteration: 211905
loss: 1.0215071439743042,grad_norm: 0.8960019633335956, iteration: 211906
loss: 0.9779700636863708,grad_norm: 0.9188165060172045, iteration: 211907
loss: 0.9761248230934143,grad_norm: 0.9679177964665923, iteration: 211908
loss: 1.0132168531417847,grad_norm: 0.9999994843737721, iteration: 211909
loss: 1.036189317703247,grad_norm: 0.9622797547192256, iteration: 211910
loss: 0.9966374039649963,grad_norm: 0.8903210631903545, iteration: 211911
loss: 0.9973440766334534,grad_norm: 0.8288469452343838, iteration: 211912
loss: 1.035522222518921,grad_norm: 0.9383353838251037, iteration: 211913
loss: 1.0332409143447876,grad_norm: 0.780500038327448, iteration: 211914
loss: 1.0260777473449707,grad_norm: 0.9811719787260622, iteration: 211915
loss: 1.0577186346054077,grad_norm: 0.9870902924226784, iteration: 211916
loss: 0.9908431768417358,grad_norm: 0.9629331675826602, iteration: 211917
loss: 0.9890190362930298,grad_norm: 0.9588713987712152, iteration: 211918
loss: 0.9886153340339661,grad_norm: 0.9834332362720031, iteration: 211919
loss: 1.0142534971237183,grad_norm: 0.9999989956671478, iteration: 211920
loss: 1.025390386581421,grad_norm: 0.8532204838390844, iteration: 211921
loss: 1.009767770767212,grad_norm: 0.9999990613490751, iteration: 211922
loss: 0.9836277365684509,grad_norm: 0.9910224059631524, iteration: 211923
loss: 1.025302767753601,grad_norm: 0.8836494728736204, iteration: 211924
loss: 0.9990740418434143,grad_norm: 0.8569765185356645, iteration: 211925
loss: 0.979034423828125,grad_norm: 0.9234294295703566, iteration: 211926
loss: 0.9753013849258423,grad_norm: 0.9999992451314735, iteration: 211927
loss: 1.0248504877090454,grad_norm: 0.9999991043498409, iteration: 211928
loss: 1.0148403644561768,grad_norm: 0.7911033630747117, iteration: 211929
loss: 1.0160112380981445,grad_norm: 0.9336452631258849, iteration: 211930
loss: 1.0007619857788086,grad_norm: 0.795717148313771, iteration: 211931
loss: 0.9861067533493042,grad_norm: 0.999999140419279, iteration: 211932
loss: 0.9837639331817627,grad_norm: 0.9999991217987311, iteration: 211933
loss: 0.9470294713973999,grad_norm: 0.9385019888770668, iteration: 211934
loss: 0.9976822733879089,grad_norm: 0.9291303122768071, iteration: 211935
loss: 1.0053443908691406,grad_norm: 0.9999992170666265, iteration: 211936
loss: 0.9693260788917542,grad_norm: 0.9999990328834815, iteration: 211937
loss: 1.0484426021575928,grad_norm: 0.999999222375366, iteration: 211938
loss: 0.9872005581855774,grad_norm: 0.9379683975151536, iteration: 211939
loss: 0.988677442073822,grad_norm: 0.999998981710821, iteration: 211940
loss: 1.0142465829849243,grad_norm: 0.8737880733647532, iteration: 211941
loss: 0.9913734793663025,grad_norm: 0.8785800275129838, iteration: 211942
loss: 1.0391716957092285,grad_norm: 0.9353952849628222, iteration: 211943
loss: 1.008846640586853,grad_norm: 0.9423901569990816, iteration: 211944
loss: 0.982071042060852,grad_norm: 0.8217632666980538, iteration: 211945
loss: 0.9888013005256653,grad_norm: 0.9781130955512258, iteration: 211946
loss: 1.0196701288223267,grad_norm: 0.8345275962071155, iteration: 211947
loss: 0.970826268196106,grad_norm: 0.9243494920424662, iteration: 211948
loss: 1.0180530548095703,grad_norm: 0.9194362271329024, iteration: 211949
loss: 1.037414789199829,grad_norm: 0.9999991501152793, iteration: 211950
loss: 1.2101130485534668,grad_norm: 0.9999998000698594, iteration: 211951
loss: 0.9893912672996521,grad_norm: 0.961436374623084, iteration: 211952
loss: 0.988180935382843,grad_norm: 0.9184995535176166, iteration: 211953
loss: 0.9571148157119751,grad_norm: 0.9587906508970179, iteration: 211954
loss: 1.037147879600525,grad_norm: 0.9290927733040828, iteration: 211955
loss: 0.9983205795288086,grad_norm: 0.9999991427436148, iteration: 211956
loss: 0.9964657425880432,grad_norm: 0.816616907356774, iteration: 211957
loss: 1.0049941539764404,grad_norm: 0.9999990995601175, iteration: 211958
loss: 0.988187313079834,grad_norm: 0.8755864020417166, iteration: 211959
loss: 1.0143017768859863,grad_norm: 0.9999989619602632, iteration: 211960
loss: 0.9765663743019104,grad_norm: 0.9724164285678671, iteration: 211961
loss: 0.9802406430244446,grad_norm: 0.8900694298093572, iteration: 211962
loss: 1.006326675415039,grad_norm: 0.9999992294942781, iteration: 211963
loss: 0.9874343872070312,grad_norm: 0.824661494124762, iteration: 211964
loss: 1.0317786931991577,grad_norm: 0.9999990925571991, iteration: 211965
loss: 1.0099152326583862,grad_norm: 0.8309982738394192, iteration: 211966
loss: 1.0281327962875366,grad_norm: 0.9999990347109572, iteration: 211967
loss: 0.9754836559295654,grad_norm: 0.9226947076080991, iteration: 211968
loss: 0.9897724390029907,grad_norm: 0.9684373443533771, iteration: 211969
loss: 0.9839707016944885,grad_norm: 0.8988557384354133, iteration: 211970
loss: 0.9973706603050232,grad_norm: 0.7961634422002034, iteration: 211971
loss: 1.007890224456787,grad_norm: 0.9277788537065804, iteration: 211972
loss: 1.0017503499984741,grad_norm: 0.8409402692341239, iteration: 211973
loss: 1.0332404375076294,grad_norm: 0.9639970050527774, iteration: 211974
loss: 0.9712076187133789,grad_norm: 0.9489070073987618, iteration: 211975
loss: 0.992723286151886,grad_norm: 0.7449478363995645, iteration: 211976
loss: 0.9453946948051453,grad_norm: 0.9748737149018913, iteration: 211977
loss: 1.0319528579711914,grad_norm: 0.9999992175294592, iteration: 211978
loss: 1.038930058479309,grad_norm: 0.9134473395577762, iteration: 211979
loss: 1.0020885467529297,grad_norm: 0.9494781193262585, iteration: 211980
loss: 1.0070760250091553,grad_norm: 0.9999998488141477, iteration: 211981
loss: 1.014586091041565,grad_norm: 0.999999251940278, iteration: 211982
loss: 0.9900454878807068,grad_norm: 0.7209370539654824, iteration: 211983
loss: 1.0213394165039062,grad_norm: 0.9023488745021182, iteration: 211984
loss: 0.9561236500740051,grad_norm: 0.9402265854583322, iteration: 211985
loss: 0.9709367752075195,grad_norm: 0.8719179198013765, iteration: 211986
loss: 0.9869338274002075,grad_norm: 0.9999990533482934, iteration: 211987
loss: 0.9813998341560364,grad_norm: 0.9729903699972144, iteration: 211988
loss: 0.9885337948799133,grad_norm: 0.9999992275647257, iteration: 211989
loss: 1.0345814228057861,grad_norm: 0.9226811997073654, iteration: 211990
loss: 0.9850280284881592,grad_norm: 0.9999991306002346, iteration: 211991
loss: 0.9766971468925476,grad_norm: 0.9999990824418717, iteration: 211992
loss: 0.9792885780334473,grad_norm: 0.8253824866208559, iteration: 211993
loss: 1.009755253791809,grad_norm: 0.9237507673288855, iteration: 211994
loss: 0.9643028378486633,grad_norm: 0.9126186453481661, iteration: 211995
loss: 1.0001760721206665,grad_norm: 0.999998998012073, iteration: 211996
loss: 1.0030235052108765,grad_norm: 0.9999991516308996, iteration: 211997
loss: 0.9940069317817688,grad_norm: 0.7043543753342266, iteration: 211998
loss: 0.9678566455841064,grad_norm: 0.8052480104911897, iteration: 211999
loss: 1.005586862564087,grad_norm: 0.8172622852380627, iteration: 212000
loss: 0.9771971702575684,grad_norm: 0.8373999446597895, iteration: 212001
loss: 0.9964292049407959,grad_norm: 0.8635309094263328, iteration: 212002
loss: 0.9948979616165161,grad_norm: 0.7357685140398926, iteration: 212003
loss: 0.986960768699646,grad_norm: 0.8531836950675562, iteration: 212004
loss: 0.9800815582275391,grad_norm: 0.8362374968773436, iteration: 212005
loss: 1.0037068128585815,grad_norm: 0.8417009658047989, iteration: 212006
loss: 1.0254122018814087,grad_norm: 0.9806588068407106, iteration: 212007
loss: 0.9932900667190552,grad_norm: 0.9287752519496913, iteration: 212008
loss: 0.9758996963500977,grad_norm: 0.9999991946253421, iteration: 212009
loss: 1.0164365768432617,grad_norm: 0.937004856442223, iteration: 212010
loss: 0.9436890482902527,grad_norm: 0.9999991851664044, iteration: 212011
loss: 1.0115854740142822,grad_norm: 0.8633925137611489, iteration: 212012
loss: 0.9848658442497253,grad_norm: 0.9021674615570144, iteration: 212013
loss: 0.9947550892829895,grad_norm: 0.9999990451065152, iteration: 212014
loss: 0.9793065190315247,grad_norm: 0.8785293906837122, iteration: 212015
loss: 0.9897465109825134,grad_norm: 0.9999991212413336, iteration: 212016
loss: 1.025596261024475,grad_norm: 0.9999990879903852, iteration: 212017
loss: 1.020284652709961,grad_norm: 0.9999995007396424, iteration: 212018
loss: 0.9943556189537048,grad_norm: 0.9633302659548183, iteration: 212019
loss: 1.0467311143875122,grad_norm: 0.8454309313080963, iteration: 212020
loss: 0.9815284013748169,grad_norm: 0.9999991311544701, iteration: 212021
loss: 0.9880838394165039,grad_norm: 0.9713000434030501, iteration: 212022
loss: 0.9903562068939209,grad_norm: 0.9317979507333047, iteration: 212023
loss: 0.9728556871414185,grad_norm: 0.9999990795601789, iteration: 212024
loss: 0.9994266033172607,grad_norm: 0.9999991611912611, iteration: 212025
loss: 0.9649791121482849,grad_norm: 0.9999991116615096, iteration: 212026
loss: 0.9754278063774109,grad_norm: 0.8225088187203796, iteration: 212027
loss: 1.010023593902588,grad_norm: 0.7863338647190429, iteration: 212028
loss: 1.0127719640731812,grad_norm: 0.9607493610677134, iteration: 212029
loss: 1.0209840536117554,grad_norm: 0.9076993258163187, iteration: 212030
loss: 1.0367568731307983,grad_norm: 0.9999991352528568, iteration: 212031
loss: 0.9678653478622437,grad_norm: 0.9999990899444009, iteration: 212032
loss: 0.9876561164855957,grad_norm: 0.8811143283945506, iteration: 212033
loss: 0.9804278612136841,grad_norm: 0.9999990018341041, iteration: 212034
loss: 0.9674117565155029,grad_norm: 0.9668625978846951, iteration: 212035
loss: 0.9852708578109741,grad_norm: 0.9194907891003572, iteration: 212036
loss: 1.0123263597488403,grad_norm: 0.8552833482216909, iteration: 212037
loss: 1.0071172714233398,grad_norm: 0.9953192822855965, iteration: 212038
loss: 1.000428318977356,grad_norm: 0.6316843275111681, iteration: 212039
loss: 0.9980229735374451,grad_norm: 0.9174021102483164, iteration: 212040
loss: 0.9452824592590332,grad_norm: 0.8857115342631936, iteration: 212041
loss: 1.027145504951477,grad_norm: 0.9605487823356245, iteration: 212042
loss: 1.0728927850723267,grad_norm: 0.9999996004260104, iteration: 212043
loss: 1.000983476638794,grad_norm: 0.9999997835184001, iteration: 212044
loss: 1.0574467182159424,grad_norm: 0.9999998252176518, iteration: 212045
loss: 1.0083065032958984,grad_norm: 0.9193459858650869, iteration: 212046
loss: 1.0280998945236206,grad_norm: 0.9999991295563634, iteration: 212047
loss: 1.000014305114746,grad_norm: 0.9348007136354822, iteration: 212048
loss: 0.995032012462616,grad_norm: 0.8114084393813564, iteration: 212049
loss: 1.0078314542770386,grad_norm: 0.9844279618884006, iteration: 212050
loss: 0.9984579682350159,grad_norm: 0.9999991792493804, iteration: 212051
loss: 0.9704042673110962,grad_norm: 0.892020468085793, iteration: 212052
loss: 1.0282238721847534,grad_norm: 0.8295825861841337, iteration: 212053
loss: 0.9901725649833679,grad_norm: 0.9944641889969205, iteration: 212054
loss: 1.0294314622879028,grad_norm: 0.9999990789521019, iteration: 212055
loss: 1.057381272315979,grad_norm: 0.9389208840888261, iteration: 212056
loss: 1.0052531957626343,grad_norm: 0.8245392050147774, iteration: 212057
loss: 0.9951041340827942,grad_norm: 0.99999913526142, iteration: 212058
loss: 0.9823150038719177,grad_norm: 0.8880966671552193, iteration: 212059
loss: 1.0936150550842285,grad_norm: 0.9304427218486847, iteration: 212060
loss: 0.9878419041633606,grad_norm: 0.9999991152504338, iteration: 212061
loss: 0.9848731756210327,grad_norm: 0.9999990751926227, iteration: 212062
loss: 1.0510694980621338,grad_norm: 0.7619772890430833, iteration: 212063
loss: 0.974087655544281,grad_norm: 0.8607507369382892, iteration: 212064
loss: 1.0319920778274536,grad_norm: 0.9999990782313649, iteration: 212065
loss: 1.0041451454162598,grad_norm: 0.9962116043534324, iteration: 212066
loss: 0.9819060564041138,grad_norm: 0.8593582444958195, iteration: 212067
loss: 1.013711929321289,grad_norm: 0.9999994718530273, iteration: 212068
loss: 0.9792263507843018,grad_norm: 0.9836286744047961, iteration: 212069
loss: 0.9822345972061157,grad_norm: 0.9315968916384412, iteration: 212070
loss: 0.9966027140617371,grad_norm: 0.9498336103645573, iteration: 212071
loss: 1.0490477085113525,grad_norm: 0.7689498787660936, iteration: 212072
loss: 1.0051944255828857,grad_norm: 0.9263063114484288, iteration: 212073
loss: 1.0140453577041626,grad_norm: 0.9999991594676737, iteration: 212074
loss: 1.0095041990280151,grad_norm: 0.9999989921124965, iteration: 212075
loss: 1.0066475868225098,grad_norm: 0.9638049636258968, iteration: 212076
loss: 0.9627532362937927,grad_norm: 0.9533562816014992, iteration: 212077
loss: 1.0441535711288452,grad_norm: 0.8703898827642444, iteration: 212078
loss: 1.0162229537963867,grad_norm: 0.9411264507359579, iteration: 212079
loss: 0.99103182554245,grad_norm: 0.9603228304407675, iteration: 212080
loss: 0.9754708409309387,grad_norm: 0.8458075496081767, iteration: 212081
loss: 0.9867372512817383,grad_norm: 0.745706088348096, iteration: 212082
loss: 0.992911696434021,grad_norm: 0.9381841508961462, iteration: 212083
loss: 1.0138733386993408,grad_norm: 0.8756347379965819, iteration: 212084
loss: 0.9544550180435181,grad_norm: 0.9999991768364765, iteration: 212085
loss: 1.019741177558899,grad_norm: 0.9999996056835211, iteration: 212086
loss: 1.0136643648147583,grad_norm: 0.9814393599431022, iteration: 212087
loss: 1.0286792516708374,grad_norm: 0.9307318438431949, iteration: 212088
loss: 0.9973234534263611,grad_norm: 0.9972601361256643, iteration: 212089
loss: 0.9730142951011658,grad_norm: 0.9999991327969587, iteration: 212090
loss: 1.0021711587905884,grad_norm: 0.9024255376706307, iteration: 212091
loss: 1.0027931928634644,grad_norm: 0.8088299112180058, iteration: 212092
loss: 0.9921921491622925,grad_norm: 0.9687865300264216, iteration: 212093
loss: 0.9695032238960266,grad_norm: 0.9280177342034854, iteration: 212094
loss: 1.006355881690979,grad_norm: 0.9999990279100386, iteration: 212095
loss: 1.0175936222076416,grad_norm: 0.8925552615513512, iteration: 212096
loss: 0.9748286008834839,grad_norm: 0.99999933070135, iteration: 212097
loss: 1.015636682510376,grad_norm: 0.9784258667356251, iteration: 212098
loss: 1.0028105974197388,grad_norm: 0.9852975008367708, iteration: 212099
loss: 0.9623046517372131,grad_norm: 0.7657115939550486, iteration: 212100
loss: 1.03117036819458,grad_norm: 0.8703077472426214, iteration: 212101
loss: 0.9993090033531189,grad_norm: 0.9415493006822087, iteration: 212102
loss: 0.9967862367630005,grad_norm: 0.9179976669141429, iteration: 212103
loss: 0.9718182682991028,grad_norm: 0.8784731157366215, iteration: 212104
loss: 1.057478904724121,grad_norm: 0.895339902085459, iteration: 212105
loss: 1.0309903621673584,grad_norm: 0.8593664141213072, iteration: 212106
loss: 1.0117321014404297,grad_norm: 0.8692228706634018, iteration: 212107
loss: 0.9923104047775269,grad_norm: 0.9999992449322708, iteration: 212108
loss: 0.9752006530761719,grad_norm: 0.7875220994712512, iteration: 212109
loss: 0.9731588363647461,grad_norm: 0.9185129995439132, iteration: 212110
loss: 1.003481149673462,grad_norm: 0.9999990821657966, iteration: 212111
loss: 0.9993529319763184,grad_norm: 0.8152806657814664, iteration: 212112
loss: 0.9708221554756165,grad_norm: 0.9258595989111101, iteration: 212113
loss: 1.0012190341949463,grad_norm: 0.8396199672027359, iteration: 212114
loss: 1.015499472618103,grad_norm: 0.9999992633965302, iteration: 212115
loss: 0.9550138711929321,grad_norm: 0.9486933364207153, iteration: 212116
loss: 0.982972264289856,grad_norm: 0.765898695614327, iteration: 212117
loss: 1.0283808708190918,grad_norm: 0.9999991227670435, iteration: 212118
loss: 0.9897655844688416,grad_norm: 0.901657335126523, iteration: 212119
loss: 0.9761031866073608,grad_norm: 0.7866308273129933, iteration: 212120
loss: 0.9683547019958496,grad_norm: 0.9015639540769083, iteration: 212121
loss: 0.9939615726470947,grad_norm: 0.9999992051218275, iteration: 212122
loss: 1.0228813886642456,grad_norm: 0.7751496294587037, iteration: 212123
loss: 1.0134360790252686,grad_norm: 0.9999992064382468, iteration: 212124
loss: 0.9607042670249939,grad_norm: 0.8119568440485727, iteration: 212125
loss: 0.9715805649757385,grad_norm: 0.9999991771945416, iteration: 212126
loss: 1.0160361528396606,grad_norm: 0.9824008162699067, iteration: 212127
loss: 0.9936316013336182,grad_norm: 0.9999992604626207, iteration: 212128
loss: 0.9852451682090759,grad_norm: 0.9999990863621829, iteration: 212129
loss: 0.9745056629180908,grad_norm: 0.9453891925931242, iteration: 212130
loss: 1.0062609910964966,grad_norm: 0.8574346047439251, iteration: 212131
loss: 0.9670345783233643,grad_norm: 0.9671625042464803, iteration: 212132
loss: 0.9995088577270508,grad_norm: 0.9999992869505859, iteration: 212133
loss: 1.0006612539291382,grad_norm: 0.9224865205490984, iteration: 212134
loss: 0.9737895727157593,grad_norm: 0.9999990915872717, iteration: 212135
loss: 0.9872056841850281,grad_norm: 0.9999991604774552, iteration: 212136
loss: 0.9959741234779358,grad_norm: 0.7344176359590194, iteration: 212137
loss: 0.981271505355835,grad_norm: 0.9999991037715908, iteration: 212138
loss: 1.022269606590271,grad_norm: 0.9865692492333378, iteration: 212139
loss: 1.0227450132369995,grad_norm: 0.7615207488768142, iteration: 212140
loss: 0.9845640063285828,grad_norm: 0.9237516042968629, iteration: 212141
loss: 0.9998951554298401,grad_norm: 0.8668942737518056, iteration: 212142
loss: 1.0027804374694824,grad_norm: 0.9796247941527892, iteration: 212143
loss: 0.9779073596000671,grad_norm: 0.8527339581473489, iteration: 212144
loss: 0.9739459753036499,grad_norm: 0.9074213092265664, iteration: 212145
loss: 0.9862862229347229,grad_norm: 0.9284263621871724, iteration: 212146
loss: 1.0291756391525269,grad_norm: 0.9902206294446277, iteration: 212147
loss: 1.0083123445510864,grad_norm: 0.9999993194961038, iteration: 212148
loss: 1.0419902801513672,grad_norm: 0.9999992281348564, iteration: 212149
loss: 0.9935050010681152,grad_norm: 0.7869006729893746, iteration: 212150
loss: 1.0052145719528198,grad_norm: 0.7043564297679671, iteration: 212151
loss: 0.9931147694587708,grad_norm: 0.8391553999866699, iteration: 212152
loss: 0.9996040463447571,grad_norm: 0.9993097482780855, iteration: 212153
loss: 0.9540944695472717,grad_norm: 0.9747619912552477, iteration: 212154
loss: 1.1247987747192383,grad_norm: 0.9332541971745303, iteration: 212155
loss: 0.9913082718849182,grad_norm: 0.9999991543012486, iteration: 212156
loss: 1.0064090490341187,grad_norm: 0.9624577826067366, iteration: 212157
loss: 1.0278104543685913,grad_norm: 0.7420863165124777, iteration: 212158
loss: 1.009529948234558,grad_norm: 0.7998175050506755, iteration: 212159
loss: 0.9914271831512451,grad_norm: 0.9999991995390519, iteration: 212160
loss: 1.0043073892593384,grad_norm: 0.8944060185917924, iteration: 212161
loss: 0.9711156487464905,grad_norm: 0.9138653501783413, iteration: 212162
loss: 0.9835371375083923,grad_norm: 0.86279915341711, iteration: 212163
loss: 1.0012218952178955,grad_norm: 0.9999990544178576, iteration: 212164
loss: 1.065132737159729,grad_norm: 0.9922425973568764, iteration: 212165
loss: 1.0117402076721191,grad_norm: 0.8961018496091212, iteration: 212166
loss: 1.0114095211029053,grad_norm: 0.9999989993110681, iteration: 212167
loss: 1.0026899576187134,grad_norm: 0.8544288664747203, iteration: 212168
loss: 0.9812189936637878,grad_norm: 0.9408044010862845, iteration: 212169
loss: 1.0274271965026855,grad_norm: 0.9009154339228805, iteration: 212170
loss: 1.0308054685592651,grad_norm: 0.8928232829991174, iteration: 212171
loss: 1.0023167133331299,grad_norm: 0.7706288934227369, iteration: 212172
loss: 1.0027964115142822,grad_norm: 0.9999991652004996, iteration: 212173
loss: 0.971684992313385,grad_norm: 0.9165738717224424, iteration: 212174
loss: 0.9769725799560547,grad_norm: 0.7555292188602899, iteration: 212175
loss: 1.0043376684188843,grad_norm: 0.8471898724741342, iteration: 212176
loss: 1.00319504737854,grad_norm: 0.8667533121810714, iteration: 212177
loss: 1.0200854539871216,grad_norm: 0.9149812143867184, iteration: 212178
loss: 1.0179442167282104,grad_norm: 0.9116329408396265, iteration: 212179
loss: 1.0246334075927734,grad_norm: 0.8997344221748405, iteration: 212180
loss: 0.9773109555244446,grad_norm: 0.867320302416257, iteration: 212181
loss: 1.0005176067352295,grad_norm: 0.910200683863638, iteration: 212182
loss: 1.0010205507278442,grad_norm: 0.9999990794789921, iteration: 212183
loss: 1.0171220302581787,grad_norm: 0.889502796612846, iteration: 212184
loss: 1.021885871887207,grad_norm: 0.9349109104308777, iteration: 212185
loss: 1.0237313508987427,grad_norm: 0.9999990751838725, iteration: 212186
loss: 1.0211528539657593,grad_norm: 0.9193380478056148, iteration: 212187
loss: 0.9921708106994629,grad_norm: 0.9955623840814957, iteration: 212188
loss: 0.9878942966461182,grad_norm: 0.9999993738630983, iteration: 212189
loss: 0.9556894302368164,grad_norm: 0.8683475090089338, iteration: 212190
loss: 0.9692315459251404,grad_norm: 0.9449374513254268, iteration: 212191
loss: 0.981283962726593,grad_norm: 0.9398823855157268, iteration: 212192
loss: 0.9877135753631592,grad_norm: 0.954118050971394, iteration: 212193
loss: 0.9929662346839905,grad_norm: 0.9999989624258909, iteration: 212194
loss: 1.018467903137207,grad_norm: 0.9011286292980474, iteration: 212195
loss: 0.9799781441688538,grad_norm: 0.8786770940964365, iteration: 212196
loss: 0.9809626340866089,grad_norm: 0.7815146260403081, iteration: 212197
loss: 1.0207382440567017,grad_norm: 0.999999277541365, iteration: 212198
loss: 1.0250520706176758,grad_norm: 0.9658625780238046, iteration: 212199
loss: 0.9965847730636597,grad_norm: 0.8303777251341892, iteration: 212200
loss: 1.009731411933899,grad_norm: 0.9999992267228393, iteration: 212201
loss: 0.9883551001548767,grad_norm: 0.8062408945083857, iteration: 212202
loss: 1.0126279592514038,grad_norm: 0.8556252345131989, iteration: 212203
loss: 1.0220242738723755,grad_norm: 0.7686309674189977, iteration: 212204
loss: 1.0003607273101807,grad_norm: 0.9916049418598983, iteration: 212205
loss: 0.991484522819519,grad_norm: 0.8127574705388413, iteration: 212206
loss: 1.0180938243865967,grad_norm: 0.9767408493952948, iteration: 212207
loss: 1.0096416473388672,grad_norm: 0.9046601826537336, iteration: 212208
loss: 1.0232315063476562,grad_norm: 0.9999990818897323, iteration: 212209
loss: 1.0130232572555542,grad_norm: 0.9999991057128593, iteration: 212210
loss: 1.0266252756118774,grad_norm: 0.9463798379266488, iteration: 212211
loss: 0.9916307926177979,grad_norm: 0.9601846985832075, iteration: 212212
loss: 0.9786856770515442,grad_norm: 0.899719293520762, iteration: 212213
loss: 0.9530225992202759,grad_norm: 0.9393530301023694, iteration: 212214
loss: 0.9867016077041626,grad_norm: 0.9315330239161826, iteration: 212215
loss: 0.9847822189331055,grad_norm: 0.9683763021100638, iteration: 212216
loss: 1.0206867456436157,grad_norm: 0.8782323710189751, iteration: 212217
loss: 0.9618250727653503,grad_norm: 0.9563990578747864, iteration: 212218
loss: 1.020622968673706,grad_norm: 0.9999998653460543, iteration: 212219
loss: 1.0022752285003662,grad_norm: 0.9999991857675735, iteration: 212220
loss: 0.9725924134254456,grad_norm: 0.8500026495386102, iteration: 212221
loss: 0.9685769081115723,grad_norm: 0.9174985350838516, iteration: 212222
loss: 1.0212926864624023,grad_norm: 0.7760292762482204, iteration: 212223
loss: 1.0067631006240845,grad_norm: 0.9505837126787657, iteration: 212224
loss: 1.007185697555542,grad_norm: 0.9354799599718272, iteration: 212225
loss: 0.974420964717865,grad_norm: 0.9999990065669759, iteration: 212226
loss: 1.031974196434021,grad_norm: 0.9255003184258537, iteration: 212227
loss: 1.0347262620925903,grad_norm: 0.9999990088455075, iteration: 212228
loss: 0.9706777930259705,grad_norm: 0.8459749410299876, iteration: 212229
loss: 1.0071316957473755,grad_norm: 0.9501450312429465, iteration: 212230
loss: 1.0250731706619263,grad_norm: 0.9170067289578118, iteration: 212231
loss: 1.0243339538574219,grad_norm: 0.9999992257011289, iteration: 212232
loss: 0.9730967283248901,grad_norm: 0.8989276830878847, iteration: 212233
loss: 1.0152373313903809,grad_norm: 0.9577796122303462, iteration: 212234
loss: 1.0115433931350708,grad_norm: 0.9734718972288469, iteration: 212235
loss: 1.0092235803604126,grad_norm: 0.9945193929201567, iteration: 212236
loss: 1.0123575925827026,grad_norm: 0.9999992258917784, iteration: 212237
loss: 1.0063486099243164,grad_norm: 0.999999098012449, iteration: 212238
loss: 0.9830064177513123,grad_norm: 0.9999990573327798, iteration: 212239
loss: 0.9826106429100037,grad_norm: 0.8714568015411566, iteration: 212240
loss: 0.9673009514808655,grad_norm: 0.9999995304829437, iteration: 212241
loss: 1.051443338394165,grad_norm: 0.9182557520654338, iteration: 212242
loss: 1.0361145734786987,grad_norm: 0.9999994741960604, iteration: 212243
loss: 1.0004774332046509,grad_norm: 0.7501790364614137, iteration: 212244
loss: 1.0064520835876465,grad_norm: 0.9252279618256094, iteration: 212245
loss: 1.0217124223709106,grad_norm: 0.9999991211589065, iteration: 212246
loss: 0.9885449409484863,grad_norm: 0.999999275196138, iteration: 212247
loss: 1.0311083793640137,grad_norm: 0.9593841084469235, iteration: 212248
loss: 1.0276433229446411,grad_norm: 0.9999990709958763, iteration: 212249
loss: 1.0081478357315063,grad_norm: 0.9999991017958062, iteration: 212250
loss: 1.0191222429275513,grad_norm: 0.9337365569243746, iteration: 212251
loss: 0.9836206436157227,grad_norm: 0.8647484482245553, iteration: 212252
loss: 1.0028026103973389,grad_norm: 0.8875386591040197, iteration: 212253
loss: 1.005574345588684,grad_norm: 0.9999990390839884, iteration: 212254
loss: 1.0238040685653687,grad_norm: 0.9879065908206581, iteration: 212255
loss: 0.9982975721359253,grad_norm: 0.9293726888838595, iteration: 212256
loss: 0.9946434497833252,grad_norm: 0.9771717702096757, iteration: 212257
loss: 0.9799957275390625,grad_norm: 0.9916835793991988, iteration: 212258
loss: 1.0409682989120483,grad_norm: 0.9262963519544818, iteration: 212259
loss: 0.9731739163398743,grad_norm: 0.9818173218070024, iteration: 212260
loss: 0.9902940392494202,grad_norm: 0.9673051009852995, iteration: 212261
loss: 0.9704441428184509,grad_norm: 0.9999993004199356, iteration: 212262
loss: 0.9375249147415161,grad_norm: 0.979703136230215, iteration: 212263
loss: 1.0317212343215942,grad_norm: 0.9492863763187439, iteration: 212264
loss: 1.0147355794906616,grad_norm: 0.8604928556983534, iteration: 212265
loss: 1.0726796388626099,grad_norm: 0.9999991861553426, iteration: 212266
loss: 0.9990190863609314,grad_norm: 0.8344847204218723, iteration: 212267
loss: 0.9993976950645447,grad_norm: 0.9999989521261847, iteration: 212268
loss: 0.9855132699012756,grad_norm: 0.8082439614194318, iteration: 212269
loss: 1.0186456441879272,grad_norm: 0.8939584803199934, iteration: 212270
loss: 0.9930219650268555,grad_norm: 0.9766014276830962, iteration: 212271
loss: 1.0380536317825317,grad_norm: 0.9999996546695015, iteration: 212272
loss: 0.9794058799743652,grad_norm: 0.8175684225860747, iteration: 212273
loss: 1.0121843814849854,grad_norm: 0.999999361649865, iteration: 212274
loss: 1.02515709400177,grad_norm: 0.902570540024177, iteration: 212275
loss: 0.9836809039115906,grad_norm: 0.9936970127498127, iteration: 212276
loss: 1.0120244026184082,grad_norm: 0.7211796604411649, iteration: 212277
loss: 0.9898836016654968,grad_norm: 0.9069183527324193, iteration: 212278
loss: 1.014944314956665,grad_norm: 0.8556171574258079, iteration: 212279
loss: 1.00038743019104,grad_norm: 0.9999991629326126, iteration: 212280
loss: 0.9945586323738098,grad_norm: 0.9999990864684746, iteration: 212281
loss: 0.9978269338607788,grad_norm: 0.9356915735895437, iteration: 212282
loss: 0.9487395882606506,grad_norm: 0.9263589811566275, iteration: 212283
loss: 1.0003892183303833,grad_norm: 0.9108058997130233, iteration: 212284
loss: 0.9948121905326843,grad_norm: 0.915438399392427, iteration: 212285
loss: 0.9825490713119507,grad_norm: 0.9868153569971174, iteration: 212286
loss: 1.0492699146270752,grad_norm: 0.9373295299423267, iteration: 212287
loss: 0.9653450846672058,grad_norm: 0.9999992370768203, iteration: 212288
loss: 0.9753946661949158,grad_norm: 0.97153066496722, iteration: 212289
loss: 1.0257493257522583,grad_norm: 0.9999991307220071, iteration: 212290
loss: 0.996518075466156,grad_norm: 0.9223553801386599, iteration: 212291
loss: 0.9880905151367188,grad_norm: 0.976841988518476, iteration: 212292
loss: 0.9674361348152161,grad_norm: 0.922363523377462, iteration: 212293
loss: 1.0137892961502075,grad_norm: 0.9529566759392978, iteration: 212294
loss: 0.9875412583351135,grad_norm: 0.9718791339198122, iteration: 212295
loss: 1.0186020135879517,grad_norm: 0.9999990437147315, iteration: 212296
loss: 1.0071828365325928,grad_norm: 1.0000000539406309, iteration: 212297
loss: 1.0074560642242432,grad_norm: 0.8082663988346185, iteration: 212298
loss: 1.0061976909637451,grad_norm: 0.9093418377821901, iteration: 212299
loss: 0.9708954691886902,grad_norm: 0.9999990308107456, iteration: 212300
loss: 0.9766327142715454,grad_norm: 0.9969689868480275, iteration: 212301
loss: 1.0146915912628174,grad_norm: 0.8417165917535523, iteration: 212302
loss: 0.9956491589546204,grad_norm: 0.8833089657048656, iteration: 212303
loss: 0.9763191342353821,grad_norm: 0.9162627737114606, iteration: 212304
loss: 1.035979986190796,grad_norm: 0.8811272722885115, iteration: 212305
loss: 0.9681569933891296,grad_norm: 0.6864642255073311, iteration: 212306
loss: 0.9888681173324585,grad_norm: 0.9999991122058137, iteration: 212307
loss: 1.059192419052124,grad_norm: 0.9083830404965344, iteration: 212308
loss: 0.9828031063079834,grad_norm: 0.9653537840800995, iteration: 212309
loss: 1.0025556087493896,grad_norm: 0.9424086677760766, iteration: 212310
loss: 0.9862393140792847,grad_norm: 0.8374083011637913, iteration: 212311
loss: 1.0626732110977173,grad_norm: 0.9999992772145166, iteration: 212312
loss: 0.9489011764526367,grad_norm: 0.9663287164191896, iteration: 212313
loss: 1.009012222290039,grad_norm: 0.8937155288892631, iteration: 212314
loss: 0.994572639465332,grad_norm: 0.888548130414457, iteration: 212315
loss: 1.0055311918258667,grad_norm: 0.9999991195174793, iteration: 212316
loss: 0.9589555859565735,grad_norm: 0.868002585678648, iteration: 212317
loss: 0.9640711545944214,grad_norm: 0.8462988013131767, iteration: 212318
loss: 1.0054974555969238,grad_norm: 0.9643215089018037, iteration: 212319
loss: 1.0455759763717651,grad_norm: 0.9999992765610415, iteration: 212320
loss: 0.9703353643417358,grad_norm: 0.9052708443256927, iteration: 212321
loss: 1.002045750617981,grad_norm: 0.84013259196113, iteration: 212322
loss: 1.004807710647583,grad_norm: 0.9287769324112279, iteration: 212323
loss: 0.9696731567382812,grad_norm: 0.7621146848130167, iteration: 212324
loss: 1.0105267763137817,grad_norm: 0.9447085773406175, iteration: 212325
loss: 1.0037875175476074,grad_norm: 0.7965042994704267, iteration: 212326
loss: 0.9760880470275879,grad_norm: 0.9317037575280648, iteration: 212327
loss: 0.9672900438308716,grad_norm: 0.9999990527849223, iteration: 212328
loss: 0.9687215685844421,grad_norm: 0.9999992500554652, iteration: 212329
loss: 1.030800700187683,grad_norm: 0.9999991334638874, iteration: 212330
loss: 0.9783599972724915,grad_norm: 0.837126962478589, iteration: 212331
loss: 0.9844807982444763,grad_norm: 0.7969459453285626, iteration: 212332
loss: 0.9791368246078491,grad_norm: 0.9316021840620591, iteration: 212333
loss: 1.0274865627288818,grad_norm: 0.7926344214667099, iteration: 212334
loss: 0.9790989756584167,grad_norm: 0.9218523827708173, iteration: 212335
loss: 1.0094399452209473,grad_norm: 0.9999990884062427, iteration: 212336
loss: 0.9769327044487,grad_norm: 0.9999993740304554, iteration: 212337
loss: 0.9878889322280884,grad_norm: 0.9999993877463752, iteration: 212338
loss: 1.026112675666809,grad_norm: 0.9999992075431696, iteration: 212339
loss: 1.048671841621399,grad_norm: 0.8651580471435785, iteration: 212340
loss: 1.000404953956604,grad_norm: 0.8038241214389176, iteration: 212341
loss: 0.986170768737793,grad_norm: 0.947377091432377, iteration: 212342
loss: 1.0035240650177002,grad_norm: 0.8781142801020247, iteration: 212343
loss: 0.9754968285560608,grad_norm: 0.8753626568264778, iteration: 212344
loss: 1.0093473196029663,grad_norm: 0.9999994605706787, iteration: 212345
loss: 0.9829666018486023,grad_norm: 0.8501618104969983, iteration: 212346
loss: 1.0146952867507935,grad_norm: 0.8186672383706631, iteration: 212347
loss: 1.014660358428955,grad_norm: 0.9999991692813077, iteration: 212348
loss: 0.9853954911231995,grad_norm: 0.9558999829327116, iteration: 212349
loss: 1.0119593143463135,grad_norm: 0.999998982883804, iteration: 212350
loss: 1.034581184387207,grad_norm: 0.8932762672348642, iteration: 212351
loss: 0.9880204200744629,grad_norm: 0.8936777376276399, iteration: 212352
loss: 0.9999176859855652,grad_norm: 0.8760424017189742, iteration: 212353
loss: 1.012197732925415,grad_norm: 0.9835096032921983, iteration: 212354
loss: 1.0026882886886597,grad_norm: 0.758275593447474, iteration: 212355
loss: 0.971219003200531,grad_norm: 0.9119303191901255, iteration: 212356
loss: 1.0036888122558594,grad_norm: 0.7221548699183443, iteration: 212357
loss: 1.0387587547302246,grad_norm: 0.9999991547598365, iteration: 212358
loss: 0.9940588474273682,grad_norm: 0.9999993853541056, iteration: 212359
loss: 1.0096012353897095,grad_norm: 0.9999992128011094, iteration: 212360
loss: 0.9274763464927673,grad_norm: 0.9999992127931606, iteration: 212361
loss: 0.9678982496261597,grad_norm: 0.9753394249166417, iteration: 212362
loss: 1.0086787939071655,grad_norm: 0.8664194740938463, iteration: 212363
loss: 0.9990523457527161,grad_norm: 0.9999991260855208, iteration: 212364
loss: 0.9969877600669861,grad_norm: 0.7559080582143122, iteration: 212365
loss: 1.0264288187026978,grad_norm: 0.8449982020741742, iteration: 212366
loss: 1.0188069343566895,grad_norm: 0.9024122702002276, iteration: 212367
loss: 1.0358061790466309,grad_norm: 0.9999992060877725, iteration: 212368
loss: 1.0029692649841309,grad_norm: 0.9999990581423218, iteration: 212369
loss: 0.9888367056846619,grad_norm: 0.9143460993359306, iteration: 212370
loss: 0.9987894296646118,grad_norm: 0.9999990934152211, iteration: 212371
loss: 0.9957633018493652,grad_norm: 0.8571922850329384, iteration: 212372
loss: 1.0019079446792603,grad_norm: 0.9999991334350663, iteration: 212373
loss: 0.9952533841133118,grad_norm: 0.8745619314110791, iteration: 212374
loss: 1.0194991827011108,grad_norm: 0.9999992905502495, iteration: 212375
loss: 0.9853184223175049,grad_norm: 0.9232902917506268, iteration: 212376
loss: 0.9593192934989929,grad_norm: 0.9714492028517712, iteration: 212377
loss: 1.0027192831039429,grad_norm: 0.96690113956761, iteration: 212378
loss: 1.1670435667037964,grad_norm: 0.9999998310750445, iteration: 212379
loss: 1.0085521936416626,grad_norm: 0.956310588951631, iteration: 212380
loss: 0.9836413264274597,grad_norm: 0.9999991010668109, iteration: 212381
loss: 1.0061019659042358,grad_norm: 0.999999033884195, iteration: 212382
loss: 1.0327653884887695,grad_norm: 0.8813906510246494, iteration: 212383
loss: 1.0018459558486938,grad_norm: 0.8002489254132829, iteration: 212384
loss: 1.0068081617355347,grad_norm: 0.8650026035117521, iteration: 212385
loss: 1.043992042541504,grad_norm: 0.8835410560746236, iteration: 212386
loss: 1.0205345153808594,grad_norm: 0.9752261264346777, iteration: 212387
loss: 0.9752821326255798,grad_norm: 0.9999990679931533, iteration: 212388
loss: 1.0184510946273804,grad_norm: 0.9999992756254684, iteration: 212389
loss: 0.994240939617157,grad_norm: 0.8420882909571146, iteration: 212390
loss: 1.0660003423690796,grad_norm: 0.9999991112156993, iteration: 212391
loss: 1.0193461179733276,grad_norm: 0.9999991772789278, iteration: 212392
loss: 0.9643651247024536,grad_norm: 0.9908756513427553, iteration: 212393
loss: 0.9811865091323853,grad_norm: 0.999999155676264, iteration: 212394
loss: 1.0112030506134033,grad_norm: 0.9634981544349149, iteration: 212395
loss: 0.9658570885658264,grad_norm: 0.8123538220598786, iteration: 212396
loss: 0.9899376630783081,grad_norm: 0.9470671683211362, iteration: 212397
loss: 0.9934543967247009,grad_norm: 0.8567016312154848, iteration: 212398
loss: 1.000348687171936,grad_norm: 0.999999018252142, iteration: 212399
loss: 1.0331051349639893,grad_norm: 0.879947124184555, iteration: 212400
loss: 0.9907001256942749,grad_norm: 0.9999991194664885, iteration: 212401
loss: 1.003636360168457,grad_norm: 0.8498345847404443, iteration: 212402
loss: 0.9929157495498657,grad_norm: 0.7999039377480475, iteration: 212403
loss: 1.0153814554214478,grad_norm: 0.9999991180931883, iteration: 212404
loss: 1.0692248344421387,grad_norm: 0.9999993455584272, iteration: 212405
loss: 0.9978368282318115,grad_norm: 0.8759584016042077, iteration: 212406
loss: 1.0167145729064941,grad_norm: 0.9259923234420298, iteration: 212407
loss: 0.9937297105789185,grad_norm: 0.9000679747528251, iteration: 212408
loss: 1.0146888494491577,grad_norm: 0.9999999413960328, iteration: 212409
loss: 0.9949138760566711,grad_norm: 0.9481570519030295, iteration: 212410
loss: 1.0187172889709473,grad_norm: 0.8651686024082408, iteration: 212411
loss: 1.0095058679580688,grad_norm: 0.925882622122754, iteration: 212412
loss: 0.9845457673072815,grad_norm: 0.876682102787776, iteration: 212413
loss: 1.02621591091156,grad_norm: 0.9610609700885065, iteration: 212414
loss: 1.0294950008392334,grad_norm: 0.9999992846414287, iteration: 212415
loss: 1.0410560369491577,grad_norm: 0.9633534506777534, iteration: 212416
loss: 0.9658789038658142,grad_norm: 0.7658003207531898, iteration: 212417
loss: 1.0098598003387451,grad_norm: 0.818759123401892, iteration: 212418
loss: 0.9679931402206421,grad_norm: 0.8729327015239602, iteration: 212419
loss: 0.999687671661377,grad_norm: 0.7855762639655534, iteration: 212420
loss: 0.995145857334137,grad_norm: 0.9999993034789724, iteration: 212421
loss: 1.0184814929962158,grad_norm: 0.9999993892790405, iteration: 212422
loss: 1.0121960639953613,grad_norm: 0.9999992741654461, iteration: 212423
loss: 1.0283912420272827,grad_norm: 0.8682743179540242, iteration: 212424
loss: 0.9886817932128906,grad_norm: 0.9999991237564481, iteration: 212425
loss: 0.9917984008789062,grad_norm: 0.8480060863753488, iteration: 212426
loss: 1.0097575187683105,grad_norm: 0.9789902169912816, iteration: 212427
loss: 1.0124919414520264,grad_norm: 0.7465158894561448, iteration: 212428
loss: 1.051154613494873,grad_norm: 0.9999991657755509, iteration: 212429
loss: 1.02381432056427,grad_norm: 0.9999989914404434, iteration: 212430
loss: 0.9720009565353394,grad_norm: 0.8094330201078577, iteration: 212431
loss: 1.0015313625335693,grad_norm: 0.9999990063065203, iteration: 212432
loss: 1.003265380859375,grad_norm: 0.999999036037544, iteration: 212433
loss: 1.0232442617416382,grad_norm: 0.9248485862853083, iteration: 212434
loss: 0.9520476460456848,grad_norm: 0.999999169159788, iteration: 212435
loss: 0.9996466040611267,grad_norm: 0.949202666325516, iteration: 212436
loss: 1.011972427368164,grad_norm: 0.9077591518470398, iteration: 212437
loss: 1.024662971496582,grad_norm: 0.9016297551462371, iteration: 212438
loss: 1.0089589357376099,grad_norm: 0.7936013282155101, iteration: 212439
loss: 0.9887338280677795,grad_norm: 0.9911020371734779, iteration: 212440
loss: 1.0201870203018188,grad_norm: 0.8704368311888285, iteration: 212441
loss: 1.0035250186920166,grad_norm: 0.9999993319463678, iteration: 212442
loss: 0.9671303629875183,grad_norm: 0.936661377762798, iteration: 212443
loss: 1.0898730754852295,grad_norm: 0.9335232592535595, iteration: 212444
loss: 0.9842900633811951,grad_norm: 0.9999990824025352, iteration: 212445
loss: 1.0110044479370117,grad_norm: 0.7286771159517749, iteration: 212446
loss: 0.9928957223892212,grad_norm: 0.9999990914572796, iteration: 212447
loss: 0.9473837614059448,grad_norm: 0.9999990884446417, iteration: 212448
loss: 0.9900718927383423,grad_norm: 0.9894435582975881, iteration: 212449
loss: 0.9997020959854126,grad_norm: 0.9338932121451435, iteration: 212450
loss: 0.9941056966781616,grad_norm: 0.8711147897322284, iteration: 212451
loss: 0.9757612347602844,grad_norm: 0.8799199517617666, iteration: 212452
loss: 0.9940748810768127,grad_norm: 0.9034130889476683, iteration: 212453
loss: 0.9588431119918823,grad_norm: 0.9999990830392598, iteration: 212454
loss: 0.9666982293128967,grad_norm: 0.9999990556414005, iteration: 212455
loss: 0.9881578087806702,grad_norm: 0.9012430236338587, iteration: 212456
loss: 0.9869707226753235,grad_norm: 0.8487874972941271, iteration: 212457
loss: 0.9956741333007812,grad_norm: 0.9999990197028508, iteration: 212458
loss: 0.9957475662231445,grad_norm: 0.9999990246207382, iteration: 212459
loss: 0.992424488067627,grad_norm: 0.7835586722737329, iteration: 212460
loss: 1.0661038160324097,grad_norm: 0.9999997818806551, iteration: 212461
loss: 0.9833359122276306,grad_norm: 0.9425112867388866, iteration: 212462
loss: 0.9855453968048096,grad_norm: 0.9795652087500175, iteration: 212463
loss: 0.965003490447998,grad_norm: 0.9089873334115409, iteration: 212464
loss: 0.9637452960014343,grad_norm: 0.9799638396033153, iteration: 212465
loss: 0.9733603596687317,grad_norm: 0.8925226561648679, iteration: 212466
loss: 1.049684762954712,grad_norm: 0.9999992954790742, iteration: 212467
loss: 1.0155731439590454,grad_norm: 0.9481814553358391, iteration: 212468
loss: 0.9710281491279602,grad_norm: 0.8426140051483119, iteration: 212469
loss: 1.014913558959961,grad_norm: 0.8614378314751303, iteration: 212470
loss: 1.0279548168182373,grad_norm: 0.9999993353476306, iteration: 212471
loss: 0.983989953994751,grad_norm: 0.9999991624499668, iteration: 212472
loss: 1.0129462480545044,grad_norm: 0.8682526428103042, iteration: 212473
loss: 0.9652092456817627,grad_norm: 0.8035589979808877, iteration: 212474
loss: 1.0300910472869873,grad_norm: 0.9240333191394268, iteration: 212475
loss: 0.9866876006126404,grad_norm: 0.9301626739098106, iteration: 212476
loss: 1.003278374671936,grad_norm: 0.9999990502478374, iteration: 212477
loss: 0.9545649886131287,grad_norm: 0.9999989974216841, iteration: 212478
loss: 1.0194146633148193,grad_norm: 0.908390265857277, iteration: 212479
loss: 1.0205519199371338,grad_norm: 0.9210626875118975, iteration: 212480
loss: 1.02347993850708,grad_norm: 0.8310869216557453, iteration: 212481
loss: 1.0586761236190796,grad_norm: 0.9636238239834551, iteration: 212482
loss: 0.9948418140411377,grad_norm: 0.9994300408519019, iteration: 212483
loss: 1.0079851150512695,grad_norm: 0.9699644800216225, iteration: 212484
loss: 0.993823230266571,grad_norm: 0.7637833722562146, iteration: 212485
loss: 1.0374478101730347,grad_norm: 0.9999990418334533, iteration: 212486
loss: 0.9756212830543518,grad_norm: 0.8737177492218057, iteration: 212487
loss: 0.9626454710960388,grad_norm: 0.818385119311225, iteration: 212488
loss: 1.0080256462097168,grad_norm: 0.999999122533179, iteration: 212489
loss: 0.9733162522315979,grad_norm: 0.8588276490290015, iteration: 212490
loss: 1.0376111268997192,grad_norm: 0.8777768926360999, iteration: 212491
loss: 1.0491410493850708,grad_norm: 0.9999989332196205, iteration: 212492
loss: 1.0010249614715576,grad_norm: 0.9999991952250815, iteration: 212493
loss: 0.9791990518569946,grad_norm: 0.9999992117617064, iteration: 212494
loss: 1.0079337358474731,grad_norm: 0.9000885938623577, iteration: 212495
loss: 1.0367802381515503,grad_norm: 0.9999989763480768, iteration: 212496
loss: 1.0076044797897339,grad_norm: 0.7912467815583468, iteration: 212497
loss: 1.0223489999771118,grad_norm: 0.8281677829516407, iteration: 212498
loss: 1.0076851844787598,grad_norm: 0.993773865304984, iteration: 212499
loss: 1.0007209777832031,grad_norm: 0.9999992842767337, iteration: 212500
loss: 0.9973604083061218,grad_norm: 0.8676534558934688, iteration: 212501
loss: 0.9508605599403381,grad_norm: 0.8730077548449664, iteration: 212502
loss: 1.0100042819976807,grad_norm: 0.9632231110370899, iteration: 212503
loss: 0.9861515164375305,grad_norm: 0.8840127624362667, iteration: 212504
loss: 0.9927920699119568,grad_norm: 0.8832640518069841, iteration: 212505
loss: 1.003406286239624,grad_norm: 0.9602578143097056, iteration: 212506
loss: 1.032139539718628,grad_norm: 0.9464722478243921, iteration: 212507
loss: 0.9916619062423706,grad_norm: 0.9999990768389112, iteration: 212508
loss: 1.036628007888794,grad_norm: 0.9212146192386763, iteration: 212509
loss: 1.0396689176559448,grad_norm: 0.9999993657565625, iteration: 212510
loss: 0.9899812340736389,grad_norm: 0.8904605699896605, iteration: 212511
loss: 1.2331018447875977,grad_norm: 0.9999998789394852, iteration: 212512
loss: 0.9975540041923523,grad_norm: 0.8577912523869664, iteration: 212513
loss: 1.0080012083053589,grad_norm: 0.9898686071208425, iteration: 212514
loss: 1.1407051086425781,grad_norm: 0.9999993038619553, iteration: 212515
loss: 0.9854549765586853,grad_norm: 0.9072833625063652, iteration: 212516
loss: 0.9533179402351379,grad_norm: 0.9562033541743735, iteration: 212517
loss: 0.9976351857185364,grad_norm: 0.964977543788415, iteration: 212518
loss: 0.9793916940689087,grad_norm: 0.8745510577273002, iteration: 212519
loss: 1.0005980730056763,grad_norm: 0.9976334167430816, iteration: 212520
loss: 0.9937097430229187,grad_norm: 0.9731713259457312, iteration: 212521
loss: 0.9542403817176819,grad_norm: 0.9001279167804632, iteration: 212522
loss: 1.0086325407028198,grad_norm: 0.9992475235109987, iteration: 212523
loss: 1.0349812507629395,grad_norm: 0.9999993976686614, iteration: 212524
loss: 1.029313564300537,grad_norm: 0.9999990597783921, iteration: 212525
loss: 1.0073351860046387,grad_norm: 0.9383364415940558, iteration: 212526
loss: 0.9970543384552002,grad_norm: 0.90680634666526, iteration: 212527
loss: 1.0374412536621094,grad_norm: 0.8955123329784506, iteration: 212528
loss: 0.990444004535675,grad_norm: 0.9999990542310269, iteration: 212529
loss: 1.0133029222488403,grad_norm: 0.9999997983803796, iteration: 212530
loss: 1.0193541049957275,grad_norm: 0.9999992300980631, iteration: 212531
loss: 0.9930285215377808,grad_norm: 0.9688871094348144, iteration: 212532
loss: 1.0510023832321167,grad_norm: 0.7994414823129509, iteration: 212533
loss: 1.0123053789138794,grad_norm: 0.9508392779639911, iteration: 212534
loss: 0.9885690808296204,grad_norm: 0.9573391190286262, iteration: 212535
loss: 1.0689564943313599,grad_norm: 0.9999992877198043, iteration: 212536
loss: 1.0052717924118042,grad_norm: 0.9412009507943382, iteration: 212537
loss: 1.2852797508239746,grad_norm: 0.999999700186098, iteration: 212538
loss: 1.0204989910125732,grad_norm: 0.9999990978007816, iteration: 212539
loss: 1.0201013088226318,grad_norm: 0.9919881290616037, iteration: 212540
loss: 1.1019996404647827,grad_norm: 0.999999163920575, iteration: 212541
loss: 1.0186127424240112,grad_norm: 0.8571459680423603, iteration: 212542
loss: 1.0192207098007202,grad_norm: 0.9999993961130955, iteration: 212543
loss: 1.1239248514175415,grad_norm: 0.9999997977515844, iteration: 212544
loss: 1.0249919891357422,grad_norm: 0.8431464014168368, iteration: 212545
loss: 0.9993979334831238,grad_norm: 0.999998954492991, iteration: 212546
loss: 0.9858381152153015,grad_norm: 0.9530582106814327, iteration: 212547
loss: 0.9625210762023926,grad_norm: 0.967359329377749, iteration: 212548
loss: 0.9886306524276733,grad_norm: 0.9115687793356038, iteration: 212549
loss: 1.0237194299697876,grad_norm: 0.9999991944115334, iteration: 212550
loss: 0.999355673789978,grad_norm: 0.9841514648920863, iteration: 212551
loss: 0.9773991703987122,grad_norm: 0.9999991683094884, iteration: 212552
loss: 0.9538140892982483,grad_norm: 0.9250612951465012, iteration: 212553
loss: 0.9570217132568359,grad_norm: 0.9437516295987552, iteration: 212554
loss: 0.9735884666442871,grad_norm: 0.960943830995473, iteration: 212555
loss: 0.9881150126457214,grad_norm: 0.8741122302957015, iteration: 212556
loss: 0.9619064331054688,grad_norm: 0.8922585111881585, iteration: 212557
loss: 0.9715527892112732,grad_norm: 0.7505138187353, iteration: 212558
loss: 1.0031330585479736,grad_norm: 0.9252264934091711, iteration: 212559
loss: 1.0019346475601196,grad_norm: 0.7911379558557704, iteration: 212560
loss: 0.9837124347686768,grad_norm: 0.9999990618641059, iteration: 212561
loss: 1.0139192342758179,grad_norm: 0.931383712324432, iteration: 212562
loss: 1.0106887817382812,grad_norm: 0.8848111049454711, iteration: 212563
loss: 1.0057590007781982,grad_norm: 0.9379879306629789, iteration: 212564
loss: 1.0076141357421875,grad_norm: 0.9082498889033784, iteration: 212565
loss: 0.9858672022819519,grad_norm: 0.9999991251106333, iteration: 212566
loss: 1.3629529476165771,grad_norm: 0.9999999633010754, iteration: 212567
loss: 1.012894630432129,grad_norm: 0.9999990939006977, iteration: 212568
loss: 0.9777977466583252,grad_norm: 0.9999990500086677, iteration: 212569
loss: 1.0147846937179565,grad_norm: 0.842735694238205, iteration: 212570
loss: 1.0014744997024536,grad_norm: 0.9348857233510943, iteration: 212571
loss: 0.9959625601768494,grad_norm: 0.9999989872047715, iteration: 212572
loss: 0.9967471361160278,grad_norm: 0.9941947245025625, iteration: 212573
loss: 1.02442467212677,grad_norm: 0.9411456902351472, iteration: 212574
loss: 1.0220595598220825,grad_norm: 0.9999991281908781, iteration: 212575
loss: 1.1510077714920044,grad_norm: 0.9999999690474245, iteration: 212576
loss: 1.0041836500167847,grad_norm: 0.8249046780132572, iteration: 212577
loss: 1.04951012134552,grad_norm: 0.8787000431054532, iteration: 212578
loss: 0.9947669506072998,grad_norm: 0.9933572755234746, iteration: 212579
loss: 0.9942975044250488,grad_norm: 0.9009171065951648, iteration: 212580
loss: 0.99155193567276,grad_norm: 0.985573821914987, iteration: 212581
loss: 0.9940800666809082,grad_norm: 0.8497609187243356, iteration: 212582
loss: 0.9882943034172058,grad_norm: 0.9999990292941057, iteration: 212583
loss: 0.9971196055412292,grad_norm: 0.9999989961367673, iteration: 212584
loss: 1.005405306816101,grad_norm: 0.9273160517780694, iteration: 212585
loss: 0.9840617775917053,grad_norm: 0.8819368020381047, iteration: 212586
loss: 0.9988508820533752,grad_norm: 0.8789337950097903, iteration: 212587
loss: 1.0039222240447998,grad_norm: 0.9662931431574721, iteration: 212588
loss: 1.024689793586731,grad_norm: 0.8839366659557966, iteration: 212589
loss: 0.9418553709983826,grad_norm: 0.9280710112125778, iteration: 212590
loss: 1.013679027557373,grad_norm: 0.8958932892436556, iteration: 212591
loss: 1.1012896299362183,grad_norm: 0.9999991772521287, iteration: 212592
loss: 1.01434326171875,grad_norm: 0.9999989397568321, iteration: 212593
loss: 1.0250163078308105,grad_norm: 0.9310842122240274, iteration: 212594
loss: 1.0266109704971313,grad_norm: 0.9999991458603263, iteration: 212595
loss: 1.0273674726486206,grad_norm: 0.8924747853554662, iteration: 212596
loss: 1.0085978507995605,grad_norm: 0.7637278876309513, iteration: 212597
loss: 1.0045074224472046,grad_norm: 0.8631156657770823, iteration: 212598
loss: 0.988639235496521,grad_norm: 0.9999991336114158, iteration: 212599
loss: 1.0096412897109985,grad_norm: 0.9999990622833431, iteration: 212600
loss: 1.076836109161377,grad_norm: 0.9999990446152988, iteration: 212601
loss: 1.0131444931030273,grad_norm: 0.9560371205492163, iteration: 212602
loss: 1.022326111793518,grad_norm: 0.9999992640287211, iteration: 212603
loss: 0.9460583925247192,grad_norm: 0.7537436505285268, iteration: 212604
loss: 0.998656690120697,grad_norm: 0.8147867454434763, iteration: 212605
loss: 0.9911871552467346,grad_norm: 0.9999992610477915, iteration: 212606
loss: 1.021636724472046,grad_norm: 0.9353088927425638, iteration: 212607
loss: 1.0125148296356201,grad_norm: 0.9141042094406691, iteration: 212608
loss: 0.9587016701698303,grad_norm: 0.9354503314050012, iteration: 212609
loss: 0.974819004535675,grad_norm: 0.8584192952075809, iteration: 212610
loss: 1.0422717332839966,grad_norm: 0.9999989814376499, iteration: 212611
loss: 1.0123789310455322,grad_norm: 0.9200739319212792, iteration: 212612
loss: 0.986933171749115,grad_norm: 0.9782798890381755, iteration: 212613
loss: 0.9947680830955505,grad_norm: 0.8499776909079372, iteration: 212614
loss: 1.0554858446121216,grad_norm: 0.9999998450654128, iteration: 212615
loss: 1.051164984703064,grad_norm: 0.9509559061895324, iteration: 212616
loss: 0.9889335036277771,grad_norm: 0.9632434328253808, iteration: 212617
loss: 0.9913511872291565,grad_norm: 0.9199225588481339, iteration: 212618
loss: 1.039026141166687,grad_norm: 0.9999994435868514, iteration: 212619
loss: 1.0374844074249268,grad_norm: 0.9999992962666271, iteration: 212620
loss: 0.9937880635261536,grad_norm: 0.7737675786663639, iteration: 212621
loss: 0.9917621612548828,grad_norm: 0.9027972676088223, iteration: 212622
loss: 0.9834964871406555,grad_norm: 0.8829012439849369, iteration: 212623
loss: 0.9848442673683167,grad_norm: 0.8868815012943764, iteration: 212624
loss: 1.0401837825775146,grad_norm: 0.8712919120288798, iteration: 212625
loss: 0.9927499294281006,grad_norm: 0.8104514708228595, iteration: 212626
loss: 0.9888927340507507,grad_norm: 0.858791281748151, iteration: 212627
loss: 0.99970942735672,grad_norm: 0.9287643913110831, iteration: 212628
loss: 1.0547319650650024,grad_norm: 0.9999992060763594, iteration: 212629
loss: 0.9851582050323486,grad_norm: 0.9999996549851635, iteration: 212630
loss: 1.0339354276657104,grad_norm: 0.9999991105150041, iteration: 212631
loss: 0.9655929207801819,grad_norm: 0.9270039478541701, iteration: 212632
loss: 1.0369497537612915,grad_norm: 0.9999989676041219, iteration: 212633
loss: 1.0319095849990845,grad_norm: 0.9999991508375291, iteration: 212634
loss: 1.1073248386383057,grad_norm: 0.9999995137196094, iteration: 212635
loss: 1.0052506923675537,grad_norm: 0.8760573233385229, iteration: 212636
loss: 1.0079872608184814,grad_norm: 0.9134463067005268, iteration: 212637
loss: 1.0445088148117065,grad_norm: 0.9999992909191396, iteration: 212638
loss: 1.082695722579956,grad_norm: 0.9999996445977113, iteration: 212639
loss: 0.9701835513114929,grad_norm: 0.9187213875585813, iteration: 212640
loss: 1.1002295017242432,grad_norm: 0.9999990865345615, iteration: 212641
loss: 0.9758872985839844,grad_norm: 0.9999990952212285, iteration: 212642
loss: 1.0336742401123047,grad_norm: 0.9999991849724329, iteration: 212643
loss: 0.9875717163085938,grad_norm: 0.8160815671967054, iteration: 212644
loss: 1.0028705596923828,grad_norm: 0.9999991439124944, iteration: 212645
loss: 1.0077693462371826,grad_norm: 0.9389995426292154, iteration: 212646
loss: 0.9916724562644958,grad_norm: 0.7777596092146506, iteration: 212647
loss: 0.990349292755127,grad_norm: 0.9999991394319179, iteration: 212648
loss: 0.9942762851715088,grad_norm: 0.897693115437723, iteration: 212649
loss: 1.009775161743164,grad_norm: 0.8664491787246579, iteration: 212650
loss: 1.0783230066299438,grad_norm: 0.9999990315590362, iteration: 212651
loss: 0.9707111716270447,grad_norm: 0.9179345245294559, iteration: 212652
loss: 1.0480557680130005,grad_norm: 0.8692647570846765, iteration: 212653
loss: 0.9959558844566345,grad_norm: 0.9493555221536663, iteration: 212654
loss: 0.9875298142433167,grad_norm: 0.9106705469361334, iteration: 212655
loss: 0.9697144031524658,grad_norm: 0.9120544942783976, iteration: 212656
loss: 1.0232254266738892,grad_norm: 0.9847770499810042, iteration: 212657
loss: 1.0113009214401245,grad_norm: 0.9956271464382355, iteration: 212658
loss: 0.9869406223297119,grad_norm: 0.9999991317414774, iteration: 212659
loss: 0.9875075221061707,grad_norm: 0.9999992497445132, iteration: 212660
loss: 0.9848302006721497,grad_norm: 0.8544404495688869, iteration: 212661
loss: 0.999755859375,grad_norm: 0.9130184100744894, iteration: 212662
loss: 0.9947168231010437,grad_norm: 0.9894627971083693, iteration: 212663
loss: 1.0018609762191772,grad_norm: 0.8883062484176824, iteration: 212664
loss: 1.0140819549560547,grad_norm: 0.9999995870659759, iteration: 212665
loss: 0.9896316528320312,grad_norm: 0.9999990478667706, iteration: 212666
loss: 0.9932835698127747,grad_norm: 0.8971767714594804, iteration: 212667
loss: 0.999582052230835,grad_norm: 0.9999991502799771, iteration: 212668
loss: 1.0221846103668213,grad_norm: 0.7993126227339816, iteration: 212669
loss: 1.0016902685165405,grad_norm: 0.8851923177720077, iteration: 212670
loss: 1.007785439491272,grad_norm: 0.9999991568460254, iteration: 212671
loss: 0.9941877722740173,grad_norm: 0.9457372660210142, iteration: 212672
loss: 1.055578589439392,grad_norm: 0.9665882702881337, iteration: 212673
loss: 1.0049378871917725,grad_norm: 0.9999997155738369, iteration: 212674
loss: 1.0371415615081787,grad_norm: 0.962689348869624, iteration: 212675
loss: 1.0149587392807007,grad_norm: 0.8615037338972761, iteration: 212676
loss: 0.9826800227165222,grad_norm: 0.8540068443417008, iteration: 212677
loss: 0.948639988899231,grad_norm: 0.8132876344520085, iteration: 212678
loss: 1.0032161474227905,grad_norm: 0.9261875373834536, iteration: 212679
loss: 1.0370374917984009,grad_norm: 0.9889007444748552, iteration: 212680
loss: 0.9793646335601807,grad_norm: 0.8244180020449139, iteration: 212681
loss: 1.0057268142700195,grad_norm: 0.9526464073754208, iteration: 212682
loss: 0.9785623550415039,grad_norm: 0.9260945914387742, iteration: 212683
loss: 0.9899752736091614,grad_norm: 0.7936548248389192, iteration: 212684
loss: 0.9676216840744019,grad_norm: 0.9296211507306646, iteration: 212685
loss: 1.0151042938232422,grad_norm: 0.9999990603334067, iteration: 212686
loss: 1.0032402276992798,grad_norm: 0.9999991268540691, iteration: 212687
loss: 1.0281306505203247,grad_norm: 0.8810003723876207, iteration: 212688
loss: 0.9844557046890259,grad_norm: 0.8254322344042841, iteration: 212689
loss: 1.010948896408081,grad_norm: 0.8765847964919959, iteration: 212690
loss: 0.9754068851470947,grad_norm: 0.8047815828428685, iteration: 212691
loss: 1.0199167728424072,grad_norm: 0.871670088994993, iteration: 212692
loss: 1.0071223974227905,grad_norm: 0.9061996638315292, iteration: 212693
loss: 1.0022231340408325,grad_norm: 0.842721559433747, iteration: 212694
loss: 1.0002456903457642,grad_norm: 0.9944068241900276, iteration: 212695
loss: 0.963276743888855,grad_norm: 0.9105395685238619, iteration: 212696
loss: 0.9765603542327881,grad_norm: 0.9817293408423351, iteration: 212697
loss: 1.0607784986495972,grad_norm: 0.9999991179689538, iteration: 212698
loss: 0.9973360300064087,grad_norm: 0.8194614991597773, iteration: 212699
loss: 0.9863725900650024,grad_norm: 0.8344844016003211, iteration: 212700
loss: 1.0273174047470093,grad_norm: 0.8345566622727425, iteration: 212701
loss: 0.9869759678840637,grad_norm: 0.8749207253839606, iteration: 212702
loss: 0.9769688844680786,grad_norm: 0.9383250766073743, iteration: 212703
loss: 0.9920526742935181,grad_norm: 0.9999990176880166, iteration: 212704
loss: 1.0064014196395874,grad_norm: 0.9999991056096661, iteration: 212705
loss: 0.967646062374115,grad_norm: 0.8533027123011202, iteration: 212706
loss: 1.0149226188659668,grad_norm: 0.9999991177146869, iteration: 212707
loss: 1.0649855136871338,grad_norm: 0.999999162635665, iteration: 212708
loss: 1.0192573070526123,grad_norm: 0.9999992681177262, iteration: 212709
loss: 1.0249587297439575,grad_norm: 0.999999036249521, iteration: 212710
loss: 1.1244069337844849,grad_norm: 0.9999993174951868, iteration: 212711
loss: 0.9981746673583984,grad_norm: 0.7978760660458701, iteration: 212712
loss: 0.9866164922714233,grad_norm: 0.8894073267796208, iteration: 212713
loss: 1.0156610012054443,grad_norm: 0.9999991368409269, iteration: 212714
loss: 0.9803856611251831,grad_norm: 0.898592877527171, iteration: 212715
loss: 1.011910080909729,grad_norm: 0.9631015478344999, iteration: 212716
loss: 1.0569548606872559,grad_norm: 0.9999999290311321, iteration: 212717
loss: 0.9847270250320435,grad_norm: 0.8817865732780448, iteration: 212718
loss: 0.9932940006256104,grad_norm: 0.8457429232097586, iteration: 212719
loss: 1.1082957983016968,grad_norm: 0.9999999732535769, iteration: 212720
loss: 1.0097283124923706,grad_norm: 0.8212214650797725, iteration: 212721
loss: 1.0135971307754517,grad_norm: 0.8607124863141983, iteration: 212722
loss: 1.001915454864502,grad_norm: 0.9856862669559346, iteration: 212723
loss: 1.0207104682922363,grad_norm: 0.9522575045649504, iteration: 212724
loss: 1.0177130699157715,grad_norm: 0.9063817008034457, iteration: 212725
loss: 0.9592962265014648,grad_norm: 0.9999990068156676, iteration: 212726
loss: 1.0017181634902954,grad_norm: 0.8589068521584889, iteration: 212727
loss: 1.044131875038147,grad_norm: 0.9961584756854777, iteration: 212728
loss: 0.9689753651618958,grad_norm: 0.8721795004605988, iteration: 212729
loss: 1.015706181526184,grad_norm: 0.9826741773106572, iteration: 212730
loss: 1.0090742111206055,grad_norm: 0.9999994721881076, iteration: 212731
loss: 1.0140255689620972,grad_norm: 0.9835682383889294, iteration: 212732
loss: 1.009984016418457,grad_norm: 0.9999991964204202, iteration: 212733
loss: 0.9678291082382202,grad_norm: 0.9999990551605437, iteration: 212734
loss: 1.0371930599212646,grad_norm: 0.9999990570955029, iteration: 212735
loss: 1.0030224323272705,grad_norm: 0.9999992415988306, iteration: 212736
loss: 1.0001486539840698,grad_norm: 0.9999991375547974, iteration: 212737
loss: 0.9852701425552368,grad_norm: 0.9671008597767181, iteration: 212738
loss: 0.996026873588562,grad_norm: 0.9999995773870358, iteration: 212739
loss: 1.0249098539352417,grad_norm: 0.9427521937918877, iteration: 212740
loss: 1.019699215888977,grad_norm: 0.9999997092603056, iteration: 212741
loss: 0.9921962022781372,grad_norm: 0.966516245468081, iteration: 212742
loss: 1.0835360288619995,grad_norm: 0.9911263381859006, iteration: 212743
loss: 1.0308732986450195,grad_norm: 0.9746286101818331, iteration: 212744
loss: 1.0260164737701416,grad_norm: 0.9402402987445703, iteration: 212745
loss: 1.2583941221237183,grad_norm: 0.9999994848352088, iteration: 212746
loss: 0.9856379628181458,grad_norm: 0.8518182817587154, iteration: 212747
loss: 1.0394525527954102,grad_norm: 0.971040609877014, iteration: 212748
loss: 0.9786338210105896,grad_norm: 0.9015488074245785, iteration: 212749
loss: 0.9688168168067932,grad_norm: 0.917313010571879, iteration: 212750
loss: 1.0068227052688599,grad_norm: 0.8601493102334559, iteration: 212751
loss: 1.0591737031936646,grad_norm: 0.9999996542178964, iteration: 212752
loss: 1.0066375732421875,grad_norm: 0.9999990689842666, iteration: 212753
loss: 0.9894818663597107,grad_norm: 0.9316368027821106, iteration: 212754
loss: 0.976421594619751,grad_norm: 0.9999991518646958, iteration: 212755
loss: 0.9894133806228638,grad_norm: 0.9999993936963493, iteration: 212756
loss: 1.0060209035873413,grad_norm: 0.9999990686543168, iteration: 212757
loss: 1.1507771015167236,grad_norm: 0.9299966888025121, iteration: 212758
loss: 1.0209544897079468,grad_norm: 0.8068719389811598, iteration: 212759
loss: 1.0056779384613037,grad_norm: 0.9999990733794791, iteration: 212760
loss: 1.0105490684509277,grad_norm: 0.7426533996533177, iteration: 212761
loss: 0.9976379871368408,grad_norm: 0.9999992806048178, iteration: 212762
loss: 1.0438172817230225,grad_norm: 0.9849572897057599, iteration: 212763
loss: 0.9601935744285583,grad_norm: 0.9470909859152664, iteration: 212764
loss: 1.0442321300506592,grad_norm: 0.9138192443082472, iteration: 212765
loss: 1.0257532596588135,grad_norm: 0.9433649426985096, iteration: 212766
loss: 0.991051971912384,grad_norm: 0.8440792767990883, iteration: 212767
loss: 1.0185554027557373,grad_norm: 0.9841359180576915, iteration: 212768
loss: 0.9928523302078247,grad_norm: 0.9999991023933138, iteration: 212769
loss: 1.0571097135543823,grad_norm: 0.9999994633730015, iteration: 212770
loss: 1.0014249086380005,grad_norm: 0.8801408097842072, iteration: 212771
loss: 0.9993365406990051,grad_norm: 0.999999104484098, iteration: 212772
loss: 0.9647732377052307,grad_norm: 0.8523526140271828, iteration: 212773
loss: 1.0070228576660156,grad_norm: 0.8293420291603958, iteration: 212774
loss: 1.0045130252838135,grad_norm: 0.8910593023216428, iteration: 212775
loss: 0.9674974083900452,grad_norm: 0.9732472849806926, iteration: 212776
loss: 0.9954696893692017,grad_norm: 0.8840609186961776, iteration: 212777
loss: 1.0050028562545776,grad_norm: 0.9639513056155813, iteration: 212778
loss: 0.9751355648040771,grad_norm: 0.9999990942176867, iteration: 212779
loss: 0.9690864682197571,grad_norm: 0.906609885832518, iteration: 212780
loss: 0.9777230024337769,grad_norm: 0.919938975559271, iteration: 212781
loss: 1.031319260597229,grad_norm: 0.9281449262731558, iteration: 212782
loss: 0.9851309657096863,grad_norm: 0.8564207010555721, iteration: 212783
loss: 1.011096715927124,grad_norm: 0.9999991751234079, iteration: 212784
loss: 0.9701985716819763,grad_norm: 0.9999991660069556, iteration: 212785
loss: 1.0238319635391235,grad_norm: 0.9999991259790914, iteration: 212786
loss: 0.986846923828125,grad_norm: 0.8916503735708482, iteration: 212787
loss: 0.9734314680099487,grad_norm: 0.9999990744820982, iteration: 212788
loss: 0.9989283084869385,grad_norm: 0.9404635155907897, iteration: 212789
loss: 1.006584882736206,grad_norm: 0.9999992664356456, iteration: 212790
loss: 0.9989750981330872,grad_norm: 0.9999991952058824, iteration: 212791
loss: 1.0173656940460205,grad_norm: 0.8804606057931477, iteration: 212792
loss: 1.0165562629699707,grad_norm: 0.9999999378300073, iteration: 212793
loss: 1.0168325901031494,grad_norm: 0.8499731711582513, iteration: 212794
loss: 0.9822505712509155,grad_norm: 0.9999989575577387, iteration: 212795
loss: 0.992718517780304,grad_norm: 0.9999991402367031, iteration: 212796
loss: 0.9971956014633179,grad_norm: 0.9999991415183993, iteration: 212797
loss: 1.0101128816604614,grad_norm: 0.8436489144458027, iteration: 212798
loss: 1.009394645690918,grad_norm: 0.9999990907394127, iteration: 212799
loss: 1.0157527923583984,grad_norm: 0.8404248666138673, iteration: 212800
loss: 1.015950083732605,grad_norm: 0.7753073963716947, iteration: 212801
loss: 0.9783116579055786,grad_norm: 0.9999992094039298, iteration: 212802
loss: 0.9929152727127075,grad_norm: 0.9999992250419062, iteration: 212803
loss: 0.9534582495689392,grad_norm: 0.9272969519304957, iteration: 212804
loss: 1.0086212158203125,grad_norm: 0.8599788576202209, iteration: 212805
loss: 0.9645942449569702,grad_norm: 0.9371260103933695, iteration: 212806
loss: 1.0060698986053467,grad_norm: 0.7889930359908484, iteration: 212807
loss: 1.0241453647613525,grad_norm: 0.9510869229444125, iteration: 212808
loss: 0.9895299077033997,grad_norm: 0.7529982467058343, iteration: 212809
loss: 0.9685858488082886,grad_norm: 0.8941721037812572, iteration: 212810
loss: 1.0050724744796753,grad_norm: 0.9999991464678936, iteration: 212811
loss: 1.0246779918670654,grad_norm: 0.8136819875464573, iteration: 212812
loss: 1.0174012184143066,grad_norm: 0.6943561000701236, iteration: 212813
loss: 0.9844129085540771,grad_norm: 0.9999990243259331, iteration: 212814
loss: 1.0253807306289673,grad_norm: 0.9999989174801762, iteration: 212815
loss: 0.9878268837928772,grad_norm: 0.9999991817329792, iteration: 212816
loss: 1.0032659769058228,grad_norm: 0.8058467381802557, iteration: 212817
loss: 1.0148310661315918,grad_norm: 0.8705784439037666, iteration: 212818
loss: 0.9676599502563477,grad_norm: 0.7236862751314254, iteration: 212819
loss: 0.9788880944252014,grad_norm: 0.9408036761719695, iteration: 212820
loss: 0.9889808893203735,grad_norm: 0.9999992050680593, iteration: 212821
loss: 1.0073047876358032,grad_norm: 0.9033920913380608, iteration: 212822
loss: 0.9843212962150574,grad_norm: 0.8659016065141412, iteration: 212823
loss: 1.1239197254180908,grad_norm: 0.9999998660789515, iteration: 212824
loss: 1.0368613004684448,grad_norm: 0.9999998592982889, iteration: 212825
loss: 0.9778392910957336,grad_norm: 0.8847340857479998, iteration: 212826
loss: 1.013616919517517,grad_norm: 0.8618520401468001, iteration: 212827
loss: 0.9893926978111267,grad_norm: 0.9999989270695366, iteration: 212828
loss: 1.0309901237487793,grad_norm: 0.9999992377303707, iteration: 212829
loss: 0.9982628226280212,grad_norm: 0.8621436443267334, iteration: 212830
loss: 1.010618805885315,grad_norm: 0.8991808289689187, iteration: 212831
loss: 0.9687884449958801,grad_norm: 0.949269762173413, iteration: 212832
loss: 1.0151464939117432,grad_norm: 0.9999992485161385, iteration: 212833
loss: 1.0155390501022339,grad_norm: 0.8660415588017641, iteration: 212834
loss: 1.02191960811615,grad_norm: 0.8233262944847044, iteration: 212835
loss: 1.0457288026809692,grad_norm: 0.8294531152964548, iteration: 212836
loss: 0.9686293005943298,grad_norm: 0.8348728266721144, iteration: 212837
loss: 0.9964736700057983,grad_norm: 0.9249392874097135, iteration: 212838
loss: 1.0120640993118286,grad_norm: 0.8909089729267382, iteration: 212839
loss: 1.0950497388839722,grad_norm: 0.9999997662520029, iteration: 212840
loss: 1.0219379663467407,grad_norm: 0.8327297784069374, iteration: 212841
loss: 1.0503913164138794,grad_norm: 0.9900352651553276, iteration: 212842
loss: 1.4798566102981567,grad_norm: 0.9999996681907631, iteration: 212843
loss: 1.1801025867462158,grad_norm: 0.9999993054427461, iteration: 212844
loss: 1.1526011228561401,grad_norm: 0.99256926421386, iteration: 212845
loss: 1.0095425844192505,grad_norm: 0.9601729741419782, iteration: 212846
loss: 0.9972535967826843,grad_norm: 0.99999896410829, iteration: 212847
loss: 0.95991051197052,grad_norm: 0.9999991619625471, iteration: 212848
loss: 1.0316754579544067,grad_norm: 0.9353564736515381, iteration: 212849
loss: 1.0446809530258179,grad_norm: 0.9999995315425982, iteration: 212850
loss: 1.0248157978057861,grad_norm: 0.9666981696927652, iteration: 212851
loss: 1.0050623416900635,grad_norm: 0.9086833962951807, iteration: 212852
loss: 1.1295512914657593,grad_norm: 0.9999998805867554, iteration: 212853
loss: 1.0155333280563354,grad_norm: 0.9398103229674909, iteration: 212854
loss: 0.9889289736747742,grad_norm: 0.8176282207407773, iteration: 212855
loss: 0.9846101403236389,grad_norm: 0.966456285829036, iteration: 212856
loss: 0.9899853467941284,grad_norm: 0.8987679204428941, iteration: 212857
loss: 1.0403836965560913,grad_norm: 0.999999946601094, iteration: 212858
loss: 1.0016037225723267,grad_norm: 0.9958003312265328, iteration: 212859
loss: 1.0235687494277954,grad_norm: 0.8855937968642019, iteration: 212860
loss: 1.0226709842681885,grad_norm: 0.9619569321635855, iteration: 212861
loss: 1.0091968774795532,grad_norm: 0.881643601124088, iteration: 212862
loss: 1.018740177154541,grad_norm: 0.8934605098391423, iteration: 212863
loss: 0.9755180478096008,grad_norm: 0.8929219039097402, iteration: 212864
loss: 1.028691053390503,grad_norm: 0.9999994938358263, iteration: 212865
loss: 0.961590588092804,grad_norm: 0.9236872903538239, iteration: 212866
loss: 0.9721852540969849,grad_norm: 0.9305707905912383, iteration: 212867
loss: 1.0121428966522217,grad_norm: 0.9999991094335611, iteration: 212868
loss: 1.114471673965454,grad_norm: 0.9999993519658987, iteration: 212869
loss: 1.0790562629699707,grad_norm: 0.9740060986564236, iteration: 212870
loss: 1.1321083307266235,grad_norm: 0.9999992976674353, iteration: 212871
loss: 0.9993387460708618,grad_norm: 0.9999991353101698, iteration: 212872
loss: 1.001683235168457,grad_norm: 0.9999992656069628, iteration: 212873
loss: 0.9876400232315063,grad_norm: 0.8173009280133671, iteration: 212874
loss: 0.9731462597846985,grad_norm: 0.9407090574354101, iteration: 212875
loss: 1.0233774185180664,grad_norm: 0.9398453370841653, iteration: 212876
loss: 0.9684099555015564,grad_norm: 0.8388280443455324, iteration: 212877
loss: 0.9862622022628784,grad_norm: 0.8771101708551298, iteration: 212878
loss: 1.0027196407318115,grad_norm: 0.9251350503310811, iteration: 212879
loss: 1.0208004713058472,grad_norm: 0.9999996971488911, iteration: 212880
loss: 1.0192663669586182,grad_norm: 0.9999990428875165, iteration: 212881
loss: 1.0828148126602173,grad_norm: 0.9999997875316355, iteration: 212882
loss: 0.9981353282928467,grad_norm: 0.999999118125146, iteration: 212883
loss: 0.9530531167984009,grad_norm: 0.8969643146328429, iteration: 212884
loss: 0.9697760939598083,grad_norm: 0.9999991120105605, iteration: 212885
loss: 0.9643453359603882,grad_norm: 0.9258922037864206, iteration: 212886
loss: 1.0288623571395874,grad_norm: 0.8198003524093501, iteration: 212887
loss: 0.98683762550354,grad_norm: 0.8645733770909769, iteration: 212888
loss: 0.9858269095420837,grad_norm: 0.9876589250833581, iteration: 212889
loss: 1.13808274269104,grad_norm: 0.9999997306456477, iteration: 212890
loss: 1.1645348072052002,grad_norm: 0.9999997249250911, iteration: 212891
loss: 1.0033974647521973,grad_norm: 0.991230031306186, iteration: 212892
loss: 0.9669804573059082,grad_norm: 0.8954907232245716, iteration: 212893
loss: 1.0100375413894653,grad_norm: 0.7736554225117983, iteration: 212894
loss: 1.0027587413787842,grad_norm: 0.9230213015631633, iteration: 212895
loss: 0.9704993963241577,grad_norm: 0.8860903009551493, iteration: 212896
loss: 1.0743720531463623,grad_norm: 0.9533046968091164, iteration: 212897
loss: 0.9904651641845703,grad_norm: 0.9246480366786461, iteration: 212898
loss: 0.9991840124130249,grad_norm: 0.9999989458245891, iteration: 212899
loss: 0.988316535949707,grad_norm: 0.8404731289939544, iteration: 212900
loss: 1.0934295654296875,grad_norm: 0.9999992464432637, iteration: 212901
loss: 0.9896576404571533,grad_norm: 0.8015064512716031, iteration: 212902
loss: 1.029730200767517,grad_norm: 0.9999989814548214, iteration: 212903
loss: 1.0047752857208252,grad_norm: 0.995443428404143, iteration: 212904
loss: 1.025225281715393,grad_norm: 0.9999990649110884, iteration: 212905
loss: 1.0175801515579224,grad_norm: 0.9999991666792799, iteration: 212906
loss: 0.9730819463729858,grad_norm: 0.858042126732836, iteration: 212907
loss: 0.9929218292236328,grad_norm: 0.9528505012361547, iteration: 212908
loss: 1.0312941074371338,grad_norm: 0.8252272279944413, iteration: 212909
loss: 1.007860541343689,grad_norm: 0.9999989812777377, iteration: 212910
loss: 1.0480656623840332,grad_norm: 0.8701500934378864, iteration: 212911
loss: 1.0273717641830444,grad_norm: 0.9800511905561093, iteration: 212912
loss: 0.9821807742118835,grad_norm: 0.9219184633316168, iteration: 212913
loss: 0.9613544344902039,grad_norm: 0.8612316933060443, iteration: 212914
loss: 0.9945864677429199,grad_norm: 0.8254205545973872, iteration: 212915
loss: 1.0358026027679443,grad_norm: 0.8237166125786802, iteration: 212916
loss: 0.9745775461196899,grad_norm: 0.9363469467779295, iteration: 212917
loss: 0.9922417402267456,grad_norm: 0.9999992532081245, iteration: 212918
loss: 1.013112187385559,grad_norm: 0.8415368669336301, iteration: 212919
loss: 1.0993512868881226,grad_norm: 0.9422244837151974, iteration: 212920
loss: 1.20493745803833,grad_norm: 0.9999994140464387, iteration: 212921
loss: 1.0108163356781006,grad_norm: 0.99999944206085, iteration: 212922
loss: 0.9950310587882996,grad_norm: 0.9139564016538949, iteration: 212923
loss: 0.9990624189376831,grad_norm: 0.9999990821818601, iteration: 212924
loss: 1.0615949630737305,grad_norm: 0.9999990880557253, iteration: 212925
loss: 1.02544105052948,grad_norm: 0.9999995552416259, iteration: 212926
loss: 1.0243128538131714,grad_norm: 0.9963279538462982, iteration: 212927
loss: 1.035971760749817,grad_norm: 0.9999991931068071, iteration: 212928
loss: 1.0227062702178955,grad_norm: 0.9999991969564243, iteration: 212929
loss: 1.1666496992111206,grad_norm: 0.9999997256495816, iteration: 212930
loss: 1.0059179067611694,grad_norm: 0.7893232292970155, iteration: 212931
loss: 1.009102702140808,grad_norm: 0.9999990290520756, iteration: 212932
loss: 0.9964190721511841,grad_norm: 0.8094703855912003, iteration: 212933
loss: 1.0004340410232544,grad_norm: 0.9999992137893259, iteration: 212934
loss: 1.060071349143982,grad_norm: 0.9999992359159544, iteration: 212935
loss: 1.0826655626296997,grad_norm: 0.999999465934029, iteration: 212936
loss: 1.216461420059204,grad_norm: 0.9999991506204012, iteration: 212937
loss: 1.0774375200271606,grad_norm: 0.9999993265102448, iteration: 212938
loss: 0.9948665499687195,grad_norm: 0.7041539226750849, iteration: 212939
loss: 1.0381219387054443,grad_norm: 0.9999996100866702, iteration: 212940
loss: 1.0254706144332886,grad_norm: 0.8188231462989574, iteration: 212941
loss: 0.9986662268638611,grad_norm: 0.9999991417743035, iteration: 212942
loss: 1.1161726713180542,grad_norm: 0.9999999843204805, iteration: 212943
loss: 1.0000250339508057,grad_norm: 0.9999992255413026, iteration: 212944
loss: 0.9929009079933167,grad_norm: 0.908915648488258, iteration: 212945
loss: 1.0214747190475464,grad_norm: 0.8513587917847152, iteration: 212946
loss: 1.0452418327331543,grad_norm: 0.960423048123192, iteration: 212947
loss: 0.9922858476638794,grad_norm: 0.9705733102516337, iteration: 212948
loss: 1.107887625694275,grad_norm: 0.9999990112412326, iteration: 212949
loss: 1.0132496356964111,grad_norm: 0.9271509779978917, iteration: 212950
loss: 0.9658850431442261,grad_norm: 0.8943573634269071, iteration: 212951
loss: 1.006402850151062,grad_norm: 0.9961468510814246, iteration: 212952
loss: 1.0370484590530396,grad_norm: 0.9999988944864109, iteration: 212953
loss: 1.015480399131775,grad_norm: 0.8632488360260335, iteration: 212954
loss: 1.0834879875183105,grad_norm: 0.9999996734083806, iteration: 212955
loss: 1.0004639625549316,grad_norm: 0.9999990596799825, iteration: 212956
loss: 1.0236424207687378,grad_norm: 0.999999013804613, iteration: 212957
loss: 1.0176721811294556,grad_norm: 0.999999077320654, iteration: 212958
loss: 1.1243953704833984,grad_norm: 0.9999992108554255, iteration: 212959
loss: 1.003462791442871,grad_norm: 0.9930163371658475, iteration: 212960
loss: 0.9923107624053955,grad_norm: 0.8004418517524091, iteration: 212961
loss: 1.0074639320373535,grad_norm: 0.9999989045709007, iteration: 212962
loss: 1.0224997997283936,grad_norm: 0.9999993525809912, iteration: 212963
loss: 0.9710255861282349,grad_norm: 0.8617528371253247, iteration: 212964
loss: 0.982791543006897,grad_norm: 0.9829658431408321, iteration: 212965
loss: 1.0788081884384155,grad_norm: 0.9999999228942915, iteration: 212966
loss: 1.0058907270431519,grad_norm: 0.8752910611093302, iteration: 212967
loss: 1.0726585388183594,grad_norm: 0.9999997446121521, iteration: 212968
loss: 0.9694632887840271,grad_norm: 0.8485649460042771, iteration: 212969
loss: 0.9611769318580627,grad_norm: 0.9554363136172149, iteration: 212970
loss: 0.9878503680229187,grad_norm: 0.8893634037954565, iteration: 212971
loss: 0.9935871362686157,grad_norm: 0.9999988985243985, iteration: 212972
loss: 0.9676022529602051,grad_norm: 0.8530933441423236, iteration: 212973
loss: 0.9818834662437439,grad_norm: 0.9999990438257055, iteration: 212974
loss: 1.0111827850341797,grad_norm: 0.9999990091692105, iteration: 212975
loss: 1.070632815361023,grad_norm: 0.99999992676633, iteration: 212976
loss: 0.9677597880363464,grad_norm: 0.8065446324990891, iteration: 212977
loss: 0.9848940968513489,grad_norm: 0.9225206264202777, iteration: 212978
loss: 0.9718502759933472,grad_norm: 0.939037681649962, iteration: 212979
loss: 1.036818027496338,grad_norm: 0.8577445255223439, iteration: 212980
loss: 0.9891865849494934,grad_norm: 0.8715131055525972, iteration: 212981
loss: 0.9907470345497131,grad_norm: 0.9384195237340331, iteration: 212982
loss: 1.032307744026184,grad_norm: 0.9999990457569278, iteration: 212983
loss: 0.9837310910224915,grad_norm: 0.818499537199331, iteration: 212984
loss: 0.9723825454711914,grad_norm: 0.9999992184849573, iteration: 212985
loss: 0.9891206622123718,grad_norm: 0.9999989579214477, iteration: 212986
loss: 1.0511353015899658,grad_norm: 0.9999989984051145, iteration: 212987
loss: 0.9998123049736023,grad_norm: 0.9999990372629364, iteration: 212988
loss: 1.0177221298217773,grad_norm: 0.8901155106088394, iteration: 212989
loss: 1.0791367292404175,grad_norm: 0.9999990159376687, iteration: 212990
loss: 1.007182240486145,grad_norm: 0.9999994024064999, iteration: 212991
loss: 1.0191978216171265,grad_norm: 0.8525616233215115, iteration: 212992
loss: 1.0087255239486694,grad_norm: 0.8408391082187259, iteration: 212993
loss: 1.0248485803604126,grad_norm: 0.7423873450240482, iteration: 212994
loss: 1.0093411207199097,grad_norm: 0.9326021460130394, iteration: 212995
loss: 1.0380436182022095,grad_norm: 0.9999991351565661, iteration: 212996
loss: 0.9693078994750977,grad_norm: 0.7571303706127994, iteration: 212997
loss: 0.981159508228302,grad_norm: 0.9999991062641208, iteration: 212998
loss: 0.983531653881073,grad_norm: 0.9877379287179634, iteration: 212999
loss: 0.9795020818710327,grad_norm: 0.9932110485021333, iteration: 213000
loss: 1.0005725622177124,grad_norm: 0.8427302998853776, iteration: 213001
loss: 1.0176459550857544,grad_norm: 0.9999990229747031, iteration: 213002
loss: 0.9921962022781372,grad_norm: 0.893575147940112, iteration: 213003
loss: 1.010695457458496,grad_norm: 0.8445810604843097, iteration: 213004
loss: 1.0383179187774658,grad_norm: 0.9999990576428055, iteration: 213005
loss: 1.0190095901489258,grad_norm: 0.9454412829800478, iteration: 213006
loss: 1.0397162437438965,grad_norm: 0.9894910344502703, iteration: 213007
loss: 1.0115966796875,grad_norm: 0.9999997090875178, iteration: 213008
loss: 0.9708593487739563,grad_norm: 0.9581362623983473, iteration: 213009
loss: 0.9930812120437622,grad_norm: 0.9480279565567384, iteration: 213010
loss: 1.0347150564193726,grad_norm: 0.9999989229679387, iteration: 213011
loss: 1.007519006729126,grad_norm: 0.8442818228443784, iteration: 213012
loss: 1.0049128532409668,grad_norm: 0.9335957344303144, iteration: 213013
loss: 0.9753956198692322,grad_norm: 0.8737215071975207, iteration: 213014
loss: 1.034143090248108,grad_norm: 0.9999991562573552, iteration: 213015
loss: 0.9813922047615051,grad_norm: 0.9144995662389791, iteration: 213016
loss: 0.9753273725509644,grad_norm: 0.8109679556710798, iteration: 213017
loss: 1.0324212312698364,grad_norm: 0.9999991528959747, iteration: 213018
loss: 1.0320287942886353,grad_norm: 0.9999990852647919, iteration: 213019
loss: 0.991673469543457,grad_norm: 0.9462583185536593, iteration: 213020
loss: 1.0123854875564575,grad_norm: 0.903618456359151, iteration: 213021
loss: 0.9973981380462646,grad_norm: 0.9999991504531441, iteration: 213022
loss: 0.9872153997421265,grad_norm: 0.9869107261162325, iteration: 213023
loss: 1.011091947555542,grad_norm: 0.8741051403889892, iteration: 213024
loss: 0.9723647236824036,grad_norm: 0.9999990805569078, iteration: 213025
loss: 0.98215252161026,grad_norm: 0.9227701550727847, iteration: 213026
loss: 0.9782229065895081,grad_norm: 0.9099212730018233, iteration: 213027
loss: 0.9891917109489441,grad_norm: 0.8928090841601466, iteration: 213028
loss: 0.9731296300888062,grad_norm: 0.9999994654242316, iteration: 213029
loss: 1.1752591133117676,grad_norm: 0.9999995241081718, iteration: 213030
loss: 1.0142254829406738,grad_norm: 0.9667364809367244, iteration: 213031
loss: 1.035660743713379,grad_norm: 0.9999990683648309, iteration: 213032
loss: 1.0113625526428223,grad_norm: 0.999999183408124, iteration: 213033
loss: 1.0471229553222656,grad_norm: 0.8956922640682744, iteration: 213034
loss: 0.9928626418113708,grad_norm: 0.9999990686790853, iteration: 213035
loss: 0.9681581258773804,grad_norm: 0.7619121410214155, iteration: 213036
loss: 0.9738672971725464,grad_norm: 0.8366159249951637, iteration: 213037
loss: 0.9864543080329895,grad_norm: 0.942787531782182, iteration: 213038
loss: 0.9831265211105347,grad_norm: 0.9799775043912908, iteration: 213039
loss: 1.0556968450546265,grad_norm: 0.9624267348453834, iteration: 213040
loss: 0.987768292427063,grad_norm: 0.9999991391198367, iteration: 213041
loss: 0.9960274696350098,grad_norm: 0.9032168667030822, iteration: 213042
loss: 0.9806998372077942,grad_norm: 0.9099481919243322, iteration: 213043
loss: 1.0209898948669434,grad_norm: 0.9999996457239693, iteration: 213044
loss: 0.9824299812316895,grad_norm: 0.8320413635390774, iteration: 213045
loss: 1.028886318206787,grad_norm: 0.9713634200841352, iteration: 213046
loss: 0.9819331765174866,grad_norm: 0.9999990813387635, iteration: 213047
loss: 0.9964969754219055,grad_norm: 0.8420235373191616, iteration: 213048
loss: 0.9927518367767334,grad_norm: 0.8939495636107712, iteration: 213049
loss: 0.976076066493988,grad_norm: 0.9382948104993405, iteration: 213050
loss: 1.0717363357543945,grad_norm: 0.87178031259137, iteration: 213051
loss: 0.9994451403617859,grad_norm: 0.9728624779166698, iteration: 213052
loss: 0.9851370453834534,grad_norm: 0.8902024618707385, iteration: 213053
loss: 0.9820492267608643,grad_norm: 0.989691429801798, iteration: 213054
loss: 0.9989399909973145,grad_norm: 0.8504414431466782, iteration: 213055
loss: 1.0833711624145508,grad_norm: 0.9999993173101077, iteration: 213056
loss: 1.0606367588043213,grad_norm: 0.999999138474732, iteration: 213057
loss: 1.0197237730026245,grad_norm: 0.9999997952882169, iteration: 213058
loss: 0.9983647465705872,grad_norm: 0.8398283090796524, iteration: 213059
loss: 1.069927453994751,grad_norm: 0.9999991556846173, iteration: 213060
loss: 1.10147225856781,grad_norm: 0.9999999488819351, iteration: 213061
loss: 0.9514573216438293,grad_norm: 0.8372946064248395, iteration: 213062
loss: 0.9905902147293091,grad_norm: 0.930144053260079, iteration: 213063
loss: 1.0130653381347656,grad_norm: 0.9999991578943692, iteration: 213064
loss: 1.0256060361862183,grad_norm: 0.8423548385591693, iteration: 213065
loss: 1.074516773223877,grad_norm: 0.9999992880998528, iteration: 213066
loss: 0.9898367524147034,grad_norm: 0.9999992008938288, iteration: 213067
loss: 1.0247716903686523,grad_norm: 0.8640949668505068, iteration: 213068
loss: 1.0472736358642578,grad_norm: 0.9999991683764399, iteration: 213069
loss: 0.978344738483429,grad_norm: 0.9999995630364957, iteration: 213070
loss: 0.9586853981018066,grad_norm: 0.9999991047122161, iteration: 213071
loss: 0.977017343044281,grad_norm: 0.9999991175368056, iteration: 213072
loss: 0.9785171151161194,grad_norm: 0.9179727688084988, iteration: 213073
loss: 0.9649521708488464,grad_norm: 0.8662175932657855, iteration: 213074
loss: 1.0002174377441406,grad_norm: 0.9539811770186347, iteration: 213075
loss: 0.9566730856895447,grad_norm: 0.9654798357411735, iteration: 213076
loss: 0.9913572669029236,grad_norm: 0.9720578486555789, iteration: 213077
loss: 1.004705786705017,grad_norm: 0.8451671191231409, iteration: 213078
loss: 1.0197824239730835,grad_norm: 0.9999990287658345, iteration: 213079
loss: 0.9947113990783691,grad_norm: 0.9999992417242308, iteration: 213080
loss: 0.9885252118110657,grad_norm: 0.8399041228209903, iteration: 213081
loss: 1.0185471773147583,grad_norm: 0.9462408173344863, iteration: 213082
loss: 1.030646800994873,grad_norm: 0.8621070250488296, iteration: 213083
loss: 1.0320110321044922,grad_norm: 0.9999991448076835, iteration: 213084
loss: 1.0114762783050537,grad_norm: 0.96247453551062, iteration: 213085
loss: 0.9905188083648682,grad_norm: 0.9999991201033704, iteration: 213086
loss: 1.0302760601043701,grad_norm: 0.9305907026614217, iteration: 213087
loss: 1.0074164867401123,grad_norm: 0.8476486799064425, iteration: 213088
loss: 1.0004937648773193,grad_norm: 0.9854409295202718, iteration: 213089
loss: 0.9961608052253723,grad_norm: 0.9999992756329421, iteration: 213090
loss: 0.9764648675918579,grad_norm: 0.9999991494981918, iteration: 213091
loss: 0.9993453621864319,grad_norm: 0.9999997473098231, iteration: 213092
loss: 0.9819896817207336,grad_norm: 0.8819833001481907, iteration: 213093
loss: 0.9684817790985107,grad_norm: 0.9999990788851097, iteration: 213094
loss: 1.0157209634780884,grad_norm: 0.7875506737540983, iteration: 213095
loss: 0.9943989515304565,grad_norm: 0.8586262974852223, iteration: 213096
loss: 0.9700292348861694,grad_norm: 0.9690051285922937, iteration: 213097
loss: 1.0111960172653198,grad_norm: 0.9999991376842471, iteration: 213098
loss: 1.010208010673523,grad_norm: 0.9391153224098733, iteration: 213099
loss: 1.0144155025482178,grad_norm: 0.9825992933989187, iteration: 213100
loss: 1.0289640426635742,grad_norm: 0.9158210308174012, iteration: 213101
loss: 0.9943417906761169,grad_norm: 0.9682943307655395, iteration: 213102
loss: 1.0027154684066772,grad_norm: 0.9722362559322695, iteration: 213103
loss: 1.0549529790878296,grad_norm: 0.8585334444719285, iteration: 213104
loss: 1.0183017253875732,grad_norm: 0.9999995626744647, iteration: 213105
loss: 1.010899305343628,grad_norm: 0.8867051908651833, iteration: 213106
loss: 1.018505334854126,grad_norm: 0.8435223407911998, iteration: 213107
loss: 1.0158357620239258,grad_norm: 0.9999991711214592, iteration: 213108
loss: 0.9824405908584595,grad_norm: 0.9446683250014934, iteration: 213109
loss: 0.9905460476875305,grad_norm: 0.9341723118014411, iteration: 213110
loss: 0.9744399785995483,grad_norm: 0.9065653013662921, iteration: 213111
loss: 0.9750596880912781,grad_norm: 0.9428305099149638, iteration: 213112
loss: 1.0146288871765137,grad_norm: 0.970651323050852, iteration: 213113
loss: 0.9733756184577942,grad_norm: 0.8118411983241276, iteration: 213114
loss: 0.994022786617279,grad_norm: 0.9999991946534522, iteration: 213115
loss: 0.9665273427963257,grad_norm: 0.9130966753044703, iteration: 213116
loss: 0.9813213348388672,grad_norm: 0.8894074262178752, iteration: 213117
loss: 1.0337790250778198,grad_norm: 0.9307278420578564, iteration: 213118
loss: 0.9690565466880798,grad_norm: 0.8718540987432322, iteration: 213119
loss: 0.9802138805389404,grad_norm: 0.8319309346747021, iteration: 213120
loss: 1.0229535102844238,grad_norm: 0.8271018883119385, iteration: 213121
loss: 1.0146393775939941,grad_norm: 0.908500866240511, iteration: 213122
loss: 1.0265942811965942,grad_norm: 0.9999992026039205, iteration: 213123
loss: 0.9660751819610596,grad_norm: 0.8744660626577158, iteration: 213124
loss: 0.9819316267967224,grad_norm: 0.8774448643575944, iteration: 213125
loss: 0.9801028966903687,grad_norm: 0.9999996571048818, iteration: 213126
loss: 0.994346022605896,grad_norm: 0.9999990958820905, iteration: 213127
loss: 1.0137271881103516,grad_norm: 0.9986871101842192, iteration: 213128
loss: 1.0920250415802002,grad_norm: 0.9999991675417438, iteration: 213129
loss: 0.9936964511871338,grad_norm: 0.8647212445228606, iteration: 213130
loss: 1.0687546730041504,grad_norm: 0.9999995074888062, iteration: 213131
loss: 0.981049120426178,grad_norm: 0.999999118547389, iteration: 213132
loss: 1.0275636911392212,grad_norm: 0.9999990603917733, iteration: 213133
loss: 1.0205250978469849,grad_norm: 0.8305649669786378, iteration: 213134
loss: 1.0102460384368896,grad_norm: 0.8891053111059143, iteration: 213135
loss: 1.0330719947814941,grad_norm: 0.8965664182785374, iteration: 213136
loss: 0.9537562727928162,grad_norm: 0.9999992857930455, iteration: 213137
loss: 0.9702770113945007,grad_norm: 0.9151344393585575, iteration: 213138
loss: 0.971266508102417,grad_norm: 0.9999998580536781, iteration: 213139
loss: 0.9838030338287354,grad_norm: 0.9242044563004255, iteration: 213140
loss: 0.9951001405715942,grad_norm: 0.9791778074116075, iteration: 213141
loss: 1.0128285884857178,grad_norm: 0.9840027982163813, iteration: 213142
loss: 1.001115083694458,grad_norm: 0.8632223745771113, iteration: 213143
loss: 1.0247021913528442,grad_norm: 0.9604034404158428, iteration: 213144
loss: 1.0798084735870361,grad_norm: 0.999999189780882, iteration: 213145
loss: 1.003380537033081,grad_norm: 0.916168953211737, iteration: 213146
loss: 0.9850351810455322,grad_norm: 0.8743251304198597, iteration: 213147
loss: 0.9864992499351501,grad_norm: 0.8324170765718785, iteration: 213148
loss: 0.9826417565345764,grad_norm: 0.9665530411826585, iteration: 213149
loss: 0.9961749315261841,grad_norm: 0.9999991940200422, iteration: 213150
loss: 0.9798933863639832,grad_norm: 0.9999990689939057, iteration: 213151
loss: 0.9746622443199158,grad_norm: 0.9999992086923866, iteration: 213152
loss: 1.0216000080108643,grad_norm: 0.943954372665316, iteration: 213153
loss: 1.0265328884124756,grad_norm: 0.9101828297163378, iteration: 213154
loss: 1.00361168384552,grad_norm: 0.9999991178470379, iteration: 213155
loss: 1.0384914875030518,grad_norm: 0.9999991302657945, iteration: 213156
loss: 1.0939490795135498,grad_norm: 0.999999240878873, iteration: 213157
loss: 0.9766843914985657,grad_norm: 0.9517714690575529, iteration: 213158
loss: 0.9794782996177673,grad_norm: 0.858441447723016, iteration: 213159
loss: 0.9710901975631714,grad_norm: 0.9999990793724404, iteration: 213160
loss: 0.9926943778991699,grad_norm: 0.9999990952988792, iteration: 213161
loss: 1.0038797855377197,grad_norm: 0.9999991051246202, iteration: 213162
loss: 1.0257591009140015,grad_norm: 0.897375281443585, iteration: 213163
loss: 1.0847634077072144,grad_norm: 0.9999994824217443, iteration: 213164
loss: 1.0069985389709473,grad_norm: 0.9999993029408841, iteration: 213165
loss: 1.1191887855529785,grad_norm: 0.9985146718941926, iteration: 213166
loss: 1.0616750717163086,grad_norm: 0.9999993462683356, iteration: 213167
loss: 1.0217629671096802,grad_norm: 0.8708889162582141, iteration: 213168
loss: 1.0060522556304932,grad_norm: 0.9663112791280108, iteration: 213169
loss: 1.0250632762908936,grad_norm: 0.9051402548506863, iteration: 213170
loss: 1.0099890232086182,grad_norm: 0.859558778861667, iteration: 213171
loss: 0.9823340177536011,grad_norm: 0.877320503859612, iteration: 213172
loss: 0.9939345717430115,grad_norm: 0.9999991100167452, iteration: 213173
loss: 1.1891082525253296,grad_norm: 0.9999995447498943, iteration: 213174
loss: 1.0156795978546143,grad_norm: 0.99999916751065, iteration: 213175
loss: 1.0248064994812012,grad_norm: 0.9999989512973869, iteration: 213176
loss: 1.0702338218688965,grad_norm: 0.9999992717710908, iteration: 213177
loss: 0.9697535634040833,grad_norm: 0.8597951231686476, iteration: 213178
loss: 0.9647021889686584,grad_norm: 0.9790887230298448, iteration: 213179
loss: 0.9928547739982605,grad_norm: 0.9999991766128963, iteration: 213180
loss: 1.0435882806777954,grad_norm: 0.9999998558544525, iteration: 213181
loss: 1.0274068117141724,grad_norm: 0.9999994906556204, iteration: 213182
loss: 0.9745844602584839,grad_norm: 0.9999990653330002, iteration: 213183
loss: 1.0119969844818115,grad_norm: 0.9609074384743767, iteration: 213184
loss: 1.0485934019088745,grad_norm: 0.9999990407614601, iteration: 213185
loss: 0.99201899766922,grad_norm: 0.8788207939953798, iteration: 213186
loss: 1.0355395078659058,grad_norm: 0.999999216724228, iteration: 213187
loss: 0.9900427460670471,grad_norm: 0.9045882408384694, iteration: 213188
loss: 1.0136882066726685,grad_norm: 0.9299776173154488, iteration: 213189
loss: 1.0479365587234497,grad_norm: 0.8767370752552051, iteration: 213190
loss: 0.9907886385917664,grad_norm: 0.9999990879301484, iteration: 213191
loss: 1.0390515327453613,grad_norm: 0.9999994829249109, iteration: 213192
loss: 0.981032133102417,grad_norm: 0.9962448521439123, iteration: 213193
loss: 1.0286567211151123,grad_norm: 0.9999991180906092, iteration: 213194
loss: 0.9965567588806152,grad_norm: 0.9999993580418648, iteration: 213195
loss: 1.0352243185043335,grad_norm: 0.7877150331434066, iteration: 213196
loss: 1.065764307975769,grad_norm: 0.9281765629074714, iteration: 213197
loss: 1.005239486694336,grad_norm: 0.9999991536275944, iteration: 213198
loss: 0.9875339865684509,grad_norm: 0.9999992320419873, iteration: 213199
loss: 0.9692816734313965,grad_norm: 0.7814453682462307, iteration: 213200
loss: 0.9857951402664185,grad_norm: 0.9929639402230144, iteration: 213201
loss: 1.0225657224655151,grad_norm: 0.9999997582006024, iteration: 213202
loss: 0.9891837239265442,grad_norm: 0.9360870039049302, iteration: 213203
loss: 0.9969856142997742,grad_norm: 0.9970258224061345, iteration: 213204
loss: 1.0257726907730103,grad_norm: 1.0000000575425243, iteration: 213205
loss: 1.0139394998550415,grad_norm: 0.9999996995094291, iteration: 213206
loss: 1.0149942636489868,grad_norm: 0.9382685129575961, iteration: 213207
loss: 0.9938272833824158,grad_norm: 0.9999991524064, iteration: 213208
loss: 0.9885972142219543,grad_norm: 0.9767260767802355, iteration: 213209
loss: 1.003528356552124,grad_norm: 0.8999206111502349, iteration: 213210
loss: 1.019681692123413,grad_norm: 0.9999995216568801, iteration: 213211
loss: 1.0391029119491577,grad_norm: 0.9956521193221803, iteration: 213212
loss: 1.0229004621505737,grad_norm: 0.9999992390015611, iteration: 213213
loss: 1.1018338203430176,grad_norm: 0.9952610988770401, iteration: 213214
loss: 1.0006749629974365,grad_norm: 0.9351914044869373, iteration: 213215
loss: 0.9769325256347656,grad_norm: 0.8257218056933708, iteration: 213216
loss: 0.9874696731567383,grad_norm: 0.9289134789217752, iteration: 213217
loss: 0.9987816214561462,grad_norm: 0.8299005467434122, iteration: 213218
loss: 1.0585585832595825,grad_norm: 0.9999998138527777, iteration: 213219
loss: 1.054510474205017,grad_norm: 0.999999257498855, iteration: 213220
loss: 1.1492552757263184,grad_norm: 0.9999989908502525, iteration: 213221
loss: 1.026164174079895,grad_norm: 0.9999990719038652, iteration: 213222
loss: 1.0664656162261963,grad_norm: 0.9472843220395907, iteration: 213223
loss: 1.0041747093200684,grad_norm: 0.9999992082140863, iteration: 213224
loss: 0.9843177199363708,grad_norm: 0.999999174718489, iteration: 213225
loss: 1.050972819328308,grad_norm: 0.9999994510235233, iteration: 213226
loss: 0.9861239194869995,grad_norm: 0.8449788283856277, iteration: 213227
loss: 0.9873623251914978,grad_norm: 0.8770232934898889, iteration: 213228
loss: 0.9927402138710022,grad_norm: 0.894409152218401, iteration: 213229
loss: 1.0200573205947876,grad_norm: 0.9999991749432966, iteration: 213230
loss: 1.0071624517440796,grad_norm: 0.9999993348045121, iteration: 213231
loss: 0.9891838431358337,grad_norm: 0.950087825103766, iteration: 213232
loss: 1.0307800769805908,grad_norm: 0.736865626722702, iteration: 213233
loss: 0.9797171354293823,grad_norm: 0.9999991007688858, iteration: 213234
loss: 1.0126149654388428,grad_norm: 0.9999992333817691, iteration: 213235
loss: 1.043472170829773,grad_norm: 0.9927662791659192, iteration: 213236
loss: 1.014021396636963,grad_norm: 0.8823700226427426, iteration: 213237
loss: 0.9834539890289307,grad_norm: 0.9208778629117899, iteration: 213238
loss: 0.9807215929031372,grad_norm: 0.9736723856606806, iteration: 213239
loss: 1.0445775985717773,grad_norm: 0.9999990706382179, iteration: 213240
loss: 1.0583699941635132,grad_norm: 0.9999994377872364, iteration: 213241
loss: 1.0172566175460815,grad_norm: 0.9999991156314508, iteration: 213242
loss: 1.0310988426208496,grad_norm: 0.897175745041815, iteration: 213243
loss: 0.9797652959823608,grad_norm: 0.8409082747837157, iteration: 213244
loss: 1.0316896438598633,grad_norm: 0.9999996623031643, iteration: 213245
loss: 1.0837035179138184,grad_norm: 0.9999991434573944, iteration: 213246
loss: 1.0151033401489258,grad_norm: 0.9170434345633307, iteration: 213247
loss: 0.9976911544799805,grad_norm: 0.9999989844467772, iteration: 213248
loss: 1.0067042112350464,grad_norm: 0.8778901412441167, iteration: 213249
loss: 0.9904852509498596,grad_norm: 0.9338589126720003, iteration: 213250
loss: 0.9889557957649231,grad_norm: 0.6609783515100145, iteration: 213251
loss: 0.9977110028266907,grad_norm: 0.8837821909678265, iteration: 213252
loss: 0.9849221706390381,grad_norm: 0.9999991632382448, iteration: 213253
loss: 0.956321120262146,grad_norm: 0.9999990885265112, iteration: 213254
loss: 0.9901512861251831,grad_norm: 0.8832398697781207, iteration: 213255
loss: 1.2381898164749146,grad_norm: 0.9999999847946287, iteration: 213256
loss: 0.9679951667785645,grad_norm: 0.9999992245380596, iteration: 213257
loss: 0.9748966693878174,grad_norm: 0.7580185140801297, iteration: 213258
loss: 1.0379410982131958,grad_norm: 0.9999989849854113, iteration: 213259
loss: 1.0107543468475342,grad_norm: 0.9838136977376007, iteration: 213260
loss: 0.9733213186264038,grad_norm: 0.9999991272714047, iteration: 213261
loss: 1.0269523859024048,grad_norm: 0.9456163629382729, iteration: 213262
loss: 1.0109472274780273,grad_norm: 0.8302515584602149, iteration: 213263
loss: 0.9873530864715576,grad_norm: 0.8477199650749062, iteration: 213264
loss: 1.001846194267273,grad_norm: 0.9323523875026402, iteration: 213265
loss: 0.9838231801986694,grad_norm: 0.9289472339676916, iteration: 213266
loss: 0.9783257842063904,grad_norm: 0.9999993067320931, iteration: 213267
loss: 0.9844927191734314,grad_norm: 0.9999993938220719, iteration: 213268
loss: 1.0078980922698975,grad_norm: 0.8620317583253061, iteration: 213269
loss: 0.970360517501831,grad_norm: 0.8134861829848885, iteration: 213270
loss: 0.9884476065635681,grad_norm: 0.8954459823387462, iteration: 213271
loss: 1.0068531036376953,grad_norm: 0.8644162489379676, iteration: 213272
loss: 1.0166208744049072,grad_norm: 0.8545017140363478, iteration: 213273
loss: 0.9953799843788147,grad_norm: 0.9967158785200793, iteration: 213274
loss: 0.9726384878158569,grad_norm: 0.9735572069704527, iteration: 213275
loss: 1.0094141960144043,grad_norm: 0.9314403173217435, iteration: 213276
loss: 1.0131882429122925,grad_norm: 0.7876926030053533, iteration: 213277
loss: 1.0606549978256226,grad_norm: 0.8996048056820715, iteration: 213278
loss: 1.0347771644592285,grad_norm: 0.9999990772906948, iteration: 213279
loss: 1.0105032920837402,grad_norm: 0.8965649982002846, iteration: 213280
loss: 1.068347692489624,grad_norm: 0.999999173571397, iteration: 213281
loss: 1.0259495973587036,grad_norm: 0.999999084356016, iteration: 213282
loss: 1.011853575706482,grad_norm: 0.9999991101825533, iteration: 213283
loss: 1.0220683813095093,grad_norm: 0.9999995557483067, iteration: 213284
loss: 0.980970025062561,grad_norm: 0.8859898433063628, iteration: 213285
loss: 1.000700831413269,grad_norm: 0.9130111851765663, iteration: 213286
loss: 0.9790734052658081,grad_norm: 0.9999991661430003, iteration: 213287
loss: 1.0047167539596558,grad_norm: 0.9301421244116485, iteration: 213288
loss: 1.03263258934021,grad_norm: 0.9790755707964609, iteration: 213289
loss: 0.9968563914299011,grad_norm: 0.9244248236661075, iteration: 213290
loss: 1.0428218841552734,grad_norm: 0.9915973532777493, iteration: 213291
loss: 0.9824337363243103,grad_norm: 0.8899723432433987, iteration: 213292
loss: 1.0092006921768188,grad_norm: 0.9043805090105728, iteration: 213293
loss: 0.9389327168464661,grad_norm: 0.9999995403041934, iteration: 213294
loss: 0.9898314476013184,grad_norm: 0.9999990146859346, iteration: 213295
loss: 0.9818488955497742,grad_norm: 0.7640491035689574, iteration: 213296
loss: 0.9657160639762878,grad_norm: 0.8617666918176565, iteration: 213297
loss: 1.0237189531326294,grad_norm: 0.9944978676028328, iteration: 213298
loss: 1.0232206583023071,grad_norm: 0.807792591291301, iteration: 213299
loss: 1.0315423011779785,grad_norm: 0.9999992447653258, iteration: 213300
loss: 0.9771361351013184,grad_norm: 0.9999991861067755, iteration: 213301
loss: 1.015906810760498,grad_norm: 0.9999992211182046, iteration: 213302
loss: 1.010587215423584,grad_norm: 0.9999989846131778, iteration: 213303
loss: 1.0011316537857056,grad_norm: 0.8673877133248095, iteration: 213304
loss: 0.975482702255249,grad_norm: 0.9565749038460153, iteration: 213305
loss: 0.971772313117981,grad_norm: 0.8695922160698641, iteration: 213306
loss: 1.01334810256958,grad_norm: 0.9835152041301631, iteration: 213307
loss: 0.9779683351516724,grad_norm: 0.9999990583561438, iteration: 213308
loss: 0.9701760411262512,grad_norm: 0.8698712796955661, iteration: 213309
loss: 0.9548637270927429,grad_norm: 0.9426011179744831, iteration: 213310
loss: 0.9833434224128723,grad_norm: 0.9281027138718325, iteration: 213311
loss: 0.9655779600143433,grad_norm: 0.9161394101741537, iteration: 213312
loss: 0.9849130511283875,grad_norm: 0.95475274596457, iteration: 213313
loss: 1.012241005897522,grad_norm: 0.992862437150401, iteration: 213314
loss: 1.011813759803772,grad_norm: 0.7722835419342772, iteration: 213315
loss: 1.1031895875930786,grad_norm: 0.985099859595799, iteration: 213316
loss: 1.0022135972976685,grad_norm: 0.9999992492796856, iteration: 213317
loss: 1.0168561935424805,grad_norm: 0.8536879591976183, iteration: 213318
loss: 0.9932663440704346,grad_norm: 0.9079647366837688, iteration: 213319
loss: 1.0002721548080444,grad_norm: 0.8000365965950452, iteration: 213320
loss: 0.9577053785324097,grad_norm: 0.9999990817042518, iteration: 213321
loss: 1.0001394748687744,grad_norm: 0.9927947422724774, iteration: 213322
loss: 0.9761500358581543,grad_norm: 0.9394905124194756, iteration: 213323
loss: 1.0519437789916992,grad_norm: 0.9999992425265638, iteration: 213324
loss: 0.9853115081787109,grad_norm: 0.791181499193382, iteration: 213325
loss: 1.0059411525726318,grad_norm: 0.7941412284587115, iteration: 213326
loss: 1.0344525575637817,grad_norm: 0.9999995162838576, iteration: 213327
loss: 0.9912567734718323,grad_norm: 0.9424067311254746, iteration: 213328
loss: 1.051237940788269,grad_norm: 0.9186014057927978, iteration: 213329
loss: 1.0168060064315796,grad_norm: 0.7424022924151558, iteration: 213330
loss: 0.9828535318374634,grad_norm: 0.8584577483990382, iteration: 213331
loss: 1.0080305337905884,grad_norm: 0.8841416134312469, iteration: 213332
loss: 1.0072102546691895,grad_norm: 0.9999990431632798, iteration: 213333
loss: 1.0045576095581055,grad_norm: 0.8952139228028193, iteration: 213334
loss: 0.9829807877540588,grad_norm: 0.9462497652944557, iteration: 213335
loss: 0.9896903038024902,grad_norm: 0.9343417826555543, iteration: 213336
loss: 0.9815983176231384,grad_norm: 0.9727998947181489, iteration: 213337
loss: 1.0701484680175781,grad_norm: 0.9999991228926055, iteration: 213338
loss: 1.0228527784347534,grad_norm: 0.8871863601254243, iteration: 213339
loss: 0.9870843291282654,grad_norm: 0.9999989927011161, iteration: 213340
loss: 0.9601988196372986,grad_norm: 0.8402185307036711, iteration: 213341
loss: 1.0816205739974976,grad_norm: 0.9999992586120167, iteration: 213342
loss: 1.0282106399536133,grad_norm: 0.9999991888555827, iteration: 213343
loss: 0.9985865354537964,grad_norm: 0.9999992447817481, iteration: 213344
loss: 1.0175135135650635,grad_norm: 0.966379100405513, iteration: 213345
loss: 1.0449436902999878,grad_norm: 0.9363858462087342, iteration: 213346
loss: 1.0008095502853394,grad_norm: 0.9999991639805367, iteration: 213347
loss: 1.007466435432434,grad_norm: 0.8414505347756398, iteration: 213348
loss: 0.9978544116020203,grad_norm: 0.9999989959392087, iteration: 213349
loss: 1.0333579778671265,grad_norm: 0.8870394718653524, iteration: 213350
loss: 1.0164402723312378,grad_norm: 0.822935964159746, iteration: 213351
loss: 0.9877879619598389,grad_norm: 0.9999993266273461, iteration: 213352
loss: 0.9923318028450012,grad_norm: 0.9999990926994011, iteration: 213353
loss: 0.9646050930023193,grad_norm: 0.8678277558264932, iteration: 213354
loss: 0.9936598539352417,grad_norm: 0.8415656221118767, iteration: 213355
loss: 1.0239691734313965,grad_norm: 0.9999993528314814, iteration: 213356
loss: 1.0136507749557495,grad_norm: 0.9024300569642276, iteration: 213357
loss: 1.0016636848449707,grad_norm: 0.9999993746257583, iteration: 213358
loss: 0.9750325083732605,grad_norm: 0.9999991480247946, iteration: 213359
loss: 1.0188106298446655,grad_norm: 0.9999993073064997, iteration: 213360
loss: 0.9906101226806641,grad_norm: 0.999999222971495, iteration: 213361
loss: 0.9957453608512878,grad_norm: 0.9265770885066297, iteration: 213362
loss: 1.0203559398651123,grad_norm: 0.9999990411344843, iteration: 213363
loss: 0.9870873689651489,grad_norm: 0.9210476725258905, iteration: 213364
loss: 0.9971628189086914,grad_norm: 0.9999995257034444, iteration: 213365
loss: 1.014975666999817,grad_norm: 0.838360855934544, iteration: 213366
loss: 1.0118821859359741,grad_norm: 0.9999992648098798, iteration: 213367
loss: 1.010332465171814,grad_norm: 0.9799857145452173, iteration: 213368
loss: 1.0223561525344849,grad_norm: 0.8155026150967758, iteration: 213369
loss: 1.0497510433197021,grad_norm: 0.9999996418577313, iteration: 213370
loss: 1.0268226861953735,grad_norm: 0.9565560964766194, iteration: 213371
loss: 1.0080245733261108,grad_norm: 0.7946851087126852, iteration: 213372
loss: 0.9920951128005981,grad_norm: 0.9847020704498615, iteration: 213373
loss: 0.9768618941307068,grad_norm: 0.9219078598330966, iteration: 213374
loss: 1.0127415657043457,grad_norm: 0.9999991115573817, iteration: 213375
loss: 1.0184096097946167,grad_norm: 0.9999990157711671, iteration: 213376
loss: 0.98214191198349,grad_norm: 0.7967542659126374, iteration: 213377
loss: 0.9655835628509521,grad_norm: 0.910754925887266, iteration: 213378
loss: 0.9475651979446411,grad_norm: 0.917882526808226, iteration: 213379
loss: 0.9971649050712585,grad_norm: 0.8665706647522522, iteration: 213380
loss: 0.9966368079185486,grad_norm: 0.9390208844245551, iteration: 213381
loss: 1.0109319686889648,grad_norm: 0.999999318403043, iteration: 213382
loss: 1.0093295574188232,grad_norm: 0.9473211168758165, iteration: 213383
loss: 1.0126762390136719,grad_norm: 0.88153983609858, iteration: 213384
loss: 1.0163010358810425,grad_norm: 0.9677726260565302, iteration: 213385
loss: 0.9914794564247131,grad_norm: 0.8204767386733293, iteration: 213386
loss: 1.0543856620788574,grad_norm: 0.8616055211813404, iteration: 213387
loss: 1.0428179502487183,grad_norm: 0.8261558898496348, iteration: 213388
loss: 0.9957213401794434,grad_norm: 0.9663021925523915, iteration: 213389
loss: 0.9773246049880981,grad_norm: 0.8609904166551999, iteration: 213390
loss: 0.9915497899055481,grad_norm: 0.9999991483234302, iteration: 213391
loss: 1.0384925603866577,grad_norm: 0.9786404737923916, iteration: 213392
loss: 0.987226128578186,grad_norm: 0.9986151502046667, iteration: 213393
loss: 0.9562013149261475,grad_norm: 0.9215512299122977, iteration: 213394
loss: 1.002405047416687,grad_norm: 0.9760397045825157, iteration: 213395
loss: 1.0553641319274902,grad_norm: 0.9999992612857911, iteration: 213396
loss: 0.9797520041465759,grad_norm: 0.866098553869219, iteration: 213397
loss: 0.9853427410125732,grad_norm: 0.8184242217195749, iteration: 213398
loss: 1.0017411708831787,grad_norm: 0.9999990149384964, iteration: 213399
loss: 0.9968723654747009,grad_norm: 0.9084411368602333, iteration: 213400
loss: 0.9966073632240295,grad_norm: 0.9999995764764128, iteration: 213401
loss: 1.014491081237793,grad_norm: 0.9999991572404564, iteration: 213402
loss: 1.040481686592102,grad_norm: 0.967778383032923, iteration: 213403
loss: 0.9902592301368713,grad_norm: 0.9999990736450423, iteration: 213404
loss: 1.0327543020248413,grad_norm: 0.9757044650795135, iteration: 213405
loss: 1.0064030885696411,grad_norm: 0.935639951567572, iteration: 213406
loss: 1.0115149021148682,grad_norm: 0.9143367030878259, iteration: 213407
loss: 1.0095475912094116,grad_norm: 0.7614801355976326, iteration: 213408
loss: 1.015044093132019,grad_norm: 0.9999990622819103, iteration: 213409
loss: 1.0414787530899048,grad_norm: 0.9999990426610701, iteration: 213410
loss: 0.9752236604690552,grad_norm: 0.9241116120438874, iteration: 213411
loss: 1.005081057548523,grad_norm: 0.8622266114834025, iteration: 213412
loss: 0.9693825244903564,grad_norm: 0.8740740508203935, iteration: 213413
loss: 0.9932208061218262,grad_norm: 0.745108212019625, iteration: 213414
loss: 1.0000672340393066,grad_norm: 0.9210874950613887, iteration: 213415
loss: 1.0013132095336914,grad_norm: 0.8563263288009539, iteration: 213416
loss: 0.9966272115707397,grad_norm: 0.9999991008178093, iteration: 213417
loss: 0.9952352046966553,grad_norm: 0.9687622688793779, iteration: 213418
loss: 0.9968116283416748,grad_norm: 0.9999992004710803, iteration: 213419
loss: 0.9780924320220947,grad_norm: 0.7528493810833669, iteration: 213420
loss: 0.9828745722770691,grad_norm: 0.8581283009824946, iteration: 213421
loss: 0.9636825919151306,grad_norm: 0.999999267214132, iteration: 213422
loss: 1.0372060537338257,grad_norm: 0.9191073548652963, iteration: 213423
loss: 1.0183122158050537,grad_norm: 0.9524241503692438, iteration: 213424
loss: 1.0016270875930786,grad_norm: 0.9422375014183596, iteration: 213425
loss: 0.9935060143470764,grad_norm: 0.9999990481252302, iteration: 213426
loss: 1.022505283355713,grad_norm: 0.9104294362892908, iteration: 213427
loss: 1.0127450227737427,grad_norm: 0.9285563068990604, iteration: 213428
loss: 1.0201042890548706,grad_norm: 0.9721756058837118, iteration: 213429
loss: 1.0322693586349487,grad_norm: 0.9751496279906998, iteration: 213430
loss: 1.0015966892242432,grad_norm: 0.9999992431222241, iteration: 213431
loss: 0.9872743487358093,grad_norm: 0.7942903983049615, iteration: 213432
loss: 0.985859751701355,grad_norm: 0.9801985355537057, iteration: 213433
loss: 1.001883864402771,grad_norm: 0.9999991607849299, iteration: 213434
loss: 0.9818618297576904,grad_norm: 0.9999992398651665, iteration: 213435
loss: 1.015395164489746,grad_norm: 0.9226126076104831, iteration: 213436
loss: 0.9987351298332214,grad_norm: 0.9999994925912769, iteration: 213437
loss: 1.0007973909378052,grad_norm: 0.8575142707456922, iteration: 213438
loss: 0.9842255115509033,grad_norm: 0.9758714313646554, iteration: 213439
loss: 1.004388689994812,grad_norm: 0.8903446854000624, iteration: 213440
loss: 1.0046370029449463,grad_norm: 0.8654186958263032, iteration: 213441
loss: 1.0155465602874756,grad_norm: 0.9821328937141043, iteration: 213442
loss: 1.008301854133606,grad_norm: 0.9999992229654968, iteration: 213443
loss: 0.9846404790878296,grad_norm: 0.9999992779061772, iteration: 213444
loss: 0.9982408881187439,grad_norm: 0.999999044480787, iteration: 213445
loss: 0.9966844916343689,grad_norm: 0.8492166979152633, iteration: 213446
loss: 0.9687712788581848,grad_norm: 0.9413846393325835, iteration: 213447
loss: 0.9910169839859009,grad_norm: 0.8510146258349438, iteration: 213448
loss: 0.9842740297317505,grad_norm: 0.999999128900699, iteration: 213449
loss: 0.9893453121185303,grad_norm: 0.8151630585811273, iteration: 213450
loss: 0.9825021624565125,grad_norm: 0.9999990293052423, iteration: 213451
loss: 0.9960650205612183,grad_norm: 0.9999991032041197, iteration: 213452
loss: 0.9865329265594482,grad_norm: 0.8329668485599524, iteration: 213453
loss: 1.0004514455795288,grad_norm: 0.8885897668923695, iteration: 213454
loss: 0.965156078338623,grad_norm: 0.9594361353042926, iteration: 213455
loss: 1.0560096502304077,grad_norm: 0.9655988435262544, iteration: 213456
loss: 0.9918088316917419,grad_norm: 0.999999384579416, iteration: 213457
loss: 1.0014005899429321,grad_norm: 0.8932949341040264, iteration: 213458
loss: 1.0157978534698486,grad_norm: 0.9324612923301214, iteration: 213459
loss: 1.0250756740570068,grad_norm: 0.999999058038141, iteration: 213460
loss: 1.0049506425857544,grad_norm: 0.999999037164014, iteration: 213461
loss: 1.0189770460128784,grad_norm: 0.8879314190174629, iteration: 213462
loss: 0.9962459802627563,grad_norm: 0.7994602276454011, iteration: 213463
loss: 0.9848002791404724,grad_norm: 0.9690305869428786, iteration: 213464
loss: 0.9848122000694275,grad_norm: 0.77548612041217, iteration: 213465
loss: 0.9826654195785522,grad_norm: 0.9324824553157696, iteration: 213466
loss: 1.0028687715530396,grad_norm: 0.9999990655875701, iteration: 213467
loss: 0.9854516386985779,grad_norm: 0.9345805896481661, iteration: 213468
loss: 1.0107977390289307,grad_norm: 0.7689725443888302, iteration: 213469
loss: 0.9974061846733093,grad_norm: 0.9999992285485554, iteration: 213470
loss: 0.990551769733429,grad_norm: 0.9672642869585875, iteration: 213471
loss: 0.9672388434410095,grad_norm: 0.9097921365580767, iteration: 213472
loss: 1.0094444751739502,grad_norm: 0.8655735980607924, iteration: 213473
loss: 0.9732605814933777,grad_norm: 0.9999991440869689, iteration: 213474
loss: 0.9782643914222717,grad_norm: 0.999999103371575, iteration: 213475
loss: 0.9925267100334167,grad_norm: 0.999999112031284, iteration: 213476
loss: 0.9817150831222534,grad_norm: 0.8047906281906384, iteration: 213477
loss: 0.9981879591941833,grad_norm: 0.9644128611857695, iteration: 213478
loss: 0.9807752370834351,grad_norm: 0.9999992883789192, iteration: 213479
loss: 0.9795128107070923,grad_norm: 0.9999991109223018, iteration: 213480
loss: 1.004499912261963,grad_norm: 0.8310972698181992, iteration: 213481
loss: 1.010434627532959,grad_norm: 0.9503617629175949, iteration: 213482
loss: 1.023692011833191,grad_norm: 0.9098653361671233, iteration: 213483
loss: 1.0158778429031372,grad_norm: 0.921597416346948, iteration: 213484
loss: 0.9902774691581726,grad_norm: 0.683908760826155, iteration: 213485
loss: 0.9872193336486816,grad_norm: 0.8278988148454545, iteration: 213486
loss: 0.9883448481559753,grad_norm: 0.880561709926289, iteration: 213487
loss: 1.0033986568450928,grad_norm: 0.999999129133012, iteration: 213488
loss: 1.044610857963562,grad_norm: 0.999999583140951, iteration: 213489
loss: 1.0010842084884644,grad_norm: 0.8786000598379786, iteration: 213490
loss: 1.0292378664016724,grad_norm: 0.9999991598420063, iteration: 213491
loss: 0.9923803210258484,grad_norm: 0.9945521786458816, iteration: 213492
loss: 0.9833703637123108,grad_norm: 0.9951562442100298, iteration: 213493
loss: 0.9898264408111572,grad_norm: 0.869683948025996, iteration: 213494
loss: 1.0065542459487915,grad_norm: 0.963585607571351, iteration: 213495
loss: 0.9763691425323486,grad_norm: 0.9999991924273894, iteration: 213496
loss: 1.015523910522461,grad_norm: 0.999999007299913, iteration: 213497
loss: 1.0268120765686035,grad_norm: 0.9606988809921163, iteration: 213498
loss: 1.0074665546417236,grad_norm: 0.9999990175655132, iteration: 213499
loss: 0.995485782623291,grad_norm: 0.9999993749250347, iteration: 213500
loss: 0.9989734888076782,grad_norm: 0.9165179523021144, iteration: 213501
loss: 0.9623551964759827,grad_norm: 0.7737371122941413, iteration: 213502
loss: 0.977396547794342,grad_norm: 0.9999991702875863, iteration: 213503
loss: 0.9847061634063721,grad_norm: 0.9999989271565567, iteration: 213504
loss: 0.9899625182151794,grad_norm: 0.9999994766944736, iteration: 213505
loss: 0.9877187013626099,grad_norm: 0.9344125369282216, iteration: 213506
loss: 0.979666531085968,grad_norm: 0.99999925446805, iteration: 213507
loss: 0.9753329753875732,grad_norm: 0.9999991559983368, iteration: 213508
loss: 0.9806407690048218,grad_norm: 0.920978942087095, iteration: 213509
loss: 1.0450594425201416,grad_norm: 0.9999991591599139, iteration: 213510
loss: 0.9903751611709595,grad_norm: 0.8988142887904321, iteration: 213511
loss: 1.030842900276184,grad_norm: 0.9999992069079298, iteration: 213512
loss: 1.0062906742095947,grad_norm: 0.878430767751108, iteration: 213513
loss: 1.0216774940490723,grad_norm: 0.8855667852825507, iteration: 213514
loss: 0.9954720139503479,grad_norm: 0.9730978187280349, iteration: 213515
loss: 0.9602844715118408,grad_norm: 0.9999991371409044, iteration: 213516
loss: 1.0189690589904785,grad_norm: 0.9999992947798885, iteration: 213517
loss: 1.008552074432373,grad_norm: 0.7909818746834891, iteration: 213518
loss: 1.0191161632537842,grad_norm: 0.8972662198676165, iteration: 213519
loss: 0.968343198299408,grad_norm: 0.9995654852764002, iteration: 213520
loss: 0.9929046630859375,grad_norm: 0.9335985441570348, iteration: 213521
loss: 0.9946846961975098,grad_norm: 0.896945812499942, iteration: 213522
loss: 1.0028685331344604,grad_norm: 0.9999991658161552, iteration: 213523
loss: 0.9969743490219116,grad_norm: 0.8635714568819236, iteration: 213524
loss: 0.9674318432807922,grad_norm: 0.9854028310800896, iteration: 213525
loss: 0.9626629948616028,grad_norm: 0.9502905850119719, iteration: 213526
loss: 1.0131314992904663,grad_norm: 0.9374754986722706, iteration: 213527
loss: 1.0108522176742554,grad_norm: 0.9999992675188262, iteration: 213528
loss: 1.0584255456924438,grad_norm: 0.9163653909939455, iteration: 213529
loss: 0.9943599104881287,grad_norm: 0.7919345285942196, iteration: 213530
loss: 1.0055317878723145,grad_norm: 0.9999989552699212, iteration: 213531
loss: 1.0541744232177734,grad_norm: 0.9999998181793563, iteration: 213532
loss: 1.003944993019104,grad_norm: 0.9290785913817614, iteration: 213533
loss: 1.0034890174865723,grad_norm: 0.9580792242129627, iteration: 213534
loss: 1.0622563362121582,grad_norm: 0.9010870507385405, iteration: 213535
loss: 1.03680419921875,grad_norm: 0.965114830002224, iteration: 213536
loss: 1.0015791654586792,grad_norm: 0.8826302090074671, iteration: 213537
loss: 0.9800642132759094,grad_norm: 0.7495464658054214, iteration: 213538
loss: 0.9836046099662781,grad_norm: 0.7954067200084353, iteration: 213539
loss: 1.0111256837844849,grad_norm: 0.9999991128034106, iteration: 213540
loss: 1.0445787906646729,grad_norm: 0.9999991666325952, iteration: 213541
loss: 0.9783431887626648,grad_norm: 0.8490754107885694, iteration: 213542
loss: 1.0318934917449951,grad_norm: 0.9576474265456439, iteration: 213543
loss: 0.98398756980896,grad_norm: 0.918769927296236, iteration: 213544
loss: 1.0225117206573486,grad_norm: 0.9999990335738242, iteration: 213545
loss: 0.9744336605072021,grad_norm: 0.907098120687756, iteration: 213546
loss: 1.0353952646255493,grad_norm: 0.9999999833837745, iteration: 213547
loss: 1.0097894668579102,grad_norm: 0.9319805752050146, iteration: 213548
loss: 1.0104414224624634,grad_norm: 0.9611683120041745, iteration: 213549
loss: 1.0635672807693481,grad_norm: 0.9999991827235826, iteration: 213550
loss: 0.9541153907775879,grad_norm: 0.874008127628037, iteration: 213551
loss: 0.9953839182853699,grad_norm: 0.9998841222711332, iteration: 213552
loss: 1.0118387937545776,grad_norm: 0.9521136016038486, iteration: 213553
loss: 0.9678645730018616,grad_norm: 0.8640966462402911, iteration: 213554
loss: 1.0291751623153687,grad_norm: 0.9472033525330658, iteration: 213555
loss: 0.9530872702598572,grad_norm: 0.9324782743784388, iteration: 213556
loss: 1.0298529863357544,grad_norm: 0.9999999987194367, iteration: 213557
loss: 0.9532452821731567,grad_norm: 0.7650827547751221, iteration: 213558
loss: 1.0040030479431152,grad_norm: 0.9594190291428719, iteration: 213559
loss: 0.9835143089294434,grad_norm: 0.9999991038519102, iteration: 213560
loss: 1.0008156299591064,grad_norm: 0.8457574675894419, iteration: 213561
loss: 1.035853385925293,grad_norm: 0.9999991613947389, iteration: 213562
loss: 1.0047026872634888,grad_norm: 0.9999992624857773, iteration: 213563
loss: 1.0111089944839478,grad_norm: 0.8571645050218534, iteration: 213564
loss: 1.0066304206848145,grad_norm: 0.906622932763817, iteration: 213565
loss: 0.9873068332672119,grad_norm: 0.8463901603144095, iteration: 213566
loss: 0.9958983063697815,grad_norm: 0.918832475341874, iteration: 213567
loss: 0.9793879389762878,grad_norm: 0.9620424072891435, iteration: 213568
loss: 1.0314916372299194,grad_norm: 0.9268439246022924, iteration: 213569
loss: 0.9683449268341064,grad_norm: 0.9600209601670645, iteration: 213570
loss: 0.996239960193634,grad_norm: 0.9625361689405117, iteration: 213571
loss: 1.036683201789856,grad_norm: 0.9999989908363481, iteration: 213572
loss: 1.006124496459961,grad_norm: 0.9973152920057418, iteration: 213573
loss: 1.0009853839874268,grad_norm: 0.9708944223625847, iteration: 213574
loss: 1.0840610265731812,grad_norm: 0.9999991497130398, iteration: 213575
loss: 0.9806450605392456,grad_norm: 0.9485196993722883, iteration: 213576
loss: 1.0054106712341309,grad_norm: 0.8818185711461011, iteration: 213577
loss: 0.9780259132385254,grad_norm: 0.9303335648405185, iteration: 213578
loss: 0.9765269756317139,grad_norm: 0.7566188300156732, iteration: 213579
loss: 0.9948275089263916,grad_norm: 0.8058381600791304, iteration: 213580
loss: 1.0151100158691406,grad_norm: 0.9150704728386655, iteration: 213581
loss: 0.9826516509056091,grad_norm: 0.8907661527871292, iteration: 213582
loss: 0.9823678731918335,grad_norm: 0.8049518826087434, iteration: 213583
loss: 0.9575662016868591,grad_norm: 0.9999992337807355, iteration: 213584
loss: 0.9666726589202881,grad_norm: 0.9695284337628739, iteration: 213585
loss: 1.0204930305480957,grad_norm: 0.8592925591191239, iteration: 213586
loss: 0.9825736284255981,grad_norm: 0.9999991015125722, iteration: 213587
loss: 1.0526156425476074,grad_norm: 0.9749040125651879, iteration: 213588
loss: 1.035811185836792,grad_norm: 0.9999991067941791, iteration: 213589
loss: 0.9742898344993591,grad_norm: 0.997054250698966, iteration: 213590
loss: 0.9999482035636902,grad_norm: 0.839731687585673, iteration: 213591
loss: 0.9979850649833679,grad_norm: 0.9999992804540219, iteration: 213592
loss: 1.0179052352905273,grad_norm: 0.9999991021101083, iteration: 213593
loss: 1.0240062475204468,grad_norm: 0.9999991777644229, iteration: 213594
loss: 1.0450143814086914,grad_norm: 0.854941476602491, iteration: 213595
loss: 0.9840495586395264,grad_norm: 0.8670295656350956, iteration: 213596
loss: 1.007222056388855,grad_norm: 0.7117126398796118, iteration: 213597
loss: 0.967583417892456,grad_norm: 0.9408454879098632, iteration: 213598
loss: 0.9733138680458069,grad_norm: 0.9773395031953874, iteration: 213599
loss: 1.0169283151626587,grad_norm: 0.9999990625663713, iteration: 213600
loss: 0.9851624369621277,grad_norm: 0.99999901983918, iteration: 213601
loss: 0.9923260807991028,grad_norm: 0.9726393823783066, iteration: 213602
loss: 0.995593249797821,grad_norm: 0.9999990979392395, iteration: 213603
loss: 1.0677080154418945,grad_norm: 0.9999997938594198, iteration: 213604
loss: 1.001049518585205,grad_norm: 0.9590374087443816, iteration: 213605
loss: 0.992459237575531,grad_norm: 0.863619113050118, iteration: 213606
loss: 0.9776638746261597,grad_norm: 0.9999991248730399, iteration: 213607
loss: 1.0361663103103638,grad_norm: 0.999999162075853, iteration: 213608
loss: 1.0006506443023682,grad_norm: 0.8212875404473826, iteration: 213609
loss: 1.008804202079773,grad_norm: 0.9999990653762455, iteration: 213610
loss: 1.0318795442581177,grad_norm: 0.9597784923144598, iteration: 213611
loss: 1.0309057235717773,grad_norm: 0.9148719401582683, iteration: 213612
loss: 1.009294867515564,grad_norm: 0.9999990932140161, iteration: 213613
loss: 0.977429211139679,grad_norm: 0.8262135819846547, iteration: 213614
loss: 1.0008820295333862,grad_norm: 0.8271712707025014, iteration: 213615
loss: 1.0003986358642578,grad_norm: 0.8569450733419021, iteration: 213616
loss: 1.0107563734054565,grad_norm: 0.8767673478958757, iteration: 213617
loss: 1.0200096368789673,grad_norm: 0.8865491172321538, iteration: 213618
loss: 0.9672415256500244,grad_norm: 0.9999990277763874, iteration: 213619
loss: 0.9952955842018127,grad_norm: 0.9999992920736172, iteration: 213620
loss: 1.0276845693588257,grad_norm: 0.999999053443878, iteration: 213621
loss: 1.0041440725326538,grad_norm: 0.9999991503058984, iteration: 213622
loss: 0.9684429168701172,grad_norm: 0.9270829552304996, iteration: 213623
loss: 1.0152095556259155,grad_norm: 0.8366615275282688, iteration: 213624
loss: 1.0229219198226929,grad_norm: 0.8302847633864395, iteration: 213625
loss: 0.9632184505462646,grad_norm: 0.9999994403434252, iteration: 213626
loss: 0.9917189478874207,grad_norm: 0.8127448834835908, iteration: 213627
loss: 1.0175397396087646,grad_norm: 0.9508963904568967, iteration: 213628
loss: 1.0551480054855347,grad_norm: 0.9999996616492901, iteration: 213629
loss: 0.9686371088027954,grad_norm: 0.9999992202991298, iteration: 213630
loss: 0.9945654273033142,grad_norm: 0.9999991467470586, iteration: 213631
loss: 1.0511492490768433,grad_norm: 0.8629000014301603, iteration: 213632
loss: 0.9979397058486938,grad_norm: 0.9484017377383317, iteration: 213633
loss: 1.0249361991882324,grad_norm: 0.9490949582796138, iteration: 213634
loss: 0.9735119342803955,grad_norm: 0.9449159700980807, iteration: 213635
loss: 0.9494357705116272,grad_norm: 0.8401726724538979, iteration: 213636
loss: 0.9812589883804321,grad_norm: 0.9999990238222168, iteration: 213637
loss: 0.9978039860725403,grad_norm: 0.9902177757982017, iteration: 213638
loss: 1.0059338808059692,grad_norm: 0.9443652854268378, iteration: 213639
loss: 1.0119426250457764,grad_norm: 0.9653585132072818, iteration: 213640
loss: 0.9628921151161194,grad_norm: 0.7877980742437073, iteration: 213641
loss: 0.9936832189559937,grad_norm: 0.9232465993322071, iteration: 213642
loss: 0.9994555711746216,grad_norm: 0.9642455186727028, iteration: 213643
loss: 1.027381181716919,grad_norm: 0.9999990867977244, iteration: 213644
loss: 1.0200773477554321,grad_norm: 0.846501028221122, iteration: 213645
loss: 0.9904099106788635,grad_norm: 0.8760887301645797, iteration: 213646
loss: 0.9729178547859192,grad_norm: 0.9999991297336198, iteration: 213647
loss: 0.9973293542861938,grad_norm: 0.999999390705521, iteration: 213648
loss: 0.9439292550086975,grad_norm: 0.9845738798096801, iteration: 213649
loss: 1.0225752592086792,grad_norm: 0.9807864924117548, iteration: 213650
loss: 1.0508570671081543,grad_norm: 0.9999991189035016, iteration: 213651
loss: 0.9893578886985779,grad_norm: 0.9411890519160656, iteration: 213652
loss: 0.981207013130188,grad_norm: 0.9000755205906955, iteration: 213653
loss: 1.00045645236969,grad_norm: 0.9460946748585608, iteration: 213654
loss: 1.010682225227356,grad_norm: 0.9999992008328011, iteration: 213655
loss: 1.0056527853012085,grad_norm: 0.9014299053327237, iteration: 213656
loss: 1.0108973979949951,grad_norm: 0.9999992118652449, iteration: 213657
loss: 1.0093743801116943,grad_norm: 0.7764675790907317, iteration: 213658
loss: 0.9880407452583313,grad_norm: 0.8206227216989597, iteration: 213659
loss: 0.998550295829773,grad_norm: 0.9942900002697191, iteration: 213660
loss: 1.0157968997955322,grad_norm: 0.9999991292582564, iteration: 213661
loss: 1.0302789211273193,grad_norm: 0.9999993237353703, iteration: 213662
loss: 1.0019909143447876,grad_norm: 0.8810199864922647, iteration: 213663
loss: 1.0086103677749634,grad_norm: 0.8789671990518519, iteration: 213664
loss: 1.0044605731964111,grad_norm: 0.8418557451887139, iteration: 213665
loss: 1.0495023727416992,grad_norm: 0.999999518623781, iteration: 213666
loss: 1.0100363492965698,grad_norm: 0.9999990444844005, iteration: 213667
loss: 1.0001509189605713,grad_norm: 0.965176020982364, iteration: 213668
loss: 1.0224303007125854,grad_norm: 0.9999991331768407, iteration: 213669
loss: 1.017298698425293,grad_norm: 0.8569710188762827, iteration: 213670
loss: 1.0461468696594238,grad_norm: 0.9999990451589744, iteration: 213671
loss: 1.0417851209640503,grad_norm: 0.8671418339446773, iteration: 213672
loss: 1.0612194538116455,grad_norm: 0.9986343561655846, iteration: 213673
loss: 0.9696165323257446,grad_norm: 0.8703284262519879, iteration: 213674
loss: 0.9980096817016602,grad_norm: 0.9999990221235964, iteration: 213675
loss: 1.0419533252716064,grad_norm: 0.872845351478521, iteration: 213676
loss: 1.0042036771774292,grad_norm: 0.9999989655200434, iteration: 213677
loss: 0.9943177103996277,grad_norm: 0.8596368022469186, iteration: 213678
loss: 0.9831283092498779,grad_norm: 0.9003794046579036, iteration: 213679
loss: 0.9826844334602356,grad_norm: 0.9509714260603493, iteration: 213680
loss: 0.9893680214881897,grad_norm: 0.9554387293516372, iteration: 213681
loss: 0.9847130179405212,grad_norm: 0.999999168382327, iteration: 213682
loss: 0.9985781908035278,grad_norm: 0.8346232105475203, iteration: 213683
loss: 0.9577226638793945,grad_norm: 0.7929592303952967, iteration: 213684
loss: 0.9915579557418823,grad_norm: 0.9144438004420157, iteration: 213685
loss: 1.006788730621338,grad_norm: 0.9541289602301642, iteration: 213686
loss: 1.0010288953781128,grad_norm: 0.9908011374875912, iteration: 213687
loss: 1.0490728616714478,grad_norm: 0.9999990257410726, iteration: 213688
loss: 1.0003126859664917,grad_norm: 0.8010880591068448, iteration: 213689
loss: 1.0400073528289795,grad_norm: 0.9736946088941306, iteration: 213690
loss: 0.9760129451751709,grad_norm: 0.999998975034229, iteration: 213691
loss: 1.0028793811798096,grad_norm: 0.9402062653889632, iteration: 213692
loss: 0.9872690439224243,grad_norm: 0.7821917551721054, iteration: 213693
loss: 0.9854273200035095,grad_norm: 0.8429334424974074, iteration: 213694
loss: 0.9598541259765625,grad_norm: 0.8582303246178618, iteration: 213695
loss: 0.9840808510780334,grad_norm: 0.9999994583737905, iteration: 213696
loss: 1.0107797384262085,grad_norm: 0.96927609233623, iteration: 213697
loss: 0.9966861605644226,grad_norm: 0.8960025916191757, iteration: 213698
loss: 0.9826817512512207,grad_norm: 0.9437419115279062, iteration: 213699
loss: 0.9772339463233948,grad_norm: 0.99999917445997, iteration: 213700
loss: 1.0153627395629883,grad_norm: 0.9999992319537476, iteration: 213701
loss: 0.9952558279037476,grad_norm: 0.9841108685407189, iteration: 213702
loss: 1.0285308361053467,grad_norm: 0.9600528801386076, iteration: 213703
loss: 0.9539191126823425,grad_norm: 0.8511341326150288, iteration: 213704
loss: 0.982509970664978,grad_norm: 0.9999992729388617, iteration: 213705
loss: 0.9841897487640381,grad_norm: 0.9999990736475675, iteration: 213706
loss: 0.9710155129432678,grad_norm: 0.9999989790260871, iteration: 213707
loss: 0.9941921234130859,grad_norm: 0.9201654064763786, iteration: 213708
loss: 0.9782423973083496,grad_norm: 0.9301727352134493, iteration: 213709
loss: 0.9876158237457275,grad_norm: 0.9589407527125791, iteration: 213710
loss: 1.0199388265609741,grad_norm: 0.8941897793464388, iteration: 213711
loss: 0.9879039525985718,grad_norm: 0.7848388017918712, iteration: 213712
loss: 1.0001243352890015,grad_norm: 0.9638065751626474, iteration: 213713
loss: 1.003739833831787,grad_norm: 0.9380781174277081, iteration: 213714
loss: 1.0054954290390015,grad_norm: 0.8484306025367672, iteration: 213715
loss: 1.0069899559020996,grad_norm: 0.9999990377779397, iteration: 213716
loss: 0.9983565807342529,grad_norm: 0.9999991356770426, iteration: 213717
loss: 1.0419765710830688,grad_norm: 0.9773503318546978, iteration: 213718
loss: 0.9959667921066284,grad_norm: 0.999999070437057, iteration: 213719
loss: 0.9990714192390442,grad_norm: 0.8278242344821617, iteration: 213720
loss: 1.0009582042694092,grad_norm: 0.9999990994716396, iteration: 213721
loss: 0.9957000613212585,grad_norm: 0.934156506009422, iteration: 213722
loss: 1.0325833559036255,grad_norm: 0.8436927519282909, iteration: 213723
loss: 1.012184500694275,grad_norm: 0.8989265182936143, iteration: 213724
loss: 0.9924612641334534,grad_norm: 0.9999992165729792, iteration: 213725
loss: 0.9892690181732178,grad_norm: 0.9999990893710424, iteration: 213726
loss: 0.9824008345603943,grad_norm: 0.8661138351594675, iteration: 213727
loss: 1.0215774774551392,grad_norm: 0.9999991243341233, iteration: 213728
loss: 1.0320366621017456,grad_norm: 0.9999990614846158, iteration: 213729
loss: 0.9797744750976562,grad_norm: 0.9055995116183155, iteration: 213730
loss: 0.980879545211792,grad_norm: 0.9999990395683612, iteration: 213731
loss: 1.0040217638015747,grad_norm: 0.9999991366906088, iteration: 213732
loss: 0.9921020865440369,grad_norm: 0.9227432430722504, iteration: 213733
loss: 0.9991567134857178,grad_norm: 0.9999991378263419, iteration: 213734
loss: 0.9813282489776611,grad_norm: 0.8870967392211948, iteration: 213735
loss: 0.996403694152832,grad_norm: 0.8319651792971221, iteration: 213736
loss: 0.9587786197662354,grad_norm: 0.9999991883632962, iteration: 213737
loss: 1.0382167100906372,grad_norm: 0.814258455229978, iteration: 213738
loss: 0.992283046245575,grad_norm: 0.8493530848560638, iteration: 213739
loss: 0.9980953931808472,grad_norm: 0.9950591277273383, iteration: 213740
loss: 0.9689622521400452,grad_norm: 0.8483643237819962, iteration: 213741
loss: 1.0018912553787231,grad_norm: 0.9999992223426732, iteration: 213742
loss: 1.030214548110962,grad_norm: 0.9280002080004132, iteration: 213743
loss: 1.0083003044128418,grad_norm: 0.9063454585980151, iteration: 213744
loss: 0.9998053312301636,grad_norm: 0.9378219102832711, iteration: 213745
loss: 0.9985787272453308,grad_norm: 0.8817949357796455, iteration: 213746
loss: 0.9783045053482056,grad_norm: 0.8302248150906987, iteration: 213747
loss: 1.0199817419052124,grad_norm: 0.939270980703219, iteration: 213748
loss: 1.0171374082565308,grad_norm: 0.7720408856618901, iteration: 213749
loss: 0.9927039742469788,grad_norm: 0.8352516309174262, iteration: 213750
loss: 0.9900291562080383,grad_norm: 0.9081382522592367, iteration: 213751
loss: 0.9659279584884644,grad_norm: 0.984873593520367, iteration: 213752
loss: 0.9863817691802979,grad_norm: 0.9472337056097583, iteration: 213753
loss: 0.9892557263374329,grad_norm: 0.9999990518554919, iteration: 213754
loss: 0.9863596558570862,grad_norm: 0.8851355087812051, iteration: 213755
loss: 1.008102536201477,grad_norm: 0.9999990446829313, iteration: 213756
loss: 1.0152971744537354,grad_norm: 0.9559448604219465, iteration: 213757
loss: 0.9998738169670105,grad_norm: 0.9189623515738347, iteration: 213758
loss: 0.989348292350769,grad_norm: 0.9639078461944072, iteration: 213759
loss: 1.0133981704711914,grad_norm: 0.8669680869297107, iteration: 213760
loss: 1.0009419918060303,grad_norm: 0.9999992676770513, iteration: 213761
loss: 1.0803003311157227,grad_norm: 0.9999998148362707, iteration: 213762
loss: 0.9932444095611572,grad_norm: 0.9130137027130931, iteration: 213763
loss: 1.0256423950195312,grad_norm: 0.9999991167070473, iteration: 213764
loss: 1.014703631401062,grad_norm: 0.9999991150386827, iteration: 213765
loss: 1.0335078239440918,grad_norm: 0.9999990745628291, iteration: 213766
loss: 1.0393917560577393,grad_norm: 0.8595519997026155, iteration: 213767
loss: 1.0095243453979492,grad_norm: 0.86428420463432, iteration: 213768
loss: 1.0302166938781738,grad_norm: 0.9999990918439644, iteration: 213769
loss: 0.9930693507194519,grad_norm: 0.9186353569179996, iteration: 213770
loss: 0.9869610071182251,grad_norm: 0.8697100355986018, iteration: 213771
loss: 1.0138018131256104,grad_norm: 0.8856018343251304, iteration: 213772
loss: 0.9708574414253235,grad_norm: 0.8336851582217215, iteration: 213773
loss: 1.0132319927215576,grad_norm: 0.9118442074039458, iteration: 213774
loss: 1.064945101737976,grad_norm: 0.9028659146612363, iteration: 213775
loss: 1.029244065284729,grad_norm: 0.9999992719082378, iteration: 213776
loss: 1.016430139541626,grad_norm: 0.834126079854396, iteration: 213777
loss: 1.0218356847763062,grad_norm: 0.9999993024967292, iteration: 213778
loss: 0.9837646484375,grad_norm: 0.8554124166726836, iteration: 213779
loss: 1.0244882106781006,grad_norm: 0.9256664573232956, iteration: 213780
loss: 1.0197776556015015,grad_norm: 0.9999990190490133, iteration: 213781
loss: 0.9861945509910583,grad_norm: 0.8275305389055593, iteration: 213782
loss: 1.0507844686508179,grad_norm: 0.8014172769202622, iteration: 213783
loss: 1.0138083696365356,grad_norm: 0.8645916204375342, iteration: 213784
loss: 1.0062015056610107,grad_norm: 0.9553102386200083, iteration: 213785
loss: 1.0117533206939697,grad_norm: 0.7364517794065785, iteration: 213786
loss: 1.0158685445785522,grad_norm: 0.8730033204690658, iteration: 213787
loss: 1.0062930583953857,grad_norm: 0.8890657133647344, iteration: 213788
loss: 0.9789000153541565,grad_norm: 0.9999990333284448, iteration: 213789
loss: 1.017329454421997,grad_norm: 0.9999990570443114, iteration: 213790
loss: 1.0267384052276611,grad_norm: 0.9999990488591539, iteration: 213791
loss: 0.9766713380813599,grad_norm: 0.9027732023240171, iteration: 213792
loss: 0.948212206363678,grad_norm: 0.8817876708949942, iteration: 213793
loss: 1.0490912199020386,grad_norm: 0.847738957446276, iteration: 213794
loss: 1.023831844329834,grad_norm: 0.998744938924287, iteration: 213795
loss: 0.9727703928947449,grad_norm: 0.9999992725895371, iteration: 213796
loss: 1.0061001777648926,grad_norm: 0.8849932133082323, iteration: 213797
loss: 0.9743036031723022,grad_norm: 0.8407026872320645, iteration: 213798
loss: 0.9891816973686218,grad_norm: 0.9255470905415767, iteration: 213799
loss: 0.9901110529899597,grad_norm: 0.851645356618395, iteration: 213800
loss: 0.9720502495765686,grad_norm: 0.9656532348968696, iteration: 213801
loss: 0.9971112608909607,grad_norm: 0.9999991185786545, iteration: 213802
loss: 0.9933273196220398,grad_norm: 0.9160301442511769, iteration: 213803
loss: 0.9708493947982788,grad_norm: 0.9436560005648924, iteration: 213804
loss: 1.0088508129119873,grad_norm: 0.9281656773558625, iteration: 213805
loss: 1.0019471645355225,grad_norm: 0.8189572225360591, iteration: 213806
loss: 0.9766382575035095,grad_norm: 0.8937428564008327, iteration: 213807
loss: 1.0424610376358032,grad_norm: 0.9999991001301962, iteration: 213808
loss: 0.9966536164283752,grad_norm: 0.8250712233595408, iteration: 213809
loss: 0.97395920753479,grad_norm: 0.9196912020710032, iteration: 213810
loss: 1.0291481018066406,grad_norm: 0.9999996235711405, iteration: 213811
loss: 1.0003215074539185,grad_norm: 0.8259591246176002, iteration: 213812
loss: 1.0284689664840698,grad_norm: 0.8945929851819944, iteration: 213813
loss: 1.0578324794769287,grad_norm: 0.9999999658941457, iteration: 213814
loss: 1.0095276832580566,grad_norm: 0.9215379275632773, iteration: 213815
loss: 1.0924129486083984,grad_norm: 0.9999992894379087, iteration: 213816
loss: 0.9957554340362549,grad_norm: 0.8809815123093173, iteration: 213817
loss: 0.9925868511199951,grad_norm: 0.9041841277244815, iteration: 213818
loss: 1.0004448890686035,grad_norm: 0.8658321792941294, iteration: 213819
loss: 0.979850172996521,grad_norm: 0.9999991149681304, iteration: 213820
loss: 0.9717323780059814,grad_norm: 0.804613101112158, iteration: 213821
loss: 1.0184340476989746,grad_norm: 0.8955213253929476, iteration: 213822
loss: 0.9860844612121582,grad_norm: 0.9078836964421063, iteration: 213823
loss: 0.9980069398880005,grad_norm: 0.9101473313565505, iteration: 213824
loss: 1.004496693611145,grad_norm: 0.7792206090616037, iteration: 213825
loss: 1.0055766105651855,grad_norm: 0.9999991636836317, iteration: 213826
loss: 0.9857504367828369,grad_norm: 0.9999991059201372, iteration: 213827
loss: 1.0182781219482422,grad_norm: 0.9962649234824792, iteration: 213828
loss: 0.9948832988739014,grad_norm: 0.9414524192916476, iteration: 213829
loss: 0.9762771129608154,grad_norm: 0.9433591458788506, iteration: 213830
loss: 1.0213727951049805,grad_norm: 0.9686960523212638, iteration: 213831
loss: 0.987146258354187,grad_norm: 0.8529392837166926, iteration: 213832
loss: 0.9885523319244385,grad_norm: 0.9532880542288805, iteration: 213833
loss: 0.9580582976341248,grad_norm: 0.9999991294051531, iteration: 213834
loss: 0.9968227744102478,grad_norm: 0.9828074210273992, iteration: 213835
loss: 1.0063921213150024,grad_norm: 0.8381658103630252, iteration: 213836
loss: 1.0074410438537598,grad_norm: 0.9165666057748795, iteration: 213837
loss: 0.9788675308227539,grad_norm: 0.941760834735425, iteration: 213838
loss: 0.9689944386482239,grad_norm: 0.9999991634711745, iteration: 213839
loss: 1.016252875328064,grad_norm: 0.8432860016834038, iteration: 213840
loss: 0.9852269291877747,grad_norm: 0.9999992003124946, iteration: 213841
loss: 1.0150388479232788,grad_norm: 0.999999193176758, iteration: 213842
loss: 0.9757639765739441,grad_norm: 0.9999992102344158, iteration: 213843
loss: 0.9719521403312683,grad_norm: 0.9041913139020308, iteration: 213844
loss: 1.0340038537979126,grad_norm: 0.942275127719759, iteration: 213845
loss: 1.0081382989883423,grad_norm: 0.9999998998451621, iteration: 213846
loss: 1.0205204486846924,grad_norm: 0.9999991219452039, iteration: 213847
loss: 0.9689915776252747,grad_norm: 0.7668648813867173, iteration: 213848
loss: 1.0100021362304688,grad_norm: 0.8702653231162686, iteration: 213849
loss: 0.9262048602104187,grad_norm: 0.9694380847239343, iteration: 213850
loss: 1.0211206674575806,grad_norm: 0.8660311409395133, iteration: 213851
loss: 1.0262279510498047,grad_norm: 0.9999997640026779, iteration: 213852
loss: 1.0346221923828125,grad_norm: 0.9001089704504129, iteration: 213853
loss: 1.0215808153152466,grad_norm: 0.9999990454352861, iteration: 213854
loss: 0.9794085621833801,grad_norm: 0.795273930194381, iteration: 213855
loss: 1.0107033252716064,grad_norm: 0.9999991305535042, iteration: 213856
loss: 0.9905259609222412,grad_norm: 0.9867275221921478, iteration: 213857
loss: 1.0055440664291382,grad_norm: 0.9999990580722546, iteration: 213858
loss: 1.0539408922195435,grad_norm: 0.8139125579396755, iteration: 213859
loss: 1.0392142534255981,grad_norm: 0.9999999988695762, iteration: 213860
loss: 1.0034419298171997,grad_norm: 0.9290934796630929, iteration: 213861
loss: 1.0256320238113403,grad_norm: 0.9116337541531625, iteration: 213862
loss: 1.010947823524475,grad_norm: 0.978363625370243, iteration: 213863
loss: 1.0004446506500244,grad_norm: 0.8735035722776494, iteration: 213864
loss: 1.0021389722824097,grad_norm: 0.9939166603606039, iteration: 213865
loss: 1.0168508291244507,grad_norm: 0.9793932405165667, iteration: 213866
loss: 1.0109901428222656,grad_norm: 0.9085352335277416, iteration: 213867
loss: 0.9705672860145569,grad_norm: 0.9999991275075463, iteration: 213868
loss: 1.0001145601272583,grad_norm: 0.9444293892466679, iteration: 213869
loss: 1.0090566873550415,grad_norm: 0.9216789504110027, iteration: 213870
loss: 1.010684847831726,grad_norm: 0.9999990295848622, iteration: 213871
loss: 1.0309685468673706,grad_norm: 0.9999991578904923, iteration: 213872
loss: 1.029670000076294,grad_norm: 0.8493731094528435, iteration: 213873
loss: 1.0149561166763306,grad_norm: 0.9999991733394753, iteration: 213874
loss: 1.004763126373291,grad_norm: 0.9925124866199403, iteration: 213875
loss: 1.0073870420455933,grad_norm: 0.9999991574550942, iteration: 213876
loss: 1.0249192714691162,grad_norm: 0.9999990672523461, iteration: 213877
loss: 1.0266697406768799,grad_norm: 0.9789017852771389, iteration: 213878
loss: 1.0527979135513306,grad_norm: 0.8540716367379243, iteration: 213879
loss: 0.9332652688026428,grad_norm: 0.8890147629962272, iteration: 213880
loss: 1.0426688194274902,grad_norm: 0.9999994072753641, iteration: 213881
loss: 0.9608110189437866,grad_norm: 0.919759518134941, iteration: 213882
loss: 0.9799964427947998,grad_norm: 0.8799550600067328, iteration: 213883
loss: 0.9852668642997742,grad_norm: 0.942327802303403, iteration: 213884
loss: 0.9974253177642822,grad_norm: 0.8613572516417926, iteration: 213885
loss: 1.0425440073013306,grad_norm: 0.9139271287531001, iteration: 213886
loss: 1.0061765909194946,grad_norm: 0.9999990449729927, iteration: 213887
loss: 1.0094072818756104,grad_norm: 0.963615043217138, iteration: 213888
loss: 0.9801267385482788,grad_norm: 0.8548925774627615, iteration: 213889
loss: 1.0124341249465942,grad_norm: 0.8904263277506459, iteration: 213890
loss: 0.9764187335968018,grad_norm: 0.9456341783762137, iteration: 213891
loss: 0.9969524145126343,grad_norm: 0.9239149557925306, iteration: 213892
loss: 1.0063447952270508,grad_norm: 0.9050781472171807, iteration: 213893
loss: 0.9851384162902832,grad_norm: 0.8075704261179796, iteration: 213894
loss: 1.0963592529296875,grad_norm: 0.9368959540062516, iteration: 213895
loss: 0.9990987777709961,grad_norm: 0.930115410598514, iteration: 213896
loss: 1.0064412355422974,grad_norm: 0.9306033375196379, iteration: 213897
loss: 0.9968995451927185,grad_norm: 0.9558663230171801, iteration: 213898
loss: 0.9647966027259827,grad_norm: 0.9235897267088576, iteration: 213899
loss: 0.9879367351531982,grad_norm: 0.9000399844580463, iteration: 213900
loss: 1.0081473588943481,grad_norm: 0.9999998856990543, iteration: 213901
loss: 1.0859217643737793,grad_norm: 0.999999055734053, iteration: 213902
loss: 1.0264270305633545,grad_norm: 0.9999997455496159, iteration: 213903
loss: 0.9999731183052063,grad_norm: 0.841322132836301, iteration: 213904
loss: 0.9874568581581116,grad_norm: 0.9355248173648959, iteration: 213905
loss: 0.9923871755599976,grad_norm: 0.890928576947933, iteration: 213906
loss: 1.0087192058563232,grad_norm: 0.9999990732386502, iteration: 213907
loss: 1.0498147010803223,grad_norm: 0.9846340417480316, iteration: 213908
loss: 0.987831711769104,grad_norm: 0.9999990652555283, iteration: 213909
loss: 0.9808943867683411,grad_norm: 0.9999990965491067, iteration: 213910
loss: 0.9888144135475159,grad_norm: 0.8916787723963352, iteration: 213911
loss: 0.9761843681335449,grad_norm: 0.9617148415968038, iteration: 213912
loss: 0.977100133895874,grad_norm: 0.939622239055564, iteration: 213913
loss: 1.019757866859436,grad_norm: 0.999998943176479, iteration: 213914
loss: 0.9734354615211487,grad_norm: 0.7230182542122183, iteration: 213915
loss: 1.0149496793746948,grad_norm: 0.8543527405277475, iteration: 213916
loss: 1.0266867876052856,grad_norm: 0.9999989694298421, iteration: 213917
loss: 1.0313400030136108,grad_norm: 0.9999997914628175, iteration: 213918
loss: 0.9889755845069885,grad_norm: 0.9022616123455326, iteration: 213919
loss: 1.039584994316101,grad_norm: 0.9999995881801161, iteration: 213920
loss: 1.0645922422409058,grad_norm: 1.0000001019809217, iteration: 213921
loss: 1.0431339740753174,grad_norm: 0.9999996555806433, iteration: 213922
loss: 0.9894677996635437,grad_norm: 0.9999996636141599, iteration: 213923
loss: 1.0128281116485596,grad_norm: 0.9999991873802284, iteration: 213924
loss: 1.0292047262191772,grad_norm: 0.8021568072432934, iteration: 213925
loss: 1.002027988433838,grad_norm: 0.9999991124199433, iteration: 213926
loss: 1.0378038883209229,grad_norm: 0.9999995311458515, iteration: 213927
loss: 1.0649601221084595,grad_norm: 0.9999997422413767, iteration: 213928
loss: 1.0207514762878418,grad_norm: 0.9999992597980815, iteration: 213929
loss: 1.005751371383667,grad_norm: 0.9488900925919393, iteration: 213930
loss: 1.0439141988754272,grad_norm: 0.9999998180904254, iteration: 213931
loss: 1.007075548171997,grad_norm: 0.8083696201285618, iteration: 213932
loss: 0.9997054934501648,grad_norm: 0.999999166438851, iteration: 213933
loss: 1.0230664014816284,grad_norm: 0.9113182215058733, iteration: 213934
loss: 1.09983229637146,grad_norm: 0.8870267973817355, iteration: 213935
loss: 0.9975944757461548,grad_norm: 0.8798187801930284, iteration: 213936
loss: 1.044487714767456,grad_norm: 0.954922091537488, iteration: 213937
loss: 0.9714226722717285,grad_norm: 0.8638191087070507, iteration: 213938
loss: 1.0968655347824097,grad_norm: 0.9999994484416337, iteration: 213939
loss: 1.0046789646148682,grad_norm: 0.9999990852019425, iteration: 213940
loss: 0.9997736215591431,grad_norm: 0.8919075323956457, iteration: 213941
loss: 1.0011519193649292,grad_norm: 0.9999990863169141, iteration: 213942
loss: 1.039183259010315,grad_norm: 0.9999998408030388, iteration: 213943
loss: 1.0545638799667358,grad_norm: 0.9999993170766915, iteration: 213944
loss: 0.9489922523498535,grad_norm: 0.8913785664468655, iteration: 213945
loss: 1.0424141883850098,grad_norm: 0.9170587241073058, iteration: 213946
loss: 0.9635206460952759,grad_norm: 0.8678198590047825, iteration: 213947
loss: 0.9830195903778076,grad_norm: 0.9475788865992556, iteration: 213948
loss: 1.0119974613189697,grad_norm: 0.9999991941184955, iteration: 213949
loss: 0.9928341507911682,grad_norm: 0.8662306301268627, iteration: 213950
loss: 1.0328147411346436,grad_norm: 0.9999993488521697, iteration: 213951
loss: 1.0249345302581787,grad_norm: 0.8786306032870532, iteration: 213952
loss: 0.983905017375946,grad_norm: 0.9999992408288273, iteration: 213953
loss: 1.0091460943222046,grad_norm: 0.9999991752263037, iteration: 213954
loss: 1.029168725013733,grad_norm: 0.9332596594724151, iteration: 213955
loss: 1.1140836477279663,grad_norm: 0.9999998233425054, iteration: 213956
loss: 0.9817867279052734,grad_norm: 0.9999990114704077, iteration: 213957
loss: 0.9607594609260559,grad_norm: 0.9094665522234792, iteration: 213958
loss: 1.0208607912063599,grad_norm: 0.8738860636540435, iteration: 213959
loss: 0.9886816143989563,grad_norm: 0.8836642536071655, iteration: 213960
loss: 1.0327022075653076,grad_norm: 0.9760974125179883, iteration: 213961
loss: 1.0325499773025513,grad_norm: 0.9091373583835418, iteration: 213962
loss: 1.0216090679168701,grad_norm: 0.8988156587790986, iteration: 213963
loss: 1.026444673538208,grad_norm: 0.9999990748015016, iteration: 213964
loss: 1.0038021802902222,grad_norm: 0.7440186479431393, iteration: 213965
loss: 1.0029082298278809,grad_norm: 0.9999994182859991, iteration: 213966
loss: 1.0115267038345337,grad_norm: 0.9389681835869298, iteration: 213967
loss: 1.006001353263855,grad_norm: 0.9995123758346707, iteration: 213968
loss: 0.9952401518821716,grad_norm: 0.8250711705327762, iteration: 213969
loss: 0.9689836502075195,grad_norm: 0.9460410019773176, iteration: 213970
loss: 1.009639859199524,grad_norm: 0.9999990083445195, iteration: 213971
loss: 0.9901443123817444,grad_norm: 0.9999993106812213, iteration: 213972
loss: 0.9822648763656616,grad_norm: 0.9453455533307099, iteration: 213973
loss: 1.02545964717865,grad_norm: 0.9031531674252431, iteration: 213974
loss: 0.9786801338195801,grad_norm: 0.9999991020083007, iteration: 213975
loss: 0.9602696299552917,grad_norm: 0.9086200603698757, iteration: 213976
loss: 1.0193620920181274,grad_norm: 0.9038617241262968, iteration: 213977
loss: 0.9856821298599243,grad_norm: 0.8075715820524112, iteration: 213978
loss: 0.9786194562911987,grad_norm: 0.8346637864780321, iteration: 213979
loss: 0.9966172575950623,grad_norm: 0.9999990690929625, iteration: 213980
loss: 0.9697139263153076,grad_norm: 0.9235808105185757, iteration: 213981
loss: 0.9953649044036865,grad_norm: 0.892397796270925, iteration: 213982
loss: 0.9890711307525635,grad_norm: 0.8544595762667966, iteration: 213983
loss: 1.0291004180908203,grad_norm: 0.8971147662013509, iteration: 213984
loss: 1.0306735038757324,grad_norm: 0.99999945500247, iteration: 213985
loss: 0.9945735335350037,grad_norm: 0.9019868342295723, iteration: 213986
loss: 1.02043616771698,grad_norm: 0.9091082680502953, iteration: 213987
loss: 0.9818260073661804,grad_norm: 0.9999990971919299, iteration: 213988
loss: 1.0046429634094238,grad_norm: 0.9676174850393233, iteration: 213989
loss: 0.9554207921028137,grad_norm: 0.9476908891762614, iteration: 213990
loss: 0.9916521310806274,grad_norm: 0.9999993911845084, iteration: 213991
loss: 0.9729093313217163,grad_norm: 0.9999990035078933, iteration: 213992
loss: 0.9993959665298462,grad_norm: 0.9999991574387959, iteration: 213993
loss: 1.0293582677841187,grad_norm: 0.9999990408417199, iteration: 213994
loss: 1.079184889793396,grad_norm: 0.9999992287073858, iteration: 213995
loss: 0.9917304515838623,grad_norm: 0.9480700155810086, iteration: 213996
loss: 0.9815899729728699,grad_norm: 0.9013597786254006, iteration: 213997
loss: 1.0128921270370483,grad_norm: 0.9999991675507947, iteration: 213998
loss: 0.9693109393119812,grad_norm: 0.9999990764208521, iteration: 213999
loss: 0.9715145826339722,grad_norm: 0.9999990833824638, iteration: 214000
loss: 1.0217437744140625,grad_norm: 0.8119708522623498, iteration: 214001
loss: 0.9915461540222168,grad_norm: 0.8726596683745776, iteration: 214002
loss: 1.0032806396484375,grad_norm: 0.8654626801324661, iteration: 214003
loss: 0.9842429757118225,grad_norm: 0.8707024609802858, iteration: 214004
loss: 0.9818911552429199,grad_norm: 0.9744143868676707, iteration: 214005
loss: 1.0068883895874023,grad_norm: 0.9999991244630293, iteration: 214006
loss: 1.021899938583374,grad_norm: 0.9999993563779843, iteration: 214007
loss: 1.031780481338501,grad_norm: 0.9999989981185039, iteration: 214008
loss: 1.0124397277832031,grad_norm: 0.8560854547873252, iteration: 214009
loss: 0.9980331063270569,grad_norm: 0.8446576391269744, iteration: 214010
loss: 0.9868459105491638,grad_norm: 0.9999990402875316, iteration: 214011
loss: 1.0184544324874878,grad_norm: 0.973389335345668, iteration: 214012
loss: 1.0117435455322266,grad_norm: 0.8345245408159657, iteration: 214013
loss: 1.0050523281097412,grad_norm: 0.9861961704988574, iteration: 214014
loss: 0.9444620609283447,grad_norm: 0.8889975354260534, iteration: 214015
loss: 0.9634360671043396,grad_norm: 0.9999997728232348, iteration: 214016
loss: 1.045316457748413,grad_norm: 0.9093186841927035, iteration: 214017
loss: 0.9748994708061218,grad_norm: 0.8935523183465515, iteration: 214018
loss: 1.0070488452911377,grad_norm: 0.9999996574535549, iteration: 214019
loss: 0.9987204074859619,grad_norm: 0.9810612327517239, iteration: 214020
loss: 1.0106157064437866,grad_norm: 0.8035742201642891, iteration: 214021
loss: 1.0106861591339111,grad_norm: 0.8432846709738674, iteration: 214022
loss: 1.0128593444824219,grad_norm: 0.9105881865202041, iteration: 214023
loss: 1.0172067880630493,grad_norm: 0.9999989775383432, iteration: 214024
loss: 0.9807011485099792,grad_norm: 0.9999990268331962, iteration: 214025
loss: 0.9556659460067749,grad_norm: 0.9051294797450129, iteration: 214026
loss: 1.0064077377319336,grad_norm: 0.8996546984859803, iteration: 214027
loss: 0.933563232421875,grad_norm: 0.8038612989396515, iteration: 214028
loss: 1.005748987197876,grad_norm: 0.9347316900738613, iteration: 214029
loss: 1.024347186088562,grad_norm: 0.8133926072161696, iteration: 214030
loss: 0.9992625117301941,grad_norm: 0.8271678665662847, iteration: 214031
loss: 0.9802874326705933,grad_norm: 0.9999990650045278, iteration: 214032
loss: 0.9740329384803772,grad_norm: 0.8744516137630879, iteration: 214033
loss: 1.0297126770019531,grad_norm: 0.8358067102162196, iteration: 214034
loss: 0.9682044386863708,grad_norm: 0.9644908301364598, iteration: 214035
loss: 1.0089948177337646,grad_norm: 0.9999990551003811, iteration: 214036
loss: 1.0063896179199219,grad_norm: 0.9006143615762513, iteration: 214037
loss: 1.0104080438613892,grad_norm: 0.9835629644935837, iteration: 214038
loss: 1.023822546005249,grad_norm: 0.7701532962027755, iteration: 214039
loss: 0.9757418036460876,grad_norm: 0.8617924043960973, iteration: 214040
loss: 0.976334273815155,grad_norm: 0.9999991993226719, iteration: 214041
loss: 1.0113767385482788,grad_norm: 0.8274740659313552, iteration: 214042
loss: 1.0283466577529907,grad_norm: 0.9999997812838999, iteration: 214043
loss: 1.007315754890442,grad_norm: 0.8846555972526461, iteration: 214044
loss: 0.9587185978889465,grad_norm: 0.8496446022349982, iteration: 214045
loss: 1.0294219255447388,grad_norm: 0.9561651207163304, iteration: 214046
loss: 1.010501742362976,grad_norm: 0.9999990670337423, iteration: 214047
loss: 0.96563321352005,grad_norm: 0.7988489279469575, iteration: 214048
loss: 0.9998705983161926,grad_norm: 0.8503468808309087, iteration: 214049
loss: 1.0086349248886108,grad_norm: 0.7555559376003066, iteration: 214050
loss: 1.0045373439788818,grad_norm: 0.9056131908704749, iteration: 214051
loss: 1.0369385480880737,grad_norm: 0.9999991887636636, iteration: 214052
loss: 1.0134278535842896,grad_norm: 0.949707970795317, iteration: 214053
loss: 0.9827861189842224,grad_norm: 0.9999991511440148, iteration: 214054
loss: 1.0249991416931152,grad_norm: 0.901486374039966, iteration: 214055
loss: 1.0013518333435059,grad_norm: 0.8947424971856313, iteration: 214056
loss: 1.01095449924469,grad_norm: 0.9999992960458267, iteration: 214057
loss: 1.0026525259017944,grad_norm: 0.9394076280390933, iteration: 214058
loss: 1.02445650100708,grad_norm: 0.8897949111416704, iteration: 214059
loss: 1.0270546674728394,grad_norm: 0.8722652082683431, iteration: 214060
loss: 0.9977214336395264,grad_norm: 0.925624968496467, iteration: 214061
loss: 0.9703581929206848,grad_norm: 0.9999991415474151, iteration: 214062
loss: 1.0348966121673584,grad_norm: 0.9308732539241622, iteration: 214063
loss: 0.9896561503410339,grad_norm: 0.9012017337065615, iteration: 214064
loss: 0.9844827055931091,grad_norm: 0.9394153667893077, iteration: 214065
loss: 1.0149754285812378,grad_norm: 0.9999991271584933, iteration: 214066
loss: 1.0323702096939087,grad_norm: 0.9999992584140988, iteration: 214067
loss: 1.015588641166687,grad_norm: 0.8082152475681667, iteration: 214068
loss: 1.0129554271697998,grad_norm: 0.9999991075292326, iteration: 214069
loss: 1.002305269241333,grad_norm: 0.7739705099911887, iteration: 214070
loss: 0.9916910529136658,grad_norm: 0.9221032016788697, iteration: 214071
loss: 0.9853530526161194,grad_norm: 0.9999990955701029, iteration: 214072
loss: 1.0025038719177246,grad_norm: 0.9999989660108761, iteration: 214073
loss: 1.032045841217041,grad_norm: 0.9999992537634231, iteration: 214074
loss: 0.9998171329498291,grad_norm: 0.8425235764377735, iteration: 214075
loss: 0.9960366487503052,grad_norm: 0.8898640790409877, iteration: 214076
loss: 0.9650935530662537,grad_norm: 0.8543806331030251, iteration: 214077
loss: 0.9966615438461304,grad_norm: 0.999999183922009, iteration: 214078
loss: 0.996699869632721,grad_norm: 0.8719261378648466, iteration: 214079
loss: 1.0147206783294678,grad_norm: 0.9999991314793522, iteration: 214080
loss: 0.9831318855285645,grad_norm: 0.9368614845810076, iteration: 214081
loss: 0.9678803086280823,grad_norm: 0.9999991875070023, iteration: 214082
loss: 0.9710564613342285,grad_norm: 0.9648207991506444, iteration: 214083
loss: 1.0217665433883667,grad_norm: 0.9093161581742539, iteration: 214084
loss: 1.0132776498794556,grad_norm: 0.909311126964678, iteration: 214085
loss: 0.9854377508163452,grad_norm: 0.8935480693830743, iteration: 214086
loss: 1.0204373598098755,grad_norm: 0.9286782701129266, iteration: 214087
loss: 0.9844687581062317,grad_norm: 0.8063906604281551, iteration: 214088
loss: 0.9878793954849243,grad_norm: 0.8818942059518915, iteration: 214089
loss: 0.9810193777084351,grad_norm: 0.9999989341818705, iteration: 214090
loss: 0.9988991022109985,grad_norm: 0.9130931617650021, iteration: 214091
loss: 1.0268784761428833,grad_norm: 0.9999990541594437, iteration: 214092
loss: 0.9676175713539124,grad_norm: 0.9727607304357975, iteration: 214093
loss: 0.99466472864151,grad_norm: 0.9688042843624459, iteration: 214094
loss: 1.0171079635620117,grad_norm: 0.959396447435544, iteration: 214095
loss: 1.0010586977005005,grad_norm: 0.921444633632303, iteration: 214096
loss: 1.1040534973144531,grad_norm: 0.9999994199540709, iteration: 214097
loss: 1.0489497184753418,grad_norm: 0.9999998474779451, iteration: 214098
loss: 0.9826748967170715,grad_norm: 0.984325056897921, iteration: 214099
loss: 0.9893602728843689,grad_norm: 0.8865356737765546, iteration: 214100
loss: 0.9948924779891968,grad_norm: 0.7651589426832531, iteration: 214101
loss: 1.0127677917480469,grad_norm: 0.8364642817674026, iteration: 214102
loss: 0.9665190577507019,grad_norm: 0.9169522156995539, iteration: 214103
loss: 0.9985846281051636,grad_norm: 0.9124091343517299, iteration: 214104
loss: 0.9909458160400391,grad_norm: 0.9454913738660602, iteration: 214105
loss: 0.9716150164604187,grad_norm: 0.9839761809124519, iteration: 214106
loss: 1.009772539138794,grad_norm: 0.9999990446693757, iteration: 214107
loss: 1.0176695585250854,grad_norm: 0.9417526700261791, iteration: 214108
loss: 0.9527316093444824,grad_norm: 0.9955611943329837, iteration: 214109
loss: 1.0035823583602905,grad_norm: 0.8837038215876235, iteration: 214110
loss: 1.0002398490905762,grad_norm: 0.8698831166564502, iteration: 214111
loss: 0.9985786080360413,grad_norm: 0.9999990703840079, iteration: 214112
loss: 1.0123306512832642,grad_norm: 0.7814247848108548, iteration: 214113
loss: 1.0024523735046387,grad_norm: 0.9446118097124396, iteration: 214114
loss: 0.9728119969367981,grad_norm: 0.926669049436219, iteration: 214115
loss: 1.0532500743865967,grad_norm: 0.9598661375273985, iteration: 214116
loss: 1.0238724946975708,grad_norm: 0.9999998783525834, iteration: 214117
loss: 0.9684710502624512,grad_norm: 0.833596488653182, iteration: 214118
loss: 0.9937522411346436,grad_norm: 0.9029432053069045, iteration: 214119
loss: 0.9938637018203735,grad_norm: 0.9999991813641431, iteration: 214120
loss: 1.0141513347625732,grad_norm: 0.9082586613999946, iteration: 214121
loss: 0.961185872554779,grad_norm: 0.9999992593698841, iteration: 214122
loss: 0.9527787566184998,grad_norm: 0.9986241025610582, iteration: 214123
loss: 0.9862529635429382,grad_norm: 0.9999995368238442, iteration: 214124
loss: 1.0076614618301392,grad_norm: 0.9999991241560311, iteration: 214125
loss: 1.0061076879501343,grad_norm: 0.9999990199016691, iteration: 214126
loss: 1.0207136869430542,grad_norm: 0.8539836703414304, iteration: 214127
loss: 1.0408881902694702,grad_norm: 0.9571926811613317, iteration: 214128
loss: 1.1536402702331543,grad_norm: 0.9979155642996486, iteration: 214129
loss: 0.9931663870811462,grad_norm: 0.9064318464377287, iteration: 214130
loss: 1.0099729299545288,grad_norm: 0.8195707967809953, iteration: 214131
loss: 0.9728890657424927,grad_norm: 0.9274129079758487, iteration: 214132
loss: 1.0154187679290771,grad_norm: 0.9999993329297214, iteration: 214133
loss: 0.9818254113197327,grad_norm: 0.9999991212317203, iteration: 214134
loss: 0.9841238856315613,grad_norm: 0.8953793955668098, iteration: 214135
loss: 0.9614931344985962,grad_norm: 0.8830818917750765, iteration: 214136
loss: 1.0824453830718994,grad_norm: 0.9999990426773067, iteration: 214137
loss: 1.0148268938064575,grad_norm: 0.9047081942880477, iteration: 214138
loss: 1.006270408630371,grad_norm: 0.9692186691340289, iteration: 214139
loss: 0.9809531569480896,grad_norm: 0.9881489214550558, iteration: 214140
loss: 1.013297438621521,grad_norm: 0.999999061828625, iteration: 214141
loss: 1.0202610492706299,grad_norm: 0.9999994836917484, iteration: 214142
loss: 1.0038809776306152,grad_norm: 0.9999991327246756, iteration: 214143
loss: 0.9916464686393738,grad_norm: 0.9037545864614116, iteration: 214144
loss: 1.0042850971221924,grad_norm: 0.812732203901137, iteration: 214145
loss: 0.998184859752655,grad_norm: 0.9771699909581808, iteration: 214146
loss: 0.9627028107643127,grad_norm: 0.933515257128016, iteration: 214147
loss: 0.9751155972480774,grad_norm: 0.9119202414133742, iteration: 214148
loss: 0.9843090772628784,grad_norm: 0.9116435014901106, iteration: 214149
loss: 1.0198016166687012,grad_norm: 0.9999999401565941, iteration: 214150
loss: 1.0186353921890259,grad_norm: 0.9999997325110211, iteration: 214151
loss: 0.9339830875396729,grad_norm: 0.9430350424378213, iteration: 214152
loss: 0.9805818200111389,grad_norm: 0.7521915619997476, iteration: 214153
loss: 0.980678915977478,grad_norm: 0.8429561858703178, iteration: 214154
loss: 1.0351158380508423,grad_norm: 0.8222137110116736, iteration: 214155
loss: 0.9849579334259033,grad_norm: 0.999999889101199, iteration: 214156
loss: 1.004469633102417,grad_norm: 0.8842930694043079, iteration: 214157
loss: 0.9988369345664978,grad_norm: 0.9999993356846704, iteration: 214158
loss: 1.001563549041748,grad_norm: 0.9574761442276389, iteration: 214159
loss: 0.9537824988365173,grad_norm: 0.784370328854144, iteration: 214160
loss: 0.967687726020813,grad_norm: 0.8678987930785487, iteration: 214161
loss: 0.9832346439361572,grad_norm: 0.9629380268079896, iteration: 214162
loss: 1.0285700559616089,grad_norm: 0.9999992677964996, iteration: 214163
loss: 0.9869368076324463,grad_norm: 0.848037255122426, iteration: 214164
loss: 0.9571743607521057,grad_norm: 0.9930477937566277, iteration: 214165
loss: 1.035434603691101,grad_norm: 0.9443029611657274, iteration: 214166
loss: 0.9708391427993774,grad_norm: 0.9999991566952434, iteration: 214167
loss: 0.9622774124145508,grad_norm: 0.9074608707990076, iteration: 214168
loss: 1.0285574197769165,grad_norm: 0.9999991336930086, iteration: 214169
loss: 0.9901931285858154,grad_norm: 0.7995354030813786, iteration: 214170
loss: 0.9980284571647644,grad_norm: 0.9999990381659407, iteration: 214171
loss: 0.9720809459686279,grad_norm: 0.9999991023635236, iteration: 214172
loss: 0.9596471786499023,grad_norm: 0.9405436703088037, iteration: 214173
loss: 1.0174943208694458,grad_norm: 0.8856561043557548, iteration: 214174
loss: 0.9977176189422607,grad_norm: 0.8017755133587423, iteration: 214175
loss: 1.0170537233352661,grad_norm: 0.8804994812710584, iteration: 214176
loss: 0.9746707677841187,grad_norm: 0.9394694986526201, iteration: 214177
loss: 1.008500576019287,grad_norm: 0.913054102710327, iteration: 214178
loss: 0.9803513884544373,grad_norm: 0.9999991645315439, iteration: 214179
loss: 0.9490642547607422,grad_norm: 0.945829163790902, iteration: 214180
loss: 0.9966555833816528,grad_norm: 0.8853985148845703, iteration: 214181
loss: 0.9850122928619385,grad_norm: 0.985805199064072, iteration: 214182
loss: 0.9747621417045593,grad_norm: 0.9014494030270653, iteration: 214183
loss: 1.0124101638793945,grad_norm: 0.8260370458109424, iteration: 214184
loss: 1.0193506479263306,grad_norm: 0.9296748986582737, iteration: 214185
loss: 0.9999286532402039,grad_norm: 0.9443064992807811, iteration: 214186
loss: 0.9954652786254883,grad_norm: 0.9999992379180821, iteration: 214187
loss: 1.0268077850341797,grad_norm: 0.8159355050053901, iteration: 214188
loss: 0.9952442646026611,grad_norm: 0.9288177419299621, iteration: 214189
loss: 1.0417910814285278,grad_norm: 0.9999992191623089, iteration: 214190
loss: 1.0614444017410278,grad_norm: 0.9999990167543186, iteration: 214191
loss: 0.9982138276100159,grad_norm: 0.922979524033717, iteration: 214192
loss: 1.006490707397461,grad_norm: 0.8832939879234085, iteration: 214193
loss: 0.9827818870544434,grad_norm: 0.8202774243504272, iteration: 214194
loss: 1.1674069166183472,grad_norm: 0.9999993499937893, iteration: 214195
loss: 1.0062109231948853,grad_norm: 0.8907069897548378, iteration: 214196
loss: 1.0022461414337158,grad_norm: 0.9550679767795799, iteration: 214197
loss: 1.0150349140167236,grad_norm: 0.9999990272972115, iteration: 214198
loss: 1.0068660974502563,grad_norm: 0.8205092328074246, iteration: 214199
loss: 0.9703400731086731,grad_norm: 0.9999990932593613, iteration: 214200
loss: 1.0095593929290771,grad_norm: 0.881326662805571, iteration: 214201
loss: 0.983470618724823,grad_norm: 0.8947353703305646, iteration: 214202
loss: 1.005828619003296,grad_norm: 0.7264420943711549, iteration: 214203
loss: 0.9971361756324768,grad_norm: 0.9999990952490222, iteration: 214204
loss: 0.9962659478187561,grad_norm: 0.8345487546467489, iteration: 214205
loss: 0.9974842667579651,grad_norm: 0.9657920798114007, iteration: 214206
loss: 0.9697063565254211,grad_norm: 0.8403102219849633, iteration: 214207
loss: 0.9729882478713989,grad_norm: 0.9743040484068701, iteration: 214208
loss: 1.0220263004302979,grad_norm: 0.8537363392655132, iteration: 214209
loss: 1.0428959131240845,grad_norm: 0.9999989310929448, iteration: 214210
loss: 0.9907055497169495,grad_norm: 0.9999993182757755, iteration: 214211
loss: 1.048622727394104,grad_norm: 0.999999950717263, iteration: 214212
loss: 0.9696558117866516,grad_norm: 0.9955926052368603, iteration: 214213
loss: 0.9964373111724854,grad_norm: 0.9999991292166707, iteration: 214214
loss: 1.0311611890792847,grad_norm: 0.8674957578340198, iteration: 214215
loss: 0.9792764782905579,grad_norm: 0.8463550963230602, iteration: 214216
loss: 1.0441185235977173,grad_norm: 0.8960523174815201, iteration: 214217
loss: 0.9950109124183655,grad_norm: 0.8805191555991966, iteration: 214218
loss: 1.0193573236465454,grad_norm: 0.944072157323213, iteration: 214219
loss: 1.0018473863601685,grad_norm: 0.9999990584972686, iteration: 214220
loss: 1.0087320804595947,grad_norm: 0.99999953758547, iteration: 214221
loss: 1.0304832458496094,grad_norm: 0.9899917936231549, iteration: 214222
loss: 1.0150902271270752,grad_norm: 0.9999995675487869, iteration: 214223
loss: 1.0204906463623047,grad_norm: 0.9536358907204286, iteration: 214224
loss: 0.9953981041908264,grad_norm: 0.9999990350804251, iteration: 214225
loss: 0.9881965517997742,grad_norm: 0.9861939232116942, iteration: 214226
loss: 0.9863491654396057,grad_norm: 0.8908285031071691, iteration: 214227
loss: 1.0026775598526,grad_norm: 0.9668234743633588, iteration: 214228
loss: 0.9932327270507812,grad_norm: 0.8596112624492295, iteration: 214229
loss: 0.9986462593078613,grad_norm: 0.799707976124419, iteration: 214230
loss: 1.0119549036026,grad_norm: 0.9361237459779309, iteration: 214231
loss: 1.0069332122802734,grad_norm: 0.8868305343909462, iteration: 214232
loss: 1.0131324529647827,grad_norm: 0.9280667038261048, iteration: 214233
loss: 1.0072507858276367,grad_norm: 0.9999990576628165, iteration: 214234
loss: 0.9564825892448425,grad_norm: 0.90402826214939, iteration: 214235
loss: 1.0023486614227295,grad_norm: 0.9185594275246634, iteration: 214236
loss: 0.9977749586105347,grad_norm: 0.8752596401746037, iteration: 214237
loss: 1.1373049020767212,grad_norm: 0.9999998527366941, iteration: 214238
loss: 0.9938705563545227,grad_norm: 0.9508672430298601, iteration: 214239
loss: 1.0100990533828735,grad_norm: 0.8050658415940002, iteration: 214240
loss: 1.0071122646331787,grad_norm: 0.9681907926284358, iteration: 214241
loss: 0.9828441143035889,grad_norm: 0.9176826538572455, iteration: 214242
loss: 1.108825922012329,grad_norm: 1.000000008071817, iteration: 214243
loss: 0.9805744290351868,grad_norm: 0.9098567829366773, iteration: 214244
loss: 1.000439167022705,grad_norm: 0.9284966244125048, iteration: 214245
loss: 0.957064688205719,grad_norm: 0.8122897932583176, iteration: 214246
loss: 0.997616708278656,grad_norm: 0.8006560285714776, iteration: 214247
loss: 0.9921931028366089,grad_norm: 0.9319289744384467, iteration: 214248
loss: 1.0016371011734009,grad_norm: 0.9893734822384617, iteration: 214249
loss: 1.0102438926696777,grad_norm: 0.9761375237702998, iteration: 214250
loss: 1.008180022239685,grad_norm: 0.8275691576406495, iteration: 214251
loss: 1.0188100337982178,grad_norm: 0.8834413772814848, iteration: 214252
loss: 1.0496103763580322,grad_norm: 0.8596970561947721, iteration: 214253
loss: 1.0133569240570068,grad_norm: 0.9999992534878734, iteration: 214254
loss: 1.0002243518829346,grad_norm: 0.8788715501637979, iteration: 214255
loss: 1.0108704566955566,grad_norm: 0.8873970164439765, iteration: 214256
loss: 1.1816151142120361,grad_norm: 0.9999996292598342, iteration: 214257
loss: 1.0061348676681519,grad_norm: 0.97722374848532, iteration: 214258
loss: 0.9958464503288269,grad_norm: 0.8109517954612062, iteration: 214259
loss: 1.0182050466537476,grad_norm: 0.9057589383040389, iteration: 214260
loss: 1.0188297033309937,grad_norm: 0.9999995210377317, iteration: 214261
loss: 0.9653268456459045,grad_norm: 0.9259864016436556, iteration: 214262
loss: 0.988801896572113,grad_norm: 0.9223978975615741, iteration: 214263
loss: 0.9945394992828369,grad_norm: 0.9249647024082596, iteration: 214264
loss: 0.9906243085861206,grad_norm: 0.8208244176576867, iteration: 214265
loss: 0.9761835336685181,grad_norm: 0.9999991196274977, iteration: 214266
loss: 0.9597401022911072,grad_norm: 0.8396267083110984, iteration: 214267
loss: 1.0211827754974365,grad_norm: 0.9903861836846963, iteration: 214268
loss: 1.0036227703094482,grad_norm: 0.9999991793685735, iteration: 214269
loss: 1.0213977098464966,grad_norm: 0.999999094219827, iteration: 214270
loss: 1.0507023334503174,grad_norm: 0.93489256381015, iteration: 214271
loss: 0.9997079372406006,grad_norm: 0.8733901438593961, iteration: 214272
loss: 1.0159122943878174,grad_norm: 0.9999994329612537, iteration: 214273
loss: 1.0185861587524414,grad_norm: 0.8326530832250197, iteration: 214274
loss: 1.0136765241622925,grad_norm: 0.9999989569020165, iteration: 214275
loss: 0.9639118909835815,grad_norm: 0.999999415910618, iteration: 214276
loss: 1.0654819011688232,grad_norm: 0.9999995244970808, iteration: 214277
loss: 0.9763781428337097,grad_norm: 0.9999990217190562, iteration: 214278
loss: 1.0650126934051514,grad_norm: 0.9999998955488988, iteration: 214279
loss: 1.0269984006881714,grad_norm: 0.9999992234449343, iteration: 214280
loss: 1.0173308849334717,grad_norm: 0.8926125475756079, iteration: 214281
loss: 1.0972813367843628,grad_norm: 0.9999994115464876, iteration: 214282
loss: 1.0168108940124512,grad_norm: 0.7438202278861006, iteration: 214283
loss: 1.105443000793457,grad_norm: 0.999999837583202, iteration: 214284
loss: 0.9729218482971191,grad_norm: 0.9595576983891606, iteration: 214285
loss: 1.0206218957901,grad_norm: 0.953230770963612, iteration: 214286
loss: 1.0629457235336304,grad_norm: 0.9999998129399357, iteration: 214287
loss: 0.9855637550354004,grad_norm: 0.9999990407684014, iteration: 214288
loss: 1.0225210189819336,grad_norm: 0.9938241446173576, iteration: 214289
loss: 1.0026936531066895,grad_norm: 0.9999989679209127, iteration: 214290
loss: 0.9996076822280884,grad_norm: 0.9999999057578747, iteration: 214291
loss: 1.0053832530975342,grad_norm: 0.87704317159725, iteration: 214292
loss: 0.9820607304573059,grad_norm: 0.9999992501727478, iteration: 214293
loss: 0.9909761548042297,grad_norm: 0.9999991504259905, iteration: 214294
loss: 0.9861699342727661,grad_norm: 0.9213957983842757, iteration: 214295
loss: 1.0399079322814941,grad_norm: 0.9273062537325966, iteration: 214296
loss: 1.0887980461120605,grad_norm: 0.9999997413923174, iteration: 214297
loss: 0.9637518525123596,grad_norm: 0.8892017193892345, iteration: 214298
loss: 0.9915900826454163,grad_norm: 0.9825039120599411, iteration: 214299
loss: 0.9655324816703796,grad_norm: 0.9504072434510086, iteration: 214300
loss: 0.9667450785636902,grad_norm: 0.9351625083641458, iteration: 214301
loss: 0.9865043759346008,grad_norm: 0.9606827236484426, iteration: 214302
loss: 0.9579160809516907,grad_norm: 0.906805621074683, iteration: 214303
loss: 1.0521278381347656,grad_norm: 0.9999992658038547, iteration: 214304
loss: 1.0272300243377686,grad_norm: 0.9999993102617115, iteration: 214305
loss: 1.0013326406478882,grad_norm: 0.8281977191154738, iteration: 214306
loss: 1.019453525543213,grad_norm: 0.6917883405589769, iteration: 214307
loss: 0.9735605716705322,grad_norm: 0.9999992718077166, iteration: 214308
loss: 1.04279625415802,grad_norm: 0.993224066607737, iteration: 214309
loss: 1.0168123245239258,grad_norm: 0.9833723596733065, iteration: 214310
loss: 1.0064637660980225,grad_norm: 0.9688779565320663, iteration: 214311
loss: 0.9704030156135559,grad_norm: 0.898519459308427, iteration: 214312
loss: 0.988484263420105,grad_norm: 0.8699359881821189, iteration: 214313
loss: 1.018607497215271,grad_norm: 0.9527237409057594, iteration: 214314
loss: 0.9379191398620605,grad_norm: 0.9999994789236916, iteration: 214315
loss: 1.0525225400924683,grad_norm: 0.9999998383718741, iteration: 214316
loss: 0.9905088543891907,grad_norm: 0.7109375927081572, iteration: 214317
loss: 1.0241693258285522,grad_norm: 0.8847238974547381, iteration: 214318
loss: 1.0005967617034912,grad_norm: 0.9999990626377023, iteration: 214319
loss: 0.9906083941459656,grad_norm: 0.9210458973739585, iteration: 214320
loss: 1.0085865259170532,grad_norm: 0.8839720792334164, iteration: 214321
loss: 0.990860104560852,grad_norm: 0.8595597234584694, iteration: 214322
loss: 0.9937967658042908,grad_norm: 0.9999991027726638, iteration: 214323
loss: 0.9790546298027039,grad_norm: 0.9999998817598695, iteration: 214324
loss: 0.9955514073371887,grad_norm: 0.9999991973169994, iteration: 214325
loss: 0.9878033399581909,grad_norm: 0.8414088734438987, iteration: 214326
loss: 0.9449918866157532,grad_norm: 0.8221490534368603, iteration: 214327
loss: 1.0248608589172363,grad_norm: 0.7885423107550864, iteration: 214328
loss: 0.9969202876091003,grad_norm: 0.9635246180550388, iteration: 214329
loss: 1.0152013301849365,grad_norm: 0.8235436725579701, iteration: 214330
loss: 0.9919739961624146,grad_norm: 0.9999989721709637, iteration: 214331
loss: 0.9946248531341553,grad_norm: 0.9999990418713728, iteration: 214332
loss: 1.0291433334350586,grad_norm: 0.9951582453209017, iteration: 214333
loss: 0.9711427092552185,grad_norm: 0.999999208901042, iteration: 214334
loss: 1.024634838104248,grad_norm: 0.9999991655121786, iteration: 214335
loss: 0.9750354290008545,grad_norm: 0.9679424426214096, iteration: 214336
loss: 1.0400608777999878,grad_norm: 0.9999996210234788, iteration: 214337
loss: 0.9959676861763,grad_norm: 0.8437321831083614, iteration: 214338
loss: 1.0883171558380127,grad_norm: 0.9999999101552047, iteration: 214339
loss: 0.967543363571167,grad_norm: 0.8138055169654637, iteration: 214340
loss: 1.0064644813537598,grad_norm: 0.9999992471465768, iteration: 214341
loss: 1.019733190536499,grad_norm: 0.9776759187009288, iteration: 214342
loss: 0.9918267130851746,grad_norm: 0.999999263355618, iteration: 214343
loss: 1.0190953016281128,grad_norm: 0.8422167444735832, iteration: 214344
loss: 1.0039963722229004,grad_norm: 0.9246746155679706, iteration: 214345
loss: 1.0403857231140137,grad_norm: 0.9999993305947855, iteration: 214346
loss: 0.9534823894500732,grad_norm: 0.956659087982908, iteration: 214347
loss: 0.9689541459083557,grad_norm: 0.9999991377067595, iteration: 214348
loss: 0.9804964661598206,grad_norm: 0.9999989518201277, iteration: 214349
loss: 1.0242395401000977,grad_norm: 0.9364181151748144, iteration: 214350
loss: 0.9384781122207642,grad_norm: 0.8938603257289248, iteration: 214351
loss: 1.0281476974487305,grad_norm: 0.9999991843618963, iteration: 214352
loss: 1.0222935676574707,grad_norm: 0.9999990804561311, iteration: 214353
loss: 1.0118364095687866,grad_norm: 0.8827581838294463, iteration: 214354
loss: 1.0216927528381348,grad_norm: 0.999999175714884, iteration: 214355
loss: 1.0873154401779175,grad_norm: 0.9999997823890278, iteration: 214356
loss: 0.9809090495109558,grad_norm: 0.7992903438105764, iteration: 214357
loss: 1.007567286491394,grad_norm: 0.9999990894129692, iteration: 214358
loss: 1.0195801258087158,grad_norm: 0.999999196637315, iteration: 214359
loss: 0.9790677428245544,grad_norm: 0.9501109415451081, iteration: 214360
loss: 0.9856170415878296,grad_norm: 0.8555223239467078, iteration: 214361
loss: 0.9793452620506287,grad_norm: 0.898444083685015, iteration: 214362
loss: 1.0634490251541138,grad_norm: 0.9999998268569784, iteration: 214363
loss: 1.0048437118530273,grad_norm: 0.8028896367708205, iteration: 214364
loss: 0.9772613048553467,grad_norm: 0.9184702834793517, iteration: 214365
loss: 1.0335816144943237,grad_norm: 0.9999990479669191, iteration: 214366
loss: 1.0100610256195068,grad_norm: 0.9999990515579706, iteration: 214367
loss: 0.9871227145195007,grad_norm: 0.9999992295029294, iteration: 214368
loss: 1.0162572860717773,grad_norm: 0.999998988598621, iteration: 214369
loss: 0.9935824871063232,grad_norm: 0.9999992932816948, iteration: 214370
loss: 1.0292978286743164,grad_norm: 0.9859409596764251, iteration: 214371
loss: 0.9847074747085571,grad_norm: 0.9021996840108242, iteration: 214372
loss: 1.013323426246643,grad_norm: 0.99999910714109, iteration: 214373
loss: 0.9700990915298462,grad_norm: 0.9999991039855087, iteration: 214374
loss: 0.970857560634613,grad_norm: 0.9931739347810704, iteration: 214375
loss: 1.050111174583435,grad_norm: 0.9999993945292845, iteration: 214376
loss: 1.0286829471588135,grad_norm: 0.9902858741550182, iteration: 214377
loss: 0.994098424911499,grad_norm: 0.9999990892217038, iteration: 214378
loss: 1.0297253131866455,grad_norm: 0.9999989760613945, iteration: 214379
loss: 1.0088555812835693,grad_norm: 0.9741361476587895, iteration: 214380
loss: 0.9853283762931824,grad_norm: 0.9537814363412043, iteration: 214381
loss: 0.9864588379859924,grad_norm: 0.9999991147880175, iteration: 214382
loss: 1.0309208631515503,grad_norm: 0.9999992442444049, iteration: 214383
loss: 1.032025694847107,grad_norm: 0.9999990465850304, iteration: 214384
loss: 1.0016106367111206,grad_norm: 0.9236598739853933, iteration: 214385
loss: 0.9754301309585571,grad_norm: 0.9812212005893296, iteration: 214386
loss: 0.9854930639266968,grad_norm: 0.9972841316368424, iteration: 214387
loss: 1.0963079929351807,grad_norm: 0.9999996660433925, iteration: 214388
loss: 0.9818605184555054,grad_norm: 0.9280683479846513, iteration: 214389
loss: 0.954664945602417,grad_norm: 0.9819192514151419, iteration: 214390
loss: 1.009588599205017,grad_norm: 0.827242297566277, iteration: 214391
loss: 1.0155287981033325,grad_norm: 0.8863562100446776, iteration: 214392
loss: 0.9736660122871399,grad_norm: 0.8653377943814896, iteration: 214393
loss: 1.0238162279129028,grad_norm: 0.8723417198761405, iteration: 214394
loss: 1.0162558555603027,grad_norm: 0.8544247383675135, iteration: 214395
loss: 0.9658103585243225,grad_norm: 0.9999990573033285, iteration: 214396
loss: 0.9615585207939148,grad_norm: 0.8644411050040641, iteration: 214397
loss: 0.9875502586364746,grad_norm: 0.9999991634250874, iteration: 214398
loss: 1.0068609714508057,grad_norm: 0.9999990734952668, iteration: 214399
loss: 0.9700720906257629,grad_norm: 0.897593859674272, iteration: 214400
loss: 0.9944418668746948,grad_norm: 0.9270418154500097, iteration: 214401
loss: 0.9948854446411133,grad_norm: 0.7804910145663654, iteration: 214402
loss: 1.0198215246200562,grad_norm: 0.9540030370754611, iteration: 214403
loss: 0.997438371181488,grad_norm: 0.8290055482031033, iteration: 214404
loss: 1.0413293838500977,grad_norm: 0.9999991383479497, iteration: 214405
loss: 0.9873765110969543,grad_norm: 0.9429749624447565, iteration: 214406
loss: 1.009976863861084,grad_norm: 0.9542809416599031, iteration: 214407
loss: 1.0124366283416748,grad_norm: 0.918423251509325, iteration: 214408
loss: 1.0079036951065063,grad_norm: 0.9907700859243785, iteration: 214409
loss: 0.9647178053855896,grad_norm: 0.9999999585692445, iteration: 214410
loss: 0.9930846691131592,grad_norm: 0.9120852058803783, iteration: 214411
loss: 0.996523380279541,grad_norm: 0.9188316887671442, iteration: 214412
loss: 1.0107910633087158,grad_norm: 0.8329143182648362, iteration: 214413
loss: 1.0199131965637207,grad_norm: 0.9999991299152308, iteration: 214414
loss: 0.9800711870193481,grad_norm: 0.9999999477504926, iteration: 214415
loss: 0.982640266418457,grad_norm: 0.9327581705282305, iteration: 214416
loss: 1.0239813327789307,grad_norm: 0.747616698817033, iteration: 214417
loss: 0.950736939907074,grad_norm: 0.898807365752068, iteration: 214418
loss: 1.0305668115615845,grad_norm: 0.9476939970798022, iteration: 214419
loss: 0.9794014096260071,grad_norm: 0.8828376030807141, iteration: 214420
loss: 1.0017319917678833,grad_norm: 0.8258901272438633, iteration: 214421
loss: 1.037294626235962,grad_norm: 0.9999999008952317, iteration: 214422
loss: 1.0007740259170532,grad_norm: 0.8959539136540553, iteration: 214423
loss: 1.0253905057907104,grad_norm: 0.9167430046931381, iteration: 214424
loss: 0.986662745475769,grad_norm: 0.6825401268458097, iteration: 214425
loss: 1.0297625064849854,grad_norm: 0.8437782541966572, iteration: 214426
loss: 1.0183665752410889,grad_norm: 0.9999991396501594, iteration: 214427
loss: 1.0020612478256226,grad_norm: 0.774646543295355, iteration: 214428
loss: 1.0223181247711182,grad_norm: 0.9403372801758347, iteration: 214429
loss: 0.9865623116493225,grad_norm: 0.9999989165912408, iteration: 214430
loss: 1.0325345993041992,grad_norm: 0.9999989812894302, iteration: 214431
loss: 0.9909331798553467,grad_norm: 0.8511946804082274, iteration: 214432
loss: 0.962838888168335,grad_norm: 0.9999992839707164, iteration: 214433
loss: 1.0122066736221313,grad_norm: 0.960469081716233, iteration: 214434
loss: 1.1116764545440674,grad_norm: 0.999999521766968, iteration: 214435
loss: 1.0467944145202637,grad_norm: 0.999999124963641, iteration: 214436
loss: 0.9880850911140442,grad_norm: 0.9999991088795452, iteration: 214437
loss: 0.989974319934845,grad_norm: 0.9041891269902854, iteration: 214438
loss: 1.052121639251709,grad_norm: 0.9999997089583098, iteration: 214439
loss: 1.0353883504867554,grad_norm: 0.9999999872144357, iteration: 214440
loss: 1.000414252281189,grad_norm: 0.8981024021173355, iteration: 214441
loss: 1.0140244960784912,grad_norm: 0.793569788762, iteration: 214442
loss: 0.9885802865028381,grad_norm: 0.9142100556976488, iteration: 214443
loss: 1.0175644159317017,grad_norm: 0.9134819828514567, iteration: 214444
loss: 0.9865973591804504,grad_norm: 0.9999991375628016, iteration: 214445
loss: 1.0235180854797363,grad_norm: 0.9999989967304397, iteration: 214446
loss: 0.9736109972000122,grad_norm: 0.9999995297937385, iteration: 214447
loss: 1.0485846996307373,grad_norm: 0.9200773768251753, iteration: 214448
loss: 0.9935203194618225,grad_norm: 0.9999992553229233, iteration: 214449
loss: 0.9638080596923828,grad_norm: 0.9059836117010176, iteration: 214450
loss: 1.0232151746749878,grad_norm: 0.74214549560786, iteration: 214451
loss: 1.0457348823547363,grad_norm: 0.9999989658548181, iteration: 214452
loss: 1.0049114227294922,grad_norm: 0.9999990307086628, iteration: 214453
loss: 0.9873992800712585,grad_norm: 0.9559137586074496, iteration: 214454
loss: 0.9748578667640686,grad_norm: 0.9999991193431846, iteration: 214455
loss: 1.0320974588394165,grad_norm: 0.999999356411904, iteration: 214456
loss: 1.0111899375915527,grad_norm: 0.9015074317490512, iteration: 214457
loss: 0.9830377101898193,grad_norm: 0.8441625289930069, iteration: 214458
loss: 1.0217254161834717,grad_norm: 0.9999995457033357, iteration: 214459
loss: 0.9996586441993713,grad_norm: 0.834431604147399, iteration: 214460
loss: 1.0524883270263672,grad_norm: 0.9999991708154464, iteration: 214461
loss: 1.0433191061019897,grad_norm: 0.7924254778509799, iteration: 214462
loss: 0.9913999438285828,grad_norm: 0.7687123213994275, iteration: 214463
loss: 1.0094552040100098,grad_norm: 0.9673863064039376, iteration: 214464
loss: 1.087766408920288,grad_norm: 0.9999996363210603, iteration: 214465
loss: 1.0320818424224854,grad_norm: 0.9999994725688335, iteration: 214466
loss: 1.277855634689331,grad_norm: 0.9999994213457672, iteration: 214467
loss: 1.1359000205993652,grad_norm: 0.9999990194150647, iteration: 214468
loss: 1.0214192867279053,grad_norm: 0.9434442516909946, iteration: 214469
loss: 0.9845942258834839,grad_norm: 0.9999994300489695, iteration: 214470
loss: 1.0283076763153076,grad_norm: 0.989543726480287, iteration: 214471
loss: 1.1386257410049438,grad_norm: 0.9999997792818831, iteration: 214472
loss: 1.092939019203186,grad_norm: 0.9999996329170486, iteration: 214473
loss: 1.2076635360717773,grad_norm: 0.9999998748527944, iteration: 214474
loss: 1.008167028427124,grad_norm: 0.9999992948590283, iteration: 214475
loss: 1.0539939403533936,grad_norm: 0.9999996863089853, iteration: 214476
loss: 1.1222120523452759,grad_norm: 0.9999996741749037, iteration: 214477
loss: 1.0049673318862915,grad_norm: 0.8733942210490194, iteration: 214478
loss: 0.9755604863166809,grad_norm: 0.8901284103290624, iteration: 214479
loss: 1.052026629447937,grad_norm: 0.999999399203192, iteration: 214480
loss: 0.998026430606842,grad_norm: 0.9268997541881113, iteration: 214481
loss: 1.00322425365448,grad_norm: 0.9999991005386581, iteration: 214482
loss: 0.9939442276954651,grad_norm: 0.9319830280263588, iteration: 214483
loss: 1.0237579345703125,grad_norm: 0.9707977738758117, iteration: 214484
loss: 0.9538399577140808,grad_norm: 0.9999989387753269, iteration: 214485
loss: 0.9972991943359375,grad_norm: 0.9999995989195699, iteration: 214486
loss: 0.9791929721832275,grad_norm: 0.9999991462711996, iteration: 214487
loss: 0.9983656406402588,grad_norm: 0.8676401339200313, iteration: 214488
loss: 1.0138880014419556,grad_norm: 0.9999998588354002, iteration: 214489
loss: 1.0389996767044067,grad_norm: 0.9999994077494581, iteration: 214490
loss: 1.0222257375717163,grad_norm: 0.9999991650017735, iteration: 214491
loss: 1.01493501663208,grad_norm: 0.9999992198702933, iteration: 214492
loss: 1.0037399530410767,grad_norm: 0.8821917075381212, iteration: 214493
loss: 0.9968599081039429,grad_norm: 0.8240177902879379, iteration: 214494
loss: 1.0058534145355225,grad_norm: 0.9999990978941619, iteration: 214495
loss: 0.9927865862846375,grad_norm: 0.937983993205872, iteration: 214496
loss: 0.9531263709068298,grad_norm: 0.9999989484415869, iteration: 214497
loss: 1.0223301649093628,grad_norm: 0.9350545246282137, iteration: 214498
loss: 1.0205881595611572,grad_norm: 0.9999995654643921, iteration: 214499
loss: 0.9921486377716064,grad_norm: 0.8864136551651124, iteration: 214500
loss: 1.0424473285675049,grad_norm: 0.9999991852858374, iteration: 214501
loss: 1.0050309896469116,grad_norm: 0.8880113432048211, iteration: 214502
loss: 0.9868108630180359,grad_norm: 0.9999995678843798, iteration: 214503
loss: 1.0131038427352905,grad_norm: 0.8052760769764757, iteration: 214504
loss: 1.0620921850204468,grad_norm: 0.9999997834205566, iteration: 214505
loss: 1.0421624183654785,grad_norm: 0.9868065273096859, iteration: 214506
loss: 1.0026757717132568,grad_norm: 0.8646632176310928, iteration: 214507
loss: 0.9488813877105713,grad_norm: 0.831298416114091, iteration: 214508
loss: 1.022202730178833,grad_norm: 0.9133499996849782, iteration: 214509
loss: 0.9901164770126343,grad_norm: 0.7614926588807012, iteration: 214510
loss: 1.0015077590942383,grad_norm: 0.9999998391211167, iteration: 214511
loss: 1.0382940769195557,grad_norm: 0.9999991470314058, iteration: 214512
loss: 0.9939901232719421,grad_norm: 0.9497588437024975, iteration: 214513
loss: 1.0335103273391724,grad_norm: 0.9999991078256071, iteration: 214514
loss: 0.9866485595703125,grad_norm: 0.9999991608706935, iteration: 214515
loss: 0.9863619208335876,grad_norm: 0.8989109272151244, iteration: 214516
loss: 1.0474048852920532,grad_norm: 0.9999996366302436, iteration: 214517
loss: 1.0247178077697754,grad_norm: 0.85962617111092, iteration: 214518
loss: 0.9953506588935852,grad_norm: 0.9822281574845297, iteration: 214519
loss: 1.036193609237671,grad_norm: 0.9999991650660567, iteration: 214520
loss: 0.9973686933517456,grad_norm: 0.8330664112420963, iteration: 214521
loss: 0.9809398055076599,grad_norm: 0.9894788355463255, iteration: 214522
loss: 0.981962263584137,grad_norm: 0.8427120544182816, iteration: 214523
loss: 1.103137493133545,grad_norm: 0.999999574482057, iteration: 214524
loss: 0.9855687022209167,grad_norm: 0.8718311049243215, iteration: 214525
loss: 1.041959524154663,grad_norm: 0.9999990978433149, iteration: 214526
loss: 1.021902084350586,grad_norm: 0.9999991315430704, iteration: 214527
loss: 1.0116299390792847,grad_norm: 0.9180092132499673, iteration: 214528
loss: 1.027457356452942,grad_norm: 0.8808326063766143, iteration: 214529
loss: 0.9767962098121643,grad_norm: 0.9999991504303684, iteration: 214530
loss: 0.999261736869812,grad_norm: 0.9999991467005487, iteration: 214531
loss: 1.0209414958953857,grad_norm: 0.9279930235513324, iteration: 214532
loss: 0.9848546981811523,grad_norm: 0.9999991989659923, iteration: 214533
loss: 1.0337326526641846,grad_norm: 0.9999999129806677, iteration: 214534
loss: 0.9802502989768982,grad_norm: 0.9481575596663561, iteration: 214535
loss: 1.0051312446594238,grad_norm: 0.7758366693819831, iteration: 214536
loss: 1.0109621286392212,grad_norm: 0.999999100836203, iteration: 214537
loss: 0.9642114043235779,grad_norm: 0.9754428817934273, iteration: 214538
loss: 0.9748491048812866,grad_norm: 0.9459595225322381, iteration: 214539
loss: 1.0285131931304932,grad_norm: 0.9999990903449492, iteration: 214540
loss: 0.9872477054595947,grad_norm: 0.9149135584359428, iteration: 214541
loss: 0.9653867483139038,grad_norm: 0.9999997965276735, iteration: 214542
loss: 0.9790273308753967,grad_norm: 0.9999989274930438, iteration: 214543
loss: 1.005586862564087,grad_norm: 0.8767834378628614, iteration: 214544
loss: 1.0197616815567017,grad_norm: 0.8713378261714895, iteration: 214545
loss: 0.9927363395690918,grad_norm: 0.9999991305810431, iteration: 214546
loss: 0.9989333748817444,grad_norm: 0.9525297284089498, iteration: 214547
loss: 0.9957623481750488,grad_norm: 0.7980096484064938, iteration: 214548
loss: 1.0303236246109009,grad_norm: 0.9999991975941954, iteration: 214549
loss: 0.9958180785179138,grad_norm: 0.999999161533602, iteration: 214550
loss: 0.9816713929176331,grad_norm: 0.999999333949211, iteration: 214551
loss: 1.0104857683181763,grad_norm: 0.9999991316290652, iteration: 214552
loss: 1.007993459701538,grad_norm: 0.97231260377604, iteration: 214553
loss: 0.9765050411224365,grad_norm: 0.8214787829471292, iteration: 214554
loss: 1.0108720064163208,grad_norm: 0.8628012565465791, iteration: 214555
loss: 0.9848352074623108,grad_norm: 0.9242896800367033, iteration: 214556
loss: 0.9769145846366882,grad_norm: 0.8432599629687796, iteration: 214557
loss: 1.0030505657196045,grad_norm: 0.9574366977016631, iteration: 214558
loss: 0.9892919659614563,grad_norm: 0.9040444825751554, iteration: 214559
loss: 0.9709172248840332,grad_norm: 0.9357194593064738, iteration: 214560
loss: 1.0293058156967163,grad_norm: 0.8059082606512875, iteration: 214561
loss: 1.0260672569274902,grad_norm: 0.9768839474597297, iteration: 214562
loss: 0.9515934586524963,grad_norm: 0.9192682877071715, iteration: 214563
loss: 1.017516851425171,grad_norm: 0.9999989231963591, iteration: 214564
loss: 1.0229835510253906,grad_norm: 0.9754226336586278, iteration: 214565
loss: 0.9903321862220764,grad_norm: 0.9719235329076333, iteration: 214566
loss: 0.9801225662231445,grad_norm: 0.8932082975288366, iteration: 214567
loss: 1.001136064529419,grad_norm: 0.9999992092440253, iteration: 214568
loss: 1.0622106790542603,grad_norm: 0.9890436663191191, iteration: 214569
loss: 1.0405468940734863,grad_norm: 0.8139343002874073, iteration: 214570
loss: 0.988143265247345,grad_norm: 0.9999990588183439, iteration: 214571
loss: 0.9911699295043945,grad_norm: 0.999999242209926, iteration: 214572
loss: 0.9911590814590454,grad_norm: 0.8634858266748104, iteration: 214573
loss: 0.9908628463745117,grad_norm: 0.9999990502221364, iteration: 214574
loss: 0.9588873386383057,grad_norm: 0.9213651513674956, iteration: 214575
loss: 0.9818927645683289,grad_norm: 0.9395426190124776, iteration: 214576
loss: 1.021828055381775,grad_norm: 0.902732321362711, iteration: 214577
loss: 0.9646033048629761,grad_norm: 0.8973508848280679, iteration: 214578
loss: 1.060582160949707,grad_norm: 0.999999112389624, iteration: 214579
loss: 0.9767645001411438,grad_norm: 0.9055319369688464, iteration: 214580
loss: 1.0130666494369507,grad_norm: 0.99999906846481, iteration: 214581
loss: 0.9882165193557739,grad_norm: 0.9409590859946104, iteration: 214582
loss: 1.0130085945129395,grad_norm: 0.931446850264102, iteration: 214583
loss: 0.9832093715667725,grad_norm: 0.9999991673868796, iteration: 214584
loss: 0.9782796502113342,grad_norm: 0.9999992357540359, iteration: 214585
loss: 1.009334921836853,grad_norm: 0.8749404946604319, iteration: 214586
loss: 0.957933783531189,grad_norm: 0.8315571798943755, iteration: 214587
loss: 1.012580156326294,grad_norm: 0.9999989930091143, iteration: 214588
loss: 0.9728872179985046,grad_norm: 0.8502782983959801, iteration: 214589
loss: 1.0351760387420654,grad_norm: 0.804302654362467, iteration: 214590
loss: 1.0556436777114868,grad_norm: 0.9999996793034764, iteration: 214591
loss: 1.0252918004989624,grad_norm: 0.8555951867066305, iteration: 214592
loss: 0.9792959094047546,grad_norm: 0.9158178880023079, iteration: 214593
loss: 1.0245949029922485,grad_norm: 0.8073428382805572, iteration: 214594
loss: 0.9831060767173767,grad_norm: 0.9642696853730668, iteration: 214595
loss: 1.0192872285842896,grad_norm: 0.8822458396566264, iteration: 214596
loss: 0.999908983707428,grad_norm: 0.8493472543377188, iteration: 214597
loss: 0.994137167930603,grad_norm: 0.8783742836392342, iteration: 214598
loss: 0.9840711951255798,grad_norm: 0.9562140091326254, iteration: 214599
loss: 0.9841083884239197,grad_norm: 0.8846386387818831, iteration: 214600
loss: 1.1312118768692017,grad_norm: 0.9999995432766213, iteration: 214601
loss: 0.9885725378990173,grad_norm: 0.9643735524087553, iteration: 214602
loss: 0.9869056344032288,grad_norm: 0.8569677627498279, iteration: 214603
loss: 0.9982315301895142,grad_norm: 0.8594894526557877, iteration: 214604
loss: 1.0371743440628052,grad_norm: 0.9637924915637207, iteration: 214605
loss: 1.0252338647842407,grad_norm: 0.999999198556423, iteration: 214606
loss: 0.9530351758003235,grad_norm: 0.9999990383380273, iteration: 214607
loss: 1.0199790000915527,grad_norm: 0.9424062273203543, iteration: 214608
loss: 1.004540205001831,grad_norm: 0.9626901522723669, iteration: 214609
loss: 1.0181010961532593,grad_norm: 0.9999994953694648, iteration: 214610
loss: 1.0001589059829712,grad_norm: 0.9999990530411287, iteration: 214611
loss: 1.0007389783859253,grad_norm: 0.920009524046961, iteration: 214612
loss: 1.0239086151123047,grad_norm: 0.8206025220133665, iteration: 214613
loss: 1.071028232574463,grad_norm: 0.9999990835362181, iteration: 214614
loss: 0.9880610108375549,grad_norm: 0.9975164514663318, iteration: 214615
loss: 1.0649056434631348,grad_norm: 0.99999925677037, iteration: 214616
loss: 0.9285455346107483,grad_norm: 0.9999990196162162, iteration: 214617
loss: 0.9894487857818604,grad_norm: 0.7662027887780637, iteration: 214618
loss: 1.1575145721435547,grad_norm: 0.9999992604511491, iteration: 214619
loss: 0.9510903358459473,grad_norm: 0.8148779192944416, iteration: 214620
loss: 0.9696364402770996,grad_norm: 0.8612720577172456, iteration: 214621
loss: 0.9384627342224121,grad_norm: 0.9248881210700954, iteration: 214622
loss: 1.0063557624816895,grad_norm: 0.8917804711587172, iteration: 214623
loss: 1.0221883058547974,grad_norm: 0.8734658992438002, iteration: 214624
loss: 1.0008169412612915,grad_norm: 0.9999989381622774, iteration: 214625
loss: 1.025328278541565,grad_norm: 0.9071275803510357, iteration: 214626
loss: 1.0151796340942383,grad_norm: 0.9095872723243007, iteration: 214627
loss: 0.9557127356529236,grad_norm: 0.7922234384253914, iteration: 214628
loss: 0.9944470524787903,grad_norm: 0.9999990630238654, iteration: 214629
loss: 0.9779959917068481,grad_norm: 0.9464522927335565, iteration: 214630
loss: 0.9585232734680176,grad_norm: 0.9999992096077913, iteration: 214631
loss: 1.0004533529281616,grad_norm: 0.8466615435959076, iteration: 214632
loss: 0.9726228713989258,grad_norm: 0.9999990996112472, iteration: 214633
loss: 1.0245929956436157,grad_norm: 0.9999989901579226, iteration: 214634
loss: 1.0117651224136353,grad_norm: 0.9999095594707753, iteration: 214635
loss: 1.0053300857543945,grad_norm: 0.9999990481740978, iteration: 214636
loss: 1.0192415714263916,grad_norm: 0.8812085371736259, iteration: 214637
loss: 1.0132759809494019,grad_norm: 0.9999991947190913, iteration: 214638
loss: 1.0239068269729614,grad_norm: 0.9999991939945547, iteration: 214639
loss: 1.034454107284546,grad_norm: 0.999999219815557, iteration: 214640
loss: 0.9980894327163696,grad_norm: 0.8541505477071499, iteration: 214641
loss: 1.0175880193710327,grad_norm: 0.8921421227251219, iteration: 214642
loss: 0.9961287379264832,grad_norm: 0.7461835196319335, iteration: 214643
loss: 1.030269980430603,grad_norm: 0.9999998819592809, iteration: 214644
loss: 0.9938790202140808,grad_norm: 0.8396915084120812, iteration: 214645
loss: 1.0114854574203491,grad_norm: 0.927518596396669, iteration: 214646
loss: 1.0316684246063232,grad_norm: 0.8309287557401812, iteration: 214647
loss: 0.9761037230491638,grad_norm: 0.9236955819226481, iteration: 214648
loss: 1.020501971244812,grad_norm: 0.8806016241742233, iteration: 214649
loss: 0.9840821027755737,grad_norm: 0.9999992264021389, iteration: 214650
loss: 1.0396511554718018,grad_norm: 0.999999127713242, iteration: 214651
loss: 1.0095062255859375,grad_norm: 0.9700872318718737, iteration: 214652
loss: 1.0093721151351929,grad_norm: 0.9999990474752353, iteration: 214653
loss: 0.9832918047904968,grad_norm: 0.9518953449242484, iteration: 214654
loss: 1.0099382400512695,grad_norm: 0.9857394407814235, iteration: 214655
loss: 1.006060242652893,grad_norm: 0.9999992318522091, iteration: 214656
loss: 1.0096745491027832,grad_norm: 0.9597119241982123, iteration: 214657
loss: 0.9974222779273987,grad_norm: 0.9999996365558922, iteration: 214658
loss: 1.0092551708221436,grad_norm: 0.9438350157533015, iteration: 214659
loss: 1.0183994770050049,grad_norm: 0.7399795235498983, iteration: 214660
loss: 1.0361509323120117,grad_norm: 0.8075901366875997, iteration: 214661
loss: 0.9771705269813538,grad_norm: 0.9363907806131045, iteration: 214662
loss: 1.0101739168167114,grad_norm: 0.9374184688414882, iteration: 214663
loss: 1.0200767517089844,grad_norm: 0.83383801358862, iteration: 214664
loss: 0.9956020712852478,grad_norm: 0.8495592209872649, iteration: 214665
loss: 1.0001590251922607,grad_norm: 0.9306955227570072, iteration: 214666
loss: 1.0197316408157349,grad_norm: 0.9999994202720118, iteration: 214667
loss: 1.003762125968933,grad_norm: 0.689933300794951, iteration: 214668
loss: 1.0033992528915405,grad_norm: 0.9309800268250761, iteration: 214669
loss: 0.9931366443634033,grad_norm: 0.9327077311114107, iteration: 214670
loss: 1.0195263624191284,grad_norm: 0.8505811445194021, iteration: 214671
loss: 0.9882866740226746,grad_norm: 0.9117897277978446, iteration: 214672
loss: 0.9905480146408081,grad_norm: 0.8511936192606804, iteration: 214673
loss: 0.9900686740875244,grad_norm: 0.824865522950666, iteration: 214674
loss: 0.9841171503067017,grad_norm: 0.9999990494600132, iteration: 214675
loss: 0.9958372116088867,grad_norm: 0.8193455452067612, iteration: 214676
loss: 1.0208019018173218,grad_norm: 0.9183926919359009, iteration: 214677
loss: 0.9942077994346619,grad_norm: 0.8984456447747434, iteration: 214678
loss: 1.0751045942306519,grad_norm: 0.9999991812709488, iteration: 214679
loss: 0.9940516352653503,grad_norm: 0.9999990690781881, iteration: 214680
loss: 0.9998108148574829,grad_norm: 0.8208555304098835, iteration: 214681
loss: 0.9646105170249939,grad_norm: 0.9117392234583638, iteration: 214682
loss: 1.0078272819519043,grad_norm: 0.9623051285184395, iteration: 214683
loss: 0.9958586692810059,grad_norm: 0.8066521255172732, iteration: 214684
loss: 0.9865469932556152,grad_norm: 0.9999990591325927, iteration: 214685
loss: 0.9629092812538147,grad_norm: 0.7905012284087258, iteration: 214686
loss: 0.9930809736251831,grad_norm: 0.9999989874821572, iteration: 214687
loss: 1.0353562831878662,grad_norm: 0.9999990393322205, iteration: 214688
loss: 0.9650120735168457,grad_norm: 0.8449558423119893, iteration: 214689
loss: 1.0111892223358154,grad_norm: 0.9078137514116019, iteration: 214690
loss: 1.0039763450622559,grad_norm: 0.9557090920414958, iteration: 214691
loss: 1.019957423210144,grad_norm: 0.9999992612943952, iteration: 214692
loss: 1.0002151727676392,grad_norm: 0.9999991254432887, iteration: 214693
loss: 0.9980639219284058,grad_norm: 0.9428847254653668, iteration: 214694
loss: 1.0036273002624512,grad_norm: 0.9195896947345759, iteration: 214695
loss: 0.9573155045509338,grad_norm: 0.923786260549695, iteration: 214696
loss: 0.9796134829521179,grad_norm: 0.7656372775192609, iteration: 214697
loss: 0.9796209931373596,grad_norm: 0.9353824349780238, iteration: 214698
loss: 1.0364694595336914,grad_norm: 0.9999990764302231, iteration: 214699
loss: 0.9918456673622131,grad_norm: 0.8440500880232811, iteration: 214700
loss: 0.9833158850669861,grad_norm: 0.956155265831738, iteration: 214701
loss: 0.9943657517433167,grad_norm: 0.9460275566424111, iteration: 214702
loss: 0.9936622381210327,grad_norm: 0.9074129240082106, iteration: 214703
loss: 1.0063852071762085,grad_norm: 0.9713208423897021, iteration: 214704
loss: 1.0009971857070923,grad_norm: 0.8452324114760833, iteration: 214705
loss: 0.9630423188209534,grad_norm: 0.8983408854619356, iteration: 214706
loss: 0.9728106260299683,grad_norm: 0.9461599858498354, iteration: 214707
loss: 0.9780589938163757,grad_norm: 0.999998966879347, iteration: 214708
loss: 1.0289347171783447,grad_norm: 0.9999991289297815, iteration: 214709
loss: 1.0035459995269775,grad_norm: 0.7887275102370401, iteration: 214710
loss: 0.9799820780754089,grad_norm: 0.8376642533621542, iteration: 214711
loss: 0.9688016772270203,grad_norm: 0.9999990762818447, iteration: 214712
loss: 1.0010712146759033,grad_norm: 0.9887765529137242, iteration: 214713
loss: 0.9908225536346436,grad_norm: 0.9893636848266363, iteration: 214714
loss: 1.008479356765747,grad_norm: 0.999999852514516, iteration: 214715
loss: 1.0467684268951416,grad_norm: 0.9999990304289612, iteration: 214716
loss: 0.984772264957428,grad_norm: 0.7730055517125776, iteration: 214717
loss: 1.0110491514205933,grad_norm: 0.8718018981247422, iteration: 214718
loss: 0.9933229684829712,grad_norm: 0.8515599058846679, iteration: 214719
loss: 1.0087826251983643,grad_norm: 0.9475695907835865, iteration: 214720
loss: 1.021848440170288,grad_norm: 0.91849923739399, iteration: 214721
loss: 1.0204106569290161,grad_norm: 0.9999989965994895, iteration: 214722
loss: 1.0140960216522217,grad_norm: 0.9999998825345459, iteration: 214723
loss: 1.0005629062652588,grad_norm: 0.8775458043692806, iteration: 214724
loss: 0.9784592390060425,grad_norm: 0.970229017039752, iteration: 214725
loss: 0.9922226071357727,grad_norm: 0.9702522799891251, iteration: 214726
loss: 0.9576936364173889,grad_norm: 0.9999990853460241, iteration: 214727
loss: 1.022223949432373,grad_norm: 0.9999993142966871, iteration: 214728
loss: 0.9832454919815063,grad_norm: 0.8983009529759268, iteration: 214729
loss: 1.132839322090149,grad_norm: 0.9999991611379284, iteration: 214730
loss: 1.008334755897522,grad_norm: 0.9999991931515848, iteration: 214731
loss: 0.9978684782981873,grad_norm: 0.9999991133778016, iteration: 214732
loss: 1.0145454406738281,grad_norm: 0.855943889281388, iteration: 214733
loss: 1.0229867696762085,grad_norm: 0.8976349204647596, iteration: 214734
loss: 0.9901416301727295,grad_norm: 0.9999990290616779, iteration: 214735
loss: 1.008906364440918,grad_norm: 0.9999997479905938, iteration: 214736
loss: 1.0201442241668701,grad_norm: 0.9999991646044448, iteration: 214737
loss: 0.9990185499191284,grad_norm: 0.9999990764235889, iteration: 214738
loss: 1.005846381187439,grad_norm: 0.9999990126862797, iteration: 214739
loss: 1.076648473739624,grad_norm: 0.9954876571471581, iteration: 214740
loss: 1.0167721509933472,grad_norm: 0.9863076683928512, iteration: 214741
loss: 0.9911594390869141,grad_norm: 0.9999990161818811, iteration: 214742
loss: 1.0084576606750488,grad_norm: 0.9999992678124059, iteration: 214743
loss: 1.0103094577789307,grad_norm: 0.999999071556718, iteration: 214744
loss: 1.0022931098937988,grad_norm: 0.8607675505785162, iteration: 214745
loss: 0.9821732640266418,grad_norm: 0.9449221626151175, iteration: 214746
loss: 0.9519524574279785,grad_norm: 0.9719891981419051, iteration: 214747
loss: 1.0203890800476074,grad_norm: 0.9999991492861466, iteration: 214748
loss: 1.0145295858383179,grad_norm: 0.960437762573613, iteration: 214749
loss: 1.007899522781372,grad_norm: 0.9999990050209738, iteration: 214750
loss: 1.0185593366622925,grad_norm: 0.9999992604666966, iteration: 214751
loss: 1.0265560150146484,grad_norm: 0.9999994805632684, iteration: 214752
loss: 1.0085951089859009,grad_norm: 0.9999990264174768, iteration: 214753
loss: 1.021200180053711,grad_norm: 0.9413547980124435, iteration: 214754
loss: 0.9891180992126465,grad_norm: 0.8829473811114456, iteration: 214755
loss: 0.9834873080253601,grad_norm: 0.9999993133556153, iteration: 214756
loss: 1.028799295425415,grad_norm: 0.9999992625671776, iteration: 214757
loss: 1.0193352699279785,grad_norm: 0.9017182161349474, iteration: 214758
loss: 1.022642731666565,grad_norm: 0.961040289771775, iteration: 214759
loss: 1.0109561681747437,grad_norm: 0.9171445508633584, iteration: 214760
loss: 1.006617784500122,grad_norm: 0.8601447914430477, iteration: 214761
loss: 1.0516892671585083,grad_norm: 0.9999991459888598, iteration: 214762
loss: 1.0211721658706665,grad_norm: 0.8661977575395348, iteration: 214763
loss: 0.99201500415802,grad_norm: 0.9999994129006824, iteration: 214764
loss: 1.0003007650375366,grad_norm: 0.8581398595213832, iteration: 214765
loss: 0.9846600890159607,grad_norm: 0.7551401698046822, iteration: 214766
loss: 1.0173510313034058,grad_norm: 0.9997770228505455, iteration: 214767
loss: 1.0159639120101929,grad_norm: 0.865692882659354, iteration: 214768
loss: 1.0061391592025757,grad_norm: 0.9051045506699336, iteration: 214769
loss: 0.9871839880943298,grad_norm: 0.9616915070714029, iteration: 214770
loss: 0.9847081303596497,grad_norm: 0.9329071545048829, iteration: 214771
loss: 1.0192465782165527,grad_norm: 0.9999990227631982, iteration: 214772
loss: 0.989206075668335,grad_norm: 0.8516457246142208, iteration: 214773
loss: 1.0227839946746826,grad_norm: 0.975989417216753, iteration: 214774
loss: 1.0549812316894531,grad_norm: 0.9999992749834435, iteration: 214775
loss: 0.9796016812324524,grad_norm: 0.9139540954612535, iteration: 214776
loss: 1.0224601030349731,grad_norm: 0.9999990659188435, iteration: 214777
loss: 0.9982263445854187,grad_norm: 0.9999991109337703, iteration: 214778
loss: 1.0634063482284546,grad_norm: 0.9278047866541597, iteration: 214779
loss: 0.9875617027282715,grad_norm: 0.999999604964773, iteration: 214780
loss: 1.0175706148147583,grad_norm: 0.976923122254146, iteration: 214781
loss: 1.0382496118545532,grad_norm: 0.9999992924670225, iteration: 214782
loss: 1.030967354774475,grad_norm: 0.9999992363850022, iteration: 214783
loss: 1.0153872966766357,grad_norm: 0.8558531127857825, iteration: 214784
loss: 0.9825254678726196,grad_norm: 0.84752775182169, iteration: 214785
loss: 1.0336110591888428,grad_norm: 0.9999997092995988, iteration: 214786
loss: 0.9857034087181091,grad_norm: 0.8425620724777155, iteration: 214787
loss: 1.0000736713409424,grad_norm: 0.9999991492912691, iteration: 214788
loss: 1.0038450956344604,grad_norm: 0.8774972143600225, iteration: 214789
loss: 0.9776133298873901,grad_norm: 0.8789536717092342, iteration: 214790
loss: 0.9766851663589478,grad_norm: 0.8520570259406104, iteration: 214791
loss: 0.9902734756469727,grad_norm: 0.9999990662515722, iteration: 214792
loss: 0.9864522814750671,grad_norm: 0.9999991823340009, iteration: 214793
loss: 1.0078142881393433,grad_norm: 0.9999992560359439, iteration: 214794
loss: 1.0154634714126587,grad_norm: 0.9999990997226264, iteration: 214795
loss: 0.9967464208602905,grad_norm: 0.8614725184937958, iteration: 214796
loss: 1.0132089853286743,grad_norm: 0.8452341439694261, iteration: 214797
loss: 0.9921122193336487,grad_norm: 0.8488892152748588, iteration: 214798
loss: 1.0169014930725098,grad_norm: 0.999999661152005, iteration: 214799
loss: 0.9750934839248657,grad_norm: 0.9999991552153483, iteration: 214800
loss: 1.0008845329284668,grad_norm: 0.9999991303415422, iteration: 214801
loss: 1.0038777589797974,grad_norm: 0.8877564833237187, iteration: 214802
loss: 1.0110247135162354,grad_norm: 0.9999994216712017, iteration: 214803
loss: 1.039894938468933,grad_norm: 0.9999991233207202, iteration: 214804
loss: 0.9712799191474915,grad_norm: 0.9999998395123418, iteration: 214805
loss: 1.0055402517318726,grad_norm: 0.9999992179414944, iteration: 214806
loss: 1.0105462074279785,grad_norm: 0.9999990148825397, iteration: 214807
loss: 1.0520081520080566,grad_norm: 0.9999997991592013, iteration: 214808
loss: 1.041157841682434,grad_norm: 0.9999996401529027, iteration: 214809
loss: 1.1865960359573364,grad_norm: 0.9999992822843484, iteration: 214810
loss: 1.1433331966400146,grad_norm: 0.9999992961599142, iteration: 214811
loss: 1.0865204334259033,grad_norm: 0.85887602980022, iteration: 214812
loss: 1.0426777601242065,grad_norm: 0.9999993023584619, iteration: 214813
loss: 1.0834054946899414,grad_norm: 0.9999998664650485, iteration: 214814
loss: 1.0182533264160156,grad_norm: 0.9999993072771312, iteration: 214815
loss: 1.1079076528549194,grad_norm: 0.9999997161173416, iteration: 214816
loss: 1.039194107055664,grad_norm: 0.9999997037477019, iteration: 214817
loss: 1.061568260192871,grad_norm: 0.9999994768304139, iteration: 214818
loss: 1.1027623414993286,grad_norm: 0.9999997158753817, iteration: 214819
loss: 1.0078824758529663,grad_norm: 0.9501451625000633, iteration: 214820
loss: 0.9832272529602051,grad_norm: 0.9379396598350934, iteration: 214821
loss: 0.9757160544395447,grad_norm: 0.8760826819228444, iteration: 214822
loss: 0.9937922954559326,grad_norm: 0.8752370425267202, iteration: 214823
loss: 0.9783995747566223,grad_norm: 0.9999994158277233, iteration: 214824
loss: 1.0002927780151367,grad_norm: 0.862089465008021, iteration: 214825
loss: 1.000684142112732,grad_norm: 0.9741546355012215, iteration: 214826
loss: 1.0391110181808472,grad_norm: 0.99999991277601, iteration: 214827
loss: 0.9892578125,grad_norm: 0.8757626011183383, iteration: 214828
loss: 1.00924551486969,grad_norm: 0.7583194547852924, iteration: 214829
loss: 1.0484977960586548,grad_norm: 0.977503081948499, iteration: 214830
loss: 1.0027121305465698,grad_norm: 0.9876958433874613, iteration: 214831
loss: 0.9679193496704102,grad_norm: 0.9999991642515703, iteration: 214832
loss: 1.009158730506897,grad_norm: 0.999999132787938, iteration: 214833
loss: 0.9832028746604919,grad_norm: 0.999999123220951, iteration: 214834
loss: 1.0069609880447388,grad_norm: 0.9999991495122547, iteration: 214835
loss: 1.0001765489578247,grad_norm: 0.948422404275591, iteration: 214836
loss: 0.9881145358085632,grad_norm: 0.9840898938992731, iteration: 214837
loss: 0.9956390261650085,grad_norm: 0.9999990551116416, iteration: 214838
loss: 1.02955961227417,grad_norm: 0.9439147397233486, iteration: 214839
loss: 0.997700035572052,grad_norm: 0.8441797474205296, iteration: 214840
loss: 0.9951423406600952,grad_norm: 0.8723079348738259, iteration: 214841
loss: 1.0244249105453491,grad_norm: 0.9999991410766731, iteration: 214842
loss: 0.9874982237815857,grad_norm: 0.9999990832303753, iteration: 214843
loss: 1.0817826986312866,grad_norm: 0.9999990769543926, iteration: 214844
loss: 0.9606021046638489,grad_norm: 0.9999991400454785, iteration: 214845
loss: 1.0158445835113525,grad_norm: 0.9999998462570859, iteration: 214846
loss: 0.9890111088752747,grad_norm: 0.9999991141108006, iteration: 214847
loss: 1.0466346740722656,grad_norm: 0.9999999579203782, iteration: 214848
loss: 1.0149574279785156,grad_norm: 0.7964713821923048, iteration: 214849
loss: 0.9485716819763184,grad_norm: 0.9596181811767135, iteration: 214850
loss: 0.9914202690124512,grad_norm: 0.8787199771974519, iteration: 214851
loss: 1.0397489070892334,grad_norm: 0.9999994045443549, iteration: 214852
loss: 1.0126512050628662,grad_norm: 0.9999990601154587, iteration: 214853
loss: 1.0170924663543701,grad_norm: 0.9999991247179807, iteration: 214854
loss: 0.9868630170822144,grad_norm: 0.8876282295342869, iteration: 214855
loss: 0.9969860315322876,grad_norm: 0.8080218963789657, iteration: 214856
loss: 0.9741013050079346,grad_norm: 0.9913538585612734, iteration: 214857
loss: 0.9946126341819763,grad_norm: 0.9999991190624924, iteration: 214858
loss: 1.0301053524017334,grad_norm: 0.9999992425356996, iteration: 214859
loss: 0.9970619678497314,grad_norm: 0.999999060489155, iteration: 214860
loss: 0.9756069183349609,grad_norm: 0.9999992433817699, iteration: 214861
loss: 1.260265827178955,grad_norm: 0.999999153882668, iteration: 214862
loss: 1.0108798742294312,grad_norm: 0.9338625133416013, iteration: 214863
loss: 0.9887422323226929,grad_norm: 0.9999990275073465, iteration: 214864
loss: 1.0135912895202637,grad_norm: 0.8603299714361469, iteration: 214865
loss: 1.0677889585494995,grad_norm: 0.9999998145375404, iteration: 214866
loss: 1.127678632736206,grad_norm: 0.9764063538766993, iteration: 214867
loss: 1.0422816276550293,grad_norm: 0.9184804869435717, iteration: 214868
loss: 1.203423261642456,grad_norm: 0.9999996004774914, iteration: 214869
loss: 1.0769623517990112,grad_norm: 0.9999991632366565, iteration: 214870
loss: 1.0521721839904785,grad_norm: 0.9867374693699651, iteration: 214871
loss: 0.9694218635559082,grad_norm: 0.9999990433031948, iteration: 214872
loss: 0.9737894535064697,grad_norm: 0.9279414325211435, iteration: 214873
loss: 0.9836717247962952,grad_norm: 0.9696590119594168, iteration: 214874
loss: 1.0954325199127197,grad_norm: 0.9999997724544584, iteration: 214875
loss: 1.0180712938308716,grad_norm: 0.9999991572611396, iteration: 214876
loss: 1.0086324214935303,grad_norm: 0.7626457689058809, iteration: 214877
loss: 0.9930605888366699,grad_norm: 0.9737611106746927, iteration: 214878
loss: 0.9986670613288879,grad_norm: 0.9739906969266615, iteration: 214879
loss: 0.9660890698432922,grad_norm: 0.9999991505901747, iteration: 214880
loss: 1.0531411170959473,grad_norm: 0.9999998387209775, iteration: 214881
loss: 1.0088958740234375,grad_norm: 0.9999998326665561, iteration: 214882
loss: 1.1207388639450073,grad_norm: 0.9999999699061473, iteration: 214883
loss: 1.0029940605163574,grad_norm: 0.8037011169040084, iteration: 214884
loss: 1.0137571096420288,grad_norm: 0.9999990950511659, iteration: 214885
loss: 1.045281171798706,grad_norm: 0.9999992312116389, iteration: 214886
loss: 1.0103144645690918,grad_norm: 0.9999991284734983, iteration: 214887
loss: 1.086350679397583,grad_norm: 0.9999999536480603, iteration: 214888
loss: 0.9233896732330322,grad_norm: 0.9999990004159405, iteration: 214889
loss: 0.9718780517578125,grad_norm: 0.9999992300971494, iteration: 214890
loss: 1.0114995241165161,grad_norm: 0.8469567988985084, iteration: 214891
loss: 1.008069634437561,grad_norm: 0.8397684958395818, iteration: 214892
loss: 1.0111095905303955,grad_norm: 0.9190520917389688, iteration: 214893
loss: 1.0099583864212036,grad_norm: 0.9999992156264748, iteration: 214894
loss: 1.0091482400894165,grad_norm: 0.9999990428998957, iteration: 214895
loss: 1.005255937576294,grad_norm: 0.9999991212075348, iteration: 214896
loss: 1.001927137374878,grad_norm: 0.9310254632195923, iteration: 214897
loss: 1.003138542175293,grad_norm: 0.999998893448685, iteration: 214898
loss: 1.0368012189865112,grad_norm: 0.9528353924996625, iteration: 214899
loss: 0.984835684299469,grad_norm: 0.9999996813819263, iteration: 214900
loss: 0.9709195494651794,grad_norm: 0.999999274722611, iteration: 214901
loss: 1.0059798955917358,grad_norm: 0.8930729300466559, iteration: 214902
loss: 1.0110232830047607,grad_norm: 0.9374611769002643, iteration: 214903
loss: 0.9829548597335815,grad_norm: 0.9999990102405789, iteration: 214904
loss: 1.0230967998504639,grad_norm: 0.9999990768993469, iteration: 214905
loss: 0.9760518074035645,grad_norm: 0.9088520680048222, iteration: 214906
loss: 1.0007498264312744,grad_norm: 0.999999183065035, iteration: 214907
loss: 1.004569172859192,grad_norm: 0.9999989851366939, iteration: 214908
loss: 1.0095818042755127,grad_norm: 0.9999990289848486, iteration: 214909
loss: 0.984499454498291,grad_norm: 0.8469860723098118, iteration: 214910
loss: 1.0036404132843018,grad_norm: 0.8217740972274921, iteration: 214911
loss: 0.9830186367034912,grad_norm: 0.8185157431769933, iteration: 214912
loss: 1.0290368795394897,grad_norm: 0.9999990509899609, iteration: 214913
loss: 0.9887875318527222,grad_norm: 0.8751072539461769, iteration: 214914
loss: 0.9992145299911499,grad_norm: 0.965629317303041, iteration: 214915
loss: 0.9900975823402405,grad_norm: 0.9999991496855667, iteration: 214916
loss: 1.0979939699172974,grad_norm: 0.9999992704798893, iteration: 214917
loss: 0.9563344717025757,grad_norm: 0.9999990832842163, iteration: 214918
loss: 0.969260036945343,grad_norm: 0.734480500686392, iteration: 214919
loss: 1.0250928401947021,grad_norm: 0.9906634116785533, iteration: 214920
loss: 1.0041544437408447,grad_norm: 0.7788716752370897, iteration: 214921
loss: 1.0040128231048584,grad_norm: 0.9999990221457832, iteration: 214922
loss: 0.9836723804473877,grad_norm: 0.8165563374554557, iteration: 214923
loss: 0.9949163198471069,grad_norm: 0.9188334199985007, iteration: 214924
loss: 0.9608061909675598,grad_norm: 0.9999991584753708, iteration: 214925
loss: 0.9690141677856445,grad_norm: 0.811570955560268, iteration: 214926
loss: 0.9720189571380615,grad_norm: 0.9244925139566347, iteration: 214927
loss: 0.9960200786590576,grad_norm: 0.7964632956054372, iteration: 214928
loss: 0.9977952241897583,grad_norm: 0.9999989730562371, iteration: 214929
loss: 1.0090423822402954,grad_norm: 0.8518552210710472, iteration: 214930
loss: 0.9809532165527344,grad_norm: 0.9999990696926314, iteration: 214931
loss: 0.9977574944496155,grad_norm: 0.9999991910537142, iteration: 214932
loss: 0.9731795191764832,grad_norm: 0.9188554485295145, iteration: 214933
loss: 1.0603262186050415,grad_norm: 1.0000000354162983, iteration: 214934
loss: 0.9481790065765381,grad_norm: 0.9395826289945775, iteration: 214935
loss: 0.957107663154602,grad_norm: 0.9999990708152933, iteration: 214936
loss: 0.9823875427246094,grad_norm: 0.9213431934767184, iteration: 214937
loss: 0.9908291101455688,grad_norm: 0.9205825317278928, iteration: 214938
loss: 1.008960247039795,grad_norm: 0.8779162051656054, iteration: 214939
loss: 1.0243383646011353,grad_norm: 0.875484134422144, iteration: 214940
loss: 1.0240418910980225,grad_norm: 0.9788711871723034, iteration: 214941
loss: 1.005007028579712,grad_norm: 0.9999992825767424, iteration: 214942
loss: 0.9660532474517822,grad_norm: 0.9014358124494646, iteration: 214943
loss: 1.0502060651779175,grad_norm: 0.9965409385135023, iteration: 214944
loss: 0.9861419796943665,grad_norm: 0.9266543716470462, iteration: 214945
loss: 0.9900245070457458,grad_norm: 0.9999990793683647, iteration: 214946
loss: 0.9669613242149353,grad_norm: 0.7981646196135314, iteration: 214947
loss: 0.969816267490387,grad_norm: 0.9181269858891057, iteration: 214948
loss: 1.0132797956466675,grad_norm: 0.9760674426869347, iteration: 214949
loss: 1.013314127922058,grad_norm: 0.9999990417299434, iteration: 214950
loss: 0.9382274746894836,grad_norm: 0.9999990315943303, iteration: 214951
loss: 1.0024250745773315,grad_norm: 0.7986358443630489, iteration: 214952
loss: 1.0476738214492798,grad_norm: 0.8808804100869048, iteration: 214953
loss: 1.0197770595550537,grad_norm: 0.826438691367948, iteration: 214954
loss: 1.0346566438674927,grad_norm: 0.9999992922773935, iteration: 214955
loss: 1.0238455533981323,grad_norm: 0.9999990106157719, iteration: 214956
loss: 1.0180202722549438,grad_norm: 0.993149036332651, iteration: 214957
loss: 0.9882652163505554,grad_norm: 0.7881131441118859, iteration: 214958
loss: 1.004225254058838,grad_norm: 0.9999992175784884, iteration: 214959
loss: 0.9883743524551392,grad_norm: 0.9999990403950072, iteration: 214960
loss: 1.0404179096221924,grad_norm: 0.9999997748378574, iteration: 214961
loss: 0.982915461063385,grad_norm: 0.8697296066292693, iteration: 214962
loss: 0.9982151985168457,grad_norm: 0.8399529618484829, iteration: 214963
loss: 0.9661071300506592,grad_norm: 0.8457357905614294, iteration: 214964
loss: 1.009861707687378,grad_norm: 0.9337196329941113, iteration: 214965
loss: 1.0121090412139893,grad_norm: 0.8435607950971314, iteration: 214966
loss: 0.9827602505683899,grad_norm: 0.8463912376927323, iteration: 214967
loss: 1.0030665397644043,grad_norm: 0.9999990556057821, iteration: 214968
loss: 1.0272916555404663,grad_norm: 0.9999992213704556, iteration: 214969
loss: 1.005111813545227,grad_norm: 0.9367706602424402, iteration: 214970
loss: 0.9982828497886658,grad_norm: 0.8085843460185034, iteration: 214971
loss: 1.0058964490890503,grad_norm: 0.9999990546308761, iteration: 214972
loss: 1.0004762411117554,grad_norm: 0.8412618766671328, iteration: 214973
loss: 1.0028775930404663,grad_norm: 0.9609539225983459, iteration: 214974
loss: 0.9665389657020569,grad_norm: 0.8014150659158272, iteration: 214975
loss: 1.0517311096191406,grad_norm: 0.9042250671261379, iteration: 214976
loss: 1.0173450708389282,grad_norm: 0.9999989702117174, iteration: 214977
loss: 0.9987552762031555,grad_norm: 0.9999990970578163, iteration: 214978
loss: 1.0020670890808105,grad_norm: 0.8808870448999075, iteration: 214979
loss: 1.011120080947876,grad_norm: 0.9219577019895921, iteration: 214980
loss: 1.013091802597046,grad_norm: 0.8975407764227822, iteration: 214981
loss: 1.0000815391540527,grad_norm: 0.9999990963922436, iteration: 214982
loss: 1.0237709283828735,grad_norm: 0.9999991014371915, iteration: 214983
loss: 0.9932149052619934,grad_norm: 0.8932319711798499, iteration: 214984
loss: 1.0455158948898315,grad_norm: 0.8310576287349571, iteration: 214985
loss: 0.975974440574646,grad_norm: 0.9674594782597197, iteration: 214986
loss: 0.9779253602027893,grad_norm: 0.9999994752017358, iteration: 214987
loss: 1.0118699073791504,grad_norm: 0.999999292712746, iteration: 214988
loss: 0.9805310368537903,grad_norm: 0.7985893610044477, iteration: 214989
loss: 0.9856910109519958,grad_norm: 0.9999991379053006, iteration: 214990
loss: 1.01896333694458,grad_norm: 0.9999994564626555, iteration: 214991
loss: 1.0073503255844116,grad_norm: 0.9512674989158885, iteration: 214992
loss: 0.9803617000579834,grad_norm: 0.9999992001191336, iteration: 214993
loss: 1.0194343328475952,grad_norm: 0.8037337417111189, iteration: 214994
loss: 1.00098717212677,grad_norm: 0.9999990379710145, iteration: 214995
loss: 0.9819481372833252,grad_norm: 0.88623848080927, iteration: 214996
loss: 0.9924072623252869,grad_norm: 0.955622111449548, iteration: 214997
loss: 0.9925075173377991,grad_norm: 0.9837443240471825, iteration: 214998
loss: 0.9882434606552124,grad_norm: 0.9999998269432059, iteration: 214999
loss: 0.9961501359939575,grad_norm: 0.9999991683017978, iteration: 215000
loss: 1.0208221673965454,grad_norm: 0.8519380391615636, iteration: 215001
loss: 0.9968212842941284,grad_norm: 0.9595225903511884, iteration: 215002
loss: 0.9892181754112244,grad_norm: 0.950777102997827, iteration: 215003
loss: 1.0111390352249146,grad_norm: 0.969765541631973, iteration: 215004
loss: 1.047309398651123,grad_norm: 0.9999991902010964, iteration: 215005
loss: 0.9995415806770325,grad_norm: 0.8804951544679136, iteration: 215006
loss: 0.9948956370353699,grad_norm: 0.9010018567677927, iteration: 215007
loss: 1.0091854333877563,grad_norm: 0.9448570954682359, iteration: 215008
loss: 0.9966977834701538,grad_norm: 0.9322743835326567, iteration: 215009
loss: 1.0615625381469727,grad_norm: 0.8770165288524396, iteration: 215010
loss: 0.9997493028640747,grad_norm: 0.9948564926961698, iteration: 215011
loss: 1.019161581993103,grad_norm: 0.8678527114242806, iteration: 215012
loss: 1.0149078369140625,grad_norm: 0.9853523498739395, iteration: 215013
loss: 1.0071080923080444,grad_norm: 0.888978760770415, iteration: 215014
loss: 1.0077167749404907,grad_norm: 0.999999189593267, iteration: 215015
loss: 1.0345754623413086,grad_norm: 0.9822842840171699, iteration: 215016
loss: 1.0045884847640991,grad_norm: 0.9863552989563482, iteration: 215017
loss: 0.9891940355300903,grad_norm: 0.8218444753299372, iteration: 215018
loss: 0.9939990043640137,grad_norm: 0.9999990767413123, iteration: 215019
loss: 0.9981796145439148,grad_norm: 0.9999992097796341, iteration: 215020
loss: 0.9795982837677002,grad_norm: 0.8261953914961387, iteration: 215021
loss: 0.9828912615776062,grad_norm: 0.9999992089822519, iteration: 215022
loss: 0.9973374009132385,grad_norm: 0.9474063402036158, iteration: 215023
loss: 0.9571824669837952,grad_norm: 0.8266614230610434, iteration: 215024
loss: 0.9954022169113159,grad_norm: 0.9999990705942136, iteration: 215025
loss: 1.029480218887329,grad_norm: 0.9999991455338479, iteration: 215026
loss: 0.9643473625183105,grad_norm: 0.9999990755799678, iteration: 215027
loss: 0.9919315576553345,grad_norm: 0.9491641633907985, iteration: 215028
loss: 1.0012507438659668,grad_norm: 0.8499150146568103, iteration: 215029
loss: 1.044020414352417,grad_norm: 0.911377171119578, iteration: 215030
loss: 1.007206916809082,grad_norm: 0.9324908088959081, iteration: 215031
loss: 1.002484679222107,grad_norm: 0.9039950554976312, iteration: 215032
loss: 0.9863346815109253,grad_norm: 0.9999998173296815, iteration: 215033
loss: 1.0095210075378418,grad_norm: 0.9714907733422136, iteration: 215034
loss: 1.0261591672897339,grad_norm: 0.9999991866774651, iteration: 215035
loss: 0.9859786033630371,grad_norm: 0.9258915457467085, iteration: 215036
loss: 0.9831196665763855,grad_norm: 0.9999991629886181, iteration: 215037
loss: 1.0304749011993408,grad_norm: 0.8159735060956522, iteration: 215038
loss: 0.9768552184104919,grad_norm: 0.9999990983716704, iteration: 215039
loss: 0.9614466428756714,grad_norm: 0.8096401074253257, iteration: 215040
loss: 1.0023188591003418,grad_norm: 0.9081216488259755, iteration: 215041
loss: 0.962371826171875,grad_norm: 0.9980704249954505, iteration: 215042
loss: 1.0045335292816162,grad_norm: 0.888768587624328, iteration: 215043
loss: 0.9809684753417969,grad_norm: 0.9999993195922945, iteration: 215044
loss: 1.0009264945983887,grad_norm: 0.9029827050653817, iteration: 215045
loss: 1.000731348991394,grad_norm: 0.7714406368214443, iteration: 215046
loss: 0.9997981190681458,grad_norm: 0.9999993618251173, iteration: 215047
loss: 1.0352600812911987,grad_norm: 0.8755365937784004, iteration: 215048
loss: 1.0115433931350708,grad_norm: 0.9999990465686414, iteration: 215049
loss: 1.0257165431976318,grad_norm: 0.9268422025404833, iteration: 215050
loss: 1.0078823566436768,grad_norm: 0.9924829228120624, iteration: 215051
loss: 0.9492575526237488,grad_norm: 0.9384883887269881, iteration: 215052
loss: 1.0001527070999146,grad_norm: 0.8799132105736851, iteration: 215053
loss: 0.9827275276184082,grad_norm: 0.8542309563674114, iteration: 215054
loss: 1.0217205286026,grad_norm: 0.9243444626262608, iteration: 215055
loss: 1.0168331861495972,grad_norm: 0.9659104438208297, iteration: 215056
loss: 0.9861452579498291,grad_norm: 0.9112422142143625, iteration: 215057
loss: 1.0319759845733643,grad_norm: 0.8375227536588852, iteration: 215058
loss: 0.9903919696807861,grad_norm: 0.8040124346302465, iteration: 215059
loss: 1.0124188661575317,grad_norm: 0.9690488168411262, iteration: 215060
loss: 0.9743644595146179,grad_norm: 0.7942042188724973, iteration: 215061
loss: 1.0036543607711792,grad_norm: 0.9999995489793083, iteration: 215062
loss: 1.0128580331802368,grad_norm: 0.9493640108977042, iteration: 215063
loss: 1.0415117740631104,grad_norm: 0.9176203629077005, iteration: 215064
loss: 0.9997212290763855,grad_norm: 0.905075294299148, iteration: 215065
loss: 0.9913334846496582,grad_norm: 0.9450741767535157, iteration: 215066
loss: 0.9766970872879028,grad_norm: 0.9999991972273153, iteration: 215067
loss: 0.9990968704223633,grad_norm: 0.949297682909716, iteration: 215068
loss: 0.9956291913986206,grad_norm: 0.88360508841066, iteration: 215069
loss: 0.9974097013473511,grad_norm: 0.9353427809485528, iteration: 215070
loss: 1.0209064483642578,grad_norm: 0.9999990752174633, iteration: 215071
loss: 1.023174524307251,grad_norm: 0.9999990301423014, iteration: 215072
loss: 1.0033035278320312,grad_norm: 0.9999989658268961, iteration: 215073
loss: 1.05635404586792,grad_norm: 0.9999990571243024, iteration: 215074
loss: 0.9876208901405334,grad_norm: 0.8853112432871387, iteration: 215075
loss: 1.018135905265808,grad_norm: 0.9757030111236762, iteration: 215076
loss: 1.0243819952011108,grad_norm: 0.9999999018491816, iteration: 215077
loss: 0.9677059054374695,grad_norm: 0.8846447395898606, iteration: 215078
loss: 0.9988833069801331,grad_norm: 0.9500879118936366, iteration: 215079
loss: 1.033307433128357,grad_norm: 0.9318292559612403, iteration: 215080
loss: 0.9846550822257996,grad_norm: 0.9999992102140395, iteration: 215081
loss: 1.0146867036819458,grad_norm: 0.8697031203393277, iteration: 215082
loss: 1.0201494693756104,grad_norm: 0.9449323781333598, iteration: 215083
loss: 0.9883415102958679,grad_norm: 0.8912056359250646, iteration: 215084
loss: 0.9343828558921814,grad_norm: 0.9999993680417238, iteration: 215085
loss: 0.9449265599250793,grad_norm: 0.9561369129145061, iteration: 215086
loss: 1.0039420127868652,grad_norm: 0.8716077343647728, iteration: 215087
loss: 0.987346887588501,grad_norm: 0.9509887861324683, iteration: 215088
loss: 0.9988976120948792,grad_norm: 0.9460305828102239, iteration: 215089
loss: 1.0318121910095215,grad_norm: 0.9999989692584257, iteration: 215090
loss: 1.0158379077911377,grad_norm: 0.789077250280164, iteration: 215091
loss: 1.2078533172607422,grad_norm: 0.9999997319747072, iteration: 215092
loss: 0.992995023727417,grad_norm: 0.9978443321043055, iteration: 215093
loss: 0.9587410688400269,grad_norm: 0.8538384379602925, iteration: 215094
loss: 0.9838724732398987,grad_norm: 0.9999991925926106, iteration: 215095
loss: 1.0290919542312622,grad_norm: 0.9816987126717335, iteration: 215096
loss: 0.9991859793663025,grad_norm: 0.9999990878247441, iteration: 215097
loss: 1.0050463676452637,grad_norm: 0.7935681499289812, iteration: 215098
loss: 1.0288541316986084,grad_norm: 0.9999990594639809, iteration: 215099
loss: 1.0033208131790161,grad_norm: 0.9420549788765379, iteration: 215100
loss: 1.0024735927581787,grad_norm: 0.9261600383134796, iteration: 215101
loss: 0.9888418316841125,grad_norm: 0.8859250527462846, iteration: 215102
loss: 1.020602822303772,grad_norm: 0.8484368849079905, iteration: 215103
loss: 0.9679320454597473,grad_norm: 0.9999991532120601, iteration: 215104
loss: 0.9866787195205688,grad_norm: 0.9291501286962772, iteration: 215105
loss: 0.9930282831192017,grad_norm: 0.999999168232131, iteration: 215106
loss: 1.0159610509872437,grad_norm: 0.9999990074072299, iteration: 215107
loss: 0.9894184470176697,grad_norm: 0.8929121364882942, iteration: 215108
loss: 0.9821411967277527,grad_norm: 0.8473164123952607, iteration: 215109
loss: 0.9816277623176575,grad_norm: 0.8969283696336529, iteration: 215110
loss: 1.0311330556869507,grad_norm: 0.9873960918075128, iteration: 215111
loss: 0.9695979952812195,grad_norm: 0.8374184512389303, iteration: 215112
loss: 1.019333004951477,grad_norm: 0.9999991107361507, iteration: 215113
loss: 1.0383058786392212,grad_norm: 0.999999218746692, iteration: 215114
loss: 1.0337095260620117,grad_norm: 0.9999990778585791, iteration: 215115
loss: 1.0427486896514893,grad_norm: 0.9332385726145392, iteration: 215116
loss: 1.0020545721054077,grad_norm: 0.9999991118464082, iteration: 215117
loss: 1.0114550590515137,grad_norm: 0.9999995409825261, iteration: 215118
loss: 0.987571656703949,grad_norm: 0.9434189693320758, iteration: 215119
loss: 1.020491361618042,grad_norm: 0.8871194806878209, iteration: 215120
loss: 1.004847764968872,grad_norm: 0.9999989571824016, iteration: 215121
loss: 1.0371203422546387,grad_norm: 0.9940891346470014, iteration: 215122
loss: 0.9976696968078613,grad_norm: 0.921460689303494, iteration: 215123
loss: 0.9856544733047485,grad_norm: 0.9999992359742308, iteration: 215124
loss: 1.0200157165527344,grad_norm: 0.9999991194108059, iteration: 215125
loss: 1.0047894716262817,grad_norm: 0.9386521534131042, iteration: 215126
loss: 1.0050100088119507,grad_norm: 0.8930796530879196, iteration: 215127
loss: 0.9748841524124146,grad_norm: 0.9597989208233412, iteration: 215128
loss: 0.995310366153717,grad_norm: 0.924123788249454, iteration: 215129
loss: 1.108132243156433,grad_norm: 0.9999997074010555, iteration: 215130
loss: 0.9942207932472229,grad_norm: 0.9345615471085379, iteration: 215131
loss: 0.9872886538505554,grad_norm: 0.8152382887134569, iteration: 215132
loss: 0.9795827865600586,grad_norm: 0.8887416482261666, iteration: 215133
loss: 0.9932772517204285,grad_norm: 0.9922075606695122, iteration: 215134
loss: 1.0516972541809082,grad_norm: 0.8462610901794562, iteration: 215135
loss: 1.020675778388977,grad_norm: 0.7951019258219978, iteration: 215136
loss: 0.9883385896682739,grad_norm: 0.9940642664162092, iteration: 215137
loss: 1.0089646577835083,grad_norm: 0.9383794615480432, iteration: 215138
loss: 0.9824331998825073,grad_norm: 0.9247859096511334, iteration: 215139
loss: 1.0491029024124146,grad_norm: 0.9219450446292023, iteration: 215140
loss: 1.0232701301574707,grad_norm: 0.999999088032229, iteration: 215141
loss: 1.0077664852142334,grad_norm: 0.7522377763623351, iteration: 215142
loss: 1.0126674175262451,grad_norm: 0.7869757293586042, iteration: 215143
loss: 1.0085383653640747,grad_norm: 0.7541174785259436, iteration: 215144
loss: 0.9987133741378784,grad_norm: 0.9999996480615898, iteration: 215145
loss: 0.9824231266975403,grad_norm: 0.9385942717872293, iteration: 215146
loss: 0.9719277620315552,grad_norm: 0.999999165663175, iteration: 215147
loss: 1.0118036270141602,grad_norm: 0.9351013526897418, iteration: 215148
loss: 0.9964786171913147,grad_norm: 0.9315243270686294, iteration: 215149
loss: 1.0006446838378906,grad_norm: 0.9456404539445297, iteration: 215150
loss: 0.9802790284156799,grad_norm: 0.9343530163876087, iteration: 215151
loss: 1.0072579383850098,grad_norm: 0.9264725191308123, iteration: 215152
loss: 1.0100020170211792,grad_norm: 0.9999991038206196, iteration: 215153
loss: 1.0085670948028564,grad_norm: 0.9195010557454718, iteration: 215154
loss: 1.0451669692993164,grad_norm: 0.9269507993492908, iteration: 215155
loss: 1.075856328010559,grad_norm: 0.9999993040217025, iteration: 215156
loss: 0.9866032004356384,grad_norm: 0.9360832375877468, iteration: 215157
loss: 0.9735782146453857,grad_norm: 0.9670443471239826, iteration: 215158
loss: 0.9904460906982422,grad_norm: 0.933716014792744, iteration: 215159
loss: 1.024914026260376,grad_norm: 0.8691971621197159, iteration: 215160
loss: 1.0001859664916992,grad_norm: 0.999999115143065, iteration: 215161
loss: 0.9565966725349426,grad_norm: 0.9735455885309561, iteration: 215162
loss: 0.9914599061012268,grad_norm: 0.9068939259905795, iteration: 215163
loss: 1.0328407287597656,grad_norm: 0.8960319453501188, iteration: 215164
loss: 0.9924563765525818,grad_norm: 0.8687598203471132, iteration: 215165
loss: 0.992774486541748,grad_norm: 0.8330131337633662, iteration: 215166
loss: 1.0104304552078247,grad_norm: 0.9252406159773018, iteration: 215167
loss: 0.970409631729126,grad_norm: 0.8550543156709853, iteration: 215168
loss: 1.0066689252853394,grad_norm: 0.765652956056301, iteration: 215169
loss: 1.001045823097229,grad_norm: 0.9999992023231611, iteration: 215170
loss: 1.0001722574234009,grad_norm: 0.999999237963738, iteration: 215171
loss: 0.9895384907722473,grad_norm: 0.999999153047062, iteration: 215172
loss: 0.9713341593742371,grad_norm: 0.8981123495225994, iteration: 215173
loss: 0.9794982671737671,grad_norm: 0.9196104801630702, iteration: 215174
loss: 1.0067473649978638,grad_norm: 0.9621517934086958, iteration: 215175
loss: 1.0499353408813477,grad_norm: 0.9999997893191158, iteration: 215176
loss: 0.9871780872344971,grad_norm: 0.921941768196985, iteration: 215177
loss: 1.0283429622650146,grad_norm: 0.9081729176186377, iteration: 215178
loss: 1.0709524154663086,grad_norm: 0.9999994949115915, iteration: 215179
loss: 0.9890193939208984,grad_norm: 0.9508381871564412, iteration: 215180
loss: 0.9882978796958923,grad_norm: 0.904480644074015, iteration: 215181
loss: 1.0227243900299072,grad_norm: 0.9999990883714871, iteration: 215182
loss: 1.0124423503875732,grad_norm: 0.9473709945285582, iteration: 215183
loss: 0.9833671450614929,grad_norm: 0.9688335830243041, iteration: 215184
loss: 1.009775996208191,grad_norm: 0.9377662928461564, iteration: 215185
loss: 0.9554070234298706,grad_norm: 0.9069674792937691, iteration: 215186
loss: 0.9843102097511292,grad_norm: 0.9237449556483408, iteration: 215187
loss: 0.9918634295463562,grad_norm: 0.9999993608502016, iteration: 215188
loss: 0.9985754489898682,grad_norm: 0.9185013991807538, iteration: 215189
loss: 1.0267539024353027,grad_norm: 0.9405125056279817, iteration: 215190
loss: 0.9621210694313049,grad_norm: 0.9999991948007383, iteration: 215191
loss: 0.9768036007881165,grad_norm: 0.8452306031740472, iteration: 215192
loss: 0.9989265203475952,grad_norm: 0.8957517974446425, iteration: 215193
loss: 1.0452989339828491,grad_norm: 0.9999993721730971, iteration: 215194
loss: 1.0025529861450195,grad_norm: 0.9999991455879574, iteration: 215195
loss: 1.0295733213424683,grad_norm: 0.9573453412323575, iteration: 215196
loss: 1.0004487037658691,grad_norm: 0.7832603446559099, iteration: 215197
loss: 0.9962670803070068,grad_norm: 0.9251560498545123, iteration: 215198
loss: 0.9573066830635071,grad_norm: 0.999999080419027, iteration: 215199
loss: 1.0484224557876587,grad_norm: 0.8878536848077365, iteration: 215200
loss: 0.9907669425010681,grad_norm: 0.916832566676438, iteration: 215201
loss: 0.9820655584335327,grad_norm: 0.9808890967152851, iteration: 215202
loss: 0.997629702091217,grad_norm: 0.891099557589518, iteration: 215203
loss: 0.9928032159805298,grad_norm: 0.8221227766382105, iteration: 215204
loss: 1.0007916688919067,grad_norm: 0.9999991570187343, iteration: 215205
loss: 0.9990956783294678,grad_norm: 0.913927760809438, iteration: 215206
loss: 0.9687211513519287,grad_norm: 0.8930486592240937, iteration: 215207
loss: 0.9710236191749573,grad_norm: 0.7225559171846861, iteration: 215208
loss: 0.9995600581169128,grad_norm: 0.9999991412459177, iteration: 215209
loss: 1.0053699016571045,grad_norm: 0.9969440938184502, iteration: 215210
loss: 0.9620548486709595,grad_norm: 0.9999989705023957, iteration: 215211
loss: 0.995799720287323,grad_norm: 0.9999990395999866, iteration: 215212
loss: 1.0163360834121704,grad_norm: 0.9052400566455843, iteration: 215213
loss: 1.0426193475723267,grad_norm: 0.9999990919088698, iteration: 215214
loss: 1.0106345415115356,grad_norm: 0.8528652394150887, iteration: 215215
loss: 0.9938555955886841,grad_norm: 0.9664190430867359, iteration: 215216
loss: 1.0157015323638916,grad_norm: 0.9999998398270935, iteration: 215217
loss: 0.9867283701896667,grad_norm: 0.7495686096336696, iteration: 215218
loss: 0.9757152795791626,grad_norm: 0.9999990781349949, iteration: 215219
loss: 1.0327953100204468,grad_norm: 0.8952691816859446, iteration: 215220
loss: 1.0056601762771606,grad_norm: 0.9063418876321716, iteration: 215221
loss: 0.9735579490661621,grad_norm: 0.9963181958275596, iteration: 215222
loss: 1.0558967590332031,grad_norm: 0.9494035553869218, iteration: 215223
loss: 1.0275245904922485,grad_norm: 0.8901826894392413, iteration: 215224
loss: 0.9916414618492126,grad_norm: 0.970600306137698, iteration: 215225
loss: 1.0307892560958862,grad_norm: 0.7552540516480072, iteration: 215226
loss: 1.0575501918792725,grad_norm: 0.9569373834375108, iteration: 215227
loss: 0.9632318615913391,grad_norm: 0.8030409206503876, iteration: 215228
loss: 1.0195295810699463,grad_norm: 0.9206480978672925, iteration: 215229
loss: 1.0083496570587158,grad_norm: 0.9547712865906444, iteration: 215230
loss: 1.0403307676315308,grad_norm: 0.9610065357452446, iteration: 215231
loss: 1.0033820867538452,grad_norm: 0.9999993211640509, iteration: 215232
loss: 1.0120594501495361,grad_norm: 0.8497891919797853, iteration: 215233
loss: 0.9754456877708435,grad_norm: 0.927262495854463, iteration: 215234
loss: 1.013711929321289,grad_norm: 0.87210622544268, iteration: 215235
loss: 1.0252997875213623,grad_norm: 0.9278314522716242, iteration: 215236
loss: 0.9886420369148254,grad_norm: 0.9166111272585087, iteration: 215237
loss: 1.0402684211730957,grad_norm: 0.9999998463374523, iteration: 215238
loss: 1.000680685043335,grad_norm: 0.9999991693814877, iteration: 215239
loss: 0.9986844658851624,grad_norm: 0.9664933424326554, iteration: 215240
loss: 1.0693336725234985,grad_norm: 0.999999056271207, iteration: 215241
loss: 1.0144535303115845,grad_norm: 0.9999991033776953, iteration: 215242
loss: 0.9356021881103516,grad_norm: 0.9999991079553453, iteration: 215243
loss: 1.0111801624298096,grad_norm: 0.9999990067856168, iteration: 215244
loss: 1.0166937112808228,grad_norm: 0.9999991730018201, iteration: 215245
loss: 0.9948865175247192,grad_norm: 0.9999992120780087, iteration: 215246
loss: 1.011744499206543,grad_norm: 0.999999636042087, iteration: 215247
loss: 0.9901190400123596,grad_norm: 0.9999990435187283, iteration: 215248
loss: 1.004387378692627,grad_norm: 0.9999990975428119, iteration: 215249
loss: 1.0031996965408325,grad_norm: 0.9999989836111473, iteration: 215250
loss: 0.9872807860374451,grad_norm: 0.7883725724577885, iteration: 215251
loss: 0.9749602675437927,grad_norm: 0.999999137797784, iteration: 215252
loss: 1.0109237432479858,grad_norm: 0.9959203378024657, iteration: 215253
loss: 0.9743069410324097,grad_norm: 0.9535936029374694, iteration: 215254
loss: 0.9860347509384155,grad_norm: 0.9675372823930527, iteration: 215255
loss: 1.0088348388671875,grad_norm: 0.8545547583106535, iteration: 215256
loss: 0.9854439496994019,grad_norm: 0.8768777322607287, iteration: 215257
loss: 1.0795303583145142,grad_norm: 0.9999996155375009, iteration: 215258
loss: 0.9677685499191284,grad_norm: 0.97700708549872, iteration: 215259
loss: 1.0043801069259644,grad_norm: 0.9986930214140471, iteration: 215260
loss: 1.0589039325714111,grad_norm: 0.9992492063506511, iteration: 215261
loss: 1.0253726243972778,grad_norm: 0.929892341336541, iteration: 215262
loss: 1.0232752561569214,grad_norm: 0.9999993005725142, iteration: 215263
loss: 1.0262113809585571,grad_norm: 0.999999227026512, iteration: 215264
loss: 0.9671037197113037,grad_norm: 0.9161610655020356, iteration: 215265
loss: 0.9742094874382019,grad_norm: 0.9246758357168624, iteration: 215266
loss: 1.0243664979934692,grad_norm: 0.7665320995144644, iteration: 215267
loss: 0.9834119081497192,grad_norm: 0.8192684898312933, iteration: 215268
loss: 0.9973099827766418,grad_norm: 0.8079636195664893, iteration: 215269
loss: 1.000523328781128,grad_norm: 0.8374139933204984, iteration: 215270
loss: 1.0396283864974976,grad_norm: 0.9940888530097008, iteration: 215271
loss: 1.005313754081726,grad_norm: 0.9740476093505646, iteration: 215272
loss: 0.9925448894500732,grad_norm: 0.7640227436089599, iteration: 215273
loss: 0.9897930026054382,grad_norm: 0.7866422049833558, iteration: 215274
loss: 0.9463459253311157,grad_norm: 0.8603698258046759, iteration: 215275
loss: 1.0212818384170532,grad_norm: 0.9999990802057501, iteration: 215276
loss: 0.9887707829475403,grad_norm: 0.9657538496868137, iteration: 215277
loss: 1.0059282779693604,grad_norm: 0.9672820947804314, iteration: 215278
loss: 0.9483371376991272,grad_norm: 0.9999991758806994, iteration: 215279
loss: 1.0160834789276123,grad_norm: 0.9999992206084325, iteration: 215280
loss: 0.9694994688034058,grad_norm: 0.9999992084310445, iteration: 215281
loss: 1.0220320224761963,grad_norm: 0.9999991039752407, iteration: 215282
loss: 0.9714046716690063,grad_norm: 0.8611580621927661, iteration: 215283
loss: 0.9915988445281982,grad_norm: 0.9909312798877807, iteration: 215284
loss: 1.0444667339324951,grad_norm: 0.8880118089538992, iteration: 215285
loss: 1.0195673704147339,grad_norm: 0.8182329620292194, iteration: 215286
loss: 1.007327675819397,grad_norm: 0.9483974542794988, iteration: 215287
loss: 1.0675301551818848,grad_norm: 0.8485187452727085, iteration: 215288
loss: 0.9505642652511597,grad_norm: 0.7708077572973627, iteration: 215289
loss: 1.0215412378311157,grad_norm: 0.9930489673005967, iteration: 215290
loss: 1.0084929466247559,grad_norm: 0.9506294244164375, iteration: 215291
loss: 0.9940733909606934,grad_norm: 0.9338011356095643, iteration: 215292
loss: 0.9968605637550354,grad_norm: 0.999999101469781, iteration: 215293
loss: 1.049937129020691,grad_norm: 0.9999991547409657, iteration: 215294
loss: 1.0404026508331299,grad_norm: 0.7695805104599385, iteration: 215295
loss: 0.9864277839660645,grad_norm: 0.9267758681070678, iteration: 215296
loss: 0.9833734631538391,grad_norm: 0.999999045768209, iteration: 215297
loss: 0.9933835864067078,grad_norm: 0.8115198260648445, iteration: 215298
loss: 0.9842022657394409,grad_norm: 0.8588921517345656, iteration: 215299
loss: 0.9737836718559265,grad_norm: 0.9089159977434177, iteration: 215300
loss: 0.9962242841720581,grad_norm: 0.999999215061646, iteration: 215301
loss: 0.9987785220146179,grad_norm: 0.9413715240159531, iteration: 215302
loss: 1.041650652885437,grad_norm: 0.9999994980521852, iteration: 215303
loss: 1.044176459312439,grad_norm: 0.9999990686302167, iteration: 215304
loss: 1.0084577798843384,grad_norm: 0.9579591261889254, iteration: 215305
loss: 0.9791699051856995,grad_norm: 0.9999991623507948, iteration: 215306
loss: 0.9950541853904724,grad_norm: 0.9461289414904819, iteration: 215307
loss: 1.0063542127609253,grad_norm: 0.8270276893953923, iteration: 215308
loss: 0.9484984874725342,grad_norm: 0.9583271290346599, iteration: 215309
loss: 0.9711971879005432,grad_norm: 0.8624048160031585, iteration: 215310
loss: 1.0767390727996826,grad_norm: 0.9999992381603432, iteration: 215311
loss: 0.9940878748893738,grad_norm: 0.9999991692772013, iteration: 215312
loss: 1.045885682106018,grad_norm: 0.999999316691407, iteration: 215313
loss: 1.008934497833252,grad_norm: 0.912075142568549, iteration: 215314
loss: 0.9711248278617859,grad_norm: 0.999999289625547, iteration: 215315
loss: 1.017106056213379,grad_norm: 0.9643159454079415, iteration: 215316
loss: 1.0135035514831543,grad_norm: 0.999999163351915, iteration: 215317
loss: 0.9686539173126221,grad_norm: 0.8488113116280326, iteration: 215318
loss: 0.99495530128479,grad_norm: 0.9578661374976117, iteration: 215319
loss: 1.0024018287658691,grad_norm: 0.9999992806896044, iteration: 215320
loss: 1.0182048082351685,grad_norm: 0.8790498738646715, iteration: 215321
loss: 1.0182708501815796,grad_norm: 0.999999713241168, iteration: 215322
loss: 0.9964532852172852,grad_norm: 0.9019263169804113, iteration: 215323
loss: 1.0111883878707886,grad_norm: 0.8259134796635595, iteration: 215324
loss: 1.0185372829437256,grad_norm: 0.999999159771207, iteration: 215325
loss: 0.9681276679039001,grad_norm: 0.8227424955371089, iteration: 215326
loss: 0.995538055896759,grad_norm: 0.8966205155830217, iteration: 215327
loss: 0.9758870005607605,grad_norm: 0.9433868193067005, iteration: 215328
loss: 1.0104988813400269,grad_norm: 0.9999990612350006, iteration: 215329
loss: 1.0180001258850098,grad_norm: 0.9308188985034808, iteration: 215330
loss: 0.9629842638969421,grad_norm: 0.8859620126659942, iteration: 215331
loss: 1.0153661966323853,grad_norm: 0.9999991998005626, iteration: 215332
loss: 0.985710084438324,grad_norm: 0.832108203045754, iteration: 215333
loss: 0.9854649901390076,grad_norm: 0.8077498006278447, iteration: 215334
loss: 1.0253053903579712,grad_norm: 0.9999990695806061, iteration: 215335
loss: 0.9764209985733032,grad_norm: 0.7006203708461132, iteration: 215336
loss: 1.037237524986267,grad_norm: 0.9884936567715702, iteration: 215337
loss: 0.9630025029182434,grad_norm: 0.844982073014805, iteration: 215338
loss: 0.9589188098907471,grad_norm: 0.8908929689255095, iteration: 215339
loss: 0.9962663054466248,grad_norm: 0.9999991823622354, iteration: 215340
loss: 0.9614408016204834,grad_norm: 0.9999991799294361, iteration: 215341
loss: 1.0079545974731445,grad_norm: 0.9999989714400784, iteration: 215342
loss: 1.089658260345459,grad_norm: 0.9999992668850249, iteration: 215343
loss: 0.9758239388465881,grad_norm: 0.9999990386537582, iteration: 215344
loss: 0.9903117418289185,grad_norm: 0.9781405048004473, iteration: 215345
loss: 1.002389907836914,grad_norm: 0.8816974930731905, iteration: 215346
loss: 1.00326406955719,grad_norm: 0.9215335008869471, iteration: 215347
loss: 0.9912093281745911,grad_norm: 0.8831309996109388, iteration: 215348
loss: 0.9652467370033264,grad_norm: 0.8315894264586003, iteration: 215349
loss: 1.0428824424743652,grad_norm: 0.9999990774593847, iteration: 215350
loss: 1.0077667236328125,grad_norm: 0.8871542808000894, iteration: 215351
loss: 1.0349434614181519,grad_norm: 0.9318254227247299, iteration: 215352
loss: 0.9797369837760925,grad_norm: 0.8886005226695683, iteration: 215353
loss: 1.002941608428955,grad_norm: 0.8448031834745142, iteration: 215354
loss: 1.0366078615188599,grad_norm: 0.9999991014529143, iteration: 215355
loss: 1.0002299547195435,grad_norm: 0.9999989955673982, iteration: 215356
loss: 0.9928179979324341,grad_norm: 0.936667309718979, iteration: 215357
loss: 0.9868773818016052,grad_norm: 0.9912465477994725, iteration: 215358
loss: 0.9975170493125916,grad_norm: 0.9999991686769593, iteration: 215359
loss: 1.0096830129623413,grad_norm: 0.8634450449401696, iteration: 215360
loss: 1.0101513862609863,grad_norm: 0.8952167208282753, iteration: 215361
loss: 0.9629572033882141,grad_norm: 0.7542041627896415, iteration: 215362
loss: 0.9917489886283875,grad_norm: 0.7182712442449567, iteration: 215363
loss: 1.0091207027435303,grad_norm: 0.8004163397366395, iteration: 215364
loss: 0.9927232265472412,grad_norm: 0.9999990399669487, iteration: 215365
loss: 0.977095365524292,grad_norm: 0.9571054911060626, iteration: 215366
loss: 1.004577875137329,grad_norm: 0.9426668753663728, iteration: 215367
loss: 1.0028430223464966,grad_norm: 0.889987005360106, iteration: 215368
loss: 1.0037859678268433,grad_norm: 0.8713254279615049, iteration: 215369
loss: 0.9977997541427612,grad_norm: 0.9074361922647806, iteration: 215370
loss: 1.0164103507995605,grad_norm: 0.9999991028809168, iteration: 215371
loss: 1.1205750703811646,grad_norm: 1.000000020261714, iteration: 215372
loss: 0.9844281673431396,grad_norm: 0.7827188090786783, iteration: 215373
loss: 0.9944154024124146,grad_norm: 0.8311799414579348, iteration: 215374
loss: 1.0221678018569946,grad_norm: 0.9859805126956428, iteration: 215375
loss: 1.0110055208206177,grad_norm: 0.9586099728943694, iteration: 215376
loss: 1.0044703483581543,grad_norm: 0.9999989805691019, iteration: 215377
loss: 0.9947421550750732,grad_norm: 0.9916995669425459, iteration: 215378
loss: 0.9972118139266968,grad_norm: 0.9801479636160096, iteration: 215379
loss: 0.9757965207099915,grad_norm: 0.7541356443496784, iteration: 215380
loss: 0.9765489101409912,grad_norm: 0.8440493956785989, iteration: 215381
loss: 1.0160605907440186,grad_norm: 0.9231113966860699, iteration: 215382
loss: 1.0085235834121704,grad_norm: 0.9999992155498202, iteration: 215383
loss: 0.9825387597084045,grad_norm: 0.9999990316404948, iteration: 215384
loss: 1.0038659572601318,grad_norm: 0.9579169593139933, iteration: 215385
loss: 0.9950371980667114,grad_norm: 0.9999989958795008, iteration: 215386
loss: 0.9971712231636047,grad_norm: 0.8827455826785168, iteration: 215387
loss: 0.980408787727356,grad_norm: 0.9978523698037134, iteration: 215388
loss: 0.990805447101593,grad_norm: 0.9496012618658316, iteration: 215389
loss: 1.0195573568344116,grad_norm: 0.763663938424055, iteration: 215390
loss: 0.9713956117630005,grad_norm: 0.9999991208273709, iteration: 215391
loss: 0.9819364547729492,grad_norm: 0.8891454354391419, iteration: 215392
loss: 1.0338928699493408,grad_norm: 0.9999996699744903, iteration: 215393
loss: 1.0387698411941528,grad_norm: 0.9718833244456924, iteration: 215394
loss: 1.0399245023727417,grad_norm: 0.8850336417426059, iteration: 215395
loss: 1.0158298015594482,grad_norm: 0.999999185848203, iteration: 215396
loss: 0.9797464609146118,grad_norm: 0.9279174180013466, iteration: 215397
loss: 1.0561375617980957,grad_norm: 0.9196388481358091, iteration: 215398
loss: 0.9920092225074768,grad_norm: 0.9999991447910227, iteration: 215399
loss: 0.9699971675872803,grad_norm: 0.9396748007236033, iteration: 215400
loss: 0.9979162812232971,grad_norm: 0.9999989601442887, iteration: 215401
loss: 1.0362321138381958,grad_norm: 0.984976970494586, iteration: 215402
loss: 1.0235532522201538,grad_norm: 0.9999991888336583, iteration: 215403
loss: 1.0311552286148071,grad_norm: 0.9999990748071705, iteration: 215404
loss: 1.0124326944351196,grad_norm: 0.8428057289479557, iteration: 215405
loss: 0.9353801608085632,grad_norm: 0.9999991277815313, iteration: 215406
loss: 1.013740062713623,grad_norm: 0.9999993835903673, iteration: 215407
loss: 1.035408854484558,grad_norm: 0.999999174130076, iteration: 215408
loss: 0.9869419932365417,grad_norm: 0.9999990785432487, iteration: 215409
loss: 1.0251215696334839,grad_norm: 0.9999993191152426, iteration: 215410
loss: 0.98332279920578,grad_norm: 0.9069681983697433, iteration: 215411
loss: 1.0146663188934326,grad_norm: 0.9952538956423239, iteration: 215412
loss: 1.0054960250854492,grad_norm: 0.8387720447434639, iteration: 215413
loss: 0.9962783455848694,grad_norm: 0.9078391441190498, iteration: 215414
loss: 1.0017032623291016,grad_norm: 0.9118736708632351, iteration: 215415
loss: 0.9750880599021912,grad_norm: 0.8439198085421092, iteration: 215416
loss: 1.0085930824279785,grad_norm: 0.9552514466380223, iteration: 215417
loss: 0.9854394197463989,grad_norm: 0.809321129873165, iteration: 215418
loss: 0.9905797839164734,grad_norm: 0.9685979295830097, iteration: 215419
loss: 0.9681141376495361,grad_norm: 0.8433062389443787, iteration: 215420
loss: 0.9987351298332214,grad_norm: 0.9455775697705028, iteration: 215421
loss: 0.9847867488861084,grad_norm: 0.943364524644577, iteration: 215422
loss: 1.0363507270812988,grad_norm: 0.999999195897333, iteration: 215423
loss: 0.9926677942276001,grad_norm: 0.9999991046185546, iteration: 215424
loss: 1.0253885984420776,grad_norm: 0.9192693757411503, iteration: 215425
loss: 1.0461909770965576,grad_norm: 0.8184416281995164, iteration: 215426
loss: 1.0420892238616943,grad_norm: 0.795423548006369, iteration: 215427
loss: 1.0280088186264038,grad_norm: 0.9999992366399444, iteration: 215428
loss: 1.0164343118667603,grad_norm: 0.9999990641645393, iteration: 215429
loss: 1.0306005477905273,grad_norm: 0.8489611134932389, iteration: 215430
loss: 0.9908289909362793,grad_norm: 0.9999991473276104, iteration: 215431
loss: 0.9647657871246338,grad_norm: 0.8697058853541767, iteration: 215432
loss: 1.0158251523971558,grad_norm: 0.9304889731914003, iteration: 215433
loss: 0.967217743396759,grad_norm: 0.9742674228646853, iteration: 215434
loss: 0.9987283945083618,grad_norm: 0.9999991539144922, iteration: 215435
loss: 0.982237696647644,grad_norm: 0.9549563095834045, iteration: 215436
loss: 0.9685925245285034,grad_norm: 0.8006351690583892, iteration: 215437
loss: 1.0165249109268188,grad_norm: 0.9999994698269786, iteration: 215438
loss: 1.002794861793518,grad_norm: 0.8907188796134493, iteration: 215439
loss: 0.9612114429473877,grad_norm: 0.9999991974679834, iteration: 215440
loss: 1.0300601720809937,grad_norm: 0.9160920071423009, iteration: 215441
loss: 1.0038682222366333,grad_norm: 0.9788503579611287, iteration: 215442
loss: 0.992365300655365,grad_norm: 0.8055776656678816, iteration: 215443
loss: 0.9856178164482117,grad_norm: 0.9032778872282523, iteration: 215444
loss: 0.9738835692405701,grad_norm: 0.9558561470524051, iteration: 215445
loss: 1.102388620376587,grad_norm: 0.9999994067589827, iteration: 215446
loss: 0.9793522357940674,grad_norm: 0.9786334485530584, iteration: 215447
loss: 0.9843354225158691,grad_norm: 0.8785541917037984, iteration: 215448
loss: 0.9882908463478088,grad_norm: 0.8871667155663897, iteration: 215449
loss: 1.0129384994506836,grad_norm: 0.7722501899167816, iteration: 215450
loss: 1.1064705848693848,grad_norm: 1.0000001226034383, iteration: 215451
loss: 1.0220791101455688,grad_norm: 0.9291945395333054, iteration: 215452
loss: 1.0218592882156372,grad_norm: 0.9913422104410728, iteration: 215453
loss: 0.9722447395324707,grad_norm: 0.9617991879538198, iteration: 215454
loss: 0.989001989364624,grad_norm: 0.958586104398927, iteration: 215455
loss: 0.9965550303459167,grad_norm: 0.8328306567540821, iteration: 215456
loss: 1.0372240543365479,grad_norm: 0.999999954936704, iteration: 215457
loss: 1.012711763381958,grad_norm: 0.9750534762169343, iteration: 215458
loss: 1.0195966958999634,grad_norm: 0.9999990926198029, iteration: 215459
loss: 1.0032817125320435,grad_norm: 0.969041593460274, iteration: 215460
loss: 0.9652529358863831,grad_norm: 0.9156289881065202, iteration: 215461
loss: 1.0366445779800415,grad_norm: 0.9999998412511053, iteration: 215462
loss: 0.9681004285812378,grad_norm: 0.9999998547896999, iteration: 215463
loss: 1.0161857604980469,grad_norm: 0.9999989623001343, iteration: 215464
loss: 1.0307188034057617,grad_norm: 0.9800731848154662, iteration: 215465
loss: 0.9929482340812683,grad_norm: 0.7963355601286863, iteration: 215466
loss: 0.9926127195358276,grad_norm: 0.8934105119042532, iteration: 215467
loss: 1.0177875757217407,grad_norm: 0.9999992961854595, iteration: 215468
loss: 1.0084340572357178,grad_norm: 0.9661820899397765, iteration: 215469
loss: 1.0112278461456299,grad_norm: 0.7577071287129861, iteration: 215470
loss: 0.9635286927223206,grad_norm: 0.9999990452434725, iteration: 215471
loss: 1.0391684770584106,grad_norm: 0.8064100292632477, iteration: 215472
loss: 0.9975102543830872,grad_norm: 0.950870604137361, iteration: 215473
loss: 1.047065258026123,grad_norm: 0.9926139560864635, iteration: 215474
loss: 1.0246587991714478,grad_norm: 0.9999991128557826, iteration: 215475
loss: 1.0021847486495972,grad_norm: 0.9882030529965973, iteration: 215476
loss: 1.002333641052246,grad_norm: 0.9047127527095233, iteration: 215477
loss: 0.9894728660583496,grad_norm: 0.9999998545381069, iteration: 215478
loss: 1.014634370803833,grad_norm: 0.9999992638777622, iteration: 215479
loss: 0.983167290687561,grad_norm: 0.9165930366700832, iteration: 215480
loss: 1.039057970046997,grad_norm: 0.9999990434600383, iteration: 215481
loss: 0.9930698871612549,grad_norm: 0.8174597193020445, iteration: 215482
loss: 1.0359981060028076,grad_norm: 0.9999996767079568, iteration: 215483
loss: 1.007766842842102,grad_norm: 0.9999990862869761, iteration: 215484
loss: 1.0024980306625366,grad_norm: 0.9999991358603054, iteration: 215485
loss: 1.0216914415359497,grad_norm: 0.9145188507385641, iteration: 215486
loss: 0.9952141642570496,grad_norm: 0.9005930897291902, iteration: 215487
loss: 1.0038260221481323,grad_norm: 0.9999988671907846, iteration: 215488
loss: 0.976918637752533,grad_norm: 0.9999990129779615, iteration: 215489
loss: 1.0145679712295532,grad_norm: 0.9658222741106564, iteration: 215490
loss: 1.0032050609588623,grad_norm: 0.8735857329937275, iteration: 215491
loss: 1.0285919904708862,grad_norm: 0.9999992353825059, iteration: 215492
loss: 0.9925751686096191,grad_norm: 0.7967703270140912, iteration: 215493
loss: 0.9613537788391113,grad_norm: 0.9658880592722606, iteration: 215494
loss: 1.0044634342193604,grad_norm: 0.9999991777830386, iteration: 215495
loss: 0.9906522035598755,grad_norm: 0.9227987674082873, iteration: 215496
loss: 1.0246456861495972,grad_norm: 0.8165528249421204, iteration: 215497
loss: 1.0352113246917725,grad_norm: 0.9999991503334286, iteration: 215498
loss: 0.9897581338882446,grad_norm: 0.9676828271295211, iteration: 215499
loss: 0.9923790693283081,grad_norm: 0.9515273876625211, iteration: 215500
loss: 1.027915358543396,grad_norm: 0.9999990856401537, iteration: 215501
loss: 0.977689266204834,grad_norm: 0.9685692924502199, iteration: 215502
loss: 0.996100664138794,grad_norm: 0.9913705010003842, iteration: 215503
loss: 1.0287513732910156,grad_norm: 0.9999991284102755, iteration: 215504
loss: 1.0203578472137451,grad_norm: 0.7969823903122363, iteration: 215505
loss: 0.9891090393066406,grad_norm: 0.7268276442449656, iteration: 215506
loss: 0.9932048320770264,grad_norm: 0.9425969427923608, iteration: 215507
loss: 1.0027602910995483,grad_norm: 0.9554530726978265, iteration: 215508
loss: 1.0024844408035278,grad_norm: 0.9999990668924325, iteration: 215509
loss: 0.9988569021224976,grad_norm: 0.999999095757319, iteration: 215510
loss: 1.0205023288726807,grad_norm: 0.8636123578087432, iteration: 215511
loss: 0.9906656742095947,grad_norm: 0.9999989899169553, iteration: 215512
loss: 1.037088394165039,grad_norm: 0.9999996620219054, iteration: 215513
loss: 0.9837324023246765,grad_norm: 0.9568082993275416, iteration: 215514
loss: 1.0081573724746704,grad_norm: 0.9999991939918026, iteration: 215515
loss: 0.9907524585723877,grad_norm: 0.8227753893695572, iteration: 215516
loss: 0.995408296585083,grad_norm: 0.9258757512896338, iteration: 215517
loss: 1.0319890975952148,grad_norm: 0.8881066910028709, iteration: 215518
loss: 1.004231333732605,grad_norm: 0.9567215071418639, iteration: 215519
loss: 1.0168462991714478,grad_norm: 0.9999991514434802, iteration: 215520
loss: 1.002276062965393,grad_norm: 0.9488237589404437, iteration: 215521
loss: 0.9741175770759583,grad_norm: 0.8025052069644096, iteration: 215522
loss: 0.9962282776832581,grad_norm: 0.9070296275177677, iteration: 215523
loss: 1.0407532453536987,grad_norm: 0.9172219000763777, iteration: 215524
loss: 0.9925641417503357,grad_norm: 0.9999992056055823, iteration: 215525
loss: 1.021854043006897,grad_norm: 0.9379878226190879, iteration: 215526
loss: 1.0224359035491943,grad_norm: 0.9999991035823385, iteration: 215527
loss: 0.9709157943725586,grad_norm: 0.9996164152763316, iteration: 215528
loss: 1.0249065160751343,grad_norm: 0.7715053513123494, iteration: 215529
loss: 1.2800416946411133,grad_norm: 0.9999997031298865, iteration: 215530
loss: 0.9901828765869141,grad_norm: 0.9237802421408264, iteration: 215531
loss: 1.0326449871063232,grad_norm: 0.9258603558481936, iteration: 215532
loss: 0.9920997619628906,grad_norm: 0.9382616651923087, iteration: 215533
loss: 1.046446442604065,grad_norm: 0.9999990777051142, iteration: 215534
loss: 0.9707778692245483,grad_norm: 0.9999992747348133, iteration: 215535
loss: 1.0042420625686646,grad_norm: 0.8272334664505658, iteration: 215536
loss: 1.0111780166625977,grad_norm: 0.9999997929483275, iteration: 215537
loss: 0.9777383208274841,grad_norm: 0.9999991029506857, iteration: 215538
loss: 1.0083446502685547,grad_norm: 0.9999996573167494, iteration: 215539
loss: 0.9972604513168335,grad_norm: 0.9999991601395251, iteration: 215540
loss: 1.0243232250213623,grad_norm: 0.8576925321517039, iteration: 215541
loss: 1.1389065980911255,grad_norm: 0.9999995885923403, iteration: 215542
loss: 0.9900734424591064,grad_norm: 0.8133272087336851, iteration: 215543
loss: 0.9643344283103943,grad_norm: 0.9763753077368376, iteration: 215544
loss: 1.0144082307815552,grad_norm: 0.9256886391003581, iteration: 215545
loss: 1.1574417352676392,grad_norm: 0.9999995692087433, iteration: 215546
loss: 1.0121937990188599,grad_norm: 0.8422864730663173, iteration: 215547
loss: 1.296163558959961,grad_norm: 0.9999997862439228, iteration: 215548
loss: 0.9825916886329651,grad_norm: 0.8788179445979428, iteration: 215549
loss: 1.1968636512756348,grad_norm: 0.9999991778510796, iteration: 215550
loss: 0.9755215048789978,grad_norm: 0.9999991772971649, iteration: 215551
loss: 1.0879528522491455,grad_norm: 0.9999990914875139, iteration: 215552
loss: 1.0084282159805298,grad_norm: 0.9999992933278783, iteration: 215553
loss: 0.9399884939193726,grad_norm: 0.8210496952628676, iteration: 215554
loss: 1.072486162185669,grad_norm: 0.9319654111003868, iteration: 215555
loss: 0.9977995157241821,grad_norm: 0.843916355743525, iteration: 215556
loss: 1.06377375125885,grad_norm: 0.999999880468646, iteration: 215557
loss: 1.122110366821289,grad_norm: 0.9999998239842702, iteration: 215558
loss: 1.0720834732055664,grad_norm: 0.9504180837535656, iteration: 215559
loss: 1.0347226858139038,grad_norm: 0.9953804862751635, iteration: 215560
loss: 1.0337806940078735,grad_norm: 0.9999997397104512, iteration: 215561
loss: 0.9780348539352417,grad_norm: 0.9999990014342246, iteration: 215562
loss: 1.094034194946289,grad_norm: 0.9563583468053259, iteration: 215563
loss: 1.008994698524475,grad_norm: 0.9999990924982202, iteration: 215564
loss: 0.9969459176063538,grad_norm: 0.9999990827629129, iteration: 215565
loss: 0.956742525100708,grad_norm: 0.9999990034588894, iteration: 215566
loss: 0.9767646193504333,grad_norm: 0.99999910426216, iteration: 215567
loss: 1.0015288591384888,grad_norm: 0.9999993324419352, iteration: 215568
loss: 0.9896624088287354,grad_norm: 0.9630261032220452, iteration: 215569
loss: 1.041225552558899,grad_norm: 0.9999992883885773, iteration: 215570
loss: 0.9923031330108643,grad_norm: 0.9418848606567579, iteration: 215571
loss: 0.9844917058944702,grad_norm: 0.987754015898062, iteration: 215572
loss: 1.0121636390686035,grad_norm: 0.9999989849465815, iteration: 215573
loss: 1.0050554275512695,grad_norm: 0.9999991094019971, iteration: 215574
loss: 0.9926500916481018,grad_norm: 0.9999989614928301, iteration: 215575
loss: 1.0309056043624878,grad_norm: 0.9685600650338438, iteration: 215576
loss: 0.9607663750648499,grad_norm: 0.9300133759165108, iteration: 215577
loss: 1.021537184715271,grad_norm: 0.819494516913274, iteration: 215578
loss: 1.0622749328613281,grad_norm: 0.8892239250363456, iteration: 215579
loss: 0.9695855379104614,grad_norm: 0.8211232596399605, iteration: 215580
loss: 0.9685975909233093,grad_norm: 0.9084341368905695, iteration: 215581
loss: 1.0235258340835571,grad_norm: 0.8845377934341697, iteration: 215582
loss: 1.0166242122650146,grad_norm: 0.9581870145418909, iteration: 215583
loss: 0.998751163482666,grad_norm: 0.9515897597658555, iteration: 215584
loss: 0.986980140209198,grad_norm: 0.9999990994273866, iteration: 215585
loss: 1.038411021232605,grad_norm: 0.8813934028977745, iteration: 215586
loss: 0.9772803783416748,grad_norm: 0.9999990495042924, iteration: 215587
loss: 0.9838960766792297,grad_norm: 0.7861372905114365, iteration: 215588
loss: 1.022006630897522,grad_norm: 0.9316057922278964, iteration: 215589
loss: 1.0138156414031982,grad_norm: 0.9999990503700236, iteration: 215590
loss: 0.9838886260986328,grad_norm: 0.9999989669972665, iteration: 215591
loss: 1.0137280225753784,grad_norm: 0.9999993025198747, iteration: 215592
loss: 0.9759436249732971,grad_norm: 0.9062221050875321, iteration: 215593
loss: 1.0022375583648682,grad_norm: 0.9999992607912188, iteration: 215594
loss: 1.0232419967651367,grad_norm: 0.9823977301066944, iteration: 215595
loss: 0.9582478404045105,grad_norm: 0.8172109269759786, iteration: 215596
loss: 0.9684963822364807,grad_norm: 0.9781160437431531, iteration: 215597
loss: 0.9807565212249756,grad_norm: 0.9765099276678083, iteration: 215598
loss: 0.9821667075157166,grad_norm: 0.9624512590469009, iteration: 215599
loss: 0.9554746747016907,grad_norm: 0.8956537993932977, iteration: 215600
loss: 0.9462358355522156,grad_norm: 0.9999989164382425, iteration: 215601
loss: 1.021313190460205,grad_norm: 0.9999991208285048, iteration: 215602
loss: 1.040932059288025,grad_norm: 0.9848293829103882, iteration: 215603
loss: 1.006657361984253,grad_norm: 0.9999990411919623, iteration: 215604
loss: 1.0132452249526978,grad_norm: 0.9999992571131822, iteration: 215605
loss: 0.9802284836769104,grad_norm: 0.9942865553739945, iteration: 215606
loss: 1.037076473236084,grad_norm: 0.9719947103988904, iteration: 215607
loss: 0.9972543120384216,grad_norm: 0.8873859927858633, iteration: 215608
loss: 1.0234860181808472,grad_norm: 0.8105005170018978, iteration: 215609
loss: 0.9550317525863647,grad_norm: 0.7764094491257315, iteration: 215610
loss: 0.9843327403068542,grad_norm: 0.9101488235241615, iteration: 215611
loss: 0.998811662197113,grad_norm: 0.7012362922347514, iteration: 215612
loss: 1.0067288875579834,grad_norm: 0.8991402907612791, iteration: 215613
loss: 1.0313035249710083,grad_norm: 0.9999991206691771, iteration: 215614
loss: 1.020902395248413,grad_norm: 0.8567352280362619, iteration: 215615
loss: 0.9866112470626831,grad_norm: 0.8343294606863537, iteration: 215616
loss: 1.0197477340698242,grad_norm: 0.7995670011472974, iteration: 215617
loss: 0.9532331228256226,grad_norm: 0.9240801354605437, iteration: 215618
loss: 0.9318467974662781,grad_norm: 0.9999992119623594, iteration: 215619
loss: 0.9972444176673889,grad_norm: 0.7278436880356198, iteration: 215620
loss: 0.9644826054573059,grad_norm: 0.8636491842234244, iteration: 215621
loss: 0.9811992049217224,grad_norm: 0.9999990913647671, iteration: 215622
loss: 0.9899707436561584,grad_norm: 0.999999310881697, iteration: 215623
loss: 0.9848470091819763,grad_norm: 0.8948253301896483, iteration: 215624
loss: 0.9662319421768188,grad_norm: 0.8969834403682321, iteration: 215625
loss: 0.9922534227371216,grad_norm: 0.8907916587184689, iteration: 215626
loss: 1.0030821561813354,grad_norm: 0.9458906893783202, iteration: 215627
loss: 0.9694183468818665,grad_norm: 0.9999990026181184, iteration: 215628
loss: 1.0146864652633667,grad_norm: 0.9434176695469554, iteration: 215629
loss: 0.9998385906219482,grad_norm: 0.8076799082689416, iteration: 215630
loss: 0.9831712245941162,grad_norm: 0.999999164590147, iteration: 215631
loss: 1.0263155698776245,grad_norm: 0.9054875707536076, iteration: 215632
loss: 0.9557839632034302,grad_norm: 0.7406317159203942, iteration: 215633
loss: 0.995435893535614,grad_norm: 0.8679620392408024, iteration: 215634
loss: 0.9814937114715576,grad_norm: 0.9233027600475825, iteration: 215635
loss: 0.9674677848815918,grad_norm: 0.8583403646316908, iteration: 215636
loss: 0.996740996837616,grad_norm: 0.9576325978729959, iteration: 215637
loss: 0.9783896207809448,grad_norm: 0.8501057509729983, iteration: 215638
loss: 0.9840789437294006,grad_norm: 0.8584622632620998, iteration: 215639
loss: 1.0142508745193481,grad_norm: 0.9999990574922613, iteration: 215640
loss: 0.9809112548828125,grad_norm: 0.797292143255292, iteration: 215641
loss: 1.003514289855957,grad_norm: 0.9675860980026071, iteration: 215642
loss: 1.001456618309021,grad_norm: 0.9906004316787658, iteration: 215643
loss: 0.9786209464073181,grad_norm: 0.9999991977989037, iteration: 215644
loss: 0.9738863110542297,grad_norm: 0.8884661489363757, iteration: 215645
loss: 0.9888430237770081,grad_norm: 0.9999991155513615, iteration: 215646
loss: 1.019145131111145,grad_norm: 0.9999994860821382, iteration: 215647
loss: 0.9863660335540771,grad_norm: 0.7690964766032204, iteration: 215648
loss: 1.025457739830017,grad_norm: 0.9999991065107434, iteration: 215649
loss: 0.9980168342590332,grad_norm: 0.8563365148175186, iteration: 215650
loss: 0.9999494552612305,grad_norm: 0.8057519443654645, iteration: 215651
loss: 1.0101804733276367,grad_norm: 0.8579743928478989, iteration: 215652
loss: 0.9942166209220886,grad_norm: 0.9509956124339172, iteration: 215653
loss: 1.0414259433746338,grad_norm: 0.9999996104769512, iteration: 215654
loss: 0.9963564276695251,grad_norm: 0.9999991696649295, iteration: 215655
loss: 0.9869454503059387,grad_norm: 0.9521548875908064, iteration: 215656
loss: 0.9911599159240723,grad_norm: 0.9999991946033061, iteration: 215657
loss: 1.1258653402328491,grad_norm: 0.9999998253238486, iteration: 215658
loss: 1.039231300354004,grad_norm: 0.9742298994350701, iteration: 215659
loss: 1.010796070098877,grad_norm: 0.9999993422475065, iteration: 215660
loss: 1.0162265300750732,grad_norm: 0.8168073413875305, iteration: 215661
loss: 0.9767776727676392,grad_norm: 0.9999990525338107, iteration: 215662
loss: 0.9916969537734985,grad_norm: 0.888280961650813, iteration: 215663
loss: 1.003748893737793,grad_norm: 0.9465190814171581, iteration: 215664
loss: 1.0030758380889893,grad_norm: 0.853319749042721, iteration: 215665
loss: 1.0291212797164917,grad_norm: 0.9999995410204755, iteration: 215666
loss: 1.0111112594604492,grad_norm: 0.9793760964021266, iteration: 215667
loss: 0.9783328175544739,grad_norm: 0.9389628243119813, iteration: 215668
loss: 1.031648874282837,grad_norm: 0.9554432576372363, iteration: 215669
loss: 1.0020843744277954,grad_norm: 0.887803603379974, iteration: 215670
loss: 0.9900161027908325,grad_norm: 0.9999991805174567, iteration: 215671
loss: 0.9998540878295898,grad_norm: 0.8586615631593285, iteration: 215672
loss: 1.031394600868225,grad_norm: 0.9999991001466326, iteration: 215673
loss: 0.9877748489379883,grad_norm: 0.876053142747291, iteration: 215674
loss: 1.0536249876022339,grad_norm: 0.9468271750078939, iteration: 215675
loss: 0.998350203037262,grad_norm: 0.9999989976440805, iteration: 215676
loss: 1.0078269243240356,grad_norm: 0.9572553731100354, iteration: 215677
loss: 1.064590334892273,grad_norm: 1.0000001138413448, iteration: 215678
loss: 1.0059252977371216,grad_norm: 0.9999989022228282, iteration: 215679
loss: 1.0055028200149536,grad_norm: 0.9184884874775873, iteration: 215680
loss: 1.0029085874557495,grad_norm: 0.8530606558823682, iteration: 215681
loss: 1.0105540752410889,grad_norm: 0.8055663124054904, iteration: 215682
loss: 1.0204567909240723,grad_norm: 0.8432691393522722, iteration: 215683
loss: 1.0153396129608154,grad_norm: 0.9999997214009372, iteration: 215684
loss: 1.0258427858352661,grad_norm: 0.8588784583721742, iteration: 215685
loss: 1.0638257265090942,grad_norm: 0.9999999923105652, iteration: 215686
loss: 1.030720829963684,grad_norm: 0.9265320600170796, iteration: 215687
loss: 1.161165475845337,grad_norm: 0.9999996482332868, iteration: 215688
loss: 1.0949279069900513,grad_norm: 0.9999997569341632, iteration: 215689
loss: 1.0252145528793335,grad_norm: 0.8228787166734048, iteration: 215690
loss: 1.0528552532196045,grad_norm: 0.9999991357062936, iteration: 215691
loss: 0.9759950637817383,grad_norm: 0.9347495427274218, iteration: 215692
loss: 0.9984622597694397,grad_norm: 0.8563303008214783, iteration: 215693
loss: 1.0021311044692993,grad_norm: 0.9177344071162074, iteration: 215694
loss: 1.0672460794448853,grad_norm: 0.9393303023472521, iteration: 215695
loss: 1.0662015676498413,grad_norm: 0.8538730120660883, iteration: 215696
loss: 1.006340503692627,grad_norm: 0.9394269233832534, iteration: 215697
loss: 1.0058938264846802,grad_norm: 0.8454404492125714, iteration: 215698
loss: 1.0454246997833252,grad_norm: 0.9007417792095354, iteration: 215699
loss: 0.9943148493766785,grad_norm: 0.9003318848163105, iteration: 215700
loss: 0.9974196553230286,grad_norm: 0.8537311098678989, iteration: 215701
loss: 1.0160812139511108,grad_norm: 0.9999991747824982, iteration: 215702
loss: 0.9905226230621338,grad_norm: 0.9330501143526746, iteration: 215703
loss: 0.9872536063194275,grad_norm: 0.9999992246758488, iteration: 215704
loss: 1.022482991218567,grad_norm: 0.9999995334579312, iteration: 215705
loss: 0.9955204725265503,grad_norm: 0.8532311009720706, iteration: 215706
loss: 1.0588465929031372,grad_norm: 0.9223690649014842, iteration: 215707
loss: 0.9782240986824036,grad_norm: 0.951531692287955, iteration: 215708
loss: 0.9898600578308105,grad_norm: 0.9999992641561865, iteration: 215709
loss: 0.9846317172050476,grad_norm: 0.9892964578872677, iteration: 215710
loss: 0.9919549822807312,grad_norm: 0.889015170674347, iteration: 215711
loss: 0.9971665143966675,grad_norm: 0.969956090188123, iteration: 215712
loss: 1.012156367301941,grad_norm: 0.9945724043935614, iteration: 215713
loss: 1.008242130279541,grad_norm: 0.9999992450409748, iteration: 215714
loss: 1.0053088665008545,grad_norm: 0.9999990614391709, iteration: 215715
loss: 1.0244392156600952,grad_norm: 0.999999132826965, iteration: 215716
loss: 0.9606000781059265,grad_norm: 0.9683571455563664, iteration: 215717
loss: 1.000277042388916,grad_norm: 0.9005698193446396, iteration: 215718
loss: 1.0191372632980347,grad_norm: 0.919365660982122, iteration: 215719
loss: 0.9897345304489136,grad_norm: 0.825224772476273, iteration: 215720
loss: 1.0137873888015747,grad_norm: 0.9294816440186222, iteration: 215721
loss: 1.0002306699752808,grad_norm: 0.8438287973116779, iteration: 215722
loss: 1.0727111101150513,grad_norm: 0.9233946720050267, iteration: 215723
loss: 1.011532187461853,grad_norm: 0.8910933648007986, iteration: 215724
loss: 1.0046229362487793,grad_norm: 0.9999990275385569, iteration: 215725
loss: 0.97312331199646,grad_norm: 0.7828463058930193, iteration: 215726
loss: 1.009106159210205,grad_norm: 0.9445453025399875, iteration: 215727
loss: 1.015784740447998,grad_norm: 0.9999989759072371, iteration: 215728
loss: 1.038631796836853,grad_norm: 0.9009726969320714, iteration: 215729
loss: 1.0244662761688232,grad_norm: 0.9999990697541926, iteration: 215730
loss: 0.9618932604789734,grad_norm: 0.9782905042986656, iteration: 215731
loss: 1.003792405128479,grad_norm: 0.9999993083443035, iteration: 215732
loss: 1.011409878730774,grad_norm: 0.9999998424741959, iteration: 215733
loss: 0.9828740954399109,grad_norm: 0.803690574309846, iteration: 215734
loss: 0.9797948002815247,grad_norm: 0.9262101769356469, iteration: 215735
loss: 1.016117811203003,grad_norm: 0.8767985881773838, iteration: 215736
loss: 0.9808657765388489,grad_norm: 0.9869016329624317, iteration: 215737
loss: 1.0017461776733398,grad_norm: 0.9702193207471607, iteration: 215738
loss: 1.0417205095291138,grad_norm: 0.8915975622453035, iteration: 215739
loss: 1.0057929754257202,grad_norm: 0.9999992565808236, iteration: 215740
loss: 0.9945448637008667,grad_norm: 0.907125013444218, iteration: 215741
loss: 1.0063828229904175,grad_norm: 0.8894392422890429, iteration: 215742
loss: 1.039360761642456,grad_norm: 0.9503060297887497, iteration: 215743
loss: 0.985393226146698,grad_norm: 0.9632748909965523, iteration: 215744
loss: 0.991209089756012,grad_norm: 0.8724618950282838, iteration: 215745
loss: 1.0231190919876099,grad_norm: 0.9999991331316817, iteration: 215746
loss: 1.0124982595443726,grad_norm: 0.7967543526009383, iteration: 215747
loss: 1.001009464263916,grad_norm: 0.9423851991368746, iteration: 215748
loss: 0.9783596992492676,grad_norm: 0.8098822089769392, iteration: 215749
loss: 1.007216453552246,grad_norm: 0.9566793685978261, iteration: 215750
loss: 1.004563570022583,grad_norm: 0.7638862224024121, iteration: 215751
loss: 0.985717236995697,grad_norm: 0.999999090912553, iteration: 215752
loss: 1.0186039209365845,grad_norm: 0.8705937116980808, iteration: 215753
loss: 1.0361526012420654,grad_norm: 0.9999997881371329, iteration: 215754
loss: 0.9976886510848999,grad_norm: 0.8431664836818583, iteration: 215755
loss: 0.9978081583976746,grad_norm: 0.9999990712410323, iteration: 215756
loss: 1.0184670686721802,grad_norm: 0.9609035662684942, iteration: 215757
loss: 0.9628788232803345,grad_norm: 0.805947463720236, iteration: 215758
loss: 1.0001890659332275,grad_norm: 0.8470993014189439, iteration: 215759
loss: 1.005948781967163,grad_norm: 0.9089122557297447, iteration: 215760
loss: 1.0005559921264648,grad_norm: 0.9999991868913465, iteration: 215761
loss: 0.9842259883880615,grad_norm: 0.8910139773031545, iteration: 215762
loss: 1.0522270202636719,grad_norm: 0.9999994481077975, iteration: 215763
loss: 1.027100920677185,grad_norm: 0.7215762007128419, iteration: 215764
loss: 0.9815819263458252,grad_norm: 0.9445896553406452, iteration: 215765
loss: 1.008415699005127,grad_norm: 0.9999991611576736, iteration: 215766
loss: 1.0414139032363892,grad_norm: 0.9999991951447617, iteration: 215767
loss: 0.9969568848609924,grad_norm: 0.9999989462130269, iteration: 215768
loss: 0.9919888377189636,grad_norm: 0.8660352830931377, iteration: 215769
loss: 0.9934212565422058,grad_norm: 0.9999991018339692, iteration: 215770
loss: 0.9870379567146301,grad_norm: 0.9999991229732214, iteration: 215771
loss: 1.008764624595642,grad_norm: 0.9999993945187315, iteration: 215772
loss: 0.9653946161270142,grad_norm: 0.819759127214835, iteration: 215773
loss: 0.9895235300064087,grad_norm: 0.7906236434993503, iteration: 215774
loss: 1.025078296661377,grad_norm: 0.9999991170942512, iteration: 215775
loss: 0.9675548672676086,grad_norm: 0.8012330507708351, iteration: 215776
loss: 1.0136289596557617,grad_norm: 0.9758915518178284, iteration: 215777
loss: 1.0105749368667603,grad_norm: 0.9999990747035978, iteration: 215778
loss: 0.9786930084228516,grad_norm: 0.8866365974204158, iteration: 215779
loss: 1.0211763381958008,grad_norm: 0.9158827599379281, iteration: 215780
loss: 1.0120986700057983,grad_norm: 0.9999990276807316, iteration: 215781
loss: 0.9999857544898987,grad_norm: 0.922809097609555, iteration: 215782
loss: 0.976995050907135,grad_norm: 0.9999991703933355, iteration: 215783
loss: 1.0265791416168213,grad_norm: 0.8368854787631611, iteration: 215784
loss: 0.9830424189567566,grad_norm: 0.9987606147830219, iteration: 215785
loss: 0.999004602432251,grad_norm: 0.8190920118712154, iteration: 215786
loss: 1.005299687385559,grad_norm: 0.7657217726732575, iteration: 215787
loss: 1.0418994426727295,grad_norm: 0.9306421808537964, iteration: 215788
loss: 1.0048778057098389,grad_norm: 0.8288746311377846, iteration: 215789
loss: 1.0021343231201172,grad_norm: 0.833454223602192, iteration: 215790
loss: 0.9924414157867432,grad_norm: 0.9539861548120161, iteration: 215791
loss: 1.0286946296691895,grad_norm: 0.9226029886130056, iteration: 215792
loss: 0.9790335893630981,grad_norm: 0.8601418708269473, iteration: 215793
loss: 0.9984534978866577,grad_norm: 0.7783770564758503, iteration: 215794
loss: 1.0002663135528564,grad_norm: 0.9432178365419809, iteration: 215795
loss: 1.002257227897644,grad_norm: 0.8514795302339535, iteration: 215796
loss: 0.9845314621925354,grad_norm: 0.8611326181456764, iteration: 215797
loss: 1.035275936126709,grad_norm: 0.9770928446822148, iteration: 215798
loss: 1.0002455711364746,grad_norm: 0.8973366188905019, iteration: 215799
loss: 1.0050314664840698,grad_norm: 0.826639240727934, iteration: 215800
loss: 1.0018616914749146,grad_norm: 0.9219570184072732, iteration: 215801
loss: 1.0838655233383179,grad_norm: 0.9583900128171148, iteration: 215802
loss: 1.0115225315093994,grad_norm: 0.9999995075704079, iteration: 215803
loss: 1.0377142429351807,grad_norm: 0.8638745270341202, iteration: 215804
loss: 1.0030156373977661,grad_norm: 0.999999149124035, iteration: 215805
loss: 1.0054805278778076,grad_norm: 0.9105886132478225, iteration: 215806
loss: 0.9838839769363403,grad_norm: 0.9479237846287356, iteration: 215807
loss: 1.0145076513290405,grad_norm: 0.8997207612283745, iteration: 215808
loss: 1.042843222618103,grad_norm: 0.9999993132115206, iteration: 215809
loss: 1.1136971712112427,grad_norm: 0.9999992270796755, iteration: 215810
loss: 1.000089168548584,grad_norm: 0.818360593298553, iteration: 215811
loss: 0.9905643463134766,grad_norm: 0.8718235177388919, iteration: 215812
loss: 1.0012375116348267,grad_norm: 0.9094835549501215, iteration: 215813
loss: 1.0703935623168945,grad_norm: 0.999999107752533, iteration: 215814
loss: 1.0275964736938477,grad_norm: 0.9410159873828756, iteration: 215815
loss: 1.0242207050323486,grad_norm: 0.9514515888118033, iteration: 215816
loss: 0.9800639748573303,grad_norm: 0.8594269142331207, iteration: 215817
loss: 1.0113004446029663,grad_norm: 0.7924839836019633, iteration: 215818
loss: 0.9657040238380432,grad_norm: 0.8823315273122139, iteration: 215819
loss: 0.9914103150367737,grad_norm: 0.8898347391303489, iteration: 215820
loss: 1.004581093788147,grad_norm: 0.8725220302588713, iteration: 215821
loss: 1.0181487798690796,grad_norm: 0.9999992586924226, iteration: 215822
loss: 0.9982926249504089,grad_norm: 0.9999991100555861, iteration: 215823
loss: 0.9747682809829712,grad_norm: 0.7927368886171181, iteration: 215824
loss: 0.9702230095863342,grad_norm: 0.9999992484389933, iteration: 215825
loss: 1.009610891342163,grad_norm: 0.8199516311038528, iteration: 215826
loss: 0.9704566597938538,grad_norm: 0.8385306592319464, iteration: 215827
loss: 0.9949033856391907,grad_norm: 0.999998984490935, iteration: 215828
loss: 1.0343995094299316,grad_norm: 0.9999991271806781, iteration: 215829
loss: 0.9810633659362793,grad_norm: 0.824553651769936, iteration: 215830
loss: 0.9910326600074768,grad_norm: 0.9390146087681128, iteration: 215831
loss: 0.996189296245575,grad_norm: 0.7960607037620981, iteration: 215832
loss: 1.0030122995376587,grad_norm: 0.934820360294564, iteration: 215833
loss: 0.9719353318214417,grad_norm: 0.9290978314551303, iteration: 215834
loss: 0.9844085574150085,grad_norm: 0.9999991829620022, iteration: 215835
loss: 0.9808456897735596,grad_norm: 0.8932518668091187, iteration: 215836
loss: 1.0029985904693604,grad_norm: 0.8481928559412949, iteration: 215837
loss: 0.9565958380699158,grad_norm: 0.8584812609369712, iteration: 215838
loss: 0.9923288226127625,grad_norm: 0.8864378693535386, iteration: 215839
loss: 0.9948436617851257,grad_norm: 0.8853945138192519, iteration: 215840
loss: 1.0209537744522095,grad_norm: 0.9999998633513497, iteration: 215841
loss: 0.9819018840789795,grad_norm: 0.8182234839782808, iteration: 215842
loss: 1.0400991439819336,grad_norm: 0.9394003257799832, iteration: 215843
loss: 0.9691628813743591,grad_norm: 0.9235450716616664, iteration: 215844
loss: 1.0076173543930054,grad_norm: 0.9999990465396161, iteration: 215845
loss: 0.9824626445770264,grad_norm: 0.9999991363721381, iteration: 215846
loss: 1.0039008855819702,grad_norm: 0.9999996010433069, iteration: 215847
loss: 0.9821088910102844,grad_norm: 0.9999990719546067, iteration: 215848
loss: 0.9530311226844788,grad_norm: 0.9131546243083746, iteration: 215849
loss: 1.0059105157852173,grad_norm: 0.7969835868035876, iteration: 215850
loss: 0.9615684747695923,grad_norm: 0.9884733212552106, iteration: 215851
loss: 1.0077964067459106,grad_norm: 0.9965102091207364, iteration: 215852
loss: 1.0242701768875122,grad_norm: 0.9999991208980512, iteration: 215853
loss: 1.0142526626586914,grad_norm: 0.9175394167635785, iteration: 215854
loss: 1.0283899307250977,grad_norm: 0.9787217091013171, iteration: 215855
loss: 1.0445023775100708,grad_norm: 0.8693521529794855, iteration: 215856
loss: 0.9801642298698425,grad_norm: 0.9086454191335851, iteration: 215857
loss: 1.0016605854034424,grad_norm: 0.8761716253336655, iteration: 215858
loss: 1.0094953775405884,grad_norm: 0.9999994347001092, iteration: 215859
loss: 0.9741154909133911,grad_norm: 0.8688425339224313, iteration: 215860
loss: 0.9986015558242798,grad_norm: 0.999999065952073, iteration: 215861
loss: 0.9667872190475464,grad_norm: 0.9719774141056341, iteration: 215862
loss: 0.9572170376777649,grad_norm: 0.9999991168307633, iteration: 215863
loss: 1.0123851299285889,grad_norm: 0.9999991060771491, iteration: 215864
loss: 0.9704588651657104,grad_norm: 0.9577322915335789, iteration: 215865
loss: 0.9892175197601318,grad_norm: 0.861647243820068, iteration: 215866
loss: 0.9848048686981201,grad_norm: 0.9332407796401935, iteration: 215867
loss: 1.014702320098877,grad_norm: 0.9999995198776543, iteration: 215868
loss: 0.9890663623809814,grad_norm: 0.974417707974429, iteration: 215869
loss: 0.9585881233215332,grad_norm: 0.8912286290521939, iteration: 215870
loss: 0.9836087822914124,grad_norm: 0.8399388287780527, iteration: 215871
loss: 1.0322281122207642,grad_norm: 0.9999992165017925, iteration: 215872
loss: 1.0172420740127563,grad_norm: 0.81148826975231, iteration: 215873
loss: 0.9744885563850403,grad_norm: 0.9422338404547331, iteration: 215874
loss: 1.0188989639282227,grad_norm: 0.9545894338522944, iteration: 215875
loss: 1.0015524625778198,grad_norm: 0.8755671611785346, iteration: 215876
loss: 1.0072786808013916,grad_norm: 0.9556077289754517, iteration: 215877
loss: 1.0326173305511475,grad_norm: 0.9890544609661763, iteration: 215878
loss: 0.9740424156188965,grad_norm: 0.8638508439890377, iteration: 215879
loss: 1.0281100273132324,grad_norm: 0.999999012515444, iteration: 215880
loss: 1.0252394676208496,grad_norm: 0.9999991727348, iteration: 215881
loss: 0.9752368330955505,grad_norm: 0.9827470270721185, iteration: 215882
loss: 1.0320522785186768,grad_norm: 0.9999998677828383, iteration: 215883
loss: 1.0017565488815308,grad_norm: 0.8920771457190708, iteration: 215884
loss: 1.0209729671478271,grad_norm: 0.9999991595677957, iteration: 215885
loss: 0.99899822473526,grad_norm: 0.9999990834280104, iteration: 215886
loss: 1.0119582414627075,grad_norm: 0.9425022351645773, iteration: 215887
loss: 0.9901513457298279,grad_norm: 0.9776783284338171, iteration: 215888
loss: 1.0037457942962646,grad_norm: 0.85474187408452, iteration: 215889
loss: 0.984493613243103,grad_norm: 0.999999087558511, iteration: 215890
loss: 0.9974672198295593,grad_norm: 0.8476309176298105, iteration: 215891
loss: 1.0055207014083862,grad_norm: 0.9584033796452374, iteration: 215892
loss: 0.9651452898979187,grad_norm: 0.9347529324894852, iteration: 215893
loss: 1.003017783164978,grad_norm: 0.9241979871025547, iteration: 215894
loss: 1.0277451276779175,grad_norm: 0.999999376905749, iteration: 215895
loss: 0.9865212440490723,grad_norm: 0.8099177296764555, iteration: 215896
loss: 1.0409966707229614,grad_norm: 0.9832080394881929, iteration: 215897
loss: 0.9900884032249451,grad_norm: 0.8937401316585547, iteration: 215898
loss: 0.9635304808616638,grad_norm: 0.8315243672785666, iteration: 215899
loss: 0.9809084534645081,grad_norm: 0.9999990556080505, iteration: 215900
loss: 1.0210227966308594,grad_norm: 0.8286811597529259, iteration: 215901
loss: 1.0409984588623047,grad_norm: 0.8777439291798776, iteration: 215902
loss: 0.9678823947906494,grad_norm: 0.9940014477060121, iteration: 215903
loss: 1.0202999114990234,grad_norm: 0.8968980654701247, iteration: 215904
loss: 0.997830867767334,grad_norm: 0.9999989973861609, iteration: 215905
loss: 0.9974404573440552,grad_norm: 0.9633013879052934, iteration: 215906
loss: 0.9643569588661194,grad_norm: 0.8926664471198559, iteration: 215907
loss: 1.0174651145935059,grad_norm: 0.8269478010050778, iteration: 215908
loss: 1.040105938911438,grad_norm: 0.865734155505199, iteration: 215909
loss: 1.0164895057678223,grad_norm: 0.8457108192817521, iteration: 215910
loss: 1.0214076042175293,grad_norm: 0.9908547847695887, iteration: 215911
loss: 1.0151842832565308,grad_norm: 0.7937304972564608, iteration: 215912
loss: 0.9838570952415466,grad_norm: 0.9999991334933395, iteration: 215913
loss: 0.9922789931297302,grad_norm: 0.9999991101581083, iteration: 215914
loss: 1.026499629020691,grad_norm: 0.9714575223874868, iteration: 215915
loss: 0.9941325783729553,grad_norm: 0.8698447017698815, iteration: 215916
loss: 1.0059815645217896,grad_norm: 0.9985121373316215, iteration: 215917
loss: 0.9499391913414001,grad_norm: 0.9655271919140241, iteration: 215918
loss: 1.0246411561965942,grad_norm: 0.917352642693422, iteration: 215919
loss: 1.0101968050003052,grad_norm: 0.8617827741966043, iteration: 215920
loss: 0.9891780614852905,grad_norm: 0.7371426612651015, iteration: 215921
loss: 1.0304994583129883,grad_norm: 0.9092590092129654, iteration: 215922
loss: 1.0184459686279297,grad_norm: 0.8602048789469671, iteration: 215923
loss: 1.0009511709213257,grad_norm: 0.9639812096961441, iteration: 215924
loss: 0.9831079244613647,grad_norm: 0.9078056943256713, iteration: 215925
loss: 0.9967354536056519,grad_norm: 0.8238824702568012, iteration: 215926
loss: 0.9922462701797485,grad_norm: 0.9735740566149864, iteration: 215927
loss: 1.0024023056030273,grad_norm: 0.999999031221486, iteration: 215928
loss: 0.9789230227470398,grad_norm: 0.9999991016322003, iteration: 215929
loss: 1.0211076736450195,grad_norm: 0.8359855508800101, iteration: 215930
loss: 0.9866740703582764,grad_norm: 0.8455762113545484, iteration: 215931
loss: 1.0132701396942139,grad_norm: 0.9113081422758177, iteration: 215932
loss: 1.0132665634155273,grad_norm: 0.9587552949104509, iteration: 215933
loss: 1.0062916278839111,grad_norm: 0.7400800744631484, iteration: 215934
loss: 1.0022391080856323,grad_norm: 0.8184688401580056, iteration: 215935
loss: 0.9902668595314026,grad_norm: 0.7953225514177541, iteration: 215936
loss: 1.031131386756897,grad_norm: 0.8417855282641153, iteration: 215937
loss: 0.9902971386909485,grad_norm: 0.9481236199219841, iteration: 215938
loss: 1.0194307565689087,grad_norm: 0.99999902759304, iteration: 215939
loss: 0.9933580160140991,grad_norm: 0.7860634838090169, iteration: 215940
loss: 1.0653046369552612,grad_norm: 0.8546544967474267, iteration: 215941
loss: 1.0041534900665283,grad_norm: 0.8937081713827582, iteration: 215942
loss: 0.9994369149208069,grad_norm: 0.8064139861963412, iteration: 215943
loss: 0.9989523887634277,grad_norm: 0.9999990517544572, iteration: 215944
loss: 1.0011273622512817,grad_norm: 0.9999994662835107, iteration: 215945
loss: 0.990521252155304,grad_norm: 0.9999990752961279, iteration: 215946
loss: 1.0027832984924316,grad_norm: 0.9998835751231151, iteration: 215947
loss: 0.9853302836418152,grad_norm: 0.956759938317193, iteration: 215948
loss: 0.9766814112663269,grad_norm: 0.921620128154205, iteration: 215949
loss: 1.0415294170379639,grad_norm: 0.8171037987418227, iteration: 215950
loss: 1.0159944295883179,grad_norm: 0.8945793614647641, iteration: 215951
loss: 1.0198707580566406,grad_norm: 0.7963479484520359, iteration: 215952
loss: 0.9897691607475281,grad_norm: 0.97557016133795, iteration: 215953
loss: 1.0067886114120483,grad_norm: 0.802546184295827, iteration: 215954
loss: 0.9802004098892212,grad_norm: 0.9999989437033453, iteration: 215955
loss: 1.0236473083496094,grad_norm: 0.9618776780674322, iteration: 215956
loss: 1.0619345903396606,grad_norm: 0.8509251192729016, iteration: 215957
loss: 1.0170148611068726,grad_norm: 0.9999990578517371, iteration: 215958
loss: 1.0014886856079102,grad_norm: 0.9999991604072286, iteration: 215959
loss: 0.9893147945404053,grad_norm: 0.9999990829090405, iteration: 215960
loss: 1.013994574546814,grad_norm: 0.7924762933425387, iteration: 215961
loss: 0.9566933512687683,grad_norm: 0.9999995960942, iteration: 215962
loss: 1.0016237497329712,grad_norm: 0.9816844191500532, iteration: 215963
loss: 1.0003764629364014,grad_norm: 0.9370814136036753, iteration: 215964
loss: 1.0181809663772583,grad_norm: 0.9999990568684721, iteration: 215965
loss: 1.0076621770858765,grad_norm: 0.9999992029225967, iteration: 215966
loss: 1.0319626331329346,grad_norm: 0.999999665567445, iteration: 215967
loss: 0.997368335723877,grad_norm: 0.9625555729232983, iteration: 215968
loss: 0.9844703674316406,grad_norm: 0.8198534881626669, iteration: 215969
loss: 0.9958996772766113,grad_norm: 0.94073139004884, iteration: 215970
loss: 1.0135432481765747,grad_norm: 0.9904070401015684, iteration: 215971
loss: 1.0423673391342163,grad_norm: 0.9999996206595522, iteration: 215972
loss: 1.0990427732467651,grad_norm: 0.9999996962436674, iteration: 215973
loss: 0.9959442019462585,grad_norm: 0.8998255535020517, iteration: 215974
loss: 1.0102688074111938,grad_norm: 0.9907288885405231, iteration: 215975
loss: 1.01706862449646,grad_norm: 0.9425641198356955, iteration: 215976
loss: 0.9785799980163574,grad_norm: 0.9560805723574533, iteration: 215977
loss: 0.993832528591156,grad_norm: 0.8100079254019049, iteration: 215978
loss: 0.9945907592773438,grad_norm: 0.9236683430840609, iteration: 215979
loss: 0.9958246946334839,grad_norm: 0.9999991457458394, iteration: 215980
loss: 0.9693163633346558,grad_norm: 0.9195678525170466, iteration: 215981
loss: 0.9971439838409424,grad_norm: 0.9999990872836614, iteration: 215982
loss: 1.0153840780258179,grad_norm: 0.8798448819100643, iteration: 215983
loss: 1.0070571899414062,grad_norm: 0.9999992145946551, iteration: 215984
loss: 1.0904208421707153,grad_norm: 0.9999998168217066, iteration: 215985
loss: 1.0016003847122192,grad_norm: 0.9999992036354436, iteration: 215986
loss: 1.0079139471054077,grad_norm: 0.7901363827854451, iteration: 215987
loss: 1.0346368551254272,grad_norm: 0.955288898767762, iteration: 215988
loss: 1.0160728693008423,grad_norm: 0.8888916201040786, iteration: 215989
loss: 1.0066615343093872,grad_norm: 0.8938401180250751, iteration: 215990
loss: 1.0611499547958374,grad_norm: 0.9999994494838501, iteration: 215991
loss: 0.9882047176361084,grad_norm: 0.89918115849016, iteration: 215992
loss: 1.0107084512710571,grad_norm: 0.9941805541864948, iteration: 215993
loss: 1.005804181098938,grad_norm: 0.9999998303891309, iteration: 215994
loss: 0.9925305843353271,grad_norm: 0.7713273585422546, iteration: 215995
loss: 1.0022799968719482,grad_norm: 0.8684151276930498, iteration: 215996
loss: 0.9931914210319519,grad_norm: 0.9795001719969593, iteration: 215997
loss: 1.0185725688934326,grad_norm: 0.8828360477812418, iteration: 215998
loss: 0.948919415473938,grad_norm: 0.7713624011872904, iteration: 215999
loss: 0.9937006831169128,grad_norm: 0.8726764467212088, iteration: 216000
loss: 0.9984641671180725,grad_norm: 0.8558664910608444, iteration: 216001
loss: 1.0189971923828125,grad_norm: 0.8937597792390449, iteration: 216002
loss: 1.0799901485443115,grad_norm: 0.999999345985502, iteration: 216003
loss: 0.9596767425537109,grad_norm: 0.8895626012247658, iteration: 216004
loss: 0.9947960376739502,grad_norm: 0.8552981632431185, iteration: 216005
loss: 0.9834362268447876,grad_norm: 0.9999991521650415, iteration: 216006
loss: 0.9838248491287231,grad_norm: 0.9240819114331796, iteration: 216007
loss: 1.037554144859314,grad_norm: 0.8704816571391089, iteration: 216008
loss: 0.99174964427948,grad_norm: 0.999998991238972, iteration: 216009
loss: 1.059930443763733,grad_norm: 0.8848290124702711, iteration: 216010
loss: 1.050285816192627,grad_norm: 0.999999101532399, iteration: 216011
loss: 1.0116232633590698,grad_norm: 0.8465470709187283, iteration: 216012
loss: 1.0396156311035156,grad_norm: 0.9999991132972076, iteration: 216013
loss: 1.0139926671981812,grad_norm: 0.8741494543208456, iteration: 216014
loss: 1.15939199924469,grad_norm: 0.9999997102497783, iteration: 216015
loss: 1.0147325992584229,grad_norm: 0.9999991844640531, iteration: 216016
loss: 0.9588170051574707,grad_norm: 0.970658211380385, iteration: 216017
loss: 0.9949225187301636,grad_norm: 0.8190373731956461, iteration: 216018
loss: 0.9649425745010376,grad_norm: 0.9445115149605099, iteration: 216019
loss: 1.0135481357574463,grad_norm: 0.9999990143156327, iteration: 216020
loss: 0.9910719394683838,grad_norm: 0.8487502429356407, iteration: 216021
loss: 1.0182024240493774,grad_norm: 0.9999990527046142, iteration: 216022
loss: 0.9600530862808228,grad_norm: 0.8706828762665237, iteration: 216023
loss: 0.9915191531181335,grad_norm: 0.8766499740914279, iteration: 216024
loss: 0.9851760864257812,grad_norm: 0.9339849918084485, iteration: 216025
loss: 1.0113557577133179,grad_norm: 0.9022471676879871, iteration: 216026
loss: 1.0034804344177246,grad_norm: 0.8570963929279317, iteration: 216027
loss: 1.0272160768508911,grad_norm: 0.8351861912384514, iteration: 216028
loss: 0.9920278191566467,grad_norm: 0.999999361770315, iteration: 216029
loss: 0.9620388150215149,grad_norm: 0.9999992209008026, iteration: 216030
loss: 0.9805828332901001,grad_norm: 0.9999991622540613, iteration: 216031
loss: 1.0075290203094482,grad_norm: 0.999999013708737, iteration: 216032
loss: 1.0012836456298828,grad_norm: 0.9999992171918114, iteration: 216033
loss: 0.9667696952819824,grad_norm: 0.7593377619153201, iteration: 216034
loss: 1.0065083503723145,grad_norm: 0.9999990360321934, iteration: 216035
loss: 1.014747142791748,grad_norm: 0.8070297772281741, iteration: 216036
loss: 0.983964204788208,grad_norm: 0.9999990446408417, iteration: 216037
loss: 0.9721745252609253,grad_norm: 0.7582640971611859, iteration: 216038
loss: 0.973792314529419,grad_norm: 0.9417182036097701, iteration: 216039
loss: 1.049755334854126,grad_norm: 0.9999993696737771, iteration: 216040
loss: 0.9540311694145203,grad_norm: 0.856317927101627, iteration: 216041
loss: 1.009356141090393,grad_norm: 0.9107273998472403, iteration: 216042
loss: 0.9886242747306824,grad_norm: 0.8551393146598187, iteration: 216043
loss: 1.0348873138427734,grad_norm: 0.9999990646504219, iteration: 216044
loss: 0.996850311756134,grad_norm: 0.8259592105255024, iteration: 216045
loss: 1.0314863920211792,grad_norm: 0.9999990436030307, iteration: 216046
loss: 0.9312012195587158,grad_norm: 0.9949226376409833, iteration: 216047
loss: 1.03079354763031,grad_norm: 0.9999995955869783, iteration: 216048
loss: 0.9976950883865356,grad_norm: 0.9867572297777626, iteration: 216049
loss: 0.9953394532203674,grad_norm: 0.963053262456479, iteration: 216050
loss: 0.9909098744392395,grad_norm: 0.9216716354320598, iteration: 216051
loss: 0.9843057990074158,grad_norm: 0.7698164059769983, iteration: 216052
loss: 0.9634288549423218,grad_norm: 0.999999163224855, iteration: 216053
loss: 0.9654707908630371,grad_norm: 0.8793266728588685, iteration: 216054
loss: 1.0247218608856201,grad_norm: 0.860905635638525, iteration: 216055
loss: 1.0225178003311157,grad_norm: 0.9999991683727059, iteration: 216056
loss: 1.0230239629745483,grad_norm: 0.9999991394837062, iteration: 216057
loss: 0.9951601624488831,grad_norm: 0.9587356250814462, iteration: 216058
loss: 0.9945793747901917,grad_norm: 0.8278593357535148, iteration: 216059
loss: 0.9927254915237427,grad_norm: 0.8127916823011817, iteration: 216060
loss: 0.9971548914909363,grad_norm: 0.8955545260949039, iteration: 216061
loss: 0.9906808733940125,grad_norm: 0.7706476990448385, iteration: 216062
loss: 0.9754360914230347,grad_norm: 0.8093791854053421, iteration: 216063
loss: 1.102191686630249,grad_norm: 0.9999997887875216, iteration: 216064
loss: 1.0039482116699219,grad_norm: 0.9358246274693933, iteration: 216065
loss: 0.9876503944396973,grad_norm: 0.9999992030761975, iteration: 216066
loss: 1.0146392583847046,grad_norm: 0.9999999522195914, iteration: 216067
loss: 1.043653130531311,grad_norm: 0.9999991902627461, iteration: 216068
loss: 1.0113945007324219,grad_norm: 0.8878754919202925, iteration: 216069
loss: 1.0411663055419922,grad_norm: 0.8744508825817606, iteration: 216070
loss: 1.0175151824951172,grad_norm: 0.9761751718802381, iteration: 216071
loss: 0.990049421787262,grad_norm: 0.9230499821696398, iteration: 216072
loss: 0.9892497658729553,grad_norm: 0.9999990667831672, iteration: 216073
loss: 1.0136643648147583,grad_norm: 0.9999991737338231, iteration: 216074
loss: 0.9945995211601257,grad_norm: 0.8999605565325888, iteration: 216075
loss: 0.9737091064453125,grad_norm: 0.96420666986175, iteration: 216076
loss: 1.0250457525253296,grad_norm: 0.9614348330657275, iteration: 216077
loss: 1.0089201927185059,grad_norm: 0.8572390016338344, iteration: 216078
loss: 0.9928501844406128,grad_norm: 0.7570523197852228, iteration: 216079
loss: 1.0000306367874146,grad_norm: 0.9572104728140621, iteration: 216080
loss: 1.027629017829895,grad_norm: 0.8615267401495456, iteration: 216081
loss: 1.0330642461776733,grad_norm: 0.9999992367231879, iteration: 216082
loss: 1.014624834060669,grad_norm: 0.9202441100182238, iteration: 216083
loss: 0.956279993057251,grad_norm: 0.9742456269215349, iteration: 216084
loss: 1.045446515083313,grad_norm: 0.9748810874614133, iteration: 216085
loss: 0.9833780527114868,grad_norm: 0.9428604557806133, iteration: 216086
loss: 0.9918727874755859,grad_norm: 0.9999990955592576, iteration: 216087
loss: 1.0059502124786377,grad_norm: 0.9182109486441881, iteration: 216088
loss: 1.0024476051330566,grad_norm: 0.8279807108743045, iteration: 216089
loss: 0.9994178414344788,grad_norm: 0.8982864762340945, iteration: 216090
loss: 0.9998970031738281,grad_norm: 0.9677254434875338, iteration: 216091
loss: 1.0096460580825806,grad_norm: 0.9216859017230019, iteration: 216092
loss: 0.983580470085144,grad_norm: 0.8510041919895128, iteration: 216093
loss: 1.0159199237823486,grad_norm: 0.9993639208584406, iteration: 216094
loss: 0.9980859160423279,grad_norm: 0.9999991490630872, iteration: 216095
loss: 1.0063565969467163,grad_norm: 0.9999989465784485, iteration: 216096
loss: 0.9845839738845825,grad_norm: 0.9999991449579646, iteration: 216097
loss: 0.9766719937324524,grad_norm: 0.9999991640892998, iteration: 216098
loss: 1.004991054534912,grad_norm: 0.9463672787824482, iteration: 216099
loss: 1.0073292255401611,grad_norm: 0.9999998813134091, iteration: 216100
loss: 1.0109620094299316,grad_norm: 0.7732912107801576, iteration: 216101
loss: 0.965690553188324,grad_norm: 0.882025925524361, iteration: 216102
loss: 0.9991670846939087,grad_norm: 0.9999990255876643, iteration: 216103
loss: 1.0361844301223755,grad_norm: 0.9999992500546775, iteration: 216104
loss: 1.0187153816223145,grad_norm: 0.8362181589347274, iteration: 216105
loss: 1.0339475870132446,grad_norm: 0.9975587961578626, iteration: 216106
loss: 1.0020383596420288,grad_norm: 0.9016309254320901, iteration: 216107
loss: 1.0064774751663208,grad_norm: 0.9999991455200714, iteration: 216108
loss: 1.0345813035964966,grad_norm: 0.8991747266829222, iteration: 216109
loss: 0.9622938632965088,grad_norm: 0.9511958882048455, iteration: 216110
loss: 0.9829958081245422,grad_norm: 0.9999993442481055, iteration: 216111
loss: 1.0349159240722656,grad_norm: 0.9218276457323696, iteration: 216112
loss: 0.9904422760009766,grad_norm: 0.8173822145650725, iteration: 216113
loss: 0.987062931060791,grad_norm: 0.9691504991899059, iteration: 216114
loss: 0.9702304601669312,grad_norm: 0.9815514852202072, iteration: 216115
loss: 0.9865042567253113,grad_norm: 0.9971515031543751, iteration: 216116
loss: 1.0042191743850708,grad_norm: 0.8155818895424163, iteration: 216117
loss: 0.9840643405914307,grad_norm: 0.9239614390496915, iteration: 216118
loss: 0.9732074737548828,grad_norm: 0.7911859699117784, iteration: 216119
loss: 1.0149840116500854,grad_norm: 0.8953723957347691, iteration: 216120
loss: 0.9885927438735962,grad_norm: 0.9931967661154144, iteration: 216121
loss: 1.0231064558029175,grad_norm: 0.9999997710834697, iteration: 216122
loss: 0.9703025817871094,grad_norm: 0.8566865437141169, iteration: 216123
loss: 1.1663089990615845,grad_norm: 0.9999998847636573, iteration: 216124
loss: 1.0192794799804688,grad_norm: 0.999999151265515, iteration: 216125
loss: 1.0681575536727905,grad_norm: 0.9999991873914853, iteration: 216126
loss: 1.0096790790557861,grad_norm: 0.7916018133830266, iteration: 216127
loss: 0.9545120596885681,grad_norm: 0.8240364944306359, iteration: 216128
loss: 0.9832751750946045,grad_norm: 0.9637751013462555, iteration: 216129
loss: 1.030667781829834,grad_norm: 0.88410938819282, iteration: 216130
loss: 1.015249252319336,grad_norm: 0.995696272813759, iteration: 216131
loss: 0.9564828872680664,grad_norm: 0.8760891781892242, iteration: 216132
loss: 1.0188030004501343,grad_norm: 0.9417210245485271, iteration: 216133
loss: 1.0354461669921875,grad_norm: 0.9999993979183722, iteration: 216134
loss: 1.0359677076339722,grad_norm: 0.9999990161570024, iteration: 216135
loss: 0.9551592469215393,grad_norm: 0.9914955747294324, iteration: 216136
loss: 1.003896713256836,grad_norm: 0.959577702362712, iteration: 216137
loss: 0.9741466641426086,grad_norm: 0.9999999115356852, iteration: 216138
loss: 1.0128871202468872,grad_norm: 0.7974889446082897, iteration: 216139
loss: 0.9778079390525818,grad_norm: 0.9848189065477625, iteration: 216140
loss: 0.9604617357254028,grad_norm: 0.8928807655294861, iteration: 216141
loss: 1.021366834640503,grad_norm: 0.9999992748690807, iteration: 216142
loss: 0.982929527759552,grad_norm: 0.818053693437031, iteration: 216143
loss: 1.033742904663086,grad_norm: 0.9020738382376063, iteration: 216144
loss: 1.0422972440719604,grad_norm: 0.9999993544809967, iteration: 216145
loss: 0.9985918998718262,grad_norm: 0.8249407214513841, iteration: 216146
loss: 1.0115547180175781,grad_norm: 0.9924962466684039, iteration: 216147
loss: 0.9853869676589966,grad_norm: 0.8350072544640171, iteration: 216148
loss: 1.0237563848495483,grad_norm: 0.8191720425875124, iteration: 216149
loss: 1.0121899843215942,grad_norm: 0.9821376184529971, iteration: 216150
loss: 0.9908462166786194,grad_norm: 0.9999992940824832, iteration: 216151
loss: 0.9884063005447388,grad_norm: 0.8689453415374365, iteration: 216152
loss: 1.0495526790618896,grad_norm: 0.9999992040591044, iteration: 216153
loss: 1.0100061893463135,grad_norm: 0.9999990683960053, iteration: 216154
loss: 0.9865341186523438,grad_norm: 0.9999990821141034, iteration: 216155
loss: 0.9787709712982178,grad_norm: 0.9999990925862454, iteration: 216156
loss: 1.061611533164978,grad_norm: 0.9999995045064367, iteration: 216157
loss: 0.9771848917007446,grad_norm: 0.8564195678635558, iteration: 216158
loss: 0.9786014556884766,grad_norm: 0.9117783393122711, iteration: 216159
loss: 1.023409366607666,grad_norm: 0.9070193337303286, iteration: 216160
loss: 0.9616621136665344,grad_norm: 0.9999990979855938, iteration: 216161
loss: 1.015575885772705,grad_norm: 0.8930977022527185, iteration: 216162
loss: 0.9833190441131592,grad_norm: 0.9999991139515587, iteration: 216163
loss: 1.0050852298736572,grad_norm: 0.917007056249902, iteration: 216164
loss: 1.00478196144104,grad_norm: 0.8462813570722184, iteration: 216165
loss: 0.9851797223091125,grad_norm: 0.8641022171205279, iteration: 216166
loss: 1.020692229270935,grad_norm: 0.8694544499563076, iteration: 216167
loss: 0.9954261183738708,grad_norm: 0.9625440405978457, iteration: 216168
loss: 0.9914605617523193,grad_norm: 0.953477712242653, iteration: 216169
loss: 0.9806572794914246,grad_norm: 0.8394820788132822, iteration: 216170
loss: 1.0016701221466064,grad_norm: 0.9999992203531669, iteration: 216171
loss: 0.9547207951545715,grad_norm: 0.9999990602664612, iteration: 216172
loss: 0.9897094964981079,grad_norm: 0.9092147206037313, iteration: 216173
loss: 1.008739709854126,grad_norm: 0.8426687748383836, iteration: 216174
loss: 1.0312703847885132,grad_norm: 0.9602635805618029, iteration: 216175
loss: 0.9875668883323669,grad_norm: 0.8847776120397899, iteration: 216176
loss: 0.975830614566803,grad_norm: 0.8156648208143242, iteration: 216177
loss: 1.0104845762252808,grad_norm: 0.801840336426602, iteration: 216178
loss: 1.0053164958953857,grad_norm: 0.9663231517886686, iteration: 216179
loss: 1.015600323677063,grad_norm: 0.9495235494454548, iteration: 216180
loss: 1.0166937112808228,grad_norm: 0.9999990494900086, iteration: 216181
loss: 0.9869747757911682,grad_norm: 0.9999990753496697, iteration: 216182
loss: 0.9757388830184937,grad_norm: 0.9999991709860109, iteration: 216183
loss: 1.0421696901321411,grad_norm: 0.9999991186491728, iteration: 216184
loss: 0.9990164637565613,grad_norm: 0.8876314140419123, iteration: 216185
loss: 0.9925095438957214,grad_norm: 0.9999990094941018, iteration: 216186
loss: 1.0162156820297241,grad_norm: 0.9033661704900839, iteration: 216187
loss: 0.9809384942054749,grad_norm: 0.8873623218753082, iteration: 216188
loss: 0.983214259147644,grad_norm: 0.9443697980839839, iteration: 216189
loss: 0.9885605573654175,grad_norm: 0.9999990168588402, iteration: 216190
loss: 1.00548255443573,grad_norm: 0.8898553512357185, iteration: 216191
loss: 1.0358277559280396,grad_norm: 0.924129960496857, iteration: 216192
loss: 1.0174000263214111,grad_norm: 0.9999992039396098, iteration: 216193
loss: 0.951815128326416,grad_norm: 0.8848409923208647, iteration: 216194
loss: 0.995018482208252,grad_norm: 0.9999990599290303, iteration: 216195
loss: 1.0062918663024902,grad_norm: 0.84896648671728, iteration: 216196
loss: 1.0367538928985596,grad_norm: 0.8124789191329422, iteration: 216197
loss: 0.9870174527168274,grad_norm: 0.9944771593322659, iteration: 216198
loss: 1.013150930404663,grad_norm: 0.9999990838278394, iteration: 216199
loss: 0.9671960473060608,grad_norm: 0.9999990311263985, iteration: 216200
loss: 0.9884606599807739,grad_norm: 0.9999990286702308, iteration: 216201
loss: 0.9891913533210754,grad_norm: 0.8343693877667638, iteration: 216202
loss: 0.9992282390594482,grad_norm: 0.9214485048754848, iteration: 216203
loss: 0.982007622718811,grad_norm: 0.9161878927371621, iteration: 216204
loss: 0.9803118109703064,grad_norm: 0.9999992874761922, iteration: 216205
loss: 1.0024341344833374,grad_norm: 0.999999370260787, iteration: 216206
loss: 1.0062673091888428,grad_norm: 0.9427643942805862, iteration: 216207
loss: 0.9658401608467102,grad_norm: 0.9029054589762936, iteration: 216208
loss: 0.9839195609092712,grad_norm: 0.7557692796566806, iteration: 216209
loss: 0.9753468632698059,grad_norm: 0.9127666625967154, iteration: 216210
loss: 0.996762216091156,grad_norm: 0.8981745226194976, iteration: 216211
loss: 0.9743696451187134,grad_norm: 0.8787511630918509, iteration: 216212
loss: 1.0082957744598389,grad_norm: 0.8192504911564876, iteration: 216213
loss: 1.049667477607727,grad_norm: 0.9771026364122573, iteration: 216214
loss: 0.9831035733222961,grad_norm: 0.8706886457082776, iteration: 216215
loss: 0.9907063245773315,grad_norm: 0.8707851294706072, iteration: 216216
loss: 0.981127142906189,grad_norm: 0.8194805313400464, iteration: 216217
loss: 1.020696759223938,grad_norm: 0.9999995039551901, iteration: 216218
loss: 1.0181008577346802,grad_norm: 0.9999991214001835, iteration: 216219
loss: 1.0196186304092407,grad_norm: 0.9999996736765058, iteration: 216220
loss: 1.0145416259765625,grad_norm: 0.9999989789166691, iteration: 216221
loss: 0.9960468411445618,grad_norm: 0.9303298697589267, iteration: 216222
loss: 0.9951221942901611,grad_norm: 0.9999990810367791, iteration: 216223
loss: 0.9716351628303528,grad_norm: 0.7668484573300617, iteration: 216224
loss: 1.0017460584640503,grad_norm: 0.8348533749438508, iteration: 216225
loss: 1.0217186212539673,grad_norm: 0.8040077054942268, iteration: 216226
loss: 0.9741973876953125,grad_norm: 0.8304927462582798, iteration: 216227
loss: 0.9833120107650757,grad_norm: 0.9259235611792157, iteration: 216228
loss: 0.9807088971138,grad_norm: 0.8221841470795985, iteration: 216229
loss: 0.9934598207473755,grad_norm: 0.9390108498502036, iteration: 216230
loss: 0.9921112060546875,grad_norm: 0.9999992625630219, iteration: 216231
loss: 1.033692479133606,grad_norm: 0.8796079755645582, iteration: 216232
loss: 1.0229485034942627,grad_norm: 0.9999991672174297, iteration: 216233
loss: 1.015716314315796,grad_norm: 0.9236417288779648, iteration: 216234
loss: 0.998995840549469,grad_norm: 0.90390581919944, iteration: 216235
loss: 0.9663689136505127,grad_norm: 0.9501215422867734, iteration: 216236
loss: 1.0028492212295532,grad_norm: 0.9999991344250823, iteration: 216237
loss: 1.0009480714797974,grad_norm: 0.9999989834871419, iteration: 216238
loss: 1.0032109022140503,grad_norm: 0.994012338795003, iteration: 216239
loss: 1.0440034866333008,grad_norm: 0.9834772596206369, iteration: 216240
loss: 0.9657119512557983,grad_norm: 0.999999160196754, iteration: 216241
loss: 1.0108052492141724,grad_norm: 0.9502576865035893, iteration: 216242
loss: 0.9709570407867432,grad_norm: 0.8449745328302684, iteration: 216243
loss: 0.9891277551651001,grad_norm: 0.951148797152075, iteration: 216244
loss: 0.9960357546806335,grad_norm: 0.924117041110233, iteration: 216245
loss: 0.9707853198051453,grad_norm: 0.9999991226528199, iteration: 216246
loss: 1.0096752643585205,grad_norm: 0.7980404087009902, iteration: 216247
loss: 0.9700894951820374,grad_norm: 0.9999999436589884, iteration: 216248
loss: 0.9735665321350098,grad_norm: 0.999999111806892, iteration: 216249
loss: 1.1772769689559937,grad_norm: 0.9999992585729042, iteration: 216250
loss: 0.9945400953292847,grad_norm: 0.9557957101923039, iteration: 216251
loss: 1.013506531715393,grad_norm: 0.8436665621806752, iteration: 216252
loss: 0.9965019822120667,grad_norm: 0.9827757785902453, iteration: 216253
loss: 1.0359697341918945,grad_norm: 0.9125686685346256, iteration: 216254
loss: 0.9827368855476379,grad_norm: 0.848585837766646, iteration: 216255
loss: 0.9551833868026733,grad_norm: 0.9684222867952451, iteration: 216256
loss: 1.0049517154693604,grad_norm: 0.8867074438997578, iteration: 216257
loss: 0.9945534467697144,grad_norm: 0.955650025299733, iteration: 216258
loss: 0.9908298254013062,grad_norm: 0.9999990319911221, iteration: 216259
loss: 1.0044001340866089,grad_norm: 0.921918106768364, iteration: 216260
loss: 1.0098342895507812,grad_norm: 0.9530212432935179, iteration: 216261
loss: 1.0119152069091797,grad_norm: 0.7689320929052418, iteration: 216262
loss: 1.0512698888778687,grad_norm: 0.9999993601108635, iteration: 216263
loss: 0.9692267179489136,grad_norm: 0.9999989999110127, iteration: 216264
loss: 0.9822486639022827,grad_norm: 0.8339291296493855, iteration: 216265
loss: 0.9863512516021729,grad_norm: 0.96884480793691, iteration: 216266
loss: 0.9621594548225403,grad_norm: 0.9100552838012446, iteration: 216267
loss: 1.0423041582107544,grad_norm: 0.9999992046305113, iteration: 216268
loss: 1.0076751708984375,grad_norm: 0.9999994630690711, iteration: 216269
loss: 0.9783564805984497,grad_norm: 0.901280736146115, iteration: 216270
loss: 0.9897131323814392,grad_norm: 0.8370068437839694, iteration: 216271
loss: 1.0062023401260376,grad_norm: 0.9999991361465674, iteration: 216272
loss: 0.9934453964233398,grad_norm: 0.9223190534449349, iteration: 216273
loss: 0.9913011193275452,grad_norm: 0.7430846680167175, iteration: 216274
loss: 0.9866818189620972,grad_norm: 0.7966727056437184, iteration: 216275
loss: 1.0307517051696777,grad_norm: 0.9220619652413665, iteration: 216276
loss: 0.9527162313461304,grad_norm: 0.7745623533674484, iteration: 216277
loss: 0.9673508405685425,grad_norm: 0.9999991791159274, iteration: 216278
loss: 0.9841514825820923,grad_norm: 0.9999991548999576, iteration: 216279
loss: 0.9720650911331177,grad_norm: 0.9728675347485436, iteration: 216280
loss: 1.0100903511047363,grad_norm: 0.905112408661923, iteration: 216281
loss: 1.0025415420532227,grad_norm: 0.8308754272725442, iteration: 216282
loss: 0.9884330630302429,grad_norm: 0.8846093909642838, iteration: 216283
loss: 0.9850566983222961,grad_norm: 0.8759376109207064, iteration: 216284
loss: 0.9980028867721558,grad_norm: 0.9577888820069697, iteration: 216285
loss: 1.0111196041107178,grad_norm: 0.8300905804982398, iteration: 216286
loss: 0.9851150512695312,grad_norm: 0.922058505959003, iteration: 216287
loss: 1.0256364345550537,grad_norm: 0.8852937596291384, iteration: 216288
loss: 0.9778020977973938,grad_norm: 0.896918828753516, iteration: 216289
loss: 1.0192270278930664,grad_norm: 0.9768188661456506, iteration: 216290
loss: 0.9489008784294128,grad_norm: 0.9999992166267693, iteration: 216291
loss: 0.9737889766693115,grad_norm: 0.9336692543557277, iteration: 216292
loss: 0.9695720076560974,grad_norm: 0.9429652915360621, iteration: 216293
loss: 1.031488299369812,grad_norm: 0.8415151329284304, iteration: 216294
loss: 0.9945261478424072,grad_norm: 0.8885685425000108, iteration: 216295
loss: 0.9571220874786377,grad_norm: 0.9999991808878809, iteration: 216296
loss: 0.9872137904167175,grad_norm: 0.8412066916314638, iteration: 216297
loss: 1.0149502754211426,grad_norm: 0.8322762607492595, iteration: 216298
loss: 1.0335521697998047,grad_norm: 0.9999990441288282, iteration: 216299
loss: 0.9657698273658752,grad_norm: 0.9999991182378403, iteration: 216300
loss: 1.007304072380066,grad_norm: 0.8606015724511941, iteration: 216301
loss: 0.9563566446304321,grad_norm: 0.9284757249773169, iteration: 216302
loss: 1.032840371131897,grad_norm: 0.9921549401828109, iteration: 216303
loss: 0.9610405564308167,grad_norm: 0.7446388953077748, iteration: 216304
loss: 0.9998879432678223,grad_norm: 0.9521490900548158, iteration: 216305
loss: 1.0057786703109741,grad_norm: 0.9405006681563911, iteration: 216306
loss: 1.0050315856933594,grad_norm: 0.8121330945189421, iteration: 216307
loss: 1.0211316347122192,grad_norm: 0.8706036044371316, iteration: 216308
loss: 1.0121188163757324,grad_norm: 0.8656747661009113, iteration: 216309
loss: 1.003001093864441,grad_norm: 0.8745508676539494, iteration: 216310
loss: 0.9656040072441101,grad_norm: 0.9128658208112277, iteration: 216311
loss: 0.9795181155204773,grad_norm: 0.7945541513342875, iteration: 216312
loss: 1.0119297504425049,grad_norm: 0.999999205804053, iteration: 216313
loss: 0.9991163015365601,grad_norm: 0.8518915937954802, iteration: 216314
loss: 1.0099343061447144,grad_norm: 0.8482126395582772, iteration: 216315
loss: 0.9816290140151978,grad_norm: 0.9999998692331542, iteration: 216316
loss: 1.0333621501922607,grad_norm: 0.7744857497689898, iteration: 216317
loss: 0.9873093366622925,grad_norm: 0.9428597162556387, iteration: 216318
loss: 0.9971458315849304,grad_norm: 0.9999990374053154, iteration: 216319
loss: 1.0170849561691284,grad_norm: 0.9170910920828679, iteration: 216320
loss: 0.9750064611434937,grad_norm: 0.7151528818516768, iteration: 216321
loss: 1.0392647981643677,grad_norm: 0.9999991333541224, iteration: 216322
loss: 1.0340001583099365,grad_norm: 0.8028505985760898, iteration: 216323
loss: 1.0219374895095825,grad_norm: 0.9664746879634243, iteration: 216324
loss: 1.0283595323562622,grad_norm: 0.8620491117079471, iteration: 216325
loss: 1.0343836545944214,grad_norm: 0.8437521786465076, iteration: 216326
loss: 1.039391040802002,grad_norm: 0.9480323532000697, iteration: 216327
loss: 1.0033595561981201,grad_norm: 0.9999990872708356, iteration: 216328
loss: 1.0082594156265259,grad_norm: 0.9100061048273383, iteration: 216329
loss: 1.0368733406066895,grad_norm: 0.8434093537347825, iteration: 216330
loss: 0.9720409512519836,grad_norm: 0.9999990364043662, iteration: 216331
loss: 0.9935764074325562,grad_norm: 0.6768860446549735, iteration: 216332
loss: 1.0361523628234863,grad_norm: 0.9459937000992392, iteration: 216333
loss: 0.9983997941017151,grad_norm: 0.8437299536232054, iteration: 216334
loss: 0.9752799868583679,grad_norm: 0.9999992398871785, iteration: 216335
loss: 0.9705560207366943,grad_norm: 0.9002824754893829, iteration: 216336
loss: 1.0210540294647217,grad_norm: 0.9263181400842464, iteration: 216337
loss: 0.9957736730575562,grad_norm: 0.9999991489273615, iteration: 216338
loss: 1.0164027214050293,grad_norm: 0.9942079451927972, iteration: 216339
loss: 1.0120995044708252,grad_norm: 0.872724660921183, iteration: 216340
loss: 1.0112981796264648,grad_norm: 0.9393516702477989, iteration: 216341
loss: 0.989811360836029,grad_norm: 0.9326353338783109, iteration: 216342
loss: 1.0468460321426392,grad_norm: 0.9999991442915116, iteration: 216343
loss: 1.0464712381362915,grad_norm: 0.9234524759957534, iteration: 216344
loss: 0.9593174457550049,grad_norm: 0.9999991594361728, iteration: 216345
loss: 1.0383343696594238,grad_norm: 0.9999991800760083, iteration: 216346
loss: 0.9835272431373596,grad_norm: 0.9161513514825987, iteration: 216347
loss: 1.0377675294876099,grad_norm: 0.9988114798018264, iteration: 216348
loss: 1.0085803270339966,grad_norm: 0.9999989598891125, iteration: 216349
loss: 1.0217010974884033,grad_norm: 0.963244565088617, iteration: 216350
loss: 0.9876810312271118,grad_norm: 0.9085050924934046, iteration: 216351
loss: 0.9881634712219238,grad_norm: 0.7272385109024326, iteration: 216352
loss: 1.019328236579895,grad_norm: 0.8791883661910309, iteration: 216353
loss: 1.0085370540618896,grad_norm: 0.9999991243939425, iteration: 216354
loss: 0.9896946549415588,grad_norm: 0.7797935172558881, iteration: 216355
loss: 1.019598364830017,grad_norm: 0.9900630020311163, iteration: 216356
loss: 0.9755425453186035,grad_norm: 0.8328680382749248, iteration: 216357
loss: 1.0090633630752563,grad_norm: 0.999999074401107, iteration: 216358
loss: 1.0110622644424438,grad_norm: 0.8756373202782698, iteration: 216359
loss: 1.0106251239776611,grad_norm: 0.9742446950612584, iteration: 216360
loss: 0.9985103011131287,grad_norm: 0.7791570918614065, iteration: 216361
loss: 1.016602873802185,grad_norm: 0.9999991066745598, iteration: 216362
loss: 1.0053282976150513,grad_norm: 0.9999991628080606, iteration: 216363
loss: 1.0416682958602905,grad_norm: 0.9624349967310841, iteration: 216364
loss: 0.9820974469184875,grad_norm: 0.9999991839516721, iteration: 216365
loss: 0.9903993606567383,grad_norm: 0.9999990825212542, iteration: 216366
loss: 0.9793100953102112,grad_norm: 0.9546266848485605, iteration: 216367
loss: 1.014733076095581,grad_norm: 0.9999991267117762, iteration: 216368
loss: 0.9534140825271606,grad_norm: 0.9236254848726394, iteration: 216369
loss: 0.9866892695426941,grad_norm: 0.9999990055746816, iteration: 216370
loss: 1.006643295288086,grad_norm: 0.9148002369198316, iteration: 216371
loss: 1.0175081491470337,grad_norm: 0.8448995923112675, iteration: 216372
loss: 0.9648082256317139,grad_norm: 0.879791008984628, iteration: 216373
loss: 0.9937717914581299,grad_norm: 0.8219799481665235, iteration: 216374
loss: 1.0282543897628784,grad_norm: 0.8792345529483399, iteration: 216375
loss: 0.9874384999275208,grad_norm: 0.9737008186067379, iteration: 216376
loss: 1.0337116718292236,grad_norm: 0.9999993491959986, iteration: 216377
loss: 0.9941307306289673,grad_norm: 0.7927498969467451, iteration: 216378
loss: 1.0165985822677612,grad_norm: 0.9777204220428267, iteration: 216379
loss: 0.9907845258712769,grad_norm: 0.8709921628120115, iteration: 216380
loss: 0.9967410564422607,grad_norm: 0.9450540465730696, iteration: 216381
loss: 0.9910609722137451,grad_norm: 0.9567304165893791, iteration: 216382
loss: 0.9679031372070312,grad_norm: 0.8080012376955877, iteration: 216383
loss: 0.9882277846336365,grad_norm: 0.9596596238936648, iteration: 216384
loss: 1.0456719398498535,grad_norm: 0.946216064538069, iteration: 216385
loss: 1.0059921741485596,grad_norm: 0.9291089736145071, iteration: 216386
loss: 1.029092788696289,grad_norm: 0.9169038303588952, iteration: 216387
loss: 0.9883802533149719,grad_norm: 0.8861305344634095, iteration: 216388
loss: 0.9857836961746216,grad_norm: 0.9733976975821725, iteration: 216389
loss: 0.975867748260498,grad_norm: 0.7022917150963547, iteration: 216390
loss: 0.9853672981262207,grad_norm: 0.9999996716742974, iteration: 216391
loss: 1.0330556631088257,grad_norm: 0.9326683901949363, iteration: 216392
loss: 0.9864675402641296,grad_norm: 0.8492071746819156, iteration: 216393
loss: 1.0162034034729004,grad_norm: 0.8090983107953004, iteration: 216394
loss: 0.9749643802642822,grad_norm: 0.9109541410794549, iteration: 216395
loss: 1.012320637702942,grad_norm: 0.8772002874770393, iteration: 216396
loss: 1.0119415521621704,grad_norm: 0.8958545585817466, iteration: 216397
loss: 1.0319867134094238,grad_norm: 0.8453976922123274, iteration: 216398
loss: 1.010216474533081,grad_norm: 0.9625184737720613, iteration: 216399
loss: 1.0021103620529175,grad_norm: 0.9999989889508113, iteration: 216400
loss: 0.9680703282356262,grad_norm: 0.834310665616547, iteration: 216401
loss: 1.0158493518829346,grad_norm: 0.8368645954285999, iteration: 216402
loss: 0.9965603947639465,grad_norm: 0.9342405321271662, iteration: 216403
loss: 1.0299772024154663,grad_norm: 0.8144771662404784, iteration: 216404
loss: 0.9850282073020935,grad_norm: 0.8899877184078011, iteration: 216405
loss: 1.0195424556732178,grad_norm: 0.9999991207353024, iteration: 216406
loss: 1.0107133388519287,grad_norm: 0.9999990041140143, iteration: 216407
loss: 0.9893338680267334,grad_norm: 0.916505317711813, iteration: 216408
loss: 0.9948697686195374,grad_norm: 0.8022504640793001, iteration: 216409
loss: 1.0071158409118652,grad_norm: 0.9901774563994035, iteration: 216410
loss: 1.0331134796142578,grad_norm: 0.969879249752692, iteration: 216411
loss: 0.9914540648460388,grad_norm: 0.799598072601114, iteration: 216412
loss: 1.008699893951416,grad_norm: 0.9027089634483071, iteration: 216413
loss: 0.9687655568122864,grad_norm: 0.9228404827359058, iteration: 216414
loss: 0.9790518283843994,grad_norm: 0.8670517940466321, iteration: 216415
loss: 0.97560054063797,grad_norm: 0.9684436137315645, iteration: 216416
loss: 1.0022401809692383,grad_norm: 0.8523080571761935, iteration: 216417
loss: 0.9848315715789795,grad_norm: 0.895935409225387, iteration: 216418
loss: 0.9956158399581909,grad_norm: 0.8377375211431186, iteration: 216419
loss: 1.0173617601394653,grad_norm: 0.9780200699168674, iteration: 216420
loss: 0.9929628372192383,grad_norm: 0.9877667989445316, iteration: 216421
loss: 1.031588077545166,grad_norm: 0.9814964448699313, iteration: 216422
loss: 0.9975648522377014,grad_norm: 0.9518177437791976, iteration: 216423
loss: 0.9823252558708191,grad_norm: 0.9999991928073773, iteration: 216424
loss: 1.0060577392578125,grad_norm: 0.8302906426755762, iteration: 216425
loss: 0.9858395457267761,grad_norm: 0.8513754451403559, iteration: 216426
loss: 1.016005516052246,grad_norm: 0.8772306043174704, iteration: 216427
loss: 1.0071618556976318,grad_norm: 0.9614674964922576, iteration: 216428
loss: 1.016197919845581,grad_norm: 0.8343653732714008, iteration: 216429
loss: 1.0245920419692993,grad_norm: 0.9240281619362889, iteration: 216430
loss: 1.040094256401062,grad_norm: 0.9999997333750533, iteration: 216431
loss: 1.001220464706421,grad_norm: 0.9468106587125747, iteration: 216432
loss: 1.0187175273895264,grad_norm: 0.9999989598344476, iteration: 216433
loss: 0.9987623691558838,grad_norm: 0.95591006123001, iteration: 216434
loss: 1.0170660018920898,grad_norm: 0.9897265988234308, iteration: 216435
loss: 1.0445564985275269,grad_norm: 0.956841527751529, iteration: 216436
loss: 1.006478190422058,grad_norm: 0.8901003553138052, iteration: 216437
loss: 0.9739558696746826,grad_norm: 0.7124662772946432, iteration: 216438
loss: 0.9971961379051208,grad_norm: 0.9978155711933003, iteration: 216439
loss: 1.0235998630523682,grad_norm: 0.9769085854478372, iteration: 216440
loss: 1.000015377998352,grad_norm: 0.8648370061955323, iteration: 216441
loss: 1.0417743921279907,grad_norm: 0.9999990440120426, iteration: 216442
loss: 1.0137205123901367,grad_norm: 0.8212553258476584, iteration: 216443
loss: 0.9711653590202332,grad_norm: 0.9999990249748948, iteration: 216444
loss: 0.9955946803092957,grad_norm: 0.9999990335985677, iteration: 216445
loss: 1.002541422843933,grad_norm: 0.999999207408585, iteration: 216446
loss: 0.9977181553840637,grad_norm: 0.8307013185110905, iteration: 216447
loss: 1.0029628276824951,grad_norm: 0.9990635932145853, iteration: 216448
loss: 0.9796836972236633,grad_norm: 0.9999991169163331, iteration: 216449
loss: 0.9909546971321106,grad_norm: 0.9743142501796485, iteration: 216450
loss: 1.0075507164001465,grad_norm: 0.9999991715654263, iteration: 216451
loss: 1.1678375005722046,grad_norm: 0.9999998062405577, iteration: 216452
loss: 1.0445630550384521,grad_norm: 0.9999991139520781, iteration: 216453
loss: 0.9928873181343079,grad_norm: 0.9018561478745231, iteration: 216454
loss: 0.9972115755081177,grad_norm: 0.9668209216429571, iteration: 216455
loss: 1.0246309041976929,grad_norm: 0.8979533023368252, iteration: 216456
loss: 1.0217630863189697,grad_norm: 0.999999060138703, iteration: 216457
loss: 0.9806954264640808,grad_norm: 0.8925007783667905, iteration: 216458
loss: 0.9750839471817017,grad_norm: 0.973077774979198, iteration: 216459
loss: 1.0232927799224854,grad_norm: 0.7728320026898896, iteration: 216460
loss: 1.0286765098571777,grad_norm: 0.9646956033467233, iteration: 216461
loss: 0.9922118782997131,grad_norm: 0.9999991331189403, iteration: 216462
loss: 0.9718965888023376,grad_norm: 0.7601008192117505, iteration: 216463
loss: 0.9728377461433411,grad_norm: 0.8837565295823595, iteration: 216464
loss: 1.0158628225326538,grad_norm: 0.8178088650197716, iteration: 216465
loss: 0.9993524551391602,grad_norm: 0.9871660463634394, iteration: 216466
loss: 1.029211401939392,grad_norm: 0.910198988383441, iteration: 216467
loss: 0.9821228981018066,grad_norm: 0.8359483753987054, iteration: 216468
loss: 0.9803555011749268,grad_norm: 0.7197865654114523, iteration: 216469
loss: 0.9753530025482178,grad_norm: 0.8219638909149797, iteration: 216470
loss: 1.0195808410644531,grad_norm: 0.9999990155079282, iteration: 216471
loss: 1.0227826833724976,grad_norm: 0.9999999593622032, iteration: 216472
loss: 1.0589854717254639,grad_norm: 0.9999996345781125, iteration: 216473
loss: 1.10733163356781,grad_norm: 0.8772033311064246, iteration: 216474
loss: 0.9835602641105652,grad_norm: 0.7861750722277654, iteration: 216475
loss: 0.9975759983062744,grad_norm: 0.7813990282837509, iteration: 216476
loss: 1.1105257272720337,grad_norm: 0.999999444482426, iteration: 216477
loss: 1.0032931566238403,grad_norm: 0.8500848929041193, iteration: 216478
loss: 0.9540438652038574,grad_norm: 0.9999991529162413, iteration: 216479
loss: 1.0186649560928345,grad_norm: 0.7874827158690471, iteration: 216480
loss: 1.2593069076538086,grad_norm: 0.9999997247110753, iteration: 216481
loss: 1.0018789768218994,grad_norm: 0.9999993758463552, iteration: 216482
loss: 1.0489503145217896,grad_norm: 0.9999991618798202, iteration: 216483
loss: 1.1495716571807861,grad_norm: 0.9999994284526249, iteration: 216484
loss: 1.073589563369751,grad_norm: 0.9999992870511116, iteration: 216485
loss: 1.010345697402954,grad_norm: 0.9999990045791795, iteration: 216486
loss: 1.104404091835022,grad_norm: 0.9999990529425451, iteration: 216487
loss: 1.0073752403259277,grad_norm: 0.999999202635902, iteration: 216488
loss: 1.1061128377914429,grad_norm: 0.9999991094198551, iteration: 216489
loss: 1.070748209953308,grad_norm: 0.999999880738263, iteration: 216490
loss: 0.9699350595474243,grad_norm: 0.9999994821872062, iteration: 216491
loss: 1.2831493616104126,grad_norm: 0.999999423173838, iteration: 216492
loss: 1.1100170612335205,grad_norm: 0.999999691179613, iteration: 216493
loss: 0.9934192299842834,grad_norm: 0.9999991436818392, iteration: 216494
loss: 0.9798175096511841,grad_norm: 0.9999997323651147, iteration: 216495
loss: 1.2128868103027344,grad_norm: 0.9999997421746818, iteration: 216496
loss: 1.0176992416381836,grad_norm: 0.8857558391292387, iteration: 216497
loss: 1.1424180269241333,grad_norm: 0.9999994600528392, iteration: 216498
loss: 1.0084809064865112,grad_norm: 0.9833278221765365, iteration: 216499
loss: 1.1142431497573853,grad_norm: 0.9999998406221701, iteration: 216500
loss: 1.114920735359192,grad_norm: 0.9999994059763498, iteration: 216501
loss: 1.00846529006958,grad_norm: 0.973650501432084, iteration: 216502
loss: 0.9828919172286987,grad_norm: 0.7653771036469623, iteration: 216503
loss: 1.03949773311615,grad_norm: 0.9999994016518284, iteration: 216504
loss: 0.9614154100418091,grad_norm: 0.9999990151316901, iteration: 216505
loss: 1.0119820833206177,grad_norm: 0.9620970803048606, iteration: 216506
loss: 1.003945231437683,grad_norm: 0.9999994591113888, iteration: 216507
loss: 1.0294244289398193,grad_norm: 0.9999991202935502, iteration: 216508
loss: 1.059629201889038,grad_norm: 0.9999991047296222, iteration: 216509
loss: 0.993881106376648,grad_norm: 0.9999991042772901, iteration: 216510
loss: 1.1273317337036133,grad_norm: 0.9999997252831079, iteration: 216511
loss: 1.0070796012878418,grad_norm: 0.8023143997517386, iteration: 216512
loss: 1.0068459510803223,grad_norm: 0.9999991346834254, iteration: 216513
loss: 0.9753032922744751,grad_norm: 0.9985162398702128, iteration: 216514
loss: 1.017674446105957,grad_norm: 0.9999997754309173, iteration: 216515
loss: 1.0035611391067505,grad_norm: 0.9999990524361193, iteration: 216516
loss: 0.979086697101593,grad_norm: 0.9999995520371413, iteration: 216517
loss: 0.981158435344696,grad_norm: 0.9156396192200766, iteration: 216518
loss: 1.065272331237793,grad_norm: 0.9348112788279299, iteration: 216519
loss: 0.9975882172584534,grad_norm: 0.9999989807126989, iteration: 216520
loss: 0.9665848016738892,grad_norm: 0.8957330073934925, iteration: 216521
loss: 1.019002079963684,grad_norm: 0.9999996655051696, iteration: 216522
loss: 0.9590266942977905,grad_norm: 0.999998990360543, iteration: 216523
loss: 1.008624792098999,grad_norm: 0.9999991351066471, iteration: 216524
loss: 1.0293718576431274,grad_norm: 0.9999991326732192, iteration: 216525
loss: 1.016939401626587,grad_norm: 0.8349054596480361, iteration: 216526
loss: 0.9555321335792542,grad_norm: 0.9999996539652622, iteration: 216527
loss: 1.0399945974349976,grad_norm: 0.9999992334441465, iteration: 216528
loss: 0.9900597929954529,grad_norm: 0.9493233384701176, iteration: 216529
loss: 1.026220440864563,grad_norm: 0.991530812669826, iteration: 216530
loss: 1.0918761491775513,grad_norm: 0.9544079534168481, iteration: 216531
loss: 1.071794033050537,grad_norm: 0.999999327478787, iteration: 216532
loss: 1.03990638256073,grad_norm: 0.9999991154772484, iteration: 216533
loss: 1.0523285865783691,grad_norm: 0.9999991843049632, iteration: 216534
loss: 1.1200770139694214,grad_norm: 0.9999996781320643, iteration: 216535
loss: 0.9923871159553528,grad_norm: 0.9341810366082219, iteration: 216536
loss: 1.0529637336730957,grad_norm: 0.9770112353192557, iteration: 216537
loss: 1.0352084636688232,grad_norm: 0.9999997858043086, iteration: 216538
loss: 1.0459810495376587,grad_norm: 0.9999993895892724, iteration: 216539
loss: 1.0474488735198975,grad_norm: 0.8620635843629036, iteration: 216540
loss: 0.9602060914039612,grad_norm: 0.9999991941937851, iteration: 216541
loss: 1.014920711517334,grad_norm: 0.9999991751646287, iteration: 216542
loss: 1.0825570821762085,grad_norm: 0.9999995376328318, iteration: 216543
loss: 0.9851617813110352,grad_norm: 0.7787338203839594, iteration: 216544
loss: 1.0591132640838623,grad_norm: 0.9999991223768486, iteration: 216545
loss: 0.9867951273918152,grad_norm: 0.970321640904301, iteration: 216546
loss: 1.0310503244400024,grad_norm: 0.9999993921040256, iteration: 216547
loss: 0.9659487009048462,grad_norm: 0.8768006248442415, iteration: 216548
loss: 1.0093563795089722,grad_norm: 0.8099592044404499, iteration: 216549
loss: 1.0182911157608032,grad_norm: 0.8969239707433926, iteration: 216550
loss: 1.035672664642334,grad_norm: 0.9999991617261598, iteration: 216551
loss: 0.9765613079071045,grad_norm: 0.8316959897811353, iteration: 216552
loss: 0.9780827760696411,grad_norm: 0.9999991369562917, iteration: 216553
loss: 1.0191272497177124,grad_norm: 0.9999993544989125, iteration: 216554
loss: 1.0379536151885986,grad_norm: 0.9735627622064298, iteration: 216555
loss: 0.9930287599563599,grad_norm: 0.8958737129746951, iteration: 216556
loss: 1.0351601839065552,grad_norm: 0.9999992458279323, iteration: 216557
loss: 1.0121605396270752,grad_norm: 0.9999990922902615, iteration: 216558
loss: 0.9442257881164551,grad_norm: 0.7607183833232866, iteration: 216559
loss: 1.0968008041381836,grad_norm: 0.9999999534553569, iteration: 216560
loss: 1.0186588764190674,grad_norm: 0.934187119337688, iteration: 216561
loss: 1.10152268409729,grad_norm: 0.9999990447579744, iteration: 216562
loss: 1.029125452041626,grad_norm: 0.8206531505163869, iteration: 216563
loss: 1.1028276681900024,grad_norm: 0.9999991117899516, iteration: 216564
loss: 1.0116968154907227,grad_norm: 0.9053256721635937, iteration: 216565
loss: 1.0213168859481812,grad_norm: 0.8600714460028347, iteration: 216566
loss: 1.0686020851135254,grad_norm: 0.9999999370109405, iteration: 216567
loss: 1.0359714031219482,grad_norm: 0.9999990920917581, iteration: 216568
loss: 0.9910990595817566,grad_norm: 0.9999991813385846, iteration: 216569
loss: 1.0453084707260132,grad_norm: 0.9999991137068346, iteration: 216570
loss: 1.040711760520935,grad_norm: 0.9209272043286301, iteration: 216571
loss: 1.0140255689620972,grad_norm: 0.9999996116631273, iteration: 216572
loss: 1.0307878255844116,grad_norm: 0.9999998203342859, iteration: 216573
loss: 0.9997513294219971,grad_norm: 0.8786286865225349, iteration: 216574
loss: 0.973056972026825,grad_norm: 0.8318281523561236, iteration: 216575
loss: 0.9951967000961304,grad_norm: 0.896131354616515, iteration: 216576
loss: 1.0128741264343262,grad_norm: 0.9999993315935921, iteration: 216577
loss: 1.001880168914795,grad_norm: 0.9643388591463423, iteration: 216578
loss: 0.9626950025558472,grad_norm: 0.947550574910778, iteration: 216579
loss: 1.0591012239456177,grad_norm: 0.9999994424002814, iteration: 216580
loss: 0.9794847369194031,grad_norm: 0.8470816982432934, iteration: 216581
loss: 1.0469348430633545,grad_norm: 0.999999201685647, iteration: 216582
loss: 1.034531831741333,grad_norm: 0.7449871517980993, iteration: 216583
loss: 1.0081919431686401,grad_norm: 0.9377355412903885, iteration: 216584
loss: 1.0523759126663208,grad_norm: 0.999999939680355, iteration: 216585
loss: 0.9972704648971558,grad_norm: 0.8203684447956812, iteration: 216586
loss: 1.007353663444519,grad_norm: 0.9999990465627427, iteration: 216587
loss: 1.0251049995422363,grad_norm: 0.9817783305984662, iteration: 216588
loss: 0.9961487054824829,grad_norm: 0.9132804746919793, iteration: 216589
loss: 0.9809536337852478,grad_norm: 0.8766791069258385, iteration: 216590
loss: 1.0319089889526367,grad_norm: 0.9999997518833893, iteration: 216591
loss: 1.0610414743423462,grad_norm: 0.9967769800630155, iteration: 216592
loss: 1.0425994396209717,grad_norm: 0.9999995875097746, iteration: 216593
loss: 0.9799715876579285,grad_norm: 0.8332528274948514, iteration: 216594
loss: 1.0974386930465698,grad_norm: 0.9999995950261834, iteration: 216595
loss: 1.0111018419265747,grad_norm: 0.7928730082706484, iteration: 216596
loss: 1.0008853673934937,grad_norm: 0.9999992856286993, iteration: 216597
loss: 1.0068433284759521,grad_norm: 0.9999990180721048, iteration: 216598
loss: 1.0015429258346558,grad_norm: 0.9999991264604136, iteration: 216599
loss: 1.060341715812683,grad_norm: 0.999999345310731, iteration: 216600
loss: 0.9560553431510925,grad_norm: 0.9840765102640074, iteration: 216601
loss: 1.0237561464309692,grad_norm: 0.7553507167415693, iteration: 216602
loss: 1.045446753501892,grad_norm: 0.9236946688792387, iteration: 216603
loss: 1.0200568437576294,grad_norm: 0.9999994477125435, iteration: 216604
loss: 0.9932585954666138,grad_norm: 0.9999991617755012, iteration: 216605
loss: 1.0439252853393555,grad_norm: 0.9191453941096313, iteration: 216606
loss: 1.0314570665359497,grad_norm: 0.9999992076556189, iteration: 216607
loss: 1.0079957246780396,grad_norm: 0.8665952771946873, iteration: 216608
loss: 1.025468111038208,grad_norm: 0.9999992325827618, iteration: 216609
loss: 1.0633513927459717,grad_norm: 0.9170760804428024, iteration: 216610
loss: 0.9763627648353577,grad_norm: 0.741527850379867, iteration: 216611
loss: 0.9615672826766968,grad_norm: 0.9999991059141053, iteration: 216612
loss: 1.019994854927063,grad_norm: 0.7802014065605639, iteration: 216613
loss: 1.0196075439453125,grad_norm: 0.9999991691277672, iteration: 216614
loss: 1.0162123441696167,grad_norm: 0.7815916169969714, iteration: 216615
loss: 1.002589464187622,grad_norm: 0.9999991332586772, iteration: 216616
loss: 1.0060032606124878,grad_norm: 0.8801718004615392, iteration: 216617
loss: 1.0367566347122192,grad_norm: 0.9999991025898894, iteration: 216618
loss: 1.014246940612793,grad_norm: 0.999999068660834, iteration: 216619
loss: 0.9979252219200134,grad_norm: 0.9999990518698895, iteration: 216620
loss: 0.9948408603668213,grad_norm: 0.8925698610843974, iteration: 216621
loss: 0.9957417845726013,grad_norm: 0.9694423790889026, iteration: 216622
loss: 1.0430835485458374,grad_norm: 0.9999991422996392, iteration: 216623
loss: 1.0201890468597412,grad_norm: 0.9999993286051062, iteration: 216624
loss: 1.0088139772415161,grad_norm: 0.9999989511787676, iteration: 216625
loss: 1.0485044717788696,grad_norm: 0.9999990684908836, iteration: 216626
loss: 1.068627953529358,grad_norm: 0.9999994546234207, iteration: 216627
loss: 0.9736214280128479,grad_norm: 0.7673212835916133, iteration: 216628
loss: 0.9763985872268677,grad_norm: 0.9678018249165039, iteration: 216629
loss: 1.0079472064971924,grad_norm: 0.9999991842045353, iteration: 216630
loss: 1.0357205867767334,grad_norm: 0.9999992325256278, iteration: 216631
loss: 0.9797133803367615,grad_norm: 0.9279032140416633, iteration: 216632
loss: 1.0392268896102905,grad_norm: 0.9999990871350675, iteration: 216633
loss: 1.0540238618850708,grad_norm: 0.8831852966891849, iteration: 216634
loss: 1.000356912612915,grad_norm: 0.9999990181599085, iteration: 216635
loss: 1.1101019382476807,grad_norm: 0.9999991576305032, iteration: 216636
loss: 0.979679524898529,grad_norm: 0.999999069550086, iteration: 216637
loss: 0.9761762619018555,grad_norm: 0.9999990176686228, iteration: 216638
loss: 0.9987451434135437,grad_norm: 0.9672930875955927, iteration: 216639
loss: 0.9978812336921692,grad_norm: 0.9999995603366235, iteration: 216640
loss: 1.0499614477157593,grad_norm: 0.9999994326210125, iteration: 216641
loss: 1.0484155416488647,grad_norm: 0.9999998530086145, iteration: 216642
loss: 1.003633975982666,grad_norm: 0.8749458288668974, iteration: 216643
loss: 0.9466267228126526,grad_norm: 0.8586604931013944, iteration: 216644
loss: 0.9805300235748291,grad_norm: 0.9778999707818168, iteration: 216645
loss: 1.0247129201889038,grad_norm: 0.9999989921226781, iteration: 216646
loss: 1.0421174764633179,grad_norm: 0.9305337126212234, iteration: 216647
loss: 1.0984359979629517,grad_norm: 0.9999990988128741, iteration: 216648
loss: 1.085220217704773,grad_norm: 0.9999999484965982, iteration: 216649
loss: 1.0434424877166748,grad_norm: 0.9358333188147434, iteration: 216650
loss: 1.0406895875930786,grad_norm: 0.9075273536242189, iteration: 216651
loss: 1.0341081619262695,grad_norm: 0.9999991824820845, iteration: 216652
loss: 0.9608615636825562,grad_norm: 0.9423307919369138, iteration: 216653
loss: 1.0405627489089966,grad_norm: 0.8967251137413694, iteration: 216654
loss: 0.9734911918640137,grad_norm: 0.9697054242878183, iteration: 216655
loss: 1.1298346519470215,grad_norm: 0.9999994524216597, iteration: 216656
loss: 0.9840591549873352,grad_norm: 0.9999989929889571, iteration: 216657
loss: 1.0018423795700073,grad_norm: 0.9437976406375083, iteration: 216658
loss: 1.0341440439224243,grad_norm: 0.9139350715517933, iteration: 216659
loss: 1.0105571746826172,grad_norm: 0.9260214992935814, iteration: 216660
loss: 1.11735999584198,grad_norm: 0.9999996185008928, iteration: 216661
loss: 1.0152323246002197,grad_norm: 0.8656537983668692, iteration: 216662
loss: 1.0779449939727783,grad_norm: 0.9469945860647798, iteration: 216663
loss: 1.0770258903503418,grad_norm: 0.9999992291225441, iteration: 216664
loss: 1.085893988609314,grad_norm: 0.847423965820466, iteration: 216665
loss: 1.089215636253357,grad_norm: 0.9999996518582793, iteration: 216666
loss: 0.976855456829071,grad_norm: 0.9999990934771652, iteration: 216667
loss: 1.093513011932373,grad_norm: 0.8934740880955762, iteration: 216668
loss: 0.9982218742370605,grad_norm: 0.8369443787432717, iteration: 216669
loss: 1.0918488502502441,grad_norm: 0.9999990740137127, iteration: 216670
loss: 0.9896343946456909,grad_norm: 0.9572673388578531, iteration: 216671
loss: 1.2542340755462646,grad_norm: 0.9999993951761695, iteration: 216672
loss: 1.0186138153076172,grad_norm: 0.9999991588689863, iteration: 216673
loss: 1.0020521879196167,grad_norm: 0.9999991730901475, iteration: 216674
loss: 1.0444176197052002,grad_norm: 0.9999990902265933, iteration: 216675
loss: 1.0166234970092773,grad_norm: 0.9999991939471452, iteration: 216676
loss: 1.0349888801574707,grad_norm: 0.9999991085162101, iteration: 216677
loss: 1.045991063117981,grad_norm: 0.9999992310907718, iteration: 216678
loss: 1.0504636764526367,grad_norm: 0.9999993814259512, iteration: 216679
loss: 0.988943874835968,grad_norm: 0.9999990323108721, iteration: 216680
loss: 1.0661617517471313,grad_norm: 0.9999999710453079, iteration: 216681
loss: 1.0616763830184937,grad_norm: 0.96924812639297, iteration: 216682
loss: 0.9993176460266113,grad_norm: 0.9999992096077199, iteration: 216683
loss: 1.0573556423187256,grad_norm: 0.9999993179612336, iteration: 216684
loss: 1.105905532836914,grad_norm: 0.9487962786154438, iteration: 216685
loss: 0.9956116676330566,grad_norm: 0.9999997326485311, iteration: 216686
loss: 1.1184965372085571,grad_norm: 0.9999991743968363, iteration: 216687
loss: 1.060562252998352,grad_norm: 0.9999994901282979, iteration: 216688
loss: 1.072712779045105,grad_norm: 0.9999999822136775, iteration: 216689
loss: 1.005474328994751,grad_norm: 0.9999990474259224, iteration: 216690
loss: 0.9594162106513977,grad_norm: 0.9999991371984148, iteration: 216691
loss: 0.9949883222579956,grad_norm: 0.9999991131237209, iteration: 216692
loss: 1.0063241720199585,grad_norm: 0.9999993984343958, iteration: 216693
loss: 1.021224856376648,grad_norm: 0.9383108994834729, iteration: 216694
loss: 1.0253099203109741,grad_norm: 0.8297260691147688, iteration: 216695
loss: 1.0091031789779663,grad_norm: 0.9999990999446353, iteration: 216696
loss: 1.0156607627868652,grad_norm: 0.9999990982963992, iteration: 216697
loss: 1.0111883878707886,grad_norm: 0.872596141730774, iteration: 216698
loss: 1.0273455381393433,grad_norm: 0.9849730892633111, iteration: 216699
loss: 1.065515398979187,grad_norm: 0.9999999516884569, iteration: 216700
loss: 0.9873300790786743,grad_norm: 0.9999991839152764, iteration: 216701
loss: 1.0004674196243286,grad_norm: 0.9999991030646295, iteration: 216702
loss: 0.9913421869277954,grad_norm: 0.9999991529110986, iteration: 216703
loss: 0.9994709491729736,grad_norm: 0.9593105160276938, iteration: 216704
loss: 1.0128517150878906,grad_norm: 0.9999991107233763, iteration: 216705
loss: 0.9829115271568298,grad_norm: 0.929061679029683, iteration: 216706
loss: 1.024840235710144,grad_norm: 0.9999990094521766, iteration: 216707
loss: 1.0115680694580078,grad_norm: 0.9719813026213884, iteration: 216708
loss: 1.0746277570724487,grad_norm: 0.999999161551271, iteration: 216709
loss: 1.0091019868850708,grad_norm: 0.858190833786956, iteration: 216710
loss: 0.9816758632659912,grad_norm: 0.9695505264854268, iteration: 216711
loss: 1.0153881311416626,grad_norm: 0.9999997951790203, iteration: 216712
loss: 1.0290961265563965,grad_norm: 0.9257371904172091, iteration: 216713
loss: 1.0329220294952393,grad_norm: 0.999999156350603, iteration: 216714
loss: 1.0243269205093384,grad_norm: 0.99999968644048, iteration: 216715
loss: 1.0160282850265503,grad_norm: 0.8868673314369051, iteration: 216716
loss: 1.0019183158874512,grad_norm: 0.9377542496444776, iteration: 216717
loss: 1.081384301185608,grad_norm: 0.9999993226113861, iteration: 216718
loss: 0.9764302968978882,grad_norm: 0.9999992786124189, iteration: 216719
loss: 1.0128837823867798,grad_norm: 0.9999992718273581, iteration: 216720
loss: 1.026634693145752,grad_norm: 0.9999994392155205, iteration: 216721
loss: 1.091539978981018,grad_norm: 0.9999998012969581, iteration: 216722
loss: 0.9855239987373352,grad_norm: 0.8786610636581782, iteration: 216723
loss: 1.008055329322815,grad_norm: 0.7805385310256494, iteration: 216724
loss: 0.9926468133926392,grad_norm: 0.8571703594535534, iteration: 216725
loss: 1.0027767419815063,grad_norm: 0.9207266153410661, iteration: 216726
loss: 0.9573955535888672,grad_norm: 0.8173834293886669, iteration: 216727
loss: 0.9973756074905396,grad_norm: 0.9999992385834812, iteration: 216728
loss: 1.1175662279129028,grad_norm: 0.9999992508709451, iteration: 216729
loss: 0.9897526502609253,grad_norm: 0.9911055762031985, iteration: 216730
loss: 1.004296898841858,grad_norm: 0.9769974596990113, iteration: 216731
loss: 1.188839077949524,grad_norm: 0.9999990346323038, iteration: 216732
loss: 0.9957854747772217,grad_norm: 0.9999991023920393, iteration: 216733
loss: 1.0261447429656982,grad_norm: 0.9999992308533339, iteration: 216734
loss: 1.0252126455307007,grad_norm: 0.9999995424591097, iteration: 216735
loss: 1.0870304107666016,grad_norm: 0.9999990669538827, iteration: 216736
loss: 0.9787131547927856,grad_norm: 0.8022279574881507, iteration: 216737
loss: 1.0062400102615356,grad_norm: 0.9999992113929311, iteration: 216738
loss: 1.0465537309646606,grad_norm: 0.9999991661269435, iteration: 216739
loss: 1.0085505247116089,grad_norm: 0.8466463865421004, iteration: 216740
loss: 1.0461245775222778,grad_norm: 0.9666701294016132, iteration: 216741
loss: 1.0029491186141968,grad_norm: 0.999999301591499, iteration: 216742
loss: 1.0770848989486694,grad_norm: 0.9999992209830092, iteration: 216743
loss: 0.9844723343849182,grad_norm: 0.8649557275648821, iteration: 216744
loss: 1.0142029523849487,grad_norm: 0.9999991024139557, iteration: 216745
loss: 1.0397685766220093,grad_norm: 0.999999870192855, iteration: 216746
loss: 1.0214953422546387,grad_norm: 0.9999997413727268, iteration: 216747
loss: 0.977516233921051,grad_norm: 0.9999992073365727, iteration: 216748
loss: 1.0447465181350708,grad_norm: 0.9636163731336898, iteration: 216749
loss: 1.1301287412643433,grad_norm: 0.9999998515767621, iteration: 216750
loss: 1.023772120475769,grad_norm: 0.9999997090956051, iteration: 216751
loss: 1.0424656867980957,grad_norm: 0.999999568011753, iteration: 216752
loss: 1.0005429983139038,grad_norm: 0.9320235034950736, iteration: 216753
loss: 0.9656241536140442,grad_norm: 0.999999102009795, iteration: 216754
loss: 1.081064224243164,grad_norm: 0.9999998237323764, iteration: 216755
loss: 1.0045702457427979,grad_norm: 0.9999991455139668, iteration: 216756
loss: 1.0350065231323242,grad_norm: 0.9999991345500067, iteration: 216757
loss: 1.0483951568603516,grad_norm: 0.9572071496700735, iteration: 216758
loss: 1.0984513759613037,grad_norm: 0.9999999363657467, iteration: 216759
loss: 0.9842503666877747,grad_norm: 0.9557994196307262, iteration: 216760
loss: 1.0183056592941284,grad_norm: 0.9177036848284938, iteration: 216761
loss: 1.0164668560028076,grad_norm: 0.999999139702222, iteration: 216762
loss: 1.0679458379745483,grad_norm: 0.9915053586300444, iteration: 216763
loss: 1.0154469013214111,grad_norm: 0.9474991863067705, iteration: 216764
loss: 0.9595577120780945,grad_norm: 0.9771781415682672, iteration: 216765
loss: 1.0613880157470703,grad_norm: 0.9999991492142812, iteration: 216766
loss: 0.9975422024726868,grad_norm: 0.9999989896052093, iteration: 216767
loss: 1.063425898551941,grad_norm: 0.9999991828864062, iteration: 216768
loss: 0.9961462616920471,grad_norm: 0.8213586982157138, iteration: 216769
loss: 0.9876599311828613,grad_norm: 0.8207313616875289, iteration: 216770
loss: 1.017357587814331,grad_norm: 0.9999994084886372, iteration: 216771
loss: 1.0631358623504639,grad_norm: 0.9888465422860333, iteration: 216772
loss: 0.9887309074401855,grad_norm: 0.9999990582854184, iteration: 216773
loss: 1.070397973060608,grad_norm: 0.9999993347680747, iteration: 216774
loss: 0.9711388945579529,grad_norm: 0.8798870499847766, iteration: 216775
loss: 1.0392347574234009,grad_norm: 0.9999991625242688, iteration: 216776
loss: 1.069344162940979,grad_norm: 0.9999996608309178, iteration: 216777
loss: 1.0493637323379517,grad_norm: 0.9999995812828159, iteration: 216778
loss: 0.9883603453636169,grad_norm: 0.8512201441614001, iteration: 216779
loss: 1.012120246887207,grad_norm: 0.8127594733902918, iteration: 216780
loss: 1.1365691423416138,grad_norm: 0.99999977842076, iteration: 216781
loss: 1.030408501625061,grad_norm: 0.9999991351331772, iteration: 216782
loss: 1.0161683559417725,grad_norm: 0.9999998577169835, iteration: 216783
loss: 1.0025737285614014,grad_norm: 0.9999997339327771, iteration: 216784
loss: 0.9645295739173889,grad_norm: 0.8185388577786911, iteration: 216785
loss: 1.0447267293930054,grad_norm: 0.8499587978874527, iteration: 216786
loss: 1.0315260887145996,grad_norm: 0.8642762900724097, iteration: 216787
loss: 1.0544525384902954,grad_norm: 0.9999994653570037, iteration: 216788
loss: 1.0087417364120483,grad_norm: 0.9999989927750084, iteration: 216789
loss: 1.0030019283294678,grad_norm: 0.9999990568279521, iteration: 216790
loss: 1.0379894971847534,grad_norm: 0.9999995656934397, iteration: 216791
loss: 1.2768150568008423,grad_norm: 0.9999998120920573, iteration: 216792
loss: 1.0171211957931519,grad_norm: 0.7915634402912333, iteration: 216793
loss: 1.0912055969238281,grad_norm: 0.9999998447242916, iteration: 216794
loss: 1.1431653499603271,grad_norm: 0.9999991213677, iteration: 216795
loss: 1.0101714134216309,grad_norm: 0.9999991616454014, iteration: 216796
loss: 1.0576865673065186,grad_norm: 0.8913225305750088, iteration: 216797
loss: 1.0490624904632568,grad_norm: 0.9999993083170918, iteration: 216798
loss: 0.9780586361885071,grad_norm: 0.9044693984845489, iteration: 216799
loss: 0.9978781938552856,grad_norm: 0.8431497942244102, iteration: 216800
loss: 1.1694931983947754,grad_norm: 0.9999997733873155, iteration: 216801
loss: 0.9647345542907715,grad_norm: 0.8800254408111347, iteration: 216802
loss: 1.0161296129226685,grad_norm: 0.9046154874920901, iteration: 216803
loss: 1.1138322353363037,grad_norm: 0.9999993656306729, iteration: 216804
loss: 1.0288840532302856,grad_norm: 0.999999057646223, iteration: 216805
loss: 1.041862964630127,grad_norm: 0.9999996232801495, iteration: 216806
loss: 0.9732519388198853,grad_norm: 0.9999991544849688, iteration: 216807
loss: 1.1437958478927612,grad_norm: 0.9999998694005934, iteration: 216808
loss: 0.9960907697677612,grad_norm: 0.9999993131419326, iteration: 216809
loss: 1.0708863735198975,grad_norm: 0.999999560594311, iteration: 216810
loss: 0.980900228023529,grad_norm: 0.9968830558162552, iteration: 216811
loss: 0.9940999746322632,grad_norm: 0.9749215762263153, iteration: 216812
loss: 0.9803359508514404,grad_norm: 0.9437825840779931, iteration: 216813
loss: 1.0045545101165771,grad_norm: 0.9129709547059977, iteration: 216814
loss: 1.1277145147323608,grad_norm: 0.9999994785018907, iteration: 216815
loss: 1.0311301946640015,grad_norm: 0.8428349945625004, iteration: 216816
loss: 1.1782617568969727,grad_norm: 0.9999990749150243, iteration: 216817
loss: 1.0599486827850342,grad_norm: 0.9999996629312765, iteration: 216818
loss: 1.007403016090393,grad_norm: 0.95228953726336, iteration: 216819
loss: 1.0400161743164062,grad_norm: 0.999999878856056, iteration: 216820
loss: 1.0229665040969849,grad_norm: 0.9999990635099183, iteration: 216821
loss: 0.9942227602005005,grad_norm: 0.9944029630297717, iteration: 216822
loss: 0.9907965660095215,grad_norm: 0.999999918413254, iteration: 216823
loss: 1.0719151496887207,grad_norm: 0.9999992218063264, iteration: 216824
loss: 0.9625218510627747,grad_norm: 0.9999992895325667, iteration: 216825
loss: 0.9748353958129883,grad_norm: 0.8409974847491625, iteration: 216826
loss: 0.966565728187561,grad_norm: 0.9278511238446702, iteration: 216827
loss: 1.1378220319747925,grad_norm: 0.948875129826364, iteration: 216828
loss: 0.9858711957931519,grad_norm: 0.8246938346925965, iteration: 216829
loss: 1.0222920179367065,grad_norm: 0.9715040914820309, iteration: 216830
loss: 1.0657470226287842,grad_norm: 0.9999991417280857, iteration: 216831
loss: 1.0076465606689453,grad_norm: 0.7717280149380976, iteration: 216832
loss: 1.0124675035476685,grad_norm: 0.9999990575775948, iteration: 216833
loss: 1.005535364151001,grad_norm: 0.941565081512051, iteration: 216834
loss: 1.0295709371566772,grad_norm: 0.9375856831368986, iteration: 216835
loss: 1.041522741317749,grad_norm: 0.9486858902491802, iteration: 216836
loss: 1.0210169553756714,grad_norm: 0.9660671939064975, iteration: 216837
loss: 0.9697265625,grad_norm: 0.9999990247587697, iteration: 216838
loss: 0.993344247341156,grad_norm: 0.9999989867811284, iteration: 216839
loss: 1.029226303100586,grad_norm: 0.9999996050418413, iteration: 216840
loss: 1.0168498754501343,grad_norm: 0.9999994381568607, iteration: 216841
loss: 1.0315135717391968,grad_norm: 0.9986957355286145, iteration: 216842
loss: 0.9859926104545593,grad_norm: 0.8854284407867349, iteration: 216843
loss: 0.9982021450996399,grad_norm: 0.9999995133513323, iteration: 216844
loss: 0.9790844917297363,grad_norm: 0.9670093722041883, iteration: 216845
loss: 0.9657017588615417,grad_norm: 0.9274266390832037, iteration: 216846
loss: 1.0327448844909668,grad_norm: 0.9999996630076626, iteration: 216847
loss: 1.0495449304580688,grad_norm: 0.9511929331971899, iteration: 216848
loss: 1.2351685762405396,grad_norm: 0.9999997239202318, iteration: 216849
loss: 0.9836028814315796,grad_norm: 0.911821245919387, iteration: 216850
loss: 1.0040427446365356,grad_norm: 0.9999990575222091, iteration: 216851
loss: 1.0448025465011597,grad_norm: 0.8436254103923937, iteration: 216852
loss: 1.0378623008728027,grad_norm: 0.9999994709335573, iteration: 216853
loss: 0.9834336638450623,grad_norm: 0.8408205870657731, iteration: 216854
loss: 1.0039048194885254,grad_norm: 0.9999990441055404, iteration: 216855
loss: 0.9788227677345276,grad_norm: 0.9999992474646197, iteration: 216856
loss: 0.9931834936141968,grad_norm: 0.9999998529837735, iteration: 216857
loss: 1.0287655591964722,grad_norm: 0.9999992839464709, iteration: 216858
loss: 1.023079514503479,grad_norm: 0.9999991368667525, iteration: 216859
loss: 1.0035642385482788,grad_norm: 0.9953933566625919, iteration: 216860
loss: 1.0247105360031128,grad_norm: 0.8027568508341724, iteration: 216861
loss: 1.0143446922302246,grad_norm: 0.999999681167449, iteration: 216862
loss: 1.1402279138565063,grad_norm: 0.9999992790429139, iteration: 216863
loss: 1.123861312866211,grad_norm: 0.9999989979831618, iteration: 216864
loss: 1.001698613166809,grad_norm: 0.922145897121339, iteration: 216865
loss: 0.9807510375976562,grad_norm: 0.9603775222306062, iteration: 216866
loss: 0.9644889235496521,grad_norm: 0.9999994525281993, iteration: 216867
loss: 1.0415948629379272,grad_norm: 0.884626402318115, iteration: 216868
loss: 0.9982349872589111,grad_norm: 0.8942419940573012, iteration: 216869
loss: 0.9861891269683838,grad_norm: 0.8807673595063098, iteration: 216870
loss: 1.0056854486465454,grad_norm: 0.9999990628239591, iteration: 216871
loss: 1.029921054840088,grad_norm: 0.9999991705614302, iteration: 216872
loss: 1.021849274635315,grad_norm: 0.9722659988883452, iteration: 216873
loss: 1.05970299243927,grad_norm: 0.8451661204052375, iteration: 216874
loss: 0.9587002992630005,grad_norm: 0.9999996239719138, iteration: 216875
loss: 1.0085855722427368,grad_norm: 0.9999997466649431, iteration: 216876
loss: 1.0055553913116455,grad_norm: 0.9999991847810469, iteration: 216877
loss: 1.0816832780838013,grad_norm: 0.999999198503217, iteration: 216878
loss: 1.0283958911895752,grad_norm: 0.999999186081828, iteration: 216879
loss: 0.9991644024848938,grad_norm: 0.8441695201930738, iteration: 216880
loss: 1.0277563333511353,grad_norm: 0.9334542966815159, iteration: 216881
loss: 1.016952395439148,grad_norm: 0.9999990580226179, iteration: 216882
loss: 0.9764664769172668,grad_norm: 0.99510239679097, iteration: 216883
loss: 0.9498072862625122,grad_norm: 0.9211789327379869, iteration: 216884
loss: 1.0276665687561035,grad_norm: 0.9999995097075457, iteration: 216885
loss: 1.0227490663528442,grad_norm: 0.9027890481373568, iteration: 216886
loss: 0.982553243637085,grad_norm: 0.8684699269635933, iteration: 216887
loss: 1.1238840818405151,grad_norm: 0.9999991816445096, iteration: 216888
loss: 1.040346384048462,grad_norm: 0.9999998972733001, iteration: 216889
loss: 0.9843964576721191,grad_norm: 0.9908622665539636, iteration: 216890
loss: 1.0044851303100586,grad_norm: 0.9023173093221205, iteration: 216891
loss: 1.1092820167541504,grad_norm: 0.9999995025309123, iteration: 216892
loss: 0.9847675561904907,grad_norm: 0.9374272900156496, iteration: 216893
loss: 0.9759030342102051,grad_norm: 0.8389858514887178, iteration: 216894
loss: 0.9496660232543945,grad_norm: 0.9999989195205072, iteration: 216895
loss: 0.9826953411102295,grad_norm: 0.9999996241848471, iteration: 216896
loss: 1.0472558736801147,grad_norm: 0.9999989167143681, iteration: 216897
loss: 1.0373706817626953,grad_norm: 0.8119255854340012, iteration: 216898
loss: 1.0062012672424316,grad_norm: 0.7650517960703568, iteration: 216899
loss: 1.0815051794052124,grad_norm: 0.950254393290353, iteration: 216900
loss: 1.0996005535125732,grad_norm: 0.8371964285958885, iteration: 216901
loss: 1.0488144159317017,grad_norm: 0.999999762914561, iteration: 216902
loss: 0.9742682576179504,grad_norm: 0.9165258089551593, iteration: 216903
loss: 1.0190913677215576,grad_norm: 0.9999991578084987, iteration: 216904
loss: 0.9960066676139832,grad_norm: 0.8662917339636089, iteration: 216905
loss: 0.9660692811012268,grad_norm: 0.9566284059963508, iteration: 216906
loss: 1.035089135169983,grad_norm: 0.956014261978418, iteration: 216907
loss: 1.0511958599090576,grad_norm: 0.9999991227431048, iteration: 216908
loss: 1.033121109008789,grad_norm: 0.9999990262635003, iteration: 216909
loss: 1.0222963094711304,grad_norm: 0.99999936947434, iteration: 216910
loss: 0.9925491809844971,grad_norm: 0.9999991638621296, iteration: 216911
loss: 0.9933739900588989,grad_norm: 0.8934532733848987, iteration: 216912
loss: 1.0083448886871338,grad_norm: 0.8377714540309233, iteration: 216913
loss: 1.0322927236557007,grad_norm: 0.9955911196554161, iteration: 216914
loss: 1.024693489074707,grad_norm: 0.9999991947301924, iteration: 216915
loss: 0.9802963733673096,grad_norm: 0.9999998689937295, iteration: 216916
loss: 1.0765125751495361,grad_norm: 0.9999995835264395, iteration: 216917
loss: 1.0470234155654907,grad_norm: 0.9999822666918067, iteration: 216918
loss: 0.998542845249176,grad_norm: 0.9999990668621002, iteration: 216919
loss: 1.0631951093673706,grad_norm: 0.999999179709135, iteration: 216920
loss: 1.0314604043960571,grad_norm: 0.9999990621912189, iteration: 216921
loss: 1.0548707246780396,grad_norm: 0.9999992038999015, iteration: 216922
loss: 1.1414804458618164,grad_norm: 0.9999992773504878, iteration: 216923
loss: 1.0845232009887695,grad_norm: 0.9999998752767651, iteration: 216924
loss: 1.0183855295181274,grad_norm: 0.999998960431902, iteration: 216925
loss: 0.9817545413970947,grad_norm: 0.8600005467634133, iteration: 216926
loss: 1.0134402513504028,grad_norm: 0.9209574165186584, iteration: 216927
loss: 1.0689823627471924,grad_norm: 0.8992479352945482, iteration: 216928
loss: 1.0220789909362793,grad_norm: 0.9999994654677805, iteration: 216929
loss: 0.9869303703308105,grad_norm: 0.9014122777510116, iteration: 216930
loss: 1.152114987373352,grad_norm: 0.9999991329724992, iteration: 216931
loss: 1.0073895454406738,grad_norm: 0.9985660773073224, iteration: 216932
loss: 1.0265296697616577,grad_norm: 0.9999990042964231, iteration: 216933
loss: 1.0044416189193726,grad_norm: 0.9858867506400765, iteration: 216934
loss: 1.0009077787399292,grad_norm: 0.9264261832896497, iteration: 216935
loss: 1.0244873762130737,grad_norm: 0.9999999079131492, iteration: 216936
loss: 0.9845578074455261,grad_norm: 0.9945739558414025, iteration: 216937
loss: 1.0734516382217407,grad_norm: 1.000000026901841, iteration: 216938
loss: 1.0260446071624756,grad_norm: 0.999999131236662, iteration: 216939
loss: 1.0295101404190063,grad_norm: 0.9999991799318498, iteration: 216940
loss: 1.0301897525787354,grad_norm: 0.9999995577210464, iteration: 216941
loss: 1.0127439498901367,grad_norm: 0.9492529983048263, iteration: 216942
loss: 1.0226948261260986,grad_norm: 0.8933012497791294, iteration: 216943
loss: 1.0312671661376953,grad_norm: 0.999999102620775, iteration: 216944
loss: 0.959054708480835,grad_norm: 0.9640748038219317, iteration: 216945
loss: 1.0558829307556152,grad_norm: 0.9999998004790398, iteration: 216946
loss: 1.064579725265503,grad_norm: 0.9999993647597342, iteration: 216947
loss: 1.001518964767456,grad_norm: 0.9999992372847495, iteration: 216948
loss: 0.9863596558570862,grad_norm: 0.9999992980938504, iteration: 216949
loss: 1.1041306257247925,grad_norm: 0.907202195273109, iteration: 216950
loss: 0.9909592866897583,grad_norm: 0.9999990467811621, iteration: 216951
loss: 1.0869897603988647,grad_norm: 0.9999994927918459, iteration: 216952
loss: 1.0190430879592896,grad_norm: 0.9999995524461319, iteration: 216953
loss: 1.026806116104126,grad_norm: 0.9999992083816368, iteration: 216954
loss: 0.9675297737121582,grad_norm: 0.999998961026239, iteration: 216955
loss: 1.0643231868743896,grad_norm: 0.9041912992800771, iteration: 216956
loss: 1.0021878480911255,grad_norm: 0.8956254932850325, iteration: 216957
loss: 1.019566297531128,grad_norm: 0.999999231496172, iteration: 216958
loss: 0.9697165489196777,grad_norm: 0.8244140547093348, iteration: 216959
loss: 1.01528799533844,grad_norm: 0.9999991582152434, iteration: 216960
loss: 1.096130132675171,grad_norm: 0.9999996272234453, iteration: 216961
loss: 1.0000590085983276,grad_norm: 0.9999992170163391, iteration: 216962
loss: 1.113778829574585,grad_norm: 0.9999990865735119, iteration: 216963
loss: 1.0368818044662476,grad_norm: 0.9999991585444088, iteration: 216964
loss: 1.022489070892334,grad_norm: 0.8911003569055502, iteration: 216965
loss: 1.0422319173812866,grad_norm: 0.999999180369106, iteration: 216966
loss: 1.0395941734313965,grad_norm: 0.997928636331102, iteration: 216967
loss: 1.1207882165908813,grad_norm: 0.9999994298161541, iteration: 216968
loss: 1.0257693529129028,grad_norm: 0.9999989551364771, iteration: 216969
loss: 1.0933679342269897,grad_norm: 0.9999997685535087, iteration: 216970
loss: 1.0793445110321045,grad_norm: 0.9972623737602954, iteration: 216971
loss: 1.0676391124725342,grad_norm: 0.9999992738313376, iteration: 216972
loss: 1.0635395050048828,grad_norm: 0.9999995208968889, iteration: 216973
loss: 1.0180555582046509,grad_norm: 0.9999990166826026, iteration: 216974
loss: 0.9907353520393372,grad_norm: 0.9093448858532995, iteration: 216975
loss: 1.0349161624908447,grad_norm: 0.9999990834342538, iteration: 216976
loss: 1.0867146253585815,grad_norm: 0.9999996264789129, iteration: 216977
loss: 0.99486243724823,grad_norm: 0.9999989887859742, iteration: 216978
loss: 0.9829136729240417,grad_norm: 0.9999990260490167, iteration: 216979
loss: 1.0159180164337158,grad_norm: 0.9999991680270354, iteration: 216980
loss: 1.0092341899871826,grad_norm: 0.9999991543865964, iteration: 216981
loss: 1.0144753456115723,grad_norm: 0.9999991514777057, iteration: 216982
loss: 1.2808458805084229,grad_norm: 0.9999993382177248, iteration: 216983
loss: 1.0060020685195923,grad_norm: 0.9999991229541025, iteration: 216984
loss: 1.1082470417022705,grad_norm: 0.999999170182385, iteration: 216985
loss: 1.0082364082336426,grad_norm: 0.9681188338732074, iteration: 216986
loss: 0.9773557782173157,grad_norm: 0.9230723735578701, iteration: 216987
loss: 1.0208641290664673,grad_norm: 0.9268010108575441, iteration: 216988
loss: 1.011844277381897,grad_norm: 0.9999991757596081, iteration: 216989
loss: 0.9917412996292114,grad_norm: 0.9101132087726331, iteration: 216990
loss: 1.1580747365951538,grad_norm: 0.9999990934252254, iteration: 216991
loss: 1.0771276950836182,grad_norm: 0.9999992336396321, iteration: 216992
loss: 1.0502357482910156,grad_norm: 0.9043893398636866, iteration: 216993
loss: 1.0321273803710938,grad_norm: 0.999999552031273, iteration: 216994
loss: 1.0710967779159546,grad_norm: 0.9999990333417615, iteration: 216995
loss: 0.970062792301178,grad_norm: 0.8492204149657285, iteration: 216996
loss: 1.106346607208252,grad_norm: 0.9999995924891621, iteration: 216997
loss: 1.0156338214874268,grad_norm: 0.8734868533318382, iteration: 216998
loss: 0.9960345029830933,grad_norm: 0.8295594329607391, iteration: 216999
loss: 0.9946262240409851,grad_norm: 0.9325743624173229, iteration: 217000
loss: 1.020015835762024,grad_norm: 0.9653461611697788, iteration: 217001
loss: 1.0384095907211304,grad_norm: 0.9999990526394006, iteration: 217002
loss: 1.0228477716445923,grad_norm: 0.9999991265313406, iteration: 217003
loss: 1.0114634037017822,grad_norm: 0.9999992941723378, iteration: 217004
loss: 0.9777437448501587,grad_norm: 0.9999991181145121, iteration: 217005
loss: 1.0047757625579834,grad_norm: 0.9486256540784465, iteration: 217006
loss: 1.0489716529846191,grad_norm: 0.8430155777334409, iteration: 217007
loss: 1.1201121807098389,grad_norm: 0.9999997764185103, iteration: 217008
loss: 1.022148847579956,grad_norm: 0.9999992614315851, iteration: 217009
loss: 1.003175139427185,grad_norm: 0.9999990675928492, iteration: 217010
loss: 1.1076140403747559,grad_norm: 0.9999996731366091, iteration: 217011
loss: 1.068565011024475,grad_norm: 0.9999991534108229, iteration: 217012
loss: 1.0013957023620605,grad_norm: 0.84614547654125, iteration: 217013
loss: 0.985738217830658,grad_norm: 0.8579425758199561, iteration: 217014
loss: 1.0178661346435547,grad_norm: 0.9999994403888195, iteration: 217015
loss: 1.0492582321166992,grad_norm: 0.9999998469757466, iteration: 217016
loss: 1.0173841714859009,grad_norm: 0.7333951136262356, iteration: 217017
loss: 1.0202316045761108,grad_norm: 0.8970233384136795, iteration: 217018
loss: 0.9888446927070618,grad_norm: 0.8976674279342375, iteration: 217019
loss: 1.0181564092636108,grad_norm: 0.9999996404290685, iteration: 217020
loss: 1.0132968425750732,grad_norm: 0.9627738096371234, iteration: 217021
loss: 1.027586579322815,grad_norm: 0.9999991562049774, iteration: 217022
loss: 1.016487717628479,grad_norm: 0.9999990917474954, iteration: 217023
loss: 0.9611670970916748,grad_norm: 0.9139403774750273, iteration: 217024
loss: 0.9736946821212769,grad_norm: 0.9757522383740399, iteration: 217025
loss: 0.9773839116096497,grad_norm: 0.9380237974577228, iteration: 217026
loss: 1.0042040348052979,grad_norm: 0.8915906903171377, iteration: 217027
loss: 0.9930815100669861,grad_norm: 0.857471789390249, iteration: 217028
loss: 0.9871591329574585,grad_norm: 0.9792514461996925, iteration: 217029
loss: 0.9723454713821411,grad_norm: 0.9999991158058757, iteration: 217030
loss: 1.086743950843811,grad_norm: 0.9999992483137476, iteration: 217031
loss: 1.016140341758728,grad_norm: 0.9999996550847867, iteration: 217032
loss: 1.0055485963821411,grad_norm: 0.9059930305086402, iteration: 217033
loss: 0.9921130537986755,grad_norm: 0.9824327658986974, iteration: 217034
loss: 1.0995609760284424,grad_norm: 0.9999992629862972, iteration: 217035
loss: 1.1611886024475098,grad_norm: 0.8202421104009173, iteration: 217036
loss: 1.1309913396835327,grad_norm: 0.999999849504459, iteration: 217037
loss: 1.0139731168746948,grad_norm: 0.999999211830785, iteration: 217038
loss: 1.0067161321640015,grad_norm: 0.9576694928582922, iteration: 217039
loss: 1.0058525800704956,grad_norm: 0.9999990036922406, iteration: 217040
loss: 1.0354809761047363,grad_norm: 0.9999995506340442, iteration: 217041
loss: 0.9745431542396545,grad_norm: 0.9726372283657306, iteration: 217042
loss: 0.970180869102478,grad_norm: 0.9947189225495027, iteration: 217043
loss: 1.0709713697433472,grad_norm: 0.9856666954282176, iteration: 217044
loss: 1.072677731513977,grad_norm: 0.9999997471216393, iteration: 217045
loss: 0.9712474942207336,grad_norm: 0.8011226848870457, iteration: 217046
loss: 1.0479602813720703,grad_norm: 0.9999989881053154, iteration: 217047
loss: 0.9751079082489014,grad_norm: 0.9999991055096201, iteration: 217048
loss: 0.9597616791725159,grad_norm: 0.7715318180408556, iteration: 217049
loss: 0.9711644649505615,grad_norm: 0.9612202823647713, iteration: 217050
loss: 1.020085334777832,grad_norm: 0.999999687318168, iteration: 217051
loss: 0.9749308228492737,grad_norm: 0.9999990526857513, iteration: 217052
loss: 1.109224796295166,grad_norm: 0.9973140490522767, iteration: 217053
loss: 0.9921053051948547,grad_norm: 0.9999990336464111, iteration: 217054
loss: 0.9835081100463867,grad_norm: 0.9999994009860916, iteration: 217055
loss: 1.0166376829147339,grad_norm: 0.9999994817164418, iteration: 217056
loss: 1.034132719039917,grad_norm: 0.9999997148323908, iteration: 217057
loss: 1.0577689409255981,grad_norm: 0.8284391958252381, iteration: 217058
loss: 0.9786860942840576,grad_norm: 0.9377993392081828, iteration: 217059
loss: 0.9701681137084961,grad_norm: 0.9999992315824591, iteration: 217060
loss: 1.1350764036178589,grad_norm: 0.9999995205789395, iteration: 217061
loss: 0.9959700107574463,grad_norm: 0.9853913686282086, iteration: 217062
loss: 1.015217661857605,grad_norm: 0.8554862784910624, iteration: 217063
loss: 1.0814718008041382,grad_norm: 0.8431501929577664, iteration: 217064
loss: 1.021793246269226,grad_norm: 0.9999991013907974, iteration: 217065
loss: 1.0219274759292603,grad_norm: 0.8885778867710075, iteration: 217066
loss: 0.9930043816566467,grad_norm: 0.747702676790476, iteration: 217067
loss: 1.0014888048171997,grad_norm: 0.9491027474072707, iteration: 217068
loss: 0.959600567817688,grad_norm: 0.9085274565025439, iteration: 217069
loss: 0.98551344871521,grad_norm: 0.999999147075415, iteration: 217070
loss: 1.2178080081939697,grad_norm: 0.9999992514481889, iteration: 217071
loss: 1.0212024450302124,grad_norm: 0.9999998254900789, iteration: 217072
loss: 0.9941774606704712,grad_norm: 0.9999991953053395, iteration: 217073
loss: 1.018202781677246,grad_norm: 0.888698783940316, iteration: 217074
loss: 1.0474896430969238,grad_norm: 0.999999104882135, iteration: 217075
loss: 1.0523167848587036,grad_norm: 0.9999998842640613, iteration: 217076
loss: 1.0954887866973877,grad_norm: 0.9999991851322048, iteration: 217077
loss: 0.9781763553619385,grad_norm: 0.8561954455962156, iteration: 217078
loss: 1.0754109621047974,grad_norm: 0.9476299231079799, iteration: 217079
loss: 0.9690786004066467,grad_norm: 0.9999992393068874, iteration: 217080
loss: 1.0453277826309204,grad_norm: 0.9999993798880334, iteration: 217081
loss: 1.0909425020217896,grad_norm: 0.9999995211417896, iteration: 217082
loss: 1.0025633573532104,grad_norm: 0.9999994146346133, iteration: 217083
loss: 1.0466951131820679,grad_norm: 0.9999998200407284, iteration: 217084
loss: 1.029083251953125,grad_norm: 0.9843699663100411, iteration: 217085
loss: 0.9596884846687317,grad_norm: 0.8775337841148126, iteration: 217086
loss: 1.0539672374725342,grad_norm: 0.9999991539946343, iteration: 217087
loss: 1.0122082233428955,grad_norm: 0.9394522955905148, iteration: 217088
loss: 0.966765820980072,grad_norm: 0.8467931333443295, iteration: 217089
loss: 0.992830216884613,grad_norm: 0.8166477241529572, iteration: 217090
loss: 1.0131430625915527,grad_norm: 0.869958462747177, iteration: 217091
loss: 1.0219546556472778,grad_norm: 0.8273682462441787, iteration: 217092
loss: 0.9833976030349731,grad_norm: 0.8146220845596778, iteration: 217093
loss: 1.0419728755950928,grad_norm: 0.9999993703887342, iteration: 217094
loss: 1.1313430070877075,grad_norm: 0.9999997065636048, iteration: 217095
loss: 0.9942340850830078,grad_norm: 0.9999991623185108, iteration: 217096
loss: 1.0168938636779785,grad_norm: 0.9999991705686341, iteration: 217097
loss: 0.9459340572357178,grad_norm: 0.9999990734295914, iteration: 217098
loss: 1.0350215435028076,grad_norm: 0.9999992699674422, iteration: 217099
loss: 1.0292558670043945,grad_norm: 0.9999993702187829, iteration: 217100
loss: 1.045544147491455,grad_norm: 0.9665622516362355, iteration: 217101
loss: 0.9974985718727112,grad_norm: 0.9385610367251399, iteration: 217102
loss: 1.0199379920959473,grad_norm: 0.999999036765561, iteration: 217103
loss: 1.1276402473449707,grad_norm: 0.9999998702588939, iteration: 217104
loss: 1.0114495754241943,grad_norm: 0.9999998034250921, iteration: 217105
loss: 0.9853731393814087,grad_norm: 0.9999989949728599, iteration: 217106
loss: 1.044923186302185,grad_norm: 0.9999992021050598, iteration: 217107
loss: 0.9894559979438782,grad_norm: 0.9176276323887094, iteration: 217108
loss: 1.0344934463500977,grad_norm: 0.886069031279811, iteration: 217109
loss: 1.148607611656189,grad_norm: 0.999999598012404, iteration: 217110
loss: 1.1335912942886353,grad_norm: 0.9999993201728671, iteration: 217111
loss: 1.132222294807434,grad_norm: 0.9999992069095331, iteration: 217112
loss: 1.03914213180542,grad_norm: 0.9999990212369019, iteration: 217113
loss: 1.02040433883667,grad_norm: 0.9999999223865444, iteration: 217114
loss: 1.0290220975875854,grad_norm: 0.9177336126577693, iteration: 217115
loss: 1.0100765228271484,grad_norm: 0.9999992219717538, iteration: 217116
loss: 0.9979008436203003,grad_norm: 0.9999990771028354, iteration: 217117
loss: 0.998725950717926,grad_norm: 0.8356059309719791, iteration: 217118
loss: 1.0547746419906616,grad_norm: 0.999999035291586, iteration: 217119
loss: 1.0655516386032104,grad_norm: 0.997055091717312, iteration: 217120
loss: 1.014229416847229,grad_norm: 0.9263517969304409, iteration: 217121
loss: 1.044945240020752,grad_norm: 0.9999997823444833, iteration: 217122
loss: 0.9916378259658813,grad_norm: 0.9999992866562939, iteration: 217123
loss: 1.0122063159942627,grad_norm: 0.999999052537479, iteration: 217124
loss: 1.0338643789291382,grad_norm: 0.9229894140043259, iteration: 217125
loss: 0.9923279881477356,grad_norm: 0.9809525155043468, iteration: 217126
loss: 1.008355975151062,grad_norm: 0.9999990062787272, iteration: 217127
loss: 0.9648962020874023,grad_norm: 0.9999998270331486, iteration: 217128
loss: 1.001143455505371,grad_norm: 0.7690281729537443, iteration: 217129
loss: 1.0234495401382446,grad_norm: 0.9999992737666059, iteration: 217130
loss: 1.0144957304000854,grad_norm: 0.9999990392451217, iteration: 217131
loss: 1.0344130992889404,grad_norm: 0.9999990647670125, iteration: 217132
loss: 0.9794660806655884,grad_norm: 0.8106338593072835, iteration: 217133
loss: 0.9592565894126892,grad_norm: 0.9783356024926202, iteration: 217134
loss: 1.0109353065490723,grad_norm: 0.9741440847005001, iteration: 217135
loss: 0.9949370622634888,grad_norm: 0.9999989844904157, iteration: 217136
loss: 1.0072535276412964,grad_norm: 0.9892148119882737, iteration: 217137
loss: 1.0785675048828125,grad_norm: 0.9999996426381147, iteration: 217138
loss: 1.0040172338485718,grad_norm: 0.8728430842355854, iteration: 217139
loss: 0.967605471611023,grad_norm: 0.9999991559918537, iteration: 217140
loss: 1.0367954969406128,grad_norm: 0.9999993152085802, iteration: 217141
loss: 0.9933150410652161,grad_norm: 0.8289628370430825, iteration: 217142
loss: 1.0703409910202026,grad_norm: 0.9999994798519961, iteration: 217143
loss: 1.0109659433364868,grad_norm: 0.9696714915315859, iteration: 217144
loss: 0.9451347589492798,grad_norm: 0.7915559849311075, iteration: 217145
loss: 1.0264079570770264,grad_norm: 0.999999810537215, iteration: 217146
loss: 0.958724319934845,grad_norm: 0.8832810335656065, iteration: 217147
loss: 1.0426157712936401,grad_norm: 0.9542369287655347, iteration: 217148
loss: 1.0115779638290405,grad_norm: 0.9999994355592231, iteration: 217149
loss: 1.0057321786880493,grad_norm: 0.9999991015497516, iteration: 217150
loss: 1.081703782081604,grad_norm: 0.9999997755966711, iteration: 217151
loss: 1.0394887924194336,grad_norm: 0.9999998469883709, iteration: 217152
loss: 1.0481873750686646,grad_norm: 0.999999823183464, iteration: 217153
loss: 1.008353590965271,grad_norm: 0.8672872550521985, iteration: 217154
loss: 1.0503228902816772,grad_norm: 0.9966652440811621, iteration: 217155
loss: 0.9694347977638245,grad_norm: 0.8051326481124015, iteration: 217156
loss: 0.9913544058799744,grad_norm: 0.9999996671628995, iteration: 217157
loss: 1.008378267288208,grad_norm: 0.9999992427811443, iteration: 217158
loss: 1.0593385696411133,grad_norm: 0.9999996559130477, iteration: 217159
loss: 0.9874141216278076,grad_norm: 0.8232394777133257, iteration: 217160
loss: 0.9976919293403625,grad_norm: 0.999999169007226, iteration: 217161
loss: 0.9894885420799255,grad_norm: 0.9999999444217327, iteration: 217162
loss: 1.0148016214370728,grad_norm: 0.9067706525797499, iteration: 217163
loss: 1.055253505706787,grad_norm: 0.9999994478519002, iteration: 217164
loss: 1.015038013458252,grad_norm: 0.8302631784628033, iteration: 217165
loss: 1.0122485160827637,grad_norm: 0.9999991766807064, iteration: 217166
loss: 1.1130281686782837,grad_norm: 0.9999990847947765, iteration: 217167
loss: 0.9952260255813599,grad_norm: 0.9467467430444161, iteration: 217168
loss: 1.0902963876724243,grad_norm: 0.9400443331229307, iteration: 217169
loss: 0.9820569753646851,grad_norm: 0.9999989927602504, iteration: 217170
loss: 1.1362088918685913,grad_norm: 0.9914792516267751, iteration: 217171
loss: 1.0580878257751465,grad_norm: 0.9947809973620995, iteration: 217172
loss: 1.0021363496780396,grad_norm: 0.8898818369805687, iteration: 217173
loss: 1.1504158973693848,grad_norm: 0.999999743701745, iteration: 217174
loss: 0.9991106390953064,grad_norm: 0.8810235493162233, iteration: 217175
loss: 1.0259010791778564,grad_norm: 0.9999993060881371, iteration: 217176
loss: 1.0485384464263916,grad_norm: 0.9999995089944315, iteration: 217177
loss: 1.0469963550567627,grad_norm: 0.9999990775803318, iteration: 217178
loss: 1.0391268730163574,grad_norm: 0.8676932313274771, iteration: 217179
loss: 1.0612229108810425,grad_norm: 0.9999995357311381, iteration: 217180
loss: 1.002769112586975,grad_norm: 0.9999991284458615, iteration: 217181
loss: 0.9930721521377563,grad_norm: 0.8004822469412538, iteration: 217182
loss: 0.995919942855835,grad_norm: 0.9999991276286057, iteration: 217183
loss: 0.9996446967124939,grad_norm: 0.9213629540213533, iteration: 217184
loss: 1.0852651596069336,grad_norm: 0.8555565460906099, iteration: 217185
loss: 1.0743505954742432,grad_norm: 0.9999999262056494, iteration: 217186
loss: 1.0550261735916138,grad_norm: 0.9999995159445805, iteration: 217187
loss: 1.0480234622955322,grad_norm: 0.99999954587221, iteration: 217188
loss: 0.9849519729614258,grad_norm: 0.9999990894291486, iteration: 217189
loss: 1.020344853401184,grad_norm: 0.9999993422337144, iteration: 217190
loss: 0.9856517314910889,grad_norm: 0.9999990638178308, iteration: 217191
loss: 1.0000733137130737,grad_norm: 0.9999998065764042, iteration: 217192
loss: 1.0464649200439453,grad_norm: 0.999999220468991, iteration: 217193
loss: 0.9903677105903625,grad_norm: 0.9999991646698054, iteration: 217194
loss: 1.0387367010116577,grad_norm: 0.9999998130898247, iteration: 217195
loss: 0.9744770526885986,grad_norm: 0.9999989637953309, iteration: 217196
loss: 0.9450602531433105,grad_norm: 0.9430012700011917, iteration: 217197
loss: 0.9748759865760803,grad_norm: 0.9626215469192242, iteration: 217198
loss: 0.980584979057312,grad_norm: 0.9178958438018435, iteration: 217199
loss: 0.9766566157341003,grad_norm: 0.999999020889727, iteration: 217200
loss: 0.9960644841194153,grad_norm: 0.9999993783945714, iteration: 217201
loss: 1.0924700498580933,grad_norm: 0.9999991692065596, iteration: 217202
loss: 1.048717975616455,grad_norm: 0.9999991692644113, iteration: 217203
loss: 1.0290343761444092,grad_norm: 0.9999990087958229, iteration: 217204
loss: 1.0568255186080933,grad_norm: 0.9999997776060784, iteration: 217205
loss: 1.0033719539642334,grad_norm: 0.9999989491677085, iteration: 217206
loss: 0.9901027083396912,grad_norm: 0.999999146371668, iteration: 217207
loss: 1.2230620384216309,grad_norm: 0.9999992549954916, iteration: 217208
loss: 1.1576297283172607,grad_norm: 0.9999998077821844, iteration: 217209
loss: 1.0569132566452026,grad_norm: 0.9999990005063766, iteration: 217210
loss: 1.0010766983032227,grad_norm: 0.7369314178963252, iteration: 217211
loss: 1.0042266845703125,grad_norm: 0.9999991674920043, iteration: 217212
loss: 1.0487879514694214,grad_norm: 0.9999994526107253, iteration: 217213
loss: 1.0245939493179321,grad_norm: 0.999999378523584, iteration: 217214
loss: 1.0069241523742676,grad_norm: 0.999999257666148, iteration: 217215
loss: 1.0114095211029053,grad_norm: 0.8548731964886701, iteration: 217216
loss: 1.0353761911392212,grad_norm: 0.9999990754981569, iteration: 217217
loss: 1.0188798904418945,grad_norm: 0.9581603601865466, iteration: 217218
loss: 0.9913338422775269,grad_norm: 0.9999990468021304, iteration: 217219
loss: 1.0384057760238647,grad_norm: 0.9999996440260108, iteration: 217220
loss: 0.9991884231567383,grad_norm: 0.8813448484664702, iteration: 217221
loss: 1.1065654754638672,grad_norm: 0.9999992106569274, iteration: 217222
loss: 1.1389431953430176,grad_norm: 0.9999999346840784, iteration: 217223
loss: 1.1215753555297852,grad_norm: 0.9999995212711587, iteration: 217224
loss: 0.9953701496124268,grad_norm: 0.8504032253313524, iteration: 217225
loss: 1.030927300453186,grad_norm: 0.9999992484169722, iteration: 217226
loss: 1.210114598274231,grad_norm: 0.99999943516247, iteration: 217227
loss: 0.9772952795028687,grad_norm: 0.9952338651983785, iteration: 217228
loss: 1.0121229887008667,grad_norm: 0.9878768198524541, iteration: 217229
loss: 1.0382685661315918,grad_norm: 0.9530234810786098, iteration: 217230
loss: 1.0072035789489746,grad_norm: 0.921875186031322, iteration: 217231
loss: 1.113157868385315,grad_norm: 0.9999997380030237, iteration: 217232
loss: 1.2075045108795166,grad_norm: 0.9999998165867624, iteration: 217233
loss: 1.1071752309799194,grad_norm: 0.9999992114846327, iteration: 217234
loss: 0.9933453798294067,grad_norm: 0.9999993018526235, iteration: 217235
loss: 1.009009838104248,grad_norm: 0.9999989476247199, iteration: 217236
loss: 1.0366337299346924,grad_norm: 0.9455284876391412, iteration: 217237
loss: 1.006492257118225,grad_norm: 0.9999993499284067, iteration: 217238
loss: 1.3426978588104248,grad_norm: 0.9999996036707991, iteration: 217239
loss: 1.0024527311325073,grad_norm: 0.9999994927438608, iteration: 217240
loss: 1.0309995412826538,grad_norm: 0.9999994623509021, iteration: 217241
loss: 0.9912170767784119,grad_norm: 0.9999992157327833, iteration: 217242
loss: 1.185660719871521,grad_norm: 0.9999992354834256, iteration: 217243
loss: 0.9913718700408936,grad_norm: 0.999999303962258, iteration: 217244
loss: 1.0093592405319214,grad_norm: 0.9406920159380087, iteration: 217245
loss: 0.983528733253479,grad_norm: 0.834839263704079, iteration: 217246
loss: 1.028881549835205,grad_norm: 0.9999996352078662, iteration: 217247
loss: 1.0192134380340576,grad_norm: 0.9999991630820007, iteration: 217248
loss: 1.0240517854690552,grad_norm: 0.9999991084152856, iteration: 217249
loss: 1.1446746587753296,grad_norm: 0.9999998303575237, iteration: 217250
loss: 1.0127946138381958,grad_norm: 0.8807059756902257, iteration: 217251
loss: 1.026860237121582,grad_norm: 0.9266176844650262, iteration: 217252
loss: 1.0169583559036255,grad_norm: 0.9999991792111771, iteration: 217253
loss: 0.9963251352310181,grad_norm: 0.9999991600085725, iteration: 217254
loss: 1.0748891830444336,grad_norm: 0.9999995557420983, iteration: 217255
loss: 1.0059764385223389,grad_norm: 0.9244846462452669, iteration: 217256
loss: 1.0110641717910767,grad_norm: 0.8770519414704885, iteration: 217257
loss: 0.9878363609313965,grad_norm: 0.9633794560251162, iteration: 217258
loss: 1.0508549213409424,grad_norm: 0.9239232912553462, iteration: 217259
loss: 0.9864039421081543,grad_norm: 0.9999991286954915, iteration: 217260
loss: 0.9750561118125916,grad_norm: 0.9999991127481472, iteration: 217261
loss: 1.0253002643585205,grad_norm: 0.9999991871196342, iteration: 217262
loss: 1.0011500120162964,grad_norm: 0.9554687332818848, iteration: 217263
loss: 1.008280634880066,grad_norm: 0.9999995724558938, iteration: 217264
loss: 0.9778844714164734,grad_norm: 0.8762906241403614, iteration: 217265
loss: 1.0654656887054443,grad_norm: 0.9999998917802109, iteration: 217266
loss: 1.0346481800079346,grad_norm: 0.9999999002354891, iteration: 217267
loss: 1.0093411207199097,grad_norm: 0.9999990101149046, iteration: 217268
loss: 1.0396631956100464,grad_norm: 0.999999735585875, iteration: 217269
loss: 1.001516580581665,grad_norm: 0.867938179659969, iteration: 217270
loss: 0.9887030720710754,grad_norm: 0.9999992848900088, iteration: 217271
loss: 1.0501022338867188,grad_norm: 1.0000000306983234, iteration: 217272
loss: 0.9943559765815735,grad_norm: 0.9999991222157787, iteration: 217273
loss: 1.017903447151184,grad_norm: 0.9999991754575067, iteration: 217274
loss: 1.0593326091766357,grad_norm: 0.9999998042400575, iteration: 217275
loss: 1.039801836013794,grad_norm: 0.9999997907084927, iteration: 217276
loss: 1.0234322547912598,grad_norm: 0.9999990569051406, iteration: 217277
loss: 1.0276809930801392,grad_norm: 0.9999994654654073, iteration: 217278
loss: 1.0098649263381958,grad_norm: 0.9999991334587917, iteration: 217279
loss: 1.0286259651184082,grad_norm: 0.9999998220979687, iteration: 217280
loss: 1.0469235181808472,grad_norm: 0.9999996476681348, iteration: 217281
loss: 1.0269888639450073,grad_norm: 0.8951237519722561, iteration: 217282
loss: 0.9765315651893616,grad_norm: 0.9999993229215847, iteration: 217283
loss: 1.0096131563186646,grad_norm: 0.9999991894885493, iteration: 217284
loss: 0.9759792685508728,grad_norm: 0.7872546845607125, iteration: 217285
loss: 1.009027123451233,grad_norm: 0.9583653569930629, iteration: 217286
loss: 0.9770551323890686,grad_norm: 0.8942458114928777, iteration: 217287
loss: 1.0885809659957886,grad_norm: 0.9999996995798338, iteration: 217288
loss: 1.0051637887954712,grad_norm: 0.8782472915182529, iteration: 217289
loss: 0.9920461773872375,grad_norm: 0.9180205281960637, iteration: 217290
loss: 0.9712708592414856,grad_norm: 0.9502688272282124, iteration: 217291
loss: 1.0178673267364502,grad_norm: 0.8236945317519199, iteration: 217292
loss: 1.0125946998596191,grad_norm: 0.981693949706765, iteration: 217293
loss: 0.9756868481636047,grad_norm: 0.9999991444911127, iteration: 217294
loss: 0.9754654169082642,grad_norm: 0.911279786249993, iteration: 217295
loss: 1.0046966075897217,grad_norm: 0.9999993751384751, iteration: 217296
loss: 1.00458824634552,grad_norm: 0.9999991773887859, iteration: 217297
loss: 0.9703189134597778,grad_norm: 0.9999991249230156, iteration: 217298
loss: 1.0277384519577026,grad_norm: 0.8076649529906289, iteration: 217299
loss: 0.9862440228462219,grad_norm: 0.9999989397390985, iteration: 217300
loss: 0.9726623892784119,grad_norm: 0.999999239942161, iteration: 217301
loss: 1.0144397020339966,grad_norm: 0.9999992240902832, iteration: 217302
loss: 1.0119905471801758,grad_norm: 0.9870398633006706, iteration: 217303
loss: 0.9973175525665283,grad_norm: 0.9999999342836491, iteration: 217304
loss: 1.0212230682373047,grad_norm: 0.9999989957835076, iteration: 217305
loss: 0.9973161220550537,grad_norm: 0.9999991051439989, iteration: 217306
loss: 0.9858031868934631,grad_norm: 0.8929075497143368, iteration: 217307
loss: 1.026816964149475,grad_norm: 0.9999991591589262, iteration: 217308
loss: 0.9810800552368164,grad_norm: 0.9400427740294185, iteration: 217309
loss: 1.1056849956512451,grad_norm: 0.9999992174200956, iteration: 217310
loss: 0.9853760600090027,grad_norm: 0.9999992022044752, iteration: 217311
loss: 1.0323256254196167,grad_norm: 0.9075262072238456, iteration: 217312
loss: 0.9764516949653625,grad_norm: 0.8821087967371833, iteration: 217313
loss: 1.0096619129180908,grad_norm: 0.9089149710776236, iteration: 217314
loss: 0.997707724571228,grad_norm: 0.9964559271068062, iteration: 217315
loss: 0.9945522546768188,grad_norm: 0.8691075941385666, iteration: 217316
loss: 1.0241053104400635,grad_norm: 0.9666825298675622, iteration: 217317
loss: 0.9820224046707153,grad_norm: 0.9999991273500449, iteration: 217318
loss: 0.9959822297096252,grad_norm: 0.9048575270133025, iteration: 217319
loss: 1.0062419176101685,grad_norm: 0.9999994716746959, iteration: 217320
loss: 0.9913920760154724,grad_norm: 0.8628664810265142, iteration: 217321
loss: 1.129308819770813,grad_norm: 0.9999999075027698, iteration: 217322
loss: 0.9623562693595886,grad_norm: 0.9827249998580555, iteration: 217323
loss: 0.9741981029510498,grad_norm: 0.9844549990337633, iteration: 217324
loss: 0.9902967810630798,grad_norm: 0.9637463615706136, iteration: 217325
loss: 1.0361725091934204,grad_norm: 0.9096309806243814, iteration: 217326
loss: 1.0564788579940796,grad_norm: 0.9999990594644229, iteration: 217327
loss: 0.9819956421852112,grad_norm: 0.9439200015123259, iteration: 217328
loss: 1.0114942789077759,grad_norm: 0.9999990593928892, iteration: 217329
loss: 0.9889929294586182,grad_norm: 0.9222266456182877, iteration: 217330
loss: 1.0428391695022583,grad_norm: 0.9999991512708986, iteration: 217331
loss: 1.010042667388916,grad_norm: 0.9563387633264568, iteration: 217332
loss: 1.0009127855300903,grad_norm: 0.8259426866274286, iteration: 217333
loss: 1.0027793645858765,grad_norm: 0.8969278275055022, iteration: 217334
loss: 1.0226161479949951,grad_norm: 0.9999991335208129, iteration: 217335
loss: 1.018733024597168,grad_norm: 0.9999999189274372, iteration: 217336
loss: 1.0108031034469604,grad_norm: 0.9999991208283949, iteration: 217337
loss: 1.0047410726547241,grad_norm: 0.9507267344829021, iteration: 217338
loss: 1.025353193283081,grad_norm: 0.9999990738339257, iteration: 217339
loss: 0.9732407927513123,grad_norm: 0.9011090853146149, iteration: 217340
loss: 0.9613568782806396,grad_norm: 0.9322141455097034, iteration: 217341
loss: 0.9896807670593262,grad_norm: 0.993015876029051, iteration: 217342
loss: 1.018883466720581,grad_norm: 0.9999990509117532, iteration: 217343
loss: 1.0160191059112549,grad_norm: 0.9690291526237403, iteration: 217344
loss: 1.0318232774734497,grad_norm: 0.8863624230314852, iteration: 217345
loss: 1.002950668334961,grad_norm: 0.8756602277126514, iteration: 217346
loss: 1.0287220478057861,grad_norm: 0.8568752639888619, iteration: 217347
loss: 0.9994040727615356,grad_norm: 0.8020449769110435, iteration: 217348
loss: 0.9845989942550659,grad_norm: 0.9956964042723109, iteration: 217349
loss: 1.009415864944458,grad_norm: 0.9999994643022986, iteration: 217350
loss: 1.0479519367218018,grad_norm: 0.9999993201673706, iteration: 217351
loss: 0.9929811358451843,grad_norm: 0.9906672417103726, iteration: 217352
loss: 0.9632513523101807,grad_norm: 0.9440158258728814, iteration: 217353
loss: 0.991942822933197,grad_norm: 0.851401461779995, iteration: 217354
loss: 0.9698657989501953,grad_norm: 0.8198112643491721, iteration: 217355
loss: 0.9936084151268005,grad_norm: 0.8386300946559726, iteration: 217356
loss: 1.0181454420089722,grad_norm: 0.7142963159049097, iteration: 217357
loss: 1.0270472764968872,grad_norm: 0.8984062655901138, iteration: 217358
loss: 1.006658673286438,grad_norm: 0.9999990690502787, iteration: 217359
loss: 0.9951076507568359,grad_norm: 0.8830126856500249, iteration: 217360
loss: 0.9872815608978271,grad_norm: 0.8150766385861979, iteration: 217361
loss: 1.0184868574142456,grad_norm: 0.9195956542866429, iteration: 217362
loss: 0.9902663826942444,grad_norm: 0.8604145982558074, iteration: 217363
loss: 1.012637972831726,grad_norm: 0.9880120681724763, iteration: 217364
loss: 1.0831385850906372,grad_norm: 0.9999992164243396, iteration: 217365
loss: 1.0657202005386353,grad_norm: 0.999999285430206, iteration: 217366
loss: 0.9632832407951355,grad_norm: 0.9357513935822521, iteration: 217367
loss: 1.0533581972122192,grad_norm: 0.86887022604217, iteration: 217368
loss: 1.0084664821624756,grad_norm: 0.9999991597510784, iteration: 217369
loss: 1.010047435760498,grad_norm: 0.9999991143219945, iteration: 217370
loss: 1.0151339769363403,grad_norm: 0.9999992445303538, iteration: 217371
loss: 0.9820381999015808,grad_norm: 0.8956545420551193, iteration: 217372
loss: 0.9916112422943115,grad_norm: 0.9999991988589528, iteration: 217373
loss: 1.0067036151885986,grad_norm: 0.9645757189374499, iteration: 217374
loss: 1.0055909156799316,grad_norm: 0.9999992422710674, iteration: 217375
loss: 0.9852967858314514,grad_norm: 0.910785809295662, iteration: 217376
loss: 1.016515851020813,grad_norm: 0.8834535039478288, iteration: 217377
loss: 0.9993537664413452,grad_norm: 0.8559486905893537, iteration: 217378
loss: 0.983931839466095,grad_norm: 0.9041629072478083, iteration: 217379
loss: 1.0122737884521484,grad_norm: 0.9791858705079819, iteration: 217380
loss: 0.985481321811676,grad_norm: 0.8110394404723361, iteration: 217381
loss: 0.9959924221038818,grad_norm: 0.9876406680286373, iteration: 217382
loss: 0.9817459583282471,grad_norm: 0.9277613182962501, iteration: 217383
loss: 0.991266131401062,grad_norm: 0.9457311355132967, iteration: 217384
loss: 1.0036228895187378,grad_norm: 0.7919344139896428, iteration: 217385
loss: 0.9819361567497253,grad_norm: 0.9318072193783048, iteration: 217386
loss: 0.9650481343269348,grad_norm: 0.8978723265862513, iteration: 217387
loss: 1.0091561079025269,grad_norm: 0.9999990803147064, iteration: 217388
loss: 1.0351128578186035,grad_norm: 0.9999992120022464, iteration: 217389
loss: 1.0214893817901611,grad_norm: 0.8782845469176385, iteration: 217390
loss: 1.032494306564331,grad_norm: 0.9999991379478598, iteration: 217391
loss: 1.0050448179244995,grad_norm: 0.9843653335700318, iteration: 217392
loss: 0.9949638247489929,grad_norm: 0.8789085968282837, iteration: 217393
loss: 1.0169553756713867,grad_norm: 0.8737071301779881, iteration: 217394
loss: 1.0079994201660156,grad_norm: 0.9542966768139085, iteration: 217395
loss: 1.0586035251617432,grad_norm: 0.9999990034986593, iteration: 217396
loss: 0.99140864610672,grad_norm: 0.9999990101344146, iteration: 217397
loss: 0.9940374493598938,grad_norm: 0.9770698906846933, iteration: 217398
loss: 0.9820124506950378,grad_norm: 0.882440903468153, iteration: 217399
loss: 1.0274230241775513,grad_norm: 0.9331461098610719, iteration: 217400
loss: 0.9699825644493103,grad_norm: 0.9155986043271748, iteration: 217401
loss: 0.9975867867469788,grad_norm: 0.8196197523202227, iteration: 217402
loss: 0.9745113849639893,grad_norm: 0.9299007753507906, iteration: 217403
loss: 1.0083069801330566,grad_norm: 0.8585061591305647, iteration: 217404
loss: 0.9803131818771362,grad_norm: 0.9893910803365624, iteration: 217405
loss: 1.0696932077407837,grad_norm: 0.9935110824555992, iteration: 217406
loss: 0.9695439338684082,grad_norm: 0.9999990109617648, iteration: 217407
loss: 0.9762125611305237,grad_norm: 0.9999989697343458, iteration: 217408
loss: 1.000427484512329,grad_norm: 0.9999990243582134, iteration: 217409
loss: 0.989591658115387,grad_norm: 0.854375882542798, iteration: 217410
loss: 1.0305838584899902,grad_norm: 0.9999990263250502, iteration: 217411
loss: 1.1793060302734375,grad_norm: 0.9999995720911028, iteration: 217412
loss: 1.0394359827041626,grad_norm: 0.9999993429264928, iteration: 217413
loss: 0.9719855785369873,grad_norm: 0.9409615013725383, iteration: 217414
loss: 0.9647016525268555,grad_norm: 0.8605115785675659, iteration: 217415
loss: 0.9933454394340515,grad_norm: 0.9438122824180514, iteration: 217416
loss: 1.020521640777588,grad_norm: 0.9224694721743562, iteration: 217417
loss: 1.0123753547668457,grad_norm: 0.8798209360857033, iteration: 217418
loss: 1.0065929889678955,grad_norm: 0.901873258213902, iteration: 217419
loss: 0.9753595590591431,grad_norm: 0.999999389796913, iteration: 217420
loss: 1.0113685131072998,grad_norm: 0.8827188892440566, iteration: 217421
loss: 0.9601593017578125,grad_norm: 0.9999992189234868, iteration: 217422
loss: 1.1191606521606445,grad_norm: 0.9999993367044849, iteration: 217423
loss: 1.0106465816497803,grad_norm: 0.9702497032048, iteration: 217424
loss: 1.003840684890747,grad_norm: 0.9842857242978594, iteration: 217425
loss: 0.9555549025535583,grad_norm: 0.9999991366730244, iteration: 217426
loss: 1.0323773622512817,grad_norm: 0.9109330709062771, iteration: 217427
loss: 0.9953365921974182,grad_norm: 0.9999992510037734, iteration: 217428
loss: 1.0875614881515503,grad_norm: 0.924607694981602, iteration: 217429
loss: 1.0317602157592773,grad_norm: 0.994794117787358, iteration: 217430
loss: 0.976262629032135,grad_norm: 0.9654700648075575, iteration: 217431
loss: 0.9973682761192322,grad_norm: 0.8774153820743594, iteration: 217432
loss: 0.9928892254829407,grad_norm: 0.9999992563340998, iteration: 217433
loss: 1.0058828592300415,grad_norm: 0.9138788298493217, iteration: 217434
loss: 0.9782620072364807,grad_norm: 0.940185836987455, iteration: 217435
loss: 1.0182090997695923,grad_norm: 0.9999994405652805, iteration: 217436
loss: 1.0030078887939453,grad_norm: 0.9917314916054909, iteration: 217437
loss: 1.0270106792449951,grad_norm: 0.999999043219608, iteration: 217438
loss: 0.9939159154891968,grad_norm: 0.9169047680830942, iteration: 217439
loss: 1.0213818550109863,grad_norm: 0.9999990755154744, iteration: 217440
loss: 1.044783353805542,grad_norm: 0.9999998872991732, iteration: 217441
loss: 1.0108957290649414,grad_norm: 0.9078096220962467, iteration: 217442
loss: 1.026282787322998,grad_norm: 0.8351103221708032, iteration: 217443
loss: 1.030177116394043,grad_norm: 0.9845167095781908, iteration: 217444
loss: 0.9915257096290588,grad_norm: 0.9143557559524687, iteration: 217445
loss: 0.9761112928390503,grad_norm: 0.9999992916678837, iteration: 217446
loss: 1.1017569303512573,grad_norm: 0.9999997153381787, iteration: 217447
loss: 1.0164976119995117,grad_norm: 0.8346673617531949, iteration: 217448
loss: 0.9727699160575867,grad_norm: 0.8212581459636323, iteration: 217449
loss: 1.003219723701477,grad_norm: 0.8032593810612915, iteration: 217450
loss: 1.4449584484100342,grad_norm: 0.9999993842099116, iteration: 217451
loss: 1.007926106452942,grad_norm: 0.9999994886291402, iteration: 217452
loss: 0.973420262336731,grad_norm: 0.8604615424289257, iteration: 217453
loss: 0.9938907623291016,grad_norm: 0.9999991571470027, iteration: 217454
loss: 0.9996606111526489,grad_norm: 0.8321771720800054, iteration: 217455
loss: 1.0169364213943481,grad_norm: 0.7984958014233662, iteration: 217456
loss: 1.0096714496612549,grad_norm: 0.9329914071202761, iteration: 217457
loss: 1.0129461288452148,grad_norm: 0.946169099781455, iteration: 217458
loss: 1.0062613487243652,grad_norm: 0.97748845705333, iteration: 217459
loss: 1.0145564079284668,grad_norm: 0.8707114157822772, iteration: 217460
loss: 1.0020098686218262,grad_norm: 0.9166084672002146, iteration: 217461
loss: 0.9877490401268005,grad_norm: 0.7902231991717463, iteration: 217462
loss: 0.9901259541511536,grad_norm: 0.744224641921146, iteration: 217463
loss: 1.0139400959014893,grad_norm: 0.9999997712486769, iteration: 217464
loss: 1.0148203372955322,grad_norm: 0.8064451618219094, iteration: 217465
loss: 0.954719066619873,grad_norm: 0.9999992144200427, iteration: 217466
loss: 1.0105605125427246,grad_norm: 0.9999994692476731, iteration: 217467
loss: 1.0152039527893066,grad_norm: 0.9999991037707144, iteration: 217468
loss: 1.0411227941513062,grad_norm: 0.9999993299000911, iteration: 217469
loss: 0.987392783164978,grad_norm: 0.8827688532885177, iteration: 217470
loss: 1.021243691444397,grad_norm: 0.9999989917145622, iteration: 217471
loss: 0.9755295515060425,grad_norm: 0.9232114052283922, iteration: 217472
loss: 1.0120716094970703,grad_norm: 0.873265370956418, iteration: 217473
loss: 1.0279117822647095,grad_norm: 0.9999991565882752, iteration: 217474
loss: 0.9853981733322144,grad_norm: 0.9999990714761764, iteration: 217475
loss: 1.0106571912765503,grad_norm: 0.8701889749330424, iteration: 217476
loss: 0.9589358568191528,grad_norm: 0.8384631913925187, iteration: 217477
loss: 0.9856777787208557,grad_norm: 0.971047326477091, iteration: 217478
loss: 1.0133867263793945,grad_norm: 0.9399528068631576, iteration: 217479
loss: 1.0368671417236328,grad_norm: 0.8944762274590453, iteration: 217480
loss: 1.004541277885437,grad_norm: 0.9303877796419658, iteration: 217481
loss: 0.9974619150161743,grad_norm: 0.9272694613263021, iteration: 217482
loss: 0.964695394039154,grad_norm: 0.9698142916514152, iteration: 217483
loss: 1.00839102268219,grad_norm: 0.9722048480559479, iteration: 217484
loss: 1.0338208675384521,grad_norm: 0.9094139820358346, iteration: 217485
loss: 1.014926552772522,grad_norm: 0.8874539236884158, iteration: 217486
loss: 1.009660243988037,grad_norm: 0.8590376912027486, iteration: 217487
loss: 1.0992257595062256,grad_norm: 0.9999999065749515, iteration: 217488
loss: 1.0593595504760742,grad_norm: 0.9999993556058374, iteration: 217489
loss: 1.0377719402313232,grad_norm: 0.8589953209733556, iteration: 217490
loss: 0.9793031215667725,grad_norm: 0.9999990869290241, iteration: 217491
loss: 1.0091826915740967,grad_norm: 0.999999509050125, iteration: 217492
loss: 0.990027904510498,grad_norm: 0.9291853194271683, iteration: 217493
loss: 0.9670508503913879,grad_norm: 0.8581011874335095, iteration: 217494
loss: 1.033831238746643,grad_norm: 0.8418527011376854, iteration: 217495
loss: 0.998881995677948,grad_norm: 0.9514376226958843, iteration: 217496
loss: 1.0197029113769531,grad_norm: 0.9999992514338398, iteration: 217497
loss: 1.0164672136306763,grad_norm: 0.7847793762421188, iteration: 217498
loss: 1.0236681699752808,grad_norm: 0.7580052488439519, iteration: 217499
loss: 1.018473744392395,grad_norm: 0.8122784623546542, iteration: 217500
loss: 1.007529616355896,grad_norm: 0.7331457076962608, iteration: 217501
loss: 1.0096243619918823,grad_norm: 0.9389658207846531, iteration: 217502
loss: 0.9770155549049377,grad_norm: 0.8452776752029068, iteration: 217503
loss: 1.0023815631866455,grad_norm: 0.9999990024775413, iteration: 217504
loss: 0.9918285012245178,grad_norm: 0.9736087963499913, iteration: 217505
loss: 0.9778713583946228,grad_norm: 0.9897883973125956, iteration: 217506
loss: 1.000484585762024,grad_norm: 0.9999991379535784, iteration: 217507
loss: 1.0091966390609741,grad_norm: 0.8094376357572614, iteration: 217508
loss: 1.0809404850006104,grad_norm: 0.908536232114248, iteration: 217509
loss: 1.0071539878845215,grad_norm: 0.8218826866203052, iteration: 217510
loss: 0.9308393001556396,grad_norm: 0.9332876870095477, iteration: 217511
loss: 0.9844906330108643,grad_norm: 0.9305191029939184, iteration: 217512
loss: 1.0658804178237915,grad_norm: 0.9999990859724356, iteration: 217513
loss: 1.016309142112732,grad_norm: 0.8921121656915092, iteration: 217514
loss: 1.0339847803115845,grad_norm: 0.8832725628083258, iteration: 217515
loss: 0.9892186522483826,grad_norm: 0.9137337042765755, iteration: 217516
loss: 0.9925978183746338,grad_norm: 0.9999990687251394, iteration: 217517
loss: 0.9945852160453796,grad_norm: 0.674296591094413, iteration: 217518
loss: 0.9447312951087952,grad_norm: 0.9999992738343119, iteration: 217519
loss: 0.9380151629447937,grad_norm: 0.9999989540215732, iteration: 217520
loss: 1.041316270828247,grad_norm: 0.9999999507233548, iteration: 217521
loss: 1.00899338722229,grad_norm: 0.7888576887114834, iteration: 217522
loss: 1.0113667249679565,grad_norm: 0.8477296438088269, iteration: 217523
loss: 1.017409324645996,grad_norm: 0.9410934643218178, iteration: 217524
loss: 1.0067168474197388,grad_norm: 0.9999990319924139, iteration: 217525
loss: 0.9921973347663879,grad_norm: 0.925830089530883, iteration: 217526
loss: 0.9860021471977234,grad_norm: 0.9999991008903103, iteration: 217527
loss: 1.0007520914077759,grad_norm: 0.9553861552339143, iteration: 217528
loss: 0.9917070865631104,grad_norm: 0.8028224328559156, iteration: 217529
loss: 1.008995771408081,grad_norm: 0.9999992590997901, iteration: 217530
loss: 0.9684774875640869,grad_norm: 0.9688100488742791, iteration: 217531
loss: 0.9843172430992126,grad_norm: 0.9483351796465909, iteration: 217532
loss: 1.006065845489502,grad_norm: 0.9834684004004395, iteration: 217533
loss: 1.0510315895080566,grad_norm: 0.999999067295373, iteration: 217534
loss: 1.0175095796585083,grad_norm: 0.9095945348639759, iteration: 217535
loss: 1.090537190437317,grad_norm: 0.9999990185566066, iteration: 217536
loss: 1.0460565090179443,grad_norm: 0.9583615544629186, iteration: 217537
loss: 1.0189038515090942,grad_norm: 0.9999996249001338, iteration: 217538
loss: 0.9811606407165527,grad_norm: 0.9999998277772727, iteration: 217539
loss: 1.0176726579666138,grad_norm: 0.9999989849204773, iteration: 217540
loss: 1.003806471824646,grad_norm: 0.8940865580650403, iteration: 217541
loss: 1.017379879951477,grad_norm: 0.8643176533005301, iteration: 217542
loss: 0.971672534942627,grad_norm: 0.9603514021303979, iteration: 217543
loss: 0.9976760745048523,grad_norm: 0.88923652683924, iteration: 217544
loss: 0.9969931840896606,grad_norm: 0.967628377462382, iteration: 217545
loss: 1.0254544019699097,grad_norm: 0.9999991125301255, iteration: 217546
loss: 1.0216619968414307,grad_norm: 0.9605948276024806, iteration: 217547
loss: 1.021896481513977,grad_norm: 0.829653987859611, iteration: 217548
loss: 0.9981801509857178,grad_norm: 0.9517851740430479, iteration: 217549
loss: 0.9635494351387024,grad_norm: 0.8651634988650406, iteration: 217550
loss: 1.038057804107666,grad_norm: 0.9999997007216443, iteration: 217551
loss: 1.00320303440094,grad_norm: 0.8704448342955193, iteration: 217552
loss: 1.0052212476730347,grad_norm: 0.9999991417976312, iteration: 217553
loss: 0.9987685680389404,grad_norm: 0.998211668277917, iteration: 217554
loss: 0.9749677777290344,grad_norm: 0.9999991902244896, iteration: 217555
loss: 0.9837179780006409,grad_norm: 0.9999995460601703, iteration: 217556
loss: 1.0322152376174927,grad_norm: 0.9999991355458606, iteration: 217557
loss: 0.9970450401306152,grad_norm: 0.9999991255766929, iteration: 217558
loss: 1.055328607559204,grad_norm: 0.9869415662249471, iteration: 217559
loss: 0.9746859073638916,grad_norm: 0.9593546735193839, iteration: 217560
loss: 1.0360283851623535,grad_norm: 0.999999426689277, iteration: 217561
loss: 0.9628480672836304,grad_norm: 0.9241251340921337, iteration: 217562
loss: 0.9963444471359253,grad_norm: 0.7227908789379098, iteration: 217563
loss: 1.0449395179748535,grad_norm: 0.9684005096959859, iteration: 217564
loss: 0.9796323776245117,grad_norm: 0.9999991299326549, iteration: 217565
loss: 0.9767827987670898,grad_norm: 0.926128595408115, iteration: 217566
loss: 0.9713118076324463,grad_norm: 0.9472090649486928, iteration: 217567
loss: 0.976308286190033,grad_norm: 0.9373256397105488, iteration: 217568
loss: 0.9969118237495422,grad_norm: 0.9999990173116063, iteration: 217569
loss: 0.9799009561538696,grad_norm: 0.999999213339702, iteration: 217570
loss: 1.0134844779968262,grad_norm: 0.9999992697934171, iteration: 217571
loss: 1.0078569650650024,grad_norm: 0.848005656399978, iteration: 217572
loss: 1.0281909704208374,grad_norm: 0.9999993658728239, iteration: 217573
loss: 0.9966474771499634,grad_norm: 0.9999990595053936, iteration: 217574
loss: 1.0298129320144653,grad_norm: 0.974725811789907, iteration: 217575
loss: 1.0061417818069458,grad_norm: 0.9813800399929475, iteration: 217576
loss: 1.0006543397903442,grad_norm: 0.9806132386648156, iteration: 217577
loss: 1.049871563911438,grad_norm: 0.9999994538034537, iteration: 217578
loss: 0.9778110384941101,grad_norm: 0.9746771578749439, iteration: 217579
loss: 1.0919491052627563,grad_norm: 0.9999998214206749, iteration: 217580
loss: 1.0960004329681396,grad_norm: 0.9999996128132131, iteration: 217581
loss: 0.9874983429908752,grad_norm: 0.9999991274824248, iteration: 217582
loss: 1.0484673976898193,grad_norm: 0.9234867341337064, iteration: 217583
loss: 0.9784996509552002,grad_norm: 0.9943971533738719, iteration: 217584
loss: 1.0360467433929443,grad_norm: 0.9999990058202114, iteration: 217585
loss: 1.0268117189407349,grad_norm: 0.9999990551664177, iteration: 217586
loss: 0.9880999326705933,grad_norm: 0.9090793381047859, iteration: 217587
loss: 0.983140766620636,grad_norm: 0.9354141559618707, iteration: 217588
loss: 0.9494365453720093,grad_norm: 0.9381502801629367, iteration: 217589
loss: 0.9735225439071655,grad_norm: 0.9999992351853717, iteration: 217590
loss: 0.9795076251029968,grad_norm: 0.8517997831338573, iteration: 217591
loss: 1.0297186374664307,grad_norm: 0.9999993119576268, iteration: 217592
loss: 0.9857920408248901,grad_norm: 0.9013315526401582, iteration: 217593
loss: 1.0553102493286133,grad_norm: 0.9999992819931358, iteration: 217594
loss: 1.0242482423782349,grad_norm: 0.7146409526191889, iteration: 217595
loss: 1.021230936050415,grad_norm: 0.9999998575178227, iteration: 217596
loss: 1.0648022890090942,grad_norm: 0.9999993124249663, iteration: 217597
loss: 1.0375878810882568,grad_norm: 0.9668492623428995, iteration: 217598
loss: 0.9947566390037537,grad_norm: 0.9999992453193532, iteration: 217599
loss: 0.9978402256965637,grad_norm: 0.8666273333867214, iteration: 217600
loss: 1.0122190713882446,grad_norm: 0.901450805336872, iteration: 217601
loss: 1.0020403861999512,grad_norm: 0.9625339500954653, iteration: 217602
loss: 1.0032838582992554,grad_norm: 0.9371024248551354, iteration: 217603
loss: 1.0239890813827515,grad_norm: 0.9999993817044432, iteration: 217604
loss: 1.0415605306625366,grad_norm: 0.9999996099263274, iteration: 217605
loss: 0.970206618309021,grad_norm: 0.7683851897378401, iteration: 217606
loss: 1.036335825920105,grad_norm: 0.9999996909619555, iteration: 217607
loss: 1.003914713859558,grad_norm: 0.826117536638732, iteration: 217608
loss: 1.0316275358200073,grad_norm: 0.8909819994106866, iteration: 217609
loss: 1.0198984146118164,grad_norm: 0.8884069837771851, iteration: 217610
loss: 1.0117677450180054,grad_norm: 0.8652203210361596, iteration: 217611
loss: 0.9950710535049438,grad_norm: 0.9094995526636507, iteration: 217612
loss: 0.9981964826583862,grad_norm: 0.9999992950697738, iteration: 217613
loss: 1.0205926895141602,grad_norm: 0.8425472083505487, iteration: 217614
loss: 0.980059027671814,grad_norm: 0.9845564830619239, iteration: 217615
loss: 1.0068764686584473,grad_norm: 0.9040431605034254, iteration: 217616
loss: 1.0121983289718628,grad_norm: 0.9999990736268313, iteration: 217617
loss: 0.9869949221611023,grad_norm: 0.8544193823744443, iteration: 217618
loss: 0.9903489947319031,grad_norm: 0.9408912658198618, iteration: 217619
loss: 1.013963222503662,grad_norm: 0.9523563479065753, iteration: 217620
loss: 1.0266703367233276,grad_norm: 0.8847615989667705, iteration: 217621
loss: 1.001625657081604,grad_norm: 0.9739427390346125, iteration: 217622
loss: 0.9842420220375061,grad_norm: 0.9999989568607863, iteration: 217623
loss: 1.0085210800170898,grad_norm: 0.999999123317506, iteration: 217624
loss: 0.986271321773529,grad_norm: 0.8830583415493737, iteration: 217625
loss: 1.022554636001587,grad_norm: 0.9587993672552839, iteration: 217626
loss: 0.9957312941551208,grad_norm: 0.9999992600879312, iteration: 217627
loss: 1.00312077999115,grad_norm: 0.8312162727408838, iteration: 217628
loss: 1.0189329385757446,grad_norm: 0.9746164919032054, iteration: 217629
loss: 1.002422571182251,grad_norm: 0.7835332459927966, iteration: 217630
loss: 1.09946608543396,grad_norm: 0.9999991598323625, iteration: 217631
loss: 1.006581425666809,grad_norm: 0.9999992098289497, iteration: 217632
loss: 1.000524640083313,grad_norm: 0.8682387877674191, iteration: 217633
loss: 1.020015835762024,grad_norm: 0.9232493175078278, iteration: 217634
loss: 1.0026860237121582,grad_norm: 0.937114637049232, iteration: 217635
loss: 1.0357635021209717,grad_norm: 0.9999991345344702, iteration: 217636
loss: 1.0056768655776978,grad_norm: 0.8777873262862261, iteration: 217637
loss: 0.9777225852012634,grad_norm: 0.7492573876150429, iteration: 217638
loss: 1.0335428714752197,grad_norm: 0.8899785126863364, iteration: 217639
loss: 0.9884272217750549,grad_norm: 0.8747192247086025, iteration: 217640
loss: 0.9525213241577148,grad_norm: 0.9999990378620531, iteration: 217641
loss: 0.9946596026420593,grad_norm: 0.9593514693598356, iteration: 217642
loss: 1.0161221027374268,grad_norm: 0.9999991236790874, iteration: 217643
loss: 1.0031508207321167,grad_norm: 0.7640972430146696, iteration: 217644
loss: 0.9847666621208191,grad_norm: 0.8830673250159085, iteration: 217645
loss: 0.9950639605522156,grad_norm: 0.8221849636903771, iteration: 217646
loss: 1.0011340379714966,grad_norm: 0.8437813093940123, iteration: 217647
loss: 0.9887335896492004,grad_norm: 0.8174947609085971, iteration: 217648
loss: 1.0215778350830078,grad_norm: 0.9093964417126603, iteration: 217649
loss: 1.0488156080245972,grad_norm: 0.9999997165554076, iteration: 217650
loss: 1.032048225402832,grad_norm: 0.9519309619268084, iteration: 217651
loss: 0.9885407090187073,grad_norm: 0.7729053627136139, iteration: 217652
loss: 1.114954948425293,grad_norm: 0.9999999089432583, iteration: 217653
loss: 0.9699755907058716,grad_norm: 0.9575085460058302, iteration: 217654
loss: 1.0022799968719482,grad_norm: 0.8832481428480534, iteration: 217655
loss: 1.0381981134414673,grad_norm: 0.9929734701811307, iteration: 217656
loss: 0.9573412537574768,grad_norm: 0.9301588152451343, iteration: 217657
loss: 1.0077576637268066,grad_norm: 0.9999990590303826, iteration: 217658
loss: 0.9870880246162415,grad_norm: 0.8614371537466472, iteration: 217659
loss: 0.9812637567520142,grad_norm: 0.9616025799108511, iteration: 217660
loss: 1.0232768058776855,grad_norm: 0.866820285316815, iteration: 217661
loss: 0.9554888010025024,grad_norm: 0.8540140843447787, iteration: 217662
loss: 0.9962948560714722,grad_norm: 0.8601580297757758, iteration: 217663
loss: 1.00583016872406,grad_norm: 0.9999991632563825, iteration: 217664
loss: 1.001716136932373,grad_norm: 0.8983291144113161, iteration: 217665
loss: 1.0051981210708618,grad_norm: 0.9999990374987493, iteration: 217666
loss: 0.9785927534103394,grad_norm: 0.9184780296557493, iteration: 217667
loss: 1.000929832458496,grad_norm: 0.9999991008842585, iteration: 217668
loss: 1.0112541913986206,grad_norm: 0.969973720414806, iteration: 217669
loss: 1.0020986795425415,grad_norm: 0.999999020038977, iteration: 217670
loss: 1.02162766456604,grad_norm: 0.9999992646983313, iteration: 217671
loss: 0.9845101833343506,grad_norm: 0.9999992386568093, iteration: 217672
loss: 0.9917265772819519,grad_norm: 0.978795615994666, iteration: 217673
loss: 1.019877314567566,grad_norm: 0.9999994898852415, iteration: 217674
loss: 0.9930392503738403,grad_norm: 0.8033942338902609, iteration: 217675
loss: 0.9733332395553589,grad_norm: 0.9520028073759582, iteration: 217676
loss: 1.0299872159957886,grad_norm: 0.9365824820782094, iteration: 217677
loss: 1.0029207468032837,grad_norm: 0.9999994965396486, iteration: 217678
loss: 1.0014129877090454,grad_norm: 0.913270369825804, iteration: 217679
loss: 0.9823293685913086,grad_norm: 0.9763359047990953, iteration: 217680
loss: 1.0153361558914185,grad_norm: 0.8170055657735298, iteration: 217681
loss: 0.9948800206184387,grad_norm: 0.9999991677703409, iteration: 217682
loss: 0.9706855416297913,grad_norm: 0.9204320215664619, iteration: 217683
loss: 0.959919810295105,grad_norm: 0.951409184345274, iteration: 217684
loss: 1.0369784832000732,grad_norm: 0.9999991891056464, iteration: 217685
loss: 0.9840265512466431,grad_norm: 0.9679543254772713, iteration: 217686
loss: 1.0648571252822876,grad_norm: 0.9999997554155431, iteration: 217687
loss: 0.9915801882743835,grad_norm: 0.8290215438831993, iteration: 217688
loss: 0.9930056929588318,grad_norm: 0.8421101296112645, iteration: 217689
loss: 0.9937180876731873,grad_norm: 0.974159518496972, iteration: 217690
loss: 1.0145901441574097,grad_norm: 0.8304808367233502, iteration: 217691
loss: 0.9923573136329651,grad_norm: 0.8046061260243961, iteration: 217692
loss: 0.9945378303527832,grad_norm: 0.8989718949661578, iteration: 217693
loss: 1.0580618381500244,grad_norm: 0.9999991745145995, iteration: 217694
loss: 0.9690170884132385,grad_norm: 0.8666011709035757, iteration: 217695
loss: 0.9841936230659485,grad_norm: 0.9611700203085389, iteration: 217696
loss: 1.014626145362854,grad_norm: 0.9631603878421279, iteration: 217697
loss: 1.0247167348861694,grad_norm: 0.9321179341044503, iteration: 217698
loss: 0.9840089678764343,grad_norm: 0.9999991710159188, iteration: 217699
loss: 0.9962299466133118,grad_norm: 0.7715274211175134, iteration: 217700
loss: 0.9926435947418213,grad_norm: 0.906983525820962, iteration: 217701
loss: 1.0811272859573364,grad_norm: 0.999999223359962, iteration: 217702
loss: 1.005541443824768,grad_norm: 0.999999183859027, iteration: 217703
loss: 1.0039070844650269,grad_norm: 0.993252131142046, iteration: 217704
loss: 1.013676643371582,grad_norm: 0.9999992759940253, iteration: 217705
loss: 0.9891475439071655,grad_norm: 0.8460476062025359, iteration: 217706
loss: 0.9778764247894287,grad_norm: 0.8792638515856173, iteration: 217707
loss: 1.021342396736145,grad_norm: 0.9999990948832044, iteration: 217708
loss: 1.018442153930664,grad_norm: 0.9999989980779364, iteration: 217709
loss: 1.0711617469787598,grad_norm: 0.9633506931264042, iteration: 217710
loss: 0.9863735437393188,grad_norm: 0.9985846126603347, iteration: 217711
loss: 0.9646387100219727,grad_norm: 0.9999991630319953, iteration: 217712
loss: 0.9956516027450562,grad_norm: 0.8827545152308914, iteration: 217713
loss: 1.1058428287506104,grad_norm: 0.9999996917248322, iteration: 217714
loss: 1.0531572103500366,grad_norm: 0.9647901876280072, iteration: 217715
loss: 1.02293062210083,grad_norm: 0.9999998801015518, iteration: 217716
loss: 1.0160772800445557,grad_norm: 0.9639138508119646, iteration: 217717
loss: 1.0062311887741089,grad_norm: 0.9349879321669422, iteration: 217718
loss: 0.9810547828674316,grad_norm: 0.8054896029935306, iteration: 217719
loss: 0.991970419883728,grad_norm: 0.9999992691416036, iteration: 217720
loss: 0.9795621037483215,grad_norm: 0.9999990856106787, iteration: 217721
loss: 0.985002338886261,grad_norm: 0.9277623848212486, iteration: 217722
loss: 1.0169490575790405,grad_norm: 0.9999990539968575, iteration: 217723
loss: 1.052248477935791,grad_norm: 0.9999990824065575, iteration: 217724
loss: 1.016717553138733,grad_norm: 0.9289244847797964, iteration: 217725
loss: 0.999505341053009,grad_norm: 0.9601905135033623, iteration: 217726
loss: 0.9936494827270508,grad_norm: 0.8456028501537083, iteration: 217727
loss: 0.9736001491546631,grad_norm: 0.9999990856910596, iteration: 217728
loss: 0.9933288097381592,grad_norm: 0.8292756310974798, iteration: 217729
loss: 0.9953359365463257,grad_norm: 0.9999991128853343, iteration: 217730
loss: 0.9869539141654968,grad_norm: 0.7962354565000102, iteration: 217731
loss: 1.0640792846679688,grad_norm: 0.9216070194194084, iteration: 217732
loss: 0.9878128170967102,grad_norm: 0.7847575417403868, iteration: 217733
loss: 0.9892591238021851,grad_norm: 0.9999991470599057, iteration: 217734
loss: 1.0282632112503052,grad_norm: 0.8376370405539111, iteration: 217735
loss: 0.9989209175109863,grad_norm: 0.9999990430868939, iteration: 217736
loss: 1.0169697999954224,grad_norm: 0.8922787499957597, iteration: 217737
loss: 0.9851571917533875,grad_norm: 0.9763548078908645, iteration: 217738
loss: 0.9887866973876953,grad_norm: 0.9964530687507956, iteration: 217739
loss: 1.0200704336166382,grad_norm: 0.9445597590459723, iteration: 217740
loss: 1.0394549369812012,grad_norm: 0.9657612766577247, iteration: 217741
loss: 0.9683079123497009,grad_norm: 0.8554535656146901, iteration: 217742
loss: 0.9981609582901001,grad_norm: 0.9999991594987377, iteration: 217743
loss: 1.0010466575622559,grad_norm: 0.9999991121475806, iteration: 217744
loss: 1.0188368558883667,grad_norm: 0.8242343472906808, iteration: 217745
loss: 1.000404953956604,grad_norm: 0.9867998665860012, iteration: 217746
loss: 0.9813245534896851,grad_norm: 0.8452581359826448, iteration: 217747
loss: 0.970297634601593,grad_norm: 0.9526281585940489, iteration: 217748
loss: 0.9991863369941711,grad_norm: 0.8495501437394913, iteration: 217749
loss: 1.0067182779312134,grad_norm: 0.9999990261649667, iteration: 217750
loss: 1.027576208114624,grad_norm: 0.9094543915861623, iteration: 217751
loss: 1.0054692029953003,grad_norm: 0.9850855361096631, iteration: 217752
loss: 0.9747126698493958,grad_norm: 0.9999990524963447, iteration: 217753
loss: 0.9692354798316956,grad_norm: 0.8259398617102723, iteration: 217754
loss: 1.007781982421875,grad_norm: 0.9999991899253614, iteration: 217755
loss: 1.0080856084823608,grad_norm: 0.999998982669901, iteration: 217756
loss: 1.019176721572876,grad_norm: 0.9023944041698602, iteration: 217757
loss: 1.0425255298614502,grad_norm: 0.899372439360688, iteration: 217758
loss: 1.0237791538238525,grad_norm: 0.8132211565532459, iteration: 217759
loss: 0.9737734794616699,grad_norm: 0.9458466060106668, iteration: 217760
loss: 0.9551308751106262,grad_norm: 0.8684736609147953, iteration: 217761
loss: 1.0350565910339355,grad_norm: 0.8910283980105033, iteration: 217762
loss: 1.0004570484161377,grad_norm: 0.8153368330254154, iteration: 217763
loss: 0.9985717535018921,grad_norm: 0.7616096284668123, iteration: 217764
loss: 1.005006194114685,grad_norm: 0.999999171924962, iteration: 217765
loss: 1.0168770551681519,grad_norm: 0.999999636626604, iteration: 217766
loss: 0.9907302260398865,grad_norm: 0.7908936034017595, iteration: 217767
loss: 1.0350828170776367,grad_norm: 0.9793639412908742, iteration: 217768
loss: 0.9987838268280029,grad_norm: 0.7497609863302108, iteration: 217769
loss: 1.0801302194595337,grad_norm: 0.9999998746709963, iteration: 217770
loss: 0.9821817278862,grad_norm: 0.9814150755329861, iteration: 217771
loss: 1.229233980178833,grad_norm: 0.9999994577802566, iteration: 217772
loss: 1.0316858291625977,grad_norm: 0.9999995374260643, iteration: 217773
loss: 1.0218390226364136,grad_norm: 0.9999995762717627, iteration: 217774
loss: 0.9861047267913818,grad_norm: 0.8951812623246372, iteration: 217775
loss: 1.0094045400619507,grad_norm: 0.9999990749817914, iteration: 217776
loss: 0.9564857482910156,grad_norm: 0.9941337170998815, iteration: 217777
loss: 1.0440102815628052,grad_norm: 0.9999991427947824, iteration: 217778
loss: 1.0239636898040771,grad_norm: 0.9999991300900611, iteration: 217779
loss: 1.0207735300064087,grad_norm: 0.9583231628394054, iteration: 217780
loss: 0.9905564785003662,grad_norm: 0.9302864257413198, iteration: 217781
loss: 1.042372703552246,grad_norm: 0.9999995371602335, iteration: 217782
loss: 0.9756103754043579,grad_norm: 0.9999990666992216, iteration: 217783
loss: 1.0898184776306152,grad_norm: 0.9999993767045026, iteration: 217784
loss: 0.9554145932197571,grad_norm: 0.8127354005539552, iteration: 217785
loss: 1.1344681978225708,grad_norm: 0.999999307701062, iteration: 217786
loss: 1.0161616802215576,grad_norm: 0.9444095570002435, iteration: 217787
loss: 1.0122991800308228,grad_norm: 0.8954010058125071, iteration: 217788
loss: 1.0190523862838745,grad_norm: 0.9999997774748363, iteration: 217789
loss: 0.9974009990692139,grad_norm: 0.9999992779053831, iteration: 217790
loss: 1.02474045753479,grad_norm: 0.9999991517388617, iteration: 217791
loss: 0.9844546318054199,grad_norm: 0.7968572051581922, iteration: 217792
loss: 0.9349609613418579,grad_norm: 0.9592333510537246, iteration: 217793
loss: 1.0360559225082397,grad_norm: 0.9507844343148989, iteration: 217794
loss: 1.008665919303894,grad_norm: 0.9999998692537652, iteration: 217795
loss: 1.0127702951431274,grad_norm: 0.9862853607161527, iteration: 217796
loss: 1.0523160696029663,grad_norm: 0.9627014210844315, iteration: 217797
loss: 1.0055516958236694,grad_norm: 0.9999993941215867, iteration: 217798
loss: 1.0038955211639404,grad_norm: 0.9509764822206651, iteration: 217799
loss: 0.9820465445518494,grad_norm: 0.9744474037255504, iteration: 217800
loss: 0.9620069265365601,grad_norm: 0.9398842943802189, iteration: 217801
loss: 0.9826360940933228,grad_norm: 0.956993859702847, iteration: 217802
loss: 0.9815579652786255,grad_norm: 0.8599434301028651, iteration: 217803
loss: 1.0389763116836548,grad_norm: 0.991472483676864, iteration: 217804
loss: 1.0210843086242676,grad_norm: 0.9806060102102074, iteration: 217805
loss: 1.038406491279602,grad_norm: 0.9999990870018923, iteration: 217806
loss: 1.025481104850769,grad_norm: 0.8703009927211101, iteration: 217807
loss: 0.9931755661964417,grad_norm: 0.9999990370426893, iteration: 217808
loss: 1.0400713682174683,grad_norm: 0.9460122056825414, iteration: 217809
loss: 0.9962695240974426,grad_norm: 0.8883528176868889, iteration: 217810
loss: 1.112115740776062,grad_norm: 0.9999998492042462, iteration: 217811
loss: 0.985665500164032,grad_norm: 0.7863738403346484, iteration: 217812
loss: 1.054091215133667,grad_norm: 0.9999996921870075, iteration: 217813
loss: 0.9736339449882507,grad_norm: 0.9999991378394832, iteration: 217814
loss: 1.0326913595199585,grad_norm: 0.9323449682994671, iteration: 217815
loss: 0.9954008460044861,grad_norm: 0.831611600810025, iteration: 217816
loss: 0.9713828563690186,grad_norm: 0.9211532367929564, iteration: 217817
loss: 0.9665403962135315,grad_norm: 0.9281791397745738, iteration: 217818
loss: 1.007959246635437,grad_norm: 0.7666592513259111, iteration: 217819
loss: 0.9953514337539673,grad_norm: 0.8365558783580922, iteration: 217820
loss: 1.0156172513961792,grad_norm: 0.6876035428448894, iteration: 217821
loss: 1.0152034759521484,grad_norm: 0.9999992057340713, iteration: 217822
loss: 0.9786197543144226,grad_norm: 0.9702256893261463, iteration: 217823
loss: 1.0033345222473145,grad_norm: 0.9724537345801441, iteration: 217824
loss: 1.0078208446502686,grad_norm: 0.9497938685880702, iteration: 217825
loss: 1.0033848285675049,grad_norm: 0.9407672492798175, iteration: 217826
loss: 0.9554065465927124,grad_norm: 0.9999990601563409, iteration: 217827
loss: 0.984656810760498,grad_norm: 0.9849788037578093, iteration: 217828
loss: 0.9778785705566406,grad_norm: 0.999998952802996, iteration: 217829
loss: 0.993678629398346,grad_norm: 0.9375378567914422, iteration: 217830
loss: 0.9582178592681885,grad_norm: 0.9999989226426989, iteration: 217831
loss: 0.9946597218513489,grad_norm: 0.9999989991537475, iteration: 217832
loss: 1.019446849822998,grad_norm: 0.9189747158485353, iteration: 217833
loss: 1.0101162195205688,grad_norm: 0.9999990980213209, iteration: 217834
loss: 1.0247098207473755,grad_norm: 0.9999997017964968, iteration: 217835
loss: 1.0196129083633423,grad_norm: 0.9481071392241471, iteration: 217836
loss: 1.0036685466766357,grad_norm: 0.878155815244289, iteration: 217837
loss: 0.9937654137611389,grad_norm: 0.8092482403369561, iteration: 217838
loss: 1.0275399684906006,grad_norm: 0.9810425112596523, iteration: 217839
loss: 1.0060049295425415,grad_norm: 0.7701587304183742, iteration: 217840
loss: 1.0231189727783203,grad_norm: 0.8669690986792337, iteration: 217841
loss: 0.9641491174697876,grad_norm: 0.999999213302138, iteration: 217842
loss: 1.0394506454467773,grad_norm: 0.9591025736821485, iteration: 217843
loss: 1.0318158864974976,grad_norm: 0.9999995717744466, iteration: 217844
loss: 0.9756996035575867,grad_norm: 0.8341088390977044, iteration: 217845
loss: 0.9898471236228943,grad_norm: 0.9787365096673138, iteration: 217846
loss: 1.0209347009658813,grad_norm: 0.9388604045384902, iteration: 217847
loss: 0.973821759223938,grad_norm: 0.9892876521365112, iteration: 217848
loss: 0.9702423810958862,grad_norm: 0.9495460295836169, iteration: 217849
loss: 1.0028728246688843,grad_norm: 0.9999989681018332, iteration: 217850
loss: 0.9840636849403381,grad_norm: 0.9042564426887127, iteration: 217851
loss: 1.0360901355743408,grad_norm: 0.9763504071824775, iteration: 217852
loss: 0.981380045413971,grad_norm: 0.9617754065234235, iteration: 217853
loss: 0.9567955136299133,grad_norm: 0.9999992844264197, iteration: 217854
loss: 0.9944312572479248,grad_norm: 0.9021863502825104, iteration: 217855
loss: 0.9971311688423157,grad_norm: 0.9999990267769064, iteration: 217856
loss: 0.9856362342834473,grad_norm: 0.9999991340825737, iteration: 217857
loss: 1.0258020162582397,grad_norm: 0.8870686654952235, iteration: 217858
loss: 1.0470731258392334,grad_norm: 0.9999992360824039, iteration: 217859
loss: 0.9753948450088501,grad_norm: 0.9999990421870119, iteration: 217860
loss: 0.9685283303260803,grad_norm: 0.9384819257449286, iteration: 217861
loss: 0.9973412752151489,grad_norm: 0.7976639629954793, iteration: 217862
loss: 1.067597508430481,grad_norm: 0.9999991303991927, iteration: 217863
loss: 0.9620525240898132,grad_norm: 0.8813715222193462, iteration: 217864
loss: 0.998782217502594,grad_norm: 0.8920620126286152, iteration: 217865
loss: 1.0198031663894653,grad_norm: 0.985236850995323, iteration: 217866
loss: 1.0060582160949707,grad_norm: 0.9404323138426058, iteration: 217867
loss: 0.9970928430557251,grad_norm: 0.9252973106206417, iteration: 217868
loss: 1.0095345973968506,grad_norm: 0.9999991876825646, iteration: 217869
loss: 1.0020806789398193,grad_norm: 0.9822298852141365, iteration: 217870
loss: 1.0047924518585205,grad_norm: 0.9322751669266885, iteration: 217871
loss: 1.0045949220657349,grad_norm: 0.9272164612758613, iteration: 217872
loss: 0.9765465259552002,grad_norm: 0.8800454137244178, iteration: 217873
loss: 0.979603111743927,grad_norm: 0.9120744837035323, iteration: 217874
loss: 0.9821569323539734,grad_norm: 0.9354826654833636, iteration: 217875
loss: 0.9887551665306091,grad_norm: 0.9109723765437682, iteration: 217876
loss: 0.9432904720306396,grad_norm: 0.8766371753839053, iteration: 217877
loss: 1.0098110437393188,grad_norm: 0.9999989462482566, iteration: 217878
loss: 0.9829020500183105,grad_norm: 0.8414279009516202, iteration: 217879
loss: 0.9721623063087463,grad_norm: 0.8632858865754232, iteration: 217880
loss: 1.0662672519683838,grad_norm: 0.999999045823993, iteration: 217881
loss: 1.0766243934631348,grad_norm: 0.9999991043047445, iteration: 217882
loss: 1.0198097229003906,grad_norm: 0.8423160918882531, iteration: 217883
loss: 1.0011086463928223,grad_norm: 0.8742509444259537, iteration: 217884
loss: 0.9968833327293396,grad_norm: 0.7622061977678865, iteration: 217885
loss: 0.9449676275253296,grad_norm: 0.9991401198717722, iteration: 217886
loss: 1.0470201969146729,grad_norm: 0.9999991079298272, iteration: 217887
loss: 0.9729261994361877,grad_norm: 0.8027910985909357, iteration: 217888
loss: 0.9780639410018921,grad_norm: 0.8708443055260904, iteration: 217889
loss: 0.9643072485923767,grad_norm: 0.7907779262991222, iteration: 217890
loss: 0.9946815371513367,grad_norm: 0.7843041852538076, iteration: 217891
loss: 1.00007164478302,grad_norm: 0.8907634988043899, iteration: 217892
loss: 1.01689875125885,grad_norm: 0.9999991675034132, iteration: 217893
loss: 0.9777957797050476,grad_norm: 0.9662077812586106, iteration: 217894
loss: 1.021618127822876,grad_norm: 0.9999992050311609, iteration: 217895
loss: 1.0126392841339111,grad_norm: 0.9999989704544195, iteration: 217896
loss: 0.9993810057640076,grad_norm: 0.9999990073489413, iteration: 217897
loss: 1.0283371210098267,grad_norm: 0.9999991905854609, iteration: 217898
loss: 0.9865251183509827,grad_norm: 0.8436365028754886, iteration: 217899
loss: 1.0097163915634155,grad_norm: 0.8848041609072345, iteration: 217900
loss: 0.995489239692688,grad_norm: 0.9999991165542856, iteration: 217901
loss: 0.9643520712852478,grad_norm: 0.9393401603930993, iteration: 217902
loss: 1.0173379182815552,grad_norm: 0.9999999980097615, iteration: 217903
loss: 1.09212327003479,grad_norm: 0.9999996393327718, iteration: 217904
loss: 0.9910686612129211,grad_norm: 0.8572846606507987, iteration: 217905
loss: 1.0704481601715088,grad_norm: 0.99999992979807, iteration: 217906
loss: 1.0314112901687622,grad_norm: 0.9999991280472429, iteration: 217907
loss: 1.0161266326904297,grad_norm: 0.8104997982710739, iteration: 217908
loss: 1.0685948133468628,grad_norm: 0.9999990653935374, iteration: 217909
loss: 0.9821300506591797,grad_norm: 0.804203457574933, iteration: 217910
loss: 1.0041321516036987,grad_norm: 0.8042418822911862, iteration: 217911
loss: 0.9683552980422974,grad_norm: 0.9975194752002584, iteration: 217912
loss: 1.0151692628860474,grad_norm: 0.99999917798679, iteration: 217913
loss: 0.9835750460624695,grad_norm: 0.8177779601895422, iteration: 217914
loss: 1.0747722387313843,grad_norm: 0.9999999990237676, iteration: 217915
loss: 1.0100736618041992,grad_norm: 0.9167615834920207, iteration: 217916
loss: 1.0175243616104126,grad_norm: 0.9999990782185311, iteration: 217917
loss: 1.0100901126861572,grad_norm: 0.8458435695714589, iteration: 217918
loss: 0.9712042808532715,grad_norm: 0.9999990802651849, iteration: 217919
loss: 1.0235018730163574,grad_norm: 0.9999996799601774, iteration: 217920
loss: 1.019431710243225,grad_norm: 0.9989175450042238, iteration: 217921
loss: 1.0408066511154175,grad_norm: 0.9999991371977117, iteration: 217922
loss: 1.06883704662323,grad_norm: 0.9999995852691983, iteration: 217923
loss: 0.9981315732002258,grad_norm: 0.9086878651539658, iteration: 217924
loss: 1.0078564882278442,grad_norm: 0.999999016563666, iteration: 217925
loss: 1.036595344543457,grad_norm: 0.9999991616055686, iteration: 217926
loss: 1.0907975435256958,grad_norm: 0.9999999740411283, iteration: 217927
loss: 1.1856505870819092,grad_norm: 0.9999995189448283, iteration: 217928
loss: 1.0682545900344849,grad_norm: 0.999999505750098, iteration: 217929
loss: 1.0061581134796143,grad_norm: 0.9300442737104764, iteration: 217930
loss: 1.0558096170425415,grad_norm: 0.9999993028135262, iteration: 217931
loss: 0.9872320890426636,grad_norm: 0.8049437240524453, iteration: 217932
loss: 1.0253331661224365,grad_norm: 0.9783526500998642, iteration: 217933
loss: 0.9806559085845947,grad_norm: 0.8896285791881761, iteration: 217934
loss: 0.9991576671600342,grad_norm: 0.8713048766240299, iteration: 217935
loss: 1.0226975679397583,grad_norm: 0.8475564595633871, iteration: 217936
loss: 1.0532631874084473,grad_norm: 0.9448540183708459, iteration: 217937
loss: 0.9802485108375549,grad_norm: 0.9999990778313188, iteration: 217938
loss: 0.9410958886146545,grad_norm: 0.9999990320572277, iteration: 217939
loss: 1.003057837486267,grad_norm: 0.9999991507613569, iteration: 217940
loss: 1.014155626296997,grad_norm: 0.7960615278749715, iteration: 217941
loss: 1.0156434774398804,grad_norm: 0.9745322610339338, iteration: 217942
loss: 0.9962999820709229,grad_norm: 0.8615684146162004, iteration: 217943
loss: 1.020415186882019,grad_norm: 0.9355852955853006, iteration: 217944
loss: 1.0698045492172241,grad_norm: 0.9999998252066666, iteration: 217945
loss: 0.9656392931938171,grad_norm: 0.9131780017022278, iteration: 217946
loss: 0.9919427037239075,grad_norm: 0.9999997435023804, iteration: 217947
loss: 1.0359997749328613,grad_norm: 0.8988905078574262, iteration: 217948
loss: 1.0564064979553223,grad_norm: 0.9999991328645382, iteration: 217949
loss: 1.0000090599060059,grad_norm: 0.8848481589897933, iteration: 217950
loss: 0.9693116545677185,grad_norm: 0.9999991500939749, iteration: 217951
loss: 0.9886037111282349,grad_norm: 0.9999992120057986, iteration: 217952
loss: 1.0057969093322754,grad_norm: 0.9999991947784624, iteration: 217953
loss: 0.9825538396835327,grad_norm: 0.9327089604424297, iteration: 217954
loss: 1.1130008697509766,grad_norm: 0.9999992890779105, iteration: 217955
loss: 0.9791414141654968,grad_norm: 0.9015818829095859, iteration: 217956
loss: 0.9626886248588562,grad_norm: 0.8708837021189152, iteration: 217957
loss: 1.0723804235458374,grad_norm: 0.9931824856349454, iteration: 217958
loss: 1.0244468450546265,grad_norm: 0.9999990744015875, iteration: 217959
loss: 0.9504128694534302,grad_norm: 0.9999992052860739, iteration: 217960
loss: 0.9970090985298157,grad_norm: 0.9999989618518116, iteration: 217961
loss: 1.013999581336975,grad_norm: 0.973469596529652, iteration: 217962
loss: 1.0203185081481934,grad_norm: 0.9999990540952809, iteration: 217963
loss: 1.0077457427978516,grad_norm: 0.9999998930555035, iteration: 217964
loss: 1.0235118865966797,grad_norm: 0.8657146977561562, iteration: 217965
loss: 1.0121816396713257,grad_norm: 0.9999991851961653, iteration: 217966
loss: 1.0012508630752563,grad_norm: 0.9999992188477643, iteration: 217967
loss: 0.9776032567024231,grad_norm: 0.9907025205809983, iteration: 217968
loss: 0.9507589936256409,grad_norm: 0.9999989933975176, iteration: 217969
loss: 1.016616940498352,grad_norm: 0.964208154817578, iteration: 217970
loss: 0.9537472724914551,grad_norm: 0.8720950173303351, iteration: 217971
loss: 0.9841489195823669,grad_norm: 0.9999995106021314, iteration: 217972
loss: 0.9742433428764343,grad_norm: 0.8512373535101155, iteration: 217973
loss: 0.9818059802055359,grad_norm: 0.8724476884319612, iteration: 217974
loss: 0.9693928360939026,grad_norm: 0.9999991602080949, iteration: 217975
loss: 0.9952945709228516,grad_norm: 0.8852113482566258, iteration: 217976
loss: 0.9763375520706177,grad_norm: 0.9999998367682942, iteration: 217977
loss: 1.0063129663467407,grad_norm: 0.9999990255687078, iteration: 217978
loss: 1.0393460988998413,grad_norm: 0.967484862843908, iteration: 217979
loss: 1.0092086791992188,grad_norm: 0.9231706121687715, iteration: 217980
loss: 1.030741572380066,grad_norm: 0.99999907608716, iteration: 217981
loss: 1.0186958312988281,grad_norm: 0.9999991720960939, iteration: 217982
loss: 0.9992568492889404,grad_norm: 0.937926053624275, iteration: 217983
loss: 1.029840111732483,grad_norm: 0.933956718766976, iteration: 217984
loss: 0.9562293887138367,grad_norm: 0.9999991309798127, iteration: 217985
loss: 0.9803298711776733,grad_norm: 0.9999990934294817, iteration: 217986
loss: 0.9805638194084167,grad_norm: 0.9901960088782598, iteration: 217987
loss: 1.1008703708648682,grad_norm: 0.9999994355947419, iteration: 217988
loss: 0.9977508783340454,grad_norm: 0.9525305885953405, iteration: 217989
loss: 1.0065059661865234,grad_norm: 0.8413616030346555, iteration: 217990
loss: 1.0451023578643799,grad_norm: 0.8987568559824047, iteration: 217991
loss: 0.9937368631362915,grad_norm: 0.9643709682862607, iteration: 217992
loss: 1.0128755569458008,grad_norm: 0.9999990773584079, iteration: 217993
loss: 0.9857023358345032,grad_norm: 0.9999991159296451, iteration: 217994
loss: 1.0333539247512817,grad_norm: 0.8524573818580625, iteration: 217995
loss: 1.0013556480407715,grad_norm: 0.8709708259639387, iteration: 217996
loss: 0.9784632921218872,grad_norm: 0.9999992360866223, iteration: 217997
loss: 1.032083511352539,grad_norm: 0.9470830495745715, iteration: 217998
loss: 0.9874905943870544,grad_norm: 0.9799592404315999, iteration: 217999
loss: 0.9798796772956848,grad_norm: 0.9999998788380177, iteration: 218000
loss: 0.9977105259895325,grad_norm: 0.9999991660033068, iteration: 218001
loss: 0.9738869667053223,grad_norm: 0.9357326699482615, iteration: 218002
loss: 0.9663679599761963,grad_norm: 0.9999990065187971, iteration: 218003
loss: 1.0009199380874634,grad_norm: 0.9060090871765623, iteration: 218004
loss: 0.9962838292121887,grad_norm: 0.99999914655825, iteration: 218005
loss: 0.9904882907867432,grad_norm: 0.9999991652467993, iteration: 218006
loss: 0.979823648929596,grad_norm: 0.9321822686175786, iteration: 218007
loss: 1.0609740018844604,grad_norm: 0.9028099763091789, iteration: 218008
loss: 1.0092788934707642,grad_norm: 0.9049035211879165, iteration: 218009
loss: 1.0470389127731323,grad_norm: 0.9999990399374622, iteration: 218010
loss: 0.973852813243866,grad_norm: 0.9999989949264079, iteration: 218011
loss: 0.9414108991622925,grad_norm: 0.9028075289371703, iteration: 218012
loss: 1.0327924489974976,grad_norm: 0.8911243993940703, iteration: 218013
loss: 0.9632977843284607,grad_norm: 0.999999040476003, iteration: 218014
loss: 1.0133939981460571,grad_norm: 0.8726624782142537, iteration: 218015
loss: 0.9837912321090698,grad_norm: 0.9350122333171959, iteration: 218016
loss: 0.9802259206771851,grad_norm: 0.9999989662800407, iteration: 218017
loss: 1.0033718347549438,grad_norm: 0.8818025935435535, iteration: 218018
loss: 0.9943994879722595,grad_norm: 0.8734991155775144, iteration: 218019
loss: 1.0075867176055908,grad_norm: 0.9999992106963661, iteration: 218020
loss: 1.015589952468872,grad_norm: 0.9999990420444697, iteration: 218021
loss: 0.9783927798271179,grad_norm: 0.9281742958034763, iteration: 218022
loss: 1.0095493793487549,grad_norm: 0.9999990701567464, iteration: 218023
loss: 1.0259939432144165,grad_norm: 0.999999622228342, iteration: 218024
loss: 0.9904270768165588,grad_norm: 0.811672386539864, iteration: 218025
loss: 0.9737393260002136,grad_norm: 0.8883483553914701, iteration: 218026
loss: 1.0318083763122559,grad_norm: 0.8173594936999191, iteration: 218027
loss: 0.9632266163825989,grad_norm: 0.9348980894340578, iteration: 218028
loss: 0.9936833381652832,grad_norm: 0.9999989639072906, iteration: 218029
loss: 1.009080171585083,grad_norm: 0.868359844989879, iteration: 218030
loss: 1.0009562969207764,grad_norm: 0.862891281490638, iteration: 218031
loss: 0.9514892101287842,grad_norm: 0.9608437007440769, iteration: 218032
loss: 0.9612114429473877,grad_norm: 0.9354002474341413, iteration: 218033
loss: 0.9725968241691589,grad_norm: 0.9264412260098566, iteration: 218034
loss: 1.0483875274658203,grad_norm: 0.9999993658168085, iteration: 218035
loss: 1.0446476936340332,grad_norm: 0.9999990020353288, iteration: 218036
loss: 1.0052385330200195,grad_norm: 0.9374402147806724, iteration: 218037
loss: 0.9819346070289612,grad_norm: 0.8911781549191773, iteration: 218038
loss: 0.9985649585723877,grad_norm: 0.9999990410337123, iteration: 218039
loss: 0.9921610951423645,grad_norm: 0.9257711025916213, iteration: 218040
loss: 0.9972219467163086,grad_norm: 0.8069602026582074, iteration: 218041
loss: 1.0069634914398193,grad_norm: 0.9001596228286091, iteration: 218042
loss: 1.0319265127182007,grad_norm: 0.9696756500698075, iteration: 218043
loss: 1.00295090675354,grad_norm: 0.9999990978881547, iteration: 218044
loss: 1.0109524726867676,grad_norm: 0.9823447505690646, iteration: 218045
loss: 0.9539951682090759,grad_norm: 0.9371725693284134, iteration: 218046
loss: 0.9989109635353088,grad_norm: 0.9999991003437108, iteration: 218047
loss: 1.001546025276184,grad_norm: 0.8482074615538296, iteration: 218048
loss: 1.0131853818893433,grad_norm: 0.9111305591941972, iteration: 218049
loss: 0.9854543209075928,grad_norm: 0.9999990774880323, iteration: 218050
loss: 1.006090521812439,grad_norm: 0.9999998963292078, iteration: 218051
loss: 0.9604741930961609,grad_norm: 0.9593434926281896, iteration: 218052
loss: 0.9560195207595825,grad_norm: 0.914867233403925, iteration: 218053
loss: 1.0004140138626099,grad_norm: 0.8155396785272473, iteration: 218054
loss: 1.0209856033325195,grad_norm: 0.7548354913108573, iteration: 218055
loss: 0.9802556037902832,grad_norm: 0.9386858482134182, iteration: 218056
loss: 1.0189144611358643,grad_norm: 0.8168956464849444, iteration: 218057
loss: 1.018173336982727,grad_norm: 0.9468254297512446, iteration: 218058
loss: 0.9992789626121521,grad_norm: 0.7791393061332577, iteration: 218059
loss: 1.013302206993103,grad_norm: 0.8983228728270273, iteration: 218060
loss: 1.0119495391845703,grad_norm: 0.9437861286257715, iteration: 218061
loss: 0.9973428845405579,grad_norm: 0.9999990094173818, iteration: 218062
loss: 0.986212968826294,grad_norm: 0.9999992115950787, iteration: 218063
loss: 1.0325286388397217,grad_norm: 0.9999999022538305, iteration: 218064
loss: 1.0053240060806274,grad_norm: 0.999999122025754, iteration: 218065
loss: 1.0005046129226685,grad_norm: 0.8243289949494915, iteration: 218066
loss: 0.994234561920166,grad_norm: 0.8591591684428901, iteration: 218067
loss: 0.9623529314994812,grad_norm: 0.9729013636657033, iteration: 218068
loss: 1.01752769947052,grad_norm: 0.8601428805401251, iteration: 218069
loss: 1.0192238092422485,grad_norm: 0.905672264315627, iteration: 218070
loss: 1.0209343433380127,grad_norm: 0.9999991411209845, iteration: 218071
loss: 0.9899453520774841,grad_norm: 0.9358108187158931, iteration: 218072
loss: 1.0622881650924683,grad_norm: 0.9999992331538887, iteration: 218073
loss: 0.9925593137741089,grad_norm: 0.9999994712641477, iteration: 218074
loss: 1.001456379890442,grad_norm: 0.9799813650398753, iteration: 218075
loss: 1.0126787424087524,grad_norm: 0.9999992241576384, iteration: 218076
loss: 1.086272120475769,grad_norm: 0.9999990528759017, iteration: 218077
loss: 0.9891201853752136,grad_norm: 0.774829927156052, iteration: 218078
loss: 0.9856905937194824,grad_norm: 0.8413224102759599, iteration: 218079
loss: 1.023504376411438,grad_norm: 0.9999993278174536, iteration: 218080
loss: 1.0401268005371094,grad_norm: 1.0000000266711624, iteration: 218081
loss: 1.0042062997817993,grad_norm: 0.9478187006573251, iteration: 218082
loss: 0.9644442796707153,grad_norm: 0.8927211909593186, iteration: 218083
loss: 1.006439447402954,grad_norm: 0.9999991140946844, iteration: 218084
loss: 1.0078089237213135,grad_norm: 0.9999992329207168, iteration: 218085
loss: 0.9925345182418823,grad_norm: 0.9999990990748594, iteration: 218086
loss: 1.0757015943527222,grad_norm: 0.9999991434869013, iteration: 218087
loss: 1.0396130084991455,grad_norm: 0.9999996688642144, iteration: 218088
loss: 0.9557457566261292,grad_norm: 0.9999990003000571, iteration: 218089
loss: 1.0091193914413452,grad_norm: 0.9254982581512626, iteration: 218090
loss: 0.9585868120193481,grad_norm: 0.8083922647792932, iteration: 218091
loss: 0.9916831254959106,grad_norm: 0.8879866415886577, iteration: 218092
loss: 1.0271345376968384,grad_norm: 0.9403532889606693, iteration: 218093
loss: 1.0199474096298218,grad_norm: 0.8574528149425364, iteration: 218094
loss: 1.0018885135650635,grad_norm: 0.9002529460835046, iteration: 218095
loss: 1.0293207168579102,grad_norm: 0.9999992389602036, iteration: 218096
loss: 0.9848406314849854,grad_norm: 0.8143641600220715, iteration: 218097
loss: 0.9853087067604065,grad_norm: 0.955651591598732, iteration: 218098
loss: 1.0015920400619507,grad_norm: 0.8134364918491328, iteration: 218099
loss: 1.0140414237976074,grad_norm: 0.9836203008310769, iteration: 218100
loss: 1.021290898323059,grad_norm: 0.9999994874549828, iteration: 218101
loss: 1.018458604812622,grad_norm: 0.8761586969683375, iteration: 218102
loss: 1.0309529304504395,grad_norm: 0.9999998216210813, iteration: 218103
loss: 1.002318024635315,grad_norm: 0.863990177678169, iteration: 218104
loss: 0.9740360975265503,grad_norm: 0.9999991959993484, iteration: 218105
loss: 1.0248827934265137,grad_norm: 0.7824790595589325, iteration: 218106
loss: 0.9955810308456421,grad_norm: 0.9044410066530434, iteration: 218107
loss: 1.0482391119003296,grad_norm: 0.8459919621224606, iteration: 218108
loss: 1.0276429653167725,grad_norm: 0.9999989971339274, iteration: 218109
loss: 0.9781075119972229,grad_norm: 0.8498466036105757, iteration: 218110
loss: 0.993098795413971,grad_norm: 0.9376418420922351, iteration: 218111
loss: 1.0338735580444336,grad_norm: 0.9999991611183188, iteration: 218112
loss: 0.9788086414337158,grad_norm: 0.9522861652151067, iteration: 218113
loss: 1.0019901990890503,grad_norm: 0.9999991951081767, iteration: 218114
loss: 1.0194100141525269,grad_norm: 0.9788948958659108, iteration: 218115
loss: 0.9941151738166809,grad_norm: 0.9051175210296728, iteration: 218116
loss: 0.9847957491874695,grad_norm: 0.9496681679277358, iteration: 218117
loss: 1.0035035610198975,grad_norm: 0.905666145558552, iteration: 218118
loss: 1.0492948293685913,grad_norm: 0.9999997672592564, iteration: 218119
loss: 0.9813599586486816,grad_norm: 0.9656294020520273, iteration: 218120
loss: 0.9826892018318176,grad_norm: 0.7680619363997198, iteration: 218121
loss: 1.1231223344802856,grad_norm: 0.9999993327924644, iteration: 218122
loss: 1.0128459930419922,grad_norm: 0.9999991374097857, iteration: 218123
loss: 1.0053482055664062,grad_norm: 0.8317085028055774, iteration: 218124
loss: 0.9921972751617432,grad_norm: 0.9999990589135641, iteration: 218125
loss: 1.0104246139526367,grad_norm: 0.900176277351461, iteration: 218126
loss: 1.002460241317749,grad_norm: 0.9441018833083769, iteration: 218127
loss: 0.9995136857032776,grad_norm: 0.9999992187471797, iteration: 218128
loss: 1.0225787162780762,grad_norm: 0.9999991075642286, iteration: 218129
loss: 1.0143715143203735,grad_norm: 0.9999996322143725, iteration: 218130
loss: 0.9908530712127686,grad_norm: 0.9105968082557275, iteration: 218131
loss: 0.9858894944190979,grad_norm: 0.9999992152298359, iteration: 218132
loss: 1.0088618993759155,grad_norm: 0.9999991075453469, iteration: 218133
loss: 0.9992024302482605,grad_norm: 0.8032437872871172, iteration: 218134
loss: 0.9934420585632324,grad_norm: 0.999999136095204, iteration: 218135
loss: 0.9982352256774902,grad_norm: 0.9089379682570522, iteration: 218136
loss: 0.9960834980010986,grad_norm: 0.9999991489713503, iteration: 218137
loss: 1.0314013957977295,grad_norm: 0.9999991986932416, iteration: 218138
loss: 1.0166813135147095,grad_norm: 0.9646897493058598, iteration: 218139
loss: 1.0908422470092773,grad_norm: 0.9999993473335983, iteration: 218140
loss: 0.9734624624252319,grad_norm: 0.9129490424438149, iteration: 218141
loss: 1.019811749458313,grad_norm: 0.9963276382361897, iteration: 218142
loss: 1.0971983671188354,grad_norm: 0.9999992777747372, iteration: 218143
loss: 0.9805769920349121,grad_norm: 0.9677062900378012, iteration: 218144
loss: 1.031253457069397,grad_norm: 0.9999991518731527, iteration: 218145
loss: 0.9685907959938049,grad_norm: 0.8941130650431589, iteration: 218146
loss: 0.9827293157577515,grad_norm: 0.9274005483822181, iteration: 218147
loss: 1.0094934701919556,grad_norm: 0.9587903466802525, iteration: 218148
loss: 1.0157241821289062,grad_norm: 0.9765645224555517, iteration: 218149
loss: 1.002199411392212,grad_norm: 0.8179092339538402, iteration: 218150
loss: 1.0229452848434448,grad_norm: 0.8929945322263285, iteration: 218151
loss: 0.9730257987976074,grad_norm: 0.9999990258268049, iteration: 218152
loss: 0.9594756960868835,grad_norm: 0.9388102334305829, iteration: 218153
loss: 1.0269263982772827,grad_norm: 0.9113099715982105, iteration: 218154
loss: 1.0127825736999512,grad_norm: 0.9901647270836885, iteration: 218155
loss: 0.9884654879570007,grad_norm: 0.9999991501826412, iteration: 218156
loss: 1.012269139289856,grad_norm: 0.9999992447129771, iteration: 218157
loss: 1.0235859155654907,grad_norm: 0.9999991676095019, iteration: 218158
loss: 1.004128336906433,grad_norm: 0.9999999212880749, iteration: 218159
loss: 0.9896626472473145,grad_norm: 0.9997711918173097, iteration: 218160
loss: 1.0151147842407227,grad_norm: 0.8262038060403236, iteration: 218161
loss: 0.9682915806770325,grad_norm: 0.8654146670965257, iteration: 218162
loss: 1.015058994293213,grad_norm: 0.8299213250713733, iteration: 218163
loss: 0.9984038472175598,grad_norm: 0.9856666606898363, iteration: 218164
loss: 1.041739583015442,grad_norm: 0.9999990077153056, iteration: 218165
loss: 1.0105081796646118,grad_norm: 0.9305749833787732, iteration: 218166
loss: 1.0164648294448853,grad_norm: 0.9999998457616147, iteration: 218167
loss: 0.9772653579711914,grad_norm: 0.8720213230555177, iteration: 218168
loss: 1.0031163692474365,grad_norm: 0.8453122009095831, iteration: 218169
loss: 1.0121333599090576,grad_norm: 0.9999990686298132, iteration: 218170
loss: 0.9767850041389465,grad_norm: 0.8684423048863367, iteration: 218171
loss: 1.0046361684799194,grad_norm: 0.9999991459840699, iteration: 218172
loss: 0.9732285141944885,grad_norm: 0.8038373723140454, iteration: 218173
loss: 1.0239073038101196,grad_norm: 0.9999991499298435, iteration: 218174
loss: 0.9776046872138977,grad_norm: 0.9999995270848201, iteration: 218175
loss: 1.013486623764038,grad_norm: 0.9999991325489174, iteration: 218176
loss: 0.985916793346405,grad_norm: 0.9077357791515577, iteration: 218177
loss: 0.9473177194595337,grad_norm: 0.9999991707701269, iteration: 218178
loss: 1.0171809196472168,grad_norm: 0.8483753718891345, iteration: 218179
loss: 1.0045483112335205,grad_norm: 0.9999991044732564, iteration: 218180
loss: 1.0064160823822021,grad_norm: 0.8716391098066448, iteration: 218181
loss: 0.9880895018577576,grad_norm: 0.9181256114937294, iteration: 218182
loss: 1.019620656967163,grad_norm: 0.969476848731881, iteration: 218183
loss: 0.9969812035560608,grad_norm: 0.8839484509458921, iteration: 218184
loss: 1.0293724536895752,grad_norm: 0.9783729998402158, iteration: 218185
loss: 0.9636380672454834,grad_norm: 0.9999991735756947, iteration: 218186
loss: 0.9782936573028564,grad_norm: 0.99999982041668, iteration: 218187
loss: 1.0093796253204346,grad_norm: 0.9999992548460764, iteration: 218188
loss: 1.0245544910430908,grad_norm: 0.9382087748671246, iteration: 218189
loss: 0.9919997453689575,grad_norm: 0.8738226612180606, iteration: 218190
loss: 0.9738547205924988,grad_norm: 0.9474476438754574, iteration: 218191
loss: 0.9931654930114746,grad_norm: 0.9999991216335546, iteration: 218192
loss: 1.0460494756698608,grad_norm: 0.9629129666622098, iteration: 218193
loss: 1.0054796934127808,grad_norm: 0.8094208240399516, iteration: 218194
loss: 0.9638258218765259,grad_norm: 0.8303567291523728, iteration: 218195
loss: 1.0074150562286377,grad_norm: 0.8501778218085771, iteration: 218196
loss: 1.003103494644165,grad_norm: 0.8612869861269955, iteration: 218197
loss: 1.0027910470962524,grad_norm: 0.9511664323254131, iteration: 218198
loss: 1.0107018947601318,grad_norm: 0.9585447631792475, iteration: 218199
loss: 0.9652177691459656,grad_norm: 0.9515886820146224, iteration: 218200
loss: 0.9846659898757935,grad_norm: 0.8612537909049339, iteration: 218201
loss: 1.02945876121521,grad_norm: 0.9547195587699898, iteration: 218202
loss: 0.9875519275665283,grad_norm: 0.9999990610879503, iteration: 218203
loss: 1.0145372152328491,grad_norm: 0.9999992513091392, iteration: 218204
loss: 1.018763542175293,grad_norm: 0.9999989476880362, iteration: 218205
loss: 1.0015969276428223,grad_norm: 0.7307083009404289, iteration: 218206
loss: 0.9701932072639465,grad_norm: 0.9772296155776763, iteration: 218207
loss: 1.0076708793640137,grad_norm: 0.8864787640301168, iteration: 218208
loss: 1.007818341255188,grad_norm: 0.7758434164277833, iteration: 218209
loss: 1.0135122537612915,grad_norm: 0.9999990690497842, iteration: 218210
loss: 1.0148036479949951,grad_norm: 0.9999991072717698, iteration: 218211
loss: 0.9976984262466431,grad_norm: 0.9999989628611761, iteration: 218212
loss: 1.0371509790420532,grad_norm: 0.9579894876911058, iteration: 218213
loss: 1.029082179069519,grad_norm: 0.9999989374164981, iteration: 218214
loss: 0.9996349811553955,grad_norm: 0.9286810587486068, iteration: 218215
loss: 1.0063096284866333,grad_norm: 0.9104247414917714, iteration: 218216
loss: 0.9695292711257935,grad_norm: 0.8940838130053652, iteration: 218217
loss: 1.0160713195800781,grad_norm: 0.9999991111632327, iteration: 218218
loss: 0.9940489530563354,grad_norm: 0.7831539956438485, iteration: 218219
loss: 1.0189769268035889,grad_norm: 0.9180820349809363, iteration: 218220
loss: 0.9893635511398315,grad_norm: 0.9999990224158198, iteration: 218221
loss: 0.9865429401397705,grad_norm: 0.9347628362232802, iteration: 218222
loss: 0.9764915108680725,grad_norm: 0.9999990247823772, iteration: 218223
loss: 0.9723039269447327,grad_norm: 0.9999991462534993, iteration: 218224
loss: 1.0068573951721191,grad_norm: 0.9221193847537821, iteration: 218225
loss: 1.021329402923584,grad_norm: 0.9220643329129316, iteration: 218226
loss: 0.9753957986831665,grad_norm: 0.9277542773354179, iteration: 218227
loss: 0.995094895362854,grad_norm: 0.948806075240155, iteration: 218228
loss: 1.0257041454315186,grad_norm: 0.9663695144591108, iteration: 218229
loss: 0.9833011627197266,grad_norm: 0.9397875651264544, iteration: 218230
loss: 1.0102030038833618,grad_norm: 0.8750751220254133, iteration: 218231
loss: 0.9788507223129272,grad_norm: 0.9497509885635848, iteration: 218232
loss: 1.0160552263259888,grad_norm: 0.9521517308861223, iteration: 218233
loss: 1.003755807876587,grad_norm: 1.0000000482743336, iteration: 218234
loss: 0.9940552711486816,grad_norm: 0.9544968572916127, iteration: 218235
loss: 0.988586962223053,grad_norm: 0.9269751479175974, iteration: 218236
loss: 1.0400193929672241,grad_norm: 0.9543730538941751, iteration: 218237
loss: 0.98926842212677,grad_norm: 0.9375954445705937, iteration: 218238
loss: 0.9998239874839783,grad_norm: 0.8482556634558603, iteration: 218239
loss: 1.038707971572876,grad_norm: 0.9214101836988163, iteration: 218240
loss: 0.968707263469696,grad_norm: 0.8657260772353997, iteration: 218241
loss: 1.053965449333191,grad_norm: 0.9999991869495628, iteration: 218242
loss: 1.0413020849227905,grad_norm: 0.999999625882605, iteration: 218243
loss: 0.9932717680931091,grad_norm: 0.8269602185373591, iteration: 218244
loss: 1.0317567586898804,grad_norm: 0.8835967598749327, iteration: 218245
loss: 1.0034689903259277,grad_norm: 0.9999990335652086, iteration: 218246
loss: 0.9905936121940613,grad_norm: 0.8480307470682155, iteration: 218247
loss: 0.9679598808288574,grad_norm: 0.9410336252320135, iteration: 218248
loss: 0.9749507308006287,grad_norm: 0.8396007560798472, iteration: 218249
loss: 0.9552837610244751,grad_norm: 0.8870925069864454, iteration: 218250
loss: 0.9843947887420654,grad_norm: 0.9205477343620754, iteration: 218251
loss: 1.0883833169937134,grad_norm: 0.9999989645822213, iteration: 218252
loss: 1.0031530857086182,grad_norm: 0.9999990912926583, iteration: 218253
loss: 1.0238114595413208,grad_norm: 0.9060203078989746, iteration: 218254
loss: 1.0004581212997437,grad_norm: 0.8559290986085467, iteration: 218255
loss: 1.0134632587432861,grad_norm: 0.933040232547658, iteration: 218256
loss: 0.9693470001220703,grad_norm: 0.9767486625298679, iteration: 218257
loss: 1.0290923118591309,grad_norm: 0.7844461718851783, iteration: 218258
loss: 0.9858137965202332,grad_norm: 0.9230912023896457, iteration: 218259
loss: 0.9909465312957764,grad_norm: 0.9999991184895386, iteration: 218260
loss: 0.9764884114265442,grad_norm: 0.8038966990559056, iteration: 218261
loss: 0.9802344441413879,grad_norm: 0.9999992260472459, iteration: 218262
loss: 0.9772149920463562,grad_norm: 0.970720890754843, iteration: 218263
loss: 0.9693044424057007,grad_norm: 0.999998919930183, iteration: 218264
loss: 0.9891250133514404,grad_norm: 0.9186724526516806, iteration: 218265
loss: 0.9978101253509521,grad_norm: 0.9879232809721908, iteration: 218266
loss: 1.0284150838851929,grad_norm: 0.9999990980016972, iteration: 218267
loss: 0.934910774230957,grad_norm: 0.9999991271832744, iteration: 218268
loss: 0.9637291431427002,grad_norm: 0.9777632371719625, iteration: 218269
loss: 1.0122063159942627,grad_norm: 0.8047022398057366, iteration: 218270
loss: 1.0290837287902832,grad_norm: 0.9217278024922275, iteration: 218271
loss: 0.9745327234268188,grad_norm: 0.8982227648694137, iteration: 218272
loss: 1.0218476057052612,grad_norm: 0.8432832660239881, iteration: 218273
loss: 1.0044820308685303,grad_norm: 0.9820004635996822, iteration: 218274
loss: 0.9989972114562988,grad_norm: 0.9550942814801204, iteration: 218275
loss: 0.9768043756484985,grad_norm: 0.917133780215492, iteration: 218276
loss: 1.0066646337509155,grad_norm: 0.7714874037128145, iteration: 218277
loss: 0.993179440498352,grad_norm: 0.9999989466846618, iteration: 218278
loss: 1.0102471113204956,grad_norm: 0.999999320565074, iteration: 218279
loss: 1.0047575235366821,grad_norm: 0.8487577276268236, iteration: 218280
loss: 0.9657853841781616,grad_norm: 0.7926269720616539, iteration: 218281
loss: 0.9800994396209717,grad_norm: 0.9999990620162484, iteration: 218282
loss: 0.9790830016136169,grad_norm: 0.9874272569370572, iteration: 218283
loss: 1.0239328145980835,grad_norm: 0.9999990467112724, iteration: 218284
loss: 0.9816335439682007,grad_norm: 0.98061329878604, iteration: 218285
loss: 1.0191314220428467,grad_norm: 0.8433529042849479, iteration: 218286
loss: 0.9579180479049683,grad_norm: 0.9999991375836563, iteration: 218287
loss: 1.0259374380111694,grad_norm: 0.8082618207452992, iteration: 218288
loss: 0.9700206518173218,grad_norm: 0.9999991585543275, iteration: 218289
loss: 1.0317851305007935,grad_norm: 0.8808500019375775, iteration: 218290
loss: 0.9763486981391907,grad_norm: 0.8644603305663643, iteration: 218291
loss: 0.953475296497345,grad_norm: 0.85292191245829, iteration: 218292
loss: 1.0011067390441895,grad_norm: 0.9212693894895803, iteration: 218293
loss: 1.0089722871780396,grad_norm: 0.9026382962741121, iteration: 218294
loss: 1.0022516250610352,grad_norm: 0.9156948647954581, iteration: 218295
loss: 0.9630343317985535,grad_norm: 0.8804284251778804, iteration: 218296
loss: 1.0342350006103516,grad_norm: 0.999999176631329, iteration: 218297
loss: 1.0721043348312378,grad_norm: 0.9999997361882423, iteration: 218298
loss: 1.0129953622817993,grad_norm: 0.9870397297128528, iteration: 218299
loss: 1.0296854972839355,grad_norm: 0.8762271828156482, iteration: 218300
loss: 0.9803701043128967,grad_norm: 0.76406936992599, iteration: 218301
loss: 1.041684627532959,grad_norm: 0.8629011600109185, iteration: 218302
loss: 1.0414947271347046,grad_norm: 0.9999995158709354, iteration: 218303
loss: 1.0357580184936523,grad_norm: 0.8586830762041177, iteration: 218304
loss: 0.9834125638008118,grad_norm: 0.8548722618811951, iteration: 218305
loss: 1.0162158012390137,grad_norm: 0.8113025492427772, iteration: 218306
loss: 1.0120949745178223,grad_norm: 0.8115493761174297, iteration: 218307
loss: 1.0090619325637817,grad_norm: 0.790505710813133, iteration: 218308
loss: 0.9765022397041321,grad_norm: 0.9999991208594807, iteration: 218309
loss: 0.9534940719604492,grad_norm: 0.9999991313171872, iteration: 218310
loss: 0.998230516910553,grad_norm: 0.9999990279127485, iteration: 218311
loss: 1.008177399635315,grad_norm: 0.9557037937530829, iteration: 218312
loss: 1.0274838209152222,grad_norm: 0.9999991348350966, iteration: 218313
loss: 1.0294617414474487,grad_norm: 0.8739830746496459, iteration: 218314
loss: 1.0283881425857544,grad_norm: 0.9999991808585683, iteration: 218315
loss: 0.9870762825012207,grad_norm: 0.9999992084588026, iteration: 218316
loss: 1.0554989576339722,grad_norm: 0.9999996895094927, iteration: 218317
loss: 1.154987096786499,grad_norm: 0.9999999707795668, iteration: 218318
loss: 1.0055088996887207,grad_norm: 0.8486248622091996, iteration: 218319
loss: 0.9963345527648926,grad_norm: 0.8513657288251941, iteration: 218320
loss: 1.0065810680389404,grad_norm: 0.9999991469148756, iteration: 218321
loss: 0.9715749025344849,grad_norm: 0.8464857021637257, iteration: 218322
loss: 1.024739146232605,grad_norm: 0.9999991261354245, iteration: 218323
loss: 0.9766202569007874,grad_norm: 0.9999992079288734, iteration: 218324
loss: 1.0646913051605225,grad_norm: 0.9999992207286408, iteration: 218325
loss: 0.9861100912094116,grad_norm: 0.8600891977572116, iteration: 218326
loss: 0.9966063499450684,grad_norm: 0.9290243934019933, iteration: 218327
loss: 1.0028133392333984,grad_norm: 0.8238542313558382, iteration: 218328
loss: 0.9727087616920471,grad_norm: 0.9715636018826138, iteration: 218329
loss: 1.0083227157592773,grad_norm: 0.9045904038492403, iteration: 218330
loss: 0.9985553622245789,grad_norm: 0.9395379534504549, iteration: 218331
loss: 0.9748558402061462,grad_norm: 0.8406690470890243, iteration: 218332
loss: 0.9937794804573059,grad_norm: 0.9999991351352953, iteration: 218333
loss: 1.0081233978271484,grad_norm: 0.9859141230459636, iteration: 218334
loss: 0.99526447057724,grad_norm: 0.9688563284473346, iteration: 218335
loss: 0.9724735021591187,grad_norm: 0.9999991293951674, iteration: 218336
loss: 0.9966682195663452,grad_norm: 0.999999057324194, iteration: 218337
loss: 1.006115436553955,grad_norm: 0.9856181597484526, iteration: 218338
loss: 1.0437703132629395,grad_norm: 0.9041866678698357, iteration: 218339
loss: 1.025818943977356,grad_norm: 0.963153660877273, iteration: 218340
loss: 1.1206949949264526,grad_norm: 0.9488079182308046, iteration: 218341
loss: 0.9623374938964844,grad_norm: 0.9399223786001214, iteration: 218342
loss: 0.976282000541687,grad_norm: 0.9486955712270428, iteration: 218343
loss: 0.9735401272773743,grad_norm: 0.8485129767419909, iteration: 218344
loss: 1.0284175872802734,grad_norm: 0.8525773636547552, iteration: 218345
loss: 1.0068113803863525,grad_norm: 0.9097156249645552, iteration: 218346
loss: 0.9957197308540344,grad_norm: 0.993432482935075, iteration: 218347
loss: 1.0237679481506348,grad_norm: 0.9999990004747169, iteration: 218348
loss: 0.9818898439407349,grad_norm: 0.8664364623335452, iteration: 218349
loss: 0.996758222579956,grad_norm: 0.913209154295326, iteration: 218350
loss: 1.001391053199768,grad_norm: 0.9999990470589184, iteration: 218351
loss: 1.0316411256790161,grad_norm: 0.9894163477240384, iteration: 218352
loss: 1.0052900314331055,grad_norm: 0.9999990958925543, iteration: 218353
loss: 0.9701375365257263,grad_norm: 0.9248037139172743, iteration: 218354
loss: 0.9658942818641663,grad_norm: 0.9205795441245651, iteration: 218355
loss: 0.9814474582672119,grad_norm: 0.9011980784697432, iteration: 218356
loss: 1.0294253826141357,grad_norm: 0.9157710991484995, iteration: 218357
loss: 0.985083281993866,grad_norm: 0.9999990934648361, iteration: 218358
loss: 1.0108981132507324,grad_norm: 0.8478606825369265, iteration: 218359
loss: 1.0233044624328613,grad_norm: 0.9999991362063936, iteration: 218360
loss: 0.9546582698822021,grad_norm: 0.999999190861106, iteration: 218361
loss: 0.9918454885482788,grad_norm: 0.8858601931187775, iteration: 218362
loss: 1.0262815952301025,grad_norm: 0.956548308786805, iteration: 218363
loss: 1.0051594972610474,grad_norm: 0.8599947726794217, iteration: 218364
loss: 0.9818693995475769,grad_norm: 0.802811450945914, iteration: 218365
loss: 0.9715279340744019,grad_norm: 0.9999990592921535, iteration: 218366
loss: 0.9916857481002808,grad_norm: 0.9999990799268836, iteration: 218367
loss: 1.0206931829452515,grad_norm: 0.9843674899585334, iteration: 218368
loss: 1.034196138381958,grad_norm: 0.9148665822943624, iteration: 218369
loss: 1.0139672756195068,grad_norm: 0.9999990576467385, iteration: 218370
loss: 0.9826624989509583,grad_norm: 0.9999990517056897, iteration: 218371
loss: 1.0135385990142822,grad_norm: 0.999999026745586, iteration: 218372
loss: 0.9706591367721558,grad_norm: 0.9999990506866213, iteration: 218373
loss: 0.9863852858543396,grad_norm: 0.9999991459982567, iteration: 218374
loss: 0.9594263434410095,grad_norm: 0.999999191432439, iteration: 218375
loss: 0.9817249774932861,grad_norm: 0.9958885403534715, iteration: 218376
loss: 0.9813066124916077,grad_norm: 0.9999990611889005, iteration: 218377
loss: 0.9724399447441101,grad_norm: 0.8369616553197704, iteration: 218378
loss: 1.00017511844635,grad_norm: 0.9733935716954468, iteration: 218379
loss: 1.0067455768585205,grad_norm: 0.8805479807056265, iteration: 218380
loss: 1.0389331579208374,grad_norm: 0.8489931241405025, iteration: 218381
loss: 0.9195194840431213,grad_norm: 0.9147962854446866, iteration: 218382
loss: 1.0179288387298584,grad_norm: 0.750597859410326, iteration: 218383
loss: 1.011238694190979,grad_norm: 0.9999990783578901, iteration: 218384
loss: 1.0132721662521362,grad_norm: 0.7469614465505977, iteration: 218385
loss: 0.9915223121643066,grad_norm: 0.8850054156319968, iteration: 218386
loss: 1.0148894786834717,grad_norm: 0.9119600711629535, iteration: 218387
loss: 1.0115946531295776,grad_norm: 0.9973417475648148, iteration: 218388
loss: 1.0071769952774048,grad_norm: 0.771575078730299, iteration: 218389
loss: 1.0245260000228882,grad_norm: 0.9690536281846835, iteration: 218390
loss: 1.006190299987793,grad_norm: 0.9999990157409754, iteration: 218391
loss: 0.9968167543411255,grad_norm: 0.8962168906950844, iteration: 218392
loss: 1.0104504823684692,grad_norm: 0.8820306230523677, iteration: 218393
loss: 0.9637555480003357,grad_norm: 0.9417891972084869, iteration: 218394
loss: 1.0258684158325195,grad_norm: 0.9999992115351783, iteration: 218395
loss: 1.0043182373046875,grad_norm: 0.8941716011767686, iteration: 218396
loss: 1.031191110610962,grad_norm: 0.9852040048370803, iteration: 218397
loss: 0.9908638000488281,grad_norm: 0.8443644850627563, iteration: 218398
loss: 0.9965585470199585,grad_norm: 0.9618326829525092, iteration: 218399
loss: 0.9838623404502869,grad_norm: 0.9999992149541364, iteration: 218400
loss: 0.9993029832839966,grad_norm: 0.8699724153354268, iteration: 218401
loss: 1.0151506662368774,grad_norm: 0.9999990968912214, iteration: 218402
loss: 0.9700078368186951,grad_norm: 0.8965081093400156, iteration: 218403
loss: 0.9784802794456482,grad_norm: 0.9619210042691413, iteration: 218404
loss: 1.008173942565918,grad_norm: 0.9999992486975656, iteration: 218405
loss: 0.9953250885009766,grad_norm: 0.7975683644381529, iteration: 218406
loss: 1.0416237115859985,grad_norm: 0.9999992492053673, iteration: 218407
loss: 1.0002180337905884,grad_norm: 0.8810511727607336, iteration: 218408
loss: 0.9924386143684387,grad_norm: 0.9006805563885628, iteration: 218409
loss: 0.9632099270820618,grad_norm: 0.994333472887235, iteration: 218410
loss: 1.0573444366455078,grad_norm: 0.9771879879757717, iteration: 218411
loss: 0.9918164610862732,grad_norm: 0.9298551991886531, iteration: 218412
loss: 0.9980181455612183,grad_norm: 0.961320871859223, iteration: 218413
loss: 0.9966912865638733,grad_norm: 0.9999990215987271, iteration: 218414
loss: 0.9818502068519592,grad_norm: 0.7298792266440324, iteration: 218415
loss: 1.0061753988265991,grad_norm: 0.9999990630599739, iteration: 218416
loss: 0.9975249767303467,grad_norm: 0.844854515893122, iteration: 218417
loss: 1.01509428024292,grad_norm: 0.9999990208315521, iteration: 218418
loss: 1.0025018453598022,grad_norm: 0.9999992066234783, iteration: 218419
loss: 0.9893863797187805,grad_norm: 0.9764102972786552, iteration: 218420
loss: 1.052262306213379,grad_norm: 0.9999992192032707, iteration: 218421
loss: 1.0002250671386719,grad_norm: 0.8616861300930694, iteration: 218422
loss: 1.0162891149520874,grad_norm: 0.9773340512135621, iteration: 218423
loss: 0.9982005953788757,grad_norm: 0.9987269828885347, iteration: 218424
loss: 0.9855736494064331,grad_norm: 0.9999992211412664, iteration: 218425
loss: 1.028078556060791,grad_norm: 0.9148146299934049, iteration: 218426
loss: 0.9925358295440674,grad_norm: 0.9999991728670987, iteration: 218427
loss: 1.014215350151062,grad_norm: 0.9999991204750662, iteration: 218428
loss: 1.0115423202514648,grad_norm: 0.9999998271264171, iteration: 218429
loss: 0.9742481708526611,grad_norm: 0.8524664436215462, iteration: 218430
loss: 1.029422640800476,grad_norm: 0.9999991721219202, iteration: 218431
loss: 1.0176903009414673,grad_norm: 0.9999990669456083, iteration: 218432
loss: 1.0580564737319946,grad_norm: 0.9999994151330823, iteration: 218433
loss: 0.9561792016029358,grad_norm: 0.9412729331967639, iteration: 218434
loss: 1.0046601295471191,grad_norm: 0.9999991081694912, iteration: 218435
loss: 1.0868126153945923,grad_norm: 0.9999998404388416, iteration: 218436
loss: 1.0046659708023071,grad_norm: 0.8847127722050657, iteration: 218437
loss: 0.9835178256034851,grad_norm: 0.9357491679358685, iteration: 218438
loss: 1.0137826204299927,grad_norm: 0.8299839638415236, iteration: 218439
loss: 1.057631492614746,grad_norm: 0.9646175973920931, iteration: 218440
loss: 1.00271475315094,grad_norm: 0.8789757314148605, iteration: 218441
loss: 1.0122003555297852,grad_norm: 0.9653651570616558, iteration: 218442
loss: 0.9892140030860901,grad_norm: 0.9999995822516466, iteration: 218443
loss: 1.0007461309432983,grad_norm: 0.9800376690800248, iteration: 218444
loss: 1.017622470855713,grad_norm: 0.9818658259288795, iteration: 218445
loss: 1.0341544151306152,grad_norm: 0.8541756683636178, iteration: 218446
loss: 1.0059033632278442,grad_norm: 0.9999990742907942, iteration: 218447
loss: 0.9912335276603699,grad_norm: 0.9999989984927915, iteration: 218448
loss: 1.04776132106781,grad_norm: 0.9450135017419454, iteration: 218449
loss: 0.9880719780921936,grad_norm: 0.9117661287853931, iteration: 218450
loss: 1.04066002368927,grad_norm: 0.9999990543805054, iteration: 218451
loss: 0.9918044805526733,grad_norm: 0.9662423411810066, iteration: 218452
loss: 1.04189133644104,grad_norm: 0.9999990864601653, iteration: 218453
loss: 0.9927177429199219,grad_norm: 0.9999990639311515, iteration: 218454
loss: 1.0045279264450073,grad_norm: 0.8263256103741468, iteration: 218455
loss: 1.0156172513961792,grad_norm: 0.8648426715590132, iteration: 218456
loss: 0.965575098991394,grad_norm: 0.9999989832500463, iteration: 218457
loss: 1.008020281791687,grad_norm: 0.9016295464551798, iteration: 218458
loss: 0.9734753966331482,grad_norm: 0.9999991277521993, iteration: 218459
loss: 1.0005431175231934,grad_norm: 0.980282060643869, iteration: 218460
loss: 1.0181617736816406,grad_norm: 0.9999991607734421, iteration: 218461
loss: 0.9907211065292358,grad_norm: 0.9112262041037122, iteration: 218462
loss: 0.9886395335197449,grad_norm: 0.9999989865250185, iteration: 218463
loss: 0.9825600981712341,grad_norm: 0.9123521644091447, iteration: 218464
loss: 1.0181329250335693,grad_norm: 0.9999990418178244, iteration: 218465
loss: 1.0130884647369385,grad_norm: 0.7886585194998542, iteration: 218466
loss: 0.9884692430496216,grad_norm: 0.9842462313818439, iteration: 218467
loss: 1.040697455406189,grad_norm: 0.9999992064127641, iteration: 218468
loss: 0.9616276025772095,grad_norm: 0.9027197812069161, iteration: 218469
loss: 1.0142759084701538,grad_norm: 0.9803067384865851, iteration: 218470
loss: 0.9863234162330627,grad_norm: 0.8159946177632301, iteration: 218471
loss: 1.0483694076538086,grad_norm: 0.9999990801649951, iteration: 218472
loss: 0.979486346244812,grad_norm: 0.9999989850451172, iteration: 218473
loss: 0.9694123268127441,grad_norm: 0.9827640380420442, iteration: 218474
loss: 0.9670199751853943,grad_norm: 0.8248048600564133, iteration: 218475
loss: 1.0099576711654663,grad_norm: 0.9999996657945499, iteration: 218476
loss: 0.9580190777778625,grad_norm: 0.7814300538053133, iteration: 218477
loss: 1.015336036682129,grad_norm: 0.9999994850615129, iteration: 218478
loss: 1.0626832246780396,grad_norm: 0.9999990572906837, iteration: 218479
loss: 0.9916876554489136,grad_norm: 0.9999990021915951, iteration: 218480
loss: 1.0019432306289673,grad_norm: 0.8844364608415051, iteration: 218481
loss: 0.97019362449646,grad_norm: 0.9999992225156917, iteration: 218482
loss: 0.9620642066001892,grad_norm: 0.906590961760999, iteration: 218483
loss: 1.00607430934906,grad_norm: 0.9641494016607116, iteration: 218484
loss: 1.0061359405517578,grad_norm: 0.9999989981473134, iteration: 218485
loss: 1.023752212524414,grad_norm: 0.807665182049745, iteration: 218486
loss: 0.9833958745002747,grad_norm: 0.9999991350401812, iteration: 218487
loss: 1.0309184789657593,grad_norm: 0.9897704887229269, iteration: 218488
loss: 0.964703381061554,grad_norm: 0.9999990094026661, iteration: 218489
loss: 1.008821964263916,grad_norm: 0.8147522757299961, iteration: 218490
loss: 0.9894192218780518,grad_norm: 0.9389427907111313, iteration: 218491
loss: 1.0299339294433594,grad_norm: 0.9999991114994926, iteration: 218492
loss: 0.9836723208427429,grad_norm: 0.7555511395309701, iteration: 218493
loss: 1.0102206468582153,grad_norm: 0.9999990491112171, iteration: 218494
loss: 0.9785552620887756,grad_norm: 0.9733247942533463, iteration: 218495
loss: 0.9832669496536255,grad_norm: 0.9999991105537612, iteration: 218496
loss: 0.9872593879699707,grad_norm: 0.8383758479197447, iteration: 218497
loss: 1.005607008934021,grad_norm: 0.988493883851378, iteration: 218498
loss: 1.0119175910949707,grad_norm: 0.8489453780201528, iteration: 218499
loss: 1.0206537246704102,grad_norm: 0.9999992669835649, iteration: 218500
loss: 0.9979241490364075,grad_norm: 0.8434679000301565, iteration: 218501
loss: 1.0106360912322998,grad_norm: 0.8846547145521375, iteration: 218502
loss: 1.017218828201294,grad_norm: 0.9999993148528996, iteration: 218503
loss: 1.0190011262893677,grad_norm: 0.9999991763796583, iteration: 218504
loss: 0.9986379742622375,grad_norm: 0.9125048884591599, iteration: 218505
loss: 1.0178977251052856,grad_norm: 0.9999993681841103, iteration: 218506
loss: 0.9781520366668701,grad_norm: 0.9341199259805861, iteration: 218507
loss: 0.9849552512168884,grad_norm: 0.8755237403796183, iteration: 218508
loss: 1.0207228660583496,grad_norm: 0.9116274049642208, iteration: 218509
loss: 1.0225672721862793,grad_norm: 0.999999349561381, iteration: 218510
loss: 0.9825517535209656,grad_norm: 0.9999989344009707, iteration: 218511
loss: 0.9673159718513489,grad_norm: 0.9177113387867678, iteration: 218512
loss: 0.9886141419410706,grad_norm: 0.9999990994005563, iteration: 218513
loss: 1.0016655921936035,grad_norm: 0.9788820737398087, iteration: 218514
loss: 1.0120584964752197,grad_norm: 0.9753967423164728, iteration: 218515
loss: 0.9703803658485413,grad_norm: 0.7993419169846422, iteration: 218516
loss: 0.9645136594772339,grad_norm: 0.8517376116919678, iteration: 218517
loss: 0.9962562918663025,grad_norm: 0.8753879065946303, iteration: 218518
loss: 1.0022482872009277,grad_norm: 0.8579377008428887, iteration: 218519
loss: 1.0160518884658813,grad_norm: 0.9999990505277861, iteration: 218520
loss: 1.0424528121948242,grad_norm: 0.9682037068469576, iteration: 218521
loss: 1.0223474502563477,grad_norm: 0.933586323962562, iteration: 218522
loss: 0.9811654090881348,grad_norm: 0.9999996005386689, iteration: 218523
loss: 0.9822328686714172,grad_norm: 0.9999990820021807, iteration: 218524
loss: 1.107036828994751,grad_norm: 0.9999998132091799, iteration: 218525
loss: 0.9804669618606567,grad_norm: 0.9999991238680936, iteration: 218526
loss: 0.9601226449012756,grad_norm: 0.9841817386529655, iteration: 218527
loss: 0.9674514532089233,grad_norm: 0.9186270096150896, iteration: 218528
loss: 0.9697645902633667,grad_norm: 0.9999991279434859, iteration: 218529
loss: 1.0118021965026855,grad_norm: 0.9218769219210692, iteration: 218530
loss: 0.9863793849945068,grad_norm: 0.8521808569146762, iteration: 218531
loss: 0.955248236656189,grad_norm: 0.9999992628493456, iteration: 218532
loss: 1.0029866695404053,grad_norm: 0.9349651560147499, iteration: 218533
loss: 0.9759085774421692,grad_norm: 0.9999991567067951, iteration: 218534
loss: 0.9799265265464783,grad_norm: 0.999999171902535, iteration: 218535
loss: 0.973690927028656,grad_norm: 0.7853911767260725, iteration: 218536
loss: 0.9863308668136597,grad_norm: 0.9982441741967022, iteration: 218537
loss: 1.047262191772461,grad_norm: 0.955203563892861, iteration: 218538
loss: 0.9754186272621155,grad_norm: 0.9220801728577054, iteration: 218539
loss: 1.003680944442749,grad_norm: 0.9612685240815584, iteration: 218540
loss: 1.0022411346435547,grad_norm: 0.9999991017678156, iteration: 218541
loss: 1.0490429401397705,grad_norm: 0.9999992573477141, iteration: 218542
loss: 0.9910275340080261,grad_norm: 0.9999990503689017, iteration: 218543
loss: 1.0582696199417114,grad_norm: 0.9999992315800579, iteration: 218544
loss: 1.1173210144042969,grad_norm: 0.9518023346014349, iteration: 218545
loss: 1.0166395902633667,grad_norm: 0.9062447203015138, iteration: 218546
loss: 0.9802410006523132,grad_norm: 0.894377645141849, iteration: 218547
loss: 1.0320156812667847,grad_norm: 0.9999990742202276, iteration: 218548
loss: 1.0166895389556885,grad_norm: 0.7607332109208584, iteration: 218549
loss: 0.9600047469139099,grad_norm: 0.865715430875106, iteration: 218550
loss: 1.0202471017837524,grad_norm: 0.8452830832771975, iteration: 218551
loss: 1.039789080619812,grad_norm: 0.9999991712170477, iteration: 218552
loss: 0.9852726459503174,grad_norm: 0.9646185536977313, iteration: 218553
loss: 0.9818742275238037,grad_norm: 0.8251836318151153, iteration: 218554
loss: 1.015786051750183,grad_norm: 0.9999993788171774, iteration: 218555
loss: 1.0055261850357056,grad_norm: 0.9601493099053094, iteration: 218556
loss: 1.0058534145355225,grad_norm: 0.9999989551677921, iteration: 218557
loss: 1.0200634002685547,grad_norm: 0.9999990488003158, iteration: 218558
loss: 1.0549627542495728,grad_norm: 0.9932314727199215, iteration: 218559
loss: 1.0016112327575684,grad_norm: 0.961076941418357, iteration: 218560
loss: 0.987275242805481,grad_norm: 0.8797950654863408, iteration: 218561
loss: 0.9883416295051575,grad_norm: 0.8968023998961077, iteration: 218562
loss: 1.0261427164077759,grad_norm: 0.9999990680170023, iteration: 218563
loss: 1.004594087600708,grad_norm: 0.9999991000057615, iteration: 218564
loss: 0.9786380529403687,grad_norm: 0.7833221741402394, iteration: 218565
loss: 1.0146145820617676,grad_norm: 0.9612686013041564, iteration: 218566
loss: 1.0618175268173218,grad_norm: 0.9999990487847691, iteration: 218567
loss: 1.0413554906845093,grad_norm: 0.9999994351242334, iteration: 218568
loss: 1.011452555656433,grad_norm: 0.9999991201675297, iteration: 218569
loss: 1.0222997665405273,grad_norm: 0.8909527058335172, iteration: 218570
loss: 1.0070459842681885,grad_norm: 0.7655542667893754, iteration: 218571
loss: 1.089036226272583,grad_norm: 0.9999996236364476, iteration: 218572
loss: 0.9883307218551636,grad_norm: 0.9014816425929663, iteration: 218573
loss: 0.9877994656562805,grad_norm: 0.9999991532980014, iteration: 218574
loss: 0.9873430132865906,grad_norm: 0.9179930418595528, iteration: 218575
loss: 1.002569317817688,grad_norm: 0.8775143965312494, iteration: 218576
loss: 1.0473445653915405,grad_norm: 0.9999996626817454, iteration: 218577
loss: 0.9848583936691284,grad_norm: 0.935202343146892, iteration: 218578
loss: 0.9969460368156433,grad_norm: 0.7930251593591647, iteration: 218579
loss: 0.9917051792144775,grad_norm: 0.8831950206293607, iteration: 218580
loss: 1.0063799619674683,grad_norm: 0.9547893018095257, iteration: 218581
loss: 1.0016999244689941,grad_norm: 0.9999990288552822, iteration: 218582
loss: 0.972235918045044,grad_norm: 0.7885809522145404, iteration: 218583
loss: 0.9941776990890503,grad_norm: 0.9999990611569155, iteration: 218584
loss: 0.9965548515319824,grad_norm: 0.7819432384725544, iteration: 218585
loss: 0.9803531169891357,grad_norm: 0.9496091418627431, iteration: 218586
loss: 1.0053802728652954,grad_norm: 0.9992775131211264, iteration: 218587
loss: 0.9743962287902832,grad_norm: 0.9650267203554294, iteration: 218588
loss: 1.0397489070892334,grad_norm: 0.9616754795982773, iteration: 218589
loss: 1.0366400480270386,grad_norm: 0.9999995322380435, iteration: 218590
loss: 1.0867750644683838,grad_norm: 0.9999992539772172, iteration: 218591
loss: 1.0090043544769287,grad_norm: 0.941580495589961, iteration: 218592
loss: 0.9723005294799805,grad_norm: 0.9999989816234789, iteration: 218593
loss: 1.0187760591506958,grad_norm: 0.8348436419899522, iteration: 218594
loss: 1.0193828344345093,grad_norm: 0.9164417016458836, iteration: 218595
loss: 0.9905359745025635,grad_norm: 0.9714633285565515, iteration: 218596
loss: 0.9978022575378418,grad_norm: 0.9067776943167224, iteration: 218597
loss: 1.0144015550613403,grad_norm: 0.9781277405828553, iteration: 218598
loss: 0.9996793866157532,grad_norm: 0.8455011463787011, iteration: 218599
loss: 0.987014651298523,grad_norm: 0.9908408169969162, iteration: 218600
loss: 0.9934583306312561,grad_norm: 0.9016193338154936, iteration: 218601
loss: 0.9980623126029968,grad_norm: 0.8518822708685263, iteration: 218602
loss: 0.9690783023834229,grad_norm: 0.8564115865546648, iteration: 218603
loss: 1.0065193176269531,grad_norm: 0.963431790019877, iteration: 218604
loss: 0.9989601373672485,grad_norm: 0.7870482283141998, iteration: 218605
loss: 0.9912403225898743,grad_norm: 0.9600649430246478, iteration: 218606
loss: 0.9774588346481323,grad_norm: 0.9999993226808497, iteration: 218607
loss: 0.951782763004303,grad_norm: 0.9999990713821283, iteration: 218608
loss: 1.0147883892059326,grad_norm: 0.9512401627353072, iteration: 218609
loss: 1.0052525997161865,grad_norm: 0.8242959021393571, iteration: 218610
loss: 1.015364170074463,grad_norm: 0.9778819884431252, iteration: 218611
loss: 0.9860467910766602,grad_norm: 0.8504449131081807, iteration: 218612
loss: 1.0093472003936768,grad_norm: 0.9999988995732895, iteration: 218613
loss: 1.0291385650634766,grad_norm: 0.8277702188662455, iteration: 218614
loss: 1.0028369426727295,grad_norm: 0.8988106405037144, iteration: 218615
loss: 1.0410118103027344,grad_norm: 0.9999996006396205, iteration: 218616
loss: 0.9983149766921997,grad_norm: 0.9999991588416279, iteration: 218617
loss: 1.0068589448928833,grad_norm: 0.8019416486464958, iteration: 218618
loss: 0.986774742603302,grad_norm: 0.9999990015762419, iteration: 218619
loss: 0.9847787618637085,grad_norm: 0.8739800056431761, iteration: 218620
loss: 0.9969651699066162,grad_norm: 0.8908708977392179, iteration: 218621
loss: 0.9842233657836914,grad_norm: 0.8175472217909968, iteration: 218622
loss: 0.9991979598999023,grad_norm: 0.9999991549212579, iteration: 218623
loss: 0.9848067760467529,grad_norm: 0.8294568943091399, iteration: 218624
loss: 0.9869338870048523,grad_norm: 0.8618081049720379, iteration: 218625
loss: 0.9618520140647888,grad_norm: 0.9404655814562699, iteration: 218626
loss: 1.0146009922027588,grad_norm: 0.9934340669976283, iteration: 218627
loss: 0.9691656827926636,grad_norm: 0.9999991668241576, iteration: 218628
loss: 1.0375698804855347,grad_norm: 0.8542629147178797, iteration: 218629
loss: 1.0464516878128052,grad_norm: 0.9999991337201065, iteration: 218630
loss: 0.9702173471450806,grad_norm: 0.9999990261851343, iteration: 218631
loss: 1.0010812282562256,grad_norm: 0.7917499796831566, iteration: 218632
loss: 0.9814664125442505,grad_norm: 0.9307525835266495, iteration: 218633
loss: 1.013810157775879,grad_norm: 0.754395190286361, iteration: 218634
loss: 1.0215941667556763,grad_norm: 0.8185540368284318, iteration: 218635
loss: 0.9863845705986023,grad_norm: 0.8623461004684001, iteration: 218636
loss: 0.9936423897743225,grad_norm: 0.9999991224012758, iteration: 218637
loss: 1.0299025774002075,grad_norm: 0.9550571022558457, iteration: 218638
loss: 1.0508352518081665,grad_norm: 0.9999991046307655, iteration: 218639
loss: 1.020111083984375,grad_norm: 0.7745006666017461, iteration: 218640
loss: 1.0170040130615234,grad_norm: 0.9999990970441861, iteration: 218641
loss: 1.0041968822479248,grad_norm: 0.9999991837190035, iteration: 218642
loss: 1.0406148433685303,grad_norm: 0.9999990633393367, iteration: 218643
loss: 1.0016919374465942,grad_norm: 0.9999991411752696, iteration: 218644
loss: 1.0575692653656006,grad_norm: 0.9574862924566152, iteration: 218645
loss: 0.9876047968864441,grad_norm: 0.9999992393731402, iteration: 218646
loss: 0.9461674690246582,grad_norm: 0.9917499257985223, iteration: 218647
loss: 0.9993063807487488,grad_norm: 0.9191070595480654, iteration: 218648
loss: 0.9812271595001221,grad_norm: 0.8117184772528259, iteration: 218649
loss: 1.0095378160476685,grad_norm: 0.9999990973227973, iteration: 218650
loss: 0.9716071486473083,grad_norm: 0.8484440518276722, iteration: 218651
loss: 0.9832273125648499,grad_norm: 0.8606422321799999, iteration: 218652
loss: 1.0106252431869507,grad_norm: 0.9999998513596529, iteration: 218653
loss: 0.9711335897445679,grad_norm: 0.876828127916388, iteration: 218654
loss: 1.004786729812622,grad_norm: 0.8875300796406821, iteration: 218655
loss: 1.0258046388626099,grad_norm: 0.9999990928116299, iteration: 218656
loss: 1.0114997625350952,grad_norm: 0.9705399360579433, iteration: 218657
loss: 0.9960517287254333,grad_norm: 0.9999992562879716, iteration: 218658
loss: 1.0109702348709106,grad_norm: 0.9999990133075083, iteration: 218659
loss: 1.0452394485473633,grad_norm: 0.9999998064716225, iteration: 218660
loss: 1.0287275314331055,grad_norm: 0.9999990189730834, iteration: 218661
loss: 0.9889695644378662,grad_norm: 0.9999989712635262, iteration: 218662
loss: 1.0479921102523804,grad_norm: 0.9999993893148059, iteration: 218663
loss: 1.0074654817581177,grad_norm: 0.8166734883049023, iteration: 218664
loss: 0.9854941368103027,grad_norm: 0.9049812592817463, iteration: 218665
loss: 1.0057824850082397,grad_norm: 0.8863719389890098, iteration: 218666
loss: 1.0094624757766724,grad_norm: 0.9728267692044207, iteration: 218667
loss: 1.0055731534957886,grad_norm: 0.90300149408535, iteration: 218668
loss: 1.0182329416275024,grad_norm: 0.9999991218138237, iteration: 218669
loss: 1.0099955797195435,grad_norm: 0.9997282462818483, iteration: 218670
loss: 1.0109843015670776,grad_norm: 0.9999990339767566, iteration: 218671
loss: 1.0125482082366943,grad_norm: 0.9006773272603129, iteration: 218672
loss: 0.9975040555000305,grad_norm: 0.8659835757432714, iteration: 218673
loss: 0.981681227684021,grad_norm: 0.8470861350435693, iteration: 218674
loss: 1.006919264793396,grad_norm: 0.9999990105385494, iteration: 218675
loss: 0.9849314093589783,grad_norm: 0.9999991378278157, iteration: 218676
loss: 0.9878575801849365,grad_norm: 0.9999991400793791, iteration: 218677
loss: 0.9878615736961365,grad_norm: 0.8060944909591571, iteration: 218678
loss: 1.0342826843261719,grad_norm: 0.9999991723093701, iteration: 218679
loss: 0.9633709192276001,grad_norm: 0.7792069513858869, iteration: 218680
loss: 1.0013465881347656,grad_norm: 0.9327961035107206, iteration: 218681
loss: 0.9959205985069275,grad_norm: 0.9571074437016581, iteration: 218682
loss: 1.0391696691513062,grad_norm: 0.9033032223581254, iteration: 218683
loss: 1.0009081363677979,grad_norm: 0.935475489790862, iteration: 218684
loss: 0.9883587956428528,grad_norm: 0.8525578205726329, iteration: 218685
loss: 0.9992486238479614,grad_norm: 0.7658248794145408, iteration: 218686
loss: 1.0092391967773438,grad_norm: 0.8757182754732266, iteration: 218687
loss: 0.9676243662834167,grad_norm: 0.9163450842397429, iteration: 218688
loss: 0.9922125935554504,grad_norm: 0.9832162494159228, iteration: 218689
loss: 0.9962247610092163,grad_norm: 0.9635552775426298, iteration: 218690
loss: 1.0079360008239746,grad_norm: 0.9999991375803097, iteration: 218691
loss: 1.0023136138916016,grad_norm: 0.9146880297061218, iteration: 218692
loss: 1.0230157375335693,grad_norm: 0.8933968614240335, iteration: 218693
loss: 1.0020920038223267,grad_norm: 0.8037822961061715, iteration: 218694
loss: 0.9767619967460632,grad_norm: 0.9906463078589969, iteration: 218695
loss: 0.9944859147071838,grad_norm: 0.9438337305836397, iteration: 218696
loss: 0.9869580268859863,grad_norm: 0.9954537929237202, iteration: 218697
loss: 1.0178476572036743,grad_norm: 0.9999991768482461, iteration: 218698
loss: 0.9909578561782837,grad_norm: 0.8944285067260365, iteration: 218699
loss: 1.055383324623108,grad_norm: 0.999999140444145, iteration: 218700
loss: 0.9545704126358032,grad_norm: 0.8987068213112875, iteration: 218701
loss: 1.002002239227295,grad_norm: 0.8025392885864878, iteration: 218702
loss: 1.0035113096237183,grad_norm: 0.9644782071482377, iteration: 218703
loss: 1.0236680507659912,grad_norm: 0.9999995400939885, iteration: 218704
loss: 0.9650585651397705,grad_norm: 0.912245749872156, iteration: 218705
loss: 1.0114284753799438,grad_norm: 0.8141122211236165, iteration: 218706
loss: 1.0053348541259766,grad_norm: 0.8363739821843084, iteration: 218707
loss: 0.9953309297561646,grad_norm: 0.9999990870148037, iteration: 218708
loss: 0.9768810272216797,grad_norm: 0.9506812828483894, iteration: 218709
loss: 0.987277626991272,grad_norm: 0.980822146498313, iteration: 218710
loss: 0.9867345690727234,grad_norm: 0.9238961893226987, iteration: 218711
loss: 0.9814386367797852,grad_norm: 0.9013346965872678, iteration: 218712
loss: 1.0436372756958008,grad_norm: 0.8769353607037957, iteration: 218713
loss: 0.966197669506073,grad_norm: 0.9737807710437023, iteration: 218714
loss: 1.0265916585922241,grad_norm: 0.8692562031736774, iteration: 218715
loss: 0.973271906375885,grad_norm: 0.799856208111942, iteration: 218716
loss: 1.0056241750717163,grad_norm: 0.9999991347122642, iteration: 218717
loss: 0.9867804050445557,grad_norm: 0.7697259093949534, iteration: 218718
loss: 0.9795846343040466,grad_norm: 0.8941775765238817, iteration: 218719
loss: 1.0142848491668701,grad_norm: 0.9797884249262906, iteration: 218720
loss: 0.9858941435813904,grad_norm: 0.9274134683393789, iteration: 218721
loss: 1.037109375,grad_norm: 0.9999989739594203, iteration: 218722
loss: 0.9726307392120361,grad_norm: 0.9075338187719473, iteration: 218723
loss: 0.9785112738609314,grad_norm: 0.994223232616293, iteration: 218724
loss: 1.021390676498413,grad_norm: 0.9999991985357666, iteration: 218725
loss: 0.9800112843513489,grad_norm: 0.9999990304817545, iteration: 218726
loss: 0.997990071773529,grad_norm: 0.9999990733906525, iteration: 218727
loss: 1.0001176595687866,grad_norm: 0.8900028190300541, iteration: 218728
loss: 0.9904133081436157,grad_norm: 0.8850660873523003, iteration: 218729
loss: 0.9927329421043396,grad_norm: 0.9034990093706495, iteration: 218730
loss: 0.9986093640327454,grad_norm: 0.8458614644043789, iteration: 218731
loss: 0.9823592901229858,grad_norm: 0.8967865395230611, iteration: 218732
loss: 0.9710705280303955,grad_norm: 0.8667771491325241, iteration: 218733
loss: 0.9771180152893066,grad_norm: 0.9999990103359945, iteration: 218734
loss: 1.042884349822998,grad_norm: 0.8017910251370746, iteration: 218735
loss: 0.9868203401565552,grad_norm: 0.819712027516006, iteration: 218736
loss: 0.9850285649299622,grad_norm: 0.9999992476233102, iteration: 218737
loss: 0.976469874382019,grad_norm: 0.967462519111041, iteration: 218738
loss: 0.9441542625427246,grad_norm: 0.8723182557562881, iteration: 218739
loss: 1.0457842350006104,grad_norm: 0.9828823225434951, iteration: 218740
loss: 1.0044888257980347,grad_norm: 0.9999990384532756, iteration: 218741
loss: 0.9830570220947266,grad_norm: 0.9058487848630047, iteration: 218742
loss: 1.00642991065979,grad_norm: 0.939257734350238, iteration: 218743
loss: 0.972021222114563,grad_norm: 0.9999991060605421, iteration: 218744
loss: 1.010018229484558,grad_norm: 0.9238819153672595, iteration: 218745
loss: 1.0523799657821655,grad_norm: 0.8947608790397119, iteration: 218746
loss: 0.9580689668655396,grad_norm: 0.9999989962595356, iteration: 218747
loss: 1.0007458925247192,grad_norm: 0.8640660600867841, iteration: 218748
loss: 0.9576695561408997,grad_norm: 0.9999991572477729, iteration: 218749
loss: 0.9822614789009094,grad_norm: 0.8775306512275274, iteration: 218750
loss: 0.989105761051178,grad_norm: 0.9253918158923212, iteration: 218751
loss: 0.996553897857666,grad_norm: 0.8195046824376679, iteration: 218752
loss: 0.976854681968689,grad_norm: 0.9999990634426643, iteration: 218753
loss: 0.9887393116950989,grad_norm: 0.999999211682181, iteration: 218754
loss: 0.9909349679946899,grad_norm: 0.8329587728133496, iteration: 218755
loss: 1.0090616941452026,grad_norm: 0.999999163131185, iteration: 218756
loss: 0.9692988991737366,grad_norm: 0.999999242281612, iteration: 218757
loss: 1.0321176052093506,grad_norm: 0.9792591850314991, iteration: 218758
loss: 0.9829688668251038,grad_norm: 0.8745699534728031, iteration: 218759
loss: 0.9767215251922607,grad_norm: 0.9354460660295966, iteration: 218760
loss: 0.9944983720779419,grad_norm: 0.9999992431078962, iteration: 218761
loss: 0.9836421608924866,grad_norm: 0.9999991552460201, iteration: 218762
loss: 0.9853772521018982,grad_norm: 0.7895051549020595, iteration: 218763
loss: 0.9433945417404175,grad_norm: 0.9261979749276738, iteration: 218764
loss: 1.0428234338760376,grad_norm: 0.9999995670700157, iteration: 218765
loss: 0.9633379578590393,grad_norm: 0.8519993342365326, iteration: 218766
loss: 0.9692510366439819,grad_norm: 0.9551269729146387, iteration: 218767
loss: 1.0451393127441406,grad_norm: 0.9999990424398341, iteration: 218768
loss: 1.0121064186096191,grad_norm: 0.9927658847299248, iteration: 218769
loss: 0.9681564569473267,grad_norm: 0.8737074560213335, iteration: 218770
loss: 1.0028680562973022,grad_norm: 0.931405014318338, iteration: 218771
loss: 1.021862506866455,grad_norm: 0.8315229771634687, iteration: 218772
loss: 1.0195540189743042,grad_norm: 0.883873232324234, iteration: 218773
loss: 1.0137243270874023,grad_norm: 0.9406586374583484, iteration: 218774
loss: 1.0150421857833862,grad_norm: 0.999999041411495, iteration: 218775
loss: 0.9723630547523499,grad_norm: 0.9258050541272035, iteration: 218776
loss: 1.0159289836883545,grad_norm: 0.9477718482702945, iteration: 218777
loss: 0.988448441028595,grad_norm: 0.9999991205167335, iteration: 218778
loss: 1.0130548477172852,grad_norm: 0.9999992092215971, iteration: 218779
loss: 1.0298938751220703,grad_norm: 0.9232904926418256, iteration: 218780
loss: 0.9777065515518188,grad_norm: 0.8730614724301229, iteration: 218781
loss: 1.0247273445129395,grad_norm: 0.9999991606267103, iteration: 218782
loss: 0.9913707375526428,grad_norm: 0.9108845368160575, iteration: 218783
loss: 1.0582091808319092,grad_norm: 0.9999998209973592, iteration: 218784
loss: 0.9641152620315552,grad_norm: 0.8966709805809083, iteration: 218785
loss: 1.010095238685608,grad_norm: 0.9805195807242615, iteration: 218786
loss: 0.9964413046836853,grad_norm: 0.9999998734526316, iteration: 218787
loss: 1.021957278251648,grad_norm: 0.999999106858386, iteration: 218788
loss: 0.9765300750732422,grad_norm: 0.9933450282601904, iteration: 218789
loss: 0.9971684217453003,grad_norm: 0.9999992100885982, iteration: 218790
loss: 0.9404895305633545,grad_norm: 0.9190403641544235, iteration: 218791
loss: 0.9767916202545166,grad_norm: 0.9778442452339688, iteration: 218792
loss: 1.0947227478027344,grad_norm: 0.9999993162112981, iteration: 218793
loss: 0.9718787670135498,grad_norm: 0.9999990516623374, iteration: 218794
loss: 1.0305131673812866,grad_norm: 0.9713684793731109, iteration: 218795
loss: 1.0042542219161987,grad_norm: 0.9156180282024633, iteration: 218796
loss: 0.9954702854156494,grad_norm: 0.9813611738531991, iteration: 218797
loss: 1.036002516746521,grad_norm: 0.8323526527902138, iteration: 218798
loss: 0.9948210120201111,grad_norm: 0.938695823414297, iteration: 218799
loss: 1.0007725954055786,grad_norm: 0.8871794494818176, iteration: 218800
loss: 1.0051140785217285,grad_norm: 0.8615841336419655, iteration: 218801
loss: 0.9911452531814575,grad_norm: 0.8055705582795156, iteration: 218802
loss: 0.9684931635856628,grad_norm: 0.9775838967911987, iteration: 218803
loss: 0.9989261627197266,grad_norm: 0.9999991301015347, iteration: 218804
loss: 1.01519775390625,grad_norm: 0.8791564340040845, iteration: 218805
loss: 0.9533126354217529,grad_norm: 0.9999990800330703, iteration: 218806
loss: 0.9645611047744751,grad_norm: 0.9999990716573295, iteration: 218807
loss: 1.0113914012908936,grad_norm: 0.9999993089566975, iteration: 218808
loss: 1.003321886062622,grad_norm: 0.7889695164073903, iteration: 218809
loss: 1.023726463317871,grad_norm: 1.0000000205118442, iteration: 218810
loss: 1.0153218507766724,grad_norm: 0.8995537080965822, iteration: 218811
loss: 0.9833552837371826,grad_norm: 0.9532747847170148, iteration: 218812
loss: 0.9925795793533325,grad_norm: 0.9999991382226762, iteration: 218813
loss: 1.0232399702072144,grad_norm: 0.9795583806660999, iteration: 218814
loss: 0.9897192120552063,grad_norm: 0.924705688097713, iteration: 218815
loss: 0.9776086211204529,grad_norm: 0.9999989950206855, iteration: 218816
loss: 0.9744046330451965,grad_norm: 0.999999136905948, iteration: 218817
loss: 0.9954898953437805,grad_norm: 0.8392254277151642, iteration: 218818
loss: 1.100020170211792,grad_norm: 0.9999999370103574, iteration: 218819
loss: 1.008522868156433,grad_norm: 0.9393313759328707, iteration: 218820
loss: 0.9940947890281677,grad_norm: 0.8235690830811152, iteration: 218821
loss: 0.9719099998474121,grad_norm: 0.9290945559285193, iteration: 218822
loss: 0.9641929268836975,grad_norm: 0.8529645319659623, iteration: 218823
loss: 1.0235317945480347,grad_norm: 0.9286594021500946, iteration: 218824
loss: 0.9986288547515869,grad_norm: 0.9991166598153808, iteration: 218825
loss: 0.9888545274734497,grad_norm: 0.8723780301283891, iteration: 218826
loss: 1.034148931503296,grad_norm: 0.9999997589019661, iteration: 218827
loss: 0.9667545557022095,grad_norm: 0.944010883414876, iteration: 218828
loss: 1.014377236366272,grad_norm: 0.9999990521459712, iteration: 218829
loss: 0.9939619302749634,grad_norm: 0.9866115997525422, iteration: 218830
loss: 0.9913446307182312,grad_norm: 0.8445382377211397, iteration: 218831
loss: 0.9666345119476318,grad_norm: 0.9999990830865231, iteration: 218832
loss: 0.9811720252037048,grad_norm: 0.9420122249876103, iteration: 218833
loss: 0.997441291809082,grad_norm: 0.977926932056632, iteration: 218834
loss: 1.0493704080581665,grad_norm: 0.9999992219524283, iteration: 218835
loss: 1.022191047668457,grad_norm: 0.999999053134068, iteration: 218836
loss: 1.0180556774139404,grad_norm: 0.9999992390295811, iteration: 218837
loss: 1.0168355703353882,grad_norm: 0.9178494465452368, iteration: 218838
loss: 1.0227606296539307,grad_norm: 0.958296793014577, iteration: 218839
loss: 0.9861829876899719,grad_norm: 0.9999991074347442, iteration: 218840
loss: 1.0080091953277588,grad_norm: 0.7825978530160753, iteration: 218841
loss: 0.9636641144752502,grad_norm: 0.9999991953532402, iteration: 218842
loss: 1.047096610069275,grad_norm: 0.9999991735171629, iteration: 218843
loss: 1.0175509452819824,grad_norm: 0.9481302318278588, iteration: 218844
loss: 0.9910673499107361,grad_norm: 0.8399939337000202, iteration: 218845
loss: 1.0082744359970093,grad_norm: 0.9618285783796859, iteration: 218846
loss: 0.999082624912262,grad_norm: 0.790435485219545, iteration: 218847
loss: 1.0107496976852417,grad_norm: 0.9571681539682421, iteration: 218848
loss: 1.0098437070846558,grad_norm: 0.9999990963196168, iteration: 218849
loss: 1.0085625648498535,grad_norm: 0.8422982297715802, iteration: 218850
loss: 1.0082316398620605,grad_norm: 0.9678664384488395, iteration: 218851
loss: 0.9713988900184631,grad_norm: 0.9777956267520405, iteration: 218852
loss: 1.0052380561828613,grad_norm: 0.9494573326849137, iteration: 218853
loss: 1.0203418731689453,grad_norm: 0.8571842734720638, iteration: 218854
loss: 0.9915165305137634,grad_norm: 0.898365222334359, iteration: 218855
loss: 0.9916530847549438,grad_norm: 0.9045062715939202, iteration: 218856
loss: 0.9941236972808838,grad_norm: 0.8609866201227344, iteration: 218857
loss: 0.9990464448928833,grad_norm: 0.9999990283641851, iteration: 218858
loss: 0.9984510540962219,grad_norm: 0.9715114296781003, iteration: 218859
loss: 1.0264090299606323,grad_norm: 0.8733346402424594, iteration: 218860
loss: 1.007116675376892,grad_norm: 0.8668359906562342, iteration: 218861
loss: 1.0206485986709595,grad_norm: 0.8286934721955884, iteration: 218862
loss: 1.0348652601242065,grad_norm: 0.9999998174998194, iteration: 218863
loss: 1.0097191333770752,grad_norm: 0.9999991949549337, iteration: 218864
loss: 1.0088531970977783,grad_norm: 0.8141575975981836, iteration: 218865
loss: 0.9744085669517517,grad_norm: 0.8371726313645076, iteration: 218866
loss: 0.9678568840026855,grad_norm: 0.7252937885998277, iteration: 218867
loss: 0.9716566205024719,grad_norm: 0.8341980814603986, iteration: 218868
loss: 0.9948974847793579,grad_norm: 0.9230567840471422, iteration: 218869
loss: 0.9873874187469482,grad_norm: 0.9999993601842467, iteration: 218870
loss: 1.0103001594543457,grad_norm: 0.7801054662727075, iteration: 218871
loss: 0.9614853858947754,grad_norm: 0.9432817691830218, iteration: 218872
loss: 0.994458019733429,grad_norm: 0.8171513982519261, iteration: 218873
loss: 1.0108569860458374,grad_norm: 0.9361054531329943, iteration: 218874
loss: 0.9409628510475159,grad_norm: 0.9183614932857903, iteration: 218875
loss: 0.9807224869728088,grad_norm: 0.8734403176658836, iteration: 218876
loss: 0.9861372113227844,grad_norm: 0.9999994441005017, iteration: 218877
loss: 1.0215470790863037,grad_norm: 0.9258347876057256, iteration: 218878
loss: 1.0082834959030151,grad_norm: 0.9550254291315028, iteration: 218879
loss: 0.9883090257644653,grad_norm: 0.8004977806422308, iteration: 218880
loss: 0.9884496331214905,grad_norm: 0.838203112074363, iteration: 218881
loss: 1.0350910425186157,grad_norm: 0.9843251701463646, iteration: 218882
loss: 1.0107535123825073,grad_norm: 0.8990348215523106, iteration: 218883
loss: 0.980739176273346,grad_norm: 0.9999991457424081, iteration: 218884
loss: 1.0232423543930054,grad_norm: 0.9328332593376514, iteration: 218885
loss: 1.0028777122497559,grad_norm: 0.9428319603714307, iteration: 218886
loss: 1.0241352319717407,grad_norm: 0.9999990569912002, iteration: 218887
loss: 0.9940210580825806,grad_norm: 0.9999990648297913, iteration: 218888
loss: 0.9711772203445435,grad_norm: 0.8734332803619522, iteration: 218889
loss: 0.9935335516929626,grad_norm: 0.8738136183736944, iteration: 218890
loss: 0.9530296921730042,grad_norm: 0.999998932765533, iteration: 218891
loss: 1.0357352495193481,grad_norm: 0.9163249469524418, iteration: 218892
loss: 1.0283554792404175,grad_norm: 0.8536086852682626, iteration: 218893
loss: 0.9785083532333374,grad_norm: 0.988967506050183, iteration: 218894
loss: 0.9863852858543396,grad_norm: 0.7841472066678995, iteration: 218895
loss: 1.0193969011306763,grad_norm: 0.7214293974331243, iteration: 218896
loss: 0.9943925738334656,grad_norm: 0.9462248722536377, iteration: 218897
loss: 0.9363314509391785,grad_norm: 0.8478669845152528, iteration: 218898
loss: 1.0246137380599976,grad_norm: 0.9999991619709397, iteration: 218899
loss: 1.0034700632095337,grad_norm: 0.9309049026182892, iteration: 218900
loss: 0.9842130541801453,grad_norm: 0.9698008425277018, iteration: 218901
loss: 1.0301513671875,grad_norm: 0.9999990424988213, iteration: 218902
loss: 1.077789306640625,grad_norm: 0.9999994928461284, iteration: 218903
loss: 0.9745368361473083,grad_norm: 0.9999992014589076, iteration: 218904
loss: 1.0460011959075928,grad_norm: 0.9999992025350813, iteration: 218905
loss: 0.9595552682876587,grad_norm: 0.9999990776968049, iteration: 218906
loss: 1.0057238340377808,grad_norm: 0.9356731347256487, iteration: 218907
loss: 0.9924793839454651,grad_norm: 0.8224301767164256, iteration: 218908
loss: 1.0065760612487793,grad_norm: 0.7631079464718092, iteration: 218909
loss: 0.9805585741996765,grad_norm: 0.9929794895234844, iteration: 218910
loss: 0.9961880445480347,grad_norm: 0.9248459033318235, iteration: 218911
loss: 1.018967866897583,grad_norm: 0.8828798694746702, iteration: 218912
loss: 0.9820844531059265,grad_norm: 0.968690185853825, iteration: 218913
loss: 1.0045852661132812,grad_norm: 0.9999993738014435, iteration: 218914
loss: 0.9770916700363159,grad_norm: 0.9334625890984029, iteration: 218915
loss: 0.9776938557624817,grad_norm: 0.9999991397435424, iteration: 218916
loss: 1.1170727014541626,grad_norm: 0.9999991322332201, iteration: 218917
loss: 1.0090419054031372,grad_norm: 0.9967231723186317, iteration: 218918
loss: 1.0281003713607788,grad_norm: 0.9999992624302665, iteration: 218919
loss: 0.9888978004455566,grad_norm: 0.9142124341639343, iteration: 218920
loss: 1.0038414001464844,grad_norm: 0.9999991348999193, iteration: 218921
loss: 0.9848302602767944,grad_norm: 0.9506818734016449, iteration: 218922
loss: 0.9812116622924805,grad_norm: 0.7966384059597423, iteration: 218923
loss: 1.1572967767715454,grad_norm: 0.9839947896494746, iteration: 218924
loss: 0.9971977472305298,grad_norm: 0.9573960878619091, iteration: 218925
loss: 0.9833729863166809,grad_norm: 0.9543338663150047, iteration: 218926
loss: 0.9847382307052612,grad_norm: 0.9605058866213266, iteration: 218927
loss: 1.029370903968811,grad_norm: 0.9999991548301616, iteration: 218928
loss: 1.0327296257019043,grad_norm: 0.9516427067192748, iteration: 218929
loss: 0.9890767931938171,grad_norm: 0.9999991275863591, iteration: 218930
loss: 1.0202974081039429,grad_norm: 0.9999991735278285, iteration: 218931
loss: 0.9903331995010376,grad_norm: 0.871418975654816, iteration: 218932
loss: 0.9934136271476746,grad_norm: 0.9933702655700503, iteration: 218933
loss: 0.9891480803489685,grad_norm: 0.9999995537054694, iteration: 218934
loss: 0.9900373816490173,grad_norm: 0.8171043173172389, iteration: 218935
loss: 0.969521701335907,grad_norm: 0.9232680561896581, iteration: 218936
loss: 1.0011262893676758,grad_norm: 0.8328637026041396, iteration: 218937
loss: 0.9923427700996399,grad_norm: 0.8331652665377847, iteration: 218938
loss: 0.9840209484100342,grad_norm: 0.7758561850078015, iteration: 218939
loss: 0.9884142875671387,grad_norm: 0.7986806131082447, iteration: 218940
loss: 1.0265592336654663,grad_norm: 0.999998944014806, iteration: 218941
loss: 0.9921068549156189,grad_norm: 0.9337794088615322, iteration: 218942
loss: 1.0376726388931274,grad_norm: 0.9999991832481251, iteration: 218943
loss: 0.9969391226768494,grad_norm: 0.8323647032834103, iteration: 218944
loss: 0.9703072309494019,grad_norm: 0.8997125997469182, iteration: 218945
loss: 0.9905682802200317,grad_norm: 0.999998976074521, iteration: 218946
loss: 0.9797067046165466,grad_norm: 0.999999126152005, iteration: 218947
loss: 1.0681378841400146,grad_norm: 0.9999993399464645, iteration: 218948
loss: 1.0060951709747314,grad_norm: 0.892645598723275, iteration: 218949
loss: 1.0248216390609741,grad_norm: 0.9233289401392689, iteration: 218950
loss: 0.9747087359428406,grad_norm: 0.9999992843663845, iteration: 218951
loss: 0.9454604387283325,grad_norm: 0.9148011125122834, iteration: 218952
loss: 1.0192338228225708,grad_norm: 0.999999159593773, iteration: 218953
loss: 0.9599227905273438,grad_norm: 0.9999991396400921, iteration: 218954
loss: 0.9527366161346436,grad_norm: 0.9059054115543053, iteration: 218955
loss: 1.0135904550552368,grad_norm: 0.9117469814925051, iteration: 218956
loss: 0.9956663846969604,grad_norm: 0.8728802272191352, iteration: 218957
loss: 0.9570964574813843,grad_norm: 0.9999989831870013, iteration: 218958
loss: 0.9923108220100403,grad_norm: 0.9999991235211826, iteration: 218959
loss: 1.0335649251937866,grad_norm: 0.9999991447396184, iteration: 218960
loss: 1.0822975635528564,grad_norm: 0.9999990173454106, iteration: 218961
loss: 1.0255745649337769,grad_norm: 0.8805802452966192, iteration: 218962
loss: 0.9930087327957153,grad_norm: 0.9068698210107612, iteration: 218963
loss: 1.0052752494812012,grad_norm: 0.9473714738311265, iteration: 218964
loss: 1.0070387125015259,grad_norm: 0.83433203623789, iteration: 218965
loss: 1.0136350393295288,grad_norm: 0.9999990714984341, iteration: 218966
loss: 0.9864782691001892,grad_norm: 0.784595615025486, iteration: 218967
loss: 1.0102652311325073,grad_norm: 0.9999991957381769, iteration: 218968
loss: 0.969882607460022,grad_norm: 0.9207175890279381, iteration: 218969
loss: 0.978036105632782,grad_norm: 0.9999989872661936, iteration: 218970
loss: 1.015194296836853,grad_norm: 0.9999991279223397, iteration: 218971
loss: 0.9952006340026855,grad_norm: 0.8620215144447684, iteration: 218972
loss: 0.993506669998169,grad_norm: 0.7667462516018345, iteration: 218973
loss: 0.971153736114502,grad_norm: 0.95642441165441, iteration: 218974
loss: 0.9756991863250732,grad_norm: 0.8261346356558072, iteration: 218975
loss: 1.021397352218628,grad_norm: 0.9448011292384161, iteration: 218976
loss: 0.9731781482696533,grad_norm: 0.9386046308268694, iteration: 218977
loss: 1.0097925662994385,grad_norm: 0.8292182909392459, iteration: 218978
loss: 1.0359177589416504,grad_norm: 0.8811539907029832, iteration: 218979
loss: 1.020436406135559,grad_norm: 0.9999997885429781, iteration: 218980
loss: 0.979293167591095,grad_norm: 0.9999992610103304, iteration: 218981
loss: 0.9910070896148682,grad_norm: 0.9999991186625918, iteration: 218982
loss: 0.9856054782867432,grad_norm: 0.880869507028459, iteration: 218983
loss: 1.016660451889038,grad_norm: 0.9653238471837268, iteration: 218984
loss: 1.0028527975082397,grad_norm: 0.9032292661355842, iteration: 218985
loss: 0.9726526141166687,grad_norm: 0.9999990027571622, iteration: 218986
loss: 1.0096838474273682,grad_norm: 0.923760878177452, iteration: 218987
loss: 0.9483055472373962,grad_norm: 0.9460160554353095, iteration: 218988
loss: 1.0379021167755127,grad_norm: 0.9999992711327267, iteration: 218989
loss: 1.0130834579467773,grad_norm: 0.9401158780754162, iteration: 218990
loss: 1.0000859498977661,grad_norm: 0.9999991826299278, iteration: 218991
loss: 1.0262304544448853,grad_norm: 0.937651227410508, iteration: 218992
loss: 0.9880741834640503,grad_norm: 0.7921132643875777, iteration: 218993
loss: 0.9861797094345093,grad_norm: 0.9999990774750552, iteration: 218994
loss: 0.9969392418861389,grad_norm: 0.9999990493256672, iteration: 218995
loss: 0.9991579055786133,grad_norm: 0.999999994445542, iteration: 218996
loss: 0.9801853895187378,grad_norm: 0.9999990716919164, iteration: 218997
loss: 1.029972791671753,grad_norm: 0.9999990784258049, iteration: 218998
loss: 0.9830803275108337,grad_norm: 0.8523653305337673, iteration: 218999
loss: 1.0093324184417725,grad_norm: 0.9999990310558798, iteration: 219000
loss: 1.014005422592163,grad_norm: 0.999998947817461, iteration: 219001
loss: 0.9858781695365906,grad_norm: 0.9047864290594015, iteration: 219002
loss: 1.0146894454956055,grad_norm: 0.935997961067649, iteration: 219003
loss: 0.9891514778137207,grad_norm: 0.7936381776605829, iteration: 219004
loss: 1.0338997840881348,grad_norm: 0.9999992721953086, iteration: 219005
loss: 0.9992250800132751,grad_norm: 0.8621220853929384, iteration: 219006
loss: 1.0220365524291992,grad_norm: 0.9999989417585519, iteration: 219007
loss: 1.0198262929916382,grad_norm: 0.9337361323887252, iteration: 219008
loss: 1.08369779586792,grad_norm: 0.9999999510881513, iteration: 219009
loss: 0.9825703501701355,grad_norm: 0.9999990522056282, iteration: 219010
loss: 0.9979082345962524,grad_norm: 0.9759898487393626, iteration: 219011
loss: 0.9800679683685303,grad_norm: 0.91926330803433, iteration: 219012
loss: 0.9923791885375977,grad_norm: 0.9999991528533201, iteration: 219013
loss: 0.9970501661300659,grad_norm: 0.9192896721539342, iteration: 219014
loss: 1.0082659721374512,grad_norm: 0.7905973007037539, iteration: 219015
loss: 0.9532826542854309,grad_norm: 0.9740503663662157, iteration: 219016
loss: 0.990108072757721,grad_norm: 0.716857045748852, iteration: 219017
loss: 0.9993891716003418,grad_norm: 0.9779369255900737, iteration: 219018
loss: 0.9591867327690125,grad_norm: 0.8315764670382325, iteration: 219019
loss: 0.9862667322158813,grad_norm: 0.8843174992372159, iteration: 219020
loss: 1.0493760108947754,grad_norm: 0.9965758317953588, iteration: 219021
loss: 1.0092436075210571,grad_norm: 0.9071332152656607, iteration: 219022
loss: 0.9836768507957458,grad_norm: 0.9424907356199184, iteration: 219023
loss: 1.0218920707702637,grad_norm: 0.9882693864615593, iteration: 219024
loss: 1.011380672454834,grad_norm: 0.9999990697840203, iteration: 219025
loss: 0.9798779487609863,grad_norm: 0.9605755062083894, iteration: 219026
loss: 1.0068230628967285,grad_norm: 0.9999990440526666, iteration: 219027
loss: 1.019127368927002,grad_norm: 0.964772642179064, iteration: 219028
loss: 1.0136001110076904,grad_norm: 0.8003306494989831, iteration: 219029
loss: 0.9639438390731812,grad_norm: 0.7520368826379153, iteration: 219030
loss: 1.0082932710647583,grad_norm: 0.8574767226482466, iteration: 219031
loss: 0.9839827418327332,grad_norm: 0.8815587105287641, iteration: 219032
loss: 0.9945759773254395,grad_norm: 0.8923424324104572, iteration: 219033
loss: 1.0107101202011108,grad_norm: 0.9999990287497476, iteration: 219034
loss: 0.9791117310523987,grad_norm: 0.8378646545356019, iteration: 219035
loss: 1.0006641149520874,grad_norm: 0.98767395135345, iteration: 219036
loss: 1.0410833358764648,grad_norm: 0.9507223748860129, iteration: 219037
loss: 0.9784354567527771,grad_norm: 0.9775085944396182, iteration: 219038
loss: 1.0076342821121216,grad_norm: 0.9999990752632422, iteration: 219039
loss: 0.9983369708061218,grad_norm: 0.9365999031109595, iteration: 219040
loss: 0.9599317908287048,grad_norm: 0.8022650468162743, iteration: 219041
loss: 1.0202666521072388,grad_norm: 0.9675090476777654, iteration: 219042
loss: 1.024886965751648,grad_norm: 0.8090991517278379, iteration: 219043
loss: 1.0091103315353394,grad_norm: 0.7924338101254729, iteration: 219044
loss: 1.0156012773513794,grad_norm: 0.9999990961139629, iteration: 219045
loss: 0.9916385412216187,grad_norm: 0.9438420191314459, iteration: 219046
loss: 0.9634828567504883,grad_norm: 0.9551294882943644, iteration: 219047
loss: 0.9995827674865723,grad_norm: 0.9462983157856615, iteration: 219048
loss: 1.0033701658248901,grad_norm: 0.999999345478319, iteration: 219049
loss: 1.0078449249267578,grad_norm: 0.852131255949072, iteration: 219050
loss: 0.9878526329994202,grad_norm: 0.9864668569473577, iteration: 219051
loss: 0.9921674132347107,grad_norm: 0.8092441335431672, iteration: 219052
loss: 0.961825966835022,grad_norm: 0.9999990464190044, iteration: 219053
loss: 1.007849931716919,grad_norm: 0.852611286714103, iteration: 219054
loss: 0.9857351183891296,grad_norm: 0.9999989759087766, iteration: 219055
loss: 1.031836748123169,grad_norm: 0.999999026145182, iteration: 219056
loss: 1.0040868520736694,grad_norm: 0.7877364321040667, iteration: 219057
loss: 0.9680527448654175,grad_norm: 0.999998894476574, iteration: 219058
loss: 0.9761207103729248,grad_norm: 0.9999991165760569, iteration: 219059
loss: 0.9574993252754211,grad_norm: 0.7739042471521098, iteration: 219060
loss: 0.9905703067779541,grad_norm: 0.9119460590335396, iteration: 219061
loss: 0.996999204158783,grad_norm: 0.9999991409148073, iteration: 219062
loss: 0.9890933036804199,grad_norm: 0.8941562211120323, iteration: 219063
loss: 0.9985389113426208,grad_norm: 0.6914733356838193, iteration: 219064
loss: 1.015845775604248,grad_norm: 0.9999991667788332, iteration: 219065
loss: 1.012459635734558,grad_norm: 0.7815587496321023, iteration: 219066
loss: 1.0456830263137817,grad_norm: 0.9999994004091953, iteration: 219067
loss: 0.9921383261680603,grad_norm: 0.9999991016899782, iteration: 219068
loss: 0.9976263046264648,grad_norm: 0.9097177697222694, iteration: 219069
loss: 1.0254658460617065,grad_norm: 0.9999992191468988, iteration: 219070
loss: 0.9849986433982849,grad_norm: 0.8641213022261375, iteration: 219071
loss: 0.9599454998970032,grad_norm: 0.9999991140512992, iteration: 219072
loss: 1.0117723941802979,grad_norm: 0.8774453684167142, iteration: 219073
loss: 0.9755253195762634,grad_norm: 0.8775738507759634, iteration: 219074
loss: 0.999929666519165,grad_norm: 0.9999989751707966, iteration: 219075
loss: 1.0046908855438232,grad_norm: 0.7725672374370344, iteration: 219076
loss: 1.014203429222107,grad_norm: 0.7886645346873834, iteration: 219077
loss: 0.9912655353546143,grad_norm: 0.7501786975638323, iteration: 219078
loss: 1.0034502744674683,grad_norm: 0.7783293690607641, iteration: 219079
loss: 0.9899560809135437,grad_norm: 0.8751122579040923, iteration: 219080
loss: 0.9667772650718689,grad_norm: 0.9195908464364113, iteration: 219081
loss: 0.9828760027885437,grad_norm: 0.9999990509224034, iteration: 219082
loss: 0.9829110503196716,grad_norm: 0.9999990549990658, iteration: 219083
loss: 1.038059949874878,grad_norm: 0.8218488182296534, iteration: 219084
loss: 0.9866024255752563,grad_norm: 0.960610323167026, iteration: 219085
loss: 0.9995082020759583,grad_norm: 0.9999992764195469, iteration: 219086
loss: 0.9884142875671387,grad_norm: 0.9999991819282481, iteration: 219087
loss: 1.0040104389190674,grad_norm: 0.7993681753606517, iteration: 219088
loss: 1.0012035369873047,grad_norm: 0.971951813572542, iteration: 219089
loss: 1.0491127967834473,grad_norm: 0.9999990763994423, iteration: 219090
loss: 0.983523964881897,grad_norm: 0.8049839850969277, iteration: 219091
loss: 1.0140278339385986,grad_norm: 0.9999991827111219, iteration: 219092
loss: 1.0444990396499634,grad_norm: 0.9999991783449269, iteration: 219093
loss: 1.0024125576019287,grad_norm: 0.7685981089462903, iteration: 219094
loss: 0.9879229068756104,grad_norm: 0.9379566194785647, iteration: 219095
loss: 1.0057951211929321,grad_norm: 0.8398829367924178, iteration: 219096
loss: 1.0270190238952637,grad_norm: 0.9999991040649645, iteration: 219097
loss: 0.9962634444236755,grad_norm: 0.9999993731680495, iteration: 219098
loss: 1.0360866785049438,grad_norm: 0.9999992120925291, iteration: 219099
loss: 0.9438444375991821,grad_norm: 0.9358708666704924, iteration: 219100
loss: 1.0142909288406372,grad_norm: 0.9531396969341054, iteration: 219101
loss: 0.9939013719558716,grad_norm: 0.9128838427038422, iteration: 219102
loss: 0.9929590821266174,grad_norm: 0.94207540734035, iteration: 219103
loss: 0.998082160949707,grad_norm: 0.9999992150520413, iteration: 219104
loss: 0.9753229022026062,grad_norm: 0.8878005735563647, iteration: 219105
loss: 1.0714960098266602,grad_norm: 0.9999995233797415, iteration: 219106
loss: 1.0310866832733154,grad_norm: 0.9857227519231039, iteration: 219107
loss: 0.9990646839141846,grad_norm: 0.9999992388540933, iteration: 219108
loss: 1.0073316097259521,grad_norm: 0.8470423082179994, iteration: 219109
loss: 0.9965002536773682,grad_norm: 0.7883991397001906, iteration: 219110
loss: 1.0169543027877808,grad_norm: 0.8345463125017591, iteration: 219111
loss: 1.0259432792663574,grad_norm: 0.9999995796939342, iteration: 219112
loss: 0.9629373550415039,grad_norm: 0.999999091767589, iteration: 219113
loss: 1.0249724388122559,grad_norm: 0.9999992253821431, iteration: 219114
loss: 0.9940232038497925,grad_norm: 0.999998928136369, iteration: 219115
loss: 1.018250584602356,grad_norm: 0.9494794753292403, iteration: 219116
loss: 1.0293331146240234,grad_norm: 0.9999999504786958, iteration: 219117
loss: 0.978288471698761,grad_norm: 0.9528627710109335, iteration: 219118
loss: 0.9985355138778687,grad_norm: 0.9598731013265156, iteration: 219119
loss: 0.9954838752746582,grad_norm: 0.9038712751139651, iteration: 219120
loss: 0.9965547919273376,grad_norm: 0.8900115642154532, iteration: 219121
loss: 1.051779866218567,grad_norm: 0.9157002455613096, iteration: 219122
loss: 0.9921496510505676,grad_norm: 0.9269980097500851, iteration: 219123
loss: 0.9891251921653748,grad_norm: 0.9999991013849311, iteration: 219124
loss: 1.0387053489685059,grad_norm: 0.875714531292024, iteration: 219125
loss: 0.9537078738212585,grad_norm: 0.9024385361026004, iteration: 219126
loss: 1.0325655937194824,grad_norm: 0.9999992737397807, iteration: 219127
loss: 0.987531304359436,grad_norm: 0.9814302443354093, iteration: 219128
loss: 1.0193895101547241,grad_norm: 0.9999993027184254, iteration: 219129
loss: 1.0389293432235718,grad_norm: 0.9999990731789885, iteration: 219130
loss: 1.0150598287582397,grad_norm: 0.8978656482923507, iteration: 219131
loss: 0.9645811319351196,grad_norm: 0.9113911703862878, iteration: 219132
loss: 0.9779772758483887,grad_norm: 0.8693890895857501, iteration: 219133
loss: 0.9822912216186523,grad_norm: 0.9229231488913971, iteration: 219134
loss: 0.9780578017234802,grad_norm: 0.950963288453205, iteration: 219135
loss: 0.9916588068008423,grad_norm: 0.997059793082414, iteration: 219136
loss: 1.018770694732666,grad_norm: 0.966367650616248, iteration: 219137
loss: 1.0220614671707153,grad_norm: 0.7841475389198624, iteration: 219138
loss: 1.050559163093567,grad_norm: 0.9999996471097005, iteration: 219139
loss: 0.9598475098609924,grad_norm: 0.9424917907723865, iteration: 219140
loss: 0.9585045576095581,grad_norm: 0.9999990801039614, iteration: 219141
loss: 1.0291485786437988,grad_norm: 0.9022385674701128, iteration: 219142
loss: 1.0025581121444702,grad_norm: 0.9999992371949536, iteration: 219143
loss: 1.0494556427001953,grad_norm: 0.999999235256141, iteration: 219144
loss: 0.975376546382904,grad_norm: 0.9890355583867866, iteration: 219145
loss: 0.9868521690368652,grad_norm: 0.99999900356986, iteration: 219146
loss: 1.0109180212020874,grad_norm: 0.8628082689187109, iteration: 219147
loss: 1.0080249309539795,grad_norm: 0.7899190007242916, iteration: 219148
loss: 0.9965308904647827,grad_norm: 0.9400563809901566, iteration: 219149
loss: 1.00811767578125,grad_norm: 0.9525712706416556, iteration: 219150
loss: 1.0057380199432373,grad_norm: 0.9517074573065624, iteration: 219151
loss: 0.9876547455787659,grad_norm: 0.9050472883988673, iteration: 219152
loss: 1.0121132135391235,grad_norm: 0.8730130393006393, iteration: 219153
loss: 1.0065147876739502,grad_norm: 0.9061482287672449, iteration: 219154
loss: 1.004462718963623,grad_norm: 0.9999992659551232, iteration: 219155
loss: 0.9929627180099487,grad_norm: 0.8497223266645678, iteration: 219156
loss: 0.9857912659645081,grad_norm: 0.9565330245894915, iteration: 219157
loss: 1.0107775926589966,grad_norm: 0.8766533019970558, iteration: 219158
loss: 0.9611258506774902,grad_norm: 0.9999989980427871, iteration: 219159
loss: 1.025598406791687,grad_norm: 0.9999994712132759, iteration: 219160
loss: 1.0171879529953003,grad_norm: 0.908199700221893, iteration: 219161
loss: 1.0035067796707153,grad_norm: 0.9999990886710558, iteration: 219162
loss: 0.9836081266403198,grad_norm: 0.9915215505566746, iteration: 219163
loss: 1.0229380130767822,grad_norm: 0.9999992679026046, iteration: 219164
loss: 1.0152289867401123,grad_norm: 0.9475768458906092, iteration: 219165
loss: 1.0050817728042603,grad_norm: 0.8803057627187072, iteration: 219166
loss: 1.0438395738601685,grad_norm: 0.9326171423854408, iteration: 219167
loss: 0.9934114813804626,grad_norm: 0.7757661184820512, iteration: 219168
loss: 1.0183205604553223,grad_norm: 0.8362429364872773, iteration: 219169
loss: 1.0049453973770142,grad_norm: 0.9999990166893771, iteration: 219170
loss: 1.0028600692749023,grad_norm: 0.7482781049789264, iteration: 219171
loss: 0.9797890782356262,grad_norm: 0.8771930725890671, iteration: 219172
loss: 1.040230631828308,grad_norm: 0.9722220207821651, iteration: 219173
loss: 0.99905925989151,grad_norm: 0.9999990972646436, iteration: 219174
loss: 0.9739385843276978,grad_norm: 0.931772057535672, iteration: 219175
loss: 1.0055094957351685,grad_norm: 0.9999992067266081, iteration: 219176
loss: 0.9937494993209839,grad_norm: 0.9819601290861969, iteration: 219177
loss: 0.9931805729866028,grad_norm: 0.8849396549829193, iteration: 219178
loss: 1.0471190214157104,grad_norm: 0.9042469917477631, iteration: 219179
loss: 1.043514609336853,grad_norm: 0.9999998901772097, iteration: 219180
loss: 1.0301004648208618,grad_norm: 0.8393543975811486, iteration: 219181
loss: 1.0901826620101929,grad_norm: 0.9999989993876264, iteration: 219182
loss: 0.9763622879981995,grad_norm: 0.8983432353805164, iteration: 219183
loss: 1.0219063758850098,grad_norm: 0.8612417886271527, iteration: 219184
loss: 0.9665236473083496,grad_norm: 0.9881455326252365, iteration: 219185
loss: 0.9553071856498718,grad_norm: 0.856853315158769, iteration: 219186
loss: 1.0045608282089233,grad_norm: 0.9725633435002954, iteration: 219187
loss: 1.026887059211731,grad_norm: 0.9824565637948587, iteration: 219188
loss: 1.016216516494751,grad_norm: 0.9999991016424503, iteration: 219189
loss: 1.1527758836746216,grad_norm: 0.9999991145598992, iteration: 219190
loss: 1.0216556787490845,grad_norm: 0.7880548006874746, iteration: 219191
loss: 1.006456732749939,grad_norm: 0.8949341238079731, iteration: 219192
loss: 1.0165271759033203,grad_norm: 0.899772382064409, iteration: 219193
loss: 0.9609729647636414,grad_norm: 0.9267870792431977, iteration: 219194
loss: 0.9874138236045837,grad_norm: 0.7016760209074906, iteration: 219195
loss: 0.952227771282196,grad_norm: 0.8392320213830464, iteration: 219196
loss: 1.0024336576461792,grad_norm: 0.9002156372550437, iteration: 219197
loss: 1.0065162181854248,grad_norm: 0.9059713842752889, iteration: 219198
loss: 0.9826577305793762,grad_norm: 0.8754491143337209, iteration: 219199
loss: 0.9995018839836121,grad_norm: 0.9745562731977606, iteration: 219200
loss: 0.9647676348686218,grad_norm: 0.8953052675430557, iteration: 219201
loss: 1.0408390760421753,grad_norm: 0.9433179529585999, iteration: 219202
loss: 0.9854934811592102,grad_norm: 0.9670898293125029, iteration: 219203
loss: 1.0122426748275757,grad_norm: 0.9309625600429459, iteration: 219204
loss: 1.0293054580688477,grad_norm: 0.9999991376000699, iteration: 219205
loss: 1.0346204042434692,grad_norm: 0.9753277676511137, iteration: 219206
loss: 1.0148096084594727,grad_norm: 0.9978148108098099, iteration: 219207
loss: 0.9934614300727844,grad_norm: 0.7947960903844253, iteration: 219208
loss: 1.0034056901931763,grad_norm: 0.9999991013173047, iteration: 219209
loss: 1.0010180473327637,grad_norm: 0.8956225558678426, iteration: 219210
loss: 1.0072824954986572,grad_norm: 0.80738150820569, iteration: 219211
loss: 0.955994188785553,grad_norm: 0.8236932133079945, iteration: 219212
loss: 0.9998780488967896,grad_norm: 0.8971639504475047, iteration: 219213
loss: 0.9850127100944519,grad_norm: 0.9903744674739419, iteration: 219214
loss: 0.9921690225601196,grad_norm: 0.7346158310164308, iteration: 219215
loss: 0.9726002216339111,grad_norm: 0.8563557370680397, iteration: 219216
loss: 1.0266200304031372,grad_norm: 0.9742734882342071, iteration: 219217
loss: 1.0442073345184326,grad_norm: 0.8042060335640051, iteration: 219218
loss: 1.0092878341674805,grad_norm: 0.762048750154795, iteration: 219219
loss: 1.002023696899414,grad_norm: 0.9999991534204742, iteration: 219220
loss: 1.009912133216858,grad_norm: 0.9099568274114282, iteration: 219221
loss: 1.016055703163147,grad_norm: 0.9999992468770007, iteration: 219222
loss: 1.0259158611297607,grad_norm: 0.9579931248871524, iteration: 219223
loss: 0.9922571778297424,grad_norm: 0.9999991497481495, iteration: 219224
loss: 0.9580032825469971,grad_norm: 0.963807593099671, iteration: 219225
loss: 1.0012321472167969,grad_norm: 0.9474029485543624, iteration: 219226
loss: 1.0006386041641235,grad_norm: 0.9254314273931185, iteration: 219227
loss: 1.0070760250091553,grad_norm: 0.8076898132983481, iteration: 219228
loss: 0.999764084815979,grad_norm: 0.84614985879277, iteration: 219229
loss: 1.023269772529602,grad_norm: 0.9999990949795114, iteration: 219230
loss: 0.9972094297409058,grad_norm: 0.9999992414143648, iteration: 219231
loss: 1.0228290557861328,grad_norm: 0.9992417904867713, iteration: 219232
loss: 0.9856582283973694,grad_norm: 0.8910442398794324, iteration: 219233
loss: 1.0117790699005127,grad_norm: 0.8363739908330896, iteration: 219234
loss: 0.9652228355407715,grad_norm: 0.9333767472525089, iteration: 219235
loss: 0.9560834765434265,grad_norm: 0.9999991561994052, iteration: 219236
loss: 0.9877095818519592,grad_norm: 0.8576989457122274, iteration: 219237
loss: 0.9924861192703247,grad_norm: 0.9115694008630082, iteration: 219238
loss: 0.9816970229148865,grad_norm: 0.9099237973404507, iteration: 219239
loss: 0.9836093187332153,grad_norm: 0.972884971675052, iteration: 219240
loss: 0.9901053309440613,grad_norm: 0.9588216900673965, iteration: 219241
loss: 1.0009396076202393,grad_norm: 0.9488281982239313, iteration: 219242
loss: 0.9909746646881104,grad_norm: 0.742598099383265, iteration: 219243
loss: 0.9704459309577942,grad_norm: 0.8084425887908482, iteration: 219244
loss: 0.9888364672660828,grad_norm: 0.9999990159809617, iteration: 219245
loss: 1.0026260614395142,grad_norm: 0.811733284993637, iteration: 219246
loss: 0.9933669567108154,grad_norm: 0.8212980037983917, iteration: 219247
loss: 1.0267298221588135,grad_norm: 0.9278184930396466, iteration: 219248
loss: 0.9834520816802979,grad_norm: 0.9677714719345686, iteration: 219249
loss: 1.0070186853408813,grad_norm: 0.9648503707911583, iteration: 219250
loss: 0.9695689082145691,grad_norm: 0.9999991442812076, iteration: 219251
loss: 0.9893585443496704,grad_norm: 0.9579923770215606, iteration: 219252
loss: 0.9938105940818787,grad_norm: 0.886890512090989, iteration: 219253
loss: 0.9923660159111023,grad_norm: 0.812382295856313, iteration: 219254
loss: 0.9946873188018799,grad_norm: 0.8793823707128965, iteration: 219255
loss: 1.0047072172164917,grad_norm: 0.9999992642425782, iteration: 219256
loss: 0.9790257215499878,grad_norm: 0.9999990918844899, iteration: 219257
loss: 0.9700952768325806,grad_norm: 0.8679990833308966, iteration: 219258
loss: 1.0077540874481201,grad_norm: 0.9644091081848495, iteration: 219259
loss: 1.0319147109985352,grad_norm: 0.9833646322705328, iteration: 219260
loss: 1.002726435661316,grad_norm: 0.9999991103468894, iteration: 219261
loss: 1.0178937911987305,grad_norm: 0.9999990076395131, iteration: 219262
loss: 0.9937719106674194,grad_norm: 0.7594428095773448, iteration: 219263
loss: 1.0102643966674805,grad_norm: 0.8844555883502417, iteration: 219264
loss: 0.9883803725242615,grad_norm: 0.8999313396504104, iteration: 219265
loss: 1.034820556640625,grad_norm: 0.7973616066731926, iteration: 219266
loss: 0.9796777367591858,grad_norm: 0.9028977693129658, iteration: 219267
loss: 0.9455024600028992,grad_norm: 0.901922266825133, iteration: 219268
loss: 1.0068550109863281,grad_norm: 0.9999991041075375, iteration: 219269
loss: 1.0104658603668213,grad_norm: 0.7584369335928601, iteration: 219270
loss: 1.0036054849624634,grad_norm: 0.9992530224496244, iteration: 219271
loss: 0.9986644983291626,grad_norm: 0.8108633490130743, iteration: 219272
loss: 0.9763169288635254,grad_norm: 0.8106107068932649, iteration: 219273
loss: 0.9847856760025024,grad_norm: 0.9999991009186678, iteration: 219274
loss: 1.0218820571899414,grad_norm: 0.9171863453097165, iteration: 219275
loss: 1.0243263244628906,grad_norm: 0.908685309055176, iteration: 219276
loss: 0.9994591474533081,grad_norm: 0.869844886274958, iteration: 219277
loss: 0.9780038595199585,grad_norm: 0.9999991733635067, iteration: 219278
loss: 0.9946538805961609,grad_norm: 0.9999991041714217, iteration: 219279
loss: 1.005070447921753,grad_norm: 0.9721237139032174, iteration: 219280
loss: 1.0095800161361694,grad_norm: 0.7399035841315368, iteration: 219281
loss: 1.029281735420227,grad_norm: 0.8949170139329578, iteration: 219282
loss: 1.0317238569259644,grad_norm: 0.9999990991198796, iteration: 219283
loss: 0.9772405624389648,grad_norm: 0.8034542434613768, iteration: 219284
loss: 0.9555408358573914,grad_norm: 0.7983185227860125, iteration: 219285
loss: 1.0052341222763062,grad_norm: 0.8475938843597858, iteration: 219286
loss: 0.9947423934936523,grad_norm: 0.9235542800555921, iteration: 219287
loss: 1.0349868535995483,grad_norm: 0.9781368740974538, iteration: 219288
loss: 1.0198055505752563,grad_norm: 0.941311171816781, iteration: 219289
loss: 0.9717751741409302,grad_norm: 0.9999990957959108, iteration: 219290
loss: 1.0083945989608765,grad_norm: 0.8866609419535699, iteration: 219291
loss: 1.0057142972946167,grad_norm: 0.9936276825922626, iteration: 219292
loss: 0.9983487725257874,grad_norm: 0.999998962937491, iteration: 219293
loss: 0.9775328040122986,grad_norm: 0.8210996149779072, iteration: 219294
loss: 1.0164172649383545,grad_norm: 0.9691516707471002, iteration: 219295
loss: 1.0108672380447388,grad_norm: 0.9999992009848152, iteration: 219296
loss: 0.9994422793388367,grad_norm: 0.9025296341621423, iteration: 219297
loss: 1.0168124437332153,grad_norm: 0.9301334460868854, iteration: 219298
loss: 1.0164812803268433,grad_norm: 0.9999990012192543, iteration: 219299
loss: 0.997605562210083,grad_norm: 0.8414172617235641, iteration: 219300
loss: 0.9956248998641968,grad_norm: 0.9999991916041628, iteration: 219301
loss: 0.9864341020584106,grad_norm: 0.8703062322740632, iteration: 219302
loss: 0.9631879925727844,grad_norm: 0.7898289059608201, iteration: 219303
loss: 1.0779699087142944,grad_norm: 0.9932397845920904, iteration: 219304
loss: 0.9937036037445068,grad_norm: 0.83779415450396, iteration: 219305
loss: 0.9778605103492737,grad_norm: 0.9494435244677573, iteration: 219306
loss: 0.9681679606437683,grad_norm: 0.8465463452375823, iteration: 219307
loss: 1.0014023780822754,grad_norm: 0.9999991927424146, iteration: 219308
loss: 0.9918904304504395,grad_norm: 0.9013311404629764, iteration: 219309
loss: 0.9564680457115173,grad_norm: 0.9999992264376851, iteration: 219310
loss: 0.9905874133110046,grad_norm: 0.8447039226450508, iteration: 219311
loss: 1.0083866119384766,grad_norm: 0.999998969044595, iteration: 219312
loss: 1.005614161491394,grad_norm: 0.9999990213155204, iteration: 219313
loss: 1.0194486379623413,grad_norm: 0.9999990018081832, iteration: 219314
loss: 0.998944878578186,grad_norm: 0.9999994708125232, iteration: 219315
loss: 0.977534830570221,grad_norm: 0.901706235730483, iteration: 219316
loss: 1.0058003664016724,grad_norm: 0.9470052958567468, iteration: 219317
loss: 1.0025197267532349,grad_norm: 0.9299635806146717, iteration: 219318
loss: 0.9821045398712158,grad_norm: 0.9999991493184229, iteration: 219319
loss: 0.950032114982605,grad_norm: 0.9855144124917488, iteration: 219320
loss: 1.020275592803955,grad_norm: 0.9999990034115979, iteration: 219321
loss: 0.986551821231842,grad_norm: 0.999999159612398, iteration: 219322
loss: 1.0340023040771484,grad_norm: 0.9999991690709212, iteration: 219323
loss: 1.032967448234558,grad_norm: 0.9136456992124741, iteration: 219324
loss: 0.9737961292266846,grad_norm: 0.9169031858979783, iteration: 219325
loss: 1.0188186168670654,grad_norm: 0.9999996577879767, iteration: 219326
loss: 1.0160300731658936,grad_norm: 0.9999990274973446, iteration: 219327
loss: 0.999950110912323,grad_norm: 0.9628497636963274, iteration: 219328
loss: 1.007248044013977,grad_norm: 0.7444461328345315, iteration: 219329
loss: 1.0017523765563965,grad_norm: 0.9308458623641152, iteration: 219330
loss: 1.0179861783981323,grad_norm: 0.752361966104506, iteration: 219331
loss: 0.9727705121040344,grad_norm: 0.9588716800005443, iteration: 219332
loss: 1.0175533294677734,grad_norm: 0.8204344432083625, iteration: 219333
loss: 0.9834425449371338,grad_norm: 0.8259763895726466, iteration: 219334
loss: 1.0128623247146606,grad_norm: 0.9589807375907917, iteration: 219335
loss: 1.039536476135254,grad_norm: 0.999999767860938, iteration: 219336
loss: 1.0252078771591187,grad_norm: 0.9668318261160626, iteration: 219337
loss: 1.1074336767196655,grad_norm: 0.9999992820093367, iteration: 219338
loss: 1.0079396963119507,grad_norm: 0.8915078440786846, iteration: 219339
loss: 0.9953609108924866,grad_norm: 0.9120620948373649, iteration: 219340
loss: 0.9849223494529724,grad_norm: 0.9999990605537568, iteration: 219341
loss: 0.9997796416282654,grad_norm: 0.9264364944205701, iteration: 219342
loss: 0.9953240156173706,grad_norm: 0.8600530466544789, iteration: 219343
loss: 1.0134130716323853,grad_norm: 0.9082917942817262, iteration: 219344
loss: 0.9623185992240906,grad_norm: 0.9999991456725148, iteration: 219345
loss: 1.0014756917953491,grad_norm: 0.9999992344438707, iteration: 219346
loss: 1.0098811388015747,grad_norm: 0.9999990684774486, iteration: 219347
loss: 1.014704704284668,grad_norm: 0.8004673752132698, iteration: 219348
loss: 0.9706478714942932,grad_norm: 0.8830404827296361, iteration: 219349
loss: 0.9955546855926514,grad_norm: 0.8740534147093262, iteration: 219350
loss: 0.9800079464912415,grad_norm: 0.9503570990383616, iteration: 219351
loss: 1.0193235874176025,grad_norm: 0.99999910134721, iteration: 219352
loss: 0.9502653479576111,grad_norm: 0.9999992281543248, iteration: 219353
loss: 0.9964250922203064,grad_norm: 0.9999990393583494, iteration: 219354
loss: 1.0494755506515503,grad_norm: 0.9999995628373072, iteration: 219355
loss: 0.9902747273445129,grad_norm: 0.9999990113376267, iteration: 219356
loss: 0.9448668956756592,grad_norm: 0.9999990483875113, iteration: 219357
loss: 0.9720607995986938,grad_norm: 0.9999990740861876, iteration: 219358
loss: 1.0147703886032104,grad_norm: 0.9080941316118352, iteration: 219359
loss: 0.9710437655448914,grad_norm: 0.9668961486844623, iteration: 219360
loss: 1.028982162475586,grad_norm: 0.9806524123151515, iteration: 219361
loss: 0.9972901940345764,grad_norm: 0.9807465927053577, iteration: 219362
loss: 1.042643666267395,grad_norm: 0.99999926467163, iteration: 219363
loss: 0.997372567653656,grad_norm: 0.9999990799961962, iteration: 219364
loss: 1.0108516216278076,grad_norm: 0.8515407597569676, iteration: 219365
loss: 0.9948655962944031,grad_norm: 0.8597351015490177, iteration: 219366
loss: 0.991483747959137,grad_norm: 0.8942917720394019, iteration: 219367
loss: 0.9501981735229492,grad_norm: 0.8256486771387155, iteration: 219368
loss: 1.0230356454849243,grad_norm: 0.9484127948022811, iteration: 219369
loss: 0.9745781421661377,grad_norm: 0.9999992110210728, iteration: 219370
loss: 0.9889172911643982,grad_norm: 0.9999990787587179, iteration: 219371
loss: 0.9843888878822327,grad_norm: 0.9999990759846361, iteration: 219372
loss: 0.955296516418457,grad_norm: 0.9502314035812862, iteration: 219373
loss: 1.0185645818710327,grad_norm: 0.876678008163912, iteration: 219374
loss: 0.9741467237472534,grad_norm: 0.8980968590370425, iteration: 219375
loss: 0.9737955331802368,grad_norm: 0.9811690560524547, iteration: 219376
loss: 1.0472043752670288,grad_norm: 0.689699155055134, iteration: 219377
loss: 1.0054607391357422,grad_norm: 0.9996539464322285, iteration: 219378
loss: 1.0323786735534668,grad_norm: 0.9751332336276405, iteration: 219379
loss: 1.0056047439575195,grad_norm: 0.9999992008619214, iteration: 219380
loss: 1.032618761062622,grad_norm: 0.9999991668629684, iteration: 219381
loss: 0.9755769968032837,grad_norm: 0.9999990801979302, iteration: 219382
loss: 1.0197253227233887,grad_norm: 0.9999992377137215, iteration: 219383
loss: 0.9723563194274902,grad_norm: 0.8904769933542825, iteration: 219384
loss: 1.0141303539276123,grad_norm: 0.9999990976806742, iteration: 219385
loss: 1.0096317529678345,grad_norm: 0.9629735613316046, iteration: 219386
loss: 1.0177768468856812,grad_norm: 0.8865384820366992, iteration: 219387
loss: 1.0027586221694946,grad_norm: 0.8926710925835395, iteration: 219388
loss: 1.03410005569458,grad_norm: 0.9825369178523451, iteration: 219389
loss: 0.9921544790267944,grad_norm: 0.8535018175999363, iteration: 219390
loss: 1.0076380968093872,grad_norm: 0.7976014533686463, iteration: 219391
loss: 1.0754040479660034,grad_norm: 0.9999998574216169, iteration: 219392
loss: 1.0036152601242065,grad_norm: 0.8237601443457111, iteration: 219393
loss: 0.982694685459137,grad_norm: 0.8817001683959422, iteration: 219394
loss: 0.9880568981170654,grad_norm: 0.8053424880459786, iteration: 219395
loss: 0.9623029828071594,grad_norm: 0.9379750417685234, iteration: 219396
loss: 0.982754111289978,grad_norm: 0.9999991641274377, iteration: 219397
loss: 1.0022555589675903,grad_norm: 0.9999990959051624, iteration: 219398
loss: 0.9915773272514343,grad_norm: 0.9182078276126748, iteration: 219399
loss: 0.9903033971786499,grad_norm: 0.9953644926559122, iteration: 219400
loss: 0.9849849343299866,grad_norm: 0.8753165138075053, iteration: 219401
loss: 1.0102757215499878,grad_norm: 0.9073687292099492, iteration: 219402
loss: 1.0164790153503418,grad_norm: 0.8196257859018959, iteration: 219403
loss: 0.9988548159599304,grad_norm: 0.96845778736753, iteration: 219404
loss: 1.0111581087112427,grad_norm: 0.7748304351671284, iteration: 219405
loss: 0.9890763163566589,grad_norm: 0.9999992248988714, iteration: 219406
loss: 1.0054643154144287,grad_norm: 0.9999990493069382, iteration: 219407
loss: 1.0211652517318726,grad_norm: 0.9999991921232912, iteration: 219408
loss: 0.9456775188446045,grad_norm: 0.8792363187903212, iteration: 219409
loss: 0.9872816801071167,grad_norm: 0.9522166660792032, iteration: 219410
loss: 0.9997965693473816,grad_norm: 0.8628452759736092, iteration: 219411
loss: 0.9646435976028442,grad_norm: 0.99999913340902, iteration: 219412
loss: 0.9802908301353455,grad_norm: 0.7948039363030196, iteration: 219413
loss: 0.9769441485404968,grad_norm: 0.8160675961046616, iteration: 219414
loss: 0.9843406081199646,grad_norm: 0.9334932975743108, iteration: 219415
loss: 0.983328104019165,grad_norm: 0.9877512093857961, iteration: 219416
loss: 0.9883944392204285,grad_norm: 0.8972748555772567, iteration: 219417
loss: 1.0416837930679321,grad_norm: 0.9999993143854345, iteration: 219418
loss: 0.9772168397903442,grad_norm: 0.9362362026030021, iteration: 219419
loss: 0.9715644717216492,grad_norm: 0.9999991644157031, iteration: 219420
loss: 0.9504188299179077,grad_norm: 0.9276735209654058, iteration: 219421
loss: 1.00444495677948,grad_norm: 0.8941797758392794, iteration: 219422
loss: 1.0057048797607422,grad_norm: 0.9944449693834654, iteration: 219423
loss: 1.0171904563903809,grad_norm: 0.9559229838456652, iteration: 219424
loss: 0.9684274792671204,grad_norm: 0.9160102076129435, iteration: 219425
loss: 0.9642316699028015,grad_norm: 0.8916492844960205, iteration: 219426
loss: 1.0095325708389282,grad_norm: 0.8367423128156698, iteration: 219427
loss: 0.9799804091453552,grad_norm: 0.8865154566748675, iteration: 219428
loss: 0.9997839331626892,grad_norm: 0.8717383277522013, iteration: 219429
loss: 1.0363504886627197,grad_norm: 0.9999992195949324, iteration: 219430
loss: 0.9859060049057007,grad_norm: 0.9999991933283204, iteration: 219431
loss: 1.0177358388900757,grad_norm: 0.9225885752189438, iteration: 219432
loss: 0.9710421562194824,grad_norm: 0.8311609486908199, iteration: 219433
loss: 0.9815841913223267,grad_norm: 0.8608074485273269, iteration: 219434
loss: 1.0261293649673462,grad_norm: 0.9964560114430107, iteration: 219435
loss: 0.9630441069602966,grad_norm: 0.9836255943820884, iteration: 219436
loss: 0.9969666600227356,grad_norm: 0.9927730256053806, iteration: 219437
loss: 1.0175617933273315,grad_norm: 0.9999991245544886, iteration: 219438
loss: 0.9946122765541077,grad_norm: 0.9352787592340776, iteration: 219439
loss: 0.9994718432426453,grad_norm: 0.8547142516717889, iteration: 219440
loss: 1.009543776512146,grad_norm: 0.9034761886609689, iteration: 219441
loss: 1.0280729532241821,grad_norm: 0.9999990124838829, iteration: 219442
loss: 0.9710131883621216,grad_norm: 0.929370531429862, iteration: 219443
loss: 1.0050991773605347,grad_norm: 0.9995342412234665, iteration: 219444
loss: 1.0059770345687866,grad_norm: 0.9679451421973446, iteration: 219445
loss: 1.0113615989685059,grad_norm: 0.9999990650995871, iteration: 219446
loss: 0.9819437861442566,grad_norm: 0.8829804179737314, iteration: 219447
loss: 1.0054075717926025,grad_norm: 0.6979525747160633, iteration: 219448
loss: 1.0034221410751343,grad_norm: 0.9999991594681785, iteration: 219449
loss: 1.0291547775268555,grad_norm: 0.8417195099246881, iteration: 219450
loss: 1.0331695079803467,grad_norm: 0.9999995204989245, iteration: 219451
loss: 1.0475457906723022,grad_norm: 0.999999119963411, iteration: 219452
loss: 0.9854260683059692,grad_norm: 0.8831133529627934, iteration: 219453
loss: 0.9966376423835754,grad_norm: 0.9999992177997094, iteration: 219454
loss: 0.9549136161804199,grad_norm: 0.8511524814909612, iteration: 219455
loss: 0.9483310580253601,grad_norm: 0.9999992160097034, iteration: 219456
loss: 0.9874921441078186,grad_norm: 0.9673587200156318, iteration: 219457
loss: 1.0361486673355103,grad_norm: 0.999999089454158, iteration: 219458
loss: 1.0221405029296875,grad_norm: 0.8453795419452925, iteration: 219459
loss: 0.9906697869300842,grad_norm: 0.9945488619735158, iteration: 219460
loss: 1.0333596467971802,grad_norm: 0.9841519939715241, iteration: 219461
loss: 1.0089325904846191,grad_norm: 0.9619323110171304, iteration: 219462
loss: 0.9864130616188049,grad_norm: 0.9999990319984176, iteration: 219463
loss: 1.0136250257492065,grad_norm: 0.9924891266809092, iteration: 219464
loss: 0.9955610632896423,grad_norm: 0.9999990550410875, iteration: 219465
loss: 0.98362797498703,grad_norm: 0.9999991255787822, iteration: 219466
loss: 0.9945600628852844,grad_norm: 0.7916179827225014, iteration: 219467
loss: 1.0142065286636353,grad_norm: 0.9999991442701351, iteration: 219468
loss: 1.0089828968048096,grad_norm: 0.8437439289532092, iteration: 219469
loss: 1.001783847808838,grad_norm: 0.8588019548344857, iteration: 219470
loss: 1.0105060338974,grad_norm: 0.9999991065729784, iteration: 219471
loss: 0.9741380214691162,grad_norm: 0.9881547697626393, iteration: 219472
loss: 1.0371829271316528,grad_norm: 0.9230020078085812, iteration: 219473
loss: 1.001469373703003,grad_norm: 0.8978401708160841, iteration: 219474
loss: 1.0429719686508179,grad_norm: 0.8153277154375784, iteration: 219475
loss: 0.9838331341743469,grad_norm: 0.8431859646211695, iteration: 219476
loss: 1.02987802028656,grad_norm: 0.9913007369423766, iteration: 219477
loss: 1.0006837844848633,grad_norm: 0.7777399627176307, iteration: 219478
loss: 1.0079379081726074,grad_norm: 0.9999990836987601, iteration: 219479
loss: 0.9766008257865906,grad_norm: 0.9999990837190357, iteration: 219480
loss: 0.9963972568511963,grad_norm: 0.8391870093202501, iteration: 219481
loss: 0.9985785484313965,grad_norm: 0.9999992500874909, iteration: 219482
loss: 0.9868662357330322,grad_norm: 0.7946612337148274, iteration: 219483
loss: 0.982351541519165,grad_norm: 0.9999994768651345, iteration: 219484
loss: 0.9973165988922119,grad_norm: 0.8412909467908083, iteration: 219485
loss: 1.00458824634552,grad_norm: 0.9999991308482638, iteration: 219486
loss: 1.011880874633789,grad_norm: 0.9894545901550573, iteration: 219487
loss: 1.0238925218582153,grad_norm: 0.9984291966426209, iteration: 219488
loss: 0.9945354461669922,grad_norm: 0.8610448499979095, iteration: 219489
loss: 0.9979228973388672,grad_norm: 0.8880160349932686, iteration: 219490
loss: 1.020688772201538,grad_norm: 0.8946902821358135, iteration: 219491
loss: 1.2750792503356934,grad_norm: 0.9999992169850027, iteration: 219492
loss: 1.030402660369873,grad_norm: 0.9999991357583028, iteration: 219493
loss: 1.0465445518493652,grad_norm: 0.7243903757193179, iteration: 219494
loss: 0.9935436844825745,grad_norm: 0.9243954381719802, iteration: 219495
loss: 1.0307406187057495,grad_norm: 0.9999995078410959, iteration: 219496
loss: 1.0264639854431152,grad_norm: 0.978321453438356, iteration: 219497
loss: 1.0240356922149658,grad_norm: 0.9829746813733697, iteration: 219498
loss: 1.026293158531189,grad_norm: 0.851040866582738, iteration: 219499
loss: 0.9860928058624268,grad_norm: 0.9999992626923515, iteration: 219500
loss: 0.9794645309448242,grad_norm: 0.9448487581676703, iteration: 219501
loss: 0.9672930240631104,grad_norm: 0.9577028028348789, iteration: 219502
loss: 0.9751223921775818,grad_norm: 0.960337963235343, iteration: 219503
loss: 0.9777217507362366,grad_norm: 0.9999991860583763, iteration: 219504
loss: 1.018235445022583,grad_norm: 0.8141535774219725, iteration: 219505
loss: 1.0146048069000244,grad_norm: 0.9999990149693937, iteration: 219506
loss: 1.0255533456802368,grad_norm: 0.9999991040171416, iteration: 219507
loss: 1.0456922054290771,grad_norm: 0.9999992440902371, iteration: 219508
loss: 0.9852200150489807,grad_norm: 0.9978510873136853, iteration: 219509
loss: 0.993963360786438,grad_norm: 0.9999992390715654, iteration: 219510
loss: 1.0231502056121826,grad_norm: 0.8976933286674138, iteration: 219511
loss: 1.0075441598892212,grad_norm: 0.9189976169918245, iteration: 219512
loss: 1.0192091464996338,grad_norm: 0.9999989066808965, iteration: 219513
loss: 1.022180438041687,grad_norm: 0.999999102782961, iteration: 219514
loss: 1.0207816362380981,grad_norm: 0.9999999079970714, iteration: 219515
loss: 1.0003477334976196,grad_norm: 0.9999991109861184, iteration: 219516
loss: 0.9782543182373047,grad_norm: 0.8861540941470908, iteration: 219517
loss: 0.9943715929985046,grad_norm: 0.9445393688199846, iteration: 219518
loss: 0.9826268553733826,grad_norm: 0.945731884840878, iteration: 219519
loss: 1.0103719234466553,grad_norm: 0.9999992151066254, iteration: 219520
loss: 1.0195927619934082,grad_norm: 0.9999990333980643, iteration: 219521
loss: 1.0049151182174683,grad_norm: 0.9012146581375006, iteration: 219522
loss: 0.9305956959724426,grad_norm: 0.8374893215087077, iteration: 219523
loss: 0.966552197933197,grad_norm: 0.8692879364824104, iteration: 219524
loss: 1.0115959644317627,grad_norm: 0.9999992062809223, iteration: 219525
loss: 0.9666207432746887,grad_norm: 0.860233570307109, iteration: 219526
loss: 1.0100575685501099,grad_norm: 0.9016714569223722, iteration: 219527
loss: 1.0047167539596558,grad_norm: 0.9094021818810171, iteration: 219528
loss: 1.0243501663208008,grad_norm: 0.8995188942546688, iteration: 219529
loss: 0.973893404006958,grad_norm: 0.9136731201768118, iteration: 219530
loss: 1.0063576698303223,grad_norm: 0.9999990379144581, iteration: 219531
loss: 0.9737191796302795,grad_norm: 0.8753336206539755, iteration: 219532
loss: 1.0175435543060303,grad_norm: 0.8298904183653079, iteration: 219533
loss: 0.996269941329956,grad_norm: 0.9060837478403843, iteration: 219534
loss: 0.9672274589538574,grad_norm: 0.9028260868250121, iteration: 219535
loss: 0.996300220489502,grad_norm: 0.808723095258705, iteration: 219536
loss: 0.960812509059906,grad_norm: 0.9999992877565614, iteration: 219537
loss: 0.9815826416015625,grad_norm: 0.940406571390256, iteration: 219538
loss: 1.0268540382385254,grad_norm: 0.9999996815190041, iteration: 219539
loss: 0.999541699886322,grad_norm: 0.9421409742639187, iteration: 219540
loss: 0.9923335313796997,grad_norm: 0.7041558140540017, iteration: 219541
loss: 0.9790907502174377,grad_norm: 0.9284882840017463, iteration: 219542
loss: 1.0064377784729004,grad_norm: 0.9999990918967351, iteration: 219543
loss: 1.0760138034820557,grad_norm: 0.9999994240847664, iteration: 219544
loss: 1.019089698791504,grad_norm: 0.7896933392957622, iteration: 219545
loss: 1.0111348628997803,grad_norm: 0.8896957514741047, iteration: 219546
loss: 1.0154807567596436,grad_norm: 0.8889109366441413, iteration: 219547
loss: 0.9980072379112244,grad_norm: 0.8198262601100782, iteration: 219548
loss: 0.9897157549858093,grad_norm: 0.8754794013242632, iteration: 219549
loss: 1.0120022296905518,grad_norm: 0.8480799316639959, iteration: 219550
loss: 1.016273856163025,grad_norm: 0.9999991720094356, iteration: 219551
loss: 1.1341981887817383,grad_norm: 0.9999996520910733, iteration: 219552
loss: 1.0142762660980225,grad_norm: 0.9999994849026312, iteration: 219553
loss: 0.9809185266494751,grad_norm: 0.807719967609003, iteration: 219554
loss: 0.9755240678787231,grad_norm: 0.9999991419442572, iteration: 219555
loss: 0.9855360388755798,grad_norm: 0.914944336321807, iteration: 219556
loss: 1.005071759223938,grad_norm: 0.9418361545860029, iteration: 219557
loss: 0.9847552180290222,grad_norm: 0.9844175267647819, iteration: 219558
loss: 1.0428110361099243,grad_norm: 0.7954837710474482, iteration: 219559
loss: 0.9801515936851501,grad_norm: 0.9999991101578772, iteration: 219560
loss: 0.9726102948188782,grad_norm: 0.7849438168395872, iteration: 219561
loss: 1.0017387866973877,grad_norm: 0.8154727354091185, iteration: 219562
loss: 0.9861399531364441,grad_norm: 0.9999991033866935, iteration: 219563
loss: 1.0294513702392578,grad_norm: 0.998921943503315, iteration: 219564
loss: 1.0092815160751343,grad_norm: 0.9291314358946818, iteration: 219565
loss: 0.9804063439369202,grad_norm: 0.8973372132407367, iteration: 219566
loss: 0.9803946614265442,grad_norm: 0.8907790221023759, iteration: 219567
loss: 0.9891374111175537,grad_norm: 0.9238546805014552, iteration: 219568
loss: 0.9993602633476257,grad_norm: 0.9999990932751323, iteration: 219569
loss: 0.9902094006538391,grad_norm: 0.9021641671899546, iteration: 219570
loss: 1.0498257875442505,grad_norm: 0.9902889449522674, iteration: 219571
loss: 0.9762961268424988,grad_norm: 0.8658790806871742, iteration: 219572
loss: 0.9824742674827576,grad_norm: 0.9999999727618616, iteration: 219573
loss: 0.9870154857635498,grad_norm: 0.9999992360979136, iteration: 219574
loss: 1.011487603187561,grad_norm: 0.9999991727473733, iteration: 219575
loss: 1.0087281465530396,grad_norm: 0.8997460152988226, iteration: 219576
loss: 1.0016586780548096,grad_norm: 0.8154157504719337, iteration: 219577
loss: 0.9590238928794861,grad_norm: 0.8933928802022466, iteration: 219578
loss: 1.006410837173462,grad_norm: 0.9999991328156109, iteration: 219579
loss: 0.9979185461997986,grad_norm: 0.8211930152781265, iteration: 219580
loss: 1.0053445100784302,grad_norm: 0.9999991674411753, iteration: 219581
loss: 0.9910572171211243,grad_norm: 0.9146285194370952, iteration: 219582
loss: 1.002854585647583,grad_norm: 0.9114925096294757, iteration: 219583
loss: 1.0197755098342896,grad_norm: 0.8852601752808101, iteration: 219584
loss: 0.9808721542358398,grad_norm: 0.9263696365379209, iteration: 219585
loss: 0.9766760468482971,grad_norm: 0.8338036389763781, iteration: 219586
loss: 1.0146143436431885,grad_norm: 0.7439650896247774, iteration: 219587
loss: 1.0313208103179932,grad_norm: 0.9175976018047087, iteration: 219588
loss: 1.0039783716201782,grad_norm: 0.9673907721804356, iteration: 219589
loss: 1.031626582145691,grad_norm: 0.9999992213606947, iteration: 219590
loss: 0.9934825897216797,grad_norm: 0.9976586585255821, iteration: 219591
loss: 1.0122777223587036,grad_norm: 0.8621307111315459, iteration: 219592
loss: 1.0105668306350708,grad_norm: 0.9999989850913017, iteration: 219593
loss: 0.9788450002670288,grad_norm: 0.9267470111287109, iteration: 219594
loss: 0.9949693083763123,grad_norm: 0.971525971718233, iteration: 219595
loss: 0.9964908361434937,grad_norm: 0.9344779090059339, iteration: 219596
loss: 1.0093071460723877,grad_norm: 0.9999991019310502, iteration: 219597
loss: 0.98792964220047,grad_norm: 0.8833639628637276, iteration: 219598
loss: 1.0066289901733398,grad_norm: 0.9491857921529805, iteration: 219599
loss: 0.9846894145011902,grad_norm: 0.9999990527633178, iteration: 219600
loss: 1.0019093751907349,grad_norm: 0.9207810863198803, iteration: 219601
loss: 0.999362051486969,grad_norm: 0.9999991204137433, iteration: 219602
loss: 1.0401111841201782,grad_norm: 0.8833102005522586, iteration: 219603
loss: 1.0103446245193481,grad_norm: 0.8595662425160709, iteration: 219604
loss: 1.02732515335083,grad_norm: 0.9890287471204638, iteration: 219605
loss: 0.9890958666801453,grad_norm: 0.8982246684770627, iteration: 219606
loss: 1.0066330432891846,grad_norm: 0.9999990077352954, iteration: 219607
loss: 0.9876397848129272,grad_norm: 0.8721756051975325, iteration: 219608
loss: 1.0143665075302124,grad_norm: 0.8537923505823022, iteration: 219609
loss: 0.9854140281677246,grad_norm: 0.8433789135403207, iteration: 219610
loss: 1.013445258140564,grad_norm: 0.9725647593292264, iteration: 219611
loss: 0.9726497530937195,grad_norm: 0.8338405784649442, iteration: 219612
loss: 1.018263816833496,grad_norm: 0.9999990749176919, iteration: 219613
loss: 0.9942198991775513,grad_norm: 0.8211476954040472, iteration: 219614
loss: 1.0252925157546997,grad_norm: 0.8557060921908527, iteration: 219615
loss: 0.9670575261116028,grad_norm: 0.819648010442733, iteration: 219616
loss: 1.0177748203277588,grad_norm: 0.9999994833495859, iteration: 219617
loss: 0.9925031661987305,grad_norm: 0.9532464448987842, iteration: 219618
loss: 1.006544828414917,grad_norm: 0.8304613012367196, iteration: 219619
loss: 1.0324008464813232,grad_norm: 0.9720321379999186, iteration: 219620
loss: 1.000709891319275,grad_norm: 0.9999990512610442, iteration: 219621
loss: 1.030207872390747,grad_norm: 0.8926625719409456, iteration: 219622
loss: 1.0093519687652588,grad_norm: 0.8777088585715666, iteration: 219623
loss: 0.9704192280769348,grad_norm: 0.9444406613054035, iteration: 219624
loss: 1.0438830852508545,grad_norm: 0.9999993523520995, iteration: 219625
loss: 0.9930853247642517,grad_norm: 0.922371577634253, iteration: 219626
loss: 0.9896049499511719,grad_norm: 0.781363920849864, iteration: 219627
loss: 0.9754372239112854,grad_norm: 0.9371171951518393, iteration: 219628
loss: 1.0210403203964233,grad_norm: 0.9999992260011638, iteration: 219629
loss: 0.9873713850975037,grad_norm: 0.8289087512720676, iteration: 219630
loss: 0.9784469604492188,grad_norm: 0.9999990821004264, iteration: 219631
loss: 0.9645238518714905,grad_norm: 0.9999990931369687, iteration: 219632
loss: 1.0051223039627075,grad_norm: 0.9851834336624496, iteration: 219633
loss: 1.014169692993164,grad_norm: 0.9265324573208693, iteration: 219634
loss: 0.9854403138160706,grad_norm: 0.9947571692455914, iteration: 219635
loss: 1.0281622409820557,grad_norm: 0.8559962357174832, iteration: 219636
loss: 0.9935728907585144,grad_norm: 0.9020363605843583, iteration: 219637
loss: 1.017311692237854,grad_norm: 0.9999989751905729, iteration: 219638
loss: 1.0129923820495605,grad_norm: 0.9798321582732104, iteration: 219639
loss: 0.9934855699539185,grad_norm: 0.9617605837538241, iteration: 219640
loss: 0.9773616194725037,grad_norm: 0.8924395944848855, iteration: 219641
loss: 0.9941238760948181,grad_norm: 0.8583703457467575, iteration: 219642
loss: 0.982962965965271,grad_norm: 0.9999992046591315, iteration: 219643
loss: 1.0071440935134888,grad_norm: 0.7907558600804802, iteration: 219644
loss: 0.9530782699584961,grad_norm: 0.9375244782953717, iteration: 219645
loss: 0.9835173487663269,grad_norm: 0.8669665241240612, iteration: 219646
loss: 0.9763509035110474,grad_norm: 0.8128416382373344, iteration: 219647
loss: 0.9821425080299377,grad_norm: 0.9620973590937781, iteration: 219648
loss: 1.0442157983779907,grad_norm: 0.9999991146113175, iteration: 219649
loss: 0.9854239821434021,grad_norm: 0.7797349529738957, iteration: 219650
loss: 1.0060499906539917,grad_norm: 0.9774953889917887, iteration: 219651
loss: 1.0115166902542114,grad_norm: 0.9532813312304349, iteration: 219652
loss: 0.9933897852897644,grad_norm: 0.8167881257826327, iteration: 219653
loss: 0.9508695006370544,grad_norm: 0.7839484918049603, iteration: 219654
loss: 0.9830448031425476,grad_norm: 0.9387460305184524, iteration: 219655
loss: 1.0175634622573853,grad_norm: 0.9999991322215606, iteration: 219656
loss: 0.9755539298057556,grad_norm: 0.847690393555761, iteration: 219657
loss: 1.1187855005264282,grad_norm: 0.9999994273593625, iteration: 219658
loss: 1.0541971921920776,grad_norm: 0.9999989378218025, iteration: 219659
loss: 0.9762220978736877,grad_norm: 0.8685993348346345, iteration: 219660
loss: 0.9784804582595825,grad_norm: 0.9999990574772755, iteration: 219661
loss: 1.010559320449829,grad_norm: 0.862335848966791, iteration: 219662
loss: 1.0071989297866821,grad_norm: 0.8986081579770977, iteration: 219663
loss: 1.0145610570907593,grad_norm: 0.9999990499346153, iteration: 219664
loss: 1.0241011381149292,grad_norm: 0.9999990573662638, iteration: 219665
loss: 1.0144553184509277,grad_norm: 0.9217071833241862, iteration: 219666
loss: 0.9997991323471069,grad_norm: 0.9999990365270293, iteration: 219667
loss: 0.9807886481285095,grad_norm: 0.9654969189747135, iteration: 219668
loss: 0.9592899084091187,grad_norm: 0.9622211986901853, iteration: 219669
loss: 1.053775668144226,grad_norm: 0.9999990354981032, iteration: 219670
loss: 1.0168641805648804,grad_norm: 0.9999991094048837, iteration: 219671
loss: 0.9929725527763367,grad_norm: 0.9755153343051798, iteration: 219672
loss: 1.0038859844207764,grad_norm: 0.8213759261690043, iteration: 219673
loss: 0.9705120325088501,grad_norm: 0.8711012351230757, iteration: 219674
loss: 0.9914723634719849,grad_norm: 0.8145903333602033, iteration: 219675
loss: 1.0044307708740234,grad_norm: 0.999999113993425, iteration: 219676
loss: 0.959298312664032,grad_norm: 0.8918785759907113, iteration: 219677
loss: 0.9904610514640808,grad_norm: 0.8602465554664699, iteration: 219678
loss: 0.9923917055130005,grad_norm: 0.8651718818460314, iteration: 219679
loss: 0.9642546772956848,grad_norm: 0.9159133100749327, iteration: 219680
loss: 0.9687470197677612,grad_norm: 0.8493895643741963, iteration: 219681
loss: 1.0011404752731323,grad_norm: 0.9587180673051491, iteration: 219682
loss: 1.0068062543869019,grad_norm: 0.8733245709056051, iteration: 219683
loss: 0.9942334890365601,grad_norm: 0.999999243502992, iteration: 219684
loss: 0.9941535592079163,grad_norm: 0.9096668881838489, iteration: 219685
loss: 0.9905855059623718,grad_norm: 0.971785643554087, iteration: 219686
loss: 1.0270568132400513,grad_norm: 0.8910340432415736, iteration: 219687
loss: 0.9939009547233582,grad_norm: 0.9743929339460279, iteration: 219688
loss: 1.0052598714828491,grad_norm: 0.9866361262191036, iteration: 219689
loss: 1.0109293460845947,grad_norm: 0.9999990869549951, iteration: 219690
loss: 1.036461353302002,grad_norm: 0.9867936906384183, iteration: 219691
loss: 0.9518349170684814,grad_norm: 0.8119804318474463, iteration: 219692
loss: 0.9651486277580261,grad_norm: 0.9872283772035293, iteration: 219693
loss: 0.9689417481422424,grad_norm: 0.9999998134853254, iteration: 219694
loss: 1.0066735744476318,grad_norm: 0.7900279966877736, iteration: 219695
loss: 1.0357849597930908,grad_norm: 0.8310354932540739, iteration: 219696
loss: 1.0250877141952515,grad_norm: 0.999999231636539, iteration: 219697
loss: 1.016015887260437,grad_norm: 0.862136477071273, iteration: 219698
loss: 1.005443811416626,grad_norm: 0.9999990457535438, iteration: 219699
loss: 0.9739603996276855,grad_norm: 0.9999991548182996, iteration: 219700
loss: 0.9446856379508972,grad_norm: 0.9067859723944403, iteration: 219701
loss: 1.0395294427871704,grad_norm: 0.9999997371390645, iteration: 219702
loss: 0.9982646703720093,grad_norm: 0.9999991562873626, iteration: 219703
loss: 1.027010202407837,grad_norm: 0.8847977531256052, iteration: 219704
loss: 1.0690628290176392,grad_norm: 0.9999999109825759, iteration: 219705
loss: 1.013744831085205,grad_norm: 0.9999990204192868, iteration: 219706
loss: 1.014146327972412,grad_norm: 0.9999991191315255, iteration: 219707
loss: 1.0608018636703491,grad_norm: 0.9999992554381889, iteration: 219708
loss: 0.9353682398796082,grad_norm: 0.868840728685061, iteration: 219709
loss: 1.0061441659927368,grad_norm: 0.9999991304884477, iteration: 219710
loss: 0.9982343316078186,grad_norm: 0.850327685587109, iteration: 219711
loss: 1.0338854789733887,grad_norm: 0.9999989892088622, iteration: 219712
loss: 1.0610848665237427,grad_norm: 0.9999990334190779, iteration: 219713
loss: 1.0988446474075317,grad_norm: 0.9999998607183263, iteration: 219714
loss: 1.0015698671340942,grad_norm: 0.8293234732211495, iteration: 219715
loss: 1.0005302429199219,grad_norm: 0.8881499215369799, iteration: 219716
loss: 0.9872573614120483,grad_norm: 0.9999990794689119, iteration: 219717
loss: 1.013206124305725,grad_norm: 0.999999012541625, iteration: 219718
loss: 1.063753366470337,grad_norm: 0.9999991277090015, iteration: 219719
loss: 1.146086573600769,grad_norm: 0.9413116773038274, iteration: 219720
loss: 1.0114887952804565,grad_norm: 0.9999991715144989, iteration: 219721
loss: 1.1727486848831177,grad_norm: 0.9999997209566458, iteration: 219722
loss: 1.0330631732940674,grad_norm: 0.9999991115190104, iteration: 219723
loss: 0.9971863627433777,grad_norm: 0.9999998351733378, iteration: 219724
loss: 1.0059505701065063,grad_norm: 0.9999991983285054, iteration: 219725
loss: 1.0171886682510376,grad_norm: 0.7469595255613005, iteration: 219726
loss: 1.0072031021118164,grad_norm: 0.9999994211605563, iteration: 219727
loss: 1.0097216367721558,grad_norm: 0.878391870242054, iteration: 219728
loss: 1.0207587480545044,grad_norm: 0.8902644277141407, iteration: 219729
loss: 0.952544629573822,grad_norm: 0.8163716705775084, iteration: 219730
loss: 0.9951751232147217,grad_norm: 0.9408869443445955, iteration: 219731
loss: 1.0213954448699951,grad_norm: 0.8670194593018433, iteration: 219732
loss: 1.0020651817321777,grad_norm: 0.9470248954546638, iteration: 219733
loss: 0.9667222499847412,grad_norm: 0.9278408438612246, iteration: 219734
loss: 0.9759551286697388,grad_norm: 0.8563964297019817, iteration: 219735
loss: 1.0140597820281982,grad_norm: 0.9807065975006849, iteration: 219736
loss: 1.0019901990890503,grad_norm: 0.9616021772839533, iteration: 219737
loss: 0.9845772385597229,grad_norm: 0.8287291941266103, iteration: 219738
loss: 0.984899640083313,grad_norm: 0.9705254382395936, iteration: 219739
loss: 0.9670705795288086,grad_norm: 0.9999991844506266, iteration: 219740
loss: 1.0008734464645386,grad_norm: 0.9401694678843537, iteration: 219741
loss: 1.0130808353424072,grad_norm: 0.9532902829868396, iteration: 219742
loss: 1.001304268836975,grad_norm: 0.7954483886724443, iteration: 219743
loss: 1.0962940454483032,grad_norm: 0.9999991280786534, iteration: 219744
loss: 0.9854775071144104,grad_norm: 0.8027330035280813, iteration: 219745
loss: 1.142511010169983,grad_norm: 0.9999992499033391, iteration: 219746
loss: 0.9927045702934265,grad_norm: 0.8653377491452812, iteration: 219747
loss: 0.984809398651123,grad_norm: 0.9978787154218355, iteration: 219748
loss: 1.0593534708023071,grad_norm: 0.9482726481347014, iteration: 219749
loss: 0.9875169992446899,grad_norm: 0.9999991146930911, iteration: 219750
loss: 1.0357080698013306,grad_norm: 0.8589199095696106, iteration: 219751
loss: 0.9755471348762512,grad_norm: 0.8308065619123307, iteration: 219752
loss: 1.0421055555343628,grad_norm: 0.9999992025071813, iteration: 219753
loss: 0.9543566703796387,grad_norm: 0.9810987836365368, iteration: 219754
loss: 0.9679243564605713,grad_norm: 0.8035202881765487, iteration: 219755
loss: 0.9466190338134766,grad_norm: 0.9594785914543587, iteration: 219756
loss: 0.9626675248146057,grad_norm: 0.9999992072734128, iteration: 219757
loss: 1.0293779373168945,grad_norm: 0.9999992644147759, iteration: 219758
loss: 0.9999273419380188,grad_norm: 0.9999990675296612, iteration: 219759
loss: 1.0189467668533325,grad_norm: 0.9999992532979247, iteration: 219760
loss: 1.0054254531860352,grad_norm: 0.9999992802708504, iteration: 219761
loss: 1.0072191953659058,grad_norm: 0.7191543077659003, iteration: 219762
loss: 1.0111546516418457,grad_norm: 0.92934568084913, iteration: 219763
loss: 1.001309871673584,grad_norm: 0.8828498269861136, iteration: 219764
loss: 0.9839234948158264,grad_norm: 0.999999109267894, iteration: 219765
loss: 0.9815176725387573,grad_norm: 0.9837448711475948, iteration: 219766
loss: 0.9962483048439026,grad_norm: 0.9597921102955419, iteration: 219767
loss: 0.9735320210456848,grad_norm: 0.9999995969561026, iteration: 219768
loss: 1.0196728706359863,grad_norm: 0.8800270779629941, iteration: 219769
loss: 0.9958603382110596,grad_norm: 0.8439106109581676, iteration: 219770
loss: 1.0199943780899048,grad_norm: 0.7535252368014597, iteration: 219771
loss: 0.9903399348258972,grad_norm: 0.9856645303714716, iteration: 219772
loss: 0.9919255971908569,grad_norm: 0.8262960187185432, iteration: 219773
loss: 1.0130178928375244,grad_norm: 0.8499567278476717, iteration: 219774
loss: 0.9788936376571655,grad_norm: 0.9999990229880136, iteration: 219775
loss: 0.9953079223632812,grad_norm: 0.9999990498929512, iteration: 219776
loss: 1.0320056676864624,grad_norm: 0.8734244824619913, iteration: 219777
loss: 1.1166906356811523,grad_norm: 0.9999990859789334, iteration: 219778
loss: 0.9923364520072937,grad_norm: 0.9999990133915103, iteration: 219779
loss: 1.027754783630371,grad_norm: 0.9499169745685558, iteration: 219780
loss: 1.0180084705352783,grad_norm: 0.9999999240651696, iteration: 219781
loss: 0.9843244552612305,grad_norm: 0.9710195091182464, iteration: 219782
loss: 0.9762906432151794,grad_norm: 0.73621101328293, iteration: 219783
loss: 0.9950787425041199,grad_norm: 0.9598889033931888, iteration: 219784
loss: 0.9823010563850403,grad_norm: 0.9999991666316611, iteration: 219785
loss: 0.9670356512069702,grad_norm: 0.8287388214343903, iteration: 219786
loss: 1.0097697973251343,grad_norm: 0.8978717618627398, iteration: 219787
loss: 0.9824432134628296,grad_norm: 0.776042839967787, iteration: 219788
loss: 1.0153905153274536,grad_norm: 0.9999991762554108, iteration: 219789
loss: 0.9922617077827454,grad_norm: 0.8832227522299114, iteration: 219790
loss: 0.9920878410339355,grad_norm: 0.8910481005485718, iteration: 219791
loss: 1.020205020904541,grad_norm: 0.843214792303964, iteration: 219792
loss: 1.0844500064849854,grad_norm: 0.9999991777559512, iteration: 219793
loss: 0.9964324235916138,grad_norm: 0.9999990330816512, iteration: 219794
loss: 0.9941185712814331,grad_norm: 0.952512986428393, iteration: 219795
loss: 1.0009093284606934,grad_norm: 0.9344220503699113, iteration: 219796
loss: 1.0362555980682373,grad_norm: 0.8820768945643492, iteration: 219797
loss: 1.0053447484970093,grad_norm: 0.9409422065153139, iteration: 219798
loss: 0.9931206703186035,grad_norm: 0.8834360076083695, iteration: 219799
loss: 1.0069125890731812,grad_norm: 0.8799861871948474, iteration: 219800
loss: 0.9704493880271912,grad_norm: 0.9999991366269041, iteration: 219801
loss: 1.0285406112670898,grad_norm: 0.868993763457423, iteration: 219802
loss: 1.0172204971313477,grad_norm: 0.9999991598762854, iteration: 219803
loss: 0.9836382269859314,grad_norm: 0.9879856572522621, iteration: 219804
loss: 1.025363802909851,grad_norm: 0.9999992703910835, iteration: 219805
loss: 1.000328779220581,grad_norm: 0.9273012742703335, iteration: 219806
loss: 0.9806782007217407,grad_norm: 0.81054347185743, iteration: 219807
loss: 0.966491162776947,grad_norm: 0.9999991554676216, iteration: 219808
loss: 1.008203148841858,grad_norm: 0.9594506818065952, iteration: 219809
loss: 0.969016432762146,grad_norm: 0.8883849808397216, iteration: 219810
loss: 0.9921293258666992,grad_norm: 0.7357382020373654, iteration: 219811
loss: 0.9743524193763733,grad_norm: 0.8717544305938102, iteration: 219812
loss: 0.9847541451454163,grad_norm: 0.8893846295310199, iteration: 219813
loss: 1.0197612047195435,grad_norm: 0.8099643135780474, iteration: 219814
loss: 1.0122973918914795,grad_norm: 0.9293640769710605, iteration: 219815
loss: 0.9901161789894104,grad_norm: 0.8880645116394646, iteration: 219816
loss: 1.0030442476272583,grad_norm: 0.9730817524603445, iteration: 219817
loss: 1.0198843479156494,grad_norm: 0.9999990163774514, iteration: 219818
loss: 0.9780397415161133,grad_norm: 0.9999992244731527, iteration: 219819
loss: 0.9920656681060791,grad_norm: 0.9296171251882476, iteration: 219820
loss: 0.9699447154998779,grad_norm: 0.9425481151107914, iteration: 219821
loss: 0.9942701458930969,grad_norm: 0.9291675902723511, iteration: 219822
loss: 0.9740399718284607,grad_norm: 0.9999988883358406, iteration: 219823
loss: 0.9627363681793213,grad_norm: 0.9816575207202829, iteration: 219824
loss: 0.9931408762931824,grad_norm: 0.9999993191791638, iteration: 219825
loss: 1.0536537170410156,grad_norm: 0.9699191451657686, iteration: 219826
loss: 0.967970609664917,grad_norm: 0.9687375341890804, iteration: 219827
loss: 1.0125030279159546,grad_norm: 0.9999990974896675, iteration: 219828
loss: 1.0579982995986938,grad_norm: 0.9999998326975486, iteration: 219829
loss: 1.031664252281189,grad_norm: 0.9999990985801571, iteration: 219830
loss: 0.9760909080505371,grad_norm: 0.8379419373369031, iteration: 219831
loss: 0.9679005146026611,grad_norm: 0.9377789403141515, iteration: 219832
loss: 1.0024298429489136,grad_norm: 0.9899640519425038, iteration: 219833
loss: 1.0036828517913818,grad_norm: 0.7821319907240739, iteration: 219834
loss: 0.9835237860679626,grad_norm: 0.9999990593253489, iteration: 219835
loss: 0.986979067325592,grad_norm: 0.7649925686332143, iteration: 219836
loss: 1.0300416946411133,grad_norm: 0.9106881744022844, iteration: 219837
loss: 0.9938108921051025,grad_norm: 0.8752073953600945, iteration: 219838
loss: 1.0112351179122925,grad_norm: 0.919530600903817, iteration: 219839
loss: 1.0117861032485962,grad_norm: 0.7905116417664624, iteration: 219840
loss: 0.9887398481369019,grad_norm: 0.8603718440877589, iteration: 219841
loss: 1.0576419830322266,grad_norm: 0.999999128878188, iteration: 219842
loss: 1.0206087827682495,grad_norm: 0.9655454243531669, iteration: 219843
loss: 0.972893238067627,grad_norm: 0.8464314047314501, iteration: 219844
loss: 1.0020651817321777,grad_norm: 0.9999990380777949, iteration: 219845
loss: 0.9858960509300232,grad_norm: 0.8157711895546321, iteration: 219846
loss: 1.0091681480407715,grad_norm: 0.9749698225307456, iteration: 219847
loss: 0.9784089922904968,grad_norm: 0.901031086605972, iteration: 219848
loss: 1.0496100187301636,grad_norm: 0.9473521084316295, iteration: 219849
loss: 0.9651603698730469,grad_norm: 0.9388869582983193, iteration: 219850
loss: 1.0194612741470337,grad_norm: 0.99999999969843, iteration: 219851
loss: 1.0199910402297974,grad_norm: 0.9999992795725604, iteration: 219852
loss: 1.0013365745544434,grad_norm: 0.9796043832615935, iteration: 219853
loss: 0.9719622731208801,grad_norm: 0.9999989915082813, iteration: 219854
loss: 0.9965237975120544,grad_norm: 0.9282718748680009, iteration: 219855
loss: 0.9872704148292542,grad_norm: 0.8887611986870168, iteration: 219856
loss: 0.9912612438201904,grad_norm: 0.8223746421667155, iteration: 219857
loss: 1.0035698413848877,grad_norm: 0.9703015559311858, iteration: 219858
loss: 0.948027491569519,grad_norm: 0.9725897369230203, iteration: 219859
loss: 1.0010261535644531,grad_norm: 0.9999991765697915, iteration: 219860
loss: 1.0195029973983765,grad_norm: 0.8646629678654739, iteration: 219861
loss: 0.9751864671707153,grad_norm: 0.968980594270073, iteration: 219862
loss: 0.9978341460227966,grad_norm: 0.999999008200329, iteration: 219863
loss: 0.9891876578330994,grad_norm: 0.879527156132645, iteration: 219864
loss: 0.9826738834381104,grad_norm: 0.9346399665460924, iteration: 219865
loss: 0.9971027970314026,grad_norm: 0.9543700350633784, iteration: 219866
loss: 0.966533899307251,grad_norm: 0.873817704309772, iteration: 219867
loss: 1.0293430089950562,grad_norm: 0.9999998026260187, iteration: 219868
loss: 0.9995660185813904,grad_norm: 0.8297996007560964, iteration: 219869
loss: 0.9879477620124817,grad_norm: 0.9999988499859777, iteration: 219870
loss: 1.0235763788223267,grad_norm: 0.8659941891905795, iteration: 219871
loss: 1.0350953340530396,grad_norm: 0.9879341687280186, iteration: 219872
loss: 1.0388585329055786,grad_norm: 0.9999992648963094, iteration: 219873
loss: 1.0009815692901611,grad_norm: 0.8736961559306774, iteration: 219874
loss: 1.0151642560958862,grad_norm: 0.8246606554647251, iteration: 219875
loss: 0.965312123298645,grad_norm: 0.8547118380239779, iteration: 219876
loss: 1.026869773864746,grad_norm: 0.9431518444806157, iteration: 219877
loss: 1.0143892765045166,grad_norm: 0.9247452395155638, iteration: 219878
loss: 1.0249112844467163,grad_norm: 0.9753657027734608, iteration: 219879
loss: 1.022371768951416,grad_norm: 0.7368160138926341, iteration: 219880
loss: 1.0172466039657593,grad_norm: 0.8337858970607114, iteration: 219881
loss: 1.0032826662063599,grad_norm: 0.9999990902508246, iteration: 219882
loss: 1.0106467008590698,grad_norm: 0.9949626764349496, iteration: 219883
loss: 0.9721802473068237,grad_norm: 0.9999989235120115, iteration: 219884
loss: 1.0040220022201538,grad_norm: 0.8374928641396916, iteration: 219885
loss: 1.0188974142074585,grad_norm: 0.9999992823985421, iteration: 219886
loss: 1.024739384651184,grad_norm: 0.9999991187693917, iteration: 219887
loss: 1.0141788721084595,grad_norm: 0.9694925210174947, iteration: 219888
loss: 0.9890329837799072,grad_norm: 0.9408472155514783, iteration: 219889
loss: 0.9958633184432983,grad_norm: 0.9710419360205975, iteration: 219890
loss: 1.0502259731292725,grad_norm: 0.8114035151972365, iteration: 219891
loss: 1.0266475677490234,grad_norm: 0.9999991300509138, iteration: 219892
loss: 1.0836559534072876,grad_norm: 0.9999993312166792, iteration: 219893
loss: 0.9821963906288147,grad_norm: 0.9782297753823266, iteration: 219894
loss: 0.97944176197052,grad_norm: 0.9999991498776143, iteration: 219895
loss: 0.9761607050895691,grad_norm: 0.785286300298874, iteration: 219896
loss: 1.0294514894485474,grad_norm: 0.9154983426524332, iteration: 219897
loss: 0.9807062149047852,grad_norm: 0.9999991309636912, iteration: 219898
loss: 0.9871529936790466,grad_norm: 0.9604923090971198, iteration: 219899
loss: 0.9934728741645813,grad_norm: 0.8835987966647354, iteration: 219900
loss: 0.9950979948043823,grad_norm: 0.9999995974092214, iteration: 219901
loss: 0.9908149242401123,grad_norm: 0.999999074973284, iteration: 219902
loss: 0.9940996766090393,grad_norm: 0.9100727211998157, iteration: 219903
loss: 1.0084846019744873,grad_norm: 0.9791425345446965, iteration: 219904
loss: 1.0306745767593384,grad_norm: 0.8035518843363988, iteration: 219905
loss: 0.9957342743873596,grad_norm: 0.978148578497849, iteration: 219906
loss: 0.9844098687171936,grad_norm: 0.9999991684572821, iteration: 219907
loss: 1.0118554830551147,grad_norm: 0.9999991294597296, iteration: 219908
loss: 0.9756778478622437,grad_norm: 0.9808049775785384, iteration: 219909
loss: 0.9860795736312866,grad_norm: 0.8119847796187135, iteration: 219910
loss: 0.9983580708503723,grad_norm: 0.9999991455835893, iteration: 219911
loss: 1.025321364402771,grad_norm: 0.9999991150852667, iteration: 219912
loss: 1.0107117891311646,grad_norm: 0.9818924559025423, iteration: 219913
loss: 1.0090851783752441,grad_norm: 0.9054525215643767, iteration: 219914
loss: 1.027868390083313,grad_norm: 0.9999991573991114, iteration: 219915
loss: 0.9906080365180969,grad_norm: 0.9234616086857419, iteration: 219916
loss: 0.9981683492660522,grad_norm: 0.9999994822696832, iteration: 219917
loss: 1.0129328966140747,grad_norm: 0.9347915868548998, iteration: 219918
loss: 0.9739270806312561,grad_norm: 0.963330537536883, iteration: 219919
loss: 0.9978682398796082,grad_norm: 0.819994629730097, iteration: 219920
loss: 1.0438722372055054,grad_norm: 0.9999991051407073, iteration: 219921
loss: 0.9874439835548401,grad_norm: 0.9943648264569543, iteration: 219922
loss: 0.9875192046165466,grad_norm: 0.991722994924497, iteration: 219923
loss: 0.993333637714386,grad_norm: 0.9172822012573729, iteration: 219924
loss: 1.0079418420791626,grad_norm: 0.9619637622676624, iteration: 219925
loss: 0.9777772426605225,grad_norm: 0.9378179750255942, iteration: 219926
loss: 0.9775558710098267,grad_norm: 0.9744601067886337, iteration: 219927
loss: 0.9935590028762817,grad_norm: 0.9605351888172154, iteration: 219928
loss: 1.017181158065796,grad_norm: 0.9791340395558149, iteration: 219929
loss: 0.968110203742981,grad_norm: 0.8337338715494044, iteration: 219930
loss: 0.9383485317230225,grad_norm: 0.9999991205506851, iteration: 219931
loss: 0.9998255372047424,grad_norm: 0.8690676866266872, iteration: 219932
loss: 1.0296051502227783,grad_norm: 0.8813889525247232, iteration: 219933
loss: 1.0088697671890259,grad_norm: 0.9482378449555096, iteration: 219934
loss: 0.9891231656074524,grad_norm: 0.8646651378485853, iteration: 219935
loss: 1.0023908615112305,grad_norm: 0.7491566565255673, iteration: 219936
loss: 1.0323395729064941,grad_norm: 0.9999995776712827, iteration: 219937
loss: 1.0024774074554443,grad_norm: 0.9155476086188769, iteration: 219938
loss: 0.9727906584739685,grad_norm: 0.8019263075363186, iteration: 219939
loss: 0.9943504333496094,grad_norm: 0.8927826460700817, iteration: 219940
loss: 0.9615866541862488,grad_norm: 0.9352675717224027, iteration: 219941
loss: 0.9963316917419434,grad_norm: 0.9999990624239785, iteration: 219942
loss: 1.0118601322174072,grad_norm: 0.9999991971172638, iteration: 219943
loss: 1.0673185586929321,grad_norm: 0.953971634596575, iteration: 219944
loss: 0.9817214608192444,grad_norm: 0.9999990690538121, iteration: 219945
loss: 1.017707347869873,grad_norm: 0.9556846408224344, iteration: 219946
loss: 0.9659109711647034,grad_norm: 0.8448988899339811, iteration: 219947
loss: 0.991424024105072,grad_norm: 0.9999998943365171, iteration: 219948
loss: 0.9668830633163452,grad_norm: 0.9739077638020156, iteration: 219949
loss: 1.0076498985290527,grad_norm: 0.9112781768096719, iteration: 219950
loss: 0.9673563241958618,grad_norm: 0.8804144708523388, iteration: 219951
loss: 0.9877709150314331,grad_norm: 0.9519676958387973, iteration: 219952
loss: 1.0159515142440796,grad_norm: 0.9259975916125628, iteration: 219953
loss: 0.9926824569702148,grad_norm: 0.9999989588853554, iteration: 219954
loss: 0.9850071668624878,grad_norm: 0.9999990664841528, iteration: 219955
loss: 0.9736393094062805,grad_norm: 0.9801024279455569, iteration: 219956
loss: 0.9875723719596863,grad_norm: 0.9514147215607619, iteration: 219957
loss: 0.9767814874649048,grad_norm: 0.9999990770367513, iteration: 219958
loss: 1.0002615451812744,grad_norm: 0.9254996608301383, iteration: 219959
loss: 1.0098292827606201,grad_norm: 0.9437209912533401, iteration: 219960
loss: 0.9993070363998413,grad_norm: 0.9281445341254574, iteration: 219961
loss: 0.9767865538597107,grad_norm: 0.9999991362459294, iteration: 219962
loss: 0.9968246817588806,grad_norm: 0.9999990576165833, iteration: 219963
loss: 0.991952657699585,grad_norm: 0.9275811178689478, iteration: 219964
loss: 1.001874327659607,grad_norm: 0.9508097320907895, iteration: 219965
loss: 1.0129979848861694,grad_norm: 0.8584689536553357, iteration: 219966
loss: 0.9664943218231201,grad_norm: 0.9999991700820241, iteration: 219967
loss: 0.9990923404693604,grad_norm: 0.8956550301530738, iteration: 219968
loss: 0.9839099645614624,grad_norm: 0.9999991910267781, iteration: 219969
loss: 1.0233877897262573,grad_norm: 0.9999990932263626, iteration: 219970
loss: 0.9984489679336548,grad_norm: 0.9999990757920731, iteration: 219971
loss: 1.0275765657424927,grad_norm: 0.8059795066836357, iteration: 219972
loss: 1.0187839269638062,grad_norm: 0.9999993874440359, iteration: 219973
loss: 1.0316005945205688,grad_norm: 0.9999991787913551, iteration: 219974
loss: 0.9854696989059448,grad_norm: 0.9999990064192473, iteration: 219975
loss: 0.9625493884086609,grad_norm: 0.9999991120360896, iteration: 219976
loss: 1.0170679092407227,grad_norm: 0.9999996375992402, iteration: 219977
loss: 1.0364714860916138,grad_norm: 0.9579507055480451, iteration: 219978
loss: 0.9847676157951355,grad_norm: 0.9999991636042106, iteration: 219979
loss: 0.9957001209259033,grad_norm: 0.8494919086524805, iteration: 219980
loss: 1.0212640762329102,grad_norm: 0.9999991214682905, iteration: 219981
loss: 1.0264524221420288,grad_norm: 0.9725309971656589, iteration: 219982
loss: 0.9524765610694885,grad_norm: 0.7334400566343026, iteration: 219983
loss: 1.028924584388733,grad_norm: 0.9629809661163621, iteration: 219984
loss: 1.0423365831375122,grad_norm: 0.9999997654105951, iteration: 219985
loss: 0.9893538355827332,grad_norm: 0.999999119530547, iteration: 219986
loss: 0.9574942588806152,grad_norm: 0.98264226001805, iteration: 219987
loss: 0.9871273636817932,grad_norm: 0.8807084676839547, iteration: 219988
loss: 0.985521674156189,grad_norm: 0.9999991035816654, iteration: 219989
loss: 1.0029734373092651,grad_norm: 0.9999989392953113, iteration: 219990
loss: 0.9784517884254456,grad_norm: 0.7634599016342418, iteration: 219991
loss: 1.0379711389541626,grad_norm: 0.9999991071027879, iteration: 219992
loss: 0.9857627749443054,grad_norm: 0.9999992898435068, iteration: 219993
loss: 0.9812254309654236,grad_norm: 0.8823427648407802, iteration: 219994
loss: 1.0302586555480957,grad_norm: 0.8167051966840705, iteration: 219995
loss: 0.9624544978141785,grad_norm: 0.8658670811109881, iteration: 219996
loss: 1.0134665966033936,grad_norm: 0.9430158150605605, iteration: 219997
loss: 0.993313729763031,grad_norm: 0.8263534564233381, iteration: 219998
loss: 0.9806438684463501,grad_norm: 0.957276551947758, iteration: 219999
loss: 0.9931192994117737,grad_norm: 0.9250026723657528, iteration: 220000
Evaluating at step 220000
{'val': 0.9963664691895247, 'test': 2.780555811933816}
loss: 1.0134371519088745,grad_norm: 0.8748659951435551, iteration: 220001
loss: 1.0026134252548218,grad_norm: 0.9226593789718387, iteration: 220002
loss: 0.9999483227729797,grad_norm: 0.7592667164450164, iteration: 220003
loss: 0.9935094714164734,grad_norm: 0.7528223854501696, iteration: 220004
loss: 0.978336751461029,grad_norm: 0.9830758939907448, iteration: 220005
loss: 1.0034383535385132,grad_norm: 0.8911246367450231, iteration: 220006
loss: 1.0404521226882935,grad_norm: 0.9999990549986617, iteration: 220007
loss: 0.9999836683273315,grad_norm: 0.9999991191754639, iteration: 220008
loss: 0.989048421382904,grad_norm: 0.8615790346265398, iteration: 220009
loss: 0.9977495074272156,grad_norm: 0.7455499084957051, iteration: 220010
loss: 0.9949336051940918,grad_norm: 0.8489128788060316, iteration: 220011
loss: 1.026332139968872,grad_norm: 0.9305420453814115, iteration: 220012
loss: 0.9757394790649414,grad_norm: 0.91099803803338, iteration: 220013
loss: 1.0616949796676636,grad_norm: 0.9081988931053361, iteration: 220014
loss: 1.0268057584762573,grad_norm: 0.9999991253315652, iteration: 220015
loss: 1.0108833312988281,grad_norm: 0.9119870849951628, iteration: 220016
loss: 0.9709097743034363,grad_norm: 0.8579545478593061, iteration: 220017
loss: 0.9974210858345032,grad_norm: 0.8515225398046966, iteration: 220018
loss: 0.988756537437439,grad_norm: 0.902284144863494, iteration: 220019
loss: 1.0061399936676025,grad_norm: 0.9392147363156432, iteration: 220020
loss: 0.9876329302787781,grad_norm: 0.8843744657029131, iteration: 220021
loss: 1.0052796602249146,grad_norm: 0.9999989450815086, iteration: 220022
loss: 0.9810342788696289,grad_norm: 0.9999990670722163, iteration: 220023
loss: 1.007930874824524,grad_norm: 0.8154558418072171, iteration: 220024
loss: 1.0066627264022827,grad_norm: 0.9999992255161164, iteration: 220025
loss: 1.0017662048339844,grad_norm: 0.9999990878803494, iteration: 220026
loss: 1.0154057741165161,grad_norm: 0.871147554849116, iteration: 220027
loss: 0.9837849736213684,grad_norm: 0.9339138592197899, iteration: 220028
loss: 0.9635197520256042,grad_norm: 0.9896221092110021, iteration: 220029
loss: 0.9961875081062317,grad_norm: 0.8120133315591397, iteration: 220030
loss: 1.0154855251312256,grad_norm: 0.9999993987995314, iteration: 220031
loss: 0.9804084897041321,grad_norm: 0.9999991058879655, iteration: 220032
loss: 1.0221976041793823,grad_norm: 0.8817126874558985, iteration: 220033
loss: 1.0400307178497314,grad_norm: 0.9999992032900711, iteration: 220034
loss: 1.0110033750534058,grad_norm: 0.9999992981943429, iteration: 220035
loss: 0.9799137711524963,grad_norm: 0.8154237183081088, iteration: 220036
loss: 0.9984728693962097,grad_norm: 0.7756660743061294, iteration: 220037
loss: 0.9842762351036072,grad_norm: 0.9999993758774076, iteration: 220038
loss: 1.0313681364059448,grad_norm: 0.9181129262710743, iteration: 220039
loss: 0.9790561199188232,grad_norm: 0.7934722206541439, iteration: 220040
loss: 0.975981593132019,grad_norm: 0.9394268898073213, iteration: 220041
loss: 0.9922966361045837,grad_norm: 0.8917989613592716, iteration: 220042
loss: 0.9562733173370361,grad_norm: 0.8972057975263306, iteration: 220043
loss: 0.9720331430435181,grad_norm: 0.8795490922536129, iteration: 220044
loss: 1.0016913414001465,grad_norm: 0.8503388666003514, iteration: 220045
loss: 1.0430552959442139,grad_norm: 0.9999989973740913, iteration: 220046
loss: 0.9716772437095642,grad_norm: 0.8611024239509494, iteration: 220047
loss: 0.9751418232917786,grad_norm: 0.8666439234042077, iteration: 220048
loss: 1.0365419387817383,grad_norm: 0.9720587185817036, iteration: 220049
loss: 1.0520737171173096,grad_norm: 0.9999994077840703, iteration: 220050
loss: 1.020546555519104,grad_norm: 0.8586883991890316, iteration: 220051
loss: 0.9747390151023865,grad_norm: 0.8587706228705341, iteration: 220052
loss: 1.0219489336013794,grad_norm: 0.9999990272172032, iteration: 220053
loss: 0.9524102807044983,grad_norm: 0.9999990075255552, iteration: 220054
loss: 1.0802650451660156,grad_norm: 0.8261080684097624, iteration: 220055
loss: 1.0298569202423096,grad_norm: 0.7985991342387327, iteration: 220056
loss: 0.9969342350959778,grad_norm: 0.985352061118713, iteration: 220057
loss: 0.9739859700202942,grad_norm: 0.9999990876519462, iteration: 220058
loss: 0.9755896329879761,grad_norm: 0.9999990825821422, iteration: 220059
loss: 1.0205738544464111,grad_norm: 0.865312544570903, iteration: 220060
loss: 0.9983335733413696,grad_norm: 0.8670492383435912, iteration: 220061
loss: 0.9863340854644775,grad_norm: 0.9999993230590167, iteration: 220062
loss: 0.9886874556541443,grad_norm: 0.9999990796796075, iteration: 220063
loss: 1.0004578828811646,grad_norm: 0.9999990329605289, iteration: 220064
loss: 0.9927020072937012,grad_norm: 0.8591610705424995, iteration: 220065
loss: 1.0229575634002686,grad_norm: 0.8631635886733471, iteration: 220066
loss: 0.9893667101860046,grad_norm: 0.9770521017751358, iteration: 220067
loss: 1.002431869506836,grad_norm: 0.8443326388903936, iteration: 220068
loss: 1.0211586952209473,grad_norm: 0.9597578688072798, iteration: 220069
loss: 0.9694515466690063,grad_norm: 0.9999989719026288, iteration: 220070
loss: 0.9876015186309814,grad_norm: 0.8087428070745992, iteration: 220071
loss: 0.9905714988708496,grad_norm: 0.9131768618988753, iteration: 220072
loss: 0.9931809902191162,grad_norm: 0.9112543492442478, iteration: 220073
loss: 1.0212377309799194,grad_norm: 0.9179329084637186, iteration: 220074
loss: 1.079764723777771,grad_norm: 0.9999992116886053, iteration: 220075
loss: 1.0192400217056274,grad_norm: 0.8822997544715507, iteration: 220076
loss: 1.0945743322372437,grad_norm: 0.9999998902009694, iteration: 220077
loss: 1.0049282312393188,grad_norm: 0.9369869299660112, iteration: 220078
loss: 1.029222011566162,grad_norm: 0.9103418067450391, iteration: 220079
loss: 0.9920258522033691,grad_norm: 0.9999990110832403, iteration: 220080
loss: 0.9837448000907898,grad_norm: 0.8746702450396855, iteration: 220081
loss: 1.0144462585449219,grad_norm: 0.7602679467864129, iteration: 220082
loss: 1.0258677005767822,grad_norm: 0.9999989409359602, iteration: 220083
loss: 1.0334776639938354,grad_norm: 0.8203028242302544, iteration: 220084
loss: 1.04135262966156,grad_norm: 0.9999991521576417, iteration: 220085
loss: 0.9997667074203491,grad_norm: 0.9676555632494857, iteration: 220086
loss: 0.978989839553833,grad_norm: 0.9999990608415139, iteration: 220087
loss: 1.0442067384719849,grad_norm: 0.9341958080417472, iteration: 220088
loss: 1.0037492513656616,grad_norm: 0.8821709095440267, iteration: 220089
loss: 1.0133551359176636,grad_norm: 0.9613862070231269, iteration: 220090
loss: 1.0013593435287476,grad_norm: 0.8431200550631548, iteration: 220091
loss: 1.007295846939087,grad_norm: 0.8419166866480724, iteration: 220092
loss: 0.9994524121284485,grad_norm: 0.9422413097618747, iteration: 220093
loss: 1.0410083532333374,grad_norm: 0.9999999221144941, iteration: 220094
loss: 0.9552558660507202,grad_norm: 0.999999924344712, iteration: 220095
loss: 1.0091050863265991,grad_norm: 0.8990907272158816, iteration: 220096
loss: 1.0112544298171997,grad_norm: 0.827468172713149, iteration: 220097
loss: 1.0212386846542358,grad_norm: 0.8293950320463014, iteration: 220098
loss: 0.9898552894592285,grad_norm: 0.9443246637655943, iteration: 220099
loss: 1.0591089725494385,grad_norm: 0.9999996076247069, iteration: 220100
loss: 1.024410605430603,grad_norm: 0.8929729482192303, iteration: 220101
loss: 1.0045057535171509,grad_norm: 0.9999990973999415, iteration: 220102
loss: 1.006547212600708,grad_norm: 0.9999991317451914, iteration: 220103
loss: 0.9542712569236755,grad_norm: 0.9426274062345695, iteration: 220104
loss: 1.0052340030670166,grad_norm: 0.7634302509318721, iteration: 220105
loss: 0.9663875699043274,grad_norm: 0.9894924140448736, iteration: 220106
loss: 1.0234564542770386,grad_norm: 0.8674300634101499, iteration: 220107
loss: 1.0326018333435059,grad_norm: 0.9999990505217793, iteration: 220108
loss: 0.9638332724571228,grad_norm: 0.989852246292747, iteration: 220109
loss: 1.0087851285934448,grad_norm: 0.9999992984645116, iteration: 220110
loss: 1.0306131839752197,grad_norm: 0.9999998204211277, iteration: 220111
loss: 0.9725309014320374,grad_norm: 0.9999991912284651, iteration: 220112
loss: 0.9842133522033691,grad_norm: 0.9090322928946947, iteration: 220113
loss: 1.014694094657898,grad_norm: 0.8221411199313031, iteration: 220114
loss: 1.0051213502883911,grad_norm: 0.9999992313516222, iteration: 220115
loss: 0.9949320554733276,grad_norm: 0.9735970754649734, iteration: 220116
loss: 0.9768627882003784,grad_norm: 0.9999991653218469, iteration: 220117
loss: 1.0175349712371826,grad_norm: 0.7503023628929933, iteration: 220118
loss: 1.0021671056747437,grad_norm: 0.9481819376524457, iteration: 220119
loss: 1.0237663984298706,grad_norm: 0.9999990939912865, iteration: 220120
loss: 1.024379014968872,grad_norm: 0.8155922544212829, iteration: 220121
loss: 0.9964190125465393,grad_norm: 0.9525544881508752, iteration: 220122
loss: 0.9960839152336121,grad_norm: 0.8382370770245967, iteration: 220123
loss: 0.9964491724967957,grad_norm: 0.870742424404786, iteration: 220124
loss: 1.0062346458435059,grad_norm: 0.9820072751073029, iteration: 220125
loss: 1.0609984397888184,grad_norm: 0.8307541539243182, iteration: 220126
loss: 0.9778483510017395,grad_norm: 0.8647237151517311, iteration: 220127
loss: 1.0170936584472656,grad_norm: 0.9302185181700499, iteration: 220128
loss: 0.9925997853279114,grad_norm: 0.8128136178685363, iteration: 220129
loss: 1.0286821126937866,grad_norm: 0.999999146673669, iteration: 220130
loss: 1.0083461999893188,grad_norm: 0.9715559520444534, iteration: 220131
loss: 0.9835521578788757,grad_norm: 0.9999992343462011, iteration: 220132
loss: 1.006386637687683,grad_norm: 0.9999994592192353, iteration: 220133
loss: 0.9619386792182922,grad_norm: 0.9857210051352958, iteration: 220134
loss: 1.0342777967453003,grad_norm: 0.8615107823586022, iteration: 220135
loss: 0.9977439045906067,grad_norm: 0.9034074626181338, iteration: 220136
loss: 0.9801866412162781,grad_norm: 0.8052763240981957, iteration: 220137
loss: 0.9831196665763855,grad_norm: 0.9999991754969697, iteration: 220138
loss: 0.998152494430542,grad_norm: 0.9880832999374833, iteration: 220139
loss: 0.9983710646629333,grad_norm: 0.8909077809641088, iteration: 220140
loss: 0.9975522756576538,grad_norm: 0.9146804450923273, iteration: 220141
loss: 0.9987358450889587,grad_norm: 0.9767065323396249, iteration: 220142
loss: 0.9914866089820862,grad_norm: 0.9999989863620496, iteration: 220143
loss: 1.0015313625335693,grad_norm: 0.8636924979380507, iteration: 220144
loss: 0.9988245368003845,grad_norm: 0.9999991454245571, iteration: 220145
loss: 1.0001654624938965,grad_norm: 0.9969959134899151, iteration: 220146
loss: 0.9893761277198792,grad_norm: 0.8748082763179745, iteration: 220147
loss: 0.9967131018638611,grad_norm: 0.958313476224148, iteration: 220148
loss: 1.0183583498001099,grad_norm: 0.9440444873545571, iteration: 220149
loss: 1.0079680681228638,grad_norm: 0.9338432955872854, iteration: 220150
loss: 0.9909453988075256,grad_norm: 0.9149770298925665, iteration: 220151
loss: 0.9991001486778259,grad_norm: 0.9169273734079445, iteration: 220152
loss: 0.9416135549545288,grad_norm: 0.8239758497101222, iteration: 220153
loss: 0.9713791012763977,grad_norm: 0.8289313370612843, iteration: 220154
loss: 0.9871180653572083,grad_norm: 0.9999992165247934, iteration: 220155
loss: 0.9850391745567322,grad_norm: 0.9999990925695902, iteration: 220156
loss: 0.9923450946807861,grad_norm: 0.999999006270283, iteration: 220157
loss: 0.9772478938102722,grad_norm: 0.87698947683237, iteration: 220158
loss: 1.0286672115325928,grad_norm: 0.9999990690676744, iteration: 220159
loss: 0.9872000217437744,grad_norm: 0.9688979502921246, iteration: 220160
loss: 1.0056803226470947,grad_norm: 0.9593294399868622, iteration: 220161
loss: 0.9829138517379761,grad_norm: 0.870037525338734, iteration: 220162
loss: 0.9899011850357056,grad_norm: 0.946307472635395, iteration: 220163
loss: 1.318980097770691,grad_norm: 0.9999998686141919, iteration: 220164
loss: 1.0244221687316895,grad_norm: 0.9999998395064954, iteration: 220165
loss: 0.9840390682220459,grad_norm: 0.9108945863611442, iteration: 220166
loss: 1.0027716159820557,grad_norm: 0.8784311001921814, iteration: 220167
loss: 1.071295142173767,grad_norm: 0.9999996793501691, iteration: 220168
loss: 1.0772823095321655,grad_norm: 0.9999998456187517, iteration: 220169
loss: 1.0969325304031372,grad_norm: 0.99999948298696, iteration: 220170
loss: 0.9861389398574829,grad_norm: 0.8528461513721781, iteration: 220171
loss: 1.0123810768127441,grad_norm: 0.9050268506377357, iteration: 220172
loss: 0.9612362384796143,grad_norm: 0.9081904361814226, iteration: 220173
loss: 0.9965341091156006,grad_norm: 0.7683148442958384, iteration: 220174
loss: 0.978707492351532,grad_norm: 0.915710653579886, iteration: 220175
loss: 0.9947166442871094,grad_norm: 0.9159857410720021, iteration: 220176
loss: 1.0051475763320923,grad_norm: 0.9999989044855392, iteration: 220177
loss: 1.0266538858413696,grad_norm: 0.999999119220467, iteration: 220178
loss: 1.0386290550231934,grad_norm: 0.8078670332342974, iteration: 220179
loss: 0.9990404844284058,grad_norm: 0.9733072858512135, iteration: 220180
loss: 1.0071247816085815,grad_norm: 0.9999991813162075, iteration: 220181
loss: 1.0058571100234985,grad_norm: 0.8280546293457495, iteration: 220182
loss: 0.9815809726715088,grad_norm: 0.9197955659143315, iteration: 220183
loss: 0.9802451729774475,grad_norm: 0.9999992820221344, iteration: 220184
loss: 1.0344377756118774,grad_norm: 0.7717323633329823, iteration: 220185
loss: 0.9804178476333618,grad_norm: 0.9280874802876057, iteration: 220186
loss: 1.0052859783172607,grad_norm: 0.9999991457732769, iteration: 220187
loss: 1.03094482421875,grad_norm: 0.9212820457047086, iteration: 220188
loss: 1.025080680847168,grad_norm: 0.999998914381168, iteration: 220189
loss: 1.0053223371505737,grad_norm: 0.9112404091227526, iteration: 220190
loss: 1.0457329750061035,grad_norm: 0.9999991697992615, iteration: 220191
loss: 1.0169432163238525,grad_norm: 0.8913183699118761, iteration: 220192
loss: 1.0335142612457275,grad_norm: 0.9999989291480054, iteration: 220193
loss: 0.9977731704711914,grad_norm: 0.9999991821516769, iteration: 220194
loss: 1.0091477632522583,grad_norm: 0.8357167092664021, iteration: 220195
loss: 0.9871867299079895,grad_norm: 0.819254721356064, iteration: 220196
loss: 0.9661644101142883,grad_norm: 0.858687448969935, iteration: 220197
loss: 0.9920448660850525,grad_norm: 0.9252051037649786, iteration: 220198
loss: 1.002784252166748,grad_norm: 0.7897070208819478, iteration: 220199
loss: 1.0147477388381958,grad_norm: 0.9635971191581952, iteration: 220200
loss: 1.00682532787323,grad_norm: 0.9999991840896462, iteration: 220201
loss: 1.0182340145111084,grad_norm: 0.8294227126615885, iteration: 220202
loss: 0.9862851500511169,grad_norm: 0.9209214278660592, iteration: 220203
loss: 0.9661241769790649,grad_norm: 0.8253626609485086, iteration: 220204
loss: 0.98676997423172,grad_norm: 0.8235087353005834, iteration: 220205
loss: 1.0053540468215942,grad_norm: 0.858983991065433, iteration: 220206
loss: 0.971760630607605,grad_norm: 0.8958381698775207, iteration: 220207
loss: 1.0056476593017578,grad_norm: 0.7974641254125758, iteration: 220208
loss: 0.9863259792327881,grad_norm: 0.9645535101919614, iteration: 220209
loss: 0.9909414649009705,grad_norm: 0.8525909233356709, iteration: 220210
loss: 1.0116513967514038,grad_norm: 0.9774155046064636, iteration: 220211
loss: 1.0029717683792114,grad_norm: 0.8139332189339066, iteration: 220212
loss: 1.0055216550827026,grad_norm: 0.813627898530347, iteration: 220213
loss: 1.0017844438552856,grad_norm: 0.8158311892555944, iteration: 220214
loss: 1.0191099643707275,grad_norm: 0.8678385494045363, iteration: 220215
loss: 0.9932858943939209,grad_norm: 0.9805433825731064, iteration: 220216
loss: 1.040454387664795,grad_norm: 0.8717417036597361, iteration: 220217
loss: 1.0230696201324463,grad_norm: 0.8553815876933807, iteration: 220218
loss: 1.008047103881836,grad_norm: 0.9245228845889747, iteration: 220219
loss: 1.0050404071807861,grad_norm: 0.9410507196025389, iteration: 220220
loss: 0.985268771648407,grad_norm: 0.9655311923939478, iteration: 220221
loss: 1.0121018886566162,grad_norm: 0.8647565435968718, iteration: 220222
loss: 0.978186309337616,grad_norm: 0.8494509139381407, iteration: 220223
loss: 0.9837279915809631,grad_norm: 0.9999989943823141, iteration: 220224
loss: 0.9842700362205505,grad_norm: 0.9359673779985179, iteration: 220225
loss: 1.017833948135376,grad_norm: 0.9999992288889746, iteration: 220226
loss: 0.9989951848983765,grad_norm: 0.940490042719794, iteration: 220227
loss: 1.0072790384292603,grad_norm: 0.928249030415077, iteration: 220228
loss: 0.993476390838623,grad_norm: 0.9416725878537164, iteration: 220229
loss: 0.9971066117286682,grad_norm: 0.9999992983283726, iteration: 220230
loss: 0.9840551018714905,grad_norm: 0.9158602640800222, iteration: 220231
loss: 1.041448950767517,grad_norm: 0.9999991208199377, iteration: 220232
loss: 1.015242576599121,grad_norm: 0.9203266248064493, iteration: 220233
loss: 0.9393577575683594,grad_norm: 0.990994776626783, iteration: 220234
loss: 1.0073294639587402,grad_norm: 0.887701271258694, iteration: 220235
loss: 0.9918817281723022,grad_norm: 0.9822808939492758, iteration: 220236
loss: 0.9808477759361267,grad_norm: 0.8286861995283517, iteration: 220237
loss: 0.9969040751457214,grad_norm: 0.8899418350394402, iteration: 220238
loss: 0.9998415112495422,grad_norm: 0.9000007812345053, iteration: 220239
loss: 0.967754065990448,grad_norm: 0.950753514112783, iteration: 220240
loss: 1.0112757682800293,grad_norm: 0.9999990806770593, iteration: 220241
loss: 0.9971182346343994,grad_norm: 0.8443104099982265, iteration: 220242
loss: 0.9602739214897156,grad_norm: 0.9388425416668579, iteration: 220243
loss: 0.9699281454086304,grad_norm: 0.9521536872018055, iteration: 220244
loss: 1.0251131057739258,grad_norm: 0.8864645451975411, iteration: 220245
loss: 1.0256789922714233,grad_norm: 0.9999999176956683, iteration: 220246
loss: 0.9963330030441284,grad_norm: 0.9999990744839145, iteration: 220247
loss: 1.096082329750061,grad_norm: 0.9999998356319335, iteration: 220248
loss: 1.0157146453857422,grad_norm: 0.9947795569125035, iteration: 220249
loss: 1.0188353061676025,grad_norm: 0.9651721195763314, iteration: 220250
loss: 0.9718225598335266,grad_norm: 0.8906779067448519, iteration: 220251
loss: 0.9938845038414001,grad_norm: 0.9381933725609602, iteration: 220252
loss: 0.9779110550880432,grad_norm: 0.86408635638739, iteration: 220253
loss: 0.9661803841590881,grad_norm: 0.8908547003665979, iteration: 220254
loss: 1.0332682132720947,grad_norm: 0.9861866040755533, iteration: 220255
loss: 1.0247503519058228,grad_norm: 0.7562641777043839, iteration: 220256
loss: 0.9749647378921509,grad_norm: 0.999999188319721, iteration: 220257
loss: 1.002388596534729,grad_norm: 0.9719208372942386, iteration: 220258
loss: 1.0083448886871338,grad_norm: 0.9999992005058829, iteration: 220259
loss: 0.9684544801712036,grad_norm: 0.9999994529209499, iteration: 220260
loss: 0.9822990298271179,grad_norm: 0.9231868472248476, iteration: 220261
loss: 0.9972463250160217,grad_norm: 0.9596897562274018, iteration: 220262
loss: 0.9759340882301331,grad_norm: 0.9220398620990982, iteration: 220263
loss: 0.9979966878890991,grad_norm: 0.7266377457548253, iteration: 220264
loss: 1.0099389553070068,grad_norm: 0.9386366625483377, iteration: 220265
loss: 1.0107340812683105,grad_norm: 0.8117284184720611, iteration: 220266
loss: 0.9435646533966064,grad_norm: 0.99999902715418, iteration: 220267
loss: 0.9748859405517578,grad_norm: 0.8817669670989996, iteration: 220268
loss: 1.0210044384002686,grad_norm: 0.8660398143366904, iteration: 220269
loss: 0.9985553026199341,grad_norm: 0.8219374801922339, iteration: 220270
loss: 0.9788355231285095,grad_norm: 0.8167376800089315, iteration: 220271
loss: 0.9984171390533447,grad_norm: 0.9999991015148998, iteration: 220272
loss: 0.992257833480835,grad_norm: 0.9305290940749256, iteration: 220273
loss: 1.0236796140670776,grad_norm: 0.8881078575618935, iteration: 220274
loss: 0.9820166230201721,grad_norm: 0.8999579427661468, iteration: 220275
loss: 0.9919874668121338,grad_norm: 0.8947365130030631, iteration: 220276
loss: 1.0004884004592896,grad_norm: 0.9999991283486587, iteration: 220277
loss: 1.0005255937576294,grad_norm: 0.788997731865857, iteration: 220278
loss: 1.007865071296692,grad_norm: 0.8622660159864758, iteration: 220279
loss: 1.0092393159866333,grad_norm: 0.9999991910351362, iteration: 220280
loss: 0.9796993136405945,grad_norm: 0.8323511739280695, iteration: 220281
loss: 1.0208815336227417,grad_norm: 0.9999997797139412, iteration: 220282
loss: 0.9688848257064819,grad_norm: 0.848170989586825, iteration: 220283
loss: 0.988797128200531,grad_norm: 0.8346233196566855, iteration: 220284
loss: 0.9789944291114807,grad_norm: 0.6946222329380909, iteration: 220285
loss: 1.0335123538970947,grad_norm: 0.9999990413305604, iteration: 220286
loss: 1.005460500717163,grad_norm: 0.9999991339536758, iteration: 220287
loss: 0.981911301612854,grad_norm: 0.8549744075679158, iteration: 220288
loss: 0.9895609021186829,grad_norm: 0.9999990584957019, iteration: 220289
loss: 1.000046968460083,grad_norm: 0.8810800841238864, iteration: 220290
loss: 1.1543325185775757,grad_norm: 0.9999991033260017, iteration: 220291
loss: 1.0153460502624512,grad_norm: 0.8919344710465713, iteration: 220292
loss: 0.9799724221229553,grad_norm: 0.927596546708858, iteration: 220293
loss: 1.0022807121276855,grad_norm: 0.9421443278981416, iteration: 220294
loss: 0.972771167755127,grad_norm: 0.9999991351925487, iteration: 220295
loss: 0.9854543209075928,grad_norm: 0.9999992924015532, iteration: 220296
loss: 0.9599046111106873,grad_norm: 0.8902753711489992, iteration: 220297
loss: 0.9760594964027405,grad_norm: 0.8166066370020796, iteration: 220298
loss: 1.0062514543533325,grad_norm: 0.7569596561383486, iteration: 220299
loss: 1.0421786308288574,grad_norm: 0.9999990756697549, iteration: 220300
loss: 1.001352071762085,grad_norm: 0.9999990397185372, iteration: 220301
loss: 0.9533814191818237,grad_norm: 0.9892532005879707, iteration: 220302
loss: 1.0007926225662231,grad_norm: 0.9997096512380175, iteration: 220303
loss: 1.0123958587646484,grad_norm: 0.9789461379995997, iteration: 220304
loss: 0.9878118634223938,grad_norm: 0.8635719854480894, iteration: 220305
loss: 1.0364983081817627,grad_norm: 0.9999991302388257, iteration: 220306
loss: 0.9842163920402527,grad_norm: 0.9999992751594375, iteration: 220307
loss: 0.9823038578033447,grad_norm: 0.9468921937982775, iteration: 220308
loss: 1.0186975002288818,grad_norm: 0.9999992417726881, iteration: 220309
loss: 0.9964852333068848,grad_norm: 0.819002254129459, iteration: 220310
loss: 0.9836954474449158,grad_norm: 0.7259929061684308, iteration: 220311
loss: 0.9979248046875,grad_norm: 0.9999993291069994, iteration: 220312
loss: 1.013392448425293,grad_norm: 0.9205557262069357, iteration: 220313
loss: 0.9986280202865601,grad_norm: 0.9813675360915152, iteration: 220314
loss: 0.991046130657196,grad_norm: 0.989681139729111, iteration: 220315
loss: 1.0215331315994263,grad_norm: 0.950705436289619, iteration: 220316
loss: 0.9843059778213501,grad_norm: 0.9999992673779943, iteration: 220317
loss: 1.0516705513000488,grad_norm: 0.8748865442007925, iteration: 220318
loss: 0.9857438802719116,grad_norm: 0.979505519890023, iteration: 220319
loss: 1.0065442323684692,grad_norm: 0.9285934085955212, iteration: 220320
loss: 1.0060269832611084,grad_norm: 0.9999990365407273, iteration: 220321
loss: 0.983353316783905,grad_norm: 0.9317925935541846, iteration: 220322
loss: 0.9686282873153687,grad_norm: 0.8501872763652482, iteration: 220323
loss: 1.0370968580245972,grad_norm: 0.9531345519902461, iteration: 220324
loss: 0.9905976057052612,grad_norm: 0.8994943440925797, iteration: 220325
loss: 1.008259654045105,grad_norm: 0.8330468230828588, iteration: 220326
loss: 1.0462571382522583,grad_norm: 0.9920083142722177, iteration: 220327
loss: 1.0347838401794434,grad_norm: 0.8910594404512454, iteration: 220328
loss: 0.9854562282562256,grad_norm: 0.9568937697618917, iteration: 220329
loss: 1.0287513732910156,grad_norm: 0.9999991278722024, iteration: 220330
loss: 1.0209201574325562,grad_norm: 0.8808585411991782, iteration: 220331
loss: 0.9857072830200195,grad_norm: 0.9999991772930034, iteration: 220332
loss: 0.9688779711723328,grad_norm: 0.9148001600715097, iteration: 220333
loss: 1.0024027824401855,grad_norm: 0.9999991538071176, iteration: 220334
loss: 0.9908886551856995,grad_norm: 0.9677351569099044, iteration: 220335
loss: 0.9640927314758301,grad_norm: 0.9999990572837882, iteration: 220336
loss: 0.9966093897819519,grad_norm: 0.8085681211049355, iteration: 220337
loss: 0.9922983646392822,grad_norm: 0.9106500657808599, iteration: 220338
loss: 0.9866234660148621,grad_norm: 0.8119739087206725, iteration: 220339
loss: 0.9790390729904175,grad_norm: 0.8178269389942483, iteration: 220340
loss: 0.9839004874229431,grad_norm: 0.8484937931809707, iteration: 220341
loss: 1.0725010633468628,grad_norm: 0.9999991162791462, iteration: 220342
loss: 1.027069091796875,grad_norm: 0.9999992786815528, iteration: 220343
loss: 0.993478000164032,grad_norm: 0.8141432791997671, iteration: 220344
loss: 0.9956571459770203,grad_norm: 0.9999997462005761, iteration: 220345
loss: 0.9951446652412415,grad_norm: 0.8347190237077313, iteration: 220346
loss: 0.9879950284957886,grad_norm: 0.8670484472346769, iteration: 220347
loss: 0.9915309548377991,grad_norm: 0.9459360798369695, iteration: 220348
loss: 1.0246845483779907,grad_norm: 0.9999991600067608, iteration: 220349
loss: 0.9937140941619873,grad_norm: 0.8979411233028429, iteration: 220350
loss: 1.063050389289856,grad_norm: 0.9479675455456259, iteration: 220351
loss: 0.97211092710495,grad_norm: 0.9591690545127229, iteration: 220352
loss: 1.0085455179214478,grad_norm: 0.9831066303691929, iteration: 220353
loss: 0.9620223045349121,grad_norm: 0.9711955862739736, iteration: 220354
loss: 0.9951809048652649,grad_norm: 0.7723619540866961, iteration: 220355
loss: 1.0265088081359863,grad_norm: 0.9347006268020436, iteration: 220356
loss: 1.0074801445007324,grad_norm: 0.8062030052581113, iteration: 220357
loss: 0.996249794960022,grad_norm: 0.9999992721152362, iteration: 220358
loss: 1.0250146389007568,grad_norm: 0.9177983232822516, iteration: 220359
loss: 0.9688928723335266,grad_norm: 0.9467076752935528, iteration: 220360
loss: 1.0299811363220215,grad_norm: 0.9999990419152425, iteration: 220361
loss: 1.0052891969680786,grad_norm: 0.9183818174287031, iteration: 220362
loss: 1.046427845954895,grad_norm: 0.9179689116719358, iteration: 220363
loss: 1.02078115940094,grad_norm: 0.9264503238132161, iteration: 220364
loss: 0.973845899105072,grad_norm: 0.8626287912857463, iteration: 220365
loss: 1.0309513807296753,grad_norm: 0.9999992133414103, iteration: 220366
loss: 1.0116007328033447,grad_norm: 0.8805014036760721, iteration: 220367
loss: 0.9956191182136536,grad_norm: 0.8393826527994745, iteration: 220368
loss: 0.9959293603897095,grad_norm: 0.9999995565957666, iteration: 220369
loss: 0.9664831161499023,grad_norm: 0.8460780626329976, iteration: 220370
loss: 0.9776163697242737,grad_norm: 0.8279822469931117, iteration: 220371
loss: 1.0014086961746216,grad_norm: 0.9243737841030696, iteration: 220372
loss: 0.9947782158851624,grad_norm: 0.9999989637384362, iteration: 220373
loss: 1.0024765729904175,grad_norm: 0.9942518037332166, iteration: 220374
loss: 0.9772692918777466,grad_norm: 0.9999991347145145, iteration: 220375
loss: 0.9835097193717957,grad_norm: 0.9999991620424256, iteration: 220376
loss: 1.0140129327774048,grad_norm: 0.8023620925000443, iteration: 220377
loss: 1.0191376209259033,grad_norm: 0.9999991029326579, iteration: 220378
loss: 0.9679670333862305,grad_norm: 0.8722175659741613, iteration: 220379
loss: 1.017937421798706,grad_norm: 0.8481384808851193, iteration: 220380
loss: 0.9976291060447693,grad_norm: 0.9605466000280634, iteration: 220381
loss: 0.9625871181488037,grad_norm: 0.8558829993949965, iteration: 220382
loss: 0.9903027415275574,grad_norm: 0.9072981620500224, iteration: 220383
loss: 1.0231810808181763,grad_norm: 0.9999991564432169, iteration: 220384
loss: 1.0323939323425293,grad_norm: 0.8834351681382355, iteration: 220385
loss: 1.1153745651245117,grad_norm: 0.9999992094926686, iteration: 220386
loss: 1.0796318054199219,grad_norm: 0.946033623525889, iteration: 220387
loss: 0.9989528656005859,grad_norm: 0.924838529292391, iteration: 220388
loss: 1.013595700263977,grad_norm: 0.9999990625212046, iteration: 220389
loss: 1.0011622905731201,grad_norm: 0.8148641937357692, iteration: 220390
loss: 1.0057612657546997,grad_norm: 0.9999992453195766, iteration: 220391
loss: 0.9764975905418396,grad_norm: 0.9717065368550734, iteration: 220392
loss: 1.0287941694259644,grad_norm: 0.9999991451903665, iteration: 220393
loss: 1.0204839706420898,grad_norm: 0.905963292660006, iteration: 220394
loss: 0.9847891330718994,grad_norm: 0.9617113742668897, iteration: 220395
loss: 1.036970615386963,grad_norm: 0.8782573992171759, iteration: 220396
loss: 1.0017398595809937,grad_norm: 0.9636201419011069, iteration: 220397
loss: 0.9953894019126892,grad_norm: 0.9154633328205651, iteration: 220398
loss: 0.9720296859741211,grad_norm: 0.9604177535461476, iteration: 220399
loss: 1.0355569124221802,grad_norm: 0.9555152398130292, iteration: 220400
loss: 0.9805867075920105,grad_norm: 0.8610486952885835, iteration: 220401
loss: 1.0111342668533325,grad_norm: 0.9862231776025214, iteration: 220402
loss: 0.9809718132019043,grad_norm: 0.9978948984484131, iteration: 220403
loss: 0.9997645616531372,grad_norm: 0.9526684186643742, iteration: 220404
loss: 1.0318713188171387,grad_norm: 0.9999997256493003, iteration: 220405
loss: 1.027589201927185,grad_norm: 0.9999990834191395, iteration: 220406
loss: 1.0000063180923462,grad_norm: 0.9232624577813126, iteration: 220407
loss: 1.0364086627960205,grad_norm: 0.9918325460599022, iteration: 220408
loss: 1.011387586593628,grad_norm: 0.8612037576876368, iteration: 220409
loss: 0.9829723238945007,grad_norm: 0.9999991686178076, iteration: 220410
loss: 0.9873853325843811,grad_norm: 0.9344724069328291, iteration: 220411
loss: 0.9495171904563904,grad_norm: 0.9771799307904268, iteration: 220412
loss: 0.996951162815094,grad_norm: 0.9560431393513499, iteration: 220413
loss: 1.0148897171020508,grad_norm: 0.7005226749507381, iteration: 220414
loss: 1.029401183128357,grad_norm: 0.8518519529094423, iteration: 220415
loss: 1.0181626081466675,grad_norm: 0.884513967962295, iteration: 220416
loss: 1.0134594440460205,grad_norm: 0.80201050266024, iteration: 220417
loss: 0.9815724492073059,grad_norm: 0.7912801147978098, iteration: 220418
loss: 1.00991690158844,grad_norm: 0.8895364422283012, iteration: 220419
loss: 0.9542918801307678,grad_norm: 0.92987314353206, iteration: 220420
loss: 1.0283600091934204,grad_norm: 0.8759650393695139, iteration: 220421
loss: 1.016076922416687,grad_norm: 0.9968058105313575, iteration: 220422
loss: 1.0072886943817139,grad_norm: 0.9999991261687099, iteration: 220423
loss: 0.9916426539421082,grad_norm: 0.9931784318469565, iteration: 220424
loss: 0.9820694923400879,grad_norm: 0.97661474106661, iteration: 220425
loss: 0.9881621599197388,grad_norm: 0.9999990887194177, iteration: 220426
loss: 1.026759147644043,grad_norm: 0.9518739968827139, iteration: 220427
loss: 1.0224902629852295,grad_norm: 0.9999990752167721, iteration: 220428
loss: 1.0074639320373535,grad_norm: 0.8245345973115233, iteration: 220429
loss: 1.0348782539367676,grad_norm: 0.858652015947825, iteration: 220430
loss: 1.018019199371338,grad_norm: 0.8019081135754671, iteration: 220431
loss: 1.0018398761749268,grad_norm: 0.9999991925939693, iteration: 220432
loss: 1.0188969373703003,grad_norm: 0.9332074581536759, iteration: 220433
loss: 0.9794190526008606,grad_norm: 0.9611284074883445, iteration: 220434
loss: 1.017959713935852,grad_norm: 0.9105085887570877, iteration: 220435
loss: 1.0246951580047607,grad_norm: 0.8490798982671465, iteration: 220436
loss: 0.9690929651260376,grad_norm: 0.9999992315807271, iteration: 220437
loss: 1.0057545900344849,grad_norm: 0.8059817210860887, iteration: 220438
loss: 1.007108211517334,grad_norm: 0.9269950429910437, iteration: 220439
loss: 1.0046992301940918,grad_norm: 0.9999998269337262, iteration: 220440
loss: 1.0088127851486206,grad_norm: 0.9712638550329351, iteration: 220441
loss: 1.026808500289917,grad_norm: 0.9511262582976192, iteration: 220442
loss: 0.9635758996009827,grad_norm: 0.9999992853876059, iteration: 220443
loss: 0.9923136234283447,grad_norm: 0.9364760430007384, iteration: 220444
loss: 1.0235884189605713,grad_norm: 0.9477427485752763, iteration: 220445
loss: 1.0020724534988403,grad_norm: 0.7354539278521186, iteration: 220446
loss: 1.0086274147033691,grad_norm: 0.9999990116037086, iteration: 220447
loss: 1.0220438241958618,grad_norm: 0.9557796317655782, iteration: 220448
loss: 1.0275883674621582,grad_norm: 0.865688194535983, iteration: 220449
loss: 0.9731970429420471,grad_norm: 0.9031994703350384, iteration: 220450
loss: 1.1951580047607422,grad_norm: 0.9544875986078482, iteration: 220451
loss: 0.9826782941818237,grad_norm: 0.9115447079266064, iteration: 220452
loss: 0.9921730756759644,grad_norm: 0.8115945668004158, iteration: 220453
loss: 0.9738867282867432,grad_norm: 0.9033868880940236, iteration: 220454
loss: 1.011130690574646,grad_norm: 0.9999991073915978, iteration: 220455
loss: 1.0083545446395874,grad_norm: 0.9999993586880814, iteration: 220456
loss: 0.968228280544281,grad_norm: 0.9749298783143512, iteration: 220457
loss: 0.9819194078445435,grad_norm: 0.880811421076961, iteration: 220458
loss: 1.041219711303711,grad_norm: 0.9999990959234992, iteration: 220459
loss: 1.003420114517212,grad_norm: 0.9762947810041562, iteration: 220460
loss: 0.9724074602127075,grad_norm: 0.9201130964012961, iteration: 220461
loss: 1.0017423629760742,grad_norm: 0.8539982205851352, iteration: 220462
loss: 1.0124591588974,grad_norm: 0.8516576297052583, iteration: 220463
loss: 1.0204195976257324,grad_norm: 0.905909646550492, iteration: 220464
loss: 0.9602833390235901,grad_norm: 0.9816046721399653, iteration: 220465
loss: 1.0268391370773315,grad_norm: 0.9999990232156044, iteration: 220466
loss: 1.0002143383026123,grad_norm: 0.9864904625069272, iteration: 220467
loss: 0.9670088291168213,grad_norm: 0.8840853413877301, iteration: 220468
loss: 1.0006797313690186,grad_norm: 0.9999992346915342, iteration: 220469
loss: 1.0197752714157104,grad_norm: 0.9024192525773495, iteration: 220470
loss: 1.038225531578064,grad_norm: 0.893462296583866, iteration: 220471
loss: 1.045730471611023,grad_norm: 0.9999995625130815, iteration: 220472
loss: 0.9819962382316589,grad_norm: 0.9999990949338914, iteration: 220473
loss: 0.9840896725654602,grad_norm: 0.9155321934324179, iteration: 220474
loss: 0.973162829875946,grad_norm: 0.8735023162439989, iteration: 220475
loss: 1.0146340131759644,grad_norm: 0.9999991594922663, iteration: 220476
loss: 0.9908930659294128,grad_norm: 0.767166038770595, iteration: 220477
loss: 0.9930405616760254,grad_norm: 0.955161705893925, iteration: 220478
loss: 0.9995303750038147,grad_norm: 0.967732521051557, iteration: 220479
loss: 0.9833977222442627,grad_norm: 0.8776553294308896, iteration: 220480
loss: 1.0169203281402588,grad_norm: 0.9323401004495412, iteration: 220481
loss: 0.9441215395927429,grad_norm: 0.8336203936755922, iteration: 220482
loss: 1.00555419921875,grad_norm: 0.9855861593109484, iteration: 220483
loss: 1.0266693830490112,grad_norm: 0.9039154003512163, iteration: 220484
loss: 0.9659256339073181,grad_norm: 0.9032285468294536, iteration: 220485
loss: 1.007764220237732,grad_norm: 0.9033882490625971, iteration: 220486
loss: 1.0315378904342651,grad_norm: 0.9135261086230517, iteration: 220487
loss: 0.9567383527755737,grad_norm: 0.8769515247613543, iteration: 220488
loss: 1.0447964668273926,grad_norm: 0.9999990125013518, iteration: 220489
loss: 0.9875117540359497,grad_norm: 0.9655697708652716, iteration: 220490
loss: 1.0164587497711182,grad_norm: 0.9999997774118937, iteration: 220491
loss: 0.9611577987670898,grad_norm: 0.8423426947309508, iteration: 220492
loss: 0.9954867959022522,grad_norm: 0.8568481971693794, iteration: 220493
loss: 0.9699844717979431,grad_norm: 0.9423448420205414, iteration: 220494
loss: 1.0221879482269287,grad_norm: 0.9631164978532188, iteration: 220495
loss: 0.9970884919166565,grad_norm: 0.8922383252802146, iteration: 220496
loss: 0.9779207110404968,grad_norm: 0.9683169158875227, iteration: 220497
loss: 1.01505708694458,grad_norm: 0.8699724540550406, iteration: 220498
loss: 1.006662130355835,grad_norm: 0.9999991848458765, iteration: 220499
loss: 0.9973618984222412,grad_norm: 0.8495635128142287, iteration: 220500
loss: 1.0212187767028809,grad_norm: 0.9861721107239813, iteration: 220501
loss: 0.9905845522880554,grad_norm: 0.9999992051149202, iteration: 220502
loss: 1.013243556022644,grad_norm: 0.8441532967333565, iteration: 220503
loss: 1.0145800113677979,grad_norm: 0.9443751043152268, iteration: 220504
loss: 1.0089516639709473,grad_norm: 0.9244658623580088, iteration: 220505
loss: 0.9473103880882263,grad_norm: 0.8186416051309773, iteration: 220506
loss: 1.070239782333374,grad_norm: 0.9728944082867426, iteration: 220507
loss: 1.0487254858016968,grad_norm: 0.9355200960109941, iteration: 220508
loss: 1.0170146226882935,grad_norm: 0.8948799961281907, iteration: 220509
loss: 1.0123353004455566,grad_norm: 0.6784722371057604, iteration: 220510
loss: 0.9755531549453735,grad_norm: 0.9178022786836751, iteration: 220511
loss: 1.022865653038025,grad_norm: 0.9999995519800303, iteration: 220512
loss: 1.0145119428634644,grad_norm: 0.8038340707886981, iteration: 220513
loss: 0.9896987080574036,grad_norm: 0.9469945667265358, iteration: 220514
loss: 0.9920686483383179,grad_norm: 0.8719193517541611, iteration: 220515
loss: 1.002866506576538,grad_norm: 0.9182137930796938, iteration: 220516
loss: 1.0311415195465088,grad_norm: 0.8214477313491639, iteration: 220517
loss: 0.9892224669456482,grad_norm: 0.9999990641791612, iteration: 220518
loss: 1.0142897367477417,grad_norm: 0.8790375956456743, iteration: 220519
loss: 1.0024139881134033,grad_norm: 0.9558843755800923, iteration: 220520
loss: 0.9855158925056458,grad_norm: 0.7589912953472427, iteration: 220521
loss: 0.9942883253097534,grad_norm: 0.9402507128751243, iteration: 220522
loss: 1.0116652250289917,grad_norm: 0.7908020193454115, iteration: 220523
loss: 1.024724006652832,grad_norm: 0.9999989632368763, iteration: 220524
loss: 0.9796087741851807,grad_norm: 0.9197598632875691, iteration: 220525
loss: 1.024802565574646,grad_norm: 0.951884368397625, iteration: 220526
loss: 0.9551512598991394,grad_norm: 0.9183526731352727, iteration: 220527
loss: 0.967312216758728,grad_norm: 0.8954717115864025, iteration: 220528
loss: 1.0164731740951538,grad_norm: 0.8865078618979566, iteration: 220529
loss: 1.0362184047698975,grad_norm: 0.9999990687412182, iteration: 220530
loss: 0.990814208984375,grad_norm: 0.9999991933360131, iteration: 220531
loss: 0.9951295852661133,grad_norm: 0.9680349995739811, iteration: 220532
loss: 0.9749271273612976,grad_norm: 0.8807412949753832, iteration: 220533
loss: 0.9818918704986572,grad_norm: 0.9275728148314434, iteration: 220534
loss: 0.9809840321540833,grad_norm: 0.836499773157924, iteration: 220535
loss: 1.028451681137085,grad_norm: 0.9244548191795948, iteration: 220536
loss: 0.9737297892570496,grad_norm: 0.9999991556105273, iteration: 220537
loss: 1.009061336517334,grad_norm: 0.9072750049568403, iteration: 220538
loss: 1.0293043851852417,grad_norm: 0.9999990525518191, iteration: 220539
loss: 1.0170942544937134,grad_norm: 0.9999989089885608, iteration: 220540
loss: 1.012549877166748,grad_norm: 0.9450567825288727, iteration: 220541
loss: 0.9992388486862183,grad_norm: 0.877810773976766, iteration: 220542
loss: 0.9771991968154907,grad_norm: 0.9309454782625899, iteration: 220543
loss: 1.0064241886138916,grad_norm: 0.8543306311047878, iteration: 220544
loss: 1.0020908117294312,grad_norm: 0.9999993263599886, iteration: 220545
loss: 0.9865773320198059,grad_norm: 0.9109767701684807, iteration: 220546
loss: 0.9376702308654785,grad_norm: 0.891298972724683, iteration: 220547
loss: 0.978187620639801,grad_norm: 0.857396883295403, iteration: 220548
loss: 1.0144466161727905,grad_norm: 0.9999991132090144, iteration: 220549
loss: 1.0035996437072754,grad_norm: 0.9999991404625211, iteration: 220550
loss: 1.040324330329895,grad_norm: 0.8990716953071368, iteration: 220551
loss: 0.989086925983429,grad_norm: 0.999999023366138, iteration: 220552
loss: 0.970123827457428,grad_norm: 0.951817794879658, iteration: 220553
loss: 0.9793543815612793,grad_norm: 0.9035377653744006, iteration: 220554
loss: 0.9804366230964661,grad_norm: 0.8201073146574396, iteration: 220555
loss: 1.0328277349472046,grad_norm: 0.9999992481670261, iteration: 220556
loss: 0.9890212416648865,grad_norm: 0.8317795147132206, iteration: 220557
loss: 0.9444950222969055,grad_norm: 0.9085528754043899, iteration: 220558
loss: 0.9968769550323486,grad_norm: 0.849068898080457, iteration: 220559
loss: 0.9935954213142395,grad_norm: 0.8710621086487063, iteration: 220560
loss: 0.9962772130966187,grad_norm: 0.8997643980175226, iteration: 220561
loss: 0.9873808026313782,grad_norm: 0.9562102777090654, iteration: 220562
loss: 0.9543874859809875,grad_norm: 0.9999991445337529, iteration: 220563
loss: 1.0077544450759888,grad_norm: 0.9809572273439253, iteration: 220564
loss: 1.0010604858398438,grad_norm: 0.9821146977673602, iteration: 220565
loss: 0.9988659024238586,grad_norm: 0.8167260696813227, iteration: 220566
loss: 0.9892694354057312,grad_norm: 0.8923143460251218, iteration: 220567
loss: 0.954079270362854,grad_norm: 0.9397258754105022, iteration: 220568
loss: 1.037384033203125,grad_norm: 0.9164516315718256, iteration: 220569
loss: 1.0103455781936646,grad_norm: 0.9421363840815478, iteration: 220570
loss: 0.9829609990119934,grad_norm: 0.826462583466018, iteration: 220571
loss: 1.0139844417572021,grad_norm: 0.9386927862959606, iteration: 220572
loss: 0.9876691102981567,grad_norm: 0.9087366896539684, iteration: 220573
loss: 0.9909486174583435,grad_norm: 0.9806298162573005, iteration: 220574
loss: 1.002568244934082,grad_norm: 0.8538241816060195, iteration: 220575
loss: 0.9978792071342468,grad_norm: 0.9112263354520882, iteration: 220576
loss: 0.9815889000892639,grad_norm: 0.9999990383785771, iteration: 220577
loss: 0.9848961234092712,grad_norm: 0.9935726057509114, iteration: 220578
loss: 1.0189900398254395,grad_norm: 0.8137882703573899, iteration: 220579
loss: 1.0056029558181763,grad_norm: 0.968110138209468, iteration: 220580
loss: 0.9846274852752686,grad_norm: 0.9999990811118604, iteration: 220581
loss: 1.011169672012329,grad_norm: 0.7760605464136079, iteration: 220582
loss: 1.009140133857727,grad_norm: 0.8689556408595073, iteration: 220583
loss: 1.014299988746643,grad_norm: 0.9142650309496986, iteration: 220584
loss: 0.9550337195396423,grad_norm: 0.999999091578879, iteration: 220585
loss: 0.9966568946838379,grad_norm: 0.8012164772355929, iteration: 220586
loss: 1.0211261510849,grad_norm: 0.8949678455912023, iteration: 220587
loss: 1.010293960571289,grad_norm: 0.780172498819269, iteration: 220588
loss: 1.0162999629974365,grad_norm: 0.9999990953454384, iteration: 220589
loss: 1.0287024974822998,grad_norm: 0.9806160833142038, iteration: 220590
loss: 0.9939982891082764,grad_norm: 0.9999990270932654, iteration: 220591
loss: 1.0073139667510986,grad_norm: 0.8978298051521795, iteration: 220592
loss: 1.0004500150680542,grad_norm: 0.7954545624365171, iteration: 220593
loss: 0.9878332614898682,grad_norm: 0.8877640481055964, iteration: 220594
loss: 0.9589462876319885,grad_norm: 0.8539408671045446, iteration: 220595
loss: 0.967756986618042,grad_norm: 0.8326148711621874, iteration: 220596
loss: 0.9365395903587341,grad_norm: 0.9538092360208807, iteration: 220597
loss: 0.999556839466095,grad_norm: 0.8569869622255332, iteration: 220598
loss: 0.9982948303222656,grad_norm: 0.9398641071622085, iteration: 220599
loss: 1.0380843877792358,grad_norm: 0.985974214930909, iteration: 220600
loss: 0.9754350185394287,grad_norm: 0.8497057198336688, iteration: 220601
loss: 0.9689046740531921,grad_norm: 0.7891543957715613, iteration: 220602
loss: 0.9826074838638306,grad_norm: 0.9389082757551094, iteration: 220603
loss: 1.0206894874572754,grad_norm: 0.7793964124297001, iteration: 220604
loss: 1.0175833702087402,grad_norm: 0.8544384091184736, iteration: 220605
loss: 1.0176708698272705,grad_norm: 0.7583085458699301, iteration: 220606
loss: 0.995910108089447,grad_norm: 0.999999268262605, iteration: 220607
loss: 1.0064899921417236,grad_norm: 0.8544492761954746, iteration: 220608
loss: 0.9794893264770508,grad_norm: 0.8228991388453324, iteration: 220609
loss: 0.9672976732254028,grad_norm: 0.9163263679306932, iteration: 220610
loss: 1.00230872631073,grad_norm: 0.9112397090937967, iteration: 220611
loss: 0.9518548250198364,grad_norm: 0.9999990126079372, iteration: 220612
loss: 1.0168567895889282,grad_norm: 0.9051223934225548, iteration: 220613
loss: 1.012526512145996,grad_norm: 0.9786208865965618, iteration: 220614
loss: 1.0050864219665527,grad_norm: 0.8993410146509005, iteration: 220615
loss: 1.0207394361495972,grad_norm: 0.8670583447313509, iteration: 220616
loss: 1.0178228616714478,grad_norm: 0.9167687055898065, iteration: 220617
loss: 0.9834267497062683,grad_norm: 0.9999992066052651, iteration: 220618
loss: 0.9917171001434326,grad_norm: 0.9854209017539628, iteration: 220619
loss: 0.9613269567489624,grad_norm: 0.9999989928746478, iteration: 220620
loss: 0.9383280277252197,grad_norm: 0.9004429930531189, iteration: 220621
loss: 1.001948356628418,grad_norm: 0.9901051937941879, iteration: 220622
loss: 1.0845155715942383,grad_norm: 0.9999991775124639, iteration: 220623
loss: 1.018572449684143,grad_norm: 0.9346270750729213, iteration: 220624
loss: 1.0200597047805786,grad_norm: 0.8203616060357434, iteration: 220625
loss: 1.023971438407898,grad_norm: 0.9999991428402154, iteration: 220626
loss: 1.0009733438491821,grad_norm: 0.8456789892600205, iteration: 220627
loss: 0.9371118545532227,grad_norm: 0.9931962212911413, iteration: 220628
loss: 0.9903973340988159,grad_norm: 0.9406894019866018, iteration: 220629
loss: 1.0240867137908936,grad_norm: 0.8997308526758859, iteration: 220630
loss: 1.0135055780410767,grad_norm: 0.9999990072153524, iteration: 220631
loss: 0.9976228475570679,grad_norm: 0.9999990655993148, iteration: 220632
loss: 0.9860137104988098,grad_norm: 0.9999991632118458, iteration: 220633
loss: 0.986352264881134,grad_norm: 0.9999998369566934, iteration: 220634
loss: 0.9807883501052856,grad_norm: 0.8634098565413126, iteration: 220635
loss: 1.0460857152938843,grad_norm: 0.9783796994978845, iteration: 220636
loss: 0.9786795377731323,grad_norm: 0.9691644048866769, iteration: 220637
loss: 0.9714860320091248,grad_norm: 0.922208786644751, iteration: 220638
loss: 0.9899356365203857,grad_norm: 0.9999991476548084, iteration: 220639
loss: 0.9949370622634888,grad_norm: 0.9022918326145544, iteration: 220640
loss: 1.0202864408493042,grad_norm: 0.9999990643430932, iteration: 220641
loss: 0.9931637644767761,grad_norm: 0.7564123746656906, iteration: 220642
loss: 1.0052381753921509,grad_norm: 0.9837209566847546, iteration: 220643
loss: 1.0249090194702148,grad_norm: 0.9999990635006011, iteration: 220644
loss: 0.979840874671936,grad_norm: 0.8149360738165335, iteration: 220645
loss: 1.0433906316757202,grad_norm: 0.9999993552236678, iteration: 220646
loss: 1.0103511810302734,grad_norm: 0.9999991220082278, iteration: 220647
loss: 0.9915987849235535,grad_norm: 0.9955431099118783, iteration: 220648
loss: 0.9779903888702393,grad_norm: 0.9629468105277286, iteration: 220649
loss: 1.0017726421356201,grad_norm: 0.9999989769789811, iteration: 220650
loss: 1.1331589221954346,grad_norm: 0.9999991431486104, iteration: 220651
loss: 0.9977887272834778,grad_norm: 0.959967674661288, iteration: 220652
loss: 1.0172470808029175,grad_norm: 0.8654866472620107, iteration: 220653
loss: 1.0210676193237305,grad_norm: 0.8732504529123006, iteration: 220654
loss: 1.0524940490722656,grad_norm: 0.9999992688956755, iteration: 220655
loss: 1.0081754922866821,grad_norm: 0.8889896727206723, iteration: 220656
loss: 1.0954076051712036,grad_norm: 0.9999992558249804, iteration: 220657
loss: 1.0464167594909668,grad_norm: 0.9082298447888218, iteration: 220658
loss: 1.0383508205413818,grad_norm: 0.9999991790470957, iteration: 220659
loss: 1.0238949060440063,grad_norm: 0.9392839323691683, iteration: 220660
loss: 1.058469295501709,grad_norm: 0.9999995587782476, iteration: 220661
loss: 1.0127227306365967,grad_norm: 0.9701855557483589, iteration: 220662
loss: 1.1119747161865234,grad_norm: 0.999999130925053, iteration: 220663
loss: 0.9924342632293701,grad_norm: 0.9481396852683142, iteration: 220664
loss: 0.9801791906356812,grad_norm: 0.9854699598710123, iteration: 220665
loss: 1.0095525979995728,grad_norm: 0.9825628495711776, iteration: 220666
loss: 1.0115097761154175,grad_norm: 0.9999989106404293, iteration: 220667
loss: 1.0112541913986206,grad_norm: 0.9873588469172168, iteration: 220668
loss: 1.000217080116272,grad_norm: 0.9348214388035212, iteration: 220669
loss: 0.9902153611183167,grad_norm: 0.7673520064304469, iteration: 220670
loss: 0.9635870456695557,grad_norm: 0.9145377446790054, iteration: 220671
loss: 1.0013669729232788,grad_norm: 0.893015647735608, iteration: 220672
loss: 0.9953426718711853,grad_norm: 0.8353173839876765, iteration: 220673
loss: 1.0240532159805298,grad_norm: 0.9126465002209679, iteration: 220674
loss: 1.0288282632827759,grad_norm: 0.9962996393296539, iteration: 220675
loss: 1.0441851615905762,grad_norm: 0.9999993947235201, iteration: 220676
loss: 0.9788643717765808,grad_norm: 0.9614940478851584, iteration: 220677
loss: 0.9875578284263611,grad_norm: 0.9999992548767503, iteration: 220678
loss: 1.1349129676818848,grad_norm: 0.9999994043315743, iteration: 220679
loss: 0.9907842874526978,grad_norm: 0.9999993903860465, iteration: 220680
loss: 0.9960551857948303,grad_norm: 0.8938134612995282, iteration: 220681
loss: 1.0871574878692627,grad_norm: 0.9999994612460013, iteration: 220682
loss: 1.000106930732727,grad_norm: 0.9347162799216024, iteration: 220683
loss: 0.9851406216621399,grad_norm: 0.8849776554170186, iteration: 220684
loss: 0.9960744380950928,grad_norm: 0.9699806944428933, iteration: 220685
loss: 0.9947549700737,grad_norm: 0.9291063380781228, iteration: 220686
loss: 0.9904343485832214,grad_norm: 0.9698561449503464, iteration: 220687
loss: 1.0180730819702148,grad_norm: 0.9216051011157302, iteration: 220688
loss: 1.023171305656433,grad_norm: 0.9999993690736526, iteration: 220689
loss: 1.0111299753189087,grad_norm: 0.8881601282943623, iteration: 220690
loss: 1.0559107065200806,grad_norm: 0.9311156991044442, iteration: 220691
loss: 1.0656518936157227,grad_norm: 0.9999993342515089, iteration: 220692
loss: 0.9943587183952332,grad_norm: 0.8335518702994585, iteration: 220693
loss: 1.0135667324066162,grad_norm: 0.970641531366512, iteration: 220694
loss: 0.989948034286499,grad_norm: 0.99999913932129, iteration: 220695
loss: 1.0433497428894043,grad_norm: 0.9999993059897637, iteration: 220696
loss: 1.0378952026367188,grad_norm: 0.8832679839102326, iteration: 220697
loss: 1.0163671970367432,grad_norm: 0.9999991937601872, iteration: 220698
loss: 0.9548338055610657,grad_norm: 0.8654930096408832, iteration: 220699
loss: 0.9765738248825073,grad_norm: 0.9999991649529275, iteration: 220700
loss: 0.9908173084259033,grad_norm: 0.9879652097231154, iteration: 220701
loss: 1.052618145942688,grad_norm: 0.940300867193008, iteration: 220702
loss: 0.9957544207572937,grad_norm: 0.999999108188442, iteration: 220703
loss: 0.9860734343528748,grad_norm: 0.8491385359947563, iteration: 220704
loss: 1.0033001899719238,grad_norm: 0.9706115940951998, iteration: 220705
loss: 1.000991702079773,grad_norm: 0.998235482384782, iteration: 220706
loss: 0.9959409832954407,grad_norm: 0.8631779726089527, iteration: 220707
loss: 0.9898257851600647,grad_norm: 0.8739271608194703, iteration: 220708
loss: 1.1148054599761963,grad_norm: 0.9999994796702404, iteration: 220709
loss: 1.051679253578186,grad_norm: 0.7293566179243249, iteration: 220710
loss: 0.9856189489364624,grad_norm: 0.990338244007495, iteration: 220711
loss: 1.02944016456604,grad_norm: 0.9003223234367475, iteration: 220712
loss: 1.3667306900024414,grad_norm: 0.9999998049987118, iteration: 220713
loss: 0.985869824886322,grad_norm: 0.7985978953013941, iteration: 220714
loss: 1.283476710319519,grad_norm: 0.9999994461235332, iteration: 220715
loss: 0.9897836446762085,grad_norm: 0.9999989931428318, iteration: 220716
loss: 0.9892264008522034,grad_norm: 0.8366313016695267, iteration: 220717
loss: 1.0010026693344116,grad_norm: 0.8444187192230409, iteration: 220718
loss: 0.990444540977478,grad_norm: 0.8871070959391463, iteration: 220719
loss: 1.0180586576461792,grad_norm: 0.9999991821741293, iteration: 220720
loss: 1.0108919143676758,grad_norm: 0.9999991135683669, iteration: 220721
loss: 1.0203287601470947,grad_norm: 0.9999991881629351, iteration: 220722
loss: 1.0344512462615967,grad_norm: 0.9370325945451724, iteration: 220723
loss: 0.9989545941352844,grad_norm: 0.9999992006404207, iteration: 220724
loss: 0.9814391136169434,grad_norm: 0.8639155932251947, iteration: 220725
loss: 1.0174375772476196,grad_norm: 0.9999990751948569, iteration: 220726
loss: 1.0048861503601074,grad_norm: 0.747975760582725, iteration: 220727
loss: 0.9741361737251282,grad_norm: 0.7594739565749105, iteration: 220728
loss: 0.9665151834487915,grad_norm: 0.8469693467436551, iteration: 220729
loss: 0.9956387877464294,grad_norm: 0.7921875485026811, iteration: 220730
loss: 1.0439784526824951,grad_norm: 0.9598311399163021, iteration: 220731
loss: 1.1750718355178833,grad_norm: 0.9999998397700259, iteration: 220732
loss: 0.9699653387069702,grad_norm: 0.9999992370410206, iteration: 220733
loss: 1.0327192544937134,grad_norm: 0.8537303905595948, iteration: 220734
loss: 0.9925631880760193,grad_norm: 0.8039467564621358, iteration: 220735
loss: 0.9353002905845642,grad_norm: 0.9790872406452699, iteration: 220736
loss: 0.9909018278121948,grad_norm: 0.9743093493341564, iteration: 220737
loss: 0.9793839454650879,grad_norm: 0.9999991705258443, iteration: 220738
loss: 1.0827921628952026,grad_norm: 0.9999999071302872, iteration: 220739
loss: 1.0869947671890259,grad_norm: 0.9999994669125921, iteration: 220740
loss: 1.0250271558761597,grad_norm: 0.999999017759327, iteration: 220741
loss: 0.9753064513206482,grad_norm: 0.9238615814394306, iteration: 220742
loss: 0.9948694109916687,grad_norm: 0.9189902184721588, iteration: 220743
loss: 0.9974141716957092,grad_norm: 0.9999990966100962, iteration: 220744
loss: 0.9955262541770935,grad_norm: 0.9999991037806343, iteration: 220745
loss: 1.4151822328567505,grad_norm: 0.9999993522395766, iteration: 220746
loss: 0.9827921390533447,grad_norm: 0.8033700214474249, iteration: 220747
loss: 1.0046985149383545,grad_norm: 0.9719527270033239, iteration: 220748
loss: 1.0295220613479614,grad_norm: 0.9999998648089552, iteration: 220749
loss: 1.0276446342468262,grad_norm: 0.9999991078591065, iteration: 220750
loss: 0.9915252327919006,grad_norm: 0.7804293295457773, iteration: 220751
loss: 1.0145924091339111,grad_norm: 0.8573007152452505, iteration: 220752
loss: 0.9888114333152771,grad_norm: 0.8366410931500211, iteration: 220753
loss: 1.1356793642044067,grad_norm: 0.999999735831792, iteration: 220754
loss: 1.017594814300537,grad_norm: 0.9057280742466335, iteration: 220755
loss: 1.0186606645584106,grad_norm: 0.9271148731842305, iteration: 220756
loss: 1.0052400827407837,grad_norm: 0.9999992318658611, iteration: 220757
loss: 1.4086705446243286,grad_norm: 0.9999994069445677, iteration: 220758
loss: 0.9748041033744812,grad_norm: 0.8382878209294304, iteration: 220759
loss: 1.03793466091156,grad_norm: 0.9999991603606314, iteration: 220760
loss: 1.0111722946166992,grad_norm: 0.9999991332327028, iteration: 220761
loss: 0.9958512783050537,grad_norm: 0.9999990601644505, iteration: 220762
loss: 1.0164164304733276,grad_norm: 0.9099720595716506, iteration: 220763
loss: 1.023956060409546,grad_norm: 0.8479116796008255, iteration: 220764
loss: 0.999051034450531,grad_norm: 0.999999021105432, iteration: 220765
loss: 0.9829368591308594,grad_norm: 0.946403086020483, iteration: 220766
loss: 1.08488929271698,grad_norm: 0.9999991166723237, iteration: 220767
loss: 1.0375354290008545,grad_norm: 0.9999992981159451, iteration: 220768
loss: 1.191023588180542,grad_norm: 0.9999994985036438, iteration: 220769
loss: 1.0284481048583984,grad_norm: 0.999999146281623, iteration: 220770
loss: 1.0062311887741089,grad_norm: 0.9808046802363353, iteration: 220771
loss: 1.3066612482070923,grad_norm: 0.9999995490306418, iteration: 220772
loss: 1.0788085460662842,grad_norm: 0.9999993708233753, iteration: 220773
loss: 1.1045058965682983,grad_norm: 0.9999991397479032, iteration: 220774
loss: 1.0031883716583252,grad_norm: 0.9951699228569827, iteration: 220775
loss: 1.0072429180145264,grad_norm: 0.9198277342021061, iteration: 220776
loss: 0.9872257113456726,grad_norm: 0.9453916801433859, iteration: 220777
loss: 1.0170782804489136,grad_norm: 0.8135498727478893, iteration: 220778
loss: 1.027964472770691,grad_norm: 0.9999992583343281, iteration: 220779
loss: 0.9778922200202942,grad_norm: 0.9999989921872217, iteration: 220780
loss: 1.0130895376205444,grad_norm: 0.9643442118312915, iteration: 220781
loss: 1.0085831880569458,grad_norm: 0.9574490114726226, iteration: 220782
loss: 1.0335818529129028,grad_norm: 0.999999443199985, iteration: 220783
loss: 1.112607717514038,grad_norm: 0.9999993818542455, iteration: 220784
loss: 0.9561729431152344,grad_norm: 0.9413872205014664, iteration: 220785
loss: 1.059183955192566,grad_norm: 0.9772759266642279, iteration: 220786
loss: 0.9945545196533203,grad_norm: 0.8219350606485898, iteration: 220787
loss: 0.9918624758720398,grad_norm: 0.8758687183006164, iteration: 220788
loss: 1.0159038305282593,grad_norm: 0.9155582998162801, iteration: 220789
loss: 1.0411659479141235,grad_norm: 0.9560810782208143, iteration: 220790
loss: 1.0332751274108887,grad_norm: 0.7812073297886662, iteration: 220791
loss: 1.024032711982727,grad_norm: 0.9171645763244111, iteration: 220792
loss: 0.9749886989593506,grad_norm: 0.9083171383434153, iteration: 220793
loss: 1.0217461585998535,grad_norm: 0.9999991362266606, iteration: 220794
loss: 0.9914201498031616,grad_norm: 0.8981265592816806, iteration: 220795
loss: 1.0262720584869385,grad_norm: 0.9999995372493793, iteration: 220796
loss: 1.0031198263168335,grad_norm: 0.9999992937528339, iteration: 220797
loss: 1.0059367418289185,grad_norm: 0.8914931666299649, iteration: 220798
loss: 0.9996622204780579,grad_norm: 0.7952764724031665, iteration: 220799
loss: 1.0797626972198486,grad_norm: 0.8816637541782352, iteration: 220800
loss: 0.9864586591720581,grad_norm: 0.9751612576737501, iteration: 220801
loss: 0.9723283052444458,grad_norm: 0.999999090797207, iteration: 220802
loss: 0.9972203373908997,grad_norm: 0.7766256870030688, iteration: 220803
loss: 0.9986323118209839,grad_norm: 0.9999992394656954, iteration: 220804
loss: 1.0105973482131958,grad_norm: 0.9715248510953742, iteration: 220805
loss: 0.9629340767860413,grad_norm: 0.8927611551972667, iteration: 220806
loss: 1.0481489896774292,grad_norm: 0.9999990542465591, iteration: 220807
loss: 0.994317352771759,grad_norm: 0.9999990805889724, iteration: 220808
loss: 1.0339571237564087,grad_norm: 0.9039907713844781, iteration: 220809
loss: 1.0193158388137817,grad_norm: 0.9999991828147884, iteration: 220810
loss: 0.9817389845848083,grad_norm: 0.8886813332085589, iteration: 220811
loss: 1.325309157371521,grad_norm: 0.9999999329438304, iteration: 220812
loss: 0.9787804484367371,grad_norm: 0.9999991337179454, iteration: 220813
loss: 1.004629373550415,grad_norm: 0.9669984957914615, iteration: 220814
loss: 0.9822909235954285,grad_norm: 0.9130059044624036, iteration: 220815
loss: 1.1056708097457886,grad_norm: 0.9999994721304242, iteration: 220816
loss: 1.0523241758346558,grad_norm: 0.99999898901878, iteration: 220817
loss: 1.0169634819030762,grad_norm: 0.9999989665304334, iteration: 220818
loss: 1.2260191440582275,grad_norm: 0.9999993528820853, iteration: 220819
loss: 1.007588505744934,grad_norm: 0.999999004133633, iteration: 220820
loss: 1.021767497062683,grad_norm: 0.9483039167089907, iteration: 220821
loss: 1.2959853410720825,grad_norm: 1.000000020726951, iteration: 220822
loss: 1.0214117765426636,grad_norm: 0.9999991201716393, iteration: 220823
loss: 1.0069149732589722,grad_norm: 0.9999993632438631, iteration: 220824
loss: 0.9829851984977722,grad_norm: 0.8969211149874281, iteration: 220825
loss: 0.9878249168395996,grad_norm: 0.8620192645738278, iteration: 220826
loss: 1.1399892568588257,grad_norm: 0.9999997219885997, iteration: 220827
loss: 1.0215215682983398,grad_norm: 0.9999997711441359, iteration: 220828
loss: 1.0025570392608643,grad_norm: 0.9999992968960713, iteration: 220829
loss: 0.9776997566223145,grad_norm: 0.9266518075683302, iteration: 220830
loss: 0.97205650806427,grad_norm: 0.9999991242279088, iteration: 220831
loss: 0.9713334441184998,grad_norm: 0.8793943172839515, iteration: 220832
loss: 1.2242037057876587,grad_norm: 0.9999992019553023, iteration: 220833
loss: 1.0173169374465942,grad_norm: 0.9999991280121078, iteration: 220834
loss: 1.0125439167022705,grad_norm: 0.9475787037408334, iteration: 220835
loss: 0.9780341386795044,grad_norm: 0.999999350662208, iteration: 220836
loss: 0.9895948171615601,grad_norm: 0.9736220082832135, iteration: 220837
loss: 1.0068703889846802,grad_norm: 0.8697262045941135, iteration: 220838
loss: 1.0185372829437256,grad_norm: 0.9999993870722856, iteration: 220839
loss: 0.978430449962616,grad_norm: 0.9756026613317432, iteration: 220840
loss: 1.0121729373931885,grad_norm: 0.7537606342943375, iteration: 220841
loss: 1.0051994323730469,grad_norm: 0.9999990959617339, iteration: 220842
loss: 1.0209996700286865,grad_norm: 0.9999992573225384, iteration: 220843
loss: 0.9953128695487976,grad_norm: 0.9626569295296235, iteration: 220844
loss: 0.986356258392334,grad_norm: 0.8779890931083604, iteration: 220845
loss: 0.992983877658844,grad_norm: 0.9681275040973039, iteration: 220846
loss: 1.00741446018219,grad_norm: 0.9999991239299945, iteration: 220847
loss: 0.9982374906539917,grad_norm: 0.9999992489681759, iteration: 220848
loss: 1.0413718223571777,grad_norm: 0.9999998366394429, iteration: 220849
loss: 0.9866341948509216,grad_norm: 0.894068536842323, iteration: 220850
loss: 1.0304131507873535,grad_norm: 0.9213036201021206, iteration: 220851
loss: 1.0453287363052368,grad_norm: 0.9999999214892262, iteration: 220852
loss: 1.0160008668899536,grad_norm: 0.9185960217657505, iteration: 220853
loss: 1.02446448802948,grad_norm: 0.8637236547201778, iteration: 220854
loss: 0.987948477268219,grad_norm: 0.986720960232839, iteration: 220855
loss: 1.0023173093795776,grad_norm: 0.9999990288534063, iteration: 220856
loss: 1.0208097696304321,grad_norm: 0.9490364071138264, iteration: 220857
loss: 1.0062085390090942,grad_norm: 0.9970348246628992, iteration: 220858
loss: 1.0087705850601196,grad_norm: 0.9999991532122653, iteration: 220859
loss: 1.0231947898864746,grad_norm: 0.9999993573343435, iteration: 220860
loss: 0.9857248663902283,grad_norm: 0.9999988777213569, iteration: 220861
loss: 0.9493112564086914,grad_norm: 0.9999990458432195, iteration: 220862
loss: 1.058858036994934,grad_norm: 0.9999992326304511, iteration: 220863
loss: 1.0452189445495605,grad_norm: 0.999999091455737, iteration: 220864
loss: 1.0120261907577515,grad_norm: 0.7853095693418177, iteration: 220865
loss: 1.0275003910064697,grad_norm: 0.9597229988972548, iteration: 220866
loss: 0.9786689281463623,grad_norm: 0.9573664750820917, iteration: 220867
loss: 1.0218865871429443,grad_norm: 0.9999991860457003, iteration: 220868
loss: 1.009634017944336,grad_norm: 0.7649722442401247, iteration: 220869
loss: 1.1834322214126587,grad_norm: 0.9999994636276164, iteration: 220870
loss: 1.0877492427825928,grad_norm: 0.9999997443501903, iteration: 220871
loss: 0.9883688688278198,grad_norm: 0.907508626053594, iteration: 220872
loss: 0.9834955334663391,grad_norm: 0.901760661902271, iteration: 220873
loss: 0.9807839393615723,grad_norm: 0.9621438088227516, iteration: 220874
loss: 1.1329556703567505,grad_norm: 0.9999995238842349, iteration: 220875
loss: 1.005815029144287,grad_norm: 0.9572827195780154, iteration: 220876
loss: 1.062326431274414,grad_norm: 0.9999994895856843, iteration: 220877
loss: 1.0497545003890991,grad_norm: 0.9999992478778237, iteration: 220878
loss: 1.0229239463806152,grad_norm: 0.9656960600064453, iteration: 220879
loss: 1.0258865356445312,grad_norm: 0.9999992216443623, iteration: 220880
loss: 1.0060197114944458,grad_norm: 0.9999992783639615, iteration: 220881
loss: 0.9801071286201477,grad_norm: 0.9999992483503534, iteration: 220882
loss: 1.0363398790359497,grad_norm: 0.9823455250401111, iteration: 220883
loss: 1.0370875597000122,grad_norm: 0.9647427167294216, iteration: 220884
loss: 1.0056159496307373,grad_norm: 0.9999997142091224, iteration: 220885
loss: 1.0071102380752563,grad_norm: 0.9038000819675454, iteration: 220886
loss: 1.0273900032043457,grad_norm: 0.9483157792698754, iteration: 220887
loss: 0.9906610250473022,grad_norm: 0.9999991405560148, iteration: 220888
loss: 0.9674811959266663,grad_norm: 0.7934221958671303, iteration: 220889
loss: 1.0267242193222046,grad_norm: 0.9799233204360174, iteration: 220890
loss: 1.0206202268600464,grad_norm: 0.9999990912998515, iteration: 220891
loss: 1.2257224321365356,grad_norm: 0.9999995261577612, iteration: 220892
loss: 1.0581918954849243,grad_norm: 0.9999993465630339, iteration: 220893
loss: 1.0691746473312378,grad_norm: 0.999999184159292, iteration: 220894
loss: 1.0321766138076782,grad_norm: 0.9999994271522439, iteration: 220895
loss: 1.0206031799316406,grad_norm: 0.8853093071896548, iteration: 220896
loss: 1.0359668731689453,grad_norm: 0.9723255552866304, iteration: 220897
loss: 1.0198242664337158,grad_norm: 0.9999994294791056, iteration: 220898
loss: 1.0049210786819458,grad_norm: 0.9359579425544663, iteration: 220899
loss: 1.004577875137329,grad_norm: 0.9086495653905202, iteration: 220900
loss: 0.977063775062561,grad_norm: 0.937785081305153, iteration: 220901
loss: 1.0159629583358765,grad_norm: 0.813198167619675, iteration: 220902
loss: 1.030048131942749,grad_norm: 0.999999585626324, iteration: 220903
loss: 1.0230458974838257,grad_norm: 0.9999996454784231, iteration: 220904
loss: 1.0416501760482788,grad_norm: 0.8806947589562879, iteration: 220905
loss: 1.010118842124939,grad_norm: 0.9999990002276764, iteration: 220906
loss: 0.9983596205711365,grad_norm: 0.8501184180865642, iteration: 220907
loss: 1.0111432075500488,grad_norm: 0.9414232328926726, iteration: 220908
loss: 1.0422581434249878,grad_norm: 0.9999997698469505, iteration: 220909
loss: 1.0115575790405273,grad_norm: 0.9999990801790759, iteration: 220910
loss: 1.0054709911346436,grad_norm: 0.9999991355574473, iteration: 220911
loss: 0.9884897470474243,grad_norm: 0.9641249365000556, iteration: 220912
loss: 1.039131999015808,grad_norm: 0.9999992072905896, iteration: 220913
loss: 1.0388792753219604,grad_norm: 0.9999991673530212, iteration: 220914
loss: 0.9785742163658142,grad_norm: 0.9479367461923792, iteration: 220915
loss: 1.0354442596435547,grad_norm: 0.827601303805545, iteration: 220916
loss: 0.9693020582199097,grad_norm: 0.836947948473611, iteration: 220917
loss: 1.008619785308838,grad_norm: 0.9999990373919243, iteration: 220918
loss: 0.9841552376747131,grad_norm: 0.9999991656133116, iteration: 220919
loss: 0.9956830143928528,grad_norm: 0.9284627320606836, iteration: 220920
loss: 0.9978600740432739,grad_norm: 0.9999990749784909, iteration: 220921
loss: 0.9968767762184143,grad_norm: 0.9999995718119494, iteration: 220922
loss: 0.997284471988678,grad_norm: 0.9332055073882843, iteration: 220923
loss: 0.9474709033966064,grad_norm: 0.9856648642402871, iteration: 220924
loss: 1.0090878009796143,grad_norm: 0.9370122647722102, iteration: 220925
loss: 1.0576999187469482,grad_norm: 0.9999991572757994, iteration: 220926
loss: 1.0078076124191284,grad_norm: 0.9999992303282673, iteration: 220927
loss: 1.0719588994979858,grad_norm: 0.9999997079688792, iteration: 220928
loss: 1.0953940153121948,grad_norm: 0.9999997364461369, iteration: 220929
loss: 0.9552087187767029,grad_norm: 0.8763580618736672, iteration: 220930
loss: 1.0892713069915771,grad_norm: 0.9917364240276931, iteration: 220931
loss: 0.9751964211463928,grad_norm: 0.9193954279897961, iteration: 220932
loss: 1.0081149339675903,grad_norm: 0.867186790763174, iteration: 220933
loss: 0.9807305932044983,grad_norm: 0.8315791015456163, iteration: 220934
loss: 0.9859335422515869,grad_norm: 0.8583745844502186, iteration: 220935
loss: 0.9943169951438904,grad_norm: 0.8870056822845858, iteration: 220936
loss: 1.027127981185913,grad_norm: 0.9595751829423119, iteration: 220937
loss: 1.0193798542022705,grad_norm: 0.9197773005609867, iteration: 220938
loss: 1.0310232639312744,grad_norm: 0.9746942484148385, iteration: 220939
loss: 0.9834159016609192,grad_norm: 0.8595293344760221, iteration: 220940
loss: 1.009621262550354,grad_norm: 0.9999991189437014, iteration: 220941
loss: 1.0564074516296387,grad_norm: 0.9999994468782379, iteration: 220942
loss: 0.9707761406898499,grad_norm: 0.8152426907646263, iteration: 220943
loss: 1.0105977058410645,grad_norm: 0.9665399129275989, iteration: 220944
loss: 0.9693982601165771,grad_norm: 0.8992392263379297, iteration: 220945
loss: 0.9740159511566162,grad_norm: 0.7989616532459157, iteration: 220946
loss: 0.9490048289299011,grad_norm: 0.9999992873167157, iteration: 220947
loss: 1.0261112451553345,grad_norm: 0.9746094852313777, iteration: 220948
loss: 1.0055088996887207,grad_norm: 0.8994060742242397, iteration: 220949
loss: 0.9890229105949402,grad_norm: 0.8712083522741464, iteration: 220950
loss: 1.0327413082122803,grad_norm: 0.999999028509846, iteration: 220951
loss: 0.9981814622879028,grad_norm: 0.8568410885391059, iteration: 220952
loss: 1.0176414251327515,grad_norm: 0.8053087938192678, iteration: 220953
loss: 0.9617911577224731,grad_norm: 0.9162886328903278, iteration: 220954
loss: 1.0336664915084839,grad_norm: 0.9999992566759347, iteration: 220955
loss: 0.9617239236831665,grad_norm: 0.9340367497450867, iteration: 220956
loss: 1.0216577053070068,grad_norm: 0.8855643762200793, iteration: 220957
loss: 0.9668158888816833,grad_norm: 0.999999423764577, iteration: 220958
loss: 0.9765254855155945,grad_norm: 0.9790640639845306, iteration: 220959
loss: 1.0267494916915894,grad_norm: 0.936644900322802, iteration: 220960
loss: 0.9619939923286438,grad_norm: 0.8714288991509244, iteration: 220961
loss: 1.0754421949386597,grad_norm: 0.9999990975398373, iteration: 220962
loss: 0.9705772995948792,grad_norm: 0.8468595042168418, iteration: 220963
loss: 1.0033074617385864,grad_norm: 0.9999991613610433, iteration: 220964
loss: 1.0399539470672607,grad_norm: 0.9089636335776007, iteration: 220965
loss: 1.0125755071640015,grad_norm: 0.999998954577127, iteration: 220966
loss: 0.9930071830749512,grad_norm: 0.9999992476697163, iteration: 220967
loss: 0.9664976596832275,grad_norm: 0.8959168179202394, iteration: 220968
loss: 1.0039433240890503,grad_norm: 0.9730097586985863, iteration: 220969
loss: 1.008439064025879,grad_norm: 0.9262483032995016, iteration: 220970
loss: 1.0251493453979492,grad_norm: 0.9999991575077516, iteration: 220971
loss: 1.0654149055480957,grad_norm: 0.9999991133504199, iteration: 220972
loss: 1.0242780447006226,grad_norm: 0.9999996685614999, iteration: 220973
loss: 0.9760022163391113,grad_norm: 0.9501868166386441, iteration: 220974
loss: 1.0959806442260742,grad_norm: 0.9101669349505936, iteration: 220975
loss: 0.9775218963623047,grad_norm: 0.9999990234912692, iteration: 220976
loss: 1.0023832321166992,grad_norm: 0.9145039685118398, iteration: 220977
loss: 1.0443681478500366,grad_norm: 0.9999989899462534, iteration: 220978
loss: 0.9995561838150024,grad_norm: 0.7458114944153917, iteration: 220979
loss: 1.0147372484207153,grad_norm: 0.8075731648539588, iteration: 220980
loss: 0.982482373714447,grad_norm: 0.8464489314375581, iteration: 220981
loss: 1.0099157094955444,grad_norm: 0.9999990725718078, iteration: 220982
loss: 1.0666389465332031,grad_norm: 0.9999992122054093, iteration: 220983
loss: 1.0059468746185303,grad_norm: 0.9932438247684712, iteration: 220984
loss: 1.0383392572402954,grad_norm: 0.999999674366931, iteration: 220985
loss: 1.0045433044433594,grad_norm: 0.9999993723089233, iteration: 220986
loss: 0.9812875986099243,grad_norm: 0.9599179975989924, iteration: 220987
loss: 0.9863260388374329,grad_norm: 0.9721430089160139, iteration: 220988
loss: 0.9996591210365295,grad_norm: 0.9580802605750052, iteration: 220989
loss: 1.0260319709777832,grad_norm: 0.999999061630565, iteration: 220990
loss: 1.0055652856826782,grad_norm: 0.9313540157992531, iteration: 220991
loss: 0.9639226794242859,grad_norm: 0.9999989699007493, iteration: 220992
loss: 1.0018516778945923,grad_norm: 0.9999990279932619, iteration: 220993
loss: 1.008859395980835,grad_norm: 0.9999992345478362, iteration: 220994
loss: 0.9857633709907532,grad_norm: 0.999998930916819, iteration: 220995
loss: 1.0512332916259766,grad_norm: 0.9999991040312151, iteration: 220996
loss: 1.0053433179855347,grad_norm: 0.8628374099912167, iteration: 220997
loss: 0.9528322219848633,grad_norm: 0.9999990944704837, iteration: 220998
loss: 0.9856576323509216,grad_norm: 0.9999990756025817, iteration: 220999
loss: 1.0308752059936523,grad_norm: 0.9917764530905475, iteration: 221000
loss: 0.9509397149085999,grad_norm: 0.9005983747892983, iteration: 221001
loss: 0.999930202960968,grad_norm: 0.9591885486115643, iteration: 221002
loss: 1.063084602355957,grad_norm: 0.9937312216663393, iteration: 221003
loss: 0.9916189908981323,grad_norm: 0.9999991314238604, iteration: 221004
loss: 1.0166447162628174,grad_norm: 0.9999991988457019, iteration: 221005
loss: 0.9732493162155151,grad_norm: 0.8658333780205073, iteration: 221006
loss: 1.0019333362579346,grad_norm: 0.8588786940748747, iteration: 221007
loss: 0.9731625318527222,grad_norm: 0.9999996749850392, iteration: 221008
loss: 0.98443204164505,grad_norm: 0.931906079168938, iteration: 221009
loss: 1.041153907775879,grad_norm: 0.999999821896136, iteration: 221010
loss: 0.9923982620239258,grad_norm: 0.9999997210119139, iteration: 221011
loss: 1.004042625427246,grad_norm: 0.9419812458541033, iteration: 221012
loss: 1.0105069875717163,grad_norm: 0.8841234023290302, iteration: 221013
loss: 0.9909271001815796,grad_norm: 0.9999991636377972, iteration: 221014
loss: 0.9942938089370728,grad_norm: 0.8610848681403298, iteration: 221015
loss: 1.0135616064071655,grad_norm: 0.8586549926967121, iteration: 221016
loss: 0.9915385246276855,grad_norm: 0.7602121372843235, iteration: 221017
loss: 1.000168800354004,grad_norm: 0.8604218052756717, iteration: 221018
loss: 0.9860868453979492,grad_norm: 0.9150960533286755, iteration: 221019
loss: 1.0138555765151978,grad_norm: 0.8454326346299741, iteration: 221020
loss: 1.0805929899215698,grad_norm: 0.7644452907700001, iteration: 221021
loss: 0.9830780029296875,grad_norm: 0.9483546860254651, iteration: 221022
loss: 0.9737815856933594,grad_norm: 0.8645973416480789, iteration: 221023
loss: 1.0195670127868652,grad_norm: 0.9999993809804251, iteration: 221024
loss: 1.0698552131652832,grad_norm: 0.9999995986287536, iteration: 221025
loss: 0.9774248600006104,grad_norm: 0.9999990643796408, iteration: 221026
loss: 0.9757676720619202,grad_norm: 0.9999999280345717, iteration: 221027
loss: 0.9899740815162659,grad_norm: 0.9999991802816985, iteration: 221028
loss: 1.0275013446807861,grad_norm: 0.999999082495828, iteration: 221029
loss: 1.0221362113952637,grad_norm: 0.8504665779013569, iteration: 221030
loss: 0.9840222597122192,grad_norm: 0.9391563089990839, iteration: 221031
loss: 1.0035566091537476,grad_norm: 0.9999992849735923, iteration: 221032
loss: 0.9787790775299072,grad_norm: 0.856268974691216, iteration: 221033
loss: 1.0117851495742798,grad_norm: 0.9999991181271869, iteration: 221034
loss: 1.0056945085525513,grad_norm: 0.9950974167541602, iteration: 221035
loss: 0.9716936349868774,grad_norm: 0.8594361247791824, iteration: 221036
loss: 0.9860734343528748,grad_norm: 0.9694097394961635, iteration: 221037
loss: 1.029497504234314,grad_norm: 0.9999995313424824, iteration: 221038
loss: 0.996629536151886,grad_norm: 0.8491804862169117, iteration: 221039
loss: 1.0052402019500732,grad_norm: 0.9319197414260928, iteration: 221040
loss: 1.0022350549697876,grad_norm: 0.9290642118722505, iteration: 221041
loss: 1.0483852624893188,grad_norm: 0.999999126919292, iteration: 221042
loss: 0.9795505404472351,grad_norm: 0.9836216031203922, iteration: 221043
loss: 1.0146516561508179,grad_norm: 0.9078761169342187, iteration: 221044
loss: 1.010084629058838,grad_norm: 0.8919626123473604, iteration: 221045
loss: 1.013249397277832,grad_norm: 0.8407540143902983, iteration: 221046
loss: 0.9700281023979187,grad_norm: 0.885510382854857, iteration: 221047
loss: 1.0089638233184814,grad_norm: 0.9999991757700268, iteration: 221048
loss: 1.0007026195526123,grad_norm: 0.945367015151822, iteration: 221049
loss: 0.9946275949478149,grad_norm: 0.9315123615046873, iteration: 221050
loss: 1.0016247034072876,grad_norm: 0.9017763614200583, iteration: 221051
loss: 1.023087501525879,grad_norm: 0.9515489623008879, iteration: 221052
loss: 0.9600478410720825,grad_norm: 0.859220136393631, iteration: 221053
loss: 1.0169949531555176,grad_norm: 0.9999992072231706, iteration: 221054
loss: 1.0154529809951782,grad_norm: 0.9558051918280792, iteration: 221055
loss: 0.9665029048919678,grad_norm: 0.9702511688519874, iteration: 221056
loss: 0.9866071343421936,grad_norm: 0.9999991419297974, iteration: 221057
loss: 1.0088565349578857,grad_norm: 0.9999992982239382, iteration: 221058
loss: 1.0166906118392944,grad_norm: 0.9999991553899414, iteration: 221059
loss: 1.0247212648391724,grad_norm: 0.9704223874920396, iteration: 221060
loss: 1.0052274465560913,grad_norm: 0.9999995612499266, iteration: 221061
loss: 1.0196653604507446,grad_norm: 0.898798053129822, iteration: 221062
loss: 0.987938642501831,grad_norm: 0.9999992394751432, iteration: 221063
loss: 1.0097625255584717,grad_norm: 0.9765018242087328, iteration: 221064
loss: 1.0126172304153442,grad_norm: 0.9399436995608471, iteration: 221065
loss: 0.9982274174690247,grad_norm: 0.9999989965465081, iteration: 221066
loss: 0.9855659008026123,grad_norm: 0.8535069108688588, iteration: 221067
loss: 1.0231667757034302,grad_norm: 0.7233587231519111, iteration: 221068
loss: 1.0386654138565063,grad_norm: 0.9356636755804494, iteration: 221069
loss: 1.0029568672180176,grad_norm: 0.975823800582877, iteration: 221070
loss: 0.9992531538009644,grad_norm: 0.9999993591913294, iteration: 221071
loss: 1.0224846601486206,grad_norm: 0.7839059116542891, iteration: 221072
loss: 1.0661356449127197,grad_norm: 0.9216184847315803, iteration: 221073
loss: 1.018280029296875,grad_norm: 0.7915263920229766, iteration: 221074
loss: 0.9702997803688049,grad_norm: 0.8783343972154252, iteration: 221075
loss: 1.0250135660171509,grad_norm: 0.8806164057697816, iteration: 221076
loss: 1.0003941059112549,grad_norm: 0.9627246112799914, iteration: 221077
loss: 0.9681330919265747,grad_norm: 0.966782691334656, iteration: 221078
loss: 1.0367225408554077,grad_norm: 0.9757681708200828, iteration: 221079
loss: 0.9753904938697815,grad_norm: 0.9999990976851385, iteration: 221080
loss: 1.0644705295562744,grad_norm: 0.9999991491550783, iteration: 221081
loss: 1.0089679956436157,grad_norm: 0.9999992663705718, iteration: 221082
loss: 1.0252825021743774,grad_norm: 0.8365834793489575, iteration: 221083
loss: 1.0296151638031006,grad_norm: 0.9447218243316163, iteration: 221084
loss: 1.0187404155731201,grad_norm: 0.9215176342097205, iteration: 221085
loss: 1.0224980115890503,grad_norm: 0.9999998801590364, iteration: 221086
loss: 0.9927555322647095,grad_norm: 0.999999611265146, iteration: 221087
loss: 1.0335394144058228,grad_norm: 0.9836719942677158, iteration: 221088
loss: 1.0039188861846924,grad_norm: 0.9826038993494542, iteration: 221089
loss: 1.0158430337905884,grad_norm: 0.9509922001370759, iteration: 221090
loss: 1.0123916864395142,grad_norm: 0.8369810909447857, iteration: 221091
loss: 1.0134280920028687,grad_norm: 0.9999993047113133, iteration: 221092
loss: 0.9909878373146057,grad_norm: 0.9276583067776202, iteration: 221093
loss: 1.0081921815872192,grad_norm: 0.9039022545408335, iteration: 221094
loss: 1.0025559663772583,grad_norm: 0.9999992455854277, iteration: 221095
loss: 0.9992173910140991,grad_norm: 0.9999992625051223, iteration: 221096
loss: 0.9924174547195435,grad_norm: 0.9025996706373524, iteration: 221097
loss: 1.000688076019287,grad_norm: 0.9999989887372396, iteration: 221098
loss: 0.9816958904266357,grad_norm: 0.8929591131532779, iteration: 221099
loss: 1.026649832725525,grad_norm: 0.9999996574772465, iteration: 221100
loss: 1.0457556247711182,grad_norm: 0.999999643153569, iteration: 221101
loss: 1.0153628587722778,grad_norm: 0.8534621473008007, iteration: 221102
loss: 0.9598676562309265,grad_norm: 0.9242371276406296, iteration: 221103
loss: 0.9608566761016846,grad_norm: 0.999999196177966, iteration: 221104
loss: 1.0374979972839355,grad_norm: 0.7628356770882571, iteration: 221105
loss: 1.0357173681259155,grad_norm: 0.9999991143237179, iteration: 221106
loss: 0.9870671629905701,grad_norm: 0.9906515003213443, iteration: 221107
loss: 0.9711305499076843,grad_norm: 0.7855987887752163, iteration: 221108
loss: 0.9980435371398926,grad_norm: 0.9429388168703432, iteration: 221109
loss: 1.0255794525146484,grad_norm: 0.9999991824738099, iteration: 221110
loss: 1.0413248538970947,grad_norm: 0.9431462486872128, iteration: 221111
loss: 1.0413422584533691,grad_norm: 0.9999992485592084, iteration: 221112
loss: 1.0004417896270752,grad_norm: 0.8653353468593968, iteration: 221113
loss: 0.9846653938293457,grad_norm: 0.884063929919481, iteration: 221114
loss: 1.018635630607605,grad_norm: 0.8441320673744144, iteration: 221115
loss: 0.9871827960014343,grad_norm: 0.9315744553133763, iteration: 221116
loss: 1.0071922540664673,grad_norm: 0.9999990828255609, iteration: 221117
loss: 1.0180529356002808,grad_norm: 0.9999991031912815, iteration: 221118
loss: 1.020087718963623,grad_norm: 0.9715314806140528, iteration: 221119
loss: 1.0310922861099243,grad_norm: 0.9999996855155132, iteration: 221120
loss: 1.0139148235321045,grad_norm: 0.9999989624381339, iteration: 221121
loss: 1.0031481981277466,grad_norm: 0.7544596789024423, iteration: 221122
loss: 0.9871360659599304,grad_norm: 0.8992515321656742, iteration: 221123
loss: 0.9806095361709595,grad_norm: 0.8117304464317524, iteration: 221124
loss: 0.9956591129302979,grad_norm: 0.9811825581999506, iteration: 221125
loss: 0.9899441599845886,grad_norm: 0.9414206057762042, iteration: 221126
loss: 1.0140503644943237,grad_norm: 0.9605201892743832, iteration: 221127
loss: 1.0009304285049438,grad_norm: 0.9364693737696104, iteration: 221128
loss: 1.0164726972579956,grad_norm: 0.9999991357574188, iteration: 221129
loss: 0.9944839477539062,grad_norm: 0.9038275372595926, iteration: 221130
loss: 0.9924444556236267,grad_norm: 0.7963453662715527, iteration: 221131
loss: 1.0248147249221802,grad_norm: 0.9999992322288931, iteration: 221132
loss: 0.9699912071228027,grad_norm: 0.7478683172181398, iteration: 221133
loss: 1.005934476852417,grad_norm: 0.8919200767986859, iteration: 221134
loss: 0.9849659204483032,grad_norm: 0.9407461485457325, iteration: 221135
loss: 0.973244845867157,grad_norm: 0.8554447713810798, iteration: 221136
loss: 1.0289345979690552,grad_norm: 0.999999728096983, iteration: 221137
loss: 1.013890027999878,grad_norm: 0.9999990095306613, iteration: 221138
loss: 0.9999538660049438,grad_norm: 0.8212402367845293, iteration: 221139
loss: 0.9857510924339294,grad_norm: 0.7628941226127847, iteration: 221140
loss: 1.001831293106079,grad_norm: 0.8527342781645401, iteration: 221141
loss: 0.9532548189163208,grad_norm: 0.9999991509187937, iteration: 221142
loss: 1.0057841539382935,grad_norm: 0.9193908271679173, iteration: 221143
loss: 1.013178825378418,grad_norm: 0.9999991555668257, iteration: 221144
loss: 0.959295928478241,grad_norm: 0.8073873230414226, iteration: 221145
loss: 0.9968526363372803,grad_norm: 0.9999990889970138, iteration: 221146
loss: 1.01896071434021,grad_norm: 0.9999991327616963, iteration: 221147
loss: 0.9659111499786377,grad_norm: 0.9999990685005109, iteration: 221148
loss: 1.033823013305664,grad_norm: 0.9371852528355408, iteration: 221149
loss: 1.0198651552200317,grad_norm: 0.9650889383929663, iteration: 221150
loss: 1.0449193716049194,grad_norm: 0.9999991442218471, iteration: 221151
loss: 0.9932848215103149,grad_norm: 0.9863199116358119, iteration: 221152
loss: 0.9903036952018738,grad_norm: 0.930416044483503, iteration: 221153
loss: 0.9829074740409851,grad_norm: 0.9434806552208557, iteration: 221154
loss: 0.9907733798027039,grad_norm: 0.8836768676940008, iteration: 221155
loss: 1.0272142887115479,grad_norm: 0.8858099499326968, iteration: 221156
loss: 0.9780381321907043,grad_norm: 0.9812186271971247, iteration: 221157
loss: 1.0364419221878052,grad_norm: 0.9999994382745225, iteration: 221158
loss: 1.0684380531311035,grad_norm: 0.9999997656457258, iteration: 221159
loss: 0.9751150608062744,grad_norm: 0.9999988778970944, iteration: 221160
loss: 0.9803311228752136,grad_norm: 0.8642825126721487, iteration: 221161
loss: 0.9709741473197937,grad_norm: 0.775471293389954, iteration: 221162
loss: 0.9690894484519958,grad_norm: 0.9999990209918185, iteration: 221163
loss: 0.9813600778579712,grad_norm: 0.772611661962577, iteration: 221164
loss: 0.9920031428337097,grad_norm: 0.9999990449656737, iteration: 221165
loss: 1.0247746706008911,grad_norm: 0.988226766314801, iteration: 221166
loss: 0.9867521524429321,grad_norm: 0.8548283621022381, iteration: 221167
loss: 1.026980996131897,grad_norm: 0.9999990946890818, iteration: 221168
loss: 1.00674307346344,grad_norm: 0.8809322224424077, iteration: 221169
loss: 1.0399543046951294,grad_norm: 0.9999991283066635, iteration: 221170
loss: 1.0027815103530884,grad_norm: 0.8748110138924053, iteration: 221171
loss: 0.9964815378189087,grad_norm: 0.8490514374585428, iteration: 221172
loss: 1.0155407190322876,grad_norm: 0.9260801821431149, iteration: 221173
loss: 0.9974139928817749,grad_norm: 0.9693875345321552, iteration: 221174
loss: 1.0073888301849365,grad_norm: 0.9705932329168302, iteration: 221175
loss: 0.9539433121681213,grad_norm: 0.8645642984860373, iteration: 221176
loss: 0.9611583352088928,grad_norm: 0.8202494697513691, iteration: 221177
loss: 1.0232481956481934,grad_norm: 0.999999148387532, iteration: 221178
loss: 0.9686813950538635,grad_norm: 0.9241426104150477, iteration: 221179
loss: 1.0520745515823364,grad_norm: 0.9999996287191424, iteration: 221180
loss: 0.9951730966567993,grad_norm: 0.9999991819126186, iteration: 221181
loss: 0.9825745820999146,grad_norm: 0.911911827866137, iteration: 221182
loss: 1.0126224756240845,grad_norm: 0.9585451382917772, iteration: 221183
loss: 1.0734176635742188,grad_norm: 0.9424709368622839, iteration: 221184
loss: 1.003432273864746,grad_norm: 0.8640534543473909, iteration: 221185
loss: 1.0449132919311523,grad_norm: 0.9403622204287816, iteration: 221186
loss: 0.9931961894035339,grad_norm: 0.99999956551293, iteration: 221187
loss: 1.0004887580871582,grad_norm: 0.9657138145096708, iteration: 221188
loss: 0.9803013205528259,grad_norm: 0.8879401968611768, iteration: 221189
loss: 0.9597687721252441,grad_norm: 0.9375472707182804, iteration: 221190
loss: 0.9900119304656982,grad_norm: 0.8652434859160438, iteration: 221191
loss: 1.019034504890442,grad_norm: 0.9528136149284339, iteration: 221192
loss: 0.9796909093856812,grad_norm: 0.9304310435772986, iteration: 221193
loss: 0.9953041672706604,grad_norm: 0.9106785102267059, iteration: 221194
loss: 1.030124306678772,grad_norm: 0.8161201826918154, iteration: 221195
loss: 0.996749758720398,grad_norm: 0.9738533983736507, iteration: 221196
loss: 0.9979574680328369,grad_norm: 0.8599000381303918, iteration: 221197
loss: 0.9993802905082703,grad_norm: 0.9999991548631674, iteration: 221198
loss: 1.019673228263855,grad_norm: 0.8813962744505441, iteration: 221199
loss: 0.9513221383094788,grad_norm: 0.895548941633872, iteration: 221200
loss: 1.011513590812683,grad_norm: 0.9999990819471083, iteration: 221201
loss: 0.9962031841278076,grad_norm: 0.7941832608978998, iteration: 221202
loss: 1.0167481899261475,grad_norm: 0.8705777359966264, iteration: 221203
loss: 1.0092240571975708,grad_norm: 0.9999992838833498, iteration: 221204
loss: 0.9875901341438293,grad_norm: 0.9999992513726446, iteration: 221205
loss: 1.0862282514572144,grad_norm: 0.9999994876990969, iteration: 221206
loss: 1.0084766149520874,grad_norm: 0.9530766033965933, iteration: 221207
loss: 1.0277563333511353,grad_norm: 0.8450950027342421, iteration: 221208
loss: 0.9892215132713318,grad_norm: 0.8746340548713888, iteration: 221209
loss: 0.9685412645339966,grad_norm: 0.8726441002238324, iteration: 221210
loss: 1.0240155458450317,grad_norm: 0.905467605621643, iteration: 221211
loss: 1.0155278444290161,grad_norm: 0.9999991584614619, iteration: 221212
loss: 0.9967308640480042,grad_norm: 0.7958722396415684, iteration: 221213
loss: 1.0329108238220215,grad_norm: 0.9354491611072187, iteration: 221214
loss: 1.0321158170700073,grad_norm: 0.9627645368024402, iteration: 221215
loss: 1.0085116624832153,grad_norm: 0.9754342952226445, iteration: 221216
loss: 0.9641175866127014,grad_norm: 0.9999990963272494, iteration: 221217
loss: 0.9855849146842957,grad_norm: 0.7798113326443719, iteration: 221218
loss: 1.0494608879089355,grad_norm: 0.9999992346748844, iteration: 221219
loss: 0.9944919347763062,grad_norm: 0.999999104455675, iteration: 221220
loss: 1.087659239768982,grad_norm: 0.842308546060951, iteration: 221221
loss: 0.9856382608413696,grad_norm: 0.9999991922471376, iteration: 221222
loss: 1.023038625717163,grad_norm: 0.9999990515548342, iteration: 221223
loss: 0.9963151216506958,grad_norm: 0.9897154392061797, iteration: 221224
loss: 0.9944006204605103,grad_norm: 0.9175366262716649, iteration: 221225
loss: 1.0622676610946655,grad_norm: 0.9999999278616397, iteration: 221226
loss: 0.9908174276351929,grad_norm: 0.9246463791755676, iteration: 221227
loss: 1.0239722728729248,grad_norm: 0.9999991883321792, iteration: 221228
loss: 1.0412509441375732,grad_norm: 0.9709088878541753, iteration: 221229
loss: 1.0117501020431519,grad_norm: 0.999999232030742, iteration: 221230
loss: 1.084829330444336,grad_norm: 0.9999990139275862, iteration: 221231
loss: 1.0153110027313232,grad_norm: 0.8080463914686695, iteration: 221232
loss: 1.0948975086212158,grad_norm: 0.9629208564651655, iteration: 221233
loss: 0.9847595691680908,grad_norm: 0.8143778987435158, iteration: 221234
loss: 0.9914227724075317,grad_norm: 0.9865326877205983, iteration: 221235
loss: 1.0058350563049316,grad_norm: 0.9199856591300508, iteration: 221236
loss: 0.9961310625076294,grad_norm: 0.9999991060456854, iteration: 221237
loss: 1.051059603691101,grad_norm: 0.9999993291995811, iteration: 221238
loss: 1.004029631614685,grad_norm: 0.9157145609628844, iteration: 221239
loss: 0.991052508354187,grad_norm: 0.9999998074200381, iteration: 221240
loss: 1.0155712366104126,grad_norm: 0.8203102427157727, iteration: 221241
loss: 0.9759842157363892,grad_norm: 0.9959886236548122, iteration: 221242
loss: 1.0051298141479492,grad_norm: 0.9999990804402475, iteration: 221243
loss: 0.9817148447036743,grad_norm: 0.9887173570983122, iteration: 221244
loss: 1.0126585960388184,grad_norm: 0.8975043626349097, iteration: 221245
loss: 0.9650965929031372,grad_norm: 0.9421592630738512, iteration: 221246
loss: 0.9468831419944763,grad_norm: 0.9999990085628858, iteration: 221247
loss: 1.0081007480621338,grad_norm: 0.8562945581267737, iteration: 221248
loss: 1.031416654586792,grad_norm: 0.9999990990172092, iteration: 221249
loss: 1.0115717649459839,grad_norm: 0.9684262008585275, iteration: 221250
loss: 1.0482478141784668,grad_norm: 0.9999989437049804, iteration: 221251
loss: 0.9812437295913696,grad_norm: 0.9999991759452874, iteration: 221252
loss: 1.0011121034622192,grad_norm: 0.8292592061998412, iteration: 221253
loss: 1.0017290115356445,grad_norm: 0.9999991207746413, iteration: 221254
loss: 0.9941667318344116,grad_norm: 0.9999994076004529, iteration: 221255
loss: 0.9569634199142456,grad_norm: 0.9243783475602332, iteration: 221256
loss: 0.9611566662788391,grad_norm: 0.9480416489259402, iteration: 221257
loss: 1.0778768062591553,grad_norm: 0.9999992780638282, iteration: 221258
loss: 0.9543260931968689,grad_norm: 0.999999070307171, iteration: 221259
loss: 0.9878664612770081,grad_norm: 0.8629630269763564, iteration: 221260
loss: 1.010352373123169,grad_norm: 0.8822659535138899, iteration: 221261
loss: 0.9779503345489502,grad_norm: 0.9344757910150354, iteration: 221262
loss: 0.9704812169075012,grad_norm: 0.8984770649071162, iteration: 221263
loss: 1.017447829246521,grad_norm: 0.8513765593464042, iteration: 221264
loss: 1.0317951440811157,grad_norm: 0.9999993254547829, iteration: 221265
loss: 1.0000845193862915,grad_norm: 0.999999254625017, iteration: 221266
loss: 0.9925020933151245,grad_norm: 0.9999992586394345, iteration: 221267
loss: 0.9721062183380127,grad_norm: 0.8888834971206225, iteration: 221268
loss: 1.0024619102478027,grad_norm: 0.9999992308193716, iteration: 221269
loss: 1.0252217054367065,grad_norm: 0.9134255161997981, iteration: 221270
loss: 0.9917977452278137,grad_norm: 0.8072709273700187, iteration: 221271
loss: 0.9757840633392334,grad_norm: 0.9071308685213237, iteration: 221272
loss: 0.9754312634468079,grad_norm: 0.999999416031778, iteration: 221273
loss: 0.9994141459465027,grad_norm: 0.927601958480444, iteration: 221274
loss: 1.0011988878250122,grad_norm: 0.8512745629744409, iteration: 221275
loss: 1.002866506576538,grad_norm: 0.9263350061130781, iteration: 221276
loss: 1.0792666673660278,grad_norm: 0.9999995412475144, iteration: 221277
loss: 0.9914082884788513,grad_norm: 0.8506112478219727, iteration: 221278
loss: 0.9993615746498108,grad_norm: 0.741328349788888, iteration: 221279
loss: 1.0931719541549683,grad_norm: 0.9999990283484074, iteration: 221280
loss: 0.9687573909759521,grad_norm: 0.8151890382395299, iteration: 221281
loss: 1.0248374938964844,grad_norm: 0.9999992437848816, iteration: 221282
loss: 1.004434585571289,grad_norm: 0.9999991162638139, iteration: 221283
loss: 1.008501410484314,grad_norm: 0.9810493733060123, iteration: 221284
loss: 1.033368468284607,grad_norm: 0.9999990941029189, iteration: 221285
loss: 1.0118471384048462,grad_norm: 0.9999989912260852, iteration: 221286
loss: 1.0241490602493286,grad_norm: 0.9999991972286422, iteration: 221287
loss: 0.9869190454483032,grad_norm: 0.9999991108136687, iteration: 221288
loss: 1.016634225845337,grad_norm: 0.8575387353267268, iteration: 221289
loss: 0.9847418665885925,grad_norm: 0.9999990419140468, iteration: 221290
loss: 0.9839195609092712,grad_norm: 0.8658246552922128, iteration: 221291
loss: 0.9778149127960205,grad_norm: 0.8878163287690589, iteration: 221292
loss: 0.9695963859558105,grad_norm: 0.9728270934150981, iteration: 221293
loss: 0.9625439643859863,grad_norm: 0.9869030000764495, iteration: 221294
loss: 1.021051049232483,grad_norm: 0.9124344458382189, iteration: 221295
loss: 1.0143646001815796,grad_norm: 0.9999988863043238, iteration: 221296
loss: 1.0150980949401855,grad_norm: 0.9999990452377256, iteration: 221297
loss: 1.0163543224334717,grad_norm: 0.8546730283836692, iteration: 221298
loss: 1.0513230562210083,grad_norm: 0.9279613147254243, iteration: 221299
loss: 1.0216517448425293,grad_norm: 0.9999990209691493, iteration: 221300
loss: 0.977881133556366,grad_norm: 0.9454726028216227, iteration: 221301
loss: 1.00898015499115,grad_norm: 0.9999991573842992, iteration: 221302
loss: 0.9443536400794983,grad_norm: 0.9660911826643473, iteration: 221303
loss: 1.04512357711792,grad_norm: 0.8692976956271009, iteration: 221304
loss: 0.9637832641601562,grad_norm: 0.8949761657177652, iteration: 221305
loss: 1.0177834033966064,grad_norm: 0.9999995618022891, iteration: 221306
loss: 1.0015015602111816,grad_norm: 0.9840465986592265, iteration: 221307
loss: 1.0026404857635498,grad_norm: 0.9999989681400764, iteration: 221308
loss: 0.9866867065429688,grad_norm: 0.9301363900179516, iteration: 221309
loss: 0.9906123280525208,grad_norm: 0.8894188235816186, iteration: 221310
loss: 0.9898719191551208,grad_norm: 0.9999992334127045, iteration: 221311
loss: 1.0045496225357056,grad_norm: 0.9999991114897863, iteration: 221312
loss: 0.9837949275970459,grad_norm: 0.9571943035121402, iteration: 221313
loss: 0.9894170761108398,grad_norm: 0.8487499248832131, iteration: 221314
loss: 1.043054461479187,grad_norm: 0.8424739832665729, iteration: 221315
loss: 0.968810498714447,grad_norm: 0.8897390479372761, iteration: 221316
loss: 0.9903019070625305,grad_norm: 0.9999990256237926, iteration: 221317
loss: 0.9962580800056458,grad_norm: 0.9999991882635711, iteration: 221318
loss: 0.9794706702232361,grad_norm: 0.8835941527358193, iteration: 221319
loss: 1.030709147453308,grad_norm: 0.8462029029945153, iteration: 221320
loss: 0.9997649192810059,grad_norm: 0.906038615677987, iteration: 221321
loss: 0.9531840682029724,grad_norm: 0.9981068616020242, iteration: 221322
loss: 0.9808465242385864,grad_norm: 0.9264946503535937, iteration: 221323
loss: 1.004996657371521,grad_norm: 0.9999990449083672, iteration: 221324
loss: 0.9883155822753906,grad_norm: 0.7575993906634757, iteration: 221325
loss: 0.993984043598175,grad_norm: 0.8626119140659473, iteration: 221326
loss: 0.9589954018592834,grad_norm: 0.8089714798217601, iteration: 221327
loss: 0.9878886342048645,grad_norm: 0.8094854782091838, iteration: 221328
loss: 1.0182429552078247,grad_norm: 0.9999991196002157, iteration: 221329
loss: 1.0029510259628296,grad_norm: 0.8677467672562742, iteration: 221330
loss: 0.9844406247138977,grad_norm: 0.9578801406550654, iteration: 221331
loss: 1.0365355014801025,grad_norm: 0.9999991091242357, iteration: 221332
loss: 1.0322755575180054,grad_norm: 0.8186017383648465, iteration: 221333
loss: 1.0056862831115723,grad_norm: 0.9777152089124374, iteration: 221334
loss: 1.0442770719528198,grad_norm: 0.9056134196147037, iteration: 221335
loss: 0.9836252927780151,grad_norm: 0.9999990422198821, iteration: 221336
loss: 0.9824719429016113,grad_norm: 0.9384382090500945, iteration: 221337
loss: 0.9672592282295227,grad_norm: 0.9187437250943656, iteration: 221338
loss: 1.0018717050552368,grad_norm: 0.9999989927268932, iteration: 221339
loss: 0.9600458741188049,grad_norm: 0.9221591776919829, iteration: 221340
loss: 1.0035505294799805,grad_norm: 0.8746666580198341, iteration: 221341
loss: 1.0140645503997803,grad_norm: 0.8947664400213539, iteration: 221342
loss: 1.0146427154541016,grad_norm: 0.9919933939959136, iteration: 221343
loss: 1.005849838256836,grad_norm: 0.999999171806399, iteration: 221344
loss: 0.983944296836853,grad_norm: 0.9333142127839056, iteration: 221345
loss: 1.015761375427246,grad_norm: 0.9999991473070248, iteration: 221346
loss: 0.9972936511039734,grad_norm: 0.8772469034649277, iteration: 221347
loss: 1.000388741493225,grad_norm: 0.8611284476284989, iteration: 221348
loss: 0.9998969435691833,grad_norm: 0.9669471997453646, iteration: 221349
loss: 1.019278645515442,grad_norm: 0.9999990252802758, iteration: 221350
loss: 0.9709862470626831,grad_norm: 0.843115161806891, iteration: 221351
loss: 0.9917208552360535,grad_norm: 0.9484779270867224, iteration: 221352
loss: 0.9670227766036987,grad_norm: 0.753154985487929, iteration: 221353
loss: 1.0021950006484985,grad_norm: 0.9785522555541079, iteration: 221354
loss: 0.9788681864738464,grad_norm: 0.8459695071361292, iteration: 221355
loss: 1.0824302434921265,grad_norm: 0.8704814347246023, iteration: 221356
loss: 0.9742574095726013,grad_norm: 0.9028723375455817, iteration: 221357
loss: 1.0083799362182617,grad_norm: 0.7839495448347831, iteration: 221358
loss: 1.0279955863952637,grad_norm: 0.9492962413728133, iteration: 221359
loss: 0.9860714673995972,grad_norm: 0.8627409537748542, iteration: 221360
loss: 1.0061780214309692,grad_norm: 0.9999991941923838, iteration: 221361
loss: 1.0631412267684937,grad_norm: 0.9999992876562529, iteration: 221362
loss: 1.0039423704147339,grad_norm: 0.8549565368885725, iteration: 221363
loss: 0.9768248796463013,grad_norm: 0.9687856078345566, iteration: 221364
loss: 0.9725319147109985,grad_norm: 0.8121730790413522, iteration: 221365
loss: 0.993535041809082,grad_norm: 0.8198015269119965, iteration: 221366
loss: 1.0490905046463013,grad_norm: 0.9265130028010607, iteration: 221367
loss: 1.0039105415344238,grad_norm: 0.8802386943716657, iteration: 221368
loss: 1.0500078201293945,grad_norm: 0.9999996961247963, iteration: 221369
loss: 1.0321580171585083,grad_norm: 0.9999992303066271, iteration: 221370
loss: 1.011676549911499,grad_norm: 0.8927134086325659, iteration: 221371
loss: 0.9923290014266968,grad_norm: 0.9401938973824533, iteration: 221372
loss: 1.0364943742752075,grad_norm: 0.9797300373181449, iteration: 221373
loss: 0.9739865064620972,grad_norm: 0.9594525003264618, iteration: 221374
loss: 1.0052906274795532,grad_norm: 0.9999990059866286, iteration: 221375
loss: 1.0155147314071655,grad_norm: 0.9882379792855378, iteration: 221376
loss: 1.0437363386154175,grad_norm: 0.9389524310578804, iteration: 221377
loss: 0.9744027256965637,grad_norm: 0.9774450830911129, iteration: 221378
loss: 1.0107976198196411,grad_norm: 0.9446126829803737, iteration: 221379
loss: 0.9784042835235596,grad_norm: 0.8728707662304732, iteration: 221380
loss: 1.0069113969802856,grad_norm: 0.9999991776226955, iteration: 221381
loss: 0.9833221435546875,grad_norm: 0.8909774900041597, iteration: 221382
loss: 0.9495802521705627,grad_norm: 0.9470659005283217, iteration: 221383
loss: 0.9691187143325806,grad_norm: 0.9999991539426386, iteration: 221384
loss: 1.0172369480133057,grad_norm: 0.8852222409563103, iteration: 221385
loss: 0.9757042527198792,grad_norm: 0.9999991364961888, iteration: 221386
loss: 1.0481960773468018,grad_norm: 0.9279764357812106, iteration: 221387
loss: 0.9881368279457092,grad_norm: 0.9306004838951029, iteration: 221388
loss: 1.0229092836380005,grad_norm: 0.896744696581232, iteration: 221389
loss: 1.068798542022705,grad_norm: 0.9999991980378237, iteration: 221390
loss: 1.017991542816162,grad_norm: 0.9667757654534984, iteration: 221391
loss: 0.9504352807998657,grad_norm: 0.8069618679832726, iteration: 221392
loss: 0.9985741376876831,grad_norm: 0.7856561225517485, iteration: 221393
loss: 1.0013006925582886,grad_norm: 0.9925249009391237, iteration: 221394
loss: 0.9959850311279297,grad_norm: 0.9999992401619845, iteration: 221395
loss: 1.0004363059997559,grad_norm: 0.8181424208790745, iteration: 221396
loss: 0.9877139925956726,grad_norm: 0.9438745685599307, iteration: 221397
loss: 0.9826483130455017,grad_norm: 0.8745069964864176, iteration: 221398
loss: 0.9559422731399536,grad_norm: 0.8201935816194024, iteration: 221399
loss: 0.9808582663536072,grad_norm: 0.8298919277144501, iteration: 221400
loss: 1.0190000534057617,grad_norm: 0.9594763297193956, iteration: 221401
loss: 1.0183696746826172,grad_norm: 0.7984450725655708, iteration: 221402
loss: 0.992965042591095,grad_norm: 0.8461336350682217, iteration: 221403
loss: 0.9982905983924866,grad_norm: 0.8552386988766957, iteration: 221404
loss: 0.9528976082801819,grad_norm: 0.9569634098209878, iteration: 221405
loss: 0.9991612434387207,grad_norm: 0.9227785127312718, iteration: 221406
loss: 0.9943305850028992,grad_norm: 0.9999989332011872, iteration: 221407
loss: 1.0010933876037598,grad_norm: 0.9999991415268931, iteration: 221408
loss: 1.0445995330810547,grad_norm: 0.9984690297216271, iteration: 221409
loss: 1.0142728090286255,grad_norm: 0.999999417965226, iteration: 221410
loss: 0.9966665506362915,grad_norm: 0.863871172853297, iteration: 221411
loss: 0.9682137966156006,grad_norm: 0.9426465807781675, iteration: 221412
loss: 1.0037548542022705,grad_norm: 0.8420010599227531, iteration: 221413
loss: 1.0070616006851196,grad_norm: 0.7492968419397152, iteration: 221414
loss: 0.9756688475608826,grad_norm: 0.9970795875843274, iteration: 221415
loss: 1.0336235761642456,grad_norm: 0.9577916852024566, iteration: 221416
loss: 0.9898858070373535,grad_norm: 0.9458661721751013, iteration: 221417
loss: 0.9923909902572632,grad_norm: 0.9999993085912263, iteration: 221418
loss: 0.9708652496337891,grad_norm: 0.8781053715140335, iteration: 221419
loss: 0.9995814561843872,grad_norm: 0.8573393528476927, iteration: 221420
loss: 0.9897398352622986,grad_norm: 0.8527939495001529, iteration: 221421
loss: 0.9946156144142151,grad_norm: 0.9311051118715836, iteration: 221422
loss: 0.9813628196716309,grad_norm: 0.8003044586617893, iteration: 221423
loss: 1.029451847076416,grad_norm: 0.968664277882624, iteration: 221424
loss: 0.9752550721168518,grad_norm: 0.8972254466946116, iteration: 221425
loss: 1.0137355327606201,grad_norm: 0.9999990580523958, iteration: 221426
loss: 1.0109370946884155,grad_norm: 0.8199875059182222, iteration: 221427
loss: 0.9943777918815613,grad_norm: 0.9199415269238302, iteration: 221428
loss: 1.0244442224502563,grad_norm: 0.890886523283947, iteration: 221429
loss: 1.0889966487884521,grad_norm: 0.9999992910533279, iteration: 221430
loss: 0.992518961429596,grad_norm: 0.877136556667904, iteration: 221431
loss: 1.0386779308319092,grad_norm: 0.9999990399962363, iteration: 221432
loss: 1.0143486261367798,grad_norm: 0.9199853674318641, iteration: 221433
loss: 1.0134025812149048,grad_norm: 0.9238713982464274, iteration: 221434
loss: 0.9983927607536316,grad_norm: 0.6956617836539757, iteration: 221435
loss: 1.009870171546936,grad_norm: 0.8782994891481094, iteration: 221436
loss: 0.9888420104980469,grad_norm: 0.9999991907290668, iteration: 221437
loss: 0.979535698890686,grad_norm: 0.8961439413513429, iteration: 221438
loss: 0.9856786131858826,grad_norm: 0.966746753640226, iteration: 221439
loss: 1.0286043882369995,grad_norm: 0.9999993228067179, iteration: 221440
loss: 1.0099059343338013,grad_norm: 0.9189685234168621, iteration: 221441
loss: 1.016983151435852,grad_norm: 0.9999990438668893, iteration: 221442
loss: 0.9999642372131348,grad_norm: 0.9797025867367214, iteration: 221443
loss: 0.9924103617668152,grad_norm: 0.8711533559998529, iteration: 221444
loss: 1.007219672203064,grad_norm: 0.8992724953575048, iteration: 221445
loss: 0.9928818345069885,grad_norm: 0.9423057631462534, iteration: 221446
loss: 0.9984888434410095,grad_norm: 0.8334065925437741, iteration: 221447
loss: 0.9696424007415771,grad_norm: 0.9903625974210934, iteration: 221448
loss: 1.0018625259399414,grad_norm: 0.8872193170387217, iteration: 221449
loss: 1.0130773782730103,grad_norm: 0.9999993393641077, iteration: 221450
loss: 0.9593390226364136,grad_norm: 0.9999990158605708, iteration: 221451
loss: 0.9686164259910583,grad_norm: 0.907101018222904, iteration: 221452
loss: 0.9991738200187683,grad_norm: 0.965154203062227, iteration: 221453
loss: 1.0400056838989258,grad_norm: 0.9822728989841613, iteration: 221454
loss: 1.0014017820358276,grad_norm: 0.9999990507462209, iteration: 221455
loss: 0.9969550371170044,grad_norm: 0.999999704642146, iteration: 221456
loss: 1.0608506202697754,grad_norm: 0.9999992253725425, iteration: 221457
loss: 1.0048022270202637,grad_norm: 0.9999990383951595, iteration: 221458
loss: 1.0173115730285645,grad_norm: 0.9951981195094856, iteration: 221459
loss: 0.9974443316459656,grad_norm: 0.882520013920737, iteration: 221460
loss: 0.9768024682998657,grad_norm: 0.8957221472407981, iteration: 221461
loss: 1.0014290809631348,grad_norm: 0.9970436103491671, iteration: 221462
loss: 0.9953257441520691,grad_norm: 0.7510495907219634, iteration: 221463
loss: 1.034292221069336,grad_norm: 0.9999990599294393, iteration: 221464
loss: 0.9969084858894348,grad_norm: 0.86562747765305, iteration: 221465
loss: 1.0122613906860352,grad_norm: 0.9999990810720937, iteration: 221466
loss: 1.0044053792953491,grad_norm: 0.933218930118899, iteration: 221467
loss: 0.9973924160003662,grad_norm: 0.9394610601239005, iteration: 221468
loss: 0.9889451265335083,grad_norm: 0.87028545063202, iteration: 221469
loss: 1.0077903270721436,grad_norm: 0.811333020701132, iteration: 221470
loss: 1.0647231340408325,grad_norm: 0.9999990142411627, iteration: 221471
loss: 1.0310468673706055,grad_norm: 0.9999992063484456, iteration: 221472
loss: 0.9989785552024841,grad_norm: 0.9671564294016086, iteration: 221473
loss: 1.0144513845443726,grad_norm: 0.9035157686462733, iteration: 221474
loss: 1.0249383449554443,grad_norm: 0.9999998253320461, iteration: 221475
loss: 0.9555505514144897,grad_norm: 0.9999991635055304, iteration: 221476
loss: 1.0027145147323608,grad_norm: 0.9999990930055448, iteration: 221477
loss: 1.0105026960372925,grad_norm: 0.908684175690565, iteration: 221478
loss: 1.0023741722106934,grad_norm: 0.9999991789796306, iteration: 221479
loss: 1.0101635456085205,grad_norm: 0.9999991468718666, iteration: 221480
loss: 0.9897161722183228,grad_norm: 0.8399418422785493, iteration: 221481
loss: 0.9959605932235718,grad_norm: 0.9791436694638116, iteration: 221482
loss: 0.9870792031288147,grad_norm: 0.9904337852314908, iteration: 221483
loss: 0.99847012758255,grad_norm: 0.949597506182488, iteration: 221484
loss: 1.0333518981933594,grad_norm: 0.9381605053450568, iteration: 221485
loss: 1.0165058374404907,grad_norm: 0.9707513160548448, iteration: 221486
loss: 0.9789502620697021,grad_norm: 0.9999990826239011, iteration: 221487
loss: 0.9811707139015198,grad_norm: 0.8460828883172841, iteration: 221488
loss: 0.9535515308380127,grad_norm: 0.9235815362744741, iteration: 221489
loss: 1.019679307937622,grad_norm: 0.999999083605773, iteration: 221490
loss: 0.9896339178085327,grad_norm: 0.9999998654077055, iteration: 221491
loss: 0.967816948890686,grad_norm: 0.9936034927500097, iteration: 221492
loss: 1.0084993839263916,grad_norm: 0.8345160405757164, iteration: 221493
loss: 0.9958598613739014,grad_norm: 0.947226214995965, iteration: 221494
loss: 1.0912481546401978,grad_norm: 0.9999998501763315, iteration: 221495
loss: 1.0318372249603271,grad_norm: 0.8467702705643751, iteration: 221496
loss: 0.9698414206504822,grad_norm: 0.8553261949231474, iteration: 221497
loss: 0.989558756351471,grad_norm: 0.9262911705573159, iteration: 221498
loss: 1.0009185075759888,grad_norm: 0.7548816102376557, iteration: 221499
loss: 1.0247454643249512,grad_norm: 0.8859192080253097, iteration: 221500
loss: 0.9796504378318787,grad_norm: 0.9013903671793287, iteration: 221501
loss: 1.0136699676513672,grad_norm: 0.9999996418959696, iteration: 221502
loss: 1.0361297130584717,grad_norm: 0.9999998534321628, iteration: 221503
loss: 1.0286749601364136,grad_norm: 0.8823543819783015, iteration: 221504
loss: 0.9873725771903992,grad_norm: 0.9052742711481911, iteration: 221505
loss: 1.0009254217147827,grad_norm: 0.9453233425087356, iteration: 221506
loss: 0.9768930077552795,grad_norm: 0.8339478052207971, iteration: 221507
loss: 0.9853919744491577,grad_norm: 0.9999994733201423, iteration: 221508
loss: 0.9846051931381226,grad_norm: 0.8752015162672626, iteration: 221509
loss: 1.0615190267562866,grad_norm: 0.9999994631221142, iteration: 221510
loss: 1.008813500404358,grad_norm: 0.9999991171540861, iteration: 221511
loss: 0.9844288229942322,grad_norm: 0.9999989755829258, iteration: 221512
loss: 1.0310567617416382,grad_norm: 0.911841941327945, iteration: 221513
loss: 1.0298823118209839,grad_norm: 0.9999994618975799, iteration: 221514
loss: 0.9993523359298706,grad_norm: 0.9999990368980596, iteration: 221515
loss: 1.013222336769104,grad_norm: 0.9245226489594993, iteration: 221516
loss: 1.0138736963272095,grad_norm: 0.9038664104327827, iteration: 221517
loss: 1.0299434661865234,grad_norm: 0.9999990873597085, iteration: 221518
loss: 1.013240933418274,grad_norm: 0.9969661228224118, iteration: 221519
loss: 0.9853436350822449,grad_norm: 0.9999990127736339, iteration: 221520
loss: 1.059055209159851,grad_norm: 0.9999999696296362, iteration: 221521
loss: 1.012117624282837,grad_norm: 0.7413878199714252, iteration: 221522
loss: 1.02582848072052,grad_norm: 0.9999996397586483, iteration: 221523
loss: 0.9867113828659058,grad_norm: 0.8687100026156482, iteration: 221524
loss: 0.980410635471344,grad_norm: 0.9712167796312349, iteration: 221525
loss: 0.9595218896865845,grad_norm: 0.8718729983297827, iteration: 221526
loss: 0.9858971238136292,grad_norm: 0.847780658322917, iteration: 221527
loss: 1.002095341682434,grad_norm: 0.8856152163442742, iteration: 221528
loss: 0.9866352677345276,grad_norm: 0.9018525631700963, iteration: 221529
loss: 1.0407686233520508,grad_norm: 0.999999227636806, iteration: 221530
loss: 1.0277345180511475,grad_norm: 0.9607976557542489, iteration: 221531
loss: 0.9889425039291382,grad_norm: 0.9419286850633606, iteration: 221532
loss: 1.0055363178253174,grad_norm: 0.8280581326989561, iteration: 221533
loss: 1.0609227418899536,grad_norm: 0.999999930324023, iteration: 221534
loss: 1.0162298679351807,grad_norm: 0.9496134513895468, iteration: 221535
loss: 0.9922935366630554,grad_norm: 0.9258891382254646, iteration: 221536
loss: 0.9917324185371399,grad_norm: 0.9203708729823378, iteration: 221537
loss: 1.0473366975784302,grad_norm: 0.9999995130596625, iteration: 221538
loss: 0.9815711975097656,grad_norm: 0.9370932154478083, iteration: 221539
loss: 0.9976734519004822,grad_norm: 0.8796912839933806, iteration: 221540
loss: 1.0191147327423096,grad_norm: 0.9697849362365158, iteration: 221541
loss: 0.9767248630523682,grad_norm: 0.893509610413229, iteration: 221542
loss: 1.0166833400726318,grad_norm: 0.9999990625853961, iteration: 221543
loss: 1.0214202404022217,grad_norm: 0.9999990395463062, iteration: 221544
loss: 0.9710403084754944,grad_norm: 0.971404296843431, iteration: 221545
loss: 0.9748268723487854,grad_norm: 0.8162280941355161, iteration: 221546
loss: 1.0051867961883545,grad_norm: 0.8807446172148614, iteration: 221547
loss: 0.9870589971542358,grad_norm: 0.9999990198017484, iteration: 221548
loss: 0.9980999231338501,grad_norm: 0.9999992789352211, iteration: 221549
loss: 0.977272093296051,grad_norm: 0.9758826238292696, iteration: 221550
loss: 0.994453489780426,grad_norm: 0.9999991194328145, iteration: 221551
loss: 1.0088012218475342,grad_norm: 0.9558300425614308, iteration: 221552
loss: 1.0258005857467651,grad_norm: 0.8442833180860189, iteration: 221553
loss: 1.0183179378509521,grad_norm: 0.9999990731711175, iteration: 221554
loss: 1.0744853019714355,grad_norm: 0.8857049769530116, iteration: 221555
loss: 0.9898542761802673,grad_norm: 0.8251309584877453, iteration: 221556
loss: 0.968939483165741,grad_norm: 0.884944998319291, iteration: 221557
loss: 0.9841205477714539,grad_norm: 0.8385982809806621, iteration: 221558
loss: 0.9528105854988098,grad_norm: 0.9999990563462913, iteration: 221559
loss: 0.9949450492858887,grad_norm: 0.7796063628799065, iteration: 221560
loss: 0.9898355007171631,grad_norm: 0.8645862661690855, iteration: 221561
loss: 0.9909676313400269,grad_norm: 0.9999991422536129, iteration: 221562
loss: 1.00840163230896,grad_norm: 0.9999998436296258, iteration: 221563
loss: 0.9859567880630493,grad_norm: 0.9362349785079571, iteration: 221564
loss: 0.9840725660324097,grad_norm: 0.8293012649078443, iteration: 221565
loss: 0.9916319251060486,grad_norm: 0.9339963688749061, iteration: 221566
loss: 1.0390459299087524,grad_norm: 0.9999990392555429, iteration: 221567
loss: 1.0140047073364258,grad_norm: 0.9663259990526005, iteration: 221568
loss: 1.0598872900009155,grad_norm: 0.9999991998105999, iteration: 221569
loss: 1.0185590982437134,grad_norm: 0.9999991994849085, iteration: 221570
loss: 0.9875476360321045,grad_norm: 0.8471218729815846, iteration: 221571
loss: 0.9470521807670593,grad_norm: 0.9161260776502613, iteration: 221572
loss: 1.001315951347351,grad_norm: 0.9999990886645721, iteration: 221573
loss: 0.9396379590034485,grad_norm: 0.7655694627301455, iteration: 221574
loss: 1.115890622138977,grad_norm: 0.9999994615888332, iteration: 221575
loss: 0.9833829402923584,grad_norm: 0.8685149653715302, iteration: 221576
loss: 1.0010918378829956,grad_norm: 0.9999991298707223, iteration: 221577
loss: 1.0080019235610962,grad_norm: 0.7663371653474201, iteration: 221578
loss: 0.9672437310218811,grad_norm: 0.936144137167981, iteration: 221579
loss: 0.9867665767669678,grad_norm: 0.9999992557168599, iteration: 221580
loss: 1.0201451778411865,grad_norm: 0.772054111871649, iteration: 221581
loss: 0.9908065795898438,grad_norm: 0.999999087420273, iteration: 221582
loss: 1.0199384689331055,grad_norm: 0.9999990094962424, iteration: 221583
loss: 0.9735512733459473,grad_norm: 0.7720748325058026, iteration: 221584
loss: 1.0117566585540771,grad_norm: 0.8243820579172154, iteration: 221585
loss: 0.9902445077896118,grad_norm: 0.9497923236331454, iteration: 221586
loss: 0.9905505776405334,grad_norm: 0.8357331409198479, iteration: 221587
loss: 1.0049291849136353,grad_norm: 0.929733161313155, iteration: 221588
loss: 1.0265188217163086,grad_norm: 0.907749539681935, iteration: 221589
loss: 0.9984654784202576,grad_norm: 0.9999992456545583, iteration: 221590
loss: 1.000834345817566,grad_norm: 0.9794004855115525, iteration: 221591
loss: 1.0205142498016357,grad_norm: 0.9522718289723385, iteration: 221592
loss: 1.0248541831970215,grad_norm: 0.8422556643151865, iteration: 221593
loss: 0.9859417080879211,grad_norm: 0.85261654403555, iteration: 221594
loss: 1.0290488004684448,grad_norm: 0.8818834535628314, iteration: 221595
loss: 1.0125489234924316,grad_norm: 0.9999999648803889, iteration: 221596
loss: 1.0076960325241089,grad_norm: 0.9774925335032827, iteration: 221597
loss: 1.0347480773925781,grad_norm: 0.9999990667579262, iteration: 221598
loss: 1.001312494277954,grad_norm: 0.9868081205533031, iteration: 221599
loss: 0.9948474168777466,grad_norm: 0.9018609645004972, iteration: 221600
loss: 1.0301086902618408,grad_norm: 0.9999991479964936, iteration: 221601
loss: 1.0638799667358398,grad_norm: 0.9999994636191992, iteration: 221602
loss: 1.0064094066619873,grad_norm: 0.8114807392635094, iteration: 221603
loss: 0.9706514477729797,grad_norm: 0.8765062069472843, iteration: 221604
loss: 0.9866952896118164,grad_norm: 0.9999991171683632, iteration: 221605
loss: 0.9576913714408875,grad_norm: 0.9999990317903029, iteration: 221606
loss: 1.016870141029358,grad_norm: 0.8603428294563092, iteration: 221607
loss: 0.9777688384056091,grad_norm: 0.8925795182778866, iteration: 221608
loss: 1.0137977600097656,grad_norm: 0.9999991286834667, iteration: 221609
loss: 1.0135573148727417,grad_norm: 0.7827958183548223, iteration: 221610
loss: 0.9921267032623291,grad_norm: 0.7872986612828843, iteration: 221611
loss: 1.0743786096572876,grad_norm: 0.9999996108146652, iteration: 221612
loss: 1.0229853391647339,grad_norm: 0.9201085885615675, iteration: 221613
loss: 0.9891268610954285,grad_norm: 0.9037633490981872, iteration: 221614
loss: 1.0081920623779297,grad_norm: 0.9199764074961264, iteration: 221615
loss: 1.0386521816253662,grad_norm: 0.9999992593304443, iteration: 221616
loss: 1.060714840888977,grad_norm: 0.9999992091783195, iteration: 221617
loss: 1.0118978023529053,grad_norm: 0.9546104935549994, iteration: 221618
loss: 1.0367200374603271,grad_norm: 0.9372836249644766, iteration: 221619
loss: 0.9897361993789673,grad_norm: 0.9999992246102894, iteration: 221620
loss: 1.0421098470687866,grad_norm: 0.9999997513100984, iteration: 221621
loss: 1.018349051475525,grad_norm: 0.9707776641213017, iteration: 221622
loss: 1.0500198602676392,grad_norm: 0.999999615048381, iteration: 221623
loss: 0.9761478900909424,grad_norm: 0.9331956133850746, iteration: 221624
loss: 0.9937499761581421,grad_norm: 0.9999990903120638, iteration: 221625
loss: 1.0149401426315308,grad_norm: 0.9787031165691275, iteration: 221626
loss: 1.0553570985794067,grad_norm: 0.9999993515280994, iteration: 221627
loss: 1.0091232061386108,grad_norm: 0.999999101942495, iteration: 221628
loss: 1.243493914604187,grad_norm: 0.9999997271954008, iteration: 221629
loss: 1.016493558883667,grad_norm: 0.9999991251898236, iteration: 221630
loss: 0.9930016398429871,grad_norm: 0.7649425918652851, iteration: 221631
loss: 1.0158873796463013,grad_norm: 0.9999991380201698, iteration: 221632
loss: 1.0225911140441895,grad_norm: 0.9999991320767015, iteration: 221633
loss: 1.0190876722335815,grad_norm: 0.9999991438833818, iteration: 221634
loss: 0.9863631725311279,grad_norm: 0.9409648842148746, iteration: 221635
loss: 1.0711970329284668,grad_norm: 0.9999991089072764, iteration: 221636
loss: 1.039089322090149,grad_norm: 0.9999992101966334, iteration: 221637
loss: 1.033530354499817,grad_norm: 0.9999999779503671, iteration: 221638
loss: 1.0045260190963745,grad_norm: 0.9502333123421307, iteration: 221639
loss: 1.0246959924697876,grad_norm: 0.9999990264179974, iteration: 221640
loss: 0.9602891802787781,grad_norm: 0.7862029116791075, iteration: 221641
loss: 0.975569486618042,grad_norm: 0.9629429448116679, iteration: 221642
loss: 1.0159043073654175,grad_norm: 0.9710423542976606, iteration: 221643
loss: 1.0034630298614502,grad_norm: 0.8203206396737563, iteration: 221644
loss: 0.9775241613388062,grad_norm: 0.9999990414459631, iteration: 221645
loss: 1.0277475118637085,grad_norm: 0.8784781080533257, iteration: 221646
loss: 1.0426955223083496,grad_norm: 0.9082971601048493, iteration: 221647
loss: 0.989605724811554,grad_norm: 0.9549523992978219, iteration: 221648
loss: 1.0102497339248657,grad_norm: 0.841769006319878, iteration: 221649
loss: 0.9749516844749451,grad_norm: 0.999999064775885, iteration: 221650
loss: 1.0214589834213257,grad_norm: 0.9999993144608338, iteration: 221651
loss: 1.0091493129730225,grad_norm: 0.9999991505825833, iteration: 221652
loss: 0.9810011982917786,grad_norm: 0.913258139641646, iteration: 221653
loss: 1.0266025066375732,grad_norm: 0.9356648025754805, iteration: 221654
loss: 1.046573519706726,grad_norm: 0.9999994959375281, iteration: 221655
loss: 1.0217233896255493,grad_norm: 0.9999989643207772, iteration: 221656
loss: 0.988194465637207,grad_norm: 0.847878516932469, iteration: 221657
loss: 1.0182753801345825,grad_norm: 0.838745130553923, iteration: 221658
loss: 0.9912944436073303,grad_norm: 0.9321468206485503, iteration: 221659
loss: 1.0259188413619995,grad_norm: 0.9999996584306922, iteration: 221660
loss: 0.9708162546157837,grad_norm: 0.9999992312004037, iteration: 221661
loss: 0.9929366707801819,grad_norm: 0.91736735455198, iteration: 221662
loss: 1.032314419746399,grad_norm: 0.968564168270731, iteration: 221663
loss: 1.016533613204956,grad_norm: 0.8229067047519382, iteration: 221664
loss: 0.9808037281036377,grad_norm: 0.8095787918214012, iteration: 221665
loss: 1.006645917892456,grad_norm: 0.9999991278338244, iteration: 221666
loss: 0.9976458549499512,grad_norm: 0.9043539977584204, iteration: 221667
loss: 0.990733802318573,grad_norm: 0.8210313256151694, iteration: 221668
loss: 0.9941025376319885,grad_norm: 0.9999997013056935, iteration: 221669
loss: 1.015509843826294,grad_norm: 0.9999992108319663, iteration: 221670
loss: 1.018980622291565,grad_norm: 0.948865552158026, iteration: 221671
loss: 0.9894903898239136,grad_norm: 0.9999995138078538, iteration: 221672
loss: 1.0230872631072998,grad_norm: 0.9770686195684831, iteration: 221673
loss: 1.1754838228225708,grad_norm: 0.9723715324162665, iteration: 221674
loss: 0.9915405511856079,grad_norm: 0.921221939133559, iteration: 221675
loss: 0.9855947494506836,grad_norm: 0.8859600418615081, iteration: 221676
loss: 0.9910646080970764,grad_norm: 0.9617658729866484, iteration: 221677
loss: 1.039121150970459,grad_norm: 0.9322802745330341, iteration: 221678
loss: 1.0232659578323364,grad_norm: 0.8500870029878095, iteration: 221679
loss: 0.9914906620979309,grad_norm: 0.8972886436288052, iteration: 221680
loss: 0.9964551329612732,grad_norm: 0.9999990954334211, iteration: 221681
loss: 0.997748076915741,grad_norm: 0.8797652393506523, iteration: 221682
loss: 0.9926062822341919,grad_norm: 0.9999996132324788, iteration: 221683
loss: 0.989362359046936,grad_norm: 0.9152521936445249, iteration: 221684
loss: 1.0317869186401367,grad_norm: 0.9999991199016032, iteration: 221685
loss: 1.0079765319824219,grad_norm: 0.7698200118818325, iteration: 221686
loss: 1.015859842300415,grad_norm: 0.8528637032853288, iteration: 221687
loss: 1.007264494895935,grad_norm: 0.9537484188140263, iteration: 221688
loss: 0.9820046424865723,grad_norm: 0.9751175504732449, iteration: 221689
loss: 1.0279954671859741,grad_norm: 0.909253973280616, iteration: 221690
loss: 0.9908463954925537,grad_norm: 0.9999991772342682, iteration: 221691
loss: 1.0461666584014893,grad_norm: 0.8882826023235861, iteration: 221692
loss: 1.059921145439148,grad_norm: 0.9068942522708093, iteration: 221693
loss: 1.0044467449188232,grad_norm: 0.8991637096290956, iteration: 221694
loss: 0.9842903017997742,grad_norm: 0.9999991609368419, iteration: 221695
loss: 1.0113266706466675,grad_norm: 0.9828819649290493, iteration: 221696
loss: 1.0034449100494385,grad_norm: 0.9116350299921023, iteration: 221697
loss: 1.0363523960113525,grad_norm: 0.9999996037168694, iteration: 221698
loss: 0.9977177977561951,grad_norm: 0.8194756138597127, iteration: 221699
loss: 1.0029933452606201,grad_norm: 0.6805019249135495, iteration: 221700
loss: 1.0331698656082153,grad_norm: 0.9208552852291096, iteration: 221701
loss: 0.981113612651825,grad_norm: 0.9860364617822993, iteration: 221702
loss: 1.0267212390899658,grad_norm: 0.9999990584601098, iteration: 221703
loss: 0.9813721776008606,grad_norm: 0.8896509433694667, iteration: 221704
loss: 0.9793018698692322,grad_norm: 0.9999997555334672, iteration: 221705
loss: 0.9818910956382751,grad_norm: 0.8847803648849812, iteration: 221706
loss: 1.0166181325912476,grad_norm: 0.9999991567231346, iteration: 221707
loss: 1.04508376121521,grad_norm: 0.9999992320744173, iteration: 221708
loss: 0.9721028208732605,grad_norm: 0.9999995398356192, iteration: 221709
loss: 1.0144456624984741,grad_norm: 0.9862590224591267, iteration: 221710
loss: 0.9834297299385071,grad_norm: 0.8596914167224484, iteration: 221711
loss: 1.1018260717391968,grad_norm: 0.9999993568587556, iteration: 221712
loss: 0.9381303787231445,grad_norm: 0.999999047849494, iteration: 221713
loss: 0.988040566444397,grad_norm: 0.7865077308429814, iteration: 221714
loss: 1.1186984777450562,grad_norm: 0.9999990554956315, iteration: 221715
loss: 1.0063505172729492,grad_norm: 0.9999999108894554, iteration: 221716
loss: 1.000761866569519,grad_norm: 0.999999259099884, iteration: 221717
loss: 0.9649330377578735,grad_norm: 0.8862018146120131, iteration: 221718
loss: 1.014648199081421,grad_norm: 0.9999991359649194, iteration: 221719
loss: 1.0270317792892456,grad_norm: 0.9999996232727669, iteration: 221720
loss: 0.98675537109375,grad_norm: 0.9999992851362492, iteration: 221721
loss: 0.9762386679649353,grad_norm: 0.9149639579930542, iteration: 221722
loss: 0.9815918803215027,grad_norm: 0.8226022726841227, iteration: 221723
loss: 1.0089889764785767,grad_norm: 0.9999991794003754, iteration: 221724
loss: 1.021605134010315,grad_norm: 0.8481427855648398, iteration: 221725
loss: 0.9968925714492798,grad_norm: 0.9999991659738932, iteration: 221726
loss: 0.9919045567512512,grad_norm: 0.9999990450925362, iteration: 221727
loss: 1.0072921514511108,grad_norm: 0.8468047681849291, iteration: 221728
loss: 1.0408507585525513,grad_norm: 0.9999992359820914, iteration: 221729
loss: 0.9935185313224792,grad_norm: 0.905663493891112, iteration: 221730
loss: 0.9621413946151733,grad_norm: 0.8435827086607178, iteration: 221731
loss: 1.0239226818084717,grad_norm: 0.9999994109138992, iteration: 221732
loss: 0.9836080074310303,grad_norm: 0.8206173338531966, iteration: 221733
loss: 1.0544239282608032,grad_norm: 0.8391036440029391, iteration: 221734
loss: 1.0386987924575806,grad_norm: 0.9353964415224204, iteration: 221735
loss: 1.010679006576538,grad_norm: 0.9999990435223891, iteration: 221736
loss: 1.030847191810608,grad_norm: 0.9999992392757004, iteration: 221737
loss: 0.9950932860374451,grad_norm: 0.9999993994361663, iteration: 221738
loss: 1.009351134300232,grad_norm: 0.9637939113100016, iteration: 221739
loss: 0.9876343011856079,grad_norm: 0.9469680075459577, iteration: 221740
loss: 0.997904360294342,grad_norm: 0.9999990249782391, iteration: 221741
loss: 0.9793735146522522,grad_norm: 0.9298663534230653, iteration: 221742
loss: 1.0244983434677124,grad_norm: 0.7708200050273388, iteration: 221743
loss: 0.9991337060928345,grad_norm: 0.9806944755140621, iteration: 221744
loss: 0.9837292432785034,grad_norm: 0.9171733143209093, iteration: 221745
loss: 0.996305525302887,grad_norm: 0.9999990871907413, iteration: 221746
loss: 0.9687851071357727,grad_norm: 0.9707237453140258, iteration: 221747
loss: 1.030587911605835,grad_norm: 0.9999991024441355, iteration: 221748
loss: 0.9980311989784241,grad_norm: 0.8690353090893139, iteration: 221749
loss: 1.0236752033233643,grad_norm: 0.9999991909230753, iteration: 221750
loss: 0.9917864799499512,grad_norm: 0.9026840115939959, iteration: 221751
loss: 1.0051289796829224,grad_norm: 0.776851916962463, iteration: 221752
loss: 0.9594929218292236,grad_norm: 0.9999990880300567, iteration: 221753
loss: 1.0366203784942627,grad_norm: 0.9999991160420901, iteration: 221754
loss: 1.0653823614120483,grad_norm: 0.9979324452365016, iteration: 221755
loss: 1.1696175336837769,grad_norm: 0.9999996714435473, iteration: 221756
loss: 0.9717026352882385,grad_norm: 0.999999380290545, iteration: 221757
loss: 0.9909444451332092,grad_norm: 0.8684709644003108, iteration: 221758
loss: 0.9948192238807678,grad_norm: 0.8519529337286958, iteration: 221759
loss: 1.0247398614883423,grad_norm: 0.9999989434872723, iteration: 221760
loss: 1.0154825448989868,grad_norm: 0.954114707676855, iteration: 221761
loss: 0.9965545535087585,grad_norm: 0.9085616266342293, iteration: 221762
loss: 1.030746340751648,grad_norm: 0.9520762477793957, iteration: 221763
loss: 0.9674530029296875,grad_norm: 0.9365726065619988, iteration: 221764
loss: 0.9698200225830078,grad_norm: 0.9363148842141339, iteration: 221765
loss: 0.9757760763168335,grad_norm: 0.8180177933953501, iteration: 221766
loss: 1.0526371002197266,grad_norm: 0.9852194516172028, iteration: 221767
loss: 1.0143728256225586,grad_norm: 0.99999913666794, iteration: 221768
loss: 1.0247740745544434,grad_norm: 0.9358278472134546, iteration: 221769
loss: 1.0094119310379028,grad_norm: 0.9837449003367601, iteration: 221770
loss: 0.9961987137794495,grad_norm: 0.9588950138709964, iteration: 221771
loss: 1.0021284818649292,grad_norm: 0.9721927546387608, iteration: 221772
loss: 1.028181552886963,grad_norm: 0.8004164870847822, iteration: 221773
loss: 0.9931894540786743,grad_norm: 0.9999989351534694, iteration: 221774
loss: 1.0328525304794312,grad_norm: 0.9999992960662112, iteration: 221775
loss: 0.9943751096725464,grad_norm: 1.0000000302831948, iteration: 221776
loss: 1.0250109434127808,grad_norm: 0.8627318808964933, iteration: 221777
loss: 1.048525094985962,grad_norm: 0.9999990323138741, iteration: 221778
loss: 0.9739348292350769,grad_norm: 0.9999992244856318, iteration: 221779
loss: 1.0338201522827148,grad_norm: 0.9999989865421774, iteration: 221780
loss: 1.216741681098938,grad_norm: 0.9999995566264814, iteration: 221781
loss: 0.9740597605705261,grad_norm: 0.7634258936795599, iteration: 221782
loss: 0.978488564491272,grad_norm: 0.8746106891591569, iteration: 221783
loss: 1.0305503606796265,grad_norm: 0.904265034737261, iteration: 221784
loss: 1.0426769256591797,grad_norm: 0.8683958534665495, iteration: 221785
loss: 1.0139645338058472,grad_norm: 0.8091210189688782, iteration: 221786
loss: 0.9896013736724854,grad_norm: 0.8425731015567594, iteration: 221787
loss: 1.0187451839447021,grad_norm: 0.9999992050326326, iteration: 221788
loss: 0.9820959568023682,grad_norm: 0.9999991188508547, iteration: 221789
loss: 0.9748668074607849,grad_norm: 0.9679945474837471, iteration: 221790
loss: 0.979094922542572,grad_norm: 0.9999992524616726, iteration: 221791
loss: 0.9964677095413208,grad_norm: 0.9176523518358679, iteration: 221792
loss: 1.0259156227111816,grad_norm: 0.8637421199209738, iteration: 221793
loss: 1.0114022493362427,grad_norm: 0.9175070561089962, iteration: 221794
loss: 1.0050395727157593,grad_norm: 0.9999990706240104, iteration: 221795
loss: 0.9790764451026917,grad_norm: 0.9603832847933204, iteration: 221796
loss: 0.9614032506942749,grad_norm: 0.9999990139133088, iteration: 221797
loss: 0.9964660406112671,grad_norm: 0.8784876197337101, iteration: 221798
loss: 0.982218325138092,grad_norm: 0.8689670074084146, iteration: 221799
loss: 0.9699481725692749,grad_norm: 0.7929443902576535, iteration: 221800
loss: 1.0112953186035156,grad_norm: 0.8438727233421194, iteration: 221801
loss: 1.0201163291931152,grad_norm: 0.9145567457154375, iteration: 221802
loss: 1.0936206579208374,grad_norm: 0.9999990651945843, iteration: 221803
loss: 1.0289640426635742,grad_norm: 0.9999999345238682, iteration: 221804
loss: 1.0498311519622803,grad_norm: 0.9999992260411176, iteration: 221805
loss: 1.0272562503814697,grad_norm: 0.9368209795648559, iteration: 221806
loss: 0.9658672213554382,grad_norm: 0.9999989866765413, iteration: 221807
loss: 1.0037662982940674,grad_norm: 0.9195538279034894, iteration: 221808
loss: 0.9777621030807495,grad_norm: 0.9999989702918187, iteration: 221809
loss: 0.9614851474761963,grad_norm: 0.9945697266870615, iteration: 221810
loss: 0.996741533279419,grad_norm: 0.8067373891217235, iteration: 221811
loss: 1.0046253204345703,grad_norm: 0.925621257558163, iteration: 221812
loss: 0.9558987021446228,grad_norm: 0.8743404059537868, iteration: 221813
loss: 0.9901472926139832,grad_norm: 0.786775320690234, iteration: 221814
loss: 1.0023030042648315,grad_norm: 0.9885357513412051, iteration: 221815
loss: 1.0283474922180176,grad_norm: 0.8106552453269676, iteration: 221816
loss: 0.9825335144996643,grad_norm: 0.9999991537986505, iteration: 221817
loss: 1.0021530389785767,grad_norm: 0.8338383552593137, iteration: 221818
loss: 1.0918573141098022,grad_norm: 0.8213482179740104, iteration: 221819
loss: 0.9475259184837341,grad_norm: 0.9954757601179144, iteration: 221820
loss: 1.170214056968689,grad_norm: 0.9999993177337393, iteration: 221821
loss: 1.0653433799743652,grad_norm: 0.9999996801344303, iteration: 221822
loss: 1.0447074174880981,grad_norm: 0.9999997101627459, iteration: 221823
loss: 1.0008928775787354,grad_norm: 0.9244135874802724, iteration: 221824
loss: 1.0257028341293335,grad_norm: 0.8240113223086415, iteration: 221825
loss: 1.0334912538528442,grad_norm: 0.9804622869552552, iteration: 221826
loss: 1.0196053981781006,grad_norm: 0.8383869241594727, iteration: 221827
loss: 0.9745404124259949,grad_norm: 0.9731044529243595, iteration: 221828
loss: 0.9729670286178589,grad_norm: 0.8930679225043422, iteration: 221829
loss: 1.0602104663848877,grad_norm: 0.9999991403656876, iteration: 221830
loss: 1.0210529565811157,grad_norm: 0.9999990689900654, iteration: 221831
loss: 0.9852786064147949,grad_norm: 0.823855001783639, iteration: 221832
loss: 1.0181872844696045,grad_norm: 0.8459358593993492, iteration: 221833
loss: 0.9939113855361938,grad_norm: 0.9607261290370307, iteration: 221834
loss: 1.0717761516571045,grad_norm: 0.9999992388504355, iteration: 221835
loss: 1.0316652059555054,grad_norm: 0.9303397256477388, iteration: 221836
loss: 0.9859126806259155,grad_norm: 0.9999990721933705, iteration: 221837
loss: 0.9787096381187439,grad_norm: 0.9999991345733212, iteration: 221838
loss: 0.9687398672103882,grad_norm: 0.9999989834589952, iteration: 221839
loss: 1.0183453559875488,grad_norm: 0.9539890197412276, iteration: 221840
loss: 0.9893659353256226,grad_norm: 0.9999994165836844, iteration: 221841
loss: 0.9622568488121033,grad_norm: 0.8227776148279741, iteration: 221842
loss: 1.0022050142288208,grad_norm: 0.9999991991229108, iteration: 221843
loss: 0.9954485297203064,grad_norm: 0.8394844621652426, iteration: 221844
loss: 1.0005956888198853,grad_norm: 0.8132794783853428, iteration: 221845
loss: 1.015044093132019,grad_norm: 0.9999990142737368, iteration: 221846
loss: 1.0243130922317505,grad_norm: 0.8908072544195673, iteration: 221847
loss: 0.9312365651130676,grad_norm: 0.7976572528838226, iteration: 221848
loss: 1.0159039497375488,grad_norm: 0.7984402234796921, iteration: 221849
loss: 0.9811694025993347,grad_norm: 0.9999992347757314, iteration: 221850
loss: 1.0079470872879028,grad_norm: 0.9999990749080101, iteration: 221851
loss: 0.9913768172264099,grad_norm: 0.9627427177514212, iteration: 221852
loss: 0.9726861715316772,grad_norm: 0.8457204758256868, iteration: 221853
loss: 1.0071756839752197,grad_norm: 0.9999990560397747, iteration: 221854
loss: 1.0393480062484741,grad_norm: 0.9999993884920713, iteration: 221855
loss: 1.040795922279358,grad_norm: 0.9224701755340478, iteration: 221856
loss: 0.9618359208106995,grad_norm: 0.837691686884454, iteration: 221857
loss: 0.9727352261543274,grad_norm: 0.9264960049691547, iteration: 221858
loss: 0.9752046465873718,grad_norm: 0.9666247330806981, iteration: 221859
loss: 1.0060625076293945,grad_norm: 0.9006618238248065, iteration: 221860
loss: 1.0304280519485474,grad_norm: 0.9999997988693416, iteration: 221861
loss: 0.9777966737747192,grad_norm: 0.8241882015077188, iteration: 221862
loss: 1.0100393295288086,grad_norm: 0.9300827076800651, iteration: 221863
loss: 0.9927788376808167,grad_norm: 0.9103750143138879, iteration: 221864
loss: 1.027488350868225,grad_norm: 0.998748078244868, iteration: 221865
loss: 0.9969277381896973,grad_norm: 0.9026185826190192, iteration: 221866
loss: 0.997189462184906,grad_norm: 0.9999991833474823, iteration: 221867
loss: 0.9954138994216919,grad_norm: 0.9542307070351882, iteration: 221868
loss: 1.0141127109527588,grad_norm: 0.9999991026638156, iteration: 221869
loss: 1.0102893114089966,grad_norm: 0.9999999399647184, iteration: 221870
loss: 1.0141538381576538,grad_norm: 0.9999992530636682, iteration: 221871
loss: 1.0201010704040527,grad_norm: 0.9999990437045291, iteration: 221872
loss: 1.1375224590301514,grad_norm: 0.9999992308632334, iteration: 221873
loss: 1.051759123802185,grad_norm: 0.9999991811108822, iteration: 221874
loss: 0.9875099658966064,grad_norm: 0.9324202642870287, iteration: 221875
loss: 0.973870038986206,grad_norm: 0.977108241483312, iteration: 221876
loss: 1.018243670463562,grad_norm: 0.9999991717184494, iteration: 221877
loss: 0.987491250038147,grad_norm: 0.8811490432020492, iteration: 221878
loss: 1.0337570905685425,grad_norm: 0.8661302354979558, iteration: 221879
loss: 0.9900871515274048,grad_norm: 0.8686009671490947, iteration: 221880
loss: 0.9976052045822144,grad_norm: 0.9517608722888524, iteration: 221881
loss: 0.9964759945869446,grad_norm: 0.8214987539262538, iteration: 221882
loss: 1.0024223327636719,grad_norm: 0.9999990725192649, iteration: 221883
loss: 0.9931782484054565,grad_norm: 0.8249904860173936, iteration: 221884
loss: 1.0510164499282837,grad_norm: 0.95025650197151, iteration: 221885
loss: 0.9857333898544312,grad_norm: 0.9595357713667705, iteration: 221886
loss: 1.031297206878662,grad_norm: 0.9999993636692183, iteration: 221887
loss: 1.0179674625396729,grad_norm: 0.8170613250350341, iteration: 221888
loss: 0.9910304546356201,grad_norm: 0.9999989927957135, iteration: 221889
loss: 0.9743355512619019,grad_norm: 0.8115098496305737, iteration: 221890
loss: 1.0237934589385986,grad_norm: 0.7940613822365504, iteration: 221891
loss: 1.0206294059753418,grad_norm: 0.9999994093514383, iteration: 221892
loss: 0.9779406189918518,grad_norm: 0.860938683972877, iteration: 221893
loss: 1.0251798629760742,grad_norm: 0.9999993755903172, iteration: 221894
loss: 1.0142618417739868,grad_norm: 0.8233895142988291, iteration: 221895
loss: 1.0143358707427979,grad_norm: 0.9999991925401276, iteration: 221896
loss: 1.0234384536743164,grad_norm: 0.943864647864377, iteration: 221897
loss: 1.018667459487915,grad_norm: 0.9139164737702802, iteration: 221898
loss: 1.048702597618103,grad_norm: 0.9999992129722222, iteration: 221899
loss: 0.9770758748054504,grad_norm: 0.8137927749890145, iteration: 221900
loss: 0.9872965216636658,grad_norm: 0.999999167436, iteration: 221901
loss: 1.00503408908844,grad_norm: 0.9556615395624231, iteration: 221902
loss: 0.9754182696342468,grad_norm: 0.8955819430993408, iteration: 221903
loss: 1.0382747650146484,grad_norm: 0.9581344133112237, iteration: 221904
loss: 0.9668302536010742,grad_norm: 0.9419991230506279, iteration: 221905
loss: 1.0430316925048828,grad_norm: 0.981853417939255, iteration: 221906
loss: 0.9803087115287781,grad_norm: 0.7836408700847586, iteration: 221907
loss: 1.045653223991394,grad_norm: 0.885323951218567, iteration: 221908
loss: 0.9871103763580322,grad_norm: 0.8166616382439322, iteration: 221909
loss: 0.9778456091880798,grad_norm: 0.8781679943916473, iteration: 221910
loss: 0.9952138066291809,grad_norm: 0.9341270945226421, iteration: 221911
loss: 1.0569312572479248,grad_norm: 0.9999989322639192, iteration: 221912
loss: 0.9986492395401001,grad_norm: 0.7786421044161232, iteration: 221913
loss: 0.9848403930664062,grad_norm: 0.9526686202252478, iteration: 221914
loss: 0.9944488406181335,grad_norm: 0.8700808804121253, iteration: 221915
loss: 1.0857900381088257,grad_norm: 0.9999995301318337, iteration: 221916
loss: 0.9982433319091797,grad_norm: 0.8335777607466957, iteration: 221917
loss: 0.9994075894355774,grad_norm: 0.999999182657098, iteration: 221918
loss: 0.9881178140640259,grad_norm: 0.9200187448530908, iteration: 221919
loss: 0.9817827939987183,grad_norm: 0.8790344246334704, iteration: 221920
loss: 0.9910040497779846,grad_norm: 0.9999991660324627, iteration: 221921
loss: 1.0245496034622192,grad_norm: 0.7947760543265514, iteration: 221922
loss: 1.0315847396850586,grad_norm: 0.9999990535296802, iteration: 221923
loss: 1.0133715867996216,grad_norm: 0.9627527521198317, iteration: 221924
loss: 1.004734992980957,grad_norm: 0.9069007719918387, iteration: 221925
loss: 1.0175007581710815,grad_norm: 0.9999992417960654, iteration: 221926
loss: 0.9955583214759827,grad_norm: 0.9894545833982024, iteration: 221927
loss: 1.0838754177093506,grad_norm: 0.9999993197739603, iteration: 221928
loss: 1.0017085075378418,grad_norm: 0.9999990151130188, iteration: 221929
loss: 0.9759625792503357,grad_norm: 0.9285507467734794, iteration: 221930
loss: 1.006754755973816,grad_norm: 0.9999994804143129, iteration: 221931
loss: 0.9750460386276245,grad_norm: 0.7599102029613867, iteration: 221932
loss: 1.0195565223693848,grad_norm: 0.9999990109528977, iteration: 221933
loss: 1.0146846771240234,grad_norm: 0.8021350764053309, iteration: 221934
loss: 1.035270094871521,grad_norm: 0.8470426624085546, iteration: 221935
loss: 1.0324971675872803,grad_norm: 0.999999220483556, iteration: 221936
loss: 1.021382212638855,grad_norm: 0.8272669919650376, iteration: 221937
loss: 1.0066620111465454,grad_norm: 0.8109453746596196, iteration: 221938
loss: 0.9886008501052856,grad_norm: 0.8275436243508897, iteration: 221939
loss: 0.9508809447288513,grad_norm: 0.9999991502682619, iteration: 221940
loss: 0.9962174892425537,grad_norm: 0.9999991377869968, iteration: 221941
loss: 1.0022317171096802,grad_norm: 0.9999990083531045, iteration: 221942
loss: 0.9746939539909363,grad_norm: 0.8771096137487496, iteration: 221943
loss: 1.0023102760314941,grad_norm: 0.999999143696285, iteration: 221944
loss: 1.0342522859573364,grad_norm: 0.7297399702791776, iteration: 221945
loss: 1.0346875190734863,grad_norm: 0.9076504828176045, iteration: 221946
loss: 0.9867720007896423,grad_norm: 0.9861927304982764, iteration: 221947
loss: 0.9554556608200073,grad_norm: 0.9808858456708591, iteration: 221948
loss: 0.9877780079841614,grad_norm: 0.9999992361851117, iteration: 221949
loss: 0.9755347371101379,grad_norm: 0.8892446044658961, iteration: 221950
loss: 1.0443212985992432,grad_norm: 0.9999995170254617, iteration: 221951
loss: 0.9984026551246643,grad_norm: 0.7385969299831803, iteration: 221952
loss: 1.0232994556427002,grad_norm: 0.9225551749816139, iteration: 221953
loss: 1.0471763610839844,grad_norm: 0.9769318865463366, iteration: 221954
loss: 0.9909272789955139,grad_norm: 0.9999990783067151, iteration: 221955
loss: 0.9776884317398071,grad_norm: 0.9999990771325643, iteration: 221956
loss: 0.944318413734436,grad_norm: 0.95453435565859, iteration: 221957
loss: 0.9696518182754517,grad_norm: 0.9999991174122326, iteration: 221958
loss: 1.0195889472961426,grad_norm: 0.9999990576573119, iteration: 221959
loss: 0.9722173810005188,grad_norm: 0.9999993187880881, iteration: 221960
loss: 0.9963915944099426,grad_norm: 0.9868690617075465, iteration: 221961
loss: 1.0074257850646973,grad_norm: 0.9999989272042753, iteration: 221962
loss: 0.9930111169815063,grad_norm: 0.999998987777192, iteration: 221963
loss: 1.0117121934890747,grad_norm: 0.9327833806725754, iteration: 221964
loss: 0.9578492045402527,grad_norm: 0.9970824087041633, iteration: 221965
loss: 1.0087144374847412,grad_norm: 0.7519953758732588, iteration: 221966
loss: 0.9962381720542908,grad_norm: 0.9117232400618696, iteration: 221967
loss: 1.0108095407485962,grad_norm: 0.9999992744621118, iteration: 221968
loss: 0.9919725060462952,grad_norm: 0.9729710170025506, iteration: 221969
loss: 0.986383855342865,grad_norm: 0.9377561361255867, iteration: 221970
loss: 0.9900648593902588,grad_norm: 0.8671958696488554, iteration: 221971
loss: 1.0188024044036865,grad_norm: 0.9163366692622745, iteration: 221972
loss: 1.005882740020752,grad_norm: 0.99999913580228, iteration: 221973
loss: 0.9988425374031067,grad_norm: 0.882683776229948, iteration: 221974
loss: 1.1191191673278809,grad_norm: 0.999999299062994, iteration: 221975
loss: 0.9939187169075012,grad_norm: 0.8404967153549763, iteration: 221976
loss: 1.0238794088363647,grad_norm: 0.9601984971045693, iteration: 221977
loss: 0.9731107950210571,grad_norm: 0.9272973657236, iteration: 221978
loss: 1.0048191547393799,grad_norm: 0.8468494722962697, iteration: 221979
loss: 0.984617292881012,grad_norm: 0.9999992195527381, iteration: 221980
loss: 1.0230070352554321,grad_norm: 0.8523154270437602, iteration: 221981
loss: 0.9949072003364563,grad_norm: 0.9798201271508539, iteration: 221982
loss: 0.9906352162361145,grad_norm: 0.8663656852184134, iteration: 221983
loss: 1.03812837600708,grad_norm: 0.9999991204997025, iteration: 221984
loss: 1.076509952545166,grad_norm: 0.9999991577753109, iteration: 221985
loss: 1.0461418628692627,grad_norm: 0.9999995905130882, iteration: 221986
loss: 1.0312241315841675,grad_norm: 0.9999990411070447, iteration: 221987
loss: 0.9771202802658081,grad_norm: 0.9999991183028502, iteration: 221988
loss: 1.0307589769363403,grad_norm: 0.9999991683919331, iteration: 221989
loss: 0.9864280223846436,grad_norm: 0.9377640497970493, iteration: 221990
loss: 0.9711067080497742,grad_norm: 0.9133186626545547, iteration: 221991
loss: 1.002871036529541,grad_norm: 0.9227231039690404, iteration: 221992
loss: 0.9899237155914307,grad_norm: 0.8419444063270171, iteration: 221993
loss: 1.0122389793395996,grad_norm: 0.9999993985978026, iteration: 221994
loss: 0.9910585284233093,grad_norm: 0.8289725329482304, iteration: 221995
loss: 0.9931834936141968,grad_norm: 0.999999112838607, iteration: 221996
loss: 1.0232837200164795,grad_norm: 0.8068826536695802, iteration: 221997
loss: 1.0388429164886475,grad_norm: 0.8109091067613813, iteration: 221998
loss: 0.9715387225151062,grad_norm: 0.9518964226074939, iteration: 221999
loss: 1.1402840614318848,grad_norm: 0.9118112786152538, iteration: 222000
loss: 0.9759355187416077,grad_norm: 0.8085078961617501, iteration: 222001
loss: 0.9852708578109741,grad_norm: 0.999998995634913, iteration: 222002
loss: 1.0015430450439453,grad_norm: 0.9703984020177411, iteration: 222003
loss: 0.9622102379798889,grad_norm: 0.9230769017801962, iteration: 222004
loss: 1.0127936601638794,grad_norm: 0.9999991534214091, iteration: 222005
loss: 1.002359390258789,grad_norm: 0.8543412948675835, iteration: 222006
loss: 0.9935668110847473,grad_norm: 0.9999990797902762, iteration: 222007
loss: 1.048219919204712,grad_norm: 0.9999993474589145, iteration: 222008
loss: 1.018541932106018,grad_norm: 0.9576535555056025, iteration: 222009
loss: 0.9818854928016663,grad_norm: 0.9687744291089043, iteration: 222010
loss: 1.0260167121887207,grad_norm: 0.85610998158423, iteration: 222011
loss: 0.9686952829360962,grad_norm: 0.9999992961436472, iteration: 222012
loss: 0.99220210313797,grad_norm: 0.9999989756648378, iteration: 222013
loss: 0.9935414791107178,grad_norm: 0.9999991218885759, iteration: 222014
loss: 1.0073976516723633,grad_norm: 0.9999996140875068, iteration: 222015
loss: 1.0004661083221436,grad_norm: 0.9743824305328581, iteration: 222016
loss: 0.9909598231315613,grad_norm: 0.9996063481016048, iteration: 222017
loss: 1.0271040201187134,grad_norm: 0.9700822249018973, iteration: 222018
loss: 1.0354663133621216,grad_norm: 0.9882985309839091, iteration: 222019
loss: 1.0089080333709717,grad_norm: 0.8349408664492411, iteration: 222020
loss: 0.98964524269104,grad_norm: 0.9999989764417136, iteration: 222021
loss: 0.9980307221412659,grad_norm: 0.8592079343683746, iteration: 222022
loss: 0.9867556095123291,grad_norm: 0.8479868701533424, iteration: 222023
loss: 1.0309916734695435,grad_norm: 0.9890461523458514, iteration: 222024
loss: 0.9953355193138123,grad_norm: 0.7643235383551928, iteration: 222025
loss: 1.0083736181259155,grad_norm: 0.9749440860326072, iteration: 222026
loss: 1.0324313640594482,grad_norm: 0.8957603106125384, iteration: 222027
loss: 0.9809451699256897,grad_norm: 0.9368683296473499, iteration: 222028
loss: 0.985086977481842,grad_norm: 0.846565047467459, iteration: 222029
loss: 1.029415488243103,grad_norm: 0.9999998078686446, iteration: 222030
loss: 1.0266432762145996,grad_norm: 0.9944502664413518, iteration: 222031
loss: 0.9931873083114624,grad_norm: 0.9999990214931589, iteration: 222032
loss: 0.9859795570373535,grad_norm: 0.9027206489788909, iteration: 222033
loss: 0.9875649809837341,grad_norm: 0.8364601528125417, iteration: 222034
loss: 1.0280705690383911,grad_norm: 0.9982820622558752, iteration: 222035
loss: 0.9920545816421509,grad_norm: 0.8709870697155254, iteration: 222036
loss: 1.0150245428085327,grad_norm: 0.9999990488523747, iteration: 222037
loss: 1.0016307830810547,grad_norm: 0.9415299712673653, iteration: 222038
loss: 0.9813568592071533,grad_norm: 0.9189641421716868, iteration: 222039
loss: 1.03471839427948,grad_norm: 0.9999990536993422, iteration: 222040
loss: 0.9633110761642456,grad_norm: 0.9999990771212096, iteration: 222041
loss: 0.9989656805992126,grad_norm: 0.9196991643814314, iteration: 222042
loss: 1.0123766660690308,grad_norm: 0.973675438339303, iteration: 222043
loss: 0.9885359406471252,grad_norm: 0.9673505044406536, iteration: 222044
loss: 1.028863549232483,grad_norm: 0.9195702339541584, iteration: 222045
loss: 1.0528775453567505,grad_norm: 0.8587215763520705, iteration: 222046
loss: 1.0097603797912598,grad_norm: 0.9999990283135023, iteration: 222047
loss: 0.9860721230506897,grad_norm: 0.9665212471845521, iteration: 222048
loss: 0.955632746219635,grad_norm: 0.9999990489270187, iteration: 222049
loss: 0.9890971183776855,grad_norm: 0.9500252177114469, iteration: 222050
loss: 0.9963372349739075,grad_norm: 0.9999991356380863, iteration: 222051
loss: 1.042738437652588,grad_norm: 0.9999989290563137, iteration: 222052
loss: 0.9819608330726624,grad_norm: 0.7597737940555961, iteration: 222053
loss: 1.0118861198425293,grad_norm: 0.9883316918974742, iteration: 222054
loss: 1.0227338075637817,grad_norm: 0.87872625886507, iteration: 222055
loss: 1.012030005455017,grad_norm: 0.892526224522278, iteration: 222056
loss: 0.9846092462539673,grad_norm: 0.9217352563957766, iteration: 222057
loss: 1.0099236965179443,grad_norm: 0.9999994122722069, iteration: 222058
loss: 1.0144497156143188,grad_norm: 0.8256202227858981, iteration: 222059
loss: 0.9910398125648499,grad_norm: 0.9563365295201883, iteration: 222060
loss: 1.0081634521484375,grad_norm: 0.8991395141266021, iteration: 222061
loss: 1.002421498298645,grad_norm: 0.8526202606057077, iteration: 222062
loss: 0.9357138872146606,grad_norm: 0.8725068953559181, iteration: 222063
loss: 1.0057507753372192,grad_norm: 0.9999991144348197, iteration: 222064
loss: 0.9821260571479797,grad_norm: 0.9339364584676617, iteration: 222065
loss: 1.0444140434265137,grad_norm: 0.9999997578373618, iteration: 222066
loss: 0.9920107126235962,grad_norm: 0.9168434093858273, iteration: 222067
loss: 0.9903808236122131,grad_norm: 0.8704160101468837, iteration: 222068
loss: 1.0162127017974854,grad_norm: 0.9110628266539925, iteration: 222069
loss: 1.0101155042648315,grad_norm: 0.9427871456937899, iteration: 222070
loss: 1.0201623439788818,grad_norm: 0.9999989593566684, iteration: 222071
loss: 1.008001446723938,grad_norm: 0.8885134922944238, iteration: 222072
loss: 0.9748420119285583,grad_norm: 0.8181104594917381, iteration: 222073
loss: 0.9614918231964111,grad_norm: 0.9999990513740181, iteration: 222074
loss: 1.0333019495010376,grad_norm: 0.9267712009527406, iteration: 222075
loss: 0.9884940385818481,grad_norm: 0.8869457138318256, iteration: 222076
loss: 0.9968910813331604,grad_norm: 0.8890103467157896, iteration: 222077
loss: 0.9741339683532715,grad_norm: 0.9999990258520557, iteration: 222078
loss: 1.0036977529525757,grad_norm: 0.9076384860216831, iteration: 222079
loss: 1.0246319770812988,grad_norm: 0.999999093161777, iteration: 222080
loss: 1.1243953704833984,grad_norm: 0.9999994555239344, iteration: 222081
loss: 0.9994205832481384,grad_norm: 0.9622496377044084, iteration: 222082
loss: 1.0341323614120483,grad_norm: 0.9962714544732453, iteration: 222083
loss: 0.992506206035614,grad_norm: 0.9174856789950354, iteration: 222084
loss: 1.0317665338516235,grad_norm: 0.9114301864923801, iteration: 222085
loss: 0.9874275326728821,grad_norm: 0.9025022052545973, iteration: 222086
loss: 1.0167758464813232,grad_norm: 0.8690438972300328, iteration: 222087
loss: 1.0002970695495605,grad_norm: 0.9618424104686731, iteration: 222088
loss: 0.9981199502944946,grad_norm: 0.9274559687919829, iteration: 222089
loss: 1.0064456462860107,grad_norm: 0.775573211969125, iteration: 222090
loss: 1.0033754110336304,grad_norm: 0.9699342771752145, iteration: 222091
loss: 1.0002696514129639,grad_norm: 0.9999992487289973, iteration: 222092
loss: 0.9920073747634888,grad_norm: 0.9999992493387085, iteration: 222093
loss: 0.9876517057418823,grad_norm: 0.8979022514416148, iteration: 222094
loss: 1.0292809009552002,grad_norm: 0.9093868364363011, iteration: 222095
loss: 0.9938828945159912,grad_norm: 0.8265787124387326, iteration: 222096
loss: 0.9991859793663025,grad_norm: 0.9999990694730692, iteration: 222097
loss: 1.0324256420135498,grad_norm: 0.9423134614796937, iteration: 222098
loss: 1.0245661735534668,grad_norm: 0.9564570286428496, iteration: 222099
loss: 0.9870561361312866,grad_norm: 0.7805461653268663, iteration: 222100
loss: 1.0067511796951294,grad_norm: 0.893160428225371, iteration: 222101
loss: 1.0109795331954956,grad_norm: 0.9999994631732118, iteration: 222102
loss: 1.0216540098190308,grad_norm: 0.8156342827609672, iteration: 222103
loss: 0.9929808974266052,grad_norm: 0.8486500291247037, iteration: 222104
loss: 0.965994119644165,grad_norm: 0.9289362642197597, iteration: 222105
loss: 0.9937496781349182,grad_norm: 0.8083340602104682, iteration: 222106
loss: 1.0164275169372559,grad_norm: 0.9999990426342745, iteration: 222107
loss: 0.9960142374038696,grad_norm: 0.9067360228874725, iteration: 222108
loss: 0.9630539417266846,grad_norm: 0.928610401316756, iteration: 222109
loss: 1.00238037109375,grad_norm: 0.9134466305955199, iteration: 222110
loss: 0.9780246019363403,grad_norm: 0.8103519777031445, iteration: 222111
loss: 0.9985127449035645,grad_norm: 0.9487943322114271, iteration: 222112
loss: 1.028708815574646,grad_norm: 0.8598339647284068, iteration: 222113
loss: 1.0019022226333618,grad_norm: 0.8724522415695606, iteration: 222114
loss: 1.01768958568573,grad_norm: 0.876206279477947, iteration: 222115
loss: 1.0108919143676758,grad_norm: 0.9624771903068488, iteration: 222116
loss: 0.9984558820724487,grad_norm: 0.922163697667608, iteration: 222117
loss: 1.0023016929626465,grad_norm: 0.9816881296205388, iteration: 222118
loss: 0.9864639639854431,grad_norm: 0.8883093246381026, iteration: 222119
loss: 0.9829404354095459,grad_norm: 0.9999990579120013, iteration: 222120
loss: 0.9868266582489014,grad_norm: 0.9026461254683579, iteration: 222121
loss: 1.0270687341690063,grad_norm: 0.8108115271947287, iteration: 222122
loss: 1.0048023462295532,grad_norm: 0.8538319801890872, iteration: 222123
loss: 0.9905490279197693,grad_norm: 0.8964579730885577, iteration: 222124
loss: 0.9741090536117554,grad_norm: 0.9999991991606236, iteration: 222125
loss: 0.9901938438415527,grad_norm: 0.9999990661315932, iteration: 222126
loss: 0.9846583604812622,grad_norm: 0.8593193045717965, iteration: 222127
loss: 0.9838833808898926,grad_norm: 0.9999990612661926, iteration: 222128
loss: 0.9890810251235962,grad_norm: 0.8949622826668586, iteration: 222129
loss: 0.9921268820762634,grad_norm: 0.8784950643472427, iteration: 222130
loss: 1.0066392421722412,grad_norm: 0.9999992015231162, iteration: 222131
loss: 0.9934800267219543,grad_norm: 0.955880913707337, iteration: 222132
loss: 0.9622235298156738,grad_norm: 0.936851095469617, iteration: 222133
loss: 0.9811533093452454,grad_norm: 0.963653824742525, iteration: 222134
loss: 1.0156036615371704,grad_norm: 0.9459280497811471, iteration: 222135
loss: 1.0257506370544434,grad_norm: 0.8369241765294205, iteration: 222136
loss: 0.9827395677566528,grad_norm: 0.999999966579622, iteration: 222137
loss: 0.9916425347328186,grad_norm: 0.8007587385975757, iteration: 222138
loss: 1.0041115283966064,grad_norm: 0.8616133540622234, iteration: 222139
loss: 1.003135323524475,grad_norm: 0.9999990020569359, iteration: 222140
loss: 1.0101770162582397,grad_norm: 0.9999990050211413, iteration: 222141
loss: 0.9998410940170288,grad_norm: 0.9977890153673525, iteration: 222142
loss: 1.003275752067566,grad_norm: 0.9999990739346268, iteration: 222143
loss: 1.0151177644729614,grad_norm: 0.9999992085077624, iteration: 222144
loss: 1.018398642539978,grad_norm: 0.9630678207910491, iteration: 222145
loss: 1.0199952125549316,grad_norm: 0.9999992001706115, iteration: 222146
loss: 0.9799264669418335,grad_norm: 0.902715540769065, iteration: 222147
loss: 1.017863392829895,grad_norm: 0.7516243918106094, iteration: 222148
loss: 0.9458509683609009,grad_norm: 0.999999014459383, iteration: 222149
loss: 1.0298703908920288,grad_norm: 0.9999996329132044, iteration: 222150
loss: 1.0171263217926025,grad_norm: 0.9999990865179826, iteration: 222151
loss: 1.0048998594284058,grad_norm: 0.9999990890145363, iteration: 222152
loss: 1.022301435470581,grad_norm: 0.9339334791155419, iteration: 222153
loss: 1.0244795083999634,grad_norm: 0.8439598663512887, iteration: 222154
loss: 1.0184866189956665,grad_norm: 0.895815989029697, iteration: 222155
loss: 0.9823490977287292,grad_norm: 0.8997643814541277, iteration: 222156
loss: 1.003293752670288,grad_norm: 0.9999991231098369, iteration: 222157
loss: 0.976331889629364,grad_norm: 0.999999076092557, iteration: 222158
loss: 1.0075452327728271,grad_norm: 0.8650977036357648, iteration: 222159
loss: 0.9950123429298401,grad_norm: 0.9840611572724702, iteration: 222160
loss: 1.0170526504516602,grad_norm: 0.9239571373737534, iteration: 222161
loss: 1.0106045007705688,grad_norm: 0.8426253264005455, iteration: 222162
loss: 1.008054256439209,grad_norm: 0.8762789630358109, iteration: 222163
loss: 0.9733936190605164,grad_norm: 0.9466645450347269, iteration: 222164
loss: 1.0332449674606323,grad_norm: 0.9429154703588105, iteration: 222165
loss: 1.0195120573043823,grad_norm: 0.8493806409684037, iteration: 222166
loss: 1.0050387382507324,grad_norm: 0.7972361770390419, iteration: 222167
loss: 1.002994418144226,grad_norm: 0.9749596448091954, iteration: 222168
loss: 1.0359824895858765,grad_norm: 0.967248658197798, iteration: 222169
loss: 1.042374849319458,grad_norm: 0.9691336491369933, iteration: 222170
loss: 0.9506487250328064,grad_norm: 0.8881074368064233, iteration: 222171
loss: 1.0106186866760254,grad_norm: 0.9163040789843464, iteration: 222172
loss: 1.047523021697998,grad_norm: 0.99999935122783, iteration: 222173
loss: 1.0074816942214966,grad_norm: 0.9954862259828092, iteration: 222174
loss: 0.9991970062255859,grad_norm: 0.956919558698129, iteration: 222175
loss: 0.9649065136909485,grad_norm: 0.8946588297158329, iteration: 222176
loss: 1.009418249130249,grad_norm: 0.8758658477702435, iteration: 222177
loss: 0.985113799571991,grad_norm: 0.9999989836294141, iteration: 222178
loss: 0.9846269488334656,grad_norm: 0.9999990988056633, iteration: 222179
loss: 0.9907862544059753,grad_norm: 0.9999990556508667, iteration: 222180
loss: 1.0747637748718262,grad_norm: 0.9999996112991028, iteration: 222181
loss: 0.9679253697395325,grad_norm: 0.9241576985588374, iteration: 222182
loss: 1.0397647619247437,grad_norm: 0.9999991049957853, iteration: 222183
loss: 1.045898199081421,grad_norm: 0.7746012147207241, iteration: 222184
loss: 1.0076931715011597,grad_norm: 0.913219045856327, iteration: 222185
loss: 0.9915175437927246,grad_norm: 0.7945198398072708, iteration: 222186
loss: 0.9948701858520508,grad_norm: 0.934207874926733, iteration: 222187
loss: 1.010302186012268,grad_norm: 0.8981579447960382, iteration: 222188
loss: 1.031398057937622,grad_norm: 0.9999997332992923, iteration: 222189
loss: 1.0687638521194458,grad_norm: 0.9999997152597679, iteration: 222190
loss: 0.9789058566093445,grad_norm: 0.8599566036321981, iteration: 222191
loss: 1.0313544273376465,grad_norm: 0.8954295943833508, iteration: 222192
loss: 1.0003217458724976,grad_norm: 0.8329917705650252, iteration: 222193
loss: 1.0121135711669922,grad_norm: 0.866109714245584, iteration: 222194
loss: 1.0278921127319336,grad_norm: 0.9132573342810129, iteration: 222195
loss: 0.9907566905021667,grad_norm: 0.791954623399904, iteration: 222196
loss: 1.00959312915802,grad_norm: 0.8268225603375347, iteration: 222197
loss: 0.9895924925804138,grad_norm: 0.9341355516627426, iteration: 222198
loss: 0.9591748118400574,grad_norm: 0.9999991432494627, iteration: 222199
loss: 0.9993152022361755,grad_norm: 0.8917469743384508, iteration: 222200
loss: 1.0301984548568726,grad_norm: 0.8473513117247367, iteration: 222201
loss: 1.0160681009292603,grad_norm: 0.9504776291777848, iteration: 222202
loss: 1.0248264074325562,grad_norm: 0.9999990485100434, iteration: 222203
loss: 0.9806079864501953,grad_norm: 0.999999047986591, iteration: 222204
loss: 0.976190984249115,grad_norm: 0.9957732264553908, iteration: 222205
loss: 0.9893523454666138,grad_norm: 0.863648245379454, iteration: 222206
loss: 0.964740514755249,grad_norm: 0.9639559575222443, iteration: 222207
loss: 1.015523076057434,grad_norm: 0.8659497867654302, iteration: 222208
loss: 1.0143547058105469,grad_norm: 0.9695699046477068, iteration: 222209
loss: 1.0044001340866089,grad_norm: 0.9999989864566212, iteration: 222210
loss: 1.007465124130249,grad_norm: 0.7671534826319122, iteration: 222211
loss: 1.016210675239563,grad_norm: 0.787776605685802, iteration: 222212
loss: 1.0256768465042114,grad_norm: 0.8481242151268277, iteration: 222213
loss: 1.0203198194503784,grad_norm: 0.8522488044105236, iteration: 222214
loss: 1.0042028427124023,grad_norm: 0.9403019589252731, iteration: 222215
loss: 1.0306278467178345,grad_norm: 0.9701442803527311, iteration: 222216
loss: 0.9639483690261841,grad_norm: 0.9999992248734979, iteration: 222217
loss: 0.9879972338676453,grad_norm: 0.9464883938288733, iteration: 222218
loss: 0.9972969889640808,grad_norm: 0.8742851897141014, iteration: 222219
loss: 0.9968608617782593,grad_norm: 0.9144103282306262, iteration: 222220
loss: 0.9969167709350586,grad_norm: 0.9684605280852596, iteration: 222221
loss: 1.0304337739944458,grad_norm: 0.999998995711381, iteration: 222222
loss: 1.0015982389450073,grad_norm: 0.999998874311972, iteration: 222223
loss: 0.9717427492141724,grad_norm: 0.9071407727766904, iteration: 222224
loss: 1.0069798231124878,grad_norm: 0.9999994660599516, iteration: 222225
loss: 0.9531927704811096,grad_norm: 0.8173475904934825, iteration: 222226
loss: 0.9960538148880005,grad_norm: 0.9578796667381264, iteration: 222227
loss: 1.0047249794006348,grad_norm: 0.9999990983104595, iteration: 222228
loss: 1.0044000148773193,grad_norm: 0.9318582520181046, iteration: 222229
loss: 1.0470749139785767,grad_norm: 0.9999991509973942, iteration: 222230
loss: 1.007389783859253,grad_norm: 0.9999998580321691, iteration: 222231
loss: 0.9885550737380981,grad_norm: 0.9999990474292508, iteration: 222232
loss: 0.9866284728050232,grad_norm: 0.81457693291384, iteration: 222233
loss: 0.9604265689849854,grad_norm: 0.9999991077854642, iteration: 222234
loss: 0.9980826377868652,grad_norm: 0.7818142680354065, iteration: 222235
loss: 1.0222461223602295,grad_norm: 0.8346044425699084, iteration: 222236
loss: 1.0058979988098145,grad_norm: 0.864165533544864, iteration: 222237
loss: 0.9894234538078308,grad_norm: 0.9999990521412413, iteration: 222238
loss: 0.9936130046844482,grad_norm: 0.9519023042265784, iteration: 222239
loss: 0.9886003136634827,grad_norm: 0.873723987978017, iteration: 222240
loss: 1.015830159187317,grad_norm: 0.9999999014091726, iteration: 222241
loss: 0.9622327089309692,grad_norm: 0.9074456387094152, iteration: 222242
loss: 0.9992413520812988,grad_norm: 0.9999992733334235, iteration: 222243
loss: 1.0119496583938599,grad_norm: 0.9347488493633714, iteration: 222244
loss: 1.0072261095046997,grad_norm: 0.9999997019533119, iteration: 222245
loss: 0.9801414012908936,grad_norm: 0.9999991548889308, iteration: 222246
loss: 0.9832078814506531,grad_norm: 0.9363311068914099, iteration: 222247
loss: 1.0370173454284668,grad_norm: 0.9999996909486996, iteration: 222248
loss: 0.9859387278556824,grad_norm: 0.9633015930704639, iteration: 222249
loss: 0.9563269019126892,grad_norm: 0.9999990850831245, iteration: 222250
loss: 1.0033255815505981,grad_norm: 0.9999989805201448, iteration: 222251
loss: 1.002078652381897,grad_norm: 0.92760497596496, iteration: 222252
loss: 1.0086820125579834,grad_norm: 0.9856802773683823, iteration: 222253
loss: 1.008669137954712,grad_norm: 0.8455210873579088, iteration: 222254
loss: 0.9812009930610657,grad_norm: 0.9999989651545025, iteration: 222255
loss: 1.016878366470337,grad_norm: 0.9624640661465342, iteration: 222256
loss: 0.9874038100242615,grad_norm: 0.8231812514262145, iteration: 222257
loss: 0.9993339776992798,grad_norm: 0.8256590540645544, iteration: 222258
loss: 0.9631709456443787,grad_norm: 0.9999992463282443, iteration: 222259
loss: 0.9886782169342041,grad_norm: 0.9743606617368851, iteration: 222260
loss: 1.0133609771728516,grad_norm: 0.917980560983189, iteration: 222261
loss: 1.0438368320465088,grad_norm: 0.9999997245752037, iteration: 222262
loss: 1.0004795789718628,grad_norm: 0.9557569099553818, iteration: 222263
loss: 0.9991781115531921,grad_norm: 0.9658235996467719, iteration: 222264
loss: 1.0086565017700195,grad_norm: 0.9999990371223256, iteration: 222265
loss: 0.9744430184364319,grad_norm: 0.9999991288656663, iteration: 222266
loss: 0.994716227054596,grad_norm: 0.9556424245072549, iteration: 222267
loss: 1.0306971073150635,grad_norm: 0.9999996710992898, iteration: 222268
loss: 1.0091708898544312,grad_norm: 0.9337051274086491, iteration: 222269
loss: 1.019229769706726,grad_norm: 0.8488984004900774, iteration: 222270
loss: 0.9912169575691223,grad_norm: 0.9101257106056916, iteration: 222271
loss: 0.984659731388092,grad_norm: 0.9999996056269639, iteration: 222272
loss: 0.9891502261161804,grad_norm: 0.9999991407789439, iteration: 222273
loss: 1.0012707710266113,grad_norm: 0.9919368795540792, iteration: 222274
loss: 1.0308408737182617,grad_norm: 0.9716966897806572, iteration: 222275
loss: 0.9868409633636475,grad_norm: 0.9999993654896917, iteration: 222276
loss: 1.040876865386963,grad_norm: 0.8726589570479334, iteration: 222277
loss: 1.0057640075683594,grad_norm: 0.9378925315762298, iteration: 222278
loss: 0.9580039381980896,grad_norm: 0.8601848786766471, iteration: 222279
loss: 1.0072044134140015,grad_norm: 0.9592303024706198, iteration: 222280
loss: 1.014223575592041,grad_norm: 0.7573415013835975, iteration: 222281
loss: 1.0350033044815063,grad_norm: 0.9999991117072624, iteration: 222282
loss: 1.0000253915786743,grad_norm: 0.9999990953546098, iteration: 222283
loss: 0.9806581735610962,grad_norm: 0.8614755483990628, iteration: 222284
loss: 0.9686427116394043,grad_norm: 0.9999993522075654, iteration: 222285
loss: 0.9849408268928528,grad_norm: 0.9547666205501781, iteration: 222286
loss: 0.9671245813369751,grad_norm: 0.9999990200650734, iteration: 222287
loss: 0.9904567003250122,grad_norm: 0.9999991203736289, iteration: 222288
loss: 1.0365521907806396,grad_norm: 0.9999990968991862, iteration: 222289
loss: 1.009871006011963,grad_norm: 0.9999990087290012, iteration: 222290
loss: 1.0238951444625854,grad_norm: 0.9999991801151538, iteration: 222291
loss: 0.9945478439331055,grad_norm: 0.9999992432380506, iteration: 222292
loss: 1.0301260948181152,grad_norm: 0.9764678737350077, iteration: 222293
loss: 1.0341089963912964,grad_norm: 0.8398592932289181, iteration: 222294
loss: 1.0114837884902954,grad_norm: 0.9625284122503747, iteration: 222295
loss: 1.005847454071045,grad_norm: 0.9999992740147275, iteration: 222296
loss: 0.9957873821258545,grad_norm: 0.9999991704742637, iteration: 222297
loss: 1.0087907314300537,grad_norm: 0.9999993671850778, iteration: 222298
loss: 0.9885563254356384,grad_norm: 0.9619478547284529, iteration: 222299
loss: 1.0011646747589111,grad_norm: 0.9999990777490457, iteration: 222300
loss: 1.0285674333572388,grad_norm: 0.8865197394343532, iteration: 222301
loss: 0.9784449338912964,grad_norm: 0.9999992071067906, iteration: 222302
loss: 0.9729657173156738,grad_norm: 0.9580048061408899, iteration: 222303
loss: 1.0942620038986206,grad_norm: 0.8201410392401557, iteration: 222304
loss: 1.0129302740097046,grad_norm: 0.9242892194973159, iteration: 222305
loss: 1.0112088918685913,grad_norm: 0.9999995263433052, iteration: 222306
loss: 1.031217098236084,grad_norm: 0.8523570303381194, iteration: 222307
loss: 0.9988769292831421,grad_norm: 0.9036116719735771, iteration: 222308
loss: 1.0181164741516113,grad_norm: 0.9832457064104585, iteration: 222309
loss: 0.9867244362831116,grad_norm: 0.8943422265507317, iteration: 222310
loss: 1.0074104070663452,grad_norm: 0.8885819912430662, iteration: 222311
loss: 0.9744340777397156,grad_norm: 0.9388426886524189, iteration: 222312
loss: 1.0071661472320557,grad_norm: 0.9999991232344687, iteration: 222313
loss: 0.992443859577179,grad_norm: 0.8274656911149046, iteration: 222314
loss: 0.9984003305435181,grad_norm: 0.8796310416370366, iteration: 222315
loss: 0.9980342984199524,grad_norm: 0.9657932939234656, iteration: 222316
loss: 1.0830012559890747,grad_norm: 0.999999337026454, iteration: 222317
loss: 1.012724757194519,grad_norm: 0.9999990983618342, iteration: 222318
loss: 1.1420457363128662,grad_norm: 0.9999999100602309, iteration: 222319
loss: 1.0188120603561401,grad_norm: 0.8281701827262945, iteration: 222320
loss: 0.9911525845527649,grad_norm: 0.9362718805131762, iteration: 222321
loss: 0.9532747864723206,grad_norm: 0.8598797237399978, iteration: 222322
loss: 1.004551887512207,grad_norm: 0.8301886123895162, iteration: 222323
loss: 1.014320969581604,grad_norm: 0.8502768524490447, iteration: 222324
loss: 1.0504287481307983,grad_norm: 0.9999990513343487, iteration: 222325
loss: 1.0250248908996582,grad_norm: 0.8331462856451001, iteration: 222326
loss: 1.0033810138702393,grad_norm: 0.9999991951726802, iteration: 222327
loss: 0.9671100974082947,grad_norm: 0.8227890476940816, iteration: 222328
loss: 1.0321890115737915,grad_norm: 0.9999990467303839, iteration: 222329
loss: 0.999933660030365,grad_norm: 0.8949260137340636, iteration: 222330
loss: 1.031185269355774,grad_norm: 0.9999992121053763, iteration: 222331
loss: 1.0302842855453491,grad_norm: 0.9994120708053662, iteration: 222332
loss: 1.021175742149353,grad_norm: 0.9999990107511021, iteration: 222333
loss: 1.0241550207138062,grad_norm: 0.9999991070654394, iteration: 222334
loss: 0.9794101715087891,grad_norm: 0.9555758213310366, iteration: 222335
loss: 1.0333802700042725,grad_norm: 0.9005893377688133, iteration: 222336
loss: 0.9862073063850403,grad_norm: 0.9999992172617683, iteration: 222337
loss: 1.0160961151123047,grad_norm: 0.909117093000164, iteration: 222338
loss: 1.0086016654968262,grad_norm: 0.9902647084869, iteration: 222339
loss: 1.026120901107788,grad_norm: 0.8624544178044754, iteration: 222340
loss: 1.011077880859375,grad_norm: 0.85263330988306, iteration: 222341
loss: 0.9862233996391296,grad_norm: 0.9999990473232319, iteration: 222342
loss: 1.0087770223617554,grad_norm: 0.9999991521128152, iteration: 222343
loss: 1.0215342044830322,grad_norm: 0.8632753618419047, iteration: 222344
loss: 1.0185405015945435,grad_norm: 0.9006834932659513, iteration: 222345
loss: 1.0631102323532104,grad_norm: 0.99999900973948, iteration: 222346
loss: 0.9660528302192688,grad_norm: 0.9896759912164801, iteration: 222347
loss: 0.9409475326538086,grad_norm: 0.9999990100136082, iteration: 222348
loss: 1.0319045782089233,grad_norm: 0.8820221795696513, iteration: 222349
loss: 0.9996614456176758,grad_norm: 0.7918738184687085, iteration: 222350
loss: 1.0233712196350098,grad_norm: 0.9999991099291611, iteration: 222351
loss: 0.984880268573761,grad_norm: 0.9999990908609423, iteration: 222352
loss: 1.0058526992797852,grad_norm: 0.9452076492684113, iteration: 222353
loss: 0.9858294725418091,grad_norm: 0.9999991118458731, iteration: 222354
loss: 0.9762579202651978,grad_norm: 0.836581732780011, iteration: 222355
loss: 1.0002037286758423,grad_norm: 0.882960155478075, iteration: 222356
loss: 1.005752682685852,grad_norm: 0.9422366081430298, iteration: 222357
loss: 0.9714779257774353,grad_norm: 0.832088624534259, iteration: 222358
loss: 0.979893147945404,grad_norm: 0.9790100028371643, iteration: 222359
loss: 1.0133296251296997,grad_norm: 0.8998574130686812, iteration: 222360
loss: 1.0050909519195557,grad_norm: 0.9647980225862516, iteration: 222361
loss: 0.994271993637085,grad_norm: 0.9207173311307895, iteration: 222362
loss: 0.9931275844573975,grad_norm: 0.8467360759964604, iteration: 222363
loss: 0.9648948907852173,grad_norm: 0.9385685924828763, iteration: 222364
loss: 0.9948038458824158,grad_norm: 0.9990922608227334, iteration: 222365
loss: 1.0451569557189941,grad_norm: 0.9999991709288896, iteration: 222366
loss: 0.9689966440200806,grad_norm: 0.8484514663908512, iteration: 222367
loss: 0.98423832654953,grad_norm: 0.854957599483301, iteration: 222368
loss: 1.0120564699172974,grad_norm: 0.8069446707508748, iteration: 222369
loss: 1.0234880447387695,grad_norm: 0.9999994900055352, iteration: 222370
loss: 1.0017890930175781,grad_norm: 0.9999992910149889, iteration: 222371
loss: 0.9870613813400269,grad_norm: 0.8542550223855663, iteration: 222372
loss: 0.9933401346206665,grad_norm: 0.9999990419102344, iteration: 222373
loss: 0.9845182299613953,grad_norm: 0.8179521992727514, iteration: 222374
loss: 0.9812250733375549,grad_norm: 0.7528328696827613, iteration: 222375
loss: 1.0179321765899658,grad_norm: 0.9999994978173936, iteration: 222376
loss: 0.973590612411499,grad_norm: 0.9467905477156742, iteration: 222377
loss: 1.002151608467102,grad_norm: 0.809367761837261, iteration: 222378
loss: 1.0240095853805542,grad_norm: 0.8543949803291331, iteration: 222379
loss: 1.0068696737289429,grad_norm: 0.7871916175223594, iteration: 222380
loss: 0.987641453742981,grad_norm: 0.999999852719469, iteration: 222381
loss: 1.0135540962219238,grad_norm: 0.9999990662876534, iteration: 222382
loss: 0.9928529262542725,grad_norm: 0.9999990886266158, iteration: 222383
loss: 1.011083722114563,grad_norm: 0.839676575546572, iteration: 222384
loss: 1.000807523727417,grad_norm: 0.9073672101832404, iteration: 222385
loss: 1.0100237131118774,grad_norm: 0.9999991985591931, iteration: 222386
loss: 1.0126718282699585,grad_norm: 0.8077284189656391, iteration: 222387
loss: 1.011826515197754,grad_norm: 0.9809103778158967, iteration: 222388
loss: 1.006238341331482,grad_norm: 0.9459132503773324, iteration: 222389
loss: 0.9960129857063293,grad_norm: 0.79328714800303, iteration: 222390
loss: 0.9944964647293091,grad_norm: 0.8648543984460747, iteration: 222391
loss: 0.9575038552284241,grad_norm: 0.9999990863036236, iteration: 222392
loss: 0.9660584330558777,grad_norm: 0.9572058924325444, iteration: 222393
loss: 0.9862902164459229,grad_norm: 0.8631719229545216, iteration: 222394
loss: 0.9587559700012207,grad_norm: 0.8302562856514268, iteration: 222395
loss: 0.9791135787963867,grad_norm: 0.8883973304711936, iteration: 222396
loss: 0.988385796546936,grad_norm: 0.9999990985147548, iteration: 222397
loss: 1.014913558959961,grad_norm: 0.9036869840781769, iteration: 222398
loss: 1.003867506980896,grad_norm: 0.9440105481760472, iteration: 222399
loss: 0.9593217968940735,grad_norm: 0.882551159854072, iteration: 222400
loss: 0.9857450723648071,grad_norm: 0.9068728844104762, iteration: 222401
loss: 1.007426142692566,grad_norm: 0.9875380953754763, iteration: 222402
loss: 1.02829110622406,grad_norm: 0.8095977899919524, iteration: 222403
loss: 0.9883447885513306,grad_norm: 0.9999990133127138, iteration: 222404
loss: 1.0140860080718994,grad_norm: 0.9999990818274658, iteration: 222405
loss: 0.999367892742157,grad_norm: 0.8404657038617916, iteration: 222406
loss: 1.010807991027832,grad_norm: 0.8318786455220469, iteration: 222407
loss: 0.9865996837615967,grad_norm: 0.9999989979417816, iteration: 222408
loss: 0.9956198334693909,grad_norm: 0.8944155377349724, iteration: 222409
loss: 0.9653318524360657,grad_norm: 0.8930031261430186, iteration: 222410
loss: 0.9472107887268066,grad_norm: 0.9209010742402391, iteration: 222411
loss: 0.993309736251831,grad_norm: 0.973079047007094, iteration: 222412
loss: 1.0078725814819336,grad_norm: 0.9800604789511903, iteration: 222413
loss: 1.0109628438949585,grad_norm: 0.9785335464905167, iteration: 222414
loss: 0.9839118123054504,grad_norm: 0.9516351854435581, iteration: 222415
loss: 1.041100263595581,grad_norm: 0.9928049678933294, iteration: 222416
loss: 1.0302807092666626,grad_norm: 0.9999996889169679, iteration: 222417
loss: 0.9527316689491272,grad_norm: 0.9216637853709453, iteration: 222418
loss: 0.981108546257019,grad_norm: 0.9607867718964074, iteration: 222419
loss: 1.0293375253677368,grad_norm: 0.7777644785970919, iteration: 222420
loss: 1.0518501996994019,grad_norm: 0.9999995495505022, iteration: 222421
loss: 1.032150149345398,grad_norm: 0.7910351504761624, iteration: 222422
loss: 1.0086289644241333,grad_norm: 0.9651078888951897, iteration: 222423
loss: 0.99757981300354,grad_norm: 0.9277367401633189, iteration: 222424
loss: 1.0290179252624512,grad_norm: 0.9053036190326306, iteration: 222425
loss: 1.0406967401504517,grad_norm: 0.999999093589384, iteration: 222426
loss: 1.0425642728805542,grad_norm: 0.9999992662243044, iteration: 222427
loss: 1.0003987550735474,grad_norm: 0.9999990895105674, iteration: 222428
loss: 1.02692711353302,grad_norm: 0.9999990147945264, iteration: 222429
loss: 0.9736413359642029,grad_norm: 0.8142270076596708, iteration: 222430
loss: 0.9866048097610474,grad_norm: 0.9110627394205334, iteration: 222431
loss: 0.9993889331817627,grad_norm: 0.855206849004399, iteration: 222432
loss: 0.9980727434158325,grad_norm: 0.8711004103065865, iteration: 222433
loss: 0.9987879395484924,grad_norm: 0.8876370448672469, iteration: 222434
loss: 1.0070072412490845,grad_norm: 0.8313363354254578, iteration: 222435
loss: 0.9799649715423584,grad_norm: 0.9947622282307138, iteration: 222436
loss: 1.0234605073928833,grad_norm: 0.87334355657399, iteration: 222437
loss: 1.0122157335281372,grad_norm: 0.9999991316274801, iteration: 222438
loss: 1.0020489692687988,grad_norm: 0.8906536331438678, iteration: 222439
loss: 0.9928051233291626,grad_norm: 0.7861315932809575, iteration: 222440
loss: 1.0022852420806885,grad_norm: 0.8403598521337715, iteration: 222441
loss: 1.0316358804702759,grad_norm: 0.9324406554422081, iteration: 222442
loss: 1.0109597444534302,grad_norm: 0.9318917615459289, iteration: 222443
loss: 1.0274312496185303,grad_norm: 0.8801586799178042, iteration: 222444
loss: 1.0085811614990234,grad_norm: 0.8668332669364711, iteration: 222445
loss: 0.9298415184020996,grad_norm: 0.9999991102559163, iteration: 222446
loss: 0.9990416765213013,grad_norm: 0.9999995883183416, iteration: 222447
loss: 1.0053499937057495,grad_norm: 0.9999993999640376, iteration: 222448
loss: 0.9614907503128052,grad_norm: 0.9999990373239576, iteration: 222449
loss: 0.9790722727775574,grad_norm: 0.9670009377657885, iteration: 222450
loss: 0.9883891940116882,grad_norm: 0.8821453271237463, iteration: 222451
loss: 0.9788869023323059,grad_norm: 0.8292955756368144, iteration: 222452
loss: 0.9919614195823669,grad_norm: 0.8320815775346179, iteration: 222453
loss: 1.0047216415405273,grad_norm: 0.9967837562284593, iteration: 222454
loss: 0.998187780380249,grad_norm: 0.9029499571770377, iteration: 222455
loss: 1.0122994184494019,grad_norm: 0.9621522494648206, iteration: 222456
loss: 0.9968750476837158,grad_norm: 0.8207744967203835, iteration: 222457
loss: 0.975953221321106,grad_norm: 0.9183426659183929, iteration: 222458
loss: 1.0015760660171509,grad_norm: 0.8142144426318897, iteration: 222459
loss: 0.9971487522125244,grad_norm: 0.9999991376702276, iteration: 222460
loss: 1.0040796995162964,grad_norm: 0.9999991283364064, iteration: 222461
loss: 0.9853001236915588,grad_norm: 0.8198163445996509, iteration: 222462
loss: 1.0312418937683105,grad_norm: 0.9999991607747393, iteration: 222463
loss: 1.0334020853042603,grad_norm: 0.791167107857546, iteration: 222464
loss: 1.0041217803955078,grad_norm: 0.9469519147696588, iteration: 222465
loss: 1.0022482872009277,grad_norm: 0.8809125847801194, iteration: 222466
loss: 0.9577021598815918,grad_norm: 0.9999990742703911, iteration: 222467
loss: 1.0137591361999512,grad_norm: 0.9999990524598447, iteration: 222468
loss: 1.0010998249053955,grad_norm: 0.9999991347529382, iteration: 222469
loss: 0.9988424181938171,grad_norm: 0.9999990649878409, iteration: 222470
loss: 0.9912854433059692,grad_norm: 0.8261730417345736, iteration: 222471
loss: 1.019882321357727,grad_norm: 0.9999991811984174, iteration: 222472
loss: 1.01017427444458,grad_norm: 0.9999992260405611, iteration: 222473
loss: 0.9953055381774902,grad_norm: 0.999999074963817, iteration: 222474
loss: 0.9802954792976379,grad_norm: 0.8946942469262782, iteration: 222475
loss: 0.9974072575569153,grad_norm: 0.7695625949481516, iteration: 222476
loss: 1.0280524492263794,grad_norm: 0.9999989504133799, iteration: 222477
loss: 1.0081254243850708,grad_norm: 0.9164483803141836, iteration: 222478
loss: 0.9877297878265381,grad_norm: 0.9999992642060439, iteration: 222479
loss: 1.0017268657684326,grad_norm: 0.9404851380116355, iteration: 222480
loss: 1.0240172147750854,grad_norm: 0.9999991127254841, iteration: 222481
loss: 0.9941057562828064,grad_norm: 0.9999992533442903, iteration: 222482
loss: 0.9888846278190613,grad_norm: 0.9020690871194712, iteration: 222483
loss: 1.0261859893798828,grad_norm: 0.8189255863607716, iteration: 222484
loss: 1.0223450660705566,grad_norm: 0.9068513451074972, iteration: 222485
loss: 0.986077606678009,grad_norm: 0.8885810812071735, iteration: 222486
loss: 1.0309693813323975,grad_norm: 0.9905888335246482, iteration: 222487
loss: 0.9784963130950928,grad_norm: 0.9557667789097887, iteration: 222488
loss: 1.0142860412597656,grad_norm: 0.9078635949948308, iteration: 222489
loss: 1.017684817314148,grad_norm: 0.9813105706286072, iteration: 222490
loss: 1.0114753246307373,grad_norm: 0.9271376803832478, iteration: 222491
loss: 0.9908914566040039,grad_norm: 0.9628002333559953, iteration: 222492
loss: 0.9942119121551514,grad_norm: 0.8937784093106079, iteration: 222493
loss: 1.0275222063064575,grad_norm: 0.9625883915811193, iteration: 222494
loss: 0.9629020094871521,grad_norm: 0.8452978912927761, iteration: 222495
loss: 0.9998911023139954,grad_norm: 0.8661180571035673, iteration: 222496
loss: 0.9479928612709045,grad_norm: 0.999999176713894, iteration: 222497
loss: 0.9879602193832397,grad_norm: 0.8898310182072404, iteration: 222498
loss: 0.9757843017578125,grad_norm: 0.9999989633973583, iteration: 222499
loss: 0.9848206043243408,grad_norm: 0.9029613581680171, iteration: 222500
loss: 0.996971845626831,grad_norm: 0.8699570396751101, iteration: 222501
loss: 0.986542820930481,grad_norm: 0.9537203718268981, iteration: 222502
loss: 0.9772098660469055,grad_norm: 0.9447569046681564, iteration: 222503
loss: 0.9980975985527039,grad_norm: 0.9762351397828696, iteration: 222504
loss: 1.0069068670272827,grad_norm: 0.9415076794309939, iteration: 222505
loss: 1.011915683746338,grad_norm: 0.9999997389427199, iteration: 222506
loss: 0.967871367931366,grad_norm: 0.9811293481187968, iteration: 222507
loss: 0.973792552947998,grad_norm: 0.8321009573614716, iteration: 222508
loss: 0.9767972826957703,grad_norm: 0.8675768265711381, iteration: 222509
loss: 0.9808704257011414,grad_norm: 0.9999992158738332, iteration: 222510
loss: 0.9903334975242615,grad_norm: 0.9999991076056626, iteration: 222511
loss: 1.035589575767517,grad_norm: 0.8470830152126717, iteration: 222512
loss: 1.0171905755996704,grad_norm: 0.8386797065345108, iteration: 222513
loss: 1.000762939453125,grad_norm: 0.8519155105028383, iteration: 222514
loss: 0.9821403622627258,grad_norm: 0.7669610751882895, iteration: 222515
loss: 1.0101362466812134,grad_norm: 0.9193663146454011, iteration: 222516
loss: 1.0045703649520874,grad_norm: 0.9728184649512635, iteration: 222517
loss: 0.986974835395813,grad_norm: 0.7932551836196133, iteration: 222518
loss: 0.9567613005638123,grad_norm: 0.9868137528647801, iteration: 222519
loss: 1.0099588632583618,grad_norm: 0.9850388347050157, iteration: 222520
loss: 1.0029369592666626,grad_norm: 0.9168609665466266, iteration: 222521
loss: 1.0282974243164062,grad_norm: 0.7934867491587358, iteration: 222522
loss: 1.0407235622406006,grad_norm: 0.9999994600404445, iteration: 222523
loss: 1.0266258716583252,grad_norm: 0.9534150748988086, iteration: 222524
loss: 0.9932256937026978,grad_norm: 0.9895216864752733, iteration: 222525
loss: 0.9763410687446594,grad_norm: 0.8920104169316235, iteration: 222526
loss: 1.0267590284347534,grad_norm: 0.791363516879515, iteration: 222527
loss: 0.9883970618247986,grad_norm: 0.9999992780532123, iteration: 222528
loss: 1.0146955251693726,grad_norm: 0.995856705497615, iteration: 222529
loss: 0.9750019907951355,grad_norm: 0.8654202582911495, iteration: 222530
loss: 1.01081120967865,grad_norm: 0.9519801109870858, iteration: 222531
loss: 1.0146840810775757,grad_norm: 0.9999990061276047, iteration: 222532
loss: 0.9982037544250488,grad_norm: 0.7396925758716345, iteration: 222533
loss: 0.9896508455276489,grad_norm: 0.9179791276593496, iteration: 222534
loss: 1.009558081626892,grad_norm: 0.9999990207324365, iteration: 222535
loss: 0.9954496026039124,grad_norm: 0.8151038774073669, iteration: 222536
loss: 0.9915451407432556,grad_norm: 0.9999991681105489, iteration: 222537
loss: 0.9854637384414673,grad_norm: 0.9106193256657711, iteration: 222538
loss: 0.9921407699584961,grad_norm: 0.9810268679408848, iteration: 222539
loss: 0.9780718684196472,grad_norm: 0.999999204726775, iteration: 222540
loss: 0.9965466856956482,grad_norm: 0.8653311408901457, iteration: 222541
loss: 0.9996347427368164,grad_norm: 0.9999991472374339, iteration: 222542
loss: 0.9899513125419617,grad_norm: 0.8412375563250504, iteration: 222543
loss: 1.0445950031280518,grad_norm: 0.8923561970695758, iteration: 222544
loss: 0.9741024971008301,grad_norm: 0.830493638007172, iteration: 222545
loss: 1.0061402320861816,grad_norm: 0.9075433973396094, iteration: 222546
loss: 0.9822112321853638,grad_norm: 0.8909334407422155, iteration: 222547
loss: 1.022328495979309,grad_norm: 0.8498192370645667, iteration: 222548
loss: 0.9747312664985657,grad_norm: 0.9999990243956077, iteration: 222549
loss: 1.0206021070480347,grad_norm: 0.8301915087888012, iteration: 222550
loss: 0.9839380383491516,grad_norm: 0.8596187635095306, iteration: 222551
loss: 1.0158692598342896,grad_norm: 0.8436733645554615, iteration: 222552
loss: 1.0301419496536255,grad_norm: 0.999999068670043, iteration: 222553
loss: 0.9858275651931763,grad_norm: 0.9780758270864838, iteration: 222554
loss: 1.0288245677947998,grad_norm: 0.9999991294464375, iteration: 222555
loss: 1.017982840538025,grad_norm: 0.8261472231166889, iteration: 222556
loss: 0.997741162776947,grad_norm: 0.9817098179680851, iteration: 222557
loss: 0.9664623737335205,grad_norm: 0.7926485416103866, iteration: 222558
loss: 0.9697865843772888,grad_norm: 0.8151275247147362, iteration: 222559
loss: 1.002017617225647,grad_norm: 0.9999992083718245, iteration: 222560
loss: 1.0118261575698853,grad_norm: 0.9999991331903559, iteration: 222561
loss: 0.9906919002532959,grad_norm: 0.9999991953922969, iteration: 222562
loss: 1.0075205564498901,grad_norm: 0.9999988297610717, iteration: 222563
loss: 1.062864899635315,grad_norm: 0.999999843965178, iteration: 222564
loss: 1.0067977905273438,grad_norm: 0.9999992072889511, iteration: 222565
loss: 1.0008701086044312,grad_norm: 0.9999992408893823, iteration: 222566
loss: 1.0222474336624146,grad_norm: 0.8955406941724259, iteration: 222567
loss: 1.0173949003219604,grad_norm: 0.9999991978223601, iteration: 222568
loss: 0.9682226181030273,grad_norm: 0.9999989635755847, iteration: 222569
loss: 0.9968386888504028,grad_norm: 0.9999990959952977, iteration: 222570
loss: 0.9826452732086182,grad_norm: 0.9999992238450158, iteration: 222571
loss: 0.9845784902572632,grad_norm: 0.9999997564621836, iteration: 222572
loss: 1.0043010711669922,grad_norm: 0.9999989286858859, iteration: 222573
loss: 0.9988018274307251,grad_norm: 0.9585525570174669, iteration: 222574
loss: 0.9985278844833374,grad_norm: 0.7498323222338334, iteration: 222575
loss: 1.025627613067627,grad_norm: 0.9431307760677177, iteration: 222576
loss: 0.9779074192047119,grad_norm: 0.978393094084613, iteration: 222577
loss: 1.0238004922866821,grad_norm: 0.8452411678141051, iteration: 222578
loss: 1.0058238506317139,grad_norm: 0.999999108370731, iteration: 222579
loss: 1.0102298259735107,grad_norm: 0.9846358077695073, iteration: 222580
loss: 1.0037397146224976,grad_norm: 0.9043479830297151, iteration: 222581
loss: 1.0573744773864746,grad_norm: 0.9126144515088426, iteration: 222582
loss: 0.9485460519790649,grad_norm: 0.9557339397868847, iteration: 222583
loss: 0.9959948658943176,grad_norm: 0.9358668676056034, iteration: 222584
loss: 1.059652328491211,grad_norm: 0.9999991824396222, iteration: 222585
loss: 0.9878745675086975,grad_norm: 0.9276211834241868, iteration: 222586
loss: 1.0182067155838013,grad_norm: 0.9999991364558444, iteration: 222587
loss: 0.9770213961601257,grad_norm: 0.8878841212531231, iteration: 222588
loss: 0.9805958271026611,grad_norm: 0.8668563041963199, iteration: 222589
loss: 1.0297906398773193,grad_norm: 0.9705608365858988, iteration: 222590
loss: 1.0068349838256836,grad_norm: 0.9999991613672646, iteration: 222591
loss: 1.0339003801345825,grad_norm: 0.8983254788213677, iteration: 222592
loss: 0.9904855489730835,grad_norm: 0.9580868176534704, iteration: 222593
loss: 1.0317397117614746,grad_norm: 0.9999989958993316, iteration: 222594
loss: 1.004469871520996,grad_norm: 0.9001876084326618, iteration: 222595
loss: 0.9923962354660034,grad_norm: 0.9576311626691316, iteration: 222596
loss: 1.0156599283218384,grad_norm: 0.816397812004179, iteration: 222597
loss: 1.012556791305542,grad_norm: 0.9148140287079938, iteration: 222598
loss: 0.9742110967636108,grad_norm: 0.9999991472935764, iteration: 222599
loss: 0.9865509867668152,grad_norm: 0.9999989989073791, iteration: 222600
loss: 1.0099166631698608,grad_norm: 0.999998995112112, iteration: 222601
loss: 1.007582426071167,grad_norm: 0.9251557376031048, iteration: 222602
loss: 1.009229302406311,grad_norm: 0.9999990234450618, iteration: 222603
loss: 1.017135739326477,grad_norm: 0.9079717496475793, iteration: 222604
loss: 1.0027207136154175,grad_norm: 0.9999991217181412, iteration: 222605
loss: 0.9828373193740845,grad_norm: 0.8708417743814211, iteration: 222606
loss: 0.9912600517272949,grad_norm: 0.8552459184466165, iteration: 222607
loss: 1.0208091735839844,grad_norm: 0.9763682599011423, iteration: 222608
loss: 0.9630261063575745,grad_norm: 0.9999990944772412, iteration: 222609
loss: 1.0116420984268188,grad_norm: 0.9774303341307935, iteration: 222610
loss: 0.9399961829185486,grad_norm: 0.9999992223210291, iteration: 222611
loss: 0.9920459389686584,grad_norm: 0.8990907497943335, iteration: 222612
loss: 1.0150848627090454,grad_norm: 0.936721742402192, iteration: 222613
loss: 0.9747165441513062,grad_norm: 0.9698212158363304, iteration: 222614
loss: 1.007483959197998,grad_norm: 0.9999991560905682, iteration: 222615
loss: 1.0153677463531494,grad_norm: 0.8320098999692089, iteration: 222616
loss: 0.9915983080863953,grad_norm: 0.9999992288365923, iteration: 222617
loss: 1.005866289138794,grad_norm: 0.9472009637175481, iteration: 222618
loss: 0.9571797251701355,grad_norm: 0.8084111411367331, iteration: 222619
loss: 0.9919506311416626,grad_norm: 0.9989583898981176, iteration: 222620
loss: 0.9804946184158325,grad_norm: 0.9999990109579685, iteration: 222621
loss: 0.9753740429878235,grad_norm: 0.9383126072674739, iteration: 222622
loss: 0.9758919477462769,grad_norm: 0.9999998075961242, iteration: 222623
loss: 0.9949542880058289,grad_norm: 0.9428132242897532, iteration: 222624
loss: 0.9585391283035278,grad_norm: 0.9025733405822449, iteration: 222625
loss: 1.002137303352356,grad_norm: 0.7821508788506047, iteration: 222626
loss: 1.0163918733596802,grad_norm: 0.8477270188345478, iteration: 222627
loss: 1.0219467878341675,grad_norm: 0.9999992226729043, iteration: 222628
loss: 0.9932466745376587,grad_norm: 0.999999046816052, iteration: 222629
loss: 1.0229283571243286,grad_norm: 0.8875656425143149, iteration: 222630
loss: 1.0063568353652954,grad_norm: 0.7958151897301453, iteration: 222631
loss: 1.0055723190307617,grad_norm: 0.8531245578008744, iteration: 222632
loss: 0.9633625149726868,grad_norm: 0.8933776539970285, iteration: 222633
loss: 0.9752359986305237,grad_norm: 0.9999991307622484, iteration: 222634
loss: 1.0329346656799316,grad_norm: 0.8787429824692472, iteration: 222635
loss: 1.0225765705108643,grad_norm: 0.9625673157936818, iteration: 222636
loss: 0.9804113507270813,grad_norm: 0.9277450387383622, iteration: 222637
loss: 1.0002230405807495,grad_norm: 0.9999992973954798, iteration: 222638
loss: 1.0304707288742065,grad_norm: 0.8709617741259783, iteration: 222639
loss: 0.9957278370857239,grad_norm: 0.8049502835795743, iteration: 222640
loss: 1.0184721946716309,grad_norm: 0.7875825366003764, iteration: 222641
loss: 0.9941516518592834,grad_norm: 0.9699337690836203, iteration: 222642
loss: 1.011392593383789,grad_norm: 0.8926406008435687, iteration: 222643
loss: 0.9965264201164246,grad_norm: 0.9629642044962561, iteration: 222644
loss: 1.0042097568511963,grad_norm: 0.8385543123823627, iteration: 222645
loss: 1.0369831323623657,grad_norm: 0.8824327737694264, iteration: 222646
loss: 0.9553495049476624,grad_norm: 0.9963640295931169, iteration: 222647
loss: 0.9466175436973572,grad_norm: 0.9999989477295125, iteration: 222648
loss: 0.9716854095458984,grad_norm: 0.9999990385235895, iteration: 222649
loss: 0.9802114367485046,grad_norm: 0.8119857493670357, iteration: 222650
loss: 0.9863630533218384,grad_norm: 0.830888301890392, iteration: 222651
loss: 1.0322656631469727,grad_norm: 0.9832713876620991, iteration: 222652
loss: 0.9917519092559814,grad_norm: 0.9959160195225001, iteration: 222653
loss: 1.017236351966858,grad_norm: 0.9999994616981986, iteration: 222654
loss: 1.009537696838379,grad_norm: 0.9999995810815341, iteration: 222655
loss: 0.9979100823402405,grad_norm: 0.9462466783186329, iteration: 222656
loss: 0.9972517490386963,grad_norm: 0.9948958647693393, iteration: 222657
loss: 0.9928326606750488,grad_norm: 0.8533418523851065, iteration: 222658
loss: 0.9885340929031372,grad_norm: 0.9719450214984989, iteration: 222659
loss: 0.9921101331710815,grad_norm: 0.9878823525756063, iteration: 222660
loss: 1.0042144060134888,grad_norm: 0.8474501779146133, iteration: 222661
loss: 1.006165623664856,grad_norm: 0.9800238386194324, iteration: 222662
loss: 1.0047308206558228,grad_norm: 0.8144889036391297, iteration: 222663
loss: 0.9919770359992981,grad_norm: 0.8911646602939659, iteration: 222664
loss: 0.9852068424224854,grad_norm: 0.892542377580405, iteration: 222665
loss: 1.0067248344421387,grad_norm: 0.955435902026964, iteration: 222666
loss: 1.0317168235778809,grad_norm: 0.8395578003418203, iteration: 222667
loss: 1.0207502841949463,grad_norm: 0.8286626943920163, iteration: 222668
loss: 0.9692363142967224,grad_norm: 0.9558554769441331, iteration: 222669
loss: 0.9834123253822327,grad_norm: 0.9910216714139805, iteration: 222670
loss: 1.0064470767974854,grad_norm: 0.8281120245609805, iteration: 222671
loss: 0.9968018531799316,grad_norm: 0.9830530168869693, iteration: 222672
loss: 0.9956791400909424,grad_norm: 0.9999994903757292, iteration: 222673
loss: 1.026787281036377,grad_norm: 0.9999997366252273, iteration: 222674
loss: 0.9976747632026672,grad_norm: 0.8679036462097431, iteration: 222675
loss: 0.9574909210205078,grad_norm: 0.9145683707304781, iteration: 222676
loss: 0.9904861450195312,grad_norm: 0.8773432455783378, iteration: 222677
loss: 1.0036972761154175,grad_norm: 0.9999990989591963, iteration: 222678
loss: 0.9968180060386658,grad_norm: 0.8631744179598406, iteration: 222679
loss: 1.0125696659088135,grad_norm: 0.8596409544871517, iteration: 222680
loss: 1.049216628074646,grad_norm: 0.9999990951056712, iteration: 222681
loss: 0.9911017417907715,grad_norm: 0.8654656942015467, iteration: 222682
loss: 1.0335991382598877,grad_norm: 0.8599391810156038, iteration: 222683
loss: 1.0398719310760498,grad_norm: 0.9999993214892775, iteration: 222684
loss: 1.0147727727890015,grad_norm: 0.9248048549166494, iteration: 222685
loss: 1.019495964050293,grad_norm: 0.8747576425756463, iteration: 222686
loss: 0.9901298880577087,grad_norm: 0.8574111688132519, iteration: 222687
loss: 1.0210587978363037,grad_norm: 0.931127544302786, iteration: 222688
loss: 1.1618462800979614,grad_norm: 0.9999996933044788, iteration: 222689
loss: 0.9636213779449463,grad_norm: 0.9903073627155604, iteration: 222690
loss: 0.9936210513114929,grad_norm: 0.9844475685388617, iteration: 222691
loss: 0.9958087801933289,grad_norm: 0.7768570488911374, iteration: 222692
loss: 1.0073407888412476,grad_norm: 0.9390769230865963, iteration: 222693
loss: 1.0195220708847046,grad_norm: 0.9993764564022114, iteration: 222694
loss: 1.0136728286743164,grad_norm: 0.99999908825836, iteration: 222695
loss: 1.0292185544967651,grad_norm: 0.9999996554769902, iteration: 222696
loss: 1.0081576108932495,grad_norm: 0.9496931340076562, iteration: 222697
loss: 1.0680242776870728,grad_norm: 0.9999997975119453, iteration: 222698
loss: 0.9762771129608154,grad_norm: 0.9926120380856981, iteration: 222699
loss: 1.0156011581420898,grad_norm: 0.8793801044931896, iteration: 222700
loss: 1.0459237098693848,grad_norm: 0.9999995034659497, iteration: 222701
loss: 0.9868799448013306,grad_norm: 0.7482702485470505, iteration: 222702
loss: 0.9979249238967896,grad_norm: 0.9999992934327172, iteration: 222703
loss: 0.9888430833816528,grad_norm: 0.9999990591262335, iteration: 222704
loss: 0.9644950032234192,grad_norm: 0.9372442330145507, iteration: 222705
loss: 1.0051026344299316,grad_norm: 0.9011054488639326, iteration: 222706
loss: 1.0794646739959717,grad_norm: 0.9072298473936146, iteration: 222707
loss: 1.0141271352767944,grad_norm: 0.9469550549829535, iteration: 222708
loss: 1.0056884288787842,grad_norm: 0.9125374331961177, iteration: 222709
loss: 1.0095441341400146,grad_norm: 0.9933771835803042, iteration: 222710
loss: 0.9817492961883545,grad_norm: 0.9849995478003442, iteration: 222711
loss: 1.1648893356323242,grad_norm: 0.9999993658434296, iteration: 222712
loss: 1.1187559366226196,grad_norm: 0.8707711535835517, iteration: 222713
loss: 1.0800150632858276,grad_norm: 0.9999996868514138, iteration: 222714
loss: 0.9865243434906006,grad_norm: 0.999999520919926, iteration: 222715
loss: 0.9904012680053711,grad_norm: 0.7953163460724441, iteration: 222716
loss: 1.0950390100479126,grad_norm: 0.9999998347928725, iteration: 222717
loss: 1.019777774810791,grad_norm: 0.9999999466720935, iteration: 222718
loss: 1.0929696559906006,grad_norm: 0.8482895900354077, iteration: 222719
loss: 1.0546433925628662,grad_norm: 0.9999999710034081, iteration: 222720
loss: 1.0964545011520386,grad_norm: 0.9999999778993759, iteration: 222721
loss: 1.0167436599731445,grad_norm: 0.9999990593958776, iteration: 222722
loss: 0.9822021722793579,grad_norm: 0.7554917532159864, iteration: 222723
loss: 0.995076596736908,grad_norm: 0.9486857896858066, iteration: 222724
loss: 1.0051894187927246,grad_norm: 0.7856750609795756, iteration: 222725
loss: 0.968346118927002,grad_norm: 0.8417896814074448, iteration: 222726
loss: 0.9938039183616638,grad_norm: 0.96537676047106, iteration: 222727
loss: 1.0373998880386353,grad_norm: 0.9999992497102553, iteration: 222728
loss: 0.9724036455154419,grad_norm: 0.8099664712024967, iteration: 222729
loss: 0.9584743976593018,grad_norm: 0.8265628143392156, iteration: 222730
loss: 1.0402469635009766,grad_norm: 0.9784622004991991, iteration: 222731
loss: 1.0108137130737305,grad_norm: 0.7773083723934469, iteration: 222732
loss: 0.9905644655227661,grad_norm: 0.8552427256937147, iteration: 222733
loss: 0.9735741019248962,grad_norm: 0.8677519026335424, iteration: 222734
loss: 1.0034737586975098,grad_norm: 0.9316371176674217, iteration: 222735
loss: 1.0350383520126343,grad_norm: 0.9324026045728361, iteration: 222736
loss: 0.9914149641990662,grad_norm: 0.8617153507884208, iteration: 222737
loss: 0.949781060218811,grad_norm: 0.9191506753965833, iteration: 222738
loss: 1.0175669193267822,grad_norm: 0.8795333391752935, iteration: 222739
loss: 0.9715315699577332,grad_norm: 0.7914484596639344, iteration: 222740
loss: 0.9781512022018433,grad_norm: 0.8312326913717532, iteration: 222741
loss: 0.9716518521308899,grad_norm: 0.9999991117800341, iteration: 222742
loss: 0.9967091083526611,grad_norm: 0.901423605105671, iteration: 222743
loss: 1.0101855993270874,grad_norm: 0.9999992269746592, iteration: 222744
loss: 0.9604332447052002,grad_norm: 0.9999992398642049, iteration: 222745
loss: 1.005030870437622,grad_norm: 0.9732192562445658, iteration: 222746
loss: 0.9817788004875183,grad_norm: 0.9999990679378289, iteration: 222747
loss: 0.9689394235610962,grad_norm: 0.9869732571176061, iteration: 222748
loss: 1.0160272121429443,grad_norm: 0.9999992930436044, iteration: 222749
loss: 0.9661617875099182,grad_norm: 0.9761291963819322, iteration: 222750
loss: 0.9850888848304749,grad_norm: 0.8678549155569474, iteration: 222751
loss: 1.0133076906204224,grad_norm: 0.9232987064883411, iteration: 222752
loss: 1.1761586666107178,grad_norm: 0.9999992498871868, iteration: 222753
loss: 1.0241345167160034,grad_norm: 0.8792941265234724, iteration: 222754
loss: 1.010493516921997,grad_norm: 0.7592609066131376, iteration: 222755
loss: 1.0262656211853027,grad_norm: 0.999999417829486, iteration: 222756
loss: 1.0000611543655396,grad_norm: 0.9275273710050664, iteration: 222757
loss: 1.01365065574646,grad_norm: 0.8757718509018957, iteration: 222758
loss: 0.9555730819702148,grad_norm: 0.8640820768999651, iteration: 222759
loss: 1.0263688564300537,grad_norm: 0.9347495099010671, iteration: 222760
loss: 0.9860226511955261,grad_norm: 0.963113466960617, iteration: 222761
loss: 1.0155352354049683,grad_norm: 0.9999990405914819, iteration: 222762
loss: 0.9886684417724609,grad_norm: 0.9999992101829012, iteration: 222763
loss: 1.0113447904586792,grad_norm: 0.9822496315418748, iteration: 222764
loss: 0.9940452575683594,grad_norm: 0.9172861292276204, iteration: 222765
loss: 0.9930733442306519,grad_norm: 0.9834620363632463, iteration: 222766
loss: 1.0013521909713745,grad_norm: 0.8105604491917809, iteration: 222767
loss: 1.0272173881530762,grad_norm: 0.9999992361235952, iteration: 222768
loss: 0.9882230758666992,grad_norm: 0.9999991810289858, iteration: 222769
loss: 0.9744995832443237,grad_norm: 0.8448486208528923, iteration: 222770
loss: 0.9800240993499756,grad_norm: 0.9999991339446316, iteration: 222771
loss: 0.9703750610351562,grad_norm: 0.8819562041410778, iteration: 222772
loss: 1.0077530145645142,grad_norm: 0.9620768263926521, iteration: 222773
loss: 0.962212324142456,grad_norm: 0.9658384751242733, iteration: 222774
loss: 1.0016131401062012,grad_norm: 0.8036117905825892, iteration: 222775
loss: 1.0130072832107544,grad_norm: 0.9999994040784862, iteration: 222776
loss: 1.0007768869400024,grad_norm: 0.9999992347823496, iteration: 222777
loss: 0.9829210638999939,grad_norm: 0.9999990858806841, iteration: 222778
loss: 0.9516009092330933,grad_norm: 0.9999990245737563, iteration: 222779
loss: 0.9568198323249817,grad_norm: 0.9999990794686577, iteration: 222780
loss: 0.9951866269111633,grad_norm: 1.0000000149284753, iteration: 222781
loss: 0.9862789511680603,grad_norm: 0.9999991114401047, iteration: 222782
loss: 1.0110235214233398,grad_norm: 0.9999990051501482, iteration: 222783
loss: 1.0149418115615845,grad_norm: 0.966226410761549, iteration: 222784
loss: 1.017917513847351,grad_norm: 0.9730701255189615, iteration: 222785
loss: 0.9798935651779175,grad_norm: 0.9650285977974535, iteration: 222786
loss: 1.0201752185821533,grad_norm: 0.9302440469040728, iteration: 222787
loss: 1.0013015270233154,grad_norm: 0.9005422148958577, iteration: 222788
loss: 1.0170291662216187,grad_norm: 0.9713644510465329, iteration: 222789
loss: 1.0042095184326172,grad_norm: 0.9379802092873968, iteration: 222790
loss: 1.0139483213424683,grad_norm: 0.9999991764732001, iteration: 222791
loss: 0.9976881146430969,grad_norm: 0.8482453053911857, iteration: 222792
loss: 1.0093679428100586,grad_norm: 0.9932576850544028, iteration: 222793
loss: 1.2318181991577148,grad_norm: 0.9999993032648783, iteration: 222794
loss: 1.035733699798584,grad_norm: 0.9416550803535839, iteration: 222795
loss: 1.025559425354004,grad_norm: 0.9766281049515079, iteration: 222796
loss: 0.9908900856971741,grad_norm: 0.9999990272348164, iteration: 222797
loss: 1.0111668109893799,grad_norm: 0.9999991051537892, iteration: 222798
loss: 1.008321762084961,grad_norm: 0.9999999207589836, iteration: 222799
loss: 0.9677788019180298,grad_norm: 0.8442661957041372, iteration: 222800
loss: 1.034486174583435,grad_norm: 0.8568437864869061, iteration: 222801
loss: 1.0083153247833252,grad_norm: 0.8689536102290767, iteration: 222802
loss: 1.0091732740402222,grad_norm: 0.8823915465057126, iteration: 222803
loss: 0.9837570786476135,grad_norm: 0.7606260218530561, iteration: 222804
loss: 1.015779972076416,grad_norm: 0.8190879185186487, iteration: 222805
loss: 0.9848259687423706,grad_norm: 0.8952235370704692, iteration: 222806
loss: 1.025166392326355,grad_norm: 0.9999991858571948, iteration: 222807
loss: 1.0270004272460938,grad_norm: 0.9560257662750464, iteration: 222808
loss: 0.9721868634223938,grad_norm: 0.999999199203799, iteration: 222809
loss: 0.9937257766723633,grad_norm: 0.9350305541093497, iteration: 222810
loss: 1.0214130878448486,grad_norm: 0.9999994725920917, iteration: 222811
loss: 0.9921782612800598,grad_norm: 0.9572247895704853, iteration: 222812
loss: 1.0727167129516602,grad_norm: 1.0000000975589598, iteration: 222813
loss: 1.063923716545105,grad_norm: 0.951606550719213, iteration: 222814
loss: 1.0006481409072876,grad_norm: 0.9999995026141408, iteration: 222815
loss: 1.0022135972976685,grad_norm: 0.8799733002785238, iteration: 222816
loss: 0.9950385093688965,grad_norm: 0.9161185936300958, iteration: 222817
loss: 1.0074986219406128,grad_norm: 0.999999198110241, iteration: 222818
loss: 0.9982308149337769,grad_norm: 0.9999991383791473, iteration: 222819
loss: 1.0346882343292236,grad_norm: 0.8635212484770927, iteration: 222820
loss: 1.0114370584487915,grad_norm: 0.9364466004546906, iteration: 222821
loss: 1.0561556816101074,grad_norm: 0.9999991064124186, iteration: 222822
loss: 0.9465935230255127,grad_norm: 0.9473043000687089, iteration: 222823
loss: 1.0035171508789062,grad_norm: 0.999999179375715, iteration: 222824
loss: 1.0447317361831665,grad_norm: 0.9486716813874185, iteration: 222825
loss: 0.9673430919647217,grad_norm: 0.9253671787652555, iteration: 222826
loss: 1.0128780603408813,grad_norm: 0.9083377537424395, iteration: 222827
loss: 0.9974741339683533,grad_norm: 0.9315253885093662, iteration: 222828
loss: 0.9744220972061157,grad_norm: 0.8098769922749879, iteration: 222829
loss: 1.0089588165283203,grad_norm: 0.788874382517592, iteration: 222830
loss: 1.011804461479187,grad_norm: 0.922485566974473, iteration: 222831
loss: 0.974573016166687,grad_norm: 0.9999993596914637, iteration: 222832
loss: 1.0125923156738281,grad_norm: 0.9137792494647927, iteration: 222833
loss: 1.0388820171356201,grad_norm: 0.9999990416170982, iteration: 222834
loss: 0.9897769093513489,grad_norm: 0.9999990750305324, iteration: 222835
loss: 1.0119110345840454,grad_norm: 0.946845177350775, iteration: 222836
loss: 1.0013363361358643,grad_norm: 0.8364125750266468, iteration: 222837
loss: 1.0101560354232788,grad_norm: 0.9126316674288121, iteration: 222838
loss: 1.0206981897354126,grad_norm: 0.9117065835057776, iteration: 222839
loss: 1.0081353187561035,grad_norm: 0.9999990399737166, iteration: 222840
loss: 0.9562202095985413,grad_norm: 0.9999991496693614, iteration: 222841
loss: 0.9763827323913574,grad_norm: 0.9999993032612511, iteration: 222842
loss: 0.9928792119026184,grad_norm: 0.8985618060574441, iteration: 222843
loss: 1.0046213865280151,grad_norm: 0.9999991419910208, iteration: 222844
loss: 1.0021710395812988,grad_norm: 0.933456773355786, iteration: 222845
loss: 0.9678491950035095,grad_norm: 0.885463478096703, iteration: 222846
loss: 0.9765393137931824,grad_norm: 0.9999990325972998, iteration: 222847
loss: 1.0102593898773193,grad_norm: 0.9800058048868554, iteration: 222848
loss: 1.0194939374923706,grad_norm: 0.8934806373608895, iteration: 222849
loss: 0.9912261366844177,grad_norm: 0.7662335209728078, iteration: 222850
loss: 1.0166980028152466,grad_norm: 0.999999087464919, iteration: 222851
loss: 1.0037460327148438,grad_norm: 0.8439627889745411, iteration: 222852
loss: 1.006897211074829,grad_norm: 0.9738187646245571, iteration: 222853
loss: 0.9871797561645508,grad_norm: 0.9999990832671249, iteration: 222854
loss: 1.026456356048584,grad_norm: 0.9961497427070638, iteration: 222855
loss: 0.9847760200500488,grad_norm: 0.999999141597383, iteration: 222856
loss: 1.0006320476531982,grad_norm: 0.7938099604492174, iteration: 222857
loss: 1.032286524772644,grad_norm: 0.9999999435328195, iteration: 222858
loss: 1.011376976966858,grad_norm: 0.9813413794653888, iteration: 222859
loss: 0.9744516015052795,grad_norm: 0.9999995188459759, iteration: 222860
loss: 0.9954236745834351,grad_norm: 0.999999339389928, iteration: 222861
loss: 1.0005179643630981,grad_norm: 0.9342386280462441, iteration: 222862
loss: 0.9584402441978455,grad_norm: 0.9947239583308661, iteration: 222863
loss: 1.0137734413146973,grad_norm: 0.766879663768186, iteration: 222864
loss: 0.9753713011741638,grad_norm: 0.8537580467201832, iteration: 222865
loss: 0.9611210823059082,grad_norm: 0.8199411230278125, iteration: 222866
loss: 1.0093251466751099,grad_norm: 0.7971587028297422, iteration: 222867
loss: 1.0034844875335693,grad_norm: 0.9999990425245526, iteration: 222868
loss: 0.9761953949928284,grad_norm: 0.8635136553683861, iteration: 222869
loss: 0.9591336250305176,grad_norm: 0.9200896950451735, iteration: 222870
loss: 1.0307247638702393,grad_norm: 0.9900731031177623, iteration: 222871
loss: 1.026914119720459,grad_norm: 0.8829552779484383, iteration: 222872
loss: 1.014169454574585,grad_norm: 0.9151025590917711, iteration: 222873
loss: 1.005070447921753,grad_norm: 0.999999071250282, iteration: 222874
loss: 0.9900071024894714,grad_norm: 0.9999995586478999, iteration: 222875
loss: 1.0116639137268066,grad_norm: 0.9800935515671925, iteration: 222876
loss: 1.008903980255127,grad_norm: 0.8873868337419126, iteration: 222877
loss: 1.0146558284759521,grad_norm: 0.9750191254463417, iteration: 222878
loss: 1.0196199417114258,grad_norm: 0.9999991762487908, iteration: 222879
loss: 1.0133720636367798,grad_norm: 0.9999991867267279, iteration: 222880
loss: 1.0141687393188477,grad_norm: 0.9999992094733748, iteration: 222881
loss: 1.004970669746399,grad_norm: 0.9999991589052278, iteration: 222882
loss: 1.0005035400390625,grad_norm: 0.9384277607436884, iteration: 222883
loss: 0.9748960733413696,grad_norm: 0.9999990955247329, iteration: 222884
loss: 0.9981099367141724,grad_norm: 0.8552903941673318, iteration: 222885
loss: 0.9793172478675842,grad_norm: 0.9999990312915219, iteration: 222886
loss: 0.9610338807106018,grad_norm: 0.8763372713732716, iteration: 222887
loss: 1.024167776107788,grad_norm: 0.8215986267301664, iteration: 222888
loss: 0.935456395149231,grad_norm: 0.8441275233270268, iteration: 222889
loss: 0.9859740734100342,grad_norm: 0.9885253865416411, iteration: 222890
loss: 1.0308862924575806,grad_norm: 0.9999990336012436, iteration: 222891
loss: 1.172116994857788,grad_norm: 0.9999997480956506, iteration: 222892
loss: 0.9944048523902893,grad_norm: 0.8464107402719763, iteration: 222893
loss: 0.9983075261116028,grad_norm: 0.8078355490291044, iteration: 222894
loss: 0.9705660343170166,grad_norm: 0.8119101846546434, iteration: 222895
loss: 0.97992342710495,grad_norm: 0.9999992053299747, iteration: 222896
loss: 1.0009649991989136,grad_norm: 0.9703484428000747, iteration: 222897
loss: 1.0263596773147583,grad_norm: 0.783366637195668, iteration: 222898
loss: 1.0110429525375366,grad_norm: 0.8422772627884576, iteration: 222899
loss: 1.0144938230514526,grad_norm: 0.9547503734819114, iteration: 222900
loss: 1.004820466041565,grad_norm: 0.9999992780450059, iteration: 222901
loss: 1.0133488178253174,grad_norm: 0.9084027160256289, iteration: 222902
loss: 0.98995041847229,grad_norm: 0.9999990220845173, iteration: 222903
loss: 1.0242483615875244,grad_norm: 0.8072442253767499, iteration: 222904
loss: 1.015836238861084,grad_norm: 0.9999990316369027, iteration: 222905
loss: 0.9850610494613647,grad_norm: 0.8978358179505983, iteration: 222906
loss: 0.9755575656890869,grad_norm: 0.8803169709622592, iteration: 222907
loss: 0.9682735204696655,grad_norm: 0.8917426907233273, iteration: 222908
loss: 1.0235306024551392,grad_norm: 0.8916450092806526, iteration: 222909
loss: 1.0023161172866821,grad_norm: 0.8556313531991937, iteration: 222910
loss: 1.011276125907898,grad_norm: 0.9999991250409235, iteration: 222911
loss: 1.0086787939071655,grad_norm: 0.8247972405250152, iteration: 222912
loss: 1.0301201343536377,grad_norm: 0.9051290097363903, iteration: 222913
loss: 0.9966934323310852,grad_norm: 0.9999990321793778, iteration: 222914
loss: 0.9812760353088379,grad_norm: 0.9848396989439233, iteration: 222915
loss: 0.9871765971183777,grad_norm: 0.775560497933524, iteration: 222916
loss: 0.9772424697875977,grad_norm: 0.902479752661523, iteration: 222917
loss: 0.9967178106307983,grad_norm: 0.8960487646022999, iteration: 222918
loss: 0.9904143810272217,grad_norm: 0.7498499960908129, iteration: 222919
loss: 0.9914504885673523,grad_norm: 0.8637618348957073, iteration: 222920
loss: 0.9963643550872803,grad_norm: 0.7887760675353656, iteration: 222921
loss: 1.003784418106079,grad_norm: 0.938957932777275, iteration: 222922
loss: 1.0202205181121826,grad_norm: 0.9170768597393791, iteration: 222923
loss: 1.0370995998382568,grad_norm: 0.9901307487809546, iteration: 222924
loss: 0.972262978553772,grad_norm: 0.8431407428349893, iteration: 222925
loss: 0.9866098761558533,grad_norm: 0.9999993311456488, iteration: 222926
loss: 0.9746904969215393,grad_norm: 0.8518051582733174, iteration: 222927
loss: 0.9926005005836487,grad_norm: 0.9999991671498326, iteration: 222928
loss: 0.9928113222122192,grad_norm: 0.9562992018404103, iteration: 222929
loss: 1.0046477317810059,grad_norm: 0.8626549598819432, iteration: 222930
loss: 1.0328012704849243,grad_norm: 0.9999991105341329, iteration: 222931
loss: 0.97361820936203,grad_norm: 0.915639285707045, iteration: 222932
loss: 0.9809430837631226,grad_norm: 0.7605912122532985, iteration: 222933
loss: 1.0011557340621948,grad_norm: 0.8239712754989669, iteration: 222934
loss: 1.0122476816177368,grad_norm: 0.978032090064623, iteration: 222935
loss: 1.02974534034729,grad_norm: 0.9939154164439951, iteration: 222936
loss: 0.9758564829826355,grad_norm: 0.8680337727236915, iteration: 222937
loss: 1.0243557691574097,grad_norm: 0.8720556528262804, iteration: 222938
loss: 0.9938693642616272,grad_norm: 0.9999990264664124, iteration: 222939
loss: 1.0327823162078857,grad_norm: 0.9402437292478645, iteration: 222940
loss: 0.9800242185592651,grad_norm: 0.9999990571624345, iteration: 222941
loss: 0.9916436672210693,grad_norm: 0.9326023912739818, iteration: 222942
loss: 0.9953895807266235,grad_norm: 0.9346087866413899, iteration: 222943
loss: 0.9888317584991455,grad_norm: 0.9203047281867471, iteration: 222944
loss: 1.0143070220947266,grad_norm: 0.9754214669916975, iteration: 222945
loss: 0.998176634311676,grad_norm: 0.6963498336048634, iteration: 222946
loss: 1.0199470520019531,grad_norm: 0.9999994387231261, iteration: 222947
loss: 1.0388712882995605,grad_norm: 0.9573651135002096, iteration: 222948
loss: 1.0172126293182373,grad_norm: 0.9678810496149022, iteration: 222949
loss: 1.0015885829925537,grad_norm: 0.8968841766324145, iteration: 222950
loss: 1.0189062356948853,grad_norm: 0.8091126613092505, iteration: 222951
loss: 1.015702247619629,grad_norm: 0.9133648435383428, iteration: 222952
loss: 0.9756935834884644,grad_norm: 0.8511296801172232, iteration: 222953
loss: 1.0008964538574219,grad_norm: 0.9999990154611327, iteration: 222954
loss: 0.9973961114883423,grad_norm: 0.7981777891976086, iteration: 222955
loss: 0.9823317527770996,grad_norm: 0.924389993416813, iteration: 222956
loss: 0.9643397927284241,grad_norm: 0.9999991212667566, iteration: 222957
loss: 1.0012125968933105,grad_norm: 0.7999424082924013, iteration: 222958
loss: 0.9819074869155884,grad_norm: 0.8049874450264451, iteration: 222959
loss: 0.9927466511726379,grad_norm: 0.9999992035737115, iteration: 222960
loss: 1.0105074644088745,grad_norm: 0.9999991338523064, iteration: 222961
loss: 0.9888902306556702,grad_norm: 0.9999992561772348, iteration: 222962
loss: 1.087317705154419,grad_norm: 0.9235193237849254, iteration: 222963
loss: 0.9757722616195679,grad_norm: 0.8563212088882326, iteration: 222964
loss: 1.0332905054092407,grad_norm: 0.7860671201179577, iteration: 222965
loss: 1.0119256973266602,grad_norm: 0.9999995405195525, iteration: 222966
loss: 1.0129191875457764,grad_norm: 0.9999990812425822, iteration: 222967
loss: 0.996081531047821,grad_norm: 0.9999993317081997, iteration: 222968
loss: 1.021823763847351,grad_norm: 0.9434434287722556, iteration: 222969
loss: 0.9948342442512512,grad_norm: 0.9363516285200724, iteration: 222970
loss: 0.9201467633247375,grad_norm: 0.93036702921366, iteration: 222971
loss: 1.0094150304794312,grad_norm: 0.8975529859749681, iteration: 222972
loss: 1.0303549766540527,grad_norm: 0.9999990865762572, iteration: 222973
loss: 0.9753985404968262,grad_norm: 0.9999990246316254, iteration: 222974
loss: 0.9902611970901489,grad_norm: 0.9339742002514155, iteration: 222975
loss: 1.050644040107727,grad_norm: 0.9098854483425131, iteration: 222976
loss: 1.039438247680664,grad_norm: 0.8673868384231892, iteration: 222977
loss: 1.0036952495574951,grad_norm: 0.9999991618229006, iteration: 222978
loss: 1.025810718536377,grad_norm: 0.9999990196934355, iteration: 222979
loss: 1.021807074546814,grad_norm: 0.9489055239667349, iteration: 222980
loss: 0.9844741821289062,grad_norm: 0.8568949024110402, iteration: 222981
loss: 0.9899545907974243,grad_norm: 0.9671992865315452, iteration: 222982
loss: 1.0114868879318237,grad_norm: 0.9609912420070442, iteration: 222983
loss: 1.0117565393447876,grad_norm: 0.8491581842623585, iteration: 222984
loss: 1.0197430849075317,grad_norm: 0.7795634549703592, iteration: 222985
loss: 1.015975832939148,grad_norm: 0.9999991843974962, iteration: 222986
loss: 1.0361580848693848,grad_norm: 0.9999991700099476, iteration: 222987
loss: 1.0411282777786255,grad_norm: 0.9999991334722539, iteration: 222988
loss: 1.0038416385650635,grad_norm: 0.9999991332684198, iteration: 222989
loss: 1.0077153444290161,grad_norm: 0.8344122079519178, iteration: 222990
loss: 1.023319125175476,grad_norm: 0.880802833309299, iteration: 222991
loss: 0.9951200485229492,grad_norm: 0.9999990515495668, iteration: 222992
loss: 1.0222539901733398,grad_norm: 0.9493296732586007, iteration: 222993
loss: 1.1023924350738525,grad_norm: 0.9913146355636319, iteration: 222994
loss: 0.957880973815918,grad_norm: 0.8117273431750182, iteration: 222995
loss: 0.9948053359985352,grad_norm: 0.8033889714027374, iteration: 222996
loss: 0.9994010925292969,grad_norm: 0.9015269936629647, iteration: 222997
loss: 0.9824585914611816,grad_norm: 0.7675240328788857, iteration: 222998
loss: 1.0734037160873413,grad_norm: 0.9999992828215859, iteration: 222999
loss: 1.007305383682251,grad_norm: 0.959491594120005, iteration: 223000
loss: 1.010632872581482,grad_norm: 0.9999991591931849, iteration: 223001
loss: 0.9974215030670166,grad_norm: 0.9111083838541568, iteration: 223002
loss: 0.9991005659103394,grad_norm: 0.9533033898937486, iteration: 223003
loss: 1.0372425317764282,grad_norm: 0.8976233827844207, iteration: 223004
loss: 1.0281933546066284,grad_norm: 0.9267733594358133, iteration: 223005
loss: 0.9903713464736938,grad_norm: 0.8612724037113055, iteration: 223006
loss: 0.9900406002998352,grad_norm: 0.9122125834473445, iteration: 223007
loss: 0.9990271329879761,grad_norm: 0.9999990986440314, iteration: 223008
loss: 1.0088942050933838,grad_norm: 0.9204264035368018, iteration: 223009
loss: 1.0058594942092896,grad_norm: 0.9239776943951463, iteration: 223010
loss: 0.9905113577842712,grad_norm: 0.9999990954118643, iteration: 223011
loss: 0.9566237330436707,grad_norm: 0.9999989711710494, iteration: 223012
loss: 1.0067486763000488,grad_norm: 0.9550386842380284, iteration: 223013
loss: 1.0341318845748901,grad_norm: 0.9999992030291786, iteration: 223014
loss: 1.0143444538116455,grad_norm: 0.9999991867218996, iteration: 223015
loss: 0.9810278415679932,grad_norm: 0.9999990233275705, iteration: 223016
loss: 1.0155915021896362,grad_norm: 0.9628035751154765, iteration: 223017
loss: 1.0095194578170776,grad_norm: 0.8036100039406611, iteration: 223018
loss: 1.0133291482925415,grad_norm: 0.7838411799866407, iteration: 223019
loss: 1.0283697843551636,grad_norm: 0.8723876509220977, iteration: 223020
loss: 1.0350090265274048,grad_norm: 0.990263837373541, iteration: 223021
loss: 0.9954008460044861,grad_norm: 0.8565007324137555, iteration: 223022
loss: 1.0775539875030518,grad_norm: 0.9999994794036572, iteration: 223023
loss: 1.0319339036941528,grad_norm: 0.9999990976702096, iteration: 223024
loss: 0.97657710313797,grad_norm: 0.9347030623908085, iteration: 223025
loss: 0.9759801030158997,grad_norm: 0.9999991736384951, iteration: 223026
loss: 0.9756932854652405,grad_norm: 0.9462378023922758, iteration: 223027
loss: 0.9859820008277893,grad_norm: 0.8141205493774774, iteration: 223028
loss: 0.9904505014419556,grad_norm: 0.967134705575432, iteration: 223029
loss: 0.9873471260070801,grad_norm: 0.9416534467305779, iteration: 223030
loss: 1.1200640201568604,grad_norm: 0.9999992262376041, iteration: 223031
loss: 1.1495232582092285,grad_norm: 0.9942412036356144, iteration: 223032
loss: 1.0066447257995605,grad_norm: 0.8903647215746517, iteration: 223033
loss: 1.0284967422485352,grad_norm: 0.9999990728031533, iteration: 223034
loss: 0.9844011664390564,grad_norm: 0.8444567018880712, iteration: 223035
loss: 1.0186980962753296,grad_norm: 0.8522821343953875, iteration: 223036
loss: 0.9964521527290344,grad_norm: 0.9999990194788114, iteration: 223037
loss: 0.9755796790122986,grad_norm: 0.8660471766117025, iteration: 223038
loss: 1.0356779098510742,grad_norm: 0.8572015822405501, iteration: 223039
loss: 0.9998562932014465,grad_norm: 0.9999990925088872, iteration: 223040
loss: 1.0064204931259155,grad_norm: 0.8416773138758435, iteration: 223041
loss: 0.9877064228057861,grad_norm: 0.9625638711445905, iteration: 223042
loss: 0.9661102294921875,grad_norm: 0.9163540568849524, iteration: 223043
loss: 0.9947630763053894,grad_norm: 0.942485573816619, iteration: 223044
loss: 1.0003952980041504,grad_norm: 0.9609365550868528, iteration: 223045
loss: 0.9582803249359131,grad_norm: 0.9066677314793246, iteration: 223046
loss: 1.0074248313903809,grad_norm: 0.9999991575357569, iteration: 223047
loss: 0.9845827221870422,grad_norm: 0.8508809294403434, iteration: 223048
loss: 0.9760669469833374,grad_norm: 0.7954326610008323, iteration: 223049
loss: 1.00413179397583,grad_norm: 0.9999991295631382, iteration: 223050
loss: 1.0090937614440918,grad_norm: 0.8898350731485026, iteration: 223051
loss: 1.0120803117752075,grad_norm: 0.9744881044065723, iteration: 223052
loss: 1.0010043382644653,grad_norm: 0.9999990673629825, iteration: 223053
loss: 1.0103209018707275,grad_norm: 0.9364514700112776, iteration: 223054
loss: 1.0143204927444458,grad_norm: 0.8950875236219553, iteration: 223055
loss: 1.031660795211792,grad_norm: 0.9999995273954865, iteration: 223056
loss: 1.0148357152938843,grad_norm: 0.8640548402656821, iteration: 223057
loss: 0.9835582971572876,grad_norm: 0.8376004764528071, iteration: 223058
loss: 1.0061448812484741,grad_norm: 0.8617026633015067, iteration: 223059
loss: 1.0291141271591187,grad_norm: 0.8830661173030032, iteration: 223060
loss: 1.0162277221679688,grad_norm: 0.9999990612203628, iteration: 223061
loss: 1.0043189525604248,grad_norm: 0.8663938323645403, iteration: 223062
loss: 0.985103964805603,grad_norm: 0.870729475966655, iteration: 223063
loss: 1.0091780424118042,grad_norm: 0.8713073852905101, iteration: 223064
loss: 0.9819667935371399,grad_norm: 0.7963954589233289, iteration: 223065
loss: 0.9788668155670166,grad_norm: 0.9674508439587722, iteration: 223066
loss: 0.9746590256690979,grad_norm: 0.9913065386006943, iteration: 223067
loss: 1.006344199180603,grad_norm: 0.8392237732185719, iteration: 223068
loss: 0.9668681621551514,grad_norm: 0.924040274847674, iteration: 223069
loss: 1.001518726348877,grad_norm: 0.9621038567595778, iteration: 223070
loss: 0.9887261390686035,grad_norm: 0.9327277455566257, iteration: 223071
loss: 1.0252978801727295,grad_norm: 0.9999991133617524, iteration: 223072
loss: 0.9712490439414978,grad_norm: 0.8962547310624785, iteration: 223073
loss: 1.0000100135803223,grad_norm: 0.8653175527350896, iteration: 223074
loss: 1.0298488140106201,grad_norm: 0.7310555015129631, iteration: 223075
loss: 1.0005395412445068,grad_norm: 0.9999990310813655, iteration: 223076
loss: 1.0266695022583008,grad_norm: 0.9999991509184396, iteration: 223077
loss: 1.01204252243042,grad_norm: 0.8148162239242408, iteration: 223078
loss: 1.008034110069275,grad_norm: 0.9999990551640141, iteration: 223079
loss: 1.0084292888641357,grad_norm: 0.8674127753571148, iteration: 223080
loss: 1.037044644355774,grad_norm: 0.8896234988894153, iteration: 223081
loss: 1.0023670196533203,grad_norm: 0.805088518497079, iteration: 223082
loss: 0.9682046175003052,grad_norm: 0.9618872402999309, iteration: 223083
loss: 0.9986440539360046,grad_norm: 0.9764768763981508, iteration: 223084
loss: 0.9870707988739014,grad_norm: 0.884162078493993, iteration: 223085
loss: 1.0000169277191162,grad_norm: 0.9999990799534508, iteration: 223086
loss: 1.0225874185562134,grad_norm: 0.8729082885002055, iteration: 223087
loss: 0.992134690284729,grad_norm: 0.8513854475338644, iteration: 223088
loss: 0.9834071397781372,grad_norm: 0.7727392942447473, iteration: 223089
loss: 1.0078226327896118,grad_norm: 0.9011124412354015, iteration: 223090
loss: 1.0014171600341797,grad_norm: 0.8514721435041238, iteration: 223091
loss: 1.0100926160812378,grad_norm: 0.935112314369544, iteration: 223092
loss: 0.9790934324264526,grad_norm: 0.9629063318403707, iteration: 223093
loss: 0.9992634057998657,grad_norm: 0.9425003457497478, iteration: 223094
loss: 1.0003817081451416,grad_norm: 0.9958312796342109, iteration: 223095
loss: 1.0217366218566895,grad_norm: 0.8783505233087765, iteration: 223096
loss: 0.9904638528823853,grad_norm: 0.9088938371902912, iteration: 223097
loss: 0.9890680313110352,grad_norm: 0.912375380107484, iteration: 223098
loss: 0.9964056015014648,grad_norm: 0.9827635756237935, iteration: 223099
loss: 0.9926648139953613,grad_norm: 0.999999057667943, iteration: 223100
loss: 0.9918504357337952,grad_norm: 0.9999993083324971, iteration: 223101
loss: 1.029689073562622,grad_norm: 0.9853178885254937, iteration: 223102
loss: 1.0199122428894043,grad_norm: 0.9519035044204762, iteration: 223103
loss: 0.9859931468963623,grad_norm: 0.8928565890025928, iteration: 223104
loss: 0.9956799745559692,grad_norm: 0.8376562410227075, iteration: 223105
loss: 0.9673658013343811,grad_norm: 0.9498328392464181, iteration: 223106
loss: 1.0150338411331177,grad_norm: 0.8434498809735737, iteration: 223107
loss: 0.9685581922531128,grad_norm: 0.9201906947763507, iteration: 223108
loss: 1.0015891790390015,grad_norm: 0.8690760143858828, iteration: 223109
loss: 1.0946249961853027,grad_norm: 0.9999996616707908, iteration: 223110
loss: 1.0032321214675903,grad_norm: 0.9999989383616745, iteration: 223111
loss: 1.0525881052017212,grad_norm: 0.9999990471473064, iteration: 223112
loss: 1.0230140686035156,grad_norm: 0.9999991620797957, iteration: 223113
loss: 0.9645932912826538,grad_norm: 0.9634538987322204, iteration: 223114
loss: 1.0041624307632446,grad_norm: 0.9070528346706379, iteration: 223115
loss: 1.0033535957336426,grad_norm: 0.9999990654131433, iteration: 223116
loss: 0.9795633554458618,grad_norm: 0.9462041415760689, iteration: 223117
loss: 1.0275561809539795,grad_norm: 0.9936424298042027, iteration: 223118
loss: 0.9779098629951477,grad_norm: 0.9204656452469845, iteration: 223119
loss: 1.0026893615722656,grad_norm: 0.953034820940982, iteration: 223120
loss: 0.9965735673904419,grad_norm: 0.9800286068611717, iteration: 223121
loss: 1.0166465044021606,grad_norm: 0.7959293231797616, iteration: 223122
loss: 0.9704006314277649,grad_norm: 0.9020190558795647, iteration: 223123
loss: 0.9783845543861389,grad_norm: 0.9999991485892246, iteration: 223124
loss: 1.0012867450714111,grad_norm: 0.9709210238666471, iteration: 223125
loss: 0.9708985090255737,grad_norm: 0.9147955848744141, iteration: 223126
loss: 1.0039725303649902,grad_norm: 0.9999989837076897, iteration: 223127
loss: 0.97539883852005,grad_norm: 0.8913841908907149, iteration: 223128
loss: 1.0140637159347534,grad_norm: 0.9999992005752046, iteration: 223129
loss: 0.9934000372886658,grad_norm: 0.8181477904192438, iteration: 223130
loss: 0.9915341734886169,grad_norm: 0.9999991213432305, iteration: 223131
loss: 0.9865163564682007,grad_norm: 0.9999991766219476, iteration: 223132
loss: 1.0139572620391846,grad_norm: 0.9396223671847481, iteration: 223133
loss: 1.0176997184753418,grad_norm: 0.9005588839395524, iteration: 223134
loss: 0.9811024069786072,grad_norm: 0.9788256546935934, iteration: 223135
loss: 1.0244675874710083,grad_norm: 0.8603789146639657, iteration: 223136
loss: 0.9708112478256226,grad_norm: 0.9168408595663035, iteration: 223137
loss: 0.9883595705032349,grad_norm: 0.8111297452112939, iteration: 223138
loss: 1.0294381380081177,grad_norm: 0.9999990130626071, iteration: 223139
loss: 0.9917023777961731,grad_norm: 0.9127453493994703, iteration: 223140
loss: 0.993436872959137,grad_norm: 0.8552988442290224, iteration: 223141
loss: 0.98591148853302,grad_norm: 0.9999991860402986, iteration: 223142
loss: 0.9845026135444641,grad_norm: 0.9642716067139776, iteration: 223143
loss: 1.015246868133545,grad_norm: 0.8962659537947718, iteration: 223144
loss: 1.0161173343658447,grad_norm: 0.9509580992288542, iteration: 223145
loss: 0.992256760597229,grad_norm: 0.9878186104282392, iteration: 223146
loss: 0.9828311800956726,grad_norm: 0.8647334583448629, iteration: 223147
loss: 0.9948199391365051,grad_norm: 0.8248088088385727, iteration: 223148
loss: 0.9630138874053955,grad_norm: 0.8076922163021126, iteration: 223149
loss: 0.9905076622962952,grad_norm: 0.8656285708277112, iteration: 223150
loss: 0.9956424832344055,grad_norm: 0.9999990757976489, iteration: 223151
loss: 1.029417872428894,grad_norm: 0.8478817092714972, iteration: 223152
loss: 0.9930015802383423,grad_norm: 0.9543904777214369, iteration: 223153
loss: 1.0036965608596802,grad_norm: 0.9999989821481164, iteration: 223154
loss: 1.0230809450149536,grad_norm: 0.9571547097153937, iteration: 223155
loss: 1.072922945022583,grad_norm: 0.8001832826503149, iteration: 223156
loss: 1.0369079113006592,grad_norm: 0.977195270563337, iteration: 223157
loss: 0.9886064529418945,grad_norm: 0.9999991866687021, iteration: 223158
loss: 0.9622830152511597,grad_norm: 0.8729623217401847, iteration: 223159
loss: 0.9634084701538086,grad_norm: 0.9999991648395681, iteration: 223160
loss: 0.9871072173118591,grad_norm: 0.8603614592742166, iteration: 223161
loss: 0.9988987445831299,grad_norm: 0.9308702135466116, iteration: 223162
loss: 0.9795524477958679,grad_norm: 0.8039041247564785, iteration: 223163
loss: 1.002572774887085,grad_norm: 0.9999997760880341, iteration: 223164
loss: 0.9892657399177551,grad_norm: 0.94544410223113, iteration: 223165
loss: 1.0149048566818237,grad_norm: 0.8405956000931594, iteration: 223166
loss: 0.9805781841278076,grad_norm: 0.7684009099443914, iteration: 223167
loss: 1.0032439231872559,grad_norm: 0.999998954528234, iteration: 223168
loss: 0.9600327610969543,grad_norm: 0.8309757258957691, iteration: 223169
loss: 1.0154906511306763,grad_norm: 0.9932559840380984, iteration: 223170
loss: 1.0179883241653442,grad_norm: 0.7551217498275669, iteration: 223171
loss: 0.950447142124176,grad_norm: 0.7399874361687055, iteration: 223172
loss: 0.9961241483688354,grad_norm: 0.9999990334307928, iteration: 223173
loss: 1.015602469444275,grad_norm: 0.9999990406455916, iteration: 223174
loss: 0.989692747592926,grad_norm: 0.823769617097229, iteration: 223175
loss: 0.9975160360336304,grad_norm: 0.8336034324318493, iteration: 223176
loss: 0.9856392741203308,grad_norm: 0.8761604185640984, iteration: 223177
loss: 1.0419676303863525,grad_norm: 0.8437278140267324, iteration: 223178
loss: 1.0025153160095215,grad_norm: 0.9082172621448442, iteration: 223179
loss: 0.9685178399085999,grad_norm: 0.7179587584306057, iteration: 223180
loss: 0.9882738590240479,grad_norm: 0.9945828650783285, iteration: 223181
loss: 0.9813341498374939,grad_norm: 0.9297864729213237, iteration: 223182
loss: 0.999054491519928,grad_norm: 0.9999990545891796, iteration: 223183
loss: 1.0098968744277954,grad_norm: 0.797418112721762, iteration: 223184
loss: 0.9996563792228699,grad_norm: 0.8707977026778777, iteration: 223185
loss: 0.9954276084899902,grad_norm: 0.9999990989779017, iteration: 223186
loss: 0.978661060333252,grad_norm: 0.9999991373142597, iteration: 223187
loss: 0.9851394295692444,grad_norm: 0.9977503946922406, iteration: 223188
loss: 0.9762559533119202,grad_norm: 0.7855043749732544, iteration: 223189
loss: 0.9980947375297546,grad_norm: 0.9999991086003065, iteration: 223190
loss: 1.0213313102722168,grad_norm: 0.8831895020324745, iteration: 223191
loss: 1.0061384439468384,grad_norm: 0.999999158700304, iteration: 223192
loss: 1.014743447303772,grad_norm: 0.9124665572106827, iteration: 223193
loss: 1.0095480680465698,grad_norm: 0.7736485760650355, iteration: 223194
loss: 0.9724306464195251,grad_norm: 0.8713797853196317, iteration: 223195
loss: 1.012423038482666,grad_norm: 0.9075733676602016, iteration: 223196
loss: 1.0002506971359253,grad_norm: 0.9196759650046822, iteration: 223197
loss: 0.9652530550956726,grad_norm: 0.9999990378218538, iteration: 223198
loss: 1.0256447792053223,grad_norm: 0.999998979296695, iteration: 223199
loss: 1.006636619567871,grad_norm: 0.8641958323336751, iteration: 223200
loss: 1.0409189462661743,grad_norm: 0.8841002213026455, iteration: 223201
loss: 1.0234607458114624,grad_norm: 0.9999991570651796, iteration: 223202
loss: 0.9964353442192078,grad_norm: 0.9387172469928581, iteration: 223203
loss: 0.9877154231071472,grad_norm: 0.9411677925773271, iteration: 223204
loss: 1.0433778762817383,grad_norm: 0.9336397977430412, iteration: 223205
loss: 0.9998636841773987,grad_norm: 0.8125111485805767, iteration: 223206
loss: 1.0006498098373413,grad_norm: 0.9849535279645515, iteration: 223207
loss: 1.0705770254135132,grad_norm: 0.9999998375452485, iteration: 223208
loss: 0.9883898496627808,grad_norm: 0.9999991287081159, iteration: 223209
loss: 0.9922339916229248,grad_norm: 0.7878224249974752, iteration: 223210
loss: 1.0111223459243774,grad_norm: 0.9261031355219529, iteration: 223211
loss: 1.0030879974365234,grad_norm: 0.999999097235051, iteration: 223212
loss: 0.9977683424949646,grad_norm: 0.9999990877236307, iteration: 223213
loss: 1.0101585388183594,grad_norm: 0.9999990892715188, iteration: 223214
loss: 1.0141268968582153,grad_norm: 0.9999992773986499, iteration: 223215
loss: 0.9976125955581665,grad_norm: 0.9999995102042865, iteration: 223216
loss: 1.0330063104629517,grad_norm: 0.9883189130438281, iteration: 223217
loss: 0.990343451499939,grad_norm: 0.9999990628779235, iteration: 223218
loss: 1.0290378332138062,grad_norm: 0.9027170558571959, iteration: 223219
loss: 0.998664379119873,grad_norm: 0.9723804952045697, iteration: 223220
loss: 0.9859459400177002,grad_norm: 0.9271938520831797, iteration: 223221
loss: 0.9753252267837524,grad_norm: 0.930733688386086, iteration: 223222
loss: 1.027061104774475,grad_norm: 0.9999990993668135, iteration: 223223
loss: 0.98384028673172,grad_norm: 0.8240702334416026, iteration: 223224
loss: 1.035091519355774,grad_norm: 0.8956050376524288, iteration: 223225
loss: 0.9860293865203857,grad_norm: 0.9741517314033827, iteration: 223226
loss: 1.0044827461242676,grad_norm: 0.9239560351977243, iteration: 223227
loss: 0.9691854119300842,grad_norm: 0.9699967851769145, iteration: 223228
loss: 0.974372923374176,grad_norm: 0.9268733250084042, iteration: 223229
loss: 1.0075489282608032,grad_norm: 0.9999991469002547, iteration: 223230
loss: 0.9932867288589478,grad_norm: 0.9999992378834778, iteration: 223231
loss: 1.0364004373550415,grad_norm: 0.972298983219658, iteration: 223232
loss: 0.9972279071807861,grad_norm: 0.9999991148542839, iteration: 223233
loss: 0.9952340126037598,grad_norm: 0.9999991103351736, iteration: 223234
loss: 1.020277738571167,grad_norm: 0.9788168651814053, iteration: 223235
loss: 1.0113049745559692,grad_norm: 0.9999992328514643, iteration: 223236
loss: 0.9693893194198608,grad_norm: 0.7549645249518099, iteration: 223237
loss: 0.9750677943229675,grad_norm: 0.7417039488200573, iteration: 223238
loss: 0.9787089228630066,grad_norm: 0.9639755388758936, iteration: 223239
loss: 0.9937830567359924,grad_norm: 0.7806160916538105, iteration: 223240
loss: 1.0020158290863037,grad_norm: 0.8911698404368243, iteration: 223241
loss: 0.9849252104759216,grad_norm: 0.9496398963425615, iteration: 223242
loss: 1.0022696256637573,grad_norm: 0.9999991450308447, iteration: 223243
loss: 1.0268644094467163,grad_norm: 0.9999992141132155, iteration: 223244
loss: 0.983728289604187,grad_norm: 0.9591757984563007, iteration: 223245
loss: 1.0324825048446655,grad_norm: 0.9523845221314479, iteration: 223246
loss: 0.9881986975669861,grad_norm: 0.8786880745801475, iteration: 223247
loss: 0.9853132367134094,grad_norm: 0.9040877726174462, iteration: 223248
loss: 0.9970120191574097,grad_norm: 0.8169407867491217, iteration: 223249
loss: 0.9707244038581848,grad_norm: 0.9518356924895992, iteration: 223250
loss: 0.9875671863555908,grad_norm: 0.8987987386893821, iteration: 223251
loss: 1.020369052886963,grad_norm: 0.9999992526526479, iteration: 223252
loss: 0.9610311985015869,grad_norm: 0.9431942418656097, iteration: 223253
loss: 1.0121878385543823,grad_norm: 0.8196169625251842, iteration: 223254
loss: 0.9926838874816895,grad_norm: 0.9999990456596892, iteration: 223255
loss: 1.001004934310913,grad_norm: 0.866326380622309, iteration: 223256
loss: 0.9801870584487915,grad_norm: 0.9829989131805212, iteration: 223257
loss: 1.042375922203064,grad_norm: 0.9680718246008017, iteration: 223258
loss: 0.9999442100524902,grad_norm: 0.9999997711709445, iteration: 223259
loss: 1.015445351600647,grad_norm: 0.9999993550430141, iteration: 223260
loss: 0.9899768829345703,grad_norm: 0.8486071417911866, iteration: 223261
loss: 1.0624772310256958,grad_norm: 0.8618968519553808, iteration: 223262
loss: 0.9800132513046265,grad_norm: 0.9269559820660601, iteration: 223263
loss: 0.9888530969619751,grad_norm: 0.9662711776488798, iteration: 223264
loss: 1.0003321170806885,grad_norm: 0.8811461789120457, iteration: 223265
loss: 0.9730880260467529,grad_norm: 0.8758064941053755, iteration: 223266
loss: 0.9911136627197266,grad_norm: 0.9834074151187836, iteration: 223267
loss: 0.9994660019874573,grad_norm: 0.9619574090210068, iteration: 223268
loss: 0.9984791278839111,grad_norm: 0.8727647674633325, iteration: 223269
loss: 1.0042226314544678,grad_norm: 0.708661147571172, iteration: 223270
loss: 0.9630200862884521,grad_norm: 0.7841980916254567, iteration: 223271
loss: 0.9879131317138672,grad_norm: 0.9387047532413056, iteration: 223272
loss: 1.0322195291519165,grad_norm: 0.9999998512788576, iteration: 223273
loss: 1.0151252746582031,grad_norm: 0.9267692772035935, iteration: 223274
loss: 0.9944144487380981,grad_norm: 0.9999990983213529, iteration: 223275
loss: 0.9900330305099487,grad_norm: 0.8616820495949308, iteration: 223276
loss: 1.0083523988723755,grad_norm: 0.9990235527694855, iteration: 223277
loss: 1.0123645067214966,grad_norm: 0.9671590574461485, iteration: 223278
loss: 0.9532431960105896,grad_norm: 0.9032924137289361, iteration: 223279
loss: 0.9882544279098511,grad_norm: 0.8987605685126565, iteration: 223280
loss: 1.0321691036224365,grad_norm: 0.9932967712957645, iteration: 223281
loss: 1.023801565170288,grad_norm: 0.9839691659986735, iteration: 223282
loss: 1.1181739568710327,grad_norm: 0.9999998233547067, iteration: 223283
loss: 0.9909284710884094,grad_norm: 0.8840016231657633, iteration: 223284
loss: 0.9826538562774658,grad_norm: 0.9387908364237018, iteration: 223285
loss: 1.0133328437805176,grad_norm: 0.9999992161125393, iteration: 223286
loss: 1.0209351778030396,grad_norm: 0.7602776758121638, iteration: 223287
loss: 0.9973831176757812,grad_norm: 0.83206156363167, iteration: 223288
loss: 0.9838092923164368,grad_norm: 0.8161447810781075, iteration: 223289
loss: 0.9692856669425964,grad_norm: 0.920880438144406, iteration: 223290
loss: 1.0185285806655884,grad_norm: 0.9999990818462169, iteration: 223291
loss: 1.0486526489257812,grad_norm: 0.8379925315002756, iteration: 223292
loss: 0.9695795178413391,grad_norm: 0.8731317417408819, iteration: 223293
loss: 0.9953666925430298,grad_norm: 0.9933520202534034, iteration: 223294
loss: 0.9724649786949158,grad_norm: 0.999999080151482, iteration: 223295
loss: 0.9830694198608398,grad_norm: 0.8955980930030785, iteration: 223296
loss: 0.990760087966919,grad_norm: 0.9504472951609848, iteration: 223297
loss: 1.0128037929534912,grad_norm: 0.9999996161806469, iteration: 223298
loss: 1.0215708017349243,grad_norm: 0.9335386241048058, iteration: 223299
loss: 1.012249231338501,grad_norm: 0.9093704370476593, iteration: 223300
loss: 0.984302282333374,grad_norm: 0.8546017198156607, iteration: 223301
loss: 0.988491952419281,grad_norm: 0.769979469227376, iteration: 223302
loss: 0.98392653465271,grad_norm: 0.8790979424251665, iteration: 223303
loss: 0.9773280024528503,grad_norm: 0.8594751892060716, iteration: 223304
loss: 1.0211620330810547,grad_norm: 0.9999991039315042, iteration: 223305
loss: 0.9793444871902466,grad_norm: 0.999999023633459, iteration: 223306
loss: 1.017594337463379,grad_norm: 0.9959384658144906, iteration: 223307
loss: 1.0238901376724243,grad_norm: 0.8917073650004919, iteration: 223308
loss: 1.061253547668457,grad_norm: 0.9999992055588834, iteration: 223309
loss: 1.0423715114593506,grad_norm: 0.959810221547921, iteration: 223310
loss: 0.9820345044136047,grad_norm: 0.9451508122065763, iteration: 223311
loss: 0.9608908295631409,grad_norm: 0.9999992116724888, iteration: 223312
loss: 1.0163590908050537,grad_norm: 0.9999991027049786, iteration: 223313
loss: 0.9697616100311279,grad_norm: 0.938332965345271, iteration: 223314
loss: 0.9836645126342773,grad_norm: 0.9002007386821173, iteration: 223315
loss: 1.0256246328353882,grad_norm: 0.9999997880140269, iteration: 223316
loss: 1.005993366241455,grad_norm: 0.8895714048048448, iteration: 223317
loss: 0.975826621055603,grad_norm: 0.8781618854928844, iteration: 223318
loss: 0.9710038900375366,grad_norm: 0.8973276226085007, iteration: 223319
loss: 1.0130587816238403,grad_norm: 0.9999989604081452, iteration: 223320
loss: 1.0140950679779053,grad_norm: 0.7931726347586685, iteration: 223321
loss: 0.9649550914764404,grad_norm: 0.9940604151990208, iteration: 223322
loss: 0.9942883253097534,grad_norm: 0.999999443036293, iteration: 223323
loss: 0.978074312210083,grad_norm: 0.8233316583414831, iteration: 223324
loss: 0.968505322933197,grad_norm: 0.9999991745274289, iteration: 223325
loss: 1.0336891412734985,grad_norm: 0.9999990439447931, iteration: 223326
loss: 0.9748072624206543,grad_norm: 0.8286690085160622, iteration: 223327
loss: 0.9655838012695312,grad_norm: 0.9413134038098628, iteration: 223328
loss: 0.9866296052932739,grad_norm: 0.9999990730170604, iteration: 223329
loss: 1.00169837474823,grad_norm: 0.9999992847246908, iteration: 223330
loss: 0.9800384640693665,grad_norm: 0.7806439557118233, iteration: 223331
loss: 0.9982437491416931,grad_norm: 0.9709283103809083, iteration: 223332
loss: 1.0121759176254272,grad_norm: 0.9837515056552572, iteration: 223333
loss: 1.0146938562393188,grad_norm: 0.9999991266469526, iteration: 223334
loss: 1.000136375427246,grad_norm: 0.7456181690518049, iteration: 223335
loss: 0.960293710231781,grad_norm: 0.9999990358833436, iteration: 223336
loss: 1.0010019540786743,grad_norm: 0.999999036319396, iteration: 223337
loss: 0.9504289031028748,grad_norm: 0.9897592131154558, iteration: 223338
loss: 0.9714968800544739,grad_norm: 0.9441928704750765, iteration: 223339
loss: 1.0147218704223633,grad_norm: 0.9038746090202907, iteration: 223340
loss: 1.0555710792541504,grad_norm: 0.9077259716409125, iteration: 223341
loss: 0.9503692984580994,grad_norm: 0.8614348772693234, iteration: 223342
loss: 1.008499264717102,grad_norm: 0.946483225432349, iteration: 223343
loss: 1.0074400901794434,grad_norm: 0.9999992489807775, iteration: 223344
loss: 0.954409122467041,grad_norm: 0.8924904333449671, iteration: 223345
loss: 0.9799344539642334,grad_norm: 0.8943032731959416, iteration: 223346
loss: 0.9881876111030579,grad_norm: 0.8747630656975063, iteration: 223347
loss: 1.0246540307998657,grad_norm: 0.9408591759075906, iteration: 223348
loss: 1.0131739377975464,grad_norm: 0.9999990102905634, iteration: 223349
loss: 1.0081394910812378,grad_norm: 0.8328534911435211, iteration: 223350
loss: 1.0018097162246704,grad_norm: 0.9921635401646005, iteration: 223351
loss: 1.0362060070037842,grad_norm: 0.99999967583002, iteration: 223352
loss: 0.9967077970504761,grad_norm: 0.9007041627893558, iteration: 223353
loss: 1.0347987413406372,grad_norm: 0.872684364061111, iteration: 223354
loss: 1.0273810625076294,grad_norm: 0.9140199318650915, iteration: 223355
loss: 0.9893862009048462,grad_norm: 0.9999991178235085, iteration: 223356
loss: 1.0131572484970093,grad_norm: 0.8419204033179227, iteration: 223357
loss: 0.9501835703849792,grad_norm: 0.9999991322264653, iteration: 223358
loss: 1.0179946422576904,grad_norm: 0.8736129629437293, iteration: 223359
loss: 0.9982385635375977,grad_norm: 0.9057376502487454, iteration: 223360
loss: 0.9808704853057861,grad_norm: 0.8859041979472995, iteration: 223361
loss: 0.9870573282241821,grad_norm: 0.9998555892106694, iteration: 223362
loss: 0.9935013055801392,grad_norm: 0.7572817994692321, iteration: 223363
loss: 0.9770000576972961,grad_norm: 0.9999989729930396, iteration: 223364
loss: 1.0203191041946411,grad_norm: 0.9999997593527594, iteration: 223365
loss: 0.9580073952674866,grad_norm: 0.89398804695622, iteration: 223366
loss: 1.029727816581726,grad_norm: 0.88650453007001, iteration: 223367
loss: 1.0007717609405518,grad_norm: 0.8152232825657045, iteration: 223368
loss: 0.9941128492355347,grad_norm: 0.965238363093739, iteration: 223369
loss: 1.0247392654418945,grad_norm: 0.8872674943705452, iteration: 223370
loss: 0.9673361778259277,grad_norm: 0.9999993020145718, iteration: 223371
loss: 0.9991472959518433,grad_norm: 0.8360754234509632, iteration: 223372
loss: 1.0074877738952637,grad_norm: 0.999999177880592, iteration: 223373
loss: 1.0148981809616089,grad_norm: 0.9438440615174551, iteration: 223374
loss: 0.9805543422698975,grad_norm: 0.9768995692168029, iteration: 223375
loss: 0.9736503958702087,grad_norm: 0.832859424265329, iteration: 223376
loss: 0.9865902662277222,grad_norm: 0.9999990197216307, iteration: 223377
loss: 1.101853609085083,grad_norm: 0.999999421078373, iteration: 223378
loss: 0.9966784715652466,grad_norm: 0.9636439053184774, iteration: 223379
loss: 0.9793988466262817,grad_norm: 0.9708751471344725, iteration: 223380
loss: 0.9732377529144287,grad_norm: 0.8821053402938328, iteration: 223381
loss: 1.0109868049621582,grad_norm: 0.9999990786247651, iteration: 223382
loss: 0.9829161763191223,grad_norm: 0.9261314348260817, iteration: 223383
loss: 1.010927677154541,grad_norm: 0.7486327699930622, iteration: 223384
loss: 0.9489679932594299,grad_norm: 0.8317219436761918, iteration: 223385
loss: 1.017715334892273,grad_norm: 0.9999990606541012, iteration: 223386
loss: 0.9628764390945435,grad_norm: 0.8871283243721262, iteration: 223387
loss: 1.0118532180786133,grad_norm: 0.8609010243207893, iteration: 223388
loss: 0.9925088882446289,grad_norm: 0.8779742039140691, iteration: 223389
loss: 0.9698415398597717,grad_norm: 0.9603317119284773, iteration: 223390
loss: 1.0280362367630005,grad_norm: 0.9999991485895441, iteration: 223391
loss: 0.9533188939094543,grad_norm: 0.7809177508476636, iteration: 223392
loss: 0.9980684518814087,grad_norm: 0.961315177576804, iteration: 223393
loss: 0.9838260412216187,grad_norm: 0.99999921076123, iteration: 223394
loss: 1.0413621664047241,grad_norm: 0.9219869390779664, iteration: 223395
loss: 1.0139015913009644,grad_norm: 0.7750815880898577, iteration: 223396
loss: 1.0030323266983032,grad_norm: 0.7966759961873914, iteration: 223397
loss: 0.9803081750869751,grad_norm: 0.8531313278123683, iteration: 223398
loss: 0.9936968684196472,grad_norm: 0.9468826897958676, iteration: 223399
loss: 0.9844127297401428,grad_norm: 0.8957313723944658, iteration: 223400
loss: 1.045809030532837,grad_norm: 0.9999998935795879, iteration: 223401
loss: 0.9983330368995667,grad_norm: 0.9959407012609928, iteration: 223402
loss: 1.000569224357605,grad_norm: 0.8288243331476112, iteration: 223403
loss: 1.025437831878662,grad_norm: 0.9999990553057415, iteration: 223404
loss: 0.9869723320007324,grad_norm: 0.9575528713152367, iteration: 223405
loss: 1.0516467094421387,grad_norm: 0.8364530510192556, iteration: 223406
loss: 1.0180970430374146,grad_norm: 0.8825400453373214, iteration: 223407
loss: 1.02556312084198,grad_norm: 0.9165035003412645, iteration: 223408
loss: 0.9917449951171875,grad_norm: 0.9626577387299982, iteration: 223409
loss: 0.9702882766723633,grad_norm: 0.9121722609209276, iteration: 223410
loss: 1.0247876644134521,grad_norm: 0.993688328345495, iteration: 223411
loss: 0.9959967732429504,grad_norm: 0.9427968159788204, iteration: 223412
loss: 0.9866310358047485,grad_norm: 0.8791107571316605, iteration: 223413
loss: 0.9826587438583374,grad_norm: 0.9869514672889469, iteration: 223414
loss: 1.0004183053970337,grad_norm: 0.9727775386692122, iteration: 223415
loss: 0.9569039940834045,grad_norm: 0.9873466570416842, iteration: 223416
loss: 1.0494540929794312,grad_norm: 0.9999991356914314, iteration: 223417
loss: 1.003883719444275,grad_norm: 0.9484865694711438, iteration: 223418
loss: 0.9504896402359009,grad_norm: 0.8885292252608861, iteration: 223419
loss: 0.9733137488365173,grad_norm: 0.9999990779137802, iteration: 223420
loss: 1.019187569618225,grad_norm: 0.9999991241611438, iteration: 223421
loss: 1.0005382299423218,grad_norm: 0.999999132717438, iteration: 223422
loss: 1.000922441482544,grad_norm: 0.9492787135666232, iteration: 223423
loss: 1.0137372016906738,grad_norm: 0.9251131180365103, iteration: 223424
loss: 1.015523910522461,grad_norm: 0.9796297846852507, iteration: 223425
loss: 1.0329375267028809,grad_norm: 0.9504857054264778, iteration: 223426
loss: 0.9963151812553406,grad_norm: 0.9999991702090425, iteration: 223427
loss: 1.0274726152420044,grad_norm: 0.991638667656555, iteration: 223428
loss: 0.9918493628501892,grad_norm: 0.9999991708755872, iteration: 223429
loss: 0.9809467792510986,grad_norm: 0.9009734328140944, iteration: 223430
loss: 1.0059036016464233,grad_norm: 0.8749561352240024, iteration: 223431
loss: 1.036264181137085,grad_norm: 0.9510504156996586, iteration: 223432
loss: 0.9885771870613098,grad_norm: 0.9846189488820156, iteration: 223433
loss: 0.9900099635124207,grad_norm: 0.8768135658144945, iteration: 223434
loss: 0.9701004028320312,grad_norm: 0.8412020533618508, iteration: 223435
loss: 0.9818288087844849,grad_norm: 0.8642351120368057, iteration: 223436
loss: 1.0111523866653442,grad_norm: 0.7878533204940308, iteration: 223437
loss: 0.9729709029197693,grad_norm: 0.940486905127603, iteration: 223438
loss: 0.9808083176612854,grad_norm: 0.893811521944231, iteration: 223439
loss: 0.9944477677345276,grad_norm: 0.9156392254581318, iteration: 223440
loss: 1.0119483470916748,grad_norm: 0.9691429254575905, iteration: 223441
loss: 0.9937220215797424,grad_norm: 0.8674980616597946, iteration: 223442
loss: 1.0149595737457275,grad_norm: 0.8866971012103044, iteration: 223443
loss: 0.9941152930259705,grad_norm: 0.8253376632675716, iteration: 223444
loss: 1.003427505493164,grad_norm: 0.9999990456671073, iteration: 223445
loss: 0.9872677326202393,grad_norm: 0.8922129169845556, iteration: 223446
loss: 1.006803274154663,grad_norm: 0.8302748306425763, iteration: 223447
loss: 0.9956632256507874,grad_norm: 0.7492322020404395, iteration: 223448
loss: 0.9995989203453064,grad_norm: 0.9999992947922777, iteration: 223449
loss: 0.9999150037765503,grad_norm: 0.9999990755025066, iteration: 223450
loss: 0.9874157309532166,grad_norm: 0.942992691228002, iteration: 223451
loss: 0.9797852039337158,grad_norm: 0.9755637118595417, iteration: 223452
loss: 1.0209652185440063,grad_norm: 0.9527565053879125, iteration: 223453
loss: 0.9617547392845154,grad_norm: 0.99999905138576, iteration: 223454
loss: 1.0093023777008057,grad_norm: 0.7656579670658614, iteration: 223455
loss: 0.9668088555335999,grad_norm: 0.9999991129142615, iteration: 223456
loss: 1.0021281242370605,grad_norm: 0.7859775188143996, iteration: 223457
loss: 0.9983618259429932,grad_norm: 0.9999990573805633, iteration: 223458
loss: 1.011096477508545,grad_norm: 0.9999990691326043, iteration: 223459
loss: 1.0010285377502441,grad_norm: 0.9713662133031755, iteration: 223460
loss: 0.9810141921043396,grad_norm: 0.9837712824745264, iteration: 223461
loss: 1.0085701942443848,grad_norm: 0.9388344737210362, iteration: 223462
loss: 1.0084995031356812,grad_norm: 0.9999991610642993, iteration: 223463
loss: 1.0005855560302734,grad_norm: 0.8046583192304373, iteration: 223464
loss: 0.9766645431518555,grad_norm: 0.8682963534259951, iteration: 223465
loss: 1.0019763708114624,grad_norm: 0.9999990537327733, iteration: 223466
loss: 0.9622205495834351,grad_norm: 0.9653958395380198, iteration: 223467
loss: 0.9939320683479309,grad_norm: 0.9607196907068992, iteration: 223468
loss: 1.018320918083191,grad_norm: 0.9999990016613868, iteration: 223469
loss: 0.9775989055633545,grad_norm: 0.9999997418584504, iteration: 223470
loss: 1.0065112113952637,grad_norm: 0.9780274661696704, iteration: 223471
loss: 0.99050372838974,grad_norm: 0.999999096536535, iteration: 223472
loss: 1.0075269937515259,grad_norm: 0.9999991270876574, iteration: 223473
loss: 0.9650534987449646,grad_norm: 0.868056692090627, iteration: 223474
loss: 1.021917462348938,grad_norm: 0.9999991037862012, iteration: 223475
loss: 1.013466477394104,grad_norm: 0.9408278515258279, iteration: 223476
loss: 1.0111885070800781,grad_norm: 0.9999991658695637, iteration: 223477
loss: 1.0360462665557861,grad_norm: 0.8384622719355375, iteration: 223478
loss: 0.9825312495231628,grad_norm: 0.9999990506788986, iteration: 223479
loss: 1.020432472229004,grad_norm: 0.8992381407889917, iteration: 223480
loss: 0.9816629886627197,grad_norm: 0.9760260062133468, iteration: 223481
loss: 1.0223071575164795,grad_norm: 0.904000875711381, iteration: 223482
loss: 1.0036908388137817,grad_norm: 0.9058693580095185, iteration: 223483
loss: 0.9957436919212341,grad_norm: 0.7888521699652946, iteration: 223484
loss: 1.003991723060608,grad_norm: 0.9999990506615514, iteration: 223485
loss: 0.9826606512069702,grad_norm: 0.9324602674837302, iteration: 223486
loss: 1.000274896621704,grad_norm: 0.9069714668969114, iteration: 223487
loss: 0.99135822057724,grad_norm: 0.9443588070949922, iteration: 223488
loss: 1.0014902353286743,grad_norm: 0.9999991187499718, iteration: 223489
loss: 1.0083321332931519,grad_norm: 0.8757126515082138, iteration: 223490
loss: 1.009337306022644,grad_norm: 0.9999991313227706, iteration: 223491
loss: 0.9672863483428955,grad_norm: 0.9818760707801998, iteration: 223492
loss: 1.0281444787979126,grad_norm: 0.9999998888961259, iteration: 223493
loss: 1.0192679166793823,grad_norm: 0.7867741286395122, iteration: 223494
loss: 0.9917082190513611,grad_norm: 0.9999998951590254, iteration: 223495
loss: 1.0067850351333618,grad_norm: 0.9654436704944318, iteration: 223496
loss: 1.0207586288452148,grad_norm: 0.9999992968094722, iteration: 223497
loss: 0.9656896591186523,grad_norm: 0.8443766579342162, iteration: 223498
loss: 0.978951632976532,grad_norm: 0.9999991783472281, iteration: 223499
loss: 0.9818159341812134,grad_norm: 0.7982648010400563, iteration: 223500
loss: 0.9674802422523499,grad_norm: 0.9140141941822637, iteration: 223501
loss: 1.0016740560531616,grad_norm: 0.9199591980362314, iteration: 223502
loss: 0.982936680316925,grad_norm: 0.8722566741724568, iteration: 223503
loss: 1.0179311037063599,grad_norm: 0.9999990967144853, iteration: 223504
loss: 1.0067578554153442,grad_norm: 0.9999991040910544, iteration: 223505
loss: 0.9992057681083679,grad_norm: 0.9307294326017953, iteration: 223506
loss: 0.9944724440574646,grad_norm: 0.9028045945095232, iteration: 223507
loss: 0.9982223510742188,grad_norm: 0.9536914221554788, iteration: 223508
loss: 1.0013362169265747,grad_norm: 0.78222781256211, iteration: 223509
loss: 0.9998179078102112,grad_norm: 0.9506786852559902, iteration: 223510
loss: 1.013166069984436,grad_norm: 0.9591032356503529, iteration: 223511
loss: 1.0301414728164673,grad_norm: 0.8189877457671495, iteration: 223512
loss: 1.000651478767395,grad_norm: 0.9646852886710128, iteration: 223513
loss: 1.0169408321380615,grad_norm: 0.8168794725884946, iteration: 223514
loss: 0.9820482134819031,grad_norm: 0.7753860218103756, iteration: 223515
loss: 0.996185839176178,grad_norm: 0.9999991472800991, iteration: 223516
loss: 0.9902494549751282,grad_norm: 0.9106032506631898, iteration: 223517
loss: 0.9618946313858032,grad_norm: 0.9488982647907361, iteration: 223518
loss: 0.988346517086029,grad_norm: 0.9999991074670068, iteration: 223519
loss: 1.0113677978515625,grad_norm: 0.8922290426030357, iteration: 223520
loss: 1.0420877933502197,grad_norm: 0.8175875651806143, iteration: 223521
loss: 1.0037641525268555,grad_norm: 0.8350656453718213, iteration: 223522
loss: 1.013416051864624,grad_norm: 0.8561257544062254, iteration: 223523
loss: 1.0199427604675293,grad_norm: 0.9999992010345499, iteration: 223524
loss: 0.9806392192840576,grad_norm: 0.8537774787804756, iteration: 223525
loss: 0.999511182308197,grad_norm: 0.8839483391135685, iteration: 223526
loss: 0.9717099070549011,grad_norm: 0.847837942706873, iteration: 223527
loss: 0.964663565158844,grad_norm: 0.9999989623560044, iteration: 223528
loss: 0.9844162464141846,grad_norm: 0.8921492281962001, iteration: 223529
loss: 0.986477255821228,grad_norm: 0.9181144320685207, iteration: 223530
loss: 1.007891058921814,grad_norm: 0.9510645984907864, iteration: 223531
loss: 0.9827200174331665,grad_norm: 0.9060798217861452, iteration: 223532
loss: 0.9941231608390808,grad_norm: 0.814204888513638, iteration: 223533
loss: 0.975382924079895,grad_norm: 0.9999991770277397, iteration: 223534
loss: 1.0093451738357544,grad_norm: 0.8380584068712946, iteration: 223535
loss: 1.006663203239441,grad_norm: 0.9086646358001568, iteration: 223536
loss: 0.9907709360122681,grad_norm: 0.7694506181471498, iteration: 223537
loss: 0.9992728233337402,grad_norm: 0.99139127128212, iteration: 223538
loss: 1.0328820943832397,grad_norm: 0.9765247968070416, iteration: 223539
loss: 1.0003072023391724,grad_norm: 0.9082756643326553, iteration: 223540
loss: 0.9878788590431213,grad_norm: 0.8812059190787779, iteration: 223541
loss: 0.9800922274589539,grad_norm: 0.9289091862379709, iteration: 223542
loss: 1.0091853141784668,grad_norm: 0.9327960071742499, iteration: 223543
loss: 0.9968093633651733,grad_norm: 0.8314192976260989, iteration: 223544
loss: 0.9677749276161194,grad_norm: 0.8498180351199256, iteration: 223545
loss: 0.9861769080162048,grad_norm: 0.8776126124374708, iteration: 223546
loss: 1.006151556968689,grad_norm: 0.9999991639747554, iteration: 223547
loss: 0.9863407015800476,grad_norm: 0.8174177308319656, iteration: 223548
loss: 0.9950708746910095,grad_norm: 0.9962516650482808, iteration: 223549
loss: 0.9813783764839172,grad_norm: 0.869505432178156, iteration: 223550
loss: 0.9608798623085022,grad_norm: 0.9999992570113637, iteration: 223551
loss: 1.0202833414077759,grad_norm: 0.9978770269633401, iteration: 223552
loss: 0.9933767914772034,grad_norm: 0.9118608445805158, iteration: 223553
loss: 1.0155837535858154,grad_norm: 0.9711392143089217, iteration: 223554
loss: 1.0097134113311768,grad_norm: 0.972229955238928, iteration: 223555
loss: 0.9906408190727234,grad_norm: 0.9239928839958211, iteration: 223556
loss: 0.980482816696167,grad_norm: 0.8109575148904064, iteration: 223557
loss: 0.9831891655921936,grad_norm: 0.999999183439396, iteration: 223558
loss: 0.9692025184631348,grad_norm: 0.8046217023026273, iteration: 223559
loss: 0.9954584836959839,grad_norm: 0.9700022237930697, iteration: 223560
loss: 0.9934139847755432,grad_norm: 0.966832701832642, iteration: 223561
loss: 1.0254604816436768,grad_norm: 0.8279259885564921, iteration: 223562
loss: 0.9998701214790344,grad_norm: 0.9592615165319681, iteration: 223563
loss: 0.9871708154678345,grad_norm: 0.9999991876483864, iteration: 223564
loss: 0.9923122525215149,grad_norm: 0.9780252936019557, iteration: 223565
loss: 0.9774349927902222,grad_norm: 0.8821359351098316, iteration: 223566
loss: 0.9596673846244812,grad_norm: 0.8143556750172065, iteration: 223567
loss: 1.0018941164016724,grad_norm: 0.9999989441629271, iteration: 223568
loss: 0.9916476607322693,grad_norm: 0.8614922314897294, iteration: 223569
loss: 1.008266806602478,grad_norm: 0.9999989914616959, iteration: 223570
loss: 1.0137059688568115,grad_norm: 0.7306466390472482, iteration: 223571
loss: 1.037591814994812,grad_norm: 0.8984750183172296, iteration: 223572
loss: 0.9887095093727112,grad_norm: 0.9292682156141981, iteration: 223573
loss: 1.0717148780822754,grad_norm: 0.9999990206369047, iteration: 223574
loss: 0.9946975708007812,grad_norm: 0.7817842960957191, iteration: 223575
loss: 1.0186694860458374,grad_norm: 0.8997073502033925, iteration: 223576
loss: 1.0513639450073242,grad_norm: 0.8891465634054971, iteration: 223577
loss: 1.0211031436920166,grad_norm: 0.9735076619785121, iteration: 223578
loss: 1.0093244314193726,grad_norm: 0.9251546889169099, iteration: 223579
loss: 1.0134128332138062,grad_norm: 0.7566341146310238, iteration: 223580
loss: 0.9987464547157288,grad_norm: 0.9999991661996221, iteration: 223581
loss: 1.0216271877288818,grad_norm: 0.8075089154974896, iteration: 223582
loss: 1.0220377445220947,grad_norm: 0.896574130752813, iteration: 223583
loss: 1.0221941471099854,grad_norm: 0.8534148763650437, iteration: 223584
loss: 0.9874415993690491,grad_norm: 0.9550112051257539, iteration: 223585
loss: 1.0253323316574097,grad_norm: 0.9515733375754221, iteration: 223586
loss: 0.99628084897995,grad_norm: 0.9123087761241668, iteration: 223587
loss: 0.9913809299468994,grad_norm: 0.8533579369377695, iteration: 223588
loss: 1.0197100639343262,grad_norm: 0.9200489700305206, iteration: 223589
loss: 1.088037133216858,grad_norm: 0.9999991143573763, iteration: 223590
loss: 1.0297770500183105,grad_norm: 0.999999303026896, iteration: 223591
loss: 0.9911315441131592,grad_norm: 0.9999996032196712, iteration: 223592
loss: 0.9712960124015808,grad_norm: 0.9158666031624066, iteration: 223593
loss: 0.9571161866188049,grad_norm: 0.765756339214729, iteration: 223594
loss: 1.0298089981079102,grad_norm: 0.9999990875726933, iteration: 223595
loss: 0.998783528804779,grad_norm: 0.8506802658752508, iteration: 223596
loss: 1.0165319442749023,grad_norm: 0.8495301613487007, iteration: 223597
loss: 0.9945224523544312,grad_norm: 0.8216033443805504, iteration: 223598
loss: 1.0101252794265747,grad_norm: 0.8694439430961095, iteration: 223599
loss: 0.9720967411994934,grad_norm: 0.8941173465731806, iteration: 223600
loss: 0.9808602929115295,grad_norm: 0.870826741784728, iteration: 223601
loss: 0.9888031482696533,grad_norm: 0.8969784197546781, iteration: 223602
loss: 0.9640229940414429,grad_norm: 0.9952015004671244, iteration: 223603
loss: 0.9519354701042175,grad_norm: 0.8062925055618673, iteration: 223604
loss: 1.0124602317810059,grad_norm: 0.8152682047033236, iteration: 223605
loss: 1.0082651376724243,grad_norm: 0.8092731316585319, iteration: 223606
loss: 0.9965265393257141,grad_norm: 0.837760257934036, iteration: 223607
loss: 0.9492543935775757,grad_norm: 0.9974147350915067, iteration: 223608
loss: 0.973804235458374,grad_norm: 0.9999992291708681, iteration: 223609
loss: 0.9938740730285645,grad_norm: 0.9999990147268065, iteration: 223610
loss: 1.0562502145767212,grad_norm: 0.8564592534846349, iteration: 223611
loss: 0.9879350066184998,grad_norm: 0.9451949625940866, iteration: 223612
loss: 1.0292384624481201,grad_norm: 0.9999991019192904, iteration: 223613
loss: 0.9882306456565857,grad_norm: 0.8093460903465121, iteration: 223614
loss: 1.028896450996399,grad_norm: 0.9999992000211737, iteration: 223615
loss: 1.1266148090362549,grad_norm: 1.0000000319497466, iteration: 223616
loss: 0.9648635387420654,grad_norm: 0.8637403862367828, iteration: 223617
loss: 0.99021977186203,grad_norm: 0.9999996764063522, iteration: 223618
loss: 0.9879345893859863,grad_norm: 0.9586909254387085, iteration: 223619
loss: 1.0244622230529785,grad_norm: 0.9999992576975673, iteration: 223620
loss: 0.9937523007392883,grad_norm: 0.8931822901250834, iteration: 223621
loss: 0.9996185302734375,grad_norm: 0.9999992241595483, iteration: 223622
loss: 1.02428138256073,grad_norm: 0.9999990584788172, iteration: 223623
loss: 0.9614995121955872,grad_norm: 0.7886979857766842, iteration: 223624
loss: 1.0097805261611938,grad_norm: 0.99999901515711, iteration: 223625
loss: 1.0299389362335205,grad_norm: 0.9999990567330818, iteration: 223626
loss: 1.0261585712432861,grad_norm: 0.8425233021011317, iteration: 223627
loss: 1.0341702699661255,grad_norm: 0.9999990880147696, iteration: 223628
loss: 0.9811351299285889,grad_norm: 0.9566607591792716, iteration: 223629
loss: 0.989716649055481,grad_norm: 0.9999989304436157, iteration: 223630
loss: 1.0621402263641357,grad_norm: 0.9179406140388953, iteration: 223631
loss: 1.0109385251998901,grad_norm: 0.8111324601410197, iteration: 223632
loss: 1.0163100957870483,grad_norm: 0.9999990409160144, iteration: 223633
loss: 0.9907582998275757,grad_norm: 0.9999992580070306, iteration: 223634
loss: 0.9980196356773376,grad_norm: 0.9999990127432177, iteration: 223635
loss: 0.9817803502082825,grad_norm: 0.8044541582832818, iteration: 223636
loss: 1.0111368894577026,grad_norm: 0.9999991048239132, iteration: 223637
loss: 0.9956129193305969,grad_norm: 0.9641151833518851, iteration: 223638
loss: 0.9748043417930603,grad_norm: 0.7407232347760165, iteration: 223639
loss: 0.987571656703949,grad_norm: 0.9081155868983288, iteration: 223640
loss: 0.9870428442955017,grad_norm: 0.9999990359766172, iteration: 223641
loss: 0.9927387833595276,grad_norm: 0.9999990080503182, iteration: 223642
loss: 0.9779298901557922,grad_norm: 0.7813507205598634, iteration: 223643
loss: 1.0115631818771362,grad_norm: 0.9074240060817378, iteration: 223644
loss: 0.9776695966720581,grad_norm: 0.9999991183167211, iteration: 223645
loss: 0.9930571913719177,grad_norm: 0.9643200055275726, iteration: 223646
loss: 0.9575819969177246,grad_norm: 0.9999991246240653, iteration: 223647
loss: 0.9629784822463989,grad_norm: 0.9868406339234629, iteration: 223648
loss: 0.9889715313911438,grad_norm: 0.9999990821331389, iteration: 223649
loss: 0.9996559023857117,grad_norm: 0.8939038826297514, iteration: 223650
loss: 1.0110766887664795,grad_norm: 0.9999992814018454, iteration: 223651
loss: 1.0060170888900757,grad_norm: 0.9636414480052383, iteration: 223652
loss: 1.0111033916473389,grad_norm: 0.8716172794492468, iteration: 223653
loss: 1.0576096773147583,grad_norm: 0.9999995349294675, iteration: 223654
loss: 0.9961326122283936,grad_norm: 0.9999991027212968, iteration: 223655
loss: 0.9959165453910828,grad_norm: 0.8897752380992349, iteration: 223656
loss: 0.9959554672241211,grad_norm: 0.9999990463069486, iteration: 223657
loss: 1.0194507837295532,grad_norm: 0.9999994649103243, iteration: 223658
loss: 0.9851650595664978,grad_norm: 0.820705138963086, iteration: 223659
loss: 1.0131019353866577,grad_norm: 0.808755714145528, iteration: 223660
loss: 1.033557653427124,grad_norm: 0.9999992026805224, iteration: 223661
loss: 0.9905104041099548,grad_norm: 0.8268504510003133, iteration: 223662
loss: 1.008228063583374,grad_norm: 0.9999991424955005, iteration: 223663
loss: 0.9937930107116699,grad_norm: 0.9999991205931548, iteration: 223664
loss: 0.9881781935691833,grad_norm: 0.8356314468910198, iteration: 223665
loss: 0.9667024612426758,grad_norm: 0.9808199974253251, iteration: 223666
loss: 1.008344292640686,grad_norm: 0.9999832366786007, iteration: 223667
loss: 0.9775949716567993,grad_norm: 0.9508848242329184, iteration: 223668
loss: 1.0185822248458862,grad_norm: 0.9354311909133024, iteration: 223669
loss: 1.0216349363327026,grad_norm: 0.9433624638753827, iteration: 223670
loss: 1.0297155380249023,grad_norm: 0.999999029373872, iteration: 223671
loss: 0.998312771320343,grad_norm: 0.9999990874726155, iteration: 223672
loss: 1.0022584199905396,grad_norm: 0.8018849740947283, iteration: 223673
loss: 0.9563856720924377,grad_norm: 0.9192760506243313, iteration: 223674
loss: 0.9782920479774475,grad_norm: 0.8635220446641987, iteration: 223675
loss: 1.0374128818511963,grad_norm: 0.8995608437069816, iteration: 223676
loss: 0.9875755310058594,grad_norm: 0.9999990519180945, iteration: 223677
loss: 1.0158108472824097,grad_norm: 0.9807478073747005, iteration: 223678
loss: 0.9742572903633118,grad_norm: 0.9292355893097229, iteration: 223679
loss: 1.0255826711654663,grad_norm: 0.999999557312918, iteration: 223680
loss: 0.9653542637825012,grad_norm: 0.9099080305747559, iteration: 223681
loss: 1.0243664979934692,grad_norm: 0.9999991442757946, iteration: 223682
loss: 1.0128095149993896,grad_norm: 0.9100295894645136, iteration: 223683
loss: 0.974490225315094,grad_norm: 0.9508562678323427, iteration: 223684
loss: 1.004036784172058,grad_norm: 0.9999991255447179, iteration: 223685
loss: 0.9985132217407227,grad_norm: 0.8317122211928925, iteration: 223686
loss: 1.0019409656524658,grad_norm: 0.9178907732141233, iteration: 223687
loss: 1.0001215934753418,grad_norm: 0.9999992197010923, iteration: 223688
loss: 0.9931663274765015,grad_norm: 0.9446275431532777, iteration: 223689
loss: 0.9694210290908813,grad_norm: 0.7684974487456733, iteration: 223690
loss: 0.990214467048645,grad_norm: 0.9740530368059408, iteration: 223691
loss: 1.0199713706970215,grad_norm: 0.9999991757906869, iteration: 223692
loss: 0.9989054799079895,grad_norm: 0.962280626642034, iteration: 223693
loss: 0.997566819190979,grad_norm: 0.999999127579699, iteration: 223694
loss: 1.0277796983718872,grad_norm: 0.9410569891187983, iteration: 223695
loss: 0.9907239079475403,grad_norm: 0.94531100215614, iteration: 223696
loss: 1.0185940265655518,grad_norm: 0.999999154710713, iteration: 223697
loss: 0.982586681842804,grad_norm: 0.8748465687325928, iteration: 223698
loss: 1.00117027759552,grad_norm: 0.9343701619732021, iteration: 223699
loss: 1.0048699378967285,grad_norm: 0.7069623971293192, iteration: 223700
loss: 1.002681851387024,grad_norm: 0.884153997374478, iteration: 223701
loss: 1.021302342414856,grad_norm: 0.9999992761663673, iteration: 223702
loss: 1.0005522966384888,grad_norm: 0.9214776000609554, iteration: 223703
loss: 0.9771531820297241,grad_norm: 0.727689083374712, iteration: 223704
loss: 0.9781768321990967,grad_norm: 0.9999991235704221, iteration: 223705
loss: 0.9890996217727661,grad_norm: 0.9999991536759383, iteration: 223706
loss: 0.991631031036377,grad_norm: 0.8017097356366164, iteration: 223707
loss: 0.9585155248641968,grad_norm: 0.8997286195353407, iteration: 223708
loss: 0.9549042582511902,grad_norm: 0.8498200560410565, iteration: 223709
loss: 0.983303964138031,grad_norm: 0.9999990041140906, iteration: 223710
loss: 0.9711942076683044,grad_norm: 0.9029994847545222, iteration: 223711
loss: 1.0031312704086304,grad_norm: 0.7560084196102939, iteration: 223712
loss: 0.9741178154945374,grad_norm: 0.9331711839611059, iteration: 223713
loss: 0.9466801881790161,grad_norm: 0.9999990589012278, iteration: 223714
loss: 0.9909502267837524,grad_norm: 0.9042861004409584, iteration: 223715
loss: 1.0309312343597412,grad_norm: 0.8991861148637262, iteration: 223716
loss: 1.0033468008041382,grad_norm: 0.9568779488493877, iteration: 223717
loss: 1.0378327369689941,grad_norm: 0.9999990968546524, iteration: 223718
loss: 1.0241787433624268,grad_norm: 0.9819338719775409, iteration: 223719
loss: 1.0300612449645996,grad_norm: 0.999999128760938, iteration: 223720
loss: 1.0065184831619263,grad_norm: 0.8882211559666376, iteration: 223721
loss: 1.0259835720062256,grad_norm: 0.7950190379543794, iteration: 223722
loss: 1.0510241985321045,grad_norm: 0.9999991613088062, iteration: 223723
loss: 0.9951239824295044,grad_norm: 0.9505538925165465, iteration: 223724
loss: 1.0304676294326782,grad_norm: 0.999999185617558, iteration: 223725
loss: 0.9881893992424011,grad_norm: 0.838480491945659, iteration: 223726
loss: 1.0293418169021606,grad_norm: 0.9999991636818957, iteration: 223727
loss: 0.9336185455322266,grad_norm: 0.9669923425655076, iteration: 223728
loss: 0.9780005216598511,grad_norm: 0.9369956469086218, iteration: 223729
loss: 0.9897508025169373,grad_norm: 0.9297665249714293, iteration: 223730
loss: 1.0602717399597168,grad_norm: 0.9999992183972923, iteration: 223731
loss: 0.9678688645362854,grad_norm: 0.9093968577046673, iteration: 223732
loss: 0.9826231598854065,grad_norm: 0.8220591320888204, iteration: 223733
loss: 1.0402946472167969,grad_norm: 0.9999998814307198, iteration: 223734
loss: 0.9630404114723206,grad_norm: 0.8831443265334441, iteration: 223735
loss: 0.9736999273300171,grad_norm: 0.8725179989132159, iteration: 223736
loss: 1.0298850536346436,grad_norm: 0.838680300385751, iteration: 223737
loss: 0.9980989098548889,grad_norm: 0.9022320046417506, iteration: 223738
loss: 1.0034674406051636,grad_norm: 0.9922060490895651, iteration: 223739
loss: 0.9994931817054749,grad_norm: 0.999999840901612, iteration: 223740
loss: 1.0453071594238281,grad_norm: 0.9597765306917164, iteration: 223741
loss: 1.0207284688949585,grad_norm: 0.899711358660045, iteration: 223742
loss: 0.9899643063545227,grad_norm: 0.9199739534673239, iteration: 223743
loss: 1.0342015027999878,grad_norm: 0.9036850259978197, iteration: 223744
loss: 0.9583877325057983,grad_norm: 0.9235512607994365, iteration: 223745
loss: 0.9952640533447266,grad_norm: 0.890273338780699, iteration: 223746
loss: 0.9907926321029663,grad_norm: 0.9188490721769876, iteration: 223747
loss: 1.0366798639297485,grad_norm: 0.999999866817734, iteration: 223748
loss: 1.0026768445968628,grad_norm: 0.9110063187820009, iteration: 223749
loss: 1.0255967378616333,grad_norm: 0.8916351678715898, iteration: 223750
loss: 1.0291252136230469,grad_norm: 0.9065962081116582, iteration: 223751
loss: 0.9487638473510742,grad_norm: 0.837425501160688, iteration: 223752
loss: 1.008425235748291,grad_norm: 0.9999990432671257, iteration: 223753
loss: 1.0120936632156372,grad_norm: 0.9999992232387585, iteration: 223754
loss: 1.0203765630722046,grad_norm: 0.8898373280067463, iteration: 223755
loss: 0.9466918706893921,grad_norm: 0.9999991291758424, iteration: 223756
loss: 0.9797409176826477,grad_norm: 0.7635698938621657, iteration: 223757
loss: 1.029708743095398,grad_norm: 0.9849281462184897, iteration: 223758
loss: 0.9855365753173828,grad_norm: 0.9999990327062172, iteration: 223759
loss: 0.9529721736907959,grad_norm: 0.9343204969081936, iteration: 223760
loss: 0.9903554320335388,grad_norm: 0.9819997862776816, iteration: 223761
loss: 0.9978883862495422,grad_norm: 0.9338491008147252, iteration: 223762
loss: 0.9954162240028381,grad_norm: 0.8958802168486579, iteration: 223763
loss: 1.0193983316421509,grad_norm: 0.8913029147453425, iteration: 223764
loss: 1.0029568672180176,grad_norm: 0.9999990289186623, iteration: 223765
loss: 0.9541316032409668,grad_norm: 0.9999991668470424, iteration: 223766
loss: 0.9916530847549438,grad_norm: 0.8660697178882383, iteration: 223767
loss: 1.0093693733215332,grad_norm: 0.7950309730517131, iteration: 223768
loss: 1.0058300495147705,grad_norm: 0.9999990706364131, iteration: 223769
loss: 1.027623176574707,grad_norm: 0.9999990417535212, iteration: 223770
loss: 0.9862017035484314,grad_norm: 0.9583315628013895, iteration: 223771
loss: 1.019180178642273,grad_norm: 0.7948075159008727, iteration: 223772
loss: 1.0432177782058716,grad_norm: 0.9999990387416869, iteration: 223773
loss: 0.9973131418228149,grad_norm: 0.9291563886417208, iteration: 223774
loss: 1.0051233768463135,grad_norm: 0.8703267609556745, iteration: 223775
loss: 1.00422203540802,grad_norm: 0.8775878937190522, iteration: 223776
loss: 1.0251585245132446,grad_norm: 0.7872411532254303, iteration: 223777
loss: 1.005283236503601,grad_norm: 0.9086371340610644, iteration: 223778
loss: 0.972814679145813,grad_norm: 0.7451787575289271, iteration: 223779
loss: 1.020478367805481,grad_norm: 0.9270901864497864, iteration: 223780
loss: 1.007187008857727,grad_norm: 0.9999989488664107, iteration: 223781
loss: 0.9810473918914795,grad_norm: 0.920142272331943, iteration: 223782
loss: 0.9598977565765381,grad_norm: 0.8721479541263455, iteration: 223783
loss: 1.0201221704483032,grad_norm: 0.9999990793810463, iteration: 223784
loss: 1.0199753046035767,grad_norm: 0.8360095550693745, iteration: 223785
loss: 0.9944324493408203,grad_norm: 0.8890672624506387, iteration: 223786
loss: 1.0061726570129395,grad_norm: 0.9999991286614498, iteration: 223787
loss: 0.9975541234016418,grad_norm: 0.9454596691473938, iteration: 223788
loss: 1.001002311706543,grad_norm: 0.9100712870409993, iteration: 223789
loss: 1.0264286994934082,grad_norm: 0.8485141818657455, iteration: 223790
loss: 1.0540250539779663,grad_norm: 0.9999990173835583, iteration: 223791
loss: 0.9931581616401672,grad_norm: 0.99999910375233, iteration: 223792
loss: 1.018810749053955,grad_norm: 0.9820426237352553, iteration: 223793
loss: 0.9964304566383362,grad_norm: 0.9353441553778712, iteration: 223794
loss: 0.9891679883003235,grad_norm: 0.8645017823700265, iteration: 223795
loss: 0.9754691123962402,grad_norm: 0.999998997876789, iteration: 223796
loss: 0.9866628050804138,grad_norm: 0.9999990360794893, iteration: 223797
loss: 0.981303334236145,grad_norm: 0.8985530469765632, iteration: 223798
loss: 0.9977440237998962,grad_norm: 0.9600219774553955, iteration: 223799
loss: 0.969778835773468,grad_norm: 0.851206749979467, iteration: 223800
loss: 0.9830747842788696,grad_norm: 0.888545099276599, iteration: 223801
loss: 0.9762636423110962,grad_norm: 0.9999991277020001, iteration: 223802
loss: 1.0044127702713013,grad_norm: 0.9516802599581405, iteration: 223803
loss: 0.99931800365448,grad_norm: 0.9855441311022209, iteration: 223804
loss: 1.0041934251785278,grad_norm: 0.8836804276155996, iteration: 223805
loss: 1.0117613077163696,grad_norm: 0.9999990425071007, iteration: 223806
loss: 0.9741262793540955,grad_norm: 0.8362461781703753, iteration: 223807
loss: 0.9948816895484924,grad_norm: 0.9409859609590785, iteration: 223808
loss: 1.0077660083770752,grad_norm: 0.9745316082945344, iteration: 223809
loss: 0.9950651526451111,grad_norm: 0.9011600097851412, iteration: 223810
loss: 1.0049315690994263,grad_norm: 0.9999990440752197, iteration: 223811
loss: 1.0126492977142334,grad_norm: 0.9999991900326977, iteration: 223812
loss: 0.9787861108779907,grad_norm: 0.9176478231433689, iteration: 223813
loss: 0.9964175224304199,grad_norm: 0.9999991975662562, iteration: 223814
loss: 1.033372163772583,grad_norm: 0.9328130485782609, iteration: 223815
loss: 0.9918179512023926,grad_norm: 0.8901840294402426, iteration: 223816
loss: 1.0300178527832031,grad_norm: 0.8417819720737001, iteration: 223817
loss: 1.0340226888656616,grad_norm: 0.9999988967775846, iteration: 223818
loss: 1.0010604858398438,grad_norm: 0.9880885819652537, iteration: 223819
loss: 1.002825379371643,grad_norm: 0.8593819948012447, iteration: 223820
loss: 0.992837131023407,grad_norm: 0.9671225813955001, iteration: 223821
loss: 0.9890773296356201,grad_norm: 0.8306271401418653, iteration: 223822
loss: 1.0203174352645874,grad_norm: 0.8410918386694622, iteration: 223823
loss: 1.0023105144500732,grad_norm: 0.9999989820897426, iteration: 223824
loss: 0.9876832962036133,grad_norm: 0.9543835481775446, iteration: 223825
loss: 0.9552497267723083,grad_norm: 0.9268891599410161, iteration: 223826
loss: 1.0286163091659546,grad_norm: 0.8039384744026163, iteration: 223827
loss: 0.9647020697593689,grad_norm: 0.8763164919058339, iteration: 223828
loss: 0.9628906846046448,grad_norm: 0.999999284018088, iteration: 223829
loss: 0.9561156034469604,grad_norm: 0.9637123251415329, iteration: 223830
loss: 1.0168226957321167,grad_norm: 0.796904656240421, iteration: 223831
loss: 0.9736731648445129,grad_norm: 0.8159196045125549, iteration: 223832
loss: 1.0134464502334595,grad_norm: 0.9041406012800336, iteration: 223833
loss: 0.9922377467155457,grad_norm: 0.9999990548661696, iteration: 223834
loss: 0.9471476078033447,grad_norm: 0.8758983965889752, iteration: 223835
loss: 1.0227482318878174,grad_norm: 0.9999991860000729, iteration: 223836
loss: 0.9824691414833069,grad_norm: 0.9999991696262279, iteration: 223837
loss: 1.0225833654403687,grad_norm: 0.9999989840905957, iteration: 223838
loss: 1.0367776155471802,grad_norm: 0.9999993556040724, iteration: 223839
loss: 0.9794595241546631,grad_norm: 0.9556261264865447, iteration: 223840
loss: 0.9857550859451294,grad_norm: 0.7913441769065116, iteration: 223841
loss: 0.96487957239151,grad_norm: 0.7988737524471514, iteration: 223842
loss: 0.9862064123153687,grad_norm: 0.8176478367661794, iteration: 223843
loss: 0.9934348464012146,grad_norm: 0.9323233165307395, iteration: 223844
loss: 0.9592478275299072,grad_norm: 0.8771614396813933, iteration: 223845
loss: 0.9915246367454529,grad_norm: 0.9621215982027147, iteration: 223846
loss: 1.0063916444778442,grad_norm: 0.9999992289957643, iteration: 223847
loss: 1.0036733150482178,grad_norm: 0.8547532506394726, iteration: 223848
loss: 0.9985880851745605,grad_norm: 0.9999991104680895, iteration: 223849
loss: 0.9518746733665466,grad_norm: 0.9339121120800665, iteration: 223850
loss: 0.9792433977127075,grad_norm: 0.9999990430226642, iteration: 223851
loss: 0.9825984835624695,grad_norm: 0.8797072939567735, iteration: 223852
loss: 1.0110217332839966,grad_norm: 0.864938124508288, iteration: 223853
loss: 0.9791358113288879,grad_norm: 0.7588912567863471, iteration: 223854
loss: 1.0034515857696533,grad_norm: 0.9999990013804618, iteration: 223855
loss: 0.9971114993095398,grad_norm: 0.9383523698276736, iteration: 223856
loss: 0.9804319739341736,grad_norm: 0.9999992856953165, iteration: 223857
loss: 0.9608559012413025,grad_norm: 0.9955738716174328, iteration: 223858
loss: 1.0251593589782715,grad_norm: 0.9013105240971616, iteration: 223859
loss: 0.991430401802063,grad_norm: 0.8733612926066286, iteration: 223860
loss: 0.9913508892059326,grad_norm: 0.9999990979848568, iteration: 223861
loss: 1.0010695457458496,grad_norm: 0.7937102382538943, iteration: 223862
loss: 0.995877206325531,grad_norm: 0.9006773030541398, iteration: 223863
loss: 0.9778226613998413,grad_norm: 0.9999990027228615, iteration: 223864
loss: 0.9503324627876282,grad_norm: 0.8056959052955311, iteration: 223865
loss: 0.9862255454063416,grad_norm: 0.8719746875770448, iteration: 223866
loss: 0.9474849700927734,grad_norm: 0.9496706431852086, iteration: 223867
loss: 1.0250684022903442,grad_norm: 0.9866073873120449, iteration: 223868
loss: 1.0349419116973877,grad_norm: 0.9999991888763076, iteration: 223869
loss: 0.9810341596603394,grad_norm: 0.8775338199032398, iteration: 223870
loss: 0.997215211391449,grad_norm: 0.9220266147306232, iteration: 223871
loss: 0.9894741177558899,grad_norm: 0.9999991472602708, iteration: 223872
loss: 1.0171178579330444,grad_norm: 0.9999994650924342, iteration: 223873
loss: 0.9897717237472534,grad_norm: 0.8932425026904424, iteration: 223874
loss: 0.9864433407783508,grad_norm: 0.8525985549997734, iteration: 223875
loss: 0.9980390071868896,grad_norm: 0.9420852203922911, iteration: 223876
loss: 0.9797614216804504,grad_norm: 0.9153075782791209, iteration: 223877
loss: 1.023525595664978,grad_norm: 0.8601906145021299, iteration: 223878
loss: 0.9841372966766357,grad_norm: 0.9595630703684963, iteration: 223879
loss: 1.0198363065719604,grad_norm: 0.9999990722615637, iteration: 223880
loss: 1.017463207244873,grad_norm: 0.9999990251018566, iteration: 223881
loss: 0.9984465837478638,grad_norm: 0.9999989940098805, iteration: 223882
loss: 1.0116658210754395,grad_norm: 0.9302253519858581, iteration: 223883
loss: 0.9974134564399719,grad_norm: 0.8590722260319109, iteration: 223884
loss: 0.970995306968689,grad_norm: 0.9999990161900905, iteration: 223885
loss: 0.9815595746040344,grad_norm: 0.9999990879900782, iteration: 223886
loss: 0.9968343377113342,grad_norm: 0.7522591173660155, iteration: 223887
loss: 0.947202742099762,grad_norm: 0.9208901624151133, iteration: 223888
loss: 0.9578936100006104,grad_norm: 0.9225715254074544, iteration: 223889
loss: 0.9761407971382141,grad_norm: 0.8782292057878868, iteration: 223890
loss: 0.9986222386360168,grad_norm: 0.9999996526883214, iteration: 223891
loss: 1.007098913192749,grad_norm: 0.9999997569194332, iteration: 223892
loss: 1.038642168045044,grad_norm: 0.8797920708574914, iteration: 223893
loss: 1.0255038738250732,grad_norm: 0.9059272975339193, iteration: 223894
loss: 1.037334680557251,grad_norm: 0.9596449056089261, iteration: 223895
loss: 1.0046160221099854,grad_norm: 0.9999991151739798, iteration: 223896
loss: 1.0261359214782715,grad_norm: 0.999999143709444, iteration: 223897
loss: 1.0144610404968262,grad_norm: 0.9999989774721705, iteration: 223898
loss: 1.000738501548767,grad_norm: 0.8376035080300364, iteration: 223899
loss: 0.998187780380249,grad_norm: 0.9999990936299527, iteration: 223900
loss: 0.9974968433380127,grad_norm: 0.9550992716875173, iteration: 223901
loss: 1.0431526899337769,grad_norm: 0.9593996871669801, iteration: 223902
loss: 1.008891224861145,grad_norm: 0.7864778050297935, iteration: 223903
loss: 0.9952460527420044,grad_norm: 0.9999991930865237, iteration: 223904
loss: 1.0176295042037964,grad_norm: 0.8947650873272689, iteration: 223905
loss: 1.0165607929229736,grad_norm: 0.9999996948235607, iteration: 223906
loss: 0.9992215037345886,grad_norm: 0.8405765513448403, iteration: 223907
loss: 0.9958431720733643,grad_norm: 0.9806588272830714, iteration: 223908
loss: 1.0218234062194824,grad_norm: 0.9550058373588922, iteration: 223909
loss: 1.0028302669525146,grad_norm: 0.9999989492251754, iteration: 223910
loss: 0.974368155002594,grad_norm: 0.9563349682280052, iteration: 223911
loss: 1.0358359813690186,grad_norm: 0.9389456729573304, iteration: 223912
loss: 0.9872861504554749,grad_norm: 0.9740838885565941, iteration: 223913
loss: 0.9647728204727173,grad_norm: 0.9999990606410765, iteration: 223914
loss: 1.0279947519302368,grad_norm: 0.999999138245045, iteration: 223915
loss: 1.0009173154830933,grad_norm: 0.8792270742939062, iteration: 223916
loss: 0.9943181276321411,grad_norm: 0.9349752082442442, iteration: 223917
loss: 1.0197737216949463,grad_norm: 0.9081076018296852, iteration: 223918
loss: 1.0079166889190674,grad_norm: 0.9999992104162786, iteration: 223919
loss: 0.9755561351776123,grad_norm: 0.9999998782367924, iteration: 223920
loss: 0.9941865801811218,grad_norm: 0.9896780350841259, iteration: 223921
loss: 0.9726380705833435,grad_norm: 0.9270854862967838, iteration: 223922
loss: 1.0034232139587402,grad_norm: 0.9279607159192359, iteration: 223923
loss: 0.9942156672477722,grad_norm: 0.8215719856719369, iteration: 223924
loss: 1.0065834522247314,grad_norm: 0.9999991443777195, iteration: 223925
loss: 1.0023703575134277,grad_norm: 0.9628593180681246, iteration: 223926
loss: 0.9829686880111694,grad_norm: 0.9999990333790172, iteration: 223927
loss: 1.0013175010681152,grad_norm: 0.9356692377153034, iteration: 223928
loss: 0.9912747144699097,grad_norm: 0.9999991058772174, iteration: 223929
loss: 0.9677126407623291,grad_norm: 0.9999991169791838, iteration: 223930
loss: 1.0188469886779785,grad_norm: 0.9999994933286034, iteration: 223931
loss: 0.953465461730957,grad_norm: 0.9986427215455993, iteration: 223932
loss: 0.9873900413513184,grad_norm: 0.8546226071168941, iteration: 223933
loss: 1.0511332750320435,grad_norm: 0.9999995287521413, iteration: 223934
loss: 1.0023407936096191,grad_norm: 0.9995746567317247, iteration: 223935
loss: 1.0035396814346313,grad_norm: 0.8518363842537996, iteration: 223936
loss: 1.0018254518508911,grad_norm: 0.9999991511815937, iteration: 223937
loss: 1.0018984079360962,grad_norm: 0.893334545937656, iteration: 223938
loss: 1.0451719760894775,grad_norm: 0.9999990596648128, iteration: 223939
loss: 1.0034544467926025,grad_norm: 0.9188403931331821, iteration: 223940
loss: 1.0572917461395264,grad_norm: 0.9999998921628215, iteration: 223941
loss: 0.9562254548072815,grad_norm: 0.7247553858249772, iteration: 223942
loss: 1.0020039081573486,grad_norm: 0.9040445036184925, iteration: 223943
loss: 1.0667712688446045,grad_norm: 0.9367056955238108, iteration: 223944
loss: 0.9883949160575867,grad_norm: 0.9999991552393684, iteration: 223945
loss: 1.0085302591323853,grad_norm: 0.7952412691574038, iteration: 223946
loss: 1.0008246898651123,grad_norm: 0.8227418952330711, iteration: 223947
loss: 1.032677173614502,grad_norm: 0.999999079462622, iteration: 223948
loss: 1.0272657871246338,grad_norm: 0.8112254670591549, iteration: 223949
loss: 1.01339590549469,grad_norm: 0.9999991547789692, iteration: 223950
loss: 1.003555178642273,grad_norm: 0.9999990922548407, iteration: 223951
loss: 0.9899148941040039,grad_norm: 0.8710683717982709, iteration: 223952
loss: 1.0024083852767944,grad_norm: 0.9612736827782536, iteration: 223953
loss: 0.9727921485900879,grad_norm: 0.8907971464775256, iteration: 223954
loss: 0.9610484838485718,grad_norm: 0.9999991464926754, iteration: 223955
loss: 1.043774962425232,grad_norm: 0.7720999195704651, iteration: 223956
loss: 1.0236867666244507,grad_norm: 0.9230865667495295, iteration: 223957
loss: 1.002821445465088,grad_norm: 0.9460932546216453, iteration: 223958
loss: 1.0070085525512695,grad_norm: 0.9999998075719257, iteration: 223959
loss: 0.9651365280151367,grad_norm: 0.8331639951816276, iteration: 223960
loss: 0.9501795768737793,grad_norm: 0.946492695999037, iteration: 223961
loss: 1.0188349485397339,grad_norm: 0.842116880669465, iteration: 223962
loss: 0.9825935959815979,grad_norm: 0.93325076201371, iteration: 223963
loss: 1.0000969171524048,grad_norm: 0.999999069401382, iteration: 223964
loss: 1.0091736316680908,grad_norm: 0.7663177309586, iteration: 223965
loss: 0.968149721622467,grad_norm: 0.9533483039653038, iteration: 223966
loss: 0.9787427186965942,grad_norm: 0.8680584440035287, iteration: 223967
loss: 1.0384209156036377,grad_norm: 0.9999996759704591, iteration: 223968
loss: 1.0207774639129639,grad_norm: 0.7899902980197403, iteration: 223969
loss: 0.9608628749847412,grad_norm: 0.9054531324245934, iteration: 223970
loss: 1.0117141008377075,grad_norm: 0.9278813747037191, iteration: 223971
loss: 1.010584831237793,grad_norm: 0.9999990740811191, iteration: 223972
loss: 1.007177472114563,grad_norm: 0.7830006709218575, iteration: 223973
loss: 1.01594078540802,grad_norm: 0.8232858462569262, iteration: 223974
loss: 0.9882069826126099,grad_norm: 0.8699265525364682, iteration: 223975
loss: 1.0024248361587524,grad_norm: 0.8336243477563042, iteration: 223976
loss: 0.986369252204895,grad_norm: 0.9678167075151181, iteration: 223977
loss: 0.9842226505279541,grad_norm: 0.9999992339752102, iteration: 223978
loss: 1.0293560028076172,grad_norm: 0.9999991124550812, iteration: 223979
loss: 1.0092849731445312,grad_norm: 0.9677889028598793, iteration: 223980
loss: 1.0178401470184326,grad_norm: 0.9999990933946423, iteration: 223981
loss: 1.0480434894561768,grad_norm: 0.8882863864038978, iteration: 223982
loss: 0.9542883038520813,grad_norm: 0.9999991677195305, iteration: 223983
loss: 0.9874075055122375,grad_norm: 0.9865571938306993, iteration: 223984
loss: 1.023844599723816,grad_norm: 0.7771693969392157, iteration: 223985
loss: 1.0036661624908447,grad_norm: 0.9999993399867143, iteration: 223986
loss: 1.0093963146209717,grad_norm: 0.98710607848486, iteration: 223987
loss: 1.011040210723877,grad_norm: 0.9023777209478817, iteration: 223988
loss: 1.0165989398956299,grad_norm: 0.9777694639618434, iteration: 223989
loss: 0.9476392865180969,grad_norm: 0.9999990858630172, iteration: 223990
loss: 0.9830337762832642,grad_norm: 0.8993517280231328, iteration: 223991
loss: 1.0221906900405884,grad_norm: 0.783829023836666, iteration: 223992
loss: 1.0200875997543335,grad_norm: 0.9028533713082432, iteration: 223993
loss: 0.9832205176353455,grad_norm: 0.9355335019025636, iteration: 223994
loss: 1.0002083778381348,grad_norm: 0.9999998751007158, iteration: 223995
loss: 1.0240962505340576,grad_norm: 0.9125660189301072, iteration: 223996
loss: 0.9887591004371643,grad_norm: 0.9999992658647662, iteration: 223997
loss: 1.0022802352905273,grad_norm: 0.8528684969443293, iteration: 223998
loss: 0.9897019267082214,grad_norm: 0.9999990184827668, iteration: 223999
loss: 0.9986622333526611,grad_norm: 0.8563572665671557, iteration: 224000
loss: 1.0211355686187744,grad_norm: 0.8986696246280338, iteration: 224001
loss: 1.0252593755722046,grad_norm: 0.90110669157378, iteration: 224002
loss: 0.9701592326164246,grad_norm: 0.9999990505501926, iteration: 224003
loss: 0.9861353635787964,grad_norm: 0.8251646653661806, iteration: 224004
loss: 1.0263407230377197,grad_norm: 0.9117558672978873, iteration: 224005
loss: 0.9778807163238525,grad_norm: 0.9807127812580373, iteration: 224006
loss: 1.0035278797149658,grad_norm: 0.9398953135856215, iteration: 224007
loss: 1.0011647939682007,grad_norm: 0.9103192840297979, iteration: 224008
loss: 0.9941349029541016,grad_norm: 0.8871653677476399, iteration: 224009
loss: 0.9867660403251648,grad_norm: 0.9080646761936869, iteration: 224010
loss: 1.0017025470733643,grad_norm: 0.8315725583892715, iteration: 224011
loss: 0.9671492576599121,grad_norm: 0.909823857035059, iteration: 224012
loss: 1.0090786218643188,grad_norm: 0.871705819842915, iteration: 224013
loss: 1.0089486837387085,grad_norm: 0.8465150155153827, iteration: 224014
loss: 1.0265768766403198,grad_norm: 0.764039898290926, iteration: 224015
loss: 1.0197175741195679,grad_norm: 0.9069671095040027, iteration: 224016
loss: 1.004092812538147,grad_norm: 0.9421945610042265, iteration: 224017
loss: 1.0032356977462769,grad_norm: 0.999999498049783, iteration: 224018
loss: 1.013035535812378,grad_norm: 0.9999991147549944, iteration: 224019
loss: 1.0283522605895996,grad_norm: 0.9206920557434156, iteration: 224020
loss: 0.979306697845459,grad_norm: 0.9661507077465992, iteration: 224021
loss: 0.979703426361084,grad_norm: 0.8987069995974443, iteration: 224022
loss: 1.0157053470611572,grad_norm: 0.9847925560553348, iteration: 224023
loss: 1.0316303968429565,grad_norm: 0.9614918181297446, iteration: 224024
loss: 0.9888145327568054,grad_norm: 0.885908710487134, iteration: 224025
loss: 0.9912452697753906,grad_norm: 0.9999997805928562, iteration: 224026
loss: 0.9634110927581787,grad_norm: 0.9165230045732329, iteration: 224027
loss: 0.9783468246459961,grad_norm: 0.9999989337736246, iteration: 224028
loss: 0.993367612361908,grad_norm: 0.9206240000250042, iteration: 224029
loss: 0.9837657809257507,grad_norm: 0.9481825437107897, iteration: 224030
loss: 0.9763543009757996,grad_norm: 0.9052439367300977, iteration: 224031
loss: 1.0259525775909424,grad_norm: 0.9322286044313935, iteration: 224032
loss: 1.029854655265808,grad_norm: 0.858579759838795, iteration: 224033
loss: 0.9864264130592346,grad_norm: 0.99999920281526, iteration: 224034
loss: 1.0051621198654175,grad_norm: 0.999998983441491, iteration: 224035
loss: 1.0071396827697754,grad_norm: 0.9134620991336186, iteration: 224036
loss: 1.0016933679580688,grad_norm: 0.9999990641548543, iteration: 224037
loss: 1.0104509592056274,grad_norm: 0.9014858541327605, iteration: 224038
loss: 1.0271140336990356,grad_norm: 0.9020208444603499, iteration: 224039
loss: 1.0029362440109253,grad_norm: 0.9815505140454408, iteration: 224040
loss: 1.004138469696045,grad_norm: 0.9999991659718973, iteration: 224041
loss: 1.0178183317184448,grad_norm: 0.8504781165981845, iteration: 224042
loss: 0.9939296841621399,grad_norm: 0.9787048587796133, iteration: 224043
loss: 1.0306758880615234,grad_norm: 0.8674255441992895, iteration: 224044
loss: 0.9976975321769714,grad_norm: 0.9999990554145529, iteration: 224045
loss: 0.9847550392150879,grad_norm: 0.989500713448152, iteration: 224046
loss: 1.0206451416015625,grad_norm: 0.7424841324239153, iteration: 224047
loss: 1.0368837118148804,grad_norm: 0.9999991115650544, iteration: 224048
loss: 1.0194655656814575,grad_norm: 0.9289598546421827, iteration: 224049
loss: 0.9677652716636658,grad_norm: 0.8387192079365657, iteration: 224050
loss: 0.9897143244743347,grad_norm: 0.9999988989476719, iteration: 224051
loss: 1.0081897974014282,grad_norm: 0.9174408586120891, iteration: 224052
loss: 0.9884046912193298,grad_norm: 0.9999995494498929, iteration: 224053
loss: 0.9826256036758423,grad_norm: 0.9527460424223034, iteration: 224054
loss: 1.0257600545883179,grad_norm: 0.9501401386644478, iteration: 224055
loss: 1.0166386365890503,grad_norm: 0.9452131566907312, iteration: 224056
loss: 1.0044686794281006,grad_norm: 0.9999992862342906, iteration: 224057
loss: 1.0153213739395142,grad_norm: 0.9467816691980528, iteration: 224058
loss: 0.9926398396492004,grad_norm: 0.9999992158646058, iteration: 224059
loss: 1.0104155540466309,grad_norm: 0.7785399694420786, iteration: 224060
loss: 0.9678364992141724,grad_norm: 0.8481552262829724, iteration: 224061
loss: 1.0047141313552856,grad_norm: 0.9999989224882522, iteration: 224062
loss: 0.9889424443244934,grad_norm: 0.9999991240133607, iteration: 224063
loss: 0.9724948406219482,grad_norm: 0.95763332640854, iteration: 224064
loss: 0.9748945832252502,grad_norm: 0.7663745535989925, iteration: 224065
loss: 1.0512669086456299,grad_norm: 0.9999990878344323, iteration: 224066
loss: 1.0167999267578125,grad_norm: 0.8696532530743243, iteration: 224067
loss: 1.0024598836898804,grad_norm: 0.9953757870056612, iteration: 224068
loss: 1.0038331747055054,grad_norm: 0.9999992690993239, iteration: 224069
loss: 0.9999886751174927,grad_norm: 0.7822138926405582, iteration: 224070
loss: 0.9935899972915649,grad_norm: 0.9999992143054852, iteration: 224071
loss: 0.9946548342704773,grad_norm: 0.9003164954745205, iteration: 224072
loss: 0.9936663508415222,grad_norm: 0.999999030683074, iteration: 224073
loss: 0.9910197854042053,grad_norm: 0.9999989397092137, iteration: 224074
loss: 0.9661828279495239,grad_norm: 0.9999991211296174, iteration: 224075
loss: 1.0346806049346924,grad_norm: 0.968366141289468, iteration: 224076
loss: 1.011754035949707,grad_norm: 0.7829008398825391, iteration: 224077
loss: 1.0172377824783325,grad_norm: 0.9999991792429688, iteration: 224078
loss: 1.007781744003296,grad_norm: 0.976639934307662, iteration: 224079
loss: 1.0089788436889648,grad_norm: 0.9229816517170991, iteration: 224080
loss: 0.9815303683280945,grad_norm: 0.8589827332818756, iteration: 224081
loss: 1.0059553384780884,grad_norm: 0.7832958676098601, iteration: 224082
loss: 0.9992719292640686,grad_norm: 0.9382500756626369, iteration: 224083
loss: 1.001710057258606,grad_norm: 0.9999990698993905, iteration: 224084
loss: 0.9657710790634155,grad_norm: 0.9352982403830523, iteration: 224085
loss: 0.9813786745071411,grad_norm: 0.9999991074372533, iteration: 224086
loss: 1.0217026472091675,grad_norm: 0.9335617387040592, iteration: 224087
loss: 1.020028829574585,grad_norm: 0.999999204324038, iteration: 224088
loss: 1.0164787769317627,grad_norm: 0.9325869760892131, iteration: 224089
loss: 1.0024349689483643,grad_norm: 0.999999353128245, iteration: 224090
loss: 0.9932264089584351,grad_norm: 0.8622138516127383, iteration: 224091
loss: 0.9508705735206604,grad_norm: 0.9999991218507117, iteration: 224092
loss: 1.0097707509994507,grad_norm: 0.8249167020748138, iteration: 224093
loss: 1.020669937133789,grad_norm: 0.9164262003807747, iteration: 224094
loss: 1.0001914501190186,grad_norm: 0.9364348669437519, iteration: 224095
loss: 1.002457618713379,grad_norm: 0.9325254750026983, iteration: 224096
loss: 1.0098801851272583,grad_norm: 0.9999991703662959, iteration: 224097
loss: 1.0175299644470215,grad_norm: 0.9532104118048836, iteration: 224098
loss: 0.9992213845252991,grad_norm: 0.8536556930997241, iteration: 224099
loss: 1.037630558013916,grad_norm: 0.837915876732698, iteration: 224100
loss: 0.9838282465934753,grad_norm: 0.9430299726751603, iteration: 224101
loss: 0.9845041632652283,grad_norm: 0.9440823564772922, iteration: 224102
loss: 1.0237668752670288,grad_norm: 0.9999989867357476, iteration: 224103
loss: 1.021191120147705,grad_norm: 0.9999992812617642, iteration: 224104
loss: 1.0419321060180664,grad_norm: 0.9155836045812931, iteration: 224105
loss: 0.9774162173271179,grad_norm: 0.8500775078836281, iteration: 224106
loss: 1.0165342092514038,grad_norm: 0.9999994864322662, iteration: 224107
loss: 1.0013794898986816,grad_norm: 0.9999994064532807, iteration: 224108
loss: 1.017568588256836,grad_norm: 0.9999989789189984, iteration: 224109
loss: 1.0174481868743896,grad_norm: 0.8900602266881011, iteration: 224110
loss: 0.9771152138710022,grad_norm: 0.9830097628626919, iteration: 224111
loss: 1.0077165365219116,grad_norm: 0.9467771913540345, iteration: 224112
loss: 0.987852156162262,grad_norm: 0.9070712944569579, iteration: 224113
loss: 0.9990883469581604,grad_norm: 0.8000989692876028, iteration: 224114
loss: 0.9828824400901794,grad_norm: 0.993721373295242, iteration: 224115
loss: 0.9892961382865906,grad_norm: 0.781958404796487, iteration: 224116
loss: 1.0094305276870728,grad_norm: 0.9999991300053895, iteration: 224117
loss: 0.9834058284759521,grad_norm: 0.7870791687100273, iteration: 224118
loss: 1.0268243551254272,grad_norm: 0.9223980890505541, iteration: 224119
loss: 1.003361463546753,grad_norm: 0.7278039722581959, iteration: 224120
loss: 0.9925397038459778,grad_norm: 0.9999990905907381, iteration: 224121
loss: 0.9944506287574768,grad_norm: 0.9999991200386497, iteration: 224122
loss: 1.024138331413269,grad_norm: 0.9999990746128707, iteration: 224123
loss: 0.9849236607551575,grad_norm: 0.9399669610242201, iteration: 224124
loss: 0.9692036509513855,grad_norm: 0.95427831040901, iteration: 224125
loss: 0.9935188889503479,grad_norm: 0.9973735510768921, iteration: 224126
loss: 1.032118320465088,grad_norm: 0.9999991730986821, iteration: 224127
loss: 1.0063636302947998,grad_norm: 0.9999990801244888, iteration: 224128
loss: 1.009130597114563,grad_norm: 0.9291537271090351, iteration: 224129
loss: 0.982499361038208,grad_norm: 0.9999990770407814, iteration: 224130
loss: 0.9937856793403625,grad_norm: 0.9172490891924981, iteration: 224131
loss: 0.9843452572822571,grad_norm: 0.8322437028513139, iteration: 224132
loss: 1.0364775657653809,grad_norm: 0.8043170020589975, iteration: 224133
loss: 1.0037040710449219,grad_norm: 0.9760471606592316, iteration: 224134
loss: 0.9706430435180664,grad_norm: 0.999999154798004, iteration: 224135
loss: 0.9931350350379944,grad_norm: 0.9999989694522683, iteration: 224136
loss: 1.023874044418335,grad_norm: 0.797181880244446, iteration: 224137
loss: 0.977959394454956,grad_norm: 0.9999990286283759, iteration: 224138
loss: 1.022803544998169,grad_norm: 0.7602866058255889, iteration: 224139
loss: 1.0094839334487915,grad_norm: 0.9665002153238424, iteration: 224140
loss: 1.0063585042953491,grad_norm: 0.9988271887423372, iteration: 224141
loss: 0.9844672679901123,grad_norm: 0.8000233508347525, iteration: 224142
loss: 0.9826071262359619,grad_norm: 0.9999997976745311, iteration: 224143
loss: 1.0196616649627686,grad_norm: 0.9749176359530093, iteration: 224144
loss: 0.9532856345176697,grad_norm: 0.867308489784096, iteration: 224145
loss: 0.9765030145645142,grad_norm: 0.9953276620908129, iteration: 224146
loss: 1.0067296028137207,grad_norm: 0.8524134096016783, iteration: 224147
loss: 0.9890785217285156,grad_norm: 0.9999991297098393, iteration: 224148
loss: 0.963208794593811,grad_norm: 0.999999064724676, iteration: 224149
loss: 0.9908592700958252,grad_norm: 0.9888622776890906, iteration: 224150
loss: 0.9893451929092407,grad_norm: 0.9339963435755073, iteration: 224151
loss: 0.980658769607544,grad_norm: 0.9999989781816121, iteration: 224152
loss: 0.9769635200500488,grad_norm: 0.8753264423492418, iteration: 224153
loss: 0.9723970890045166,grad_norm: 0.7948221836733325, iteration: 224154
loss: 1.0342826843261719,grad_norm: 0.9999996181185873, iteration: 224155
loss: 1.0137863159179688,grad_norm: 0.9999991415011176, iteration: 224156
loss: 0.9665359258651733,grad_norm: 0.945435217244554, iteration: 224157
loss: 1.0105314254760742,grad_norm: 0.901697250176753, iteration: 224158
loss: 0.996372401714325,grad_norm: 0.9024329351882487, iteration: 224159
loss: 0.9789488315582275,grad_norm: 0.8155205227630448, iteration: 224160
loss: 0.9839611053466797,grad_norm: 0.864215019868774, iteration: 224161
loss: 0.9915276765823364,grad_norm: 0.8902363512583087, iteration: 224162
loss: 1.0151159763336182,grad_norm: 0.9999990573295162, iteration: 224163
loss: 1.0077602863311768,grad_norm: 0.9999992964561969, iteration: 224164
loss: 1.017806887626648,grad_norm: 0.9999991771109944, iteration: 224165
loss: 1.0219368934631348,grad_norm: 0.9152712119233424, iteration: 224166
loss: 1.0051802396774292,grad_norm: 0.9999992843636492, iteration: 224167
loss: 1.0004116296768188,grad_norm: 0.841775546317287, iteration: 224168
loss: 1.0032693147659302,grad_norm: 0.9945088711164943, iteration: 224169
loss: 0.9897641539573669,grad_norm: 0.9999990733705217, iteration: 224170
loss: 0.9747201800346375,grad_norm: 0.8705860597386448, iteration: 224171
loss: 1.0124269723892212,grad_norm: 0.999472936639268, iteration: 224172
loss: 1.0295242071151733,grad_norm: 0.8658884829414709, iteration: 224173
loss: 1.0031460523605347,grad_norm: 0.840319572523711, iteration: 224174
loss: 1.0158041715621948,grad_norm: 0.9999993303502483, iteration: 224175
loss: 1.0103729963302612,grad_norm: 0.9999991513710729, iteration: 224176
loss: 1.0004535913467407,grad_norm: 0.7524763551425884, iteration: 224177
loss: 1.0170737504959106,grad_norm: 0.9184278475683536, iteration: 224178
loss: 0.9790884852409363,grad_norm: 0.9209573683712181, iteration: 224179
loss: 0.9646679759025574,grad_norm: 0.9999989511336584, iteration: 224180
loss: 1.0289136171340942,grad_norm: 0.9999990905624867, iteration: 224181
loss: 1.0196977853775024,grad_norm: 0.9999992066964437, iteration: 224182
loss: 1.01929771900177,grad_norm: 0.8998564964653832, iteration: 224183
loss: 0.9875091910362244,grad_norm: 0.872287578605192, iteration: 224184
loss: 1.001573920249939,grad_norm: 0.6807726110188813, iteration: 224185
loss: 0.9962502717971802,grad_norm: 0.9373403973574945, iteration: 224186
loss: 1.003288745880127,grad_norm: 0.9999992084611609, iteration: 224187
loss: 0.9609781503677368,grad_norm: 0.9357816932036367, iteration: 224188
loss: 1.028623342514038,grad_norm: 0.8977754765918792, iteration: 224189
loss: 0.9920036196708679,grad_norm: 0.9327596317032523, iteration: 224190
loss: 1.0383509397506714,grad_norm: 0.9283325884487329, iteration: 224191
loss: 0.9814675450325012,grad_norm: 0.9999991803944301, iteration: 224192
loss: 0.9793329834938049,grad_norm: 0.9999990333986667, iteration: 224193
loss: 1.0317991971969604,grad_norm: 0.9853550411205743, iteration: 224194
loss: 1.0030925273895264,grad_norm: 0.8828769428155602, iteration: 224195
loss: 1.0249216556549072,grad_norm: 0.8731367869138563, iteration: 224196
loss: 1.0235564708709717,grad_norm: 0.927122697146468, iteration: 224197
loss: 1.0403274297714233,grad_norm: 0.8138395890387286, iteration: 224198
loss: 0.9785284996032715,grad_norm: 0.9999990644192096, iteration: 224199
loss: 1.014925479888916,grad_norm: 0.999998983253501, iteration: 224200
loss: 1.0168635845184326,grad_norm: 0.8826286450716486, iteration: 224201
loss: 1.0041407346725464,grad_norm: 0.9577372261360455, iteration: 224202
loss: 1.029573678970337,grad_norm: 0.9999993118444005, iteration: 224203
loss: 0.9972694516181946,grad_norm: 0.8775684303295423, iteration: 224204
loss: 0.9978640675544739,grad_norm: 0.8065810294898262, iteration: 224205
loss: 1.0058040618896484,grad_norm: 0.9999990710898307, iteration: 224206
loss: 0.9780226945877075,grad_norm: 0.8883242111331826, iteration: 224207
loss: 1.0315805673599243,grad_norm: 0.8054496093191007, iteration: 224208
loss: 1.009485125541687,grad_norm: 0.9702043399535647, iteration: 224209
loss: 0.9770281910896301,grad_norm: 0.991425082546846, iteration: 224210
loss: 1.0057687759399414,grad_norm: 0.9895588437238174, iteration: 224211
loss: 0.9816719889640808,grad_norm: 0.9999990224778046, iteration: 224212
loss: 1.0139659643173218,grad_norm: 0.9999991487773271, iteration: 224213
loss: 1.043766975402832,grad_norm: 0.9999992012375882, iteration: 224214
loss: 1.0246502161026,grad_norm: 0.8958825613144894, iteration: 224215
loss: 0.9995811581611633,grad_norm: 0.9999992038090109, iteration: 224216
loss: 1.0125269889831543,grad_norm: 0.9196185224777409, iteration: 224217
loss: 0.9977774024009705,grad_norm: 0.7952452694757643, iteration: 224218
loss: 1.007968783378601,grad_norm: 0.9965297210685813, iteration: 224219
loss: 1.0055062770843506,grad_norm: 0.9625883380522718, iteration: 224220
loss: 0.9812358021736145,grad_norm: 0.8820148536982005, iteration: 224221
loss: 0.9998323321342468,grad_norm: 0.9999997351975217, iteration: 224222
loss: 1.0045238733291626,grad_norm: 0.9948931295365894, iteration: 224223
loss: 0.9999715089797974,grad_norm: 0.9999990852094486, iteration: 224224
loss: 0.9981945157051086,grad_norm: 0.8171511605281759, iteration: 224225
loss: 1.0161042213439941,grad_norm: 0.9063271838537592, iteration: 224226
loss: 1.0078740119934082,grad_norm: 0.9144054056643737, iteration: 224227
loss: 0.9797512292861938,grad_norm: 0.9999990889834226, iteration: 224228
loss: 1.016652226448059,grad_norm: 0.9999991666958065, iteration: 224229
loss: 0.9934855699539185,grad_norm: 0.9576908044088326, iteration: 224230
loss: 0.9909098744392395,grad_norm: 0.9103794829356188, iteration: 224231
loss: 0.9910488724708557,grad_norm: 0.8223451404955341, iteration: 224232
loss: 1.0094681978225708,grad_norm: 0.9468960002407913, iteration: 224233
loss: 1.0142403841018677,grad_norm: 0.8993318463059669, iteration: 224234
loss: 1.0336499214172363,grad_norm: 0.9245291978188565, iteration: 224235
loss: 0.9820728898048401,grad_norm: 0.9999996273067481, iteration: 224236
loss: 0.9621866941452026,grad_norm: 0.8911168266786867, iteration: 224237
loss: 1.0381443500518799,grad_norm: 0.9999990307743569, iteration: 224238
loss: 1.0109254121780396,grad_norm: 0.9999992149592725, iteration: 224239
loss: 1.0855830907821655,grad_norm: 0.999999386319854, iteration: 224240
loss: 1.0181572437286377,grad_norm: 0.8153283900755066, iteration: 224241
loss: 1.0143564939498901,grad_norm: 0.9999993566325139, iteration: 224242
loss: 0.9994857907295227,grad_norm: 0.8177693990614407, iteration: 224243
loss: 0.9756101965904236,grad_norm: 0.8611300785192869, iteration: 224244
loss: 0.9974582195281982,grad_norm: 0.9999991711460144, iteration: 224245
loss: 0.9629412293434143,grad_norm: 0.9480041669299356, iteration: 224246
loss: 1.0194295644760132,grad_norm: 0.8136682876430794, iteration: 224247
loss: 1.0212124586105347,grad_norm: 0.9999992417032779, iteration: 224248
loss: 1.0197136402130127,grad_norm: 0.9948776545119107, iteration: 224249
loss: 0.9880432486534119,grad_norm: 0.9689538564937221, iteration: 224250
loss: 1.0119630098342896,grad_norm: 0.9999991301024691, iteration: 224251
loss: 0.9951723217964172,grad_norm: 0.8557737627530603, iteration: 224252
loss: 0.9659133553504944,grad_norm: 0.9869115482783277, iteration: 224253
loss: 1.0226576328277588,grad_norm: 0.9557670253629401, iteration: 224254
loss: 1.038253664970398,grad_norm: 0.9999991259448392, iteration: 224255
loss: 0.971889078617096,grad_norm: 0.9999992065764799, iteration: 224256
loss: 0.9694575667381287,grad_norm: 0.8731668254796645, iteration: 224257
loss: 0.9986677169799805,grad_norm: 0.8001595694044414, iteration: 224258
loss: 1.0177421569824219,grad_norm: 0.9049104805515055, iteration: 224259
loss: 1.0100938081741333,grad_norm: 0.8745617646416892, iteration: 224260
loss: 1.0013701915740967,grad_norm: 0.9999992075670648, iteration: 224261
loss: 0.9556309580802917,grad_norm: 0.9514274954504938, iteration: 224262
loss: 0.9544764161109924,grad_norm: 0.9999990291790488, iteration: 224263
loss: 0.9896571636199951,grad_norm: 0.8996277716178936, iteration: 224264
loss: 1.0178824663162231,grad_norm: 0.9999992032856022, iteration: 224265
loss: 0.981355607509613,grad_norm: 0.9999990865232004, iteration: 224266
loss: 1.0188807249069214,grad_norm: 0.9999991054852954, iteration: 224267
loss: 1.019423246383667,grad_norm: 0.9999990553189563, iteration: 224268
loss: 1.0041053295135498,grad_norm: 0.9999991685184277, iteration: 224269
loss: 1.0174256563186646,grad_norm: 0.8484897419515064, iteration: 224270
loss: 1.0082781314849854,grad_norm: 0.9950849641589418, iteration: 224271
loss: 0.965827465057373,grad_norm: 0.9576116801017768, iteration: 224272
loss: 0.9987400770187378,grad_norm: 0.8624522371266843, iteration: 224273
loss: 1.023985743522644,grad_norm: 0.9209825902141969, iteration: 224274
loss: 1.0320115089416504,grad_norm: 0.8007024740802333, iteration: 224275
loss: 0.9514214992523193,grad_norm: 0.8812078795030042, iteration: 224276
loss: 0.9750720262527466,grad_norm: 0.8664633914016996, iteration: 224277
loss: 1.0162675380706787,grad_norm: 0.8970429532249836, iteration: 224278
loss: 0.9653124213218689,grad_norm: 0.9016434832939453, iteration: 224279
loss: 0.9930164813995361,grad_norm: 0.9618538585890742, iteration: 224280
loss: 1.0046350955963135,grad_norm: 0.9853577201025601, iteration: 224281
loss: 0.9963451027870178,grad_norm: 0.9421218949901772, iteration: 224282
loss: 1.0429266691207886,grad_norm: 0.9999991194508342, iteration: 224283
loss: 1.022331953048706,grad_norm: 0.9929611415572993, iteration: 224284
loss: 1.052598237991333,grad_norm: 0.9999995794229826, iteration: 224285
loss: 0.9753162264823914,grad_norm: 0.9999991063889877, iteration: 224286
loss: 0.9872461557388306,grad_norm: 0.9083627282013405, iteration: 224287
loss: 1.0472854375839233,grad_norm: 0.8275611754943902, iteration: 224288
loss: 1.0047528743743896,grad_norm: 0.9999992692556108, iteration: 224289
loss: 0.9827036261558533,grad_norm: 0.9999990044869804, iteration: 224290
loss: 0.9989581108093262,grad_norm: 0.9999992083892423, iteration: 224291
loss: 0.9922075271606445,grad_norm: 0.9803682845446596, iteration: 224292
loss: 1.0334469079971313,grad_norm: 0.999999065334226, iteration: 224293
loss: 1.0153855085372925,grad_norm: 0.999999028979058, iteration: 224294
loss: 1.0225886106491089,grad_norm: 0.9999991154452119, iteration: 224295
loss: 1.0127760171890259,grad_norm: 0.9999990634924667, iteration: 224296
loss: 1.048099160194397,grad_norm: 0.9028277460603902, iteration: 224297
loss: 0.9794430732727051,grad_norm: 0.8521960833645468, iteration: 224298
loss: 0.9574708938598633,grad_norm: 0.8551946342519042, iteration: 224299
loss: 0.9593490958213806,grad_norm: 0.999999081278473, iteration: 224300
loss: 1.0041251182556152,grad_norm: 0.9999990216930347, iteration: 224301
loss: 0.9840249419212341,grad_norm: 0.90993234432281, iteration: 224302
loss: 0.9928846955299377,grad_norm: 0.8904446657748919, iteration: 224303
loss: 1.015310287475586,grad_norm: 0.9623661458670995, iteration: 224304
loss: 1.0020201206207275,grad_norm: 0.8065054934747506, iteration: 224305
loss: 1.0131633281707764,grad_norm: 0.8783627366129756, iteration: 224306
loss: 1.0274735689163208,grad_norm: 0.9999999274294715, iteration: 224307
loss: 0.9853901863098145,grad_norm: 0.7892843240390327, iteration: 224308
loss: 0.9742285013198853,grad_norm: 0.9269145709848802, iteration: 224309
loss: 1.019271731376648,grad_norm: 0.900773445039601, iteration: 224310
loss: 0.9871762990951538,grad_norm: 0.8856685027446282, iteration: 224311
loss: 1.031926155090332,grad_norm: 0.9999990768832984, iteration: 224312
loss: 1.0013892650604248,grad_norm: 0.99999923561767, iteration: 224313
loss: 0.9690544009208679,grad_norm: 0.9434521148914727, iteration: 224314
loss: 1.0524815320968628,grad_norm: 0.9999994218567088, iteration: 224315
loss: 0.9933840036392212,grad_norm: 0.9999990984916011, iteration: 224316
loss: 1.0140271186828613,grad_norm: 0.9365996692143477, iteration: 224317
loss: 0.9803374409675598,grad_norm: 0.9891465358477622, iteration: 224318
loss: 0.9825904965400696,grad_norm: 0.9940610054100512, iteration: 224319
loss: 0.9752040505409241,grad_norm: 0.9522506133980049, iteration: 224320
loss: 1.0179678201675415,grad_norm: 0.9733755073707484, iteration: 224321
loss: 1.0368313789367676,grad_norm: 0.9999990086642269, iteration: 224322
loss: 0.989067792892456,grad_norm: 0.9081066720677958, iteration: 224323
loss: 1.0593022108078003,grad_norm: 0.9999992279608967, iteration: 224324
loss: 0.9840244054794312,grad_norm: 0.9202898135219602, iteration: 224325
loss: 1.0450313091278076,grad_norm: 0.9999991093796128, iteration: 224326
loss: 1.0174592733383179,grad_norm: 0.8864658800015338, iteration: 224327
loss: 0.979909360408783,grad_norm: 0.9389622621902803, iteration: 224328
loss: 0.9853094816207886,grad_norm: 0.9999992621848702, iteration: 224329
loss: 1.0056896209716797,grad_norm: 0.9308394484093394, iteration: 224330
loss: 1.0102686882019043,grad_norm: 0.7788348573642527, iteration: 224331
loss: 1.0051671266555786,grad_norm: 0.9999990883799675, iteration: 224332
loss: 1.020445704460144,grad_norm: 0.9629647344232849, iteration: 224333
loss: 1.0390173196792603,grad_norm: 0.9549492594250245, iteration: 224334
loss: 1.033105492591858,grad_norm: 0.800547754491405, iteration: 224335
loss: 0.986528754234314,grad_norm: 0.8969772837637691, iteration: 224336
loss: 0.9878095388412476,grad_norm: 0.9999992435698358, iteration: 224337
loss: 0.9843396544456482,grad_norm: 0.9999990547393617, iteration: 224338
loss: 1.013932466506958,grad_norm: 0.9120645664470781, iteration: 224339
loss: 1.0442672967910767,grad_norm: 0.8212794048485818, iteration: 224340
loss: 0.9974588751792908,grad_norm: 0.94352825833718, iteration: 224341
loss: 0.9638951420783997,grad_norm: 0.7490988701475543, iteration: 224342
loss: 1.0427509546279907,grad_norm: 0.9999991017211164, iteration: 224343
loss: 1.0285760164260864,grad_norm: 0.9999994609795041, iteration: 224344
loss: 0.9693918824195862,grad_norm: 0.9096861276643344, iteration: 224345
loss: 0.959045946598053,grad_norm: 0.9730567298151273, iteration: 224346
loss: 0.9989186525344849,grad_norm: 0.99999904838294, iteration: 224347
loss: 0.9860008955001831,grad_norm: 0.7950488421747142, iteration: 224348
loss: 0.9812292456626892,grad_norm: 0.8844491930959849, iteration: 224349
loss: 0.997169554233551,grad_norm: 0.8617453428797169, iteration: 224350
loss: 0.9578219652175903,grad_norm: 0.7745253635178242, iteration: 224351
loss: 1.0149396657943726,grad_norm: 0.9999992761248092, iteration: 224352
loss: 0.9934188723564148,grad_norm: 0.9865607277540098, iteration: 224353
loss: 1.0971055030822754,grad_norm: 0.9999992786972574, iteration: 224354
loss: 1.0180572271347046,grad_norm: 0.999999079694218, iteration: 224355
loss: 1.0591596364974976,grad_norm: 0.9999993058284555, iteration: 224356
loss: 1.026315689086914,grad_norm: 0.7528498936482296, iteration: 224357
loss: 1.0060597658157349,grad_norm: 0.9477447716475801, iteration: 224358
loss: 0.9918926954269409,grad_norm: 0.8565639716673389, iteration: 224359
loss: 1.0197508335113525,grad_norm: 0.9999991309997953, iteration: 224360
loss: 0.9964752197265625,grad_norm: 0.9952738283780187, iteration: 224361
loss: 0.9861822128295898,grad_norm: 0.8797239345051227, iteration: 224362
loss: 1.055983543395996,grad_norm: 0.9999990416913032, iteration: 224363
loss: 0.9858670830726624,grad_norm: 0.8836071584804865, iteration: 224364
loss: 1.0393977165222168,grad_norm: 0.9999991324322919, iteration: 224365
loss: 1.0152201652526855,grad_norm: 0.8480283508506464, iteration: 224366
loss: 1.0188754796981812,grad_norm: 0.8262725724663144, iteration: 224367
loss: 0.9862858057022095,grad_norm: 0.815733121066367, iteration: 224368
loss: 1.004433512687683,grad_norm: 0.9402443962500027, iteration: 224369
loss: 0.9816343188285828,grad_norm: 0.9999992939807305, iteration: 224370
loss: 0.9948126077651978,grad_norm: 0.9849590958920446, iteration: 224371
loss: 0.9908842444419861,grad_norm: 0.8146186172852157, iteration: 224372
loss: 1.0068095922470093,grad_norm: 0.9009427984104135, iteration: 224373
loss: 1.005961298942566,grad_norm: 0.8853921648317575, iteration: 224374
loss: 0.9753701090812683,grad_norm: 0.9999991781116856, iteration: 224375
loss: 0.9697316288948059,grad_norm: 0.8161790839711973, iteration: 224376
loss: 0.9938979148864746,grad_norm: 0.9018349975724036, iteration: 224377
loss: 1.038436770439148,grad_norm: 0.9999995836712541, iteration: 224378
loss: 1.0084903240203857,grad_norm: 0.9999990996756085, iteration: 224379
loss: 1.0087212324142456,grad_norm: 0.9999989444667355, iteration: 224380
loss: 0.9855026602745056,grad_norm: 0.8241371929286508, iteration: 224381
loss: 0.9836628437042236,grad_norm: 0.9999989484729862, iteration: 224382
loss: 1.0191411972045898,grad_norm: 0.8481548049219731, iteration: 224383
loss: 1.0000500679016113,grad_norm: 0.9277549591328238, iteration: 224384
loss: 1.0541216135025024,grad_norm: 0.8629556194728321, iteration: 224385
loss: 1.005584716796875,grad_norm: 0.9638533755560963, iteration: 224386
loss: 1.036864995956421,grad_norm: 0.966399501121981, iteration: 224387
loss: 1.0072156190872192,grad_norm: 0.9106156355332408, iteration: 224388
loss: 1.0133458375930786,grad_norm: 0.9999991191431573, iteration: 224389
loss: 0.9988417625427246,grad_norm: 0.9019731757920001, iteration: 224390
loss: 0.9940314292907715,grad_norm: 0.8519643990187025, iteration: 224391
loss: 0.9843050241470337,grad_norm: 0.9999992149293392, iteration: 224392
loss: 1.0041248798370361,grad_norm: 0.992724409697515, iteration: 224393
loss: 0.9940099120140076,grad_norm: 0.9999990915721804, iteration: 224394
loss: 1.0279895067214966,grad_norm: 0.9999991664195856, iteration: 224395
loss: 0.9837467670440674,grad_norm: 0.7997038160103106, iteration: 224396
loss: 0.9752431511878967,grad_norm: 0.9596066472267312, iteration: 224397
loss: 1.0025871992111206,grad_norm: 0.85742525820378, iteration: 224398
loss: 1.0119526386260986,grad_norm: 0.8819088874941238, iteration: 224399
loss: 1.0691300630569458,grad_norm: 0.9999999400798656, iteration: 224400
loss: 1.0498385429382324,grad_norm: 0.9999995878560539, iteration: 224401
loss: 1.0171531438827515,grad_norm: 0.7714529008561583, iteration: 224402
loss: 1.0455549955368042,grad_norm: 0.8813723564742361, iteration: 224403
loss: 1.000772476196289,grad_norm: 0.9999992288464888, iteration: 224404
loss: 1.0100674629211426,grad_norm: 0.9999991240198086, iteration: 224405
loss: 1.0098183155059814,grad_norm: 0.9999991097928714, iteration: 224406
loss: 1.0052703619003296,grad_norm: 0.9999990581080255, iteration: 224407
loss: 0.9879723191261292,grad_norm: 0.877130089503256, iteration: 224408
loss: 0.985099732875824,grad_norm: 0.8098612638703145, iteration: 224409
loss: 1.004302740097046,grad_norm: 0.8734644471889903, iteration: 224410
loss: 1.0148628950119019,grad_norm: 0.920305525913204, iteration: 224411
loss: 0.9787440299987793,grad_norm: 0.966220223653736, iteration: 224412
loss: 1.0004585981369019,grad_norm: 0.999999055937202, iteration: 224413
loss: 1.1152334213256836,grad_norm: 0.9234934745082949, iteration: 224414
loss: 0.9977130889892578,grad_norm: 0.999999347629667, iteration: 224415
loss: 1.0344661474227905,grad_norm: 0.873861608055352, iteration: 224416
loss: 0.9660393595695496,grad_norm: 0.9999992877831257, iteration: 224417
loss: 1.0090916156768799,grad_norm: 0.8538209505598824, iteration: 224418
loss: 0.9930388927459717,grad_norm: 0.9999989571820181, iteration: 224419
loss: 0.9792777895927429,grad_norm: 0.9528752740905445, iteration: 224420
loss: 1.0069929361343384,grad_norm: 0.9450197781798461, iteration: 224421
loss: 0.983495831489563,grad_norm: 0.7998550089590042, iteration: 224422
loss: 0.9774227142333984,grad_norm: 0.861384754543456, iteration: 224423
loss: 0.9760069847106934,grad_norm: 0.9278035336130769, iteration: 224424
loss: 0.9816830158233643,grad_norm: 0.8726144372108284, iteration: 224425
loss: 1.015026330947876,grad_norm: 0.9999989946541709, iteration: 224426
loss: 1.0353004932403564,grad_norm: 0.9531057074095953, iteration: 224427
loss: 1.0040546655654907,grad_norm: 0.9231607563372244, iteration: 224428
loss: 0.981207013130188,grad_norm: 0.9371729676308898, iteration: 224429
loss: 1.0018718242645264,grad_norm: 0.99999908512777, iteration: 224430
loss: 0.9905805587768555,grad_norm: 0.9688532826526044, iteration: 224431
loss: 0.9711310863494873,grad_norm: 0.9683397282254785, iteration: 224432
loss: 1.0272839069366455,grad_norm: 0.9999995888406091, iteration: 224433
loss: 0.9764994978904724,grad_norm: 0.9999988731969781, iteration: 224434
loss: 0.9881369471549988,grad_norm: 0.9137605999512257, iteration: 224435
loss: 0.9974159002304077,grad_norm: 0.9999995444314533, iteration: 224436
loss: 0.9826198816299438,grad_norm: 0.9198872796445495, iteration: 224437
loss: 1.056775689125061,grad_norm: 0.987197306552563, iteration: 224438
loss: 1.005976915359497,grad_norm: 0.999999111594343, iteration: 224439
loss: 1.0307880640029907,grad_norm: 0.8284151616717591, iteration: 224440
loss: 1.049423336982727,grad_norm: 0.9999990736044785, iteration: 224441
loss: 0.984408974647522,grad_norm: 0.8850243261644906, iteration: 224442
loss: 1.0333329439163208,grad_norm: 0.9999995022378625, iteration: 224443
loss: 1.0442999601364136,grad_norm: 0.8321238197152617, iteration: 224444
loss: 1.0037883520126343,grad_norm: 0.9999991283865719, iteration: 224445
loss: 1.013526439666748,grad_norm: 0.9999989882190004, iteration: 224446
loss: 1.0138120651245117,grad_norm: 0.999999037877522, iteration: 224447
loss: 1.01418936252594,grad_norm: 0.8677587939424173, iteration: 224448
loss: 1.0252710580825806,grad_norm: 0.8239461733144382, iteration: 224449
loss: 0.9993180632591248,grad_norm: 0.9702554776791174, iteration: 224450
loss: 0.9806455969810486,grad_norm: 0.9247482499938996, iteration: 224451
loss: 1.034205675125122,grad_norm: 0.9999990484073653, iteration: 224452
loss: 0.976371705532074,grad_norm: 0.9137702914806782, iteration: 224453
loss: 1.0225253105163574,grad_norm: 0.9999991035829016, iteration: 224454
loss: 0.9831933975219727,grad_norm: 0.918538864750623, iteration: 224455
loss: 0.9914038777351379,grad_norm: 0.9830991900094456, iteration: 224456
loss: 0.9846946001052856,grad_norm: 0.9999992258232574, iteration: 224457
loss: 1.0133482217788696,grad_norm: 0.8676855262352513, iteration: 224458
loss: 0.9906412363052368,grad_norm: 0.9999997912129799, iteration: 224459
loss: 0.9799740314483643,grad_norm: 0.9523747176948562, iteration: 224460
loss: 0.9683327674865723,grad_norm: 0.8199971006305078, iteration: 224461
loss: 1.0162615776062012,grad_norm: 0.9999991461498229, iteration: 224462
loss: 1.035096287727356,grad_norm: 0.9788008885578573, iteration: 224463
loss: 0.9876676201820374,grad_norm: 0.9462295144867096, iteration: 224464
loss: 0.9963790774345398,grad_norm: 0.9999991682500599, iteration: 224465
loss: 1.0240812301635742,grad_norm: 0.9610188126812026, iteration: 224466
loss: 0.9934342503547668,grad_norm: 0.9983258339419248, iteration: 224467
loss: 1.0204553604125977,grad_norm: 0.8069225902536847, iteration: 224468
loss: 1.0370073318481445,grad_norm: 0.999998942644229, iteration: 224469
loss: 1.014680027961731,grad_norm: 0.9999990556478979, iteration: 224470
loss: 0.9990783929824829,grad_norm: 0.8969632921043168, iteration: 224471
loss: 0.9905245900154114,grad_norm: 0.999999005028986, iteration: 224472
loss: 1.0264191627502441,grad_norm: 0.9794793635375036, iteration: 224473
loss: 1.016330599784851,grad_norm: 0.9999992175471031, iteration: 224474
loss: 0.9881924390792847,grad_norm: 0.9999990204665503, iteration: 224475
loss: 0.9663122892379761,grad_norm: 0.8375137066457414, iteration: 224476
loss: 0.9849595427513123,grad_norm: 0.9999992565799628, iteration: 224477
loss: 1.0107285976409912,grad_norm: 0.8848205408424487, iteration: 224478
loss: 1.0030794143676758,grad_norm: 0.9999991121339147, iteration: 224479
loss: 0.9672425985336304,grad_norm: 0.9999991344625031, iteration: 224480
loss: 0.9712934494018555,grad_norm: 0.9430171209084776, iteration: 224481
loss: 0.9692906737327576,grad_norm: 0.9999992317861981, iteration: 224482
loss: 0.9954482316970825,grad_norm: 0.8653651112108298, iteration: 224483
loss: 0.9977041482925415,grad_norm: 0.8276380541715391, iteration: 224484
loss: 0.9894511103630066,grad_norm: 0.9114904128723942, iteration: 224485
loss: 0.9989571571350098,grad_norm: 0.8512116764967573, iteration: 224486
loss: 1.0212115049362183,grad_norm: 0.9608962642780846, iteration: 224487
loss: 0.970029354095459,grad_norm: 0.9871004344766979, iteration: 224488
loss: 1.0188779830932617,grad_norm: 0.8738189001294315, iteration: 224489
loss: 0.999253511428833,grad_norm: 0.7481071174354137, iteration: 224490
loss: 0.986656665802002,grad_norm: 0.8135723652940738, iteration: 224491
loss: 0.990731418132782,grad_norm: 0.8698941761814066, iteration: 224492
loss: 0.9773833751678467,grad_norm: 0.8333637432849673, iteration: 224493
loss: 0.9779991507530212,grad_norm: 0.9556619207324317, iteration: 224494
loss: 0.98453289270401,grad_norm: 0.9229401275067334, iteration: 224495
loss: 1.0298380851745605,grad_norm: 0.8147255305988038, iteration: 224496
loss: 0.9817556738853455,grad_norm: 0.9791041606035467, iteration: 224497
loss: 0.9998385310173035,grad_norm: 0.9799362261800282, iteration: 224498
loss: 1.0159889459609985,grad_norm: 0.9999992355364897, iteration: 224499
loss: 1.0119597911834717,grad_norm: 0.999999262142041, iteration: 224500
loss: 1.0113866329193115,grad_norm: 0.9999992107679121, iteration: 224501
loss: 0.9854902029037476,grad_norm: 0.9999991895985024, iteration: 224502
loss: 0.9773080348968506,grad_norm: 0.8365057453069479, iteration: 224503
loss: 1.0256541967391968,grad_norm: 0.9686220232324282, iteration: 224504
loss: 1.0208998918533325,grad_norm: 0.8114971386009587, iteration: 224505
loss: 0.9857221841812134,grad_norm: 0.7118638194825687, iteration: 224506
loss: 0.9866436719894409,grad_norm: 0.9076295002568967, iteration: 224507
loss: 1.0335789918899536,grad_norm: 0.8227527777998705, iteration: 224508
loss: 0.9800869822502136,grad_norm: 0.8166605267102903, iteration: 224509
loss: 1.0119588375091553,grad_norm: 0.8686978765793036, iteration: 224510
loss: 0.9761525988578796,grad_norm: 0.9179791510541864, iteration: 224511
loss: 1.0110387802124023,grad_norm: 0.9363019124035981, iteration: 224512
loss: 1.0404733419418335,grad_norm: 0.9765730997554398, iteration: 224513
loss: 0.9647200107574463,grad_norm: 0.9999990122949792, iteration: 224514
loss: 0.9928277730941772,grad_norm: 0.9031068200943682, iteration: 224515
loss: 1.0076885223388672,grad_norm: 0.9999990522109056, iteration: 224516
loss: 0.984718918800354,grad_norm: 0.9999990522807122, iteration: 224517
loss: 0.9987510442733765,grad_norm: 0.8277257500216639, iteration: 224518
loss: 0.9914575815200806,grad_norm: 0.9999991265199683, iteration: 224519
loss: 0.995963990688324,grad_norm: 0.9516878221199205, iteration: 224520
loss: 1.0420546531677246,grad_norm: 0.9617954663986421, iteration: 224521
loss: 0.9716707468032837,grad_norm: 0.931302162115323, iteration: 224522
loss: 1.0429526567459106,grad_norm: 0.9999992274633756, iteration: 224523
loss: 0.9853239059448242,grad_norm: 0.9639047449172118, iteration: 224524
loss: 1.0370711088180542,grad_norm: 0.8762337224277518, iteration: 224525
loss: 1.0023391246795654,grad_norm: 0.9999990772163575, iteration: 224526
loss: 1.0162850618362427,grad_norm: 0.9999991702236127, iteration: 224527
loss: 0.998332679271698,grad_norm: 0.9136957971577673, iteration: 224528
loss: 1.0178943872451782,grad_norm: 0.85615420751343, iteration: 224529
loss: 0.9714862108230591,grad_norm: 0.9003350499773265, iteration: 224530
loss: 0.9809833765029907,grad_norm: 0.9035069768328015, iteration: 224531
loss: 1.0710411071777344,grad_norm: 0.9218204030423335, iteration: 224532
loss: 1.0120868682861328,grad_norm: 0.846676496755604, iteration: 224533
loss: 1.0260926485061646,grad_norm: 0.7318215737107084, iteration: 224534
loss: 0.9848847985267639,grad_norm: 0.8267877298928027, iteration: 224535
loss: 1.0102565288543701,grad_norm: 0.9999992217944944, iteration: 224536
loss: 1.0229073762893677,grad_norm: 0.893745508142122, iteration: 224537
loss: 1.0177773237228394,grad_norm: 0.9309180371670184, iteration: 224538
loss: 1.008398413658142,grad_norm: 0.8593059696982134, iteration: 224539
loss: 1.023105263710022,grad_norm: 0.8506190155245604, iteration: 224540
loss: 0.9880391955375671,grad_norm: 0.9521624208142664, iteration: 224541
loss: 0.9848361015319824,grad_norm: 0.892077708734059, iteration: 224542
loss: 0.9720185995101929,grad_norm: 0.9004824178865428, iteration: 224543
loss: 1.0304733514785767,grad_norm: 0.8783984530022713, iteration: 224544
loss: 0.996269166469574,grad_norm: 0.8954358383299154, iteration: 224545
loss: 1.0157532691955566,grad_norm: 0.8099461260928128, iteration: 224546
loss: 0.9997212290763855,grad_norm: 0.9942847360183236, iteration: 224547
loss: 1.0267131328582764,grad_norm: 0.9057581793230793, iteration: 224548
loss: 0.9817632436752319,grad_norm: 0.9176692532205486, iteration: 224549
loss: 1.0085963010787964,grad_norm: 0.9999989319612986, iteration: 224550
loss: 1.0028115510940552,grad_norm: 0.9999991791809435, iteration: 224551
loss: 1.018153429031372,grad_norm: 0.9999988461462666, iteration: 224552
loss: 1.0734810829162598,grad_norm: 0.9999990789749135, iteration: 224553
loss: 0.9874680638313293,grad_norm: 0.7752877711964306, iteration: 224554
loss: 1.0145835876464844,grad_norm: 0.8482511343425886, iteration: 224555
loss: 0.9724873900413513,grad_norm: 0.9163795031978345, iteration: 224556
loss: 0.993681013584137,grad_norm: 0.9179179348183334, iteration: 224557
loss: 1.0661495923995972,grad_norm: 0.9999999286015558, iteration: 224558
loss: 1.0040383338928223,grad_norm: 0.7895727639299638, iteration: 224559
loss: 1.004528522491455,grad_norm: 0.9751178786697389, iteration: 224560
loss: 1.0308958292007446,grad_norm: 0.9999989784817214, iteration: 224561
loss: 1.0346691608428955,grad_norm: 0.9999991428630017, iteration: 224562
loss: 1.0124379396438599,grad_norm: 0.8510094319584051, iteration: 224563
loss: 0.9998946189880371,grad_norm: 0.9106857189116255, iteration: 224564
loss: 1.0082812309265137,grad_norm: 0.9999991842673571, iteration: 224565
loss: 1.0013643503189087,grad_norm: 0.9790846668621471, iteration: 224566
loss: 0.984048068523407,grad_norm: 0.9776198033072576, iteration: 224567
loss: 0.9445526599884033,grad_norm: 0.8438607481855416, iteration: 224568
loss: 0.9920831918716431,grad_norm: 0.8718472252357166, iteration: 224569
loss: 0.9588611721992493,grad_norm: 0.9999991748654904, iteration: 224570
loss: 1.0147805213928223,grad_norm: 0.7428029598686748, iteration: 224571
loss: 1.0496456623077393,grad_norm: 0.9085972271116031, iteration: 224572
loss: 0.9820092916488647,grad_norm: 0.8654272984478072, iteration: 224573
loss: 0.9920129776000977,grad_norm: 0.7259127549146485, iteration: 224574
loss: 1.0008971691131592,grad_norm: 0.8883153555714017, iteration: 224575
loss: 0.9476807117462158,grad_norm: 0.7578671659153925, iteration: 224576
loss: 1.0440633296966553,grad_norm: 0.9935717812725569, iteration: 224577
loss: 1.035748839378357,grad_norm: 0.9999998162313424, iteration: 224578
loss: 0.9844624996185303,grad_norm: 0.8814170509337687, iteration: 224579
loss: 1.0337947607040405,grad_norm: 0.9999990135397806, iteration: 224580
loss: 1.1137291193008423,grad_norm: 0.9999990823162995, iteration: 224581
loss: 1.0241594314575195,grad_norm: 0.9260217756883055, iteration: 224582
loss: 0.9722586870193481,grad_norm: 0.907388974077847, iteration: 224583
loss: 0.9857034087181091,grad_norm: 0.973387916113093, iteration: 224584
loss: 1.0064866542816162,grad_norm: 0.8170067951444343, iteration: 224585
loss: 0.991743266582489,grad_norm: 0.9999990382927016, iteration: 224586
loss: 1.0149061679840088,grad_norm: 0.9606144006545956, iteration: 224587
loss: 1.0298527479171753,grad_norm: 0.88653065658658, iteration: 224588
loss: 1.011813998222351,grad_norm: 0.9624011221791097, iteration: 224589
loss: 0.9980220794677734,grad_norm: 0.999999124614312, iteration: 224590
loss: 1.0617142915725708,grad_norm: 0.9999991683132482, iteration: 224591
loss: 1.018227458000183,grad_norm: 0.9710557331861852, iteration: 224592
loss: 1.0434396266937256,grad_norm: 0.8549782376249067, iteration: 224593
loss: 0.9981333613395691,grad_norm: 0.9170696460457064, iteration: 224594
loss: 1.0206608772277832,grad_norm: 0.9999991710212653, iteration: 224595
loss: 1.0128273963928223,grad_norm: 0.9614044385221422, iteration: 224596
loss: 1.095280647277832,grad_norm: 0.9047805126765776, iteration: 224597
loss: 1.011878490447998,grad_norm: 0.8939492642083583, iteration: 224598
loss: 0.9831306338310242,grad_norm: 0.8805954835816867, iteration: 224599
loss: 1.072415828704834,grad_norm: 0.9999997858401597, iteration: 224600
loss: 1.3203500509262085,grad_norm: 0.9999993489918154, iteration: 224601
loss: 0.9943981170654297,grad_norm: 0.9443975055591963, iteration: 224602
loss: 1.0255825519561768,grad_norm: 0.9300585334668388, iteration: 224603
loss: 1.0080530643463135,grad_norm: 0.9538787687747863, iteration: 224604
loss: 0.9945358633995056,grad_norm: 0.8991084484542827, iteration: 224605
loss: 0.9977008700370789,grad_norm: 0.9716856815106896, iteration: 224606
loss: 1.0820577144622803,grad_norm: 0.9999998031649618, iteration: 224607
loss: 0.9872618913650513,grad_norm: 0.9999991394516635, iteration: 224608
loss: 0.9970065355300903,grad_norm: 0.8801819284785292, iteration: 224609
loss: 0.9862293004989624,grad_norm: 0.9528476141190684, iteration: 224610
loss: 1.0151097774505615,grad_norm: 0.8984564895144079, iteration: 224611
loss: 1.0167099237442017,grad_norm: 0.999999109763108, iteration: 224612
loss: 1.0211926698684692,grad_norm: 0.935656671114702, iteration: 224613
loss: 1.0156348943710327,grad_norm: 0.9850641417271795, iteration: 224614
loss: 0.9978963136672974,grad_norm: 0.9832250337863181, iteration: 224615
loss: 1.0031719207763672,grad_norm: 0.9579061025617337, iteration: 224616
loss: 0.9583725929260254,grad_norm: 0.9999990061185143, iteration: 224617
loss: 1.0226868391036987,grad_norm: 0.9999990866890931, iteration: 224618
loss: 0.997118353843689,grad_norm: 0.9728454943100087, iteration: 224619
loss: 0.9996523857116699,grad_norm: 0.9999991177632405, iteration: 224620
loss: 1.0272772312164307,grad_norm: 0.9999991250637028, iteration: 224621
loss: 0.988182783126831,grad_norm: 0.9999991239365137, iteration: 224622
loss: 1.0143349170684814,grad_norm: 0.9730644937735895, iteration: 224623
loss: 1.0293058156967163,grad_norm: 0.99999914576019, iteration: 224624
loss: 0.997782289981842,grad_norm: 0.9931593197639239, iteration: 224625
loss: 1.002949595451355,grad_norm: 0.9884188606222742, iteration: 224626
loss: 0.9968233108520508,grad_norm: 0.9999992105285456, iteration: 224627
loss: 0.9585720300674438,grad_norm: 0.9999990741744051, iteration: 224628
loss: 0.998298168182373,grad_norm: 0.9307995985272767, iteration: 224629
loss: 0.9983668923377991,grad_norm: 0.865030687223178, iteration: 224630
loss: 1.0505821704864502,grad_norm: 0.999999662754019, iteration: 224631
loss: 0.9868797063827515,grad_norm: 0.8912103060165418, iteration: 224632
loss: 0.9838255047798157,grad_norm: 0.9999990078504243, iteration: 224633
loss: 1.0585062503814697,grad_norm: 0.9999998011027824, iteration: 224634
loss: 0.9896488785743713,grad_norm: 0.9344656966289506, iteration: 224635
loss: 1.0549453496932983,grad_norm: 0.8440916187627097, iteration: 224636
loss: 1.0266722440719604,grad_norm: 0.9179230105518398, iteration: 224637
loss: 1.0217117071151733,grad_norm: 0.9429453390242631, iteration: 224638
loss: 0.9902282953262329,grad_norm: 0.7980107417434166, iteration: 224639
loss: 0.9875284433364868,grad_norm: 0.8713138976073269, iteration: 224640
loss: 1.050275206565857,grad_norm: 0.9999997783351132, iteration: 224641
loss: 0.9705290794372559,grad_norm: 0.9999989979724947, iteration: 224642
loss: 0.999904453754425,grad_norm: 0.9999998053375396, iteration: 224643
loss: 1.0545345544815063,grad_norm: 0.7966870367508078, iteration: 224644
loss: 0.9800155758857727,grad_norm: 0.9901648776004335, iteration: 224645
loss: 0.9877781271934509,grad_norm: 0.9832311890458324, iteration: 224646
loss: 1.0532931089401245,grad_norm: 0.9275714379808635, iteration: 224647
loss: 0.9960582256317139,grad_norm: 0.9631916346866789, iteration: 224648
loss: 1.0012394189834595,grad_norm: 0.8895695257899753, iteration: 224649
loss: 0.9947180151939392,grad_norm: 0.8741590743662137, iteration: 224650
loss: 0.9862599968910217,grad_norm: 0.970463262927126, iteration: 224651
loss: 0.9819324016571045,grad_norm: 0.9400475454965664, iteration: 224652
loss: 1.0064477920532227,grad_norm: 0.9999990722182481, iteration: 224653
loss: 1.0024206638336182,grad_norm: 0.9672958251767338, iteration: 224654
loss: 0.9998887777328491,grad_norm: 0.890139331866229, iteration: 224655
loss: 1.0910143852233887,grad_norm: 0.9186042577410376, iteration: 224656
loss: 0.9764935970306396,grad_norm: 0.9825747125402083, iteration: 224657
loss: 0.9690094590187073,grad_norm: 0.9277182097881909, iteration: 224658
loss: 0.991950273513794,grad_norm: 0.9159806507004568, iteration: 224659
loss: 0.9907340407371521,grad_norm: 0.7410130957431037, iteration: 224660
loss: 1.0641748905181885,grad_norm: 0.9999999411533897, iteration: 224661
loss: 1.0306710004806519,grad_norm: 0.9089565914280259, iteration: 224662
loss: 1.067809820175171,grad_norm: 0.9999991694702959, iteration: 224663
loss: 1.0207804441452026,grad_norm: 0.8750574248557969, iteration: 224664
loss: 0.9810716509819031,grad_norm: 0.9999991677036536, iteration: 224665
loss: 1.10857093334198,grad_norm: 0.9999999024320773, iteration: 224666
loss: 1.082960844039917,grad_norm: 0.9999992272190001, iteration: 224667
loss: 1.025170922279358,grad_norm: 0.999999669884373, iteration: 224668
loss: 0.9793367981910706,grad_norm: 0.989640221848679, iteration: 224669
loss: 0.9893289804458618,grad_norm: 0.9370340250316547, iteration: 224670
loss: 0.9970508813858032,grad_norm: 0.9999991731212857, iteration: 224671
loss: 1.0340484380722046,grad_norm: 0.8281144811984107, iteration: 224672
loss: 0.9571887254714966,grad_norm: 0.9473366458409944, iteration: 224673
loss: 1.119573712348938,grad_norm: 0.999999333481748, iteration: 224674
loss: 1.0214300155639648,grad_norm: 0.9335482727803442, iteration: 224675
loss: 1.0748549699783325,grad_norm: 0.9999992861397173, iteration: 224676
loss: 1.0132768154144287,grad_norm: 0.9658656625251072, iteration: 224677
loss: 0.9805710911750793,grad_norm: 0.9115500124000503, iteration: 224678
loss: 1.0259552001953125,grad_norm: 0.9933561796615027, iteration: 224679
loss: 1.0542352199554443,grad_norm: 0.8897612654344176, iteration: 224680
loss: 1.0415059328079224,grad_norm: 0.9999991148880653, iteration: 224681
loss: 1.0047743320465088,grad_norm: 0.9999991061174696, iteration: 224682
loss: 0.99925696849823,grad_norm: 0.7930868065848478, iteration: 224683
loss: 0.9930589199066162,grad_norm: 0.8086226119156499, iteration: 224684
loss: 0.9935033917427063,grad_norm: 0.8635834990307992, iteration: 224685
loss: 0.96500563621521,grad_norm: 0.931141083158106, iteration: 224686
loss: 1.0204976797103882,grad_norm: 0.9999993101727854, iteration: 224687
loss: 1.1096692085266113,grad_norm: 0.9999996697737845, iteration: 224688
loss: 0.978734016418457,grad_norm: 0.9999992646860294, iteration: 224689
loss: 0.9915274977684021,grad_norm: 0.9999990609057452, iteration: 224690
loss: 0.9759213328361511,grad_norm: 0.948695623574493, iteration: 224691
loss: 1.010373830795288,grad_norm: 0.8643098292163047, iteration: 224692
loss: 1.014763593673706,grad_norm: 0.9999992930671625, iteration: 224693
loss: 1.0036693811416626,grad_norm: 0.886470236500682, iteration: 224694
loss: 1.022352695465088,grad_norm: 0.9756629381760007, iteration: 224695
loss: 1.0940016508102417,grad_norm: 0.9999995567745853, iteration: 224696
loss: 0.9643814563751221,grad_norm: 0.8427444860048736, iteration: 224697
loss: 1.0169380903244019,grad_norm: 0.8936629026527856, iteration: 224698
loss: 0.9920725226402283,grad_norm: 0.9999992133067676, iteration: 224699
loss: 0.9889254570007324,grad_norm: 0.93718480155463, iteration: 224700
loss: 1.0196634531021118,grad_norm: 0.9022453569308176, iteration: 224701
loss: 0.9916306138038635,grad_norm: 0.8905613139783035, iteration: 224702
loss: 0.9813019037246704,grad_norm: 0.8700430151964939, iteration: 224703
loss: 1.0013911724090576,grad_norm: 0.8825235352316205, iteration: 224704
loss: 0.9480792284011841,grad_norm: 0.8224227140770077, iteration: 224705
loss: 0.977754533290863,grad_norm: 0.8936014856582978, iteration: 224706
loss: 1.0360082387924194,grad_norm: 0.9999992530928276, iteration: 224707
loss: 0.9881488084793091,grad_norm: 0.7809756758257452, iteration: 224708
loss: 0.9418362379074097,grad_norm: 0.7994916835850523, iteration: 224709
loss: 0.9994402527809143,grad_norm: 0.999999313612704, iteration: 224710
loss: 1.0052927732467651,grad_norm: 0.9999991433159555, iteration: 224711
loss: 1.009110927581787,grad_norm: 0.9999990538432948, iteration: 224712
loss: 1.0015102624893188,grad_norm: 0.9016945092327592, iteration: 224713
loss: 1.0226744413375854,grad_norm: 0.9999992136454205, iteration: 224714
loss: 0.9857912063598633,grad_norm: 0.9999991854316415, iteration: 224715
loss: 1.0048470497131348,grad_norm: 0.9999991828624816, iteration: 224716
loss: 1.0082552433013916,grad_norm: 0.9914623022075137, iteration: 224717
loss: 1.021099328994751,grad_norm: 0.9999994145369163, iteration: 224718
loss: 0.9838806986808777,grad_norm: 0.992701851429336, iteration: 224719
loss: 0.9663306474685669,grad_norm: 0.8828268929047606, iteration: 224720
loss: 1.0074914693832397,grad_norm: 0.9240390510192245, iteration: 224721
loss: 0.9665855765342712,grad_norm: 0.9999990424747071, iteration: 224722
loss: 0.9793145060539246,grad_norm: 0.9999990836752684, iteration: 224723
loss: 0.9918028116226196,grad_norm: 0.9999990175122326, iteration: 224724
loss: 1.0345758199691772,grad_norm: 0.9999991133232141, iteration: 224725
loss: 1.0479605197906494,grad_norm: 0.9999993722304084, iteration: 224726
loss: 0.9898929595947266,grad_norm: 0.7738754972876443, iteration: 224727
loss: 0.9702128171920776,grad_norm: 0.8366174051805748, iteration: 224728
loss: 1.0271612405776978,grad_norm: 0.924826926357417, iteration: 224729
loss: 0.9883916974067688,grad_norm: 0.8873194814581059, iteration: 224730
loss: 0.9801530838012695,grad_norm: 0.9999990375178522, iteration: 224731
loss: 0.994829535484314,grad_norm: 0.9789037918565207, iteration: 224732
loss: 0.9941943287849426,grad_norm: 0.9999991363703432, iteration: 224733
loss: 1.0257956981658936,grad_norm: 0.9880243032995615, iteration: 224734
loss: 1.002644658088684,grad_norm: 0.9999992200371988, iteration: 224735
loss: 1.02658212184906,grad_norm: 0.9999990121415243, iteration: 224736
loss: 1.0376380681991577,grad_norm: 0.999999258217148, iteration: 224737
loss: 0.9967458844184875,grad_norm: 0.9999991752908047, iteration: 224738
loss: 0.965158224105835,grad_norm: 0.8387993310862121, iteration: 224739
loss: 0.9745052456855774,grad_norm: 0.8896524733345001, iteration: 224740
loss: 1.0090405941009521,grad_norm: 0.812820206004897, iteration: 224741
loss: 1.0141485929489136,grad_norm: 0.8978652836122569, iteration: 224742
loss: 1.002821683883667,grad_norm: 0.9999992299589898, iteration: 224743
loss: 1.0393822193145752,grad_norm: 0.9999991117924546, iteration: 224744
loss: 0.9848769903182983,grad_norm: 0.9675968608484053, iteration: 224745
loss: 1.018875241279602,grad_norm: 0.9753446176732066, iteration: 224746
loss: 1.013063669204712,grad_norm: 0.955361824483729, iteration: 224747
loss: 1.022373914718628,grad_norm: 0.9927467699361343, iteration: 224748
loss: 0.9826655387878418,grad_norm: 0.8823843197045804, iteration: 224749
loss: 1.014705777168274,grad_norm: 0.9999992011306361, iteration: 224750
loss: 0.9569827914237976,grad_norm: 0.9999991466003799, iteration: 224751
loss: 0.9733882546424866,grad_norm: 0.9255082579604299, iteration: 224752
loss: 1.0120929479599,grad_norm: 0.972071135598795, iteration: 224753
loss: 1.0090839862823486,grad_norm: 0.9999991722851372, iteration: 224754
loss: 0.9856188893318176,grad_norm: 0.9137729027630574, iteration: 224755
loss: 0.9906141757965088,grad_norm: 0.9634811624285569, iteration: 224756
loss: 1.1205788850784302,grad_norm: 0.9999999473498218, iteration: 224757
loss: 1.0083653926849365,grad_norm: 0.7682831127123825, iteration: 224758
loss: 0.9705359935760498,grad_norm: 0.8782030337691501, iteration: 224759
loss: 0.9778859615325928,grad_norm: 0.9344456352906731, iteration: 224760
loss: 0.9578807353973389,grad_norm: 0.9953384421724899, iteration: 224761
loss: 1.0563715696334839,grad_norm: 0.9999994212259546, iteration: 224762
loss: 0.9775673747062683,grad_norm: 0.904640482411926, iteration: 224763
loss: 1.0096800327301025,grad_norm: 0.9043147457920065, iteration: 224764
loss: 0.9845059514045715,grad_norm: 0.9999990341198016, iteration: 224765
loss: 1.0019538402557373,grad_norm: 0.9999991334472277, iteration: 224766
loss: 0.9765967130661011,grad_norm: 0.8785374502256725, iteration: 224767
loss: 0.9909579753875732,grad_norm: 0.9460628123123891, iteration: 224768
loss: 1.0096346139907837,grad_norm: 0.8163411732176861, iteration: 224769
loss: 1.0840529203414917,grad_norm: 0.9999999413736519, iteration: 224770
loss: 1.0081840753555298,grad_norm: 0.9999992695257518, iteration: 224771
loss: 1.0307661294937134,grad_norm: 0.9984980274324281, iteration: 224772
loss: 0.9880720376968384,grad_norm: 0.9999993726284793, iteration: 224773
loss: 0.9909929633140564,grad_norm: 0.8660660010060218, iteration: 224774
loss: 0.9853140711784363,grad_norm: 0.8428284043161712, iteration: 224775
loss: 1.0099984407424927,grad_norm: 0.9555379634442311, iteration: 224776
loss: 1.0465935468673706,grad_norm: 0.9999996239600916, iteration: 224777
loss: 1.0080897808074951,grad_norm: 0.9999992672120656, iteration: 224778
loss: 0.9993751049041748,grad_norm: 0.7385565977090575, iteration: 224779
loss: 0.9415357708930969,grad_norm: 0.9999991452478374, iteration: 224780
loss: 1.0889281034469604,grad_norm: 0.9550378662801059, iteration: 224781
loss: 0.9913420081138611,grad_norm: 0.9909317107117612, iteration: 224782
loss: 0.9634070992469788,grad_norm: 0.8418679371932768, iteration: 224783
loss: 1.0156309604644775,grad_norm: 0.7921189506787022, iteration: 224784
loss: 0.9980792999267578,grad_norm: 0.8750302331972213, iteration: 224785
loss: 0.9994419813156128,grad_norm: 0.9999992126730588, iteration: 224786
loss: 1.0218746662139893,grad_norm: 0.9371720697319785, iteration: 224787
loss: 0.9706556797027588,grad_norm: 0.9491976754597645, iteration: 224788
loss: 0.9734171032905579,grad_norm: 0.8621043357534907, iteration: 224789
loss: 0.9909180998802185,grad_norm: 0.9999990663069547, iteration: 224790
loss: 1.0042972564697266,grad_norm: 0.8661741805312586, iteration: 224791
loss: 1.0493038892745972,grad_norm: 0.9063316486423427, iteration: 224792
loss: 0.9997463226318359,grad_norm: 0.9019505714578916, iteration: 224793
loss: 1.0086491107940674,grad_norm: 0.9102738294227629, iteration: 224794
loss: 0.9936938285827637,grad_norm: 0.944719028284508, iteration: 224795
loss: 1.0046201944351196,grad_norm: 0.9451474166110172, iteration: 224796
loss: 1.070769190788269,grad_norm: 0.9999995656487599, iteration: 224797
loss: 0.9474254846572876,grad_norm: 0.9415800447977987, iteration: 224798
loss: 0.9957676529884338,grad_norm: 0.7973041493277192, iteration: 224799
loss: 1.0129565000534058,grad_norm: 0.8529641340139089, iteration: 224800
loss: 0.9558528065681458,grad_norm: 0.8632690556228686, iteration: 224801
loss: 0.9578325152397156,grad_norm: 0.891685030297397, iteration: 224802
loss: 1.020819902420044,grad_norm: 0.9193821129337222, iteration: 224803
loss: 1.024978518486023,grad_norm: 0.9328670996869356, iteration: 224804
loss: 0.9846845269203186,grad_norm: 0.8982776700072956, iteration: 224805
loss: 0.9656737446784973,grad_norm: 0.8394540680273183, iteration: 224806
loss: 1.0164927244186401,grad_norm: 0.8413527479146955, iteration: 224807
loss: 0.9814879894256592,grad_norm: 0.9999989969363846, iteration: 224808
loss: 1.0375787019729614,grad_norm: 0.8559353033891861, iteration: 224809
loss: 0.9965009689331055,grad_norm: 0.9999990831775566, iteration: 224810
loss: 1.0232309103012085,grad_norm: 0.9999991682338885, iteration: 224811
loss: 0.9922174215316772,grad_norm: 0.8669675647385261, iteration: 224812
loss: 1.0048308372497559,grad_norm: 0.8310443292141719, iteration: 224813
loss: 1.0305252075195312,grad_norm: 0.8801175681520821, iteration: 224814
loss: 0.9999212622642517,grad_norm: 0.8098057333748622, iteration: 224815
loss: 1.062803864479065,grad_norm: 0.9023896993041411, iteration: 224816
loss: 1.0027568340301514,grad_norm: 0.9999991030197031, iteration: 224817
loss: 0.9986727833747864,grad_norm: 0.9178593962190095, iteration: 224818
loss: 1.004185676574707,grad_norm: 0.7586104122867037, iteration: 224819
loss: 0.9767290949821472,grad_norm: 0.9227468056526859, iteration: 224820
loss: 0.9830415844917297,grad_norm: 0.9322110856003153, iteration: 224821
loss: 1.0090538263320923,grad_norm: 0.9363659462530995, iteration: 224822
loss: 1.0195266008377075,grad_norm: 0.9916028478843162, iteration: 224823
loss: 0.9838870763778687,grad_norm: 0.9725769316475458, iteration: 224824
loss: 1.0180315971374512,grad_norm: 0.9999993727778006, iteration: 224825
loss: 0.9922329187393188,grad_norm: 0.999998995510489, iteration: 224826
loss: 1.006679654121399,grad_norm: 0.8912666069974914, iteration: 224827
loss: 1.0260863304138184,grad_norm: 0.9999995939184975, iteration: 224828
loss: 1.0245167016983032,grad_norm: 0.8263271212881037, iteration: 224829
loss: 0.974541187286377,grad_norm: 0.9999991819612138, iteration: 224830
loss: 0.9994627237319946,grad_norm: 0.961343521435027, iteration: 224831
loss: 0.971778929233551,grad_norm: 0.8925141641030092, iteration: 224832
loss: 1.0222305059432983,grad_norm: 0.9660331875960809, iteration: 224833
loss: 0.9910194873809814,grad_norm: 0.9999991188474188, iteration: 224834
loss: 1.016226053237915,grad_norm: 0.9999993979401537, iteration: 224835
loss: 0.990766167640686,grad_norm: 0.9322653871006488, iteration: 224836
loss: 0.9779213666915894,grad_norm: 0.7463049972147783, iteration: 224837
loss: 0.9482319355010986,grad_norm: 0.9999989396483593, iteration: 224838
loss: 1.0157508850097656,grad_norm: 0.8298041113742407, iteration: 224839
loss: 1.0172475576400757,grad_norm: 0.8877805894800851, iteration: 224840
loss: 1.023208498954773,grad_norm: 0.999999047524814, iteration: 224841
loss: 0.9747425317764282,grad_norm: 0.8771251447942016, iteration: 224842
loss: 1.0102750062942505,grad_norm: 0.9999990175081858, iteration: 224843
loss: 0.9725399613380432,grad_norm: 0.9999992390548624, iteration: 224844
loss: 0.9968571662902832,grad_norm: 0.7933352462685436, iteration: 224845
loss: 1.0325578451156616,grad_norm: 0.9412491808741684, iteration: 224846
loss: 0.9696786999702454,grad_norm: 0.9999992398323321, iteration: 224847
loss: 1.0063055753707886,grad_norm: 0.9999990885975404, iteration: 224848
loss: 1.0091221332550049,grad_norm: 0.9814706582968677, iteration: 224849
loss: 1.0367685556411743,grad_norm: 0.9283817326852907, iteration: 224850
loss: 1.031602382659912,grad_norm: 0.8199417837608597, iteration: 224851
loss: 1.0104831457138062,grad_norm: 0.9999993120432348, iteration: 224852
loss: 0.9873051643371582,grad_norm: 0.9644205859499182, iteration: 224853
loss: 0.9635762572288513,grad_norm: 0.9253602463122422, iteration: 224854
loss: 1.061081051826477,grad_norm: 0.9999999122632471, iteration: 224855
loss: 0.984790027141571,grad_norm: 0.9999991310802013, iteration: 224856
loss: 1.014492392539978,grad_norm: 0.8824066554261352, iteration: 224857
loss: 1.000382423400879,grad_norm: 0.9575407316363233, iteration: 224858
loss: 1.0064523220062256,grad_norm: 0.8891008288669888, iteration: 224859
loss: 1.0249783992767334,grad_norm: 0.9999989976831134, iteration: 224860
loss: 0.9791837930679321,grad_norm: 0.9098919999174575, iteration: 224861
loss: 0.9842220544815063,grad_norm: 0.9999989840964395, iteration: 224862
loss: 0.9702962636947632,grad_norm: 0.8417514219898301, iteration: 224863
loss: 0.9836722016334534,grad_norm: 0.9189174607342633, iteration: 224864
loss: 1.0544143915176392,grad_norm: 0.9999993290991696, iteration: 224865
loss: 1.026894450187683,grad_norm: 0.9999989914730874, iteration: 224866
loss: 1.026288628578186,grad_norm: 0.9999990781842261, iteration: 224867
loss: 1.0243886709213257,grad_norm: 0.9089731472404097, iteration: 224868
loss: 0.9758932590484619,grad_norm: 0.8547712852094063, iteration: 224869
loss: 1.0194147825241089,grad_norm: 0.7815681441726228, iteration: 224870
loss: 1.017970323562622,grad_norm: 0.9228524113779057, iteration: 224871
loss: 0.9625223875045776,grad_norm: 0.865497115713518, iteration: 224872
loss: 0.9860493540763855,grad_norm: 0.8494510072942812, iteration: 224873
loss: 1.0109338760375977,grad_norm: 0.9353463279638996, iteration: 224874
loss: 0.9754204750061035,grad_norm: 0.9047949774504199, iteration: 224875
loss: 0.9906064867973328,grad_norm: 0.9240435962208859, iteration: 224876
loss: 1.0055570602416992,grad_norm: 0.8976451587589973, iteration: 224877
loss: 1.0127004384994507,grad_norm: 0.9999990250451241, iteration: 224878
loss: 1.011997103691101,grad_norm: 0.8719472331253758, iteration: 224879
loss: 1.0495930910110474,grad_norm: 0.9222542413299158, iteration: 224880
loss: 0.96816086769104,grad_norm: 0.9999989724338678, iteration: 224881
loss: 0.9908860325813293,grad_norm: 0.8482541788175961, iteration: 224882
loss: 0.9875126481056213,grad_norm: 0.8231299487237276, iteration: 224883
loss: 0.9821021556854248,grad_norm: 0.7673938736743462, iteration: 224884
loss: 0.9969609379768372,grad_norm: 0.8582277332582826, iteration: 224885
loss: 0.940828263759613,grad_norm: 0.8973534979097363, iteration: 224886
loss: 1.0205029249191284,grad_norm: 0.8095182017128129, iteration: 224887
loss: 0.9922781586647034,grad_norm: 0.884230495365287, iteration: 224888
loss: 0.9901041984558105,grad_norm: 0.9999992471807215, iteration: 224889
loss: 0.9939714074134827,grad_norm: 0.9848540557200768, iteration: 224890
loss: 0.9827969074249268,grad_norm: 0.9999993027902557, iteration: 224891
loss: 1.0897455215454102,grad_norm: 0.9999993628044506, iteration: 224892
loss: 1.0216001272201538,grad_norm: 0.9999997512194274, iteration: 224893
loss: 1.0065048933029175,grad_norm: 0.999999144341772, iteration: 224894
loss: 0.9921517372131348,grad_norm: 0.7595564076794532, iteration: 224895
loss: 1.029615879058838,grad_norm: 0.8741619361403121, iteration: 224896
loss: 1.0293954610824585,grad_norm: 0.9089357668649743, iteration: 224897
loss: 0.970914363861084,grad_norm: 0.999999229332154, iteration: 224898
loss: 1.01466703414917,grad_norm: 0.8048207730181421, iteration: 224899
loss: 1.0058027505874634,grad_norm: 0.967668169125729, iteration: 224900
loss: 0.9923965930938721,grad_norm: 0.9838409178590036, iteration: 224901
loss: 1.0813478231430054,grad_norm: 0.9999995253975672, iteration: 224902
loss: 1.0296753644943237,grad_norm: 0.9999991731218953, iteration: 224903
loss: 0.9694175720214844,grad_norm: 0.9254817479604809, iteration: 224904
loss: 0.9607598781585693,grad_norm: 0.8698976409261407, iteration: 224905
loss: 1.004475712776184,grad_norm: 0.9844958673674646, iteration: 224906
loss: 0.9784680008888245,grad_norm: 0.8652968611423503, iteration: 224907
loss: 1.0278503894805908,grad_norm: 0.8629251697639035, iteration: 224908
loss: 0.9779948592185974,grad_norm: 0.812198035809882, iteration: 224909
loss: 0.9839145541191101,grad_norm: 0.8007242437461052, iteration: 224910
loss: 1.0087295770645142,grad_norm: 0.8120829474243914, iteration: 224911
loss: 0.9981188178062439,grad_norm: 0.967259999243544, iteration: 224912
loss: 1.0239994525909424,grad_norm: 0.9770661014608454, iteration: 224913
loss: 0.9775978326797485,grad_norm: 0.8674077811470213, iteration: 224914
loss: 0.9624320864677429,grad_norm: 0.8896271136329701, iteration: 224915
loss: 1.0454211235046387,grad_norm: 0.9999990053459952, iteration: 224916
loss: 0.9965476393699646,grad_norm: 0.9999991461251793, iteration: 224917
loss: 0.9660543203353882,grad_norm: 0.9796090968602243, iteration: 224918
loss: 1.0119693279266357,grad_norm: 0.9999991788201346, iteration: 224919
loss: 1.034874677658081,grad_norm: 0.9999991521142663, iteration: 224920
loss: 0.9943520426750183,grad_norm: 0.9999991083320214, iteration: 224921
loss: 1.0156834125518799,grad_norm: 0.9999990333505787, iteration: 224922
loss: 1.0456358194351196,grad_norm: 0.9999990958344087, iteration: 224923
loss: 1.014027714729309,grad_norm: 0.9999990306431047, iteration: 224924
loss: 1.001226544380188,grad_norm: 0.9999989264128595, iteration: 224925
loss: 1.0026459693908691,grad_norm: 0.8777413978636859, iteration: 224926
loss: 0.9881676435470581,grad_norm: 0.999999236866017, iteration: 224927
loss: 1.0155587196350098,grad_norm: 0.9476558516291226, iteration: 224928
loss: 0.9744266867637634,grad_norm: 0.8304136554122981, iteration: 224929
loss: 0.9663323163986206,grad_norm: 0.8256296238321182, iteration: 224930
loss: 0.9866137504577637,grad_norm: 0.7790521197956701, iteration: 224931
loss: 0.9806298613548279,grad_norm: 0.9712919935765061, iteration: 224932
loss: 1.0085293054580688,grad_norm: 0.8695657631668096, iteration: 224933
loss: 1.0534286499023438,grad_norm: 0.9999990921651698, iteration: 224934
loss: 1.0285035371780396,grad_norm: 0.8519860876350469, iteration: 224935
loss: 1.0214271545410156,grad_norm: 0.871107957018727, iteration: 224936
loss: 0.9490504860877991,grad_norm: 0.856454645020049, iteration: 224937
loss: 1.0151172876358032,grad_norm: 0.9999990693895887, iteration: 224938
loss: 1.016748309135437,grad_norm: 0.8533477602146706, iteration: 224939
loss: 0.9923571944236755,grad_norm: 0.999999050874459, iteration: 224940
loss: 0.9835268259048462,grad_norm: 0.7787954506776019, iteration: 224941
loss: 0.9896170496940613,grad_norm: 0.9999990792968914, iteration: 224942
loss: 0.9688987135887146,grad_norm: 0.7970644448502083, iteration: 224943
loss: 0.9930312037467957,grad_norm: 0.895036322240312, iteration: 224944
loss: 1.005635380744934,grad_norm: 0.8244158031513339, iteration: 224945
loss: 0.9892932772636414,grad_norm: 0.8743821348994252, iteration: 224946
loss: 0.957967221736908,grad_norm: 0.9578727698938988, iteration: 224947
loss: 0.9990590214729309,grad_norm: 0.827641591700934, iteration: 224948
loss: 0.9965736865997314,grad_norm: 0.8821789594546553, iteration: 224949
loss: 1.0112979412078857,grad_norm: 0.9530145477524327, iteration: 224950
loss: 1.0109933614730835,grad_norm: 0.9999995347436208, iteration: 224951
loss: 0.9871902465820312,grad_norm: 0.9999990912425222, iteration: 224952
loss: 0.9924092292785645,grad_norm: 0.7982558173944165, iteration: 224953
loss: 0.9896389245986938,grad_norm: 0.9640733627317107, iteration: 224954
loss: 1.0718024969100952,grad_norm: 0.7986606551387843, iteration: 224955
loss: 1.0146671533584595,grad_norm: 0.8920851747029158, iteration: 224956
loss: 1.0120364427566528,grad_norm: 0.8487049050478654, iteration: 224957
loss: 1.0184093713760376,grad_norm: 0.8754321472899763, iteration: 224958
loss: 1.014625072479248,grad_norm: 0.9707031408832827, iteration: 224959
loss: 0.9861356019973755,grad_norm: 0.92398771443561, iteration: 224960
loss: 1.0061330795288086,grad_norm: 0.8749261391838037, iteration: 224961
loss: 1.0313594341278076,grad_norm: 0.9521093756166746, iteration: 224962
loss: 0.9871061444282532,grad_norm: 0.8233991693162739, iteration: 224963
loss: 1.0282214879989624,grad_norm: 0.9999996677506225, iteration: 224964
loss: 1.0301198959350586,grad_norm: 0.8926573784087227, iteration: 224965
loss: 0.9942196011543274,grad_norm: 0.9999993060460984, iteration: 224966
loss: 0.9668449759483337,grad_norm: 0.9999991409022528, iteration: 224967
loss: 0.9944948554039001,grad_norm: 0.9960686268139162, iteration: 224968
loss: 1.004385232925415,grad_norm: 0.9674805129541639, iteration: 224969
loss: 0.9821251034736633,grad_norm: 0.7444364241598519, iteration: 224970
loss: 0.9709967970848083,grad_norm: 0.8959656463059446, iteration: 224971
loss: 1.0247676372528076,grad_norm: 0.78950136964049, iteration: 224972
loss: 1.0352213382720947,grad_norm: 0.8701678771665924, iteration: 224973
loss: 0.9824034571647644,grad_norm: 0.8710027702199098, iteration: 224974
loss: 1.0000609159469604,grad_norm: 0.9882775876291698, iteration: 224975
loss: 1.0184156894683838,grad_norm: 0.8613801859257371, iteration: 224976
loss: 0.9964569807052612,grad_norm: 0.9292864957036222, iteration: 224977
loss: 0.9425786137580872,grad_norm: 0.8900427950124695, iteration: 224978
loss: 1.0219725370407104,grad_norm: 0.9999990523869168, iteration: 224979
loss: 0.9913474917411804,grad_norm: 0.9999991310638451, iteration: 224980
loss: 0.962688684463501,grad_norm: 0.9146605369662317, iteration: 224981
loss: 0.9893226027488708,grad_norm: 0.9347076434092203, iteration: 224982
loss: 0.9932417869567871,grad_norm: 0.9070949309259151, iteration: 224983
loss: 1.0236254930496216,grad_norm: 0.9640216211486806, iteration: 224984
loss: 0.9916574358940125,grad_norm: 0.8094920774359927, iteration: 224985
loss: 1.001987338066101,grad_norm: 0.9204888237069244, iteration: 224986
loss: 0.9926950931549072,grad_norm: 0.9999990753878637, iteration: 224987
loss: 1.0013127326965332,grad_norm: 0.8445443224852602, iteration: 224988
loss: 1.0084949731826782,grad_norm: 0.925342965665974, iteration: 224989
loss: 0.9947547316551208,grad_norm: 0.9870575095952765, iteration: 224990
loss: 1.0273795127868652,grad_norm: 0.9999997686589739, iteration: 224991
loss: 1.003338098526001,grad_norm: 0.9999993324776403, iteration: 224992
loss: 1.0014957189559937,grad_norm: 0.8622404028497522, iteration: 224993
loss: 1.0422627925872803,grad_norm: 0.9999991466064592, iteration: 224994
loss: 1.0509673357009888,grad_norm: 0.9196450983585903, iteration: 224995
loss: 1.0042847394943237,grad_norm: 0.9729325795762991, iteration: 224996
loss: 1.0047262907028198,grad_norm: 0.9999991664668468, iteration: 224997
loss: 0.9959724545478821,grad_norm: 0.8551530553386321, iteration: 224998
loss: 0.9806571006774902,grad_norm: 0.9999991624977687, iteration: 224999
loss: 1.0357660055160522,grad_norm: 0.838587277252375, iteration: 225000
loss: 1.0093564987182617,grad_norm: 0.9707107212643412, iteration: 225001
loss: 0.9785599112510681,grad_norm: 0.9999993145759296, iteration: 225002
loss: 1.03946053981781,grad_norm: 0.9735998245004652, iteration: 225003
loss: 0.9859243035316467,grad_norm: 0.9999989594464872, iteration: 225004
loss: 0.9958561062812805,grad_norm: 0.8334892598251219, iteration: 225005
loss: 0.9763143658638,grad_norm: 0.882387248390237, iteration: 225006
loss: 1.0131593942642212,grad_norm: 0.9999990312194005, iteration: 225007
loss: 1.0075187683105469,grad_norm: 0.896121782646322, iteration: 225008
loss: 0.9982859492301941,grad_norm: 0.8548536038292024, iteration: 225009
loss: 1.000400424003601,grad_norm: 0.9051211803015464, iteration: 225010
loss: 0.9884700179100037,grad_norm: 0.9783943960941021, iteration: 225011
loss: 0.9529780149459839,grad_norm: 0.9040243314965035, iteration: 225012
loss: 1.0036779642105103,grad_norm: 0.9210658475445637, iteration: 225013
loss: 1.0308294296264648,grad_norm: 0.9999991079291185, iteration: 225014
loss: 1.0173890590667725,grad_norm: 0.9999990410975614, iteration: 225015
loss: 1.002249002456665,grad_norm: 0.9679055807238254, iteration: 225016
loss: 0.975580096244812,grad_norm: 0.9887146946295, iteration: 225017
loss: 0.995366632938385,grad_norm: 0.9264369842344677, iteration: 225018
loss: 0.9957442283630371,grad_norm: 0.9744178211716161, iteration: 225019
loss: 0.9785016775131226,grad_norm: 0.9999993569627618, iteration: 225020
loss: 0.981483519077301,grad_norm: 0.9999989736554351, iteration: 225021
loss: 0.9941402673721313,grad_norm: 0.9683159424878922, iteration: 225022
loss: 0.9886497855186462,grad_norm: 0.9999991980855237, iteration: 225023
loss: 0.9449267983436584,grad_norm: 0.9999991169063634, iteration: 225024
loss: 1.005183458328247,grad_norm: 0.9999993508439988, iteration: 225025
loss: 1.02660071849823,grad_norm: 0.9999992167970265, iteration: 225026
loss: 1.008646845817566,grad_norm: 0.9175645089652441, iteration: 225027
loss: 0.9917218089103699,grad_norm: 0.9999991648746466, iteration: 225028
loss: 1.0173195600509644,grad_norm: 0.9989579151570618, iteration: 225029
loss: 1.018847942352295,grad_norm: 0.9928853298707042, iteration: 225030
loss: 1.048225998878479,grad_norm: 0.9088106894799817, iteration: 225031
loss: 0.9851117134094238,grad_norm: 0.9999989740765939, iteration: 225032
loss: 1.010131597518921,grad_norm: 0.9716722427639432, iteration: 225033
loss: 1.0066462755203247,grad_norm: 0.9999991608344978, iteration: 225034
loss: 1.0161558389663696,grad_norm: 0.9219142159833639, iteration: 225035
loss: 1.0149340629577637,grad_norm: 0.9999998809120436, iteration: 225036
loss: 0.9687661528587341,grad_norm: 0.9999991737075559, iteration: 225037
loss: 1.030420184135437,grad_norm: 0.8597018567725849, iteration: 225038
loss: 0.9984080195426941,grad_norm: 0.9999991731353036, iteration: 225039
loss: 0.9978013038635254,grad_norm: 0.9243576485460193, iteration: 225040
loss: 1.0270345211029053,grad_norm: 0.9999990177976525, iteration: 225041
loss: 0.9929596185684204,grad_norm: 0.9999992572412094, iteration: 225042
loss: 1.0153878927230835,grad_norm: 0.898713279842891, iteration: 225043
loss: 0.978725016117096,grad_norm: 0.9032090946109681, iteration: 225044
loss: 0.9974468350410461,grad_norm: 0.9207851232561836, iteration: 225045
loss: 1.0234025716781616,grad_norm: 0.9172066775470405, iteration: 225046
loss: 1.012888789176941,grad_norm: 0.8886026008880814, iteration: 225047
loss: 0.9671710729598999,grad_norm: 0.9900301278535382, iteration: 225048
loss: 1.030637502670288,grad_norm: 0.8663191540445391, iteration: 225049
loss: 1.0215575695037842,grad_norm: 0.9999991056578226, iteration: 225050
loss: 1.0648057460784912,grad_norm: 0.999999153371712, iteration: 225051
loss: 1.0151699781417847,grad_norm: 0.8433980009603783, iteration: 225052
loss: 0.9949569702148438,grad_norm: 0.999999255438984, iteration: 225053
loss: 1.0151360034942627,grad_norm: 0.9388744503613979, iteration: 225054
loss: 1.020257830619812,grad_norm: 0.9999991731666347, iteration: 225055
loss: 0.9913085699081421,grad_norm: 0.9707883070223144, iteration: 225056
loss: 0.9368970394134521,grad_norm: 0.9999991667765757, iteration: 225057
loss: 0.9863120913505554,grad_norm: 0.8294577262702506, iteration: 225058
loss: 1.0135303735733032,grad_norm: 0.9999993004512008, iteration: 225059
loss: 0.9946344494819641,grad_norm: 0.9673299677868336, iteration: 225060
loss: 0.9432603120803833,grad_norm: 0.9999992897845004, iteration: 225061
loss: 1.0398743152618408,grad_norm: 0.9637246126636932, iteration: 225062
loss: 1.0347278118133545,grad_norm: 0.7545700143260763, iteration: 225063
loss: 1.0267362594604492,grad_norm: 0.999999091183839, iteration: 225064
loss: 1.0806552171707153,grad_norm: 0.8721207384238131, iteration: 225065
loss: 0.9790471196174622,grad_norm: 0.8971440335542068, iteration: 225066
loss: 0.9897500276565552,grad_norm: 0.6814274629166677, iteration: 225067
loss: 0.9810804724693298,grad_norm: 0.9999991797664646, iteration: 225068
loss: 0.9646009802818298,grad_norm: 0.8845219645982932, iteration: 225069
loss: 1.0210169553756714,grad_norm: 0.9937374275320388, iteration: 225070
loss: 1.0248045921325684,grad_norm: 0.7334684386823175, iteration: 225071
loss: 0.9801703691482544,grad_norm: 0.8879020088224787, iteration: 225072
loss: 0.9980716109275818,grad_norm: 0.9999989854895438, iteration: 225073
loss: 1.027899980545044,grad_norm: 0.7738437337622854, iteration: 225074
loss: 0.958903968334198,grad_norm: 0.9706695627433376, iteration: 225075
loss: 1.0238301753997803,grad_norm: 0.9435021977916339, iteration: 225076
loss: 0.9995047450065613,grad_norm: 0.746395856310632, iteration: 225077
loss: 1.0118969678878784,grad_norm: 0.9999990779774333, iteration: 225078
loss: 0.9892268776893616,grad_norm: 0.9641196475950464, iteration: 225079
loss: 0.9868329167366028,grad_norm: 0.8312335098275778, iteration: 225080
loss: 1.0172897577285767,grad_norm: 0.9999991343303936, iteration: 225081
loss: 1.0096889734268188,grad_norm: 0.8436975697832346, iteration: 225082
loss: 1.0306061506271362,grad_norm: 0.8990101113177917, iteration: 225083
loss: 1.0267667770385742,grad_norm: 0.985879946695251, iteration: 225084
loss: 0.9824514985084534,grad_norm: 0.7940524603681783, iteration: 225085
loss: 1.000846028327942,grad_norm: 0.810079365618676, iteration: 225086
loss: 0.9913370609283447,grad_norm: 0.8500921847106614, iteration: 225087
loss: 1.002620816230774,grad_norm: 0.999999051647116, iteration: 225088
loss: 1.0216578245162964,grad_norm: 0.9431483899780657, iteration: 225089
loss: 1.0222554206848145,grad_norm: 0.8786501670195378, iteration: 225090
loss: 1.022550344467163,grad_norm: 0.8613637993819441, iteration: 225091
loss: 0.9800149202346802,grad_norm: 0.9999990417547474, iteration: 225092
loss: 1.0076158046722412,grad_norm: 0.8023145189667067, iteration: 225093
loss: 0.9877777099609375,grad_norm: 0.970455405666394, iteration: 225094
loss: 1.0353440046310425,grad_norm: 0.8687760556515332, iteration: 225095
loss: 0.9831863641738892,grad_norm: 0.9909748548809078, iteration: 225096
loss: 0.9920051097869873,grad_norm: 0.9999990694025171, iteration: 225097
loss: 1.0018892288208008,grad_norm: 0.8121427408357185, iteration: 225098
loss: 0.9641746282577515,grad_norm: 0.9999990274991241, iteration: 225099
loss: 0.9983447790145874,grad_norm: 0.9788174933400846, iteration: 225100
loss: 1.0143121480941772,grad_norm: 0.9999991048715716, iteration: 225101
loss: 0.9640101194381714,grad_norm: 0.8050951930557588, iteration: 225102
loss: 0.9963230490684509,grad_norm: 0.922585561164897, iteration: 225103
loss: 1.0311918258666992,grad_norm: 0.9999997976577053, iteration: 225104
loss: 1.008600115776062,grad_norm: 0.8362291086298622, iteration: 225105
loss: 1.0041966438293457,grad_norm: 0.9999994666331143, iteration: 225106
loss: 1.0024213790893555,grad_norm: 0.8960839183858216, iteration: 225107
loss: 0.9813143610954285,grad_norm: 0.823976540696047, iteration: 225108
loss: 1.0329043865203857,grad_norm: 0.9999995249732417, iteration: 225109
loss: 0.9694271683692932,grad_norm: 0.9530150072837114, iteration: 225110
loss: 1.0421712398529053,grad_norm: 0.8050878852758853, iteration: 225111
loss: 1.0038679838180542,grad_norm: 0.9999990880081349, iteration: 225112
loss: 1.0026271343231201,grad_norm: 0.9035494516381253, iteration: 225113
loss: 1.0146024227142334,grad_norm: 0.9168067719028522, iteration: 225114
loss: 1.0308798551559448,grad_norm: 0.999999098176756, iteration: 225115
loss: 1.0098854303359985,grad_norm: 0.8443150283514904, iteration: 225116
loss: 0.9963504672050476,grad_norm: 0.9715908198710136, iteration: 225117
loss: 0.9499294757843018,grad_norm: 0.8933661507474151, iteration: 225118
loss: 1.0118581056594849,grad_norm: 0.8973194840503433, iteration: 225119
loss: 0.9929806590080261,grad_norm: 0.9048055935043062, iteration: 225120
loss: 1.0129376649856567,grad_norm: 0.9999992182104608, iteration: 225121
loss: 1.0041745901107788,grad_norm: 0.8635041888756317, iteration: 225122
loss: 1.0269538164138794,grad_norm: 0.9999991257496017, iteration: 225123
loss: 1.0018202066421509,grad_norm: 0.8447615999256487, iteration: 225124
loss: 1.0171176195144653,grad_norm: 0.9747629526690189, iteration: 225125
loss: 1.0227060317993164,grad_norm: 0.8284653615752262, iteration: 225126
loss: 1.0206977128982544,grad_norm: 0.9999992478594701, iteration: 225127
loss: 1.0018428564071655,grad_norm: 0.865772969488264, iteration: 225128
loss: 1.0081232786178589,grad_norm: 0.9999990912305216, iteration: 225129
loss: 0.9874869585037231,grad_norm: 0.9052559652481691, iteration: 225130
loss: 0.997569739818573,grad_norm: 0.8386081730331972, iteration: 225131
loss: 1.0165348052978516,grad_norm: 0.8922613115838066, iteration: 225132
loss: 0.9843053817749023,grad_norm: 0.8297992621185784, iteration: 225133
loss: 1.0184372663497925,grad_norm: 0.9999992410152289, iteration: 225134
loss: 1.039878010749817,grad_norm: 0.9999990386892372, iteration: 225135
loss: 0.9930376410484314,grad_norm: 0.8899260913076422, iteration: 225136
loss: 1.0114498138427734,grad_norm: 0.866574000552414, iteration: 225137
loss: 1.0168720483779907,grad_norm: 0.8986911140052212, iteration: 225138
loss: 1.0629162788391113,grad_norm: 0.9999991264284609, iteration: 225139
loss: 1.0211427211761475,grad_norm: 0.8528076575205741, iteration: 225140
loss: 0.9751468896865845,grad_norm: 0.9999990331194691, iteration: 225141
loss: 0.9957795739173889,grad_norm: 0.7657005352876742, iteration: 225142
loss: 1.0223872661590576,grad_norm: 0.9292375188742714, iteration: 225143
loss: 0.9842402338981628,grad_norm: 0.9034721074633918, iteration: 225144
loss: 0.9850912690162659,grad_norm: 0.8772124396394521, iteration: 225145
loss: 0.9965993762016296,grad_norm: 0.9172648946170524, iteration: 225146
loss: 0.9963541626930237,grad_norm: 0.8324707951376995, iteration: 225147
loss: 0.9664740562438965,grad_norm: 0.9239811639425201, iteration: 225148
loss: 0.9907870292663574,grad_norm: 0.9727920018050014, iteration: 225149
loss: 1.0035332441329956,grad_norm: 0.8520929128367326, iteration: 225150
loss: 0.9939557909965515,grad_norm: 0.9999991215506175, iteration: 225151
loss: 1.0137189626693726,grad_norm: 0.9999990074325751, iteration: 225152
loss: 1.0497288703918457,grad_norm: 0.9878636555611526, iteration: 225153
loss: 1.009146809577942,grad_norm: 0.9999993772861541, iteration: 225154
loss: 0.9573411345481873,grad_norm: 0.9203816776365779, iteration: 225155
loss: 1.001822829246521,grad_norm: 0.9999990651333139, iteration: 225156
loss: 0.9622769355773926,grad_norm: 0.9218984788531107, iteration: 225157
loss: 1.003116250038147,grad_norm: 0.9491974946425379, iteration: 225158
loss: 0.9852663278579712,grad_norm: 0.8588694558475732, iteration: 225159
loss: 1.0062271356582642,grad_norm: 0.8125263140099338, iteration: 225160
loss: 0.9895851016044617,grad_norm: 0.9999990118349891, iteration: 225161
loss: 1.0089459419250488,grad_norm: 0.8676446293882024, iteration: 225162
loss: 0.9832141399383545,grad_norm: 0.999999029426694, iteration: 225163
loss: 1.0012205839157104,grad_norm: 0.9999990207666736, iteration: 225164
loss: 1.001818299293518,grad_norm: 0.9999992006893857, iteration: 225165
loss: 0.9624974131584167,grad_norm: 0.9752748459109151, iteration: 225166
loss: 1.0320335626602173,grad_norm: 0.9256238110925396, iteration: 225167
loss: 1.000920057296753,grad_norm: 0.910702460580398, iteration: 225168
loss: 0.9864357709884644,grad_norm: 0.9999990497918706, iteration: 225169
loss: 1.0002888441085815,grad_norm: 0.8029169648184791, iteration: 225170
loss: 0.9834368824958801,grad_norm: 0.8601602917284124, iteration: 225171
loss: 0.9868646264076233,grad_norm: 0.9999991716021075, iteration: 225172
loss: 1.0153409242630005,grad_norm: 0.9006374630547087, iteration: 225173
loss: 0.9744076728820801,grad_norm: 0.7760445248180988, iteration: 225174
loss: 0.9793636202812195,grad_norm: 0.9788125423761095, iteration: 225175
loss: 1.0363188982009888,grad_norm: 0.9999990133999629, iteration: 225176
loss: 0.9771311283111572,grad_norm: 0.9999991785040554, iteration: 225177
loss: 0.9632571339607239,grad_norm: 0.8524188268682468, iteration: 225178
loss: 1.027366042137146,grad_norm: 0.8568068510837531, iteration: 225179
loss: 0.9762765765190125,grad_norm: 0.999999173151324, iteration: 225180
loss: 0.9759433269500732,grad_norm: 0.959748317105083, iteration: 225181
loss: 0.9744710326194763,grad_norm: 0.858868881422177, iteration: 225182
loss: 0.9808396697044373,grad_norm: 0.9999990457393831, iteration: 225183
loss: 1.0075838565826416,grad_norm: 0.9363673124602314, iteration: 225184
loss: 0.9971727728843689,grad_norm: 0.8890171728211419, iteration: 225185
loss: 0.9818220734596252,grad_norm: 0.8412304958860509, iteration: 225186
loss: 1.0249288082122803,grad_norm: 0.9366545836345448, iteration: 225187
loss: 1.0144846439361572,grad_norm: 0.9999990193116259, iteration: 225188
loss: 1.2160439491271973,grad_norm: 0.999999891212491, iteration: 225189
loss: 1.238519310951233,grad_norm: 0.9999995416739939, iteration: 225190
loss: 0.9721490144729614,grad_norm: 0.9359501939885062, iteration: 225191
loss: 1.0188246965408325,grad_norm: 0.8513749911223806, iteration: 225192
loss: 0.9875777959823608,grad_norm: 0.9713870516613955, iteration: 225193
loss: 1.0016398429870605,grad_norm: 0.9999990495318171, iteration: 225194
loss: 1.0472439527511597,grad_norm: 0.9579542321969442, iteration: 225195
loss: 1.0179414749145508,grad_norm: 0.9999991105301653, iteration: 225196
loss: 0.9851511120796204,grad_norm: 0.9813698936526141, iteration: 225197
loss: 0.9978762269020081,grad_norm: 0.7660936676114365, iteration: 225198
loss: 1.0194828510284424,grad_norm: 0.9999992034523231, iteration: 225199
loss: 0.993436872959137,grad_norm: 0.9999990436745975, iteration: 225200
loss: 0.9955670833587646,grad_norm: 0.9809257505537778, iteration: 225201
loss: 1.0606025457382202,grad_norm: 0.9999992179881166, iteration: 225202
loss: 0.9789754152297974,grad_norm: 0.9999990522726924, iteration: 225203
loss: 1.0175825357437134,grad_norm: 0.9999990221099117, iteration: 225204
loss: 0.9989140033721924,grad_norm: 0.7734607913731093, iteration: 225205
loss: 1.0156054496765137,grad_norm: 0.9999992345674051, iteration: 225206
loss: 0.9953444600105286,grad_norm: 0.9602738267544718, iteration: 225207
loss: 1.0091339349746704,grad_norm: 0.846705857212316, iteration: 225208
loss: 0.9917910695075989,grad_norm: 0.9759511315727022, iteration: 225209
loss: 1.0705054998397827,grad_norm: 0.9999995306841406, iteration: 225210
loss: 1.0459486246109009,grad_norm: 0.9814418857982958, iteration: 225211
loss: 1.0068416595458984,grad_norm: 0.8040588075992751, iteration: 225212
loss: 0.9914513826370239,grad_norm: 0.9661139610884428, iteration: 225213
loss: 0.970309853553772,grad_norm: 0.8739635425008409, iteration: 225214
loss: 1.0068622827529907,grad_norm: 0.9740366284213339, iteration: 225215
loss: 0.9463250637054443,grad_norm: 0.7551939134604633, iteration: 225216
loss: 0.9881559014320374,grad_norm: 0.8418400485180717, iteration: 225217
loss: 1.0248934030532837,grad_norm: 0.9503917672583915, iteration: 225218
loss: 1.0044490098953247,grad_norm: 0.8515892876374482, iteration: 225219
loss: 1.0021356344223022,grad_norm: 0.8036440677204504, iteration: 225220
loss: 0.988914966583252,grad_norm: 0.8974268869684999, iteration: 225221
loss: 0.9940427541732788,grad_norm: 0.9387353389123585, iteration: 225222
loss: 1.021716594696045,grad_norm: 0.975745957824651, iteration: 225223
loss: 0.9591470956802368,grad_norm: 0.8769015971056063, iteration: 225224
loss: 1.0191575288772583,grad_norm: 0.9999991510569485, iteration: 225225
loss: 0.9719047546386719,grad_norm: 0.8772061430162578, iteration: 225226
loss: 0.9875081777572632,grad_norm: 0.7823619968647568, iteration: 225227
loss: 1.0097308158874512,grad_norm: 0.9096322911268135, iteration: 225228
loss: 0.9938968420028687,grad_norm: 0.8143702655923323, iteration: 225229
loss: 1.0192759037017822,grad_norm: 0.9843004021299117, iteration: 225230
loss: 1.0163527727127075,grad_norm: 0.8136431844243284, iteration: 225231
loss: 0.984067976474762,grad_norm: 0.9943499343385798, iteration: 225232
loss: 1.0016257762908936,grad_norm: 0.7887388954836456, iteration: 225233
loss: 0.9897530674934387,grad_norm: 0.7527354765278099, iteration: 225234
loss: 1.025039792060852,grad_norm: 0.8216136081808763, iteration: 225235
loss: 0.9858443140983582,grad_norm: 0.8606782745740503, iteration: 225236
loss: 0.9879258871078491,grad_norm: 0.8152796928623856, iteration: 225237
loss: 1.0336990356445312,grad_norm: 0.8611205664953162, iteration: 225238
loss: 0.9993128776550293,grad_norm: 0.9098451734444138, iteration: 225239
loss: 0.9818379878997803,grad_norm: 0.9542304842941973, iteration: 225240
loss: 1.0033304691314697,grad_norm: 0.9732830946290947, iteration: 225241
loss: 0.9699054956436157,grad_norm: 0.9883514719693238, iteration: 225242
loss: 1.181039810180664,grad_norm: 0.9999990971775715, iteration: 225243
loss: 0.9922506809234619,grad_norm: 0.9999999982078954, iteration: 225244
loss: 1.0285162925720215,grad_norm: 0.9878096437706205, iteration: 225245
loss: 1.0196924209594727,grad_norm: 0.8780656384410378, iteration: 225246
loss: 1.1012521982192993,grad_norm: 0.9999997861928164, iteration: 225247
loss: 1.0022565126419067,grad_norm: 0.9999992910659821, iteration: 225248
loss: 1.007665991783142,grad_norm: 0.9999991138509029, iteration: 225249
loss: 1.0013126134872437,grad_norm: 0.8174964701196804, iteration: 225250
loss: 1.0050196647644043,grad_norm: 0.9487508468070954, iteration: 225251
loss: 0.9599710702896118,grad_norm: 0.9058106782889045, iteration: 225252
loss: 1.0027400255203247,grad_norm: 0.8351964470729735, iteration: 225253
loss: 0.9790046215057373,grad_norm: 0.8979300515617233, iteration: 225254
loss: 1.028496503829956,grad_norm: 0.9744786335677577, iteration: 225255
loss: 0.947601854801178,grad_norm: 0.8971194909931508, iteration: 225256
loss: 0.9925087690353394,grad_norm: 0.9999990004195718, iteration: 225257
loss: 1.0003607273101807,grad_norm: 0.9397014018176841, iteration: 225258
loss: 1.0031862258911133,grad_norm: 0.9542573646639303, iteration: 225259
loss: 1.0311049222946167,grad_norm: 0.7827992912617738, iteration: 225260
loss: 0.9714287519454956,grad_norm: 0.9999989174845039, iteration: 225261
loss: 1.0184171199798584,grad_norm: 0.999999539329441, iteration: 225262
loss: 0.9812109470367432,grad_norm: 0.9457120170696693, iteration: 225263
loss: 0.9695719480514526,grad_norm: 0.859686296803776, iteration: 225264
loss: 1.049165964126587,grad_norm: 0.9999994521943801, iteration: 225265
loss: 1.0538995265960693,grad_norm: 0.999999128779839, iteration: 225266
loss: 1.0164836645126343,grad_norm: 0.9999993354485985, iteration: 225267
loss: 0.9863113164901733,grad_norm: 0.9999991864091643, iteration: 225268
loss: 1.0150892734527588,grad_norm: 0.7878982325650599, iteration: 225269
loss: 1.1002838611602783,grad_norm: 0.9999993379228638, iteration: 225270
loss: 0.9970327019691467,grad_norm: 0.9478579354093961, iteration: 225271
loss: 1.0299569368362427,grad_norm: 0.9999993751507241, iteration: 225272
loss: 1.0026408433914185,grad_norm: 0.862180445469574, iteration: 225273
loss: 1.01505446434021,grad_norm: 0.9999991225742878, iteration: 225274
loss: 0.9944219589233398,grad_norm: 0.8630889634764806, iteration: 225275
loss: 0.9944900274276733,grad_norm: 0.9393331446234724, iteration: 225276
loss: 0.9739648699760437,grad_norm: 0.8974550752181937, iteration: 225277
loss: 0.9630026817321777,grad_norm: 0.9180523305288468, iteration: 225278
loss: 1.0562324523925781,grad_norm: 0.9999996179082894, iteration: 225279
loss: 1.0002797842025757,grad_norm: 0.8292782271532099, iteration: 225280
loss: 0.9791146516799927,grad_norm: 0.9999994031493561, iteration: 225281
loss: 1.0327814817428589,grad_norm: 0.9425233513203402, iteration: 225282
loss: 0.9910958409309387,grad_norm: 0.9539580422551902, iteration: 225283
loss: 1.0203511714935303,grad_norm: 0.8973693862366735, iteration: 225284
loss: 1.0174907445907593,grad_norm: 0.8455536560802468, iteration: 225285
loss: 0.9894121885299683,grad_norm: 0.849266671501239, iteration: 225286
loss: 0.9855705499649048,grad_norm: 0.9430209914659353, iteration: 225287
loss: 0.9808365106582642,grad_norm: 0.8700482326941636, iteration: 225288
loss: 1.0286929607391357,grad_norm: 0.9387437896255059, iteration: 225289
loss: 0.9968835115432739,grad_norm: 0.8542171617581696, iteration: 225290
loss: 0.9871611595153809,grad_norm: 0.8321628959933928, iteration: 225291
loss: 0.9695708155632019,grad_norm: 0.996706955840028, iteration: 225292
loss: 1.0376050472259521,grad_norm: 0.9999996024805591, iteration: 225293
loss: 1.0034053325653076,grad_norm: 0.9695706504358116, iteration: 225294
loss: 0.9717832803726196,grad_norm: 0.8341210400632947, iteration: 225295
loss: 0.9932812452316284,grad_norm: 0.931909531579739, iteration: 225296
loss: 1.0317931175231934,grad_norm: 0.8783622344309484, iteration: 225297
loss: 1.0153758525848389,grad_norm: 0.9999998643715295, iteration: 225298
loss: 0.9710438847541809,grad_norm: 0.9999990138862634, iteration: 225299
loss: 1.0080355405807495,grad_norm: 0.9999991689684797, iteration: 225300
loss: 0.9746755957603455,grad_norm: 0.8779114577041764, iteration: 225301
loss: 0.9802846908569336,grad_norm: 0.9675158454421015, iteration: 225302
loss: 0.9622734785079956,grad_norm: 0.8119594721352296, iteration: 225303
loss: 1.0009034872055054,grad_norm: 0.9999993015168033, iteration: 225304
loss: 1.0176037549972534,grad_norm: 0.9999990838580396, iteration: 225305
loss: 0.9927247762680054,grad_norm: 0.8377272565017261, iteration: 225306
loss: 1.0414379835128784,grad_norm: 0.8973645141091585, iteration: 225307
loss: 1.0009955167770386,grad_norm: 0.9606678955185326, iteration: 225308
loss: 1.004234790802002,grad_norm: 0.8446682787582439, iteration: 225309
loss: 0.9931408166885376,grad_norm: 0.8251657205075086, iteration: 225310
loss: 1.0203365087509155,grad_norm: 0.9999991139835469, iteration: 225311
loss: 1.000356912612915,grad_norm: 0.9999991487437647, iteration: 225312
loss: 0.9859420657157898,grad_norm: 0.9999992265102332, iteration: 225313
loss: 0.9430072903633118,grad_norm: 0.9244289548274799, iteration: 225314
loss: 0.9954635500907898,grad_norm: 0.8814750122817709, iteration: 225315
loss: 0.961907148361206,grad_norm: 0.9999990460594608, iteration: 225316
loss: 1.005026936531067,grad_norm: 0.9674340194104231, iteration: 225317
loss: 0.9703784584999084,grad_norm: 0.854631423684579, iteration: 225318
loss: 0.9979262948036194,grad_norm: 0.9999991576423999, iteration: 225319
loss: 1.0380637645721436,grad_norm: 0.8220006975913955, iteration: 225320
loss: 1.0880659818649292,grad_norm: 0.9999991936001108, iteration: 225321
loss: 1.0182254314422607,grad_norm: 0.9999991245896989, iteration: 225322
loss: 1.0396651029586792,grad_norm: 0.9313730675605731, iteration: 225323
loss: 0.9715262651443481,grad_norm: 0.9999990962533593, iteration: 225324
loss: 1.0002261400222778,grad_norm: 0.9999990488542901, iteration: 225325
loss: 1.0400419235229492,grad_norm: 0.999999458773901, iteration: 225326
loss: 0.9930475950241089,grad_norm: 0.9025709250428989, iteration: 225327
loss: 0.9851890206336975,grad_norm: 0.9762518503473112, iteration: 225328
loss: 1.0014281272888184,grad_norm: 0.9493165944442385, iteration: 225329
loss: 1.0203837156295776,grad_norm: 0.9055196429992776, iteration: 225330
loss: 0.9671074151992798,grad_norm: 0.999999067431595, iteration: 225331
loss: 1.0100547075271606,grad_norm: 0.9999992713242882, iteration: 225332
loss: 0.9770561456680298,grad_norm: 0.999999203634544, iteration: 225333
loss: 1.026816725730896,grad_norm: 0.7958118912721538, iteration: 225334
loss: 0.9740680456161499,grad_norm: 0.8971941230430379, iteration: 225335
loss: 0.9883831143379211,grad_norm: 0.9999989765011554, iteration: 225336
loss: 1.0128751993179321,grad_norm: 0.8485541204355295, iteration: 225337
loss: 1.0135667324066162,grad_norm: 0.9346717321658189, iteration: 225338
loss: 1.015698790550232,grad_norm: 0.9999991010300364, iteration: 225339
loss: 0.9931932091712952,grad_norm: 0.9999990304780049, iteration: 225340
loss: 1.0173258781433105,grad_norm: 0.7719762658984041, iteration: 225341
loss: 1.0154767036437988,grad_norm: 0.8060227490061289, iteration: 225342
loss: 1.0328541994094849,grad_norm: 0.8319023899927057, iteration: 225343
loss: 1.0136750936508179,grad_norm: 0.7971032464048087, iteration: 225344
loss: 0.986096978187561,grad_norm: 0.962529861469632, iteration: 225345
loss: 1.0185375213623047,grad_norm: 0.9812747663522204, iteration: 225346
loss: 0.9944219589233398,grad_norm: 0.9397910387730236, iteration: 225347
loss: 1.0087918043136597,grad_norm: 0.8879443861097721, iteration: 225348
loss: 1.0150237083435059,grad_norm: 0.9999991421226504, iteration: 225349
loss: 0.998563289642334,grad_norm: 0.8783884129420351, iteration: 225350
loss: 1.0006499290466309,grad_norm: 0.8745013084267245, iteration: 225351
loss: 0.966000497341156,grad_norm: 0.9517586337154462, iteration: 225352
loss: 1.0128991603851318,grad_norm: 0.7629829567824169, iteration: 225353
loss: 1.0106877088546753,grad_norm: 0.9999991461757908, iteration: 225354
loss: 0.9954032301902771,grad_norm: 0.8416387169116145, iteration: 225355
loss: 1.0288063287734985,grad_norm: 0.9999994783131183, iteration: 225356
loss: 1.1060361862182617,grad_norm: 0.9999994865056521, iteration: 225357
loss: 1.0046648979187012,grad_norm: 0.8533135545936177, iteration: 225358
loss: 1.0355734825134277,grad_norm: 0.8356162858797235, iteration: 225359
loss: 0.9986066222190857,grad_norm: 0.9999991975352777, iteration: 225360
loss: 0.9960722327232361,grad_norm: 0.9105884054374518, iteration: 225361
loss: 0.9912045001983643,grad_norm: 0.9999991238537059, iteration: 225362
loss: 1.0238796472549438,grad_norm: 0.9242811376381951, iteration: 225363
loss: 0.9730337262153625,grad_norm: 0.9112562238146781, iteration: 225364
loss: 1.0488249063491821,grad_norm: 0.776803108414739, iteration: 225365
loss: 1.0383718013763428,grad_norm: 0.9999991215371442, iteration: 225366
loss: 0.9597986936569214,grad_norm: 0.8064065265436637, iteration: 225367
loss: 1.018852949142456,grad_norm: 0.8958786411971458, iteration: 225368
loss: 1.0181472301483154,grad_norm: 0.9714182764663686, iteration: 225369
loss: 1.0221796035766602,grad_norm: 0.8930712924938172, iteration: 225370
loss: 0.9888010025024414,grad_norm: 0.9455452618025381, iteration: 225371
loss: 0.9806033968925476,grad_norm: 0.9249750064993253, iteration: 225372
loss: 1.0062452554702759,grad_norm: 0.9999990599848787, iteration: 225373
loss: 1.0168596506118774,grad_norm: 0.7985115336734383, iteration: 225374
loss: 1.0134813785552979,grad_norm: 0.7400461281643417, iteration: 225375
loss: 0.9994499683380127,grad_norm: 0.7768195497850431, iteration: 225376
loss: 0.9602277278900146,grad_norm: 0.9254884628667167, iteration: 225377
loss: 0.9802542924880981,grad_norm: 0.8064166393077155, iteration: 225378
loss: 0.9691972732543945,grad_norm: 0.9999990856286893, iteration: 225379
loss: 0.986906111240387,grad_norm: 0.9396761069621407, iteration: 225380
loss: 1.0129625797271729,grad_norm: 0.9999992595294208, iteration: 225381
loss: 1.0093135833740234,grad_norm: 0.6998617775117255, iteration: 225382
loss: 1.0118910074234009,grad_norm: 0.8840182584184465, iteration: 225383
loss: 1.0217082500457764,grad_norm: 0.9999989892671761, iteration: 225384
loss: 1.0106124877929688,grad_norm: 0.9202087078956753, iteration: 225385
loss: 1.001886010169983,grad_norm: 0.9999990630282046, iteration: 225386
loss: 0.9950836896896362,grad_norm: 0.9999990935186347, iteration: 225387
loss: 0.9831598997116089,grad_norm: 0.9237484771066035, iteration: 225388
loss: 1.0087308883666992,grad_norm: 0.9999994003143182, iteration: 225389
loss: 0.9533275365829468,grad_norm: 0.999999181406242, iteration: 225390
loss: 1.0115818977355957,grad_norm: 0.7556119082128196, iteration: 225391
loss: 0.9617975950241089,grad_norm: 0.8142345598208919, iteration: 225392
loss: 0.9936915040016174,grad_norm: 0.8243072217326848, iteration: 225393
loss: 1.0281566381454468,grad_norm: 0.911755612040489, iteration: 225394
loss: 0.98598313331604,grad_norm: 0.9999990950617423, iteration: 225395
loss: 1.0013607740402222,grad_norm: 0.8078763766248716, iteration: 225396
loss: 1.0176500082015991,grad_norm: 0.9999990724845338, iteration: 225397
loss: 1.0183298587799072,grad_norm: 0.9935510178051438, iteration: 225398
loss: 0.9990447163581848,grad_norm: 0.9999989101678398, iteration: 225399
loss: 0.9822598099708557,grad_norm: 0.813197435146731, iteration: 225400
loss: 0.9876670837402344,grad_norm: 0.9625129019729043, iteration: 225401
loss: 1.0195791721343994,grad_norm: 0.9537247577143942, iteration: 225402
loss: 1.006453514099121,grad_norm: 0.999999316404609, iteration: 225403
loss: 0.9969652891159058,grad_norm: 0.9999989923465815, iteration: 225404
loss: 0.9967251420021057,grad_norm: 0.9688500739826075, iteration: 225405
loss: 0.9946369528770447,grad_norm: 0.9999991387996915, iteration: 225406
loss: 1.025360107421875,grad_norm: 0.8054586787461355, iteration: 225407
loss: 1.0109119415283203,grad_norm: 0.9903037475431473, iteration: 225408
loss: 0.9970296025276184,grad_norm: 0.9999991997956494, iteration: 225409
loss: 0.9888286590576172,grad_norm: 0.8169700166933094, iteration: 225410
loss: 0.9995586276054382,grad_norm: 0.9004860070985541, iteration: 225411
loss: 0.9978243708610535,grad_norm: 0.9308894359028633, iteration: 225412
loss: 0.9606501460075378,grad_norm: 0.8118311744752156, iteration: 225413
loss: 0.996050238609314,grad_norm: 0.8350596495829873, iteration: 225414
loss: 1.1945239305496216,grad_norm: 0.9999995597662529, iteration: 225415
loss: 0.9984341859817505,grad_norm: 0.9829205284184611, iteration: 225416
loss: 0.9966564178466797,grad_norm: 0.8210838195801712, iteration: 225417
loss: 1.0004446506500244,grad_norm: 0.854799915190156, iteration: 225418
loss: 1.0204086303710938,grad_norm: 0.9930001322843621, iteration: 225419
loss: 0.9811491370201111,grad_norm: 0.9999992263891528, iteration: 225420
loss: 1.0139479637145996,grad_norm: 0.8305115076868012, iteration: 225421
loss: 0.9754867553710938,grad_norm: 0.8877051799561004, iteration: 225422
loss: 0.9734851121902466,grad_norm: 0.9999991216354782, iteration: 225423
loss: 0.9960227608680725,grad_norm: 0.8773620183475354, iteration: 225424
loss: 1.0301355123519897,grad_norm: 0.7967492965420124, iteration: 225425
loss: 1.0343972444534302,grad_norm: 0.9170417034315708, iteration: 225426
loss: 1.0017273426055908,grad_norm: 0.9999991332380481, iteration: 225427
loss: 1.0075387954711914,grad_norm: 0.9999992390448115, iteration: 225428
loss: 0.9684840440750122,grad_norm: 0.9999990375533383, iteration: 225429
loss: 0.9507297277450562,grad_norm: 0.929362823594142, iteration: 225430
loss: 1.0093390941619873,grad_norm: 0.9235873981238928, iteration: 225431
loss: 0.9717420935630798,grad_norm: 0.9495136519565549, iteration: 225432
loss: 0.9684040546417236,grad_norm: 0.9999989093441313, iteration: 225433
loss: 1.0523152351379395,grad_norm: 0.8688967067987359, iteration: 225434
loss: 0.9722517132759094,grad_norm: 0.9999990408820713, iteration: 225435
loss: 1.0003132820129395,grad_norm: 0.875826951593786, iteration: 225436
loss: 0.9875536561012268,grad_norm: 0.9999991732283722, iteration: 225437
loss: 1.0210798978805542,grad_norm: 0.9999990856497342, iteration: 225438
loss: 1.021545648574829,grad_norm: 0.9605963669659789, iteration: 225439
loss: 1.0179121494293213,grad_norm: 0.9999991981807898, iteration: 225440
loss: 0.9997810125350952,grad_norm: 0.9999995575762449, iteration: 225441
loss: 0.9930394887924194,grad_norm: 0.9999991650449651, iteration: 225442
loss: 1.0037304162979126,grad_norm: 0.9438904468214061, iteration: 225443
loss: 1.1207472085952759,grad_norm: 0.9999999167935618, iteration: 225444
loss: 0.994562029838562,grad_norm: 0.8645039376070232, iteration: 225445
loss: 1.0080714225769043,grad_norm: 0.7938884454371979, iteration: 225446
loss: 0.9863430261611938,grad_norm: 0.9976433091401751, iteration: 225447
loss: 0.9954818487167358,grad_norm: 0.9999991363611381, iteration: 225448
loss: 0.9902337193489075,grad_norm: 0.9999991324482393, iteration: 225449
loss: 1.0112005472183228,grad_norm: 0.8292202300275721, iteration: 225450
loss: 0.9982737302780151,grad_norm: 0.935834723254228, iteration: 225451
loss: 0.9874835014343262,grad_norm: 0.8496349844596065, iteration: 225452
loss: 0.9483857750892639,grad_norm: 0.975224983217148, iteration: 225453
loss: 1.0060698986053467,grad_norm: 0.9277094621683658, iteration: 225454
loss: 0.9921465516090393,grad_norm: 0.9999991167522553, iteration: 225455
loss: 1.0388445854187012,grad_norm: 0.8660011681449398, iteration: 225456
loss: 1.0254619121551514,grad_norm: 0.9741126131929686, iteration: 225457
loss: 0.9869722723960876,grad_norm: 0.9999990607464052, iteration: 225458
loss: 1.0106005668640137,grad_norm: 0.8883209992028924, iteration: 225459
loss: 0.9861636757850647,grad_norm: 0.7937695147371767, iteration: 225460
loss: 0.9805148839950562,grad_norm: 0.8588381549667835, iteration: 225461
loss: 0.9908179044723511,grad_norm: 0.9309618122678699, iteration: 225462
loss: 0.9890321493148804,grad_norm: 0.8928484598121105, iteration: 225463
loss: 1.0160168409347534,grad_norm: 0.9384563240396242, iteration: 225464
loss: 1.0088850259780884,grad_norm: 0.9999991204478882, iteration: 225465
loss: 1.0011398792266846,grad_norm: 0.874486830595488, iteration: 225466
loss: 1.1118180751800537,grad_norm: 0.9999993064519774, iteration: 225467
loss: 0.976842999458313,grad_norm: 0.8082242351866034, iteration: 225468
loss: 1.0045247077941895,grad_norm: 0.9587800628720191, iteration: 225469
loss: 1.0294033288955688,grad_norm: 0.9125888154466738, iteration: 225470
loss: 0.9968360066413879,grad_norm: 0.9999991038948669, iteration: 225471
loss: 0.9920693635940552,grad_norm: 0.9844746871405314, iteration: 225472
loss: 0.9613081216812134,grad_norm: 0.88904320038896, iteration: 225473
loss: 1.0353726148605347,grad_norm: 0.9389359521634021, iteration: 225474
loss: 0.9792919754981995,grad_norm: 0.9999990699874352, iteration: 225475
loss: 1.0037118196487427,grad_norm: 0.7854859273316976, iteration: 225476
loss: 0.9942113757133484,grad_norm: 0.8647747218258768, iteration: 225477
loss: 1.0445191860198975,grad_norm: 0.977781197124924, iteration: 225478
loss: 0.9781975150108337,grad_norm: 0.8809916225928635, iteration: 225479
loss: 0.987368106842041,grad_norm: 0.9999989723481342, iteration: 225480
loss: 0.9822250008583069,grad_norm: 0.8632334611855236, iteration: 225481
loss: 0.9932045340538025,grad_norm: 0.9999988727445053, iteration: 225482
loss: 1.0422337055206299,grad_norm: 0.9999992265453155, iteration: 225483
loss: 1.0996651649475098,grad_norm: 0.9999991656951581, iteration: 225484
loss: 1.0187792778015137,grad_norm: 0.8572266744342713, iteration: 225485
loss: 0.9862440824508667,grad_norm: 0.7667612036344031, iteration: 225486
loss: 1.03443443775177,grad_norm: 0.9999999540690059, iteration: 225487
loss: 0.9781854748725891,grad_norm: 0.879792274884529, iteration: 225488
loss: 0.9767942428588867,grad_norm: 0.9999993550191574, iteration: 225489
loss: 1.027323842048645,grad_norm: 0.8988571181106999, iteration: 225490
loss: 0.9960730075836182,grad_norm: 0.9158279087557462, iteration: 225491
loss: 1.0360791683197021,grad_norm: 0.9145534204839364, iteration: 225492
loss: 1.0675832033157349,grad_norm: 0.9999995617951122, iteration: 225493
loss: 1.0021376609802246,grad_norm: 0.9477903636696434, iteration: 225494
loss: 0.9850866794586182,grad_norm: 0.9999991002661847, iteration: 225495
loss: 0.995542049407959,grad_norm: 0.9689770975254478, iteration: 225496
loss: 1.0653667449951172,grad_norm: 0.9999995856565241, iteration: 225497
loss: 1.1301074028015137,grad_norm: 0.9999992669273052, iteration: 225498
loss: 1.197455883026123,grad_norm: 0.8792589354153991, iteration: 225499
loss: 1.0013870000839233,grad_norm: 0.9920807617147893, iteration: 225500
loss: 1.037713885307312,grad_norm: 0.9999992327990441, iteration: 225501
loss: 1.0345017910003662,grad_norm: 0.9999994101022935, iteration: 225502
loss: 1.0005905628204346,grad_norm: 0.8936481975194598, iteration: 225503
loss: 1.0244085788726807,grad_norm: 0.9118448588830513, iteration: 225504
loss: 0.9816721677780151,grad_norm: 0.9780514148311131, iteration: 225505
loss: 1.028106689453125,grad_norm: 0.8143573611372136, iteration: 225506
loss: 1.0075714588165283,grad_norm: 0.6529412213616166, iteration: 225507
loss: 1.040502667427063,grad_norm: 0.9999990472038375, iteration: 225508
loss: 1.0065138339996338,grad_norm: 0.8521090798418826, iteration: 225509
loss: 0.9797523021697998,grad_norm: 0.7971117083364601, iteration: 225510
loss: 0.9877511262893677,grad_norm: 0.9999989249568408, iteration: 225511
loss: 1.004088282585144,grad_norm: 0.8613028215555695, iteration: 225512
loss: 0.9722882509231567,grad_norm: 0.9223212508129078, iteration: 225513
loss: 0.9784998893737793,grad_norm: 0.9999993063182813, iteration: 225514
loss: 1.0030597448349,grad_norm: 0.9366925283862231, iteration: 225515
loss: 0.9963764548301697,grad_norm: 0.7418160480231734, iteration: 225516
loss: 1.0456267595291138,grad_norm: 0.8177411627217087, iteration: 225517
loss: 1.1315844058990479,grad_norm: 0.9999991551679448, iteration: 225518
loss: 1.0005549192428589,grad_norm: 0.9507220304331663, iteration: 225519
loss: 0.9915707111358643,grad_norm: 0.8612783902123966, iteration: 225520
loss: 0.997609555721283,grad_norm: 0.9999991389061158, iteration: 225521
loss: 1.0167981386184692,grad_norm: 0.8579120136399653, iteration: 225522
loss: 1.0126758813858032,grad_norm: 0.9999990837307079, iteration: 225523
loss: 1.029715657234192,grad_norm: 0.9999990666861879, iteration: 225524
loss: 0.9978731870651245,grad_norm: 0.8947722007807338, iteration: 225525
loss: 0.9968630075454712,grad_norm: 0.9186412861116701, iteration: 225526
loss: 0.9903488159179688,grad_norm: 0.9999991344346075, iteration: 225527
loss: 0.99952632188797,grad_norm: 0.9999995224974862, iteration: 225528
loss: 1.003118872642517,grad_norm: 0.7630831249216907, iteration: 225529
loss: 0.9863321185112,grad_norm: 0.8275840039538752, iteration: 225530
loss: 1.0308889150619507,grad_norm: 0.999999440685006, iteration: 225531
loss: 1.0039019584655762,grad_norm: 0.9999990255167304, iteration: 225532
loss: 0.9964414834976196,grad_norm: 0.8941407667966127, iteration: 225533
loss: 1.043835163116455,grad_norm: 0.9999992177854231, iteration: 225534
loss: 1.023057222366333,grad_norm: 0.9999991125006141, iteration: 225535
loss: 1.0251742601394653,grad_norm: 0.8564647367234622, iteration: 225536
loss: 0.9999511241912842,grad_norm: 0.9999990297477245, iteration: 225537
loss: 1.0815329551696777,grad_norm: 0.999999112498244, iteration: 225538
loss: 1.0066064596176147,grad_norm: 0.7512365678803549, iteration: 225539
loss: 1.0186415910720825,grad_norm: 0.9999991772301684, iteration: 225540
loss: 1.0454374551773071,grad_norm: 0.9999990865532499, iteration: 225541
loss: 0.9897337555885315,grad_norm: 0.9999990762410527, iteration: 225542
loss: 0.9941205382347107,grad_norm: 0.9999990725506637, iteration: 225543
loss: 0.96978360414505,grad_norm: 0.999999135446347, iteration: 225544
loss: 1.0128897428512573,grad_norm: 0.9999991661970326, iteration: 225545
loss: 1.1073460578918457,grad_norm: 0.999999041239195, iteration: 225546
loss: 0.9793417453765869,grad_norm: 0.7716765615609064, iteration: 225547
loss: 0.9876365661621094,grad_norm: 0.8986346446161488, iteration: 225548
loss: 0.9913245439529419,grad_norm: 0.8960473494647917, iteration: 225549
loss: 0.9644598960876465,grad_norm: 0.8305266746879637, iteration: 225550
loss: 0.9762773513793945,grad_norm: 0.9999990973787051, iteration: 225551
loss: 0.9685373902320862,grad_norm: 0.9730377689656926, iteration: 225552
loss: 1.0238105058670044,grad_norm: 0.8734762547240758, iteration: 225553
loss: 0.9969503879547119,grad_norm: 0.9999990765245297, iteration: 225554
loss: 1.023611068725586,grad_norm: 0.9957746007392231, iteration: 225555
loss: 1.0032283067703247,grad_norm: 0.9250677646073165, iteration: 225556
loss: 1.0118439197540283,grad_norm: 0.8914682722746483, iteration: 225557
loss: 0.9336630702018738,grad_norm: 0.874664615964291, iteration: 225558
loss: 0.9561115503311157,grad_norm: 0.960893455311648, iteration: 225559
loss: 1.1068953275680542,grad_norm: 0.9999990003977144, iteration: 225560
loss: 0.9790878891944885,grad_norm: 0.9999991816061633, iteration: 225561
loss: 0.9949846863746643,grad_norm: 0.8706747260833436, iteration: 225562
loss: 0.9778843522071838,grad_norm: 0.9979373552821845, iteration: 225563
loss: 1.0198371410369873,grad_norm: 0.8242697194075396, iteration: 225564
loss: 1.0152894258499146,grad_norm: 0.9683526100818602, iteration: 225565
loss: 1.0300291776657104,grad_norm: 0.9269936035411186, iteration: 225566
loss: 1.0316802263259888,grad_norm: 0.8684332998390045, iteration: 225567
loss: 1.0217567682266235,grad_norm: 0.8699310164541041, iteration: 225568
loss: 0.9762385487556458,grad_norm: 0.8853429713001824, iteration: 225569
loss: 1.029941201210022,grad_norm: 0.8166136727541715, iteration: 225570
loss: 0.973059892654419,grad_norm: 0.984121727271474, iteration: 225571
loss: 0.9952777028083801,grad_norm: 0.9999990998567859, iteration: 225572
loss: 0.9932578802108765,grad_norm: 0.8830262598265555, iteration: 225573
loss: 1.0194666385650635,grad_norm: 0.9347343602385345, iteration: 225574
loss: 1.032225489616394,grad_norm: 0.9999991790021894, iteration: 225575
loss: 0.9645373821258545,grad_norm: 0.9636996087074854, iteration: 225576
loss: 1.0070286989212036,grad_norm: 0.8912334241708966, iteration: 225577
loss: 0.9921255111694336,grad_norm: 0.8560203545243181, iteration: 225578
loss: 0.961300790309906,grad_norm: 0.8512255748940378, iteration: 225579
loss: 1.020853042602539,grad_norm: 0.9999994924402249, iteration: 225580
loss: 1.0504223108291626,grad_norm: 0.9999996031947282, iteration: 225581
loss: 0.9803558588027954,grad_norm: 0.8799059801014879, iteration: 225582
loss: 0.9996952414512634,grad_norm: 0.937604098870438, iteration: 225583
loss: 1.0302108526229858,grad_norm: 0.9061721469268679, iteration: 225584
loss: 0.9839558601379395,grad_norm: 0.9999990499747042, iteration: 225585
loss: 0.9780169725418091,grad_norm: 0.8961395724289817, iteration: 225586
loss: 1.0236427783966064,grad_norm: 0.7892777997898248, iteration: 225587
loss: 1.0041862726211548,grad_norm: 0.9999994448780157, iteration: 225588
loss: 0.9911890029907227,grad_norm: 0.9378336647367581, iteration: 225589
loss: 1.0279948711395264,grad_norm: 0.8855972706323247, iteration: 225590
loss: 1.014100193977356,grad_norm: 0.979447048412454, iteration: 225591
loss: 1.0085939168930054,grad_norm: 0.9579469937229392, iteration: 225592
loss: 0.9841513633728027,grad_norm: 0.9304915993057695, iteration: 225593
loss: 0.9598283767700195,grad_norm: 0.8809434103009547, iteration: 225594
loss: 1.0465596914291382,grad_norm: 0.9999991195441468, iteration: 225595
loss: 0.9953960180282593,grad_norm: 0.999999936834808, iteration: 225596
loss: 1.0109004974365234,grad_norm: 0.9999990967900932, iteration: 225597
loss: 1.0242221355438232,grad_norm: 0.9423303763943874, iteration: 225598
loss: 1.010834813117981,grad_norm: 0.842135485399685, iteration: 225599
loss: 1.002252459526062,grad_norm: 0.8905225062731131, iteration: 225600
loss: 1.0558478832244873,grad_norm: 0.9999990701667243, iteration: 225601
loss: 0.9833816289901733,grad_norm: 0.9976899370106502, iteration: 225602
loss: 0.9739835262298584,grad_norm: 0.9448612752059544, iteration: 225603
loss: 0.9689394235610962,grad_norm: 0.8772852389079621, iteration: 225604
loss: 1.0261059999465942,grad_norm: 0.9999996047719888, iteration: 225605
loss: 1.0113506317138672,grad_norm: 0.775492535248578, iteration: 225606
loss: 0.9946092367172241,grad_norm: 0.9712907778390054, iteration: 225607
loss: 0.9724853038787842,grad_norm: 0.801353318781691, iteration: 225608
loss: 1.0609959363937378,grad_norm: 0.9432861404025901, iteration: 225609
loss: 1.000744104385376,grad_norm: 0.9214340046096021, iteration: 225610
loss: 1.0326913595199585,grad_norm: 0.970480278838564, iteration: 225611
loss: 0.9981909990310669,grad_norm: 0.9007800435419461, iteration: 225612
loss: 0.9921849966049194,grad_norm: 0.9999991592409244, iteration: 225613
loss: 0.998272180557251,grad_norm: 0.9999991515730773, iteration: 225614
loss: 0.9937751293182373,grad_norm: 0.9999990164635977, iteration: 225615
loss: 1.1100271940231323,grad_norm: 0.9999997265544898, iteration: 225616
loss: 0.997873842716217,grad_norm: 0.9999992351331765, iteration: 225617
loss: 0.9718891978263855,grad_norm: 0.8341212545166987, iteration: 225618
loss: 0.9713928699493408,grad_norm: 0.9999990074597989, iteration: 225619
loss: 0.9608508944511414,grad_norm: 0.9298771775978762, iteration: 225620
loss: 0.9802395105361938,grad_norm: 0.9999990218293755, iteration: 225621
loss: 0.9828431010246277,grad_norm: 0.7944819490239311, iteration: 225622
loss: 0.9989567399024963,grad_norm: 0.9221427014511258, iteration: 225623
loss: 0.9729336500167847,grad_norm: 0.9999991482074689, iteration: 225624
loss: 0.9904258251190186,grad_norm: 0.9999990976590174, iteration: 225625
loss: 1.0010653734207153,grad_norm: 0.9736587987425512, iteration: 225626
loss: 0.9890204071998596,grad_norm: 1.000000024342691, iteration: 225627
loss: 0.9984075427055359,grad_norm: 0.8463504459835566, iteration: 225628
loss: 0.9761471152305603,grad_norm: 0.9948673020138259, iteration: 225629
loss: 1.0264482498168945,grad_norm: 0.886268878270834, iteration: 225630
loss: 0.9776591062545776,grad_norm: 0.8956212169870839, iteration: 225631
loss: 0.9643158912658691,grad_norm: 0.9999990638636818, iteration: 225632
loss: 1.0156997442245483,grad_norm: 0.8775488132532672, iteration: 225633
loss: 1.2251569032669067,grad_norm: 0.9999993557562902, iteration: 225634
loss: 0.9759335517883301,grad_norm: 0.9287056761563219, iteration: 225635
loss: 1.0053248405456543,grad_norm: 0.969909687072721, iteration: 225636
loss: 0.9795972108840942,grad_norm: 0.934001473712565, iteration: 225637
loss: 0.9638473391532898,grad_norm: 0.9477572726028219, iteration: 225638
loss: 1.0168819427490234,grad_norm: 0.9999993886928887, iteration: 225639
loss: 0.9794220924377441,grad_norm: 0.897454048067978, iteration: 225640
loss: 1.0351938009262085,grad_norm: 0.9999993502703532, iteration: 225641
loss: 1.0450589656829834,grad_norm: 0.9367742144819212, iteration: 225642
loss: 0.9823381304740906,grad_norm: 0.9999992286985829, iteration: 225643
loss: 1.012141227722168,grad_norm: 0.9043935288433976, iteration: 225644
loss: 1.0179688930511475,grad_norm: 0.9999991844789184, iteration: 225645
loss: 0.9800849556922913,grad_norm: 0.9999990035414019, iteration: 225646
loss: 1.3087395429611206,grad_norm: 0.9999999373324011, iteration: 225647
loss: 1.0136265754699707,grad_norm: 0.9999991456709681, iteration: 225648
loss: 0.9851665496826172,grad_norm: 0.8406929482251077, iteration: 225649
loss: 0.9870237708091736,grad_norm: 0.9230792150726279, iteration: 225650
loss: 1.024458646774292,grad_norm: 0.8927471869187443, iteration: 225651
loss: 1.0279908180236816,grad_norm: 0.8389278427871942, iteration: 225652
loss: 1.0579419136047363,grad_norm: 0.9999992191380143, iteration: 225653
loss: 0.9626958966255188,grad_norm: 0.7809708626232195, iteration: 225654
loss: 1.0315520763397217,grad_norm: 0.8652138278505501, iteration: 225655
loss: 1.0098295211791992,grad_norm: 0.9999993093194374, iteration: 225656
loss: 0.9900301098823547,grad_norm: 0.9999992407861114, iteration: 225657
loss: 1.0010502338409424,grad_norm: 0.7441649795486643, iteration: 225658
loss: 1.0485897064208984,grad_norm: 0.8332700546781477, iteration: 225659
loss: 1.0032799243927002,grad_norm: 0.9999999268922078, iteration: 225660
loss: 0.993582546710968,grad_norm: 0.8942317000218993, iteration: 225661
loss: 0.9423931837081909,grad_norm: 0.7986832142339985, iteration: 225662
loss: 1.0613125562667847,grad_norm: 0.9999992679118496, iteration: 225663
loss: 0.9739109873771667,grad_norm: 0.9999990943288819, iteration: 225664
loss: 0.9735516309738159,grad_norm: 0.9662937601396876, iteration: 225665
loss: 1.053176760673523,grad_norm: 0.9999999700760634, iteration: 225666
loss: 1.1477731466293335,grad_norm: 0.9999997547485847, iteration: 225667
loss: 1.0074255466461182,grad_norm: 0.9315649235131067, iteration: 225668
loss: 0.995718240737915,grad_norm: 0.8670421667436907, iteration: 225669
loss: 1.046340823173523,grad_norm: 0.999999034932952, iteration: 225670
loss: 1.0074032545089722,grad_norm: 0.9999990891458828, iteration: 225671
loss: 1.0459753274917603,grad_norm: 0.999999114229178, iteration: 225672
loss: 1.0080691576004028,grad_norm: 0.8634998139973605, iteration: 225673
loss: 1.0381124019622803,grad_norm: 0.9999992672462521, iteration: 225674
loss: 1.0121506452560425,grad_norm: 0.9999991352577431, iteration: 225675
loss: 1.0119714736938477,grad_norm: 0.9999992112682424, iteration: 225676
loss: 0.9842267036437988,grad_norm: 0.9829365095334247, iteration: 225677
loss: 1.0304421186447144,grad_norm: 0.999999199769754, iteration: 225678
loss: 0.9837005138397217,grad_norm: 0.9999992752097363, iteration: 225679
loss: 1.00177001953125,grad_norm: 0.9999990358634002, iteration: 225680
loss: 1.0189385414123535,grad_norm: 0.9666943792409132, iteration: 225681
loss: 1.0099351406097412,grad_norm: 0.9999991216000095, iteration: 225682
loss: 1.0270230770111084,grad_norm: 0.9999990398803545, iteration: 225683
loss: 0.9846999645233154,grad_norm: 0.8911861031644318, iteration: 225684
loss: 1.070479154586792,grad_norm: 0.9999998575930216, iteration: 225685
loss: 0.9700806140899658,grad_norm: 0.845307810740372, iteration: 225686
loss: 1.0141478776931763,grad_norm: 0.9390265395833798, iteration: 225687
loss: 0.9785307049751282,grad_norm: 0.9999999732378203, iteration: 225688
loss: 0.9850032329559326,grad_norm: 0.926997700379204, iteration: 225689
loss: 0.971243679523468,grad_norm: 0.7886151717218105, iteration: 225690
loss: 1.0217374563217163,grad_norm: 0.9999991618237134, iteration: 225691
loss: 1.005391001701355,grad_norm: 0.930828947118594, iteration: 225692
loss: 1.0206536054611206,grad_norm: 0.9999997770607127, iteration: 225693
loss: 1.022662878036499,grad_norm: 0.9541647945965032, iteration: 225694
loss: 1.034805417060852,grad_norm: 0.9999991498388429, iteration: 225695
loss: 0.9954134225845337,grad_norm: 0.930522155160924, iteration: 225696
loss: 1.0483723878860474,grad_norm: 0.9999995988690508, iteration: 225697
loss: 1.0063118934631348,grad_norm: 0.9999992571831511, iteration: 225698
loss: 1.0176143646240234,grad_norm: 0.9999990789815448, iteration: 225699
loss: 1.0045177936553955,grad_norm: 0.9999990280639697, iteration: 225700
loss: 1.0209945440292358,grad_norm: 0.9999989743117035, iteration: 225701
loss: 1.0780037641525269,grad_norm: 0.9785789627449013, iteration: 225702
loss: 1.0348472595214844,grad_norm: 0.8796379597810445, iteration: 225703
loss: 1.0028220415115356,grad_norm: 0.8867459095534392, iteration: 225704
loss: 1.0274640321731567,grad_norm: 0.9999991153372101, iteration: 225705
loss: 1.0268311500549316,grad_norm: 0.9087283954218057, iteration: 225706
loss: 0.9844385385513306,grad_norm: 0.9927491231771733, iteration: 225707
loss: 1.0149973630905151,grad_norm: 0.9035272672519418, iteration: 225708
loss: 1.0306950807571411,grad_norm: 0.9047819576875391, iteration: 225709
loss: 1.0299742221832275,grad_norm: 0.9848777612844214, iteration: 225710
loss: 0.9997079968452454,grad_norm: 0.7913694616258163, iteration: 225711
loss: 1.0004595518112183,grad_norm: 0.9999990371613857, iteration: 225712
loss: 1.000317096710205,grad_norm: 0.9999990663103255, iteration: 225713
loss: 1.0187695026397705,grad_norm: 0.8229975254758005, iteration: 225714
loss: 0.9977072477340698,grad_norm: 0.999999118133315, iteration: 225715
loss: 1.0235737562179565,grad_norm: 0.8852437998913162, iteration: 225716
loss: 0.9870468378067017,grad_norm: 0.9488109414258531, iteration: 225717
loss: 0.9864456653594971,grad_norm: 0.999999118582463, iteration: 225718
loss: 0.9703273773193359,grad_norm: 0.8368880077052052, iteration: 225719
loss: 0.9897674322128296,grad_norm: 0.9633876927057998, iteration: 225720
loss: 0.9903217554092407,grad_norm: 0.9999990568634136, iteration: 225721
loss: 1.1231164932250977,grad_norm: 0.9999993058328402, iteration: 225722
loss: 1.0195262432098389,grad_norm: 0.9999995241644187, iteration: 225723
loss: 1.016870141029358,grad_norm: 0.9096955036578229, iteration: 225724
loss: 1.0828989744186401,grad_norm: 0.9999990468991483, iteration: 225725
loss: 0.9943867921829224,grad_norm: 0.99999900210634, iteration: 225726
loss: 1.0045479536056519,grad_norm: 0.9999990350737433, iteration: 225727
loss: 0.9842647910118103,grad_norm: 0.9999990140387758, iteration: 225728
loss: 1.0216639041900635,grad_norm: 0.8816220261789007, iteration: 225729
loss: 1.0068230628967285,grad_norm: 0.9552480205655121, iteration: 225730
loss: 0.9948115944862366,grad_norm: 0.9999990637445594, iteration: 225731
loss: 0.969954788684845,grad_norm: 0.8792303988835335, iteration: 225732
loss: 0.9764176607131958,grad_norm: 0.9998205913002751, iteration: 225733
loss: 0.9646131992340088,grad_norm: 0.8162148194573126, iteration: 225734
loss: 1.0274566411972046,grad_norm: 0.9999993614650688, iteration: 225735
loss: 0.9968740344047546,grad_norm: 0.8500762910836941, iteration: 225736
loss: 1.010717511177063,grad_norm: 0.911474214771552, iteration: 225737
loss: 0.9735708832740784,grad_norm: 0.9999990377090853, iteration: 225738
loss: 0.9504857063293457,grad_norm: 0.8727781741883257, iteration: 225739
loss: 0.9781339168548584,grad_norm: 0.9939118792848968, iteration: 225740
loss: 0.9872779250144958,grad_norm: 0.9999993747554285, iteration: 225741
loss: 0.998226523399353,grad_norm: 0.9999991284629458, iteration: 225742
loss: 1.1233090162277222,grad_norm: 0.999999788115127, iteration: 225743
loss: 1.1021422147750854,grad_norm: 0.9999992310782803, iteration: 225744
loss: 1.021721363067627,grad_norm: 0.9999992366360981, iteration: 225745
loss: 1.0389518737792969,grad_norm: 0.9116652099523075, iteration: 225746
loss: 0.9859524369239807,grad_norm: 0.8858625655159407, iteration: 225747
loss: 1.0179240703582764,grad_norm: 0.8915457077566128, iteration: 225748
loss: 0.9596057534217834,grad_norm: 0.9687845766859309, iteration: 225749
loss: 1.01475989818573,grad_norm: 0.9517890241429937, iteration: 225750
loss: 1.016796588897705,grad_norm: 0.9544185116891364, iteration: 225751
loss: 1.0375921726226807,grad_norm: 0.9283008187376904, iteration: 225752
loss: 0.9962096214294434,grad_norm: 0.8352312052185769, iteration: 225753
loss: 0.9999347925186157,grad_norm: 0.9052851924649443, iteration: 225754
loss: 0.9809731841087341,grad_norm: 0.8312743218968411, iteration: 225755
loss: 1.0112285614013672,grad_norm: 0.9399536024959889, iteration: 225756
loss: 1.0088884830474854,grad_norm: 0.999998968380199, iteration: 225757
loss: 1.0064482688903809,grad_norm: 0.9478497657656093, iteration: 225758
loss: 1.0029436349868774,grad_norm: 0.999999337080227, iteration: 225759
loss: 1.0135297775268555,grad_norm: 0.7887171320025694, iteration: 225760
loss: 1.010252594947815,grad_norm: 0.8555367459078497, iteration: 225761
loss: 0.9788897633552551,grad_norm: 0.9520491935742526, iteration: 225762
loss: 0.951747715473175,grad_norm: 0.875173744382824, iteration: 225763
loss: 1.0093625783920288,grad_norm: 0.8464513617963982, iteration: 225764
loss: 0.9838609099388123,grad_norm: 0.8570404461462449, iteration: 225765
loss: 0.9402247071266174,grad_norm: 0.8114597528717036, iteration: 225766
loss: 0.9889311194419861,grad_norm: 0.9896053885353632, iteration: 225767
loss: 0.9805955290794373,grad_norm: 0.9999628591361351, iteration: 225768
loss: 1.0191333293914795,grad_norm: 0.9999991158544866, iteration: 225769
loss: 1.0003063678741455,grad_norm: 0.9053168849721079, iteration: 225770
loss: 1.0162190198898315,grad_norm: 0.8816582392856381, iteration: 225771
loss: 1.0004997253417969,grad_norm: 0.9177240328063366, iteration: 225772
loss: 0.9968060255050659,grad_norm: 0.8618045454175487, iteration: 225773
loss: 1.0370701551437378,grad_norm: 0.8664150761408782, iteration: 225774
loss: 0.9987261891365051,grad_norm: 0.9999998808192267, iteration: 225775
loss: 1.019952654838562,grad_norm: 0.9509775362780764, iteration: 225776
loss: 0.9957475066184998,grad_norm: 0.9741513637383632, iteration: 225777
loss: 0.9982686638832092,grad_norm: 0.9255720979522718, iteration: 225778
loss: 0.9803165793418884,grad_norm: 0.9999991393444208, iteration: 225779
loss: 1.0089495182037354,grad_norm: 0.9999989203130804, iteration: 225780
loss: 1.013864278793335,grad_norm: 0.9999991052603562, iteration: 225781
loss: 0.972737729549408,grad_norm: 0.9857163556397683, iteration: 225782
loss: 0.9676199555397034,grad_norm: 0.9132482227009661, iteration: 225783
loss: 0.991690993309021,grad_norm: 0.8791090285523971, iteration: 225784
loss: 1.0068475008010864,grad_norm: 0.9999991007946578, iteration: 225785
loss: 0.9933661818504333,grad_norm: 0.7736649200428847, iteration: 225786
loss: 1.0284345149993896,grad_norm: 0.8559738654885376, iteration: 225787
loss: 1.0019452571868896,grad_norm: 0.8935212417238854, iteration: 225788
loss: 0.9900491237640381,grad_norm: 0.9290729459733102, iteration: 225789
loss: 1.005074381828308,grad_norm: 0.8546311955035191, iteration: 225790
loss: 1.0362597703933716,grad_norm: 0.9498808478993728, iteration: 225791
loss: 0.9851679801940918,grad_norm: 0.8580518299984933, iteration: 225792
loss: 1.0157018899917603,grad_norm: 0.8830777745679215, iteration: 225793
loss: 1.001773476600647,grad_norm: 0.9521557643804901, iteration: 225794
loss: 0.9972586631774902,grad_norm: 0.999999697953167, iteration: 225795
loss: 1.0155731439590454,grad_norm: 0.8399581654005911, iteration: 225796
loss: 0.9922680258750916,grad_norm: 0.8086744056498676, iteration: 225797
loss: 1.0038058757781982,grad_norm: 0.9820987099051373, iteration: 225798
loss: 1.015435814857483,grad_norm: 0.8853606843184626, iteration: 225799
loss: 1.020229458808899,grad_norm: 0.9712672938962194, iteration: 225800
loss: 0.9878278970718384,grad_norm: 0.9145199205997002, iteration: 225801
loss: 1.0098432302474976,grad_norm: 0.9424908223071539, iteration: 225802
loss: 0.9977331161499023,grad_norm: 0.9999990406464515, iteration: 225803
loss: 0.9628878235816956,grad_norm: 0.8256486031656891, iteration: 225804
loss: 0.9919030666351318,grad_norm: 0.8423354717467018, iteration: 225805
loss: 0.9881882071495056,grad_norm: 0.9326667247148889, iteration: 225806
loss: 0.9908959865570068,grad_norm: 0.8351052826911735, iteration: 225807
loss: 1.0005677938461304,grad_norm: 0.9831179821319738, iteration: 225808
loss: 1.0514349937438965,grad_norm: 0.999999488620001, iteration: 225809
loss: 1.0247821807861328,grad_norm: 0.999999061393143, iteration: 225810
loss: 1.00383722782135,grad_norm: 0.9659906521931225, iteration: 225811
loss: 1.0192933082580566,grad_norm: 0.8522117782958596, iteration: 225812
loss: 0.9900393486022949,grad_norm: 0.907700229578095, iteration: 225813
loss: 0.985533595085144,grad_norm: 0.9999991500419704, iteration: 225814
loss: 0.9801979660987854,grad_norm: 0.7714919578285507, iteration: 225815
loss: 0.9865149855613708,grad_norm: 0.8121942904706366, iteration: 225816
loss: 1.0028082132339478,grad_norm: 0.9999991792300432, iteration: 225817
loss: 1.0876425504684448,grad_norm: 0.9999996508907935, iteration: 225818
loss: 0.9600306749343872,grad_norm: 0.8179105820202612, iteration: 225819
loss: 0.977130115032196,grad_norm: 0.9999990177407713, iteration: 225820
loss: 0.9841980338096619,grad_norm: 0.918210698397624, iteration: 225821
loss: 0.998604416847229,grad_norm: 0.9999990511381178, iteration: 225822
loss: 0.9787188172340393,grad_norm: 0.9757078062093676, iteration: 225823
loss: 0.9988650679588318,grad_norm: 0.8332573451534038, iteration: 225824
loss: 1.000941514968872,grad_norm: 0.9999991714368093, iteration: 225825
loss: 0.9702206254005432,grad_norm: 0.9999991849063821, iteration: 225826
loss: 0.9748525023460388,grad_norm: 0.9999990973114776, iteration: 225827
loss: 1.0189321041107178,grad_norm: 0.926254605742536, iteration: 225828
loss: 1.0247138738632202,grad_norm: 0.9999994420420916, iteration: 225829
loss: 0.9758591651916504,grad_norm: 0.8918334197107403, iteration: 225830
loss: 0.9591191411018372,grad_norm: 0.7558501812144741, iteration: 225831
loss: 1.0345826148986816,grad_norm: 0.9999995184090386, iteration: 225832
loss: 1.0172392129898071,grad_norm: 0.9795528089326421, iteration: 225833
loss: 1.001335859298706,grad_norm: 0.8841437150882022, iteration: 225834
loss: 0.9251712560653687,grad_norm: 0.9999993031636858, iteration: 225835
loss: 0.9944355487823486,grad_norm: 0.9632830504937522, iteration: 225836
loss: 0.9751493334770203,grad_norm: 0.867161719433309, iteration: 225837
loss: 0.9874829053878784,grad_norm: 0.8837818328781883, iteration: 225838
loss: 1.0061960220336914,grad_norm: 0.9282847719802965, iteration: 225839
loss: 0.994850218296051,grad_norm: 0.8631339862763105, iteration: 225840
loss: 1.0041708946228027,grad_norm: 0.8962761150499137, iteration: 225841
loss: 0.9743533134460449,grad_norm: 0.9470007371682336, iteration: 225842
loss: 1.0274142026901245,grad_norm: 0.9999991033503869, iteration: 225843
loss: 1.0023198127746582,grad_norm: 0.859834050325363, iteration: 225844
loss: 0.9910220503807068,grad_norm: 0.9498727176422141, iteration: 225845
loss: 0.9518977999687195,grad_norm: 0.9339630688803885, iteration: 225846
loss: 1.0888890027999878,grad_norm: 0.9999989083105896, iteration: 225847
loss: 0.9877753853797913,grad_norm: 0.9999994200651141, iteration: 225848
loss: 0.9998335838317871,grad_norm: 0.8217186888593397, iteration: 225849
loss: 0.9874263405799866,grad_norm: 0.8969388188328796, iteration: 225850
loss: 1.0028005838394165,grad_norm: 0.8592147061289426, iteration: 225851
loss: 0.9898211359977722,grad_norm: 0.9999990776763117, iteration: 225852
loss: 0.9702698588371277,grad_norm: 0.9550519059773313, iteration: 225853
loss: 0.9934360384941101,grad_norm: 0.8836606072672561, iteration: 225854
loss: 0.973573625087738,grad_norm: 0.8209181159869712, iteration: 225855
loss: 0.9948421716690063,grad_norm: 0.9485632135313087, iteration: 225856
loss: 0.9915071725845337,grad_norm: 0.8703350675563571, iteration: 225857
loss: 1.0152760744094849,grad_norm: 0.9570533133662608, iteration: 225858
loss: 1.2579463720321655,grad_norm: 0.9999999892500415, iteration: 225859
loss: 1.0125147104263306,grad_norm: 0.9876510120092722, iteration: 225860
loss: 1.01674222946167,grad_norm: 0.7903171856708454, iteration: 225861
loss: 0.9980908632278442,grad_norm: 0.9999991574626865, iteration: 225862
loss: 1.0142335891723633,grad_norm: 0.9999990498986481, iteration: 225863
loss: 0.992341935634613,grad_norm: 0.9999991472199898, iteration: 225864
loss: 1.0599181652069092,grad_norm: 0.9999999070708682, iteration: 225865
loss: 0.9877138733863831,grad_norm: 0.8702286612025372, iteration: 225866
loss: 1.0094871520996094,grad_norm: 0.9999990770935743, iteration: 225867
loss: 0.9774624705314636,grad_norm: 0.8506494822954905, iteration: 225868
loss: 1.026464819908142,grad_norm: 0.7507894100595723, iteration: 225869
loss: 1.0096447467803955,grad_norm: 0.821003550811943, iteration: 225870
loss: 0.9724401831626892,grad_norm: 0.8964034340186436, iteration: 225871
loss: 1.0306252241134644,grad_norm: 0.7747273722445364, iteration: 225872
loss: 1.0022207498550415,grad_norm: 0.8626371388133878, iteration: 225873
loss: 0.9860163331031799,grad_norm: 0.924978174849127, iteration: 225874
loss: 0.9858262538909912,grad_norm: 0.9288252258402829, iteration: 225875
loss: 1.0354560613632202,grad_norm: 0.9999993580761899, iteration: 225876
loss: 1.0466352701187134,grad_norm: 0.9457483086969037, iteration: 225877
loss: 0.9617685079574585,grad_norm: 0.9999991779405538, iteration: 225878
loss: 0.9896969199180603,grad_norm: 0.9999999230513034, iteration: 225879
loss: 0.982659101486206,grad_norm: 0.9767462934106148, iteration: 225880
loss: 1.01862370967865,grad_norm: 0.9999991559021522, iteration: 225881
loss: 1.0051953792572021,grad_norm: 0.7423152232141573, iteration: 225882
loss: 1.0247167348861694,grad_norm: 0.855723285673765, iteration: 225883
loss: 1.0394842624664307,grad_norm: 0.9285891584188259, iteration: 225884
loss: 1.0494087934494019,grad_norm: 0.9999997094300597, iteration: 225885
loss: 1.0071444511413574,grad_norm: 0.8408543701792743, iteration: 225886
loss: 0.9988946318626404,grad_norm: 0.9268967603629448, iteration: 225887
loss: 1.0540989637374878,grad_norm: 0.9999997873419083, iteration: 225888
loss: 0.9913639426231384,grad_norm: 0.9999992706787453, iteration: 225889
loss: 1.0121126174926758,grad_norm: 0.8841491818736759, iteration: 225890
loss: 1.0892157554626465,grad_norm: 0.9999992919890497, iteration: 225891
loss: 1.004359245300293,grad_norm: 0.999999186447292, iteration: 225892
loss: 0.9915176033973694,grad_norm: 0.9632219817183609, iteration: 225893
loss: 1.005405306816101,grad_norm: 0.9999992271181368, iteration: 225894
loss: 0.9913811683654785,grad_norm: 0.8107055737488444, iteration: 225895
loss: 1.0026291608810425,grad_norm: 0.7672177282873435, iteration: 225896
loss: 1.2002298831939697,grad_norm: 0.9999996455455316, iteration: 225897
loss: 0.988524854183197,grad_norm: 0.8999129472115363, iteration: 225898
loss: 1.0367168188095093,grad_norm: 0.9999993671746595, iteration: 225899
loss: 0.9974767565727234,grad_norm: 0.9999997411308197, iteration: 225900
loss: 0.9645324349403381,grad_norm: 0.8218060982967903, iteration: 225901
loss: 0.9851304292678833,grad_norm: 0.9250897547596771, iteration: 225902
loss: 0.9797637462615967,grad_norm: 0.9286097513337854, iteration: 225903
loss: 0.9750348329544067,grad_norm: 0.9107119167800564, iteration: 225904
loss: 1.097182273864746,grad_norm: 0.999999987466228, iteration: 225905
loss: 1.0135626792907715,grad_norm: 0.9053298045368624, iteration: 225906
loss: 1.0190606117248535,grad_norm: 0.9671309851743529, iteration: 225907
loss: 1.139906883239746,grad_norm: 0.9999997745752133, iteration: 225908
loss: 1.0184030532836914,grad_norm: 0.958084573795961, iteration: 225909
loss: 1.1272974014282227,grad_norm: 0.9999991973906851, iteration: 225910
loss: 1.0453734397888184,grad_norm: 0.9999991095974844, iteration: 225911
loss: 0.999608039855957,grad_norm: 0.7518506419922574, iteration: 225912
loss: 1.01547110080719,grad_norm: 0.9999991265031368, iteration: 225913
loss: 0.9921600222587585,grad_norm: 0.999999144037864, iteration: 225914
loss: 1.0058579444885254,grad_norm: 0.9999990308335626, iteration: 225915
loss: 0.9532579183578491,grad_norm: 0.9620526001510012, iteration: 225916
loss: 0.9462090730667114,grad_norm: 0.9999990776697509, iteration: 225917
loss: 1.021557331085205,grad_norm: 0.9999990088635483, iteration: 225918
loss: 1.008701205253601,grad_norm: 0.9999991032534987, iteration: 225919
loss: 1.0113556385040283,grad_norm: 0.999999110723342, iteration: 225920
loss: 0.9906581044197083,grad_norm: 0.9393883297099155, iteration: 225921
loss: 0.9985902309417725,grad_norm: 0.952770741517776, iteration: 225922
loss: 0.989501953125,grad_norm: 0.8415858022050974, iteration: 225923
loss: 0.9907907843589783,grad_norm: 0.9999990763233394, iteration: 225924
loss: 1.015075922012329,grad_norm: 0.9999990360978991, iteration: 225925
loss: 0.9904593229293823,grad_norm: 0.9142392512530592, iteration: 225926
loss: 0.9939470291137695,grad_norm: 0.8581494928723753, iteration: 225927
loss: 0.9797943830490112,grad_norm: 0.9999991345732537, iteration: 225928
loss: 1.0296669006347656,grad_norm: 0.9341798055996263, iteration: 225929
loss: 0.9955716729164124,grad_norm: 0.8968224897929405, iteration: 225930
loss: 0.9817126393318176,grad_norm: 0.9388410066771086, iteration: 225931
loss: 0.991586446762085,grad_norm: 0.992018977560402, iteration: 225932
loss: 0.9954540133476257,grad_norm: 0.8194194535483028, iteration: 225933
loss: 1.025557279586792,grad_norm: 0.9766733779887985, iteration: 225934
loss: 0.9977536797523499,grad_norm: 0.7882637760616389, iteration: 225935
loss: 1.0168882608413696,grad_norm: 0.9437354446244628, iteration: 225936
loss: 1.0353463888168335,grad_norm: 0.999999909154841, iteration: 225937
loss: 0.987001895904541,grad_norm: 0.9238277339789709, iteration: 225938
loss: 1.0035829544067383,grad_norm: 0.9329303201594511, iteration: 225939
loss: 0.9656254649162292,grad_norm: 0.9999992355850571, iteration: 225940
loss: 0.9905264377593994,grad_norm: 0.8869939643908433, iteration: 225941
loss: 0.9938035607337952,grad_norm: 0.8820636237973284, iteration: 225942
loss: 1.0000232458114624,grad_norm: 0.9953089783943851, iteration: 225943
loss: 0.9839740991592407,grad_norm: 0.7774698192185389, iteration: 225944
loss: 0.9864916801452637,grad_norm: 0.8454947237848464, iteration: 225945
loss: 1.0310288667678833,grad_norm: 0.9842488598057557, iteration: 225946
loss: 1.0675147771835327,grad_norm: 0.9999995608994169, iteration: 225947
loss: 1.0126475095748901,grad_norm: 0.9314295804774814, iteration: 225948
loss: 1.0231420993804932,grad_norm: 0.9999989104767386, iteration: 225949
loss: 1.365546464920044,grad_norm: 0.9999996593664006, iteration: 225950
loss: 0.9620166420936584,grad_norm: 0.9999991114261519, iteration: 225951
loss: 0.9763274192810059,grad_norm: 0.9456249699023329, iteration: 225952
loss: 1.0020239353179932,grad_norm: 0.99999923118258, iteration: 225953
loss: 1.041810393333435,grad_norm: 0.999999173902422, iteration: 225954
loss: 1.0128055810928345,grad_norm: 0.6940850221789154, iteration: 225955
loss: 1.0160906314849854,grad_norm: 0.963133881652279, iteration: 225956
loss: 0.9997970461845398,grad_norm: 0.9158053639279271, iteration: 225957
loss: 1.037401795387268,grad_norm: 0.998928762973491, iteration: 225958
loss: 0.9927732944488525,grad_norm: 0.9034577970458001, iteration: 225959
loss: 1.0122220516204834,grad_norm: 0.8354229379320118, iteration: 225960
loss: 1.0170022249221802,grad_norm: 0.8837234987603519, iteration: 225961
loss: 1.0174596309661865,grad_norm: 0.9999990906489604, iteration: 225962
loss: 1.0257014036178589,grad_norm: 0.9999992706379576, iteration: 225963
loss: 1.0008184909820557,grad_norm: 0.9894877692069529, iteration: 225964
loss: 1.0229675769805908,grad_norm: 0.7996919104678009, iteration: 225965
loss: 1.0179708003997803,grad_norm: 0.9459208401884927, iteration: 225966
loss: 0.9908062815666199,grad_norm: 0.907862700089576, iteration: 225967
loss: 1.0127736330032349,grad_norm: 0.9247974254299318, iteration: 225968
loss: 1.0306072235107422,grad_norm: 0.9999992203064975, iteration: 225969
loss: 0.9811326861381531,grad_norm: 0.9999991179073919, iteration: 225970
loss: 0.9877128005027771,grad_norm: 0.922404173567575, iteration: 225971
loss: 0.9621787071228027,grad_norm: 0.8638164681082874, iteration: 225972
loss: 0.9560230374336243,grad_norm: 0.9179660428311924, iteration: 225973
loss: 1.0126967430114746,grad_norm: 0.9999991022206636, iteration: 225974
loss: 0.9768744111061096,grad_norm: 0.999999055690515, iteration: 225975
loss: 0.9921507239341736,grad_norm: 0.9673081244044235, iteration: 225976
loss: 1.0145303010940552,grad_norm: 0.8416575804925194, iteration: 225977
loss: 0.9699461460113525,grad_norm: 0.9048026023612001, iteration: 225978
loss: 1.0243717432022095,grad_norm: 0.9999992250209976, iteration: 225979
loss: 1.009835958480835,grad_norm: 0.8891946510586656, iteration: 225980
loss: 0.9649232029914856,grad_norm: 0.9999991085993889, iteration: 225981
loss: 0.9849711060523987,grad_norm: 0.9999989576874644, iteration: 225982
loss: 1.0127758979797363,grad_norm: 0.9999992684655096, iteration: 225983
loss: 1.0059813261032104,grad_norm: 0.8280574558657406, iteration: 225984
loss: 1.0430898666381836,grad_norm: 0.9999992014121856, iteration: 225985
loss: 0.997023344039917,grad_norm: 0.999999093395237, iteration: 225986
loss: 0.9892531037330627,grad_norm: 0.9817069245564162, iteration: 225987
loss: 0.9901837706565857,grad_norm: 0.9227867295978163, iteration: 225988
loss: 1.0411107540130615,grad_norm: 0.9578287607891914, iteration: 225989
loss: 1.037328839302063,grad_norm: 0.9999991822404202, iteration: 225990
loss: 0.984941303730011,grad_norm: 0.9033623019912542, iteration: 225991
loss: 0.9522581696510315,grad_norm: 0.9174063589942196, iteration: 225992
loss: 0.9717146754264832,grad_norm: 0.8662505155536598, iteration: 225993
loss: 0.9668644666671753,grad_norm: 0.9184144224445059, iteration: 225994
loss: 1.032103180885315,grad_norm: 0.8688614011906495, iteration: 225995
loss: 1.0111439228057861,grad_norm: 0.8503905763819778, iteration: 225996
loss: 1.0219753980636597,grad_norm: 0.9999991760679281, iteration: 225997
loss: 0.973732590675354,grad_norm: 0.9949341353402982, iteration: 225998
loss: 1.018722653388977,grad_norm: 0.8582949546309723, iteration: 225999
loss: 0.9798443913459778,grad_norm: 0.8890559213806288, iteration: 226000
loss: 1.026870608329773,grad_norm: 0.9686277798397193, iteration: 226001
loss: 1.0081580877304077,grad_norm: 0.788293605549017, iteration: 226002
loss: 0.9950008988380432,grad_norm: 0.8962091858796992, iteration: 226003
loss: 0.9871358275413513,grad_norm: 0.7604113380731986, iteration: 226004
loss: 1.0148184299468994,grad_norm: 0.9999992513287793, iteration: 226005
loss: 0.9792473912239075,grad_norm: 0.891722438093998, iteration: 226006
loss: 1.1697444915771484,grad_norm: 0.9999996766506344, iteration: 226007
loss: 0.9897930026054382,grad_norm: 0.9025036701402136, iteration: 226008
loss: 1.1106435060501099,grad_norm: 0.9999994255160768, iteration: 226009
loss: 1.0031360387802124,grad_norm: 0.8663640972349781, iteration: 226010
loss: 0.9636500477790833,grad_norm: 0.9177249600413708, iteration: 226011
loss: 1.0093374252319336,grad_norm: 0.7824896707192645, iteration: 226012
loss: 1.0004304647445679,grad_norm: 0.8694135761317126, iteration: 226013
loss: 1.0326409339904785,grad_norm: 0.9999990207729335, iteration: 226014
loss: 1.0107496976852417,grad_norm: 0.9999989706458067, iteration: 226015
loss: 0.9991970062255859,grad_norm: 0.9014543428406959, iteration: 226016
loss: 0.9724287390708923,grad_norm: 0.8866727277127352, iteration: 226017
loss: 1.0028657913208008,grad_norm: 0.9999992338412204, iteration: 226018
loss: 0.9754087924957275,grad_norm: 0.8766422416593778, iteration: 226019
loss: 1.0023525953292847,grad_norm: 0.9999991138468679, iteration: 226020
loss: 1.0252970457077026,grad_norm: 0.9072573512063172, iteration: 226021
loss: 0.9772483110427856,grad_norm: 0.7714602074243867, iteration: 226022
loss: 1.0144777297973633,grad_norm: 0.9057122694751363, iteration: 226023
loss: 0.9995860457420349,grad_norm: 0.8676391581191355, iteration: 226024
loss: 1.0674936771392822,grad_norm: 0.9999991137579797, iteration: 226025
loss: 0.9451100826263428,grad_norm: 0.8878275646376317, iteration: 226026
loss: 1.0067733526229858,grad_norm: 0.8763891567920348, iteration: 226027
loss: 0.9997389912605286,grad_norm: 0.8832429078159211, iteration: 226028
loss: 1.0063014030456543,grad_norm: 0.7999602649513278, iteration: 226029
loss: 0.9995561838150024,grad_norm: 0.9341268674737644, iteration: 226030
loss: 0.9834293127059937,grad_norm: 0.8164616710839101, iteration: 226031
loss: 0.9912716746330261,grad_norm: 0.9403536477070077, iteration: 226032
loss: 1.0074849128723145,grad_norm: 0.7770410263459958, iteration: 226033
loss: 0.9733099937438965,grad_norm: 0.8467930970620757, iteration: 226034
loss: 0.9640161395072937,grad_norm: 0.9999990095014624, iteration: 226035
loss: 1.0377228260040283,grad_norm: 0.8776055209025623, iteration: 226036
loss: 0.9648038744926453,grad_norm: 0.999999142709006, iteration: 226037
loss: 1.0069973468780518,grad_norm: 0.9999991017242605, iteration: 226038
loss: 0.9909799098968506,grad_norm: 0.9999990835129392, iteration: 226039
loss: 0.9822608232498169,grad_norm: 0.9999995357048309, iteration: 226040
loss: 1.025214672088623,grad_norm: 0.9999993762741107, iteration: 226041
loss: 0.9976887702941895,grad_norm: 0.9344467998622786, iteration: 226042
loss: 1.012363076210022,grad_norm: 0.9999991866147708, iteration: 226043
loss: 1.0240346193313599,grad_norm: 0.9999989841300986, iteration: 226044
loss: 1.0248359441757202,grad_norm: 0.8374648604247953, iteration: 226045
loss: 0.9482138156890869,grad_norm: 0.9999989439332998, iteration: 226046
loss: 1.0616049766540527,grad_norm: 0.8400871201316029, iteration: 226047
loss: 1.0073996782302856,grad_norm: 0.8764342865037328, iteration: 226048
loss: 0.9639663100242615,grad_norm: 0.9406785909276069, iteration: 226049
loss: 0.9848906993865967,grad_norm: 0.9337967195743134, iteration: 226050
loss: 1.0446996688842773,grad_norm: 0.9999992188209758, iteration: 226051
loss: 0.9993039965629578,grad_norm: 0.9698996619884103, iteration: 226052
loss: 1.0458844900131226,grad_norm: 0.9999991919752108, iteration: 226053
loss: 1.037140130996704,grad_norm: 0.8090072808867219, iteration: 226054
loss: 1.061435341835022,grad_norm: 0.9999999325056591, iteration: 226055
loss: 0.9953003525733948,grad_norm: 0.9999991565899183, iteration: 226056
loss: 0.9963679909706116,grad_norm: 0.9910788869082165, iteration: 226057
loss: 1.002694010734558,grad_norm: 0.9999998959789631, iteration: 226058
loss: 0.9937078952789307,grad_norm: 0.9064676991308332, iteration: 226059
loss: 1.0616815090179443,grad_norm: 0.9667905733467592, iteration: 226060
loss: 0.9968807101249695,grad_norm: 0.8825880036329359, iteration: 226061
loss: 1.0093575716018677,grad_norm: 0.9999990913168666, iteration: 226062
loss: 1.0069000720977783,grad_norm: 0.8224197012283038, iteration: 226063
loss: 0.9630318880081177,grad_norm: 0.8426242141293385, iteration: 226064
loss: 1.0249024629592896,grad_norm: 0.7722760455511603, iteration: 226065
loss: 1.0033485889434814,grad_norm: 0.9999989094062816, iteration: 226066
loss: 0.9742417335510254,grad_norm: 0.9999990923541513, iteration: 226067
loss: 1.0091702938079834,grad_norm: 0.969349961368075, iteration: 226068
loss: 0.999837338924408,grad_norm: 0.9587041050266296, iteration: 226069
loss: 1.0089157819747925,grad_norm: 0.9678934231313129, iteration: 226070
loss: 0.9611804485321045,grad_norm: 0.9615276381425302, iteration: 226071
loss: 0.9855825304985046,grad_norm: 0.9999992245078935, iteration: 226072
loss: 1.0265730619430542,grad_norm: 0.9999998033082235, iteration: 226073
loss: 0.9949347376823425,grad_norm: 0.7708948253961523, iteration: 226074
loss: 1.0318031311035156,grad_norm: 0.8723280155481485, iteration: 226075
loss: 1.002812385559082,grad_norm: 0.9707755988689242, iteration: 226076
loss: 0.986147940158844,grad_norm: 0.9999991205724305, iteration: 226077
loss: 0.9687180519104004,grad_norm: 0.9403318081514059, iteration: 226078
loss: 1.1265323162078857,grad_norm: 0.9999995598269665, iteration: 226079
loss: 1.0258902311325073,grad_norm: 0.9762605078545659, iteration: 226080
loss: 0.9820787906646729,grad_norm: 0.8368803882070593, iteration: 226081
loss: 0.9918739795684814,grad_norm: 0.9999990480393333, iteration: 226082
loss: 0.9929143190383911,grad_norm: 0.9999990146214565, iteration: 226083
loss: 1.0476276874542236,grad_norm: 0.9999991597695832, iteration: 226084
loss: 0.9879634976387024,grad_norm: 0.8700736300014965, iteration: 226085
loss: 1.0220836400985718,grad_norm: 0.9574782932737239, iteration: 226086
loss: 0.9722593426704407,grad_norm: 0.9043796904920522, iteration: 226087
loss: 1.0219464302062988,grad_norm: 0.8782087897078746, iteration: 226088
loss: 1.009570598602295,grad_norm: 0.9165135943551582, iteration: 226089
loss: 0.9945493340492249,grad_norm: 0.9890541775120689, iteration: 226090
loss: 0.986316442489624,grad_norm: 0.8393546593323176, iteration: 226091
loss: 0.9917035102844238,grad_norm: 0.885372823853582, iteration: 226092
loss: 0.9979611039161682,grad_norm: 0.7754018706232562, iteration: 226093
loss: 0.977856457233429,grad_norm: 0.9999989663071238, iteration: 226094
loss: 1.036089301109314,grad_norm: 0.9999994836464619, iteration: 226095
loss: 1.0372310876846313,grad_norm: 0.9999991047613884, iteration: 226096
loss: 1.142053246498108,grad_norm: 0.9999998937299777, iteration: 226097
loss: 0.9748491048812866,grad_norm: 0.999999066968676, iteration: 226098
loss: 1.0005923509597778,grad_norm: 0.8030376353061922, iteration: 226099
loss: 1.2613931894302368,grad_norm: 0.999999980351233, iteration: 226100
loss: 1.0772725343704224,grad_norm: 0.9999995381110516, iteration: 226101
loss: 0.9924619793891907,grad_norm: 0.8509482895560068, iteration: 226102
loss: 0.9859377145767212,grad_norm: 0.7769984659455188, iteration: 226103
loss: 1.008816123008728,grad_norm: 0.9999992992306778, iteration: 226104
loss: 0.9971241354942322,grad_norm: 0.9892966902920776, iteration: 226105
loss: 1.0136672258377075,grad_norm: 0.8892586998697736, iteration: 226106
loss: 1.2412549257278442,grad_norm: 0.9999992582523889, iteration: 226107
loss: 1.0534318685531616,grad_norm: 0.999999830355477, iteration: 226108
loss: 0.9669614434242249,grad_norm: 0.9999992767537907, iteration: 226109
loss: 0.958336353302002,grad_norm: 0.9558367575135466, iteration: 226110
loss: 1.1036128997802734,grad_norm: 0.9999998411653106, iteration: 226111
loss: 1.0044150352478027,grad_norm: 0.8249111915641109, iteration: 226112
loss: 1.027664065361023,grad_norm: 0.8824268274911299, iteration: 226113
loss: 0.9673449993133545,grad_norm: 0.8752599610119084, iteration: 226114
loss: 1.0107706785202026,grad_norm: 0.9999990018960403, iteration: 226115
loss: 0.9986788630485535,grad_norm: 0.9999992164252198, iteration: 226116
loss: 1.055336356163025,grad_norm: 0.9999996010762197, iteration: 226117
loss: 0.9988918900489807,grad_norm: 0.9792242284006735, iteration: 226118
loss: 1.0182312726974487,grad_norm: 0.8660277156413193, iteration: 226119
loss: 1.0310120582580566,grad_norm: 0.8027820404961731, iteration: 226120
loss: 1.149417757987976,grad_norm: 0.9999991181567504, iteration: 226121
loss: 0.9939724802970886,grad_norm: 0.8485812955620329, iteration: 226122
loss: 1.0205343961715698,grad_norm: 0.9999998824219838, iteration: 226123
loss: 1.035789966583252,grad_norm: 0.8893567354984137, iteration: 226124
loss: 0.9692133665084839,grad_norm: 0.852226561253055, iteration: 226125
loss: 0.98210209608078,grad_norm: 0.8430214233462772, iteration: 226126
loss: 1.239460825920105,grad_norm: 0.9999997717698699, iteration: 226127
loss: 1.024550437927246,grad_norm: 0.9999991953592333, iteration: 226128
loss: 1.0126928091049194,grad_norm: 0.9999991265317444, iteration: 226129
loss: 1.0226846933364868,grad_norm: 0.8999940384571081, iteration: 226130
loss: 0.9631929397583008,grad_norm: 0.9999990067372126, iteration: 226131
loss: 0.9833131432533264,grad_norm: 0.9385037478585968, iteration: 226132
loss: 0.9775273203849792,grad_norm: 0.9526103699683174, iteration: 226133
loss: 1.0225963592529297,grad_norm: 0.901879634374063, iteration: 226134
loss: 1.0973035097122192,grad_norm: 0.9999999690078559, iteration: 226135
loss: 1.1873714923858643,grad_norm: 0.9999998379571461, iteration: 226136
loss: 0.9629424214363098,grad_norm: 0.9549283950023605, iteration: 226137
loss: 0.9784536361694336,grad_norm: 0.9257636946391553, iteration: 226138
loss: 1.089698076248169,grad_norm: 0.9999991221598704, iteration: 226139
loss: 0.9765905737876892,grad_norm: 0.971032198582604, iteration: 226140
loss: 0.951978325843811,grad_norm: 0.9272425293458082, iteration: 226141
loss: 0.9842988848686218,grad_norm: 0.9277879409399525, iteration: 226142
loss: 0.9734277725219727,grad_norm: 0.9999990659225537, iteration: 226143
loss: 1.0050071477890015,grad_norm: 0.9999992835395097, iteration: 226144
loss: 1.0241949558258057,grad_norm: 0.9999992088006485, iteration: 226145
loss: 1.0030124187469482,grad_norm: 0.8634056776456162, iteration: 226146
loss: 1.037104845046997,grad_norm: 0.9323513148759045, iteration: 226147
loss: 1.0036615133285522,grad_norm: 0.88962371910116, iteration: 226148
loss: 1.0089412927627563,grad_norm: 0.9999992023149926, iteration: 226149
loss: 0.9998814463615417,grad_norm: 0.9999991059395777, iteration: 226150
loss: 0.9801232218742371,grad_norm: 0.9999989539252075, iteration: 226151
loss: 1.0494271516799927,grad_norm: 0.9999995112322976, iteration: 226152
loss: 1.0023458003997803,grad_norm: 0.8767685286722983, iteration: 226153
loss: 1.0003234148025513,grad_norm: 0.8885654804709658, iteration: 226154
loss: 0.9875409007072449,grad_norm: 0.8412914477377454, iteration: 226155
loss: 0.9778187274932861,grad_norm: 0.9999989804632208, iteration: 226156
loss: 0.9801010489463806,grad_norm: 0.9362974317153586, iteration: 226157
loss: 1.046683669090271,grad_norm: 0.9999996045367865, iteration: 226158
loss: 0.9645935297012329,grad_norm: 0.7755458997193482, iteration: 226159
loss: 0.9726967811584473,grad_norm: 0.9635756102678764, iteration: 226160
loss: 1.0075474977493286,grad_norm: 0.8615659357528891, iteration: 226161
loss: 1.064256191253662,grad_norm: 0.9999990996456213, iteration: 226162
loss: 1.0228694677352905,grad_norm: 0.9495585483597794, iteration: 226163
loss: 0.9861859679222107,grad_norm: 0.8744679164373681, iteration: 226164
loss: 1.0408971309661865,grad_norm: 0.8722366337174748, iteration: 226165
loss: 0.9584535360336304,grad_norm: 0.9527188684024888, iteration: 226166
loss: 1.2205538749694824,grad_norm: 0.99999946120864, iteration: 226167
loss: 1.0076813697814941,grad_norm: 0.993988844807106, iteration: 226168
loss: 1.0046168565750122,grad_norm: 0.9161340475387015, iteration: 226169
loss: 0.9682762622833252,grad_norm: 0.9999990969493785, iteration: 226170
loss: 1.0003551244735718,grad_norm: 0.808619817404291, iteration: 226171
loss: 0.993455171585083,grad_norm: 0.9999993197571567, iteration: 226172
loss: 0.9943452477455139,grad_norm: 0.8440774446784671, iteration: 226173
loss: 1.0209027528762817,grad_norm: 0.9999990978529619, iteration: 226174
loss: 0.9802035689353943,grad_norm: 0.9999999922533017, iteration: 226175
loss: 0.9851455688476562,grad_norm: 0.9999992887198447, iteration: 226176
loss: 0.9916437864303589,grad_norm: 0.9999991435084306, iteration: 226177
loss: 0.9836280941963196,grad_norm: 1.00000000601719, iteration: 226178
loss: 1.016316533088684,grad_norm: 0.9999991095352371, iteration: 226179
loss: 1.0148404836654663,grad_norm: 0.8579682531579548, iteration: 226180
loss: 1.0437204837799072,grad_norm: 0.8870282421437266, iteration: 226181
loss: 0.9979466199874878,grad_norm: 0.9641114086893009, iteration: 226182
loss: 1.0104793310165405,grad_norm: 0.8231189689812084, iteration: 226183
loss: 1.0020928382873535,grad_norm: 0.9046902736567133, iteration: 226184
loss: 0.9765269756317139,grad_norm: 0.8705096378572356, iteration: 226185
loss: 1.0808887481689453,grad_norm: 0.877103517852938, iteration: 226186
loss: 0.9497187733650208,grad_norm: 0.9999991226512376, iteration: 226187
loss: 0.9747728109359741,grad_norm: 0.8574221026923486, iteration: 226188
loss: 0.9826623797416687,grad_norm: 0.8991929906338332, iteration: 226189
loss: 0.9827714562416077,grad_norm: 0.7700491061717328, iteration: 226190
loss: 1.0509980916976929,grad_norm: 0.999999109350481, iteration: 226191
loss: 1.0466980934143066,grad_norm: 0.9999993743642739, iteration: 226192
loss: 1.0022833347320557,grad_norm: 0.841348536285594, iteration: 226193
loss: 1.0063180923461914,grad_norm: 0.9537156602108576, iteration: 226194
loss: 1.0126606225967407,grad_norm: 0.9999990300599451, iteration: 226195
loss: 0.9958146810531616,grad_norm: 0.8900683948427734, iteration: 226196
loss: 0.9644874334335327,grad_norm: 0.9633164079338302, iteration: 226197
loss: 1.023822546005249,grad_norm: 0.9999990872372757, iteration: 226198
loss: 1.007506251335144,grad_norm: 0.875892641624318, iteration: 226199
loss: 0.978206217288971,grad_norm: 0.8408236755258693, iteration: 226200
loss: 1.0072097778320312,grad_norm: 0.9999996138024617, iteration: 226201
loss: 1.0330013036727905,grad_norm: 0.9999990285565968, iteration: 226202
loss: 0.9705184698104858,grad_norm: 0.7737030628142065, iteration: 226203
loss: 0.99826979637146,grad_norm: 0.7864961723722592, iteration: 226204
loss: 1.0001112222671509,grad_norm: 0.9389536535536643, iteration: 226205
loss: 1.0352320671081543,grad_norm: 0.9999997515458622, iteration: 226206
loss: 0.9956350922584534,grad_norm: 0.8406822120043039, iteration: 226207
loss: 0.9878193140029907,grad_norm: 0.9446634184486762, iteration: 226208
loss: 0.9962636828422546,grad_norm: 0.9324753946189857, iteration: 226209
loss: 1.0222752094268799,grad_norm: 0.8723188728685868, iteration: 226210
loss: 0.9974449872970581,grad_norm: 0.999999072069915, iteration: 226211
loss: 0.9879287481307983,grad_norm: 0.8080956592837635, iteration: 226212
loss: 0.9850801229476929,grad_norm: 0.9727657879048741, iteration: 226213
loss: 0.9989498257637024,grad_norm: 0.9999991590055914, iteration: 226214
loss: 1.006759762763977,grad_norm: 0.9275510605014686, iteration: 226215
loss: 1.0074816942214966,grad_norm: 0.9855771331620521, iteration: 226216
loss: 1.0218428373336792,grad_norm: 0.9324919639714344, iteration: 226217
loss: 1.0241750478744507,grad_norm: 0.9828063049435232, iteration: 226218
loss: 0.9651874899864197,grad_norm: 0.9336795555534366, iteration: 226219
loss: 0.9950929284095764,grad_norm: 0.83567474003554, iteration: 226220
loss: 0.9357326626777649,grad_norm: 0.9999991477582312, iteration: 226221
loss: 1.0235693454742432,grad_norm: 0.8743853171147028, iteration: 226222
loss: 1.0264991521835327,grad_norm: 0.8617773775430707, iteration: 226223
loss: 0.9783701300621033,grad_norm: 0.9821836649335214, iteration: 226224
loss: 0.966126561164856,grad_norm: 0.9999993043131753, iteration: 226225
loss: 0.990048348903656,grad_norm: 0.9999990655681491, iteration: 226226
loss: 1.0247533321380615,grad_norm: 0.9398839439389999, iteration: 226227
loss: 1.0127956867218018,grad_norm: 0.993143003783514, iteration: 226228
loss: 1.0153204202651978,grad_norm: 0.8336162812279473, iteration: 226229
loss: 0.9807143807411194,grad_norm: 0.821120605808722, iteration: 226230
loss: 0.9859506487846375,grad_norm: 0.9999989579933664, iteration: 226231
loss: 1.0140153169631958,grad_norm: 0.8945556667929455, iteration: 226232
loss: 1.0114704370498657,grad_norm: 0.8950421549735377, iteration: 226233
loss: 0.9789453744888306,grad_norm: 0.8567468881049912, iteration: 226234
loss: 0.9854837656021118,grad_norm: 0.8298045516999205, iteration: 226235
loss: 0.9567535519599915,grad_norm: 0.9999991668356971, iteration: 226236
loss: 0.9829988479614258,grad_norm: 0.948490999110175, iteration: 226237
loss: 1.1798735857009888,grad_norm: 0.9999998540438887, iteration: 226238
loss: 1.0816967487335205,grad_norm: 0.9999996078060873, iteration: 226239
loss: 0.9936363697052002,grad_norm: 0.9541584381085677, iteration: 226240
loss: 1.0266685485839844,grad_norm: 0.8131766917992561, iteration: 226241
loss: 1.003159523010254,grad_norm: 0.9819343691167269, iteration: 226242
loss: 1.0510051250457764,grad_norm: 0.9999993180696785, iteration: 226243
loss: 1.0864864587783813,grad_norm: 0.9999995623977658, iteration: 226244
loss: 0.996033251285553,grad_norm: 0.99999909207462, iteration: 226245
loss: 0.9967838525772095,grad_norm: 0.9999991272500316, iteration: 226246
loss: 0.9562146067619324,grad_norm: 0.8177189434227273, iteration: 226247
loss: 1.0524495840072632,grad_norm: 0.9999991781193855, iteration: 226248
loss: 0.9892452955245972,grad_norm: 0.9657473716580991, iteration: 226249
loss: 1.0054757595062256,grad_norm: 0.8641619376082113, iteration: 226250
loss: 0.988409161567688,grad_norm: 0.9702342794589339, iteration: 226251
loss: 0.9765105843544006,grad_norm: 0.9768298254892103, iteration: 226252
loss: 1.0042227506637573,grad_norm: 0.9930980256783629, iteration: 226253
loss: 1.1480896472930908,grad_norm: 0.9999994501653702, iteration: 226254
loss: 1.0221285820007324,grad_norm: 0.9210932649414456, iteration: 226255
loss: 0.989111602306366,grad_norm: 0.999999294629792, iteration: 226256
loss: 0.9786719679832458,grad_norm: 0.9999992403610889, iteration: 226257
loss: 1.0267889499664307,grad_norm: 0.9316321119400894, iteration: 226258
loss: 1.0348436832427979,grad_norm: 0.9880564500865191, iteration: 226259
loss: 1.001175880432129,grad_norm: 0.9769503260887045, iteration: 226260
loss: 0.9460663199424744,grad_norm: 0.9284357069538924, iteration: 226261
loss: 0.9893279671669006,grad_norm: 0.8388210804521888, iteration: 226262
loss: 0.975009024143219,grad_norm: 0.9999991075523544, iteration: 226263
loss: 1.0070767402648926,grad_norm: 0.8703297630560572, iteration: 226264
loss: 1.0096628665924072,grad_norm: 0.8766552473992796, iteration: 226265
loss: 0.9913906455039978,grad_norm: 0.9999990537744415, iteration: 226266
loss: 1.0058062076568604,grad_norm: 0.9999991663930404, iteration: 226267
loss: 1.014822244644165,grad_norm: 0.999999712202301, iteration: 226268
loss: 0.9666026830673218,grad_norm: 0.9999991342136849, iteration: 226269
loss: 1.0044220685958862,grad_norm: 0.9999989821614532, iteration: 226270
loss: 1.006174921989441,grad_norm: 0.8115365144472166, iteration: 226271
loss: 0.9943211078643799,grad_norm: 0.8855183556591838, iteration: 226272
loss: 1.0266094207763672,grad_norm: 0.9999991038110528, iteration: 226273
loss: 0.9915099143981934,grad_norm: 0.9910289763162639, iteration: 226274
loss: 1.0461344718933105,grad_norm: 0.9318045882363639, iteration: 226275
loss: 0.9541085958480835,grad_norm: 0.8371388806196498, iteration: 226276
loss: 1.043662428855896,grad_norm: 0.8569074832423623, iteration: 226277
loss: 1.0170495510101318,grad_norm: 0.9717933574151031, iteration: 226278
loss: 0.9647886753082275,grad_norm: 0.872044269924903, iteration: 226279
loss: 0.987689197063446,grad_norm: 0.9680729925022068, iteration: 226280
loss: 1.0108058452606201,grad_norm: 0.7587346848141329, iteration: 226281
loss: 1.0008625984191895,grad_norm: 0.9514326565593337, iteration: 226282
loss: 1.0231244564056396,grad_norm: 0.9910476176445179, iteration: 226283
loss: 0.9897940754890442,grad_norm: 0.9531659459853892, iteration: 226284
loss: 1.0177680253982544,grad_norm: 0.8192939257054633, iteration: 226285
loss: 0.9822901487350464,grad_norm: 0.8000645505511136, iteration: 226286
loss: 1.0034558773040771,grad_norm: 0.806767952991496, iteration: 226287
loss: 0.9662777781486511,grad_norm: 0.9999990858493708, iteration: 226288
loss: 1.0005854368209839,grad_norm: 0.9123360394300482, iteration: 226289
loss: 1.034380316734314,grad_norm: 0.9999991520400101, iteration: 226290
loss: 1.0275404453277588,grad_norm: 0.9999996646675101, iteration: 226291
loss: 0.988808274269104,grad_norm: 0.9973355385989053, iteration: 226292
loss: 0.9862520098686218,grad_norm: 0.9999991657245773, iteration: 226293
loss: 0.9946500062942505,grad_norm: 0.9999998088884244, iteration: 226294
loss: 0.9914346933364868,grad_norm: 0.8782974886739281, iteration: 226295
loss: 1.0236701965332031,grad_norm: 0.9999991480138106, iteration: 226296
loss: 0.9700690507888794,grad_norm: 0.8771057321439645, iteration: 226297
loss: 1.00137197971344,grad_norm: 0.8491504088386062, iteration: 226298
loss: 0.9817078113555908,grad_norm: 0.810538203111077, iteration: 226299
loss: 0.9966017603874207,grad_norm: 0.8268156521681925, iteration: 226300
loss: 1.018187165260315,grad_norm: 0.8104882723561241, iteration: 226301
loss: 1.0291775465011597,grad_norm: 0.822845055707051, iteration: 226302
loss: 0.9690991044044495,grad_norm: 0.8084630273558793, iteration: 226303
loss: 0.9954265356063843,grad_norm: 0.9360755199580378, iteration: 226304
loss: 0.9948188066482544,grad_norm: 0.8996995545734956, iteration: 226305
loss: 1.0741498470306396,grad_norm: 0.9869486146524437, iteration: 226306
loss: 1.0019954442977905,grad_norm: 0.9999991358147406, iteration: 226307
loss: 0.9890419840812683,grad_norm: 0.9142986124565055, iteration: 226308
loss: 0.9877524375915527,grad_norm: 0.9999992608027511, iteration: 226309
loss: 0.9971317648887634,grad_norm: 0.9819185568317532, iteration: 226310
loss: 1.0114071369171143,grad_norm: 0.9999990781036039, iteration: 226311
loss: 0.982390820980072,grad_norm: 0.92219083304031, iteration: 226312
loss: 1.0059934854507446,grad_norm: 0.9123434273245906, iteration: 226313
loss: 1.0541410446166992,grad_norm: 0.9999997686722062, iteration: 226314
loss: 0.9776362776756287,grad_norm: 0.9074475533232831, iteration: 226315
loss: 0.9827603697776794,grad_norm: 0.8934368304476201, iteration: 226316
loss: 1.0121662616729736,grad_norm: 0.9999992807994831, iteration: 226317
loss: 0.9761542677879333,grad_norm: 0.928796597360055, iteration: 226318
loss: 1.006544589996338,grad_norm: 0.9999991600369343, iteration: 226319
loss: 1.0330829620361328,grad_norm: 0.9999990986011157, iteration: 226320
loss: 0.9995367527008057,grad_norm: 0.7746710513538547, iteration: 226321
loss: 0.9800489544868469,grad_norm: 0.9495072436885291, iteration: 226322
loss: 0.9788336753845215,grad_norm: 0.9999991855216592, iteration: 226323
loss: 0.956403374671936,grad_norm: 0.9999991133805763, iteration: 226324
loss: 0.9965440630912781,grad_norm: 0.9999990707499982, iteration: 226325
loss: 1.0073275566101074,grad_norm: 0.7578231349986291, iteration: 226326
loss: 1.023321509361267,grad_norm: 0.8758597346238428, iteration: 226327
loss: 0.9934597015380859,grad_norm: 0.8076853629694403, iteration: 226328
loss: 0.9982266426086426,grad_norm: 0.8705947675153372, iteration: 226329
loss: 1.0059237480163574,grad_norm: 0.8404191143103639, iteration: 226330
loss: 0.9965553283691406,grad_norm: 0.8928447596682877, iteration: 226331
loss: 1.0235393047332764,grad_norm: 0.9999988635128318, iteration: 226332
loss: 1.0461645126342773,grad_norm: 0.9999992820540583, iteration: 226333
loss: 0.9945038557052612,grad_norm: 0.8053380860875463, iteration: 226334
loss: 1.014380931854248,grad_norm: 0.9180943295493, iteration: 226335
loss: 0.980125367641449,grad_norm: 0.9739960970292904, iteration: 226336
loss: 1.0719058513641357,grad_norm: 0.9999990011578329, iteration: 226337
loss: 0.9928951859474182,grad_norm: 0.7860615324089568, iteration: 226338
loss: 1.0451737642288208,grad_norm: 0.9999992978993293, iteration: 226339
loss: 0.9719663262367249,grad_norm: 0.9999991055220756, iteration: 226340
loss: 1.0035227537155151,grad_norm: 0.8818933631973809, iteration: 226341
loss: 1.0078712701797485,grad_norm: 0.9999994419918757, iteration: 226342
loss: 0.9763131737709045,grad_norm: 0.8335865441527578, iteration: 226343
loss: 0.9757378697395325,grad_norm: 0.7730192042205055, iteration: 226344
loss: 1.028213620185852,grad_norm: 0.8421575616398086, iteration: 226345
loss: 1.0109856128692627,grad_norm: 0.7850873500273733, iteration: 226346
loss: 1.0144832134246826,grad_norm: 0.9471254349140124, iteration: 226347
loss: 0.9781020283699036,grad_norm: 0.9592223252738699, iteration: 226348
loss: 1.0009511709213257,grad_norm: 0.9707479370690436, iteration: 226349
loss: 1.006264328956604,grad_norm: 0.7980130025902356, iteration: 226350
loss: 1.0118675231933594,grad_norm: 0.9999990824292975, iteration: 226351
loss: 1.0413131713867188,grad_norm: 0.999999816100199, iteration: 226352
loss: 1.0125526189804077,grad_norm: 0.9999993710489942, iteration: 226353
loss: 1.0129287242889404,grad_norm: 0.930443398468364, iteration: 226354
loss: 0.9855636954307556,grad_norm: 0.9084241374652349, iteration: 226355
loss: 0.984223484992981,grad_norm: 0.9480527732353744, iteration: 226356
loss: 0.9532797336578369,grad_norm: 0.8690376277121163, iteration: 226357
loss: 1.0046758651733398,grad_norm: 0.9285232718794426, iteration: 226358
loss: 0.981846034526825,grad_norm: 0.8854671841565288, iteration: 226359
loss: 1.0462101697921753,grad_norm: 0.9036636817750311, iteration: 226360
loss: 1.019167184829712,grad_norm: 0.7699311453827437, iteration: 226361
loss: 1.0146586894989014,grad_norm: 0.9426981073562595, iteration: 226362
loss: 0.9750020503997803,grad_norm: 0.9019646030931096, iteration: 226363
loss: 0.9698880910873413,grad_norm: 0.7883701818281932, iteration: 226364
loss: 0.9872015118598938,grad_norm: 0.9999991599917741, iteration: 226365
loss: 0.9947741627693176,grad_norm: 0.7955827031423478, iteration: 226366
loss: 1.001103162765503,grad_norm: 0.8660293268501383, iteration: 226367
loss: 0.9917142987251282,grad_norm: 0.8424504627645325, iteration: 226368
loss: 0.9750764966011047,grad_norm: 0.9938176137633608, iteration: 226369
loss: 0.9659956693649292,grad_norm: 0.8516772408522116, iteration: 226370
loss: 1.0303844213485718,grad_norm: 0.9426607930643246, iteration: 226371
loss: 1.0191984176635742,grad_norm: 0.9999990984497149, iteration: 226372
loss: 0.9853571057319641,grad_norm: 0.826769024278893, iteration: 226373
loss: 1.0123158693313599,grad_norm: 0.8258485889827161, iteration: 226374
loss: 0.9715052843093872,grad_norm: 0.9437025825349764, iteration: 226375
loss: 0.9981635212898254,grad_norm: 0.9999990677655916, iteration: 226376
loss: 1.0358808040618896,grad_norm: 0.9999990086621774, iteration: 226377
loss: 1.0016638040542603,grad_norm: 0.9284199533682874, iteration: 226378
loss: 1.0056557655334473,grad_norm: 0.9216710739783412, iteration: 226379
loss: 1.0459595918655396,grad_norm: 0.8847243865072121, iteration: 226380
loss: 0.9980803728103638,grad_norm: 0.7781609318818195, iteration: 226381
loss: 0.9780130386352539,grad_norm: 0.9999992021183601, iteration: 226382
loss: 1.084257960319519,grad_norm: 0.9528292750278186, iteration: 226383
loss: 1.0174798965454102,grad_norm: 0.9099324439875637, iteration: 226384
loss: 1.008341670036316,grad_norm: 0.9999992435579322, iteration: 226385
loss: 0.9871789813041687,grad_norm: 0.8827485192456458, iteration: 226386
loss: 0.9898753762245178,grad_norm: 0.9999990923782531, iteration: 226387
loss: 0.9883509874343872,grad_norm: 0.9999990974387889, iteration: 226388
loss: 0.9725427031517029,grad_norm: 0.9956055989725021, iteration: 226389
loss: 0.981084406375885,grad_norm: 0.8520529007350586, iteration: 226390
loss: 0.9882053136825562,grad_norm: 0.9999989998580567, iteration: 226391
loss: 1.012479305267334,grad_norm: 0.7888739156376775, iteration: 226392
loss: 0.9855514764785767,grad_norm: 0.9999991575273107, iteration: 226393
loss: 1.0167616605758667,grad_norm: 0.999998913684623, iteration: 226394
loss: 0.9675525426864624,grad_norm: 0.9994519859886137, iteration: 226395
loss: 1.0135091543197632,grad_norm: 0.90817398367756, iteration: 226396
loss: 1.0064024925231934,grad_norm: 0.9144225917341503, iteration: 226397
loss: 1.013465404510498,grad_norm: 0.9772718656117171, iteration: 226398
loss: 0.9940528273582458,grad_norm: 0.8954610675239816, iteration: 226399
loss: 1.0123183727264404,grad_norm: 0.8170734052427007, iteration: 226400
loss: 1.0278942584991455,grad_norm: 0.9999991726978598, iteration: 226401
loss: 1.0087521076202393,grad_norm: 0.8639787589457686, iteration: 226402
loss: 0.985556960105896,grad_norm: 0.9569457745811577, iteration: 226403
loss: 0.9944732189178467,grad_norm: 0.9558108189879473, iteration: 226404
loss: 1.0149227380752563,grad_norm: 0.999999275376964, iteration: 226405
loss: 1.029072880744934,grad_norm: 0.9999991163803622, iteration: 226406
loss: 0.966437041759491,grad_norm: 0.9465150813614143, iteration: 226407
loss: 0.9993258118629456,grad_norm: 0.811005736682886, iteration: 226408
loss: 1.0331131219863892,grad_norm: 0.8725070020453628, iteration: 226409
loss: 1.0189961194992065,grad_norm: 0.9274217993511885, iteration: 226410
loss: 0.9920540452003479,grad_norm: 0.8936368364251888, iteration: 226411
loss: 1.011684775352478,grad_norm: 0.9999994821972948, iteration: 226412
loss: 1.0036379098892212,grad_norm: 0.916227598964534, iteration: 226413
loss: 1.0429301261901855,grad_norm: 0.9501293962774803, iteration: 226414
loss: 0.9853718876838684,grad_norm: 0.9456168846135883, iteration: 226415
loss: 0.9542543292045593,grad_norm: 0.9048713449295829, iteration: 226416
loss: 0.9722027778625488,grad_norm: 0.9999991423432959, iteration: 226417
loss: 0.9987125396728516,grad_norm: 0.9999995774732263, iteration: 226418
loss: 0.9981456398963928,grad_norm: 0.7920014483992189, iteration: 226419
loss: 0.9843924045562744,grad_norm: 0.8365359334329588, iteration: 226420
loss: 1.0195866823196411,grad_norm: 0.963830579027469, iteration: 226421
loss: 0.9662342667579651,grad_norm: 0.9334769621179086, iteration: 226422
loss: 1.0009548664093018,grad_norm: 0.783243579387796, iteration: 226423
loss: 0.9893547296524048,grad_norm: 0.8218041932957733, iteration: 226424
loss: 1.0186185836791992,grad_norm: 0.8730682277893312, iteration: 226425
loss: 0.9876208305358887,grad_norm: 0.8979758120811224, iteration: 226426
loss: 0.9865395426750183,grad_norm: 0.9999991440662407, iteration: 226427
loss: 0.9746752977371216,grad_norm: 0.9725372704980779, iteration: 226428
loss: 1.0057449340820312,grad_norm: 0.9999990758052235, iteration: 226429
loss: 0.9846208095550537,grad_norm: 0.9020050992170873, iteration: 226430
loss: 0.9804282784461975,grad_norm: 0.9077103866310061, iteration: 226431
loss: 1.0248053073883057,grad_norm: 0.9999993154082547, iteration: 226432
loss: 0.999177098274231,grad_norm: 0.9999991866358343, iteration: 226433
loss: 0.9999184608459473,grad_norm: 0.8893729068429639, iteration: 226434
loss: 1.0185843706130981,grad_norm: 0.8735207495534829, iteration: 226435
loss: 1.0212169885635376,grad_norm: 0.8107646865254984, iteration: 226436
loss: 1.0515177249908447,grad_norm: 0.9999997529638426, iteration: 226437
loss: 0.9876784682273865,grad_norm: 0.9999990597109567, iteration: 226438
loss: 1.0182528495788574,grad_norm: 0.8342982219907833, iteration: 226439
loss: 0.9551531672477722,grad_norm: 0.9546611032578839, iteration: 226440
loss: 1.0514768362045288,grad_norm: 0.9999992376220631, iteration: 226441
loss: 1.019282579421997,grad_norm: 0.9999991712948231, iteration: 226442
loss: 1.0264662504196167,grad_norm: 0.999999158376268, iteration: 226443
loss: 0.9991911053657532,grad_norm: 0.9999992910541418, iteration: 226444
loss: 0.9813829064369202,grad_norm: 0.8624043887045002, iteration: 226445
loss: 0.9734892845153809,grad_norm: 0.9625288792235929, iteration: 226446
loss: 1.0243291854858398,grad_norm: 0.9999995831812202, iteration: 226447
loss: 1.000474214553833,grad_norm: 0.9999991683395061, iteration: 226448
loss: 1.0106513500213623,grad_norm: 0.884130463619564, iteration: 226449
loss: 0.9786139130592346,grad_norm: 0.9513681563043224, iteration: 226450
loss: 1.0189907550811768,grad_norm: 0.9406981226568455, iteration: 226451
loss: 0.9736390113830566,grad_norm: 0.8531896465127233, iteration: 226452
loss: 0.9693089127540588,grad_norm: 0.9671366376936416, iteration: 226453
loss: 1.0188161134719849,grad_norm: 0.9806582332061022, iteration: 226454
loss: 1.0144978761672974,grad_norm: 0.8807299456802347, iteration: 226455
loss: 0.9800485372543335,grad_norm: 0.9464721832522542, iteration: 226456
loss: 0.9783145189285278,grad_norm: 0.8884423787456349, iteration: 226457
loss: 0.970893919467926,grad_norm: 0.9528332084148756, iteration: 226458
loss: 1.0222357511520386,grad_norm: 0.881099029117574, iteration: 226459
loss: 0.9937924146652222,grad_norm: 0.8505868719718698, iteration: 226460
loss: 1.0110383033752441,grad_norm: 0.8894944653634517, iteration: 226461
loss: 1.0101937055587769,grad_norm: 0.9794116312126869, iteration: 226462
loss: 0.9944232702255249,grad_norm: 0.9566469127355908, iteration: 226463
loss: 1.0489282608032227,grad_norm: 0.9999991043473967, iteration: 226464
loss: 0.9987893104553223,grad_norm: 0.8099680591651373, iteration: 226465
loss: 0.9914844036102295,grad_norm: 0.8792853263928624, iteration: 226466
loss: 1.0042188167572021,grad_norm: 0.9595921095662158, iteration: 226467
loss: 0.9639129042625427,grad_norm: 0.8079914758865226, iteration: 226468
loss: 0.988776445388794,grad_norm: 0.8751826539898567, iteration: 226469
loss: 0.9849534034729004,grad_norm: 0.9872943454569285, iteration: 226470
loss: 0.9583619832992554,grad_norm: 0.891492089650338, iteration: 226471
loss: 1.0015459060668945,grad_norm: 0.9958741470642106, iteration: 226472
loss: 0.9687870740890503,grad_norm: 0.8128980141636468, iteration: 226473
loss: 1.0305864810943604,grad_norm: 0.9999989814722664, iteration: 226474
loss: 0.9860378503799438,grad_norm: 0.8689333654213157, iteration: 226475
loss: 1.0319437980651855,grad_norm: 0.9999989843301091, iteration: 226476
loss: 0.9947035908699036,grad_norm: 0.9114152712027253, iteration: 226477
loss: 0.9686797261238098,grad_norm: 0.9693601760953445, iteration: 226478
loss: 0.9458891153335571,grad_norm: 0.9844799292355022, iteration: 226479
loss: 0.9976745843887329,grad_norm: 0.8048533016471715, iteration: 226480
loss: 0.9811898469924927,grad_norm: 0.8746135357318189, iteration: 226481
loss: 1.0022557973861694,grad_norm: 0.9844410891781233, iteration: 226482
loss: 1.0341929197311401,grad_norm: 0.9841863797298401, iteration: 226483
loss: 1.0423020124435425,grad_norm: 0.8915568098413343, iteration: 226484
loss: 1.0018264055252075,grad_norm: 0.9093512371702772, iteration: 226485
loss: 1.0469549894332886,grad_norm: 0.7823536110639828, iteration: 226486
loss: 0.9860827922821045,grad_norm: 0.999999082524043, iteration: 226487
loss: 0.9924446940422058,grad_norm: 0.9999992086681968, iteration: 226488
loss: 0.9774183034896851,grad_norm: 0.9999990386942538, iteration: 226489
loss: 0.9945740699768066,grad_norm: 0.9224240997562215, iteration: 226490
loss: 1.027078628540039,grad_norm: 0.8978177969430389, iteration: 226491
loss: 0.9692368507385254,grad_norm: 0.9347351132185335, iteration: 226492
loss: 0.962314784526825,grad_norm: 0.844754501328116, iteration: 226493
loss: 0.9836030006408691,grad_norm: 0.8535304239532501, iteration: 226494
loss: 0.9481082558631897,grad_norm: 0.898920125536869, iteration: 226495
loss: 1.0506386756896973,grad_norm: 0.9999990329197427, iteration: 226496
loss: 0.9710608124732971,grad_norm: 0.9615833469164444, iteration: 226497
loss: 0.965734601020813,grad_norm: 0.9999998761685631, iteration: 226498
loss: 0.9619768857955933,grad_norm: 0.7764072124562764, iteration: 226499
loss: 1.0705523490905762,grad_norm: 0.9999990103513342, iteration: 226500
loss: 0.9956364035606384,grad_norm: 0.9342536116178763, iteration: 226501
loss: 1.0313303470611572,grad_norm: 0.8936570170030896, iteration: 226502
loss: 1.0187774896621704,grad_norm: 0.85421417150228, iteration: 226503
loss: 0.978337287902832,grad_norm: 0.9999990539671193, iteration: 226504
loss: 1.0058043003082275,grad_norm: 0.875138765761716, iteration: 226505
loss: 1.0511984825134277,grad_norm: 0.9999991825271172, iteration: 226506
loss: 0.9654685258865356,grad_norm: 0.8146116396188436, iteration: 226507
loss: 1.005521535873413,grad_norm: 0.9999989083428205, iteration: 226508
loss: 0.9874147176742554,grad_norm: 0.8637684303100991, iteration: 226509
loss: 0.9640452265739441,grad_norm: 0.99999899341458, iteration: 226510
loss: 1.034458875656128,grad_norm: 0.9033215996923796, iteration: 226511
loss: 1.0180482864379883,grad_norm: 0.9335736400181531, iteration: 226512
loss: 0.9876432418823242,grad_norm: 0.7384674802689472, iteration: 226513
loss: 0.9720382690429688,grad_norm: 0.9835971147026512, iteration: 226514
loss: 1.0231863260269165,grad_norm: 0.9528828473270794, iteration: 226515
loss: 0.9742444753646851,grad_norm: 0.9983302026124465, iteration: 226516
loss: 1.016839861869812,grad_norm: 0.945750371218862, iteration: 226517
loss: 0.9932072162628174,grad_norm: 0.999998934455132, iteration: 226518
loss: 0.9859974384307861,grad_norm: 0.9432876095615147, iteration: 226519
loss: 0.9940611720085144,grad_norm: 0.9686568062270252, iteration: 226520
loss: 1.0177115201950073,grad_norm: 0.7896818535656434, iteration: 226521
loss: 0.9919770359992981,grad_norm: 0.867958212304578, iteration: 226522
loss: 0.9949530959129333,grad_norm: 0.7567314704526295, iteration: 226523
loss: 0.9655708074569702,grad_norm: 0.8271551063925943, iteration: 226524
loss: 1.0006638765335083,grad_norm: 0.7777892078700588, iteration: 226525
loss: 1.005081057548523,grad_norm: 0.7503828110374708, iteration: 226526
loss: 1.011702537536621,grad_norm: 0.9999991108470132, iteration: 226527
loss: 1.0110474824905396,grad_norm: 0.9790192057538426, iteration: 226528
loss: 0.9832788705825806,grad_norm: 0.8652680780466447, iteration: 226529
loss: 1.0163414478302002,grad_norm: 0.8281257365812155, iteration: 226530
loss: 1.038759708404541,grad_norm: 0.9512267734605208, iteration: 226531
loss: 0.9928386211395264,grad_norm: 0.9999993014033453, iteration: 226532
loss: 1.0117207765579224,grad_norm: 0.796653397334771, iteration: 226533
loss: 0.9596041440963745,grad_norm: 0.9441914465047279, iteration: 226534
loss: 0.9822813868522644,grad_norm: 0.9999990866830775, iteration: 226535
loss: 0.9518973231315613,grad_norm: 0.9999990811643075, iteration: 226536
loss: 1.006142497062683,grad_norm: 0.869617004329591, iteration: 226537
loss: 1.0064440965652466,grad_norm: 0.8802587060938031, iteration: 226538
loss: 0.9993316531181335,grad_norm: 0.9999991733434382, iteration: 226539
loss: 0.994878888130188,grad_norm: 0.9038216334807255, iteration: 226540
loss: 0.9987726211547852,grad_norm: 0.8291934615932642, iteration: 226541
loss: 0.9905455112457275,grad_norm: 0.8527065674922325, iteration: 226542
loss: 1.006662130355835,grad_norm: 0.853892230165993, iteration: 226543
loss: 1.0343822240829468,grad_norm: 0.9226977958692677, iteration: 226544
loss: 0.9495242834091187,grad_norm: 0.8922878018553627, iteration: 226545
loss: 1.0076003074645996,grad_norm: 0.9636001390837458, iteration: 226546
loss: 0.9745641350746155,grad_norm: 0.9999993705884055, iteration: 226547
loss: 0.9641010165214539,grad_norm: 0.9494108762691099, iteration: 226548
loss: 0.9941644072532654,grad_norm: 0.8204251239502611, iteration: 226549
loss: 1.0117093324661255,grad_norm: 0.9862888485116157, iteration: 226550
loss: 0.9844871759414673,grad_norm: 0.7898285453642805, iteration: 226551
loss: 1.0456387996673584,grad_norm: 0.7568552313055481, iteration: 226552
loss: 1.0012180805206299,grad_norm: 0.8324601336930254, iteration: 226553
loss: 0.9913581013679504,grad_norm: 0.94465850582792, iteration: 226554
loss: 1.0001487731933594,grad_norm: 0.9230122045652834, iteration: 226555
loss: 1.0309854745864868,grad_norm: 0.9817088560776507, iteration: 226556
loss: 0.9968364834785461,grad_norm: 0.998123542060793, iteration: 226557
loss: 1.0451648235321045,grad_norm: 0.9541752886996145, iteration: 226558
loss: 0.9859821796417236,grad_norm: 0.9607297553516632, iteration: 226559
loss: 1.0288478136062622,grad_norm: 0.7581263473886849, iteration: 226560
loss: 1.0206196308135986,grad_norm: 0.9288044897523512, iteration: 226561
loss: 0.9954269528388977,grad_norm: 0.9370497820760767, iteration: 226562
loss: 1.058332920074463,grad_norm: 0.9999993468088855, iteration: 226563
loss: 0.9501082897186279,grad_norm: 0.9715587357598994, iteration: 226564
loss: 0.9897753000259399,grad_norm: 0.8552235499872185, iteration: 226565
loss: 0.9839668273925781,grad_norm: 0.9570645950928162, iteration: 226566
loss: 1.0162105560302734,grad_norm: 0.9546081189416804, iteration: 226567
loss: 0.9904595017433167,grad_norm: 0.9999989628742997, iteration: 226568
loss: 1.0074408054351807,grad_norm: 0.9418212581068919, iteration: 226569
loss: 1.0211187601089478,grad_norm: 0.8536473606134348, iteration: 226570
loss: 1.033064365386963,grad_norm: 0.8577165020852243, iteration: 226571
loss: 1.010311484336853,grad_norm: 0.900290148221935, iteration: 226572
loss: 0.9849206209182739,grad_norm: 0.8611273880708505, iteration: 226573
loss: 1.0035902261734009,grad_norm: 0.9561373030339197, iteration: 226574
loss: 0.961331844329834,grad_norm: 0.9451619085810905, iteration: 226575
loss: 0.9522221684455872,grad_norm: 0.9985921448381841, iteration: 226576
loss: 1.0036225318908691,grad_norm: 0.9999990994836241, iteration: 226577
loss: 1.028898000717163,grad_norm: 0.8532644248554611, iteration: 226578
loss: 0.9707112908363342,grad_norm: 0.7437543797387282, iteration: 226579
loss: 0.9912939667701721,grad_norm: 0.9919068887703679, iteration: 226580
loss: 0.9750891327857971,grad_norm: 0.8398928063252796, iteration: 226581
loss: 1.042897343635559,grad_norm: 0.9999992009223622, iteration: 226582
loss: 1.0262269973754883,grad_norm: 0.925399300753201, iteration: 226583
loss: 1.0128896236419678,grad_norm: 0.9999991943692564, iteration: 226584
loss: 1.0027656555175781,grad_norm: 0.8200495603064194, iteration: 226585
loss: 0.9854366183280945,grad_norm: 0.9505505095792491, iteration: 226586
loss: 1.093913197517395,grad_norm: 0.9999996113227521, iteration: 226587
loss: 0.9700302481651306,grad_norm: 0.9297959577247337, iteration: 226588
loss: 1.0681208372116089,grad_norm: 0.9999991671780171, iteration: 226589
loss: 0.9713635444641113,grad_norm: 0.9701884516774787, iteration: 226590
loss: 1.0285272598266602,grad_norm: 0.9999993366760082, iteration: 226591
loss: 1.2304795980453491,grad_norm: 0.9999992032611524, iteration: 226592
loss: 1.0197139978408813,grad_norm: 0.9999991154071879, iteration: 226593
loss: 0.9911336302757263,grad_norm: 0.8004592516394727, iteration: 226594
loss: 1.0800729990005493,grad_norm: 0.995858018179401, iteration: 226595
loss: 0.996283769607544,grad_norm: 0.9722041101529099, iteration: 226596
loss: 0.9869278073310852,grad_norm: 0.9999992335957989, iteration: 226597
loss: 0.9683617949485779,grad_norm: 0.9575629255668584, iteration: 226598
loss: 1.0152548551559448,grad_norm: 0.8731734841880422, iteration: 226599
loss: 0.9877020120620728,grad_norm: 0.7993964159568964, iteration: 226600
loss: 0.9902727007865906,grad_norm: 0.8964931778549199, iteration: 226601
loss: 0.9951901435852051,grad_norm: 0.9999991156253786, iteration: 226602
loss: 0.975979745388031,grad_norm: 0.9275218306673186, iteration: 226603
loss: 0.9727593660354614,grad_norm: 0.9835504425535245, iteration: 226604
loss: 1.013090968132019,grad_norm: 0.9999990508989302, iteration: 226605
loss: 1.0217565298080444,grad_norm: 0.8079905772884408, iteration: 226606
loss: 0.9889494776725769,grad_norm: 0.9253716308335569, iteration: 226607
loss: 0.9619199633598328,grad_norm: 0.9999989991008533, iteration: 226608
loss: 0.9998283386230469,grad_norm: 0.9999991759685233, iteration: 226609
loss: 1.014424443244934,grad_norm: 0.9999989929464758, iteration: 226610
loss: 0.9583540558815002,grad_norm: 0.8435065454813044, iteration: 226611
loss: 0.9925910830497742,grad_norm: 0.9019396417224328, iteration: 226612
loss: 0.9674311876296997,grad_norm: 0.9012648630048523, iteration: 226613
loss: 1.0148539543151855,grad_norm: 0.9999991612501126, iteration: 226614
loss: 0.9605501294136047,grad_norm: 0.8974494134520922, iteration: 226615
loss: 1.0909969806671143,grad_norm: 0.9999991569669302, iteration: 226616
loss: 1.0076676607131958,grad_norm: 0.7897561477458304, iteration: 226617
loss: 1.0231736898422241,grad_norm: 0.8902184838813801, iteration: 226618
loss: 1.0462920665740967,grad_norm: 0.8367200326439026, iteration: 226619
loss: 0.9991104006767273,grad_norm: 0.9999991849418444, iteration: 226620
loss: 1.0403341054916382,grad_norm: 0.9999991669979466, iteration: 226621
loss: 1.0018446445465088,grad_norm: 0.9999992978807251, iteration: 226622
loss: 1.0269728899002075,grad_norm: 0.8810819715164482, iteration: 226623
loss: 1.0139681100845337,grad_norm: 0.9243464325435329, iteration: 226624
loss: 0.995527982711792,grad_norm: 0.9179304739773106, iteration: 226625
loss: 1.0488215684890747,grad_norm: 0.8496619822323906, iteration: 226626
loss: 1.0012623071670532,grad_norm: 0.8094095249668273, iteration: 226627
loss: 0.9928322434425354,grad_norm: 0.9999993197094484, iteration: 226628
loss: 0.9953933954238892,grad_norm: 0.9884812519106023, iteration: 226629
loss: 0.9821060299873352,grad_norm: 0.9999991813504596, iteration: 226630
loss: 1.0386111736297607,grad_norm: 0.9999990149354477, iteration: 226631
loss: 0.9956425428390503,grad_norm: 0.8988314433708355, iteration: 226632
loss: 0.976250946521759,grad_norm: 0.9999990477920452, iteration: 226633
loss: 1.0024319887161255,grad_norm: 0.9999994652965357, iteration: 226634
loss: 1.0521408319473267,grad_norm: 0.9999992393937396, iteration: 226635
loss: 1.0013200044631958,grad_norm: 0.9150228033332912, iteration: 226636
loss: 1.0093556642532349,grad_norm: 0.8610510252386548, iteration: 226637
loss: 1.0010002851486206,grad_norm: 0.9368155456344069, iteration: 226638
loss: 1.0090714693069458,grad_norm: 0.9999992038079474, iteration: 226639
loss: 1.0223886966705322,grad_norm: 0.9999990951612462, iteration: 226640
loss: 1.0286364555358887,grad_norm: 0.8833433210863305, iteration: 226641
loss: 0.970786988735199,grad_norm: 0.9999991827188772, iteration: 226642
loss: 0.9603301882743835,grad_norm: 0.9759530232623725, iteration: 226643
loss: 1.0215904712677002,grad_norm: 0.9299782391506938, iteration: 226644
loss: 0.9701160788536072,grad_norm: 0.99780002334485, iteration: 226645
loss: 1.0062743425369263,grad_norm: 0.9999992076387928, iteration: 226646
loss: 1.0281109809875488,grad_norm: 0.9381000786031405, iteration: 226647
loss: 0.9873320460319519,grad_norm: 0.9736603597936456, iteration: 226648
loss: 0.9878746867179871,grad_norm: 0.8901152754656149, iteration: 226649
loss: 0.9686610102653503,grad_norm: 0.9999990198999694, iteration: 226650
loss: 1.0001914501190186,grad_norm: 0.8618763559332164, iteration: 226651
loss: 0.9660435318946838,grad_norm: 0.9999995112571315, iteration: 226652
loss: 1.033843755722046,grad_norm: 0.9999989138845768, iteration: 226653
loss: 1.028755784034729,grad_norm: 0.8704676979920235, iteration: 226654
loss: 0.9728696346282959,grad_norm: 0.9999990247203776, iteration: 226655
loss: 1.0048032999038696,grad_norm: 0.9999989133508851, iteration: 226656
loss: 1.0150244235992432,grad_norm: 0.8487379876882916, iteration: 226657
loss: 1.012505054473877,grad_norm: 0.884212923716509, iteration: 226658
loss: 1.0582760572433472,grad_norm: 0.965866852419244, iteration: 226659
loss: 0.9637928009033203,grad_norm: 0.844028911167811, iteration: 226660
loss: 1.0405093431472778,grad_norm: 0.9999991432425602, iteration: 226661
loss: 0.9665607810020447,grad_norm: 0.7966898066442764, iteration: 226662
loss: 1.0023956298828125,grad_norm: 0.9592775213459278, iteration: 226663
loss: 0.991430938243866,grad_norm: 0.7822666363427554, iteration: 226664
loss: 1.0059632062911987,grad_norm: 0.784147118911609, iteration: 226665
loss: 1.0108671188354492,grad_norm: 0.949457592085822, iteration: 226666
loss: 1.0080703496932983,grad_norm: 0.9999994369451198, iteration: 226667
loss: 0.9972577095031738,grad_norm: 0.9058514373792268, iteration: 226668
loss: 1.0148972272872925,grad_norm: 0.9999993137251747, iteration: 226669
loss: 1.0133296251296997,grad_norm: 0.9416300188826661, iteration: 226670
loss: 1.0099022388458252,grad_norm: 0.8083398866814084, iteration: 226671
loss: 0.9798785448074341,grad_norm: 0.9389804098670356, iteration: 226672
loss: 1.0152957439422607,grad_norm: 0.8153824290626701, iteration: 226673
loss: 0.9712217450141907,grad_norm: 0.9999991725699339, iteration: 226674
loss: 0.9876272678375244,grad_norm: 0.9784633026382412, iteration: 226675
loss: 1.0135128498077393,grad_norm: 0.825046239581554, iteration: 226676
loss: 0.9900906682014465,grad_norm: 0.9999990120759839, iteration: 226677
loss: 1.0472148656845093,grad_norm: 0.8362088700080453, iteration: 226678
loss: 0.9868975281715393,grad_norm: 0.99999919868326, iteration: 226679
loss: 1.0281062126159668,grad_norm: 0.974974943905235, iteration: 226680
loss: 0.9796505570411682,grad_norm: 0.7950846441368853, iteration: 226681
loss: 1.036346197128296,grad_norm: 0.9999990973820483, iteration: 226682
loss: 0.9854931235313416,grad_norm: 0.8336412672957958, iteration: 226683
loss: 0.9593392014503479,grad_norm: 0.9584103393484217, iteration: 226684
loss: 1.0341994762420654,grad_norm: 0.9421546100638692, iteration: 226685
loss: 1.016230583190918,grad_norm: 0.9360972595001797, iteration: 226686
loss: 1.0492589473724365,grad_norm: 0.9999991570228325, iteration: 226687
loss: 1.0139453411102295,grad_norm: 0.9999992889579653, iteration: 226688
loss: 0.9847315549850464,grad_norm: 0.8215147426316531, iteration: 226689
loss: 1.0018951892852783,grad_norm: 0.7637040135612964, iteration: 226690
loss: 1.0189441442489624,grad_norm: 0.8314734878270904, iteration: 226691
loss: 0.9724584221839905,grad_norm: 0.9365039903805272, iteration: 226692
loss: 0.9819456934928894,grad_norm: 0.9999990770374647, iteration: 226693
loss: 0.9957311749458313,grad_norm: 0.9999991288331119, iteration: 226694
loss: 1.0905420780181885,grad_norm: 0.9999996809155284, iteration: 226695
loss: 0.9952186346054077,grad_norm: 0.9506054291026397, iteration: 226696
loss: 1.0089558362960815,grad_norm: 0.8023284390971959, iteration: 226697
loss: 0.9720144271850586,grad_norm: 0.8368381109468476, iteration: 226698
loss: 0.9902584552764893,grad_norm: 0.863331789385324, iteration: 226699
loss: 1.0171674489974976,grad_norm: 0.9999991383730532, iteration: 226700
loss: 1.0108178853988647,grad_norm: 0.8783506383816984, iteration: 226701
loss: 1.0124104022979736,grad_norm: 0.8822007556823858, iteration: 226702
loss: 1.1684938669204712,grad_norm: 0.9993656991112866, iteration: 226703
loss: 1.0100620985031128,grad_norm: 0.9195006717903873, iteration: 226704
loss: 0.9840909242630005,grad_norm: 0.967035417487727, iteration: 226705
loss: 1.0404644012451172,grad_norm: 0.8215786546409122, iteration: 226706
loss: 0.9958064556121826,grad_norm: 0.9904878335948369, iteration: 226707
loss: 0.9589613080024719,grad_norm: 0.7408822312697919, iteration: 226708
loss: 0.9457000494003296,grad_norm: 0.9999990512773655, iteration: 226709
loss: 0.9759197235107422,grad_norm: 0.8129265130425635, iteration: 226710
loss: 1.0221123695373535,grad_norm: 0.9488562103015828, iteration: 226711
loss: 0.9903106689453125,grad_norm: 0.943448773797932, iteration: 226712
loss: 1.0272172689437866,grad_norm: 0.9425908411426749, iteration: 226713
loss: 0.9834573864936829,grad_norm: 0.9999992374005264, iteration: 226714
loss: 0.9799384474754333,grad_norm: 0.9999992860527939, iteration: 226715
loss: 0.9920980930328369,grad_norm: 0.9057705393081994, iteration: 226716
loss: 1.0016944408416748,grad_norm: 0.9304736025145353, iteration: 226717
loss: 1.001283049583435,grad_norm: 0.9185404210248309, iteration: 226718
loss: 1.0189841985702515,grad_norm: 0.999999246848185, iteration: 226719
loss: 0.9933953285217285,grad_norm: 0.94465785810294, iteration: 226720
loss: 1.0168030261993408,grad_norm: 0.9097644380808929, iteration: 226721
loss: 0.9472825527191162,grad_norm: 0.8070075108577741, iteration: 226722
loss: 1.0080492496490479,grad_norm: 0.9999991311892369, iteration: 226723
loss: 0.9975184798240662,grad_norm: 0.7949249059450163, iteration: 226724
loss: 1.0026435852050781,grad_norm: 0.9999991886885282, iteration: 226725
loss: 1.0213518142700195,grad_norm: 0.9999989652097149, iteration: 226726
loss: 0.9773134589195251,grad_norm: 0.8902594655421208, iteration: 226727
loss: 1.0426254272460938,grad_norm: 0.8513978602651515, iteration: 226728
loss: 0.9715026617050171,grad_norm: 0.8938723306528154, iteration: 226729
loss: 0.9820225834846497,grad_norm: 0.8040155929643865, iteration: 226730
loss: 0.9620988368988037,grad_norm: 0.8773821906325948, iteration: 226731
loss: 0.9801480174064636,grad_norm: 0.8885729727047783, iteration: 226732
loss: 1.0160850286483765,grad_norm: 0.9999990773080277, iteration: 226733
loss: 0.9704534411430359,grad_norm: 0.9489427543327537, iteration: 226734
loss: 1.0232480764389038,grad_norm: 0.9226432567265701, iteration: 226735
loss: 0.954682469367981,grad_norm: 0.9999990658920028, iteration: 226736
loss: 1.0284472703933716,grad_norm: 0.9307917161323617, iteration: 226737
loss: 1.033967137336731,grad_norm: 0.9999993556151964, iteration: 226738
loss: 1.0253695249557495,grad_norm: 0.997722244830119, iteration: 226739
loss: 1.0006847381591797,grad_norm: 0.9359047152298904, iteration: 226740
loss: 0.9728154540061951,grad_norm: 0.8572966689881488, iteration: 226741
loss: 1.0323867797851562,grad_norm: 0.9999989519474657, iteration: 226742
loss: 0.9680478572845459,grad_norm: 0.8565893600368211, iteration: 226743
loss: 0.9370307326316833,grad_norm: 0.9999991446788171, iteration: 226744
loss: 0.990685224533081,grad_norm: 0.9022540359136413, iteration: 226745
loss: 0.9781296253204346,grad_norm: 0.9468342532544575, iteration: 226746
loss: 0.9672727584838867,grad_norm: 0.9596335706897335, iteration: 226747
loss: 1.0264304876327515,grad_norm: 0.9404018680613785, iteration: 226748
loss: 0.9822580814361572,grad_norm: 0.9326663310164859, iteration: 226749
loss: 1.0014787912368774,grad_norm: 0.9459305492125327, iteration: 226750
loss: 0.9947995543479919,grad_norm: 0.9999991566277737, iteration: 226751
loss: 1.0131648778915405,grad_norm: 0.8515014416084474, iteration: 226752
loss: 1.038738489151001,grad_norm: 0.8650725974358278, iteration: 226753
loss: 1.0076351165771484,grad_norm: 0.8489715479056302, iteration: 226754
loss: 1.018772840499878,grad_norm: 0.880397235800878, iteration: 226755
loss: 1.0316085815429688,grad_norm: 0.9999990282639587, iteration: 226756
loss: 0.9795735478401184,grad_norm: 0.9999991616630427, iteration: 226757
loss: 0.9845409989356995,grad_norm: 0.961379951549086, iteration: 226758
loss: 0.9599023461341858,grad_norm: 0.9999991342283079, iteration: 226759
loss: 1.001107931137085,grad_norm: 0.6931803756813775, iteration: 226760
loss: 1.0152387619018555,grad_norm: 0.9999991513452747, iteration: 226761
loss: 0.9943233132362366,grad_norm: 0.932702952210566, iteration: 226762
loss: 1.0201081037521362,grad_norm: 0.8499832856154719, iteration: 226763
loss: 0.9943524599075317,grad_norm: 0.7903987554778749, iteration: 226764
loss: 1.0104949474334717,grad_norm: 0.8965487421058127, iteration: 226765
loss: 0.9637198448181152,grad_norm: 0.9999991253413792, iteration: 226766
loss: 1.0252609252929688,grad_norm: 0.9373825232003742, iteration: 226767
loss: 0.9841824173927307,grad_norm: 0.900849262058093, iteration: 226768
loss: 0.9946356415748596,grad_norm: 0.9999991631485875, iteration: 226769
loss: 0.9648260474205017,grad_norm: 0.9999990749797563, iteration: 226770
loss: 0.990892231464386,grad_norm: 0.8333801785623821, iteration: 226771
loss: 1.0086849927902222,grad_norm: 0.8996476361287001, iteration: 226772
loss: 0.9795858860015869,grad_norm: 0.8374534481797187, iteration: 226773
loss: 0.9815222024917603,grad_norm: 0.9815503244790519, iteration: 226774
loss: 0.9569758772850037,grad_norm: 0.9126577512730851, iteration: 226775
loss: 0.993829071521759,grad_norm: 0.9290392615773906, iteration: 226776
loss: 1.01054048538208,grad_norm: 0.91886984795297, iteration: 226777
loss: 1.017684817314148,grad_norm: 0.8543744741166904, iteration: 226778
loss: 0.9800143241882324,grad_norm: 0.9532921396064458, iteration: 226779
loss: 0.9715012311935425,grad_norm: 0.9453420868164238, iteration: 226780
loss: 1.0177021026611328,grad_norm: 0.8478588131959041, iteration: 226781
loss: 0.995535135269165,grad_norm: 0.9999995641222823, iteration: 226782
loss: 1.0081260204315186,grad_norm: 0.9596050499489324, iteration: 226783
loss: 1.0032812356948853,grad_norm: 0.7971209803428937, iteration: 226784
loss: 0.9912359118461609,grad_norm: 0.9999990798592058, iteration: 226785
loss: 1.088720440864563,grad_norm: 0.9999991624073248, iteration: 226786
loss: 1.0904321670532227,grad_norm: 0.9999997954472181, iteration: 226787
loss: 0.9805295467376709,grad_norm: 0.9999991523733194, iteration: 226788
loss: 0.9851959943771362,grad_norm: 0.936625122797282, iteration: 226789
loss: 1.0057311058044434,grad_norm: 0.9999989555653699, iteration: 226790
loss: 0.9759976863861084,grad_norm: 0.9589169618794773, iteration: 226791
loss: 0.9790317416191101,grad_norm: 0.9999990102132135, iteration: 226792
loss: 1.008561134338379,grad_norm: 0.9438879682011246, iteration: 226793
loss: 1.010777235031128,grad_norm: 0.9999992715421315, iteration: 226794
loss: 0.9532892107963562,grad_norm: 0.9999991280767823, iteration: 226795
loss: 0.9812011122703552,grad_norm: 0.9999991498821137, iteration: 226796
loss: 1.0310697555541992,grad_norm: 0.9999992629563144, iteration: 226797
loss: 1.0298373699188232,grad_norm: 0.7551113321556471, iteration: 226798
loss: 0.9851085543632507,grad_norm: 0.992105187949664, iteration: 226799
loss: 0.9881336092948914,grad_norm: 0.9999992174296688, iteration: 226800
loss: 1.006137728691101,grad_norm: 0.9289963181333738, iteration: 226801
loss: 1.0356720685958862,grad_norm: 0.9723323391561051, iteration: 226802
loss: 0.9541782140731812,grad_norm: 0.9941992880051537, iteration: 226803
loss: 0.9737702012062073,grad_norm: 0.9867387944069536, iteration: 226804
loss: 0.9738004803657532,grad_norm: 0.8717219112385605, iteration: 226805
loss: 1.0137990713119507,grad_norm: 0.8480212874087519, iteration: 226806
loss: 1.0116298198699951,grad_norm: 0.9999991282501415, iteration: 226807
loss: 0.9802948832511902,grad_norm: 0.9583794104151927, iteration: 226808
loss: 1.0092129707336426,grad_norm: 0.9999991441082754, iteration: 226809
loss: 0.9880790114402771,grad_norm: 0.8972491948230619, iteration: 226810
loss: 1.0409657955169678,grad_norm: 0.9999991678209371, iteration: 226811
loss: 0.9582219123840332,grad_norm: 0.8685192865879745, iteration: 226812
loss: 1.0115575790405273,grad_norm: 0.9999991488684333, iteration: 226813
loss: 0.9937266707420349,grad_norm: 0.8652384585031117, iteration: 226814
loss: 1.0085301399230957,grad_norm: 0.9999995295336942, iteration: 226815
loss: 0.97508305311203,grad_norm: 0.9033242604335057, iteration: 226816
loss: 1.0010792016983032,grad_norm: 0.9999991137765464, iteration: 226817
loss: 1.0283000469207764,grad_norm: 0.9999991247404009, iteration: 226818
loss: 0.9998655915260315,grad_norm: 0.7190815195420016, iteration: 226819
loss: 0.994465708732605,grad_norm: 0.9999990656674927, iteration: 226820
loss: 1.059515118598938,grad_norm: 0.9999990015790179, iteration: 226821
loss: 0.9666423797607422,grad_norm: 0.9651518679405369, iteration: 226822
loss: 0.9806712865829468,grad_norm: 0.8377305977742935, iteration: 226823
loss: 1.0226577520370483,grad_norm: 0.8851324684675559, iteration: 226824
loss: 0.9980422258377075,grad_norm: 0.9999989032283918, iteration: 226825
loss: 1.003796100616455,grad_norm: 0.9145488064127827, iteration: 226826
loss: 0.9940459132194519,grad_norm: 0.815572340903477, iteration: 226827
loss: 0.9996106624603271,grad_norm: 0.896707515094832, iteration: 226828
loss: 1.0018000602722168,grad_norm: 0.9013725273548919, iteration: 226829
loss: 0.9970932602882385,grad_norm: 0.9593892842404572, iteration: 226830
loss: 0.9796980023384094,grad_norm: 0.9999991016330136, iteration: 226831
loss: 1.0194605588912964,grad_norm: 0.8240424290366413, iteration: 226832
loss: 1.019364833831787,grad_norm: 0.9372784919784247, iteration: 226833
loss: 1.0004510879516602,grad_norm: 0.971301997328456, iteration: 226834
loss: 1.0374324321746826,grad_norm: 0.9999990196733691, iteration: 226835
loss: 0.9787064790725708,grad_norm: 0.9271179690950149, iteration: 226836
loss: 1.0205942392349243,grad_norm: 0.9059964670778842, iteration: 226837
loss: 1.0171722173690796,grad_norm: 0.8516514974475737, iteration: 226838
loss: 0.9750754833221436,grad_norm: 0.9137211034371305, iteration: 226839
loss: 0.9988210201263428,grad_norm: 0.9179296977386453, iteration: 226840
loss: 1.090911865234375,grad_norm: 0.999999103009202, iteration: 226841
loss: 0.9790407419204712,grad_norm: 0.9309861478718263, iteration: 226842
loss: 0.9881443977355957,grad_norm: 0.7588490214956187, iteration: 226843
loss: 1.0095858573913574,grad_norm: 0.9999992638443438, iteration: 226844
loss: 0.976540207862854,grad_norm: 0.9493846125666694, iteration: 226845
loss: 1.021601676940918,grad_norm: 0.8715134373839497, iteration: 226846
loss: 0.9913313388824463,grad_norm: 0.8327368954725872, iteration: 226847
loss: 1.0201125144958496,grad_norm: 0.9644747494901864, iteration: 226848
loss: 1.0154012441635132,grad_norm: 0.9244339919734038, iteration: 226849
loss: 0.9939234256744385,grad_norm: 0.9859811330009906, iteration: 226850
loss: 0.9963350296020508,grad_norm: 0.8616275763792333, iteration: 226851
loss: 0.9809051752090454,grad_norm: 0.9090141357663643, iteration: 226852
loss: 0.9976815581321716,grad_norm: 0.9999992782434354, iteration: 226853
loss: 0.970221996307373,grad_norm: 0.8844392322074971, iteration: 226854
loss: 1.0288552045822144,grad_norm: 0.8663941685573194, iteration: 226855
loss: 0.9961452484130859,grad_norm: 0.9999991707930458, iteration: 226856
loss: 1.011609673500061,grad_norm: 0.9999990817937557, iteration: 226857
loss: 1.0160547494888306,grad_norm: 0.9999990601631662, iteration: 226858
loss: 1.0060456991195679,grad_norm: 0.763923338578131, iteration: 226859
loss: 0.9878084063529968,grad_norm: 0.8922488750258111, iteration: 226860
loss: 1.0130963325500488,grad_norm: 0.8511315266161188, iteration: 226861
loss: 0.9917558431625366,grad_norm: 0.9135871474441543, iteration: 226862
loss: 0.9921712279319763,grad_norm: 0.9999991436903543, iteration: 226863
loss: 1.0401970148086548,grad_norm: 0.9999991747265996, iteration: 226864
loss: 1.0067263841629028,grad_norm: 0.9999990999054877, iteration: 226865
loss: 0.9944894909858704,grad_norm: 0.9999990121117462, iteration: 226866
loss: 0.9926251173019409,grad_norm: 0.957259231831787, iteration: 226867
loss: 1.0170501470565796,grad_norm: 0.7909692029573782, iteration: 226868
loss: 1.0521693229675293,grad_norm: 0.9999991305577955, iteration: 226869
loss: 1.0166184902191162,grad_norm: 0.9999989971442688, iteration: 226870
loss: 1.0136483907699585,grad_norm: 0.9999991485935812, iteration: 226871
loss: 1.0020748376846313,grad_norm: 0.9340159093202473, iteration: 226872
loss: 0.9967544078826904,grad_norm: 0.933314730538338, iteration: 226873
loss: 1.0168206691741943,grad_norm: 0.859539486202888, iteration: 226874
loss: 0.9995524287223816,grad_norm: 0.7808574970366063, iteration: 226875
loss: 0.9817804098129272,grad_norm: 0.9999992111215134, iteration: 226876
loss: 0.9924702644348145,grad_norm: 0.9611864001022534, iteration: 226877
loss: 1.032530665397644,grad_norm: 0.8854523888551189, iteration: 226878
loss: 1.0096315145492554,grad_norm: 0.9787775939101326, iteration: 226879
loss: 0.986699640750885,grad_norm: 0.8941504267797658, iteration: 226880
loss: 1.0069687366485596,grad_norm: 0.9737019532531468, iteration: 226881
loss: 0.9516735076904297,grad_norm: 0.9361807470450854, iteration: 226882
loss: 1.01474130153656,grad_norm: 0.9757557541265981, iteration: 226883
loss: 1.0108383893966675,grad_norm: 0.999998989551212, iteration: 226884
loss: 0.9965997338294983,grad_norm: 0.979653763721411, iteration: 226885
loss: 0.9935495257377625,grad_norm: 0.999999020628402, iteration: 226886
loss: 0.9994716048240662,grad_norm: 0.9999990502180194, iteration: 226887
loss: 0.9953332543373108,grad_norm: 0.9999989991312886, iteration: 226888
loss: 1.0374553203582764,grad_norm: 0.9999995916574352, iteration: 226889
loss: 1.0239871740341187,grad_norm: 0.9999990431826508, iteration: 226890
loss: 0.9942757487297058,grad_norm: 0.9999991726871922, iteration: 226891
loss: 0.9880002737045288,grad_norm: 0.9889592216061084, iteration: 226892
loss: 1.0039048194885254,grad_norm: 0.9999990207704909, iteration: 226893
loss: 0.9777848720550537,grad_norm: 0.7692087716707993, iteration: 226894
loss: 0.9704796075820923,grad_norm: 0.793042372202509, iteration: 226895
loss: 1.0079967975616455,grad_norm: 0.9429585263522962, iteration: 226896
loss: 0.9727844595909119,grad_norm: 0.7830626047875359, iteration: 226897
loss: 0.9874802231788635,grad_norm: 0.9397450040849684, iteration: 226898
loss: 0.9819190502166748,grad_norm: 0.9999990821350595, iteration: 226899
loss: 1.0174038410186768,grad_norm: 0.9050209057545736, iteration: 226900
loss: 1.0004664659500122,grad_norm: 0.9999990226145075, iteration: 226901
loss: 0.989695131778717,grad_norm: 0.8177419076701703, iteration: 226902
loss: 1.0222140550613403,grad_norm: 0.9378367656531723, iteration: 226903
loss: 0.9832956790924072,grad_norm: 0.8740210275574053, iteration: 226904
loss: 1.0313316583633423,grad_norm: 0.9175720046831036, iteration: 226905
loss: 1.0255616903305054,grad_norm: 0.9999990101253142, iteration: 226906
loss: 1.0273319482803345,grad_norm: 0.9894444457367736, iteration: 226907
loss: 0.9824865460395813,grad_norm: 0.6831058955516205, iteration: 226908
loss: 0.9699504971504211,grad_norm: 0.8295093323154996, iteration: 226909
loss: 0.9899908900260925,grad_norm: 0.937201170049337, iteration: 226910
loss: 0.9831835627555847,grad_norm: 0.9581076304766833, iteration: 226911
loss: 0.9882369637489319,grad_norm: 0.9461866001756215, iteration: 226912
loss: 1.0278053283691406,grad_norm: 0.9999991686337055, iteration: 226913
loss: 1.0234562158584595,grad_norm: 0.8969493235722398, iteration: 226914
loss: 0.9869152307510376,grad_norm: 0.999999072357269, iteration: 226915
loss: 0.9754351377487183,grad_norm: 0.9999989811377508, iteration: 226916
loss: 1.0026236772537231,grad_norm: 0.9208552076066852, iteration: 226917
loss: 0.9999668598175049,grad_norm: 0.9794168786922011, iteration: 226918
loss: 1.0051825046539307,grad_norm: 0.7837749714703197, iteration: 226919
loss: 1.0025442838668823,grad_norm: 0.989511507202534, iteration: 226920
loss: 1.0118478536605835,grad_norm: 0.9886073212135777, iteration: 226921
loss: 0.9904947876930237,grad_norm: 0.9999991685324515, iteration: 226922
loss: 1.0046876668930054,grad_norm: 0.9999991205082079, iteration: 226923
loss: 1.032996654510498,grad_norm: 0.9999991582564712, iteration: 226924
loss: 0.9928109049797058,grad_norm: 0.8372957264043772, iteration: 226925
loss: 1.005664348602295,grad_norm: 0.9999990717649522, iteration: 226926
loss: 0.989831805229187,grad_norm: 0.8424515640535108, iteration: 226927
loss: 0.9473633170127869,grad_norm: 0.8696391393818282, iteration: 226928
loss: 1.0492761135101318,grad_norm: 0.8130301461810937, iteration: 226929
loss: 0.9818698167800903,grad_norm: 0.9728483979168144, iteration: 226930
loss: 1.0194355249404907,grad_norm: 0.9928593329271457, iteration: 226931
loss: 1.0010157823562622,grad_norm: 0.9999990861151178, iteration: 226932
loss: 0.9833330512046814,grad_norm: 0.9244809222048721, iteration: 226933
loss: 1.0083564519882202,grad_norm: 0.9684605415282598, iteration: 226934
loss: 1.02620530128479,grad_norm: 0.948016235450004, iteration: 226935
loss: 0.9979221224784851,grad_norm: 0.9046265102226428, iteration: 226936
loss: 0.9612643718719482,grad_norm: 0.859831445658433, iteration: 226937
loss: 1.02580726146698,grad_norm: 0.999999116618481, iteration: 226938
loss: 0.9681031703948975,grad_norm: 0.8041248525370869, iteration: 226939
loss: 0.9799860119819641,grad_norm: 0.8759372317355821, iteration: 226940
loss: 0.9953840374946594,grad_norm: 0.8309464581194521, iteration: 226941
loss: 1.0036894083023071,grad_norm: 0.9999991283110298, iteration: 226942
loss: 0.9777553081512451,grad_norm: 0.7993311453249455, iteration: 226943
loss: 1.0906081199645996,grad_norm: 0.999999081129917, iteration: 226944
loss: 0.9483520984649658,grad_norm: 0.9839905559102892, iteration: 226945
loss: 1.0154409408569336,grad_norm: 0.733198171748633, iteration: 226946
loss: 0.9887969493865967,grad_norm: 0.9999989999501623, iteration: 226947
loss: 1.02647864818573,grad_norm: 0.9999988889666251, iteration: 226948
loss: 0.9196314811706543,grad_norm: 0.9999989078893118, iteration: 226949
loss: 0.9929473996162415,grad_norm: 0.9999989675567749, iteration: 226950
loss: 0.9875079989433289,grad_norm: 0.9757526540062267, iteration: 226951
loss: 1.00368070602417,grad_norm: 0.938094065180266, iteration: 226952
loss: 0.9915086627006531,grad_norm: 0.7826145741683319, iteration: 226953
loss: 1.0210649967193604,grad_norm: 0.9586326968080653, iteration: 226954
loss: 1.0012387037277222,grad_norm: 0.9354112386530069, iteration: 226955
loss: 1.0321241617202759,grad_norm: 0.9784911389444322, iteration: 226956
loss: 1.0035337209701538,grad_norm: 0.9754814157637368, iteration: 226957
loss: 1.0078914165496826,grad_norm: 0.9999990499803276, iteration: 226958
loss: 1.0023715496063232,grad_norm: 0.9999990785973101, iteration: 226959
loss: 0.986020565032959,grad_norm: 0.8496557776550481, iteration: 226960
loss: 1.0025957822799683,grad_norm: 0.7846803910246147, iteration: 226961
loss: 1.002605676651001,grad_norm: 0.999999058524602, iteration: 226962
loss: 1.0034693479537964,grad_norm: 0.8919408995038683, iteration: 226963
loss: 1.02977454662323,grad_norm: 0.9671144600813074, iteration: 226964
loss: 0.98813396692276,grad_norm: 0.9147002721838711, iteration: 226965
loss: 0.9854820370674133,grad_norm: 0.9889063978571917, iteration: 226966
loss: 1.0214895009994507,grad_norm: 0.8311881314677975, iteration: 226967
loss: 0.9963513016700745,grad_norm: 0.8819280696958302, iteration: 226968
loss: 0.9892253279685974,grad_norm: 0.9895176662546487, iteration: 226969
loss: 1.0345072746276855,grad_norm: 0.9076196720365339, iteration: 226970
loss: 1.023158311843872,grad_norm: 0.9011083458560476, iteration: 226971
loss: 0.9778868556022644,grad_norm: 0.6872047249774266, iteration: 226972
loss: 1.0137288570404053,grad_norm: 0.8596971718153207, iteration: 226973
loss: 1.0151405334472656,grad_norm: 0.7963490720473343, iteration: 226974
loss: 1.0117053985595703,grad_norm: 0.9197382840656596, iteration: 226975
loss: 0.9912958741188049,grad_norm: 0.9488027560618287, iteration: 226976
loss: 1.0368865728378296,grad_norm: 0.885159045912418, iteration: 226977
loss: 0.9341349005699158,grad_norm: 0.9999990388414964, iteration: 226978
loss: 1.0269924402236938,grad_norm: 0.9999995355383989, iteration: 226979
loss: 0.9854145050048828,grad_norm: 0.9999991138033875, iteration: 226980
loss: 1.0024585723876953,grad_norm: 0.9014718444419428, iteration: 226981
loss: 0.9818772673606873,grad_norm: 0.9999989097895836, iteration: 226982
loss: 0.9951265454292297,grad_norm: 0.9874191133128726, iteration: 226983
loss: 0.9854600429534912,grad_norm: 0.9734158533505161, iteration: 226984
loss: 0.9421473145484924,grad_norm: 0.9999990992312497, iteration: 226985
loss: 1.012117862701416,grad_norm: 0.9241121893158837, iteration: 226986
loss: 0.9823266863822937,grad_norm: 0.9999991211854378, iteration: 226987
loss: 0.9789305925369263,grad_norm: 0.9858517801360726, iteration: 226988
loss: 1.0484682321548462,grad_norm: 0.9999997569082033, iteration: 226989
loss: 1.0083836317062378,grad_norm: 0.99026527254779, iteration: 226990
loss: 1.0387977361679077,grad_norm: 0.9755785711968435, iteration: 226991
loss: 0.9917675852775574,grad_norm: 0.984586565524741, iteration: 226992
loss: 0.9882642030715942,grad_norm: 0.9622985414184726, iteration: 226993
loss: 1.0079878568649292,grad_norm: 0.9999990322020218, iteration: 226994
loss: 1.0108686685562134,grad_norm: 0.9498630980205028, iteration: 226995
loss: 1.0210964679718018,grad_norm: 0.9885596342283353, iteration: 226996
loss: 1.0205382108688354,grad_norm: 0.96578420188162, iteration: 226997
loss: 0.9874359369277954,grad_norm: 0.9999990980151818, iteration: 226998
loss: 0.9538604021072388,grad_norm: 0.9430400023105776, iteration: 226999
loss: 1.0102499723434448,grad_norm: 0.8824792620844341, iteration: 227000
loss: 0.9985021352767944,grad_norm: 0.9999990543567302, iteration: 227001
loss: 1.0284991264343262,grad_norm: 0.9371279732642109, iteration: 227002
loss: 0.9878555536270142,grad_norm: 0.9999992825046912, iteration: 227003
loss: 1.0049090385437012,grad_norm: 0.8104568555142545, iteration: 227004
loss: 1.0311092138290405,grad_norm: 0.884717847600422, iteration: 227005
loss: 0.9871860146522522,grad_norm: 0.818178346550523, iteration: 227006
loss: 1.0154390335083008,grad_norm: 0.9315069342712697, iteration: 227007
loss: 1.0131338834762573,grad_norm: 0.8940657434839387, iteration: 227008
loss: 0.9680557250976562,grad_norm: 0.8853436546018887, iteration: 227009
loss: 1.0182757377624512,grad_norm: 0.9999990675607108, iteration: 227010
loss: 0.9841597080230713,grad_norm: 0.8913265454688334, iteration: 227011
loss: 1.0202114582061768,grad_norm: 0.9999992477801568, iteration: 227012
loss: 0.9559818506240845,grad_norm: 0.8935827590666089, iteration: 227013
loss: 0.9763047099113464,grad_norm: 0.9003693443620294, iteration: 227014
loss: 1.0644748210906982,grad_norm: 0.9999999627857998, iteration: 227015
loss: 1.0264787673950195,grad_norm: 0.9227508446530729, iteration: 227016
loss: 1.011629581451416,grad_norm: 0.8857700240809248, iteration: 227017
loss: 1.0104236602783203,grad_norm: 0.9999992229615254, iteration: 227018
loss: 0.9800031781196594,grad_norm: 0.8393727894363762, iteration: 227019
loss: 1.0011578798294067,grad_norm: 0.8939598942299698, iteration: 227020
loss: 1.0179643630981445,grad_norm: 0.8865441670068793, iteration: 227021
loss: 0.9743696451187134,grad_norm: 0.7437424380761596, iteration: 227022
loss: 0.9720814824104309,grad_norm: 0.8672943252406364, iteration: 227023
loss: 1.0222152471542358,grad_norm: 0.9461596937307866, iteration: 227024
loss: 1.0800607204437256,grad_norm: 0.9999999129863377, iteration: 227025
loss: 1.037563443183899,grad_norm: 0.9999991880551851, iteration: 227026
loss: 0.9867997765541077,grad_norm: 0.9999992127055529, iteration: 227027
loss: 1.014309287071228,grad_norm: 0.7880492299724219, iteration: 227028
loss: 0.9825528860092163,grad_norm: 0.9999991046078158, iteration: 227029
loss: 1.001615285873413,grad_norm: 0.9829429208215532, iteration: 227030
loss: 1.0004512071609497,grad_norm: 0.9428735966779923, iteration: 227031
loss: 1.0113260746002197,grad_norm: 0.9999989631407639, iteration: 227032
loss: 0.9853512048721313,grad_norm: 0.977565184181342, iteration: 227033
loss: 1.038792371749878,grad_norm: 0.9981903298317859, iteration: 227034
loss: 0.9792139530181885,grad_norm: 0.9999991543832458, iteration: 227035
loss: 0.9812843203544617,grad_norm: 0.9999990244855912, iteration: 227036
loss: 0.9654700756072998,grad_norm: 0.9557799204885244, iteration: 227037
loss: 1.1529737710952759,grad_norm: 0.9999998198754674, iteration: 227038
loss: 1.012081503868103,grad_norm: 0.976428147114721, iteration: 227039
loss: 1.0144352912902832,grad_norm: 0.9999991611266402, iteration: 227040
loss: 0.9576150178909302,grad_norm: 0.999999100453626, iteration: 227041
loss: 0.9970027804374695,grad_norm: 0.9590572232719152, iteration: 227042
loss: 0.9585154056549072,grad_norm: 0.9101125894893021, iteration: 227043
loss: 0.9942023158073425,grad_norm: 0.8511893085867885, iteration: 227044
loss: 1.0282909870147705,grad_norm: 0.9436417265675171, iteration: 227045
loss: 1.2456979751586914,grad_norm: 0.9999995583974065, iteration: 227046
loss: 0.9995033144950867,grad_norm: 0.7622966578801212, iteration: 227047
loss: 1.0057777166366577,grad_norm: 0.9999992158323882, iteration: 227048
loss: 1.0194894075393677,grad_norm: 0.8367710672047621, iteration: 227049
loss: 0.9361104369163513,grad_norm: 0.9487486178951358, iteration: 227050
loss: 0.9807563424110413,grad_norm: 0.9349142125132632, iteration: 227051
loss: 1.0120540857315063,grad_norm: 0.8236947194504244, iteration: 227052
loss: 1.00713312625885,grad_norm: 0.946418324668007, iteration: 227053
loss: 0.985744833946228,grad_norm: 0.8716688037852991, iteration: 227054
loss: 1.0094658136367798,grad_norm: 0.9896944092633452, iteration: 227055
loss: 1.0076470375061035,grad_norm: 0.9999991219923807, iteration: 227056
loss: 1.0093058347702026,grad_norm: 0.7799225785783497, iteration: 227057
loss: 1.0379528999328613,grad_norm: 0.9993772177470104, iteration: 227058
loss: 1.0945334434509277,grad_norm: 0.9999997704590675, iteration: 227059
loss: 1.100734829902649,grad_norm: 0.9999997094322862, iteration: 227060
loss: 1.0061421394348145,grad_norm: 0.8983072450038294, iteration: 227061
loss: 1.0001916885375977,grad_norm: 0.8462791696363469, iteration: 227062
loss: 1.0295192003250122,grad_norm: 0.9868516656110796, iteration: 227063
loss: 0.9955393075942993,grad_norm: 0.9902743028105763, iteration: 227064
loss: 0.9887503385543823,grad_norm: 0.9450894227414591, iteration: 227065
loss: 0.9664058685302734,grad_norm: 0.9513941521458216, iteration: 227066
loss: 0.9781298637390137,grad_norm: 0.869093039636193, iteration: 227067
loss: 0.9655362963676453,grad_norm: 0.9178664670997664, iteration: 227068
loss: 0.9891818761825562,grad_norm: 0.934057852243571, iteration: 227069
loss: 0.9695892930030823,grad_norm: 0.7835559785580251, iteration: 227070
loss: 1.0075693130493164,grad_norm: 0.8780122341889878, iteration: 227071
loss: 0.9928919672966003,grad_norm: 0.9999995350870619, iteration: 227072
loss: 1.04641592502594,grad_norm: 0.9748739173318043, iteration: 227073
loss: 0.9850568175315857,grad_norm: 0.9796086051596419, iteration: 227074
loss: 0.9918289184570312,grad_norm: 0.9999988739228381, iteration: 227075
loss: 1.016990303993225,grad_norm: 0.9999991709372976, iteration: 227076
loss: 1.0142827033996582,grad_norm: 0.8962209449368785, iteration: 227077
loss: 1.020641803741455,grad_norm: 0.9999991368183883, iteration: 227078
loss: 1.0344258546829224,grad_norm: 0.8012914509559238, iteration: 227079
loss: 1.0309032201766968,grad_norm: 0.9999991067811557, iteration: 227080
loss: 0.9779765605926514,grad_norm: 0.9660228301242473, iteration: 227081
loss: 0.9954230189323425,grad_norm: 0.9470364157527881, iteration: 227082
loss: 1.0280064344406128,grad_norm: 0.9579888360048006, iteration: 227083
loss: 1.0310766696929932,grad_norm: 0.8498555219080886, iteration: 227084
loss: 0.9615001678466797,grad_norm: 0.9605927666153778, iteration: 227085
loss: 1.013085126876831,grad_norm: 0.9999991966435032, iteration: 227086
loss: 0.9922332763671875,grad_norm: 0.8571552902713261, iteration: 227087
loss: 1.0188084840774536,grad_norm: 0.7876076988907257, iteration: 227088
loss: 0.9892600774765015,grad_norm: 0.9999991464107718, iteration: 227089
loss: 0.9652698040008545,grad_norm: 0.9580985559833722, iteration: 227090
loss: 0.995425283908844,grad_norm: 0.7708357788113381, iteration: 227091
loss: 1.0109635591506958,grad_norm: 0.999999121655346, iteration: 227092
loss: 1.0055032968521118,grad_norm: 0.8313170309034673, iteration: 227093
loss: 1.0362077951431274,grad_norm: 0.9999992267862347, iteration: 227094
loss: 0.9936308264732361,grad_norm: 0.9999989472320121, iteration: 227095
loss: 1.003142237663269,grad_norm: 0.8227399986387404, iteration: 227096
loss: 1.0331188440322876,grad_norm: 0.8039703517874867, iteration: 227097
loss: 1.0069503784179688,grad_norm: 0.808294533952472, iteration: 227098
loss: 1.0081260204315186,grad_norm: 0.9401487522107753, iteration: 227099
loss: 1.0113060474395752,grad_norm: 0.916662117100724, iteration: 227100
loss: 0.9597527384757996,grad_norm: 0.9999991028676671, iteration: 227101
loss: 0.9287553429603577,grad_norm: 0.9935521551152987, iteration: 227102
loss: 1.0069283246994019,grad_norm: 0.9022529285225561, iteration: 227103
loss: 0.9989457130432129,grad_norm: 0.6705976466896769, iteration: 227104
loss: 1.0188641548156738,grad_norm: 0.9304366830239568, iteration: 227105
loss: 1.0297479629516602,grad_norm: 0.9216982993366253, iteration: 227106
loss: 0.9844923615455627,grad_norm: 0.9999991954046499, iteration: 227107
loss: 0.9675924181938171,grad_norm: 0.8559557653334208, iteration: 227108
loss: 0.9863170385360718,grad_norm: 0.9364920055690991, iteration: 227109
loss: 0.9855830669403076,grad_norm: 0.9999989959112633, iteration: 227110
loss: 0.995280385017395,grad_norm: 0.8705166936699775, iteration: 227111
loss: 0.9865552186965942,grad_norm: 0.8520003630046977, iteration: 227112
loss: 1.009467363357544,grad_norm: 0.7646376215297912, iteration: 227113
loss: 1.008960485458374,grad_norm: 0.9134591865006795, iteration: 227114
loss: 1.040345311164856,grad_norm: 0.9999992051329906, iteration: 227115
loss: 1.0136842727661133,grad_norm: 0.9999998560325828, iteration: 227116
loss: 1.012548565864563,grad_norm: 0.8808271727937342, iteration: 227117
loss: 0.9738059043884277,grad_norm: 0.9999991694309743, iteration: 227118
loss: 0.9746102690696716,grad_norm: 0.8207940584963601, iteration: 227119
loss: 1.0084201097488403,grad_norm: 0.9315975061200598, iteration: 227120
loss: 1.0200843811035156,grad_norm: 0.806017202733655, iteration: 227121
loss: 1.0200655460357666,grad_norm: 0.8485220079365233, iteration: 227122
loss: 0.9762160181999207,grad_norm: 0.9343954753750375, iteration: 227123
loss: 0.9767588376998901,grad_norm: 0.9681494001071265, iteration: 227124
loss: 0.9862058758735657,grad_norm: 0.906366585840584, iteration: 227125
loss: 0.9855762124061584,grad_norm: 0.9513989358962752, iteration: 227126
loss: 1.0105229616165161,grad_norm: 0.8905143099449843, iteration: 227127
loss: 1.0167726278305054,grad_norm: 0.8992479765983619, iteration: 227128
loss: 0.9757068157196045,grad_norm: 0.7923969935749959, iteration: 227129
loss: 0.9837892055511475,grad_norm: 0.7230178488728622, iteration: 227130
loss: 1.0377347469329834,grad_norm: 0.999999659765158, iteration: 227131
loss: 1.0053118467330933,grad_norm: 0.9162371405884612, iteration: 227132
loss: 0.9910104870796204,grad_norm: 0.9206163896271684, iteration: 227133
loss: 1.0404798984527588,grad_norm: 0.9999990443894091, iteration: 227134
loss: 0.9895123243331909,grad_norm: 0.9550299271488246, iteration: 227135
loss: 0.9561383128166199,grad_norm: 0.8918582074298814, iteration: 227136
loss: 0.9834350347518921,grad_norm: 0.909360772616418, iteration: 227137
loss: 1.095070481300354,grad_norm: 0.9999992621339415, iteration: 227138
loss: 0.9646385312080383,grad_norm: 0.8991987772621506, iteration: 227139
loss: 1.0693511962890625,grad_norm: 0.9138191472560374, iteration: 227140
loss: 1.029651403427124,grad_norm: 0.8522579706049932, iteration: 227141
loss: 0.9611757397651672,grad_norm: 0.9255521927153308, iteration: 227142
loss: 0.992963969707489,grad_norm: 0.974053598063696, iteration: 227143
loss: 0.981614351272583,grad_norm: 0.9125433322328916, iteration: 227144
loss: 0.9580180048942566,grad_norm: 0.9999990734256119, iteration: 227145
loss: 0.9819672703742981,grad_norm: 0.9025910994679629, iteration: 227146
loss: 0.9865812659263611,grad_norm: 0.9999991981775161, iteration: 227147
loss: 0.9924934506416321,grad_norm: 0.8132522898712267, iteration: 227148
loss: 1.0079517364501953,grad_norm: 0.9068451233222462, iteration: 227149
loss: 1.1322259902954102,grad_norm: 0.9999997994978211, iteration: 227150
loss: 1.0134456157684326,grad_norm: 0.9060459749750917, iteration: 227151
loss: 0.947690486907959,grad_norm: 0.9257063960123401, iteration: 227152
loss: 0.9975452423095703,grad_norm: 0.9845565957231756, iteration: 227153
loss: 1.0391323566436768,grad_norm: 0.8765839546969196, iteration: 227154
loss: 1.0062607526779175,grad_norm: 0.9999991961596207, iteration: 227155
loss: 0.986356794834137,grad_norm: 0.8407251545233185, iteration: 227156
loss: 1.0226752758026123,grad_norm: 0.8991355835609872, iteration: 227157
loss: 0.9856159090995789,grad_norm: 0.9515343591339961, iteration: 227158
loss: 0.9848836660385132,grad_norm: 0.9608645222335505, iteration: 227159
loss: 0.9826124310493469,grad_norm: 0.948942380272228, iteration: 227160
loss: 1.013020634651184,grad_norm: 0.960864920059297, iteration: 227161
loss: 0.9904128909111023,grad_norm: 0.9335933184116377, iteration: 227162
loss: 1.019660472869873,grad_norm: 0.999999131182373, iteration: 227163
loss: 0.9712260961532593,grad_norm: 0.777583367497632, iteration: 227164
loss: 0.9465938806533813,grad_norm: 0.8875698228538537, iteration: 227165
loss: 0.9825888872146606,grad_norm: 0.9847373417060235, iteration: 227166
loss: 0.9855286478996277,grad_norm: 0.9319853239328616, iteration: 227167
loss: 1.0245945453643799,grad_norm: 0.9323146574030402, iteration: 227168
loss: 0.991615355014801,grad_norm: 0.8988372597013118, iteration: 227169
loss: 1.0149682760238647,grad_norm: 0.9010775668969581, iteration: 227170
loss: 0.9866950511932373,grad_norm: 0.9999990456124921, iteration: 227171
loss: 1.0348860025405884,grad_norm: 0.9749351598451185, iteration: 227172
loss: 1.0325117111206055,grad_norm: 0.9999992003853093, iteration: 227173
loss: 0.9329054355621338,grad_norm: 0.9999992105243987, iteration: 227174
loss: 1.0366666316986084,grad_norm: 0.7969064400047625, iteration: 227175
loss: 1.015608787536621,grad_norm: 0.9335410417542744, iteration: 227176
loss: 0.9902467727661133,grad_norm: 0.9999996137332094, iteration: 227177
loss: 1.0326188802719116,grad_norm: 0.9753657697762708, iteration: 227178
loss: 1.0714280605316162,grad_norm: 0.9999997761842124, iteration: 227179
loss: 1.022603988647461,grad_norm: 0.9999991898866705, iteration: 227180
loss: 1.0112476348876953,grad_norm: 0.9999992572384038, iteration: 227181
loss: 1.0128172636032104,grad_norm: 0.9999990819306218, iteration: 227182
loss: 0.9839534163475037,grad_norm: 0.9162518622051876, iteration: 227183
loss: 1.0448626279830933,grad_norm: 0.9999991798697394, iteration: 227184
loss: 0.9942176938056946,grad_norm: 0.9327257623053349, iteration: 227185
loss: 1.0017812252044678,grad_norm: 0.9210017432187798, iteration: 227186
loss: 1.0207490921020508,grad_norm: 0.8933214436194221, iteration: 227187
loss: 1.00773286819458,grad_norm: 0.9999992953277482, iteration: 227188
loss: 1.1921067237854004,grad_norm: 0.999999129659866, iteration: 227189
loss: 0.9572591185569763,grad_norm: 0.8532909156140286, iteration: 227190
loss: 1.0302947759628296,grad_norm: 0.9535211759429405, iteration: 227191
loss: 0.9832829236984253,grad_norm: 0.9671681767476737, iteration: 227192
loss: 0.9987213611602783,grad_norm: 0.8311705372097284, iteration: 227193
loss: 0.9881452322006226,grad_norm: 0.9625449964513643, iteration: 227194
loss: 1.0145506858825684,grad_norm: 0.9999994663142212, iteration: 227195
loss: 1.0129915475845337,grad_norm: 0.9757891181055607, iteration: 227196
loss: 1.0091078281402588,grad_norm: 0.9454998506421615, iteration: 227197
loss: 0.9954861998558044,grad_norm: 0.9404202186308906, iteration: 227198
loss: 0.9537380933761597,grad_norm: 0.999999149198257, iteration: 227199
loss: 0.9965577125549316,grad_norm: 0.9309659175497609, iteration: 227200
loss: 1.0042164325714111,grad_norm: 0.9999989426840195, iteration: 227201
loss: 0.9881080985069275,grad_norm: 0.9999991004371154, iteration: 227202
loss: 1.0048941373825073,grad_norm: 0.9069318967689546, iteration: 227203
loss: 0.9746217131614685,grad_norm: 0.9216265085053731, iteration: 227204
loss: 1.009492039680481,grad_norm: 0.9999989193074873, iteration: 227205
loss: 1.0484156608581543,grad_norm: 0.999999162438971, iteration: 227206
loss: 1.0224332809448242,grad_norm: 0.9999989982128321, iteration: 227207
loss: 0.9916777610778809,grad_norm: 0.9999990309421977, iteration: 227208
loss: 1.0621232986450195,grad_norm: 0.8510728247722709, iteration: 227209
loss: 0.9995869994163513,grad_norm: 0.8190338285889325, iteration: 227210
loss: 1.0044528245925903,grad_norm: 0.9999991422656516, iteration: 227211
loss: 1.0519859790802002,grad_norm: 0.9999992287349436, iteration: 227212
loss: 1.0016199350357056,grad_norm: 0.9896428782451159, iteration: 227213
loss: 1.0333423614501953,grad_norm: 0.9869928312110594, iteration: 227214
loss: 0.9659907817840576,grad_norm: 0.9830867550763398, iteration: 227215
loss: 0.9742587208747864,grad_norm: 0.834226368291601, iteration: 227216
loss: 1.0217431783676147,grad_norm: 0.9999996337804461, iteration: 227217
loss: 0.9919281005859375,grad_norm: 0.8967176532908616, iteration: 227218
loss: 0.9837932586669922,grad_norm: 0.9999991362703615, iteration: 227219
loss: 0.9935007691383362,grad_norm: 0.9187597148844198, iteration: 227220
loss: 1.0200549364089966,grad_norm: 0.9547163279272443, iteration: 227221
loss: 1.0362310409545898,grad_norm: 0.9999992239959111, iteration: 227222
loss: 0.9886114001274109,grad_norm: 0.9999990393931019, iteration: 227223
loss: 1.0162663459777832,grad_norm: 0.8194322895927174, iteration: 227224
loss: 0.9960628151893616,grad_norm: 0.8842332069919582, iteration: 227225
loss: 0.9606518745422363,grad_norm: 0.8976527426047487, iteration: 227226
loss: 1.0143417119979858,grad_norm: 0.8557234623142891, iteration: 227227
loss: 0.9720780253410339,grad_norm: 0.9999990924571572, iteration: 227228
loss: 1.0092195272445679,grad_norm: 0.999998948698967, iteration: 227229
loss: 1.0234297513961792,grad_norm: 0.999999131435796, iteration: 227230
loss: 1.0158535242080688,grad_norm: 0.9999995927457873, iteration: 227231
loss: 1.035262107849121,grad_norm: 0.999999456406176, iteration: 227232
loss: 1.0449116230010986,grad_norm: 0.9999990846876651, iteration: 227233
loss: 0.9598929286003113,grad_norm: 0.8155601985075274, iteration: 227234
loss: 0.9666257500648499,grad_norm: 0.9999989166010645, iteration: 227235
loss: 1.0595486164093018,grad_norm: 0.9999996734145222, iteration: 227236
loss: 1.0208781957626343,grad_norm: 0.9463091015671062, iteration: 227237
loss: 1.0253199338912964,grad_norm: 0.9999991497983187, iteration: 227238
loss: 1.018019437789917,grad_norm: 0.8050840138514651, iteration: 227239
loss: 1.0431859493255615,grad_norm: 0.9269505091595649, iteration: 227240
loss: 0.9876871705055237,grad_norm: 0.9999991910005844, iteration: 227241
loss: 0.9901318550109863,grad_norm: 0.9999991107892269, iteration: 227242
loss: 1.0136774778366089,grad_norm: 0.9999991701721453, iteration: 227243
loss: 0.9908856153488159,grad_norm: 0.9999990722555345, iteration: 227244
loss: 1.055922508239746,grad_norm: 0.9999991920914205, iteration: 227245
loss: 1.0311366319656372,grad_norm: 0.9999994262970759, iteration: 227246
loss: 1.014948844909668,grad_norm: 0.9999999525850178, iteration: 227247
loss: 1.0124863386154175,grad_norm: 0.9814150110100833, iteration: 227248
loss: 1.00246000289917,grad_norm: 0.9999990384153717, iteration: 227249
loss: 1.002124547958374,grad_norm: 0.9832064100963429, iteration: 227250
loss: 1.020976185798645,grad_norm: 0.9583526546406049, iteration: 227251
loss: 0.9711570143699646,grad_norm: 0.974044689985138, iteration: 227252
loss: 1.028759479522705,grad_norm: 0.9999991491022012, iteration: 227253
loss: 0.9964367747306824,grad_norm: 0.9999991101193143, iteration: 227254
loss: 1.0175803899765015,grad_norm: 0.8758941727983229, iteration: 227255
loss: 1.0042301416397095,grad_norm: 0.9999994657648076, iteration: 227256
loss: 1.009294867515564,grad_norm: 0.9999991960237145, iteration: 227257
loss: 1.0539021492004395,grad_norm: 0.9999998953212731, iteration: 227258
loss: 1.0298489332199097,grad_norm: 0.9415001597122769, iteration: 227259
loss: 0.9897210597991943,grad_norm: 0.8742582448907112, iteration: 227260
loss: 0.9814372658729553,grad_norm: 0.9525420189080311, iteration: 227261
loss: 0.9645085334777832,grad_norm: 0.9738987016644407, iteration: 227262
loss: 1.0032633543014526,grad_norm: 0.8394156590035549, iteration: 227263
loss: 0.9912721514701843,grad_norm: 0.999999073073292, iteration: 227264
loss: 1.0804569721221924,grad_norm: 0.9999991773944766, iteration: 227265
loss: 1.0066306591033936,grad_norm: 0.9118435416386252, iteration: 227266
loss: 0.9790960550308228,grad_norm: 0.8869615916985479, iteration: 227267
loss: 0.9851276874542236,grad_norm: 0.8695965899819549, iteration: 227268
loss: 1.015971302986145,grad_norm: 0.999999209783415, iteration: 227269
loss: 0.9530929923057556,grad_norm: 0.9624079044864752, iteration: 227270
loss: 0.9781866669654846,grad_norm: 0.9663903948342961, iteration: 227271
loss: 1.0016077756881714,grad_norm: 0.8921396908641324, iteration: 227272
loss: 1.0066509246826172,grad_norm: 0.846788901271117, iteration: 227273
loss: 0.9829232096672058,grad_norm: 0.874941150542081, iteration: 227274
loss: 1.0014005899429321,grad_norm: 0.8886798243446299, iteration: 227275
loss: 0.9873085618019104,grad_norm: 0.9238374747544791, iteration: 227276
loss: 0.9766097068786621,grad_norm: 0.8996486879414533, iteration: 227277
loss: 1.0082371234893799,grad_norm: 0.9154911358921698, iteration: 227278
loss: 1.0046778917312622,grad_norm: 0.9999995396800279, iteration: 227279
loss: 1.1236169338226318,grad_norm: 0.9999991835015966, iteration: 227280
loss: 1.0104435682296753,grad_norm: 0.9999990458765022, iteration: 227281
loss: 1.006256103515625,grad_norm: 0.9705209565555778, iteration: 227282
loss: 1.0137357711791992,grad_norm: 0.9999991261100312, iteration: 227283
loss: 0.9615499377250671,grad_norm: 0.930302214769205, iteration: 227284
loss: 1.0142096281051636,grad_norm: 0.9999993501412687, iteration: 227285
loss: 1.0232008695602417,grad_norm: 0.840641937188939, iteration: 227286
loss: 1.1000243425369263,grad_norm: 0.9999993263544728, iteration: 227287
loss: 0.9876584410667419,grad_norm: 0.9999994081933273, iteration: 227288
loss: 1.022266149520874,grad_norm: 0.9999992175001222, iteration: 227289
loss: 0.9928133487701416,grad_norm: 0.9999990268456269, iteration: 227290
loss: 1.0042126178741455,grad_norm: 0.9454623239734469, iteration: 227291
loss: 0.9938628673553467,grad_norm: 0.9421154678884078, iteration: 227292
loss: 1.0141936540603638,grad_norm: 0.7186583855264579, iteration: 227293
loss: 1.0678013563156128,grad_norm: 0.9999993517024243, iteration: 227294
loss: 0.9806162714958191,grad_norm: 0.8772054677084947, iteration: 227295
loss: 1.0266649723052979,grad_norm: 0.7490138398709507, iteration: 227296
loss: 1.0888010263442993,grad_norm: 0.9999994715128291, iteration: 227297
loss: 1.0179314613342285,grad_norm: 0.9999996317811414, iteration: 227298
loss: 0.9760125875473022,grad_norm: 0.9213045341376893, iteration: 227299
loss: 0.9912057518959045,grad_norm: 0.9280084747682488, iteration: 227300
loss: 0.9895260334014893,grad_norm: 0.999999264931836, iteration: 227301
loss: 0.9841060042381287,grad_norm: 0.8606144114647852, iteration: 227302
loss: 1.0601133108139038,grad_norm: 0.8748078043535165, iteration: 227303
loss: 1.0384750366210938,grad_norm: 0.9436557567963101, iteration: 227304
loss: 1.0038812160491943,grad_norm: 0.9999992898338045, iteration: 227305
loss: 0.9890956878662109,grad_norm: 0.9999989748203845, iteration: 227306
loss: 0.9948227405548096,grad_norm: 0.9807423606066933, iteration: 227307
loss: 1.0463041067123413,grad_norm: 0.8720315648112672, iteration: 227308
loss: 1.0380091667175293,grad_norm: 0.9999997703407685, iteration: 227309
loss: 1.005441665649414,grad_norm: 0.9999991774474997, iteration: 227310
loss: 0.9801115393638611,grad_norm: 0.9999991734570791, iteration: 227311
loss: 1.0087604522705078,grad_norm: 0.9373906125825768, iteration: 227312
loss: 1.0157493352890015,grad_norm: 0.9999994517508457, iteration: 227313
loss: 1.032831072807312,grad_norm: 0.999999651093228, iteration: 227314
loss: 1.000328779220581,grad_norm: 0.8725924547502131, iteration: 227315
loss: 1.0225814580917358,grad_norm: 0.9999990185078735, iteration: 227316
loss: 0.9973658919334412,grad_norm: 0.9381468758635836, iteration: 227317
loss: 0.9796802401542664,grad_norm: 0.9802498350269203, iteration: 227318
loss: 1.0160419940948486,grad_norm: 0.814086556398027, iteration: 227319
loss: 1.014894723892212,grad_norm: 0.9999992788383697, iteration: 227320
loss: 0.9559717774391174,grad_norm: 0.9155403063575506, iteration: 227321
loss: 0.9826334118843079,grad_norm: 0.9489431810419824, iteration: 227322
loss: 0.9928333759307861,grad_norm: 0.9999992516989578, iteration: 227323
loss: 0.9703717827796936,grad_norm: 0.9999992387138487, iteration: 227324
loss: 1.0105311870574951,grad_norm: 0.8220322874180804, iteration: 227325
loss: 0.9579810500144958,grad_norm: 0.8708053130360696, iteration: 227326
loss: 1.012935996055603,grad_norm: 0.8686345435078212, iteration: 227327
loss: 1.0455522537231445,grad_norm: 0.7739717700040483, iteration: 227328
loss: 1.041335940361023,grad_norm: 0.9999989884959098, iteration: 227329
loss: 0.9715462327003479,grad_norm: 0.8914739891011847, iteration: 227330
loss: 1.0441882610321045,grad_norm: 0.9999997543180744, iteration: 227331
loss: 0.989310622215271,grad_norm: 0.9048320687713463, iteration: 227332
loss: 1.0204275846481323,grad_norm: 0.9671494466373102, iteration: 227333
loss: 1.0009093284606934,grad_norm: 0.9999993844216377, iteration: 227334
loss: 1.0038707256317139,grad_norm: 0.9999991881484079, iteration: 227335
loss: 0.9729481935501099,grad_norm: 0.7887117701074161, iteration: 227336
loss: 1.020889401435852,grad_norm: 0.8003715205920613, iteration: 227337
loss: 0.9716203808784485,grad_norm: 0.9472991057745164, iteration: 227338
loss: 1.0838022232055664,grad_norm: 0.999999063297186, iteration: 227339
loss: 0.986932098865509,grad_norm: 0.8558194365172157, iteration: 227340
loss: 1.0060604810714722,grad_norm: 0.9970326562147116, iteration: 227341
loss: 1.0084545612335205,grad_norm: 0.876819244596869, iteration: 227342
loss: 1.0014889240264893,grad_norm: 0.81962911098822, iteration: 227343
loss: 1.000481367111206,grad_norm: 0.9999992240055074, iteration: 227344
loss: 1.0208572149276733,grad_norm: 0.999999049909257, iteration: 227345
loss: 1.0286563634872437,grad_norm: 0.8072987178172916, iteration: 227346
loss: 0.9901425838470459,grad_norm: 0.910133545143514, iteration: 227347
loss: 1.0134977102279663,grad_norm: 0.8203449842629906, iteration: 227348
loss: 1.0222147703170776,grad_norm: 0.9713493611645933, iteration: 227349
loss: 0.9967056512832642,grad_norm: 0.9440159443306395, iteration: 227350
loss: 1.0307015180587769,grad_norm: 0.9550171558753844, iteration: 227351
loss: 0.9768400192260742,grad_norm: 0.9999992242296329, iteration: 227352
loss: 1.0175994634628296,grad_norm: 0.8310978541586882, iteration: 227353
loss: 1.0220578908920288,grad_norm: 0.9999990018125184, iteration: 227354
loss: 0.9848921298980713,grad_norm: 0.9473414661174501, iteration: 227355
loss: 1.1037226915359497,grad_norm: 0.9999999309075286, iteration: 227356
loss: 1.0070637464523315,grad_norm: 0.752916241073285, iteration: 227357
loss: 1.0209450721740723,grad_norm: 0.9999993005748194, iteration: 227358
loss: 1.0391769409179688,grad_norm: 0.9999996692167991, iteration: 227359
loss: 1.0041115283966064,grad_norm: 0.9614843767377195, iteration: 227360
loss: 1.0002386569976807,grad_norm: 0.905446367121096, iteration: 227361
loss: 0.9774952530860901,grad_norm: 0.9999991273191805, iteration: 227362
loss: 0.9820272326469421,grad_norm: 0.9999990474759456, iteration: 227363
loss: 1.002115249633789,grad_norm: 0.9999991984863436, iteration: 227364
loss: 1.010762333869934,grad_norm: 0.9999996257214743, iteration: 227365
loss: 1.0272035598754883,grad_norm: 0.9999992938612198, iteration: 227366
loss: 1.087721586227417,grad_norm: 0.9999999210828081, iteration: 227367
loss: 1.0706571340560913,grad_norm: 0.9999996561032779, iteration: 227368
loss: 0.9794940948486328,grad_norm: 0.8769964461021358, iteration: 227369
loss: 0.9875010848045349,grad_norm: 0.8122174246845505, iteration: 227370
loss: 0.9676859974861145,grad_norm: 0.8104877242357258, iteration: 227371
loss: 1.0154998302459717,grad_norm: 0.9828691557225318, iteration: 227372
loss: 0.9804731011390686,grad_norm: 0.9999991480550208, iteration: 227373
loss: 0.9566860795021057,grad_norm: 0.983381538639413, iteration: 227374
loss: 0.9897145628929138,grad_norm: 0.7901769084346663, iteration: 227375
loss: 0.9839664697647095,grad_norm: 0.9999990980713637, iteration: 227376
loss: 1.00466787815094,grad_norm: 0.8412702883448931, iteration: 227377
loss: 0.9633947610855103,grad_norm: 0.9999989772591311, iteration: 227378
loss: 0.9862817525863647,grad_norm: 0.9077175519806227, iteration: 227379
loss: 1.040029764175415,grad_norm: 0.9635274789172369, iteration: 227380
loss: 0.985022783279419,grad_norm: 0.9999991057456996, iteration: 227381
loss: 1.0036194324493408,grad_norm: 0.9999996629024255, iteration: 227382
loss: 1.0284836292266846,grad_norm: 0.9999990023079918, iteration: 227383
loss: 0.9917595982551575,grad_norm: 0.7618184897271367, iteration: 227384
loss: 1.0255318880081177,grad_norm: 0.9999999669948354, iteration: 227385
loss: 0.9951990842819214,grad_norm: 0.9999991308792737, iteration: 227386
loss: 0.9956554770469666,grad_norm: 0.9999996322771565, iteration: 227387
loss: 1.0155009031295776,grad_norm: 0.7325041946537304, iteration: 227388
loss: 1.0126378536224365,grad_norm: 0.7837132537336358, iteration: 227389
loss: 1.1976442337036133,grad_norm: 0.999999446530871, iteration: 227390
loss: 0.9829824566841125,grad_norm: 0.7982515579289177, iteration: 227391
loss: 1.0528144836425781,grad_norm: 0.999999082139584, iteration: 227392
loss: 0.9818269610404968,grad_norm: 0.999998982060788, iteration: 227393
loss: 0.9822810888290405,grad_norm: 0.9734670118067457, iteration: 227394
loss: 0.9790963530540466,grad_norm: 0.8924799133750286, iteration: 227395
loss: 0.9741469025611877,grad_norm: 0.9638161519945248, iteration: 227396
loss: 0.9828405380249023,grad_norm: 0.9973490138423949, iteration: 227397
loss: 1.0043483972549438,grad_norm: 0.9446985962926748, iteration: 227398
loss: 0.9984522461891174,grad_norm: 0.9911684563068819, iteration: 227399
loss: 1.0178732872009277,grad_norm: 0.9451042516660478, iteration: 227400
loss: 0.9757128953933716,grad_norm: 0.9631258588639269, iteration: 227401
loss: 1.0009039640426636,grad_norm: 0.8678591935027717, iteration: 227402
loss: 1.0739136934280396,grad_norm: 0.999999180905084, iteration: 227403
loss: 0.9870339035987854,grad_norm: 0.9999991690202595, iteration: 227404
loss: 0.9982511401176453,grad_norm: 0.8895316464899878, iteration: 227405
loss: 1.1120120286941528,grad_norm: 0.9999999789271824, iteration: 227406
loss: 0.9804975390434265,grad_norm: 0.7693266491103677, iteration: 227407
loss: 1.085868239402771,grad_norm: 0.9999994607758936, iteration: 227408
loss: 1.1118084192276,grad_norm: 0.999999864292693, iteration: 227409
loss: 1.003827691078186,grad_norm: 0.9999991396600223, iteration: 227410
loss: 1.0139830112457275,grad_norm: 0.9738425537588137, iteration: 227411
loss: 1.0070419311523438,grad_norm: 0.9482301334216876, iteration: 227412
loss: 1.0268019437789917,grad_norm: 0.9999996721152205, iteration: 227413
loss: 0.9851199388504028,grad_norm: 0.757502453333746, iteration: 227414
loss: 1.0398287773132324,grad_norm: 0.991397699608631, iteration: 227415
loss: 0.9938990473747253,grad_norm: 0.9333437916774349, iteration: 227416
loss: 1.03810453414917,grad_norm: 0.9999991589018894, iteration: 227417
loss: 0.948287308216095,grad_norm: 0.8855095035106598, iteration: 227418
loss: 1.0051294565200806,grad_norm: 0.9999989429465577, iteration: 227419
loss: 1.0112276077270508,grad_norm: 0.9945316574240268, iteration: 227420
loss: 0.9885103702545166,grad_norm: 0.8960711041912084, iteration: 227421
loss: 1.0553481578826904,grad_norm: 0.9999997962764954, iteration: 227422
loss: 0.9887343049049377,grad_norm: 0.9181344304452612, iteration: 227423
loss: 1.0793451070785522,grad_norm: 0.9050132963847484, iteration: 227424
loss: 1.023315668106079,grad_norm: 0.9999990218566973, iteration: 227425
loss: 0.9921422004699707,grad_norm: 0.9131972396451163, iteration: 227426
loss: 1.0583598613739014,grad_norm: 0.9999999866420666, iteration: 227427
loss: 0.9882981181144714,grad_norm: 0.9957002061573252, iteration: 227428
loss: 1.0031083822250366,grad_norm: 0.9999990543380687, iteration: 227429
loss: 0.9540728330612183,grad_norm: 0.9760638267531493, iteration: 227430
loss: 0.9995775818824768,grad_norm: 0.8430491497349173, iteration: 227431
loss: 1.0413892269134521,grad_norm: 0.9999991142516174, iteration: 227432
loss: 1.006795883178711,grad_norm: 0.7826859079546488, iteration: 227433
loss: 1.0069005489349365,grad_norm: 0.8496839777563748, iteration: 227434
loss: 1.0175405740737915,grad_norm: 0.9999997618079811, iteration: 227435
loss: 1.0272839069366455,grad_norm: 0.6713359114243916, iteration: 227436
loss: 0.9728075861930847,grad_norm: 0.8818924573624716, iteration: 227437
loss: 0.9754915237426758,grad_norm: 0.9999990918200251, iteration: 227438
loss: 1.008223295211792,grad_norm: 0.895442478740536, iteration: 227439
loss: 1.1418182849884033,grad_norm: 0.9999996960363724, iteration: 227440
loss: 0.9743567109107971,grad_norm: 0.9295654220862924, iteration: 227441
loss: 0.9819027781486511,grad_norm: 0.9999992953008617, iteration: 227442
loss: 0.9950692057609558,grad_norm: 0.8596975675380129, iteration: 227443
loss: 1.0581552982330322,grad_norm: 0.9362915856014489, iteration: 227444
loss: 0.9578523635864258,grad_norm: 0.9089720117605316, iteration: 227445
loss: 1.0177218914031982,grad_norm: 0.999999087117984, iteration: 227446
loss: 1.0144871473312378,grad_norm: 0.8631626999407055, iteration: 227447
loss: 0.9854721426963806,grad_norm: 0.9550469028201863, iteration: 227448
loss: 1.0120846033096313,grad_norm: 0.8964893361618238, iteration: 227449
loss: 1.126672625541687,grad_norm: 0.9999992316353827, iteration: 227450
loss: 1.0230562686920166,grad_norm: 0.9999999120413099, iteration: 227451
loss: 1.0887300968170166,grad_norm: 0.9999990603652553, iteration: 227452
loss: 1.0069596767425537,grad_norm: 0.9421729854831972, iteration: 227453
loss: 1.0178295373916626,grad_norm: 0.932546608429992, iteration: 227454
loss: 1.0131951570510864,grad_norm: 0.999999224933408, iteration: 227455
loss: 0.9956004023551941,grad_norm: 0.8922848743067535, iteration: 227456
loss: 1.14412260055542,grad_norm: 1.000000011550044, iteration: 227457
loss: 1.0092689990997314,grad_norm: 0.9665815340034408, iteration: 227458
loss: 0.9943342804908752,grad_norm: 0.8119225627174379, iteration: 227459
loss: 1.0131510496139526,grad_norm: 0.9999995048496098, iteration: 227460
loss: 0.9589122533798218,grad_norm: 0.9632612961521732, iteration: 227461
loss: 1.0048097372055054,grad_norm: 0.9633038368296378, iteration: 227462
loss: 1.000045895576477,grad_norm: 0.9887632879261953, iteration: 227463
loss: 0.9805045127868652,grad_norm: 0.9999992588631845, iteration: 227464
loss: 1.0375163555145264,grad_norm: 0.9560820941879663, iteration: 227465
loss: 0.9987971186637878,grad_norm: 0.9999990758091266, iteration: 227466
loss: 0.9852055907249451,grad_norm: 0.9464895610008925, iteration: 227467
loss: 1.0043351650238037,grad_norm: 0.991328239546996, iteration: 227468
loss: 0.980455756187439,grad_norm: 0.990588131377276, iteration: 227469
loss: 0.9597388505935669,grad_norm: 0.9117675164064857, iteration: 227470
loss: 1.1093478202819824,grad_norm: 0.9999993477370689, iteration: 227471
loss: 1.0093886852264404,grad_norm: 0.9836243739742787, iteration: 227472
loss: 0.9595015645027161,grad_norm: 0.8159501044643309, iteration: 227473
loss: 0.9913491010665894,grad_norm: 0.8587760330942225, iteration: 227474
loss: 0.9810757637023926,grad_norm: 0.9101198257225929, iteration: 227475
loss: 1.0342357158660889,grad_norm: 0.9999996724363689, iteration: 227476
loss: 0.9967007040977478,grad_norm: 0.8593937215883352, iteration: 227477
loss: 1.0114169120788574,grad_norm: 0.9999995072406662, iteration: 227478
loss: 1.0055924654006958,grad_norm: 0.9999995822522041, iteration: 227479
loss: 1.0241241455078125,grad_norm: 0.9999992397820867, iteration: 227480
loss: 1.0292372703552246,grad_norm: 0.999999779201826, iteration: 227481
loss: 0.9738979935646057,grad_norm: 0.9347931837530302, iteration: 227482
loss: 1.0039982795715332,grad_norm: 0.9999990668892956, iteration: 227483
loss: 1.0162712335586548,grad_norm: 0.9999990454748716, iteration: 227484
loss: 1.0139329433441162,grad_norm: 0.9999991034316842, iteration: 227485
loss: 1.053566813468933,grad_norm: 0.989285703672345, iteration: 227486
loss: 0.9705916047096252,grad_norm: 0.9999991782118992, iteration: 227487
loss: 1.0222656726837158,grad_norm: 0.7494618362393055, iteration: 227488
loss: 1.0149527788162231,grad_norm: 0.96904005634311, iteration: 227489
loss: 0.9885184168815613,grad_norm: 0.9891084331178932, iteration: 227490
loss: 0.9857630133628845,grad_norm: 0.8525234127349546, iteration: 227491
loss: 1.000861644744873,grad_norm: 0.8462951103009475, iteration: 227492
loss: 0.9910244941711426,grad_norm: 0.8127274746071538, iteration: 227493
loss: 1.042708396911621,grad_norm: 0.9723694385271319, iteration: 227494
loss: 1.0036592483520508,grad_norm: 0.9999990763384249, iteration: 227495
loss: 1.0022687911987305,grad_norm: 0.7937620801423377, iteration: 227496
loss: 1.008160948753357,grad_norm: 0.9297810102141105, iteration: 227497
loss: 0.9722657203674316,grad_norm: 0.9968310721674555, iteration: 227498
loss: 1.0091876983642578,grad_norm: 0.9805847957134607, iteration: 227499
loss: 1.0016347169876099,grad_norm: 0.9610461133241243, iteration: 227500
loss: 1.0103518962860107,grad_norm: 0.8031834302211985, iteration: 227501
loss: 1.0143097639083862,grad_norm: 0.9106699343026271, iteration: 227502
loss: 0.9750021696090698,grad_norm: 0.9999990772265, iteration: 227503
loss: 0.9926540851593018,grad_norm: 0.9999991006744036, iteration: 227504
loss: 0.9912375807762146,grad_norm: 0.9274674047864427, iteration: 227505
loss: 0.9880411624908447,grad_norm: 0.9999996799062344, iteration: 227506
loss: 0.9776845574378967,grad_norm: 0.9749460035699113, iteration: 227507
loss: 1.0282574892044067,grad_norm: 0.9261073918619559, iteration: 227508
loss: 0.98285311460495,grad_norm: 0.9999997578917497, iteration: 227509
loss: 0.9991706609725952,grad_norm: 0.9999990694165739, iteration: 227510
loss: 1.020098328590393,grad_norm: 0.8783387796419293, iteration: 227511
loss: 0.9971888065338135,grad_norm: 0.9999992211282067, iteration: 227512
loss: 0.998966634273529,grad_norm: 0.7906265383548582, iteration: 227513
loss: 1.0102653503417969,grad_norm: 0.9999994549942596, iteration: 227514
loss: 1.0113019943237305,grad_norm: 0.7734761361024404, iteration: 227515
loss: 0.9881203770637512,grad_norm: 0.8662436005433378, iteration: 227516
loss: 1.0150812864303589,grad_norm: 0.9611634442324284, iteration: 227517
loss: 0.9990664720535278,grad_norm: 0.7871234654365279, iteration: 227518
loss: 1.0462348461151123,grad_norm: 0.9999998506447654, iteration: 227519
loss: 0.9972675442695618,grad_norm: 0.8733154692197174, iteration: 227520
loss: 0.979031503200531,grad_norm: 0.9999992711695411, iteration: 227521
loss: 1.002689003944397,grad_norm: 0.9282323620864771, iteration: 227522
loss: 1.0039715766906738,grad_norm: 0.9432303451532046, iteration: 227523
loss: 0.9985780119895935,grad_norm: 0.9079892179563576, iteration: 227524
loss: 0.9798638224601746,grad_norm: 0.9999995328979371, iteration: 227525
loss: 1.045328140258789,grad_norm: 0.9999991442578317, iteration: 227526
loss: 0.9778502583503723,grad_norm: 0.915660876463841, iteration: 227527
loss: 0.9971980452537537,grad_norm: 0.9100035581713453, iteration: 227528
loss: 0.9627541303634644,grad_norm: 0.949633489532725, iteration: 227529
loss: 1.005366563796997,grad_norm: 0.8905519399515877, iteration: 227530
loss: 1.000291109085083,grad_norm: 0.9999991086318597, iteration: 227531
loss: 0.985954999923706,grad_norm: 0.8068779134061917, iteration: 227532
loss: 1.0082682371139526,grad_norm: 0.9999995571757959, iteration: 227533
loss: 1.0102375745773315,grad_norm: 0.8543616039716904, iteration: 227534
loss: 1.0122841596603394,grad_norm: 0.8744459514505323, iteration: 227535
loss: 1.008966326713562,grad_norm: 0.9273091844485373, iteration: 227536
loss: 0.9992466568946838,grad_norm: 0.9843264475714271, iteration: 227537
loss: 1.0160305500030518,grad_norm: 0.8912589074559999, iteration: 227538
loss: 1.0084562301635742,grad_norm: 0.9490038520123636, iteration: 227539
loss: 0.9674555659294128,grad_norm: 0.9999991767046231, iteration: 227540
loss: 0.9902519583702087,grad_norm: 0.9468052393946121, iteration: 227541
loss: 0.9807915687561035,grad_norm: 0.8495693946864539, iteration: 227542
loss: 0.9552620053291321,grad_norm: 0.9299412372590473, iteration: 227543
loss: 0.9875359535217285,grad_norm: 0.9999990184505352, iteration: 227544
loss: 1.0244156122207642,grad_norm: 0.9540005199335233, iteration: 227545
loss: 0.9820561408996582,grad_norm: 0.8197199171833572, iteration: 227546
loss: 0.9941318035125732,grad_norm: 0.8487988544279091, iteration: 227547
loss: 1.0060529708862305,grad_norm: 0.9171677842414505, iteration: 227548
loss: 1.0649893283843994,grad_norm: 0.9787572246705226, iteration: 227549
loss: 0.9808937907218933,grad_norm: 0.9999991112048564, iteration: 227550
loss: 1.0382728576660156,grad_norm: 0.9230479410965262, iteration: 227551
loss: 1.0013236999511719,grad_norm: 0.9999991028724792, iteration: 227552
loss: 1.2232393026351929,grad_norm: 0.9999999249477075, iteration: 227553
loss: 1.0130906105041504,grad_norm: 0.9999991220166926, iteration: 227554
loss: 0.985232949256897,grad_norm: 0.9120474960133428, iteration: 227555
loss: 0.9868354201316833,grad_norm: 0.8623979118894417, iteration: 227556
loss: 1.0256487131118774,grad_norm: 0.8483944418679218, iteration: 227557
loss: 1.0122467279434204,grad_norm: 0.9494087441135891, iteration: 227558
loss: 0.9808954000473022,grad_norm: 0.999999380871096, iteration: 227559
loss: 0.9871493577957153,grad_norm: 0.8434599136367492, iteration: 227560
loss: 1.0312142372131348,grad_norm: 0.9823766262398705, iteration: 227561
loss: 0.9852704405784607,grad_norm: 0.9884133975620402, iteration: 227562
loss: 1.0257850885391235,grad_norm: 0.9999990461483144, iteration: 227563
loss: 1.0086510181427002,grad_norm: 0.9913789683173716, iteration: 227564
loss: 1.0195266008377075,grad_norm: 0.7764803256543041, iteration: 227565
loss: 0.9865918755531311,grad_norm: 0.8461289927332096, iteration: 227566
loss: 0.9898706674575806,grad_norm: 0.9759218210005087, iteration: 227567
loss: 0.9840127825737,grad_norm: 0.8966101817690565, iteration: 227568
loss: 1.0814547538757324,grad_norm: 0.9999991393224249, iteration: 227569
loss: 1.0625836849212646,grad_norm: 0.9999999768669762, iteration: 227570
loss: 1.0454024076461792,grad_norm: 0.9999990382158851, iteration: 227571
loss: 0.9967166781425476,grad_norm: 0.9305222544944783, iteration: 227572
loss: 0.9923545718193054,grad_norm: 0.797556050535686, iteration: 227573
loss: 1.0058215856552124,grad_norm: 0.9999989729273883, iteration: 227574
loss: 0.9572589993476868,grad_norm: 0.9999991549480792, iteration: 227575
loss: 0.9639356732368469,grad_norm: 0.9559602828888173, iteration: 227576
loss: 0.9929839968681335,grad_norm: 0.9999990871073128, iteration: 227577
loss: 1.018004059791565,grad_norm: 0.8801169147835838, iteration: 227578
loss: 0.9691874980926514,grad_norm: 0.9999990314934954, iteration: 227579
loss: 0.9962558150291443,grad_norm: 0.7995857829436839, iteration: 227580
loss: 1.0145477056503296,grad_norm: 0.9408026609243221, iteration: 227581
loss: 0.9777039289474487,grad_norm: 0.8558842771628221, iteration: 227582
loss: 0.9856482744216919,grad_norm: 0.899415385206106, iteration: 227583
loss: 0.9911341071128845,grad_norm: 0.8300223471446713, iteration: 227584
loss: 1.0193912982940674,grad_norm: 0.9281082290140268, iteration: 227585
loss: 1.0896176099777222,grad_norm: 0.9999997659676543, iteration: 227586
loss: 1.0551477670669556,grad_norm: 0.9999995896049938, iteration: 227587
loss: 1.0108616352081299,grad_norm: 0.9691234412746201, iteration: 227588
loss: 1.0156192779541016,grad_norm: 0.9999992792273253, iteration: 227589
loss: 0.9952700138092041,grad_norm: 0.8277962857429426, iteration: 227590
loss: 0.9943576455116272,grad_norm: 0.9999989626694561, iteration: 227591
loss: 1.0177453756332397,grad_norm: 0.9999990767247828, iteration: 227592
loss: 0.9747253656387329,grad_norm: 0.8543588491248919, iteration: 227593
loss: 0.998041033744812,grad_norm: 0.9566461440825185, iteration: 227594
loss: 1.1554192304611206,grad_norm: 0.9999998538249915, iteration: 227595
loss: 1.0179357528686523,grad_norm: 0.9023116895401071, iteration: 227596
loss: 1.0073930025100708,grad_norm: 0.8960006643945891, iteration: 227597
loss: 0.999302327632904,grad_norm: 0.9541139633186607, iteration: 227598
loss: 1.040820598602295,grad_norm: 0.9999995074687154, iteration: 227599
loss: 0.9953078031539917,grad_norm: 0.9999991095980707, iteration: 227600
loss: 1.086637020111084,grad_norm: 0.9999993697682376, iteration: 227601
loss: 1.007982611656189,grad_norm: 1.0000000028977558, iteration: 227602
loss: 1.0398675203323364,grad_norm: 0.8726093621832329, iteration: 227603
loss: 1.024772047996521,grad_norm: 0.9999998345103068, iteration: 227604
loss: 1.0031098127365112,grad_norm: 0.8707097724230455, iteration: 227605
loss: 0.9607217311859131,grad_norm: 0.8829685512657851, iteration: 227606
loss: 1.0170927047729492,grad_norm: 0.9007805331718899, iteration: 227607
loss: 1.0066627264022827,grad_norm: 0.9358168239670811, iteration: 227608
loss: 1.0025509595870972,grad_norm: 0.8610672792988773, iteration: 227609
loss: 1.0081270933151245,grad_norm: 0.7916635997091277, iteration: 227610
loss: 1.0251569747924805,grad_norm: 0.8386507091057914, iteration: 227611
loss: 0.9767385721206665,grad_norm: 0.999999048974777, iteration: 227612
loss: 0.9958630204200745,grad_norm: 0.9177807226318226, iteration: 227613
loss: 1.0149009227752686,grad_norm: 0.8037522254949692, iteration: 227614
loss: 1.0251163244247437,grad_norm: 0.9999989985622205, iteration: 227615
loss: 1.0285532474517822,grad_norm: 0.9469829209862792, iteration: 227616
loss: 1.0243496894836426,grad_norm: 0.9999992212864118, iteration: 227617
loss: 0.9930406808853149,grad_norm: 0.9133107982406657, iteration: 227618
loss: 1.0223472118377686,grad_norm: 0.9999990793766241, iteration: 227619
loss: 1.044521689414978,grad_norm: 0.9999996082414246, iteration: 227620
loss: 1.075308918952942,grad_norm: 0.9999997285247815, iteration: 227621
loss: 0.974295973777771,grad_norm: 0.832903023365944, iteration: 227622
loss: 1.0330321788787842,grad_norm: 0.999999064162139, iteration: 227623
loss: 1.0083494186401367,grad_norm: 0.8396322050761073, iteration: 227624
loss: 1.0309481620788574,grad_norm: 0.9621150092212218, iteration: 227625
loss: 1.0433577299118042,grad_norm: 0.9999995294393335, iteration: 227626
loss: 1.0003025531768799,grad_norm: 0.9280081106908141, iteration: 227627
loss: 1.0143011808395386,grad_norm: 0.9999990981824248, iteration: 227628
loss: 0.9739248752593994,grad_norm: 0.8207235145824289, iteration: 227629
loss: 1.0262441635131836,grad_norm: 0.8680123676367041, iteration: 227630
loss: 1.0076713562011719,grad_norm: 0.93159697160727, iteration: 227631
loss: 0.9717212319374084,grad_norm: 0.9999992018372131, iteration: 227632
loss: 0.989136815071106,grad_norm: 0.8889164330796532, iteration: 227633
loss: 0.985586404800415,grad_norm: 0.9999990036296897, iteration: 227634
loss: 0.9896448850631714,grad_norm: 0.8607455867701093, iteration: 227635
loss: 1.012428641319275,grad_norm: 0.8469273247579321, iteration: 227636
loss: 1.0003827810287476,grad_norm: 0.9999991003615891, iteration: 227637
loss: 0.9598751664161682,grad_norm: 0.9999994256943702, iteration: 227638
loss: 0.9930847883224487,grad_norm: 0.9853408482850541, iteration: 227639
loss: 0.9621005654335022,grad_norm: 0.948795878320447, iteration: 227640
loss: 1.1456856727600098,grad_norm: 0.9999997809267818, iteration: 227641
loss: 1.0289274454116821,grad_norm: 0.8719768047921931, iteration: 227642
loss: 1.0163094997406006,grad_norm: 0.8455018009501518, iteration: 227643
loss: 0.9816979765892029,grad_norm: 0.7909720029510794, iteration: 227644
loss: 0.9923897981643677,grad_norm: 0.9999991876779092, iteration: 227645
loss: 1.0091989040374756,grad_norm: 0.9795712655915804, iteration: 227646
loss: 1.015662431716919,grad_norm: 0.9999991266828777, iteration: 227647
loss: 1.0107834339141846,grad_norm: 0.9999998953169993, iteration: 227648
loss: 0.9820842742919922,grad_norm: 0.980639100076088, iteration: 227649
loss: 1.0107849836349487,grad_norm: 0.9182569787827829, iteration: 227650
loss: 0.9848341345787048,grad_norm: 0.967276932549947, iteration: 227651
loss: 0.9698358774185181,grad_norm: 0.9581361120481914, iteration: 227652
loss: 1.0220234394073486,grad_norm: 0.9228268847502985, iteration: 227653
loss: 0.9620391130447388,grad_norm: 0.9240162126602561, iteration: 227654
loss: 1.0076218843460083,grad_norm: 0.8911182708842263, iteration: 227655
loss: 1.0134289264678955,grad_norm: 0.9248742610372757, iteration: 227656
loss: 0.9981082081794739,grad_norm: 0.890557818874543, iteration: 227657
loss: 1.0330719947814941,grad_norm: 0.9999991915090433, iteration: 227658
loss: 0.9947708249092102,grad_norm: 0.9661570649033782, iteration: 227659
loss: 1.0014532804489136,grad_norm: 0.96971079528953, iteration: 227660
loss: 0.9863197803497314,grad_norm: 0.9999990060782612, iteration: 227661
loss: 0.9893088340759277,grad_norm: 0.8059710546864657, iteration: 227662
loss: 0.982799768447876,grad_norm: 0.8356294712568206, iteration: 227663
loss: 0.9934849143028259,grad_norm: 0.8895893972922498, iteration: 227664
loss: 1.0175098180770874,grad_norm: 0.8656449627726941, iteration: 227665
loss: 1.0361684560775757,grad_norm: 0.9676136735048179, iteration: 227666
loss: 1.0108643770217896,grad_norm: 0.9999996270675239, iteration: 227667
loss: 1.0228067636489868,grad_norm: 0.8374523763180328, iteration: 227668
loss: 1.0126245021820068,grad_norm: 0.9999989458970285, iteration: 227669
loss: 1.0079450607299805,grad_norm: 0.999999191206366, iteration: 227670
loss: 1.022925853729248,grad_norm: 0.9069162316601781, iteration: 227671
loss: 1.0365005731582642,grad_norm: 0.9999995179441329, iteration: 227672
loss: 0.9830713868141174,grad_norm: 0.8611450096538656, iteration: 227673
loss: 0.967792809009552,grad_norm: 0.8865044248668161, iteration: 227674
loss: 0.9467865824699402,grad_norm: 0.908609872355841, iteration: 227675
loss: 0.9904895424842834,grad_norm: 0.9999991530195627, iteration: 227676
loss: 0.9701954126358032,grad_norm: 0.9335868147702047, iteration: 227677
loss: 0.9410555362701416,grad_norm: 0.9696518562841748, iteration: 227678
loss: 0.9890464544296265,grad_norm: 0.8411920265661312, iteration: 227679
loss: 0.9547115564346313,grad_norm: 0.9999990022697176, iteration: 227680
loss: 0.9825780987739563,grad_norm: 0.8384609538703298, iteration: 227681
loss: 0.9831127524375916,grad_norm: 0.8342531364423442, iteration: 227682
loss: 0.9994308352470398,grad_norm: 0.9986130986891663, iteration: 227683
loss: 1.02196204662323,grad_norm: 0.8928519062657103, iteration: 227684
loss: 0.9902958273887634,grad_norm: 0.8710092628510436, iteration: 227685
loss: 0.9813986420631409,grad_norm: 0.960387224633729, iteration: 227686
loss: 0.9824207425117493,grad_norm: 0.9600330604258012, iteration: 227687
loss: 1.0119235515594482,grad_norm: 0.8348329137810864, iteration: 227688
loss: 1.0139042139053345,grad_norm: 0.9952495087842904, iteration: 227689
loss: 1.0340813398361206,grad_norm: 0.9935391222858327, iteration: 227690
loss: 1.032822847366333,grad_norm: 0.9032032534356114, iteration: 227691
loss: 0.9831125736236572,grad_norm: 0.9999991397469529, iteration: 227692
loss: 0.9569276571273804,grad_norm: 0.9975652739505211, iteration: 227693
loss: 0.976068377494812,grad_norm: 0.9999991605780731, iteration: 227694
loss: 0.9631572365760803,grad_norm: 0.7999277953794454, iteration: 227695
loss: 0.9984725713729858,grad_norm: 0.7753950579050192, iteration: 227696
loss: 0.9995755553245544,grad_norm: 0.9237335407339298, iteration: 227697
loss: 0.9843065142631531,grad_norm: 0.9999989466865122, iteration: 227698
loss: 1.012187123298645,grad_norm: 0.856823125663574, iteration: 227699
loss: 1.0108214616775513,grad_norm: 0.8520850272892349, iteration: 227700
loss: 0.9945996403694153,grad_norm: 0.9115403344283134, iteration: 227701
loss: 0.9917631149291992,grad_norm: 0.9376980411782175, iteration: 227702
loss: 0.9928265810012817,grad_norm: 0.9647830394126385, iteration: 227703
loss: 0.9773275852203369,grad_norm: 0.963863806428664, iteration: 227704
loss: 1.0059270858764648,grad_norm: 0.8635494366787049, iteration: 227705
loss: 0.9806744456291199,grad_norm: 0.9999991773705451, iteration: 227706
loss: 0.9958611130714417,grad_norm: 0.9287892593428453, iteration: 227707
loss: 1.021228313446045,grad_norm: 0.9113012728167128, iteration: 227708
loss: 1.0132806301116943,grad_norm: 0.9823418751994569, iteration: 227709
loss: 1.028859257698059,grad_norm: 0.914812911758259, iteration: 227710
loss: 1.0588937997817993,grad_norm: 0.8250778217395949, iteration: 227711
loss: 1.0100862979888916,grad_norm: 0.900015564467416, iteration: 227712
loss: 0.9641151428222656,grad_norm: 0.9553270979134961, iteration: 227713
loss: 0.9841225147247314,grad_norm: 0.9999992415475576, iteration: 227714
loss: 0.9749048352241516,grad_norm: 0.8559021733170226, iteration: 227715
loss: 1.014879822731018,grad_norm: 0.9999991352716656, iteration: 227716
loss: 0.968647301197052,grad_norm: 0.8635683476125123, iteration: 227717
loss: 0.9872220158576965,grad_norm: 0.9999990602212065, iteration: 227718
loss: 0.9891822338104248,grad_norm: 0.9468802638094004, iteration: 227719
loss: 1.0282657146453857,grad_norm: 0.8996318501832058, iteration: 227720
loss: 0.9868402481079102,grad_norm: 0.9999991443709713, iteration: 227721
loss: 1.0211766958236694,grad_norm: 0.8361705703370271, iteration: 227722
loss: 1.0055902004241943,grad_norm: 0.9457147297654962, iteration: 227723
loss: 1.0096728801727295,grad_norm: 0.9999996550100438, iteration: 227724
loss: 0.968707799911499,grad_norm: 0.9515322794668017, iteration: 227725
loss: 0.9786992073059082,grad_norm: 0.8126146713132494, iteration: 227726
loss: 0.993550181388855,grad_norm: 0.8756480207263132, iteration: 227727
loss: 0.9722342491149902,grad_norm: 0.999999030356175, iteration: 227728
loss: 1.0155616998672485,grad_norm: 0.8742163370589474, iteration: 227729
loss: 1.0754505395889282,grad_norm: 0.972603954371894, iteration: 227730
loss: 1.0357728004455566,grad_norm: 0.9581169294785604, iteration: 227731
loss: 1.019873023033142,grad_norm: 0.9297242417409157, iteration: 227732
loss: 1.013034462928772,grad_norm: 0.8477929912579998, iteration: 227733
loss: 0.9874130487442017,grad_norm: 0.9999990411897445, iteration: 227734
loss: 1.0101357698440552,grad_norm: 0.9201105828668067, iteration: 227735
loss: 1.0427902936935425,grad_norm: 0.9456471078779919, iteration: 227736
loss: 0.985046923160553,grad_norm: 0.9999991125490589, iteration: 227737
loss: 0.9954742789268494,grad_norm: 0.9322132558143855, iteration: 227738
loss: 1.0010377168655396,grad_norm: 0.9999995339907454, iteration: 227739
loss: 1.0275719165802002,grad_norm: 0.840453568750436, iteration: 227740
loss: 0.9948267936706543,grad_norm: 0.8819181110939682, iteration: 227741
loss: 1.0446697473526,grad_norm: 0.9531982515341104, iteration: 227742
loss: 0.9838361144065857,grad_norm: 0.8962839832096099, iteration: 227743
loss: 0.9860478043556213,grad_norm: 0.8760429324810801, iteration: 227744
loss: 0.9823132157325745,grad_norm: 0.9999992205349442, iteration: 227745
loss: 1.050658106803894,grad_norm: 0.9999992812108421, iteration: 227746
loss: 1.0579336881637573,grad_norm: 0.9999990346825063, iteration: 227747
loss: 0.9987333416938782,grad_norm: 0.8157437446406415, iteration: 227748
loss: 0.968556821346283,grad_norm: 0.9336061693157767, iteration: 227749
loss: 1.009080171585083,grad_norm: 0.8754501519979994, iteration: 227750
loss: 0.9741569757461548,grad_norm: 0.8476956445009034, iteration: 227751
loss: 1.0124236345291138,grad_norm: 0.8533359346022826, iteration: 227752
loss: 1.013332486152649,grad_norm: 0.8681681953528173, iteration: 227753
loss: 0.9794721007347107,grad_norm: 0.999999006746291, iteration: 227754
loss: 1.0720990896224976,grad_norm: 0.9999991536899285, iteration: 227755
loss: 0.9785667657852173,grad_norm: 0.8315497399253511, iteration: 227756
loss: 0.9743999242782593,grad_norm: 0.999999125717386, iteration: 227757
loss: 0.9900751709938049,grad_norm: 0.8502752793714723, iteration: 227758
loss: 0.9782580733299255,grad_norm: 0.8987902147696133, iteration: 227759
loss: 1.0032718181610107,grad_norm: 0.8304399930645097, iteration: 227760
loss: 1.016963243484497,grad_norm: 1.0000000614158793, iteration: 227761
loss: 1.0270253419876099,grad_norm: 0.9999991076616555, iteration: 227762
loss: 0.9903765916824341,grad_norm: 0.8555847722558516, iteration: 227763
loss: 0.9958205223083496,grad_norm: 0.9298177044755739, iteration: 227764
loss: 1.018231987953186,grad_norm: 0.9999990428305321, iteration: 227765
loss: 0.9852242469787598,grad_norm: 0.9416096670313817, iteration: 227766
loss: 0.9754270315170288,grad_norm: 0.8668239846466002, iteration: 227767
loss: 0.9863632917404175,grad_norm: 0.9999992369632683, iteration: 227768
loss: 1.0149883031845093,grad_norm: 0.999999187463867, iteration: 227769
loss: 0.9526472091674805,grad_norm: 0.9999992162169474, iteration: 227770
loss: 1.0089282989501953,grad_norm: 0.9999990718063467, iteration: 227771
loss: 0.9955753087997437,grad_norm: 0.9201226461264042, iteration: 227772
loss: 1.0335041284561157,grad_norm: 0.8382241303495565, iteration: 227773
loss: 0.9827151298522949,grad_norm: 0.9419752683403296, iteration: 227774
loss: 1.0230997800827026,grad_norm: 0.8274468584117954, iteration: 227775
loss: 1.054046630859375,grad_norm: 0.9999995403965013, iteration: 227776
loss: 1.0322775840759277,grad_norm: 0.9999996094076576, iteration: 227777
loss: 0.9524861574172974,grad_norm: 0.9999991524996517, iteration: 227778
loss: 1.0029406547546387,grad_norm: 0.7410657306441439, iteration: 227779
loss: 0.9760341048240662,grad_norm: 0.7998702680294305, iteration: 227780
loss: 1.0119273662567139,grad_norm: 0.8136917088281734, iteration: 227781
loss: 1.0832191705703735,grad_norm: 0.8845909637539178, iteration: 227782
loss: 0.972825288772583,grad_norm: 0.9999992057608005, iteration: 227783
loss: 0.945202112197876,grad_norm: 0.883216079490965, iteration: 227784
loss: 1.0965375900268555,grad_norm: 0.9560869907093738, iteration: 227785
loss: 0.98409503698349,grad_norm: 0.907846752157261, iteration: 227786
loss: 1.035147786140442,grad_norm: 0.9999989689661581, iteration: 227787
loss: 1.0405571460723877,grad_norm: 0.9999991933029523, iteration: 227788
loss: 0.9597281217575073,grad_norm: 0.8934004316516637, iteration: 227789
loss: 1.0133846998214722,grad_norm: 0.7227932361566403, iteration: 227790
loss: 1.0474891662597656,grad_norm: 0.9999990348244439, iteration: 227791
loss: 1.0101826190948486,grad_norm: 0.924666512384746, iteration: 227792
loss: 0.9609488844871521,grad_norm: 0.9999990896688845, iteration: 227793
loss: 1.0184651613235474,grad_norm: 0.9849055676104198, iteration: 227794
loss: 0.9709576964378357,grad_norm: 0.8711116013618695, iteration: 227795
loss: 0.9917609691619873,grad_norm: 0.9770809149603431, iteration: 227796
loss: 0.9832982420921326,grad_norm: 0.9138157585352878, iteration: 227797
loss: 1.0029070377349854,grad_norm: 0.8093538073062595, iteration: 227798
loss: 0.9877662062644958,grad_norm: 0.9999990265165287, iteration: 227799
loss: 1.0168538093566895,grad_norm: 0.8679443862801166, iteration: 227800
loss: 0.9159967303276062,grad_norm: 0.9999990886822513, iteration: 227801
loss: 1.0294599533081055,grad_norm: 0.7597384224694113, iteration: 227802
loss: 0.9539629817008972,grad_norm: 0.7951160378232723, iteration: 227803
loss: 1.0414272546768188,grad_norm: 0.8777732444082607, iteration: 227804
loss: 0.9535626769065857,grad_norm: 0.8727348862521364, iteration: 227805
loss: 1.1112083196640015,grad_norm: 0.9999991791532327, iteration: 227806
loss: 0.9783203601837158,grad_norm: 0.8268073171017413, iteration: 227807
loss: 0.9843840003013611,grad_norm: 0.7288008300788587, iteration: 227808
loss: 1.0175179243087769,grad_norm: 0.8754451975482808, iteration: 227809
loss: 0.9640024304389954,grad_norm: 0.999999167228041, iteration: 227810
loss: 1.0441914796829224,grad_norm: 0.9999991428504047, iteration: 227811
loss: 1.005384087562561,grad_norm: 0.7858606338473466, iteration: 227812
loss: 1.108190655708313,grad_norm: 0.9999992465351072, iteration: 227813
loss: 1.0563089847564697,grad_norm: 0.8655691885565752, iteration: 227814
loss: 1.0922788381576538,grad_norm: 0.9999991965850372, iteration: 227815
loss: 1.0542751550674438,grad_norm: 0.9999996647932754, iteration: 227816
loss: 1.0284615755081177,grad_norm: 0.944049005012569, iteration: 227817
loss: 1.0108095407485962,grad_norm: 0.8081575849063206, iteration: 227818
loss: 0.9772600531578064,grad_norm: 0.9666807888946688, iteration: 227819
loss: 1.025050163269043,grad_norm: 0.8604872059936757, iteration: 227820
loss: 0.9776297211647034,grad_norm: 0.8532860311548872, iteration: 227821
loss: 1.0137287378311157,grad_norm: 0.9140390759641535, iteration: 227822
loss: 1.0234229564666748,grad_norm: 0.9999991123731159, iteration: 227823
loss: 1.027402400970459,grad_norm: 0.9999997718558196, iteration: 227824
loss: 1.0137286186218262,grad_norm: 0.9271792084933895, iteration: 227825
loss: 0.984636664390564,grad_norm: 0.9115870033683147, iteration: 227826
loss: 0.976797342300415,grad_norm: 0.8760419295249343, iteration: 227827
loss: 1.0995835065841675,grad_norm: 0.9999991046509397, iteration: 227828
loss: 1.0974371433258057,grad_norm: 0.9999996019903992, iteration: 227829
loss: 0.9549016356468201,grad_norm: 0.8229120252879178, iteration: 227830
loss: 1.069862961769104,grad_norm: 0.9999992356122201, iteration: 227831
loss: 1.030240774154663,grad_norm: 0.9461170773681281, iteration: 227832
loss: 1.0482103824615479,grad_norm: 0.9886677963537818, iteration: 227833
loss: 1.0277129411697388,grad_norm: 0.9737555659858198, iteration: 227834
loss: 1.1013778448104858,grad_norm: 0.9400938686110831, iteration: 227835
loss: 1.0216455459594727,grad_norm: 0.9999990520939397, iteration: 227836
loss: 0.9734795093536377,grad_norm: 0.9999991459604184, iteration: 227837
loss: 0.960925281047821,grad_norm: 0.9056942580144002, iteration: 227838
loss: 0.9939476847648621,grad_norm: 0.9506120960350666, iteration: 227839
loss: 0.9689466953277588,grad_norm: 0.9999990934841113, iteration: 227840
loss: 0.9939156174659729,grad_norm: 0.9417585446468837, iteration: 227841
loss: 1.0556204319000244,grad_norm: 0.9999991207246711, iteration: 227842
loss: 0.9333083033561707,grad_norm: 0.9878863730645533, iteration: 227843
loss: 0.9943590760231018,grad_norm: 0.9747927318590841, iteration: 227844
loss: 0.9954495429992676,grad_norm: 0.8579805877347201, iteration: 227845
loss: 0.9983780384063721,grad_norm: 0.9558384467513116, iteration: 227846
loss: 0.9766576290130615,grad_norm: 0.9428888099907597, iteration: 227847
loss: 1.0698912143707275,grad_norm: 0.9999993106344462, iteration: 227848
loss: 1.0749921798706055,grad_norm: 0.9999991316623368, iteration: 227849
loss: 0.9760887622833252,grad_norm: 0.8641355974309821, iteration: 227850
loss: 1.0030758380889893,grad_norm: 0.8246721421266688, iteration: 227851
loss: 1.0038349628448486,grad_norm: 0.9999995629902109, iteration: 227852
loss: 1.097510576248169,grad_norm: 0.999999474231682, iteration: 227853
loss: 1.0527702569961548,grad_norm: 0.9999992737657525, iteration: 227854
loss: 0.9652023911476135,grad_norm: 0.8889637400946023, iteration: 227855
loss: 1.003913164138794,grad_norm: 0.8524308098978033, iteration: 227856
loss: 1.0318597555160522,grad_norm: 0.9201400494211495, iteration: 227857
loss: 1.0071266889572144,grad_norm: 0.9999991156135704, iteration: 227858
loss: 1.0712751150131226,grad_norm: 0.9999992649074017, iteration: 227859
loss: 1.015602707862854,grad_norm: 0.8551773098130239, iteration: 227860
loss: 0.9782767295837402,grad_norm: 0.9999990866845698, iteration: 227861
loss: 0.9983199834823608,grad_norm: 0.9999997304097805, iteration: 227862
loss: 1.0140825510025024,grad_norm: 0.928591569773484, iteration: 227863
loss: 1.015244960784912,grad_norm: 0.8668159456347639, iteration: 227864
loss: 0.9647010564804077,grad_norm: 0.857249767141164, iteration: 227865
loss: 1.015828251838684,grad_norm: 0.8067551914903288, iteration: 227866
loss: 0.9980326294898987,grad_norm: 0.92084444707281, iteration: 227867
loss: 1.0097421407699585,grad_norm: 0.9999993517917559, iteration: 227868
loss: 0.9898787140846252,grad_norm: 0.9999991413600574, iteration: 227869
loss: 0.9954807758331299,grad_norm: 0.9999990556833271, iteration: 227870
loss: 1.0133936405181885,grad_norm: 0.9057168577792176, iteration: 227871
loss: 0.9947416186332703,grad_norm: 0.8809869123161508, iteration: 227872
loss: 1.0042921304702759,grad_norm: 0.9569606028442914, iteration: 227873
loss: 1.0042937994003296,grad_norm: 0.9999990534980452, iteration: 227874
loss: 0.9824617505073547,grad_norm: 0.9979235269549371, iteration: 227875
loss: 1.0440611839294434,grad_norm: 0.9482412951655079, iteration: 227876
loss: 1.0261552333831787,grad_norm: 0.976894322288587, iteration: 227877
loss: 1.0053677558898926,grad_norm: 0.9999989557466636, iteration: 227878
loss: 0.9818905591964722,grad_norm: 0.9999990346028333, iteration: 227879
loss: 1.0467499494552612,grad_norm: 0.9267435981761003, iteration: 227880
loss: 1.0225040912628174,grad_norm: 0.7946180327091146, iteration: 227881
loss: 1.037445068359375,grad_norm: 0.9646514673531409, iteration: 227882
loss: 1.0047870874404907,grad_norm: 0.9254464447341341, iteration: 227883
loss: 0.974966287612915,grad_norm: 0.9999991252230747, iteration: 227884
loss: 0.985956609249115,grad_norm: 0.9316351268341528, iteration: 227885
loss: 0.9885798096656799,grad_norm: 0.9428050622788855, iteration: 227886
loss: 1.003258466720581,grad_norm: 0.9154794857881418, iteration: 227887
loss: 1.0180320739746094,grad_norm: 0.7753612863604094, iteration: 227888
loss: 0.9918491840362549,grad_norm: 0.914538715645917, iteration: 227889
loss: 1.0099107027053833,grad_norm: 0.7834381854028573, iteration: 227890
loss: 1.0060299634933472,grad_norm: 0.8909469525032068, iteration: 227891
loss: 1.0472941398620605,grad_norm: 0.9425690058400787, iteration: 227892
loss: 1.0021615028381348,grad_norm: 0.9999992453854084, iteration: 227893
loss: 0.987875759601593,grad_norm: 0.8774444288328992, iteration: 227894
loss: 1.0023201704025269,grad_norm: 0.9999990924235166, iteration: 227895
loss: 1.0853780508041382,grad_norm: 0.9607174356046346, iteration: 227896
loss: 1.0213344097137451,grad_norm: 0.9160752837464273, iteration: 227897
loss: 1.0047223567962646,grad_norm: 0.9999990543437405, iteration: 227898
loss: 0.9911969304084778,grad_norm: 0.7936340673121817, iteration: 227899
loss: 0.9982459545135498,grad_norm: 0.9999990458842786, iteration: 227900
loss: 0.9879345297813416,grad_norm: 0.9759510739141916, iteration: 227901
loss: 1.0129435062408447,grad_norm: 0.9186681122497737, iteration: 227902
loss: 0.9795634746551514,grad_norm: 0.9999992012725647, iteration: 227903
loss: 0.983572781085968,grad_norm: 0.8817654583415505, iteration: 227904
loss: 1.0087162256240845,grad_norm: 0.9999990497349099, iteration: 227905
loss: 0.9807106852531433,grad_norm: 0.8838109106353541, iteration: 227906
loss: 0.9730319976806641,grad_norm: 0.8201378226605466, iteration: 227907
loss: 1.015963077545166,grad_norm: 0.9127868142807568, iteration: 227908
loss: 1.108425259590149,grad_norm: 0.9999992915587965, iteration: 227909
loss: 0.9957692623138428,grad_norm: 0.8811322397091389, iteration: 227910
loss: 1.0158233642578125,grad_norm: 0.7557294327105974, iteration: 227911
loss: 1.0019254684448242,grad_norm: 0.8843873585917106, iteration: 227912
loss: 0.9732201099395752,grad_norm: 0.9874826204121336, iteration: 227913
loss: 1.009292721748352,grad_norm: 0.9878964018047575, iteration: 227914
loss: 0.9868268370628357,grad_norm: 0.808229121830437, iteration: 227915
loss: 0.986273467540741,grad_norm: 0.9999991347908688, iteration: 227916
loss: 1.0258839130401611,grad_norm: 0.983338143622802, iteration: 227917
loss: 0.9823305010795593,grad_norm: 0.9065787431931019, iteration: 227918
loss: 1.0028233528137207,grad_norm: 0.9999997931382489, iteration: 227919
loss: 0.9810376167297363,grad_norm: 0.9428839830991549, iteration: 227920
loss: 1.0128178596496582,grad_norm: 0.9907509742676424, iteration: 227921
loss: 1.068044662475586,grad_norm: 0.9081797504712878, iteration: 227922
loss: 0.9844055771827698,grad_norm: 0.9944534009738845, iteration: 227923
loss: 1.0237044095993042,grad_norm: 0.9150204531367232, iteration: 227924
loss: 0.9935756325721741,grad_norm: 0.7971103926312549, iteration: 227925
loss: 1.0100966691970825,grad_norm: 0.9163720365896111, iteration: 227926
loss: 0.9951202273368835,grad_norm: 0.9509950954138203, iteration: 227927
loss: 0.9878442883491516,grad_norm: 0.95918819364127, iteration: 227928
loss: 1.002540946006775,grad_norm: 0.9426607320193474, iteration: 227929
loss: 1.006757140159607,grad_norm: 0.8758000518769833, iteration: 227930
loss: 1.0211713314056396,grad_norm: 0.9999994823988233, iteration: 227931
loss: 0.9759421944618225,grad_norm: 0.894488857763375, iteration: 227932
loss: 1.0538066625595093,grad_norm: 0.9999993900723642, iteration: 227933
loss: 0.9637025594711304,grad_norm: 0.8536425102044186, iteration: 227934
loss: 1.018678903579712,grad_norm: 0.9999991227047621, iteration: 227935
loss: 1.0139580965042114,grad_norm: 0.9999992443571173, iteration: 227936
loss: 1.020975947380066,grad_norm: 0.8835854933584133, iteration: 227937
loss: 0.9946805238723755,grad_norm: 0.9668596373492403, iteration: 227938
loss: 0.9963597655296326,grad_norm: 0.8600029861980342, iteration: 227939
loss: 1.0020474195480347,grad_norm: 0.924422074189791, iteration: 227940
loss: 0.9731841683387756,grad_norm: 0.9565966788645467, iteration: 227941
loss: 0.9417176246643066,grad_norm: 0.9580144171345605, iteration: 227942
loss: 1.0087919235229492,grad_norm: 0.8608851472946326, iteration: 227943
loss: 0.9551924467086792,grad_norm: 0.9965313478303849, iteration: 227944
loss: 0.9815067648887634,grad_norm: 0.9106980156196219, iteration: 227945
loss: 0.999771773815155,grad_norm: 0.7132671729033926, iteration: 227946
loss: 1.032670259475708,grad_norm: 0.9999991882035104, iteration: 227947
loss: 1.0097428560256958,grad_norm: 0.9976740268556267, iteration: 227948
loss: 1.0070487260818481,grad_norm: 0.9746049408833412, iteration: 227949
loss: 0.9993533492088318,grad_norm: 0.9999992596064933, iteration: 227950
loss: 0.983140230178833,grad_norm: 0.9999990608903602, iteration: 227951
loss: 0.9745935797691345,grad_norm: 0.946860942572903, iteration: 227952
loss: 1.0136889219284058,grad_norm: 0.8327872593363482, iteration: 227953
loss: 0.9581654071807861,grad_norm: 0.8610298591213509, iteration: 227954
loss: 1.0085303783416748,grad_norm: 0.8537497163501826, iteration: 227955
loss: 1.0318816900253296,grad_norm: 0.9999991430806711, iteration: 227956
loss: 1.026469111442566,grad_norm: 0.9999995543571533, iteration: 227957
loss: 1.0251942873001099,grad_norm: 0.9999992042687403, iteration: 227958
loss: 0.9754060506820679,grad_norm: 0.9999990754314602, iteration: 227959
loss: 0.953641414642334,grad_norm: 0.9999991152671723, iteration: 227960
loss: 1.118564248085022,grad_norm: 0.9999991317430242, iteration: 227961
loss: 0.9645191431045532,grad_norm: 0.9607818094084782, iteration: 227962
loss: 0.9922498464584351,grad_norm: 0.8934034591322129, iteration: 227963
loss: 1.0390729904174805,grad_norm: 0.8993202607643302, iteration: 227964
loss: 0.972270667552948,grad_norm: 0.8534752654542522, iteration: 227965
loss: 0.9837990403175354,grad_norm: 0.7936616900633618, iteration: 227966
loss: 0.9958121180534363,grad_norm: 0.9167503132295185, iteration: 227967
loss: 1.0059268474578857,grad_norm: 0.9334819619271253, iteration: 227968
loss: 1.01567804813385,grad_norm: 0.9999991533529627, iteration: 227969
loss: 0.9904976487159729,grad_norm: 0.9917304364142409, iteration: 227970
loss: 1.065470576286316,grad_norm: 0.9999993471578664, iteration: 227971
loss: 0.9977543950080872,grad_norm: 0.8641516131022517, iteration: 227972
loss: 1.0063753128051758,grad_norm: 0.9841447037023899, iteration: 227973
loss: 1.0232945680618286,grad_norm: 0.9999993227987376, iteration: 227974
loss: 1.0133922100067139,grad_norm: 0.9999991206714317, iteration: 227975
loss: 0.9754739999771118,grad_norm: 0.9485917728580282, iteration: 227976
loss: 1.0397251844406128,grad_norm: 0.9387467224175303, iteration: 227977
loss: 1.01656973361969,grad_norm: 0.9291748594839655, iteration: 227978
loss: 0.9766938090324402,grad_norm: 0.9344045391049651, iteration: 227979
loss: 0.9617890119552612,grad_norm: 0.9803626378945043, iteration: 227980
loss: 0.9827395081520081,grad_norm: 0.9145350410820898, iteration: 227981
loss: 0.9792206883430481,grad_norm: 0.8240579485348087, iteration: 227982
loss: 0.9675782918930054,grad_norm: 0.9171883229725737, iteration: 227983
loss: 1.006805181503296,grad_norm: 0.9102655540013193, iteration: 227984
loss: 0.9838066101074219,grad_norm: 0.9216592240690775, iteration: 227985
loss: 0.9868718385696411,grad_norm: 0.7848496012360832, iteration: 227986
loss: 0.958655834197998,grad_norm: 0.94269354511003, iteration: 227987
loss: 0.9911925792694092,grad_norm: 0.9466641854804729, iteration: 227988
loss: 1.0097824335098267,grad_norm: 0.9999992992811202, iteration: 227989
loss: 1.010757565498352,grad_norm: 0.8375121688169583, iteration: 227990
loss: 1.0092836618423462,grad_norm: 0.9438370831793181, iteration: 227991
loss: 0.9600799083709717,grad_norm: 0.9966324859186065, iteration: 227992
loss: 0.9791190028190613,grad_norm: 0.8498147843377181, iteration: 227993
loss: 1.002787470817566,grad_norm: 0.9999991824941271, iteration: 227994
loss: 1.0236899852752686,grad_norm: 0.8876202170857801, iteration: 227995
loss: 1.0285556316375732,grad_norm: 0.8011196989351784, iteration: 227996
loss: 0.9598804116249084,grad_norm: 0.9204762998092425, iteration: 227997
loss: 1.0130064487457275,grad_norm: 0.9999991156639126, iteration: 227998
loss: 0.9914401769638062,grad_norm: 0.9999990897132298, iteration: 227999
loss: 0.9620032906532288,grad_norm: 0.8362463367341691, iteration: 228000
loss: 1.0070594549179077,grad_norm: 0.9533799612909534, iteration: 228001
loss: 1.0276530981063843,grad_norm: 0.9027696082593395, iteration: 228002
loss: 0.981259286403656,grad_norm: 0.8863039408044233, iteration: 228003
loss: 1.021403193473816,grad_norm: 0.8952450367754647, iteration: 228004
loss: 0.9970162510871887,grad_norm: 0.9999991483333736, iteration: 228005
loss: 0.9729465842247009,grad_norm: 0.873290223939147, iteration: 228006
loss: 0.9667609333992004,grad_norm: 0.9625756710847947, iteration: 228007
loss: 0.980237603187561,grad_norm: 0.797618376525506, iteration: 228008
loss: 1.022986650466919,grad_norm: 0.9853239353315871, iteration: 228009
loss: 1.0320260524749756,grad_norm: 0.9394715004375416, iteration: 228010
loss: 0.9705798625946045,grad_norm: 0.8925445923865832, iteration: 228011
loss: 1.0462993383407593,grad_norm: 0.9137216360904402, iteration: 228012
loss: 1.0183466672897339,grad_norm: 0.9649833099718361, iteration: 228013
loss: 0.9713302850723267,grad_norm: 0.9246528583468858, iteration: 228014
loss: 0.9349543452262878,grad_norm: 0.8603119403976804, iteration: 228015
loss: 1.012624740600586,grad_norm: 0.9172399360684138, iteration: 228016
loss: 1.0457700490951538,grad_norm: 0.9999999488293027, iteration: 228017
loss: 1.0143598318099976,grad_norm: 0.7390028462322167, iteration: 228018
loss: 1.0579266548156738,grad_norm: 0.9999993643645997, iteration: 228019
loss: 0.9672380685806274,grad_norm: 0.9999996560782294, iteration: 228020
loss: 0.9913231730461121,grad_norm: 0.9999990694134665, iteration: 228021
loss: 0.9954853057861328,grad_norm: 0.9921376222892745, iteration: 228022
loss: 0.9848102927207947,grad_norm: 0.9999990242162266, iteration: 228023
loss: 1.021863579750061,grad_norm: 0.991700457805272, iteration: 228024
loss: 1.0051047801971436,grad_norm: 0.9999990107466818, iteration: 228025
loss: 1.0106515884399414,grad_norm: 0.9162783552559676, iteration: 228026
loss: 0.9730367064476013,grad_norm: 0.94108640876235, iteration: 228027
loss: 0.9801679253578186,grad_norm: 0.9868769610920002, iteration: 228028
loss: 1.0321006774902344,grad_norm: 0.9940697041000052, iteration: 228029
loss: 0.993510901927948,grad_norm: 0.9999989721494313, iteration: 228030
loss: 1.018405795097351,grad_norm: 0.979548475117559, iteration: 228031
loss: 1.0123435258865356,grad_norm: 0.9360116214689534, iteration: 228032
loss: 0.9802471995353699,grad_norm: 0.8811284215795802, iteration: 228033
loss: 0.9830131530761719,grad_norm: 0.9999990244181736, iteration: 228034
loss: 0.9825555086135864,grad_norm: 0.9489342587274848, iteration: 228035
loss: 0.9661778211593628,grad_norm: 0.7897431673911355, iteration: 228036
loss: 0.9667143821716309,grad_norm: 0.9940103488920013, iteration: 228037
loss: 0.9607643485069275,grad_norm: 0.9528603790098679, iteration: 228038
loss: 1.0192370414733887,grad_norm: 0.9999991887183611, iteration: 228039
loss: 0.9933936595916748,grad_norm: 0.81222855732437, iteration: 228040
loss: 0.9985851049423218,grad_norm: 0.9999991547648169, iteration: 228041
loss: 0.9908197522163391,grad_norm: 0.7868819481882807, iteration: 228042
loss: 0.9869619607925415,grad_norm: 0.9442131521203097, iteration: 228043
loss: 0.9935325384140015,grad_norm: 0.9856041856349284, iteration: 228044
loss: 0.9959189891815186,grad_norm: 0.9175825786478721, iteration: 228045
loss: 0.9943133592605591,grad_norm: 0.999999108798693, iteration: 228046
loss: 1.0256054401397705,grad_norm: 0.8500009924836123, iteration: 228047
loss: 0.9736878275871277,grad_norm: 0.9999990681303474, iteration: 228048
loss: 0.9948088526725769,grad_norm: 0.9999991516172341, iteration: 228049
loss: 1.054108738899231,grad_norm: 0.9999999219814325, iteration: 228050
loss: 0.9738257527351379,grad_norm: 0.8604433255651329, iteration: 228051
loss: 0.9881574511528015,grad_norm: 0.825172884855888, iteration: 228052
loss: 1.0118016004562378,grad_norm: 0.999999197497716, iteration: 228053
loss: 0.9679911732673645,grad_norm: 0.9999991814449586, iteration: 228054
loss: 1.0275005102157593,grad_norm: 0.9999999856284241, iteration: 228055
loss: 0.9739070534706116,grad_norm: 0.9999992124201013, iteration: 228056
loss: 1.004483938217163,grad_norm: 0.9957820628210137, iteration: 228057
loss: 0.9863820672035217,grad_norm: 0.9154888794240922, iteration: 228058
loss: 1.1067560911178589,grad_norm: 0.866811494270912, iteration: 228059
loss: 1.0267915725708008,grad_norm: 0.9999990667711838, iteration: 228060
loss: 1.0229591131210327,grad_norm: 0.8018795311000124, iteration: 228061
loss: 0.9790050387382507,grad_norm: 0.9581669539510218, iteration: 228062
loss: 0.9815765619277954,grad_norm: 0.9999991156713852, iteration: 228063
loss: 1.0068159103393555,grad_norm: 0.8609019805023215, iteration: 228064
loss: 0.9781608581542969,grad_norm: 0.9999990955700244, iteration: 228065
loss: 0.9834186434745789,grad_norm: 0.86462250628861, iteration: 228066
loss: 0.9686266779899597,grad_norm: 0.9588537310217687, iteration: 228067
loss: 0.9748468399047852,grad_norm: 0.9999991301024282, iteration: 228068
loss: 1.1941392421722412,grad_norm: 0.887643745006894, iteration: 228069
loss: 0.9981557130813599,grad_norm: 0.942424774166643, iteration: 228070
loss: 1.005264401435852,grad_norm: 0.8855414392322251, iteration: 228071
loss: 0.959717869758606,grad_norm: 0.8000671069384111, iteration: 228072
loss: 1.0314841270446777,grad_norm: 0.9999992419338859, iteration: 228073
loss: 0.9797306656837463,grad_norm: 0.8495568871539918, iteration: 228074
loss: 0.9819956421852112,grad_norm: 0.9341664207880217, iteration: 228075
loss: 1.0129541158676147,grad_norm: 0.9240296849294796, iteration: 228076
loss: 0.9981086254119873,grad_norm: 0.9645740057497498, iteration: 228077
loss: 0.998359739780426,grad_norm: 0.9101435424452757, iteration: 228078
loss: 0.9734305739402771,grad_norm: 0.8885447271666361, iteration: 228079
loss: 1.0203404426574707,grad_norm: 0.9597542614634843, iteration: 228080
loss: 0.9890899658203125,grad_norm: 0.8282545273303641, iteration: 228081
loss: 1.0051219463348389,grad_norm: 0.8915526262163803, iteration: 228082
loss: 1.0154099464416504,grad_norm: 0.8145478931376163, iteration: 228083
loss: 1.0466415882110596,grad_norm: 0.956224900533622, iteration: 228084
loss: 1.0450743436813354,grad_norm: 0.999998920972573, iteration: 228085
loss: 0.9709265232086182,grad_norm: 0.7946148801998903, iteration: 228086
loss: 0.9999826550483704,grad_norm: 0.9999991256171806, iteration: 228087
loss: 1.0440019369125366,grad_norm: 0.9999990398253876, iteration: 228088
loss: 0.9956569671630859,grad_norm: 0.892473535832248, iteration: 228089
loss: 0.99217289686203,grad_norm: 0.9999991229995157, iteration: 228090
loss: 0.9888587594032288,grad_norm: 0.9999989823206079, iteration: 228091
loss: 0.9866513609886169,grad_norm: 0.9999990465182336, iteration: 228092
loss: 0.9444583654403687,grad_norm: 0.9928348388032701, iteration: 228093
loss: 1.0643559694290161,grad_norm: 0.781853701889268, iteration: 228094
loss: 0.9587177038192749,grad_norm: 0.8860087658640312, iteration: 228095
loss: 0.9817425608634949,grad_norm: 0.885123057992313, iteration: 228096
loss: 0.9645444750785828,grad_norm: 0.9660614295141455, iteration: 228097
loss: 1.0320193767547607,grad_norm: 0.8596290992678887, iteration: 228098
loss: 0.9859510660171509,grad_norm: 0.7977265985041587, iteration: 228099
loss: 0.9917809367179871,grad_norm: 0.8802633469331882, iteration: 228100
loss: 1.0483906269073486,grad_norm: 0.9254320707308399, iteration: 228101
loss: 1.0213662385940552,grad_norm: 0.9999991789931448, iteration: 228102
loss: 1.0265002250671387,grad_norm: 0.9999991555025481, iteration: 228103
loss: 1.1307655572891235,grad_norm: 0.999999754314908, iteration: 228104
loss: 1.099179744720459,grad_norm: 0.9999999142476849, iteration: 228105
loss: 0.9820498824119568,grad_norm: 0.9666812788076101, iteration: 228106
loss: 0.9760767817497253,grad_norm: 0.9352159333799644, iteration: 228107
loss: 1.1170803308486938,grad_norm: 0.9999992599508452, iteration: 228108
loss: 0.9854808449745178,grad_norm: 0.9999992045379805, iteration: 228109
loss: 1.003579020500183,grad_norm: 0.9999990905310645, iteration: 228110
loss: 1.019923448562622,grad_norm: 0.999999253845289, iteration: 228111
loss: 1.0242887735366821,grad_norm: 0.9947200195618042, iteration: 228112
loss: 0.9918000102043152,grad_norm: 0.9999991262511855, iteration: 228113
loss: 0.983245849609375,grad_norm: 0.796226035049726, iteration: 228114
loss: 0.9929563999176025,grad_norm: 0.9999990434307915, iteration: 228115
loss: 0.973577618598938,grad_norm: 0.8942698031358906, iteration: 228116
loss: 0.9752326011657715,grad_norm: 0.9511776735768686, iteration: 228117
loss: 1.1019327640533447,grad_norm: 0.9999996171739455, iteration: 228118
loss: 1.0112460851669312,grad_norm: 0.90076102720668, iteration: 228119
loss: 1.0377534627914429,grad_norm: 0.9330270216360933, iteration: 228120
loss: 1.0139659643173218,grad_norm: 0.9109320306645118, iteration: 228121
loss: 1.0013132095336914,grad_norm: 0.9999991732396156, iteration: 228122
loss: 0.9797271490097046,grad_norm: 0.8884181216241192, iteration: 228123
loss: 1.0133659839630127,grad_norm: 0.9999990227075053, iteration: 228124
loss: 1.0080209970474243,grad_norm: 0.8630955062674375, iteration: 228125
loss: 0.9795331358909607,grad_norm: 0.9036040040362772, iteration: 228126
loss: 1.029934287071228,grad_norm: 0.9215787657868236, iteration: 228127
loss: 0.9839673042297363,grad_norm: 0.8455533872274305, iteration: 228128
loss: 0.9915831089019775,grad_norm: 0.8577054814082641, iteration: 228129
loss: 1.0214403867721558,grad_norm: 0.9031534292135761, iteration: 228130
loss: 1.0155233144760132,grad_norm: 0.9999995675935834, iteration: 228131
loss: 1.0191527605056763,grad_norm: 0.905868155785532, iteration: 228132
loss: 1.0008246898651123,grad_norm: 0.9756828024611987, iteration: 228133
loss: 1.0205714702606201,grad_norm: 0.9427478054876616, iteration: 228134
loss: 1.016268253326416,grad_norm: 0.9573662735829581, iteration: 228135
loss: 0.9733614325523376,grad_norm: 0.9083819991573243, iteration: 228136
loss: 0.9771270751953125,grad_norm: 0.936097927556933, iteration: 228137
loss: 0.9738321900367737,grad_norm: 0.8948004735859197, iteration: 228138
loss: 0.9774823784828186,grad_norm: 0.9035091922339805, iteration: 228139
loss: 0.9817997813224792,grad_norm: 0.9999990743195883, iteration: 228140
loss: 1.0101898908615112,grad_norm: 0.9999995835826649, iteration: 228141
loss: 1.0076262950897217,grad_norm: 0.8548167069968056, iteration: 228142
loss: 0.987648069858551,grad_norm: 0.9329553351431663, iteration: 228143
loss: 1.0086926221847534,grad_norm: 0.9999990228055377, iteration: 228144
loss: 0.9642099142074585,grad_norm: 0.9999991615516374, iteration: 228145
loss: 1.0072067975997925,grad_norm: 0.8835289807367745, iteration: 228146
loss: 0.9464170336723328,grad_norm: 0.8949756483552297, iteration: 228147
loss: 1.0358411073684692,grad_norm: 0.9999995256821409, iteration: 228148
loss: 1.0089913606643677,grad_norm: 0.8617431066047229, iteration: 228149
loss: 1.0129164457321167,grad_norm: 0.9999999092936745, iteration: 228150
loss: 0.9990692138671875,grad_norm: 0.9999990005602286, iteration: 228151
loss: 0.9967885613441467,grad_norm: 0.815129914636171, iteration: 228152
loss: 0.9995388984680176,grad_norm: 0.9585607201261938, iteration: 228153
loss: 1.0274591445922852,grad_norm: 0.9999989982221955, iteration: 228154
loss: 0.9983245730400085,grad_norm: 0.9898492633977352, iteration: 228155
loss: 1.0043931007385254,grad_norm: 0.9158533379924423, iteration: 228156
loss: 0.9815423488616943,grad_norm: 0.9506812476052187, iteration: 228157
loss: 1.0976049900054932,grad_norm: 0.9999992996579582, iteration: 228158
loss: 0.987736701965332,grad_norm: 0.8062362868854528, iteration: 228159
loss: 0.9971073865890503,grad_norm: 0.9999990933678741, iteration: 228160
loss: 0.9584528803825378,grad_norm: 0.8401183245848319, iteration: 228161
loss: 1.0268194675445557,grad_norm: 0.9371242791619571, iteration: 228162
loss: 0.9711902141571045,grad_norm: 0.7688083708076958, iteration: 228163
loss: 0.9746381044387817,grad_norm: 0.8552835519968646, iteration: 228164
loss: 0.9859534502029419,grad_norm: 0.8205050013107978, iteration: 228165
loss: 1.0311229228973389,grad_norm: 0.9999990919796297, iteration: 228166
loss: 0.9954196214675903,grad_norm: 0.9064922844399729, iteration: 228167
loss: 1.0195033550262451,grad_norm: 0.853703497556423, iteration: 228168
loss: 1.0044879913330078,grad_norm: 0.9999990905367133, iteration: 228169
loss: 1.0043567419052124,grad_norm: 0.8627380344490696, iteration: 228170
loss: 0.9756724238395691,grad_norm: 0.8297823793673921, iteration: 228171
loss: 0.991013765335083,grad_norm: 0.9999989813407609, iteration: 228172
loss: 1.0450724363327026,grad_norm: 0.9999991874984832, iteration: 228173
loss: 1.0092511177062988,grad_norm: 0.8755097603185225, iteration: 228174
loss: 1.0015357732772827,grad_norm: 0.9999991521501975, iteration: 228175
loss: 1.00238835811615,grad_norm: 0.9999990512747576, iteration: 228176
loss: 0.9967824220657349,grad_norm: 0.9999989960618658, iteration: 228177
loss: 1.025958776473999,grad_norm: 0.7474269457171201, iteration: 228178
loss: 1.0076833963394165,grad_norm: 0.7832338037949308, iteration: 228179
loss: 1.0088955163955688,grad_norm: 0.9865452603048395, iteration: 228180
loss: 0.9854409694671631,grad_norm: 0.9227553995823519, iteration: 228181
loss: 1.0035662651062012,grad_norm: 0.9898975732280695, iteration: 228182
loss: 1.0019466876983643,grad_norm: 0.9562173656634444, iteration: 228183
loss: 1.0185554027557373,grad_norm: 0.9999990957957097, iteration: 228184
loss: 0.999095618724823,grad_norm: 0.838129676814448, iteration: 228185
loss: 1.0146714448928833,grad_norm: 0.8527341465667211, iteration: 228186
loss: 0.972380518913269,grad_norm: 0.8100029078492722, iteration: 228187
loss: 0.9884822964668274,grad_norm: 0.9718461547979951, iteration: 228188
loss: 0.9754767417907715,grad_norm: 0.9577337226042351, iteration: 228189
loss: 0.9372232556343079,grad_norm: 0.9999992677364283, iteration: 228190
loss: 0.9707356095314026,grad_norm: 0.9551518306019426, iteration: 228191
loss: 1.023244023323059,grad_norm: 0.9107998770683432, iteration: 228192
loss: 0.990179181098938,grad_norm: 0.999999093065969, iteration: 228193
loss: 0.9560579061508179,grad_norm: 0.9352403243970853, iteration: 228194
loss: 0.9975773096084595,grad_norm: 0.8384981444764984, iteration: 228195
loss: 1.001829981803894,grad_norm: 0.8670645473693247, iteration: 228196
loss: 1.0247032642364502,grad_norm: 0.8613926347080235, iteration: 228197
loss: 1.0716369152069092,grad_norm: 0.9999991272948228, iteration: 228198
loss: 1.003441572189331,grad_norm: 0.8150394825244481, iteration: 228199
loss: 1.0506346225738525,grad_norm: 0.9999993868030245, iteration: 228200
loss: 1.0077792406082153,grad_norm: 0.7917657259201827, iteration: 228201
loss: 0.9816162586212158,grad_norm: 0.9999999305830863, iteration: 228202
loss: 0.973587155342102,grad_norm: 0.7201234248545706, iteration: 228203
loss: 0.9540468454360962,grad_norm: 0.9999990934391187, iteration: 228204
loss: 0.9978827834129333,grad_norm: 0.8100092113856456, iteration: 228205
loss: 0.997555136680603,grad_norm: 0.9753895002446981, iteration: 228206
loss: 0.9815794825553894,grad_norm: 0.9660861893961447, iteration: 228207
loss: 0.9578316807746887,grad_norm: 0.9553338318773706, iteration: 228208
loss: 1.0405908823013306,grad_norm: 0.9999990309230159, iteration: 228209
loss: 0.9902356863021851,grad_norm: 0.9814243670302067, iteration: 228210
loss: 1.0197607278823853,grad_norm: 0.9999997750316578, iteration: 228211
loss: 1.012918472290039,grad_norm: 0.8366556458551768, iteration: 228212
loss: 0.9512106776237488,grad_norm: 0.9999991500818199, iteration: 228213
loss: 0.980539858341217,grad_norm: 0.999998993536693, iteration: 228214
loss: 1.0063929557800293,grad_norm: 0.8226997281206063, iteration: 228215
loss: 0.9683470129966736,grad_norm: 0.9999990514740132, iteration: 228216
loss: 1.2106326818466187,grad_norm: 0.9999999852811469, iteration: 228217
loss: 1.0403785705566406,grad_norm: 0.8522496097086355, iteration: 228218
loss: 0.9974918365478516,grad_norm: 0.91766761707113, iteration: 228219
loss: 1.0262647867202759,grad_norm: 0.9999998005840146, iteration: 228220
loss: 0.9787047505378723,grad_norm: 0.9999992566582837, iteration: 228221
loss: 1.0119867324829102,grad_norm: 0.9657170943173713, iteration: 228222
loss: 0.9534139037132263,grad_norm: 0.9999992487981738, iteration: 228223
loss: 1.03099524974823,grad_norm: 0.9956494385345441, iteration: 228224
loss: 0.9891296029090881,grad_norm: 0.8917648258269337, iteration: 228225
loss: 1.0623704195022583,grad_norm: 0.9458155881846074, iteration: 228226
loss: 1.0142580270767212,grad_norm: 0.9999990253475279, iteration: 228227
loss: 0.988069474697113,grad_norm: 0.9999989772150021, iteration: 228228
loss: 0.9721792340278625,grad_norm: 0.8671139555324554, iteration: 228229
loss: 0.9995629191398621,grad_norm: 0.7994295717068562, iteration: 228230
loss: 0.9633829593658447,grad_norm: 0.840471304711063, iteration: 228231
loss: 0.9919115900993347,grad_norm: 0.9999997440434115, iteration: 228232
loss: 1.0262118577957153,grad_norm: 0.9298290845321759, iteration: 228233
loss: 0.9378548860549927,grad_norm: 0.8308946199887451, iteration: 228234
loss: 0.9746933579444885,grad_norm: 0.8478402961725365, iteration: 228235
loss: 1.1036068201065063,grad_norm: 0.9999991717725647, iteration: 228236
loss: 0.9833032488822937,grad_norm: 0.9999994377487083, iteration: 228237
loss: 1.0322725772857666,grad_norm: 0.9476113496327768, iteration: 228238
loss: 0.9838958978652954,grad_norm: 0.9999991819868869, iteration: 228239
loss: 1.0229123830795288,grad_norm: 0.9377696552709932, iteration: 228240
loss: 0.9625648260116577,grad_norm: 0.9999991563278707, iteration: 228241
loss: 1.0422837734222412,grad_norm: 0.9999993120074316, iteration: 228242
loss: 0.9969607591629028,grad_norm: 0.9999991332024474, iteration: 228243
loss: 1.0020124912261963,grad_norm: 0.8326681572597159, iteration: 228244
loss: 0.997488796710968,grad_norm: 0.7925272309804806, iteration: 228245
loss: 1.011838674545288,grad_norm: 0.9999992705506257, iteration: 228246
loss: 1.0263882875442505,grad_norm: 0.903249577491033, iteration: 228247
loss: 1.0367313623428345,grad_norm: 0.9999991862524997, iteration: 228248
loss: 0.9949315786361694,grad_norm: 0.9848305917667582, iteration: 228249
loss: 1.0304336547851562,grad_norm: 0.9999997245537808, iteration: 228250
loss: 1.0064289569854736,grad_norm: 0.937827504986251, iteration: 228251
loss: 0.9881455898284912,grad_norm: 0.9131291807254327, iteration: 228252
loss: 1.0044925212860107,grad_norm: 0.8088219229448733, iteration: 228253
loss: 0.995518684387207,grad_norm: 0.8682183517426972, iteration: 228254
loss: 0.9993234872817993,grad_norm: 0.9327831258220775, iteration: 228255
loss: 1.1543439626693726,grad_norm: 0.9999995208893251, iteration: 228256
loss: 0.9753097295761108,grad_norm: 0.8497328278794045, iteration: 228257
loss: 0.9798232913017273,grad_norm: 0.9141206768292633, iteration: 228258
loss: 0.964135468006134,grad_norm: 0.8973823313746772, iteration: 228259
loss: 1.023877501487732,grad_norm: 0.9999994735078155, iteration: 228260
loss: 0.9879944920539856,grad_norm: 0.8724722958032355, iteration: 228261
loss: 0.9749171733856201,grad_norm: 0.9999991281400354, iteration: 228262
loss: 0.9982736706733704,grad_norm: 0.9817385143121039, iteration: 228263
loss: 1.0143859386444092,grad_norm: 0.8467213912050172, iteration: 228264
loss: 1.0209702253341675,grad_norm: 0.9999989069347592, iteration: 228265
loss: 1.3942571878433228,grad_norm: 0.9999998102910181, iteration: 228266
loss: 0.985637366771698,grad_norm: 0.890163809139937, iteration: 228267
loss: 0.9822842478752136,grad_norm: 0.8811763312114913, iteration: 228268
loss: 1.0204970836639404,grad_norm: 0.9257016586165085, iteration: 228269
loss: 0.9865767955780029,grad_norm: 0.8451266005528438, iteration: 228270
loss: 0.98103928565979,grad_norm: 0.7094894448472869, iteration: 228271
loss: 1.020258903503418,grad_norm: 0.9496910720863275, iteration: 228272
loss: 0.9528246521949768,grad_norm: 0.9999991171003947, iteration: 228273
loss: 0.9422591924667358,grad_norm: 0.8713417338665949, iteration: 228274
loss: 0.9792608022689819,grad_norm: 0.9287690664171835, iteration: 228275
loss: 1.0113102197647095,grad_norm: 0.7248265106287984, iteration: 228276
loss: 1.0322885513305664,grad_norm: 0.842053096386699, iteration: 228277
loss: 0.9734691977500916,grad_norm: 0.9999990406984777, iteration: 228278
loss: 0.9820024967193604,grad_norm: 0.9169398899431159, iteration: 228279
loss: 0.9936367869377136,grad_norm: 0.8296935764371645, iteration: 228280
loss: 1.0734671354293823,grad_norm: 0.9999990508253971, iteration: 228281
loss: 1.0178877115249634,grad_norm: 0.9999991025245226, iteration: 228282
loss: 0.9538470506668091,grad_norm: 0.9516362801112259, iteration: 228283
loss: 1.0254688262939453,grad_norm: 0.9134414767853625, iteration: 228284
loss: 0.9772509932518005,grad_norm: 0.8689373110137814, iteration: 228285
loss: 1.0183045864105225,grad_norm: 0.9999993211630436, iteration: 228286
loss: 0.9725707769393921,grad_norm: 0.9197324371747733, iteration: 228287
loss: 1.0603488683700562,grad_norm: 0.9999991483290562, iteration: 228288
loss: 0.9609659314155579,grad_norm: 0.8870620833444971, iteration: 228289
loss: 0.9767742156982422,grad_norm: 0.9701931510908055, iteration: 228290
loss: 1.0558453798294067,grad_norm: 0.8843977215775198, iteration: 228291
loss: 1.0188499689102173,grad_norm: 0.8961577198670916, iteration: 228292
loss: 1.00234055519104,grad_norm: 0.903168980129476, iteration: 228293
loss: 0.9503147602081299,grad_norm: 0.940475491021283, iteration: 228294
loss: 1.0380431413650513,grad_norm: 0.9010548778207443, iteration: 228295
loss: 0.9809101223945618,grad_norm: 0.9999995589905495, iteration: 228296
loss: 0.9807644486427307,grad_norm: 0.8888616600249507, iteration: 228297
loss: 0.9960728883743286,grad_norm: 0.9999991574067065, iteration: 228298
loss: 0.997833251953125,grad_norm: 0.9320228263093447, iteration: 228299
loss: 1.0270549058914185,grad_norm: 0.9337429878287512, iteration: 228300
loss: 1.0112810134887695,grad_norm: 0.9999991097353491, iteration: 228301
loss: 1.0106375217437744,grad_norm: 0.8337350641390765, iteration: 228302
loss: 0.9873369932174683,grad_norm: 0.8330828105801062, iteration: 228303
loss: 1.0331870317459106,grad_norm: 0.9999998690155657, iteration: 228304
loss: 0.9838660955429077,grad_norm: 0.9227797169866234, iteration: 228305
loss: 1.0208817720413208,grad_norm: 0.8530006408633305, iteration: 228306
loss: 0.9808384776115417,grad_norm: 0.9665895468873821, iteration: 228307
loss: 1.0141969919204712,grad_norm: 0.999999590792663, iteration: 228308
loss: 0.9975298047065735,grad_norm: 0.8511889048867594, iteration: 228309
loss: 0.9679232835769653,grad_norm: 0.8447230369013218, iteration: 228310
loss: 0.9805415272712708,grad_norm: 0.9999990490572155, iteration: 228311
loss: 1.0042115449905396,grad_norm: 0.8071551914184822, iteration: 228312
loss: 1.2288964986801147,grad_norm: 0.9999994269965133, iteration: 228313
loss: 1.140568733215332,grad_norm: 0.9999997494631594, iteration: 228314
loss: 1.0460540056228638,grad_norm: 0.9999992396434514, iteration: 228315
loss: 0.998202919960022,grad_norm: 0.9153823161267405, iteration: 228316
loss: 1.1692783832550049,grad_norm: 0.9999999655991884, iteration: 228317
loss: 1.1638123989105225,grad_norm: 0.9999996478599296, iteration: 228318
loss: 0.9991488456726074,grad_norm: 0.9999992457754291, iteration: 228319
loss: 1.0151926279067993,grad_norm: 0.8829344551349434, iteration: 228320
loss: 1.006892204284668,grad_norm: 0.8615309568842643, iteration: 228321
loss: 1.2660590410232544,grad_norm: 0.9999998463393385, iteration: 228322
loss: 1.0876723527908325,grad_norm: 0.8664324308014899, iteration: 228323
loss: 0.9906865954399109,grad_norm: 0.9379730816548504, iteration: 228324
loss: 1.0253068208694458,grad_norm: 0.9999991733923972, iteration: 228325
loss: 0.9854431748390198,grad_norm: 0.7970293917461384, iteration: 228326
loss: 1.009166955947876,grad_norm: 0.9791565111597719, iteration: 228327
loss: 1.0128713846206665,grad_norm: 0.9766659759677677, iteration: 228328
loss: 0.9898577928543091,grad_norm: 0.9582543565798215, iteration: 228329
loss: 1.0208584070205688,grad_norm: 0.8625653461299853, iteration: 228330
loss: 0.9853689670562744,grad_norm: 0.966868449340294, iteration: 228331
loss: 1.0341945886611938,grad_norm: 0.9001732764565085, iteration: 228332
loss: 1.0215609073638916,grad_norm: 0.9799910662817948, iteration: 228333
loss: 1.0015177726745605,grad_norm: 0.8053240813530861, iteration: 228334
loss: 0.9912012815475464,grad_norm: 0.9999998831561666, iteration: 228335
loss: 0.9852315783500671,grad_norm: 0.9167584588393167, iteration: 228336
loss: 1.2380480766296387,grad_norm: 0.9999995685839572, iteration: 228337
loss: 1.0087566375732422,grad_norm: 0.9999990352632121, iteration: 228338
loss: 0.9776865243911743,grad_norm: 0.7247326623255121, iteration: 228339
loss: 1.0373027324676514,grad_norm: 0.9999997289413425, iteration: 228340
loss: 0.9889015555381775,grad_norm: 0.9883621169076647, iteration: 228341
loss: 0.9876850247383118,grad_norm: 0.9508823359200026, iteration: 228342
loss: 1.0042649507522583,grad_norm: 0.8344873770426791, iteration: 228343
loss: 0.999921977519989,grad_norm: 0.9060277864300562, iteration: 228344
loss: 1.0351500511169434,grad_norm: 0.9999990481058849, iteration: 228345
loss: 0.9583313465118408,grad_norm: 0.97346242117882, iteration: 228346
loss: 1.0347974300384521,grad_norm: 0.9999998001033916, iteration: 228347
loss: 1.0032004117965698,grad_norm: 0.815202673313765, iteration: 228348
loss: 1.0371429920196533,grad_norm: 0.9037617687810074, iteration: 228349
loss: 1.0079790353775024,grad_norm: 0.9999989610506003, iteration: 228350
loss: 0.9874776005744934,grad_norm: 0.9999991682880265, iteration: 228351
loss: 0.9952692985534668,grad_norm: 0.9999989912169128, iteration: 228352
loss: 0.9679003953933716,grad_norm: 0.9552798769058071, iteration: 228353
loss: 0.9885507822036743,grad_norm: 0.9477394714759362, iteration: 228354
loss: 0.9941898584365845,grad_norm: 0.9999992516242088, iteration: 228355
loss: 1.0018523931503296,grad_norm: 0.8444804415394878, iteration: 228356
loss: 1.0161104202270508,grad_norm: 0.9855270965922912, iteration: 228357
loss: 0.9944180846214294,grad_norm: 0.9864250729301406, iteration: 228358
loss: 1.0957227945327759,grad_norm: 0.9999999239009107, iteration: 228359
loss: 1.0217338800430298,grad_norm: 0.9999998126298929, iteration: 228360
loss: 1.0030461549758911,grad_norm: 0.7382933460498502, iteration: 228361
loss: 1.0173662900924683,grad_norm: 0.9248256865418493, iteration: 228362
loss: 0.9993690848350525,grad_norm: 0.9299815856337126, iteration: 228363
loss: 0.9978254437446594,grad_norm: 0.9999989699854803, iteration: 228364
loss: 0.9909528493881226,grad_norm: 0.9332467484462393, iteration: 228365
loss: 1.012917399406433,grad_norm: 0.9999991493270812, iteration: 228366
loss: 0.9827920198440552,grad_norm: 0.9999991531505376, iteration: 228367
loss: 0.9914999604225159,grad_norm: 0.7756591753516885, iteration: 228368
loss: 1.1975438594818115,grad_norm: 0.9999994786025509, iteration: 228369
loss: 0.9768985509872437,grad_norm: 0.99999941584035, iteration: 228370
loss: 1.0007840394973755,grad_norm: 0.7921692049528375, iteration: 228371
loss: 1.0043145418167114,grad_norm: 0.9046337291215594, iteration: 228372
loss: 0.9903228282928467,grad_norm: 0.9999991393826426, iteration: 228373
loss: 1.0195282697677612,grad_norm: 0.9671646982963336, iteration: 228374
loss: 1.150958776473999,grad_norm: 0.9999990743284175, iteration: 228375
loss: 1.1226191520690918,grad_norm: 0.9999999148811238, iteration: 228376
loss: 1.0679980516433716,grad_norm: 0.99999905880284, iteration: 228377
loss: 1.2743772268295288,grad_norm: 0.9999994136330884, iteration: 228378
loss: 1.117814064025879,grad_norm: 0.9999991991611898, iteration: 228379
loss: 0.9626681804656982,grad_norm: 0.9677435089974739, iteration: 228380
loss: 1.075613260269165,grad_norm: 0.9999996016194644, iteration: 228381
loss: 0.9924392700195312,grad_norm: 0.8569350996731917, iteration: 228382
loss: 1.0850743055343628,grad_norm: 0.9999994194093368, iteration: 228383
loss: 0.9972435832023621,grad_norm: 0.92705266896478, iteration: 228384
loss: 1.0241748094558716,grad_norm: 0.9999992382446843, iteration: 228385
loss: 1.0197443962097168,grad_norm: 0.9999998834750892, iteration: 228386
loss: 0.9960713982582092,grad_norm: 0.9999990146483374, iteration: 228387
loss: 1.1792118549346924,grad_norm: 0.9999993050451712, iteration: 228388
loss: 1.021716833114624,grad_norm: 0.9999989757072016, iteration: 228389
loss: 1.1229679584503174,grad_norm: 0.999999915510318, iteration: 228390
loss: 1.1131713390350342,grad_norm: 1.0000000649465464, iteration: 228391
loss: 1.0546247959136963,grad_norm: 0.9999992303754341, iteration: 228392
loss: 1.0303051471710205,grad_norm: 0.9526083491801204, iteration: 228393
loss: 1.1021244525909424,grad_norm: 0.9999990484933182, iteration: 228394
loss: 1.0981082916259766,grad_norm: 1.0000000028923615, iteration: 228395
loss: 1.160004734992981,grad_norm: 0.9999998727680919, iteration: 228396
loss: 0.984684407711029,grad_norm: 0.857707575023298, iteration: 228397
loss: 1.110243558883667,grad_norm: 0.9999993403353611, iteration: 228398
loss: 1.021195888519287,grad_norm: 0.9999993039345596, iteration: 228399
loss: 1.156957745552063,grad_norm: 0.999999986578795, iteration: 228400
loss: 1.2487019300460815,grad_norm: 1.0000000236579807, iteration: 228401
loss: 1.123104214668274,grad_norm: 0.9999994660342562, iteration: 228402
loss: 1.078553318977356,grad_norm: 0.9999998098058988, iteration: 228403
loss: 1.0236032009124756,grad_norm: 0.9999998917002851, iteration: 228404
loss: 1.0971052646636963,grad_norm: 0.9999993283525472, iteration: 228405
loss: 1.1757856607437134,grad_norm: 0.9999992430149492, iteration: 228406
loss: 1.1339924335479736,grad_norm: 1.000000004213743, iteration: 228407
loss: 1.0214787721633911,grad_norm: 0.9999994989383937, iteration: 228408
loss: 1.0500272512435913,grad_norm: 0.9999999120644083, iteration: 228409
loss: 1.0902224779129028,grad_norm: 0.9999999841333844, iteration: 228410
loss: 1.1285934448242188,grad_norm: 0.9999996819132186, iteration: 228411
loss: 1.1059857606887817,grad_norm: 0.9999998206724591, iteration: 228412
loss: 0.9936314821243286,grad_norm: 0.9999991046987396, iteration: 228413
loss: 0.9855644106864929,grad_norm: 0.9178897767753413, iteration: 228414
loss: 1.1735097169876099,grad_norm: 0.999999372637592, iteration: 228415
loss: 1.2178927659988403,grad_norm: 0.9999999326113571, iteration: 228416
loss: 1.003235936164856,grad_norm: 0.9999991936415658, iteration: 228417
loss: 0.9858539700508118,grad_norm: 0.9999990866631991, iteration: 228418
loss: 1.3428062200546265,grad_norm: 1.000000035347271, iteration: 228419
loss: 1.3127869367599487,grad_norm: 0.9999998658765995, iteration: 228420
loss: 1.2558997869491577,grad_norm: 0.9999996877645869, iteration: 228421
loss: 1.1631158590316772,grad_norm: 0.999999543044897, iteration: 228422
loss: 1.1011900901794434,grad_norm: 0.9999999049276483, iteration: 228423
loss: 1.1272681951522827,grad_norm: 0.9999999371591156, iteration: 228424
loss: 1.034524917602539,grad_norm: 0.9999999095016786, iteration: 228425
loss: 1.300986409187317,grad_norm: 0.999999655104063, iteration: 228426
loss: 1.2084616422653198,grad_norm: 0.999999514695382, iteration: 228427
loss: 1.2371242046356201,grad_norm: 0.9999998151741214, iteration: 228428
loss: 1.1360224485397339,grad_norm: 1.000000034094298, iteration: 228429
loss: 1.1304607391357422,grad_norm: 0.999999806836693, iteration: 228430
loss: 1.1717720031738281,grad_norm: 0.9999997094574128, iteration: 228431
loss: 1.246462345123291,grad_norm: 0.9999993908277969, iteration: 228432
loss: 1.0244807004928589,grad_norm: 0.999999860482363, iteration: 228433
loss: 1.086878776550293,grad_norm: 0.9999993105072996, iteration: 228434
loss: 1.3008493185043335,grad_norm: 0.9999997219095063, iteration: 228435
loss: 1.4406559467315674,grad_norm: 0.9999996216016369, iteration: 228436
loss: 1.2363018989562988,grad_norm: 0.9999998007250042, iteration: 228437
loss: 1.2847892045974731,grad_norm: 0.9999998511937622, iteration: 228438
loss: 1.3265835046768188,grad_norm: 0.9999997140828445, iteration: 228439
loss: 1.193333625793457,grad_norm: 0.9999999577939634, iteration: 228440
loss: 1.272369384765625,grad_norm: 0.9999999865918333, iteration: 228441
loss: 1.1334741115570068,grad_norm: 0.9999998850559363, iteration: 228442
loss: 1.2623754739761353,grad_norm: 0.9999997662420324, iteration: 228443
loss: 1.277706265449524,grad_norm: 0.9999999300846256, iteration: 228444
loss: 1.2732489109039307,grad_norm: 0.9999996900853761, iteration: 228445
loss: 1.1308166980743408,grad_norm: 0.9999995864220007, iteration: 228446
loss: 1.0894235372543335,grad_norm: 0.999999895602451, iteration: 228447
loss: 1.175848364830017,grad_norm: 0.9999997669565948, iteration: 228448
loss: 1.3642462491989136,grad_norm: 0.9999998722242205, iteration: 228449
loss: 1.1759979724884033,grad_norm: 0.9999998857353657, iteration: 228450
loss: 1.2732502222061157,grad_norm: 0.9999999149854992, iteration: 228451
loss: 1.1635318994522095,grad_norm: 0.9999999171023266, iteration: 228452
loss: 1.1473546028137207,grad_norm: 0.9999998254632764, iteration: 228453
loss: 1.2434608936309814,grad_norm: 0.9999999178402948, iteration: 228454
loss: 1.1835057735443115,grad_norm: 0.999999314040915, iteration: 228455
loss: 1.397311806678772,grad_norm: 0.9999999109364024, iteration: 228456
loss: 1.1214176416397095,grad_norm: 0.9999996172007976, iteration: 228457
loss: 1.2863119840621948,grad_norm: 0.9999997311129551, iteration: 228458
loss: 1.2063947916030884,grad_norm: 0.99999953841518, iteration: 228459
loss: 1.0877639055252075,grad_norm: 0.9999995440931959, iteration: 228460
loss: 1.109487771987915,grad_norm: 0.9999998950922826, iteration: 228461
loss: 1.1474882364273071,grad_norm: 0.9999996962583788, iteration: 228462
loss: 1.2545984983444214,grad_norm: 0.9999999247739508, iteration: 228463
loss: 1.167185664176941,grad_norm: 0.9999998978251795, iteration: 228464
loss: 1.1404551267623901,grad_norm: 0.9999997821977463, iteration: 228465
loss: 1.2105824947357178,grad_norm: 0.999999467286316, iteration: 228466
loss: 1.2134358882904053,grad_norm: 0.9999999987656604, iteration: 228467
loss: 1.1668285131454468,grad_norm: 0.9999996552099978, iteration: 228468
loss: 1.2583796977996826,grad_norm: 0.9999996175751044, iteration: 228469
loss: 1.2270901203155518,grad_norm: 0.9999998196484274, iteration: 228470
loss: 1.1300057172775269,grad_norm: 0.9999996353284495, iteration: 228471
loss: 1.2964991331100464,grad_norm: 0.9999995247036079, iteration: 228472
loss: 1.1829110383987427,grad_norm: 1.000000042547908, iteration: 228473
loss: 1.051501989364624,grad_norm: 0.9999997401126529, iteration: 228474
loss: 1.1416375637054443,grad_norm: 0.9999991620041339, iteration: 228475
loss: 1.006157398223877,grad_norm: 0.9999993893395549, iteration: 228476
loss: 1.1358202695846558,grad_norm: 0.9999993683035407, iteration: 228477
loss: 1.0198205709457397,grad_norm: 0.9443597891192089, iteration: 228478
loss: 1.0853359699249268,grad_norm: 0.9999995444982752, iteration: 228479
loss: 1.068280816078186,grad_norm: 0.9999998796630943, iteration: 228480
loss: 1.0613877773284912,grad_norm: 0.9999998786785025, iteration: 228481
loss: 1.1199982166290283,grad_norm: 0.9427534599381941, iteration: 228482
loss: 1.1951502561569214,grad_norm: 0.9999992476370946, iteration: 228483
loss: 1.0734504461288452,grad_norm: 1.0000000078078615, iteration: 228484
loss: 1.127648115158081,grad_norm: 0.9999999960437976, iteration: 228485
loss: 0.9529791474342346,grad_norm: 0.9999991384103232, iteration: 228486
loss: 1.0459179878234863,grad_norm: 0.9999991301514799, iteration: 228487
loss: 1.0331732034683228,grad_norm: 0.9999998044878429, iteration: 228488
loss: 1.0224157571792603,grad_norm: 0.9999991593500127, iteration: 228489
loss: 1.088910698890686,grad_norm: 0.9999996319881361, iteration: 228490
loss: 1.046979546546936,grad_norm: 0.9999991420334245, iteration: 228491
loss: 1.1000230312347412,grad_norm: 0.9999994923203808, iteration: 228492
loss: 1.1494852304458618,grad_norm: 0.9999991193020638, iteration: 228493
loss: 1.0319997072219849,grad_norm: 0.9999990679202017, iteration: 228494
loss: 1.1344571113586426,grad_norm: 0.9999991747832033, iteration: 228495
loss: 1.0361871719360352,grad_norm: 0.999999846661908, iteration: 228496
loss: 1.1304810047149658,grad_norm: 0.9999998773593033, iteration: 228497
loss: 1.0033670663833618,grad_norm: 0.8870513372096857, iteration: 228498
loss: 1.0044217109680176,grad_norm: 0.9999991388282209, iteration: 228499
loss: 1.0494146347045898,grad_norm: 0.9999997368934539, iteration: 228500
loss: 1.029667615890503,grad_norm: 0.9999989613668275, iteration: 228501
loss: 0.9715798497200012,grad_norm: 0.8260378585864147, iteration: 228502
loss: 1.018627405166626,grad_norm: 0.9999991459788065, iteration: 228503
loss: 1.2903584241867065,grad_norm: 0.9999999299273038, iteration: 228504
loss: 1.0646624565124512,grad_norm: 0.9999998926296058, iteration: 228505
loss: 1.0991991758346558,grad_norm: 0.9999992761757593, iteration: 228506
loss: 1.1450544595718384,grad_norm: 0.9999998167411757, iteration: 228507
loss: 1.0949640274047852,grad_norm: 0.9999998831116084, iteration: 228508
loss: 1.1274867057800293,grad_norm: 0.9999993713600707, iteration: 228509
loss: 1.1403435468673706,grad_norm: 0.9999998485578112, iteration: 228510
loss: 1.102083444595337,grad_norm: 0.9999999621868525, iteration: 228511
loss: 1.0785413980484009,grad_norm: 0.9582898482876299, iteration: 228512
loss: 1.0438224077224731,grad_norm: 0.9999998862830275, iteration: 228513
loss: 1.0087130069732666,grad_norm: 0.9311652136800095, iteration: 228514
loss: 1.0209881067276,grad_norm: 0.9999997618724308, iteration: 228515
loss: 1.0299999713897705,grad_norm: 0.9999992822595325, iteration: 228516
loss: 1.0444339513778687,grad_norm: 0.9999997487004106, iteration: 228517
loss: 1.0119047164916992,grad_norm: 0.9999990699850406, iteration: 228518
loss: 1.1031492948532104,grad_norm: 0.9999995898710988, iteration: 228519
loss: 1.1445990800857544,grad_norm: 0.9999999473717384, iteration: 228520
loss: 1.0650283098220825,grad_norm: 0.9999991555256449, iteration: 228521
loss: 0.9975493550300598,grad_norm: 0.9999992166211163, iteration: 228522
loss: 1.0263752937316895,grad_norm: 0.886808739526521, iteration: 228523
loss: 0.9869773387908936,grad_norm: 0.9999994992446112, iteration: 228524
loss: 1.0220351219177246,grad_norm: 0.9772455663415673, iteration: 228525
loss: 1.0680177211761475,grad_norm: 1.0000000237951143, iteration: 228526
loss: 0.9696059823036194,grad_norm: 0.9999991364078858, iteration: 228527
loss: 0.9791367650032043,grad_norm: 0.9999989196036055, iteration: 228528
loss: 1.042604684829712,grad_norm: 0.9976384492295304, iteration: 228529
loss: 1.0007786750793457,grad_norm: 0.999999260492968, iteration: 228530
loss: 0.9698617458343506,grad_norm: 0.8649518145144299, iteration: 228531
loss: 1.0344020128250122,grad_norm: 0.9937890671574902, iteration: 228532
loss: 1.104307770729065,grad_norm: 0.9999994680782677, iteration: 228533
loss: 0.9877861142158508,grad_norm: 0.9833206762744405, iteration: 228534
loss: 1.1085489988327026,grad_norm: 0.9757101409037771, iteration: 228535
loss: 1.008951187133789,grad_norm: 0.9268166657593331, iteration: 228536
loss: 1.0226346254348755,grad_norm: 0.8547107709990355, iteration: 228537
loss: 1.0103591680526733,grad_norm: 0.9418120660367708, iteration: 228538
loss: 1.0083231925964355,grad_norm: 0.9090925828439516, iteration: 228539
loss: 1.0423921346664429,grad_norm: 0.9999994484615399, iteration: 228540
loss: 1.0378469228744507,grad_norm: 0.9999999204074076, iteration: 228541
loss: 0.9816113710403442,grad_norm: 0.9491740964996338, iteration: 228542
loss: 1.0477230548858643,grad_norm: 0.9999998604234508, iteration: 228543
loss: 0.9728787541389465,grad_norm: 0.9522932653997037, iteration: 228544
loss: 1.0489197969436646,grad_norm: 0.9999991446077885, iteration: 228545
loss: 1.1259022951126099,grad_norm: 0.9999997635147692, iteration: 228546
loss: 1.0220022201538086,grad_norm: 0.9999999273368348, iteration: 228547
loss: 1.176953911781311,grad_norm: 0.9999993286287585, iteration: 228548
loss: 0.9837943315505981,grad_norm: 0.9999992600599159, iteration: 228549
loss: 1.1310179233551025,grad_norm: 0.9999995886043682, iteration: 228550
loss: 1.098897099494934,grad_norm: 0.9999998530775591, iteration: 228551
loss: 1.0511364936828613,grad_norm: 0.9999998105695267, iteration: 228552
loss: 1.0065648555755615,grad_norm: 0.9999999423646068, iteration: 228553
loss: 1.0060983896255493,grad_norm: 0.9467756102126421, iteration: 228554
loss: 1.1994082927703857,grad_norm: 1.0000000151004165, iteration: 228555
loss: 0.9610719084739685,grad_norm: 0.9999989456436077, iteration: 228556
loss: 1.0357482433319092,grad_norm: 0.999999682421883, iteration: 228557
loss: 0.9936505556106567,grad_norm: 0.9023802315198313, iteration: 228558
loss: 1.0040252208709717,grad_norm: 0.9999991815987123, iteration: 228559
loss: 1.1954940557479858,grad_norm: 0.9999999242866717, iteration: 228560
loss: 1.0120015144348145,grad_norm: 0.8832901347210046, iteration: 228561
loss: 0.9823182821273804,grad_norm: 0.9857433524373996, iteration: 228562
loss: 0.9604960083961487,grad_norm: 0.7682031304224695, iteration: 228563
loss: 1.0260789394378662,grad_norm: 0.9999992045090873, iteration: 228564
loss: 1.037980556488037,grad_norm: 0.9374311365406772, iteration: 228565
loss: 1.0072038173675537,grad_norm: 0.9999999029128274, iteration: 228566
loss: 1.0458178520202637,grad_norm: 0.9999998753298618, iteration: 228567
loss: 1.0270390510559082,grad_norm: 0.9999994861956211, iteration: 228568
loss: 1.2960118055343628,grad_norm: 0.9999998454509077, iteration: 228569
loss: 1.0333476066589355,grad_norm: 0.9999998495349094, iteration: 228570
loss: 1.0540648698806763,grad_norm: 0.9999996396225513, iteration: 228571
loss: 1.0738158226013184,grad_norm: 0.9999996885621483, iteration: 228572
loss: 1.0181318521499634,grad_norm: 0.9999998617051241, iteration: 228573
loss: 1.064202070236206,grad_norm: 0.9999997509964312, iteration: 228574
loss: 0.989818274974823,grad_norm: 0.827682736362153, iteration: 228575
loss: 1.0036455392837524,grad_norm: 0.9008182731364408, iteration: 228576
loss: 0.9949716329574585,grad_norm: 0.7733541867361, iteration: 228577
loss: 1.0818909406661987,grad_norm: 0.9999991461096028, iteration: 228578
loss: 1.0189502239227295,grad_norm: 0.9999992626300356, iteration: 228579
loss: 1.003462314605713,grad_norm: 0.9999992719096353, iteration: 228580
loss: 1.0104044675827026,grad_norm: 0.9171322699375399, iteration: 228581
loss: 1.0221991539001465,grad_norm: 0.9999998328598403, iteration: 228582
loss: 1.040764570236206,grad_norm: 0.9999995681350506, iteration: 228583
loss: 0.9772849082946777,grad_norm: 0.9415269978485926, iteration: 228584
loss: 1.0367178916931152,grad_norm: 0.9999994284949889, iteration: 228585
loss: 1.1003259420394897,grad_norm: 0.9999991790082784, iteration: 228586
loss: 1.0850045680999756,grad_norm: 0.9999996719773946, iteration: 228587
loss: 0.9941916465759277,grad_norm: 0.9999990362521265, iteration: 228588
loss: 1.0777982473373413,grad_norm: 0.9999991709755542, iteration: 228589
loss: 0.9938130974769592,grad_norm: 0.9999992326598408, iteration: 228590
loss: 1.059489369392395,grad_norm: 0.9999997908510433, iteration: 228591
loss: 0.9923460483551025,grad_norm: 0.9999989196329766, iteration: 228592
loss: 1.0812594890594482,grad_norm: 0.9999992804059415, iteration: 228593
loss: 1.0646450519561768,grad_norm: 0.9999995984461928, iteration: 228594
loss: 0.9753883481025696,grad_norm: 0.9999999149582287, iteration: 228595
loss: 1.0207680463790894,grad_norm: 0.9999997564592977, iteration: 228596
loss: 0.9788779616355896,grad_norm: 0.8132705145003611, iteration: 228597
loss: 1.001463532447815,grad_norm: 0.8621123125216633, iteration: 228598
loss: 1.0049424171447754,grad_norm: 0.999999911985507, iteration: 228599
loss: 0.9749734401702881,grad_norm: 0.9999995765353238, iteration: 228600
loss: 1.0184273719787598,grad_norm: 0.999999103671972, iteration: 228601
loss: 1.0120103359222412,grad_norm: 0.9999993933653208, iteration: 228602
loss: 1.020588755607605,grad_norm: 0.9999992483303217, iteration: 228603
loss: 0.9926409125328064,grad_norm: 0.9999991821589554, iteration: 228604
loss: 1.0218178033828735,grad_norm: 0.9999991236880235, iteration: 228605
loss: 1.0072405338287354,grad_norm: 0.9999995863863913, iteration: 228606
loss: 0.9775227904319763,grad_norm: 0.9999991695161033, iteration: 228607
loss: 0.9527139067649841,grad_norm: 0.9345206885394552, iteration: 228608
loss: 1.008678913116455,grad_norm: 0.9307496497982698, iteration: 228609
loss: 0.9639225602149963,grad_norm: 0.8919862936331614, iteration: 228610
loss: 1.0977410078048706,grad_norm: 0.9999998727105008, iteration: 228611
loss: 0.9695331454277039,grad_norm: 0.999999092298246, iteration: 228612
loss: 1.042944312095642,grad_norm: 0.9999990982258911, iteration: 228613
loss: 1.1330815553665161,grad_norm: 0.9999994404440717, iteration: 228614
loss: 1.0289307832717896,grad_norm: 0.893883750975522, iteration: 228615
loss: 1.046067476272583,grad_norm: 0.9999997148458357, iteration: 228616
loss: 1.0215145349502563,grad_norm: 0.9928079051589224, iteration: 228617
loss: 0.9790289998054504,grad_norm: 0.8983035058841623, iteration: 228618
loss: 1.1081055402755737,grad_norm: 1.0000000210948508, iteration: 228619
loss: 1.0046837329864502,grad_norm: 0.9437906813594852, iteration: 228620
loss: 1.0389076471328735,grad_norm: 0.9999995056648441, iteration: 228621
loss: 1.0436917543411255,grad_norm: 0.9999991067415089, iteration: 228622
loss: 0.9935644268989563,grad_norm: 0.9999991238337188, iteration: 228623
loss: 0.9898092746734619,grad_norm: 0.9999991132439754, iteration: 228624
loss: 0.9772833585739136,grad_norm: 0.9999997618062885, iteration: 228625
loss: 1.0271719694137573,grad_norm: 0.9999991010687411, iteration: 228626
loss: 1.0587284564971924,grad_norm: 0.9999991103926613, iteration: 228627
loss: 0.9983493685722351,grad_norm: 0.9999990968441611, iteration: 228628
loss: 0.9852534532546997,grad_norm: 0.9346964872276173, iteration: 228629
loss: 1.0559444427490234,grad_norm: 0.9999991252224465, iteration: 228630
loss: 1.04777991771698,grad_norm: 0.9999998751166994, iteration: 228631
loss: 0.993553876876831,grad_norm: 0.9078122843790802, iteration: 228632
loss: 1.0251368284225464,grad_norm: 0.9999990626178878, iteration: 228633
loss: 1.0142682790756226,grad_norm: 0.7527429262707995, iteration: 228634
loss: 1.0166370868682861,grad_norm: 0.8802057365176645, iteration: 228635
loss: 0.9850894212722778,grad_norm: 0.999999184583715, iteration: 228636
loss: 1.0203973054885864,grad_norm: 0.887450788207128, iteration: 228637
loss: 0.9833270907402039,grad_norm: 0.9650279458404055, iteration: 228638
loss: 1.028540015220642,grad_norm: 0.9866196428962605, iteration: 228639
loss: 1.0206323862075806,grad_norm: 0.9999992751784211, iteration: 228640
loss: 1.0042508840560913,grad_norm: 0.9999994657883163, iteration: 228641
loss: 0.9613837599754333,grad_norm: 0.9349961130522442, iteration: 228642
loss: 0.9751558303833008,grad_norm: 0.9999989944944313, iteration: 228643
loss: 1.0378355979919434,grad_norm: 0.9999990678023837, iteration: 228644
loss: 0.9603034257888794,grad_norm: 0.9999990453834197, iteration: 228645
loss: 1.0352810621261597,grad_norm: 0.9802851365328462, iteration: 228646
loss: 1.0206788778305054,grad_norm: 0.9554747071591259, iteration: 228647
loss: 1.055640459060669,grad_norm: 0.9999991376195616, iteration: 228648
loss: 1.030592441558838,grad_norm: 0.9999991482199407, iteration: 228649
loss: 1.1103273630142212,grad_norm: 0.9999991804178899, iteration: 228650
loss: 1.2778273820877075,grad_norm: 0.9999992591027129, iteration: 228651
loss: 0.9947037696838379,grad_norm: 0.8550965970643829, iteration: 228652
loss: 1.0541256666183472,grad_norm: 0.9999995455229562, iteration: 228653
loss: 0.974288821220398,grad_norm: 0.9642311335098543, iteration: 228654
loss: 1.0202423334121704,grad_norm: 0.9999998554022791, iteration: 228655
loss: 0.974807858467102,grad_norm: 0.9999990799667587, iteration: 228656
loss: 1.008802890777588,grad_norm: 0.9441787319747966, iteration: 228657
loss: 1.0353362560272217,grad_norm: 0.9999997787647802, iteration: 228658
loss: 1.0110379457473755,grad_norm: 0.9219182443749084, iteration: 228659
loss: 1.023646354675293,grad_norm: 0.9336362943746764, iteration: 228660
loss: 1.031545639038086,grad_norm: 0.9999992131651795, iteration: 228661
loss: 1.0232564210891724,grad_norm: 0.7785709668322027, iteration: 228662
loss: 0.9856730103492737,grad_norm: 0.9999992661794754, iteration: 228663
loss: 1.0081286430358887,grad_norm: 0.9999990771284205, iteration: 228664
loss: 0.9820605516433716,grad_norm: 0.9999991722688553, iteration: 228665
loss: 1.0315139293670654,grad_norm: 0.9999991036342648, iteration: 228666
loss: 0.9849503040313721,grad_norm: 0.871569925164216, iteration: 228667
loss: 1.0893054008483887,grad_norm: 0.999999206543917, iteration: 228668
loss: 0.9843210577964783,grad_norm: 0.8594177453221583, iteration: 228669
loss: 1.030991554260254,grad_norm: 0.9999992507860447, iteration: 228670
loss: 1.0239388942718506,grad_norm: 0.9999991213584111, iteration: 228671
loss: 0.961821436882019,grad_norm: 0.9999991699489261, iteration: 228672
loss: 1.1073423624038696,grad_norm: 0.9999992392503018, iteration: 228673
loss: 1.006933569908142,grad_norm: 0.9999992703526541, iteration: 228674
loss: 1.0348010063171387,grad_norm: 0.9999999034890203, iteration: 228675
loss: 1.035629391670227,grad_norm: 0.9999992225106563, iteration: 228676
loss: 1.0049680471420288,grad_norm: 0.9953011422614227, iteration: 228677
loss: 0.9804412722587585,grad_norm: 0.9999991062056821, iteration: 228678
loss: 0.9817415475845337,grad_norm: 0.9999995333023943, iteration: 228679
loss: 0.9435523152351379,grad_norm: 0.9999989777121329, iteration: 228680
loss: 0.9909555912017822,grad_norm: 0.9999992466606702, iteration: 228681
loss: 1.0367562770843506,grad_norm: 0.9999992772097595, iteration: 228682
loss: 0.9713285565376282,grad_norm: 0.9254987808378294, iteration: 228683
loss: 0.9978452920913696,grad_norm: 0.9855896556131722, iteration: 228684
loss: 1.0161107778549194,grad_norm: 0.9420235771376531, iteration: 228685
loss: 0.9899461269378662,grad_norm: 0.7335232596748797, iteration: 228686
loss: 1.0369206666946411,grad_norm: 0.9999996965909651, iteration: 228687
loss: 1.0254729986190796,grad_norm: 0.9999992171720903, iteration: 228688
loss: 1.0058964490890503,grad_norm: 0.9999990224544177, iteration: 228689
loss: 0.9748995900154114,grad_norm: 0.9999990640005032, iteration: 228690
loss: 0.9771885275840759,grad_norm: 0.839478622836694, iteration: 228691
loss: 1.001693606376648,grad_norm: 0.9999998094553745, iteration: 228692
loss: 1.0125964879989624,grad_norm: 0.9999991837421289, iteration: 228693
loss: 1.0199851989746094,grad_norm: 0.9999991865413171, iteration: 228694
loss: 0.9560683369636536,grad_norm: 0.9999989796044045, iteration: 228695
loss: 0.999455451965332,grad_norm: 0.831286813843797, iteration: 228696
loss: 0.9836795926094055,grad_norm: 0.9999990601679738, iteration: 228697
loss: 0.9983814358711243,grad_norm: 0.99999950140902, iteration: 228698
loss: 0.9731192588806152,grad_norm: 0.984970174930364, iteration: 228699
loss: 0.9922372102737427,grad_norm: 0.9999996892101639, iteration: 228700
loss: 1.0220012664794922,grad_norm: 0.8130930735830709, iteration: 228701
loss: 1.0867640972137451,grad_norm: 0.999999584285942, iteration: 228702
loss: 0.9877499341964722,grad_norm: 0.942393424249077, iteration: 228703
loss: 1.0398755073547363,grad_norm: 0.9999990464579167, iteration: 228704
loss: 1.14691162109375,grad_norm: 0.9999997507937284, iteration: 228705
loss: 0.9595816135406494,grad_norm: 0.9999990576837213, iteration: 228706
loss: 1.0126242637634277,grad_norm: 0.9999992136997093, iteration: 228707
loss: 0.9874074459075928,grad_norm: 0.7890064064419368, iteration: 228708
loss: 1.097642421722412,grad_norm: 0.9999998825868164, iteration: 228709
loss: 1.0330710411071777,grad_norm: 0.9999997795921021, iteration: 228710
loss: 1.0055283308029175,grad_norm: 0.9999994338541067, iteration: 228711
loss: 1.2111607789993286,grad_norm: 0.9999998883393528, iteration: 228712
loss: 1.0324825048446655,grad_norm: 0.9999999841503951, iteration: 228713
loss: 0.9832022190093994,grad_norm: 0.9999990731297228, iteration: 228714
loss: 1.1541383266448975,grad_norm: 0.9999999660365105, iteration: 228715
loss: 1.0018649101257324,grad_norm: 0.976189534002305, iteration: 228716
loss: 0.940569281578064,grad_norm: 0.8550896675198306, iteration: 228717
loss: 1.0108065605163574,grad_norm: 0.99999904587692, iteration: 228718
loss: 1.003522276878357,grad_norm: 0.9984387693956023, iteration: 228719
loss: 0.9720016717910767,grad_norm: 0.8482253074402961, iteration: 228720
loss: 1.1481178998947144,grad_norm: 0.9999990353976498, iteration: 228721
loss: 1.004946231842041,grad_norm: 0.9897939007009521, iteration: 228722
loss: 0.9574934244155884,grad_norm: 0.8987986145896654, iteration: 228723
loss: 1.026414155960083,grad_norm: 0.8876764949046897, iteration: 228724
loss: 1.0072299242019653,grad_norm: 0.8563318743151105, iteration: 228725
loss: 1.038324236869812,grad_norm: 0.9999991748693235, iteration: 228726
loss: 1.0467584133148193,grad_norm: 0.8173170927833191, iteration: 228727
loss: 1.029016375541687,grad_norm: 0.989283271448528, iteration: 228728
loss: 0.9782394170761108,grad_norm: 0.8015463377964678, iteration: 228729
loss: 0.9897611737251282,grad_norm: 0.9760183818117786, iteration: 228730
loss: 1.0540410280227661,grad_norm: 0.9999991614891449, iteration: 228731
loss: 1.0199635028839111,grad_norm: 0.9840763120001285, iteration: 228732
loss: 1.0500810146331787,grad_norm: 0.9999993900632898, iteration: 228733
loss: 1.0147526264190674,grad_norm: 0.8113827281245558, iteration: 228734
loss: 0.9953604340553284,grad_norm: 0.9837450018715039, iteration: 228735
loss: 1.0048387050628662,grad_norm: 0.9819776671531651, iteration: 228736
loss: 1.0234355926513672,grad_norm: 0.9109625808362856, iteration: 228737
loss: 0.994368851184845,grad_norm: 0.8692409336017141, iteration: 228738
loss: 1.0475138425827026,grad_norm: 0.9999992993678843, iteration: 228739
loss: 1.0171598196029663,grad_norm: 0.9999996767792892, iteration: 228740
loss: 1.0571507215499878,grad_norm: 0.9999991233469941, iteration: 228741
loss: 1.0219212770462036,grad_norm: 0.999999334693923, iteration: 228742
loss: 0.9564688205718994,grad_norm: 0.9906413324044493, iteration: 228743
loss: 0.9633044600486755,grad_norm: 0.8915364039742811, iteration: 228744
loss: 1.0288687944412231,grad_norm: 0.994058587635211, iteration: 228745
loss: 0.9990580677986145,grad_norm: 0.9999991183697142, iteration: 228746
loss: 1.02490234375,grad_norm: 0.90438675007167, iteration: 228747
loss: 1.0156645774841309,grad_norm: 0.8626001972122426, iteration: 228748
loss: 1.0086333751678467,grad_norm: 0.999999274762065, iteration: 228749
loss: 1.0839266777038574,grad_norm: 0.9999996688430048, iteration: 228750
loss: 1.0250065326690674,grad_norm: 0.99999906997276, iteration: 228751
loss: 0.9708641767501831,grad_norm: 0.9423433979634734, iteration: 228752
loss: 1.0095102787017822,grad_norm: 0.8954421611348052, iteration: 228753
loss: 1.0306676626205444,grad_norm: 0.7549798276355161, iteration: 228754
loss: 1.0319271087646484,grad_norm: 1.0000000261718753, iteration: 228755
loss: 0.9782534837722778,grad_norm: 0.9637336300265397, iteration: 228756
loss: 1.0630340576171875,grad_norm: 0.9999998508116448, iteration: 228757
loss: 1.0089809894561768,grad_norm: 0.9261949911987108, iteration: 228758
loss: 0.9860464334487915,grad_norm: 0.9653642545281242, iteration: 228759
loss: 1.0458383560180664,grad_norm: 0.9999990462086696, iteration: 228760
loss: 0.9859734773635864,grad_norm: 0.999999227254425, iteration: 228761
loss: 1.211824655532837,grad_norm: 0.9999998511285645, iteration: 228762
loss: 1.0084340572357178,grad_norm: 0.999999910743346, iteration: 228763
loss: 0.9912956953048706,grad_norm: 0.999999046994425, iteration: 228764
loss: 0.9677316546440125,grad_norm: 0.9453013193464559, iteration: 228765
loss: 0.9870647192001343,grad_norm: 0.7440573404689591, iteration: 228766
loss: 1.0688635110855103,grad_norm: 0.9999991096107455, iteration: 228767
loss: 1.0239423513412476,grad_norm: 0.8005493849528742, iteration: 228768
loss: 0.986807644367218,grad_norm: 0.9528524113159189, iteration: 228769
loss: 0.9988213777542114,grad_norm: 0.9999992445297292, iteration: 228770
loss: 1.0786442756652832,grad_norm: 0.9999997793908016, iteration: 228771
loss: 1.006020426750183,grad_norm: 0.999999136706437, iteration: 228772
loss: 1.0304975509643555,grad_norm: 0.9999996520180073, iteration: 228773
loss: 0.9857383966445923,grad_norm: 0.9190916576736093, iteration: 228774
loss: 1.0245698690414429,grad_norm: 0.9999997653098226, iteration: 228775
loss: 0.9783891439437866,grad_norm: 0.9489376956546214, iteration: 228776
loss: 1.0101299285888672,grad_norm: 0.8474882415318, iteration: 228777
loss: 0.986096978187561,grad_norm: 0.7803593208825615, iteration: 228778
loss: 1.0150514841079712,grad_norm: 0.9999990157361445, iteration: 228779
loss: 0.9702190160751343,grad_norm: 0.9012063035070436, iteration: 228780
loss: 0.9782763123512268,grad_norm: 0.8187523394723328, iteration: 228781
loss: 1.024265170097351,grad_norm: 0.9999991448191031, iteration: 228782
loss: 1.018988013267517,grad_norm: 0.953287289812922, iteration: 228783
loss: 0.9857217669487,grad_norm: 0.9999991816700844, iteration: 228784
loss: 1.0445219278335571,grad_norm: 0.9999995229085883, iteration: 228785
loss: 1.0719982385635376,grad_norm: 1.0000000195460474, iteration: 228786
loss: 1.0863162279129028,grad_norm: 0.9999993906818399, iteration: 228787
loss: 0.9610145688056946,grad_norm: 0.9999989736160051, iteration: 228788
loss: 0.9742394089698792,grad_norm: 0.8946255059916839, iteration: 228789
loss: 0.9852607250213623,grad_norm: 0.9999990430455586, iteration: 228790
loss: 1.0153663158416748,grad_norm: 0.9999998825129113, iteration: 228791
loss: 1.015741229057312,grad_norm: 0.8965769645423689, iteration: 228792
loss: 1.0136444568634033,grad_norm: 0.9999991098984229, iteration: 228793
loss: 1.0221806764602661,grad_norm: 0.8427860676239255, iteration: 228794
loss: 1.0239529609680176,grad_norm: 0.993172955252279, iteration: 228795
loss: 0.9946274161338806,grad_norm: 0.8427169203832456, iteration: 228796
loss: 1.0062954425811768,grad_norm: 0.9999994382890354, iteration: 228797
loss: 0.9889499545097351,grad_norm: 0.9722015639823379, iteration: 228798
loss: 0.9724848866462708,grad_norm: 0.9321022715414976, iteration: 228799
loss: 1.0380672216415405,grad_norm: 0.9999998702520789, iteration: 228800
loss: 0.9853646755218506,grad_norm: 0.9999995600088264, iteration: 228801
loss: 1.046627163887024,grad_norm: 0.9999997865518382, iteration: 228802
loss: 0.9698669910430908,grad_norm: 1.0000000086504157, iteration: 228803
loss: 1.0090668201446533,grad_norm: 0.9479267280723218, iteration: 228804
loss: 1.0220205783843994,grad_norm: 0.9999990188685633, iteration: 228805
loss: 1.0841952562332153,grad_norm: 0.9999998897201428, iteration: 228806
loss: 1.0332874059677124,grad_norm: 0.9111325271108079, iteration: 228807
loss: 1.005605936050415,grad_norm: 0.9703131990152517, iteration: 228808
loss: 1.0147260427474976,grad_norm: 0.7563682253693029, iteration: 228809
loss: 1.0398645401000977,grad_norm: 0.8612025112001418, iteration: 228810
loss: 0.9671468734741211,grad_norm: 0.9999989914098264, iteration: 228811
loss: 0.9615420699119568,grad_norm: 0.9999990181662557, iteration: 228812
loss: 1.0041898488998413,grad_norm: 0.9099632999185149, iteration: 228813
loss: 1.0978890657424927,grad_norm: 0.999999726855737, iteration: 228814
loss: 1.0301393270492554,grad_norm: 0.9604100548211313, iteration: 228815
loss: 1.140273094177246,grad_norm: 0.9999994535265353, iteration: 228816
loss: 0.9972186088562012,grad_norm: 0.8321169558067545, iteration: 228817
loss: 1.0057328939437866,grad_norm: 0.987616671024988, iteration: 228818
loss: 1.0396854877471924,grad_norm: 0.8393979148358949, iteration: 228819
loss: 0.9437639117240906,grad_norm: 0.9972744698662279, iteration: 228820
loss: 0.961077868938446,grad_norm: 0.9770231408619412, iteration: 228821
loss: 1.0197350978851318,grad_norm: 0.9999990418393734, iteration: 228822
loss: 1.0338982343673706,grad_norm: 0.9999990864070938, iteration: 228823
loss: 0.9680535793304443,grad_norm: 0.999999035034814, iteration: 228824
loss: 1.0171594619750977,grad_norm: 0.8364439930572164, iteration: 228825
loss: 0.9667620062828064,grad_norm: 0.9716493564273891, iteration: 228826
loss: 1.0274606943130493,grad_norm: 0.9999990177674947, iteration: 228827
loss: 1.0230151414871216,grad_norm: 0.8020115764564469, iteration: 228828
loss: 0.9749597311019897,grad_norm: 0.8152603914202008, iteration: 228829
loss: 1.0122594833374023,grad_norm: 0.9999991569711009, iteration: 228830
loss: 0.9847524166107178,grad_norm: 0.999998881219971, iteration: 228831
loss: 1.1282830238342285,grad_norm: 0.9999994248964719, iteration: 228832
loss: 0.9466476440429688,grad_norm: 0.8959844432574262, iteration: 228833
loss: 0.9979116320610046,grad_norm: 0.9686926621728202, iteration: 228834
loss: 1.0284950733184814,grad_norm: 0.9432857203857001, iteration: 228835
loss: 0.9690377712249756,grad_norm: 0.9603507065992676, iteration: 228836
loss: 1.012552261352539,grad_norm: 0.9999989815647213, iteration: 228837
loss: 1.0061265230178833,grad_norm: 0.9909106847772768, iteration: 228838
loss: 0.9958819150924683,grad_norm: 0.9999991191562377, iteration: 228839
loss: 0.9745184779167175,grad_norm: 0.9999993919811777, iteration: 228840
loss: 0.9986514449119568,grad_norm: 0.9999992534706288, iteration: 228841
loss: 1.0333940982818604,grad_norm: 0.9999992665202403, iteration: 228842
loss: 0.9791076183319092,grad_norm: 0.8501546012793698, iteration: 228843
loss: 1.0320415496826172,grad_norm: 0.9270099266975995, iteration: 228844
loss: 0.9836731553077698,grad_norm: 0.9999998047413053, iteration: 228845
loss: 0.9918134808540344,grad_norm: 0.7453321718178112, iteration: 228846
loss: 1.0019901990890503,grad_norm: 0.9100101254366754, iteration: 228847
loss: 1.0038930177688599,grad_norm: 0.9999989985289109, iteration: 228848
loss: 1.081652283668518,grad_norm: 0.9999991252596073, iteration: 228849
loss: 0.9797578454017639,grad_norm: 0.9804304176240278, iteration: 228850
loss: 1.0212641954421997,grad_norm: 0.9999990353291847, iteration: 228851
loss: 0.9898518323898315,grad_norm: 0.901443678269208, iteration: 228852
loss: 1.0104975700378418,grad_norm: 0.9995719694351215, iteration: 228853
loss: 1.0073506832122803,grad_norm: 0.9999991559515471, iteration: 228854
loss: 1.076655387878418,grad_norm: 0.9999990390812782, iteration: 228855
loss: 0.9802871346473694,grad_norm: 0.8993614151279805, iteration: 228856
loss: 0.9882651567459106,grad_norm: 0.8881976985088273, iteration: 228857
loss: 0.9800795912742615,grad_norm: 0.9301133780404689, iteration: 228858
loss: 1.0098559856414795,grad_norm: 0.9999991822242492, iteration: 228859
loss: 0.9857957363128662,grad_norm: 0.8087124331527249, iteration: 228860
loss: 1.0599340200424194,grad_norm: 0.9999995471054633, iteration: 228861
loss: 1.006413459777832,grad_norm: 0.840669929493108, iteration: 228862
loss: 1.0019210577011108,grad_norm: 0.9927794399067679, iteration: 228863
loss: 0.9680517315864563,grad_norm: 0.9122775398760886, iteration: 228864
loss: 0.9675866365432739,grad_norm: 0.9999990559984425, iteration: 228865
loss: 0.9601264595985413,grad_norm: 0.999999221878787, iteration: 228866
loss: 1.048416018486023,grad_norm: 0.999999218097902, iteration: 228867
loss: 0.9779689908027649,grad_norm: 0.7604686570496516, iteration: 228868
loss: 0.9743626713752747,grad_norm: 0.8115540178375497, iteration: 228869
loss: 0.9986078143119812,grad_norm: 0.9999990270685595, iteration: 228870
loss: 0.984396755695343,grad_norm: 0.9270603996861094, iteration: 228871
loss: 1.0076696872711182,grad_norm: 0.9999991754429758, iteration: 228872
loss: 1.1193190813064575,grad_norm: 0.9999993301255602, iteration: 228873
loss: 1.0046145915985107,grad_norm: 0.7632344085351678, iteration: 228874
loss: 1.0211433172225952,grad_norm: 0.9999989770507444, iteration: 228875
loss: 1.055345058441162,grad_norm: 0.9999990922074997, iteration: 228876
loss: 1.0173190832138062,grad_norm: 0.9492315840150078, iteration: 228877
loss: 0.9728044271469116,grad_norm: 0.9850722306433256, iteration: 228878
loss: 0.9599930644035339,grad_norm: 0.8917638933466938, iteration: 228879
loss: 1.0179976224899292,grad_norm: 0.9394536822385171, iteration: 228880
loss: 0.9729989171028137,grad_norm: 0.9999992774947732, iteration: 228881
loss: 0.9934490323066711,grad_norm: 0.7656264717463509, iteration: 228882
loss: 1.0725340843200684,grad_norm: 0.9999997563509126, iteration: 228883
loss: 1.0168761014938354,grad_norm: 0.9999994763137469, iteration: 228884
loss: 1.0186283588409424,grad_norm: 0.9913202918856004, iteration: 228885
loss: 0.9713724255561829,grad_norm: 0.9818448020995005, iteration: 228886
loss: 1.0071632862091064,grad_norm: 0.9999996349802611, iteration: 228887
loss: 1.0344494581222534,grad_norm: 0.999999681824281, iteration: 228888
loss: 1.0126006603240967,grad_norm: 0.9999998709585783, iteration: 228889
loss: 1.0339399576187134,grad_norm: 0.9999990527508132, iteration: 228890
loss: 0.9741633534431458,grad_norm: 0.8908618615796005, iteration: 228891
loss: 0.9858968257904053,grad_norm: 0.8921416544828403, iteration: 228892
loss: 0.996113121509552,grad_norm: 0.8844010655004829, iteration: 228893
loss: 1.031245470046997,grad_norm: 0.8437497939643924, iteration: 228894
loss: 1.107005000114441,grad_norm: 0.999999213759958, iteration: 228895
loss: 0.9707971811294556,grad_norm: 0.8827650204980171, iteration: 228896
loss: 1.0430799722671509,grad_norm: 0.9190218538184127, iteration: 228897
loss: 1.0085381269454956,grad_norm: 0.938660599545304, iteration: 228898
loss: 0.992059588432312,grad_norm: 0.9548026160479413, iteration: 228899
loss: 0.9941251873970032,grad_norm: 0.8999426857841891, iteration: 228900
loss: 0.9962309002876282,grad_norm: 0.9042071856937158, iteration: 228901
loss: 1.015028476715088,grad_norm: 0.9999991545554155, iteration: 228902
loss: 0.9643540382385254,grad_norm: 0.9529011654327442, iteration: 228903
loss: 1.1375030279159546,grad_norm: 0.9999998926783022, iteration: 228904
loss: 1.0210975408554077,grad_norm: 0.9999995572087806, iteration: 228905
loss: 1.0162924528121948,grad_norm: 0.9999990611884331, iteration: 228906
loss: 0.9865990281105042,grad_norm: 0.9999991157762249, iteration: 228907
loss: 1.0098191499710083,grad_norm: 0.9123958207107559, iteration: 228908
loss: 0.9955224394798279,grad_norm: 0.8984643940236356, iteration: 228909
loss: 1.0499240159988403,grad_norm: 0.8554397239187833, iteration: 228910
loss: 1.0099506378173828,grad_norm: 0.7397568876069888, iteration: 228911
loss: 0.9603338241577148,grad_norm: 0.9999990968357364, iteration: 228912
loss: 0.9822715520858765,grad_norm: 0.8704707401360938, iteration: 228913
loss: 1.0503536462783813,grad_norm: 0.9999990837966911, iteration: 228914
loss: 1.04401695728302,grad_norm: 0.9999993933973398, iteration: 228915
loss: 1.0088304281234741,grad_norm: 0.9684526969367095, iteration: 228916
loss: 1.0384838581085205,grad_norm: 0.9999993379196583, iteration: 228917
loss: 1.0368653535842896,grad_norm: 0.9999989383900192, iteration: 228918
loss: 0.9997738599777222,grad_norm: 0.8410851597410931, iteration: 228919
loss: 1.085050344467163,grad_norm: 0.9999995449398114, iteration: 228920
loss: 0.9905880093574524,grad_norm: 0.9999990268689234, iteration: 228921
loss: 0.9790637493133545,grad_norm: 0.8585847429671207, iteration: 228922
loss: 1.1593557596206665,grad_norm: 0.9999993869573246, iteration: 228923
loss: 1.0042695999145508,grad_norm: 0.99999905694592, iteration: 228924
loss: 1.0283275842666626,grad_norm: 0.9372481965154107, iteration: 228925
loss: 1.0090678930282593,grad_norm: 0.9999991747951091, iteration: 228926
loss: 0.9988248944282532,grad_norm: 0.9999991429828199, iteration: 228927
loss: 0.9815647006034851,grad_norm: 0.899109295140154, iteration: 228928
loss: 1.018134355545044,grad_norm: 0.8977286602652916, iteration: 228929
loss: 0.9888661503791809,grad_norm: 0.7144180890482628, iteration: 228930
loss: 1.0059298276901245,grad_norm: 0.8658092971478438, iteration: 228931
loss: 0.9719435572624207,grad_norm: 0.9915989651324192, iteration: 228932
loss: 1.1003414392471313,grad_norm: 0.7708794942409077, iteration: 228933
loss: 1.0470103025436401,grad_norm: 0.999999191409987, iteration: 228934
loss: 1.0109987258911133,grad_norm: 0.9701319381491053, iteration: 228935
loss: 1.0209603309631348,grad_norm: 0.9369244639908823, iteration: 228936
loss: 0.9880120754241943,grad_norm: 0.999999056491881, iteration: 228937
loss: 1.0372616052627563,grad_norm: 0.8687903483418449, iteration: 228938
loss: 1.0254201889038086,grad_norm: 0.9999991337455135, iteration: 228939
loss: 1.0241115093231201,grad_norm: 0.9783502423799577, iteration: 228940
loss: 1.0196627378463745,grad_norm: 0.8945378768515538, iteration: 228941
loss: 1.014958381652832,grad_norm: 0.9999991491715632, iteration: 228942
loss: 0.9644098281860352,grad_norm: 0.9788855508176754, iteration: 228943
loss: 1.0205848217010498,grad_norm: 0.8380568112001278, iteration: 228944
loss: 1.015642523765564,grad_norm: 0.9999991874207036, iteration: 228945
loss: 0.9889232516288757,grad_norm: 0.9002390976177653, iteration: 228946
loss: 0.9852648377418518,grad_norm: 0.9999994854739046, iteration: 228947
loss: 1.024572491645813,grad_norm: 0.8680993596758805, iteration: 228948
loss: 1.0026302337646484,grad_norm: 0.8670406968406311, iteration: 228949
loss: 0.979858934879303,grad_norm: 0.9999988936376996, iteration: 228950
loss: 1.0000486373901367,grad_norm: 0.9999990940659559, iteration: 228951
loss: 0.9869381189346313,grad_norm: 0.999999132604096, iteration: 228952
loss: 1.0359522104263306,grad_norm: 0.9999990833571049, iteration: 228953
loss: 0.9859657883644104,grad_norm: 0.8988935954312266, iteration: 228954
loss: 0.9866968393325806,grad_norm: 0.8830274509504327, iteration: 228955
loss: 0.999600887298584,grad_norm: 0.8390452866202152, iteration: 228956
loss: 0.9918130040168762,grad_norm: 0.9999990969864403, iteration: 228957
loss: 0.9727688431739807,grad_norm: 0.9208061334706833, iteration: 228958
loss: 0.9964879155158997,grad_norm: 0.9325593927089749, iteration: 228959
loss: 0.973858118057251,grad_norm: 0.8108314986431596, iteration: 228960
loss: 1.0078537464141846,grad_norm: 0.7773872230923192, iteration: 228961
loss: 1.0595448017120361,grad_norm: 0.9999991201136424, iteration: 228962
loss: 0.9845076203346252,grad_norm: 0.8969437836690057, iteration: 228963
loss: 0.9728782176971436,grad_norm: 0.9223630222371354, iteration: 228964
loss: 0.9999687075614929,grad_norm: 0.9020827764163908, iteration: 228965
loss: 1.0071806907653809,grad_norm: 0.9999997694943475, iteration: 228966
loss: 1.0057741403579712,grad_norm: 0.999999692284088, iteration: 228967
loss: 1.0037896633148193,grad_norm: 0.9999990869160897, iteration: 228968
loss: 1.0061177015304565,grad_norm: 0.8592366328991029, iteration: 228969
loss: 1.0148088932037354,grad_norm: 0.771399960482712, iteration: 228970
loss: 1.0042657852172852,grad_norm: 0.920348135763455, iteration: 228971
loss: 1.0147353410720825,grad_norm: 0.9999992258321575, iteration: 228972
loss: 0.9965497851371765,grad_norm: 0.872738465450275, iteration: 228973
loss: 1.165331482887268,grad_norm: 0.9999992852155345, iteration: 228974
loss: 0.9981706738471985,grad_norm: 0.9999991146471235, iteration: 228975
loss: 1.0034661293029785,grad_norm: 0.9688641408532399, iteration: 228976
loss: 1.0024805068969727,grad_norm: 0.9999990459919392, iteration: 228977
loss: 1.015011191368103,grad_norm: 0.999998996645049, iteration: 228978
loss: 1.0297496318817139,grad_norm: 0.7959962180889614, iteration: 228979
loss: 0.9701502323150635,grad_norm: 0.7926774813467115, iteration: 228980
loss: 1.0208348035812378,grad_norm: 0.999999057872486, iteration: 228981
loss: 0.994713306427002,grad_norm: 0.8909604371184839, iteration: 228982
loss: 1.050464153289795,grad_norm: 0.99999946382179, iteration: 228983
loss: 1.1095174551010132,grad_norm: 0.9999999546197348, iteration: 228984
loss: 1.0463241338729858,grad_norm: 0.8558211685139486, iteration: 228985
loss: 1.014542579650879,grad_norm: 0.9999991808286804, iteration: 228986
loss: 0.985459566116333,grad_norm: 0.6716225097461505, iteration: 228987
loss: 1.0637388229370117,grad_norm: 0.9999997739849663, iteration: 228988
loss: 0.9822535514831543,grad_norm: 0.9999989724165343, iteration: 228989
loss: 0.9458982944488525,grad_norm: 0.9895648132108207, iteration: 228990
loss: 1.0462299585342407,grad_norm: 0.9999992182124506, iteration: 228991
loss: 1.0013682842254639,grad_norm: 0.8630774098195315, iteration: 228992
loss: 0.973491907119751,grad_norm: 0.8969160068040376, iteration: 228993
loss: 1.0938249826431274,grad_norm: 0.9999993866387249, iteration: 228994
loss: 1.0361793041229248,grad_norm: 0.9889472851963907, iteration: 228995
loss: 1.034677267074585,grad_norm: 0.825281135473114, iteration: 228996
loss: 1.0167279243469238,grad_norm: 0.7990638973817454, iteration: 228997
loss: 1.014887809753418,grad_norm: 0.8743598272703028, iteration: 228998
loss: 1.013048768043518,grad_norm: 0.9999989673367279, iteration: 228999
loss: 0.996266782283783,grad_norm: 0.9826630842465247, iteration: 229000
loss: 0.9957985877990723,grad_norm: 0.9999992247659336, iteration: 229001
loss: 0.9897337555885315,grad_norm: 0.9073252678427794, iteration: 229002
loss: 0.9932807087898254,grad_norm: 0.7819097864758991, iteration: 229003
loss: 1.0276545286178589,grad_norm: 0.9999991156899734, iteration: 229004
loss: 1.0344359874725342,grad_norm: 0.7508234000491502, iteration: 229005
loss: 1.0067763328552246,grad_norm: 0.9423320302586987, iteration: 229006
loss: 1.0603572130203247,grad_norm: 0.9821537395157299, iteration: 229007
loss: 1.0105259418487549,grad_norm: 0.9668636308860373, iteration: 229008
loss: 1.0704773664474487,grad_norm: 0.9999992622865694, iteration: 229009
loss: 1.0451546907424927,grad_norm: 0.9999989710406405, iteration: 229010
loss: 0.9986360669136047,grad_norm: 0.9999992008089483, iteration: 229011
loss: 1.0133235454559326,grad_norm: 0.8767328745231102, iteration: 229012
loss: 1.0065869092941284,grad_norm: 0.8360166299091749, iteration: 229013
loss: 1.0166852474212646,grad_norm: 0.9261594511458766, iteration: 229014
loss: 0.994879424571991,grad_norm: 0.6942477537250673, iteration: 229015
loss: 1.0158799886703491,grad_norm: 0.9999992077112231, iteration: 229016
loss: 1.0252125263214111,grad_norm: 0.840783533414378, iteration: 229017
loss: 0.9735593199729919,grad_norm: 0.862596682170089, iteration: 229018
loss: 0.987915575504303,grad_norm: 0.9790387960678015, iteration: 229019
loss: 1.0310680866241455,grad_norm: 0.9999999917994142, iteration: 229020
loss: 0.9766398072242737,grad_norm: 0.9999991453597906, iteration: 229021
loss: 1.0021331310272217,grad_norm: 0.8147403628310685, iteration: 229022
loss: 1.0192692279815674,grad_norm: 0.9999988717086301, iteration: 229023
loss: 1.0264383554458618,grad_norm: 0.9999990726587318, iteration: 229024
loss: 1.0636628866195679,grad_norm: 0.9999991682185386, iteration: 229025
loss: 1.0042235851287842,grad_norm: 0.999999569820369, iteration: 229026
loss: 1.0197393894195557,grad_norm: 0.866861815552591, iteration: 229027
loss: 0.9966149926185608,grad_norm: 0.8327083366251112, iteration: 229028
loss: 1.0252249240875244,grad_norm: 0.9999992184763326, iteration: 229029
loss: 0.9975338578224182,grad_norm: 0.9999990117943753, iteration: 229030
loss: 1.0051250457763672,grad_norm: 0.7816845501883493, iteration: 229031
loss: 1.0190595388412476,grad_norm: 0.9999991879896998, iteration: 229032
loss: 1.0160166025161743,grad_norm: 0.9999990385994784, iteration: 229033
loss: 0.9765889644622803,grad_norm: 0.7934380417381929, iteration: 229034
loss: 0.9948005676269531,grad_norm: 0.9999994122179829, iteration: 229035
loss: 0.9954987168312073,grad_norm: 0.9621000312807739, iteration: 229036
loss: 0.957987904548645,grad_norm: 0.9999992338087897, iteration: 229037
loss: 0.9824172854423523,grad_norm: 0.8918201747139111, iteration: 229038
loss: 1.011500597000122,grad_norm: 0.9999990640537582, iteration: 229039
loss: 1.0203500986099243,grad_norm: 0.9845606334795027, iteration: 229040
loss: 1.0042195320129395,grad_norm: 0.9999990810503244, iteration: 229041
loss: 0.9587557315826416,grad_norm: 0.8220518035827115, iteration: 229042
loss: 0.9896315336227417,grad_norm: 0.9999991604551532, iteration: 229043
loss: 1.0206027030944824,grad_norm: 0.873004851131602, iteration: 229044
loss: 1.0311999320983887,grad_norm: 0.997379540656993, iteration: 229045
loss: 1.0344064235687256,grad_norm: 0.9999992326827447, iteration: 229046
loss: 1.009393334388733,grad_norm: 0.9394753531317791, iteration: 229047
loss: 0.9815607666969299,grad_norm: 0.934451636671276, iteration: 229048
loss: 0.9861186742782593,grad_norm: 0.9999992177867514, iteration: 229049
loss: 1.0237454175949097,grad_norm: 0.891483392777751, iteration: 229050
loss: 1.010617971420288,grad_norm: 0.9536602567938813, iteration: 229051
loss: 1.0072206258773804,grad_norm: 0.973917305638566, iteration: 229052
loss: 0.9924229979515076,grad_norm: 0.9999992267515743, iteration: 229053
loss: 1.034540057182312,grad_norm: 0.9999991344249779, iteration: 229054
loss: 1.0176421403884888,grad_norm: 0.9999991406992466, iteration: 229055
loss: 1.0120042562484741,grad_norm: 0.8576416462353394, iteration: 229056
loss: 0.929093599319458,grad_norm: 0.9999989813110853, iteration: 229057
loss: 0.9876034259796143,grad_norm: 0.9999991033077126, iteration: 229058
loss: 0.9912109971046448,grad_norm: 0.9291438182214579, iteration: 229059
loss: 1.0142370462417603,grad_norm: 0.8397649547294277, iteration: 229060
loss: 0.9995867013931274,grad_norm: 0.7675849281672712, iteration: 229061
loss: 0.9996399879455566,grad_norm: 0.9999991593919094, iteration: 229062
loss: 0.986645519733429,grad_norm: 0.8742834637579958, iteration: 229063
loss: 1.021200180053711,grad_norm: 0.843768750489074, iteration: 229064
loss: 1.0141572952270508,grad_norm: 0.999999144986718, iteration: 229065
loss: 1.0034594535827637,grad_norm: 0.9227300797029084, iteration: 229066
loss: 1.0084635019302368,grad_norm: 0.9999991184798018, iteration: 229067
loss: 1.0346695184707642,grad_norm: 0.9999990927041754, iteration: 229068
loss: 1.0278908014297485,grad_norm: 0.8615488449218752, iteration: 229069
loss: 0.9897512197494507,grad_norm: 0.97476969388897, iteration: 229070
loss: 1.0196657180786133,grad_norm: 0.9756486728494193, iteration: 229071
loss: 1.003671646118164,grad_norm: 0.9999990663857335, iteration: 229072
loss: 1.0400030612945557,grad_norm: 0.9999998516952645, iteration: 229073
loss: 0.9779409170150757,grad_norm: 0.9514870446557543, iteration: 229074
loss: 0.9942678213119507,grad_norm: 0.999999080132148, iteration: 229075
loss: 1.0131396055221558,grad_norm: 0.936228864633866, iteration: 229076
loss: 1.0026583671569824,grad_norm: 0.9536889301037765, iteration: 229077
loss: 1.1587971448898315,grad_norm: 0.999999686745045, iteration: 229078
loss: 1.0110818147659302,grad_norm: 0.9633632004968166, iteration: 229079
loss: 1.0278337001800537,grad_norm: 0.9999998532896238, iteration: 229080
loss: 1.0436527729034424,grad_norm: 0.999999001168351, iteration: 229081
loss: 1.0189427137374878,grad_norm: 0.9999997444199672, iteration: 229082
loss: 1.0074831247329712,grad_norm: 0.9248371277577271, iteration: 229083
loss: 1.104377031326294,grad_norm: 0.9999990859190853, iteration: 229084
loss: 0.9962331056594849,grad_norm: 0.9473257783521014, iteration: 229085
loss: 1.0161086320877075,grad_norm: 0.8311914211681328, iteration: 229086
loss: 1.0427743196487427,grad_norm: 0.9999994973664341, iteration: 229087
loss: 1.024579405784607,grad_norm: 0.9999993385995355, iteration: 229088
loss: 1.0290371179580688,grad_norm: 0.999999296082973, iteration: 229089
loss: 1.00684654712677,grad_norm: 0.9999994183243197, iteration: 229090
loss: 1.0015672445297241,grad_norm: 0.9206052091794725, iteration: 229091
loss: 0.9725719690322876,grad_norm: 0.9534903164636317, iteration: 229092
loss: 1.0281018018722534,grad_norm: 0.9999990841974923, iteration: 229093
loss: 0.9939019680023193,grad_norm: 0.9751434945348113, iteration: 229094
loss: 1.025045394897461,grad_norm: 0.9999993061000584, iteration: 229095
loss: 0.987069845199585,grad_norm: 0.9291067352801265, iteration: 229096
loss: 0.9859805107116699,grad_norm: 0.9738663904062916, iteration: 229097
loss: 0.9800310134887695,grad_norm: 0.9984509822359862, iteration: 229098
loss: 1.0170936584472656,grad_norm: 0.9999989116084008, iteration: 229099
loss: 0.9464958310127258,grad_norm: 0.9598038392562487, iteration: 229100
loss: 0.9958457350730896,grad_norm: 0.9999993711014363, iteration: 229101
loss: 1.0004128217697144,grad_norm: 0.9999991783982921, iteration: 229102
loss: 0.9442868232727051,grad_norm: 0.9304468776937611, iteration: 229103
loss: 1.0036566257476807,grad_norm: 0.9999991999866429, iteration: 229104
loss: 1.0072392225265503,grad_norm: 0.9090537462948586, iteration: 229105
loss: 0.9929749965667725,grad_norm: 0.9999990171914062, iteration: 229106
loss: 0.9968028664588928,grad_norm: 0.9905510861578943, iteration: 229107
loss: 1.038770318031311,grad_norm: 0.9999990079867618, iteration: 229108
loss: 1.0414437055587769,grad_norm: 0.9999992789100722, iteration: 229109
loss: 1.045047640800476,grad_norm: 0.898060704437653, iteration: 229110
loss: 0.9539745450019836,grad_norm: 0.726268580109203, iteration: 229111
loss: 0.978088915348053,grad_norm: 0.877374958241702, iteration: 229112
loss: 0.9922082424163818,grad_norm: 0.9919162363596217, iteration: 229113
loss: 0.9683197140693665,grad_norm: 0.8458653737021579, iteration: 229114
loss: 0.98920738697052,grad_norm: 0.9832753687095476, iteration: 229115
loss: 1.0158284902572632,grad_norm: 0.8820364013368867, iteration: 229116
loss: 1.026749849319458,grad_norm: 0.9999995986997311, iteration: 229117
loss: 1.0445326566696167,grad_norm: 0.988169867099549, iteration: 229118
loss: 1.035765290260315,grad_norm: 0.8164543737267133, iteration: 229119
loss: 1.0016090869903564,grad_norm: 0.9911702269176796, iteration: 229120
loss: 0.9924330711364746,grad_norm: 0.9431636116873711, iteration: 229121
loss: 1.034898281097412,grad_norm: 0.850277532997588, iteration: 229122
loss: 1.0194976329803467,grad_norm: 0.9999990073159898, iteration: 229123
loss: 1.0285409688949585,grad_norm: 0.999999206268173, iteration: 229124
loss: 1.0023963451385498,grad_norm: 0.8147805399537977, iteration: 229125
loss: 0.9789188504219055,grad_norm: 0.9134540267922531, iteration: 229126
loss: 0.9655335545539856,grad_norm: 0.9452783629047309, iteration: 229127
loss: 1.0011255741119385,grad_norm: 0.8113218576489069, iteration: 229128
loss: 1.0340913534164429,grad_norm: 0.9999991412641906, iteration: 229129
loss: 1.0213326215744019,grad_norm: 0.9999994656101515, iteration: 229130
loss: 0.9601038694381714,grad_norm: 0.9322649573788435, iteration: 229131
loss: 0.999985933303833,grad_norm: 0.8959110126770782, iteration: 229132
loss: 0.9578304886817932,grad_norm: 0.8935972331017792, iteration: 229133
loss: 1.0470407009124756,grad_norm: 0.9315126734898054, iteration: 229134
loss: 0.9493340253829956,grad_norm: 0.9105290252403742, iteration: 229135
loss: 1.040984869003296,grad_norm: 0.9882637932633274, iteration: 229136
loss: 1.0890270471572876,grad_norm: 0.9999994107860516, iteration: 229137
loss: 1.0286381244659424,grad_norm: 0.9999990872621344, iteration: 229138
loss: 0.9886537790298462,grad_norm: 0.9481820231244114, iteration: 229139
loss: 1.0215723514556885,grad_norm: 0.8043730138152461, iteration: 229140
loss: 0.9984213709831238,grad_norm: 0.9999992598397568, iteration: 229141
loss: 1.0030286312103271,grad_norm: 0.9999990808793222, iteration: 229142
loss: 1.0306799411773682,grad_norm: 0.9646463667476454, iteration: 229143
loss: 1.0145546197891235,grad_norm: 0.9043854371831129, iteration: 229144
loss: 1.016610026359558,grad_norm: 0.9999989994634914, iteration: 229145
loss: 0.9710625410079956,grad_norm: 0.9388630009248929, iteration: 229146
loss: 1.023451566696167,grad_norm: 0.99999911162317, iteration: 229147
loss: 0.9740070104598999,grad_norm: 0.9106144972352589, iteration: 229148
loss: 0.9956282377243042,grad_norm: 0.9535817835617549, iteration: 229149
loss: 0.9869892001152039,grad_norm: 0.9999992101828397, iteration: 229150
loss: 1.0147417783737183,grad_norm: 0.8385792573656422, iteration: 229151
loss: 0.9702935814857483,grad_norm: 0.8432818621209263, iteration: 229152
loss: 0.9645369648933411,grad_norm: 0.826975211581388, iteration: 229153
loss: 1.0476348400115967,grad_norm: 1.0000000156828428, iteration: 229154
loss: 0.9799688458442688,grad_norm: 0.9877164619657263, iteration: 229155
loss: 1.0098860263824463,grad_norm: 0.9999991111341037, iteration: 229156
loss: 0.9910529255867004,grad_norm: 0.9999991310474367, iteration: 229157
loss: 1.0065463781356812,grad_norm: 0.99999968070657, iteration: 229158
loss: 1.011156439781189,grad_norm: 0.9148805602269299, iteration: 229159
loss: 1.0113590955734253,grad_norm: 0.9999991426905278, iteration: 229160
loss: 1.0281014442443848,grad_norm: 0.7828611252311999, iteration: 229161
loss: 1.0062958002090454,grad_norm: 0.9831891211632928, iteration: 229162
loss: 1.0323312282562256,grad_norm: 0.9877088746240812, iteration: 229163
loss: 0.973767101764679,grad_norm: 0.9999991542317368, iteration: 229164
loss: 1.0605456829071045,grad_norm: 0.9999991709538969, iteration: 229165
loss: 1.0046659708023071,grad_norm: 0.9465668952646711, iteration: 229166
loss: 0.9735411405563354,grad_norm: 0.9999990993118659, iteration: 229167
loss: 1.0107861757278442,grad_norm: 0.8539965689498153, iteration: 229168
loss: 1.0313345193862915,grad_norm: 0.9999992987032614, iteration: 229169
loss: 1.0084092617034912,grad_norm: 0.9999998005592488, iteration: 229170
loss: 1.0516810417175293,grad_norm: 0.9999998306664751, iteration: 229171
loss: 0.9961830973625183,grad_norm: 0.770444143200755, iteration: 229172
loss: 1.0334275960922241,grad_norm: 0.9690878188356931, iteration: 229173
loss: 1.0225199460983276,grad_norm: 0.9113923587625559, iteration: 229174
loss: 1.014121413230896,grad_norm: 0.99999897089018, iteration: 229175
loss: 0.9940187931060791,grad_norm: 0.8783613616442957, iteration: 229176
loss: 1.038678526878357,grad_norm: 0.9996581350823477, iteration: 229177
loss: 1.0226932764053345,grad_norm: 0.8778474448224971, iteration: 229178
loss: 1.0305290222167969,grad_norm: 0.9033878507318883, iteration: 229179
loss: 1.0064120292663574,grad_norm: 0.8796474602043463, iteration: 229180
loss: 0.9940576553344727,grad_norm: 0.9953355012374231, iteration: 229181
loss: 1.0118919610977173,grad_norm: 0.9415627169536429, iteration: 229182
loss: 1.0399320125579834,grad_norm: 0.9038005641880111, iteration: 229183
loss: 1.0152676105499268,grad_norm: 0.7746955281904806, iteration: 229184
loss: 1.0454833507537842,grad_norm: 0.9129693228663274, iteration: 229185
loss: 1.053216576576233,grad_norm: 0.9663517645690182, iteration: 229186
loss: 0.9829694032669067,grad_norm: 0.8844189517876565, iteration: 229187
loss: 0.9835628867149353,grad_norm: 0.9310305038940667, iteration: 229188
loss: 1.0116159915924072,grad_norm: 0.8267895984348507, iteration: 229189
loss: 1.0007147789001465,grad_norm: 0.7762656815052078, iteration: 229190
loss: 1.0082693099975586,grad_norm: 0.9999990486923246, iteration: 229191
loss: 1.0179831981658936,grad_norm: 0.8630923000945732, iteration: 229192
loss: 0.9999720454216003,grad_norm: 0.9133600774166919, iteration: 229193
loss: 0.985057532787323,grad_norm: 0.8985522601505738, iteration: 229194
loss: 1.1065046787261963,grad_norm: 0.9999999375962405, iteration: 229195
loss: 1.0220165252685547,grad_norm: 0.8222430241491617, iteration: 229196
loss: 1.004658818244934,grad_norm: 0.9069418646292858, iteration: 229197
loss: 1.0290268659591675,grad_norm: 0.9999990443119705, iteration: 229198
loss: 1.020067811012268,grad_norm: 0.9999990275635151, iteration: 229199
loss: 1.0012383460998535,grad_norm: 0.9541271314810498, iteration: 229200
loss: 1.0045697689056396,grad_norm: 0.9475329568297403, iteration: 229201
loss: 1.035014033317566,grad_norm: 0.9999995308657487, iteration: 229202
loss: 0.994601309299469,grad_norm: 0.9999992080361227, iteration: 229203
loss: 1.0732632875442505,grad_norm: 0.9999999110814481, iteration: 229204
loss: 0.9665517210960388,grad_norm: 0.9175308085246314, iteration: 229205
loss: 1.0448895692825317,grad_norm: 0.972883112086828, iteration: 229206
loss: 1.0127962827682495,grad_norm: 0.9035644285569422, iteration: 229207
loss: 1.0331826210021973,grad_norm: 0.9181413260204282, iteration: 229208
loss: 1.0167102813720703,grad_norm: 0.8877377123794893, iteration: 229209
loss: 0.9740680456161499,grad_norm: 0.9795250204774009, iteration: 229210
loss: 0.962677001953125,grad_norm: 0.8219186856751477, iteration: 229211
loss: 1.0078258514404297,grad_norm: 0.999999123616291, iteration: 229212
loss: 0.9991207718849182,grad_norm: 0.9451277655686027, iteration: 229213
loss: 0.9968352317810059,grad_norm: 0.8770244390340061, iteration: 229214
loss: 1.0549925565719604,grad_norm: 0.9899604365036827, iteration: 229215
loss: 1.0023200511932373,grad_norm: 0.8658710586367282, iteration: 229216
loss: 0.971473753452301,grad_norm: 0.9493445258570125, iteration: 229217
loss: 1.002758264541626,grad_norm: 0.8631223699707248, iteration: 229218
loss: 1.0013539791107178,grad_norm: 0.9224058576774419, iteration: 229219
loss: 0.9955825805664062,grad_norm: 0.9175110796861432, iteration: 229220
loss: 0.9996070265769958,grad_norm: 0.940948350974383, iteration: 229221
loss: 1.0012032985687256,grad_norm: 0.9635617358358095, iteration: 229222
loss: 1.044022798538208,grad_norm: 0.9823844191665899, iteration: 229223
loss: 0.9963017106056213,grad_norm: 0.9999995650429113, iteration: 229224
loss: 0.9752280116081238,grad_norm: 0.8519758610982232, iteration: 229225
loss: 1.022451639175415,grad_norm: 0.9999996471659467, iteration: 229226
loss: 0.9950976371765137,grad_norm: 0.9999993243162801, iteration: 229227
loss: 1.0212695598602295,grad_norm: 0.9338305246215775, iteration: 229228
loss: 1.00277841091156,grad_norm: 0.8669289820141086, iteration: 229229
loss: 1.011552095413208,grad_norm: 0.9450064219020149, iteration: 229230
loss: 0.9760564565658569,grad_norm: 0.9178627788340905, iteration: 229231
loss: 1.0334397554397583,grad_norm: 0.923967902933336, iteration: 229232
loss: 1.0740278959274292,grad_norm: 0.9999991503526998, iteration: 229233
loss: 1.0073306560516357,grad_norm: 0.8514801249352937, iteration: 229234
loss: 0.9753609895706177,grad_norm: 0.8738029910461642, iteration: 229235
loss: 0.9742780923843384,grad_norm: 0.9332960943141884, iteration: 229236
loss: 1.0100555419921875,grad_norm: 0.9999989881306293, iteration: 229237
loss: 0.9734299182891846,grad_norm: 0.8998988611553751, iteration: 229238
loss: 0.9865905046463013,grad_norm: 0.8542461942271035, iteration: 229239
loss: 0.9854509234428406,grad_norm: 0.9457374064266998, iteration: 229240
loss: 1.0043548345565796,grad_norm: 0.9233792946213223, iteration: 229241
loss: 0.9926149845123291,grad_norm: 0.9571020994424952, iteration: 229242
loss: 1.032502293586731,grad_norm: 0.8216258603153835, iteration: 229243
loss: 0.9740831255912781,grad_norm: 0.711944172235859, iteration: 229244
loss: 1.050355076789856,grad_norm: 0.9999994469066692, iteration: 229245
loss: 0.9835953116416931,grad_norm: 0.9999992284314618, iteration: 229246
loss: 1.0082746744155884,grad_norm: 0.9484400807169645, iteration: 229247
loss: 0.9739178419113159,grad_norm: 0.8789698808385961, iteration: 229248
loss: 1.00739324092865,grad_norm: 0.8054845341669633, iteration: 229249
loss: 0.9659329652786255,grad_norm: 0.9278231091017627, iteration: 229250
loss: 1.0867283344268799,grad_norm: 1.0000000466356294, iteration: 229251
loss: 0.9490313529968262,grad_norm: 0.9999989373426438, iteration: 229252
loss: 1.079101800918579,grad_norm: 0.9999993025765463, iteration: 229253
loss: 1.0200070142745972,grad_norm: 0.999999662674751, iteration: 229254
loss: 0.9877291917800903,grad_norm: 0.9645884038524274, iteration: 229255
loss: 0.9994826316833496,grad_norm: 0.9999992998415943, iteration: 229256
loss: 1.0118745565414429,grad_norm: 0.7872058125143352, iteration: 229257
loss: 0.974502444267273,grad_norm: 0.972814322352811, iteration: 229258
loss: 1.0070114135742188,grad_norm: 0.9999995587097037, iteration: 229259
loss: 1.0048162937164307,grad_norm: 0.9082633632312684, iteration: 229260
loss: 0.9764868021011353,grad_norm: 0.9178250128527538, iteration: 229261
loss: 1.0001904964447021,grad_norm: 0.9999991610791583, iteration: 229262
loss: 1.0140007734298706,grad_norm: 0.9224633397031654, iteration: 229263
loss: 1.0197099447250366,grad_norm: 0.9316415353222224, iteration: 229264
loss: 1.014125943183899,grad_norm: 0.8246291692816667, iteration: 229265
loss: 0.9910436868667603,grad_norm: 0.8324692136008794, iteration: 229266
loss: 1.0266361236572266,grad_norm: 0.9999991920225845, iteration: 229267
loss: 1.0061819553375244,grad_norm: 0.9999992084005984, iteration: 229268
loss: 0.9939162135124207,grad_norm: 0.8014697045770344, iteration: 229269
loss: 1.0024112462997437,grad_norm: 0.9987865693568058, iteration: 229270
loss: 1.0360661745071411,grad_norm: 0.8261240598780317, iteration: 229271
loss: 1.0173906087875366,grad_norm: 0.854610168966931, iteration: 229272
loss: 0.9516899585723877,grad_norm: 0.8656241157784796, iteration: 229273
loss: 0.9860504865646362,grad_norm: 0.8532458439522439, iteration: 229274
loss: 1.0419058799743652,grad_norm: 0.9409333418973213, iteration: 229275
loss: 0.9678886532783508,grad_norm: 0.9999991332462005, iteration: 229276
loss: 1.0097761154174805,grad_norm: 0.8958344839434668, iteration: 229277
loss: 0.9859990477561951,grad_norm: 0.9999998415871177, iteration: 229278
loss: 0.9881687164306641,grad_norm: 0.9999991187704098, iteration: 229279
loss: 0.9743481278419495,grad_norm: 0.9999989685629552, iteration: 229280
loss: 0.9770685434341431,grad_norm: 0.9999992468016754, iteration: 229281
loss: 1.008589744567871,grad_norm: 0.9631327412710766, iteration: 229282
loss: 1.0094941854476929,grad_norm: 0.7589854791466667, iteration: 229283
loss: 1.0187017917633057,grad_norm: 0.9999996732871195, iteration: 229284
loss: 0.9663785696029663,grad_norm: 0.9886147384803831, iteration: 229285
loss: 0.9885443449020386,grad_norm: 0.9999991422920657, iteration: 229286
loss: 1.0513691902160645,grad_norm: 0.9656366820875393, iteration: 229287
loss: 0.9937894940376282,grad_norm: 0.9999991128564575, iteration: 229288
loss: 0.9876850247383118,grad_norm: 0.9387667033562297, iteration: 229289
loss: 1.010318636894226,grad_norm: 0.9720191597582986, iteration: 229290
loss: 1.0033509731292725,grad_norm: 0.9006801420106032, iteration: 229291
loss: 0.9962747097015381,grad_norm: 0.8649949940314142, iteration: 229292
loss: 1.0010970830917358,grad_norm: 0.9999991630133496, iteration: 229293
loss: 0.9875907301902771,grad_norm: 0.7704574282926343, iteration: 229294
loss: 1.004197359085083,grad_norm: 0.9999990917776443, iteration: 229295
loss: 0.9913448095321655,grad_norm: 0.9999992006479325, iteration: 229296
loss: 0.989638090133667,grad_norm: 0.9999992233630413, iteration: 229297
loss: 0.9587211012840271,grad_norm: 0.8777472419302896, iteration: 229298
loss: 0.9810778498649597,grad_norm: 0.7727300978272795, iteration: 229299
loss: 1.0122041702270508,grad_norm: 0.9999989941097921, iteration: 229300
loss: 1.0228828191757202,grad_norm: 0.9869942337761717, iteration: 229301
loss: 0.9987988471984863,grad_norm: 0.9999991926251294, iteration: 229302
loss: 0.9777976870536804,grad_norm: 0.9982996881015626, iteration: 229303
loss: 0.9831389784812927,grad_norm: 0.8601176940183222, iteration: 229304
loss: 1.0118509531021118,grad_norm: 0.999999015863259, iteration: 229305
loss: 0.9748169779777527,grad_norm: 0.7872797689781641, iteration: 229306
loss: 1.036399006843567,grad_norm: 0.8601774048408343, iteration: 229307
loss: 0.98101806640625,grad_norm: 0.8676285663277308, iteration: 229308
loss: 0.9931246042251587,grad_norm: 0.915590104368041, iteration: 229309
loss: 1.0281774997711182,grad_norm: 0.9617703723339555, iteration: 229310
loss: 0.9820619225502014,grad_norm: 0.9999990189841479, iteration: 229311
loss: 0.9768293499946594,grad_norm: 0.857091889362919, iteration: 229312
loss: 0.9954074025154114,grad_norm: 0.9921015596523016, iteration: 229313
loss: 0.9685624241828918,grad_norm: 0.8567449919111759, iteration: 229314
loss: 0.987189531326294,grad_norm: 0.8980090446947442, iteration: 229315
loss: 0.982047438621521,grad_norm: 0.9999990311509777, iteration: 229316
loss: 1.011581301689148,grad_norm: 0.9999992391031316, iteration: 229317
loss: 1.001201868057251,grad_norm: 0.9393187441446935, iteration: 229318
loss: 0.9959955811500549,grad_norm: 0.9276970366354937, iteration: 229319
loss: 1.0134824514389038,grad_norm: 0.909850367868037, iteration: 229320
loss: 1.0206350088119507,grad_norm: 0.8105042571285415, iteration: 229321
loss: 1.0156989097595215,grad_norm: 0.9521392517122762, iteration: 229322
loss: 0.9893624186515808,grad_norm: 0.8342340975194009, iteration: 229323
loss: 1.0013818740844727,grad_norm: 0.949857295905818, iteration: 229324
loss: 1.0022320747375488,grad_norm: 0.9750439312177757, iteration: 229325
loss: 1.02155339717865,grad_norm: 0.9999990884433578, iteration: 229326
loss: 0.9925562739372253,grad_norm: 0.999998955979582, iteration: 229327
loss: 1.0145251750946045,grad_norm: 0.8508886956048183, iteration: 229328
loss: 0.9793306589126587,grad_norm: 0.9564144458061177, iteration: 229329
loss: 0.9845682382583618,grad_norm: 0.9699933261349764, iteration: 229330
loss: 0.9927467703819275,grad_norm: 0.9999990795095719, iteration: 229331
loss: 0.9838550090789795,grad_norm: 0.9999990870360999, iteration: 229332
loss: 0.970617413520813,grad_norm: 0.8897397918748674, iteration: 229333
loss: 1.0058119297027588,grad_norm: 0.9999991791318509, iteration: 229334
loss: 1.012220859527588,grad_norm: 0.7854972025374395, iteration: 229335
loss: 0.9556637406349182,grad_norm: 0.8634766517065803, iteration: 229336
loss: 1.040724754333496,grad_norm: 0.8970315862116582, iteration: 229337
loss: 0.9934791326522827,grad_norm: 0.8591115296857547, iteration: 229338
loss: 0.9823207259178162,grad_norm: 0.852759168073008, iteration: 229339
loss: 0.9964127540588379,grad_norm: 0.8928187008348598, iteration: 229340
loss: 1.0268429517745972,grad_norm: 0.7681994033286019, iteration: 229341
loss: 0.9874974489212036,grad_norm: 0.9760663168718116, iteration: 229342
loss: 1.0472077131271362,grad_norm: 0.9999997406466505, iteration: 229343
loss: 0.9923070073127747,grad_norm: 0.922396397068171, iteration: 229344
loss: 1.035212755203247,grad_norm: 0.8713931966983002, iteration: 229345
loss: 1.0010045766830444,grad_norm: 0.9214496956463548, iteration: 229346
loss: 0.9958653450012207,grad_norm: 0.8661808213831784, iteration: 229347
loss: 1.077341914176941,grad_norm: 0.847987731180538, iteration: 229348
loss: 0.9964858889579773,grad_norm: 0.9999994406513767, iteration: 229349
loss: 0.9598450660705566,grad_norm: 0.86084985073086, iteration: 229350
loss: 0.9976745843887329,grad_norm: 0.9073667085914764, iteration: 229351
loss: 1.024680495262146,grad_norm: 0.8675994319094186, iteration: 229352
loss: 0.991035521030426,grad_norm: 0.9799295232073031, iteration: 229353
loss: 1.0557749271392822,grad_norm: 0.8522841933973049, iteration: 229354
loss: 1.0579506158828735,grad_norm: 0.9950659927899249, iteration: 229355
loss: 0.9987612366676331,grad_norm: 0.912342974915187, iteration: 229356
loss: 0.9998018145561218,grad_norm: 0.9774512904970958, iteration: 229357
loss: 0.9949206709861755,grad_norm: 0.8680576759727033, iteration: 229358
loss: 1.0752978324890137,grad_norm: 0.9999991095456874, iteration: 229359
loss: 1.0758177042007446,grad_norm: 0.9999990277650844, iteration: 229360
loss: 1.0041851997375488,grad_norm: 0.8795720239285275, iteration: 229361
loss: 0.9877975583076477,grad_norm: 0.8701873484038154, iteration: 229362
loss: 0.9652491211891174,grad_norm: 0.9999992515458951, iteration: 229363
loss: 1.1219993829727173,grad_norm: 1.000000082221003, iteration: 229364
loss: 1.0096874237060547,grad_norm: 0.8546304058893137, iteration: 229365
loss: 1.127938151359558,grad_norm: 0.9999996608983173, iteration: 229366
loss: 1.2777208089828491,grad_norm: 0.9999998832607916, iteration: 229367
loss: 1.092072606086731,grad_norm: 0.9999996554483959, iteration: 229368
loss: 1.0125007629394531,grad_norm: 0.7582191454697667, iteration: 229369
loss: 0.9802933931350708,grad_norm: 0.9999991421734438, iteration: 229370
loss: 0.9603937268257141,grad_norm: 0.8934232054278295, iteration: 229371
loss: 1.0167193412780762,grad_norm: 0.9999991039718455, iteration: 229372
loss: 1.0369106531143188,grad_norm: 0.9999995713403115, iteration: 229373
loss: 1.0164227485656738,grad_norm: 1.0000000043963093, iteration: 229374
loss: 0.9967143535614014,grad_norm: 0.7111909456535089, iteration: 229375
loss: 1.0188993215560913,grad_norm: 0.7968896353453133, iteration: 229376
loss: 1.0149824619293213,grad_norm: 0.9999990551150694, iteration: 229377
loss: 0.9889750480651855,grad_norm: 0.9999991500870969, iteration: 229378
loss: 1.0138206481933594,grad_norm: 0.9999993355931692, iteration: 229379
loss: 0.9958283305168152,grad_norm: 0.8695121676054955, iteration: 229380
loss: 1.0131938457489014,grad_norm: 0.9560248069196848, iteration: 229381
loss: 1.043920636177063,grad_norm: 0.9114241768827469, iteration: 229382
loss: 0.9952497482299805,grad_norm: 0.8653063828290785, iteration: 229383
loss: 0.9796964526176453,grad_norm: 0.9050400552702904, iteration: 229384
loss: 0.9913592338562012,grad_norm: 0.7239319110850524, iteration: 229385
loss: 0.9615378975868225,grad_norm: 0.9999993011490222, iteration: 229386
loss: 0.9778760671615601,grad_norm: 0.9187156589758212, iteration: 229387
loss: 0.9957476854324341,grad_norm: 0.9942223333084188, iteration: 229388
loss: 0.9874136447906494,grad_norm: 0.9097248150883985, iteration: 229389
loss: 1.021066427230835,grad_norm: 0.9207192923827812, iteration: 229390
loss: 0.9529958963394165,grad_norm: 0.862393513940274, iteration: 229391
loss: 0.9988272786140442,grad_norm: 0.8051140477637049, iteration: 229392
loss: 1.018231749534607,grad_norm: 0.9999990357840876, iteration: 229393
loss: 0.9984145760536194,grad_norm: 0.8187013435705081, iteration: 229394
loss: 0.9575029611587524,grad_norm: 0.8645158738019696, iteration: 229395
loss: 0.9577723145484924,grad_norm: 0.9999990591351552, iteration: 229396
loss: 1.0067205429077148,grad_norm: 0.9999989725979459, iteration: 229397
loss: 1.0408869981765747,grad_norm: 0.9999997456936465, iteration: 229398
loss: 0.9889763593673706,grad_norm: 0.8798197114880516, iteration: 229399
loss: 0.9814620614051819,grad_norm: 0.9556173177544406, iteration: 229400
loss: 0.994519054889679,grad_norm: 0.9789618340545866, iteration: 229401
loss: 1.0192265510559082,grad_norm: 0.9999991014286913, iteration: 229402
loss: 0.99686598777771,grad_norm: 0.9999990031504847, iteration: 229403
loss: 1.02028489112854,grad_norm: 0.9740596923789782, iteration: 229404
loss: 1.0214996337890625,grad_norm: 0.9719539547341652, iteration: 229405
loss: 0.9895276427268982,grad_norm: 0.9999990993772616, iteration: 229406
loss: 1.033231258392334,grad_norm: 0.8885420974158292, iteration: 229407
loss: 1.0003329515457153,grad_norm: 0.9999991030494679, iteration: 229408
loss: 0.9855818748474121,grad_norm: 0.896611709741248, iteration: 229409
loss: 0.9704869985580444,grad_norm: 0.9701346430105215, iteration: 229410
loss: 1.0032849311828613,grad_norm: 0.9999990572501789, iteration: 229411
loss: 1.0269434452056885,grad_norm: 0.838700239424981, iteration: 229412
loss: 0.9788657426834106,grad_norm: 0.9999991272223943, iteration: 229413
loss: 1.000414252281189,grad_norm: 0.7914380557664673, iteration: 229414
loss: 1.0142858028411865,grad_norm: 0.8967890994202156, iteration: 229415
loss: 1.0410748720169067,grad_norm: 0.999999096870524, iteration: 229416
loss: 1.052128791809082,grad_norm: 0.9999998856221763, iteration: 229417
loss: 1.004111886024475,grad_norm: 0.9027188880868943, iteration: 229418
loss: 1.0650554895401,grad_norm: 0.9703642783770179, iteration: 229419
loss: 1.008685827255249,grad_norm: 0.9999991277936379, iteration: 229420
loss: 0.9840450286865234,grad_norm: 0.8515254640896134, iteration: 229421
loss: 0.9836345911026001,grad_norm: 0.9707094501272082, iteration: 229422
loss: 1.039574384689331,grad_norm: 0.8733966593268379, iteration: 229423
loss: 1.0847116708755493,grad_norm: 0.9578372786525189, iteration: 229424
loss: 0.965952455997467,grad_norm: 0.8610326474615143, iteration: 229425
loss: 0.9771386981010437,grad_norm: 0.9999991203337515, iteration: 229426
loss: 0.9565799236297607,grad_norm: 0.8676156251121121, iteration: 229427
loss: 1.035561442375183,grad_norm: 0.841157661105386, iteration: 229428
loss: 0.9974403381347656,grad_norm: 0.9639417500496755, iteration: 229429
loss: 1.0619215965270996,grad_norm: 0.8805898938368677, iteration: 229430
loss: 0.9851712584495544,grad_norm: 0.9999992460418236, iteration: 229431
loss: 1.006136178970337,grad_norm: 0.999999127469264, iteration: 229432
loss: 0.9738814830780029,grad_norm: 0.9999992601544366, iteration: 229433
loss: 1.0192797183990479,grad_norm: 0.8599299484156708, iteration: 229434
loss: 1.0149171352386475,grad_norm: 0.9999990463303169, iteration: 229435
loss: 1.0253483057022095,grad_norm: 0.999999808890303, iteration: 229436
loss: 0.9913876056671143,grad_norm: 0.9999999655219334, iteration: 229437
loss: 0.9890424013137817,grad_norm: 0.8038172174979483, iteration: 229438
loss: 0.9735358357429504,grad_norm: 0.9077706289306005, iteration: 229439
loss: 0.9841434359550476,grad_norm: 0.9816937971363944, iteration: 229440
loss: 0.982898473739624,grad_norm: 0.8799166314604396, iteration: 229441
loss: 1.0149952173233032,grad_norm: 0.9999991380239273, iteration: 229442
loss: 0.9820499420166016,grad_norm: 0.8979223167275231, iteration: 229443
loss: 1.0297235250473022,grad_norm: 0.9999992440709127, iteration: 229444
loss: 0.9947622418403625,grad_norm: 0.8637929135200002, iteration: 229445
loss: 0.976622462272644,grad_norm: 0.9022003104180822, iteration: 229446
loss: 0.9869844317436218,grad_norm: 0.7886231778090494, iteration: 229447
loss: 1.0290497541427612,grad_norm: 0.7908442197376346, iteration: 229448
loss: 0.969217836856842,grad_norm: 0.9999990229259018, iteration: 229449
loss: 1.0180491209030151,grad_norm: 0.8751838241519276, iteration: 229450
loss: 1.0157159566879272,grad_norm: 0.9999990937811304, iteration: 229451
loss: 1.059086799621582,grad_norm: 0.9999997100741662, iteration: 229452
loss: 1.0194872617721558,grad_norm: 0.9999991424648504, iteration: 229453
loss: 1.0140094757080078,grad_norm: 0.999999162274799, iteration: 229454
loss: 1.00400972366333,grad_norm: 0.9999992141519504, iteration: 229455
loss: 1.0234442949295044,grad_norm: 0.999999020982637, iteration: 229456
loss: 0.9814243316650391,grad_norm: 0.9248380056312637, iteration: 229457
loss: 0.984318196773529,grad_norm: 0.862793711863951, iteration: 229458
loss: 1.0081462860107422,grad_norm: 0.9999989682330024, iteration: 229459
loss: 0.994914710521698,grad_norm: 0.8650782143609878, iteration: 229460
loss: 1.0219897031784058,grad_norm: 0.8276110917349074, iteration: 229461
loss: 1.0228098630905151,grad_norm: 0.8669054552033736, iteration: 229462
loss: 0.9658413529396057,grad_norm: 0.8797282867346294, iteration: 229463
loss: 0.9915997385978699,grad_norm: 0.8982492617777769, iteration: 229464
loss: 0.9780615568161011,grad_norm: 0.9084869247818469, iteration: 229465
loss: 0.9930300116539001,grad_norm: 0.898707931679387, iteration: 229466
loss: 1.031803011894226,grad_norm: 0.9766425546533949, iteration: 229467
loss: 1.001397728919983,grad_norm: 0.8401122503336399, iteration: 229468
loss: 1.0221257209777832,grad_norm: 0.9337950672969842, iteration: 229469
loss: 0.9948238730430603,grad_norm: 0.8567851765903147, iteration: 229470
loss: 1.0172638893127441,grad_norm: 0.9999991318846512, iteration: 229471
loss: 0.9960830211639404,grad_norm: 0.999999101248525, iteration: 229472
loss: 0.9851089119911194,grad_norm: 0.9751193345358565, iteration: 229473
loss: 1.0346447229385376,grad_norm: 0.993832794896446, iteration: 229474
loss: 1.0128495693206787,grad_norm: 0.9868186803964654, iteration: 229475
loss: 0.9986263513565063,grad_norm: 0.9999990402489576, iteration: 229476
loss: 1.0136826038360596,grad_norm: 0.9999991007038145, iteration: 229477
loss: 1.0023728609085083,grad_norm: 0.9857021352145722, iteration: 229478
loss: 0.9646823406219482,grad_norm: 0.9932624927206438, iteration: 229479
loss: 0.9826196432113647,grad_norm: 0.9999990804752623, iteration: 229480
loss: 0.9741307497024536,grad_norm: 0.9452245482158225, iteration: 229481
loss: 0.9817779660224915,grad_norm: 0.9100332501150156, iteration: 229482
loss: 0.9940049052238464,grad_norm: 0.9999992610849292, iteration: 229483
loss: 0.9892125129699707,grad_norm: 0.9999991398157811, iteration: 229484
loss: 1.0055334568023682,grad_norm: 0.8521163845202602, iteration: 229485
loss: 1.0171414613723755,grad_norm: 0.8006137228265718, iteration: 229486
loss: 0.9574106335639954,grad_norm: 0.8966835423723053, iteration: 229487
loss: 1.009748101234436,grad_norm: 0.8059293833631079, iteration: 229488
loss: 0.9823058843612671,grad_norm: 0.9206969679450742, iteration: 229489
loss: 1.0147579908370972,grad_norm: 0.919164491180169, iteration: 229490
loss: 1.0305057764053345,grad_norm: 0.8002851800753502, iteration: 229491
loss: 0.9948345422744751,grad_norm: 0.7508595433612437, iteration: 229492
loss: 0.9791019558906555,grad_norm: 0.9410691995052457, iteration: 229493
loss: 0.9693242311477661,grad_norm: 0.9376174058525876, iteration: 229494
loss: 1.0135490894317627,grad_norm: 0.9999991433524948, iteration: 229495
loss: 0.9714291095733643,grad_norm: 0.7761439099581525, iteration: 229496
loss: 1.0032572746276855,grad_norm: 0.9794479182560408, iteration: 229497
loss: 1.0164480209350586,grad_norm: 0.9999993835318167, iteration: 229498
loss: 0.9728679656982422,grad_norm: 0.9999990740046969, iteration: 229499
loss: 0.9973180890083313,grad_norm: 0.9957074837433556, iteration: 229500
loss: 1.003414511680603,grad_norm: 0.9789880289525491, iteration: 229501
loss: 0.9985194802284241,grad_norm: 0.9459390071012882, iteration: 229502
loss: 1.0174214839935303,grad_norm: 0.8793709802805999, iteration: 229503
loss: 0.9559230804443359,grad_norm: 0.9999990491239776, iteration: 229504
loss: 1.0013920068740845,grad_norm: 0.9067842185065564, iteration: 229505
loss: 0.9995615482330322,grad_norm: 0.9999991885880103, iteration: 229506
loss: 1.0173805952072144,grad_norm: 0.9456332485832168, iteration: 229507
loss: 1.0055943727493286,grad_norm: 0.9999989857669088, iteration: 229508
loss: 1.0037206411361694,grad_norm: 0.965323441877749, iteration: 229509
loss: 0.9915069937705994,grad_norm: 0.9770890663302854, iteration: 229510
loss: 0.9846144318580627,grad_norm: 0.9447924473224811, iteration: 229511
loss: 0.9898653626441956,grad_norm: 0.99999903500053, iteration: 229512
loss: 1.0247288942337036,grad_norm: 0.9657080180369106, iteration: 229513
loss: 1.0096818208694458,grad_norm: 0.8389964388119898, iteration: 229514
loss: 1.043410301208496,grad_norm: 0.999999223492635, iteration: 229515
loss: 0.9965280294418335,grad_norm: 0.838924627295926, iteration: 229516
loss: 1.0533517599105835,grad_norm: 0.9999990324352913, iteration: 229517
loss: 1.011437177658081,grad_norm: 0.9731923533551956, iteration: 229518
loss: 1.0283960103988647,grad_norm: 0.9801883061881482, iteration: 229519
loss: 1.0018141269683838,grad_norm: 0.9519332188576246, iteration: 229520
loss: 1.0143786668777466,grad_norm: 0.9999989714559192, iteration: 229521
loss: 1.0530414581298828,grad_norm: 0.9421475620772337, iteration: 229522
loss: 1.0057648420333862,grad_norm: 0.8454507178575508, iteration: 229523
loss: 1.0126761198043823,grad_norm: 0.9999989827931396, iteration: 229524
loss: 0.9754078388214111,grad_norm: 0.8826875379566357, iteration: 229525
loss: 1.004073143005371,grad_norm: 0.8743449459851916, iteration: 229526
loss: 0.9948638677597046,grad_norm: 0.9999990471351534, iteration: 229527
loss: 1.0114026069641113,grad_norm: 0.999999070449696, iteration: 229528
loss: 1.0032811164855957,grad_norm: 0.9999992171140223, iteration: 229529
loss: 1.0133308172225952,grad_norm: 0.9299659501096079, iteration: 229530
loss: 1.0036004781723022,grad_norm: 0.9999991633685656, iteration: 229531
loss: 1.0119787454605103,grad_norm: 0.9999990825863356, iteration: 229532
loss: 1.0176854133605957,grad_norm: 0.9999998795781272, iteration: 229533
loss: 1.0152183771133423,grad_norm: 0.9999991160277858, iteration: 229534
loss: 1.007140874862671,grad_norm: 0.7823258029895285, iteration: 229535
loss: 0.9915767312049866,grad_norm: 0.9516154677957228, iteration: 229536
loss: 0.9695494174957275,grad_norm: 0.9089013929572523, iteration: 229537
loss: 0.9919747114181519,grad_norm: 0.8074036596341995, iteration: 229538
loss: 1.0024402141571045,grad_norm: 0.7961257966034488, iteration: 229539
loss: 0.9836232662200928,grad_norm: 0.8559763228990666, iteration: 229540
loss: 0.9667876362800598,grad_norm: 0.9999990450324682, iteration: 229541
loss: 0.9904098510742188,grad_norm: 0.9446216766552432, iteration: 229542
loss: 0.9860467910766602,grad_norm: 0.9626991106248948, iteration: 229543
loss: 0.9652659893035889,grad_norm: 0.9999991679199021, iteration: 229544
loss: 1.009425163269043,grad_norm: 0.8725689210656611, iteration: 229545
loss: 1.074256420135498,grad_norm: 0.9189394295573947, iteration: 229546
loss: 1.0188164710998535,grad_norm: 0.922996425722748, iteration: 229547
loss: 0.9957991242408752,grad_norm: 0.9999990357087291, iteration: 229548
loss: 1.009772777557373,grad_norm: 0.8170623657579164, iteration: 229549
loss: 0.9564400315284729,grad_norm: 0.8391297535852172, iteration: 229550
loss: 1.0008368492126465,grad_norm: 0.9464645658468464, iteration: 229551
loss: 0.9759386777877808,grad_norm: 0.8722130815276223, iteration: 229552
loss: 1.001242995262146,grad_norm: 0.8721034306734994, iteration: 229553
loss: 0.9881887435913086,grad_norm: 0.9700107311310464, iteration: 229554
loss: 1.1770223379135132,grad_norm: 0.9999996949114318, iteration: 229555
loss: 0.9905377626419067,grad_norm: 0.9999990854640276, iteration: 229556
loss: 0.9972278475761414,grad_norm: 0.9999992402769131, iteration: 229557
loss: 1.0169144868850708,grad_norm: 0.999999274367173, iteration: 229558
loss: 1.0036718845367432,grad_norm: 0.9992838860088934, iteration: 229559
loss: 0.984537661075592,grad_norm: 0.9658638094118427, iteration: 229560
loss: 1.0026899576187134,grad_norm: 0.9024865242246454, iteration: 229561
loss: 1.0173017978668213,grad_norm: 0.9999992507337447, iteration: 229562
loss: 1.0286870002746582,grad_norm: 0.9999992454630554, iteration: 229563
loss: 0.9778788685798645,grad_norm: 0.9999991885884871, iteration: 229564
loss: 0.9885234832763672,grad_norm: 0.9999989922037148, iteration: 229565
loss: 0.9890871047973633,grad_norm: 0.999999085486471, iteration: 229566
loss: 1.0044361352920532,grad_norm: 0.835665023260045, iteration: 229567
loss: 0.9831421971321106,grad_norm: 0.9439702793236894, iteration: 229568
loss: 1.0452806949615479,grad_norm: 0.884757080518508, iteration: 229569
loss: 0.9926183819770813,grad_norm: 0.835975947471759, iteration: 229570
loss: 1.0176584720611572,grad_norm: 0.8724256876519679, iteration: 229571
loss: 0.9528288841247559,grad_norm: 0.9746173585661619, iteration: 229572
loss: 1.0041450262069702,grad_norm: 0.9999990924786143, iteration: 229573
loss: 1.0025540590286255,grad_norm: 0.9860053666241821, iteration: 229574
loss: 0.9638747572898865,grad_norm: 0.8326179315260255, iteration: 229575
loss: 0.9824551343917847,grad_norm: 0.999998981727745, iteration: 229576
loss: 0.9735219478607178,grad_norm: 0.7402016705918447, iteration: 229577
loss: 1.010947346687317,grad_norm: 0.9480024100602971, iteration: 229578
loss: 1.016658067703247,grad_norm: 0.8960030429088786, iteration: 229579
loss: 0.9863752722740173,grad_norm: 0.9278840781862934, iteration: 229580
loss: 1.0097976922988892,grad_norm: 0.9775058224681675, iteration: 229581
loss: 1.0276769399642944,grad_norm: 0.8785608382891831, iteration: 229582
loss: 0.9876481890678406,grad_norm: 0.8452031951150861, iteration: 229583
loss: 1.023287057876587,grad_norm: 0.9999991997385272, iteration: 229584
loss: 0.9967102408409119,grad_norm: 0.9999992701225346, iteration: 229585
loss: 1.006074070930481,grad_norm: 0.8062650383662076, iteration: 229586
loss: 0.9670934081077576,grad_norm: 0.9669911188714883, iteration: 229587
loss: 1.0105445384979248,grad_norm: 0.7782612912601379, iteration: 229588
loss: 1.0049338340759277,grad_norm: 0.8347350521005489, iteration: 229589
loss: 1.0425230264663696,grad_norm: 0.9999998736879858, iteration: 229590
loss: 1.0072835683822632,grad_norm: 0.9924689688255349, iteration: 229591
loss: 0.968625545501709,grad_norm: 0.9117025079139538, iteration: 229592
loss: 1.004433274269104,grad_norm: 0.9999991551087375, iteration: 229593
loss: 1.105095386505127,grad_norm: 0.9999996866906953, iteration: 229594
loss: 0.9680294990539551,grad_norm: 0.9523521685341302, iteration: 229595
loss: 0.9713231921195984,grad_norm: 0.8671125216785125, iteration: 229596
loss: 1.053049921989441,grad_norm: 0.9938685560087542, iteration: 229597
loss: 1.0157703161239624,grad_norm: 0.8853788538347194, iteration: 229598
loss: 0.9913522601127625,grad_norm: 0.9999991203166286, iteration: 229599
loss: 0.9699148535728455,grad_norm: 0.8348068810507592, iteration: 229600
loss: 1.0134103298187256,grad_norm: 0.9963409590610105, iteration: 229601
loss: 1.0424022674560547,grad_norm: 0.9999991037306525, iteration: 229602
loss: 1.229857325553894,grad_norm: 0.9999999065084383, iteration: 229603
loss: 0.9699593186378479,grad_norm: 0.9987786965876164, iteration: 229604
loss: 1.0177721977233887,grad_norm: 0.8075965385958805, iteration: 229605
loss: 0.9798343777656555,grad_norm: 0.9602180309305341, iteration: 229606
loss: 0.9933976531028748,grad_norm: 0.9007595751066161, iteration: 229607
loss: 0.9831889867782593,grad_norm: 0.9999991567200814, iteration: 229608
loss: 1.0950897932052612,grad_norm: 0.9999992380237918, iteration: 229609
loss: 1.087944507598877,grad_norm: 0.9999991495264455, iteration: 229610
loss: 0.9933173060417175,grad_norm: 0.8424175226246735, iteration: 229611
loss: 1.0293569564819336,grad_norm: 0.9999995796305295, iteration: 229612
loss: 0.9900994300842285,grad_norm: 0.8385390062944363, iteration: 229613
loss: 0.9877880215644836,grad_norm: 0.9700614154906828, iteration: 229614
loss: 1.0023423433303833,grad_norm: 0.9366469806814743, iteration: 229615
loss: 0.9559552073478699,grad_norm: 0.9085075925710703, iteration: 229616
loss: 1.031252384185791,grad_norm: 0.9999997806013414, iteration: 229617
loss: 1.0010533332824707,grad_norm: 0.9611172301729483, iteration: 229618
loss: 0.9935432076454163,grad_norm: 0.8110815813774709, iteration: 229619
loss: 0.9994013905525208,grad_norm: 0.9999990432981941, iteration: 229620
loss: 1.0041048526763916,grad_norm: 0.999999130593288, iteration: 229621
loss: 1.0433430671691895,grad_norm: 0.9999994820302562, iteration: 229622
loss: 0.9885327816009521,grad_norm: 0.9999990834793043, iteration: 229623
loss: 0.9933212995529175,grad_norm: 0.8922485571045494, iteration: 229624
loss: 0.9825159907341003,grad_norm: 0.8087995901880279, iteration: 229625
loss: 0.9714587926864624,grad_norm: 0.8989963224236786, iteration: 229626
loss: 0.9887885451316833,grad_norm: 0.9999989687459224, iteration: 229627
loss: 1.0253124237060547,grad_norm: 0.8684204957693507, iteration: 229628
loss: 0.9875954389572144,grad_norm: 0.7942453643606016, iteration: 229629
loss: 0.9894900321960449,grad_norm: 0.9999991290337301, iteration: 229630
loss: 0.9889165759086609,grad_norm: 0.9993803043747856, iteration: 229631
loss: 1.0148760080337524,grad_norm: 0.9680473914404248, iteration: 229632
loss: 0.9997912049293518,grad_norm: 0.9507178526849692, iteration: 229633
loss: 1.0481414794921875,grad_norm: 0.9191848093017372, iteration: 229634
loss: 0.990489661693573,grad_norm: 0.896546990975072, iteration: 229635
loss: 0.986785888671875,grad_norm: 0.9310861045566001, iteration: 229636
loss: 0.9964271783828735,grad_norm: 0.9622997862896334, iteration: 229637
loss: 0.980586051940918,grad_norm: 0.9999992360062532, iteration: 229638
loss: 1.0272477865219116,grad_norm: 0.8624832759890699, iteration: 229639
loss: 0.9706786274909973,grad_norm: 0.8566271369486514, iteration: 229640
loss: 0.9678013920783997,grad_norm: 0.9341859764312601, iteration: 229641
loss: 1.0063937902450562,grad_norm: 0.8508789313734187, iteration: 229642
loss: 1.0242820978164673,grad_norm: 0.8479544675707508, iteration: 229643
loss: 1.013823390007019,grad_norm: 0.8568147556013322, iteration: 229644
loss: 1.0062254667282104,grad_norm: 0.999999174655098, iteration: 229645
loss: 0.9866448044776917,grad_norm: 0.8452378264385939, iteration: 229646
loss: 1.0041627883911133,grad_norm: 0.8905960724604326, iteration: 229647
loss: 1.0039489269256592,grad_norm: 0.733487393623547, iteration: 229648
loss: 1.0138376951217651,grad_norm: 0.9181618460729258, iteration: 229649
loss: 0.9725762009620667,grad_norm: 0.9426212620662874, iteration: 229650
loss: 0.9780768156051636,grad_norm: 0.8895649761185782, iteration: 229651
loss: 0.9566828608512878,grad_norm: 0.9576768327830681, iteration: 229652
loss: 1.0169994831085205,grad_norm: 0.8114989722009832, iteration: 229653
loss: 1.0138846635818481,grad_norm: 0.9999995030015724, iteration: 229654
loss: 1.0249522924423218,grad_norm: 0.9999991723921551, iteration: 229655
loss: 1.013962984085083,grad_norm: 0.9509825371338558, iteration: 229656
loss: 1.025605320930481,grad_norm: 0.9485470905113255, iteration: 229657
loss: 0.9757660031318665,grad_norm: 0.9999991040538087, iteration: 229658
loss: 1.0300616025924683,grad_norm: 0.999999150132159, iteration: 229659
loss: 1.0168988704681396,grad_norm: 0.9999999374668235, iteration: 229660
loss: 1.0176173448562622,grad_norm: 0.8979422810811879, iteration: 229661
loss: 0.9853233098983765,grad_norm: 0.8650633268363686, iteration: 229662
loss: 1.0114465951919556,grad_norm: 0.999999124665315, iteration: 229663
loss: 1.0341123342514038,grad_norm: 0.9881709210551023, iteration: 229664
loss: 0.9760800004005432,grad_norm: 0.863425424943059, iteration: 229665
loss: 0.9935210943222046,grad_norm: 0.8620404389647677, iteration: 229666
loss: 0.9902735352516174,grad_norm: 0.8458158839974748, iteration: 229667
loss: 0.9710958003997803,grad_norm: 0.8877661701786621, iteration: 229668
loss: 0.9934388995170593,grad_norm: 0.9428365038564192, iteration: 229669
loss: 0.9758261442184448,grad_norm: 0.9354291404923236, iteration: 229670
loss: 1.0173473358154297,grad_norm: 0.7799972397800432, iteration: 229671
loss: 1.0345603227615356,grad_norm: 0.9999991898795915, iteration: 229672
loss: 0.9889854192733765,grad_norm: 0.8607354604224221, iteration: 229673
loss: 0.9789941906929016,grad_norm: 0.9999991683596228, iteration: 229674
loss: 0.995337963104248,grad_norm: 0.8882358167895525, iteration: 229675
loss: 0.9775097370147705,grad_norm: 0.8679380293178026, iteration: 229676
loss: 1.0239158868789673,grad_norm: 0.7953653774534214, iteration: 229677
loss: 0.9819570779800415,grad_norm: 0.9656993365007993, iteration: 229678
loss: 1.0030393600463867,grad_norm: 0.8964116244045369, iteration: 229679
loss: 0.9846161603927612,grad_norm: 0.9612486863932043, iteration: 229680
loss: 1.0085207223892212,grad_norm: 0.791809290710236, iteration: 229681
loss: 1.1472524404525757,grad_norm: 0.9999992732570262, iteration: 229682
loss: 1.0356156826019287,grad_norm: 0.9624505193996935, iteration: 229683
loss: 0.9903501272201538,grad_norm: 0.9546363922471882, iteration: 229684
loss: 1.0344473123550415,grad_norm: 0.8875017407376086, iteration: 229685
loss: 1.0345717668533325,grad_norm: 0.9368892043025854, iteration: 229686
loss: 0.9733008146286011,grad_norm: 0.8233767835538386, iteration: 229687
loss: 1.0421271324157715,grad_norm: 0.8996926659187122, iteration: 229688
loss: 1.03165602684021,grad_norm: 0.8213388024728645, iteration: 229689
loss: 1.0083165168762207,grad_norm: 0.9837083879866465, iteration: 229690
loss: 1.017555594444275,grad_norm: 0.8368163685176615, iteration: 229691
loss: 1.1319618225097656,grad_norm: 0.9999993227311792, iteration: 229692
loss: 1.028829574584961,grad_norm: 0.9403498391373807, iteration: 229693
loss: 1.016534447669983,grad_norm: 0.8469856005910734, iteration: 229694
loss: 1.0369648933410645,grad_norm: 0.8221772234664922, iteration: 229695
loss: 1.0328738689422607,grad_norm: 0.9999990410744888, iteration: 229696
loss: 1.012231469154358,grad_norm: 0.751659033976153, iteration: 229697
loss: 0.9995920062065125,grad_norm: 0.982418898268786, iteration: 229698
loss: 0.9922792315483093,grad_norm: 0.9999991235740724, iteration: 229699
loss: 1.0009808540344238,grad_norm: 0.8371947960729145, iteration: 229700
loss: 0.9961640238761902,grad_norm: 0.9999990698794483, iteration: 229701
loss: 1.0196515321731567,grad_norm: 0.8738713675550367, iteration: 229702
loss: 0.987009584903717,grad_norm: 0.999999081311907, iteration: 229703
loss: 1.0200462341308594,grad_norm: 0.9999992448748283, iteration: 229704
loss: 0.9947659373283386,grad_norm: 0.7574201875439239, iteration: 229705
loss: 1.0057440996170044,grad_norm: 1.0000000353790555, iteration: 229706
loss: 1.0089833736419678,grad_norm: 0.9999991362343646, iteration: 229707
loss: 0.9853847026824951,grad_norm: 0.9999991080496637, iteration: 229708
loss: 1.0289596319198608,grad_norm: 0.9009133431815461, iteration: 229709
loss: 1.0177545547485352,grad_norm: 0.9256756332283883, iteration: 229710
loss: 1.0328370332717896,grad_norm: 0.8800462294458674, iteration: 229711
loss: 1.004149079322815,grad_norm: 0.8368218870251967, iteration: 229712
loss: 0.9732519388198853,grad_norm: 0.90082477548475, iteration: 229713
loss: 0.9777976274490356,grad_norm: 0.8770988257388098, iteration: 229714
loss: 0.9889845252037048,grad_norm: 0.9844177098926641, iteration: 229715
loss: 1.0208876132965088,grad_norm: 0.9999996635195669, iteration: 229716
loss: 0.9783129096031189,grad_norm: 0.9999991586822997, iteration: 229717
loss: 0.998805046081543,grad_norm: 0.8743548461771733, iteration: 229718
loss: 1.0439165830612183,grad_norm: 0.9738976616340635, iteration: 229719
loss: 0.9589188694953918,grad_norm: 0.9273660145271896, iteration: 229720
loss: 0.9797307252883911,grad_norm: 0.9999991703142599, iteration: 229721
loss: 0.9946948885917664,grad_norm: 0.9999992953889364, iteration: 229722
loss: 1.0035308599472046,grad_norm: 0.9362392450430701, iteration: 229723
loss: 1.0246179103851318,grad_norm: 0.9999990592505682, iteration: 229724
loss: 0.995155394077301,grad_norm: 0.8569039973701102, iteration: 229725
loss: 1.0198127031326294,grad_norm: 0.9827304508632126, iteration: 229726
loss: 0.9953904151916504,grad_norm: 0.818088432758038, iteration: 229727
loss: 1.0116385221481323,grad_norm: 0.9453034657766672, iteration: 229728
loss: 1.0289452075958252,grad_norm: 0.8886440278090463, iteration: 229729
loss: 1.0407286882400513,grad_norm: 0.9033308606959704, iteration: 229730
loss: 0.9960804581642151,grad_norm: 0.9862473084200538, iteration: 229731
loss: 1.0838582515716553,grad_norm: 0.9356789899931637, iteration: 229732
loss: 0.9811769723892212,grad_norm: 0.9999991322800743, iteration: 229733
loss: 1.0016416311264038,grad_norm: 0.7234848378648732, iteration: 229734
loss: 0.9972174167633057,grad_norm: 0.9324564702174053, iteration: 229735
loss: 1.034922480583191,grad_norm: 0.8538390106395944, iteration: 229736
loss: 0.976726770401001,grad_norm: 0.9916302286618105, iteration: 229737
loss: 1.0023053884506226,grad_norm: 0.9943330641411886, iteration: 229738
loss: 0.9703217148780823,grad_norm: 0.80238452888672, iteration: 229739
loss: 1.0122706890106201,grad_norm: 0.9999991029049411, iteration: 229740
loss: 0.9780110120773315,grad_norm: 0.9422614427984298, iteration: 229741
loss: 0.9756538271903992,grad_norm: 0.9846991977482344, iteration: 229742
loss: 1.012048602104187,grad_norm: 0.8751482847647197, iteration: 229743
loss: 0.9747712016105652,grad_norm: 0.9242076888504253, iteration: 229744
loss: 0.9976893663406372,grad_norm: 0.9012494170844276, iteration: 229745
loss: 1.0379691123962402,grad_norm: 0.9999991550396421, iteration: 229746
loss: 1.0139938592910767,grad_norm: 0.9745148991627837, iteration: 229747
loss: 0.9994052052497864,grad_norm: 0.8502978329358719, iteration: 229748
loss: 1.0030823945999146,grad_norm: 0.9999990125328141, iteration: 229749
loss: 1.009596824645996,grad_norm: 0.999999196471738, iteration: 229750
loss: 1.01976478099823,grad_norm: 0.8366689369310725, iteration: 229751
loss: 1.0033982992172241,grad_norm: 0.9429760933150587, iteration: 229752
loss: 1.008172869682312,grad_norm: 0.9447083151439795, iteration: 229753
loss: 0.9888852834701538,grad_norm: 0.9999991871200619, iteration: 229754
loss: 0.9871981143951416,grad_norm: 0.9999990896751353, iteration: 229755
loss: 0.9892799854278564,grad_norm: 0.9999993822063824, iteration: 229756
loss: 1.0050431489944458,grad_norm: 0.9590364803252048, iteration: 229757
loss: 1.108021855354309,grad_norm: 0.9999997849085701, iteration: 229758
loss: 0.994938313961029,grad_norm: 0.9999991390306363, iteration: 229759
loss: 1.0177206993103027,grad_norm: 0.9388757219548786, iteration: 229760
loss: 1.0598642826080322,grad_norm: 0.8055133959282793, iteration: 229761
loss: 0.9958850741386414,grad_norm: 0.9467107421117641, iteration: 229762
loss: 1.037443995475769,grad_norm: 0.9999991450485244, iteration: 229763
loss: 0.997011661529541,grad_norm: 0.891962039051789, iteration: 229764
loss: 0.9901492595672607,grad_norm: 0.9384510322772612, iteration: 229765
loss: 1.0137401819229126,grad_norm: 0.8767904431709673, iteration: 229766
loss: 1.0827730894088745,grad_norm: 0.9538177067918422, iteration: 229767
loss: 0.953488826751709,grad_norm: 0.9657408353461496, iteration: 229768
loss: 1.0065356492996216,grad_norm: 0.8383063243424189, iteration: 229769
loss: 0.9923065304756165,grad_norm: 0.9988093680556921, iteration: 229770
loss: 1.0300307273864746,grad_norm: 0.9999991593768731, iteration: 229771
loss: 0.9837063550949097,grad_norm: 0.82302957020694, iteration: 229772
loss: 1.1683218479156494,grad_norm: 0.9999993540554529, iteration: 229773
loss: 1.0410315990447998,grad_norm: 0.8085053703739974, iteration: 229774
loss: 0.9977015852928162,grad_norm: 0.9517169149920633, iteration: 229775
loss: 1.0374531745910645,grad_norm: 0.9999998535312703, iteration: 229776
loss: 1.0014196634292603,grad_norm: 0.8858990382979621, iteration: 229777
loss: 0.9642384648323059,grad_norm: 0.8566204575596863, iteration: 229778
loss: 0.981754720211029,grad_norm: 0.9999999877690727, iteration: 229779
loss: 1.0241191387176514,grad_norm: 0.8861818704505915, iteration: 229780
loss: 1.0335283279418945,grad_norm: 0.7671012837800835, iteration: 229781
loss: 1.0637778043746948,grad_norm: 0.9999998091791794, iteration: 229782
loss: 1.0128003358840942,grad_norm: 0.9716731821716327, iteration: 229783
loss: 0.9995126128196716,grad_norm: 0.8537744792960689, iteration: 229784
loss: 1.0005559921264648,grad_norm: 0.9389505344260703, iteration: 229785
loss: 1.0521459579467773,grad_norm: 0.9999995904949379, iteration: 229786
loss: 0.9911258816719055,grad_norm: 0.802410211129718, iteration: 229787
loss: 1.0144199132919312,grad_norm: 0.8731177307400267, iteration: 229788
loss: 1.1773583889007568,grad_norm: 0.9949224096463359, iteration: 229789
loss: 1.0012876987457275,grad_norm: 0.999497441713771, iteration: 229790
loss: 1.0355716943740845,grad_norm: 0.9999992026088317, iteration: 229791
loss: 1.0231637954711914,grad_norm: 0.7659637685668207, iteration: 229792
loss: 1.0341500043869019,grad_norm: 0.9130208340136878, iteration: 229793
loss: 1.029066562652588,grad_norm: 0.8757054830337664, iteration: 229794
loss: 0.9630326628684998,grad_norm: 0.911255213272439, iteration: 229795
loss: 0.9843602776527405,grad_norm: 0.9349165678415691, iteration: 229796
loss: 1.0136500597000122,grad_norm: 0.9999990779182888, iteration: 229797
loss: 0.9812231659889221,grad_norm: 0.7432701854928283, iteration: 229798
loss: 0.9937285780906677,grad_norm: 0.9272425729777692, iteration: 229799
loss: 0.9862629771232605,grad_norm: 0.904556988338335, iteration: 229800
loss: 0.9780474901199341,grad_norm: 0.9526123990213095, iteration: 229801
loss: 1.0126657485961914,grad_norm: 0.7546744754680015, iteration: 229802
loss: 1.0225740671157837,grad_norm: 0.9160388505567056, iteration: 229803
loss: 1.0022790431976318,grad_norm: 0.999999064475989, iteration: 229804
loss: 0.9270550012588501,grad_norm: 0.918395165316568, iteration: 229805
loss: 1.029665231704712,grad_norm: 0.865158250763958, iteration: 229806
loss: 0.9883471727371216,grad_norm: 0.9844874575992514, iteration: 229807
loss: 1.0129635334014893,grad_norm: 0.9034956999573199, iteration: 229808
loss: 0.9719284176826477,grad_norm: 0.9163915486044534, iteration: 229809
loss: 1.0025147199630737,grad_norm: 0.9999991207696679, iteration: 229810
loss: 1.0294463634490967,grad_norm: 0.9296372561959256, iteration: 229811
loss: 1.0019065141677856,grad_norm: 0.9589639555423313, iteration: 229812
loss: 1.0694340467453003,grad_norm: 1.000000017415924, iteration: 229813
loss: 1.0430160760879517,grad_norm: 0.9035110598284608, iteration: 229814
loss: 1.0164390802383423,grad_norm: 0.9065196432716744, iteration: 229815
loss: 1.0244537591934204,grad_norm: 0.9999991182431254, iteration: 229816
loss: 1.0170981884002686,grad_norm: 0.9999991549853371, iteration: 229817
loss: 1.0096079111099243,grad_norm: 0.8903982058101371, iteration: 229818
loss: 1.0427218675613403,grad_norm: 0.9999990916226997, iteration: 229819
loss: 1.0191287994384766,grad_norm: 0.9629837692843197, iteration: 229820
loss: 1.0190428495407104,grad_norm: 0.9999991314701847, iteration: 229821
loss: 1.0039950609207153,grad_norm: 0.8945041302067533, iteration: 229822
loss: 0.9869736433029175,grad_norm: 0.9999997262873792, iteration: 229823
loss: 1.026154637336731,grad_norm: 0.9572757180390782, iteration: 229824
loss: 1.03408944606781,grad_norm: 0.9999997364382167, iteration: 229825
loss: 1.0015895366668701,grad_norm: 0.7523548388095636, iteration: 229826
loss: 0.9834824204444885,grad_norm: 0.6620466246089199, iteration: 229827
loss: 1.0209319591522217,grad_norm: 0.95856137983904, iteration: 229828
loss: 0.9845733642578125,grad_norm: 0.9612166251439456, iteration: 229829
loss: 0.9774873852729797,grad_norm: 0.9999990289698673, iteration: 229830
loss: 0.9821528792381287,grad_norm: 0.9314702136916744, iteration: 229831
loss: 0.9981983304023743,grad_norm: 0.9063546352295683, iteration: 229832
loss: 1.0235248804092407,grad_norm: 0.9695440205245308, iteration: 229833
loss: 0.9646511673927307,grad_norm: 0.9503106252788919, iteration: 229834
loss: 0.9804224371910095,grad_norm: 0.8109484577920635, iteration: 229835
loss: 1.018761396408081,grad_norm: 0.9129886129963843, iteration: 229836
loss: 1.021226167678833,grad_norm: 0.9999992284532867, iteration: 229837
loss: 0.9989435076713562,grad_norm: 0.7519053414081641, iteration: 229838
loss: 0.9896474480628967,grad_norm: 0.985463189784987, iteration: 229839
loss: 0.9702833294868469,grad_norm: 0.9323942569771214, iteration: 229840
loss: 0.9597229957580566,grad_norm: 0.9999999145764803, iteration: 229841
loss: 0.9880495071411133,grad_norm: 0.956844359680189, iteration: 229842
loss: 1.0376722812652588,grad_norm: 0.8900100110242558, iteration: 229843
loss: 0.9823201298713684,grad_norm: 0.9494944912457999, iteration: 229844
loss: 1.0121468305587769,grad_norm: 0.9917699718936421, iteration: 229845
loss: 1.0180772542953491,grad_norm: 0.9999990805926153, iteration: 229846
loss: 0.972478449344635,grad_norm: 0.8664968917683383, iteration: 229847
loss: 0.9797961115837097,grad_norm: 0.9088450984650138, iteration: 229848
loss: 1.0020097494125366,grad_norm: 0.994234847831026, iteration: 229849
loss: 0.9797999262809753,grad_norm: 0.9382460108519505, iteration: 229850
loss: 0.992422878742218,grad_norm: 0.9999990945101508, iteration: 229851
loss: 0.9802446365356445,grad_norm: 0.8190413799499815, iteration: 229852
loss: 1.0302190780639648,grad_norm: 0.9999994413849552, iteration: 229853
loss: 0.9898993372917175,grad_norm: 0.9217226401085853, iteration: 229854
loss: 0.9812748432159424,grad_norm: 0.983458944997327, iteration: 229855
loss: 1.0277780294418335,grad_norm: 0.9320901869900252, iteration: 229856
loss: 0.9665706753730774,grad_norm: 0.8455976507215941, iteration: 229857
loss: 1.039544701576233,grad_norm: 0.9388735856045581, iteration: 229858
loss: 1.0173397064208984,grad_norm: 0.7929318022618232, iteration: 229859
loss: 0.9888852834701538,grad_norm: 0.7916540136116126, iteration: 229860
loss: 0.9939291477203369,grad_norm: 0.8971725156198419, iteration: 229861
loss: 0.9271208047866821,grad_norm: 0.9831766020874457, iteration: 229862
loss: 1.0796762704849243,grad_norm: 0.8981070098113428, iteration: 229863
loss: 1.015687108039856,grad_norm: 0.8520038757229808, iteration: 229864
loss: 1.0025017261505127,grad_norm: 0.8454958651902188, iteration: 229865
loss: 0.9813883304595947,grad_norm: 0.9436091675276466, iteration: 229866
loss: 1.026650309562683,grad_norm: 0.9999991290025986, iteration: 229867
loss: 0.993807315826416,grad_norm: 0.8769170381644746, iteration: 229868
loss: 1.022331714630127,grad_norm: 0.9541442417162332, iteration: 229869
loss: 0.9612204432487488,grad_norm: 0.8430469660669002, iteration: 229870
loss: 0.9569428563117981,grad_norm: 0.8635459620358634, iteration: 229871
loss: 1.0382944345474243,grad_norm: 0.847650040532998, iteration: 229872
loss: 0.9858551025390625,grad_norm: 0.9999990457396084, iteration: 229873
loss: 0.9625057578086853,grad_norm: 0.9999991918247371, iteration: 229874
loss: 1.1822052001953125,grad_norm: 0.9999997927987248, iteration: 229875
loss: 0.988767683506012,grad_norm: 0.910102092790259, iteration: 229876
loss: 0.9900821447372437,grad_norm: 0.7828843001670209, iteration: 229877
loss: 0.9859358072280884,grad_norm: 0.8610061812463068, iteration: 229878
loss: 1.0154330730438232,grad_norm: 0.8732473124710052, iteration: 229879
loss: 1.001291036605835,grad_norm: 0.9095887123399157, iteration: 229880
loss: 1.0371413230895996,grad_norm: 0.8837642402054119, iteration: 229881
loss: 1.004819631576538,grad_norm: 0.896612975465322, iteration: 229882
loss: 0.9771395921707153,grad_norm: 0.9463126208374533, iteration: 229883
loss: 1.0371400117874146,grad_norm: 0.8640251855094673, iteration: 229884
loss: 1.0036158561706543,grad_norm: 0.9257480633673195, iteration: 229885
loss: 1.0176875591278076,grad_norm: 0.9178519468041695, iteration: 229886
loss: 0.9846189618110657,grad_norm: 0.999999043627777, iteration: 229887
loss: 1.005820870399475,grad_norm: 0.9392739335513831, iteration: 229888
loss: 0.9904317855834961,grad_norm: 0.7965482747011823, iteration: 229889
loss: 0.997586190700531,grad_norm: 0.9999990607061202, iteration: 229890
loss: 0.9904323816299438,grad_norm: 0.9999992516870534, iteration: 229891
loss: 1.0045416355133057,grad_norm: 0.759292675655378, iteration: 229892
loss: 0.9844114780426025,grad_norm: 0.894219144351577, iteration: 229893
loss: 0.9923273324966431,grad_norm: 0.9162457218275697, iteration: 229894
loss: 0.9736762046813965,grad_norm: 0.9620213094210813, iteration: 229895
loss: 0.999339759349823,grad_norm: 0.9999991552318931, iteration: 229896
loss: 0.9763380885124207,grad_norm: 0.942884866745096, iteration: 229897
loss: 0.9921493530273438,grad_norm: 0.9999991411480138, iteration: 229898
loss: 1.034024715423584,grad_norm: 0.8830533392126069, iteration: 229899
loss: 1.01832914352417,grad_norm: 0.9994565585997678, iteration: 229900
loss: 1.0074480772018433,grad_norm: 0.7523636819752659, iteration: 229901
loss: 1.0145831108093262,grad_norm: 0.7439500280048167, iteration: 229902
loss: 0.9768033027648926,grad_norm: 0.9110494169547199, iteration: 229903
loss: 1.0187859535217285,grad_norm: 0.891791305135605, iteration: 229904
loss: 0.9923162460327148,grad_norm: 0.7557425736076866, iteration: 229905
loss: 0.9988807439804077,grad_norm: 0.9552696911148959, iteration: 229906
loss: 0.9947468638420105,grad_norm: 0.9657120427423354, iteration: 229907
loss: 1.0050270557403564,grad_norm: 0.9999989962306338, iteration: 229908
loss: 1.0186293125152588,grad_norm: 0.8945028956735878, iteration: 229909
loss: 0.9629325270652771,grad_norm: 0.9656722045137682, iteration: 229910
loss: 0.9559045433998108,grad_norm: 0.9999992261238128, iteration: 229911
loss: 0.9890223741531372,grad_norm: 0.959406997975896, iteration: 229912
loss: 0.9676405787467957,grad_norm: 0.9999991247048142, iteration: 229913
loss: 1.0954984426498413,grad_norm: 0.9999991693438551, iteration: 229914
loss: 1.0125200748443604,grad_norm: 0.9999991272783907, iteration: 229915
loss: 0.997887909412384,grad_norm: 0.9060453095535739, iteration: 229916
loss: 1.0168167352676392,grad_norm: 0.8892860429613755, iteration: 229917
loss: 0.99996018409729,grad_norm: 0.878215946784862, iteration: 229918
loss: 0.983931839466095,grad_norm: 0.8348699352891614, iteration: 229919
loss: 1.0750524997711182,grad_norm: 0.9135867561008529, iteration: 229920
loss: 0.9636422395706177,grad_norm: 0.7721097415093137, iteration: 229921
loss: 1.0316252708435059,grad_norm: 0.9053088425588275, iteration: 229922
loss: 1.0170540809631348,grad_norm: 0.9449300361240124, iteration: 229923
loss: 0.9630846381187439,grad_norm: 0.9413867256665057, iteration: 229924
loss: 1.0066276788711548,grad_norm: 0.8203164437261361, iteration: 229925
loss: 0.9604768753051758,grad_norm: 0.9897669150263418, iteration: 229926
loss: 0.957175076007843,grad_norm: 0.985818700615688, iteration: 229927
loss: 0.9835082292556763,grad_norm: 0.9369951827506183, iteration: 229928
loss: 0.9721617698669434,grad_norm: 0.9757068808014444, iteration: 229929
loss: 1.006316900253296,grad_norm: 0.9390039633768557, iteration: 229930
loss: 1.0037269592285156,grad_norm: 0.7755886996981498, iteration: 229931
loss: 0.9698790311813354,grad_norm: 0.6439605503157952, iteration: 229932
loss: 0.9927738308906555,grad_norm: 0.8781467820543168, iteration: 229933
loss: 0.9720668792724609,grad_norm: 0.9999992665351854, iteration: 229934
loss: 1.0126564502716064,grad_norm: 0.8907407616300966, iteration: 229935
loss: 0.9507415294647217,grad_norm: 0.9092387136440315, iteration: 229936
loss: 1.0082740783691406,grad_norm: 0.9999990082882707, iteration: 229937
loss: 1.0109115839004517,grad_norm: 0.8860456940087571, iteration: 229938
loss: 1.0245258808135986,grad_norm: 0.8555302227031683, iteration: 229939
loss: 1.031597375869751,grad_norm: 0.999999255858477, iteration: 229940
loss: 0.9936093091964722,grad_norm: 0.9072115932348992, iteration: 229941
loss: 0.9831327795982361,grad_norm: 0.9999991469212937, iteration: 229942
loss: 1.0135091543197632,grad_norm: 0.9754045223603781, iteration: 229943
loss: 0.996925950050354,grad_norm: 0.8322716091482223, iteration: 229944
loss: 0.9639380574226379,grad_norm: 0.9999990290687713, iteration: 229945
loss: 1.0491969585418701,grad_norm: 0.9999989937929211, iteration: 229946
loss: 0.9994500875473022,grad_norm: 0.8116858110615887, iteration: 229947
loss: 0.9769278168678284,grad_norm: 0.8975619829949613, iteration: 229948
loss: 0.9916741251945496,grad_norm: 0.8392646029998555, iteration: 229949
loss: 0.9681422114372253,grad_norm: 0.9999991207627568, iteration: 229950
loss: 1.0505014657974243,grad_norm: 0.9999996754154592, iteration: 229951
loss: 1.0186841487884521,grad_norm: 0.9999991829935047, iteration: 229952
loss: 1.0140712261199951,grad_norm: 0.8449651100262038, iteration: 229953
loss: 1.0112905502319336,grad_norm: 0.9066670429864766, iteration: 229954
loss: 0.9899828433990479,grad_norm: 0.8709399375466045, iteration: 229955
loss: 1.0023789405822754,grad_norm: 0.9709747075485895, iteration: 229956
loss: 1.0032914876937866,grad_norm: 0.9451114718796606, iteration: 229957
loss: 1.0008224248886108,grad_norm: 0.99999926540513, iteration: 229958
loss: 0.9931039214134216,grad_norm: 0.9648322368191184, iteration: 229959
loss: 1.1103157997131348,grad_norm: 0.999999039465738, iteration: 229960
loss: 0.9708796739578247,grad_norm: 0.9999991298976891, iteration: 229961
loss: 1.001080870628357,grad_norm: 0.8076482841230818, iteration: 229962
loss: 1.0128856897354126,grad_norm: 0.9049321610326898, iteration: 229963
loss: 1.0135775804519653,grad_norm: 0.8864724669091748, iteration: 229964
loss: 1.0097600221633911,grad_norm: 0.99999924753459, iteration: 229965
loss: 0.9736602902412415,grad_norm: 0.9380224805552566, iteration: 229966
loss: 0.9932476878166199,grad_norm: 0.9247114495639487, iteration: 229967
loss: 1.0074834823608398,grad_norm: 0.9999990649991045, iteration: 229968
loss: 1.0188183784484863,grad_norm: 0.9999990294477388, iteration: 229969
loss: 0.9985963702201843,grad_norm: 0.8844742454538989, iteration: 229970
loss: 0.9961339235305786,grad_norm: 0.8754288286109715, iteration: 229971
loss: 1.0095754861831665,grad_norm: 0.9874502430792902, iteration: 229972
loss: 0.977834165096283,grad_norm: 0.7954222129995578, iteration: 229973
loss: 1.0232139825820923,grad_norm: 0.8882860796063237, iteration: 229974
loss: 1.0006822347640991,grad_norm: 0.8404512865841269, iteration: 229975
loss: 0.9889696836471558,grad_norm: 0.999999119975472, iteration: 229976
loss: 1.011948585510254,grad_norm: 0.8401138176716663, iteration: 229977
loss: 1.038249135017395,grad_norm: 0.9999990091784416, iteration: 229978
loss: 1.025896668434143,grad_norm: 0.9999990436655027, iteration: 229979
loss: 1.0195908546447754,grad_norm: 0.7826356857082478, iteration: 229980
loss: 0.9860823154449463,grad_norm: 0.8022818775875596, iteration: 229981
loss: 1.020034909248352,grad_norm: 0.7941979932513954, iteration: 229982
loss: 0.9911059737205505,grad_norm: 0.9848356520976698, iteration: 229983
loss: 1.011958360671997,grad_norm: 0.9522604750224893, iteration: 229984
loss: 0.9732717275619507,grad_norm: 0.8709816711851662, iteration: 229985
loss: 1.0128767490386963,grad_norm: 0.99999905816712, iteration: 229986
loss: 0.9966279864311218,grad_norm: 0.7928110068426676, iteration: 229987
loss: 0.9944066405296326,grad_norm: 0.880634110439746, iteration: 229988
loss: 0.9896991848945618,grad_norm: 0.9312507809269226, iteration: 229989
loss: 0.9997556209564209,grad_norm: 0.8625502567747438, iteration: 229990
loss: 1.0013220310211182,grad_norm: 0.9547651225442186, iteration: 229991
loss: 0.9884470701217651,grad_norm: 0.794179640185052, iteration: 229992
loss: 0.9801440834999084,grad_norm: 0.8839445216997871, iteration: 229993
loss: 0.9921866655349731,grad_norm: 0.9624217619579852, iteration: 229994
loss: 1.0203180313110352,grad_norm: 0.9955034667101309, iteration: 229995
loss: 0.9990313053131104,grad_norm: 0.8582437095710529, iteration: 229996
loss: 0.9916967153549194,grad_norm: 0.8903993714684182, iteration: 229997
loss: 0.9983968138694763,grad_norm: 0.9999990270196858, iteration: 229998
loss: 1.0025920867919922,grad_norm: 0.8018799627414266, iteration: 229999
loss: 0.9921936988830566,grad_norm: 0.9999990205251681, iteration: 230000
Evaluating at step 230000
{'val': 0.9940560236573219, 'test': 2.3918078996883794}
loss: 0.9790878891944885,grad_norm: 0.9791104978823609, iteration: 230001
loss: 0.9821694493293762,grad_norm: 0.9491009362497537, iteration: 230002
loss: 0.9901260137557983,grad_norm: 0.9712346402509446, iteration: 230003
loss: 1.0168622732162476,grad_norm: 0.9999991630785474, iteration: 230004
loss: 1.027543544769287,grad_norm: 0.9799076970400136, iteration: 230005
loss: 0.9685884118080139,grad_norm: 0.8196952550208076, iteration: 230006
loss: 1.0515942573547363,grad_norm: 0.9999991542700627, iteration: 230007
loss: 1.0280988216400146,grad_norm: 0.9999992546065045, iteration: 230008
loss: 0.9747185707092285,grad_norm: 0.8234751750501422, iteration: 230009
loss: 0.9874324202537537,grad_norm: 0.9479061272677426, iteration: 230010
loss: 0.9989968538284302,grad_norm: 0.9999990087719246, iteration: 230011
loss: 1.0389283895492554,grad_norm: 0.8781801303668862, iteration: 230012
loss: 0.9870381951332092,grad_norm: 0.7794997751764156, iteration: 230013
loss: 1.0087714195251465,grad_norm: 0.9999994903488589, iteration: 230014
loss: 1.0356707572937012,grad_norm: 0.9711376739766658, iteration: 230015
loss: 1.0240978002548218,grad_norm: 0.9543990696585292, iteration: 230016
loss: 1.0230430364608765,grad_norm: 0.8421035892776438, iteration: 230017
loss: 0.9945546984672546,grad_norm: 0.9563107931016475, iteration: 230018
loss: 1.0101771354675293,grad_norm: 0.9869696026687715, iteration: 230019
loss: 1.0127075910568237,grad_norm: 0.9763861160217623, iteration: 230020
loss: 1.0100497007369995,grad_norm: 0.9999990398563235, iteration: 230021
loss: 0.9887059330940247,grad_norm: 0.9720329021009306, iteration: 230022
loss: 1.004504919052124,grad_norm: 0.907680002850643, iteration: 230023
loss: 0.9589378833770752,grad_norm: 0.9999990886777731, iteration: 230024
loss: 0.9940400123596191,grad_norm: 0.9020405290420782, iteration: 230025
loss: 1.0132776498794556,grad_norm: 0.7679512050186235, iteration: 230026
loss: 1.0316890478134155,grad_norm: 0.9157056783355474, iteration: 230027
loss: 0.9353037476539612,grad_norm: 0.9355285752129002, iteration: 230028
loss: 1.0380258560180664,grad_norm: 0.8597003020767691, iteration: 230029
loss: 0.9706311821937561,grad_norm: 0.9999990939557215, iteration: 230030
loss: 0.9883319735527039,grad_norm: 0.9999990220702552, iteration: 230031
loss: 1.0168448686599731,grad_norm: 0.9999991986144007, iteration: 230032
loss: 1.0042918920516968,grad_norm: 0.9999990724258853, iteration: 230033
loss: 0.9947316646575928,grad_norm: 0.8113417346112328, iteration: 230034
loss: 1.0211377143859863,grad_norm: 0.7105374323490444, iteration: 230035
loss: 0.9938061237335205,grad_norm: 0.9188309008621466, iteration: 230036
loss: 1.0295358896255493,grad_norm: 0.9999991534780448, iteration: 230037
loss: 1.0480647087097168,grad_norm: 0.9999991666086436, iteration: 230038
loss: 1.0208123922348022,grad_norm: 0.953165185547861, iteration: 230039
loss: 0.9841390252113342,grad_norm: 0.9052415488150329, iteration: 230040
loss: 0.9756855964660645,grad_norm: 0.8382066610735464, iteration: 230041
loss: 1.001520037651062,grad_norm: 0.9439689508790428, iteration: 230042
loss: 0.9786090850830078,grad_norm: 0.9999990728186269, iteration: 230043
loss: 0.9740514159202576,grad_norm: 0.8687945335481946, iteration: 230044
loss: 0.9763956069946289,grad_norm: 0.8988471285020267, iteration: 230045
loss: 1.0153104066848755,grad_norm: 0.9999989864355494, iteration: 230046
loss: 0.9774190187454224,grad_norm: 0.7501091429300005, iteration: 230047
loss: 0.9827023148536682,grad_norm: 0.8839325304412163, iteration: 230048
loss: 0.9941373467445374,grad_norm: 0.999999084348887, iteration: 230049
loss: 0.9863062500953674,grad_norm: 0.950241706530156, iteration: 230050
loss: 1.028242588043213,grad_norm: 0.9485584367366762, iteration: 230051
loss: 1.005318284034729,grad_norm: 0.9999989995071672, iteration: 230052
loss: 1.0297387838363647,grad_norm: 0.985861574738402, iteration: 230053
loss: 0.9545755386352539,grad_norm: 0.882206317795617, iteration: 230054
loss: 1.0082279443740845,grad_norm: 0.7725835571113268, iteration: 230055
loss: 0.9843167662620544,grad_norm: 0.9999990740005259, iteration: 230056
loss: 1.0065968036651611,grad_norm: 0.9869621907059652, iteration: 230057
loss: 0.9485673308372498,grad_norm: 0.9352113102094882, iteration: 230058
loss: 0.9859947562217712,grad_norm: 0.9578625790916828, iteration: 230059
loss: 1.0118221044540405,grad_norm: 0.9415657920794619, iteration: 230060
loss: 1.002395749092102,grad_norm: 0.7901240587409128, iteration: 230061
loss: 1.0073920488357544,grad_norm: 0.9999991254842523, iteration: 230062
loss: 0.987877607345581,grad_norm: 0.9369860529093728, iteration: 230063
loss: 0.9925810098648071,grad_norm: 0.8790860544933787, iteration: 230064
loss: 0.9509566426277161,grad_norm: 0.8083937352150362, iteration: 230065
loss: 0.9644386172294617,grad_norm: 0.8253941130648504, iteration: 230066
loss: 1.0285612344741821,grad_norm: 0.7522996501426589, iteration: 230067
loss: 0.9911358952522278,grad_norm: 0.8950122667611802, iteration: 230068
loss: 0.9891892075538635,grad_norm: 0.7920352512334063, iteration: 230069
loss: 0.9921281337738037,grad_norm: 0.999999895940388, iteration: 230070
loss: 1.0172213315963745,grad_norm: 0.9999992010069647, iteration: 230071
loss: 1.0254892110824585,grad_norm: 0.9999990354895189, iteration: 230072
loss: 0.985653281211853,grad_norm: 0.9999991463545465, iteration: 230073
loss: 1.0251480340957642,grad_norm: 0.9740337163994139, iteration: 230074
loss: 1.0471382141113281,grad_norm: 0.811260379435566, iteration: 230075
loss: 1.0059382915496826,grad_norm: 0.8473926498654195, iteration: 230076
loss: 1.001869797706604,grad_norm: 0.999999112880541, iteration: 230077
loss: 1.0089489221572876,grad_norm: 0.999999153181078, iteration: 230078
loss: 1.0191149711608887,grad_norm: 0.8251382996818669, iteration: 230079
loss: 1.0123156309127808,grad_norm: 0.9252127628530958, iteration: 230080
loss: 1.0151442289352417,grad_norm: 0.8891446672494461, iteration: 230081
loss: 0.9785656332969666,grad_norm: 0.8887647090452584, iteration: 230082
loss: 1.0001513957977295,grad_norm: 0.9999991182525155, iteration: 230083
loss: 0.9691007733345032,grad_norm: 0.9999991468402034, iteration: 230084
loss: 0.9477312564849854,grad_norm: 0.9278004955156306, iteration: 230085
loss: 0.99830561876297,grad_norm: 0.9999996296449506, iteration: 230086
loss: 0.9454032778739929,grad_norm: 0.8793185193288082, iteration: 230087
loss: 1.0321221351623535,grad_norm: 0.8192902875371498, iteration: 230088
loss: 1.0051926374435425,grad_norm: 0.9350728845021204, iteration: 230089
loss: 1.009682059288025,grad_norm: 0.9999991398821616, iteration: 230090
loss: 1.0180591344833374,grad_norm: 0.8231033084720706, iteration: 230091
loss: 0.9991357922554016,grad_norm: 0.8550192778822981, iteration: 230092
loss: 1.0019886493682861,grad_norm: 0.9585253668700254, iteration: 230093
loss: 0.9823286533355713,grad_norm: 0.9484206860440115, iteration: 230094
loss: 1.056628704071045,grad_norm: 0.9999992478159085, iteration: 230095
loss: 0.97690749168396,grad_norm: 0.8092892913567635, iteration: 230096
loss: 0.9897907376289368,grad_norm: 0.9999990642363226, iteration: 230097
loss: 1.0194790363311768,grad_norm: 0.9999991431225176, iteration: 230098
loss: 0.9940789341926575,grad_norm: 0.9999991979668341, iteration: 230099
loss: 0.9613358378410339,grad_norm: 0.9179226385203874, iteration: 230100
loss: 0.992444634437561,grad_norm: 0.9999990925663281, iteration: 230101
loss: 1.0160095691680908,grad_norm: 0.9237879082713976, iteration: 230102
loss: 1.0074362754821777,grad_norm: 0.7696546391889333, iteration: 230103
loss: 0.9866676330566406,grad_norm: 0.9871957868489583, iteration: 230104
loss: 0.9861481785774231,grad_norm: 0.9999996097848345, iteration: 230105
loss: 1.0063480138778687,grad_norm: 0.857582163765494, iteration: 230106
loss: 1.0046857595443726,grad_norm: 0.9700325138546828, iteration: 230107
loss: 0.9851093292236328,grad_norm: 0.8126959043797511, iteration: 230108
loss: 1.0225940942764282,grad_norm: 0.8309068207129625, iteration: 230109
loss: 0.9975476264953613,grad_norm: 0.9099154211513287, iteration: 230110
loss: 1.0117518901824951,grad_norm: 0.9999993004457253, iteration: 230111
loss: 1.0322651863098145,grad_norm: 0.9999996124893994, iteration: 230112
loss: 0.9839454293251038,grad_norm: 0.9029550290617232, iteration: 230113
loss: 1.0189485549926758,grad_norm: 0.8821957843803543, iteration: 230114
loss: 1.0201263427734375,grad_norm: 0.8403071709007743, iteration: 230115
loss: 0.9953268766403198,grad_norm: 0.8772531189392389, iteration: 230116
loss: 1.0074540376663208,grad_norm: 0.9999991154014464, iteration: 230117
loss: 0.9867895245552063,grad_norm: 0.9849279810854605, iteration: 230118
loss: 1.0148413181304932,grad_norm: 0.8720652054428719, iteration: 230119
loss: 0.9746600389480591,grad_norm: 0.9999991542539899, iteration: 230120
loss: 1.1111513376235962,grad_norm: 0.9999994218382996, iteration: 230121
loss: 1.008167028427124,grad_norm: 0.7664054301240067, iteration: 230122
loss: 1.0331515073776245,grad_norm: 0.9999990056005229, iteration: 230123
loss: 1.02861750125885,grad_norm: 0.999999052183679, iteration: 230124
loss: 0.9889892339706421,grad_norm: 0.9653034666496846, iteration: 230125
loss: 1.0098446607589722,grad_norm: 0.8575249826093375, iteration: 230126
loss: 1.0206815004348755,grad_norm: 0.9018983233757271, iteration: 230127
loss: 0.9851394295692444,grad_norm: 0.9999991042847921, iteration: 230128
loss: 1.0053244829177856,grad_norm: 0.730947692079477, iteration: 230129
loss: 1.017445683479309,grad_norm: 0.8681997984938884, iteration: 230130
loss: 1.0110050439834595,grad_norm: 0.8326952234401234, iteration: 230131
loss: 1.0155298709869385,grad_norm: 0.8672356401380273, iteration: 230132
loss: 1.0076072216033936,grad_norm: 0.7616900978698985, iteration: 230133
loss: 0.9990087747573853,grad_norm: 0.9727791817392741, iteration: 230134
loss: 1.0086644887924194,grad_norm: 0.8235884244732412, iteration: 230135
loss: 1.017352819442749,grad_norm: 0.9999989369163239, iteration: 230136
loss: 0.990020751953125,grad_norm: 0.8454917721025227, iteration: 230137
loss: 0.963657021522522,grad_norm: 0.996724530726257, iteration: 230138
loss: 0.9783530831336975,grad_norm: 0.9503527758245027, iteration: 230139
loss: 1.0277734994888306,grad_norm: 0.9999991744880596, iteration: 230140
loss: 0.9831368327140808,grad_norm: 0.9429141659784324, iteration: 230141
loss: 1.0027779340744019,grad_norm: 0.8524497993619563, iteration: 230142
loss: 1.0198487043380737,grad_norm: 0.9999990957641303, iteration: 230143
loss: 0.990138053894043,grad_norm: 0.9375974517077715, iteration: 230144
loss: 1.0873810052871704,grad_norm: 1.0000000564387705, iteration: 230145
loss: 1.0143985748291016,grad_norm: 0.7448592126435478, iteration: 230146
loss: 0.9990807771682739,grad_norm: 0.8833667201086939, iteration: 230147
loss: 1.124780535697937,grad_norm: 0.9532472133434837, iteration: 230148
loss: 1.0004596710205078,grad_norm: 0.9999991276129756, iteration: 230149
loss: 0.9578561782836914,grad_norm: 0.8419750175138807, iteration: 230150
loss: 1.0234187841415405,grad_norm: 0.9999991129587502, iteration: 230151
loss: 0.9992362856864929,grad_norm: 0.7512891689773086, iteration: 230152
loss: 0.971449613571167,grad_norm: 0.854288401554243, iteration: 230153
loss: 1.0220813751220703,grad_norm: 0.8911758709724538, iteration: 230154
loss: 0.992815375328064,grad_norm: 0.9812158231844225, iteration: 230155
loss: 0.9974122047424316,grad_norm: 0.7662077061696043, iteration: 230156
loss: 0.988037645816803,grad_norm: 0.9999990834689283, iteration: 230157
loss: 1.0358017683029175,grad_norm: 0.9519473669949199, iteration: 230158
loss: 1.0097224712371826,grad_norm: 0.999999227921756, iteration: 230159
loss: 1.0091572999954224,grad_norm: 0.8515481233711937, iteration: 230160
loss: 0.9997076392173767,grad_norm: 0.8293135778363894, iteration: 230161
loss: 0.9820727705955505,grad_norm: 0.807597834973418, iteration: 230162
loss: 1.015791893005371,grad_norm: 0.9999990493864337, iteration: 230163
loss: 0.9689209461212158,grad_norm: 0.8101957674142298, iteration: 230164
loss: 0.9949124455451965,grad_norm: 0.9999992016943352, iteration: 230165
loss: 1.0195554494857788,grad_norm: 0.7705011302673976, iteration: 230166
loss: 1.0188148021697998,grad_norm: 0.8810103494241709, iteration: 230167
loss: 0.9856877326965332,grad_norm: 0.8038777049824568, iteration: 230168
loss: 1.012960433959961,grad_norm: 0.9999991943308978, iteration: 230169
loss: 1.015402913093567,grad_norm: 0.9999990744437441, iteration: 230170
loss: 0.9845538139343262,grad_norm: 0.8571861588183737, iteration: 230171
loss: 1.0028471946716309,grad_norm: 0.9999991630449844, iteration: 230172
loss: 0.9924400448799133,grad_norm: 0.8969981767081567, iteration: 230173
loss: 0.9659121632575989,grad_norm: 0.999999174717211, iteration: 230174
loss: 1.014968752861023,grad_norm: 0.9619857277741855, iteration: 230175
loss: 0.9697014689445496,grad_norm: 0.9999991275113385, iteration: 230176
loss: 0.9961079955101013,grad_norm: 0.9788967804310219, iteration: 230177
loss: 0.9605035781860352,grad_norm: 0.9563489875461774, iteration: 230178
loss: 1.0239207744598389,grad_norm: 0.9999991235288492, iteration: 230179
loss: 0.9899033308029175,grad_norm: 0.9550199689141579, iteration: 230180
loss: 0.9909749031066895,grad_norm: 0.8969021957687948, iteration: 230181
loss: 0.981721043586731,grad_norm: 0.9508949470187886, iteration: 230182
loss: 0.9853624105453491,grad_norm: 0.999999081877674, iteration: 230183
loss: 0.9849117398262024,grad_norm: 0.7307458703780575, iteration: 230184
loss: 0.9873219132423401,grad_norm: 0.771708501842832, iteration: 230185
loss: 0.9553576111793518,grad_norm: 0.9021913770459569, iteration: 230186
loss: 0.9836392402648926,grad_norm: 0.9402569978171937, iteration: 230187
loss: 1.0148392915725708,grad_norm: 0.9198774192343852, iteration: 230188
loss: 1.0060864686965942,grad_norm: 0.9372187760855647, iteration: 230189
loss: 1.0514769554138184,grad_norm: 0.9999993212887385, iteration: 230190
loss: 0.9906237125396729,grad_norm: 0.790768609489007, iteration: 230191
loss: 0.989511251449585,grad_norm: 0.797541698641037, iteration: 230192
loss: 1.007550597190857,grad_norm: 0.9999992447149731, iteration: 230193
loss: 0.9845880270004272,grad_norm: 0.9333047755921438, iteration: 230194
loss: 1.0249121189117432,grad_norm: 0.8929875506117481, iteration: 230195
loss: 1.0042730569839478,grad_norm: 0.9999990869335207, iteration: 230196
loss: 0.9721420407295227,grad_norm: 0.8976658056730662, iteration: 230197
loss: 0.9899548888206482,grad_norm: 0.9940500642887257, iteration: 230198
loss: 1.021271824836731,grad_norm: 0.9999993975240898, iteration: 230199
loss: 1.0181868076324463,grad_norm: 0.9999991833431183, iteration: 230200
loss: 0.9965382218360901,grad_norm: 0.9999991779290627, iteration: 230201
loss: 1.0464760065078735,grad_norm: 0.9999992601719181, iteration: 230202
loss: 1.0158942937850952,grad_norm: 0.801094516706823, iteration: 230203
loss: 1.0346593856811523,grad_norm: 0.8296906189519724, iteration: 230204
loss: 0.998715341091156,grad_norm: 0.9197126225395986, iteration: 230205
loss: 1.004807949066162,grad_norm: 0.909611569953245, iteration: 230206
loss: 0.965288519859314,grad_norm: 0.7798573669210653, iteration: 230207
loss: 0.9844503402709961,grad_norm: 0.9862154398214794, iteration: 230208
loss: 1.0221967697143555,grad_norm: 0.8529623705857536, iteration: 230209
loss: 0.9797580242156982,grad_norm: 0.8688634763316918, iteration: 230210
loss: 0.9936590194702148,grad_norm: 0.8677629367893764, iteration: 230211
loss: 0.9934205412864685,grad_norm: 0.8759753507792235, iteration: 230212
loss: 0.9993699789047241,grad_norm: 0.9999991439518413, iteration: 230213
loss: 0.994550347328186,grad_norm: 0.8876469540135951, iteration: 230214
loss: 0.9826266169548035,grad_norm: 0.99999899953069, iteration: 230215
loss: 1.0145012140274048,grad_norm: 0.8583874856102388, iteration: 230216
loss: 0.998210608959198,grad_norm: 0.7705049041483426, iteration: 230217
loss: 1.0864397287368774,grad_norm: 0.9999997049019796, iteration: 230218
loss: 0.9802626371383667,grad_norm: 0.9716236770147259, iteration: 230219
loss: 1.0005486011505127,grad_norm: 0.9304948425024844, iteration: 230220
loss: 0.9947451949119568,grad_norm: 0.99999909048877, iteration: 230221
loss: 1.0240051746368408,grad_norm: 1.0000000066766466, iteration: 230222
loss: 1.0026859045028687,grad_norm: 0.9399223134169481, iteration: 230223
loss: 0.9934689998626709,grad_norm: 0.835314051235319, iteration: 230224
loss: 0.9891471266746521,grad_norm: 0.8525342285320222, iteration: 230225
loss: 1.0037202835083008,grad_norm: 0.8070149521670056, iteration: 230226
loss: 1.0375251770019531,grad_norm: 0.9999990799756091, iteration: 230227
loss: 0.9590888619422913,grad_norm: 0.8919211858837419, iteration: 230228
loss: 1.0074046850204468,grad_norm: 0.9999995576062843, iteration: 230229
loss: 0.9855546951293945,grad_norm: 0.9999992171061576, iteration: 230230
loss: 0.9654232263565063,grad_norm: 0.9981113910116632, iteration: 230231
loss: 0.9771001935005188,grad_norm: 0.8895611389661221, iteration: 230232
loss: 0.9944196343421936,grad_norm: 0.9634428062269341, iteration: 230233
loss: 0.9770587682723999,grad_norm: 0.9999991510939602, iteration: 230234
loss: 0.9742704033851624,grad_norm: 0.8306715168674287, iteration: 230235
loss: 0.990279495716095,grad_norm: 0.8317178016359729, iteration: 230236
loss: 1.0130174160003662,grad_norm: 0.9999989231851061, iteration: 230237
loss: 1.0277040004730225,grad_norm: 0.979000717991151, iteration: 230238
loss: 0.9828001260757446,grad_norm: 0.9906944460849944, iteration: 230239
loss: 1.0274962186813354,grad_norm: 0.9577059357246339, iteration: 230240
loss: 1.0358120203018188,grad_norm: 0.9999990247958688, iteration: 230241
loss: 1.0220005512237549,grad_norm: 0.9569437895996266, iteration: 230242
loss: 0.9958953857421875,grad_norm: 0.8133917409876636, iteration: 230243
loss: 1.0186573266983032,grad_norm: 0.9999991364065423, iteration: 230244
loss: 1.0100371837615967,grad_norm: 0.8380079063019867, iteration: 230245
loss: 1.0178221464157104,grad_norm: 0.8466751991872488, iteration: 230246
loss: 1.0146887302398682,grad_norm: 0.928542059714651, iteration: 230247
loss: 0.9827495813369751,grad_norm: 0.892777823881084, iteration: 230248
loss: 1.0530837774276733,grad_norm: 0.9999990374868207, iteration: 230249
loss: 0.9991504549980164,grad_norm: 0.9999991638614634, iteration: 230250
loss: 1.0063234567642212,grad_norm: 0.803282594897236, iteration: 230251
loss: 0.9837027788162231,grad_norm: 0.9999991211015669, iteration: 230252
loss: 0.9883987307548523,grad_norm: 0.885108489667535, iteration: 230253
loss: 0.9645164012908936,grad_norm: 0.7037199256752987, iteration: 230254
loss: 0.98811936378479,grad_norm: 0.9663761153694216, iteration: 230255
loss: 1.000325322151184,grad_norm: 0.8845106867682566, iteration: 230256
loss: 1.0043113231658936,grad_norm: 0.9999991813947062, iteration: 230257
loss: 0.998946487903595,grad_norm: 0.9999991282964152, iteration: 230258
loss: 1.0010734796524048,grad_norm: 0.7866419419756469, iteration: 230259
loss: 1.0152662992477417,grad_norm: 0.8923024559421726, iteration: 230260
loss: 0.9783458113670349,grad_norm: 0.7287516159378727, iteration: 230261
loss: 1.0458056926727295,grad_norm: 0.8218921407917069, iteration: 230262
loss: 1.044505000114441,grad_norm: 0.9999990114884209, iteration: 230263
loss: 0.9844563603401184,grad_norm: 0.9999991688737838, iteration: 230264
loss: 0.9855587482452393,grad_norm: 0.864736813110443, iteration: 230265
loss: 0.9831439256668091,grad_norm: 0.9605400128734402, iteration: 230266
loss: 1.037699580192566,grad_norm: 0.9999989735973159, iteration: 230267
loss: 0.9868017435073853,grad_norm: 0.8760240999369439, iteration: 230268
loss: 1.0143946409225464,grad_norm: 0.8862419191709114, iteration: 230269
loss: 0.9715801477432251,grad_norm: 0.9302004669494608, iteration: 230270
loss: 1.0260863304138184,grad_norm: 0.999999803840411, iteration: 230271
loss: 0.9999613165855408,grad_norm: 0.9999991318069683, iteration: 230272
loss: 1.0224131345748901,grad_norm: 0.9999995528459795, iteration: 230273
loss: 0.9773527383804321,grad_norm: 0.8776707372414684, iteration: 230274
loss: 0.9829011559486389,grad_norm: 0.9509703481182102, iteration: 230275
loss: 0.9982035160064697,grad_norm: 0.9999991429364258, iteration: 230276
loss: 0.9985648989677429,grad_norm: 0.8752278421891422, iteration: 230277
loss: 0.9948258399963379,grad_norm: 0.8883882237057614, iteration: 230278
loss: 0.9945032000541687,grad_norm: 0.8991322718116591, iteration: 230279
loss: 1.0646662712097168,grad_norm: 0.9999991509610492, iteration: 230280
loss: 0.9915542006492615,grad_norm: 0.8747261275430323, iteration: 230281
loss: 0.9837775826454163,grad_norm: 0.8517679613634688, iteration: 230282
loss: 0.981465220451355,grad_norm: 0.8893204918041719, iteration: 230283
loss: 1.0134484767913818,grad_norm: 0.9491378619280757, iteration: 230284
loss: 1.0049660205841064,grad_norm: 0.9999992469991359, iteration: 230285
loss: 1.0045156478881836,grad_norm: 0.8736904138490741, iteration: 230286
loss: 0.9940417408943176,grad_norm: 0.8406574785373426, iteration: 230287
loss: 1.021358847618103,grad_norm: 0.9999993063305442, iteration: 230288
loss: 1.0142656564712524,grad_norm: 0.999999261034784, iteration: 230289
loss: 0.9775294661521912,grad_norm: 0.8184654160227414, iteration: 230290
loss: 0.9866125583648682,grad_norm: 0.8977097018678288, iteration: 230291
loss: 1.0048259496688843,grad_norm: 0.8527524057512937, iteration: 230292
loss: 1.0224002599716187,grad_norm: 0.7325409837324823, iteration: 230293
loss: 1.0044318437576294,grad_norm: 0.9999991216645818, iteration: 230294
loss: 1.0209821462631226,grad_norm: 0.9999990712912227, iteration: 230295
loss: 1.00697922706604,grad_norm: 0.923109124749183, iteration: 230296
loss: 1.008052110671997,grad_norm: 0.9999991419601381, iteration: 230297
loss: 1.0706790685653687,grad_norm: 0.9152234067473122, iteration: 230298
loss: 1.0159755945205688,grad_norm: 0.9530973092954126, iteration: 230299
loss: 1.0004346370697021,grad_norm: 0.9999991072277035, iteration: 230300
loss: 0.9698200225830078,grad_norm: 0.8424881764878649, iteration: 230301
loss: 0.9513209462165833,grad_norm: 0.9999993199340018, iteration: 230302
loss: 0.9793812036514282,grad_norm: 0.9884217708932138, iteration: 230303
loss: 1.0006996393203735,grad_norm: 0.8251878685206111, iteration: 230304
loss: 1.0213744640350342,grad_norm: 0.9375512554899145, iteration: 230305
loss: 1.0121315717697144,grad_norm: 0.8876573616184948, iteration: 230306
loss: 0.9683313369750977,grad_norm: 0.8190655343583286, iteration: 230307
loss: 1.009700894355774,grad_norm: 0.9999990324670964, iteration: 230308
loss: 0.9791061878204346,grad_norm: 0.9318573948057083, iteration: 230309
loss: 1.020605444908142,grad_norm: 0.8716786328294164, iteration: 230310
loss: 1.0016989707946777,grad_norm: 0.9677844281680887, iteration: 230311
loss: 1.0282537937164307,grad_norm: 0.8970702215828723, iteration: 230312
loss: 0.9898329973220825,grad_norm: 0.906099179038899, iteration: 230313
loss: 1.0159465074539185,grad_norm: 0.9597224024495001, iteration: 230314
loss: 0.9776833057403564,grad_norm: 0.9314824725530081, iteration: 230315
loss: 0.9983421564102173,grad_norm: 0.9999991311037688, iteration: 230316
loss: 0.9727463722229004,grad_norm: 0.999999068307727, iteration: 230317
loss: 1.0146335363388062,grad_norm: 0.9620182621035206, iteration: 230318
loss: 1.0168038606643677,grad_norm: 0.7991520570274677, iteration: 230319
loss: 0.9838674664497375,grad_norm: 0.9529852873073765, iteration: 230320
loss: 0.9854806661605835,grad_norm: 0.9878719601988607, iteration: 230321
loss: 1.0251680612564087,grad_norm: 0.810247296744303, iteration: 230322
loss: 1.003431797027588,grad_norm: 0.9542289986534679, iteration: 230323
loss: 0.9929584264755249,grad_norm: 0.9948061404152989, iteration: 230324
loss: 1.008136510848999,grad_norm: 0.9599522244850097, iteration: 230325
loss: 0.9972095489501953,grad_norm: 0.9408978026653926, iteration: 230326
loss: 1.0252875089645386,grad_norm: 0.7543227454128703, iteration: 230327
loss: 1.0051891803741455,grad_norm: 0.8381599666337723, iteration: 230328
loss: 0.9417793154716492,grad_norm: 0.9258069108549487, iteration: 230329
loss: 0.9994211792945862,grad_norm: 0.9999993416423051, iteration: 230330
loss: 0.9783034920692444,grad_norm: 0.9108105915485192, iteration: 230331
loss: 1.0077824592590332,grad_norm: 0.9090848434611419, iteration: 230332
loss: 1.0117628574371338,grad_norm: 0.9522565320147692, iteration: 230333
loss: 1.029643177986145,grad_norm: 0.8108040572169385, iteration: 230334
loss: 1.05237877368927,grad_norm: 0.9310500283051564, iteration: 230335
loss: 0.9974635243415833,grad_norm: 0.999998989271978, iteration: 230336
loss: 1.007418155670166,grad_norm: 0.9421152817290933, iteration: 230337
loss: 1.0042600631713867,grad_norm: 0.999999091035235, iteration: 230338
loss: 0.9990300536155701,grad_norm: 0.9653002489301131, iteration: 230339
loss: 1.0277259349822998,grad_norm: 0.9999991985281784, iteration: 230340
loss: 1.018646240234375,grad_norm: 0.9515939626349283, iteration: 230341
loss: 0.9857997894287109,grad_norm: 0.9999990203931963, iteration: 230342
loss: 0.9671587944030762,grad_norm: 0.9999998961693738, iteration: 230343
loss: 1.0028990507125854,grad_norm: 0.9999990999478519, iteration: 230344
loss: 1.002305507659912,grad_norm: 0.7095341965751859, iteration: 230345
loss: 1.0341697931289673,grad_norm: 0.8635535586318317, iteration: 230346
loss: 0.9780004620552063,grad_norm: 0.9387973540766367, iteration: 230347
loss: 0.9960871338844299,grad_norm: 0.9999990548901202, iteration: 230348
loss: 0.9850788712501526,grad_norm: 0.8988240408245385, iteration: 230349
loss: 1.0050145387649536,grad_norm: 0.9591355119929776, iteration: 230350
loss: 1.0046557188034058,grad_norm: 0.8897651592742153, iteration: 230351
loss: 1.0053882598876953,grad_norm: 0.9999992658613028, iteration: 230352
loss: 1.0175551176071167,grad_norm: 0.8649081514762712, iteration: 230353
loss: 0.9785510301589966,grad_norm: 0.9371514662120051, iteration: 230354
loss: 0.9913451075553894,grad_norm: 0.8342492370240826, iteration: 230355
loss: 1.0198581218719482,grad_norm: 0.9999992133348992, iteration: 230356
loss: 1.0286259651184082,grad_norm: 0.9999990369029017, iteration: 230357
loss: 0.9914824366569519,grad_norm: 0.9306601098809805, iteration: 230358
loss: 0.9809131622314453,grad_norm: 0.903157226130581, iteration: 230359
loss: 0.9965261816978455,grad_norm: 0.7848575154922306, iteration: 230360
loss: 1.0281065702438354,grad_norm: 0.9999990874457091, iteration: 230361
loss: 0.9988329410552979,grad_norm: 0.9960667665648356, iteration: 230362
loss: 1.0055246353149414,grad_norm: 0.9709370622403442, iteration: 230363
loss: 0.9873998165130615,grad_norm: 0.8604752025327574, iteration: 230364
loss: 1.012601375579834,grad_norm: 0.8293612629876985, iteration: 230365
loss: 1.0174641609191895,grad_norm: 0.9999990873684511, iteration: 230366
loss: 1.0164618492126465,grad_norm: 0.9419872173150986, iteration: 230367
loss: 1.0297865867614746,grad_norm: 0.9290214710726523, iteration: 230368
loss: 1.0036089420318604,grad_norm: 0.9999997922618531, iteration: 230369
loss: 1.0188325643539429,grad_norm: 0.9999992206356798, iteration: 230370
loss: 0.9831461906433105,grad_norm: 0.8712375104382898, iteration: 230371
loss: 0.9373550415039062,grad_norm: 0.9728457380620044, iteration: 230372
loss: 0.9782418608665466,grad_norm: 0.9574768787292164, iteration: 230373
loss: 0.9851078391075134,grad_norm: 0.8808730957646691, iteration: 230374
loss: 1.0146220922470093,grad_norm: 0.9032109711781987, iteration: 230375
loss: 1.021876335144043,grad_norm: 0.9311932906058337, iteration: 230376
loss: 1.0082060098648071,grad_norm: 0.8149608166191633, iteration: 230377
loss: 1.0311212539672852,grad_norm: 0.9806302084940041, iteration: 230378
loss: 0.9948229193687439,grad_norm: 0.9999990616036988, iteration: 230379
loss: 0.9910522699356079,grad_norm: 0.9099729316073711, iteration: 230380
loss: 1.0150939226150513,grad_norm: 0.8835860014834191, iteration: 230381
loss: 0.9927306175231934,grad_norm: 0.8854404004640392, iteration: 230382
loss: 0.983805239200592,grad_norm: 0.9999989841454955, iteration: 230383
loss: 0.994717538356781,grad_norm: 0.7645266319648164, iteration: 230384
loss: 1.025803565979004,grad_norm: 0.7337806072012597, iteration: 230385
loss: 0.998904287815094,grad_norm: 0.9999994690774061, iteration: 230386
loss: 0.9783172607421875,grad_norm: 0.8497786539039254, iteration: 230387
loss: 0.9862527847290039,grad_norm: 0.7635766567138361, iteration: 230388
loss: 0.9859596490859985,grad_norm: 0.8432684878853263, iteration: 230389
loss: 0.9957929849624634,grad_norm: 0.9574030292082375, iteration: 230390
loss: 0.9794104695320129,grad_norm: 0.7994553389291199, iteration: 230391
loss: 1.021398663520813,grad_norm: 0.8955955499753555, iteration: 230392
loss: 1.0215811729431152,grad_norm: 0.8743283492898762, iteration: 230393
loss: 0.9719468355178833,grad_norm: 0.9999992482112584, iteration: 230394
loss: 1.0296883583068848,grad_norm: 0.7568880527496209, iteration: 230395
loss: 0.9905593395233154,grad_norm: 0.9999990462125434, iteration: 230396
loss: 1.0399831533432007,grad_norm: 0.9999991275238532, iteration: 230397
loss: 0.9952853322029114,grad_norm: 0.9226661696183044, iteration: 230398
loss: 0.9786967039108276,grad_norm: 0.9999995097892843, iteration: 230399
loss: 1.007500410079956,grad_norm: 0.9999991816413941, iteration: 230400
loss: 0.9545196890830994,grad_norm: 0.9999991296539948, iteration: 230401
loss: 1.1245079040527344,grad_norm: 0.9999996094149856, iteration: 230402
loss: 0.9839490652084351,grad_norm: 0.9392379869238341, iteration: 230403
loss: 0.9768965244293213,grad_norm: 0.8756157508332103, iteration: 230404
loss: 0.984411895275116,grad_norm: 0.8153355566768464, iteration: 230405
loss: 0.9786376953125,grad_norm: 0.9593620513077337, iteration: 230406
loss: 0.9680426120758057,grad_norm: 0.999999094777971, iteration: 230407
loss: 1.0954402685165405,grad_norm: 0.9999998524933434, iteration: 230408
loss: 0.9989277720451355,grad_norm: 0.9999991994450608, iteration: 230409
loss: 1.0204312801361084,grad_norm: 0.9999992052063641, iteration: 230410
loss: 1.0141351222991943,grad_norm: 0.9999990800884158, iteration: 230411
loss: 0.9699981808662415,grad_norm: 0.9237382919391242, iteration: 230412
loss: 1.0137503147125244,grad_norm: 0.8599807466189923, iteration: 230413
loss: 0.9736754298210144,grad_norm: 0.9662970563783017, iteration: 230414
loss: 1.0555862188339233,grad_norm: 0.99999912675683, iteration: 230415
loss: 0.991919994354248,grad_norm: 0.8831530831395493, iteration: 230416
loss: 0.9954516887664795,grad_norm: 0.8596276358418433, iteration: 230417
loss: 1.0181419849395752,grad_norm: 0.8905502756974791, iteration: 230418
loss: 1.0240529775619507,grad_norm: 0.655482689437655, iteration: 230419
loss: 0.9776555299758911,grad_norm: 0.8167643999624846, iteration: 230420
loss: 0.9999707937240601,grad_norm: 0.8157935760086581, iteration: 230421
loss: 1.0451141595840454,grad_norm: 0.9999992136393633, iteration: 230422
loss: 0.9563999176025391,grad_norm: 0.8950734223280709, iteration: 230423
loss: 0.994247317314148,grad_norm: 0.9999991236795835, iteration: 230424
loss: 0.9869033694267273,grad_norm: 0.9999990392975073, iteration: 230425
loss: 0.9656569361686707,grad_norm: 0.9915540390735728, iteration: 230426
loss: 1.0169261693954468,grad_norm: 0.9999989765069814, iteration: 230427
loss: 0.994209349155426,grad_norm: 0.910105840827921, iteration: 230428
loss: 0.9766387939453125,grad_norm: 0.8893510061967271, iteration: 230429
loss: 1.0186830759048462,grad_norm: 0.782556783963226, iteration: 230430
loss: 1.0099620819091797,grad_norm: 0.9019690945688434, iteration: 230431
loss: 1.0108286142349243,grad_norm: 0.8149008459357416, iteration: 230432
loss: 0.988675594329834,grad_norm: 0.8023908687493788, iteration: 230433
loss: 1.040124535560608,grad_norm: 0.9490079699987323, iteration: 230434
loss: 1.022553563117981,grad_norm: 0.9174014527356932, iteration: 230435
loss: 0.9875674247741699,grad_norm: 0.8276867767470214, iteration: 230436
loss: 0.9979183077812195,grad_norm: 0.9999997122028703, iteration: 230437
loss: 0.9614614248275757,grad_norm: 0.8093506766551525, iteration: 230438
loss: 1.048869252204895,grad_norm: 0.8838165237122957, iteration: 230439
loss: 1.00005042552948,grad_norm: 0.9106953135466438, iteration: 230440
loss: 1.0158175230026245,grad_norm: 0.9735628332997556, iteration: 230441
loss: 0.9599306583404541,grad_norm: 0.9999990575989902, iteration: 230442
loss: 1.0485719442367554,grad_norm: 0.9999990957464876, iteration: 230443
loss: 0.977351725101471,grad_norm: 1.0000000567705754, iteration: 230444
loss: 0.9834693074226379,grad_norm: 0.6775135769556596, iteration: 230445
loss: 0.9839581847190857,grad_norm: 0.8312845543309028, iteration: 230446
loss: 0.997239351272583,grad_norm: 0.9218594577741444, iteration: 230447
loss: 1.0110951662063599,grad_norm: 0.9069178416892149, iteration: 230448
loss: 1.0416737794876099,grad_norm: 0.9999989735294779, iteration: 230449
loss: 1.0217502117156982,grad_norm: 0.976345926498995, iteration: 230450
loss: 0.9778103232383728,grad_norm: 0.9999991313424293, iteration: 230451
loss: 0.9913100600242615,grad_norm: 0.8047342508221185, iteration: 230452
loss: 1.0143965482711792,grad_norm: 0.9999990426953043, iteration: 230453
loss: 0.9811097979545593,grad_norm: 0.8462473238419201, iteration: 230454
loss: 1.0090265274047852,grad_norm: 0.9999991011360159, iteration: 230455
loss: 1.0032799243927002,grad_norm: 0.9999990780463464, iteration: 230456
loss: 1.028133749961853,grad_norm: 0.8049662104545633, iteration: 230457
loss: 1.0270469188690186,grad_norm: 0.9999991969189785, iteration: 230458
loss: 1.0164213180541992,grad_norm: 0.8610713306270802, iteration: 230459
loss: 0.9684882760047913,grad_norm: 0.8153806498655618, iteration: 230460
loss: 1.0348886251449585,grad_norm: 0.9999992219496329, iteration: 230461
loss: 1.031489372253418,grad_norm: 0.9999991665178982, iteration: 230462
loss: 0.9808229804039001,grad_norm: 0.9999991821329482, iteration: 230463
loss: 1.023003101348877,grad_norm: 0.9161026189454068, iteration: 230464
loss: 1.0084984302520752,grad_norm: 0.8570535111907949, iteration: 230465
loss: 1.0108171701431274,grad_norm: 0.9999990720617961, iteration: 230466
loss: 0.9861283302307129,grad_norm: 0.7993200569084882, iteration: 230467
loss: 0.9813575744628906,grad_norm: 0.9999990186156804, iteration: 230468
loss: 0.9998490810394287,grad_norm: 0.9153315174270201, iteration: 230469
loss: 1.0086582899093628,grad_norm: 0.9999996217463477, iteration: 230470
loss: 0.9727514386177063,grad_norm: 0.9999991178048642, iteration: 230471
loss: 0.967484176158905,grad_norm: 0.9276946539867653, iteration: 230472
loss: 1.0051809549331665,grad_norm: 0.9030697823462059, iteration: 230473
loss: 0.9901460409164429,grad_norm: 0.8194207324744814, iteration: 230474
loss: 1.004542350769043,grad_norm: 0.845922059373387, iteration: 230475
loss: 1.039649486541748,grad_norm: 0.805383037238681, iteration: 230476
loss: 1.0493113994598389,grad_norm: 0.999999245248065, iteration: 230477
loss: 1.0270271301269531,grad_norm: 0.9357652329307348, iteration: 230478
loss: 0.9602024555206299,grad_norm: 0.9999990805565712, iteration: 230479
loss: 0.9922128915786743,grad_norm: 0.8603317554629971, iteration: 230480
loss: 0.950791597366333,grad_norm: 0.9999990418917157, iteration: 230481
loss: 1.041466474533081,grad_norm: 0.999999786312267, iteration: 230482
loss: 1.0516901016235352,grad_norm: 0.9999995924386457, iteration: 230483
loss: 1.0087257623672485,grad_norm: 0.8473596817137315, iteration: 230484
loss: 1.025113821029663,grad_norm: 0.8848925871380828, iteration: 230485
loss: 1.0005576610565186,grad_norm: 0.9212923808821748, iteration: 230486
loss: 0.985275149345398,grad_norm: 0.9999991649829382, iteration: 230487
loss: 0.9921574592590332,grad_norm: 0.9099035103266971, iteration: 230488
loss: 0.9697481989860535,grad_norm: 0.8579261282957293, iteration: 230489
loss: 1.0074299573898315,grad_norm: 0.9340845036341439, iteration: 230490
loss: 1.0763152837753296,grad_norm: 0.9995852833197605, iteration: 230491
loss: 1.0292497873306274,grad_norm: 0.8502599252226539, iteration: 230492
loss: 0.9887591600418091,grad_norm: 0.9784782948534848, iteration: 230493
loss: 0.9778614044189453,grad_norm: 0.8969369707303786, iteration: 230494
loss: 0.9988530874252319,grad_norm: 0.9635215165870663, iteration: 230495
loss: 0.9891181588172913,grad_norm: 0.8093513765986796, iteration: 230496
loss: 1.062369704246521,grad_norm: 0.9999991922899081, iteration: 230497
loss: 1.0221424102783203,grad_norm: 0.9999991420097619, iteration: 230498
loss: 1.0287972688674927,grad_norm: 0.8786392036066878, iteration: 230499
loss: 0.9961711764335632,grad_norm: 0.8551892220557197, iteration: 230500
loss: 1.0057603120803833,grad_norm: 0.9601820830111355, iteration: 230501
loss: 0.9850802421569824,grad_norm: 0.9148979853385591, iteration: 230502
loss: 0.9718508124351501,grad_norm: 0.9999991689289859, iteration: 230503
loss: 1.001482605934143,grad_norm: 0.9056455648385996, iteration: 230504
loss: 0.9939526319503784,grad_norm: 0.8938434044536276, iteration: 230505
loss: 0.9694580435752869,grad_norm: 0.9176861129660564, iteration: 230506
loss: 1.0085058212280273,grad_norm: 0.7844527290359162, iteration: 230507
loss: 1.0239226818084717,grad_norm: 0.895736231679571, iteration: 230508
loss: 0.9692848920822144,grad_norm: 0.9619300913237262, iteration: 230509
loss: 1.0213444232940674,grad_norm: 0.9999992197849347, iteration: 230510
loss: 0.9824061393737793,grad_norm: 0.7155284023851161, iteration: 230511
loss: 1.0057755708694458,grad_norm: 0.8087750647020671, iteration: 230512
loss: 0.9737085103988647,grad_norm: 0.8825131569419018, iteration: 230513
loss: 1.013210654258728,grad_norm: 0.8561756864924545, iteration: 230514
loss: 1.034470796585083,grad_norm: 0.9999995685399838, iteration: 230515
loss: 1.0420852899551392,grad_norm: 0.9480392098285346, iteration: 230516
loss: 0.9991307258605957,grad_norm: 0.76516145454911, iteration: 230517
loss: 1.0099263191223145,grad_norm: 0.9999990546235109, iteration: 230518
loss: 1.0263615846633911,grad_norm: 0.9999992247176777, iteration: 230519
loss: 0.9945059418678284,grad_norm: 0.9625644383905408, iteration: 230520
loss: 0.996605634689331,grad_norm: 0.8619679243780821, iteration: 230521
loss: 0.9902259707450867,grad_norm: 0.9999991291880393, iteration: 230522
loss: 1.0193614959716797,grad_norm: 0.999999037239804, iteration: 230523
loss: 1.013985514640808,grad_norm: 0.7982242030477597, iteration: 230524
loss: 1.0069472789764404,grad_norm: 0.7679539254035435, iteration: 230525
loss: 0.9616738557815552,grad_norm: 0.9739925110064129, iteration: 230526
loss: 1.0032609701156616,grad_norm: 0.8683312598764793, iteration: 230527
loss: 1.0152174234390259,grad_norm: 0.9572458903981285, iteration: 230528
loss: 1.0237046480178833,grad_norm: 0.9822100937010056, iteration: 230529
loss: 1.010188102722168,grad_norm: 0.9999991619780272, iteration: 230530
loss: 1.0306422710418701,grad_norm: 0.9999990874420139, iteration: 230531
loss: 1.0065144300460815,grad_norm: 0.9999989677968996, iteration: 230532
loss: 1.0231376886367798,grad_norm: 0.9111154549852336, iteration: 230533
loss: 1.0174955129623413,grad_norm: 0.9282409150171054, iteration: 230534
loss: 1.0157986879348755,grad_norm: 0.999999126258824, iteration: 230535
loss: 0.9956687688827515,grad_norm: 0.999999199554458, iteration: 230536
loss: 1.0385196208953857,grad_norm: 0.9419758459215144, iteration: 230537
loss: 1.0079426765441895,grad_norm: 0.9647854785463776, iteration: 230538
loss: 0.9985427260398865,grad_norm: 0.8991349814867567, iteration: 230539
loss: 0.9963037371635437,grad_norm: 0.9862077345779048, iteration: 230540
loss: 1.006853699684143,grad_norm: 0.9962445302857043, iteration: 230541
loss: 1.02456533908844,grad_norm: 0.8880118802784378, iteration: 230542
loss: 1.050270676612854,grad_norm: 0.9497216041818655, iteration: 230543
loss: 0.9773049354553223,grad_norm: 0.9270056389314757, iteration: 230544
loss: 1.0073373317718506,grad_norm: 0.9999990220778288, iteration: 230545
loss: 0.9721809029579163,grad_norm: 0.8983849785665416, iteration: 230546
loss: 0.9726253151893616,grad_norm: 0.9519767760502296, iteration: 230547
loss: 1.0149747133255005,grad_norm: 0.8186384052769544, iteration: 230548
loss: 0.9910330176353455,grad_norm: 0.9006146477366792, iteration: 230549
loss: 1.0171440839767456,grad_norm: 0.9999990768066267, iteration: 230550
loss: 1.0780401229858398,grad_norm: 0.9999989989597273, iteration: 230551
loss: 0.9976016879081726,grad_norm: 0.8693931456579406, iteration: 230552
loss: 1.0053709745407104,grad_norm: 0.9767770135305116, iteration: 230553
loss: 0.9992592930793762,grad_norm: 0.9999992302273503, iteration: 230554
loss: 0.994820237159729,grad_norm: 0.9999989502613658, iteration: 230555
loss: 0.9869333505630493,grad_norm: 0.9999990320289716, iteration: 230556
loss: 0.9688796401023865,grad_norm: 0.8566111291077876, iteration: 230557
loss: 1.0256527662277222,grad_norm: 0.9325178318537143, iteration: 230558
loss: 1.029967188835144,grad_norm: 0.8808278568890386, iteration: 230559
loss: 1.0147154331207275,grad_norm: 0.9739795490853682, iteration: 230560
loss: 0.9786770343780518,grad_norm: 0.9999990448917935, iteration: 230561
loss: 1.0038659572601318,grad_norm: 0.9585092470548727, iteration: 230562
loss: 0.998941957950592,grad_norm: 0.8321472556665382, iteration: 230563
loss: 1.0061167478561401,grad_norm: 0.9999994450528092, iteration: 230564
loss: 1.057491421699524,grad_norm: 0.9999994649353205, iteration: 230565
loss: 1.0357962846755981,grad_norm: 0.9999991901399377, iteration: 230566
loss: 1.0154978036880493,grad_norm: 0.9586633857637978, iteration: 230567
loss: 1.001875638961792,grad_norm: 0.9845203216188952, iteration: 230568
loss: 0.9956864714622498,grad_norm: 0.999999062464684, iteration: 230569
loss: 1.000938057899475,grad_norm: 0.9614877976402783, iteration: 230570
loss: 0.975138783454895,grad_norm: 0.7578625545999971, iteration: 230571
loss: 0.9988628029823303,grad_norm: 0.842042741189667, iteration: 230572
loss: 1.0125956535339355,grad_norm: 0.8775625889606935, iteration: 230573
loss: 0.962123453617096,grad_norm: 0.8529232869796739, iteration: 230574
loss: 0.9829167127609253,grad_norm: 0.9381005601262363, iteration: 230575
loss: 0.9988261461257935,grad_norm: 0.8905798539305396, iteration: 230576
loss: 1.0202730894088745,grad_norm: 0.9999991088456656, iteration: 230577
loss: 0.9626758098602295,grad_norm: 0.8243680221101102, iteration: 230578
loss: 1.0158761739730835,grad_norm: 0.9997666191515001, iteration: 230579
loss: 1.0132206678390503,grad_norm: 0.7362069857503737, iteration: 230580
loss: 1.0499895811080933,grad_norm: 0.9999992123122412, iteration: 230581
loss: 0.999357283115387,grad_norm: 0.8101956401135458, iteration: 230582
loss: 1.0460681915283203,grad_norm: 0.974671980022713, iteration: 230583
loss: 0.9833589196205139,grad_norm: 0.8645630160693031, iteration: 230584
loss: 0.9767904877662659,grad_norm: 0.9999993240917491, iteration: 230585
loss: 0.9923292398452759,grad_norm: 0.9176872581575035, iteration: 230586
loss: 1.003332495689392,grad_norm: 0.944539181934655, iteration: 230587
loss: 1.0215712785720825,grad_norm: 0.9397695788269174, iteration: 230588
loss: 0.9747070074081421,grad_norm: 0.9999990069894488, iteration: 230589
loss: 0.9651744365692139,grad_norm: 0.8192851320437848, iteration: 230590
loss: 1.0218087434768677,grad_norm: 0.8780480175753888, iteration: 230591
loss: 1.034448504447937,grad_norm: 0.9862231781414509, iteration: 230592
loss: 0.9970470666885376,grad_norm: 0.9534096357355825, iteration: 230593
loss: 0.9922329783439636,grad_norm: 0.9334862918430612, iteration: 230594
loss: 0.9873841404914856,grad_norm: 0.9999989095076856, iteration: 230595
loss: 1.0326975584030151,grad_norm: 0.9554152454542083, iteration: 230596
loss: 1.0228074789047241,grad_norm: 0.8860245352439895, iteration: 230597
loss: 1.0039998292922974,grad_norm: 0.8728340201815968, iteration: 230598
loss: 1.0244758129119873,grad_norm: 0.88616071461669, iteration: 230599
loss: 1.021188735961914,grad_norm: 0.8441558696939206, iteration: 230600
loss: 0.9860689043998718,grad_norm: 0.9278893333308093, iteration: 230601
loss: 1.0207886695861816,grad_norm: 0.9634145088211482, iteration: 230602
loss: 0.9994567036628723,grad_norm: 0.8834167987733432, iteration: 230603
loss: 1.0401724576950073,grad_norm: 0.7975952450860923, iteration: 230604
loss: 0.9919595718383789,grad_norm: 0.9999991489412557, iteration: 230605
loss: 1.02581787109375,grad_norm: 0.9999990974400566, iteration: 230606
loss: 0.980810821056366,grad_norm: 0.8924574832712404, iteration: 230607
loss: 1.0627827644348145,grad_norm: 0.9999991936431208, iteration: 230608
loss: 1.0355123281478882,grad_norm: 0.9999996545474118, iteration: 230609
loss: 1.009946584701538,grad_norm: 0.9999989489834594, iteration: 230610
loss: 1.0183137655258179,grad_norm: 0.7813061731020802, iteration: 230611
loss: 0.986421525478363,grad_norm: 0.9176621776755033, iteration: 230612
loss: 1.031571626663208,grad_norm: 0.9999992638328288, iteration: 230613
loss: 1.0064665079116821,grad_norm: 0.9417930836402281, iteration: 230614
loss: 1.0077744722366333,grad_norm: 0.8707790212679093, iteration: 230615
loss: 1.037988305091858,grad_norm: 0.9999994018769991, iteration: 230616
loss: 1.0198744535446167,grad_norm: 0.9552578415273542, iteration: 230617
loss: 1.016711950302124,grad_norm: 0.9999991853936682, iteration: 230618
loss: 1.0112842321395874,grad_norm: 0.799150554729841, iteration: 230619
loss: 1.0187089443206787,grad_norm: 0.9999999870182181, iteration: 230620
loss: 1.0063464641571045,grad_norm: 0.9999988908828255, iteration: 230621
loss: 0.9795741438865662,grad_norm: 0.978043946115768, iteration: 230622
loss: 1.0122953653335571,grad_norm: 0.9167291049787292, iteration: 230623
loss: 0.9817279577255249,grad_norm: 0.9783870613386227, iteration: 230624
loss: 0.9737982153892517,grad_norm: 0.9686144242930517, iteration: 230625
loss: 0.9866737723350525,grad_norm: 0.9888931994136706, iteration: 230626
loss: 0.9590096473693848,grad_norm: 0.9831944426084548, iteration: 230627
loss: 1.0277092456817627,grad_norm: 0.82566691407043, iteration: 230628
loss: 1.0235220193862915,grad_norm: 0.8314092164508148, iteration: 230629
loss: 0.9513083696365356,grad_norm: 0.8481216186663602, iteration: 230630
loss: 1.0310672521591187,grad_norm: 0.8643945634391772, iteration: 230631
loss: 0.9651027917861938,grad_norm: 0.9971891830431623, iteration: 230632
loss: 1.0020370483398438,grad_norm: 0.955537137722668, iteration: 230633
loss: 0.9898548722267151,grad_norm: 0.9053777662123548, iteration: 230634
loss: 1.02506422996521,grad_norm: 0.792320676138527, iteration: 230635
loss: 1.0869380235671997,grad_norm: 0.9581041490320336, iteration: 230636
loss: 0.9641554355621338,grad_norm: 0.9957082077243944, iteration: 230637
loss: 0.9945755004882812,grad_norm: 0.9865867635059711, iteration: 230638
loss: 0.987754762172699,grad_norm: 0.9796870668866707, iteration: 230639
loss: 1.0597823858261108,grad_norm: 0.9999992554577012, iteration: 230640
loss: 1.0309901237487793,grad_norm: 0.999999082018403, iteration: 230641
loss: 1.022181749343872,grad_norm: 0.9547732148931501, iteration: 230642
loss: 1.218631386756897,grad_norm: 0.999999167259864, iteration: 230643
loss: 1.013088583946228,grad_norm: 0.9511101039800338, iteration: 230644
loss: 0.9993720054626465,grad_norm: 0.9999990928433672, iteration: 230645
loss: 0.978155255317688,grad_norm: 0.9817655013002341, iteration: 230646
loss: 1.0268018245697021,grad_norm: 0.910143073230863, iteration: 230647
loss: 0.982309877872467,grad_norm: 0.8569487303344869, iteration: 230648
loss: 1.0149675607681274,grad_norm: 0.8099958614092581, iteration: 230649
loss: 0.9749216437339783,grad_norm: 0.9304573392872909, iteration: 230650
loss: 1.0542881488800049,grad_norm: 0.999999731185217, iteration: 230651
loss: 1.0118895769119263,grad_norm: 0.9999994348648596, iteration: 230652
loss: 1.0559437274932861,grad_norm: 0.9999992631339797, iteration: 230653
loss: 1.0054776668548584,grad_norm: 0.9352509177209529, iteration: 230654
loss: 1.0259586572647095,grad_norm: 0.9956599233472633, iteration: 230655
loss: 0.9913685917854309,grad_norm: 0.8689555127148406, iteration: 230656
loss: 0.985924482345581,grad_norm: 0.8223659588223015, iteration: 230657
loss: 0.9758060574531555,grad_norm: 0.8181296271066792, iteration: 230658
loss: 1.050150990486145,grad_norm: 0.9999993710818068, iteration: 230659
loss: 1.0048956871032715,grad_norm: 0.8092986869553129, iteration: 230660
loss: 1.0371075868606567,grad_norm: 0.9999993368668266, iteration: 230661
loss: 1.0327155590057373,grad_norm: 0.999999077501724, iteration: 230662
loss: 1.0557273626327515,grad_norm: 0.9999991220709276, iteration: 230663
loss: 1.0217413902282715,grad_norm: 0.9999993436040339, iteration: 230664
loss: 1.024415135383606,grad_norm: 0.947501051103338, iteration: 230665
loss: 1.023044466972351,grad_norm: 0.9559638395231043, iteration: 230666
loss: 0.996957540512085,grad_norm: 0.9999992457292783, iteration: 230667
loss: 1.0603963136672974,grad_norm: 0.9999994092079141, iteration: 230668
loss: 1.002660870552063,grad_norm: 0.9486741515480833, iteration: 230669
loss: 1.025532841682434,grad_norm: 0.9999991445287649, iteration: 230670
loss: 0.9708806872367859,grad_norm: 0.8477618050199477, iteration: 230671
loss: 1.0461264848709106,grad_norm: 0.8893752525112101, iteration: 230672
loss: 0.9957553744316101,grad_norm: 0.8217985753312063, iteration: 230673
loss: 1.015568494796753,grad_norm: 0.9552988137735002, iteration: 230674
loss: 1.0137327909469604,grad_norm: 0.740316543615706, iteration: 230675
loss: 0.9490711688995361,grad_norm: 0.9024083640557227, iteration: 230676
loss: 1.027213454246521,grad_norm: 0.9202452183130361, iteration: 230677
loss: 0.975413978099823,grad_norm: 0.8828375307238062, iteration: 230678
loss: 0.9677888751029968,grad_norm: 0.9889500342239753, iteration: 230679
loss: 1.0359385013580322,grad_norm: 0.9898369458761774, iteration: 230680
loss: 1.0103262662887573,grad_norm: 0.9999991048223635, iteration: 230681
loss: 1.0280542373657227,grad_norm: 0.9382949495396689, iteration: 230682
loss: 1.0112553834915161,grad_norm: 0.7749190574688938, iteration: 230683
loss: 0.9858375191688538,grad_norm: 0.860071994592418, iteration: 230684
loss: 0.9972158670425415,grad_norm: 0.842946477225863, iteration: 230685
loss: 0.9774202108383179,grad_norm: 0.9412933296035838, iteration: 230686
loss: 1.0026499032974243,grad_norm: 0.999999161621123, iteration: 230687
loss: 0.9882228374481201,grad_norm: 0.9203294885633925, iteration: 230688
loss: 1.00346839427948,grad_norm: 0.9999991219268967, iteration: 230689
loss: 0.9818720817565918,grad_norm: 0.9403902735683066, iteration: 230690
loss: 1.0089927911758423,grad_norm: 0.8323644155293521, iteration: 230691
loss: 0.976884126663208,grad_norm: 0.9450359018311, iteration: 230692
loss: 1.0335571765899658,grad_norm: 0.8017119260582523, iteration: 230693
loss: 1.0131916999816895,grad_norm: 0.9189349268301518, iteration: 230694
loss: 1.050918698310852,grad_norm: 0.9999999417369646, iteration: 230695
loss: 1.0042842626571655,grad_norm: 0.7701165182921571, iteration: 230696
loss: 1.0200531482696533,grad_norm: 0.9999992455034967, iteration: 230697
loss: 1.0432590246200562,grad_norm: 0.9999999089925161, iteration: 230698
loss: 0.9985065460205078,grad_norm: 0.9215300633188056, iteration: 230699
loss: 1.3668015003204346,grad_norm: 0.9999995910079402, iteration: 230700
loss: 1.211510419845581,grad_norm: 0.9999992130212689, iteration: 230701
loss: 1.0355050563812256,grad_norm: 0.7416763038428714, iteration: 230702
loss: 1.1613391637802124,grad_norm: 0.9999991095028875, iteration: 230703
loss: 1.140775442123413,grad_norm: 0.9999991554030654, iteration: 230704
loss: 1.2190526723861694,grad_norm: 0.9999998270040306, iteration: 230705
loss: 0.962072491645813,grad_norm: 0.9117945489728154, iteration: 230706
loss: 1.2669106721878052,grad_norm: 0.9999999222848762, iteration: 230707
loss: 1.249982476234436,grad_norm: 0.9999997912917961, iteration: 230708
loss: 1.1403638124465942,grad_norm: 0.9999994154186227, iteration: 230709
loss: 1.1266040802001953,grad_norm: 0.999999300377714, iteration: 230710
loss: 1.4727691411972046,grad_norm: 0.9999998131771567, iteration: 230711
loss: 1.4235293865203857,grad_norm: 0.9999996696677208, iteration: 230712
loss: 1.3192760944366455,grad_norm: 0.9999998115328665, iteration: 230713
loss: 0.9716827273368835,grad_norm: 0.8648853370992616, iteration: 230714
loss: 1.222023367881775,grad_norm: 0.9999998158240397, iteration: 230715
loss: 1.0865761041641235,grad_norm: 0.9999991505169676, iteration: 230716
loss: 1.0942769050598145,grad_norm: 0.9999997267953894, iteration: 230717
loss: 1.009044885635376,grad_norm: 0.9999989689311543, iteration: 230718
loss: 1.1651257276535034,grad_norm: 0.9999993286096698, iteration: 230719
loss: 0.9877043962478638,grad_norm: 0.9999990754821764, iteration: 230720
loss: 1.1761224269866943,grad_norm: 0.9999999220903589, iteration: 230721
loss: 1.5565024614334106,grad_norm: 0.9999995875882118, iteration: 230722
loss: 1.4225456714630127,grad_norm: 0.9999996584305825, iteration: 230723
loss: 1.1094701290130615,grad_norm: 0.9999992255722654, iteration: 230724
loss: 1.0279091596603394,grad_norm: 0.9999995876619244, iteration: 230725
loss: 1.1803016662597656,grad_norm: 0.9999998952111515, iteration: 230726
loss: 1.1143983602523804,grad_norm: 0.9999995271089483, iteration: 230727
loss: 1.059224247932434,grad_norm: 0.9999995426348541, iteration: 230728
loss: 1.1003819704055786,grad_norm: 0.999999859265208, iteration: 230729
loss: 1.0379176139831543,grad_norm: 0.924072978500644, iteration: 230730
loss: 1.1007506847381592,grad_norm: 0.9999993407412717, iteration: 230731
loss: 0.979388415813446,grad_norm: 0.9457351897086881, iteration: 230732
loss: 1.159145712852478,grad_norm: 1.0000000215821923, iteration: 230733
loss: 1.3256700038909912,grad_norm: 0.9999991759661981, iteration: 230734
loss: 0.9684032201766968,grad_norm: 0.8605654705739636, iteration: 230735
loss: 0.9990161657333374,grad_norm: 0.9999991462548815, iteration: 230736
loss: 1.0562143325805664,grad_norm: 0.9999993843790893, iteration: 230737
loss: 1.0192723274230957,grad_norm: 0.9999992030263658, iteration: 230738
loss: 1.06606125831604,grad_norm: 0.9999998992088933, iteration: 230739
loss: 1.0013991594314575,grad_norm: 0.9779074701515196, iteration: 230740
loss: 1.0034797191619873,grad_norm: 0.9609830640699021, iteration: 230741
loss: 0.9716713428497314,grad_norm: 0.9992402594483237, iteration: 230742
loss: 1.0267753601074219,grad_norm: 0.9999991994147632, iteration: 230743
loss: 0.9679169058799744,grad_norm: 0.8461402778237734, iteration: 230744
loss: 1.031899333000183,grad_norm: 0.9999997402190964, iteration: 230745
loss: 1.004006266593933,grad_norm: 0.9812336487323716, iteration: 230746
loss: 0.9864384531974792,grad_norm: 0.9278090138681314, iteration: 230747
loss: 1.0067675113677979,grad_norm: 0.9999991605783649, iteration: 230748
loss: 1.056621789932251,grad_norm: 0.9627648120755317, iteration: 230749
loss: 0.9876331090927124,grad_norm: 0.9999996094935776, iteration: 230750
loss: 0.9968210458755493,grad_norm: 0.8681017797419082, iteration: 230751
loss: 1.0428837537765503,grad_norm: 0.999999816339869, iteration: 230752
loss: 1.0204530954360962,grad_norm: 0.9999996160594251, iteration: 230753
loss: 1.0275564193725586,grad_norm: 0.999999731211448, iteration: 230754
loss: 1.0328304767608643,grad_norm: 0.9999992030992655, iteration: 230755
loss: 1.0922547578811646,grad_norm: 0.9999995170558194, iteration: 230756
loss: 1.1190241575241089,grad_norm: 0.9999991267021301, iteration: 230757
loss: 1.0338469743728638,grad_norm: 0.9999999448394434, iteration: 230758
loss: 1.0137392282485962,grad_norm: 0.8589814839259504, iteration: 230759
loss: 1.0119500160217285,grad_norm: 0.9018477565561056, iteration: 230760
loss: 1.0169483423233032,grad_norm: 0.8126763149272266, iteration: 230761
loss: 1.0476940870285034,grad_norm: 0.9999997574544119, iteration: 230762
loss: 1.0170438289642334,grad_norm: 0.9020337522742575, iteration: 230763
loss: 1.0458528995513916,grad_norm: 0.9999989823331616, iteration: 230764
loss: 1.0552140474319458,grad_norm: 0.9999991489771647, iteration: 230765
loss: 1.01438307762146,grad_norm: 0.999999047427008, iteration: 230766
loss: 0.9682270288467407,grad_norm: 0.9999989463588616, iteration: 230767
loss: 0.9524588584899902,grad_norm: 0.9999990827772284, iteration: 230768
loss: 1.0373947620391846,grad_norm: 0.9999994309403504, iteration: 230769
loss: 1.1160824298858643,grad_norm: 0.9999993234938479, iteration: 230770
loss: 1.0269618034362793,grad_norm: 0.9943574692514431, iteration: 230771
loss: 0.9836154580116272,grad_norm: 0.9999991305827786, iteration: 230772
loss: 1.004850149154663,grad_norm: 0.9232285226002992, iteration: 230773
loss: 1.031538486480713,grad_norm: 0.9999995446380894, iteration: 230774
loss: 0.9976925849914551,grad_norm: 0.9999995018131155, iteration: 230775
loss: 1.0027291774749756,grad_norm: 0.8370875084141081, iteration: 230776
loss: 1.0353736877441406,grad_norm: 0.9999998453095446, iteration: 230777
loss: 1.026809573173523,grad_norm: 0.9999993842965667, iteration: 230778
loss: 1.0762132406234741,grad_norm: 0.9999995396038136, iteration: 230779
loss: 0.9921894073486328,grad_norm: 0.9999989895810752, iteration: 230780
loss: 1.0052310228347778,grad_norm: 0.9276029041430693, iteration: 230781
loss: 1.0085400342941284,grad_norm: 0.9999991178989374, iteration: 230782
loss: 1.0074334144592285,grad_norm: 0.9585844530676825, iteration: 230783
loss: 0.9749302864074707,grad_norm: 0.999999995475294, iteration: 230784
loss: 1.0418123006820679,grad_norm: 0.9999992496939523, iteration: 230785
loss: 0.9795724153518677,grad_norm: 0.999999246048676, iteration: 230786
loss: 1.0296339988708496,grad_norm: 0.9679229400975089, iteration: 230787
loss: 0.9799349904060364,grad_norm: 0.9999990944195447, iteration: 230788
loss: 0.9980558156967163,grad_norm: 0.9399667550125843, iteration: 230789
loss: 1.0087648630142212,grad_norm: 0.9999994942528088, iteration: 230790
loss: 1.012798547744751,grad_norm: 0.9999990929877912, iteration: 230791
loss: 1.055947184562683,grad_norm: 0.999999099782222, iteration: 230792
loss: 0.9937354326248169,grad_norm: 0.9974266321403362, iteration: 230793
loss: 1.0092135667800903,grad_norm: 0.9999992885081685, iteration: 230794
loss: 1.095272183418274,grad_norm: 0.8868734164173989, iteration: 230795
loss: 1.007685661315918,grad_norm: 0.8006309712566705, iteration: 230796
loss: 1.0048609972000122,grad_norm: 0.9304065782732335, iteration: 230797
loss: 1.025130271911621,grad_norm: 0.9171423465822559, iteration: 230798
loss: 0.9895322918891907,grad_norm: 0.9333008204534868, iteration: 230799
loss: 0.987097442150116,grad_norm: 0.862195103261892, iteration: 230800
loss: 1.0109257698059082,grad_norm: 0.9999990514348378, iteration: 230801
loss: 0.9745699167251587,grad_norm: 0.934274969415526, iteration: 230802
loss: 0.9889934659004211,grad_norm: 0.9999993695506563, iteration: 230803
loss: 0.9538480043411255,grad_norm: 0.7292141424384123, iteration: 230804
loss: 0.9704815149307251,grad_norm: 0.9413457880733069, iteration: 230805
loss: 0.9690456390380859,grad_norm: 0.8854215785722279, iteration: 230806
loss: 0.9633633494377136,grad_norm: 0.9999990794298531, iteration: 230807
loss: 1.0061135292053223,grad_norm: 0.8497405717072531, iteration: 230808
loss: 0.9988023638725281,grad_norm: 0.9999991404872475, iteration: 230809
loss: 0.9593718647956848,grad_norm: 0.9094325570119747, iteration: 230810
loss: 1.0175098180770874,grad_norm: 0.9999991226907884, iteration: 230811
loss: 1.0693001747131348,grad_norm: 0.999999226215171, iteration: 230812
loss: 1.040753960609436,grad_norm: 0.9999998955320597, iteration: 230813
loss: 1.0056383609771729,grad_norm: 0.8429319228750136, iteration: 230814
loss: 1.0062353610992432,grad_norm: 0.9026949836143496, iteration: 230815
loss: 0.980790376663208,grad_norm: 0.9999990615920127, iteration: 230816
loss: 0.9776167869567871,grad_norm: 0.8747466195746039, iteration: 230817
loss: 1.000415563583374,grad_norm: 0.9999990894345869, iteration: 230818
loss: 1.0468790531158447,grad_norm: 0.8658057145774088, iteration: 230819
loss: 0.9769653081893921,grad_norm: 0.9201603456096038, iteration: 230820
loss: 1.016861915588379,grad_norm: 0.9943831412651418, iteration: 230821
loss: 0.992353081703186,grad_norm: 0.9999991701515696, iteration: 230822
loss: 0.9621073603630066,grad_norm: 0.8508217231787072, iteration: 230823
loss: 0.9967942833900452,grad_norm: 0.9841321135735553, iteration: 230824
loss: 1.0246214866638184,grad_norm: 0.9660183800262421, iteration: 230825
loss: 0.9672203660011292,grad_norm: 0.9617872067826941, iteration: 230826
loss: 0.9949124455451965,grad_norm: 0.9999990360117027, iteration: 230827
loss: 1.0784316062927246,grad_norm: 0.9999991101823039, iteration: 230828
loss: 1.0338274240493774,grad_norm: 0.9999990436712347, iteration: 230829
loss: 1.0036232471466064,grad_norm: 0.8541795310171674, iteration: 230830
loss: 1.0298088788986206,grad_norm: 0.9999992591182785, iteration: 230831
loss: 0.9886738061904907,grad_norm: 0.9999998753814656, iteration: 230832
loss: 0.9852463603019714,grad_norm: 0.9425214171179385, iteration: 230833
loss: 0.998387336730957,grad_norm: 0.987339903108256, iteration: 230834
loss: 1.0045744180679321,grad_norm: 0.9800706482427245, iteration: 230835
loss: 1.0044187307357788,grad_norm: 0.9853392116946955, iteration: 230836
loss: 0.9737560749053955,grad_norm: 0.7637079329714679, iteration: 230837
loss: 1.0134159326553345,grad_norm: 0.8790684571125812, iteration: 230838
loss: 1.0501856803894043,grad_norm: 0.999999355831248, iteration: 230839
loss: 0.9985695481300354,grad_norm: 0.9458564574129392, iteration: 230840
loss: 1.008344292640686,grad_norm: 0.9672811631402477, iteration: 230841
loss: 1.0071405172348022,grad_norm: 0.9999991667517671, iteration: 230842
loss: 0.9607915282249451,grad_norm: 0.9397850647607813, iteration: 230843
loss: 1.0714738368988037,grad_norm: 0.9999998678641265, iteration: 230844
loss: 0.9804330468177795,grad_norm: 0.9999992217715816, iteration: 230845
loss: 1.0050805807113647,grad_norm: 0.960041527984507, iteration: 230846
loss: 0.9560185670852661,grad_norm: 0.995656979322603, iteration: 230847
loss: 0.9743589162826538,grad_norm: 0.9012653539677524, iteration: 230848
loss: 1.0084278583526611,grad_norm: 0.9261297914565706, iteration: 230849
loss: 0.9833683371543884,grad_norm: 0.9654126903364011, iteration: 230850
loss: 1.0770879983901978,grad_norm: 0.9999993296601578, iteration: 230851
loss: 0.9888869524002075,grad_norm: 0.9070595140881131, iteration: 230852
loss: 1.025323748588562,grad_norm: 0.9302502145326571, iteration: 230853
loss: 1.0106109380722046,grad_norm: 0.9757931089658992, iteration: 230854
loss: 1.0108634233474731,grad_norm: 0.9999992274084879, iteration: 230855
loss: 1.0177112817764282,grad_norm: 0.9999991902286002, iteration: 230856
loss: 0.9755865335464478,grad_norm: 0.8725815981425609, iteration: 230857
loss: 1.045194387435913,grad_norm: 0.9999993411296108, iteration: 230858
loss: 1.025585651397705,grad_norm: 0.9999994083769351, iteration: 230859
loss: 0.9948418736457825,grad_norm: 0.9337671306274692, iteration: 230860
loss: 0.99339359998703,grad_norm: 0.9304864774364787, iteration: 230861
loss: 1.0212316513061523,grad_norm: 0.9211527484541657, iteration: 230862
loss: 1.0212246179580688,grad_norm: 0.9999998995825967, iteration: 230863
loss: 1.0037963390350342,grad_norm: 0.9999991039026808, iteration: 230864
loss: 0.975658655166626,grad_norm: 0.9084756949957431, iteration: 230865
loss: 1.0420454740524292,grad_norm: 0.9090798760494508, iteration: 230866
loss: 1.0002937316894531,grad_norm: 0.8102465749562732, iteration: 230867
loss: 0.9928657412528992,grad_norm: 0.8791712540400337, iteration: 230868
loss: 0.9983024597167969,grad_norm: 0.868193142631675, iteration: 230869
loss: 0.9961037635803223,grad_norm: 0.9696485849034814, iteration: 230870
loss: 1.0095343589782715,grad_norm: 0.9999991246501625, iteration: 230871
loss: 1.0088074207305908,grad_norm: 0.8121699928547584, iteration: 230872
loss: 1.0207072496414185,grad_norm: 0.9999994299402046, iteration: 230873
loss: 0.9550780057907104,grad_norm: 0.9756532817880329, iteration: 230874
loss: 0.9676641225814819,grad_norm: 0.7515401003908416, iteration: 230875
loss: 1.0107637643814087,grad_norm: 0.8397999473454015, iteration: 230876
loss: 0.963343620300293,grad_norm: 0.9956175382449337, iteration: 230877
loss: 0.9834268689155579,grad_norm: 0.9543450749704476, iteration: 230878
loss: 1.0158506631851196,grad_norm: 0.8929077901345165, iteration: 230879
loss: 1.0240567922592163,grad_norm: 0.8030196610652552, iteration: 230880
loss: 1.109740138053894,grad_norm: 0.999999028389084, iteration: 230881
loss: 0.9710427522659302,grad_norm: 0.7982611622985186, iteration: 230882
loss: 0.983005940914154,grad_norm: 0.912214969241146, iteration: 230883
loss: 0.9844169020652771,grad_norm: 0.8359497528106969, iteration: 230884
loss: 1.1976341009140015,grad_norm: 0.9999998186130522, iteration: 230885
loss: 0.981303870677948,grad_norm: 0.9999998275767384, iteration: 230886
loss: 1.073750615119934,grad_norm: 0.999999073865723, iteration: 230887
loss: 0.9742169976234436,grad_norm: 0.9999995564640572, iteration: 230888
loss: 1.0396475791931152,grad_norm: 0.975471622683487, iteration: 230889
loss: 1.002553939819336,grad_norm: 0.9852523955429672, iteration: 230890
loss: 0.9888806939125061,grad_norm: 0.9674348212851526, iteration: 230891
loss: 0.9994320869445801,grad_norm: 0.8773278338849351, iteration: 230892
loss: 0.9948118925094604,grad_norm: 0.9999991688738894, iteration: 230893
loss: 0.9935510158538818,grad_norm: 0.9874995051499903, iteration: 230894
loss: 1.0221306085586548,grad_norm: 0.8332853800074089, iteration: 230895
loss: 1.036605715751648,grad_norm: 0.8714393478247497, iteration: 230896
loss: 0.984233558177948,grad_norm: 0.8567619502446693, iteration: 230897
loss: 0.9782181978225708,grad_norm: 0.8637297549131625, iteration: 230898
loss: 1.0033632516860962,grad_norm: 0.9249932464744838, iteration: 230899
loss: 0.9999147653579712,grad_norm: 0.9769452522106689, iteration: 230900
loss: 1.0021151304244995,grad_norm: 0.9853769105360924, iteration: 230901
loss: 0.9983217120170593,grad_norm: 0.8223672103892271, iteration: 230902
loss: 0.9941923022270203,grad_norm: 0.9999991756343267, iteration: 230903
loss: 0.9996327757835388,grad_norm: 0.9999991154855292, iteration: 230904
loss: 1.169571042060852,grad_norm: 0.999999446893391, iteration: 230905
loss: 1.0271395444869995,grad_norm: 0.9285161074403628, iteration: 230906
loss: 0.9956837892532349,grad_norm: 0.9984358615231282, iteration: 230907
loss: 1.0677070617675781,grad_norm: 0.9999992607983442, iteration: 230908
loss: 1.0011800527572632,grad_norm: 0.7898140140956798, iteration: 230909
loss: 1.0122417211532593,grad_norm: 0.7636939948072963, iteration: 230910
loss: 1.0018612146377563,grad_norm: 0.8484405527875639, iteration: 230911
loss: 1.025720477104187,grad_norm: 0.9999991164514134, iteration: 230912
loss: 1.00058114528656,grad_norm: 0.9098365607909821, iteration: 230913
loss: 1.0159211158752441,grad_norm: 0.9999992454913486, iteration: 230914
loss: 1.0486713647842407,grad_norm: 0.9999999172470625, iteration: 230915
loss: 1.0000945329666138,grad_norm: 0.927758730297821, iteration: 230916
loss: 1.014452338218689,grad_norm: 0.7865163489689956, iteration: 230917
loss: 1.0172398090362549,grad_norm: 0.999999219944454, iteration: 230918
loss: 1.1014668941497803,grad_norm: 0.9999990281686519, iteration: 230919
loss: 1.0495514869689941,grad_norm: 0.9999991638377872, iteration: 230920
loss: 1.0081126689910889,grad_norm: 0.7458488272359692, iteration: 230921
loss: 1.0633317232131958,grad_norm: 0.9152248616112113, iteration: 230922
loss: 1.0402946472167969,grad_norm: 0.9999995820846811, iteration: 230923
loss: 1.0059943199157715,grad_norm: 0.8940525613716532, iteration: 230924
loss: 1.00966477394104,grad_norm: 0.971601110251422, iteration: 230925
loss: 1.0256173610687256,grad_norm: 0.9393472857089744, iteration: 230926
loss: 1.058306336402893,grad_norm: 0.9999995826810953, iteration: 230927
loss: 1.0016775131225586,grad_norm: 0.9999998441153651, iteration: 230928
loss: 0.9956061244010925,grad_norm: 0.999999029428195, iteration: 230929
loss: 0.98764967918396,grad_norm: 0.8387430245884151, iteration: 230930
loss: 1.0505352020263672,grad_norm: 0.9999999606232324, iteration: 230931
loss: 1.0192975997924805,grad_norm: 0.893603740979161, iteration: 230932
loss: 0.9896866083145142,grad_norm: 0.9999994604575785, iteration: 230933
loss: 1.0136915445327759,grad_norm: 0.8834710769366386, iteration: 230934
loss: 0.9628096222877502,grad_norm: 0.8834942478239524, iteration: 230935
loss: 0.972766101360321,grad_norm: 0.9485624615150702, iteration: 230936
loss: 1.0016549825668335,grad_norm: 0.8195746061776309, iteration: 230937
loss: 1.0298113822937012,grad_norm: 0.9999991440453865, iteration: 230938
loss: 1.022587537765503,grad_norm: 0.851235737569085, iteration: 230939
loss: 1.006817102432251,grad_norm: 0.9999990460655592, iteration: 230940
loss: 1.015454888343811,grad_norm: 0.9895264546599127, iteration: 230941
loss: 1.013301968574524,grad_norm: 0.790276693125914, iteration: 230942
loss: 1.0128345489501953,grad_norm: 0.9897166521648594, iteration: 230943
loss: 1.0085526704788208,grad_norm: 0.9999990882474443, iteration: 230944
loss: 0.9893625378608704,grad_norm: 0.9197273240879725, iteration: 230945
loss: 1.0500935316085815,grad_norm: 0.9196445379193202, iteration: 230946
loss: 1.0136754512786865,grad_norm: 0.9999992039421117, iteration: 230947
loss: 1.034439206123352,grad_norm: 0.9999995796844644, iteration: 230948
loss: 1.0039650201797485,grad_norm: 0.8878691914431623, iteration: 230949
loss: 0.9427343010902405,grad_norm: 0.9999991158525263, iteration: 230950
loss: 1.0233349800109863,grad_norm: 0.9999990734005679, iteration: 230951
loss: 0.9779899716377258,grad_norm: 0.8540323282684751, iteration: 230952
loss: 1.0080827474594116,grad_norm: 0.8987974118603634, iteration: 230953
loss: 1.1924469470977783,grad_norm: 0.9282667373252671, iteration: 230954
loss: 0.9653915762901306,grad_norm: 0.9028132320941266, iteration: 230955
loss: 1.0183955430984497,grad_norm: 0.9999990049912844, iteration: 230956
loss: 1.1134904623031616,grad_norm: 0.9999991990164405, iteration: 230957
loss: 1.0221503973007202,grad_norm: 0.9999992707934671, iteration: 230958
loss: 1.0085066556930542,grad_norm: 0.9600856139624302, iteration: 230959
loss: 1.0087534189224243,grad_norm: 0.9999994870481939, iteration: 230960
loss: 1.0122783184051514,grad_norm: 0.9374507730738436, iteration: 230961
loss: 0.9994720220565796,grad_norm: 0.999999974963115, iteration: 230962
loss: 0.9783368706703186,grad_norm: 0.9999990793656918, iteration: 230963
loss: 1.0282418727874756,grad_norm: 0.9999991831379216, iteration: 230964
loss: 1.1620962619781494,grad_norm: 0.9999991608509318, iteration: 230965
loss: 1.2243034839630127,grad_norm: 0.9999999466607497, iteration: 230966
loss: 0.9895001649856567,grad_norm: 0.7998886272023116, iteration: 230967
loss: 0.9766106009483337,grad_norm: 0.8348170379835782, iteration: 230968
loss: 1.0637321472167969,grad_norm: 0.9999998495906095, iteration: 230969
loss: 1.120102047920227,grad_norm: 0.999999758526674, iteration: 230970
loss: 1.0095348358154297,grad_norm: 0.99999912220575, iteration: 230971
loss: 0.9889786243438721,grad_norm: 0.8529425998039607, iteration: 230972
loss: 1.0456854104995728,grad_norm: 0.9033218015845621, iteration: 230973
loss: 1.0264478921890259,grad_norm: 0.9999992537994516, iteration: 230974
loss: 0.9716713428497314,grad_norm: 0.9999991370801677, iteration: 230975
loss: 0.9632129669189453,grad_norm: 0.8706100226519867, iteration: 230976
loss: 0.9522369503974915,grad_norm: 0.8460803920086152, iteration: 230977
loss: 0.9858590364456177,grad_norm: 0.8672511130520071, iteration: 230978
loss: 0.9720863699913025,grad_norm: 0.9999992243482586, iteration: 230979
loss: 1.0285145044326782,grad_norm: 0.9518559527064175, iteration: 230980
loss: 1.0072801113128662,grad_norm: 0.966085931722928, iteration: 230981
loss: 0.9685068726539612,grad_norm: 0.9999990385085116, iteration: 230982
loss: 1.034836769104004,grad_norm: 0.9999990500770193, iteration: 230983
loss: 0.9988072514533997,grad_norm: 0.9999990742526594, iteration: 230984
loss: 0.9806868433952332,grad_norm: 0.988287879467173, iteration: 230985
loss: 0.9990197420120239,grad_norm: 0.910027103900257, iteration: 230986
loss: 1.033872127532959,grad_norm: 0.9999991967769004, iteration: 230987
loss: 0.9914636015892029,grad_norm: 0.9999990081180178, iteration: 230988
loss: 1.0184412002563477,grad_norm: 0.8777129094214435, iteration: 230989
loss: 1.0283079147338867,grad_norm: 0.999999551792286, iteration: 230990
loss: 0.9956396222114563,grad_norm: 0.9999990726634366, iteration: 230991
loss: 0.9971021413803101,grad_norm: 0.9795139357191132, iteration: 230992
loss: 0.9682937860488892,grad_norm: 0.9995852654774012, iteration: 230993
loss: 1.0235153436660767,grad_norm: 0.8954441828292652, iteration: 230994
loss: 0.9828155040740967,grad_norm: 0.8625878794726729, iteration: 230995
loss: 0.9759382605552673,grad_norm: 0.9999990995430771, iteration: 230996
loss: 0.9839590787887573,grad_norm: 0.8936692664299607, iteration: 230997
loss: 1.069443941116333,grad_norm: 0.9999994258922212, iteration: 230998
loss: 1.031732201576233,grad_norm: 0.7565000709789457, iteration: 230999
loss: 1.0102171897888184,grad_norm: 0.9999997192798402, iteration: 231000
loss: 1.0125367641448975,grad_norm: 0.8790097376667256, iteration: 231001
loss: 0.9862514138221741,grad_norm: 0.9999990773480641, iteration: 231002
loss: 0.9558262825012207,grad_norm: 0.9999993431251312, iteration: 231003
loss: 1.0489283800125122,grad_norm: 0.9999990562505556, iteration: 231004
loss: 1.0141123533248901,grad_norm: 0.9999990472563035, iteration: 231005
loss: 0.9980231523513794,grad_norm: 0.9999990160905118, iteration: 231006
loss: 1.089095950126648,grad_norm: 0.9999996230477882, iteration: 231007
loss: 0.9923649430274963,grad_norm: 0.9305132654896674, iteration: 231008
loss: 1.0152045488357544,grad_norm: 0.8983434027931386, iteration: 231009
loss: 0.9929315447807312,grad_norm: 0.9426980477335037, iteration: 231010
loss: 1.0379070043563843,grad_norm: 0.8441012868225909, iteration: 231011
loss: 1.0221985578536987,grad_norm: 0.774508635602147, iteration: 231012
loss: 0.9783963561058044,grad_norm: 0.8191889505239593, iteration: 231013
loss: 0.9657526016235352,grad_norm: 0.882472849771537, iteration: 231014
loss: 0.9861494898796082,grad_norm: 0.9728079558724089, iteration: 231015
loss: 0.9927247762680054,grad_norm: 0.9999995210711112, iteration: 231016
loss: 0.9622591733932495,grad_norm: 0.7986886341133692, iteration: 231017
loss: 0.9996176958084106,grad_norm: 0.8116161703487541, iteration: 231018
loss: 1.0331940650939941,grad_norm: 0.9999993514533106, iteration: 231019
loss: 0.9755608439445496,grad_norm: 0.8961405849127845, iteration: 231020
loss: 0.9720556735992432,grad_norm: 0.9999992190187894, iteration: 231021
loss: 1.0220799446105957,grad_norm: 0.8993738384572983, iteration: 231022
loss: 1.029692530632019,grad_norm: 0.6734004197560326, iteration: 231023
loss: 1.0118833780288696,grad_norm: 0.9999990689374432, iteration: 231024
loss: 0.9957786798477173,grad_norm: 0.9138515598337696, iteration: 231025
loss: 0.9972099661827087,grad_norm: 0.8942119542509813, iteration: 231026
loss: 1.014573097229004,grad_norm: 0.9507936567605856, iteration: 231027
loss: 1.0046347379684448,grad_norm: 0.9479750290120642, iteration: 231028
loss: 1.05659019947052,grad_norm: 0.7723697028903608, iteration: 231029
loss: 1.0210797786712646,grad_norm: 0.9661017109260595, iteration: 231030
loss: 1.0054265260696411,grad_norm: 0.8919830373098663, iteration: 231031
loss: 1.051632046699524,grad_norm: 0.9999997248314648, iteration: 231032
loss: 0.9757981896400452,grad_norm: 0.8493432590909983, iteration: 231033
loss: 1.0073387622833252,grad_norm: 0.9613388051139808, iteration: 231034
loss: 1.0322757959365845,grad_norm: 0.9676426522265767, iteration: 231035
loss: 0.9833384156227112,grad_norm: 0.9999991551220121, iteration: 231036
loss: 1.0459476709365845,grad_norm: 0.9999989812176925, iteration: 231037
loss: 0.9721262454986572,grad_norm: 0.8903027260500213, iteration: 231038
loss: 0.9342349171638489,grad_norm: 0.999999058177072, iteration: 231039
loss: 1.0176782608032227,grad_norm: 0.7729305044004238, iteration: 231040
loss: 1.0014301538467407,grad_norm: 0.8577420660367394, iteration: 231041
loss: 0.9788923263549805,grad_norm: 0.865826421852933, iteration: 231042
loss: 0.966400146484375,grad_norm: 0.9999991115805335, iteration: 231043
loss: 1.0259957313537598,grad_norm: 0.8537882439581509, iteration: 231044
loss: 0.988892674446106,grad_norm: 0.9999992089231087, iteration: 231045
loss: 0.9929831027984619,grad_norm: 0.8647763537342268, iteration: 231046
loss: 0.996321439743042,grad_norm: 0.999998993253196, iteration: 231047
loss: 0.9764222502708435,grad_norm: 0.9686521607173378, iteration: 231048
loss: 1.027965784072876,grad_norm: 0.8932211020863788, iteration: 231049
loss: 1.0286883115768433,grad_norm: 0.9071716842368827, iteration: 231050
loss: 0.9998126029968262,grad_norm: 0.8423618227371538, iteration: 231051
loss: 1.032579779624939,grad_norm: 0.9999990565051952, iteration: 231052
loss: 1.0262517929077148,grad_norm: 0.999999085316754, iteration: 231053
loss: 1.0022450685501099,grad_norm: 0.8982161472170452, iteration: 231054
loss: 1.0467203855514526,grad_norm: 0.999999236582859, iteration: 231055
loss: 0.9639383554458618,grad_norm: 0.9513190493208641, iteration: 231056
loss: 1.0067691802978516,grad_norm: 0.9667806796282574, iteration: 231057
loss: 0.9965973496437073,grad_norm: 0.9999990631032348, iteration: 231058
loss: 0.9895075559616089,grad_norm: 0.7908091050195295, iteration: 231059
loss: 1.0226430892944336,grad_norm: 0.961969930977812, iteration: 231060
loss: 0.9377453327178955,grad_norm: 0.9489091569347171, iteration: 231061
loss: 1.0553761720657349,grad_norm: 0.9999991010204589, iteration: 231062
loss: 1.127995491027832,grad_norm: 0.9999992323791888, iteration: 231063
loss: 1.041567087173462,grad_norm: 0.999999487857572, iteration: 231064
loss: 1.0309323072433472,grad_norm: 0.9433792982092097, iteration: 231065
loss: 1.0330150127410889,grad_norm: 0.8072107501419565, iteration: 231066
loss: 0.9826077818870544,grad_norm: 0.9936133684065726, iteration: 231067
loss: 1.0873284339904785,grad_norm: 0.9999998080372087, iteration: 231068
loss: 1.004701852798462,grad_norm: 0.9999992043328445, iteration: 231069
loss: 1.0028260946273804,grad_norm: 0.8874869773603068, iteration: 231070
loss: 0.9937553405761719,grad_norm: 0.9373777317133324, iteration: 231071
loss: 1.0233385562896729,grad_norm: 0.8021927817039607, iteration: 231072
loss: 1.1168965101242065,grad_norm: 1.0000000428421667, iteration: 231073
loss: 0.9988135695457458,grad_norm: 0.875461564138081, iteration: 231074
loss: 1.0372203588485718,grad_norm: 0.8393933868058006, iteration: 231075
loss: 0.991938591003418,grad_norm: 0.825610789963346, iteration: 231076
loss: 1.0132602453231812,grad_norm: 0.9449544723166006, iteration: 231077
loss: 1.012982726097107,grad_norm: 0.7942609834846158, iteration: 231078
loss: 0.9676284790039062,grad_norm: 0.8293561885932528, iteration: 231079
loss: 1.0058590173721313,grad_norm: 0.9506619985459217, iteration: 231080
loss: 0.9880154132843018,grad_norm: 0.9760080897125727, iteration: 231081
loss: 0.9902047514915466,grad_norm: 0.8779301233361478, iteration: 231082
loss: 1.0233486890792847,grad_norm: 0.9423589296159431, iteration: 231083
loss: 1.1276663541793823,grad_norm: 0.999999593577431, iteration: 231084
loss: 1.0150357484817505,grad_norm: 0.9423748744175868, iteration: 231085
loss: 1.0976130962371826,grad_norm: 0.8810002246619749, iteration: 231086
loss: 0.9852557182312012,grad_norm: 0.9999991451771267, iteration: 231087
loss: 0.9867619276046753,grad_norm: 0.7693773456230071, iteration: 231088
loss: 0.9872667193412781,grad_norm: 0.9138404888278245, iteration: 231089
loss: 0.9830430746078491,grad_norm: 0.9521720724461761, iteration: 231090
loss: 1.025360345840454,grad_norm: 0.9999995088327251, iteration: 231091
loss: 1.0892888307571411,grad_norm: 0.9999990759115285, iteration: 231092
loss: 0.9777824878692627,grad_norm: 0.9794384375740463, iteration: 231093
loss: 1.01121187210083,grad_norm: 0.9999990035407169, iteration: 231094
loss: 1.0806217193603516,grad_norm: 0.9349472138481236, iteration: 231095
loss: 0.982757568359375,grad_norm: 0.9058029188665926, iteration: 231096
loss: 1.0135183334350586,grad_norm: 0.9949319511880149, iteration: 231097
loss: 0.9960449934005737,grad_norm: 0.8354444618180974, iteration: 231098
loss: 1.028798222541809,grad_norm: 0.9999996490163394, iteration: 231099
loss: 0.9675309658050537,grad_norm: 0.9906442909934128, iteration: 231100
loss: 0.9899963140487671,grad_norm: 0.9999993574717552, iteration: 231101
loss: 0.9722938537597656,grad_norm: 0.8920578040829656, iteration: 231102
loss: 1.014390468597412,grad_norm: 0.9999997955822746, iteration: 231103
loss: 1.0854896306991577,grad_norm: 0.8746497308444954, iteration: 231104
loss: 1.0471304655075073,grad_norm: 0.9999990077148556, iteration: 231105
loss: 1.003448486328125,grad_norm: 0.937269786875773, iteration: 231106
loss: 1.0007994174957275,grad_norm: 0.8728617047569042, iteration: 231107
loss: 1.030280590057373,grad_norm: 0.8376423274798679, iteration: 231108
loss: 1.0264328718185425,grad_norm: 0.9999995624172483, iteration: 231109
loss: 0.9761092066764832,grad_norm: 0.9423824483354514, iteration: 231110
loss: 1.010460615158081,grad_norm: 0.8999622221904258, iteration: 231111
loss: 0.9823172688484192,grad_norm: 0.9999994694158848, iteration: 231112
loss: 1.0047649145126343,grad_norm: 0.8179646764131123, iteration: 231113
loss: 1.057205319404602,grad_norm: 0.9496121932274674, iteration: 231114
loss: 1.015599250793457,grad_norm: 0.9606107434283317, iteration: 231115
loss: 1.061772108078003,grad_norm: 0.9649965931228016, iteration: 231116
loss: 0.9659695625305176,grad_norm: 0.8952614542550372, iteration: 231117
loss: 1.1686090230941772,grad_norm: 0.9999998813670185, iteration: 231118
loss: 0.9635962247848511,grad_norm: 0.8614739212897331, iteration: 231119
loss: 1.1613526344299316,grad_norm: 0.9999991787141643, iteration: 231120
loss: 1.0071033239364624,grad_norm: 0.9999993589126095, iteration: 231121
loss: 0.9800848364830017,grad_norm: 0.751894737599175, iteration: 231122
loss: 0.978132963180542,grad_norm: 0.8738799095406898, iteration: 231123
loss: 0.9940404891967773,grad_norm: 0.9219664259729055, iteration: 231124
loss: 0.983191192150116,grad_norm: 0.9595721014399966, iteration: 231125
loss: 0.9880747199058533,grad_norm: 0.9999989512963734, iteration: 231126
loss: 1.061499834060669,grad_norm: 0.9999996555535351, iteration: 231127
loss: 1.0255768299102783,grad_norm: 0.9999990112867918, iteration: 231128
loss: 0.9758370518684387,grad_norm: 0.8409698798680915, iteration: 231129
loss: 0.980394184589386,grad_norm: 0.9999991409341789, iteration: 231130
loss: 1.0270700454711914,grad_norm: 0.976171766829157, iteration: 231131
loss: 1.0935348272323608,grad_norm: 0.9999991027661236, iteration: 231132
loss: 0.9867192506790161,grad_norm: 0.8386274716619504, iteration: 231133
loss: 0.9873246550559998,grad_norm: 0.9999992340339207, iteration: 231134
loss: 1.026039719581604,grad_norm: 0.9999990497673091, iteration: 231135
loss: 1.032429814338684,grad_norm: 0.999998970137685, iteration: 231136
loss: 0.9891068339347839,grad_norm: 0.7531934669081646, iteration: 231137
loss: 0.9700135588645935,grad_norm: 0.9999990155411842, iteration: 231138
loss: 1.0936356782913208,grad_norm: 0.9151515484261635, iteration: 231139
loss: 0.9839458465576172,grad_norm: 0.8456905893753465, iteration: 231140
loss: 0.9664721488952637,grad_norm: 0.9999990328750614, iteration: 231141
loss: 0.9930433034896851,grad_norm: 0.9410998987576743, iteration: 231142
loss: 1.04746413230896,grad_norm: 0.8337360686329783, iteration: 231143
loss: 1.0006003379821777,grad_norm: 0.9999991061484429, iteration: 231144
loss: 1.026371717453003,grad_norm: 0.9999995036389048, iteration: 231145
loss: 1.023554801940918,grad_norm: 0.9999991848043125, iteration: 231146
loss: 1.0022295713424683,grad_norm: 0.9240509224988975, iteration: 231147
loss: 0.9799510836601257,grad_norm: 0.9222021331931128, iteration: 231148
loss: 1.0222581624984741,grad_norm: 0.89892131841337, iteration: 231149
loss: 1.032028079032898,grad_norm: 0.8927481645304957, iteration: 231150
loss: 1.0031005144119263,grad_norm: 0.9741615253390984, iteration: 231151
loss: 1.0403878688812256,grad_norm: 0.9999991058207395, iteration: 231152
loss: 1.0012468099594116,grad_norm: 0.9999991681944898, iteration: 231153
loss: 0.97446209192276,grad_norm: 0.9404596098376283, iteration: 231154
loss: 1.094343662261963,grad_norm: 0.9780569300988816, iteration: 231155
loss: 0.9571687579154968,grad_norm: 0.9999990562536739, iteration: 231156
loss: 1.0381839275360107,grad_norm: 0.9762299421627912, iteration: 231157
loss: 0.9947637319564819,grad_norm: 0.8209383298486893, iteration: 231158
loss: 0.985317051410675,grad_norm: 0.9999990051314203, iteration: 231159
loss: 0.9755818843841553,grad_norm: 0.79898222749997, iteration: 231160
loss: 0.9984680414199829,grad_norm: 0.9999990376675343, iteration: 231161
loss: 1.1995419263839722,grad_norm: 0.9999993215410449, iteration: 231162
loss: 0.9535908699035645,grad_norm: 0.9536945531214517, iteration: 231163
loss: 0.9958909749984741,grad_norm: 0.844658369788642, iteration: 231164
loss: 1.0075061321258545,grad_norm: 0.9384385916248985, iteration: 231165
loss: 0.9923664331436157,grad_norm: 0.8741538304625235, iteration: 231166
loss: 0.990803599357605,grad_norm: 0.8696478132893118, iteration: 231167
loss: 1.0019763708114624,grad_norm: 0.850527386957355, iteration: 231168
loss: 1.0026381015777588,grad_norm: 0.9074665044910692, iteration: 231169
loss: 1.0469063520431519,grad_norm: 0.9797203326329834, iteration: 231170
loss: 0.9799073934555054,grad_norm: 0.815673251296953, iteration: 231171
loss: 1.035927414894104,grad_norm: 0.9264451652677236, iteration: 231172
loss: 0.9894890785217285,grad_norm: 0.9399974350699589, iteration: 231173
loss: 1.0168476104736328,grad_norm: 0.7704628693420488, iteration: 231174
loss: 1.0022417306900024,grad_norm: 0.8454458941359781, iteration: 231175
loss: 0.9999631643295288,grad_norm: 0.9999990647173118, iteration: 231176
loss: 0.9888128042221069,grad_norm: 0.8115525364950725, iteration: 231177
loss: 1.008720874786377,grad_norm: 0.9319146596188109, iteration: 231178
loss: 1.01277494430542,grad_norm: 0.9999993656932394, iteration: 231179
loss: 0.9913980960845947,grad_norm: 0.9999990830824833, iteration: 231180
loss: 1.0025595426559448,grad_norm: 0.999999093917021, iteration: 231181
loss: 0.957202136516571,grad_norm: 0.9875084459891181, iteration: 231182
loss: 1.0197851657867432,grad_norm: 0.9999992327772649, iteration: 231183
loss: 1.0469160079956055,grad_norm: 0.999998999155391, iteration: 231184
loss: 1.0354828834533691,grad_norm: 0.9999991518868421, iteration: 231185
loss: 0.9967632293701172,grad_norm: 0.9999990903315016, iteration: 231186
loss: 1.0337132215499878,grad_norm: 0.9999989506569662, iteration: 231187
loss: 1.0154398679733276,grad_norm: 0.8037555344775591, iteration: 231188
loss: 0.9834861159324646,grad_norm: 0.9367002944380955, iteration: 231189
loss: 1.1012541055679321,grad_norm: 0.9999990805982456, iteration: 231190
loss: 1.0154244899749756,grad_norm: 0.973901156637109, iteration: 231191
loss: 1.00095796585083,grad_norm: 0.949751198888169, iteration: 231192
loss: 0.9671298265457153,grad_norm: 0.8439285246040643, iteration: 231193
loss: 1.009871006011963,grad_norm: 0.7614005842287205, iteration: 231194
loss: 0.9694688320159912,grad_norm: 0.9999994396350098, iteration: 231195
loss: 0.9972005486488342,grad_norm: 0.8676212245786981, iteration: 231196
loss: 1.0794658660888672,grad_norm: 0.9999997763111947, iteration: 231197
loss: 1.0931578874588013,grad_norm: 0.99999939322518, iteration: 231198
loss: 1.0298181772232056,grad_norm: 0.8129404232997388, iteration: 231199
loss: 0.9895188212394714,grad_norm: 0.8728519899534513, iteration: 231200
loss: 1.0109001398086548,grad_norm: 0.8404347916958199, iteration: 231201
loss: 0.9640958905220032,grad_norm: 0.8388514143510367, iteration: 231202
loss: 0.989734947681427,grad_norm: 0.8394078435564568, iteration: 231203
loss: 0.9751080870628357,grad_norm: 0.818022014960203, iteration: 231204
loss: 1.0036622285842896,grad_norm: 0.913709158753519, iteration: 231205
loss: 0.9683900475502014,grad_norm: 0.9999990468882272, iteration: 231206
loss: 0.9841786623001099,grad_norm: 0.8897354948016379, iteration: 231207
loss: 0.9914834499359131,grad_norm: 0.9429845993810975, iteration: 231208
loss: 1.0207141637802124,grad_norm: 0.9999992952730895, iteration: 231209
loss: 1.0115677118301392,grad_norm: 0.9562131307271567, iteration: 231210
loss: 1.0441356897354126,grad_norm: 0.9999991285090177, iteration: 231211
loss: 1.0191223621368408,grad_norm: 0.9373785061914082, iteration: 231212
loss: 0.9491760730743408,grad_norm: 0.9999997869335315, iteration: 231213
loss: 1.0622409582138062,grad_norm: 0.8860541632525384, iteration: 231214
loss: 1.0061943531036377,grad_norm: 0.9999991928722504, iteration: 231215
loss: 1.0015233755111694,grad_norm: 0.8170923151311305, iteration: 231216
loss: 0.9679870009422302,grad_norm: 0.9637787867796186, iteration: 231217
loss: 0.9977808594703674,grad_norm: 0.956129852296393, iteration: 231218
loss: 0.9987611770629883,grad_norm: 0.9810490738602234, iteration: 231219
loss: 1.100327730178833,grad_norm: 0.9999993402233998, iteration: 231220
loss: 1.0672506093978882,grad_norm: 0.9999990792578365, iteration: 231221
loss: 1.0166568756103516,grad_norm: 0.999999604627664, iteration: 231222
loss: 1.019482135772705,grad_norm: 0.9795418854073474, iteration: 231223
loss: 0.9906198978424072,grad_norm: 0.8551863403148954, iteration: 231224
loss: 0.9679908752441406,grad_norm: 0.9788174237659518, iteration: 231225
loss: 1.0160764455795288,grad_norm: 0.8618089122554717, iteration: 231226
loss: 0.9987102150917053,grad_norm: 0.8212175668677014, iteration: 231227
loss: 1.0063257217407227,grad_norm: 0.9999991537295853, iteration: 231228
loss: 0.9845607280731201,grad_norm: 0.8404811379226316, iteration: 231229
loss: 0.9980064630508423,grad_norm: 0.9999990608967187, iteration: 231230
loss: 1.0063469409942627,grad_norm: 0.9999990504261714, iteration: 231231
loss: 1.0142004489898682,grad_norm: 0.8037294864725749, iteration: 231232
loss: 1.0165430307388306,grad_norm: 0.8851447972885478, iteration: 231233
loss: 1.1011446714401245,grad_norm: 0.999999801081261, iteration: 231234
loss: 0.996253252029419,grad_norm: 0.9999992454163267, iteration: 231235
loss: 0.9829023480415344,grad_norm: 0.9846401024581974, iteration: 231236
loss: 1.0206098556518555,grad_norm: 0.9999989568495341, iteration: 231237
loss: 1.0150728225708008,grad_norm: 0.8376235301821732, iteration: 231238
loss: 0.9900544881820679,grad_norm: 0.9492779951075332, iteration: 231239
loss: 0.9384902715682983,grad_norm: 0.9149167192534167, iteration: 231240
loss: 1.0164380073547363,grad_norm: 0.9999991931190797, iteration: 231241
loss: 0.996536910533905,grad_norm: 0.8560811279060665, iteration: 231242
loss: 1.0763020515441895,grad_norm: 0.9999990008745986, iteration: 231243
loss: 0.9580721259117126,grad_norm: 0.859448922949341, iteration: 231244
loss: 0.977738082408905,grad_norm: 0.8926924098663317, iteration: 231245
loss: 1.0018764734268188,grad_norm: 0.9999999522246714, iteration: 231246
loss: 0.9930261373519897,grad_norm: 0.9999990395184977, iteration: 231247
loss: 0.993622362613678,grad_norm: 0.999998996441221, iteration: 231248
loss: 0.9869343638420105,grad_norm: 0.9056795515229917, iteration: 231249
loss: 1.0281141996383667,grad_norm: 0.903684979838791, iteration: 231250
loss: 0.996788740158081,grad_norm: 0.999999766331518, iteration: 231251
loss: 0.999636173248291,grad_norm: 0.9999989708904676, iteration: 231252
loss: 1.0196218490600586,grad_norm: 0.857376736816845, iteration: 231253
loss: 1.0059499740600586,grad_norm: 0.9175275544693651, iteration: 231254
loss: 0.9974045753479004,grad_norm: 0.9031942394315017, iteration: 231255
loss: 1.00868558883667,grad_norm: 0.9999992766422913, iteration: 231256
loss: 1.0072170495986938,grad_norm: 0.9671791281076917, iteration: 231257
loss: 0.9996062517166138,grad_norm: 0.792045345540615, iteration: 231258
loss: 0.9807151556015015,grad_norm: 0.7510411644158919, iteration: 231259
loss: 0.9849522113800049,grad_norm: 0.8103029609818787, iteration: 231260
loss: 1.0088305473327637,grad_norm: 0.9155920866877466, iteration: 231261
loss: 1.007534384727478,grad_norm: 0.9581263103664338, iteration: 231262
loss: 0.9901052713394165,grad_norm: 0.8540048469429564, iteration: 231263
loss: 0.970373809337616,grad_norm: 0.9809181179935673, iteration: 231264
loss: 1.0220915079116821,grad_norm: 0.9999992160365874, iteration: 231265
loss: 1.090165138244629,grad_norm: 0.9999999529961248, iteration: 231266
loss: 1.0040366649627686,grad_norm: 0.9319371227483737, iteration: 231267
loss: 0.991119921207428,grad_norm: 0.8259872416566308, iteration: 231268
loss: 1.0014721155166626,grad_norm: 0.8075165925784572, iteration: 231269
loss: 0.9904393553733826,grad_norm: 0.8465254970872861, iteration: 231270
loss: 1.0257048606872559,grad_norm: 0.9269174073424506, iteration: 231271
loss: 0.9750591516494751,grad_norm: 0.9600798039484316, iteration: 231272
loss: 1.0118393898010254,grad_norm: 0.9612723269767891, iteration: 231273
loss: 0.9948920011520386,grad_norm: 0.8561828027522159, iteration: 231274
loss: 0.9978042244911194,grad_norm: 0.9161161902244336, iteration: 231275
loss: 1.022929072380066,grad_norm: 0.9999991974341782, iteration: 231276
loss: 0.9774582386016846,grad_norm: 0.9767687140452121, iteration: 231277
loss: 0.9898271560668945,grad_norm: 0.9999991281251888, iteration: 231278
loss: 0.9703145623207092,grad_norm: 0.9999991121238523, iteration: 231279
loss: 1.0060315132141113,grad_norm: 0.9261182291681012, iteration: 231280
loss: 1.0825552940368652,grad_norm: 0.9999997735502815, iteration: 231281
loss: 1.0236846208572388,grad_norm: 0.8726424401795084, iteration: 231282
loss: 0.9567856192588806,grad_norm: 0.9999991508818462, iteration: 231283
loss: 0.9856470227241516,grad_norm: 0.9400747084558684, iteration: 231284
loss: 1.016310453414917,grad_norm: 0.9688365881001584, iteration: 231285
loss: 1.1092073917388916,grad_norm: 0.9999998874898629, iteration: 231286
loss: 0.9854219555854797,grad_norm: 0.9999990534303883, iteration: 231287
loss: 1.0114789009094238,grad_norm: 0.9536710332266198, iteration: 231288
loss: 0.9925680160522461,grad_norm: 0.9023828797859161, iteration: 231289
loss: 0.977891743183136,grad_norm: 0.9867580388625548, iteration: 231290
loss: 1.004129409790039,grad_norm: 0.8639319918573786, iteration: 231291
loss: 0.9824782609939575,grad_norm: 0.9970780114560222, iteration: 231292
loss: 0.9820898771286011,grad_norm: 0.9601340075934677, iteration: 231293
loss: 0.9567575454711914,grad_norm: 0.999999098740711, iteration: 231294
loss: 1.0191152095794678,grad_norm: 0.9418698111200461, iteration: 231295
loss: 1.0168216228485107,grad_norm: 0.9313250855301117, iteration: 231296
loss: 0.9962425827980042,grad_norm: 0.8060917160773804, iteration: 231297
loss: 1.0187095403671265,grad_norm: 0.8755850739029045, iteration: 231298
loss: 0.9781087636947632,grad_norm: 0.7678065528337834, iteration: 231299
loss: 1.0205696821212769,grad_norm: 0.864132752036099, iteration: 231300
loss: 0.9684303402900696,grad_norm: 0.7998802823161887, iteration: 231301
loss: 0.9977147579193115,grad_norm: 0.8790284916148945, iteration: 231302
loss: 1.0035313367843628,grad_norm: 0.9999989674360631, iteration: 231303
loss: 0.9781696200370789,grad_norm: 0.9330272035843565, iteration: 231304
loss: 1.0155668258666992,grad_norm: 0.9409463953936136, iteration: 231305
loss: 0.999993622303009,grad_norm: 0.9215038587546676, iteration: 231306
loss: 1.0243170261383057,grad_norm: 0.9229475030735322, iteration: 231307
loss: 1.0036695003509521,grad_norm: 0.9999989976053116, iteration: 231308
loss: 0.9897581934928894,grad_norm: 0.9929063694807807, iteration: 231309
loss: 1.0234401226043701,grad_norm: 0.8626636410371247, iteration: 231310
loss: 1.0668244361877441,grad_norm: 0.9999991158701872, iteration: 231311
loss: 0.9789881706237793,grad_norm: 0.8607077847881627, iteration: 231312
loss: 1.0567772388458252,grad_norm: 0.929468329529197, iteration: 231313
loss: 1.0188000202178955,grad_norm: 0.999999220845622, iteration: 231314
loss: 1.0348360538482666,grad_norm: 0.9264559683027652, iteration: 231315
loss: 1.054252028465271,grad_norm: 0.9999991269708735, iteration: 231316
loss: 0.9751312136650085,grad_norm: 0.9397723859362874, iteration: 231317
loss: 1.0069411993026733,grad_norm: 0.9999991086124429, iteration: 231318
loss: 1.0364375114440918,grad_norm: 0.8320363620574254, iteration: 231319
loss: 1.0331275463104248,grad_norm: 0.8222977962897918, iteration: 231320
loss: 1.0601329803466797,grad_norm: 0.9999991314546108, iteration: 231321
loss: 1.0239832401275635,grad_norm: 0.8380441611254946, iteration: 231322
loss: 0.9823574423789978,grad_norm: 0.875043606002741, iteration: 231323
loss: 0.9939203262329102,grad_norm: 0.8387261149015083, iteration: 231324
loss: 1.0427807569503784,grad_norm: 0.8609222789939349, iteration: 231325
loss: 0.9889180660247803,grad_norm: 0.9096856480746877, iteration: 231326
loss: 1.0072683095932007,grad_norm: 0.8210007882852557, iteration: 231327
loss: 1.013016939163208,grad_norm: 0.9748552196467875, iteration: 231328
loss: 0.9899372458457947,grad_norm: 0.8999906384958758, iteration: 231329
loss: 0.9676505923271179,grad_norm: 0.7582422604754868, iteration: 231330
loss: 0.9807240962982178,grad_norm: 0.9999990579575998, iteration: 231331
loss: 1.0376849174499512,grad_norm: 0.9869918583049615, iteration: 231332
loss: 0.9886594414710999,grad_norm: 0.9665645647024141, iteration: 231333
loss: 1.0277999639511108,grad_norm: 0.9999995009137719, iteration: 231334
loss: 0.9912503957748413,grad_norm: 0.9999990648297313, iteration: 231335
loss: 1.019840121269226,grad_norm: 0.9357537791693922, iteration: 231336
loss: 1.0179898738861084,grad_norm: 0.9999992360746842, iteration: 231337
loss: 1.0237232446670532,grad_norm: 0.9985781362842776, iteration: 231338
loss: 0.9892891049385071,grad_norm: 0.8263518053204858, iteration: 231339
loss: 1.0015504360198975,grad_norm: 0.9842358769969168, iteration: 231340
loss: 0.9958372116088867,grad_norm: 0.8788508209557483, iteration: 231341
loss: 1.0158082246780396,grad_norm: 0.9999990750963008, iteration: 231342
loss: 1.069301962852478,grad_norm: 0.9939274252535562, iteration: 231343
loss: 0.9896458387374878,grad_norm: 0.9003274914611757, iteration: 231344
loss: 0.9501408934593201,grad_norm: 0.7668247640779945, iteration: 231345
loss: 1.0283865928649902,grad_norm: 0.9261133715667743, iteration: 231346
loss: 1.0139704942703247,grad_norm: 0.9788487065197536, iteration: 231347
loss: 1.0170490741729736,grad_norm: 0.8829249037240703, iteration: 231348
loss: 1.082067608833313,grad_norm: 0.9999996118898652, iteration: 231349
loss: 0.980999231338501,grad_norm: 0.999999151527269, iteration: 231350
loss: 1.0488930940628052,grad_norm: 0.999999009801841, iteration: 231351
loss: 0.9611577987670898,grad_norm: 0.9241507072995742, iteration: 231352
loss: 1.0216704607009888,grad_norm: 0.9279415216677722, iteration: 231353
loss: 0.9952375888824463,grad_norm: 0.9080707671911251, iteration: 231354
loss: 1.023506760597229,grad_norm: 0.9115479611879804, iteration: 231355
loss: 1.0152461528778076,grad_norm: 0.8076302160548218, iteration: 231356
loss: 0.9691331386566162,grad_norm: 0.8130348398916954, iteration: 231357
loss: 1.0625711679458618,grad_norm: 0.9999989773767181, iteration: 231358
loss: 0.974766194820404,grad_norm: 0.9146453610446459, iteration: 231359
loss: 0.9913907647132874,grad_norm: 0.9999991233761364, iteration: 231360
loss: 0.9861663579940796,grad_norm: 0.9012669409773597, iteration: 231361
loss: 1.0152424573898315,grad_norm: 0.9999991233563573, iteration: 231362
loss: 1.0294835567474365,grad_norm: 0.7541866322427185, iteration: 231363
loss: 1.017474889755249,grad_norm: 0.9659863027201039, iteration: 231364
loss: 1.0123978853225708,grad_norm: 0.9454663427311383, iteration: 231365
loss: 1.0080127716064453,grad_norm: 0.9999991090868671, iteration: 231366
loss: 0.9992341995239258,grad_norm: 0.9670483100981051, iteration: 231367
loss: 1.0110363960266113,grad_norm: 0.9999990883784023, iteration: 231368
loss: 0.952305257320404,grad_norm: 0.9501206667664531, iteration: 231369
loss: 0.9938193559646606,grad_norm: 0.9999990989263335, iteration: 231370
loss: 0.9764397740364075,grad_norm: 0.8917703474896302, iteration: 231371
loss: 0.9936648607254028,grad_norm: 0.9398346680283648, iteration: 231372
loss: 1.0488179922103882,grad_norm: 0.9684423053491168, iteration: 231373
loss: 1.0229501724243164,grad_norm: 0.8732481532958857, iteration: 231374
loss: 1.0096474885940552,grad_norm: 0.9130651902812361, iteration: 231375
loss: 1.0354924201965332,grad_norm: 0.9999999713389263, iteration: 231376
loss: 1.0095311403274536,grad_norm: 0.9999992017610525, iteration: 231377
loss: 0.9897937774658203,grad_norm: 0.8915522386984958, iteration: 231378
loss: 1.0115395784378052,grad_norm: 0.9425335061160279, iteration: 231379
loss: 0.9908238649368286,grad_norm: 0.999999021638111, iteration: 231380
loss: 0.9817861318588257,grad_norm: 0.8996872218437881, iteration: 231381
loss: 0.9577930569648743,grad_norm: 0.9999990606400603, iteration: 231382
loss: 1.0178688764572144,grad_norm: 0.9999998585992806, iteration: 231383
loss: 1.0029672384262085,grad_norm: 0.7862316616154738, iteration: 231384
loss: 0.9944567084312439,grad_norm: 0.9521418689636295, iteration: 231385
loss: 0.9742012023925781,grad_norm: 0.855318221119115, iteration: 231386
loss: 1.0203028917312622,grad_norm: 0.931211716838181, iteration: 231387
loss: 1.0207300186157227,grad_norm: 0.8432815658944744, iteration: 231388
loss: 0.9506790041923523,grad_norm: 0.9751229050275176, iteration: 231389
loss: 1.0104032754898071,grad_norm: 0.8608101206864583, iteration: 231390
loss: 0.9925811886787415,grad_norm: 0.9739540276537233, iteration: 231391
loss: 1.0138354301452637,grad_norm: 0.9332678035011878, iteration: 231392
loss: 1.0398627519607544,grad_norm: 0.9999990141321091, iteration: 231393
loss: 0.994024395942688,grad_norm: 0.8935087340274388, iteration: 231394
loss: 1.010115385055542,grad_norm: 0.8943612517189987, iteration: 231395
loss: 0.967060923576355,grad_norm: 0.9468935069271683, iteration: 231396
loss: 0.9901332855224609,grad_norm: 0.8849309944370093, iteration: 231397
loss: 1.031627893447876,grad_norm: 0.9958077536935259, iteration: 231398
loss: 0.9813878536224365,grad_norm: 0.9583873490884594, iteration: 231399
loss: 1.015821099281311,grad_norm: 0.8782736603867811, iteration: 231400
loss: 0.9850015640258789,grad_norm: 0.7984170410547105, iteration: 231401
loss: 1.0084317922592163,grad_norm: 0.9944133338169476, iteration: 231402
loss: 0.960038959980011,grad_norm: 0.8951131549516231, iteration: 231403
loss: 1.0460532903671265,grad_norm: 0.8880135085849113, iteration: 231404
loss: 0.9529666304588318,grad_norm: 0.8533100639724948, iteration: 231405
loss: 0.9876441359519958,grad_norm: 0.8550095626393671, iteration: 231406
loss: 1.0070159435272217,grad_norm: 0.969390253653636, iteration: 231407
loss: 1.0276912450790405,grad_norm: 0.9999990887619622, iteration: 231408
loss: 1.0067596435546875,grad_norm: 0.8757675111316208, iteration: 231409
loss: 1.004867434501648,grad_norm: 0.9999990161384532, iteration: 231410
loss: 1.0149073600769043,grad_norm: 0.9493860427052871, iteration: 231411
loss: 1.0498902797698975,grad_norm: 0.9999990071453712, iteration: 231412
loss: 1.0125044584274292,grad_norm: 0.937788053464003, iteration: 231413
loss: 1.0258132219314575,grad_norm: 0.873864150998205, iteration: 231414
loss: 0.952756941318512,grad_norm: 0.9493349936408213, iteration: 231415
loss: 1.0106314420700073,grad_norm: 0.9024877472572677, iteration: 231416
loss: 1.0380295515060425,grad_norm: 0.871553130428644, iteration: 231417
loss: 1.0450555086135864,grad_norm: 0.994442967846535, iteration: 231418
loss: 0.9722831845283508,grad_norm: 0.8048095299920202, iteration: 231419
loss: 1.0197323560714722,grad_norm: 0.9694923416509119, iteration: 231420
loss: 1.0495779514312744,grad_norm: 0.9999990833381525, iteration: 231421
loss: 0.9859163165092468,grad_norm: 0.9999993981781248, iteration: 231422
loss: 1.0027140378952026,grad_norm: 0.9194011924585198, iteration: 231423
loss: 0.9740197658538818,grad_norm: 0.9172837848888157, iteration: 231424
loss: 0.9894170165061951,grad_norm: 0.9852856790995167, iteration: 231425
loss: 0.9710088968276978,grad_norm: 0.8467171098587328, iteration: 231426
loss: 1.0054937601089478,grad_norm: 0.9999993233277061, iteration: 231427
loss: 1.0098588466644287,grad_norm: 0.8253943075005438, iteration: 231428
loss: 1.0245413780212402,grad_norm: 0.942455269387837, iteration: 231429
loss: 1.0143386125564575,grad_norm: 0.9999993037328716, iteration: 231430
loss: 0.9968116879463196,grad_norm: 0.8031639201893657, iteration: 231431
loss: 1.0739738941192627,grad_norm: 0.9861174933558291, iteration: 231432
loss: 0.9590595960617065,grad_norm: 0.9999989706657675, iteration: 231433
loss: 1.0397586822509766,grad_norm: 0.9225554106089263, iteration: 231434
loss: 0.9907442331314087,grad_norm: 0.9485198161191378, iteration: 231435
loss: 0.9966676831245422,grad_norm: 0.996789266631711, iteration: 231436
loss: 0.980496883392334,grad_norm: 0.9243909969956529, iteration: 231437
loss: 1.0127604007720947,grad_norm: 0.9999992050779816, iteration: 231438
loss: 1.0164411067962646,grad_norm: 0.9999990256047631, iteration: 231439
loss: 1.0116456747055054,grad_norm: 0.9654795941999723, iteration: 231440
loss: 0.9800782203674316,grad_norm: 0.8613552220624682, iteration: 231441
loss: 0.9735484719276428,grad_norm: 0.9999992041506658, iteration: 231442
loss: 0.9959502816200256,grad_norm: 0.9979762865636465, iteration: 231443
loss: 0.9876236915588379,grad_norm: 0.8517477415750462, iteration: 231444
loss: 0.9883956909179688,grad_norm: 0.9999990757716566, iteration: 231445
loss: 1.0476528406143188,grad_norm: 0.999999275558045, iteration: 231446
loss: 1.0776033401489258,grad_norm: 0.9999989935412273, iteration: 231447
loss: 0.9666031002998352,grad_norm: 0.877060088793255, iteration: 231448
loss: 1.0074748992919922,grad_norm: 0.9541698859648118, iteration: 231449
loss: 0.9815906882286072,grad_norm: 0.9645777366442425, iteration: 231450
loss: 0.9916961789131165,grad_norm: 0.8954664150121291, iteration: 231451
loss: 1.0468624830245972,grad_norm: 0.8714781706672263, iteration: 231452
loss: 1.030383586883545,grad_norm: 0.999999834678736, iteration: 231453
loss: 1.0670688152313232,grad_norm: 0.8797606415412746, iteration: 231454
loss: 0.9658220410346985,grad_norm: 0.9999991722911815, iteration: 231455
loss: 0.955174446105957,grad_norm: 0.9999992095970202, iteration: 231456
loss: 0.9548494815826416,grad_norm: 0.9999990476596342, iteration: 231457
loss: 0.9915116429328918,grad_norm: 0.8928267846568992, iteration: 231458
loss: 0.996536910533905,grad_norm: 0.9999991674802865, iteration: 231459
loss: 0.9763291478157043,grad_norm: 0.983956812087586, iteration: 231460
loss: 0.9997972846031189,grad_norm: 0.9201417384071277, iteration: 231461
loss: 0.9666865468025208,grad_norm: 0.9087248381372879, iteration: 231462
loss: 1.0287867784500122,grad_norm: 0.9893604747784794, iteration: 231463
loss: 0.9906084537506104,grad_norm: 0.9611228745182495, iteration: 231464
loss: 0.9835646152496338,grad_norm: 0.9999992726762413, iteration: 231465
loss: 0.9874894022941589,grad_norm: 0.9999991687935482, iteration: 231466
loss: 0.9620757102966309,grad_norm: 0.8226866926295251, iteration: 231467
loss: 1.034079909324646,grad_norm: 0.9999991011441557, iteration: 231468
loss: 1.0273321866989136,grad_norm: 0.8216955898885335, iteration: 231469
loss: 1.064737319946289,grad_norm: 0.9999997478687965, iteration: 231470
loss: 1.0143119096755981,grad_norm: 0.7408278600534294, iteration: 231471
loss: 1.0011274814605713,grad_norm: 0.8300111111400349, iteration: 231472
loss: 0.9821199774742126,grad_norm: 0.9999990853874077, iteration: 231473
loss: 1.001766324043274,grad_norm: 0.9999989673099945, iteration: 231474
loss: 1.0884199142456055,grad_norm: 0.9999992687394426, iteration: 231475
loss: 0.9602277278900146,grad_norm: 0.9999989358796915, iteration: 231476
loss: 1.0359008312225342,grad_norm: 0.9999992564933851, iteration: 231477
loss: 1.0483143329620361,grad_norm: 0.9999991502397512, iteration: 231478
loss: 1.0121421813964844,grad_norm: 0.8754284237196095, iteration: 231479
loss: 0.9808201193809509,grad_norm: 0.9999996873346837, iteration: 231480
loss: 0.9985957145690918,grad_norm: 0.8426149901407782, iteration: 231481
loss: 1.0068892240524292,grad_norm: 0.9844486779304876, iteration: 231482
loss: 0.9976097941398621,grad_norm: 0.9374552070331273, iteration: 231483
loss: 0.9769080877304077,grad_norm: 0.7675680384637515, iteration: 231484
loss: 0.9523460268974304,grad_norm: 0.9999990718807868, iteration: 231485
loss: 0.9951452612876892,grad_norm: 0.8943919373141326, iteration: 231486
loss: 0.9848728179931641,grad_norm: 0.8466245405211974, iteration: 231487
loss: 0.9762656688690186,grad_norm: 0.772454118369647, iteration: 231488
loss: 0.9749457836151123,grad_norm: 0.999999014108791, iteration: 231489
loss: 0.997941255569458,grad_norm: 0.8801985055670937, iteration: 231490
loss: 1.003137230873108,grad_norm: 0.9851621381970312, iteration: 231491
loss: 1.0130294561386108,grad_norm: 0.9999990399046805, iteration: 231492
loss: 0.9860556721687317,grad_norm: 0.9970548232984424, iteration: 231493
loss: 1.000820279121399,grad_norm: 0.8649333266271945, iteration: 231494
loss: 0.9853115081787109,grad_norm: 0.999999193829777, iteration: 231495
loss: 0.9739084243774414,grad_norm: 0.8887914968988649, iteration: 231496
loss: 0.9511480331420898,grad_norm: 0.8898703457509975, iteration: 231497
loss: 0.9747409820556641,grad_norm: 0.9547725288776756, iteration: 231498
loss: 1.0281035900115967,grad_norm: 0.9640790557774068, iteration: 231499
loss: 0.9759953022003174,grad_norm: 0.9218302837270914, iteration: 231500
loss: 1.0002367496490479,grad_norm: 0.9999990129652736, iteration: 231501
loss: 0.9658706784248352,grad_norm: 0.9682197684776738, iteration: 231502
loss: 0.9639232754707336,grad_norm: 0.999998923222183, iteration: 231503
loss: 1.044195294380188,grad_norm: 0.8182448825003533, iteration: 231504
loss: 1.0032063722610474,grad_norm: 0.807123134993141, iteration: 231505
loss: 1.0292490720748901,grad_norm: 0.999999643885593, iteration: 231506
loss: 1.066078782081604,grad_norm: 0.999999185183285, iteration: 231507
loss: 1.0042389631271362,grad_norm: 0.9828501785351291, iteration: 231508
loss: 0.9797912836074829,grad_norm: 0.8989508326482504, iteration: 231509
loss: 0.9902307987213135,grad_norm: 0.9874803804760458, iteration: 231510
loss: 1.0085972547531128,grad_norm: 0.8782960202914034, iteration: 231511
loss: 1.0108578205108643,grad_norm: 0.9302343957613448, iteration: 231512
loss: 0.9816306233406067,grad_norm: 0.9946129629451281, iteration: 231513
loss: 0.9719271659851074,grad_norm: 0.9999991267068608, iteration: 231514
loss: 1.0222543478012085,grad_norm: 0.8067962624898265, iteration: 231515
loss: 0.9990529417991638,grad_norm: 0.9204700570438271, iteration: 231516
loss: 1.0033444166183472,grad_norm: 0.9771053676290341, iteration: 231517
loss: 1.011223554611206,grad_norm: 0.9907709203078015, iteration: 231518
loss: 0.9949274659156799,grad_norm: 0.9999990558643839, iteration: 231519
loss: 0.9881806373596191,grad_norm: 0.9473058971822274, iteration: 231520
loss: 0.9792550206184387,grad_norm: 0.9137260399183945, iteration: 231521
loss: 0.9883736371994019,grad_norm: 0.8001127647360978, iteration: 231522
loss: 0.9841071963310242,grad_norm: 0.8237082112649136, iteration: 231523
loss: 1.006659984588623,grad_norm: 0.9463009879507834, iteration: 231524
loss: 1.0030426979064941,grad_norm: 0.9120275103928388, iteration: 231525
loss: 0.9923120737075806,grad_norm: 0.8437232801893428, iteration: 231526
loss: 0.9985287189483643,grad_norm: 0.9662141566089618, iteration: 231527
loss: 1.0016955137252808,grad_norm: 0.9082733431578129, iteration: 231528
loss: 0.9840829968452454,grad_norm: 0.8418188797657195, iteration: 231529
loss: 0.9998583197593689,grad_norm: 0.9716282015833467, iteration: 231530
loss: 1.0067838430404663,grad_norm: 0.9362020346526226, iteration: 231531
loss: 0.9857724905014038,grad_norm: 0.9999990857912435, iteration: 231532
loss: 0.9663604497909546,grad_norm: 0.9747458719516103, iteration: 231533
loss: 1.0108013153076172,grad_norm: 0.853899002662288, iteration: 231534
loss: 1.0187581777572632,grad_norm: 0.9999991919721217, iteration: 231535
loss: 0.9918836355209351,grad_norm: 0.7794783591729624, iteration: 231536
loss: 0.9873951077461243,grad_norm: 0.8873849944591505, iteration: 231537
loss: 1.0467448234558105,grad_norm: 0.9728234926643808, iteration: 231538
loss: 0.9723626375198364,grad_norm: 0.8575613376478157, iteration: 231539
loss: 1.0185317993164062,grad_norm: 0.9201755684834785, iteration: 231540
loss: 1.0174028873443604,grad_norm: 0.9648066398538137, iteration: 231541
loss: 1.031440258026123,grad_norm: 0.9480572803019813, iteration: 231542
loss: 1.0252630710601807,grad_norm: 0.9585660946671093, iteration: 231543
loss: 0.994813084602356,grad_norm: 0.9736794990951854, iteration: 231544
loss: 1.011401891708374,grad_norm: 0.8664777240609731, iteration: 231545
loss: 1.0433861017227173,grad_norm: 0.9139542778203575, iteration: 231546
loss: 1.0393086671829224,grad_norm: 0.904951751956425, iteration: 231547
loss: 1.0252130031585693,grad_norm: 0.9292027435322434, iteration: 231548
loss: 1.005246639251709,grad_norm: 0.756604647769732, iteration: 231549
loss: 1.0021501779556274,grad_norm: 0.8541348881231348, iteration: 231550
loss: 1.0126512050628662,grad_norm: 0.9999996138954158, iteration: 231551
loss: 1.079972743988037,grad_norm: 0.9999998169666905, iteration: 231552
loss: 0.9859545230865479,grad_norm: 0.9068782335744355, iteration: 231553
loss: 0.9936803579330444,grad_norm: 0.9526483064388158, iteration: 231554
loss: 1.0041123628616333,grad_norm: 0.9999990113894279, iteration: 231555
loss: 1.0455718040466309,grad_norm: 0.8326834183862126, iteration: 231556
loss: 1.0042791366577148,grad_norm: 0.8262669653137661, iteration: 231557
loss: 1.0164225101470947,grad_norm: 0.8952317107280441, iteration: 231558
loss: 0.9622999429702759,grad_norm: 0.9999990104259178, iteration: 231559
loss: 1.0166385173797607,grad_norm: 0.8355085315431965, iteration: 231560
loss: 0.9939739108085632,grad_norm: 0.9768081248518564, iteration: 231561
loss: 0.9645171165466309,grad_norm: 0.8562150738537686, iteration: 231562
loss: 1.1339404582977295,grad_norm: 0.9999998931019858, iteration: 231563
loss: 1.0025033950805664,grad_norm: 0.8737987609642651, iteration: 231564
loss: 0.9952785968780518,grad_norm: 0.9452630997679515, iteration: 231565
loss: 1.0061389207839966,grad_norm: 0.9999990843901504, iteration: 231566
loss: 0.9818972945213318,grad_norm: 0.8514930500979765, iteration: 231567
loss: 0.9727451801300049,grad_norm: 0.7868422162178627, iteration: 231568
loss: 0.9854589104652405,grad_norm: 0.9278112483096431, iteration: 231569
loss: 1.034974217414856,grad_norm: 0.9999996996740963, iteration: 231570
loss: 1.022584080696106,grad_norm: 0.8221059781169693, iteration: 231571
loss: 0.9820889234542847,grad_norm: 0.9999990532135022, iteration: 231572
loss: 0.984004020690918,grad_norm: 0.9119170922332043, iteration: 231573
loss: 1.0082182884216309,grad_norm: 0.8875003089949339, iteration: 231574
loss: 1.1026332378387451,grad_norm: 0.9999990470451842, iteration: 231575
loss: 0.9915281534194946,grad_norm: 0.9961196776349037, iteration: 231576
loss: 0.9747761487960815,grad_norm: 0.9101489771070668, iteration: 231577
loss: 1.023949146270752,grad_norm: 0.9999991538688581, iteration: 231578
loss: 1.0772454738616943,grad_norm: 0.9999995121546609, iteration: 231579
loss: 0.9952034950256348,grad_norm: 0.9999990940245703, iteration: 231580
loss: 1.0627778768539429,grad_norm: 0.9999992421713377, iteration: 231581
loss: 0.9903055429458618,grad_norm: 0.838448262579341, iteration: 231582
loss: 0.9770917296409607,grad_norm: 0.9999994788835608, iteration: 231583
loss: 1.0119447708129883,grad_norm: 0.9602024790199102, iteration: 231584
loss: 0.998837411403656,grad_norm: 0.9999992948155808, iteration: 231585
loss: 0.9920665621757507,grad_norm: 0.9192245429801121, iteration: 231586
loss: 0.9906148314476013,grad_norm: 0.8772678995255221, iteration: 231587
loss: 0.9846143126487732,grad_norm: 0.843322643812689, iteration: 231588
loss: 1.3016445636749268,grad_norm: 0.9999995476882265, iteration: 231589
loss: 0.9834530353546143,grad_norm: 0.969345273507695, iteration: 231590
loss: 0.9672216773033142,grad_norm: 0.9585285289462442, iteration: 231591
loss: 1.0280601978302002,grad_norm: 0.8778829373048588, iteration: 231592
loss: 0.989165186882019,grad_norm: 0.8928540049598037, iteration: 231593
loss: 1.0290544033050537,grad_norm: 0.9999991104760878, iteration: 231594
loss: 1.0352648496627808,grad_norm: 0.9263434664665996, iteration: 231595
loss: 1.005652904510498,grad_norm: 0.9872925428065693, iteration: 231596
loss: 1.0167021751403809,grad_norm: 0.9303278877991978, iteration: 231597
loss: 0.9936532974243164,grad_norm: 0.9902916708245895, iteration: 231598
loss: 1.151602864265442,grad_norm: 0.9999990502232629, iteration: 231599
loss: 0.992729902267456,grad_norm: 0.7811542558259847, iteration: 231600
loss: 0.9858390688896179,grad_norm: 0.999999199119471, iteration: 231601
loss: 1.0277220010757446,grad_norm: 0.999999138254898, iteration: 231602
loss: 0.9809016585350037,grad_norm: 0.9088461909125167, iteration: 231603
loss: 0.981411337852478,grad_norm: 0.9338489003825509, iteration: 231604
loss: 0.9836729168891907,grad_norm: 0.9437221200672776, iteration: 231605
loss: 0.9667325019836426,grad_norm: 0.9492985006414698, iteration: 231606
loss: 1.0338337421417236,grad_norm: 0.8804849176238055, iteration: 231607
loss: 0.9907490015029907,grad_norm: 0.8645553224919117, iteration: 231608
loss: 1.0339044332504272,grad_norm: 0.999999857762885, iteration: 231609
loss: 0.9928669333457947,grad_norm: 0.9425420721806218, iteration: 231610
loss: 1.004492163658142,grad_norm: 0.9757057753621329, iteration: 231611
loss: 1.0021111965179443,grad_norm: 0.8949100141798134, iteration: 231612
loss: 0.9605372548103333,grad_norm: 0.9874397244559959, iteration: 231613
loss: 1.0931053161621094,grad_norm: 0.9999991057316756, iteration: 231614
loss: 1.0417510271072388,grad_norm: 0.9999998321050457, iteration: 231615
loss: 0.9891799688339233,grad_norm: 0.999999115206922, iteration: 231616
loss: 1.0447455644607544,grad_norm: 0.7671598944138127, iteration: 231617
loss: 0.9968984723091125,grad_norm: 0.8042216549553414, iteration: 231618
loss: 0.9798363447189331,grad_norm: 0.9999991550498071, iteration: 231619
loss: 0.9877333641052246,grad_norm: 0.8072906513983727, iteration: 231620
loss: 1.0046029090881348,grad_norm: 0.9999993902422248, iteration: 231621
loss: 1.000933289527893,grad_norm: 0.7126904263930858, iteration: 231622
loss: 1.0173676013946533,grad_norm: 0.999999112942497, iteration: 231623
loss: 0.9689897298812866,grad_norm: 0.9956442497060016, iteration: 231624
loss: 1.0295451879501343,grad_norm: 0.8305081671045147, iteration: 231625
loss: 0.9803523421287537,grad_norm: 0.8105893270663868, iteration: 231626
loss: 0.9717860817909241,grad_norm: 0.9132420934096708, iteration: 231627
loss: 0.985747754573822,grad_norm: 0.8967966882549036, iteration: 231628
loss: 1.042632818222046,grad_norm: 0.9999995139280136, iteration: 231629
loss: 0.9843527674674988,grad_norm: 0.9431152612417488, iteration: 231630
loss: 1.0234794616699219,grad_norm: 0.9592024817717315, iteration: 231631
loss: 1.0103281736373901,grad_norm: 0.8706132090361015, iteration: 231632
loss: 1.0095924139022827,grad_norm: 0.9999997046407018, iteration: 231633
loss: 1.039798378944397,grad_norm: 0.9062965862976375, iteration: 231634
loss: 0.9794607162475586,grad_norm: 0.9999990882004366, iteration: 231635
loss: 1.0093061923980713,grad_norm: 0.9096905556660131, iteration: 231636
loss: 0.9869272708892822,grad_norm: 0.9672922393948861, iteration: 231637
loss: 1.002437949180603,grad_norm: 0.8824919356220409, iteration: 231638
loss: 1.0038923025131226,grad_norm: 0.8844961291947397, iteration: 231639
loss: 0.9957760572433472,grad_norm: 0.9937468988870243, iteration: 231640
loss: 1.0330530405044556,grad_norm: 0.9999991162437973, iteration: 231641
loss: 0.9652462005615234,grad_norm: 0.7897089825636142, iteration: 231642
loss: 0.9898638129234314,grad_norm: 0.7859776079543435, iteration: 231643
loss: 0.9879814982414246,grad_norm: 0.9999992715288863, iteration: 231644
loss: 0.997142493724823,grad_norm: 0.9368599134623192, iteration: 231645
loss: 0.9969574809074402,grad_norm: 0.9775422590005122, iteration: 231646
loss: 1.0620391368865967,grad_norm: 0.999999356989316, iteration: 231647
loss: 1.0198091268539429,grad_norm: 0.9901230511185969, iteration: 231648
loss: 0.9972724318504333,grad_norm: 0.9328001404453731, iteration: 231649
loss: 1.0215122699737549,grad_norm: 0.9980107198272792, iteration: 231650
loss: 1.0254861116409302,grad_norm: 0.9630427138684469, iteration: 231651
loss: 1.0355387926101685,grad_norm: 0.9999989978812318, iteration: 231652
loss: 0.9679529070854187,grad_norm: 0.8946480202129826, iteration: 231653
loss: 1.0000433921813965,grad_norm: 0.7845807848548617, iteration: 231654
loss: 1.0872268676757812,grad_norm: 0.999999279602657, iteration: 231655
loss: 0.9992716908454895,grad_norm: 0.829967187802661, iteration: 231656
loss: 0.9933162927627563,grad_norm: 0.9759806035993376, iteration: 231657
loss: 1.3001259565353394,grad_norm: 0.9999995610605843, iteration: 231658
loss: 1.0509061813354492,grad_norm: 0.9128059147354498, iteration: 231659
loss: 0.996645987033844,grad_norm: 0.9650803808903591, iteration: 231660
loss: 0.9941835403442383,grad_norm: 0.8599048523181332, iteration: 231661
loss: 1.0167770385742188,grad_norm: 0.9751442471101828, iteration: 231662
loss: 0.9868465662002563,grad_norm: 0.8520087933819173, iteration: 231663
loss: 1.010607361793518,grad_norm: 0.9999993459741375, iteration: 231664
loss: 0.9928826093673706,grad_norm: 0.9999991225763972, iteration: 231665
loss: 1.0136704444885254,grad_norm: 0.8763980885954225, iteration: 231666
loss: 0.9892570376396179,grad_norm: 0.7889154744202203, iteration: 231667
loss: 1.058509111404419,grad_norm: 0.9999998501626595, iteration: 231668
loss: 1.0244781970977783,grad_norm: 0.9999992340359345, iteration: 231669
loss: 0.9940087795257568,grad_norm: 0.8898877765597372, iteration: 231670
loss: 1.0174922943115234,grad_norm: 0.9999991073024486, iteration: 231671
loss: 0.9830755591392517,grad_norm: 0.8110350336222057, iteration: 231672
loss: 1.0083788633346558,grad_norm: 0.999999119185059, iteration: 231673
loss: 0.9840953350067139,grad_norm: 0.861444299353955, iteration: 231674
loss: 0.9863549470901489,grad_norm: 0.8576003130011034, iteration: 231675
loss: 0.9891700148582458,grad_norm: 0.9941905347140096, iteration: 231676
loss: 1.0018171072006226,grad_norm: 0.9999991072999307, iteration: 231677
loss: 1.033113718032837,grad_norm: 0.9993648996371874, iteration: 231678
loss: 1.0729740858078003,grad_norm: 0.9105399349944153, iteration: 231679
loss: 0.9824100136756897,grad_norm: 0.8582079932877497, iteration: 231680
loss: 1.0206471681594849,grad_norm: 0.9999990770669714, iteration: 231681
loss: 0.9988625049591064,grad_norm: 0.8619362151283795, iteration: 231682
loss: 0.9905644059181213,grad_norm: 0.8138927448030858, iteration: 231683
loss: 0.9860712289810181,grad_norm: 0.9999990735564488, iteration: 231684
loss: 1.0239121913909912,grad_norm: 0.9244081986681447, iteration: 231685
loss: 0.9768961668014526,grad_norm: 0.8683204585790937, iteration: 231686
loss: 1.0338549613952637,grad_norm: 0.9999992313744958, iteration: 231687
loss: 1.0252047777175903,grad_norm: 0.9999992054859913, iteration: 231688
loss: 1.0683488845825195,grad_norm: 0.9703900985641757, iteration: 231689
loss: 0.991439163684845,grad_norm: 0.8940530039169565, iteration: 231690
loss: 1.0023713111877441,grad_norm: 0.8534257039315186, iteration: 231691
loss: 1.0876989364624023,grad_norm: 0.9999997768758178, iteration: 231692
loss: 0.9647344350814819,grad_norm: 0.9999990115191687, iteration: 231693
loss: 1.0232526063919067,grad_norm: 0.9999992303406692, iteration: 231694
loss: 0.9993855953216553,grad_norm: 0.8690370611701329, iteration: 231695
loss: 0.9948359727859497,grad_norm: 0.9227360767063878, iteration: 231696
loss: 1.015429139137268,grad_norm: 0.999999261981286, iteration: 231697
loss: 0.9960707426071167,grad_norm: 0.8551957333192932, iteration: 231698
loss: 0.9951258301734924,grad_norm: 0.8672418855422627, iteration: 231699
loss: 0.9996752738952637,grad_norm: 0.9999991264419745, iteration: 231700
loss: 0.9687105417251587,grad_norm: 0.9571473663551388, iteration: 231701
loss: 0.9524514079093933,grad_norm: 0.917950137540325, iteration: 231702
loss: 1.0131564140319824,grad_norm: 0.9999990539006823, iteration: 231703
loss: 1.0889899730682373,grad_norm: 0.999999107829249, iteration: 231704
loss: 1.031740427017212,grad_norm: 0.9999990983571428, iteration: 231705
loss: 1.02382493019104,grad_norm: 0.8512981056187903, iteration: 231706
loss: 0.9635800123214722,grad_norm: 0.9625991346093221, iteration: 231707
loss: 0.9942247271537781,grad_norm: 0.9999994033723835, iteration: 231708
loss: 1.0420526266098022,grad_norm: 0.9999998896740605, iteration: 231709
loss: 1.0035616159439087,grad_norm: 0.7881909054483554, iteration: 231710
loss: 0.9878233075141907,grad_norm: 0.9221681924185497, iteration: 231711
loss: 0.9936742186546326,grad_norm: 0.8092108591074831, iteration: 231712
loss: 0.9273392558097839,grad_norm: 0.8346146754418742, iteration: 231713
loss: 1.0184481143951416,grad_norm: 0.999999127378497, iteration: 231714
loss: 1.0279872417449951,grad_norm: 0.790198878908763, iteration: 231715
loss: 1.0048353672027588,grad_norm: 0.9999996763174266, iteration: 231716
loss: 1.0184197425842285,grad_norm: 0.7765398001654554, iteration: 231717
loss: 1.0107715129852295,grad_norm: 0.8620516822104901, iteration: 231718
loss: 1.0209476947784424,grad_norm: 0.8833590155636668, iteration: 231719
loss: 1.002648949623108,grad_norm: 0.9999993206904176, iteration: 231720
loss: 0.9636783599853516,grad_norm: 0.895702992552618, iteration: 231721
loss: 0.9775551557540894,grad_norm: 0.7680951447284182, iteration: 231722
loss: 1.0072333812713623,grad_norm: 0.8898845575864575, iteration: 231723
loss: 1.0125248432159424,grad_norm: 0.9899482225691542, iteration: 231724
loss: 1.0229014158248901,grad_norm: 0.8198970042129216, iteration: 231725
loss: 1.0161856412887573,grad_norm: 0.9999991067830606, iteration: 231726
loss: 1.0310211181640625,grad_norm: 0.9999991840204557, iteration: 231727
loss: 1.009828805923462,grad_norm: 0.9612274770108982, iteration: 231728
loss: 0.9850289225578308,grad_norm: 0.9298864923950486, iteration: 231729
loss: 1.131659984588623,grad_norm: 0.9999991618215359, iteration: 231730
loss: 0.9710779190063477,grad_norm: 0.9999991552607587, iteration: 231731
loss: 1.0071380138397217,grad_norm: 0.8067150827184968, iteration: 231732
loss: 1.0236663818359375,grad_norm: 0.9999990953358351, iteration: 231733
loss: 1.1243274211883545,grad_norm: 0.9999995942102793, iteration: 231734
loss: 1.0115766525268555,grad_norm: 0.9999990640619262, iteration: 231735
loss: 1.045649766921997,grad_norm: 0.9999995058249365, iteration: 231736
loss: 0.9944490194320679,grad_norm: 0.8084209032751373, iteration: 231737
loss: 0.9708409309387207,grad_norm: 0.92642836765412, iteration: 231738
loss: 1.0255788564682007,grad_norm: 0.8714320670576727, iteration: 231739
loss: 1.0059951543807983,grad_norm: 0.8249116964325421, iteration: 231740
loss: 1.0855128765106201,grad_norm: 0.9999992426017595, iteration: 231741
loss: 1.0073341131210327,grad_norm: 0.9010854502367849, iteration: 231742
loss: 1.0574686527252197,grad_norm: 0.9999992114308401, iteration: 231743
loss: 0.9937999248504639,grad_norm: 0.9999990432790676, iteration: 231744
loss: 0.9993236064910889,grad_norm: 0.9999991618314118, iteration: 231745
loss: 0.9853857159614563,grad_norm: 0.8365405987105069, iteration: 231746
loss: 1.0009993314743042,grad_norm: 0.9844889669224076, iteration: 231747
loss: 1.0163724422454834,grad_norm: 0.9554172364593753, iteration: 231748
loss: 1.0117355585098267,grad_norm: 0.9999990614253882, iteration: 231749
loss: 0.9669725298881531,grad_norm: 0.8078464645997248, iteration: 231750
loss: 0.9964500069618225,grad_norm: 0.782483554596912, iteration: 231751
loss: 1.0028971433639526,grad_norm: 0.9347498004960524, iteration: 231752
loss: 0.9623942971229553,grad_norm: 0.9019485032451748, iteration: 231753
loss: 1.0479156970977783,grad_norm: 0.9999990529374183, iteration: 231754
loss: 1.0120232105255127,grad_norm: 0.9052040906836027, iteration: 231755
loss: 1.0040810108184814,grad_norm: 0.9999998597509014, iteration: 231756
loss: 1.049160122871399,grad_norm: 0.999999757789347, iteration: 231757
loss: 0.9983035922050476,grad_norm: 0.9999991165448874, iteration: 231758
loss: 1.0054816007614136,grad_norm: 0.9896426311054348, iteration: 231759
loss: 1.2874864339828491,grad_norm: 0.9999999772857715, iteration: 231760
loss: 0.9838128089904785,grad_norm: 0.9999989735529543, iteration: 231761
loss: 1.0360873937606812,grad_norm: 0.9131498119567835, iteration: 231762
loss: 1.0186148881912231,grad_norm: 0.9999991347485468, iteration: 231763
loss: 1.0100518465042114,grad_norm: 0.9404911698965247, iteration: 231764
loss: 1.0029984712600708,grad_norm: 0.8898118590587041, iteration: 231765
loss: 1.202275037765503,grad_norm: 0.999999576642725, iteration: 231766
loss: 1.014267921447754,grad_norm: 0.832912563732822, iteration: 231767
loss: 1.0298994779586792,grad_norm: 0.9999989615186082, iteration: 231768
loss: 1.1865403652191162,grad_norm: 0.9999997187949263, iteration: 231769
loss: 1.1918543577194214,grad_norm: 0.9999996491510633, iteration: 231770
loss: 1.0626630783081055,grad_norm: 0.999999657593149, iteration: 231771
loss: 1.0314586162567139,grad_norm: 0.8156211383982495, iteration: 231772
loss: 1.01054048538208,grad_norm: 0.9999989863280674, iteration: 231773
loss: 0.9996405839920044,grad_norm: 0.8395326063229092, iteration: 231774
loss: 1.0795990228652954,grad_norm: 0.9999997858987801, iteration: 231775
loss: 1.0052677392959595,grad_norm: 0.9903986669619644, iteration: 231776
loss: 1.0173321962356567,grad_norm: 0.9999992111803606, iteration: 231777
loss: 1.0057350397109985,grad_norm: 0.9371608820721384, iteration: 231778
loss: 1.153754472732544,grad_norm: 0.9999992077677478, iteration: 231779
loss: 1.0119554996490479,grad_norm: 0.8405340624291923, iteration: 231780
loss: 1.0514812469482422,grad_norm: 0.9999993390033094, iteration: 231781
loss: 1.0322551727294922,grad_norm: 0.9739054007011769, iteration: 231782
loss: 1.01171875,grad_norm: 0.8777031759893577, iteration: 231783
loss: 1.0072612762451172,grad_norm: 0.7766693047475483, iteration: 231784
loss: 0.9747022986412048,grad_norm: 0.9830910995155603, iteration: 231785
loss: 0.9987605214118958,grad_norm: 0.9999991850127273, iteration: 231786
loss: 1.0029182434082031,grad_norm: 0.93486618492087, iteration: 231787
loss: 0.9763324856758118,grad_norm: 0.8622569843594756, iteration: 231788
loss: 1.0511764287948608,grad_norm: 0.9999998902250715, iteration: 231789
loss: 1.005151629447937,grad_norm: 0.9999991213979526, iteration: 231790
loss: 1.00232994556427,grad_norm: 0.9999991386765593, iteration: 231791
loss: 1.0054452419281006,grad_norm: 0.9999998630453092, iteration: 231792
loss: 0.9991604089736938,grad_norm: 0.9571411717670673, iteration: 231793
loss: 1.047133445739746,grad_norm: 0.999999194506072, iteration: 231794
loss: 0.9736560583114624,grad_norm: 0.8559462780947038, iteration: 231795
loss: 1.0752723217010498,grad_norm: 0.999999461447001, iteration: 231796
loss: 1.0927488803863525,grad_norm: 0.9066667468973896, iteration: 231797
loss: 0.9814732670783997,grad_norm: 0.9999993279757883, iteration: 231798
loss: 0.9824308156967163,grad_norm: 0.778067911146511, iteration: 231799
loss: 1.056321144104004,grad_norm: 0.9999994368721066, iteration: 231800
loss: 0.9970244765281677,grad_norm: 0.9105841027114235, iteration: 231801
loss: 0.9807199835777283,grad_norm: 0.9999997239521178, iteration: 231802
loss: 1.018027901649475,grad_norm: 0.9999992817886876, iteration: 231803
loss: 1.0111639499664307,grad_norm: 0.9952039009466926, iteration: 231804
loss: 1.144990086555481,grad_norm: 0.9999993527462978, iteration: 231805
loss: 0.9850360751152039,grad_norm: 0.9539727995369932, iteration: 231806
loss: 1.0312644243240356,grad_norm: 0.8189129887452669, iteration: 231807
loss: 1.1057476997375488,grad_norm: 0.99999957587958, iteration: 231808
loss: 1.0171210765838623,grad_norm: 0.944409645682944, iteration: 231809
loss: 1.009640097618103,grad_norm: 0.9660633633238231, iteration: 231810
loss: 1.0160586833953857,grad_norm: 0.999999660603705, iteration: 231811
loss: 1.0120600461959839,grad_norm: 0.9168186936437064, iteration: 231812
loss: 0.9968127012252808,grad_norm: 0.9497094096446165, iteration: 231813
loss: 1.087872862815857,grad_norm: 0.9999999063838518, iteration: 231814
loss: 0.9482439756393433,grad_norm: 0.9798838115824685, iteration: 231815
loss: 1.0116841793060303,grad_norm: 0.9999991327371394, iteration: 231816
loss: 0.9771966338157654,grad_norm: 0.9999990902573586, iteration: 231817
loss: 0.9768445491790771,grad_norm: 0.7739649468179518, iteration: 231818
loss: 0.9738762974739075,grad_norm: 0.9648635941725051, iteration: 231819
loss: 0.9802901148796082,grad_norm: 0.8012341927279958, iteration: 231820
loss: 1.0114426612854004,grad_norm: 0.8785158573881345, iteration: 231821
loss: 1.0042123794555664,grad_norm: 0.9999992365218332, iteration: 231822
loss: 0.9887774586677551,grad_norm: 0.8687186639806852, iteration: 231823
loss: 1.0344549417495728,grad_norm: 0.8971886447620874, iteration: 231824
loss: 0.9911335706710815,grad_norm: 0.9551638313335472, iteration: 231825
loss: 1.0457016229629517,grad_norm: 0.9999998109475423, iteration: 231826
loss: 0.9999958276748657,grad_norm: 0.8926976505701738, iteration: 231827
loss: 1.0491220951080322,grad_norm: 0.7767048796354251, iteration: 231828
loss: 0.9915633201599121,grad_norm: 0.9302152115103655, iteration: 231829
loss: 1.0250288248062134,grad_norm: 0.8132522951377258, iteration: 231830
loss: 1.0414389371871948,grad_norm: 0.8990323733236802, iteration: 231831
loss: 1.0448436737060547,grad_norm: 0.9999992247521126, iteration: 231832
loss: 1.0062949657440186,grad_norm: 0.8264916496653584, iteration: 231833
loss: 1.0055041313171387,grad_norm: 0.9642297078469168, iteration: 231834
loss: 0.9764399528503418,grad_norm: 0.9005603439044683, iteration: 231835
loss: 0.9686327576637268,grad_norm: 0.9260871768024217, iteration: 231836
loss: 1.0042898654937744,grad_norm: 0.9999997935392873, iteration: 231837
loss: 0.994659423828125,grad_norm: 0.9999993808046915, iteration: 231838
loss: 1.0237090587615967,grad_norm: 0.880995416547449, iteration: 231839
loss: 0.9811354875564575,grad_norm: 0.9999992409150528, iteration: 231840
loss: 0.9984101057052612,grad_norm: 0.9099233212808345, iteration: 231841
loss: 1.0357085466384888,grad_norm: 0.9999991361824935, iteration: 231842
loss: 1.006986141204834,grad_norm: 0.9604473890585592, iteration: 231843
loss: 1.003760814666748,grad_norm: 0.9462000642629362, iteration: 231844
loss: 0.9702228307723999,grad_norm: 0.8244280374979286, iteration: 231845
loss: 1.1960381269454956,grad_norm: 0.9999992668091578, iteration: 231846
loss: 0.9707958698272705,grad_norm: 0.8562681237737494, iteration: 231847
loss: 1.0616928339004517,grad_norm: 0.9999990288643852, iteration: 231848
loss: 1.0176517963409424,grad_norm: 0.8421880418408876, iteration: 231849
loss: 0.9911772608757019,grad_norm: 0.9585204896494043, iteration: 231850
loss: 0.9861173629760742,grad_norm: 0.8644920516428509, iteration: 231851
loss: 0.9704371094703674,grad_norm: 0.9638943362054239, iteration: 231852
loss: 0.999483585357666,grad_norm: 0.9999990056881674, iteration: 231853
loss: 1.1463967561721802,grad_norm: 0.9999998216380666, iteration: 231854
loss: 0.99773108959198,grad_norm: 0.9999995016016092, iteration: 231855
loss: 0.9896402955055237,grad_norm: 0.8521106216653264, iteration: 231856
loss: 0.9953789114952087,grad_norm: 0.9370679606481376, iteration: 231857
loss: 0.9786781668663025,grad_norm: 0.9647491021733043, iteration: 231858
loss: 1.0602326393127441,grad_norm: 0.9999994988783151, iteration: 231859
loss: 1.0142111778259277,grad_norm: 0.999999223872427, iteration: 231860
loss: 1.0053144693374634,grad_norm: 0.8657800767593089, iteration: 231861
loss: 1.074262261390686,grad_norm: 0.986153162910372, iteration: 231862
loss: 1.032067060470581,grad_norm: 0.8467129864604834, iteration: 231863
loss: 0.9976847767829895,grad_norm: 0.9357824197611371, iteration: 231864
loss: 1.018593192100525,grad_norm: 0.9145888468905984, iteration: 231865
loss: 1.035591721534729,grad_norm: 0.9999990846423128, iteration: 231866
loss: 0.9903789758682251,grad_norm: 0.9999991142600688, iteration: 231867
loss: 0.9889848828315735,grad_norm: 0.8949931831410433, iteration: 231868
loss: 1.0321071147918701,grad_norm: 0.9999994414409737, iteration: 231869
loss: 1.0001455545425415,grad_norm: 0.7517787004223456, iteration: 231870
loss: 0.9920040369033813,grad_norm: 0.992063665569744, iteration: 231871
loss: 1.0032765865325928,grad_norm: 0.9999995550305899, iteration: 231872
loss: 1.0143712759017944,grad_norm: 0.8723006063917995, iteration: 231873
loss: 0.9935840368270874,grad_norm: 0.9515490732380794, iteration: 231874
loss: 0.9877522587776184,grad_norm: 0.9999990899863812, iteration: 231875
loss: 1.003216028213501,grad_norm: 0.8687614083511179, iteration: 231876
loss: 1.1169148683547974,grad_norm: 0.9999998757660079, iteration: 231877
loss: 1.1050879955291748,grad_norm: 0.9999995067899117, iteration: 231878
loss: 1.055539608001709,grad_norm: 0.9999990763902767, iteration: 231879
loss: 1.0047762393951416,grad_norm: 0.7132710874426245, iteration: 231880
loss: 0.967967689037323,grad_norm: 0.9331806763505284, iteration: 231881
loss: 1.135236382484436,grad_norm: 0.999998989525587, iteration: 231882
loss: 1.0178782939910889,grad_norm: 0.8154164735717186, iteration: 231883
loss: 1.000850796699524,grad_norm: 0.9999993321110051, iteration: 231884
loss: 0.9782740473747253,grad_norm: 0.8979550042281197, iteration: 231885
loss: 1.001245379447937,grad_norm: 0.9544641716323182, iteration: 231886
loss: 1.008182168006897,grad_norm: 0.8152218296728022, iteration: 231887
loss: 1.0657551288604736,grad_norm: 0.9999994651516352, iteration: 231888
loss: 1.0403380393981934,grad_norm: 0.9805210518184762, iteration: 231889
loss: 1.040091633796692,grad_norm: 0.9540470024002083, iteration: 231890
loss: 0.9656981825828552,grad_norm: 0.8546489712552189, iteration: 231891
loss: 0.9756646156311035,grad_norm: 0.9999991492780814, iteration: 231892
loss: 1.0482662916183472,grad_norm: 0.9080622313206461, iteration: 231893
loss: 1.110207438468933,grad_norm: 0.9999998430432605, iteration: 231894
loss: 0.9946302175521851,grad_norm: 0.8498188279691005, iteration: 231895
loss: 0.9989752173423767,grad_norm: 0.8961541382587784, iteration: 231896
loss: 1.0415452718734741,grad_norm: 0.7923170646084419, iteration: 231897
loss: 0.928501546382904,grad_norm: 0.9999990891763928, iteration: 231898
loss: 1.0016934871673584,grad_norm: 0.9999994851193573, iteration: 231899
loss: 0.9540413618087769,grad_norm: 0.904400224201648, iteration: 231900
loss: 0.9976838827133179,grad_norm: 0.9207685806789255, iteration: 231901
loss: 1.0677963495254517,grad_norm: 0.9999989642321168, iteration: 231902
loss: 1.0365747213363647,grad_norm: 0.7638860246129733, iteration: 231903
loss: 1.154558777809143,grad_norm: 0.9999997904717199, iteration: 231904
loss: 1.0310386419296265,grad_norm: 0.9999991330457466, iteration: 231905
loss: 1.0233960151672363,grad_norm: 0.8856567187147132, iteration: 231906
loss: 1.0321011543273926,grad_norm: 0.8400954962587093, iteration: 231907
loss: 0.990110456943512,grad_norm: 0.9999992512160709, iteration: 231908
loss: 0.978415310382843,grad_norm: 0.8324424190368016, iteration: 231909
loss: 0.987310528755188,grad_norm: 0.8604637784976544, iteration: 231910
loss: 0.9866439700126648,grad_norm: 0.9999991582485974, iteration: 231911
loss: 0.9751927256584167,grad_norm: 0.9001733702975658, iteration: 231912
loss: 1.0466971397399902,grad_norm: 0.999999419322247, iteration: 231913
loss: 1.010974407196045,grad_norm: 0.9999990888830141, iteration: 231914
loss: 1.017045021057129,grad_norm: 0.8026462501158813, iteration: 231915
loss: 1.0146613121032715,grad_norm: 0.9999992145984347, iteration: 231916
loss: 1.0082578659057617,grad_norm: 0.9990582517978112, iteration: 231917
loss: 1.0075945854187012,grad_norm: 0.9999990433388933, iteration: 231918
loss: 1.0157275199890137,grad_norm: 0.852076648833188, iteration: 231919
loss: 1.002348780632019,grad_norm: 0.9558637702721124, iteration: 231920
loss: 1.0172910690307617,grad_norm: 0.9999990461163364, iteration: 231921
loss: 0.9819474220275879,grad_norm: 0.8575149593924843, iteration: 231922
loss: 0.9755408763885498,grad_norm: 0.8442788261071263, iteration: 231923
loss: 1.0187636613845825,grad_norm: 0.9828866407184615, iteration: 231924
loss: 1.0587416887283325,grad_norm: 0.8758889201393474, iteration: 231925
loss: 1.0229874849319458,grad_norm: 0.8906231658470531, iteration: 231926
loss: 1.033638596534729,grad_norm: 0.808473680276622, iteration: 231927
loss: 1.0231600999832153,grad_norm: 0.9999990170425066, iteration: 231928
loss: 0.9950979948043823,grad_norm: 0.8312151745825018, iteration: 231929
loss: 1.0119762420654297,grad_norm: 0.9999991417754999, iteration: 231930
loss: 0.996588408946991,grad_norm: 0.7615290123375859, iteration: 231931
loss: 1.018937349319458,grad_norm: 0.9256978766099742, iteration: 231932
loss: 0.9900782704353333,grad_norm: 0.8945013876963575, iteration: 231933
loss: 1.040470004081726,grad_norm: 0.9999992271631021, iteration: 231934
loss: 1.0069938898086548,grad_norm: 0.9393825667426491, iteration: 231935
loss: 0.9756631255149841,grad_norm: 0.8730775964198582, iteration: 231936
loss: 1.0119482278823853,grad_norm: 0.9258495631368923, iteration: 231937
loss: 1.0018463134765625,grad_norm: 0.986553151828308, iteration: 231938
loss: 1.0164493322372437,grad_norm: 0.9016774949600889, iteration: 231939
loss: 1.0151489973068237,grad_norm: 0.9439218684933438, iteration: 231940
loss: 1.004886507987976,grad_norm: 0.96186590492104, iteration: 231941
loss: 0.9946656823158264,grad_norm: 0.9805051657563527, iteration: 231942
loss: 1.0037633180618286,grad_norm: 0.8426558778608535, iteration: 231943
loss: 0.9873011708259583,grad_norm: 0.7383060543269028, iteration: 231944
loss: 0.9819642901420593,grad_norm: 0.9878302528556148, iteration: 231945
loss: 1.00666081905365,grad_norm: 0.999999037844497, iteration: 231946
loss: 0.9532737135887146,grad_norm: 0.8926887246430856, iteration: 231947
loss: 0.9799225926399231,grad_norm: 0.9999992497245277, iteration: 231948
loss: 0.9764575958251953,grad_norm: 0.8599125678544084, iteration: 231949
loss: 0.9607596397399902,grad_norm: 0.8095493809532935, iteration: 231950
loss: 0.9924730658531189,grad_norm: 0.9134190485286062, iteration: 231951
loss: 0.966865599155426,grad_norm: 0.9999991364582915, iteration: 231952
loss: 0.9569688439369202,grad_norm: 0.9374924793773871, iteration: 231953
loss: 1.0158051252365112,grad_norm: 0.9999993264575346, iteration: 231954
loss: 1.0041167736053467,grad_norm: 0.9999990563221386, iteration: 231955
loss: 1.0158743858337402,grad_norm: 0.881811382463606, iteration: 231956
loss: 1.0166418552398682,grad_norm: 0.9912236150990394, iteration: 231957
loss: 1.0115605592727661,grad_norm: 0.9327234368689308, iteration: 231958
loss: 1.0109838247299194,grad_norm: 0.9999991100869489, iteration: 231959
loss: 1.0153065919876099,grad_norm: 0.9445396135750975, iteration: 231960
loss: 1.0072530508041382,grad_norm: 0.7808089542736542, iteration: 231961
loss: 1.0382248163223267,grad_norm: 0.9999989918344367, iteration: 231962
loss: 1.096358299255371,grad_norm: 0.9999996536242152, iteration: 231963
loss: 1.02791428565979,grad_norm: 0.9999990489925544, iteration: 231964
loss: 1.018851637840271,grad_norm: 0.7988139375881149, iteration: 231965
loss: 1.0289307832717896,grad_norm: 0.9555573926131259, iteration: 231966
loss: 0.9582766890525818,grad_norm: 0.9999991296540718, iteration: 231967
loss: 1.0032662153244019,grad_norm: 0.994749893821236, iteration: 231968
loss: 1.01374351978302,grad_norm: 0.7797811598602513, iteration: 231969
loss: 1.0148663520812988,grad_norm: 0.8284326910573785, iteration: 231970
loss: 0.9978542923927307,grad_norm: 0.9684821777739634, iteration: 231971
loss: 1.0023080110549927,grad_norm: 0.9637824665121419, iteration: 231972
loss: 0.9831212759017944,grad_norm: 0.9999995912224676, iteration: 231973
loss: 1.0128251314163208,grad_norm: 0.9497823245221195, iteration: 231974
loss: 1.0832804441452026,grad_norm: 0.9999993124089797, iteration: 231975
loss: 1.0180608034133911,grad_norm: 0.9999998078488406, iteration: 231976
loss: 1.008738398551941,grad_norm: 0.9196425823081237, iteration: 231977
loss: 0.9859734177589417,grad_norm: 0.9999992665255149, iteration: 231978
loss: 1.0017560720443726,grad_norm: 0.843817004666904, iteration: 231979
loss: 0.9599658250808716,grad_norm: 0.9107690476488568, iteration: 231980
loss: 0.9916431307792664,grad_norm: 0.9999992002916644, iteration: 231981
loss: 0.9952353835105896,grad_norm: 0.9659370325963926, iteration: 231982
loss: 1.006502628326416,grad_norm: 0.8795963802670238, iteration: 231983
loss: 0.9767439365386963,grad_norm: 0.8981642167453288, iteration: 231984
loss: 1.040439248085022,grad_norm: 0.8481689158108653, iteration: 231985
loss: 0.952806293964386,grad_norm: 0.9999990981726091, iteration: 231986
loss: 1.0158443450927734,grad_norm: 0.9999990338687494, iteration: 231987
loss: 1.0072977542877197,grad_norm: 0.8965873440009928, iteration: 231988
loss: 1.02289879322052,grad_norm: 0.8822249042677397, iteration: 231989
loss: 1.009857177734375,grad_norm: 0.9999991106892759, iteration: 231990
loss: 0.9716822504997253,grad_norm: 0.8234762603534761, iteration: 231991
loss: 0.9861632585525513,grad_norm: 0.8960378333606429, iteration: 231992
loss: 1.0039833784103394,grad_norm: 0.9919890700958951, iteration: 231993
loss: 0.9731286764144897,grad_norm: 0.9925169173391418, iteration: 231994
loss: 1.026820421218872,grad_norm: 0.9999992255883082, iteration: 231995
loss: 0.974686324596405,grad_norm: 0.9705534740500577, iteration: 231996
loss: 1.0189286470413208,grad_norm: 0.9473202297554119, iteration: 231997
loss: 0.9893907904624939,grad_norm: 0.8677357765282867, iteration: 231998
loss: 1.030685305595398,grad_norm: 0.9999993063620977, iteration: 231999
loss: 0.9861297607421875,grad_norm: 0.9124450459029236, iteration: 232000
loss: 1.0113444328308105,grad_norm: 0.9999991951454514, iteration: 232001
loss: 1.0032597780227661,grad_norm: 0.999999023077629, iteration: 232002
loss: 1.040748953819275,grad_norm: 0.9164105539734259, iteration: 232003
loss: 1.0247527360916138,grad_norm: 0.9999990349056391, iteration: 232004
loss: 1.0269526243209839,grad_norm: 0.9999989941435546, iteration: 232005
loss: 1.012311577796936,grad_norm: 0.9999990585019465, iteration: 232006
loss: 0.9779812693595886,grad_norm: 0.798864037373705, iteration: 232007
loss: 1.00481116771698,grad_norm: 0.7707418355874784, iteration: 232008
loss: 1.0161298513412476,grad_norm: 0.9528090556950786, iteration: 232009
loss: 0.9972781538963318,grad_norm: 0.8955322776426763, iteration: 232010
loss: 0.9484990835189819,grad_norm: 0.8591098668126166, iteration: 232011
loss: 0.9855910539627075,grad_norm: 0.9999990221864054, iteration: 232012
loss: 0.9761976599693298,grad_norm: 0.9999991335814339, iteration: 232013
loss: 0.9906908869743347,grad_norm: 0.9641556652170324, iteration: 232014
loss: 1.0003422498703003,grad_norm: 0.8889965495546172, iteration: 232015
loss: 1.023363471031189,grad_norm: 0.8431505937493605, iteration: 232016
loss: 0.9839926362037659,grad_norm: 0.9911414902196138, iteration: 232017
loss: 0.99775630235672,grad_norm: 0.8299667206218906, iteration: 232018
loss: 0.9452266097068787,grad_norm: 0.8986703314696272, iteration: 232019
loss: 0.9993358254432678,grad_norm: 0.9034730949755909, iteration: 232020
loss: 0.990460216999054,grad_norm: 0.8417482318051238, iteration: 232021
loss: 1.0303901433944702,grad_norm: 0.9999991344966246, iteration: 232022
loss: 0.9856465458869934,grad_norm: 0.9999991892657654, iteration: 232023
loss: 1.0080246925354004,grad_norm: 0.8861899309398107, iteration: 232024
loss: 0.9676719307899475,grad_norm: 0.9662025820455057, iteration: 232025
loss: 1.0252597332000732,grad_norm: 0.9999990656100636, iteration: 232026
loss: 0.9947163462638855,grad_norm: 0.7612929716058979, iteration: 232027
loss: 1.0453546047210693,grad_norm: 0.9999998009111789, iteration: 232028
loss: 1.004564642906189,grad_norm: 0.8362518252873629, iteration: 232029
loss: 0.9929551482200623,grad_norm: 0.8669569559252914, iteration: 232030
loss: 0.9999064803123474,grad_norm: 0.8642244290651241, iteration: 232031
loss: 0.9685009717941284,grad_norm: 0.9143606985351125, iteration: 232032
loss: 1.0183804035186768,grad_norm: 0.9999990578314532, iteration: 232033
loss: 0.9978278279304504,grad_norm: 0.999999209384852, iteration: 232034
loss: 0.9718651175498962,grad_norm: 0.924707080200178, iteration: 232035
loss: 0.9719941020011902,grad_norm: 0.8953247138095204, iteration: 232036
loss: 1.0028314590454102,grad_norm: 0.8764263788193658, iteration: 232037
loss: 0.9831531047821045,grad_norm: 0.9147804089711394, iteration: 232038
loss: 1.0375235080718994,grad_norm: 0.910931795475666, iteration: 232039
loss: 1.0046932697296143,grad_norm: 0.7361008793025811, iteration: 232040
loss: 0.990062952041626,grad_norm: 0.9999998061978304, iteration: 232041
loss: 1.0182468891143799,grad_norm: 0.9080798159912896, iteration: 232042
loss: 1.0302257537841797,grad_norm: 0.8940946348322489, iteration: 232043
loss: 0.9864965677261353,grad_norm: 0.9965187055145205, iteration: 232044
loss: 0.9727508425712585,grad_norm: 0.999999219163828, iteration: 232045
loss: 0.9747264981269836,grad_norm: 0.7342179248501123, iteration: 232046
loss: 1.0010408163070679,grad_norm: 0.8151474418514214, iteration: 232047
loss: 1.0229333639144897,grad_norm: 0.7940411930092955, iteration: 232048
loss: 1.0157191753387451,grad_norm: 0.9753829689414055, iteration: 232049
loss: 0.9979091882705688,grad_norm: 0.9999992277200316, iteration: 232050
loss: 0.9794440269470215,grad_norm: 0.8762847086152753, iteration: 232051
loss: 0.9849964380264282,grad_norm: 0.9999990563746367, iteration: 232052
loss: 0.9900645017623901,grad_norm: 0.9999991651408224, iteration: 232053
loss: 0.9921437501907349,grad_norm: 0.9999991742488196, iteration: 232054
loss: 0.9860509634017944,grad_norm: 0.9124845529245161, iteration: 232055
loss: 0.9848256707191467,grad_norm: 0.9605022881040326, iteration: 232056
loss: 1.0237447023391724,grad_norm: 0.9443495246195011, iteration: 232057
loss: 0.998107373714447,grad_norm: 0.9424062486595495, iteration: 232058
loss: 1.0009896755218506,grad_norm: 0.9252534695192888, iteration: 232059
loss: 0.9922614693641663,grad_norm: 0.9188174239189245, iteration: 232060
loss: 1.0030643939971924,grad_norm: 0.999999485124426, iteration: 232061
loss: 1.0070455074310303,grad_norm: 0.9999990486186294, iteration: 232062
loss: 0.990786612033844,grad_norm: 0.9999991754269085, iteration: 232063
loss: 1.0079174041748047,grad_norm: 0.9434381575211647, iteration: 232064
loss: 0.9925018548965454,grad_norm: 0.9999991556873613, iteration: 232065
loss: 0.9946506023406982,grad_norm: 0.8262804589172945, iteration: 232066
loss: 0.9767916798591614,grad_norm: 0.9095991236071871, iteration: 232067
loss: 0.9845327734947205,grad_norm: 0.7491180602851919, iteration: 232068
loss: 0.981238067150116,grad_norm: 0.9999992405168443, iteration: 232069
loss: 1.0016158819198608,grad_norm: 0.9133212857404288, iteration: 232070
loss: 1.023182988166809,grad_norm: 0.9740020108531565, iteration: 232071
loss: 0.9504273533821106,grad_norm: 0.9999991205487676, iteration: 232072
loss: 0.9905813932418823,grad_norm: 0.9642756927588264, iteration: 232073
loss: 1.0054701566696167,grad_norm: 0.8710094631835091, iteration: 232074
loss: 0.9845649600028992,grad_norm: 0.9925909992958467, iteration: 232075
loss: 0.9879443049430847,grad_norm: 0.7581777785166052, iteration: 232076
loss: 1.015793800354004,grad_norm: 0.8606152859235437, iteration: 232077
loss: 1.0722962617874146,grad_norm: 0.9823290012161431, iteration: 232078
loss: 1.0249279737472534,grad_norm: 0.8237565638672916, iteration: 232079
loss: 0.9756360650062561,grad_norm: 0.9999992574588462, iteration: 232080
loss: 0.9759114384651184,grad_norm: 0.9386866188663033, iteration: 232081
loss: 0.9703751802444458,grad_norm: 0.9999991209665802, iteration: 232082
loss: 0.9933369755744934,grad_norm: 0.9823242416455834, iteration: 232083
loss: 0.9627041220664978,grad_norm: 0.8502028669737091, iteration: 232084
loss: 0.9919907450675964,grad_norm: 0.918045322606537, iteration: 232085
loss: 0.9729316234588623,grad_norm: 0.7469684732415683, iteration: 232086
loss: 1.02683687210083,grad_norm: 0.9999991084495694, iteration: 232087
loss: 0.9858567714691162,grad_norm: 0.7864406771134606, iteration: 232088
loss: 0.997825026512146,grad_norm: 0.9969911408077127, iteration: 232089
loss: 0.9805012345314026,grad_norm: 0.9999991479992784, iteration: 232090
loss: 1.0006253719329834,grad_norm: 0.859792121967757, iteration: 232091
loss: 0.9961374998092651,grad_norm: 0.8515311873118451, iteration: 232092
loss: 0.9524568915367126,grad_norm: 0.9722313787490255, iteration: 232093
loss: 0.9858025908470154,grad_norm: 0.9801803895288759, iteration: 232094
loss: 1.0314583778381348,grad_norm: 0.9575849912626786, iteration: 232095
loss: 0.9734896421432495,grad_norm: 0.8285336418681001, iteration: 232096
loss: 1.0148742198944092,grad_norm: 0.841921816287553, iteration: 232097
loss: 0.9802955389022827,grad_norm: 0.9494014863943485, iteration: 232098
loss: 0.9890037775039673,grad_norm: 0.9633427751743032, iteration: 232099
loss: 1.0348454713821411,grad_norm: 0.9522883333600292, iteration: 232100
loss: 1.0342059135437012,grad_norm: 0.8423777223097747, iteration: 232101
loss: 1.0151715278625488,grad_norm: 0.9999991945778335, iteration: 232102
loss: 1.016353726387024,grad_norm: 0.9999990402311112, iteration: 232103
loss: 0.9900104999542236,grad_norm: 0.907375916102422, iteration: 232104
loss: 0.9934090971946716,grad_norm: 0.9999991774583125, iteration: 232105
loss: 0.990576982498169,grad_norm: 0.9999991249514599, iteration: 232106
loss: 1.048648715019226,grad_norm: 0.9999992345279262, iteration: 232107
loss: 1.013492465019226,grad_norm: 0.9138981046743029, iteration: 232108
loss: 0.9967719316482544,grad_norm: 0.9584938128919578, iteration: 232109
loss: 1.0547199249267578,grad_norm: 0.9999995114440293, iteration: 232110
loss: 1.0397374629974365,grad_norm: 0.8513384908865953, iteration: 232111
loss: 0.962716281414032,grad_norm: 0.9651230767514318, iteration: 232112
loss: 1.0038574934005737,grad_norm: 0.9999989265954906, iteration: 232113
loss: 0.9889886975288391,grad_norm: 0.9226369338658841, iteration: 232114
loss: 0.982740581035614,grad_norm: 0.9623284969837566, iteration: 232115
loss: 1.0012381076812744,grad_norm: 0.9455009488483503, iteration: 232116
loss: 0.9974321126937866,grad_norm: 0.9999991260523693, iteration: 232117
loss: 1.1167601346969604,grad_norm: 0.9999990133162497, iteration: 232118
loss: 0.9518159031867981,grad_norm: 0.8986158272995285, iteration: 232119
loss: 0.9805471897125244,grad_norm: 0.8951624188705827, iteration: 232120
loss: 1.0280323028564453,grad_norm: 0.9812167656078932, iteration: 232121
loss: 0.979644775390625,grad_norm: 0.9999990938511188, iteration: 232122
loss: 0.9941457509994507,grad_norm: 0.7929007532318085, iteration: 232123
loss: 1.0255663394927979,grad_norm: 0.9999993002104288, iteration: 232124
loss: 1.0424903631210327,grad_norm: 0.9999991353023832, iteration: 232125
loss: 0.9996439814567566,grad_norm: 0.9999989895849144, iteration: 232126
loss: 1.0203979015350342,grad_norm: 0.8893585023230167, iteration: 232127
loss: 1.0039235353469849,grad_norm: 0.8919993724169056, iteration: 232128
loss: 0.9859334826469421,grad_norm: 0.8866705717339937, iteration: 232129
loss: 0.952610433101654,grad_norm: 0.9469983709099917, iteration: 232130
loss: 1.1224719285964966,grad_norm: 0.9999999712879453, iteration: 232131
loss: 1.0114593505859375,grad_norm: 0.9370193789972626, iteration: 232132
loss: 1.0016199350357056,grad_norm: 0.9999990981639116, iteration: 232133
loss: 0.9786503314971924,grad_norm: 0.9999989811504302, iteration: 232134
loss: 1.0300580263137817,grad_norm: 0.9702540305561983, iteration: 232135
loss: 0.9729236960411072,grad_norm: 0.9999991626670094, iteration: 232136
loss: 0.9929354190826416,grad_norm: 0.9280966718988292, iteration: 232137
loss: 1.0023704767227173,grad_norm: 0.9911674528372596, iteration: 232138
loss: 0.9725391864776611,grad_norm: 0.9647927320679285, iteration: 232139
loss: 0.9975890517234802,grad_norm: 0.8370547105381679, iteration: 232140
loss: 0.9932852983474731,grad_norm: 0.9283512413530016, iteration: 232141
loss: 1.0002942085266113,grad_norm: 0.9999990970873495, iteration: 232142
loss: 0.9913716912269592,grad_norm: 0.8836053329642942, iteration: 232143
loss: 0.9917160272598267,grad_norm: 0.9368095534092421, iteration: 232144
loss: 1.0159891843795776,grad_norm: 0.9562877139083859, iteration: 232145
loss: 0.9768704771995544,grad_norm: 0.7810930565375976, iteration: 232146
loss: 1.0163885354995728,grad_norm: 0.9969700927949149, iteration: 232147
loss: 1.0276788473129272,grad_norm: 0.9999993124019201, iteration: 232148
loss: 1.0266190767288208,grad_norm: 0.9395114801319855, iteration: 232149
loss: 1.0281857252120972,grad_norm: 0.9443486955756134, iteration: 232150
loss: 1.0182080268859863,grad_norm: 0.9999991655528545, iteration: 232151
loss: 0.9910467267036438,grad_norm: 0.9709424576374257, iteration: 232152
loss: 1.0162793397903442,grad_norm: 0.811041274372872, iteration: 232153
loss: 0.9991321563720703,grad_norm: 0.9999991108104453, iteration: 232154
loss: 1.0126087665557861,grad_norm: 0.8003886093381183, iteration: 232155
loss: 1.0253263711929321,grad_norm: 0.9783812275504212, iteration: 232156
loss: 1.0645049810409546,grad_norm: 0.7882657555400947, iteration: 232157
loss: 0.9784066081047058,grad_norm: 0.9999992033799469, iteration: 232158
loss: 0.9942432641983032,grad_norm: 0.976451860904268, iteration: 232159
loss: 0.9836032390594482,grad_norm: 0.9999993487303823, iteration: 232160
loss: 0.9729958772659302,grad_norm: 0.890431234702761, iteration: 232161
loss: 0.9912510514259338,grad_norm: 0.9999991850151461, iteration: 232162
loss: 1.004317283630371,grad_norm: 0.9308912119562895, iteration: 232163
loss: 0.9623252749443054,grad_norm: 0.8416154618904648, iteration: 232164
loss: 1.0120147466659546,grad_norm: 0.7621913620175766, iteration: 232165
loss: 1.0127243995666504,grad_norm: 0.902352808170359, iteration: 232166
loss: 0.973077654838562,grad_norm: 0.918999405167587, iteration: 232167
loss: 0.9684093594551086,grad_norm: 0.8679495199629637, iteration: 232168
loss: 1.0097732543945312,grad_norm: 0.9116488836648681, iteration: 232169
loss: 1.007637858390808,grad_norm: 0.966924378504306, iteration: 232170
loss: 0.9982026815414429,grad_norm: 0.9999992123273429, iteration: 232171
loss: 0.9965658187866211,grad_norm: 0.9739484113743512, iteration: 232172
loss: 0.9882717132568359,grad_norm: 0.8701389373664767, iteration: 232173
loss: 1.035469651222229,grad_norm: 0.9729614508725859, iteration: 232174
loss: 0.9543600082397461,grad_norm: 0.8002726281198205, iteration: 232175
loss: 1.0046437978744507,grad_norm: 0.9999990819148481, iteration: 232176
loss: 1.0004523992538452,grad_norm: 0.9238501853419363, iteration: 232177
loss: 1.0317281484603882,grad_norm: 0.9885870498640723, iteration: 232178
loss: 1.0051535367965698,grad_norm: 0.7743530803406707, iteration: 232179
loss: 0.9828746318817139,grad_norm: 0.951571815737739, iteration: 232180
loss: 0.9838161468505859,grad_norm: 0.747338154962638, iteration: 232181
loss: 1.0247749090194702,grad_norm: 0.9999991544185342, iteration: 232182
loss: 0.9805442094802856,grad_norm: 0.9999990714239498, iteration: 232183
loss: 1.0046360492706299,grad_norm: 0.9999992121625904, iteration: 232184
loss: 1.015231728553772,grad_norm: 0.8736110828994067, iteration: 232185
loss: 1.0590860843658447,grad_norm: 0.9999998174492815, iteration: 232186
loss: 1.0205132961273193,grad_norm: 0.8615443224520893, iteration: 232187
loss: 0.9943997263908386,grad_norm: 0.8869516480969966, iteration: 232188
loss: 1.0025231838226318,grad_norm: 0.8534561467787275, iteration: 232189
loss: 1.0035805702209473,grad_norm: 0.9498141446969901, iteration: 232190
loss: 0.9933900833129883,grad_norm: 0.7978134096238205, iteration: 232191
loss: 0.9917753338813782,grad_norm: 0.8900206238522006, iteration: 232192
loss: 0.9787946343421936,grad_norm: 0.999158213518875, iteration: 232193
loss: 0.9628340005874634,grad_norm: 0.9270801072502571, iteration: 232194
loss: 0.9949871897697449,grad_norm: 0.9373684385317768, iteration: 232195
loss: 0.9991177916526794,grad_norm: 0.7986770745046236, iteration: 232196
loss: 0.9864203333854675,grad_norm: 0.9579995028830638, iteration: 232197
loss: 0.9984160661697388,grad_norm: 0.9999992473968033, iteration: 232198
loss: 0.9822214841842651,grad_norm: 0.907005636490089, iteration: 232199
loss: 1.0196640491485596,grad_norm: 0.9999991055215728, iteration: 232200
loss: 1.017427921295166,grad_norm: 0.897270333245993, iteration: 232201
loss: 1.0292693376541138,grad_norm: 0.8761904296623748, iteration: 232202
loss: 1.0444486141204834,grad_norm: 0.9999997393125265, iteration: 232203
loss: 1.03013014793396,grad_norm: 0.9270554099325188, iteration: 232204
loss: 1.022329568862915,grad_norm: 0.9999991243779768, iteration: 232205
loss: 0.9964171648025513,grad_norm: 0.9865657882695017, iteration: 232206
loss: 1.009139060974121,grad_norm: 0.8458707673842811, iteration: 232207
loss: 0.9942514896392822,grad_norm: 0.8977148972791027, iteration: 232208
loss: 1.038251519203186,grad_norm: 0.9469549761103784, iteration: 232209
loss: 1.0372412204742432,grad_norm: 0.9023126353422753, iteration: 232210
loss: 0.9719944000244141,grad_norm: 0.931753481430538, iteration: 232211
loss: 1.0231173038482666,grad_norm: 0.8159296556225563, iteration: 232212
loss: 1.00302255153656,grad_norm: 0.9999989796226026, iteration: 232213
loss: 0.9886873364448547,grad_norm: 0.9999991652315571, iteration: 232214
loss: 1.0204395055770874,grad_norm: 0.8294370932320719, iteration: 232215
loss: 0.9889065027236938,grad_norm: 0.7848977810130946, iteration: 232216
loss: 1.01880943775177,grad_norm: 0.9999991097116279, iteration: 232217
loss: 1.0067206621170044,grad_norm: 0.8071041712648961, iteration: 232218
loss: 1.0098711252212524,grad_norm: 0.9581487078156357, iteration: 232219
loss: 0.9811035394668579,grad_norm: 0.9306088618797647, iteration: 232220
loss: 1.0311917066574097,grad_norm: 0.8646300406328853, iteration: 232221
loss: 1.005334496498108,grad_norm: 0.8908641649148641, iteration: 232222
loss: 0.9500706195831299,grad_norm: 0.9999990488427282, iteration: 232223
loss: 1.010550618171692,grad_norm: 0.9999993122868569, iteration: 232224
loss: 1.0091270208358765,grad_norm: 0.9999988992563427, iteration: 232225
loss: 1.0388976335525513,grad_norm: 0.9999993167798875, iteration: 232226
loss: 0.9964635372161865,grad_norm: 0.7775616317721318, iteration: 232227
loss: 0.9994897842407227,grad_norm: 0.9066380129649542, iteration: 232228
loss: 0.9993132948875427,grad_norm: 0.8999896309618896, iteration: 232229
loss: 0.9897423982620239,grad_norm: 0.9483264740207745, iteration: 232230
loss: 1.0239214897155762,grad_norm: 0.8731695924294935, iteration: 232231
loss: 1.0031312704086304,grad_norm: 0.9999991687290952, iteration: 232232
loss: 0.9669209718704224,grad_norm: 0.9999990247937722, iteration: 232233
loss: 0.9947106838226318,grad_norm: 0.9392992602923447, iteration: 232234
loss: 1.0251771211624146,grad_norm: 0.915292635458844, iteration: 232235
loss: 1.0244859457015991,grad_norm: 0.874420015438343, iteration: 232236
loss: 0.9618955850601196,grad_norm: 0.8199124960734263, iteration: 232237
loss: 1.0113868713378906,grad_norm: 0.8710736789450664, iteration: 232238
loss: 1.0023597478866577,grad_norm: 0.8151192733263771, iteration: 232239
loss: 1.0281274318695068,grad_norm: 0.9999991990407489, iteration: 232240
loss: 0.9849883913993835,grad_norm: 0.9061067604936724, iteration: 232241
loss: 0.9881519675254822,grad_norm: 0.9999991252599093, iteration: 232242
loss: 1.0066438913345337,grad_norm: 0.9222556046336077, iteration: 232243
loss: 0.9716299772262573,grad_norm: 0.9999990630761364, iteration: 232244
loss: 1.170843243598938,grad_norm: 0.9999995698181225, iteration: 232245
loss: 1.0027451515197754,grad_norm: 0.9999991438882677, iteration: 232246
loss: 0.9991068243980408,grad_norm: 0.874242749328991, iteration: 232247
loss: 0.9861254096031189,grad_norm: 0.9219255428654287, iteration: 232248
loss: 0.9911866188049316,grad_norm: 0.878111866813256, iteration: 232249
loss: 1.0006465911865234,grad_norm: 0.7362867434815427, iteration: 232250
loss: 1.033856987953186,grad_norm: 0.9999992847451089, iteration: 232251
loss: 1.012031078338623,grad_norm: 0.8112942979746044, iteration: 232252
loss: 1.021796464920044,grad_norm: 0.9789388942230415, iteration: 232253
loss: 0.9757493138313293,grad_norm: 0.9999991208347272, iteration: 232254
loss: 0.9580949544906616,grad_norm: 0.9492004383975907, iteration: 232255
loss: 0.9827125668525696,grad_norm: 0.9999992543731326, iteration: 232256
loss: 1.017594575881958,grad_norm: 0.9948224777759661, iteration: 232257
loss: 0.9989785552024841,grad_norm: 0.9790682408870998, iteration: 232258
loss: 0.9410353899002075,grad_norm: 0.8393152685676575, iteration: 232259
loss: 0.973444938659668,grad_norm: 0.9999990786172308, iteration: 232260
loss: 1.0054467916488647,grad_norm: 0.8205355707858479, iteration: 232261
loss: 0.9907526969909668,grad_norm: 0.9945293817447639, iteration: 232262
loss: 1.027414083480835,grad_norm: 0.7520782892719183, iteration: 232263
loss: 0.9716842174530029,grad_norm: 0.999999086403323, iteration: 232264
loss: 1.0361443758010864,grad_norm: 0.8450189787006095, iteration: 232265
loss: 1.0036048889160156,grad_norm: 0.8425095479199837, iteration: 232266
loss: 1.0700950622558594,grad_norm: 0.9318560550641385, iteration: 232267
loss: 1.0380691289901733,grad_norm: 0.9999992692683878, iteration: 232268
loss: 0.9719111323356628,grad_norm: 0.9856111300356643, iteration: 232269
loss: 0.974686861038208,grad_norm: 0.883509057251593, iteration: 232270
loss: 0.9970225095748901,grad_norm: 0.9304045549537092, iteration: 232271
loss: 0.9546363949775696,grad_norm: 0.7853192035022192, iteration: 232272
loss: 1.028414011001587,grad_norm: 0.9999990859275062, iteration: 232273
loss: 0.9904056787490845,grad_norm: 0.9999990123986611, iteration: 232274
loss: 0.9851042032241821,grad_norm: 0.9999998486866167, iteration: 232275
loss: 1.0331380367279053,grad_norm: 0.7903449895516211, iteration: 232276
loss: 0.9872807264328003,grad_norm: 0.8876442073974827, iteration: 232277
loss: 1.0047346353530884,grad_norm: 0.9999991774264363, iteration: 232278
loss: 0.9730509519577026,grad_norm: 0.9999989689775743, iteration: 232279
loss: 1.0068764686584473,grad_norm: 0.7524721506263233, iteration: 232280
loss: 1.0227768421173096,grad_norm: 0.8489054282438623, iteration: 232281
loss: 0.9957804083824158,grad_norm: 0.9572898509472478, iteration: 232282
loss: 1.0604511499404907,grad_norm: 0.8912868517739678, iteration: 232283
loss: 0.9934413433074951,grad_norm: 0.7448494136912044, iteration: 232284
loss: 1.0282155275344849,grad_norm: 0.9044511910609732, iteration: 232285
loss: 1.043677568435669,grad_norm: 0.9354230647164934, iteration: 232286
loss: 1.0076003074645996,grad_norm: 0.7949738776488692, iteration: 232287
loss: 0.9890877604484558,grad_norm: 0.9999996788463094, iteration: 232288
loss: 0.9820212125778198,grad_norm: 0.9497954446134402, iteration: 232289
loss: 1.0006818771362305,grad_norm: 0.7716544515937537, iteration: 232290
loss: 1.0261242389678955,grad_norm: 0.9999993341075789, iteration: 232291
loss: 1.0156246423721313,grad_norm: 0.8216300173727664, iteration: 232292
loss: 1.0155500173568726,grad_norm: 0.9974416235121262, iteration: 232293
loss: 0.9887452125549316,grad_norm: 0.9569432567395352, iteration: 232294
loss: 0.9833257794380188,grad_norm: 0.9999992151901473, iteration: 232295
loss: 0.9892362952232361,grad_norm: 0.8465687074625726, iteration: 232296
loss: 0.9952664375305176,grad_norm: 0.9999998030583913, iteration: 232297
loss: 0.9801268577575684,grad_norm: 0.8700161226318746, iteration: 232298
loss: 0.9792861938476562,grad_norm: 0.8037298469957619, iteration: 232299
loss: 1.0118213891983032,grad_norm: 0.7872584274740461, iteration: 232300
loss: 1.005201816558838,grad_norm: 0.9999989808416136, iteration: 232301
loss: 1.0167477130889893,grad_norm: 0.8548940563290328, iteration: 232302
loss: 0.9844410419464111,grad_norm: 0.9999992310588427, iteration: 232303
loss: 0.9854077100753784,grad_norm: 0.9325079944241381, iteration: 232304
loss: 0.9680137038230896,grad_norm: 0.8597246402743308, iteration: 232305
loss: 1.0028445720672607,grad_norm: 0.8201374678738494, iteration: 232306
loss: 0.9901740550994873,grad_norm: 0.7799088077633143, iteration: 232307
loss: 0.948110044002533,grad_norm: 0.865572347382377, iteration: 232308
loss: 0.999659538269043,grad_norm: 0.8811849762280057, iteration: 232309
loss: 1.015730381011963,grad_norm: 0.946233404204994, iteration: 232310
loss: 0.9986903667449951,grad_norm: 0.9292021332243949, iteration: 232311
loss: 1.0014923810958862,grad_norm: 0.9224205972616079, iteration: 232312
loss: 0.9818727374076843,grad_norm: 0.9593381732824683, iteration: 232313
loss: 0.9987506866455078,grad_norm: 0.9999990545437608, iteration: 232314
loss: 0.9965842962265015,grad_norm: 0.8473498492114758, iteration: 232315
loss: 1.0549323558807373,grad_norm: 0.9999991555578631, iteration: 232316
loss: 1.0098655223846436,grad_norm: 0.9999992324374615, iteration: 232317
loss: 0.9916083216667175,grad_norm: 0.9680038188737817, iteration: 232318
loss: 0.9895501732826233,grad_norm: 0.9017194960524935, iteration: 232319
loss: 1.0146719217300415,grad_norm: 0.9875620378134897, iteration: 232320
loss: 0.9977695345878601,grad_norm: 0.9042926916167334, iteration: 232321
loss: 0.9651663899421692,grad_norm: 0.9999995386007646, iteration: 232322
loss: 1.0027530193328857,grad_norm: 0.8946908645797146, iteration: 232323
loss: 0.9878605604171753,grad_norm: 0.7971802244545236, iteration: 232324
loss: 1.0037355422973633,grad_norm: 0.9999993545026409, iteration: 232325
loss: 1.0533744096755981,grad_norm: 0.950507589551002, iteration: 232326
loss: 0.9798999428749084,grad_norm: 0.9999990780219669, iteration: 232327
loss: 1.0188965797424316,grad_norm: 0.7378691976981228, iteration: 232328
loss: 1.0263549089431763,grad_norm: 0.9999991726759769, iteration: 232329
loss: 0.9847297072410583,grad_norm: 0.9999991925602376, iteration: 232330
loss: 0.9639264345169067,grad_norm: 0.8947301776264182, iteration: 232331
loss: 1.0061835050582886,grad_norm: 0.9999991566302663, iteration: 232332
loss: 1.0147629976272583,grad_norm: 0.9999998973064768, iteration: 232333
loss: 0.9791785478591919,grad_norm: 0.7617549720666906, iteration: 232334
loss: 1.0003657341003418,grad_norm: 0.7685641946951742, iteration: 232335
loss: 0.982529878616333,grad_norm: 0.9999991979472811, iteration: 232336
loss: 0.9594330787658691,grad_norm: 0.8868520172756017, iteration: 232337
loss: 1.0141593217849731,grad_norm: 0.899983746366656, iteration: 232338
loss: 1.0215747356414795,grad_norm: 0.8812959917872747, iteration: 232339
loss: 0.9800366759300232,grad_norm: 0.9999992976343872, iteration: 232340
loss: 0.9891114234924316,grad_norm: 0.9999992120463179, iteration: 232341
loss: 1.0086956024169922,grad_norm: 0.9756125548304818, iteration: 232342
loss: 0.9950227737426758,grad_norm: 0.8859064184767371, iteration: 232343
loss: 0.9883438348770142,grad_norm: 0.7844562743139817, iteration: 232344
loss: 0.9909636378288269,grad_norm: 0.9999990881133257, iteration: 232345
loss: 0.9976818561553955,grad_norm: 0.9366385824969026, iteration: 232346
loss: 1.0125735998153687,grad_norm: 0.8897648792792622, iteration: 232347
loss: 1.0205096006393433,grad_norm: 0.9388898223478785, iteration: 232348
loss: 1.034250020980835,grad_norm: 0.7879288733427857, iteration: 232349
loss: 0.9705933928489685,grad_norm: 0.9558492542004914, iteration: 232350
loss: 0.9978341460227966,grad_norm: 0.8017571809260424, iteration: 232351
loss: 1.007003903388977,grad_norm: 0.8377490253734889, iteration: 232352
loss: 0.9770155549049377,grad_norm: 0.9999997359510464, iteration: 232353
loss: 0.9972401857376099,grad_norm: 0.941741402784849, iteration: 232354
loss: 0.9873597621917725,grad_norm: 0.97611154940259, iteration: 232355
loss: 1.0353456735610962,grad_norm: 0.9999998562169936, iteration: 232356
loss: 1.0550843477249146,grad_norm: 0.9999992544925429, iteration: 232357
loss: 1.00441312789917,grad_norm: 0.8116519082409359, iteration: 232358
loss: 1.0176563262939453,grad_norm: 0.9936561835534112, iteration: 232359
loss: 1.0225480794906616,grad_norm: 0.9999990570349334, iteration: 232360
loss: 1.0280816555023193,grad_norm: 0.9999991502224024, iteration: 232361
loss: 0.9912878274917603,grad_norm: 0.8587868508380784, iteration: 232362
loss: 0.991396427154541,grad_norm: 0.9766801093895531, iteration: 232363
loss: 0.999267578125,grad_norm: 0.8189362202618241, iteration: 232364
loss: 0.9825349450111389,grad_norm: 0.8429268508888293, iteration: 232365
loss: 1.0170854330062866,grad_norm: 0.9996970806347781, iteration: 232366
loss: 0.9830949902534485,grad_norm: 0.7927594434086283, iteration: 232367
loss: 1.0146673917770386,grad_norm: 0.9999991599571858, iteration: 232368
loss: 1.0066981315612793,grad_norm: 0.944389537005094, iteration: 232369
loss: 1.0057122707366943,grad_norm: 0.9999990004873898, iteration: 232370
loss: 0.9808467030525208,grad_norm: 0.8324020025876274, iteration: 232371
loss: 1.0380853414535522,grad_norm: 0.981873233460983, iteration: 232372
loss: 0.9801251888275146,grad_norm: 0.9999991767605607, iteration: 232373
loss: 1.0020378828048706,grad_norm: 0.9999992669220978, iteration: 232374
loss: 0.9779767394065857,grad_norm: 0.9999990517848326, iteration: 232375
loss: 1.0257591009140015,grad_norm: 0.9999990999967638, iteration: 232376
loss: 0.993860125541687,grad_norm: 0.9925810903245911, iteration: 232377
loss: 0.9948935508728027,grad_norm: 0.9500396785542008, iteration: 232378
loss: 0.9916929006576538,grad_norm: 0.9698180595102041, iteration: 232379
loss: 1.0056856870651245,grad_norm: 0.9252402842659706, iteration: 232380
loss: 1.0093984603881836,grad_norm: 0.796433259652163, iteration: 232381
loss: 0.9944589138031006,grad_norm: 0.9528735656536932, iteration: 232382
loss: 1.0268486738204956,grad_norm: 0.9999995680745696, iteration: 232383
loss: 1.0209832191467285,grad_norm: 0.9999990003991833, iteration: 232384
loss: 1.0062512159347534,grad_norm: 0.8816487764565578, iteration: 232385
loss: 0.9844238758087158,grad_norm: 0.8787758326510984, iteration: 232386
loss: 0.9695456624031067,grad_norm: 0.9665946783191625, iteration: 232387
loss: 1.0179330110549927,grad_norm: 0.9999991841459118, iteration: 232388
loss: 1.0042517185211182,grad_norm: 0.9612117018138921, iteration: 232389
loss: 1.011689305305481,grad_norm: 0.8439554726356986, iteration: 232390
loss: 1.0075724124908447,grad_norm: 0.795408907439607, iteration: 232391
loss: 1.0146300792694092,grad_norm: 0.9834313942270226, iteration: 232392
loss: 1.0105035305023193,grad_norm: 0.9999991101354412, iteration: 232393
loss: 0.9950946569442749,grad_norm: 0.7414891510923789, iteration: 232394
loss: 0.9948243498802185,grad_norm: 0.9999991425805971, iteration: 232395
loss: 1.0014326572418213,grad_norm: 0.828266063307302, iteration: 232396
loss: 0.9578506350517273,grad_norm: 0.8071855740203583, iteration: 232397
loss: 1.024189829826355,grad_norm: 0.974858535019897, iteration: 232398
loss: 1.01918363571167,grad_norm: 0.8184351689092608, iteration: 232399
loss: 0.9807634353637695,grad_norm: 0.9999989270453319, iteration: 232400
loss: 0.9845612645149231,grad_norm: 0.832950588475547, iteration: 232401
loss: 0.9979898929595947,grad_norm: 0.8388265718393286, iteration: 232402
loss: 0.9985047578811646,grad_norm: 0.9999990743256713, iteration: 232403
loss: 1.0033479928970337,grad_norm: 0.9999990536035573, iteration: 232404
loss: 1.0266318321228027,grad_norm: 0.8663749855723223, iteration: 232405
loss: 0.9852432608604431,grad_norm: 0.9999992208591076, iteration: 232406
loss: 0.9910354018211365,grad_norm: 0.7976356591939245, iteration: 232407
loss: 1.0206421613693237,grad_norm: 0.850373572685769, iteration: 232408
loss: 0.9852997660636902,grad_norm: 0.9615778808874271, iteration: 232409
loss: 1.0062204599380493,grad_norm: 0.9999991320167326, iteration: 232410
loss: 1.0051484107971191,grad_norm: 0.9999991375950852, iteration: 232411
loss: 0.9737169146537781,grad_norm: 0.8722994153398744, iteration: 232412
loss: 0.9952017068862915,grad_norm: 0.9999992332184339, iteration: 232413
loss: 1.02607262134552,grad_norm: 0.9417355363393904, iteration: 232414
loss: 1.0261261463165283,grad_norm: 0.9237758162925689, iteration: 232415
loss: 1.0073423385620117,grad_norm: 0.9999992258705589, iteration: 232416
loss: 1.0172232389450073,grad_norm: 0.8946231342521587, iteration: 232417
loss: 1.0059659481048584,grad_norm: 0.8453903797527114, iteration: 232418
loss: 1.0181076526641846,grad_norm: 0.999999698574471, iteration: 232419
loss: 1.012937068939209,grad_norm: 0.7849951950596451, iteration: 232420
loss: 1.0121991634368896,grad_norm: 0.9999990516851113, iteration: 232421
loss: 0.9861366748809814,grad_norm: 0.960918260782664, iteration: 232422
loss: 1.037316083908081,grad_norm: 0.9999990670424104, iteration: 232423
loss: 0.986438512802124,grad_norm: 0.8139146365265403, iteration: 232424
loss: 0.9498671293258667,grad_norm: 0.9999991930907499, iteration: 232425
loss: 0.9879791140556335,grad_norm: 0.82298691924127, iteration: 232426
loss: 0.9883418679237366,grad_norm: 0.8351606513694432, iteration: 232427
loss: 0.9945849180221558,grad_norm: 0.8940786557805126, iteration: 232428
loss: 0.9985849857330322,grad_norm: 0.8525168604821519, iteration: 232429
loss: 0.9997178912162781,grad_norm: 0.8159655297705741, iteration: 232430
loss: 0.9388573169708252,grad_norm: 0.8983699351130194, iteration: 232431
loss: 0.977968692779541,grad_norm: 0.8959971228144602, iteration: 232432
loss: 1.0042011737823486,grad_norm: 0.9226141547665349, iteration: 232433
loss: 1.0317071676254272,grad_norm: 0.991634110440164, iteration: 232434
loss: 0.9879643321037292,grad_norm: 0.9146891178402774, iteration: 232435
loss: 0.9679991006851196,grad_norm: 0.8699505825339074, iteration: 232436
loss: 0.961723268032074,grad_norm: 0.975872762180907, iteration: 232437
loss: 1.0560318231582642,grad_norm: 0.9999993823535258, iteration: 232438
loss: 1.0243996381759644,grad_norm: 0.9555585147736791, iteration: 232439
loss: 1.0269144773483276,grad_norm: 0.7684945150715589, iteration: 232440
loss: 0.9913835525512695,grad_norm: 0.8176328463115562, iteration: 232441
loss: 0.9974497556686401,grad_norm: 0.9999992976933262, iteration: 232442
loss: 0.9756487011909485,grad_norm: 0.9999991052071171, iteration: 232443
loss: 0.9788782596588135,grad_norm: 0.8388180410195154, iteration: 232444
loss: 0.9738935828208923,grad_norm: 0.9599715942601545, iteration: 232445
loss: 1.0429773330688477,grad_norm: 0.9626253762270182, iteration: 232446
loss: 0.9947565793991089,grad_norm: 0.9293136435463806, iteration: 232447
loss: 1.0103323459625244,grad_norm: 0.9770668907788824, iteration: 232448
loss: 0.9889938235282898,grad_norm: 0.9999990844482294, iteration: 232449
loss: 1.0084859132766724,grad_norm: 0.8966519693359758, iteration: 232450
loss: 0.9918074011802673,grad_norm: 0.9999991638247775, iteration: 232451
loss: 0.9966146349906921,grad_norm: 0.9999990038479353, iteration: 232452
loss: 1.0040626525878906,grad_norm: 0.9999989147099277, iteration: 232453
loss: 1.0120580196380615,grad_norm: 0.8603727541910263, iteration: 232454
loss: 0.9887467622756958,grad_norm: 0.9439116247965527, iteration: 232455
loss: 0.9974977374076843,grad_norm: 0.9298654713292185, iteration: 232456
loss: 1.0242844820022583,grad_norm: 0.9999990077449761, iteration: 232457
loss: 1.0034306049346924,grad_norm: 0.976388499543573, iteration: 232458
loss: 1.016165018081665,grad_norm: 0.9250104012812423, iteration: 232459
loss: 0.9820375442504883,grad_norm: 0.941902549326851, iteration: 232460
loss: 1.0067416429519653,grad_norm: 0.8604226688085801, iteration: 232461
loss: 0.9822456240653992,grad_norm: 0.8926829010773745, iteration: 232462
loss: 0.9546396732330322,grad_norm: 0.9999992211468087, iteration: 232463
loss: 1.0775784254074097,grad_norm: 0.9871515853714531, iteration: 232464
loss: 0.9675735235214233,grad_norm: 0.999999048344904, iteration: 232465
loss: 1.0257909297943115,grad_norm: 0.9237298550192284, iteration: 232466
loss: 0.9802626967430115,grad_norm: 0.9365695217693332, iteration: 232467
loss: 1.0296679735183716,grad_norm: 0.750470411594747, iteration: 232468
loss: 0.986568808555603,grad_norm: 0.9999992480022495, iteration: 232469
loss: 1.017533779144287,grad_norm: 0.9026407795749846, iteration: 232470
loss: 0.9758654832839966,grad_norm: 0.9274956065187907, iteration: 232471
loss: 1.0158454179763794,grad_norm: 0.9547503329357948, iteration: 232472
loss: 1.0352871417999268,grad_norm: 0.9999991460871542, iteration: 232473
loss: 0.9990053176879883,grad_norm: 0.9572291754229423, iteration: 232474
loss: 1.0143927335739136,grad_norm: 0.8280622196715193, iteration: 232475
loss: 1.0178518295288086,grad_norm: 0.83963392903908, iteration: 232476
loss: 1.0294063091278076,grad_norm: 0.7920779307314006, iteration: 232477
loss: 1.0185471773147583,grad_norm: 0.9334531581007965, iteration: 232478
loss: 1.006364107131958,grad_norm: 0.8188275270576997, iteration: 232479
loss: 0.9978845119476318,grad_norm: 0.9258262915269069, iteration: 232480
loss: 0.9973186254501343,grad_norm: 0.9035407657646501, iteration: 232481
loss: 1.0103881359100342,grad_norm: 0.9975456045365875, iteration: 232482
loss: 1.0025935173034668,grad_norm: 0.9419854839918261, iteration: 232483
loss: 0.9681477546691895,grad_norm: 0.8170706280761476, iteration: 232484
loss: 0.9973800182342529,grad_norm: 0.8952882956750436, iteration: 232485
loss: 1.0040456056594849,grad_norm: 0.9999991329540773, iteration: 232486
loss: 1.0314791202545166,grad_norm: 0.867113087318387, iteration: 232487
loss: 1.0239691734313965,grad_norm: 0.9889676423069057, iteration: 232488
loss: 1.0025771856307983,grad_norm: 0.7631853313369585, iteration: 232489
loss: 1.0249842405319214,grad_norm: 0.958025404713403, iteration: 232490
loss: 1.0213664770126343,grad_norm: 0.9999991935638242, iteration: 232491
loss: 1.0078718662261963,grad_norm: 0.7773451123289027, iteration: 232492
loss: 0.9764845967292786,grad_norm: 0.9662189630539033, iteration: 232493
loss: 0.9758672714233398,grad_norm: 0.9790590909836935, iteration: 232494
loss: 1.0315731763839722,grad_norm: 0.900993235143131, iteration: 232495
loss: 1.0256131887435913,grad_norm: 0.9999990483509402, iteration: 232496
loss: 1.0062271356582642,grad_norm: 0.9999991397415183, iteration: 232497
loss: 1.0297284126281738,grad_norm: 0.9157851037557564, iteration: 232498
loss: 0.9688664674758911,grad_norm: 0.883618610636739, iteration: 232499
loss: 1.0057977437973022,grad_norm: 0.9326175101203996, iteration: 232500
loss: 1.0071884393692017,grad_norm: 0.9796285047831457, iteration: 232501
loss: 1.0262629985809326,grad_norm: 0.9157008754717287, iteration: 232502
loss: 0.9986355304718018,grad_norm: 0.9064971603581323, iteration: 232503
loss: 1.0358930826187134,grad_norm: 0.9752274194138926, iteration: 232504
loss: 0.9731392860412598,grad_norm: 0.7833568362878172, iteration: 232505
loss: 1.0351232290267944,grad_norm: 0.9999989041483961, iteration: 232506
loss: 1.0141171216964722,grad_norm: 0.9999991937294591, iteration: 232507
loss: 0.9799191355705261,grad_norm: 0.9999992568259829, iteration: 232508
loss: 0.9906963109970093,grad_norm: 0.8606869833567515, iteration: 232509
loss: 1.0072128772735596,grad_norm: 0.8711452682088775, iteration: 232510
loss: 0.9696589112281799,grad_norm: 0.9999998998747289, iteration: 232511
loss: 0.9773023128509521,grad_norm: 0.8081457416664362, iteration: 232512
loss: 1.0289438962936401,grad_norm: 0.6566936412652857, iteration: 232513
loss: 1.0254642963409424,grad_norm: 0.9999996589103891, iteration: 232514
loss: 1.0049649477005005,grad_norm: 0.9532003678967481, iteration: 232515
loss: 0.9991740584373474,grad_norm: 0.9999989832599714, iteration: 232516
loss: 0.9660428166389465,grad_norm: 0.9048920198191174, iteration: 232517
loss: 0.96040940284729,grad_norm: 0.8758955598419469, iteration: 232518
loss: 1.0114257335662842,grad_norm: 0.7630092107154247, iteration: 232519
loss: 1.0169806480407715,grad_norm: 0.8824235346106781, iteration: 232520
loss: 1.0267622470855713,grad_norm: 0.858280858198649, iteration: 232521
loss: 1.0334669351577759,grad_norm: 0.9478665608933573, iteration: 232522
loss: 1.0178800821304321,grad_norm: 0.9896439615338012, iteration: 232523
loss: 1.0091724395751953,grad_norm: 0.7513515903273648, iteration: 232524
loss: 1.0098545551300049,grad_norm: 0.7304486261462696, iteration: 232525
loss: 0.9708797931671143,grad_norm: 0.9999991733734862, iteration: 232526
loss: 1.0057226419448853,grad_norm: 0.9251123885680593, iteration: 232527
loss: 0.995887041091919,grad_norm: 0.9999991174374762, iteration: 232528
loss: 0.9970217943191528,grad_norm: 0.9999991507017165, iteration: 232529
loss: 0.9933784604072571,grad_norm: 0.871028686366703, iteration: 232530
loss: 1.0258495807647705,grad_norm: 0.9331800181119864, iteration: 232531
loss: 0.9944234490394592,grad_norm: 0.768733412478836, iteration: 232532
loss: 0.9966503977775574,grad_norm: 0.7825604057926738, iteration: 232533
loss: 0.9697754979133606,grad_norm: 0.8576373018603167, iteration: 232534
loss: 1.0346882343292236,grad_norm: 0.9999993735049733, iteration: 232535
loss: 0.9779815673828125,grad_norm: 0.8566335582056329, iteration: 232536
loss: 0.9769160151481628,grad_norm: 0.884588180572148, iteration: 232537
loss: 1.025053858757019,grad_norm: 0.8806000669191772, iteration: 232538
loss: 1.013932228088379,grad_norm: 0.9205528103381542, iteration: 232539
loss: 1.0253435373306274,grad_norm: 0.9999992098382445, iteration: 232540
loss: 1.0018329620361328,grad_norm: 0.9999992370969893, iteration: 232541
loss: 0.9760802984237671,grad_norm: 0.7759742168879818, iteration: 232542
loss: 1.0302579402923584,grad_norm: 0.8659306779818771, iteration: 232543
loss: 0.9605793952941895,grad_norm: 0.9040875930320103, iteration: 232544
loss: 0.9963268637657166,grad_norm: 0.8167290888675278, iteration: 232545
loss: 1.012152910232544,grad_norm: 0.8489745339839152, iteration: 232546
loss: 0.9549400806427002,grad_norm: 0.9741519560015904, iteration: 232547
loss: 0.9868621826171875,grad_norm: 0.8391869313288696, iteration: 232548
loss: 1.013929843902588,grad_norm: 0.8704882323998256, iteration: 232549
loss: 0.9910180568695068,grad_norm: 0.9999996278824701, iteration: 232550
loss: 0.9918996095657349,grad_norm: 0.9265389765336977, iteration: 232551
loss: 0.9668012857437134,grad_norm: 0.8110554309989557, iteration: 232552
loss: 1.0478569269180298,grad_norm: 0.9527560921348327, iteration: 232553
loss: 0.9670422673225403,grad_norm: 0.9779018674693718, iteration: 232554
loss: 1.038346290588379,grad_norm: 0.9999990662595186, iteration: 232555
loss: 1.002666711807251,grad_norm: 0.999999104161755, iteration: 232556
loss: 1.0061205625534058,grad_norm: 0.895712495749431, iteration: 232557
loss: 1.0074306726455688,grad_norm: 0.999999829411379, iteration: 232558
loss: 1.025022029876709,grad_norm: 0.9938760076186716, iteration: 232559
loss: 0.9866920113563538,grad_norm: 0.7372302774127397, iteration: 232560
loss: 0.9841821193695068,grad_norm: 0.9876063487707377, iteration: 232561
loss: 0.9997920989990234,grad_norm: 0.9999991497582845, iteration: 232562
loss: 0.9982751607894897,grad_norm: 0.9701698851535561, iteration: 232563
loss: 1.0355182886123657,grad_norm: 0.8171866794598981, iteration: 232564
loss: 0.9853641390800476,grad_norm: 0.8997699227659834, iteration: 232565
loss: 0.9905751347541809,grad_norm: 0.8584556193351427, iteration: 232566
loss: 0.9830473065376282,grad_norm: 0.9402221472455999, iteration: 232567
loss: 0.9279974102973938,grad_norm: 0.9498101008461323, iteration: 232568
loss: 1.0413609743118286,grad_norm: 0.9698053476401495, iteration: 232569
loss: 0.988970160484314,grad_norm: 0.9999989748437483, iteration: 232570
loss: 1.0091447830200195,grad_norm: 0.9407966616345164, iteration: 232571
loss: 0.964368999004364,grad_norm: 0.9999991275219299, iteration: 232572
loss: 1.0052368640899658,grad_norm: 0.9843419016413638, iteration: 232573
loss: 1.0097885131835938,grad_norm: 0.9999991481861278, iteration: 232574
loss: 1.0240384340286255,grad_norm: 0.9999990425647489, iteration: 232575
loss: 1.003368854522705,grad_norm: 0.9999993260201809, iteration: 232576
loss: 0.9668748378753662,grad_norm: 0.7861091212073794, iteration: 232577
loss: 0.9931840300559998,grad_norm: 0.9379720475539655, iteration: 232578
loss: 0.9880329370498657,grad_norm: 0.7474994745254213, iteration: 232579
loss: 1.008095145225525,grad_norm: 0.8025971722201757, iteration: 232580
loss: 0.9713906049728394,grad_norm: 0.9999989604566946, iteration: 232581
loss: 0.9645134210586548,grad_norm: 0.9999991462285815, iteration: 232582
loss: 0.9657424688339233,grad_norm: 0.9201646515014158, iteration: 232583
loss: 1.0295612812042236,grad_norm: 0.866979784406171, iteration: 232584
loss: 0.986235499382019,grad_norm: 0.9999991408501638, iteration: 232585
loss: 1.014732003211975,grad_norm: 0.8398255825192958, iteration: 232586
loss: 0.999180257320404,grad_norm: 0.896778920310665, iteration: 232587
loss: 1.008014440536499,grad_norm: 0.9002500086777265, iteration: 232588
loss: 0.9893607497215271,grad_norm: 0.9668818358550717, iteration: 232589
loss: 1.0160797834396362,grad_norm: 0.9958549032483991, iteration: 232590
loss: 0.9461781978607178,grad_norm: 0.8236628673818044, iteration: 232591
loss: 1.0173289775848389,grad_norm: 0.9849343022697385, iteration: 232592
loss: 0.9805576801300049,grad_norm: 0.8759031847050185, iteration: 232593
loss: 0.9871353507041931,grad_norm: 0.9989758194949002, iteration: 232594
loss: 1.0417577028274536,grad_norm: 0.8761553971279572, iteration: 232595
loss: 1.0071101188659668,grad_norm: 0.9999057873204441, iteration: 232596
loss: 1.021239995956421,grad_norm: 0.9999990692957332, iteration: 232597
loss: 1.011771559715271,grad_norm: 0.9101357023236311, iteration: 232598
loss: 0.983348548412323,grad_norm: 0.8350220974269794, iteration: 232599
loss: 0.9828762412071228,grad_norm: 0.7876606290493696, iteration: 232600
loss: 0.983738124370575,grad_norm: 0.921677761219219, iteration: 232601
loss: 1.010827898979187,grad_norm: 0.9999991333994648, iteration: 232602
loss: 1.070017695426941,grad_norm: 0.9999994162227814, iteration: 232603
loss: 0.984648585319519,grad_norm: 0.8263397701948348, iteration: 232604
loss: 1.0295201539993286,grad_norm: 0.8721806083320524, iteration: 232605
loss: 1.0158237218856812,grad_norm: 0.9999993005860388, iteration: 232606
loss: 1.0091663599014282,grad_norm: 0.9297208804949897, iteration: 232607
loss: 1.0288026332855225,grad_norm: 0.9602951776525429, iteration: 232608
loss: 0.9461164474487305,grad_norm: 0.9999991477685702, iteration: 232609
loss: 0.9894890785217285,grad_norm: 0.9251578342694429, iteration: 232610
loss: 1.0122061967849731,grad_norm: 0.9479900156721897, iteration: 232611
loss: 0.9773702025413513,grad_norm: 0.7813452709625938, iteration: 232612
loss: 1.0040572881698608,grad_norm: 0.8695295933287063, iteration: 232613
loss: 1.035711646080017,grad_norm: 0.99999901493721, iteration: 232614
loss: 0.9869049787521362,grad_norm: 0.9982406729563164, iteration: 232615
loss: 1.036914587020874,grad_norm: 0.9999998860001127, iteration: 232616
loss: 1.024182677268982,grad_norm: 0.9723652629909155, iteration: 232617
loss: 0.9992499351501465,grad_norm: 0.8829062719233863, iteration: 232618
loss: 1.0169787406921387,grad_norm: 0.9879604397609727, iteration: 232619
loss: 1.0295233726501465,grad_norm: 0.9999990854356138, iteration: 232620
loss: 0.9853720664978027,grad_norm: 0.8870048746122411, iteration: 232621
loss: 1.027779221534729,grad_norm: 0.973544244440481, iteration: 232622
loss: 0.9731733202934265,grad_norm: 0.9999990042668104, iteration: 232623
loss: 1.0281695127487183,grad_norm: 0.9561141096614827, iteration: 232624
loss: 1.0058516263961792,grad_norm: 0.8320980770692802, iteration: 232625
loss: 0.9752002954483032,grad_norm: 0.9999990612362334, iteration: 232626
loss: 1.008166790008545,grad_norm: 0.8788694559248004, iteration: 232627
loss: 1.0206557512283325,grad_norm: 0.9296055299097971, iteration: 232628
loss: 1.0189424753189087,grad_norm: 0.9496303801500611, iteration: 232629
loss: 0.9950687289237976,grad_norm: 0.8066997236645832, iteration: 232630
loss: 0.9889485836029053,grad_norm: 0.8373683243304266, iteration: 232631
loss: 0.9973611831665039,grad_norm: 0.9185029387693117, iteration: 232632
loss: 1.0031236410140991,grad_norm: 0.8785270820115123, iteration: 232633
loss: 1.0133932828903198,grad_norm: 0.9999991673518374, iteration: 232634
loss: 1.0029146671295166,grad_norm: 0.8856764833762785, iteration: 232635
loss: 0.9691130518913269,grad_norm: 0.9659080061152339, iteration: 232636
loss: 1.0100497007369995,grad_norm: 0.9999993471340836, iteration: 232637
loss: 0.9692977666854858,grad_norm: 0.834091888596294, iteration: 232638
loss: 0.99760502576828,grad_norm: 0.9322677731475194, iteration: 232639
loss: 1.0113170146942139,grad_norm: 0.9999991247361824, iteration: 232640
loss: 1.0008105039596558,grad_norm: 0.9999991601081585, iteration: 232641
loss: 1.0622214078903198,grad_norm: 0.9999989837967307, iteration: 232642
loss: 0.9714371562004089,grad_norm: 0.9084750403724616, iteration: 232643
loss: 0.9536501169204712,grad_norm: 0.8455144543351377, iteration: 232644
loss: 0.9996442198753357,grad_norm: 0.8270802041547265, iteration: 232645
loss: 1.0293513536453247,grad_norm: 0.8513425824203094, iteration: 232646
loss: 1.0052391290664673,grad_norm: 0.9999990047941749, iteration: 232647
loss: 1.0066999197006226,grad_norm: 0.9999991658222096, iteration: 232648
loss: 1.0042001008987427,grad_norm: 0.9369125944382037, iteration: 232649
loss: 0.9979434609413147,grad_norm: 0.9999991071274902, iteration: 232650
loss: 0.9696232080459595,grad_norm: 0.9999990851750885, iteration: 232651
loss: 0.973602831363678,grad_norm: 0.8876703816457675, iteration: 232652
loss: 0.9977685213088989,grad_norm: 0.9826708864260663, iteration: 232653
loss: 0.979941725730896,grad_norm: 0.9999991730725847, iteration: 232654
loss: 1.007779598236084,grad_norm: 1.0000001158521217, iteration: 232655
loss: 1.03834068775177,grad_norm: 0.9115057650835258, iteration: 232656
loss: 1.0027165412902832,grad_norm: 0.9999991025580548, iteration: 232657
loss: 1.0442110300064087,grad_norm: 0.9637531020321042, iteration: 232658
loss: 0.984713077545166,grad_norm: 0.8795081306466432, iteration: 232659
loss: 0.992811381816864,grad_norm: 0.9999990456014841, iteration: 232660
loss: 0.9711406230926514,grad_norm: 0.7474108166908199, iteration: 232661
loss: 1.0043898820877075,grad_norm: 0.9741366819534578, iteration: 232662
loss: 0.9864364266395569,grad_norm: 0.7914505425898708, iteration: 232663
loss: 1.0232481956481934,grad_norm: 0.9999990584767416, iteration: 232664
loss: 1.011870265007019,grad_norm: 0.8400979043587311, iteration: 232665
loss: 0.9691360592842102,grad_norm: 0.7656834992953284, iteration: 232666
loss: 1.010393500328064,grad_norm: 0.9999991639169403, iteration: 232667
loss: 1.00773286819458,grad_norm: 0.8610329103532492, iteration: 232668
loss: 1.0053292512893677,grad_norm: 0.9070064551597526, iteration: 232669
loss: 1.0646737813949585,grad_norm: 0.9999997263274534, iteration: 232670
loss: 0.9974066019058228,grad_norm: 0.8792307565835721, iteration: 232671
loss: 1.0075472593307495,grad_norm: 0.750128402376636, iteration: 232672
loss: 1.0384491682052612,grad_norm: 0.9999992298119141, iteration: 232673
loss: 0.9827931523323059,grad_norm: 0.92069285044441, iteration: 232674
loss: 0.9986273646354675,grad_norm: 0.9115263520996556, iteration: 232675
loss: 0.9639604091644287,grad_norm: 0.8661209317199764, iteration: 232676
loss: 0.9944673776626587,grad_norm: 0.9805330186344419, iteration: 232677
loss: 0.9862126111984253,grad_norm: 0.9128084994419611, iteration: 232678
loss: 0.9979224801063538,grad_norm: 0.9999989636286014, iteration: 232679
loss: 0.9810591340065002,grad_norm: 0.8342529391724403, iteration: 232680
loss: 0.995396614074707,grad_norm: 0.9722221804759772, iteration: 232681
loss: 0.9745305776596069,grad_norm: 0.9091076538581401, iteration: 232682
loss: 1.0149012804031372,grad_norm: 0.9999990685564067, iteration: 232683
loss: 1.0167757272720337,grad_norm: 0.942631056016952, iteration: 232684
loss: 0.9931578636169434,grad_norm: 0.9634072267794133, iteration: 232685
loss: 1.0209203958511353,grad_norm: 0.969011133545321, iteration: 232686
loss: 0.9644235372543335,grad_norm: 0.8019484709831193, iteration: 232687
loss: 1.0172703266143799,grad_norm: 0.9995613391320908, iteration: 232688
loss: 1.006969690322876,grad_norm: 0.905144990726426, iteration: 232689
loss: 1.0036163330078125,grad_norm: 0.9153255023545496, iteration: 232690
loss: 1.0056865215301514,grad_norm: 0.867834610171107, iteration: 232691
loss: 0.9813583493232727,grad_norm: 0.8411312033685825, iteration: 232692
loss: 0.9667779207229614,grad_norm: 0.9461823297770242, iteration: 232693
loss: 1.00221586227417,grad_norm: 0.8939001889839318, iteration: 232694
loss: 0.9849914312362671,grad_norm: 0.8359719504538025, iteration: 232695
loss: 0.9810340404510498,grad_norm: 0.8568803313275904, iteration: 232696
loss: 0.9566273093223572,grad_norm: 0.9999990339201784, iteration: 232697
loss: 0.9490001797676086,grad_norm: 0.9999990623988532, iteration: 232698
loss: 1.0171682834625244,grad_norm: 0.9999991218989908, iteration: 232699
loss: 0.9734193682670593,grad_norm: 0.8759087863962818, iteration: 232700
loss: 0.9733710885047913,grad_norm: 0.8505429372026958, iteration: 232701
loss: 1.0036447048187256,grad_norm: 0.9963841184671168, iteration: 232702
loss: 1.011403203010559,grad_norm: 0.8553246851325499, iteration: 232703
loss: 1.0271515846252441,grad_norm: 0.8962069809555526, iteration: 232704
loss: 1.0376722812652588,grad_norm: 0.9999992516378381, iteration: 232705
loss: 1.011710524559021,grad_norm: 0.9128975367046136, iteration: 232706
loss: 1.0462578535079956,grad_norm: 0.9292800583119338, iteration: 232707
loss: 0.9933768510818481,grad_norm: 0.8355842657904509, iteration: 232708
loss: 0.9911786913871765,grad_norm: 0.9266960871033978, iteration: 232709
loss: 0.9453025460243225,grad_norm: 0.9194730404821911, iteration: 232710
loss: 1.0323458909988403,grad_norm: 0.9821593176136891, iteration: 232711
loss: 0.9770134091377258,grad_norm: 0.8644365655971733, iteration: 232712
loss: 1.0127614736557007,grad_norm: 0.8090200819543586, iteration: 232713
loss: 0.9607365131378174,grad_norm: 0.9131708265389412, iteration: 232714
loss: 0.9926208257675171,grad_norm: 0.9022777383006528, iteration: 232715
loss: 0.9825450778007507,grad_norm: 0.8233306788307222, iteration: 232716
loss: 1.0349395275115967,grad_norm: 0.9999990065565632, iteration: 232717
loss: 0.9913077354431152,grad_norm: 0.9903985057951236, iteration: 232718
loss: 1.0210767984390259,grad_norm: 0.8006859902505596, iteration: 232719
loss: 0.9476461410522461,grad_norm: 0.9662977356422666, iteration: 232720
loss: 0.9804426431655884,grad_norm: 0.9034628921148161, iteration: 232721
loss: 0.995308518409729,grad_norm: 0.7621672366838267, iteration: 232722
loss: 1.030392050743103,grad_norm: 0.9229074719366118, iteration: 232723
loss: 1.018857717514038,grad_norm: 0.9692158424786012, iteration: 232724
loss: 1.0251567363739014,grad_norm: 0.9855605644565767, iteration: 232725
loss: 0.9943667054176331,grad_norm: 0.8580165857964263, iteration: 232726
loss: 1.0058376789093018,grad_norm: 0.8880743204355855, iteration: 232727
loss: 1.0219601392745972,grad_norm: 0.9999998188983483, iteration: 232728
loss: 0.9929031133651733,grad_norm: 0.9965073532203712, iteration: 232729
loss: 0.9438973069190979,grad_norm: 0.9329101583745028, iteration: 232730
loss: 1.0209881067276,grad_norm: 0.8183231979451737, iteration: 232731
loss: 0.9902921915054321,grad_norm: 0.7976181861890422, iteration: 232732
loss: 0.9462147951126099,grad_norm: 0.999999215100784, iteration: 232733
loss: 0.9953327775001526,grad_norm: 0.9236858624911543, iteration: 232734
loss: 1.029457688331604,grad_norm: 0.9999991546105751, iteration: 232735
loss: 1.0379091501235962,grad_norm: 0.9999991329732487, iteration: 232736
loss: 0.9843345880508423,grad_norm: 0.9708252713936174, iteration: 232737
loss: 0.9831951260566711,grad_norm: 0.861194022060188, iteration: 232738
loss: 1.0062198638916016,grad_norm: 0.9999991362479259, iteration: 232739
loss: 0.9928404092788696,grad_norm: 0.9622507145527531, iteration: 232740
loss: 1.000138282775879,grad_norm: 0.9999992751159273, iteration: 232741
loss: 1.0163389444351196,grad_norm: 0.9999989180854358, iteration: 232742
loss: 1.028794527053833,grad_norm: 0.9999990625079545, iteration: 232743
loss: 1.0003148317337036,grad_norm: 0.9999990782535246, iteration: 232744
loss: 0.9940077066421509,grad_norm: 0.8830795878742344, iteration: 232745
loss: 1.0129601955413818,grad_norm: 0.9010752124346371, iteration: 232746
loss: 1.0801846981048584,grad_norm: 0.9999996493054899, iteration: 232747
loss: 1.0376763343811035,grad_norm: 0.9335985400583673, iteration: 232748
loss: 1.0262049436569214,grad_norm: 0.8791909521062903, iteration: 232749
loss: 1.0040431022644043,grad_norm: 0.9999991907860519, iteration: 232750
loss: 0.9551740884780884,grad_norm: 0.817615493198673, iteration: 232751
loss: 0.9754612445831299,grad_norm: 0.801175136211244, iteration: 232752
loss: 1.0320647954940796,grad_norm: 0.9127274998989272, iteration: 232753
loss: 0.9646372199058533,grad_norm: 0.9205523009979567, iteration: 232754
loss: 0.982614278793335,grad_norm: 0.9616102497538104, iteration: 232755
loss: 1.0069375038146973,grad_norm: 0.9517884512076477, iteration: 232756
loss: 1.0003589391708374,grad_norm: 0.9780060339356396, iteration: 232757
loss: 1.1534932851791382,grad_norm: 0.9999992682437719, iteration: 232758
loss: 1.032033085823059,grad_norm: 0.8641337996117079, iteration: 232759
loss: 1.0179258584976196,grad_norm: 0.862900703659443, iteration: 232760
loss: 1.0136120319366455,grad_norm: 0.7638210498600644, iteration: 232761
loss: 0.9745169281959534,grad_norm: 0.9051344182018081, iteration: 232762
loss: 0.9706712365150452,grad_norm: 0.7826847032755984, iteration: 232763
loss: 1.0119967460632324,grad_norm: 0.9999991420609037, iteration: 232764
loss: 0.9863342642784119,grad_norm: 0.9780363101452653, iteration: 232765
loss: 1.0027828216552734,grad_norm: 0.7541612342278663, iteration: 232766
loss: 0.9564980268478394,grad_norm: 0.9839896554735158, iteration: 232767
loss: 0.9899806976318359,grad_norm: 0.842236090223221, iteration: 232768
loss: 0.9988695979118347,grad_norm: 0.9596882266921775, iteration: 232769
loss: 0.9921571612358093,grad_norm: 0.8966724281538646, iteration: 232770
loss: 0.9341281652450562,grad_norm: 0.9566270366252486, iteration: 232771
loss: 0.994240403175354,grad_norm: 0.7270980118438894, iteration: 232772
loss: 0.973628044128418,grad_norm: 0.8463658290195282, iteration: 232773
loss: 1.0004346370697021,grad_norm: 0.7714678854735825, iteration: 232774
loss: 1.0040148496627808,grad_norm: 0.9999991673841873, iteration: 232775
loss: 0.9511080384254456,grad_norm: 0.8494011103680836, iteration: 232776
loss: 1.0042436122894287,grad_norm: 0.9357541626501998, iteration: 232777
loss: 0.986079752445221,grad_norm: 0.7928795174591097, iteration: 232778
loss: 1.0226686000823975,grad_norm: 0.8874378498310631, iteration: 232779
loss: 1.0302748680114746,grad_norm: 0.8309750986460899, iteration: 232780
loss: 0.9617527723312378,grad_norm: 0.9999991354904698, iteration: 232781
loss: 0.9890541434288025,grad_norm: 0.880752640312368, iteration: 232782
loss: 0.9739039540290833,grad_norm: 0.9343562346166636, iteration: 232783
loss: 1.1561646461486816,grad_norm: 0.9999998732984118, iteration: 232784
loss: 1.0171854496002197,grad_norm: 0.999999425113487, iteration: 232785
loss: 1.0056034326553345,grad_norm: 0.8818214944072128, iteration: 232786
loss: 0.9758210778236389,grad_norm: 0.9722514980610731, iteration: 232787
loss: 0.9917507767677307,grad_norm: 0.9999991910991215, iteration: 232788
loss: 1.0082151889801025,grad_norm: 0.9999991608314104, iteration: 232789
loss: 0.98652583360672,grad_norm: 0.9976195576095113, iteration: 232790
loss: 1.0191359519958496,grad_norm: 0.999999100296192, iteration: 232791
loss: 1.0892192125320435,grad_norm: 0.9999991785958599, iteration: 232792
loss: 0.9684695601463318,grad_norm: 0.999999074407977, iteration: 232793
loss: 1.0318704843521118,grad_norm: 0.9999993558096735, iteration: 232794
loss: 1.009832501411438,grad_norm: 0.9664403806077426, iteration: 232795
loss: 1.0063351392745972,grad_norm: 0.9999990380474258, iteration: 232796
loss: 0.9638487696647644,grad_norm: 0.915735926387351, iteration: 232797
loss: 1.0434666872024536,grad_norm: 0.9999994359603385, iteration: 232798
loss: 0.9643513560295105,grad_norm: 0.9999991365065168, iteration: 232799
loss: 1.0028815269470215,grad_norm: 0.8532228257236949, iteration: 232800
loss: 0.995634913444519,grad_norm: 0.9999989589732463, iteration: 232801
loss: 0.9832645654678345,grad_norm: 0.9168600092500548, iteration: 232802
loss: 1.0255287885665894,grad_norm: 0.9999990349061431, iteration: 232803
loss: 1.014877200126648,grad_norm: 0.851893614033845, iteration: 232804
loss: 1.0192078351974487,grad_norm: 0.8622963040900468, iteration: 232805
loss: 0.978543758392334,grad_norm: 0.9999992534403336, iteration: 232806
loss: 1.0796852111816406,grad_norm: 0.8435336212052083, iteration: 232807
loss: 0.994378924369812,grad_norm: 0.999999138864342, iteration: 232808
loss: 0.970940351486206,grad_norm: 0.9802945141926195, iteration: 232809
loss: 1.0039597749710083,grad_norm: 0.8719932219861171, iteration: 232810
loss: 0.9681068062782288,grad_norm: 0.9999991135239967, iteration: 232811
loss: 0.9958862066268921,grad_norm: 0.8632830600186324, iteration: 232812
loss: 1.0104947090148926,grad_norm: 0.9999990593143109, iteration: 232813
loss: 1.0169856548309326,grad_norm: 0.9999992035162745, iteration: 232814
loss: 0.9953131675720215,grad_norm: 0.9569369476239434, iteration: 232815
loss: 0.9595816135406494,grad_norm: 0.8804659833798871, iteration: 232816
loss: 1.0097525119781494,grad_norm: 0.9814983862654293, iteration: 232817
loss: 0.9996752738952637,grad_norm: 0.8849205293440179, iteration: 232818
loss: 0.9923699498176575,grad_norm: 0.9999990221229227, iteration: 232819
loss: 1.0246820449829102,grad_norm: 0.9999991556636585, iteration: 232820
loss: 0.9766989350318909,grad_norm: 0.9479543060574948, iteration: 232821
loss: 0.9959899187088013,grad_norm: 0.9999992062510406, iteration: 232822
loss: 1.0042243003845215,grad_norm: 0.9292855270401389, iteration: 232823
loss: 0.9476003050804138,grad_norm: 0.9999990602795309, iteration: 232824
loss: 0.9773064851760864,grad_norm: 0.9223621730492491, iteration: 232825
loss: 0.9960314035415649,grad_norm: 0.963186087802487, iteration: 232826
loss: 1.0106456279754639,grad_norm: 0.9999992784584109, iteration: 232827
loss: 0.9536849856376648,grad_norm: 0.7982544662462565, iteration: 232828
loss: 1.0196242332458496,grad_norm: 0.9311500249758777, iteration: 232829
loss: 0.9927230477333069,grad_norm: 0.7930061864447694, iteration: 232830
loss: 0.9864488840103149,grad_norm: 0.7903379406769933, iteration: 232831
loss: 0.988156795501709,grad_norm: 0.9999990973164696, iteration: 232832
loss: 0.9999299645423889,grad_norm: 0.9999999050253039, iteration: 232833
loss: 0.9913307428359985,grad_norm: 0.8628838361798248, iteration: 232834
loss: 0.991000235080719,grad_norm: 0.7970085838245297, iteration: 232835
loss: 1.076369285583496,grad_norm: 0.9999996376948656, iteration: 232836
loss: 1.0046762228012085,grad_norm: 0.7825657043394237, iteration: 232837
loss: 1.021820068359375,grad_norm: 0.9999994244296068, iteration: 232838
loss: 0.9892557263374329,grad_norm: 0.9784032604839352, iteration: 232839
loss: 1.0257327556610107,grad_norm: 0.9999996015061092, iteration: 232840
loss: 1.0124659538269043,grad_norm: 0.9342379889168622, iteration: 232841
loss: 0.9762601852416992,grad_norm: 0.8842002458987692, iteration: 232842
loss: 0.9976159334182739,grad_norm: 0.9133247082609703, iteration: 232843
loss: 1.0115516185760498,grad_norm: 0.9778181067766899, iteration: 232844
loss: 1.0193450450897217,grad_norm: 0.9999990862582631, iteration: 232845
loss: 1.0164586305618286,grad_norm: 0.7532953958998244, iteration: 232846
loss: 1.0174031257629395,grad_norm: 0.8014331467434618, iteration: 232847
loss: 1.0179219245910645,grad_norm: 0.8759496824789186, iteration: 232848
loss: 1.0040875673294067,grad_norm: 0.9999990081623896, iteration: 232849
loss: 0.999838650226593,grad_norm: 0.744272902295036, iteration: 232850
loss: 1.0497971773147583,grad_norm: 0.9813186471571517, iteration: 232851
loss: 1.0097371339797974,grad_norm: 0.9313931064057988, iteration: 232852
loss: 1.0128240585327148,grad_norm: 0.9999991680782033, iteration: 232853
loss: 0.980110764503479,grad_norm: 0.8313533708835115, iteration: 232854
loss: 1.002512812614441,grad_norm: 0.8039040278700429, iteration: 232855
loss: 0.9732909798622131,grad_norm: 0.9491016654687691, iteration: 232856
loss: 0.9938754439353943,grad_norm: 0.9569744167161822, iteration: 232857
loss: 1.0248007774353027,grad_norm: 0.999999174649786, iteration: 232858
loss: 1.0055162906646729,grad_norm: 0.9999990591957237, iteration: 232859
loss: 1.0313665866851807,grad_norm: 0.9999994205680859, iteration: 232860
loss: 1.0080223083496094,grad_norm: 0.8599577065604703, iteration: 232861
loss: 1.0264519453048706,grad_norm: 0.9455856836589454, iteration: 232862
loss: 0.9851588606834412,grad_norm: 0.846204743089851, iteration: 232863
loss: 0.9961531758308411,grad_norm: 0.9712592211702438, iteration: 232864
loss: 0.9649399518966675,grad_norm: 0.799072928413234, iteration: 232865
loss: 0.9964888691902161,grad_norm: 0.9679185939302117, iteration: 232866
loss: 0.9826136827468872,grad_norm: 0.999999196162499, iteration: 232867
loss: 0.9980263710021973,grad_norm: 0.9999991157124777, iteration: 232868
loss: 1.034224510192871,grad_norm: 0.9999994629451404, iteration: 232869
loss: 0.9925801157951355,grad_norm: 0.8029639414650979, iteration: 232870
loss: 1.0054935216903687,grad_norm: 0.7914625992111957, iteration: 232871
loss: 1.0098413228988647,grad_norm: 0.9999999358015316, iteration: 232872
loss: 1.0250930786132812,grad_norm: 0.7936245041500591, iteration: 232873
loss: 1.0226582288742065,grad_norm: 0.8632596499582564, iteration: 232874
loss: 0.99684077501297,grad_norm: 0.9480865499348907, iteration: 232875
loss: 1.0149898529052734,grad_norm: 0.9999992185891943, iteration: 232876
loss: 0.9867555499076843,grad_norm: 0.9676299582479285, iteration: 232877
loss: 0.9687427282333374,grad_norm: 0.9391486530848028, iteration: 232878
loss: 1.0041801929473877,grad_norm: 0.9288800918132115, iteration: 232879
loss: 0.96504145860672,grad_norm: 0.9010238340404412, iteration: 232880
loss: 0.9721794724464417,grad_norm: 0.9696458179535569, iteration: 232881
loss: 1.010698914527893,grad_norm: 0.8711553516163127, iteration: 232882
loss: 0.9772672057151794,grad_norm: 0.9573341138552233, iteration: 232883
loss: 1.0116838216781616,grad_norm: 0.9235974910443697, iteration: 232884
loss: 0.9774777293205261,grad_norm: 0.9388270155091739, iteration: 232885
loss: 0.9747812747955322,grad_norm: 0.9629808619070632, iteration: 232886
loss: 1.0187151432037354,grad_norm: 0.9999996098014067, iteration: 232887
loss: 0.9828411936759949,grad_norm: 0.8373307339138555, iteration: 232888
loss: 1.0394394397735596,grad_norm: 0.9999992770348013, iteration: 232889
loss: 0.9669499397277832,grad_norm: 0.9224157925333702, iteration: 232890
loss: 0.9851429462432861,grad_norm: 0.8241182912372513, iteration: 232891
loss: 0.9919188618659973,grad_norm: 0.9559787117563183, iteration: 232892
loss: 0.9798294305801392,grad_norm: 0.8495960692342864, iteration: 232893
loss: 0.9890344738960266,grad_norm: 0.8692155241080147, iteration: 232894
loss: 0.9919269680976868,grad_norm: 0.9999990631351574, iteration: 232895
loss: 0.9932895302772522,grad_norm: 0.9999992402930222, iteration: 232896
loss: 1.0519261360168457,grad_norm: 0.9999997249771138, iteration: 232897
loss: 0.9838828444480896,grad_norm: 0.9999991447677501, iteration: 232898
loss: 1.0185853242874146,grad_norm: 0.8652808203376696, iteration: 232899
loss: 1.002972960472107,grad_norm: 0.7875041014887871, iteration: 232900
loss: 1.022356390953064,grad_norm: 0.9544476571452778, iteration: 232901
loss: 1.0114719867706299,grad_norm: 0.9402804022975927, iteration: 232902
loss: 0.9953432679176331,grad_norm: 0.9104822706550699, iteration: 232903
loss: 1.014757752418518,grad_norm: 0.8109414016115271, iteration: 232904
loss: 1.0050851106643677,grad_norm: 0.8910811231517488, iteration: 232905
loss: 1.0486325025558472,grad_norm: 0.9999994106492045, iteration: 232906
loss: 1.0237160921096802,grad_norm: 0.9335161767404694, iteration: 232907
loss: 1.0010972023010254,grad_norm: 0.9999994495814624, iteration: 232908
loss: 0.9878079891204834,grad_norm: 0.9529147589643808, iteration: 232909
loss: 0.9965305328369141,grad_norm: 0.99999897644878, iteration: 232910
loss: 1.0218206644058228,grad_norm: 0.8777813419477865, iteration: 232911
loss: 0.9976028800010681,grad_norm: 0.9999993113506951, iteration: 232912
loss: 0.9809978008270264,grad_norm: 0.9678883050755829, iteration: 232913
loss: 1.0369458198547363,grad_norm: 0.9999991162060752, iteration: 232914
loss: 0.9714912176132202,grad_norm: 0.9999990862722782, iteration: 232915
loss: 0.959409773349762,grad_norm: 0.900033869057524, iteration: 232916
loss: 0.9724541306495667,grad_norm: 0.982283454641472, iteration: 232917
loss: 0.9967118501663208,grad_norm: 0.9125733234266039, iteration: 232918
loss: 1.0093899965286255,grad_norm: 0.8072842674808467, iteration: 232919
loss: 0.9908140301704407,grad_norm: 0.8038161497581374, iteration: 232920
loss: 0.9586224555969238,grad_norm: 0.8733571952751162, iteration: 232921
loss: 0.9876134991645813,grad_norm: 0.920897401914553, iteration: 232922
loss: 0.9767006635665894,grad_norm: 0.9081706363699866, iteration: 232923
loss: 1.0098509788513184,grad_norm: 0.9999991447460845, iteration: 232924
loss: 1.00364089012146,grad_norm: 0.9218081196476232, iteration: 232925
loss: 1.0092254877090454,grad_norm: 0.8231842692830049, iteration: 232926
loss: 0.9766530394554138,grad_norm: 0.9463421107674643, iteration: 232927
loss: 0.9733979105949402,grad_norm: 0.8104837245361923, iteration: 232928
loss: 0.991306483745575,grad_norm: 0.8654417656980908, iteration: 232929
loss: 0.964306652545929,grad_norm: 0.817017014783591, iteration: 232930
loss: 0.9977984428405762,grad_norm: 0.9999993295834032, iteration: 232931
loss: 1.0357180833816528,grad_norm: 0.9999991204212133, iteration: 232932
loss: 1.0393660068511963,grad_norm: 0.9062385706775594, iteration: 232933
loss: 0.9914523363113403,grad_norm: 0.7593587390441016, iteration: 232934
loss: 1.0004761219024658,grad_norm: 0.9999990599066054, iteration: 232935
loss: 0.9985423684120178,grad_norm: 0.9999991813057036, iteration: 232936
loss: 1.0109262466430664,grad_norm: 0.9707002595823356, iteration: 232937
loss: 1.0280207395553589,grad_norm: 0.999999092300479, iteration: 232938
loss: 1.0109517574310303,grad_norm: 0.724611170352735, iteration: 232939
loss: 1.0089693069458008,grad_norm: 0.8898768492324028, iteration: 232940
loss: 1.025482177734375,grad_norm: 0.9045030358423217, iteration: 232941
loss: 1.0168792009353638,grad_norm: 0.9113432402760719, iteration: 232942
loss: 0.990650475025177,grad_norm: 0.9999990398148293, iteration: 232943
loss: 1.0229352712631226,grad_norm: 0.9539189648473625, iteration: 232944
loss: 1.0547044277191162,grad_norm: 0.9999994401516229, iteration: 232945
loss: 1.0196945667266846,grad_norm: 0.9999990996113329, iteration: 232946
loss: 1.0272705554962158,grad_norm: 0.9151170098521355, iteration: 232947
loss: 1.0164369344711304,grad_norm: 0.7095749241013222, iteration: 232948
loss: 1.0158429145812988,grad_norm: 0.9384502279876814, iteration: 232949
loss: 0.9922656416893005,grad_norm: 0.9202978765434608, iteration: 232950
loss: 0.9743790030479431,grad_norm: 0.9643897233973735, iteration: 232951
loss: 0.9965946674346924,grad_norm: 0.9885876863057584, iteration: 232952
loss: 0.995644748210907,grad_norm: 0.8435692421374142, iteration: 232953
loss: 1.0171185731887817,grad_norm: 0.8995820796022522, iteration: 232954
loss: 0.9928759932518005,grad_norm: 0.9443419965399024, iteration: 232955
loss: 0.9825608730316162,grad_norm: 0.9571845946694009, iteration: 232956
loss: 0.9589428305625916,grad_norm: 0.9999989633740428, iteration: 232957
loss: 1.0195955038070679,grad_norm: 0.9999991318499653, iteration: 232958
loss: 0.9894360899925232,grad_norm: 0.9469928902014587, iteration: 232959
loss: 0.9962526559829712,grad_norm: 0.9496850393809473, iteration: 232960
loss: 1.0143353939056396,grad_norm: 0.8260544931346536, iteration: 232961
loss: 1.0168490409851074,grad_norm: 0.9956900186375497, iteration: 232962
loss: 0.9624409675598145,grad_norm: 0.9999991720811848, iteration: 232963
loss: 0.9673230648040771,grad_norm: 0.8319321012638733, iteration: 232964
loss: 1.0219080448150635,grad_norm: 0.8472955137078524, iteration: 232965
loss: 1.0039490461349487,grad_norm: 0.7868173065675704, iteration: 232966
loss: 1.0023670196533203,grad_norm: 0.7995593751059165, iteration: 232967
loss: 1.0047329664230347,grad_norm: 0.9999990526557287, iteration: 232968
loss: 1.0126986503601074,grad_norm: 0.9035864613851018, iteration: 232969
loss: 1.0086026191711426,grad_norm: 0.772320260360636, iteration: 232970
loss: 1.0159412622451782,grad_norm: 0.9052975708457042, iteration: 232971
loss: 0.9834791421890259,grad_norm: 0.8942458147398844, iteration: 232972
loss: 0.9879491329193115,grad_norm: 0.9999991536087116, iteration: 232973
loss: 0.9836070537567139,grad_norm: 0.8065173820328205, iteration: 232974
loss: 0.9723976254463196,grad_norm: 0.935155180600991, iteration: 232975
loss: 1.0018776655197144,grad_norm: 0.9999991119177346, iteration: 232976
loss: 0.9915620684623718,grad_norm: 0.9999991970920368, iteration: 232977
loss: 1.0304298400878906,grad_norm: 0.9859722667176228, iteration: 232978
loss: 1.0173323154449463,grad_norm: 0.9999993071739746, iteration: 232979
loss: 0.9779014587402344,grad_norm: 0.891269670790552, iteration: 232980
loss: 0.9824619889259338,grad_norm: 0.8429091446723836, iteration: 232981
loss: 0.9983175992965698,grad_norm: 0.996668817564496, iteration: 232982
loss: 1.0111578702926636,grad_norm: 0.9787096311490067, iteration: 232983
loss: 1.0194700956344604,grad_norm: 0.8518455961141493, iteration: 232984
loss: 0.9761738181114197,grad_norm: 0.9011746346820221, iteration: 232985
loss: 0.9925918579101562,grad_norm: 0.9999990185356594, iteration: 232986
loss: 1.0272732973098755,grad_norm: 0.9420669060870537, iteration: 232987
loss: 1.0894850492477417,grad_norm: 0.9999996157458855, iteration: 232988
loss: 0.9819713234901428,grad_norm: 0.9017351885084636, iteration: 232989
loss: 0.9793837070465088,grad_norm: 0.9999995087373783, iteration: 232990
loss: 0.9676961302757263,grad_norm: 0.8873572339155293, iteration: 232991
loss: 0.9881559610366821,grad_norm: 0.8667006155448668, iteration: 232992
loss: 0.9956530928611755,grad_norm: 0.9505713171006867, iteration: 232993
loss: 1.0519986152648926,grad_norm: 0.9999996043091711, iteration: 232994
loss: 0.9450820684432983,grad_norm: 0.9211496027888992, iteration: 232995
loss: 1.048593521118164,grad_norm: 0.9999993103345872, iteration: 232996
loss: 0.9894283413887024,grad_norm: 0.99999919762194, iteration: 232997
loss: 1.0106502771377563,grad_norm: 0.7702602076181128, iteration: 232998
loss: 1.0040464401245117,grad_norm: 0.9607453952969064, iteration: 232999
loss: 1.0401239395141602,grad_norm: 0.9925473580512628, iteration: 233000
loss: 0.9687315821647644,grad_norm: 0.9912943651699831, iteration: 233001
loss: 0.9835543036460876,grad_norm: 0.8673874250422465, iteration: 233002
loss: 1.0091404914855957,grad_norm: 0.8055805603710187, iteration: 233003
loss: 1.0093344449996948,grad_norm: 0.9822573867443813, iteration: 233004
loss: 0.9659867286682129,grad_norm: 0.8832255370660562, iteration: 233005
loss: 0.9734019041061401,grad_norm: 0.8674915753541713, iteration: 233006
loss: 0.976710855960846,grad_norm: 0.999999167572626, iteration: 233007
loss: 1.0113188028335571,grad_norm: 0.9698520716371531, iteration: 233008
loss: 1.0051023960113525,grad_norm: 0.8607049415419479, iteration: 233009
loss: 1.0073386430740356,grad_norm: 0.9124002313300482, iteration: 233010
loss: 1.043843150138855,grad_norm: 0.9999993177077829, iteration: 233011
loss: 0.9921408891677856,grad_norm: 0.7505639844887568, iteration: 233012
loss: 1.0019807815551758,grad_norm: 0.9999990644723776, iteration: 233013
loss: 1.014441967010498,grad_norm: 0.9616527307556801, iteration: 233014
loss: 0.9366995692253113,grad_norm: 0.8200351670587902, iteration: 233015
loss: 0.9949330687522888,grad_norm: 0.9999991821872964, iteration: 233016
loss: 0.9645226001739502,grad_norm: 0.9801728090962001, iteration: 233017
loss: 1.0362656116485596,grad_norm: 0.9999995648962912, iteration: 233018
loss: 1.0064873695373535,grad_norm: 0.9999999033925453, iteration: 233019
loss: 0.982083797454834,grad_norm: 0.999999113448043, iteration: 233020
loss: 1.0027766227722168,grad_norm: 0.9999990747194905, iteration: 233021
loss: 1.0039077997207642,grad_norm: 0.8344565179354004, iteration: 233022
loss: 1.0086945295333862,grad_norm: 0.9999998452236952, iteration: 233023
loss: 1.007010579109192,grad_norm: 0.9999990331249042, iteration: 233024
loss: 0.9968663454055786,grad_norm: 0.9999990024290956, iteration: 233025
loss: 1.0217268466949463,grad_norm: 0.9999996194008299, iteration: 233026
loss: 0.9896314740180969,grad_norm: 0.9131451926995469, iteration: 233027
loss: 1.0301004648208618,grad_norm: 0.8930177216553195, iteration: 233028
loss: 1.0040146112442017,grad_norm: 0.9999993007020261, iteration: 233029
loss: 1.0041112899780273,grad_norm: 0.9999992760401591, iteration: 233030
loss: 1.0062593221664429,grad_norm: 0.9705210935312121, iteration: 233031
loss: 0.9708182215690613,grad_norm: 0.8331594640876134, iteration: 233032
loss: 1.0299477577209473,grad_norm: 0.9999997502043285, iteration: 233033
loss: 1.0318379402160645,grad_norm: 0.8185741325732178, iteration: 233034
loss: 0.9922561645507812,grad_norm: 0.8293440635051713, iteration: 233035
loss: 0.9962551593780518,grad_norm: 0.9717245997333723, iteration: 233036
loss: 0.9802452325820923,grad_norm: 0.9201581665958009, iteration: 233037
loss: 1.0150842666625977,grad_norm: 0.9485043998753103, iteration: 233038
loss: 1.0082405805587769,grad_norm: 0.8839252615352495, iteration: 233039
loss: 1.0138827562332153,grad_norm: 0.9811399437838226, iteration: 233040
loss: 0.9735527634620667,grad_norm: 0.9999991994984102, iteration: 233041
loss: 0.993165910243988,grad_norm: 0.9909405010351146, iteration: 233042
loss: 1.054102897644043,grad_norm: 0.9999991946052607, iteration: 233043
loss: 1.019342303276062,grad_norm: 0.9999989577994437, iteration: 233044
loss: 1.0152637958526611,grad_norm: 0.9481680591922524, iteration: 233045
loss: 0.9906473159790039,grad_norm: 0.8634005880096007, iteration: 233046
loss: 0.992100715637207,grad_norm: 0.8977149881806391, iteration: 233047
loss: 0.9959774017333984,grad_norm: 0.991890142095015, iteration: 233048
loss: 0.9722216725349426,grad_norm: 0.8697667189930808, iteration: 233049
loss: 1.0006297826766968,grad_norm: 0.9999990573796124, iteration: 233050
loss: 1.035133957862854,grad_norm: 0.8848456762843975, iteration: 233051
loss: 1.0625081062316895,grad_norm: 0.9434769701740415, iteration: 233052
loss: 0.9669894576072693,grad_norm: 0.9999993819084194, iteration: 233053
loss: 1.0056841373443604,grad_norm: 0.9999989604908232, iteration: 233054
loss: 1.0074161291122437,grad_norm: 0.9506583444713365, iteration: 233055
loss: 0.9942658543586731,grad_norm: 0.9999990540842244, iteration: 233056
loss: 1.0381945371627808,grad_norm: 0.9951185410286167, iteration: 233057
loss: 0.975693941116333,grad_norm: 0.8382765230417016, iteration: 233058
loss: 0.9933510422706604,grad_norm: 0.9092770986641908, iteration: 233059
loss: 0.9860081672668457,grad_norm: 0.8774262058515473, iteration: 233060
loss: 0.9836158156394958,grad_norm: 0.9999995418450697, iteration: 233061
loss: 1.0259701013565063,grad_norm: 0.8424536292722957, iteration: 233062
loss: 0.9871119260787964,grad_norm: 0.9663640940843614, iteration: 233063
loss: 1.1123939752578735,grad_norm: 0.8406377363148736, iteration: 233064
loss: 1.0158076286315918,grad_norm: 0.9999994716965884, iteration: 233065
loss: 1.01144540309906,grad_norm: 0.9999990996921788, iteration: 233066
loss: 0.9988038539886475,grad_norm: 0.9394224050732349, iteration: 233067
loss: 0.9379433393478394,grad_norm: 0.9999992535798173, iteration: 233068
loss: 0.9852034449577332,grad_norm: 0.999999135197001, iteration: 233069
loss: 0.9746617674827576,grad_norm: 0.9999990314695597, iteration: 233070
loss: 1.030144453048706,grad_norm: 0.9181292826930388, iteration: 233071
loss: 0.9633884429931641,grad_norm: 0.8463664371109865, iteration: 233072
loss: 1.0445926189422607,grad_norm: 0.9999994150417596, iteration: 233073
loss: 0.9761667251586914,grad_norm: 0.9999993034541369, iteration: 233074
loss: 0.9957505464553833,grad_norm: 0.9319140457087143, iteration: 233075
loss: 0.9682311415672302,grad_norm: 0.9464344075619469, iteration: 233076
loss: 1.013662576675415,grad_norm: 0.9999991717015566, iteration: 233077
loss: 0.9744544625282288,grad_norm: 0.9999993426007888, iteration: 233078
loss: 0.9675686955451965,grad_norm: 0.9999991710129845, iteration: 233079
loss: 0.9945477247238159,grad_norm: 0.787989481428906, iteration: 233080
loss: 1.002934217453003,grad_norm: 0.999999124758653, iteration: 233081
loss: 1.0023034811019897,grad_norm: 0.9295747260236621, iteration: 233082
loss: 1.0096865892410278,grad_norm: 0.894694888039634, iteration: 233083
loss: 0.9989145398139954,grad_norm: 0.883293657000408, iteration: 233084
loss: 1.0149495601654053,grad_norm: 0.9999992338672058, iteration: 233085
loss: 1.0032219886779785,grad_norm: 0.9265642625770951, iteration: 233086
loss: 1.043554663658142,grad_norm: 0.9226680313063503, iteration: 233087
loss: 1.0341392755508423,grad_norm: 0.999999124017813, iteration: 233088
loss: 1.030379295349121,grad_norm: 0.9999991996806978, iteration: 233089
loss: 0.986886203289032,grad_norm: 0.9686180569659035, iteration: 233090
loss: 0.9865341186523438,grad_norm: 0.9999990771049726, iteration: 233091
loss: 0.9735775589942932,grad_norm: 0.9226342989839714, iteration: 233092
loss: 0.9856841564178467,grad_norm: 0.999998953289914, iteration: 233093
loss: 1.0126231908798218,grad_norm: 0.8161867165919529, iteration: 233094
loss: 1.001584768295288,grad_norm: 0.901695400072863, iteration: 233095
loss: 0.9811134338378906,grad_norm: 0.8009572468373417, iteration: 233096
loss: 0.9879746437072754,grad_norm: 0.8532576145599894, iteration: 233097
loss: 1.006744146347046,grad_norm: 0.9647407148056915, iteration: 233098
loss: 0.9893956780433655,grad_norm: 0.999998955421903, iteration: 233099
loss: 0.956244170665741,grad_norm: 0.9999991499743238, iteration: 233100
loss: 0.974046528339386,grad_norm: 0.9440176741072623, iteration: 233101
loss: 0.9607145190238953,grad_norm: 0.937172290577169, iteration: 233102
loss: 1.0069793462753296,grad_norm: 0.9765173064383547, iteration: 233103
loss: 0.9934983253479004,grad_norm: 0.9566905384588887, iteration: 233104
loss: 1.0110350847244263,grad_norm: 0.99999913921585, iteration: 233105
loss: 0.9658651351928711,grad_norm: 0.9603786711034183, iteration: 233106
loss: 1.0189008712768555,grad_norm: 0.9999993397426603, iteration: 233107
loss: 1.0662208795547485,grad_norm: 0.9999995613539484, iteration: 233108
loss: 1.0156035423278809,grad_norm: 0.9827870223607085, iteration: 233109
loss: 0.994166374206543,grad_norm: 0.8549341324608859, iteration: 233110
loss: 0.9890307784080505,grad_norm: 0.8058909668826973, iteration: 233111
loss: 0.9898086190223694,grad_norm: 0.8666623325017716, iteration: 233112
loss: 0.9735929369926453,grad_norm: 0.9999990142109436, iteration: 233113
loss: 0.9993783831596375,grad_norm: 0.9865510904559208, iteration: 233114
loss: 0.9907711744308472,grad_norm: 0.8923702179897811, iteration: 233115
loss: 1.0088694095611572,grad_norm: 0.9999992700344196, iteration: 233116
loss: 0.9896309971809387,grad_norm: 0.9733041978581659, iteration: 233117
loss: 1.013819694519043,grad_norm: 0.9260492287249996, iteration: 233118
loss: 0.973268449306488,grad_norm: 0.8549161750717768, iteration: 233119
loss: 1.0073630809783936,grad_norm: 0.9999994110038088, iteration: 233120
loss: 0.9667912125587463,grad_norm: 0.9920929591548818, iteration: 233121
loss: 0.9714099168777466,grad_norm: 0.8225655750856031, iteration: 233122
loss: 1.0040452480316162,grad_norm: 0.9831990969738923, iteration: 233123
loss: 1.0134609937667847,grad_norm: 0.9999989160676308, iteration: 233124
loss: 1.019156575202942,grad_norm: 0.9999995377941008, iteration: 233125
loss: 0.9880543947219849,grad_norm: 0.8871278782907944, iteration: 233126
loss: 1.0131330490112305,grad_norm: 0.9999992550016845, iteration: 233127
loss: 1.009493350982666,grad_norm: 0.9098632299743462, iteration: 233128
loss: 0.9996087551116943,grad_norm: 0.9656406222896173, iteration: 233129
loss: 0.994505763053894,grad_norm: 0.8959809727410677, iteration: 233130
loss: 1.016098141670227,grad_norm: 0.9557837288131679, iteration: 233131
loss: 0.9937998652458191,grad_norm: 0.942530861670275, iteration: 233132
loss: 0.9687949419021606,grad_norm: 0.9999992270085514, iteration: 233133
loss: 0.9934711456298828,grad_norm: 0.8998763767217275, iteration: 233134
loss: 0.9856312870979309,grad_norm: 0.9765149275414299, iteration: 233135
loss: 0.9982143044471741,grad_norm: 0.9999994331849466, iteration: 233136
loss: 1.000840425491333,grad_norm: 0.9631013521895292, iteration: 233137
loss: 1.095229148864746,grad_norm: 0.8951423952453564, iteration: 233138
loss: 1.0094554424285889,grad_norm: 0.9999990337450504, iteration: 233139
loss: 0.9915778040885925,grad_norm: 0.9999990977732465, iteration: 233140
loss: 1.0374586582183838,grad_norm: 0.999999811160394, iteration: 233141
loss: 0.9899585247039795,grad_norm: 0.8201798226156499, iteration: 233142
loss: 1.008621096611023,grad_norm: 0.8238888345251617, iteration: 233143
loss: 1.0080114603042603,grad_norm: 0.8375252761676603, iteration: 233144
loss: 0.9845927357673645,grad_norm: 0.9486518403378178, iteration: 233145
loss: 0.9815598726272583,grad_norm: 0.9026003277712832, iteration: 233146
loss: 1.0173277854919434,grad_norm: 0.9999991903425546, iteration: 233147
loss: 1.0259642601013184,grad_norm: 0.9999990643609465, iteration: 233148
loss: 0.9674359560012817,grad_norm: 0.9279776487863426, iteration: 233149
loss: 1.0295782089233398,grad_norm: 0.9999994617749622, iteration: 233150
loss: 1.0192402601242065,grad_norm: 0.9999992405904821, iteration: 233151
loss: 1.0007994174957275,grad_norm: 0.7786905599511665, iteration: 233152
loss: 1.0399876832962036,grad_norm: 0.9999992195493418, iteration: 233153
loss: 1.040968418121338,grad_norm: 0.9999990900794569, iteration: 233154
loss: 0.9704419374465942,grad_norm: 0.8306350100314831, iteration: 233155
loss: 0.9430467486381531,grad_norm: 0.9247395718329199, iteration: 233156
loss: 1.0365103483200073,grad_norm: 0.8967172453125872, iteration: 233157
loss: 1.0462654829025269,grad_norm: 0.9999991685767704, iteration: 233158
loss: 1.0300371646881104,grad_norm: 0.9842510864758524, iteration: 233159
loss: 0.9854780435562134,grad_norm: 0.9709923746418228, iteration: 233160
loss: 1.0044527053833008,grad_norm: 0.9163267095957512, iteration: 233161
loss: 1.0009727478027344,grad_norm: 0.7255820930063231, iteration: 233162
loss: 1.0410308837890625,grad_norm: 0.999999170050712, iteration: 233163
loss: 1.0141284465789795,grad_norm: 0.8551148713844818, iteration: 233164
loss: 1.0210038423538208,grad_norm: 0.9999988964017396, iteration: 233165
loss: 0.9854404926300049,grad_norm: 0.9879130443926816, iteration: 233166
loss: 1.004462718963623,grad_norm: 0.7683664053244587, iteration: 233167
loss: 1.0446618795394897,grad_norm: 0.9602523631808237, iteration: 233168
loss: 0.9991611838340759,grad_norm: 0.8496125995103566, iteration: 233169
loss: 1.0104033946990967,grad_norm: 0.9999990780600292, iteration: 233170
loss: 0.9930516481399536,grad_norm: 0.9090025316544795, iteration: 233171
loss: 0.9836769104003906,grad_norm: 0.8515607998651542, iteration: 233172
loss: 0.9973856210708618,grad_norm: 0.8251000911320567, iteration: 233173
loss: 0.9861425757408142,grad_norm: 0.9999990985141283, iteration: 233174
loss: 1.042917251586914,grad_norm: 0.9914896277158903, iteration: 233175
loss: 0.9823195338249207,grad_norm: 0.8661679184233279, iteration: 233176
loss: 1.0005191564559937,grad_norm: 0.8743578222116429, iteration: 233177
loss: 0.99624103307724,grad_norm: 0.8667617115379013, iteration: 233178
loss: 1.0193612575531006,grad_norm: 0.940927017462575, iteration: 233179
loss: 0.9989907145500183,grad_norm: 0.9577701291271553, iteration: 233180
loss: 1.0080485343933105,grad_norm: 0.8569860576883486, iteration: 233181
loss: 0.9988332390785217,grad_norm: 0.830912284437605, iteration: 233182
loss: 1.014674186706543,grad_norm: 0.9999991007125412, iteration: 233183
loss: 0.9801796674728394,grad_norm: 0.9819149753833692, iteration: 233184
loss: 0.9495047330856323,grad_norm: 0.9525890341044766, iteration: 233185
loss: 1.0150690078735352,grad_norm: 0.9302938513859532, iteration: 233186
loss: 1.0266947746276855,grad_norm: 0.8481507839742771, iteration: 233187
loss: 1.015834093093872,grad_norm: 0.9999994893678046, iteration: 233188
loss: 0.9634195566177368,grad_norm: 0.9999989733904727, iteration: 233189
loss: 0.989597499370575,grad_norm: 0.9999989903472558, iteration: 233190
loss: 0.9935075044631958,grad_norm: 0.9999991884872105, iteration: 233191
loss: 1.0242531299591064,grad_norm: 0.9999998005771817, iteration: 233192
loss: 0.9938362240791321,grad_norm: 0.8201693492634723, iteration: 233193
loss: 0.932525634765625,grad_norm: 0.9294197206219226, iteration: 233194
loss: 0.9332566857337952,grad_norm: 0.9999991725272185, iteration: 233195
loss: 0.985700786113739,grad_norm: 0.9365981582163432, iteration: 233196
loss: 1.0159046649932861,grad_norm: 0.8592210786690282, iteration: 233197
loss: 1.0091971158981323,grad_norm: 0.9553171445289441, iteration: 233198
loss: 0.9762831926345825,grad_norm: 0.999999461600751, iteration: 233199
loss: 1.0209146738052368,grad_norm: 0.9214145943643665, iteration: 233200
loss: 1.0426474809646606,grad_norm: 0.8232919885037541, iteration: 233201
loss: 1.0074430704116821,grad_norm: 0.9518657714806356, iteration: 233202
loss: 1.0064841508865356,grad_norm: 0.9999992584209808, iteration: 233203
loss: 0.9910784959793091,grad_norm: 0.8355782165879793, iteration: 233204
loss: 0.9903593063354492,grad_norm: 0.9335414605349597, iteration: 233205
loss: 1.057211995124817,grad_norm: 0.9999990310628671, iteration: 233206
loss: 1.0177311897277832,grad_norm: 0.8920978737962327, iteration: 233207
loss: 0.9296218752861023,grad_norm: 0.8332715884738109, iteration: 233208
loss: 0.9910099506378174,grad_norm: 0.8197823731348026, iteration: 233209
loss: 0.9885667562484741,grad_norm: 0.8051646119509172, iteration: 233210
loss: 1.0049388408660889,grad_norm: 0.9999996209238429, iteration: 233211
loss: 0.988591730594635,grad_norm: 0.9365297026341303, iteration: 233212
loss: 0.9893308281898499,grad_norm: 0.9999990235856558, iteration: 233213
loss: 1.00387442111969,grad_norm: 0.8995006918075348, iteration: 233214
loss: 1.027588129043579,grad_norm: 0.9999996956776878, iteration: 233215
loss: 1.001319169998169,grad_norm: 0.8533182337687018, iteration: 233216
loss: 1.041732907295227,grad_norm: 0.9999990669843942, iteration: 233217
loss: 1.0243167877197266,grad_norm: 0.9999990901401128, iteration: 233218
loss: 1.0069141387939453,grad_norm: 0.854147482286471, iteration: 233219
loss: 1.0268445014953613,grad_norm: 0.8214644871607898, iteration: 233220
loss: 0.9780598878860474,grad_norm: 0.9999990802497455, iteration: 233221
loss: 1.0019572973251343,grad_norm: 0.9209254393809646, iteration: 233222
loss: 1.0140633583068848,grad_norm: 0.9999991615260302, iteration: 233223
loss: 0.9720936417579651,grad_norm: 0.9184180062145116, iteration: 233224
loss: 1.0360877513885498,grad_norm: 0.9999990679256648, iteration: 233225
loss: 1.0133659839630127,grad_norm: 0.9282959007114063, iteration: 233226
loss: 1.019821286201477,grad_norm: 0.9999998398184835, iteration: 233227
loss: 1.0144712924957275,grad_norm: 0.9999991891640476, iteration: 233228
loss: 1.0092432498931885,grad_norm: 0.9477341153817153, iteration: 233229
loss: 1.0065501928329468,grad_norm: 0.9158278645748319, iteration: 233230
loss: 0.9831233620643616,grad_norm: 0.9999991547705862, iteration: 233231
loss: 1.0112684965133667,grad_norm: 0.9115550298777403, iteration: 233232
loss: 1.0286624431610107,grad_norm: 0.9430373379569235, iteration: 233233
loss: 0.9897331595420837,grad_norm: 0.9999991036221931, iteration: 233234
loss: 0.9959303736686707,grad_norm: 0.8494707383467947, iteration: 233235
loss: 0.9467720985412598,grad_norm: 0.9702469522068506, iteration: 233236
loss: 1.0295168161392212,grad_norm: 0.8834893307245529, iteration: 233237
loss: 1.0163065195083618,grad_norm: 0.9133871488679952, iteration: 233238
loss: 1.0217078924179077,grad_norm: 0.9785474405941682, iteration: 233239
loss: 0.9774292707443237,grad_norm: 0.999999286289428, iteration: 233240
loss: 0.981540322303772,grad_norm: 0.9470578758320974, iteration: 233241
loss: 0.9587273597717285,grad_norm: 0.9999991374874835, iteration: 233242
loss: 0.9926019906997681,grad_norm: 0.8715062395612048, iteration: 233243
loss: 1.0535333156585693,grad_norm: 0.9999998200269811, iteration: 233244
loss: 1.0113226175308228,grad_norm: 0.8114042862620119, iteration: 233245
loss: 1.007968783378601,grad_norm: 0.8948592453622763, iteration: 233246
loss: 1.0081690549850464,grad_norm: 0.9104520142469582, iteration: 233247
loss: 0.9550281763076782,grad_norm: 0.999999056759844, iteration: 233248
loss: 0.9984888434410095,grad_norm: 0.774605264618323, iteration: 233249
loss: 1.0147204399108887,grad_norm: 0.9999991592209853, iteration: 233250
loss: 0.9710904955863953,grad_norm: 0.9434842078461589, iteration: 233251
loss: 1.0041399002075195,grad_norm: 0.9999990956713977, iteration: 233252
loss: 1.0142346620559692,grad_norm: 0.8490187189464735, iteration: 233253
loss: 0.9839320778846741,grad_norm: 0.8154168360141337, iteration: 233254
loss: 1.0436546802520752,grad_norm: 1.000000005053099, iteration: 233255
loss: 0.975379467010498,grad_norm: 0.9999990709880076, iteration: 233256
loss: 1.0183537006378174,grad_norm: 0.9228664803334106, iteration: 233257
loss: 1.0233385562896729,grad_norm: 0.9999998660999021, iteration: 233258
loss: 1.0168224573135376,grad_norm: 0.9088093948724545, iteration: 233259
loss: 1.0201692581176758,grad_norm: 0.9554950495325067, iteration: 233260
loss: 1.021557092666626,grad_norm: 0.9057275848143073, iteration: 233261
loss: 1.0168553590774536,grad_norm: 0.999999073370325, iteration: 233262
loss: 0.9766049981117249,grad_norm: 0.8186866476542536, iteration: 233263
loss: 1.0037471055984497,grad_norm: 0.8451924799379338, iteration: 233264
loss: 1.0146342515945435,grad_norm: 0.9999990506179072, iteration: 233265
loss: 1.0325630903244019,grad_norm: 0.9999998559981282, iteration: 233266
loss: 0.983514666557312,grad_norm: 0.8185822555537138, iteration: 233267
loss: 1.0544946193695068,grad_norm: 0.9916936036841361, iteration: 233268
loss: 1.0096328258514404,grad_norm: 0.8591886294911755, iteration: 233269
loss: 0.9426451325416565,grad_norm: 0.9999992198039253, iteration: 233270
loss: 1.0032132863998413,grad_norm: 0.9999991495910088, iteration: 233271
loss: 0.967136561870575,grad_norm: 0.7593112752427466, iteration: 233272
loss: 0.9715139865875244,grad_norm: 0.9315648032966795, iteration: 233273
loss: 0.9874946475028992,grad_norm: 0.9701232309741197, iteration: 233274
loss: 1.0189112424850464,grad_norm: 0.9737486431561472, iteration: 233275
loss: 1.0242184400558472,grad_norm: 0.860431153832284, iteration: 233276
loss: 0.9846795797348022,grad_norm: 0.9456316062404919, iteration: 233277
loss: 0.997404158115387,grad_norm: 0.9722052553964508, iteration: 233278
loss: 1.0106571912765503,grad_norm: 0.8709828320352514, iteration: 233279
loss: 0.960274875164032,grad_norm: 0.9276017533733151, iteration: 233280
loss: 0.98503577709198,grad_norm: 0.7840028592193425, iteration: 233281
loss: 1.0003905296325684,grad_norm: 0.7453198293803444, iteration: 233282
loss: 0.999177873134613,grad_norm: 0.996081207012403, iteration: 233283
loss: 1.0166070461273193,grad_norm: 0.7916098655208159, iteration: 233284
loss: 1.001206636428833,grad_norm: 0.9999989812918048, iteration: 233285
loss: 1.0317529439926147,grad_norm: 0.9021025607982119, iteration: 233286
loss: 0.9904192090034485,grad_norm: 0.994684853930296, iteration: 233287
loss: 0.9735662341117859,grad_norm: 0.8393083066368939, iteration: 233288
loss: 0.9883226156234741,grad_norm: 0.8829476484790894, iteration: 233289
loss: 0.9686132669448853,grad_norm: 0.8459022426126317, iteration: 233290
loss: 0.9875209331512451,grad_norm: 0.9837378686886101, iteration: 233291
loss: 1.039448618888855,grad_norm: 0.9630183424552902, iteration: 233292
loss: 1.002434492111206,grad_norm: 0.9999991075739897, iteration: 233293
loss: 1.0064208507537842,grad_norm: 0.9161930189106243, iteration: 233294
loss: 0.9849902391433716,grad_norm: 0.8884726175051874, iteration: 233295
loss: 1.0098787546157837,grad_norm: 0.9999991610575547, iteration: 233296
loss: 1.029901385307312,grad_norm: 0.9999992100331655, iteration: 233297
loss: 0.9884809851646423,grad_norm: 0.9085415587375212, iteration: 233298
loss: 0.9944298267364502,grad_norm: 0.8320273637309574, iteration: 233299
loss: 1.0816611051559448,grad_norm: 0.999999578693226, iteration: 233300
loss: 0.9285504221916199,grad_norm: 0.9576992074726917, iteration: 233301
loss: 1.0127205848693848,grad_norm: 0.9999989660495867, iteration: 233302
loss: 1.0028636455535889,grad_norm: 0.9605334383577204, iteration: 233303
loss: 1.0627814531326294,grad_norm: 0.999999467177216, iteration: 233304
loss: 1.0364750623703003,grad_norm: 0.7987873526415885, iteration: 233305
loss: 0.9883479475975037,grad_norm: 0.7241734914468765, iteration: 233306
loss: 0.9758306741714478,grad_norm: 0.929393201934836, iteration: 233307
loss: 1.0209274291992188,grad_norm: 0.8429719459162014, iteration: 233308
loss: 1.0039056539535522,grad_norm: 0.9743417956794337, iteration: 233309
loss: 0.9654808640480042,grad_norm: 0.8754587562183502, iteration: 233310
loss: 0.9895926713943481,grad_norm: 0.9999994727251235, iteration: 233311
loss: 0.9948680400848389,grad_norm: 0.7930770805878393, iteration: 233312
loss: 1.0126597881317139,grad_norm: 0.8945690440591244, iteration: 233313
loss: 1.0233598947525024,grad_norm: 0.9999991189600471, iteration: 233314
loss: 0.9995133876800537,grad_norm: 0.8008345565529117, iteration: 233315
loss: 0.9928393959999084,grad_norm: 0.8733452951618733, iteration: 233316
loss: 1.0088963508605957,grad_norm: 0.9326979007751609, iteration: 233317
loss: 1.0080252885818481,grad_norm: 0.999999100109841, iteration: 233318
loss: 0.9734358787536621,grad_norm: 0.8832000404489442, iteration: 233319
loss: 1.002336859703064,grad_norm: 0.9999989473528369, iteration: 233320
loss: 0.9778026342391968,grad_norm: 0.8690400141736295, iteration: 233321
loss: 0.9640961289405823,grad_norm: 0.8199103888416744, iteration: 233322
loss: 0.9768102765083313,grad_norm: 0.8756636092899891, iteration: 233323
loss: 0.996684193611145,grad_norm: 0.7877422509380376, iteration: 233324
loss: 0.9774507880210876,grad_norm: 0.8533670524802623, iteration: 233325
loss: 1.0230048894882202,grad_norm: 0.9999992529011413, iteration: 233326
loss: 0.9859522581100464,grad_norm: 0.8252434207544778, iteration: 233327
loss: 1.011252760887146,grad_norm: 0.931278529150116, iteration: 233328
loss: 0.968622624874115,grad_norm: 0.9999991608049742, iteration: 233329
loss: 0.9898834228515625,grad_norm: 0.8965274664856154, iteration: 233330
loss: 0.9715108871459961,grad_norm: 0.9897397468550786, iteration: 233331
loss: 1.0248228311538696,grad_norm: 0.8715198427535313, iteration: 233332
loss: 1.0251630544662476,grad_norm: 0.9999992225189624, iteration: 233333
loss: 0.9938217401504517,grad_norm: 0.8825096790048722, iteration: 233334
loss: 0.9845004081726074,grad_norm: 0.7214062242761669, iteration: 233335
loss: 1.0213260650634766,grad_norm: 0.99999909079326, iteration: 233336
loss: 1.0171974897384644,grad_norm: 0.7886079828275318, iteration: 233337
loss: 1.0218727588653564,grad_norm: 0.9999992909187522, iteration: 233338
loss: 1.034054160118103,grad_norm: 0.9999991696239177, iteration: 233339
loss: 0.9947271347045898,grad_norm: 0.9999991510428885, iteration: 233340
loss: 0.961327850818634,grad_norm: 0.9179065661472378, iteration: 233341
loss: 0.9731947779655457,grad_norm: 0.9105733148605504, iteration: 233342
loss: 1.0273758172988892,grad_norm: 0.942870696884884, iteration: 233343
loss: 0.9835205078125,grad_norm: 0.9133797706655079, iteration: 233344
loss: 1.026684045791626,grad_norm: 0.911522871130547, iteration: 233345
loss: 0.9931760430335999,grad_norm: 0.7870800094905842, iteration: 233346
loss: 1.0184577703475952,grad_norm: 0.7721572664455428, iteration: 233347
loss: 0.9941169023513794,grad_norm: 0.914506801264594, iteration: 233348
loss: 0.9702436327934265,grad_norm: 0.7772000517761644, iteration: 233349
loss: 1.03859281539917,grad_norm: 0.9406793890002512, iteration: 233350
loss: 1.0168486833572388,grad_norm: 0.8393816456993917, iteration: 233351
loss: 1.0160367488861084,grad_norm: 0.9049298175533691, iteration: 233352
loss: 1.0363202095031738,grad_norm: 0.7563862115652101, iteration: 233353
loss: 1.0057226419448853,grad_norm: 0.931555448240516, iteration: 233354
loss: 1.0485855340957642,grad_norm: 0.9438701253403714, iteration: 233355
loss: 1.0335785150527954,grad_norm: 0.9523842859025002, iteration: 233356
loss: 1.045247197151184,grad_norm: 0.9999992780881798, iteration: 233357
loss: 0.9993236660957336,grad_norm: 0.9999989915747345, iteration: 233358
loss: 1.0185927152633667,grad_norm: 0.923904984636417, iteration: 233359
loss: 0.9982492327690125,grad_norm: 0.9999996634472049, iteration: 233360
loss: 0.9972444772720337,grad_norm: 0.8415386565672649, iteration: 233361
loss: 0.9652910828590393,grad_norm: 0.9661797186426448, iteration: 233362
loss: 0.9933154582977295,grad_norm: 0.9793672481473518, iteration: 233363
loss: 1.009057641029358,grad_norm: 0.7836559183069218, iteration: 233364
loss: 1.0029091835021973,grad_norm: 0.9502366269642971, iteration: 233365
loss: 1.0101054906845093,grad_norm: 0.9468578091972107, iteration: 233366
loss: 1.006375789642334,grad_norm: 0.8387076768145356, iteration: 233367
loss: 1.006795883178711,grad_norm: 0.882539248942022, iteration: 233368
loss: 1.0099493265151978,grad_norm: 0.9400498423631538, iteration: 233369
loss: 0.9960072636604309,grad_norm: 0.9251679591515856, iteration: 233370
loss: 0.958322286605835,grad_norm: 0.999999011677241, iteration: 233371
loss: 1.0078165531158447,grad_norm: 0.9999995394252255, iteration: 233372
loss: 1.0072331428527832,grad_norm: 0.9999992181110249, iteration: 233373
loss: 0.9869031310081482,grad_norm: 0.9999992121627244, iteration: 233374
loss: 1.005975365638733,grad_norm: 0.9830300660681236, iteration: 233375
loss: 0.9697480797767639,grad_norm: 0.9999991154752693, iteration: 233376
loss: 1.0079690217971802,grad_norm: 0.9365723390577311, iteration: 233377
loss: 1.0362155437469482,grad_norm: 0.806742617333809, iteration: 233378
loss: 0.994634747505188,grad_norm: 0.9999990793960793, iteration: 233379
loss: 0.9588068127632141,grad_norm: 0.9732971146412633, iteration: 233380
loss: 0.998110830783844,grad_norm: 0.8597997822087923, iteration: 233381
loss: 0.9896082282066345,grad_norm: 0.9168578749216593, iteration: 233382
loss: 1.0007144212722778,grad_norm: 0.9999991503715266, iteration: 233383
loss: 0.9845377802848816,grad_norm: 0.8069094479633643, iteration: 233384
loss: 0.9643123149871826,grad_norm: 0.928497702949884, iteration: 233385
loss: 1.0254199504852295,grad_norm: 0.9805201740402716, iteration: 233386
loss: 1.0660948753356934,grad_norm: 0.9999999169196729, iteration: 233387
loss: 0.9931547045707703,grad_norm: 0.9374542977630453, iteration: 233388
loss: 0.9646710157394409,grad_norm: 0.8375838283222665, iteration: 233389
loss: 0.9896112084388733,grad_norm: 0.8383365144953223, iteration: 233390
loss: 1.0248771905899048,grad_norm: 0.9999990122561064, iteration: 233391
loss: 0.9615011811256409,grad_norm: 0.8227765313049281, iteration: 233392
loss: 0.9832060933113098,grad_norm: 0.9999990051324197, iteration: 233393
loss: 0.9742105603218079,grad_norm: 0.8989744078015759, iteration: 233394
loss: 1.0209351778030396,grad_norm: 0.9198340367776662, iteration: 233395
loss: 1.0023797750473022,grad_norm: 0.9999990713466901, iteration: 233396
loss: 0.9921694993972778,grad_norm: 0.8418748793851752, iteration: 233397
loss: 0.9912441372871399,grad_norm: 0.9999988855915123, iteration: 233398
loss: 0.9694056510925293,grad_norm: 0.8343340601383386, iteration: 233399
loss: 1.0582369565963745,grad_norm: 0.999999232221538, iteration: 233400
loss: 0.9767025709152222,grad_norm: 0.8958241160692249, iteration: 233401
loss: 1.0153586864471436,grad_norm: 0.9999991363962429, iteration: 233402
loss: 1.0048279762268066,grad_norm: 0.928883048306238, iteration: 233403
loss: 0.9873502850532532,grad_norm: 0.9999996711753687, iteration: 233404
loss: 0.9955732226371765,grad_norm: 0.9979159125033356, iteration: 233405
loss: 1.0391969680786133,grad_norm: 0.8293820026439288, iteration: 233406
loss: 0.9896045327186584,grad_norm: 0.9857269560820773, iteration: 233407
loss: 1.0106117725372314,grad_norm: 0.9999990281951119, iteration: 233408
loss: 1.000619649887085,grad_norm: 0.999998949760461, iteration: 233409
loss: 1.0150275230407715,grad_norm: 0.9744460205430943, iteration: 233410
loss: 0.9661491513252258,grad_norm: 0.9999991715194669, iteration: 233411
loss: 1.010994553565979,grad_norm: 0.8396138432361305, iteration: 233412
loss: 1.0110270977020264,grad_norm: 0.7591199225714286, iteration: 233413
loss: 1.007188081741333,grad_norm: 0.8360241654222015, iteration: 233414
loss: 0.9536112546920776,grad_norm: 0.9446350341469252, iteration: 233415
loss: 1.0217205286026,grad_norm: 0.9999992096042509, iteration: 233416
loss: 0.9983119964599609,grad_norm: 0.9999992148702589, iteration: 233417
loss: 0.9912639856338501,grad_norm: 0.9446531153569385, iteration: 233418
loss: 1.0031743049621582,grad_norm: 0.8180439468970369, iteration: 233419
loss: 0.9798938035964966,grad_norm: 0.9439088145684014, iteration: 233420
loss: 1.000090479850769,grad_norm: 0.8123557670598568, iteration: 233421
loss: 1.0320864915847778,grad_norm: 0.9073214239642261, iteration: 233422
loss: 0.9908355474472046,grad_norm: 0.999999162763875, iteration: 233423
loss: 0.9930771589279175,grad_norm: 0.9578883011808014, iteration: 233424
loss: 1.0112528800964355,grad_norm: 0.8984230500687442, iteration: 233425
loss: 0.9955918788909912,grad_norm: 0.9204429775809384, iteration: 233426
loss: 0.9704773426055908,grad_norm: 0.8538396182366867, iteration: 233427
loss: 1.0234918594360352,grad_norm: 0.943494679460441, iteration: 233428
loss: 1.006893277168274,grad_norm: 0.8877128626879957, iteration: 233429
loss: 1.0230576992034912,grad_norm: 0.9999994319418144, iteration: 233430
loss: 1.0171831846237183,grad_norm: 0.999999196005848, iteration: 233431
loss: 0.9774583578109741,grad_norm: 0.9282112991304635, iteration: 233432
loss: 1.0434801578521729,grad_norm: 0.9999990726980831, iteration: 233433
loss: 1.0275921821594238,grad_norm: 0.9685350566794233, iteration: 233434
loss: 0.9975308179855347,grad_norm: 0.9999990610506998, iteration: 233435
loss: 0.9767325520515442,grad_norm: 0.9005323634759268, iteration: 233436
loss: 1.007773756980896,grad_norm: 0.9211884134905947, iteration: 233437
loss: 0.9814918637275696,grad_norm: 0.8839451830933671, iteration: 233438
loss: 1.0112669467926025,grad_norm: 0.884586283995268, iteration: 233439
loss: 0.9690148234367371,grad_norm: 0.7752368934215433, iteration: 233440
loss: 0.9636957049369812,grad_norm: 0.9936727865337892, iteration: 233441
loss: 1.027081847190857,grad_norm: 0.9738479527392531, iteration: 233442
loss: 0.9772237539291382,grad_norm: 0.7957851581063274, iteration: 233443
loss: 0.9775785207748413,grad_norm: 0.9162995000200389, iteration: 233444
loss: 0.9905993342399597,grad_norm: 0.8918659513426055, iteration: 233445
loss: 0.9886683821678162,grad_norm: 0.821907601509446, iteration: 233446
loss: 1.0132160186767578,grad_norm: 0.8360523325256988, iteration: 233447
loss: 0.9958423972129822,grad_norm: 0.9756294286594576, iteration: 233448
loss: 0.9772388339042664,grad_norm: 0.9916073361491948, iteration: 233449
loss: 0.9910220503807068,grad_norm: 0.9163499872135568, iteration: 233450
loss: 0.9938849210739136,grad_norm: 0.8802416614761555, iteration: 233451
loss: 0.9674069881439209,grad_norm: 0.862427209031853, iteration: 233452
loss: 1.0008138418197632,grad_norm: 0.8672437560605556, iteration: 233453
loss: 1.0143680572509766,grad_norm: 0.9999991479534166, iteration: 233454
loss: 0.9896403551101685,grad_norm: 0.967183068928468, iteration: 233455
loss: 0.9751148819923401,grad_norm: 0.8023851955615979, iteration: 233456
loss: 0.9877294301986694,grad_norm: 0.9999991590923526, iteration: 233457
loss: 0.9861295819282532,grad_norm: 0.8241109362758052, iteration: 233458
loss: 1.014612078666687,grad_norm: 0.8733890068108109, iteration: 233459
loss: 1.0131480693817139,grad_norm: 0.8803302328244279, iteration: 233460
loss: 0.9827880859375,grad_norm: 0.7809005707404334, iteration: 233461
loss: 1.0169070959091187,grad_norm: 0.8416388600750877, iteration: 233462
loss: 1.0129081010818481,grad_norm: 0.9374105023934308, iteration: 233463
loss: 0.9651094675064087,grad_norm: 0.999999225424721, iteration: 233464
loss: 1.0283732414245605,grad_norm: 0.9999990554320661, iteration: 233465
loss: 0.9603739380836487,grad_norm: 0.8222963174275827, iteration: 233466
loss: 0.9815534949302673,grad_norm: 0.9081232050630443, iteration: 233467
loss: 1.0327019691467285,grad_norm: 0.9999993743768971, iteration: 233468
loss: 1.0043195486068726,grad_norm: 0.8360607809909225, iteration: 233469
loss: 1.0200254917144775,grad_norm: 0.8654676148120576, iteration: 233470
loss: 1.074040412902832,grad_norm: 0.9999990507056621, iteration: 233471
loss: 1.0303300619125366,grad_norm: 0.9999991578031592, iteration: 233472
loss: 1.0157092809677124,grad_norm: 0.999999991780584, iteration: 233473
loss: 0.9668570756912231,grad_norm: 0.8621004331861293, iteration: 233474
loss: 1.032540202140808,grad_norm: 0.8804373371138937, iteration: 233475
loss: 1.0061126947402954,grad_norm: 0.9644403029255479, iteration: 233476
loss: 1.0265913009643555,grad_norm: 0.9324739529297541, iteration: 233477
loss: 1.0172959566116333,grad_norm: 0.9574269210767729, iteration: 233478
loss: 0.9849770665168762,grad_norm: 0.8469543886435501, iteration: 233479
loss: 1.0366450548171997,grad_norm: 0.9486871327539603, iteration: 233480
loss: 1.0331956148147583,grad_norm: 0.9763678451942864, iteration: 233481
loss: 1.0211611986160278,grad_norm: 0.8268272472259094, iteration: 233482
loss: 0.9567146301269531,grad_norm: 0.9986475085657479, iteration: 233483
loss: 0.999205470085144,grad_norm: 0.7038273789025548, iteration: 233484
loss: 1.0088536739349365,grad_norm: 0.864571280642549, iteration: 233485
loss: 1.0041414499282837,grad_norm: 0.8964596313717627, iteration: 233486
loss: 1.0113567113876343,grad_norm: 0.9999990826362078, iteration: 233487
loss: 1.0185703039169312,grad_norm: 0.8485260524119205, iteration: 233488
loss: 0.979684054851532,grad_norm: 0.9372165274899206, iteration: 233489
loss: 1.0329225063323975,grad_norm: 0.9900667028603908, iteration: 233490
loss: 1.0130302906036377,grad_norm: 0.8258010270747022, iteration: 233491
loss: 0.9799643754959106,grad_norm: 0.9294434459699448, iteration: 233492
loss: 1.0978083610534668,grad_norm: 0.8862068087466332, iteration: 233493
loss: 0.9812371730804443,grad_norm: 0.9294896616349888, iteration: 233494
loss: 0.9940224885940552,grad_norm: 0.9999991387497902, iteration: 233495
loss: 1.0189354419708252,grad_norm: 0.9650190922093246, iteration: 233496
loss: 1.0048619508743286,grad_norm: 0.8971310456732868, iteration: 233497
loss: 0.988735556602478,grad_norm: 0.9045630332567081, iteration: 233498
loss: 0.9925673604011536,grad_norm: 0.8689913754811947, iteration: 233499
loss: 1.011085867881775,grad_norm: 0.9999990960201659, iteration: 233500
loss: 1.0154211521148682,grad_norm: 0.8861250868293985, iteration: 233501
loss: 0.9862663745880127,grad_norm: 0.9143149822999638, iteration: 233502
loss: 0.9621609449386597,grad_norm: 0.8930550251405739, iteration: 233503
loss: 1.0604158639907837,grad_norm: 0.9999990585877022, iteration: 233504
loss: 0.9662880897521973,grad_norm: 0.9349036619328688, iteration: 233505
loss: 0.9837052822113037,grad_norm: 0.7467097112071304, iteration: 233506
loss: 0.9758440852165222,grad_norm: 0.9609777664109244, iteration: 233507
loss: 1.0283440351486206,grad_norm: 0.9071477346851837, iteration: 233508
loss: 1.0216312408447266,grad_norm: 0.999998965161427, iteration: 233509
loss: 1.01011323928833,grad_norm: 0.9229846337831961, iteration: 233510
loss: 0.9941253662109375,grad_norm: 0.9423828932484152, iteration: 233511
loss: 1.067965030670166,grad_norm: 0.9506642793024753, iteration: 233512
loss: 0.9961041212081909,grad_norm: 0.999999969531331, iteration: 233513
loss: 1.0125092267990112,grad_norm: 0.9999990274094686, iteration: 233514
loss: 1.0065808296203613,grad_norm: 0.9027220757509895, iteration: 233515
loss: 1.0251778364181519,grad_norm: 0.999999922247049, iteration: 233516
loss: 0.9623358249664307,grad_norm: 0.9999990988050075, iteration: 233517
loss: 1.020326018333435,grad_norm: 0.9610524604903926, iteration: 233518
loss: 1.0158448219299316,grad_norm: 0.9999990556312592, iteration: 233519
loss: 0.9631479978561401,grad_norm: 0.9744353587625239, iteration: 233520
loss: 1.009916067123413,grad_norm: 0.9225583511261461, iteration: 233521
loss: 0.9991961121559143,grad_norm: 0.842253229651741, iteration: 233522
loss: 0.9726865887641907,grad_norm: 0.860919694512731, iteration: 233523
loss: 1.0267043113708496,grad_norm: 0.7554093991382508, iteration: 233524
loss: 0.9897842407226562,grad_norm: 0.9999991621448252, iteration: 233525
loss: 1.0250279903411865,grad_norm: 0.881458288864593, iteration: 233526
loss: 0.9998107552528381,grad_norm: 0.8197343661773993, iteration: 233527
loss: 0.997627317905426,grad_norm: 0.8629444510595677, iteration: 233528
loss: 1.002292275428772,grad_norm: 0.8024888676978962, iteration: 233529
loss: 1.035208821296692,grad_norm: 0.8964370377212086, iteration: 233530
loss: 0.9693858027458191,grad_norm: 0.999999023028387, iteration: 233531
loss: 1.0041561126708984,grad_norm: 0.8840287837492457, iteration: 233532
loss: 0.9970631003379822,grad_norm: 0.7950293343716709, iteration: 233533
loss: 0.9903362393379211,grad_norm: 0.897513407002778, iteration: 233534
loss: 1.0775957107543945,grad_norm: 0.9999992939323284, iteration: 233535
loss: 1.163221001625061,grad_norm: 0.9999994089171541, iteration: 233536
loss: 1.0238027572631836,grad_norm: 0.9414564922183978, iteration: 233537
loss: 1.0261292457580566,grad_norm: 0.9999995883838432, iteration: 233538
loss: 1.029379963874817,grad_norm: 0.9504944216388056, iteration: 233539
loss: 1.0254000425338745,grad_norm: 0.9900753092581215, iteration: 233540
loss: 1.0092811584472656,grad_norm: 0.9999992335995118, iteration: 233541
loss: 0.9860048890113831,grad_norm: 0.9601136721489768, iteration: 233542
loss: 0.9770838618278503,grad_norm: 0.9524770017731153, iteration: 233543
loss: 0.9857315421104431,grad_norm: 0.8615519269202224, iteration: 233544
loss: 1.0691766738891602,grad_norm: 0.8594220713123724, iteration: 233545
loss: 0.9899116158485413,grad_norm: 0.9999991797729283, iteration: 233546
loss: 1.0074080228805542,grad_norm: 0.9532583936101308, iteration: 233547
loss: 0.9575092792510986,grad_norm: 0.8015276106487814, iteration: 233548
loss: 0.9610871076583862,grad_norm: 0.8419418214646796, iteration: 233549
loss: 0.9744981527328491,grad_norm: 0.7366884824485609, iteration: 233550
loss: 0.9676060080528259,grad_norm: 0.9999989593901197, iteration: 233551
loss: 1.069809079170227,grad_norm: 0.999999148734682, iteration: 233552
loss: 0.9860988259315491,grad_norm: 0.999999221641428, iteration: 233553
loss: 1.0363160371780396,grad_norm: 0.9999990146756772, iteration: 233554
loss: 0.9857162833213806,grad_norm: 0.7455112410055383, iteration: 233555
loss: 1.0015952587127686,grad_norm: 0.9999991624907764, iteration: 233556
loss: 1.0427120923995972,grad_norm: 0.9053478243378069, iteration: 233557
loss: 0.9817174673080444,grad_norm: 0.999999047346885, iteration: 233558
loss: 0.986626923084259,grad_norm: 0.9999991024394709, iteration: 233559
loss: 1.015988826751709,grad_norm: 0.9606385236367402, iteration: 233560
loss: 0.9831546545028687,grad_norm: 0.9999991274004837, iteration: 233561
loss: 1.0057791471481323,grad_norm: 0.7541477713939397, iteration: 233562
loss: 1.019930124282837,grad_norm: 0.9999990934023507, iteration: 233563
loss: 0.998320460319519,grad_norm: 0.8680921820864687, iteration: 233564
loss: 0.9832643270492554,grad_norm: 0.9697119997434832, iteration: 233565
loss: 1.0052194595336914,grad_norm: 0.8580456431712032, iteration: 233566
loss: 1.0087778568267822,grad_norm: 0.8497885000672929, iteration: 233567
loss: 1.0100994110107422,grad_norm: 0.9238231513125988, iteration: 233568
loss: 0.9904746413230896,grad_norm: 0.9999991801177186, iteration: 233569
loss: 1.011317253112793,grad_norm: 0.8220913465590705, iteration: 233570
loss: 1.0174134969711304,grad_norm: 0.9335327324266691, iteration: 233571
loss: 1.0194488763809204,grad_norm: 0.8872619959389699, iteration: 233572
loss: 0.973038911819458,grad_norm: 0.8952937949867127, iteration: 233573
loss: 1.0116227865219116,grad_norm: 0.9426138159236206, iteration: 233574
loss: 0.9436584115028381,grad_norm: 0.9408635927736324, iteration: 233575
loss: 0.9682726263999939,grad_norm: 0.9107169918805127, iteration: 233576
loss: 1.0012866258621216,grad_norm: 0.8719135966390664, iteration: 233577
loss: 1.0013993978500366,grad_norm: 0.8257204165746287, iteration: 233578
loss: 1.0144182443618774,grad_norm: 0.9999992152674912, iteration: 233579
loss: 0.9843925833702087,grad_norm: 0.84913489211489, iteration: 233580
loss: 1.0074297189712524,grad_norm: 0.8009915007782866, iteration: 233581
loss: 0.974478542804718,grad_norm: 0.8719563557484349, iteration: 233582
loss: 0.9838433265686035,grad_norm: 0.9470537197489926, iteration: 233583
loss: 1.018734335899353,grad_norm: 0.8523027594594746, iteration: 233584
loss: 1.01618492603302,grad_norm: 0.9205184223516344, iteration: 233585
loss: 1.0158640146255493,grad_norm: 0.8733743404672257, iteration: 233586
loss: 1.0481539964675903,grad_norm: 0.967023456853442, iteration: 233587
loss: 1.0035334825515747,grad_norm: 0.755883854677972, iteration: 233588
loss: 0.9836165308952332,grad_norm: 0.9895370580661921, iteration: 233589
loss: 1.0452139377593994,grad_norm: 0.9999991418152375, iteration: 233590
loss: 1.0123728513717651,grad_norm: 0.875016467900152, iteration: 233591
loss: 0.9955487847328186,grad_norm: 0.9479743708987989, iteration: 233592
loss: 1.0265581607818604,grad_norm: 0.9999991121545855, iteration: 233593
loss: 1.001353144645691,grad_norm: 0.9730762530722081, iteration: 233594
loss: 1.0056440830230713,grad_norm: 0.9999989182880918, iteration: 233595
loss: 1.0047346353530884,grad_norm: 0.893174912724595, iteration: 233596
loss: 0.9597266912460327,grad_norm: 0.8515907778570772, iteration: 233597
loss: 0.9588781595230103,grad_norm: 0.952688173072852, iteration: 233598
loss: 0.9911553263664246,grad_norm: 0.8069804045834528, iteration: 233599
loss: 1.0091396570205688,grad_norm: 0.9999991435309997, iteration: 233600
loss: 0.9681920409202576,grad_norm: 0.9999993015729939, iteration: 233601
loss: 1.000933051109314,grad_norm: 0.7888467198471576, iteration: 233602
loss: 0.991128146648407,grad_norm: 0.8680938311705125, iteration: 233603
loss: 1.0020149946212769,grad_norm: 0.8781051372407521, iteration: 233604
loss: 1.0090421438217163,grad_norm: 0.9999990328885349, iteration: 233605
loss: 0.9985406398773193,grad_norm: 0.750744885205843, iteration: 233606
loss: 0.9859946370124817,grad_norm: 0.844900084647919, iteration: 233607
loss: 1.014097809791565,grad_norm: 0.9999991284983932, iteration: 233608
loss: 1.0034360885620117,grad_norm: 0.9776908805364153, iteration: 233609
loss: 0.9934685826301575,grad_norm: 0.8762005900294433, iteration: 233610
loss: 0.9847385287284851,grad_norm: 0.8692642264486249, iteration: 233611
loss: 1.0123989582061768,grad_norm: 0.8306415022236159, iteration: 233612
loss: 0.9982427358627319,grad_norm: 0.8336414680791258, iteration: 233613
loss: 0.9936984777450562,grad_norm: 0.9645955232998754, iteration: 233614
loss: 1.1415873765945435,grad_norm: 0.9999996696278511, iteration: 233615
loss: 1.0026111602783203,grad_norm: 0.9999989971923993, iteration: 233616
loss: 1.0613670349121094,grad_norm: 1.0000000331351189, iteration: 233617
loss: 0.9700296521186829,grad_norm: 0.9091204053705242, iteration: 233618
loss: 1.0363049507141113,grad_norm: 0.754553718893671, iteration: 233619
loss: 0.9713170528411865,grad_norm: 0.8459698388565753, iteration: 233620
loss: 0.9898595809936523,grad_norm: 0.9330420232467437, iteration: 233621
loss: 1.0154060125350952,grad_norm: 0.9914249921870971, iteration: 233622
loss: 0.9817735552787781,grad_norm: 0.7727439316399628, iteration: 233623
loss: 1.0048527717590332,grad_norm: 0.9999990836987069, iteration: 233624
loss: 0.9417792558670044,grad_norm: 0.876908928595356, iteration: 233625
loss: 0.9674620032310486,grad_norm: 0.9337360445655108, iteration: 233626
loss: 0.9843217730522156,grad_norm: 0.929299985169985, iteration: 233627
loss: 1.0240058898925781,grad_norm: 0.8883494780087706, iteration: 233628
loss: 1.0248075723648071,grad_norm: 0.8474021292321985, iteration: 233629
loss: 1.0237261056900024,grad_norm: 0.9999995377594353, iteration: 233630
loss: 0.9874228835105896,grad_norm: 0.935345686260514, iteration: 233631
loss: 1.0129643678665161,grad_norm: 0.9999992181843501, iteration: 233632
loss: 0.984398365020752,grad_norm: 0.9999990815284191, iteration: 233633
loss: 0.9731448888778687,grad_norm: 0.9999990682794924, iteration: 233634
loss: 0.9892818927764893,grad_norm: 0.7437469705520502, iteration: 233635
loss: 1.0096994638442993,grad_norm: 0.999999136585387, iteration: 233636
loss: 1.0133239030838013,grad_norm: 0.9335090091634871, iteration: 233637
loss: 1.0434134006500244,grad_norm: 0.848285772122517, iteration: 233638
loss: 1.0110292434692383,grad_norm: 0.9999992629522886, iteration: 233639
loss: 1.0419414043426514,grad_norm: 0.999999403577439, iteration: 233640
loss: 1.029926061630249,grad_norm: 0.8945258172132015, iteration: 233641
loss: 1.021896243095398,grad_norm: 0.8495563566666926, iteration: 233642
loss: 1.0072938203811646,grad_norm: 0.9999997470589266, iteration: 233643
loss: 1.0444231033325195,grad_norm: 0.9770118504958372, iteration: 233644
loss: 1.0114805698394775,grad_norm: 0.8516103757834024, iteration: 233645
loss: 0.9900931715965271,grad_norm: 0.9749168530815081, iteration: 233646
loss: 0.987442135810852,grad_norm: 0.9420226109726129, iteration: 233647
loss: 0.9818864464759827,grad_norm: 0.999999153643139, iteration: 233648
loss: 0.9834003448486328,grad_norm: 0.9999992829020695, iteration: 233649
loss: 1.0045086145401,grad_norm: 0.9999991421324661, iteration: 233650
loss: 1.0084788799285889,grad_norm: 0.9999988948680512, iteration: 233651
loss: 1.1300735473632812,grad_norm: 0.9999992210318148, iteration: 233652
loss: 0.9940733909606934,grad_norm: 0.9147780777190233, iteration: 233653
loss: 0.9886142015457153,grad_norm: 0.999999002007715, iteration: 233654
loss: 1.002561092376709,grad_norm: 0.839058872758099, iteration: 233655
loss: 0.986722469329834,grad_norm: 0.9999990218103507, iteration: 233656
loss: 1.0183881521224976,grad_norm: 0.876479599535132, iteration: 233657
loss: 1.0083184242248535,grad_norm: 0.9264368927450998, iteration: 233658
loss: 1.014527440071106,grad_norm: 0.8470132304218213, iteration: 233659
loss: 0.9971510767936707,grad_norm: 0.9263067707911878, iteration: 233660
loss: 0.9953190684318542,grad_norm: 0.9999991036374937, iteration: 233661
loss: 1.008034110069275,grad_norm: 0.9529977997552339, iteration: 233662
loss: 0.9614879488945007,grad_norm: 0.9563348312654839, iteration: 233663
loss: 1.0417979955673218,grad_norm: 0.8196754526635002, iteration: 233664
loss: 1.0340617895126343,grad_norm: 0.7828526010797108, iteration: 233665
loss: 0.9952386021614075,grad_norm: 0.7978349954701724, iteration: 233666
loss: 0.9753897786140442,grad_norm: 0.9999992638103249, iteration: 233667
loss: 0.9814028739929199,grad_norm: 0.9999989456401965, iteration: 233668
loss: 0.9699903130531311,grad_norm: 0.9999997546169517, iteration: 233669
loss: 0.9820360541343689,grad_norm: 0.9500620774708235, iteration: 233670
loss: 0.9649357199668884,grad_norm: 0.9696397167791241, iteration: 233671
loss: 1.0140526294708252,grad_norm: 0.999999018738759, iteration: 233672
loss: 1.0278617143630981,grad_norm: 0.9999992377837497, iteration: 233673
loss: 1.0272330045700073,grad_norm: 0.9999991322742088, iteration: 233674
loss: 1.0005369186401367,grad_norm: 0.9772502747410261, iteration: 233675
loss: 0.9847617745399475,grad_norm: 0.9141048028753324, iteration: 233676
loss: 1.0137685537338257,grad_norm: 0.7694115748999286, iteration: 233677
loss: 0.9972913265228271,grad_norm: 0.8458573743926449, iteration: 233678
loss: 1.0074107646942139,grad_norm: 0.9999989707666491, iteration: 233679
loss: 1.0236724615097046,grad_norm: 0.945269308355876, iteration: 233680
loss: 1.0085229873657227,grad_norm: 0.826359727458782, iteration: 233681
loss: 1.016018271446228,grad_norm: 0.9895810238868823, iteration: 233682
loss: 1.0340906381607056,grad_norm: 0.9589337135151234, iteration: 233683
loss: 1.004530429840088,grad_norm: 0.8516170432749127, iteration: 233684
loss: 1.0310300588607788,grad_norm: 0.9601673817764063, iteration: 233685
loss: 1.024170994758606,grad_norm: 0.9999990059090521, iteration: 233686
loss: 0.974398672580719,grad_norm: 0.8512633322438403, iteration: 233687
loss: 1.0757993459701538,grad_norm: 0.9208764539266746, iteration: 233688
loss: 0.9962077140808105,grad_norm: 0.911025141550972, iteration: 233689
loss: 0.9956909418106079,grad_norm: 0.8866300921490012, iteration: 233690
loss: 0.9926009178161621,grad_norm: 0.9695120982772364, iteration: 233691
loss: 0.9856603741645813,grad_norm: 0.9231491287912728, iteration: 233692
loss: 1.0372166633605957,grad_norm: 0.9999991133454494, iteration: 233693
loss: 1.0359132289886475,grad_norm: 0.9583346636052502, iteration: 233694
loss: 1.0350627899169922,grad_norm: 0.9999991536816232, iteration: 233695
loss: 1.0000442266464233,grad_norm: 0.9837814534661612, iteration: 233696
loss: 1.0212191343307495,grad_norm: 0.8555939780367161, iteration: 233697
loss: 0.9878373146057129,grad_norm: 0.8224687544130024, iteration: 233698
loss: 0.9868986010551453,grad_norm: 0.9285947686139402, iteration: 233699
loss: 0.9993550181388855,grad_norm: 0.9337720268529893, iteration: 233700
loss: 0.9554682970046997,grad_norm: 0.9030070221182621, iteration: 233701
loss: 1.0061498880386353,grad_norm: 0.9999991101060028, iteration: 233702
loss: 1.0162922143936157,grad_norm: 0.9999991023546762, iteration: 233703
loss: 1.0038599967956543,grad_norm: 0.9003791583396077, iteration: 233704
loss: 1.0017385482788086,grad_norm: 0.9999988565691579, iteration: 233705
loss: 1.0002599954605103,grad_norm: 0.9276146019418751, iteration: 233706
loss: 0.9710169434547424,grad_norm: 0.8979207722681074, iteration: 233707
loss: 1.00388765335083,grad_norm: 0.9240037038664493, iteration: 233708
loss: 0.9988328814506531,grad_norm: 0.8317787700782695, iteration: 233709
loss: 1.0251710414886475,grad_norm: 0.9577660327796464, iteration: 233710
loss: 1.0006214380264282,grad_norm: 0.971453181127737, iteration: 233711
loss: 1.001818299293518,grad_norm: 0.9353196187323842, iteration: 233712
loss: 1.0169532299041748,grad_norm: 0.9999991686122472, iteration: 233713
loss: 0.9618115425109863,grad_norm: 0.9474110586252116, iteration: 233714
loss: 0.9709346294403076,grad_norm: 0.8642195328309434, iteration: 233715
loss: 0.9884772300720215,grad_norm: 0.9999989175582678, iteration: 233716
loss: 1.0143033266067505,grad_norm: 0.8983081345508173, iteration: 233717
loss: 1.0330734252929688,grad_norm: 0.9380507525418404, iteration: 233718
loss: 1.010573387145996,grad_norm: 0.9271562590105425, iteration: 233719
loss: 1.0108059644699097,grad_norm: 0.8851281121304563, iteration: 233720
loss: 1.0166256427764893,grad_norm: 0.9945315527441154, iteration: 233721
loss: 0.9890554547309875,grad_norm: 0.9963343198121769, iteration: 233722
loss: 0.9824979901313782,grad_norm: 0.9373642614343467, iteration: 233723
loss: 0.992395281791687,grad_norm: 0.9649241021183025, iteration: 233724
loss: 1.0029776096343994,grad_norm: 0.8904606995401467, iteration: 233725
loss: 0.9864989519119263,grad_norm: 0.9709581319808388, iteration: 233726
loss: 1.008169174194336,grad_norm: 0.9004183262702495, iteration: 233727
loss: 0.9964021444320679,grad_norm: 0.9465679989180333, iteration: 233728
loss: 0.9842195510864258,grad_norm: 0.9866324191126188, iteration: 233729
loss: 0.9907413721084595,grad_norm: 0.9999991551029561, iteration: 233730
loss: 1.0251208543777466,grad_norm: 0.9601681062028605, iteration: 233731
loss: 0.9788066148757935,grad_norm: 0.9999991001145372, iteration: 233732
loss: 1.0308033227920532,grad_norm: 0.9999990833183232, iteration: 233733
loss: 0.975649356842041,grad_norm: 0.9510423638295235, iteration: 233734
loss: 1.0420165061950684,grad_norm: 0.9232075072999398, iteration: 233735
loss: 1.0156502723693848,grad_norm: 0.905580355312502, iteration: 233736
loss: 0.9920533895492554,grad_norm: 0.8799021171975654, iteration: 233737
loss: 0.9987526535987854,grad_norm: 0.9584631321912476, iteration: 233738
loss: 0.9653319716453552,grad_norm: 0.9303535619001922, iteration: 233739
loss: 1.0526466369628906,grad_norm: 0.999999168588314, iteration: 233740
loss: 1.0048940181732178,grad_norm: 0.9999990460020095, iteration: 233741
loss: 1.0024358034133911,grad_norm: 0.9973267995433945, iteration: 233742
loss: 0.9798967242240906,grad_norm: 0.895080685555092, iteration: 233743
loss: 1.0128142833709717,grad_norm: 0.8752959375180625, iteration: 233744
loss: 1.0031886100769043,grad_norm: 0.9999999936519958, iteration: 233745
loss: 0.9917364120483398,grad_norm: 0.9001360853552658, iteration: 233746
loss: 0.9685915112495422,grad_norm: 0.9303342654186482, iteration: 233747
loss: 0.9929391741752625,grad_norm: 0.7474049012252165, iteration: 233748
loss: 0.9539886116981506,grad_norm: 0.999998992707448, iteration: 233749
loss: 0.9992408156394958,grad_norm: 0.9315270910788656, iteration: 233750
loss: 1.066175937652588,grad_norm: 0.9999995376560027, iteration: 233751
loss: 1.030033826828003,grad_norm: 0.9434497450806194, iteration: 233752
loss: 1.0043169260025024,grad_norm: 0.9999989103142336, iteration: 233753
loss: 0.9903294444084167,grad_norm: 0.7948295744391429, iteration: 233754
loss: 0.9595293998718262,grad_norm: 0.9339518809347455, iteration: 233755
loss: 1.0168845653533936,grad_norm: 0.9999990950304855, iteration: 233756
loss: 0.9841740131378174,grad_norm: 0.8852408424447983, iteration: 233757
loss: 1.0275839567184448,grad_norm: 0.7922573831484815, iteration: 233758
loss: 0.9964814186096191,grad_norm: 0.9623595926300722, iteration: 233759
loss: 0.9965919256210327,grad_norm: 0.8803754116300151, iteration: 233760
loss: 1.0399450063705444,grad_norm: 0.9346272388908649, iteration: 233761
loss: 1.0152666568756104,grad_norm: 0.9685482279788417, iteration: 233762
loss: 1.0133213996887207,grad_norm: 0.779016345015685, iteration: 233763
loss: 1.0273765325546265,grad_norm: 0.8481538043509681, iteration: 233764
loss: 0.9925384521484375,grad_norm: 0.8330395645140836, iteration: 233765
loss: 0.9967828989028931,grad_norm: 0.9539565999279326, iteration: 233766
loss: 1.0096559524536133,grad_norm: 0.9999997663287913, iteration: 233767
loss: 1.0027530193328857,grad_norm: 0.8412049361938357, iteration: 233768
loss: 0.995459794998169,grad_norm: 0.8591980869925139, iteration: 233769
loss: 0.9828239679336548,grad_norm: 0.9822992317588454, iteration: 233770
loss: 1.0069594383239746,grad_norm: 0.8322061435689457, iteration: 233771
loss: 1.0383011102676392,grad_norm: 0.8880529927080119, iteration: 233772
loss: 1.0280338525772095,grad_norm: 0.9999991877580794, iteration: 233773
loss: 1.0192581415176392,grad_norm: 0.8748930580836558, iteration: 233774
loss: 0.9758450388908386,grad_norm: 0.9106028640524296, iteration: 233775
loss: 1.0098904371261597,grad_norm: 0.9581990215447571, iteration: 233776
loss: 1.0214356184005737,grad_norm: 0.9999991566019124, iteration: 233777
loss: 1.0571160316467285,grad_norm: 0.8624570399295263, iteration: 233778
loss: 0.9899479150772095,grad_norm: 0.7859549303881179, iteration: 233779
loss: 1.0034615993499756,grad_norm: 0.8305909799079179, iteration: 233780
loss: 0.9782714247703552,grad_norm: 0.8725999481329623, iteration: 233781
loss: 0.9754189848899841,grad_norm: 0.8336888849323315, iteration: 233782
loss: 0.9921699166297913,grad_norm: 0.8444367877532934, iteration: 233783
loss: 1.0147284269332886,grad_norm: 0.8119612473658988, iteration: 233784
loss: 1.078307867050171,grad_norm: 0.9999992833602592, iteration: 233785
loss: 1.0169709920883179,grad_norm: 0.9999991673695301, iteration: 233786
loss: 0.9847426414489746,grad_norm: 0.8629319293357486, iteration: 233787
loss: 1.030242919921875,grad_norm: 0.9999998286475268, iteration: 233788
loss: 0.9782419800758362,grad_norm: 0.9251376142568185, iteration: 233789
loss: 0.9798272252082825,grad_norm: 0.9999996237743639, iteration: 233790
loss: 0.9579899907112122,grad_norm: 0.8210307823004734, iteration: 233791
loss: 1.0201250314712524,grad_norm: 0.8561296571628814, iteration: 233792
loss: 0.9578035473823547,grad_norm: 0.7579338565660658, iteration: 233793
loss: 1.0290664434432983,grad_norm: 0.9999992170996355, iteration: 233794
loss: 1.009468674659729,grad_norm: 0.9687599267366752, iteration: 233795
loss: 1.0150377750396729,grad_norm: 0.9239090041584711, iteration: 233796
loss: 1.1042227745056152,grad_norm: 0.9999993215174207, iteration: 233797
loss: 0.987903356552124,grad_norm: 0.7464979976448168, iteration: 233798
loss: 0.9989264011383057,grad_norm: 0.7893850998093307, iteration: 233799
loss: 0.9516386985778809,grad_norm: 0.9416568508045955, iteration: 233800
loss: 0.9818230271339417,grad_norm: 0.9674891441693136, iteration: 233801
loss: 0.9631335735321045,grad_norm: 0.9999990813945372, iteration: 233802
loss: 1.0003105401992798,grad_norm: 0.9993879684398721, iteration: 233803
loss: 1.0362364053726196,grad_norm: 0.9999992938922818, iteration: 233804
loss: 1.038996696472168,grad_norm: 0.9627288317915121, iteration: 233805
loss: 0.9925413131713867,grad_norm: 0.9063588366115873, iteration: 233806
loss: 0.9962244629859924,grad_norm: 0.9251496244354207, iteration: 233807
loss: 0.979261577129364,grad_norm: 0.9492802091726332, iteration: 233808
loss: 0.9957861304283142,grad_norm: 0.8543716948961936, iteration: 233809
loss: 1.0034399032592773,grad_norm: 0.9999990071025081, iteration: 233810
loss: 0.9755532741546631,grad_norm: 0.9322888229063339, iteration: 233811
loss: 0.9974995851516724,grad_norm: 0.9999989914364427, iteration: 233812
loss: 0.9721331000328064,grad_norm: 0.9598073869730506, iteration: 233813
loss: 0.9905366897583008,grad_norm: 0.8381767409630364, iteration: 233814
loss: 0.998707115650177,grad_norm: 0.786203481066334, iteration: 233815
loss: 1.0302042961120605,grad_norm: 0.9057793419986073, iteration: 233816
loss: 1.02591872215271,grad_norm: 0.9999991263720273, iteration: 233817
loss: 1.020198106765747,grad_norm: 0.9724094087284901, iteration: 233818
loss: 1.0334850549697876,grad_norm: 0.7622283667691242, iteration: 233819
loss: 0.9727609753608704,grad_norm: 0.8805510690677754, iteration: 233820
loss: 1.0101622343063354,grad_norm: 0.8488530606547111, iteration: 233821
loss: 0.9874609112739563,grad_norm: 0.8217123157798301, iteration: 233822
loss: 0.9898318648338318,grad_norm: 0.9999991015254698, iteration: 233823
loss: 0.9932091236114502,grad_norm: 0.999999310281225, iteration: 233824
loss: 0.9786264896392822,grad_norm: 0.8113477792105279, iteration: 233825
loss: 1.0277997255325317,grad_norm: 0.8665298183233596, iteration: 233826
loss: 1.0198054313659668,grad_norm: 0.9360974976896428, iteration: 233827
loss: 0.9935266971588135,grad_norm: 0.9356196977638285, iteration: 233828
loss: 1.0030118227005005,grad_norm: 0.889299769826943, iteration: 233829
loss: 0.997693657875061,grad_norm: 0.8427377044378719, iteration: 233830
loss: 1.0009828805923462,grad_norm: 0.9999991164340378, iteration: 233831
loss: 1.0130661725997925,grad_norm: 0.999999057329595, iteration: 233832
loss: 0.9748485684394836,grad_norm: 0.8688183556344014, iteration: 233833
loss: 0.9964619874954224,grad_norm: 0.8258993144337861, iteration: 233834
loss: 1.0042279958724976,grad_norm: 0.9849552117389495, iteration: 233835
loss: 0.9754440188407898,grad_norm: 0.8826246288627387, iteration: 233836
loss: 0.990035355091095,grad_norm: 0.9635604419564364, iteration: 233837
loss: 0.9980738759040833,grad_norm: 0.9999991318309928, iteration: 233838
loss: 0.9991005659103394,grad_norm: 0.9675709556004966, iteration: 233839
loss: 0.9604335427284241,grad_norm: 0.9463346395922864, iteration: 233840
loss: 1.002048373222351,grad_norm: 0.9999991570639832, iteration: 233841
loss: 1.0195318460464478,grad_norm: 0.8695421644404938, iteration: 233842
loss: 1.0087848901748657,grad_norm: 0.965767894940266, iteration: 233843
loss: 1.0119106769561768,grad_norm: 0.999999494558879, iteration: 233844
loss: 0.995772123336792,grad_norm: 0.9540415500403269, iteration: 233845
loss: 0.9806963205337524,grad_norm: 0.90915493849285, iteration: 233846
loss: 0.977725625038147,grad_norm: 0.9167109574148818, iteration: 233847
loss: 1.0958400964736938,grad_norm: 0.9999998758857025, iteration: 233848
loss: 0.9848050475120544,grad_norm: 0.9685363005818131, iteration: 233849
loss: 0.9981796145439148,grad_norm: 0.9999990981651234, iteration: 233850
loss: 1.0076775550842285,grad_norm: 0.8654705633212085, iteration: 233851
loss: 1.0231305360794067,grad_norm: 0.999999844410275, iteration: 233852
loss: 0.9807810187339783,grad_norm: 0.9999991519440221, iteration: 233853
loss: 1.133542537689209,grad_norm: 0.9999991112955371, iteration: 233854
loss: 0.999674379825592,grad_norm: 0.8921635838255159, iteration: 233855
loss: 1.030646562576294,grad_norm: 0.9205786328174171, iteration: 233856
loss: 1.0136216878890991,grad_norm: 0.9999991979080084, iteration: 233857
loss: 1.0188790559768677,grad_norm: 0.9999989102702715, iteration: 233858
loss: 1.087648630142212,grad_norm: 0.9999990998237359, iteration: 233859
loss: 1.160049557685852,grad_norm: 0.9999997308502774, iteration: 233860
loss: 1.201798915863037,grad_norm: 0.9999999389622699, iteration: 233861
loss: 1.0363783836364746,grad_norm: 0.9999999734705723, iteration: 233862
loss: 1.014815330505371,grad_norm: 0.8606958858818979, iteration: 233863
loss: 1.0786834955215454,grad_norm: 0.8971247504549031, iteration: 233864
loss: 0.9798036813735962,grad_norm: 0.8282049037700633, iteration: 233865
loss: 0.9805660843849182,grad_norm: 0.7462452624864307, iteration: 233866
loss: 1.005717158317566,grad_norm: 0.8267750647309787, iteration: 233867
loss: 1.1829150915145874,grad_norm: 0.9999996681165886, iteration: 233868
loss: 1.1889413595199585,grad_norm: 0.9999990965947023, iteration: 233869
loss: 1.0833501815795898,grad_norm: 0.9999996794516351, iteration: 233870
loss: 1.189855694770813,grad_norm: 0.9999999535009916, iteration: 233871
loss: 1.1809617280960083,grad_norm: 0.8680946293473151, iteration: 233872
loss: 1.4379521608352661,grad_norm: 0.9999999529576706, iteration: 233873
loss: 1.1303932666778564,grad_norm: 0.9999998701067683, iteration: 233874
loss: 1.1220825910568237,grad_norm: 0.9999998124097724, iteration: 233875
loss: 1.0341887474060059,grad_norm: 0.9605802377954342, iteration: 233876
loss: 1.0875730514526367,grad_norm: 0.8992196663610074, iteration: 233877
loss: 1.0039193630218506,grad_norm: 0.9999994994893977, iteration: 233878
loss: 1.1449846029281616,grad_norm: 0.9999998679660956, iteration: 233879
loss: 1.2092721462249756,grad_norm: 0.9999999193078631, iteration: 233880
loss: 1.178995966911316,grad_norm: 0.99999909213518, iteration: 233881
loss: 1.0580490827560425,grad_norm: 0.9776712717580469, iteration: 233882
loss: 1.3767765760421753,grad_norm: 0.9999997670085495, iteration: 233883
loss: 1.242692470550537,grad_norm: 0.9999999125337884, iteration: 233884
loss: 1.397552490234375,grad_norm: 0.9999996679299672, iteration: 233885
loss: 1.0679322481155396,grad_norm: 0.9999989983339713, iteration: 233886
loss: 1.3566875457763672,grad_norm: 0.9999996966035388, iteration: 233887
loss: 1.0581164360046387,grad_norm: 0.9848898768631744, iteration: 233888
loss: 0.9966099262237549,grad_norm: 0.8547792311412451, iteration: 233889
loss: 1.0448975563049316,grad_norm: 0.9999992858943197, iteration: 233890
loss: 0.9875243902206421,grad_norm: 0.8550583932927517, iteration: 233891
loss: 1.0219320058822632,grad_norm: 0.9999990351396788, iteration: 233892
loss: 1.0779396295547485,grad_norm: 0.9999996968670196, iteration: 233893
loss: 1.006123423576355,grad_norm: 0.9999996758820596, iteration: 233894
loss: 1.0561336278915405,grad_norm: 0.9999998865988928, iteration: 233895
loss: 1.0236176252365112,grad_norm: 0.9999992657816166, iteration: 233896
loss: 1.0503116846084595,grad_norm: 0.9999991896302715, iteration: 233897
loss: 1.012136697769165,grad_norm: 0.9999994556655109, iteration: 233898
loss: 1.1441529989242554,grad_norm: 0.999999084150632, iteration: 233899
loss: 1.0096465349197388,grad_norm: 0.8248565886833249, iteration: 233900
loss: 1.0059691667556763,grad_norm: 0.999999060791109, iteration: 233901
loss: 1.028450608253479,grad_norm: 0.9956888600524666, iteration: 233902
loss: 0.9773620963096619,grad_norm: 0.9308843466185666, iteration: 233903
loss: 1.0085102319717407,grad_norm: 0.8654977542743271, iteration: 233904
loss: 1.003575086593628,grad_norm: 0.8898882544657795, iteration: 233905
loss: 1.001381278038025,grad_norm: 0.7894664124530989, iteration: 233906
loss: 1.0355838537216187,grad_norm: 0.9999992008224042, iteration: 233907
loss: 0.9758750796318054,grad_norm: 0.9071608189377194, iteration: 233908
loss: 0.9943146705627441,grad_norm: 0.9876447472058182, iteration: 233909
loss: 1.0071690082550049,grad_norm: 0.9999992094339487, iteration: 233910
loss: 1.0116323232650757,grad_norm: 0.912721303984745, iteration: 233911
loss: 0.9901102185249329,grad_norm: 0.9999995935149593, iteration: 233912
loss: 1.0292603969573975,grad_norm: 0.9999992220448322, iteration: 233913
loss: 0.9994490742683411,grad_norm: 0.8238923336609432, iteration: 233914
loss: 1.0199637413024902,grad_norm: 0.9999991362536227, iteration: 233915
loss: 1.0052176713943481,grad_norm: 0.9999990597978081, iteration: 233916
loss: 0.9964691400527954,grad_norm: 0.9999991272003697, iteration: 233917
loss: 1.0238699913024902,grad_norm: 0.9242751414096272, iteration: 233918
loss: 0.98990398645401,grad_norm: 0.9595303277241796, iteration: 233919
loss: 1.002226710319519,grad_norm: 0.9999991097466331, iteration: 233920
loss: 1.0485273599624634,grad_norm: 0.9999994268356343, iteration: 233921
loss: 0.9934048652648926,grad_norm: 0.99999897879216, iteration: 233922
loss: 1.0051897764205933,grad_norm: 0.9999990993676671, iteration: 233923
loss: 1.0314193964004517,grad_norm: 0.8351650055827071, iteration: 233924
loss: 1.0145288705825806,grad_norm: 0.9566808970036825, iteration: 233925
loss: 1.0038622617721558,grad_norm: 0.8982793889096577, iteration: 233926
loss: 0.9906172752380371,grad_norm: 0.9879951580625606, iteration: 233927
loss: 1.002599835395813,grad_norm: 0.8860548085242074, iteration: 233928
loss: 1.0416117906570435,grad_norm: 0.9632538030445607, iteration: 233929
loss: 1.1937575340270996,grad_norm: 0.8637369071011257, iteration: 233930
loss: 1.0251669883728027,grad_norm: 0.9999991323820068, iteration: 233931
loss: 1.0014433860778809,grad_norm: 0.9999990549440855, iteration: 233932
loss: 0.9964357614517212,grad_norm: 0.8001944307150508, iteration: 233933
loss: 0.9951801896095276,grad_norm: 0.8636611238707058, iteration: 233934
loss: 1.029036045074463,grad_norm: 0.9999998902015714, iteration: 233935
loss: 1.004762887954712,grad_norm: 0.9999991551333222, iteration: 233936
loss: 1.0029287338256836,grad_norm: 0.819373951425839, iteration: 233937
loss: 0.9693068861961365,grad_norm: 0.9999993218480708, iteration: 233938
loss: 1.0060938596725464,grad_norm: 0.9999994813911957, iteration: 233939
loss: 0.990253746509552,grad_norm: 0.9999991769560643, iteration: 233940
loss: 1.0267161130905151,grad_norm: 0.9999999493709546, iteration: 233941
loss: 1.060624122619629,grad_norm: 0.9999996490715307, iteration: 233942
loss: 1.0059319734573364,grad_norm: 0.9999992683871872, iteration: 233943
loss: 1.0872128009796143,grad_norm: 0.9112346966923329, iteration: 233944
loss: 0.9915469288825989,grad_norm: 0.7629820244450819, iteration: 233945
loss: 1.0022838115692139,grad_norm: 0.8837754399058689, iteration: 233946
loss: 0.9482821822166443,grad_norm: 0.9150118174949614, iteration: 233947
loss: 0.9721052050590515,grad_norm: 0.861002556159331, iteration: 233948
loss: 0.9964780807495117,grad_norm: 0.9999992570513361, iteration: 233949
loss: 1.0102839469909668,grad_norm: 0.9999994808377815, iteration: 233950
loss: 0.9891055822372437,grad_norm: 0.7988909391848596, iteration: 233951
loss: 0.9951326847076416,grad_norm: 0.9999992044778766, iteration: 233952
loss: 1.0156199932098389,grad_norm: 0.9546852570393816, iteration: 233953
loss: 1.0135775804519653,grad_norm: 0.9999991191905068, iteration: 233954
loss: 0.9777895212173462,grad_norm: 0.9999991253634126, iteration: 233955
loss: 1.157725214958191,grad_norm: 0.963603087904854, iteration: 233956
loss: 0.9985003471374512,grad_norm: 0.8887205686604746, iteration: 233957
loss: 1.0210576057434082,grad_norm: 0.9999990083445989, iteration: 233958
loss: 0.962128221988678,grad_norm: 0.9999989349832084, iteration: 233959
loss: 0.9919926524162292,grad_norm: 0.9999990038530859, iteration: 233960
loss: 1.006197452545166,grad_norm: 0.9451716311295996, iteration: 233961
loss: 0.9945892691612244,grad_norm: 0.999999356306121, iteration: 233962
loss: 1.0006873607635498,grad_norm: 0.9945222361378844, iteration: 233963
loss: 1.0063050985336304,grad_norm: 0.9999990893692149, iteration: 233964
loss: 0.9981755614280701,grad_norm: 0.9542068156675936, iteration: 233965
loss: 1.0354657173156738,grad_norm: 0.9576799583963083, iteration: 233966
loss: 1.0068951845169067,grad_norm: 0.9559734421848105, iteration: 233967
loss: 0.9943537712097168,grad_norm: 0.9583260566858011, iteration: 233968
loss: 1.0105243921279907,grad_norm: 0.9464311049807387, iteration: 233969
loss: 1.0190367698669434,grad_norm: 0.9151058651540072, iteration: 233970
loss: 1.012858271598816,grad_norm: 0.9999993550108397, iteration: 233971
loss: 0.9961051940917969,grad_norm: 0.999999450996678, iteration: 233972
loss: 1.0284802913665771,grad_norm: 0.8675774254035569, iteration: 233973
loss: 1.0438544750213623,grad_norm: 0.9999994615033749, iteration: 233974
loss: 0.9900619387626648,grad_norm: 0.9911354712258551, iteration: 233975
loss: 0.9612874388694763,grad_norm: 0.8952896900018911, iteration: 233976
loss: 0.9807159304618835,grad_norm: 0.785940591516073, iteration: 233977
loss: 1.0653107166290283,grad_norm: 0.999999820138569, iteration: 233978
loss: 0.9852252006530762,grad_norm: 0.9999990568718727, iteration: 233979
loss: 1.018660068511963,grad_norm: 0.9999993068134954, iteration: 233980
loss: 1.0151777267456055,grad_norm: 0.9999992658587795, iteration: 233981
loss: 1.06266188621521,grad_norm: 0.9999994089094295, iteration: 233982
loss: 1.017982840538025,grad_norm: 0.8440988411054965, iteration: 233983
loss: 0.9747915267944336,grad_norm: 0.9999994922611206, iteration: 233984
loss: 0.9899755716323853,grad_norm: 0.8650697044230684, iteration: 233985
loss: 1.024359941482544,grad_norm: 0.9662570504936873, iteration: 233986
loss: 1.0175673961639404,grad_norm: 0.7848570828716095, iteration: 233987
loss: 1.0173510313034058,grad_norm: 0.8962789287712014, iteration: 233988
loss: 1.0152488946914673,grad_norm: 0.908326451193395, iteration: 233989
loss: 0.9848076701164246,grad_norm: 0.7865944699533434, iteration: 233990
loss: 1.027764081954956,grad_norm: 0.9228660087324674, iteration: 233991
loss: 0.9895641207695007,grad_norm: 0.9999998201055817, iteration: 233992
loss: 1.0262324810028076,grad_norm: 0.9719948767349137, iteration: 233993
loss: 1.0327757596969604,grad_norm: 0.9999990751101507, iteration: 233994
loss: 0.9954774379730225,grad_norm: 0.8234623676142373, iteration: 233995
loss: 1.0332832336425781,grad_norm: 0.8752424500203798, iteration: 233996
loss: 0.9756592512130737,grad_norm: 0.8751353205409419, iteration: 233997
loss: 1.002855896949768,grad_norm: 0.7752742908977208, iteration: 233998
loss: 0.9791861772537231,grad_norm: 0.9460022205075386, iteration: 233999
loss: 0.9878228306770325,grad_norm: 0.7990016556181602, iteration: 234000
loss: 1.0077756643295288,grad_norm: 0.7959135378789381, iteration: 234001
loss: 0.9672581553459167,grad_norm: 0.9233867480470831, iteration: 234002
loss: 1.0160439014434814,grad_norm: 0.999999250960384, iteration: 234003
loss: 1.006001353263855,grad_norm: 0.9920176633596755, iteration: 234004
loss: 0.9885596632957458,grad_norm: 0.9538603499760991, iteration: 234005
loss: 1.0188453197479248,grad_norm: 0.8723591078957262, iteration: 234006
loss: 0.9724783897399902,grad_norm: 0.8692552515407884, iteration: 234007
loss: 1.0373802185058594,grad_norm: 0.9999992066254455, iteration: 234008
loss: 1.0527644157409668,grad_norm: 0.8064698344743817, iteration: 234009
loss: 1.0044933557510376,grad_norm: 0.9999992190962483, iteration: 234010
loss: 0.9759804010391235,grad_norm: 0.9624968900306358, iteration: 234011
loss: 1.0183249711990356,grad_norm: 0.9128170704736724, iteration: 234012
loss: 1.0179001092910767,grad_norm: 0.9999993173156044, iteration: 234013
loss: 0.9681567549705505,grad_norm: 0.9394256963663311, iteration: 234014
loss: 0.9963692426681519,grad_norm: 0.9398357213181945, iteration: 234015
loss: 1.0185868740081787,grad_norm: 0.8277938776236873, iteration: 234016
loss: 0.989899754524231,grad_norm: 0.9888958710704379, iteration: 234017
loss: 1.0269765853881836,grad_norm: 0.9999992302719592, iteration: 234018
loss: 1.0062530040740967,grad_norm: 0.999999201234994, iteration: 234019
loss: 0.9947613477706909,grad_norm: 0.9851646231989123, iteration: 234020
loss: 0.982277512550354,grad_norm: 0.9999991903248048, iteration: 234021
loss: 1.0435467958450317,grad_norm: 0.9999998376320381, iteration: 234022
loss: 1.0762466192245483,grad_norm: 0.9999992141565086, iteration: 234023
loss: 0.992868959903717,grad_norm: 0.9999992403875426, iteration: 234024
loss: 0.9787630438804626,grad_norm: 0.8962051393321251, iteration: 234025
loss: 0.9802650809288025,grad_norm: 0.8415732413983626, iteration: 234026
loss: 1.0059850215911865,grad_norm: 0.9999990946797646, iteration: 234027
loss: 0.9748309850692749,grad_norm: 0.9999989701101742, iteration: 234028
loss: 0.9976232647895813,grad_norm: 0.8913781758762549, iteration: 234029
loss: 1.015398621559143,grad_norm: 0.9999990920970937, iteration: 234030
loss: 0.9813933968544006,grad_norm: 0.8001856769107266, iteration: 234031
loss: 0.9972854852676392,grad_norm: 0.9602415312673875, iteration: 234032
loss: 0.9637618064880371,grad_norm: 0.8593460772610143, iteration: 234033
loss: 1.162028193473816,grad_norm: 0.9999999288779817, iteration: 234034
loss: 0.9838167428970337,grad_norm: 0.9999991266985103, iteration: 234035
loss: 0.957938551902771,grad_norm: 0.8475708812789554, iteration: 234036
loss: 1.0043656826019287,grad_norm: 0.8356720452509702, iteration: 234037
loss: 1.0638350248336792,grad_norm: 0.9999998805886939, iteration: 234038
loss: 0.9827624559402466,grad_norm: 0.8268679665554385, iteration: 234039
loss: 0.989958643913269,grad_norm: 0.9379732296351403, iteration: 234040
loss: 0.9761549830436707,grad_norm: 0.999999164033718, iteration: 234041
loss: 0.9958931803703308,grad_norm: 0.9999996803406862, iteration: 234042
loss: 0.9983076453208923,grad_norm: 0.9991052867178928, iteration: 234043
loss: 1.0847276449203491,grad_norm: 0.9999995161690098, iteration: 234044
loss: 1.0039564371109009,grad_norm: 0.9999990024797937, iteration: 234045
loss: 1.0707249641418457,grad_norm: 0.9999996783747062, iteration: 234046
loss: 1.0563733577728271,grad_norm: 0.9999996836638803, iteration: 234047
loss: 0.9965375065803528,grad_norm: 0.9999991413430241, iteration: 234048
loss: 0.9747039675712585,grad_norm: 0.9291963222394696, iteration: 234049
loss: 1.0047842264175415,grad_norm: 0.9999990123336109, iteration: 234050
loss: 1.0182011127471924,grad_norm: 0.8714513249317953, iteration: 234051
loss: 1.0232867002487183,grad_norm: 0.9553380464744462, iteration: 234052
loss: 0.9820125699043274,grad_norm: 0.8276392000630752, iteration: 234053
loss: 1.0312671661376953,grad_norm: 0.988032355307787, iteration: 234054
loss: 0.9742505550384521,grad_norm: 0.9431223799610202, iteration: 234055
loss: 1.2390350103378296,grad_norm: 0.9999999074329678, iteration: 234056
loss: 1.1242656707763672,grad_norm: 0.9999993247578515, iteration: 234057
loss: 0.9990732073783875,grad_norm: 0.8546967375531368, iteration: 234058
loss: 1.0212854146957397,grad_norm: 0.8993058570271298, iteration: 234059
loss: 1.0131386518478394,grad_norm: 0.7979113386718996, iteration: 234060
loss: 1.0034892559051514,grad_norm: 0.9999992688698904, iteration: 234061
loss: 1.1619755029678345,grad_norm: 0.9999996650664006, iteration: 234062
loss: 0.9807825684547424,grad_norm: 0.8578902211648836, iteration: 234063
loss: 0.9858881235122681,grad_norm: 0.984452777621517, iteration: 234064
loss: 1.0845035314559937,grad_norm: 0.8651079162152069, iteration: 234065
loss: 1.0234386920928955,grad_norm: 0.8449680910455296, iteration: 234066
loss: 1.0257563591003418,grad_norm: 0.965159841875764, iteration: 234067
loss: 1.0322896242141724,grad_norm: 0.999999123886406, iteration: 234068
loss: 1.0702857971191406,grad_norm: 0.9966410073194177, iteration: 234069
loss: 1.0642515420913696,grad_norm: 0.9999992115961955, iteration: 234070
loss: 1.017399787902832,grad_norm: 0.8633248583812513, iteration: 234071
loss: 0.9840191602706909,grad_norm: 0.9999991150202012, iteration: 234072
loss: 0.9803999662399292,grad_norm: 0.9999992585034186, iteration: 234073
loss: 1.0202592611312866,grad_norm: 0.8652467105782782, iteration: 234074
loss: 0.9921677708625793,grad_norm: 0.898997835120075, iteration: 234075
loss: 0.9772061109542847,grad_norm: 0.9533149531156712, iteration: 234076
loss: 0.9863432049751282,grad_norm: 0.866955385895353, iteration: 234077
loss: 0.9742306470870972,grad_norm: 0.8740857550793383, iteration: 234078
loss: 0.9644601941108704,grad_norm: 0.8893699966510039, iteration: 234079
loss: 0.9929667711257935,grad_norm: 0.999999185810831, iteration: 234080
loss: 0.9707695245742798,grad_norm: 0.9195282733016327, iteration: 234081
loss: 0.9512183666229248,grad_norm: 0.8570052465145445, iteration: 234082
loss: 0.960390567779541,grad_norm: 0.9999993040503924, iteration: 234083
loss: 1.0037320852279663,grad_norm: 0.9182637384906225, iteration: 234084
loss: 1.032780647277832,grad_norm: 0.8451314361987086, iteration: 234085
loss: 1.0095995664596558,grad_norm: 0.9999990764256558, iteration: 234086
loss: 1.013537883758545,grad_norm: 0.9999993017407425, iteration: 234087
loss: 1.001037359237671,grad_norm: 0.9075522188124338, iteration: 234088
loss: 0.9887723922729492,grad_norm: 0.8328258938147056, iteration: 234089
loss: 1.0201590061187744,grad_norm: 0.7281293815150712, iteration: 234090
loss: 0.9961202144622803,grad_norm: 0.999999090818605, iteration: 234091
loss: 0.9778171181678772,grad_norm: 0.9132829735566458, iteration: 234092
loss: 1.0195075273513794,grad_norm: 0.9999991909106136, iteration: 234093
loss: 1.0015697479248047,grad_norm: 0.9999995807696939, iteration: 234094
loss: 1.008092999458313,grad_norm: 0.8673580293769316, iteration: 234095
loss: 1.0053530931472778,grad_norm: 0.9999991446225703, iteration: 234096
loss: 1.065138339996338,grad_norm: 0.9999990494137214, iteration: 234097
loss: 0.9928069710731506,grad_norm: 0.9562404040903851, iteration: 234098
loss: 1.0064785480499268,grad_norm: 0.9999990900244908, iteration: 234099
loss: 1.0089367628097534,grad_norm: 0.9211445004844832, iteration: 234100
loss: 1.0377036333084106,grad_norm: 0.8574554585007276, iteration: 234101
loss: 0.9953920245170593,grad_norm: 0.9999996578471692, iteration: 234102
loss: 1.0273165702819824,grad_norm: 0.9999993608752984, iteration: 234103
loss: 1.0055756568908691,grad_norm: 0.9045567556043757, iteration: 234104
loss: 1.0080764293670654,grad_norm: 0.9999990870085478, iteration: 234105
loss: 0.9834007620811462,grad_norm: 0.951458462280781, iteration: 234106
loss: 0.9701316952705383,grad_norm: 0.9999993164360457, iteration: 234107
loss: 1.0861711502075195,grad_norm: 0.9273657492104805, iteration: 234108
loss: 0.9705581665039062,grad_norm: 0.9793000640118882, iteration: 234109
loss: 0.9882363677024841,grad_norm: 0.845172360191765, iteration: 234110
loss: 0.9996392130851746,grad_norm: 0.9999990789755899, iteration: 234111
loss: 1.00779390335083,grad_norm: 0.9709526715435302, iteration: 234112
loss: 1.0348743200302124,grad_norm: 0.9999997070634414, iteration: 234113
loss: 1.0094412565231323,grad_norm: 0.9999992052585575, iteration: 234114
loss: 1.0290385484695435,grad_norm: 0.8513962043691088, iteration: 234115
loss: 0.9593835473060608,grad_norm: 0.9999990921184255, iteration: 234116
loss: 1.0131547451019287,grad_norm: 0.9089309682280841, iteration: 234117
loss: 1.0063412189483643,grad_norm: 0.9890384459742714, iteration: 234118
loss: 1.002675175666809,grad_norm: 0.862391938375758, iteration: 234119
loss: 1.0138254165649414,grad_norm: 0.9634919328626701, iteration: 234120
loss: 0.9943927526473999,grad_norm: 0.9879809393584867, iteration: 234121
loss: 0.9950249791145325,grad_norm: 0.9795916999039622, iteration: 234122
loss: 0.9793128371238708,grad_norm: 0.9999990838073792, iteration: 234123
loss: 1.0683196783065796,grad_norm: 0.9999994788866415, iteration: 234124
loss: 1.0277947187423706,grad_norm: 0.9999991897859453, iteration: 234125
loss: 1.073098063468933,grad_norm: 0.999999093494571, iteration: 234126
loss: 0.9804239869117737,grad_norm: 0.9130434842789655, iteration: 234127
loss: 1.0422875881195068,grad_norm: 0.9069792406822372, iteration: 234128
loss: 1.0151010751724243,grad_norm: 0.8664407670781211, iteration: 234129
loss: 1.0522680282592773,grad_norm: 0.9999992852390903, iteration: 234130
loss: 1.055519700050354,grad_norm: 0.9999997383414709, iteration: 234131
loss: 0.9990236163139343,grad_norm: 0.9999994325168325, iteration: 234132
loss: 0.9795985221862793,grad_norm: 0.7510513099988909, iteration: 234133
loss: 0.9923053979873657,grad_norm: 0.9391406762192542, iteration: 234134
loss: 1.0259736776351929,grad_norm: 0.8907393094858966, iteration: 234135
loss: 1.0228902101516724,grad_norm: 0.9999991994380241, iteration: 234136
loss: 0.9637439250946045,grad_norm: 0.8943033359495496, iteration: 234137
loss: 1.0013490915298462,grad_norm: 0.935565886397852, iteration: 234138
loss: 0.9901920557022095,grad_norm: 0.7922153307046471, iteration: 234139
loss: 1.0878701210021973,grad_norm: 0.9999995864048902, iteration: 234140
loss: 0.9880462288856506,grad_norm: 0.7979217605672297, iteration: 234141
loss: 1.009558916091919,grad_norm: 0.8924864881215839, iteration: 234142
loss: 0.9940814971923828,grad_norm: 0.9107185094907952, iteration: 234143
loss: 1.0164285898208618,grad_norm: 0.704927117776066, iteration: 234144
loss: 1.002601146697998,grad_norm: 0.9999991609382242, iteration: 234145
loss: 1.0119221210479736,grad_norm: 0.8556068413453797, iteration: 234146
loss: 1.1968332529067993,grad_norm: 0.999999938344581, iteration: 234147
loss: 1.0597470998764038,grad_norm: 0.99999979795432, iteration: 234148
loss: 0.9930996894836426,grad_norm: 0.9999989989878025, iteration: 234149
loss: 0.9649526476860046,grad_norm: 0.8940153920859402, iteration: 234150
loss: 1.0522072315216064,grad_norm: 0.9999999546866423, iteration: 234151
loss: 0.9805043339729309,grad_norm: 0.9179766713393154, iteration: 234152
loss: 1.032654047012329,grad_norm: 0.8462935098257387, iteration: 234153
loss: 1.0508193969726562,grad_norm: 0.9999993117467, iteration: 234154
loss: 0.9735976457595825,grad_norm: 0.8773687836661866, iteration: 234155
loss: 1.0131596326828003,grad_norm: 0.9649891038710037, iteration: 234156
loss: 1.0109319686889648,grad_norm: 0.8853722931804353, iteration: 234157
loss: 0.9891525506973267,grad_norm: 0.7602875314949848, iteration: 234158
loss: 0.9455909132957458,grad_norm: 0.851000747665113, iteration: 234159
loss: 0.9925121068954468,grad_norm: 0.9744144147045163, iteration: 234160
loss: 1.001571774482727,grad_norm: 0.9999990893182107, iteration: 234161
loss: 1.004217505455017,grad_norm: 0.9999990421299227, iteration: 234162
loss: 0.9476476907730103,grad_norm: 0.9342198519488965, iteration: 234163
loss: 0.9388353824615479,grad_norm: 0.9697432602055708, iteration: 234164
loss: 1.0117347240447998,grad_norm: 0.9999998786162375, iteration: 234165
loss: 0.9823837280273438,grad_norm: 0.9563638466837779, iteration: 234166
loss: 1.0096794366836548,grad_norm: 0.9242044520379784, iteration: 234167
loss: 1.040541172027588,grad_norm: 0.7536292687545895, iteration: 234168
loss: 0.9979681968688965,grad_norm: 0.8164383016232635, iteration: 234169
loss: 1.0173131227493286,grad_norm: 0.9999990964357379, iteration: 234170
loss: 1.0304584503173828,grad_norm: 0.9252822945253092, iteration: 234171
loss: 1.054513692855835,grad_norm: 0.9999995816045548, iteration: 234172
loss: 1.0099027156829834,grad_norm: 0.9999996478040398, iteration: 234173
loss: 0.9634748697280884,grad_norm: 0.9001713971189256, iteration: 234174
loss: 0.9752769470214844,grad_norm: 0.9827461755353379, iteration: 234175
loss: 1.00407075881958,grad_norm: 0.9538777698939918, iteration: 234176
loss: 1.0332931280136108,grad_norm: 0.9999990886376207, iteration: 234177
loss: 1.0110726356506348,grad_norm: 0.7485020333365046, iteration: 234178
loss: 1.004644513130188,grad_norm: 0.8673587858855206, iteration: 234179
loss: 1.0307135581970215,grad_norm: 0.9999989963927779, iteration: 234180
loss: 1.0013595819473267,grad_norm: 0.8465579819527779, iteration: 234181
loss: 0.9836002588272095,grad_norm: 0.8883927270160266, iteration: 234182
loss: 0.97342848777771,grad_norm: 0.8062320617239456, iteration: 234183
loss: 0.9904265403747559,grad_norm: 0.9999990342997352, iteration: 234184
loss: 0.997526228427887,grad_norm: 0.976689113948959, iteration: 234185
loss: 0.9975886344909668,grad_norm: 0.9281833315588849, iteration: 234186
loss: 0.9873309135437012,grad_norm: 0.8253601795208466, iteration: 234187
loss: 0.9972978830337524,grad_norm: 0.9999991724038374, iteration: 234188
loss: 0.9914430975914001,grad_norm: 0.9338069647370241, iteration: 234189
loss: 1.0450494289398193,grad_norm: 0.999999800798089, iteration: 234190
loss: 0.978118360042572,grad_norm: 0.8967313969654617, iteration: 234191
loss: 0.9850232601165771,grad_norm: 0.8376161869732054, iteration: 234192
loss: 1.0362492799758911,grad_norm: 0.9999994234226369, iteration: 234193
loss: 0.9908075928688049,grad_norm: 0.8339529150289424, iteration: 234194
loss: 1.0053340196609497,grad_norm: 0.9999991584861608, iteration: 234195
loss: 1.2501071691513062,grad_norm: 0.9999999926792875, iteration: 234196
loss: 1.02975332736969,grad_norm: 0.999999983550501, iteration: 234197
loss: 1.020119547843933,grad_norm: 0.9999990885906012, iteration: 234198
loss: 0.9671292901039124,grad_norm: 0.7619627826846834, iteration: 234199
loss: 0.9890297055244446,grad_norm: 0.8643514012024262, iteration: 234200
loss: 1.0305293798446655,grad_norm: 0.9999990279452436, iteration: 234201
loss: 1.006723403930664,grad_norm: 0.9999994785370375, iteration: 234202
loss: 1.0295614004135132,grad_norm: 0.840584871071848, iteration: 234203
loss: 0.9872311949729919,grad_norm: 0.9745273764976516, iteration: 234204
loss: 1.0250239372253418,grad_norm: 0.9573921590578062, iteration: 234205
loss: 0.9892150163650513,grad_norm: 0.8445946010528287, iteration: 234206
loss: 1.019181489944458,grad_norm: 0.9909064337042763, iteration: 234207
loss: 0.947464108467102,grad_norm: 0.8807547384977948, iteration: 234208
loss: 1.0074975490570068,grad_norm: 0.955086343311287, iteration: 234209
loss: 0.9957128167152405,grad_norm: 0.9169863074541317, iteration: 234210
loss: 1.0270191431045532,grad_norm: 0.9722218687172446, iteration: 234211
loss: 1.0288915634155273,grad_norm: 0.9999990461378097, iteration: 234212
loss: 1.0162467956542969,grad_norm: 0.9550533030232252, iteration: 234213
loss: 0.9934723377227783,grad_norm: 0.999999340034445, iteration: 234214
loss: 0.9939871430397034,grad_norm: 0.8046020001256392, iteration: 234215
loss: 0.9952681660652161,grad_norm: 0.8478821494251625, iteration: 234216
loss: 1.0182541608810425,grad_norm: 0.8336425342350101, iteration: 234217
loss: 1.0342738628387451,grad_norm: 0.8328163378978929, iteration: 234218
loss: 1.0192402601242065,grad_norm: 0.8947304956828367, iteration: 234219
loss: 1.0085389614105225,grad_norm: 0.7906530653691806, iteration: 234220
loss: 0.9969328045845032,grad_norm: 0.7429960964140526, iteration: 234221
loss: 1.0156917572021484,grad_norm: 0.8399761740161592, iteration: 234222
loss: 1.0012484788894653,grad_norm: 0.9999992388411147, iteration: 234223
loss: 0.9894541501998901,grad_norm: 0.8754493858027418, iteration: 234224
loss: 0.9633160829544067,grad_norm: 0.8482538708638601, iteration: 234225
loss: 1.0106133222579956,grad_norm: 0.9999993359093774, iteration: 234226
loss: 1.0147080421447754,grad_norm: 0.8094410601502847, iteration: 234227
loss: 0.9906501770019531,grad_norm: 0.9999994148780854, iteration: 234228
loss: 1.0363489389419556,grad_norm: 0.9999989964517582, iteration: 234229
loss: 1.0538358688354492,grad_norm: 0.9497394907995668, iteration: 234230
loss: 0.9942389726638794,grad_norm: 0.81583685523983, iteration: 234231
loss: 0.985753059387207,grad_norm: 0.8457413452969691, iteration: 234232
loss: 0.9384996294975281,grad_norm: 0.8426867905482937, iteration: 234233
loss: 0.9932594895362854,grad_norm: 0.9573062292215201, iteration: 234234
loss: 1.0036519765853882,grad_norm: 0.8790518154124145, iteration: 234235
loss: 1.2786368131637573,grad_norm: 0.9999998615651523, iteration: 234236
loss: 1.0292388200759888,grad_norm: 0.8778271937141567, iteration: 234237
loss: 1.0146023035049438,grad_norm: 0.8649356035084886, iteration: 234238
loss: 0.9849877953529358,grad_norm: 0.9436440631922964, iteration: 234239
loss: 1.0039507150650024,grad_norm: 0.9999989020146566, iteration: 234240
loss: 1.0188947916030884,grad_norm: 0.9999990642414414, iteration: 234241
loss: 1.0412266254425049,grad_norm: 0.8473204431243708, iteration: 234242
loss: 1.0296165943145752,grad_norm: 0.9999991780063618, iteration: 234243
loss: 0.9867792725563049,grad_norm: 0.8528307830355871, iteration: 234244
loss: 1.003737449645996,grad_norm: 0.9854375751903266, iteration: 234245
loss: 0.9819850325584412,grad_norm: 0.981974856567378, iteration: 234246
loss: 0.9811680912971497,grad_norm: 0.892328704884784, iteration: 234247
loss: 1.0144370794296265,grad_norm: 0.8658824043397209, iteration: 234248
loss: 1.0164309740066528,grad_norm: 0.9999989663098752, iteration: 234249
loss: 0.9584925770759583,grad_norm: 0.974200360365497, iteration: 234250
loss: 1.007719874382019,grad_norm: 0.9918142003214093, iteration: 234251
loss: 0.9868654012680054,grad_norm: 0.9604159084037353, iteration: 234252
loss: 0.9901337027549744,grad_norm: 0.9999991998466058, iteration: 234253
loss: 0.9929507970809937,grad_norm: 0.796731766788323, iteration: 234254
loss: 1.0229679346084595,grad_norm: 0.8150201172132242, iteration: 234255
loss: 0.9542766213417053,grad_norm: 0.9999990447724914, iteration: 234256
loss: 1.013064980506897,grad_norm: 0.8644194839866794, iteration: 234257
loss: 1.0193541049957275,grad_norm: 0.82333415792246, iteration: 234258
loss: 0.9922884702682495,grad_norm: 0.9999990899825916, iteration: 234259
loss: 0.9503936767578125,grad_norm: 0.8213643336676879, iteration: 234260
loss: 0.9972071647644043,grad_norm: 0.8622643516776504, iteration: 234261
loss: 1.019938588142395,grad_norm: 0.9750801881510678, iteration: 234262
loss: 1.0751633644104004,grad_norm: 0.9286591802524173, iteration: 234263
loss: 0.9894575476646423,grad_norm: 0.8222422875462869, iteration: 234264
loss: 0.9724021553993225,grad_norm: 0.9093701285794126, iteration: 234265
loss: 1.0033413171768188,grad_norm: 0.9344298434740911, iteration: 234266
loss: 1.0048326253890991,grad_norm: 0.7878052299314451, iteration: 234267
loss: 1.0102678537368774,grad_norm: 0.9752056730528432, iteration: 234268
loss: 0.9775242209434509,grad_norm: 0.9641612145812727, iteration: 234269
loss: 0.9921321272850037,grad_norm: 0.9251064033033113, iteration: 234270
loss: 1.0095137357711792,grad_norm: 0.7609068923762501, iteration: 234271
loss: 0.9822679758071899,grad_norm: 0.9299484399368726, iteration: 234272
loss: 1.0179325342178345,grad_norm: 0.8947751381737187, iteration: 234273
loss: 1.0315912961959839,grad_norm: 0.9999992600920798, iteration: 234274
loss: 0.9943712949752808,grad_norm: 0.8396526413994441, iteration: 234275
loss: 0.9744512438774109,grad_norm: 0.9315251195546419, iteration: 234276
loss: 0.9826140999794006,grad_norm: 0.9999990321804904, iteration: 234277
loss: 1.0495977401733398,grad_norm: 0.9999999858189732, iteration: 234278
loss: 0.9547272324562073,grad_norm: 0.8551331806899521, iteration: 234279
loss: 1.034820795059204,grad_norm: 0.9999992368701965, iteration: 234280
loss: 0.9844079613685608,grad_norm: 0.7899629622233996, iteration: 234281
loss: 1.0057196617126465,grad_norm: 0.6801879438858517, iteration: 234282
loss: 0.969200611114502,grad_norm: 0.9335571031574273, iteration: 234283
loss: 1.0083707571029663,grad_norm: 0.8595759535397316, iteration: 234284
loss: 0.9898102879524231,grad_norm: 0.8063518387763865, iteration: 234285
loss: 1.0255712270736694,grad_norm: 0.781709662061949, iteration: 234286
loss: 0.9538638591766357,grad_norm: 0.8301360347817869, iteration: 234287
loss: 0.9646583199501038,grad_norm: 0.9999991060293038, iteration: 234288
loss: 0.9699294567108154,grad_norm: 0.9817570539187627, iteration: 234289
loss: 1.0121568441390991,grad_norm: 0.8712892601721279, iteration: 234290
loss: 0.97358638048172,grad_norm: 0.8540799113785975, iteration: 234291
loss: 1.007044792175293,grad_norm: 0.8197842847081297, iteration: 234292
loss: 1.023209810256958,grad_norm: 0.87458704457825, iteration: 234293
loss: 0.9780997633934021,grad_norm: 0.8217502986440314, iteration: 234294
loss: 1.0052120685577393,grad_norm: 0.908715364346437, iteration: 234295
loss: 1.003606915473938,grad_norm: 0.9999991056685535, iteration: 234296
loss: 1.013981819152832,grad_norm: 0.8699755071400364, iteration: 234297
loss: 1.0026674270629883,grad_norm: 0.8439456963966229, iteration: 234298
loss: 1.0078879594802856,grad_norm: 0.8224788731117331, iteration: 234299
loss: 1.010979175567627,grad_norm: 0.9172294959042074, iteration: 234300
loss: 0.9722604751586914,grad_norm: 0.8377144320505437, iteration: 234301
loss: 1.0211519002914429,grad_norm: 0.8316041712963885, iteration: 234302
loss: 1.0262033939361572,grad_norm: 0.9999989745605875, iteration: 234303
loss: 0.9911727905273438,grad_norm: 0.8858540994793185, iteration: 234304
loss: 1.0239357948303223,grad_norm: 0.9999991584401146, iteration: 234305
loss: 1.005759835243225,grad_norm: 0.8837311260674933, iteration: 234306
loss: 0.976394772529602,grad_norm: 0.9999989983479369, iteration: 234307
loss: 0.9999276995658875,grad_norm: 0.7453986679318414, iteration: 234308
loss: 1.0140751600265503,grad_norm: 0.7577502988896986, iteration: 234309
loss: 1.0401755571365356,grad_norm: 0.9437419867115141, iteration: 234310
loss: 0.9807994961738586,grad_norm: 0.9715140489626519, iteration: 234311
loss: 1.015688419342041,grad_norm: 0.9470712663950205, iteration: 234312
loss: 0.9829258322715759,grad_norm: 0.8840338910375902, iteration: 234313
loss: 1.0246179103851318,grad_norm: 0.8193990943946083, iteration: 234314
loss: 0.9890432953834534,grad_norm: 0.9669306405929828, iteration: 234315
loss: 1.1019682884216309,grad_norm: 0.9999990328878748, iteration: 234316
loss: 1.0259724855422974,grad_norm: 0.9727823251362789, iteration: 234317
loss: 0.9641170501708984,grad_norm: 0.7984704303306579, iteration: 234318
loss: 0.9776173233985901,grad_norm: 0.9999990872087943, iteration: 234319
loss: 0.9943893551826477,grad_norm: 0.8745309445471314, iteration: 234320
loss: 1.0082447528839111,grad_norm: 0.9999996069957218, iteration: 234321
loss: 1.0178933143615723,grad_norm: 0.9587079317327746, iteration: 234322
loss: 1.016370415687561,grad_norm: 0.9999990268965337, iteration: 234323
loss: 0.9997438788414001,grad_norm: 0.8936781769127111, iteration: 234324
loss: 0.9940422773361206,grad_norm: 0.9999991632566878, iteration: 234325
loss: 0.9801406860351562,grad_norm: 0.8704508782634889, iteration: 234326
loss: 0.9332411885261536,grad_norm: 0.9752026389697426, iteration: 234327
loss: 0.9833374619483948,grad_norm: 0.9171855810860824, iteration: 234328
loss: 1.0182067155838013,grad_norm: 0.8931044768269626, iteration: 234329
loss: 1.014099359512329,grad_norm: 0.910373252552874, iteration: 234330
loss: 1.0074087381362915,grad_norm: 0.8722282374593978, iteration: 234331
loss: 1.0077100992202759,grad_norm: 0.9372541348254536, iteration: 234332
loss: 1.0068124532699585,grad_norm: 0.798289091801764, iteration: 234333
loss: 0.9971573948860168,grad_norm: 0.8803191610919615, iteration: 234334
loss: 1.0038491487503052,grad_norm: 0.8745699458807815, iteration: 234335
loss: 1.0096850395202637,grad_norm: 0.9999991544687604, iteration: 234336
loss: 1.0058400630950928,grad_norm: 0.9846235369707297, iteration: 234337
loss: 1.1924347877502441,grad_norm: 0.9999990586682554, iteration: 234338
loss: 0.9744938611984253,grad_norm: 0.8223752952650303, iteration: 234339
loss: 1.0049004554748535,grad_norm: 0.9999990340166598, iteration: 234340
loss: 0.9896135926246643,grad_norm: 0.8670600138203753, iteration: 234341
loss: 0.9877718091011047,grad_norm: 0.776529824095066, iteration: 234342
loss: 1.013466715812683,grad_norm: 0.8251969238135743, iteration: 234343
loss: 0.9456881284713745,grad_norm: 0.8569502083567255, iteration: 234344
loss: 0.980726957321167,grad_norm: 0.8323016741875665, iteration: 234345
loss: 1.023324966430664,grad_norm: 0.8665887330271727, iteration: 234346
loss: 0.9983234405517578,grad_norm: 0.7474821211980832, iteration: 234347
loss: 1.005371332168579,grad_norm: 0.9999992413700133, iteration: 234348
loss: 0.985421359539032,grad_norm: 0.949411423900485, iteration: 234349
loss: 1.0255491733551025,grad_norm: 0.9999991024791292, iteration: 234350
loss: 1.0404738187789917,grad_norm: 0.9999991803092252, iteration: 234351
loss: 0.9596573710441589,grad_norm: 0.9022762467872613, iteration: 234352
loss: 1.0065417289733887,grad_norm: 0.9999992927891981, iteration: 234353
loss: 1.0080313682556152,grad_norm: 0.8987099853640657, iteration: 234354
loss: 0.9621599912643433,grad_norm: 0.8092852268464372, iteration: 234355
loss: 1.0239264965057373,grad_norm: 0.9999989711706341, iteration: 234356
loss: 1.0161445140838623,grad_norm: 0.9999997266775248, iteration: 234357
loss: 1.0046405792236328,grad_norm: 0.9133126543565673, iteration: 234358
loss: 0.9817488193511963,grad_norm: 0.9677675329043253, iteration: 234359
loss: 1.019951581954956,grad_norm: 0.8839279945943849, iteration: 234360
loss: 1.0183316469192505,grad_norm: 0.9999995900841292, iteration: 234361
loss: 0.9665207862854004,grad_norm: 0.8047005879042421, iteration: 234362
loss: 1.042802095413208,grad_norm: 0.8948900482248477, iteration: 234363
loss: 1.00818932056427,grad_norm: 0.8517893974092461, iteration: 234364
loss: 0.9940029382705688,grad_norm: 0.8686313531603204, iteration: 234365
loss: 0.9921103715896606,grad_norm: 0.9386371373876372, iteration: 234366
loss: 1.0353542566299438,grad_norm: 0.9999993157989201, iteration: 234367
loss: 0.986200213432312,grad_norm: 0.9449687861738307, iteration: 234368
loss: 1.0025770664215088,grad_norm: 0.9771826447267533, iteration: 234369
loss: 0.9989035129547119,grad_norm: 0.8838496087524317, iteration: 234370
loss: 1.0509920120239258,grad_norm: 0.9999991099814112, iteration: 234371
loss: 0.9949397444725037,grad_norm: 0.8026725371423358, iteration: 234372
loss: 1.0837830305099487,grad_norm: 0.9999990183265424, iteration: 234373
loss: 0.9579394459724426,grad_norm: 0.941411633901438, iteration: 234374
loss: 0.9956244826316833,grad_norm: 0.7902189470424874, iteration: 234375
loss: 1.024207353591919,grad_norm: 0.9999989543759819, iteration: 234376
loss: 1.0409648418426514,grad_norm: 0.9241700177075582, iteration: 234377
loss: 1.011935830116272,grad_norm: 0.9399112250640419, iteration: 234378
loss: 1.0048530101776123,grad_norm: 0.8794509642704529, iteration: 234379
loss: 0.980620801448822,grad_norm: 0.9999989861304255, iteration: 234380
loss: 1.0005213022232056,grad_norm: 0.9999991381988391, iteration: 234381
loss: 1.0022828578948975,grad_norm: 0.9999991037639051, iteration: 234382
loss: 1.0108132362365723,grad_norm: 0.9687215059693386, iteration: 234383
loss: 1.0200389623641968,grad_norm: 0.9999990287144069, iteration: 234384
loss: 1.0225902795791626,grad_norm: 0.7411171755712322, iteration: 234385
loss: 0.9955909848213196,grad_norm: 0.8001121179530597, iteration: 234386
loss: 1.0203142166137695,grad_norm: 0.9248766783270636, iteration: 234387
loss: 0.995232105255127,grad_norm: 0.8808874815152088, iteration: 234388
loss: 0.9597290754318237,grad_norm: 0.8511795255216008, iteration: 234389
loss: 1.003631591796875,grad_norm: 0.900660488502694, iteration: 234390
loss: 0.9789977073669434,grad_norm: 0.8604028336002758, iteration: 234391
loss: 1.026676058769226,grad_norm: 0.9999991869418432, iteration: 234392
loss: 1.0433368682861328,grad_norm: 0.9789654200277186, iteration: 234393
loss: 1.0140552520751953,grad_norm: 0.808389097552323, iteration: 234394
loss: 1.0144416093826294,grad_norm: 0.9810737429808278, iteration: 234395
loss: 1.0276349782943726,grad_norm: 0.9555086738406466, iteration: 234396
loss: 1.0143203735351562,grad_norm: 0.9959349546204769, iteration: 234397
loss: 1.0389509201049805,grad_norm: 0.999999132068737, iteration: 234398
loss: 1.0151792764663696,grad_norm: 0.9390704832195067, iteration: 234399
loss: 1.0278432369232178,grad_norm: 0.9318434746948032, iteration: 234400
loss: 0.9706626534461975,grad_norm: 0.9538336127406911, iteration: 234401
loss: 1.0298858880996704,grad_norm: 0.8138475219535968, iteration: 234402
loss: 1.0039207935333252,grad_norm: 0.9251748650154525, iteration: 234403
loss: 0.9936165809631348,grad_norm: 0.9999994647497261, iteration: 234404
loss: 1.014780879020691,grad_norm: 0.9999992087953526, iteration: 234405
loss: 0.9916507601737976,grad_norm: 0.9320738564965065, iteration: 234406
loss: 1.0061157941818237,grad_norm: 0.9684537196578288, iteration: 234407
loss: 0.9853194355964661,grad_norm: 0.7944041123650667, iteration: 234408
loss: 1.0149319171905518,grad_norm: 0.9999990832255959, iteration: 234409
loss: 0.9632166028022766,grad_norm: 0.7706520200968497, iteration: 234410
loss: 1.020469307899475,grad_norm: 0.9999993913059803, iteration: 234411
loss: 1.0227299928665161,grad_norm: 0.8939666883289632, iteration: 234412
loss: 0.9677388668060303,grad_norm: 0.9241141855393976, iteration: 234413
loss: 0.987061619758606,grad_norm: 0.8156306500808692, iteration: 234414
loss: 0.9725474715232849,grad_norm: 0.8394603879135099, iteration: 234415
loss: 0.9915814399719238,grad_norm: 0.9638869994438057, iteration: 234416
loss: 1.006035566329956,grad_norm: 0.9830991172549869, iteration: 234417
loss: 0.9717524647712708,grad_norm: 0.9999990772408858, iteration: 234418
loss: 1.0359609127044678,grad_norm: 0.8438080687990925, iteration: 234419
loss: 0.9753388166427612,grad_norm: 0.945117804319547, iteration: 234420
loss: 0.985284149646759,grad_norm: 0.9572497785014454, iteration: 234421
loss: 0.9821720123291016,grad_norm: 0.9999990662323087, iteration: 234422
loss: 1.0090199708938599,grad_norm: 0.9626742529049691, iteration: 234423
loss: 1.0384433269500732,grad_norm: 0.9999990860289337, iteration: 234424
loss: 1.0054255723953247,grad_norm: 0.9999990042432644, iteration: 234425
loss: 1.0301557779312134,grad_norm: 0.8331715111151726, iteration: 234426
loss: 1.0236696004867554,grad_norm: 0.9999991126157861, iteration: 234427
loss: 0.990536630153656,grad_norm: 0.8409406387500077, iteration: 234428
loss: 1.0118021965026855,grad_norm: 0.9999991489328386, iteration: 234429
loss: 0.9880496263504028,grad_norm: 0.806455136165817, iteration: 234430
loss: 0.9817665815353394,grad_norm: 0.9999994955458922, iteration: 234431
loss: 0.9696493744850159,grad_norm: 0.7316064878371427, iteration: 234432
loss: 1.00485360622406,grad_norm: 0.8063149397385966, iteration: 234433
loss: 0.9568029046058655,grad_norm: 0.9408983926123101, iteration: 234434
loss: 0.9896615147590637,grad_norm: 0.8693434297232773, iteration: 234435
loss: 0.9960421323776245,grad_norm: 0.8268799438699335, iteration: 234436
loss: 0.9771218299865723,grad_norm: 0.9771636924727664, iteration: 234437
loss: 0.9865118861198425,grad_norm: 0.8943633887586582, iteration: 234438
loss: 1.0092602968215942,grad_norm: 0.7807709793999248, iteration: 234439
loss: 0.9877163171768188,grad_norm: 0.9809394153993627, iteration: 234440
loss: 0.9811594486236572,grad_norm: 0.9977575633519609, iteration: 234441
loss: 0.9741619229316711,grad_norm: 0.8002111295470101, iteration: 234442
loss: 1.0889477729797363,grad_norm: 0.9999998695004277, iteration: 234443
loss: 0.9948543906211853,grad_norm: 0.9076228417424028, iteration: 234444
loss: 1.0093306303024292,grad_norm: 0.9716134401323133, iteration: 234445
loss: 1.0207791328430176,grad_norm: 0.8393370581018992, iteration: 234446
loss: 0.9988076686859131,grad_norm: 0.9734872740938944, iteration: 234447
loss: 1.1454603672027588,grad_norm: 0.9999998692854782, iteration: 234448
loss: 0.972801148891449,grad_norm: 0.7590696852838175, iteration: 234449
loss: 1.0059236288070679,grad_norm: 0.8392020055604539, iteration: 234450
loss: 1.005812644958496,grad_norm: 0.9999991948340246, iteration: 234451
loss: 1.0264915227890015,grad_norm: 0.9999991637066876, iteration: 234452
loss: 0.9854658246040344,grad_norm: 0.9407508739422135, iteration: 234453
loss: 1.015902042388916,grad_norm: 0.9999990626632423, iteration: 234454
loss: 1.0091392993927002,grad_norm: 0.9763624552992964, iteration: 234455
loss: 0.9658074378967285,grad_norm: 0.8840486374249724, iteration: 234456
loss: 0.9811820387840271,grad_norm: 0.9999991988753888, iteration: 234457
loss: 1.0066983699798584,grad_norm: 0.8856323296794453, iteration: 234458
loss: 1.0351569652557373,grad_norm: 0.9999996675708621, iteration: 234459
loss: 1.072608470916748,grad_norm: 0.999999231824891, iteration: 234460
loss: 1.0364928245544434,grad_norm: 0.8852045149397951, iteration: 234461
loss: 1.0585260391235352,grad_norm: 0.9999991774225153, iteration: 234462
loss: 0.9790377616882324,grad_norm: 0.7859260167190659, iteration: 234463
loss: 0.9626489877700806,grad_norm: 0.9999990030171004, iteration: 234464
loss: 1.066071629524231,grad_norm: 0.9999997083876393, iteration: 234465
loss: 1.0098189115524292,grad_norm: 0.845586521485608, iteration: 234466
loss: 1.1523417234420776,grad_norm: 0.9999994826711072, iteration: 234467
loss: 1.0512058734893799,grad_norm: 0.9999999027263632, iteration: 234468
loss: 1.0257349014282227,grad_norm: 0.9999994943713838, iteration: 234469
loss: 0.9618236422538757,grad_norm: 0.8415092130483742, iteration: 234470
loss: 1.0176950693130493,grad_norm: 0.9999994950566357, iteration: 234471
loss: 0.9871186017990112,grad_norm: 0.9306719093755104, iteration: 234472
loss: 0.9874148368835449,grad_norm: 0.9999991301581175, iteration: 234473
loss: 0.9601293802261353,grad_norm: 0.9293871307590199, iteration: 234474
loss: 0.9549074769020081,grad_norm: 0.9999991670859745, iteration: 234475
loss: 0.970930278301239,grad_norm: 0.9999989403170002, iteration: 234476
loss: 1.023556113243103,grad_norm: 0.9091750586642434, iteration: 234477
loss: 0.989349901676178,grad_norm: 0.9999991037131527, iteration: 234478
loss: 1.0181450843811035,grad_norm: 0.9076093956524616, iteration: 234479
loss: 1.1006869077682495,grad_norm: 0.999999463519158, iteration: 234480
loss: 0.9953979849815369,grad_norm: 0.8397826454478371, iteration: 234481
loss: 1.1243945360183716,grad_norm: 0.9999991723952203, iteration: 234482
loss: 1.0660661458969116,grad_norm: 0.9919668569145299, iteration: 234483
loss: 1.0710800886154175,grad_norm: 0.8697423742623027, iteration: 234484
loss: 1.0490436553955078,grad_norm: 0.979959275489356, iteration: 234485
loss: 0.9975206851959229,grad_norm: 0.8138761085497342, iteration: 234486
loss: 1.0050967931747437,grad_norm: 0.9239012593821955, iteration: 234487
loss: 1.1037514209747314,grad_norm: 0.8414985625193819, iteration: 234488
loss: 0.9777432680130005,grad_norm: 0.9945083606184603, iteration: 234489
loss: 1.0149284601211548,grad_norm: 0.8524268374965656, iteration: 234490
loss: 1.129513144493103,grad_norm: 0.8778358861935351, iteration: 234491
loss: 1.018795132637024,grad_norm: 0.8729125448364814, iteration: 234492
loss: 0.9784435033798218,grad_norm: 0.8447920948287917, iteration: 234493
loss: 0.9904736280441284,grad_norm: 0.8259068891104377, iteration: 234494
loss: 1.094227910041809,grad_norm: 0.9999998182366232, iteration: 234495
loss: 1.0642954111099243,grad_norm: 0.9999989944625605, iteration: 234496
loss: 1.191473364830017,grad_norm: 0.9999990310581602, iteration: 234497
loss: 0.9921410083770752,grad_norm: 0.9145377776456225, iteration: 234498
loss: 0.9901002049446106,grad_norm: 0.9999991585595176, iteration: 234499
loss: 1.2388105392456055,grad_norm: 0.9999999241243681, iteration: 234500
loss: 1.0597293376922607,grad_norm: 0.999999834113288, iteration: 234501
loss: 1.0433800220489502,grad_norm: 0.9153501018833723, iteration: 234502
loss: 1.0929456949234009,grad_norm: 0.999999471711494, iteration: 234503
loss: 0.9941847324371338,grad_norm: 0.9845102852618756, iteration: 234504
loss: 1.1388713121414185,grad_norm: 0.9999992460675968, iteration: 234505
loss: 1.0506809949874878,grad_norm: 0.9999992181532983, iteration: 234506
loss: 0.9957947731018066,grad_norm: 0.8659308985961778, iteration: 234507
loss: 1.076182246208191,grad_norm: 0.9999990704507123, iteration: 234508
loss: 1.1040173768997192,grad_norm: 0.9999995206147639, iteration: 234509
loss: 0.9785528779029846,grad_norm: 0.9999991422623725, iteration: 234510
loss: 1.1293338537216187,grad_norm: 0.9999998673678132, iteration: 234511
loss: 1.0755318403244019,grad_norm: 0.999999929845933, iteration: 234512
loss: 1.020188331604004,grad_norm: 0.8381010498478613, iteration: 234513
loss: 1.0554510354995728,grad_norm: 0.999999179415226, iteration: 234514
loss: 1.046069860458374,grad_norm: 0.9924284727440081, iteration: 234515
loss: 0.988074004650116,grad_norm: 0.9999990364638319, iteration: 234516
loss: 1.1597917079925537,grad_norm: 0.9999992521397633, iteration: 234517
loss: 1.120223879814148,grad_norm: 0.9999992982748844, iteration: 234518
loss: 0.9815104007720947,grad_norm: 0.8413927776064223, iteration: 234519
loss: 1.0400887727737427,grad_norm: 0.9999992012157091, iteration: 234520
loss: 0.9858644604682922,grad_norm: 0.9999990706651993, iteration: 234521
loss: 1.021243929862976,grad_norm: 0.9999993177118476, iteration: 234522
loss: 1.0048177242279053,grad_norm: 0.8402275177580542, iteration: 234523
loss: 0.9848671555519104,grad_norm: 0.956659447276969, iteration: 234524
loss: 1.0124025344848633,grad_norm: 0.9999998557047067, iteration: 234525
loss: 1.0287944078445435,grad_norm: 0.9999991647743134, iteration: 234526
loss: 1.071651577949524,grad_norm: 0.9999990919030255, iteration: 234527
loss: 1.0045534372329712,grad_norm: 0.8365752752506128, iteration: 234528
loss: 1.092936396598816,grad_norm: 0.9922754881748784, iteration: 234529
loss: 1.0213191509246826,grad_norm: 0.9082597242851197, iteration: 234530
loss: 1.0491633415222168,grad_norm: 0.8550074321913705, iteration: 234531
loss: 1.0318316221237183,grad_norm: 0.9575635996855117, iteration: 234532
loss: 0.990596354007721,grad_norm: 0.9999990241713825, iteration: 234533
loss: 1.0227090120315552,grad_norm: 0.9999991303621218, iteration: 234534
loss: 0.9852246642112732,grad_norm: 0.9666136586001436, iteration: 234535
loss: 1.017020344734192,grad_norm: 0.9999990587534694, iteration: 234536
loss: 1.0080265998840332,grad_norm: 0.9977923876918414, iteration: 234537
loss: 0.9937947988510132,grad_norm: 0.8016269403040015, iteration: 234538
loss: 0.98265141248703,grad_norm: 0.9999990864915806, iteration: 234539
loss: 0.9315149784088135,grad_norm: 0.9999990642493685, iteration: 234540
loss: 0.993677020072937,grad_norm: 0.9999990061794602, iteration: 234541
loss: 1.0808368921279907,grad_norm: 0.9765353634482474, iteration: 234542
loss: 1.0366034507751465,grad_norm: 0.9999994051687462, iteration: 234543
loss: 1.0579265356063843,grad_norm: 0.9999993965540215, iteration: 234544
loss: 1.0788791179656982,grad_norm: 0.9999992291067423, iteration: 234545
loss: 1.0260825157165527,grad_norm: 0.9999992193605525, iteration: 234546
loss: 1.004101037979126,grad_norm: 0.9604250008430686, iteration: 234547
loss: 1.078704833984375,grad_norm: 0.9999997159484948, iteration: 234548
loss: 1.084356665611267,grad_norm: 0.9999990838482659, iteration: 234549
loss: 1.0095922946929932,grad_norm: 0.9999999285640511, iteration: 234550
loss: 1.0214335918426514,grad_norm: 0.9999996143824617, iteration: 234551
loss: 0.9775176048278809,grad_norm: 0.9999992764435143, iteration: 234552
loss: 1.0625722408294678,grad_norm: 0.9999990052240888, iteration: 234553
loss: 0.9929392337799072,grad_norm: 0.8794606022959469, iteration: 234554
loss: 1.0773640871047974,grad_norm: 0.8727387488738206, iteration: 234555
loss: 1.0577703714370728,grad_norm: 0.9285870820867859, iteration: 234556
loss: 1.0159351825714111,grad_norm: 0.9999994640056578, iteration: 234557
loss: 1.094942331314087,grad_norm: 0.9999994586020517, iteration: 234558
loss: 1.03929603099823,grad_norm: 0.9999992650601289, iteration: 234559
loss: 0.9568167328834534,grad_norm: 0.9999996313854476, iteration: 234560
loss: 0.9763247966766357,grad_norm: 0.6900937902988944, iteration: 234561
loss: 1.0692676305770874,grad_norm: 0.9999999825383181, iteration: 234562
loss: 1.030006766319275,grad_norm: 0.9999991591492525, iteration: 234563
loss: 0.9687519669532776,grad_norm: 0.866144414598793, iteration: 234564
loss: 1.0649999380111694,grad_norm: 0.8028334879782205, iteration: 234565
loss: 1.020142674446106,grad_norm: 0.9699957595149684, iteration: 234566
loss: 1.024557113647461,grad_norm: 0.9999997318861068, iteration: 234567
loss: 1.053301215171814,grad_norm: 0.9999993762477684, iteration: 234568
loss: 1.0007128715515137,grad_norm: 0.9999993557113879, iteration: 234569
loss: 1.024426817893982,grad_norm: 0.9493122006138588, iteration: 234570
loss: 1.0754475593566895,grad_norm: 0.9999992914706156, iteration: 234571
loss: 1.0330528020858765,grad_norm: 0.9818855019846036, iteration: 234572
loss: 0.9504901170730591,grad_norm: 0.948364824019634, iteration: 234573
loss: 1.0282460451126099,grad_norm: 0.9999993990407351, iteration: 234574
loss: 0.9692229628562927,grad_norm: 0.7629334553207728, iteration: 234575
loss: 1.0403701066970825,grad_norm: 0.9999992095169157, iteration: 234576
loss: 1.0528017282485962,grad_norm: 0.9999990601718342, iteration: 234577
loss: 1.0089263916015625,grad_norm: 0.9999992166504931, iteration: 234578
loss: 1.018458366394043,grad_norm: 0.7519435747673422, iteration: 234579
loss: 1.0512537956237793,grad_norm: 0.9999997800759686, iteration: 234580
loss: 1.0092746019363403,grad_norm: 0.9999989805408778, iteration: 234581
loss: 0.9961327314376831,grad_norm: 0.7889212441471646, iteration: 234582
loss: 0.9512937068939209,grad_norm: 0.8321220679036336, iteration: 234583
loss: 1.0381773710250854,grad_norm: 0.9298227409096628, iteration: 234584
loss: 0.9738196134567261,grad_norm: 0.922038093670192, iteration: 234585
loss: 1.0166754722595215,grad_norm: 0.9999990556644093, iteration: 234586
loss: 1.1027597188949585,grad_norm: 0.9999997847537437, iteration: 234587
loss: 1.0226175785064697,grad_norm: 0.9066134557685839, iteration: 234588
loss: 1.0654189586639404,grad_norm: 0.999999204998113, iteration: 234589
loss: 1.0945147275924683,grad_norm: 0.9999995080316991, iteration: 234590
loss: 0.9855601787567139,grad_norm: 0.757270315641338, iteration: 234591
loss: 1.0417710542678833,grad_norm: 0.8583661763690209, iteration: 234592
loss: 1.0308444499969482,grad_norm: 0.9924820668701398, iteration: 234593
loss: 1.0231471061706543,grad_norm: 0.9189397351751835, iteration: 234594
loss: 0.9962157607078552,grad_norm: 0.9999996404434655, iteration: 234595
loss: 1.002087116241455,grad_norm: 0.9803405523394487, iteration: 234596
loss: 1.0502153635025024,grad_norm: 0.9999991311155837, iteration: 234597
loss: 1.061523675918579,grad_norm: 0.9999992709761137, iteration: 234598
loss: 1.064273476600647,grad_norm: 0.842113437486561, iteration: 234599
loss: 1.007102131843567,grad_norm: 0.8502702177301882, iteration: 234600
loss: 1.00310218334198,grad_norm: 0.9294017459848539, iteration: 234601
loss: 1.0988190174102783,grad_norm: 1.0000000614278934, iteration: 234602
loss: 1.1085915565490723,grad_norm: 0.9999992725703601, iteration: 234603
loss: 1.034706711769104,grad_norm: 0.9999991744519225, iteration: 234604
loss: 1.0454055070877075,grad_norm: 0.9816212118178292, iteration: 234605
loss: 1.0155999660491943,grad_norm: 0.9999992101190134, iteration: 234606
loss: 1.0343483686447144,grad_norm: 0.9320898440721254, iteration: 234607
loss: 0.998298704624176,grad_norm: 0.7650272488039249, iteration: 234608
loss: 1.0697156190872192,grad_norm: 0.9999999358139993, iteration: 234609
loss: 0.9877561330795288,grad_norm: 0.8759407381187153, iteration: 234610
loss: 1.0003323554992676,grad_norm: 0.9999991804676029, iteration: 234611
loss: 0.9755198359489441,grad_norm: 0.8791154088554388, iteration: 234612
loss: 0.9971319437026978,grad_norm: 0.9118721182329327, iteration: 234613
loss: 0.9952368140220642,grad_norm: 0.9999995729770658, iteration: 234614
loss: 0.9798696637153625,grad_norm: 0.8828969156544342, iteration: 234615
loss: 1.0025020837783813,grad_norm: 0.9990541070742647, iteration: 234616
loss: 1.0116865634918213,grad_norm: 0.9999991604889307, iteration: 234617
loss: 1.0020973682403564,grad_norm: 0.8664285349993255, iteration: 234618
loss: 1.0687081813812256,grad_norm: 0.9285576491026581, iteration: 234619
loss: 1.0392411947250366,grad_norm: 0.9999993597707796, iteration: 234620
loss: 1.0451586246490479,grad_norm: 0.9498501108674012, iteration: 234621
loss: 1.0171757936477661,grad_norm: 0.7857596977100804, iteration: 234622
loss: 1.0819419622421265,grad_norm: 0.9999994329073979, iteration: 234623
loss: 1.0478724241256714,grad_norm: 0.9999990557955653, iteration: 234624
loss: 1.0273096561431885,grad_norm: 0.9999998409881015, iteration: 234625
loss: 1.0181459188461304,grad_norm: 0.9999990934729036, iteration: 234626
loss: 0.9776228070259094,grad_norm: 0.9999990583635204, iteration: 234627
loss: 0.9630845785140991,grad_norm: 0.999999225767765, iteration: 234628
loss: 0.9764054417610168,grad_norm: 0.999999068996012, iteration: 234629
loss: 0.9908543825149536,grad_norm: 0.9999989917073876, iteration: 234630
loss: 1.1012942790985107,grad_norm: 0.9999992736061342, iteration: 234631
loss: 1.0421061515808105,grad_norm: 0.9842722427475608, iteration: 234632
loss: 0.9894461035728455,grad_norm: 0.8913462917013626, iteration: 234633
loss: 1.0219314098358154,grad_norm: 0.8792068525724874, iteration: 234634
loss: 0.9735143780708313,grad_norm: 0.9374771640328007, iteration: 234635
loss: 0.9743015766143799,grad_norm: 0.9999993420992891, iteration: 234636
loss: 1.176046371459961,grad_norm: 0.9999995503393244, iteration: 234637
loss: 1.003879189491272,grad_norm: 0.8898397475407721, iteration: 234638
loss: 0.986328125,grad_norm: 0.9999990590407521, iteration: 234639
loss: 1.00624680519104,grad_norm: 0.8785579232243941, iteration: 234640
loss: 1.0487966537475586,grad_norm: 0.9999995277491264, iteration: 234641
loss: 1.072813630104065,grad_norm: 0.9999995629900414, iteration: 234642
loss: 0.988944947719574,grad_norm: 0.999850627409983, iteration: 234643
loss: 0.9833534359931946,grad_norm: 0.8693898986668411, iteration: 234644
loss: 1.0100622177124023,grad_norm: 0.9999997895509574, iteration: 234645
loss: 1.0077095031738281,grad_norm: 0.999999069070413, iteration: 234646
loss: 1.0644280910491943,grad_norm: 0.9999990854195148, iteration: 234647
loss: 1.0605366230010986,grad_norm: 0.9999992712595244, iteration: 234648
loss: 1.007968544960022,grad_norm: 0.9999997809848604, iteration: 234649
loss: 1.0348284244537354,grad_norm: 0.9999994717687001, iteration: 234650
loss: 0.9891011118888855,grad_norm: 0.8627891267260684, iteration: 234651
loss: 0.9927425980567932,grad_norm: 0.8896615149218808, iteration: 234652
loss: 0.9920355677604675,grad_norm: 0.9564692019545228, iteration: 234653
loss: 1.028300166130066,grad_norm: 0.9999991631273267, iteration: 234654
loss: 1.0389351844787598,grad_norm: 0.8148042332065014, iteration: 234655
loss: 0.9709847569465637,grad_norm: 0.99639780256208, iteration: 234656
loss: 0.9938490986824036,grad_norm: 0.9173853925389519, iteration: 234657
loss: 1.0071192979812622,grad_norm: 0.9999991121067118, iteration: 234658
loss: 1.0111985206604004,grad_norm: 0.9098360012268106, iteration: 234659
loss: 1.131583333015442,grad_norm: 0.9999996489783447, iteration: 234660
loss: 1.037566900253296,grad_norm: 0.9999993275895817, iteration: 234661
loss: 1.11825430393219,grad_norm: 0.9999993811094461, iteration: 234662
loss: 0.9739024639129639,grad_norm: 0.9672575045019177, iteration: 234663
loss: 0.969294548034668,grad_norm: 0.9999992305467138, iteration: 234664
loss: 1.0251656770706177,grad_norm: 0.9402724199092951, iteration: 234665
loss: 1.0120447874069214,grad_norm: 0.9134572051185024, iteration: 234666
loss: 1.0120387077331543,grad_norm: 0.8055587096165657, iteration: 234667
loss: 1.0139645338058472,grad_norm: 0.9999993025720855, iteration: 234668
loss: 1.029619574546814,grad_norm: 1.0000000015752988, iteration: 234669
loss: 1.0312080383300781,grad_norm: 0.9222870290587448, iteration: 234670
loss: 1.0246210098266602,grad_norm: 0.9546146960025133, iteration: 234671
loss: 0.9811860918998718,grad_norm: 0.8613328300103473, iteration: 234672
loss: 0.9809322357177734,grad_norm: 0.9999990811496933, iteration: 234673
loss: 1.218482494354248,grad_norm: 0.9999998108262282, iteration: 234674
loss: 0.9941392540931702,grad_norm: 0.8114538251655355, iteration: 234675
loss: 0.9818317890167236,grad_norm: 0.9999992341488698, iteration: 234676
loss: 0.9909518957138062,grad_norm: 0.9999991987332968, iteration: 234677
loss: 1.0072603225708008,grad_norm: 0.99999919010437, iteration: 234678
loss: 1.007926344871521,grad_norm: 0.9999993901297592, iteration: 234679
loss: 0.9645382165908813,grad_norm: 0.8946000411493465, iteration: 234680
loss: 1.0182496309280396,grad_norm: 0.9999998741227553, iteration: 234681
loss: 1.006011724472046,grad_norm: 0.910482556077641, iteration: 234682
loss: 0.9854689240455627,grad_norm: 0.9024104828009274, iteration: 234683
loss: 1.0295500755310059,grad_norm: 0.9999990524750189, iteration: 234684
loss: 1.068278193473816,grad_norm: 0.9999991119455489, iteration: 234685
loss: 1.139365553855896,grad_norm: 0.9999995256421176, iteration: 234686
loss: 1.0010476112365723,grad_norm: 0.9299532409194752, iteration: 234687
loss: 1.0824334621429443,grad_norm: 0.999999661151179, iteration: 234688
loss: 0.9681435823440552,grad_norm: 0.8800598024539172, iteration: 234689
loss: 1.0056997537612915,grad_norm: 0.9997464915690017, iteration: 234690
loss: 0.956513524055481,grad_norm: 0.999999805733207, iteration: 234691
loss: 0.969340980052948,grad_norm: 0.9533440118737361, iteration: 234692
loss: 1.0726101398468018,grad_norm: 0.9999990719952324, iteration: 234693
loss: 1.0250442028045654,grad_norm: 0.8430875402755883, iteration: 234694
loss: 0.9774160385131836,grad_norm: 0.9077912508569671, iteration: 234695
loss: 1.0171014070510864,grad_norm: 0.9147598111300101, iteration: 234696
loss: 1.1068819761276245,grad_norm: 0.999999689745306, iteration: 234697
loss: 1.07669997215271,grad_norm: 0.9999999431854878, iteration: 234698
loss: 1.0188794136047363,grad_norm: 0.8276971548278381, iteration: 234699
loss: 1.089088797569275,grad_norm: 0.9999998390946209, iteration: 234700
loss: 0.9765520095825195,grad_norm: 0.9999989528135379, iteration: 234701
loss: 0.998988151550293,grad_norm: 0.9374586379555909, iteration: 234702
loss: 0.986107587814331,grad_norm: 0.8286701241040813, iteration: 234703
loss: 1.066493034362793,grad_norm: 0.9999991125884602, iteration: 234704
loss: 0.9952900409698486,grad_norm: 0.9152528895270132, iteration: 234705
loss: 0.9775363802909851,grad_norm: 0.8749657819671345, iteration: 234706
loss: 1.0001050233840942,grad_norm: 0.9999991255437781, iteration: 234707
loss: 1.0159885883331299,grad_norm: 0.9923672131598325, iteration: 234708
loss: 1.1378275156021118,grad_norm: 0.9999998243516008, iteration: 234709
loss: 1.027268886566162,grad_norm: 0.744523963001507, iteration: 234710
loss: 1.0505871772766113,grad_norm: 0.9506824757919327, iteration: 234711
loss: 0.98042893409729,grad_norm: 0.9423182384979901, iteration: 234712
loss: 0.9833998084068298,grad_norm: 0.9350000336763267, iteration: 234713
loss: 1.09822678565979,grad_norm: 0.9999992135030917, iteration: 234714
loss: 1.090829610824585,grad_norm: 0.9999996476548397, iteration: 234715
loss: 0.9922126531600952,grad_norm: 0.8294887780323106, iteration: 234716
loss: 1.0467242002487183,grad_norm: 0.9659337734557258, iteration: 234717
loss: 1.0171716213226318,grad_norm: 0.9999992942754211, iteration: 234718
loss: 1.010145664215088,grad_norm: 0.8889092505070082, iteration: 234719
loss: 1.0014163255691528,grad_norm: 0.9999993235113713, iteration: 234720
loss: 1.0104482173919678,grad_norm: 0.9999997624316036, iteration: 234721
loss: 1.0483351945877075,grad_norm: 0.9999992218310912, iteration: 234722
loss: 1.0554710626602173,grad_norm: 0.9999997088148616, iteration: 234723
loss: 1.0120582580566406,grad_norm: 0.8721642977968062, iteration: 234724
loss: 1.035210132598877,grad_norm: 0.9470870825135597, iteration: 234725
loss: 0.9989005923271179,grad_norm: 0.7967331545655255, iteration: 234726
loss: 1.076588749885559,grad_norm: 0.999999109391259, iteration: 234727
loss: 1.036947250366211,grad_norm: 0.8014413368290617, iteration: 234728
loss: 0.9808085560798645,grad_norm: 0.9999996959348625, iteration: 234729
loss: 1.0008881092071533,grad_norm: 0.9050167245424879, iteration: 234730
loss: 1.0723631381988525,grad_norm: 0.9999999342732945, iteration: 234731
loss: 1.0254086256027222,grad_norm: 0.8901440913632565, iteration: 234732
loss: 1.005696415901184,grad_norm: 0.915212953666091, iteration: 234733
loss: 0.986872136592865,grad_norm: 0.8786036920848175, iteration: 234734
loss: 1.013820767402649,grad_norm: 0.9999990949320756, iteration: 234735
loss: 0.974942684173584,grad_norm: 0.8829106716049584, iteration: 234736
loss: 0.9998664259910583,grad_norm: 0.8386267672730405, iteration: 234737
loss: 1.0052995681762695,grad_norm: 0.9999990693825241, iteration: 234738
loss: 0.9839789867401123,grad_norm: 0.9999991560555854, iteration: 234739
loss: 1.0947108268737793,grad_norm: 0.9999991396649297, iteration: 234740
loss: 0.9960172176361084,grad_norm: 0.7912044116238761, iteration: 234741
loss: 1.0177005529403687,grad_norm: 0.9999995219285402, iteration: 234742
loss: 0.9862978458404541,grad_norm: 0.9937705588950452, iteration: 234743
loss: 0.9790865778923035,grad_norm: 0.9999990827248258, iteration: 234744
loss: 1.0114394426345825,grad_norm: 0.7968143208748364, iteration: 234745
loss: 0.9835654497146606,grad_norm: 0.9424325238134477, iteration: 234746
loss: 1.0113391876220703,grad_norm: 0.9999998740001467, iteration: 234747
loss: 1.0456136465072632,grad_norm: 0.9999992391659678, iteration: 234748
loss: 1.0313279628753662,grad_norm: 0.7920581313329917, iteration: 234749
loss: 1.0974321365356445,grad_norm: 0.9999990619591873, iteration: 234750
loss: 1.0036240816116333,grad_norm: 0.8229056454739347, iteration: 234751
loss: 1.017645001411438,grad_norm: 0.999999076349853, iteration: 234752
loss: 1.1449991464614868,grad_norm: 0.999999903570538, iteration: 234753
loss: 0.9856595396995544,grad_norm: 0.9999991664069549, iteration: 234754
loss: 1.1423988342285156,grad_norm: 0.9999994857897507, iteration: 234755
loss: 0.9981436133384705,grad_norm: 0.8484079169876632, iteration: 234756
loss: 1.073720932006836,grad_norm: 0.9999996078575064, iteration: 234757
loss: 1.0041275024414062,grad_norm: 0.9999990612189074, iteration: 234758
loss: 0.9967133402824402,grad_norm: 0.8723963383251516, iteration: 234759
loss: 0.9822297096252441,grad_norm: 0.9706818482461207, iteration: 234760
loss: 0.9934601783752441,grad_norm: 0.9999991152525599, iteration: 234761
loss: 1.0914051532745361,grad_norm: 0.9999994787884929, iteration: 234762
loss: 1.009395718574524,grad_norm: 0.9952035625268743, iteration: 234763
loss: 0.9882280230522156,grad_norm: 0.926809037273115, iteration: 234764
loss: 1.041260838508606,grad_norm: 0.9999989997070502, iteration: 234765
loss: 0.9978444576263428,grad_norm: 0.9999991658424373, iteration: 234766
loss: 1.0119922161102295,grad_norm: 0.9999991249879436, iteration: 234767
loss: 1.000712275505066,grad_norm: 0.9999990667424695, iteration: 234768
loss: 1.0101100206375122,grad_norm: 0.9999991408836543, iteration: 234769
loss: 1.0497465133666992,grad_norm: 0.9999997132344137, iteration: 234770
loss: 1.017661452293396,grad_norm: 0.999999553371455, iteration: 234771
loss: 0.9743894338607788,grad_norm: 0.99999948521113, iteration: 234772
loss: 1.003766655921936,grad_norm: 0.8732348467164762, iteration: 234773
loss: 1.0039535760879517,grad_norm: 0.9999990662635401, iteration: 234774
loss: 1.0400744676589966,grad_norm: 0.999999143464866, iteration: 234775
loss: 1.0676294565200806,grad_norm: 0.9555173010611319, iteration: 234776
loss: 1.0219945907592773,grad_norm: 0.9999991664082181, iteration: 234777
loss: 0.9590734839439392,grad_norm: 0.9362109556454554, iteration: 234778
loss: 1.0062426328659058,grad_norm: 0.9092076880576347, iteration: 234779
loss: 1.01728355884552,grad_norm: 0.80647651511183, iteration: 234780
loss: 1.013128638267517,grad_norm: 0.9999999709217644, iteration: 234781
loss: 0.9791321754455566,grad_norm: 0.8873879867060245, iteration: 234782
loss: 1.0679771900177002,grad_norm: 0.9999991551701592, iteration: 234783
loss: 0.9764508605003357,grad_norm: 0.9276744905499678, iteration: 234784
loss: 1.0258796215057373,grad_norm: 0.9778800299224283, iteration: 234785
loss: 1.05728018283844,grad_norm: 0.9999996261161935, iteration: 234786
loss: 1.0266599655151367,grad_norm: 0.9321207381544364, iteration: 234787
loss: 1.0785928964614868,grad_norm: 0.9999991865887474, iteration: 234788
loss: 1.0449668169021606,grad_norm: 0.883626910163277, iteration: 234789
loss: 1.0149213075637817,grad_norm: 0.999999877609815, iteration: 234790
loss: 1.0424554347991943,grad_norm: 0.9999996692070624, iteration: 234791
loss: 1.1039897203445435,grad_norm: 0.9999991780536531, iteration: 234792
loss: 0.9754041433334351,grad_norm: 0.9999991025037144, iteration: 234793
loss: 1.003208875656128,grad_norm: 0.9999990021075358, iteration: 234794
loss: 0.9855555295944214,grad_norm: 0.9999991602622609, iteration: 234795
loss: 0.9705602526664734,grad_norm: 0.9758871485948338, iteration: 234796
loss: 0.9647329449653625,grad_norm: 0.9999993443358277, iteration: 234797
loss: 0.9515072107315063,grad_norm: 0.9999991477984798, iteration: 234798
loss: 0.9981974363327026,grad_norm: 0.9999995002217381, iteration: 234799
loss: 1.0289483070373535,grad_norm: 0.9999991455712124, iteration: 234800
loss: 0.9882627129554749,grad_norm: 0.9613829583686285, iteration: 234801
loss: 1.0333855152130127,grad_norm: 0.9471878976803284, iteration: 234802
loss: 1.032319188117981,grad_norm: 0.999999055064496, iteration: 234803
loss: 1.0124751329421997,grad_norm: 0.999999522057187, iteration: 234804
loss: 1.0289018154144287,grad_norm: 0.9999992898941196, iteration: 234805
loss: 0.9794533252716064,grad_norm: 0.7536886654657795, iteration: 234806
loss: 1.0161056518554688,grad_norm: 0.82571803771683, iteration: 234807
loss: 1.0334501266479492,grad_norm: 0.9999990090137572, iteration: 234808
loss: 1.0205026865005493,grad_norm: 0.9999994770669071, iteration: 234809
loss: 0.9768993258476257,grad_norm: 0.770429909585713, iteration: 234810
loss: 1.048903465270996,grad_norm: 0.9999996921235821, iteration: 234811
loss: 0.9874176383018494,grad_norm: 0.907610263957389, iteration: 234812
loss: 1.0206644535064697,grad_norm: 0.9999992020813026, iteration: 234813
loss: 1.0182490348815918,grad_norm: 0.9999997885185775, iteration: 234814
loss: 0.9952185153961182,grad_norm: 0.8902824727674086, iteration: 234815
loss: 0.9937120676040649,grad_norm: 0.9290172131476563, iteration: 234816
loss: 1.021759033203125,grad_norm: 0.9999990282161468, iteration: 234817
loss: 0.9390477538108826,grad_norm: 0.9320786863969849, iteration: 234818
loss: 1.0444194078445435,grad_norm: 0.9999999620732611, iteration: 234819
loss: 1.0265662670135498,grad_norm: 0.9260465934394546, iteration: 234820
loss: 1.029899001121521,grad_norm: 0.9999991675583884, iteration: 234821
loss: 1.0359622240066528,grad_norm: 0.9999997424747984, iteration: 234822
loss: 1.020813226699829,grad_norm: 0.7798648858883594, iteration: 234823
loss: 1.0163520574569702,grad_norm: 0.9999993666355882, iteration: 234824
loss: 1.199751377105713,grad_norm: 0.9999997052096806, iteration: 234825
loss: 1.013559103012085,grad_norm: 0.9999994740984391, iteration: 234826
loss: 1.0072035789489746,grad_norm: 0.9489750068761792, iteration: 234827
loss: 1.0492968559265137,grad_norm: 0.99999976895314, iteration: 234828
loss: 0.972390353679657,grad_norm: 0.9012063275791719, iteration: 234829
loss: 1.0290727615356445,grad_norm: 0.9818403418297262, iteration: 234830
loss: 0.9876487255096436,grad_norm: 0.8185325723988447, iteration: 234831
loss: 1.0153758525848389,grad_norm: 0.9999995850217513, iteration: 234832
loss: 0.9846808910369873,grad_norm: 0.9999993897032124, iteration: 234833
loss: 1.0309730768203735,grad_norm: 0.9999990342836383, iteration: 234834
loss: 1.0227901935577393,grad_norm: 0.9204412456375232, iteration: 234835
loss: 1.0251027345657349,grad_norm: 0.9404656040836057, iteration: 234836
loss: 1.0686702728271484,grad_norm: 0.9999997579773625, iteration: 234837
loss: 1.0458652973175049,grad_norm: 0.9999997079166572, iteration: 234838
loss: 1.0124948024749756,grad_norm: 0.999999811219226, iteration: 234839
loss: 1.0573481321334839,grad_norm: 0.9703179963480627, iteration: 234840
loss: 0.9938787817955017,grad_norm: 0.8885764744460478, iteration: 234841
loss: 1.0064154863357544,grad_norm: 0.810047191921347, iteration: 234842
loss: 1.0949565172195435,grad_norm: 0.862698989036468, iteration: 234843
loss: 0.9895543456077576,grad_norm: 0.9999990657693909, iteration: 234844
loss: 1.0161008834838867,grad_norm: 0.9999991094294045, iteration: 234845
loss: 1.001441240310669,grad_norm: 0.9999989596932517, iteration: 234846
loss: 1.0198780298233032,grad_norm: 0.9999997930182644, iteration: 234847
loss: 1.0368390083312988,grad_norm: 0.9999991620917367, iteration: 234848
loss: 1.0759272575378418,grad_norm: 1.0000001259915317, iteration: 234849
loss: 1.0272424221038818,grad_norm: 0.8021975271059848, iteration: 234850
loss: 1.0247915983200073,grad_norm: 0.9694088093883908, iteration: 234851
loss: 1.001368522644043,grad_norm: 0.9906762939258402, iteration: 234852
loss: 1.026745319366455,grad_norm: 0.8895884867014388, iteration: 234853
loss: 0.994769275188446,grad_norm: 0.8954149780481331, iteration: 234854
loss: 1.0409224033355713,grad_norm: 0.9999992153562963, iteration: 234855
loss: 1.02601957321167,grad_norm: 0.9999990874733642, iteration: 234856
loss: 0.9669949412345886,grad_norm: 0.8523567635466731, iteration: 234857
loss: 1.0196534395217896,grad_norm: 0.8670007929462276, iteration: 234858
loss: 1.0324456691741943,grad_norm: 0.9669971301663322, iteration: 234859
loss: 1.0021724700927734,grad_norm: 0.9999998380838809, iteration: 234860
loss: 1.0094407796859741,grad_norm: 0.8970502506837921, iteration: 234861
loss: 1.0672955513000488,grad_norm: 0.9999994840242741, iteration: 234862
loss: 0.9665737748146057,grad_norm: 0.9999997295481324, iteration: 234863
loss: 1.012352466583252,grad_norm: 0.9999991866748156, iteration: 234864
loss: 0.9787545800209045,grad_norm: 0.9999991267118205, iteration: 234865
loss: 1.0557279586791992,grad_norm: 0.9999990583809215, iteration: 234866
loss: 1.03653883934021,grad_norm: 0.9999998736214118, iteration: 234867
loss: 1.0610226392745972,grad_norm: 0.9999991222297187, iteration: 234868
loss: 1.0063565969467163,grad_norm: 0.8646644123818233, iteration: 234869
loss: 1.0194536447525024,grad_norm: 0.9559144586959505, iteration: 234870
loss: 1.0716224908828735,grad_norm: 0.9999992723721612, iteration: 234871
loss: 1.0081511735916138,grad_norm: 0.8605140487615922, iteration: 234872
loss: 0.9866239428520203,grad_norm: 0.9443953160791271, iteration: 234873
loss: 0.9888913035392761,grad_norm: 0.884015498407307, iteration: 234874
loss: 1.035287618637085,grad_norm: 0.999999156705836, iteration: 234875
loss: 1.0107277631759644,grad_norm: 0.9999992153346735, iteration: 234876
loss: 1.040924310684204,grad_norm: 0.9999991587253694, iteration: 234877
loss: 0.9735032916069031,grad_norm: 0.9999990483049646, iteration: 234878
loss: 1.0180233716964722,grad_norm: 0.9999995636284583, iteration: 234879
loss: 0.9698998928070068,grad_norm: 0.9765133605810585, iteration: 234880
loss: 0.9939223527908325,grad_norm: 0.9999992672452003, iteration: 234881
loss: 1.0046982765197754,grad_norm: 0.9264584325934462, iteration: 234882
loss: 1.023411750793457,grad_norm: 0.8436381192289345, iteration: 234883
loss: 1.0042310953140259,grad_norm: 0.999999024990823, iteration: 234884
loss: 1.0085458755493164,grad_norm: 0.8645947787506, iteration: 234885
loss: 1.0497466325759888,grad_norm: 0.9999991055656113, iteration: 234886
loss: 1.0137568712234497,grad_norm: 0.999999046186623, iteration: 234887
loss: 1.1068646907806396,grad_norm: 0.999999671357725, iteration: 234888
loss: 1.0213134288787842,grad_norm: 0.8184871647805835, iteration: 234889
loss: 0.9645112156867981,grad_norm: 0.8361088833107333, iteration: 234890
loss: 1.0158236026763916,grad_norm: 0.9828750393362575, iteration: 234891
loss: 1.009015440940857,grad_norm: 0.9362441095543277, iteration: 234892
loss: 1.0019621849060059,grad_norm: 0.8009474897429537, iteration: 234893
loss: 0.9820451140403748,grad_norm: 0.8591340633304988, iteration: 234894
loss: 0.9615849256515503,grad_norm: 0.9862578060415221, iteration: 234895
loss: 1.0778799057006836,grad_norm: 0.9999998061243288, iteration: 234896
loss: 0.9791590571403503,grad_norm: 0.7669665316557089, iteration: 234897
loss: 1.0302897691726685,grad_norm: 0.9999998823464448, iteration: 234898
loss: 1.019063949584961,grad_norm: 0.99999894503659, iteration: 234899
loss: 0.9926219582557678,grad_norm: 0.9332112894951824, iteration: 234900
loss: 0.9689657688140869,grad_norm: 0.9999991089505136, iteration: 234901
loss: 1.0004510879516602,grad_norm: 0.9666212479550239, iteration: 234902
loss: 1.0208691358566284,grad_norm: 0.999999168956008, iteration: 234903
loss: 1.016567587852478,grad_norm: 0.9999997459059896, iteration: 234904
loss: 1.0523669719696045,grad_norm: 0.9999998869835207, iteration: 234905
loss: 1.0427066087722778,grad_norm: 0.9999993733347139, iteration: 234906
loss: 1.0182485580444336,grad_norm: 0.9999992328909266, iteration: 234907
loss: 1.0580586194992065,grad_norm: 0.9999994227117048, iteration: 234908
loss: 1.03432035446167,grad_norm: 0.9999990645465534, iteration: 234909
loss: 1.0250861644744873,grad_norm: 0.999999336431014, iteration: 234910
loss: 1.0056805610656738,grad_norm: 0.7479717668423178, iteration: 234911
loss: 1.0685979127883911,grad_norm: 0.9999996558738004, iteration: 234912
loss: 1.0093942880630493,grad_norm: 0.8799112898761938, iteration: 234913
loss: 1.0572967529296875,grad_norm: 0.9999994116778963, iteration: 234914
loss: 1.0416219234466553,grad_norm: 0.9774999677527438, iteration: 234915
loss: 0.982280969619751,grad_norm: 0.8957332645749557, iteration: 234916
loss: 0.9874092936515808,grad_norm: 0.9211542805042453, iteration: 234917
loss: 1.0174106359481812,grad_norm: 0.8603335853724016, iteration: 234918
loss: 1.0192217826843262,grad_norm: 0.9999997468743063, iteration: 234919
loss: 1.0662589073181152,grad_norm: 0.9999997868717734, iteration: 234920
loss: 1.0142667293548584,grad_norm: 0.9999991574044323, iteration: 234921
loss: 0.9681242108345032,grad_norm: 0.9824397520993697, iteration: 234922
loss: 1.0256710052490234,grad_norm: 0.9999999160043895, iteration: 234923
loss: 1.0361002683639526,grad_norm: 0.9999991895882523, iteration: 234924
loss: 1.0341449975967407,grad_norm: 0.9999990874840278, iteration: 234925
loss: 0.9876888990402222,grad_norm: 0.9812902288732711, iteration: 234926
loss: 1.0320805311203003,grad_norm: 0.8447858706315223, iteration: 234927
loss: 1.0044835805892944,grad_norm: 0.9999992299173245, iteration: 234928
loss: 1.0069423913955688,grad_norm: 0.9238527329661106, iteration: 234929
loss: 1.0082305669784546,grad_norm: 0.8139151476479489, iteration: 234930
loss: 1.0250791311264038,grad_norm: 0.6782115318414398, iteration: 234931
loss: 1.1589990854263306,grad_norm: 1.0000000603213834, iteration: 234932
loss: 0.9689599871635437,grad_norm: 0.989726529709333, iteration: 234933
loss: 1.0453461408615112,grad_norm: 0.7922990096541276, iteration: 234934
loss: 1.0586833953857422,grad_norm: 0.800690805753802, iteration: 234935
loss: 1.0219627618789673,grad_norm: 0.9999996465167637, iteration: 234936
loss: 1.018444538116455,grad_norm: 0.9184248137578964, iteration: 234937
loss: 0.9801499843597412,grad_norm: 0.836289433521484, iteration: 234938
loss: 1.028336524963379,grad_norm: 0.9999999238941888, iteration: 234939
loss: 1.0674197673797607,grad_norm: 0.999999683917511, iteration: 234940
loss: 1.0090898275375366,grad_norm: 0.9212635524996895, iteration: 234941
loss: 1.012814998626709,grad_norm: 0.9999991932774566, iteration: 234942
loss: 1.0003641843795776,grad_norm: 0.9559581896169996, iteration: 234943
loss: 0.9894487857818604,grad_norm: 0.9566269964060046, iteration: 234944
loss: 0.9803084135055542,grad_norm: 0.9254287340467459, iteration: 234945
loss: 0.9955223798751831,grad_norm: 0.8971901840552089, iteration: 234946
loss: 1.0278868675231934,grad_norm: 0.9999989623509317, iteration: 234947
loss: 1.0260603427886963,grad_norm: 0.9330252805143615, iteration: 234948
loss: 1.0277527570724487,grad_norm: 0.9190986522086682, iteration: 234949
loss: 1.0462379455566406,grad_norm: 0.8371010605629268, iteration: 234950
loss: 1.0107061862945557,grad_norm: 0.8502553374778066, iteration: 234951
loss: 1.0140618085861206,grad_norm: 0.9999990349552673, iteration: 234952
loss: 0.9740095138549805,grad_norm: 0.9873885183957614, iteration: 234953
loss: 0.9917135834693909,grad_norm: 0.9999992221066407, iteration: 234954
loss: 1.0070810317993164,grad_norm: 0.9254376892458251, iteration: 234955
loss: 1.011812686920166,grad_norm: 0.7104652782562207, iteration: 234956
loss: 0.9760920405387878,grad_norm: 0.9999991272870168, iteration: 234957
loss: 1.0018271207809448,grad_norm: 0.9999991887989563, iteration: 234958
loss: 0.9940086007118225,grad_norm: 0.9999996605962707, iteration: 234959
loss: 1.2070295810699463,grad_norm: 0.9999997206696103, iteration: 234960
loss: 1.0129485130310059,grad_norm: 0.8339388585347925, iteration: 234961
loss: 0.9867509603500366,grad_norm: 0.9999991446002342, iteration: 234962
loss: 1.1605045795440674,grad_norm: 0.9999992137365958, iteration: 234963
loss: 0.9976858496665955,grad_norm: 0.7810942412516517, iteration: 234964
loss: 1.0202208757400513,grad_norm: 0.999999688577574, iteration: 234965
loss: 1.0518180131912231,grad_norm: 0.9999995559340643, iteration: 234966
loss: 0.9920659065246582,grad_norm: 0.8225028788353601, iteration: 234967
loss: 1.155502438545227,grad_norm: 0.9999995330148138, iteration: 234968
loss: 1.0051349401474,grad_norm: 0.8976314976282612, iteration: 234969
loss: 1.004783272743225,grad_norm: 0.8433581182701679, iteration: 234970
loss: 0.9984744787216187,grad_norm: 0.9999997005330254, iteration: 234971
loss: 1.0362658500671387,grad_norm: 0.9999997900043885, iteration: 234972
loss: 0.9848880767822266,grad_norm: 0.8945928809213766, iteration: 234973
loss: 1.026146411895752,grad_norm: 0.9999991790967374, iteration: 234974
loss: 0.9829476475715637,grad_norm: 0.9999990411387155, iteration: 234975
loss: 1.0041964054107666,grad_norm: 0.8105775434262567, iteration: 234976
loss: 0.9965037703514099,grad_norm: 0.9094651560019834, iteration: 234977
loss: 1.0781556367874146,grad_norm: 0.9369418280801266, iteration: 234978
loss: 1.0051299333572388,grad_norm: 0.8366562865927388, iteration: 234979
loss: 1.0354429483413696,grad_norm: 0.999999400626852, iteration: 234980
loss: 0.9713255763053894,grad_norm: 0.9340134059323146, iteration: 234981
loss: 1.0023952722549438,grad_norm: 0.7971635826498539, iteration: 234982
loss: 1.0466036796569824,grad_norm: 0.999999225082601, iteration: 234983
loss: 0.9821279048919678,grad_norm: 0.9999991791830016, iteration: 234984
loss: 1.1169878244400024,grad_norm: 0.9999993761187076, iteration: 234985
loss: 0.9970144629478455,grad_norm: 0.9999996522028498, iteration: 234986
loss: 0.9481699466705322,grad_norm: 0.9999998105940174, iteration: 234987
loss: 1.0463873147964478,grad_norm: 0.9999998730420027, iteration: 234988
loss: 0.9836501479148865,grad_norm: 0.9207242310770134, iteration: 234989
loss: 1.0059654712677002,grad_norm: 0.8905049643069493, iteration: 234990
loss: 1.0178468227386475,grad_norm: 0.9999992013256654, iteration: 234991
loss: 1.016296148300171,grad_norm: 0.9999994116683196, iteration: 234992
loss: 1.0492908954620361,grad_norm: 0.9560100179601211, iteration: 234993
loss: 0.9902232885360718,grad_norm: 0.9252192413542424, iteration: 234994
loss: 1.0226081609725952,grad_norm: 0.999999002440617, iteration: 234995
loss: 1.0114543437957764,grad_norm: 0.8977973199738132, iteration: 234996
loss: 1.044472098350525,grad_norm: 0.9473577776418866, iteration: 234997
loss: 0.9959869384765625,grad_norm: 0.9999993092609513, iteration: 234998
loss: 1.0198978185653687,grad_norm: 0.8177034179253162, iteration: 234999
loss: 0.9967830777168274,grad_norm: 0.8639938524793501, iteration: 235000
loss: 0.9969795346260071,grad_norm: 0.9999992795271521, iteration: 235001
loss: 0.997115433216095,grad_norm: 0.8559662688609441, iteration: 235002
loss: 1.0513434410095215,grad_norm: 0.9999996800534976, iteration: 235003
loss: 0.9909372925758362,grad_norm: 0.9981673340916425, iteration: 235004
loss: 1.0258203744888306,grad_norm: 0.9999993509403312, iteration: 235005
loss: 1.0647283792495728,grad_norm: 0.999999546339858, iteration: 235006
loss: 0.9766889214515686,grad_norm: 0.8304197534376998, iteration: 235007
loss: 0.9987790584564209,grad_norm: 0.9999994323251625, iteration: 235008
loss: 1.003949522972107,grad_norm: 0.9999993137320559, iteration: 235009
loss: 1.0012964010238647,grad_norm: 0.9681851247817932, iteration: 235010
loss: 0.9801153540611267,grad_norm: 0.9999994440461207, iteration: 235011
loss: 1.135728359222412,grad_norm: 0.9999999286151992, iteration: 235012
loss: 1.015059232711792,grad_norm: 0.999999516089148, iteration: 235013
loss: 1.0203890800476074,grad_norm: 0.8547291316273876, iteration: 235014
loss: 0.9807773232460022,grad_norm: 0.9999991738213195, iteration: 235015
loss: 1.0179479122161865,grad_norm: 0.9704621940264064, iteration: 235016
loss: 0.9966093897819519,grad_norm: 0.9999990790838861, iteration: 235017
loss: 0.9976834058761597,grad_norm: 0.9999991865154063, iteration: 235018
loss: 1.0172226428985596,grad_norm: 0.9999991006198028, iteration: 235019
loss: 1.0138189792633057,grad_norm: 0.8789290858949842, iteration: 235020
loss: 1.002393126487732,grad_norm: 0.8909853475695706, iteration: 235021
loss: 0.9712822437286377,grad_norm: 0.929931624065276, iteration: 235022
loss: 1.0255107879638672,grad_norm: 0.999999082662255, iteration: 235023
loss: 1.0618391036987305,grad_norm: 0.9999997971124501, iteration: 235024
loss: 0.9762898087501526,grad_norm: 0.9999990213427521, iteration: 235025
loss: 1.018211841583252,grad_norm: 0.9999994798311505, iteration: 235026
loss: 0.9731180667877197,grad_norm: 0.9215394712531303, iteration: 235027
loss: 1.2183812856674194,grad_norm: 0.999999833799719, iteration: 235028
loss: 0.9986257553100586,grad_norm: 0.9999989900858618, iteration: 235029
loss: 0.9987182021141052,grad_norm: 0.8016110636007513, iteration: 235030
loss: 0.9442910552024841,grad_norm: 0.8053424979849658, iteration: 235031
loss: 0.9928871393203735,grad_norm: 0.8450987998610863, iteration: 235032
loss: 0.9810085892677307,grad_norm: 0.7793742837169335, iteration: 235033
loss: 1.021292805671692,grad_norm: 0.9245393395069047, iteration: 235034
loss: 0.9741763472557068,grad_norm: 0.864420949557627, iteration: 235035
loss: 1.0962793827056885,grad_norm: 0.9999995567264465, iteration: 235036
loss: 0.9796175956726074,grad_norm: 0.9999995071161842, iteration: 235037
loss: 1.0032527446746826,grad_norm: 0.9999990495442664, iteration: 235038
loss: 1.011672854423523,grad_norm: 0.8796680027549333, iteration: 235039
loss: 1.000289797782898,grad_norm: 0.9010836866804652, iteration: 235040
loss: 0.9800611734390259,grad_norm: 0.9999989785702235, iteration: 235041
loss: 0.9452037811279297,grad_norm: 0.9999992807944699, iteration: 235042
loss: 0.9339305758476257,grad_norm: 0.999999198640976, iteration: 235043
loss: 1.0092709064483643,grad_norm: 0.8889895560104963, iteration: 235044
loss: 0.9953513741493225,grad_norm: 0.8539399352953836, iteration: 235045
loss: 0.9986004829406738,grad_norm: 0.9999990436530829, iteration: 235046
loss: 1.0409281253814697,grad_norm: 0.9999991910137621, iteration: 235047
loss: 0.9767802953720093,grad_norm: 0.8482589102553748, iteration: 235048
loss: 0.9956235885620117,grad_norm: 0.9999989719498538, iteration: 235049
loss: 1.011074185371399,grad_norm: 0.7479839592247312, iteration: 235050
loss: 1.0385798215866089,grad_norm: 0.9999990900756157, iteration: 235051
loss: 1.0450799465179443,grad_norm: 0.9999995367042207, iteration: 235052
loss: 1.0154341459274292,grad_norm: 0.9992371544028484, iteration: 235053
loss: 1.045000433921814,grad_norm: 0.9999994490551181, iteration: 235054
loss: 1.0098711252212524,grad_norm: 0.999999142822102, iteration: 235055
loss: 1.062545657157898,grad_norm: 0.948608616731748, iteration: 235056
loss: 1.0669103860855103,grad_norm: 0.9999994887969919, iteration: 235057
loss: 1.0111761093139648,grad_norm: 0.9999989525715874, iteration: 235058
loss: 1.014142394065857,grad_norm: 0.9999990341008111, iteration: 235059
loss: 0.96182781457901,grad_norm: 0.8707732385459069, iteration: 235060
loss: 0.9929051399230957,grad_norm: 0.9999991734396736, iteration: 235061
loss: 1.021010160446167,grad_norm: 0.9001357841346951, iteration: 235062
loss: 1.0127167701721191,grad_norm: 0.999999154293684, iteration: 235063
loss: 0.981260359287262,grad_norm: 0.8965766152190777, iteration: 235064
loss: 1.00664222240448,grad_norm: 0.893815480220795, iteration: 235065
loss: 0.9779108166694641,grad_norm: 0.9999990455660036, iteration: 235066
loss: 1.0613346099853516,grad_norm: 0.9999999409946354, iteration: 235067
loss: 1.0109813213348389,grad_norm: 0.9301643110350813, iteration: 235068
loss: 1.0155941247940063,grad_norm: 0.9240237711301866, iteration: 235069
loss: 1.04916512966156,grad_norm: 0.9609661168140986, iteration: 235070
loss: 1.0561782121658325,grad_norm: 0.9461854039153355, iteration: 235071
loss: 0.9860973358154297,grad_norm: 0.9822414359283987, iteration: 235072
loss: 1.0253294706344604,grad_norm: 0.9216097992872843, iteration: 235073
loss: 1.0262799263000488,grad_norm: 0.9471691893248613, iteration: 235074
loss: 1.0232815742492676,grad_norm: 0.9995795071523822, iteration: 235075
loss: 1.0053318738937378,grad_norm: 0.9237868246496945, iteration: 235076
loss: 0.9842186570167542,grad_norm: 0.9261679582237542, iteration: 235077
loss: 1.0046244859695435,grad_norm: 0.9560018539714698, iteration: 235078
loss: 1.148798942565918,grad_norm: 0.9999997537213174, iteration: 235079
loss: 1.0116995573043823,grad_norm: 0.8209808480049661, iteration: 235080
loss: 1.0446979999542236,grad_norm: 0.99999930217531, iteration: 235081
loss: 0.9697481393814087,grad_norm: 0.8337070489585212, iteration: 235082
loss: 0.9929304122924805,grad_norm: 0.9750134326094619, iteration: 235083
loss: 1.1143337488174438,grad_norm: 0.9999998846476476, iteration: 235084
loss: 1.023727536201477,grad_norm: 0.9999991673377338, iteration: 235085
loss: 1.145328402519226,grad_norm: 0.9999992737862636, iteration: 235086
loss: 1.1214960813522339,grad_norm: 0.9999994948790017, iteration: 235087
loss: 0.9943488240242004,grad_norm: 0.8628205087618576, iteration: 235088
loss: 0.9847331643104553,grad_norm: 0.999999216335476, iteration: 235089
loss: 1.0991531610488892,grad_norm: 0.9999993554693271, iteration: 235090
loss: 1.0496070384979248,grad_norm: 0.9999995259568799, iteration: 235091
loss: 0.9923936724662781,grad_norm: 0.9717781871974394, iteration: 235092
loss: 0.9913025498390198,grad_norm: 0.8539278516136333, iteration: 235093
loss: 1.0199639797210693,grad_norm: 0.9414874013108444, iteration: 235094
loss: 1.0157890319824219,grad_norm: 0.9999992671058633, iteration: 235095
loss: 1.0392584800720215,grad_norm: 0.99999906957504, iteration: 235096
loss: 0.988074541091919,grad_norm: 0.9651540540027043, iteration: 235097
loss: 1.130979061126709,grad_norm: 0.999999510841483, iteration: 235098
loss: 1.0088741779327393,grad_norm: 0.8945949858468542, iteration: 235099
loss: 0.9656782746315002,grad_norm: 0.9999994660947545, iteration: 235100
loss: 1.0242286920547485,grad_norm: 0.9999996738132759, iteration: 235101
loss: 0.9780394434928894,grad_norm: 0.8430287163683874, iteration: 235102
loss: 0.9692273139953613,grad_norm: 0.9999992730813819, iteration: 235103
loss: 0.9813603162765503,grad_norm: 0.9890604798485684, iteration: 235104
loss: 0.9730209112167358,grad_norm: 0.911227357253355, iteration: 235105
loss: 1.005452036857605,grad_norm: 0.9174374116514326, iteration: 235106
loss: 0.9736099243164062,grad_norm: 0.8719976546096614, iteration: 235107
loss: 0.9933967590332031,grad_norm: 0.9227397447451612, iteration: 235108
loss: 1.024664044380188,grad_norm: 0.999999521023642, iteration: 235109
loss: 1.0796778202056885,grad_norm: 0.9999992807501557, iteration: 235110
loss: 1.0039387941360474,grad_norm: 0.8746411389656799, iteration: 235111
loss: 1.0160194635391235,grad_norm: 0.962997895508152, iteration: 235112
loss: 0.9876624941825867,grad_norm: 0.9789753984657771, iteration: 235113
loss: 0.9927414655685425,grad_norm: 0.941373057479175, iteration: 235114
loss: 0.9811906218528748,grad_norm: 0.9999990887638435, iteration: 235115
loss: 0.9663062691688538,grad_norm: 0.8568119177359474, iteration: 235116
loss: 0.9981523752212524,grad_norm: 0.9965527751456538, iteration: 235117
loss: 1.0829514265060425,grad_norm: 0.8669202323594456, iteration: 235118
loss: 0.9981468319892883,grad_norm: 0.8990700891303719, iteration: 235119
loss: 1.032518744468689,grad_norm: 0.9140198022355797, iteration: 235120
loss: 0.9869330525398254,grad_norm: 0.916481364960415, iteration: 235121
loss: 0.9962745904922485,grad_norm: 0.9999994260694691, iteration: 235122
loss: 0.9643339514732361,grad_norm: 0.9999990975950948, iteration: 235123
loss: 1.0048102140426636,grad_norm: 0.9999989826967036, iteration: 235124
loss: 1.0782092809677124,grad_norm: 0.9999991044558694, iteration: 235125
loss: 1.03485107421875,grad_norm: 0.9999991856458316, iteration: 235126
loss: 1.0200079679489136,grad_norm: 0.9999999424034319, iteration: 235127
loss: 0.973557710647583,grad_norm: 0.9345703324487922, iteration: 235128
loss: 0.9840716123580933,grad_norm: 0.9900255217843076, iteration: 235129
loss: 0.9696145057678223,grad_norm: 0.8876258707426687, iteration: 235130
loss: 1.1424046754837036,grad_norm: 0.9999992154077335, iteration: 235131
loss: 1.0150506496429443,grad_norm: 0.8855443504163333, iteration: 235132
loss: 1.0659239292144775,grad_norm: 0.9999991101880569, iteration: 235133
loss: 1.0140135288238525,grad_norm: 0.8891187178185513, iteration: 235134
loss: 0.9924120306968689,grad_norm: 0.9999992024422046, iteration: 235135
loss: 0.9928547739982605,grad_norm: 0.9999991399953967, iteration: 235136
loss: 1.0148714780807495,grad_norm: 0.9999997595495411, iteration: 235137
loss: 1.0087815523147583,grad_norm: 0.9999995051385427, iteration: 235138
loss: 0.9943644404411316,grad_norm: 0.7457000771535417, iteration: 235139
loss: 1.006258249282837,grad_norm: 0.9511799861458047, iteration: 235140
loss: 1.012691617012024,grad_norm: 0.9881894853866219, iteration: 235141
loss: 1.1041607856750488,grad_norm: 0.9999993452127018, iteration: 235142
loss: 1.0241565704345703,grad_norm: 0.8700887855132374, iteration: 235143
loss: 1.0261311531066895,grad_norm: 0.9829921271431803, iteration: 235144
loss: 0.9874371290206909,grad_norm: 0.9999989417421796, iteration: 235145
loss: 1.005034327507019,grad_norm: 0.9747841601701409, iteration: 235146
loss: 0.9924968481063843,grad_norm: 0.9999989598252866, iteration: 235147
loss: 1.0219287872314453,grad_norm: 0.999999395708725, iteration: 235148
loss: 1.0127218961715698,grad_norm: 0.9040795204924448, iteration: 235149
loss: 1.0360535383224487,grad_norm: 0.9596191944051423, iteration: 235150
loss: 0.9982026219367981,grad_norm: 0.999999342022204, iteration: 235151
loss: 0.9984164834022522,grad_norm: 0.9999991727185961, iteration: 235152
loss: 1.0267634391784668,grad_norm: 0.9575488461699909, iteration: 235153
loss: 1.014968991279602,grad_norm: 0.87745866286906, iteration: 235154
loss: 1.0085687637329102,grad_norm: 0.999999522263507, iteration: 235155
loss: 1.01431143283844,grad_norm: 0.8335858149409313, iteration: 235156
loss: 1.0025534629821777,grad_norm: 0.8580648821941245, iteration: 235157
loss: 1.0068998336791992,grad_norm: 0.9092767147553137, iteration: 235158
loss: 0.9790318608283997,grad_norm: 0.9802466291674957, iteration: 235159
loss: 1.0158960819244385,grad_norm: 0.9999996621820864, iteration: 235160
loss: 0.998982310295105,grad_norm: 0.9999989579045092, iteration: 235161
loss: 0.9609253406524658,grad_norm: 0.9999991747043382, iteration: 235162
loss: 0.9974290132522583,grad_norm: 0.9999990429815115, iteration: 235163
loss: 0.9895448088645935,grad_norm: 0.9999990856133192, iteration: 235164
loss: 0.9908901453018188,grad_norm: 0.9999993006525809, iteration: 235165
loss: 1.013513445854187,grad_norm: 0.9467111829200643, iteration: 235166
loss: 0.9540556073188782,grad_norm: 0.846146931862734, iteration: 235167
loss: 0.9650971293449402,grad_norm: 0.8697383821899258, iteration: 235168
loss: 0.9875560402870178,grad_norm: 0.9236676234342349, iteration: 235169
loss: 0.9937862753868103,grad_norm: 0.8249343147099252, iteration: 235170
loss: 0.9699499011039734,grad_norm: 0.9999994272024905, iteration: 235171
loss: 1.0310271978378296,grad_norm: 0.9400811463132815, iteration: 235172
loss: 1.009547233581543,grad_norm: 0.8614890392580143, iteration: 235173
loss: 1.0650413036346436,grad_norm: 0.801867278068861, iteration: 235174
loss: 0.9994754195213318,grad_norm: 0.9999991653885623, iteration: 235175
loss: 0.9841247797012329,grad_norm: 0.9295005047410334, iteration: 235176
loss: 0.9513843059539795,grad_norm: 0.9335909690337244, iteration: 235177
loss: 0.9906818270683289,grad_norm: 0.8766324189581141, iteration: 235178
loss: 0.9576794505119324,grad_norm: 0.8056119778424594, iteration: 235179
loss: 1.0344856977462769,grad_norm: 0.9458463568249104, iteration: 235180
loss: 1.0240436792373657,grad_norm: 0.899489800073471, iteration: 235181
loss: 1.0159841775894165,grad_norm: 0.9999993034212245, iteration: 235182
loss: 0.9788535237312317,grad_norm: 0.9194468981271254, iteration: 235183
loss: 1.060213565826416,grad_norm: 0.9999994970841455, iteration: 235184
loss: 1.0065163373947144,grad_norm: 0.9999991463314701, iteration: 235185
loss: 1.0200164318084717,grad_norm: 0.9999999717965441, iteration: 235186
loss: 0.9656522870063782,grad_norm: 0.9999991483284854, iteration: 235187
loss: 0.9755648374557495,grad_norm: 0.884047967190057, iteration: 235188
loss: 1.1026955842971802,grad_norm: 0.776827758488476, iteration: 235189
loss: 0.9934240579605103,grad_norm: 0.9143365301631549, iteration: 235190
loss: 1.0003341436386108,grad_norm: 0.9999992911472342, iteration: 235191
loss: 1.0041812658309937,grad_norm: 0.9470500018262533, iteration: 235192
loss: 1.0065228939056396,grad_norm: 0.9826094783701624, iteration: 235193
loss: 0.9878022074699402,grad_norm: 0.8082804949589568, iteration: 235194
loss: 1.0459074974060059,grad_norm: 0.9999991222990834, iteration: 235195
loss: 0.9964852333068848,grad_norm: 0.9999991249060203, iteration: 235196
loss: 1.0207632780075073,grad_norm: 0.9999992204107307, iteration: 235197
loss: 1.0133967399597168,grad_norm: 0.9181148316677678, iteration: 235198
loss: 0.992927610874176,grad_norm: 0.9999991473791998, iteration: 235199
loss: 1.0026414394378662,grad_norm: 0.9999990057022162, iteration: 235200
loss: 1.0149587392807007,grad_norm: 0.7991515763718102, iteration: 235201
loss: 0.9770036935806274,grad_norm: 0.8832922859733819, iteration: 235202
loss: 0.9609883427619934,grad_norm: 0.9999996861494844, iteration: 235203
loss: 0.9676995873451233,grad_norm: 0.8955116605481306, iteration: 235204
loss: 0.9695415496826172,grad_norm: 0.9999989294628598, iteration: 235205
loss: 0.997832715511322,grad_norm: 0.9999993394938475, iteration: 235206
loss: 0.9766777753829956,grad_norm: 0.8177615876327404, iteration: 235207
loss: 1.0138143301010132,grad_norm: 0.9999992372476753, iteration: 235208
loss: 1.0039019584655762,grad_norm: 0.8849087902293122, iteration: 235209
loss: 1.0434784889221191,grad_norm: 0.9999995105550581, iteration: 235210
loss: 0.9619786143302917,grad_norm: 0.8413293778615333, iteration: 235211
loss: 1.144404411315918,grad_norm: 0.9999992810041752, iteration: 235212
loss: 0.994676947593689,grad_norm: 0.8362525266112248, iteration: 235213
loss: 1.0043339729309082,grad_norm: 0.9660565279459173, iteration: 235214
loss: 1.0189409255981445,grad_norm: 0.9999995574462801, iteration: 235215
loss: 1.0313528776168823,grad_norm: 0.9327118674231456, iteration: 235216
loss: 1.017316460609436,grad_norm: 0.7555994095403232, iteration: 235217
loss: 1.0134040117263794,grad_norm: 0.8141131388302992, iteration: 235218
loss: 0.9569821357727051,grad_norm: 0.8582494070523687, iteration: 235219
loss: 1.0022058486938477,grad_norm: 0.9405701235005586, iteration: 235220
loss: 1.1451507806777954,grad_norm: 0.9999998279436263, iteration: 235221
loss: 0.9869049191474915,grad_norm: 0.9999991018230774, iteration: 235222
loss: 0.979629397392273,grad_norm: 0.8865639292308236, iteration: 235223
loss: 1.038009762763977,grad_norm: 0.9999992781855356, iteration: 235224
loss: 1.0576056241989136,grad_norm: 0.9493087694300544, iteration: 235225
loss: 1.0413360595703125,grad_norm: 0.9999990150876444, iteration: 235226
loss: 0.977924644947052,grad_norm: 0.9094119544935519, iteration: 235227
loss: 1.05763578414917,grad_norm: 0.9069458701360291, iteration: 235228
loss: 0.9545237421989441,grad_norm: 0.9845126481691173, iteration: 235229
loss: 0.9932917952537537,grad_norm: 0.8431686691507541, iteration: 235230
loss: 1.0345990657806396,grad_norm: 0.9999991417442272, iteration: 235231
loss: 1.0200573205947876,grad_norm: 0.7685369209859371, iteration: 235232
loss: 1.0172606706619263,grad_norm: 0.9999990236119469, iteration: 235233
loss: 1.0074939727783203,grad_norm: 0.9999991860036268, iteration: 235234
loss: 0.9971439838409424,grad_norm: 0.8770009589887612, iteration: 235235
loss: 1.1276257038116455,grad_norm: 0.9999993204534948, iteration: 235236
loss: 1.0804619789123535,grad_norm: 0.9999996255841805, iteration: 235237
loss: 1.0108346939086914,grad_norm: 0.9999997514413173, iteration: 235238
loss: 1.1205114126205444,grad_norm: 0.999999370115559, iteration: 235239
loss: 1.0008059740066528,grad_norm: 0.9999996345740321, iteration: 235240
loss: 1.0152993202209473,grad_norm: 0.8557893840861706, iteration: 235241
loss: 1.0001152753829956,grad_norm: 0.874007336576429, iteration: 235242
loss: 1.0212666988372803,grad_norm: 0.9259023141605803, iteration: 235243
loss: 0.967856228351593,grad_norm: 0.8880758343838167, iteration: 235244
loss: 0.9934201240539551,grad_norm: 0.8386397106008366, iteration: 235245
loss: 0.9673300385475159,grad_norm: 0.7827111738085692, iteration: 235246
loss: 1.0557105541229248,grad_norm: 0.999999146841637, iteration: 235247
loss: 1.0112601518630981,grad_norm: 0.7685242218069719, iteration: 235248
loss: 1.0174918174743652,grad_norm: 0.9403218963415388, iteration: 235249
loss: 1.0300238132476807,grad_norm: 0.9999990999284669, iteration: 235250
loss: 1.0043922662734985,grad_norm: 0.887850870841852, iteration: 235251
loss: 0.9868771433830261,grad_norm: 0.9999992793283962, iteration: 235252
loss: 1.0167639255523682,grad_norm: 0.9999991616832175, iteration: 235253
loss: 0.9782190918922424,grad_norm: 0.9290155220450305, iteration: 235254
loss: 0.9673690795898438,grad_norm: 0.8080661856927268, iteration: 235255
loss: 0.994006872177124,grad_norm: 0.9449161392394188, iteration: 235256
loss: 1.0101789236068726,grad_norm: 0.8951790444230479, iteration: 235257
loss: 1.0168721675872803,grad_norm: 0.8100692827262731, iteration: 235258
loss: 0.9908710718154907,grad_norm: 0.9420944074139643, iteration: 235259
loss: 0.9881746172904968,grad_norm: 0.9271153806132268, iteration: 235260
loss: 1.0074074268341064,grad_norm: 0.883169568693094, iteration: 235261
loss: 0.9852438569068909,grad_norm: 0.8543430195903724, iteration: 235262
loss: 0.9750424027442932,grad_norm: 0.8888068303928351, iteration: 235263
loss: 0.9726537466049194,grad_norm: 0.9826863778540288, iteration: 235264
loss: 0.9845684170722961,grad_norm: 0.9223037777964535, iteration: 235265
loss: 1.0079774856567383,grad_norm: 0.8553894655809077, iteration: 235266
loss: 1.0319901704788208,grad_norm: 0.7208052873900073, iteration: 235267
loss: 1.02947199344635,grad_norm: 0.9999991908411262, iteration: 235268
loss: 0.9797622561454773,grad_norm: 0.9999990926781915, iteration: 235269
loss: 0.9910328388214111,grad_norm: 0.9671011206146913, iteration: 235270
loss: 0.995530903339386,grad_norm: 0.8890997239957807, iteration: 235271
loss: 0.9916892051696777,grad_norm: 0.8739167786954167, iteration: 235272
loss: 1.0125303268432617,grad_norm: 0.777709797129123, iteration: 235273
loss: 1.0034558773040771,grad_norm: 0.8926933308341799, iteration: 235274
loss: 1.0390986204147339,grad_norm: 0.9999990797444622, iteration: 235275
loss: 1.0294113159179688,grad_norm: 0.9999990314303909, iteration: 235276
loss: 0.9827274680137634,grad_norm: 0.9317888045917271, iteration: 235277
loss: 1.0355174541473389,grad_norm: 0.9150892452087379, iteration: 235278
loss: 0.9896759390830994,grad_norm: 0.9999988713500573, iteration: 235279
loss: 1.0544239282608032,grad_norm: 0.999999099717779, iteration: 235280
loss: 1.0143963098526,grad_norm: 0.9797982485638718, iteration: 235281
loss: 1.0155929327011108,grad_norm: 0.8585450543279498, iteration: 235282
loss: 1.0273858308792114,grad_norm: 0.9514364976894434, iteration: 235283
loss: 1.0297508239746094,grad_norm: 0.9999993811539689, iteration: 235284
loss: 1.024137258529663,grad_norm: 0.9247342227105813, iteration: 235285
loss: 1.0202107429504395,grad_norm: 0.9999992410327726, iteration: 235286
loss: 1.1243395805358887,grad_norm: 0.9999990870258957, iteration: 235287
loss: 1.0202189683914185,grad_norm: 0.8902265794064034, iteration: 235288
loss: 1.0269325971603394,grad_norm: 0.9905720752769461, iteration: 235289
loss: 1.0173664093017578,grad_norm: 0.8432023349033885, iteration: 235290
loss: 1.0405428409576416,grad_norm: 0.9999994031360022, iteration: 235291
loss: 1.0304316282272339,grad_norm: 0.8896178999572056, iteration: 235292
loss: 0.9949991703033447,grad_norm: 0.9999990994332109, iteration: 235293
loss: 1.002158522605896,grad_norm: 0.9999991480348598, iteration: 235294
loss: 0.98152095079422,grad_norm: 0.9589828470706862, iteration: 235295
loss: 0.9863274097442627,grad_norm: 0.8317212166017232, iteration: 235296
loss: 0.9925112128257751,grad_norm: 0.8948976049163021, iteration: 235297
loss: 1.0073983669281006,grad_norm: 0.9674422306174495, iteration: 235298
loss: 1.0007481575012207,grad_norm: 0.999999952161855, iteration: 235299
loss: 0.9894614219665527,grad_norm: 0.879815246003799, iteration: 235300
loss: 1.0017021894454956,grad_norm: 0.9269711816367133, iteration: 235301
loss: 0.9739192128181458,grad_norm: 0.9841329995462436, iteration: 235302
loss: 0.9937600493431091,grad_norm: 0.8595403934883119, iteration: 235303
loss: 0.964004397392273,grad_norm: 0.9838297040062263, iteration: 235304
loss: 0.9748406410217285,grad_norm: 0.9999990763299926, iteration: 235305
loss: 0.9922992587089539,grad_norm: 0.9276917403278102, iteration: 235306
loss: 1.0205949544906616,grad_norm: 0.9164123339118148, iteration: 235307
loss: 1.0256071090698242,grad_norm: 0.9999994311540047, iteration: 235308
loss: 1.0206409692764282,grad_norm: 0.7978355004963101, iteration: 235309
loss: 1.012313723564148,grad_norm: 0.9935343330305099, iteration: 235310
loss: 1.1027997732162476,grad_norm: 0.9903727593780738, iteration: 235311
loss: 1.068928599357605,grad_norm: 0.9750318042006143, iteration: 235312
loss: 1.094223141670227,grad_norm: 0.9999996289687759, iteration: 235313
loss: 1.3059056997299194,grad_norm: 0.9999991996918752, iteration: 235314
loss: 1.1600100994110107,grad_norm: 0.9999992897537543, iteration: 235315
loss: 1.050754189491272,grad_norm: 0.9999998710661202, iteration: 235316
loss: 1.1036337614059448,grad_norm: 0.9999992500213215, iteration: 235317
loss: 1.1573486328125,grad_norm: 0.9999995293049999, iteration: 235318
loss: 1.0900731086730957,grad_norm: 0.999999246230983, iteration: 235319
loss: 0.9688839912414551,grad_norm: 0.8710858043051805, iteration: 235320
loss: 1.0361549854278564,grad_norm: 0.9999991413517844, iteration: 235321
loss: 1.0012286901474,grad_norm: 0.9884260514543283, iteration: 235322
loss: 1.0014358758926392,grad_norm: 0.7554551240976836, iteration: 235323
loss: 1.0369820594787598,grad_norm: 0.889234069334112, iteration: 235324
loss: 1.0465697050094604,grad_norm: 0.9999994084214859, iteration: 235325
loss: 1.1495360136032104,grad_norm: 0.9999993337823533, iteration: 235326
loss: 1.0637565851211548,grad_norm: 0.9999998748828861, iteration: 235327
loss: 0.9567688703536987,grad_norm: 0.9938589932422714, iteration: 235328
loss: 1.0061664581298828,grad_norm: 0.9999990296621142, iteration: 235329
loss: 1.0014370679855347,grad_norm: 0.8683196220660819, iteration: 235330
loss: 1.0661879777908325,grad_norm: 0.9999995227234654, iteration: 235331
loss: 1.0066698789596558,grad_norm: 0.9999994876484155, iteration: 235332
loss: 1.0321100950241089,grad_norm: 0.9143565832617777, iteration: 235333
loss: 1.012079119682312,grad_norm: 0.7752003280850162, iteration: 235334
loss: 1.033959984779358,grad_norm: 0.9297451559663691, iteration: 235335
loss: 1.1463769674301147,grad_norm: 0.9999990743169946, iteration: 235336
loss: 1.1481438875198364,grad_norm: 0.9999999283869225, iteration: 235337
loss: 1.0433303117752075,grad_norm: 0.9999997978235422, iteration: 235338
loss: 0.966483473777771,grad_norm: 0.9173502876094743, iteration: 235339
loss: 0.9976522922515869,grad_norm: 0.8920507188684709, iteration: 235340
loss: 1.0252735614776611,grad_norm: 0.9999997402920073, iteration: 235341
loss: 0.958717942237854,grad_norm: 0.8701365703869932, iteration: 235342
loss: 1.0187885761260986,grad_norm: 0.9999992318403914, iteration: 235343
loss: 1.0892728567123413,grad_norm: 0.9999997283759214, iteration: 235344
loss: 1.024877905845642,grad_norm: 0.9999998604606591, iteration: 235345
loss: 1.0802351236343384,grad_norm: 0.9999991685748927, iteration: 235346
loss: 1.0390788316726685,grad_norm: 0.999999624775691, iteration: 235347
loss: 0.986643373966217,grad_norm: 0.7381177883276514, iteration: 235348
loss: 0.9621860980987549,grad_norm: 0.9163437645249503, iteration: 235349
loss: 1.012790560722351,grad_norm: 0.8991027929358278, iteration: 235350
loss: 1.0528582334518433,grad_norm: 0.9379384907447976, iteration: 235351
loss: 0.9981023669242859,grad_norm: 0.9378577905790348, iteration: 235352
loss: 1.0219290256500244,grad_norm: 0.8621136382027912, iteration: 235353
loss: 1.0056662559509277,grad_norm: 0.9751588427053223, iteration: 235354
loss: 1.0251091718673706,grad_norm: 0.9999988988796112, iteration: 235355
loss: 0.9998571872711182,grad_norm: 0.9490763197280134, iteration: 235356
loss: 1.063291311264038,grad_norm: 0.9621207759599396, iteration: 235357
loss: 0.9708963632583618,grad_norm: 0.9679761398455468, iteration: 235358
loss: 1.0205150842666626,grad_norm: 0.8001071190579198, iteration: 235359
loss: 1.0220927000045776,grad_norm: 0.9389931688719978, iteration: 235360
loss: 1.0079905986785889,grad_norm: 0.9999990219647756, iteration: 235361
loss: 1.0297096967697144,grad_norm: 0.9999993575941163, iteration: 235362
loss: 1.0022517442703247,grad_norm: 0.8621908315252953, iteration: 235363
loss: 1.0233327150344849,grad_norm: 0.8716949990623292, iteration: 235364
loss: 1.0128121376037598,grad_norm: 0.8573922236332103, iteration: 235365
loss: 1.0573877096176147,grad_norm: 0.9999990912206266, iteration: 235366
loss: 1.045015811920166,grad_norm: 0.7995360110456525, iteration: 235367
loss: 1.003105640411377,grad_norm: 0.9646696367172454, iteration: 235368
loss: 1.0169321298599243,grad_norm: 0.695867995056261, iteration: 235369
loss: 0.9966139197349548,grad_norm: 0.9999990453373709, iteration: 235370
loss: 1.0563820600509644,grad_norm: 0.99999985472341, iteration: 235371
loss: 0.976285994052887,grad_norm: 0.9999991271530853, iteration: 235372
loss: 1.0066148042678833,grad_norm: 0.9999989310657672, iteration: 235373
loss: 1.012436866760254,grad_norm: 0.8345562201844243, iteration: 235374
loss: 1.0061501264572144,grad_norm: 0.8937334094452422, iteration: 235375
loss: 1.0170212984085083,grad_norm: 0.9782476992210555, iteration: 235376
loss: 1.0121325254440308,grad_norm: 0.9346223029388743, iteration: 235377
loss: 1.0503265857696533,grad_norm: 0.9999998433438406, iteration: 235378
loss: 1.0130057334899902,grad_norm: 0.9999991930678046, iteration: 235379
loss: 1.0078774690628052,grad_norm: 0.9999991508668719, iteration: 235380
loss: 0.9610962867736816,grad_norm: 0.7914584203595224, iteration: 235381
loss: 1.0085880756378174,grad_norm: 0.9789477330039253, iteration: 235382
loss: 0.9821656346321106,grad_norm: 0.9999990520602529, iteration: 235383
loss: 1.0185331106185913,grad_norm: 0.7623615079914409, iteration: 235384
loss: 0.9815821051597595,grad_norm: 0.999999108174599, iteration: 235385
loss: 0.9873411655426025,grad_norm: 0.9999991269926383, iteration: 235386
loss: 0.9988276958465576,grad_norm: 0.7891345480845636, iteration: 235387
loss: 0.9776706695556641,grad_norm: 0.8261454716973816, iteration: 235388
loss: 0.9959327578544617,grad_norm: 0.9725964628684083, iteration: 235389
loss: 0.9852571487426758,grad_norm: 0.9999993422629452, iteration: 235390
loss: 0.984756588935852,grad_norm: 0.8994601611541347, iteration: 235391
loss: 1.0221631526947021,grad_norm: 0.8474800662112987, iteration: 235392
loss: 1.0015417337417603,grad_norm: 0.9999995626351603, iteration: 235393
loss: 1.0204941034317017,grad_norm: 0.9735721113633784, iteration: 235394
loss: 1.0188603401184082,grad_norm: 0.999999267763388, iteration: 235395
loss: 1.0178189277648926,grad_norm: 0.9999994458154534, iteration: 235396
loss: 1.0094828605651855,grad_norm: 0.9999991899658726, iteration: 235397
loss: 1.0088258981704712,grad_norm: 0.9774137273807071, iteration: 235398
loss: 0.9552546143531799,grad_norm: 0.966872967499221, iteration: 235399
loss: 1.0340497493743896,grad_norm: 0.9999991919381261, iteration: 235400
loss: 0.9951932430267334,grad_norm: 0.994503613447663, iteration: 235401
loss: 1.0138899087905884,grad_norm: 0.9999991362131516, iteration: 235402
loss: 1.0035722255706787,grad_norm: 0.9556727067521309, iteration: 235403
loss: 0.9984243512153625,grad_norm: 0.9999992793704265, iteration: 235404
loss: 1.011134147644043,grad_norm: 0.7966164676692227, iteration: 235405
loss: 1.0354083776474,grad_norm: 0.9999990290967661, iteration: 235406
loss: 1.020121455192566,grad_norm: 0.8562357237025604, iteration: 235407
loss: 1.045235514640808,grad_norm: 0.9999996077197881, iteration: 235408
loss: 1.028260350227356,grad_norm: 0.9999990563158184, iteration: 235409
loss: 1.0159107446670532,grad_norm: 0.9835499203863318, iteration: 235410
loss: 1.0242807865142822,grad_norm: 0.9999989957310802, iteration: 235411
loss: 1.0341193675994873,grad_norm: 0.99999953111585, iteration: 235412
loss: 0.9601948261260986,grad_norm: 0.8341563841867241, iteration: 235413
loss: 0.9971784949302673,grad_norm: 0.9999997980655779, iteration: 235414
loss: 1.0303590297698975,grad_norm: 0.9999991685446472, iteration: 235415
loss: 1.0464637279510498,grad_norm: 0.9636448700899657, iteration: 235416
loss: 1.0608084201812744,grad_norm: 0.9999992425576101, iteration: 235417
loss: 0.9941695332527161,grad_norm: 0.9570925760229367, iteration: 235418
loss: 1.023292064666748,grad_norm: 0.9246346818250151, iteration: 235419
loss: 1.0124117136001587,grad_norm: 0.899552777066796, iteration: 235420
loss: 0.9913046360015869,grad_norm: 0.8889333250611369, iteration: 235421
loss: 0.9634994864463806,grad_norm: 0.8214456262583526, iteration: 235422
loss: 1.0014801025390625,grad_norm: 0.8653776894626898, iteration: 235423
loss: 1.018583059310913,grad_norm: 0.9999991549804816, iteration: 235424
loss: 0.9843264222145081,grad_norm: 0.9976683928160714, iteration: 235425
loss: 1.0083428621292114,grad_norm: 0.9999991289536831, iteration: 235426
loss: 0.9829475283622742,grad_norm: 0.9143324352651726, iteration: 235427
loss: 0.9913431406021118,grad_norm: 0.9573760118773624, iteration: 235428
loss: 1.0299270153045654,grad_norm: 0.9328844721722713, iteration: 235429
loss: 0.9875752329826355,grad_norm: 0.9309123253222359, iteration: 235430
loss: 1.0653151273727417,grad_norm: 0.9999999276726884, iteration: 235431
loss: 1.0162326097488403,grad_norm: 0.9999991693467073, iteration: 235432
loss: 0.9970923066139221,grad_norm: 0.9999992266718232, iteration: 235433
loss: 1.0112781524658203,grad_norm: 0.734072750082853, iteration: 235434
loss: 0.9837629199028015,grad_norm: 0.8568154153639043, iteration: 235435
loss: 0.9937303066253662,grad_norm: 0.9999991618647445, iteration: 235436
loss: 1.0130372047424316,grad_norm: 0.8298127536696602, iteration: 235437
loss: 1.0425748825073242,grad_norm: 0.9389709446507053, iteration: 235438
loss: 1.0175065994262695,grad_norm: 0.9651339001658811, iteration: 235439
loss: 0.9801397323608398,grad_norm: 0.9999991729597942, iteration: 235440
loss: 1.0154112577438354,grad_norm: 0.9999991248274647, iteration: 235441
loss: 0.9855444431304932,grad_norm: 0.9671337497932805, iteration: 235442
loss: 0.9848334789276123,grad_norm: 0.9250203180154023, iteration: 235443
loss: 0.9890128374099731,grad_norm: 0.9859033269684068, iteration: 235444
loss: 1.0235884189605713,grad_norm: 0.9999994683902466, iteration: 235445
loss: 1.0614047050476074,grad_norm: 0.9999993760891819, iteration: 235446
loss: 1.012580156326294,grad_norm: 0.9999992986157724, iteration: 235447
loss: 0.9753849506378174,grad_norm: 0.999999359116293, iteration: 235448
loss: 1.0359888076782227,grad_norm: 0.9999991874077095, iteration: 235449
loss: 0.9722346663475037,grad_norm: 0.9999991092451151, iteration: 235450
loss: 1.015291452407837,grad_norm: 0.8367563323323768, iteration: 235451
loss: 0.9888049960136414,grad_norm: 0.7929202610392775, iteration: 235452
loss: 1.0077869892120361,grad_norm: 0.91093306696454, iteration: 235453
loss: 1.0536311864852905,grad_norm: 0.9437859089742946, iteration: 235454
loss: 1.0474411249160767,grad_norm: 0.999999704738002, iteration: 235455
loss: 1.0236245393753052,grad_norm: 0.9999990972456985, iteration: 235456
loss: 1.0783090591430664,grad_norm: 0.9999992572429489, iteration: 235457
loss: 1.0141218900680542,grad_norm: 0.9179259736904838, iteration: 235458
loss: 1.070227861404419,grad_norm: 0.9999991103314999, iteration: 235459
loss: 1.1077293157577515,grad_norm: 0.9540099430085803, iteration: 235460
loss: 0.9969630241394043,grad_norm: 0.7896542661657973, iteration: 235461
loss: 0.9779230952262878,grad_norm: 0.9999992062523383, iteration: 235462
loss: 0.9910271167755127,grad_norm: 0.8181162576206275, iteration: 235463
loss: 0.975919246673584,grad_norm: 0.9999991559355763, iteration: 235464
loss: 0.9835485816001892,grad_norm: 0.9395742539005795, iteration: 235465
loss: 1.0081273317337036,grad_norm: 0.9999991647145536, iteration: 235466
loss: 1.0209693908691406,grad_norm: 0.9999992108893568, iteration: 235467
loss: 1.0153913497924805,grad_norm: 0.999999361235939, iteration: 235468
loss: 1.049936294555664,grad_norm: 0.9999993185539747, iteration: 235469
loss: 0.9770339727401733,grad_norm: 0.8787674438928791, iteration: 235470
loss: 1.0046905279159546,grad_norm: 0.9999991081119926, iteration: 235471
loss: 0.996304452419281,grad_norm: 0.9321095882923351, iteration: 235472
loss: 0.9961353540420532,grad_norm: 0.9345390579120237, iteration: 235473
loss: 0.9836798906326294,grad_norm: 0.9750722641591768, iteration: 235474
loss: 1.0430266857147217,grad_norm: 0.9984548156556314, iteration: 235475
loss: 1.0274161100387573,grad_norm: 0.9492203117419259, iteration: 235476
loss: 0.9813525676727295,grad_norm: 0.999999079666296, iteration: 235477
loss: 1.0385425090789795,grad_norm: 0.8431678882642158, iteration: 235478
loss: 1.0460022687911987,grad_norm: 0.9999991776525824, iteration: 235479
loss: 1.0470436811447144,grad_norm: 0.9365135121570918, iteration: 235480
loss: 0.9983615279197693,grad_norm: 0.9009267114604558, iteration: 235481
loss: 0.9972854256629944,grad_norm: 0.9800079752613273, iteration: 235482
loss: 1.029475212097168,grad_norm: 0.9746504450850404, iteration: 235483
loss: 0.969388484954834,grad_norm: 0.8403651360631746, iteration: 235484
loss: 0.988394558429718,grad_norm: 0.9999989843601673, iteration: 235485
loss: 1.002843976020813,grad_norm: 0.8236589670227826, iteration: 235486
loss: 1.0280177593231201,grad_norm: 0.8299572786696084, iteration: 235487
loss: 1.059495449066162,grad_norm: 0.9662168079814203, iteration: 235488
loss: 1.0168509483337402,grad_norm: 0.9633508790757569, iteration: 235489
loss: 1.0109573602676392,grad_norm: 0.8220462496214106, iteration: 235490
loss: 0.9986822009086609,grad_norm: 0.9666802230075269, iteration: 235491
loss: 1.0192581415176392,grad_norm: 0.9211897650693526, iteration: 235492
loss: 1.0220911502838135,grad_norm: 0.7891905104193775, iteration: 235493
loss: 0.9910063743591309,grad_norm: 0.8908218903534691, iteration: 235494
loss: 0.9707035422325134,grad_norm: 0.8745176340216105, iteration: 235495
loss: 1.0761594772338867,grad_norm: 0.9999994285960918, iteration: 235496
loss: 1.117260217666626,grad_norm: 0.9999995314336352, iteration: 235497
loss: 0.9989644885063171,grad_norm: 0.9448924417439115, iteration: 235498
loss: 1.0188393592834473,grad_norm: 0.8015351165823437, iteration: 235499
loss: 1.0105088949203491,grad_norm: 0.9999995394826174, iteration: 235500
loss: 1.037803292274475,grad_norm: 0.9999995565057642, iteration: 235501
loss: 0.9809603691101074,grad_norm: 0.8954781139384601, iteration: 235502
loss: 1.0385454893112183,grad_norm: 0.9999991767812808, iteration: 235503
loss: 1.0046050548553467,grad_norm: 0.8552950003190319, iteration: 235504
loss: 1.0136640071868896,grad_norm: 0.9843553622010666, iteration: 235505
loss: 1.0155246257781982,grad_norm: 0.9443543379559595, iteration: 235506
loss: 1.0447252988815308,grad_norm: 0.9976674609780665, iteration: 235507
loss: 1.0153872966766357,grad_norm: 0.9999990522835627, iteration: 235508
loss: 0.9836555123329163,grad_norm: 0.9723855378054592, iteration: 235509
loss: 0.993438720703125,grad_norm: 0.9380725119041504, iteration: 235510
loss: 1.1021937131881714,grad_norm: 0.9999998494772311, iteration: 235511
loss: 1.0151453018188477,grad_norm: 0.9999995038201716, iteration: 235512
loss: 1.0075033903121948,grad_norm: 0.9999990477676327, iteration: 235513
loss: 0.9851011037826538,grad_norm: 0.8051798271326963, iteration: 235514
loss: 1.018869400024414,grad_norm: 0.9130142344078178, iteration: 235515
loss: 1.0263038873672485,grad_norm: 0.9999993021218236, iteration: 235516
loss: 0.9877473711967468,grad_norm: 0.9999993273433092, iteration: 235517
loss: 1.0176092386245728,grad_norm: 0.8978584925656815, iteration: 235518
loss: 1.0034412145614624,grad_norm: 0.9380979938089135, iteration: 235519
loss: 1.0327810049057007,grad_norm: 0.7784740594068453, iteration: 235520
loss: 1.087269902229309,grad_norm: 0.9999992107246618, iteration: 235521
loss: 1.0067042112350464,grad_norm: 0.9183925355997279, iteration: 235522
loss: 1.01261305809021,grad_norm: 0.9999990662414195, iteration: 235523
loss: 0.9971986413002014,grad_norm: 0.9999990941262986, iteration: 235524
loss: 0.9854716062545776,grad_norm: 0.9999993197081662, iteration: 235525
loss: 0.9977340698242188,grad_norm: 0.9107879486808292, iteration: 235526
loss: 1.05239999294281,grad_norm: 0.963858047435453, iteration: 235527
loss: 1.0586107969284058,grad_norm: 0.9999997638601312, iteration: 235528
loss: 1.0139200687408447,grad_norm: 0.8860477390800419, iteration: 235529
loss: 1.0145183801651,grad_norm: 0.9999994928766996, iteration: 235530
loss: 0.9968093037605286,grad_norm: 0.9239568426367321, iteration: 235531
loss: 1.034258246421814,grad_norm: 0.9999992312970213, iteration: 235532
loss: 0.991003155708313,grad_norm: 0.858960911848027, iteration: 235533
loss: 1.0113134384155273,grad_norm: 0.8954826153464167, iteration: 235534
loss: 1.1509116888046265,grad_norm: 0.9999999208446437, iteration: 235535
loss: 1.0282917022705078,grad_norm: 0.8129182538709865, iteration: 235536
loss: 1.0811177492141724,grad_norm: 0.9999995571769115, iteration: 235537
loss: 0.991724967956543,grad_norm: 0.9937327550737167, iteration: 235538
loss: 0.9833318591117859,grad_norm: 0.9999990024050535, iteration: 235539
loss: 1.0626877546310425,grad_norm: 0.9999998033859039, iteration: 235540
loss: 1.0246269702911377,grad_norm: 0.9999999071955193, iteration: 235541
loss: 0.9862285256385803,grad_norm: 0.9999993704247726, iteration: 235542
loss: 1.320794939994812,grad_norm: 0.999999933135574, iteration: 235543
loss: 1.0073010921478271,grad_norm: 0.9999994336592882, iteration: 235544
loss: 1.0293153524398804,grad_norm: 0.9999991662172042, iteration: 235545
loss: 1.1620854139328003,grad_norm: 0.999999135402037, iteration: 235546
loss: 1.083793044090271,grad_norm: 0.9999990888187197, iteration: 235547
loss: 0.9946684837341309,grad_norm: 0.906648225129956, iteration: 235548
loss: 1.1472806930541992,grad_norm: 0.9999991483348606, iteration: 235549
loss: 1.0162619352340698,grad_norm: 0.9999996837568392, iteration: 235550
loss: 1.002214789390564,grad_norm: 0.9999990992389863, iteration: 235551
loss: 1.0890506505966187,grad_norm: 0.9999996918154765, iteration: 235552
loss: 1.222898006439209,grad_norm: 0.999999323688441, iteration: 235553
loss: 1.0806922912597656,grad_norm: 0.9999994730369232, iteration: 235554
loss: 1.037961721420288,grad_norm: 0.9999993633601856, iteration: 235555
loss: 1.1083049774169922,grad_norm: 0.9999999978143472, iteration: 235556
loss: 1.0328118801116943,grad_norm: 0.9999992503804478, iteration: 235557
loss: 1.0220890045166016,grad_norm: 0.9999995442459206, iteration: 235558
loss: 1.1063205003738403,grad_norm: 1.00000006303208, iteration: 235559
loss: 1.2314690351486206,grad_norm: 0.9999996847319002, iteration: 235560
loss: 1.0309340953826904,grad_norm: 0.9999993123709517, iteration: 235561
loss: 1.002888798713684,grad_norm: 0.9999991658858763, iteration: 235562
loss: 1.051908254623413,grad_norm: 0.9560769316694016, iteration: 235563
loss: 1.0252888202667236,grad_norm: 0.9999991054900094, iteration: 235564
loss: 1.0368480682373047,grad_norm: 0.9999998523457687, iteration: 235565
loss: 1.0708292722702026,grad_norm: 0.9545327070081042, iteration: 235566
loss: 1.043274164199829,grad_norm: 0.9999999775523042, iteration: 235567
loss: 1.0262764692306519,grad_norm: 0.9999994357923744, iteration: 235568
loss: 1.0121732950210571,grad_norm: 0.9999995300668271, iteration: 235569
loss: 0.9842795729637146,grad_norm: 0.9999997073280052, iteration: 235570
loss: 0.9565587043762207,grad_norm: 0.9999991418073506, iteration: 235571
loss: 1.214468002319336,grad_norm: 0.9999993701592664, iteration: 235572
loss: 1.1844213008880615,grad_norm: 0.9999996982383624, iteration: 235573
loss: 1.1279008388519287,grad_norm: 0.9999997802811952, iteration: 235574
loss: 1.0056583881378174,grad_norm: 0.9999996288064504, iteration: 235575
loss: 1.0701696872711182,grad_norm: 0.8775940147240341, iteration: 235576
loss: 1.0991474390029907,grad_norm: 0.8504321101117682, iteration: 235577
loss: 1.2611722946166992,grad_norm: 0.9999998393322578, iteration: 235578
loss: 1.1069653034210205,grad_norm: 0.9999990544614853, iteration: 235579
loss: 1.0760835409164429,grad_norm: 0.9999999263385425, iteration: 235580
loss: 1.2016515731811523,grad_norm: 0.9999994736427301, iteration: 235581
loss: 1.0208406448364258,grad_norm: 0.98261431396326, iteration: 235582
loss: 1.038744330406189,grad_norm: 1.000000065225293, iteration: 235583
loss: 1.0016942024230957,grad_norm: 0.945430220007351, iteration: 235584
loss: 1.078315258026123,grad_norm: 0.9999999256197379, iteration: 235585
loss: 1.003921389579773,grad_norm: 0.8588399192804953, iteration: 235586
loss: 1.1402782201766968,grad_norm: 0.999999924224542, iteration: 235587
loss: 1.1104410886764526,grad_norm: 0.9999998664521661, iteration: 235588
loss: 0.9961308836936951,grad_norm: 0.975635017258214, iteration: 235589
loss: 1.063636064529419,grad_norm: 0.9999996820709446, iteration: 235590
loss: 1.1438417434692383,grad_norm: 0.9999996145409701, iteration: 235591
loss: 1.0494688749313354,grad_norm: 0.9999995432477837, iteration: 235592
loss: 0.9857812523841858,grad_norm: 0.9999991586629212, iteration: 235593
loss: 1.3109896183013916,grad_norm: 0.9999997103883917, iteration: 235594
loss: 1.1023287773132324,grad_norm: 1.0000000020974624, iteration: 235595
loss: 1.0382195711135864,grad_norm: 0.9346124785073632, iteration: 235596
loss: 1.063342809677124,grad_norm: 0.9999997571154139, iteration: 235597
loss: 1.025248646736145,grad_norm: 0.9999993639592812, iteration: 235598
loss: 1.0297311544418335,grad_norm: 0.9483836790173616, iteration: 235599
loss: 1.0226545333862305,grad_norm: 0.9999990036635499, iteration: 235600
loss: 1.0152114629745483,grad_norm: 0.9999996146729857, iteration: 235601
loss: 0.9701939821243286,grad_norm: 0.9999994806495582, iteration: 235602
loss: 1.0144281387329102,grad_norm: 0.999999692296434, iteration: 235603
loss: 1.0969289541244507,grad_norm: 0.999999744035602, iteration: 235604
loss: 1.1755282878875732,grad_norm: 0.9999999232830884, iteration: 235605
loss: 1.0376043319702148,grad_norm: 0.9999997850323415, iteration: 235606
loss: 1.054815411567688,grad_norm: 0.8776990252742413, iteration: 235607
loss: 1.141283392906189,grad_norm: 0.9663061193610133, iteration: 235608
loss: 0.9859825372695923,grad_norm: 0.9999992670030418, iteration: 235609
loss: 1.086146354675293,grad_norm: 0.9999990217306709, iteration: 235610
loss: 1.0024501085281372,grad_norm: 0.9659519895315045, iteration: 235611
loss: 1.0411921739578247,grad_norm: 0.9999992618676625, iteration: 235612
loss: 1.0204365253448486,grad_norm: 0.9999991458173567, iteration: 235613
loss: 1.0492953062057495,grad_norm: 0.9999991175387485, iteration: 235614
loss: 1.09320068359375,grad_norm: 0.9999992488249906, iteration: 235615
loss: 1.0214686393737793,grad_norm: 0.9999991061704722, iteration: 235616
loss: 1.0149509906768799,grad_norm: 0.999999885319684, iteration: 235617
loss: 0.965202808380127,grad_norm: 0.9999991008543978, iteration: 235618
loss: 1.0073192119598389,grad_norm: 0.8409714897334641, iteration: 235619
loss: 1.0235636234283447,grad_norm: 0.999999024250478, iteration: 235620
loss: 0.9885674715042114,grad_norm: 0.9999990297969772, iteration: 235621
loss: 1.0376538038253784,grad_norm: 0.9999991612475682, iteration: 235622
loss: 1.0492500066757202,grad_norm: 0.9999998591424331, iteration: 235623
loss: 0.9976370930671692,grad_norm: 0.9999991680151181, iteration: 235624
loss: 0.9955163598060608,grad_norm: 0.8990997114849545, iteration: 235625
loss: 0.9929419159889221,grad_norm: 0.9270084689620172, iteration: 235626
loss: 0.9929519891738892,grad_norm: 0.9999993563975511, iteration: 235627
loss: 1.034226417541504,grad_norm: 0.9999752466578722, iteration: 235628
loss: 1.115889549255371,grad_norm: 0.9999993741260572, iteration: 235629
loss: 1.020479679107666,grad_norm: 0.9999991688911115, iteration: 235630
loss: 0.9829619526863098,grad_norm: 0.897501364742087, iteration: 235631
loss: 1.143271803855896,grad_norm: 0.99999963167619, iteration: 235632
loss: 1.083234429359436,grad_norm: 0.999999347537494, iteration: 235633
loss: 1.0019056797027588,grad_norm: 0.999999282087312, iteration: 235634
loss: 1.1124835014343262,grad_norm: 0.9010921535647606, iteration: 235635
loss: 1.020750880241394,grad_norm: 0.8298993185866854, iteration: 235636
loss: 0.9812069535255432,grad_norm: 0.9494129349930507, iteration: 235637
loss: 0.9791252017021179,grad_norm: 0.9999991096976567, iteration: 235638
loss: 1.125462293624878,grad_norm: 0.9999993433235947, iteration: 235639
loss: 1.0623528957366943,grad_norm: 0.9999992093678536, iteration: 235640
loss: 1.000896692276001,grad_norm: 0.9999991721226584, iteration: 235641
loss: 1.0728579759597778,grad_norm: 0.999999769220556, iteration: 235642
loss: 0.9704990386962891,grad_norm: 0.8643571730100477, iteration: 235643
loss: 1.1332409381866455,grad_norm: 0.9999991619715061, iteration: 235644
loss: 1.0359752178192139,grad_norm: 0.999999184796486, iteration: 235645
loss: 1.12559974193573,grad_norm: 0.9999996429691937, iteration: 235646
loss: 0.9856722950935364,grad_norm: 0.7899645842261239, iteration: 235647
loss: 0.9867465496063232,grad_norm: 0.8264307855312812, iteration: 235648
loss: 1.0256636142730713,grad_norm: 0.999999100824225, iteration: 235649
loss: 1.0440579652786255,grad_norm: 0.9054637988198242, iteration: 235650
loss: 1.0145615339279175,grad_norm: 0.8252679226149053, iteration: 235651
loss: 0.9800106883049011,grad_norm: 0.8604723318333396, iteration: 235652
loss: 1.0219969749450684,grad_norm: 0.9999991461603014, iteration: 235653
loss: 1.0662493705749512,grad_norm: 0.999999690010264, iteration: 235654
loss: 1.0048905611038208,grad_norm: 0.7918762468511051, iteration: 235655
loss: 1.019783854484558,grad_norm: 0.9999990421848999, iteration: 235656
loss: 0.9818811416625977,grad_norm: 0.8741501551634117, iteration: 235657
loss: 1.0684901475906372,grad_norm: 0.9999989491796529, iteration: 235658
loss: 1.0016971826553345,grad_norm: 0.8261371501719167, iteration: 235659
loss: 0.9501024484634399,grad_norm: 0.8154293008565666, iteration: 235660
loss: 1.0046093463897705,grad_norm: 0.999999091615708, iteration: 235661
loss: 1.0997754335403442,grad_norm: 0.9999991290796256, iteration: 235662
loss: 0.9922329783439636,grad_norm: 0.910919445457095, iteration: 235663
loss: 1.0272530317306519,grad_norm: 0.9661387016200634, iteration: 235664
loss: 0.9888942837715149,grad_norm: 0.845962700747149, iteration: 235665
loss: 0.9869667291641235,grad_norm: 0.9999991854721331, iteration: 235666
loss: 0.9827662110328674,grad_norm: 0.9999991030824136, iteration: 235667
loss: 0.9961485266685486,grad_norm: 0.9999996215693778, iteration: 235668
loss: 1.0145522356033325,grad_norm: 0.9682139055704037, iteration: 235669
loss: 1.0457109212875366,grad_norm: 0.9999997861573139, iteration: 235670
loss: 0.9836542010307312,grad_norm: 0.9999991288350766, iteration: 235671
loss: 1.091238021850586,grad_norm: 0.9999992581584809, iteration: 235672
loss: 1.0418198108673096,grad_norm: 0.9999991575632586, iteration: 235673
loss: 1.0463327169418335,grad_norm: 0.9999990834307636, iteration: 235674
loss: 0.9830541014671326,grad_norm: 0.9999992408390848, iteration: 235675
loss: 1.01423978805542,grad_norm: 0.956312046304485, iteration: 235676
loss: 1.051222562789917,grad_norm: 0.9999992775157986, iteration: 235677
loss: 0.9818176627159119,grad_norm: 0.8721222324644771, iteration: 235678
loss: 1.053144931793213,grad_norm: 0.9999993090644343, iteration: 235679
loss: 1.0056296586990356,grad_norm: 0.8132403340333263, iteration: 235680
loss: 0.990203857421875,grad_norm: 0.933350543506909, iteration: 235681
loss: 0.9704891443252563,grad_norm: 0.856143590307955, iteration: 235682
loss: 0.9891102910041809,grad_norm: 0.8952328373816839, iteration: 235683
loss: 1.0090034008026123,grad_norm: 0.9726997595269441, iteration: 235684
loss: 1.0027763843536377,grad_norm: 0.9539615122002754, iteration: 235685
loss: 0.9888395667076111,grad_norm: 0.9999991070650799, iteration: 235686
loss: 1.0955064296722412,grad_norm: 0.9963327750899469, iteration: 235687
loss: 0.96103435754776,grad_norm: 0.8831149074636346, iteration: 235688
loss: 1.0261515378952026,grad_norm: 0.9705244072729192, iteration: 235689
loss: 1.1081714630126953,grad_norm: 0.9999993693785999, iteration: 235690
loss: 1.0108474493026733,grad_norm: 0.999999091388337, iteration: 235691
loss: 1.1563469171524048,grad_norm: 0.9999993284485623, iteration: 235692
loss: 1.0303484201431274,grad_norm: 0.9999991778940364, iteration: 235693
loss: 1.0540695190429688,grad_norm: 0.9999993391739616, iteration: 235694
loss: 0.9939993619918823,grad_norm: 0.9999989986183173, iteration: 235695
loss: 0.9790721535682678,grad_norm: 0.9999998939969472, iteration: 235696
loss: 0.9904752969741821,grad_norm: 0.7005677255973609, iteration: 235697
loss: 1.0204678773880005,grad_norm: 0.8044042459459789, iteration: 235698
loss: 0.9925826191902161,grad_norm: 0.822129190093505, iteration: 235699
loss: 1.0387219190597534,grad_norm: 0.926819805954503, iteration: 235700
loss: 1.0228372812271118,grad_norm: 0.9999989860585785, iteration: 235701
loss: 0.9982522130012512,grad_norm: 0.9889048455692858, iteration: 235702
loss: 1.0144282579421997,grad_norm: 0.772770819036567, iteration: 235703
loss: 0.994469165802002,grad_norm: 0.919460634833463, iteration: 235704
loss: 1.0215965509414673,grad_norm: 0.8492905902582258, iteration: 235705
loss: 1.0346530675888062,grad_norm: 0.9999992066978475, iteration: 235706
loss: 0.9953241348266602,grad_norm: 0.9999990475305909, iteration: 235707
loss: 1.0777935981750488,grad_norm: 0.8171617307645443, iteration: 235708
loss: 0.9921193718910217,grad_norm: 0.9150206831626458, iteration: 235709
loss: 1.0275272130966187,grad_norm: 0.9587531192091039, iteration: 235710
loss: 1.0182627439498901,grad_norm: 0.9999996751643068, iteration: 235711
loss: 0.9917351007461548,grad_norm: 0.9599811368248261, iteration: 235712
loss: 1.0748430490493774,grad_norm: 0.9999995270499826, iteration: 235713
loss: 1.0183446407318115,grad_norm: 0.9147853950130259, iteration: 235714
loss: 0.9786475300788879,grad_norm: 0.7983025873784504, iteration: 235715
loss: 1.007351040840149,grad_norm: 0.8202134756264535, iteration: 235716
loss: 1.0668220520019531,grad_norm: 0.9999993013050774, iteration: 235717
loss: 0.9831857085227966,grad_norm: 0.9577187955059514, iteration: 235718
loss: 1.0928155183792114,grad_norm: 0.9999993841257682, iteration: 235719
loss: 1.0423067808151245,grad_norm: 0.9999990799341215, iteration: 235720
loss: 0.9942744970321655,grad_norm: 0.9999996107062981, iteration: 235721
loss: 1.0296549797058105,grad_norm: 0.9999993094498159, iteration: 235722
loss: 1.1167069673538208,grad_norm: 0.9999994712283585, iteration: 235723
loss: 1.0691083669662476,grad_norm: 0.9618424639453407, iteration: 235724
loss: 1.030532717704773,grad_norm: 0.9999993847087517, iteration: 235725
loss: 1.0487427711486816,grad_norm: 0.9857540366975677, iteration: 235726
loss: 1.1463576555252075,grad_norm: 0.9999994676831173, iteration: 235727
loss: 1.0139265060424805,grad_norm: 0.8192049191176113, iteration: 235728
loss: 1.0442582368850708,grad_norm: 0.9999997236549912, iteration: 235729
loss: 1.064378261566162,grad_norm: 0.9999991881747268, iteration: 235730
loss: 1.0359658002853394,grad_norm: 0.8404048575671559, iteration: 235731
loss: 0.9782655239105225,grad_norm: 0.9999990435167317, iteration: 235732
loss: 1.0344116687774658,grad_norm: 0.9999991410457048, iteration: 235733
loss: 0.9941560626029968,grad_norm: 0.9280157309242488, iteration: 235734
loss: 1.1555968523025513,grad_norm: 0.9999995883914544, iteration: 235735
loss: 1.0302958488464355,grad_norm: 0.8100464808876774, iteration: 235736
loss: 0.9987779855728149,grad_norm: 0.9999996331984021, iteration: 235737
loss: 1.0335330963134766,grad_norm: 1.000000041544042, iteration: 235738
loss: 1.0109758377075195,grad_norm: 0.9495004914266796, iteration: 235739
loss: 0.9992812871932983,grad_norm: 0.9298237134769167, iteration: 235740
loss: 1.033312201499939,grad_norm: 0.9821793903988373, iteration: 235741
loss: 1.1079262495040894,grad_norm: 0.9999995709379742, iteration: 235742
loss: 1.04323148727417,grad_norm: 0.9999994431233227, iteration: 235743
loss: 1.037676453590393,grad_norm: 0.9099593514097195, iteration: 235744
loss: 0.9823976159095764,grad_norm: 0.9999990901774964, iteration: 235745
loss: 0.9999179840087891,grad_norm: 0.9445593781386553, iteration: 235746
loss: 1.0169273614883423,grad_norm: 0.8403207142452453, iteration: 235747
loss: 1.0911294221878052,grad_norm: 0.999999575862788, iteration: 235748
loss: 1.1023505926132202,grad_norm: 0.8681476385410997, iteration: 235749
loss: 1.0413060188293457,grad_norm: 0.9999999212271002, iteration: 235750
loss: 1.0563806295394897,grad_norm: 0.9999997472612285, iteration: 235751
loss: 1.2207837104797363,grad_norm: 0.9999996387533233, iteration: 235752
loss: 1.0397088527679443,grad_norm: 0.9681475691209853, iteration: 235753
loss: 1.189690351486206,grad_norm: 0.999999268043285, iteration: 235754
loss: 1.0780150890350342,grad_norm: 0.9999997584235892, iteration: 235755
loss: 0.9849053621292114,grad_norm: 0.9541485465589724, iteration: 235756
loss: 1.0120753049850464,grad_norm: 0.8401671494768154, iteration: 235757
loss: 1.0734821557998657,grad_norm: 0.9999997385126265, iteration: 235758
loss: 1.0515413284301758,grad_norm: 0.9999990285284078, iteration: 235759
loss: 0.9620985984802246,grad_norm: 0.9669531979733613, iteration: 235760
loss: 1.0623451471328735,grad_norm: 0.9787122101312071, iteration: 235761
loss: 1.0412383079528809,grad_norm: 0.9609551747217772, iteration: 235762
loss: 1.0241422653198242,grad_norm: 0.9999991891444562, iteration: 235763
loss: 0.9758298993110657,grad_norm: 0.9999991224546628, iteration: 235764
loss: 1.0169994831085205,grad_norm: 0.9736834457255782, iteration: 235765
loss: 1.0276622772216797,grad_norm: 0.9999991984272717, iteration: 235766
loss: 1.001238226890564,grad_norm: 0.8795820135996123, iteration: 235767
loss: 1.0074738264083862,grad_norm: 0.9143533021755725, iteration: 235768
loss: 1.0137767791748047,grad_norm: 0.9999991555923237, iteration: 235769
loss: 0.9696136713027954,grad_norm: 0.9983679108891597, iteration: 235770
loss: 1.0577845573425293,grad_norm: 0.9999999057276984, iteration: 235771
loss: 0.9991359710693359,grad_norm: 0.9999989801973894, iteration: 235772
loss: 1.060957431793213,grad_norm: 0.9999996397949447, iteration: 235773
loss: 0.991020143032074,grad_norm: 0.9599432449335229, iteration: 235774
loss: 0.9978046417236328,grad_norm: 0.9999998222859481, iteration: 235775
loss: 1.074338674545288,grad_norm: 0.9999992294361862, iteration: 235776
loss: 0.9738709926605225,grad_norm: 0.9197833839955118, iteration: 235777
loss: 1.081352949142456,grad_norm: 0.9999993778330498, iteration: 235778
loss: 1.1171993017196655,grad_norm: 0.9999999027137148, iteration: 235779
loss: 0.9822157621383667,grad_norm: 0.9959268723493944, iteration: 235780
loss: 1.0575182437896729,grad_norm: 0.9999991290111061, iteration: 235781
loss: 1.047797441482544,grad_norm: 0.9378133535424059, iteration: 235782
loss: 0.9797112941741943,grad_norm: 0.9252193078725451, iteration: 235783
loss: 0.9867637753486633,grad_norm: 0.9999991343412737, iteration: 235784
loss: 1.0201526880264282,grad_norm: 0.9273287313794564, iteration: 235785
loss: 1.0144143104553223,grad_norm: 0.9999991948467645, iteration: 235786
loss: 1.016785740852356,grad_norm: 0.9999990329908071, iteration: 235787
loss: 0.9488834142684937,grad_norm: 0.8185513241617991, iteration: 235788
loss: 1.099696397781372,grad_norm: 0.9692005761356897, iteration: 235789
loss: 1.077624797821045,grad_norm: 0.9999991466879525, iteration: 235790
loss: 1.0160958766937256,grad_norm: 0.9999990426059425, iteration: 235791
loss: 0.9644429087638855,grad_norm: 0.768161179603266, iteration: 235792
loss: 1.0482372045516968,grad_norm: 0.7823418584105299, iteration: 235793
loss: 1.145357608795166,grad_norm: 0.9999999217730601, iteration: 235794
loss: 1.0359907150268555,grad_norm: 0.9269290527116625, iteration: 235795
loss: 0.9979041814804077,grad_norm: 0.9886111837737687, iteration: 235796
loss: 1.1083775758743286,grad_norm: 0.9999997395242521, iteration: 235797
loss: 1.0687034130096436,grad_norm: 0.9729383812171472, iteration: 235798
loss: 1.0455740690231323,grad_norm: 0.9830014246355012, iteration: 235799
loss: 1.022632360458374,grad_norm: 0.9225130606042827, iteration: 235800
loss: 1.029907464981079,grad_norm: 0.868469604056144, iteration: 235801
loss: 0.9694087505340576,grad_norm: 0.8680508263675071, iteration: 235802
loss: 1.0033081769943237,grad_norm: 0.9813036888678416, iteration: 235803
loss: 1.0087997913360596,grad_norm: 0.9999997022532657, iteration: 235804
loss: 0.9727889895439148,grad_norm: 0.8901902133362548, iteration: 235805
loss: 1.071921706199646,grad_norm: 0.9999992796209122, iteration: 235806
loss: 1.0243487358093262,grad_norm: 0.9999992032808681, iteration: 235807
loss: 1.0061177015304565,grad_norm: 0.9999990199545261, iteration: 235808
loss: 1.0021083354949951,grad_norm: 0.9071659118031723, iteration: 235809
loss: 0.9808643460273743,grad_norm: 0.9999990076193876, iteration: 235810
loss: 1.0663257837295532,grad_norm: 0.9545871914606072, iteration: 235811
loss: 1.0110993385314941,grad_norm: 0.9999991421793661, iteration: 235812
loss: 0.9854465126991272,grad_norm: 0.8240771436703284, iteration: 235813
loss: 1.0674097537994385,grad_norm: 0.9999994843739544, iteration: 235814
loss: 1.0683434009552002,grad_norm: 0.9999996290422982, iteration: 235815
loss: 1.0362324714660645,grad_norm: 0.999998928277482, iteration: 235816
loss: 0.9831894636154175,grad_norm: 0.9444546530318856, iteration: 235817
loss: 0.9951553344726562,grad_norm: 0.860985218268625, iteration: 235818
loss: 0.9791700839996338,grad_norm: 0.9448750400547437, iteration: 235819
loss: 1.0248569250106812,grad_norm: 0.9152282992184909, iteration: 235820
loss: 0.9969421625137329,grad_norm: 0.7632186001502728, iteration: 235821
loss: 1.1340851783752441,grad_norm: 0.9999995978297894, iteration: 235822
loss: 1.090910792350769,grad_norm: 0.8496585374463351, iteration: 235823
loss: 1.0461474657058716,grad_norm: 0.9625074749989205, iteration: 235824
loss: 1.0071594715118408,grad_norm: 0.9999990068381399, iteration: 235825
loss: 1.0598987340927124,grad_norm: 0.9999993400320273, iteration: 235826
loss: 1.084273338317871,grad_norm: 0.9999995437186506, iteration: 235827
loss: 0.9628788232803345,grad_norm: 0.9117990581358045, iteration: 235828
loss: 0.989215612411499,grad_norm: 0.9045055462088417, iteration: 235829
loss: 1.0010156631469727,grad_norm: 0.9999992946005263, iteration: 235830
loss: 1.0411843061447144,grad_norm: 0.8462714894983094, iteration: 235831
loss: 0.9942482709884644,grad_norm: 0.9300449549647092, iteration: 235832
loss: 1.0507646799087524,grad_norm: 0.9999993552329468, iteration: 235833
loss: 0.9912641048431396,grad_norm: 0.8684978211873164, iteration: 235834
loss: 1.0414444208145142,grad_norm: 0.9999992837558384, iteration: 235835
loss: 0.993816614151001,grad_norm: 0.9999992516747453, iteration: 235836
loss: 0.9874370098114014,grad_norm: 0.9213988377197575, iteration: 235837
loss: 1.0849231481552124,grad_norm: 0.9999990884853964, iteration: 235838
loss: 1.0225194692611694,grad_norm: 0.999999026649799, iteration: 235839
loss: 1.0419358015060425,grad_norm: 0.9999989837869429, iteration: 235840
loss: 1.0258630514144897,grad_norm: 0.840762037636222, iteration: 235841
loss: 1.0005055665969849,grad_norm: 0.8713344652185682, iteration: 235842
loss: 0.9884529709815979,grad_norm: 0.8535194045730453, iteration: 235843
loss: 1.017000436782837,grad_norm: 0.948009241884628, iteration: 235844
loss: 1.0027968883514404,grad_norm: 0.8933548763878497, iteration: 235845
loss: 1.0970993041992188,grad_norm: 0.99999931576627, iteration: 235846
loss: 1.0271395444869995,grad_norm: 0.8289483357703538, iteration: 235847
loss: 1.0106303691864014,grad_norm: 0.8700996290446568, iteration: 235848
loss: 1.007007122039795,grad_norm: 0.9999989680374858, iteration: 235849
loss: 1.0171617269515991,grad_norm: 0.874717755322457, iteration: 235850
loss: 1.006060242652893,grad_norm: 0.9999989978682869, iteration: 235851
loss: 1.0059720277786255,grad_norm: 0.9132706887302263, iteration: 235852
loss: 1.0138847827911377,grad_norm: 0.9099097989206449, iteration: 235853
loss: 0.9865292906761169,grad_norm: 0.9013694471934269, iteration: 235854
loss: 0.9687298536300659,grad_norm: 0.8716659119485105, iteration: 235855
loss: 1.0078214406967163,grad_norm: 0.9999993786635625, iteration: 235856
loss: 1.0303608179092407,grad_norm: 0.9999994220823702, iteration: 235857
loss: 1.0164765119552612,grad_norm: 0.7974988285695356, iteration: 235858
loss: 1.0114072561264038,grad_norm: 0.8352617695860197, iteration: 235859
loss: 1.0228760242462158,grad_norm: 0.9308717462065155, iteration: 235860
loss: 0.9820477366447449,grad_norm: 0.9585405012997644, iteration: 235861
loss: 1.016687035560608,grad_norm: 0.8082269280660269, iteration: 235862
loss: 1.059952974319458,grad_norm: 0.999999897659597, iteration: 235863
loss: 0.985863208770752,grad_norm: 0.9125845867277987, iteration: 235864
loss: 1.0098190307617188,grad_norm: 0.9999997817299637, iteration: 235865
loss: 1.031563401222229,grad_norm: 0.8626253434490175, iteration: 235866
loss: 1.0181735754013062,grad_norm: 0.7228570666590812, iteration: 235867
loss: 1.0518038272857666,grad_norm: 0.9999991238341844, iteration: 235868
loss: 1.0053991079330444,grad_norm: 0.9999993664076011, iteration: 235869
loss: 1.0388232469558716,grad_norm: 0.9999991635580778, iteration: 235870
loss: 0.9707357883453369,grad_norm: 0.9420204715408514, iteration: 235871
loss: 1.0231388807296753,grad_norm: 0.9999990652887213, iteration: 235872
loss: 1.0149799585342407,grad_norm: 0.8568007963107152, iteration: 235873
loss: 0.9935455322265625,grad_norm: 0.9042531523218342, iteration: 235874
loss: 1.029794692993164,grad_norm: 0.999999894083243, iteration: 235875
loss: 1.0362671613693237,grad_norm: 0.999999895885321, iteration: 235876
loss: 1.0126410722732544,grad_norm: 0.9726547979782686, iteration: 235877
loss: 1.024134635925293,grad_norm: 0.9999997035756683, iteration: 235878
loss: 0.9819923639297485,grad_norm: 0.8449958571434265, iteration: 235879
loss: 1.047048568725586,grad_norm: 0.9999994233577768, iteration: 235880
loss: 1.014768123626709,grad_norm: 0.9465204535103633, iteration: 235881
loss: 1.001931071281433,grad_norm: 0.9999991204826306, iteration: 235882
loss: 0.9882739782333374,grad_norm: 0.9799041246493708, iteration: 235883
loss: 0.986090362071991,grad_norm: 0.9460669097470721, iteration: 235884
loss: 1.0021605491638184,grad_norm: 0.8647069129252478, iteration: 235885
loss: 1.0263065099716187,grad_norm: 0.9603670933767163, iteration: 235886
loss: 1.0356918573379517,grad_norm: 0.9999991533134933, iteration: 235887
loss: 1.1312813758850098,grad_norm: 0.9999994823840757, iteration: 235888
loss: 1.0599312782287598,grad_norm: 0.9999994636204791, iteration: 235889
loss: 1.0171457529067993,grad_norm: 0.9463202789751886, iteration: 235890
loss: 1.0077930688858032,grad_norm: 0.9999990464434185, iteration: 235891
loss: 1.0471503734588623,grad_norm: 0.999999037861205, iteration: 235892
loss: 1.000098705291748,grad_norm: 0.9999990437060191, iteration: 235893
loss: 1.0598911046981812,grad_norm: 0.9999998585435071, iteration: 235894
loss: 1.015306830406189,grad_norm: 0.8039643714406115, iteration: 235895
loss: 1.0147652626037598,grad_norm: 0.9999993854571745, iteration: 235896
loss: 1.0098921060562134,grad_norm: 0.9999990910216455, iteration: 235897
loss: 1.0271776914596558,grad_norm: 0.9999994949768773, iteration: 235898
loss: 1.0147324800491333,grad_norm: 0.9999990816692624, iteration: 235899
loss: 1.0203609466552734,grad_norm: 0.9411242215758747, iteration: 235900
loss: 1.0117093324661255,grad_norm: 0.8656645224936319, iteration: 235901
loss: 1.050551176071167,grad_norm: 0.9999996491001147, iteration: 235902
loss: 0.9846915602684021,grad_norm: 0.8398668615327262, iteration: 235903
loss: 0.9446179866790771,grad_norm: 0.9046909306534293, iteration: 235904
loss: 1.0256794691085815,grad_norm: 0.9975940397367701, iteration: 235905
loss: 1.0333542823791504,grad_norm: 0.8699000585496894, iteration: 235906
loss: 1.0433045625686646,grad_norm: 0.999999244114987, iteration: 235907
loss: 1.0042818784713745,grad_norm: 0.8748151451302297, iteration: 235908
loss: 0.9997458457946777,grad_norm: 0.9203556516848185, iteration: 235909
loss: 0.9817848801612854,grad_norm: 0.9999995786184227, iteration: 235910
loss: 0.942615807056427,grad_norm: 0.981869990909903, iteration: 235911
loss: 1.0035020112991333,grad_norm: 0.8588587779732538, iteration: 235912
loss: 0.9901841282844543,grad_norm: 0.9999990820983766, iteration: 235913
loss: 0.9892653226852417,grad_norm: 0.7572512096282925, iteration: 235914
loss: 0.9997944831848145,grad_norm: 0.7199155930036647, iteration: 235915
loss: 0.9913612604141235,grad_norm: 0.9999993993794266, iteration: 235916
loss: 1.0120447874069214,grad_norm: 0.8673522140376196, iteration: 235917
loss: 0.9995812177658081,grad_norm: 0.850882587796259, iteration: 235918
loss: 1.0029289722442627,grad_norm: 0.9974503253231851, iteration: 235919
loss: 0.9743664264678955,grad_norm: 0.9999991820225945, iteration: 235920
loss: 0.998770534992218,grad_norm: 0.8998601990709941, iteration: 235921
loss: 1.0240147113800049,grad_norm: 0.9483363596975396, iteration: 235922
loss: 1.016692876815796,grad_norm: 0.9999997766576317, iteration: 235923
loss: 0.9511497020721436,grad_norm: 0.9999990750827681, iteration: 235924
loss: 1.0104929208755493,grad_norm: 0.8746604698528421, iteration: 235925
loss: 0.9777438044548035,grad_norm: 0.9505103023913621, iteration: 235926
loss: 1.0066245794296265,grad_norm: 0.921341226524951, iteration: 235927
loss: 1.0088437795639038,grad_norm: 0.8413559021434447, iteration: 235928
loss: 0.9871413707733154,grad_norm: 0.9024920836791446, iteration: 235929
loss: 1.0175813436508179,grad_norm: 0.8067558024166727, iteration: 235930
loss: 1.0346887111663818,grad_norm: 0.9999993047840192, iteration: 235931
loss: 1.109813928604126,grad_norm: 0.9999998621497637, iteration: 235932
loss: 1.0094910860061646,grad_norm: 0.8847507117293509, iteration: 235933
loss: 0.9876333475112915,grad_norm: 0.8628121300739963, iteration: 235934
loss: 0.9560080766677856,grad_norm: 0.9999992991399088, iteration: 235935
loss: 0.9883200526237488,grad_norm: 0.8891705860674444, iteration: 235936
loss: 0.9942855834960938,grad_norm: 0.999998964242095, iteration: 235937
loss: 0.9720678329467773,grad_norm: 0.9999992127233536, iteration: 235938
loss: 0.9580748677253723,grad_norm: 0.9999994827146143, iteration: 235939
loss: 0.9807544946670532,grad_norm: 0.9999990205288951, iteration: 235940
loss: 1.0271986722946167,grad_norm: 0.8530604289747177, iteration: 235941
loss: 0.9932308793067932,grad_norm: 0.9999991951300305, iteration: 235942
loss: 1.051350712776184,grad_norm: 0.8962031669398997, iteration: 235943
loss: 0.9892306327819824,grad_norm: 0.8389903887715565, iteration: 235944
loss: 1.0250828266143799,grad_norm: 0.7806734543143887, iteration: 235945
loss: 1.0295984745025635,grad_norm: 0.8164056253232578, iteration: 235946
loss: 1.0072931051254272,grad_norm: 0.8150701537973286, iteration: 235947
loss: 1.0265473127365112,grad_norm: 0.999998987088862, iteration: 235948
loss: 1.0003077983856201,grad_norm: 0.8802681034523416, iteration: 235949
loss: 1.0083054304122925,grad_norm: 0.7622759050927714, iteration: 235950
loss: 1.0226839780807495,grad_norm: 0.8766387034170244, iteration: 235951
loss: 1.0243418216705322,grad_norm: 0.961665088276317, iteration: 235952
loss: 1.011911153793335,grad_norm: 0.9999991831252997, iteration: 235953
loss: 1.061875581741333,grad_norm: 0.963252530471342, iteration: 235954
loss: 0.9984732866287231,grad_norm: 0.9999991317388259, iteration: 235955
loss: 1.0423810482025146,grad_norm: 0.9902709834086468, iteration: 235956
loss: 0.9979145526885986,grad_norm: 0.8537826600833195, iteration: 235957
loss: 0.9776593446731567,grad_norm: 0.9999992469154021, iteration: 235958
loss: 0.9946146607398987,grad_norm: 0.8540679550390098, iteration: 235959
loss: 1.0252413749694824,grad_norm: 0.8856577150333258, iteration: 235960
loss: 1.0274189710617065,grad_norm: 0.9999993282527834, iteration: 235961
loss: 1.0069527626037598,grad_norm: 0.8294638062685176, iteration: 235962
loss: 0.9868267178535461,grad_norm: 0.9408056449707912, iteration: 235963
loss: 1.0121939182281494,grad_norm: 0.8948519055332609, iteration: 235964
loss: 1.021047830581665,grad_norm: 0.9999991046304427, iteration: 235965
loss: 0.9881672859191895,grad_norm: 0.9999991080598661, iteration: 235966
loss: 1.0227322578430176,grad_norm: 0.8379326906283178, iteration: 235967
loss: 1.010687232017517,grad_norm: 0.999999005587762, iteration: 235968
loss: 0.9715937972068787,grad_norm: 0.8643449197344117, iteration: 235969
loss: 0.9852938652038574,grad_norm: 0.7540392345784277, iteration: 235970
loss: 0.9478451013565063,grad_norm: 0.9754585370340645, iteration: 235971
loss: 1.0052835941314697,grad_norm: 0.9999993548197724, iteration: 235972
loss: 0.9885812997817993,grad_norm: 0.9219578214399022, iteration: 235973
loss: 1.07131826877594,grad_norm: 0.9999990326097613, iteration: 235974
loss: 0.9858172535896301,grad_norm: 0.9999990378515736, iteration: 235975
loss: 0.9998059272766113,grad_norm: 0.999999093498056, iteration: 235976
loss: 1.0776633024215698,grad_norm: 0.9299846184776601, iteration: 235977
loss: 0.9737209677696228,grad_norm: 0.967193967109907, iteration: 235978
loss: 0.9814375042915344,grad_norm: 0.9394078711090762, iteration: 235979
loss: 0.9966945648193359,grad_norm: 0.9319885586904867, iteration: 235980
loss: 1.011675477027893,grad_norm: 0.9529764027811634, iteration: 235981
loss: 0.9944760203361511,grad_norm: 0.9999990943828562, iteration: 235982
loss: 0.9671345353126526,grad_norm: 0.9213527164795072, iteration: 235983
loss: 1.0273833274841309,grad_norm: 0.9099338431246812, iteration: 235984
loss: 1.085888385772705,grad_norm: 0.9999995123004588, iteration: 235985
loss: 0.9830950498580933,grad_norm: 0.8679147427099113, iteration: 235986
loss: 1.054254412651062,grad_norm: 0.9999992762081784, iteration: 235987
loss: 1.1071970462799072,grad_norm: 0.9999991742434489, iteration: 235988
loss: 1.0272135734558105,grad_norm: 0.999999709143442, iteration: 235989
loss: 1.0132650136947632,grad_norm: 0.9999993123985541, iteration: 235990
loss: 1.02511727809906,grad_norm: 0.9999992074981213, iteration: 235991
loss: 1.0093896389007568,grad_norm: 0.9096235885293747, iteration: 235992
loss: 1.0061051845550537,grad_norm: 0.9999994733865234, iteration: 235993
loss: 1.0099762678146362,grad_norm: 0.9745819302241335, iteration: 235994
loss: 1.0096064805984497,grad_norm: 0.8396168063868726, iteration: 235995
loss: 1.067886233329773,grad_norm: 0.9999991595969437, iteration: 235996
loss: 1.0150165557861328,grad_norm: 0.999999051580133, iteration: 235997
loss: 1.0125906467437744,grad_norm: 0.7002660373611336, iteration: 235998
loss: 1.0489658117294312,grad_norm: 0.9999990224477291, iteration: 235999
loss: 0.9905949234962463,grad_norm: 0.8919497381087947, iteration: 236000
loss: 1.0259462594985962,grad_norm: 0.9999993418325896, iteration: 236001
loss: 1.0144153833389282,grad_norm: 0.9999996377853466, iteration: 236002
loss: 1.0035566091537476,grad_norm: 0.999999180093444, iteration: 236003
loss: 1.0352174043655396,grad_norm: 0.9999992689318946, iteration: 236004
loss: 0.9962866306304932,grad_norm: 0.999999186728756, iteration: 236005
loss: 1.0757676362991333,grad_norm: 0.9999999650586637, iteration: 236006
loss: 1.0159368515014648,grad_norm: 0.9974083707508647, iteration: 236007
loss: 0.984207272529602,grad_norm: 0.9999990061052761, iteration: 236008
loss: 0.9980326890945435,grad_norm: 0.9999991212757372, iteration: 236009
loss: 1.0199083089828491,grad_norm: 0.8048198619989841, iteration: 236010
loss: 0.9909195899963379,grad_norm: 0.8678597583687988, iteration: 236011
loss: 1.0978469848632812,grad_norm: 0.9999992570908591, iteration: 236012
loss: 1.0178072452545166,grad_norm: 0.9999990926549914, iteration: 236013
loss: 0.9998940825462341,grad_norm: 0.8300006807661068, iteration: 236014
loss: 1.0334731340408325,grad_norm: 0.999999331401105, iteration: 236015
loss: 0.9975045323371887,grad_norm: 0.99999918188609, iteration: 236016
loss: 1.0130504369735718,grad_norm: 0.8831851252589203, iteration: 236017
loss: 1.0884143114089966,grad_norm: 0.9999993682479587, iteration: 236018
loss: 1.01985764503479,grad_norm: 0.9999991012858139, iteration: 236019
loss: 1.045871376991272,grad_norm: 0.9138753429209864, iteration: 236020
loss: 0.9938647150993347,grad_norm: 0.884370259585143, iteration: 236021
loss: 0.9940512776374817,grad_norm: 0.9999990968871773, iteration: 236022
loss: 0.9664186239242554,grad_norm: 0.8954718378258257, iteration: 236023
loss: 1.0285799503326416,grad_norm: 0.9999994044289704, iteration: 236024
loss: 1.012781023979187,grad_norm: 0.9997875721767618, iteration: 236025
loss: 1.274939775466919,grad_norm: 0.9999997816838633, iteration: 236026
loss: 0.958767294883728,grad_norm: 0.8203418720310938, iteration: 236027
loss: 1.0019774436950684,grad_norm: 0.8639287178557616, iteration: 236028
loss: 1.038596749305725,grad_norm: 0.8253730053659744, iteration: 236029
loss: 1.000746488571167,grad_norm: 0.9999991076171061, iteration: 236030
loss: 0.9920639395713806,grad_norm: 0.999999731627989, iteration: 236031
loss: 1.020463228225708,grad_norm: 0.9597482420299669, iteration: 236032
loss: 1.0178210735321045,grad_norm: 0.9051784712519438, iteration: 236033
loss: 0.9727932214736938,grad_norm: 0.9189937591684633, iteration: 236034
loss: 0.9956071376800537,grad_norm: 0.9097553232949684, iteration: 236035
loss: 0.9902239441871643,grad_norm: 0.9839864859906756, iteration: 236036
loss: 1.0075005292892456,grad_norm: 0.9999993102193488, iteration: 236037
loss: 0.9899068474769592,grad_norm: 0.9999991279295934, iteration: 236038
loss: 0.9752517938613892,grad_norm: 0.887245433657553, iteration: 236039
loss: 1.0147123336791992,grad_norm: 0.999999129609704, iteration: 236040
loss: 0.9920161366462708,grad_norm: 0.8947755595576113, iteration: 236041
loss: 1.000231146812439,grad_norm: 0.9999991322514465, iteration: 236042
loss: 1.002833366394043,grad_norm: 0.8291810165181596, iteration: 236043
loss: 0.9993830919265747,grad_norm: 0.8837651191174747, iteration: 236044
loss: 0.9727190136909485,grad_norm: 0.8453076042291623, iteration: 236045
loss: 1.031009554862976,grad_norm: 0.9101269064304262, iteration: 236046
loss: 0.9803270697593689,grad_norm: 0.9515404667958952, iteration: 236047
loss: 1.0007010698318481,grad_norm: 0.9279886588848588, iteration: 236048
loss: 1.0173195600509644,grad_norm: 0.9999992246574297, iteration: 236049
loss: 0.9846171736717224,grad_norm: 0.8972948512833474, iteration: 236050
loss: 1.0146366357803345,grad_norm: 0.907843249830885, iteration: 236051
loss: 1.0489888191223145,grad_norm: 0.9999995306777236, iteration: 236052
loss: 1.0353858470916748,grad_norm: 0.9999996189643149, iteration: 236053
loss: 0.9698994159698486,grad_norm: 0.999999322354587, iteration: 236054
loss: 1.0245695114135742,grad_norm: 0.9999993042713266, iteration: 236055
loss: 0.9854446649551392,grad_norm: 0.9682777132617296, iteration: 236056
loss: 1.1098933219909668,grad_norm: 0.99999902217453, iteration: 236057
loss: 0.991152286529541,grad_norm: 0.9999990071894376, iteration: 236058
loss: 1.014889121055603,grad_norm: 0.9999994846036734, iteration: 236059
loss: 1.0122791528701782,grad_norm: 0.9999992633895379, iteration: 236060
loss: 1.0107605457305908,grad_norm: 0.999999054256595, iteration: 236061
loss: 0.977817177772522,grad_norm: 0.9999991271509759, iteration: 236062
loss: 1.0032511949539185,grad_norm: 0.9249832553353841, iteration: 236063
loss: 1.018843650817871,grad_norm: 0.9805246351713431, iteration: 236064
loss: 1.0021543502807617,grad_norm: 0.7644463507746778, iteration: 236065
loss: 1.0906635522842407,grad_norm: 0.9999992319331111, iteration: 236066
loss: 0.996999204158783,grad_norm: 0.9670786974740673, iteration: 236067
loss: 0.9779843688011169,grad_norm: 0.8026743374427034, iteration: 236068
loss: 1.024564266204834,grad_norm: 0.9119952402807036, iteration: 236069
loss: 0.9870391488075256,grad_norm: 0.9292022085607143, iteration: 236070
loss: 1.013484239578247,grad_norm: 0.9999989882540392, iteration: 236071
loss: 0.9618601202964783,grad_norm: 0.9900450140458127, iteration: 236072
loss: 0.9888774156570435,grad_norm: 0.9999995399164172, iteration: 236073
loss: 1.0080534219741821,grad_norm: 0.9175198660420175, iteration: 236074
loss: 1.0372846126556396,grad_norm: 0.999999672674941, iteration: 236075
loss: 1.0283616781234741,grad_norm: 0.9999991795709159, iteration: 236076
loss: 1.0496538877487183,grad_norm: 0.9068138909607847, iteration: 236077
loss: 0.9983716011047363,grad_norm: 0.9999990750149046, iteration: 236078
loss: 1.0178788900375366,grad_norm: 0.8916285512660369, iteration: 236079
loss: 1.0173135995864868,grad_norm: 0.8498646631644384, iteration: 236080
loss: 1.0024241209030151,grad_norm: 0.9999990905197371, iteration: 236081
loss: 0.9922195076942444,grad_norm: 0.9755485403038378, iteration: 236082
loss: 1.0199388265609741,grad_norm: 0.792573527819634, iteration: 236083
loss: 1.010615587234497,grad_norm: 0.8933649810531408, iteration: 236084
loss: 1.0469635725021362,grad_norm: 0.979428604044217, iteration: 236085
loss: 0.9900367856025696,grad_norm: 0.9076878737920966, iteration: 236086
loss: 0.9873414635658264,grad_norm: 0.9999998989546847, iteration: 236087
loss: 1.0073390007019043,grad_norm: 0.8559493865829422, iteration: 236088
loss: 1.008937954902649,grad_norm: 0.9061161118127352, iteration: 236089
loss: 0.9979171752929688,grad_norm: 0.8384566059103448, iteration: 236090
loss: 1.1352105140686035,grad_norm: 0.9999997292093095, iteration: 236091
loss: 0.987037181854248,grad_norm: 0.9610334864977387, iteration: 236092
loss: 1.0096397399902344,grad_norm: 0.964583452007235, iteration: 236093
loss: 0.9978063106536865,grad_norm: 0.9999993935445783, iteration: 236094
loss: 0.9925400614738464,grad_norm: 0.8129496402708639, iteration: 236095
loss: 1.0195327997207642,grad_norm: 0.9947245898989521, iteration: 236096
loss: 1.0398824214935303,grad_norm: 0.8456926764490683, iteration: 236097
loss: 0.9474568367004395,grad_norm: 0.8757234548312784, iteration: 236098
loss: 0.9762831926345825,grad_norm: 0.9288126948315248, iteration: 236099
loss: 1.0748944282531738,grad_norm: 0.9999995296472615, iteration: 236100
loss: 1.0520151853561401,grad_norm: 0.9693443851423105, iteration: 236101
loss: 0.9804274439811707,grad_norm: 0.999999201542237, iteration: 236102
loss: 0.9983803033828735,grad_norm: 0.9513410384780946, iteration: 236103
loss: 1.0487784147262573,grad_norm: 0.9999990752874017, iteration: 236104
loss: 1.0137016773223877,grad_norm: 0.9999990408314966, iteration: 236105
loss: 0.9859879612922668,grad_norm: 0.9120495691425542, iteration: 236106
loss: 0.9990828037261963,grad_norm: 0.7615929882769019, iteration: 236107
loss: 1.060322880744934,grad_norm: 0.8515101828133976, iteration: 236108
loss: 0.9501927495002747,grad_norm: 0.9694218004680297, iteration: 236109
loss: 0.9999845623970032,grad_norm: 0.9459636164102126, iteration: 236110
loss: 0.995810329914093,grad_norm: 1.0000000251996508, iteration: 236111
loss: 0.9848108291625977,grad_norm: 0.9999991644262372, iteration: 236112
loss: 1.0062097311019897,grad_norm: 0.8743309505841996, iteration: 236113
loss: 0.9854691624641418,grad_norm: 0.9473218614113541, iteration: 236114
loss: 0.992989182472229,grad_norm: 0.8440314047313242, iteration: 236115
loss: 1.0020891427993774,grad_norm: 0.9407723602239746, iteration: 236116
loss: 0.9848015308380127,grad_norm: 0.8865663328836338, iteration: 236117
loss: 0.9966947436332703,grad_norm: 0.9999991646068479, iteration: 236118
loss: 1.0134267807006836,grad_norm: 0.9999991997261714, iteration: 236119
loss: 1.0000979900360107,grad_norm: 0.9999991097201912, iteration: 236120
loss: 1.0365556478500366,grad_norm: 0.7722895619306877, iteration: 236121
loss: 0.95660400390625,grad_norm: 0.9372550181950029, iteration: 236122
loss: 1.048464059829712,grad_norm: 0.890408990571781, iteration: 236123
loss: 0.9926818609237671,grad_norm: 0.8497617248093664, iteration: 236124
loss: 1.1063666343688965,grad_norm: 0.9999998067478197, iteration: 236125
loss: 1.0192975997924805,grad_norm: 0.8318097139787691, iteration: 236126
loss: 1.0583797693252563,grad_norm: 0.9999990397335587, iteration: 236127
loss: 0.9844771027565002,grad_norm: 0.7972301532847453, iteration: 236128
loss: 0.9820740818977356,grad_norm: 0.9037202472304926, iteration: 236129
loss: 0.9994509816169739,grad_norm: 0.8428040662364624, iteration: 236130
loss: 1.0682953596115112,grad_norm: 0.9999992109554676, iteration: 236131
loss: 0.9990099668502808,grad_norm: 0.9147675774661509, iteration: 236132
loss: 1.0236923694610596,grad_norm: 0.874458635646915, iteration: 236133
loss: 1.0214667320251465,grad_norm: 0.9262448261761007, iteration: 236134
loss: 1.1080060005187988,grad_norm: 0.9999991001933567, iteration: 236135
loss: 1.0181266069412231,grad_norm: 0.999999298409073, iteration: 236136
loss: 1.0398039817810059,grad_norm: 0.9999996165070416, iteration: 236137
loss: 1.0910468101501465,grad_norm: 0.9999990743212804, iteration: 236138
loss: 0.9826425313949585,grad_norm: 0.9310494306533565, iteration: 236139
loss: 1.0272952318191528,grad_norm: 0.9999990673641849, iteration: 236140
loss: 0.9883170127868652,grad_norm: 0.7372863846701578, iteration: 236141
loss: 1.0090197324752808,grad_norm: 0.8418620535310364, iteration: 236142
loss: 0.9768215417861938,grad_norm: 0.9006926200112576, iteration: 236143
loss: 0.967292845249176,grad_norm: 0.9873600795483978, iteration: 236144
loss: 0.9795149564743042,grad_norm: 0.9999988807453247, iteration: 236145
loss: 0.9945802092552185,grad_norm: 0.9999992373347201, iteration: 236146
loss: 0.9790121912956238,grad_norm: 0.9556191056557563, iteration: 236147
loss: 0.9895011186599731,grad_norm: 0.8581816541284578, iteration: 236148
loss: 1.0233020782470703,grad_norm: 0.9445322545114186, iteration: 236149
loss: 0.9817200303077698,grad_norm: 0.6504676512705142, iteration: 236150
loss: 1.0207113027572632,grad_norm: 0.9252572072403118, iteration: 236151
loss: 1.0017163753509521,grad_norm: 0.9291303866196272, iteration: 236152
loss: 0.9877878427505493,grad_norm: 0.8836877279493188, iteration: 236153
loss: 1.0040580034255981,grad_norm: 0.9495867215055122, iteration: 236154
loss: 0.9758003354072571,grad_norm: 0.9999992601642596, iteration: 236155
loss: 1.0091055631637573,grad_norm: 0.8741951195059605, iteration: 236156
loss: 1.0295941829681396,grad_norm: 0.9999991239168549, iteration: 236157
loss: 0.9773950576782227,grad_norm: 0.999998877337354, iteration: 236158
loss: 1.027390956878662,grad_norm: 0.9999991197451722, iteration: 236159
loss: 0.9891844391822815,grad_norm: 0.8315087185608955, iteration: 236160
loss: 1.0086841583251953,grad_norm: 0.7925227928577327, iteration: 236161
loss: 0.9840372204780579,grad_norm: 0.9135531762863387, iteration: 236162
loss: 0.9655400514602661,grad_norm: 0.8076028229619233, iteration: 236163
loss: 1.028503656387329,grad_norm: 0.87727472550235, iteration: 236164
loss: 1.0375295877456665,grad_norm: 0.999999427932152, iteration: 236165
loss: 1.0033291578292847,grad_norm: 0.9006943384853485, iteration: 236166
loss: 1.0741844177246094,grad_norm: 0.9999999010749817, iteration: 236167
loss: 0.9644045233726501,grad_norm: 0.8857855893326965, iteration: 236168
loss: 1.0216172933578491,grad_norm: 0.999999180612163, iteration: 236169
loss: 1.0034246444702148,grad_norm: 0.9999991024142156, iteration: 236170
loss: 1.014451026916504,grad_norm: 0.9195842411033056, iteration: 236171
loss: 1.0172778367996216,grad_norm: 0.9153344185926718, iteration: 236172
loss: 1.0121034383773804,grad_norm: 0.9999991850987598, iteration: 236173
loss: 0.9757032990455627,grad_norm: 0.9999995426094214, iteration: 236174
loss: 0.9706859588623047,grad_norm: 0.8418865939450004, iteration: 236175
loss: 1.0513514280319214,grad_norm: 0.9999995312580442, iteration: 236176
loss: 1.0033520460128784,grad_norm: 0.8108325949431183, iteration: 236177
loss: 1.0677063465118408,grad_norm: 0.9999994504281842, iteration: 236178
loss: 1.0296481847763062,grad_norm: 0.9999991342146696, iteration: 236179
loss: 0.997391939163208,grad_norm: 0.999999042628132, iteration: 236180
loss: 0.971001148223877,grad_norm: 0.836227725965986, iteration: 236181
loss: 1.0312949419021606,grad_norm: 0.8733473420417485, iteration: 236182
loss: 0.992535412311554,grad_norm: 0.9999990750103972, iteration: 236183
loss: 1.0236307382583618,grad_norm: 0.9366901769413372, iteration: 236184
loss: 0.9717540740966797,grad_norm: 0.9275110616274792, iteration: 236185
loss: 1.0007550716400146,grad_norm: 0.8295089086427576, iteration: 236186
loss: 1.0076748132705688,grad_norm: 0.9999997823275851, iteration: 236187
loss: 1.0260753631591797,grad_norm: 0.9467371421231217, iteration: 236188
loss: 0.9888089895248413,grad_norm: 0.8434427160654996, iteration: 236189
loss: 0.9787698984146118,grad_norm: 0.8330679516542364, iteration: 236190
loss: 1.0434921979904175,grad_norm: 0.9966299606566928, iteration: 236191
loss: 0.9645809531211853,grad_norm: 0.9127786896080983, iteration: 236192
loss: 1.007188081741333,grad_norm: 0.8961403427083421, iteration: 236193
loss: 1.0130499601364136,grad_norm: 0.9999989646205638, iteration: 236194
loss: 0.9794613718986511,grad_norm: 0.9999991693462744, iteration: 236195
loss: 1.031319260597229,grad_norm: 0.9999990058378184, iteration: 236196
loss: 1.067787766456604,grad_norm: 0.9999989249835484, iteration: 236197
loss: 1.022958755493164,grad_norm: 0.9999992208130962, iteration: 236198
loss: 1.007159948348999,grad_norm: 0.9651869004698025, iteration: 236199
loss: 1.0075758695602417,grad_norm: 0.770314317816704, iteration: 236200
loss: 1.0242506265640259,grad_norm: 0.9814025619049873, iteration: 236201
loss: 1.0530421733856201,grad_norm: 0.9999996749299019, iteration: 236202
loss: 0.9948216080665588,grad_norm: 0.9999991782820528, iteration: 236203
loss: 0.984798014163971,grad_norm: 0.9999992057582358, iteration: 236204
loss: 1.0084844827651978,grad_norm: 0.9685829503108079, iteration: 236205
loss: 1.0065935850143433,grad_norm: 0.9999991077617021, iteration: 236206
loss: 0.9771878123283386,grad_norm: 0.8576067294201132, iteration: 236207
loss: 0.9783696532249451,grad_norm: 0.9354566120611748, iteration: 236208
loss: 0.9876419305801392,grad_norm: 0.9040585945976544, iteration: 236209
loss: 1.0309545993804932,grad_norm: 0.9865555213072125, iteration: 236210
loss: 1.014184594154358,grad_norm: 0.9999998992451826, iteration: 236211
loss: 0.9943852424621582,grad_norm: 0.9741561819721742, iteration: 236212
loss: 1.0210142135620117,grad_norm: 0.8238870595213154, iteration: 236213
loss: 1.0442981719970703,grad_norm: 0.9999989685932639, iteration: 236214
loss: 1.0315831899642944,grad_norm: 0.9999990488420064, iteration: 236215
loss: 0.9686658382415771,grad_norm: 0.9999989850971552, iteration: 236216
loss: 1.0184955596923828,grad_norm: 0.9796177548913171, iteration: 236217
loss: 0.9937512874603271,grad_norm: 0.9470732333498743, iteration: 236218
loss: 1.012218952178955,grad_norm: 0.9746597112362357, iteration: 236219
loss: 1.0423887968063354,grad_norm: 0.9700124683819823, iteration: 236220
loss: 0.9918825030326843,grad_norm: 0.8183119024182801, iteration: 236221
loss: 1.0401413440704346,grad_norm: 0.9999998534456498, iteration: 236222
loss: 1.0524131059646606,grad_norm: 0.9999991570273286, iteration: 236223
loss: 0.933986246585846,grad_norm: 0.9999990737888612, iteration: 236224
loss: 0.9919776320457458,grad_norm: 0.811698697379483, iteration: 236225
loss: 0.9767870306968689,grad_norm: 0.9553003365095079, iteration: 236226
loss: 1.0496352910995483,grad_norm: 0.7368254149377783, iteration: 236227
loss: 0.9931939244270325,grad_norm: 0.8037156089147326, iteration: 236228
loss: 0.9755163788795471,grad_norm: 0.9999991398379513, iteration: 236229
loss: 0.9984736442565918,grad_norm: 0.9999994200070396, iteration: 236230
loss: 0.994023323059082,grad_norm: 0.9219693047533902, iteration: 236231
loss: 0.9954668283462524,grad_norm: 0.9400025273774376, iteration: 236232
loss: 0.9644894599914551,grad_norm: 0.833942234616547, iteration: 236233
loss: 0.9879157543182373,grad_norm: 0.9341420089921062, iteration: 236234
loss: 1.0242027044296265,grad_norm: 0.9999992591608079, iteration: 236235
loss: 0.9578892588615417,grad_norm: 0.8015347958055418, iteration: 236236
loss: 1.0489693880081177,grad_norm: 0.9999990567909478, iteration: 236237
loss: 1.019810438156128,grad_norm: 0.9206671371550608, iteration: 236238
loss: 0.976735532283783,grad_norm: 0.9999990466984731, iteration: 236239
loss: 0.9850215911865234,grad_norm: 0.99999937439366, iteration: 236240
loss: 1.0735297203063965,grad_norm: 0.9999999663982378, iteration: 236241
loss: 0.9680331945419312,grad_norm: 0.8647966894200235, iteration: 236242
loss: 1.0089061260223389,grad_norm: 0.9999998848036874, iteration: 236243
loss: 1.1289749145507812,grad_norm: 0.9999993370407851, iteration: 236244
loss: 1.010610818862915,grad_norm: 0.8202273311307792, iteration: 236245
loss: 0.9793514609336853,grad_norm: 0.9828398182568613, iteration: 236246
loss: 0.9930089116096497,grad_norm: 0.8578350396731478, iteration: 236247
loss: 1.0143109560012817,grad_norm: 0.9999993218014892, iteration: 236248
loss: 0.9836806654930115,grad_norm: 0.9178698885289353, iteration: 236249
loss: 1.1260377168655396,grad_norm: 0.9999994804062512, iteration: 236250
loss: 0.9848350882530212,grad_norm: 0.9999995988041485, iteration: 236251
loss: 1.1000373363494873,grad_norm: 0.999999119792022, iteration: 236252
loss: 0.9701094031333923,grad_norm: 0.9662583951399764, iteration: 236253
loss: 1.0892016887664795,grad_norm: 0.9999990745230185, iteration: 236254
loss: 0.9766213893890381,grad_norm: 0.9999992035714549, iteration: 236255
loss: 1.0196993350982666,grad_norm: 0.9845078397583458, iteration: 236256
loss: 0.9794511198997498,grad_norm: 0.9999990358805604, iteration: 236257
loss: 0.9664826393127441,grad_norm: 0.9514116368323027, iteration: 236258
loss: 1.0743361711502075,grad_norm: 0.9999990326794653, iteration: 236259
loss: 1.0067986249923706,grad_norm: 0.9123210774797821, iteration: 236260
loss: 1.141818881034851,grad_norm: 0.9999991052872266, iteration: 236261
loss: 0.9855786561965942,grad_norm: 0.9287992110944024, iteration: 236262
loss: 1.0679879188537598,grad_norm: 0.9999991802957956, iteration: 236263
loss: 1.0131748914718628,grad_norm: 0.9999993684590988, iteration: 236264
loss: 1.0764986276626587,grad_norm: 0.9999991248480588, iteration: 236265
loss: 0.9741913080215454,grad_norm: 0.8241574030586122, iteration: 236266
loss: 1.0692808628082275,grad_norm: 0.9999993896856091, iteration: 236267
loss: 1.0980472564697266,grad_norm: 0.9999997155041432, iteration: 236268
loss: 1.033922553062439,grad_norm: 0.9527386927154902, iteration: 236269
loss: 1.060677170753479,grad_norm: 0.8933755682555364, iteration: 236270
loss: 1.1745775938034058,grad_norm: 0.9999991826026603, iteration: 236271
loss: 1.071556806564331,grad_norm: 0.9999994196502022, iteration: 236272
loss: 1.259377360343933,grad_norm: 0.9999996067175391, iteration: 236273
loss: 1.0265358686447144,grad_norm: 0.9999994660508484, iteration: 236274
loss: 0.9850653409957886,grad_norm: 0.9351824465173777, iteration: 236275
loss: 1.046886920928955,grad_norm: 0.9999990197774282, iteration: 236276
loss: 1.2345740795135498,grad_norm: 0.9999993370480108, iteration: 236277
loss: 0.9875439405441284,grad_norm: 0.9999991317924116, iteration: 236278
loss: 1.0880858898162842,grad_norm: 0.9999998661253103, iteration: 236279
loss: 0.9933298230171204,grad_norm: 0.9999989995254355, iteration: 236280
loss: 1.0997620820999146,grad_norm: 0.9999999544582675, iteration: 236281
loss: 1.0143253803253174,grad_norm: 0.9646368142365773, iteration: 236282
loss: 1.070885181427002,grad_norm: 0.9999992697295749, iteration: 236283
loss: 1.0882247686386108,grad_norm: 0.9999998717589726, iteration: 236284
loss: 1.006367802619934,grad_norm: 0.9999989857484349, iteration: 236285
loss: 1.0058307647705078,grad_norm: 0.9255460280391916, iteration: 236286
loss: 0.9968844652175903,grad_norm: 0.8793352225932329, iteration: 236287
loss: 0.976643443107605,grad_norm: 0.8510653510726258, iteration: 236288
loss: 1.0081868171691895,grad_norm: 0.895408371389296, iteration: 236289
loss: 1.155928611755371,grad_norm: 1.0000000310878692, iteration: 236290
loss: 1.0394121408462524,grad_norm: 0.9999998870081667, iteration: 236291
loss: 0.9709923267364502,grad_norm: 0.8292871135633954, iteration: 236292
loss: 0.998740017414093,grad_norm: 0.9562470575598663, iteration: 236293
loss: 0.9748469591140747,grad_norm: 0.8643369547807286, iteration: 236294
loss: 0.9837191700935364,grad_norm: 0.8896531280644399, iteration: 236295
loss: 0.9897615909576416,grad_norm: 0.75798055247651, iteration: 236296
loss: 1.087308645248413,grad_norm: 0.9999996411930822, iteration: 236297
loss: 1.0289804935455322,grad_norm: 0.9999995099980162, iteration: 236298
loss: 1.042587161064148,grad_norm: 0.9999990667455904, iteration: 236299
loss: 0.9611609578132629,grad_norm: 0.9999990113660716, iteration: 236300
loss: 1.076791524887085,grad_norm: 0.999999721855489, iteration: 236301
loss: 0.9744521975517273,grad_norm: 0.7863494634154921, iteration: 236302
loss: 1.0052474737167358,grad_norm: 0.9999991181463797, iteration: 236303
loss: 0.9731096029281616,grad_norm: 0.881025812554771, iteration: 236304
loss: 1.0264910459518433,grad_norm: 0.9999992439029528, iteration: 236305
loss: 1.0548670291900635,grad_norm: 0.9999990365184129, iteration: 236306
loss: 1.0915740728378296,grad_norm: 0.9999999074977018, iteration: 236307
loss: 1.0161782503128052,grad_norm: 0.9999991765121957, iteration: 236308
loss: 1.0057424306869507,grad_norm: 0.9999989550127061, iteration: 236309
loss: 1.0394797325134277,grad_norm: 0.8025279617032685, iteration: 236310
loss: 1.017958641052246,grad_norm: 0.8688327815805019, iteration: 236311
loss: 1.0478980541229248,grad_norm: 0.999999496408113, iteration: 236312
loss: 1.004517912864685,grad_norm: 0.906292033350017, iteration: 236313
loss: 1.0415294170379639,grad_norm: 0.8515589604531969, iteration: 236314
loss: 1.1416213512420654,grad_norm: 0.9999999580563679, iteration: 236315
loss: 1.0061546564102173,grad_norm: 0.7932812147001025, iteration: 236316
loss: 0.996067225933075,grad_norm: 0.9214341302892181, iteration: 236317
loss: 1.001711130142212,grad_norm: 0.9369704604847511, iteration: 236318
loss: 1.0242774486541748,grad_norm: 0.9118869097492004, iteration: 236319
loss: 1.054537057876587,grad_norm: 0.9999998881179034, iteration: 236320
loss: 1.0531164407730103,grad_norm: 0.9999990825162882, iteration: 236321
loss: 1.080741047859192,grad_norm: 0.9999993727051376, iteration: 236322
loss: 0.9925388097763062,grad_norm: 0.8536609548058303, iteration: 236323
loss: 1.0291939973831177,grad_norm: 0.8180721194994628, iteration: 236324
loss: 1.0501996278762817,grad_norm: 0.9999993760102853, iteration: 236325
loss: 1.0091999769210815,grad_norm: 0.9999992286170725, iteration: 236326
loss: 0.9791598916053772,grad_norm: 0.8966451335711203, iteration: 236327
loss: 1.0436774492263794,grad_norm: 0.9999990350063779, iteration: 236328
loss: 1.0098507404327393,grad_norm: 0.9910951586585851, iteration: 236329
loss: 0.9955142736434937,grad_norm: 0.9307543212583691, iteration: 236330
loss: 1.0306020975112915,grad_norm: 0.996029783185531, iteration: 236331
loss: 1.0136005878448486,grad_norm: 0.9999993211975419, iteration: 236332
loss: 0.9966375231742859,grad_norm: 0.9070825844220864, iteration: 236333
loss: 0.9910838007926941,grad_norm: 0.9999990573978417, iteration: 236334
loss: 0.9783143401145935,grad_norm: 0.8700549972099864, iteration: 236335
loss: 1.0785492658615112,grad_norm: 0.9999992006408535, iteration: 236336
loss: 0.9865953922271729,grad_norm: 0.9159746516017692, iteration: 236337
loss: 0.9860679507255554,grad_norm: 0.9075984094027257, iteration: 236338
loss: 0.9977722764015198,grad_norm: 0.8903185534456505, iteration: 236339
loss: 0.9827598929405212,grad_norm: 0.9862178789933432, iteration: 236340
loss: 1.0043374300003052,grad_norm: 0.9597869574185729, iteration: 236341
loss: 0.9786393046379089,grad_norm: 0.999999024279257, iteration: 236342
loss: 0.9855005145072937,grad_norm: 0.7738399626954003, iteration: 236343
loss: 0.9987732768058777,grad_norm: 0.8886137825303735, iteration: 236344
loss: 1.1088324785232544,grad_norm: 0.9999995072872012, iteration: 236345
loss: 1.0286953449249268,grad_norm: 0.9544651110942669, iteration: 236346
loss: 1.0190316438674927,grad_norm: 0.8859564377276944, iteration: 236347
loss: 1.0590369701385498,grad_norm: 0.9999998812104769, iteration: 236348
loss: 0.9923499822616577,grad_norm: 0.6919841225723242, iteration: 236349
loss: 0.97871333360672,grad_norm: 0.8749545068770695, iteration: 236350
loss: 0.9910849928855896,grad_norm: 0.9999995203606136, iteration: 236351
loss: 1.0146784782409668,grad_norm: 0.9381775768867261, iteration: 236352
loss: 0.974133312702179,grad_norm: 0.9999991366968507, iteration: 236353
loss: 0.981569230556488,grad_norm: 0.7309158620578239, iteration: 236354
loss: 1.0132426023483276,grad_norm: 0.9999991250726952, iteration: 236355
loss: 0.9500101208686829,grad_norm: 0.8083551711656625, iteration: 236356
loss: 0.9817495942115784,grad_norm: 0.8507340526262369, iteration: 236357
loss: 0.9976891279220581,grad_norm: 0.9556107529471647, iteration: 236358
loss: 1.0024890899658203,grad_norm: 0.809798996199743, iteration: 236359
loss: 1.0172001123428345,grad_norm: 0.9999991261972377, iteration: 236360
loss: 0.9505460858345032,grad_norm: 0.9999991621830695, iteration: 236361
loss: 1.0091580152511597,grad_norm: 0.9999989046035528, iteration: 236362
loss: 0.9952139258384705,grad_norm: 0.9310993550187161, iteration: 236363
loss: 1.0582274198532104,grad_norm: 0.999999872663494, iteration: 236364
loss: 1.0073670148849487,grad_norm: 0.9999990471069301, iteration: 236365
loss: 0.9948017001152039,grad_norm: 0.7852775359334676, iteration: 236366
loss: 0.9762536287307739,grad_norm: 0.9276150484928377, iteration: 236367
loss: 0.9884535670280457,grad_norm: 0.8691179689806232, iteration: 236368
loss: 1.0478707551956177,grad_norm: 0.9999993262750985, iteration: 236369
loss: 1.0022331476211548,grad_norm: 0.9529274512602719, iteration: 236370
loss: 1.0173475742340088,grad_norm: 0.9999992222503375, iteration: 236371
loss: 1.1135727167129517,grad_norm: 0.9999999520007375, iteration: 236372
loss: 1.0192493200302124,grad_norm: 0.8767548300800572, iteration: 236373
loss: 1.0219963788986206,grad_norm: 0.8338669724524543, iteration: 236374
loss: 1.0009615421295166,grad_norm: 0.9990189827271625, iteration: 236375
loss: 1.0645462274551392,grad_norm: 0.9999990992020173, iteration: 236376
loss: 0.9897850751876831,grad_norm: 0.7935001570209174, iteration: 236377
loss: 1.0076128244400024,grad_norm: 0.9999990995511729, iteration: 236378
loss: 0.9905937314033508,grad_norm: 0.9999991742388368, iteration: 236379
loss: 1.0218710899353027,grad_norm: 0.9999995461523297, iteration: 236380
loss: 0.982477605342865,grad_norm: 0.9151606884947577, iteration: 236381
loss: 0.9824630618095398,grad_norm: 0.9999991164166119, iteration: 236382
loss: 1.0160290002822876,grad_norm: 0.833342568446194, iteration: 236383
loss: 1.0270558595657349,grad_norm: 0.9999995459784523, iteration: 236384
loss: 1.0822079181671143,grad_norm: 0.9999997708338175, iteration: 236385
loss: 0.9838497042655945,grad_norm: 0.8697512070583036, iteration: 236386
loss: 1.0147696733474731,grad_norm: 0.9999991667672011, iteration: 236387
loss: 0.9844759106636047,grad_norm: 0.9999989053263276, iteration: 236388
loss: 1.0018013715744019,grad_norm: 0.8312036664556158, iteration: 236389
loss: 1.0144420862197876,grad_norm: 0.8871323129831054, iteration: 236390
loss: 1.0370607376098633,grad_norm: 0.9999992169000256, iteration: 236391
loss: 0.9974072575569153,grad_norm: 0.8229621718490561, iteration: 236392
loss: 1.0137580633163452,grad_norm: 0.9999993933936543, iteration: 236393
loss: 1.0252964496612549,grad_norm: 0.8565899035803558, iteration: 236394
loss: 1.1373987197875977,grad_norm: 0.999999465449714, iteration: 236395
loss: 1.0107935667037964,grad_norm: 0.847504940313859, iteration: 236396
loss: 1.1449981927871704,grad_norm: 0.9999999167166577, iteration: 236397
loss: 0.9906978011131287,grad_norm: 0.9999991458433928, iteration: 236398
loss: 0.9764541387557983,grad_norm: 0.793000445983288, iteration: 236399
loss: 0.956426203250885,grad_norm: 0.810782139277007, iteration: 236400
loss: 1.0403612852096558,grad_norm: 0.8719156264902038, iteration: 236401
loss: 1.0049209594726562,grad_norm: 0.9999992126135984, iteration: 236402
loss: 1.1025969982147217,grad_norm: 0.8831032770759661, iteration: 236403
loss: 0.9733103513717651,grad_norm: 0.9999997618643154, iteration: 236404
loss: 1.0291378498077393,grad_norm: 0.9999994404466718, iteration: 236405
loss: 1.0210213661193848,grad_norm: 0.9999996136927943, iteration: 236406
loss: 0.9802108407020569,grad_norm: 0.807465806654419, iteration: 236407
loss: 1.012557864189148,grad_norm: 0.9999991468568662, iteration: 236408
loss: 0.9937841296195984,grad_norm: 0.9999990164567065, iteration: 236409
loss: 1.0182591676712036,grad_norm: 0.9999998813127022, iteration: 236410
loss: 1.0054750442504883,grad_norm: 0.9999998398297344, iteration: 236411
loss: 0.9968663454055786,grad_norm: 0.999999041578923, iteration: 236412
loss: 1.1682988405227661,grad_norm: 0.9999998259666406, iteration: 236413
loss: 1.0219752788543701,grad_norm: 0.9999991892785871, iteration: 236414
loss: 1.004386305809021,grad_norm: 0.9999990588557893, iteration: 236415
loss: 0.9874027371406555,grad_norm: 0.7919843384476429, iteration: 236416
loss: 0.9818151593208313,grad_norm: 0.9481883875365184, iteration: 236417
loss: 1.0250788927078247,grad_norm: 0.8906306753166242, iteration: 236418
loss: 0.9994773268699646,grad_norm: 0.9999990932041662, iteration: 236419
loss: 1.051275372505188,grad_norm: 0.74535034122603, iteration: 236420
loss: 0.9669167399406433,grad_norm: 0.9999991969501455, iteration: 236421
loss: 1.0849177837371826,grad_norm: 0.999999904897329, iteration: 236422
loss: 0.9676458835601807,grad_norm: 0.9697466385036099, iteration: 236423
loss: 1.1131634712219238,grad_norm: 0.9999999070931994, iteration: 236424
loss: 0.9949741363525391,grad_norm: 0.999999083469019, iteration: 236425
loss: 1.0014286041259766,grad_norm: 0.9999989810825022, iteration: 236426
loss: 1.0157173871994019,grad_norm: 0.973261068701078, iteration: 236427
loss: 1.0123096704483032,grad_norm: 0.8714845087148395, iteration: 236428
loss: 1.0396925210952759,grad_norm: 0.9511588909315313, iteration: 236429
loss: 0.9768989086151123,grad_norm: 0.8825233125693147, iteration: 236430
loss: 0.9797922372817993,grad_norm: 0.9999989703368086, iteration: 236431
loss: 0.9832876920700073,grad_norm: 0.8981685603599178, iteration: 236432
loss: 1.0430290699005127,grad_norm: 0.8478059142246066, iteration: 236433
loss: 1.018162488937378,grad_norm: 0.8483374866424231, iteration: 236434
loss: 1.0093709230422974,grad_norm: 0.8844310323091417, iteration: 236435
loss: 1.04673433303833,grad_norm: 0.999999842001153, iteration: 236436
loss: 1.1468652486801147,grad_norm: 0.9999992276455115, iteration: 236437
loss: 0.9663038849830627,grad_norm: 0.9409285027161752, iteration: 236438
loss: 1.0049152374267578,grad_norm: 0.9999989183312733, iteration: 236439
loss: 1.337989091873169,grad_norm: 0.9999997581910313, iteration: 236440
loss: 0.9823472499847412,grad_norm: 0.9907031992184435, iteration: 236441
loss: 0.9830319285392761,grad_norm: 0.8660700307990228, iteration: 236442
loss: 1.0550312995910645,grad_norm: 0.8425557309893628, iteration: 236443
loss: 1.0188056230545044,grad_norm: 0.9497004471050587, iteration: 236444
loss: 0.9726378321647644,grad_norm: 0.7015307714196625, iteration: 236445
loss: 0.9735302925109863,grad_norm: 0.8594660569069876, iteration: 236446
loss: 1.0003700256347656,grad_norm: 0.958483525747102, iteration: 236447
loss: 1.0291236639022827,grad_norm: 0.9999993961602112, iteration: 236448
loss: 0.9779322743415833,grad_norm: 0.933389932569699, iteration: 236449
loss: 0.9951634407043457,grad_norm: 1.0000000245593403, iteration: 236450
loss: 1.1590912342071533,grad_norm: 0.9999998732710893, iteration: 236451
loss: 1.0009593963623047,grad_norm: 0.7894521846954244, iteration: 236452
loss: 1.0557650327682495,grad_norm: 0.9636011394357326, iteration: 236453
loss: 1.0232163667678833,grad_norm: 0.9999992176288713, iteration: 236454
loss: 1.0066208839416504,grad_norm: 0.9999991040019672, iteration: 236455
loss: 1.011940360069275,grad_norm: 0.9160686308360593, iteration: 236456
loss: 1.0733972787857056,grad_norm: 0.9999993622162061, iteration: 236457
loss: 1.0422450304031372,grad_norm: 0.9999998669683065, iteration: 236458
loss: 1.0012497901916504,grad_norm: 0.9999990601021888, iteration: 236459
loss: 0.9933654069900513,grad_norm: 0.921960000381508, iteration: 236460
loss: 0.967374324798584,grad_norm: 0.999998991310247, iteration: 236461
loss: 1.0267757177352905,grad_norm: 0.9999991549086881, iteration: 236462
loss: 0.9782904982566833,grad_norm: 0.9267383664238869, iteration: 236463
loss: 0.9693920016288757,grad_norm: 0.8831785389982069, iteration: 236464
loss: 0.9955784678459167,grad_norm: 0.8733065457119348, iteration: 236465
loss: 1.006285548210144,grad_norm: 0.8730312423519487, iteration: 236466
loss: 1.0195337533950806,grad_norm: 0.9072896307003223, iteration: 236467
loss: 1.0702571868896484,grad_norm: 0.9999992914333383, iteration: 236468
loss: 1.0152477025985718,grad_norm: 0.9999994515985416, iteration: 236469
loss: 0.9915481805801392,grad_norm: 0.999999287491094, iteration: 236470
loss: 0.9631242752075195,grad_norm: 0.9178319622395201, iteration: 236471
loss: 1.1038883924484253,grad_norm: 0.8662518738525798, iteration: 236472
loss: 1.0056639909744263,grad_norm: 0.9999992233989319, iteration: 236473
loss: 1.0154716968536377,grad_norm: 0.9999996995962703, iteration: 236474
loss: 1.0029711723327637,grad_norm: 0.9999991416430906, iteration: 236475
loss: 1.0665388107299805,grad_norm: 0.9999991366297769, iteration: 236476
loss: 0.975731372833252,grad_norm: 0.8787093581272106, iteration: 236477
loss: 0.9404589533805847,grad_norm: 0.7928440484660679, iteration: 236478
loss: 0.9932082891464233,grad_norm: 0.8685182865508374, iteration: 236479
loss: 1.0229244232177734,grad_norm: 0.999999011094404, iteration: 236480
loss: 0.9969967603683472,grad_norm: 0.9999992632562336, iteration: 236481
loss: 0.992880642414093,grad_norm: 0.8386169490909569, iteration: 236482
loss: 1.000082015991211,grad_norm: 0.8516191764483815, iteration: 236483
loss: 1.0121606588363647,grad_norm: 0.8679501080974633, iteration: 236484
loss: 0.9924816489219666,grad_norm: 0.9999990173662924, iteration: 236485
loss: 0.9944673180580139,grad_norm: 0.9999992927203933, iteration: 236486
loss: 0.9985200762748718,grad_norm: 0.9999990869026616, iteration: 236487
loss: 1.0075089931488037,grad_norm: 0.8709786179097646, iteration: 236488
loss: 1.0710443258285522,grad_norm: 0.8915340338473831, iteration: 236489
loss: 1.045775055885315,grad_norm: 0.9999992294471712, iteration: 236490
loss: 0.9979602694511414,grad_norm: 0.8573566312063124, iteration: 236491
loss: 1.0278098583221436,grad_norm: 0.9999991018294784, iteration: 236492
loss: 1.0086573362350464,grad_norm: 0.9446390635440728, iteration: 236493
loss: 0.9744277596473694,grad_norm: 0.8889883906868691, iteration: 236494
loss: 0.9981343150138855,grad_norm: 0.8679298586084567, iteration: 236495
loss: 0.9650195837020874,grad_norm: 0.999999321437392, iteration: 236496
loss: 1.0110745429992676,grad_norm: 0.9999991613569197, iteration: 236497
loss: 0.9591103792190552,grad_norm: 0.8382101283713183, iteration: 236498
loss: 1.0266788005828857,grad_norm: 0.9999990583234492, iteration: 236499
loss: 1.0137715339660645,grad_norm: 0.9999999658596589, iteration: 236500
loss: 1.0287963151931763,grad_norm: 0.9192909400110821, iteration: 236501
loss: 0.9894058108329773,grad_norm: 0.9999992207659992, iteration: 236502
loss: 1.0252376794815063,grad_norm: 0.946355410997093, iteration: 236503
loss: 1.1089000701904297,grad_norm: 0.9999991306327275, iteration: 236504
loss: 0.9976857900619507,grad_norm: 0.974314698500863, iteration: 236505
loss: 1.0279532670974731,grad_norm: 0.999999037022223, iteration: 236506
loss: 1.0191043615341187,grad_norm: 0.9694891781750519, iteration: 236507
loss: 1.345497965812683,grad_norm: 0.9999999335757319, iteration: 236508
loss: 1.0258082151412964,grad_norm: 0.8941429291822145, iteration: 236509
loss: 1.0241950750350952,grad_norm: 0.9822738569947166, iteration: 236510
loss: 1.3522495031356812,grad_norm: 0.9999995140345654, iteration: 236511
loss: 1.0159097909927368,grad_norm: 0.9888447717942598, iteration: 236512
loss: 1.0103453397750854,grad_norm: 0.9999990028258934, iteration: 236513
loss: 1.0101053714752197,grad_norm: 0.8907107248712436, iteration: 236514
loss: 1.0029414892196655,grad_norm: 0.9999991257166492, iteration: 236515
loss: 1.0080746412277222,grad_norm: 0.8509026407618647, iteration: 236516
loss: 1.0083363056182861,grad_norm: 0.9999992791298176, iteration: 236517
loss: 1.009666919708252,grad_norm: 0.7607027265191387, iteration: 236518
loss: 0.9596854448318481,grad_norm: 0.9492508135117361, iteration: 236519
loss: 0.9842482805252075,grad_norm: 0.8398424980701191, iteration: 236520
loss: 1.0066843032836914,grad_norm: 0.8608818259186513, iteration: 236521
loss: 1.0179576873779297,grad_norm: 0.8761587521048684, iteration: 236522
loss: 0.9538462162017822,grad_norm: 0.9023536116793899, iteration: 236523
loss: 1.0098391771316528,grad_norm: 0.7730454043651049, iteration: 236524
loss: 0.989168107509613,grad_norm: 0.9999991441696371, iteration: 236525
loss: 0.9894664883613586,grad_norm: 0.9735544831609336, iteration: 236526
loss: 0.9950723648071289,grad_norm: 0.8815515982607398, iteration: 236527
loss: 0.9753203392028809,grad_norm: 0.9156891629967711, iteration: 236528
loss: 1.0028116703033447,grad_norm: 0.7976397097823886, iteration: 236529
loss: 0.9999855160713196,grad_norm: 0.8145565293906022, iteration: 236530
loss: 0.9979103803634644,grad_norm: 0.8931928407169356, iteration: 236531
loss: 1.0138685703277588,grad_norm: 0.9999990106155162, iteration: 236532
loss: 1.0176554918289185,grad_norm: 0.9081181947195188, iteration: 236533
loss: 0.9974769949913025,grad_norm: 0.9873909951978651, iteration: 236534
loss: 0.9773365259170532,grad_norm: 0.961472163507158, iteration: 236535
loss: 1.0042064189910889,grad_norm: 0.881241230078959, iteration: 236536
loss: 1.0371187925338745,grad_norm: 0.8795458070068447, iteration: 236537
loss: 1.0093568563461304,grad_norm: 0.9999992124798224, iteration: 236538
loss: 1.036991834640503,grad_norm: 0.8675370015790386, iteration: 236539
loss: 1.001218318939209,grad_norm: 0.9157848245648534, iteration: 236540
loss: 1.0108234882354736,grad_norm: 0.8549474047933336, iteration: 236541
loss: 1.0923019647598267,grad_norm: 0.9999991860704422, iteration: 236542
loss: 1.0332627296447754,grad_norm: 0.999999219000103, iteration: 236543
loss: 1.00464928150177,grad_norm: 0.9136746355752086, iteration: 236544
loss: 1.0383771657943726,grad_norm: 0.7955849609568558, iteration: 236545
loss: 1.0003061294555664,grad_norm: 0.7357396440170625, iteration: 236546
loss: 0.9964863657951355,grad_norm: 0.9962693588917467, iteration: 236547
loss: 0.9853146076202393,grad_norm: 0.8624447372267877, iteration: 236548
loss: 1.0228302478790283,grad_norm: 0.827970095949299, iteration: 236549
loss: 1.0069283246994019,grad_norm: 0.8142637560429836, iteration: 236550
loss: 1.0100022554397583,grad_norm: 0.699877467070939, iteration: 236551
loss: 1.0067020654678345,grad_norm: 0.7854197953019638, iteration: 236552
loss: 1.0310229063034058,grad_norm: 0.9999997894143015, iteration: 236553
loss: 1.0288983583450317,grad_norm: 0.9999995592445372, iteration: 236554
loss: 1.0042097568511963,grad_norm: 0.9594749051715313, iteration: 236555
loss: 1.0654648542404175,grad_norm: 0.9999997354712323, iteration: 236556
loss: 1.0143730640411377,grad_norm: 0.9038937275681181, iteration: 236557
loss: 1.115390658378601,grad_norm: 0.9999998586030532, iteration: 236558
loss: 1.0115488767623901,grad_norm: 0.878946903030214, iteration: 236559
loss: 0.9704116582870483,grad_norm: 0.9636206963587246, iteration: 236560
loss: 0.984605610370636,grad_norm: 0.9999990031820502, iteration: 236561
loss: 1.2396388053894043,grad_norm: 0.9999990551471829, iteration: 236562
loss: 1.0003098249435425,grad_norm: 0.950267864968727, iteration: 236563
loss: 0.9901168942451477,grad_norm: 0.999999020089755, iteration: 236564
loss: 0.982634425163269,grad_norm: 0.9216629922644862, iteration: 236565
loss: 1.0639671087265015,grad_norm: 0.978640416946655, iteration: 236566
loss: 0.9962925314903259,grad_norm: 0.7877950381291399, iteration: 236567
loss: 0.9548365473747253,grad_norm: 0.9289770174845394, iteration: 236568
loss: 1.024165153503418,grad_norm: 0.825709333453649, iteration: 236569
loss: 0.9723532199859619,grad_norm: 0.968196627010206, iteration: 236570
loss: 0.9875611066818237,grad_norm: 0.99999912957843, iteration: 236571
loss: 1.0268560647964478,grad_norm: 0.9999994842908587, iteration: 236572
loss: 1.0737640857696533,grad_norm: 0.9999992305662184, iteration: 236573
loss: 1.0195550918579102,grad_norm: 0.9117636028106891, iteration: 236574
loss: 1.0072580575942993,grad_norm: 0.8334450868172889, iteration: 236575
loss: 0.9898933172225952,grad_norm: 0.9999991506537304, iteration: 236576
loss: 0.9971636533737183,grad_norm: 0.8693288380293737, iteration: 236577
loss: 1.0181033611297607,grad_norm: 0.9999995177310715, iteration: 236578
loss: 0.9913250803947449,grad_norm: 0.9409144233002833, iteration: 236579
loss: 1.000478744506836,grad_norm: 0.8850852003121218, iteration: 236580
loss: 1.0229986906051636,grad_norm: 0.7824343947902934, iteration: 236581
loss: 1.0222806930541992,grad_norm: 0.9999992217520376, iteration: 236582
loss: 1.0913366079330444,grad_norm: 0.9434238099558224, iteration: 236583
loss: 0.9894182682037354,grad_norm: 0.8476795771688564, iteration: 236584
loss: 1.0338915586471558,grad_norm: 0.8695347799949992, iteration: 236585
loss: 1.015128493309021,grad_norm: 0.9999990634294662, iteration: 236586
loss: 1.1033811569213867,grad_norm: 0.9999991667440312, iteration: 236587
loss: 1.021794319152832,grad_norm: 0.8281846320920538, iteration: 236588
loss: 1.0077134370803833,grad_norm: 0.856520422008957, iteration: 236589
loss: 0.975969135761261,grad_norm: 0.9999991432715889, iteration: 236590
loss: 0.9709814190864563,grad_norm: 0.768611476452527, iteration: 236591
loss: 1.0226131677627563,grad_norm: 0.9999993287149036, iteration: 236592
loss: 1.0607308149337769,grad_norm: 1.0000000828099282, iteration: 236593
loss: 1.169077754020691,grad_norm: 0.9999994531134881, iteration: 236594
loss: 0.9861907958984375,grad_norm: 0.9497190776268655, iteration: 236595
loss: 1.0142676830291748,grad_norm: 0.8483914884753548, iteration: 236596
loss: 1.0554511547088623,grad_norm: 0.9999992122165903, iteration: 236597
loss: 1.0275580883026123,grad_norm: 0.9519693292922629, iteration: 236598
loss: 0.9474173784255981,grad_norm: 0.9275175395251122, iteration: 236599
loss: 0.981501042842865,grad_norm: 0.9079576353226432, iteration: 236600
loss: 0.976612389087677,grad_norm: 0.914659182088092, iteration: 236601
loss: 0.9914833903312683,grad_norm: 0.9249604412456711, iteration: 236602
loss: 1.06047523021698,grad_norm: 0.9999994509116579, iteration: 236603
loss: 0.9923381805419922,grad_norm: 0.836430000723151, iteration: 236604
loss: 1.0224835872650146,grad_norm: 0.9825764863151962, iteration: 236605
loss: 1.0937658548355103,grad_norm: 0.9999994174053195, iteration: 236606
loss: 0.9895318150520325,grad_norm: 0.8141039545138259, iteration: 236607
loss: 1.0094248056411743,grad_norm: 0.9999990687071758, iteration: 236608
loss: 1.0073281526565552,grad_norm: 0.7404058036733093, iteration: 236609
loss: 1.1283166408538818,grad_norm: 0.9999997073225937, iteration: 236610
loss: 0.9774447679519653,grad_norm: 0.9999990609321402, iteration: 236611
loss: 1.011671543121338,grad_norm: 0.9999993234434856, iteration: 236612
loss: 0.9474641680717468,grad_norm: 0.9465643690533899, iteration: 236613
loss: 0.9990423321723938,grad_norm: 0.8732939604128741, iteration: 236614
loss: 0.9829359650611877,grad_norm: 0.999999195792456, iteration: 236615
loss: 1.0343689918518066,grad_norm: 0.888762033641546, iteration: 236616
loss: 1.0022706985473633,grad_norm: 0.9636026378268147, iteration: 236617
loss: 1.0064737796783447,grad_norm: 0.9999990314572254, iteration: 236618
loss: 0.9966594576835632,grad_norm: 0.8335773105839376, iteration: 236619
loss: 0.9932901263237,grad_norm: 0.8399562044379203, iteration: 236620
loss: 0.9975797533988953,grad_norm: 0.9140892611597643, iteration: 236621
loss: 0.9769198894500732,grad_norm: 0.9915606191275147, iteration: 236622
loss: 1.0360788106918335,grad_norm: 0.9004798841140336, iteration: 236623
loss: 0.9888112545013428,grad_norm: 0.9999989043661147, iteration: 236624
loss: 1.0270726680755615,grad_norm: 0.8138980505318237, iteration: 236625
loss: 1.0072251558303833,grad_norm: 0.9092452422321003, iteration: 236626
loss: 0.9704108238220215,grad_norm: 0.8725042650162317, iteration: 236627
loss: 0.9880229830741882,grad_norm: 0.886819821445282, iteration: 236628
loss: 1.0070875883102417,grad_norm: 0.9732150733830242, iteration: 236629
loss: 1.0861247777938843,grad_norm: 0.9999995378177076, iteration: 236630
loss: 0.9722502827644348,grad_norm: 0.8829481060925576, iteration: 236631
loss: 0.9744157195091248,grad_norm: 0.8444406049719968, iteration: 236632
loss: 0.9954217672348022,grad_norm: 0.8348812158076157, iteration: 236633
loss: 0.9895082712173462,grad_norm: 0.8456902351759078, iteration: 236634
loss: 0.9929129481315613,grad_norm: 0.9999995520900071, iteration: 236635
loss: 1.0538166761398315,grad_norm: 0.9615964907832953, iteration: 236636
loss: 1.0042147636413574,grad_norm: 0.9999990973300256, iteration: 236637
loss: 1.0228825807571411,grad_norm: 0.8523805591422534, iteration: 236638
loss: 1.01227605342865,grad_norm: 0.8199646754084838, iteration: 236639
loss: 1.004441499710083,grad_norm: 0.9999992158583951, iteration: 236640
loss: 1.0117865800857544,grad_norm: 0.9492792754725539, iteration: 236641
loss: 1.1123099327087402,grad_norm: 0.9999997479539579, iteration: 236642
loss: 0.9884315729141235,grad_norm: 0.717231163795532, iteration: 236643
loss: 0.9831560850143433,grad_norm: 0.9584618753998072, iteration: 236644
loss: 1.033621072769165,grad_norm: 0.8734470531510062, iteration: 236645
loss: 1.0224578380584717,grad_norm: 0.8725869798498327, iteration: 236646
loss: 1.0483168363571167,grad_norm: 0.8822361939035303, iteration: 236647
loss: 0.9928731322288513,grad_norm: 0.9973483322713708, iteration: 236648
loss: 1.029019832611084,grad_norm: 0.9999998355786814, iteration: 236649
loss: 1.028006672859192,grad_norm: 0.8847415147750594, iteration: 236650
loss: 1.0376442670822144,grad_norm: 0.893109504156751, iteration: 236651
loss: 1.0332659482955933,grad_norm: 0.8254748666019405, iteration: 236652
loss: 1.015864610671997,grad_norm: 0.869120313585992, iteration: 236653
loss: 0.9700294137001038,grad_norm: 0.9397616022862939, iteration: 236654
loss: 1.118120551109314,grad_norm: 0.9999998147780155, iteration: 236655
loss: 1.000499963760376,grad_norm: 0.9999997322615479, iteration: 236656
loss: 1.0120519399642944,grad_norm: 0.9999995107911391, iteration: 236657
loss: 0.9837521314620972,grad_norm: 0.8967428551820202, iteration: 236658
loss: 0.9755792617797852,grad_norm: 0.9999992594102411, iteration: 236659
loss: 0.9926466941833496,grad_norm: 0.9238762147755103, iteration: 236660
loss: 1.0293056964874268,grad_norm: 0.9999992997882929, iteration: 236661
loss: 1.133404016494751,grad_norm: 0.9999990141117031, iteration: 236662
loss: 1.0247255563735962,grad_norm: 0.7627347788874377, iteration: 236663
loss: 0.9857423305511475,grad_norm: 0.8921682759367514, iteration: 236664
loss: 0.9789054989814758,grad_norm: 0.9893184402530364, iteration: 236665
loss: 0.9898564219474792,grad_norm: 0.9999999046604813, iteration: 236666
loss: 0.9813537001609802,grad_norm: 0.8781710311022698, iteration: 236667
loss: 1.0109783411026,grad_norm: 0.995044939467682, iteration: 236668
loss: 1.0025670528411865,grad_norm: 0.9999990235958884, iteration: 236669
loss: 1.0102003812789917,grad_norm: 0.8557023098711042, iteration: 236670
loss: 1.0673468112945557,grad_norm: 0.972610527865888, iteration: 236671
loss: 0.997157871723175,grad_norm: 0.9999990219350139, iteration: 236672
loss: 1.0482642650604248,grad_norm: 0.999999380517707, iteration: 236673
loss: 1.0250682830810547,grad_norm: 0.9999991326234389, iteration: 236674
loss: 0.9910328388214111,grad_norm: 0.9999997157437919, iteration: 236675
loss: 1.0463067293167114,grad_norm: 0.9999990459444857, iteration: 236676
loss: 0.9962466955184937,grad_norm: 0.9923042543761572, iteration: 236677
loss: 1.082344889640808,grad_norm: 0.9999999117559283, iteration: 236678
loss: 0.9588780999183655,grad_norm: 0.972695306483892, iteration: 236679
loss: 1.1531157493591309,grad_norm: 0.9999991204650903, iteration: 236680
loss: 0.9815141558647156,grad_norm: 0.8560770049419129, iteration: 236681
loss: 1.08009934425354,grad_norm: 0.9636808113665062, iteration: 236682
loss: 1.0171949863433838,grad_norm: 0.8374542075749829, iteration: 236683
loss: 0.9578694105148315,grad_norm: 0.8810238466570333, iteration: 236684
loss: 1.0177348852157593,grad_norm: 0.9468826753419131, iteration: 236685
loss: 1.0601489543914795,grad_norm: 0.9999998448298973, iteration: 236686
loss: 1.0142533779144287,grad_norm: 0.9999992442509625, iteration: 236687
loss: 1.0011069774627686,grad_norm: 0.9999992340112174, iteration: 236688
loss: 0.9704766869544983,grad_norm: 0.9619306596563992, iteration: 236689
loss: 1.1414120197296143,grad_norm: 0.9999998021357787, iteration: 236690
loss: 1.1302216053009033,grad_norm: 0.9999998101313265, iteration: 236691
loss: 1.1240416765213013,grad_norm: 0.9999990301674584, iteration: 236692
loss: 1.0094791650772095,grad_norm: 0.9999990552007733, iteration: 236693
loss: 1.0618785619735718,grad_norm: 0.9999991648731558, iteration: 236694
loss: 1.1550378799438477,grad_norm: 0.9999991870672846, iteration: 236695
loss: 1.1213215589523315,grad_norm: 0.9999996629379315, iteration: 236696
loss: 1.0103919506072998,grad_norm: 0.999999131518654, iteration: 236697
loss: 1.0192097425460815,grad_norm: 0.9999992124932725, iteration: 236698
loss: 1.002928614616394,grad_norm: 0.8940926792836131, iteration: 236699
loss: 1.1321057081222534,grad_norm: 0.9999995222953211, iteration: 236700
loss: 1.1042120456695557,grad_norm: 0.999999194046272, iteration: 236701
loss: 0.9914590716362,grad_norm: 0.999999629578768, iteration: 236702
loss: 0.9800430536270142,grad_norm: 0.9999991268873465, iteration: 236703
loss: 1.1079696416854858,grad_norm: 0.999999111186345, iteration: 236704
loss: 1.0177758932113647,grad_norm: 0.9999997815466366, iteration: 236705
loss: 0.9912852644920349,grad_norm: 0.999999125691792, iteration: 236706
loss: 0.9583132863044739,grad_norm: 0.9999989361667017, iteration: 236707
loss: 1.2735934257507324,grad_norm: 0.999999821907208, iteration: 236708
loss: 1.2053942680358887,grad_norm: 0.99999926291395, iteration: 236709
loss: 1.069753885269165,grad_norm: 0.9999995870034878, iteration: 236710
loss: 1.0831081867218018,grad_norm: 0.9999996233048196, iteration: 236711
loss: 1.3595938682556152,grad_norm: 0.999999666900795, iteration: 236712
loss: 1.1638911962509155,grad_norm: 0.9999997230450388, iteration: 236713
loss: 1.1703635454177856,grad_norm: 0.9999995137876058, iteration: 236714
loss: 0.9919849038124084,grad_norm: 0.9342644903379348, iteration: 236715
loss: 1.0261566638946533,grad_norm: 0.9999993147188969, iteration: 236716
loss: 1.1487350463867188,grad_norm: 0.9999995452805907, iteration: 236717
loss: 1.0153752565383911,grad_norm: 0.8903475481769934, iteration: 236718
loss: 1.0889408588409424,grad_norm: 0.9999999446058808, iteration: 236719
loss: 1.1270487308502197,grad_norm: 0.9999999106630488, iteration: 236720
loss: 1.073764681816101,grad_norm: 0.9999994182628574, iteration: 236721
loss: 0.999447762966156,grad_norm: 0.8534031906437567, iteration: 236722
loss: 1.05287766456604,grad_norm: 0.9999992927890825, iteration: 236723
loss: 0.953321635723114,grad_norm: 0.9817024580339899, iteration: 236724
loss: 1.0405994653701782,grad_norm: 0.9999991934156768, iteration: 236725
loss: 0.9742169976234436,grad_norm: 0.9999993339094421, iteration: 236726
loss: 1.015310525894165,grad_norm: 0.9999994718508682, iteration: 236727
loss: 1.0161349773406982,grad_norm: 0.7450350924646394, iteration: 236728
loss: 1.0820554494857788,grad_norm: 0.9999990791948399, iteration: 236729
loss: 1.0034571886062622,grad_norm: 0.885668199801425, iteration: 236730
loss: 1.002535343170166,grad_norm: 0.8043239231371208, iteration: 236731
loss: 1.0359444618225098,grad_norm: 0.9999991071357759, iteration: 236732
loss: 1.0090450048446655,grad_norm: 0.9739591267682394, iteration: 236733
loss: 1.0485470294952393,grad_norm: 0.9694939344831351, iteration: 236734
loss: 1.0418493747711182,grad_norm: 0.861835985635196, iteration: 236735
loss: 1.0502583980560303,grad_norm: 0.9999990659667203, iteration: 236736
loss: 1.158322811126709,grad_norm: 0.9999999825537486, iteration: 236737
loss: 1.0020822286605835,grad_norm: 0.9300592578713638, iteration: 236738
loss: 0.9760650992393494,grad_norm: 0.8581942717312365, iteration: 236739
loss: 1.0686821937561035,grad_norm: 0.9999997909870346, iteration: 236740
loss: 0.9912574887275696,grad_norm: 0.9519250568082901, iteration: 236741
loss: 1.0343648195266724,grad_norm: 0.8722627987695835, iteration: 236742
loss: 0.988125205039978,grad_norm: 0.8385147161849678, iteration: 236743
loss: 1.0042389631271362,grad_norm: 0.936939713317484, iteration: 236744
loss: 0.9983415603637695,grad_norm: 0.7633440228684896, iteration: 236745
loss: 1.1325138807296753,grad_norm: 0.9999998085012975, iteration: 236746
loss: 0.9808098077774048,grad_norm: 0.8547010668163821, iteration: 236747
loss: 1.0859456062316895,grad_norm: 0.9999992346681498, iteration: 236748
loss: 1.0342644453048706,grad_norm: 0.9999994839334, iteration: 236749
loss: 1.0408803224563599,grad_norm: 0.9555552639747595, iteration: 236750
loss: 0.9972980618476868,grad_norm: 0.8750588149382684, iteration: 236751
loss: 0.9884693026542664,grad_norm: 0.8076189770172204, iteration: 236752
loss: 1.356186866760254,grad_norm: 0.9999997577720686, iteration: 236753
loss: 1.0244885683059692,grad_norm: 0.9769741202498387, iteration: 236754
loss: 0.9846890568733215,grad_norm: 0.9506067765456107, iteration: 236755
loss: 0.99285888671875,grad_norm: 0.8516878640297069, iteration: 236756
loss: 0.9849334359169006,grad_norm: 0.9157799629050287, iteration: 236757
loss: 1.0340280532836914,grad_norm: 0.8227180151641623, iteration: 236758
loss: 1.0005102157592773,grad_norm: 0.9999991688585695, iteration: 236759
loss: 1.0970039367675781,grad_norm: 0.999999338746572, iteration: 236760
loss: 0.991229236125946,grad_norm: 0.7785148558663437, iteration: 236761
loss: 1.0741362571716309,grad_norm: 0.9999996484114898, iteration: 236762
loss: 1.0087676048278809,grad_norm: 0.9999997761555923, iteration: 236763
loss: 0.9972448945045471,grad_norm: 0.7480771811605219, iteration: 236764
loss: 1.0285781621932983,grad_norm: 0.9311074641567284, iteration: 236765
loss: 1.039721131324768,grad_norm: 0.999999906849703, iteration: 236766
loss: 0.999417781829834,grad_norm: 0.9999991099532866, iteration: 236767
loss: 1.005689263343811,grad_norm: 0.8724039200232011, iteration: 236768
loss: 1.002478837966919,grad_norm: 0.9999992441837297, iteration: 236769
loss: 1.0123876333236694,grad_norm: 0.9999996524972257, iteration: 236770
loss: 0.983933687210083,grad_norm: 0.9999992126170105, iteration: 236771
loss: 0.9838775992393494,grad_norm: 0.9999991570162576, iteration: 236772
loss: 1.0723124742507935,grad_norm: 0.9999992029681692, iteration: 236773
loss: 0.9584839940071106,grad_norm: 0.8187774159143343, iteration: 236774
loss: 0.9734492301940918,grad_norm: 0.9999991097441912, iteration: 236775
loss: 0.9630140662193298,grad_norm: 0.9817963228414179, iteration: 236776
loss: 1.0137943029403687,grad_norm: 0.9999990148706462, iteration: 236777
loss: 1.028743028640747,grad_norm: 0.9999992065749292, iteration: 236778
loss: 0.9831098914146423,grad_norm: 0.9377180824585033, iteration: 236779
loss: 1.002192497253418,grad_norm: 0.8493747161829247, iteration: 236780
loss: 1.0045549869537354,grad_norm: 0.9017112293809002, iteration: 236781
loss: 0.9954346418380737,grad_norm: 0.7676082878241511, iteration: 236782
loss: 0.9897873401641846,grad_norm: 0.8341854556626512, iteration: 236783
loss: 1.0315296649932861,grad_norm: 0.8551398843158168, iteration: 236784
loss: 1.0069247484207153,grad_norm: 0.7968062784950888, iteration: 236785
loss: 0.9752318859100342,grad_norm: 0.9536377448169494, iteration: 236786
loss: 1.0098344087600708,grad_norm: 0.999999155472717, iteration: 236787
loss: 1.06135892868042,grad_norm: 0.9999999885285112, iteration: 236788
loss: 1.027532935142517,grad_norm: 0.8390744727717798, iteration: 236789
loss: 0.9797279238700867,grad_norm: 0.8053742491229999, iteration: 236790
loss: 1.1244380474090576,grad_norm: 0.9999999453028662, iteration: 236791
loss: 1.0799976587295532,grad_norm: 0.9999994327290388, iteration: 236792
loss: 1.0431230068206787,grad_norm: 0.9070236447184125, iteration: 236793
loss: 1.017047643661499,grad_norm: 0.8337618947695135, iteration: 236794
loss: 1.0652194023132324,grad_norm: 0.9999993034129346, iteration: 236795
loss: 0.997378945350647,grad_norm: 0.9953566798225929, iteration: 236796
loss: 1.0447611808776855,grad_norm: 0.9999993191439962, iteration: 236797
loss: 1.0527465343475342,grad_norm: 0.9999991917748365, iteration: 236798
loss: 1.039021611213684,grad_norm: 0.9999990905015003, iteration: 236799
loss: 0.9797636270523071,grad_norm: 0.8032145857327406, iteration: 236800
loss: 1.0638501644134521,grad_norm: 0.9999991549244002, iteration: 236801
loss: 1.1081119775772095,grad_norm: 0.9999997944514805, iteration: 236802
loss: 0.9925636053085327,grad_norm: 0.9999991522728647, iteration: 236803
loss: 1.022443413734436,grad_norm: 0.9548346291352644, iteration: 236804
loss: 0.9649809002876282,grad_norm: 0.9203194320073004, iteration: 236805
loss: 1.0348396301269531,grad_norm: 0.8399850928466639, iteration: 236806
loss: 0.993174135684967,grad_norm: 0.8369726659485106, iteration: 236807
loss: 0.9853109121322632,grad_norm: 0.9999993500503602, iteration: 236808
loss: 1.003743052482605,grad_norm: 0.8316389866526849, iteration: 236809
loss: 0.994034469127655,grad_norm: 0.938030624676958, iteration: 236810
loss: 1.0150444507598877,grad_norm: 0.7651196647271138, iteration: 236811
loss: 0.9948495626449585,grad_norm: 0.8586593963576172, iteration: 236812
loss: 0.9911197423934937,grad_norm: 0.9228588132263756, iteration: 236813
loss: 1.026779055595398,grad_norm: 0.9660671443788926, iteration: 236814
loss: 0.9867891669273376,grad_norm: 0.8408434769036583, iteration: 236815
loss: 0.978259801864624,grad_norm: 0.9999992072937263, iteration: 236816
loss: 1.0161736011505127,grad_norm: 0.9002267426991626, iteration: 236817
loss: 0.997818648815155,grad_norm: 0.8809582112232537, iteration: 236818
loss: 0.9864382147789001,grad_norm: 0.8594505028169327, iteration: 236819
loss: 0.990881621837616,grad_norm: 0.9516322867719088, iteration: 236820
loss: 0.9726214408874512,grad_norm: 0.8650560898314741, iteration: 236821
loss: 1.0155887603759766,grad_norm: 0.9941354706434952, iteration: 236822
loss: 0.9928739070892334,grad_norm: 0.8012010029618987, iteration: 236823
loss: 1.036895513534546,grad_norm: 1.0000000406629088, iteration: 236824
loss: 1.0152177810668945,grad_norm: 0.9789876345593265, iteration: 236825
loss: 1.0199390649795532,grad_norm: 0.8370011511051166, iteration: 236826
loss: 1.0234464406967163,grad_norm: 0.9147349987792727, iteration: 236827
loss: 0.9544512629508972,grad_norm: 0.938407766388711, iteration: 236828
loss: 0.9924744367599487,grad_norm: 0.9999991443003463, iteration: 236829
loss: 0.9565637111663818,grad_norm: 0.9544249623125914, iteration: 236830
loss: 1.0161104202270508,grad_norm: 0.9198132013947211, iteration: 236831
loss: 1.0216962099075317,grad_norm: 0.7688780415924267, iteration: 236832
loss: 1.0148251056671143,grad_norm: 0.9850193971761868, iteration: 236833
loss: 1.0530004501342773,grad_norm: 0.9999992253435122, iteration: 236834
loss: 0.971185564994812,grad_norm: 0.9279181611764906, iteration: 236835
loss: 1.019529938697815,grad_norm: 0.9903064098696033, iteration: 236836
loss: 1.016850471496582,grad_norm: 0.9999999463558946, iteration: 236837
loss: 0.9887292385101318,grad_norm: 0.8857757179083666, iteration: 236838
loss: 1.0227808952331543,grad_norm: 0.9999991195692606, iteration: 236839
loss: 0.958019495010376,grad_norm: 0.816137458207452, iteration: 236840
loss: 0.9809670448303223,grad_norm: 0.9236936915542372, iteration: 236841
loss: 0.9869049787521362,grad_norm: 0.8590585533733004, iteration: 236842
loss: 1.0241636037826538,grad_norm: 0.928263694115841, iteration: 236843
loss: 1.0459591150283813,grad_norm: 0.8788514987904641, iteration: 236844
loss: 1.0286248922348022,grad_norm: 0.9463622347972989, iteration: 236845
loss: 1.0490992069244385,grad_norm: 0.9999991281859968, iteration: 236846
loss: 1.0010319948196411,grad_norm: 0.8604369464420775, iteration: 236847
loss: 0.9951244592666626,grad_norm: 0.9999989787183091, iteration: 236848
loss: 1.0087802410125732,grad_norm: 0.9999991727906549, iteration: 236849
loss: 1.003400444984436,grad_norm: 0.915204863975421, iteration: 236850
loss: 1.0647295713424683,grad_norm: 0.9999992853924976, iteration: 236851
loss: 1.0542391538619995,grad_norm: 0.9999995668585376, iteration: 236852
loss: 0.9507651925086975,grad_norm: 0.8645786041069999, iteration: 236853
loss: 0.985593855381012,grad_norm: 0.9999990700178817, iteration: 236854
loss: 0.9970037341117859,grad_norm: 0.8975678592157531, iteration: 236855
loss: 1.0237150192260742,grad_norm: 0.9999992383127827, iteration: 236856
loss: 0.9811549782752991,grad_norm: 0.8975145704583455, iteration: 236857
loss: 0.9721803069114685,grad_norm: 0.9999991283818257, iteration: 236858
loss: 1.0042362213134766,grad_norm: 0.9999989624968086, iteration: 236859
loss: 0.9955464005470276,grad_norm: 0.8237450369936563, iteration: 236860
loss: 1.0195996761322021,grad_norm: 0.9999990158766678, iteration: 236861
loss: 1.009100079536438,grad_norm: 0.9355584890303295, iteration: 236862
loss: 1.0167310237884521,grad_norm: 0.9999991321676476, iteration: 236863
loss: 0.9966899156570435,grad_norm: 0.9999992041915625, iteration: 236864
loss: 1.0188555717468262,grad_norm: 0.8588246016597995, iteration: 236865
loss: 1.0782122611999512,grad_norm: 0.9999992798107491, iteration: 236866
loss: 0.9938154816627502,grad_norm: 0.9659072265519764, iteration: 236867
loss: 1.008324146270752,grad_norm: 0.9238593742516075, iteration: 236868
loss: 1.0421366691589355,grad_norm: 0.9037875645507621, iteration: 236869
loss: 1.0170364379882812,grad_norm: 0.8986194890941279, iteration: 236870
loss: 0.9954361915588379,grad_norm: 0.8939106031345654, iteration: 236871
loss: 0.9511890411376953,grad_norm: 0.8900498345023805, iteration: 236872
loss: 1.0254334211349487,grad_norm: 0.9999995554820525, iteration: 236873
loss: 0.9843918085098267,grad_norm: 0.9293296907452421, iteration: 236874
loss: 0.9820734858512878,grad_norm: 0.999999357356285, iteration: 236875
loss: 1.0250388383865356,grad_norm: 0.855597159849768, iteration: 236876
loss: 1.0011719465255737,grad_norm: 0.7992113288727368, iteration: 236877
loss: 1.0374325513839722,grad_norm: 0.8958134896942417, iteration: 236878
loss: 0.9948675036430359,grad_norm: 0.9193824576399667, iteration: 236879
loss: 1.0444393157958984,grad_norm: 0.8811404048702123, iteration: 236880
loss: 1.0145171880722046,grad_norm: 0.8680782471031312, iteration: 236881
loss: 1.0051945447921753,grad_norm: 0.9999998888447452, iteration: 236882
loss: 0.9999341368675232,grad_norm: 0.9448983580000893, iteration: 236883
loss: 0.9957507848739624,grad_norm: 0.9007329363578035, iteration: 236884
loss: 1.0020498037338257,grad_norm: 0.8570581264973088, iteration: 236885
loss: 0.9884839057922363,grad_norm: 0.8775311575340901, iteration: 236886
loss: 1.010953664779663,grad_norm: 0.8635799772958892, iteration: 236887
loss: 0.9802676439285278,grad_norm: 0.999999039293761, iteration: 236888
loss: 1.0056772232055664,grad_norm: 0.9726708375654984, iteration: 236889
loss: 1.0295085906982422,grad_norm: 0.9999991914611258, iteration: 236890
loss: 1.0005210638046265,grad_norm: 0.9999992434632349, iteration: 236891
loss: 1.007409691810608,grad_norm: 0.9727408862019131, iteration: 236892
loss: 0.9563871026039124,grad_norm: 0.8736149358536761, iteration: 236893
loss: 1.0027639865875244,grad_norm: 0.9999991878650578, iteration: 236894
loss: 1.0248595476150513,grad_norm: 0.9999989992817561, iteration: 236895
loss: 0.9913820624351501,grad_norm: 0.9472729453165105, iteration: 236896
loss: 1.0236458778381348,grad_norm: 0.999999086513645, iteration: 236897
loss: 1.0920034646987915,grad_norm: 0.999999946832629, iteration: 236898
loss: 0.9800836443901062,grad_norm: 0.8801862869931947, iteration: 236899
loss: 1.0202709436416626,grad_norm: 0.906842765436863, iteration: 236900
loss: 1.0259288549423218,grad_norm: 0.9999991632825138, iteration: 236901
loss: 0.9972400069236755,grad_norm: 0.9142159584527504, iteration: 236902
loss: 1.008888602256775,grad_norm: 0.930075022202009, iteration: 236903
loss: 1.1196702718734741,grad_norm: 0.9999998036455113, iteration: 236904
loss: 1.0088562965393066,grad_norm: 0.8292254643201191, iteration: 236905
loss: 1.0184791088104248,grad_norm: 0.8050327966272378, iteration: 236906
loss: 1.0055863857269287,grad_norm: 0.9870799106856016, iteration: 236907
loss: 0.9808683395385742,grad_norm: 0.9954971940869253, iteration: 236908
loss: 1.0073117017745972,grad_norm: 0.7977192144626752, iteration: 236909
loss: 0.9849499464035034,grad_norm: 0.9999991425812945, iteration: 236910
loss: 1.0571632385253906,grad_norm: 0.9999994530063838, iteration: 236911
loss: 0.9951462745666504,grad_norm: 0.908558542502231, iteration: 236912
loss: 1.0030583143234253,grad_norm: 0.999999194905152, iteration: 236913
loss: 0.9620642066001892,grad_norm: 0.8971576741025424, iteration: 236914
loss: 1.031943678855896,grad_norm: 0.9999994644051203, iteration: 236915
loss: 0.9773896336555481,grad_norm: 0.9999990688334449, iteration: 236916
loss: 1.0750720500946045,grad_norm: 0.9999991535899158, iteration: 236917
loss: 0.9450349807739258,grad_norm: 0.8371657803058576, iteration: 236918
loss: 0.9927077889442444,grad_norm: 0.9225452178853148, iteration: 236919
loss: 1.0250011682510376,grad_norm: 0.999999144217158, iteration: 236920
loss: 1.0348913669586182,grad_norm: 0.9164335200264365, iteration: 236921
loss: 1.044275164604187,grad_norm: 0.8378276327556899, iteration: 236922
loss: 0.9867050051689148,grad_norm: 0.8809371372592504, iteration: 236923
loss: 0.9995307922363281,grad_norm: 0.9532436121924711, iteration: 236924
loss: 1.052506923675537,grad_norm: 0.9999998565495457, iteration: 236925
loss: 1.0109854936599731,grad_norm: 0.9896507735563197, iteration: 236926
loss: 0.9688661098480225,grad_norm: 0.8679794914499207, iteration: 236927
loss: 1.0338399410247803,grad_norm: 0.8569752203099449, iteration: 236928
loss: 1.0080883502960205,grad_norm: 0.9557680314964018, iteration: 236929
loss: 1.0316542387008667,grad_norm: 0.8704965308505557, iteration: 236930
loss: 0.9705639481544495,grad_norm: 0.7890960611679402, iteration: 236931
loss: 0.9881197810173035,grad_norm: 0.9229357690996123, iteration: 236932
loss: 1.000359058380127,grad_norm: 0.9899674868961347, iteration: 236933
loss: 1.009461760520935,grad_norm: 0.8107069053225155, iteration: 236934
loss: 1.0536830425262451,grad_norm: 0.905503249523696, iteration: 236935
loss: 0.988621175289154,grad_norm: 0.9999992385569851, iteration: 236936
loss: 1.0095241069793701,grad_norm: 0.8361264765292961, iteration: 236937
loss: 1.0456463098526,grad_norm: 0.8938584042124633, iteration: 236938
loss: 1.0178831815719604,grad_norm: 0.9885009023269471, iteration: 236939
loss: 1.0042567253112793,grad_norm: 0.8678343792235279, iteration: 236940
loss: 1.0188589096069336,grad_norm: 0.9999990778814803, iteration: 236941
loss: 1.0005402565002441,grad_norm: 0.8789229569663257, iteration: 236942
loss: 0.9733813405036926,grad_norm: 0.9999991561193512, iteration: 236943
loss: 1.0515371561050415,grad_norm: 0.9999990330156775, iteration: 236944
loss: 1.0009448528289795,grad_norm: 0.8880421587002205, iteration: 236945
loss: 1.0216442346572876,grad_norm: 0.9999989504562079, iteration: 236946
loss: 0.9672247171401978,grad_norm: 0.9999990006929, iteration: 236947
loss: 1.0022028684616089,grad_norm: 0.7750163971602376, iteration: 236948
loss: 1.0177092552185059,grad_norm: 0.9999993296992633, iteration: 236949
loss: 1.0020227432250977,grad_norm: 0.9427804779209566, iteration: 236950
loss: 1.0144927501678467,grad_norm: 0.9999992108560306, iteration: 236951
loss: 0.9955219626426697,grad_norm: 0.8399666184548338, iteration: 236952
loss: 0.9747111201286316,grad_norm: 0.9999990700308374, iteration: 236953
loss: 1.0301486253738403,grad_norm: 0.7984336167279648, iteration: 236954
loss: 1.0020275115966797,grad_norm: 0.9999991283500599, iteration: 236955
loss: 1.026942491531372,grad_norm: 0.999999177376788, iteration: 236956
loss: 0.9982445240020752,grad_norm: 0.878444400424193, iteration: 236957
loss: 1.0455776453018188,grad_norm: 0.8924503638149727, iteration: 236958
loss: 1.0073020458221436,grad_norm: 0.9521895750363584, iteration: 236959
loss: 1.0486857891082764,grad_norm: 0.9999990520112012, iteration: 236960
loss: 1.0849883556365967,grad_norm: 0.9999991460371289, iteration: 236961
loss: 0.9953821301460266,grad_norm: 0.7615366149387892, iteration: 236962
loss: 1.0327259302139282,grad_norm: 0.9999999534420781, iteration: 236963
loss: 1.042455792427063,grad_norm: 0.8423214584269352, iteration: 236964
loss: 0.9998812675476074,grad_norm: 0.7979221780992237, iteration: 236965
loss: 1.0076704025268555,grad_norm: 0.8943195822417119, iteration: 236966
loss: 1.0059527158737183,grad_norm: 0.9813030932806508, iteration: 236967
loss: 1.0060241222381592,grad_norm: 0.9999993925200885, iteration: 236968
loss: 1.0055216550827026,grad_norm: 0.9278886498773241, iteration: 236969
loss: 1.0169707536697388,grad_norm: 0.9035466601669196, iteration: 236970
loss: 0.9816843867301941,grad_norm: 0.9654868449968126, iteration: 236971
loss: 1.0046759843826294,grad_norm: 0.8747038902649313, iteration: 236972
loss: 1.0167341232299805,grad_norm: 0.9992986969112481, iteration: 236973
loss: 0.9260028600692749,grad_norm: 0.9114354610297317, iteration: 236974
loss: 1.0274085998535156,grad_norm: 0.999998940694323, iteration: 236975
loss: 1.0620535612106323,grad_norm: 0.9999993171611341, iteration: 236976
loss: 0.9659194350242615,grad_norm: 0.888353177365691, iteration: 236977
loss: 1.0089296102523804,grad_norm: 0.8860059140590479, iteration: 236978
loss: 1.0731561183929443,grad_norm: 0.9999991350536497, iteration: 236979
loss: 0.99309241771698,grad_norm: 0.9466611429213582, iteration: 236980
loss: 0.9872375726699829,grad_norm: 0.8492168416130146, iteration: 236981
loss: 0.9864371418952942,grad_norm: 0.9095406426201715, iteration: 236982
loss: 0.9829034805297852,grad_norm: 0.9999992847836763, iteration: 236983
loss: 0.9730973243713379,grad_norm: 0.9934745845602475, iteration: 236984
loss: 1.012227177619934,grad_norm: 0.9999991795244815, iteration: 236985
loss: 1.0622984170913696,grad_norm: 0.9999995745895964, iteration: 236986
loss: 0.9774600863456726,grad_norm: 0.8173421648301724, iteration: 236987
loss: 1.0278244018554688,grad_norm: 0.8610521811112121, iteration: 236988
loss: 0.9896261692047119,grad_norm: 0.8211260891410324, iteration: 236989
loss: 1.0403881072998047,grad_norm: 0.999999119934426, iteration: 236990
loss: 0.9995079040527344,grad_norm: 0.9999995069953119, iteration: 236991
loss: 0.9493549466133118,grad_norm: 0.9999991221917094, iteration: 236992
loss: 0.971671998500824,grad_norm: 0.8339382583166185, iteration: 236993
loss: 1.0189311504364014,grad_norm: 0.9736615845703811, iteration: 236994
loss: 1.041661024093628,grad_norm: 0.9999990891485006, iteration: 236995
loss: 1.012940764427185,grad_norm: 0.9276151491435319, iteration: 236996
loss: 0.9789565801620483,grad_norm: 0.9280910918672737, iteration: 236997
loss: 0.992541491985321,grad_norm: 0.9999992113182918, iteration: 236998
loss: 0.9873213768005371,grad_norm: 0.9091049641469006, iteration: 236999
loss: 0.9984655380249023,grad_norm: 0.816617078677519, iteration: 237000
loss: 1.0013227462768555,grad_norm: 0.856396589225498, iteration: 237001
loss: 1.0184547901153564,grad_norm: 0.999999221233016, iteration: 237002
loss: 1.011403203010559,grad_norm: 0.9999990792854399, iteration: 237003
loss: 1.0100799798965454,grad_norm: 0.9080278263530038, iteration: 237004
loss: 1.0402750968933105,grad_norm: 0.8185464812069947, iteration: 237005
loss: 1.0675748586654663,grad_norm: 0.9999996679228393, iteration: 237006
loss: 1.067615032196045,grad_norm: 0.9999990394736953, iteration: 237007
loss: 1.0219733715057373,grad_norm: 0.9999989645159693, iteration: 237008
loss: 0.9970106482505798,grad_norm: 0.9990707365808446, iteration: 237009
loss: 0.9883040189743042,grad_norm: 0.9999993543999001, iteration: 237010
loss: 1.0118443965911865,grad_norm: 0.8742605251952626, iteration: 237011
loss: 0.9903081655502319,grad_norm: 0.9999992052741535, iteration: 237012
loss: 1.013015866279602,grad_norm: 0.8380138982761351, iteration: 237013
loss: 0.9786819219589233,grad_norm: 0.8118813745084972, iteration: 237014
loss: 0.9946507215499878,grad_norm: 0.7872475113000382, iteration: 237015
loss: 0.9976159930229187,grad_norm: 0.999999081321007, iteration: 237016
loss: 0.9940454363822937,grad_norm: 0.9999991868587639, iteration: 237017
loss: 1.0048348903656006,grad_norm: 0.9999989682787285, iteration: 237018
loss: 1.0004734992980957,grad_norm: 0.9999991740193486, iteration: 237019
loss: 1.0051608085632324,grad_norm: 0.9678155691607525, iteration: 237020
loss: 0.9917834401130676,grad_norm: 0.9999991124852708, iteration: 237021
loss: 0.9937923550605774,grad_norm: 0.9122543334848775, iteration: 237022
loss: 1.0043482780456543,grad_norm: 0.845768665576036, iteration: 237023
loss: 1.0313664674758911,grad_norm: 0.9999991266001304, iteration: 237024
loss: 1.0295612812042236,grad_norm: 0.9999990761753653, iteration: 237025
loss: 0.9966127872467041,grad_norm: 0.9999992315652506, iteration: 237026
loss: 1.0380232334136963,grad_norm: 0.8319791021839973, iteration: 237027
loss: 0.9964001178741455,grad_norm: 0.9446514313116227, iteration: 237028
loss: 1.0138942003250122,grad_norm: 0.9999992011117735, iteration: 237029
loss: 0.9915260076522827,grad_norm: 0.8485752640706392, iteration: 237030
loss: 0.9934867024421692,grad_norm: 0.866954075204029, iteration: 237031
loss: 0.9839513897895813,grad_norm: 0.8997471884528194, iteration: 237032
loss: 1.0208629369735718,grad_norm: 0.7514135986879014, iteration: 237033
loss: 1.0197489261627197,grad_norm: 1.00000001648765, iteration: 237034
loss: 1.0010465383529663,grad_norm: 0.7815190877335131, iteration: 237035
loss: 1.0015596151351929,grad_norm: 0.8240591239763216, iteration: 237036
loss: 0.967019259929657,grad_norm: 0.9388453395116152, iteration: 237037
loss: 0.9834464192390442,grad_norm: 0.9528393620672515, iteration: 237038
loss: 1.0070154666900635,grad_norm: 0.9120972432245916, iteration: 237039
loss: 0.9935346841812134,grad_norm: 0.7945803544106024, iteration: 237040
loss: 0.9795339107513428,grad_norm: 0.9999998665557266, iteration: 237041
loss: 0.9689925909042358,grad_norm: 0.9261542477089922, iteration: 237042
loss: 0.9928787350654602,grad_norm: 0.8477810601067963, iteration: 237043
loss: 0.9780381321907043,grad_norm: 0.9120333718551557, iteration: 237044
loss: 1.0177452564239502,grad_norm: 0.9999990926622283, iteration: 237045
loss: 1.0007679462432861,grad_norm: 0.9734542238577077, iteration: 237046
loss: 1.0060824155807495,grad_norm: 0.8379487787773819, iteration: 237047
loss: 1.0036040544509888,grad_norm: 0.8659000394282566, iteration: 237048
loss: 1.007412075996399,grad_norm: 0.9197412033674829, iteration: 237049
loss: 1.0233367681503296,grad_norm: 0.967480398779709, iteration: 237050
loss: 0.9921638369560242,grad_norm: 0.9837611783218244, iteration: 237051
loss: 0.9861679673194885,grad_norm: 0.7033915109050658, iteration: 237052
loss: 1.0282955169677734,grad_norm: 0.9158682826855866, iteration: 237053
loss: 1.0048267841339111,grad_norm: 0.8634885705994187, iteration: 237054
loss: 0.9794921875,grad_norm: 0.9999990970376992, iteration: 237055
loss: 0.9554130434989929,grad_norm: 0.9999989811032552, iteration: 237056
loss: 0.9963733553886414,grad_norm: 0.9999990909000943, iteration: 237057
loss: 1.0419631004333496,grad_norm: 0.9016659892332396, iteration: 237058
loss: 0.9953603148460388,grad_norm: 0.8433995315500439, iteration: 237059
loss: 1.0166230201721191,grad_norm: 0.9999990642172949, iteration: 237060
loss: 0.9538811445236206,grad_norm: 0.997295505715809, iteration: 237061
loss: 1.0494868755340576,grad_norm: 0.896345698993429, iteration: 237062
loss: 0.9656910300254822,grad_norm: 0.9999991791504187, iteration: 237063
loss: 0.9959323406219482,grad_norm: 0.9137159370245967, iteration: 237064
loss: 1.027725338935852,grad_norm: 0.9999990814359535, iteration: 237065
loss: 1.1520075798034668,grad_norm: 0.9999996997670932, iteration: 237066
loss: 1.0128623247146606,grad_norm: 0.9999991017975361, iteration: 237067
loss: 0.987697184085846,grad_norm: 0.9999999121693568, iteration: 237068
loss: 0.9770278930664062,grad_norm: 0.7643337651763291, iteration: 237069
loss: 1.000063419342041,grad_norm: 0.9611461321352828, iteration: 237070
loss: 0.9929584860801697,grad_norm: 0.9338262603122605, iteration: 237071
loss: 0.9923386573791504,grad_norm: 0.8656037060834714, iteration: 237072
loss: 1.0066252946853638,grad_norm: 0.9429419685773639, iteration: 237073
loss: 1.0140475034713745,grad_norm: 0.8662684027338262, iteration: 237074
loss: 0.9700341820716858,grad_norm: 0.8718371662932574, iteration: 237075
loss: 1.0955473184585571,grad_norm: 0.993078783223951, iteration: 237076
loss: 0.983113706111908,grad_norm: 0.7748091928897631, iteration: 237077
loss: 0.986742377281189,grad_norm: 0.8893482877562624, iteration: 237078
loss: 1.0049370527267456,grad_norm: 0.7563087149855153, iteration: 237079
loss: 0.9890304207801819,grad_norm: 0.8982134357876774, iteration: 237080
loss: 1.0111058950424194,grad_norm: 0.936324746070091, iteration: 237081
loss: 1.0145550966262817,grad_norm: 0.999999124039628, iteration: 237082
loss: 0.9889388680458069,grad_norm: 0.9999990421242683, iteration: 237083
loss: 0.9857901930809021,grad_norm: 0.9999991433688, iteration: 237084
loss: 0.9979115128517151,grad_norm: 0.9803520893187241, iteration: 237085
loss: 1.0195025205612183,grad_norm: 0.9999991138680989, iteration: 237086
loss: 0.957977294921875,grad_norm: 0.8750054282125012, iteration: 237087
loss: 1.0042412281036377,grad_norm: 0.8979067715233383, iteration: 237088
loss: 0.967272162437439,grad_norm: 0.9999991926097017, iteration: 237089
loss: 0.9763339757919312,grad_norm: 0.8007988677947945, iteration: 237090
loss: 0.9848757386207581,grad_norm: 0.9999993043268447, iteration: 237091
loss: 1.005759596824646,grad_norm: 0.767795669618167, iteration: 237092
loss: 1.0126856565475464,grad_norm: 0.9180163177881497, iteration: 237093
loss: 1.0138988494873047,grad_norm: 0.864231961285355, iteration: 237094
loss: 0.9866276383399963,grad_norm: 0.8655281673927067, iteration: 237095
loss: 1.0155144929885864,grad_norm: 0.9260748536205784, iteration: 237096
loss: 0.9867020845413208,grad_norm: 0.999999186506802, iteration: 237097
loss: 0.995427131652832,grad_norm: 0.9999994308859341, iteration: 237098
loss: 0.9724252820014954,grad_norm: 0.9732884266379676, iteration: 237099
loss: 1.000533103942871,grad_norm: 0.8233566715415629, iteration: 237100
loss: 1.0521730184555054,grad_norm: 0.999999492418391, iteration: 237101
loss: 1.00400710105896,grad_norm: 0.7348767507230877, iteration: 237102
loss: 1.081618070602417,grad_norm: 0.8104088423481884, iteration: 237103
loss: 0.9895281791687012,grad_norm: 0.9999989321577217, iteration: 237104
loss: 0.9916787147521973,grad_norm: 0.8841041066829379, iteration: 237105
loss: 0.9693801999092102,grad_norm: 0.9503731719125703, iteration: 237106
loss: 0.9932773113250732,grad_norm: 0.824723644604116, iteration: 237107
loss: 0.9967769384384155,grad_norm: 0.9532777327932821, iteration: 237108
loss: 1.1171972751617432,grad_norm: 0.9999991395599613, iteration: 237109
loss: 0.9765676259994507,grad_norm: 0.9086556950866371, iteration: 237110
loss: 1.0141644477844238,grad_norm: 0.9366837932851141, iteration: 237111
loss: 1.0357000827789307,grad_norm: 0.9081155593913458, iteration: 237112
loss: 1.0045230388641357,grad_norm: 0.9762532254038554, iteration: 237113
loss: 1.0065233707427979,grad_norm: 0.8346803509944754, iteration: 237114
loss: 0.9836658239364624,grad_norm: 0.8489365365890482, iteration: 237115
loss: 0.9633856415748596,grad_norm: 0.9361272475738753, iteration: 237116
loss: 0.9898346066474915,grad_norm: 0.7633953893943825, iteration: 237117
loss: 0.9902667999267578,grad_norm: 0.9115530417186191, iteration: 237118
loss: 1.0544463396072388,grad_norm: 0.9999993500281225, iteration: 237119
loss: 1.018447995185852,grad_norm: 0.9076920742240802, iteration: 237120
loss: 0.9799293279647827,grad_norm: 0.842621465676464, iteration: 237121
loss: 1.0465786457061768,grad_norm: 0.7434445509710825, iteration: 237122
loss: 0.9808454513549805,grad_norm: 0.7598014703145212, iteration: 237123
loss: 1.0431151390075684,grad_norm: 0.8607875743917168, iteration: 237124
loss: 0.9923027157783508,grad_norm: 0.8719138554380024, iteration: 237125
loss: 0.9977095127105713,grad_norm: 0.9129923401956632, iteration: 237126
loss: 1.0181756019592285,grad_norm: 0.8018867541447883, iteration: 237127
loss: 0.9883515238761902,grad_norm: 0.936989494626022, iteration: 237128
loss: 1.0324804782867432,grad_norm: 0.9511563133826247, iteration: 237129
loss: 0.9714672565460205,grad_norm: 0.8184594680723375, iteration: 237130
loss: 1.0186357498168945,grad_norm: 0.9999992928680659, iteration: 237131
loss: 1.015618920326233,grad_norm: 0.8451004597473321, iteration: 237132
loss: 0.9899749755859375,grad_norm: 0.9999990415046867, iteration: 237133
loss: 0.9891043305397034,grad_norm: 0.9148616270056814, iteration: 237134
loss: 1.0125627517700195,grad_norm: 0.8741219975717545, iteration: 237135
loss: 0.9963569641113281,grad_norm: 0.9999994847849502, iteration: 237136
loss: 1.0053611993789673,grad_norm: 0.8037100438458755, iteration: 237137
loss: 0.9866837859153748,grad_norm: 0.884097395100955, iteration: 237138
loss: 0.9929571747779846,grad_norm: 0.9214764497237438, iteration: 237139
loss: 1.0079859495162964,grad_norm: 0.7909572837499468, iteration: 237140
loss: 1.066452980041504,grad_norm: 0.9726056915255972, iteration: 237141
loss: 0.9808282852172852,grad_norm: 0.9855527002168124, iteration: 237142
loss: 1.0184597969055176,grad_norm: 0.881892260182805, iteration: 237143
loss: 0.9757394194602966,grad_norm: 0.999999140722315, iteration: 237144
loss: 0.9620140194892883,grad_norm: 0.9091961351300784, iteration: 237145
loss: 0.9608743786811829,grad_norm: 0.881489167642866, iteration: 237146
loss: 0.9944913983345032,grad_norm: 0.9692846559329109, iteration: 237147
loss: 1.015913724899292,grad_norm: 0.9346909203363886, iteration: 237148
loss: 1.033570647239685,grad_norm: 0.9999992898830279, iteration: 237149
loss: 1.0081274509429932,grad_norm: 0.89300103124105, iteration: 237150
loss: 1.0042001008987427,grad_norm: 0.9748918257849661, iteration: 237151
loss: 1.0068846940994263,grad_norm: 0.8455571168551496, iteration: 237152
loss: 1.0104326009750366,grad_norm: 0.9228791049596395, iteration: 237153
loss: 1.0432302951812744,grad_norm: 0.9635417628234891, iteration: 237154
loss: 1.03673255443573,grad_norm: 0.9915629707468329, iteration: 237155
loss: 1.0298932790756226,grad_norm: 0.9999991106127738, iteration: 237156
loss: 0.967883825302124,grad_norm: 0.9999992899746478, iteration: 237157
loss: 0.9505791068077087,grad_norm: 0.9999990181928508, iteration: 237158
loss: 0.9455334544181824,grad_norm: 0.9999991588175086, iteration: 237159
loss: 1.023186445236206,grad_norm: 0.915663757091486, iteration: 237160
loss: 1.072346568107605,grad_norm: 0.9999990213108827, iteration: 237161
loss: 1.02523934841156,grad_norm: 0.8531632873278792, iteration: 237162
loss: 1.0318915843963623,grad_norm: 0.9999991357678455, iteration: 237163
loss: 0.9937776327133179,grad_norm: 0.9406093335186319, iteration: 237164
loss: 0.9974088668823242,grad_norm: 0.9999991583902318, iteration: 237165
loss: 0.9713248610496521,grad_norm: 0.9122757880915526, iteration: 237166
loss: 0.9832565188407898,grad_norm: 0.8245437224525701, iteration: 237167
loss: 0.9768628478050232,grad_norm: 0.955121295289365, iteration: 237168
loss: 1.0097712278366089,grad_norm: 0.9553718192307117, iteration: 237169
loss: 0.9736593961715698,grad_norm: 0.949499525644072, iteration: 237170
loss: 1.0044294595718384,grad_norm: 0.9999997499999775, iteration: 237171
loss: 1.003843069076538,grad_norm: 0.9889282590067854, iteration: 237172
loss: 1.0193952322006226,grad_norm: 0.826929119529563, iteration: 237173
loss: 1.0580787658691406,grad_norm: 0.9850426873992121, iteration: 237174
loss: 1.0357166528701782,grad_norm: 0.8734656514568903, iteration: 237175
loss: 1.0287868976593018,grad_norm: 0.9050340894117372, iteration: 237176
loss: 0.9825839400291443,grad_norm: 0.8703886195374536, iteration: 237177
loss: 1.0322996377944946,grad_norm: 0.8993676050706064, iteration: 237178
loss: 1.002785325050354,grad_norm: 0.9999990589125164, iteration: 237179
loss: 1.032278060913086,grad_norm: 0.9396628493695452, iteration: 237180
loss: 0.9530895352363586,grad_norm: 0.942514633272338, iteration: 237181
loss: 0.9889355897903442,grad_norm: 0.8087411672824575, iteration: 237182
loss: 1.0262960195541382,grad_norm: 0.9746952037662193, iteration: 237183
loss: 1.017254114151001,grad_norm: 0.9999991651389236, iteration: 237184
loss: 0.9932210445404053,grad_norm: 0.9447283948864855, iteration: 237185
loss: 1.0059654712677002,grad_norm: 0.9999999446256411, iteration: 237186
loss: 1.0035282373428345,grad_norm: 0.8601853648980995, iteration: 237187
loss: 1.0382168292999268,grad_norm: 0.8539917837630584, iteration: 237188
loss: 1.0191062688827515,grad_norm: 0.9999994533055007, iteration: 237189
loss: 1.0894407033920288,grad_norm: 0.9999993063918605, iteration: 237190
loss: 0.9939004778862,grad_norm: 0.7183397060679014, iteration: 237191
loss: 0.967968225479126,grad_norm: 0.7878320554679178, iteration: 237192
loss: 1.0377228260040283,grad_norm: 0.9999994205198613, iteration: 237193
loss: 0.9713416695594788,grad_norm: 0.9009666914947685, iteration: 237194
loss: 1.0771756172180176,grad_norm: 0.9999992922256886, iteration: 237195
loss: 0.9933882355690002,grad_norm: 0.9999998030117386, iteration: 237196
loss: 0.9942280650138855,grad_norm: 0.9097085701297395, iteration: 237197
loss: 0.9790125489234924,grad_norm: 0.9307291624238622, iteration: 237198
loss: 0.9708500504493713,grad_norm: 0.9615213127488025, iteration: 237199
loss: 1.0029743909835815,grad_norm: 0.920741196228266, iteration: 237200
loss: 1.0367441177368164,grad_norm: 0.9698502186678177, iteration: 237201
loss: 1.0517553091049194,grad_norm: 0.9999999673129557, iteration: 237202
loss: 0.9734032154083252,grad_norm: 0.8666811268234497, iteration: 237203
loss: 1.0023020505905151,grad_norm: 0.9035297582331504, iteration: 237204
loss: 0.9997085332870483,grad_norm: 0.9989939881185415, iteration: 237205
loss: 1.014108419418335,grad_norm: 0.9602657692908495, iteration: 237206
loss: 1.015203833580017,grad_norm: 0.999998933922428, iteration: 237207
loss: 1.0279603004455566,grad_norm: 0.8783971178950873, iteration: 237208
loss: 1.0334250926971436,grad_norm: 0.9655021032411373, iteration: 237209
loss: 0.9966236352920532,grad_norm: 0.8748306094553714, iteration: 237210
loss: 1.0257669687271118,grad_norm: 0.999999149108667, iteration: 237211
loss: 1.105046272277832,grad_norm: 0.9999995300473203, iteration: 237212
loss: 1.0175684690475464,grad_norm: 0.8244476724322912, iteration: 237213
loss: 0.9960073232650757,grad_norm: 0.8466272290776461, iteration: 237214
loss: 0.9753695726394653,grad_norm: 0.7449488779753347, iteration: 237215
loss: 0.9869744181632996,grad_norm: 0.8146691031760771, iteration: 237216
loss: 1.0178290605545044,grad_norm: 0.9999992264847802, iteration: 237217
loss: 1.0029927492141724,grad_norm: 0.9999992493914299, iteration: 237218
loss: 0.9959508776664734,grad_norm: 0.9426793993820239, iteration: 237219
loss: 1.0144364833831787,grad_norm: 0.9180453441737467, iteration: 237220
loss: 0.993141233921051,grad_norm: 0.907692312726035, iteration: 237221
loss: 0.9717364311218262,grad_norm: 0.9445240423612212, iteration: 237222
loss: 0.99458247423172,grad_norm: 0.8327660974357592, iteration: 237223
loss: 1.0147414207458496,grad_norm: 0.9999991282730196, iteration: 237224
loss: 0.9872552156448364,grad_norm: 0.8296686228436837, iteration: 237225
loss: 0.9993268847465515,grad_norm: 0.9676307174155087, iteration: 237226
loss: 1.034799337387085,grad_norm: 0.8459075642800705, iteration: 237227
loss: 1.020750641822815,grad_norm: 0.8160986833143902, iteration: 237228
loss: 1.0377289056777954,grad_norm: 0.9999990549539723, iteration: 237229
loss: 1.0312058925628662,grad_norm: 0.880532614893928, iteration: 237230
loss: 0.9942294359207153,grad_norm: 0.9794733287152547, iteration: 237231
loss: 1.003137469291687,grad_norm: 0.9999993446009185, iteration: 237232
loss: 1.0197157859802246,grad_norm: 0.9999991024576521, iteration: 237233
loss: 1.0094795227050781,grad_norm: 0.7464388464060189, iteration: 237234
loss: 1.0156539678573608,grad_norm: 0.9999992180108446, iteration: 237235
loss: 0.9998156428337097,grad_norm: 0.8728026807876115, iteration: 237236
loss: 1.0235141515731812,grad_norm: 0.9820232505073448, iteration: 237237
loss: 0.9784752130508423,grad_norm: 0.8628310580212862, iteration: 237238
loss: 0.9715888500213623,grad_norm: 0.9295690102640228, iteration: 237239
loss: 1.0333282947540283,grad_norm: 0.8655050629265947, iteration: 237240
loss: 0.9844425320625305,grad_norm: 0.9999990778380256, iteration: 237241
loss: 0.9777606129646301,grad_norm: 0.9171290940728262, iteration: 237242
loss: 1.018160104751587,grad_norm: 0.7970529035655933, iteration: 237243
loss: 0.9835817217826843,grad_norm: 0.9999990317470215, iteration: 237244
loss: 1.0418967008590698,grad_norm: 0.8736859985712149, iteration: 237245
loss: 0.9672530889511108,grad_norm: 0.9058864397789829, iteration: 237246
loss: 1.0155733823776245,grad_norm: 0.9170334580648671, iteration: 237247
loss: 0.9978269934654236,grad_norm: 0.7292456310304015, iteration: 237248
loss: 1.0089410543441772,grad_norm: 0.9999994337163239, iteration: 237249
loss: 0.9925891160964966,grad_norm: 0.9415491142476009, iteration: 237250
loss: 0.9999275207519531,grad_norm: 0.9999991427141578, iteration: 237251
loss: 0.9990977048873901,grad_norm: 0.9162123256424457, iteration: 237252
loss: 0.9850282073020935,grad_norm: 0.8315145980020923, iteration: 237253
loss: 0.9862290024757385,grad_norm: 0.8611956705886132, iteration: 237254
loss: 1.0120848417282104,grad_norm: 0.944010342785869, iteration: 237255
loss: 1.0135868787765503,grad_norm: 0.9999990559760217, iteration: 237256
loss: 0.9950924515724182,grad_norm: 0.9157566697621978, iteration: 237257
loss: 0.9775440692901611,grad_norm: 0.876533665800579, iteration: 237258
loss: 1.0258674621582031,grad_norm: 0.9605148516322471, iteration: 237259
loss: 0.9983307123184204,grad_norm: 0.9999990820068536, iteration: 237260
loss: 1.0054209232330322,grad_norm: 0.9760295059229247, iteration: 237261
loss: 0.9753103852272034,grad_norm: 0.9999993421908931, iteration: 237262
loss: 1.0061537027359009,grad_norm: 0.9999990333526706, iteration: 237263
loss: 1.0546377897262573,grad_norm: 0.9999990275267451, iteration: 237264
loss: 0.984223484992981,grad_norm: 0.8595804863292503, iteration: 237265
loss: 1.0051076412200928,grad_norm: 0.7991165919336438, iteration: 237266
loss: 0.9997295141220093,grad_norm: 0.6651951584953013, iteration: 237267
loss: 1.0007091760635376,grad_norm: 0.9701855624398443, iteration: 237268
loss: 0.9853326082229614,grad_norm: 0.9045008682077454, iteration: 237269
loss: 0.9969714879989624,grad_norm: 0.830004939070394, iteration: 237270
loss: 0.9903615117073059,grad_norm: 0.955037140265689, iteration: 237271
loss: 1.0115721225738525,grad_norm: 0.9388980453217513, iteration: 237272
loss: 1.0590503215789795,grad_norm: 0.9999995355850331, iteration: 237273
loss: 0.9870465993881226,grad_norm: 0.9396871634227552, iteration: 237274
loss: 1.022682547569275,grad_norm: 0.9688314298806255, iteration: 237275
loss: 0.9947871565818787,grad_norm: 0.9999990148294208, iteration: 237276
loss: 0.9645560383796692,grad_norm: 0.8500802880323436, iteration: 237277
loss: 0.9950631856918335,grad_norm: 0.880936276976257, iteration: 237278
loss: 0.9717747569084167,grad_norm: 0.8173361710187405, iteration: 237279
loss: 1.0147030353546143,grad_norm: 0.9255199938354074, iteration: 237280
loss: 0.985935389995575,grad_norm: 0.9999990675080903, iteration: 237281
loss: 0.9965476393699646,grad_norm: 0.8706416317817838, iteration: 237282
loss: 0.9792699217796326,grad_norm: 0.935859092965153, iteration: 237283
loss: 1.0097569227218628,grad_norm: 0.883406536013964, iteration: 237284
loss: 0.9991959929466248,grad_norm: 0.9735283822361345, iteration: 237285
loss: 1.0058317184448242,grad_norm: 0.8907903541174179, iteration: 237286
loss: 1.0189284086227417,grad_norm: 0.8413959093464015, iteration: 237287
loss: 1.074863314628601,grad_norm: 0.9999991079616057, iteration: 237288
loss: 0.9722713232040405,grad_norm: 0.9999990503283172, iteration: 237289
loss: 0.9902065396308899,grad_norm: 0.9402348681115877, iteration: 237290
loss: 0.9785557389259338,grad_norm: 0.8680899074019618, iteration: 237291
loss: 0.9791843295097351,grad_norm: 0.9101059175828483, iteration: 237292
loss: 0.9533293843269348,grad_norm: 0.9999991105925057, iteration: 237293
loss: 1.0043771266937256,grad_norm: 0.9475930863770801, iteration: 237294
loss: 1.0104947090148926,grad_norm: 0.8089185230285457, iteration: 237295
loss: 1.0047343969345093,grad_norm: 0.8954537901888576, iteration: 237296
loss: 0.9991269111633301,grad_norm: 0.9220559306395361, iteration: 237297
loss: 0.9936349987983704,grad_norm: 0.8625396701693596, iteration: 237298
loss: 1.0025336742401123,grad_norm: 0.8533723385473729, iteration: 237299
loss: 0.9719938635826111,grad_norm: 0.9263136197109987, iteration: 237300
loss: 1.0357394218444824,grad_norm: 0.99999938912344, iteration: 237301
loss: 0.9908375144004822,grad_norm: 0.8643668658461888, iteration: 237302
loss: 0.9814978837966919,grad_norm: 0.9999991373666987, iteration: 237303
loss: 1.0262911319732666,grad_norm: 0.8601798125921027, iteration: 237304
loss: 1.019728183746338,grad_norm: 0.8196388665940564, iteration: 237305
loss: 1.0045080184936523,grad_norm: 0.9196302528305559, iteration: 237306
loss: 0.9846657514572144,grad_norm: 0.697024278755158, iteration: 237307
loss: 0.9711372256278992,grad_norm: 0.9844490106405349, iteration: 237308
loss: 1.0419384241104126,grad_norm: 0.8502811631924333, iteration: 237309
loss: 1.0200581550598145,grad_norm: 0.9651864538665439, iteration: 237310
loss: 1.0192447900772095,grad_norm: 0.8167233530479493, iteration: 237311
loss: 0.9545381665229797,grad_norm: 0.8944702793740967, iteration: 237312
loss: 0.9906759858131409,grad_norm: 0.9999992923075747, iteration: 237313
loss: 1.027182698249817,grad_norm: 0.9999991126452147, iteration: 237314
loss: 1.0207995176315308,grad_norm: 0.9515708595587656, iteration: 237315
loss: 1.0144723653793335,grad_norm: 0.8256131816948139, iteration: 237316
loss: 0.9751086831092834,grad_norm: 0.9999989458093425, iteration: 237317
loss: 0.9982084035873413,grad_norm: 0.7034713714204666, iteration: 237318
loss: 0.9989465475082397,grad_norm: 0.9034979669052275, iteration: 237319
loss: 0.9762333035469055,grad_norm: 0.8175804199627212, iteration: 237320
loss: 0.9458706378936768,grad_norm: 0.8408685954801256, iteration: 237321
loss: 1.0217704772949219,grad_norm: 0.9999992172699979, iteration: 237322
loss: 0.9932628273963928,grad_norm: 0.9999991918189255, iteration: 237323
loss: 0.9962117075920105,grad_norm: 0.9999991296149889, iteration: 237324
loss: 1.0260449647903442,grad_norm: 0.9328872071736648, iteration: 237325
loss: 1.006282091140747,grad_norm: 0.9999994999001486, iteration: 237326
loss: 1.014399528503418,grad_norm: 0.9278297938863849, iteration: 237327
loss: 0.9987671375274658,grad_norm: 0.8661558046557375, iteration: 237328
loss: 0.9852869510650635,grad_norm: 0.8578471148853609, iteration: 237329
loss: 1.0029163360595703,grad_norm: 0.926535015266951, iteration: 237330
loss: 0.9896081686019897,grad_norm: 0.9568715184127823, iteration: 237331
loss: 0.9813704490661621,grad_norm: 0.95656197550752, iteration: 237332
loss: 0.97693932056427,grad_norm: 0.9999992312057296, iteration: 237333
loss: 0.9600138068199158,grad_norm: 0.906445636237976, iteration: 237334
loss: 1.0360674858093262,grad_norm: 0.9999993660613361, iteration: 237335
loss: 0.9812652468681335,grad_norm: 0.9795319105411934, iteration: 237336
loss: 0.995553195476532,grad_norm: 0.8911868594730978, iteration: 237337
loss: 1.0017757415771484,grad_norm: 0.9999989720895878, iteration: 237338
loss: 0.9927859902381897,grad_norm: 0.8979718648630635, iteration: 237339
loss: 0.9921067357063293,grad_norm: 0.7422307767765575, iteration: 237340
loss: 0.9700430035591125,grad_norm: 0.8592846168276015, iteration: 237341
loss: 1.0105297565460205,grad_norm: 0.8683810276507162, iteration: 237342
loss: 0.9955030083656311,grad_norm: 0.8874424071672913, iteration: 237343
loss: 0.9946194887161255,grad_norm: 0.9659942745258837, iteration: 237344
loss: 1.087697148323059,grad_norm: 0.8464414782313565, iteration: 237345
loss: 0.9681276679039001,grad_norm: 0.9999991697910314, iteration: 237346
loss: 0.9510887265205383,grad_norm: 0.9898208864134185, iteration: 237347
loss: 1.0041182041168213,grad_norm: 0.9572326349083818, iteration: 237348
loss: 1.0330711603164673,grad_norm: 0.9816614490959406, iteration: 237349
loss: 1.050104022026062,grad_norm: 0.9999992097464929, iteration: 237350
loss: 1.0286028385162354,grad_norm: 0.8256106050466444, iteration: 237351
loss: 0.9760127067565918,grad_norm: 0.9999991840942035, iteration: 237352
loss: 1.0193334817886353,grad_norm: 0.7730994525415351, iteration: 237353
loss: 0.9911514520645142,grad_norm: 0.9637808160218925, iteration: 237354
loss: 0.9900444149971008,grad_norm: 0.9603493512134186, iteration: 237355
loss: 0.9932528734207153,grad_norm: 0.9103962416475423, iteration: 237356
loss: 1.0006847381591797,grad_norm: 0.9999991579221373, iteration: 237357
loss: 0.9577801823616028,grad_norm: 0.9417650097287553, iteration: 237358
loss: 1.0285853147506714,grad_norm: 0.9999990877818231, iteration: 237359
loss: 0.9614124894142151,grad_norm: 0.847326043999015, iteration: 237360
loss: 0.9597996473312378,grad_norm: 0.9999995196154398, iteration: 237361
loss: 0.9788047671318054,grad_norm: 0.9999992406935925, iteration: 237362
loss: 0.9777123928070068,grad_norm: 0.9622238674205303, iteration: 237363
loss: 0.966074526309967,grad_norm: 0.9566687598969615, iteration: 237364
loss: 1.0011723041534424,grad_norm: 0.9249811796931924, iteration: 237365
loss: 1.028988242149353,grad_norm: 0.7706813550641164, iteration: 237366
loss: 0.9988331198692322,grad_norm: 0.9999990900927396, iteration: 237367
loss: 0.966095507144928,grad_norm: 0.8394086404960129, iteration: 237368
loss: 1.0039429664611816,grad_norm: 0.7835698824508386, iteration: 237369
loss: 1.0512688159942627,grad_norm: 0.9999994555045332, iteration: 237370
loss: 1.0331670045852661,grad_norm: 0.9999990168112457, iteration: 237371
loss: 1.0735872983932495,grad_norm: 0.9999991658258831, iteration: 237372
loss: 1.0057989358901978,grad_norm: 0.9222362682960787, iteration: 237373
loss: 1.025212049484253,grad_norm: 0.9999991697206408, iteration: 237374
loss: 0.9773103594779968,grad_norm: 0.9999989693765527, iteration: 237375
loss: 0.9831008911132812,grad_norm: 0.7823907186916399, iteration: 237376
loss: 0.9930403828620911,grad_norm: 0.9999989844750387, iteration: 237377
loss: 0.9883550405502319,grad_norm: 0.9999989627868197, iteration: 237378
loss: 0.9696266055107117,grad_norm: 0.9341308682223707, iteration: 237379
loss: 0.9739809036254883,grad_norm: 0.8796446105611867, iteration: 237380
loss: 1.0257909297943115,grad_norm: 0.999999082331624, iteration: 237381
loss: 0.9757218956947327,grad_norm: 0.9999991985077967, iteration: 237382
loss: 1.0517001152038574,grad_norm: 0.8729583180270658, iteration: 237383
loss: 0.9857358932495117,grad_norm: 0.920059859967933, iteration: 237384
loss: 0.9945082664489746,grad_norm: 0.999999062715593, iteration: 237385
loss: 0.9930216670036316,grad_norm: 0.9552280414502524, iteration: 237386
loss: 0.9878060221672058,grad_norm: 0.9999995783275908, iteration: 237387
loss: 1.0108283758163452,grad_norm: 0.9999992810079313, iteration: 237388
loss: 1.0224155187606812,grad_norm: 0.8862585310779946, iteration: 237389
loss: 0.9893016219139099,grad_norm: 0.7869308312729557, iteration: 237390
loss: 1.0036864280700684,grad_norm: 0.9337880690883926, iteration: 237391
loss: 1.0249841213226318,grad_norm: 0.999998932108641, iteration: 237392
loss: 1.0148272514343262,grad_norm: 0.8301857670276115, iteration: 237393
loss: 0.997361958026886,grad_norm: 0.9872234409228591, iteration: 237394
loss: 0.994647204875946,grad_norm: 0.9259648704569491, iteration: 237395
loss: 1.000934362411499,grad_norm: 0.9391833483917056, iteration: 237396
loss: 0.9816057682037354,grad_norm: 0.956028397197258, iteration: 237397
loss: 0.9946247339248657,grad_norm: 0.8482749747641738, iteration: 237398
loss: 1.0209904909133911,grad_norm: 0.8565089335393085, iteration: 237399
loss: 0.9912903904914856,grad_norm: 0.8418515553720533, iteration: 237400
loss: 1.0084257125854492,grad_norm: 0.9985549213204619, iteration: 237401
loss: 0.9974919557571411,grad_norm: 0.7144486735495048, iteration: 237402
loss: 0.9380665421485901,grad_norm: 0.9274883724732053, iteration: 237403
loss: 0.9936630129814148,grad_norm: 0.8599353863487176, iteration: 237404
loss: 1.0229073762893677,grad_norm: 0.9999996902149787, iteration: 237405
loss: 1.198020339012146,grad_norm: 0.9999998734555727, iteration: 237406
loss: 0.9796065092086792,grad_norm: 0.923983221363936, iteration: 237407
loss: 1.0119922161102295,grad_norm: 0.9999991684301065, iteration: 237408
loss: 1.0178886651992798,grad_norm: 0.8641753600866956, iteration: 237409
loss: 1.0057356357574463,grad_norm: 0.999999547336578, iteration: 237410
loss: 1.0831223726272583,grad_norm: 0.9300343256713436, iteration: 237411
loss: 0.9663763642311096,grad_norm: 0.9923011967750531, iteration: 237412
loss: 0.9571012258529663,grad_norm: 0.9999998281172074, iteration: 237413
loss: 0.968898594379425,grad_norm: 0.815817181521068, iteration: 237414
loss: 0.9835969805717468,grad_norm: 0.9674215963087702, iteration: 237415
loss: 1.0096831321716309,grad_norm: 0.9513566605612165, iteration: 237416
loss: 1.0079941749572754,grad_norm: 0.8840412787416504, iteration: 237417
loss: 0.981916069984436,grad_norm: 0.8623378954723967, iteration: 237418
loss: 0.9585651159286499,grad_norm: 0.970921870033818, iteration: 237419
loss: 0.9902071356773376,grad_norm: 0.9999991063448008, iteration: 237420
loss: 1.0317827463150024,grad_norm: 0.9999990669634493, iteration: 237421
loss: 1.0005549192428589,grad_norm: 0.91399704733994, iteration: 237422
loss: 1.0293207168579102,grad_norm: 0.9908817232055671, iteration: 237423
loss: 1.0277516841888428,grad_norm: 0.9999992899440305, iteration: 237424
loss: 0.9791103601455688,grad_norm: 0.9470883928800381, iteration: 237425
loss: 0.9828089475631714,grad_norm: 0.9229993496959997, iteration: 237426
loss: 1.005120873451233,grad_norm: 0.9999990775800918, iteration: 237427
loss: 0.9932643175125122,grad_norm: 0.8772875611940651, iteration: 237428
loss: 1.0300605297088623,grad_norm: 0.847663112615389, iteration: 237429
loss: 0.9554072618484497,grad_norm: 0.8673635109717427, iteration: 237430
loss: 0.9905440211296082,grad_norm: 0.8709510371711732, iteration: 237431
loss: 0.9773097038269043,grad_norm: 0.8108236709067352, iteration: 237432
loss: 0.9700020551681519,grad_norm: 0.8367367321600828, iteration: 237433
loss: 1.0290522575378418,grad_norm: 0.9999990555248098, iteration: 237434
loss: 1.0961843729019165,grad_norm: 0.9999995485070358, iteration: 237435
loss: 1.0965090990066528,grad_norm: 0.9999991268703162, iteration: 237436
loss: 1.0119272470474243,grad_norm: 0.7262558220168308, iteration: 237437
loss: 0.9850923418998718,grad_norm: 0.99999924048907, iteration: 237438
loss: 0.996484637260437,grad_norm: 0.8285971401876168, iteration: 237439
loss: 1.0365232229232788,grad_norm: 0.9843740149088084, iteration: 237440
loss: 1.0279234647750854,grad_norm: 0.9999990478161039, iteration: 237441
loss: 0.9834005236625671,grad_norm: 0.999999771319115, iteration: 237442
loss: 0.9974665641784668,grad_norm: 0.9999998201626222, iteration: 237443
loss: 0.9919572472572327,grad_norm: 0.893688711778457, iteration: 237444
loss: 0.9993870854377747,grad_norm: 0.9627170754310033, iteration: 237445
loss: 1.0350885391235352,grad_norm: 0.9596724177770709, iteration: 237446
loss: 0.9787867069244385,grad_norm: 0.9751372350527105, iteration: 237447
loss: 0.9708617329597473,grad_norm: 0.9491913717378998, iteration: 237448
loss: 1.0546252727508545,grad_norm: 0.9524924143068548, iteration: 237449
loss: 0.9918778538703918,grad_norm: 0.882115132663537, iteration: 237450
loss: 0.9791586399078369,grad_norm: 0.8492723330910219, iteration: 237451
loss: 1.03786039352417,grad_norm: 0.9999994360887359, iteration: 237452
loss: 0.9985255599021912,grad_norm: 0.8217178367637321, iteration: 237453
loss: 1.0188974142074585,grad_norm: 0.7688109243514917, iteration: 237454
loss: 1.0164653062820435,grad_norm: 0.8689998747012969, iteration: 237455
loss: 1.0447208881378174,grad_norm: 0.9999992746816919, iteration: 237456
loss: 0.9976540803909302,grad_norm: 0.9999991574326536, iteration: 237457
loss: 0.9858924150466919,grad_norm: 0.8327234164158195, iteration: 237458
loss: 0.9926233887672424,grad_norm: 0.9431833025702216, iteration: 237459
loss: 0.982601523399353,grad_norm: 0.7968312638337023, iteration: 237460
loss: 0.9914819598197937,grad_norm: 0.8461677014578498, iteration: 237461
loss: 1.0383129119873047,grad_norm: 0.8942264843189318, iteration: 237462
loss: 0.9881964921951294,grad_norm: 0.8927200447988409, iteration: 237463
loss: 0.9783142805099487,grad_norm: 0.8401898356885205, iteration: 237464
loss: 0.9900713562965393,grad_norm: 0.9080259157690467, iteration: 237465
loss: 1.0198581218719482,grad_norm: 0.999999077237552, iteration: 237466
loss: 0.9798600673675537,grad_norm: 0.8761154361120324, iteration: 237467
loss: 0.9825922250747681,grad_norm: 0.9904596543326882, iteration: 237468
loss: 0.9825895428657532,grad_norm: 0.9245209282568595, iteration: 237469
loss: 0.9949677586555481,grad_norm: 0.9999998269884844, iteration: 237470
loss: 1.0144344568252563,grad_norm: 0.8556251570987186, iteration: 237471
loss: 1.000333547592163,grad_norm: 0.8979743982411207, iteration: 237472
loss: 0.9990793466567993,grad_norm: 0.999999029353313, iteration: 237473
loss: 1.0207419395446777,grad_norm: 0.9999991750274715, iteration: 237474
loss: 1.0427314043045044,grad_norm: 0.8181877726310958, iteration: 237475
loss: 1.001774549484253,grad_norm: 0.9067870092453805, iteration: 237476
loss: 1.0388234853744507,grad_norm: 0.9786782665427795, iteration: 237477
loss: 1.0159435272216797,grad_norm: 0.9631720886716286, iteration: 237478
loss: 1.0495524406433105,grad_norm: 0.9999996071710227, iteration: 237479
loss: 0.9805572628974915,grad_norm: 0.9999992763249849, iteration: 237480
loss: 1.018186092376709,grad_norm: 0.9192767200026536, iteration: 237481
loss: 0.9939329028129578,grad_norm: 0.9059629850136885, iteration: 237482
loss: 1.0292165279388428,grad_norm: 0.9999990601592937, iteration: 237483
loss: 0.9868077635765076,grad_norm: 0.8999960236539395, iteration: 237484
loss: 0.9931599497795105,grad_norm: 0.9900572194332375, iteration: 237485
loss: 1.0049395561218262,grad_norm: 0.8709257352803718, iteration: 237486
loss: 0.9895406365394592,grad_norm: 0.8080255924077184, iteration: 237487
loss: 1.0059839487075806,grad_norm: 0.8576105662583129, iteration: 237488
loss: 0.9939316511154175,grad_norm: 0.9999990713026597, iteration: 237489
loss: 0.9863677620887756,grad_norm: 0.9986082730308643, iteration: 237490
loss: 1.0039865970611572,grad_norm: 0.8188346940971218, iteration: 237491
loss: 1.0050855875015259,grad_norm: 0.9999991946330657, iteration: 237492
loss: 1.0077078342437744,grad_norm: 0.7497079912252286, iteration: 237493
loss: 0.9705073833465576,grad_norm: 0.9588403152851737, iteration: 237494
loss: 1.0329583883285522,grad_norm: 0.9999996416381576, iteration: 237495
loss: 0.9867588877677917,grad_norm: 0.9701424966357343, iteration: 237496
loss: 1.0542148351669312,grad_norm: 0.9999993529036487, iteration: 237497
loss: 0.9206088781356812,grad_norm: 0.9764178418304343, iteration: 237498
loss: 0.9422872066497803,grad_norm: 0.92422464881837, iteration: 237499
loss: 1.0081363916397095,grad_norm: 0.9999990444012997, iteration: 237500
loss: 1.010467290878296,grad_norm: 0.8681721215869963, iteration: 237501
loss: 1.0073908567428589,grad_norm: 0.8550410941848813, iteration: 237502
loss: 1.0027412176132202,grad_norm: 0.8752994454075482, iteration: 237503
loss: 1.0242586135864258,grad_norm: 0.9999991057633306, iteration: 237504
loss: 0.9791273474693298,grad_norm: 0.895188670585624, iteration: 237505
loss: 1.119484782218933,grad_norm: 0.9999999063591057, iteration: 237506
loss: 0.9785138368606567,grad_norm: 0.8866614280007227, iteration: 237507
loss: 1.036710500717163,grad_norm: 0.9470308798300235, iteration: 237508
loss: 1.0100140571594238,grad_norm: 0.9999997343437734, iteration: 237509
loss: 1.016727328300476,grad_norm: 0.9999991575559102, iteration: 237510
loss: 1.0141717195510864,grad_norm: 0.9999990889277095, iteration: 237511
loss: 0.9688732624053955,grad_norm: 0.9999989962671839, iteration: 237512
loss: 1.017297625541687,grad_norm: 0.9746989303284465, iteration: 237513
loss: 0.9861013293266296,grad_norm: 0.8772235262584732, iteration: 237514
loss: 0.9823489189147949,grad_norm: 0.9232685968492612, iteration: 237515
loss: 0.9993529319763184,grad_norm: 0.9266995272777996, iteration: 237516
loss: 1.1130778789520264,grad_norm: 0.9999998205146574, iteration: 237517
loss: 0.9910401701927185,grad_norm: 0.7514488773529928, iteration: 237518
loss: 1.023435354232788,grad_norm: 0.9999994094961748, iteration: 237519
loss: 1.017065405845642,grad_norm: 0.9239922772443501, iteration: 237520
loss: 1.014005184173584,grad_norm: 0.8249747953036481, iteration: 237521
loss: 1.0300391912460327,grad_norm: 0.9060944852653795, iteration: 237522
loss: 1.0278688669204712,grad_norm: 0.9335196206862516, iteration: 237523
loss: 0.9928432703018188,grad_norm: 0.7998250028760301, iteration: 237524
loss: 0.9797269701957703,grad_norm: 0.9999990221979886, iteration: 237525
loss: 1.1813979148864746,grad_norm: 0.9999992275605883, iteration: 237526
loss: 1.0238577127456665,grad_norm: 0.9999989334058468, iteration: 237527
loss: 0.9377381801605225,grad_norm: 0.9497842785073406, iteration: 237528
loss: 1.0434212684631348,grad_norm: 0.9999996623689623, iteration: 237529
loss: 1.0127003192901611,grad_norm: 0.9999990992741413, iteration: 237530
loss: 1.0336542129516602,grad_norm: 0.9999990665707558, iteration: 237531
loss: 1.0005762577056885,grad_norm: 0.8835839855986604, iteration: 237532
loss: 1.0163973569869995,grad_norm: 0.9775427870782347, iteration: 237533
loss: 1.069908857345581,grad_norm: 0.999999381966492, iteration: 237534
loss: 0.994828999042511,grad_norm: 0.970414795993922, iteration: 237535
loss: 0.9883275032043457,grad_norm: 0.8577274933681563, iteration: 237536
loss: 0.9968686103820801,grad_norm: 0.8520935146295486, iteration: 237537
loss: 0.9837497472763062,grad_norm: 0.8994089151148618, iteration: 237538
loss: 1.0450843572616577,grad_norm: 0.9999989995154335, iteration: 237539
loss: 1.0649420022964478,grad_norm: 0.9999995238704441, iteration: 237540
loss: 0.9845178127288818,grad_norm: 0.8438307013067552, iteration: 237541
loss: 1.068560242652893,grad_norm: 0.9999991385804942, iteration: 237542
loss: 0.9693152904510498,grad_norm: 0.9076158404298316, iteration: 237543
loss: 0.9630742073059082,grad_norm: 0.8999252052161147, iteration: 237544
loss: 1.0072407722473145,grad_norm: 0.8370015787462712, iteration: 237545
loss: 1.0103503465652466,grad_norm: 0.999999098766675, iteration: 237546
loss: 1.06537926197052,grad_norm: 0.9999991035284499, iteration: 237547
loss: 1.0112489461898804,grad_norm: 0.7748593243029482, iteration: 237548
loss: 0.9870960116386414,grad_norm: 0.9999991618403586, iteration: 237549
loss: 1.0003200769424438,grad_norm: 0.8224493606067321, iteration: 237550
loss: 1.0286301374435425,grad_norm: 0.9999991630576569, iteration: 237551
loss: 1.0153156518936157,grad_norm: 0.856198832854145, iteration: 237552
loss: 0.9829984903335571,grad_norm: 0.9999994464413654, iteration: 237553
loss: 0.963683009147644,grad_norm: 0.9999990155833104, iteration: 237554
loss: 1.1155366897583008,grad_norm: 0.9999997172435727, iteration: 237555
loss: 1.0735019445419312,grad_norm: 0.9074376826848183, iteration: 237556
loss: 1.0077465772628784,grad_norm: 0.7525358488021059, iteration: 237557
loss: 1.0141029357910156,grad_norm: 0.9999993077829632, iteration: 237558
loss: 0.9904897212982178,grad_norm: 0.8861648408589484, iteration: 237559
loss: 0.9602971076965332,grad_norm: 0.9999990893418594, iteration: 237560
loss: 0.9494373798370361,grad_norm: 0.924301667855366, iteration: 237561
loss: 0.9928221702575684,grad_norm: 0.7799734233670482, iteration: 237562
loss: 0.9980980157852173,grad_norm: 0.8853357961731745, iteration: 237563
loss: 1.0072247982025146,grad_norm: 0.9192871081742798, iteration: 237564
loss: 1.0011155605316162,grad_norm: 0.9999992312051577, iteration: 237565
loss: 1.0075629949569702,grad_norm: 0.9346108590159099, iteration: 237566
loss: 1.0324372053146362,grad_norm: 0.9999999100492691, iteration: 237567
loss: 1.047948956489563,grad_norm: 0.9999990740424185, iteration: 237568
loss: 0.9793542623519897,grad_norm: 0.999999121689128, iteration: 237569
loss: 1.0293596982955933,grad_norm: 0.9999989440073779, iteration: 237570
loss: 1.025793433189392,grad_norm: 0.8714233484262365, iteration: 237571
loss: 1.0495418310165405,grad_norm: 0.9999991883380855, iteration: 237572
loss: 1.023149847984314,grad_norm: 0.9484229608497273, iteration: 237573
loss: 0.987880527973175,grad_norm: 0.9999990893945492, iteration: 237574
loss: 1.021904706954956,grad_norm: 0.9999991961242939, iteration: 237575
loss: 1.0428524017333984,grad_norm: 0.9999991670744907, iteration: 237576
loss: 1.0199307203292847,grad_norm: 0.9679041319363715, iteration: 237577
loss: 1.0727174282073975,grad_norm: 0.9999990516169992, iteration: 237578
loss: 0.9908308982849121,grad_norm: 0.9999990947819442, iteration: 237579
loss: 0.9969774484634399,grad_norm: 0.795972318171564, iteration: 237580
loss: 0.9642573595046997,grad_norm: 0.9999989780918639, iteration: 237581
loss: 1.0698869228363037,grad_norm: 0.8660498970047138, iteration: 237582
loss: 0.9867550730705261,grad_norm: 0.9873270496440487, iteration: 237583
loss: 0.9752829074859619,grad_norm: 0.8842411904377537, iteration: 237584
loss: 1.0082383155822754,grad_norm: 0.9863272644268384, iteration: 237585
loss: 1.0237914323806763,grad_norm: 0.9925102300630669, iteration: 237586
loss: 1.0050342082977295,grad_norm: 0.9563734677932522, iteration: 237587
loss: 0.9895346164703369,grad_norm: 0.9999990589414602, iteration: 237588
loss: 0.9864434003829956,grad_norm: 0.741075515659294, iteration: 237589
loss: 1.005436897277832,grad_norm: 0.9999992219619681, iteration: 237590
loss: 1.0100277662277222,grad_norm: 0.833136771135503, iteration: 237591
loss: 0.9920902848243713,grad_norm: 0.999999157229951, iteration: 237592
loss: 1.015848159790039,grad_norm: 0.9999991036198033, iteration: 237593
loss: 0.9896824955940247,grad_norm: 0.8860478066654455, iteration: 237594
loss: 1.1425858736038208,grad_norm: 0.999999113789003, iteration: 237595
loss: 0.9824681282043457,grad_norm: 0.9523980126321203, iteration: 237596
loss: 0.9961974620819092,grad_norm: 0.9999989684350776, iteration: 237597
loss: 1.0006306171417236,grad_norm: 0.9716496956924597, iteration: 237598
loss: 1.0960627794265747,grad_norm: 0.9870585679825947, iteration: 237599
loss: 0.9643800258636475,grad_norm: 0.8467505110975968, iteration: 237600
loss: 1.0419995784759521,grad_norm: 0.999999068601248, iteration: 237601
loss: 0.997506320476532,grad_norm: 0.9999992007898032, iteration: 237602
loss: 1.0142277479171753,grad_norm: 0.874566715753233, iteration: 237603
loss: 0.9718450307846069,grad_norm: 0.766233542834045, iteration: 237604
loss: 1.033367395401001,grad_norm: 0.8627897858169177, iteration: 237605
loss: 1.0700573921203613,grad_norm: 0.8870565689360687, iteration: 237606
loss: 1.038965106010437,grad_norm: 0.9103734108159731, iteration: 237607
loss: 1.0054235458374023,grad_norm: 0.9999990870499949, iteration: 237608
loss: 0.9657152891159058,grad_norm: 0.9999991322918781, iteration: 237609
loss: 0.9642128944396973,grad_norm: 0.8167715005975109, iteration: 237610
loss: 1.0239264965057373,grad_norm: 0.8896410961410637, iteration: 237611
loss: 1.0310002565383911,grad_norm: 0.9999990585772339, iteration: 237612
loss: 0.9922690987586975,grad_norm: 0.9999992120726924, iteration: 237613
loss: 0.9839436411857605,grad_norm: 0.8545465149199056, iteration: 237614
loss: 0.9812565445899963,grad_norm: 0.9209665567145678, iteration: 237615
loss: 0.9443466067314148,grad_norm: 0.9999989839231186, iteration: 237616
loss: 0.981380820274353,grad_norm: 0.9527443781374041, iteration: 237617
loss: 0.9892901182174683,grad_norm: 0.7946664931585938, iteration: 237618
loss: 0.9932159781455994,grad_norm: 0.8450264151363477, iteration: 237619
loss: 1.0149532556533813,grad_norm: 0.8154348870427578, iteration: 237620
loss: 0.99365234375,grad_norm: 0.9199977860599368, iteration: 237621
loss: 0.973449170589447,grad_norm: 0.9190012829635932, iteration: 237622
loss: 1.0225611925125122,grad_norm: 0.9748133459111857, iteration: 237623
loss: 1.0173065662384033,grad_norm: 0.9999999117553104, iteration: 237624
loss: 1.0115727186203003,grad_norm: 0.9999990254250343, iteration: 237625
loss: 0.9936171174049377,grad_norm: 0.7613645157584059, iteration: 237626
loss: 1.013566493988037,grad_norm: 0.9434012361914241, iteration: 237627
loss: 0.9723944067955017,grad_norm: 0.8686398297570438, iteration: 237628
loss: 1.025085687637329,grad_norm: 0.9847075871806268, iteration: 237629
loss: 0.9896470904350281,grad_norm: 0.9606257072323008, iteration: 237630
loss: 0.9899334907531738,grad_norm: 0.9999991928268258, iteration: 237631
loss: 0.9327176809310913,grad_norm: 0.8945874774577102, iteration: 237632
loss: 1.0008504390716553,grad_norm: 0.7786343863810083, iteration: 237633
loss: 1.0317003726959229,grad_norm: 0.9999990592835871, iteration: 237634
loss: 1.0264769792556763,grad_norm: 1.0000000228327024, iteration: 237635
loss: 1.028017520904541,grad_norm: 0.9999988302050763, iteration: 237636
loss: 1.0007236003875732,grad_norm: 0.9999992677415255, iteration: 237637
loss: 0.985866129398346,grad_norm: 0.7623616939792489, iteration: 237638
loss: 1.1050326824188232,grad_norm: 0.9999990708561419, iteration: 237639
loss: 0.9694649577140808,grad_norm: 0.8473154460316791, iteration: 237640
loss: 1.0067051649093628,grad_norm: 0.999999236172137, iteration: 237641
loss: 1.0005851984024048,grad_norm: 0.9704194976719396, iteration: 237642
loss: 1.0075230598449707,grad_norm: 0.9999993728816241, iteration: 237643
loss: 0.9587225317955017,grad_norm: 0.9594541495598374, iteration: 237644
loss: 0.9924272298812866,grad_norm: 0.9999990596172729, iteration: 237645
loss: 1.009460687637329,grad_norm: 0.9458165914303415, iteration: 237646
loss: 1.10763680934906,grad_norm: 0.9999994326480854, iteration: 237647
loss: 1.0343728065490723,grad_norm: 0.9999990271474118, iteration: 237648
loss: 0.9878320693969727,grad_norm: 0.8763753209396733, iteration: 237649
loss: 1.0130043029785156,grad_norm: 0.977928696638508, iteration: 237650
loss: 0.9834133386611938,grad_norm: 0.9452820352351687, iteration: 237651
loss: 1.029675841331482,grad_norm: 0.9999991725900538, iteration: 237652
loss: 0.9965053200721741,grad_norm: 0.7944084044375834, iteration: 237653
loss: 1.002277135848999,grad_norm: 0.9999998505240817, iteration: 237654
loss: 1.0130618810653687,grad_norm: 0.9346754877282089, iteration: 237655
loss: 1.0177497863769531,grad_norm: 0.9999991036316354, iteration: 237656
loss: 0.9999186396598816,grad_norm: 0.9390965768103836, iteration: 237657
loss: 1.0226956605911255,grad_norm: 0.9999991446004876, iteration: 237658
loss: 1.06694757938385,grad_norm: 0.9999991385412964, iteration: 237659
loss: 0.9519886374473572,grad_norm: 0.9999989885837898, iteration: 237660
loss: 0.9995143413543701,grad_norm: 0.8984486853977106, iteration: 237661
loss: 1.0064972639083862,grad_norm: 0.8801779326951407, iteration: 237662
loss: 1.0332472324371338,grad_norm: 0.9999990396089243, iteration: 237663
loss: 0.9914023280143738,grad_norm: 0.9999990737904756, iteration: 237664
loss: 1.0076639652252197,grad_norm: 0.9999991152987177, iteration: 237665
loss: 0.9978712797164917,grad_norm: 0.9771905879257324, iteration: 237666
loss: 1.0142600536346436,grad_norm: 0.9125990594795119, iteration: 237667
loss: 1.019579529762268,grad_norm: 0.9910256707995078, iteration: 237668
loss: 1.008152723312378,grad_norm: 0.7454665524703998, iteration: 237669
loss: 1.0132838487625122,grad_norm: 0.7090585675578883, iteration: 237670
loss: 1.0651379823684692,grad_norm: 0.999999184908475, iteration: 237671
loss: 0.9870132803916931,grad_norm: 0.9046112237336067, iteration: 237672
loss: 1.0028271675109863,grad_norm: 0.8728471916404175, iteration: 237673
loss: 1.0402425527572632,grad_norm: 0.9999994477666851, iteration: 237674
loss: 1.0766829252243042,grad_norm: 0.9999990926987663, iteration: 237675
loss: 0.9940615892410278,grad_norm: 0.8704094660865219, iteration: 237676
loss: 0.981198787689209,grad_norm: 0.9814598085698534, iteration: 237677
loss: 0.9573544263839722,grad_norm: 0.8117130462816174, iteration: 237678
loss: 1.021317481994629,grad_norm: 0.9476433088120303, iteration: 237679
loss: 1.0229886770248413,grad_norm: 0.9086177274958851, iteration: 237680
loss: 1.0475636720657349,grad_norm: 0.9999990762790805, iteration: 237681
loss: 0.9912717938423157,grad_norm: 0.9999996132055352, iteration: 237682
loss: 1.0302597284317017,grad_norm: 0.9999989990834728, iteration: 237683
loss: 0.9776243567466736,grad_norm: 0.9253277085299383, iteration: 237684
loss: 0.9779241681098938,grad_norm: 0.9999991378294963, iteration: 237685
loss: 0.9916935563087463,grad_norm: 0.7538365801298496, iteration: 237686
loss: 0.9791719317436218,grad_norm: 0.9999991283272912, iteration: 237687
loss: 1.0061352252960205,grad_norm: 0.853740894295482, iteration: 237688
loss: 0.9917963147163391,grad_norm: 0.9999992523293771, iteration: 237689
loss: 0.9855201840400696,grad_norm: 0.9849855348979527, iteration: 237690
loss: 1.0144449472427368,grad_norm: 0.9999990962810652, iteration: 237691
loss: 0.9727672338485718,grad_norm: 0.991991594862199, iteration: 237692
loss: 1.0217241048812866,grad_norm: 0.8250902277705964, iteration: 237693
loss: 0.9865416884422302,grad_norm: 0.9158431250831552, iteration: 237694
loss: 1.1963118314743042,grad_norm: 0.9999999284444377, iteration: 237695
loss: 0.9835841059684753,grad_norm: 0.9999991622178886, iteration: 237696
loss: 1.0777881145477295,grad_norm: 0.9999991535052402, iteration: 237697
loss: 0.9685019850730896,grad_norm: 0.8727503342144083, iteration: 237698
loss: 0.9813959002494812,grad_norm: 0.7937858700447219, iteration: 237699
loss: 0.9845269322395325,grad_norm: 0.9638252599006163, iteration: 237700
loss: 1.0441182851791382,grad_norm: 0.9999997873352413, iteration: 237701
loss: 1.019842505455017,grad_norm: 0.9999990113512452, iteration: 237702
loss: 0.9923376441001892,grad_norm: 0.8653431606712643, iteration: 237703
loss: 0.9970439672470093,grad_norm: 0.8760099753220406, iteration: 237704
loss: 1.0143728256225586,grad_norm: 0.8570962362034067, iteration: 237705
loss: 0.997294545173645,grad_norm: 0.9999999387974794, iteration: 237706
loss: 1.0131094455718994,grad_norm: 0.9348313499029202, iteration: 237707
loss: 0.9829548001289368,grad_norm: 0.9023493490593396, iteration: 237708
loss: 1.0153628587722778,grad_norm: 0.9999991893153143, iteration: 237709
loss: 0.9723036289215088,grad_norm: 0.7613373788929261, iteration: 237710
loss: 1.0417333841323853,grad_norm: 0.9999990680063484, iteration: 237711
loss: 1.0044704675674438,grad_norm: 0.786048568247385, iteration: 237712
loss: 1.02302885055542,grad_norm: 0.8971660197327115, iteration: 237713
loss: 0.994982898235321,grad_norm: 0.9078071511938541, iteration: 237714
loss: 0.982668399810791,grad_norm: 0.990728775656899, iteration: 237715
loss: 0.9967123866081238,grad_norm: 0.8394474040027692, iteration: 237716
loss: 1.0076284408569336,grad_norm: 0.9999990649563514, iteration: 237717
loss: 1.0045439004898071,grad_norm: 0.8884566211888943, iteration: 237718
loss: 1.0082924365997314,grad_norm: 0.9999993232121361, iteration: 237719
loss: 1.0020804405212402,grad_norm: 0.8207131418245134, iteration: 237720
loss: 0.973484456539154,grad_norm: 0.8874685185261787, iteration: 237721
loss: 0.9922531247138977,grad_norm: 0.7544197811326974, iteration: 237722
loss: 0.9814091324806213,grad_norm: 0.941416949368985, iteration: 237723
loss: 1.0344200134277344,grad_norm: 0.8294448945202094, iteration: 237724
loss: 0.9952360391616821,grad_norm: 0.8502019951308233, iteration: 237725
loss: 1.011330485343933,grad_norm: 0.907291037061169, iteration: 237726
loss: 1.1886436939239502,grad_norm: 0.9999994486330019, iteration: 237727
loss: 0.9705354571342468,grad_norm: 0.8463768967336085, iteration: 237728
loss: 1.0168581008911133,grad_norm: 0.9999996739630824, iteration: 237729
loss: 1.0052473545074463,grad_norm: 0.9099381138083649, iteration: 237730
loss: 1.030488133430481,grad_norm: 0.7980052069319058, iteration: 237731
loss: 0.9991198778152466,grad_norm: 0.83714362290185, iteration: 237732
loss: 1.0229294300079346,grad_norm: 0.9999990867776597, iteration: 237733
loss: 0.9827448725700378,grad_norm: 0.8854207129046954, iteration: 237734
loss: 0.9962711930274963,grad_norm: 0.8307002617507047, iteration: 237735
loss: 0.9816824197769165,grad_norm: 0.9999992438332634, iteration: 237736
loss: 1.0168037414550781,grad_norm: 0.9482696191220136, iteration: 237737
loss: 0.9745616912841797,grad_norm: 0.9428766404631833, iteration: 237738
loss: 1.0473867654800415,grad_norm: 0.8245121039067994, iteration: 237739
loss: 1.0233449935913086,grad_norm: 0.9007351002428496, iteration: 237740
loss: 0.99858558177948,grad_norm: 0.7955211809117442, iteration: 237741
loss: 0.9856839179992676,grad_norm: 0.9999990554657181, iteration: 237742
loss: 1.006764531135559,grad_norm: 0.9999991122499349, iteration: 237743
loss: 1.0171600580215454,grad_norm: 0.9999990257226542, iteration: 237744
loss: 0.9934322834014893,grad_norm: 0.9999991858238684, iteration: 237745
loss: 0.9857844114303589,grad_norm: 0.939194651042892, iteration: 237746
loss: 0.9555753469467163,grad_norm: 0.8695683344845385, iteration: 237747
loss: 1.0007244348526,grad_norm: 0.9999990091463031, iteration: 237748
loss: 0.9626413583755493,grad_norm: 0.8564998360809726, iteration: 237749
loss: 0.9868345856666565,grad_norm: 0.8050523696649126, iteration: 237750
loss: 1.0355987548828125,grad_norm: 0.8965678881915566, iteration: 237751
loss: 0.9989712238311768,grad_norm: 0.999999126573398, iteration: 237752
loss: 0.9786893725395203,grad_norm: 0.9942644502743735, iteration: 237753
loss: 0.9735888242721558,grad_norm: 0.9221418533419954, iteration: 237754
loss: 1.009497880935669,grad_norm: 0.9999990347949683, iteration: 237755
loss: 0.9974092245101929,grad_norm: 0.9999988921300897, iteration: 237756
loss: 0.9974029660224915,grad_norm: 0.8223778811301059, iteration: 237757
loss: 1.0049092769622803,grad_norm: 0.9999989912109848, iteration: 237758
loss: 1.0571061372756958,grad_norm: 0.9999989202487728, iteration: 237759
loss: 1.0111843347549438,grad_norm: 0.9999990388208001, iteration: 237760
loss: 1.0092483758926392,grad_norm: 0.9758929509317956, iteration: 237761
loss: 1.017186164855957,grad_norm: 0.9999992383959162, iteration: 237762
loss: 0.9901350140571594,grad_norm: 0.801334088006179, iteration: 237763
loss: 1.0171542167663574,grad_norm: 0.9999992439129457, iteration: 237764
loss: 0.9781174659729004,grad_norm: 0.892941127461714, iteration: 237765
loss: 1.0060292482376099,grad_norm: 0.9217448856149729, iteration: 237766
loss: 1.004123568534851,grad_norm: 0.9133393732065463, iteration: 237767
loss: 0.9864786863327026,grad_norm: 0.9640057610839855, iteration: 237768
loss: 1.0366493463516235,grad_norm: 0.9999993172891973, iteration: 237769
loss: 0.9937945604324341,grad_norm: 0.886701440036689, iteration: 237770
loss: 1.0722999572753906,grad_norm: 0.9999994226234679, iteration: 237771
loss: 1.0017063617706299,grad_norm: 0.7359300154486567, iteration: 237772
loss: 0.9875918030738831,grad_norm: 0.82143883527889, iteration: 237773
loss: 0.9953117966651917,grad_norm: 0.9999990905507319, iteration: 237774
loss: 0.9985080361366272,grad_norm: 0.9999992216935488, iteration: 237775
loss: 0.9996433258056641,grad_norm: 0.9131958084429793, iteration: 237776
loss: 0.9818425178527832,grad_norm: 0.9933038834513959, iteration: 237777
loss: 0.9948782324790955,grad_norm: 0.9175108178664206, iteration: 237778
loss: 1.0004069805145264,grad_norm: 0.7766696666289054, iteration: 237779
loss: 0.997062087059021,grad_norm: 0.8622568993607813, iteration: 237780
loss: 0.9964452385902405,grad_norm: 0.9065312448287927, iteration: 237781
loss: 0.9840091466903687,grad_norm: 0.8945089038327609, iteration: 237782
loss: 1.0034925937652588,grad_norm: 0.9999992404729218, iteration: 237783
loss: 0.9670369029045105,grad_norm: 0.9999990557902041, iteration: 237784
loss: 1.0139821767807007,grad_norm: 0.9811416244224179, iteration: 237785
loss: 1.026564598083496,grad_norm: 0.9004829171798334, iteration: 237786
loss: 1.00395667552948,grad_norm: 0.9999998902758408, iteration: 237787
loss: 1.00180983543396,grad_norm: 0.8349023487116124, iteration: 237788
loss: 1.0033056735992432,grad_norm: 0.9630083680189399, iteration: 237789
loss: 1.0120841264724731,grad_norm: 0.8950573768500458, iteration: 237790
loss: 0.9681729078292847,grad_norm: 0.8354409035877477, iteration: 237791
loss: 0.999045729637146,grad_norm: 0.8374541660302703, iteration: 237792
loss: 0.9213491678237915,grad_norm: 0.9999990024300536, iteration: 237793
loss: 1.0198748111724854,grad_norm: 0.7906407613602752, iteration: 237794
loss: 1.0007901191711426,grad_norm: 0.9580669115201056, iteration: 237795
loss: 0.9913792610168457,grad_norm: 0.9012329449056298, iteration: 237796
loss: 0.9914999604225159,grad_norm: 0.9315602346101919, iteration: 237797
loss: 1.0201469659805298,grad_norm: 0.7661944370863969, iteration: 237798
loss: 0.9796566963195801,grad_norm: 0.8627144160835455, iteration: 237799
loss: 1.2632852792739868,grad_norm: 0.9999997531637008, iteration: 237800
loss: 0.9843015074729919,grad_norm: 0.968320956031899, iteration: 237801
loss: 1.018048882484436,grad_norm: 0.8605193859146125, iteration: 237802
loss: 1.014349341392517,grad_norm: 0.920592481438852, iteration: 237803
loss: 0.9882538914680481,grad_norm: 0.8651915256957887, iteration: 237804
loss: 1.0051063299179077,grad_norm: 0.9999991857832343, iteration: 237805
loss: 1.0101758241653442,grad_norm: 0.9999990661096956, iteration: 237806
loss: 1.0233432054519653,grad_norm: 0.8811844974579097, iteration: 237807
loss: 1.0269473791122437,grad_norm: 0.9292347533646058, iteration: 237808
loss: 1.032334327697754,grad_norm: 0.969860041121825, iteration: 237809
loss: 0.9601311683654785,grad_norm: 0.9021809588146308, iteration: 237810
loss: 0.9838617444038391,grad_norm: 0.9999990771173394, iteration: 237811
loss: 0.9902032017707825,grad_norm: 0.9999992532169527, iteration: 237812
loss: 0.9988541603088379,grad_norm: 0.8084930688997417, iteration: 237813
loss: 1.0131114721298218,grad_norm: 0.9210613300057846, iteration: 237814
loss: 1.0128365755081177,grad_norm: 0.9601328607221459, iteration: 237815
loss: 0.9756908416748047,grad_norm: 0.8603625157685504, iteration: 237816
loss: 0.9835489988327026,grad_norm: 0.9189607428264793, iteration: 237817
loss: 0.9954710602760315,grad_norm: 0.7632928007681686, iteration: 237818
loss: 1.004549503326416,grad_norm: 0.9999991194603889, iteration: 237819
loss: 0.9777041673660278,grad_norm: 0.7957462799130498, iteration: 237820
loss: 0.9911325573921204,grad_norm: 0.8765345690130288, iteration: 237821
loss: 1.0158084630966187,grad_norm: 0.7750313883727803, iteration: 237822
loss: 1.0119578838348389,grad_norm: 0.8423674116561041, iteration: 237823
loss: 1.056113362312317,grad_norm: 0.9088301016003213, iteration: 237824
loss: 0.9956963062286377,grad_norm: 0.9999990941445708, iteration: 237825
loss: 0.9527239203453064,grad_norm: 0.7582074621676695, iteration: 237826
loss: 0.9980534315109253,grad_norm: 0.9091915091916766, iteration: 237827
loss: 1.0076380968093872,grad_norm: 0.8065975438074982, iteration: 237828
loss: 1.0383427143096924,grad_norm: 0.9999993779944227, iteration: 237829
loss: 1.0536006689071655,grad_norm: 0.9999997755217761, iteration: 237830
loss: 1.021619200706482,grad_norm: 0.7358340959276906, iteration: 237831
loss: 1.019161581993103,grad_norm: 0.8498261903163987, iteration: 237832
loss: 1.0108702182769775,grad_norm: 0.9999991487238752, iteration: 237833
loss: 0.9870061874389648,grad_norm: 0.7866868996216807, iteration: 237834
loss: 0.9990947246551514,grad_norm: 0.7389130010196315, iteration: 237835
loss: 1.1465340852737427,grad_norm: 0.9999997952982541, iteration: 237836
loss: 1.0370440483093262,grad_norm: 0.9999997297185577, iteration: 237837
loss: 1.1957833766937256,grad_norm: 0.9999994805954139, iteration: 237838
loss: 0.9793884754180908,grad_norm: 0.9999989513820765, iteration: 237839
loss: 0.9934467077255249,grad_norm: 0.9999991089040079, iteration: 237840
loss: 1.0905965566635132,grad_norm: 0.999999623583248, iteration: 237841
loss: 0.9665021300315857,grad_norm: 0.9204358042952625, iteration: 237842
loss: 1.0074089765548706,grad_norm: 0.9999990379751489, iteration: 237843
loss: 1.1636837720870972,grad_norm: 0.9999994889381801, iteration: 237844
loss: 1.02505362033844,grad_norm: 0.8804344431704819, iteration: 237845
loss: 1.0112966299057007,grad_norm: 0.9999991470993247, iteration: 237846
loss: 1.3564809560775757,grad_norm: 0.9999995238524358, iteration: 237847
loss: 1.2641562223434448,grad_norm: 0.9999993369804863, iteration: 237848
loss: 1.0492136478424072,grad_norm: 0.9999995500743482, iteration: 237849
loss: 1.2208590507507324,grad_norm: 0.9999996156697505, iteration: 237850
loss: 1.2112540006637573,grad_norm: 0.9999992397433211, iteration: 237851
loss: 1.1356151103973389,grad_norm: 0.8913779032307438, iteration: 237852
loss: 1.6539355516433716,grad_norm: 0.9999998680151947, iteration: 237853
loss: 1.3976221084594727,grad_norm: 0.9999996597804965, iteration: 237854
loss: 1.1154850721359253,grad_norm: 0.999999516009939, iteration: 237855
loss: 1.185755968093872,grad_norm: 0.999999133780051, iteration: 237856
loss: 2.0097718238830566,grad_norm: 0.9999998251384976, iteration: 237857
loss: 1.526200532913208,grad_norm: 0.9999998930393337, iteration: 237858
loss: 1.4952223300933838,grad_norm: 0.9999998730796451, iteration: 237859
loss: 1.5674892663955688,grad_norm: 0.9999999539770121, iteration: 237860
loss: 1.5940297842025757,grad_norm: 0.9999996415158102, iteration: 237861
loss: 1.5885785818099976,grad_norm: 0.9999999171784604, iteration: 237862
loss: 1.4895700216293335,grad_norm: 0.9999997456289479, iteration: 237863
loss: 1.8433972597122192,grad_norm: 0.9999998902747547, iteration: 237864
loss: 1.5946723222732544,grad_norm: 0.9999996995296625, iteration: 237865
loss: 1.7967857122421265,grad_norm: 0.9999997886691304, iteration: 237866
loss: 1.2931292057037354,grad_norm: 0.9999994332939183, iteration: 237867
loss: 1.7628192901611328,grad_norm: 0.9999999578197445, iteration: 237868
loss: 1.5474708080291748,grad_norm: 0.9999997028795604, iteration: 237869
loss: 1.5888359546661377,grad_norm: 0.9999997931627017, iteration: 237870
loss: 1.6445659399032593,grad_norm: 0.999999692692929, iteration: 237871
loss: 1.7773927450180054,grad_norm: 0.9999998043681556, iteration: 237872
loss: 1.6506645679473877,grad_norm: 0.9999995393355037, iteration: 237873
loss: 1.4576157331466675,grad_norm: 0.9999993004967216, iteration: 237874
loss: 1.8475533723831177,grad_norm: 0.9999998823687472, iteration: 237875
loss: 1.4978896379470825,grad_norm: 0.9999998566222019, iteration: 237876
loss: 1.7160946130752563,grad_norm: 0.9999996237819874, iteration: 237877
loss: 1.3230732679367065,grad_norm: 0.999999894997673, iteration: 237878
loss: 1.4332361221313477,grad_norm: 0.9999996876516899, iteration: 237879
loss: 1.6398746967315674,grad_norm: 0.9999997482268661, iteration: 237880
loss: 1.6322393417358398,grad_norm: 1.0000000594546283, iteration: 237881
loss: 1.347809910774231,grad_norm: 0.999999527419101, iteration: 237882
loss: 1.3350155353546143,grad_norm: 0.999999374352943, iteration: 237883
loss: 1.6330150365829468,grad_norm: 0.999999801107208, iteration: 237884
loss: 1.2678061723709106,grad_norm: 0.9999998613223638, iteration: 237885
loss: 1.4417697191238403,grad_norm: 0.999999938026587, iteration: 237886
loss: 1.3770506381988525,grad_norm: 0.999999698042922, iteration: 237887
loss: 1.3139843940734863,grad_norm: 0.9999996305540911, iteration: 237888
loss: 1.3625421524047852,grad_norm: 0.9999996754115195, iteration: 237889
loss: 1.203615427017212,grad_norm: 0.9999995580391591, iteration: 237890
loss: 1.341493010520935,grad_norm: 0.9999994902440326, iteration: 237891
loss: 1.2731037139892578,grad_norm: 0.999999643190631, iteration: 237892
loss: 1.213427186012268,grad_norm: 0.9999994158816601, iteration: 237893
loss: 1.2801411151885986,grad_norm: 0.999999778252243, iteration: 237894
loss: 1.2030096054077148,grad_norm: 0.99999986905425, iteration: 237895
loss: 1.1225498914718628,grad_norm: 0.9999992453395739, iteration: 237896
loss: 1.2319769859313965,grad_norm: 0.9999997890736915, iteration: 237897
loss: 1.0211149454116821,grad_norm: 0.9999990009374707, iteration: 237898
loss: 1.2013278007507324,grad_norm: 0.9999991857137509, iteration: 237899
loss: 1.0723912715911865,grad_norm: 0.9999993436582804, iteration: 237900
loss: 1.1636724472045898,grad_norm: 0.9999996237630869, iteration: 237901
loss: 1.0675503015518188,grad_norm: 0.9999998962675096, iteration: 237902
loss: 1.140271544456482,grad_norm: 0.9999995249062811, iteration: 237903
loss: 1.036956548690796,grad_norm: 0.8901522856871076, iteration: 237904
loss: 1.0383678674697876,grad_norm: 0.9999991836688173, iteration: 237905
loss: 1.1060211658477783,grad_norm: 0.9999996536141572, iteration: 237906
loss: 1.1264138221740723,grad_norm: 0.9999995615324839, iteration: 237907
loss: 1.1560335159301758,grad_norm: 0.9999995470210916, iteration: 237908
loss: 0.9757022261619568,grad_norm: 0.9999991278879709, iteration: 237909
loss: 1.080111026763916,grad_norm: 0.9999999207884674, iteration: 237910
loss: 1.036002516746521,grad_norm: 0.9999996003814038, iteration: 237911
loss: 1.0087110996246338,grad_norm: 0.9999993909092905, iteration: 237912
loss: 1.1657074689865112,grad_norm: 0.9999996628613698, iteration: 237913
loss: 1.1184101104736328,grad_norm: 0.9999994011354767, iteration: 237914
loss: 1.0932585000991821,grad_norm: 0.9999996591292268, iteration: 237915
loss: 1.0408709049224854,grad_norm: 0.923711652165166, iteration: 237916
loss: 1.1232447624206543,grad_norm: 0.9999992866090768, iteration: 237917
loss: 1.0322027206420898,grad_norm: 0.9999991733100931, iteration: 237918
loss: 1.024254560470581,grad_norm: 0.9999991332711086, iteration: 237919
loss: 1.0383949279785156,grad_norm: 0.8364173026251145, iteration: 237920
loss: 1.0357718467712402,grad_norm: 0.999999692332902, iteration: 237921
loss: 1.1917613744735718,grad_norm: 0.9999996539650337, iteration: 237922
loss: 0.9889643788337708,grad_norm: 0.8759078217151235, iteration: 237923
loss: 0.9911195039749146,grad_norm: 0.9999992802169066, iteration: 237924
loss: 0.9586860537528992,grad_norm: 0.9999992920604351, iteration: 237925
loss: 1.135857343673706,grad_norm: 0.9999995568984131, iteration: 237926
loss: 1.1251845359802246,grad_norm: 0.9999997878235141, iteration: 237927
loss: 1.0403850078582764,grad_norm: 0.8433311472175197, iteration: 237928
loss: 1.055501103401184,grad_norm: 0.9326776216631605, iteration: 237929
loss: 1.099676489830017,grad_norm: 0.9999998542574123, iteration: 237930
loss: 1.130800724029541,grad_norm: 0.9999993383050336, iteration: 237931
loss: 1.035801887512207,grad_norm: 0.7931573734456273, iteration: 237932
loss: 0.9597082138061523,grad_norm: 0.8277652006763677, iteration: 237933
loss: 1.125091791152954,grad_norm: 0.9999998874091842, iteration: 237934
loss: 1.1028937101364136,grad_norm: 0.9999997239256151, iteration: 237935
loss: 0.995290219783783,grad_norm: 0.6952910022019414, iteration: 237936
loss: 1.0179718732833862,grad_norm: 0.9999992131715244, iteration: 237937
loss: 1.008136510848999,grad_norm: 0.9778089294163189, iteration: 237938
loss: 0.9757834672927856,grad_norm: 0.9999992306022863, iteration: 237939
loss: 1.0240108966827393,grad_norm: 0.9999992222562953, iteration: 237940
loss: 1.1094779968261719,grad_norm: 0.9999991919439823, iteration: 237941
loss: 0.9894779324531555,grad_norm: 0.7927540231931555, iteration: 237942
loss: 1.1205878257751465,grad_norm: 0.9999992518096892, iteration: 237943
loss: 0.9951555132865906,grad_norm: 0.9999992880837892, iteration: 237944
loss: 1.0191192626953125,grad_norm: 0.9999990328346787, iteration: 237945
loss: 1.0318193435668945,grad_norm: 0.9464249831463106, iteration: 237946
loss: 1.053161382675171,grad_norm: 0.7978644552480944, iteration: 237947
loss: 0.9639836549758911,grad_norm: 0.9145208078794987, iteration: 237948
loss: 0.9969695210456848,grad_norm: 0.9327430187269189, iteration: 237949
loss: 1.025403380393982,grad_norm: 0.9163761755363066, iteration: 237950
loss: 1.0816965103149414,grad_norm: 0.9999994523584269, iteration: 237951
loss: 1.0022995471954346,grad_norm: 0.8484696723668289, iteration: 237952
loss: 0.9849681258201599,grad_norm: 0.901723023920633, iteration: 237953
loss: 1.0527992248535156,grad_norm: 0.9556903761559092, iteration: 237954
loss: 1.0156149864196777,grad_norm: 0.9175362830072502, iteration: 237955
loss: 1.0051285028457642,grad_norm: 0.851631274517771, iteration: 237956
loss: 0.971433699131012,grad_norm: 0.9999991252092839, iteration: 237957
loss: 0.9815062880516052,grad_norm: 0.9559454476877168, iteration: 237958
loss: 0.9959316849708557,grad_norm: 0.9999993341442296, iteration: 237959
loss: 1.0288506746292114,grad_norm: 0.9087805191514553, iteration: 237960
loss: 0.9871068596839905,grad_norm: 0.9999990853466777, iteration: 237961
loss: 0.9987627267837524,grad_norm: 0.9999991366629443, iteration: 237962
loss: 0.9759424924850464,grad_norm: 0.8441471566180992, iteration: 237963
loss: 1.0246590375900269,grad_norm: 0.8667201405189654, iteration: 237964
loss: 1.0045562982559204,grad_norm: 0.8942376959524573, iteration: 237965
loss: 0.9938825964927673,grad_norm: 0.9578856156768837, iteration: 237966
loss: 0.984604001045227,grad_norm: 0.9470452460151584, iteration: 237967
loss: 1.0290247201919556,grad_norm: 0.9381443749798339, iteration: 237968
loss: 1.012164831161499,grad_norm: 0.9999990097667726, iteration: 237969
loss: 0.9746256470680237,grad_norm: 0.999999094396331, iteration: 237970
loss: 0.9845008254051208,grad_norm: 0.9964999384275356, iteration: 237971
loss: 1.0147215127944946,grad_norm: 0.7898017367667837, iteration: 237972
loss: 1.059246301651001,grad_norm: 0.999999584845446, iteration: 237973
loss: 1.0230284929275513,grad_norm: 0.9999990803886561, iteration: 237974
loss: 0.9815645217895508,grad_norm: 0.8930791136114087, iteration: 237975
loss: 1.02713143825531,grad_norm: 0.7730080410961577, iteration: 237976
loss: 1.0144093036651611,grad_norm: 0.7822327120914504, iteration: 237977
loss: 1.013126015663147,grad_norm: 0.9999990545524975, iteration: 237978
loss: 1.0009270906448364,grad_norm: 0.999999017333576, iteration: 237979
loss: 1.0064597129821777,grad_norm: 0.9069586245724944, iteration: 237980
loss: 0.9941211938858032,grad_norm: 0.7604532693737919, iteration: 237981
loss: 1.0101145505905151,grad_norm: 0.8092161698186814, iteration: 237982
loss: 1.2131379842758179,grad_norm: 0.9999998317216826, iteration: 237983
loss: 1.0138412714004517,grad_norm: 0.7995890313606203, iteration: 237984
loss: 0.9938409924507141,grad_norm: 0.9308586126375089, iteration: 237985
loss: 1.007519006729126,grad_norm: 0.8007878210812892, iteration: 237986
loss: 1.012778878211975,grad_norm: 0.9999998489365878, iteration: 237987
loss: 1.0012327432632446,grad_norm: 0.9761802118185019, iteration: 237988
loss: 0.9770864844322205,grad_norm: 0.7871232848692039, iteration: 237989
loss: 0.9853246212005615,grad_norm: 0.9999990174869441, iteration: 237990
loss: 0.9970136880874634,grad_norm: 0.9999992710237103, iteration: 237991
loss: 1.0023891925811768,grad_norm: 0.9580619854224149, iteration: 237992
loss: 0.9956132769584656,grad_norm: 0.7733347256281431, iteration: 237993
loss: 0.9908570051193237,grad_norm: 0.743159778791274, iteration: 237994
loss: 1.0374141931533813,grad_norm: 0.9559868796128136, iteration: 237995
loss: 1.0165764093399048,grad_norm: 0.9999990669805362, iteration: 237996
loss: 1.0503336191177368,grad_norm: 0.8617073766188661, iteration: 237997
loss: 0.9661635756492615,grad_norm: 0.7804601475773316, iteration: 237998
loss: 1.0430165529251099,grad_norm: 0.9999992556507316, iteration: 237999
loss: 0.9930452108383179,grad_norm: 0.9395046202838615, iteration: 238000
loss: 0.9639175534248352,grad_norm: 0.8266583167997312, iteration: 238001
loss: 1.0406508445739746,grad_norm: 0.9999990209211932, iteration: 238002
loss: 0.9689003825187683,grad_norm: 0.9265069187998398, iteration: 238003
loss: 1.2067933082580566,grad_norm: 0.9999999345346967, iteration: 238004
loss: 0.9896777272224426,grad_norm: 0.999999071606492, iteration: 238005
loss: 1.0154352188110352,grad_norm: 0.9999990179594819, iteration: 238006
loss: 0.9787890911102295,grad_norm: 0.9645651898899095, iteration: 238007
loss: 0.9863088130950928,grad_norm: 0.7854994692093208, iteration: 238008
loss: 1.0315287113189697,grad_norm: 0.999999097184449, iteration: 238009
loss: 0.9657177925109863,grad_norm: 0.9999990073949387, iteration: 238010
loss: 1.074613094329834,grad_norm: 0.999999460611696, iteration: 238011
loss: 0.952552855014801,grad_norm: 0.8954308268956525, iteration: 238012
loss: 0.9864650368690491,grad_norm: 0.9539431315702805, iteration: 238013
loss: 1.0100871324539185,grad_norm: 0.7393397351622423, iteration: 238014
loss: 1.015712022781372,grad_norm: 0.9559519165524918, iteration: 238015
loss: 1.028272271156311,grad_norm: 0.8476627198804806, iteration: 238016
loss: 1.0338248014450073,grad_norm: 0.9999994214129954, iteration: 238017
loss: 0.9948502779006958,grad_norm: 0.9999991154616765, iteration: 238018
loss: 1.04012930393219,grad_norm: 0.9900235752498904, iteration: 238019
loss: 1.0480393171310425,grad_norm: 0.9474795690942112, iteration: 238020
loss: 0.9903368949890137,grad_norm: 0.942580314917855, iteration: 238021
loss: 1.0165894031524658,grad_norm: 0.9999992475840304, iteration: 238022
loss: 0.97409588098526,grad_norm: 0.8701787338396626, iteration: 238023
loss: 0.9594878554344177,grad_norm: 0.9649271636463624, iteration: 238024
loss: 0.923122763633728,grad_norm: 0.9999991228766849, iteration: 238025
loss: 1.0124421119689941,grad_norm: 0.9999998044241751, iteration: 238026
loss: 1.0079784393310547,grad_norm: 0.8363805746544112, iteration: 238027
loss: 1.0017436742782593,grad_norm: 0.9999990445316806, iteration: 238028
loss: 0.9960119128227234,grad_norm: 0.9292342002707675, iteration: 238029
loss: 0.9900271892547607,grad_norm: 0.9999990417901815, iteration: 238030
loss: 1.0322469472885132,grad_norm: 0.9999990665727628, iteration: 238031
loss: 1.05318021774292,grad_norm: 0.9906909394659772, iteration: 238032
loss: 0.9692776799201965,grad_norm: 0.8642372352529613, iteration: 238033
loss: 0.9812908172607422,grad_norm: 0.9999990811825121, iteration: 238034
loss: 0.9752672910690308,grad_norm: 0.7561223847789778, iteration: 238035
loss: 1.0067203044891357,grad_norm: 0.9428870763171475, iteration: 238036
loss: 1.0218161344528198,grad_norm: 0.8860817937796112, iteration: 238037
loss: 0.9908905029296875,grad_norm: 0.9962029677384838, iteration: 238038
loss: 1.0096251964569092,grad_norm: 0.9999990754166603, iteration: 238039
loss: 1.04771888256073,grad_norm: 0.9999992838203874, iteration: 238040
loss: 0.9717280268669128,grad_norm: 0.8420805870501702, iteration: 238041
loss: 1.040417194366455,grad_norm: 0.9999997507471046, iteration: 238042
loss: 1.0023283958435059,grad_norm: 0.9203869020010788, iteration: 238043
loss: 1.0539183616638184,grad_norm: 0.9999994249523181, iteration: 238044
loss: 0.9873218536376953,grad_norm: 0.7178858837298173, iteration: 238045
loss: 0.9993435144424438,grad_norm: 0.8305827404344617, iteration: 238046
loss: 0.9639574289321899,grad_norm: 0.820151040255659, iteration: 238047
loss: 0.9931783080101013,grad_norm: 0.9961090477421725, iteration: 238048
loss: 1.0074270963668823,grad_norm: 0.7654493530920378, iteration: 238049
loss: 1.0251257419586182,grad_norm: 0.9938229017925087, iteration: 238050
loss: 1.0162702798843384,grad_norm: 0.9999994981443944, iteration: 238051
loss: 1.0059316158294678,grad_norm: 0.7477530311495799, iteration: 238052
loss: 1.0117279291152954,grad_norm: 0.8176727989233804, iteration: 238053
loss: 0.9754915237426758,grad_norm: 0.8676311272080125, iteration: 238054
loss: 0.9755988121032715,grad_norm: 0.9999990610510495, iteration: 238055
loss: 1.1282193660736084,grad_norm: 0.9999994357284975, iteration: 238056
loss: 0.9794065356254578,grad_norm: 0.7965986271526772, iteration: 238057
loss: 0.9759994149208069,grad_norm: 0.9883705233111233, iteration: 238058
loss: 1.0060749053955078,grad_norm: 0.9161333649922949, iteration: 238059
loss: 0.9897443056106567,grad_norm: 0.8553282284847109, iteration: 238060
loss: 1.0031318664550781,grad_norm: 0.9392176965301385, iteration: 238061
loss: 0.9896491169929504,grad_norm: 0.9219880182291381, iteration: 238062
loss: 1.0359200239181519,grad_norm: 0.8502566676801396, iteration: 238063
loss: 0.9988961219787598,grad_norm: 0.8841736091503317, iteration: 238064
loss: 0.9649971723556519,grad_norm: 0.9242289652209161, iteration: 238065
loss: 1.0094106197357178,grad_norm: 0.8160037776566272, iteration: 238066
loss: 1.006516933441162,grad_norm: 0.9999990424535697, iteration: 238067
loss: 1.0196787118911743,grad_norm: 0.843845934839763, iteration: 238068
loss: 0.954855740070343,grad_norm: 0.7899589637085251, iteration: 238069
loss: 1.0083845853805542,grad_norm: 0.8732636978476399, iteration: 238070
loss: 1.015831708908081,grad_norm: 0.9999993689339168, iteration: 238071
loss: 1.0018810033798218,grad_norm: 0.9999992085204745, iteration: 238072
loss: 0.9978887438774109,grad_norm: 0.999999261158935, iteration: 238073
loss: 0.9898226261138916,grad_norm: 0.8572481402214784, iteration: 238074
loss: 0.9860239624977112,grad_norm: 0.90159488702214, iteration: 238075
loss: 0.999416708946228,grad_norm: 0.9085177320996267, iteration: 238076
loss: 1.0118669271469116,grad_norm: 0.9241833489209108, iteration: 238077
loss: 1.0085939168930054,grad_norm: 1.0000000436468812, iteration: 238078
loss: 1.0171232223510742,grad_norm: 0.9875052515134994, iteration: 238079
loss: 0.9916002154350281,grad_norm: 0.9417003405547595, iteration: 238080
loss: 0.9811164736747742,grad_norm: 0.9334795710893689, iteration: 238081
loss: 1.0316232442855835,grad_norm: 0.9999993715010284, iteration: 238082
loss: 1.0058878660202026,grad_norm: 0.8819540972546869, iteration: 238083
loss: 1.0459885597229004,grad_norm: 0.9999990556418178, iteration: 238084
loss: 1.0181446075439453,grad_norm: 0.9999991246220976, iteration: 238085
loss: 0.9802646040916443,grad_norm: 0.860260203576798, iteration: 238086
loss: 1.005820870399475,grad_norm: 0.9438794251646533, iteration: 238087
loss: 1.0326743125915527,grad_norm: 0.9786379552053963, iteration: 238088
loss: 1.0075794458389282,grad_norm: 0.9072721492909482, iteration: 238089
loss: 0.9731847643852234,grad_norm: 0.8363024482551794, iteration: 238090
loss: 0.9947575926780701,grad_norm: 0.7669218154250541, iteration: 238091
loss: 1.0078450441360474,grad_norm: 0.82069066173279, iteration: 238092
loss: 1.0017472505569458,grad_norm: 0.9999991256654797, iteration: 238093
loss: 1.0420424938201904,grad_norm: 0.999999025793431, iteration: 238094
loss: 0.9547048211097717,grad_norm: 0.8623011338090384, iteration: 238095
loss: 0.9948514699935913,grad_norm: 0.8648985530989266, iteration: 238096
loss: 0.9953644275665283,grad_norm: 0.8478759188805286, iteration: 238097
loss: 0.9957382082939148,grad_norm: 0.7880739181368492, iteration: 238098
loss: 1.0182273387908936,grad_norm: 0.8402756482675283, iteration: 238099
loss: 1.0002236366271973,grad_norm: 0.999999235116713, iteration: 238100
loss: 1.016392707824707,grad_norm: 0.9999991626827115, iteration: 238101
loss: 0.9912072420120239,grad_norm: 0.9998059562072922, iteration: 238102
loss: 0.9795619249343872,grad_norm: 0.8624559416704253, iteration: 238103
loss: 1.0116716623306274,grad_norm: 0.857373104201651, iteration: 238104
loss: 1.0444366931915283,grad_norm: 0.9999998963638084, iteration: 238105
loss: 1.0109418630599976,grad_norm: 0.9470053982302439, iteration: 238106
loss: 0.9967095255851746,grad_norm: 0.839576431005796, iteration: 238107
loss: 1.042195439338684,grad_norm: 0.9285758349956422, iteration: 238108
loss: 0.9957769513130188,grad_norm: 0.8551893300394228, iteration: 238109
loss: 0.9999905824661255,grad_norm: 0.9999990079498086, iteration: 238110
loss: 0.9752922654151917,grad_norm: 0.8726641283941182, iteration: 238111
loss: 1.0008713006973267,grad_norm: 0.9096937605547204, iteration: 238112
loss: 1.0157873630523682,grad_norm: 0.9999993604651888, iteration: 238113
loss: 0.9862368106842041,grad_norm: 0.8557901782915079, iteration: 238114
loss: 1.0106239318847656,grad_norm: 0.9194793037942858, iteration: 238115
loss: 0.9780124425888062,grad_norm: 0.9158799667269087, iteration: 238116
loss: 1.0243732929229736,grad_norm: 0.9199708988017645, iteration: 238117
loss: 1.0077556371688843,grad_norm: 0.9999991318940388, iteration: 238118
loss: 1.0307828187942505,grad_norm: 0.9392133338488974, iteration: 238119
loss: 0.9781841039657593,grad_norm: 0.9335277574381348, iteration: 238120
loss: 0.9944069981575012,grad_norm: 0.99999912325363, iteration: 238121
loss: 1.023484230041504,grad_norm: 0.9028440221186251, iteration: 238122
loss: 1.028495192527771,grad_norm: 0.8764073469657998, iteration: 238123
loss: 0.9981079697608948,grad_norm: 0.8821020462295629, iteration: 238124
loss: 0.9739230275154114,grad_norm: 0.9792207486952673, iteration: 238125
loss: 1.0399566888809204,grad_norm: 0.9908894182010269, iteration: 238126
loss: 0.9692201614379883,grad_norm: 0.9999992168770693, iteration: 238127
loss: 1.0185704231262207,grad_norm: 0.9417250578480304, iteration: 238128
loss: 0.9999854564666748,grad_norm: 0.9999991331881892, iteration: 238129
loss: 0.991367757320404,grad_norm: 0.782265356803865, iteration: 238130
loss: 1.0025757551193237,grad_norm: 1.0000000021802287, iteration: 238131
loss: 0.9936414361000061,grad_norm: 0.9999990666974934, iteration: 238132
loss: 0.9423283934593201,grad_norm: 0.757388152767886, iteration: 238133
loss: 0.9979016780853271,grad_norm: 0.9999994982613626, iteration: 238134
loss: 1.138828158378601,grad_norm: 0.9999990358429198, iteration: 238135
loss: 1.0833677053451538,grad_norm: 0.9999998464025682, iteration: 238136
loss: 0.9875254034996033,grad_norm: 0.9548753476982835, iteration: 238137
loss: 1.0280115604400635,grad_norm: 0.9999993647876445, iteration: 238138
loss: 0.9989034533500671,grad_norm: 0.9984518189067735, iteration: 238139
loss: 0.9859212636947632,grad_norm: 0.8497871090659834, iteration: 238140
loss: 1.018263578414917,grad_norm: 0.9999991475046236, iteration: 238141
loss: 1.0032881498336792,grad_norm: 0.9999990717557584, iteration: 238142
loss: 0.9813797473907471,grad_norm: 0.8505995349339045, iteration: 238143
loss: 1.0850797891616821,grad_norm: 0.9999998646120754, iteration: 238144
loss: 0.9710708856582642,grad_norm: 0.7990111441050249, iteration: 238145
loss: 1.0141841173171997,grad_norm: 0.9705470757192163, iteration: 238146
loss: 1.0250821113586426,grad_norm: 0.9999997660694652, iteration: 238147
loss: 1.0553408861160278,grad_norm: 0.9999993233903898, iteration: 238148
loss: 1.0352445840835571,grad_norm: 0.9701784541482633, iteration: 238149
loss: 1.0227901935577393,grad_norm: 0.9043686212325723, iteration: 238150
loss: 1.0377790927886963,grad_norm: 0.9999992730385094, iteration: 238151
loss: 0.9927746057510376,grad_norm: 0.9999991049470346, iteration: 238152
loss: 1.0039350986480713,grad_norm: 0.9293498002769599, iteration: 238153
loss: 0.9460639357566833,grad_norm: 0.9999990689741881, iteration: 238154
loss: 1.0088526010513306,grad_norm: 0.999999473113294, iteration: 238155
loss: 0.9927661418914795,grad_norm: 0.9261744013164308, iteration: 238156
loss: 0.9600645303726196,grad_norm: 0.9323400013162093, iteration: 238157
loss: 0.9660007953643799,grad_norm: 0.9353015095939855, iteration: 238158
loss: 1.0263652801513672,grad_norm: 0.9999990226999037, iteration: 238159
loss: 0.9734017252922058,grad_norm: 0.9999989990575744, iteration: 238160
loss: 1.1598774194717407,grad_norm: 0.9999991114503683, iteration: 238161
loss: 1.1654237508773804,grad_norm: 0.9999993533676719, iteration: 238162
loss: 1.001467227935791,grad_norm: 0.9967438611156796, iteration: 238163
loss: 1.09393310546875,grad_norm: 0.9999992249415006, iteration: 238164
loss: 1.005724310874939,grad_norm: 0.9041909722721394, iteration: 238165
loss: 1.0120455026626587,grad_norm: 0.9999990682795172, iteration: 238166
loss: 0.9812033772468567,grad_norm: 0.9423151182963543, iteration: 238167
loss: 0.9970442652702332,grad_norm: 0.9999990681046359, iteration: 238168
loss: 0.9792497158050537,grad_norm: 0.9999996471120646, iteration: 238169
loss: 1.0318207740783691,grad_norm: 0.9929474322364091, iteration: 238170
loss: 1.1375442743301392,grad_norm: 0.9999997560901003, iteration: 238171
loss: 0.9896036386489868,grad_norm: 0.8462860556018857, iteration: 238172
loss: 0.9668938517570496,grad_norm: 0.9191398086463097, iteration: 238173
loss: 1.0331510305404663,grad_norm: 0.9999990854108454, iteration: 238174
loss: 1.0037841796875,grad_norm: 0.7800179338526715, iteration: 238175
loss: 1.0271660089492798,grad_norm: 0.9160790712872173, iteration: 238176
loss: 0.9876241087913513,grad_norm: 0.8460639028363516, iteration: 238177
loss: 0.9678061604499817,grad_norm: 0.9445944589394027, iteration: 238178
loss: 0.9865936040878296,grad_norm: 0.9999991803349866, iteration: 238179
loss: 1.0032371282577515,grad_norm: 0.8477307782754816, iteration: 238180
loss: 0.9970924258232117,grad_norm: 0.7809601833989039, iteration: 238181
loss: 1.0190348625183105,grad_norm: 0.9999990532333811, iteration: 238182
loss: 1.0290428400039673,grad_norm: 0.9487353545279062, iteration: 238183
loss: 1.0187016725540161,grad_norm: 0.8892522554800261, iteration: 238184
loss: 0.9980216026306152,grad_norm: 0.9214470543951634, iteration: 238185
loss: 0.9859881401062012,grad_norm: 0.8910904730691503, iteration: 238186
loss: 1.019330620765686,grad_norm: 0.9177075456370382, iteration: 238187
loss: 1.1102060079574585,grad_norm: 0.9999999014634536, iteration: 238188
loss: 1.0210974216461182,grad_norm: 0.9437659579290196, iteration: 238189
loss: 1.0035985708236694,grad_norm: 0.8117384574621009, iteration: 238190
loss: 1.0642415285110474,grad_norm: 0.9973849730412837, iteration: 238191
loss: 0.9952549934387207,grad_norm: 0.9999994176934326, iteration: 238192
loss: 0.999024510383606,grad_norm: 0.9999990389041263, iteration: 238193
loss: 0.9853186011314392,grad_norm: 0.9210960518307755, iteration: 238194
loss: 0.9792552590370178,grad_norm: 0.9999991581639192, iteration: 238195
loss: 0.995959997177124,grad_norm: 0.8342712396851161, iteration: 238196
loss: 1.0029593706130981,grad_norm: 0.9999992454227175, iteration: 238197
loss: 1.0264198780059814,grad_norm: 0.9740096746064714, iteration: 238198
loss: 0.9586310386657715,grad_norm: 0.9999991917736718, iteration: 238199
loss: 0.9755489826202393,grad_norm: 0.9999990887130772, iteration: 238200
loss: 1.0411008596420288,grad_norm: 0.8320692601149708, iteration: 238201
loss: 0.9709838628768921,grad_norm: 0.91304657279719, iteration: 238202
loss: 0.9776157140731812,grad_norm: 0.7731716215571819, iteration: 238203
loss: 0.9692149758338928,grad_norm: 0.8760655221687506, iteration: 238204
loss: 1.085463523864746,grad_norm: 0.8287447325187584, iteration: 238205
loss: 1.040589690208435,grad_norm: 0.9999995917928716, iteration: 238206
loss: 0.9657213687896729,grad_norm: 0.8182091996754727, iteration: 238207
loss: 1.0209836959838867,grad_norm: 0.9395164669992913, iteration: 238208
loss: 1.0077952146530151,grad_norm: 0.9999990730385124, iteration: 238209
loss: 1.024882197380066,grad_norm: 0.9999995069199357, iteration: 238210
loss: 1.0465220212936401,grad_norm: 0.8570587190274644, iteration: 238211
loss: 0.9943516254425049,grad_norm: 0.9320702518174769, iteration: 238212
loss: 1.0597047805786133,grad_norm: 0.8134859714678996, iteration: 238213
loss: 1.0011231899261475,grad_norm: 0.999999638290924, iteration: 238214
loss: 0.995867908000946,grad_norm: 0.9009341178028719, iteration: 238215
loss: 1.0288342237472534,grad_norm: 0.9999997463099058, iteration: 238216
loss: 1.0987493991851807,grad_norm: 0.9999991032993935, iteration: 238217
loss: 1.0405895709991455,grad_norm: 0.9999992136946534, iteration: 238218
loss: 1.0157723426818848,grad_norm: 0.9502530667923705, iteration: 238219
loss: 0.9991615414619446,grad_norm: 0.9674694124557597, iteration: 238220
loss: 1.0464069843292236,grad_norm: 0.7880372384267792, iteration: 238221
loss: 1.0009124279022217,grad_norm: 0.9229298961462525, iteration: 238222
loss: 1.0309345722198486,grad_norm: 0.9999999766313331, iteration: 238223
loss: 1.0888618230819702,grad_norm: 0.998424568264447, iteration: 238224
loss: 1.0730369091033936,grad_norm: 0.9999993592915949, iteration: 238225
loss: 0.9634131789207458,grad_norm: 0.9999992322231308, iteration: 238226
loss: 0.9926401972770691,grad_norm: 0.971388515416067, iteration: 238227
loss: 1.0165228843688965,grad_norm: 0.9340258917470241, iteration: 238228
loss: 0.9952338337898254,grad_norm: 0.9570922765993493, iteration: 238229
loss: 1.0918679237365723,grad_norm: 0.9999999226688894, iteration: 238230
loss: 1.1302270889282227,grad_norm: 1.000000043363209, iteration: 238231
loss: 0.9949337244033813,grad_norm: 0.979548974575255, iteration: 238232
loss: 1.0246877670288086,grad_norm: 0.9999989532063432, iteration: 238233
loss: 0.9839744567871094,grad_norm: 0.9999991606970723, iteration: 238234
loss: 1.0073325634002686,grad_norm: 0.9866616324085445, iteration: 238235
loss: 0.9988994002342224,grad_norm: 0.9999998870398621, iteration: 238236
loss: 1.0070641040802002,grad_norm: 0.8214262866345164, iteration: 238237
loss: 0.9800100326538086,grad_norm: 0.9999990196927356, iteration: 238238
loss: 0.9868662357330322,grad_norm: 0.9999990435928393, iteration: 238239
loss: 1.0053309202194214,grad_norm: 0.9999997321299717, iteration: 238240
loss: 1.0340389013290405,grad_norm: 0.9999991377644536, iteration: 238241
loss: 1.057965874671936,grad_norm: 0.9999991980482589, iteration: 238242
loss: 0.9896030426025391,grad_norm: 0.9905925874175793, iteration: 238243
loss: 1.0036429166793823,grad_norm: 0.9473731302458892, iteration: 238244
loss: 1.0035691261291504,grad_norm: 0.8256567319964637, iteration: 238245
loss: 0.9643240571022034,grad_norm: 0.8512556937872449, iteration: 238246
loss: 1.0294625759124756,grad_norm: 0.8442205911334865, iteration: 238247
loss: 1.0413841009140015,grad_norm: 0.96743516718411, iteration: 238248
loss: 1.014045238494873,grad_norm: 0.9220041391165148, iteration: 238249
loss: 0.9853085279464722,grad_norm: 0.9092919719807886, iteration: 238250
loss: 0.9987486600875854,grad_norm: 0.8649006638421284, iteration: 238251
loss: 0.9977869391441345,grad_norm: 0.9999989827193643, iteration: 238252
loss: 1.0043991804122925,grad_norm: 0.9999991188572652, iteration: 238253
loss: 1.0351924896240234,grad_norm: 0.9999991584085214, iteration: 238254
loss: 1.0300261974334717,grad_norm: 0.8016528019116085, iteration: 238255
loss: 0.9936322569847107,grad_norm: 0.859439916854916, iteration: 238256
loss: 0.9931086897850037,grad_norm: 0.7550650966538415, iteration: 238257
loss: 1.0413419008255005,grad_norm: 0.9687947282024071, iteration: 238258
loss: 1.016865849494934,grad_norm: 0.985531593247652, iteration: 238259
loss: 1.0124238729476929,grad_norm: 0.9518694571987406, iteration: 238260
loss: 0.9870618581771851,grad_norm: 0.9999993722077539, iteration: 238261
loss: 0.960181474685669,grad_norm: 0.9138068259185977, iteration: 238262
loss: 1.0059268474578857,grad_norm: 0.8018945528005496, iteration: 238263
loss: 0.9990109205245972,grad_norm: 0.9999991808831382, iteration: 238264
loss: 1.0144884586334229,grad_norm: 0.7679588052236137, iteration: 238265
loss: 0.9867101311683655,grad_norm: 0.8754064908341523, iteration: 238266
loss: 0.967302143573761,grad_norm: 0.9999989586029706, iteration: 238267
loss: 0.9567140340805054,grad_norm: 0.8453071739194273, iteration: 238268
loss: 0.9796456098556519,grad_norm: 0.9999990086657901, iteration: 238269
loss: 1.019199252128601,grad_norm: 0.8643034506972168, iteration: 238270
loss: 0.9959487318992615,grad_norm: 0.8417111691414577, iteration: 238271
loss: 1.0148924589157104,grad_norm: 0.8741926856903905, iteration: 238272
loss: 0.9998090863227844,grad_norm: 0.9588804744210307, iteration: 238273
loss: 0.9961740970611572,grad_norm: 0.9260606632511179, iteration: 238274
loss: 1.0751947164535522,grad_norm: 0.9999990808902248, iteration: 238275
loss: 0.9745709896087646,grad_norm: 0.884602599167715, iteration: 238276
loss: 1.0476406812667847,grad_norm: 0.8077767967314163, iteration: 238277
loss: 1.0152338743209839,grad_norm: 0.9107887006675366, iteration: 238278
loss: 0.9968515634536743,grad_norm: 0.8370480994672084, iteration: 238279
loss: 0.9803587794303894,grad_norm: 0.999999102710824, iteration: 238280
loss: 1.09926438331604,grad_norm: 0.9999991587342203, iteration: 238281
loss: 1.0573289394378662,grad_norm: 0.9900779712700796, iteration: 238282
loss: 0.9489099383354187,grad_norm: 0.902546350436765, iteration: 238283
loss: 0.9517635107040405,grad_norm: 0.7468038298824261, iteration: 238284
loss: 0.9777904152870178,grad_norm: 0.7865945534377964, iteration: 238285
loss: 0.9831268191337585,grad_norm: 0.8193298472395368, iteration: 238286
loss: 1.030396819114685,grad_norm: 0.9999990944742784, iteration: 238287
loss: 0.9968560338020325,grad_norm: 0.8402489316885724, iteration: 238288
loss: 0.9899114370346069,grad_norm: 0.8322594267916016, iteration: 238289
loss: 0.9939707517623901,grad_norm: 0.9999991587100965, iteration: 238290
loss: 0.9562506675720215,grad_norm: 0.9984651157138302, iteration: 238291
loss: 0.9994814991950989,grad_norm: 0.9959387034081902, iteration: 238292
loss: 0.9860423803329468,grad_norm: 0.9338719071899169, iteration: 238293
loss: 1.0129188299179077,grad_norm: 0.9999993386388073, iteration: 238294
loss: 1.0304476022720337,grad_norm: 0.9019341005322259, iteration: 238295
loss: 0.9994779825210571,grad_norm: 0.7510158215704664, iteration: 238296
loss: 0.9768868088722229,grad_norm: 0.7952067372262142, iteration: 238297
loss: 1.014075517654419,grad_norm: 0.9999992067061786, iteration: 238298
loss: 0.9523453712463379,grad_norm: 0.9799487666049616, iteration: 238299
loss: 1.021024465560913,grad_norm: 0.9406222636730065, iteration: 238300
loss: 1.0170351266860962,grad_norm: 0.9999992031062176, iteration: 238301
loss: 0.982052743434906,grad_norm: 0.999999102709959, iteration: 238302
loss: 1.0382795333862305,grad_norm: 0.9599103844430528, iteration: 238303
loss: 1.115103006362915,grad_norm: 0.9999990799897528, iteration: 238304
loss: 0.9923029541969299,grad_norm: 0.9206490010951646, iteration: 238305
loss: 1.0553503036499023,grad_norm: 0.9999991905723298, iteration: 238306
loss: 0.9897615313529968,grad_norm: 0.9793912000538515, iteration: 238307
loss: 1.0021225214004517,grad_norm: 0.8422165242985477, iteration: 238308
loss: 0.9973897933959961,grad_norm: 0.9722430287103736, iteration: 238309
loss: 0.9921449422836304,grad_norm: 0.8358886103824305, iteration: 238310
loss: 1.0160666704177856,grad_norm: 0.9029775606698986, iteration: 238311
loss: 0.9937676787376404,grad_norm: 0.8355568140470794, iteration: 238312
loss: 1.0591083765029907,grad_norm: 0.9999989673320192, iteration: 238313
loss: 1.0122876167297363,grad_norm: 0.8294257204887536, iteration: 238314
loss: 0.9992998838424683,grad_norm: 0.8033866467323965, iteration: 238315
loss: 0.9571762084960938,grad_norm: 0.9647303668915679, iteration: 238316
loss: 1.0030821561813354,grad_norm: 0.9999991272553845, iteration: 238317
loss: 0.9945216178894043,grad_norm: 0.7960192413126265, iteration: 238318
loss: 1.0644298791885376,grad_norm: 0.994605413733513, iteration: 238319
loss: 1.0057287216186523,grad_norm: 0.9999991182862883, iteration: 238320
loss: 1.0103347301483154,grad_norm: 0.7487706064530038, iteration: 238321
loss: 0.9925013184547424,grad_norm: 0.9112998583625779, iteration: 238322
loss: 0.9818669557571411,grad_norm: 0.9821413555241857, iteration: 238323
loss: 0.9936410784721375,grad_norm: 0.8295773799449263, iteration: 238324
loss: 1.0318948030471802,grad_norm: 0.8948232235847484, iteration: 238325
loss: 1.0009573698043823,grad_norm: 0.9306661049164137, iteration: 238326
loss: 1.0004428625106812,grad_norm: 0.8969131836722264, iteration: 238327
loss: 0.9973798394203186,grad_norm: 0.896008223718155, iteration: 238328
loss: 1.0109161138534546,grad_norm: 0.9999992040190036, iteration: 238329
loss: 1.013736367225647,grad_norm: 0.989250868652104, iteration: 238330
loss: 0.988987147808075,grad_norm: 0.9999990815090156, iteration: 238331
loss: 0.9923135042190552,grad_norm: 0.9337595364332107, iteration: 238332
loss: 1.0218431949615479,grad_norm: 0.9824307117368185, iteration: 238333
loss: 0.9912938475608826,grad_norm: 0.8503023326755548, iteration: 238334
loss: 1.01071035861969,grad_norm: 0.9697439616240895, iteration: 238335
loss: 0.9594714641571045,grad_norm: 0.8192162207880893, iteration: 238336
loss: 0.9764646291732788,grad_norm: 0.923835018638563, iteration: 238337
loss: 0.9900871515274048,grad_norm: 0.9096196116759699, iteration: 238338
loss: 1.0253995656967163,grad_norm: 0.8610298783600864, iteration: 238339
loss: 1.0070266723632812,grad_norm: 0.8097690048652585, iteration: 238340
loss: 0.9742876291275024,grad_norm: 0.9750344781484171, iteration: 238341
loss: 0.9861087799072266,grad_norm: 0.8812736822998747, iteration: 238342
loss: 1.0354437828063965,grad_norm: 0.9999998487891675, iteration: 238343
loss: 0.9875955581665039,grad_norm: 0.8356526447429952, iteration: 238344
loss: 1.0047951936721802,grad_norm: 0.910180804784979, iteration: 238345
loss: 0.9781903028488159,grad_norm: 0.9190705610732104, iteration: 238346
loss: 1.0018978118896484,grad_norm: 0.7604964567702639, iteration: 238347
loss: 1.0319715738296509,grad_norm: 0.9999989417004269, iteration: 238348
loss: 0.9883769750595093,grad_norm: 0.9999991549738305, iteration: 238349
loss: 0.9716817140579224,grad_norm: 0.9999990177685554, iteration: 238350
loss: 1.0222017765045166,grad_norm: 0.9999990581083547, iteration: 238351
loss: 0.9522173404693604,grad_norm: 0.9843294113217277, iteration: 238352
loss: 0.9921517372131348,grad_norm: 0.9999991492064386, iteration: 238353
loss: 0.9965047240257263,grad_norm: 0.9819811654333688, iteration: 238354
loss: 0.9479227066040039,grad_norm: 0.9723340138220826, iteration: 238355
loss: 0.9903814792633057,grad_norm: 0.9715912918942505, iteration: 238356
loss: 1.0003398656845093,grad_norm: 0.9999990783901266, iteration: 238357
loss: 1.0421795845031738,grad_norm: 0.9999992777304436, iteration: 238358
loss: 1.026119589805603,grad_norm: 0.9638059200671054, iteration: 238359
loss: 0.9514861702919006,grad_norm: 0.9999998103504729, iteration: 238360
loss: 0.9871589541435242,grad_norm: 0.9882794469055477, iteration: 238361
loss: 0.961147129535675,grad_norm: 0.9999991222342193, iteration: 238362
loss: 0.9885128736495972,grad_norm: 0.9999991041981662, iteration: 238363
loss: 0.9837954044342041,grad_norm: 0.9015443925417447, iteration: 238364
loss: 1.060620903968811,grad_norm: 0.9999995752567568, iteration: 238365
loss: 1.028194546699524,grad_norm: 0.9999991490294224, iteration: 238366
loss: 1.0159685611724854,grad_norm: 0.9737252948328138, iteration: 238367
loss: 0.99265456199646,grad_norm: 0.9340821301729622, iteration: 238368
loss: 1.0045512914657593,grad_norm: 0.8253609902227983, iteration: 238369
loss: 0.997877836227417,grad_norm: 0.8568534553088888, iteration: 238370
loss: 0.9972272515296936,grad_norm: 0.9999991483608032, iteration: 238371
loss: 0.9872966408729553,grad_norm: 0.8265209771668919, iteration: 238372
loss: 1.0026564598083496,grad_norm: 0.9796350520784398, iteration: 238373
loss: 0.9815773963928223,grad_norm: 0.8756699358770175, iteration: 238374
loss: 0.9727954864501953,grad_norm: 0.9999989978203793, iteration: 238375
loss: 1.016111135482788,grad_norm: 0.9655548719274503, iteration: 238376
loss: 0.9905260801315308,grad_norm: 0.8257258431693688, iteration: 238377
loss: 0.9608081579208374,grad_norm: 0.8580192238929165, iteration: 238378
loss: 0.9794446229934692,grad_norm: 0.8453270545678979, iteration: 238379
loss: 1.0388127565383911,grad_norm: 0.9630791885649276, iteration: 238380
loss: 0.9855323433876038,grad_norm: 0.8152804841326033, iteration: 238381
loss: 1.024868130683899,grad_norm: 0.9999991638673492, iteration: 238382
loss: 1.0254215002059937,grad_norm: 0.8447218579650377, iteration: 238383
loss: 1.001403570175171,grad_norm: 0.9675890984300326, iteration: 238384
loss: 1.0323641300201416,grad_norm: 0.8791582050558829, iteration: 238385
loss: 1.0107282400131226,grad_norm: 0.8161399024560084, iteration: 238386
loss: 0.9800344109535217,grad_norm: 0.7752817899153992, iteration: 238387
loss: 1.013973593711853,grad_norm: 0.8257610721736994, iteration: 238388
loss: 1.0041091442108154,grad_norm: 0.9688386787332762, iteration: 238389
loss: 0.9832581281661987,grad_norm: 0.8299192453993349, iteration: 238390
loss: 0.9822523593902588,grad_norm: 0.9305411976171889, iteration: 238391
loss: 1.0119154453277588,grad_norm: 0.9999990095584493, iteration: 238392
loss: 1.0385866165161133,grad_norm: 0.9999990833544393, iteration: 238393
loss: 0.9882698059082031,grad_norm: 0.9352183426801077, iteration: 238394
loss: 0.9901532530784607,grad_norm: 0.8143556744992378, iteration: 238395
loss: 0.9804216623306274,grad_norm: 0.8860987843923818, iteration: 238396
loss: 0.9837993383407593,grad_norm: 0.9123915326611653, iteration: 238397
loss: 1.0096646547317505,grad_norm: 0.9351715899303877, iteration: 238398
loss: 1.0404127836227417,grad_norm: 0.9854796011757325, iteration: 238399
loss: 1.0259393453598022,grad_norm: 0.9999992763054547, iteration: 238400
loss: 1.1092983484268188,grad_norm: 0.9999991966398716, iteration: 238401
loss: 1.136735439300537,grad_norm: 0.9999991151244645, iteration: 238402
loss: 0.9473605155944824,grad_norm: 0.9552988493231698, iteration: 238403
loss: 0.9908777475357056,grad_norm: 0.9999991899388226, iteration: 238404
loss: 0.9935579299926758,grad_norm: 0.7964317961198906, iteration: 238405
loss: 0.9870848655700684,grad_norm: 0.8559780394897158, iteration: 238406
loss: 1.0153015851974487,grad_norm: 0.8973209270749177, iteration: 238407
loss: 0.9996621012687683,grad_norm: 0.8560288510367032, iteration: 238408
loss: 1.005359172821045,grad_norm: 0.9999991479771718, iteration: 238409
loss: 1.0060179233551025,grad_norm: 0.7825078706632922, iteration: 238410
loss: 1.051633358001709,grad_norm: 0.9553622803044632, iteration: 238411
loss: 0.9709739089012146,grad_norm: 0.9793467833374444, iteration: 238412
loss: 1.024362325668335,grad_norm: 0.9999991757351016, iteration: 238413
loss: 0.9860999584197998,grad_norm: 0.9332934983234179, iteration: 238414
loss: 1.0227065086364746,grad_norm: 0.993695449888884, iteration: 238415
loss: 0.9819402694702148,grad_norm: 0.8907930307303704, iteration: 238416
loss: 1.0034630298614502,grad_norm: 0.9968046723562478, iteration: 238417
loss: 1.0118749141693115,grad_norm: 0.9569481503598767, iteration: 238418
loss: 1.0010932683944702,grad_norm: 0.8382081214399683, iteration: 238419
loss: 0.9722287654876709,grad_norm: 0.9999991841364794, iteration: 238420
loss: 1.0189332962036133,grad_norm: 1.0000000726650526, iteration: 238421
loss: 1.0350067615509033,grad_norm: 0.9805879358153013, iteration: 238422
loss: 1.0333563089370728,grad_norm: 0.999999375211843, iteration: 238423
loss: 0.9932457208633423,grad_norm: 0.9059832977222188, iteration: 238424
loss: 1.0076321363449097,grad_norm: 0.8171224694735919, iteration: 238425
loss: 1.0254276990890503,grad_norm: 0.9392671509328389, iteration: 238426
loss: 0.9776716232299805,grad_norm: 0.9999997911048154, iteration: 238427
loss: 0.9887503981590271,grad_norm: 0.8761728016062641, iteration: 238428
loss: 0.9604315161705017,grad_norm: 0.8332448977213804, iteration: 238429
loss: 0.9781988263130188,grad_norm: 0.8451683485943204, iteration: 238430
loss: 0.999489963054657,grad_norm: 0.8775197221121406, iteration: 238431
loss: 0.9969456791877747,grad_norm: 0.8627886897115283, iteration: 238432
loss: 1.0116777420043945,grad_norm: 0.8922724177082998, iteration: 238433
loss: 0.9661363959312439,grad_norm: 0.9999990313033874, iteration: 238434
loss: 0.9872053265571594,grad_norm: 0.9462258937543019, iteration: 238435
loss: 1.0532540082931519,grad_norm: 0.9999991068833145, iteration: 238436
loss: 0.9824551343917847,grad_norm: 0.9756643695043983, iteration: 238437
loss: 1.005212426185608,grad_norm: 0.8458627993654582, iteration: 238438
loss: 1.026705026626587,grad_norm: 0.815164298119077, iteration: 238439
loss: 0.9866477251052856,grad_norm: 0.8563678305460641, iteration: 238440
loss: 0.9606805443763733,grad_norm: 0.8690056585521145, iteration: 238441
loss: 0.9903995394706726,grad_norm: 0.9467629730801554, iteration: 238442
loss: 1.0041028261184692,grad_norm: 0.8151327706208092, iteration: 238443
loss: 0.980863630771637,grad_norm: 0.8015044631320264, iteration: 238444
loss: 0.9785046577453613,grad_norm: 0.7712926773413336, iteration: 238445
loss: 0.9604594707489014,grad_norm: 0.9999990863691318, iteration: 238446
loss: 1.0246665477752686,grad_norm: 0.8121248765998387, iteration: 238447
loss: 1.005163311958313,grad_norm: 0.9278054347608945, iteration: 238448
loss: 1.0332835912704468,grad_norm: 0.9890957823657328, iteration: 238449
loss: 1.0098810195922852,grad_norm: 0.9647667118257579, iteration: 238450
loss: 0.9944185614585876,grad_norm: 0.8873974904675096, iteration: 238451
loss: 0.9933069348335266,grad_norm: 0.8205541976232276, iteration: 238452
loss: 1.021591067314148,grad_norm: 0.8469851406801969, iteration: 238453
loss: 1.0005595684051514,grad_norm: 0.7291471128788898, iteration: 238454
loss: 1.017849326133728,grad_norm: 0.9860437009435338, iteration: 238455
loss: 1.0072076320648193,grad_norm: 0.8944459304717322, iteration: 238456
loss: 0.9924638867378235,grad_norm: 0.9999990428493563, iteration: 238457
loss: 1.0792267322540283,grad_norm: 0.9999992187302145, iteration: 238458
loss: 0.9961910843849182,grad_norm: 0.9802967631708356, iteration: 238459
loss: 0.9741665124893188,grad_norm: 0.9446994637793763, iteration: 238460
loss: 1.0122584104537964,grad_norm: 0.9811995008933135, iteration: 238461
loss: 1.0286338329315186,grad_norm: 0.9828696060902539, iteration: 238462
loss: 0.9734113216400146,grad_norm: 0.8416154466261399, iteration: 238463
loss: 0.9779788255691528,grad_norm: 0.8412502160297202, iteration: 238464
loss: 1.0004314184188843,grad_norm: 0.8432366740940914, iteration: 238465
loss: 0.9982361793518066,grad_norm: 0.90549692994185, iteration: 238466
loss: 0.9978932738304138,grad_norm: 0.9999990024209205, iteration: 238467
loss: 0.981550931930542,grad_norm: 0.8216049456520019, iteration: 238468
loss: 1.0046192407608032,grad_norm: 0.7511141525008046, iteration: 238469
loss: 0.9870522618293762,grad_norm: 0.8931833148720891, iteration: 238470
loss: 0.9712651371955872,grad_norm: 0.9927355311499245, iteration: 238471
loss: 1.012151837348938,grad_norm: 0.9006452383495563, iteration: 238472
loss: 1.0804044008255005,grad_norm: 0.9999998999849812, iteration: 238473
loss: 1.0059294700622559,grad_norm: 0.9999992659889464, iteration: 238474
loss: 1.0488828420639038,grad_norm: 0.887246072584856, iteration: 238475
loss: 0.9866803884506226,grad_norm: 0.8454181411134031, iteration: 238476
loss: 0.9897456765174866,grad_norm: 0.9442358271514231, iteration: 238477
loss: 0.9918073415756226,grad_norm: 0.8787113675832555, iteration: 238478
loss: 1.041788935661316,grad_norm: 0.9244014689681371, iteration: 238479
loss: 1.0108426809310913,grad_norm: 0.9999988194559606, iteration: 238480
loss: 0.9815843105316162,grad_norm: 0.8971618257984009, iteration: 238481
loss: 1.02912437915802,grad_norm: 0.8961781232644159, iteration: 238482
loss: 0.9876428246498108,grad_norm: 0.9310809945534494, iteration: 238483
loss: 1.0290874242782593,grad_norm: 0.9999990526489715, iteration: 238484
loss: 0.9953047633171082,grad_norm: 0.8192352922341064, iteration: 238485
loss: 0.983579695224762,grad_norm: 0.8477471850996675, iteration: 238486
loss: 1.01698899269104,grad_norm: 0.9999991301525999, iteration: 238487
loss: 0.9868924617767334,grad_norm: 0.9999998626785535, iteration: 238488
loss: 1.027530312538147,grad_norm: 0.9999994010433115, iteration: 238489
loss: 1.007150650024414,grad_norm: 0.8772517650607683, iteration: 238490
loss: 0.9939571619033813,grad_norm: 0.8495713758479286, iteration: 238491
loss: 0.9889671802520752,grad_norm: 0.8717504781325598, iteration: 238492
loss: 1.0562629699707031,grad_norm: 0.9999991907786348, iteration: 238493
loss: 0.9985917210578918,grad_norm: 0.9999991072489476, iteration: 238494
loss: 1.0015919208526611,grad_norm: 0.86571381157327, iteration: 238495
loss: 1.0028762817382812,grad_norm: 0.8868783494999959, iteration: 238496
loss: 1.0080506801605225,grad_norm: 0.9999992160346065, iteration: 238497
loss: 0.9820725321769714,grad_norm: 0.9999991298750555, iteration: 238498
loss: 1.0252488851547241,grad_norm: 0.999999008827966, iteration: 238499
loss: 1.0457923412322998,grad_norm: 0.9999991097204888, iteration: 238500
loss: 0.990300178527832,grad_norm: 0.8103902096848716, iteration: 238501
loss: 1.0065338611602783,grad_norm: 0.9999989719666581, iteration: 238502
loss: 0.9665043950080872,grad_norm: 0.9945113597776494, iteration: 238503
loss: 1.0204209089279175,grad_norm: 0.9007411979101181, iteration: 238504
loss: 0.9917972087860107,grad_norm: 0.9798587278716278, iteration: 238505
loss: 1.039366364479065,grad_norm: 0.7823515825232015, iteration: 238506
loss: 1.0121475458145142,grad_norm: 0.9999990465905512, iteration: 238507
loss: 1.0103951692581177,grad_norm: 0.9695979421617273, iteration: 238508
loss: 1.030371904373169,grad_norm: 0.9999993585091433, iteration: 238509
loss: 0.980968713760376,grad_norm: 0.8337096387519927, iteration: 238510
loss: 1.0116324424743652,grad_norm: 0.8745957752187336, iteration: 238511
loss: 1.0003693103790283,grad_norm: 0.7586417535246995, iteration: 238512
loss: 1.0174161195755005,grad_norm: 0.8435538283088792, iteration: 238513
loss: 0.9595177173614502,grad_norm: 0.8315089359416412, iteration: 238514
loss: 1.0047791004180908,grad_norm: 0.8653996363448004, iteration: 238515
loss: 1.0024089813232422,grad_norm: 0.8055664744004507, iteration: 238516
loss: 1.0199168920516968,grad_norm: 0.9999990706782002, iteration: 238517
loss: 1.028190016746521,grad_norm: 0.9999991043891461, iteration: 238518
loss: 1.0047030448913574,grad_norm: 0.8701005813883639, iteration: 238519
loss: 1.0276155471801758,grad_norm: 0.9999994995267252, iteration: 238520
loss: 1.0268858671188354,grad_norm: 0.9594433989979233, iteration: 238521
loss: 1.0325340032577515,grad_norm: 0.9999991351374343, iteration: 238522
loss: 1.0192347764968872,grad_norm: 0.9577442103576095, iteration: 238523
loss: 1.0141850709915161,grad_norm: 0.8671707635901565, iteration: 238524
loss: 0.9999672770500183,grad_norm: 0.9907678891164619, iteration: 238525
loss: 0.9590256810188293,grad_norm: 0.7881603634517886, iteration: 238526
loss: 1.0338947772979736,grad_norm: 0.9850933682984501, iteration: 238527
loss: 0.962969183921814,grad_norm: 0.9625915626470034, iteration: 238528
loss: 0.9972931742668152,grad_norm: 0.8662501942316306, iteration: 238529
loss: 0.9984593987464905,grad_norm: 0.8261661140467637, iteration: 238530
loss: 1.0105242729187012,grad_norm: 0.7192360614099246, iteration: 238531
loss: 1.0030112266540527,grad_norm: 0.8147687853069715, iteration: 238532
loss: 0.9876444935798645,grad_norm: 0.9445015971602065, iteration: 238533
loss: 1.1140801906585693,grad_norm: 1.0000000453998537, iteration: 238534
loss: 1.0150071382522583,grad_norm: 0.869947152477207, iteration: 238535
loss: 0.9983147382736206,grad_norm: 0.8110393462482012, iteration: 238536
loss: 1.0161960124969482,grad_norm: 0.9999998510324495, iteration: 238537
loss: 0.973074197769165,grad_norm: 0.8890938785597179, iteration: 238538
loss: 0.996373176574707,grad_norm: 0.9999996316731601, iteration: 238539
loss: 0.9960422515869141,grad_norm: 0.7570350031537186, iteration: 238540
loss: 0.9760929346084595,grad_norm: 0.901242319889365, iteration: 238541
loss: 0.9770716428756714,grad_norm: 0.8765785708018565, iteration: 238542
loss: 0.9817306995391846,grad_norm: 0.9999991892663405, iteration: 238543
loss: 1.0010039806365967,grad_norm: 0.8293599100338273, iteration: 238544
loss: 0.9423377513885498,grad_norm: 0.9999988584267514, iteration: 238545
loss: 0.9949169158935547,grad_norm: 0.8543845776383328, iteration: 238546
loss: 1.0044761896133423,grad_norm: 0.9028427243651793, iteration: 238547
loss: 1.0164737701416016,grad_norm: 0.8329991350304075, iteration: 238548
loss: 1.0304937362670898,grad_norm: 0.9865031713560987, iteration: 238549
loss: 0.994895875453949,grad_norm: 0.7899355368878644, iteration: 238550
loss: 0.9786320924758911,grad_norm: 0.7886155036588223, iteration: 238551
loss: 0.9879920482635498,grad_norm: 0.9058427614289233, iteration: 238552
loss: 1.0277161598205566,grad_norm: 0.8569482080575648, iteration: 238553
loss: 1.0152127742767334,grad_norm: 0.8282521570003639, iteration: 238554
loss: 0.9727677702903748,grad_norm: 0.904462510613602, iteration: 238555
loss: 1.001914381980896,grad_norm: 0.7854479643534671, iteration: 238556
loss: 1.019436240196228,grad_norm: 0.8820580637563461, iteration: 238557
loss: 1.0463370084762573,grad_norm: 0.9999995224691182, iteration: 238558
loss: 1.035402536392212,grad_norm: 0.8782074771510321, iteration: 238559
loss: 1.0231860876083374,grad_norm: 0.9999990387342778, iteration: 238560
loss: 0.9862280488014221,grad_norm: 0.7785081017082328, iteration: 238561
loss: 1.0033882856369019,grad_norm: 0.9940557418641821, iteration: 238562
loss: 1.0227597951889038,grad_norm: 0.9705232940515236, iteration: 238563
loss: 1.0184316635131836,grad_norm: 0.7944265064661193, iteration: 238564
loss: 1.031937837600708,grad_norm: 0.9999996572311273, iteration: 238565
loss: 0.9844953417778015,grad_norm: 0.9999992455393689, iteration: 238566
loss: 0.9928807616233826,grad_norm: 0.9598429166607132, iteration: 238567
loss: 1.0149784088134766,grad_norm: 0.871366796499982, iteration: 238568
loss: 0.9766718149185181,grad_norm: 0.8782661201002506, iteration: 238569
loss: 1.010655403137207,grad_norm: 0.960625305822127, iteration: 238570
loss: 0.9871057271957397,grad_norm: 0.9700628676112596, iteration: 238571
loss: 0.9740681648254395,grad_norm: 0.9999998946483108, iteration: 238572
loss: 1.0353565216064453,grad_norm: 0.9360138023202322, iteration: 238573
loss: 0.9453003406524658,grad_norm: 0.8639691248327065, iteration: 238574
loss: 1.0116990804672241,grad_norm: 0.7986915669730581, iteration: 238575
loss: 0.9784128665924072,grad_norm: 0.9166328714467343, iteration: 238576
loss: 1.0255217552185059,grad_norm: 0.9999992932401369, iteration: 238577
loss: 0.975745677947998,grad_norm: 0.9582856428360174, iteration: 238578
loss: 0.9943476319313049,grad_norm: 0.944548046200612, iteration: 238579
loss: 1.0009915828704834,grad_norm: 0.912740669275321, iteration: 238580
loss: 1.0370440483093262,grad_norm: 0.9726910038366745, iteration: 238581
loss: 0.9860898852348328,grad_norm: 0.7975627996298204, iteration: 238582
loss: 0.9946650266647339,grad_norm: 0.9999991239951457, iteration: 238583
loss: 0.991807758808136,grad_norm: 0.999998985229441, iteration: 238584
loss: 0.993062436580658,grad_norm: 0.9457676861742521, iteration: 238585
loss: 0.9923610687255859,grad_norm: 0.8739069827123284, iteration: 238586
loss: 1.0125924348831177,grad_norm: 0.9067876747408623, iteration: 238587
loss: 0.9939706325531006,grad_norm: 0.9638700228904808, iteration: 238588
loss: 0.9970594048500061,grad_norm: 0.9999990436603573, iteration: 238589
loss: 0.9803948402404785,grad_norm: 0.7908878920564829, iteration: 238590
loss: 1.0525579452514648,grad_norm: 0.9999990598634809, iteration: 238591
loss: 1.029761552810669,grad_norm: 0.9999994830837747, iteration: 238592
loss: 0.9868647456169128,grad_norm: 0.9543381151051045, iteration: 238593
loss: 1.0042123794555664,grad_norm: 0.8144207677783653, iteration: 238594
loss: 0.9462928175926208,grad_norm: 0.8605206756760521, iteration: 238595
loss: 0.991052508354187,grad_norm: 0.82318014673417, iteration: 238596
loss: 0.9966509938240051,grad_norm: 0.9166528564199895, iteration: 238597
loss: 0.9385634064674377,grad_norm: 0.8700083134161024, iteration: 238598
loss: 1.0568342208862305,grad_norm: 0.8612189915655584, iteration: 238599
loss: 1.011347770690918,grad_norm: 0.8736319148843767, iteration: 238600
loss: 0.9667558073997498,grad_norm: 0.969746440923449, iteration: 238601
loss: 1.0296388864517212,grad_norm: 0.9999993924323909, iteration: 238602
loss: 0.9990337491035461,grad_norm: 0.9999990890072099, iteration: 238603
loss: 1.002135157585144,grad_norm: 0.8548624120469978, iteration: 238604
loss: 0.9602162837982178,grad_norm: 0.96099527956261, iteration: 238605
loss: 1.0838721990585327,grad_norm: 0.9999990223828933, iteration: 238606
loss: 1.017641305923462,grad_norm: 0.9258788115110137, iteration: 238607
loss: 0.9845160841941833,grad_norm: 0.872564344509026, iteration: 238608
loss: 0.9985995888710022,grad_norm: 0.9035652185283173, iteration: 238609
loss: 0.9986676573753357,grad_norm: 0.9029521641968872, iteration: 238610
loss: 1.0273188352584839,grad_norm: 0.9999994949402156, iteration: 238611
loss: 0.972066342830658,grad_norm: 0.9286300724363363, iteration: 238612
loss: 1.0041662454605103,grad_norm: 0.9350300779140632, iteration: 238613
loss: 0.9564366936683655,grad_norm: 0.7077987560907183, iteration: 238614
loss: 0.9795237183570862,grad_norm: 0.8708669067922424, iteration: 238615
loss: 0.9446322321891785,grad_norm: 0.9999991820058924, iteration: 238616
loss: 1.003099799156189,grad_norm: 0.8999470868836715, iteration: 238617
loss: 1.0880677700042725,grad_norm: 0.9626358810300104, iteration: 238618
loss: 1.014191746711731,grad_norm: 0.9999991426648566, iteration: 238619
loss: 1.0918715000152588,grad_norm: 0.9999995356092715, iteration: 238620
loss: 1.0082659721374512,grad_norm: 0.8998668846217055, iteration: 238621
loss: 1.0388206243515015,grad_norm: 0.9007953246994608, iteration: 238622
loss: 0.9932393431663513,grad_norm: 0.8142907319966803, iteration: 238623
loss: 0.9743524789810181,grad_norm: 0.7984153401043679, iteration: 238624
loss: 1.0140899419784546,grad_norm: 0.723756907455736, iteration: 238625
loss: 0.988410472869873,grad_norm: 0.8787172838586973, iteration: 238626
loss: 0.992495596408844,grad_norm: 0.9595490329940283, iteration: 238627
loss: 1.0012000799179077,grad_norm: 0.9999992117658866, iteration: 238628
loss: 0.9677027463912964,grad_norm: 0.964620543307217, iteration: 238629
loss: 1.014387845993042,grad_norm: 0.8355661366246542, iteration: 238630
loss: 0.9834833741188049,grad_norm: 0.8556781090083407, iteration: 238631
loss: 0.9835116267204285,grad_norm: 0.8130291769160605, iteration: 238632
loss: 1.0354427099227905,grad_norm: 0.9243719906915111, iteration: 238633
loss: 0.9914029240608215,grad_norm: 0.9664154585195374, iteration: 238634
loss: 1.0146229267120361,grad_norm: 0.8517061488135946, iteration: 238635
loss: 1.0013467073440552,grad_norm: 0.9700884771849032, iteration: 238636
loss: 0.9970449805259705,grad_norm: 0.8830746114525186, iteration: 238637
loss: 0.9772195219993591,grad_norm: 0.97316698317939, iteration: 238638
loss: 0.9948391318321228,grad_norm: 0.7223880918440472, iteration: 238639
loss: 0.9693179130554199,grad_norm: 0.9999990940272725, iteration: 238640
loss: 1.0187139511108398,grad_norm: 0.954262685533366, iteration: 238641
loss: 1.005139946937561,grad_norm: 0.9664015449453742, iteration: 238642
loss: 1.017600178718567,grad_norm: 0.8746242273899816, iteration: 238643
loss: 0.9616476893424988,grad_norm: 0.9999989950607611, iteration: 238644
loss: 1.0208944082260132,grad_norm: 0.8310929072542584, iteration: 238645
loss: 0.9766173958778381,grad_norm: 0.8839647697925207, iteration: 238646
loss: 0.9628684520721436,grad_norm: 0.7973157044245391, iteration: 238647
loss: 1.0187939405441284,grad_norm: 0.9999989677373865, iteration: 238648
loss: 0.9716137051582336,grad_norm: 0.9558310745443382, iteration: 238649
loss: 0.9915928840637207,grad_norm: 0.9015513675464102, iteration: 238650
loss: 1.0581351518630981,grad_norm: 0.8766215016121708, iteration: 238651
loss: 1.0026706457138062,grad_norm: 0.8302629123829409, iteration: 238652
loss: 0.978670060634613,grad_norm: 0.999999050962675, iteration: 238653
loss: 0.9913848042488098,grad_norm: 0.9513502139155112, iteration: 238654
loss: 1.0594704151153564,grad_norm: 0.8300521611341343, iteration: 238655
loss: 0.9935625195503235,grad_norm: 0.9897540087119356, iteration: 238656
loss: 1.0119810104370117,grad_norm: 0.8997541253264645, iteration: 238657
loss: 1.047533392906189,grad_norm: 0.9999992894566393, iteration: 238658
loss: 0.9844436645507812,grad_norm: 0.8761785955608065, iteration: 238659
loss: 0.9768571257591248,grad_norm: 0.9999990514612616, iteration: 238660
loss: 1.0198309421539307,grad_norm: 0.9603950685647171, iteration: 238661
loss: 1.0128240585327148,grad_norm: 0.8262028832501854, iteration: 238662
loss: 1.0426098108291626,grad_norm: 0.8383413019647302, iteration: 238663
loss: 0.989666759967804,grad_norm: 0.8600678212045845, iteration: 238664
loss: 0.9603069424629211,grad_norm: 0.9249018856884835, iteration: 238665
loss: 1.0126887559890747,grad_norm: 0.8858942506413684, iteration: 238666
loss: 0.986542284488678,grad_norm: 0.9999990126457583, iteration: 238667
loss: 1.015613317489624,grad_norm: 0.9999991003952067, iteration: 238668
loss: 0.9826270341873169,grad_norm: 0.8903871357943558, iteration: 238669
loss: 1.0163135528564453,grad_norm: 0.9596107905174279, iteration: 238670
loss: 1.0223650932312012,grad_norm: 0.9999989094097788, iteration: 238671
loss: 1.0145132541656494,grad_norm: 0.9488006171512896, iteration: 238672
loss: 0.9798734188079834,grad_norm: 0.8960314981024269, iteration: 238673
loss: 0.9539696574211121,grad_norm: 0.8937652061223758, iteration: 238674
loss: 1.0154335498809814,grad_norm: 0.9999991210879315, iteration: 238675
loss: 1.0418298244476318,grad_norm: 0.9999989932301802, iteration: 238676
loss: 1.0361781120300293,grad_norm: 0.9999991269548731, iteration: 238677
loss: 1.0046534538269043,grad_norm: 0.9100997232391843, iteration: 238678
loss: 0.9856456518173218,grad_norm: 0.8726887008634004, iteration: 238679
loss: 0.9800345301628113,grad_norm: 0.9999990275954521, iteration: 238680
loss: 1.0339033603668213,grad_norm: 0.9204457841954923, iteration: 238681
loss: 1.021335244178772,grad_norm: 0.7944455421850565, iteration: 238682
loss: 0.9888064861297607,grad_norm: 0.895883761506499, iteration: 238683
loss: 0.967301607131958,grad_norm: 0.8334377208550467, iteration: 238684
loss: 0.9457853436470032,grad_norm: 0.8969469776756142, iteration: 238685
loss: 1.0295461416244507,grad_norm: 0.9392840772762321, iteration: 238686
loss: 0.9695060849189758,grad_norm: 0.880826403248435, iteration: 238687
loss: 0.9968447089195251,grad_norm: 0.9276356855107496, iteration: 238688
loss: 0.9838790893554688,grad_norm: 0.9311431786152242, iteration: 238689
loss: 0.9671416282653809,grad_norm: 0.8770205064587162, iteration: 238690
loss: 0.9894229769706726,grad_norm: 0.9064175516891807, iteration: 238691
loss: 1.0060614347457886,grad_norm: 0.9875518239461543, iteration: 238692
loss: 0.980477511882782,grad_norm: 0.9999991052020036, iteration: 238693
loss: 0.9976201057434082,grad_norm: 0.9999991413678373, iteration: 238694
loss: 0.9857264757156372,grad_norm: 0.9999997889897407, iteration: 238695
loss: 1.0332030057907104,grad_norm: 0.84958534396456, iteration: 238696
loss: 1.0067853927612305,grad_norm: 0.8677941891812853, iteration: 238697
loss: 1.0047504901885986,grad_norm: 0.9999990736028164, iteration: 238698
loss: 1.0056930780410767,grad_norm: 0.9022749475633437, iteration: 238699
loss: 0.9750354886054993,grad_norm: 0.9999991336117172, iteration: 238700
loss: 1.026401400566101,grad_norm: 0.7811958247610489, iteration: 238701
loss: 0.9672579765319824,grad_norm: 0.8804300878922157, iteration: 238702
loss: 1.006404995918274,grad_norm: 0.7187697576978236, iteration: 238703
loss: 0.9669325351715088,grad_norm: 0.8933381696517096, iteration: 238704
loss: 1.0211867094039917,grad_norm: 0.85574154332514, iteration: 238705
loss: 1.0683588981628418,grad_norm: 0.9999991998672425, iteration: 238706
loss: 0.9610728621482849,grad_norm: 0.77693190769993, iteration: 238707
loss: 0.9861118197441101,grad_norm: 0.9514404847479235, iteration: 238708
loss: 0.9964113831520081,grad_norm: 0.9999990989310557, iteration: 238709
loss: 1.0203769207000732,grad_norm: 0.962072568284848, iteration: 238710
loss: 0.968040406703949,grad_norm: 0.8463789732889458, iteration: 238711
loss: 1.0114948749542236,grad_norm: 0.8953432249251756, iteration: 238712
loss: 0.9808074831962585,grad_norm: 0.7609701685931882, iteration: 238713
loss: 1.0063940286636353,grad_norm: 0.7604069593127925, iteration: 238714
loss: 1.0295933485031128,grad_norm: 0.9999991685949253, iteration: 238715
loss: 1.026869297027588,grad_norm: 0.9999990640906932, iteration: 238716
loss: 0.9906255602836609,grad_norm: 0.8704967832467838, iteration: 238717
loss: 0.9804288744926453,grad_norm: 0.881146487243508, iteration: 238718
loss: 1.0111377239227295,grad_norm: 0.9242220043679051, iteration: 238719
loss: 1.0457338094711304,grad_norm: 0.9999990527165701, iteration: 238720
loss: 0.9571059942245483,grad_norm: 0.8802497737147649, iteration: 238721
loss: 0.9770309925079346,grad_norm: 0.8760001037526484, iteration: 238722
loss: 1.0269107818603516,grad_norm: 0.8951327319106733, iteration: 238723
loss: 0.9708293676376343,grad_norm: 0.9333670742878043, iteration: 238724
loss: 1.0731942653656006,grad_norm: 0.8810781699564626, iteration: 238725
loss: 0.989655077457428,grad_norm: 0.9999997807187215, iteration: 238726
loss: 0.9572526812553406,grad_norm: 0.9999992173792887, iteration: 238727
loss: 0.9976256489753723,grad_norm: 0.9999989985154919, iteration: 238728
loss: 0.9890201687812805,grad_norm: 0.9498026881506841, iteration: 238729
loss: 0.992031455039978,grad_norm: 0.9999990437267829, iteration: 238730
loss: 1.040082335472107,grad_norm: 0.8133965041836798, iteration: 238731
loss: 0.9839450716972351,grad_norm: 0.8604553683914673, iteration: 238732
loss: 0.9905185699462891,grad_norm: 0.999999202967624, iteration: 238733
loss: 0.9791499972343445,grad_norm: 0.9548810088069711, iteration: 238734
loss: 1.0275367498397827,grad_norm: 0.8977499722464316, iteration: 238735
loss: 0.9817569851875305,grad_norm: 0.9163820824065498, iteration: 238736
loss: 0.9804618954658508,grad_norm: 0.9015881329503114, iteration: 238737
loss: 1.0146284103393555,grad_norm: 0.9257652962622381, iteration: 238738
loss: 0.9721989035606384,grad_norm: 0.9391118169170184, iteration: 238739
loss: 1.0020742416381836,grad_norm: 0.9439841228539733, iteration: 238740
loss: 1.0300898551940918,grad_norm: 0.9024151206209214, iteration: 238741
loss: 0.9937474131584167,grad_norm: 0.8549953698861967, iteration: 238742
loss: 0.9881932735443115,grad_norm: 0.9607797040295437, iteration: 238743
loss: 0.9456831216812134,grad_norm: 0.9999990252312326, iteration: 238744
loss: 1.006706953048706,grad_norm: 0.9999991879253709, iteration: 238745
loss: 1.0445090532302856,grad_norm: 0.8660345077071312, iteration: 238746
loss: 1.0048489570617676,grad_norm: 0.8291224670839935, iteration: 238747
loss: 0.9585757255554199,grad_norm: 0.8218422006255057, iteration: 238748
loss: 1.0387284755706787,grad_norm: 0.8565807764248338, iteration: 238749
loss: 1.0287076234817505,grad_norm: 0.9716798370674189, iteration: 238750
loss: 0.9885295033454895,grad_norm: 0.8990860539144324, iteration: 238751
loss: 1.000268578529358,grad_norm: 0.9999991030956696, iteration: 238752
loss: 1.03476083278656,grad_norm: 0.8167740464887279, iteration: 238753
loss: 0.9938421845436096,grad_norm: 0.9951320041838561, iteration: 238754
loss: 1.0439645051956177,grad_norm: 0.8787948493772669, iteration: 238755
loss: 0.9971985220909119,grad_norm: 0.8761548113606094, iteration: 238756
loss: 0.9905844330787659,grad_norm: 0.8767438421914329, iteration: 238757
loss: 1.0051045417785645,grad_norm: 0.9999999405206808, iteration: 238758
loss: 1.0199319124221802,grad_norm: 0.9686452612326166, iteration: 238759
loss: 1.0399913787841797,grad_norm: 1.0000000256531683, iteration: 238760
loss: 0.9593781232833862,grad_norm: 0.9407868168008374, iteration: 238761
loss: 1.0060511827468872,grad_norm: 0.921012236894114, iteration: 238762
loss: 1.0039254426956177,grad_norm: 0.9999991701552703, iteration: 238763
loss: 1.00507652759552,grad_norm: 0.9552587690942028, iteration: 238764
loss: 1.0171397924423218,grad_norm: 0.9999991062176696, iteration: 238765
loss: 0.9835968017578125,grad_norm: 0.8328554448617939, iteration: 238766
loss: 0.9941168427467346,grad_norm: 0.9999990617114295, iteration: 238767
loss: 1.0214347839355469,grad_norm: 0.8333335712781685, iteration: 238768
loss: 1.016063928604126,grad_norm: 0.9999991598743934, iteration: 238769
loss: 0.9935755133628845,grad_norm: 0.9869137054369176, iteration: 238770
loss: 1.0206364393234253,grad_norm: 0.8901450083280091, iteration: 238771
loss: 0.9992887377738953,grad_norm: 0.985106225466897, iteration: 238772
loss: 0.9523929357528687,grad_norm: 0.9103572582866206, iteration: 238773
loss: 0.9924849271774292,grad_norm: 0.9730353866941054, iteration: 238774
loss: 1.0157362222671509,grad_norm: 0.9999993631449214, iteration: 238775
loss: 0.9897088408470154,grad_norm: 0.9535097348536805, iteration: 238776
loss: 1.0174928903579712,grad_norm: 0.8334487471572252, iteration: 238777
loss: 0.9600644707679749,grad_norm: 0.8452277824474451, iteration: 238778
loss: 1.0134440660476685,grad_norm: 0.8795667665797164, iteration: 238779
loss: 1.0343109369277954,grad_norm: 0.9999991018888278, iteration: 238780
loss: 1.0136723518371582,grad_norm: 0.999999024398895, iteration: 238781
loss: 0.9727485775947571,grad_norm: 0.8771071126052348, iteration: 238782
loss: 1.0289536714553833,grad_norm: 0.8305442578278838, iteration: 238783
loss: 1.0062357187271118,grad_norm: 0.9999990089052972, iteration: 238784
loss: 0.9861006736755371,grad_norm: 0.8311825702449759, iteration: 238785
loss: 1.0359525680541992,grad_norm: 0.9209162001975308, iteration: 238786
loss: 1.0002058744430542,grad_norm: 0.9949375464402881, iteration: 238787
loss: 0.9935824871063232,grad_norm: 0.9094268476751449, iteration: 238788
loss: 0.9871375560760498,grad_norm: 0.802018503547122, iteration: 238789
loss: 1.0358293056488037,grad_norm: 0.9790256981302851, iteration: 238790
loss: 0.9674965739250183,grad_norm: 0.7536275504810555, iteration: 238791
loss: 0.9733179807662964,grad_norm: 0.9477142108276164, iteration: 238792
loss: 0.9848216772079468,grad_norm: 0.8765085757553226, iteration: 238793
loss: 0.9781755805015564,grad_norm: 0.8394259407686779, iteration: 238794
loss: 0.9806432723999023,grad_norm: 0.9121700042717843, iteration: 238795
loss: 0.9655576944351196,grad_norm: 0.8515072510748057, iteration: 238796
loss: 1.0156594514846802,grad_norm: 0.9999991426656182, iteration: 238797
loss: 1.0287655591964722,grad_norm: 0.923448042088886, iteration: 238798
loss: 0.9910058379173279,grad_norm: 0.9400112093315673, iteration: 238799
loss: 1.0001084804534912,grad_norm: 0.9731124229308645, iteration: 238800
loss: 0.9903759956359863,grad_norm: 0.8904840003584128, iteration: 238801
loss: 0.9702765941619873,grad_norm: 0.8871938122915566, iteration: 238802
loss: 1.0767654180526733,grad_norm: 1.0000000502283959, iteration: 238803
loss: 0.9858568906784058,grad_norm: 0.9563382649518082, iteration: 238804
loss: 0.9981822967529297,grad_norm: 0.7275154087354542, iteration: 238805
loss: 0.9894472360610962,grad_norm: 0.9718320208713132, iteration: 238806
loss: 0.98282390832901,grad_norm: 0.9411405217426927, iteration: 238807
loss: 1.0545364618301392,grad_norm: 0.9999992671896882, iteration: 238808
loss: 1.0217636823654175,grad_norm: 0.9878728307959137, iteration: 238809
loss: 1.0008894205093384,grad_norm: 0.9999993930831036, iteration: 238810
loss: 1.0404785871505737,grad_norm: 0.9373882718324072, iteration: 238811
loss: 1.0725219249725342,grad_norm: 0.9999990392695306, iteration: 238812
loss: 0.9991337060928345,grad_norm: 0.957200930877146, iteration: 238813
loss: 0.9788066744804382,grad_norm: 0.7676125019262151, iteration: 238814
loss: 1.0108964443206787,grad_norm: 0.9924073440335803, iteration: 238815
loss: 1.0068773031234741,grad_norm: 0.9999990907231858, iteration: 238816
loss: 0.9786608815193176,grad_norm: 0.8221394445430152, iteration: 238817
loss: 0.9699127674102783,grad_norm: 0.941839845293295, iteration: 238818
loss: 1.017150640487671,grad_norm: 0.8190470727561969, iteration: 238819
loss: 1.007917881011963,grad_norm: 0.9585268840908352, iteration: 238820
loss: 0.9867604970932007,grad_norm: 0.8988244206495182, iteration: 238821
loss: 1.0057076215744019,grad_norm: 0.9694157063042391, iteration: 238822
loss: 1.0190222263336182,grad_norm: 0.9999992463923306, iteration: 238823
loss: 1.0414174795150757,grad_norm: 0.7806681482420713, iteration: 238824
loss: 0.9943931102752686,grad_norm: 0.9999992832660238, iteration: 238825
loss: 1.075249195098877,grad_norm: 0.9359250169891125, iteration: 238826
loss: 0.9575745463371277,grad_norm: 0.956524640480184, iteration: 238827
loss: 0.9939970970153809,grad_norm: 0.9999990911789394, iteration: 238828
loss: 0.9427361488342285,grad_norm: 0.9999991659651787, iteration: 238829
loss: 1.0271414518356323,grad_norm: 0.784829720674724, iteration: 238830
loss: 0.9975431561470032,grad_norm: 0.853535236495589, iteration: 238831
loss: 0.9972485899925232,grad_norm: 0.7721887500128487, iteration: 238832
loss: 1.0017964839935303,grad_norm: 0.9293085626978113, iteration: 238833
loss: 0.9993197917938232,grad_norm: 0.8851540802428292, iteration: 238834
loss: 1.0019479990005493,grad_norm: 0.8815159090189941, iteration: 238835
loss: 0.9664093255996704,grad_norm: 0.7462321104554248, iteration: 238836
loss: 0.9668461680412292,grad_norm: 0.8954970242811998, iteration: 238837
loss: 0.9934724569320679,grad_norm: 0.9714670066764732, iteration: 238838
loss: 0.9822695851325989,grad_norm: 0.7957547601610122, iteration: 238839
loss: 1.0001558065414429,grad_norm: 0.9018984948336817, iteration: 238840
loss: 1.019335389137268,grad_norm: 0.9999991374328614, iteration: 238841
loss: 0.9894116520881653,grad_norm: 0.9666188512329524, iteration: 238842
loss: 1.0162526369094849,grad_norm: 0.9999990747822646, iteration: 238843
loss: 0.9750827550888062,grad_norm: 0.9999993298155755, iteration: 238844
loss: 1.0169506072998047,grad_norm: 0.9999992198171025, iteration: 238845
loss: 1.0076403617858887,grad_norm: 0.929790905362424, iteration: 238846
loss: 1.0034840106964111,grad_norm: 0.9070257578352985, iteration: 238847
loss: 0.9894630312919617,grad_norm: 0.8743271348312711, iteration: 238848
loss: 1.05238938331604,grad_norm: 0.8731991307865982, iteration: 238849
loss: 1.0204793214797974,grad_norm: 0.9825505644793109, iteration: 238850
loss: 0.9757063984870911,grad_norm: 0.9072410242706295, iteration: 238851
loss: 1.0153144598007202,grad_norm: 0.8228335835034536, iteration: 238852
loss: 0.9759359359741211,grad_norm: 0.9272737900713709, iteration: 238853
loss: 0.9251834750175476,grad_norm: 0.882230076990394, iteration: 238854
loss: 1.0192044973373413,grad_norm: 0.8536256169021326, iteration: 238855
loss: 1.001600742340088,grad_norm: 0.9289956576470313, iteration: 238856
loss: 1.0169734954833984,grad_norm: 0.9999991180385008, iteration: 238857
loss: 1.0271698236465454,grad_norm: 0.7627523951457931, iteration: 238858
loss: 1.0420578718185425,grad_norm: 0.9999991270300438, iteration: 238859
loss: 1.0351976156234741,grad_norm: 0.9822768480762984, iteration: 238860
loss: 1.002902626991272,grad_norm: 0.8807163534583101, iteration: 238861
loss: 0.9832741618156433,grad_norm: 0.8126244038350375, iteration: 238862
loss: 0.9890104532241821,grad_norm: 0.9669076144750168, iteration: 238863
loss: 1.0032576322555542,grad_norm: 0.9344800561873305, iteration: 238864
loss: 1.0130597352981567,grad_norm: 0.8463862242004891, iteration: 238865
loss: 0.9465288519859314,grad_norm: 0.8609939109565379, iteration: 238866
loss: 1.0071403980255127,grad_norm: 0.9999990312335184, iteration: 238867
loss: 0.9835712909698486,grad_norm: 0.8545776055963012, iteration: 238868
loss: 0.9270960688591003,grad_norm: 0.9326379964253744, iteration: 238869
loss: 1.0032576322555542,grad_norm: 0.9576177363770731, iteration: 238870
loss: 1.0122973918914795,grad_norm: 0.9548359449991899, iteration: 238871
loss: 0.99970942735672,grad_norm: 0.9205969333048186, iteration: 238872
loss: 1.0290417671203613,grad_norm: 0.9999992371311244, iteration: 238873
loss: 0.9845906496047974,grad_norm: 0.8357876711572834, iteration: 238874
loss: 1.001908302307129,grad_norm: 0.7504485444466482, iteration: 238875
loss: 1.0047919750213623,grad_norm: 0.8665581399667666, iteration: 238876
loss: 1.0174232721328735,grad_norm: 0.9136997009451076, iteration: 238877
loss: 1.0756117105484009,grad_norm: 0.9999992438679576, iteration: 238878
loss: 1.0158151388168335,grad_norm: 0.9008995906051498, iteration: 238879
loss: 0.9963263869285583,grad_norm: 0.9943252560537469, iteration: 238880
loss: 0.9809836149215698,grad_norm: 0.9999990222724117, iteration: 238881
loss: 0.9720187783241272,grad_norm: 0.926462986025578, iteration: 238882
loss: 1.0214166641235352,grad_norm: 0.9678364202629206, iteration: 238883
loss: 0.9650068283081055,grad_norm: 0.9999990558959082, iteration: 238884
loss: 1.0226970911026,grad_norm: 0.9999990973242995, iteration: 238885
loss: 1.0382931232452393,grad_norm: 0.9999994579951365, iteration: 238886
loss: 0.9944791197776794,grad_norm: 0.7521615324761413, iteration: 238887
loss: 1.0231810808181763,grad_norm: 0.9427188683219454, iteration: 238888
loss: 1.0729964971542358,grad_norm: 0.7976578203690308, iteration: 238889
loss: 1.0352495908737183,grad_norm: 0.8422230695845667, iteration: 238890
loss: 1.001802921295166,grad_norm: 0.8384284659132678, iteration: 238891
loss: 0.99896240234375,grad_norm: 0.761080590449696, iteration: 238892
loss: 1.0152292251586914,grad_norm: 0.993599658178311, iteration: 238893
loss: 1.0732860565185547,grad_norm: 0.999999457964092, iteration: 238894
loss: 0.974717378616333,grad_norm: 0.7826867383303757, iteration: 238895
loss: 1.0195221900939941,grad_norm: 0.9999991953330776, iteration: 238896
loss: 1.0258843898773193,grad_norm: 0.9999991956731794, iteration: 238897
loss: 0.9785041809082031,grad_norm: 0.8774397497864064, iteration: 238898
loss: 0.9789853692054749,grad_norm: 0.9480760106311419, iteration: 238899
loss: 0.9798745512962341,grad_norm: 0.7974036893992488, iteration: 238900
loss: 0.9988134503364563,grad_norm: 0.9523368561954386, iteration: 238901
loss: 0.9784471392631531,grad_norm: 0.7824090741942162, iteration: 238902
loss: 1.0582318305969238,grad_norm: 0.9999991624167497, iteration: 238903
loss: 1.0103479623794556,grad_norm: 0.9999991750955606, iteration: 238904
loss: 1.0492453575134277,grad_norm: 0.9937218958295123, iteration: 238905
loss: 0.9786304235458374,grad_norm: 0.8080514188220087, iteration: 238906
loss: 0.9908031821250916,grad_norm: 0.9967046006984197, iteration: 238907
loss: 1.008740782737732,grad_norm: 0.9999991183503065, iteration: 238908
loss: 0.9925052523612976,grad_norm: 0.7674084668222964, iteration: 238909
loss: 0.9715531468391418,grad_norm: 0.8376728579867329, iteration: 238910
loss: 0.9950129985809326,grad_norm: 0.824972143391893, iteration: 238911
loss: 0.9851958155632019,grad_norm: 0.8542699405845589, iteration: 238912
loss: 0.9908865094184875,grad_norm: 0.9212698860287445, iteration: 238913
loss: 1.0406770706176758,grad_norm: 0.9691757952185464, iteration: 238914
loss: 1.0065020322799683,grad_norm: 0.8561606640680836, iteration: 238915
loss: 0.9650586247444153,grad_norm: 0.9983603467530723, iteration: 238916
loss: 0.9889292120933533,grad_norm: 0.9035592468728234, iteration: 238917
loss: 0.9984050393104553,grad_norm: 0.9126597909675468, iteration: 238918
loss: 1.0546526908874512,grad_norm: 0.9999998041871057, iteration: 238919
loss: 0.9819727540016174,grad_norm: 0.8826534300782153, iteration: 238920
loss: 0.9489706754684448,grad_norm: 0.91632024591676, iteration: 238921
loss: 1.0041707754135132,grad_norm: 0.9999990679908355, iteration: 238922
loss: 1.0202885866165161,grad_norm: 0.8393158314819603, iteration: 238923
loss: 0.9796624779701233,grad_norm: 0.8177049908418527, iteration: 238924
loss: 1.0131452083587646,grad_norm: 0.8875346752821527, iteration: 238925
loss: 1.0115575790405273,grad_norm: 0.8429977015527197, iteration: 238926
loss: 0.9769414067268372,grad_norm: 0.8194717846854559, iteration: 238927
loss: 0.96981281042099,grad_norm: 0.9887744347123617, iteration: 238928
loss: 1.0151360034942627,grad_norm: 0.9227679978422109, iteration: 238929
loss: 0.9970467686653137,grad_norm: 0.9999990044885345, iteration: 238930
loss: 1.001301884651184,grad_norm: 0.9658545975106201, iteration: 238931
loss: 1.0078543424606323,grad_norm: 0.9769001410979234, iteration: 238932
loss: 1.00267493724823,grad_norm: 0.8590774534551271, iteration: 238933
loss: 0.9840442538261414,grad_norm: 0.9157801834950143, iteration: 238934
loss: 1.000903844833374,grad_norm: 0.9999991909932778, iteration: 238935
loss: 1.006188988685608,grad_norm: 0.8149332766456647, iteration: 238936
loss: 0.9536651372909546,grad_norm: 0.9130542905564853, iteration: 238937
loss: 0.9614998698234558,grad_norm: 0.9515727379266312, iteration: 238938
loss: 0.9943455457687378,grad_norm: 0.9999990951951104, iteration: 238939
loss: 0.9684380292892456,grad_norm: 0.9999992271588898, iteration: 238940
loss: 1.0081263780593872,grad_norm: 0.9119025234774888, iteration: 238941
loss: 0.993363618850708,grad_norm: 0.9025888461223257, iteration: 238942
loss: 1.011968731880188,grad_norm: 0.9999990698425923, iteration: 238943
loss: 1.0400744676589966,grad_norm: 0.9600290524918988, iteration: 238944
loss: 1.0140094757080078,grad_norm: 0.9654080705403242, iteration: 238945
loss: 0.980767548084259,grad_norm: 0.920040592834345, iteration: 238946
loss: 1.0399153232574463,grad_norm: 0.8343189079934515, iteration: 238947
loss: 1.0080190896987915,grad_norm: 0.8832001438815418, iteration: 238948
loss: 1.0124750137329102,grad_norm: 0.9701758322534488, iteration: 238949
loss: 0.9621526002883911,grad_norm: 0.9999991483800145, iteration: 238950
loss: 0.9984738230705261,grad_norm: 0.8187253903042454, iteration: 238951
loss: 0.98466557264328,grad_norm: 0.9811673584651132, iteration: 238952
loss: 1.0274332761764526,grad_norm: 0.9999990481548395, iteration: 238953
loss: 1.021146297454834,grad_norm: 0.9999991012296187, iteration: 238954
loss: 1.0196603536605835,grad_norm: 0.950675541846607, iteration: 238955
loss: 0.9881628155708313,grad_norm: 0.9999991971822513, iteration: 238956
loss: 0.9889162182807922,grad_norm: 0.845864086862541, iteration: 238957
loss: 1.0241514444351196,grad_norm: 0.8141367844787047, iteration: 238958
loss: 0.9641636610031128,grad_norm: 0.9143918726563509, iteration: 238959
loss: 1.0549043416976929,grad_norm: 0.9186541340317123, iteration: 238960
loss: 1.0178017616271973,grad_norm: 0.8089814334136904, iteration: 238961
loss: 1.012969970703125,grad_norm: 0.9999998006663442, iteration: 238962
loss: 1.035054087638855,grad_norm: 0.7751915181822454, iteration: 238963
loss: 0.9895622134208679,grad_norm: 0.9001275126384203, iteration: 238964
loss: 1.1115742921829224,grad_norm: 0.9999989804937746, iteration: 238965
loss: 1.0176277160644531,grad_norm: 0.9422190388434292, iteration: 238966
loss: 0.9919477105140686,grad_norm: 0.8902984798498569, iteration: 238967
loss: 1.0072393417358398,grad_norm: 0.9897160409018688, iteration: 238968
loss: 1.0016028881072998,grad_norm: 0.999999426212897, iteration: 238969
loss: 1.0270450115203857,grad_norm: 0.9964483016585189, iteration: 238970
loss: 1.0203434228897095,grad_norm: 0.8005547289740599, iteration: 238971
loss: 0.9669153690338135,grad_norm: 0.9999991669075537, iteration: 238972
loss: 0.9662284255027771,grad_norm: 0.8928075070508754, iteration: 238973
loss: 0.9679406881332397,grad_norm: 0.9805344491012775, iteration: 238974
loss: 1.0387072563171387,grad_norm: 0.999999258770058, iteration: 238975
loss: 0.993251383304596,grad_norm: 0.9999990740121921, iteration: 238976
loss: 0.9972521662712097,grad_norm: 0.8396103210234843, iteration: 238977
loss: 1.000639796257019,grad_norm: 0.9093362170687603, iteration: 238978
loss: 1.0486901998519897,grad_norm: 0.945365774098647, iteration: 238979
loss: 1.0155760049819946,grad_norm: 0.9491806322894748, iteration: 238980
loss: 1.0019021034240723,grad_norm: 0.9752835672001317, iteration: 238981
loss: 1.0076968669891357,grad_norm: 0.9999990700237692, iteration: 238982
loss: 1.021099328994751,grad_norm: 0.9999993397961094, iteration: 238983
loss: 1.0189518928527832,grad_norm: 0.9669767036229239, iteration: 238984
loss: 0.9597252011299133,grad_norm: 0.7971132191256418, iteration: 238985
loss: 0.9937038421630859,grad_norm: 0.8334993756214699, iteration: 238986
loss: 0.9998672604560852,grad_norm: 0.7909985387594605, iteration: 238987
loss: 0.9875339865684509,grad_norm: 0.81225137717231, iteration: 238988
loss: 1.0096737146377563,grad_norm: 0.9440670738471104, iteration: 238989
loss: 1.0584501028060913,grad_norm: 0.8946087720701467, iteration: 238990
loss: 0.9951431155204773,grad_norm: 0.8438272290595433, iteration: 238991
loss: 1.0033687353134155,grad_norm: 0.9022871555195449, iteration: 238992
loss: 0.9893220663070679,grad_norm: 0.7892080803258084, iteration: 238993
loss: 0.993643045425415,grad_norm: 0.9999991253904078, iteration: 238994
loss: 0.9916386604309082,grad_norm: 0.8383901123791072, iteration: 238995
loss: 0.9878560900688171,grad_norm: 0.9999996367327935, iteration: 238996
loss: 0.9663635492324829,grad_norm: 0.9562266147074607, iteration: 238997
loss: 0.9973056316375732,grad_norm: 0.9999992043570766, iteration: 238998
loss: 0.9923960566520691,grad_norm: 0.9018575928797737, iteration: 238999
loss: 0.9981328845024109,grad_norm: 0.9200917809931464, iteration: 239000
loss: 1.038326621055603,grad_norm: 0.9999991290824335, iteration: 239001
loss: 1.0146052837371826,grad_norm: 0.9187418631967579, iteration: 239002
loss: 0.9813645482063293,grad_norm: 0.8882602748865341, iteration: 239003
loss: 0.9861972332000732,grad_norm: 0.8144665767316503, iteration: 239004
loss: 1.0153491497039795,grad_norm: 0.932178564680754, iteration: 239005
loss: 1.0117661952972412,grad_norm: 0.789826008747703, iteration: 239006
loss: 0.988008439540863,grad_norm: 0.9667251094850159, iteration: 239007
loss: 1.0234137773513794,grad_norm: 0.7665044415777789, iteration: 239008
loss: 1.0747730731964111,grad_norm: 0.8092745578610987, iteration: 239009
loss: 0.9830776453018188,grad_norm: 0.8937915992380697, iteration: 239010
loss: 0.9933646321296692,grad_norm: 0.7142453180697254, iteration: 239011
loss: 1.015580177307129,grad_norm: 0.8607077512143074, iteration: 239012
loss: 1.0067728757858276,grad_norm: 0.9999991125693555, iteration: 239013
loss: 1.0071076154708862,grad_norm: 0.8234663933132758, iteration: 239014
loss: 1.0250637531280518,grad_norm: 0.9369452985906438, iteration: 239015
loss: 0.9921528697013855,grad_norm: 0.8778833427663414, iteration: 239016
loss: 1.0180127620697021,grad_norm: 0.9343915751568721, iteration: 239017
loss: 1.0112066268920898,grad_norm: 0.8567601089064883, iteration: 239018
loss: 0.9903841018676758,grad_norm: 0.7804629931581433, iteration: 239019
loss: 1.021302580833435,grad_norm: 0.99999974684977, iteration: 239020
loss: 1.0628557205200195,grad_norm: 0.9063693872294687, iteration: 239021
loss: 1.0608783960342407,grad_norm: 0.999999155140095, iteration: 239022
loss: 1.0132815837860107,grad_norm: 0.9030678967237398, iteration: 239023
loss: 0.9833089113235474,grad_norm: 0.999999087202802, iteration: 239024
loss: 0.9847735166549683,grad_norm: 0.9360693782638893, iteration: 239025
loss: 0.9878078699111938,grad_norm: 0.9999990222354744, iteration: 239026
loss: 0.9899057745933533,grad_norm: 0.9999990951316982, iteration: 239027
loss: 0.9877926111221313,grad_norm: 0.8545170437005392, iteration: 239028
loss: 0.9737741351127625,grad_norm: 0.8057354614025573, iteration: 239029
loss: 0.9977449774742126,grad_norm: 0.9999992135854954, iteration: 239030
loss: 0.9697675704956055,grad_norm: 0.9231754269013748, iteration: 239031
loss: 1.0207183361053467,grad_norm: 0.9406261282159364, iteration: 239032
loss: 1.039833426475525,grad_norm: 0.9622229454730348, iteration: 239033
loss: 0.9784815311431885,grad_norm: 0.9312915233896402, iteration: 239034
loss: 1.0022425651550293,grad_norm: 0.9308580551556418, iteration: 239035
loss: 1.0669450759887695,grad_norm: 0.999999019887438, iteration: 239036
loss: 0.9677741527557373,grad_norm: 0.8252026506061411, iteration: 239037
loss: 1.004368782043457,grad_norm: 0.8663185639945186, iteration: 239038
loss: 1.0288417339324951,grad_norm: 0.8512934228136024, iteration: 239039
loss: 1.0069106817245483,grad_norm: 0.7502911830688376, iteration: 239040
loss: 1.0311009883880615,grad_norm: 0.791102564729563, iteration: 239041
loss: 0.9862691760063171,grad_norm: 0.899143852024431, iteration: 239042
loss: 0.9976387023925781,grad_norm: 0.9275070987876675, iteration: 239043
loss: 0.9343187212944031,grad_norm: 0.7750354249301791, iteration: 239044
loss: 0.9707897305488586,grad_norm: 0.8340790934932014, iteration: 239045
loss: 0.9836378693580627,grad_norm: 0.8356237429897997, iteration: 239046
loss: 1.0479270219802856,grad_norm: 0.9999996884098411, iteration: 239047
loss: 0.9776486754417419,grad_norm: 0.9560120345229559, iteration: 239048
loss: 1.0181918144226074,grad_norm: 0.7662408335333706, iteration: 239049
loss: 1.0214985609054565,grad_norm: 0.9999990255388657, iteration: 239050
loss: 1.0281363725662231,grad_norm: 0.9486366351466501, iteration: 239051
loss: 1.0072451829910278,grad_norm: 0.855586074754968, iteration: 239052
loss: 1.038304328918457,grad_norm: 0.9999994100716533, iteration: 239053
loss: 0.9869053363800049,grad_norm: 0.808576351733163, iteration: 239054
loss: 0.9897103905677795,grad_norm: 0.9999993534425713, iteration: 239055
loss: 1.0315840244293213,grad_norm: 0.8213152795053065, iteration: 239056
loss: 0.998712956905365,grad_norm: 0.9053392103284811, iteration: 239057
loss: 0.9662786722183228,grad_norm: 0.9150077395681709, iteration: 239058
loss: 0.9965771436691284,grad_norm: 0.919921378063178, iteration: 239059
loss: 1.0138795375823975,grad_norm: 0.9173330667653321, iteration: 239060
loss: 1.0047271251678467,grad_norm: 0.891114411018054, iteration: 239061
loss: 1.0120792388916016,grad_norm: 0.7808678255688817, iteration: 239062
loss: 0.9511037468910217,grad_norm: 0.9187542149122023, iteration: 239063
loss: 1.0303875207901,grad_norm: 0.9819921063339078, iteration: 239064
loss: 1.0282031297683716,grad_norm: 0.7704632630559385, iteration: 239065
loss: 0.9765775203704834,grad_norm: 0.9088553561656333, iteration: 239066
loss: 1.0106779336929321,grad_norm: 0.9999990248760046, iteration: 239067
loss: 0.9877175688743591,grad_norm: 0.8662634377591323, iteration: 239068
loss: 1.0003126859664917,grad_norm: 0.8610250651939196, iteration: 239069
loss: 0.9927361011505127,grad_norm: 0.9008159856987011, iteration: 239070
loss: 0.9932689070701599,grad_norm: 0.959443612764263, iteration: 239071
loss: 1.0005282163619995,grad_norm: 0.9999991411190428, iteration: 239072
loss: 1.000211477279663,grad_norm: 0.8031290900634723, iteration: 239073
loss: 0.9940381646156311,grad_norm: 0.8353360800442805, iteration: 239074
loss: 1.0288331508636475,grad_norm: 0.8214026931182037, iteration: 239075
loss: 0.9938467144966125,grad_norm: 0.841608497533666, iteration: 239076
loss: 0.971468985080719,grad_norm: 0.8246798145575883, iteration: 239077
loss: 0.9874222874641418,grad_norm: 0.9798308173152447, iteration: 239078
loss: 0.9567581415176392,grad_norm: 0.7797916602499545, iteration: 239079
loss: 1.0596510171890259,grad_norm: 0.9999997199798656, iteration: 239080
loss: 1.0043643712997437,grad_norm: 0.8985908533587149, iteration: 239081
loss: 1.0261642932891846,grad_norm: 0.9186427006189078, iteration: 239082
loss: 0.9877755641937256,grad_norm: 0.999998907723294, iteration: 239083
loss: 0.9865260720252991,grad_norm: 0.9568425374471287, iteration: 239084
loss: 0.9884542226791382,grad_norm: 0.9999990199973946, iteration: 239085
loss: 0.9747989177703857,grad_norm: 0.9648054300643791, iteration: 239086
loss: 0.9955287575721741,grad_norm: 0.8454640020154918, iteration: 239087
loss: 0.9937250018119812,grad_norm: 0.9576161598216572, iteration: 239088
loss: 0.9700596332550049,grad_norm: 0.8929975146486949, iteration: 239089
loss: 0.9856295585632324,grad_norm: 0.8094956353450029, iteration: 239090
loss: 1.0237702131271362,grad_norm: 0.7960425622935038, iteration: 239091
loss: 1.0076764822006226,grad_norm: 0.7888911496922958, iteration: 239092
loss: 1.12278413772583,grad_norm: 0.9999990526215152, iteration: 239093
loss: 0.9844959378242493,grad_norm: 0.7637734445858196, iteration: 239094
loss: 0.9829777479171753,grad_norm: 0.82020337792977, iteration: 239095
loss: 1.0016835927963257,grad_norm: 0.8343744367776235, iteration: 239096
loss: 1.0034650564193726,grad_norm: 0.8801880994506465, iteration: 239097
loss: 0.9942690134048462,grad_norm: 0.9966838348971817, iteration: 239098
loss: 0.9880544543266296,grad_norm: 0.923645438227492, iteration: 239099
loss: 0.9825771450996399,grad_norm: 0.8545982398115985, iteration: 239100
loss: 0.9660520553588867,grad_norm: 0.9696545319572722, iteration: 239101
loss: 0.9814268946647644,grad_norm: 0.8160570285259556, iteration: 239102
loss: 1.0051183700561523,grad_norm: 0.8985322720103036, iteration: 239103
loss: 1.0660769939422607,grad_norm: 0.9018873338419509, iteration: 239104
loss: 0.9728360176086426,grad_norm: 0.8460848937719472, iteration: 239105
loss: 1.0082547664642334,grad_norm: 0.7876436942672782, iteration: 239106
loss: 0.9964745044708252,grad_norm: 0.9999991747727686, iteration: 239107
loss: 0.9737864136695862,grad_norm: 0.7999164550967068, iteration: 239108
loss: 0.963784396648407,grad_norm: 0.8130287267259306, iteration: 239109
loss: 0.9873000979423523,grad_norm: 0.8548758996689196, iteration: 239110
loss: 1.0204887390136719,grad_norm: 0.9384243370688797, iteration: 239111
loss: 1.046964168548584,grad_norm: 0.9998889523430635, iteration: 239112
loss: 0.9749295711517334,grad_norm: 0.9018011077936874, iteration: 239113
loss: 0.9795156121253967,grad_norm: 0.9757554425141018, iteration: 239114
loss: 1.0212464332580566,grad_norm: 0.9180108796625198, iteration: 239115
loss: 0.9761168956756592,grad_norm: 0.9999992905828617, iteration: 239116
loss: 0.9539080858230591,grad_norm: 0.8480102299902004, iteration: 239117
loss: 1.0283493995666504,grad_norm: 0.9978689432462762, iteration: 239118
loss: 0.9599556922912598,grad_norm: 0.9360976879583195, iteration: 239119
loss: 1.0487422943115234,grad_norm: 0.9999990775227765, iteration: 239120
loss: 1.079450249671936,grad_norm: 0.9999991007428216, iteration: 239121
loss: 0.979260265827179,grad_norm: 0.9018322459328096, iteration: 239122
loss: 1.0091303586959839,grad_norm: 0.8420141984258949, iteration: 239123
loss: 0.9801149964332581,grad_norm: 0.9992107287879163, iteration: 239124
loss: 0.9983565807342529,grad_norm: 0.964043358689577, iteration: 239125
loss: 0.9835141897201538,grad_norm: 0.8773857153409511, iteration: 239126
loss: 1.0248950719833374,grad_norm: 0.8682000369404881, iteration: 239127
loss: 1.0102088451385498,grad_norm: 0.956902102069792, iteration: 239128
loss: 1.0037263631820679,grad_norm: 0.9999992366876126, iteration: 239129
loss: 1.0285247564315796,grad_norm: 0.971708645604305, iteration: 239130
loss: 0.9826956391334534,grad_norm: 0.9999991302821744, iteration: 239131
loss: 1.0317481756210327,grad_norm: 0.9999991256172565, iteration: 239132
loss: 0.989555835723877,grad_norm: 0.8315935454940485, iteration: 239133
loss: 0.9611706137657166,grad_norm: 0.8737568486191503, iteration: 239134
loss: 1.0488635301589966,grad_norm: 0.9087396735961847, iteration: 239135
loss: 1.0059860944747925,grad_norm: 0.7338519954856394, iteration: 239136
loss: 1.006658673286438,grad_norm: 0.857094477055865, iteration: 239137
loss: 0.9744642376899719,grad_norm: 0.990357616727, iteration: 239138
loss: 1.0350593328475952,grad_norm: 0.9697403911573409, iteration: 239139
loss: 1.0114785432815552,grad_norm: 0.9999989769620824, iteration: 239140
loss: 1.0091333389282227,grad_norm: 0.9643081790968394, iteration: 239141
loss: 0.9715542793273926,grad_norm: 0.8669268606019844, iteration: 239142
loss: 0.994397759437561,grad_norm: 0.8804324805105793, iteration: 239143
loss: 0.9750118851661682,grad_norm: 0.8794687638572384, iteration: 239144
loss: 1.00531005859375,grad_norm: 0.9999990184045731, iteration: 239145
loss: 0.9934505820274353,grad_norm: 0.9999990717479165, iteration: 239146
loss: 0.9825758934020996,grad_norm: 0.9868872825803034, iteration: 239147
loss: 0.9933323860168457,grad_norm: 0.9999991683900916, iteration: 239148
loss: 1.002478003501892,grad_norm: 0.9106654035942412, iteration: 239149
loss: 0.9732284545898438,grad_norm: 0.8724311834327569, iteration: 239150
loss: 0.96629798412323,grad_norm: 0.7954315732530721, iteration: 239151
loss: 1.0255504846572876,grad_norm: 0.9884925061189413, iteration: 239152
loss: 0.9969160556793213,grad_norm: 0.9158199936297278, iteration: 239153
loss: 0.960806131362915,grad_norm: 0.8549811217661832, iteration: 239154
loss: 0.9605249762535095,grad_norm: 0.9999990692087566, iteration: 239155
loss: 0.9873455166816711,grad_norm: 0.8806737404883417, iteration: 239156
loss: 0.9988700151443481,grad_norm: 0.9324834490807515, iteration: 239157
loss: 0.9806143045425415,grad_norm: 0.9724765930149825, iteration: 239158
loss: 0.9907764196395874,grad_norm: 0.9220962264869075, iteration: 239159
loss: 0.9703647494316101,grad_norm: 0.9251489995759146, iteration: 239160
loss: 0.9973497986793518,grad_norm: 0.8737673500121965, iteration: 239161
loss: 0.9541146159172058,grad_norm: 0.8927812788506999, iteration: 239162
loss: 1.0188249349594116,grad_norm: 0.8986189054843398, iteration: 239163
loss: 0.9783505201339722,grad_norm: 0.8787612167210872, iteration: 239164
loss: 1.0013079643249512,grad_norm: 0.8852607465406748, iteration: 239165
loss: 0.9509786367416382,grad_norm: 0.8122495043998962, iteration: 239166
loss: 1.0254830121994019,grad_norm: 0.9481362316697419, iteration: 239167
loss: 1.0510996580123901,grad_norm: 0.9031164795467986, iteration: 239168
loss: 1.0242217779159546,grad_norm: 0.8585307793415051, iteration: 239169
loss: 0.9818494319915771,grad_norm: 0.9010300048214541, iteration: 239170
loss: 1.01105535030365,grad_norm: 0.7382869252347983, iteration: 239171
loss: 1.0090761184692383,grad_norm: 0.9999991653147117, iteration: 239172
loss: 1.011008381843567,grad_norm: 0.7522492252154604, iteration: 239173
loss: 0.9979801177978516,grad_norm: 0.9322890691627088, iteration: 239174
loss: 1.0230426788330078,grad_norm: 0.8550567367498338, iteration: 239175
loss: 0.9969251155853271,grad_norm: 0.8844824885089594, iteration: 239176
loss: 0.9952073097229004,grad_norm: 0.8511736875130359, iteration: 239177
loss: 1.0123347043991089,grad_norm: 0.838139922565648, iteration: 239178
loss: 0.9704137444496155,grad_norm: 0.9999991446683923, iteration: 239179
loss: 1.0105385780334473,grad_norm: 0.8849094981477623, iteration: 239180
loss: 0.9656772613525391,grad_norm: 0.8677347753480887, iteration: 239181
loss: 1.0009785890579224,grad_norm: 0.8463251451336887, iteration: 239182
loss: 1.0254000425338745,grad_norm: 0.9999995248840974, iteration: 239183
loss: 0.9908050298690796,grad_norm: 0.9999989198176029, iteration: 239184
loss: 1.028584361076355,grad_norm: 0.9999989484271632, iteration: 239185
loss: 1.0086960792541504,grad_norm: 1.000000019784997, iteration: 239186
loss: 1.0082447528839111,grad_norm: 0.8301918624402614, iteration: 239187
loss: 1.0054843425750732,grad_norm: 0.9999991298879223, iteration: 239188
loss: 1.0602275133132935,grad_norm: 0.9999999231251953, iteration: 239189
loss: 0.9788486361503601,grad_norm: 0.8796153951530097, iteration: 239190
loss: 0.9819433689117432,grad_norm: 0.7738368334973336, iteration: 239191
loss: 1.0718662738800049,grad_norm: 0.860729025912348, iteration: 239192
loss: 0.980292022228241,grad_norm: 0.9333392901118557, iteration: 239193
loss: 0.9844471216201782,grad_norm: 0.8989774458856944, iteration: 239194
loss: 0.9679399132728577,grad_norm: 0.9999991800205735, iteration: 239195
loss: 1.0124778747558594,grad_norm: 0.8230629571010398, iteration: 239196
loss: 0.9853404760360718,grad_norm: 0.7831647325074815, iteration: 239197
loss: 1.0214543342590332,grad_norm: 0.9999998840742462, iteration: 239198
loss: 1.0114459991455078,grad_norm: 0.8556476725750612, iteration: 239199
loss: 0.9656733274459839,grad_norm: 0.9999992314955451, iteration: 239200
loss: 0.9996216297149658,grad_norm: 0.9925401504231666, iteration: 239201
loss: 1.0299500226974487,grad_norm: 0.794285504293036, iteration: 239202
loss: 1.0566067695617676,grad_norm: 0.9999998346291956, iteration: 239203
loss: 0.9795379042625427,grad_norm: 0.9683246707175637, iteration: 239204
loss: 1.001649260520935,grad_norm: 0.9999991179088307, iteration: 239205
loss: 1.0105880498886108,grad_norm: 0.9999991075607353, iteration: 239206
loss: 0.9821931719779968,grad_norm: 0.8928800589227568, iteration: 239207
loss: 0.9893147945404053,grad_norm: 0.9999992539245598, iteration: 239208
loss: 0.9889943599700928,grad_norm: 0.9999992144517655, iteration: 239209
loss: 1.0748850107192993,grad_norm: 0.9999991901983066, iteration: 239210
loss: 1.027562141418457,grad_norm: 0.9999990096258775, iteration: 239211
loss: 1.0377906560897827,grad_norm: 0.9543154388288427, iteration: 239212
loss: 0.9926559925079346,grad_norm: 0.9999993036230668, iteration: 239213
loss: 1.1028945446014404,grad_norm: 0.9999994354705526, iteration: 239214
loss: 1.1040668487548828,grad_norm: 0.999999344788622, iteration: 239215
loss: 1.0072698593139648,grad_norm: 0.9999991767125777, iteration: 239216
loss: 1.0060096979141235,grad_norm: 0.9274280216027629, iteration: 239217
loss: 1.01207435131073,grad_norm: 0.79577508709149, iteration: 239218
loss: 1.0589491128921509,grad_norm: 0.9999991752379828, iteration: 239219
loss: 1.0919221639633179,grad_norm: 0.9999998507729893, iteration: 239220
loss: 0.9964576363563538,grad_norm: 0.8961829525336797, iteration: 239221
loss: 1.0278692245483398,grad_norm: 0.9999995653398118, iteration: 239222
loss: 1.0184868574142456,grad_norm: 0.999999215453158, iteration: 239223
loss: 1.0141501426696777,grad_norm: 0.9100730508150556, iteration: 239224
loss: 1.1635345220565796,grad_norm: 0.9999992834709087, iteration: 239225
loss: 1.1468182802200317,grad_norm: 0.999999829833679, iteration: 239226
loss: 1.0827935934066772,grad_norm: 0.9999993191775692, iteration: 239227
loss: 1.031401515007019,grad_norm: 0.9999995487429301, iteration: 239228
loss: 1.0207937955856323,grad_norm: 0.9999992194538371, iteration: 239229
loss: 1.0554388761520386,grad_norm: 0.9999998241584751, iteration: 239230
loss: 1.171547293663025,grad_norm: 0.9999999139743356, iteration: 239231
loss: 1.1830679178237915,grad_norm: 0.9999999539023787, iteration: 239232
loss: 1.0208934545516968,grad_norm: 0.9607660039462924, iteration: 239233
loss: 0.9968538284301758,grad_norm: 0.8236407487091356, iteration: 239234
loss: 1.0248491764068604,grad_norm: 0.9953312422187568, iteration: 239235
loss: 1.018051028251648,grad_norm: 0.9999996909320121, iteration: 239236
loss: 1.0452169179916382,grad_norm: 0.9160454349093327, iteration: 239237
loss: 0.9884154796600342,grad_norm: 0.8600984892570793, iteration: 239238
loss: 0.943077802658081,grad_norm: 0.9062963018313099, iteration: 239239
loss: 0.9946597814559937,grad_norm: 0.7924157239098093, iteration: 239240
loss: 0.9975540637969971,grad_norm: 0.9999996678617488, iteration: 239241
loss: 1.069326400756836,grad_norm: 0.9999993342863477, iteration: 239242
loss: 1.0463639497756958,grad_norm: 0.9790680280089361, iteration: 239243
loss: 1.049234390258789,grad_norm: 0.9999994055007848, iteration: 239244
loss: 1.1086889505386353,grad_norm: 0.9999997617884969, iteration: 239245
loss: 1.0055344104766846,grad_norm: 0.8470810757203586, iteration: 239246
loss: 1.0466147661209106,grad_norm: 0.999999507031023, iteration: 239247
loss: 1.0763007402420044,grad_norm: 0.9999993278149819, iteration: 239248
loss: 0.9964758157730103,grad_norm: 0.9999994362194694, iteration: 239249
loss: 0.968553900718689,grad_norm: 0.8679627591179286, iteration: 239250
loss: 1.0133479833602905,grad_norm: 0.9999994108449338, iteration: 239251
loss: 1.0016100406646729,grad_norm: 0.9386563825163019, iteration: 239252
loss: 0.9982976317405701,grad_norm: 0.804869479418991, iteration: 239253
loss: 1.1195164918899536,grad_norm: 0.9999997981380372, iteration: 239254
loss: 0.9901731014251709,grad_norm: 0.9999990630440778, iteration: 239255
loss: 0.9862512350082397,grad_norm: 0.9999991023251134, iteration: 239256
loss: 0.9746693968772888,grad_norm: 0.8456334466817007, iteration: 239257
loss: 1.0008448362350464,grad_norm: 0.9570356537504787, iteration: 239258
loss: 0.9949196577072144,grad_norm: 0.8290708228345605, iteration: 239259
loss: 0.9816886186599731,grad_norm: 0.902596280824389, iteration: 239260
loss: 0.9766473174095154,grad_norm: 0.8417845492014442, iteration: 239261
loss: 1.0045993328094482,grad_norm: 0.8714025634718366, iteration: 239262
loss: 1.1575286388397217,grad_norm: 0.9999999244283587, iteration: 239263
loss: 1.0038511753082275,grad_norm: 0.895202417444675, iteration: 239264
loss: 0.9987431168556213,grad_norm: 0.938843360696159, iteration: 239265
loss: 1.1436735391616821,grad_norm: 0.9999998418587489, iteration: 239266
loss: 1.0137887001037598,grad_norm: 0.9744984541005558, iteration: 239267
loss: 0.981924295425415,grad_norm: 0.70666729616153, iteration: 239268
loss: 1.0129706859588623,grad_norm: 0.6904981269364777, iteration: 239269
loss: 0.9731444716453552,grad_norm: 0.9999999043213585, iteration: 239270
loss: 1.0851194858551025,grad_norm: 0.9999994595763843, iteration: 239271
loss: 0.9705608487129211,grad_norm: 0.8245283396247981, iteration: 239272
loss: 1.0077495574951172,grad_norm: 0.9999996746300646, iteration: 239273
loss: 0.9991806149482727,grad_norm: 0.8914332281922598, iteration: 239274
loss: 0.9734007716178894,grad_norm: 0.9999991093286537, iteration: 239275
loss: 1.0071994066238403,grad_norm: 0.9007892725714511, iteration: 239276
loss: 1.066447138786316,grad_norm: 0.9829000712748807, iteration: 239277
loss: 1.0025235414505005,grad_norm: 0.9271730377109361, iteration: 239278
loss: 1.0631564855575562,grad_norm: 0.916152314113474, iteration: 239279
loss: 1.036686658859253,grad_norm: 0.9145012655207578, iteration: 239280
loss: 1.097488284111023,grad_norm: 0.9999996420783062, iteration: 239281
loss: 1.0061016082763672,grad_norm: 0.9423933322387972, iteration: 239282
loss: 1.0345498323440552,grad_norm: 0.9727235710913884, iteration: 239283
loss: 1.082876205444336,grad_norm: 0.9999992356658024, iteration: 239284
loss: 1.046716570854187,grad_norm: 0.9999998885770711, iteration: 239285
loss: 1.0002868175506592,grad_norm: 0.9999991320068321, iteration: 239286
loss: 0.9761548042297363,grad_norm: 0.9341055567972281, iteration: 239287
loss: 1.0214660167694092,grad_norm: 0.863187360697626, iteration: 239288
loss: 0.9641937017440796,grad_norm: 0.9332510499229965, iteration: 239289
loss: 0.98427414894104,grad_norm: 0.9809775323475727, iteration: 239290
loss: 1.0206876993179321,grad_norm: 0.8797351631098549, iteration: 239291
loss: 1.0148224830627441,grad_norm: 0.9999991756052595, iteration: 239292
loss: 1.1128326654434204,grad_norm: 0.9999997363013838, iteration: 239293
loss: 1.0053503513336182,grad_norm: 0.9999991200674556, iteration: 239294
loss: 0.9688657522201538,grad_norm: 0.7799095102358625, iteration: 239295
loss: 1.055508017539978,grad_norm: 0.999999757202289, iteration: 239296
loss: 0.950564980506897,grad_norm: 0.852198806956284, iteration: 239297
loss: 1.0328835248947144,grad_norm: 0.9680237469058812, iteration: 239298
loss: 1.0340996980667114,grad_norm: 0.8316280776291596, iteration: 239299
loss: 1.0202624797821045,grad_norm: 0.9999993105966076, iteration: 239300
loss: 0.9982674717903137,grad_norm: 0.9999991649270323, iteration: 239301
loss: 1.0641008615493774,grad_norm: 0.954428165465635, iteration: 239302
loss: 0.9600054025650024,grad_norm: 0.941273522811703, iteration: 239303
loss: 0.9425405263900757,grad_norm: 0.7525690509559155, iteration: 239304
loss: 1.003082275390625,grad_norm: 0.9999990986352773, iteration: 239305
loss: 1.0118522644042969,grad_norm: 0.8183656131366045, iteration: 239306
loss: 1.0612454414367676,grad_norm: 0.9999991092321929, iteration: 239307
loss: 1.0238986015319824,grad_norm: 0.8963366158191166, iteration: 239308
loss: 0.9918743968009949,grad_norm: 0.9999992036004605, iteration: 239309
loss: 1.0100620985031128,grad_norm: 0.9999991444459878, iteration: 239310
loss: 1.0843322277069092,grad_norm: 0.9999999861911645, iteration: 239311
loss: 1.008503794670105,grad_norm: 0.8775562748266803, iteration: 239312
loss: 1.0113840103149414,grad_norm: 0.8432684133424253, iteration: 239313
loss: 1.023261547088623,grad_norm: 0.9999992475716832, iteration: 239314
loss: 1.0395407676696777,grad_norm: 0.8374065726153165, iteration: 239315
loss: 0.982912003993988,grad_norm: 0.9270987514999444, iteration: 239316
loss: 1.023893117904663,grad_norm: 0.9999989326517431, iteration: 239317
loss: 1.0109740495681763,grad_norm: 0.8980154863270432, iteration: 239318
loss: 1.0412291288375854,grad_norm: 0.962956346548677, iteration: 239319
loss: 1.0002232789993286,grad_norm: 0.9382422700611265, iteration: 239320
loss: 0.9792098999023438,grad_norm: 0.9999990004723686, iteration: 239321
loss: 0.9889020323753357,grad_norm: 0.8102227477182918, iteration: 239322
loss: 1.0032535791397095,grad_norm: 0.9638622162391675, iteration: 239323
loss: 1.0753740072250366,grad_norm: 0.9999994204028706, iteration: 239324
loss: 0.9568673372268677,grad_norm: 0.7972416655447727, iteration: 239325
loss: 0.954430341720581,grad_norm: 0.9365502344717531, iteration: 239326
loss: 1.0895806550979614,grad_norm: 0.9999991535684534, iteration: 239327
loss: 0.996587336063385,grad_norm: 0.8399266670123545, iteration: 239328
loss: 0.9911482334136963,grad_norm: 0.999999270443078, iteration: 239329
loss: 0.9954608082771301,grad_norm: 0.999999024258002, iteration: 239330
loss: 0.9949446320533752,grad_norm: 0.9210016578783544, iteration: 239331
loss: 0.9862072467803955,grad_norm: 0.7696129106082286, iteration: 239332
loss: 1.0681623220443726,grad_norm: 0.9999992245487951, iteration: 239333
loss: 1.0109525918960571,grad_norm: 0.9999991243815207, iteration: 239334
loss: 1.0040615797042847,grad_norm: 0.9402670561122576, iteration: 239335
loss: 0.9272891283035278,grad_norm: 0.9767658506483169, iteration: 239336
loss: 0.9761322140693665,grad_norm: 0.9839040726346977, iteration: 239337
loss: 1.002051591873169,grad_norm: 0.9999993633761284, iteration: 239338
loss: 1.0217463970184326,grad_norm: 0.9999991078161692, iteration: 239339
loss: 1.0080151557922363,grad_norm: 0.9414434729362462, iteration: 239340
loss: 1.0688029527664185,grad_norm: 0.9740802053257428, iteration: 239341
loss: 1.0270099639892578,grad_norm: 0.9999993208180045, iteration: 239342
loss: 0.9816511869430542,grad_norm: 0.9676989963970637, iteration: 239343
loss: 1.045139193534851,grad_norm: 0.9999989517372422, iteration: 239344
loss: 0.9586856961250305,grad_norm: 0.8403155418080318, iteration: 239345
loss: 1.0076875686645508,grad_norm: 0.9702219113992265, iteration: 239346
loss: 1.014500617980957,grad_norm: 0.9502055688049551, iteration: 239347
loss: 0.982444167137146,grad_norm: 0.9999990819431201, iteration: 239348
loss: 0.9554566144943237,grad_norm: 0.9999990609013866, iteration: 239349
loss: 1.0242687463760376,grad_norm: 0.9298510272710127, iteration: 239350
loss: 0.9968823790550232,grad_norm: 0.8788310926716983, iteration: 239351
loss: 0.9948150515556335,grad_norm: 0.9714956486892921, iteration: 239352
loss: 1.0064688920974731,grad_norm: 0.8468683714963372, iteration: 239353
loss: 0.958528459072113,grad_norm: 0.9004960783412742, iteration: 239354
loss: 1.0103739500045776,grad_norm: 0.8700779765687251, iteration: 239355
loss: 0.9955492615699768,grad_norm: 0.9050720106059978, iteration: 239356
loss: 0.9968167543411255,grad_norm: 0.9701146091538527, iteration: 239357
loss: 1.0169886350631714,grad_norm: 0.8458630832960575, iteration: 239358
loss: 0.9799275994300842,grad_norm: 0.9999991757394882, iteration: 239359
loss: 0.9935671091079712,grad_norm: 0.9841258819784326, iteration: 239360
loss: 0.9866633415222168,grad_norm: 0.9999990157076915, iteration: 239361
loss: 1.2490237951278687,grad_norm: 0.9999994853289517, iteration: 239362
loss: 1.0736833810806274,grad_norm: 0.9999992273052695, iteration: 239363
loss: 1.0136207342147827,grad_norm: 0.9628063840697122, iteration: 239364
loss: 1.0159592628479004,grad_norm: 0.9345710438111322, iteration: 239365
loss: 0.9947039484977722,grad_norm: 0.8635230847410381, iteration: 239366
loss: 0.9916600584983826,grad_norm: 0.985948727472987, iteration: 239367
loss: 1.046413540840149,grad_norm: 0.9999994426589534, iteration: 239368
loss: 0.990180253982544,grad_norm: 0.9846772524299365, iteration: 239369
loss: 0.973368763923645,grad_norm: 0.8369286334144832, iteration: 239370
loss: 0.997672975063324,grad_norm: 0.8406001106689358, iteration: 239371
loss: 1.038512110710144,grad_norm: 0.9999995139179415, iteration: 239372
loss: 1.0101368427276611,grad_norm: 0.8652371874493109, iteration: 239373
loss: 1.007896900177002,grad_norm: 0.9113221075710916, iteration: 239374
loss: 1.0178050994873047,grad_norm: 0.8615816880055617, iteration: 239375
loss: 0.9726338386535645,grad_norm: 0.7593572974657227, iteration: 239376
loss: 1.0286908149719238,grad_norm: 0.9999991984934188, iteration: 239377
loss: 0.9996809363365173,grad_norm: 0.8274426745568113, iteration: 239378
loss: 0.9795641899108887,grad_norm: 0.8652954730867534, iteration: 239379
loss: 0.9842971563339233,grad_norm: 0.9770929730952205, iteration: 239380
loss: 0.9841123819351196,grad_norm: 0.9999992550675513, iteration: 239381
loss: 1.0184605121612549,grad_norm: 0.9446590450008142, iteration: 239382
loss: 0.9762683510780334,grad_norm: 0.9999991431940047, iteration: 239383
loss: 1.006118893623352,grad_norm: 0.8424655390547227, iteration: 239384
loss: 1.006146788597107,grad_norm: 0.9999994804107323, iteration: 239385
loss: 0.9826840758323669,grad_norm: 0.9999996969380589, iteration: 239386
loss: 1.0070924758911133,grad_norm: 0.9089117893492547, iteration: 239387
loss: 1.0370091199874878,grad_norm: 0.9999989958484259, iteration: 239388
loss: 0.9840527176856995,grad_norm: 0.9999990929395209, iteration: 239389
loss: 1.0000861883163452,grad_norm: 0.8736447986237442, iteration: 239390
loss: 0.9957228899002075,grad_norm: 0.7605092329370028, iteration: 239391
loss: 1.0827808380126953,grad_norm: 0.9999998859335311, iteration: 239392
loss: 0.9914340972900391,grad_norm: 0.8727705979303889, iteration: 239393
loss: 0.9991249442100525,grad_norm: 0.9999991947864226, iteration: 239394
loss: 0.9939540028572083,grad_norm: 0.889787347299296, iteration: 239395
loss: 1.027927279472351,grad_norm: 0.941384154183207, iteration: 239396
loss: 1.01455557346344,grad_norm: 0.9299286562373755, iteration: 239397
loss: 0.9905017614364624,grad_norm: 0.8603033562075146, iteration: 239398
loss: 0.984442412853241,grad_norm: 0.9142975080568218, iteration: 239399
loss: 0.9857605695724487,grad_norm: 0.7672063137340331, iteration: 239400
loss: 1.0257205963134766,grad_norm: 0.9999998130368457, iteration: 239401
loss: 0.9928224086761475,grad_norm: 0.9999998570158274, iteration: 239402
loss: 0.9937602877616882,grad_norm: 0.9999994288864981, iteration: 239403
loss: 0.9774773716926575,grad_norm: 0.8471585296325739, iteration: 239404
loss: 1.0252575874328613,grad_norm: 0.9999989314692992, iteration: 239405
loss: 1.0357966423034668,grad_norm: 0.9476243032702067, iteration: 239406
loss: 1.031511664390564,grad_norm: 0.9999994104622179, iteration: 239407
loss: 1.0555260181427002,grad_norm: 0.9676879125651728, iteration: 239408
loss: 1.0707002878189087,grad_norm: 0.9999997724233214, iteration: 239409
loss: 0.9689103364944458,grad_norm: 0.911606266082021, iteration: 239410
loss: 0.9918916821479797,grad_norm: 0.859627204693283, iteration: 239411
loss: 0.9709479212760925,grad_norm: 0.8595874278813271, iteration: 239412
loss: 0.9941607713699341,grad_norm: 0.9999991592159105, iteration: 239413
loss: 0.9946814775466919,grad_norm: 0.999999300661686, iteration: 239414
loss: 1.0567153692245483,grad_norm: 0.8458337996364049, iteration: 239415
loss: 1.0368529558181763,grad_norm: 0.9999991876721778, iteration: 239416
loss: 0.9657850861549377,grad_norm: 0.9999990777498932, iteration: 239417
loss: 1.0292284488677979,grad_norm: 0.8625817684213268, iteration: 239418
loss: 0.9859642386436462,grad_norm: 0.8970442321177732, iteration: 239419
loss: 1.0411765575408936,grad_norm: 0.9999994597280742, iteration: 239420
loss: 0.9433519244194031,grad_norm: 0.8657314611573125, iteration: 239421
loss: 1.0299750566482544,grad_norm: 0.8794411662300412, iteration: 239422
loss: 1.0008760690689087,grad_norm: 0.9999991499789878, iteration: 239423
loss: 0.9487484097480774,grad_norm: 0.9275280866315877, iteration: 239424
loss: 1.0236464738845825,grad_norm: 0.9179975281875676, iteration: 239425
loss: 0.9771340489387512,grad_norm: 0.9322104479181134, iteration: 239426
loss: 0.9997560977935791,grad_norm: 0.7874055427157874, iteration: 239427
loss: 0.9722052812576294,grad_norm: 0.94644370102081, iteration: 239428
loss: 0.9813071489334106,grad_norm: 0.9999989772220821, iteration: 239429
loss: 1.0082170963287354,grad_norm: 0.9820975523683575, iteration: 239430
loss: 0.9779021143913269,grad_norm: 0.999999089613174, iteration: 239431
loss: 1.12434983253479,grad_norm: 0.9999996759758352, iteration: 239432
loss: 1.0266427993774414,grad_norm: 0.9727743148375034, iteration: 239433
loss: 1.0020256042480469,grad_norm: 0.8455469308379437, iteration: 239434
loss: 0.9997772574424744,grad_norm: 0.9109562118597583, iteration: 239435
loss: 1.017519474029541,grad_norm: 0.9999992763847891, iteration: 239436
loss: 1.0010805130004883,grad_norm: 0.9359897120314428, iteration: 239437
loss: 1.0508729219436646,grad_norm: 0.8228829646116947, iteration: 239438
loss: 1.0020893812179565,grad_norm: 0.9347736303308016, iteration: 239439
loss: 1.0167162418365479,grad_norm: 0.9999990431335598, iteration: 239440
loss: 0.9900538921356201,grad_norm: 0.8923233453411117, iteration: 239441
loss: 1.0331993103027344,grad_norm: 0.7738198012847559, iteration: 239442
loss: 1.0068167448043823,grad_norm: 0.9999992600223537, iteration: 239443
loss: 1.021500587463379,grad_norm: 0.9398325208713619, iteration: 239444
loss: 1.0189120769500732,grad_norm: 0.9999990378283797, iteration: 239445
loss: 1.0601342916488647,grad_norm: 0.9914124164450296, iteration: 239446
loss: 0.9921059012413025,grad_norm: 0.929287885299085, iteration: 239447
loss: 0.9622743129730225,grad_norm: 0.9434417587483177, iteration: 239448
loss: 0.9731504321098328,grad_norm: 0.9999991421488026, iteration: 239449
loss: 0.9740021228790283,grad_norm: 0.9999991480966708, iteration: 239450
loss: 1.0131652355194092,grad_norm: 0.7376867466465659, iteration: 239451
loss: 0.9891389608383179,grad_norm: 0.9356111186660816, iteration: 239452
loss: 0.9990429282188416,grad_norm: 0.8995612011176994, iteration: 239453
loss: 0.9869361519813538,grad_norm: 0.8470622831315083, iteration: 239454
loss: 0.9939188957214355,grad_norm: 0.9999992203733293, iteration: 239455
loss: 1.0357120037078857,grad_norm: 0.9999990477408329, iteration: 239456
loss: 0.9418039321899414,grad_norm: 0.9999990885967741, iteration: 239457
loss: 0.9968470931053162,grad_norm: 0.8851331730678982, iteration: 239458
loss: 1.0023616552352905,grad_norm: 0.9560579743469069, iteration: 239459
loss: 1.0411574840545654,grad_norm: 0.9999993190208453, iteration: 239460
loss: 1.0068998336791992,grad_norm: 0.8590249506591692, iteration: 239461
loss: 0.9991490244865417,grad_norm: 0.8809985106406806, iteration: 239462
loss: 1.0070754289627075,grad_norm: 0.8901226926839568, iteration: 239463
loss: 0.9824181199073792,grad_norm: 0.9531089286320892, iteration: 239464
loss: 1.0710642337799072,grad_norm: 0.9999992236024283, iteration: 239465
loss: 1.039307713508606,grad_norm: 0.9999991419424044, iteration: 239466
loss: 1.0333646535873413,grad_norm: 0.9999996505040162, iteration: 239467
loss: 1.0435376167297363,grad_norm: 0.8871502048419487, iteration: 239468
loss: 0.9730982780456543,grad_norm: 0.9096719564326033, iteration: 239469
loss: 1.0645815134048462,grad_norm: 0.999999997162439, iteration: 239470
loss: 1.0017188787460327,grad_norm: 0.958319573703075, iteration: 239471
loss: 0.9822792410850525,grad_norm: 0.9999992556094972, iteration: 239472
loss: 0.9821829795837402,grad_norm: 0.8458848795775591, iteration: 239473
loss: 1.0121119022369385,grad_norm: 0.7293021132388057, iteration: 239474
loss: 1.0063605308532715,grad_norm: 0.9645096670190345, iteration: 239475
loss: 1.0024546384811401,grad_norm: 0.9192644806388972, iteration: 239476
loss: 0.9973963499069214,grad_norm: 0.7429730012692505, iteration: 239477
loss: 0.9761251211166382,grad_norm: 0.8949082420896378, iteration: 239478
loss: 1.0043584108352661,grad_norm: 0.9082798255041136, iteration: 239479
loss: 1.0013397932052612,grad_norm: 0.8263674731892857, iteration: 239480
loss: 0.999362587928772,grad_norm: 0.9150417640233589, iteration: 239481
loss: 1.0365184545516968,grad_norm: 0.9506414596252274, iteration: 239482
loss: 1.0124679803848267,grad_norm: 0.9999992240437414, iteration: 239483
loss: 1.0516276359558105,grad_norm: 0.9999991420327495, iteration: 239484
loss: 1.0384745597839355,grad_norm: 0.9004251210209644, iteration: 239485
loss: 0.9850958585739136,grad_norm: 0.9999991755267036, iteration: 239486
loss: 0.996273934841156,grad_norm: 0.8405945427827987, iteration: 239487
loss: 0.9490851759910583,grad_norm: 0.9033078584929335, iteration: 239488
loss: 0.9940000772476196,grad_norm: 0.9355468005486101, iteration: 239489
loss: 1.0141867399215698,grad_norm: 0.7512626597505879, iteration: 239490
loss: 0.9764580726623535,grad_norm: 0.9780229659417199, iteration: 239491
loss: 0.9672650098800659,grad_norm: 0.9065103856765417, iteration: 239492
loss: 0.9825454950332642,grad_norm: 0.9999991150291305, iteration: 239493
loss: 0.9927219748497009,grad_norm: 0.7696262318017076, iteration: 239494
loss: 0.9714215397834778,grad_norm: 0.9999992067259726, iteration: 239495
loss: 0.9868837594985962,grad_norm: 0.823823237772476, iteration: 239496
loss: 0.9796206951141357,grad_norm: 0.8475573196101301, iteration: 239497
loss: 0.9932404160499573,grad_norm: 0.9999990357505243, iteration: 239498
loss: 1.020923376083374,grad_norm: 0.6945881193967988, iteration: 239499
loss: 1.0944350957870483,grad_norm: 0.9999994655418102, iteration: 239500
loss: 0.9684352874755859,grad_norm: 0.9426304292679899, iteration: 239501
loss: 1.0815236568450928,grad_norm: 0.9999990695498374, iteration: 239502
loss: 1.014332890510559,grad_norm: 0.9999992432799123, iteration: 239503
loss: 1.0409553050994873,grad_norm: 0.9348232868596191, iteration: 239504
loss: 1.030388593673706,grad_norm: 0.9963257596342021, iteration: 239505
loss: 0.9792419672012329,grad_norm: 0.9732318007914768, iteration: 239506
loss: 0.9772562980651855,grad_norm: 0.8306663843316934, iteration: 239507
loss: 0.9593758583068848,grad_norm: 0.9999990711438957, iteration: 239508
loss: 0.9843000173568726,grad_norm: 0.9999999598651934, iteration: 239509
loss: 0.9902060031890869,grad_norm: 0.9999990154214936, iteration: 239510
loss: 0.9857631921768188,grad_norm: 0.9999990805982486, iteration: 239511
loss: 1.0043079853057861,grad_norm: 0.9120336595264694, iteration: 239512
loss: 0.9671235680580139,grad_norm: 0.9758695227074629, iteration: 239513
loss: 0.9716415405273438,grad_norm: 0.9082222436166059, iteration: 239514
loss: 1.0648964643478394,grad_norm: 0.999999479950382, iteration: 239515
loss: 1.0065324306488037,grad_norm: 0.9292459274299344, iteration: 239516
loss: 0.9878202676773071,grad_norm: 0.9999990064400759, iteration: 239517
loss: 1.0158361196517944,grad_norm: 0.9999993570372415, iteration: 239518
loss: 1.0555264949798584,grad_norm: 0.9999998006564651, iteration: 239519
loss: 1.0195949077606201,grad_norm: 0.999999147111288, iteration: 239520
loss: 0.97943514585495,grad_norm: 0.899420093368625, iteration: 239521
loss: 1.013181447982788,grad_norm: 0.999999054998445, iteration: 239522
loss: 1.0370135307312012,grad_norm: 0.9999997776817046, iteration: 239523
loss: 0.9834288954734802,grad_norm: 0.9999996107408422, iteration: 239524
loss: 0.9655252695083618,grad_norm: 0.7243509729602755, iteration: 239525
loss: 0.9567944407463074,grad_norm: 0.9193962915412663, iteration: 239526
loss: 1.0038710832595825,grad_norm: 0.9859754159310803, iteration: 239527
loss: 0.9669464826583862,grad_norm: 0.8794893553456328, iteration: 239528
loss: 0.9995204210281372,grad_norm: 0.9447706782991994, iteration: 239529
loss: 1.003955602645874,grad_norm: 0.8709558407384168, iteration: 239530
loss: 0.9988288879394531,grad_norm: 0.8590384982997621, iteration: 239531
loss: 1.0161428451538086,grad_norm: 0.9999993858777045, iteration: 239532
loss: 1.0292896032333374,grad_norm: 0.8741839222189164, iteration: 239533
loss: 0.9780876040458679,grad_norm: 0.9999990537980098, iteration: 239534
loss: 0.9919354915618896,grad_norm: 0.9528459129446204, iteration: 239535
loss: 1.0079345703125,grad_norm: 0.9999992533475031, iteration: 239536
loss: 0.9609113931655884,grad_norm: 0.8608162104146732, iteration: 239537
loss: 1.0007169246673584,grad_norm: 0.8628072062427554, iteration: 239538
loss: 1.0189692974090576,grad_norm: 0.7636136391719903, iteration: 239539
loss: 0.9906413555145264,grad_norm: 0.8619990806535954, iteration: 239540
loss: 1.0284366607666016,grad_norm: 0.999999313671411, iteration: 239541
loss: 1.019787311553955,grad_norm: 0.8196180560853384, iteration: 239542
loss: 0.9925860166549683,grad_norm: 0.8786710695672417, iteration: 239543
loss: 0.9715876579284668,grad_norm: 0.9590716316734184, iteration: 239544
loss: 1.0169334411621094,grad_norm: 0.9999990403720418, iteration: 239545
loss: 1.0117442607879639,grad_norm: 0.9129346864121827, iteration: 239546
loss: 1.011684536933899,grad_norm: 0.8939971597529602, iteration: 239547
loss: 0.9804717898368835,grad_norm: 0.8688342265669166, iteration: 239548
loss: 1.052310585975647,grad_norm: 0.9999997787931327, iteration: 239549
loss: 1.0044914484024048,grad_norm: 0.8688899156010572, iteration: 239550
loss: 1.135439157485962,grad_norm: 0.9999995431878971, iteration: 239551
loss: 0.9984739422798157,grad_norm: 0.9999991468238659, iteration: 239552
loss: 1.0200477838516235,grad_norm: 0.8309758566304795, iteration: 239553
loss: 1.0065909624099731,grad_norm: 0.9251012327993626, iteration: 239554
loss: 1.018376350402832,grad_norm: 0.8023160040784459, iteration: 239555
loss: 1.014905333518982,grad_norm: 0.9999990647776372, iteration: 239556
loss: 1.0014255046844482,grad_norm: 0.8878275010109191, iteration: 239557
loss: 1.0959913730621338,grad_norm: 0.9999990452114389, iteration: 239558
loss: 1.031306505203247,grad_norm: 0.9999993368355606, iteration: 239559
loss: 0.9967579245567322,grad_norm: 0.9999991822124041, iteration: 239560
loss: 0.9440405964851379,grad_norm: 0.9999994224658582, iteration: 239561
loss: 0.9874568581581116,grad_norm: 0.9307903994685877, iteration: 239562
loss: 1.0164905786514282,grad_norm: 0.9183230983531818, iteration: 239563
loss: 0.9733213782310486,grad_norm: 0.7640860158289958, iteration: 239564
loss: 0.9915645718574524,grad_norm: 0.7431641739194463, iteration: 239565
loss: 0.9919925928115845,grad_norm: 0.8359867884756282, iteration: 239566
loss: 0.9968708753585815,grad_norm: 0.8702506238543627, iteration: 239567
loss: 0.9761344194412231,grad_norm: 0.9160167981232719, iteration: 239568
loss: 1.0020726919174194,grad_norm: 0.9999991251058616, iteration: 239569
loss: 1.0342843532562256,grad_norm: 0.8122539190016358, iteration: 239570
loss: 1.0082488059997559,grad_norm: 0.9999990428092064, iteration: 239571
loss: 1.0276973247528076,grad_norm: 0.9999990839117451, iteration: 239572
loss: 1.0731110572814941,grad_norm: 0.9999998429813397, iteration: 239573
loss: 1.0034584999084473,grad_norm: 0.9999990559266848, iteration: 239574
loss: 1.0145350694656372,grad_norm: 0.9999992329088456, iteration: 239575
loss: 0.9920482635498047,grad_norm: 0.9999995906455992, iteration: 239576
loss: 1.0217872858047485,grad_norm: 0.9999996887674854, iteration: 239577
loss: 0.9750321507453918,grad_norm: 0.8783380410751076, iteration: 239578
loss: 0.9946963787078857,grad_norm: 0.9734138627310633, iteration: 239579
loss: 0.9899490475654602,grad_norm: 0.9669643038363334, iteration: 239580
loss: 1.0398720502853394,grad_norm: 0.9999991778395239, iteration: 239581
loss: 0.9931885004043579,grad_norm: 0.9486607313406469, iteration: 239582
loss: 0.9894582629203796,grad_norm: 0.9999991459613943, iteration: 239583
loss: 1.011228084564209,grad_norm: 0.8400434545866989, iteration: 239584
loss: 0.9913459420204163,grad_norm: 0.9405649124466962, iteration: 239585
loss: 0.9930186867713928,grad_norm: 0.9875625078853855, iteration: 239586
loss: 1.0092582702636719,grad_norm: 0.9273734618121454, iteration: 239587
loss: 0.9722263813018799,grad_norm: 0.979244113065948, iteration: 239588
loss: 1.0211384296417236,grad_norm: 0.679030062705143, iteration: 239589
loss: 0.9949779510498047,grad_norm: 0.9883658586286909, iteration: 239590
loss: 1.0254980325698853,grad_norm: 0.9514691571985544, iteration: 239591
loss: 0.9795993566513062,grad_norm: 0.9528899050450285, iteration: 239592
loss: 1.000095009803772,grad_norm: 0.9288125644609543, iteration: 239593
loss: 1.0449986457824707,grad_norm: 0.8570536485045995, iteration: 239594
loss: 1.0126792192459106,grad_norm: 0.8437705769173179, iteration: 239595
loss: 1.079531192779541,grad_norm: 0.9999993227746953, iteration: 239596
loss: 0.9786588549613953,grad_norm: 0.9565154948659422, iteration: 239597
loss: 1.0689653158187866,grad_norm: 0.9999995888534522, iteration: 239598
loss: 0.9644091725349426,grad_norm: 0.956787238757235, iteration: 239599
loss: 0.9882152080535889,grad_norm: 0.9999993231641428, iteration: 239600
loss: 1.024864673614502,grad_norm: 0.9999991271736657, iteration: 239601
loss: 1.038900375366211,grad_norm: 0.8576174508919722, iteration: 239602
loss: 1.1179652214050293,grad_norm: 0.9999995352582668, iteration: 239603
loss: 1.004731297492981,grad_norm: 0.8060589243906403, iteration: 239604
loss: 1.0122958421707153,grad_norm: 0.9898181480776567, iteration: 239605
loss: 1.006420373916626,grad_norm: 0.8202166362898621, iteration: 239606
loss: 1.0069410800933838,grad_norm: 0.885663717769097, iteration: 239607
loss: 1.0551334619522095,grad_norm: 0.9966350461521849, iteration: 239608
loss: 1.044061541557312,grad_norm: 0.8548426707513345, iteration: 239609
loss: 0.9848220348358154,grad_norm: 0.7719080606271759, iteration: 239610
loss: 1.0213465690612793,grad_norm: 0.9999990703369238, iteration: 239611
loss: 1.0086665153503418,grad_norm: 0.9999990943673, iteration: 239612
loss: 1.0003713369369507,grad_norm: 0.7734633358161872, iteration: 239613
loss: 1.033574104309082,grad_norm: 0.9999989751099629, iteration: 239614
loss: 1.0243288278579712,grad_norm: 0.798914624713477, iteration: 239615
loss: 0.984805166721344,grad_norm: 0.9275590656618311, iteration: 239616
loss: 1.0023835897445679,grad_norm: 0.9999991850704805, iteration: 239617
loss: 0.968920111656189,grad_norm: 0.9961556208025879, iteration: 239618
loss: 0.9643729329109192,grad_norm: 0.881126090809061, iteration: 239619
loss: 1.0216639041900635,grad_norm: 0.8411650169141014, iteration: 239620
loss: 1.0054723024368286,grad_norm: 0.9999995720163957, iteration: 239621
loss: 1.0731865167617798,grad_norm: 0.9999990697240205, iteration: 239622
loss: 1.0036289691925049,grad_norm: 0.9213742973297094, iteration: 239623
loss: 0.9824726581573486,grad_norm: 0.9999990822393074, iteration: 239624
loss: 1.0090421438217163,grad_norm: 0.878560637024194, iteration: 239625
loss: 0.9647778272628784,grad_norm: 0.9999995272858243, iteration: 239626
loss: 1.026652455329895,grad_norm: 0.7260096435173545, iteration: 239627
loss: 1.0470281839370728,grad_norm: 0.9999993993992615, iteration: 239628
loss: 0.9765446186065674,grad_norm: 0.8119956875534425, iteration: 239629
loss: 1.0215003490447998,grad_norm: 0.961376612829547, iteration: 239630
loss: 1.0442571640014648,grad_norm: 0.8496283121027323, iteration: 239631
loss: 1.005664587020874,grad_norm: 0.8912429481544455, iteration: 239632
loss: 1.0072633028030396,grad_norm: 0.8344596767802532, iteration: 239633
loss: 1.0304110050201416,grad_norm: 0.9999992809730246, iteration: 239634
loss: 1.014599323272705,grad_norm: 0.8644510988009869, iteration: 239635
loss: 1.0825976133346558,grad_norm: 1.0000000499662958, iteration: 239636
loss: 0.9902659058570862,grad_norm: 0.8381988416546331, iteration: 239637
loss: 0.9782082438468933,grad_norm: 0.8783772350536745, iteration: 239638
loss: 0.9892497062683105,grad_norm: 0.9999991715164588, iteration: 239639
loss: 1.0111111402511597,grad_norm: 0.7610870113763789, iteration: 239640
loss: 0.9687628746032715,grad_norm: 0.869283540702754, iteration: 239641
loss: 1.0236313343048096,grad_norm: 0.9999991523953647, iteration: 239642
loss: 1.0251740217208862,grad_norm: 0.9999994785112899, iteration: 239643
loss: 1.0307148694992065,grad_norm: 0.8511825885998876, iteration: 239644
loss: 1.0024666786193848,grad_norm: 0.9455559987808805, iteration: 239645
loss: 1.0602869987487793,grad_norm: 1.0000000421677935, iteration: 239646
loss: 1.0118919610977173,grad_norm: 0.9999993425521243, iteration: 239647
loss: 0.9877289533615112,grad_norm: 0.9999991321651833, iteration: 239648
loss: 0.9857614636421204,grad_norm: 0.9999989421040594, iteration: 239649
loss: 1.0056841373443604,grad_norm: 0.9200942417756366, iteration: 239650
loss: 1.0109597444534302,grad_norm: 0.9999994399932015, iteration: 239651
loss: 1.0355148315429688,grad_norm: 0.9999991915347953, iteration: 239652
loss: 1.0696237087249756,grad_norm: 0.9999991791783254, iteration: 239653
loss: 1.0857757329940796,grad_norm: 0.9999998553055109, iteration: 239654
loss: 1.0119731426239014,grad_norm: 0.8335506010948752, iteration: 239655
loss: 1.0192017555236816,grad_norm: 0.8613147815102055, iteration: 239656
loss: 1.1257314682006836,grad_norm: 0.9999996080158967, iteration: 239657
loss: 1.001781702041626,grad_norm: 0.9408192385449611, iteration: 239658
loss: 1.0239696502685547,grad_norm: 0.9254861660478332, iteration: 239659
loss: 1.0131810903549194,grad_norm: 0.8167061937916351, iteration: 239660
loss: 0.9756085276603699,grad_norm: 0.8875310593316491, iteration: 239661
loss: 1.0000579357147217,grad_norm: 0.9999991093794827, iteration: 239662
loss: 0.9830731153488159,grad_norm: 0.9999991301231156, iteration: 239663
loss: 1.0067275762557983,grad_norm: 0.9999996469493387, iteration: 239664
loss: 1.007533311843872,grad_norm: 0.9999991378481223, iteration: 239665
loss: 0.9952834844589233,grad_norm: 0.9810106703498795, iteration: 239666
loss: 1.031443476676941,grad_norm: 0.9999991138794658, iteration: 239667
loss: 0.9765796661376953,grad_norm: 0.9195656869210862, iteration: 239668
loss: 1.0008219480514526,grad_norm: 0.999999513913127, iteration: 239669
loss: 0.9607442617416382,grad_norm: 0.7536499430873094, iteration: 239670
loss: 0.9849381446838379,grad_norm: 0.8089497130968841, iteration: 239671
loss: 1.0109889507293701,grad_norm: 0.9999991227914287, iteration: 239672
loss: 1.022189736366272,grad_norm: 0.9999991169296458, iteration: 239673
loss: 1.0195220708847046,grad_norm: 0.892796275941347, iteration: 239674
loss: 1.0234242677688599,grad_norm: 0.9999996769319331, iteration: 239675
loss: 1.0333372354507446,grad_norm: 0.9999991399740854, iteration: 239676
loss: 1.0537643432617188,grad_norm: 0.9999995391017694, iteration: 239677
loss: 1.0121079683303833,grad_norm: 0.9808242112364437, iteration: 239678
loss: 0.9956041574478149,grad_norm: 0.7929248714078815, iteration: 239679
loss: 1.0319607257843018,grad_norm: 0.9283116244035557, iteration: 239680
loss: 0.990872859954834,grad_norm: 0.8312932229300097, iteration: 239681
loss: 1.00236177444458,grad_norm: 0.9999990430560973, iteration: 239682
loss: 0.9975213408470154,grad_norm: 0.7803973939193957, iteration: 239683
loss: 1.0126054286956787,grad_norm: 0.9151204074899878, iteration: 239684
loss: 1.0282061100006104,grad_norm: 0.9999994819974125, iteration: 239685
loss: 1.010266900062561,grad_norm: 0.7591219473021151, iteration: 239686
loss: 0.9903380870819092,grad_norm: 0.9569489279425876, iteration: 239687
loss: 1.0685070753097534,grad_norm: 0.9999991336960338, iteration: 239688
loss: 0.9859999418258667,grad_norm: 0.9999995433938607, iteration: 239689
loss: 0.9849628806114197,grad_norm: 0.8654184837644477, iteration: 239690
loss: 1.009981393814087,grad_norm: 0.9999998347210763, iteration: 239691
loss: 1.003626823425293,grad_norm: 0.8147729845068079, iteration: 239692
loss: 0.9708220958709717,grad_norm: 0.8053414850410903, iteration: 239693
loss: 0.980108916759491,grad_norm: 0.946073182274161, iteration: 239694
loss: 0.9570926427841187,grad_norm: 0.9606032704222659, iteration: 239695
loss: 0.9979715347290039,grad_norm: 0.9374953166038005, iteration: 239696
loss: 0.9723037481307983,grad_norm: 0.9018242088595498, iteration: 239697
loss: 1.005603313446045,grad_norm: 0.9999991542435988, iteration: 239698
loss: 1.0063719749450684,grad_norm: 0.9638864534644083, iteration: 239699
loss: 0.9766808748245239,grad_norm: 0.8800238375536076, iteration: 239700
loss: 1.0303691625595093,grad_norm: 0.9144688986094994, iteration: 239701
loss: 1.0072941780090332,grad_norm: 0.9999993025477849, iteration: 239702
loss: 1.0382733345031738,grad_norm: 0.9999990930561241, iteration: 239703
loss: 0.9859384298324585,grad_norm: 0.9629182658864566, iteration: 239704
loss: 1.0250244140625,grad_norm: 0.9999990639734404, iteration: 239705
loss: 1.0123924016952515,grad_norm: 0.7527265689680374, iteration: 239706
loss: 0.9558851718902588,grad_norm: 0.9036176285687526, iteration: 239707
loss: 1.0073344707489014,grad_norm: 0.9999993220489146, iteration: 239708
loss: 0.9609624147415161,grad_norm: 0.9999992163020038, iteration: 239709
loss: 1.0316287279129028,grad_norm: 0.999999305463166, iteration: 239710
loss: 1.0011937618255615,grad_norm: 0.9007247745103097, iteration: 239711
loss: 0.9960114359855652,grad_norm: 0.9353376998456595, iteration: 239712
loss: 1.037463903427124,grad_norm: 0.999999280903219, iteration: 239713
loss: 0.991787850856781,grad_norm: 0.7767961647254682, iteration: 239714
loss: 0.9612499475479126,grad_norm: 0.8390014167731329, iteration: 239715
loss: 1.032874345779419,grad_norm: 0.9999993828509264, iteration: 239716
loss: 1.136582851409912,grad_norm: 0.9999996572481061, iteration: 239717
loss: 1.0138118267059326,grad_norm: 0.9999989312871176, iteration: 239718
loss: 0.9952943325042725,grad_norm: 0.9018316491114857, iteration: 239719
loss: 1.0131782293319702,grad_norm: 0.8979826088609357, iteration: 239720
loss: 1.0285569429397583,grad_norm: 0.929901631991413, iteration: 239721
loss: 1.011869192123413,grad_norm: 0.9999990565610346, iteration: 239722
loss: 1.0301316976547241,grad_norm: 0.9999997077091045, iteration: 239723
loss: 0.9625435471534729,grad_norm: 0.9703654668209812, iteration: 239724
loss: 1.0019621849060059,grad_norm: 0.8847763535575758, iteration: 239725
loss: 0.9841668605804443,grad_norm: 0.9999991522232865, iteration: 239726
loss: 0.9981911182403564,grad_norm: 0.9999991606204817, iteration: 239727
loss: 1.021475076675415,grad_norm: 0.9999990743244269, iteration: 239728
loss: 1.0123902559280396,grad_norm: 0.7487217371120455, iteration: 239729
loss: 1.0179122686386108,grad_norm: 0.9999990336201828, iteration: 239730
loss: 0.9894329309463501,grad_norm: 0.9237957658760161, iteration: 239731
loss: 0.9692441821098328,grad_norm: 0.9524321368763955, iteration: 239732
loss: 0.9772818088531494,grad_norm: 0.8675071257985598, iteration: 239733
loss: 1.0276020765304565,grad_norm: 0.8856686792291749, iteration: 239734
loss: 0.9902724027633667,grad_norm: 0.9999991783035145, iteration: 239735
loss: 1.0504144430160522,grad_norm: 0.9999994287790723, iteration: 239736
loss: 1.1732791662216187,grad_norm: 0.9999996539282122, iteration: 239737
loss: 0.9976454973220825,grad_norm: 0.7408395668349688, iteration: 239738
loss: 1.1117784976959229,grad_norm: 0.9999999304385235, iteration: 239739
loss: 0.991193950176239,grad_norm: 0.89046854874946, iteration: 239740
loss: 1.0318154096603394,grad_norm: 0.999999144983607, iteration: 239741
loss: 0.9802187085151672,grad_norm: 0.948679756015806, iteration: 239742
loss: 1.001794457435608,grad_norm: 0.8254669886467526, iteration: 239743
loss: 1.0214465856552124,grad_norm: 0.9999997754546289, iteration: 239744
loss: 1.0305126905441284,grad_norm: 0.999999881350338, iteration: 239745
loss: 1.0169535875320435,grad_norm: 0.9999990959078492, iteration: 239746
loss: 1.0615227222442627,grad_norm: 0.9999990552258785, iteration: 239747
loss: 1.053666114807129,grad_norm: 0.8858497384321383, iteration: 239748
loss: 1.004325032234192,grad_norm: 0.8681378667748212, iteration: 239749
loss: 1.002540946006775,grad_norm: 0.9999990833138885, iteration: 239750
loss: 1.0259318351745605,grad_norm: 0.9999992600777821, iteration: 239751
loss: 1.007755994796753,grad_norm: 0.9999992194838665, iteration: 239752
loss: 1.0232099294662476,grad_norm: 0.9999999459236366, iteration: 239753
loss: 1.021741509437561,grad_norm: 0.8206523961226534, iteration: 239754
loss: 1.0085244178771973,grad_norm: 0.9055401466009841, iteration: 239755
loss: 0.9865639805793762,grad_norm: 0.9999991917723406, iteration: 239756
loss: 1.1022268533706665,grad_norm: 0.9999990446759937, iteration: 239757
loss: 1.125759482383728,grad_norm: 0.9999996463309624, iteration: 239758
loss: 1.2390533685684204,grad_norm: 0.9999999426134684, iteration: 239759
loss: 1.0177123546600342,grad_norm: 0.9999990294185694, iteration: 239760
loss: 1.1102927923202515,grad_norm: 0.9999998754356558, iteration: 239761
loss: 1.0083142518997192,grad_norm: 0.9999993583868412, iteration: 239762
loss: 1.0566846132278442,grad_norm: 0.9999995656596936, iteration: 239763
loss: 1.0316277742385864,grad_norm: 0.8470836487808445, iteration: 239764
loss: 1.0044810771942139,grad_norm: 0.9999991527910663, iteration: 239765
loss: 0.975385844707489,grad_norm: 0.8512167591319142, iteration: 239766
loss: 1.0228047370910645,grad_norm: 0.9087115571948366, iteration: 239767
loss: 1.0739883184432983,grad_norm: 0.9999999228647075, iteration: 239768
loss: 1.0225050449371338,grad_norm: 0.9143694254734583, iteration: 239769
loss: 1.0083932876586914,grad_norm: 0.9999989326436556, iteration: 239770
loss: 1.0319888591766357,grad_norm: 0.9999998869468308, iteration: 239771
loss: 0.9957172870635986,grad_norm: 0.94001647769078, iteration: 239772
loss: 0.9889137744903564,grad_norm: 0.9999991034951715, iteration: 239773
loss: 1.0069314241409302,grad_norm: 0.9999990403376519, iteration: 239774
loss: 0.9949679970741272,grad_norm: 0.7596807798231096, iteration: 239775
loss: 1.008684754371643,grad_norm: 0.8264950196799975, iteration: 239776
loss: 0.9616956114768982,grad_norm: 0.9693576504866823, iteration: 239777
loss: 0.9862669706344604,grad_norm: 0.7913257504882227, iteration: 239778
loss: 1.024428129196167,grad_norm: 0.9999997937916155, iteration: 239779
loss: 1.036323070526123,grad_norm: 0.9999992702214743, iteration: 239780
loss: 0.9963474273681641,grad_norm: 0.91685476463274, iteration: 239781
loss: 0.9930689930915833,grad_norm: 0.848285069698674, iteration: 239782
loss: 1.1480778455734253,grad_norm: 0.9637241339352673, iteration: 239783
loss: 1.0012840032577515,grad_norm: 0.8344100873470048, iteration: 239784
loss: 1.0520044565200806,grad_norm: 0.9999995306693668, iteration: 239785
loss: 0.9725740551948547,grad_norm: 0.982618970881109, iteration: 239786
loss: 1.0698965787887573,grad_norm: 0.9620345338788555, iteration: 239787
loss: 0.9663814902305603,grad_norm: 0.9675863690508498, iteration: 239788
loss: 1.024211049079895,grad_norm: 0.9279578091436107, iteration: 239789
loss: 1.0697665214538574,grad_norm: 0.9999997426506073, iteration: 239790
loss: 1.072229266166687,grad_norm: 0.9999992359126588, iteration: 239791
loss: 1.0492645502090454,grad_norm: 0.9999994120221785, iteration: 239792
loss: 1.0542763471603394,grad_norm: 0.8847554490519777, iteration: 239793
loss: 0.9868975281715393,grad_norm: 0.8468715582189457, iteration: 239794
loss: 1.0550369024276733,grad_norm: 0.999999575517834, iteration: 239795
loss: 0.9892809987068176,grad_norm: 0.8307493563259886, iteration: 239796
loss: 1.0019131898880005,grad_norm: 0.9999992058331222, iteration: 239797
loss: 1.023507833480835,grad_norm: 0.9999998163946551, iteration: 239798
loss: 1.0053868293762207,grad_norm: 0.9277298300756682, iteration: 239799
loss: 1.0000207424163818,grad_norm: 0.9999992110015292, iteration: 239800
loss: 1.0353349447250366,grad_norm: 0.8900374876632079, iteration: 239801
loss: 1.0238115787506104,grad_norm: 0.9999991475621235, iteration: 239802
loss: 0.9746237397193909,grad_norm: 0.9554301648068367, iteration: 239803
loss: 0.9501091241836548,grad_norm: 0.8943547626925332, iteration: 239804
loss: 1.0042778253555298,grad_norm: 0.9999998916259814, iteration: 239805
loss: 1.002830982208252,grad_norm: 0.7840292382171762, iteration: 239806
loss: 1.0307815074920654,grad_norm: 0.9283907874810875, iteration: 239807
loss: 1.0401371717453003,grad_norm: 0.9999996996896983, iteration: 239808
loss: 0.9964069128036499,grad_norm: 0.9999992769647406, iteration: 239809
loss: 1.0349478721618652,grad_norm: 0.9999991711802116, iteration: 239810
loss: 0.9938889145851135,grad_norm: 0.9965495206300409, iteration: 239811
loss: 1.0051076412200928,grad_norm: 0.7648735782014741, iteration: 239812
loss: 1.1061989068984985,grad_norm: 0.9999999728273631, iteration: 239813
loss: 0.9766034483909607,grad_norm: 0.9400973895424038, iteration: 239814
loss: 1.011765718460083,grad_norm: 0.9530255104390959, iteration: 239815
loss: 0.9590927362442017,grad_norm: 0.9994344507001259, iteration: 239816
loss: 0.9600880146026611,grad_norm: 0.9391914018749563, iteration: 239817
loss: 0.9644375443458557,grad_norm: 0.9999992950202792, iteration: 239818
loss: 1.025470495223999,grad_norm: 0.9056480298447392, iteration: 239819
loss: 1.0036910772323608,grad_norm: 0.8840392923314287, iteration: 239820
loss: 0.9782143831253052,grad_norm: 0.9999995461032022, iteration: 239821
loss: 1.0709550380706787,grad_norm: 0.9367770506498767, iteration: 239822
loss: 0.9691905975341797,grad_norm: 0.9913633610500806, iteration: 239823
loss: 0.9946877956390381,grad_norm: 0.9999990430569392, iteration: 239824
loss: 0.9798863530158997,grad_norm: 0.999999200542941, iteration: 239825
loss: 0.9987477660179138,grad_norm: 0.7401905919486412, iteration: 239826
loss: 0.988372802734375,grad_norm: 0.999999195180043, iteration: 239827
loss: 1.0280934572219849,grad_norm: 0.8309120377918875, iteration: 239828
loss: 1.0168957710266113,grad_norm: 0.9999992319745037, iteration: 239829
loss: 1.0445317029953003,grad_norm: 0.8882193577231233, iteration: 239830
loss: 0.9911283254623413,grad_norm: 0.7895500595306121, iteration: 239831
loss: 1.0184056758880615,grad_norm: 0.9999993680258173, iteration: 239832
loss: 0.9874228239059448,grad_norm: 0.8963618038431079, iteration: 239833
loss: 1.0121527910232544,grad_norm: 0.8955131931972534, iteration: 239834
loss: 1.0204559564590454,grad_norm: 0.9999995875817824, iteration: 239835
loss: 0.9721412658691406,grad_norm: 0.8515206745453557, iteration: 239836
loss: 1.0237571001052856,grad_norm: 0.8599644437954707, iteration: 239837
loss: 0.9695785641670227,grad_norm: 0.9999990858135845, iteration: 239838
loss: 1.0118448734283447,grad_norm: 0.7866077393455688, iteration: 239839
loss: 0.9964452385902405,grad_norm: 0.999999074631517, iteration: 239840
loss: 1.0327599048614502,grad_norm: 0.9999992541249589, iteration: 239841
loss: 0.9689939022064209,grad_norm: 0.9146173132203841, iteration: 239842
loss: 0.997866153717041,grad_norm: 0.8367170252723821, iteration: 239843
loss: 1.0244824886322021,grad_norm: 0.9199233518405548, iteration: 239844
loss: 0.984043300151825,grad_norm: 0.8859311828295247, iteration: 239845
loss: 0.9796273112297058,grad_norm: 0.9184262208742155, iteration: 239846
loss: 1.0082067251205444,grad_norm: 0.8764555936443884, iteration: 239847
loss: 1.0676463842391968,grad_norm: 0.999999096278819, iteration: 239848
loss: 1.0394104719161987,grad_norm: 0.9202511112476951, iteration: 239849
loss: 1.0184946060180664,grad_norm: 0.9999991524969882, iteration: 239850
loss: 1.0021494626998901,grad_norm: 0.864876114160782, iteration: 239851
loss: 1.014306902885437,grad_norm: 0.8759536142314399, iteration: 239852
loss: 1.0386258363723755,grad_norm: 0.9999990582808046, iteration: 239853
loss: 0.9867655038833618,grad_norm: 0.8842848321408671, iteration: 239854
loss: 0.996727705001831,grad_norm: 0.9999990616593725, iteration: 239855
loss: 0.9883453845977783,grad_norm: 0.8796332565351622, iteration: 239856
loss: 0.9884388446807861,grad_norm: 0.9933204303685678, iteration: 239857
loss: 1.0086777210235596,grad_norm: 0.9339739675890586, iteration: 239858
loss: 1.108242392539978,grad_norm: 0.9999999223186539, iteration: 239859
loss: 0.9808903932571411,grad_norm: 0.8185196843757863, iteration: 239860
loss: 1.040392279624939,grad_norm: 0.9001115857573613, iteration: 239861
loss: 0.9953519701957703,grad_norm: 0.8111152300487944, iteration: 239862
loss: 0.9713428020477295,grad_norm: 0.9079494080052373, iteration: 239863
loss: 0.9991734027862549,grad_norm: 0.9999990307056802, iteration: 239864
loss: 0.9920227527618408,grad_norm: 0.9999989410913277, iteration: 239865
loss: 1.0441287755966187,grad_norm: 0.999999214377479, iteration: 239866
loss: 1.041700005531311,grad_norm: 0.9999991984579605, iteration: 239867
loss: 1.0440937280654907,grad_norm: 0.9508116920351176, iteration: 239868
loss: 0.9961858987808228,grad_norm: 0.9999996860993686, iteration: 239869
loss: 0.9905091524124146,grad_norm: 0.9999991241761901, iteration: 239870
loss: 0.9967119693756104,grad_norm: 0.8698282181668555, iteration: 239871
loss: 0.9787395000457764,grad_norm: 0.9465282158972066, iteration: 239872
loss: 1.052006721496582,grad_norm: 0.7798202530657882, iteration: 239873
loss: 1.0037904977798462,grad_norm: 0.981247991710998, iteration: 239874
loss: 1.0081787109375,grad_norm: 0.8192072490249743, iteration: 239875
loss: 1.1149669885635376,grad_norm: 0.9999992418409485, iteration: 239876
loss: 1.0354543924331665,grad_norm: 0.9999991091634229, iteration: 239877
loss: 1.0069807767868042,grad_norm: 0.8178139470987367, iteration: 239878
loss: 1.039081335067749,grad_norm: 0.9999991243490668, iteration: 239879
loss: 1.0555154085159302,grad_norm: 0.9079433745473261, iteration: 239880
loss: 0.9944171905517578,grad_norm: 0.8897542582057684, iteration: 239881
loss: 0.9955716133117676,grad_norm: 0.8913085193332025, iteration: 239882
loss: 0.9903262853622437,grad_norm: 0.8527152770408465, iteration: 239883
loss: 1.0334155559539795,grad_norm: 0.999999088474661, iteration: 239884
loss: 0.9949154853820801,grad_norm: 0.9999996481416242, iteration: 239885
loss: 0.9612352252006531,grad_norm: 0.8546528369300761, iteration: 239886
loss: 1.0212526321411133,grad_norm: 0.8354098319364751, iteration: 239887
loss: 0.9858568906784058,grad_norm: 0.8924071217904561, iteration: 239888
loss: 1.0309275388717651,grad_norm: 0.9999998587857155, iteration: 239889
loss: 1.030155062675476,grad_norm: 0.8930896100745582, iteration: 239890
loss: 0.9781641960144043,grad_norm: 0.9999991624982525, iteration: 239891
loss: 0.9828566908836365,grad_norm: 0.9999990968746495, iteration: 239892
loss: 1.008457064628601,grad_norm: 0.999999171479559, iteration: 239893
loss: 1.027700424194336,grad_norm: 0.9999996830866539, iteration: 239894
loss: 0.9982668161392212,grad_norm: 0.8178176038082189, iteration: 239895
loss: 1.0214104652404785,grad_norm: 0.981901033327819, iteration: 239896
loss: 1.0128401517868042,grad_norm: 0.9999990717261575, iteration: 239897
loss: 0.9987263083457947,grad_norm: 0.9999994323373326, iteration: 239898
loss: 1.0423122644424438,grad_norm: 0.8372546487457375, iteration: 239899
loss: 1.0360146760940552,grad_norm: 0.9999992094916275, iteration: 239900
loss: 1.0664680004119873,grad_norm: 0.9784632351235262, iteration: 239901
loss: 0.9918243885040283,grad_norm: 0.9999991374457897, iteration: 239902
loss: 1.0336952209472656,grad_norm: 0.9381505883639614, iteration: 239903
loss: 0.9943078756332397,grad_norm: 0.8811376570854867, iteration: 239904
loss: 1.0762815475463867,grad_norm: 0.9999992315574312, iteration: 239905
loss: 1.0011945962905884,grad_norm: 0.812903138982712, iteration: 239906
loss: 0.9835711717605591,grad_norm: 0.884940747654365, iteration: 239907
loss: 1.0271598100662231,grad_norm: 0.8038103457020784, iteration: 239908
loss: 0.9918696284294128,grad_norm: 0.8701982833542182, iteration: 239909
loss: 1.0278997421264648,grad_norm: 0.9999990301253394, iteration: 239910
loss: 0.9996749758720398,grad_norm: 0.9186817730819319, iteration: 239911
loss: 1.0246360301971436,grad_norm: 0.999999013208786, iteration: 239912
loss: 0.9511275887489319,grad_norm: 0.9999990428652724, iteration: 239913
loss: 0.9776961207389832,grad_norm: 0.7833472389858867, iteration: 239914
loss: 1.005282998085022,grad_norm: 0.9999990961131535, iteration: 239915
loss: 0.9917137622833252,grad_norm: 0.9386059789172236, iteration: 239916
loss: 1.037617802619934,grad_norm: 0.9999991626639048, iteration: 239917
loss: 0.9863603115081787,grad_norm: 0.9417255498396409, iteration: 239918
loss: 1.0338248014450073,grad_norm: 0.8954325487133524, iteration: 239919
loss: 0.9803234338760376,grad_norm: 0.9999990358828579, iteration: 239920
loss: 1.0087230205535889,grad_norm: 0.9999989652210449, iteration: 239921
loss: 1.060114860534668,grad_norm: 0.9999991560303154, iteration: 239922
loss: 0.9895596504211426,grad_norm: 0.9740344813024189, iteration: 239923
loss: 1.000491976737976,grad_norm: 0.870981756423988, iteration: 239924
loss: 0.9799556136131287,grad_norm: 0.9067519240657931, iteration: 239925
loss: 1.0289100408554077,grad_norm: 0.9690407883886667, iteration: 239926
loss: 0.9953137636184692,grad_norm: 0.9127622645120965, iteration: 239927
loss: 0.9756671190261841,grad_norm: 0.9999992510916375, iteration: 239928
loss: 0.9823352694511414,grad_norm: 0.9935879201377142, iteration: 239929
loss: 1.0333038568496704,grad_norm: 0.8605912165229661, iteration: 239930
loss: 0.9810419082641602,grad_norm: 0.9999993752499944, iteration: 239931
loss: 1.030383586883545,grad_norm: 0.8426003006945944, iteration: 239932
loss: 0.9998676776885986,grad_norm: 0.7570246896434498, iteration: 239933
loss: 0.984546422958374,grad_norm: 0.9999995260207093, iteration: 239934
loss: 0.9908385872840881,grad_norm: 0.9896590422878587, iteration: 239935
loss: 0.9997738003730774,grad_norm: 0.9999991278741084, iteration: 239936
loss: 1.0119646787643433,grad_norm: 0.9530515320721069, iteration: 239937
loss: 1.0149739980697632,grad_norm: 0.9174018361141069, iteration: 239938
loss: 0.9982292652130127,grad_norm: 0.7468874886591294, iteration: 239939
loss: 1.012845754623413,grad_norm: 0.9999990690952364, iteration: 239940
loss: 1.0633875131607056,grad_norm: 0.9999992344098777, iteration: 239941
loss: 1.0276652574539185,grad_norm: 0.7927520784757219, iteration: 239942
loss: 1.0355427265167236,grad_norm: 0.9999997282245706, iteration: 239943
loss: 1.0133919715881348,grad_norm: 0.8718423246239164, iteration: 239944
loss: 0.9887092113494873,grad_norm: 0.9999991052354051, iteration: 239945
loss: 1.0168343782424927,grad_norm: 0.8139925896400423, iteration: 239946
loss: 0.9667743444442749,grad_norm: 0.971491165898707, iteration: 239947
loss: 0.9932504296302795,grad_norm: 0.9988339337831793, iteration: 239948
loss: 1.0247186422348022,grad_norm: 0.856852108984115, iteration: 239949
loss: 1.0209611654281616,grad_norm: 0.9999991334931919, iteration: 239950
loss: 1.0081894397735596,grad_norm: 0.9125853487118791, iteration: 239951
loss: 0.9944837689399719,grad_norm: 0.8366359441109934, iteration: 239952
loss: 0.9963306784629822,grad_norm: 0.9255594786959986, iteration: 239953
loss: 1.0213619470596313,grad_norm: 0.948056342969174, iteration: 239954
loss: 0.9897390604019165,grad_norm: 0.7870344008754926, iteration: 239955
loss: 1.0071940422058105,grad_norm: 0.9320331086104102, iteration: 239956
loss: 0.9774535298347473,grad_norm: 0.9539778486715887, iteration: 239957
loss: 1.042818546295166,grad_norm: 0.9999995014437408, iteration: 239958
loss: 1.0046939849853516,grad_norm: 0.9999991032691887, iteration: 239959
loss: 1.0000967979431152,grad_norm: 0.9999991255948287, iteration: 239960
loss: 1.0066499710083008,grad_norm: 0.8193534199876625, iteration: 239961
loss: 0.9993439316749573,grad_norm: 0.9999991043477282, iteration: 239962
loss: 1.025839924812317,grad_norm: 0.9216648176989494, iteration: 239963
loss: 1.0239955186843872,grad_norm: 0.8728735337109341, iteration: 239964
loss: 0.9998602271080017,grad_norm: 0.9062817281575125, iteration: 239965
loss: 1.0248339176177979,grad_norm: 0.8685141243470246, iteration: 239966
loss: 0.9989045262336731,grad_norm: 0.8486957061979155, iteration: 239967
loss: 1.0125775337219238,grad_norm: 0.789437201860933, iteration: 239968
loss: 1.0979806184768677,grad_norm: 0.9999991705658733, iteration: 239969
loss: 1.1076977252960205,grad_norm: 0.9999995472944969, iteration: 239970
loss: 1.0276947021484375,grad_norm: 0.9648761620212278, iteration: 239971
loss: 0.9665451049804688,grad_norm: 0.8455998540175649, iteration: 239972
loss: 1.000578761100769,grad_norm: 0.9999990482344796, iteration: 239973
loss: 1.0128618478775024,grad_norm: 0.9760740070297523, iteration: 239974
loss: 0.9870591163635254,grad_norm: 0.9999991066699132, iteration: 239975
loss: 0.995635986328125,grad_norm: 0.9883797318016551, iteration: 239976
loss: 1.0091553926467896,grad_norm: 0.8532019761446292, iteration: 239977
loss: 1.0050032138824463,grad_norm: 0.9999994516421521, iteration: 239978
loss: 1.0323442220687866,grad_norm: 0.9999993509108726, iteration: 239979
loss: 1.0073564052581787,grad_norm: 0.8337430392844315, iteration: 239980
loss: 1.0074981451034546,grad_norm: 0.9953721427591374, iteration: 239981
loss: 0.9833117723464966,grad_norm: 0.910241613729261, iteration: 239982
loss: 0.957780659198761,grad_norm: 0.7538030000677557, iteration: 239983
loss: 0.9742552638053894,grad_norm: 0.9999994713209734, iteration: 239984
loss: 1.0017651319503784,grad_norm: 0.9552444826460464, iteration: 239985
loss: 0.9909371137619019,grad_norm: 0.8622897932551663, iteration: 239986
loss: 1.0020309686660767,grad_norm: 0.8604593756761121, iteration: 239987
loss: 0.9778887629508972,grad_norm: 0.9558725315967049, iteration: 239988
loss: 0.9406076669692993,grad_norm: 0.8477302870377004, iteration: 239989
loss: 0.9570180177688599,grad_norm: 0.9811836269601477, iteration: 239990
loss: 1.0203126668930054,grad_norm: 0.8939222526513236, iteration: 239991
loss: 1.074870228767395,grad_norm: 0.992341991172157, iteration: 239992
loss: 1.0257219076156616,grad_norm: 0.8616204265719709, iteration: 239993
loss: 0.9783986806869507,grad_norm: 0.9132688270074328, iteration: 239994
loss: 1.0036866664886475,grad_norm: 0.9013176198410701, iteration: 239995
loss: 1.0007280111312866,grad_norm: 0.8731127604563346, iteration: 239996
loss: 1.006290078163147,grad_norm: 0.9761749640547429, iteration: 239997
loss: 0.993895947933197,grad_norm: 0.9999990232841977, iteration: 239998
loss: 0.9976930022239685,grad_norm: 0.7878133970168181, iteration: 239999
loss: 1.0632829666137695,grad_norm: 0.9999998256249977, iteration: 240000
Evaluating at step 240000
{'val': 0.9946742374449968, 'test': 2.4593654539638434}
loss: 1.015120029449463,grad_norm: 0.8486059926977305, iteration: 240001
loss: 0.9906234741210938,grad_norm: 0.8436119476258636, iteration: 240002
loss: 1.0020145177841187,grad_norm: 0.8969981377490163, iteration: 240003
loss: 0.9978432059288025,grad_norm: 0.8768014014189464, iteration: 240004
loss: 1.0223342180252075,grad_norm: 0.999999176701828, iteration: 240005
loss: 0.9789220690727234,grad_norm: 0.9070876477342675, iteration: 240006
loss: 1.0450910329818726,grad_norm: 0.9999994529828227, iteration: 240007
loss: 1.008224606513977,grad_norm: 0.9999992592384186, iteration: 240008
loss: 1.086904764175415,grad_norm: 0.9776120124229464, iteration: 240009
loss: 1.0247341394424438,grad_norm: 0.8489947231755204, iteration: 240010
loss: 1.004309892654419,grad_norm: 0.8652763642238216, iteration: 240011
loss: 0.9737932085990906,grad_norm: 0.9999990042257036, iteration: 240012
loss: 0.997221052646637,grad_norm: 0.943904267707252, iteration: 240013
loss: 0.9843319654464722,grad_norm: 0.9740111236614852, iteration: 240014
loss: 0.9697695374488831,grad_norm: 0.9999989219780762, iteration: 240015
loss: 0.9569306969642639,grad_norm: 0.9372263923597878, iteration: 240016
loss: 0.9835789203643799,grad_norm: 0.933581298192665, iteration: 240017
loss: 0.9936268925666809,grad_norm: 0.7987660633378162, iteration: 240018
loss: 0.9600415229797363,grad_norm: 0.9999991357706834, iteration: 240019
loss: 1.0057796239852905,grad_norm: 0.7732999408791997, iteration: 240020
loss: 1.0427902936935425,grad_norm: 0.9999992884855615, iteration: 240021
loss: 1.015529751777649,grad_norm: 0.8569402200180161, iteration: 240022
loss: 1.1104793548583984,grad_norm: 0.9999993762366466, iteration: 240023
loss: 1.0170745849609375,grad_norm: 0.7858230197895089, iteration: 240024
loss: 1.0171337127685547,grad_norm: 0.8940282076700657, iteration: 240025
loss: 1.0279613733291626,grad_norm: 0.9999990888985452, iteration: 240026
loss: 0.9944193363189697,grad_norm: 0.871371898396528, iteration: 240027
loss: 1.079781174659729,grad_norm: 0.9999992728195095, iteration: 240028
loss: 1.054255485534668,grad_norm: 0.9999998992890312, iteration: 240029
loss: 0.990809977054596,grad_norm: 0.9184857115051958, iteration: 240030
loss: 0.9726321697235107,grad_norm: 0.8320137198830304, iteration: 240031
loss: 1.0868737697601318,grad_norm: 0.9999998869832523, iteration: 240032
loss: 0.9819529056549072,grad_norm: 0.9999999102928585, iteration: 240033
loss: 0.9687340259552002,grad_norm: 0.8800001318918694, iteration: 240034
loss: 0.9997632503509521,grad_norm: 0.8271021068308688, iteration: 240035
loss: 1.0089987516403198,grad_norm: 0.9999996823148937, iteration: 240036
loss: 1.0638620853424072,grad_norm: 0.9999997748468413, iteration: 240037
loss: 0.9848388433456421,grad_norm: 1.0000000079595002, iteration: 240038
loss: 0.980987012386322,grad_norm: 0.9999991630607752, iteration: 240039
loss: 1.0785473585128784,grad_norm: 0.9999992515736299, iteration: 240040
loss: 0.9355827569961548,grad_norm: 0.8630944358828679, iteration: 240041
loss: 1.0096794366836548,grad_norm: 0.9964557912671764, iteration: 240042
loss: 0.9844277501106262,grad_norm: 0.8233151698807835, iteration: 240043
loss: 1.0835825204849243,grad_norm: 0.8835203772861852, iteration: 240044
loss: 1.078860878944397,grad_norm: 0.9999993412499759, iteration: 240045
loss: 1.0404810905456543,grad_norm: 0.9999997738193348, iteration: 240046
loss: 1.0733636617660522,grad_norm: 0.9578665260506827, iteration: 240047
loss: 0.999861478805542,grad_norm: 0.9996309813364355, iteration: 240048
loss: 1.1165140867233276,grad_norm: 0.9999995277634495, iteration: 240049
loss: 0.9874111413955688,grad_norm: 0.9999990020051531, iteration: 240050
loss: 1.0328384637832642,grad_norm: 0.9999991061382904, iteration: 240051
loss: 1.1524053812026978,grad_norm: 0.9083408421600058, iteration: 240052
loss: 0.9836826324462891,grad_norm: 0.9999989970649082, iteration: 240053
loss: 1.134397268295288,grad_norm: 0.9999994758491592, iteration: 240054
loss: 1.0699775218963623,grad_norm: 0.9999993060273125, iteration: 240055
loss: 1.2112864255905151,grad_norm: 0.9999997981923706, iteration: 240056
loss: 1.112335205078125,grad_norm: 0.9999995158964172, iteration: 240057
loss: 1.0619179010391235,grad_norm: 0.8715932701083151, iteration: 240058
loss: 1.002645492553711,grad_norm: 0.9999992101581208, iteration: 240059
loss: 1.0459082126617432,grad_norm: 0.9733418168643384, iteration: 240060
loss: 1.0075509548187256,grad_norm: 0.8160138552443954, iteration: 240061
loss: 1.0856472253799438,grad_norm: 0.906543786810372, iteration: 240062
loss: 1.000724196434021,grad_norm: 0.9470886200254738, iteration: 240063
loss: 1.0122859477996826,grad_norm: 0.9999990942319387, iteration: 240064
loss: 0.987569272518158,grad_norm: 0.8821200069095989, iteration: 240065
loss: 1.0071656703948975,grad_norm: 0.9999998727553358, iteration: 240066
loss: 1.0078585147857666,grad_norm: 0.9999991282756316, iteration: 240067
loss: 1.0184742212295532,grad_norm: 0.9999992899884358, iteration: 240068
loss: 0.9846717715263367,grad_norm: 0.9602324475305131, iteration: 240069
loss: 1.0789493322372437,grad_norm: 0.9999990593898851, iteration: 240070
loss: 1.079543113708496,grad_norm: 0.9999992223232548, iteration: 240071
loss: 1.0947273969650269,grad_norm: 0.9716512135324282, iteration: 240072
loss: 1.0449362993240356,grad_norm: 0.9007526635100678, iteration: 240073
loss: 1.0006139278411865,grad_norm: 0.9057902393468414, iteration: 240074
loss: 1.0084948539733887,grad_norm: 0.872235634799165, iteration: 240075
loss: 1.1940075159072876,grad_norm: 0.9063784895848646, iteration: 240076
loss: 1.2404005527496338,grad_norm: 0.9999993574176679, iteration: 240077
loss: 1.0049233436584473,grad_norm: 0.9999990463703546, iteration: 240078
loss: 1.010567545890808,grad_norm: 0.9999991623164127, iteration: 240079
loss: 1.034454345703125,grad_norm: 0.9999991347408671, iteration: 240080
loss: 0.9981087446212769,grad_norm: 0.8968054684180969, iteration: 240081
loss: 0.9724403023719788,grad_norm: 0.8948093627858363, iteration: 240082
loss: 1.0321314334869385,grad_norm: 0.9181467819928031, iteration: 240083
loss: 0.9677698016166687,grad_norm: 0.9999990945541133, iteration: 240084
loss: 1.036063313484192,grad_norm: 0.9855685638543933, iteration: 240085
loss: 1.2179139852523804,grad_norm: 0.9999998792268695, iteration: 240086
loss: 1.0139505863189697,grad_norm: 0.8954242432828584, iteration: 240087
loss: 1.0416840314865112,grad_norm: 0.9999997344584084, iteration: 240088
loss: 0.9959823489189148,grad_norm: 0.9999993653758208, iteration: 240089
loss: 1.0161293745040894,grad_norm: 0.9999991461056884, iteration: 240090
loss: 1.057494044303894,grad_norm: 0.9999992407983017, iteration: 240091
loss: 1.051601529121399,grad_norm: 0.999999457022352, iteration: 240092
loss: 0.9834204316139221,grad_norm: 0.8913365940178544, iteration: 240093
loss: 1.0437270402908325,grad_norm: 0.9999992114034457, iteration: 240094
loss: 1.013997197151184,grad_norm: 0.9999403039656448, iteration: 240095
loss: 1.0725632905960083,grad_norm: 0.9999998045358071, iteration: 240096
loss: 1.0187923908233643,grad_norm: 0.9999994951242009, iteration: 240097
loss: 1.0113563537597656,grad_norm: 0.9614323305592988, iteration: 240098
loss: 1.0252937078475952,grad_norm: 0.9999989856331901, iteration: 240099
loss: 0.9968999028205872,grad_norm: 0.8925721087861483, iteration: 240100
loss: 1.068117380142212,grad_norm: 0.9999999486955433, iteration: 240101
loss: 1.1296621561050415,grad_norm: 0.9999993868888726, iteration: 240102
loss: 0.9609507322311401,grad_norm: 0.9999993333671677, iteration: 240103
loss: 1.1191799640655518,grad_norm: 0.9999992275917733, iteration: 240104
loss: 1.0369361639022827,grad_norm: 0.9999998535126884, iteration: 240105
loss: 1.1128904819488525,grad_norm: 0.9999993591015197, iteration: 240106
loss: 1.0535064935684204,grad_norm: 0.9999991743772608, iteration: 240107
loss: 1.0124623775482178,grad_norm: 0.9947954187115106, iteration: 240108
loss: 1.0085320472717285,grad_norm: 0.9999997886183464, iteration: 240109
loss: 0.9850234985351562,grad_norm: 0.8128167684126183, iteration: 240110
loss: 0.9965481162071228,grad_norm: 0.7618679613906894, iteration: 240111
loss: 1.0180081129074097,grad_norm: 0.864745137770895, iteration: 240112
loss: 0.9974485635757446,grad_norm: 0.9086943955493355, iteration: 240113
loss: 0.998785674571991,grad_norm: 0.8425127881313795, iteration: 240114
loss: 0.9657515287399292,grad_norm: 0.8682306439610072, iteration: 240115
loss: 0.9788710474967957,grad_norm: 0.9334114314032227, iteration: 240116
loss: 1.003186583518982,grad_norm: 0.9999991593967551, iteration: 240117
loss: 0.9820423126220703,grad_norm: 0.7921905169052199, iteration: 240118
loss: 1.0727325677871704,grad_norm: 0.9999993074486464, iteration: 240119
loss: 1.132643461227417,grad_norm: 0.9999994615892862, iteration: 240120
loss: 0.9910518527030945,grad_norm: 0.9999990073267626, iteration: 240121
loss: 0.9828389883041382,grad_norm: 0.9188906518390522, iteration: 240122
loss: 0.9694928526878357,grad_norm: 0.7840810040575984, iteration: 240123
loss: 0.9662743210792542,grad_norm: 0.7435485278558294, iteration: 240124
loss: 0.9880715012550354,grad_norm: 0.8522509456118172, iteration: 240125
loss: 0.9693311452865601,grad_norm: 0.8998924141751804, iteration: 240126
loss: 1.0311654806137085,grad_norm: 0.9949690929615885, iteration: 240127
loss: 0.9230007529258728,grad_norm: 0.845770000011546, iteration: 240128
loss: 1.0001846551895142,grad_norm: 0.9132231669885423, iteration: 240129
loss: 1.058042287826538,grad_norm: 0.9999990277329283, iteration: 240130
loss: 1.022411823272705,grad_norm: 0.9999996212464463, iteration: 240131
loss: 1.1177617311477661,grad_norm: 0.9041458895581219, iteration: 240132
loss: 1.0254040956497192,grad_norm: 0.9117685925475159, iteration: 240133
loss: 0.9979262948036194,grad_norm: 0.9060111770458987, iteration: 240134
loss: 1.0445173978805542,grad_norm: 0.9999991207292505, iteration: 240135
loss: 0.9958189129829407,grad_norm: 0.8842609710958499, iteration: 240136
loss: 1.0398070812225342,grad_norm: 0.9999997924403418, iteration: 240137
loss: 1.0133001804351807,grad_norm: 0.8806039714992089, iteration: 240138
loss: 0.9865597486495972,grad_norm: 0.9945015157918686, iteration: 240139
loss: 0.9995149374008179,grad_norm: 0.9999990253656813, iteration: 240140
loss: 0.9840133190155029,grad_norm: 0.8387908030284336, iteration: 240141
loss: 0.9782155156135559,grad_norm: 0.8309320006057807, iteration: 240142
loss: 1.0023140907287598,grad_norm: 0.8514715439975608, iteration: 240143
loss: 1.0253725051879883,grad_norm: 0.8804301941672374, iteration: 240144
loss: 0.9809086322784424,grad_norm: 0.8453198193536033, iteration: 240145
loss: 1.135001540184021,grad_norm: 0.9999995635124836, iteration: 240146
loss: 0.9805079102516174,grad_norm: 0.8556914950635589, iteration: 240147
loss: 0.9815009236335754,grad_norm: 0.9613388380185468, iteration: 240148
loss: 0.9849615693092346,grad_norm: 0.9443935487332408, iteration: 240149
loss: 1.0286661386489868,grad_norm: 0.9116430603645995, iteration: 240150
loss: 1.0562955141067505,grad_norm: 0.9355767572320604, iteration: 240151
loss: 1.0512677431106567,grad_norm: 0.7651099933382522, iteration: 240152
loss: 0.9640275239944458,grad_norm: 0.9556962110382896, iteration: 240153
loss: 0.9507460594177246,grad_norm: 0.999999048445524, iteration: 240154
loss: 1.0869221687316895,grad_norm: 0.9999995855767503, iteration: 240155
loss: 0.996518611907959,grad_norm: 0.9525727748995803, iteration: 240156
loss: 0.9736302495002747,grad_norm: 0.9545948593566307, iteration: 240157
loss: 1.0707051753997803,grad_norm: 0.9999992951917781, iteration: 240158
loss: 1.066727638244629,grad_norm: 0.8518771031748805, iteration: 240159
loss: 0.9638221859931946,grad_norm: 0.9999989819674192, iteration: 240160
loss: 1.0334937572479248,grad_norm: 0.8243082365108263, iteration: 240161
loss: 1.0076310634613037,grad_norm: 0.9041542049615312, iteration: 240162
loss: 1.0112017393112183,grad_norm: 0.9194171732330693, iteration: 240163
loss: 1.000974416732788,grad_norm: 0.9452897470872608, iteration: 240164
loss: 0.9794384837150574,grad_norm: 0.9847853225206228, iteration: 240165
loss: 1.0083907842636108,grad_norm: 0.9999990685148298, iteration: 240166
loss: 1.003817081451416,grad_norm: 0.9660692828724201, iteration: 240167
loss: 1.0794321298599243,grad_norm: 0.8556533147869282, iteration: 240168
loss: 1.0199040174484253,grad_norm: 0.7946797188137502, iteration: 240169
loss: 1.0234953165054321,grad_norm: 0.9999991505122311, iteration: 240170
loss: 1.0043487548828125,grad_norm: 0.9999989858432385, iteration: 240171
loss: 0.9635804295539856,grad_norm: 0.8416540119600148, iteration: 240172
loss: 0.9915299415588379,grad_norm: 0.8522677294274061, iteration: 240173
loss: 0.9911572337150574,grad_norm: 0.8133991007470973, iteration: 240174
loss: 1.0700178146362305,grad_norm: 0.9999991156876905, iteration: 240175
loss: 1.0916551351547241,grad_norm: 0.9999990650200355, iteration: 240176
loss: 1.0131393671035767,grad_norm: 0.9477436947085434, iteration: 240177
loss: 0.9638686776161194,grad_norm: 0.9999991912175341, iteration: 240178
loss: 1.0020700693130493,grad_norm: 0.956723141487427, iteration: 240179
loss: 1.0297842025756836,grad_norm: 0.9319333370054794, iteration: 240180
loss: 0.958576500415802,grad_norm: 0.9276478781979226, iteration: 240181
loss: 0.9797874093055725,grad_norm: 0.9200319378744214, iteration: 240182
loss: 0.9886729121208191,grad_norm: 0.7916355954362121, iteration: 240183
loss: 1.125573992729187,grad_norm: 0.9999995113832101, iteration: 240184
loss: 0.9994774460792542,grad_norm: 0.9999997446330657, iteration: 240185
loss: 1.1405556201934814,grad_norm: 0.9999998020436888, iteration: 240186
loss: 1.0342921018600464,grad_norm: 0.7802804785683409, iteration: 240187
loss: 0.996242880821228,grad_norm: 0.9999991584955277, iteration: 240188
loss: 0.9875754117965698,grad_norm: 0.9999991272244265, iteration: 240189
loss: 1.0362086296081543,grad_norm: 0.9999992110919192, iteration: 240190
loss: 1.18904447555542,grad_norm: 0.999999627445855, iteration: 240191
loss: 1.0325640439987183,grad_norm: 0.8205901181606037, iteration: 240192
loss: 0.9664860963821411,grad_norm: 0.8997204993229005, iteration: 240193
loss: 1.0347893238067627,grad_norm: 0.910188358396263, iteration: 240194
loss: 0.9967276453971863,grad_norm: 0.8693591107049167, iteration: 240195
loss: 1.0935014486312866,grad_norm: 0.9999999589801379, iteration: 240196
loss: 1.0540493726730347,grad_norm: 0.9999991484657713, iteration: 240197
loss: 0.9812337160110474,grad_norm: 0.9946273451167189, iteration: 240198
loss: 1.0149322748184204,grad_norm: 0.9999992351635036, iteration: 240199
loss: 1.1389844417572021,grad_norm: 0.9999992345185285, iteration: 240200
loss: 1.0850064754486084,grad_norm: 0.9999991605605846, iteration: 240201
loss: 1.0000393390655518,grad_norm: 0.9999999819513661, iteration: 240202
loss: 1.0576707124710083,grad_norm: 0.999999102174544, iteration: 240203
loss: 1.1145808696746826,grad_norm: 0.999999689626038, iteration: 240204
loss: 1.0447076559066772,grad_norm: 0.9394630474008406, iteration: 240205
loss: 1.0612432956695557,grad_norm: 0.9999993538721004, iteration: 240206
loss: 0.9927585124969482,grad_norm: 0.9999991505603456, iteration: 240207
loss: 1.0215427875518799,grad_norm: 0.7490945139274633, iteration: 240208
loss: 1.1366372108459473,grad_norm: 0.9999996392512646, iteration: 240209
loss: 1.0103065967559814,grad_norm: 0.999999397887393, iteration: 240210
loss: 1.067484736442566,grad_norm: 0.9999996257302907, iteration: 240211
loss: 1.0110152959823608,grad_norm: 0.9231806587762561, iteration: 240212
loss: 1.0032039880752563,grad_norm: 0.8315035586031944, iteration: 240213
loss: 1.0029518604278564,grad_norm: 0.9999992756760043, iteration: 240214
loss: 0.9708062410354614,grad_norm: 0.947725632978057, iteration: 240215
loss: 0.9777436852455139,grad_norm: 0.8643295323046472, iteration: 240216
loss: 1.066714882850647,grad_norm: 0.9999993580199318, iteration: 240217
loss: 1.058700442314148,grad_norm: 0.9999995663671168, iteration: 240218
loss: 0.9369490146636963,grad_norm: 0.9999989878830869, iteration: 240219
loss: 1.024609923362732,grad_norm: 0.9157473640866802, iteration: 240220
loss: 1.0145128965377808,grad_norm: 0.8825522256920626, iteration: 240221
loss: 1.039620280265808,grad_norm: 0.9999991469998935, iteration: 240222
loss: 1.1013391017913818,grad_norm: 0.999999596774408, iteration: 240223
loss: 1.0084948539733887,grad_norm: 0.9999997725391635, iteration: 240224
loss: 1.0508307218551636,grad_norm: 0.999999287292154, iteration: 240225
loss: 0.9993112683296204,grad_norm: 0.9603313519624421, iteration: 240226
loss: 1.0202383995056152,grad_norm: 0.9999995040816699, iteration: 240227
loss: 1.0489106178283691,grad_norm: 0.999999357676374, iteration: 240228
loss: 1.0613036155700684,grad_norm: 0.8147826802679131, iteration: 240229
loss: 0.9921786785125732,grad_norm: 0.9999989701467334, iteration: 240230
loss: 0.9757218956947327,grad_norm: 0.9417792782991086, iteration: 240231
loss: 0.9917117953300476,grad_norm: 0.9645545822998771, iteration: 240232
loss: 0.9917409420013428,grad_norm: 0.9869400275852505, iteration: 240233
loss: 1.0474350452423096,grad_norm: 0.9999993778201103, iteration: 240234
loss: 1.0098649263381958,grad_norm: 0.8284698016565346, iteration: 240235
loss: 1.0298835039138794,grad_norm: 0.9747152380435413, iteration: 240236
loss: 1.1273624897003174,grad_norm: 0.9999999057619807, iteration: 240237
loss: 1.0117799043655396,grad_norm: 0.940381635083206, iteration: 240238
loss: 1.0147452354431152,grad_norm: 0.9852465521412078, iteration: 240239
loss: 0.937800943851471,grad_norm: 0.8181701452099569, iteration: 240240
loss: 1.1002295017242432,grad_norm: 0.9999996555760093, iteration: 240241
loss: 1.0242146253585815,grad_norm: 1.0000000002739646, iteration: 240242
loss: 1.0571504831314087,grad_norm: 0.9999993777500034, iteration: 240243
loss: 0.9994397759437561,grad_norm: 0.968871191014727, iteration: 240244
loss: 0.9964824914932251,grad_norm: 0.7292224037120524, iteration: 240245
loss: 1.0072139501571655,grad_norm: 0.8355854974087927, iteration: 240246
loss: 1.045064091682434,grad_norm: 0.9737435643294782, iteration: 240247
loss: 1.0724081993103027,grad_norm: 0.9966207674427346, iteration: 240248
loss: 1.02960205078125,grad_norm: 0.9333315929073803, iteration: 240249
loss: 1.0316210985183716,grad_norm: 0.9999991033660662, iteration: 240250
loss: 0.9943462610244751,grad_norm: 0.9999989954449131, iteration: 240251
loss: 1.0248931646347046,grad_norm: 0.8811193012456865, iteration: 240252
loss: 1.0035921335220337,grad_norm: 0.9423735386634473, iteration: 240253
loss: 0.973879873752594,grad_norm: 0.9115858432994965, iteration: 240254
loss: 0.9873619675636292,grad_norm: 0.8789985871986058, iteration: 240255
loss: 1.0372430086135864,grad_norm: 0.7817017121026271, iteration: 240256
loss: 1.0147356986999512,grad_norm: 0.9774395717337588, iteration: 240257
loss: 0.968868613243103,grad_norm: 0.8551212074746661, iteration: 240258
loss: 1.0199507474899292,grad_norm: 0.9999990717496223, iteration: 240259
loss: 0.9925768971443176,grad_norm: 0.9680183192678246, iteration: 240260
loss: 0.9921072125434875,grad_norm: 0.9999990225075625, iteration: 240261
loss: 1.025330662727356,grad_norm: 0.9999990686697076, iteration: 240262
loss: 1.0066596269607544,grad_norm: 0.9999989272724111, iteration: 240263
loss: 1.022818684577942,grad_norm: 0.9698317695045852, iteration: 240264
loss: 1.0341805219650269,grad_norm: 0.8741837582102857, iteration: 240265
loss: 1.0153688192367554,grad_norm: 0.9999990354496633, iteration: 240266
loss: 1.0294768810272217,grad_norm: 0.9999990189914709, iteration: 240267
loss: 1.0073825120925903,grad_norm: 0.839213878676965, iteration: 240268
loss: 1.0163929462432861,grad_norm: 0.8639774360524165, iteration: 240269
loss: 1.0426033735275269,grad_norm: 0.9677982697528987, iteration: 240270
loss: 1.0088202953338623,grad_norm: 0.9999990294293205, iteration: 240271
loss: 0.9949215054512024,grad_norm: 0.9999990496631839, iteration: 240272
loss: 0.9964675903320312,grad_norm: 0.9264897031785552, iteration: 240273
loss: 0.9845674633979797,grad_norm: 0.8990606269560838, iteration: 240274
loss: 0.9938275218009949,grad_norm: 0.8292790892282549, iteration: 240275
loss: 1.0352481603622437,grad_norm: 0.9999990269890584, iteration: 240276
loss: 1.0226340293884277,grad_norm: 0.8577629856654476, iteration: 240277
loss: 1.006561040878296,grad_norm: 0.8563500236041062, iteration: 240278
loss: 0.9885526299476624,grad_norm: 0.9576578751069803, iteration: 240279
loss: 0.9839397668838501,grad_norm: 0.9471338374881679, iteration: 240280
loss: 0.9903621673583984,grad_norm: 0.9999992826114653, iteration: 240281
loss: 1.0052704811096191,grad_norm: 0.7590955842492848, iteration: 240282
loss: 0.9926092624664307,grad_norm: 0.9321291176500861, iteration: 240283
loss: 0.9923462271690369,grad_norm: 0.978896172217352, iteration: 240284
loss: 1.0385533571243286,grad_norm: 0.9721757644369614, iteration: 240285
loss: 1.0081307888031006,grad_norm: 0.7390962707916147, iteration: 240286
loss: 1.0173031091690063,grad_norm: 0.8383618755150902, iteration: 240287
loss: 0.9921373128890991,grad_norm: 0.9663682699557485, iteration: 240288
loss: 0.9663082361221313,grad_norm: 0.9099430323674388, iteration: 240289
loss: 0.9999760985374451,grad_norm: 0.7651274195540319, iteration: 240290
loss: 0.9965848326683044,grad_norm: 0.8481354164076234, iteration: 240291
loss: 1.0650588274002075,grad_norm: 0.9999990517638354, iteration: 240292
loss: 1.0192564725875854,grad_norm: 0.9999992835002188, iteration: 240293
loss: 0.9970032572746277,grad_norm: 0.882883030908169, iteration: 240294
loss: 0.9978512525558472,grad_norm: 0.9970112638314735, iteration: 240295
loss: 0.9865257740020752,grad_norm: 0.9318441167567769, iteration: 240296
loss: 0.9970294833183289,grad_norm: 0.9999995739056025, iteration: 240297
loss: 1.0049246549606323,grad_norm: 0.99649564361615, iteration: 240298
loss: 1.059425711631775,grad_norm: 0.9999993645328549, iteration: 240299
loss: 0.9912927746772766,grad_norm: 0.7533478553057208, iteration: 240300
loss: 1.021283507347107,grad_norm: 0.9999995625992055, iteration: 240301
loss: 0.9843557476997375,grad_norm: 0.8911707640775645, iteration: 240302
loss: 1.021979808807373,grad_norm: 0.9629849029879871, iteration: 240303
loss: 1.0219862461090088,grad_norm: 0.9999991922851877, iteration: 240304
loss: 0.9874651432037354,grad_norm: 0.8849612451969314, iteration: 240305
loss: 1.0114933252334595,grad_norm: 0.8869778490923896, iteration: 240306
loss: 1.0337302684783936,grad_norm: 0.9999993314714433, iteration: 240307
loss: 1.0488656759262085,grad_norm: 0.9999990932731608, iteration: 240308
loss: 0.9671683311462402,grad_norm: 0.8650934813117463, iteration: 240309
loss: 0.9947490692138672,grad_norm: 0.7468044109716312, iteration: 240310
loss: 1.0051802396774292,grad_norm: 0.9295173921705472, iteration: 240311
loss: 1.0324618816375732,grad_norm: 0.9999991423170933, iteration: 240312
loss: 0.9957146644592285,grad_norm: 0.8633877013703528, iteration: 240313
loss: 0.9967287182807922,grad_norm: 0.8668304954064444, iteration: 240314
loss: 1.014600396156311,grad_norm: 0.9730428941077527, iteration: 240315
loss: 1.027221918106079,grad_norm: 0.9999994909620913, iteration: 240316
loss: 0.9771466851234436,grad_norm: 0.8493554377579737, iteration: 240317
loss: 1.0052313804626465,grad_norm: 0.9485001594573494, iteration: 240318
loss: 1.0379855632781982,grad_norm: 0.9999992258093773, iteration: 240319
loss: 1.0006035566329956,grad_norm: 0.9142034759897513, iteration: 240320
loss: 1.014782190322876,grad_norm: 0.999999379197244, iteration: 240321
loss: 0.9962480664253235,grad_norm: 0.8306566743805431, iteration: 240322
loss: 1.0061894655227661,grad_norm: 0.8887054879464623, iteration: 240323
loss: 1.0582813024520874,grad_norm: 0.9525661072459867, iteration: 240324
loss: 0.9852543473243713,grad_norm: 0.8500428174938784, iteration: 240325
loss: 1.0385074615478516,grad_norm: 0.9999991301412193, iteration: 240326
loss: 1.0051897764205933,grad_norm: 0.9999072449462829, iteration: 240327
loss: 0.9554846882820129,grad_norm: 0.8509721950449933, iteration: 240328
loss: 1.032538652420044,grad_norm: 0.9601711750682306, iteration: 240329
loss: 0.9995177388191223,grad_norm: 0.8339275029155246, iteration: 240330
loss: 1.001354455947876,grad_norm: 0.8848118095330106, iteration: 240331
loss: 1.0030235052108765,grad_norm: 0.9137041856860602, iteration: 240332
loss: 0.9845866560935974,grad_norm: 0.8651512574771175, iteration: 240333
loss: 1.0183848142623901,grad_norm: 0.7896479384811673, iteration: 240334
loss: 1.0757333040237427,grad_norm: 0.9230557862564416, iteration: 240335
loss: 0.9671569466590881,grad_norm: 0.8422480141724394, iteration: 240336
loss: 0.9755142331123352,grad_norm: 0.9384477243345842, iteration: 240337
loss: 1.0028716325759888,grad_norm: 0.8722313549127898, iteration: 240338
loss: 0.9861013889312744,grad_norm: 0.9999989851131541, iteration: 240339
loss: 1.0070418119430542,grad_norm: 0.9818935243774123, iteration: 240340
loss: 0.9904267191886902,grad_norm: 0.999998938966703, iteration: 240341
loss: 0.946808397769928,grad_norm: 0.9533828266835576, iteration: 240342
loss: 1.0445970296859741,grad_norm: 0.9999990751863532, iteration: 240343
loss: 1.0104669332504272,grad_norm: 0.8235301194347245, iteration: 240344
loss: 1.046000599861145,grad_norm: 0.8951275238523037, iteration: 240345
loss: 1.027233362197876,grad_norm: 0.8340748801261328, iteration: 240346
loss: 1.0360264778137207,grad_norm: 0.8197065235979343, iteration: 240347
loss: 1.0079432725906372,grad_norm: 0.9999990928629612, iteration: 240348
loss: 1.003865361213684,grad_norm: 0.803408775735411, iteration: 240349
loss: 1.0365681648254395,grad_norm: 0.9664278900963001, iteration: 240350
loss: 1.1123740673065186,grad_norm: 0.9999993148222645, iteration: 240351
loss: 1.0126235485076904,grad_norm: 0.7588253666189705, iteration: 240352
loss: 0.995909571647644,grad_norm: 0.9194154681947675, iteration: 240353
loss: 0.9949411749839783,grad_norm: 0.9619340980602014, iteration: 240354
loss: 1.0647797584533691,grad_norm: 0.9999992590253953, iteration: 240355
loss: 1.0020486116409302,grad_norm: 0.9999996055778059, iteration: 240356
loss: 1.0031334161758423,grad_norm: 0.8012637787914014, iteration: 240357
loss: 0.9658923745155334,grad_norm: 0.8755489082569228, iteration: 240358
loss: 1.0021586418151855,grad_norm: 0.9999990203377251, iteration: 240359
loss: 0.9865111112594604,grad_norm: 0.9089429301946516, iteration: 240360
loss: 1.0252501964569092,grad_norm: 0.8692783552765093, iteration: 240361
loss: 1.0337743759155273,grad_norm: 0.9438636440524785, iteration: 240362
loss: 1.0274783372879028,grad_norm: 0.9258344233513652, iteration: 240363
loss: 0.9814580678939819,grad_norm: 0.9999990337960631, iteration: 240364
loss: 1.0923672914505005,grad_norm: 0.9999997513175485, iteration: 240365
loss: 0.9831314086914062,grad_norm: 0.8348787876990845, iteration: 240366
loss: 1.0098804235458374,grad_norm: 0.8609032104226185, iteration: 240367
loss: 1.012927770614624,grad_norm: 0.8274958538189572, iteration: 240368
loss: 1.0056072473526,grad_norm: 0.9999992425940519, iteration: 240369
loss: 0.981764554977417,grad_norm: 0.8097452079397108, iteration: 240370
loss: 1.0105876922607422,grad_norm: 0.9999992348011313, iteration: 240371
loss: 1.0333255529403687,grad_norm: 0.9421617520398141, iteration: 240372
loss: 1.0264813899993896,grad_norm: 0.9307478273287566, iteration: 240373
loss: 1.0625942945480347,grad_norm: 0.999999746775481, iteration: 240374
loss: 1.0039061307907104,grad_norm: 0.9176130469366754, iteration: 240375
loss: 1.0070472955703735,grad_norm: 0.9712133923532658, iteration: 240376
loss: 0.991047739982605,grad_norm: 0.9999990669845356, iteration: 240377
loss: 0.9879907369613647,grad_norm: 0.9999990654363439, iteration: 240378
loss: 1.0654724836349487,grad_norm: 0.9999997618394808, iteration: 240379
loss: 1.0031744241714478,grad_norm: 0.9044947091468993, iteration: 240380
loss: 1.000112533569336,grad_norm: 0.9090209082309865, iteration: 240381
loss: 1.0233646631240845,grad_norm: 0.8619626577272422, iteration: 240382
loss: 0.9789318442344666,grad_norm: 0.9999990082522583, iteration: 240383
loss: 1.0399513244628906,grad_norm: 0.999999088785756, iteration: 240384
loss: 1.030298113822937,grad_norm: 0.8489710925570388, iteration: 240385
loss: 1.05782151222229,grad_norm: 0.8938703376035805, iteration: 240386
loss: 0.9551552534103394,grad_norm: 0.8586897097790978, iteration: 240387
loss: 1.0223605632781982,grad_norm: 0.9924235655395071, iteration: 240388
loss: 0.9899506568908691,grad_norm: 0.9999992205185313, iteration: 240389
loss: 1.0052284002304077,grad_norm: 0.9731539713164898, iteration: 240390
loss: 1.0074836015701294,grad_norm: 0.8063417071499637, iteration: 240391
loss: 0.9969208240509033,grad_norm: 0.9704768636381037, iteration: 240392
loss: 1.0244377851486206,grad_norm: 0.9992140494949596, iteration: 240393
loss: 1.0527392625808716,grad_norm: 0.99999901618258, iteration: 240394
loss: 0.9843500256538391,grad_norm: 0.9999995758241707, iteration: 240395
loss: 0.9918583035469055,grad_norm: 0.9644373187508394, iteration: 240396
loss: 0.9707468748092651,grad_norm: 0.9794400740642915, iteration: 240397
loss: 1.0177125930786133,grad_norm: 0.9999990931852353, iteration: 240398
loss: 1.0607250928878784,grad_norm: 0.9265021472898712, iteration: 240399
loss: 1.0011454820632935,grad_norm: 0.8662145701474505, iteration: 240400
loss: 1.0058867931365967,grad_norm: 0.8397469629824019, iteration: 240401
loss: 1.0123090744018555,grad_norm: 0.9825810242629142, iteration: 240402
loss: 0.9899418950080872,grad_norm: 0.8826885986673269, iteration: 240403
loss: 0.9725180864334106,grad_norm: 0.8964678454514406, iteration: 240404
loss: 1.0162996053695679,grad_norm: 0.9999990377906602, iteration: 240405
loss: 1.000723958015442,grad_norm: 0.7475578815870024, iteration: 240406
loss: 1.0025590658187866,grad_norm: 0.9267000989437517, iteration: 240407
loss: 1.0257912874221802,grad_norm: 0.7832618762912061, iteration: 240408
loss: 0.9949862957000732,grad_norm: 0.8037994830435939, iteration: 240409
loss: 1.0084818601608276,grad_norm: 0.8406005629497011, iteration: 240410
loss: 1.009691834449768,grad_norm: 0.9474950120722269, iteration: 240411
loss: 0.9894425868988037,grad_norm: 0.9412213418990925, iteration: 240412
loss: 1.0505318641662598,grad_norm: 0.9999998668423271, iteration: 240413
loss: 0.9957358241081238,grad_norm: 0.806885711937645, iteration: 240414
loss: 1.0198149681091309,grad_norm: 0.9243842413232909, iteration: 240415
loss: 1.017725944519043,grad_norm: 0.9140414950176489, iteration: 240416
loss: 0.985657274723053,grad_norm: 0.999999062910177, iteration: 240417
loss: 0.9735551476478577,grad_norm: 0.8029936731367967, iteration: 240418
loss: 1.0028038024902344,grad_norm: 0.8563585899723763, iteration: 240419
loss: 1.0550123453140259,grad_norm: 0.8733119649669678, iteration: 240420
loss: 1.0180546045303345,grad_norm: 0.9999990945607754, iteration: 240421
loss: 0.978377103805542,grad_norm: 0.968496156329557, iteration: 240422
loss: 0.9793705940246582,grad_norm: 0.8604782596750057, iteration: 240423
loss: 0.9900789260864258,grad_norm: 0.8917004322122501, iteration: 240424
loss: 0.9632195830345154,grad_norm: 0.9999991279694274, iteration: 240425
loss: 0.9881460070610046,grad_norm: 0.9195206948345044, iteration: 240426
loss: 1.0314615964889526,grad_norm: 0.9698283578016005, iteration: 240427
loss: 0.9727472066879272,grad_norm: 0.9556962230913861, iteration: 240428
loss: 0.9919427037239075,grad_norm: 0.9287454886111232, iteration: 240429
loss: 1.0315839052200317,grad_norm: 0.8319646484226667, iteration: 240430
loss: 1.045062780380249,grad_norm: 0.9999996020329395, iteration: 240431
loss: 1.0475839376449585,grad_norm: 0.9999990742280569, iteration: 240432
loss: 0.9507564902305603,grad_norm: 0.823525789301534, iteration: 240433
loss: 0.9698997735977173,grad_norm: 0.8572033699676648, iteration: 240434
loss: 0.9933424592018127,grad_norm: 0.9999992111959185, iteration: 240435
loss: 0.9714486598968506,grad_norm: 0.8992501636339155, iteration: 240436
loss: 1.0264384746551514,grad_norm: 0.9306298297397676, iteration: 240437
loss: 0.9552211165428162,grad_norm: 0.9999990723665648, iteration: 240438
loss: 0.997700035572052,grad_norm: 0.9660010067876897, iteration: 240439
loss: 0.9753129482269287,grad_norm: 0.678701188729582, iteration: 240440
loss: 0.9950329065322876,grad_norm: 0.9070960936456269, iteration: 240441
loss: 0.9628331661224365,grad_norm: 0.839320052261148, iteration: 240442
loss: 0.9792053699493408,grad_norm: 0.9999991268600114, iteration: 240443
loss: 0.9813511967658997,grad_norm: 0.999999344502561, iteration: 240444
loss: 0.999254047870636,grad_norm: 0.9890199252303176, iteration: 240445
loss: 0.9934670925140381,grad_norm: 0.9999998435252268, iteration: 240446
loss: 1.0142912864685059,grad_norm: 0.9999989761737872, iteration: 240447
loss: 0.9984423518180847,grad_norm: 0.8256203948962214, iteration: 240448
loss: 1.029126763343811,grad_norm: 0.9999991861264416, iteration: 240449
loss: 0.9745689034461975,grad_norm: 0.9999992073217205, iteration: 240450
loss: 0.9922974109649658,grad_norm: 0.9394832041866471, iteration: 240451
loss: 1.0000039339065552,grad_norm: 0.8723667609107206, iteration: 240452
loss: 1.0194063186645508,grad_norm: 0.9335513238558772, iteration: 240453
loss: 1.0173282623291016,grad_norm: 0.9999990664221132, iteration: 240454
loss: 1.0281212329864502,grad_norm: 0.9999992045124262, iteration: 240455
loss: 1.011799931526184,grad_norm: 0.9769508488700797, iteration: 240456
loss: 1.021119475364685,grad_norm: 0.9542714583755356, iteration: 240457
loss: 0.9874463081359863,grad_norm: 0.7986383938587327, iteration: 240458
loss: 1.0073171854019165,grad_norm: 0.9720317159265112, iteration: 240459
loss: 0.9988431930541992,grad_norm: 0.8930354073347605, iteration: 240460
loss: 1.008152961730957,grad_norm: 0.8641024465800632, iteration: 240461
loss: 1.0779069662094116,grad_norm: 0.999999080129911, iteration: 240462
loss: 1.0008411407470703,grad_norm: 0.9697285331150256, iteration: 240463
loss: 1.0024958848953247,grad_norm: 0.9999989562493659, iteration: 240464
loss: 0.9713679552078247,grad_norm: 0.827048216367836, iteration: 240465
loss: 0.9952005743980408,grad_norm: 0.8853566077825303, iteration: 240466
loss: 1.0012954473495483,grad_norm: 0.8330186630996202, iteration: 240467
loss: 0.9886348843574524,grad_norm: 0.9965173954506181, iteration: 240468
loss: 0.9887396693229675,grad_norm: 0.8832083000425474, iteration: 240469
loss: 1.000014305114746,grad_norm: 0.9999990910192704, iteration: 240470
loss: 0.9762104749679565,grad_norm: 0.9494129016948626, iteration: 240471
loss: 1.004469633102417,grad_norm: 0.9333890173444698, iteration: 240472
loss: 1.0276522636413574,grad_norm: 0.8732941008660039, iteration: 240473
loss: 1.0132644176483154,grad_norm: 0.9068182260141777, iteration: 240474
loss: 1.0344713926315308,grad_norm: 0.9999990180991447, iteration: 240475
loss: 1.004991888999939,grad_norm: 0.9239897148629095, iteration: 240476
loss: 1.0263687372207642,grad_norm: 0.9965049592890253, iteration: 240477
loss: 0.9768105149269104,grad_norm: 0.7686655202708957, iteration: 240478
loss: 0.985444188117981,grad_norm: 0.7812415284012112, iteration: 240479
loss: 0.9506903290748596,grad_norm: 0.8862114410443417, iteration: 240480
loss: 1.033952236175537,grad_norm: 0.7651504623959582, iteration: 240481
loss: 1.027358055114746,grad_norm: 0.8453229258431689, iteration: 240482
loss: 1.0077362060546875,grad_norm: 0.9249575593324116, iteration: 240483
loss: 1.0129151344299316,grad_norm: 0.8828532668748967, iteration: 240484
loss: 1.026224970817566,grad_norm: 0.8500281334028146, iteration: 240485
loss: 0.9980111122131348,grad_norm: 0.9999991607133436, iteration: 240486
loss: 1.0853630304336548,grad_norm: 0.9999992477528992, iteration: 240487
loss: 1.0333670377731323,grad_norm: 0.9999990342658495, iteration: 240488
loss: 1.0242629051208496,grad_norm: 0.9925591326850742, iteration: 240489
loss: 1.0063831806182861,grad_norm: 0.7732911207945831, iteration: 240490
loss: 0.9957141876220703,grad_norm: 0.953984079572434, iteration: 240491
loss: 0.9905918836593628,grad_norm: 0.8214052632003322, iteration: 240492
loss: 1.0093880891799927,grad_norm: 0.9588110931918482, iteration: 240493
loss: 1.0522992610931396,grad_norm: 0.9999993505463539, iteration: 240494
loss: 1.0157394409179688,grad_norm: 0.9481256879713523, iteration: 240495
loss: 0.980446994304657,grad_norm: 0.9602797535465247, iteration: 240496
loss: 1.0035865306854248,grad_norm: 0.8343664134189283, iteration: 240497
loss: 0.9836075305938721,grad_norm: 0.920216127404822, iteration: 240498
loss: 1.0290582180023193,grad_norm: 0.8071068560651035, iteration: 240499
loss: 1.017121434211731,grad_norm: 0.9999989739918264, iteration: 240500
loss: 0.9820635318756104,grad_norm: 0.8497017113585994, iteration: 240501
loss: 0.9715196490287781,grad_norm: 0.9459584454252737, iteration: 240502
loss: 1.009708285331726,grad_norm: 0.8259069184031697, iteration: 240503
loss: 0.9805949926376343,grad_norm: 0.8694795192799994, iteration: 240504
loss: 0.9786974191665649,grad_norm: 0.8386340559553288, iteration: 240505
loss: 1.0718176364898682,grad_norm: 0.94505942070272, iteration: 240506
loss: 1.0004239082336426,grad_norm: 0.8626863843082491, iteration: 240507
loss: 0.9932273626327515,grad_norm: 0.8683373194249345, iteration: 240508
loss: 1.0170235633850098,grad_norm: 0.827813608396547, iteration: 240509
loss: 0.9932616353034973,grad_norm: 0.9999990584203398, iteration: 240510
loss: 1.041811466217041,grad_norm: 1.0000000297793548, iteration: 240511
loss: 1.0070066452026367,grad_norm: 0.9087266771743422, iteration: 240512
loss: 0.9504649639129639,grad_norm: 0.9059397732900826, iteration: 240513
loss: 0.9644932150840759,grad_norm: 0.8585760163883337, iteration: 240514
loss: 1.0051082372665405,grad_norm: 0.9082722361574685, iteration: 240515
loss: 1.0322744846343994,grad_norm: 0.9976868123534846, iteration: 240516
loss: 0.9812827706336975,grad_norm: 0.9999990897220151, iteration: 240517
loss: 1.0275394916534424,grad_norm: 0.9633669538683742, iteration: 240518
loss: 1.0074131488800049,grad_norm: 0.8720114601420837, iteration: 240519
loss: 0.9769415259361267,grad_norm: 0.9999992344531997, iteration: 240520
loss: 1.006084680557251,grad_norm: 0.937429380979253, iteration: 240521
loss: 1.0406403541564941,grad_norm: 0.9999996028074057, iteration: 240522
loss: 0.9798340797424316,grad_norm: 0.9470384219686053, iteration: 240523
loss: 1.0557281970977783,grad_norm: 0.99999929951667, iteration: 240524
loss: 0.9542405009269714,grad_norm: 0.9154105941231411, iteration: 240525
loss: 1.0004092454910278,grad_norm: 0.9999991334242994, iteration: 240526
loss: 1.029879093170166,grad_norm: 0.9897039952635246, iteration: 240527
loss: 0.9493570327758789,grad_norm: 0.9999989784137314, iteration: 240528
loss: 1.0147838592529297,grad_norm: 0.9252836256959833, iteration: 240529
loss: 0.9927323460578918,grad_norm: 0.8749558674342587, iteration: 240530
loss: 0.9837871193885803,grad_norm: 0.7799126135513073, iteration: 240531
loss: 1.0074776411056519,grad_norm: 0.879545673562146, iteration: 240532
loss: 1.0072109699249268,grad_norm: 0.9142979602590181, iteration: 240533
loss: 1.0272053480148315,grad_norm: 0.7934599402499986, iteration: 240534
loss: 0.9915453791618347,grad_norm: 0.9999990220422414, iteration: 240535
loss: 1.1011379957199097,grad_norm: 0.9999989823717671, iteration: 240536
loss: 0.9955539107322693,grad_norm: 0.898285478646464, iteration: 240537
loss: 1.0292257070541382,grad_norm: 0.9211660570014276, iteration: 240538
loss: 1.2087759971618652,grad_norm: 0.9168431016883619, iteration: 240539
loss: 1.0147199630737305,grad_norm: 0.916652708070643, iteration: 240540
loss: 1.0764321088790894,grad_norm: 0.8943251765895985, iteration: 240541
loss: 1.001083493232727,grad_norm: 0.9999990529430233, iteration: 240542
loss: 1.051178216934204,grad_norm: 0.9999991260466107, iteration: 240543
loss: 0.9896856546401978,grad_norm: 0.9650351416283826, iteration: 240544
loss: 0.9993008375167847,grad_norm: 0.999999281956479, iteration: 240545
loss: 1.0059601068496704,grad_norm: 0.9183897635154574, iteration: 240546
loss: 1.1346186399459839,grad_norm: 0.9999998948007481, iteration: 240547
loss: 1.0411936044692993,grad_norm: 0.9660071550290584, iteration: 240548
loss: 0.9903706312179565,grad_norm: 0.9999994185814556, iteration: 240549
loss: 0.9985398650169373,grad_norm: 0.9999991077436245, iteration: 240550
loss: 0.9650319814682007,grad_norm: 0.8349306648949214, iteration: 240551
loss: 1.092230200767517,grad_norm: 0.9999991801555208, iteration: 240552
loss: 1.0394245386123657,grad_norm: 0.923416582767579, iteration: 240553
loss: 1.0564987659454346,grad_norm: 0.9999991288098803, iteration: 240554
loss: 1.015026569366455,grad_norm: 0.9348295090331764, iteration: 240555
loss: 1.0686168670654297,grad_norm: 0.8802443002190448, iteration: 240556
loss: 0.9932587146759033,grad_norm: 0.9999990498704969, iteration: 240557
loss: 1.1762133836746216,grad_norm: 0.9999991949628669, iteration: 240558
loss: 0.9596359729766846,grad_norm: 0.7646585956550502, iteration: 240559
loss: 0.9941607713699341,grad_norm: 0.9776157620550534, iteration: 240560
loss: 0.9956414699554443,grad_norm: 0.8438312233055145, iteration: 240561
loss: 0.9902238249778748,grad_norm: 0.9536903305573661, iteration: 240562
loss: 0.9917389154434204,grad_norm: 0.9999990634174155, iteration: 240563
loss: 1.0183125734329224,grad_norm: 0.9999991747499113, iteration: 240564
loss: 0.9850644469261169,grad_norm: 0.8743756493354796, iteration: 240565
loss: 0.9879092574119568,grad_norm: 0.9999991183296968, iteration: 240566
loss: 1.0427517890930176,grad_norm: 0.9999992019413139, iteration: 240567
loss: 1.0827035903930664,grad_norm: 0.9999992620062803, iteration: 240568
loss: 0.9835280776023865,grad_norm: 0.960665814319806, iteration: 240569
loss: 1.0127333402633667,grad_norm: 0.9999990091191444, iteration: 240570
loss: 1.0804529190063477,grad_norm: 0.9999992135080269, iteration: 240571
loss: 0.9983263611793518,grad_norm: 0.9090783403069626, iteration: 240572
loss: 1.0110571384429932,grad_norm: 0.9999995365809539, iteration: 240573
loss: 1.076388955116272,grad_norm: 0.978470506941933, iteration: 240574
loss: 0.9924536943435669,grad_norm: 0.8965033170504031, iteration: 240575
loss: 1.0236737728118896,grad_norm: 0.9925932797613, iteration: 240576
loss: 0.9905233383178711,grad_norm: 0.9405051456459587, iteration: 240577
loss: 1.0060378313064575,grad_norm: 0.9999990331644528, iteration: 240578
loss: 1.0043715238571167,grad_norm: 0.9999992233475771, iteration: 240579
loss: 0.9950835108757019,grad_norm: 0.7904179045320898, iteration: 240580
loss: 1.0518279075622559,grad_norm: 0.8235907196483275, iteration: 240581
loss: 0.9562588334083557,grad_norm: 0.8666342886577979, iteration: 240582
loss: 0.99470055103302,grad_norm: 0.9690482468703059, iteration: 240583
loss: 1.0250182151794434,grad_norm: 0.9292434786023032, iteration: 240584
loss: 1.022966742515564,grad_norm: 0.9759649134787333, iteration: 240585
loss: 1.0313720703125,grad_norm: 0.9999991387661202, iteration: 240586
loss: 1.0044176578521729,grad_norm: 0.9999998732449268, iteration: 240587
loss: 1.0317580699920654,grad_norm: 0.9999991423656045, iteration: 240588
loss: 1.0184649229049683,grad_norm: 0.8953155747092469, iteration: 240589
loss: 1.0041484832763672,grad_norm: 0.8685478250268749, iteration: 240590
loss: 1.0055910348892212,grad_norm: 0.8379960988112395, iteration: 240591
loss: 1.0630725622177124,grad_norm: 0.9999993236799077, iteration: 240592
loss: 1.0613535642623901,grad_norm: 0.818671429643648, iteration: 240593
loss: 1.0251201391220093,grad_norm: 0.9999997844317795, iteration: 240594
loss: 1.0171189308166504,grad_norm: 0.9999993663484196, iteration: 240595
loss: 0.9970753192901611,grad_norm: 0.9865560769833254, iteration: 240596
loss: 0.9831909537315369,grad_norm: 0.9999990604214503, iteration: 240597
loss: 1.0303287506103516,grad_norm: 0.9999990680190726, iteration: 240598
loss: 1.0327703952789307,grad_norm: 0.9084307947989958, iteration: 240599
loss: 1.066828727722168,grad_norm: 0.9999990675879473, iteration: 240600
loss: 0.9759461879730225,grad_norm: 0.9712187087899466, iteration: 240601
loss: 0.9710443615913391,grad_norm: 0.8975110668660942, iteration: 240602
loss: 1.071650743484497,grad_norm: 0.9999993062470797, iteration: 240603
loss: 1.040648102760315,grad_norm: 0.9744542255116874, iteration: 240604
loss: 0.9901365637779236,grad_norm: 0.832485965001089, iteration: 240605
loss: 1.0122288465499878,grad_norm: 0.999999386227221, iteration: 240606
loss: 0.975227952003479,grad_norm: 0.9999992136405121, iteration: 240607
loss: 0.9987244009971619,grad_norm: 0.9173783383894699, iteration: 240608
loss: 1.0570961236953735,grad_norm: 0.9476666652620639, iteration: 240609
loss: 1.1333181858062744,grad_norm: 0.9999996168509857, iteration: 240610
loss: 1.0271291732788086,grad_norm: 0.9101635099102117, iteration: 240611
loss: 1.0172219276428223,grad_norm: 0.9999993974532146, iteration: 240612
loss: 1.1363457441329956,grad_norm: 0.9999998456224223, iteration: 240613
loss: 0.9955020546913147,grad_norm: 0.9999990888436929, iteration: 240614
loss: 1.0272822380065918,grad_norm: 0.8949079907826264, iteration: 240615
loss: 1.0166449546813965,grad_norm: 0.9999990727177838, iteration: 240616
loss: 1.0018746852874756,grad_norm: 0.9999998087964085, iteration: 240617
loss: 1.0183202028274536,grad_norm: 0.9999993585992468, iteration: 240618
loss: 1.0020180940628052,grad_norm: 0.9349379315622557, iteration: 240619
loss: 1.0144850015640259,grad_norm: 0.9549922566348554, iteration: 240620
loss: 0.9970982074737549,grad_norm: 0.8974967995307195, iteration: 240621
loss: 1.042102336883545,grad_norm: 0.9999993733288174, iteration: 240622
loss: 1.060347318649292,grad_norm: 0.999999136744274, iteration: 240623
loss: 1.0159823894500732,grad_norm: 0.8547564747664382, iteration: 240624
loss: 0.98472660779953,grad_norm: 0.9999991352659943, iteration: 240625
loss: 0.9818236231803894,grad_norm: 0.9999990712680297, iteration: 240626
loss: 1.0016785860061646,grad_norm: 0.9999992440342851, iteration: 240627
loss: 1.1220874786376953,grad_norm: 0.9999992284234552, iteration: 240628
loss: 1.019795298576355,grad_norm: 0.9999996441655821, iteration: 240629
loss: 0.9944019317626953,grad_norm: 0.9999992668008736, iteration: 240630
loss: 0.9757904410362244,grad_norm: 0.7323770395453578, iteration: 240631
loss: 1.0739872455596924,grad_norm: 0.999999208566612, iteration: 240632
loss: 0.986212432384491,grad_norm: 0.9925759376878642, iteration: 240633
loss: 1.0932137966156006,grad_norm: 0.9999992767641003, iteration: 240634
loss: 1.0174020528793335,grad_norm: 0.9999991208508814, iteration: 240635
loss: 1.0354477167129517,grad_norm: 0.9999995328871485, iteration: 240636
loss: 1.0235475301742554,grad_norm: 0.9670274545756403, iteration: 240637
loss: 0.9951562285423279,grad_norm: 0.8655895034528035, iteration: 240638
loss: 0.932682991027832,grad_norm: 0.8746856666760154, iteration: 240639
loss: 1.0444194078445435,grad_norm: 0.9999991693720696, iteration: 240640
loss: 1.0008739233016968,grad_norm: 0.9999995792371403, iteration: 240641
loss: 1.0463557243347168,grad_norm: 0.9999996787468549, iteration: 240642
loss: 0.9778848886489868,grad_norm: 0.9999991352430179, iteration: 240643
loss: 1.0121043920516968,grad_norm: 0.9999991866564719, iteration: 240644
loss: 0.9826095700263977,grad_norm: 0.8749164357291948, iteration: 240645
loss: 0.9867493510246277,grad_norm: 0.8852634789075818, iteration: 240646
loss: 1.0005249977111816,grad_norm: 0.8423364234987508, iteration: 240647
loss: 1.0058350563049316,grad_norm: 0.742840461850941, iteration: 240648
loss: 1.0104320049285889,grad_norm: 0.892934620488688, iteration: 240649
loss: 1.0348763465881348,grad_norm: 0.9999991845725857, iteration: 240650
loss: 1.0569356679916382,grad_norm: 0.8851508870064394, iteration: 240651
loss: 0.9558209776878357,grad_norm: 0.9999991941688343, iteration: 240652
loss: 0.9624855518341064,grad_norm: 0.9999993247219227, iteration: 240653
loss: 1.0230836868286133,grad_norm: 0.9999993185663303, iteration: 240654
loss: 0.9757208228111267,grad_norm: 0.8649862167236056, iteration: 240655
loss: 0.9901214241981506,grad_norm: 0.999999387100429, iteration: 240656
loss: 1.007151484489441,grad_norm: 0.8776500170630226, iteration: 240657
loss: 1.0408018827438354,grad_norm: 0.9999990492414027, iteration: 240658
loss: 1.0308444499969482,grad_norm: 0.9999990015078376, iteration: 240659
loss: 1.0489587783813477,grad_norm: 0.9999994009096399, iteration: 240660
loss: 0.9854747653007507,grad_norm: 0.9306517796403625, iteration: 240661
loss: 0.997327983379364,grad_norm: 0.7800414094769367, iteration: 240662
loss: 0.971916139125824,grad_norm: 0.7798118388326228, iteration: 240663
loss: 1.033097505569458,grad_norm: 0.9999996380047045, iteration: 240664
loss: 0.9901426434516907,grad_norm: 0.849930798580036, iteration: 240665
loss: 1.012709379196167,grad_norm: 0.9062068104622696, iteration: 240666
loss: 1.0291545391082764,grad_norm: 0.9820356075809398, iteration: 240667
loss: 1.0120561122894287,grad_norm: 0.9999998217348176, iteration: 240668
loss: 0.9633387923240662,grad_norm: 0.8654273881791856, iteration: 240669
loss: 1.0143035650253296,grad_norm: 0.9999991106709922, iteration: 240670
loss: 0.9820371270179749,grad_norm: 0.7677724412493604, iteration: 240671
loss: 0.9995099902153015,grad_norm: 0.9158466694549667, iteration: 240672
loss: 1.0248465538024902,grad_norm: 0.8095367284708019, iteration: 240673
loss: 1.0078635215759277,grad_norm: 0.9999992477609353, iteration: 240674
loss: 1.0057039260864258,grad_norm: 0.9425652145983529, iteration: 240675
loss: 0.9242212772369385,grad_norm: 0.9999990471743377, iteration: 240676
loss: 1.0424376726150513,grad_norm: 0.9999999214450273, iteration: 240677
loss: 1.0244879722595215,grad_norm: 0.9999990224887205, iteration: 240678
loss: 0.9964653253555298,grad_norm: 0.9999999367523207, iteration: 240679
loss: 1.0103118419647217,grad_norm: 0.8843704085570638, iteration: 240680
loss: 0.9901568293571472,grad_norm: 0.8379679311918458, iteration: 240681
loss: 1.0197523832321167,grad_norm: 0.9999996858051247, iteration: 240682
loss: 0.9861924648284912,grad_norm: 0.7821194501705362, iteration: 240683
loss: 0.9731493592262268,grad_norm: 0.9159142362730003, iteration: 240684
loss: 1.0079307556152344,grad_norm: 0.9999994142075567, iteration: 240685
loss: 1.0240296125411987,grad_norm: 0.9999990648141914, iteration: 240686
loss: 0.9876133799552917,grad_norm: 0.9026695377004588, iteration: 240687
loss: 1.0096735954284668,grad_norm: 0.791630580541792, iteration: 240688
loss: 0.9535390734672546,grad_norm: 0.865776841676491, iteration: 240689
loss: 1.0212171077728271,grad_norm: 0.8562794359618285, iteration: 240690
loss: 0.9652131199836731,grad_norm: 0.9999990676114522, iteration: 240691
loss: 1.0170772075653076,grad_norm: 0.8496374378969721, iteration: 240692
loss: 1.006081461906433,grad_norm: 0.7919793048659076, iteration: 240693
loss: 0.9725269675254822,grad_norm: 0.9999989651001497, iteration: 240694
loss: 0.9951669573783875,grad_norm: 0.9999992400375985, iteration: 240695
loss: 1.0066105127334595,grad_norm: 0.9687393319757702, iteration: 240696
loss: 1.0291259288787842,grad_norm: 0.9999998372121041, iteration: 240697
loss: 0.9852664470672607,grad_norm: 0.9264354159659706, iteration: 240698
loss: 1.0073453187942505,grad_norm: 0.929727742696568, iteration: 240699
loss: 1.0328004360198975,grad_norm: 0.999998994767781, iteration: 240700
loss: 0.987606942653656,grad_norm: 0.9999990177980675, iteration: 240701
loss: 1.015018105506897,grad_norm: 0.7153650241981163, iteration: 240702
loss: 1.0516000986099243,grad_norm: 0.9023088386525858, iteration: 240703
loss: 0.9962171316146851,grad_norm: 0.8488848190585512, iteration: 240704
loss: 0.9753866195678711,grad_norm: 0.9227164579090272, iteration: 240705
loss: 1.0107090473175049,grad_norm: 0.8576148380498745, iteration: 240706
loss: 1.0271680355072021,grad_norm: 0.8747518362276937, iteration: 240707
loss: 1.006075143814087,grad_norm: 0.9297981295478821, iteration: 240708
loss: 0.9865919947624207,grad_norm: 0.9999990909087888, iteration: 240709
loss: 1.0401901006698608,grad_norm: 0.9247098121658418, iteration: 240710
loss: 1.028934121131897,grad_norm: 0.9510608128678976, iteration: 240711
loss: 0.9984720945358276,grad_norm: 0.8954936879356169, iteration: 240712
loss: 0.9596236348152161,grad_norm: 0.958130958604363, iteration: 240713
loss: 1.020302653312683,grad_norm: 0.9397973069258522, iteration: 240714
loss: 1.0484176874160767,grad_norm: 0.9999990518905016, iteration: 240715
loss: 0.9933043718338013,grad_norm: 0.8348559167362926, iteration: 240716
loss: 0.9827161431312561,grad_norm: 0.7896097516423538, iteration: 240717
loss: 0.9501804709434509,grad_norm: 0.8501119274121494, iteration: 240718
loss: 1.0420713424682617,grad_norm: 0.9999990754957071, iteration: 240719
loss: 1.010041356086731,grad_norm: 0.8396927346530717, iteration: 240720
loss: 1.089529275894165,grad_norm: 0.9999998426877905, iteration: 240721
loss: 0.9858594536781311,grad_norm: 0.9957201548066978, iteration: 240722
loss: 1.028266429901123,grad_norm: 0.8911009254088466, iteration: 240723
loss: 0.9988256096839905,grad_norm: 0.9262734234151514, iteration: 240724
loss: 1.0036522150039673,grad_norm: 0.8856863218977636, iteration: 240725
loss: 1.0224000215530396,grad_norm: 0.9999990819682326, iteration: 240726
loss: 0.9641342759132385,grad_norm: 0.8146634557716484, iteration: 240727
loss: 0.9940869808197021,grad_norm: 0.9787186995993022, iteration: 240728
loss: 0.9954614639282227,grad_norm: 0.9999991215525911, iteration: 240729
loss: 1.0105706453323364,grad_norm: 0.8631943419689502, iteration: 240730
loss: 1.049314022064209,grad_norm: 0.8960578258242052, iteration: 240731
loss: 1.0187897682189941,grad_norm: 0.991048982854542, iteration: 240732
loss: 1.0479817390441895,grad_norm: 0.9999991143562372, iteration: 240733
loss: 1.0343847274780273,grad_norm: 0.9999991310167499, iteration: 240734
loss: 0.9847381114959717,grad_norm: 0.7816145999985318, iteration: 240735
loss: 1.0006903409957886,grad_norm: 0.9075669036558759, iteration: 240736
loss: 1.0246291160583496,grad_norm: 0.9130201421056499, iteration: 240737
loss: 1.0199370384216309,grad_norm: 0.9999999219151516, iteration: 240738
loss: 1.019113302230835,grad_norm: 0.9389183942894064, iteration: 240739
loss: 1.0035098791122437,grad_norm: 0.9999998252237339, iteration: 240740
loss: 0.9731513857841492,grad_norm: 0.999999081886193, iteration: 240741
loss: 0.9739841818809509,grad_norm: 0.8420160446864176, iteration: 240742
loss: 0.9911233186721802,grad_norm: 0.9338069093474524, iteration: 240743
loss: 1.0274807214736938,grad_norm: 0.9730106839177558, iteration: 240744
loss: 1.0140937566757202,grad_norm: 0.9015205151795641, iteration: 240745
loss: 1.0172147750854492,grad_norm: 0.9202257463935558, iteration: 240746
loss: 0.9882689714431763,grad_norm: 0.8766958043059913, iteration: 240747
loss: 0.9957259297370911,grad_norm: 0.8652804721564044, iteration: 240748
loss: 1.0347118377685547,grad_norm: 0.7506285093215398, iteration: 240749
loss: 1.0497214794158936,grad_norm: 0.9999992107363407, iteration: 240750
loss: 0.9919604063034058,grad_norm: 0.8945275193574458, iteration: 240751
loss: 1.019119381904602,grad_norm: 0.9311423205232395, iteration: 240752
loss: 0.9572104215621948,grad_norm: 0.7064006802481355, iteration: 240753
loss: 1.0016660690307617,grad_norm: 0.9910259367501529, iteration: 240754
loss: 0.999164342880249,grad_norm: 0.8583522355224888, iteration: 240755
loss: 0.9900554418563843,grad_norm: 0.785068959608073, iteration: 240756
loss: 0.9532498121261597,grad_norm: 0.8902558424097029, iteration: 240757
loss: 1.0097678899765015,grad_norm: 0.9555300174727308, iteration: 240758
loss: 1.0119683742523193,grad_norm: 0.8127159095062588, iteration: 240759
loss: 0.9740285873413086,grad_norm: 0.9024484835935774, iteration: 240760
loss: 1.053930401802063,grad_norm: 0.9999990722877427, iteration: 240761
loss: 0.9934160709381104,grad_norm: 0.960601707922826, iteration: 240762
loss: 0.9799901843070984,grad_norm: 0.8770618850174415, iteration: 240763
loss: 1.04139244556427,grad_norm: 0.789830427280688, iteration: 240764
loss: 1.010536789894104,grad_norm: 0.9417725976385748, iteration: 240765
loss: 1.027621865272522,grad_norm: 0.9999992020860542, iteration: 240766
loss: 1.029209017753601,grad_norm: 0.9135477935074721, iteration: 240767
loss: 0.9801425933837891,grad_norm: 0.8095751443033066, iteration: 240768
loss: 1.0302011966705322,grad_norm: 0.7992648733914567, iteration: 240769
loss: 0.9696946740150452,grad_norm: 0.9329157781399775, iteration: 240770
loss: 0.9820483326911926,grad_norm: 0.8513501031215085, iteration: 240771
loss: 1.0044317245483398,grad_norm: 0.8776981724989718, iteration: 240772
loss: 1.0096684694290161,grad_norm: 0.937645842755371, iteration: 240773
loss: 1.0294634103775024,grad_norm: 0.9999992188329201, iteration: 240774
loss: 0.9918659329414368,grad_norm: 0.8552127437168345, iteration: 240775
loss: 1.0070583820343018,grad_norm: 0.7722985407969488, iteration: 240776
loss: 0.9855880737304688,grad_norm: 0.8134680122551794, iteration: 240777
loss: 1.096997857093811,grad_norm: 0.9999992686225313, iteration: 240778
loss: 1.0033515691757202,grad_norm: 0.9860910266917188, iteration: 240779
loss: 1.058344841003418,grad_norm: 0.9999992147592306, iteration: 240780
loss: 0.9922296404838562,grad_norm: 0.9999992036351849, iteration: 240781
loss: 0.9712881445884705,grad_norm: 0.9999992924251129, iteration: 240782
loss: 1.0288442373275757,grad_norm: 0.871914691141249, iteration: 240783
loss: 0.9852761030197144,grad_norm: 0.9999990796881467, iteration: 240784
loss: 0.990437388420105,grad_norm: 0.9966846578759919, iteration: 240785
loss: 1.0252338647842407,grad_norm: 0.9120583533438065, iteration: 240786
loss: 0.9812361598014832,grad_norm: 0.9293027640645335, iteration: 240787
loss: 1.0234777927398682,grad_norm: 0.8159002810481703, iteration: 240788
loss: 0.9874280095100403,grad_norm: 0.9999997346944294, iteration: 240789
loss: 1.0485652685165405,grad_norm: 0.9521011989181273, iteration: 240790
loss: 0.9967826008796692,grad_norm: 0.9175177060214093, iteration: 240791
loss: 0.9803481698036194,grad_norm: 0.9999990154429101, iteration: 240792
loss: 0.9851479530334473,grad_norm: 0.9018054046956433, iteration: 240793
loss: 0.9414699673652649,grad_norm: 0.9501048988705972, iteration: 240794
loss: 0.9636004567146301,grad_norm: 0.999999224929938, iteration: 240795
loss: 1.1177303791046143,grad_norm: 0.9999991318496773, iteration: 240796
loss: 0.9642496109008789,grad_norm: 0.9957427705220283, iteration: 240797
loss: 1.0054322481155396,grad_norm: 0.9199880655600715, iteration: 240798
loss: 0.9914165139198303,grad_norm: 0.8738023320281452, iteration: 240799
loss: 0.9828594923019409,grad_norm: 0.9545240012049591, iteration: 240800
loss: 1.0322611331939697,grad_norm: 0.9999990440466654, iteration: 240801
loss: 0.9754785299301147,grad_norm: 0.8856557175134245, iteration: 240802
loss: 1.0620977878570557,grad_norm: 0.9999995055671649, iteration: 240803
loss: 1.0084519386291504,grad_norm: 0.8046666403469754, iteration: 240804
loss: 0.9856324195861816,grad_norm: 0.7933017586410616, iteration: 240805
loss: 0.9846155643463135,grad_norm: 0.9242690396568649, iteration: 240806
loss: 0.9693203568458557,grad_norm: 0.8561671870355306, iteration: 240807
loss: 0.9852499961853027,grad_norm: 0.8668672017859681, iteration: 240808
loss: 1.0471711158752441,grad_norm: 0.9999996124876894, iteration: 240809
loss: 0.9910867214202881,grad_norm: 0.9960379651436927, iteration: 240810
loss: 0.9899814128875732,grad_norm: 0.999999181851842, iteration: 240811
loss: 1.012361764907837,grad_norm: 0.8155081227535887, iteration: 240812
loss: 1.0001025199890137,grad_norm: 0.9802982953922085, iteration: 240813
loss: 1.000029444694519,grad_norm: 0.9999992364907823, iteration: 240814
loss: 0.9769035577774048,grad_norm: 0.7600753284452015, iteration: 240815
loss: 1.046200156211853,grad_norm: 0.9160602004465166, iteration: 240816
loss: 0.9839441180229187,grad_norm: 0.9999992799580664, iteration: 240817
loss: 1.0559316873550415,grad_norm: 0.9889976061924818, iteration: 240818
loss: 1.0067778825759888,grad_norm: 0.8774761369995071, iteration: 240819
loss: 0.9897996783256531,grad_norm: 0.7976002572947553, iteration: 240820
loss: 0.992127001285553,grad_norm: 0.8229613133997425, iteration: 240821
loss: 1.0182960033416748,grad_norm: 0.9999991707852136, iteration: 240822
loss: 1.0086761713027954,grad_norm: 0.949360010633367, iteration: 240823
loss: 1.0025908946990967,grad_norm: 0.99466197389007, iteration: 240824
loss: 1.013319969177246,grad_norm: 0.8163739521842068, iteration: 240825
loss: 1.029982328414917,grad_norm: 0.9999991604206357, iteration: 240826
loss: 1.0228902101516724,grad_norm: 0.999999757466926, iteration: 240827
loss: 1.0177507400512695,grad_norm: 0.8364301000998057, iteration: 240828
loss: 1.0453507900238037,grad_norm: 0.9999994141841151, iteration: 240829
loss: 1.0846378803253174,grad_norm: 0.9484155368251576, iteration: 240830
loss: 1.0092368125915527,grad_norm: 0.9999989797457615, iteration: 240831
loss: 1.0221198797225952,grad_norm: 0.9454345053052061, iteration: 240832
loss: 1.1038848161697388,grad_norm: 0.9999995728975659, iteration: 240833
loss: 0.9601685404777527,grad_norm: 0.8521496611883369, iteration: 240834
loss: 0.9973827600479126,grad_norm: 0.9317459924126035, iteration: 240835
loss: 0.9882128238677979,grad_norm: 0.9405236218654898, iteration: 240836
loss: 1.006629228591919,grad_norm: 0.9453506448265161, iteration: 240837
loss: 1.0250712633132935,grad_norm: 0.9033178219266057, iteration: 240838
loss: 1.055619716644287,grad_norm: 0.9819417553996591, iteration: 240839
loss: 1.006179690361023,grad_norm: 0.9855296332597054, iteration: 240840
loss: 0.9932701587677002,grad_norm: 0.7747772032466337, iteration: 240841
loss: 1.0703294277191162,grad_norm: 0.9999993982981193, iteration: 240842
loss: 0.9804207682609558,grad_norm: 0.9749244079503403, iteration: 240843
loss: 1.0083682537078857,grad_norm: 0.9999991242423527, iteration: 240844
loss: 1.0037943124771118,grad_norm: 0.9999991010614728, iteration: 240845
loss: 0.978148877620697,grad_norm: 0.9573877273141934, iteration: 240846
loss: 1.040871262550354,grad_norm: 0.8194605350419378, iteration: 240847
loss: 0.9784271121025085,grad_norm: 0.9999999549399744, iteration: 240848
loss: 1.0157448053359985,grad_norm: 0.8734024631876051, iteration: 240849
loss: 0.9836276769638062,grad_norm: 0.8578662842174893, iteration: 240850
loss: 1.0203860998153687,grad_norm: 0.8174255801198723, iteration: 240851
loss: 0.9996151328086853,grad_norm: 0.99999972173108, iteration: 240852
loss: 0.9704063534736633,grad_norm: 0.8895527778162126, iteration: 240853
loss: 1.0296913385391235,grad_norm: 0.8162928106199091, iteration: 240854
loss: 0.9972027540206909,grad_norm: 0.7969898823415095, iteration: 240855
loss: 0.9999799728393555,grad_norm: 0.8221265183002194, iteration: 240856
loss: 1.0172874927520752,grad_norm: 0.999999846437963, iteration: 240857
loss: 0.9985870122909546,grad_norm: 0.8666264645744521, iteration: 240858
loss: 0.9835533499717712,grad_norm: 0.8427886990806229, iteration: 240859
loss: 1.317781686782837,grad_norm: 0.9999995383521233, iteration: 240860
loss: 0.9810975193977356,grad_norm: 0.8335388597551731, iteration: 240861
loss: 1.12606680393219,grad_norm: 1.000000001055951, iteration: 240862
loss: 0.9855955243110657,grad_norm: 0.9999989825366271, iteration: 240863
loss: 0.951304018497467,grad_norm: 0.8816196848800522, iteration: 240864
loss: 1.008406162261963,grad_norm: 0.8823849586218937, iteration: 240865
loss: 1.0737636089324951,grad_norm: 0.9999990407111504, iteration: 240866
loss: 0.9805785417556763,grad_norm: 0.9999993242016091, iteration: 240867
loss: 1.0116466283798218,grad_norm: 0.9999992053443016, iteration: 240868
loss: 1.0327593088150024,grad_norm: 0.99999897005915, iteration: 240869
loss: 1.0276867151260376,grad_norm: 0.9999997888687207, iteration: 240870
loss: 1.0047030448913574,grad_norm: 0.9999994796752366, iteration: 240871
loss: 1.0091321468353271,grad_norm: 0.999999058493122, iteration: 240872
loss: 0.9689850211143494,grad_norm: 0.9999991153596566, iteration: 240873
loss: 0.9994103908538818,grad_norm: 0.7877889286485903, iteration: 240874
loss: 1.0223687887191772,grad_norm: 0.7610355516035661, iteration: 240875
loss: 0.9616836905479431,grad_norm: 0.964145159579793, iteration: 240876
loss: 1.0580710172653198,grad_norm: 0.9999996562956917, iteration: 240877
loss: 0.9897993206977844,grad_norm: 0.9784768202615022, iteration: 240878
loss: 1.0008726119995117,grad_norm: 0.9531841506980409, iteration: 240879
loss: 1.0002974271774292,grad_norm: 0.7842470826082534, iteration: 240880
loss: 1.0022261142730713,grad_norm: 0.9999990388643157, iteration: 240881
loss: 0.976678729057312,grad_norm: 0.9747393627550375, iteration: 240882
loss: 1.0251460075378418,grad_norm: 0.9999991383421294, iteration: 240883
loss: 0.9730512499809265,grad_norm: 0.9999990974191566, iteration: 240884
loss: 0.9796273112297058,grad_norm: 0.970733207917289, iteration: 240885
loss: 0.9787325263023376,grad_norm: 0.8516798061923042, iteration: 240886
loss: 1.017899990081787,grad_norm: 0.9651877501627097, iteration: 240887
loss: 1.0109243392944336,grad_norm: 0.9999990890027103, iteration: 240888
loss: 1.0289416313171387,grad_norm: 0.773519460766065, iteration: 240889
loss: 1.0577671527862549,grad_norm: 0.9999992456501219, iteration: 240890
loss: 0.9807219505310059,grad_norm: 0.7611110672713249, iteration: 240891
loss: 1.0002330541610718,grad_norm: 0.8740573586973246, iteration: 240892
loss: 1.0082588195800781,grad_norm: 0.7962317326835265, iteration: 240893
loss: 1.0169316530227661,grad_norm: 0.8970375869191385, iteration: 240894
loss: 1.0135284662246704,grad_norm: 0.9999992329063949, iteration: 240895
loss: 1.0045843124389648,grad_norm: 0.9999991294092192, iteration: 240896
loss: 1.0064542293548584,grad_norm: 0.7502707015698689, iteration: 240897
loss: 1.0149983167648315,grad_norm: 0.8197004364919517, iteration: 240898
loss: 1.0233720541000366,grad_norm: 0.8124980349746308, iteration: 240899
loss: 1.0023571252822876,grad_norm: 0.9114888869022738, iteration: 240900
loss: 1.0146313905715942,grad_norm: 0.7936768849495391, iteration: 240901
loss: 1.0040318965911865,grad_norm: 0.8716653504229109, iteration: 240902
loss: 0.9905127882957458,grad_norm: 0.8734976239207245, iteration: 240903
loss: 1.1288172006607056,grad_norm: 0.9999997480180814, iteration: 240904
loss: 1.034535527229309,grad_norm: 0.9999994520902126, iteration: 240905
loss: 1.0279273986816406,grad_norm: 0.7970115261575297, iteration: 240906
loss: 1.0091828107833862,grad_norm: 0.9603259538792775, iteration: 240907
loss: 1.0094196796417236,grad_norm: 0.990018895300755, iteration: 240908
loss: 1.0090482234954834,grad_norm: 0.9999990611399403, iteration: 240909
loss: 0.99851393699646,grad_norm: 0.8890500574571428, iteration: 240910
loss: 0.9576146006584167,grad_norm: 0.8967711743347753, iteration: 240911
loss: 1.0075268745422363,grad_norm: 0.9282810640031052, iteration: 240912
loss: 1.0350677967071533,grad_norm: 0.9999991479865524, iteration: 240913
loss: 1.0099482536315918,grad_norm: 0.9999990213196421, iteration: 240914
loss: 0.9894249439239502,grad_norm: 0.9999990717909306, iteration: 240915
loss: 0.9856181740760803,grad_norm: 0.8145882505590005, iteration: 240916
loss: 0.9950395226478577,grad_norm: 0.798167267346825, iteration: 240917
loss: 1.014816164970398,grad_norm: 0.9411934039891345, iteration: 240918
loss: 1.0102976560592651,grad_norm: 0.9999991355631102, iteration: 240919
loss: 1.1012532711029053,grad_norm: 0.999999778905234, iteration: 240920
loss: 1.0378615856170654,grad_norm: 0.9096718635348479, iteration: 240921
loss: 0.9654096961021423,grad_norm: 0.9357341854000573, iteration: 240922
loss: 0.9917379021644592,grad_norm: 0.935906104997461, iteration: 240923
loss: 0.9738072752952576,grad_norm: 0.9595147976037558, iteration: 240924
loss: 1.012510895729065,grad_norm: 0.8621621224181971, iteration: 240925
loss: 1.0181715488433838,grad_norm: 0.93084805153768, iteration: 240926
loss: 0.9911391735076904,grad_norm: 0.9999991523676993, iteration: 240927
loss: 1.0549931526184082,grad_norm: 0.9999991583772543, iteration: 240928
loss: 0.9976273775100708,grad_norm: 0.9506751853412394, iteration: 240929
loss: 0.98983234167099,grad_norm: 0.915840164782243, iteration: 240930
loss: 1.027367353439331,grad_norm: 0.9114215080899126, iteration: 240931
loss: 0.9915048480033875,grad_norm: 0.9999990956663036, iteration: 240932
loss: 1.258479118347168,grad_norm: 0.9999998466077015, iteration: 240933
loss: 1.0312238931655884,grad_norm: 0.8148190921470867, iteration: 240934
loss: 0.9819017648696899,grad_norm: 0.9133880757480296, iteration: 240935
loss: 1.0135036706924438,grad_norm: 0.8027063582495081, iteration: 240936
loss: 0.9991970062255859,grad_norm: 0.8617564684661615, iteration: 240937
loss: 1.0082557201385498,grad_norm: 0.9745849803378451, iteration: 240938
loss: 1.0198543071746826,grad_norm: 0.9758306962789925, iteration: 240939
loss: 1.0067026615142822,grad_norm: 0.950775181044871, iteration: 240940
loss: 1.0626007318496704,grad_norm: 0.9999999961220837, iteration: 240941
loss: 1.0048640966415405,grad_norm: 0.9999996373930246, iteration: 240942
loss: 0.9905943870544434,grad_norm: 0.9374057457390369, iteration: 240943
loss: 1.0253986120224,grad_norm: 0.9392453330212958, iteration: 240944
loss: 1.006439447402954,grad_norm: 0.8201481167573748, iteration: 240945
loss: 1.0267443656921387,grad_norm: 0.9999989996025165, iteration: 240946
loss: 1.0153061151504517,grad_norm: 0.8381212361476781, iteration: 240947
loss: 1.0526446104049683,grad_norm: 0.9999991754767574, iteration: 240948
loss: 1.000148057937622,grad_norm: 0.8504244446959659, iteration: 240949
loss: 1.006498098373413,grad_norm: 0.9999992203132607, iteration: 240950
loss: 1.0214766263961792,grad_norm: 0.9999992431898869, iteration: 240951
loss: 1.0140259265899658,grad_norm: 0.9865665327450537, iteration: 240952
loss: 0.9746565818786621,grad_norm: 0.9022777244212808, iteration: 240953
loss: 0.9944114089012146,grad_norm: 0.9999991476019046, iteration: 240954
loss: 1.0575727224349976,grad_norm: 0.9999993585226551, iteration: 240955
loss: 0.9801833629608154,grad_norm: 0.8400457264250796, iteration: 240956
loss: 1.069596529006958,grad_norm: 0.999999773744825, iteration: 240957
loss: 1.050888180732727,grad_norm: 0.9999991051414042, iteration: 240958
loss: 0.9947247505187988,grad_norm: 0.7969782685314313, iteration: 240959
loss: 0.998281717300415,grad_norm: 0.9999989857199916, iteration: 240960
loss: 0.9848196506500244,grad_norm: 0.9480643853273997, iteration: 240961
loss: 1.0485728979110718,grad_norm: 0.999999682116546, iteration: 240962
loss: 1.025293231010437,grad_norm: 0.9451249328151226, iteration: 240963
loss: 1.0009526014328003,grad_norm: 0.8023593980279955, iteration: 240964
loss: 1.0330389738082886,grad_norm: 0.9999991342837923, iteration: 240965
loss: 1.0122991800308228,grad_norm: 0.9999989467905149, iteration: 240966
loss: 0.9896794557571411,grad_norm: 0.8449325979531117, iteration: 240967
loss: 1.0028897523880005,grad_norm: 0.8794443947052618, iteration: 240968
loss: 0.9996251463890076,grad_norm: 0.8058782489271491, iteration: 240969
loss: 1.0244431495666504,grad_norm: 0.9032562620609901, iteration: 240970
loss: 0.9857547879219055,grad_norm: 0.9597409324510734, iteration: 240971
loss: 0.9524087905883789,grad_norm: 0.924615699954662, iteration: 240972
loss: 0.9944090843200684,grad_norm: 0.8356973508108304, iteration: 240973
loss: 0.9948077201843262,grad_norm: 0.9614960354067147, iteration: 240974
loss: 0.9856306314468384,grad_norm: 0.9809095778185188, iteration: 240975
loss: 0.9987489581108093,grad_norm: 0.8785539371234082, iteration: 240976
loss: 0.9823755025863647,grad_norm: 0.8689176128007284, iteration: 240977
loss: 1.0065999031066895,grad_norm: 0.9893911161695167, iteration: 240978
loss: 1.1024963855743408,grad_norm: 0.9999997130027446, iteration: 240979
loss: 1.0505807399749756,grad_norm: 0.9501230450915137, iteration: 240980
loss: 1.0157831907272339,grad_norm: 0.9999992653558124, iteration: 240981
loss: 1.0175167322158813,grad_norm: 0.9373129371003573, iteration: 240982
loss: 1.0756114721298218,grad_norm: 0.9999998952144762, iteration: 240983
loss: 0.991554319858551,grad_norm: 0.9097438402719, iteration: 240984
loss: 1.096274495124817,grad_norm: 0.9999995857087163, iteration: 240985
loss: 0.98554927110672,grad_norm: 0.9999991947858536, iteration: 240986
loss: 1.0007423162460327,grad_norm: 0.820244611496739, iteration: 240987
loss: 1.0032812356948853,grad_norm: 0.8783987112313402, iteration: 240988
loss: 1.034109115600586,grad_norm: 0.9638387033913628, iteration: 240989
loss: 1.0191394090652466,grad_norm: 0.8294403394776441, iteration: 240990
loss: 1.1108369827270508,grad_norm: 0.9483081936032415, iteration: 240991
loss: 1.0748976469039917,grad_norm: 0.9526410808363717, iteration: 240992
loss: 0.9637673497200012,grad_norm: 0.8105639024675068, iteration: 240993
loss: 1.032857894897461,grad_norm: 0.9045903220044519, iteration: 240994
loss: 0.9683473110198975,grad_norm: 0.8486546607881793, iteration: 240995
loss: 0.9996733665466309,grad_norm: 0.9758797031784928, iteration: 240996
loss: 0.9651676416397095,grad_norm: 0.8858390477855436, iteration: 240997
loss: 1.0012295246124268,grad_norm: 0.8441753187838505, iteration: 240998
loss: 0.9877524971961975,grad_norm: 0.9196786783366941, iteration: 240999
loss: 0.957794189453125,grad_norm: 0.9087679724986644, iteration: 241000
loss: 0.9889140725135803,grad_norm: 0.9357416426423841, iteration: 241001
loss: 1.0088571310043335,grad_norm: 0.8427886524918716, iteration: 241002
loss: 0.997483491897583,grad_norm: 0.9348669454622938, iteration: 241003
loss: 0.9746844172477722,grad_norm: 0.8206448123634367, iteration: 241004
loss: 1.019204020500183,grad_norm: 0.9999991863401456, iteration: 241005
loss: 1.010360836982727,grad_norm: 0.8384913318266576, iteration: 241006
loss: 1.0065988302230835,grad_norm: 0.8698628878813148, iteration: 241007
loss: 1.0205074548721313,grad_norm: 0.920238146087039, iteration: 241008
loss: 1.0327504873275757,grad_norm: 0.9284220578907975, iteration: 241009
loss: 1.017535924911499,grad_norm: 0.8548182523242699, iteration: 241010
loss: 0.9832733869552612,grad_norm: 0.790796688159028, iteration: 241011
loss: 1.0214459896087646,grad_norm: 0.8781114771698855, iteration: 241012
loss: 0.9721037745475769,grad_norm: 0.9480027336713612, iteration: 241013
loss: 1.0130188465118408,grad_norm: 0.999999026090712, iteration: 241014
loss: 1.0366981029510498,grad_norm: 0.9778606302650368, iteration: 241015
loss: 0.9920355677604675,grad_norm: 0.9999990749041734, iteration: 241016
loss: 0.9567815661430359,grad_norm: 0.9999996612914822, iteration: 241017
loss: 1.016236424446106,grad_norm: 0.892930897256472, iteration: 241018
loss: 0.9977573156356812,grad_norm: 0.9999990154456767, iteration: 241019
loss: 1.0091451406478882,grad_norm: 0.9327196139655239, iteration: 241020
loss: 0.9945226907730103,grad_norm: 0.8107186159198673, iteration: 241021
loss: 0.9700761437416077,grad_norm: 0.8527136537823673, iteration: 241022
loss: 1.014512300491333,grad_norm: 0.7677641571616537, iteration: 241023
loss: 1.0507732629776,grad_norm: 0.9096741384522798, iteration: 241024
loss: 1.022710919380188,grad_norm: 0.865714511521214, iteration: 241025
loss: 1.0234066247940063,grad_norm: 0.8238178937582522, iteration: 241026
loss: 0.9740648865699768,grad_norm: 0.9105849081418137, iteration: 241027
loss: 1.0710992813110352,grad_norm: 0.9240846404178793, iteration: 241028
loss: 1.071024775505066,grad_norm: 0.9999996781726752, iteration: 241029
loss: 0.986058235168457,grad_norm: 0.7797610737849642, iteration: 241030
loss: 0.9646751880645752,grad_norm: 0.8498981463237637, iteration: 241031
loss: 1.0040037631988525,grad_norm: 0.8245095626425519, iteration: 241032
loss: 0.9972229599952698,grad_norm: 0.9302594754680135, iteration: 241033
loss: 1.06132972240448,grad_norm: 0.8637802817051636, iteration: 241034
loss: 1.1478177309036255,grad_norm: 0.9999991525714579, iteration: 241035
loss: 1.0001095533370972,grad_norm: 0.7938370741789498, iteration: 241036
loss: 0.9926031827926636,grad_norm: 0.8973345091955025, iteration: 241037
loss: 0.9881783723831177,grad_norm: 0.8864823639492327, iteration: 241038
loss: 0.9668694138526917,grad_norm: 0.8537882082409624, iteration: 241039
loss: 0.9753963351249695,grad_norm: 0.84144590171249, iteration: 241040
loss: 1.0256612300872803,grad_norm: 0.902717564349552, iteration: 241041
loss: 0.9934533834457397,grad_norm: 0.9999991535229558, iteration: 241042
loss: 1.093615174293518,grad_norm: 0.999999788894045, iteration: 241043
loss: 0.9884545803070068,grad_norm: 0.9043508649994892, iteration: 241044
loss: 1.0016127824783325,grad_norm: 0.7945855893822146, iteration: 241045
loss: 1.014709711074829,grad_norm: 0.999999346588062, iteration: 241046
loss: 1.0309653282165527,grad_norm: 0.9999988709737314, iteration: 241047
loss: 1.0205016136169434,grad_norm: 0.9428445178933185, iteration: 241048
loss: 1.0000629425048828,grad_norm: 0.9078362653666792, iteration: 241049
loss: 1.0013960599899292,grad_norm: 0.9999993551375093, iteration: 241050
loss: 1.078295111656189,grad_norm: 0.8303187442175809, iteration: 241051
loss: 1.0875811576843262,grad_norm: 0.9999998845301903, iteration: 241052
loss: 0.9830867052078247,grad_norm: 0.9999993666257754, iteration: 241053
loss: 0.9868210554122925,grad_norm: 0.8779092805063792, iteration: 241054
loss: 1.0163699388504028,grad_norm: 0.8568290259047491, iteration: 241055
loss: 0.9770916104316711,grad_norm: 0.9989015557760302, iteration: 241056
loss: 1.1159330606460571,grad_norm: 0.9598815337260241, iteration: 241057
loss: 1.1122994422912598,grad_norm: 0.9999990360846129, iteration: 241058
loss: 1.0033832788467407,grad_norm: 0.86053757540732, iteration: 241059
loss: 1.0022221803665161,grad_norm: 0.9999994169060138, iteration: 241060
loss: 0.99083411693573,grad_norm: 0.8871689135973656, iteration: 241061
loss: 1.0130330324172974,grad_norm: 0.8106636235738213, iteration: 241062
loss: 0.9862441420555115,grad_norm: 0.972590141004998, iteration: 241063
loss: 0.9979791045188904,grad_norm: 0.9999993197562331, iteration: 241064
loss: 1.0204858779907227,grad_norm: 0.999999844832272, iteration: 241065
loss: 0.9667649269104004,grad_norm: 0.867864492585831, iteration: 241066
loss: 0.9837245941162109,grad_norm: 0.7605491445426672, iteration: 241067
loss: 1.01472806930542,grad_norm: 0.999999118996888, iteration: 241068
loss: 1.0185492038726807,grad_norm: 0.957561554649511, iteration: 241069
loss: 0.9890430569648743,grad_norm: 0.816533939954262, iteration: 241070
loss: 1.0289034843444824,grad_norm: 0.9315309419686274, iteration: 241071
loss: 1.0186270475387573,grad_norm: 0.8754771048393746, iteration: 241072
loss: 1.0341781377792358,grad_norm: 0.8029289648770978, iteration: 241073
loss: 1.0373601913452148,grad_norm: 0.7582115851740566, iteration: 241074
loss: 0.9910217523574829,grad_norm: 0.9999993066554222, iteration: 241075
loss: 0.996349573135376,grad_norm: 0.9999994980949349, iteration: 241076
loss: 1.0027601718902588,grad_norm: 0.9271628894602311, iteration: 241077
loss: 0.9580559134483337,grad_norm: 0.7459972713705406, iteration: 241078
loss: 1.0103459358215332,grad_norm: 0.8298757532305999, iteration: 241079
loss: 1.0605098009109497,grad_norm: 0.9999997180248918, iteration: 241080
loss: 0.9994986057281494,grad_norm: 0.7877068760528327, iteration: 241081
loss: 0.991461992263794,grad_norm: 0.9999991914010828, iteration: 241082
loss: 1.0423961877822876,grad_norm: 0.9778951045622751, iteration: 241083
loss: 0.9832091331481934,grad_norm: 0.9999991438165715, iteration: 241084
loss: 1.0374412536621094,grad_norm: 0.8836724569716192, iteration: 241085
loss: 0.9774672985076904,grad_norm: 0.999734472960038, iteration: 241086
loss: 0.9998569488525391,grad_norm: 0.9999990818809599, iteration: 241087
loss: 1.0110093355178833,grad_norm: 0.9999990919229453, iteration: 241088
loss: 1.020595669746399,grad_norm: 0.9999992022379784, iteration: 241089
loss: 1.0202124118804932,grad_norm: 0.999999219438033, iteration: 241090
loss: 1.0072425603866577,grad_norm: 0.9100209438190188, iteration: 241091
loss: 0.9812654852867126,grad_norm: 0.8919642468561366, iteration: 241092
loss: 1.0563979148864746,grad_norm: 0.9999992167061953, iteration: 241093
loss: 0.9874089956283569,grad_norm: 0.9999991556606119, iteration: 241094
loss: 1.0268754959106445,grad_norm: 0.9999991706649247, iteration: 241095
loss: 1.0284454822540283,grad_norm: 0.999999496330093, iteration: 241096
loss: 0.9836990237236023,grad_norm: 0.9370576280726337, iteration: 241097
loss: 1.0676782131195068,grad_norm: 0.999999456256011, iteration: 241098
loss: 1.1021755933761597,grad_norm: 0.9999999873774679, iteration: 241099
loss: 1.0017058849334717,grad_norm: 0.9999992257418278, iteration: 241100
loss: 1.0061829090118408,grad_norm: 0.9999992548778709, iteration: 241101
loss: 0.9933068156242371,grad_norm: 0.9005569413455264, iteration: 241102
loss: 1.0750230550765991,grad_norm: 0.9999991747107919, iteration: 241103
loss: 0.996070146560669,grad_norm: 0.9999995607043268, iteration: 241104
loss: 1.0344395637512207,grad_norm: 0.8548076388320346, iteration: 241105
loss: 0.9660935401916504,grad_norm: 0.8748815574727499, iteration: 241106
loss: 1.0143729448318481,grad_norm: 0.9907328778277593, iteration: 241107
loss: 1.0100593566894531,grad_norm: 0.9695461282136456, iteration: 241108
loss: 0.9624311923980713,grad_norm: 0.8019176129574838, iteration: 241109
loss: 0.9882391095161438,grad_norm: 0.8672061429158406, iteration: 241110
loss: 1.0323561429977417,grad_norm: 0.999999941528009, iteration: 241111
loss: 1.1157045364379883,grad_norm: 0.9999995040357033, iteration: 241112
loss: 0.9988022446632385,grad_norm: 0.9975457553785864, iteration: 241113
loss: 1.030882477760315,grad_norm: 0.9999994289686067, iteration: 241114
loss: 1.0513107776641846,grad_norm: 0.9999990414363924, iteration: 241115
loss: 1.019981861114502,grad_norm: 0.9999994559043077, iteration: 241116
loss: 0.9971505999565125,grad_norm: 0.9999990496555583, iteration: 241117
loss: 1.0299923419952393,grad_norm: 0.9999992274181291, iteration: 241118
loss: 1.0401027202606201,grad_norm: 0.99999970565402, iteration: 241119
loss: 0.9725925922393799,grad_norm: 0.953259369422704, iteration: 241120
loss: 0.9962701201438904,grad_norm: 0.9999992116692025, iteration: 241121
loss: 0.9811014533042908,grad_norm: 0.9684806387480098, iteration: 241122
loss: 0.9846893548965454,grad_norm: 0.8768565561372728, iteration: 241123
loss: 1.0376077890396118,grad_norm: 0.8252525207791378, iteration: 241124
loss: 0.999655544757843,grad_norm: 0.9553663793774179, iteration: 241125
loss: 1.031387448310852,grad_norm: 0.999999069033018, iteration: 241126
loss: 1.0085581541061401,grad_norm: 0.9999990346919959, iteration: 241127
loss: 1.0119426250457764,grad_norm: 0.912615287012968, iteration: 241128
loss: 0.9852311015129089,grad_norm: 0.9999992190694417, iteration: 241129
loss: 1.0244407653808594,grad_norm: 0.9999990057289436, iteration: 241130
loss: 1.0113646984100342,grad_norm: 0.9999992071200533, iteration: 241131
loss: 0.97288978099823,grad_norm: 0.9415278972275104, iteration: 241132
loss: 0.9997311234474182,grad_norm: 0.9014892080552462, iteration: 241133
loss: 1.007177472114563,grad_norm: 0.9999998377670962, iteration: 241134
loss: 1.0240018367767334,grad_norm: 0.9999991191583311, iteration: 241135
loss: 0.9775274991989136,grad_norm: 0.9999999802374966, iteration: 241136
loss: 1.017865777015686,grad_norm: 0.999999448678074, iteration: 241137
loss: 0.9856279492378235,grad_norm: 0.836457897984147, iteration: 241138
loss: 0.9855864644050598,grad_norm: 0.9999990993859624, iteration: 241139
loss: 0.9927707314491272,grad_norm: 0.7581552379132359, iteration: 241140
loss: 1.0040756464004517,grad_norm: 0.9041394619174885, iteration: 241141
loss: 1.033620834350586,grad_norm: 0.7796474459200856, iteration: 241142
loss: 1.0095981359481812,grad_norm: 0.9799143371153867, iteration: 241143
loss: 0.9977698922157288,grad_norm: 0.994649509046464, iteration: 241144
loss: 0.9965689778327942,grad_norm: 0.9830226337840423, iteration: 241145
loss: 1.0131045579910278,grad_norm: 0.802194989013154, iteration: 241146
loss: 0.9906835556030273,grad_norm: 0.9999998482774919, iteration: 241147
loss: 0.9891908764839172,grad_norm: 0.8497496226329518, iteration: 241148
loss: 0.9805865287780762,grad_norm: 0.8521875454322629, iteration: 241149
loss: 1.0071141719818115,grad_norm: 0.9318284734825624, iteration: 241150
loss: 1.031447410583496,grad_norm: 0.9999996472118768, iteration: 241151
loss: 0.9493400454521179,grad_norm: 0.8086747210910175, iteration: 241152
loss: 0.9693538546562195,grad_norm: 0.8450682767367718, iteration: 241153
loss: 1.033571720123291,grad_norm: 0.999999345241374, iteration: 241154
loss: 1.0017327070236206,grad_norm: 0.9034875849377336, iteration: 241155
loss: 0.9854084253311157,grad_norm: 0.9362731881366824, iteration: 241156
loss: 1.0041744709014893,grad_norm: 0.842325369218427, iteration: 241157
loss: 0.9963280558586121,grad_norm: 0.8674650694972302, iteration: 241158
loss: 0.9860283732414246,grad_norm: 0.8699395110977857, iteration: 241159
loss: 1.0638189315795898,grad_norm: 0.9999990787368511, iteration: 241160
loss: 1.1933672428131104,grad_norm: 0.9999997556753013, iteration: 241161
loss: 0.9775977730751038,grad_norm: 0.9451041643815657, iteration: 241162
loss: 1.0020990371704102,grad_norm: 0.9918144699503904, iteration: 241163
loss: 1.024686336517334,grad_norm: 0.9853965530355331, iteration: 241164
loss: 1.1211438179016113,grad_norm: 0.9999998096369042, iteration: 241165
loss: 0.9886484146118164,grad_norm: 0.9355099323472169, iteration: 241166
loss: 1.0616767406463623,grad_norm: 0.9999991055901449, iteration: 241167
loss: 1.0375230312347412,grad_norm: 0.9999998020174216, iteration: 241168
loss: 1.0191943645477295,grad_norm: 0.8663408593115549, iteration: 241169
loss: 1.000207781791687,grad_norm: 0.8952500302628827, iteration: 241170
loss: 1.000010371208191,grad_norm: 0.9766329053188804, iteration: 241171
loss: 0.9961149096488953,grad_norm: 0.8545513513876434, iteration: 241172
loss: 1.0066258907318115,grad_norm: 0.8978671055063955, iteration: 241173
loss: 1.0120903253555298,grad_norm: 0.9999992268535242, iteration: 241174
loss: 1.0325967073440552,grad_norm: 0.9221176041842299, iteration: 241175
loss: 1.1086643934249878,grad_norm: 0.9678139444462402, iteration: 241176
loss: 1.0945073366165161,grad_norm: 0.9999990476513332, iteration: 241177
loss: 1.017626404762268,grad_norm: 0.8714856249819346, iteration: 241178
loss: 0.9873523712158203,grad_norm: 0.9009463300336878, iteration: 241179
loss: 0.9705840349197388,grad_norm: 0.9999991191525436, iteration: 241180
loss: 1.1349515914916992,grad_norm: 0.9999991860765148, iteration: 241181
loss: 0.9467732310295105,grad_norm: 0.9999990990301811, iteration: 241182
loss: 1.0441607236862183,grad_norm: 0.9332888839261163, iteration: 241183
loss: 1.08242666721344,grad_norm: 0.9999996161861519, iteration: 241184
loss: 0.9929649829864502,grad_norm: 0.8620744044592882, iteration: 241185
loss: 1.0233997106552124,grad_norm: 0.8985711530740061, iteration: 241186
loss: 1.1119600534439087,grad_norm: 0.999999032704669, iteration: 241187
loss: 0.9720011949539185,grad_norm: 0.9999994209168173, iteration: 241188
loss: 1.033614158630371,grad_norm: 0.9999997913118137, iteration: 241189
loss: 1.0485919713974,grad_norm: 0.9999994846565309, iteration: 241190
loss: 0.978489339351654,grad_norm: 0.9710024846613641, iteration: 241191
loss: 0.9882010817527771,grad_norm: 0.8922537987525188, iteration: 241192
loss: 1.0346763134002686,grad_norm: 0.9999993020268387, iteration: 241193
loss: 1.0275483131408691,grad_norm: 0.9638805625233353, iteration: 241194
loss: 1.0077762603759766,grad_norm: 0.847072623857908, iteration: 241195
loss: 1.014317512512207,grad_norm: 0.9123570842745398, iteration: 241196
loss: 1.0426726341247559,grad_norm: 0.8058069635679166, iteration: 241197
loss: 1.0593761205673218,grad_norm: 0.9768530823566541, iteration: 241198
loss: 0.9615846276283264,grad_norm: 0.914677196944012, iteration: 241199
loss: 0.9947327375411987,grad_norm: 0.9490801927530607, iteration: 241200
loss: 1.0182969570159912,grad_norm: 0.8458396834040011, iteration: 241201
loss: 1.0464168787002563,grad_norm: 0.9999991545597938, iteration: 241202
loss: 0.9540866017341614,grad_norm: 0.8083697838492433, iteration: 241203
loss: 0.988283097743988,grad_norm: 0.9010921915282096, iteration: 241204
loss: 1.027674674987793,grad_norm: 0.9999992348523625, iteration: 241205
loss: 0.9916078448295593,grad_norm: 0.7878016711489458, iteration: 241206
loss: 0.9883909225463867,grad_norm: 0.9868165628212537, iteration: 241207
loss: 0.9853475093841553,grad_norm: 0.8592886425719334, iteration: 241208
loss: 0.9734771847724915,grad_norm: 0.7808906272728025, iteration: 241209
loss: 0.9945477843284607,grad_norm: 0.8641898973034964, iteration: 241210
loss: 0.9806333780288696,grad_norm: 0.7711184069707905, iteration: 241211
loss: 1.0320826768875122,grad_norm: 0.9999994158981506, iteration: 241212
loss: 1.0184634923934937,grad_norm: 0.9413096010625366, iteration: 241213
loss: 1.0056344270706177,grad_norm: 0.7440347577772709, iteration: 241214
loss: 1.0125880241394043,grad_norm: 0.9999990390264447, iteration: 241215
loss: 1.05018150806427,grad_norm: 0.9999991492961617, iteration: 241216
loss: 1.0100853443145752,grad_norm: 0.9316740140581224, iteration: 241217
loss: 0.9376049637794495,grad_norm: 0.9999991136570232, iteration: 241218
loss: 0.9958772659301758,grad_norm: 0.8323802515556885, iteration: 241219
loss: 0.9738592505455017,grad_norm: 0.9999991203181922, iteration: 241220
loss: 0.9646634459495544,grad_norm: 0.8714305998357031, iteration: 241221
loss: 0.9641773700714111,grad_norm: 0.8800511987591877, iteration: 241222
loss: 0.9909862279891968,grad_norm: 0.9999997276209066, iteration: 241223
loss: 1.0095239877700806,grad_norm: 0.8701324339587281, iteration: 241224
loss: 0.9949538707733154,grad_norm: 0.7890764292529885, iteration: 241225
loss: 1.0249723196029663,grad_norm: 0.9999997433267359, iteration: 241226
loss: 1.0301047563552856,grad_norm: 0.9999994961806793, iteration: 241227
loss: 1.0073918104171753,grad_norm: 0.8338680684181805, iteration: 241228
loss: 0.9652596712112427,grad_norm: 0.9999990811310981, iteration: 241229
loss: 0.994193971157074,grad_norm: 0.7976227430946125, iteration: 241230
loss: 1.014106273651123,grad_norm: 0.7692320143366307, iteration: 241231
loss: 0.9745168089866638,grad_norm: 0.8524609453296663, iteration: 241232
loss: 1.0604162216186523,grad_norm: 0.9999993714409237, iteration: 241233
loss: 1.0162395238876343,grad_norm: 0.999998991767755, iteration: 241234
loss: 1.04764986038208,grad_norm: 0.9639973700144574, iteration: 241235
loss: 0.9856630563735962,grad_norm: 0.8634168099124552, iteration: 241236
loss: 0.9601505398750305,grad_norm: 0.9521415003679063, iteration: 241237
loss: 0.9784311652183533,grad_norm: 0.9808418579848942, iteration: 241238
loss: 1.0366305112838745,grad_norm: 0.8881598882645677, iteration: 241239
loss: 0.985485851764679,grad_norm: 0.9999996982016071, iteration: 241240
loss: 0.9843502640724182,grad_norm: 0.8175391644017328, iteration: 241241
loss: 0.9612552523612976,grad_norm: 0.8942282866856305, iteration: 241242
loss: 0.9653964638710022,grad_norm: 0.9999990859437604, iteration: 241243
loss: 1.0240060091018677,grad_norm: 0.999999448880961, iteration: 241244
loss: 0.9863620400428772,grad_norm: 0.9999990735426624, iteration: 241245
loss: 1.0081255435943604,grad_norm: 0.9999991580833828, iteration: 241246
loss: 1.0185717344284058,grad_norm: 0.9213412370775416, iteration: 241247
loss: 0.9725552797317505,grad_norm: 0.9477697344091996, iteration: 241248
loss: 1.1049485206604004,grad_norm: 0.9999993943246867, iteration: 241249
loss: 1.0146231651306152,grad_norm: 0.7978696536872626, iteration: 241250
loss: 1.0582064390182495,grad_norm: 0.99999940510372, iteration: 241251
loss: 0.995490312576294,grad_norm: 0.9999991871621529, iteration: 241252
loss: 1.0216054916381836,grad_norm: 0.977158955896149, iteration: 241253
loss: 1.0498701333999634,grad_norm: 0.9999998685610024, iteration: 241254
loss: 1.0382704734802246,grad_norm: 0.877975802134176, iteration: 241255
loss: 0.9980584383010864,grad_norm: 0.9999991778959021, iteration: 241256
loss: 0.9849570393562317,grad_norm: 0.9999991419373432, iteration: 241257
loss: 1.020137071609497,grad_norm: 0.8899710405471116, iteration: 241258
loss: 1.1688716411590576,grad_norm: 0.9999992439770231, iteration: 241259
loss: 1.0596787929534912,grad_norm: 0.9999991076125927, iteration: 241260
loss: 1.0301671028137207,grad_norm: 0.8213371483725715, iteration: 241261
loss: 0.9732860922813416,grad_norm: 0.8653838652028976, iteration: 241262
loss: 1.0296236276626587,grad_norm: 0.9999998426725654, iteration: 241263
loss: 1.0181033611297607,grad_norm: 0.9159583898767133, iteration: 241264
loss: 1.0351078510284424,grad_norm: 0.8972204630533026, iteration: 241265
loss: 0.9984256029129028,grad_norm: 0.9999994536384473, iteration: 241266
loss: 0.9718736410140991,grad_norm: 0.9022497924011111, iteration: 241267
loss: 1.003958821296692,grad_norm: 0.9999990387448588, iteration: 241268
loss: 0.9722972512245178,grad_norm: 0.942044974662031, iteration: 241269
loss: 1.0101072788238525,grad_norm: 0.9163558652915352, iteration: 241270
loss: 1.0022960901260376,grad_norm: 0.9999991370719674, iteration: 241271
loss: 1.0217076539993286,grad_norm: 1.000000011824359, iteration: 241272
loss: 1.1532018184661865,grad_norm: 0.9999996935601869, iteration: 241273
loss: 0.9967247247695923,grad_norm: 0.99999931496032, iteration: 241274
loss: 0.990234375,grad_norm: 0.8168840509852969, iteration: 241275
loss: 1.1669516563415527,grad_norm: 0.9999993143591634, iteration: 241276
loss: 1.001758098602295,grad_norm: 0.9934090365311179, iteration: 241277
loss: 1.1446938514709473,grad_norm: 0.9999995363310303, iteration: 241278
loss: 0.9720184803009033,grad_norm: 0.9999990769771212, iteration: 241279
loss: 1.0416427850723267,grad_norm: 0.7804177396991315, iteration: 241280
loss: 1.0521970987319946,grad_norm: 0.6785870766077198, iteration: 241281
loss: 1.0535401105880737,grad_norm: 0.8879744771510987, iteration: 241282
loss: 1.0051056146621704,grad_norm: 0.9999993724825954, iteration: 241283
loss: 1.0349035263061523,grad_norm: 0.9999998696755317, iteration: 241284
loss: 0.9429864287376404,grad_norm: 0.897023146202735, iteration: 241285
loss: 1.0186920166015625,grad_norm: 0.8183493203998673, iteration: 241286
loss: 1.0174131393432617,grad_norm: 0.8713874418926355, iteration: 241287
loss: 1.0583710670471191,grad_norm: 0.9932016311282713, iteration: 241288
loss: 1.0134806632995605,grad_norm: 0.9999990137929408, iteration: 241289
loss: 1.0038366317749023,grad_norm: 0.8417146507223229, iteration: 241290
loss: 0.9944822192192078,grad_norm: 0.954859224558303, iteration: 241291
loss: 1.0898953676223755,grad_norm: 0.92205778166736, iteration: 241292
loss: 0.9923895001411438,grad_norm: 0.9999994076878753, iteration: 241293
loss: 1.0069122314453125,grad_norm: 0.8803622989920322, iteration: 241294
loss: 1.0075335502624512,grad_norm: 0.9942281656108173, iteration: 241295
loss: 0.9905006885528564,grad_norm: 0.882784105965406, iteration: 241296
loss: 1.01591157913208,grad_norm: 0.9104921162030162, iteration: 241297
loss: 1.0367472171783447,grad_norm: 0.9999992254342477, iteration: 241298
loss: 1.0347099304199219,grad_norm: 0.9317096476326748, iteration: 241299
loss: 1.0280966758728027,grad_norm: 0.9999990913190132, iteration: 241300
loss: 1.0070433616638184,grad_norm: 0.9999999333770043, iteration: 241301
loss: 0.9885117411613464,grad_norm: 0.9999991003509726, iteration: 241302
loss: 1.0010168552398682,grad_norm: 0.8538897289504559, iteration: 241303
loss: 1.0688750743865967,grad_norm: 0.9007004059186196, iteration: 241304
loss: 1.0115082263946533,grad_norm: 0.9999989908734143, iteration: 241305
loss: 1.017134189605713,grad_norm: 0.9999991912626595, iteration: 241306
loss: 0.9960176944732666,grad_norm: 0.8916975950291254, iteration: 241307
loss: 0.9704973101615906,grad_norm: 0.9150143705268646, iteration: 241308
loss: 1.0524787902832031,grad_norm: 0.9999993961875625, iteration: 241309
loss: 0.998196005821228,grad_norm: 0.7781306560158912, iteration: 241310
loss: 0.9775472283363342,grad_norm: 0.9999990799031874, iteration: 241311
loss: 0.9632940888404846,grad_norm: 0.8302698001401706, iteration: 241312
loss: 1.092018961906433,grad_norm: 0.9999999357869179, iteration: 241313
loss: 1.0341465473175049,grad_norm: 0.9999996424852563, iteration: 241314
loss: 1.015718936920166,grad_norm: 0.9999990939334613, iteration: 241315
loss: 1.0809521675109863,grad_norm: 0.9999998478810459, iteration: 241316
loss: 1.1957138776779175,grad_norm: 0.9999998521732908, iteration: 241317
loss: 0.9948041439056396,grad_norm: 0.8856269738312571, iteration: 241318
loss: 1.038775086402893,grad_norm: 0.9102004721477358, iteration: 241319
loss: 1.0448582172393799,grad_norm: 0.8751784461962261, iteration: 241320
loss: 1.0174098014831543,grad_norm: 0.9999992729368182, iteration: 241321
loss: 1.000322699546814,grad_norm: 0.9999997907627329, iteration: 241322
loss: 1.0517114400863647,grad_norm: 0.9999991632345777, iteration: 241323
loss: 0.9765641689300537,grad_norm: 0.9999990332298312, iteration: 241324
loss: 1.0008171796798706,grad_norm: 0.9999991047037027, iteration: 241325
loss: 0.9703080058097839,grad_norm: 0.9399728092024016, iteration: 241326
loss: 0.9825537204742432,grad_norm: 0.8314822337821814, iteration: 241327
loss: 1.0647966861724854,grad_norm: 0.8572225714710298, iteration: 241328
loss: 1.0180732011795044,grad_norm: 0.9999992120638611, iteration: 241329
loss: 1.0802544355392456,grad_norm: 0.9999997454649936, iteration: 241330
loss: 1.1054807901382446,grad_norm: 0.9999995961887794, iteration: 241331
loss: 1.0963385105133057,grad_norm: 0.9098149387789414, iteration: 241332
loss: 1.0828908681869507,grad_norm: 0.9475824156130193, iteration: 241333
loss: 1.0274710655212402,grad_norm: 0.9999997475606435, iteration: 241334
loss: 1.0225170850753784,grad_norm: 0.794990185102775, iteration: 241335
loss: 0.9691465497016907,grad_norm: 0.9444960659814433, iteration: 241336
loss: 1.0292980670928955,grad_norm: 0.9999994377518777, iteration: 241337
loss: 1.024517297744751,grad_norm: 0.9999997107887589, iteration: 241338
loss: 1.0045417547225952,grad_norm: 0.8975579786076167, iteration: 241339
loss: 0.9845129251480103,grad_norm: 0.9999990078842708, iteration: 241340
loss: 0.9778963923454285,grad_norm: 0.9999993106157246, iteration: 241341
loss: 1.0911402702331543,grad_norm: 0.9999993423543728, iteration: 241342
loss: 1.0173687934875488,grad_norm: 0.9999998743292007, iteration: 241343
loss: 1.0178537368774414,grad_norm: 0.9999991690413603, iteration: 241344
loss: 0.9702544212341309,grad_norm: 0.8572173218152508, iteration: 241345
loss: 0.984537661075592,grad_norm: 0.8882440011997752, iteration: 241346
loss: 1.090419054031372,grad_norm: 0.9999998695022061, iteration: 241347
loss: 1.0303902626037598,grad_norm: 0.7870435152825983, iteration: 241348
loss: 1.0530034303665161,grad_norm: 0.9999991660625177, iteration: 241349
loss: 1.133739709854126,grad_norm: 0.999999820304985, iteration: 241350
loss: 1.012840747833252,grad_norm: 0.8633927743680263, iteration: 241351
loss: 1.0418732166290283,grad_norm: 0.9074549304231968, iteration: 241352
loss: 1.0785337686538696,grad_norm: 0.9999996515151806, iteration: 241353
loss: 0.9931162595748901,grad_norm: 0.9999991830630873, iteration: 241354
loss: 1.0274593830108643,grad_norm: 0.9999991758108496, iteration: 241355
loss: 1.022000789642334,grad_norm: 0.8072217683435836, iteration: 241356
loss: 1.0714519023895264,grad_norm: 0.9999995044745934, iteration: 241357
loss: 1.228541612625122,grad_norm: 0.9999990755093643, iteration: 241358
loss: 1.0696028470993042,grad_norm: 0.906684750100096, iteration: 241359
loss: 1.0493683815002441,grad_norm: 0.9999993105188246, iteration: 241360
loss: 1.0753600597381592,grad_norm: 0.9892870827733797, iteration: 241361
loss: 1.049093246459961,grad_norm: 0.9999999191994284, iteration: 241362
loss: 1.0201224088668823,grad_norm: 0.9679735908302532, iteration: 241363
loss: 1.0217385292053223,grad_norm: 0.9438667586302036, iteration: 241364
loss: 1.095836877822876,grad_norm: 0.9999991260818917, iteration: 241365
loss: 1.0167168378829956,grad_norm: 0.9999990907583298, iteration: 241366
loss: 1.0517094135284424,grad_norm: 0.9999990054097375, iteration: 241367
loss: 1.0955032110214233,grad_norm: 0.9999997116662758, iteration: 241368
loss: 1.0361328125,grad_norm: 0.8636876470177055, iteration: 241369
loss: 1.0360438823699951,grad_norm: 0.9999996616157164, iteration: 241370
loss: 1.1980127096176147,grad_norm: 0.9999994444553221, iteration: 241371
loss: 1.0795568227767944,grad_norm: 0.999999575605376, iteration: 241372
loss: 1.0513981580734253,grad_norm: 0.9999994142168095, iteration: 241373
loss: 1.0532702207565308,grad_norm: 0.9999991334953844, iteration: 241374
loss: 1.0972000360488892,grad_norm: 0.9999990474515582, iteration: 241375
loss: 1.0553032159805298,grad_norm: 0.9006409529339909, iteration: 241376
loss: 0.9903868436813354,grad_norm: 0.9580917347160568, iteration: 241377
loss: 1.0388646125793457,grad_norm: 0.9999992906340806, iteration: 241378
loss: 1.0168719291687012,grad_norm: 0.9633433548960095, iteration: 241379
loss: 1.0152533054351807,grad_norm: 0.8247426209764498, iteration: 241380
loss: 1.0609508752822876,grad_norm: 0.9999996362021543, iteration: 241381
loss: 1.0465830564498901,grad_norm: 0.8255620361270181, iteration: 241382
loss: 1.0094330310821533,grad_norm: 0.9333197923184313, iteration: 241383
loss: 0.9943339824676514,grad_norm: 0.9999990506630626, iteration: 241384
loss: 0.9916603565216064,grad_norm: 0.8691785698730249, iteration: 241385
loss: 1.014354944229126,grad_norm: 0.9999992412891661, iteration: 241386
loss: 0.9875120520591736,grad_norm: 0.785092364980211, iteration: 241387
loss: 1.0868881940841675,grad_norm: 0.9366651619234871, iteration: 241388
loss: 0.9757292866706848,grad_norm: 0.7273392788674481, iteration: 241389
loss: 1.0296475887298584,grad_norm: 0.8016330236496414, iteration: 241390
loss: 1.0057545900344849,grad_norm: 0.9733226108414442, iteration: 241391
loss: 0.9899736046791077,grad_norm: 0.9999990456297925, iteration: 241392
loss: 1.0152095556259155,grad_norm: 0.9008255634057566, iteration: 241393
loss: 0.9914495944976807,grad_norm: 0.9684393275097452, iteration: 241394
loss: 1.1355609893798828,grad_norm: 0.9999999420528104, iteration: 241395
loss: 1.0643969774246216,grad_norm: 0.9999989948561816, iteration: 241396
loss: 1.0846775770187378,grad_norm: 0.9555356950983602, iteration: 241397
loss: 0.9813320636749268,grad_norm: 0.8585873371955896, iteration: 241398
loss: 0.9863728880882263,grad_norm: 0.9999990106607626, iteration: 241399
loss: 1.1018879413604736,grad_norm: 0.886897563809482, iteration: 241400
loss: 1.0543419122695923,grad_norm: 0.8959095182351012, iteration: 241401
loss: 1.0196304321289062,grad_norm: 0.8801268652281424, iteration: 241402
loss: 1.0837005376815796,grad_norm: 0.8845827385263012, iteration: 241403
loss: 0.9971134662628174,grad_norm: 0.9516856953228543, iteration: 241404
loss: 0.9954610466957092,grad_norm: 0.9999991144392796, iteration: 241405
loss: 0.987942099571228,grad_norm: 0.9901724444030321, iteration: 241406
loss: 0.9898529052734375,grad_norm: 0.9789732192529707, iteration: 241407
loss: 1.0063074827194214,grad_norm: 0.99999906426738, iteration: 241408
loss: 1.0027016401290894,grad_norm: 0.9999992104748672, iteration: 241409
loss: 1.0307334661483765,grad_norm: 0.7956359235677677, iteration: 241410
loss: 1.0743906497955322,grad_norm: 0.9363997734126895, iteration: 241411
loss: 1.0641692876815796,grad_norm: 0.8612530359693703, iteration: 241412
loss: 1.061808705329895,grad_norm: 0.966263475571392, iteration: 241413
loss: 1.2412484884262085,grad_norm: 0.9999993649567973, iteration: 241414
loss: 1.0051958560943604,grad_norm: 0.8427561722837175, iteration: 241415
loss: 1.112498164176941,grad_norm: 0.999999034867581, iteration: 241416
loss: 1.0622637271881104,grad_norm: 0.9936348938010997, iteration: 241417
loss: 1.0241721868515015,grad_norm: 0.9437058936515553, iteration: 241418
loss: 1.0041712522506714,grad_norm: 0.9411532826963329, iteration: 241419
loss: 0.9673839211463928,grad_norm: 0.9068927744776292, iteration: 241420
loss: 0.9965304732322693,grad_norm: 0.9346158096663892, iteration: 241421
loss: 0.9764830470085144,grad_norm: 0.9999991714591071, iteration: 241422
loss: 1.1098395586013794,grad_norm: 0.9999996755539142, iteration: 241423
loss: 0.9775147438049316,grad_norm: 0.9762385346795638, iteration: 241424
loss: 1.042792797088623,grad_norm: 0.999999111130269, iteration: 241425
loss: 1.1325453519821167,grad_norm: 0.9902975105366496, iteration: 241426
loss: 1.0303398370742798,grad_norm: 0.8270062910561253, iteration: 241427
loss: 1.0617822408676147,grad_norm: 0.831121598141738, iteration: 241428
loss: 1.0283509492874146,grad_norm: 0.9999990569390796, iteration: 241429
loss: 1.1093785762786865,grad_norm: 0.938741007347119, iteration: 241430
loss: 1.1180198192596436,grad_norm: 0.9999999298443439, iteration: 241431
loss: 1.0698364973068237,grad_norm: 0.9113718805231281, iteration: 241432
loss: 1.0723516941070557,grad_norm: 0.9999995210530102, iteration: 241433
loss: 1.0728180408477783,grad_norm: 0.9999998322755149, iteration: 241434
loss: 1.0350234508514404,grad_norm: 0.8638408312594353, iteration: 241435
loss: 1.0183502435684204,grad_norm: 0.9999996160288865, iteration: 241436
loss: 1.1119027137756348,grad_norm: 0.9562034439902529, iteration: 241437
loss: 1.1494848728179932,grad_norm: 0.9999996099760836, iteration: 241438
loss: 0.9861060380935669,grad_norm: 0.8704041616846946, iteration: 241439
loss: 1.1293840408325195,grad_norm: 0.9999997822978195, iteration: 241440
loss: 1.079042673110962,grad_norm: 0.9999996418071919, iteration: 241441
loss: 1.042302131652832,grad_norm: 0.7328700972969744, iteration: 241442
loss: 1.0750013589859009,grad_norm: 0.887759269789053, iteration: 241443
loss: 1.1595630645751953,grad_norm: 0.999999477248056, iteration: 241444
loss: 1.1446106433868408,grad_norm: 0.9999991597577861, iteration: 241445
loss: 1.0742084980010986,grad_norm: 0.809740360217225, iteration: 241446
loss: 1.1594504117965698,grad_norm: 0.9999991732879884, iteration: 241447
loss: 1.1025991439819336,grad_norm: 0.9786231495964324, iteration: 241448
loss: 1.0949267148971558,grad_norm: 0.9999990920777994, iteration: 241449
loss: 1.128068447113037,grad_norm: 0.7669887692591099, iteration: 241450
loss: 1.170272946357727,grad_norm: 0.9999993724343698, iteration: 241451
loss: 1.0771958827972412,grad_norm: 0.9999992321185288, iteration: 241452
loss: 1.0929362773895264,grad_norm: 0.9999990540888973, iteration: 241453
loss: 1.0089665651321411,grad_norm: 0.9999990193680337, iteration: 241454
loss: 1.1759698390960693,grad_norm: 0.9999992627658667, iteration: 241455
loss: 1.0075993537902832,grad_norm: 0.9999993655734115, iteration: 241456
loss: 1.171599268913269,grad_norm: 0.9999991204268799, iteration: 241457
loss: 1.1404489278793335,grad_norm: 0.9999992376960888, iteration: 241458
loss: 1.0703752040863037,grad_norm: 0.9999991674623468, iteration: 241459
loss: 1.0825321674346924,grad_norm: 0.9870126330115593, iteration: 241460
loss: 1.2299046516418457,grad_norm: 0.9999994529326816, iteration: 241461
loss: 1.2509597539901733,grad_norm: 0.9999993402879944, iteration: 241462
loss: 1.0122225284576416,grad_norm: 0.9999989803675389, iteration: 241463
loss: 1.1506319046020508,grad_norm: 0.9999990749958458, iteration: 241464
loss: 1.0891083478927612,grad_norm: 0.8326515678431761, iteration: 241465
loss: 1.1336026191711426,grad_norm: 0.9999990331734384, iteration: 241466
loss: 1.045008897781372,grad_norm: 0.999999177687473, iteration: 241467
loss: 1.167781949043274,grad_norm: 1.0000000714365886, iteration: 241468
loss: 1.3123350143432617,grad_norm: 0.9999999496080368, iteration: 241469
loss: 1.079297423362732,grad_norm: 0.9999992683197758, iteration: 241470
loss: 1.0741996765136719,grad_norm: 0.9999991923559719, iteration: 241471
loss: 1.0770961046218872,grad_norm: 0.7722228347095297, iteration: 241472
loss: 1.0632096529006958,grad_norm: 0.9999990486731949, iteration: 241473
loss: 1.1199589967727661,grad_norm: 0.9999995734765846, iteration: 241474
loss: 1.1590312719345093,grad_norm: 0.9999991304043847, iteration: 241475
loss: 1.2075295448303223,grad_norm: 0.9999991924372895, iteration: 241476
loss: 1.025168776512146,grad_norm: 0.9999992691302246, iteration: 241477
loss: 1.125350832939148,grad_norm: 0.9999990299227423, iteration: 241478
loss: 1.0913760662078857,grad_norm: 0.8633359416699893, iteration: 241479
loss: 1.2980360984802246,grad_norm: 0.9999991165318495, iteration: 241480
loss: 1.292065143585205,grad_norm: 0.9999996174833452, iteration: 241481
loss: 1.0671515464782715,grad_norm: 0.9999991681119641, iteration: 241482
loss: 1.1212798357009888,grad_norm: 0.9991278231234422, iteration: 241483
loss: 0.975957453250885,grad_norm: 0.9695213226762054, iteration: 241484
loss: 1.0078376531600952,grad_norm: 0.9964106112484161, iteration: 241485
loss: 1.0759600400924683,grad_norm: 0.999999613135879, iteration: 241486
loss: 1.1684539318084717,grad_norm: 0.9999992152450268, iteration: 241487
loss: 1.167365312576294,grad_norm: 0.9999994878118219, iteration: 241488
loss: 1.0465571880340576,grad_norm: 0.9999990563330782, iteration: 241489
loss: 1.0445301532745361,grad_norm: 0.9480568716845243, iteration: 241490
loss: 1.132496953010559,grad_norm: 0.9912153292757959, iteration: 241491
loss: 1.0291563272476196,grad_norm: 0.9663601532130737, iteration: 241492
loss: 1.100965142250061,grad_norm: 0.9308331373385476, iteration: 241493
loss: 1.0131001472473145,grad_norm: 0.955794145309735, iteration: 241494
loss: 1.0440049171447754,grad_norm: 0.9999991494242177, iteration: 241495
loss: 1.0201044082641602,grad_norm: 0.9999991633006847, iteration: 241496
loss: 1.1063454151153564,grad_norm: 0.9999994499936329, iteration: 241497
loss: 1.0724685192108154,grad_norm: 0.9999990207328538, iteration: 241498
loss: 1.052209734916687,grad_norm: 0.9999992527838372, iteration: 241499
loss: 1.091469407081604,grad_norm: 0.9999990833385412, iteration: 241500
loss: 1.0584251880645752,grad_norm: 0.9153061439911813, iteration: 241501
loss: 0.9976409673690796,grad_norm: 0.9999999097325873, iteration: 241502
loss: 1.1102783679962158,grad_norm: 0.9999991046265216, iteration: 241503
loss: 1.1314338445663452,grad_norm: 0.9999992438925397, iteration: 241504
loss: 0.9798843264579773,grad_norm: 0.9999991045242425, iteration: 241505
loss: 1.121774673461914,grad_norm: 0.9999991769764898, iteration: 241506
loss: 1.1334346532821655,grad_norm: 0.999999098714178, iteration: 241507
loss: 1.1640959978103638,grad_norm: 0.9999999510529582, iteration: 241508
loss: 1.0354543924331665,grad_norm: 0.9999996776386546, iteration: 241509
loss: 1.0733771324157715,grad_norm: 0.9999995393292648, iteration: 241510
loss: 1.1723082065582275,grad_norm: 0.9977008384318272, iteration: 241511
loss: 1.129036784172058,grad_norm: 0.9999999074977899, iteration: 241512
loss: 1.0850303173065186,grad_norm: 0.9999994594905004, iteration: 241513
loss: 1.0005519390106201,grad_norm: 0.8751020230853945, iteration: 241514
loss: 1.1482388973236084,grad_norm: 0.9999999749756574, iteration: 241515
loss: 1.191249966621399,grad_norm: 0.9999996724378183, iteration: 241516
loss: 1.0857913494110107,grad_norm: 0.8826434894029701, iteration: 241517
loss: 1.0050289630889893,grad_norm: 0.99999941498431, iteration: 241518
loss: 1.1667182445526123,grad_norm: 0.9999994522685753, iteration: 241519
loss: 1.007379412651062,grad_norm: 0.9999990514868301, iteration: 241520
loss: 1.11822509765625,grad_norm: 0.9999995280803401, iteration: 241521
loss: 1.0248371362686157,grad_norm: 0.9581328953312166, iteration: 241522
loss: 1.018771767616272,grad_norm: 0.9738749244889959, iteration: 241523
loss: 1.1175296306610107,grad_norm: 0.9999998455003679, iteration: 241524
loss: 1.2229435443878174,grad_norm: 0.9999998531234575, iteration: 241525
loss: 1.0241941213607788,grad_norm: 0.8404412382620777, iteration: 241526
loss: 0.9906963109970093,grad_norm: 0.9999997995036238, iteration: 241527
loss: 1.1087976694107056,grad_norm: 0.9999991878347891, iteration: 241528
loss: 1.025124192237854,grad_norm: 0.9578402262687347, iteration: 241529
loss: 1.053999662399292,grad_norm: 1.0000000422707327, iteration: 241530
loss: 1.0275905132293701,grad_norm: 0.8539488140764047, iteration: 241531
loss: 1.0531339645385742,grad_norm: 0.9999997537478033, iteration: 241532
loss: 1.2064030170440674,grad_norm: 0.9988830320235065, iteration: 241533
loss: 1.121358036994934,grad_norm: 0.999999243173374, iteration: 241534
loss: 1.1187382936477661,grad_norm: 0.9999994231343401, iteration: 241535
loss: 0.9972891211509705,grad_norm: 0.834240239370887, iteration: 241536
loss: 0.980593740940094,grad_norm: 0.9999991902088854, iteration: 241537
loss: 0.9880747199058533,grad_norm: 0.9225514289040825, iteration: 241538
loss: 1.064997911453247,grad_norm: 0.9999992473389598, iteration: 241539
loss: 1.0787497758865356,grad_norm: 0.8054285889420224, iteration: 241540
loss: 1.0600329637527466,grad_norm: 0.9999992039678125, iteration: 241541
loss: 0.9969168901443481,grad_norm: 0.8345118159064996, iteration: 241542
loss: 1.061592936515808,grad_norm: 0.9999990960516263, iteration: 241543
loss: 0.9914219975471497,grad_norm: 0.9999990774371734, iteration: 241544
loss: 0.9717052578926086,grad_norm: 0.9631461762955495, iteration: 241545
loss: 1.0159162282943726,grad_norm: 0.9999990305236661, iteration: 241546
loss: 1.0247836112976074,grad_norm: 0.947068320498707, iteration: 241547
loss: 1.0006169080734253,grad_norm: 0.8226364298586951, iteration: 241548
loss: 1.028857946395874,grad_norm: 0.9999990018276912, iteration: 241549
loss: 1.0510510206222534,grad_norm: 1.0000000205721682, iteration: 241550
loss: 1.1610056161880493,grad_norm: 0.9999998177173202, iteration: 241551
loss: 1.046770453453064,grad_norm: 0.9104841582744537, iteration: 241552
loss: 1.0172091722488403,grad_norm: 0.9999991627031484, iteration: 241553
loss: 1.0054243803024292,grad_norm: 0.8780141779367352, iteration: 241554
loss: 1.0491498708724976,grad_norm: 0.9827970829939396, iteration: 241555
loss: 1.0562692880630493,grad_norm: 0.9999998101926315, iteration: 241556
loss: 1.0268046855926514,grad_norm: 0.9999992185640275, iteration: 241557
loss: 1.0100111961364746,grad_norm: 0.9890935948190411, iteration: 241558
loss: 1.0679665803909302,grad_norm: 0.9999995273375734, iteration: 241559
loss: 0.9756158590316772,grad_norm: 0.9509663120731302, iteration: 241560
loss: 1.0232657194137573,grad_norm: 0.8985524075263752, iteration: 241561
loss: 1.0178271532058716,grad_norm: 0.9911553879948155, iteration: 241562
loss: 1.0328930616378784,grad_norm: 0.8998025200473465, iteration: 241563
loss: 0.9924613833427429,grad_norm: 0.9999989404983242, iteration: 241564
loss: 0.9921366572380066,grad_norm: 0.8672587565724665, iteration: 241565
loss: 1.020005702972412,grad_norm: 0.999999133039868, iteration: 241566
loss: 1.0402690172195435,grad_norm: 0.9999993984953172, iteration: 241567
loss: 1.0097790956497192,grad_norm: 0.9345723671681103, iteration: 241568
loss: 0.9789535403251648,grad_norm: 0.9653851808303129, iteration: 241569
loss: 0.9817510843276978,grad_norm: 0.8670382288496513, iteration: 241570
loss: 0.998617947101593,grad_norm: 0.9999991630765492, iteration: 241571
loss: 0.9895287156105042,grad_norm: 0.8111993983456971, iteration: 241572
loss: 1.0773605108261108,grad_norm: 0.8877930919933718, iteration: 241573
loss: 0.9830024838447571,grad_norm: 0.9098308159334366, iteration: 241574
loss: 1.0220601558685303,grad_norm: 0.9999997149491685, iteration: 241575
loss: 1.0146986246109009,grad_norm: 0.9999994387484447, iteration: 241576
loss: 0.9761196970939636,grad_norm: 0.8048437592198918, iteration: 241577
loss: 0.9554888606071472,grad_norm: 0.8416461486710283, iteration: 241578
loss: 0.9843288660049438,grad_norm: 0.9009279176947762, iteration: 241579
loss: 1.0046889781951904,grad_norm: 0.9999997434433148, iteration: 241580
loss: 0.9932131171226501,grad_norm: 0.9999989597720159, iteration: 241581
loss: 0.9950048327445984,grad_norm: 0.9999993144584459, iteration: 241582
loss: 1.0494691133499146,grad_norm: 0.9999991477665656, iteration: 241583
loss: 1.048130750656128,grad_norm: 0.9999999439082391, iteration: 241584
loss: 0.9878469109535217,grad_norm: 0.7490212160578832, iteration: 241585
loss: 1.0370336771011353,grad_norm: 0.8447167787465149, iteration: 241586
loss: 0.991215169429779,grad_norm: 0.9411974082335625, iteration: 241587
loss: 0.9680179357528687,grad_norm: 0.9999991319343784, iteration: 241588
loss: 1.0412105321884155,grad_norm: 0.8954856421019203, iteration: 241589
loss: 0.9395295977592468,grad_norm: 0.9659086767679818, iteration: 241590
loss: 1.0320314168930054,grad_norm: 0.8352336627635927, iteration: 241591
loss: 1.0468392372131348,grad_norm: 0.9999990377965039, iteration: 241592
loss: 1.0439084768295288,grad_norm: 0.999999586783831, iteration: 241593
loss: 0.9772809743881226,grad_norm: 0.8286325445591758, iteration: 241594
loss: 1.0546143054962158,grad_norm: 0.999999588712103, iteration: 241595
loss: 1.0077544450759888,grad_norm: 0.9610844099504875, iteration: 241596
loss: 1.0351148843765259,grad_norm: 0.9999991140047042, iteration: 241597
loss: 1.0531020164489746,grad_norm: 0.9999998457543586, iteration: 241598
loss: 0.9787460565567017,grad_norm: 0.9986381808275847, iteration: 241599
loss: 1.0477197170257568,grad_norm: 0.9999991079635538, iteration: 241600
loss: 1.0040947198867798,grad_norm: 0.9810024573065728, iteration: 241601
loss: 0.9981391429901123,grad_norm: 0.8997125875014134, iteration: 241602
loss: 1.001880407333374,grad_norm: 0.8425072169516055, iteration: 241603
loss: 0.9964920878410339,grad_norm: 0.8165367762860339, iteration: 241604
loss: 1.050634741783142,grad_norm: 0.9652345819083488, iteration: 241605
loss: 0.9713248014450073,grad_norm: 0.8655278214828417, iteration: 241606
loss: 1.0400397777557373,grad_norm: 0.999999253127285, iteration: 241607
loss: 1.0557781457901,grad_norm: 0.9999998806472834, iteration: 241608
loss: 1.0126473903656006,grad_norm: 0.885782170333244, iteration: 241609
loss: 1.0250846147537231,grad_norm: 0.8666433502142262, iteration: 241610
loss: 0.9971581101417542,grad_norm: 0.9143239717618606, iteration: 241611
loss: 0.9599617719650269,grad_norm: 0.8781134989579552, iteration: 241612
loss: 1.0341964960098267,grad_norm: 0.9999999781237379, iteration: 241613
loss: 1.0118746757507324,grad_norm: 0.9280544724228453, iteration: 241614
loss: 0.9625129103660583,grad_norm: 0.8911195970786472, iteration: 241615
loss: 1.0123560428619385,grad_norm: 0.9999990702015997, iteration: 241616
loss: 1.0183494091033936,grad_norm: 0.9280823787190061, iteration: 241617
loss: 1.054465651512146,grad_norm: 0.8927849678634499, iteration: 241618
loss: 0.9815696477890015,grad_norm: 0.982478192382743, iteration: 241619
loss: 1.0076191425323486,grad_norm: 0.8266863218850926, iteration: 241620
loss: 1.0197229385375977,grad_norm: 0.8712467537399368, iteration: 241621
loss: 1.0471181869506836,grad_norm: 0.9999992952964948, iteration: 241622
loss: 0.981819212436676,grad_norm: 0.8379552605501569, iteration: 241623
loss: 1.0092167854309082,grad_norm: 0.825741879635577, iteration: 241624
loss: 1.0051321983337402,grad_norm: 0.9997215087918319, iteration: 241625
loss: 1.0139135122299194,grad_norm: 0.9141656976409293, iteration: 241626
loss: 0.9854978919029236,grad_norm: 0.9203101498510541, iteration: 241627
loss: 0.9929815530776978,grad_norm: 0.8723882342780708, iteration: 241628
loss: 0.9937863349914551,grad_norm: 0.8908558800281323, iteration: 241629
loss: 1.0234397649765015,grad_norm: 0.9999998883414832, iteration: 241630
loss: 1.0251944065093994,grad_norm: 0.999999014956593, iteration: 241631
loss: 0.9892534613609314,grad_norm: 0.7727370069711839, iteration: 241632
loss: 0.9998363852500916,grad_norm: 0.8652947140104965, iteration: 241633
loss: 0.9808380603790283,grad_norm: 0.8995071682676435, iteration: 241634
loss: 1.0225958824157715,grad_norm: 0.9631480403163099, iteration: 241635
loss: 1.0569307804107666,grad_norm: 0.9999989360910354, iteration: 241636
loss: 1.0586260557174683,grad_norm: 0.9999996902886232, iteration: 241637
loss: 1.0365774631500244,grad_norm: 0.9014034683849594, iteration: 241638
loss: 1.0417262315750122,grad_norm: 0.7536647229046856, iteration: 241639
loss: 1.0505982637405396,grad_norm: 0.9999999002046313, iteration: 241640
loss: 1.064146637916565,grad_norm: 0.9359780294229311, iteration: 241641
loss: 1.0064338445663452,grad_norm: 0.9999991176105242, iteration: 241642
loss: 1.0158807039260864,grad_norm: 0.9314671080267264, iteration: 241643
loss: 1.104038119316101,grad_norm: 0.9999999323331296, iteration: 241644
loss: 1.0177499055862427,grad_norm: 0.9999991181370457, iteration: 241645
loss: 0.9849714040756226,grad_norm: 0.8804884732302423, iteration: 241646
loss: 0.9728053212165833,grad_norm: 0.867356806713954, iteration: 241647
loss: 1.0693070888519287,grad_norm: 1.000000001528609, iteration: 241648
loss: 1.0035890340805054,grad_norm: 0.9965039169144675, iteration: 241649
loss: 1.014127492904663,grad_norm: 0.8221579843622814, iteration: 241650
loss: 1.029203176498413,grad_norm: 0.9999996322286115, iteration: 241651
loss: 0.9999086260795593,grad_norm: 0.9999990743216628, iteration: 241652
loss: 1.04937744140625,grad_norm: 0.8890092724783819, iteration: 241653
loss: 0.9848529100418091,grad_norm: 0.8107197263437927, iteration: 241654
loss: 0.944254994392395,grad_norm: 0.7931543053133252, iteration: 241655
loss: 1.0127723217010498,grad_norm: 0.9512982451998929, iteration: 241656
loss: 0.9812514185905457,grad_norm: 0.8918793808772324, iteration: 241657
loss: 0.9907387495040894,grad_norm: 0.9766420018963076, iteration: 241658
loss: 1.032265067100525,grad_norm: 0.904168393205474, iteration: 241659
loss: 0.987531304359436,grad_norm: 0.8879732111926147, iteration: 241660
loss: 1.0081086158752441,grad_norm: 0.9654552109654778, iteration: 241661
loss: 1.0471183061599731,grad_norm: 0.8342891643610731, iteration: 241662
loss: 1.0283976793289185,grad_norm: 0.999999055288969, iteration: 241663
loss: 1.0407381057739258,grad_norm: 0.9571566879643352, iteration: 241664
loss: 1.0312386751174927,grad_norm: 0.9867332670608668, iteration: 241665
loss: 1.0284439325332642,grad_norm: 0.8098029592203064, iteration: 241666
loss: 0.9978421330451965,grad_norm: 0.9696688565967424, iteration: 241667
loss: 0.9431549310684204,grad_norm: 0.8227863181623312, iteration: 241668
loss: 0.9525313377380371,grad_norm: 0.8201192161336793, iteration: 241669
loss: 0.9758030772209167,grad_norm: 0.9098875115880652, iteration: 241670
loss: 0.9769713282585144,grad_norm: 0.8979009055426295, iteration: 241671
loss: 0.9952357411384583,grad_norm: 0.895342096000277, iteration: 241672
loss: 0.9690910577774048,grad_norm: 0.8864938179303884, iteration: 241673
loss: 1.0068162679672241,grad_norm: 0.724428173132073, iteration: 241674
loss: 1.0640085935592651,grad_norm: 0.9153994958112213, iteration: 241675
loss: 1.0170128345489502,grad_norm: 0.999999246089235, iteration: 241676
loss: 1.103920578956604,grad_norm: 0.9999996008888339, iteration: 241677
loss: 0.9554410576820374,grad_norm: 0.8546129666456013, iteration: 241678
loss: 0.9940106272697449,grad_norm: 0.9869985120750823, iteration: 241679
loss: 1.0484378337860107,grad_norm: 0.9363381684085599, iteration: 241680
loss: 0.9651111364364624,grad_norm: 0.8690526304965354, iteration: 241681
loss: 0.9919984340667725,grad_norm: 0.9999990860379037, iteration: 241682
loss: 1.0429649353027344,grad_norm: 0.999999916942949, iteration: 241683
loss: 1.089424967765808,grad_norm: 0.9579126712654661, iteration: 241684
loss: 0.9989845156669617,grad_norm: 0.99999904352882, iteration: 241685
loss: 1.0303034782409668,grad_norm: 0.9010280351843991, iteration: 241686
loss: 1.0116379261016846,grad_norm: 0.8483894347396544, iteration: 241687
loss: 1.0351711511611938,grad_norm: 0.9586992814835915, iteration: 241688
loss: 1.0338342189788818,grad_norm: 0.8338199180357894, iteration: 241689
loss: 0.9770913124084473,grad_norm: 0.9113918838385249, iteration: 241690
loss: 0.994932234287262,grad_norm: 0.9110517601837526, iteration: 241691
loss: 1.0225216150283813,grad_norm: 0.9999998233264874, iteration: 241692
loss: 1.007564663887024,grad_norm: 0.9999990922053911, iteration: 241693
loss: 1.0269557237625122,grad_norm: 0.8588387065844392, iteration: 241694
loss: 0.9899807572364807,grad_norm: 0.8649999179401068, iteration: 241695
loss: 1.02426016330719,grad_norm: 0.9880397607360557, iteration: 241696
loss: 1.0131735801696777,grad_norm: 0.9428795517373947, iteration: 241697
loss: 1.0817811489105225,grad_norm: 0.9999992859828108, iteration: 241698
loss: 0.9799941182136536,grad_norm: 0.813826873916389, iteration: 241699
loss: 0.9995288252830505,grad_norm: 0.9999999232129015, iteration: 241700
loss: 1.0022249221801758,grad_norm: 0.9717791298429137, iteration: 241701
loss: 0.9890909790992737,grad_norm: 0.8368545049620849, iteration: 241702
loss: 1.0138006210327148,grad_norm: 0.9999991814993769, iteration: 241703
loss: 1.030580997467041,grad_norm: 0.9999991824143426, iteration: 241704
loss: 0.9884440898895264,grad_norm: 0.8787077047965095, iteration: 241705
loss: 0.9685872197151184,grad_norm: 0.9238360957742678, iteration: 241706
loss: 1.122747540473938,grad_norm: 0.9999990290938707, iteration: 241707
loss: 1.0519983768463135,grad_norm: 0.9230935212444402, iteration: 241708
loss: 1.0094592571258545,grad_norm: 0.9816305518534771, iteration: 241709
loss: 0.9996587038040161,grad_norm: 0.8898215412672391, iteration: 241710
loss: 1.0222513675689697,grad_norm: 0.9714103138413681, iteration: 241711
loss: 1.0518988370895386,grad_norm: 0.9021900720427667, iteration: 241712
loss: 1.018894076347351,grad_norm: 0.9999990293628829, iteration: 241713
loss: 0.9713339805603027,grad_norm: 0.8589691733187221, iteration: 241714
loss: 0.9614992141723633,grad_norm: 0.8974981326763669, iteration: 241715
loss: 0.9879232048988342,grad_norm: 0.9216640445329849, iteration: 241716
loss: 0.9887866377830505,grad_norm: 0.9999990433439951, iteration: 241717
loss: 0.9999731779098511,grad_norm: 0.9275916524506477, iteration: 241718
loss: 0.987327516078949,grad_norm: 0.7772101647535581, iteration: 241719
loss: 1.0304851531982422,grad_norm: 0.7530913994039398, iteration: 241720
loss: 0.9911019206047058,grad_norm: 0.9047661343449621, iteration: 241721
loss: 0.9552214741706848,grad_norm: 0.8742011745157877, iteration: 241722
loss: 1.0196661949157715,grad_norm: 0.999999884078569, iteration: 241723
loss: 0.9870042204856873,grad_norm: 0.7981389881335978, iteration: 241724
loss: 0.9881481528282166,grad_norm: 0.8849729215924883, iteration: 241725
loss: 0.9889998435974121,grad_norm: 0.9808464117043071, iteration: 241726
loss: 1.0027308464050293,grad_norm: 0.9999991805840642, iteration: 241727
loss: 1.015300989151001,grad_norm: 0.8997599100141428, iteration: 241728
loss: 1.0077319145202637,grad_norm: 0.8683887851370273, iteration: 241729
loss: 1.0471900701522827,grad_norm: 0.8368257431210725, iteration: 241730
loss: 1.0192397832870483,grad_norm: 0.9452538198550884, iteration: 241731
loss: 1.0974222421646118,grad_norm: 0.9999991575926236, iteration: 241732
loss: 1.0034793615341187,grad_norm: 0.9999991974830519, iteration: 241733
loss: 1.0070782899856567,grad_norm: 0.8826988059641673, iteration: 241734
loss: 0.9873970150947571,grad_norm: 0.9999991468777994, iteration: 241735
loss: 1.0064446926116943,grad_norm: 0.850058237832718, iteration: 241736
loss: 1.0333325862884521,grad_norm: 0.9999991611633082, iteration: 241737
loss: 0.9955335855484009,grad_norm: 0.9197357823494103, iteration: 241738
loss: 1.026950478553772,grad_norm: 0.9999991189877815, iteration: 241739
loss: 1.0049704313278198,grad_norm: 0.9999991627254768, iteration: 241740
loss: 0.9615737795829773,grad_norm: 0.9999990851878018, iteration: 241741
loss: 0.9946037530899048,grad_norm: 0.7295298328364803, iteration: 241742
loss: 1.0302778482437134,grad_norm: 0.8193909153739046, iteration: 241743
loss: 1.008433222770691,grad_norm: 0.8268304323526944, iteration: 241744
loss: 0.9767951369285583,grad_norm: 0.9999996146862008, iteration: 241745
loss: 1.0167121887207031,grad_norm: 0.9999991330536384, iteration: 241746
loss: 1.0574709177017212,grad_norm: 0.999999893719131, iteration: 241747
loss: 1.0260512828826904,grad_norm: 0.9908586632272125, iteration: 241748
loss: 1.0386909246444702,grad_norm: 0.9999999344341989, iteration: 241749
loss: 0.9903373122215271,grad_norm: 0.9622429100218884, iteration: 241750
loss: 0.9953137636184692,grad_norm: 0.8203294350535258, iteration: 241751
loss: 0.9548412561416626,grad_norm: 0.9999992148313273, iteration: 241752
loss: 0.9706383347511292,grad_norm: 0.9999990389272633, iteration: 241753
loss: 1.0800840854644775,grad_norm: 0.9910247495761642, iteration: 241754
loss: 0.9632081985473633,grad_norm: 0.8503581753093632, iteration: 241755
loss: 1.024118423461914,grad_norm: 0.7737187171352489, iteration: 241756
loss: 1.0196865797042847,grad_norm: 0.9999989742780864, iteration: 241757
loss: 1.012366533279419,grad_norm: 0.9999992198370733, iteration: 241758
loss: 0.9957436919212341,grad_norm: 0.8859292055828412, iteration: 241759
loss: 1.0517641305923462,grad_norm: 0.9731445765539394, iteration: 241760
loss: 1.0114753246307373,grad_norm: 0.8642046735009795, iteration: 241761
loss: 1.010871171951294,grad_norm: 0.8874547059206236, iteration: 241762
loss: 1.00399911403656,grad_norm: 0.8960006484466312, iteration: 241763
loss: 0.988961935043335,grad_norm: 0.8671725531487596, iteration: 241764
loss: 0.9823440313339233,grad_norm: 0.9833036824774519, iteration: 241765
loss: 0.9947063326835632,grad_norm: 0.8197099554443221, iteration: 241766
loss: 1.0117305517196655,grad_norm: 0.9913422779430192, iteration: 241767
loss: 0.9807974696159363,grad_norm: 0.9999992952325206, iteration: 241768
loss: 1.024633765220642,grad_norm: 0.999999190834669, iteration: 241769
loss: 1.0008846521377563,grad_norm: 0.8396572482408438, iteration: 241770
loss: 1.006587028503418,grad_norm: 0.7677323707856931, iteration: 241771
loss: 1.0256309509277344,grad_norm: 0.9053145631965646, iteration: 241772
loss: 1.0145343542099,grad_norm: 0.8281063269914629, iteration: 241773
loss: 1.0127806663513184,grad_norm: 0.9622847306702633, iteration: 241774
loss: 1.0113657712936401,grad_norm: 0.8491102373204555, iteration: 241775
loss: 1.033017873764038,grad_norm: 0.9999994185651262, iteration: 241776
loss: 0.9999346733093262,grad_norm: 0.8698952909434594, iteration: 241777
loss: 1.0039767026901245,grad_norm: 0.9412801992175323, iteration: 241778
loss: 1.032081961631775,grad_norm: 0.9999998685828382, iteration: 241779
loss: 0.9942218661308289,grad_norm: 0.8994788227283603, iteration: 241780
loss: 1.0384999513626099,grad_norm: 0.9943280365406321, iteration: 241781
loss: 0.9800693392753601,grad_norm: 0.9380800748223678, iteration: 241782
loss: 1.0119577646255493,grad_norm: 0.9999992347045035, iteration: 241783
loss: 1.0631548166275024,grad_norm: 0.9861255395335253, iteration: 241784
loss: 0.9937608242034912,grad_norm: 0.8132035744541268, iteration: 241785
loss: 0.9887906908988953,grad_norm: 0.9503944437479028, iteration: 241786
loss: 1.0115734338760376,grad_norm: 0.999999049700682, iteration: 241787
loss: 1.0523881912231445,grad_norm: 0.9516909024246729, iteration: 241788
loss: 0.9859253168106079,grad_norm: 0.9237800859468617, iteration: 241789
loss: 1.059956431388855,grad_norm: 0.99999943233974, iteration: 241790
loss: 0.977327823638916,grad_norm: 0.9500779323240288, iteration: 241791
loss: 1.1086335182189941,grad_norm: 0.9999991820275557, iteration: 241792
loss: 0.9918648600578308,grad_norm: 0.8998981153490782, iteration: 241793
loss: 0.9875688552856445,grad_norm: 0.9748844183020445, iteration: 241794
loss: 1.014223575592041,grad_norm: 0.707774229814471, iteration: 241795
loss: 1.0154963731765747,grad_norm: 0.9355025501000689, iteration: 241796
loss: 0.9933978915214539,grad_norm: 0.9959585406215876, iteration: 241797
loss: 1.0098891258239746,grad_norm: 0.8446426029678723, iteration: 241798
loss: 1.0045080184936523,grad_norm: 0.9677302740648729, iteration: 241799
loss: 1.0358490943908691,grad_norm: 0.999999766188404, iteration: 241800
loss: 0.9906618595123291,grad_norm: 0.7887654554548957, iteration: 241801
loss: 0.991270124912262,grad_norm: 0.852169942306444, iteration: 241802
loss: 1.0502278804779053,grad_norm: 0.7722105327537105, iteration: 241803
loss: 1.0000580549240112,grad_norm: 0.7980352744302545, iteration: 241804
loss: 0.9977949261665344,grad_norm: 0.8426495721576767, iteration: 241805
loss: 1.0035505294799805,grad_norm: 0.9787127213687988, iteration: 241806
loss: 1.0093562602996826,grad_norm: 0.9681575948999408, iteration: 241807
loss: 1.0138040781021118,grad_norm: 0.9999996794053819, iteration: 241808
loss: 1.0593611001968384,grad_norm: 0.9999992186123446, iteration: 241809
loss: 0.9919905066490173,grad_norm: 0.8725979992614216, iteration: 241810
loss: 0.9887698888778687,grad_norm: 0.9898403277204593, iteration: 241811
loss: 0.9779719114303589,grad_norm: 0.9626802816932648, iteration: 241812
loss: 1.1147481203079224,grad_norm: 0.925583233401979, iteration: 241813
loss: 1.0044760704040527,grad_norm: 0.8095738236536923, iteration: 241814
loss: 1.0142148733139038,grad_norm: 0.8215135558803763, iteration: 241815
loss: 1.0018144845962524,grad_norm: 0.9999993813368944, iteration: 241816
loss: 0.9616748690605164,grad_norm: 0.947012302202134, iteration: 241817
loss: 0.9929800033569336,grad_norm: 0.9393386150050639, iteration: 241818
loss: 0.9978399872779846,grad_norm: 0.8655252527787178, iteration: 241819
loss: 1.021321177482605,grad_norm: 0.8772824388115531, iteration: 241820
loss: 0.99091637134552,grad_norm: 0.9722773868508983, iteration: 241821
loss: 1.0211764574050903,grad_norm: 0.9999990496589334, iteration: 241822
loss: 1.0396442413330078,grad_norm: 0.7426094285737906, iteration: 241823
loss: 1.0296260118484497,grad_norm: 0.8735878872871986, iteration: 241824
loss: 0.9915295839309692,grad_norm: 0.9354766280027197, iteration: 241825
loss: 1.039807677268982,grad_norm: 0.8601135102150349, iteration: 241826
loss: 0.9804608225822449,grad_norm: 0.9999992903781019, iteration: 241827
loss: 0.9990078806877136,grad_norm: 0.9999991587461242, iteration: 241828
loss: 0.9979683756828308,grad_norm: 0.848608573378562, iteration: 241829
loss: 0.9920892715454102,grad_norm: 0.9835282582087845, iteration: 241830
loss: 1.0037935972213745,grad_norm: 0.9042701400973822, iteration: 241831
loss: 1.0058784484863281,grad_norm: 0.8642966023103693, iteration: 241832
loss: 0.9977028369903564,grad_norm: 0.9666649451894838, iteration: 241833
loss: 0.9914848804473877,grad_norm: 0.9999990711971178, iteration: 241834
loss: 1.2032792568206787,grad_norm: 0.9999996788214429, iteration: 241835
loss: 1.0075534582138062,grad_norm: 0.9999992285638211, iteration: 241836
loss: 1.0218669176101685,grad_norm: 0.9999990769118672, iteration: 241837
loss: 0.9928516149520874,grad_norm: 0.8051307962160755, iteration: 241838
loss: 1.1552698612213135,grad_norm: 0.9999996855203029, iteration: 241839
loss: 1.062588095664978,grad_norm: 0.999999212222635, iteration: 241840
loss: 0.9905576705932617,grad_norm: 0.9327715654978541, iteration: 241841
loss: 0.9992433190345764,grad_norm: 0.9337600736602147, iteration: 241842
loss: 1.0058780908584595,grad_norm: 0.9880381335996389, iteration: 241843
loss: 1.006373643875122,grad_norm: 0.8725219229874576, iteration: 241844
loss: 1.0092118978500366,grad_norm: 0.9702862466499365, iteration: 241845
loss: 0.9728037118911743,grad_norm: 0.9999995539144471, iteration: 241846
loss: 1.0404486656188965,grad_norm: 0.8373911911355001, iteration: 241847
loss: 1.012783408164978,grad_norm: 0.8328878216778851, iteration: 241848
loss: 1.0092198848724365,grad_norm: 0.999999115447417, iteration: 241849
loss: 1.0012441873550415,grad_norm: 0.8755747668361248, iteration: 241850
loss: 0.9781588912010193,grad_norm: 0.8512411381801274, iteration: 241851
loss: 1.0969741344451904,grad_norm: 0.8557882073571383, iteration: 241852
loss: 1.0553661584854126,grad_norm: 0.8472433904799578, iteration: 241853
loss: 0.9931331872940063,grad_norm: 0.7824645644887304, iteration: 241854
loss: 1.1283633708953857,grad_norm: 0.9318342048332765, iteration: 241855
loss: 0.998399555683136,grad_norm: 0.9062058156404784, iteration: 241856
loss: 0.9973165988922119,grad_norm: 0.8652364539353391, iteration: 241857
loss: 0.997894287109375,grad_norm: 0.9999990833099975, iteration: 241858
loss: 1.026835560798645,grad_norm: 0.8786898812944902, iteration: 241859
loss: 0.9955474734306335,grad_norm: 0.9999991064868176, iteration: 241860
loss: 1.015903353691101,grad_norm: 0.9115996595128343, iteration: 241861
loss: 1.1352134943008423,grad_norm: 0.9999991325402339, iteration: 241862
loss: 1.0065553188323975,grad_norm: 0.9999991138281243, iteration: 241863
loss: 0.9789355397224426,grad_norm: 0.9087818779180826, iteration: 241864
loss: 0.9838882088661194,grad_norm: 0.9087516524049318, iteration: 241865
loss: 0.991145133972168,grad_norm: 0.9573006592411771, iteration: 241866
loss: 1.0358864068984985,grad_norm: 0.9999992632357236, iteration: 241867
loss: 1.008604884147644,grad_norm: 0.9999988506165175, iteration: 241868
loss: 0.9772931933403015,grad_norm: 0.873825653256292, iteration: 241869
loss: 1.002729058265686,grad_norm: 0.9949855215376268, iteration: 241870
loss: 1.002482295036316,grad_norm: 0.9999990999184748, iteration: 241871
loss: 0.9925119280815125,grad_norm: 0.8954050479306714, iteration: 241872
loss: 0.9849392175674438,grad_norm: 0.9999989383713922, iteration: 241873
loss: 1.0389291048049927,grad_norm: 0.9999993821926221, iteration: 241874
loss: 0.9984470009803772,grad_norm: 0.9198525943029869, iteration: 241875
loss: 0.9787773489952087,grad_norm: 0.9248746035900585, iteration: 241876
loss: 1.004494309425354,grad_norm: 0.8392234637709648, iteration: 241877
loss: 1.0724610090255737,grad_norm: 0.8880536336955778, iteration: 241878
loss: 0.9922301769256592,grad_norm: 0.9315048729665694, iteration: 241879
loss: 0.9773339629173279,grad_norm: 0.9313982551336146, iteration: 241880
loss: 0.9904620051383972,grad_norm: 0.8814008074395758, iteration: 241881
loss: 1.0080535411834717,grad_norm: 0.8408666772612821, iteration: 241882
loss: 0.973210334777832,grad_norm: 0.9836604401463774, iteration: 241883
loss: 1.0317991971969604,grad_norm: 0.9999990475782577, iteration: 241884
loss: 1.074384331703186,grad_norm: 0.9999990906896418, iteration: 241885
loss: 1.0271917581558228,grad_norm: 0.9999990504682795, iteration: 241886
loss: 1.002842903137207,grad_norm: 0.9095449002327709, iteration: 241887
loss: 0.9914233088493347,grad_norm: 0.8602766838596134, iteration: 241888
loss: 0.9946721792221069,grad_norm: 0.8803195203768522, iteration: 241889
loss: 0.9774544835090637,grad_norm: 0.8739203626924811, iteration: 241890
loss: 1.0099602937698364,grad_norm: 0.8811567592913278, iteration: 241891
loss: 1.0251202583312988,grad_norm: 0.9797547924472707, iteration: 241892
loss: 0.9981581568717957,grad_norm: 0.9999990836088662, iteration: 241893
loss: 1.1178067922592163,grad_norm: 0.9144692381363004, iteration: 241894
loss: 0.9852040410041809,grad_norm: 0.8842783130308008, iteration: 241895
loss: 0.9965338706970215,grad_norm: 0.8826546449036283, iteration: 241896
loss: 1.007423758506775,grad_norm: 0.8996031430710525, iteration: 241897
loss: 0.9802046418190002,grad_norm: 0.99999915087169, iteration: 241898
loss: 0.9849326610565186,grad_norm: 0.9999990690386249, iteration: 241899
loss: 1.0667657852172852,grad_norm: 0.9999995928419475, iteration: 241900
loss: 1.0111523866653442,grad_norm: 0.9108515187493265, iteration: 241901
loss: 0.9851927757263184,grad_norm: 0.9999992040850453, iteration: 241902
loss: 1.0538922548294067,grad_norm: 0.9999991579765679, iteration: 241903
loss: 1.0466636419296265,grad_norm: 0.9999993426776673, iteration: 241904
loss: 1.0421921014785767,grad_norm: 0.8453353052986162, iteration: 241905
loss: 1.0147054195404053,grad_norm: 0.7905571125569885, iteration: 241906
loss: 1.022792100906372,grad_norm: 0.9999991521361493, iteration: 241907
loss: 1.1218451261520386,grad_norm: 0.9999993157366424, iteration: 241908
loss: 1.0812915563583374,grad_norm: 0.9999997970363874, iteration: 241909
loss: 1.0104007720947266,grad_norm: 0.9784797132558574, iteration: 241910
loss: 0.9653794169425964,grad_norm: 0.9999991366155291, iteration: 241911
loss: 1.014563798904419,grad_norm: 0.9999991258822218, iteration: 241912
loss: 1.0400187969207764,grad_norm: 0.9999991283139972, iteration: 241913
loss: 1.0042787790298462,grad_norm: 0.8086174887016975, iteration: 241914
loss: 1.0029594898223877,grad_norm: 0.9011421273812033, iteration: 241915
loss: 0.9892011880874634,grad_norm: 0.841139851654442, iteration: 241916
loss: 1.0462404489517212,grad_norm: 0.999999842847408, iteration: 241917
loss: 1.0940849781036377,grad_norm: 0.9999992798027456, iteration: 241918
loss: 1.0183571577072144,grad_norm: 0.9999994006609207, iteration: 241919
loss: 1.0402361154556274,grad_norm: 0.8376579916300545, iteration: 241920
loss: 1.0970325469970703,grad_norm: 0.9999993013358652, iteration: 241921
loss: 0.9939262270927429,grad_norm: 0.8689527880655074, iteration: 241922
loss: 1.0095443725585938,grad_norm: 0.9934432226693245, iteration: 241923
loss: 1.0235199928283691,grad_norm: 0.884034680004517, iteration: 241924
loss: 1.0163702964782715,grad_norm: 0.9999998419093565, iteration: 241925
loss: 1.0334604978561401,grad_norm: 0.9999995222274419, iteration: 241926
loss: 0.99073725938797,grad_norm: 0.9999990457700932, iteration: 241927
loss: 1.0016170740127563,grad_norm: 0.8257675269868073, iteration: 241928
loss: 1.0074940919876099,grad_norm: 0.9664931518783069, iteration: 241929
loss: 1.016461730003357,grad_norm: 0.8607990268814162, iteration: 241930
loss: 1.023256778717041,grad_norm: 0.9940954347764657, iteration: 241931
loss: 1.000905990600586,grad_norm: 0.8156972780469836, iteration: 241932
loss: 1.0324292182922363,grad_norm: 0.9999995837913005, iteration: 241933
loss: 1.0204365253448486,grad_norm: 0.8913081375446844, iteration: 241934
loss: 0.9534015655517578,grad_norm: 0.8574236023960786, iteration: 241935
loss: 0.9716967940330505,grad_norm: 0.8859876876697094, iteration: 241936
loss: 0.981451690196991,grad_norm: 0.8671070362942721, iteration: 241937
loss: 1.039155125617981,grad_norm: 0.9999993227179649, iteration: 241938
loss: 0.9980798363685608,grad_norm: 0.9999990389388461, iteration: 241939
loss: 0.9784708619117737,grad_norm: 0.9999994355690763, iteration: 241940
loss: 0.9587581753730774,grad_norm: 0.7938211055838181, iteration: 241941
loss: 1.061299204826355,grad_norm: 0.9999992624591059, iteration: 241942
loss: 1.0324604511260986,grad_norm: 0.9888301738575501, iteration: 241943
loss: 1.017932653427124,grad_norm: 0.9250469362174343, iteration: 241944
loss: 1.0136158466339111,grad_norm: 0.8904442088606073, iteration: 241945
loss: 1.011033535003662,grad_norm: 0.8266073344693435, iteration: 241946
loss: 0.9997284412384033,grad_norm: 0.8796606330490938, iteration: 241947
loss: 1.0046610832214355,grad_norm: 0.9999991412720193, iteration: 241948
loss: 0.9847091436386108,grad_norm: 0.9999991411808351, iteration: 241949
loss: 0.9765384793281555,grad_norm: 0.9999992134751595, iteration: 241950
loss: 0.9790527820587158,grad_norm: 0.999999156663035, iteration: 241951
loss: 0.9918410778045654,grad_norm: 0.9042580674346059, iteration: 241952
loss: 1.0312026739120483,grad_norm: 0.9999996290139689, iteration: 241953
loss: 0.9386894106864929,grad_norm: 0.8257194576338677, iteration: 241954
loss: 1.0549935102462769,grad_norm: 0.9999995784456334, iteration: 241955
loss: 0.993191123008728,grad_norm: 0.9999991542669384, iteration: 241956
loss: 1.0133053064346313,grad_norm: 0.9133299827372806, iteration: 241957
loss: 1.0025882720947266,grad_norm: 0.9999990325950359, iteration: 241958
loss: 1.0202984809875488,grad_norm: 0.9999997159284156, iteration: 241959
loss: 1.0147775411605835,grad_norm: 0.9999989840259007, iteration: 241960
loss: 0.9862682223320007,grad_norm: 0.9555924653061078, iteration: 241961
loss: 0.9807582497596741,grad_norm: 0.999998974945131, iteration: 241962
loss: 0.993064820766449,grad_norm: 0.830749156059644, iteration: 241963
loss: 0.9993866682052612,grad_norm: 0.9999991952799407, iteration: 241964
loss: 0.9838827848434448,grad_norm: 0.9999992843525232, iteration: 241965
loss: 0.9931958317756653,grad_norm: 0.9789950616088777, iteration: 241966
loss: 0.9806453585624695,grad_norm: 0.9784390516534559, iteration: 241967
loss: 1.0611484050750732,grad_norm: 0.9999991778839753, iteration: 241968
loss: 1.0637826919555664,grad_norm: 0.9716775183217234, iteration: 241969
loss: 0.9624196887016296,grad_norm: 0.9100788666862065, iteration: 241970
loss: 1.0134990215301514,grad_norm: 0.8115333204884766, iteration: 241971
loss: 1.0027570724487305,grad_norm: 0.89584613918298, iteration: 241972
loss: 0.9825544953346252,grad_norm: 0.7398743368257672, iteration: 241973
loss: 1.1166143417358398,grad_norm: 0.9999996682891195, iteration: 241974
loss: 0.9893211126327515,grad_norm: 0.8461052223077294, iteration: 241975
loss: 0.9989855289459229,grad_norm: 0.9999993766074791, iteration: 241976
loss: 0.9751341342926025,grad_norm: 0.9633379573802435, iteration: 241977
loss: 0.9863293766975403,grad_norm: 0.9555354335582543, iteration: 241978
loss: 0.997832715511322,grad_norm: 0.93973594001357, iteration: 241979
loss: 0.9921535849571228,grad_norm: 0.9999991233236706, iteration: 241980
loss: 0.9835240244865417,grad_norm: 0.864803458948486, iteration: 241981
loss: 0.9989688396453857,grad_norm: 0.9999993143585179, iteration: 241982
loss: 1.1350154876708984,grad_norm: 0.9999994751764318, iteration: 241983
loss: 0.9721762537956238,grad_norm: 0.9331863053863737, iteration: 241984
loss: 1.0150017738342285,grad_norm: 0.9999989736362628, iteration: 241985
loss: 1.0035858154296875,grad_norm: 0.85323260343531, iteration: 241986
loss: 0.992218554019928,grad_norm: 0.8924265707622913, iteration: 241987
loss: 1.0199698209762573,grad_norm: 0.7973130689838814, iteration: 241988
loss: 0.9894346594810486,grad_norm: 0.9288275213574566, iteration: 241989
loss: 1.001761794090271,grad_norm: 0.9728599933711688, iteration: 241990
loss: 0.97768634557724,grad_norm: 0.9385168233475032, iteration: 241991
loss: 1.0064340829849243,grad_norm: 0.9725082451273932, iteration: 241992
loss: 1.0157380104064941,grad_norm: 0.9999991730371244, iteration: 241993
loss: 0.9975138902664185,grad_norm: 0.9056538305056903, iteration: 241994
loss: 1.0096360445022583,grad_norm: 0.9628984607773448, iteration: 241995
loss: 1.0628944635391235,grad_norm: 0.9999991633181728, iteration: 241996
loss: 1.0033513307571411,grad_norm: 0.8179878080910661, iteration: 241997
loss: 1.022033929824829,grad_norm: 0.9338644830015095, iteration: 241998
loss: 1.0065885782241821,grad_norm: 0.9452376241634147, iteration: 241999
loss: 0.9884384274482727,grad_norm: 0.7382145008569945, iteration: 242000
loss: 1.008857011795044,grad_norm: 0.8652311462585309, iteration: 242001
loss: 0.9637745022773743,grad_norm: 0.8175895488087198, iteration: 242002
loss: 1.004282832145691,grad_norm: 0.9999992321669221, iteration: 242003
loss: 0.9776951670646667,grad_norm: 0.8173121511583724, iteration: 242004
loss: 0.9862880706787109,grad_norm: 0.8536954712698214, iteration: 242005
loss: 0.9566129446029663,grad_norm: 0.89752007956318, iteration: 242006
loss: 1.1105163097381592,grad_norm: 0.9999991950286944, iteration: 242007
loss: 1.0156240463256836,grad_norm: 0.8302983228828058, iteration: 242008
loss: 0.9924333095550537,grad_norm: 0.7288596046970302, iteration: 242009
loss: 1.035888910293579,grad_norm: 0.9999993572042105, iteration: 242010
loss: 0.9890663027763367,grad_norm: 0.8072737123294006, iteration: 242011
loss: 1.0257635116577148,grad_norm: 0.9999990698908813, iteration: 242012
loss: 1.0210707187652588,grad_norm: 0.7925149329158458, iteration: 242013
loss: 0.9996345043182373,grad_norm: 0.8399196018237407, iteration: 242014
loss: 1.0461087226867676,grad_norm: 0.8613471396281719, iteration: 242015
loss: 1.0609183311462402,grad_norm: 0.9999991663515665, iteration: 242016
loss: 0.9853576421737671,grad_norm: 0.7751629711134065, iteration: 242017
loss: 1.0332528352737427,grad_norm: 0.8719883315506963, iteration: 242018
loss: 1.0322767496109009,grad_norm: 0.9999992152459183, iteration: 242019
loss: 0.9756969809532166,grad_norm: 0.9979065215786751, iteration: 242020
loss: 0.989084780216217,grad_norm: 0.9222285576173802, iteration: 242021
loss: 1.0215901136398315,grad_norm: 0.9999990619106445, iteration: 242022
loss: 1.063626766204834,grad_norm: 0.9999994541375535, iteration: 242023
loss: 0.9631215929985046,grad_norm: 0.8270006036798105, iteration: 242024
loss: 1.003432035446167,grad_norm: 0.9818957318619448, iteration: 242025
loss: 0.9919621348381042,grad_norm: 0.716245066458122, iteration: 242026
loss: 1.0249037742614746,grad_norm: 0.8564727645900435, iteration: 242027
loss: 1.0343915224075317,grad_norm: 0.9999991355530017, iteration: 242028
loss: 1.0702871084213257,grad_norm: 0.7630552248202797, iteration: 242029
loss: 1.0785400867462158,grad_norm: 0.9888198849422181, iteration: 242030
loss: 0.9501820206642151,grad_norm: 0.7653384907654351, iteration: 242031
loss: 1.009623646736145,grad_norm: 0.8635394414395179, iteration: 242032
loss: 0.9662860035896301,grad_norm: 0.9873188060536231, iteration: 242033
loss: 1.0104807615280151,grad_norm: 0.8239979153451323, iteration: 242034
loss: 0.9916090965270996,grad_norm: 0.9466351075982478, iteration: 242035
loss: 0.9827309250831604,grad_norm: 0.9118392520553898, iteration: 242036
loss: 0.9925189018249512,grad_norm: 0.9272034512685808, iteration: 242037
loss: 0.9856233596801758,grad_norm: 0.9999991195901652, iteration: 242038
loss: 1.1278151273727417,grad_norm: 0.9999991151329775, iteration: 242039
loss: 0.9673358201980591,grad_norm: 0.8616078461476095, iteration: 242040
loss: 1.0002028942108154,grad_norm: 0.7316551109739141, iteration: 242041
loss: 1.0429105758666992,grad_norm: 0.9999996859101784, iteration: 242042
loss: 0.9924293756484985,grad_norm: 0.8536159376755873, iteration: 242043
loss: 0.9924597144126892,grad_norm: 0.8382708850038905, iteration: 242044
loss: 0.9771093726158142,grad_norm: 0.9434923558304329, iteration: 242045
loss: 0.9866365790367126,grad_norm: 0.8434833460482307, iteration: 242046
loss: 1.0109899044036865,grad_norm: 0.9999993383172774, iteration: 242047
loss: 1.00475013256073,grad_norm: 0.7457470229810358, iteration: 242048
loss: 0.9847897291183472,grad_norm: 0.8985781149756484, iteration: 242049
loss: 1.1053417921066284,grad_norm: 0.9999990687449845, iteration: 242050
loss: 1.0029516220092773,grad_norm: 0.9878885447297946, iteration: 242051
loss: 0.9471560120582581,grad_norm: 0.9048706377767641, iteration: 242052
loss: 1.085971474647522,grad_norm: 0.9999992173963016, iteration: 242053
loss: 0.9930990934371948,grad_norm: 0.8343914465435188, iteration: 242054
loss: 1.0135751962661743,grad_norm: 0.9038631217359109, iteration: 242055
loss: 0.9888082146644592,grad_norm: 0.9276565256093802, iteration: 242056
loss: 0.9787946343421936,grad_norm: 0.9550070762539522, iteration: 242057
loss: 0.990409791469574,grad_norm: 0.9999990070941038, iteration: 242058
loss: 0.9854883551597595,grad_norm: 0.9207722509280781, iteration: 242059
loss: 1.0105557441711426,grad_norm: 0.9999991649928083, iteration: 242060
loss: 1.0218719244003296,grad_norm: 0.9479771783445445, iteration: 242061
loss: 1.0022938251495361,grad_norm: 0.8871158092369241, iteration: 242062
loss: 0.9670509696006775,grad_norm: 0.9722606084418346, iteration: 242063
loss: 0.9896079301834106,grad_norm: 0.9890723680759574, iteration: 242064
loss: 0.9787578582763672,grad_norm: 0.8012899108246141, iteration: 242065
loss: 1.029234528541565,grad_norm: 0.9844564832205455, iteration: 242066
loss: 0.965529203414917,grad_norm: 0.9616452441467163, iteration: 242067
loss: 1.01371169090271,grad_norm: 0.9110076015960471, iteration: 242068
loss: 1.028120994567871,grad_norm: 0.9999990690054507, iteration: 242069
loss: 1.011478304862976,grad_norm: 0.9999991665440586, iteration: 242070
loss: 1.006917953491211,grad_norm: 0.8885727215718948, iteration: 242071
loss: 1.0249844789505005,grad_norm: 0.9409457949372555, iteration: 242072
loss: 0.969114363193512,grad_norm: 0.9975112811236505, iteration: 242073
loss: 1.0308377742767334,grad_norm: 0.8892689814611678, iteration: 242074
loss: 0.9816863536834717,grad_norm: 0.7888354338661218, iteration: 242075
loss: 1.015549898147583,grad_norm: 0.9999991595080471, iteration: 242076
loss: 0.9687025547027588,grad_norm: 0.955703521234422, iteration: 242077
loss: 1.031589150428772,grad_norm: 0.9999992556147842, iteration: 242078
loss: 1.0107924938201904,grad_norm: 0.8552327806823501, iteration: 242079
loss: 0.9987068772315979,grad_norm: 0.9202981575848682, iteration: 242080
loss: 0.9999660849571228,grad_norm: 0.818524665305653, iteration: 242081
loss: 0.9812487363815308,grad_norm: 0.8561831254917911, iteration: 242082
loss: 0.949874222278595,grad_norm: 0.9687310418719401, iteration: 242083
loss: 0.977741003036499,grad_norm: 0.8413523597888912, iteration: 242084
loss: 1.0108447074890137,grad_norm: 0.9999990588769739, iteration: 242085
loss: 1.0241954326629639,grad_norm: 0.9999995347962254, iteration: 242086
loss: 1.027092695236206,grad_norm: 0.9999993024263018, iteration: 242087
loss: 1.0020005702972412,grad_norm: 0.9999990568918793, iteration: 242088
loss: 0.9742337465286255,grad_norm: 0.9041096925257428, iteration: 242089
loss: 1.0062575340270996,grad_norm: 0.8268338835213596, iteration: 242090
loss: 0.9996110796928406,grad_norm: 0.8396095106600149, iteration: 242091
loss: 1.011159896850586,grad_norm: 0.9844459293159359, iteration: 242092
loss: 1.0178282260894775,grad_norm: 0.9999990750512469, iteration: 242093
loss: 0.9965235590934753,grad_norm: 0.8635691311456644, iteration: 242094
loss: 0.9948969483375549,grad_norm: 0.9999990941825071, iteration: 242095
loss: 1.0751018524169922,grad_norm: 0.9999998067625359, iteration: 242096
loss: 0.967893123626709,grad_norm: 0.8266152622600819, iteration: 242097
loss: 1.0205464363098145,grad_norm: 0.8678513660737558, iteration: 242098
loss: 1.0184991359710693,grad_norm: 0.9999992381501339, iteration: 242099
loss: 0.9725798964500427,grad_norm: 0.8333886952492848, iteration: 242100
loss: 0.9979751706123352,grad_norm: 0.9389221609878713, iteration: 242101
loss: 0.9909810423851013,grad_norm: 0.9923259319454382, iteration: 242102
loss: 0.9928656220436096,grad_norm: 0.8478763317882164, iteration: 242103
loss: 0.9466297626495361,grad_norm: 0.9543336981661702, iteration: 242104
loss: 0.9921639561653137,grad_norm: 0.8087980675807525, iteration: 242105
loss: 1.0146249532699585,grad_norm: 0.8359850569395955, iteration: 242106
loss: 0.9982234239578247,grad_norm: 0.9844271373398112, iteration: 242107
loss: 0.9811198711395264,grad_norm: 0.8178656548111664, iteration: 242108
loss: 1.0399655103683472,grad_norm: 0.9999991341321814, iteration: 242109
loss: 1.0536572933197021,grad_norm: 0.9379222629798643, iteration: 242110
loss: 1.0157135725021362,grad_norm: 0.8439276000564139, iteration: 242111
loss: 1.0057467222213745,grad_norm: 0.8743208116834302, iteration: 242112
loss: 1.0011500120162964,grad_norm: 0.7156029613559352, iteration: 242113
loss: 0.9917482733726501,grad_norm: 0.9999996019323657, iteration: 242114
loss: 1.00021493434906,grad_norm: 0.919007670430119, iteration: 242115
loss: 1.0119094848632812,grad_norm: 0.9999991743161569, iteration: 242116
loss: 1.0222631692886353,grad_norm: 0.9999994419510818, iteration: 242117
loss: 1.005797266960144,grad_norm: 0.8881057227319861, iteration: 242118
loss: 1.0091413259506226,grad_norm: 0.9646211931638504, iteration: 242119
loss: 0.9988782405853271,grad_norm: 0.8829961392996342, iteration: 242120
loss: 1.001111388206482,grad_norm: 0.7718671994537599, iteration: 242121
loss: 1.0034375190734863,grad_norm: 0.927990010890552, iteration: 242122
loss: 1.0288833379745483,grad_norm: 0.9345194269851026, iteration: 242123
loss: 0.9844852089881897,grad_norm: 0.9243517151843018, iteration: 242124
loss: 0.9794490933418274,grad_norm: 0.8729941537837319, iteration: 242125
loss: 0.9973149299621582,grad_norm: 0.84716651839463, iteration: 242126
loss: 1.0615946054458618,grad_norm: 0.899419828383754, iteration: 242127
loss: 1.0338386297225952,grad_norm: 0.9081750394012622, iteration: 242128
loss: 0.9854342937469482,grad_norm: 0.9647237286288649, iteration: 242129
loss: 0.9720891118049622,grad_norm: 0.9999991022615621, iteration: 242130
loss: 1.0397199392318726,grad_norm: 0.9208969578480984, iteration: 242131
loss: 0.993462085723877,grad_norm: 0.9999992330925344, iteration: 242132
loss: 0.978554368019104,grad_norm: 0.8368880832396877, iteration: 242133
loss: 1.0182448625564575,grad_norm: 0.9999993849241023, iteration: 242134
loss: 0.9895215630531311,grad_norm: 0.8788999705339329, iteration: 242135
loss: 0.9964678287506104,grad_norm: 0.999998959011937, iteration: 242136
loss: 1.0066317319869995,grad_norm: 0.8247967446159544, iteration: 242137
loss: 0.9591012597084045,grad_norm: 0.8931486297610126, iteration: 242138
loss: 1.0435731410980225,grad_norm: 0.9863174610101312, iteration: 242139
loss: 0.9855461716651917,grad_norm: 0.9021760676825155, iteration: 242140
loss: 1.000736951828003,grad_norm: 0.7292461719293144, iteration: 242141
loss: 1.0060359239578247,grad_norm: 0.8467652544910568, iteration: 242142
loss: 1.0160175561904907,grad_norm: 0.9999991895274475, iteration: 242143
loss: 0.9656609296798706,grad_norm: 0.7493660615952221, iteration: 242144
loss: 1.021675944328308,grad_norm: 0.8729738407486806, iteration: 242145
loss: 0.9939268231391907,grad_norm: 0.9413817759231563, iteration: 242146
loss: 1.0258420705795288,grad_norm: 0.999998956105573, iteration: 242147
loss: 0.9641211032867432,grad_norm: 0.7909116147972627, iteration: 242148
loss: 0.9655309319496155,grad_norm: 0.9857859067809559, iteration: 242149
loss: 0.9796096086502075,grad_norm: 0.9503844456267551, iteration: 242150
loss: 0.975458562374115,grad_norm: 0.862842005237088, iteration: 242151
loss: 1.0797038078308105,grad_norm: 0.9999998806859721, iteration: 242152
loss: 0.9862084984779358,grad_norm: 0.8772974404938956, iteration: 242153
loss: 1.0424456596374512,grad_norm: 0.9767480730693925, iteration: 242154
loss: 1.042292594909668,grad_norm: 0.999999086393983, iteration: 242155
loss: 1.011978268623352,grad_norm: 0.7874917287106157, iteration: 242156
loss: 1.0013813972473145,grad_norm: 0.9999995422190254, iteration: 242157
loss: 0.9848164319992065,grad_norm: 0.8763447617197686, iteration: 242158
loss: 0.9624960422515869,grad_norm: 0.827905728339607, iteration: 242159
loss: 0.9988831877708435,grad_norm: 0.8489119785278437, iteration: 242160
loss: 1.0154447555541992,grad_norm: 0.8708967130061607, iteration: 242161
loss: 0.9865002632141113,grad_norm: 0.8967277281585591, iteration: 242162
loss: 0.9915810227394104,grad_norm: 0.7506980331191166, iteration: 242163
loss: 0.9630831480026245,grad_norm: 0.9518397979255478, iteration: 242164
loss: 0.9425857663154602,grad_norm: 0.9999992464611521, iteration: 242165
loss: 1.012434720993042,grad_norm: 0.9697746388245856, iteration: 242166
loss: 1.011470913887024,grad_norm: 0.9748193598590241, iteration: 242167
loss: 0.9781987071037292,grad_norm: 0.9819970859731647, iteration: 242168
loss: 1.0145933628082275,grad_norm: 0.9536802803800233, iteration: 242169
loss: 1.0277546644210815,grad_norm: 0.9999998185357483, iteration: 242170
loss: 0.9939721822738647,grad_norm: 0.9600022236342617, iteration: 242171
loss: 0.9954990744590759,grad_norm: 0.9999992033972411, iteration: 242172
loss: 1.0565499067306519,grad_norm: 0.9655556609427995, iteration: 242173
loss: 1.056319236755371,grad_norm: 0.9999996149171034, iteration: 242174
loss: 0.954687237739563,grad_norm: 0.8476931830040286, iteration: 242175
loss: 0.9926795363426208,grad_norm: 0.9109281018350798, iteration: 242176
loss: 0.9990264177322388,grad_norm: 0.7937897569016489, iteration: 242177
loss: 1.0097929239273071,grad_norm: 0.9907779100557063, iteration: 242178
loss: 0.9734740853309631,grad_norm: 0.89098466950284, iteration: 242179
loss: 1.014162302017212,grad_norm: 0.7960193963175004, iteration: 242180
loss: 0.9737663865089417,grad_norm: 0.8107089257796685, iteration: 242181
loss: 0.9880378842353821,grad_norm: 0.8503018903662077, iteration: 242182
loss: 1.047414779663086,grad_norm: 0.9583276620587479, iteration: 242183
loss: 0.9952620267868042,grad_norm: 0.8799842140890427, iteration: 242184
loss: 1.0208745002746582,grad_norm: 0.9601626876968492, iteration: 242185
loss: 1.0029703378677368,grad_norm: 0.9999991212141995, iteration: 242186
loss: 0.9929133653640747,grad_norm: 0.8689874902753496, iteration: 242187
loss: 0.9850841164588928,grad_norm: 0.8787680375469449, iteration: 242188
loss: 1.0016770362854004,grad_norm: 0.9724649654725415, iteration: 242189
loss: 0.9648174047470093,grad_norm: 0.9800042905423919, iteration: 242190
loss: 0.9853291511535645,grad_norm: 0.9777252569244438, iteration: 242191
loss: 0.9508529305458069,grad_norm: 0.9006932089763598, iteration: 242192
loss: 0.9787743091583252,grad_norm: 0.9999989976706022, iteration: 242193
loss: 0.9926913380622864,grad_norm: 0.9594849480948732, iteration: 242194
loss: 0.9947425127029419,grad_norm: 0.9908849910665868, iteration: 242195
loss: 0.992939829826355,grad_norm: 0.9517496545395651, iteration: 242196
loss: 0.9790669679641724,grad_norm: 0.7802391084568168, iteration: 242197
loss: 1.0161951780319214,grad_norm: 0.9288771937794724, iteration: 242198
loss: 0.9884307384490967,grad_norm: 0.8274351102632372, iteration: 242199
loss: 1.021675705909729,grad_norm: 0.8351832708596569, iteration: 242200
loss: 0.9938443303108215,grad_norm: 0.854749881280655, iteration: 242201
loss: 1.0360735654830933,grad_norm: 0.999999167070773, iteration: 242202
loss: 1.0235247611999512,grad_norm: 0.9700366958420791, iteration: 242203
loss: 0.9621189832687378,grad_norm: 0.7771011435486769, iteration: 242204
loss: 1.017998218536377,grad_norm: 0.8795631520320224, iteration: 242205
loss: 0.9887296557426453,grad_norm: 0.9015596697277156, iteration: 242206
loss: 1.0153782367706299,grad_norm: 0.9999992753181394, iteration: 242207
loss: 1.0399177074432373,grad_norm: 0.8402634963014569, iteration: 242208
loss: 1.0182825326919556,grad_norm: 0.7440426452489859, iteration: 242209
loss: 0.9788713455200195,grad_norm: 0.7679419053353489, iteration: 242210
loss: 0.9932036995887756,grad_norm: 0.9999991677015242, iteration: 242211
loss: 1.0114496946334839,grad_norm: 0.9521892849016629, iteration: 242212
loss: 0.9641929864883423,grad_norm: 0.9127274207241955, iteration: 242213
loss: 0.9581413865089417,grad_norm: 0.8843524791093976, iteration: 242214
loss: 1.005712628364563,grad_norm: 0.9999989489584012, iteration: 242215
loss: 1.0482827425003052,grad_norm: 0.9999991513372453, iteration: 242216
loss: 1.004615068435669,grad_norm: 0.9194538865585067, iteration: 242217
loss: 1.0059891939163208,grad_norm: 0.9422471630868942, iteration: 242218
loss: 0.9613466858863831,grad_norm: 0.7671199079418126, iteration: 242219
loss: 1.0270057916641235,grad_norm: 0.9321638595130599, iteration: 242220
loss: 0.9944896697998047,grad_norm: 0.9109319180048406, iteration: 242221
loss: 0.9951421022415161,grad_norm: 0.9999992516787374, iteration: 242222
loss: 1.1103965044021606,grad_norm: 0.8753070536115032, iteration: 242223
loss: 0.996353805065155,grad_norm: 0.9999990312226266, iteration: 242224
loss: 0.9928044676780701,grad_norm: 0.9999992021679538, iteration: 242225
loss: 1.0135072469711304,grad_norm: 0.793408750355723, iteration: 242226
loss: 1.0229641199111938,grad_norm: 0.8664564824609349, iteration: 242227
loss: 1.0326050519943237,grad_norm: 0.9368644622809672, iteration: 242228
loss: 0.9822815656661987,grad_norm: 0.8490625585769226, iteration: 242229
loss: 1.005814790725708,grad_norm: 0.8603291958269502, iteration: 242230
loss: 1.0294908285140991,grad_norm: 0.9999992300311272, iteration: 242231
loss: 0.9958475828170776,grad_norm: 0.8137679782072207, iteration: 242232
loss: 0.9793853759765625,grad_norm: 0.9999993628514652, iteration: 242233
loss: 0.9604737162590027,grad_norm: 0.8539103213784311, iteration: 242234
loss: 1.0345062017440796,grad_norm: 0.9999991845460764, iteration: 242235
loss: 0.9903504848480225,grad_norm: 0.9999990886243436, iteration: 242236
loss: 0.9744471907615662,grad_norm: 0.8993533579639443, iteration: 242237
loss: 1.0030267238616943,grad_norm: 0.9999990534524357, iteration: 242238
loss: 0.9914484620094299,grad_norm: 0.9334622933483283, iteration: 242239
loss: 0.975604236125946,grad_norm: 0.8964671424995464, iteration: 242240
loss: 0.9908186197280884,grad_norm: 0.9999991781620987, iteration: 242241
loss: 0.980842113494873,grad_norm: 0.9354849232018912, iteration: 242242
loss: 1.0234630107879639,grad_norm: 0.8280502856686665, iteration: 242243
loss: 0.984062135219574,grad_norm: 0.9044124733907735, iteration: 242244
loss: 0.9960150718688965,grad_norm: 0.9395624158559728, iteration: 242245
loss: 0.94200199842453,grad_norm: 0.9452843846134421, iteration: 242246
loss: 0.9889616370201111,grad_norm: 0.8636197668782948, iteration: 242247
loss: 0.9915561079978943,grad_norm: 0.951516025495585, iteration: 242248
loss: 1.009036898612976,grad_norm: 0.9990892050138153, iteration: 242249
loss: 1.0062490701675415,grad_norm: 0.8224806398978306, iteration: 242250
loss: 0.9807032942771912,grad_norm: 0.9999990665285089, iteration: 242251
loss: 1.0264697074890137,grad_norm: 0.8083953486009027, iteration: 242252
loss: 0.9996107220649719,grad_norm: 0.8590279285673245, iteration: 242253
loss: 0.9712582230567932,grad_norm: 0.973073197955196, iteration: 242254
loss: 0.9593879580497742,grad_norm: 0.8478239628683746, iteration: 242255
loss: 1.0508017539978027,grad_norm: 0.8969510193504461, iteration: 242256
loss: 1.0223450660705566,grad_norm: 0.9999992650617647, iteration: 242257
loss: 0.9717863202095032,grad_norm: 0.9063039377135135, iteration: 242258
loss: 1.013547420501709,grad_norm: 0.8424410772501427, iteration: 242259
loss: 0.9958553910255432,grad_norm: 0.999998873218667, iteration: 242260
loss: 1.0084928274154663,grad_norm: 0.9281918792381267, iteration: 242261
loss: 0.9639336466789246,grad_norm: 0.9857571272450427, iteration: 242262
loss: 1.0096079111099243,grad_norm: 0.9074897415336473, iteration: 242263
loss: 0.9832062721252441,grad_norm: 0.7609929717349609, iteration: 242264
loss: 0.9979203343391418,grad_norm: 0.7448339741540587, iteration: 242265
loss: 1.1003869771957397,grad_norm: 0.959038082046009, iteration: 242266
loss: 1.0247797966003418,grad_norm: 0.8831152766660076, iteration: 242267
loss: 1.1043035984039307,grad_norm: 0.9999993439024105, iteration: 242268
loss: 1.0054010152816772,grad_norm: 0.9999989686587576, iteration: 242269
loss: 0.9885557293891907,grad_norm: 0.9051208071495525, iteration: 242270
loss: 0.9949168562889099,grad_norm: 0.9189886184163285, iteration: 242271
loss: 1.0113526582717896,grad_norm: 0.8107112171632368, iteration: 242272
loss: 0.980579674243927,grad_norm: 0.8665479190340831, iteration: 242273
loss: 0.96642005443573,grad_norm: 0.9999992506511022, iteration: 242274
loss: 0.9847034215927124,grad_norm: 0.8674448536292985, iteration: 242275
loss: 0.9933261275291443,grad_norm: 0.999999637788881, iteration: 242276
loss: 1.0353896617889404,grad_norm: 0.8733523831966505, iteration: 242277
loss: 1.01952064037323,grad_norm: 0.8054754179869505, iteration: 242278
loss: 1.0193159580230713,grad_norm: 0.8044359330411848, iteration: 242279
loss: 1.0300074815750122,grad_norm: 0.8471180491923784, iteration: 242280
loss: 1.0368845462799072,grad_norm: 0.9999990717556186, iteration: 242281
loss: 0.9738754630088806,grad_norm: 0.9024403336869672, iteration: 242282
loss: 1.0435601472854614,grad_norm: 0.9568901259555946, iteration: 242283
loss: 1.003098487854004,grad_norm: 0.8860123631046645, iteration: 242284
loss: 1.0087164640426636,grad_norm: 0.8540636278925796, iteration: 242285
loss: 1.113304615020752,grad_norm: 0.7386214431002166, iteration: 242286
loss: 1.0130079984664917,grad_norm: 0.9999990156911618, iteration: 242287
loss: 1.035509705543518,grad_norm: 0.9999990764542538, iteration: 242288
loss: 0.9970513582229614,grad_norm: 0.8667680435280815, iteration: 242289
loss: 0.9830835461616516,grad_norm: 0.9999989879327074, iteration: 242290
loss: 0.9789266586303711,grad_norm: 0.8185163722623036, iteration: 242291
loss: 1.0236594676971436,grad_norm: 0.8605978673140698, iteration: 242292
loss: 0.9847878217697144,grad_norm: 0.8156600769260645, iteration: 242293
loss: 0.9817705750465393,grad_norm: 0.8738654015088936, iteration: 242294
loss: 1.0397073030471802,grad_norm: 0.999999021238887, iteration: 242295
loss: 0.9963576197624207,grad_norm: 0.9089407999068602, iteration: 242296
loss: 1.0196338891983032,grad_norm: 0.9888333507355834, iteration: 242297
loss: 0.9924234747886658,grad_norm: 0.9070721267362303, iteration: 242298
loss: 1.0611776113510132,grad_norm: 0.8679389628645462, iteration: 242299
loss: 1.1388550996780396,grad_norm: 0.9999990437045357, iteration: 242300
loss: 1.0165834426879883,grad_norm: 0.9999991468908369, iteration: 242301
loss: 0.9882941842079163,grad_norm: 0.8780291133490981, iteration: 242302
loss: 1.0804975032806396,grad_norm: 0.9999992150255325, iteration: 242303
loss: 1.0137187242507935,grad_norm: 0.8147724227459696, iteration: 242304
loss: 1.0669887065887451,grad_norm: 0.9201213748438367, iteration: 242305
loss: 0.9852427840232849,grad_norm: 0.9166872856820286, iteration: 242306
loss: 0.9639527797698975,grad_norm: 0.8916508207839537, iteration: 242307
loss: 0.9742693305015564,grad_norm: 0.8950060271885977, iteration: 242308
loss: 1.027556300163269,grad_norm: 0.8144833760063338, iteration: 242309
loss: 0.9956462979316711,grad_norm: 0.8573813347864003, iteration: 242310
loss: 0.9346408843994141,grad_norm: 0.9114320504679359, iteration: 242311
loss: 0.9936514496803284,grad_norm: 0.8139916522530979, iteration: 242312
loss: 1.0154564380645752,grad_norm: 0.9999990136220839, iteration: 242313
loss: 0.9850061535835266,grad_norm: 0.8598531129336342, iteration: 242314
loss: 0.9914169311523438,grad_norm: 0.767897177428906, iteration: 242315
loss: 1.0079864263534546,grad_norm: 0.8998258347803924, iteration: 242316
loss: 0.9789165258407593,grad_norm: 0.7988117194927281, iteration: 242317
loss: 0.9716828465461731,grad_norm: 0.9916764835145678, iteration: 242318
loss: 0.9793592691421509,grad_norm: 0.999998944567716, iteration: 242319
loss: 0.98787522315979,grad_norm: 0.8506213356477109, iteration: 242320
loss: 1.0110560655593872,grad_norm: 0.9427653258059131, iteration: 242321
loss: 0.9735649824142456,grad_norm: 0.7741779325184899, iteration: 242322
loss: 1.0174338817596436,grad_norm: 0.9018164772996854, iteration: 242323
loss: 1.0212020874023438,grad_norm: 0.999999104784558, iteration: 242324
loss: 1.0190794467926025,grad_norm: 0.9874128063110424, iteration: 242325
loss: 0.9882910251617432,grad_norm: 0.8888246369024637, iteration: 242326
loss: 0.9729445576667786,grad_norm: 0.8683233027383268, iteration: 242327
loss: 0.9778768420219421,grad_norm: 0.8834665859685088, iteration: 242328
loss: 1.0387345552444458,grad_norm: 0.9999999176394119, iteration: 242329
loss: 0.9671565294265747,grad_norm: 0.9999999337970741, iteration: 242330
loss: 0.9953835010528564,grad_norm: 0.9930043195189797, iteration: 242331
loss: 0.9557093381881714,grad_norm: 0.8888218053411947, iteration: 242332
loss: 0.9935075044631958,grad_norm: 0.9053475531176423, iteration: 242333
loss: 0.996367871761322,grad_norm: 0.9999990750525305, iteration: 242334
loss: 0.9655876159667969,grad_norm: 0.7090948569603769, iteration: 242335
loss: 1.033250093460083,grad_norm: 0.8671349410390943, iteration: 242336
loss: 1.0168375968933105,grad_norm: 0.7362466594634258, iteration: 242337
loss: 0.9819977283477783,grad_norm: 0.9272856976950005, iteration: 242338
loss: 1.006738305091858,grad_norm: 0.9380656492268172, iteration: 242339
loss: 1.0247567892074585,grad_norm: 0.9390740893207424, iteration: 242340
loss: 1.0718504190444946,grad_norm: 0.9999999410026364, iteration: 242341
loss: 0.985308825969696,grad_norm: 0.8144401811893968, iteration: 242342
loss: 0.9811925888061523,grad_norm: 0.8674216112870098, iteration: 242343
loss: 1.0057933330535889,grad_norm: 0.9999991334847079, iteration: 242344
loss: 1.0100537538528442,grad_norm: 0.92312074141214, iteration: 242345
loss: 0.980434238910675,grad_norm: 0.9471612571844276, iteration: 242346
loss: 1.0143756866455078,grad_norm: 0.9108161796284177, iteration: 242347
loss: 1.015156865119934,grad_norm: 0.9999989997340718, iteration: 242348
loss: 1.041661262512207,grad_norm: 0.9999990355769094, iteration: 242349
loss: 1.0017499923706055,grad_norm: 0.9999990608552584, iteration: 242350
loss: 0.9606475830078125,grad_norm: 0.9999991470040838, iteration: 242351
loss: 0.9996399879455566,grad_norm: 0.828400334156546, iteration: 242352
loss: 1.1468340158462524,grad_norm: 0.999999167843148, iteration: 242353
loss: 1.0167852640151978,grad_norm: 0.8396386135867194, iteration: 242354
loss: 1.0046359300613403,grad_norm: 0.9999990904824501, iteration: 242355
loss: 1.0170294046401978,grad_norm: 0.8550348518163602, iteration: 242356
loss: 0.9928444027900696,grad_norm: 0.8003036345819269, iteration: 242357
loss: 0.9525800943374634,grad_norm: 0.961206296324907, iteration: 242358
loss: 1.01741623878479,grad_norm: 0.8328972728817794, iteration: 242359
loss: 1.0243504047393799,grad_norm: 0.7613926226453528, iteration: 242360
loss: 1.0049437284469604,grad_norm: 0.9999991400935776, iteration: 242361
loss: 0.9686258435249329,grad_norm: 0.9999990473773628, iteration: 242362
loss: 0.9732643365859985,grad_norm: 0.8291495036212375, iteration: 242363
loss: 0.9738989472389221,grad_norm: 0.8665470326750593, iteration: 242364
loss: 0.9816863536834717,grad_norm: 0.994622187323482, iteration: 242365
loss: 0.9889240264892578,grad_norm: 0.8536629554791699, iteration: 242366
loss: 1.0346615314483643,grad_norm: 0.9412349081461497, iteration: 242367
loss: 1.014643669128418,grad_norm: 0.9999991496295998, iteration: 242368
loss: 0.9586337804794312,grad_norm: 0.8621629961476243, iteration: 242369
loss: 0.9912946224212646,grad_norm: 0.803473482401456, iteration: 242370
loss: 0.9909043908119202,grad_norm: 0.9999992632060093, iteration: 242371
loss: 0.980396568775177,grad_norm: 0.9874081314023672, iteration: 242372
loss: 1.1005126237869263,grad_norm: 0.9999998148832518, iteration: 242373
loss: 1.0672039985656738,grad_norm: 0.8473177707910203, iteration: 242374
loss: 0.9848908185958862,grad_norm: 0.9999991150850499, iteration: 242375
loss: 0.9616752862930298,grad_norm: 0.935601569007978, iteration: 242376
loss: 0.9802966117858887,grad_norm: 0.7278353923926374, iteration: 242377
loss: 0.9763844609260559,grad_norm: 0.895156175312813, iteration: 242378
loss: 0.9873960018157959,grad_norm: 0.9999991685731923, iteration: 242379
loss: 0.9995282292366028,grad_norm: 0.9525930159108068, iteration: 242380
loss: 0.9868131875991821,grad_norm: 0.9364539608729496, iteration: 242381
loss: 0.9765939712524414,grad_norm: 0.8124554342754203, iteration: 242382
loss: 1.026253581047058,grad_norm: 0.914164826895705, iteration: 242383
loss: 0.9965325593948364,grad_norm: 0.916960498645081, iteration: 242384
loss: 1.1594120264053345,grad_norm: 0.9999991406211497, iteration: 242385
loss: 1.0153175592422485,grad_norm: 0.9999991480373251, iteration: 242386
loss: 0.9888200759887695,grad_norm: 0.9743140171411593, iteration: 242387
loss: 1.0173979997634888,grad_norm: 0.9734166159484376, iteration: 242388
loss: 0.9625017046928406,grad_norm: 0.8661719493731045, iteration: 242389
loss: 0.9803997278213501,grad_norm: 0.938233665668221, iteration: 242390
loss: 0.9892072677612305,grad_norm: 0.9999990569611177, iteration: 242391
loss: 1.0112100839614868,grad_norm: 0.92219460687037, iteration: 242392
loss: 1.3028826713562012,grad_norm: 0.9999998622141588, iteration: 242393
loss: 0.9459739327430725,grad_norm: 0.9803682310483448, iteration: 242394
loss: 0.9983749389648438,grad_norm: 0.9150704073904113, iteration: 242395
loss: 0.9816321730613708,grad_norm: 0.9163662437894353, iteration: 242396
loss: 1.0038503408432007,grad_norm: 0.7335949125095071, iteration: 242397
loss: 1.0217357873916626,grad_norm: 0.925789014444467, iteration: 242398
loss: 1.0066015720367432,grad_norm: 0.9999991958823673, iteration: 242399
loss: 0.979641318321228,grad_norm: 0.9126397494982198, iteration: 242400
loss: 1.0444798469543457,grad_norm: 0.8840289484395126, iteration: 242401
loss: 1.005173683166504,grad_norm: 0.8138480207571939, iteration: 242402
loss: 1.000129222869873,grad_norm: 0.8128901695176591, iteration: 242403
loss: 0.976590096950531,grad_norm: 0.9999993069015968, iteration: 242404
loss: 0.9986023306846619,grad_norm: 0.9999990302877741, iteration: 242405
loss: 1.0225356817245483,grad_norm: 0.9999990859321708, iteration: 242406
loss: 0.9662113785743713,grad_norm: 0.9654527579051068, iteration: 242407
loss: 0.9903146028518677,grad_norm: 0.99999911574228, iteration: 242408
loss: 0.9988189935684204,grad_norm: 0.909334808863248, iteration: 242409
loss: 0.9938110113143921,grad_norm: 0.7763364694104368, iteration: 242410
loss: 1.0131334066390991,grad_norm: 0.9999991086478205, iteration: 242411
loss: 1.0080499649047852,grad_norm: 0.8325632617723463, iteration: 242412
loss: 1.0315040349960327,grad_norm: 0.9999990846526472, iteration: 242413
loss: 1.014063835144043,grad_norm: 0.948320969487092, iteration: 242414
loss: 1.0022847652435303,grad_norm: 0.7928680163534151, iteration: 242415
loss: 0.9693953394889832,grad_norm: 0.8967754755946153, iteration: 242416
loss: 0.9886192679405212,grad_norm: 0.7606204240896103, iteration: 242417
loss: 0.9706439971923828,grad_norm: 0.8195608218787667, iteration: 242418
loss: 0.9951218962669373,grad_norm: 0.9999989922503584, iteration: 242419
loss: 1.0027495622634888,grad_norm: 0.8704235479135335, iteration: 242420
loss: 0.9822338223457336,grad_norm: 0.9999991446085765, iteration: 242421
loss: 1.019958257675171,grad_norm: 0.7642471606771637, iteration: 242422
loss: 1.0052086114883423,grad_norm: 0.7880851446126389, iteration: 242423
loss: 0.9785846471786499,grad_norm: 0.9805277639535801, iteration: 242424
loss: 0.9820834398269653,grad_norm: 0.9292953894293842, iteration: 242425
loss: 0.9986070394515991,grad_norm: 0.8425351943317728, iteration: 242426
loss: 0.9992740154266357,grad_norm: 0.9948036652790865, iteration: 242427
loss: 1.0860042572021484,grad_norm: 0.9999995739912414, iteration: 242428
loss: 1.0190337896347046,grad_norm: 0.9378628249837537, iteration: 242429
loss: 1.1276775598526,grad_norm: 0.9222311277902591, iteration: 242430
loss: 0.9831398129463196,grad_norm: 0.8866063304043177, iteration: 242431
loss: 0.984096109867096,grad_norm: 0.8799089784796309, iteration: 242432
loss: 1.1985825300216675,grad_norm: 0.9999990546009937, iteration: 242433
loss: 1.0866451263427734,grad_norm: 0.999999544926088, iteration: 242434
loss: 0.9930412173271179,grad_norm: 0.8316461335524623, iteration: 242435
loss: 0.9591102600097656,grad_norm: 0.9115872364232125, iteration: 242436
loss: 1.1465510129928589,grad_norm: 0.9999993966317285, iteration: 242437
loss: 1.0083149671554565,grad_norm: 0.9071837403244368, iteration: 242438
loss: 1.036239504814148,grad_norm: 0.9606296836882816, iteration: 242439
loss: 1.0225902795791626,grad_norm: 0.8301235934305278, iteration: 242440
loss: 1.0274471044540405,grad_norm: 0.9545445661115799, iteration: 242441
loss: 0.9942049384117126,grad_norm: 0.9538545963686134, iteration: 242442
loss: 1.015529990196228,grad_norm: 0.961294610863363, iteration: 242443
loss: 0.942143976688385,grad_norm: 0.9999990411588209, iteration: 242444
loss: 1.0274171829223633,grad_norm: 0.7855134736570378, iteration: 242445
loss: 0.9908125996589661,grad_norm: 0.7968886411324214, iteration: 242446
loss: 1.022341251373291,grad_norm: 0.7581993075085612, iteration: 242447
loss: 1.0066756010055542,grad_norm: 0.9999998449947634, iteration: 242448
loss: 0.9906012415885925,grad_norm: 0.8837103684104841, iteration: 242449
loss: 0.9812865853309631,grad_norm: 0.9999989565713525, iteration: 242450
loss: 0.9577937722206116,grad_norm: 0.9373095547847058, iteration: 242451
loss: 1.099808692932129,grad_norm: 0.9999990574191316, iteration: 242452
loss: 1.0000218152999878,grad_norm: 0.7975891559353934, iteration: 242453
loss: 0.9956313967704773,grad_norm: 0.999999154797795, iteration: 242454
loss: 0.9785034656524658,grad_norm: 0.9548167701511274, iteration: 242455
loss: 1.050866961479187,grad_norm: 0.9999994473882672, iteration: 242456
loss: 1.0348538160324097,grad_norm: 0.9315595628840077, iteration: 242457
loss: 1.0367927551269531,grad_norm: 0.9999994784120424, iteration: 242458
loss: 1.0378379821777344,grad_norm: 0.7667408508953535, iteration: 242459
loss: 1.0118074417114258,grad_norm: 0.87117119543677, iteration: 242460
loss: 0.966856062412262,grad_norm: 0.7964539887636222, iteration: 242461
loss: 1.0276347398757935,grad_norm: 0.828526371881183, iteration: 242462
loss: 1.0299845933914185,grad_norm: 0.9999991307595468, iteration: 242463
loss: 0.9541758894920349,grad_norm: 0.9999990310625665, iteration: 242464
loss: 0.972729504108429,grad_norm: 0.8503858406523636, iteration: 242465
loss: 1.0010502338409424,grad_norm: 0.9018246610363646, iteration: 242466
loss: 1.019816517829895,grad_norm: 0.9999996702187097, iteration: 242467
loss: 1.0657634735107422,grad_norm: 0.9999992935656353, iteration: 242468
loss: 1.023392677307129,grad_norm: 0.871840242272229, iteration: 242469
loss: 0.9780059456825256,grad_norm: 0.9587603997902943, iteration: 242470
loss: 1.0154567956924438,grad_norm: 0.9016344251506736, iteration: 242471
loss: 1.0130293369293213,grad_norm: 0.8063110804881443, iteration: 242472
loss: 1.0364699363708496,grad_norm: 0.9999998044079278, iteration: 242473
loss: 1.0368108749389648,grad_norm: 0.8763438717562557, iteration: 242474
loss: 1.0174626111984253,grad_norm: 0.7795102559039668, iteration: 242475
loss: 0.978950560092926,grad_norm: 0.7172869239699747, iteration: 242476
loss: 1.0178698301315308,grad_norm: 0.9999990731385935, iteration: 242477
loss: 1.0100659132003784,grad_norm: 0.9999991973333795, iteration: 242478
loss: 0.9928997159004211,grad_norm: 0.8074890249696571, iteration: 242479
loss: 1.026488184928894,grad_norm: 0.9025124348463294, iteration: 242480
loss: 0.944438636302948,grad_norm: 0.8663458900123782, iteration: 242481
loss: 0.9850708246231079,grad_norm: 0.8007750842811734, iteration: 242482
loss: 1.051594614982605,grad_norm: 0.9999990643291101, iteration: 242483
loss: 0.9804604053497314,grad_norm: 0.9631611400526431, iteration: 242484
loss: 1.0897598266601562,grad_norm: 0.9766567751351866, iteration: 242485
loss: 0.9998130202293396,grad_norm: 0.8738516576958509, iteration: 242486
loss: 0.9968016743659973,grad_norm: 0.8109060098448487, iteration: 242487
loss: 0.9869137406349182,grad_norm: 0.9999999606376007, iteration: 242488
loss: 0.9838235378265381,grad_norm: 0.7384715841821967, iteration: 242489
loss: 0.9864563345909119,grad_norm: 0.7474793852226197, iteration: 242490
loss: 0.9830929636955261,grad_norm: 0.7946467785172002, iteration: 242491
loss: 0.9892847537994385,grad_norm: 0.8326155464914978, iteration: 242492
loss: 0.994866669178009,grad_norm: 0.8819125119955372, iteration: 242493
loss: 1.0138641595840454,grad_norm: 0.9302344758094759, iteration: 242494
loss: 1.0152426958084106,grad_norm: 0.9999996960217352, iteration: 242495
loss: 1.0291199684143066,grad_norm: 0.7661787539639099, iteration: 242496
loss: 0.9980417490005493,grad_norm: 0.8869031078753309, iteration: 242497
loss: 1.0029181241989136,grad_norm: 0.9999989937036913, iteration: 242498
loss: 0.9939201474189758,grad_norm: 0.9509056215275035, iteration: 242499
loss: 0.9815455079078674,grad_norm: 0.999999073613718, iteration: 242500
loss: 1.0033864974975586,grad_norm: 0.8293531754666739, iteration: 242501
loss: 1.026147484779358,grad_norm: 0.9624074131677912, iteration: 242502
loss: 1.018484354019165,grad_norm: 0.9408475360524993, iteration: 242503
loss: 1.0124918222427368,grad_norm: 0.9340868510218818, iteration: 242504
loss: 1.0207172632217407,grad_norm: 0.9999996696012182, iteration: 242505
loss: 1.022121548652649,grad_norm: 0.9999991510537078, iteration: 242506
loss: 1.0102057456970215,grad_norm: 0.8966442548833576, iteration: 242507
loss: 0.9674535393714905,grad_norm: 0.9186768306495291, iteration: 242508
loss: 1.0280287265777588,grad_norm: 0.9999991196278507, iteration: 242509
loss: 1.0047199726104736,grad_norm: 0.9451060306034997, iteration: 242510
loss: 1.0180699825286865,grad_norm: 0.9751626725409114, iteration: 242511
loss: 0.9916863441467285,grad_norm: 0.8962637505735764, iteration: 242512
loss: 0.9765776991844177,grad_norm: 0.7815421061389144, iteration: 242513
loss: 0.9872233867645264,grad_norm: 0.9498259813433558, iteration: 242514
loss: 1.0139788389205933,grad_norm: 0.9999992165904268, iteration: 242515
loss: 1.002139925956726,grad_norm: 0.9661060396865098, iteration: 242516
loss: 1.0040485858917236,grad_norm: 0.873523803994147, iteration: 242517
loss: 0.9312375783920288,grad_norm: 0.8193951798336613, iteration: 242518
loss: 1.0616552829742432,grad_norm: 0.9897282978757749, iteration: 242519
loss: 0.9823616147041321,grad_norm: 0.8247283770079558, iteration: 242520
loss: 1.0180021524429321,grad_norm: 0.8250590815783077, iteration: 242521
loss: 1.011965274810791,grad_norm: 0.9984215484751021, iteration: 242522
loss: 0.9927536845207214,grad_norm: 0.8195105798450848, iteration: 242523
loss: 0.9942460060119629,grad_norm: 0.9999990083683816, iteration: 242524
loss: 0.9939178824424744,grad_norm: 0.769973415070037, iteration: 242525
loss: 1.002575397491455,grad_norm: 0.700182323351577, iteration: 242526
loss: 0.9816704392433167,grad_norm: 0.8570518106607597, iteration: 242527
loss: 0.9994478821754456,grad_norm: 0.8626165642771222, iteration: 242528
loss: 0.9506009221076965,grad_norm: 0.9010394810083553, iteration: 242529
loss: 1.022214651107788,grad_norm: 0.735800548341208, iteration: 242530
loss: 1.0105538368225098,grad_norm: 0.9999991123634212, iteration: 242531
loss: 1.0029566287994385,grad_norm: 0.8953643371696408, iteration: 242532
loss: 1.009787678718567,grad_norm: 0.8596988209587149, iteration: 242533
loss: 0.974936306476593,grad_norm: 0.8766922780218569, iteration: 242534
loss: 1.013594388961792,grad_norm: 0.8036027804643856, iteration: 242535
loss: 1.0336120128631592,grad_norm: 0.7619849075736722, iteration: 242536
loss: 0.9709898233413696,grad_norm: 0.869225005987492, iteration: 242537
loss: 0.9867013096809387,grad_norm: 0.928344118585234, iteration: 242538
loss: 0.9765214920043945,grad_norm: 0.995150209688513, iteration: 242539
loss: 1.0130112171173096,grad_norm: 0.9966512544420648, iteration: 242540
loss: 0.9769485592842102,grad_norm: 0.9851883456244721, iteration: 242541
loss: 0.9740896224975586,grad_norm: 0.9491607505853312, iteration: 242542
loss: 1.0119149684906006,grad_norm: 0.865155318450276, iteration: 242543
loss: 1.0116221904754639,grad_norm: 0.9374184082633956, iteration: 242544
loss: 0.9719767570495605,grad_norm: 0.9999991385543129, iteration: 242545
loss: 1.0246248245239258,grad_norm: 0.9999991545193024, iteration: 242546
loss: 1.0085408687591553,grad_norm: 0.8198195879528253, iteration: 242547
loss: 1.021933674812317,grad_norm: 0.8663987596378265, iteration: 242548
loss: 0.9863862991333008,grad_norm: 0.9999991024744289, iteration: 242549
loss: 1.0420498847961426,grad_norm: 0.7317997885812259, iteration: 242550
loss: 1.0545823574066162,grad_norm: 0.914445744014669, iteration: 242551
loss: 1.034106731414795,grad_norm: 0.7671937060828021, iteration: 242552
loss: 1.0185446739196777,grad_norm: 0.9999993219744483, iteration: 242553
loss: 0.9793471693992615,grad_norm: 0.9828745745356274, iteration: 242554
loss: 1.0015684366226196,grad_norm: 0.9999990930066465, iteration: 242555
loss: 0.9923251867294312,grad_norm: 0.8408049062523182, iteration: 242556
loss: 1.0321056842803955,grad_norm: 0.9999993447546215, iteration: 242557
loss: 1.0079854726791382,grad_norm: 0.9201100022116486, iteration: 242558
loss: 0.9877894520759583,grad_norm: 0.9999991730845035, iteration: 242559
loss: 1.0179049968719482,grad_norm: 0.9934380796845075, iteration: 242560
loss: 0.9772617816925049,grad_norm: 0.8597810075112434, iteration: 242561
loss: 1.0313518047332764,grad_norm: 0.9412388375662394, iteration: 242562
loss: 0.9820640683174133,grad_norm: 0.9116598434504093, iteration: 242563
loss: 1.0038164854049683,grad_norm: 0.7455803955414837, iteration: 242564
loss: 1.0339975357055664,grad_norm: 0.889494752417567, iteration: 242565
loss: 0.9800823330879211,grad_norm: 0.9472863223895223, iteration: 242566
loss: 0.9844620227813721,grad_norm: 0.8667092641617762, iteration: 242567
loss: 1.0264376401901245,grad_norm: 0.9700550723719175, iteration: 242568
loss: 0.9900557398796082,grad_norm: 0.9734419608883551, iteration: 242569
loss: 1.0211125612258911,grad_norm: 0.9977212259417662, iteration: 242570
loss: 0.9932639002799988,grad_norm: 0.8685331336097722, iteration: 242571
loss: 0.9920940399169922,grad_norm: 0.9350870690731375, iteration: 242572
loss: 1.0033352375030518,grad_norm: 0.8768692839190795, iteration: 242573
loss: 1.019001841545105,grad_norm: 0.8908582174318659, iteration: 242574
loss: 0.9655449390411377,grad_norm: 0.8878301084075109, iteration: 242575
loss: 1.013514757156372,grad_norm: 0.8916887587575483, iteration: 242576
loss: 0.9984385371208191,grad_norm: 0.9639911908601227, iteration: 242577
loss: 1.0129536390304565,grad_norm: 0.9723700091976989, iteration: 242578
loss: 0.9325656890869141,grad_norm: 0.9999991359420939, iteration: 242579
loss: 0.9695696830749512,grad_norm: 0.8650180625102281, iteration: 242580
loss: 0.9720848798751831,grad_norm: 0.814561018413309, iteration: 242581
loss: 0.9711206555366516,grad_norm: 0.9999990936062043, iteration: 242582
loss: 0.9905466437339783,grad_norm: 0.8648752225557657, iteration: 242583
loss: 1.0168524980545044,grad_norm: 0.8485921009830769, iteration: 242584
loss: 0.9883129000663757,grad_norm: 0.7309209090978745, iteration: 242585
loss: 1.0129741430282593,grad_norm: 0.9999990727710101, iteration: 242586
loss: 1.0216444730758667,grad_norm: 0.9740134537791579, iteration: 242587
loss: 1.0520983934402466,grad_norm: 0.999999097921003, iteration: 242588
loss: 1.005433440208435,grad_norm: 0.88996862823456, iteration: 242589
loss: 1.0150994062423706,grad_norm: 0.9491842705077611, iteration: 242590
loss: 0.9972541332244873,grad_norm: 0.8886342083756964, iteration: 242591
loss: 0.993370532989502,grad_norm: 0.7364676196103045, iteration: 242592
loss: 1.0032525062561035,grad_norm: 0.930890588132422, iteration: 242593
loss: 0.9972326755523682,grad_norm: 0.8556405878631227, iteration: 242594
loss: 1.0150337219238281,grad_norm: 0.9999991055776243, iteration: 242595
loss: 1.0252726078033447,grad_norm: 0.8648287728571273, iteration: 242596
loss: 1.006459355354309,grad_norm: 0.9316271191372877, iteration: 242597
loss: 1.010188102722168,grad_norm: 0.7818902556966985, iteration: 242598
loss: 1.0183228254318237,grad_norm: 0.7464434117312987, iteration: 242599
loss: 1.0097520351409912,grad_norm: 0.9789631818050741, iteration: 242600
loss: 0.9900565147399902,grad_norm: 0.9999990075502886, iteration: 242601
loss: 1.0034934282302856,grad_norm: 0.9732540476606332, iteration: 242602
loss: 0.9792839884757996,grad_norm: 0.7685307023032959, iteration: 242603
loss: 0.9760246276855469,grad_norm: 0.9668448813670849, iteration: 242604
loss: 1.0084816217422485,grad_norm: 0.9999990801322591, iteration: 242605
loss: 1.0112624168395996,grad_norm: 0.9124473342148511, iteration: 242606
loss: 0.9734690189361572,grad_norm: 0.9999992531714477, iteration: 242607
loss: 1.0011239051818848,grad_norm: 0.9999995668912695, iteration: 242608
loss: 0.9971293210983276,grad_norm: 0.8730310779620022, iteration: 242609
loss: 0.9984031915664673,grad_norm: 0.9837019997715435, iteration: 242610
loss: 1.1723400354385376,grad_norm: 0.9999992731926141, iteration: 242611
loss: 1.0016354322433472,grad_norm: 0.9254966143893915, iteration: 242612
loss: 1.0135005712509155,grad_norm: 0.8325469686337632, iteration: 242613
loss: 1.0146480798721313,grad_norm: 0.7970601010857684, iteration: 242614
loss: 1.021992802619934,grad_norm: 0.9999990327310598, iteration: 242615
loss: 0.9497830867767334,grad_norm: 0.8462786767110207, iteration: 242616
loss: 1.0226762294769287,grad_norm: 0.8705821245158859, iteration: 242617
loss: 0.9965538382530212,grad_norm: 0.8116250078238226, iteration: 242618
loss: 1.0119256973266602,grad_norm: 0.7938544052578757, iteration: 242619
loss: 0.9753476977348328,grad_norm: 0.995167631751481, iteration: 242620
loss: 0.9926707744598389,grad_norm: 0.8665726935270907, iteration: 242621
loss: 1.0396040678024292,grad_norm: 0.9448854997163568, iteration: 242622
loss: 1.006187081336975,grad_norm: 0.902489199446094, iteration: 242623
loss: 1.01512610912323,grad_norm: 0.9101327885950592, iteration: 242624
loss: 0.9899069666862488,grad_norm: 0.981592527329957, iteration: 242625
loss: 1.0015980005264282,grad_norm: 0.9030697176324556, iteration: 242626
loss: 0.9799752235412598,grad_norm: 0.9831697718983163, iteration: 242627
loss: 1.0090367794036865,grad_norm: 0.9626790101876384, iteration: 242628
loss: 1.0003621578216553,grad_norm: 0.9999991895392398, iteration: 242629
loss: 0.9831864833831787,grad_norm: 0.7260681616850462, iteration: 242630
loss: 1.0032461881637573,grad_norm: 0.9999990956660656, iteration: 242631
loss: 1.0144860744476318,grad_norm: 0.9999992705320752, iteration: 242632
loss: 0.9623304605484009,grad_norm: 0.8429346419417654, iteration: 242633
loss: 0.9671653509140015,grad_norm: 0.9088090688825522, iteration: 242634
loss: 1.0042188167572021,grad_norm: 0.9617637318447083, iteration: 242635
loss: 0.9743779897689819,grad_norm: 0.8167252794435511, iteration: 242636
loss: 0.9849606156349182,grad_norm: 0.8383156901984966, iteration: 242637
loss: 1.0131959915161133,grad_norm: 0.9999990193007102, iteration: 242638
loss: 0.9851769804954529,grad_norm: 0.955503165529428, iteration: 242639
loss: 1.0080890655517578,grad_norm: 0.9770981514536308, iteration: 242640
loss: 1.034643292427063,grad_norm: 0.9999990879803591, iteration: 242641
loss: 1.0068858861923218,grad_norm: 0.9999994000676322, iteration: 242642
loss: 1.0253076553344727,grad_norm: 0.9276728748509442, iteration: 242643
loss: 1.1295467615127563,grad_norm: 0.9645896263008358, iteration: 242644
loss: 1.0487130880355835,grad_norm: 0.9113966542520899, iteration: 242645
loss: 0.9738624095916748,grad_norm: 0.9999991108850965, iteration: 242646
loss: 1.056950330734253,grad_norm: 0.9974200980756761, iteration: 242647
loss: 1.0151360034942627,grad_norm: 0.8720816806954005, iteration: 242648
loss: 1.0302218198776245,grad_norm: 0.9999991724433305, iteration: 242649
loss: 1.0102306604385376,grad_norm: 0.9004116818740774, iteration: 242650
loss: 0.9822891354560852,grad_norm: 0.7971130249353973, iteration: 242651
loss: 1.0036890506744385,grad_norm: 0.9512668694634255, iteration: 242652
loss: 1.0153483152389526,grad_norm: 0.999998978827813, iteration: 242653
loss: 1.039548397064209,grad_norm: 0.9964295561392211, iteration: 242654
loss: 0.9747333526611328,grad_norm: 0.7528968388983887, iteration: 242655
loss: 1.00138258934021,grad_norm: 0.9999994429335772, iteration: 242656
loss: 0.9727269411087036,grad_norm: 0.891559088524136, iteration: 242657
loss: 1.003940224647522,grad_norm: 0.8242019663459054, iteration: 242658
loss: 1.0283467769622803,grad_norm: 0.999999107737877, iteration: 242659
loss: 0.9967683553695679,grad_norm: 0.9891145555478362, iteration: 242660
loss: 0.9829933643341064,grad_norm: 0.931749869060581, iteration: 242661
loss: 1.06731116771698,grad_norm: 0.9999990066015161, iteration: 242662
loss: 1.0185978412628174,grad_norm: 0.9667669311901548, iteration: 242663
loss: 0.9992974996566772,grad_norm: 0.8314360189268776, iteration: 242664
loss: 0.9895312190055847,grad_norm: 0.8384834352409792, iteration: 242665
loss: 0.9973849654197693,grad_norm: 0.9521054727553864, iteration: 242666
loss: 1.0204133987426758,grad_norm: 0.8945492289109999, iteration: 242667
loss: 1.0049126148223877,grad_norm: 0.7563961467607435, iteration: 242668
loss: 0.9708787798881531,grad_norm: 0.9375658946236562, iteration: 242669
loss: 0.9878537654876709,grad_norm: 0.9999997466976364, iteration: 242670
loss: 0.9762489795684814,grad_norm: 0.9037193521261108, iteration: 242671
loss: 1.0196597576141357,grad_norm: 0.9462563707470592, iteration: 242672
loss: 1.041463017463684,grad_norm: 0.9999992008178326, iteration: 242673
loss: 1.0069899559020996,grad_norm: 0.772486602023487, iteration: 242674
loss: 0.998977541923523,grad_norm: 0.8688128097237194, iteration: 242675
loss: 1.0601056814193726,grad_norm: 0.9999996050077022, iteration: 242676
loss: 0.9855015277862549,grad_norm: 0.841996626174479, iteration: 242677
loss: 0.9782562255859375,grad_norm: 0.8414593557541484, iteration: 242678
loss: 1.0218613147735596,grad_norm: 0.9999991967002531, iteration: 242679
loss: 1.0205185413360596,grad_norm: 0.9930551755808071, iteration: 242680
loss: 1.0110771656036377,grad_norm: 0.8342775648350584, iteration: 242681
loss: 0.9961013793945312,grad_norm: 0.9999991408785589, iteration: 242682
loss: 0.9841452836990356,grad_norm: 0.8772660951803071, iteration: 242683
loss: 0.9928767085075378,grad_norm: 0.960055935170664, iteration: 242684
loss: 0.9546115398406982,grad_norm: 0.9724607558986345, iteration: 242685
loss: 0.9869451522827148,grad_norm: 0.9958058726361013, iteration: 242686
loss: 0.9892749786376953,grad_norm: 0.9999993083330762, iteration: 242687
loss: 0.9767112135887146,grad_norm: 0.9999991330293732, iteration: 242688
loss: 0.9891158938407898,grad_norm: 0.9999990809386244, iteration: 242689
loss: 1.0194988250732422,grad_norm: 0.830670987969405, iteration: 242690
loss: 0.9821080565452576,grad_norm: 0.9999991379664167, iteration: 242691
loss: 0.9854215383529663,grad_norm: 0.9166788909338682, iteration: 242692
loss: 0.9954511523246765,grad_norm: 0.9637938983447681, iteration: 242693
loss: 0.984029233455658,grad_norm: 0.8491037726154775, iteration: 242694
loss: 0.9608882069587708,grad_norm: 0.9739476943544739, iteration: 242695
loss: 1.0074131488800049,grad_norm: 0.9453233878532047, iteration: 242696
loss: 1.0343040227890015,grad_norm: 0.9723982878182007, iteration: 242697
loss: 0.964505672454834,grad_norm: 0.8373077349217011, iteration: 242698
loss: 1.0012190341949463,grad_norm: 0.7645355651998509, iteration: 242699
loss: 0.973879337310791,grad_norm: 0.8905021821461482, iteration: 242700
loss: 0.9952418804168701,grad_norm: 0.80854978052956, iteration: 242701
loss: 1.0174219608306885,grad_norm: 0.9999999067950782, iteration: 242702
loss: 0.9741491675376892,grad_norm: 0.8830491311833925, iteration: 242703
loss: 1.0035213232040405,grad_norm: 0.9042420895982245, iteration: 242704
loss: 0.9622992873191833,grad_norm: 0.9999991452402157, iteration: 242705
loss: 1.0172477960586548,grad_norm: 0.9999997020524445, iteration: 242706
loss: 1.012704610824585,grad_norm: 0.87561147302488, iteration: 242707
loss: 1.0127207040786743,grad_norm: 0.9999991642526707, iteration: 242708
loss: 1.0278152227401733,grad_norm: 0.9999994632713523, iteration: 242709
loss: 0.9992609620094299,grad_norm: 0.9999992648980324, iteration: 242710
loss: 1.0216394662857056,grad_norm: 0.8702585955433667, iteration: 242711
loss: 1.0124735832214355,grad_norm: 0.9369784159822567, iteration: 242712
loss: 1.0563671588897705,grad_norm: 0.8090772711389284, iteration: 242713
loss: 1.0255780220031738,grad_norm: 0.7868140973771521, iteration: 242714
loss: 0.9852128028869629,grad_norm: 0.8614903402740854, iteration: 242715
loss: 0.9935570359230042,grad_norm: 0.9999994718240598, iteration: 242716
loss: 1.0222134590148926,grad_norm: 0.8531777787457115, iteration: 242717
loss: 0.9665685892105103,grad_norm: 0.9177830285255866, iteration: 242718
loss: 0.9665986895561218,grad_norm: 0.9174572344515862, iteration: 242719
loss: 0.9947189092636108,grad_norm: 0.6827349385515656, iteration: 242720
loss: 0.9936224818229675,grad_norm: 0.9416604078959907, iteration: 242721
loss: 1.0027285814285278,grad_norm: 0.9275637590848169, iteration: 242722
loss: 0.9752073884010315,grad_norm: 0.9154689782914683, iteration: 242723
loss: 1.0366276502609253,grad_norm: 0.9503457596176618, iteration: 242724
loss: 1.0062848329544067,grad_norm: 0.7481861117346371, iteration: 242725
loss: 0.9554717540740967,grad_norm: 0.9999991680942188, iteration: 242726
loss: 0.9896978139877319,grad_norm: 0.9526818970696762, iteration: 242727
loss: 0.9848524332046509,grad_norm: 0.9999990813501185, iteration: 242728
loss: 1.0080482959747314,grad_norm: 0.9417186526205596, iteration: 242729
loss: 1.0174822807312012,grad_norm: 0.9999990164322571, iteration: 242730
loss: 0.9744237661361694,grad_norm: 0.9999990778965925, iteration: 242731
loss: 1.0210883617401123,grad_norm: 0.9999989664123113, iteration: 242732
loss: 0.9425447583198547,grad_norm: 0.8830973011461931, iteration: 242733
loss: 0.9747582077980042,grad_norm: 0.9999990997563062, iteration: 242734
loss: 0.9748952984809875,grad_norm: 0.8835774164174647, iteration: 242735
loss: 1.009159803390503,grad_norm: 0.9419106824070055, iteration: 242736
loss: 0.992548942565918,grad_norm: 0.7898221932090884, iteration: 242737
loss: 1.0197101831436157,grad_norm: 0.8872676640886863, iteration: 242738
loss: 0.9834639430046082,grad_norm: 0.9411594764198997, iteration: 242739
loss: 0.96219801902771,grad_norm: 0.8270265619430605, iteration: 242740
loss: 0.9477733969688416,grad_norm: 0.8125699933549438, iteration: 242741
loss: 0.9767271876335144,grad_norm: 0.9737204857791673, iteration: 242742
loss: 0.9921175837516785,grad_norm: 0.7886286606420068, iteration: 242743
loss: 0.9764981269836426,grad_norm: 0.9999990407671603, iteration: 242744
loss: 0.9756985902786255,grad_norm: 0.999999042108527, iteration: 242745
loss: 0.9604137539863586,grad_norm: 0.961152193174787, iteration: 242746
loss: 1.0054028034210205,grad_norm: 0.8679483288260107, iteration: 242747
loss: 0.9871827960014343,grad_norm: 0.8356554124329786, iteration: 242748
loss: 1.0193144083023071,grad_norm: 0.9103093398592672, iteration: 242749
loss: 1.0089865922927856,grad_norm: 0.7932127451028785, iteration: 242750
loss: 0.9905397891998291,grad_norm: 0.8948110448450574, iteration: 242751
loss: 0.9855343103408813,grad_norm: 0.9999990893932209, iteration: 242752
loss: 0.99001145362854,grad_norm: 0.8619818243304926, iteration: 242753
loss: 0.9973840117454529,grad_norm: 0.7635387665479743, iteration: 242754
loss: 1.0536298751831055,grad_norm: 0.9999994295796784, iteration: 242755
loss: 1.0067336559295654,grad_norm: 0.9433358131427907, iteration: 242756
loss: 1.037574291229248,grad_norm: 0.9725140464515215, iteration: 242757
loss: 1.0144294500350952,grad_norm: 0.999999051782493, iteration: 242758
loss: 0.9906471967697144,grad_norm: 0.9999990737010079, iteration: 242759
loss: 1.0217300653457642,grad_norm: 0.9999996628291539, iteration: 242760
loss: 0.9913023114204407,grad_norm: 0.9239172636516576, iteration: 242761
loss: 0.9829343557357788,grad_norm: 0.971740421003285, iteration: 242762
loss: 0.943180501461029,grad_norm: 0.8271710150977525, iteration: 242763
loss: 1.0046263933181763,grad_norm: 0.9574999664464218, iteration: 242764
loss: 1.004633903503418,grad_norm: 0.8899425063796061, iteration: 242765
loss: 0.9974242448806763,grad_norm: 0.9999991448287595, iteration: 242766
loss: 1.0146989822387695,grad_norm: 0.9999990574297652, iteration: 242767
loss: 1.0112029314041138,grad_norm: 0.8116464405498095, iteration: 242768
loss: 0.9808250069618225,grad_norm: 0.8958270992466383, iteration: 242769
loss: 0.9876471757888794,grad_norm: 0.8318312126337467, iteration: 242770
loss: 0.966404139995575,grad_norm: 0.8128181491718257, iteration: 242771
loss: 1.0020090341567993,grad_norm: 0.8887020602149854, iteration: 242772
loss: 0.9815082550048828,grad_norm: 0.9948476259304536, iteration: 242773
loss: 1.0112335681915283,grad_norm: 0.9243658805508518, iteration: 242774
loss: 0.9840797781944275,grad_norm: 0.6449591378179634, iteration: 242775
loss: 1.0236103534698486,grad_norm: 0.9330734845579411, iteration: 242776
loss: 0.9960666298866272,grad_norm: 0.9129775268291918, iteration: 242777
loss: 0.9926886558532715,grad_norm: 0.936113318285568, iteration: 242778
loss: 0.9916636943817139,grad_norm: 0.9043691088270517, iteration: 242779
loss: 0.9875875115394592,grad_norm: 0.9879911979927182, iteration: 242780
loss: 0.9782869815826416,grad_norm: 0.9289258167242589, iteration: 242781
loss: 1.009519100189209,grad_norm: 0.8826608858731714, iteration: 242782
loss: 0.9790785908699036,grad_norm: 0.9999990982283921, iteration: 242783
loss: 1.0061542987823486,grad_norm: 0.8345832926181195, iteration: 242784
loss: 0.9784053564071655,grad_norm: 0.8676133242242865, iteration: 242785
loss: 0.9927546381950378,grad_norm: 0.9999989649012243, iteration: 242786
loss: 1.0213855504989624,grad_norm: 0.7516915609110238, iteration: 242787
loss: 1.002793550491333,grad_norm: 0.940994038099577, iteration: 242788
loss: 1.0944161415100098,grad_norm: 0.8002943827823567, iteration: 242789
loss: 1.0176055431365967,grad_norm: 0.8327151299487091, iteration: 242790
loss: 1.029784917831421,grad_norm: 0.7904496461009015, iteration: 242791
loss: 1.0185518264770508,grad_norm: 0.9999991827532386, iteration: 242792
loss: 1.0229679346084595,grad_norm: 0.9852463602766502, iteration: 242793
loss: 0.9803710579872131,grad_norm: 0.9563020977313564, iteration: 242794
loss: 0.9779164791107178,grad_norm: 0.8807188552747968, iteration: 242795
loss: 1.017374873161316,grad_norm: 0.7944594768340637, iteration: 242796
loss: 1.0060535669326782,grad_norm: 0.8831425018982888, iteration: 242797
loss: 1.005116581916809,grad_norm: 0.8037194918019852, iteration: 242798
loss: 0.9547210931777954,grad_norm: 0.864579914251601, iteration: 242799
loss: 0.9821200370788574,grad_norm: 0.8067770073022471, iteration: 242800
loss: 1.0240463018417358,grad_norm: 0.8659757491192039, iteration: 242801
loss: 0.9980296492576599,grad_norm: 0.9639405562288456, iteration: 242802
loss: 1.0507336854934692,grad_norm: 0.9229752013955712, iteration: 242803
loss: 0.9960583448410034,grad_norm: 0.972653560367267, iteration: 242804
loss: 0.9854007959365845,grad_norm: 0.9999991643677076, iteration: 242805
loss: 1.0939486026763916,grad_norm: 0.9067909375634148, iteration: 242806
loss: 1.0037614107131958,grad_norm: 0.8670791712027784, iteration: 242807
loss: 0.9955258369445801,grad_norm: 0.9999990849278863, iteration: 242808
loss: 0.9785784482955933,grad_norm: 0.8937070198094492, iteration: 242809
loss: 1.0225574970245361,grad_norm: 0.9475362591999906, iteration: 242810
loss: 1.0136207342147827,grad_norm: 0.881396390570854, iteration: 242811
loss: 1.05085289478302,grad_norm: 0.99999897171097, iteration: 242812
loss: 1.0024828910827637,grad_norm: 0.984465497865389, iteration: 242813
loss: 1.0486831665039062,grad_norm: 0.9662401666905843, iteration: 242814
loss: 1.01435387134552,grad_norm: 0.9999995391705834, iteration: 242815
loss: 1.0413706302642822,grad_norm: 0.8654326686725121, iteration: 242816
loss: 0.9816584587097168,grad_norm: 0.9824987585406003, iteration: 242817
loss: 0.9994434714317322,grad_norm: 0.8926814998963738, iteration: 242818
loss: 0.9937241673469543,grad_norm: 0.9999990863802322, iteration: 242819
loss: 1.0095008611679077,grad_norm: 0.8962051504379828, iteration: 242820
loss: 1.010043978691101,grad_norm: 0.9653037474889995, iteration: 242821
loss: 0.9997659921646118,grad_norm: 0.7827988323522598, iteration: 242822
loss: 0.9969422817230225,grad_norm: 0.8964482264178167, iteration: 242823
loss: 1.098630428314209,grad_norm: 0.9999999394515063, iteration: 242824
loss: 0.9912928342819214,grad_norm: 0.815832963983838, iteration: 242825
loss: 0.982690691947937,grad_norm: 0.9999995226197979, iteration: 242826
loss: 0.9772990942001343,grad_norm: 0.7643033593530397, iteration: 242827
loss: 0.9717679619789124,grad_norm: 0.9148416609812567, iteration: 242828
loss: 0.9870002269744873,grad_norm: 0.8565016938423543, iteration: 242829
loss: 0.9588943719863892,grad_norm: 0.8733315263156625, iteration: 242830
loss: 0.9828535914421082,grad_norm: 0.8428812373233522, iteration: 242831
loss: 1.006721019744873,grad_norm: 0.7957673942905897, iteration: 242832
loss: 1.0026558637619019,grad_norm: 0.9999994343762385, iteration: 242833
loss: 0.9989586472511292,grad_norm: 0.9999992015902673, iteration: 242834
loss: 1.034114122390747,grad_norm: 0.8051845437133077, iteration: 242835
loss: 1.0133084058761597,grad_norm: 0.8522536912526267, iteration: 242836
loss: 0.9969375133514404,grad_norm: 0.8482185056196258, iteration: 242837
loss: 0.9938601851463318,grad_norm: 0.9999992240257326, iteration: 242838
loss: 1.0123260021209717,grad_norm: 0.9102808958150819, iteration: 242839
loss: 0.9962838292121887,grad_norm: 0.999999048334665, iteration: 242840
loss: 1.0478450059890747,grad_norm: 0.8175064279625553, iteration: 242841
loss: 0.9761664271354675,grad_norm: 0.8988631019748593, iteration: 242842
loss: 0.9996828436851501,grad_norm: 0.9999990924192345, iteration: 242843
loss: 1.0370498895645142,grad_norm: 0.9684217680740954, iteration: 242844
loss: 0.9889392852783203,grad_norm: 0.8616943276618497, iteration: 242845
loss: 0.9873760938644409,grad_norm: 0.7711686903490033, iteration: 242846
loss: 1.0008361339569092,grad_norm: 0.8271276021929828, iteration: 242847
loss: 1.1079211235046387,grad_norm: 0.9999989818775467, iteration: 242848
loss: 1.0251024961471558,grad_norm: 0.9999989972726296, iteration: 242849
loss: 0.9810792803764343,grad_norm: 0.9705046645438268, iteration: 242850
loss: 0.997974157333374,grad_norm: 0.9999993423603868, iteration: 242851
loss: 1.0019599199295044,grad_norm: 0.8423953334859969, iteration: 242852
loss: 1.0024311542510986,grad_norm: 0.8975234153835591, iteration: 242853
loss: 0.9808079600334167,grad_norm: 0.8427243920433206, iteration: 242854
loss: 0.9835498929023743,grad_norm: 0.9955145287716952, iteration: 242855
loss: 1.0137938261032104,grad_norm: 0.9188373108094725, iteration: 242856
loss: 0.9781266450881958,grad_norm: 0.9304979327966817, iteration: 242857
loss: 1.0274457931518555,grad_norm: 0.9101655179461087, iteration: 242858
loss: 0.9730847477912903,grad_norm: 0.8314654438929123, iteration: 242859
loss: 0.9814210534095764,grad_norm: 0.9999989636467708, iteration: 242860
loss: 1.007148265838623,grad_norm: 0.9999992846063989, iteration: 242861
loss: 1.039828896522522,grad_norm: 0.9262109398449492, iteration: 242862
loss: 0.9873746037483215,grad_norm: 0.9106851789056799, iteration: 242863
loss: 1.0122478008270264,grad_norm: 0.7640781977883744, iteration: 242864
loss: 0.9690693616867065,grad_norm: 0.9003191342678912, iteration: 242865
loss: 1.0120967626571655,grad_norm: 0.9999990411486532, iteration: 242866
loss: 1.0320104360580444,grad_norm: 0.9999990290572869, iteration: 242867
loss: 0.9864761233329773,grad_norm: 0.9771773250666336, iteration: 242868
loss: 1.018256664276123,grad_norm: 0.9321942890576658, iteration: 242869
loss: 1.0481923818588257,grad_norm: 0.9999995846377, iteration: 242870
loss: 0.9989631772041321,grad_norm: 0.9999991336840366, iteration: 242871
loss: 0.9762462973594666,grad_norm: 0.9181323812253133, iteration: 242872
loss: 0.9981117844581604,grad_norm: 0.999999207403551, iteration: 242873
loss: 0.9855300784111023,grad_norm: 0.8379828299538489, iteration: 242874
loss: 1.0189136266708374,grad_norm: 0.9237263166141337, iteration: 242875
loss: 1.0059545040130615,grad_norm: 0.9105209241933714, iteration: 242876
loss: 1.0352210998535156,grad_norm: 0.9716558514756799, iteration: 242877
loss: 0.9867807030677795,grad_norm: 0.8929541141433082, iteration: 242878
loss: 1.0016142129898071,grad_norm: 0.8926875263928439, iteration: 242879
loss: 1.0244719982147217,grad_norm: 0.9999991437989545, iteration: 242880
loss: 1.0076117515563965,grad_norm: 0.9999992633050976, iteration: 242881
loss: 0.993732213973999,grad_norm: 0.7934024708326677, iteration: 242882
loss: 1.0133097171783447,grad_norm: 0.8722330670053501, iteration: 242883
loss: 0.984652578830719,grad_norm: 0.9542602957553442, iteration: 242884
loss: 0.9848247766494751,grad_norm: 0.7958087645145381, iteration: 242885
loss: 1.030137062072754,grad_norm: 0.8947285113219225, iteration: 242886
loss: 1.0321030616760254,grad_norm: 0.9999997968931883, iteration: 242887
loss: 1.022477388381958,grad_norm: 0.9832046600772543, iteration: 242888
loss: 1.002878189086914,grad_norm: 0.918082946822043, iteration: 242889
loss: 0.9649748206138611,grad_norm: 0.8612237594976477, iteration: 242890
loss: 0.9815516471862793,grad_norm: 0.8706094044275972, iteration: 242891
loss: 0.9984915852546692,grad_norm: 0.9254752000865749, iteration: 242892
loss: 0.981478750705719,grad_norm: 0.8809203108512179, iteration: 242893
loss: 0.9996697902679443,grad_norm: 0.7615977609803276, iteration: 242894
loss: 1.0182470083236694,grad_norm: 0.9999992276203253, iteration: 242895
loss: 0.9945305585861206,grad_norm: 0.7830816335873839, iteration: 242896
loss: 1.0064105987548828,grad_norm: 0.9699613625643778, iteration: 242897
loss: 0.9924794435501099,grad_norm: 0.9755252747296473, iteration: 242898
loss: 0.9721072912216187,grad_norm: 0.999998992197379, iteration: 242899
loss: 1.0166014432907104,grad_norm: 0.8971017205356583, iteration: 242900
loss: 0.99383145570755,grad_norm: 0.8136736788225585, iteration: 242901
loss: 1.0006431341171265,grad_norm: 0.9816684176835931, iteration: 242902
loss: 1.0035394430160522,grad_norm: 0.8139378986596971, iteration: 242903
loss: 1.0244693756103516,grad_norm: 0.9839190749054169, iteration: 242904
loss: 0.9933106303215027,grad_norm: 0.9999991519762703, iteration: 242905
loss: 0.9825921058654785,grad_norm: 0.8306148905475071, iteration: 242906
loss: 1.0174695253372192,grad_norm: 0.879049858727801, iteration: 242907
loss: 1.0289145708084106,grad_norm: 0.9120275259892793, iteration: 242908
loss: 1.013226866722107,grad_norm: 0.7967783715766108, iteration: 242909
loss: 0.9969391822814941,grad_norm: 0.8876499292791595, iteration: 242910
loss: 0.9922425746917725,grad_norm: 0.9999992346823626, iteration: 242911
loss: 0.992009699344635,grad_norm: 0.9158812680693291, iteration: 242912
loss: 0.9571884870529175,grad_norm: 0.8333587394982362, iteration: 242913
loss: 1.0033491849899292,grad_norm: 0.8783330814294549, iteration: 242914
loss: 1.0291996002197266,grad_norm: 0.7930974737973432, iteration: 242915
loss: 1.0105782747268677,grad_norm: 0.9999990793072278, iteration: 242916
loss: 0.976395845413208,grad_norm: 0.9999991053148652, iteration: 242917
loss: 1.001582384109497,grad_norm: 0.8690950893040537, iteration: 242918
loss: 0.9821963310241699,grad_norm: 0.9569603342133652, iteration: 242919
loss: 1.035081386566162,grad_norm: 0.8500391498815267, iteration: 242920
loss: 0.9857227206230164,grad_norm: 0.8494498942321755, iteration: 242921
loss: 1.035607933998108,grad_norm: 0.8670781071004168, iteration: 242922
loss: 1.0200458765029907,grad_norm: 0.799681479583477, iteration: 242923
loss: 0.9913687109947205,grad_norm: 0.927142862958695, iteration: 242924
loss: 0.994401216506958,grad_norm: 0.8602285086476038, iteration: 242925
loss: 1.015189528465271,grad_norm: 0.9011852476998502, iteration: 242926
loss: 1.079722285270691,grad_norm: 0.9924981760479629, iteration: 242927
loss: 0.9953445792198181,grad_norm: 0.9999990385664175, iteration: 242928
loss: 0.9892444014549255,grad_norm: 0.8437057404772041, iteration: 242929
loss: 0.9912644028663635,grad_norm: 0.8471147144142317, iteration: 242930
loss: 0.9844909310340881,grad_norm: 0.7932254014856799, iteration: 242931
loss: 0.9564046263694763,grad_norm: 0.9569123254706242, iteration: 242932
loss: 0.9939990639686584,grad_norm: 0.9200718557168321, iteration: 242933
loss: 0.9810726046562195,grad_norm: 0.7827560346378629, iteration: 242934
loss: 1.0231844186782837,grad_norm: 0.9999989768071761, iteration: 242935
loss: 0.9883211255073547,grad_norm: 0.7369260951188558, iteration: 242936
loss: 0.969818115234375,grad_norm: 0.8924246644262942, iteration: 242937
loss: 0.9997261762619019,grad_norm: 0.9239805740095024, iteration: 242938
loss: 0.9824776649475098,grad_norm: 0.8625259457406025, iteration: 242939
loss: 1.0042911767959595,grad_norm: 0.9368396385296309, iteration: 242940
loss: 1.0227450132369995,grad_norm: 0.9401638197044058, iteration: 242941
loss: 1.0241360664367676,grad_norm: 0.7313964998548489, iteration: 242942
loss: 1.0081980228424072,grad_norm: 0.7157814016798587, iteration: 242943
loss: 1.0048763751983643,grad_norm: 0.9999995117985713, iteration: 242944
loss: 1.0100879669189453,grad_norm: 0.9999993627961168, iteration: 242945
loss: 1.0338588953018188,grad_norm: 0.873802295788747, iteration: 242946
loss: 0.991178035736084,grad_norm: 0.9118423768886436, iteration: 242947
loss: 0.973621666431427,grad_norm: 0.7844614118060225, iteration: 242948
loss: 1.000091314315796,grad_norm: 0.9999988909078479, iteration: 242949
loss: 0.9954783320426941,grad_norm: 0.9999991503780777, iteration: 242950
loss: 1.0271062850952148,grad_norm: 0.9999992775347439, iteration: 242951
loss: 1.0395393371582031,grad_norm: 0.9999994243766707, iteration: 242952
loss: 1.0077916383743286,grad_norm: 0.8707910258972037, iteration: 242953
loss: 1.00761079788208,grad_norm: 0.9999995772253222, iteration: 242954
loss: 1.010879397392273,grad_norm: 0.9739225997386685, iteration: 242955
loss: 1.0139038562774658,grad_norm: 0.9999992008162751, iteration: 242956
loss: 0.9804974794387817,grad_norm: 0.9470116684948549, iteration: 242957
loss: 0.9744200110435486,grad_norm: 0.9558263030573448, iteration: 242958
loss: 0.9533395171165466,grad_norm: 0.999999523424312, iteration: 242959
loss: 0.9976174235343933,grad_norm: 0.9835132423383828, iteration: 242960
loss: 1.0287622213363647,grad_norm: 0.9999990989754107, iteration: 242961
loss: 1.022445559501648,grad_norm: 0.8685257221025808, iteration: 242962
loss: 1.028052568435669,grad_norm: 0.8966096366691769, iteration: 242963
loss: 1.0043622255325317,grad_norm: 0.999999010758469, iteration: 242964
loss: 1.008000373840332,grad_norm: 0.9999990282680401, iteration: 242965
loss: 0.9662073850631714,grad_norm: 0.9999992251169209, iteration: 242966
loss: 1.0068186521530151,grad_norm: 0.8849441887204677, iteration: 242967
loss: 0.9641613960266113,grad_norm: 0.8646039542101772, iteration: 242968
loss: 0.9959423542022705,grad_norm: 0.8098318179252559, iteration: 242969
loss: 0.9807161688804626,grad_norm: 0.999999178431205, iteration: 242970
loss: 0.9933445453643799,grad_norm: 0.882976451495846, iteration: 242971
loss: 1.002956509590149,grad_norm: 0.9180237568378332, iteration: 242972
loss: 1.0328259468078613,grad_norm: 0.9630722440207278, iteration: 242973
loss: 1.0305583477020264,grad_norm: 0.9999992726423695, iteration: 242974
loss: 1.0168037414550781,grad_norm: 0.9378750486969788, iteration: 242975
loss: 0.9639526605606079,grad_norm: 0.7331418257624105, iteration: 242976
loss: 1.0064218044281006,grad_norm: 0.8660827540383058, iteration: 242977
loss: 0.9821234941482544,grad_norm: 0.8918469003097668, iteration: 242978
loss: 1.0057913064956665,grad_norm: 0.9999991470298801, iteration: 242979
loss: 0.9981837272644043,grad_norm: 0.9986635307195659, iteration: 242980
loss: 1.01103937625885,grad_norm: 0.8565118717871525, iteration: 242981
loss: 1.012204885482788,grad_norm: 0.9334486133614677, iteration: 242982
loss: 0.9730929136276245,grad_norm: 0.9999992092820285, iteration: 242983
loss: 1.002242922782898,grad_norm: 0.9123495830484181, iteration: 242984
loss: 0.9656242728233337,grad_norm: 0.8905774335752985, iteration: 242985
loss: 0.9882109761238098,grad_norm: 0.973098433498786, iteration: 242986
loss: 1.0055782794952393,grad_norm: 0.999999113089273, iteration: 242987
loss: 0.966610848903656,grad_norm: 0.9665373530495279, iteration: 242988
loss: 1.0241285562515259,grad_norm: 0.8954121902492349, iteration: 242989
loss: 1.0134670734405518,grad_norm: 0.7348778092068755, iteration: 242990
loss: 0.998538613319397,grad_norm: 0.8761083849827238, iteration: 242991
loss: 0.9757421016693115,grad_norm: 0.9070524516325855, iteration: 242992
loss: 1.0801100730895996,grad_norm: 0.9999992722225741, iteration: 242993
loss: 0.9939165115356445,grad_norm: 0.9999989577479653, iteration: 242994
loss: 0.9703208208084106,grad_norm: 0.8525104624838336, iteration: 242995
loss: 1.0242494344711304,grad_norm: 0.9999991010588222, iteration: 242996
loss: 0.9943811893463135,grad_norm: 0.7323577027905731, iteration: 242997
loss: 1.016575574874878,grad_norm: 0.9999992366140691, iteration: 242998
loss: 1.0112892389297485,grad_norm: 0.9663467078738808, iteration: 242999
loss: 0.9956862926483154,grad_norm: 0.9767455321409162, iteration: 243000
loss: 1.0067106485366821,grad_norm: 0.9879517392420063, iteration: 243001
loss: 0.9819852113723755,grad_norm: 0.8220830433231796, iteration: 243002
loss: 0.9556028246879578,grad_norm: 0.8710485401503074, iteration: 243003
loss: 0.9962526559829712,grad_norm: 0.9999989796811678, iteration: 243004
loss: 1.0118770599365234,grad_norm: 0.9932148718372237, iteration: 243005
loss: 0.989753782749176,grad_norm: 0.7864737561825297, iteration: 243006
loss: 1.007109522819519,grad_norm: 0.8722522270903141, iteration: 243007
loss: 0.9944358468055725,grad_norm: 0.9999990889395903, iteration: 243008
loss: 1.0452805757522583,grad_norm: 0.9058393370053176, iteration: 243009
loss: 0.9989696145057678,grad_norm: 0.8181838150207285, iteration: 243010
loss: 1.0043584108352661,grad_norm: 0.9999989657706126, iteration: 243011
loss: 0.9725602865219116,grad_norm: 0.8984692959748072, iteration: 243012
loss: 1.0038158893585205,grad_norm: 0.835749849208145, iteration: 243013
loss: 1.0256980657577515,grad_norm: 0.8177106500791139, iteration: 243014
loss: 1.0110901594161987,grad_norm: 0.8370785397064371, iteration: 243015
loss: 1.0029006004333496,grad_norm: 0.7782480592376773, iteration: 243016
loss: 1.0084054470062256,grad_norm: 0.8031981701326651, iteration: 243017
loss: 1.0014982223510742,grad_norm: 0.8242022388626853, iteration: 243018
loss: 1.0210462808609009,grad_norm: 0.7833717927112673, iteration: 243019
loss: 0.9973224401473999,grad_norm: 0.8210972903572522, iteration: 243020
loss: 0.9804514646530151,grad_norm: 0.7875930431821873, iteration: 243021
loss: 1.000168800354004,grad_norm: 0.8278607653308014, iteration: 243022
loss: 0.9918087124824524,grad_norm: 0.962379798238669, iteration: 243023
loss: 0.9915164709091187,grad_norm: 0.973656991123652, iteration: 243024
loss: 1.0262551307678223,grad_norm: 0.9999990902302603, iteration: 243025
loss: 1.0151704549789429,grad_norm: 0.9999993903230726, iteration: 243026
loss: 0.9956165552139282,grad_norm: 0.8223603453266275, iteration: 243027
loss: 0.9911779165267944,grad_norm: 0.9999990367716979, iteration: 243028
loss: 1.025541067123413,grad_norm: 0.8330785074505579, iteration: 243029
loss: 0.9900552034378052,grad_norm: 0.9137728496911979, iteration: 243030
loss: 1.0050125122070312,grad_norm: 0.9999990744908602, iteration: 243031
loss: 1.0099895000457764,grad_norm: 0.9999991238739877, iteration: 243032
loss: 1.0217220783233643,grad_norm: 0.9999990325874711, iteration: 243033
loss: 1.0155478715896606,grad_norm: 0.8843456529577604, iteration: 243034
loss: 0.9970991015434265,grad_norm: 0.969818317081575, iteration: 243035
loss: 0.9948463439941406,grad_norm: 0.8487269767479075, iteration: 243036
loss: 0.9753555059432983,grad_norm: 0.9386056932284434, iteration: 243037
loss: 0.9643359184265137,grad_norm: 0.8049474073864998, iteration: 243038
loss: 1.053317904472351,grad_norm: 0.9671659458903087, iteration: 243039
loss: 0.9825760126113892,grad_norm: 0.7817726363538885, iteration: 243040
loss: 1.0135540962219238,grad_norm: 0.9999995007027009, iteration: 243041
loss: 0.9991874098777771,grad_norm: 0.999999046621938, iteration: 243042
loss: 0.9784287214279175,grad_norm: 0.9414836920434612, iteration: 243043
loss: 0.9751647710800171,grad_norm: 0.8598764891404257, iteration: 243044
loss: 1.0319266319274902,grad_norm: 0.7377531220005823, iteration: 243045
loss: 1.0241732597351074,grad_norm: 0.7605489509657871, iteration: 243046
loss: 1.0035507678985596,grad_norm: 0.8784561050124118, iteration: 243047
loss: 0.9940693378448486,grad_norm: 0.9528069156442056, iteration: 243048
loss: 0.9601683020591736,grad_norm: 0.99999913255967, iteration: 243049
loss: 0.9527274966239929,grad_norm: 0.9240026314483272, iteration: 243050
loss: 0.9945271015167236,grad_norm: 0.8850331920733294, iteration: 243051
loss: 0.9499261975288391,grad_norm: 0.9560536722154251, iteration: 243052
loss: 1.0075595378875732,grad_norm: 0.9466450604827985, iteration: 243053
loss: 0.9952573180198669,grad_norm: 0.8157674258668276, iteration: 243054
loss: 0.996525228023529,grad_norm: 0.8512705159970776, iteration: 243055
loss: 1.0247026681900024,grad_norm: 0.8925600239487648, iteration: 243056
loss: 0.9928733110427856,grad_norm: 0.9804853546266169, iteration: 243057
loss: 1.0426944494247437,grad_norm: 0.8740696714296936, iteration: 243058
loss: 0.9983792304992676,grad_norm: 0.7323912441163646, iteration: 243059
loss: 0.9831844568252563,grad_norm: 0.930511142938641, iteration: 243060
loss: 0.9923450946807861,grad_norm: 0.981082355234716, iteration: 243061
loss: 0.9966294169425964,grad_norm: 0.9465112220216777, iteration: 243062
loss: 0.9885898232460022,grad_norm: 0.8387305410626356, iteration: 243063
loss: 0.9842841625213623,grad_norm: 0.9630368637191459, iteration: 243064
loss: 1.0173031091690063,grad_norm: 0.9091692245396412, iteration: 243065
loss: 0.9715184569358826,grad_norm: 0.7763190711312528, iteration: 243066
loss: 1.001584529876709,grad_norm: 0.9580906752388832, iteration: 243067
loss: 1.0179495811462402,grad_norm: 0.9001639645342479, iteration: 243068
loss: 1.0108095407485962,grad_norm: 0.7974532011130778, iteration: 243069
loss: 0.9806505441665649,grad_norm: 0.8227625871625496, iteration: 243070
loss: 1.0228610038757324,grad_norm: 0.9247535147322855, iteration: 243071
loss: 1.011658787727356,grad_norm: 0.8115336091745998, iteration: 243072
loss: 0.9556995034217834,grad_norm: 0.8545051595594665, iteration: 243073
loss: 1.0485919713974,grad_norm: 0.9999991084586528, iteration: 243074
loss: 0.9778937697410583,grad_norm: 0.9206885924486077, iteration: 243075
loss: 1.0827044248580933,grad_norm: 0.9240724434439612, iteration: 243076
loss: 1.0005995035171509,grad_norm: 0.9283314489090492, iteration: 243077
loss: 0.9495267271995544,grad_norm: 0.8268375544213964, iteration: 243078
loss: 0.9952926635742188,grad_norm: 0.8011024235739794, iteration: 243079
loss: 0.9859142303466797,grad_norm: 0.8840069926790598, iteration: 243080
loss: 0.9693835973739624,grad_norm: 0.84326524261062, iteration: 243081
loss: 0.9947690367698669,grad_norm: 0.9999991138803953, iteration: 243082
loss: 0.978316605091095,grad_norm: 0.843145120745258, iteration: 243083
loss: 1.0035130977630615,grad_norm: 0.8744855817601196, iteration: 243084
loss: 1.0196564197540283,grad_norm: 0.9715872424330269, iteration: 243085
loss: 0.9996649622917175,grad_norm: 0.9858211176887013, iteration: 243086
loss: 0.9997825026512146,grad_norm: 0.9002450032110586, iteration: 243087
loss: 0.9765295386314392,grad_norm: 0.764882902405507, iteration: 243088
loss: 1.003777027130127,grad_norm: 0.8073655677186156, iteration: 243089
loss: 1.076225996017456,grad_norm: 0.9999996555351524, iteration: 243090
loss: 0.9932329654693604,grad_norm: 0.8901878737388234, iteration: 243091
loss: 1.0000109672546387,grad_norm: 0.999999001624867, iteration: 243092
loss: 1.0004132986068726,grad_norm: 0.8773350854279917, iteration: 243093
loss: 1.0441495180130005,grad_norm: 0.8534261057427484, iteration: 243094
loss: 1.0560510158538818,grad_norm: 0.999999856201167, iteration: 243095
loss: 0.9711706638336182,grad_norm: 0.9778251969126265, iteration: 243096
loss: 1.0055559873580933,grad_norm: 0.99792217787507, iteration: 243097
loss: 1.0060617923736572,grad_norm: 0.8802642122780489, iteration: 243098
loss: 0.9884713292121887,grad_norm: 0.9999999633315054, iteration: 243099
loss: 0.9797636270523071,grad_norm: 0.8630347275776019, iteration: 243100
loss: 0.9728230237960815,grad_norm: 0.8397882927695319, iteration: 243101
loss: 0.9681435227394104,grad_norm: 0.7717157137801781, iteration: 243102
loss: 1.013169527053833,grad_norm: 0.9500581124301762, iteration: 243103
loss: 1.025408148765564,grad_norm: 0.9187343935549648, iteration: 243104
loss: 1.011401891708374,grad_norm: 0.9821834630388137, iteration: 243105
loss: 0.9778318405151367,grad_norm: 0.9999992774502512, iteration: 243106
loss: 1.0164158344268799,grad_norm: 0.9999989897874343, iteration: 243107
loss: 0.9852376580238342,grad_norm: 0.8374411326063304, iteration: 243108
loss: 1.0107837915420532,grad_norm: 0.9256698155449401, iteration: 243109
loss: 1.0632437467575073,grad_norm: 0.9999991623494869, iteration: 243110
loss: 0.996093213558197,grad_norm: 0.900573640320894, iteration: 243111
loss: 0.9529948234558105,grad_norm: 0.9237924439029065, iteration: 243112
loss: 1.0011132955551147,grad_norm: 0.9436728690664894, iteration: 243113
loss: 0.9423595666885376,grad_norm: 0.9037443250426918, iteration: 243114
loss: 1.0247994661331177,grad_norm: 0.8795026536702685, iteration: 243115
loss: 0.9910145401954651,grad_norm: 0.9611271626416377, iteration: 243116
loss: 0.9743005037307739,grad_norm: 0.9560443127676937, iteration: 243117
loss: 1.0115714073181152,grad_norm: 0.9999991909418918, iteration: 243118
loss: 0.9663864374160767,grad_norm: 0.9999990905163746, iteration: 243119
loss: 1.0259904861450195,grad_norm: 0.9058816380431626, iteration: 243120
loss: 0.9841101765632629,grad_norm: 0.8793290580991546, iteration: 243121
loss: 0.9926191568374634,grad_norm: 0.8530430736815601, iteration: 243122
loss: 1.0333716869354248,grad_norm: 0.9371318717904846, iteration: 243123
loss: 1.0105525255203247,grad_norm: 0.7935266965371147, iteration: 243124
loss: 0.997391402721405,grad_norm: 0.8679643544554485, iteration: 243125
loss: 1.0208086967468262,grad_norm: 0.9933414465864077, iteration: 243126
loss: 0.9936734437942505,grad_norm: 0.9999990299410396, iteration: 243127
loss: 0.9772388339042664,grad_norm: 0.999999033603647, iteration: 243128
loss: 0.9880899786949158,grad_norm: 0.8193654541180745, iteration: 243129
loss: 1.049296498298645,grad_norm: 0.9999993275336686, iteration: 243130
loss: 1.01054048538208,grad_norm: 0.8769091630977173, iteration: 243131
loss: 1.0011475086212158,grad_norm: 0.8998442206984179, iteration: 243132
loss: 0.9692254662513733,grad_norm: 0.9999990932900241, iteration: 243133
loss: 1.0148311853408813,grad_norm: 0.8838549704283307, iteration: 243134
loss: 0.9728015065193176,grad_norm: 0.8573683577363957, iteration: 243135
loss: 1.0000611543655396,grad_norm: 0.8830866231985642, iteration: 243136
loss: 1.0167522430419922,grad_norm: 0.927288367707366, iteration: 243137
loss: 0.9820308685302734,grad_norm: 0.8928909048405234, iteration: 243138
loss: 0.9702875018119812,grad_norm: 0.9332091542384644, iteration: 243139
loss: 0.9935913681983948,grad_norm: 0.8051496762286378, iteration: 243140
loss: 1.0242466926574707,grad_norm: 0.988855754396313, iteration: 243141
loss: 1.055301308631897,grad_norm: 0.9596428187080774, iteration: 243142
loss: 0.9733552932739258,grad_norm: 0.7956999870548882, iteration: 243143
loss: 0.9835771322250366,grad_norm: 0.974354688289486, iteration: 243144
loss: 0.9958783388137817,grad_norm: 0.882674105407162, iteration: 243145
loss: 1.0050395727157593,grad_norm: 0.9999991437839547, iteration: 243146
loss: 0.9871035814285278,grad_norm: 0.6860145445663812, iteration: 243147
loss: 0.999667763710022,grad_norm: 0.8374534599927294, iteration: 243148
loss: 1.0186054706573486,grad_norm: 0.8394841747208931, iteration: 243149
loss: 1.050569772720337,grad_norm: 0.8879975208478127, iteration: 243150
loss: 1.0105412006378174,grad_norm: 0.8398295940038265, iteration: 243151
loss: 0.9860990643501282,grad_norm: 0.8830722402952672, iteration: 243152
loss: 1.0051096677780151,grad_norm: 0.9999990115640064, iteration: 243153
loss: 1.0142484903335571,grad_norm: 0.988691891369726, iteration: 243154
loss: 0.9999597072601318,grad_norm: 0.8217128317903701, iteration: 243155
loss: 1.0187275409698486,grad_norm: 0.9427867908011144, iteration: 243156
loss: 1.00918710231781,grad_norm: 0.728431474440267, iteration: 243157
loss: 0.962402880191803,grad_norm: 0.8955760677789392, iteration: 243158
loss: 1.0094174146652222,grad_norm: 0.9999992620839813, iteration: 243159
loss: 0.970621645450592,grad_norm: 0.7749519498919663, iteration: 243160
loss: 0.9767961502075195,grad_norm: 0.7964031798211627, iteration: 243161
loss: 0.9672691226005554,grad_norm: 0.8978935767062037, iteration: 243162
loss: 1.0066397190093994,grad_norm: 0.9448577391675492, iteration: 243163
loss: 0.9904198050498962,grad_norm: 0.9999991268873863, iteration: 243164
loss: 1.0225565433502197,grad_norm: 0.9999994296413418, iteration: 243165
loss: 1.0177136659622192,grad_norm: 0.9040251099358296, iteration: 243166
loss: 0.9972161650657654,grad_norm: 0.7646519147154117, iteration: 243167
loss: 1.0552738904953003,grad_norm: 0.8859946346403235, iteration: 243168
loss: 0.9796197414398193,grad_norm: 0.8826260794282802, iteration: 243169
loss: 0.9832786321640015,grad_norm: 0.8794262342011154, iteration: 243170
loss: 1.0034691095352173,grad_norm: 0.9999991167543038, iteration: 243171
loss: 0.9873257279396057,grad_norm: 0.9999990404079439, iteration: 243172
loss: 1.0174614191055298,grad_norm: 0.8360423139858566, iteration: 243173
loss: 0.9804914593696594,grad_norm: 0.7285756912775431, iteration: 243174
loss: 1.017014741897583,grad_norm: 0.9999991535815104, iteration: 243175
loss: 0.9856328964233398,grad_norm: 0.9795605298417006, iteration: 243176
loss: 0.9940879940986633,grad_norm: 0.9797382138785649, iteration: 243177
loss: 0.9992817640304565,grad_norm: 0.9999992021469173, iteration: 243178
loss: 1.0130524635314941,grad_norm: 0.8933186182290164, iteration: 243179
loss: 0.9384362697601318,grad_norm: 0.9406018587529112, iteration: 243180
loss: 1.0389697551727295,grad_norm: 0.9999991140537787, iteration: 243181
loss: 0.9995500445365906,grad_norm: 0.9044143348518762, iteration: 243182
loss: 0.9571119546890259,grad_norm: 0.8075802240062425, iteration: 243183
loss: 1.005821943283081,grad_norm: 0.8430803282552024, iteration: 243184
loss: 1.03451669216156,grad_norm: 0.8841539805029826, iteration: 243185
loss: 1.0792466402053833,grad_norm: 0.9999993507890601, iteration: 243186
loss: 0.9954903721809387,grad_norm: 0.8729364925625517, iteration: 243187
loss: 1.008814811706543,grad_norm: 0.8908657195274264, iteration: 243188
loss: 1.0381721258163452,grad_norm: 0.9506005531765436, iteration: 243189
loss: 1.04411780834198,grad_norm: 0.8744416851906311, iteration: 243190
loss: 0.9969761967658997,grad_norm: 0.8261734458267661, iteration: 243191
loss: 1.0246915817260742,grad_norm: 0.9956530929858765, iteration: 243192
loss: 0.9831008911132812,grad_norm: 0.9615211448618378, iteration: 243193
loss: 1.009089469909668,grad_norm: 0.999999087947545, iteration: 243194
loss: 0.983483612537384,grad_norm: 0.8715341385232884, iteration: 243195
loss: 0.9565616250038147,grad_norm: 0.9999990689854653, iteration: 243196
loss: 1.0033953189849854,grad_norm: 0.9028037304453459, iteration: 243197
loss: 0.957055926322937,grad_norm: 0.8525673865874189, iteration: 243198
loss: 0.9856263995170593,grad_norm: 0.9698143223127328, iteration: 243199
loss: 1.017005205154419,grad_norm: 0.8725354553523986, iteration: 243200
loss: 1.008832573890686,grad_norm: 0.9999995369365172, iteration: 243201
loss: 1.0157657861709595,grad_norm: 0.7078352918283, iteration: 243202
loss: 0.9930237531661987,grad_norm: 0.7770578715651236, iteration: 243203
loss: 1.0375866889953613,grad_norm: 0.9977369191503777, iteration: 243204
loss: 1.000828742980957,grad_norm: 0.865829624827275, iteration: 243205
loss: 0.9808729290962219,grad_norm: 0.806503992055508, iteration: 243206
loss: 0.996845006942749,grad_norm: 0.852750134880954, iteration: 243207
loss: 1.022402286529541,grad_norm: 0.9999993516717021, iteration: 243208
loss: 0.994385302066803,grad_norm: 0.999999087906582, iteration: 243209
loss: 0.9735731482505798,grad_norm: 0.9534963828941418, iteration: 243210
loss: 1.0463162660598755,grad_norm: 0.9871009636826635, iteration: 243211
loss: 0.9858019351959229,grad_norm: 0.886630261894016, iteration: 243212
loss: 0.9962921142578125,grad_norm: 0.9219427382910482, iteration: 243213
loss: 1.0015524625778198,grad_norm: 0.9198376570416369, iteration: 243214
loss: 1.0081477165222168,grad_norm: 0.7696338228561871, iteration: 243215
loss: 1.0018600225448608,grad_norm: 0.9520458517880069, iteration: 243216
loss: 1.030908226966858,grad_norm: 0.9999998728350351, iteration: 243217
loss: 1.016599416732788,grad_norm: 0.9999992941497986, iteration: 243218
loss: 1.1038955450057983,grad_norm: 0.9687787465497426, iteration: 243219
loss: 1.0086919069290161,grad_norm: 0.8185362750659176, iteration: 243220
loss: 0.9732429385185242,grad_norm: 0.9374111211558332, iteration: 243221
loss: 0.9984514713287354,grad_norm: 0.9223423054129392, iteration: 243222
loss: 1.007581114768982,grad_norm: 0.9999993637994498, iteration: 243223
loss: 1.0076290369033813,grad_norm: 0.9936818078861575, iteration: 243224
loss: 1.0179287195205688,grad_norm: 0.8059037589764219, iteration: 243225
loss: 1.0165984630584717,grad_norm: 0.8616607424805338, iteration: 243226
loss: 0.9672321677207947,grad_norm: 0.924235691394869, iteration: 243227
loss: 1.0387762784957886,grad_norm: 0.9999993437555161, iteration: 243228
loss: 1.0236620903015137,grad_norm: 0.9999990651864568, iteration: 243229
loss: 0.9949042797088623,grad_norm: 0.9010589088692192, iteration: 243230
loss: 0.997951865196228,grad_norm: 0.8830588669881191, iteration: 243231
loss: 1.009470820426941,grad_norm: 0.8779932618737444, iteration: 243232
loss: 0.9690157175064087,grad_norm: 0.8375825277349996, iteration: 243233
loss: 1.028019666671753,grad_norm: 0.9933013897714681, iteration: 243234
loss: 0.9797516465187073,grad_norm: 0.9161648398355554, iteration: 243235
loss: 1.038779377937317,grad_norm: 0.9377418393731223, iteration: 243236
loss: 0.9906712770462036,grad_norm: 0.932523456416833, iteration: 243237
loss: 1.043163537979126,grad_norm: 0.9999992277934066, iteration: 243238
loss: 1.0042366981506348,grad_norm: 0.9595425143659917, iteration: 243239
loss: 0.9828882813453674,grad_norm: 0.7975777278806568, iteration: 243240
loss: 0.9989158511161804,grad_norm: 0.8300898388936844, iteration: 243241
loss: 1.0050007104873657,grad_norm: 0.9826340938460025, iteration: 243242
loss: 1.0003503561019897,grad_norm: 0.9013869294167134, iteration: 243243
loss: 1.0042989253997803,grad_norm: 0.9999991147729496, iteration: 243244
loss: 1.018794298171997,grad_norm: 0.8831275532638608, iteration: 243245
loss: 0.9904662370681763,grad_norm: 0.9080356547742108, iteration: 243246
loss: 1.0007336139678955,grad_norm: 0.9999991854646461, iteration: 243247
loss: 0.9585412740707397,grad_norm: 0.8220241238601632, iteration: 243248
loss: 1.2686717510223389,grad_norm: 0.9999998200459771, iteration: 243249
loss: 0.9644916653633118,grad_norm: 0.9999990421448796, iteration: 243250
loss: 0.9753586649894714,grad_norm: 0.8300107665914404, iteration: 243251
loss: 0.9997619390487671,grad_norm: 0.9118487056558187, iteration: 243252
loss: 1.02312171459198,grad_norm: 0.8161243981396604, iteration: 243253
loss: 1.0448030233383179,grad_norm: 0.9754373665576161, iteration: 243254
loss: 1.012531042098999,grad_norm: 0.8393917087613056, iteration: 243255
loss: 1.0275219678878784,grad_norm: 0.999999078454434, iteration: 243256
loss: 0.995035707950592,grad_norm: 0.9037952731164218, iteration: 243257
loss: 0.9590604901313782,grad_norm: 0.7983945195634642, iteration: 243258
loss: 1.0053192377090454,grad_norm: 0.818387552511365, iteration: 243259
loss: 1.0242927074432373,grad_norm: 0.8588800730839137, iteration: 243260
loss: 0.9876862168312073,grad_norm: 0.9999989823483068, iteration: 243261
loss: 0.9610527753829956,grad_norm: 0.9246898929903946, iteration: 243262
loss: 1.0100466012954712,grad_norm: 0.8061909167254376, iteration: 243263
loss: 1.039386510848999,grad_norm: 0.9999995285709687, iteration: 243264
loss: 0.9487358927726746,grad_norm: 0.8571498856736741, iteration: 243265
loss: 0.9984614253044128,grad_norm: 0.8489255876811819, iteration: 243266
loss: 0.9729968905448914,grad_norm: 0.6700959870649783, iteration: 243267
loss: 1.0176223516464233,grad_norm: 0.9999997049237661, iteration: 243268
loss: 0.9682472348213196,grad_norm: 0.9612833975874364, iteration: 243269
loss: 0.9713701605796814,grad_norm: 0.8972137763175183, iteration: 243270
loss: 0.998016893863678,grad_norm: 0.9953501019161177, iteration: 243271
loss: 0.9663534760475159,grad_norm: 0.8098908000551787, iteration: 243272
loss: 1.0023736953735352,grad_norm: 0.8641429023524806, iteration: 243273
loss: 0.9837154150009155,grad_norm: 0.8858940213647396, iteration: 243274
loss: 0.99446040391922,grad_norm: 0.9458252858286075, iteration: 243275
loss: 0.9729905128479004,grad_norm: 0.8602908488149655, iteration: 243276
loss: 0.9955303072929382,grad_norm: 0.9999990779645088, iteration: 243277
loss: 1.0444705486297607,grad_norm: 0.9999989162933329, iteration: 243278
loss: 1.0130584239959717,grad_norm: 0.8800220931753817, iteration: 243279
loss: 0.9845454692840576,grad_norm: 0.8088020908333982, iteration: 243280
loss: 0.980932891368866,grad_norm: 0.9692392886752518, iteration: 243281
loss: 1.002755880355835,grad_norm: 0.993368412150061, iteration: 243282
loss: 0.9754605889320374,grad_norm: 0.9199096307722991, iteration: 243283
loss: 1.0124536752700806,grad_norm: 0.9999994918772097, iteration: 243284
loss: 0.9211415648460388,grad_norm: 0.920921919550415, iteration: 243285
loss: 1.0006463527679443,grad_norm: 0.9706329768850459, iteration: 243286
loss: 1.0211079120635986,grad_norm: 0.9999990925172516, iteration: 243287
loss: 1.0697829723358154,grad_norm: 0.8993261325444932, iteration: 243288
loss: 0.9900414943695068,grad_norm: 0.8162839380017056, iteration: 243289
loss: 1.0076656341552734,grad_norm: 0.8656263758874901, iteration: 243290
loss: 1.0169153213500977,grad_norm: 0.9357224890183008, iteration: 243291
loss: 0.9962273240089417,grad_norm: 0.9333844621412556, iteration: 243292
loss: 1.0033599138259888,grad_norm: 0.9999993677674168, iteration: 243293
loss: 1.0166114568710327,grad_norm: 0.9999992465120564, iteration: 243294
loss: 1.0220277309417725,grad_norm: 0.8222640181767564, iteration: 243295
loss: 0.9824475646018982,grad_norm: 0.9485274224095345, iteration: 243296
loss: 0.9850276708602905,grad_norm: 0.8985585329728232, iteration: 243297
loss: 1.0301340818405151,grad_norm: 0.873788043414046, iteration: 243298
loss: 1.022322416305542,grad_norm: 0.8375044484724405, iteration: 243299
loss: 1.2786493301391602,grad_norm: 0.9999992456616835, iteration: 243300
loss: 1.035475492477417,grad_norm: 0.7669371577671262, iteration: 243301
loss: 0.9549942016601562,grad_norm: 0.7862370287574834, iteration: 243302
loss: 1.0058318376541138,grad_norm: 0.9999990100796556, iteration: 243303
loss: 0.9852167367935181,grad_norm: 0.7948450194424718, iteration: 243304
loss: 1.0271769762039185,grad_norm: 0.8674324923831893, iteration: 243305
loss: 1.0024439096450806,grad_norm: 0.9999996199697595, iteration: 243306
loss: 0.9949918985366821,grad_norm: 0.733329516489669, iteration: 243307
loss: 1.0221161842346191,grad_norm: 0.9164798334016683, iteration: 243308
loss: 1.0002164840698242,grad_norm: 0.9999991295841902, iteration: 243309
loss: 0.985270082950592,grad_norm: 0.8882859874281894, iteration: 243310
loss: 1.0241785049438477,grad_norm: 0.843997981668046, iteration: 243311
loss: 0.965468168258667,grad_norm: 0.9999992339245364, iteration: 243312
loss: 1.0182251930236816,grad_norm: 0.8354227429944056, iteration: 243313
loss: 1.001423716545105,grad_norm: 0.768068957759943, iteration: 243314
loss: 0.9809118509292603,grad_norm: 0.8969719789144373, iteration: 243315
loss: 1.0035128593444824,grad_norm: 0.999999135101352, iteration: 243316
loss: 1.0419456958770752,grad_norm: 0.9999993201461775, iteration: 243317
loss: 0.9958611130714417,grad_norm: 0.9999991327242956, iteration: 243318
loss: 1.0154787302017212,grad_norm: 0.9999992737029334, iteration: 243319
loss: 1.0401498079299927,grad_norm: 0.8302353770804392, iteration: 243320
loss: 1.0033668279647827,grad_norm: 0.8271296588052911, iteration: 243321
loss: 0.9614167809486389,grad_norm: 0.8922951638058553, iteration: 243322
loss: 1.001923680305481,grad_norm: 0.999999059332068, iteration: 243323
loss: 0.9756301045417786,grad_norm: 0.9564618909661317, iteration: 243324
loss: 0.9962854981422424,grad_norm: 0.84802474081547, iteration: 243325
loss: 0.9710787534713745,grad_norm: 0.9999991027809054, iteration: 243326
loss: 1.0567033290863037,grad_norm: 0.9999990812683738, iteration: 243327
loss: 0.9993408918380737,grad_norm: 0.9999991355375379, iteration: 243328
loss: 1.0052974224090576,grad_norm: 0.9367562139596054, iteration: 243329
loss: 0.9351205229759216,grad_norm: 0.9031896536093272, iteration: 243330
loss: 1.0017216205596924,grad_norm: 0.9206204039329225, iteration: 243331
loss: 0.9768245220184326,grad_norm: 0.995168518821046, iteration: 243332
loss: 0.9600985646247864,grad_norm: 0.9795154510790808, iteration: 243333
loss: 0.9913601875305176,grad_norm: 0.9999990687130326, iteration: 243334
loss: 0.9924361705780029,grad_norm: 0.9999991200306683, iteration: 243335
loss: 1.0320874452590942,grad_norm: 0.9270820243579275, iteration: 243336
loss: 0.9988884925842285,grad_norm: 0.9809868505691159, iteration: 243337
loss: 0.9980944395065308,grad_norm: 0.9999991229701283, iteration: 243338
loss: 0.9980877041816711,grad_norm: 0.8266732127186316, iteration: 243339
loss: 1.0066007375717163,grad_norm: 0.9954552129017004, iteration: 243340
loss: 1.009687066078186,grad_norm: 0.8324603920709825, iteration: 243341
loss: 0.9834901690483093,grad_norm: 0.851027673522145, iteration: 243342
loss: 0.9952388405799866,grad_norm: 0.7718921401418958, iteration: 243343
loss: 1.0229649543762207,grad_norm: 0.9101760842132147, iteration: 243344
loss: 0.977580726146698,grad_norm: 0.8456682972238154, iteration: 243345
loss: 1.0099987983703613,grad_norm: 0.8462720666543075, iteration: 243346
loss: 0.9689891338348389,grad_norm: 0.8709132558507235, iteration: 243347
loss: 0.9821758270263672,grad_norm: 0.9999990028725524, iteration: 243348
loss: 0.9877699017524719,grad_norm: 0.857352459598271, iteration: 243349
loss: 0.9735912680625916,grad_norm: 0.9865614131711117, iteration: 243350
loss: 1.0129905939102173,grad_norm: 0.9999991172325702, iteration: 243351
loss: 0.9791238903999329,grad_norm: 0.9362877419243243, iteration: 243352
loss: 1.0147420167922974,grad_norm: 0.8949919007127926, iteration: 243353
loss: 1.0471129417419434,grad_norm: 0.999999192617749, iteration: 243354
loss: 0.9929686784744263,grad_norm: 0.730824087420191, iteration: 243355
loss: 0.9825686812400818,grad_norm: 0.9149204019551043, iteration: 243356
loss: 1.0723768472671509,grad_norm: 0.9999996491866167, iteration: 243357
loss: 0.9714146852493286,grad_norm: 0.994341325887959, iteration: 243358
loss: 1.0265355110168457,grad_norm: 0.9999991805200837, iteration: 243359
loss: 0.9816768169403076,grad_norm: 0.864099138952011, iteration: 243360
loss: 1.0156662464141846,grad_norm: 0.9764983216029648, iteration: 243361
loss: 0.9976072907447815,grad_norm: 0.9127021068225426, iteration: 243362
loss: 0.9762117862701416,grad_norm: 0.9559099571697455, iteration: 243363
loss: 1.0442252159118652,grad_norm: 0.9287987969940743, iteration: 243364
loss: 0.9841808080673218,grad_norm: 0.7559536109804676, iteration: 243365
loss: 1.022033929824829,grad_norm: 0.9330022854482622, iteration: 243366
loss: 1.0134849548339844,grad_norm: 0.8590610617985244, iteration: 243367
loss: 0.9545437097549438,grad_norm: 0.9999991064324845, iteration: 243368
loss: 0.9794339537620544,grad_norm: 0.9999990578829001, iteration: 243369
loss: 1.0990352630615234,grad_norm: 0.9509267947982726, iteration: 243370
loss: 0.9564224481582642,grad_norm: 0.9199428633773398, iteration: 243371
loss: 1.0147991180419922,grad_norm: 0.7923895116298163, iteration: 243372
loss: 0.9958189129829407,grad_norm: 0.8989405585789195, iteration: 243373
loss: 1.0301295518875122,grad_norm: 0.9607931243575568, iteration: 243374
loss: 0.9694077968597412,grad_norm: 0.9467887060734883, iteration: 243375
loss: 1.0172975063323975,grad_norm: 0.8415084979472728, iteration: 243376
loss: 0.9976984858512878,grad_norm: 0.9170498252967597, iteration: 243377
loss: 1.0335302352905273,grad_norm: 0.893765464651929, iteration: 243378
loss: 1.016823410987854,grad_norm: 0.9999992162208835, iteration: 243379
loss: 0.9766291379928589,grad_norm: 0.9999990581136106, iteration: 243380
loss: 1.0513836145401,grad_norm: 0.875085671875563, iteration: 243381
loss: 1.0046792030334473,grad_norm: 0.99999914777507, iteration: 243382
loss: 1.0134657621383667,grad_norm: 0.9591666013401281, iteration: 243383
loss: 1.026269793510437,grad_norm: 0.9999990660967508, iteration: 243384
loss: 0.9928048849105835,grad_norm: 0.9409841106198064, iteration: 243385
loss: 1.0026464462280273,grad_norm: 0.8957367355960716, iteration: 243386
loss: 0.9781644940376282,grad_norm: 0.8172479432361061, iteration: 243387
loss: 1.0015209913253784,grad_norm: 0.869325535353555, iteration: 243388
loss: 1.0092661380767822,grad_norm: 0.8968079408621524, iteration: 243389
loss: 0.9361492395401001,grad_norm: 0.9999991300747862, iteration: 243390
loss: 1.0295491218566895,grad_norm: 0.9999991168118229, iteration: 243391
loss: 0.9775804281234741,grad_norm: 0.8228375221129106, iteration: 243392
loss: 0.9882634878158569,grad_norm: 0.990493188824106, iteration: 243393
loss: 1.0091311931610107,grad_norm: 0.6945292689212957, iteration: 243394
loss: 0.9861704111099243,grad_norm: 0.9803414181843969, iteration: 243395
loss: 0.9966752529144287,grad_norm: 0.9999990754650466, iteration: 243396
loss: 1.0173801183700562,grad_norm: 0.7862372608768626, iteration: 243397
loss: 1.019289493560791,grad_norm: 0.9253694208315301, iteration: 243398
loss: 1.0207632780075073,grad_norm: 0.9999994328671784, iteration: 243399
loss: 1.0148440599441528,grad_norm: 0.8547850845917151, iteration: 243400
loss: 0.9984158277511597,grad_norm: 0.9056838312030719, iteration: 243401
loss: 1.0030165910720825,grad_norm: 0.9999990699473504, iteration: 243402
loss: 0.9438692927360535,grad_norm: 0.9002366429491725, iteration: 243403
loss: 1.0450011491775513,grad_norm: 0.9999998565530639, iteration: 243404
loss: 0.996086061000824,grad_norm: 0.9999991160106917, iteration: 243405
loss: 1.0107758045196533,grad_norm: 0.8614481175530403, iteration: 243406
loss: 1.001180648803711,grad_norm: 0.9999995271766475, iteration: 243407
loss: 1.0336406230926514,grad_norm: 0.7905990696744662, iteration: 243408
loss: 1.0463348627090454,grad_norm: 0.8608182544019327, iteration: 243409
loss: 0.9657787084579468,grad_norm: 0.9128006543038987, iteration: 243410
loss: 1.0008900165557861,grad_norm: 0.9999994451934563, iteration: 243411
loss: 0.981156587600708,grad_norm: 0.9999991029776945, iteration: 243412
loss: 1.0358330011367798,grad_norm: 0.9409185711818582, iteration: 243413
loss: 1.0731372833251953,grad_norm: 0.9999998026987569, iteration: 243414
loss: 1.069972038269043,grad_norm: 0.999999846685425, iteration: 243415
loss: 0.991920530796051,grad_norm: 0.8174467305861794, iteration: 243416
loss: 1.0225571393966675,grad_norm: 0.9311321199776784, iteration: 243417
loss: 0.9938752055168152,grad_norm: 0.8795318143569156, iteration: 243418
loss: 0.9558002948760986,grad_norm: 0.9262851677124775, iteration: 243419
loss: 1.0029186010360718,grad_norm: 0.7416216704549329, iteration: 243420
loss: 1.0084562301635742,grad_norm: 0.98933169018875, iteration: 243421
loss: 1.0002106428146362,grad_norm: 0.9426792777720263, iteration: 243422
loss: 0.9680047631263733,grad_norm: 0.8909089259264616, iteration: 243423
loss: 1.0142525434494019,grad_norm: 0.9622501182111793, iteration: 243424
loss: 0.9704957008361816,grad_norm: 0.8367663122509776, iteration: 243425
loss: 0.9916760921478271,grad_norm: 0.8165955239605335, iteration: 243426
loss: 1.0090601444244385,grad_norm: 0.8618465553875911, iteration: 243427
loss: 0.9862536787986755,grad_norm: 0.8901554655913431, iteration: 243428
loss: 0.9812626838684082,grad_norm: 0.7685997549872028, iteration: 243429
loss: 1.0120611190795898,grad_norm: 0.8352017148681119, iteration: 243430
loss: 1.0248565673828125,grad_norm: 0.8802925728302488, iteration: 243431
loss: 0.9781056642532349,grad_norm: 0.7546249085491152, iteration: 243432
loss: 0.9865682125091553,grad_norm: 0.999999106502983, iteration: 243433
loss: 1.0102319717407227,grad_norm: 0.8501696783595206, iteration: 243434
loss: 0.9927488565444946,grad_norm: 0.7838029699738646, iteration: 243435
loss: 0.9923038482666016,grad_norm: 0.88064962861811, iteration: 243436
loss: 1.0113589763641357,grad_norm: 0.8321800698586097, iteration: 243437
loss: 1.02992844581604,grad_norm: 0.8249192986742361, iteration: 243438
loss: 0.9830393195152283,grad_norm: 0.9104037290447564, iteration: 243439
loss: 0.9793702960014343,grad_norm: 0.7957698559081292, iteration: 243440
loss: 0.9955810308456421,grad_norm: 0.7211139097394081, iteration: 243441
loss: 0.9990843534469604,grad_norm: 0.9999990035631501, iteration: 243442
loss: 0.9888947010040283,grad_norm: 0.8725348844259462, iteration: 243443
loss: 0.9937437176704407,grad_norm: 0.8620236230690465, iteration: 243444
loss: 0.9760770201683044,grad_norm: 0.9878417089216807, iteration: 243445
loss: 1.0534026622772217,grad_norm: 0.9999992128455809, iteration: 243446
loss: 1.0213996171951294,grad_norm: 0.9272193179816732, iteration: 243447
loss: 0.9798213839530945,grad_norm: 0.8638195596600329, iteration: 243448
loss: 1.0034058094024658,grad_norm: 0.9999991362424081, iteration: 243449
loss: 1.016581654548645,grad_norm: 0.869740859316077, iteration: 243450
loss: 0.9831331372261047,grad_norm: 0.9209999626188465, iteration: 243451
loss: 1.038174033164978,grad_norm: 0.850807070225753, iteration: 243452
loss: 1.0054763555526733,grad_norm: 0.7530317583970318, iteration: 243453
loss: 0.9894866943359375,grad_norm: 0.9082058448258749, iteration: 243454
loss: 0.9815146923065186,grad_norm: 0.8546893056371149, iteration: 243455
loss: 1.0067527294158936,grad_norm: 0.8009621335825816, iteration: 243456
loss: 1.0124529600143433,grad_norm: 0.8101798711063564, iteration: 243457
loss: 1.007738709449768,grad_norm: 0.999999176758425, iteration: 243458
loss: 0.9810150265693665,grad_norm: 0.9999990161271269, iteration: 243459
loss: 0.9729437232017517,grad_norm: 0.8825767695069502, iteration: 243460
loss: 1.0069772005081177,grad_norm: 0.999999247319752, iteration: 243461
loss: 1.0479702949523926,grad_norm: 0.9999991630719277, iteration: 243462
loss: 0.991759717464447,grad_norm: 0.8057045147363961, iteration: 243463
loss: 0.9665716290473938,grad_norm: 0.9030941270626509, iteration: 243464
loss: 0.9793801307678223,grad_norm: 0.8857850530648655, iteration: 243465
loss: 1.0217194557189941,grad_norm: 0.8312106108943754, iteration: 243466
loss: 1.0189027786254883,grad_norm: 0.9999992727330884, iteration: 243467
loss: 1.0102108716964722,grad_norm: 0.8945854243501724, iteration: 243468
loss: 0.9996005296707153,grad_norm: 0.9999991389994455, iteration: 243469
loss: 0.9768155217170715,grad_norm: 0.7041382734071522, iteration: 243470
loss: 1.040436863899231,grad_norm: 0.8901425812601491, iteration: 243471
loss: 1.02907395362854,grad_norm: 0.9999990109422335, iteration: 243472
loss: 0.9945921301841736,grad_norm: 0.9999991085113757, iteration: 243473
loss: 0.9617239832878113,grad_norm: 0.8373976870559017, iteration: 243474
loss: 0.9689990878105164,grad_norm: 0.9999995456674967, iteration: 243475
loss: 0.994303822517395,grad_norm: 0.9523827919656445, iteration: 243476
loss: 0.9996134042739868,grad_norm: 0.9512449821832372, iteration: 243477
loss: 0.9780575037002563,grad_norm: 0.8745260892149318, iteration: 243478
loss: 0.9818207025527954,grad_norm: 0.9718653823739917, iteration: 243479
loss: 1.0081332921981812,grad_norm: 0.8348527146854271, iteration: 243480
loss: 0.9983546137809753,grad_norm: 0.9200821583065825, iteration: 243481
loss: 1.0086297988891602,grad_norm: 0.9355829793381204, iteration: 243482
loss: 1.0029901266098022,grad_norm: 0.8204751657762714, iteration: 243483
loss: 1.0116853713989258,grad_norm: 0.999999003039018, iteration: 243484
loss: 1.0183725357055664,grad_norm: 0.7517298518313288, iteration: 243485
loss: 1.0195739269256592,grad_norm: 0.9156978525645818, iteration: 243486
loss: 0.9933406114578247,grad_norm: 0.8252175513001199, iteration: 243487
loss: 0.986268937587738,grad_norm: 0.9484136453718681, iteration: 243488
loss: 0.9854158163070679,grad_norm: 0.7781334766033681, iteration: 243489
loss: 0.9767621159553528,grad_norm: 0.89794467380827, iteration: 243490
loss: 0.9803193807601929,grad_norm: 0.9681577926427056, iteration: 243491
loss: 1.021079421043396,grad_norm: 0.9999990332816036, iteration: 243492
loss: 1.0147861242294312,grad_norm: 0.9984483933331302, iteration: 243493
loss: 0.9749736785888672,grad_norm: 0.7909053149093576, iteration: 243494
loss: 0.9564598202705383,grad_norm: 0.8037618646207932, iteration: 243495
loss: 1.0044299364089966,grad_norm: 0.8599445917143377, iteration: 243496
loss: 0.9731448888778687,grad_norm: 0.8644047267574333, iteration: 243497
loss: 1.0008301734924316,grad_norm: 0.9686613526638294, iteration: 243498
loss: 1.127529501914978,grad_norm: 0.9999999683904153, iteration: 243499
loss: 0.9792393445968628,grad_norm: 0.9999990122529162, iteration: 243500
loss: 0.9847757816314697,grad_norm: 0.8774327575279943, iteration: 243501
loss: 0.99904465675354,grad_norm: 0.871241275210392, iteration: 243502
loss: 1.0417968034744263,grad_norm: 0.9659362334406647, iteration: 243503
loss: 1.0209980010986328,grad_norm: 0.8877676802804111, iteration: 243504
loss: 0.9802200198173523,grad_norm: 0.9618378958852666, iteration: 243505
loss: 0.9729815721511841,grad_norm: 0.8197661232914103, iteration: 243506
loss: 0.9994816780090332,grad_norm: 0.9375198224307233, iteration: 243507
loss: 1.0167330503463745,grad_norm: 0.8713957155272573, iteration: 243508
loss: 1.034578800201416,grad_norm: 0.9999993104257858, iteration: 243509
loss: 0.9912116527557373,grad_norm: 0.8438479821460435, iteration: 243510
loss: 0.9969831705093384,grad_norm: 0.8018773600143447, iteration: 243511
loss: 1.0076234340667725,grad_norm: 0.8695468639639193, iteration: 243512
loss: 0.998069703578949,grad_norm: 0.8422689629703042, iteration: 243513
loss: 1.006361961364746,grad_norm: 0.7775116632726929, iteration: 243514
loss: 0.9940455555915833,grad_norm: 0.9999989512374718, iteration: 243515
loss: 0.9796600341796875,grad_norm: 0.7648599575504708, iteration: 243516
loss: 1.023127794265747,grad_norm: 0.8712916094957961, iteration: 243517
loss: 1.0275834798812866,grad_norm: 0.9999990314955371, iteration: 243518
loss: 0.9578883051872253,grad_norm: 0.893714000585751, iteration: 243519
loss: 0.9618924856185913,grad_norm: 0.7990097500862345, iteration: 243520
loss: 1.0256763696670532,grad_norm: 0.8545667974311923, iteration: 243521
loss: 0.971454381942749,grad_norm: 0.8396977806933147, iteration: 243522
loss: 0.9869526028633118,grad_norm: 0.9999990990403576, iteration: 243523
loss: 0.9802756905555725,grad_norm: 0.7427058900816572, iteration: 243524
loss: 1.0242916345596313,grad_norm: 0.8384039976970755, iteration: 243525
loss: 0.9965566396713257,grad_norm: 0.7866405617726981, iteration: 243526
loss: 0.9815617203712463,grad_norm: 0.9999992278809716, iteration: 243527
loss: 1.0484460592269897,grad_norm: 0.9999991991397236, iteration: 243528
loss: 1.0560508966445923,grad_norm: 0.856351202478896, iteration: 243529
loss: 0.9986684918403625,grad_norm: 0.8311786752161768, iteration: 243530
loss: 1.0118945837020874,grad_norm: 0.773864475644671, iteration: 243531
loss: 0.975885808467865,grad_norm: 0.9015823653478674, iteration: 243532
loss: 0.9631819128990173,grad_norm: 0.8890860105154232, iteration: 243533
loss: 1.017391562461853,grad_norm: 0.99999915991518, iteration: 243534
loss: 1.0067096948623657,grad_norm: 0.9565883868997478, iteration: 243535
loss: 1.021693468093872,grad_norm: 0.7719785687586509, iteration: 243536
loss: 1.030988335609436,grad_norm: 0.9999992014824127, iteration: 243537
loss: 1.3262068033218384,grad_norm: 0.999999817290303, iteration: 243538
loss: 0.9852178692817688,grad_norm: 0.9082383675974534, iteration: 243539
loss: 0.9943397045135498,grad_norm: 0.9031322134233478, iteration: 243540
loss: 1.0157508850097656,grad_norm: 0.9016501864713636, iteration: 243541
loss: 1.0121210813522339,grad_norm: 0.9894966864267636, iteration: 243542
loss: 0.9744929671287537,grad_norm: 0.8640501244212192, iteration: 243543
loss: 0.9619240760803223,grad_norm: 0.88872831485189, iteration: 243544
loss: 0.9946162104606628,grad_norm: 0.8542110165336223, iteration: 243545
loss: 1.023188591003418,grad_norm: 0.999999010884932, iteration: 243546
loss: 0.9602639675140381,grad_norm: 0.8960484056358297, iteration: 243547
loss: 0.9565208554267883,grad_norm: 0.9999992043678971, iteration: 243548
loss: 1.0138081312179565,grad_norm: 0.9999990051548979, iteration: 243549
loss: 1.0031002759933472,grad_norm: 0.9064954549922274, iteration: 243550
loss: 1.0553832054138184,grad_norm: 0.8537717707979277, iteration: 243551
loss: 0.9617581367492676,grad_norm: 0.9999990847052669, iteration: 243552
loss: 1.0018731355667114,grad_norm: 0.9568315732318349, iteration: 243553
loss: 1.0163289308547974,grad_norm: 0.9999989686415791, iteration: 243554
loss: 0.9960630536079407,grad_norm: 0.9808059642963771, iteration: 243555
loss: 0.9885603785514832,grad_norm: 0.9999990121176996, iteration: 243556
loss: 1.0037652254104614,grad_norm: 0.8947423974663989, iteration: 243557
loss: 1.0103495121002197,grad_norm: 0.9009395977561395, iteration: 243558
loss: 0.9915321469306946,grad_norm: 0.8681094540725784, iteration: 243559
loss: 0.971038281917572,grad_norm: 0.9304175813754874, iteration: 243560
loss: 0.9781432151794434,grad_norm: 0.9999990627528258, iteration: 243561
loss: 1.0122737884521484,grad_norm: 0.9999991878820382, iteration: 243562
loss: 1.0442503690719604,grad_norm: 0.9236433441344738, iteration: 243563
loss: 0.9806169271469116,grad_norm: 0.9328198525775958, iteration: 243564
loss: 0.9758933782577515,grad_norm: 0.9719500320476993, iteration: 243565
loss: 1.0024926662445068,grad_norm: 0.8537782233961015, iteration: 243566
loss: 1.0129419565200806,grad_norm: 0.9999991965078125, iteration: 243567
loss: 0.9955679774284363,grad_norm: 0.7986151845284588, iteration: 243568
loss: 0.9993501305580139,grad_norm: 0.9999992514417784, iteration: 243569
loss: 1.0190352201461792,grad_norm: 0.9999990702328542, iteration: 243570
loss: 1.0327266454696655,grad_norm: 0.7936315083790534, iteration: 243571
loss: 0.9957813024520874,grad_norm: 0.8474100372025601, iteration: 243572
loss: 0.9918020367622375,grad_norm: 0.9999990497530576, iteration: 243573
loss: 0.9990936517715454,grad_norm: 0.9151961271430605, iteration: 243574
loss: 0.9939236044883728,grad_norm: 0.9617792457871194, iteration: 243575
loss: 1.0103596448898315,grad_norm: 0.9283065827324979, iteration: 243576
loss: 1.0185905694961548,grad_norm: 0.9999990728330144, iteration: 243577
loss: 0.9888669848442078,grad_norm: 0.9572017100998019, iteration: 243578
loss: 1.0218547582626343,grad_norm: 0.8479865502729929, iteration: 243579
loss: 0.9936898946762085,grad_norm: 0.811473250368052, iteration: 243580
loss: 1.02097749710083,grad_norm: 0.8762379216136827, iteration: 243581
loss: 1.0181232690811157,grad_norm: 0.8817417280908645, iteration: 243582
loss: 0.9989776611328125,grad_norm: 0.9492885492734218, iteration: 243583
loss: 0.9890264868736267,grad_norm: 0.9206040423390285, iteration: 243584
loss: 1.0315706729888916,grad_norm: 0.908304953903117, iteration: 243585
loss: 1.0596989393234253,grad_norm: 0.9999998633360518, iteration: 243586
loss: 0.9745753407478333,grad_norm: 0.7608084877705708, iteration: 243587
loss: 1.0273038148880005,grad_norm: 0.9112740822274008, iteration: 243588
loss: 0.9999693036079407,grad_norm: 0.9545276239333171, iteration: 243589
loss: 1.1761634349822998,grad_norm: 0.9999999453020458, iteration: 243590
loss: 1.0324825048446655,grad_norm: 0.7730254105535167, iteration: 243591
loss: 0.9922437071800232,grad_norm: 0.9471210352183463, iteration: 243592
loss: 0.953117847442627,grad_norm: 0.8986847318838935, iteration: 243593
loss: 1.018857479095459,grad_norm: 0.9999990061529511, iteration: 243594
loss: 0.9984062910079956,grad_norm: 0.8771783125201776, iteration: 243595
loss: 1.1716978549957275,grad_norm: 0.9999995948975848, iteration: 243596
loss: 0.9943695068359375,grad_norm: 0.7943920286048336, iteration: 243597
loss: 1.0673582553863525,grad_norm: 0.8242396860962684, iteration: 243598
loss: 1.0319178104400635,grad_norm: 0.964036426171008, iteration: 243599
loss: 1.0106618404388428,grad_norm: 0.9999991164541758, iteration: 243600
loss: 0.9950626492500305,grad_norm: 0.9302602449383203, iteration: 243601
loss: 1.038053274154663,grad_norm: 0.9999996114368354, iteration: 243602
loss: 0.9909014701843262,grad_norm: 0.9345392722432907, iteration: 243603
loss: 1.0078297853469849,grad_norm: 0.8749346031705187, iteration: 243604
loss: 1.0652267932891846,grad_norm: 0.9999999600439246, iteration: 243605
loss: 1.0269993543624878,grad_norm: 0.9213094734155519, iteration: 243606
loss: 0.9958142042160034,grad_norm: 0.8043615690867807, iteration: 243607
loss: 1.0567048788070679,grad_norm: 0.9435148808782163, iteration: 243608
loss: 1.0167397260665894,grad_norm: 0.9999991804510835, iteration: 243609
loss: 0.9773720502853394,grad_norm: 0.8209728221388499, iteration: 243610
loss: 1.0046061277389526,grad_norm: 0.8788977333365783, iteration: 243611
loss: 0.9758217930793762,grad_norm: 0.9999993753988333, iteration: 243612
loss: 1.009312629699707,grad_norm: 0.99358125065012, iteration: 243613
loss: 0.9752841591835022,grad_norm: 0.9999991023142272, iteration: 243614
loss: 0.9830570220947266,grad_norm: 0.9123524400672671, iteration: 243615
loss: 0.9956787824630737,grad_norm: 0.8234080209795466, iteration: 243616
loss: 1.06009042263031,grad_norm: 0.8430807622659212, iteration: 243617
loss: 1.0743672847747803,grad_norm: 0.8551239170695633, iteration: 243618
loss: 1.0248414278030396,grad_norm: 0.9066980325879125, iteration: 243619
loss: 1.0257222652435303,grad_norm: 0.9301656049653169, iteration: 243620
loss: 0.9876589775085449,grad_norm: 0.88766488435649, iteration: 243621
loss: 1.0341706275939941,grad_norm: 0.9999991217817041, iteration: 243622
loss: 1.030034065246582,grad_norm: 0.928733955875678, iteration: 243623
loss: 1.0168882608413696,grad_norm: 0.9999994697123594, iteration: 243624
loss: 0.9880247712135315,grad_norm: 0.9999991565438576, iteration: 243625
loss: 0.9911563992500305,grad_norm: 0.9706737606358814, iteration: 243626
loss: 1.0071594715118408,grad_norm: 0.9231439898469814, iteration: 243627
loss: 1.0173273086547852,grad_norm: 0.9999991361831471, iteration: 243628
loss: 1.0440059900283813,grad_norm: 0.9999995551068676, iteration: 243629
loss: 0.9899271726608276,grad_norm: 0.9336241113065457, iteration: 243630
loss: 0.9770815968513489,grad_norm: 0.9999991747552175, iteration: 243631
loss: 1.052781581878662,grad_norm: 0.9999990831885956, iteration: 243632
loss: 0.9977724552154541,grad_norm: 0.8528549721266162, iteration: 243633
loss: 1.042878270149231,grad_norm: 0.9848139792774618, iteration: 243634
loss: 1.0015562772750854,grad_norm: 0.9999991424543945, iteration: 243635
loss: 1.0072991847991943,grad_norm: 0.8621316758757919, iteration: 243636
loss: 1.0087954998016357,grad_norm: 0.9999989245551946, iteration: 243637
loss: 1.0271217823028564,grad_norm: 0.9999991114497201, iteration: 243638
loss: 0.9942198395729065,grad_norm: 0.9802439023809232, iteration: 243639
loss: 0.9588184356689453,grad_norm: 0.9999992828724097, iteration: 243640
loss: 1.0000643730163574,grad_norm: 0.9294750427058907, iteration: 243641
loss: 1.0202150344848633,grad_norm: 0.9160561859449079, iteration: 243642
loss: 1.0169092416763306,grad_norm: 0.9999990707356825, iteration: 243643
loss: 1.0280565023422241,grad_norm: 0.8971108802598563, iteration: 243644
loss: 0.9716209173202515,grad_norm: 0.858484380489457, iteration: 243645
loss: 1.0166205167770386,grad_norm: 0.9912821491609155, iteration: 243646
loss: 1.0322675704956055,grad_norm: 0.9999990990215935, iteration: 243647
loss: 0.9990948438644409,grad_norm: 0.7842637471518553, iteration: 243648
loss: 1.0036572217941284,grad_norm: 0.9999990208562926, iteration: 243649
loss: 0.9872152805328369,grad_norm: 0.9999996876962732, iteration: 243650
loss: 1.031982660293579,grad_norm: 0.986665144176218, iteration: 243651
loss: 1.046372890472412,grad_norm: 0.9999991105848532, iteration: 243652
loss: 1.0121955871582031,grad_norm: 0.925817333874255, iteration: 243653
loss: 1.0176396369934082,grad_norm: 0.8377973354848522, iteration: 243654
loss: 1.017002820968628,grad_norm: 0.8101913975236881, iteration: 243655
loss: 1.003612756729126,grad_norm: 0.9162041400752419, iteration: 243656
loss: 0.9813418388366699,grad_norm: 0.9251589903622361, iteration: 243657
loss: 0.9909552931785583,grad_norm: 0.922388699724771, iteration: 243658
loss: 0.9898936152458191,grad_norm: 0.898881496982022, iteration: 243659
loss: 0.9781948924064636,grad_norm: 0.8474382831833718, iteration: 243660
loss: 1.0134284496307373,grad_norm: 0.8470371173613195, iteration: 243661
loss: 1.0190484523773193,grad_norm: 0.9374773294780663, iteration: 243662
loss: 0.9650357961654663,grad_norm: 0.9999988934315326, iteration: 243663
loss: 1.011985182762146,grad_norm: 0.9044726667115476, iteration: 243664
loss: 1.0458695888519287,grad_norm: 0.8387577008617941, iteration: 243665
loss: 1.0197738409042358,grad_norm: 0.8403556073002922, iteration: 243666
loss: 0.9987757205963135,grad_norm: 0.8859814746625121, iteration: 243667
loss: 0.9968036413192749,grad_norm: 0.999999312954926, iteration: 243668
loss: 0.9919522404670715,grad_norm: 0.9547702126582641, iteration: 243669
loss: 1.0794594287872314,grad_norm: 0.9999990241558392, iteration: 243670
loss: 0.9567267298698425,grad_norm: 0.9336755583890091, iteration: 243671
loss: 1.025028944015503,grad_norm: 0.9999992083627507, iteration: 243672
loss: 1.0165386199951172,grad_norm: 0.8677597800960101, iteration: 243673
loss: 1.1605701446533203,grad_norm: 0.9999998536635857, iteration: 243674
loss: 1.116762638092041,grad_norm: 0.8868378361170858, iteration: 243675
loss: 1.0035396814346313,grad_norm: 0.9999993326473183, iteration: 243676
loss: 1.0240272283554077,grad_norm: 0.9999990759314373, iteration: 243677
loss: 1.0366696119308472,grad_norm: 0.8675390888520407, iteration: 243678
loss: 0.9837769865989685,grad_norm: 0.7142580994546277, iteration: 243679
loss: 1.0115784406661987,grad_norm: 0.9441403230705374, iteration: 243680
loss: 1.0290848016738892,grad_norm: 0.8923849966210375, iteration: 243681
loss: 0.9791538715362549,grad_norm: 0.8381879916722775, iteration: 243682
loss: 0.9842769503593445,grad_norm: 0.9999991155339365, iteration: 243683
loss: 1.003584623336792,grad_norm: 0.9088106515492754, iteration: 243684
loss: 1.0169970989227295,grad_norm: 0.9652710831091852, iteration: 243685
loss: 0.9824600219726562,grad_norm: 0.8538038842400026, iteration: 243686
loss: 0.9984650611877441,grad_norm: 0.8893502951699994, iteration: 243687
loss: 0.9849656820297241,grad_norm: 0.8579691836873315, iteration: 243688
loss: 0.9681748151779175,grad_norm: 0.8551882173875824, iteration: 243689
loss: 1.026673674583435,grad_norm: 0.9999996453847569, iteration: 243690
loss: 0.9898814558982849,grad_norm: 0.735038963048207, iteration: 243691
loss: 1.1157186031341553,grad_norm: 1.0000000008331555, iteration: 243692
loss: 0.9712660312652588,grad_norm: 0.9999991938291203, iteration: 243693
loss: 0.968684732913971,grad_norm: 0.9131959865311111, iteration: 243694
loss: 1.0270487070083618,grad_norm: 0.8186861845432208, iteration: 243695
loss: 1.0005415678024292,grad_norm: 0.9433746108657663, iteration: 243696
loss: 1.0084364414215088,grad_norm: 0.8439643863205365, iteration: 243697
loss: 1.0237993001937866,grad_norm: 0.7340386762297205, iteration: 243698
loss: 1.0149680376052856,grad_norm: 0.7760135396555405, iteration: 243699
loss: 1.009323000907898,grad_norm: 0.8624921316824528, iteration: 243700
loss: 0.9918416738510132,grad_norm: 0.8281524128177908, iteration: 243701
loss: 0.9630473256111145,grad_norm: 0.864960736522615, iteration: 243702
loss: 1.0328612327575684,grad_norm: 0.999999556448989, iteration: 243703
loss: 0.9821793437004089,grad_norm: 0.7903161577836871, iteration: 243704
loss: 1.0229120254516602,grad_norm: 0.9999990227547492, iteration: 243705
loss: 1.0142059326171875,grad_norm: 0.7862089018715381, iteration: 243706
loss: 1.0047554969787598,grad_norm: 0.8903707660495453, iteration: 243707
loss: 1.0072156190872192,grad_norm: 0.8858236599667397, iteration: 243708
loss: 0.9995297193527222,grad_norm: 0.749008937999794, iteration: 243709
loss: 1.1180399656295776,grad_norm: 0.9999996696767488, iteration: 243710
loss: 0.9724199175834656,grad_norm: 0.9426533669241883, iteration: 243711
loss: 1.0112537145614624,grad_norm: 0.9999996086143763, iteration: 243712
loss: 0.9811612963676453,grad_norm: 0.9563214193315168, iteration: 243713
loss: 1.026851773262024,grad_norm: 0.8416604232885398, iteration: 243714
loss: 0.9912314414978027,grad_norm: 0.9999991146389314, iteration: 243715
loss: 1.0088798999786377,grad_norm: 0.9226718321867518, iteration: 243716
loss: 1.0085971355438232,grad_norm: 0.9593969579552236, iteration: 243717
loss: 1.0045151710510254,grad_norm: 0.796253835997622, iteration: 243718
loss: 0.9771291613578796,grad_norm: 0.8846751247596114, iteration: 243719
loss: 0.9965699315071106,grad_norm: 0.8573026587875479, iteration: 243720
loss: 0.9786814451217651,grad_norm: 0.8686311308003032, iteration: 243721
loss: 1.0016775131225586,grad_norm: 0.7976865011854286, iteration: 243722
loss: 1.0013532638549805,grad_norm: 0.9999997833788854, iteration: 243723
loss: 1.0174601078033447,grad_norm: 0.9999992762550267, iteration: 243724
loss: 0.9978713393211365,grad_norm: 0.9229945120416857, iteration: 243725
loss: 1.0032992362976074,grad_norm: 0.9617384768112596, iteration: 243726
loss: 1.264461874961853,grad_norm: 0.9999995067318715, iteration: 243727
loss: 0.9951071739196777,grad_norm: 0.8176729912146145, iteration: 243728
loss: 0.9997535347938538,grad_norm: 0.9999995625369577, iteration: 243729
loss: 0.9731396436691284,grad_norm: 0.8860862172134842, iteration: 243730
loss: 1.0154423713684082,grad_norm: 0.9373221715131888, iteration: 243731
loss: 1.0477699041366577,grad_norm: 0.9999991798202392, iteration: 243732
loss: 1.0118451118469238,grad_norm: 0.8700817851084185, iteration: 243733
loss: 1.0051461458206177,grad_norm: 0.9976426331732169, iteration: 243734
loss: 0.9654792547225952,grad_norm: 0.9762100211302973, iteration: 243735
loss: 1.0306205749511719,grad_norm: 0.9999992198597046, iteration: 243736
loss: 1.0043015480041504,grad_norm: 0.9881988373252889, iteration: 243737
loss: 1.0428129434585571,grad_norm: 0.8232118236657643, iteration: 243738
loss: 0.9682615995407104,grad_norm: 0.999999182819271, iteration: 243739
loss: 1.0093320608139038,grad_norm: 0.8995841691991435, iteration: 243740
loss: 0.9791702032089233,grad_norm: 0.9999990957964431, iteration: 243741
loss: 0.9845094084739685,grad_norm: 0.9704497226314605, iteration: 243742
loss: 0.9659072160720825,grad_norm: 0.9092986926166651, iteration: 243743
loss: 0.9779020547866821,grad_norm: 0.7522031196114098, iteration: 243744
loss: 0.9961021542549133,grad_norm: 0.9969765564737424, iteration: 243745
loss: 0.966553807258606,grad_norm: 0.8730907829058621, iteration: 243746
loss: 0.9973846673965454,grad_norm: 0.9215099568092439, iteration: 243747
loss: 0.9750679731369019,grad_norm: 0.8239514684044703, iteration: 243748
loss: 0.994906485080719,grad_norm: 0.8809854943104711, iteration: 243749
loss: 0.9569570422172546,grad_norm: 0.8688976672560819, iteration: 243750
loss: 1.004192590713501,grad_norm: 0.9999992277689937, iteration: 243751
loss: 1.0137770175933838,grad_norm: 0.9289599804817711, iteration: 243752
loss: 0.9671022891998291,grad_norm: 0.9990679791103506, iteration: 243753
loss: 0.9907740950584412,grad_norm: 0.9326802374390459, iteration: 243754
loss: 0.9367456436157227,grad_norm: 0.7576700245073009, iteration: 243755
loss: 0.9815321564674377,grad_norm: 0.9084623637825802, iteration: 243756
loss: 0.9837996959686279,grad_norm: 0.9495989186738346, iteration: 243757
loss: 1.0053033828735352,grad_norm: 0.9669398877125289, iteration: 243758
loss: 0.976387619972229,grad_norm: 0.9106651361469267, iteration: 243759
loss: 0.9982796907424927,grad_norm: 0.8185708700902596, iteration: 243760
loss: 0.9703369140625,grad_norm: 0.9999990471937611, iteration: 243761
loss: 0.9656509160995483,grad_norm: 0.888802278489675, iteration: 243762
loss: 1.0345046520233154,grad_norm: 0.9999997102617106, iteration: 243763
loss: 1.0339679718017578,grad_norm: 0.8398232263576976, iteration: 243764
loss: 1.0982698202133179,grad_norm: 0.9081531374444616, iteration: 243765
loss: 1.0135092735290527,grad_norm: 0.9999992464006708, iteration: 243766
loss: 1.0146267414093018,grad_norm: 0.9111197369537654, iteration: 243767
loss: 1.0175622701644897,grad_norm: 0.9134458453109485, iteration: 243768
loss: 1.0226608514785767,grad_norm: 0.7782041933125565, iteration: 243769
loss: 0.9568798542022705,grad_norm: 0.974597766978013, iteration: 243770
loss: 1.0761048793792725,grad_norm: 0.9999992939855769, iteration: 243771
loss: 1.0223290920257568,grad_norm: 0.9248820072443416, iteration: 243772
loss: 1.0160350799560547,grad_norm: 0.8951887875566622, iteration: 243773
loss: 0.9789080619812012,grad_norm: 0.8698865211956333, iteration: 243774
loss: 0.9886264801025391,grad_norm: 0.7706012954410977, iteration: 243775
loss: 0.9488833546638489,grad_norm: 0.8941854385018532, iteration: 243776
loss: 1.054895281791687,grad_norm: 0.9999993375308681, iteration: 243777
loss: 0.9808624386787415,grad_norm: 0.798755610363259, iteration: 243778
loss: 1.0204013586044312,grad_norm: 0.8196194186637754, iteration: 243779
loss: 0.976613461971283,grad_norm: 0.9302638773603056, iteration: 243780
loss: 1.0048364400863647,grad_norm: 0.9349212136732644, iteration: 243781
loss: 1.0135891437530518,grad_norm: 0.9999991514697852, iteration: 243782
loss: 0.974872350692749,grad_norm: 0.791630900530865, iteration: 243783
loss: 0.9748717546463013,grad_norm: 0.8606495911296858, iteration: 243784
loss: 0.9839375615119934,grad_norm: 0.9357174127059537, iteration: 243785
loss: 1.0239495038986206,grad_norm: 0.9843863298535301, iteration: 243786
loss: 0.975975513458252,grad_norm: 0.9999990912504108, iteration: 243787
loss: 0.9688255190849304,grad_norm: 0.9339335012070575, iteration: 243788
loss: 1.0822958946228027,grad_norm: 0.9999995949645636, iteration: 243789
loss: 1.032188892364502,grad_norm: 0.9906148945750105, iteration: 243790
loss: 1.051370620727539,grad_norm: 0.9999996184086809, iteration: 243791
loss: 1.029783010482788,grad_norm: 0.7455625935833963, iteration: 243792
loss: 0.9590319991111755,grad_norm: 0.9999990846794916, iteration: 243793
loss: 0.9967756271362305,grad_norm: 0.9999990970410222, iteration: 243794
loss: 0.9885739684104919,grad_norm: 0.7843846109006472, iteration: 243795
loss: 1.0051642656326294,grad_norm: 0.720838065027954, iteration: 243796
loss: 0.9902454614639282,grad_norm: 0.9386885393929405, iteration: 243797
loss: 1.0031062364578247,grad_norm: 0.8512356480225327, iteration: 243798
loss: 0.9440485835075378,grad_norm: 0.7412271187179169, iteration: 243799
loss: 0.9713622331619263,grad_norm: 0.8457692718664535, iteration: 243800
loss: 0.9954990148544312,grad_norm: 0.9999992186939591, iteration: 243801
loss: 0.9849970936775208,grad_norm: 0.791419974624049, iteration: 243802
loss: 0.9756014347076416,grad_norm: 0.9536090898285199, iteration: 243803
loss: 1.008034586906433,grad_norm: 0.9999990780165723, iteration: 243804
loss: 1.0225164890289307,grad_norm: 0.8898928808800303, iteration: 243805
loss: 0.9754121899604797,grad_norm: 0.7968407002429274, iteration: 243806
loss: 1.0133517980575562,grad_norm: 0.8582429984349504, iteration: 243807
loss: 0.9763823747634888,grad_norm: 0.9675349770537482, iteration: 243808
loss: 0.9596226811408997,grad_norm: 0.882776551929584, iteration: 243809
loss: 1.1114835739135742,grad_norm: 0.9999998619591574, iteration: 243810
loss: 0.9785827398300171,grad_norm: 0.9605706696721769, iteration: 243811
loss: 0.9932129383087158,grad_norm: 0.9475273205134888, iteration: 243812
loss: 1.0530202388763428,grad_norm: 0.9999997007090609, iteration: 243813
loss: 0.9863268733024597,grad_norm: 0.8468554191703668, iteration: 243814
loss: 0.976847767829895,grad_norm: 0.9723668183588728, iteration: 243815
loss: 0.9792799949645996,grad_norm: 0.999999217966459, iteration: 243816
loss: 1.00145423412323,grad_norm: 0.9921919054825656, iteration: 243817
loss: 0.9744455814361572,grad_norm: 0.9999992298122322, iteration: 243818
loss: 0.9717311263084412,grad_norm: 0.8170471485763654, iteration: 243819
loss: 0.9850878119468689,grad_norm: 0.9999990020615135, iteration: 243820
loss: 1.044663429260254,grad_norm: 0.999999634927639, iteration: 243821
loss: 0.9495856165885925,grad_norm: 0.7888686942926326, iteration: 243822
loss: 1.0114469528198242,grad_norm: 0.9999991754634262, iteration: 243823
loss: 1.0255292654037476,grad_norm: 0.8346549231371713, iteration: 243824
loss: 0.9681853652000427,grad_norm: 0.9556458745794284, iteration: 243825
loss: 0.9893304109573364,grad_norm: 0.9999991268880386, iteration: 243826
loss: 1.026686429977417,grad_norm: 0.9703105513035015, iteration: 243827
loss: 0.9823410511016846,grad_norm: 0.9999990734474387, iteration: 243828
loss: 0.9892482161521912,grad_norm: 0.792948295666513, iteration: 243829
loss: 1.0027629137039185,grad_norm: 0.9454416990193025, iteration: 243830
loss: 1.0196309089660645,grad_norm: 0.9707241259013033, iteration: 243831
loss: 1.0051947832107544,grad_norm: 0.8389828087673262, iteration: 243832
loss: 0.9955041408538818,grad_norm: 0.8062299825670914, iteration: 243833
loss: 0.9929584264755249,grad_norm: 0.8304395181542439, iteration: 243834
loss: 0.9330137372016907,grad_norm: 0.9999991370114327, iteration: 243835
loss: 0.98609858751297,grad_norm: 0.9371693025248452, iteration: 243836
loss: 1.0127328634262085,grad_norm: 0.9178649543596468, iteration: 243837
loss: 0.969848096370697,grad_norm: 0.9999991778253322, iteration: 243838
loss: 0.9931566715240479,grad_norm: 0.9999991571470654, iteration: 243839
loss: 0.9998046159744263,grad_norm: 0.9869689287154021, iteration: 243840
loss: 1.0079385042190552,grad_norm: 0.9999991362403456, iteration: 243841
loss: 0.9786500334739685,grad_norm: 0.9181134623313271, iteration: 243842
loss: 1.0135310888290405,grad_norm: 0.9516184192309768, iteration: 243843
loss: 0.9946426749229431,grad_norm: 0.9782300328080242, iteration: 243844
loss: 0.9749933481216431,grad_norm: 0.8596834099661912, iteration: 243845
loss: 1.0062406063079834,grad_norm: 0.9112286890741195, iteration: 243846
loss: 0.9929497838020325,grad_norm: 0.8902169935593838, iteration: 243847
loss: 0.978908121585846,grad_norm: 0.8120660941121824, iteration: 243848
loss: 0.9722796678543091,grad_norm: 0.775182650565151, iteration: 243849
loss: 0.9909472465515137,grad_norm: 0.944159696736209, iteration: 243850
loss: 0.9855838418006897,grad_norm: 0.7711984800151689, iteration: 243851
loss: 1.0210732221603394,grad_norm: 0.9054286325004154, iteration: 243852
loss: 0.9928154945373535,grad_norm: 0.9999990645450823, iteration: 243853
loss: 1.0081568956375122,grad_norm: 0.9999990918310094, iteration: 243854
loss: 1.0244029760360718,grad_norm: 0.8470246551606102, iteration: 243855
loss: 0.9811381697654724,grad_norm: 0.87848609417323, iteration: 243856
loss: 1.0304312705993652,grad_norm: 0.9431879085088694, iteration: 243857
loss: 1.0322265625,grad_norm: 0.9999995022969637, iteration: 243858
loss: 1.0246376991271973,grad_norm: 0.9999999489863519, iteration: 243859
loss: 1.0022261142730713,grad_norm: 0.7802698605600904, iteration: 243860
loss: 1.0031729936599731,grad_norm: 0.8471505063632686, iteration: 243861
loss: 0.9996832013130188,grad_norm: 0.9999991637920861, iteration: 243862
loss: 0.9997102618217468,grad_norm: 0.9999990264683433, iteration: 243863
loss: 1.0152924060821533,grad_norm: 0.9346872217411223, iteration: 243864
loss: 0.9721468687057495,grad_norm: 0.8579074278353572, iteration: 243865
loss: 0.9895464181900024,grad_norm: 0.7908802623232631, iteration: 243866
loss: 0.9594194293022156,grad_norm: 0.9431315193466592, iteration: 243867
loss: 1.0294452905654907,grad_norm: 0.8447824168049454, iteration: 243868
loss: 0.9632218480110168,grad_norm: 0.8803830385881757, iteration: 243869
loss: 0.9868674874305725,grad_norm: 0.8724407344220455, iteration: 243870
loss: 1.0570825338363647,grad_norm: 0.9628935203202974, iteration: 243871
loss: 0.9889857769012451,grad_norm: 0.920621311099275, iteration: 243872
loss: 0.9977086186408997,grad_norm: 0.9223194194395821, iteration: 243873
loss: 1.0040141344070435,grad_norm: 0.9026567993040672, iteration: 243874
loss: 1.0117098093032837,grad_norm: 0.9999996438652123, iteration: 243875
loss: 1.027418851852417,grad_norm: 0.9164939294309253, iteration: 243876
loss: 1.016802430152893,grad_norm: 0.7976784077163176, iteration: 243877
loss: 0.9623029828071594,grad_norm: 0.9784679655313799, iteration: 243878
loss: 1.0091842412948608,grad_norm: 0.9999991726925419, iteration: 243879
loss: 1.070468783378601,grad_norm: 0.9999992844136146, iteration: 243880
loss: 1.007953405380249,grad_norm: 0.9999995175198696, iteration: 243881
loss: 1.0060278177261353,grad_norm: 0.9999990513905955, iteration: 243882
loss: 0.9717671275138855,grad_norm: 0.9712864910215695, iteration: 243883
loss: 1.084542989730835,grad_norm: 0.9999991392496469, iteration: 243884
loss: 0.9837105870246887,grad_norm: 0.7715625300525422, iteration: 243885
loss: 1.0410505533218384,grad_norm: 0.9999990033533289, iteration: 243886
loss: 1.0641225576400757,grad_norm: 0.9999993325683539, iteration: 243887
loss: 0.982588529586792,grad_norm: 0.9999992491465776, iteration: 243888
loss: 1.0504738092422485,grad_norm: 0.9999991658137297, iteration: 243889
loss: 1.014374852180481,grad_norm: 0.7010020119637305, iteration: 243890
loss: 1.0122383832931519,grad_norm: 0.9657832364213446, iteration: 243891
loss: 1.0012614727020264,grad_norm: 0.896325194791436, iteration: 243892
loss: 0.9868946075439453,grad_norm: 0.8612768277021018, iteration: 243893
loss: 1.07687509059906,grad_norm: 0.9999994250815292, iteration: 243894
loss: 0.9831303358078003,grad_norm: 0.9999994089343317, iteration: 243895
loss: 1.0139392614364624,grad_norm: 0.8952799584360112, iteration: 243896
loss: 0.9682339429855347,grad_norm: 0.869456970130961, iteration: 243897
loss: 1.0415722131729126,grad_norm: 0.9999991326255104, iteration: 243898
loss: 1.1132909059524536,grad_norm: 0.9999997438432586, iteration: 243899
loss: 1.2033907175064087,grad_norm: 0.9999993863899067, iteration: 243900
loss: 1.1240993738174438,grad_norm: 0.9999991508074741, iteration: 243901
loss: 1.0949896574020386,grad_norm: 0.9151275135123816, iteration: 243902
loss: 0.9570930004119873,grad_norm: 0.9821379623573554, iteration: 243903
loss: 1.0435161590576172,grad_norm: 0.9999999354081209, iteration: 243904
loss: 1.124119520187378,grad_norm: 0.9999998391278668, iteration: 243905
loss: 1.1314488649368286,grad_norm: 0.9999999257787426, iteration: 243906
loss: 1.1481249332427979,grad_norm: 0.9999993886038756, iteration: 243907
loss: 1.091105341911316,grad_norm: 0.9264680856705055, iteration: 243908
loss: 0.9948301315307617,grad_norm: 0.9999996390126207, iteration: 243909
loss: 1.040816068649292,grad_norm: 0.9999991233029443, iteration: 243910
loss: 1.0154447555541992,grad_norm: 0.8213448211152089, iteration: 243911
loss: 1.0291192531585693,grad_norm: 0.8537775417519837, iteration: 243912
loss: 1.0461556911468506,grad_norm: 0.7382606691689267, iteration: 243913
loss: 1.1371513605117798,grad_norm: 0.9999991925634621, iteration: 243914
loss: 1.047372579574585,grad_norm: 0.9999993293940417, iteration: 243915
loss: 1.0103403329849243,grad_norm: 0.8995858528741205, iteration: 243916
loss: 0.9805638790130615,grad_norm: 0.8764475544556047, iteration: 243917
loss: 1.0298405885696411,grad_norm: 0.7520817160940121, iteration: 243918
loss: 1.1451940536499023,grad_norm: 0.9999996738748574, iteration: 243919
loss: 1.02418851852417,grad_norm: 0.9999992904321707, iteration: 243920
loss: 1.0028189420700073,grad_norm: 0.89829064937612, iteration: 243921
loss: 1.003196120262146,grad_norm: 0.9127602786556086, iteration: 243922
loss: 1.009293556213379,grad_norm: 0.8443941540630899, iteration: 243923
loss: 0.9802153706550598,grad_norm: 0.9212720150220292, iteration: 243924
loss: 1.0363855361938477,grad_norm: 0.9635604723335324, iteration: 243925
loss: 1.0141685009002686,grad_norm: 0.945794600732644, iteration: 243926
loss: 0.9783264398574829,grad_norm: 0.8171805441288787, iteration: 243927
loss: 1.067480444908142,grad_norm: 0.9999991786744087, iteration: 243928
loss: 1.3232393264770508,grad_norm: 0.9999997682233795, iteration: 243929
loss: 1.0079103708267212,grad_norm: 0.9072533557270471, iteration: 243930
loss: 0.9878955483436584,grad_norm: 0.9300973817400586, iteration: 243931
loss: 1.060894250869751,grad_norm: 0.9312846590221894, iteration: 243932
loss: 0.9892542958259583,grad_norm: 0.9379511597517481, iteration: 243933
loss: 1.0023752450942993,grad_norm: 0.8215790393299145, iteration: 243934
loss: 1.029953122138977,grad_norm: 0.9999996399495554, iteration: 243935
loss: 1.0604223012924194,grad_norm: 0.9163370296809816, iteration: 243936
loss: 0.9899912476539612,grad_norm: 0.9433139980177415, iteration: 243937
loss: 1.0860131978988647,grad_norm: 0.983239607761982, iteration: 243938
loss: 1.1339291334152222,grad_norm: 0.949877121260212, iteration: 243939
loss: 1.0516833066940308,grad_norm: 0.9999994927931496, iteration: 243940
loss: 1.1130082607269287,grad_norm: 0.9999995832641241, iteration: 243941
loss: 0.9777835011482239,grad_norm: 0.9551429619359878, iteration: 243942
loss: 1.0211390256881714,grad_norm: 0.9571913650477883, iteration: 243943
loss: 1.0039085149765015,grad_norm: 0.9999995246350035, iteration: 243944
loss: 0.9960702657699585,grad_norm: 0.9584603790583184, iteration: 243945
loss: 0.9944296479225159,grad_norm: 0.8120204134147879, iteration: 243946
loss: 1.0319775342941284,grad_norm: 0.8334694722229296, iteration: 243947
loss: 0.9898188710212708,grad_norm: 0.9999993125413715, iteration: 243948
loss: 1.1242529153823853,grad_norm: 0.997292070896122, iteration: 243949
loss: 1.013633370399475,grad_norm: 0.795622775143475, iteration: 243950
loss: 0.9851576089859009,grad_norm: 0.7907295545222116, iteration: 243951
loss: 0.9629473090171814,grad_norm: 0.8405090600889434, iteration: 243952
loss: 0.9756126999855042,grad_norm: 0.9999992884090076, iteration: 243953
loss: 0.984196126461029,grad_norm: 0.9301511607048631, iteration: 243954
loss: 1.0490387678146362,grad_norm: 0.7938105923692184, iteration: 243955
loss: 1.2450740337371826,grad_norm: 0.9999997051915046, iteration: 243956
loss: 0.9848092794418335,grad_norm: 0.99999963480912, iteration: 243957
loss: 1.0338293313980103,grad_norm: 0.9999999643116995, iteration: 243958
loss: 0.9825744032859802,grad_norm: 0.9538587776452807, iteration: 243959
loss: 1.0202280282974243,grad_norm: 0.9999991787432251, iteration: 243960
loss: 1.168845772743225,grad_norm: 0.9999998683089022, iteration: 243961
loss: 0.9827332496643066,grad_norm: 0.8907986094768149, iteration: 243962
loss: 0.9998918771743774,grad_norm: 0.815538232074545, iteration: 243963
loss: 1.20181143283844,grad_norm: 0.9999999612718802, iteration: 243964
loss: 0.9954894781112671,grad_norm: 0.9844478653350474, iteration: 243965
loss: 1.0116982460021973,grad_norm: 0.7589767565697463, iteration: 243966
loss: 1.327392578125,grad_norm: 0.9999996998073443, iteration: 243967
loss: 0.9919739961624146,grad_norm: 0.9999990190128215, iteration: 243968
loss: 0.9852301478385925,grad_norm: 0.999999146604654, iteration: 243969
loss: 1.0087827444076538,grad_norm: 0.7848099353326038, iteration: 243970
loss: 1.0017377138137817,grad_norm: 0.9999990331770855, iteration: 243971
loss: 0.9899812340736389,grad_norm: 0.8012719649388415, iteration: 243972
loss: 0.9922866225242615,grad_norm: 0.8970565231352327, iteration: 243973
loss: 0.999893307685852,grad_norm: 0.9999990302120856, iteration: 243974
loss: 1.0086297988891602,grad_norm: 0.7885741294108417, iteration: 243975
loss: 1.0072211027145386,grad_norm: 0.8736050763896469, iteration: 243976
loss: 0.9951176643371582,grad_norm: 0.7627622791840724, iteration: 243977
loss: 0.9913198351860046,grad_norm: 0.9746517642276846, iteration: 243978
loss: 0.9981516599655151,grad_norm: 0.8623177694679578, iteration: 243979
loss: 0.984197199344635,grad_norm: 0.99999920151822, iteration: 243980
loss: 1.059451699256897,grad_norm: 0.8999779710648294, iteration: 243981
loss: 1.1599642038345337,grad_norm: 0.9999997671026936, iteration: 243982
loss: 1.0327749252319336,grad_norm: 0.9501436230351338, iteration: 243983
loss: 0.9551894664764404,grad_norm: 0.9999998903460574, iteration: 243984
loss: 1.032503604888916,grad_norm: 0.8567410993276712, iteration: 243985
loss: 1.0052189826965332,grad_norm: 0.8708693130389582, iteration: 243986
loss: 0.9609061479568481,grad_norm: 0.980773588969331, iteration: 243987
loss: 0.9824466705322266,grad_norm: 0.9455466183450727, iteration: 243988
loss: 1.0436103343963623,grad_norm: 0.9999990079978593, iteration: 243989
loss: 1.0372296571731567,grad_norm: 0.8571908585419615, iteration: 243990
loss: 1.0005476474761963,grad_norm: 0.8828666147725264, iteration: 243991
loss: 0.9862895607948303,grad_norm: 0.880209094529161, iteration: 243992
loss: 1.188185453414917,grad_norm: 0.9999992348358839, iteration: 243993
loss: 1.0107405185699463,grad_norm: 0.9120297478589561, iteration: 243994
loss: 1.0250916481018066,grad_norm: 0.8901275106601871, iteration: 243995
loss: 0.9895736575126648,grad_norm: 0.9907010814300131, iteration: 243996
loss: 1.0256506204605103,grad_norm: 0.9999992854425902, iteration: 243997
loss: 0.9874998927116394,grad_norm: 0.7434592680086237, iteration: 243998
loss: 0.989372730255127,grad_norm: 0.8638322335904451, iteration: 243999
loss: 1.088616967201233,grad_norm: 0.9999991468911696, iteration: 244000
loss: 1.0373460054397583,grad_norm: 0.8422737885690913, iteration: 244001
loss: 1.0072754621505737,grad_norm: 0.7443026353979316, iteration: 244002
loss: 1.0419741868972778,grad_norm: 0.9296609853908855, iteration: 244003
loss: 1.0154972076416016,grad_norm: 0.7569673975857484, iteration: 244004
loss: 1.029310941696167,grad_norm: 0.8557576389463194, iteration: 244005
loss: 0.9681445956230164,grad_norm: 0.9422938454974434, iteration: 244006
loss: 0.9481709003448486,grad_norm: 0.8126370955941167, iteration: 244007
loss: 0.9736733436584473,grad_norm: 0.9470506742413608, iteration: 244008
loss: 1.031935453414917,grad_norm: 0.8178742650907861, iteration: 244009
loss: 1.0077314376831055,grad_norm: 0.8517005691334801, iteration: 244010
loss: 0.9709365367889404,grad_norm: 0.999999088821574, iteration: 244011
loss: 0.9954584240913391,grad_norm: 0.9361565144104306, iteration: 244012
loss: 1.0065799951553345,grad_norm: 0.9999990605483204, iteration: 244013
loss: 1.0069148540496826,grad_norm: 0.8988383493126473, iteration: 244014
loss: 0.9490096569061279,grad_norm: 0.9554537779449264, iteration: 244015
loss: 1.022925615310669,grad_norm: 0.9999990244869429, iteration: 244016
loss: 1.0265069007873535,grad_norm: 0.9999990833280709, iteration: 244017
loss: 1.0047277212142944,grad_norm: 0.80680970005616, iteration: 244018
loss: 0.9718268513679504,grad_norm: 0.9999994374252197, iteration: 244019
loss: 0.9556325078010559,grad_norm: 0.8340238021741597, iteration: 244020
loss: 0.987509548664093,grad_norm: 0.8032214436180525, iteration: 244021
loss: 1.0046309232711792,grad_norm: 0.9999989337439922, iteration: 244022
loss: 1.012813687324524,grad_norm: 0.9203965014540831, iteration: 244023
loss: 0.9877018332481384,grad_norm: 0.9157258059088454, iteration: 244024
loss: 1.000647783279419,grad_norm: 0.8783209803389572, iteration: 244025
loss: 1.0253065824508667,grad_norm: 0.9801238057329653, iteration: 244026
loss: 1.0327236652374268,grad_norm: 0.7825348904599798, iteration: 244027
loss: 1.0301488637924194,grad_norm: 0.882068268137984, iteration: 244028
loss: 1.0211910009384155,grad_norm: 0.8899463884059287, iteration: 244029
loss: 0.9753983616828918,grad_norm: 0.9169300543637067, iteration: 244030
loss: 0.9912678599357605,grad_norm: 0.999999823239486, iteration: 244031
loss: 0.9903314709663391,grad_norm: 0.9707333349669968, iteration: 244032
loss: 0.9882731437683105,grad_norm: 0.7552640654784398, iteration: 244033
loss: 0.9698276519775391,grad_norm: 0.9523281177214594, iteration: 244034
loss: 1.109094500541687,grad_norm: 0.9999998718057683, iteration: 244035
loss: 1.0193250179290771,grad_norm: 0.9999990798419294, iteration: 244036
loss: 0.9847363829612732,grad_norm: 0.8547848568042669, iteration: 244037
loss: 0.9922852516174316,grad_norm: 0.9328108165699434, iteration: 244038
loss: 1.004390835762024,grad_norm: 0.9999989932486841, iteration: 244039
loss: 0.9914230704307556,grad_norm: 0.9999996144408962, iteration: 244040
loss: 1.0046343803405762,grad_norm: 0.8477336833665478, iteration: 244041
loss: 1.012765884399414,grad_norm: 0.9999991712748668, iteration: 244042
loss: 0.9923567771911621,grad_norm: 0.9309621756128158, iteration: 244043
loss: 0.9754238128662109,grad_norm: 0.8612734276276899, iteration: 244044
loss: 0.9992936253547668,grad_norm: 0.7859377369143699, iteration: 244045
loss: 1.0015758275985718,grad_norm: 0.8285287993119277, iteration: 244046
loss: 0.9627619385719299,grad_norm: 0.9580159967022507, iteration: 244047
loss: 0.9849365949630737,grad_norm: 0.9999991632467576, iteration: 244048
loss: 1.0029710531234741,grad_norm: 0.8590213518697088, iteration: 244049
loss: 1.0722713470458984,grad_norm: 0.9999996688862072, iteration: 244050
loss: 0.9856655597686768,grad_norm: 0.9999990331281908, iteration: 244051
loss: 1.0979880094528198,grad_norm: 0.9999991257494368, iteration: 244052
loss: 0.9884808659553528,grad_norm: 0.9815979136446045, iteration: 244053
loss: 0.9829871654510498,grad_norm: 0.9999989672034038, iteration: 244054
loss: 1.0285567045211792,grad_norm: 0.7358094930361089, iteration: 244055
loss: 1.0142838954925537,grad_norm: 0.9184615293900132, iteration: 244056
loss: 0.999057948589325,grad_norm: 0.9390818115018911, iteration: 244057
loss: 1.1175066232681274,grad_norm: 0.9999994401783255, iteration: 244058
loss: 0.9966546893119812,grad_norm: 0.920480470857208, iteration: 244059
loss: 1.015834927558899,grad_norm: 0.9999990072055253, iteration: 244060
loss: 0.9779545664787292,grad_norm: 0.8746462461670814, iteration: 244061
loss: 0.9826003313064575,grad_norm: 0.9178439074841106, iteration: 244062
loss: 1.184647560119629,grad_norm: 0.999999854017096, iteration: 244063
loss: 0.9935819506645203,grad_norm: 0.9167290447546561, iteration: 244064
loss: 0.9773634672164917,grad_norm: 0.9119650363744884, iteration: 244065
loss: 1.0683223009109497,grad_norm: 0.9934495385649416, iteration: 244066
loss: 1.0378849506378174,grad_norm: 0.924088304918505, iteration: 244067
loss: 1.0178687572479248,grad_norm: 0.907535885614355, iteration: 244068
loss: 1.0170284509658813,grad_norm: 0.8394757381801184, iteration: 244069
loss: 1.0025932788848877,grad_norm: 0.9650913729906995, iteration: 244070
loss: 1.0945435762405396,grad_norm: 0.9999998696171482, iteration: 244071
loss: 1.0160616636276245,grad_norm: 0.8183387994146772, iteration: 244072
loss: 0.9820985198020935,grad_norm: 0.7610876401803052, iteration: 244073
loss: 1.111847162246704,grad_norm: 0.9043521751362458, iteration: 244074
loss: 0.9810289144515991,grad_norm: 0.8721198089025195, iteration: 244075
loss: 1.0227646827697754,grad_norm: 0.899330978283198, iteration: 244076
loss: 0.9678824543952942,grad_norm: 0.9999990302305253, iteration: 244077
loss: 1.0217890739440918,grad_norm: 0.9999994366685905, iteration: 244078
loss: 1.0408886671066284,grad_norm: 0.9999992035434261, iteration: 244079
loss: 1.0164451599121094,grad_norm: 0.999999158042627, iteration: 244080
loss: 1.165242075920105,grad_norm: 0.999999590821539, iteration: 244081
loss: 1.0215142965316772,grad_norm: 0.999999291389703, iteration: 244082
loss: 0.9990836381912231,grad_norm: 0.9999991433392482, iteration: 244083
loss: 1.0155214071273804,grad_norm: 0.8289132469437123, iteration: 244084
loss: 1.0147886276245117,grad_norm: 0.8837425980797177, iteration: 244085
loss: 1.0646693706512451,grad_norm: 0.9863518749903875, iteration: 244086
loss: 1.012030839920044,grad_norm: 0.8719049961643437, iteration: 244087
loss: 1.0211753845214844,grad_norm: 0.9999991934048535, iteration: 244088
loss: 1.0111589431762695,grad_norm: 0.7581338606609336, iteration: 244089
loss: 1.0261226892471313,grad_norm: 0.9999990795556651, iteration: 244090
loss: 1.029406189918518,grad_norm: 0.9072404281542901, iteration: 244091
loss: 1.0003920793533325,grad_norm: 0.9999991175252345, iteration: 244092
loss: 0.9680310487747192,grad_norm: 0.8948928764555641, iteration: 244093
loss: 1.01834237575531,grad_norm: 0.9503133304587663, iteration: 244094
loss: 1.026178002357483,grad_norm: 0.999999130178975, iteration: 244095
loss: 1.03087317943573,grad_norm: 0.9999997917163772, iteration: 244096
loss: 1.0444416999816895,grad_norm: 0.9999997364114974, iteration: 244097
loss: 1.0310834646224976,grad_norm: 0.8802902275596326, iteration: 244098
loss: 1.0096460580825806,grad_norm: 0.8963763079515457, iteration: 244099
loss: 0.967034637928009,grad_norm: 0.9618432388006121, iteration: 244100
loss: 1.07542085647583,grad_norm: 0.9999992101793999, iteration: 244101
loss: 1.0024763345718384,grad_norm: 0.7629927966482755, iteration: 244102
loss: 1.1748511791229248,grad_norm: 0.9999994425571599, iteration: 244103
loss: 0.995604932308197,grad_norm: 0.9645335767058026, iteration: 244104
loss: 1.0130592584609985,grad_norm: 0.9999992036092243, iteration: 244105
loss: 1.010134220123291,grad_norm: 0.8831757939567517, iteration: 244106
loss: 1.0063951015472412,grad_norm: 0.832759553454463, iteration: 244107
loss: 1.0050309896469116,grad_norm: 0.9999990308393164, iteration: 244108
loss: 0.985032320022583,grad_norm: 0.7546628355920676, iteration: 244109
loss: 0.9875893592834473,grad_norm: 0.8752927436965868, iteration: 244110
loss: 1.0308974981307983,grad_norm: 0.9999998799623265, iteration: 244111
loss: 0.9962536692619324,grad_norm: 0.7107123689534375, iteration: 244112
loss: 0.9987831711769104,grad_norm: 0.8094346752717907, iteration: 244113
loss: 1.091370701789856,grad_norm: 0.9999991175141337, iteration: 244114
loss: 0.9999170303344727,grad_norm: 0.8947245884833295, iteration: 244115
loss: 1.1773362159729004,grad_norm: 0.999999511329051, iteration: 244116
loss: 1.0202125310897827,grad_norm: 0.8432041137681087, iteration: 244117
loss: 1.0162969827651978,grad_norm: 0.8989798677305614, iteration: 244118
loss: 1.0463825464248657,grad_norm: 0.7828167102141882, iteration: 244119
loss: 1.2391682863235474,grad_norm: 0.9999996842419353, iteration: 244120
loss: 1.0157297849655151,grad_norm: 0.7969367786408503, iteration: 244121
loss: 0.9951198697090149,grad_norm: 0.9624288170726377, iteration: 244122
loss: 0.9997017979621887,grad_norm: 0.9999988801642267, iteration: 244123
loss: 1.0896743535995483,grad_norm: 0.9999997740187803, iteration: 244124
loss: 1.0162469148635864,grad_norm: 0.9999990933670668, iteration: 244125
loss: 1.0689728260040283,grad_norm: 0.9999994643618635, iteration: 244126
loss: 1.0820237398147583,grad_norm: 0.8567526147078349, iteration: 244127
loss: 0.9949375987052917,grad_norm: 0.9866164675028136, iteration: 244128
loss: 1.0537548065185547,grad_norm: 0.9999991568707457, iteration: 244129
loss: 1.4045101404190063,grad_norm: 1.0000000148224995, iteration: 244130
loss: 1.044003963470459,grad_norm: 0.9999991718677764, iteration: 244131
loss: 1.4394328594207764,grad_norm: 0.9999997281532369, iteration: 244132
loss: 1.0372304916381836,grad_norm: 0.9380866126260091, iteration: 244133
loss: 1.0005906820297241,grad_norm: 0.9999991401058423, iteration: 244134
loss: 0.9856040477752686,grad_norm: 0.9863751195207966, iteration: 244135
loss: 0.9508978724479675,grad_norm: 0.9217636838897901, iteration: 244136
loss: 1.1766794919967651,grad_norm: 0.9999992009004175, iteration: 244137
loss: 1.1045786142349243,grad_norm: 0.999999350816997, iteration: 244138
loss: 1.0270732641220093,grad_norm: 0.9109204438630647, iteration: 244139
loss: 1.0007308721542358,grad_norm: 0.9999989462547434, iteration: 244140
loss: 1.0312526226043701,grad_norm: 0.9822881279736501, iteration: 244141
loss: 0.9902026057243347,grad_norm: 0.999999922117157, iteration: 244142
loss: 1.058921217918396,grad_norm: 0.9999994163331998, iteration: 244143
loss: 1.197321891784668,grad_norm: 0.9999998161335926, iteration: 244144
loss: 0.991779088973999,grad_norm: 0.8911595201396716, iteration: 244145
loss: 1.126807451248169,grad_norm: 0.9999998931771338, iteration: 244146
loss: 0.9966347813606262,grad_norm: 0.8396661659475073, iteration: 244147
loss: 1.0509968996047974,grad_norm: 0.9999990657287208, iteration: 244148
loss: 0.9821417927742004,grad_norm: 0.7850040522647243, iteration: 244149
loss: 0.9792318940162659,grad_norm: 0.9999991227796383, iteration: 244150
loss: 1.0211403369903564,grad_norm: 0.8406809577418901, iteration: 244151
loss: 1.0024429559707642,grad_norm: 0.9496433814202311, iteration: 244152
loss: 1.0164918899536133,grad_norm: 0.9999999407080867, iteration: 244153
loss: 0.9807416200637817,grad_norm: 0.7337307774611338, iteration: 244154
loss: 1.0533367395401,grad_norm: 0.9214039965310477, iteration: 244155
loss: 0.9954246282577515,grad_norm: 0.9008047352394726, iteration: 244156
loss: 1.0076169967651367,grad_norm: 0.9996565954265322, iteration: 244157
loss: 1.0371376276016235,grad_norm: 0.9999996553288609, iteration: 244158
loss: 0.9994143843650818,grad_norm: 0.9999992010554647, iteration: 244159
loss: 1.0104929208755493,grad_norm: 0.866751337344809, iteration: 244160
loss: 1.005359172821045,grad_norm: 0.8972305576085742, iteration: 244161
loss: 1.1077916622161865,grad_norm: 0.9999991261444239, iteration: 244162
loss: 1.0648512840270996,grad_norm: 0.9196525630934287, iteration: 244163
loss: 1.049531102180481,grad_norm: 0.999999122420507, iteration: 244164
loss: 0.9873831868171692,grad_norm: 0.8441540610354765, iteration: 244165
loss: 1.1812219619750977,grad_norm: 1.0000000017396737, iteration: 244166
loss: 1.0066486597061157,grad_norm: 0.8306061379728316, iteration: 244167
loss: 0.9846091270446777,grad_norm: 0.9999989111592853, iteration: 244168
loss: 1.0299350023269653,grad_norm: 0.9014865013012672, iteration: 244169
loss: 0.9817366003990173,grad_norm: 0.8812887408052534, iteration: 244170
loss: 1.1161571741104126,grad_norm: 0.9999996060758795, iteration: 244171
loss: 1.0034306049346924,grad_norm: 0.8272271146393974, iteration: 244172
loss: 1.0910543203353882,grad_norm: 0.9232585916594775, iteration: 244173
loss: 1.1522608995437622,grad_norm: 0.9999997373086563, iteration: 244174
loss: 1.1033861637115479,grad_norm: 0.9999996648872663, iteration: 244175
loss: 1.0756332874298096,grad_norm: 0.9999999471844397, iteration: 244176
loss: 1.0341928005218506,grad_norm: 0.9999993898765303, iteration: 244177
loss: 1.0059149265289307,grad_norm: 0.9999992212016453, iteration: 244178
loss: 0.9674002528190613,grad_norm: 0.9646288512868464, iteration: 244179
loss: 1.028886318206787,grad_norm: 0.8475777893406913, iteration: 244180
loss: 1.0332844257354736,grad_norm: 0.9999996726288671, iteration: 244181
loss: 1.0286155939102173,grad_norm: 0.9089343943199651, iteration: 244182
loss: 0.9737861156463623,grad_norm: 0.8340308134257383, iteration: 244183
loss: 1.2281010150909424,grad_norm: 0.9999995948288118, iteration: 244184
loss: 0.9654266834259033,grad_norm: 0.9999990746387755, iteration: 244185
loss: 1.003310203552246,grad_norm: 0.9846772709189028, iteration: 244186
loss: 1.022032618522644,grad_norm: 0.8802709606479233, iteration: 244187
loss: 1.0432153940200806,grad_norm: 0.9999993335984038, iteration: 244188
loss: 1.0257395505905151,grad_norm: 0.8931874949541174, iteration: 244189
loss: 1.003043293952942,grad_norm: 0.9999994454787804, iteration: 244190
loss: 0.9724048972129822,grad_norm: 0.8622718118887109, iteration: 244191
loss: 1.0050978660583496,grad_norm: 0.9999991581356855, iteration: 244192
loss: 0.9867231845855713,grad_norm: 0.9007465496209407, iteration: 244193
loss: 0.9981813430786133,grad_norm: 0.8207245378481586, iteration: 244194
loss: 1.0137290954589844,grad_norm: 0.999999065292066, iteration: 244195
loss: 0.9758880734443665,grad_norm: 0.999999506114414, iteration: 244196
loss: 0.9691426157951355,grad_norm: 0.8338249650132363, iteration: 244197
loss: 1.0480974912643433,grad_norm: 0.9999993115750972, iteration: 244198
loss: 0.9779183268547058,grad_norm: 0.9445828364628291, iteration: 244199
loss: 1.0022255182266235,grad_norm: 0.82393837782714, iteration: 244200
loss: 1.027474045753479,grad_norm: 0.9999991608159942, iteration: 244201
loss: 1.0103569030761719,grad_norm: 0.9582262269804105, iteration: 244202
loss: 1.0260099172592163,grad_norm: 0.9999990872175097, iteration: 244203
loss: 0.9953117966651917,grad_norm: 0.8126005841965421, iteration: 244204
loss: 1.0212242603302002,grad_norm: 0.9952256018423322, iteration: 244205
loss: 1.0039570331573486,grad_norm: 0.9999998567277897, iteration: 244206
loss: 1.006535530090332,grad_norm: 0.8633108346580828, iteration: 244207
loss: 1.0117356777191162,grad_norm: 0.96483315922591, iteration: 244208
loss: 1.0314500331878662,grad_norm: 0.8593448658687811, iteration: 244209
loss: 1.0079149007797241,grad_norm: 0.9999992719391386, iteration: 244210
loss: 1.031891107559204,grad_norm: 0.9999993030382807, iteration: 244211
loss: 1.068648099899292,grad_norm: 0.9999996928302914, iteration: 244212
loss: 1.0093916654586792,grad_norm: 0.9999990282656444, iteration: 244213
loss: 1.0244660377502441,grad_norm: 0.9220670907208626, iteration: 244214
loss: 0.9923001527786255,grad_norm: 0.8439862973090402, iteration: 244215
loss: 1.2096830606460571,grad_norm: 0.9999998826314255, iteration: 244216
loss: 1.2613739967346191,grad_norm: 0.9999992405480581, iteration: 244217
loss: 0.9854843020439148,grad_norm: 0.9365548320170031, iteration: 244218
loss: 1.0005455017089844,grad_norm: 0.8529420287177125, iteration: 244219
loss: 1.0120234489440918,grad_norm: 0.9857322388462871, iteration: 244220
loss: 0.9891543388366699,grad_norm: 0.8490846613217037, iteration: 244221
loss: 0.9923193454742432,grad_norm: 0.9999992809204439, iteration: 244222
loss: 1.0098440647125244,grad_norm: 0.875379109039572, iteration: 244223
loss: 0.9935317039489746,grad_norm: 0.9507555800915443, iteration: 244224
loss: 0.9612656831741333,grad_norm: 0.9999991492528618, iteration: 244225
loss: 0.9878020286560059,grad_norm: 0.9017412564262112, iteration: 244226
loss: 1.0764397382736206,grad_norm: 0.8822116303273836, iteration: 244227
loss: 1.0001139640808105,grad_norm: 0.8988941036310001, iteration: 244228
loss: 0.9848344326019287,grad_norm: 0.9999990371225744, iteration: 244229
loss: 0.9808345437049866,grad_norm: 0.8670768749231281, iteration: 244230
loss: 1.0030840635299683,grad_norm: 0.9999990834584679, iteration: 244231
loss: 1.000773549079895,grad_norm: 0.9999990452505344, iteration: 244232
loss: 0.9994196891784668,grad_norm: 0.7360979202360338, iteration: 244233
loss: 0.9839229583740234,grad_norm: 0.9190924588305242, iteration: 244234
loss: 0.982275664806366,grad_norm: 0.7148720698167798, iteration: 244235
loss: 0.9905948638916016,grad_norm: 0.8712621176273945, iteration: 244236
loss: 1.022350549697876,grad_norm: 0.9999990686787512, iteration: 244237
loss: 0.9603896737098694,grad_norm: 0.8307564230130585, iteration: 244238
loss: 1.0112371444702148,grad_norm: 0.8876356726473522, iteration: 244239
loss: 0.972696840763092,grad_norm: 0.81507949602446, iteration: 244240
loss: 0.9957991242408752,grad_norm: 0.9999992375367984, iteration: 244241
loss: 0.9863560795783997,grad_norm: 0.836782255987886, iteration: 244242
loss: 0.9885389804840088,grad_norm: 0.9999992061991922, iteration: 244243
loss: 1.0750685930252075,grad_norm: 0.9999993801993875, iteration: 244244
loss: 0.9810904860496521,grad_norm: 0.9691595294065184, iteration: 244245
loss: 0.9935755729675293,grad_norm: 0.9743346853063306, iteration: 244246
loss: 1.024960994720459,grad_norm: 0.9170982680706714, iteration: 244247
loss: 1.0203171968460083,grad_norm: 0.7469911958025772, iteration: 244248
loss: 1.0214492082595825,grad_norm: 0.8361873903396282, iteration: 244249
loss: 0.9865463376045227,grad_norm: 0.9865528654372386, iteration: 244250
loss: 0.9894284009933472,grad_norm: 0.9999991422872965, iteration: 244251
loss: 1.1546639204025269,grad_norm: 0.9999997848710492, iteration: 244252
loss: 1.0247712135314941,grad_norm: 0.8226754039128175, iteration: 244253
loss: 1.0083997249603271,grad_norm: 0.9999990269396488, iteration: 244254
loss: 1.026573657989502,grad_norm: 0.7421521346157165, iteration: 244255
loss: 1.0105724334716797,grad_norm: 0.9999992750190406, iteration: 244256
loss: 0.9617599248886108,grad_norm: 0.908076680198181, iteration: 244257
loss: 0.989806592464447,grad_norm: 0.9644367232465899, iteration: 244258
loss: 1.0346591472625732,grad_norm: 0.928774618708607, iteration: 244259
loss: 1.0059804916381836,grad_norm: 0.9559329941908106, iteration: 244260
loss: 0.9773593544960022,grad_norm: 0.870271626666169, iteration: 244261
loss: 0.9952225089073181,grad_norm: 0.9999991167917974, iteration: 244262
loss: 1.0085957050323486,grad_norm: 0.999999401663123, iteration: 244263
loss: 1.0079442262649536,grad_norm: 0.9463388683752646, iteration: 244264
loss: 1.0372729301452637,grad_norm: 0.9999995234536839, iteration: 244265
loss: 1.0192168951034546,grad_norm: 0.9331521522744943, iteration: 244266
loss: 1.0276875495910645,grad_norm: 0.9859423763253589, iteration: 244267
loss: 0.9925336837768555,grad_norm: 0.8051559621391817, iteration: 244268
loss: 1.0562413930892944,grad_norm: 0.9999990795056345, iteration: 244269
loss: 0.9952043294906616,grad_norm: 0.8328663061996198, iteration: 244270
loss: 1.0247186422348022,grad_norm: 0.999999152937434, iteration: 244271
loss: 0.9850874543190002,grad_norm: 0.9384504322985828, iteration: 244272
loss: 0.9726716876029968,grad_norm: 0.9999992554636152, iteration: 244273
loss: 0.9537218809127808,grad_norm: 0.9999992188071473, iteration: 244274
loss: 0.986497700214386,grad_norm: 0.8562128410703332, iteration: 244275
loss: 1.0601156949996948,grad_norm: 0.9999989760825101, iteration: 244276
loss: 0.9996457695960999,grad_norm: 0.9667279436675973, iteration: 244277
loss: 1.1368290185928345,grad_norm: 0.9999997878596213, iteration: 244278
loss: 0.9664368629455566,grad_norm: 0.9999991530535327, iteration: 244279
loss: 1.0466854572296143,grad_norm: 0.9999999783438168, iteration: 244280
loss: 1.1357944011688232,grad_norm: 0.9322618203912547, iteration: 244281
loss: 1.019323468208313,grad_norm: 0.8299397952352614, iteration: 244282
loss: 1.0552722215652466,grad_norm: 0.9999991789471312, iteration: 244283
loss: 1.0436182022094727,grad_norm: 0.9999990652187308, iteration: 244284
loss: 1.0102012157440186,grad_norm: 0.9999990260501888, iteration: 244285
loss: 0.9964156746864319,grad_norm: 0.8075598670271456, iteration: 244286
loss: 0.9980605840682983,grad_norm: 0.9856432995855928, iteration: 244287
loss: 0.9837023615837097,grad_norm: 0.9185702077681264, iteration: 244288
loss: 0.9662577509880066,grad_norm: 0.9575356308062869, iteration: 244289
loss: 1.0421406030654907,grad_norm: 0.8324532349598689, iteration: 244290
loss: 1.0198817253112793,grad_norm: 0.8834180843173005, iteration: 244291
loss: 1.0355514287948608,grad_norm: 0.9999995521024589, iteration: 244292
loss: 1.0018552541732788,grad_norm: 0.8687920340559885, iteration: 244293
loss: 1.0000463724136353,grad_norm: 0.9999989415224011, iteration: 244294
loss: 0.9749399423599243,grad_norm: 0.8622672760437766, iteration: 244295
loss: 0.9946278929710388,grad_norm: 0.8957842255063243, iteration: 244296
loss: 0.9802067279815674,grad_norm: 0.9885497300486066, iteration: 244297
loss: 0.9718573689460754,grad_norm: 0.9113040305162922, iteration: 244298
loss: 1.0027899742126465,grad_norm: 0.9999989542240084, iteration: 244299
loss: 1.0018200874328613,grad_norm: 0.9999989809816058, iteration: 244300
loss: 1.0035808086395264,grad_norm: 0.8572496296263844, iteration: 244301
loss: 0.9948022365570068,grad_norm: 0.7805529461524073, iteration: 244302
loss: 0.9915392398834229,grad_norm: 0.7291640913853378, iteration: 244303
loss: 0.9999123811721802,grad_norm: 0.9141946833323662, iteration: 244304
loss: 1.0357263088226318,grad_norm: 0.9797483927543904, iteration: 244305
loss: 0.974761962890625,grad_norm: 0.9008517645093935, iteration: 244306
loss: 1.0002509355545044,grad_norm: 0.9960364586185609, iteration: 244307
loss: 0.9670894145965576,grad_norm: 0.9999990168623862, iteration: 244308
loss: 0.9666840434074402,grad_norm: 0.8983856898397551, iteration: 244309
loss: 0.9919747114181519,grad_norm: 0.8195497577130211, iteration: 244310
loss: 1.0658165216445923,grad_norm: 0.8988970484203735, iteration: 244311
loss: 1.1010550260543823,grad_norm: 0.9999994540191064, iteration: 244312
loss: 0.9886587262153625,grad_norm: 0.7648311001022485, iteration: 244313
loss: 1.016953706741333,grad_norm: 0.9999994447036956, iteration: 244314
loss: 0.9916233420372009,grad_norm: 0.9999991317970697, iteration: 244315
loss: 1.021803855895996,grad_norm: 0.8119800736475631, iteration: 244316
loss: 0.9712910056114197,grad_norm: 0.9447861647086083, iteration: 244317
loss: 1.0207806825637817,grad_norm: 0.9772250847088126, iteration: 244318
loss: 0.9975248575210571,grad_norm: 0.8500075740572623, iteration: 244319
loss: 0.9662150740623474,grad_norm: 0.9026441925799916, iteration: 244320
loss: 0.9938893914222717,grad_norm: 0.9999988550342745, iteration: 244321
loss: 1.0124479532241821,grad_norm: 0.8733253026905615, iteration: 244322
loss: 0.9899622797966003,grad_norm: 0.9624191718461625, iteration: 244323
loss: 1.0330424308776855,grad_norm: 0.8446986541085604, iteration: 244324
loss: 1.0355212688446045,grad_norm: 0.9999990645086676, iteration: 244325
loss: 0.9801313877105713,grad_norm: 0.937509238996753, iteration: 244326
loss: 0.9986698627471924,grad_norm: 0.7659032361006691, iteration: 244327
loss: 0.9564540386199951,grad_norm: 0.9285385605127827, iteration: 244328
loss: 1.012365460395813,grad_norm: 0.999998954014401, iteration: 244329
loss: 1.0199037790298462,grad_norm: 0.7636059038859786, iteration: 244330
loss: 1.0301048755645752,grad_norm: 0.7390454253520738, iteration: 244331
loss: 0.9844069480895996,grad_norm: 0.9457831996332637, iteration: 244332
loss: 0.9928738474845886,grad_norm: 0.9216307062818171, iteration: 244333
loss: 1.081081509590149,grad_norm: 0.9932275959452856, iteration: 244334
loss: 0.9790537357330322,grad_norm: 0.9495525833100261, iteration: 244335
loss: 0.9410629868507385,grad_norm: 0.8137200931972718, iteration: 244336
loss: 1.0281776189804077,grad_norm: 0.9999991161913963, iteration: 244337
loss: 0.9948979616165161,grad_norm: 0.8579545790792521, iteration: 244338
loss: 1.21382737159729,grad_norm: 1.0000000351770553, iteration: 244339
loss: 1.009871244430542,grad_norm: 0.9117808132277553, iteration: 244340
loss: 1.0149518251419067,grad_norm: 0.7113233829671632, iteration: 244341
loss: 0.9546324610710144,grad_norm: 0.7906354558687633, iteration: 244342
loss: 1.0422824621200562,grad_norm: 0.8162361105203614, iteration: 244343
loss: 1.0105416774749756,grad_norm: 0.9999990808629682, iteration: 244344
loss: 1.0225154161453247,grad_norm: 0.9999990311440837, iteration: 244345
loss: 0.9951474070549011,grad_norm: 0.9117248632988537, iteration: 244346
loss: 0.9745816588401794,grad_norm: 0.8170421572540143, iteration: 244347
loss: 1.0084092617034912,grad_norm: 0.9214041227771241, iteration: 244348
loss: 1.017574429512024,grad_norm: 0.9999990957963742, iteration: 244349
loss: 0.964406430721283,grad_norm: 0.9541325147866592, iteration: 244350
loss: 1.0174237489700317,grad_norm: 0.8742539243915074, iteration: 244351
loss: 0.9999701380729675,grad_norm: 0.9999990234754376, iteration: 244352
loss: 1.0224573612213135,grad_norm: 0.8431031629115742, iteration: 244353
loss: 1.0084564685821533,grad_norm: 0.9999995906686807, iteration: 244354
loss: 1.0165396928787231,grad_norm: 0.9358725700916134, iteration: 244355
loss: 1.0265637636184692,grad_norm: 0.9296308318255488, iteration: 244356
loss: 1.0016684532165527,grad_norm: 0.9134694242597436, iteration: 244357
loss: 1.0314693450927734,grad_norm: 0.708636349985915, iteration: 244358
loss: 0.9963429570198059,grad_norm: 0.867283866167371, iteration: 244359
loss: 0.9619445204734802,grad_norm: 0.7709316367900358, iteration: 244360
loss: 1.0557700395584106,grad_norm: 0.9999993306739119, iteration: 244361
loss: 0.967847466468811,grad_norm: 0.8350026074791338, iteration: 244362
loss: 1.022145390510559,grad_norm: 0.8370783442467787, iteration: 244363
loss: 0.9780082106590271,grad_norm: 0.8600208462980496, iteration: 244364
loss: 0.9797716736793518,grad_norm: 0.9087288799799432, iteration: 244365
loss: 1.1385142803192139,grad_norm: 0.9999991280378201, iteration: 244366
loss: 0.996476411819458,grad_norm: 0.8194510986621586, iteration: 244367
loss: 0.9579735398292542,grad_norm: 0.846953368300562, iteration: 244368
loss: 0.97561115026474,grad_norm: 0.9478726625679291, iteration: 244369
loss: 0.9599474668502808,grad_norm: 0.9169972869627527, iteration: 244370
loss: 0.9787335395812988,grad_norm: 0.964698325480695, iteration: 244371
loss: 1.015648603439331,grad_norm: 0.8246179790390531, iteration: 244372
loss: 0.9787015318870544,grad_norm: 0.9413729922935484, iteration: 244373
loss: 0.9989429712295532,grad_norm: 0.9999989555551779, iteration: 244374
loss: 1.0235962867736816,grad_norm: 0.9999995106067491, iteration: 244375
loss: 1.0005297660827637,grad_norm: 0.9580570351053508, iteration: 244376
loss: 1.0400949716567993,grad_norm: 0.8322693976798928, iteration: 244377
loss: 0.9964216947555542,grad_norm: 0.7559732102034722, iteration: 244378
loss: 1.0100359916687012,grad_norm: 0.8289007609314518, iteration: 244379
loss: 1.0148284435272217,grad_norm: 0.8714849393408158, iteration: 244380
loss: 1.0071241855621338,grad_norm: 0.9228602007050029, iteration: 244381
loss: 0.9936390519142151,grad_norm: 0.8653056076888719, iteration: 244382
loss: 0.9979063868522644,grad_norm: 0.7665254020353018, iteration: 244383
loss: 1.0281355381011963,grad_norm: 0.999999149752826, iteration: 244384
loss: 1.0239542722702026,grad_norm: 0.9999990899593351, iteration: 244385
loss: 1.0124410390853882,grad_norm: 0.9999991354511392, iteration: 244386
loss: 0.9779725670814514,grad_norm: 0.8484238751508746, iteration: 244387
loss: 1.009166955947876,grad_norm: 0.8364597803631183, iteration: 244388
loss: 0.9930334091186523,grad_norm: 0.9945362094289416, iteration: 244389
loss: 0.9992528557777405,grad_norm: 0.9999990075175095, iteration: 244390
loss: 1.004918098449707,grad_norm: 0.9999998693312736, iteration: 244391
loss: 0.9597934484481812,grad_norm: 0.9999991096101062, iteration: 244392
loss: 0.9984911680221558,grad_norm: 0.9999990866050912, iteration: 244393
loss: 1.0022380352020264,grad_norm: 0.9999992915943035, iteration: 244394
loss: 0.9941322207450867,grad_norm: 0.9200389914447361, iteration: 244395
loss: 1.0233567953109741,grad_norm: 0.9999990913797265, iteration: 244396
loss: 0.9988557696342468,grad_norm: 0.8150632560541197, iteration: 244397
loss: 0.9809537529945374,grad_norm: 0.773588735095546, iteration: 244398
loss: 1.0114353895187378,grad_norm: 0.9450196702853056, iteration: 244399
loss: 0.9599558115005493,grad_norm: 0.999999192414172, iteration: 244400
loss: 1.0178409814834595,grad_norm: 0.968347638399609, iteration: 244401
loss: 0.9865696430206299,grad_norm: 0.9999991905519662, iteration: 244402
loss: 1.0137449502944946,grad_norm: 0.8775635541862837, iteration: 244403
loss: 1.0026330947875977,grad_norm: 0.7351700663120214, iteration: 244404
loss: 1.0682328939437866,grad_norm: 0.946306035246286, iteration: 244405
loss: 1.008358120918274,grad_norm: 0.8926285257623654, iteration: 244406
loss: 1.0096784830093384,grad_norm: 0.9212050265607333, iteration: 244407
loss: 0.9770421385765076,grad_norm: 0.7937752160956741, iteration: 244408
loss: 0.9907654523849487,grad_norm: 0.9999990990668907, iteration: 244409
loss: 1.0139708518981934,grad_norm: 1.0000000255812833, iteration: 244410
loss: 1.084763526916504,grad_norm: 0.8708607719523362, iteration: 244411
loss: 1.0330770015716553,grad_norm: 0.9682474825289265, iteration: 244412
loss: 0.9874367713928223,grad_norm: 0.9999991167507818, iteration: 244413
loss: 1.0541605949401855,grad_norm: 0.9999991031287986, iteration: 244414
loss: 0.9770273566246033,grad_norm: 0.9999990947267262, iteration: 244415
loss: 1.0245417356491089,grad_norm: 0.9182154930144647, iteration: 244416
loss: 1.058707594871521,grad_norm: 0.9999993721599143, iteration: 244417
loss: 0.9925909638404846,grad_norm: 0.8856042079214707, iteration: 244418
loss: 1.0182101726531982,grad_norm: 0.9999992926244267, iteration: 244419
loss: 1.0347906351089478,grad_norm: 0.7809178106183127, iteration: 244420
loss: 0.9855412840843201,grad_norm: 0.8957618272530129, iteration: 244421
loss: 0.96219402551651,grad_norm: 0.8149170118402356, iteration: 244422
loss: 1.064023733139038,grad_norm: 0.9999991236220739, iteration: 244423
loss: 0.9557271599769592,grad_norm: 0.767581639978428, iteration: 244424
loss: 0.9934073090553284,grad_norm: 0.9205764028464499, iteration: 244425
loss: 0.9757007956504822,grad_norm: 0.8598483131502945, iteration: 244426
loss: 1.0219734907150269,grad_norm: 0.9999996830506154, iteration: 244427
loss: 0.9690382480621338,grad_norm: 0.8372864862281503, iteration: 244428
loss: 1.055351972579956,grad_norm: 0.9999990549239173, iteration: 244429
loss: 0.9638581275939941,grad_norm: 0.9999991022178374, iteration: 244430
loss: 0.9787753224372864,grad_norm: 0.8921194430854857, iteration: 244431
loss: 0.9924105405807495,grad_norm: 0.9703371930739016, iteration: 244432
loss: 0.9887226819992065,grad_norm: 0.9385610195305262, iteration: 244433
loss: 1.0170040130615234,grad_norm: 0.999999170093739, iteration: 244434
loss: 1.034704566001892,grad_norm: 0.9999991887160412, iteration: 244435
loss: 1.0413674116134644,grad_norm: 0.9999992713241651, iteration: 244436
loss: 1.017238974571228,grad_norm: 0.9181035763285483, iteration: 244437
loss: 1.0114554166793823,grad_norm: 0.7427552176791254, iteration: 244438
loss: 1.010066032409668,grad_norm: 0.7627720995157595, iteration: 244439
loss: 1.023396611213684,grad_norm: 0.8190689373648299, iteration: 244440
loss: 1.0281291007995605,grad_norm: 0.7800766240552512, iteration: 244441
loss: 0.9879729151725769,grad_norm: 0.8982479462974206, iteration: 244442
loss: 1.020599365234375,grad_norm: 0.8966340120941461, iteration: 244443
loss: 0.9952532649040222,grad_norm: 0.9584897586415315, iteration: 244444
loss: 0.9848843812942505,grad_norm: 0.8132698645035884, iteration: 244445
loss: 1.0210601091384888,grad_norm: 0.9999991969272833, iteration: 244446
loss: 0.9716095924377441,grad_norm: 0.8732984359024898, iteration: 244447
loss: 1.002370834350586,grad_norm: 0.9628843447827093, iteration: 244448
loss: 0.9793258905410767,grad_norm: 0.9825041347371464, iteration: 244449
loss: 0.9609050750732422,grad_norm: 0.8524664911329521, iteration: 244450
loss: 1.031803846359253,grad_norm: 0.9280571988317736, iteration: 244451
loss: 1.0040948390960693,grad_norm: 0.988317779539217, iteration: 244452
loss: 1.0740253925323486,grad_norm: 0.9999994741665755, iteration: 244453
loss: 0.9587576389312744,grad_norm: 0.9327824595931969, iteration: 244454
loss: 1.0019729137420654,grad_norm: 0.8243681156531749, iteration: 244455
loss: 0.9824256896972656,grad_norm: 0.8899295222587709, iteration: 244456
loss: 1.026374340057373,grad_norm: 0.8995334689778833, iteration: 244457
loss: 0.9681975841522217,grad_norm: 0.7528842939610589, iteration: 244458
loss: 0.9916785955429077,grad_norm: 0.9119760271129645, iteration: 244459
loss: 0.9839584231376648,grad_norm: 0.7890002860027207, iteration: 244460
loss: 0.9966803789138794,grad_norm: 0.7829116786171179, iteration: 244461
loss: 1.0012383460998535,grad_norm: 0.8904947576218494, iteration: 244462
loss: 0.9737499952316284,grad_norm: 0.9914839271407835, iteration: 244463
loss: 0.9633340835571289,grad_norm: 0.9146482215330397, iteration: 244464
loss: 1.0147302150726318,grad_norm: 0.890874594935164, iteration: 244465
loss: 0.9916875958442688,grad_norm: 0.9582401683838179, iteration: 244466
loss: 1.0352791547775269,grad_norm: 0.8056198344439288, iteration: 244467
loss: 1.024037480354309,grad_norm: 0.7017018752960228, iteration: 244468
loss: 1.012494683265686,grad_norm: 0.9604931555497104, iteration: 244469
loss: 1.0379647016525269,grad_norm: 0.9999997949557843, iteration: 244470
loss: 0.9820793867111206,grad_norm: 0.9999991736609103, iteration: 244471
loss: 0.9887441992759705,grad_norm: 0.8967265281483039, iteration: 244472
loss: 0.9998354911804199,grad_norm: 0.9073781468399649, iteration: 244473
loss: 1.0327694416046143,grad_norm: 0.9999995479980517, iteration: 244474
loss: 1.029361605644226,grad_norm: 0.9198886533854684, iteration: 244475
loss: 1.0084002017974854,grad_norm: 0.9999994783976356, iteration: 244476
loss: 0.9952085018157959,grad_norm: 0.8606546098679669, iteration: 244477
loss: 0.9554761648178101,grad_norm: 0.8633110175166088, iteration: 244478
loss: 1.0236784219741821,grad_norm: 0.8409893959274449, iteration: 244479
loss: 0.9916688203811646,grad_norm: 0.9504847301713606, iteration: 244480
loss: 1.0604406595230103,grad_norm: 0.9414187084053739, iteration: 244481
loss: 1.0076569318771362,grad_norm: 0.8402398031129826, iteration: 244482
loss: 0.9777392148971558,grad_norm: 0.8275481130949415, iteration: 244483
loss: 1.004858374595642,grad_norm: 0.9999993137540567, iteration: 244484
loss: 1.002598524093628,grad_norm: 0.8233117909870808, iteration: 244485
loss: 0.9976742267608643,grad_norm: 0.8229866289986691, iteration: 244486
loss: 0.9977638125419617,grad_norm: 0.999999135147877, iteration: 244487
loss: 1.0159070491790771,grad_norm: 0.8691658336574692, iteration: 244488
loss: 1.0036174058914185,grad_norm: 0.7912696593045961, iteration: 244489
loss: 0.9662275910377502,grad_norm: 0.8168989651159156, iteration: 244490
loss: 0.9964167475700378,grad_norm: 0.8841776898938242, iteration: 244491
loss: 0.9773727059364319,grad_norm: 0.999999781214549, iteration: 244492
loss: 1.0192792415618896,grad_norm: 0.9532997614801492, iteration: 244493
loss: 1.0313197374343872,grad_norm: 0.807027416775286, iteration: 244494
loss: 0.9811166524887085,grad_norm: 0.9999991220069229, iteration: 244495
loss: 0.9886468648910522,grad_norm: 0.7342827414995182, iteration: 244496
loss: 1.1777963638305664,grad_norm: 0.999999557388346, iteration: 244497
loss: 0.9791375398635864,grad_norm: 0.9841164571996208, iteration: 244498
loss: 1.0048415660858154,grad_norm: 0.9999991043648068, iteration: 244499
loss: 1.010738492012024,grad_norm: 0.7713363335316129, iteration: 244500
loss: 1.004590630531311,grad_norm: 0.9999991453575467, iteration: 244501
loss: 1.0019683837890625,grad_norm: 0.9999990897323049, iteration: 244502
loss: 0.9984411001205444,grad_norm: 0.9269382881890132, iteration: 244503
loss: 1.0724058151245117,grad_norm: 0.9081876721614034, iteration: 244504
loss: 1.0102568864822388,grad_norm: 0.7690180535247044, iteration: 244505
loss: 1.1519442796707153,grad_norm: 0.9999993642315024, iteration: 244506
loss: 0.9826356172561646,grad_norm: 0.9185279828067153, iteration: 244507
loss: 1.0018644332885742,grad_norm: 0.9377304684839676, iteration: 244508
loss: 0.9822968244552612,grad_norm: 0.8773048556244037, iteration: 244509
loss: 1.0751245021820068,grad_norm: 0.9195690997772802, iteration: 244510
loss: 1.0184071063995361,grad_norm: 0.9411281220418305, iteration: 244511
loss: 1.0861400365829468,grad_norm: 0.9999991062028282, iteration: 244512
loss: 1.0024253129959106,grad_norm: 0.8431072794889017, iteration: 244513
loss: 0.9874126315116882,grad_norm: 0.9999990988845036, iteration: 244514
loss: 0.985465407371521,grad_norm: 0.9218153757670929, iteration: 244515
loss: 0.9948790669441223,grad_norm: 0.9834824130216351, iteration: 244516
loss: 0.991743803024292,grad_norm: 0.787896043308155, iteration: 244517
loss: 1.0053943395614624,grad_norm: 0.999999054954498, iteration: 244518
loss: 1.0746394395828247,grad_norm: 0.9999999293250533, iteration: 244519
loss: 0.9907580018043518,grad_norm: 0.878265446306233, iteration: 244520
loss: 1.015039324760437,grad_norm: 0.9094640693046742, iteration: 244521
loss: 1.0378398895263672,grad_norm: 0.8567104006609356, iteration: 244522
loss: 0.9489356279373169,grad_norm: 0.9999989914706731, iteration: 244523
loss: 0.9919940233230591,grad_norm: 0.9836780386323106, iteration: 244524
loss: 1.001027226448059,grad_norm: 0.7965713642193438, iteration: 244525
loss: 0.9950416088104248,grad_norm: 0.9471113276114062, iteration: 244526
loss: 1.0163005590438843,grad_norm: 0.8373032372641165, iteration: 244527
loss: 0.9969363808631897,grad_norm: 0.9999992054656711, iteration: 244528
loss: 0.9920383095741272,grad_norm: 0.8307731205816842, iteration: 244529
loss: 0.9789620041847229,grad_norm: 0.9616195953893694, iteration: 244530
loss: 1.0000985860824585,grad_norm: 0.8854898546668961, iteration: 244531
loss: 0.9637625813484192,grad_norm: 0.8480690581670383, iteration: 244532
loss: 1.0683084726333618,grad_norm: 0.9999999409299867, iteration: 244533
loss: 0.9652689099311829,grad_norm: 0.9054589422947125, iteration: 244534
loss: 0.9523541331291199,grad_norm: 0.9356111482002403, iteration: 244535
loss: 0.9956801533699036,grad_norm: 0.9999990454124786, iteration: 244536
loss: 1.0220867395401,grad_norm: 0.8922615754476146, iteration: 244537
loss: 1.0206961631774902,grad_norm: 0.9999989127602157, iteration: 244538
loss: 1.0660364627838135,grad_norm: 0.9999999673268529, iteration: 244539
loss: 0.9711524844169617,grad_norm: 0.830502400370257, iteration: 244540
loss: 1.0151993036270142,grad_norm: 0.8296849220629441, iteration: 244541
loss: 1.0174956321716309,grad_norm: 0.9999995212412117, iteration: 244542
loss: 1.009286880493164,grad_norm: 0.9999990713087478, iteration: 244543
loss: 1.0319241285324097,grad_norm: 0.9999998641957927, iteration: 244544
loss: 0.9895092248916626,grad_norm: 0.9999992402244267, iteration: 244545
loss: 0.9752212762832642,grad_norm: 0.8404511811053286, iteration: 244546
loss: 0.9654842019081116,grad_norm: 0.8283276414764715, iteration: 244547
loss: 1.0647485256195068,grad_norm: 0.9999996893534073, iteration: 244548
loss: 0.9764121770858765,grad_norm: 0.9093879259949285, iteration: 244549
loss: 0.9917618036270142,grad_norm: 0.9554723611351247, iteration: 244550
loss: 0.9856255650520325,grad_norm: 0.9999990600687628, iteration: 244551
loss: 0.9745194315910339,grad_norm: 0.9583434954209116, iteration: 244552
loss: 0.9868767857551575,grad_norm: 0.8971531888911057, iteration: 244553
loss: 1.0075238943099976,grad_norm: 0.9945739301720361, iteration: 244554
loss: 1.0260744094848633,grad_norm: 0.999999925082008, iteration: 244555
loss: 1.0457216501235962,grad_norm: 0.8987083389658622, iteration: 244556
loss: 1.0149768590927124,grad_norm: 0.8800067853252981, iteration: 244557
loss: 1.0417838096618652,grad_norm: 0.8809344121746391, iteration: 244558
loss: 0.975226640701294,grad_norm: 0.9999991449054425, iteration: 244559
loss: 1.0210177898406982,grad_norm: 0.9999993217647741, iteration: 244560
loss: 0.9994762539863586,grad_norm: 0.9999988449256986, iteration: 244561
loss: 0.996632993221283,grad_norm: 0.844633551827727, iteration: 244562
loss: 0.964291512966156,grad_norm: 0.9620917064726199, iteration: 244563
loss: 1.0071629285812378,grad_norm: 0.9705109092696421, iteration: 244564
loss: 1.0091300010681152,grad_norm: 0.8308597682736276, iteration: 244565
loss: 0.9985148906707764,grad_norm: 0.7886577615777193, iteration: 244566
loss: 0.966601550579071,grad_norm: 0.9182727939434082, iteration: 244567
loss: 1.0061670541763306,grad_norm: 0.8101299104071917, iteration: 244568
loss: 1.0216256380081177,grad_norm: 0.999999041827702, iteration: 244569
loss: 0.9874929785728455,grad_norm: 0.9155786565679603, iteration: 244570
loss: 1.021621823310852,grad_norm: 0.9999990858927053, iteration: 244571
loss: 1.0531978607177734,grad_norm: 0.9999992690603504, iteration: 244572
loss: 1.0359467267990112,grad_norm: 0.944536549197352, iteration: 244573
loss: 1.0270615816116333,grad_norm: 0.9999991809291702, iteration: 244574
loss: 1.0034781694412231,grad_norm: 0.9999992086907529, iteration: 244575
loss: 1.031449794769287,grad_norm: 0.9518226727402821, iteration: 244576
loss: 0.9535914659500122,grad_norm: 0.8343218117819716, iteration: 244577
loss: 0.9836920499801636,grad_norm: 0.99635173155578, iteration: 244578
loss: 0.9745197892189026,grad_norm: 0.9013005736811274, iteration: 244579
loss: 1.0253037214279175,grad_norm: 0.7503792238964053, iteration: 244580
loss: 0.9806026220321655,grad_norm: 0.9101146394880897, iteration: 244581
loss: 0.9906964302062988,grad_norm: 0.9999991157179415, iteration: 244582
loss: 1.028029441833496,grad_norm: 0.9999998218917262, iteration: 244583
loss: 1.0353153944015503,grad_norm: 0.8951267929082302, iteration: 244584
loss: 0.99721759557724,grad_norm: 0.9873947896163954, iteration: 244585
loss: 0.9816431403160095,grad_norm: 0.9568504085384812, iteration: 244586
loss: 1.2806079387664795,grad_norm: 0.9999999565957933, iteration: 244587
loss: 1.0366698503494263,grad_norm: 0.9696530820252073, iteration: 244588
loss: 1.0163497924804688,grad_norm: 0.9999993070381721, iteration: 244589
loss: 1.187328815460205,grad_norm: 0.9999999210130531, iteration: 244590
loss: 1.0291286706924438,grad_norm: 0.9225712437287096, iteration: 244591
loss: 1.052092432975769,grad_norm: 0.8752775969103893, iteration: 244592
loss: 1.0081230401992798,grad_norm: 0.9976957372165295, iteration: 244593
loss: 1.1706902980804443,grad_norm: 0.9461419462519945, iteration: 244594
loss: 1.0478150844573975,grad_norm: 0.8866730715406186, iteration: 244595
loss: 0.9760573506355286,grad_norm: 0.8982764450475139, iteration: 244596
loss: 0.9916433095932007,grad_norm: 0.6956086244596759, iteration: 244597
loss: 0.9990763664245605,grad_norm: 0.9606974462060142, iteration: 244598
loss: 1.012671709060669,grad_norm: 0.9999992061711749, iteration: 244599
loss: 1.0025241374969482,grad_norm: 0.725184271623869, iteration: 244600
loss: 1.005415678024292,grad_norm: 0.8693846688617477, iteration: 244601
loss: 1.0136628150939941,grad_norm: 0.9999992405221311, iteration: 244602
loss: 1.2022589445114136,grad_norm: 0.9999996231776123, iteration: 244603
loss: 1.0172100067138672,grad_norm: 0.9568033702897764, iteration: 244604
loss: 1.0551230907440186,grad_norm: 0.9999991446873846, iteration: 244605
loss: 1.0045667886734009,grad_norm: 0.9999992091641198, iteration: 244606
loss: 0.96830153465271,grad_norm: 0.7448623624925174, iteration: 244607
loss: 0.9650834798812866,grad_norm: 0.9162758317617024, iteration: 244608
loss: 1.0152230262756348,grad_norm: 0.8863323928649703, iteration: 244609
loss: 1.0260703563690186,grad_norm: 0.9532705179706489, iteration: 244610
loss: 1.0447235107421875,grad_norm: 0.9999997448512503, iteration: 244611
loss: 0.9974678158760071,grad_norm: 0.9999990864441711, iteration: 244612
loss: 1.0509768724441528,grad_norm: 0.9493307190256977, iteration: 244613
loss: 1.1036536693572998,grad_norm: 0.9999990242415121, iteration: 244614
loss: 1.0597255229949951,grad_norm: 0.9999989275979927, iteration: 244615
loss: 1.165749192237854,grad_norm: 0.9999994841321157, iteration: 244616
loss: 0.97046959400177,grad_norm: 0.9789606242917175, iteration: 244617
loss: 1.1487751007080078,grad_norm: 0.999999210188037, iteration: 244618
loss: 1.1089565753936768,grad_norm: 1.000000099796492, iteration: 244619
loss: 0.982297420501709,grad_norm: 0.9999990758443543, iteration: 244620
loss: 0.9899323582649231,grad_norm: 0.9999990655195338, iteration: 244621
loss: 0.9987280964851379,grad_norm: 0.8424518325500829, iteration: 244622
loss: 1.1089459657669067,grad_norm: 0.9999991955339214, iteration: 244623
loss: 0.9803786277770996,grad_norm: 0.9999994919454673, iteration: 244624
loss: 1.0075466632843018,grad_norm: 0.9999990534517554, iteration: 244625
loss: 1.0233505964279175,grad_norm: 0.9999989742187347, iteration: 244626
loss: 1.0567246675491333,grad_norm: 0.9855419824431616, iteration: 244627
loss: 1.0309288501739502,grad_norm: 0.9999996484336284, iteration: 244628
loss: 0.9814763069152832,grad_norm: 0.8776007225693079, iteration: 244629
loss: 1.0641108751296997,grad_norm: 0.9999997524964882, iteration: 244630
loss: 1.139285922050476,grad_norm: 0.9999991370014395, iteration: 244631
loss: 1.0990513563156128,grad_norm: 0.8234495837772853, iteration: 244632
loss: 1.022603988647461,grad_norm: 1.000000000199373, iteration: 244633
loss: 1.1099604368209839,grad_norm: 0.9999991459617322, iteration: 244634
loss: 1.1339389085769653,grad_norm: 0.9999991043436713, iteration: 244635
loss: 1.236305832862854,grad_norm: 0.9999992341576786, iteration: 244636
loss: 1.0150171518325806,grad_norm: 0.7708690821394758, iteration: 244637
loss: 1.087085247039795,grad_norm: 0.9999990608922459, iteration: 244638
loss: 1.177602767944336,grad_norm: 0.9999993378888598, iteration: 244639
loss: 1.2019493579864502,grad_norm: 0.9999995890634553, iteration: 244640
loss: 1.1244465112686157,grad_norm: 0.999999032059714, iteration: 244641
loss: 1.2374495267868042,grad_norm: 0.999999451601114, iteration: 244642
loss: 1.1890287399291992,grad_norm: 0.9999998756472572, iteration: 244643
loss: 1.077035903930664,grad_norm: 0.9999992695493904, iteration: 244644
loss: 1.102113962173462,grad_norm: 0.9999997698471171, iteration: 244645
loss: 1.212881326675415,grad_norm: 0.9999997578606173, iteration: 244646
loss: 1.2649258375167847,grad_norm: 0.9999994778567363, iteration: 244647
loss: 1.2703207731246948,grad_norm: 0.9999994807070907, iteration: 244648
loss: 1.275578260421753,grad_norm: 0.9999992867697857, iteration: 244649
loss: 1.3186594247817993,grad_norm: 0.9999996888328627, iteration: 244650
loss: 1.345860481262207,grad_norm: 0.9999999037982933, iteration: 244651
loss: 1.1279438734054565,grad_norm: 0.9999991126258362, iteration: 244652
loss: 1.159524917602539,grad_norm: 0.9999998071258271, iteration: 244653
loss: 1.2551732063293457,grad_norm: 0.9999993996318567, iteration: 244654
loss: 1.2489386796951294,grad_norm: 0.9999998940699334, iteration: 244655
loss: 1.3108829259872437,grad_norm: 0.9999995869123023, iteration: 244656
loss: 1.029388427734375,grad_norm: 0.9999995301851967, iteration: 244657
loss: 1.2007511854171753,grad_norm: 0.999999271301726, iteration: 244658
loss: 1.2866826057434082,grad_norm: 0.9999994244657, iteration: 244659
loss: 1.4061195850372314,grad_norm: 0.9999994375489517, iteration: 244660
loss: 1.0390750169754028,grad_norm: 0.9999997403493945, iteration: 244661
loss: 1.1918318271636963,grad_norm: 0.9999998595012793, iteration: 244662
loss: 1.0696208477020264,grad_norm: 0.9999990542297481, iteration: 244663
loss: 1.2124351263046265,grad_norm: 0.9999997355726002, iteration: 244664
loss: 1.1011477708816528,grad_norm: 0.8329303090854765, iteration: 244665
loss: 1.2577993869781494,grad_norm: 0.9999995585459918, iteration: 244666
loss: 1.1171753406524658,grad_norm: 0.9999992922887768, iteration: 244667
loss: 1.098802924156189,grad_norm: 0.9999998048430684, iteration: 244668
loss: 1.0839641094207764,grad_norm: 0.9999999295000923, iteration: 244669
loss: 1.0736628770828247,grad_norm: 0.9999995365787877, iteration: 244670
loss: 1.073439598083496,grad_norm: 0.9999995789347615, iteration: 244671
loss: 1.174838662147522,grad_norm: 0.9999991750139133, iteration: 244672
loss: 1.0360661745071411,grad_norm: 0.9999993408832885, iteration: 244673
loss: 1.1405384540557861,grad_norm: 0.9999995090939549, iteration: 244674
loss: 1.025368094444275,grad_norm: 0.9999990995282955, iteration: 244675
loss: 1.0378923416137695,grad_norm: 0.9999991346223225, iteration: 244676
loss: 0.990746796131134,grad_norm: 0.8450741153141795, iteration: 244677
loss: 1.094399094581604,grad_norm: 0.9999993051788528, iteration: 244678
loss: 1.0150792598724365,grad_norm: 0.9999988827285995, iteration: 244679
loss: 1.0533629655838013,grad_norm: 0.9999991730441201, iteration: 244680
loss: 1.052308201789856,grad_norm: 0.999999772255017, iteration: 244681
loss: 1.0059679746627808,grad_norm: 0.8736263845358466, iteration: 244682
loss: 1.0205166339874268,grad_norm: 0.9999991780682433, iteration: 244683
loss: 1.0955860614776611,grad_norm: 0.9999990589232648, iteration: 244684
loss: 1.0004432201385498,grad_norm: 0.9999991500246934, iteration: 244685
loss: 1.0299726724624634,grad_norm: 0.9999989909034459, iteration: 244686
loss: 1.0823962688446045,grad_norm: 0.9999998753505475, iteration: 244687
loss: 1.1053376197814941,grad_norm: 0.9999990667758757, iteration: 244688
loss: 1.216826319694519,grad_norm: 0.9999992017566081, iteration: 244689
loss: 1.1066899299621582,grad_norm: 0.9999994213858711, iteration: 244690
loss: 1.1640057563781738,grad_norm: 0.9999991723252274, iteration: 244691
loss: 1.0562912225723267,grad_norm: 0.9999993333593834, iteration: 244692
loss: 1.0249639749526978,grad_norm: 0.8915683952142345, iteration: 244693
loss: 1.0122759342193604,grad_norm: 0.8153571390641011, iteration: 244694
loss: 1.0062721967697144,grad_norm: 0.9721705389694638, iteration: 244695
loss: 1.0584499835968018,grad_norm: 0.9999991799985619, iteration: 244696
loss: 1.1430318355560303,grad_norm: 0.9999997341544691, iteration: 244697
loss: 1.0743979215621948,grad_norm: 0.9526341849619346, iteration: 244698
loss: 1.0067315101623535,grad_norm: 0.999999149403547, iteration: 244699
loss: 1.069360375404358,grad_norm: 0.999999160332022, iteration: 244700
loss: 1.1218856573104858,grad_norm: 0.9999998493147696, iteration: 244701
loss: 1.0312385559082031,grad_norm: 0.9999992005641705, iteration: 244702
loss: 1.0409094095230103,grad_norm: 0.9312519819296116, iteration: 244703
loss: 1.1116560697555542,grad_norm: 0.9999992112780147, iteration: 244704
loss: 1.1397216320037842,grad_norm: 0.999999219659798, iteration: 244705
loss: 1.179629921913147,grad_norm: 0.9999995866959092, iteration: 244706
loss: 1.1787917613983154,grad_norm: 0.9999994534147134, iteration: 244707
loss: 1.1483690738677979,grad_norm: 0.9999998254012307, iteration: 244708
loss: 1.0279459953308105,grad_norm: 0.9999993511576992, iteration: 244709
loss: 1.007981777191162,grad_norm: 0.9257310206338619, iteration: 244710
loss: 1.0093319416046143,grad_norm: 0.9615516219789089, iteration: 244711
loss: 1.0922398567199707,grad_norm: 0.9999991517144591, iteration: 244712
loss: 1.0147907733917236,grad_norm: 0.9999991506713432, iteration: 244713
loss: 0.9816246032714844,grad_norm: 0.9746623454223758, iteration: 244714
loss: 1.0184440612792969,grad_norm: 0.9668408743381763, iteration: 244715
loss: 1.0980753898620605,grad_norm: 0.999999091412793, iteration: 244716
loss: 1.0449398756027222,grad_norm: 0.9999990271321534, iteration: 244717
loss: 1.2143402099609375,grad_norm: 0.9999992261649376, iteration: 244718
loss: 1.068902850151062,grad_norm: 0.999999844272131, iteration: 244719
loss: 1.0797381401062012,grad_norm: 0.9999989904947894, iteration: 244720
loss: 0.9829223155975342,grad_norm: 0.9764240468740912, iteration: 244721
loss: 0.9807732105255127,grad_norm: 0.9999992047000416, iteration: 244722
loss: 1.0543471574783325,grad_norm: 0.9999990856223595, iteration: 244723
loss: 1.0665395259857178,grad_norm: 0.999999382353092, iteration: 244724
loss: 1.0956902503967285,grad_norm: 0.9011638897977791, iteration: 244725
loss: 0.967433750629425,grad_norm: 0.8871631526419701, iteration: 244726
loss: 1.0428013801574707,grad_norm: 0.9999990637196994, iteration: 244727
loss: 0.980851411819458,grad_norm: 0.857816645465525, iteration: 244728
loss: 1.1986274719238281,grad_norm: 0.9999990868623859, iteration: 244729
loss: 0.9989522099494934,grad_norm: 0.9999993072524668, iteration: 244730
loss: 1.0632315874099731,grad_norm: 0.9061546745300745, iteration: 244731
loss: 1.0847759246826172,grad_norm: 0.9693562606674975, iteration: 244732
loss: 1.1104321479797363,grad_norm: 0.9999994544774199, iteration: 244733
loss: 1.0385812520980835,grad_norm: 0.9999992454457056, iteration: 244734
loss: 1.1514593362808228,grad_norm: 0.9999996622779145, iteration: 244735
loss: 1.2128686904907227,grad_norm: 0.9999995269754853, iteration: 244736
loss: 1.1070975065231323,grad_norm: 0.9999992442570289, iteration: 244737
loss: 1.0634890794754028,grad_norm: 0.9999996480837077, iteration: 244738
loss: 1.0427817106246948,grad_norm: 0.9999990743711344, iteration: 244739
loss: 1.0805714130401611,grad_norm: 0.9033502455080056, iteration: 244740
loss: 1.033294677734375,grad_norm: 0.9999991857097398, iteration: 244741
loss: 1.046642780303955,grad_norm: 0.9788237892109598, iteration: 244742
loss: 1.2168534994125366,grad_norm: 0.999999264189349, iteration: 244743
loss: 0.996593713760376,grad_norm: 0.8956601667025277, iteration: 244744
loss: 1.0628204345703125,grad_norm: 0.9999991610278017, iteration: 244745
loss: 1.0663700103759766,grad_norm: 0.999999688024467, iteration: 244746
loss: 1.0948408842086792,grad_norm: 0.9999997825382453, iteration: 244747
loss: 1.0769096612930298,grad_norm: 0.9999992019310411, iteration: 244748
loss: 1.089680552482605,grad_norm: 0.9999992279310003, iteration: 244749
loss: 1.087412714958191,grad_norm: 0.9999993015361291, iteration: 244750
loss: 1.0623077154159546,grad_norm: 0.9778159234953383, iteration: 244751
loss: 1.17122483253479,grad_norm: 0.9999994897749289, iteration: 244752
loss: 1.1004565954208374,grad_norm: 0.9999993926539671, iteration: 244753
loss: 1.0603952407836914,grad_norm: 0.9999993374631426, iteration: 244754
loss: 1.063881516456604,grad_norm: 0.9821427570476078, iteration: 244755
loss: 1.1401948928833008,grad_norm: 0.9999997870124561, iteration: 244756
loss: 1.1867892742156982,grad_norm: 0.9999996286537515, iteration: 244757
loss: 1.031252145767212,grad_norm: 0.9999994368468155, iteration: 244758
loss: 1.0062978267669678,grad_norm: 0.960011157265431, iteration: 244759
loss: 1.1691594123840332,grad_norm: 0.9999994701063137, iteration: 244760
loss: 1.1241481304168701,grad_norm: 0.9999996832081733, iteration: 244761
loss: 1.072548270225525,grad_norm: 0.9999990806789527, iteration: 244762
loss: 1.10978102684021,grad_norm: 0.9999993867557605, iteration: 244763
loss: 1.1493213176727295,grad_norm: 0.9999994734663575, iteration: 244764
loss: 0.997775137424469,grad_norm: 0.9688402795185197, iteration: 244765
loss: 1.2908433675765991,grad_norm: 0.9999995346500391, iteration: 244766
loss: 1.3815202713012695,grad_norm: 0.9999995759897449, iteration: 244767
loss: 1.181299090385437,grad_norm: 0.9999998202414094, iteration: 244768
loss: 0.9953023195266724,grad_norm: 0.9000678415356466, iteration: 244769
loss: 1.1200850009918213,grad_norm: 0.9999999259958205, iteration: 244770
loss: 1.0460638999938965,grad_norm: 0.9999990984490871, iteration: 244771
loss: 1.1544817686080933,grad_norm: 0.999999848763721, iteration: 244772
loss: 1.0598965883255005,grad_norm: 0.9999998821398584, iteration: 244773
loss: 1.1320202350616455,grad_norm: 0.9999998750969866, iteration: 244774
loss: 0.988240122795105,grad_norm: 0.99999912807948, iteration: 244775
loss: 1.4179939031600952,grad_norm: 0.9999997099937558, iteration: 244776
loss: 1.1340407133102417,grad_norm: 1.0000000339211181, iteration: 244777
loss: 1.2249928712844849,grad_norm: 0.9999998161185936, iteration: 244778
loss: 1.102968454360962,grad_norm: 0.9999994578931898, iteration: 244779
loss: 1.1489109992980957,grad_norm: 0.999999681287603, iteration: 244780
loss: 1.309620976448059,grad_norm: 0.9999999310775465, iteration: 244781
loss: 1.3517996072769165,grad_norm: 0.9999998615128972, iteration: 244782
loss: 1.1762553453445435,grad_norm: 0.9999993215654919, iteration: 244783
loss: 1.138473391532898,grad_norm: 0.9999990729761521, iteration: 244784
loss: 1.123081922531128,grad_norm: 0.9754423393100772, iteration: 244785
loss: 1.0851560831069946,grad_norm: 0.9198193358858068, iteration: 244786
loss: 1.1789464950561523,grad_norm: 0.999999208996983, iteration: 244787
loss: 1.174531102180481,grad_norm: 0.9999998244044159, iteration: 244788
loss: 1.1880136728286743,grad_norm: 0.9999996599334926, iteration: 244789
loss: 1.3019613027572632,grad_norm: 0.9999995404216017, iteration: 244790
loss: 1.2170324325561523,grad_norm: 0.9999995002030406, iteration: 244791
loss: 1.3494008779525757,grad_norm: 0.9999996772427101, iteration: 244792
loss: 1.2862119674682617,grad_norm: 0.9999994270877471, iteration: 244793
loss: 1.3173707723617554,grad_norm: 0.9999992961053874, iteration: 244794
loss: 1.1234972476959229,grad_norm: 0.9999992864281819, iteration: 244795
loss: 1.230309247970581,grad_norm: 1.0000000040258081, iteration: 244796
loss: 1.2018487453460693,grad_norm: 0.9999993998158885, iteration: 244797
loss: 1.2119253873825073,grad_norm: 0.9999999097867533, iteration: 244798
loss: 1.1064581871032715,grad_norm: 1.0000000495723447, iteration: 244799
loss: 1.130717158317566,grad_norm: 0.9999998368591729, iteration: 244800
loss: 1.10499107837677,grad_norm: 0.9999992433728446, iteration: 244801
loss: 1.1906156539916992,grad_norm: 0.9999997894784346, iteration: 244802
loss: 1.0914628505706787,grad_norm: 0.999999937730783, iteration: 244803
loss: 1.1069849729537964,grad_norm: 0.9999990379765887, iteration: 244804
loss: 1.073265790939331,grad_norm: 0.9999992419892133, iteration: 244805
loss: 1.1761397123336792,grad_norm: 0.9999992089872635, iteration: 244806
loss: 1.0802123546600342,grad_norm: 0.999999118650472, iteration: 244807
loss: 1.032716155052185,grad_norm: 0.9999996083850522, iteration: 244808
loss: 1.1692790985107422,grad_norm: 0.9999994235536727, iteration: 244809
loss: 1.0017989873886108,grad_norm: 0.999999059032122, iteration: 244810
loss: 1.2215989828109741,grad_norm: 0.9999995851732892, iteration: 244811
loss: 1.0539183616638184,grad_norm: 0.8436599938633533, iteration: 244812
loss: 1.1536551713943481,grad_norm: 0.9999992476209916, iteration: 244813
loss: 1.1721853017807007,grad_norm: 0.9999993143846172, iteration: 244814
loss: 1.097038984298706,grad_norm: 0.9999991128256375, iteration: 244815
loss: 1.0247427225112915,grad_norm: 0.9785706190973448, iteration: 244816
loss: 1.0881781578063965,grad_norm: 0.9999990410053459, iteration: 244817
loss: 1.0198417901992798,grad_norm: 0.9549333862227863, iteration: 244818
loss: 1.025987982749939,grad_norm: 0.8266552070865378, iteration: 244819
loss: 1.0145500898361206,grad_norm: 0.9999996676729245, iteration: 244820
loss: 1.103306770324707,grad_norm: 0.9999992580546536, iteration: 244821
loss: 1.063668131828308,grad_norm: 0.999999568034585, iteration: 244822
loss: 1.014417052268982,grad_norm: 0.9999991742992956, iteration: 244823
loss: 1.0322272777557373,grad_norm: 0.9623668946364513, iteration: 244824
loss: 1.0115413665771484,grad_norm: 0.9999998883848885, iteration: 244825
loss: 0.997509777545929,grad_norm: 0.9999991064303847, iteration: 244826
loss: 1.004555344581604,grad_norm: 0.8379025775146911, iteration: 244827
loss: 1.0197595357894897,grad_norm: 0.9999991168032594, iteration: 244828
loss: 1.044354796409607,grad_norm: 0.9999993203462573, iteration: 244829
loss: 1.089616298675537,grad_norm: 0.9999992301277734, iteration: 244830
loss: 1.0731565952301025,grad_norm: 0.9999989592126352, iteration: 244831
loss: 0.9433586597442627,grad_norm: 0.8500734401924813, iteration: 244832
loss: 1.0201029777526855,grad_norm: 0.999999087589008, iteration: 244833
loss: 1.0462857484817505,grad_norm: 0.8961914124066763, iteration: 244834
loss: 1.0066754817962646,grad_norm: 0.999999279217007, iteration: 244835
loss: 0.9773933291435242,grad_norm: 0.9314115233886031, iteration: 244836
loss: 1.1185734272003174,grad_norm: 0.9999997085934561, iteration: 244837
loss: 1.0135257244110107,grad_norm: 0.9835292004712021, iteration: 244838
loss: 1.0930923223495483,grad_norm: 0.9999991494357612, iteration: 244839
loss: 1.0175197124481201,grad_norm: 0.9999991186123326, iteration: 244840
loss: 1.013380765914917,grad_norm: 0.9487937230632801, iteration: 244841
loss: 1.044488549232483,grad_norm: 0.9999991683127083, iteration: 244842
loss: 1.0209059715270996,grad_norm: 0.8436788111873835, iteration: 244843
loss: 0.997178316116333,grad_norm: 0.9100273629715561, iteration: 244844
loss: 1.0474978685379028,grad_norm: 0.9999992766749819, iteration: 244845
loss: 0.9919719696044922,grad_norm: 0.8175033250708954, iteration: 244846
loss: 0.9970537424087524,grad_norm: 0.8534362787593917, iteration: 244847
loss: 1.055222749710083,grad_norm: 0.8767683767316192, iteration: 244848
loss: 1.0792135000228882,grad_norm: 0.9999998327757941, iteration: 244849
loss: 0.9679794907569885,grad_norm: 0.9846557141264788, iteration: 244850
loss: 0.9936264753341675,grad_norm: 0.999999657407521, iteration: 244851
loss: 0.9876129627227783,grad_norm: 0.9655085907442561, iteration: 244852
loss: 1.0043865442276,grad_norm: 0.958393127988257, iteration: 244853
loss: 0.965645968914032,grad_norm: 0.9999997230598562, iteration: 244854
loss: 1.0521841049194336,grad_norm: 0.9999999541816409, iteration: 244855
loss: 0.991225004196167,grad_norm: 0.7969413634733257, iteration: 244856
loss: 1.0652927160263062,grad_norm: 0.9999994367892031, iteration: 244857
loss: 1.0146673917770386,grad_norm: 0.9999991287622992, iteration: 244858
loss: 1.053858995437622,grad_norm: 0.9999996039645607, iteration: 244859
loss: 1.034220576286316,grad_norm: 0.8891275818188302, iteration: 244860
loss: 1.1708327531814575,grad_norm: 0.9999997952249762, iteration: 244861
loss: 0.9997351765632629,grad_norm: 0.9999999254132079, iteration: 244862
loss: 1.0143406391143799,grad_norm: 0.8082840327607318, iteration: 244863
loss: 1.0478161573410034,grad_norm: 0.9216431933353473, iteration: 244864
loss: 1.1247804164886475,grad_norm: 0.9999992913788729, iteration: 244865
loss: 1.0687038898468018,grad_norm: 0.8494438460924377, iteration: 244866
loss: 1.0533987283706665,grad_norm: 0.9999993215938334, iteration: 244867
loss: 1.0030019283294678,grad_norm: 0.8788612661591297, iteration: 244868
loss: 1.024489402770996,grad_norm: 0.9673838927185799, iteration: 244869
loss: 0.9968341588973999,grad_norm: 0.8402601953942207, iteration: 244870
loss: 1.0492843389511108,grad_norm: 0.9999991991931423, iteration: 244871
loss: 1.0404053926467896,grad_norm: 0.9999991689556064, iteration: 244872
loss: 0.9956223964691162,grad_norm: 0.999999086328957, iteration: 244873
loss: 1.0128488540649414,grad_norm: 0.9999997894128131, iteration: 244874
loss: 0.9852836728096008,grad_norm: 0.8793206127562488, iteration: 244875
loss: 1.039303183555603,grad_norm: 0.9999990167153905, iteration: 244876
loss: 1.100975513458252,grad_norm: 0.9999996519941252, iteration: 244877
loss: 1.044211745262146,grad_norm: 0.999999159499082, iteration: 244878
loss: 1.1654040813446045,grad_norm: 0.9999997037942692, iteration: 244879
loss: 0.9759310483932495,grad_norm: 0.9805385570181345, iteration: 244880
loss: 1.0134925842285156,grad_norm: 0.9999990664287299, iteration: 244881
loss: 1.061202883720398,grad_norm: 0.9999995411178079, iteration: 244882
loss: 1.0226198434829712,grad_norm: 0.9999989616657124, iteration: 244883
loss: 1.0449179410934448,grad_norm: 0.8895588998909126, iteration: 244884
loss: 1.033014178276062,grad_norm: 0.999999144811779, iteration: 244885
loss: 1.1881541013717651,grad_norm: 0.9999997027799729, iteration: 244886
loss: 1.0289260149002075,grad_norm: 0.8871893709224763, iteration: 244887
loss: 0.9882379174232483,grad_norm: 0.999999117547965, iteration: 244888
loss: 1.0870633125305176,grad_norm: 0.9999991417021656, iteration: 244889
loss: 1.0874634981155396,grad_norm: 0.9999996972861083, iteration: 244890
loss: 0.9806216955184937,grad_norm: 0.9999997256942234, iteration: 244891
loss: 1.039538860321045,grad_norm: 0.9999999536047365, iteration: 244892
loss: 0.995734691619873,grad_norm: 0.9016919759668404, iteration: 244893
loss: 0.9987151026725769,grad_norm: 0.9999989340139664, iteration: 244894
loss: 1.0433841943740845,grad_norm: 0.9999990449481223, iteration: 244895
loss: 1.0527520179748535,grad_norm: 0.898279131849213, iteration: 244896
loss: 1.054175853729248,grad_norm: 0.999999311143578, iteration: 244897
loss: 0.9800090193748474,grad_norm: 0.8480184474276362, iteration: 244898
loss: 1.0354984998703003,grad_norm: 0.9471663183769792, iteration: 244899
loss: 1.0009684562683105,grad_norm: 0.9999992163701, iteration: 244900
loss: 1.0906455516815186,grad_norm: 0.9999991438442974, iteration: 244901
loss: 1.0825724601745605,grad_norm: 0.9999991970052301, iteration: 244902
loss: 1.0619961023330688,grad_norm: 0.9999991275424944, iteration: 244903
loss: 1.0005611181259155,grad_norm: 0.861071132244123, iteration: 244904
loss: 1.0890566110610962,grad_norm: 0.9999993284884151, iteration: 244905
loss: 1.0138483047485352,grad_norm: 0.9999996401739188, iteration: 244906
loss: 1.00764799118042,grad_norm: 0.9999993185449587, iteration: 244907
loss: 1.0189552307128906,grad_norm: 0.9999991144609691, iteration: 244908
loss: 1.0823346376419067,grad_norm: 0.9263547571136806, iteration: 244909
loss: 1.0254632234573364,grad_norm: 0.9999996725857228, iteration: 244910
loss: 1.0884888172149658,grad_norm: 0.9999991517215553, iteration: 244911
loss: 1.0236953496932983,grad_norm: 0.9999993302037704, iteration: 244912
loss: 1.071338415145874,grad_norm: 0.9999991392130301, iteration: 244913
loss: 1.0185799598693848,grad_norm: 0.9999993549362651, iteration: 244914
loss: 1.0052878856658936,grad_norm: 0.9999991350289683, iteration: 244915
loss: 0.9871931672096252,grad_norm: 0.8969798004963432, iteration: 244916
loss: 1.0438801050186157,grad_norm: 0.9999991667407989, iteration: 244917
loss: 0.9855176210403442,grad_norm: 0.9460782568231078, iteration: 244918
loss: 1.0741673707962036,grad_norm: 0.9999992529223332, iteration: 244919
loss: 1.031493902206421,grad_norm: 0.9999999972298492, iteration: 244920
loss: 1.039321780204773,grad_norm: 0.9999997780995035, iteration: 244921
loss: 0.9836735725402832,grad_norm: 0.9252117741788048, iteration: 244922
loss: 1.017449975013733,grad_norm: 0.9962023888205009, iteration: 244923
loss: 1.0778776407241821,grad_norm: 0.9999990828453863, iteration: 244924
loss: 0.9735788106918335,grad_norm: 0.9890323750911503, iteration: 244925
loss: 1.1072484254837036,grad_norm: 0.9999996004929023, iteration: 244926
loss: 1.0148546695709229,grad_norm: 0.9999998418916993, iteration: 244927
loss: 1.1496608257293701,grad_norm: 0.9999991590924413, iteration: 244928
loss: 0.9929996132850647,grad_norm: 0.9149512310434833, iteration: 244929
loss: 1.0740233659744263,grad_norm: 0.9231844605920854, iteration: 244930
loss: 1.0786751508712769,grad_norm: 0.9915654256848555, iteration: 244931
loss: 0.9790328741073608,grad_norm: 0.9562614300429995, iteration: 244932
loss: 1.0304371118545532,grad_norm: 0.9225644662644701, iteration: 244933
loss: 1.0372639894485474,grad_norm: 0.9475126737495992, iteration: 244934
loss: 0.9551388025283813,grad_norm: 0.9609629424242335, iteration: 244935
loss: 1.0604875087738037,grad_norm: 0.9999994649518219, iteration: 244936
loss: 1.0259528160095215,grad_norm: 0.8938914904251417, iteration: 244937
loss: 1.0644257068634033,grad_norm: 0.9999989572411254, iteration: 244938
loss: 1.0381287336349487,grad_norm: 0.8425649786817178, iteration: 244939
loss: 1.127781629562378,grad_norm: 0.9999991217904951, iteration: 244940
loss: 1.0398281812667847,grad_norm: 0.9999994254620731, iteration: 244941
loss: 0.9640272259712219,grad_norm: 0.9488598008681927, iteration: 244942
loss: 1.0518232583999634,grad_norm: 0.9999999071094077, iteration: 244943
loss: 0.9870152473449707,grad_norm: 0.8857841770853645, iteration: 244944
loss: 0.992405354976654,grad_norm: 0.9999996968729629, iteration: 244945
loss: 0.9901090860366821,grad_norm: 0.8920537077990913, iteration: 244946
loss: 0.9910513758659363,grad_norm: 0.8390283007751731, iteration: 244947
loss: 1.043694019317627,grad_norm: 0.8243031060751024, iteration: 244948
loss: 1.0225963592529297,grad_norm: 0.9683325692803005, iteration: 244949
loss: 0.9798475503921509,grad_norm: 0.9999999582091234, iteration: 244950
loss: 1.0745820999145508,grad_norm: 0.9999997553676261, iteration: 244951
loss: 1.002049207687378,grad_norm: 0.8311699986907329, iteration: 244952
loss: 0.9941398501396179,grad_norm: 0.9999999074149325, iteration: 244953
loss: 1.1190062761306763,grad_norm: 0.9999994475446345, iteration: 244954
loss: 0.9847714304924011,grad_norm: 0.7704726510694044, iteration: 244955
loss: 1.0128395557403564,grad_norm: 0.8782045881066457, iteration: 244956
loss: 1.1723672151565552,grad_norm: 0.999999313912971, iteration: 244957
loss: 1.022243618965149,grad_norm: 0.8441841349985982, iteration: 244958
loss: 1.1100009679794312,grad_norm: 0.9999999688968352, iteration: 244959
loss: 0.9858381748199463,grad_norm: 0.9699505385579389, iteration: 244960
loss: 0.9824526309967041,grad_norm: 0.8494824734374247, iteration: 244961
loss: 1.1709840297698975,grad_norm: 0.9999999559361801, iteration: 244962
loss: 0.9882400631904602,grad_norm: 0.9359050578902556, iteration: 244963
loss: 1.0245680809020996,grad_norm: 0.9999997700694826, iteration: 244964
loss: 0.9834162592887878,grad_norm: 0.8775515217486988, iteration: 244965
loss: 1.1584258079528809,grad_norm: 0.9999993055815984, iteration: 244966
loss: 1.183669924736023,grad_norm: 0.999999799939833, iteration: 244967
loss: 0.9975128173828125,grad_norm: 0.9999994146039537, iteration: 244968
loss: 0.9813341498374939,grad_norm: 0.999999267200771, iteration: 244969
loss: 1.0218325853347778,grad_norm: 0.8893537861365687, iteration: 244970
loss: 1.1486186981201172,grad_norm: 0.9999996795440994, iteration: 244971
loss: 1.037412166595459,grad_norm: 0.9999992470316311, iteration: 244972
loss: 0.9646822214126587,grad_norm: 0.8949034073750988, iteration: 244973
loss: 1.0762816667556763,grad_norm: 0.9999993764337269, iteration: 244974
loss: 1.0566966533660889,grad_norm: 0.9999995284984609, iteration: 244975
loss: 1.0174471139907837,grad_norm: 0.9954697928847787, iteration: 244976
loss: 1.0469815731048584,grad_norm: 1.000000047783599, iteration: 244977
loss: 0.9966244101524353,grad_norm: 0.8934480189152874, iteration: 244978
loss: 1.017845869064331,grad_norm: 0.8825704174759599, iteration: 244979
loss: 1.0364346504211426,grad_norm: 0.9999991222041968, iteration: 244980
loss: 1.0403528213500977,grad_norm: 0.9999995742152545, iteration: 244981
loss: 0.9839039444923401,grad_norm: 0.9999992821504632, iteration: 244982
loss: 1.0056533813476562,grad_norm: 0.9095009310958333, iteration: 244983
loss: 1.0958009958267212,grad_norm: 0.9999996136605485, iteration: 244984
loss: 1.0054211616516113,grad_norm: 0.8436010006988658, iteration: 244985
loss: 1.0499694347381592,grad_norm: 0.9999995913618844, iteration: 244986
loss: 0.9552620649337769,grad_norm: 0.9231171796692987, iteration: 244987
loss: 1.00875985622406,grad_norm: 0.9999993110455676, iteration: 244988
loss: 1.0137522220611572,grad_norm: 0.7988131611177324, iteration: 244989
loss: 1.08980393409729,grad_norm: 0.9999992230856038, iteration: 244990
loss: 1.0390589237213135,grad_norm: 0.9999999038317298, iteration: 244991
loss: 1.1232839822769165,grad_norm: 0.9999996428296883, iteration: 244992
loss: 1.0259851217269897,grad_norm: 0.999999839149969, iteration: 244993
loss: 0.9930716156959534,grad_norm: 0.9218935129540395, iteration: 244994
loss: 1.2275582551956177,grad_norm: 0.9999998725067576, iteration: 244995
loss: 1.0559333562850952,grad_norm: 0.9999992546528095, iteration: 244996
loss: 1.0436631441116333,grad_norm: 0.999999375547275, iteration: 244997
loss: 1.019089937210083,grad_norm: 0.9899155482850898, iteration: 244998
loss: 1.0635812282562256,grad_norm: 0.9999991377609678, iteration: 244999
loss: 1.1040109395980835,grad_norm: 0.9999990922073958, iteration: 245000
loss: 1.0140780210494995,grad_norm: 0.9875808884025176, iteration: 245001
loss: 0.9948068261146545,grad_norm: 0.8555585961495616, iteration: 245002
loss: 1.0041121244430542,grad_norm: 0.999999638007027, iteration: 245003
loss: 1.0467174053192139,grad_norm: 0.9999994821349731, iteration: 245004
loss: 1.0079983472824097,grad_norm: 0.999999249260869, iteration: 245005
loss: 0.9943813681602478,grad_norm: 0.9488481969916867, iteration: 245006
loss: 0.965854287147522,grad_norm: 0.9999990929768252, iteration: 245007
loss: 0.9957395792007446,grad_norm: 0.9999992916536564, iteration: 245008
loss: 1.056403398513794,grad_norm: 0.99999984880438, iteration: 245009
loss: 1.0982754230499268,grad_norm: 0.999999160684238, iteration: 245010
loss: 1.0855709314346313,grad_norm: 0.9999996135992765, iteration: 245011
loss: 1.0384372472763062,grad_norm: 0.999999133065706, iteration: 245012
loss: 1.0145505666732788,grad_norm: 0.9999994936037825, iteration: 245013
loss: 1.117171049118042,grad_norm: 0.9999996791511329, iteration: 245014
loss: 1.0041837692260742,grad_norm: 0.8500786801182417, iteration: 245015
loss: 0.9736099243164062,grad_norm: 0.9007758456484248, iteration: 245016
loss: 1.181593656539917,grad_norm: 0.9999992556328204, iteration: 245017
loss: 1.0153108835220337,grad_norm: 0.9999998176415311, iteration: 245018
loss: 0.9874272346496582,grad_norm: 0.8949200744145567, iteration: 245019
loss: 1.0265816450119019,grad_norm: 0.999999011549144, iteration: 245020
loss: 0.988458514213562,grad_norm: 0.9012725719724987, iteration: 245021
loss: 0.9753242135047913,grad_norm: 0.9984077181345725, iteration: 245022
loss: 1.0190136432647705,grad_norm: 0.9047113121508934, iteration: 245023
loss: 1.0937421321868896,grad_norm: 0.9999991939771902, iteration: 245024
loss: 1.0239953994750977,grad_norm: 0.899440127251256, iteration: 245025
loss: 1.0689998865127563,grad_norm: 0.9999999060835485, iteration: 245026
loss: 0.9757260084152222,grad_norm: 0.8659207552245519, iteration: 245027
loss: 1.0087164640426636,grad_norm: 0.9999989826271559, iteration: 245028
loss: 0.9552804231643677,grad_norm: 0.8664120533329449, iteration: 245029
loss: 1.017657995223999,grad_norm: 0.9999992261080402, iteration: 245030
loss: 0.994703471660614,grad_norm: 0.7895546162610612, iteration: 245031
loss: 1.118333101272583,grad_norm: 0.9999997196702965, iteration: 245032
loss: 1.012569785118103,grad_norm: 0.9999999001796079, iteration: 245033
loss: 1.0039844512939453,grad_norm: 0.9999992282469551, iteration: 245034
loss: 1.0436722040176392,grad_norm: 0.9999997864085628, iteration: 245035
loss: 1.083446741104126,grad_norm: 0.8445142924905074, iteration: 245036
loss: 1.1806282997131348,grad_norm: 0.9999993120227094, iteration: 245037
loss: 1.008164405822754,grad_norm: 0.9999997197991765, iteration: 245038
loss: 1.0632331371307373,grad_norm: 0.9999989828574257, iteration: 245039
loss: 1.0041630268096924,grad_norm: 0.8318451662745722, iteration: 245040
loss: 1.0019432306289673,grad_norm: 0.8211304155557745, iteration: 245041
loss: 0.9892757534980774,grad_norm: 0.985389033388395, iteration: 245042
loss: 1.002502202987671,grad_norm: 0.8653847570201367, iteration: 245043
loss: 1.0571365356445312,grad_norm: 0.9999995374918914, iteration: 245044
loss: 1.0523277521133423,grad_norm: 0.9999990389665059, iteration: 245045
loss: 1.0344115495681763,grad_norm: 0.9999990319345196, iteration: 245046
loss: 1.038936972618103,grad_norm: 0.9822430811844992, iteration: 245047
loss: 1.005467414855957,grad_norm: 0.9708425555401805, iteration: 245048
loss: 1.0316919088363647,grad_norm: 0.9716161500762394, iteration: 245049
loss: 0.9940487742424011,grad_norm: 0.8849603060741585, iteration: 245050
loss: 1.0006613731384277,grad_norm: 0.9999991173564753, iteration: 245051
loss: 1.1054542064666748,grad_norm: 0.9999994286063476, iteration: 245052
loss: 1.0364443063735962,grad_norm: 0.9391839688222807, iteration: 245053
loss: 0.9972938299179077,grad_norm: 0.9736404877202055, iteration: 245054
loss: 1.0043628215789795,grad_norm: 0.86880637080603, iteration: 245055
loss: 0.9573996663093567,grad_norm: 0.9999992183387725, iteration: 245056
loss: 1.0911129713058472,grad_norm: 0.9999998115673003, iteration: 245057
loss: 0.9707230925559998,grad_norm: 0.8132248076034251, iteration: 245058
loss: 1.0156782865524292,grad_norm: 0.8555757396698692, iteration: 245059
loss: 1.0014469623565674,grad_norm: 0.7955730628728978, iteration: 245060
loss: 1.0015645027160645,grad_norm: 0.8040833634895918, iteration: 245061
loss: 0.9820766448974609,grad_norm: 0.7895287839156635, iteration: 245062
loss: 1.0199010372161865,grad_norm: 0.9999998804033613, iteration: 245063
loss: 1.0429610013961792,grad_norm: 0.9999995559706517, iteration: 245064
loss: 0.9817759990692139,grad_norm: 0.9884172716107292, iteration: 245065
loss: 1.0157582759857178,grad_norm: 0.9999992554387812, iteration: 245066
loss: 1.0187644958496094,grad_norm: 0.8389295426557121, iteration: 245067
loss: 1.0020865201950073,grad_norm: 0.9999990240924487, iteration: 245068
loss: 1.0055545568466187,grad_norm: 0.9999993633796619, iteration: 245069
loss: 0.9635497331619263,grad_norm: 0.936205068503668, iteration: 245070
loss: 0.9785304665565491,grad_norm: 0.9356885913009443, iteration: 245071
loss: 0.9751560091972351,grad_norm: 0.8825761079274204, iteration: 245072
loss: 0.9969756603240967,grad_norm: 0.9422476005426723, iteration: 245073
loss: 1.0170353651046753,grad_norm: 0.9999990183663308, iteration: 245074
loss: 0.9933041930198669,grad_norm: 0.9818500359786092, iteration: 245075
loss: 0.982417643070221,grad_norm: 0.9203746448836347, iteration: 245076
loss: 1.0141291618347168,grad_norm: 0.9999992208343043, iteration: 245077
loss: 1.0081480741500854,grad_norm: 0.9660664766432371, iteration: 245078
loss: 0.993489682674408,grad_norm: 0.9999991048051228, iteration: 245079
loss: 1.0047740936279297,grad_norm: 0.893096468781961, iteration: 245080
loss: 0.9707311987876892,grad_norm: 0.8880657695808566, iteration: 245081
loss: 0.9967337250709534,grad_norm: 0.9999992682305419, iteration: 245082
loss: 1.0523065328598022,grad_norm: 0.9999993828360911, iteration: 245083
loss: 0.9978920221328735,grad_norm: 0.8947919402219231, iteration: 245084
loss: 1.0146030187606812,grad_norm: 0.899811738124251, iteration: 245085
loss: 0.982245683670044,grad_norm: 0.8300125015302899, iteration: 245086
loss: 1.0453332662582397,grad_norm: 0.9999992142902316, iteration: 245087
loss: 0.9989721775054932,grad_norm: 0.8727832799249698, iteration: 245088
loss: 0.992987871170044,grad_norm: 0.8279172775233293, iteration: 245089
loss: 1.0055062770843506,grad_norm: 0.8320326568585076, iteration: 245090
loss: 1.05796217918396,grad_norm: 0.9999991149512932, iteration: 245091
loss: 1.0137070417404175,grad_norm: 0.8705459321737566, iteration: 245092
loss: 0.9567434191703796,grad_norm: 0.8844101115519606, iteration: 245093
loss: 0.9971321225166321,grad_norm: 0.9381783914260812, iteration: 245094
loss: 0.9902405142784119,grad_norm: 0.9861205304783511, iteration: 245095
loss: 0.9941014051437378,grad_norm: 0.9999990846657127, iteration: 245096
loss: 0.9827883839607239,grad_norm: 0.9296594756646389, iteration: 245097
loss: 0.9671817421913147,grad_norm: 0.9720357485461025, iteration: 245098
loss: 1.0027177333831787,grad_norm: 0.8121212340488224, iteration: 245099
loss: 1.2304238080978394,grad_norm: 0.9999991733438761, iteration: 245100
loss: 1.020982265472412,grad_norm: 0.9999991850579265, iteration: 245101
loss: 1.0041273832321167,grad_norm: 0.9537972203285551, iteration: 245102
loss: 0.9889078140258789,grad_norm: 0.8018855677121612, iteration: 245103
loss: 1.0398540496826172,grad_norm: 0.834045309205261, iteration: 245104
loss: 1.039960265159607,grad_norm: 0.9193820479839091, iteration: 245105
loss: 1.0082536935806274,grad_norm: 0.9341844290586929, iteration: 245106
loss: 1.0238990783691406,grad_norm: 0.9343006574200144, iteration: 245107
loss: 0.9647489190101624,grad_norm: 0.8167331251903981, iteration: 245108
loss: 0.9932311177253723,grad_norm: 0.8805352428028969, iteration: 245109
loss: 0.9665182828903198,grad_norm: 0.9999991534361338, iteration: 245110
loss: 1.001964807510376,grad_norm: 0.9907608716205871, iteration: 245111
loss: 1.054206371307373,grad_norm: 0.8467886526226582, iteration: 245112
loss: 1.0127874612808228,grad_norm: 0.9999992605613853, iteration: 245113
loss: 0.9576214551925659,grad_norm: 0.937431157124643, iteration: 245114
loss: 0.9675151109695435,grad_norm: 0.8080426046789323, iteration: 245115
loss: 1.0470805168151855,grad_norm: 0.914366568377637, iteration: 245116
loss: 1.0303401947021484,grad_norm: 0.9921443619742923, iteration: 245117
loss: 0.9975928664207458,grad_norm: 0.8350623230559083, iteration: 245118
loss: 0.9929113388061523,grad_norm: 0.8005263054512441, iteration: 245119
loss: 0.9980570673942566,grad_norm: 0.9999990723266461, iteration: 245120
loss: 1.0769590139389038,grad_norm: 0.9999991430301602, iteration: 245121
loss: 0.9892488121986389,grad_norm: 0.8699799841531531, iteration: 245122
loss: 1.0158989429473877,grad_norm: 0.8530450283940996, iteration: 245123
loss: 1.0468323230743408,grad_norm: 0.912093417578508, iteration: 245124
loss: 1.001893401145935,grad_norm: 0.8401146864321799, iteration: 245125
loss: 1.0011746883392334,grad_norm: 0.9691869871990315, iteration: 245126
loss: 1.0171668529510498,grad_norm: 0.9999991946407069, iteration: 245127
loss: 1.0762689113616943,grad_norm: 0.9999999259002215, iteration: 245128
loss: 1.0192457437515259,grad_norm: 0.9123737969737893, iteration: 245129
loss: 1.0122677087783813,grad_norm: 0.9569823960770144, iteration: 245130
loss: 1.0042855739593506,grad_norm: 0.8332507121674227, iteration: 245131
loss: 1.0226552486419678,grad_norm: 0.8414266790089082, iteration: 245132
loss: 0.9705508351325989,grad_norm: 0.816875414258879, iteration: 245133
loss: 0.9940145611763,grad_norm: 0.7965952031045855, iteration: 245134
loss: 0.9910045862197876,grad_norm: 0.999999309488491, iteration: 245135
loss: 1.007783055305481,grad_norm: 0.8981525809636524, iteration: 245136
loss: 1.03914213180542,grad_norm: 0.9999991274847282, iteration: 245137
loss: 0.9829303026199341,grad_norm: 0.9999989753691162, iteration: 245138
loss: 1.0107393264770508,grad_norm: 0.8335512395005384, iteration: 245139
loss: 0.9791916608810425,grad_norm: 0.9024054213821372, iteration: 245140
loss: 1.0111498832702637,grad_norm: 0.9999997840434136, iteration: 245141
loss: 1.0269606113433838,grad_norm: 0.9999991647674789, iteration: 245142
loss: 1.0070714950561523,grad_norm: 0.7813767342393004, iteration: 245143
loss: 1.0169419050216675,grad_norm: 0.7831141998219455, iteration: 245144
loss: 1.021885871887207,grad_norm: 0.9271592952480724, iteration: 245145
loss: 1.0486220121383667,grad_norm: 0.9624882452105538, iteration: 245146
loss: 1.0007717609405518,grad_norm: 0.8820516709041167, iteration: 245147
loss: 0.9981197118759155,grad_norm: 0.9323281030724562, iteration: 245148
loss: 1.022668480873108,grad_norm: 0.9999990313872786, iteration: 245149
loss: 1.0040957927703857,grad_norm: 0.8056772943366195, iteration: 245150
loss: 0.981945276260376,grad_norm: 0.9999990750212838, iteration: 245151
loss: 0.9567267298698425,grad_norm: 0.9999991266986763, iteration: 245152
loss: 0.9895874261856079,grad_norm: 0.9999991093316588, iteration: 245153
loss: 0.9598970413208008,grad_norm: 0.9122548136111671, iteration: 245154
loss: 1.0006471872329712,grad_norm: 0.861765606053442, iteration: 245155
loss: 1.1387308835983276,grad_norm: 0.9083756521505054, iteration: 245156
loss: 1.007393717765808,grad_norm: 0.8443331231604756, iteration: 245157
loss: 0.9978808760643005,grad_norm: 0.9170187218327011, iteration: 245158
loss: 1.0097787380218506,grad_norm: 0.9999991711266458, iteration: 245159
loss: 1.0641597509384155,grad_norm: 0.9999992508308327, iteration: 245160
loss: 1.0151656866073608,grad_norm: 0.8896182817636161, iteration: 245161
loss: 0.9923258423805237,grad_norm: 0.9724353800917096, iteration: 245162
loss: 0.9844229817390442,grad_norm: 0.9021387960992787, iteration: 245163
loss: 1.0437285900115967,grad_norm: 0.9999992640589742, iteration: 245164
loss: 0.9801985025405884,grad_norm: 0.9533066506288985, iteration: 245165
loss: 0.9851726293563843,grad_norm: 0.9003789662994856, iteration: 245166
loss: 1.0187859535217285,grad_norm: 0.9999990655672316, iteration: 245167
loss: 0.9945322871208191,grad_norm: 0.8340801016194804, iteration: 245168
loss: 1.0131151676177979,grad_norm: 0.9031898382646986, iteration: 245169
loss: 0.9805976152420044,grad_norm: 0.8094812380609575, iteration: 245170
loss: 1.0008519887924194,grad_norm: 0.828929142920899, iteration: 245171
loss: 1.022324562072754,grad_norm: 0.9999992231309243, iteration: 245172
loss: 1.0686711072921753,grad_norm: 0.9999994701782097, iteration: 245173
loss: 1.0131747722625732,grad_norm: 0.9999995430058402, iteration: 245174
loss: 1.032664179801941,grad_norm: 0.8596671475805798, iteration: 245175
loss: 1.0137637853622437,grad_norm: 0.9283848041485102, iteration: 245176
loss: 1.0116779804229736,grad_norm: 0.9491090774937413, iteration: 245177
loss: 1.0270180702209473,grad_norm: 0.999999480131237, iteration: 245178
loss: 0.9410178065299988,grad_norm: 0.8575770024295403, iteration: 245179
loss: 1.0116777420043945,grad_norm: 0.999999158391558, iteration: 245180
loss: 1.0358301401138306,grad_norm: 0.9999991143001885, iteration: 245181
loss: 0.9899325370788574,grad_norm: 0.99999900647026, iteration: 245182
loss: 0.9875288605690002,grad_norm: 0.894155044957613, iteration: 245183
loss: 1.0130255222320557,grad_norm: 0.9999992231971525, iteration: 245184
loss: 1.0342658758163452,grad_norm: 0.9999998990242608, iteration: 245185
loss: 0.9991861581802368,grad_norm: 0.8790432111388605, iteration: 245186
loss: 1.0002635717391968,grad_norm: 0.7892564926614973, iteration: 245187
loss: 1.0011464357376099,grad_norm: 0.970458859199057, iteration: 245188
loss: 1.0446088314056396,grad_norm: 0.9999992436794877, iteration: 245189
loss: 0.9887633323669434,grad_norm: 0.8098075240506645, iteration: 245190
loss: 0.9829909801483154,grad_norm: 0.7334992786239152, iteration: 245191
loss: 1.0069069862365723,grad_norm: 0.8728898275333661, iteration: 245192
loss: 1.0155268907546997,grad_norm: 0.7619611725035887, iteration: 245193
loss: 1.0111693143844604,grad_norm: 0.8521445285083105, iteration: 245194
loss: 0.9943864345550537,grad_norm: 0.7864235663970288, iteration: 245195
loss: 0.9869921803474426,grad_norm: 0.8769231946646723, iteration: 245196
loss: 0.9863219261169434,grad_norm: 0.8724090175223376, iteration: 245197
loss: 1.0811817646026611,grad_norm: 0.9999999392770008, iteration: 245198
loss: 1.002837061882019,grad_norm: 0.9999990634831717, iteration: 245199
loss: 0.9969432950019836,grad_norm: 0.857045888347966, iteration: 245200
loss: 1.0051960945129395,grad_norm: 0.8753236707686408, iteration: 245201
loss: 0.9794836044311523,grad_norm: 0.857821160405064, iteration: 245202
loss: 1.0260326862335205,grad_norm: 0.9999996389492781, iteration: 245203
loss: 1.0198580026626587,grad_norm: 0.72820243820835, iteration: 245204
loss: 1.0425317287445068,grad_norm: 0.9999991538068141, iteration: 245205
loss: 1.0353001356124878,grad_norm: 0.9999991085279128, iteration: 245206
loss: 1.0085293054580688,grad_norm: 0.8459491235586835, iteration: 245207
loss: 0.9807551503181458,grad_norm: 0.8220168890154658, iteration: 245208
loss: 1.1013715267181396,grad_norm: 0.9999990875786336, iteration: 245209
loss: 1.024300456047058,grad_norm: 0.9999997711829515, iteration: 245210
loss: 0.9912450313568115,grad_norm: 0.9798368029199602, iteration: 245211
loss: 1.0659453868865967,grad_norm: 0.9116119600012602, iteration: 245212
loss: 0.9906145334243774,grad_norm: 0.8054616249093706, iteration: 245213
loss: 1.064809799194336,grad_norm: 0.9999998738475029, iteration: 245214
loss: 0.9684323668479919,grad_norm: 0.9804138295014999, iteration: 245215
loss: 0.9715608954429626,grad_norm: 0.8123583254760827, iteration: 245216
loss: 1.0062377452850342,grad_norm: 0.9999990234166815, iteration: 245217
loss: 0.9962760806083679,grad_norm: 0.958172001198551, iteration: 245218
loss: 1.0030624866485596,grad_norm: 0.9999991787133529, iteration: 245219
loss: 0.9886063933372498,grad_norm: 0.9257919580341694, iteration: 245220
loss: 0.9701637029647827,grad_norm: 0.9472127394661093, iteration: 245221
loss: 1.0053668022155762,grad_norm: 0.9999991047427518, iteration: 245222
loss: 1.0180071592330933,grad_norm: 0.8656991470137891, iteration: 245223
loss: 0.9931703805923462,grad_norm: 0.996832318148807, iteration: 245224
loss: 0.9625315070152283,grad_norm: 0.9485592744849958, iteration: 245225
loss: 1.0050193071365356,grad_norm: 0.9999991541267435, iteration: 245226
loss: 1.0218580961227417,grad_norm: 0.9999990890931858, iteration: 245227
loss: 1.0320407152175903,grad_norm: 0.9999991430278046, iteration: 245228
loss: 0.9733582735061646,grad_norm: 0.9999990832809169, iteration: 245229
loss: 1.0003641843795776,grad_norm: 0.8221353238581841, iteration: 245230
loss: 1.0112825632095337,grad_norm: 0.8832711503541576, iteration: 245231
loss: 0.9773952960968018,grad_norm: 0.9999991514434426, iteration: 245232
loss: 1.0135598182678223,grad_norm: 0.9403626256387193, iteration: 245233
loss: 0.9861295819282532,grad_norm: 0.7748012850121964, iteration: 245234
loss: 0.965137779712677,grad_norm: 0.869295011907663, iteration: 245235
loss: 1.0103046894073486,grad_norm: 0.8307477329675195, iteration: 245236
loss: 1.0089540481567383,grad_norm: 0.8922290116217839, iteration: 245237
loss: 1.05098557472229,grad_norm: 0.9999992404626081, iteration: 245238
loss: 0.9825095534324646,grad_norm: 0.7788857055397749, iteration: 245239
loss: 1.0288065671920776,grad_norm: 0.9999992023528121, iteration: 245240
loss: 1.0153064727783203,grad_norm: 0.9999991448554464, iteration: 245241
loss: 1.0693553686141968,grad_norm: 0.9999995395446964, iteration: 245242
loss: 0.997053325176239,grad_norm: 0.821707665808598, iteration: 245243
loss: 1.0895094871520996,grad_norm: 0.8391393855971568, iteration: 245244
loss: 1.1037007570266724,grad_norm: 0.9999992719992614, iteration: 245245
loss: 0.9723700881004333,grad_norm: 0.9734338731992227, iteration: 245246
loss: 1.012120008468628,grad_norm: 0.9885374421691825, iteration: 245247
loss: 0.9898554086685181,grad_norm: 0.8327872923005648, iteration: 245248
loss: 0.9536690711975098,grad_norm: 0.8520973732849099, iteration: 245249
loss: 1.0025612115859985,grad_norm: 0.8353309986244175, iteration: 245250
loss: 1.0178090333938599,grad_norm: 0.8451125035098663, iteration: 245251
loss: 1.001383900642395,grad_norm: 0.8368706906914369, iteration: 245252
loss: 1.0571038722991943,grad_norm: 0.9999991737120466, iteration: 245253
loss: 1.0955291986465454,grad_norm: 0.999999435453706, iteration: 245254
loss: 1.017735481262207,grad_norm: 0.9999991104810069, iteration: 245255
loss: 0.9980120062828064,grad_norm: 0.9999991924898556, iteration: 245256
loss: 0.9814597964286804,grad_norm: 0.9454551819076658, iteration: 245257
loss: 0.971643328666687,grad_norm: 0.9331007326968485, iteration: 245258
loss: 0.9767420291900635,grad_norm: 0.9400195365696319, iteration: 245259
loss: 1.0176222324371338,grad_norm: 0.8705256344257366, iteration: 245260
loss: 0.9780006408691406,grad_norm: 0.9774025743075028, iteration: 245261
loss: 0.9996664524078369,grad_norm: 0.8709993465607528, iteration: 245262
loss: 1.0405728816986084,grad_norm: 0.9999990666929742, iteration: 245263
loss: 0.9992767572402954,grad_norm: 0.9999991328854687, iteration: 245264
loss: 0.9919969439506531,grad_norm: 0.8875831960166289, iteration: 245265
loss: 1.0191173553466797,grad_norm: 0.8477002473173452, iteration: 245266
loss: 1.000688910484314,grad_norm: 0.9473815210971375, iteration: 245267
loss: 1.0847140550613403,grad_norm: 0.8965912586079847, iteration: 245268
loss: 1.0111143589019775,grad_norm: 0.9316885734013112, iteration: 245269
loss: 0.9735913276672363,grad_norm: 0.9171174870487352, iteration: 245270
loss: 0.9791079163551331,grad_norm: 0.913693769367609, iteration: 245271
loss: 0.9806817173957825,grad_norm: 0.9527095163407351, iteration: 245272
loss: 1.0045815706253052,grad_norm: 0.9420038612850439, iteration: 245273
loss: 1.0244921445846558,grad_norm: 0.9999991865939297, iteration: 245274
loss: 1.0053017139434814,grad_norm: 0.9999992001668946, iteration: 245275
loss: 0.9957202076911926,grad_norm: 0.8946804945948733, iteration: 245276
loss: 1.015918254852295,grad_norm: 0.9475435779644621, iteration: 245277
loss: 1.0294400453567505,grad_norm: 0.8597630370331053, iteration: 245278
loss: 0.9816709756851196,grad_norm: 0.8156655674650475, iteration: 245279
loss: 0.9947597980499268,grad_norm: 0.8942018260778619, iteration: 245280
loss: 1.0793099403381348,grad_norm: 1.0000000467872736, iteration: 245281
loss: 0.9989296197891235,grad_norm: 0.9006995301174154, iteration: 245282
loss: 0.9529405236244202,grad_norm: 0.819186760372092, iteration: 245283
loss: 1.012040615081787,grad_norm: 0.8405916266872377, iteration: 245284
loss: 1.0233941078186035,grad_norm: 0.9874007367555371, iteration: 245285
loss: 1.005324125289917,grad_norm: 0.9903287093488875, iteration: 245286
loss: 1.0174403190612793,grad_norm: 0.8918377737166415, iteration: 245287
loss: 1.0265692472457886,grad_norm: 0.8360409079691141, iteration: 245288
loss: 1.0402652025222778,grad_norm: 0.999999787557223, iteration: 245289
loss: 0.9852572083473206,grad_norm: 0.8898309571588654, iteration: 245290
loss: 0.9750084280967712,grad_norm: 0.7923414399555336, iteration: 245291
loss: 1.0448471307754517,grad_norm: 0.9999994724519907, iteration: 245292
loss: 1.0215576887130737,grad_norm: 0.9402345672735452, iteration: 245293
loss: 1.0091580152511597,grad_norm: 0.9709575083734288, iteration: 245294
loss: 1.0141812562942505,grad_norm: 0.8573723648208897, iteration: 245295
loss: 0.9951659440994263,grad_norm: 0.9689661346311866, iteration: 245296
loss: 0.9411921501159668,grad_norm: 0.9822984286167911, iteration: 245297
loss: 0.9771448969841003,grad_norm: 0.9999992115649924, iteration: 245298
loss: 1.0488768815994263,grad_norm: 0.9999995219743624, iteration: 245299
loss: 1.0013738870620728,grad_norm: 0.9741434437486818, iteration: 245300
loss: 0.996626079082489,grad_norm: 0.8888445304864372, iteration: 245301
loss: 0.981143057346344,grad_norm: 0.8888782966482408, iteration: 245302
loss: 0.9827120900154114,grad_norm: 0.7968222137391713, iteration: 245303
loss: 0.9985870718955994,grad_norm: 0.9631290403349408, iteration: 245304
loss: 1.018873691558838,grad_norm: 0.996722467887294, iteration: 245305
loss: 0.993130624294281,grad_norm: 0.9921743367468054, iteration: 245306
loss: 1.018960952758789,grad_norm: 0.9302487497929944, iteration: 245307
loss: 0.9747756123542786,grad_norm: 0.9999991325276948, iteration: 245308
loss: 1.0018571615219116,grad_norm: 0.9641983472021646, iteration: 245309
loss: 1.0275005102157593,grad_norm: 0.9411176614370483, iteration: 245310
loss: 1.0119367837905884,grad_norm: 0.860296647377769, iteration: 245311
loss: 1.0118379592895508,grad_norm: 0.7974066197892238, iteration: 245312
loss: 1.007449984550476,grad_norm: 0.941539716832042, iteration: 245313
loss: 0.9828706383705139,grad_norm: 0.9970241339326572, iteration: 245314
loss: 0.9633693099021912,grad_norm: 0.852985792574195, iteration: 245315
loss: 1.0221439599990845,grad_norm: 0.9152502046919143, iteration: 245316
loss: 0.981555163860321,grad_norm: 0.879326330298263, iteration: 245317
loss: 1.0277043581008911,grad_norm: 0.9999991610934682, iteration: 245318
loss: 0.9742144346237183,grad_norm: 0.7876511430277652, iteration: 245319
loss: 1.1589953899383545,grad_norm: 0.9999992021382911, iteration: 245320
loss: 0.9760727882385254,grad_norm: 0.9999994239312922, iteration: 245321
loss: 1.0052297115325928,grad_norm: 0.806154991476805, iteration: 245322
loss: 0.9923185110092163,grad_norm: 0.9999990569800479, iteration: 245323
loss: 1.0250757932662964,grad_norm: 0.8736107892024885, iteration: 245324
loss: 0.9769384860992432,grad_norm: 0.8899538116644008, iteration: 245325
loss: 1.0020439624786377,grad_norm: 0.8909491596778885, iteration: 245326
loss: 1.009686827659607,grad_norm: 0.9792192098894802, iteration: 245327
loss: 1.0295963287353516,grad_norm: 0.9536088148641795, iteration: 245328
loss: 0.9902588725090027,grad_norm: 0.8466337684889733, iteration: 245329
loss: 0.9997392892837524,grad_norm: 0.8819784097156156, iteration: 245330
loss: 0.9989184737205505,grad_norm: 0.946166127679174, iteration: 245331
loss: 0.9914916157722473,grad_norm: 0.7876216741277353, iteration: 245332
loss: 1.0212910175323486,grad_norm: 0.8634018968202234, iteration: 245333
loss: 0.9979611039161682,grad_norm: 0.9003053450202831, iteration: 245334
loss: 1.0127623081207275,grad_norm: 0.8518682963441301, iteration: 245335
loss: 0.9775640964508057,grad_norm: 0.9999995116193926, iteration: 245336
loss: 1.0054845809936523,grad_norm: 0.8430526003995958, iteration: 245337
loss: 1.0494184494018555,grad_norm: 0.8951633891603369, iteration: 245338
loss: 1.0797905921936035,grad_norm: 0.9999994034997223, iteration: 245339
loss: 1.0024560689926147,grad_norm: 0.7935340218937349, iteration: 245340
loss: 0.9913725256919861,grad_norm: 0.99999933188519, iteration: 245341
loss: 1.0563112497329712,grad_norm: 0.9999990026060782, iteration: 245342
loss: 0.9660512804985046,grad_norm: 0.849317404004454, iteration: 245343
loss: 0.971034049987793,grad_norm: 0.8485897437548509, iteration: 245344
loss: 1.0193136930465698,grad_norm: 0.9510677175239652, iteration: 245345
loss: 0.9772618412971497,grad_norm: 0.7618183054203931, iteration: 245346
loss: 0.980977475643158,grad_norm: 0.7874767675084927, iteration: 245347
loss: 1.028523564338684,grad_norm: 0.9999996709924828, iteration: 245348
loss: 0.9974948167800903,grad_norm: 0.9999990787197379, iteration: 245349
loss: 0.9496122002601624,grad_norm: 0.7888008328417705, iteration: 245350
loss: 0.9867769479751587,grad_norm: 0.8275678533365906, iteration: 245351
loss: 1.0208344459533691,grad_norm: 0.6993615442865803, iteration: 245352
loss: 0.939233124256134,grad_norm: 0.8821379898403094, iteration: 245353
loss: 1.0022172927856445,grad_norm: 0.8345201890017165, iteration: 245354
loss: 1.014435887336731,grad_norm: 0.9999990559278437, iteration: 245355
loss: 0.9688418507575989,grad_norm: 0.9999998752505561, iteration: 245356
loss: 1.0447770357131958,grad_norm: 0.9953238215455056, iteration: 245357
loss: 1.0359652042388916,grad_norm: 0.7634131787183991, iteration: 245358
loss: 0.9818705916404724,grad_norm: 0.9180995462503629, iteration: 245359
loss: 0.9568805694580078,grad_norm: 0.8591181817199852, iteration: 245360
loss: 1.094596266746521,grad_norm: 0.9999998797859756, iteration: 245361
loss: 1.0104495286941528,grad_norm: 0.9573452800036678, iteration: 245362
loss: 0.9696298241615295,grad_norm: 0.9300728686116411, iteration: 245363
loss: 0.9980109333992004,grad_norm: 0.888197965981326, iteration: 245364
loss: 0.9732257723808289,grad_norm: 0.8224299503201474, iteration: 245365
loss: 1.0546433925628662,grad_norm: 0.880701549994433, iteration: 245366
loss: 0.9963650703430176,grad_norm: 0.8798753293650086, iteration: 245367
loss: 1.033366322517395,grad_norm: 0.9999999715320024, iteration: 245368
loss: 0.9961920976638794,grad_norm: 0.9205248850299742, iteration: 245369
loss: 0.9904190301895142,grad_norm: 0.9999990104068894, iteration: 245370
loss: 0.9770881533622742,grad_norm: 0.8474958464226257, iteration: 245371
loss: 1.0899684429168701,grad_norm: 0.9999997973329439, iteration: 245372
loss: 0.9591005444526672,grad_norm: 0.8696430919991597, iteration: 245373
loss: 1.0133376121520996,grad_norm: 0.9425360202102657, iteration: 245374
loss: 0.9727321267127991,grad_norm: 0.9321613032003544, iteration: 245375
loss: 0.9898421168327332,grad_norm: 0.9760966403025794, iteration: 245376
loss: 0.96513432264328,grad_norm: 0.9036322875631052, iteration: 245377
loss: 0.9540244340896606,grad_norm: 0.9999995345219803, iteration: 245378
loss: 1.032447338104248,grad_norm: 0.8943969656909164, iteration: 245379
loss: 1.0377309322357178,grad_norm: 0.8849914147141389, iteration: 245380
loss: 1.0177081823349,grad_norm: 0.9999991022698135, iteration: 245381
loss: 0.9777565002441406,grad_norm: 0.8572890825310718, iteration: 245382
loss: 1.0574498176574707,grad_norm: 0.9999995717101926, iteration: 245383
loss: 1.0037280321121216,grad_norm: 0.913979124686917, iteration: 245384
loss: 1.01221764087677,grad_norm: 0.7374185384289247, iteration: 245385
loss: 0.9953950047492981,grad_norm: 0.8262504934233889, iteration: 245386
loss: 1.024536371231079,grad_norm: 0.9999992753533464, iteration: 245387
loss: 1.0241601467132568,grad_norm: 0.808427848171718, iteration: 245388
loss: 1.0167286396026611,grad_norm: 0.9999990650996026, iteration: 245389
loss: 1.014344573020935,grad_norm: 0.9598381859966959, iteration: 245390
loss: 1.058668613433838,grad_norm: 0.9999997842557903, iteration: 245391
loss: 0.9897243976593018,grad_norm: 0.8112831557635789, iteration: 245392
loss: 1.1097502708435059,grad_norm: 0.9999998681962456, iteration: 245393
loss: 1.0112015008926392,grad_norm: 0.9259312818088329, iteration: 245394
loss: 1.0163013935089111,grad_norm: 0.9999991973316263, iteration: 245395
loss: 1.0096359252929688,grad_norm: 0.8532450342374733, iteration: 245396
loss: 1.0921120643615723,grad_norm: 0.9999999274195341, iteration: 245397
loss: 1.0888311862945557,grad_norm: 0.8591877128888442, iteration: 245398
loss: 0.9977219700813293,grad_norm: 0.8107352976910656, iteration: 245399
loss: 1.0850688219070435,grad_norm: 0.9481025062803109, iteration: 245400
loss: 1.0277456045150757,grad_norm: 0.8465459453509641, iteration: 245401
loss: 1.0159013271331787,grad_norm: 0.9999990912495704, iteration: 245402
loss: 1.0851585865020752,grad_norm: 0.9492233596991239, iteration: 245403
loss: 1.0688867568969727,grad_norm: 0.9999997756137031, iteration: 245404
loss: 0.9529131054878235,grad_norm: 0.8954656064167409, iteration: 245405
loss: 1.0194785594940186,grad_norm: 0.9999990279985116, iteration: 245406
loss: 1.0117040872573853,grad_norm: 0.8716172584058025, iteration: 245407
loss: 1.0428292751312256,grad_norm: 0.9999991021315454, iteration: 245408
loss: 1.023941159248352,grad_norm: 0.9096127219790414, iteration: 245409
loss: 0.9637237191200256,grad_norm: 0.9468738860283804, iteration: 245410
loss: 1.0379986763000488,grad_norm: 0.9855497383508761, iteration: 245411
loss: 1.2381514310836792,grad_norm: 0.9999998209307551, iteration: 245412
loss: 1.0427489280700684,grad_norm: 0.8962957759535629, iteration: 245413
loss: 1.0977799892425537,grad_norm: 0.999999897212987, iteration: 245414
loss: 1.1497398614883423,grad_norm: 0.9723203386987508, iteration: 245415
loss: 0.9952002763748169,grad_norm: 0.812993815798223, iteration: 245416
loss: 1.0748653411865234,grad_norm: 0.9999991659375839, iteration: 245417
loss: 1.0731472969055176,grad_norm: 0.9999990411997751, iteration: 245418
loss: 1.0754543542861938,grad_norm: 0.9999991245161146, iteration: 245419
loss: 1.003819227218628,grad_norm: 0.761521419922468, iteration: 245420
loss: 1.0234414339065552,grad_norm: 0.8692513046010372, iteration: 245421
loss: 1.0491803884506226,grad_norm: 0.9999994036473973, iteration: 245422
loss: 1.057877540588379,grad_norm: 0.9135782890220452, iteration: 245423
loss: 1.113519310951233,grad_norm: 0.999999851601372, iteration: 245424
loss: 1.16265070438385,grad_norm: 0.9999992033775853, iteration: 245425
loss: 0.9972053170204163,grad_norm: 0.7705349190043227, iteration: 245426
loss: 0.9970062971115112,grad_norm: 0.792300499002205, iteration: 245427
loss: 0.9985200762748718,grad_norm: 0.8632962732714531, iteration: 245428
loss: 0.9851874113082886,grad_norm: 0.9016031095742029, iteration: 245429
loss: 0.9578818082809448,grad_norm: 0.9556820883439205, iteration: 245430
loss: 1.0545936822891235,grad_norm: 0.9010558639251329, iteration: 245431
loss: 1.0468637943267822,grad_norm: 0.9999994760124895, iteration: 245432
loss: 1.0359914302825928,grad_norm: 0.8769646314822677, iteration: 245433
loss: 1.013062834739685,grad_norm: 0.9589173987537039, iteration: 245434
loss: 1.1744894981384277,grad_norm: 0.9999998704995249, iteration: 245435
loss: 1.0039820671081543,grad_norm: 0.9999999178709875, iteration: 245436
loss: 1.0102477073669434,grad_norm: 0.9999990100360255, iteration: 245437
loss: 1.0050687789916992,grad_norm: 0.8770676258097959, iteration: 245438
loss: 1.0030230283737183,grad_norm: 0.9999990900120656, iteration: 245439
loss: 1.0744216442108154,grad_norm: 0.9999991040279435, iteration: 245440
loss: 1.0256223678588867,grad_norm: 0.849655923517354, iteration: 245441
loss: 0.982670247554779,grad_norm: 0.8344665186257845, iteration: 245442
loss: 0.9854094982147217,grad_norm: 0.7735768246750397, iteration: 245443
loss: 1.0043128728866577,grad_norm: 0.8102052603407432, iteration: 245444
loss: 1.0469310283660889,grad_norm: 0.944646320472017, iteration: 245445
loss: 1.0064424276351929,grad_norm: 0.9110970440331289, iteration: 245446
loss: 0.9810053110122681,grad_norm: 0.8736177284952124, iteration: 245447
loss: 1.002561330795288,grad_norm: 0.9025696308957948, iteration: 245448
loss: 0.979895293712616,grad_norm: 0.7740154234461066, iteration: 245449
loss: 0.9741612672805786,grad_norm: 0.9182811007388594, iteration: 245450
loss: 0.9850485324859619,grad_norm: 0.8650313155251022, iteration: 245451
loss: 0.9786285161972046,grad_norm: 0.8759999450615228, iteration: 245452
loss: 0.9810651540756226,grad_norm: 0.9999990758906318, iteration: 245453
loss: 1.0359834432601929,grad_norm: 0.8157970823979761, iteration: 245454
loss: 1.0299837589263916,grad_norm: 0.9021116503391088, iteration: 245455
loss: 1.0041199922561646,grad_norm: 0.8096969477076759, iteration: 245456
loss: 0.9939030408859253,grad_norm: 0.9105674501829911, iteration: 245457
loss: 0.9505149722099304,grad_norm: 0.9177902888977474, iteration: 245458
loss: 1.0376554727554321,grad_norm: 0.8768736833617696, iteration: 245459
loss: 0.9933291077613831,grad_norm: 0.9018641056802543, iteration: 245460
loss: 0.9998069405555725,grad_norm: 0.941261430190723, iteration: 245461
loss: 1.035934329032898,grad_norm: 0.806674027371035, iteration: 245462
loss: 0.974101185798645,grad_norm: 0.909231299185497, iteration: 245463
loss: 1.030789852142334,grad_norm: 0.9999990960106911, iteration: 245464
loss: 0.9638292193412781,grad_norm: 0.9285471849001503, iteration: 245465
loss: 0.9962046146392822,grad_norm: 0.9999990232547735, iteration: 245466
loss: 1.0180481672286987,grad_norm: 0.7796608949174868, iteration: 245467
loss: 1.102036476135254,grad_norm: 0.9999995511194244, iteration: 245468
loss: 1.0314692258834839,grad_norm: 0.8573793712978559, iteration: 245469
loss: 1.045994758605957,grad_norm: 0.8623600560830588, iteration: 245470
loss: 0.9909327030181885,grad_norm: 0.9578508835004607, iteration: 245471
loss: 0.9631837606430054,grad_norm: 0.8959144770815937, iteration: 245472
loss: 1.023004174232483,grad_norm: 0.999999569800691, iteration: 245473
loss: 0.9998793601989746,grad_norm: 0.7960564114170153, iteration: 245474
loss: 0.9848490357398987,grad_norm: 0.8843512894406593, iteration: 245475
loss: 1.0282442569732666,grad_norm: 0.984411055884419, iteration: 245476
loss: 1.002753496170044,grad_norm: 0.8382264415258172, iteration: 245477
loss: 1.0761983394622803,grad_norm: 0.999999093549255, iteration: 245478
loss: 1.011814832687378,grad_norm: 0.999999101002617, iteration: 245479
loss: 1.0107821226119995,grad_norm: 0.9999991411252365, iteration: 245480
loss: 0.9946863055229187,grad_norm: 0.9766794645841084, iteration: 245481
loss: 0.9682068228721619,grad_norm: 0.8993362319013568, iteration: 245482
loss: 1.0823794603347778,grad_norm: 0.9999997513460158, iteration: 245483
loss: 0.9800069332122803,grad_norm: 0.9319879603124651, iteration: 245484
loss: 0.9745776057243347,grad_norm: 0.8064104551518883, iteration: 245485
loss: 1.0073384046554565,grad_norm: 0.8834013435214325, iteration: 245486
loss: 1.015594244003296,grad_norm: 0.9999997042962786, iteration: 245487
loss: 0.9718082547187805,grad_norm: 0.9999996532164889, iteration: 245488
loss: 1.067626714706421,grad_norm: 0.9305712792681838, iteration: 245489
loss: 1.001992106437683,grad_norm: 0.8744297904014879, iteration: 245490
loss: 0.9924002289772034,grad_norm: 0.8683689544883031, iteration: 245491
loss: 0.9951936602592468,grad_norm: 0.8491094536947966, iteration: 245492
loss: 0.9930599331855774,grad_norm: 0.999999599772706, iteration: 245493
loss: 1.0476030111312866,grad_norm: 0.9999996671610595, iteration: 245494
loss: 1.0153332948684692,grad_norm: 0.9999994102460218, iteration: 245495
loss: 1.0259885787963867,grad_norm: 0.8294662966048247, iteration: 245496
loss: 1.009677529335022,grad_norm: 0.8575073349229458, iteration: 245497
loss: 1.0339845418930054,grad_norm: 0.9012012024446595, iteration: 245498
loss: 0.9652565121650696,grad_norm: 0.8382792014325355, iteration: 245499
loss: 1.015740156173706,grad_norm: 0.9999999010898984, iteration: 245500
loss: 1.0026174783706665,grad_norm: 0.9533593852129041, iteration: 245501
loss: 0.993165135383606,grad_norm: 0.8223700944588057, iteration: 245502
loss: 1.0198001861572266,grad_norm: 0.9999992643504587, iteration: 245503
loss: 1.0187016725540161,grad_norm: 0.8145733216988015, iteration: 245504
loss: 1.0190433263778687,grad_norm: 0.9999990902830644, iteration: 245505
loss: 1.013293981552124,grad_norm: 0.97015149871071, iteration: 245506
loss: 1.019048810005188,grad_norm: 0.8166996707267639, iteration: 245507
loss: 1.0555723905563354,grad_norm: 0.9999998546602668, iteration: 245508
loss: 1.0040223598480225,grad_norm: 0.9999999126239953, iteration: 245509
loss: 0.9671101570129395,grad_norm: 0.8379573250137533, iteration: 245510
loss: 1.0083774328231812,grad_norm: 0.7339201129465216, iteration: 245511
loss: 1.0879666805267334,grad_norm: 0.9999993890542489, iteration: 245512
loss: 1.0123186111450195,grad_norm: 0.9395070691457967, iteration: 245513
loss: 0.9868669509887695,grad_norm: 0.7735496743482949, iteration: 245514
loss: 1.1338095664978027,grad_norm: 0.9999996056865127, iteration: 245515
loss: 1.0088986158370972,grad_norm: 0.713790983407129, iteration: 245516
loss: 1.0197018384933472,grad_norm: 0.9999989111830451, iteration: 245517
loss: 0.9854112863540649,grad_norm: 0.758252673038342, iteration: 245518
loss: 0.9806228280067444,grad_norm: 0.9536800245691859, iteration: 245519
loss: 0.9920735955238342,grad_norm: 0.8449821588473517, iteration: 245520
loss: 1.0151742696762085,grad_norm: 0.9865912016556507, iteration: 245521
loss: 0.959288477897644,grad_norm: 0.8138487058621438, iteration: 245522
loss: 1.0209283828735352,grad_norm: 0.9753238138688054, iteration: 245523
loss: 1.0046885013580322,grad_norm: 0.8497839416364029, iteration: 245524
loss: 0.996630847454071,grad_norm: 0.9999990607860701, iteration: 245525
loss: 1.0104469060897827,grad_norm: 0.9724832551350331, iteration: 245526
loss: 1.0181090831756592,grad_norm: 0.9999993136199201, iteration: 245527
loss: 0.9952563047409058,grad_norm: 0.995627405086956, iteration: 245528
loss: 1.0457556247711182,grad_norm: 0.999999323852518, iteration: 245529
loss: 0.9762301445007324,grad_norm: 0.8173462365373036, iteration: 245530
loss: 0.9828892946243286,grad_norm: 0.9096983023790871, iteration: 245531
loss: 1.0048253536224365,grad_norm: 0.9669407073740637, iteration: 245532
loss: 1.0029469728469849,grad_norm: 0.7767764895247797, iteration: 245533
loss: 1.0006110668182373,grad_norm: 0.9999990109509141, iteration: 245534
loss: 1.016918659210205,grad_norm: 0.8433691774362245, iteration: 245535
loss: 1.0405466556549072,grad_norm: 0.8765534581006704, iteration: 245536
loss: 1.0007699728012085,grad_norm: 0.9731664299447205, iteration: 245537
loss: 1.0119788646697998,grad_norm: 0.8854157332980099, iteration: 245538
loss: 0.9962098598480225,grad_norm: 0.8540689388197422, iteration: 245539
loss: 1.0095469951629639,grad_norm: 0.8064549735776888, iteration: 245540
loss: 1.009778618812561,grad_norm: 0.9185891767263122, iteration: 245541
loss: 1.0304160118103027,grad_norm: 0.8946847038577346, iteration: 245542
loss: 1.0202910900115967,grad_norm: 0.9999993493253205, iteration: 245543
loss: 1.0044796466827393,grad_norm: 0.8829795566300828, iteration: 245544
loss: 0.9844033122062683,grad_norm: 0.8856144581821129, iteration: 245545
loss: 1.01486074924469,grad_norm: 0.9432755146692567, iteration: 245546
loss: 0.9934679865837097,grad_norm: 0.8052478029364974, iteration: 245547
loss: 0.9478932023048401,grad_norm: 0.9749085739604114, iteration: 245548
loss: 0.9768387079238892,grad_norm: 0.9999996235108785, iteration: 245549
loss: 0.9981192350387573,grad_norm: 0.9999993593218225, iteration: 245550
loss: 1.0151914358139038,grad_norm: 0.9326666706186677, iteration: 245551
loss: 0.968656599521637,grad_norm: 0.9999989895434857, iteration: 245552
loss: 0.989943265914917,grad_norm: 0.8421615796100764, iteration: 245553
loss: 0.99750816822052,grad_norm: 0.7990804779206353, iteration: 245554
loss: 1.0025098323822021,grad_norm: 0.9388145763448367, iteration: 245555
loss: 0.9866931438446045,grad_norm: 0.7950274989509402, iteration: 245556
loss: 0.9987015724182129,grad_norm: 0.8787983970685307, iteration: 245557
loss: 1.002638578414917,grad_norm: 0.9999990127820347, iteration: 245558
loss: 1.003822922706604,grad_norm: 0.8377216810658199, iteration: 245559
loss: 1.0009464025497437,grad_norm: 0.9999991292073639, iteration: 245560
loss: 0.981074869632721,grad_norm: 0.8818044578616298, iteration: 245561
loss: 1.018135905265808,grad_norm: 0.7052952834602864, iteration: 245562
loss: 1.0041953325271606,grad_norm: 0.827948342060822, iteration: 245563
loss: 1.0306339263916016,grad_norm: 0.9999997133586866, iteration: 245564
loss: 1.0265024900436401,grad_norm: 0.8755343579286794, iteration: 245565
loss: 0.9789608120918274,grad_norm: 0.9999990690555682, iteration: 245566
loss: 1.0132019519805908,grad_norm: 0.8988075240287352, iteration: 245567
loss: 1.0315436124801636,grad_norm: 0.8166190906969452, iteration: 245568
loss: 1.0994646549224854,grad_norm: 0.9688655794751585, iteration: 245569
loss: 0.9984691739082336,grad_norm: 0.8979098153845262, iteration: 245570
loss: 1.0037657022476196,grad_norm: 0.8563149132368834, iteration: 245571
loss: 0.9964984655380249,grad_norm: 0.999999071519104, iteration: 245572
loss: 0.9831278920173645,grad_norm: 0.7881558549838019, iteration: 245573
loss: 0.9976049065589905,grad_norm: 0.9999990267161635, iteration: 245574
loss: 1.20708429813385,grad_norm: 0.9999998298695081, iteration: 245575
loss: 1.0116817951202393,grad_norm: 0.899004031792178, iteration: 245576
loss: 0.9908813238143921,grad_norm: 0.999998970405348, iteration: 245577
loss: 0.9968352913856506,grad_norm: 0.8047803828649825, iteration: 245578
loss: 0.9654028415679932,grad_norm: 0.8703797472848561, iteration: 245579
loss: 1.0135806798934937,grad_norm: 0.982354186050305, iteration: 245580
loss: 1.0031999349594116,grad_norm: 0.9999991209453405, iteration: 245581
loss: 0.9947729110717773,grad_norm: 0.9675699327962806, iteration: 245582
loss: 0.9837207198143005,grad_norm: 0.9999991285277435, iteration: 245583
loss: 1.0180221796035767,grad_norm: 0.9999998704972983, iteration: 245584
loss: 0.9948416948318481,grad_norm: 0.8685420762710127, iteration: 245585
loss: 1.0529463291168213,grad_norm: 0.9999990335039763, iteration: 245586
loss: 0.9696013331413269,grad_norm: 0.9999991787972691, iteration: 245587
loss: 1.0018579959869385,grad_norm: 0.9999990639629537, iteration: 245588
loss: 0.9611768126487732,grad_norm: 0.802191214614887, iteration: 245589
loss: 1.0142580270767212,grad_norm: 0.8660175098891905, iteration: 245590
loss: 1.0160053968429565,grad_norm: 0.8498013626736326, iteration: 245591
loss: 0.9884417057037354,grad_norm: 0.9896690448023985, iteration: 245592
loss: 1.018308401107788,grad_norm: 0.8180189117528553, iteration: 245593
loss: 0.9830838441848755,grad_norm: 0.9615658987148353, iteration: 245594
loss: 1.086451768875122,grad_norm: 0.999999180672742, iteration: 245595
loss: 1.004570722579956,grad_norm: 0.6344643375567238, iteration: 245596
loss: 1.015459418296814,grad_norm: 0.9999991716455978, iteration: 245597
loss: 1.058632493019104,grad_norm: 0.9999996350978667, iteration: 245598
loss: 0.9938370585441589,grad_norm: 0.979649640695604, iteration: 245599
loss: 0.9871380925178528,grad_norm: 0.9999990308263219, iteration: 245600
loss: 0.9960070848464966,grad_norm: 0.9999990555525686, iteration: 245601
loss: 0.9737311005592346,grad_norm: 0.9228414607896911, iteration: 245602
loss: 1.0017727613449097,grad_norm: 0.8703467755378834, iteration: 245603
loss: 0.973696231842041,grad_norm: 0.8618631735774759, iteration: 245604
loss: 1.0250749588012695,grad_norm: 0.912310869251339, iteration: 245605
loss: 1.002614140510559,grad_norm: 0.8792901296867791, iteration: 245606
loss: 0.9829286932945251,grad_norm: 0.8611272850954922, iteration: 245607
loss: 1.0135513544082642,grad_norm: 0.795014558598979, iteration: 245608
loss: 0.9618786573410034,grad_norm: 0.938777880158223, iteration: 245609
loss: 0.985639750957489,grad_norm: 0.9309112610844696, iteration: 245610
loss: 1.0537574291229248,grad_norm: 0.8942476690232942, iteration: 245611
loss: 1.0379042625427246,grad_norm: 0.8896197445272318, iteration: 245612
loss: 0.9382026791572571,grad_norm: 0.9743003681104054, iteration: 245613
loss: 0.9421988725662231,grad_norm: 0.9840234696542671, iteration: 245614
loss: 1.0267937183380127,grad_norm: 0.9999990624989247, iteration: 245615
loss: 1.0115931034088135,grad_norm: 0.7511457911571704, iteration: 245616
loss: 1.010813593864441,grad_norm: 0.8336467895764511, iteration: 245617
loss: 0.9479111433029175,grad_norm: 0.888081837336842, iteration: 245618
loss: 1.0438001155853271,grad_norm: 0.7762652227031195, iteration: 245619
loss: 1.024389624595642,grad_norm: 0.723173506880517, iteration: 245620
loss: 1.0030378103256226,grad_norm: 0.90259787911806, iteration: 245621
loss: 1.0744439363479614,grad_norm: 0.9999991706325986, iteration: 245622
loss: 0.9778485894203186,grad_norm: 0.9396209229656642, iteration: 245623
loss: 1.005007266998291,grad_norm: 0.8180754388103219, iteration: 245624
loss: 1.10161292552948,grad_norm: 0.9999995638987584, iteration: 245625
loss: 0.9638590216636658,grad_norm: 0.9166853359542733, iteration: 245626
loss: 0.9826102256774902,grad_norm: 0.9715351281036054, iteration: 245627
loss: 0.9799197316169739,grad_norm: 0.8251561113729714, iteration: 245628
loss: 1.0202561616897583,grad_norm: 0.8683500732894976, iteration: 245629
loss: 0.9909489750862122,grad_norm: 0.9999990204688207, iteration: 245630
loss: 0.9892306327819824,grad_norm: 0.8807272531847968, iteration: 245631
loss: 0.9800472259521484,grad_norm: 0.8194758519517977, iteration: 245632
loss: 1.0048459768295288,grad_norm: 0.9325525781744182, iteration: 245633
loss: 1.141510248184204,grad_norm: 1.0000000006849434, iteration: 245634
loss: 0.9683588743209839,grad_norm: 0.929135780582678, iteration: 245635
loss: 0.9885059595108032,grad_norm: 0.8362782883423401, iteration: 245636
loss: 0.9731582403182983,grad_norm: 0.7821962053853793, iteration: 245637
loss: 1.0086811780929565,grad_norm: 0.9343835788648228, iteration: 245638
loss: 0.9839492440223694,grad_norm: 0.9999991511085892, iteration: 245639
loss: 0.9971789717674255,grad_norm: 0.8785074152453028, iteration: 245640
loss: 0.9890393614768982,grad_norm: 0.7863874713663935, iteration: 245641
loss: 0.9599151611328125,grad_norm: 0.9999991166138102, iteration: 245642
loss: 1.0228060483932495,grad_norm: 0.9999991698902257, iteration: 245643
loss: 0.98470538854599,grad_norm: 0.9999992062957008, iteration: 245644
loss: 0.9462030529975891,grad_norm: 0.9824739331207143, iteration: 245645
loss: 0.9852550029754639,grad_norm: 0.9510734474087839, iteration: 245646
loss: 0.9871180057525635,grad_norm: 0.9505681012499999, iteration: 245647
loss: 1.027269721031189,grad_norm: 0.9999997609217802, iteration: 245648
loss: 0.9972490072250366,grad_norm: 0.9999996329475365, iteration: 245649
loss: 1.0283629894256592,grad_norm: 0.8216471437992229, iteration: 245650
loss: 0.9911056160926819,grad_norm: 0.9315504226536903, iteration: 245651
loss: 0.9911048412322998,grad_norm: 0.81488163196562, iteration: 245652
loss: 1.0568909645080566,grad_norm: 0.999999113763788, iteration: 245653
loss: 1.0045961141586304,grad_norm: 0.9914074722227701, iteration: 245654
loss: 1.0497325658798218,grad_norm: 0.8555059862559693, iteration: 245655
loss: 1.0433911085128784,grad_norm: 0.8813868634904577, iteration: 245656
loss: 0.9640553593635559,grad_norm: 0.9999991349962731, iteration: 245657
loss: 0.9936749339103699,grad_norm: 0.9999992876102576, iteration: 245658
loss: 0.9533342123031616,grad_norm: 0.9999992560724377, iteration: 245659
loss: 0.9769749045372009,grad_norm: 0.8216858927611222, iteration: 245660
loss: 1.0018428564071655,grad_norm: 0.963795254947263, iteration: 245661
loss: 1.0072916746139526,grad_norm: 0.8904405687166517, iteration: 245662
loss: 0.9981575012207031,grad_norm: 0.9999997602769244, iteration: 245663
loss: 0.997826874256134,grad_norm: 0.9429175779999157, iteration: 245664
loss: 0.978018581867218,grad_norm: 0.9999991854447811, iteration: 245665
loss: 0.9585496783256531,grad_norm: 0.9999992602208747, iteration: 245666
loss: 0.9940012693405151,grad_norm: 0.8856604799690266, iteration: 245667
loss: 1.00740647315979,grad_norm: 0.8701086513287007, iteration: 245668
loss: 1.0227056741714478,grad_norm: 0.8339726036581877, iteration: 245669
loss: 0.9740598797798157,grad_norm: 0.7511411434134138, iteration: 245670
loss: 1.0192188024520874,grad_norm: 0.9202362583592214, iteration: 245671
loss: 0.9961016774177551,grad_norm: 0.9033472932925237, iteration: 245672
loss: 1.023557424545288,grad_norm: 0.9941857083883338, iteration: 245673
loss: 0.9985979795455933,grad_norm: 0.9201288787393989, iteration: 245674
loss: 1.0195468664169312,grad_norm: 0.9298327013448797, iteration: 245675
loss: 1.0216753482818604,grad_norm: 0.8395911604249598, iteration: 245676
loss: 0.9973528981208801,grad_norm: 0.9901086788874911, iteration: 245677
loss: 0.9875113368034363,grad_norm: 0.9194207153689088, iteration: 245678
loss: 1.0072828531265259,grad_norm: 0.9706309820437082, iteration: 245679
loss: 0.9761801362037659,grad_norm: 0.7693243955339093, iteration: 245680
loss: 1.0169323682785034,grad_norm: 0.868403996673313, iteration: 245681
loss: 0.9659611582756042,grad_norm: 0.853864694618151, iteration: 245682
loss: 1.003658652305603,grad_norm: 0.999999412735209, iteration: 245683
loss: 0.9865573644638062,grad_norm: 0.8544998151090266, iteration: 245684
loss: 0.9961947202682495,grad_norm: 0.7810221193838273, iteration: 245685
loss: 1.0048307180404663,grad_norm: 0.8368337428625706, iteration: 245686
loss: 1.0095396041870117,grad_norm: 0.8759967593673729, iteration: 245687
loss: 0.9984501004219055,grad_norm: 0.9999991684872807, iteration: 245688
loss: 1.0054861307144165,grad_norm: 0.9217431464131736, iteration: 245689
loss: 1.0049257278442383,grad_norm: 0.9999990892825308, iteration: 245690
loss: 1.0127960443496704,grad_norm: 0.9999991036549453, iteration: 245691
loss: 0.9993538856506348,grad_norm: 0.9758185872082403, iteration: 245692
loss: 0.9983444809913635,grad_norm: 0.9375413241966049, iteration: 245693
loss: 1.0042988061904907,grad_norm: 0.9293534259240747, iteration: 245694
loss: 1.0071216821670532,grad_norm: 0.9999991619012818, iteration: 245695
loss: 0.9779234528541565,grad_norm: 0.8647876628980952, iteration: 245696
loss: 1.0234705209732056,grad_norm: 0.9075280894404976, iteration: 245697
loss: 1.0249661207199097,grad_norm: 0.828182000618894, iteration: 245698
loss: 1.0147860050201416,grad_norm: 0.9298293509434808, iteration: 245699
loss: 1.021881341934204,grad_norm: 0.9306107901717046, iteration: 245700
loss: 0.9688328504562378,grad_norm: 0.982186302401918, iteration: 245701
loss: 0.9902409911155701,grad_norm: 0.9140371465685428, iteration: 245702
loss: 0.9787609577178955,grad_norm: 0.9685622642741619, iteration: 245703
loss: 1.0251011848449707,grad_norm: 0.9999989574179683, iteration: 245704
loss: 0.9907196760177612,grad_norm: 0.8524936352226469, iteration: 245705
loss: 0.9972705245018005,grad_norm: 0.8868970566296304, iteration: 245706
loss: 1.0276728868484497,grad_norm: 0.9245943694939638, iteration: 245707
loss: 0.9978969693183899,grad_norm: 0.8163609993101204, iteration: 245708
loss: 0.9584891200065613,grad_norm: 0.9999991037709004, iteration: 245709
loss: 0.9876241683959961,grad_norm: 0.9188306776702388, iteration: 245710
loss: 1.0129152536392212,grad_norm: 0.770273003309023, iteration: 245711
loss: 0.9889425039291382,grad_norm: 0.914877783775856, iteration: 245712
loss: 0.9881344437599182,grad_norm: 0.8806041884135349, iteration: 245713
loss: 1.0111013650894165,grad_norm: 0.8988575689940757, iteration: 245714
loss: 1.0055148601531982,grad_norm: 0.847691254280533, iteration: 245715
loss: 0.9947738647460938,grad_norm: 0.8230730225939082, iteration: 245716
loss: 0.9750648140907288,grad_norm: 0.9194789163309489, iteration: 245717
loss: 1.0127873420715332,grad_norm: 0.7677500016752785, iteration: 245718
loss: 1.1241053342819214,grad_norm: 0.9999994717341987, iteration: 245719
loss: 1.0295751094818115,grad_norm: 0.9999990136632855, iteration: 245720
loss: 0.9905626177787781,grad_norm: 0.9505758433171912, iteration: 245721
loss: 1.0158966779708862,grad_norm: 0.9798170048336311, iteration: 245722
loss: 0.9920034408569336,grad_norm: 0.926916037938686, iteration: 245723
loss: 1.0229012966156006,grad_norm: 0.9101464501199147, iteration: 245724
loss: 0.9993440508842468,grad_norm: 0.999122524531739, iteration: 245725
loss: 1.0092891454696655,grad_norm: 0.9629210509739787, iteration: 245726
loss: 0.946516752243042,grad_norm: 0.8758981612757573, iteration: 245727
loss: 0.9920695424079895,grad_norm: 0.9128533641296784, iteration: 245728
loss: 0.986198365688324,grad_norm: 0.9468772627658968, iteration: 245729
loss: 1.0242286920547485,grad_norm: 0.8789045882048488, iteration: 245730
loss: 0.9845942258834839,grad_norm: 0.8356133589473387, iteration: 245731
loss: 0.9845425486564636,grad_norm: 0.9579162074726443, iteration: 245732
loss: 0.9954670667648315,grad_norm: 0.9037677943881074, iteration: 245733
loss: 1.000665545463562,grad_norm: 0.7561143752070942, iteration: 245734
loss: 1.025890588760376,grad_norm: 0.9162821400419963, iteration: 245735
loss: 0.9781284928321838,grad_norm: 0.9852147060093887, iteration: 245736
loss: 0.9846656322479248,grad_norm: 0.9999992571115177, iteration: 245737
loss: 1.0005472898483276,grad_norm: 0.7553043883816591, iteration: 245738
loss: 0.991094172000885,grad_norm: 0.9999996855508763, iteration: 245739
loss: 1.0621187686920166,grad_norm: 0.9999999468935197, iteration: 245740
loss: 1.0252201557159424,grad_norm: 0.833962075950768, iteration: 245741
loss: 1.0221000909805298,grad_norm: 0.9756893215562952, iteration: 245742
loss: 1.0106849670410156,grad_norm: 0.8747142805914282, iteration: 245743
loss: 0.9702901840209961,grad_norm: 0.9540295459274138, iteration: 245744
loss: 0.9938973188400269,grad_norm: 0.8017577681209431, iteration: 245745
loss: 0.9721812009811401,grad_norm: 0.891568191636658, iteration: 245746
loss: 1.052234411239624,grad_norm: 0.9999997328710608, iteration: 245747
loss: 1.0164185762405396,grad_norm: 0.7947286727097587, iteration: 245748
loss: 0.9851602911949158,grad_norm: 0.7823453556065479, iteration: 245749
loss: 1.0872957706451416,grad_norm: 0.8622098779308017, iteration: 245750
loss: 1.0289016962051392,grad_norm: 0.999999038453813, iteration: 245751
loss: 0.9737950563430786,grad_norm: 0.916656632426148, iteration: 245752
loss: 1.0143773555755615,grad_norm: 0.9337208674563741, iteration: 245753
loss: 1.0090469121932983,grad_norm: 0.9486318122428724, iteration: 245754
loss: 0.9998828172683716,grad_norm: 0.9391947548443886, iteration: 245755
loss: 1.0192582607269287,grad_norm: 0.9284596649445989, iteration: 245756
loss: 1.0263959169387817,grad_norm: 0.9999991110379929, iteration: 245757
loss: 1.014093279838562,grad_norm: 0.801858272180088, iteration: 245758
loss: 0.9752992987632751,grad_norm: 0.9999991924443242, iteration: 245759
loss: 0.9575470685958862,grad_norm: 0.9856850963509923, iteration: 245760
loss: 1.0003862380981445,grad_norm: 0.9811620610993856, iteration: 245761
loss: 1.0053930282592773,grad_norm: 0.7772868199767036, iteration: 245762
loss: 1.0005601644515991,grad_norm: 0.9428303334416218, iteration: 245763
loss: 1.0037943124771118,grad_norm: 0.8467658843339418, iteration: 245764
loss: 1.021767020225525,grad_norm: 0.975446999388702, iteration: 245765
loss: 1.0262469053268433,grad_norm: 0.9458372957434553, iteration: 245766
loss: 1.0129969120025635,grad_norm: 0.9243460237798756, iteration: 245767
loss: 0.9878560304641724,grad_norm: 0.9825490130925094, iteration: 245768
loss: 1.0802407264709473,grad_norm: 0.9999992025938752, iteration: 245769
loss: 1.0341193675994873,grad_norm: 0.8751744980274124, iteration: 245770
loss: 0.9572404026985168,grad_norm: 0.9416530947022126, iteration: 245771
loss: 0.9964088797569275,grad_norm: 0.8838833476827341, iteration: 245772
loss: 1.0381536483764648,grad_norm: 0.7976351515805836, iteration: 245773
loss: 1.0096869468688965,grad_norm: 0.8555463435816066, iteration: 245774
loss: 0.9992216229438782,grad_norm: 0.9265331385996909, iteration: 245775
loss: 0.9992027878761292,grad_norm: 0.9999995388077729, iteration: 245776
loss: 0.9958142042160034,grad_norm: 0.999999098407993, iteration: 245777
loss: 0.9856910705566406,grad_norm: 0.872710487155371, iteration: 245778
loss: 0.9813826084136963,grad_norm: 0.7610206690348811, iteration: 245779
loss: 1.0214375257492065,grad_norm: 0.9999991588109342, iteration: 245780
loss: 1.0126253366470337,grad_norm: 0.9169902361869583, iteration: 245781
loss: 0.976982593536377,grad_norm: 0.9261846991247835, iteration: 245782
loss: 1.0108311176300049,grad_norm: 0.9999990637704832, iteration: 245783
loss: 1.0074235200881958,grad_norm: 0.9315447848803206, iteration: 245784
loss: 1.0145485401153564,grad_norm: 0.9999991620580125, iteration: 245785
loss: 1.009193778038025,grad_norm: 0.9647931846487855, iteration: 245786
loss: 0.9654570817947388,grad_norm: 0.8613398973978025, iteration: 245787
loss: 0.9877597093582153,grad_norm: 0.8994576500852058, iteration: 245788
loss: 0.9705902338027954,grad_norm: 0.8767115648851168, iteration: 245789
loss: 0.9877177476882935,grad_norm: 0.9085670220659312, iteration: 245790
loss: 1.0038809776306152,grad_norm: 0.9335315050312153, iteration: 245791
loss: 1.0869674682617188,grad_norm: 0.9999994829782595, iteration: 245792
loss: 0.9866731762886047,grad_norm: 0.9765158875172119, iteration: 245793
loss: 1.019200325012207,grad_norm: 0.7398751366408802, iteration: 245794
loss: 1.0292810201644897,grad_norm: 0.9999995033577367, iteration: 245795
loss: 1.0061628818511963,grad_norm: 0.8818522100038081, iteration: 245796
loss: 1.0171576738357544,grad_norm: 0.8240157640741395, iteration: 245797
loss: 1.0193490982055664,grad_norm: 0.9920278447046951, iteration: 245798
loss: 0.9925721883773804,grad_norm: 0.8862290979149948, iteration: 245799
loss: 0.9870095252990723,grad_norm: 0.6975844207042265, iteration: 245800
loss: 1.0198163986206055,grad_norm: 0.9191903115424118, iteration: 245801
loss: 1.0040984153747559,grad_norm: 0.9999990978318041, iteration: 245802
loss: 0.9820330142974854,grad_norm: 0.9999992097067932, iteration: 245803
loss: 1.004501223564148,grad_norm: 0.9999999860922578, iteration: 245804
loss: 0.9946994185447693,grad_norm: 0.8681610011477585, iteration: 245805
loss: 1.0129921436309814,grad_norm: 0.9999994159228625, iteration: 245806
loss: 0.9862147569656372,grad_norm: 0.9504122957191182, iteration: 245807
loss: 1.0019938945770264,grad_norm: 0.839528716676895, iteration: 245808
loss: 1.02830171585083,grad_norm: 0.8560859049424566, iteration: 245809
loss: 0.9899169206619263,grad_norm: 0.9835302331359039, iteration: 245810
loss: 1.0090973377227783,grad_norm: 0.8479571686854778, iteration: 245811
loss: 1.0105414390563965,grad_norm: 0.796020274557239, iteration: 245812
loss: 0.9761569499969482,grad_norm: 0.8572287602670277, iteration: 245813
loss: 1.0441863536834717,grad_norm: 0.813796484871621, iteration: 245814
loss: 0.9864287972450256,grad_norm: 0.9999992211160451, iteration: 245815
loss: 0.9745937585830688,grad_norm: 0.9102007252976803, iteration: 245816
loss: 0.9827688336372375,grad_norm: 0.8759018559491852, iteration: 245817
loss: 0.9824913144111633,grad_norm: 0.9999989639611102, iteration: 245818
loss: 0.9991600513458252,grad_norm: 0.855815184158042, iteration: 245819
loss: 0.9988197088241577,grad_norm: 0.802564093437805, iteration: 245820
loss: 0.9553272724151611,grad_norm: 0.9053636464144793, iteration: 245821
loss: 0.9888679385185242,grad_norm: 0.9952736124481857, iteration: 245822
loss: 0.9958397746086121,grad_norm: 0.8728348922268161, iteration: 245823
loss: 1.0266191959381104,grad_norm: 0.9513988728067213, iteration: 245824
loss: 0.975113570690155,grad_norm: 0.8428578592182883, iteration: 245825
loss: 1.0311980247497559,grad_norm: 0.7357337198923745, iteration: 245826
loss: 0.9964596629142761,grad_norm: 0.7145361379811206, iteration: 245827
loss: 0.9750177264213562,grad_norm: 0.8411870843286785, iteration: 245828
loss: 0.9940592050552368,grad_norm: 0.9999990942349629, iteration: 245829
loss: 0.9702264070510864,grad_norm: 0.8731717469451656, iteration: 245830
loss: 1.0201613903045654,grad_norm: 0.925200703085782, iteration: 245831
loss: 1.0983914136886597,grad_norm: 0.9999992607800181, iteration: 245832
loss: 0.9614201784133911,grad_norm: 0.9519609468260102, iteration: 245833
loss: 1.0384821891784668,grad_norm: 0.9999998554324484, iteration: 245834
loss: 1.0333459377288818,grad_norm: 0.9681454751609765, iteration: 245835
loss: 0.9934954047203064,grad_norm: 0.8577790311058888, iteration: 245836
loss: 1.0000606775283813,grad_norm: 0.8657431015420848, iteration: 245837
loss: 1.014822006225586,grad_norm: 0.8745878388342093, iteration: 245838
loss: 1.0089662075042725,grad_norm: 0.8882919831888031, iteration: 245839
loss: 1.0118590593338013,grad_norm: 0.9583007020909606, iteration: 245840
loss: 0.9839625954627991,grad_norm: 0.9202376099251737, iteration: 245841
loss: 0.9871749877929688,grad_norm: 0.9999990747147853, iteration: 245842
loss: 1.033244252204895,grad_norm: 0.9874366018700427, iteration: 245843
loss: 0.9732054471969604,grad_norm: 0.9999990207469405, iteration: 245844
loss: 0.981775164604187,grad_norm: 0.8900484546577211, iteration: 245845
loss: 1.0504472255706787,grad_norm: 0.9999990262289973, iteration: 245846
loss: 0.9981213808059692,grad_norm: 0.8674940066348221, iteration: 245847
loss: 1.0143176317214966,grad_norm: 0.999999170109891, iteration: 245848
loss: 0.9857063293457031,grad_norm: 0.8534999851689664, iteration: 245849
loss: 1.009719729423523,grad_norm: 0.9038402786372505, iteration: 245850
loss: 0.9904102683067322,grad_norm: 0.9999991159746646, iteration: 245851
loss: 0.968165934085846,grad_norm: 0.7876504872186206, iteration: 245852
loss: 0.9698940515518188,grad_norm: 0.8116074841358704, iteration: 245853
loss: 1.0185065269470215,grad_norm: 0.999999010059285, iteration: 245854
loss: 1.0195704698562622,grad_norm: 0.8422984871442429, iteration: 245855
loss: 1.0092185735702515,grad_norm: 0.814668367177768, iteration: 245856
loss: 1.0094934701919556,grad_norm: 0.9999991101029547, iteration: 245857
loss: 0.9601835608482361,grad_norm: 0.8842694339676472, iteration: 245858
loss: 1.0009578466415405,grad_norm: 0.9587734790922177, iteration: 245859
loss: 0.9823718070983887,grad_norm: 0.9999991103392186, iteration: 245860
loss: 0.9867402911186218,grad_norm: 0.8349746060933255, iteration: 245861
loss: 1.0128365755081177,grad_norm: 0.9848230531426264, iteration: 245862
loss: 0.9945506453514099,grad_norm: 0.9538880985741853, iteration: 245863
loss: 0.9934660196304321,grad_norm: 0.8470672522407262, iteration: 245864
loss: 0.9982460737228394,grad_norm: 0.7632790962219785, iteration: 245865
loss: 0.9762548208236694,grad_norm: 0.8502737134392391, iteration: 245866
loss: 1.01858389377594,grad_norm: 0.859213143510695, iteration: 245867
loss: 1.0898655652999878,grad_norm: 0.9999990588973852, iteration: 245868
loss: 1.021019458770752,grad_norm: 0.9052425075681036, iteration: 245869
loss: 1.0103793144226074,grad_norm: 0.7999797031515682, iteration: 245870
loss: 0.9806556105613708,grad_norm: 0.852635218312187, iteration: 245871
loss: 1.0005401372909546,grad_norm: 0.933038470157453, iteration: 245872
loss: 1.0037412643432617,grad_norm: 0.9999996316544996, iteration: 245873
loss: 0.9625954627990723,grad_norm: 0.9742191366419549, iteration: 245874
loss: 1.0044761896133423,grad_norm: 0.955164885836148, iteration: 245875
loss: 1.0169100761413574,grad_norm: 0.9999991382212339, iteration: 245876
loss: 1.0391229391098022,grad_norm: 0.9580380601358415, iteration: 245877
loss: 1.0347981452941895,grad_norm: 0.8500847035283458, iteration: 245878
loss: 1.0531214475631714,grad_norm: 0.854295845694691, iteration: 245879
loss: 0.9789466857910156,grad_norm: 0.831772218448821, iteration: 245880
loss: 0.9837232232093811,grad_norm: 0.8659367684361986, iteration: 245881
loss: 0.9636190533638,grad_norm: 0.7881571570688073, iteration: 245882
loss: 1.0040736198425293,grad_norm: 0.8775930492392369, iteration: 245883
loss: 0.9609308838844299,grad_norm: 0.8516814204019184, iteration: 245884
loss: 1.0266131162643433,grad_norm: 0.8943282570572524, iteration: 245885
loss: 1.0115351676940918,grad_norm: 0.9329698218555353, iteration: 245886
loss: 0.971404492855072,grad_norm: 0.999999112740666, iteration: 245887
loss: 1.0028409957885742,grad_norm: 0.7578057832424188, iteration: 245888
loss: 1.0052870512008667,grad_norm: 0.9507253273372598, iteration: 245889
loss: 0.9835640788078308,grad_norm: 0.887675399536798, iteration: 245890
loss: 1.0081592798233032,grad_norm: 0.9248801756608527, iteration: 245891
loss: 0.9958862066268921,grad_norm: 0.9468368016661358, iteration: 245892
loss: 0.9721155762672424,grad_norm: 0.9337017828665567, iteration: 245893
loss: 1.0117096900939941,grad_norm: 0.9999998156423686, iteration: 245894
loss: 0.9711101651191711,grad_norm: 0.9724471228695909, iteration: 245895
loss: 0.9954448342323303,grad_norm: 0.9999991503346233, iteration: 245896
loss: 0.9892581105232239,grad_norm: 0.806293384445843, iteration: 245897
loss: 0.9859423637390137,grad_norm: 0.9451622608691703, iteration: 245898
loss: 1.0759897232055664,grad_norm: 0.9992982113942861, iteration: 245899
loss: 0.9844486117362976,grad_norm: 0.7628117509495008, iteration: 245900
loss: 0.9655233025550842,grad_norm: 0.8190729540776163, iteration: 245901
loss: 0.991577684879303,grad_norm: 0.9107324347847119, iteration: 245902
loss: 1.0051357746124268,grad_norm: 0.9868806759875951, iteration: 245903
loss: 1.0678908824920654,grad_norm: 0.8142545448810465, iteration: 245904
loss: 1.0432231426239014,grad_norm: 0.8723828768463533, iteration: 245905
loss: 1.0395203828811646,grad_norm: 0.8546479423538391, iteration: 245906
loss: 1.0049625635147095,grad_norm: 0.9984395238794008, iteration: 245907
loss: 0.9594669938087463,grad_norm: 0.9310939511293826, iteration: 245908
loss: 0.9944296479225159,grad_norm: 0.9999990176392421, iteration: 245909
loss: 1.0140435695648193,grad_norm: 0.9265760337769805, iteration: 245910
loss: 0.9924415946006775,grad_norm: 0.7386632769778607, iteration: 245911
loss: 0.9882797598838806,grad_norm: 0.8063756175541235, iteration: 245912
loss: 1.041938304901123,grad_norm: 0.9999990481201718, iteration: 245913
loss: 1.1180826425552368,grad_norm: 0.9999993771230578, iteration: 245914
loss: 1.0142993927001953,grad_norm: 0.9271942978339628, iteration: 245915
loss: 1.0079047679901123,grad_norm: 0.8242212924406024, iteration: 245916
loss: 0.975266695022583,grad_norm: 0.9999988835524727, iteration: 245917
loss: 1.0037895441055298,grad_norm: 0.9999992540799896, iteration: 245918
loss: 1.0301121473312378,grad_norm: 0.9999992029962813, iteration: 245919
loss: 1.0384830236434937,grad_norm: 0.9684404830414831, iteration: 245920
loss: 0.9907002449035645,grad_norm: 0.9999991386183742, iteration: 245921
loss: 1.0199209451675415,grad_norm: 0.8186103186444328, iteration: 245922
loss: 0.9874089360237122,grad_norm: 0.8361546896601298, iteration: 245923
loss: 1.0156428813934326,grad_norm: 0.9020371349734011, iteration: 245924
loss: 0.9942311644554138,grad_norm: 0.9161710007798469, iteration: 245925
loss: 1.0952064990997314,grad_norm: 0.9999999293116913, iteration: 245926
loss: 1.0092904567718506,grad_norm: 0.743987536341441, iteration: 245927
loss: 0.9850144982337952,grad_norm: 0.9999991087338339, iteration: 245928
loss: 0.986172616481781,grad_norm: 0.9237521598927949, iteration: 245929
loss: 1.0145877599716187,grad_norm: 0.7879561924061519, iteration: 245930
loss: 0.9941650629043579,grad_norm: 0.9999990917102675, iteration: 245931
loss: 0.9952633380889893,grad_norm: 0.8398512499936196, iteration: 245932
loss: 1.0135812759399414,grad_norm: 0.931202312031262, iteration: 245933
loss: 0.9586181640625,grad_norm: 0.8138531088804021, iteration: 245934
loss: 1.0291208028793335,grad_norm: 0.9999993453724492, iteration: 245935
loss: 0.9824061393737793,grad_norm: 0.9767853667031079, iteration: 245936
loss: 0.9965330362319946,grad_norm: 0.8120975858525256, iteration: 245937
loss: 1.0018082857131958,grad_norm: 0.8872277961347258, iteration: 245938
loss: 0.9850288033485413,grad_norm: 0.8044983341923226, iteration: 245939
loss: 1.0434942245483398,grad_norm: 0.8259008812091083, iteration: 245940
loss: 0.9754914045333862,grad_norm: 0.8834640913755464, iteration: 245941
loss: 1.116574764251709,grad_norm: 0.9999992159809672, iteration: 245942
loss: 0.9888686537742615,grad_norm: 0.8781148507216857, iteration: 245943
loss: 1.0051681995391846,grad_norm: 0.8802723697815518, iteration: 245944
loss: 1.0016005039215088,grad_norm: 0.9060112754261755, iteration: 245945
loss: 0.9923516511917114,grad_norm: 0.8704423276871535, iteration: 245946
loss: 1.0154978036880493,grad_norm: 0.7981772211100768, iteration: 245947
loss: 0.9954220056533813,grad_norm: 0.9844695515593992, iteration: 245948
loss: 0.9802955985069275,grad_norm: 0.9123762073311901, iteration: 245949
loss: 0.9788752198219299,grad_norm: 0.9335358589284849, iteration: 245950
loss: 1.0418930053710938,grad_norm: 0.9590950283741997, iteration: 245951
loss: 0.9850085973739624,grad_norm: 0.9999991731067147, iteration: 245952
loss: 1.0188169479370117,grad_norm: 0.8661915409851767, iteration: 245953
loss: 0.9922720789909363,grad_norm: 0.8627202114047771, iteration: 245954
loss: 1.0113513469696045,grad_norm: 0.8758547436085515, iteration: 245955
loss: 0.9969199895858765,grad_norm: 0.9999990424447684, iteration: 245956
loss: 0.9997361302375793,grad_norm: 0.8639856244308, iteration: 245957
loss: 1.0192322731018066,grad_norm: 0.898979289218055, iteration: 245958
loss: 1.0409245491027832,grad_norm: 0.9999992572372451, iteration: 245959
loss: 1.0378987789154053,grad_norm: 0.999999000606243, iteration: 245960
loss: 0.9922473430633545,grad_norm: 0.9638092386659634, iteration: 245961
loss: 0.9778720140457153,grad_norm: 0.8216074028709723, iteration: 245962
loss: 0.989523708820343,grad_norm: 0.9113013828444515, iteration: 245963
loss: 0.9795004725456238,grad_norm: 0.9478879171089392, iteration: 245964
loss: 0.975831925868988,grad_norm: 0.9999992562521052, iteration: 245965
loss: 1.1857737302780151,grad_norm: 0.9999998954621518, iteration: 245966
loss: 1.002576231956482,grad_norm: 0.8551242353317251, iteration: 245967
loss: 0.9768163561820984,grad_norm: 0.9122133898618958, iteration: 245968
loss: 0.9881426095962524,grad_norm: 0.9149714065429845, iteration: 245969
loss: 1.0108553171157837,grad_norm: 0.7961740975731006, iteration: 245970
loss: 1.0322529077529907,grad_norm: 0.9999993000049024, iteration: 245971
loss: 1.0141279697418213,grad_norm: 0.9999995373233627, iteration: 245972
loss: 1.0051581859588623,grad_norm: 0.8895285090059408, iteration: 245973
loss: 1.0030642747879028,grad_norm: 0.9183115172178564, iteration: 245974
loss: 1.0142335891723633,grad_norm: 0.9651401611606708, iteration: 245975
loss: 1.006060242652893,grad_norm: 0.7865495382821429, iteration: 245976
loss: 0.9926173090934753,grad_norm: 0.9999996137074212, iteration: 245977
loss: 1.0016084909439087,grad_norm: 0.9222942484996326, iteration: 245978
loss: 0.9679130911827087,grad_norm: 0.9609113420445382, iteration: 245979
loss: 1.0136607885360718,grad_norm: 0.999999103317214, iteration: 245980
loss: 1.0198763608932495,grad_norm: 0.9007684824557103, iteration: 245981
loss: 1.005569338798523,grad_norm: 0.9999997100619854, iteration: 245982
loss: 0.939331591129303,grad_norm: 0.9500706339496205, iteration: 245983
loss: 1.0166407823562622,grad_norm: 0.9999990317716488, iteration: 245984
loss: 1.0151286125183105,grad_norm: 0.953378108870973, iteration: 245985
loss: 1.0071591138839722,grad_norm: 0.9406285878901678, iteration: 245986
loss: 0.9881585836410522,grad_norm: 0.999999164222994, iteration: 245987
loss: 1.0138053894042969,grad_norm: 0.9999991146191115, iteration: 245988
loss: 1.0051345825195312,grad_norm: 0.9999990638719981, iteration: 245989
loss: 0.9859886765480042,grad_norm: 0.7924812280753913, iteration: 245990
loss: 0.9849710464477539,grad_norm: 0.917734072729575, iteration: 245991
loss: 1.017979621887207,grad_norm: 0.9999992305396568, iteration: 245992
loss: 1.0693727731704712,grad_norm: 0.9999991794660081, iteration: 245993
loss: 0.9546558856964111,grad_norm: 0.8269829110528266, iteration: 245994
loss: 0.9960868954658508,grad_norm: 0.8633118783073331, iteration: 245995
loss: 1.0114489793777466,grad_norm: 0.999999554289978, iteration: 245996
loss: 0.9884788393974304,grad_norm: 0.9751201365201808, iteration: 245997
loss: 1.0154496431350708,grad_norm: 0.9478567325844014, iteration: 245998
loss: 1.0112974643707275,grad_norm: 0.9133029946055694, iteration: 245999
loss: 1.0377269983291626,grad_norm: 0.8746385825942103, iteration: 246000
loss: 1.0022079944610596,grad_norm: 0.9583928590166113, iteration: 246001
loss: 0.9819819331169128,grad_norm: 0.8215328450857305, iteration: 246002
loss: 1.017716407775879,grad_norm: 0.9274473180996814, iteration: 246003
loss: 1.0221693515777588,grad_norm: 0.8883816907263343, iteration: 246004
loss: 1.027579665184021,grad_norm: 0.9002035565255805, iteration: 246005
loss: 1.0109230279922485,grad_norm: 0.880160142363728, iteration: 246006
loss: 0.9674938917160034,grad_norm: 0.9583719749112436, iteration: 246007
loss: 0.9891620874404907,grad_norm: 0.9999991422114786, iteration: 246008
loss: 1.0198533535003662,grad_norm: 0.8748796340387276, iteration: 246009
loss: 0.9944379925727844,grad_norm: 0.8754795953529525, iteration: 246010
loss: 1.0073142051696777,grad_norm: 0.9999996944299554, iteration: 246011
loss: 1.0110527276992798,grad_norm: 0.9473417997640841, iteration: 246012
loss: 0.9918350577354431,grad_norm: 0.8926439147159462, iteration: 246013
loss: 1.0011528730392456,grad_norm: 0.8110453185112431, iteration: 246014
loss: 1.062383770942688,grad_norm: 0.9647374561953849, iteration: 246015
loss: 0.9815836548805237,grad_norm: 0.9999991490510236, iteration: 246016
loss: 0.9588605761528015,grad_norm: 0.9461204384405125, iteration: 246017
loss: 0.9815301299095154,grad_norm: 0.7434879830426447, iteration: 246018
loss: 1.027627944946289,grad_norm: 0.8167801615856296, iteration: 246019
loss: 1.0369865894317627,grad_norm: 0.9999996286291833, iteration: 246020
loss: 0.996445894241333,grad_norm: 0.8952858472848854, iteration: 246021
loss: 1.0014243125915527,grad_norm: 0.9999996858649, iteration: 246022
loss: 0.9900684952735901,grad_norm: 0.6838132963593161, iteration: 246023
loss: 0.9918369054794312,grad_norm: 0.9999996735026151, iteration: 246024
loss: 1.0230581760406494,grad_norm: 0.8169024158603972, iteration: 246025
loss: 0.9926133751869202,grad_norm: 0.9168398418902038, iteration: 246026
loss: 1.031327724456787,grad_norm: 0.9568120637237155, iteration: 246027
loss: 0.9772592782974243,grad_norm: 0.8460538170156217, iteration: 246028
loss: 1.0162839889526367,grad_norm: 0.9999990807797392, iteration: 246029
loss: 0.9766783118247986,grad_norm: 0.83143422591713, iteration: 246030
loss: 0.983782172203064,grad_norm: 0.8464011794760506, iteration: 246031
loss: 1.0549993515014648,grad_norm: 0.9999994648845648, iteration: 246032
loss: 0.9835057258605957,grad_norm: 0.8680215438238608, iteration: 246033
loss: 0.9867461919784546,grad_norm: 0.8611435611951312, iteration: 246034
loss: 1.0188815593719482,grad_norm: 0.9401040222731575, iteration: 246035
loss: 0.9929181337356567,grad_norm: 0.890995186985447, iteration: 246036
loss: 0.9831042289733887,grad_norm: 0.9390638534199485, iteration: 246037
loss: 0.991476833820343,grad_norm: 0.8219215001121905, iteration: 246038
loss: 0.9885108470916748,grad_norm: 0.9999990647476814, iteration: 246039
loss: 1.029870629310608,grad_norm: 0.8846454505948812, iteration: 246040
loss: 1.0352592468261719,grad_norm: 0.9569030461816671, iteration: 246041
loss: 1.0264441967010498,grad_norm: 0.9039534890082297, iteration: 246042
loss: 0.9806342124938965,grad_norm: 0.8627730536890703, iteration: 246043
loss: 0.9870484471321106,grad_norm: 0.8892855684041311, iteration: 246044
loss: 0.9889805316925049,grad_norm: 0.8825186599364352, iteration: 246045
loss: 1.0101507902145386,grad_norm: 0.9218004929133311, iteration: 246046
loss: 1.0048109292984009,grad_norm: 0.9999992066225161, iteration: 246047
loss: 1.0061261653900146,grad_norm: 0.996317355299757, iteration: 246048
loss: 0.9815175533294678,grad_norm: 0.877619484459399, iteration: 246049
loss: 0.9877484440803528,grad_norm: 0.9026999786453261, iteration: 246050
loss: 1.0090299844741821,grad_norm: 0.9999993806059131, iteration: 246051
loss: 1.0168744325637817,grad_norm: 0.9999991020020662, iteration: 246052
loss: 0.9722694754600525,grad_norm: 0.9956489574100231, iteration: 246053
loss: 0.972074031829834,grad_norm: 0.8529876473970006, iteration: 246054
loss: 1.0094549655914307,grad_norm: 0.9216972922498373, iteration: 246055
loss: 0.9922739863395691,grad_norm: 0.8972444258151935, iteration: 246056
loss: 0.9764594435691833,grad_norm: 0.924060084651785, iteration: 246057
loss: 1.0218013525009155,grad_norm: 0.7604577710407372, iteration: 246058
loss: 1.0030850172042847,grad_norm: 0.9558564688322433, iteration: 246059
loss: 0.9886769652366638,grad_norm: 0.8702126733310731, iteration: 246060
loss: 0.9948202967643738,grad_norm: 0.7892853318153208, iteration: 246061
loss: 1.0029064416885376,grad_norm: 0.9999992488464399, iteration: 246062
loss: 1.0232406854629517,grad_norm: 0.9999991722210076, iteration: 246063
loss: 1.04691743850708,grad_norm: 0.8972715468568191, iteration: 246064
loss: 0.9846030473709106,grad_norm: 0.9318881766884954, iteration: 246065
loss: 1.0643947124481201,grad_norm: 0.9999992740971939, iteration: 246066
loss: 0.9716874957084656,grad_norm: 0.9999989607622932, iteration: 246067
loss: 1.0165151357650757,grad_norm: 0.9956717350647658, iteration: 246068
loss: 1.0210813283920288,grad_norm: 0.8986039301183572, iteration: 246069
loss: 1.0183008909225464,grad_norm: 0.8383309908749446, iteration: 246070
loss: 1.0012445449829102,grad_norm: 0.9895064848204912, iteration: 246071
loss: 0.9861875176429749,grad_norm: 0.7147682764773976, iteration: 246072
loss: 0.9667869210243225,grad_norm: 0.999998952007129, iteration: 246073
loss: 1.0217312574386597,grad_norm: 0.878280618631274, iteration: 246074
loss: 0.9686266779899597,grad_norm: 0.9400156973231895, iteration: 246075
loss: 0.9919842481613159,grad_norm: 0.8282855241594933, iteration: 246076
loss: 1.0014324188232422,grad_norm: 0.8009023975472046, iteration: 246077
loss: 1.0043143033981323,grad_norm: 0.9005789810110514, iteration: 246078
loss: 1.0143282413482666,grad_norm: 0.9999992430203593, iteration: 246079
loss: 0.9797263741493225,grad_norm: 0.9999990272600492, iteration: 246080
loss: 1.0122023820877075,grad_norm: 0.9013527415076155, iteration: 246081
loss: 1.0055406093597412,grad_norm: 0.9080068965993099, iteration: 246082
loss: 1.1097804307937622,grad_norm: 0.9999996550753767, iteration: 246083
loss: 0.9918842911720276,grad_norm: 0.8795615657168853, iteration: 246084
loss: 0.9732015132904053,grad_norm: 0.8893275316497657, iteration: 246085
loss: 1.0040420293807983,grad_norm: 0.9999999450770829, iteration: 246086
loss: 0.9574657082557678,grad_norm: 0.7890166406222057, iteration: 246087
loss: 0.9959469437599182,grad_norm: 0.9125857518930319, iteration: 246088
loss: 0.9868975281715393,grad_norm: 0.9999991224088483, iteration: 246089
loss: 1.024802327156067,grad_norm: 0.9999991096457326, iteration: 246090
loss: 1.0101672410964966,grad_norm: 0.7470435258742114, iteration: 246091
loss: 1.0016522407531738,grad_norm: 0.8009683057252526, iteration: 246092
loss: 0.9982141852378845,grad_norm: 0.9999992320425063, iteration: 246093
loss: 1.0877406597137451,grad_norm: 0.99999901712975, iteration: 246094
loss: 1.0067307949066162,grad_norm: 0.9999990472466727, iteration: 246095
loss: 0.978765070438385,grad_norm: 0.7540767943881075, iteration: 246096
loss: 0.9952139258384705,grad_norm: 0.7997252999564547, iteration: 246097
loss: 0.9767684936523438,grad_norm: 0.9977950898296969, iteration: 246098
loss: 1.0171383619308472,grad_norm: 0.8623977376229384, iteration: 246099
loss: 0.9738765358924866,grad_norm: 0.7831716021165652, iteration: 246100
loss: 1.0015676021575928,grad_norm: 0.7415380357570707, iteration: 246101
loss: 0.9932597875595093,grad_norm: 0.9999989857426677, iteration: 246102
loss: 0.9997602105140686,grad_norm: 0.8040569557691952, iteration: 246103
loss: 1.0102554559707642,grad_norm: 0.938330005673231, iteration: 246104
loss: 1.0421607494354248,grad_norm: 0.9579466597352021, iteration: 246105
loss: 0.9881629347801208,grad_norm: 0.9430076401750753, iteration: 246106
loss: 0.9707720875740051,grad_norm: 0.9999991741932437, iteration: 246107
loss: 1.0315686464309692,grad_norm: 0.999999028277755, iteration: 246108
loss: 0.998163104057312,grad_norm: 0.9236924059517583, iteration: 246109
loss: 0.9734829068183899,grad_norm: 0.9999991531903337, iteration: 246110
loss: 1.0693416595458984,grad_norm: 0.9999990893228633, iteration: 246111
loss: 1.018708348274231,grad_norm: 0.9999999700162036, iteration: 246112
loss: 0.9530820846557617,grad_norm: 0.9999989997783383, iteration: 246113
loss: 0.9861352443695068,grad_norm: 0.8995396195660549, iteration: 246114
loss: 0.9885337352752686,grad_norm: 0.8664047748138266, iteration: 246115
loss: 0.9932243227958679,grad_norm: 0.9856392483456857, iteration: 246116
loss: 0.9981676340103149,grad_norm: 0.8895113453033217, iteration: 246117
loss: 0.981708824634552,grad_norm: 0.9301713922485326, iteration: 246118
loss: 1.0101584196090698,grad_norm: 0.9715329251478183, iteration: 246119
loss: 0.9711190462112427,grad_norm: 0.9882562071233641, iteration: 246120
loss: 1.0174965858459473,grad_norm: 0.9682614118854562, iteration: 246121
loss: 1.0312649011611938,grad_norm: 0.9999998082475772, iteration: 246122
loss: 1.0370503664016724,grad_norm: 0.9875103346774009, iteration: 246123
loss: 1.0461961030960083,grad_norm: 0.9834827037563997, iteration: 246124
loss: 0.9963634610176086,grad_norm: 0.8456270325669689, iteration: 246125
loss: 0.9945764541625977,grad_norm: 0.88399277444572, iteration: 246126
loss: 1.031567931175232,grad_norm: 0.9582160905660978, iteration: 246127
loss: 0.992283284664154,grad_norm: 0.9638905211393923, iteration: 246128
loss: 1.0149575471878052,grad_norm: 0.9999992465174885, iteration: 246129
loss: 0.9734712839126587,grad_norm: 0.7146958234513404, iteration: 246130
loss: 1.021994948387146,grad_norm: 0.9290826470327802, iteration: 246131
loss: 1.0245726108551025,grad_norm: 0.8836777063711297, iteration: 246132
loss: 1.0132235288619995,grad_norm: 0.9085962205899399, iteration: 246133
loss: 0.9881282448768616,grad_norm: 0.8947892743523674, iteration: 246134
loss: 0.9859022498130798,grad_norm: 0.7616508044230218, iteration: 246135
loss: 1.0011992454528809,grad_norm: 0.871950153007098, iteration: 246136
loss: 1.0026185512542725,grad_norm: 0.9111526286710473, iteration: 246137
loss: 0.966550886631012,grad_norm: 0.9947453302750899, iteration: 246138
loss: 1.0287652015686035,grad_norm: 0.9580320975219141, iteration: 246139
loss: 1.0328449010849,grad_norm: 0.9215907362212483, iteration: 246140
loss: 0.9895720481872559,grad_norm: 0.8287113228637727, iteration: 246141
loss: 1.0306166410446167,grad_norm: 0.9999991894293697, iteration: 246142
loss: 1.000718355178833,grad_norm: 0.9649390215495267, iteration: 246143
loss: 1.0168410539627075,grad_norm: 0.9715655471371413, iteration: 246144
loss: 1.011296272277832,grad_norm: 0.9999999688524689, iteration: 246145
loss: 1.0151233673095703,grad_norm: 0.999999145480709, iteration: 246146
loss: 0.9895256757736206,grad_norm: 0.9527799691347079, iteration: 246147
loss: 1.0211167335510254,grad_norm: 0.8915400388930511, iteration: 246148
loss: 0.9896413683891296,grad_norm: 0.9028935456465212, iteration: 246149
loss: 0.9947060942649841,grad_norm: 0.9999991073227338, iteration: 246150
loss: 0.9999940395355225,grad_norm: 0.9999991874822315, iteration: 246151
loss: 0.9738508462905884,grad_norm: 0.8561737506493838, iteration: 246152
loss: 0.9494486451148987,grad_norm: 0.8536661733373726, iteration: 246153
loss: 1.094749927520752,grad_norm: 0.9999998570018065, iteration: 246154
loss: 1.000349760055542,grad_norm: 0.999999274229858, iteration: 246155
loss: 1.0081123113632202,grad_norm: 0.9635662978005054, iteration: 246156
loss: 1.027052640914917,grad_norm: 0.7322674645659689, iteration: 246157
loss: 0.9802611470222473,grad_norm: 0.9168805106629233, iteration: 246158
loss: 1.0026392936706543,grad_norm: 0.8124987726482317, iteration: 246159
loss: 0.9759012460708618,grad_norm: 0.9663656854167678, iteration: 246160
loss: 0.9840821027755737,grad_norm: 0.9060679891201483, iteration: 246161
loss: 1.0142686367034912,grad_norm: 0.999999587840251, iteration: 246162
loss: 0.9864484071731567,grad_norm: 0.9114954655247631, iteration: 246163
loss: 1.0804648399353027,grad_norm: 0.9999992649676791, iteration: 246164
loss: 0.9918805956840515,grad_norm: 0.7989811696245589, iteration: 246165
loss: 0.963416337966919,grad_norm: 0.9399195473961675, iteration: 246166
loss: 1.0114312171936035,grad_norm: 0.8661171051925103, iteration: 246167
loss: 1.0382601022720337,grad_norm: 0.9925563222533437, iteration: 246168
loss: 0.9878256916999817,grad_norm: 0.7588309120754418, iteration: 246169
loss: 0.9700127243995667,grad_norm: 0.999999749097493, iteration: 246170
loss: 0.9687376618385315,grad_norm: 0.7969537386343176, iteration: 246171
loss: 0.9971492886543274,grad_norm: 0.8011123197868092, iteration: 246172
loss: 1.0009610652923584,grad_norm: 0.8809807176228251, iteration: 246173
loss: 0.9640753269195557,grad_norm: 0.8709047295690584, iteration: 246174
loss: 0.9983144402503967,grad_norm: 0.9591583525549372, iteration: 246175
loss: 1.0217453241348267,grad_norm: 0.9999990338506305, iteration: 246176
loss: 0.9760321974754333,grad_norm: 0.7814520095779067, iteration: 246177
loss: 1.0098198652267456,grad_norm: 0.8087807042611506, iteration: 246178
loss: 0.9922882318496704,grad_norm: 0.79619321565031, iteration: 246179
loss: 1.0122668743133545,grad_norm: 0.8487004222789565, iteration: 246180
loss: 1.0110114812850952,grad_norm: 0.9999997632926834, iteration: 246181
loss: 1.052803874015808,grad_norm: 0.917466669070566, iteration: 246182
loss: 1.0059558153152466,grad_norm: 0.8878762318619879, iteration: 246183
loss: 1.007236123085022,grad_norm: 0.9999994651227446, iteration: 246184
loss: 1.000248908996582,grad_norm: 0.8163988924976039, iteration: 246185
loss: 1.0021840333938599,grad_norm: 0.9793857371801457, iteration: 246186
loss: 0.9991796612739563,grad_norm: 0.9999998473082851, iteration: 246187
loss: 0.9982089996337891,grad_norm: 0.833792004264037, iteration: 246188
loss: 1.009192705154419,grad_norm: 0.79464824331634, iteration: 246189
loss: 0.9970097541809082,grad_norm: 0.9537219801515642, iteration: 246190
loss: 0.9759796857833862,grad_norm: 0.9306590811879064, iteration: 246191
loss: 1.0295343399047852,grad_norm: 0.7879888351194088, iteration: 246192
loss: 1.0182496309280396,grad_norm: 0.886671015454028, iteration: 246193
loss: 1.0073328018188477,grad_norm: 0.996612088213672, iteration: 246194
loss: 1.0180999040603638,grad_norm: 0.9999992725060771, iteration: 246195
loss: 0.9587209224700928,grad_norm: 0.8489863870887887, iteration: 246196
loss: 1.0020369291305542,grad_norm: 0.7681289271571533, iteration: 246197
loss: 0.9791289567947388,grad_norm: 0.9999989893154868, iteration: 246198
loss: 1.0033446550369263,grad_norm: 0.999999137834589, iteration: 246199
loss: 1.0048764944076538,grad_norm: 0.9043731802919023, iteration: 246200
loss: 0.9975277781486511,grad_norm: 0.9402607688822225, iteration: 246201
loss: 0.9556010961532593,grad_norm: 0.8638547231343119, iteration: 246202
loss: 0.9834011793136597,grad_norm: 0.9999990091911468, iteration: 246203
loss: 1.014384388923645,grad_norm: 0.9999991979886359, iteration: 246204
loss: 0.9899048209190369,grad_norm: 0.9873125761790862, iteration: 246205
loss: 0.9908513426780701,grad_norm: 0.8296573999598443, iteration: 246206
loss: 0.9894856810569763,grad_norm: 0.9108695953053089, iteration: 246207
loss: 0.9955673813819885,grad_norm: 0.8455468102906872, iteration: 246208
loss: 1.0131471157073975,grad_norm: 0.8235965936890043, iteration: 246209
loss: 1.0076625347137451,grad_norm: 0.8548698743538133, iteration: 246210
loss: 0.9695121645927429,grad_norm: 0.8424164679521016, iteration: 246211
loss: 1.013919472694397,grad_norm: 0.9860300020983809, iteration: 246212
loss: 0.9930609464645386,grad_norm: 0.8667175611642635, iteration: 246213
loss: 1.0488216876983643,grad_norm: 0.882769176794706, iteration: 246214
loss: 0.9996429085731506,grad_norm: 0.9999994433464928, iteration: 246215
loss: 1.0055986642837524,grad_norm: 0.9302336645975947, iteration: 246216
loss: 1.0141366720199585,grad_norm: 0.9999990973768667, iteration: 246217
loss: 1.0027740001678467,grad_norm: 0.9712967917916482, iteration: 246218
loss: 1.0088706016540527,grad_norm: 0.7891084437659047, iteration: 246219
loss: 0.9877896308898926,grad_norm: 0.8971950440711999, iteration: 246220
loss: 0.9871983528137207,grad_norm: 0.8772231186410527, iteration: 246221
loss: 1.0052121877670288,grad_norm: 0.8568673603440582, iteration: 246222
loss: 0.9926503896713257,grad_norm: 0.9822012149299743, iteration: 246223
loss: 1.0237395763397217,grad_norm: 0.9999991563127358, iteration: 246224
loss: 0.9782730340957642,grad_norm: 0.9999990206494442, iteration: 246225
loss: 1.028915286064148,grad_norm: 0.7282963187910881, iteration: 246226
loss: 0.9300620555877686,grad_norm: 0.8372694582562906, iteration: 246227
loss: 1.0302420854568481,grad_norm: 0.8904128718127898, iteration: 246228
loss: 0.9806296229362488,grad_norm: 0.8376456044849874, iteration: 246229
loss: 1.0472089052200317,grad_norm: 0.9999991431563293, iteration: 246230
loss: 0.9908396005630493,grad_norm: 0.8753487566222906, iteration: 246231
loss: 0.9905807375907898,grad_norm: 0.8474629950211445, iteration: 246232
loss: 1.028775930404663,grad_norm: 0.7944431002093693, iteration: 246233
loss: 1.0001704692840576,grad_norm: 0.9937411207554695, iteration: 246234
loss: 1.009361982345581,grad_norm: 0.9474290453847215, iteration: 246235
loss: 1.0111843347549438,grad_norm: 0.7460523777305661, iteration: 246236
loss: 1.0160571336746216,grad_norm: 0.9999992161390397, iteration: 246237
loss: 0.9966785311698914,grad_norm: 0.9575824352303208, iteration: 246238
loss: 1.013784646987915,grad_norm: 0.9881252376746904, iteration: 246239
loss: 1.0313535928726196,grad_norm: 0.7881809489586432, iteration: 246240
loss: 0.9959788918495178,grad_norm: 0.9060166963464419, iteration: 246241
loss: 1.0013591051101685,grad_norm: 0.9820233023255786, iteration: 246242
loss: 0.9812585711479187,grad_norm: 0.8571812662611846, iteration: 246243
loss: 1.0146305561065674,grad_norm: 0.7373738073287254, iteration: 246244
loss: 0.9735356569290161,grad_norm: 0.9999991267707528, iteration: 246245
loss: 0.9859579801559448,grad_norm: 0.9999992016562135, iteration: 246246
loss: 1.0029946565628052,grad_norm: 0.9999992563474799, iteration: 246247
loss: 1.0039156675338745,grad_norm: 0.7998167122563773, iteration: 246248
loss: 0.9725660681724548,grad_norm: 0.9322151633263651, iteration: 246249
loss: 0.9844256043434143,grad_norm: 0.8483310837203116, iteration: 246250
loss: 0.9475792050361633,grad_norm: 0.7917781688728466, iteration: 246251
loss: 1.0112842321395874,grad_norm: 0.9387515645127621, iteration: 246252
loss: 1.0055710077285767,grad_norm: 0.8583058630507128, iteration: 246253
loss: 1.0038360357284546,grad_norm: 0.8261879217245967, iteration: 246254
loss: 1.032523512840271,grad_norm: 0.961898461424426, iteration: 246255
loss: 1.017099142074585,grad_norm: 0.7856996751503164, iteration: 246256
loss: 1.02726149559021,grad_norm: 0.9999990706834982, iteration: 246257
loss: 1.070217490196228,grad_norm: 0.9652090165499808, iteration: 246258
loss: 1.0074466466903687,grad_norm: 0.905188947789278, iteration: 246259
loss: 0.9972571134567261,grad_norm: 0.8724073687640472, iteration: 246260
loss: 0.9952722191810608,grad_norm: 0.8210921145414323, iteration: 246261
loss: 0.994209885597229,grad_norm: 0.8564606744943085, iteration: 246262
loss: 1.026845932006836,grad_norm: 0.794196630672433, iteration: 246263
loss: 0.9876075387001038,grad_norm: 0.9999991624876606, iteration: 246264
loss: 0.9619286060333252,grad_norm: 0.9077841030237567, iteration: 246265
loss: 0.9856562614440918,grad_norm: 0.9999990575343594, iteration: 246266
loss: 1.0200117826461792,grad_norm: 0.9738149299185352, iteration: 246267
loss: 0.9792180061340332,grad_norm: 0.8962831543976053, iteration: 246268
loss: 1.0208052396774292,grad_norm: 0.9459321742050285, iteration: 246269
loss: 1.0168101787567139,grad_norm: 0.9999990288629711, iteration: 246270
loss: 0.9758743047714233,grad_norm: 0.9999991187300437, iteration: 246271
loss: 1.0086610317230225,grad_norm: 0.8749631286220266, iteration: 246272
loss: 1.0382944345474243,grad_norm: 0.9999995440551329, iteration: 246273
loss: 1.0151280164718628,grad_norm: 0.9999994158484862, iteration: 246274
loss: 0.9773465991020203,grad_norm: 0.9544694658560062, iteration: 246275
loss: 1.0125006437301636,grad_norm: 0.9042493068632329, iteration: 246276
loss: 0.9852501153945923,grad_norm: 0.8786142336678207, iteration: 246277
loss: 1.0287995338439941,grad_norm: 0.9161338740102125, iteration: 246278
loss: 0.9816016554832458,grad_norm: 0.9046305039204886, iteration: 246279
loss: 0.9853753447532654,grad_norm: 0.8952719269831184, iteration: 246280
loss: 0.9975069761276245,grad_norm: 0.9999990150494606, iteration: 246281
loss: 0.9936636090278625,grad_norm: 0.9032676048328899, iteration: 246282
loss: 1.0112520456314087,grad_norm: 0.999999223382169, iteration: 246283
loss: 1.016952395439148,grad_norm: 0.9942945623514641, iteration: 246284
loss: 0.9760972857475281,grad_norm: 0.9999990864668383, iteration: 246285
loss: 1.0539804697036743,grad_norm: 0.8070323835827545, iteration: 246286
loss: 1.011768102645874,grad_norm: 0.9671278696417269, iteration: 246287
loss: 1.0155620574951172,grad_norm: 0.8515762832765529, iteration: 246288
loss: 0.9534528851509094,grad_norm: 0.7744185060742043, iteration: 246289
loss: 0.9444873332977295,grad_norm: 0.869689034246095, iteration: 246290
loss: 1.0285004377365112,grad_norm: 0.8986196324885755, iteration: 246291
loss: 1.0264698266983032,grad_norm: 0.8757312375122719, iteration: 246292
loss: 0.9891348481178284,grad_norm: 0.8628165052644698, iteration: 246293
loss: 0.9699224233627319,grad_norm: 0.9810956088744262, iteration: 246294
loss: 1.0046720504760742,grad_norm: 0.7710211671992402, iteration: 246295
loss: 0.9820269346237183,grad_norm: 0.8295548341290102, iteration: 246296
loss: 0.9432408809661865,grad_norm: 0.9999989593015368, iteration: 246297
loss: 0.993205189704895,grad_norm: 0.9999990311286634, iteration: 246298
loss: 1.0224841833114624,grad_norm: 0.8790962408936327, iteration: 246299
loss: 0.9512714147567749,grad_norm: 0.9346171573195381, iteration: 246300
loss: 1.012016773223877,grad_norm: 0.9999991103007105, iteration: 246301
loss: 0.9458189606666565,grad_norm: 0.9198736511045286, iteration: 246302
loss: 1.011818528175354,grad_norm: 0.8192504726435197, iteration: 246303
loss: 0.9710853695869446,grad_norm: 0.8531706177690801, iteration: 246304
loss: 0.9877104759216309,grad_norm: 0.9653443310325779, iteration: 246305
loss: 1.0067429542541504,grad_norm: 0.8232150852440315, iteration: 246306
loss: 1.0390546321868896,grad_norm: 0.9999991408359683, iteration: 246307
loss: 1.0046559572219849,grad_norm: 0.9885274012533471, iteration: 246308
loss: 1.0074046850204468,grad_norm: 0.9999990047797273, iteration: 246309
loss: 0.9997802972793579,grad_norm: 0.8891940364719803, iteration: 246310
loss: 1.195529580116272,grad_norm: 0.9999998607026942, iteration: 246311
loss: 1.0032185316085815,grad_norm: 0.9672334093827843, iteration: 246312
loss: 0.9981665015220642,grad_norm: 0.8367722714039414, iteration: 246313
loss: 0.9775699377059937,grad_norm: 0.9999991210310307, iteration: 246314
loss: 1.0716129541397095,grad_norm: 0.9999999406124618, iteration: 246315
loss: 0.9962400197982788,grad_norm: 0.8848974610508736, iteration: 246316
loss: 0.9986621737480164,grad_norm: 0.8307636849354386, iteration: 246317
loss: 1.0046825408935547,grad_norm: 0.9529444689242202, iteration: 246318
loss: 0.996448814868927,grad_norm: 0.9489214650175508, iteration: 246319
loss: 1.0355430841445923,grad_norm: 0.9999994011981063, iteration: 246320
loss: 1.0108128786087036,grad_norm: 0.822621396683971, iteration: 246321
loss: 0.9892358183860779,grad_norm: 0.960752241904101, iteration: 246322
loss: 0.9818419814109802,grad_norm: 0.8941885962885364, iteration: 246323
loss: 0.9716834425926208,grad_norm: 0.904087315008964, iteration: 246324
loss: 1.0083118677139282,grad_norm: 0.8607968314079052, iteration: 246325
loss: 1.0569133758544922,grad_norm: 0.999999798844142, iteration: 246326
loss: 0.9899749159812927,grad_norm: 0.7937247148642215, iteration: 246327
loss: 0.9854831695556641,grad_norm: 0.826500293665275, iteration: 246328
loss: 1.0217214822769165,grad_norm: 0.9079336745769508, iteration: 246329
loss: 1.0426043272018433,grad_norm: 0.9754811199942014, iteration: 246330
loss: 0.9912319779396057,grad_norm: 0.7862045850057933, iteration: 246331
loss: 0.9905108213424683,grad_norm: 0.9491870641833889, iteration: 246332
loss: 0.9770412445068359,grad_norm: 0.8257146883553128, iteration: 246333
loss: 0.9601248502731323,grad_norm: 0.9067243537056136, iteration: 246334
loss: 1.0004334449768066,grad_norm: 0.9999991411467796, iteration: 246335
loss: 1.039772391319275,grad_norm: 0.9237326826364661, iteration: 246336
loss: 1.031247854232788,grad_norm: 0.9076327561794941, iteration: 246337
loss: 0.9991018772125244,grad_norm: 0.8759825626353929, iteration: 246338
loss: 1.092186450958252,grad_norm: 0.9999997530902802, iteration: 246339
loss: 1.0278804302215576,grad_norm: 0.9559901944503649, iteration: 246340
loss: 1.0547254085540771,grad_norm: 0.8659552172243177, iteration: 246341
loss: 0.9643682837486267,grad_norm: 0.9226716597375734, iteration: 246342
loss: 1.005174160003662,grad_norm: 0.9266982921901947, iteration: 246343
loss: 0.9940112829208374,grad_norm: 0.9999991481751475, iteration: 246344
loss: 0.9912049174308777,grad_norm: 0.918400865843407, iteration: 246345
loss: 1.0103645324707031,grad_norm: 0.882807507085694, iteration: 246346
loss: 1.0109062194824219,grad_norm: 0.9968456717389838, iteration: 246347
loss: 1.002448320388794,grad_norm: 0.9139127855436598, iteration: 246348
loss: 0.9880467057228088,grad_norm: 0.8829313129607572, iteration: 246349
loss: 1.0334736108779907,grad_norm: 0.9999996010957233, iteration: 246350
loss: 1.092251181602478,grad_norm: 0.9999991360065511, iteration: 246351
loss: 1.0107622146606445,grad_norm: 0.8990621082037875, iteration: 246352
loss: 1.0185959339141846,grad_norm: 0.9168173802405919, iteration: 246353
loss: 0.9923709034919739,grad_norm: 0.8263107831509896, iteration: 246354
loss: 1.0152935981750488,grad_norm: 0.9999996151423246, iteration: 246355
loss: 0.9896630644798279,grad_norm: 0.9448651412508599, iteration: 246356
loss: 1.033799171447754,grad_norm: 0.9999990628209658, iteration: 246357
loss: 0.9448897242546082,grad_norm: 0.9084305772309766, iteration: 246358
loss: 0.9994750022888184,grad_norm: 0.7293011142596179, iteration: 246359
loss: 1.0029592514038086,grad_norm: 0.8711647949378802, iteration: 246360
loss: 0.9888787269592285,grad_norm: 0.9171683791717108, iteration: 246361
loss: 1.0411368608474731,grad_norm: 0.8981447918424956, iteration: 246362
loss: 0.9990524053573608,grad_norm: 0.8472721059059011, iteration: 246363
loss: 1.0678679943084717,grad_norm: 0.999999360959101, iteration: 246364
loss: 0.9912704229354858,grad_norm: 0.9463223849797122, iteration: 246365
loss: 1.051934003829956,grad_norm: 0.8749879471133192, iteration: 246366
loss: 1.0245435237884521,grad_norm: 0.9999998111131246, iteration: 246367
loss: 1.0464940071105957,grad_norm: 0.9999998717319071, iteration: 246368
loss: 1.0273571014404297,grad_norm: 0.999999189353132, iteration: 246369
loss: 0.987553596496582,grad_norm: 0.8519519313798845, iteration: 246370
loss: 1.0112751722335815,grad_norm: 0.9217746813569536, iteration: 246371
loss: 1.0766241550445557,grad_norm: 0.9999992133544857, iteration: 246372
loss: 1.0120238065719604,grad_norm: 0.9999995659352462, iteration: 246373
loss: 0.9835903644561768,grad_norm: 0.8023274454606261, iteration: 246374
loss: 1.0225175619125366,grad_norm: 0.852590860462641, iteration: 246375
loss: 1.0134304761886597,grad_norm: 0.8459003328455293, iteration: 246376
loss: 0.9764932990074158,grad_norm: 0.9128920000272629, iteration: 246377
loss: 0.992402195930481,grad_norm: 0.8462459776075468, iteration: 246378
loss: 0.9888851642608643,grad_norm: 0.9303930404543835, iteration: 246379
loss: 0.9958541393280029,grad_norm: 0.9999990528263449, iteration: 246380
loss: 0.986177384853363,grad_norm: 0.9127848497240336, iteration: 246381
loss: 1.0130078792572021,grad_norm: 0.9999990159020419, iteration: 246382
loss: 1.0192019939422607,grad_norm: 0.9569384806040014, iteration: 246383
loss: 0.9551961421966553,grad_norm: 0.9999989831349436, iteration: 246384
loss: 1.0255674123764038,grad_norm: 0.9999990816783857, iteration: 246385
loss: 1.0167287588119507,grad_norm: 0.8234581377837228, iteration: 246386
loss: 1.0513118505477905,grad_norm: 0.9971851322745559, iteration: 246387
loss: 0.9688390493392944,grad_norm: 0.8680683136723698, iteration: 246388
loss: 0.9637693166732788,grad_norm: 0.819746925839344, iteration: 246389
loss: 0.9792453646659851,grad_norm: 0.9999991572663464, iteration: 246390
loss: 0.9881309866905212,grad_norm: 0.8483418190041393, iteration: 246391
loss: 1.0415620803833008,grad_norm: 0.8349011881704325, iteration: 246392
loss: 1.0370944738388062,grad_norm: 0.9999991529612374, iteration: 246393
loss: 0.9672738909721375,grad_norm: 0.7952244115903102, iteration: 246394
loss: 0.9789599180221558,grad_norm: 0.7329141911262549, iteration: 246395
loss: 0.9956340193748474,grad_norm: 0.8771550194669245, iteration: 246396
loss: 0.9926266074180603,grad_norm: 0.9999989509320607, iteration: 246397
loss: 0.9856848120689392,grad_norm: 0.9999995046192198, iteration: 246398
loss: 0.9953553676605225,grad_norm: 0.891266452491806, iteration: 246399
loss: 0.970672607421875,grad_norm: 0.8261901882240572, iteration: 246400
loss: 0.9741770029067993,grad_norm: 0.8909046193081551, iteration: 246401
loss: 1.0009461641311646,grad_norm: 0.87840930581139, iteration: 246402
loss: 1.0119611024856567,grad_norm: 0.9885344189256777, iteration: 246403
loss: 0.9733587503433228,grad_norm: 0.8819157782481963, iteration: 246404
loss: 1.003403902053833,grad_norm: 0.8454686383799445, iteration: 246405
loss: 0.983948826789856,grad_norm: 0.9999989706653213, iteration: 246406
loss: 0.9788274765014648,grad_norm: 0.9742219050302362, iteration: 246407
loss: 0.9961389303207397,grad_norm: 0.8909934541282336, iteration: 246408
loss: 0.996635913848877,grad_norm: 0.9915593561686598, iteration: 246409
loss: 0.9381699562072754,grad_norm: 0.999999142817367, iteration: 246410
loss: 1.0122264623641968,grad_norm: 0.7503025970598179, iteration: 246411
loss: 0.9867921471595764,grad_norm: 0.775468161103922, iteration: 246412
loss: 0.9699869751930237,grad_norm: 0.9999990389180835, iteration: 246413
loss: 0.9852213263511658,grad_norm: 0.9449394634946615, iteration: 246414
loss: 1.0139515399932861,grad_norm: 0.846128488164294, iteration: 246415
loss: 0.9987041354179382,grad_norm: 0.999999100230342, iteration: 246416
loss: 1.0109100341796875,grad_norm: 0.9999990219452017, iteration: 246417
loss: 0.9768005609512329,grad_norm: 0.802987249594511, iteration: 246418
loss: 0.9543088674545288,grad_norm: 0.8833276348531468, iteration: 246419
loss: 0.9925781488418579,grad_norm: 0.811066546978128, iteration: 246420
loss: 0.9717622995376587,grad_norm: 0.7529823325140249, iteration: 246421
loss: 0.9963781237602234,grad_norm: 0.8762357963164205, iteration: 246422
loss: 0.9958609938621521,grad_norm: 0.9999991121979739, iteration: 246423
loss: 1.0021166801452637,grad_norm: 0.9015883682391002, iteration: 246424
loss: 1.0304982662200928,grad_norm: 0.9040357701808353, iteration: 246425
loss: 0.997192919254303,grad_norm: 0.9999991477346364, iteration: 246426
loss: 1.0021106004714966,grad_norm: 0.8080864911296812, iteration: 246427
loss: 0.9602295160293579,grad_norm: 0.9834963806278547, iteration: 246428
loss: 0.9994274973869324,grad_norm: 0.9415182612902189, iteration: 246429
loss: 1.020895004272461,grad_norm: 0.9999992208718943, iteration: 246430
loss: 0.9905248284339905,grad_norm: 0.8200256399829798, iteration: 246431
loss: 1.0353035926818848,grad_norm: 0.999999195251237, iteration: 246432
loss: 1.0081379413604736,grad_norm: 0.7788670685874203, iteration: 246433
loss: 1.0286935567855835,grad_norm: 0.9999996984202003, iteration: 246434
loss: 0.9874618649482727,grad_norm: 0.9005560479915524, iteration: 246435
loss: 1.0792292356491089,grad_norm: 0.9999993231580032, iteration: 246436
loss: 1.0451431274414062,grad_norm: 0.8693758484483879, iteration: 246437
loss: 0.9988696575164795,grad_norm: 0.9999990353053408, iteration: 246438
loss: 0.9845133423805237,grad_norm: 0.8502329537219292, iteration: 246439
loss: 1.0080246925354004,grad_norm: 0.9130175180830842, iteration: 246440
loss: 0.9901503920555115,grad_norm: 0.9999991446916207, iteration: 246441
loss: 0.9885094165802002,grad_norm: 0.9999990891256002, iteration: 246442
loss: 0.9997655749320984,grad_norm: 0.9999998405496023, iteration: 246443
loss: 1.005409836769104,grad_norm: 0.7873114553782211, iteration: 246444
loss: 1.0114587545394897,grad_norm: 0.9766060099913937, iteration: 246445
loss: 1.0299956798553467,grad_norm: 0.9091405884853715, iteration: 246446
loss: 0.9743798971176147,grad_norm: 0.999999381774183, iteration: 246447
loss: 0.9906820058822632,grad_norm: 0.9396643089004525, iteration: 246448
loss: 0.9729663729667664,grad_norm: 0.9006346557519175, iteration: 246449
loss: 0.9846795201301575,grad_norm: 0.797184251266542, iteration: 246450
loss: 0.9888170957565308,grad_norm: 0.9857891927391711, iteration: 246451
loss: 0.9620134234428406,grad_norm: 0.9310000804220345, iteration: 246452
loss: 0.9878343939781189,grad_norm: 0.9532009985825936, iteration: 246453
loss: 0.9957259893417358,grad_norm: 0.7304582088755833, iteration: 246454
loss: 1.0109589099884033,grad_norm: 0.8451250427187152, iteration: 246455
loss: 1.0386015176773071,grad_norm: 0.8725322616275588, iteration: 246456
loss: 1.0244338512420654,grad_norm: 0.9529199530715948, iteration: 246457
loss: 0.9842514991760254,grad_norm: 0.795270944055761, iteration: 246458
loss: 0.9728378057479858,grad_norm: 0.7619863285304758, iteration: 246459
loss: 1.0127429962158203,grad_norm: 0.9173869622969827, iteration: 246460
loss: 1.0072492361068726,grad_norm: 0.898126419644631, iteration: 246461
loss: 0.9851678609848022,grad_norm: 0.9150710052150868, iteration: 246462
loss: 0.9991692900657654,grad_norm: 0.993678080936477, iteration: 246463
loss: 0.97832852602005,grad_norm: 0.8817637869190018, iteration: 246464
loss: 1.0196231603622437,grad_norm: 0.982241592005967, iteration: 246465
loss: 0.988281786441803,grad_norm: 0.8666292745366742, iteration: 246466
loss: 1.0282769203186035,grad_norm: 0.7915306061720653, iteration: 246467
loss: 1.0099000930786133,grad_norm: 0.9585386875704274, iteration: 246468
loss: 0.9968448281288147,grad_norm: 0.9177814096572686, iteration: 246469
loss: 1.026609182357788,grad_norm: 0.9089739441200752, iteration: 246470
loss: 0.9948509335517883,grad_norm: 0.8583406672016168, iteration: 246471
loss: 1.0127692222595215,grad_norm: 0.9999990827338336, iteration: 246472
loss: 1.0521851778030396,grad_norm: 0.9999990743547221, iteration: 246473
loss: 0.9734099507331848,grad_norm: 0.7957724658778904, iteration: 246474
loss: 1.004145860671997,grad_norm: 0.8313842359705014, iteration: 246475
loss: 0.9959449172019958,grad_norm: 0.999999113282061, iteration: 246476
loss: 0.9715148210525513,grad_norm: 0.8543971742499797, iteration: 246477
loss: 1.0070104598999023,grad_norm: 0.825436576059349, iteration: 246478
loss: 1.0462005138397217,grad_norm: 0.886594216550884, iteration: 246479
loss: 1.020280122756958,grad_norm: 0.7628520263541121, iteration: 246480
loss: 0.9800298810005188,grad_norm: 0.8270012802163182, iteration: 246481
loss: 1.0016562938690186,grad_norm: 0.9199106210718312, iteration: 246482
loss: 0.9820488095283508,grad_norm: 0.7686374094416818, iteration: 246483
loss: 0.9674381613731384,grad_norm: 0.8616080665651098, iteration: 246484
loss: 1.0903315544128418,grad_norm: 0.9999998902463744, iteration: 246485
loss: 1.015112042427063,grad_norm: 0.9999993052870668, iteration: 246486
loss: 1.0010565519332886,grad_norm: 0.88281527474689, iteration: 246487
loss: 0.9573308229446411,grad_norm: 0.9813904646496474, iteration: 246488
loss: 1.0125893354415894,grad_norm: 0.9999992594104168, iteration: 246489
loss: 0.976776659488678,grad_norm: 0.8403914101480796, iteration: 246490
loss: 1.016012191772461,grad_norm: 0.9999991781038378, iteration: 246491
loss: 1.0064516067504883,grad_norm: 0.8137914194781096, iteration: 246492
loss: 0.9765328168869019,grad_norm: 0.9548330150483367, iteration: 246493
loss: 1.0662506818771362,grad_norm: 0.9260923634906608, iteration: 246494
loss: 1.0370025634765625,grad_norm: 0.9878071401306554, iteration: 246495
loss: 0.9770384430885315,grad_norm: 0.8686112582757072, iteration: 246496
loss: 1.0362317562103271,grad_norm: 0.9999994280151921, iteration: 246497
loss: 1.0509921312332153,grad_norm: 0.7320136340009205, iteration: 246498
loss: 1.006210207939148,grad_norm: 0.881443131462083, iteration: 246499
loss: 0.9889340400695801,grad_norm: 0.8723457675350238, iteration: 246500
loss: 0.9616173505783081,grad_norm: 0.7231131840617868, iteration: 246501
loss: 0.9993744492530823,grad_norm: 0.9999993009700445, iteration: 246502
loss: 0.9783958792686462,grad_norm: 0.8598137730064278, iteration: 246503
loss: 1.030236005783081,grad_norm: 0.9999992931991624, iteration: 246504
loss: 1.0447211265563965,grad_norm: 0.9999991639429233, iteration: 246505
loss: 1.0930194854736328,grad_norm: 0.8657636124798065, iteration: 246506
loss: 0.9513277411460876,grad_norm: 0.8155866428382433, iteration: 246507
loss: 1.026968240737915,grad_norm: 0.8139937860165652, iteration: 246508
loss: 1.0085865259170532,grad_norm: 0.9356270940956909, iteration: 246509
loss: 0.9841691851615906,grad_norm: 0.7748160456679914, iteration: 246510
loss: 1.014090895652771,grad_norm: 0.9905444894894099, iteration: 246511
loss: 1.0218278169631958,grad_norm: 0.8548591339187008, iteration: 246512
loss: 0.9833784103393555,grad_norm: 0.999999100837439, iteration: 246513
loss: 1.0376503467559814,grad_norm: 0.9999991467894825, iteration: 246514
loss: 1.0007742643356323,grad_norm: 0.9999991117696321, iteration: 246515
loss: 1.0026304721832275,grad_norm: 0.9186177640888964, iteration: 246516
loss: 1.012652039527893,grad_norm: 0.8678231410193867, iteration: 246517
loss: 0.9881299734115601,grad_norm: 0.9999991901880292, iteration: 246518
loss: 1.0498652458190918,grad_norm: 0.9999990722460811, iteration: 246519
loss: 0.9603061676025391,grad_norm: 0.861676866917847, iteration: 246520
loss: 0.9645878672599792,grad_norm: 0.8388814614642812, iteration: 246521
loss: 0.965244710445404,grad_norm: 0.8635659214948881, iteration: 246522
loss: 0.9637424945831299,grad_norm: 0.8834958767665945, iteration: 246523
loss: 1.0404250621795654,grad_norm: 0.9999992530422658, iteration: 246524
loss: 1.0164055824279785,grad_norm: 0.9999990914904711, iteration: 246525
loss: 1.0082119703292847,grad_norm: 0.8765956874992058, iteration: 246526
loss: 1.0190051794052124,grad_norm: 0.9847327845916455, iteration: 246527
loss: 0.9593728184700012,grad_norm: 0.8985997239194828, iteration: 246528
loss: 0.9755143523216248,grad_norm: 0.8660288222686351, iteration: 246529
loss: 1.0111067295074463,grad_norm: 0.9999992785112604, iteration: 246530
loss: 0.9910227060317993,grad_norm: 0.7973570215255509, iteration: 246531
loss: 0.9709526300430298,grad_norm: 0.8333387331960481, iteration: 246532
loss: 1.035853385925293,grad_norm: 0.9999991885248187, iteration: 246533
loss: 1.0161335468292236,grad_norm: 0.9999991605529643, iteration: 246534
loss: 1.0429232120513916,grad_norm: 0.8076482828178119, iteration: 246535
loss: 0.9941871166229248,grad_norm: 0.9164321849941666, iteration: 246536
loss: 0.9923633933067322,grad_norm: 0.9824567441778259, iteration: 246537
loss: 1.0169713497161865,grad_norm: 0.9999990094471558, iteration: 246538
loss: 1.0219733715057373,grad_norm: 0.8836529870074347, iteration: 246539
loss: 1.0255448818206787,grad_norm: 0.9999998748187839, iteration: 246540
loss: 0.9927895665168762,grad_norm: 0.9999991891713065, iteration: 246541
loss: 1.0180110931396484,grad_norm: 0.8602681493021942, iteration: 246542
loss: 1.0402039289474487,grad_norm: 0.999999299502685, iteration: 246543
loss: 0.9652816653251648,grad_norm: 0.829423613642334, iteration: 246544
loss: 1.0164114236831665,grad_norm: 0.9025863716644403, iteration: 246545
loss: 0.9419276118278503,grad_norm: 0.8572387071188243, iteration: 246546
loss: 0.9738255739212036,grad_norm: 0.9999991912239705, iteration: 246547
loss: 1.0025107860565186,grad_norm: 0.7867435944982554, iteration: 246548
loss: 1.0078352689743042,grad_norm: 0.9586255873395291, iteration: 246549
loss: 0.992590069770813,grad_norm: 0.9999991637561354, iteration: 246550
loss: 1.0472378730773926,grad_norm: 0.8248489912528038, iteration: 246551
loss: 1.0738381147384644,grad_norm: 0.9999995847303208, iteration: 246552
loss: 1.0234843492507935,grad_norm: 0.8967697139671851, iteration: 246553
loss: 0.9698193073272705,grad_norm: 0.9999991618349694, iteration: 246554
loss: 0.9881717562675476,grad_norm: 0.9999993545829373, iteration: 246555
loss: 1.0255343914031982,grad_norm: 0.8401523380328882, iteration: 246556
loss: 1.0020369291305542,grad_norm: 0.8145063679666448, iteration: 246557
loss: 1.0162458419799805,grad_norm: 0.9999999424141912, iteration: 246558
loss: 1.0097178220748901,grad_norm: 0.9544580405264076, iteration: 246559
loss: 0.9738470315933228,grad_norm: 0.8243868250576668, iteration: 246560
loss: 0.9516454935073853,grad_norm: 0.740577206561854, iteration: 246561
loss: 0.9946670532226562,grad_norm: 0.8462129366244311, iteration: 246562
loss: 0.9730084538459778,grad_norm: 0.9582437827742979, iteration: 246563
loss: 1.020402431488037,grad_norm: 0.7930605614720194, iteration: 246564
loss: 1.024471402168274,grad_norm: 0.8818147612162897, iteration: 246565
loss: 1.02839195728302,grad_norm: 0.8541473917234819, iteration: 246566
loss: 1.292963981628418,grad_norm: 0.9999999302280843, iteration: 246567
loss: 1.0597038269042969,grad_norm: 0.9999999213704591, iteration: 246568
loss: 1.0210922956466675,grad_norm: 0.9999990420298432, iteration: 246569
loss: 0.9904478192329407,grad_norm: 0.8950097531727187, iteration: 246570
loss: 0.9962605834007263,grad_norm: 0.9999992795238146, iteration: 246571
loss: 0.9812920093536377,grad_norm: 0.9475596893239749, iteration: 246572
loss: 0.9849445223808289,grad_norm: 0.8336695673737051, iteration: 246573
loss: 1.032313585281372,grad_norm: 0.8757429232800503, iteration: 246574
loss: 0.9616544842720032,grad_norm: 0.7171485087113214, iteration: 246575
loss: 1.0288923978805542,grad_norm: 0.8520598279953708, iteration: 246576
loss: 0.9832156896591187,grad_norm: 0.8980036322390741, iteration: 246577
loss: 0.9784890413284302,grad_norm: 0.9999990156646167, iteration: 246578
loss: 1.034356951713562,grad_norm: 0.9999995081697387, iteration: 246579
loss: 0.9836545586585999,grad_norm: 0.7994877648570844, iteration: 246580
loss: 0.9694784283638,grad_norm: 0.8733715962087801, iteration: 246581
loss: 1.1081161499023438,grad_norm: 0.9999999731156843, iteration: 246582
loss: 1.0288914442062378,grad_norm: 0.8056998945439819, iteration: 246583
loss: 1.055863618850708,grad_norm: 0.9397197915527526, iteration: 246584
loss: 0.991536557674408,grad_norm: 0.8997938673445107, iteration: 246585
loss: 0.9536775946617126,grad_norm: 0.9102935590441994, iteration: 246586
loss: 0.9959200620651245,grad_norm: 0.8399173419095428, iteration: 246587
loss: 1.0062555074691772,grad_norm: 0.8690474564423702, iteration: 246588
loss: 1.0283282995224,grad_norm: 0.9335083897666128, iteration: 246589
loss: 0.9830507040023804,grad_norm: 0.814043672585829, iteration: 246590
loss: 1.2275925874710083,grad_norm: 0.9999994376186153, iteration: 246591
loss: 0.9963936805725098,grad_norm: 0.9748760735344371, iteration: 246592
loss: 0.9631059765815735,grad_norm: 0.9999990248434051, iteration: 246593
loss: 1.008952260017395,grad_norm: 0.7643351810640832, iteration: 246594
loss: 1.0057971477508545,grad_norm: 0.8706967573725519, iteration: 246595
loss: 1.0035418272018433,grad_norm: 0.9101387126205397, iteration: 246596
loss: 1.0112888813018799,grad_norm: 0.986483796224491, iteration: 246597
loss: 1.0098998546600342,grad_norm: 0.950046113160251, iteration: 246598
loss: 0.9948850274085999,grad_norm: 0.7740492616336287, iteration: 246599
loss: 1.0250838994979858,grad_norm: 0.9849972365067496, iteration: 246600
loss: 0.9821128249168396,grad_norm: 0.9043532727170748, iteration: 246601
loss: 0.9981865286827087,grad_norm: 0.8739591949991977, iteration: 246602
loss: 1.0257164239883423,grad_norm: 0.8391736724981471, iteration: 246603
loss: 0.982891321182251,grad_norm: 0.9999991111387009, iteration: 246604
loss: 1.0596352815628052,grad_norm: 0.9999993378443941, iteration: 246605
loss: 1.0155186653137207,grad_norm: 0.9019457264740655, iteration: 246606
loss: 0.9996449947357178,grad_norm: 0.9999995556021776, iteration: 246607
loss: 1.077478289604187,grad_norm: 0.9999993450309819, iteration: 246608
loss: 0.9961268901824951,grad_norm: 0.9863632314177647, iteration: 246609
loss: 1.0088375806808472,grad_norm: 0.999999165122385, iteration: 246610
loss: 0.9902276396751404,grad_norm: 0.9013539555640322, iteration: 246611
loss: 1.0774846076965332,grad_norm: 0.9999995937083893, iteration: 246612
loss: 1.0223793983459473,grad_norm: 0.896521580977571, iteration: 246613
loss: 1.067299485206604,grad_norm: 0.9999992311286126, iteration: 246614
loss: 1.0040987730026245,grad_norm: 0.9190633554123782, iteration: 246615
loss: 1.0128285884857178,grad_norm: 0.8083616456433608, iteration: 246616
loss: 1.0496598482131958,grad_norm: 0.9068848711094292, iteration: 246617
loss: 0.9702100157737732,grad_norm: 0.952928954305341, iteration: 246618
loss: 1.0123499631881714,grad_norm: 0.9713871454591304, iteration: 246619
loss: 1.0071847438812256,grad_norm: 0.7037779373465838, iteration: 246620
loss: 0.9609105587005615,grad_norm: 0.9166636348676106, iteration: 246621
loss: 1.0056957006454468,grad_norm: 0.9664912190937062, iteration: 246622
loss: 1.0104814767837524,grad_norm: 0.8825531847187338, iteration: 246623
loss: 1.0241485834121704,grad_norm: 0.8823620788611455, iteration: 246624
loss: 1.091340184211731,grad_norm: 0.9999998758840979, iteration: 246625
loss: 0.9734915494918823,grad_norm: 0.999999078523847, iteration: 246626
loss: 0.9734101891517639,grad_norm: 0.8327578374287292, iteration: 246627
loss: 1.0517021417617798,grad_norm: 0.9999991069729967, iteration: 246628
loss: 1.040428876876831,grad_norm: 1.0000000650871543, iteration: 246629
loss: 0.9759572148323059,grad_norm: 0.9702263451818518, iteration: 246630
loss: 0.9828457236289978,grad_norm: 0.845271492981387, iteration: 246631
loss: 1.0021717548370361,grad_norm: 0.7305848555988833, iteration: 246632
loss: 0.9932434558868408,grad_norm: 0.9252730509934509, iteration: 246633
loss: 1.0002386569976807,grad_norm: 0.9999991570802372, iteration: 246634
loss: 1.039017915725708,grad_norm: 0.7631733547575432, iteration: 246635
loss: 1.0395723581314087,grad_norm: 0.8398595057868706, iteration: 246636
loss: 1.0052140951156616,grad_norm: 0.8263262930201655, iteration: 246637
loss: 1.0098215341567993,grad_norm: 0.8204785159521151, iteration: 246638
loss: 1.0191055536270142,grad_norm: 0.8523380972148513, iteration: 246639
loss: 0.97345370054245,grad_norm: 0.9999990598335102, iteration: 246640
loss: 1.0099831819534302,grad_norm: 0.959932606004082, iteration: 246641
loss: 0.985551655292511,grad_norm: 0.7461144673134822, iteration: 246642
loss: 0.9893495440483093,grad_norm: 0.7485582299076198, iteration: 246643
loss: 1.0490758419036865,grad_norm: 0.9999991027096218, iteration: 246644
loss: 0.9889453053474426,grad_norm: 0.7265624734101737, iteration: 246645
loss: 1.0080519914627075,grad_norm: 0.9999990655625097, iteration: 246646
loss: 0.9846568703651428,grad_norm: 0.8794064770136508, iteration: 246647
loss: 1.0194370746612549,grad_norm: 0.7288284771154139, iteration: 246648
loss: 1.0100749731063843,grad_norm: 0.9954122859292709, iteration: 246649
loss: 0.9960865378379822,grad_norm: 0.9678355725204367, iteration: 246650
loss: 0.9948339462280273,grad_norm: 0.9999999431291168, iteration: 246651
loss: 0.9648517370223999,grad_norm: 0.9999991109519164, iteration: 246652
loss: 0.973286509513855,grad_norm: 0.8240364918798945, iteration: 246653
loss: 0.9695273637771606,grad_norm: 0.8817831324429397, iteration: 246654
loss: 1.0141164064407349,grad_norm: 0.9999994181090871, iteration: 246655
loss: 0.9958341717720032,grad_norm: 0.9999991121541294, iteration: 246656
loss: 0.9680240154266357,grad_norm: 0.7196317311532084, iteration: 246657
loss: 0.9826245307922363,grad_norm: 0.9466035580950504, iteration: 246658
loss: 1.0266023874282837,grad_norm: 0.9999990870277439, iteration: 246659
loss: 0.9618638753890991,grad_norm: 0.7834454702562417, iteration: 246660
loss: 1.0095398426055908,grad_norm: 0.9950516386780779, iteration: 246661
loss: 1.142527461051941,grad_norm: 0.9999997130437346, iteration: 246662
loss: 1.02202308177948,grad_norm: 0.9292009983255458, iteration: 246663
loss: 1.0070502758026123,grad_norm: 0.8859512961556988, iteration: 246664
loss: 1.02825927734375,grad_norm: 0.8554338740191, iteration: 246665
loss: 1.0057246685028076,grad_norm: 0.999999045257072, iteration: 246666
loss: 1.0242961645126343,grad_norm: 0.9169677917565983, iteration: 246667
loss: 1.0542982816696167,grad_norm: 0.9999989807849408, iteration: 246668
loss: 0.9847126603126526,grad_norm: 0.8680816679006098, iteration: 246669
loss: 0.9863060712814331,grad_norm: 0.7719585705108829, iteration: 246670
loss: 0.9950734376907349,grad_norm: 0.74435117209449, iteration: 246671
loss: 0.9804800152778625,grad_norm: 0.910046774839023, iteration: 246672
loss: 0.9997685551643372,grad_norm: 0.8216175214916506, iteration: 246673
loss: 1.0092408657073975,grad_norm: 0.8671768455327404, iteration: 246674
loss: 1.010434865951538,grad_norm: 0.8790161199414461, iteration: 246675
loss: 0.9762560129165649,grad_norm: 0.6855401875251392, iteration: 246676
loss: 0.9923038482666016,grad_norm: 0.8340487452555643, iteration: 246677
loss: 0.9651114344596863,grad_norm: 0.8956920017629755, iteration: 246678
loss: 0.9833628535270691,grad_norm: 0.8194697414381382, iteration: 246679
loss: 1.0843262672424316,grad_norm: 0.9999991386993723, iteration: 246680
loss: 0.9909129738807678,grad_norm: 0.8825278767191366, iteration: 246681
loss: 0.9987823367118835,grad_norm: 0.929726593393709, iteration: 246682
loss: 1.0200895071029663,grad_norm: 0.8835786680516045, iteration: 246683
loss: 1.0018970966339111,grad_norm: 0.9111981194471447, iteration: 246684
loss: 1.0854697227478027,grad_norm: 0.9999998284629505, iteration: 246685
loss: 0.9954161047935486,grad_norm: 0.7060302660744084, iteration: 246686
loss: 0.9923896193504333,grad_norm: 0.8874623685193663, iteration: 246687
loss: 1.0105087757110596,grad_norm: 0.8189231435232452, iteration: 246688
loss: 0.984624981880188,grad_norm: 0.8761025814774723, iteration: 246689
loss: 1.0179319381713867,grad_norm: 0.999999873556341, iteration: 246690
loss: 0.980349600315094,grad_norm: 0.8672671342293433, iteration: 246691
loss: 0.9973170757293701,grad_norm: 0.9999991414445932, iteration: 246692
loss: 1.0273489952087402,grad_norm: 0.9777010158303502, iteration: 246693
loss: 1.0104049444198608,grad_norm: 0.8304824420168483, iteration: 246694
loss: 1.0156421661376953,grad_norm: 0.9819530238026327, iteration: 246695
loss: 0.9968183040618896,grad_norm: 0.8099645917023485, iteration: 246696
loss: 1.0186285972595215,grad_norm: 0.9151722815396601, iteration: 246697
loss: 0.9928836226463318,grad_norm: 0.9524325321620035, iteration: 246698
loss: 0.9960654377937317,grad_norm: 0.7450399706073869, iteration: 246699
loss: 0.9943928718566895,grad_norm: 0.7862088926802769, iteration: 246700
loss: 0.9606136679649353,grad_norm: 0.9999994477535952, iteration: 246701
loss: 1.065145492553711,grad_norm: 0.9999991984632387, iteration: 246702
loss: 0.9884392619132996,grad_norm: 0.8434252355830855, iteration: 246703
loss: 1.0143897533416748,grad_norm: 0.8616445268603571, iteration: 246704
loss: 1.0029881000518799,grad_norm: 0.9999990985955903, iteration: 246705
loss: 0.997821033000946,grad_norm: 0.7881713870629274, iteration: 246706
loss: 1.1948838233947754,grad_norm: 0.9999999109717669, iteration: 246707
loss: 1.0102221965789795,grad_norm: 0.846028171413876, iteration: 246708
loss: 0.9810370206832886,grad_norm: 0.9999991986670024, iteration: 246709
loss: 1.0042210817337036,grad_norm: 0.9848737540172852, iteration: 246710
loss: 1.0111230611801147,grad_norm: 0.9396769781411602, iteration: 246711
loss: 1.006953477859497,grad_norm: 0.9999998534249731, iteration: 246712
loss: 1.0080054998397827,grad_norm: 0.9999990306604379, iteration: 246713
loss: 0.987679123878479,grad_norm: 0.9329085605212777, iteration: 246714
loss: 0.9968822002410889,grad_norm: 0.9100808724781014, iteration: 246715
loss: 1.0036643743515015,grad_norm: 0.9675907024932417, iteration: 246716
loss: 1.022143840789795,grad_norm: 0.9361337608557425, iteration: 246717
loss: 0.9998957514762878,grad_norm: 0.7889932416612172, iteration: 246718
loss: 1.0631781816482544,grad_norm: 0.9999990960790931, iteration: 246719
loss: 0.9862056970596313,grad_norm: 0.9999991308507828, iteration: 246720
loss: 1.0283225774765015,grad_norm: 0.9212440259056468, iteration: 246721
loss: 1.01057767868042,grad_norm: 0.9999990932687777, iteration: 246722
loss: 0.9876124262809753,grad_norm: 0.8636260369934805, iteration: 246723
loss: 1.2108317613601685,grad_norm: 0.9999998496558681, iteration: 246724
loss: 0.9908733367919922,grad_norm: 0.8639746779249732, iteration: 246725
loss: 1.0220327377319336,grad_norm: 0.7908269827522425, iteration: 246726
loss: 0.9806463718414307,grad_norm: 0.9999992384444779, iteration: 246727
loss: 1.0209866762161255,grad_norm: 0.9159102870602077, iteration: 246728
loss: 1.0635802745819092,grad_norm: 0.9519190807766664, iteration: 246729
loss: 0.9947531819343567,grad_norm: 0.8414313376494137, iteration: 246730
loss: 0.9970415234565735,grad_norm: 0.9069264238207293, iteration: 246731
loss: 0.9725498557090759,grad_norm: 0.7740200073533721, iteration: 246732
loss: 0.9685537815093994,grad_norm: 0.8485512437496905, iteration: 246733
loss: 1.0059318542480469,grad_norm: 0.8220725612544699, iteration: 246734
loss: 0.9924043416976929,grad_norm: 0.8502882077037365, iteration: 246735
loss: 1.009297251701355,grad_norm: 0.984660686538722, iteration: 246736
loss: 0.9876119494438171,grad_norm: 0.8582404479349914, iteration: 246737
loss: 1.0267165899276733,grad_norm: 0.9259102368071541, iteration: 246738
loss: 0.9782688021659851,grad_norm: 0.8993505000676015, iteration: 246739
loss: 0.9795926809310913,grad_norm: 0.9019122053329017, iteration: 246740
loss: 0.9593863487243652,grad_norm: 0.8449753126903553, iteration: 246741
loss: 0.9800419807434082,grad_norm: 0.9341331030869156, iteration: 246742
loss: 0.9904716610908508,grad_norm: 0.7014625185895002, iteration: 246743
loss: 0.9848430752754211,grad_norm: 0.9999997518788969, iteration: 246744
loss: 1.0046223402023315,grad_norm: 0.9999990926688513, iteration: 246745
loss: 0.9783084988594055,grad_norm: 0.7997158660630201, iteration: 246746
loss: 0.9964969754219055,grad_norm: 0.9999994311817105, iteration: 246747
loss: 1.024545669555664,grad_norm: 0.9721561831892214, iteration: 246748
loss: 1.0061419010162354,grad_norm: 0.955048417865012, iteration: 246749
loss: 1.0233820676803589,grad_norm: 0.9999996707776792, iteration: 246750
loss: 0.9693984985351562,grad_norm: 0.9999990616916377, iteration: 246751
loss: 1.0042511224746704,grad_norm: 0.8598598739796963, iteration: 246752
loss: 0.9963904619216919,grad_norm: 0.9999989915919782, iteration: 246753
loss: 1.0393226146697998,grad_norm: 0.8202485675293214, iteration: 246754
loss: 1.1680792570114136,grad_norm: 0.999999794470319, iteration: 246755
loss: 1.0033118724822998,grad_norm: 0.8988054370591323, iteration: 246756
loss: 1.0392415523529053,grad_norm: 0.999999095895861, iteration: 246757
loss: 0.9747905135154724,grad_norm: 0.8937679128447775, iteration: 246758
loss: 1.0004463195800781,grad_norm: 0.8071457981894622, iteration: 246759
loss: 0.9832183122634888,grad_norm: 0.7645195405877045, iteration: 246760
loss: 1.0055102109909058,grad_norm: 0.9323353102881132, iteration: 246761
loss: 0.9960178732872009,grad_norm: 0.8386532876474144, iteration: 246762
loss: 0.9969024658203125,grad_norm: 0.8299712352948473, iteration: 246763
loss: 1.0105557441711426,grad_norm: 0.9999992157769939, iteration: 246764
loss: 0.9851509928703308,grad_norm: 0.9219694382725339, iteration: 246765
loss: 1.0339359045028687,grad_norm: 0.8892098644389638, iteration: 246766
loss: 1.0222110748291016,grad_norm: 0.9911052215534337, iteration: 246767
loss: 0.9880324602127075,grad_norm: 0.935375156681522, iteration: 246768
loss: 1.0091038942337036,grad_norm: 0.8495743552613655, iteration: 246769
loss: 0.9772570133209229,grad_norm: 0.8987326265276459, iteration: 246770
loss: 1.0237739086151123,grad_norm: 0.8797810433945609, iteration: 246771
loss: 0.9875894784927368,grad_norm: 0.9006829124675872, iteration: 246772
loss: 0.9815454483032227,grad_norm: 0.9193332403719255, iteration: 246773
loss: 1.010332703590393,grad_norm: 0.8271636777430209, iteration: 246774
loss: 0.9990886449813843,grad_norm: 0.9818297165310345, iteration: 246775
loss: 0.9688453674316406,grad_norm: 0.9999991830669396, iteration: 246776
loss: 1.0107206106185913,grad_norm: 0.8307767232044215, iteration: 246777
loss: 1.024665355682373,grad_norm: 0.910076831981123, iteration: 246778
loss: 1.1192384958267212,grad_norm: 0.9999994390028314, iteration: 246779
loss: 0.9552472829818726,grad_norm: 0.86040435544302, iteration: 246780
loss: 0.9947430491447449,grad_norm: 0.7490016551423366, iteration: 246781
loss: 1.023688554763794,grad_norm: 0.984459235856348, iteration: 246782
loss: 0.9716029763221741,grad_norm: 0.8360770910634682, iteration: 246783
loss: 1.0079537630081177,grad_norm: 0.9999990754995404, iteration: 246784
loss: 1.0036121606826782,grad_norm: 0.9098662244057358, iteration: 246785
loss: 0.9908933043479919,grad_norm: 0.853432013481391, iteration: 246786
loss: 0.9837877154350281,grad_norm: 0.9999991486392277, iteration: 246787
loss: 1.0099631547927856,grad_norm: 0.7920830690942415, iteration: 246788
loss: 0.9980036616325378,grad_norm: 0.8614239485717916, iteration: 246789
loss: 0.9896234273910522,grad_norm: 0.9999994283983266, iteration: 246790
loss: 1.004996418952942,grad_norm: 0.9311700193625699, iteration: 246791
loss: 0.9270144701004028,grad_norm: 0.9999996935152273, iteration: 246792
loss: 0.9819508790969849,grad_norm: 0.8960251796542577, iteration: 246793
loss: 1.003064513206482,grad_norm: 0.9132165697141817, iteration: 246794
loss: 0.987543523311615,grad_norm: 0.8604108021579556, iteration: 246795
loss: 0.9653246402740479,grad_norm: 0.7852805158074028, iteration: 246796
loss: 0.9919571876525879,grad_norm: 0.9365133099484684, iteration: 246797
loss: 0.977024257183075,grad_norm: 0.9999991639146365, iteration: 246798
loss: 0.9929383397102356,grad_norm: 0.9999990658730212, iteration: 246799
loss: 1.0070350170135498,grad_norm: 0.85490019290443, iteration: 246800
loss: 1.0215142965316772,grad_norm: 0.9902924959435058, iteration: 246801
loss: 1.0691828727722168,grad_norm: 0.9389047517563616, iteration: 246802
loss: 1.000253677368164,grad_norm: 0.700753140864303, iteration: 246803
loss: 1.0369205474853516,grad_norm: 0.9554812143708804, iteration: 246804
loss: 1.0047087669372559,grad_norm: 0.8661855882543312, iteration: 246805
loss: 0.985696017742157,grad_norm: 0.729170084220657, iteration: 246806
loss: 1.0009346008300781,grad_norm: 0.8362886327504184, iteration: 246807
loss: 0.9668582081794739,grad_norm: 0.9999992000975809, iteration: 246808
loss: 0.9849221110343933,grad_norm: 0.8039097993534746, iteration: 246809
loss: 0.9665501117706299,grad_norm: 0.8568756793517569, iteration: 246810
loss: 1.018524408340454,grad_norm: 0.8521209092898674, iteration: 246811
loss: 1.0486284494400024,grad_norm: 0.7869796817269235, iteration: 246812
loss: 0.9739850163459778,grad_norm: 0.9774478680820947, iteration: 246813
loss: 1.1093904972076416,grad_norm: 0.9999990692588834, iteration: 246814
loss: 0.9809455275535583,grad_norm: 0.9999989930814493, iteration: 246815
loss: 0.9777377247810364,grad_norm: 0.8688714486831041, iteration: 246816
loss: 1.1376643180847168,grad_norm: 0.9999997760682616, iteration: 246817
loss: 0.9881398677825928,grad_norm: 0.6960810471275556, iteration: 246818
loss: 0.9735989570617676,grad_norm: 0.8404885321653662, iteration: 246819
loss: 0.9870132803916931,grad_norm: 0.8958313217583692, iteration: 246820
loss: 1.01229727268219,grad_norm: 0.9443407632434988, iteration: 246821
loss: 1.0072063207626343,grad_norm: 0.8342488649940649, iteration: 246822
loss: 1.0262131690979004,grad_norm: 0.8081306656228163, iteration: 246823
loss: 1.0001447200775146,grad_norm: 0.8060094632328463, iteration: 246824
loss: 1.074432373046875,grad_norm: 0.9222757338404417, iteration: 246825
loss: 1.0009161233901978,grad_norm: 0.8579953099296439, iteration: 246826
loss: 0.9896718263626099,grad_norm: 0.9999996836929339, iteration: 246827
loss: 0.9962610602378845,grad_norm: 0.7866534017867387, iteration: 246828
loss: 1.0163806676864624,grad_norm: 0.9999992009834319, iteration: 246829
loss: 0.9953548312187195,grad_norm: 0.8536365030433662, iteration: 246830
loss: 0.9806363582611084,grad_norm: 0.7831926495783611, iteration: 246831
loss: 1.0208418369293213,grad_norm: 0.9999994225308731, iteration: 246832
loss: 0.9826218485832214,grad_norm: 0.9999990961976395, iteration: 246833
loss: 1.0203216075897217,grad_norm: 0.9209742131345858, iteration: 246834
loss: 0.9906600117683411,grad_norm: 0.7338884894782448, iteration: 246835
loss: 1.0382107496261597,grad_norm: 0.9498045420287928, iteration: 246836
loss: 0.9982643127441406,grad_norm: 0.946086876553068, iteration: 246837
loss: 0.9922881126403809,grad_norm: 0.999998988458948, iteration: 246838
loss: 1.0221772193908691,grad_norm: 0.9577195098822642, iteration: 246839
loss: 0.9821767210960388,grad_norm: 0.9652443204309589, iteration: 246840
loss: 0.9828718304634094,grad_norm: 0.9999992692396543, iteration: 246841
loss: 1.0161876678466797,grad_norm: 0.9999991493846458, iteration: 246842
loss: 1.0020954608917236,grad_norm: 0.786639145350215, iteration: 246843
loss: 1.0266271829605103,grad_norm: 0.9366875904572665, iteration: 246844
loss: 0.9887790679931641,grad_norm: 0.999999100477136, iteration: 246845
loss: 1.02468740940094,grad_norm: 0.9999993865199411, iteration: 246846
loss: 0.9824478030204773,grad_norm: 0.9677584366589886, iteration: 246847
loss: 1.0012590885162354,grad_norm: 0.9999990699331522, iteration: 246848
loss: 1.0234036445617676,grad_norm: 0.999999110052738, iteration: 246849
loss: 0.9805743098258972,grad_norm: 0.8271669288153729, iteration: 246850
loss: 0.9929599761962891,grad_norm: 0.9484726430675273, iteration: 246851
loss: 0.9784339666366577,grad_norm: 0.8380546007709526, iteration: 246852
loss: 0.9752485156059265,grad_norm: 0.8462751588977034, iteration: 246853
loss: 1.0338678359985352,grad_norm: 0.9999996282626837, iteration: 246854
loss: 0.9683882594108582,grad_norm: 0.9999991791337556, iteration: 246855
loss: 0.9695146083831787,grad_norm: 0.9230938058016347, iteration: 246856
loss: 0.9925699830055237,grad_norm: 0.9363922098112379, iteration: 246857
loss: 0.9467952847480774,grad_norm: 0.8794563820545207, iteration: 246858
loss: 0.9918529391288757,grad_norm: 0.9334735328768945, iteration: 246859
loss: 1.0606740713119507,grad_norm: 0.9999991446626054, iteration: 246860
loss: 0.9867137670516968,grad_norm: 0.939992481215066, iteration: 246861
loss: 1.034128189086914,grad_norm: 0.926279420311453, iteration: 246862
loss: 1.0262768268585205,grad_norm: 0.7866127376981441, iteration: 246863
loss: 0.9766291379928589,grad_norm: 0.8326414372142381, iteration: 246864
loss: 0.9640869498252869,grad_norm: 0.8117168684815623, iteration: 246865
loss: 0.9989174604415894,grad_norm: 0.8914785654927241, iteration: 246866
loss: 1.0207490921020508,grad_norm: 0.8755740947097095, iteration: 246867
loss: 1.022337555885315,grad_norm: 0.8693243566861851, iteration: 246868
loss: 0.9909425377845764,grad_norm: 0.7679606450024904, iteration: 246869
loss: 0.9919551610946655,grad_norm: 0.8965639948769348, iteration: 246870
loss: 1.0065010786056519,grad_norm: 0.9894248293217807, iteration: 246871
loss: 0.9986576437950134,grad_norm: 0.8673493394371291, iteration: 246872
loss: 0.973992109298706,grad_norm: 0.9860595205564722, iteration: 246873
loss: 0.9793521761894226,grad_norm: 0.8694387008594296, iteration: 246874
loss: 0.9580183625221252,grad_norm: 0.7688751654644985, iteration: 246875
loss: 0.9981163740158081,grad_norm: 0.9999991744110803, iteration: 246876
loss: 0.9559372663497925,grad_norm: 0.8387678681022679, iteration: 246877
loss: 1.0005803108215332,grad_norm: 0.9338387742150079, iteration: 246878
loss: 1.0148632526397705,grad_norm: 0.8877036634753706, iteration: 246879
loss: 1.0536013841629028,grad_norm: 0.9160491909213342, iteration: 246880
loss: 1.0245503187179565,grad_norm: 0.999999262895446, iteration: 246881
loss: 0.9913498163223267,grad_norm: 0.8048054155231512, iteration: 246882
loss: 1.01449453830719,grad_norm: 0.8267640891383905, iteration: 246883
loss: 0.9910476803779602,grad_norm: 0.8890880632301275, iteration: 246884
loss: 1.0204899311065674,grad_norm: 0.8679381288934702, iteration: 246885
loss: 0.977078378200531,grad_norm: 0.8113701855057904, iteration: 246886
loss: 1.0206482410430908,grad_norm: 0.8357934912650665, iteration: 246887
loss: 0.9869025945663452,grad_norm: 0.9999989676628714, iteration: 246888
loss: 0.9525993466377258,grad_norm: 0.993641555201198, iteration: 246889
loss: 1.0629236698150635,grad_norm: 0.9999991085629224, iteration: 246890
loss: 0.9857465028762817,grad_norm: 0.9999990537403362, iteration: 246891
loss: 0.9774976372718811,grad_norm: 0.9017726388674309, iteration: 246892
loss: 1.0032269954681396,grad_norm: 0.9961829441218188, iteration: 246893
loss: 0.9989137649536133,grad_norm: 0.9339417443728849, iteration: 246894
loss: 0.9872472286224365,grad_norm: 0.9999990967743061, iteration: 246895
loss: 1.001642107963562,grad_norm: 0.9204714546455404, iteration: 246896
loss: 1.0039366483688354,grad_norm: 0.9962199286998961, iteration: 246897
loss: 1.0309815406799316,grad_norm: 0.9891769994647134, iteration: 246898
loss: 0.9908086657524109,grad_norm: 0.9262202576407417, iteration: 246899
loss: 1.0269062519073486,grad_norm: 0.9472620860040508, iteration: 246900
loss: 0.999210000038147,grad_norm: 0.8494276034292678, iteration: 246901
loss: 0.9673665761947632,grad_norm: 0.8689310696400769, iteration: 246902
loss: 0.9888263940811157,grad_norm: 0.844710518181138, iteration: 246903
loss: 0.9825647473335266,grad_norm: 0.9999991843137962, iteration: 246904
loss: 0.9991857409477234,grad_norm: 0.9999999021891405, iteration: 246905
loss: 0.965814471244812,grad_norm: 0.8171958069890595, iteration: 246906
loss: 1.0182557106018066,grad_norm: 0.9399145601548335, iteration: 246907
loss: 0.9984079003334045,grad_norm: 0.8389668644663697, iteration: 246908
loss: 1.006481647491455,grad_norm: 0.9999992039009306, iteration: 246909
loss: 1.026332139968872,grad_norm: 0.88456385764939, iteration: 246910
loss: 1.0171810388565063,grad_norm: 0.7975917899220962, iteration: 246911
loss: 0.9915207028388977,grad_norm: 0.9999989600136118, iteration: 246912
loss: 0.9467014074325562,grad_norm: 0.8815423223185574, iteration: 246913
loss: 0.9813687205314636,grad_norm: 0.9999990026948391, iteration: 246914
loss: 1.011401653289795,grad_norm: 0.9281490809171945, iteration: 246915
loss: 0.9972957968711853,grad_norm: 0.9792550835855457, iteration: 246916
loss: 0.9944912791252136,grad_norm: 0.8402609000750841, iteration: 246917
loss: 0.9907914400100708,grad_norm: 0.9999991918030409, iteration: 246918
loss: 0.9913209676742554,grad_norm: 0.8071453524552247, iteration: 246919
loss: 1.022657871246338,grad_norm: 0.9999990359146252, iteration: 246920
loss: 1.0575578212738037,grad_norm: 0.999999970067733, iteration: 246921
loss: 0.9911918640136719,grad_norm: 0.9705779458861564, iteration: 246922
loss: 0.9544478058815002,grad_norm: 0.8978674340078008, iteration: 246923
loss: 1.0067931413650513,grad_norm: 0.7996489085387661, iteration: 246924
loss: 0.9792832136154175,grad_norm: 0.7164805708965368, iteration: 246925
loss: 0.9916203618049622,grad_norm: 0.7878945726147704, iteration: 246926
loss: 1.0897932052612305,grad_norm: 0.9999990910421033, iteration: 246927
loss: 1.0003912448883057,grad_norm: 0.86516331707106, iteration: 246928
loss: 1.0241868495941162,grad_norm: 0.8514161580178856, iteration: 246929
loss: 0.9474765062332153,grad_norm: 0.9416795863459756, iteration: 246930
loss: 0.990597128868103,grad_norm: 0.8477922141562229, iteration: 246931
loss: 0.9846213459968567,grad_norm: 0.8969301966412749, iteration: 246932
loss: 0.9869130253791809,grad_norm: 0.9707452430795827, iteration: 246933
loss: 0.9754359126091003,grad_norm: 0.9572641889186885, iteration: 246934
loss: 1.1522990465164185,grad_norm: 0.9999992818598845, iteration: 246935
loss: 0.9744778871536255,grad_norm: 0.9451012037881782, iteration: 246936
loss: 0.9798541069030762,grad_norm: 0.9160044952413371, iteration: 246937
loss: 0.987933337688446,grad_norm: 0.8485808931039123, iteration: 246938
loss: 0.9902304410934448,grad_norm: 0.8887927955012338, iteration: 246939
loss: 0.9490071535110474,grad_norm: 0.8364402488679489, iteration: 246940
loss: 1.0061931610107422,grad_norm: 0.8999512789814128, iteration: 246941
loss: 0.9873395562171936,grad_norm: 0.9999991082429293, iteration: 246942
loss: 1.0036399364471436,grad_norm: 0.9999993558268414, iteration: 246943
loss: 1.0322506427764893,grad_norm: 0.9759563578951134, iteration: 246944
loss: 1.0016701221466064,grad_norm: 0.9501463951287704, iteration: 246945
loss: 0.9830313920974731,grad_norm: 0.781261681369333, iteration: 246946
loss: 1.001448392868042,grad_norm: 0.8658887139280147, iteration: 246947
loss: 1.00373375415802,grad_norm: 0.8826942138387399, iteration: 246948
loss: 0.9365641474723816,grad_norm: 0.9381768766816305, iteration: 246949
loss: 0.9736401438713074,grad_norm: 0.999999121955654, iteration: 246950
loss: 1.0146514177322388,grad_norm: 0.9880220477238513, iteration: 246951
loss: 0.9694325923919678,grad_norm: 0.999999044487879, iteration: 246952
loss: 1.0212950706481934,grad_norm: 0.9828368853730116, iteration: 246953
loss: 0.9565097689628601,grad_norm: 0.8906699305882694, iteration: 246954
loss: 0.9712589979171753,grad_norm: 0.7789464341533576, iteration: 246955
loss: 0.9985073804855347,grad_norm: 0.9999991071001311, iteration: 246956
loss: 0.9740817546844482,grad_norm: 0.9623573588785718, iteration: 246957
loss: 0.9881830215454102,grad_norm: 0.9999989717301032, iteration: 246958
loss: 1.0415610074996948,grad_norm: 0.9114079932522676, iteration: 246959
loss: 1.0011980533599854,grad_norm: 0.9670425076680225, iteration: 246960
loss: 0.9965214133262634,grad_norm: 0.9915693886910827, iteration: 246961
loss: 1.0182355642318726,grad_norm: 0.9665140090636781, iteration: 246962
loss: 1.0368132591247559,grad_norm: 0.999998966171918, iteration: 246963
loss: 1.0458049774169922,grad_norm: 0.9999995126464097, iteration: 246964
loss: 0.9817323684692383,grad_norm: 0.9323180543044891, iteration: 246965
loss: 1.0199915170669556,grad_norm: 0.9999991010502011, iteration: 246966
loss: 0.995724618434906,grad_norm: 0.9922327706361619, iteration: 246967
loss: 1.0286692380905151,grad_norm: 0.8991633904199208, iteration: 246968
loss: 1.026323914527893,grad_norm: 0.9999992369657746, iteration: 246969
loss: 1.1666723489761353,grad_norm: 0.999999214645945, iteration: 246970
loss: 0.9816450476646423,grad_norm: 0.861075350076141, iteration: 246971
loss: 0.9991531372070312,grad_norm: 0.8997095757553859, iteration: 246972
loss: 0.9634740352630615,grad_norm: 0.9662989196264614, iteration: 246973
loss: 0.9942154884338379,grad_norm: 0.9999993904786401, iteration: 246974
loss: 0.9702566266059875,grad_norm: 0.8223027624615403, iteration: 246975
loss: 1.0048892498016357,grad_norm: 0.914717674553979, iteration: 246976
loss: 0.980725884437561,grad_norm: 0.8609332337571954, iteration: 246977
loss: 0.9946693778038025,grad_norm: 0.9607255871950613, iteration: 246978
loss: 0.9883393049240112,grad_norm: 0.9999990521238468, iteration: 246979
loss: 0.9757106900215149,grad_norm: 0.9999992939896961, iteration: 246980
loss: 0.9847900867462158,grad_norm: 0.9813392068368003, iteration: 246981
loss: 1.0790760517120361,grad_norm: 0.9383593286944591, iteration: 246982
loss: 1.0668448209762573,grad_norm: 0.913659390099468, iteration: 246983
loss: 0.9638054370880127,grad_norm: 0.8535704936597118, iteration: 246984
loss: 0.9897675514221191,grad_norm: 0.838537698251158, iteration: 246985
loss: 0.9893119931221008,grad_norm: 0.9999990909673248, iteration: 246986
loss: 0.9706840515136719,grad_norm: 0.8359106377821233, iteration: 246987
loss: 1.0245999097824097,grad_norm: 0.8408193893839573, iteration: 246988
loss: 1.0034215450286865,grad_norm: 0.988324229422636, iteration: 246989
loss: 1.0585771799087524,grad_norm: 0.9999994080241872, iteration: 246990
loss: 1.0037274360656738,grad_norm: 0.9224250772258591, iteration: 246991
loss: 1.005367636680603,grad_norm: 0.7822173214773518, iteration: 246992
loss: 0.9832242727279663,grad_norm: 0.7010564364968841, iteration: 246993
loss: 1.051098108291626,grad_norm: 0.9999991985292499, iteration: 246994
loss: 1.0299073457717896,grad_norm: 0.9999993230961408, iteration: 246995
loss: 1.0000509023666382,grad_norm: 0.924467680735634, iteration: 246996
loss: 0.982075035572052,grad_norm: 0.8242147128183062, iteration: 246997
loss: 1.0346341133117676,grad_norm: 0.982845654733423, iteration: 246998
loss: 1.0221344232559204,grad_norm: 0.7205833155555496, iteration: 246999
loss: 0.9752887487411499,grad_norm: 0.9431662725110386, iteration: 247000
loss: 1.0652676820755005,grad_norm: 0.9999993786624998, iteration: 247001
loss: 1.024487018585205,grad_norm: 0.8297584261314382, iteration: 247002
loss: 1.0386545658111572,grad_norm: 0.8645087040410798, iteration: 247003
loss: 0.9815495610237122,grad_norm: 0.8085501557266171, iteration: 247004
loss: 1.00466787815094,grad_norm: 0.9923355543318403, iteration: 247005
loss: 0.9833511710166931,grad_norm: 0.8184540779693982, iteration: 247006
loss: 0.9780677556991577,grad_norm: 0.9120392202017451, iteration: 247007
loss: 0.9962841272354126,grad_norm: 0.9246552103590798, iteration: 247008
loss: 1.0001111030578613,grad_norm: 0.8918491946320815, iteration: 247009
loss: 0.98637455701828,grad_norm: 0.8464533397969385, iteration: 247010
loss: 1.0068004131317139,grad_norm: 0.8617023940546664, iteration: 247011
loss: 1.0334705114364624,grad_norm: 0.9658649667890761, iteration: 247012
loss: 1.0206866264343262,grad_norm: 0.8469557392852992, iteration: 247013
loss: 0.9997381567955017,grad_norm: 0.8450084279899437, iteration: 247014
loss: 1.0970600843429565,grad_norm: 0.9284325358872981, iteration: 247015
loss: 1.2693846225738525,grad_norm: 0.9999998975295742, iteration: 247016
loss: 0.9798046946525574,grad_norm: 0.7920374138102007, iteration: 247017
loss: 0.9796271920204163,grad_norm: 0.882720302000332, iteration: 247018
loss: 1.0170238018035889,grad_norm: 0.8141767312375408, iteration: 247019
loss: 0.9639878273010254,grad_norm: 0.9999989625750201, iteration: 247020
loss: 0.986158549785614,grad_norm: 0.796878389757559, iteration: 247021
loss: 0.9918120503425598,grad_norm: 0.9999991723233499, iteration: 247022
loss: 1.0138342380523682,grad_norm: 0.7668598473458157, iteration: 247023
loss: 0.9996707439422607,grad_norm: 0.9999989015307631, iteration: 247024
loss: 0.990101158618927,grad_norm: 0.9268762673163121, iteration: 247025
loss: 0.9893233180046082,grad_norm: 0.8453081571172937, iteration: 247026
loss: 1.0359525680541992,grad_norm: 0.874713387033267, iteration: 247027
loss: 1.0223500728607178,grad_norm: 0.8983682272724733, iteration: 247028
loss: 0.966792106628418,grad_norm: 0.970394675178325, iteration: 247029
loss: 1.0120619535446167,grad_norm: 0.8430945669943233, iteration: 247030
loss: 0.9775670170783997,grad_norm: 0.9614479564024273, iteration: 247031
loss: 1.0174641609191895,grad_norm: 0.8890003671773414, iteration: 247032
loss: 0.9869368076324463,grad_norm: 0.9999991553460809, iteration: 247033
loss: 1.0049601793289185,grad_norm: 0.9999991713580375, iteration: 247034
loss: 1.0023969411849976,grad_norm: 0.7968399480326889, iteration: 247035
loss: 1.002866268157959,grad_norm: 0.8759189942237185, iteration: 247036
loss: 0.9915143251419067,grad_norm: 0.9999991194883453, iteration: 247037
loss: 0.9676201939582825,grad_norm: 0.7428375239879041, iteration: 247038
loss: 0.9994123578071594,grad_norm: 0.9914974552913227, iteration: 247039
loss: 1.0042728185653687,grad_norm: 0.9999992206345933, iteration: 247040
loss: 0.9708171486854553,grad_norm: 0.8944479272976518, iteration: 247041
loss: 0.9940495491027832,grad_norm: 0.8458597550283676, iteration: 247042
loss: 1.0191940069198608,grad_norm: 0.9696759088241291, iteration: 247043
loss: 1.0113955736160278,grad_norm: 0.8055553505043094, iteration: 247044
loss: 1.0268017053604126,grad_norm: 0.8449617766830227, iteration: 247045
loss: 1.0300859212875366,grad_norm: 0.8129538326670257, iteration: 247046
loss: 1.013087511062622,grad_norm: 0.9593744313789613, iteration: 247047
loss: 0.9576933979988098,grad_norm: 0.9145214322900892, iteration: 247048
loss: 1.0062578916549683,grad_norm: 0.7469524756645667, iteration: 247049
loss: 1.0072226524353027,grad_norm: 0.9877304636075479, iteration: 247050
loss: 0.9853073954582214,grad_norm: 0.8918645897745162, iteration: 247051
loss: 0.9871000051498413,grad_norm: 0.9999991959929386, iteration: 247052
loss: 0.9889952540397644,grad_norm: 0.8348320837959091, iteration: 247053
loss: 0.9880696535110474,grad_norm: 0.9999990608842646, iteration: 247054
loss: 1.0297223329544067,grad_norm: 0.999999139182416, iteration: 247055
loss: 1.0224428176879883,grad_norm: 0.808024100293692, iteration: 247056
loss: 0.9714446067810059,grad_norm: 0.9069286600759617, iteration: 247057
loss: 1.0135915279388428,grad_norm: 0.9352877444505769, iteration: 247058
loss: 0.9856641292572021,grad_norm: 0.9891580927696705, iteration: 247059
loss: 1.018232822418213,grad_norm: 0.9406628712090452, iteration: 247060
loss: 1.0313374996185303,grad_norm: 0.8774297322393034, iteration: 247061
loss: 0.9813997745513916,grad_norm: 0.9237986726802586, iteration: 247062
loss: 1.0274983644485474,grad_norm: 0.9999990115747157, iteration: 247063
loss: 1.0032461881637573,grad_norm: 0.8574645813799003, iteration: 247064
loss: 1.0523430109024048,grad_norm: 0.9999997331003375, iteration: 247065
loss: 0.9993085861206055,grad_norm: 0.9063059716718874, iteration: 247066
loss: 1.004410982131958,grad_norm: 0.7604580563460327, iteration: 247067
loss: 0.9886292815208435,grad_norm: 0.8611799027196043, iteration: 247068
loss: 0.9712885022163391,grad_norm: 0.9130974470864218, iteration: 247069
loss: 0.9706246256828308,grad_norm: 0.9214985292692798, iteration: 247070
loss: 1.0032044649124146,grad_norm: 0.9772851360239101, iteration: 247071
loss: 0.9862549901008606,grad_norm: 0.9999989380971301, iteration: 247072
loss: 1.04673171043396,grad_norm: 0.90281751782049, iteration: 247073
loss: 1.0105620622634888,grad_norm: 0.7594310246812795, iteration: 247074
loss: 1.0188359022140503,grad_norm: 0.872764081500496, iteration: 247075
loss: 0.9829005002975464,grad_norm: 0.8581136681493026, iteration: 247076
loss: 1.032341480255127,grad_norm: 0.8484352782353821, iteration: 247077
loss: 1.0261353254318237,grad_norm: 0.9374254274547712, iteration: 247078
loss: 0.9822983145713806,grad_norm: 0.8979203625348101, iteration: 247079
loss: 0.9918601512908936,grad_norm: 0.8746778134617296, iteration: 247080
loss: 0.9710909128189087,grad_norm: 0.7574020987424469, iteration: 247081
loss: 1.0404351949691772,grad_norm: 0.9999992000176995, iteration: 247082
loss: 0.9953455328941345,grad_norm: 0.8733645059647523, iteration: 247083
loss: 1.0241024494171143,grad_norm: 0.7995275217097786, iteration: 247084
loss: 0.9847633242607117,grad_norm: 0.9999993271258205, iteration: 247085
loss: 0.9843681454658508,grad_norm: 0.9391227161821263, iteration: 247086
loss: 0.982099175453186,grad_norm: 0.9543405682679031, iteration: 247087
loss: 0.960934042930603,grad_norm: 0.9830878776177678, iteration: 247088
loss: 0.9860119819641113,grad_norm: 0.9999990923742835, iteration: 247089
loss: 1.005811333656311,grad_norm: 0.9129862711139302, iteration: 247090
loss: 0.970093846321106,grad_norm: 0.7880608466451138, iteration: 247091
loss: 0.9970512986183167,grad_norm: 0.9557322455472684, iteration: 247092
loss: 0.9933011531829834,grad_norm: 0.8213695796986249, iteration: 247093
loss: 0.9854381680488586,grad_norm: 0.8116534859760592, iteration: 247094
loss: 0.9951397180557251,grad_norm: 0.9999990631253193, iteration: 247095
loss: 0.9808506369590759,grad_norm: 0.8119361375338472, iteration: 247096
loss: 0.9869036674499512,grad_norm: 0.9589557642529161, iteration: 247097
loss: 0.9866019487380981,grad_norm: 0.9999989755225791, iteration: 247098
loss: 1.0010857582092285,grad_norm: 0.9999990383344224, iteration: 247099
loss: 0.9837473630905151,grad_norm: 0.9594878428709539, iteration: 247100
loss: 0.9479528665542603,grad_norm: 0.8542843843108513, iteration: 247101
loss: 1.0083507299423218,grad_norm: 0.9999990195668119, iteration: 247102
loss: 0.9932910203933716,grad_norm: 0.864705045410614, iteration: 247103
loss: 1.0617682933807373,grad_norm: 0.9999990855843395, iteration: 247104
loss: 1.028214454650879,grad_norm: 0.897621818892919, iteration: 247105
loss: 0.9660677909851074,grad_norm: 0.7439193375090273, iteration: 247106
loss: 0.9954879879951477,grad_norm: 0.9999991850425206, iteration: 247107
loss: 1.003675103187561,grad_norm: 0.9627800259070401, iteration: 247108
loss: 1.0135964155197144,grad_norm: 0.9999990536363405, iteration: 247109
loss: 1.0060055255889893,grad_norm: 0.8893790650532558, iteration: 247110
loss: 0.9756680130958557,grad_norm: 0.9737306748400312, iteration: 247111
loss: 0.986771821975708,grad_norm: 0.9009881789866176, iteration: 247112
loss: 1.0003881454467773,grad_norm: 0.9575755419623267, iteration: 247113
loss: 0.9605686068534851,grad_norm: 0.9809844145813693, iteration: 247114
loss: 0.9630352258682251,grad_norm: 0.7690505181899895, iteration: 247115
loss: 0.9936197400093079,grad_norm: 0.9999991498221514, iteration: 247116
loss: 0.9838710427284241,grad_norm: 0.9394814655517921, iteration: 247117
loss: 1.0014121532440186,grad_norm: 0.9722853719277387, iteration: 247118
loss: 1.0254919528961182,grad_norm: 0.9999995097669823, iteration: 247119
loss: 1.0082601308822632,grad_norm: 0.9999998025585057, iteration: 247120
loss: 0.9972097277641296,grad_norm: 0.9999990971329875, iteration: 247121
loss: 1.004300832748413,grad_norm: 0.958956557561963, iteration: 247122
loss: 0.9368531107902527,grad_norm: 0.881140421003725, iteration: 247123
loss: 1.0042345523834229,grad_norm: 0.9999989765153213, iteration: 247124
loss: 0.9891086220741272,grad_norm: 0.9999991487237295, iteration: 247125
loss: 1.0141456127166748,grad_norm: 0.8456145834571563, iteration: 247126
loss: 0.9859505295753479,grad_norm: 0.9138529130178243, iteration: 247127
loss: 0.9895781874656677,grad_norm: 0.9999990673036009, iteration: 247128
loss: 1.0075408220291138,grad_norm: 0.9999990961568885, iteration: 247129
loss: 1.014143466949463,grad_norm: 0.8474923899270007, iteration: 247130
loss: 0.9894115924835205,grad_norm: 0.8745296483409026, iteration: 247131
loss: 1.0003060102462769,grad_norm: 0.8701202789422735, iteration: 247132
loss: 0.997317373752594,grad_norm: 0.9167940489821921, iteration: 247133
loss: 0.9538428783416748,grad_norm: 0.7972365961310524, iteration: 247134
loss: 1.0152790546417236,grad_norm: 0.9999990738477001, iteration: 247135
loss: 1.008434534072876,grad_norm: 0.9675749117388426, iteration: 247136
loss: 0.9989249110221863,grad_norm: 0.999999170861333, iteration: 247137
loss: 0.9880532026290894,grad_norm: 0.9923765824068066, iteration: 247138
loss: 0.9683994054794312,grad_norm: 0.9390704104390818, iteration: 247139
loss: 0.9986146688461304,grad_norm: 0.9638249735282336, iteration: 247140
loss: 0.9941931962966919,grad_norm: 0.887019278290496, iteration: 247141
loss: 0.9953287243843079,grad_norm: 0.9440814041884038, iteration: 247142
loss: 1.0061686038970947,grad_norm: 0.8924706765526813, iteration: 247143
loss: 1.0132499933242798,grad_norm: 0.9864920875031156, iteration: 247144
loss: 1.0000369548797607,grad_norm: 0.8736526440065718, iteration: 247145
loss: 1.0075719356536865,grad_norm: 0.9999990969879997, iteration: 247146
loss: 1.0199944972991943,grad_norm: 0.9999991736580105, iteration: 247147
loss: 0.9787716269493103,grad_norm: 0.9833215062155225, iteration: 247148
loss: 0.9591552019119263,grad_norm: 0.8215959344234033, iteration: 247149
loss: 1.084688663482666,grad_norm: 0.9999999205006566, iteration: 247150
loss: 0.9861279129981995,grad_norm: 0.9070601137467921, iteration: 247151
loss: 0.9920292496681213,grad_norm: 0.7795407350202525, iteration: 247152
loss: 0.9481759667396545,grad_norm: 0.9589391632088297, iteration: 247153
loss: 1.0261433124542236,grad_norm: 0.9999994207524113, iteration: 247154
loss: 1.0416103601455688,grad_norm: 0.855230854545851, iteration: 247155
loss: 1.0092610120773315,grad_norm: 0.874161435070362, iteration: 247156
loss: 0.973985493183136,grad_norm: 0.9250677242115803, iteration: 247157
loss: 0.9879015684127808,grad_norm: 0.8190048560600105, iteration: 247158
loss: 0.9758639931678772,grad_norm: 0.9762217409844377, iteration: 247159
loss: 0.9714236259460449,grad_norm: 0.999999047176792, iteration: 247160
loss: 0.9607911109924316,grad_norm: 0.8892222217612136, iteration: 247161
loss: 1.0089443922042847,grad_norm: 0.9762742657401824, iteration: 247162
loss: 1.0080689191818237,grad_norm: 0.9999990544810288, iteration: 247163
loss: 1.093011736869812,grad_norm: 0.999999068628751, iteration: 247164
loss: 1.0219800472259521,grad_norm: 0.9999990057546961, iteration: 247165
loss: 0.9948053956031799,grad_norm: 0.8759060241545208, iteration: 247166
loss: 0.9629753232002258,grad_norm: 0.888732464366577, iteration: 247167
loss: 0.9865553379058838,grad_norm: 0.7989130119058712, iteration: 247168
loss: 1.037104845046997,grad_norm: 0.9999989452476492, iteration: 247169
loss: 1.0071238279342651,grad_norm: 0.9328358507369483, iteration: 247170
loss: 0.9800294041633606,grad_norm: 0.8842286319690201, iteration: 247171
loss: 1.0035609006881714,grad_norm: 0.8775819084370629, iteration: 247172
loss: 1.036961555480957,grad_norm: 0.8379146356635048, iteration: 247173
loss: 1.021146535873413,grad_norm: 0.8658929189618745, iteration: 247174
loss: 0.9884498119354248,grad_norm: 0.8154447499184023, iteration: 247175
loss: 0.9826694130897522,grad_norm: 0.8284342119495046, iteration: 247176
loss: 0.9902892708778381,grad_norm: 0.9194197554028317, iteration: 247177
loss: 0.988274335861206,grad_norm: 0.8890638108935751, iteration: 247178
loss: 0.9928363561630249,grad_norm: 0.9128146482058287, iteration: 247179
loss: 1.0213208198547363,grad_norm: 0.9240501992571977, iteration: 247180
loss: 0.9653124809265137,grad_norm: 0.859211411115091, iteration: 247181
loss: 1.0186593532562256,grad_norm: 0.9940438377893565, iteration: 247182
loss: 0.9898295998573303,grad_norm: 0.8354980461564457, iteration: 247183
loss: 1.0085374116897583,grad_norm: 0.8800320250207658, iteration: 247184
loss: 0.9679346680641174,grad_norm: 0.7491617686953072, iteration: 247185
loss: 0.9946342706680298,grad_norm: 0.7954157439308384, iteration: 247186
loss: 1.0689116716384888,grad_norm: 0.9381143299564295, iteration: 247187
loss: 1.0013949871063232,grad_norm: 0.8597485617594743, iteration: 247188
loss: 1.0088804960250854,grad_norm: 0.9999991562417293, iteration: 247189
loss: 1.0010404586791992,grad_norm: 0.8183645765792129, iteration: 247190
loss: 0.969273030757904,grad_norm: 0.8417384040591865, iteration: 247191
loss: 0.9908638000488281,grad_norm: 0.9334753637146347, iteration: 247192
loss: 1.0224424600601196,grad_norm: 0.7597314904519265, iteration: 247193
loss: 1.013278841972351,grad_norm: 0.9542053155112322, iteration: 247194
loss: 1.0339574813842773,grad_norm: 0.8018817147523197, iteration: 247195
loss: 1.0322959423065186,grad_norm: 0.9294899968423083, iteration: 247196
loss: 0.9977598786354065,grad_norm: 0.9999991911748787, iteration: 247197
loss: 0.9898091554641724,grad_norm: 0.6985428327479527, iteration: 247198
loss: 0.9539340138435364,grad_norm: 0.978379924967925, iteration: 247199
loss: 1.023239254951477,grad_norm: 0.9174611597681711, iteration: 247200
loss: 0.9835586547851562,grad_norm: 0.9773973936425429, iteration: 247201
loss: 1.0170530080795288,grad_norm: 0.9999992645498017, iteration: 247202
loss: 0.9811461567878723,grad_norm: 0.818136973005413, iteration: 247203
loss: 0.9845201373100281,grad_norm: 0.7413624610071483, iteration: 247204
loss: 0.980627179145813,grad_norm: 0.8852140764582617, iteration: 247205
loss: 1.0161161422729492,grad_norm: 0.9999991792517802, iteration: 247206
loss: 0.99604731798172,grad_norm: 0.8118072791742506, iteration: 247207
loss: 1.017440676689148,grad_norm: 0.8611711056141627, iteration: 247208
loss: 0.9736951589584351,grad_norm: 0.9999991092576697, iteration: 247209
loss: 0.9943655133247375,grad_norm: 0.9258162325382346, iteration: 247210
loss: 0.9998642206192017,grad_norm: 0.8226778194080189, iteration: 247211
loss: 1.0382254123687744,grad_norm: 0.9273451064803137, iteration: 247212
loss: 0.9930768013000488,grad_norm: 0.951837346388315, iteration: 247213
loss: 1.0205066204071045,grad_norm: 0.828330273904266, iteration: 247214
loss: 1.0466066598892212,grad_norm: 0.9791533968384677, iteration: 247215
loss: 1.0182055234909058,grad_norm: 0.9999991028286207, iteration: 247216
loss: 1.037819504737854,grad_norm: 0.9999991861080414, iteration: 247217
loss: 0.9620351791381836,grad_norm: 0.8166393418006611, iteration: 247218
loss: 0.9861645102500916,grad_norm: 0.9999989747707095, iteration: 247219
loss: 1.014803409576416,grad_norm: 0.8182306191934012, iteration: 247220
loss: 1.0008397102355957,grad_norm: 0.8231297313424136, iteration: 247221
loss: 1.0025339126586914,grad_norm: 0.9999989190172066, iteration: 247222
loss: 1.0356756448745728,grad_norm: 0.9999992648947876, iteration: 247223
loss: 0.9936898350715637,grad_norm: 0.9777264046902306, iteration: 247224
loss: 0.9835605621337891,grad_norm: 0.90289196731809, iteration: 247225
loss: 1.1194181442260742,grad_norm: 0.9999993454916658, iteration: 247226
loss: 0.9884454607963562,grad_norm: 0.9999992878962753, iteration: 247227
loss: 0.9704850912094116,grad_norm: 0.9513370345887617, iteration: 247228
loss: 1.025497317314148,grad_norm: 0.8406866210473775, iteration: 247229
loss: 0.9963948130607605,grad_norm: 0.7496695789937079, iteration: 247230
loss: 1.0102766752243042,grad_norm: 0.9299360715327917, iteration: 247231
loss: 1.029680609703064,grad_norm: 0.9999993476994032, iteration: 247232
loss: 1.011021614074707,grad_norm: 0.9999990875778563, iteration: 247233
loss: 1.0061469078063965,grad_norm: 0.8404300732185539, iteration: 247234
loss: 0.9867238402366638,grad_norm: 0.9198353393873737, iteration: 247235
loss: 1.0140846967697144,grad_norm: 0.9641713739996418, iteration: 247236
loss: 0.9916163682937622,grad_norm: 0.9542377553350265, iteration: 247237
loss: 0.994689404964447,grad_norm: 0.9999992252752254, iteration: 247238
loss: 1.0056488513946533,grad_norm: 0.883014969894688, iteration: 247239
loss: 1.0070122480392456,grad_norm: 0.9999997508172458, iteration: 247240
loss: 1.0017277002334595,grad_norm: 0.9999992899212077, iteration: 247241
loss: 0.9618912935256958,grad_norm: 0.9999988984986532, iteration: 247242
loss: 0.9740996360778809,grad_norm: 0.9297971015177791, iteration: 247243
loss: 0.9966134428977966,grad_norm: 0.7524993591385211, iteration: 247244
loss: 1.0223698616027832,grad_norm: 0.9549780726406017, iteration: 247245
loss: 0.9761602878570557,grad_norm: 0.8696940002708267, iteration: 247246
loss: 1.0096492767333984,grad_norm: 0.8995012904711744, iteration: 247247
loss: 1.011508822441101,grad_norm: 0.8531673443783362, iteration: 247248
loss: 0.9582672119140625,grad_norm: 0.9486309162916228, iteration: 247249
loss: 0.9793161153793335,grad_norm: 0.9194981798163941, iteration: 247250
loss: 0.9770542979240417,grad_norm: 0.9632887173714594, iteration: 247251
loss: 1.011804461479187,grad_norm: 0.8385866651883847, iteration: 247252
loss: 0.9817188382148743,grad_norm: 0.9307163236452576, iteration: 247253
loss: 0.9810409545898438,grad_norm: 0.9943434338644094, iteration: 247254
loss: 0.9459708333015442,grad_norm: 0.9999991542710468, iteration: 247255
loss: 1.0144145488739014,grad_norm: 0.7872485309552628, iteration: 247256
loss: 0.9658081531524658,grad_norm: 0.999999563824292, iteration: 247257
loss: 1.0149486064910889,grad_norm: 0.9335031817383979, iteration: 247258
loss: 1.0073003768920898,grad_norm: 0.8974159675945543, iteration: 247259
loss: 0.9803758859634399,grad_norm: 0.8947845502832997, iteration: 247260
loss: 0.9904090166091919,grad_norm: 0.9302106098031548, iteration: 247261
loss: 1.01498544216156,grad_norm: 0.9019432739943536, iteration: 247262
loss: 0.9594469666481018,grad_norm: 0.7867031997724302, iteration: 247263
loss: 0.970816433429718,grad_norm: 0.8772752962918904, iteration: 247264
loss: 0.9892035722732544,grad_norm: 0.9177633620860826, iteration: 247265
loss: 0.9387986063957214,grad_norm: 0.9760521274225611, iteration: 247266
loss: 1.0172775983810425,grad_norm: 0.7837581253388948, iteration: 247267
loss: 0.9978519678115845,grad_norm: 0.9149018450188239, iteration: 247268
loss: 1.0127782821655273,grad_norm: 0.7638743312781431, iteration: 247269
loss: 0.9854357838630676,grad_norm: 0.7958910545072421, iteration: 247270
loss: 1.0044373273849487,grad_norm: 0.9811903473925961, iteration: 247271
loss: 0.9987812638282776,grad_norm: 0.9520384863216581, iteration: 247272
loss: 0.9983999133110046,grad_norm: 0.9616623335092177, iteration: 247273
loss: 0.9998099207878113,grad_norm: 0.9703248836548432, iteration: 247274
loss: 1.0223404169082642,grad_norm: 0.8963306827186271, iteration: 247275
loss: 0.9760238528251648,grad_norm: 0.8752247944029273, iteration: 247276
loss: 1.0241332054138184,grad_norm: 0.961648322630826, iteration: 247277
loss: 1.0117613077163696,grad_norm: 0.7967503327234201, iteration: 247278
loss: 1.008158802986145,grad_norm: 0.9491145536132928, iteration: 247279
loss: 1.0230783224105835,grad_norm: 0.8184503018798361, iteration: 247280
loss: 1.0092625617980957,grad_norm: 0.8787450298050509, iteration: 247281
loss: 0.9956085085868835,grad_norm: 0.9999990676784986, iteration: 247282
loss: 0.9859925508499146,grad_norm: 0.9909195767371971, iteration: 247283
loss: 0.9782406091690063,grad_norm: 0.8147435281469464, iteration: 247284
loss: 0.9797117710113525,grad_norm: 0.8479419054122064, iteration: 247285
loss: 1.0048574209213257,grad_norm: 0.8756724812841623, iteration: 247286
loss: 1.0085245370864868,grad_norm: 0.9635037087269036, iteration: 247287
loss: 1.0171117782592773,grad_norm: 0.9183501452878762, iteration: 247288
loss: 1.0439515113830566,grad_norm: 0.9999996646347414, iteration: 247289
loss: 1.017592430114746,grad_norm: 0.8616151238523914, iteration: 247290
loss: 0.9574058651924133,grad_norm: 0.8075039887287526, iteration: 247291
loss: 1.0334097146987915,grad_norm: 0.9999993486592281, iteration: 247292
loss: 1.0235625505447388,grad_norm: 0.7760814052118915, iteration: 247293
loss: 1.0395387411117554,grad_norm: 0.84410995548849, iteration: 247294
loss: 0.9864806532859802,grad_norm: 0.850065958727637, iteration: 247295
loss: 1.016281247138977,grad_norm: 0.9999990761358494, iteration: 247296
loss: 1.00232994556427,grad_norm: 0.7160158540601828, iteration: 247297
loss: 1.016958236694336,grad_norm: 0.983619069853961, iteration: 247298
loss: 0.9677565097808838,grad_norm: 0.955282506411073, iteration: 247299
loss: 1.0803524255752563,grad_norm: 0.893405246250452, iteration: 247300
loss: 0.9664695262908936,grad_norm: 0.862956352123766, iteration: 247301
loss: 1.0149035453796387,grad_norm: 0.7103260541051074, iteration: 247302
loss: 1.0261261463165283,grad_norm: 0.7845171092476702, iteration: 247303
loss: 0.9969300627708435,grad_norm: 0.6843192625107392, iteration: 247304
loss: 0.9654554724693298,grad_norm: 0.923535384477169, iteration: 247305
loss: 0.9963364601135254,grad_norm: 0.999999496730648, iteration: 247306
loss: 0.9701159596443176,grad_norm: 0.7119467791138546, iteration: 247307
loss: 1.0040594339370728,grad_norm: 0.8349572702650268, iteration: 247308
loss: 0.9983857274055481,grad_norm: 0.9234879561055591, iteration: 247309
loss: 1.0094175338745117,grad_norm: 0.9489342928954162, iteration: 247310
loss: 0.9943811297416687,grad_norm: 0.8615246304672138, iteration: 247311
loss: 1.015038251876831,grad_norm: 0.8756514486962055, iteration: 247312
loss: 0.9756495356559753,grad_norm: 0.8523976102742273, iteration: 247313
loss: 1.033463478088379,grad_norm: 0.9463314995770976, iteration: 247314
loss: 1.0217210054397583,grad_norm: 0.8708644117585899, iteration: 247315
loss: 0.9729140400886536,grad_norm: 0.8501823733409585, iteration: 247316
loss: 0.9934182167053223,grad_norm: 0.8137217083630548, iteration: 247317
loss: 1.0214923620224,grad_norm: 0.9999991640311354, iteration: 247318
loss: 0.9997182488441467,grad_norm: 0.9999990112790927, iteration: 247319
loss: 0.9975213408470154,grad_norm: 0.9641281940313989, iteration: 247320
loss: 0.9848719835281372,grad_norm: 0.8932398086668323, iteration: 247321
loss: 0.9960322380065918,grad_norm: 0.9389029785304716, iteration: 247322
loss: 1.000404953956604,grad_norm: 0.7994534822458863, iteration: 247323
loss: 0.9998522996902466,grad_norm: 0.9547035190345607, iteration: 247324
loss: 1.0693721771240234,grad_norm: 0.9999997256668954, iteration: 247325
loss: 1.1531567573547363,grad_norm: 0.9524403683277753, iteration: 247326
loss: 1.0039033889770508,grad_norm: 0.915262786889142, iteration: 247327
loss: 0.9985837340354919,grad_norm: 0.9999991594193212, iteration: 247328
loss: 1.0082697868347168,grad_norm: 0.7773172809001112, iteration: 247329
loss: 1.0000340938568115,grad_norm: 0.9745972569332626, iteration: 247330
loss: 0.9931922554969788,grad_norm: 0.8573985944214313, iteration: 247331
loss: 0.974493682384491,grad_norm: 0.8871081610117062, iteration: 247332
loss: 1.034876823425293,grad_norm: 0.9999989545353898, iteration: 247333
loss: 1.0484060049057007,grad_norm: 0.9999996654442641, iteration: 247334
loss: 1.0284196138381958,grad_norm: 0.9999990507892276, iteration: 247335
loss: 1.016527533531189,grad_norm: 0.9579303114988184, iteration: 247336
loss: 1.0438859462738037,grad_norm: 0.8994633652762762, iteration: 247337
loss: 0.9678681492805481,grad_norm: 0.8730382729527839, iteration: 247338
loss: 1.0085560083389282,grad_norm: 0.999999070110325, iteration: 247339
loss: 1.0040818452835083,grad_norm: 0.846341226621974, iteration: 247340
loss: 1.0208911895751953,grad_norm: 0.874173981344509, iteration: 247341
loss: 0.9975010752677917,grad_norm: 0.8693610686808295, iteration: 247342
loss: 1.0028144121170044,grad_norm: 0.8537152742004048, iteration: 247343
loss: 0.9803208112716675,grad_norm: 0.9943866150708646, iteration: 247344
loss: 0.9969066381454468,grad_norm: 0.8687081720663916, iteration: 247345
loss: 1.0045826435089111,grad_norm: 0.8129720952973827, iteration: 247346
loss: 0.9898808598518372,grad_norm: 0.999999273984252, iteration: 247347
loss: 1.009555459022522,grad_norm: 0.9999991360424897, iteration: 247348
loss: 0.9761403203010559,grad_norm: 0.9999991810286304, iteration: 247349
loss: 1.0185219049453735,grad_norm: 0.8707779727383618, iteration: 247350
loss: 0.9881000518798828,grad_norm: 0.8696209515173075, iteration: 247351
loss: 0.970162034034729,grad_norm: 0.8690997463251634, iteration: 247352
loss: 0.9985756874084473,grad_norm: 0.9999991408898816, iteration: 247353
loss: 1.0321638584136963,grad_norm: 0.9765952367941747, iteration: 247354
loss: 1.074783444404602,grad_norm: 0.9999997853936329, iteration: 247355
loss: 1.0224465131759644,grad_norm: 0.8286048960408431, iteration: 247356
loss: 0.9899136424064636,grad_norm: 0.8258585818920957, iteration: 247357
loss: 1.0217934846878052,grad_norm: 0.845399615312845, iteration: 247358
loss: 0.9726274609565735,grad_norm: 0.9736476885016031, iteration: 247359
loss: 1.020897626876831,grad_norm: 0.8266717990969034, iteration: 247360
loss: 1.0023248195648193,grad_norm: 0.8279815062103565, iteration: 247361
loss: 1.0024880170822144,grad_norm: 0.8048243971688797, iteration: 247362
loss: 0.9701454639434814,grad_norm: 0.9999991699095068, iteration: 247363
loss: 1.0193860530853271,grad_norm: 0.9535092485414374, iteration: 247364
loss: 0.9870370030403137,grad_norm: 0.9375915249610683, iteration: 247365
loss: 1.0195811986923218,grad_norm: 0.8657988280752272, iteration: 247366
loss: 1.0307778120040894,grad_norm: 0.7704429218780962, iteration: 247367
loss: 0.9842455983161926,grad_norm: 0.9039744112998838, iteration: 247368
loss: 0.9869328141212463,grad_norm: 0.9999994805571404, iteration: 247369
loss: 0.9846360683441162,grad_norm: 0.9208013461243382, iteration: 247370
loss: 0.9932600259780884,grad_norm: 0.8676987573128806, iteration: 247371
loss: 0.9642314910888672,grad_norm: 0.7401327258726459, iteration: 247372
loss: 1.000219702720642,grad_norm: 0.8783584024313353, iteration: 247373
loss: 0.9952382445335388,grad_norm: 0.8560205057449339, iteration: 247374
loss: 1.024860143661499,grad_norm: 0.9035130127476289, iteration: 247375
loss: 0.9941301345825195,grad_norm: 0.901538250041707, iteration: 247376
loss: 0.9541077613830566,grad_norm: 0.9759420903081627, iteration: 247377
loss: 1.015809178352356,grad_norm: 0.8147447833032448, iteration: 247378
loss: 0.9736261963844299,grad_norm: 0.9671923093212575, iteration: 247379
loss: 1.0266505479812622,grad_norm: 0.9999990386008922, iteration: 247380
loss: 1.000792145729065,grad_norm: 0.8415782883530758, iteration: 247381
loss: 0.9854705929756165,grad_norm: 0.9415636236579499, iteration: 247382
loss: 1.0305793285369873,grad_norm: 0.8089320930208584, iteration: 247383
loss: 1.0017696619033813,grad_norm: 0.8776739648760702, iteration: 247384
loss: 1.0547066926956177,grad_norm: 0.8545022155497831, iteration: 247385
loss: 1.0386507511138916,grad_norm: 0.8421164845508889, iteration: 247386
loss: 1.0528825521469116,grad_norm: 0.999999125614231, iteration: 247387
loss: 0.999880313873291,grad_norm: 0.8677043855412329, iteration: 247388
loss: 0.9887813925743103,grad_norm: 0.7495772652814326, iteration: 247389
loss: 0.9843382239341736,grad_norm: 0.9999990476367132, iteration: 247390
loss: 0.9989672899246216,grad_norm: 0.9073634647639663, iteration: 247391
loss: 0.9851880073547363,grad_norm: 0.9351716613005615, iteration: 247392
loss: 0.9866250157356262,grad_norm: 0.7910958464021788, iteration: 247393
loss: 0.9735053777694702,grad_norm: 0.9999989781938389, iteration: 247394
loss: 0.9888511300086975,grad_norm: 0.770530455912593, iteration: 247395
loss: 0.968562126159668,grad_norm: 0.9262979311701969, iteration: 247396
loss: 0.9851824045181274,grad_norm: 0.9999990870299554, iteration: 247397
loss: 0.9957174062728882,grad_norm: 0.9999989967083622, iteration: 247398
loss: 0.987004280090332,grad_norm: 0.7186702455278616, iteration: 247399
loss: 0.9801834225654602,grad_norm: 0.9999991702789979, iteration: 247400
loss: 0.977043628692627,grad_norm: 0.786648966021541, iteration: 247401
loss: 0.9570069909095764,grad_norm: 0.9814122623053854, iteration: 247402
loss: 0.9873746633529663,grad_norm: 0.9999991781421723, iteration: 247403
loss: 0.983867883682251,grad_norm: 0.9082210051352069, iteration: 247404
loss: 1.0157852172851562,grad_norm: 0.9218515117020671, iteration: 247405
loss: 0.9625828266143799,grad_norm: 0.9999991276457229, iteration: 247406
loss: 1.0174379348754883,grad_norm: 0.9069225049491402, iteration: 247407
loss: 1.0099780559539795,grad_norm: 0.8792805387404097, iteration: 247408
loss: 1.0158785581588745,grad_norm: 0.7370261903715789, iteration: 247409
loss: 1.0024495124816895,grad_norm: 0.9193436077766913, iteration: 247410
loss: 1.0179418325424194,grad_norm: 0.8821767292857637, iteration: 247411
loss: 1.0058932304382324,grad_norm: 0.9999990855538123, iteration: 247412
loss: 1.0193809270858765,grad_norm: 0.999999033778068, iteration: 247413
loss: 0.9899277091026306,grad_norm: 0.9809786731276103, iteration: 247414
loss: 0.9805114269256592,grad_norm: 0.9999991045251774, iteration: 247415
loss: 1.005478858947754,grad_norm: 0.939829526923114, iteration: 247416
loss: 1.0104912519454956,grad_norm: 0.876085032806365, iteration: 247417
loss: 0.9907467365264893,grad_norm: 0.8865025049230522, iteration: 247418
loss: 0.9987694621086121,grad_norm: 0.9187818658010659, iteration: 247419
loss: 0.9762773513793945,grad_norm: 0.7621209224110941, iteration: 247420
loss: 0.9836247563362122,grad_norm: 0.9120305467233037, iteration: 247421
loss: 1.0075938701629639,grad_norm: 0.9999990079380482, iteration: 247422
loss: 0.9847655296325684,grad_norm: 0.9999992273179831, iteration: 247423
loss: 0.9891072511672974,grad_norm: 0.8619605267594054, iteration: 247424
loss: 1.0141475200653076,grad_norm: 0.7838864869627131, iteration: 247425
loss: 1.0016000270843506,grad_norm: 0.977193978292766, iteration: 247426
loss: 0.9782474040985107,grad_norm: 0.821415416707415, iteration: 247427
loss: 0.9663750529289246,grad_norm: 0.9957684209952756, iteration: 247428
loss: 1.004603385925293,grad_norm: 0.9708986133415288, iteration: 247429
loss: 1.0195547342300415,grad_norm: 0.8210356100822013, iteration: 247430
loss: 1.0347236394882202,grad_norm: 0.859345211195957, iteration: 247431
loss: 1.0249828100204468,grad_norm: 0.9849095751712702, iteration: 247432
loss: 0.9802083969116211,grad_norm: 0.9999991141412394, iteration: 247433
loss: 0.9795293211936951,grad_norm: 0.999999232206192, iteration: 247434
loss: 1.0054168701171875,grad_norm: 0.7917524866202773, iteration: 247435
loss: 1.0040143728256226,grad_norm: 0.9999990509422616, iteration: 247436
loss: 1.0152487754821777,grad_norm: 0.9999991059570352, iteration: 247437
loss: 0.9989278316497803,grad_norm: 0.8030634520323817, iteration: 247438
loss: 1.005985140800476,grad_norm: 0.8545276331542753, iteration: 247439
loss: 0.9952229261398315,grad_norm: 0.9923585308303842, iteration: 247440
loss: 0.975019097328186,grad_norm: 0.918188910925759, iteration: 247441
loss: 0.9967817068099976,grad_norm: 0.934882960432161, iteration: 247442
loss: 1.0003407001495361,grad_norm: 0.9999995128695743, iteration: 247443
loss: 0.9670150876045227,grad_norm: 0.9485360042916625, iteration: 247444
loss: 0.9910528659820557,grad_norm: 0.8112511009035798, iteration: 247445
loss: 0.9966614246368408,grad_norm: 0.9086249044488313, iteration: 247446
loss: 1.008884072303772,grad_norm: 0.9700463256611151, iteration: 247447
loss: 1.0043338537216187,grad_norm: 0.9891057028097893, iteration: 247448
loss: 0.9551619291305542,grad_norm: 0.8308591266086123, iteration: 247449
loss: 1.0402904748916626,grad_norm: 0.999999431536438, iteration: 247450
loss: 0.9667741656303406,grad_norm: 0.9013860738590737, iteration: 247451
loss: 1.0250736474990845,grad_norm: 0.8832870483869264, iteration: 247452
loss: 1.0012915134429932,grad_norm: 0.9178527574434071, iteration: 247453
loss: 0.99592524766922,grad_norm: 0.8743246677928891, iteration: 247454
loss: 0.9913681745529175,grad_norm: 0.7519510480021804, iteration: 247455
loss: 0.9918932914733887,grad_norm: 0.9999995974779807, iteration: 247456
loss: 0.9546148180961609,grad_norm: 0.9523549497295831, iteration: 247457
loss: 1.0271753072738647,grad_norm: 0.8186642365457838, iteration: 247458
loss: 0.9759302139282227,grad_norm: 0.8325084788079818, iteration: 247459
loss: 0.9772871732711792,grad_norm: 0.9999991511518125, iteration: 247460
loss: 1.0081658363342285,grad_norm: 0.9938587179778356, iteration: 247461
loss: 1.0017486810684204,grad_norm: 0.8481074029355999, iteration: 247462
loss: 1.0165719985961914,grad_norm: 0.8564868621085665, iteration: 247463
loss: 0.991346538066864,grad_norm: 0.9999991040472004, iteration: 247464
loss: 0.972101092338562,grad_norm: 0.8874061314837353, iteration: 247465
loss: 1.0191329717636108,grad_norm: 0.9126592883012408, iteration: 247466
loss: 0.9945540428161621,grad_norm: 0.9706980403835122, iteration: 247467
loss: 0.9888997077941895,grad_norm: 0.999999113066562, iteration: 247468
loss: 0.9831387400627136,grad_norm: 0.9257545003192537, iteration: 247469
loss: 0.9824248552322388,grad_norm: 0.7533324014737314, iteration: 247470
loss: 0.992628812789917,grad_norm: 0.9422000881463523, iteration: 247471
loss: 0.996764063835144,grad_norm: 0.7415804616872967, iteration: 247472
loss: 0.9969489574432373,grad_norm: 0.8273579295530412, iteration: 247473
loss: 0.9932404160499573,grad_norm: 0.9999990662808611, iteration: 247474
loss: 1.017439603805542,grad_norm: 0.9165014775434142, iteration: 247475
loss: 0.9940486550331116,grad_norm: 0.9999990820870359, iteration: 247476
loss: 0.985093891620636,grad_norm: 0.9999991010264202, iteration: 247477
loss: 0.9847145676612854,grad_norm: 0.8005009518582961, iteration: 247478
loss: 0.9841681122779846,grad_norm: 0.9611868559738195, iteration: 247479
loss: 1.1454263925552368,grad_norm: 0.9999996439319775, iteration: 247480
loss: 0.9931961894035339,grad_norm: 0.9336745024414302, iteration: 247481
loss: 1.0198149681091309,grad_norm: 0.9999991048753127, iteration: 247482
loss: 0.978647768497467,grad_norm: 0.9999990627559394, iteration: 247483
loss: 1.0167008638381958,grad_norm: 0.8623106479050142, iteration: 247484
loss: 1.0092203617095947,grad_norm: 0.9227535417082611, iteration: 247485
loss: 0.991835355758667,grad_norm: 0.8979756789121462, iteration: 247486
loss: 1.01150643825531,grad_norm: 0.855407784627822, iteration: 247487
loss: 1.0362350940704346,grad_norm: 0.9999990940460518, iteration: 247488
loss: 1.102603793144226,grad_norm: 0.999999787166732, iteration: 247489
loss: 0.9764745235443115,grad_norm: 0.9175527704996999, iteration: 247490
loss: 1.0002769231796265,grad_norm: 0.9780446661234881, iteration: 247491
loss: 1.0384060144424438,grad_norm: 0.8141730956809214, iteration: 247492
loss: 0.9921717643737793,grad_norm: 0.9779467731396998, iteration: 247493
loss: 1.0160170793533325,grad_norm: 0.7874349978082988, iteration: 247494
loss: 0.9599798321723938,grad_norm: 0.8550030965983472, iteration: 247495
loss: 0.987389087677002,grad_norm: 0.8732588329570199, iteration: 247496
loss: 0.9763225317001343,grad_norm: 0.9630035818487309, iteration: 247497
loss: 1.026627540588379,grad_norm: 0.8230708480958417, iteration: 247498
loss: 1.0166951417922974,grad_norm: 0.696674037066, iteration: 247499
loss: 1.0286295413970947,grad_norm: 0.8697132152093084, iteration: 247500
loss: 0.9924788475036621,grad_norm: 0.999999179965566, iteration: 247501
loss: 1.0116900205612183,grad_norm: 0.999999064994227, iteration: 247502
loss: 0.9939587116241455,grad_norm: 0.9583896908206274, iteration: 247503
loss: 1.0052711963653564,grad_norm: 0.9999991006248512, iteration: 247504
loss: 1.0063666105270386,grad_norm: 0.9999994748470054, iteration: 247505
loss: 0.9902958869934082,grad_norm: 0.9338570719623047, iteration: 247506
loss: 0.9834486842155457,grad_norm: 0.8518710627415743, iteration: 247507
loss: 1.0270851850509644,grad_norm: 0.8200763527306264, iteration: 247508
loss: 0.9615822434425354,grad_norm: 0.9747103555305404, iteration: 247509
loss: 1.099869966506958,grad_norm: 0.9999995191875021, iteration: 247510
loss: 1.0170079469680786,grad_norm: 0.8408484880777684, iteration: 247511
loss: 1.016329288482666,grad_norm: 0.8740177383361343, iteration: 247512
loss: 1.0003658533096313,grad_norm: 0.8881290647424984, iteration: 247513
loss: 1.0128426551818848,grad_norm: 0.9999990343950438, iteration: 247514
loss: 1.0381346940994263,grad_norm: 0.9999991891932559, iteration: 247515
loss: 0.9862446188926697,grad_norm: 0.9097831213270501, iteration: 247516
loss: 0.9921056628227234,grad_norm: 0.8870091386818141, iteration: 247517
loss: 1.0035446882247925,grad_norm: 0.9970695831611857, iteration: 247518
loss: 0.9972270727157593,grad_norm: 0.8264253236491307, iteration: 247519
loss: 1.026667594909668,grad_norm: 0.8793547499330466, iteration: 247520
loss: 1.0010727643966675,grad_norm: 0.9187807789517473, iteration: 247521
loss: 1.0085126161575317,grad_norm: 0.8877304411029043, iteration: 247522
loss: 0.9868484735488892,grad_norm: 0.9999991658429771, iteration: 247523
loss: 0.978534460067749,grad_norm: 0.8408486089292648, iteration: 247524
loss: 0.9929183721542358,grad_norm: 0.9348292033216674, iteration: 247525
loss: 0.9948244094848633,grad_norm: 0.7864781414183653, iteration: 247526
loss: 1.0143013000488281,grad_norm: 0.8903958098449392, iteration: 247527
loss: 0.9879528284072876,grad_norm: 0.999999125801887, iteration: 247528
loss: 1.015459418296814,grad_norm: 0.7737090240709797, iteration: 247529
loss: 1.0259809494018555,grad_norm: 0.9999992526174764, iteration: 247530
loss: 1.0070865154266357,grad_norm: 0.9999992112467737, iteration: 247531
loss: 1.0268582105636597,grad_norm: 0.8859305289067121, iteration: 247532
loss: 1.0037477016448975,grad_norm: 0.8706542360309588, iteration: 247533
loss: 1.03826105594635,grad_norm: 0.9093277274504447, iteration: 247534
loss: 1.0328369140625,grad_norm: 0.9999992907532752, iteration: 247535
loss: 0.9924184679985046,grad_norm: 0.9999991767463756, iteration: 247536
loss: 0.9984084963798523,grad_norm: 0.9748122205912683, iteration: 247537
loss: 0.9617198705673218,grad_norm: 0.9999990267846846, iteration: 247538
loss: 1.0240360498428345,grad_norm: 0.8402421016553426, iteration: 247539
loss: 1.0332680940628052,grad_norm: 0.8565531373354274, iteration: 247540
loss: 0.9779291749000549,grad_norm: 0.8596749302605813, iteration: 247541
loss: 1.0016112327575684,grad_norm: 0.7801425557357354, iteration: 247542
loss: 1.015800952911377,grad_norm: 0.99999919209642, iteration: 247543
loss: 1.0050159692764282,grad_norm: 0.963122857720333, iteration: 247544
loss: 1.011304259300232,grad_norm: 0.7181648143676426, iteration: 247545
loss: 1.0139135122299194,grad_norm: 0.8386687326358113, iteration: 247546
loss: 1.0406187772750854,grad_norm: 0.7790379177334453, iteration: 247547
loss: 1.0023916959762573,grad_norm: 0.7115081248398921, iteration: 247548
loss: 1.0211491584777832,grad_norm: 0.9653256313371662, iteration: 247549
loss: 0.9734494090080261,grad_norm: 0.8173791997481509, iteration: 247550
loss: 0.9957466125488281,grad_norm: 0.9917051792155605, iteration: 247551
loss: 1.0115387439727783,grad_norm: 0.8745667789714561, iteration: 247552
loss: 1.0238947868347168,grad_norm: 0.7909070626671406, iteration: 247553
loss: 1.0001466274261475,grad_norm: 0.9999990244590482, iteration: 247554
loss: 1.029259443283081,grad_norm: 0.9753462337778493, iteration: 247555
loss: 0.9946635961532593,grad_norm: 0.8722897605112645, iteration: 247556
loss: 0.9692575931549072,grad_norm: 0.9937874011459333, iteration: 247557
loss: 1.015116572380066,grad_norm: 0.9120396741308325, iteration: 247558
loss: 0.9682121872901917,grad_norm: 0.9999990888254192, iteration: 247559
loss: 0.9977344870567322,grad_norm: 0.8723270292139709, iteration: 247560
loss: 1.0066757202148438,grad_norm: 0.8802062735972002, iteration: 247561
loss: 1.0044962167739868,grad_norm: 0.838880504179432, iteration: 247562
loss: 1.0287730693817139,grad_norm: 0.8630725871082493, iteration: 247563
loss: 1.0127283334732056,grad_norm: 0.9999992149183345, iteration: 247564
loss: 0.9912306070327759,grad_norm: 0.999998998625134, iteration: 247565
loss: 0.9872815608978271,grad_norm: 0.7575124810741967, iteration: 247566
loss: 1.004098892211914,grad_norm: 0.9428733105617776, iteration: 247567
loss: 0.9756377935409546,grad_norm: 0.8127960256959169, iteration: 247568
loss: 0.9433640241622925,grad_norm: 0.7513712947817758, iteration: 247569
loss: 1.0174871683120728,grad_norm: 0.9999990301001155, iteration: 247570
loss: 1.014904499053955,grad_norm: 0.853430406006658, iteration: 247571
loss: 1.016250491142273,grad_norm: 0.8631787465428016, iteration: 247572
loss: 0.9778634309768677,grad_norm: 0.8888177737872981, iteration: 247573
loss: 0.9805966019630432,grad_norm: 0.8389921851350941, iteration: 247574
loss: 0.9997448921203613,grad_norm: 0.9204437844950506, iteration: 247575
loss: 0.9972909688949585,grad_norm: 0.8980432285973443, iteration: 247576
loss: 1.0402148962020874,grad_norm: 0.9068602739356193, iteration: 247577
loss: 1.006211280822754,grad_norm: 0.8737508920029665, iteration: 247578
loss: 0.9982722997665405,grad_norm: 0.8698472168380723, iteration: 247579
loss: 0.9889306426048279,grad_norm: 0.9999990334713319, iteration: 247580
loss: 1.0647467374801636,grad_norm: 0.9658785381259115, iteration: 247581
loss: 1.0009139776229858,grad_norm: 0.7347985924878809, iteration: 247582
loss: 0.982266366481781,grad_norm: 0.8901715232081876, iteration: 247583
loss: 1.0027945041656494,grad_norm: 0.9638568781057842, iteration: 247584
loss: 1.018004059791565,grad_norm: 0.8987713483620121, iteration: 247585
loss: 1.014819860458374,grad_norm: 0.9999991935380348, iteration: 247586
loss: 0.9573893547058105,grad_norm: 0.8730992843489419, iteration: 247587
loss: 0.9990766644477844,grad_norm: 0.9805182006686766, iteration: 247588
loss: 0.9931023120880127,grad_norm: 0.9513916736369159, iteration: 247589
loss: 1.0200644731521606,grad_norm: 0.8330215704220896, iteration: 247590
loss: 1.0137124061584473,grad_norm: 0.8672134408731651, iteration: 247591
loss: 0.9808022975921631,grad_norm: 0.9595117293744395, iteration: 247592
loss: 0.9441238641738892,grad_norm: 0.9827534619514381, iteration: 247593
loss: 0.9785566926002502,grad_norm: 0.7327556788851425, iteration: 247594
loss: 0.9997808337211609,grad_norm: 0.9999990565988832, iteration: 247595
loss: 1.0065090656280518,grad_norm: 0.9373688126168083, iteration: 247596
loss: 1.0637767314910889,grad_norm: 0.9098018161992202, iteration: 247597
loss: 0.9992462396621704,grad_norm: 0.8735867584202954, iteration: 247598
loss: 1.0087913274765015,grad_norm: 0.9821763865813563, iteration: 247599
loss: 0.9788489937782288,grad_norm: 0.8764950033292632, iteration: 247600
loss: 0.967689573764801,grad_norm: 0.9052114925044873, iteration: 247601
loss: 0.9640820026397705,grad_norm: 0.8591569813086968, iteration: 247602
loss: 0.998754620552063,grad_norm: 0.8805552191243257, iteration: 247603
loss: 1.0067225694656372,grad_norm: 0.9295960570279911, iteration: 247604
loss: 1.005879521369934,grad_norm: 0.7248296986561529, iteration: 247605
loss: 1.005455493927002,grad_norm: 0.9999991999360981, iteration: 247606
loss: 0.9762327075004578,grad_norm: 0.9190245168690332, iteration: 247607
loss: 1.0489702224731445,grad_norm: 0.8379114572761246, iteration: 247608
loss: 0.9742341041564941,grad_norm: 0.9999990207181072, iteration: 247609
loss: 0.9913102984428406,grad_norm: 0.7477387293335586, iteration: 247610
loss: 1.007211685180664,grad_norm: 0.8326761171752695, iteration: 247611
loss: 1.0008385181427002,grad_norm: 0.7406081281488129, iteration: 247612
loss: 0.9472436308860779,grad_norm: 0.8179311164315888, iteration: 247613
loss: 1.003552794456482,grad_norm: 0.7763320973257256, iteration: 247614
loss: 0.9909772872924805,grad_norm: 0.8476395351274509, iteration: 247615
loss: 1.0027124881744385,grad_norm: 0.9999992327192904, iteration: 247616
loss: 1.038763403892517,grad_norm: 0.818296692931281, iteration: 247617
loss: 1.0292686223983765,grad_norm: 0.7910154737456189, iteration: 247618
loss: 0.9832853078842163,grad_norm: 0.9999990129801186, iteration: 247619
loss: 0.9829117655754089,grad_norm: 0.9999991338571397, iteration: 247620
loss: 1.0150763988494873,grad_norm: 0.9818881937599676, iteration: 247621
loss: 1.0477139949798584,grad_norm: 0.9999989930019303, iteration: 247622
loss: 0.9845613241195679,grad_norm: 0.6970769775823098, iteration: 247623
loss: 0.9817282557487488,grad_norm: 0.836354439086277, iteration: 247624
loss: 0.9947693943977356,grad_norm: 0.9999991539931314, iteration: 247625
loss: 1.0272819995880127,grad_norm: 0.8537833448065978, iteration: 247626
loss: 1.002219796180725,grad_norm: 0.8111973264474558, iteration: 247627
loss: 0.9741049408912659,grad_norm: 0.9473475950571754, iteration: 247628
loss: 1.0146872997283936,grad_norm: 0.9999998123008443, iteration: 247629
loss: 0.9958829283714294,grad_norm: 0.8197823273085731, iteration: 247630
loss: 1.0197436809539795,grad_norm: 0.8900390612659291, iteration: 247631
loss: 1.0107241868972778,grad_norm: 0.7570238675411107, iteration: 247632
loss: 0.9965038299560547,grad_norm: 0.9668088363069047, iteration: 247633
loss: 1.0365383625030518,grad_norm: 0.9953289870006511, iteration: 247634
loss: 1.011745572090149,grad_norm: 0.8948121429549495, iteration: 247635
loss: 0.9654274582862854,grad_norm: 0.7995273252790491, iteration: 247636
loss: 1.0382750034332275,grad_norm: 0.9867534616104077, iteration: 247637
loss: 1.0101008415222168,grad_norm: 0.9999990695436304, iteration: 247638
loss: 1.0308688879013062,grad_norm: 0.7912873340191552, iteration: 247639
loss: 1.016503095626831,grad_norm: 0.949137861864821, iteration: 247640
loss: 0.9861484169960022,grad_norm: 0.8638431947346236, iteration: 247641
loss: 0.9891226291656494,grad_norm: 0.8439325523702257, iteration: 247642
loss: 0.997869610786438,grad_norm: 0.7765590085452378, iteration: 247643
loss: 1.026437759399414,grad_norm: 0.873680710774543, iteration: 247644
loss: 0.9851521253585815,grad_norm: 0.8304812343225096, iteration: 247645
loss: 1.0119694471359253,grad_norm: 0.9999993317454702, iteration: 247646
loss: 0.9881665706634521,grad_norm: 0.8645005036445464, iteration: 247647
loss: 1.0047580003738403,grad_norm: 0.9999990827793179, iteration: 247648
loss: 1.0493183135986328,grad_norm: 0.8700379348336151, iteration: 247649
loss: 1.0546071529388428,grad_norm: 0.9723467452393169, iteration: 247650
loss: 1.0681062936782837,grad_norm: 0.9999995801175249, iteration: 247651
loss: 0.9702634811401367,grad_norm: 0.9147428955423718, iteration: 247652
loss: 1.003456711769104,grad_norm: 0.9406468217810131, iteration: 247653
loss: 0.9719411134719849,grad_norm: 0.9524855228625977, iteration: 247654
loss: 0.9863258600234985,grad_norm: 0.8867687553255477, iteration: 247655
loss: 0.9859815835952759,grad_norm: 0.8734443316621346, iteration: 247656
loss: 1.0176751613616943,grad_norm: 0.9999997734619301, iteration: 247657
loss: 0.991041362285614,grad_norm: 0.9999990342348067, iteration: 247658
loss: 0.9913543462753296,grad_norm: 0.9999991747642315, iteration: 247659
loss: 0.9816308617591858,grad_norm: 0.7669892410105987, iteration: 247660
loss: 0.9998727440834045,grad_norm: 0.9999990309535508, iteration: 247661
loss: 1.0170339345932007,grad_norm: 0.8287757813844453, iteration: 247662
loss: 0.9921857118606567,grad_norm: 0.8606623014468703, iteration: 247663
loss: 1.0430569648742676,grad_norm: 0.9999990553740266, iteration: 247664
loss: 0.9653616547584534,grad_norm: 0.8959379982569661, iteration: 247665
loss: 0.9695919752120972,grad_norm: 0.9999990453439833, iteration: 247666
loss: 0.9967967867851257,grad_norm: 0.8741184277552433, iteration: 247667
loss: 0.9919194579124451,grad_norm: 0.9999997325022127, iteration: 247668
loss: 0.9769834280014038,grad_norm: 0.8668802349204369, iteration: 247669
loss: 1.028681993484497,grad_norm: 0.9919599320699891, iteration: 247670
loss: 0.9997468590736389,grad_norm: 0.867643481975526, iteration: 247671
loss: 1.0001089572906494,grad_norm: 0.9408501210807712, iteration: 247672
loss: 1.012308120727539,grad_norm: 0.9350625407261094, iteration: 247673
loss: 0.9611414670944214,grad_norm: 0.85382411364322, iteration: 247674
loss: 0.9600545763969421,grad_norm: 0.9862223150852651, iteration: 247675
loss: 0.9925736784934998,grad_norm: 0.9992633883242333, iteration: 247676
loss: 1.0043160915374756,grad_norm: 0.9194016534630802, iteration: 247677
loss: 0.9951289892196655,grad_norm: 0.8545596072019894, iteration: 247678
loss: 0.9852554798126221,grad_norm: 0.9262577899407869, iteration: 247679
loss: 1.0206502676010132,grad_norm: 0.7939161732569705, iteration: 247680
loss: 0.9730294346809387,grad_norm: 0.8327189106000005, iteration: 247681
loss: 0.9826407432556152,grad_norm: 0.8391719420968651, iteration: 247682
loss: 0.983428955078125,grad_norm: 0.8406616888307042, iteration: 247683
loss: 0.9366883039474487,grad_norm: 0.894723040654315, iteration: 247684
loss: 0.9968025088310242,grad_norm: 0.9999991010067352, iteration: 247685
loss: 0.9869546890258789,grad_norm: 0.9999991335708369, iteration: 247686
loss: 1.0345491170883179,grad_norm: 0.8276229937860381, iteration: 247687
loss: 0.9948438405990601,grad_norm: 0.786636249245312, iteration: 247688
loss: 1.038528561592102,grad_norm: 0.9999999345170708, iteration: 247689
loss: 0.9994115233421326,grad_norm: 0.999999085360965, iteration: 247690
loss: 0.9838582277297974,grad_norm: 0.9999991070755851, iteration: 247691
loss: 0.9895056486129761,grad_norm: 0.9528041566077559, iteration: 247692
loss: 1.0307706594467163,grad_norm: 0.9896333434526836, iteration: 247693
loss: 0.9978185296058655,grad_norm: 0.727332760708983, iteration: 247694
loss: 1.1082435846328735,grad_norm: 0.9999996369470925, iteration: 247695
loss: 0.9705899953842163,grad_norm: 0.8054562134844888, iteration: 247696
loss: 0.9906584620475769,grad_norm: 0.9999992609232072, iteration: 247697
loss: 1.0287197828292847,grad_norm: 0.8779298247031982, iteration: 247698
loss: 1.013625979423523,grad_norm: 0.8452925910017834, iteration: 247699
loss: 1.017154335975647,grad_norm: 0.9999994160114035, iteration: 247700
loss: 0.9845113754272461,grad_norm: 0.7785898926724253, iteration: 247701
loss: 0.9765186309814453,grad_norm: 0.7706636582084747, iteration: 247702
loss: 0.987399160861969,grad_norm: 0.9794542891013288, iteration: 247703
loss: 1.0140033960342407,grad_norm: 0.9756678151849505, iteration: 247704
loss: 1.0085458755493164,grad_norm: 0.9999990503598661, iteration: 247705
loss: 0.9972768425941467,grad_norm: 0.9793493837901847, iteration: 247706
loss: 1.0165024995803833,grad_norm: 0.9999995255016735, iteration: 247707
loss: 0.9744210243225098,grad_norm: 0.9999990716924411, iteration: 247708
loss: 1.0763626098632812,grad_norm: 0.8851650381067464, iteration: 247709
loss: 1.032455563545227,grad_norm: 0.8467679162249124, iteration: 247710
loss: 1.0554112195968628,grad_norm: 0.9985464495930412, iteration: 247711
loss: 0.9828495383262634,grad_norm: 0.8643266627617829, iteration: 247712
loss: 0.9964402318000793,grad_norm: 0.9509639767243806, iteration: 247713
loss: 0.9830413460731506,grad_norm: 0.8695480784106506, iteration: 247714
loss: 1.0527868270874023,grad_norm: 0.6750628998661204, iteration: 247715
loss: 0.9725150465965271,grad_norm: 0.9228341806886297, iteration: 247716
loss: 1.0065950155258179,grad_norm: 0.7453990874358762, iteration: 247717
loss: 0.9995627999305725,grad_norm: 0.9999997987381071, iteration: 247718
loss: 0.9670606851577759,grad_norm: 0.9370708418124556, iteration: 247719
loss: 0.9772445559501648,grad_norm: 0.9835858854583666, iteration: 247720
loss: 1.0399315357208252,grad_norm: 0.999999364469169, iteration: 247721
loss: 1.0849239826202393,grad_norm: 0.9999997776917983, iteration: 247722
loss: 0.9901527166366577,grad_norm: 0.8241941669095799, iteration: 247723
loss: 1.009637475013733,grad_norm: 0.9999991988294918, iteration: 247724
loss: 0.9955757260322571,grad_norm: 0.8065730412892147, iteration: 247725
loss: 1.2045572996139526,grad_norm: 0.999999667552988, iteration: 247726
loss: 0.9767712950706482,grad_norm: 0.8867861052484478, iteration: 247727
loss: 1.1123158931732178,grad_norm: 0.9458337871490259, iteration: 247728
loss: 1.0188593864440918,grad_norm: 0.9001090061249492, iteration: 247729
loss: 0.9924843907356262,grad_norm: 0.9999994613560498, iteration: 247730
loss: 0.9746432304382324,grad_norm: 0.9999991292310203, iteration: 247731
loss: 1.052693486213684,grad_norm: 0.8679042514351645, iteration: 247732
loss: 1.073964238166809,grad_norm: 0.9999991759772499, iteration: 247733
loss: 0.9738637804985046,grad_norm: 0.8418389512657937, iteration: 247734
loss: 1.0332846641540527,grad_norm: 0.9999990700705299, iteration: 247735
loss: 1.0122838020324707,grad_norm: 0.985424381057041, iteration: 247736
loss: 0.9798832535743713,grad_norm: 0.9513202781878025, iteration: 247737
loss: 0.9838941693305969,grad_norm: 0.7625401353176232, iteration: 247738
loss: 1.0061101913452148,grad_norm: 0.9999998522952097, iteration: 247739
loss: 0.9849268198013306,grad_norm: 0.8745198251376443, iteration: 247740
loss: 0.9788414835929871,grad_norm: 0.7424265934206854, iteration: 247741
loss: 0.9602552056312561,grad_norm: 0.8728660628807536, iteration: 247742
loss: 0.9992376565933228,grad_norm: 0.9568484709308981, iteration: 247743
loss: 1.0087804794311523,grad_norm: 0.9999992164107236, iteration: 247744
loss: 1.0006983280181885,grad_norm: 0.8791024580258888, iteration: 247745
loss: 1.0035357475280762,grad_norm: 0.734205690921426, iteration: 247746
loss: 0.9910383820533752,grad_norm: 0.9999990699451611, iteration: 247747
loss: 0.9859502911567688,grad_norm: 0.9999990570866546, iteration: 247748
loss: 1.0237386226654053,grad_norm: 0.9999991866973081, iteration: 247749
loss: 1.0179147720336914,grad_norm: 0.8472522644928028, iteration: 247750
loss: 0.9960512518882751,grad_norm: 0.9622558210497368, iteration: 247751
loss: 0.9762941002845764,grad_norm: 0.787893109109523, iteration: 247752
loss: 0.9996820092201233,grad_norm: 0.9507301846324326, iteration: 247753
loss: 0.9673216342926025,grad_norm: 0.8920451247385712, iteration: 247754
loss: 1.0158061981201172,grad_norm: 0.933274747404426, iteration: 247755
loss: 0.9974585771560669,grad_norm: 0.9999992627886557, iteration: 247756
loss: 1.0067750215530396,grad_norm: 0.9713790850257882, iteration: 247757
loss: 1.0330837965011597,grad_norm: 0.9999991532287985, iteration: 247758
loss: 1.00644052028656,grad_norm: 0.9132938007447888, iteration: 247759
loss: 1.0682870149612427,grad_norm: 0.9999998153753034, iteration: 247760
loss: 1.060366153717041,grad_norm: 0.9999993427950842, iteration: 247761
loss: 1.0238374471664429,grad_norm: 0.9999991676942511, iteration: 247762
loss: 1.008678674697876,grad_norm: 0.9999992168166232, iteration: 247763
loss: 0.9953044056892395,grad_norm: 0.9999989470295404, iteration: 247764
loss: 0.9731003642082214,grad_norm: 0.9999992702891536, iteration: 247765
loss: 0.991122305393219,grad_norm: 0.9613938371799463, iteration: 247766
loss: 1.046685814857483,grad_norm: 0.9831073669829558, iteration: 247767
loss: 1.1071300506591797,grad_norm: 0.7935314329721173, iteration: 247768
loss: 0.9862090349197388,grad_norm: 0.864503494252507, iteration: 247769
loss: 1.0091831684112549,grad_norm: 0.9942373457007924, iteration: 247770
loss: 1.0076191425323486,grad_norm: 0.9999991880667516, iteration: 247771
loss: 0.9918470978736877,grad_norm: 0.7605703912303456, iteration: 247772
loss: 1.0478649139404297,grad_norm: 0.9999996321769514, iteration: 247773
loss: 0.970984160900116,grad_norm: 0.9999996041467604, iteration: 247774
loss: 1.0068989992141724,grad_norm: 0.9383642882233479, iteration: 247775
loss: 0.9841598868370056,grad_norm: 0.8182504421795734, iteration: 247776
loss: 1.0898669958114624,grad_norm: 0.9999994286861498, iteration: 247777
loss: 0.9829075336456299,grad_norm: 0.7784610420608187, iteration: 247778
loss: 1.1140193939208984,grad_norm: 0.9999994140391333, iteration: 247779
loss: 1.046963095664978,grad_norm: 0.9999992505768277, iteration: 247780
loss: 1.085471272468567,grad_norm: 0.9999992613523423, iteration: 247781
loss: 0.9896318912506104,grad_norm: 0.8890326517809256, iteration: 247782
loss: 1.0382750034332275,grad_norm: 0.9999994877808973, iteration: 247783
loss: 0.9757071137428284,grad_norm: 0.9357985901857979, iteration: 247784
loss: 0.9602713584899902,grad_norm: 0.9552982470964955, iteration: 247785
loss: 0.9902231097221375,grad_norm: 0.9117797120794832, iteration: 247786
loss: 1.0284318923950195,grad_norm: 0.9229925957483871, iteration: 247787
loss: 0.9883951544761658,grad_norm: 0.9999991606333442, iteration: 247788
loss: 1.0010392665863037,grad_norm: 0.8395283717671629, iteration: 247789
loss: 1.0260905027389526,grad_norm: 0.9957835606086006, iteration: 247790
loss: 0.9933198690414429,grad_norm: 0.8083020575976889, iteration: 247791
loss: 0.9825762510299683,grad_norm: 0.8149552184444839, iteration: 247792
loss: 0.9942013621330261,grad_norm: 0.9882313844229521, iteration: 247793
loss: 1.0970020294189453,grad_norm: 0.8091544234326693, iteration: 247794
loss: 1.0174788236618042,grad_norm: 0.9999992187356861, iteration: 247795
loss: 1.0064257383346558,grad_norm: 0.9999990814557248, iteration: 247796
loss: 0.9846359491348267,grad_norm: 0.7433105492467809, iteration: 247797
loss: 1.002806305885315,grad_norm: 0.7923337624512806, iteration: 247798
loss: 0.9854683876037598,grad_norm: 0.9501446282687308, iteration: 247799
loss: 1.006028652191162,grad_norm: 0.9511922128541538, iteration: 247800
loss: 0.9920730590820312,grad_norm: 0.9999999296230793, iteration: 247801
loss: 1.0131281614303589,grad_norm: 0.9999991348322286, iteration: 247802
loss: 0.9989105463027954,grad_norm: 0.9664688630317883, iteration: 247803
loss: 1.0068260431289673,grad_norm: 0.7723657592537103, iteration: 247804
loss: 0.9695210456848145,grad_norm: 0.8622682269300365, iteration: 247805
loss: 1.0167715549468994,grad_norm: 0.9096995815778597, iteration: 247806
loss: 0.9734719395637512,grad_norm: 0.9999990472262752, iteration: 247807
loss: 1.0089527368545532,grad_norm: 0.9765878595609612, iteration: 247808
loss: 0.9967259764671326,grad_norm: 0.8020441699967974, iteration: 247809
loss: 0.9982413649559021,grad_norm: 0.9124096062301436, iteration: 247810
loss: 0.9813111424446106,grad_norm: 0.9139873436472267, iteration: 247811
loss: 1.0174099206924438,grad_norm: 0.8726260334516197, iteration: 247812
loss: 0.9922818541526794,grad_norm: 0.8706445555317873, iteration: 247813
loss: 0.9981323480606079,grad_norm: 0.7857409652462543, iteration: 247814
loss: 0.9553115963935852,grad_norm: 0.9715006411904423, iteration: 247815
loss: 0.9993064999580383,grad_norm: 0.9999996046923781, iteration: 247816
loss: 0.9767352938652039,grad_norm: 0.810665418971896, iteration: 247817
loss: 0.9782126545906067,grad_norm: 0.8837396263839519, iteration: 247818
loss: 0.9629569053649902,grad_norm: 0.8019587947456844, iteration: 247819
loss: 0.949601948261261,grad_norm: 0.9897457839861067, iteration: 247820
loss: 0.9976446628570557,grad_norm: 0.7918566749329866, iteration: 247821
loss: 0.9996763467788696,grad_norm: 0.9331879297417904, iteration: 247822
loss: 1.039495587348938,grad_norm: 0.99999908345745, iteration: 247823
loss: 1.000463843345642,grad_norm: 0.8762557109960033, iteration: 247824
loss: 0.9828062653541565,grad_norm: 0.7994501722128781, iteration: 247825
loss: 1.0203403234481812,grad_norm: 0.783095140002487, iteration: 247826
loss: 1.027466058731079,grad_norm: 0.9999996832001108, iteration: 247827
loss: 0.997511625289917,grad_norm: 0.9981622872673105, iteration: 247828
loss: 0.9362682104110718,grad_norm: 0.8584299222236027, iteration: 247829
loss: 1.0105618238449097,grad_norm: 0.7832878165268696, iteration: 247830
loss: 1.0075511932373047,grad_norm: 0.9999992409309567, iteration: 247831
loss: 0.9971373081207275,grad_norm: 0.8037079589841236, iteration: 247832
loss: 0.9864326119422913,grad_norm: 0.8813495861192132, iteration: 247833
loss: 1.0147963762283325,grad_norm: 0.9999989371490211, iteration: 247834
loss: 0.986908495426178,grad_norm: 0.7212172425651231, iteration: 247835
loss: 0.9746347665786743,grad_norm: 0.8292821281343167, iteration: 247836
loss: 0.9870920181274414,grad_norm: 0.8380295678658061, iteration: 247837
loss: 0.9856260418891907,grad_norm: 0.8011988944519891, iteration: 247838
loss: 1.0327987670898438,grad_norm: 0.999999526308694, iteration: 247839
loss: 1.0037949085235596,grad_norm: 0.9999990514413118, iteration: 247840
loss: 1.0191079378128052,grad_norm: 0.9999989696981153, iteration: 247841
loss: 1.0204609632492065,grad_norm: 0.7939985703120795, iteration: 247842
loss: 0.9778438806533813,grad_norm: 0.9999989591782835, iteration: 247843
loss: 1.0065579414367676,grad_norm: 0.8829714292382979, iteration: 247844
loss: 1.0726993083953857,grad_norm: 0.999999094233569, iteration: 247845
loss: 0.987217366695404,grad_norm: 0.8398162425751713, iteration: 247846
loss: 1.0425313711166382,grad_norm: 0.9999990664031255, iteration: 247847
loss: 1.0215411186218262,grad_norm: 0.9999991220022802, iteration: 247848
loss: 0.9659478664398193,grad_norm: 0.9635687631657239, iteration: 247849
loss: 0.9596483707427979,grad_norm: 0.9999992046929238, iteration: 247850
loss: 1.032008171081543,grad_norm: 0.9324547298652589, iteration: 247851
loss: 0.9802842140197754,grad_norm: 0.9999992894548654, iteration: 247852
loss: 1.0642610788345337,grad_norm: 0.999999501169989, iteration: 247853
loss: 0.9675135016441345,grad_norm: 0.9541627046848088, iteration: 247854
loss: 1.103237271308899,grad_norm: 0.9999992068414911, iteration: 247855
loss: 0.9947506189346313,grad_norm: 0.8266839911034752, iteration: 247856
loss: 0.9828873872756958,grad_norm: 0.8940434524662113, iteration: 247857
loss: 0.9962760806083679,grad_norm: 0.8596885563741514, iteration: 247858
loss: 0.9824380278587341,grad_norm: 0.8009470570295292, iteration: 247859
loss: 0.9404495358467102,grad_norm: 0.9999994708515391, iteration: 247860
loss: 1.0004488229751587,grad_norm: 0.9128606274500803, iteration: 247861
loss: 1.007171630859375,grad_norm: 0.9999989337036047, iteration: 247862
loss: 0.9840388894081116,grad_norm: 0.9999991305075115, iteration: 247863
loss: 1.0114517211914062,grad_norm: 0.9999995580500209, iteration: 247864
loss: 0.9866390228271484,grad_norm: 0.8779641423281234, iteration: 247865
loss: 0.9627447724342346,grad_norm: 0.8900193010843328, iteration: 247866
loss: 0.9709106087684631,grad_norm: 0.8443571325656284, iteration: 247867
loss: 1.0006455183029175,grad_norm: 0.9542567469341949, iteration: 247868
loss: 1.0481071472167969,grad_norm: 0.9999998901746343, iteration: 247869
loss: 0.9915338754653931,grad_norm: 0.8540001254000523, iteration: 247870
loss: 0.9779403209686279,grad_norm: 0.903974054774192, iteration: 247871
loss: 1.0013700723648071,grad_norm: 0.9377853486372206, iteration: 247872
loss: 1.0443861484527588,grad_norm: 0.9999989861736337, iteration: 247873
loss: 0.9686357378959656,grad_norm: 0.8548410192642613, iteration: 247874
loss: 1.0551494359970093,grad_norm: 0.9999991673219132, iteration: 247875
loss: 0.980091392993927,grad_norm: 0.7026211961438926, iteration: 247876
loss: 0.9497209191322327,grad_norm: 0.999999154759469, iteration: 247877
loss: 1.0223504304885864,grad_norm: 0.8208625135434238, iteration: 247878
loss: 1.0757148265838623,grad_norm: 0.8739559276619225, iteration: 247879
loss: 1.0109281539916992,grad_norm: 0.8165278800720673, iteration: 247880
loss: 0.98542720079422,grad_norm: 0.936375179056185, iteration: 247881
loss: 1.0828336477279663,grad_norm: 0.9999990983239546, iteration: 247882
loss: 1.0369452238082886,grad_norm: 0.8481886043075197, iteration: 247883
loss: 1.0097987651824951,grad_norm: 0.9699196357380876, iteration: 247884
loss: 1.0193448066711426,grad_norm: 0.9706623726703875, iteration: 247885
loss: 0.9814991354942322,grad_norm: 0.8789044575083812, iteration: 247886
loss: 1.0142395496368408,grad_norm: 0.9999991709228757, iteration: 247887
loss: 0.95280522108078,grad_norm: 0.9305010275090233, iteration: 247888
loss: 1.068855881690979,grad_norm: 0.9999992566784782, iteration: 247889
loss: 1.0163788795471191,grad_norm: 0.9999991722729655, iteration: 247890
loss: 1.0059452056884766,grad_norm: 0.840330532777751, iteration: 247891
loss: 1.0178313255310059,grad_norm: 0.9862722971172088, iteration: 247892
loss: 1.0093679428100586,grad_norm: 0.8537534202172645, iteration: 247893
loss: 0.9749594926834106,grad_norm: 0.8286647935675502, iteration: 247894
loss: 0.975426971912384,grad_norm: 0.9993279091138135, iteration: 247895
loss: 1.031086802482605,grad_norm: 0.9999989591394225, iteration: 247896
loss: 1.06973135471344,grad_norm: 0.9999992982924208, iteration: 247897
loss: 1.0089343786239624,grad_norm: 0.9153123796696909, iteration: 247898
loss: 1.0188542604446411,grad_norm: 0.9999992846783122, iteration: 247899
loss: 0.985663115978241,grad_norm: 0.6986838195801194, iteration: 247900
loss: 1.0510894060134888,grad_norm: 0.999999096858456, iteration: 247901
loss: 0.9929704070091248,grad_norm: 0.9999990486694716, iteration: 247902
loss: 0.9999719858169556,grad_norm: 0.8394546703709949, iteration: 247903
loss: 0.9893300533294678,grad_norm: 0.7688625920314083, iteration: 247904
loss: 0.9930403232574463,grad_norm: 0.8819538528793757, iteration: 247905
loss: 0.9885373711585999,grad_norm: 0.8636302536301348, iteration: 247906
loss: 0.9654210805892944,grad_norm: 0.8353653608552383, iteration: 247907
loss: 0.9909109473228455,grad_norm: 0.764619000330782, iteration: 247908
loss: 1.1864838600158691,grad_norm: 0.9999990974248032, iteration: 247909
loss: 1.00788152217865,grad_norm: 0.9225528556125849, iteration: 247910
loss: 0.9810970425605774,grad_norm: 0.9132700296739802, iteration: 247911
loss: 0.9662544131278992,grad_norm: 0.9077538085366248, iteration: 247912
loss: 1.0189182758331299,grad_norm: 0.8519085777715314, iteration: 247913
loss: 1.0347557067871094,grad_norm: 0.9999989097153256, iteration: 247914
loss: 0.9640787243843079,grad_norm: 0.984462346411305, iteration: 247915
loss: 0.9828343391418457,grad_norm: 0.9510314744111295, iteration: 247916
loss: 0.9872144460678101,grad_norm: 0.7683014509216176, iteration: 247917
loss: 1.030529499053955,grad_norm: 0.9999995052591159, iteration: 247918
loss: 1.0192053318023682,grad_norm: 0.8721604298873398, iteration: 247919
loss: 0.9959368109703064,grad_norm: 0.7840109435325102, iteration: 247920
loss: 0.9950159192085266,grad_norm: 0.9067061932605143, iteration: 247921
loss: 1.016309142112732,grad_norm: 0.928580231826949, iteration: 247922
loss: 1.0137271881103516,grad_norm: 0.900841116021528, iteration: 247923
loss: 0.9701737761497498,grad_norm: 0.9999989486113467, iteration: 247924
loss: 0.9822880625724792,grad_norm: 0.8061102520567386, iteration: 247925
loss: 1.0047203302383423,grad_norm: 0.9999999369007094, iteration: 247926
loss: 1.0275031328201294,grad_norm: 0.8799217044072508, iteration: 247927
loss: 1.030566930770874,grad_norm: 0.9999992013230431, iteration: 247928
loss: 1.0144484043121338,grad_norm: 0.8673085335259557, iteration: 247929
loss: 0.9718124270439148,grad_norm: 0.9999991500636664, iteration: 247930
loss: 1.001102328300476,grad_norm: 0.999999141734924, iteration: 247931
loss: 0.9936165809631348,grad_norm: 0.8205388685417662, iteration: 247932
loss: 1.0035786628723145,grad_norm: 0.8557315710195961, iteration: 247933
loss: 0.985587477684021,grad_norm: 0.9999990836287667, iteration: 247934
loss: 1.0038727521896362,grad_norm: 0.9999990397302004, iteration: 247935
loss: 0.9930201172828674,grad_norm: 0.7382420579870644, iteration: 247936
loss: 1.0123414993286133,grad_norm: 0.8019321519403949, iteration: 247937
loss: 0.9738013744354248,grad_norm: 0.9494366010531762, iteration: 247938
loss: 1.006047248840332,grad_norm: 0.9434784739200747, iteration: 247939
loss: 1.0528172254562378,grad_norm: 0.9999999459392074, iteration: 247940
loss: 0.999817430973053,grad_norm: 0.8855466812535705, iteration: 247941
loss: 0.9765419960021973,grad_norm: 0.9594320942555679, iteration: 247942
loss: 1.0343530178070068,grad_norm: 0.9614733113608459, iteration: 247943
loss: 0.962800145149231,grad_norm: 0.9541759996419793, iteration: 247944
loss: 0.9857829809188843,grad_norm: 0.8778650321563526, iteration: 247945
loss: 0.9848520755767822,grad_norm: 0.9999990921470316, iteration: 247946
loss: 1.0298261642456055,grad_norm: 0.8699118127162795, iteration: 247947
loss: 1.0160646438598633,grad_norm: 0.8642233766499894, iteration: 247948
loss: 0.9801573157310486,grad_norm: 0.8534419102647167, iteration: 247949
loss: 1.0592188835144043,grad_norm: 0.9999990832728229, iteration: 247950
loss: 1.0182627439498901,grad_norm: 0.9203902392293429, iteration: 247951
loss: 1.0402969121932983,grad_norm: 0.7369132402289293, iteration: 247952
loss: 0.9822303056716919,grad_norm: 0.8498874010651972, iteration: 247953
loss: 0.9697126746177673,grad_norm: 0.8694739869794321, iteration: 247954
loss: 1.020858883857727,grad_norm: 0.9086210265860467, iteration: 247955
loss: 1.062168836593628,grad_norm: 0.9923027759494786, iteration: 247956
loss: 0.9800546169281006,grad_norm: 0.9158945778568577, iteration: 247957
loss: 0.9836848378181458,grad_norm: 0.8905173760396995, iteration: 247958
loss: 0.9922166466712952,grad_norm: 0.9565932640221735, iteration: 247959
loss: 0.9609073400497437,grad_norm: 0.8461315333022177, iteration: 247960
loss: 1.0114915370941162,grad_norm: 0.8585483536022703, iteration: 247961
loss: 0.9954690337181091,grad_norm: 0.9999996964855037, iteration: 247962
loss: 1.0138764381408691,grad_norm: 0.8284032849300139, iteration: 247963
loss: 0.9731898903846741,grad_norm: 0.9999991810463962, iteration: 247964
loss: 1.0092673301696777,grad_norm: 0.9999994016238155, iteration: 247965
loss: 1.0592800378799438,grad_norm: 0.9254479292269748, iteration: 247966
loss: 0.9983927607536316,grad_norm: 0.9999992584938845, iteration: 247967
loss: 1.0330487489700317,grad_norm: 0.785028492284691, iteration: 247968
loss: 0.9763237237930298,grad_norm: 0.9277608175800528, iteration: 247969
loss: 0.9817008972167969,grad_norm: 0.9203242296287715, iteration: 247970
loss: 0.9950270652770996,grad_norm: 0.9845423920723291, iteration: 247971
loss: 0.9892032742500305,grad_norm: 0.9999994603331058, iteration: 247972
loss: 0.9733067154884338,grad_norm: 0.8580720620947985, iteration: 247973
loss: 0.9863508343696594,grad_norm: 0.8329780428717221, iteration: 247974
loss: 0.9810641407966614,grad_norm: 0.8792681789355156, iteration: 247975
loss: 1.0094711780548096,grad_norm: 0.893691528467782, iteration: 247976
loss: 1.0102264881134033,grad_norm: 0.8581758890509449, iteration: 247977
loss: 0.9633115530014038,grad_norm: 0.9999991686594585, iteration: 247978
loss: 0.9943588376045227,grad_norm: 0.9780602299744399, iteration: 247979
loss: 0.9695147275924683,grad_norm: 0.7456602319688271, iteration: 247980
loss: 1.0114719867706299,grad_norm: 0.9095103463338612, iteration: 247981
loss: 0.9626902937889099,grad_norm: 0.9999991950064019, iteration: 247982
loss: 1.0493581295013428,grad_norm: 0.7752688506579629, iteration: 247983
loss: 1.0314197540283203,grad_norm: 0.886055641481376, iteration: 247984
loss: 0.9919386506080627,grad_norm: 0.9999991716194183, iteration: 247985
loss: 0.9745360016822815,grad_norm: 0.999999492447894, iteration: 247986
loss: 0.9945705533027649,grad_norm: 0.9999992126949621, iteration: 247987
loss: 0.9670993685722351,grad_norm: 0.8509330699933075, iteration: 247988
loss: 0.9684000015258789,grad_norm: 0.7748935988738983, iteration: 247989
loss: 1.022842526435852,grad_norm: 0.8898566466774014, iteration: 247990
loss: 1.0223867893218994,grad_norm: 0.8457155373982929, iteration: 247991
loss: 1.0065861940383911,grad_norm: 0.8791547211801376, iteration: 247992
loss: 1.002615213394165,grad_norm: 0.9554080444053064, iteration: 247993
loss: 0.9847003817558289,grad_norm: 0.9999991317845469, iteration: 247994
loss: 1.031431794166565,grad_norm: 0.9520105860213397, iteration: 247995
loss: 1.0119984149932861,grad_norm: 0.8575583421092354, iteration: 247996
loss: 0.996398389339447,grad_norm: 0.9573978772749826, iteration: 247997
loss: 1.0077266693115234,grad_norm: 0.999999162580133, iteration: 247998
loss: 1.0062159299850464,grad_norm: 0.7312028908739461, iteration: 247999
loss: 1.005785346031189,grad_norm: 0.9999990250571144, iteration: 248000
loss: 0.998020589351654,grad_norm: 0.9293613453888244, iteration: 248001
loss: 1.0024529695510864,grad_norm: 0.9152192969535099, iteration: 248002
loss: 1.0399298667907715,grad_norm: 0.7707986382303068, iteration: 248003
loss: 1.0126609802246094,grad_norm: 0.9999991050974191, iteration: 248004
loss: 1.0204486846923828,grad_norm: 0.9999989639686712, iteration: 248005
loss: 1.0259029865264893,grad_norm: 0.9999990961415494, iteration: 248006
loss: 0.9724606275558472,grad_norm: 0.7673267875208936, iteration: 248007
loss: 1.0016295909881592,grad_norm: 0.7883313388688498, iteration: 248008
loss: 1.0030757188796997,grad_norm: 0.9999992562897634, iteration: 248009
loss: 0.9840235710144043,grad_norm: 0.8117076081203388, iteration: 248010
loss: 1.0111414194107056,grad_norm: 0.9999994517988254, iteration: 248011
loss: 0.9906256794929504,grad_norm: 0.885957738676213, iteration: 248012
loss: 0.9953627586364746,grad_norm: 0.7665577414640585, iteration: 248013
loss: 0.9652934670448303,grad_norm: 0.9465465480366118, iteration: 248014
loss: 0.9660497307777405,grad_norm: 0.7860728379876614, iteration: 248015
loss: 0.9816611409187317,grad_norm: 0.9999991381000786, iteration: 248016
loss: 0.9925261735916138,grad_norm: 0.9416162021292953, iteration: 248017
loss: 1.003726601600647,grad_norm: 0.8487937701391833, iteration: 248018
loss: 0.9870390295982361,grad_norm: 0.8687917011199491, iteration: 248019
loss: 0.9971827268600464,grad_norm: 0.9576783298238281, iteration: 248020
loss: 0.9691270589828491,grad_norm: 0.8310226727489504, iteration: 248021
loss: 0.979496955871582,grad_norm: 0.8962058414246702, iteration: 248022
loss: 1.0098117589950562,grad_norm: 0.9999997288592988, iteration: 248023
loss: 1.0096153020858765,grad_norm: 0.879327646217063, iteration: 248024
loss: 1.0028693675994873,grad_norm: 0.8894590479433244, iteration: 248025
loss: 1.008751392364502,grad_norm: 0.7415978947785337, iteration: 248026
loss: 1.0140960216522217,grad_norm: 0.9999991732621571, iteration: 248027
loss: 0.989015519618988,grad_norm: 0.8370919845956348, iteration: 248028
loss: 0.997301459312439,grad_norm: 0.8378881546403758, iteration: 248029
loss: 0.9872457385063171,grad_norm: 0.9604676066977262, iteration: 248030
loss: 1.0444271564483643,grad_norm: 0.7981201431224657, iteration: 248031
loss: 0.9862989783287048,grad_norm: 0.8983032254881917, iteration: 248032
loss: 1.0942121744155884,grad_norm: 0.9999992836796635, iteration: 248033
loss: 1.0162266492843628,grad_norm: 0.7261232112203563, iteration: 248034
loss: 1.000257134437561,grad_norm: 0.9093107257678702, iteration: 248035
loss: 1.0088400840759277,grad_norm: 0.9999991101502441, iteration: 248036
loss: 1.003481149673462,grad_norm: 0.9492677585867598, iteration: 248037
loss: 1.0152311325073242,grad_norm: 0.9999998488919777, iteration: 248038
loss: 0.978169322013855,grad_norm: 0.999999122882534, iteration: 248039
loss: 1.0056467056274414,grad_norm: 0.9733898393834866, iteration: 248040
loss: 0.9921546578407288,grad_norm: 0.9380134020651616, iteration: 248041
loss: 1.0053704977035522,grad_norm: 0.9129735304890217, iteration: 248042
loss: 1.0319244861602783,grad_norm: 0.9999991794342274, iteration: 248043
loss: 1.01075279712677,grad_norm: 0.9999991241349744, iteration: 248044
loss: 1.0485856533050537,grad_norm: 0.9999991018798279, iteration: 248045
loss: 1.0107063055038452,grad_norm: 0.999999615160744, iteration: 248046
loss: 0.9911722540855408,grad_norm: 0.9467989903795825, iteration: 248047
loss: 0.9966060519218445,grad_norm: 0.9999990984410273, iteration: 248048
loss: 1.005184292793274,grad_norm: 0.8947361206262711, iteration: 248049
loss: 1.0110594034194946,grad_norm: 0.9341884791768978, iteration: 248050
loss: 0.9891304969787598,grad_norm: 0.8952593443314208, iteration: 248051
loss: 0.9938918948173523,grad_norm: 0.772583603637151, iteration: 248052
loss: 1.0440988540649414,grad_norm: 0.9999990746103758, iteration: 248053
loss: 0.9703714847564697,grad_norm: 0.9231843088121843, iteration: 248054
loss: 0.9965808987617493,grad_norm: 0.9088846405719261, iteration: 248055
loss: 1.0019841194152832,grad_norm: 0.9651919086967564, iteration: 248056
loss: 1.0495628118515015,grad_norm: 0.9999997540699379, iteration: 248057
loss: 1.008806824684143,grad_norm: 0.9999990566060952, iteration: 248058
loss: 1.024216890335083,grad_norm: 0.9066232282317513, iteration: 248059
loss: 1.0617021322250366,grad_norm: 0.9999989966884877, iteration: 248060
loss: 0.9660601019859314,grad_norm: 0.9888368459110716, iteration: 248061
loss: 0.9851991534233093,grad_norm: 0.9090464188805782, iteration: 248062
loss: 1.020843505859375,grad_norm: 0.9026597393045438, iteration: 248063
loss: 0.9623814821243286,grad_norm: 0.8587323376817406, iteration: 248064
loss: 0.9958474040031433,grad_norm: 0.8263178901450744, iteration: 248065
loss: 0.9849079251289368,grad_norm: 0.8942827713684812, iteration: 248066
loss: 1.010597586631775,grad_norm: 0.880296211288603, iteration: 248067
loss: 1.0117616653442383,grad_norm: 0.9784269035161935, iteration: 248068
loss: 0.9999198317527771,grad_norm: 0.9999998536935626, iteration: 248069
loss: 0.9905425310134888,grad_norm: 0.8950293655147268, iteration: 248070
loss: 1.0127142667770386,grad_norm: 0.9152524429862983, iteration: 248071
loss: 0.976386308670044,grad_norm: 0.9999990305028755, iteration: 248072
loss: 0.9762595295906067,grad_norm: 0.9805075966313701, iteration: 248073
loss: 1.032537817955017,grad_norm: 0.8427482476964555, iteration: 248074
loss: 1.0027275085449219,grad_norm: 0.9999990638142954, iteration: 248075
loss: 1.0035666227340698,grad_norm: 0.8140009812094854, iteration: 248076
loss: 1.009992003440857,grad_norm: 0.9999989691833951, iteration: 248077
loss: 1.0202510356903076,grad_norm: 0.705902832968901, iteration: 248078
loss: 1.0128968954086304,grad_norm: 0.8799925635085021, iteration: 248079
loss: 1.016321063041687,grad_norm: 0.99999950046266, iteration: 248080
loss: 0.9982950091362,grad_norm: 0.9274057735580636, iteration: 248081
loss: 0.9751427173614502,grad_norm: 0.8964689640207993, iteration: 248082
loss: 0.9867896437644958,grad_norm: 0.8146100870086848, iteration: 248083
loss: 1.016271948814392,grad_norm: 0.9999991470577962, iteration: 248084
loss: 0.9980112314224243,grad_norm: 0.7352828082366495, iteration: 248085
loss: 0.9898976683616638,grad_norm: 0.8769197662021783, iteration: 248086
loss: 0.9745174646377563,grad_norm: 0.7987600416055582, iteration: 248087
loss: 1.0125467777252197,grad_norm: 0.7633623786447506, iteration: 248088
loss: 1.0068731307983398,grad_norm: 0.8697785011003681, iteration: 248089
loss: 1.0268845558166504,grad_norm: 0.9589503077535831, iteration: 248090
loss: 1.0049419403076172,grad_norm: 0.7873976994924587, iteration: 248091
loss: 1.037550449371338,grad_norm: 0.9667634397177414, iteration: 248092
loss: 0.9968644380569458,grad_norm: 0.9499473488927053, iteration: 248093
loss: 1.0415791273117065,grad_norm: 0.9999991958117584, iteration: 248094
loss: 1.0262714624404907,grad_norm: 0.9999991950922721, iteration: 248095
loss: 1.037143588066101,grad_norm: 0.9999995115498234, iteration: 248096
loss: 1.0236769914627075,grad_norm: 0.9740630509800057, iteration: 248097
loss: 0.9643451571464539,grad_norm: 0.8431488125650212, iteration: 248098
loss: 1.0451668500900269,grad_norm: 0.9999994636779123, iteration: 248099
loss: 1.0034500360488892,grad_norm: 0.9419857050951452, iteration: 248100
loss: 0.9834794402122498,grad_norm: 0.7907738127809302, iteration: 248101
loss: 0.9504289031028748,grad_norm: 0.9468711047435434, iteration: 248102
loss: 1.0414173603057861,grad_norm: 0.9322059712167241, iteration: 248103
loss: 1.0006182193756104,grad_norm: 0.8887682356790241, iteration: 248104
loss: 1.0208542346954346,grad_norm: 0.9315993956955368, iteration: 248105
loss: 0.9606292247772217,grad_norm: 0.9143800375047515, iteration: 248106
loss: 0.9819326996803284,grad_norm: 0.780982898785789, iteration: 248107
loss: 0.9697404503822327,grad_norm: 0.9684254826331112, iteration: 248108
loss: 1.0169763565063477,grad_norm: 0.800868456709548, iteration: 248109
loss: 1.0237568616867065,grad_norm: 0.9999990927434426, iteration: 248110
loss: 0.9889821410179138,grad_norm: 0.7915554082775528, iteration: 248111
loss: 0.9665129780769348,grad_norm: 0.7226045795808929, iteration: 248112
loss: 0.9969127774238586,grad_norm: 0.8936401579960177, iteration: 248113
loss: 0.9734036922454834,grad_norm: 0.8513020247592151, iteration: 248114
loss: 0.9829009175300598,grad_norm: 0.8104616422275769, iteration: 248115
loss: 1.0064256191253662,grad_norm: 0.9999992182961142, iteration: 248116
loss: 0.999427318572998,grad_norm: 0.9999991347809429, iteration: 248117
loss: 1.0392500162124634,grad_norm: 0.8848189854541114, iteration: 248118
loss: 1.0039379596710205,grad_norm: 0.8039155858833242, iteration: 248119
loss: 1.0047327280044556,grad_norm: 0.836603232127862, iteration: 248120
loss: 1.1049048900604248,grad_norm: 0.8957333602553738, iteration: 248121
loss: 1.0249770879745483,grad_norm: 0.9408261501997102, iteration: 248122
loss: 1.002307653427124,grad_norm: 0.7972247155062506, iteration: 248123
loss: 1.0902851819992065,grad_norm: 0.8603525083187968, iteration: 248124
loss: 1.011098027229309,grad_norm: 0.9789313078816273, iteration: 248125
loss: 1.0002846717834473,grad_norm: 0.9999990915194731, iteration: 248126
loss: 0.9948939085006714,grad_norm: 0.8537714888854175, iteration: 248127
loss: 1.0115771293640137,grad_norm: 0.8166738334714374, iteration: 248128
loss: 1.009293794631958,grad_norm: 0.8502054479646599, iteration: 248129
loss: 0.9972279071807861,grad_norm: 0.9999989760687391, iteration: 248130
loss: 0.9983319044113159,grad_norm: 0.9550886828248975, iteration: 248131
loss: 0.9664921164512634,grad_norm: 0.9999990200239022, iteration: 248132
loss: 1.0352545976638794,grad_norm: 0.9154698394878349, iteration: 248133
loss: 1.013266921043396,grad_norm: 0.8350507528869899, iteration: 248134
loss: 1.0182371139526367,grad_norm: 0.8567813330750638, iteration: 248135
loss: 1.0224591493606567,grad_norm: 0.9999991209716148, iteration: 248136
loss: 0.9699537754058838,grad_norm: 0.9999992958047392, iteration: 248137
loss: 0.9817048907279968,grad_norm: 0.8672318849948001, iteration: 248138
loss: 1.0034246444702148,grad_norm: 0.9999990777651016, iteration: 248139
loss: 1.0308165550231934,grad_norm: 0.8161535453062114, iteration: 248140
loss: 0.9759007096290588,grad_norm: 0.9999990252037289, iteration: 248141
loss: 0.9894508719444275,grad_norm: 0.8441819503423881, iteration: 248142
loss: 0.9847908020019531,grad_norm: 0.8623069625112898, iteration: 248143
loss: 0.9748421311378479,grad_norm: 0.7400121334604115, iteration: 248144
loss: 1.003786563873291,grad_norm: 0.9177306360274124, iteration: 248145
loss: 1.0273175239562988,grad_norm: 0.9959151837582619, iteration: 248146
loss: 1.104061245918274,grad_norm: 0.9603126064807883, iteration: 248147
loss: 1.0038779973983765,grad_norm: 0.940531274032177, iteration: 248148
loss: 0.971973180770874,grad_norm: 0.7612600749498522, iteration: 248149
loss: 0.9822038412094116,grad_norm: 0.8733061577193506, iteration: 248150
loss: 0.9780328869819641,grad_norm: 0.8506645844485117, iteration: 248151
loss: 1.0113445520401,grad_norm: 0.9623936219580506, iteration: 248152
loss: 1.0407700538635254,grad_norm: 0.9278480509448579, iteration: 248153
loss: 0.9743529558181763,grad_norm: 0.8840737741211072, iteration: 248154
loss: 0.9954330325126648,grad_norm: 0.8185221104216434, iteration: 248155
loss: 1.0572162866592407,grad_norm: 0.9999999885627114, iteration: 248156
loss: 0.9854673743247986,grad_norm: 0.9465807614154769, iteration: 248157
loss: 0.9913716316223145,grad_norm: 0.9999991215308852, iteration: 248158
loss: 0.9983540773391724,grad_norm: 0.823033382702767, iteration: 248159
loss: 1.010384202003479,grad_norm: 0.9699525323752803, iteration: 248160
loss: 1.035582423210144,grad_norm: 0.9285662797121724, iteration: 248161
loss: 1.01430344581604,grad_norm: 0.9415970609787387, iteration: 248162
loss: 1.020867109298706,grad_norm: 0.9245438427147773, iteration: 248163
loss: 1.0115646123886108,grad_norm: 0.8745417176197864, iteration: 248164
loss: 1.014204502105713,grad_norm: 0.9323062516872216, iteration: 248165
loss: 1.0155919790267944,grad_norm: 0.9999991023655674, iteration: 248166
loss: 1.0026570558547974,grad_norm: 0.8636244999320775, iteration: 248167
loss: 0.9911949038505554,grad_norm: 0.9350089109988019, iteration: 248168
loss: 1.0063958168029785,grad_norm: 0.8858310550424289, iteration: 248169
loss: 0.967079222202301,grad_norm: 0.9999992460363702, iteration: 248170
loss: 0.9659743309020996,grad_norm: 0.9999990768759968, iteration: 248171
loss: 0.9989726543426514,grad_norm: 0.9999993295448923, iteration: 248172
loss: 0.9850936532020569,grad_norm: 0.9139218989792123, iteration: 248173
loss: 0.9956560134887695,grad_norm: 0.9770320605700312, iteration: 248174
loss: 1.0055739879608154,grad_norm: 0.9307417154154717, iteration: 248175
loss: 1.000663161277771,grad_norm: 0.7858986454602952, iteration: 248176
loss: 1.0171397924423218,grad_norm: 0.8408622850316091, iteration: 248177
loss: 0.9814425110816956,grad_norm: 0.9508429592955793, iteration: 248178
loss: 1.0239779949188232,grad_norm: 0.9999990766065598, iteration: 248179
loss: 1.0083690881729126,grad_norm: 0.9203483884539571, iteration: 248180
loss: 1.0523450374603271,grad_norm: 0.9999998271602833, iteration: 248181
loss: 0.985936164855957,grad_norm: 0.7643277250721919, iteration: 248182
loss: 1.0047441720962524,grad_norm: 0.7412256934288212, iteration: 248183
loss: 0.9839186668395996,grad_norm: 0.8492654131802084, iteration: 248184
loss: 1.0057345628738403,grad_norm: 0.889526952781347, iteration: 248185
loss: 0.9881066679954529,grad_norm: 0.9462613474714935, iteration: 248186
loss: 1.054279088973999,grad_norm: 0.8873807938798493, iteration: 248187
loss: 0.9869533777236938,grad_norm: 0.7877415593059359, iteration: 248188
loss: 1.0110127925872803,grad_norm: 0.9196995347654212, iteration: 248189
loss: 1.0448591709136963,grad_norm: 0.9999996692149116, iteration: 248190
loss: 0.9856500029563904,grad_norm: 0.9528917966475643, iteration: 248191
loss: 0.9939680099487305,grad_norm: 0.7784966681073648, iteration: 248192
loss: 0.9871222376823425,grad_norm: 0.7817228645028926, iteration: 248193
loss: 0.9850347638130188,grad_norm: 0.8209846567095539, iteration: 248194
loss: 1.0379122495651245,grad_norm: 0.9999995504893201, iteration: 248195
loss: 0.9888185262680054,grad_norm: 0.9999990679362261, iteration: 248196
loss: 0.9821335077285767,grad_norm: 0.7025902781864708, iteration: 248197
loss: 1.0215363502502441,grad_norm: 0.7819642464504318, iteration: 248198
loss: 0.9992797374725342,grad_norm: 0.9999990429114902, iteration: 248199
loss: 1.035365343093872,grad_norm: 0.9999991047658546, iteration: 248200
loss: 1.0512282848358154,grad_norm: 0.9999990758463371, iteration: 248201
loss: 1.0005834102630615,grad_norm: 0.8738351156483699, iteration: 248202
loss: 0.9752553105354309,grad_norm: 0.9999992402643951, iteration: 248203
loss: 1.0155030488967896,grad_norm: 0.9999990801729758, iteration: 248204
loss: 0.985872745513916,grad_norm: 0.958454621075706, iteration: 248205
loss: 0.9884781837463379,grad_norm: 0.8891557020229, iteration: 248206
loss: 0.9981710910797119,grad_norm: 0.9999994008717871, iteration: 248207
loss: 1.0049444437026978,grad_norm: 0.9255356949235304, iteration: 248208
loss: 1.033976435661316,grad_norm: 0.9999997729973426, iteration: 248209
loss: 1.007523536682129,grad_norm: 0.9767276169998877, iteration: 248210
loss: 0.9837422370910645,grad_norm: 0.9255661179133572, iteration: 248211
loss: 0.9906054735183716,grad_norm: 0.7397208227873593, iteration: 248212
loss: 1.0187163352966309,grad_norm: 0.7780939779157392, iteration: 248213
loss: 0.9714860916137695,grad_norm: 0.8735423016005893, iteration: 248214
loss: 0.9971393346786499,grad_norm: 0.9372613143621067, iteration: 248215
loss: 1.0052788257598877,grad_norm: 0.9780074881263059, iteration: 248216
loss: 1.0304638147354126,grad_norm: 0.9999991432525182, iteration: 248217
loss: 1.0080922842025757,grad_norm: 0.9820836016436681, iteration: 248218
loss: 1.04376220703125,grad_norm: 0.9579066306847712, iteration: 248219
loss: 1.025948166847229,grad_norm: 0.8859394742013486, iteration: 248220
loss: 0.9918522834777832,grad_norm: 0.8817951467905101, iteration: 248221
loss: 1.0059691667556763,grad_norm: 0.7753332416374581, iteration: 248222
loss: 0.9772772789001465,grad_norm: 0.9999992087630505, iteration: 248223
loss: 1.0027793645858765,grad_norm: 0.8493866982294666, iteration: 248224
loss: 1.0005226135253906,grad_norm: 0.9995312525015541, iteration: 248225
loss: 1.0022293329238892,grad_norm: 0.89047173122587, iteration: 248226
loss: 1.0083476305007935,grad_norm: 0.8725840536043142, iteration: 248227
loss: 1.0023174285888672,grad_norm: 0.9999990979889167, iteration: 248228
loss: 1.018205165863037,grad_norm: 0.9999989942722342, iteration: 248229
loss: 1.0124611854553223,grad_norm: 0.8814951657833837, iteration: 248230
loss: 1.0144957304000854,grad_norm: 0.9077830891120559, iteration: 248231
loss: 0.9772274494171143,grad_norm: 0.8748744303731104, iteration: 248232
loss: 1.0348076820373535,grad_norm: 0.9045370881171592, iteration: 248233
loss: 0.9524680972099304,grad_norm: 0.9999990184257534, iteration: 248234
loss: 1.068353295326233,grad_norm: 0.9999991533726316, iteration: 248235
loss: 0.998968243598938,grad_norm: 0.9999990497141176, iteration: 248236
loss: 0.9852358102798462,grad_norm: 0.8068596336000269, iteration: 248237
loss: 0.9757943749427795,grad_norm: 0.9270362479047308, iteration: 248238
loss: 0.9734480381011963,grad_norm: 0.9276727218449542, iteration: 248239
loss: 0.9879539012908936,grad_norm: 0.9763744773464398, iteration: 248240
loss: 0.9848676323890686,grad_norm: 0.9579609142891252, iteration: 248241
loss: 0.9954067468643188,grad_norm: 0.8386049145280192, iteration: 248242
loss: 0.991156280040741,grad_norm: 0.9685202300257528, iteration: 248243
loss: 0.990907609462738,grad_norm: 0.7921881537938369, iteration: 248244
loss: 0.9833510518074036,grad_norm: 0.8533514229115374, iteration: 248245
loss: 1.0130741596221924,grad_norm: 0.8629424846605609, iteration: 248246
loss: 1.0059486627578735,grad_norm: 0.9795200070377955, iteration: 248247
loss: 0.9959051609039307,grad_norm: 0.8054925093792501, iteration: 248248
loss: 0.9549111127853394,grad_norm: 0.9164786820734109, iteration: 248249
loss: 0.9909244775772095,grad_norm: 0.9573564448864531, iteration: 248250
loss: 1.0019272565841675,grad_norm: 0.9697252671974225, iteration: 248251
loss: 1.0118541717529297,grad_norm: 0.8671064816012632, iteration: 248252
loss: 1.0128871202468872,grad_norm: 0.9999990235394843, iteration: 248253
loss: 0.981324315071106,grad_norm: 0.9254618032858817, iteration: 248254
loss: 1.0051772594451904,grad_norm: 0.8267905162762813, iteration: 248255
loss: 0.9868595600128174,grad_norm: 0.8279032063847261, iteration: 248256
loss: 0.9709211587905884,grad_norm: 0.9999991264309936, iteration: 248257
loss: 0.9720795750617981,grad_norm: 0.8193200042438432, iteration: 248258
loss: 1.0126477479934692,grad_norm: 0.9036168611439942, iteration: 248259
loss: 1.045055627822876,grad_norm: 0.8320025482846672, iteration: 248260
loss: 1.0043514966964722,grad_norm: 0.9463385494361316, iteration: 248261
loss: 0.9786689877510071,grad_norm: 0.8182900886980115, iteration: 248262
loss: 1.0119866132736206,grad_norm: 0.8096644792162432, iteration: 248263
loss: 1.0115388631820679,grad_norm: 0.8917361839884728, iteration: 248264
loss: 1.0063074827194214,grad_norm: 0.8673116549174932, iteration: 248265
loss: 1.0127592086791992,grad_norm: 0.6986945918484261, iteration: 248266
loss: 1.0178369283676147,grad_norm: 0.796525926708256, iteration: 248267
loss: 0.9718273878097534,grad_norm: 0.7860786388281662, iteration: 248268
loss: 0.9617573618888855,grad_norm: 0.8879899630524094, iteration: 248269
loss: 1.008178949356079,grad_norm: 0.9430585098111682, iteration: 248270
loss: 0.9899063110351562,grad_norm: 0.9801934112501955, iteration: 248271
loss: 1.0007622241973877,grad_norm: 0.8905283972952717, iteration: 248272
loss: 0.9903919100761414,grad_norm: 0.9118371752570373, iteration: 248273
loss: 0.9729012250900269,grad_norm: 0.9999992642996877, iteration: 248274
loss: 1.028253197669983,grad_norm: 0.8644186687338162, iteration: 248275
loss: 1.0674097537994385,grad_norm: 0.9999991040672813, iteration: 248276
loss: 0.9738457202911377,grad_norm: 0.8184780746168995, iteration: 248277
loss: 1.0048636198043823,grad_norm: 0.7278803263452956, iteration: 248278
loss: 0.9951580166816711,grad_norm: 0.9623942215348354, iteration: 248279
loss: 1.0164953470230103,grad_norm: 0.9205968774829405, iteration: 248280
loss: 1.0224590301513672,grad_norm: 0.845642357424913, iteration: 248281
loss: 0.9947289824485779,grad_norm: 0.8772424707286332, iteration: 248282
loss: 1.003185749053955,grad_norm: 0.7525329889561425, iteration: 248283
loss: 1.0170509815216064,grad_norm: 0.9320385412470008, iteration: 248284
loss: 1.0191168785095215,grad_norm: 0.8613749733925802, iteration: 248285
loss: 1.0115480422973633,grad_norm: 0.9999990105054751, iteration: 248286
loss: 1.0090699195861816,grad_norm: 0.7750108186498282, iteration: 248287
loss: 1.0279717445373535,grad_norm: 0.7201919977134761, iteration: 248288
loss: 1.0023703575134277,grad_norm: 0.9999997984808672, iteration: 248289
loss: 1.0100071430206299,grad_norm: 0.8747212738072924, iteration: 248290
loss: 0.9863588213920593,grad_norm: 0.9515038608000531, iteration: 248291
loss: 1.0314866304397583,grad_norm: 0.8956638718164368, iteration: 248292
loss: 0.9833767414093018,grad_norm: 0.9858639524996496, iteration: 248293
loss: 1.0186680555343628,grad_norm: 0.8375576723614567, iteration: 248294
loss: 0.9740957617759705,grad_norm: 0.9999990244255321, iteration: 248295
loss: 0.9753068685531616,grad_norm: 0.8442601205755198, iteration: 248296
loss: 1.0061891078948975,grad_norm: 0.874506036474707, iteration: 248297
loss: 1.0434356927871704,grad_norm: 0.9999991317459709, iteration: 248298
loss: 1.0031906366348267,grad_norm: 0.9999991120011167, iteration: 248299
loss: 1.0152204036712646,grad_norm: 0.7130262870754183, iteration: 248300
loss: 1.0106556415557861,grad_norm: 0.899151902830938, iteration: 248301
loss: 0.9858207702636719,grad_norm: 0.8421420349189931, iteration: 248302
loss: 0.9860760569572449,grad_norm: 0.9999991773201291, iteration: 248303
loss: 1.0144009590148926,grad_norm: 0.9169150799595503, iteration: 248304
loss: 0.9952965974807739,grad_norm: 0.9762038689365344, iteration: 248305
loss: 1.0079256296157837,grad_norm: 0.9834918865569233, iteration: 248306
loss: 0.963476300239563,grad_norm: 0.9999991733420187, iteration: 248307
loss: 1.0533373355865479,grad_norm: 0.7704465795401036, iteration: 248308
loss: 1.0205453634262085,grad_norm: 0.7794372015977308, iteration: 248309
loss: 1.0120229721069336,grad_norm: 0.9185736377337681, iteration: 248310
loss: 1.0027387142181396,grad_norm: 0.7810450745109868, iteration: 248311
loss: 1.0075339078903198,grad_norm: 0.9088368482470495, iteration: 248312
loss: 0.9739047884941101,grad_norm: 0.9069471471260181, iteration: 248313
loss: 1.0163153409957886,grad_norm: 0.8585173239757796, iteration: 248314
loss: 1.0033321380615234,grad_norm: 0.7459693718600042, iteration: 248315
loss: 0.9633485674858093,grad_norm: 0.9999991730615009, iteration: 248316
loss: 1.0055758953094482,grad_norm: 0.9999991759267609, iteration: 248317
loss: 0.9574912190437317,grad_norm: 0.7982495990189319, iteration: 248318
loss: 1.007750153541565,grad_norm: 0.8132067527699017, iteration: 248319
loss: 1.0383378267288208,grad_norm: 0.9999989712611951, iteration: 248320
loss: 1.0113937854766846,grad_norm: 0.8682693427964084, iteration: 248321
loss: 1.0418872833251953,grad_norm: 0.8412316735489386, iteration: 248322
loss: 0.992614209651947,grad_norm: 0.8514251035466208, iteration: 248323
loss: 0.9944719076156616,grad_norm: 0.9559022715551918, iteration: 248324
loss: 1.0311189889907837,grad_norm: 0.9847126839868794, iteration: 248325
loss: 1.0196049213409424,grad_norm: 0.9999997631868093, iteration: 248326
loss: 1.0170886516571045,grad_norm: 0.8769044340212531, iteration: 248327
loss: 1.0249525308609009,grad_norm: 0.9562312958089199, iteration: 248328
loss: 0.9760398864746094,grad_norm: 0.999999046878951, iteration: 248329
loss: 1.0218656063079834,grad_norm: 0.9999991533086061, iteration: 248330
loss: 1.031901478767395,grad_norm: 0.8584745872377565, iteration: 248331
loss: 0.9825868606567383,grad_norm: 0.8103450171530292, iteration: 248332
loss: 0.9923794865608215,grad_norm: 0.8264068543772167, iteration: 248333
loss: 0.9854342341423035,grad_norm: 0.9999991029106491, iteration: 248334
loss: 0.9983087182044983,grad_norm: 0.8805301663325003, iteration: 248335
loss: 0.9888399243354797,grad_norm: 0.8813203936637826, iteration: 248336
loss: 1.0024930238723755,grad_norm: 0.9156651913806296, iteration: 248337
loss: 0.9837100505828857,grad_norm: 0.8053994116012912, iteration: 248338
loss: 1.1115554571151733,grad_norm: 0.7995313863072465, iteration: 248339
loss: 1.0123555660247803,grad_norm: 0.8535431623026298, iteration: 248340
loss: 0.998468279838562,grad_norm: 0.8456464436094403, iteration: 248341
loss: 1.0189319849014282,grad_norm: 0.9343510405039128, iteration: 248342
loss: 0.9734770059585571,grad_norm: 0.8731288094032991, iteration: 248343
loss: 1.0317984819412231,grad_norm: 0.9748395100979569, iteration: 248344
loss: 0.9950271844863892,grad_norm: 0.999998959640584, iteration: 248345
loss: 0.9504642486572266,grad_norm: 0.999999028120371, iteration: 248346
loss: 1.0217843055725098,grad_norm: 0.7797814443073641, iteration: 248347
loss: 1.0170843601226807,grad_norm: 0.9736893812442327, iteration: 248348
loss: 0.982774555683136,grad_norm: 0.9728983102911342, iteration: 248349
loss: 0.9778875708580017,grad_norm: 0.757822303550069, iteration: 248350
loss: 0.9549760222434998,grad_norm: 0.9999990687016301, iteration: 248351
loss: 0.9921528697013855,grad_norm: 0.9999991375346695, iteration: 248352
loss: 0.9992543458938599,grad_norm: 0.8579496657794944, iteration: 248353
loss: 1.0061750411987305,grad_norm: 0.9999991002809407, iteration: 248354
loss: 0.9787651300430298,grad_norm: 0.9220101318076189, iteration: 248355
loss: 0.998961865901947,grad_norm: 0.9479742239535631, iteration: 248356
loss: 0.9833535552024841,grad_norm: 0.8471028649447955, iteration: 248357
loss: 0.9936395883560181,grad_norm: 0.9445408099277323, iteration: 248358
loss: 1.0038642883300781,grad_norm: 0.8952366103284257, iteration: 248359
loss: 0.9583199620246887,grad_norm: 0.8967062917063576, iteration: 248360
loss: 1.004294753074646,grad_norm: 0.8980969752074002, iteration: 248361
loss: 0.9723276495933533,grad_norm: 0.7073038033097873, iteration: 248362
loss: 1.012608528137207,grad_norm: 0.838467239513302, iteration: 248363
loss: 1.0148677825927734,grad_norm: 0.8293923002094705, iteration: 248364
loss: 1.0403162240982056,grad_norm: 0.9999993557297242, iteration: 248365
loss: 1.024715781211853,grad_norm: 0.8894859601780611, iteration: 248366
loss: 0.9572404623031616,grad_norm: 0.9021629610783857, iteration: 248367
loss: 0.9872154593467712,grad_norm: 0.9153465786398178, iteration: 248368
loss: 0.998671293258667,grad_norm: 0.9397282249817873, iteration: 248369
loss: 1.0007859468460083,grad_norm: 0.7369761562246919, iteration: 248370
loss: 1.0229405164718628,grad_norm: 0.8376200196735492, iteration: 248371
loss: 1.0040154457092285,grad_norm: 0.8651007649166096, iteration: 248372
loss: 0.98836350440979,grad_norm: 0.8224048788520196, iteration: 248373
loss: 1.0573769807815552,grad_norm: 0.9222757405071177, iteration: 248374
loss: 0.9923964738845825,grad_norm: 0.9137680714914036, iteration: 248375
loss: 0.973434567451477,grad_norm: 0.9437067233604768, iteration: 248376
loss: 1.0207581520080566,grad_norm: 0.9658302695986012, iteration: 248377
loss: 0.9810792207717896,grad_norm: 0.8055833355706447, iteration: 248378
loss: 1.0259581804275513,grad_norm: 0.8336013807397127, iteration: 248379
loss: 1.0486063957214355,grad_norm: 0.9999992437672047, iteration: 248380
loss: 0.981097936630249,grad_norm: 0.9364867255518744, iteration: 248381
loss: 1.0137112140655518,grad_norm: 0.9999990129740239, iteration: 248382
loss: 0.9600065350532532,grad_norm: 0.9999991501917159, iteration: 248383
loss: 0.9784412980079651,grad_norm: 0.7888030876565025, iteration: 248384
loss: 0.9913783073425293,grad_norm: 0.9421339874227763, iteration: 248385
loss: 1.0168004035949707,grad_norm: 0.9233302354105969, iteration: 248386
loss: 1.0137462615966797,grad_norm: 0.8473264063029421, iteration: 248387
loss: 1.0656325817108154,grad_norm: 0.9999995621786144, iteration: 248388
loss: 1.0079658031463623,grad_norm: 0.9999990957218875, iteration: 248389
loss: 0.9707304239273071,grad_norm: 0.9533303024616873, iteration: 248390
loss: 1.0024173259735107,grad_norm: 0.9999990968914916, iteration: 248391
loss: 1.0323847532272339,grad_norm: 0.9177399610546618, iteration: 248392
loss: 1.0253649950027466,grad_norm: 0.839278130022912, iteration: 248393
loss: 0.9957571029663086,grad_norm: 0.8381679958023018, iteration: 248394
loss: 1.0523215532302856,grad_norm: 0.9074188192833037, iteration: 248395
loss: 0.9930494427680969,grad_norm: 0.7979955608112109, iteration: 248396
loss: 1.0071524381637573,grad_norm: 0.9999991377822692, iteration: 248397
loss: 0.9931038022041321,grad_norm: 0.9788741374258458, iteration: 248398
loss: 0.9919854402542114,grad_norm: 0.9033614262852795, iteration: 248399
loss: 0.9866356253623962,grad_norm: 0.8426812953067615, iteration: 248400
loss: 0.9445441365242004,grad_norm: 0.9399674344929906, iteration: 248401
loss: 0.9742857217788696,grad_norm: 0.9448566356314378, iteration: 248402
loss: 0.9720417857170105,grad_norm: 0.8655482225688437, iteration: 248403
loss: 0.9905750155448914,grad_norm: 0.894090915209572, iteration: 248404
loss: 0.9701138734817505,grad_norm: 0.9468003918801535, iteration: 248405
loss: 1.0020365715026855,grad_norm: 0.8978338238223472, iteration: 248406
loss: 0.9967849850654602,grad_norm: 0.8244846121253631, iteration: 248407
loss: 0.9878619313240051,grad_norm: 0.9999991508894032, iteration: 248408
loss: 0.9849000573158264,grad_norm: 0.9931569015275806, iteration: 248409
loss: 1.0023524761199951,grad_norm: 0.9999995774109862, iteration: 248410
loss: 0.9886437654495239,grad_norm: 0.9792615740565334, iteration: 248411
loss: 0.9696675539016724,grad_norm: 0.9195231386579666, iteration: 248412
loss: 0.9721848964691162,grad_norm: 0.8222699692664923, iteration: 248413
loss: 0.9891487956047058,grad_norm: 0.9999990663542471, iteration: 248414
loss: 0.9807479977607727,grad_norm: 0.9999990919386578, iteration: 248415
loss: 0.9930928945541382,grad_norm: 0.9999989464390839, iteration: 248416
loss: 1.0069290399551392,grad_norm: 0.8423816454648511, iteration: 248417
loss: 1.0080901384353638,grad_norm: 0.9999989898404777, iteration: 248418
loss: 0.9922700524330139,grad_norm: 0.7920956237561477, iteration: 248419
loss: 1.0294945240020752,grad_norm: 0.800984060598456, iteration: 248420
loss: 1.0815106630325317,grad_norm: 0.9999990532836589, iteration: 248421
loss: 0.995870053768158,grad_norm: 0.8484932702655819, iteration: 248422
loss: 1.0226465463638306,grad_norm: 0.822644147075217, iteration: 248423
loss: 1.0034924745559692,grad_norm: 0.840556855897632, iteration: 248424
loss: 0.9788801074028015,grad_norm: 0.6755099763228516, iteration: 248425
loss: 0.9999763369560242,grad_norm: 0.9659659350291369, iteration: 248426
loss: 1.01606023311615,grad_norm: 0.8081809073594083, iteration: 248427
loss: 1.0339254140853882,grad_norm: 0.8988738126111809, iteration: 248428
loss: 1.0104771852493286,grad_norm: 0.8166127111463957, iteration: 248429
loss: 0.9768342971801758,grad_norm: 0.8664154777721255, iteration: 248430
loss: 1.0093328952789307,grad_norm: 0.9999989885387343, iteration: 248431
loss: 1.0047346353530884,grad_norm: 0.9999992423983922, iteration: 248432
loss: 0.95854651927948,grad_norm: 0.8502455823560416, iteration: 248433
loss: 1.0144683122634888,grad_norm: 0.9999991202030105, iteration: 248434
loss: 1.0049834251403809,grad_norm: 0.8425475107421415, iteration: 248435
loss: 0.9491652846336365,grad_norm: 0.8769482233904679, iteration: 248436
loss: 1.0007346868515015,grad_norm: 0.9999991853995809, iteration: 248437
loss: 1.010768175125122,grad_norm: 0.8988109867517683, iteration: 248438
loss: 0.997829258441925,grad_norm: 0.932632826566792, iteration: 248439
loss: 0.9765357375144958,grad_norm: 0.8945611253653113, iteration: 248440
loss: 0.9667258262634277,grad_norm: 0.9483743433564835, iteration: 248441
loss: 0.9936644434928894,grad_norm: 0.8249233209358066, iteration: 248442
loss: 1.028828740119934,grad_norm: 0.7582421517056326, iteration: 248443
loss: 0.9816301465034485,grad_norm: 0.8936273829253928, iteration: 248444
loss: 0.992878258228302,grad_norm: 0.8249977473105482, iteration: 248445
loss: 0.9575009942054749,grad_norm: 0.8404657867341017, iteration: 248446
loss: 1.0136343240737915,grad_norm: 0.8141247432830159, iteration: 248447
loss: 0.9877516031265259,grad_norm: 0.9065117170974216, iteration: 248448
loss: 1.011579990386963,grad_norm: 0.9353031380392721, iteration: 248449
loss: 0.9882065057754517,grad_norm: 0.8521813107376991, iteration: 248450
loss: 0.9639330506324768,grad_norm: 0.8764911699548211, iteration: 248451
loss: 1.006239652633667,grad_norm: 0.9748802914008405, iteration: 248452
loss: 0.9807941317558289,grad_norm: 0.9003257934371842, iteration: 248453
loss: 0.9769654870033264,grad_norm: 0.924315355509813, iteration: 248454
loss: 1.0755257606506348,grad_norm: 0.8778645332818675, iteration: 248455
loss: 0.9816426634788513,grad_norm: 0.9999994544180412, iteration: 248456
loss: 0.981881856918335,grad_norm: 0.9999992432096436, iteration: 248457
loss: 1.0327789783477783,grad_norm: 0.9939608424729369, iteration: 248458
loss: 1.0233176946640015,grad_norm: 0.8377856015425346, iteration: 248459
loss: 0.9702346920967102,grad_norm: 0.9999990405420863, iteration: 248460
loss: 0.958050549030304,grad_norm: 0.7684984517587968, iteration: 248461
loss: 1.0086171627044678,grad_norm: 0.9999991307583507, iteration: 248462
loss: 1.006712794303894,grad_norm: 0.9575095139057384, iteration: 248463
loss: 0.963358461856842,grad_norm: 0.8165472016732115, iteration: 248464
loss: 0.9920399188995361,grad_norm: 0.942456852836123, iteration: 248465
loss: 1.005605936050415,grad_norm: 0.7047821223424291, iteration: 248466
loss: 0.9772825837135315,grad_norm: 0.7537815817947905, iteration: 248467
loss: 1.021669626235962,grad_norm: 0.8628341675830753, iteration: 248468
loss: 1.0099354982376099,grad_norm: 0.8626726812890971, iteration: 248469
loss: 0.9645395874977112,grad_norm: 0.8605633038098528, iteration: 248470
loss: 1.0602489709854126,grad_norm: 0.8570375880913749, iteration: 248471
loss: 1.0015673637390137,grad_norm: 0.9682629291174238, iteration: 248472
loss: 0.9967204928398132,grad_norm: 0.8583249063970914, iteration: 248473
loss: 1.0169477462768555,grad_norm: 0.7941179341038721, iteration: 248474
loss: 1.00852370262146,grad_norm: 0.7817226207989423, iteration: 248475
loss: 1.0452842712402344,grad_norm: 0.834510163595403, iteration: 248476
loss: 0.9928529262542725,grad_norm: 0.8814568957507761, iteration: 248477
loss: 1.0225619077682495,grad_norm: 0.9657555165522074, iteration: 248478
loss: 0.977275013923645,grad_norm: 0.9999990470892195, iteration: 248479
loss: 0.9685575366020203,grad_norm: 0.9999990089367883, iteration: 248480
loss: 0.9996670484542847,grad_norm: 0.7711689365045309, iteration: 248481
loss: 0.9653550982475281,grad_norm: 0.9583554780922637, iteration: 248482
loss: 1.021708607673645,grad_norm: 0.9475746745990952, iteration: 248483
loss: 1.0124077796936035,grad_norm: 0.7493875157450791, iteration: 248484
loss: 1.0185915231704712,grad_norm: 0.8298620702538111, iteration: 248485
loss: 1.037199854850769,grad_norm: 0.972264857798265, iteration: 248486
loss: 0.9744162559509277,grad_norm: 0.8346296880445137, iteration: 248487
loss: 0.976587176322937,grad_norm: 0.799385313277278, iteration: 248488
loss: 0.9641058444976807,grad_norm: 0.8851253179573347, iteration: 248489
loss: 0.9944540858268738,grad_norm: 0.9095873730992435, iteration: 248490
loss: 0.9934474229812622,grad_norm: 0.9183644061562828, iteration: 248491
loss: 0.9963135123252869,grad_norm: 0.9712632170416164, iteration: 248492
loss: 0.9834513068199158,grad_norm: 0.7247409613598882, iteration: 248493
loss: 1.0661603212356567,grad_norm: 0.9999990723356903, iteration: 248494
loss: 0.991645336151123,grad_norm: 0.783411866338459, iteration: 248495
loss: 1.0004215240478516,grad_norm: 0.9210572480964365, iteration: 248496
loss: 1.0046337842941284,grad_norm: 0.9125587974627898, iteration: 248497
loss: 0.9837768077850342,grad_norm: 0.9019065723102563, iteration: 248498
loss: 1.0278031826019287,grad_norm: 0.792791244146449, iteration: 248499
loss: 1.006839394569397,grad_norm: 0.9447254896560995, iteration: 248500
loss: 1.0226575136184692,grad_norm: 0.7948261274258909, iteration: 248501
loss: 1.0240684747695923,grad_norm: 0.918607265535964, iteration: 248502
loss: 0.9916352033615112,grad_norm: 0.9999991675702552, iteration: 248503
loss: 0.9691033959388733,grad_norm: 0.8265167814050884, iteration: 248504
loss: 0.9773313403129578,grad_norm: 0.8668751981030385, iteration: 248505
loss: 0.9930572509765625,grad_norm: 0.8981561171219906, iteration: 248506
loss: 1.0043394565582275,grad_norm: 0.86154517225762, iteration: 248507
loss: 1.0491390228271484,grad_norm: 0.9999996873585022, iteration: 248508
loss: 1.0019656419754028,grad_norm: 0.863461437538593, iteration: 248509
loss: 1.0162471532821655,grad_norm: 0.8327278347548086, iteration: 248510
loss: 1.0037822723388672,grad_norm: 0.9303644431516411, iteration: 248511
loss: 0.9784522652626038,grad_norm: 0.9999991360808108, iteration: 248512
loss: 0.9841592907905579,grad_norm: 0.9999991401494022, iteration: 248513
loss: 1.007029414176941,grad_norm: 0.9314774866133883, iteration: 248514
loss: 1.055069923400879,grad_norm: 0.9999997478337486, iteration: 248515
loss: 1.0286294221878052,grad_norm: 0.894695833922198, iteration: 248516
loss: 0.9916534423828125,grad_norm: 0.8900442492766338, iteration: 248517
loss: 0.9945897459983826,grad_norm: 0.713776800149197, iteration: 248518
loss: 0.9506425261497498,grad_norm: 0.9550731782284934, iteration: 248519
loss: 1.0232982635498047,grad_norm: 0.9999996025667273, iteration: 248520
loss: 1.0793650150299072,grad_norm: 0.9999990292513905, iteration: 248521
loss: 0.9745287299156189,grad_norm: 0.851192823855656, iteration: 248522
loss: 1.0233536958694458,grad_norm: 0.9238345905551703, iteration: 248523
loss: 1.0072325468063354,grad_norm: 0.9971415165275044, iteration: 248524
loss: 0.9615247845649719,grad_norm: 0.885327011943105, iteration: 248525
loss: 1.0033622980117798,grad_norm: 0.8156362033373286, iteration: 248526
loss: 0.9962197542190552,grad_norm: 0.993236545448892, iteration: 248527
loss: 1.000439167022705,grad_norm: 0.8514369131219724, iteration: 248528
loss: 1.021234154701233,grad_norm: 0.8212092827900126, iteration: 248529
loss: 0.98430997133255,grad_norm: 0.8728489991355067, iteration: 248530
loss: 0.9926717281341553,grad_norm: 0.9452162499924912, iteration: 248531
loss: 1.0116488933563232,grad_norm: 0.8059826395583315, iteration: 248532
loss: 0.9743831157684326,grad_norm: 0.9018140956469771, iteration: 248533
loss: 0.9534867405891418,grad_norm: 0.8341511817571953, iteration: 248534
loss: 0.9926677942276001,grad_norm: 0.9201032667243072, iteration: 248535
loss: 0.978802502155304,grad_norm: 0.843235559212195, iteration: 248536
loss: 1.0074857473373413,grad_norm: 0.8218279207108042, iteration: 248537
loss: 0.9928473830223083,grad_norm: 0.8886549915906128, iteration: 248538
loss: 1.0128456354141235,grad_norm: 0.9703415505775204, iteration: 248539
loss: 1.0263543128967285,grad_norm: 0.8330021250784652, iteration: 248540
loss: 1.0054233074188232,grad_norm: 0.8593166829416619, iteration: 248541
loss: 0.9913795590400696,grad_norm: 0.8851376247542577, iteration: 248542
loss: 1.0244603157043457,grad_norm: 0.9999990150170271, iteration: 248543
loss: 1.0128300189971924,grad_norm: 0.9061535547467725, iteration: 248544
loss: 1.0587157011032104,grad_norm: 0.8839929188644549, iteration: 248545
loss: 1.106844186782837,grad_norm: 0.999999868982488, iteration: 248546
loss: 0.9971335530281067,grad_norm: 0.9742856886743844, iteration: 248547
loss: 1.0135213136672974,grad_norm: 0.8467519449144767, iteration: 248548
loss: 0.9983360767364502,grad_norm: 0.8508171043719249, iteration: 248549
loss: 0.9540460705757141,grad_norm: 0.8631663933611701, iteration: 248550
loss: 0.9837843775749207,grad_norm: 0.8525615608459296, iteration: 248551
loss: 1.024604082107544,grad_norm: 0.9503412135774969, iteration: 248552
loss: 0.9884891510009766,grad_norm: 0.927446929205627, iteration: 248553
loss: 0.9542924761772156,grad_norm: 0.814505681690582, iteration: 248554
loss: 0.9697339534759521,grad_norm: 0.98124884314516, iteration: 248555
loss: 1.0312776565551758,grad_norm: 0.8110033237974088, iteration: 248556
loss: 0.949553370475769,grad_norm: 0.8989960188089595, iteration: 248557
loss: 1.0279792547225952,grad_norm: 0.9435018576338421, iteration: 248558
loss: 0.9864872694015503,grad_norm: 0.8705526958433102, iteration: 248559
loss: 1.0373040437698364,grad_norm: 0.8296643548870141, iteration: 248560
loss: 0.9993881583213806,grad_norm: 0.7866418727643081, iteration: 248561
loss: 1.0047532320022583,grad_norm: 0.8472190706229152, iteration: 248562
loss: 0.9771119952201843,grad_norm: 0.9999991743378508, iteration: 248563
loss: 1.0260428190231323,grad_norm: 0.9433446757348998, iteration: 248564
loss: 1.0158145427703857,grad_norm: 0.9973633805405504, iteration: 248565
loss: 0.9931614398956299,grad_norm: 0.8687698851233246, iteration: 248566
loss: 1.0499531030654907,grad_norm: 0.9999997724778407, iteration: 248567
loss: 0.9940525889396667,grad_norm: 0.6878754136847112, iteration: 248568
loss: 0.9346771240234375,grad_norm: 0.8334894647232407, iteration: 248569
loss: 1.0097159147262573,grad_norm: 0.9999998175621856, iteration: 248570
loss: 0.9839304685592651,grad_norm: 0.7098041055151142, iteration: 248571
loss: 1.0059294700622559,grad_norm: 0.8729203781204162, iteration: 248572
loss: 0.9896703958511353,grad_norm: 0.9874868627699953, iteration: 248573
loss: 0.9932159781455994,grad_norm: 0.9112708691472062, iteration: 248574
loss: 1.0001381635665894,grad_norm: 0.8674061004703533, iteration: 248575
loss: 1.0015428066253662,grad_norm: 0.9992336901152715, iteration: 248576
loss: 0.9923651218414307,grad_norm: 0.9467434994500832, iteration: 248577
loss: 1.017871379852295,grad_norm: 0.9999999594036915, iteration: 248578
loss: 1.038121223449707,grad_norm: 0.999999050825227, iteration: 248579
loss: 0.9900317788124084,grad_norm: 0.8821120724143635, iteration: 248580
loss: 1.0217375755310059,grad_norm: 0.9999989730873365, iteration: 248581
loss: 1.0223044157028198,grad_norm: 0.9999990795632803, iteration: 248582
loss: 0.9496343731880188,grad_norm: 0.8328866231460642, iteration: 248583
loss: 0.9938583970069885,grad_norm: 0.9859406188362665, iteration: 248584
loss: 1.0346835851669312,grad_norm: 0.9179520312788308, iteration: 248585
loss: 0.9934268593788147,grad_norm: 0.9247737544686832, iteration: 248586
loss: 1.0540419816970825,grad_norm: 0.9999994929850494, iteration: 248587
loss: 0.9858238697052002,grad_norm: 0.9975184516242708, iteration: 248588
loss: 1.0194642543792725,grad_norm: 0.8405556600591128, iteration: 248589
loss: 0.9749146103858948,grad_norm: 0.9999990885143161, iteration: 248590
loss: 0.9750738739967346,grad_norm: 0.838229805784433, iteration: 248591
loss: 0.9549224972724915,grad_norm: 0.9278076613361186, iteration: 248592
loss: 1.006909966468811,grad_norm: 0.8994358584770207, iteration: 248593
loss: 0.9874345660209656,grad_norm: 0.9441304331470334, iteration: 248594
loss: 0.9926592111587524,grad_norm: 0.9999990855479811, iteration: 248595
loss: 0.9994451999664307,grad_norm: 0.999999492966049, iteration: 248596
loss: 1.0239752531051636,grad_norm: 0.9999994520843666, iteration: 248597
loss: 1.005409836769104,grad_norm: 0.8160804451819513, iteration: 248598
loss: 0.9919043779373169,grad_norm: 0.8851955077934299, iteration: 248599
loss: 0.9717761874198914,grad_norm: 0.9713668532143042, iteration: 248600
loss: 1.0237836837768555,grad_norm: 0.9648106559456859, iteration: 248601
loss: 0.9866042733192444,grad_norm: 0.8080710395718054, iteration: 248602
loss: 0.9965471625328064,grad_norm: 0.9782866752529248, iteration: 248603
loss: 0.9661586880683899,grad_norm: 0.8840927463154182, iteration: 248604
loss: 0.9968498349189758,grad_norm: 0.9999991490109413, iteration: 248605
loss: 1.0009843111038208,grad_norm: 0.9556050964751096, iteration: 248606
loss: 0.9648295640945435,grad_norm: 0.8643949737296255, iteration: 248607
loss: 1.0003774166107178,grad_norm: 0.9999990979693677, iteration: 248608
loss: 0.9883162379264832,grad_norm: 0.8805379601289751, iteration: 248609
loss: 0.9705380797386169,grad_norm: 0.9965349776911471, iteration: 248610
loss: 1.0234622955322266,grad_norm: 0.9336840913483946, iteration: 248611
loss: 1.0157310962677002,grad_norm: 0.999999111748861, iteration: 248612
loss: 1.0366685390472412,grad_norm: 0.9524563206779577, iteration: 248613
loss: 1.003536343574524,grad_norm: 0.9926360217936722, iteration: 248614
loss: 1.0497565269470215,grad_norm: 0.9041201690925331, iteration: 248615
loss: 1.0218334197998047,grad_norm: 0.9615958746322711, iteration: 248616
loss: 1.0769023895263672,grad_norm: 0.9999994407688679, iteration: 248617
loss: 0.9757211208343506,grad_norm: 0.9999990589660015, iteration: 248618
loss: 1.0118037462234497,grad_norm: 0.8721408614351084, iteration: 248619
loss: 0.9951624870300293,grad_norm: 0.9176648907116887, iteration: 248620
loss: 1.0231910943984985,grad_norm: 0.9999993030222808, iteration: 248621
loss: 1.0404057502746582,grad_norm: 0.9999992638061241, iteration: 248622
loss: 1.0046483278274536,grad_norm: 0.8921308254251683, iteration: 248623
loss: 0.987077534198761,grad_norm: 0.99810418195354, iteration: 248624
loss: 1.0113558769226074,grad_norm: 0.999999252985138, iteration: 248625
loss: 1.0400468111038208,grad_norm: 0.8414003405143979, iteration: 248626
loss: 0.9989184141159058,grad_norm: 0.9999993089130554, iteration: 248627
loss: 1.009634256362915,grad_norm: 0.9501992312693094, iteration: 248628
loss: 1.0455384254455566,grad_norm: 0.9999992084873603, iteration: 248629
loss: 0.9950451254844666,grad_norm: 0.8802981986599977, iteration: 248630
loss: 1.0261527299880981,grad_norm: 0.9246769440580936, iteration: 248631
loss: 0.9915788173675537,grad_norm: 0.8564031715304916, iteration: 248632
loss: 0.9715760946273804,grad_norm: 0.9150736589194, iteration: 248633
loss: 0.9807944893836975,grad_norm: 0.9080655302609342, iteration: 248634
loss: 0.9980278611183167,grad_norm: 0.9183388804936549, iteration: 248635
loss: 1.0040112733840942,grad_norm: 0.8008653879441608, iteration: 248636
loss: 0.9862654209136963,grad_norm: 0.8914313301575374, iteration: 248637
loss: 0.9903667569160461,grad_norm: 0.9183515676872506, iteration: 248638
loss: 0.9973961114883423,grad_norm: 0.8184968552356945, iteration: 248639
loss: 1.0073776245117188,grad_norm: 0.9730706230367864, iteration: 248640
loss: 0.976129412651062,grad_norm: 0.9605745498142306, iteration: 248641
loss: 0.9870566725730896,grad_norm: 0.9161795144867523, iteration: 248642
loss: 1.0117400884628296,grad_norm: 0.9999992068509889, iteration: 248643
loss: 1.0139085054397583,grad_norm: 0.8967588072963057, iteration: 248644
loss: 1.0206763744354248,grad_norm: 0.9806353058904856, iteration: 248645
loss: 0.9651070237159729,grad_norm: 0.8622552470268131, iteration: 248646
loss: 1.0315001010894775,grad_norm: 0.8725136880074934, iteration: 248647
loss: 0.9717991948127747,grad_norm: 0.834913061013053, iteration: 248648
loss: 0.9904721975326538,grad_norm: 0.9999990543274115, iteration: 248649
loss: 1.0146050453186035,grad_norm: 0.8716346167731346, iteration: 248650
loss: 1.013049840927124,grad_norm: 0.9999992127981661, iteration: 248651
loss: 0.95240718126297,grad_norm: 0.926119458878243, iteration: 248652
loss: 0.9642491936683655,grad_norm: 0.9620903560939464, iteration: 248653
loss: 0.9839977025985718,grad_norm: 0.9999990649516124, iteration: 248654
loss: 0.986609935760498,grad_norm: 0.9022152037108843, iteration: 248655
loss: 1.0035197734832764,grad_norm: 0.999998988523938, iteration: 248656
loss: 1.0135393142700195,grad_norm: 0.8004413279226926, iteration: 248657
loss: 1.0234979391098022,grad_norm: 0.7748891052511956, iteration: 248658
loss: 1.0236995220184326,grad_norm: 0.9537162327064584, iteration: 248659
loss: 1.0123002529144287,grad_norm: 0.8695976736911231, iteration: 248660
loss: 1.0006980895996094,grad_norm: 0.9697832132236661, iteration: 248661
loss: 0.9282546639442444,grad_norm: 0.9999989315699469, iteration: 248662
loss: 0.9818920493125916,grad_norm: 0.849241826638523, iteration: 248663
loss: 1.0079091787338257,grad_norm: 0.9860702361193839, iteration: 248664
loss: 0.9890073537826538,grad_norm: 0.9999991350438421, iteration: 248665
loss: 1.0205583572387695,grad_norm: 0.9711847087209386, iteration: 248666
loss: 1.0684834718704224,grad_norm: 0.9999992398066295, iteration: 248667
loss: 1.0128284692764282,grad_norm: 0.9193465345745401, iteration: 248668
loss: 1.0073171854019165,grad_norm: 0.7061305018452616, iteration: 248669
loss: 1.0003398656845093,grad_norm: 0.8825024160570332, iteration: 248670
loss: 0.9965194463729858,grad_norm: 0.785010409965911, iteration: 248671
loss: 0.9977174997329712,grad_norm: 0.7868014854901629, iteration: 248672
loss: 1.0128031969070435,grad_norm: 0.9707632143106107, iteration: 248673
loss: 1.0041731595993042,grad_norm: 0.9383412269209933, iteration: 248674
loss: 0.9826235175132751,grad_norm: 0.922504579025174, iteration: 248675
loss: 0.9850062727928162,grad_norm: 0.7702855298106019, iteration: 248676
loss: 0.992561936378479,grad_norm: 0.9999990463632714, iteration: 248677
loss: 0.9953782558441162,grad_norm: 0.9999991236543824, iteration: 248678
loss: 0.9970180988311768,grad_norm: 0.9999990820471487, iteration: 248679
loss: 0.981130838394165,grad_norm: 0.9359836098999367, iteration: 248680
loss: 0.9959387183189392,grad_norm: 0.8175576529052279, iteration: 248681
loss: 1.0159717798233032,grad_norm: 0.7838921300187178, iteration: 248682
loss: 0.9505531191825867,grad_norm: 0.8118610881664726, iteration: 248683
loss: 1.0055382251739502,grad_norm: 0.8399740253142424, iteration: 248684
loss: 0.9927566051483154,grad_norm: 0.9166783052750553, iteration: 248685
loss: 1.0046528577804565,grad_norm: 0.9139196112441231, iteration: 248686
loss: 0.9686101078987122,grad_norm: 0.9999995981709604, iteration: 248687
loss: 1.0142030715942383,grad_norm: 0.999999120282491, iteration: 248688
loss: 1.0537487268447876,grad_norm: 0.9997908402044877, iteration: 248689
loss: 0.9858732223510742,grad_norm: 0.8753325114442367, iteration: 248690
loss: 1.0120364427566528,grad_norm: 0.9999990567565231, iteration: 248691
loss: 0.9581428170204163,grad_norm: 0.9999992615226636, iteration: 248692
loss: 1.010074496269226,grad_norm: 0.9999989485490992, iteration: 248693
loss: 1.0026330947875977,grad_norm: 0.758412704566837, iteration: 248694
loss: 1.0027580261230469,grad_norm: 0.9999992965008242, iteration: 248695
loss: 1.0028382539749146,grad_norm: 0.9288335680831636, iteration: 248696
loss: 1.0101044178009033,grad_norm: 0.9999990023108949, iteration: 248697
loss: 1.0899205207824707,grad_norm: 0.9999993234349814, iteration: 248698
loss: 1.0502848625183105,grad_norm: 0.9521796935827372, iteration: 248699
loss: 0.9880384206771851,grad_norm: 0.8279073359731915, iteration: 248700
loss: 0.9909540414810181,grad_norm: 0.9999991735042116, iteration: 248701
loss: 1.0199459791183472,grad_norm: 0.9999999562404042, iteration: 248702
loss: 1.064505934715271,grad_norm: 0.8692293388382346, iteration: 248703
loss: 0.9981617331504822,grad_norm: 0.7660419001724169, iteration: 248704
loss: 1.01686429977417,grad_norm: 0.9379048561038833, iteration: 248705
loss: 0.9989755749702454,grad_norm: 0.9999991670865243, iteration: 248706
loss: 0.9960830211639404,grad_norm: 0.9455713127459018, iteration: 248707
loss: 0.9628304839134216,grad_norm: 0.8901439346894316, iteration: 248708
loss: 1.0228573083877563,grad_norm: 0.7472314980086665, iteration: 248709
loss: 0.9914959669113159,grad_norm: 0.9231242929342656, iteration: 248710
loss: 0.9967271685600281,grad_norm: 0.8298318659496693, iteration: 248711
loss: 0.9942443370819092,grad_norm: 0.8061386734258065, iteration: 248712
loss: 1.0086663961410522,grad_norm: 0.7897932089431168, iteration: 248713
loss: 1.0207573175430298,grad_norm: 0.9999992888666491, iteration: 248714
loss: 1.0327837467193604,grad_norm: 0.9192552693103364, iteration: 248715
loss: 1.0206352472305298,grad_norm: 0.775023024608581, iteration: 248716
loss: 1.0177897214889526,grad_norm: 0.9503406393060897, iteration: 248717
loss: 0.975152313709259,grad_norm: 0.895233048597174, iteration: 248718
loss: 1.0610616207122803,grad_norm: 0.8753229058148354, iteration: 248719
loss: 1.0082224607467651,grad_norm: 0.7526303517617954, iteration: 248720
loss: 1.0416120290756226,grad_norm: 0.9612696302285013, iteration: 248721
loss: 0.987805187702179,grad_norm: 0.9999990134845516, iteration: 248722
loss: 1.0456879138946533,grad_norm: 0.9888258202675212, iteration: 248723
loss: 0.9637542963027954,grad_norm: 0.8455993879817363, iteration: 248724
loss: 1.0136661529541016,grad_norm: 0.9393334904192909, iteration: 248725
loss: 1.0134133100509644,grad_norm: 0.8224009735890601, iteration: 248726
loss: 0.9797029495239258,grad_norm: 0.8355316330213655, iteration: 248727
loss: 0.9836537837982178,grad_norm: 0.999999073789953, iteration: 248728
loss: 0.9849362373352051,grad_norm: 0.7937256799354369, iteration: 248729
loss: 1.0429551601409912,grad_norm: 0.8346429824320528, iteration: 248730
loss: 0.9857290983200073,grad_norm: 0.8879490818280743, iteration: 248731
loss: 1.017892599105835,grad_norm: 0.7961571647992052, iteration: 248732
loss: 0.96138596534729,grad_norm: 0.8863865328832246, iteration: 248733
loss: 1.0155905485153198,grad_norm: 0.9826822477462831, iteration: 248734
loss: 0.9857840538024902,grad_norm: 0.8117872542625909, iteration: 248735
loss: 0.955133318901062,grad_norm: 0.7845901975621427, iteration: 248736
loss: 0.9819642901420593,grad_norm: 0.999999098259025, iteration: 248737
loss: 0.9677261114120483,grad_norm: 0.9424035385519633, iteration: 248738
loss: 0.9396161437034607,grad_norm: 0.9429282048922105, iteration: 248739
loss: 0.9882263541221619,grad_norm: 0.9999990504021727, iteration: 248740
loss: 0.9883988499641418,grad_norm: 0.8392065523951043, iteration: 248741
loss: 0.9928349256515503,grad_norm: 0.8700267132426334, iteration: 248742
loss: 0.98058021068573,grad_norm: 0.7269100887858347, iteration: 248743
loss: 0.9934018850326538,grad_norm: 0.8093806840887958, iteration: 248744
loss: 1.060567021369934,grad_norm: 0.9999998595934056, iteration: 248745
loss: 0.9739208221435547,grad_norm: 0.9999991624356456, iteration: 248746
loss: 1.003828525543213,grad_norm: 0.9999991632081796, iteration: 248747
loss: 1.001137375831604,grad_norm: 0.7739858728827004, iteration: 248748
loss: 0.987356424331665,grad_norm: 0.9999990789759294, iteration: 248749
loss: 0.9887524843215942,grad_norm: 0.9578006934564665, iteration: 248750
loss: 0.9940992593765259,grad_norm: 0.7884339872719793, iteration: 248751
loss: 0.9959478378295898,grad_norm: 0.9014500792942209, iteration: 248752
loss: 0.9894267320632935,grad_norm: 0.8526372245549521, iteration: 248753
loss: 1.0046725273132324,grad_norm: 0.8277969798595766, iteration: 248754
loss: 1.0057390928268433,grad_norm: 0.9058204089834294, iteration: 248755
loss: 1.0041826963424683,grad_norm: 0.9999991349441714, iteration: 248756
loss: 1.0035529136657715,grad_norm: 0.8981831023473964, iteration: 248757
loss: 0.9713847637176514,grad_norm: 0.858594334148467, iteration: 248758
loss: 0.9906459450721741,grad_norm: 0.9750780507979745, iteration: 248759
loss: 0.9968391060829163,grad_norm: 0.7342360460580976, iteration: 248760
loss: 0.9794725179672241,grad_norm: 0.8509539950392486, iteration: 248761
loss: 0.9740323424339294,grad_norm: 0.9344777352559922, iteration: 248762
loss: 1.0035573244094849,grad_norm: 0.8706460683473818, iteration: 248763
loss: 0.993894100189209,grad_norm: 0.9999991212764541, iteration: 248764
loss: 0.9998102188110352,grad_norm: 0.7947756053745264, iteration: 248765
loss: 1.005594253540039,grad_norm: 0.83770980369567, iteration: 248766
loss: 1.0363940000534058,grad_norm: 0.9987807184778904, iteration: 248767
loss: 1.0023325681686401,grad_norm: 0.7816342557219712, iteration: 248768
loss: 1.0052576065063477,grad_norm: 0.9624642672526406, iteration: 248769
loss: 0.9772736430168152,grad_norm: 0.9999991131277499, iteration: 248770
loss: 1.0025601387023926,grad_norm: 0.8271404266962433, iteration: 248771
loss: 0.9642278552055359,grad_norm: 0.8906869998918425, iteration: 248772
loss: 0.9457428455352783,grad_norm: 0.8240794505084872, iteration: 248773
loss: 1.0072963237762451,grad_norm: 0.8799455329677573, iteration: 248774
loss: 0.9660017490386963,grad_norm: 0.8934280382507244, iteration: 248775
loss: 0.9956462979316711,grad_norm: 0.9007985702530039, iteration: 248776
loss: 1.0451933145523071,grad_norm: 0.999999024632445, iteration: 248777
loss: 0.9994912147521973,grad_norm: 0.7746758807761832, iteration: 248778
loss: 1.0013322830200195,grad_norm: 0.9125546152626169, iteration: 248779
loss: 1.001567006111145,grad_norm: 0.9149720597962128, iteration: 248780
loss: 1.002676248550415,grad_norm: 0.8449212327469086, iteration: 248781
loss: 1.0120400190353394,grad_norm: 0.9870934913212982, iteration: 248782
loss: 0.9787763357162476,grad_norm: 0.8586083715294781, iteration: 248783
loss: 0.9437764286994934,grad_norm: 0.8301916209600695, iteration: 248784
loss: 0.9893917441368103,grad_norm: 0.8276620641705588, iteration: 248785
loss: 0.9746419787406921,grad_norm: 0.9999990049432469, iteration: 248786
loss: 0.9660680890083313,grad_norm: 0.9193662861682892, iteration: 248787
loss: 0.9743970036506653,grad_norm: 0.9522975913420078, iteration: 248788
loss: 1.007186770439148,grad_norm: 0.8628660209387922, iteration: 248789
loss: 1.0422391891479492,grad_norm: 0.9999993524171226, iteration: 248790
loss: 1.079783320426941,grad_norm: 0.9999993515485331, iteration: 248791
loss: 0.995309054851532,grad_norm: 0.9999990494167397, iteration: 248792
loss: 1.0093798637390137,grad_norm: 0.9975196316928353, iteration: 248793
loss: 0.9915100336074829,grad_norm: 0.9912189125314537, iteration: 248794
loss: 0.9652718901634216,grad_norm: 0.8077271998204121, iteration: 248795
loss: 1.0337599515914917,grad_norm: 0.9999991807108648, iteration: 248796
loss: 0.9682050347328186,grad_norm: 0.6689395096991486, iteration: 248797
loss: 0.9956618547439575,grad_norm: 0.7865817183300237, iteration: 248798
loss: 0.9755508303642273,grad_norm: 0.8260733942094738, iteration: 248799
loss: 1.0052965879440308,grad_norm: 0.9049344889011253, iteration: 248800
loss: 0.9467690587043762,grad_norm: 0.9999992860554818, iteration: 248801
loss: 0.9953387379646301,grad_norm: 0.8334202483661636, iteration: 248802
loss: 1.024346113204956,grad_norm: 0.7986104424111303, iteration: 248803
loss: 0.9593806266784668,grad_norm: 0.8980252055266843, iteration: 248804
loss: 0.9892246127128601,grad_norm: 0.8604296826296626, iteration: 248805
loss: 1.0142799615859985,grad_norm: 0.9999992632157397, iteration: 248806
loss: 0.978444516658783,grad_norm: 0.8544876549989162, iteration: 248807
loss: 0.9824122786521912,grad_norm: 0.8806619370442281, iteration: 248808
loss: 0.9738718867301941,grad_norm: 0.9069364176268291, iteration: 248809
loss: 1.022566795349121,grad_norm: 0.8375848074828546, iteration: 248810
loss: 1.0006670951843262,grad_norm: 0.9241911011029893, iteration: 248811
loss: 0.9998765587806702,grad_norm: 0.8937215518346449, iteration: 248812
loss: 1.0022773742675781,grad_norm: 0.9136652437454369, iteration: 248813
loss: 0.9709486961364746,grad_norm: 0.9346392930488596, iteration: 248814
loss: 0.9807049036026001,grad_norm: 0.9999991523842839, iteration: 248815
loss: 1.018799066543579,grad_norm: 0.9999989916305183, iteration: 248816
loss: 1.0108187198638916,grad_norm: 0.9999991507575157, iteration: 248817
loss: 1.024797797203064,grad_norm: 0.9999997535440378, iteration: 248818
loss: 1.0032449960708618,grad_norm: 0.8670173452716058, iteration: 248819
loss: 0.9872638583183289,grad_norm: 0.9999990654250214, iteration: 248820
loss: 1.0138015747070312,grad_norm: 0.8440923436513778, iteration: 248821
loss: 1.0104570388793945,grad_norm: 0.9446597602683843, iteration: 248822
loss: 1.0148918628692627,grad_norm: 0.8380230844582282, iteration: 248823
loss: 1.0370501279830933,grad_norm: 0.9202831480196624, iteration: 248824
loss: 1.0236835479736328,grad_norm: 0.9210060675495623, iteration: 248825
loss: 1.0134304761886597,grad_norm: 0.7858573917490567, iteration: 248826
loss: 1.014074444770813,grad_norm: 0.9858147698527973, iteration: 248827
loss: 0.998319149017334,grad_norm: 0.7485657180461226, iteration: 248828
loss: 0.9773166179656982,grad_norm: 0.8265111706325987, iteration: 248829
loss: 0.9960078001022339,grad_norm: 0.946056083392678, iteration: 248830
loss: 0.9761775135993958,grad_norm: 0.8968723897243469, iteration: 248831
loss: 0.9324219822883606,grad_norm: 0.9953987450492027, iteration: 248832
loss: 0.9839128851890564,grad_norm: 0.8442741506662262, iteration: 248833
loss: 0.9857167601585388,grad_norm: 0.7707912849769503, iteration: 248834
loss: 1.0036982297897339,grad_norm: 0.9999991611072202, iteration: 248835
loss: 0.9650623202323914,grad_norm: 0.9541512410276919, iteration: 248836
loss: 0.9987297058105469,grad_norm: 0.8735240791814466, iteration: 248837
loss: 0.946753203868866,grad_norm: 0.964022275963879, iteration: 248838
loss: 0.9792039394378662,grad_norm: 0.9999991471939649, iteration: 248839
loss: 0.9528768062591553,grad_norm: 0.9999991491468828, iteration: 248840
loss: 1.0142005681991577,grad_norm: 0.9359205940168737, iteration: 248841
loss: 1.03034508228302,grad_norm: 0.9303150324460643, iteration: 248842
loss: 0.954468846321106,grad_norm: 0.8462243625099212, iteration: 248843
loss: 1.0022331476211548,grad_norm: 0.9999989889119258, iteration: 248844
loss: 0.9970624446868896,grad_norm: 0.8806407708201331, iteration: 248845
loss: 0.9353641271591187,grad_norm: 0.9678024086573549, iteration: 248846
loss: 0.9822641611099243,grad_norm: 0.867314778473382, iteration: 248847
loss: 0.9996689558029175,grad_norm: 0.9266018632638986, iteration: 248848
loss: 1.0333985090255737,grad_norm: 0.9710215878617733, iteration: 248849
loss: 1.0589672327041626,grad_norm: 0.9999996627934571, iteration: 248850
loss: 0.9949379563331604,grad_norm: 0.9809655047809951, iteration: 248851
loss: 1.0362508296966553,grad_norm: 0.999999023304989, iteration: 248852
loss: 1.0126612186431885,grad_norm: 0.8211994416794821, iteration: 248853
loss: 0.9850779175758362,grad_norm: 0.9248362336953379, iteration: 248854
loss: 0.9823117852210999,grad_norm: 0.9282974770044381, iteration: 248855
loss: 0.9611514806747437,grad_norm: 0.9999990803043824, iteration: 248856
loss: 1.0068755149841309,grad_norm: 0.8634374315366483, iteration: 248857
loss: 0.9739861488342285,grad_norm: 0.6950168289694657, iteration: 248858
loss: 0.9751930832862854,grad_norm: 0.8539925217290807, iteration: 248859
loss: 1.0279229879379272,grad_norm: 0.9923209970484339, iteration: 248860
loss: 0.9939284324645996,grad_norm: 0.9999990265468808, iteration: 248861
loss: 0.969749391078949,grad_norm: 0.9999990366761117, iteration: 248862
loss: 0.9955127239227295,grad_norm: 0.8568843671782032, iteration: 248863
loss: 1.0354728698730469,grad_norm: 0.9999990581211324, iteration: 248864
loss: 1.1364033222198486,grad_norm: 1.0000000224599803, iteration: 248865
loss: 1.0680028200149536,grad_norm: 0.9999999593910374, iteration: 248866
loss: 1.016029953956604,grad_norm: 0.8415786302293579, iteration: 248867
loss: 1.0119622945785522,grad_norm: 0.9106181555727231, iteration: 248868
loss: 0.9859489798545837,grad_norm: 0.8740821214625273, iteration: 248869
loss: 1.0051639080047607,grad_norm: 0.9305880200627404, iteration: 248870
loss: 1.0265427827835083,grad_norm: 0.9496660271531272, iteration: 248871
loss: 1.0144107341766357,grad_norm: 0.9570808933204878, iteration: 248872
loss: 0.9600086808204651,grad_norm: 0.9042013910923857, iteration: 248873
loss: 1.0304458141326904,grad_norm: 0.8675028368861774, iteration: 248874
loss: 1.00288724899292,grad_norm: 0.8643732301407766, iteration: 248875
loss: 1.0179839134216309,grad_norm: 0.9819854383538419, iteration: 248876
loss: 0.9903655052185059,grad_norm: 0.9320874773324366, iteration: 248877
loss: 0.9826314449310303,grad_norm: 0.8036834735842645, iteration: 248878
loss: 1.007432222366333,grad_norm: 0.7619847642140474, iteration: 248879
loss: 1.0271028280258179,grad_norm: 0.9999990385643839, iteration: 248880
loss: 0.996099591255188,grad_norm: 0.8541531950941148, iteration: 248881
loss: 0.9791118502616882,grad_norm: 0.8036003536525985, iteration: 248882
loss: 1.046103835105896,grad_norm: 0.9999992373987593, iteration: 248883
loss: 1.017215371131897,grad_norm: 0.765219923618393, iteration: 248884
loss: 0.9705747365951538,grad_norm: 0.8934916759172564, iteration: 248885
loss: 0.9700284600257874,grad_norm: 0.9999990470884252, iteration: 248886
loss: 0.9656955003738403,grad_norm: 0.8944219333000347, iteration: 248887
loss: 0.9724408984184265,grad_norm: 0.9999990733103649, iteration: 248888
loss: 1.0050411224365234,grad_norm: 0.8835005901197835, iteration: 248889
loss: 0.974853515625,grad_norm: 0.9999995239685745, iteration: 248890
loss: 1.0244196653366089,grad_norm: 0.8827662872847966, iteration: 248891
loss: 1.0107150077819824,grad_norm: 0.9999992732481684, iteration: 248892
loss: 0.9887032508850098,grad_norm: 0.9702045362198537, iteration: 248893
loss: 0.9929686784744263,grad_norm: 0.999998941173107, iteration: 248894
loss: 1.0470086336135864,grad_norm: 0.9999998467463749, iteration: 248895
loss: 1.000836730003357,grad_norm: 0.8331658172638092, iteration: 248896
loss: 1.0074788331985474,grad_norm: 0.7811536762955821, iteration: 248897
loss: 1.009282112121582,grad_norm: 0.9110670399216406, iteration: 248898
loss: 1.0368001461029053,grad_norm: 0.8733202072912478, iteration: 248899
loss: 1.005910038948059,grad_norm: 0.958667487958374, iteration: 248900
loss: 1.0926754474639893,grad_norm: 0.9776116244480085, iteration: 248901
loss: 0.968849241733551,grad_norm: 0.8358102042759613, iteration: 248902
loss: 1.0161346197128296,grad_norm: 0.9999993301343367, iteration: 248903
loss: 1.1361836194992065,grad_norm: 0.9079674979961287, iteration: 248904
loss: 0.9982350468635559,grad_norm: 0.7499491447352551, iteration: 248905
loss: 1.115661382675171,grad_norm: 0.8985378926700648, iteration: 248906
loss: 0.9919158220291138,grad_norm: 0.8806607131563892, iteration: 248907
loss: 1.1066511869430542,grad_norm: 0.9999990467511692, iteration: 248908
loss: 1.1779654026031494,grad_norm: 0.9991039007651428, iteration: 248909
loss: 1.1445037126541138,grad_norm: 0.9999997913497357, iteration: 248910
loss: 1.1440318822860718,grad_norm: 0.999999162067296, iteration: 248911
loss: 0.992564857006073,grad_norm: 0.9999995530302609, iteration: 248912
loss: 1.0237157344818115,grad_norm: 0.9941177225417609, iteration: 248913
loss: 1.0690029859542847,grad_norm: 0.9999994534111468, iteration: 248914
loss: 0.9555273056030273,grad_norm: 0.895504880514813, iteration: 248915
loss: 1.053477168083191,grad_norm: 0.9999994143916319, iteration: 248916
loss: 1.1236799955368042,grad_norm: 0.9999999735808531, iteration: 248917
loss: 1.20359206199646,grad_norm: 1.0000000241814677, iteration: 248918
loss: 1.3318573236465454,grad_norm: 0.9999993799924015, iteration: 248919
loss: 1.1790789365768433,grad_norm: 0.9999993102506348, iteration: 248920
loss: 1.647013545036316,grad_norm: 0.9999997542287676, iteration: 248921
loss: 1.344596028327942,grad_norm: 0.9999996344601577, iteration: 248922
loss: 1.4950356483459473,grad_norm: 0.9999998549672553, iteration: 248923
loss: 1.53752863407135,grad_norm: 0.9999998366747984, iteration: 248924
loss: 1.2796295881271362,grad_norm: 0.9999998158799184, iteration: 248925
loss: 1.3056551218032837,grad_norm: 0.9999994914578964, iteration: 248926
loss: 1.329054355621338,grad_norm: 0.9999998453864115, iteration: 248927
loss: 1.443872332572937,grad_norm: 0.9999998437441342, iteration: 248928
loss: 1.4612329006195068,grad_norm: 0.9999995537407973, iteration: 248929
loss: 1.290422797203064,grad_norm: 0.9999996790561959, iteration: 248930
loss: 1.3383708000183105,grad_norm: 0.9999999348742783, iteration: 248931
loss: 1.4567204713821411,grad_norm: 0.9999999131254993, iteration: 248932
loss: 1.4708231687545776,grad_norm: 0.9999996757002384, iteration: 248933
loss: 1.519321084022522,grad_norm: 0.9999998966764398, iteration: 248934
loss: 1.281601905822754,grad_norm: 0.9999994890703526, iteration: 248935
loss: 1.5037933588027954,grad_norm: 0.9999998519072045, iteration: 248936
loss: 1.294170618057251,grad_norm: 0.9999996599078248, iteration: 248937
loss: 1.3995500802993774,grad_norm: 0.99999976653928, iteration: 248938
loss: 1.3746795654296875,grad_norm: 0.9999998100232995, iteration: 248939
loss: 1.372618317604065,grad_norm: 0.9999994197482789, iteration: 248940
loss: 1.4162615537643433,grad_norm: 0.9999998590570692, iteration: 248941
loss: 1.360127329826355,grad_norm: 1.0000000569001468, iteration: 248942
loss: 1.269258975982666,grad_norm: 0.9999999007113866, iteration: 248943
loss: 1.2943986654281616,grad_norm: 0.9999998289855366, iteration: 248944
loss: 1.758692741394043,grad_norm: 0.9999998679023185, iteration: 248945
loss: 1.3293793201446533,grad_norm: 0.9999999072331164, iteration: 248946
loss: 1.2813001871109009,grad_norm: 0.9999997208637091, iteration: 248947
loss: 1.2786273956298828,grad_norm: 0.9999997974023987, iteration: 248948
loss: 1.2442864179611206,grad_norm: 0.9999991644379829, iteration: 248949
loss: 1.2470886707305908,grad_norm: 0.9999994704822964, iteration: 248950
loss: 1.0921393632888794,grad_norm: 0.9999993689542659, iteration: 248951
loss: 1.2041575908660889,grad_norm: 0.9999995280158923, iteration: 248952
loss: 1.247050404548645,grad_norm: 0.999999330973259, iteration: 248953
loss: 1.3065413236618042,grad_norm: 0.9999999138736085, iteration: 248954
loss: 1.255262851715088,grad_norm: 0.999999529705155, iteration: 248955
loss: 1.2836418151855469,grad_norm: 0.9999998147247062, iteration: 248956
loss: 1.161344289779663,grad_norm: 0.999999901346339, iteration: 248957
loss: 1.118658185005188,grad_norm: 0.9999996016573762, iteration: 248958
loss: 1.1263132095336914,grad_norm: 0.9999997213148168, iteration: 248959
loss: 1.082115888595581,grad_norm: 0.9999996150753083, iteration: 248960
loss: 1.1231379508972168,grad_norm: 0.9999992758816435, iteration: 248961
loss: 1.1191457509994507,grad_norm: 0.9999991424266716, iteration: 248962
loss: 1.1252018213272095,grad_norm: 0.9925564630105084, iteration: 248963
loss: 1.0420784950256348,grad_norm: 0.9999990755240272, iteration: 248964
loss: 1.0601933002471924,grad_norm: 0.9999993479133783, iteration: 248965
loss: 0.9858120083808899,grad_norm: 0.9999991227392149, iteration: 248966
loss: 1.0728987455368042,grad_norm: 0.9999996243750536, iteration: 248967
loss: 1.0952802896499634,grad_norm: 0.9999994353981376, iteration: 248968
loss: 1.016947627067566,grad_norm: 0.9168558501242423, iteration: 248969
loss: 1.0449515581130981,grad_norm: 0.9999990224988929, iteration: 248970
loss: 0.9919857382774353,grad_norm: 0.9755492583925324, iteration: 248971
loss: 1.0198384523391724,grad_norm: 0.9999991630504892, iteration: 248972
loss: 0.9963206648826599,grad_norm: 0.8832839595818293, iteration: 248973
loss: 1.0304945707321167,grad_norm: 0.9999989264984083, iteration: 248974
loss: 1.0734889507293701,grad_norm: 0.9999999129198656, iteration: 248975
loss: 1.0617724657058716,grad_norm: 0.7543236544323385, iteration: 248976
loss: 1.0070375204086304,grad_norm: 0.7542804007860365, iteration: 248977
loss: 0.9792503118515015,grad_norm: 0.969049647761482, iteration: 248978
loss: 1.032400131225586,grad_norm: 0.9999991114151512, iteration: 248979
loss: 0.9766601324081421,grad_norm: 0.8694549774044932, iteration: 248980
loss: 1.008764386177063,grad_norm: 0.8788984127697489, iteration: 248981
loss: 1.0796995162963867,grad_norm: 0.9999999003930277, iteration: 248982
loss: 0.9989797472953796,grad_norm: 0.9522068225606404, iteration: 248983
loss: 1.0163625478744507,grad_norm: 0.9999992182370674, iteration: 248984
loss: 1.0097436904907227,grad_norm: 0.9011651190725097, iteration: 248985
loss: 1.0040159225463867,grad_norm: 0.9999990457330563, iteration: 248986
loss: 1.0131601095199585,grad_norm: 0.9863328447533137, iteration: 248987
loss: 1.019892930984497,grad_norm: 0.9799772654878637, iteration: 248988
loss: 0.9799429774284363,grad_norm: 0.9999991774979864, iteration: 248989
loss: 1.0243182182312012,grad_norm: 0.9656202484690906, iteration: 248990
loss: 1.0594372749328613,grad_norm: 0.9999995241759692, iteration: 248991
loss: 0.9914290904998779,grad_norm: 0.8625566900378595, iteration: 248992
loss: 1.0554057359695435,grad_norm: 0.8795567622992535, iteration: 248993
loss: 1.073904037475586,grad_norm: 0.999999066093343, iteration: 248994
loss: 1.025381088256836,grad_norm: 0.9822536657584582, iteration: 248995
loss: 1.0058573484420776,grad_norm: 0.9999991346061653, iteration: 248996
loss: 1.017434000968933,grad_norm: 0.8109018060844937, iteration: 248997
loss: 1.0045362710952759,grad_norm: 0.9999992665099425, iteration: 248998
loss: 1.0067192316055298,grad_norm: 0.8321902665582953, iteration: 248999
loss: 0.9660977125167847,grad_norm: 0.9449292744137796, iteration: 249000
loss: 0.9695019125938416,grad_norm: 0.8251306924329244, iteration: 249001
loss: 1.0097379684448242,grad_norm: 0.9999990761521977, iteration: 249002
loss: 1.0252045392990112,grad_norm: 0.8494277184112313, iteration: 249003
loss: 1.0160558223724365,grad_norm: 0.8178701002331136, iteration: 249004
loss: 0.9521524310112,grad_norm: 0.8793877835305488, iteration: 249005
loss: 1.0035454034805298,grad_norm: 0.8606056589283936, iteration: 249006
loss: 1.0500916242599487,grad_norm: 0.9999999346314219, iteration: 249007
loss: 1.0002905130386353,grad_norm: 0.999998961681651, iteration: 249008
loss: 1.0081002712249756,grad_norm: 0.99999902524872, iteration: 249009
loss: 1.021722435951233,grad_norm: 0.9343575277446329, iteration: 249010
loss: 1.0606474876403809,grad_norm: 0.999999133724467, iteration: 249011
loss: 1.0501781702041626,grad_norm: 0.999999029039484, iteration: 249012
loss: 1.0171500444412231,grad_norm: 0.9399730675266771, iteration: 249013
loss: 1.008123755455017,grad_norm: 0.8652437413422273, iteration: 249014
loss: 1.0178465843200684,grad_norm: 0.9235266666831052, iteration: 249015
loss: 1.0190757513046265,grad_norm: 0.9488776986348892, iteration: 249016
loss: 1.0060728788375854,grad_norm: 0.9999992150835652, iteration: 249017
loss: 1.0222071409225464,grad_norm: 0.9999992135687833, iteration: 249018
loss: 1.04466712474823,grad_norm: 0.9267605314036411, iteration: 249019
loss: 1.0141383409500122,grad_norm: 0.7568674343450394, iteration: 249020
loss: 0.9973117113113403,grad_norm: 0.8918394006650011, iteration: 249021
loss: 1.005522608757019,grad_norm: 0.9999992826815493, iteration: 249022
loss: 1.007648229598999,grad_norm: 0.8974686726105715, iteration: 249023
loss: 0.9950101971626282,grad_norm: 0.9999991630178625, iteration: 249024
loss: 1.0134286880493164,grad_norm: 0.8448684105777431, iteration: 249025
loss: 1.0100916624069214,grad_norm: 0.9514906623264988, iteration: 249026
loss: 1.015956997871399,grad_norm: 0.906988180796627, iteration: 249027
loss: 1.005910873413086,grad_norm: 0.9690543236816114, iteration: 249028
loss: 0.9814855456352234,grad_norm: 0.9219180363683107, iteration: 249029
loss: 0.994727611541748,grad_norm: 0.9786992672611258, iteration: 249030
loss: 0.9753543138504028,grad_norm: 0.9718568051424373, iteration: 249031
loss: 0.9904863238334656,grad_norm: 0.9999991882545777, iteration: 249032
loss: 0.9926261305809021,grad_norm: 0.9980090189426389, iteration: 249033
loss: 1.0317025184631348,grad_norm: 0.9999990992622328, iteration: 249034
loss: 0.9821578860282898,grad_norm: 0.9999990838808892, iteration: 249035
loss: 1.0473216772079468,grad_norm: 0.8840228250138024, iteration: 249036
loss: 0.9977940320968628,grad_norm: 0.9444656336704277, iteration: 249037
loss: 1.015042781829834,grad_norm: 0.8350840906149241, iteration: 249038
loss: 1.02701735496521,grad_norm: 0.8772431587790455, iteration: 249039
loss: 1.027778148651123,grad_norm: 0.999999625304256, iteration: 249040
loss: 1.0063965320587158,grad_norm: 0.999999043445966, iteration: 249041
loss: 1.0063884258270264,grad_norm: 0.8627806738303138, iteration: 249042
loss: 1.0292857885360718,grad_norm: 0.9999991972528725, iteration: 249043
loss: 0.9961773157119751,grad_norm: 0.8218924781093393, iteration: 249044
loss: 1.010930061340332,grad_norm: 0.9999998383024742, iteration: 249045
loss: 1.0006757974624634,grad_norm: 0.9251758224207599, iteration: 249046
loss: 0.9812054634094238,grad_norm: 0.9535733390190057, iteration: 249047
loss: 1.0154974460601807,grad_norm: 0.9952739701223434, iteration: 249048
loss: 1.0408391952514648,grad_norm: 0.9999995919384642, iteration: 249049
loss: 0.9662808179855347,grad_norm: 0.9999991309767734, iteration: 249050
loss: 1.014158010482788,grad_norm: 0.8227869544371957, iteration: 249051
loss: 1.0672627687454224,grad_norm: 0.9473534199291493, iteration: 249052
loss: 1.0656259059906006,grad_norm: 0.9999991201830136, iteration: 249053
loss: 1.0267841815948486,grad_norm: 0.9999996294166065, iteration: 249054
loss: 1.003507137298584,grad_norm: 0.9999989622841952, iteration: 249055
loss: 1.0260741710662842,grad_norm: 0.9523911485053209, iteration: 249056
loss: 1.0109727382659912,grad_norm: 0.9726352976143738, iteration: 249057
loss: 1.0588626861572266,grad_norm: 0.9999997592457457, iteration: 249058
loss: 0.9540600776672363,grad_norm: 0.9478750295504167, iteration: 249059
loss: 1.0106725692749023,grad_norm: 0.9999992484610389, iteration: 249060
loss: 0.9614489078521729,grad_norm: 0.9724089671401464, iteration: 249061
loss: 1.0443018674850464,grad_norm: 0.9999992038249016, iteration: 249062
loss: 0.9942644834518433,grad_norm: 0.8369390742434224, iteration: 249063
loss: 0.9906800389289856,grad_norm: 0.9999990349129113, iteration: 249064
loss: 1.0371025800704956,grad_norm: 0.9999992362829376, iteration: 249065
loss: 0.9981272220611572,grad_norm: 0.9999989652599697, iteration: 249066
loss: 0.9682877659797668,grad_norm: 0.9513097494704876, iteration: 249067
loss: 0.9991328120231628,grad_norm: 0.8434889337118097, iteration: 249068
loss: 1.0075469017028809,grad_norm: 0.99999973286566, iteration: 249069
loss: 0.9833297729492188,grad_norm: 0.9947681500538027, iteration: 249070
loss: 0.996293842792511,grad_norm: 0.7756538915047704, iteration: 249071
loss: 0.9998172521591187,grad_norm: 0.9999998765555322, iteration: 249072
loss: 1.0180877447128296,grad_norm: 0.8999338414398534, iteration: 249073
loss: 0.9824348092079163,grad_norm: 0.9199701861761651, iteration: 249074
loss: 1.0049853324890137,grad_norm: 0.9291717803165745, iteration: 249075
loss: 0.988281786441803,grad_norm: 0.8977092521416308, iteration: 249076
loss: 1.014486312866211,grad_norm: 0.9659761124439336, iteration: 249077
loss: 1.0151044130325317,grad_norm: 0.8533657392606308, iteration: 249078
loss: 0.9766489267349243,grad_norm: 0.6977525940041345, iteration: 249079
loss: 1.0304832458496094,grad_norm: 0.9999989597225054, iteration: 249080
loss: 0.9910417795181274,grad_norm: 0.8241231682051107, iteration: 249081
loss: 1.02901291847229,grad_norm: 0.8517720431352929, iteration: 249082
loss: 1.0183218717575073,grad_norm: 0.9752603030799045, iteration: 249083
loss: 1.090775966644287,grad_norm: 0.9999994720407135, iteration: 249084
loss: 1.0123223066329956,grad_norm: 0.9494640552604562, iteration: 249085
loss: 0.9825486540794373,grad_norm: 0.9999990375551039, iteration: 249086
loss: 1.009537935256958,grad_norm: 0.9207115987978203, iteration: 249087
loss: 1.0132477283477783,grad_norm: 0.8308247417601259, iteration: 249088
loss: 0.9574605822563171,grad_norm: 0.8633554266891451, iteration: 249089
loss: 0.9518004059791565,grad_norm: 0.9999990080963196, iteration: 249090
loss: 1.0125007629394531,grad_norm: 0.9395972000548725, iteration: 249091
loss: 1.0151830911636353,grad_norm: 0.9999990530672208, iteration: 249092
loss: 1.037689447402954,grad_norm: 0.8615433719926412, iteration: 249093
loss: 1.0812166929244995,grad_norm: 0.94552138189109, iteration: 249094
loss: 1.0097544193267822,grad_norm: 0.9950204319898093, iteration: 249095
loss: 0.9828725457191467,grad_norm: 0.8344491185819398, iteration: 249096
loss: 0.9711276292800903,grad_norm: 0.9999995735743612, iteration: 249097
loss: 1.021277666091919,grad_norm: 0.9999992644639945, iteration: 249098
loss: 1.0066626071929932,grad_norm: 0.8970052831997867, iteration: 249099
loss: 0.99444979429245,grad_norm: 0.8710810794457698, iteration: 249100
loss: 0.9835678935050964,grad_norm: 0.8008408656660572, iteration: 249101
loss: 1.0228651762008667,grad_norm: 0.9769675562505513, iteration: 249102
loss: 0.9721622467041016,grad_norm: 0.8576540786474963, iteration: 249103
loss: 1.0248756408691406,grad_norm: 0.8419016439910905, iteration: 249104
loss: 0.9880812168121338,grad_norm: 0.9378132106669965, iteration: 249105
loss: 1.0147312879562378,grad_norm: 0.8016967511486518, iteration: 249106
loss: 0.977654218673706,grad_norm: 0.9065124151776562, iteration: 249107
loss: 1.0135844945907593,grad_norm: 0.9711192370867009, iteration: 249108
loss: 1.040406584739685,grad_norm: 0.9999994215083384, iteration: 249109
loss: 1.0178585052490234,grad_norm: 0.9480398251962737, iteration: 249110
loss: 0.9657776355743408,grad_norm: 0.9670305762175867, iteration: 249111
loss: 1.0300096273422241,grad_norm: 0.7884143779313179, iteration: 249112
loss: 1.0760611295700073,grad_norm: 0.8742290709093172, iteration: 249113
loss: 0.9605119228363037,grad_norm: 0.8642464479016764, iteration: 249114
loss: 1.2133501768112183,grad_norm: 0.9999992053707909, iteration: 249115
loss: 0.9714458584785461,grad_norm: 0.9866518397146512, iteration: 249116
loss: 1.046743631362915,grad_norm: 0.98704450908274, iteration: 249117
loss: 1.0639433860778809,grad_norm: 0.9999993682313564, iteration: 249118
loss: 1.0272908210754395,grad_norm: 0.8917953814635373, iteration: 249119
loss: 1.0613452196121216,grad_norm: 0.9999999749702954, iteration: 249120
loss: 1.044579267501831,grad_norm: 0.9999994427450741, iteration: 249121
loss: 1.0318742990493774,grad_norm: 0.9999998188407501, iteration: 249122
loss: 0.9929518699645996,grad_norm: 0.9035403514301357, iteration: 249123
loss: 1.0655425786972046,grad_norm: 0.9580460547489069, iteration: 249124
loss: 1.0153160095214844,grad_norm: 0.999999215783827, iteration: 249125
loss: 1.0298781394958496,grad_norm: 0.9999990645327095, iteration: 249126
loss: 1.0466485023498535,grad_norm: 0.9616034838538529, iteration: 249127
loss: 0.9730040431022644,grad_norm: 0.9307165956699912, iteration: 249128
loss: 0.9855515956878662,grad_norm: 0.9999991026340201, iteration: 249129
loss: 0.9962063431739807,grad_norm: 0.9999991727836436, iteration: 249130
loss: 0.9926571249961853,grad_norm: 0.9999990246075067, iteration: 249131
loss: 1.0163973569869995,grad_norm: 0.9999991909260352, iteration: 249132
loss: 0.9926875233650208,grad_norm: 0.8544642639479554, iteration: 249133
loss: 0.9990368485450745,grad_norm: 0.9045147179102736, iteration: 249134
loss: 1.040576457977295,grad_norm: 0.9999991192834989, iteration: 249135
loss: 1.001223087310791,grad_norm: 0.8475908710420206, iteration: 249136
loss: 0.9980955123901367,grad_norm: 0.9518068469152302, iteration: 249137
loss: 1.0360442399978638,grad_norm: 0.9999990641333261, iteration: 249138
loss: 1.0214062929153442,grad_norm: 0.838568303518769, iteration: 249139
loss: 1.0041521787643433,grad_norm: 0.8802951954407755, iteration: 249140
loss: 1.052352786064148,grad_norm: 0.9999991780323113, iteration: 249141
loss: 0.9792869091033936,grad_norm: 0.7793768320735833, iteration: 249142
loss: 1.024774432182312,grad_norm: 0.8938470834041131, iteration: 249143
loss: 0.9954309463500977,grad_norm: 0.9504475189671937, iteration: 249144
loss: 1.0263460874557495,grad_norm: 0.876115150549999, iteration: 249145
loss: 0.9801647067070007,grad_norm: 0.7424121006569216, iteration: 249146
loss: 0.9852939248085022,grad_norm: 0.9447719063981078, iteration: 249147
loss: 0.9585459232330322,grad_norm: 0.99999908163557, iteration: 249148
loss: 1.0138403177261353,grad_norm: 0.9999992475080864, iteration: 249149
loss: 0.9703664779663086,grad_norm: 0.8331536425100408, iteration: 249150
loss: 0.9802203178405762,grad_norm: 0.8130403753560714, iteration: 249151
loss: 0.9848957061767578,grad_norm: 0.9202613091396529, iteration: 249152
loss: 1.006311297416687,grad_norm: 0.8046095126707865, iteration: 249153
loss: 0.9833613634109497,grad_norm: 0.8803449896570344, iteration: 249154
loss: 0.9953977465629578,grad_norm: 0.797151823807748, iteration: 249155
loss: 1.0377521514892578,grad_norm: 0.9999990742666593, iteration: 249156
loss: 0.9864056706428528,grad_norm: 0.9822681602606922, iteration: 249157
loss: 0.9908350706100464,grad_norm: 0.8232201210713818, iteration: 249158
loss: 1.0126664638519287,grad_norm: 0.686694576871482, iteration: 249159
loss: 1.007870078086853,grad_norm: 0.9999994001106708, iteration: 249160
loss: 0.9691508412361145,grad_norm: 0.8874186942300311, iteration: 249161
loss: 1.0194849967956543,grad_norm: 0.8369070492692232, iteration: 249162
loss: 0.9973008036613464,grad_norm: 0.9999991274558406, iteration: 249163
loss: 1.0132945775985718,grad_norm: 0.9544896289211114, iteration: 249164
loss: 0.9780415892601013,grad_norm: 0.8485172233827643, iteration: 249165
loss: 0.9752882719039917,grad_norm: 0.9999992121837551, iteration: 249166
loss: 1.0310879945755005,grad_norm: 0.944608972382884, iteration: 249167
loss: 1.0366244316101074,grad_norm: 0.999999843131168, iteration: 249168
loss: 1.0022141933441162,grad_norm: 0.8646827254625431, iteration: 249169
loss: 1.0210716724395752,grad_norm: 0.7456839392907376, iteration: 249170
loss: 1.0067225694656372,grad_norm: 0.7015418308880136, iteration: 249171
loss: 1.0327157974243164,grad_norm: 0.8688982164010938, iteration: 249172
loss: 0.9850366711616516,grad_norm: 0.821795768329082, iteration: 249173
loss: 1.0738623142242432,grad_norm: 0.9999991995778618, iteration: 249174
loss: 0.9945418238639832,grad_norm: 0.8007871355406969, iteration: 249175
loss: 0.9828111529350281,grad_norm: 0.7873016722351448, iteration: 249176
loss: 1.0292150974273682,grad_norm: 0.999999274146975, iteration: 249177
loss: 0.9883241057395935,grad_norm: 0.9999993488849295, iteration: 249178
loss: 0.9786498546600342,grad_norm: 0.7893659651848577, iteration: 249179
loss: 0.9700179696083069,grad_norm: 0.9747180342127022, iteration: 249180
loss: 0.9729755520820618,grad_norm: 0.9999990449100419, iteration: 249181
loss: 1.0085080862045288,grad_norm: 0.9195580288704628, iteration: 249182
loss: 1.008010745048523,grad_norm: 0.8815407193525546, iteration: 249183
loss: 1.0157451629638672,grad_norm: 0.9628617207848063, iteration: 249184
loss: 0.9902421832084656,grad_norm: 0.999999474882078, iteration: 249185
loss: 0.9982022047042847,grad_norm: 0.7604379401149789, iteration: 249186
loss: 1.0033245086669922,grad_norm: 0.9999996100384235, iteration: 249187
loss: 0.9593080878257751,grad_norm: 0.9999995192465749, iteration: 249188
loss: 1.0460799932479858,grad_norm: 0.9999996185205978, iteration: 249189
loss: 1.0287644863128662,grad_norm: 0.8315260246216948, iteration: 249190
loss: 0.9813879132270813,grad_norm: 0.9283023292248331, iteration: 249191
loss: 1.0077773332595825,grad_norm: 0.9999989224148143, iteration: 249192
loss: 0.9907174110412598,grad_norm: 0.9426949632902055, iteration: 249193
loss: 1.0062289237976074,grad_norm: 0.903729658333116, iteration: 249194
loss: 1.0091496706008911,grad_norm: 0.9999991318196286, iteration: 249195
loss: 1.0060166120529175,grad_norm: 0.9999991228174996, iteration: 249196
loss: 0.984503984451294,grad_norm: 0.9999991249819975, iteration: 249197
loss: 0.9693053364753723,grad_norm: 0.8660938948561548, iteration: 249198
loss: 1.000666618347168,grad_norm: 0.9653029480291145, iteration: 249199
loss: 0.963290810585022,grad_norm: 0.8529697284680401, iteration: 249200
loss: 1.0079526901245117,grad_norm: 0.8970409875991415, iteration: 249201
loss: 1.0573267936706543,grad_norm: 0.8446328137625287, iteration: 249202
loss: 0.997998058795929,grad_norm: 0.9528694904269652, iteration: 249203
loss: 0.998446524143219,grad_norm: 0.9315268605287983, iteration: 249204
loss: 1.1058332920074463,grad_norm: 0.9999995430286983, iteration: 249205
loss: 1.005075454711914,grad_norm: 0.7564544755101661, iteration: 249206
loss: 0.9872362613677979,grad_norm: 0.943319385567239, iteration: 249207
loss: 0.9720969796180725,grad_norm: 0.7012736662764066, iteration: 249208
loss: 1.0089919567108154,grad_norm: 0.9157112256440167, iteration: 249209
loss: 1.0170109272003174,grad_norm: 0.9999991335841082, iteration: 249210
loss: 1.017071008682251,grad_norm: 0.9999991388913324, iteration: 249211
loss: 0.9939072132110596,grad_norm: 0.8439245814697172, iteration: 249212
loss: 0.9969756603240967,grad_norm: 0.8306362666680734, iteration: 249213
loss: 0.9846481084823608,grad_norm: 0.9137381716748091, iteration: 249214
loss: 0.9954757690429688,grad_norm: 0.9840929000889276, iteration: 249215
loss: 0.9931437969207764,grad_norm: 0.9238865160793115, iteration: 249216
loss: 0.9840688109397888,grad_norm: 0.8128048315015035, iteration: 249217
loss: 0.9898483753204346,grad_norm: 0.999999443865938, iteration: 249218
loss: 1.0107369422912598,grad_norm: 0.8642816200153584, iteration: 249219
loss: 1.0219616889953613,grad_norm: 0.8980480255296467, iteration: 249220
loss: 0.9919825196266174,grad_norm: 0.8320352256822946, iteration: 249221
loss: 1.0637812614440918,grad_norm: 0.8939107939697785, iteration: 249222
loss: 0.9876189827919006,grad_norm: 0.9198039268028858, iteration: 249223
loss: 1.006841778755188,grad_norm: 0.9129487703500102, iteration: 249224
loss: 1.0210806131362915,grad_norm: 0.9999991400211924, iteration: 249225
loss: 1.0058166980743408,grad_norm: 0.9999996772987626, iteration: 249226
loss: 1.0244545936584473,grad_norm: 0.9999990634739822, iteration: 249227
loss: 0.9979193210601807,grad_norm: 0.9999990319906517, iteration: 249228
loss: 1.023152470588684,grad_norm: 0.9999993339098202, iteration: 249229
loss: 0.9696450233459473,grad_norm: 0.8986595867246283, iteration: 249230
loss: 0.9783698320388794,grad_norm: 0.8304513470375399, iteration: 249231
loss: 1.0091900825500488,grad_norm: 0.9470016515027935, iteration: 249232
loss: 0.991119384765625,grad_norm: 0.9543801680540109, iteration: 249233
loss: 0.9701517820358276,grad_norm: 0.9999991859195088, iteration: 249234
loss: 1.0027250051498413,grad_norm: 0.7034613099226258, iteration: 249235
loss: 0.9836674332618713,grad_norm: 0.9176008977078036, iteration: 249236
loss: 1.0018253326416016,grad_norm: 0.8366841219908188, iteration: 249237
loss: 1.0112401247024536,grad_norm: 0.9307098777461901, iteration: 249238
loss: 1.0052133798599243,grad_norm: 0.999999648815259, iteration: 249239
loss: 1.021308422088623,grad_norm: 0.870222371351805, iteration: 249240
loss: 0.9999240636825562,grad_norm: 0.9526991596260177, iteration: 249241
loss: 1.005739450454712,grad_norm: 0.8558253168384262, iteration: 249242
loss: 1.0245850086212158,grad_norm: 0.9093150761031827, iteration: 249243
loss: 0.9837185144424438,grad_norm: 0.9161057818647416, iteration: 249244
loss: 1.00766122341156,grad_norm: 0.9999993547169032, iteration: 249245
loss: 0.9575707912445068,grad_norm: 0.9999990958887642, iteration: 249246
loss: 0.9688305258750916,grad_norm: 0.9999992335944217, iteration: 249247
loss: 0.9917064309120178,grad_norm: 0.8176841127070205, iteration: 249248
loss: 1.0229114294052124,grad_norm: 0.8750251634981143, iteration: 249249
loss: 0.9610744714736938,grad_norm: 0.9999992258048793, iteration: 249250
loss: 1.0095829963684082,grad_norm: 0.8242264999864424, iteration: 249251
loss: 0.9861068725585938,grad_norm: 0.9999991476586506, iteration: 249252
loss: 0.9895635843276978,grad_norm: 0.9675801460360107, iteration: 249253
loss: 1.031843900680542,grad_norm: 0.8950348705306317, iteration: 249254
loss: 1.034650206565857,grad_norm: 0.9941783408108857, iteration: 249255
loss: 1.0142812728881836,grad_norm: 0.9589047942450406, iteration: 249256
loss: 0.988839864730835,grad_norm: 0.8950793683487066, iteration: 249257
loss: 1.0418694019317627,grad_norm: 0.9999997294204509, iteration: 249258
loss: 1.0019192695617676,grad_norm: 0.9999991452293941, iteration: 249259
loss: 1.0071295499801636,grad_norm: 0.9999990918930197, iteration: 249260
loss: 1.0075751543045044,grad_norm: 0.9183064713413218, iteration: 249261
loss: 1.0418671369552612,grad_norm: 0.999999435638916, iteration: 249262
loss: 0.9911141395568848,grad_norm: 0.8451949857054564, iteration: 249263
loss: 1.0771297216415405,grad_norm: 0.8611468070361593, iteration: 249264
loss: 0.9923868775367737,grad_norm: 0.9950520876401631, iteration: 249265
loss: 1.013307809829712,grad_norm: 0.9364352285789267, iteration: 249266
loss: 1.0044065713882446,grad_norm: 0.9608046551416942, iteration: 249267
loss: 1.009787917137146,grad_norm: 0.9999998932423009, iteration: 249268
loss: 0.9767952561378479,grad_norm: 0.7345499862309932, iteration: 249269
loss: 1.005199909210205,grad_norm: 0.9109384377836762, iteration: 249270
loss: 0.9863061308860779,grad_norm: 0.9473524463218772, iteration: 249271
loss: 1.0038022994995117,grad_norm: 0.9999999621828156, iteration: 249272
loss: 0.9995754361152649,grad_norm: 0.8162213693906023, iteration: 249273
loss: 0.9609401822090149,grad_norm: 0.9999997119531505, iteration: 249274
loss: 0.9990473389625549,grad_norm: 0.865504828264671, iteration: 249275
loss: 1.0099419355392456,grad_norm: 0.9999996093899324, iteration: 249276
loss: 0.9750635623931885,grad_norm: 0.8978942247513486, iteration: 249277
loss: 0.9747576117515564,grad_norm: 0.9304790510268485, iteration: 249278
loss: 1.0043342113494873,grad_norm: 0.9999999130610573, iteration: 249279
loss: 0.9948224425315857,grad_norm: 0.8502019656563147, iteration: 249280
loss: 0.9791277050971985,grad_norm: 0.749201056054073, iteration: 249281
loss: 1.0390287637710571,grad_norm: 0.9554622687087113, iteration: 249282
loss: 1.0444681644439697,grad_norm: 0.9999991302181167, iteration: 249283
loss: 0.9917402863502502,grad_norm: 0.999999080207751, iteration: 249284
loss: 1.0178935527801514,grad_norm: 0.9653861734269854, iteration: 249285
loss: 0.9754723906517029,grad_norm: 0.9999992019531998, iteration: 249286
loss: 0.9909917712211609,grad_norm: 0.946354805331186, iteration: 249287
loss: 0.9984303116798401,grad_norm: 0.9025130820862705, iteration: 249288
loss: 1.0150494575500488,grad_norm: 0.750017626624969, iteration: 249289
loss: 1.0831820964813232,grad_norm: 0.9999999607915202, iteration: 249290
loss: 0.9806796312332153,grad_norm: 0.8460374953298252, iteration: 249291
loss: 1.0086973905563354,grad_norm: 0.8582183242614062, iteration: 249292
loss: 1.0092805624008179,grad_norm: 0.853867891890869, iteration: 249293
loss: 0.9877476692199707,grad_norm: 0.8663749223095382, iteration: 249294
loss: 1.001144289970398,grad_norm: 0.9448640027520522, iteration: 249295
loss: 1.0305426120758057,grad_norm: 0.9999991693865179, iteration: 249296
loss: 1.0052775144577026,grad_norm: 0.9035117228938948, iteration: 249297
loss: 1.027649164199829,grad_norm: 0.7581944746389555, iteration: 249298
loss: 0.9647507071495056,grad_norm: 0.9355485069802825, iteration: 249299
loss: 0.9938506484031677,grad_norm: 0.9844452131254894, iteration: 249300
loss: 1.0139466524124146,grad_norm: 0.7960917693144429, iteration: 249301
loss: 1.0056445598602295,grad_norm: 0.9211064077384847, iteration: 249302
loss: 0.9787260890007019,grad_norm: 0.7848166584599636, iteration: 249303
loss: 1.011283040046692,grad_norm: 0.9999990028006849, iteration: 249304
loss: 1.0127965211868286,grad_norm: 0.9999991592874251, iteration: 249305
loss: 1.0608543157577515,grad_norm: 0.9999991333723638, iteration: 249306
loss: 0.9938896298408508,grad_norm: 0.9354319578275205, iteration: 249307
loss: 0.9822242259979248,grad_norm: 0.9461228942340565, iteration: 249308
loss: 0.9861719608306885,grad_norm: 0.8929231108431618, iteration: 249309
loss: 0.9734486937522888,grad_norm: 0.8642535961122758, iteration: 249310
loss: 0.9934035539627075,grad_norm: 0.985946927307184, iteration: 249311
loss: 0.9967090487480164,grad_norm: 0.9780223876061144, iteration: 249312
loss: 0.9752131700515747,grad_norm: 0.9421020168806175, iteration: 249313
loss: 0.9616761207580566,grad_norm: 0.965016914656421, iteration: 249314
loss: 0.9889240860939026,grad_norm: 0.943664036047955, iteration: 249315
loss: 0.9793282747268677,grad_norm: 0.9138642735740549, iteration: 249316
loss: 1.0279035568237305,grad_norm: 0.9999991236114267, iteration: 249317
loss: 0.9821497201919556,grad_norm: 0.7234851836288008, iteration: 249318
loss: 0.9829598665237427,grad_norm: 0.8760609418098232, iteration: 249319
loss: 0.9961939454078674,grad_norm: 0.9806655844223262, iteration: 249320
loss: 1.0939135551452637,grad_norm: 0.9999997850711588, iteration: 249321
loss: 0.990175724029541,grad_norm: 0.9999989124854672, iteration: 249322
loss: 0.9700092077255249,grad_norm: 0.8753661394275385, iteration: 249323
loss: 0.9950172901153564,grad_norm: 0.8171038814855861, iteration: 249324
loss: 0.9804267287254333,grad_norm: 0.8322327017556482, iteration: 249325
loss: 1.011794924736023,grad_norm: 0.9842026429915683, iteration: 249326
loss: 0.996986448764801,grad_norm: 0.8591953090619987, iteration: 249327
loss: 1.0063406229019165,grad_norm: 0.7959018448244541, iteration: 249328
loss: 0.9924872517585754,grad_norm: 0.9946597889628074, iteration: 249329
loss: 0.9912346601486206,grad_norm: 0.9999989872785494, iteration: 249330
loss: 0.9778982996940613,grad_norm: 0.9496023655751343, iteration: 249331
loss: 1.018625259399414,grad_norm: 0.999999152930081, iteration: 249332
loss: 1.0440305471420288,grad_norm: 0.9719940008999335, iteration: 249333
loss: 1.0063471794128418,grad_norm: 0.8638976725942115, iteration: 249334
loss: 0.9839605689048767,grad_norm: 0.8378215638712955, iteration: 249335
loss: 1.0264270305633545,grad_norm: 0.8261869067501031, iteration: 249336
loss: 0.9983169436454773,grad_norm: 0.945791492731845, iteration: 249337
loss: 0.9669919013977051,grad_norm: 0.8700290631104737, iteration: 249338
loss: 1.0149389505386353,grad_norm: 0.8471272556642249, iteration: 249339
loss: 0.9860794544219971,grad_norm: 0.9999994814726657, iteration: 249340
loss: 1.039487600326538,grad_norm: 0.8676476287929352, iteration: 249341
loss: 0.9798779487609863,grad_norm: 0.9149335060176148, iteration: 249342
loss: 1.0150662660598755,grad_norm: 0.8475308964555067, iteration: 249343
loss: 1.051936388015747,grad_norm: 0.9452902349222276, iteration: 249344
loss: 1.0058703422546387,grad_norm: 0.9423434025036324, iteration: 249345
loss: 1.0126949548721313,grad_norm: 0.9745218664689971, iteration: 249346
loss: 0.9996328949928284,grad_norm: 0.999999234304363, iteration: 249347
loss: 1.0366547107696533,grad_norm: 0.8461308538379699, iteration: 249348
loss: 0.9765480756759644,grad_norm: 0.7637501571809809, iteration: 249349
loss: 1.0349589586257935,grad_norm: 0.9697090279819962, iteration: 249350
loss: 1.0239360332489014,grad_norm: 0.8661367650088839, iteration: 249351
loss: 0.9879226684570312,grad_norm: 0.788495815490168, iteration: 249352
loss: 0.9889515042304993,grad_norm: 0.9108909970605706, iteration: 249353
loss: 1.0094033479690552,grad_norm: 0.9999992690149785, iteration: 249354
loss: 1.0145092010498047,grad_norm: 0.9257906055231182, iteration: 249355
loss: 0.9786862730979919,grad_norm: 0.8580222740008593, iteration: 249356
loss: 0.9855005741119385,grad_norm: 0.9999989839933722, iteration: 249357
loss: 1.0017778873443604,grad_norm: 0.9999990576647501, iteration: 249358
loss: 1.0031452178955078,grad_norm: 0.898892396219252, iteration: 249359
loss: 1.0179847478866577,grad_norm: 0.7837383119538833, iteration: 249360
loss: 1.0683751106262207,grad_norm: 0.9999990403449039, iteration: 249361
loss: 1.0028150081634521,grad_norm: 0.7555724926059025, iteration: 249362
loss: 0.9928510189056396,grad_norm: 0.9661226596588575, iteration: 249363
loss: 0.9887837171554565,grad_norm: 0.7613754054398608, iteration: 249364
loss: 0.9933280944824219,grad_norm: 0.939325897274913, iteration: 249365
loss: 1.0268363952636719,grad_norm: 0.9015480406046075, iteration: 249366
loss: 0.9627585411071777,grad_norm: 0.9966119177465199, iteration: 249367
loss: 1.0540632009506226,grad_norm: 0.9110755633261599, iteration: 249368
loss: 1.086167573928833,grad_norm: 0.9999995968065939, iteration: 249369
loss: 0.9839674830436707,grad_norm: 0.8066896159660882, iteration: 249370
loss: 1.0308003425598145,grad_norm: 0.8021109773321086, iteration: 249371
loss: 1.017562985420227,grad_norm: 0.9999991996649628, iteration: 249372
loss: 1.008935809135437,grad_norm: 0.9128229325836269, iteration: 249373
loss: 1.0226563215255737,grad_norm: 0.9392445061052822, iteration: 249374
loss: 1.0096583366394043,grad_norm: 0.8495411217032773, iteration: 249375
loss: 1.0193290710449219,grad_norm: 0.8833348480819523, iteration: 249376
loss: 0.9997690320014954,grad_norm: 0.9985725453916404, iteration: 249377
loss: 1.0010639429092407,grad_norm: 0.9999991561763188, iteration: 249378
loss: 0.9667210578918457,grad_norm: 0.9142075122996489, iteration: 249379
loss: 0.9938536286354065,grad_norm: 0.9999991834146122, iteration: 249380
loss: 0.982997477054596,grad_norm: 0.78166534534562, iteration: 249381
loss: 0.9924498796463013,grad_norm: 0.952821886447668, iteration: 249382
loss: 1.0127032995224,grad_norm: 0.966310890331135, iteration: 249383
loss: 0.9944610595703125,grad_norm: 0.9770959675715271, iteration: 249384
loss: 1.0506142377853394,grad_norm: 0.9565460411652591, iteration: 249385
loss: 0.988447904586792,grad_norm: 0.7907034751763375, iteration: 249386
loss: 1.004352331161499,grad_norm: 0.8562385000028261, iteration: 249387
loss: 1.0038673877716064,grad_norm: 0.8517454997803514, iteration: 249388
loss: 1.0190153121948242,grad_norm: 0.7667573944003518, iteration: 249389
loss: 1.0058019161224365,grad_norm: 0.9675244146152471, iteration: 249390
loss: 0.9895535707473755,grad_norm: 0.9999990650864872, iteration: 249391
loss: 0.9923080801963806,grad_norm: 0.9225250496016075, iteration: 249392
loss: 0.9996781945228577,grad_norm: 0.7904104389548795, iteration: 249393
loss: 0.9914554357528687,grad_norm: 0.9489153346458251, iteration: 249394
loss: 1.017670750617981,grad_norm: 0.9921447791408297, iteration: 249395
loss: 1.0212048292160034,grad_norm: 0.9398633554189781, iteration: 249396
loss: 0.9983338713645935,grad_norm: 0.8852125137242072, iteration: 249397
loss: 0.9973889589309692,grad_norm: 0.8421942858246054, iteration: 249398
loss: 1.0021471977233887,grad_norm: 0.7848383932209541, iteration: 249399
loss: 0.9968495965003967,grad_norm: 0.9999992036665741, iteration: 249400
loss: 1.0204355716705322,grad_norm: 0.9999992475914301, iteration: 249401
loss: 0.9819949865341187,grad_norm: 0.960546596325315, iteration: 249402
loss: 0.9739714860916138,grad_norm: 0.8170311215730275, iteration: 249403
loss: 0.9726553559303284,grad_norm: 0.9070314314740585, iteration: 249404
loss: 1.0081008672714233,grad_norm: 0.893016549597225, iteration: 249405
loss: 1.0048938989639282,grad_norm: 0.9999995764242339, iteration: 249406
loss: 1.0137559175491333,grad_norm: 0.823379113233377, iteration: 249407
loss: 1.022067666053772,grad_norm: 0.9999990258347501, iteration: 249408
loss: 0.9825637340545654,grad_norm: 0.8238432592277138, iteration: 249409
loss: 1.0140608549118042,grad_norm: 0.9655378036391139, iteration: 249410
loss: 1.008447289466858,grad_norm: 0.8963249226598032, iteration: 249411
loss: 0.986175000667572,grad_norm: 0.835386040049594, iteration: 249412
loss: 0.9774845838546753,grad_norm: 0.8806160465007256, iteration: 249413
loss: 1.0053539276123047,grad_norm: 0.9378496083580892, iteration: 249414
loss: 1.02003812789917,grad_norm: 0.9623652959283628, iteration: 249415
loss: 1.0441105365753174,grad_norm: 0.9666928800509824, iteration: 249416
loss: 0.9404812455177307,grad_norm: 0.8237250113121065, iteration: 249417
loss: 1.008766531944275,grad_norm: 0.9999993442749568, iteration: 249418
loss: 0.9933634400367737,grad_norm: 0.9999991823583505, iteration: 249419
loss: 0.9931617975234985,grad_norm: 0.9095598735066992, iteration: 249420
loss: 0.9963310360908508,grad_norm: 0.8472512460946954, iteration: 249421
loss: 0.9649282097816467,grad_norm: 0.7489107687056578, iteration: 249422
loss: 0.9922442436218262,grad_norm: 0.9552552635670722, iteration: 249423
loss: 0.9463963508605957,grad_norm: 0.8940361640609998, iteration: 249424
loss: 1.0118435621261597,grad_norm: 0.7523195266068105, iteration: 249425
loss: 0.9663211703300476,grad_norm: 0.8353031989938421, iteration: 249426
loss: 1.0276005268096924,grad_norm: 0.9000483555140714, iteration: 249427
loss: 1.0038012266159058,grad_norm: 0.9788852392698655, iteration: 249428
loss: 1.065137267112732,grad_norm: 0.9999999396868934, iteration: 249429
loss: 1.0263234376907349,grad_norm: 0.7786027521787265, iteration: 249430
loss: 1.0394747257232666,grad_norm: 0.9999994949348421, iteration: 249431
loss: 0.96929931640625,grad_norm: 0.7964742829569257, iteration: 249432
loss: 1.0299710035324097,grad_norm: 0.9999990426474756, iteration: 249433
loss: 1.0188332796096802,grad_norm: 0.9522473580994257, iteration: 249434
loss: 0.9826555252075195,grad_norm: 0.8415912667003117, iteration: 249435
loss: 0.9821233153343201,grad_norm: 0.9462142364793472, iteration: 249436
loss: 1.0256531238555908,grad_norm: 0.948147782914069, iteration: 249437
loss: 0.9828079342842102,grad_norm: 0.6555901632759636, iteration: 249438
loss: 1.000510811805725,grad_norm: 0.8419701860462429, iteration: 249439
loss: 1.0136728286743164,grad_norm: 0.9999990258292136, iteration: 249440
loss: 1.0306938886642456,grad_norm: 0.809074825283778, iteration: 249441
loss: 1.0188865661621094,grad_norm: 0.689744807153248, iteration: 249442
loss: 0.9896127581596375,grad_norm: 0.9999992569801651, iteration: 249443
loss: 0.959309458732605,grad_norm: 0.7668072005640064, iteration: 249444
loss: 0.9802754521369934,grad_norm: 0.8876710756825787, iteration: 249445
loss: 1.0359787940979004,grad_norm: 0.9999990438700637, iteration: 249446
loss: 1.0410948991775513,grad_norm: 0.9899502440246701, iteration: 249447
loss: 1.0217149257659912,grad_norm: 0.8150886087850674, iteration: 249448
loss: 0.953069806098938,grad_norm: 0.9638438654758378, iteration: 249449
loss: 1.042675256729126,grad_norm: 0.9999993997310078, iteration: 249450
loss: 1.017484426498413,grad_norm: 0.8724233139124649, iteration: 249451
loss: 0.9959548711776733,grad_norm: 0.8799807511884524, iteration: 249452
loss: 1.002792477607727,grad_norm: 0.8992883972046132, iteration: 249453
loss: 0.9966685175895691,grad_norm: 0.739167335576772, iteration: 249454
loss: 0.9938051700592041,grad_norm: 0.9133591747616874, iteration: 249455
loss: 0.9983857274055481,grad_norm: 0.999999150703233, iteration: 249456
loss: 0.9845641851425171,grad_norm: 0.996100245912967, iteration: 249457
loss: 0.9617826342582703,grad_norm: 0.9441123732158408, iteration: 249458
loss: 0.9931535124778748,grad_norm: 0.8697889339501924, iteration: 249459
loss: 0.9580173492431641,grad_norm: 0.9264734279013783, iteration: 249460
loss: 0.9805041551589966,grad_norm: 0.9999992743476867, iteration: 249461
loss: 1.0130972862243652,grad_norm: 0.9004158423953974, iteration: 249462
loss: 1.0462746620178223,grad_norm: 0.9999992538596955, iteration: 249463
loss: 0.9923005700111389,grad_norm: 0.8068229526154982, iteration: 249464
loss: 1.0345792770385742,grad_norm: 0.8814328275865481, iteration: 249465
loss: 0.9543172121047974,grad_norm: 0.9999992215556298, iteration: 249466
loss: 1.0402714014053345,grad_norm: 0.9999992932190523, iteration: 249467
loss: 1.0673187971115112,grad_norm: 0.999999684822962, iteration: 249468
loss: 1.0276100635528564,grad_norm: 0.8891381408702831, iteration: 249469
loss: 1.0261482000350952,grad_norm: 0.8077353807842252, iteration: 249470
loss: 1.0012151002883911,grad_norm: 0.9325759079158323, iteration: 249471
loss: 1.0261306762695312,grad_norm: 0.9999990844159868, iteration: 249472
loss: 0.9271469116210938,grad_norm: 0.7806546261809304, iteration: 249473
loss: 1.0052309036254883,grad_norm: 0.9100291017145996, iteration: 249474
loss: 1.0186965465545654,grad_norm: 0.9999994736844334, iteration: 249475
loss: 1.0190587043762207,grad_norm: 0.8592608631764006, iteration: 249476
loss: 1.011595368385315,grad_norm: 0.9999991420570632, iteration: 249477
loss: 0.99838787317276,grad_norm: 0.9906521387217398, iteration: 249478
loss: 0.9716066122055054,grad_norm: 0.999999079848973, iteration: 249479
loss: 0.9643282890319824,grad_norm: 0.8369655217008533, iteration: 249480
loss: 1.0312665700912476,grad_norm: 0.9284062040052687, iteration: 249481
loss: 0.9858346581459045,grad_norm: 0.8255821664426036, iteration: 249482
loss: 1.0176070928573608,grad_norm: 0.7910382650976419, iteration: 249483
loss: 0.9965847730636597,grad_norm: 0.9999991392622328, iteration: 249484
loss: 1.0361684560775757,grad_norm: 0.9999995379623072, iteration: 249485
loss: 0.9874422550201416,grad_norm: 0.9664931110195092, iteration: 249486
loss: 1.0219917297363281,grad_norm: 0.9504800830068705, iteration: 249487
loss: 1.0255099534988403,grad_norm: 0.9999991256174905, iteration: 249488
loss: 0.9945317506790161,grad_norm: 0.8794261240987035, iteration: 249489
loss: 0.9895122051239014,grad_norm: 0.931977532356066, iteration: 249490
loss: 1.0288726091384888,grad_norm: 0.7817523000713533, iteration: 249491
loss: 1.0319693088531494,grad_norm: 0.9830425223442252, iteration: 249492
loss: 0.9428259134292603,grad_norm: 0.8920386135903088, iteration: 249493
loss: 1.031219482421875,grad_norm: 0.8670740613475232, iteration: 249494
loss: 0.9979103207588196,grad_norm: 0.8965589422148582, iteration: 249495
loss: 1.0102325677871704,grad_norm: 0.9999992101956301, iteration: 249496
loss: 1.0214787721633911,grad_norm: 0.9327298496621148, iteration: 249497
loss: 0.9816234111785889,grad_norm: 0.9499891672679992, iteration: 249498
loss: 0.9903490543365479,grad_norm: 0.8787800098339309, iteration: 249499
loss: 0.965863823890686,grad_norm: 0.8783780276104213, iteration: 249500
loss: 0.9700716733932495,grad_norm: 0.9744058108593291, iteration: 249501
loss: 1.0288437604904175,grad_norm: 0.8172171930841963, iteration: 249502
loss: 0.9829976558685303,grad_norm: 0.8449341044896388, iteration: 249503
loss: 1.0052063465118408,grad_norm: 0.904120516310862, iteration: 249504
loss: 1.0195949077606201,grad_norm: 0.8873525478080762, iteration: 249505
loss: 0.962144672870636,grad_norm: 0.9999990351370118, iteration: 249506
loss: 1.0360172986984253,grad_norm: 0.9878967807227588, iteration: 249507
loss: 0.950214684009552,grad_norm: 0.9209865098326361, iteration: 249508
loss: 1.003524899482727,grad_norm: 0.9065227923635559, iteration: 249509
loss: 0.9734220504760742,grad_norm: 0.8554771778316025, iteration: 249510
loss: 0.9715221524238586,grad_norm: 0.8977786609120989, iteration: 249511
loss: 1.0207916498184204,grad_norm: 0.9164003501219626, iteration: 249512
loss: 0.9810569286346436,grad_norm: 0.8874110504198592, iteration: 249513
loss: 0.9943408370018005,grad_norm: 0.8767421667364647, iteration: 249514
loss: 0.9902544617652893,grad_norm: 0.8691525927330819, iteration: 249515
loss: 1.0409802198410034,grad_norm: 0.9882874168358253, iteration: 249516
loss: 1.0066560506820679,grad_norm: 0.8507113065738477, iteration: 249517
loss: 1.032203197479248,grad_norm: 0.8483698591279668, iteration: 249518
loss: 0.9737898707389832,grad_norm: 0.994832349489776, iteration: 249519
loss: 0.96015864610672,grad_norm: 0.7990212369230235, iteration: 249520
loss: 1.01102876663208,grad_norm: 0.9530008579060121, iteration: 249521
loss: 1.0079196691513062,grad_norm: 0.9999990707539914, iteration: 249522
loss: 1.0205405950546265,grad_norm: 0.7884400599700138, iteration: 249523
loss: 0.9841569662094116,grad_norm: 0.9500222451796877, iteration: 249524
loss: 1.0100516080856323,grad_norm: 0.9129118278042659, iteration: 249525
loss: 1.0521372556686401,grad_norm: 0.9999989485031671, iteration: 249526
loss: 0.9775397181510925,grad_norm: 0.8448107309837914, iteration: 249527
loss: 0.9805009365081787,grad_norm: 0.9937437210748592, iteration: 249528
loss: 1.0270140171051025,grad_norm: 0.9999993317804896, iteration: 249529
loss: 1.0141198635101318,grad_norm: 0.9464975665316455, iteration: 249530
loss: 0.9850397109985352,grad_norm: 0.8582747941888894, iteration: 249531
loss: 1.0033003091812134,grad_norm: 0.8065605229541553, iteration: 249532
loss: 0.9749830365180969,grad_norm: 0.961520156531723, iteration: 249533
loss: 0.9883875250816345,grad_norm: 0.9077139661349956, iteration: 249534
loss: 1.055145502090454,grad_norm: 0.999999049687413, iteration: 249535
loss: 0.9872937798500061,grad_norm: 0.9171647639054922, iteration: 249536
loss: 0.999269425868988,grad_norm: 0.9999999013392947, iteration: 249537
loss: 1.0172948837280273,grad_norm: 0.9999994571328751, iteration: 249538
loss: 0.9849347472190857,grad_norm: 0.8632196819100277, iteration: 249539
loss: 0.9770929217338562,grad_norm: 0.8322343331635974, iteration: 249540
loss: 0.9912782311439514,grad_norm: 0.9156506515780489, iteration: 249541
loss: 1.0035603046417236,grad_norm: 0.8544800886049476, iteration: 249542
loss: 1.0106662511825562,grad_norm: 0.9943572928609963, iteration: 249543
loss: 0.9898034930229187,grad_norm: 0.7679069288192527, iteration: 249544
loss: 1.0148463249206543,grad_norm: 0.9933868627489129, iteration: 249545
loss: 1.0412485599517822,grad_norm: 0.7901279106666295, iteration: 249546
loss: 0.9930287003517151,grad_norm: 0.8578751491949823, iteration: 249547
loss: 1.0212233066558838,grad_norm: 0.8490101392410596, iteration: 249548
loss: 1.0067698955535889,grad_norm: 0.9802559215638259, iteration: 249549
loss: 1.0122069120407104,grad_norm: 0.8724396840670918, iteration: 249550
loss: 1.0016913414001465,grad_norm: 0.863338467826422, iteration: 249551
loss: 0.989118218421936,grad_norm: 0.9968261477851624, iteration: 249552
loss: 0.9724196195602417,grad_norm: 0.9652788182272987, iteration: 249553
loss: 1.0238288640975952,grad_norm: 0.8144450154857085, iteration: 249554
loss: 0.9864546060562134,grad_norm: 0.9718997331658393, iteration: 249555
loss: 1.002265214920044,grad_norm: 0.9516019636339572, iteration: 249556
loss: 0.9428790211677551,grad_norm: 0.7749183079649227, iteration: 249557
loss: 1.0280792713165283,grad_norm: 0.999999170222426, iteration: 249558
loss: 1.0256986618041992,grad_norm: 0.913492311801417, iteration: 249559
loss: 1.0076302289962769,grad_norm: 0.8368189881113309, iteration: 249560
loss: 0.972294270992279,grad_norm: 0.8935620935899926, iteration: 249561
loss: 0.9986241459846497,grad_norm: 0.7998241018062824, iteration: 249562
loss: 0.990675687789917,grad_norm: 0.8694759909396842, iteration: 249563
loss: 1.0183634757995605,grad_norm: 0.8765900724393587, iteration: 249564
loss: 0.9835536479949951,grad_norm: 0.9538388792913244, iteration: 249565
loss: 0.9973288774490356,grad_norm: 0.8773788089983449, iteration: 249566
loss: 1.0518944263458252,grad_norm: 0.9999992146007808, iteration: 249567
loss: 1.0038094520568848,grad_norm: 0.7718559280582835, iteration: 249568
loss: 0.9849810600280762,grad_norm: 0.9735014381803783, iteration: 249569
loss: 1.023160696029663,grad_norm: 0.999999602818173, iteration: 249570
loss: 0.9729712605476379,grad_norm: 0.922304587579943, iteration: 249571
loss: 0.999760091304779,grad_norm: 0.8944102097004781, iteration: 249572
loss: 1.0271531343460083,grad_norm: 0.9730367081627449, iteration: 249573
loss: 0.9983081817626953,grad_norm: 0.8848954148972771, iteration: 249574
loss: 0.9800819158554077,grad_norm: 0.9444723142262358, iteration: 249575
loss: 0.9760233759880066,grad_norm: 0.8551174860437007, iteration: 249576
loss: 1.0051732063293457,grad_norm: 0.9999992721112054, iteration: 249577
loss: 0.9741464853286743,grad_norm: 0.9198543320430748, iteration: 249578
loss: 0.9363946914672852,grad_norm: 0.8843049316308962, iteration: 249579
loss: 1.0307927131652832,grad_norm: 0.9999991714458876, iteration: 249580
loss: 1.005369782447815,grad_norm: 0.8672950894683661, iteration: 249581
loss: 0.9781403541564941,grad_norm: 0.9999991533242155, iteration: 249582
loss: 1.0035408735275269,grad_norm: 0.9418729147387177, iteration: 249583
loss: 1.0071804523468018,grad_norm: 0.9218339140507351, iteration: 249584
loss: 0.9635344743728638,grad_norm: 0.9103896149668667, iteration: 249585
loss: 1.0121474266052246,grad_norm: 0.7838678062468345, iteration: 249586
loss: 0.9728061556816101,grad_norm: 0.8682032767339252, iteration: 249587
loss: 1.000240445137024,grad_norm: 0.6487628200985406, iteration: 249588
loss: 0.9775364995002747,grad_norm: 0.9999989151498285, iteration: 249589
loss: 0.9935777187347412,grad_norm: 0.99999933978614, iteration: 249590
loss: 0.9960347414016724,grad_norm: 0.8397177030935786, iteration: 249591
loss: 1.0019973516464233,grad_norm: 0.999998908884859, iteration: 249592
loss: 1.011737585067749,grad_norm: 0.8183092059112516, iteration: 249593
loss: 0.9975618124008179,grad_norm: 0.999998997420522, iteration: 249594
loss: 1.0212376117706299,grad_norm: 0.7419830815124326, iteration: 249595
loss: 1.002698302268982,grad_norm: 0.999999219023453, iteration: 249596
loss: 0.9907751083374023,grad_norm: 0.9999990654660124, iteration: 249597
loss: 1.0079898834228516,grad_norm: 0.8616027661239354, iteration: 249598
loss: 1.0113496780395508,grad_norm: 0.924176656998661, iteration: 249599
loss: 0.9692827463150024,grad_norm: 0.9470430446704304, iteration: 249600
loss: 1.0038920640945435,grad_norm: 0.9139092899779911, iteration: 249601
loss: 0.9887708425521851,grad_norm: 0.8611683396221618, iteration: 249602
loss: 1.0004074573516846,grad_norm: 0.7782730894162981, iteration: 249603
loss: 1.0197685956954956,grad_norm: 0.9999989954715975, iteration: 249604
loss: 1.0211632251739502,grad_norm: 0.9875116249161143, iteration: 249605
loss: 1.0057690143585205,grad_norm: 0.9845127441446474, iteration: 249606
loss: 1.0252714157104492,grad_norm: 0.963769714384786, iteration: 249607
loss: 0.9823392033576965,grad_norm: 0.9999992219812178, iteration: 249608
loss: 0.9957894086837769,grad_norm: 0.8172674135027195, iteration: 249609
loss: 1.0206128358840942,grad_norm: 0.9503985724250195, iteration: 249610
loss: 0.9893179535865784,grad_norm: 0.8240269170898261, iteration: 249611
loss: 0.9937257170677185,grad_norm: 0.9486186891353952, iteration: 249612
loss: 1.0226786136627197,grad_norm: 0.9006177916942868, iteration: 249613
loss: 0.9973732829093933,grad_norm: 0.9199109281119079, iteration: 249614
loss: 1.006028175354004,grad_norm: 0.8758654184609065, iteration: 249615
loss: 0.975929319858551,grad_norm: 0.823219651069671, iteration: 249616
loss: 1.0478616952896118,grad_norm: 0.9999996172182094, iteration: 249617
loss: 1.0072017908096313,grad_norm: 0.8534909591539125, iteration: 249618
loss: 1.0086443424224854,grad_norm: 0.8984246291673971, iteration: 249619
loss: 0.992662250995636,grad_norm: 0.9999991663995061, iteration: 249620
loss: 1.0041534900665283,grad_norm: 0.8656873965474468, iteration: 249621
loss: 1.0006284713745117,grad_norm: 0.9999992030135548, iteration: 249622
loss: 0.9940160512924194,grad_norm: 0.9999993249855711, iteration: 249623
loss: 0.9869727492332458,grad_norm: 0.8363265706369759, iteration: 249624
loss: 0.9845070242881775,grad_norm: 0.9670121464750986, iteration: 249625
loss: 0.9916266798973083,grad_norm: 0.9999999022326324, iteration: 249626
loss: 0.9933059811592102,grad_norm: 0.8699302593756634, iteration: 249627
loss: 0.9657129645347595,grad_norm: 0.763517429623482, iteration: 249628
loss: 1.009017825126648,grad_norm: 0.8351225046346636, iteration: 249629
loss: 1.001693844795227,grad_norm: 0.7091239588310931, iteration: 249630
loss: 1.006041169166565,grad_norm: 0.883125727779964, iteration: 249631
loss: 0.9820563793182373,grad_norm: 0.8360481228900193, iteration: 249632
loss: 1.032218337059021,grad_norm: 0.9999997008951288, iteration: 249633
loss: 1.0420483350753784,grad_norm: 0.9999991035213824, iteration: 249634
loss: 1.0200421810150146,grad_norm: 0.8055122320967236, iteration: 249635
loss: 0.9612974524497986,grad_norm: 0.916352613110881, iteration: 249636
loss: 0.9882076978683472,grad_norm: 0.936991041633514, iteration: 249637
loss: 0.9929671883583069,grad_norm: 0.8949440862100625, iteration: 249638
loss: 0.9723032712936401,grad_norm: 0.9214329828315198, iteration: 249639
loss: 1.0086536407470703,grad_norm: 0.9999990981000358, iteration: 249640
loss: 1.012081503868103,grad_norm: 0.8990682592089465, iteration: 249641
loss: 1.008730173110962,grad_norm: 0.857085682094355, iteration: 249642
loss: 1.0254100561141968,grad_norm: 0.7550722668204417, iteration: 249643
loss: 0.9840831160545349,grad_norm: 0.9999994165770038, iteration: 249644
loss: 0.9904117584228516,grad_norm: 0.9999991153244271, iteration: 249645
loss: 1.00906503200531,grad_norm: 0.9106251501247019, iteration: 249646
loss: 1.0279507637023926,grad_norm: 0.8487043760690314, iteration: 249647
loss: 1.032246708869934,grad_norm: 0.8120693858459397, iteration: 249648
loss: 1.1210418939590454,grad_norm: 0.9999992492886298, iteration: 249649
loss: 0.9832863211631775,grad_norm: 0.9999992147661296, iteration: 249650
loss: 1.1118333339691162,grad_norm: 0.9999998856616183, iteration: 249651
loss: 1.0717649459838867,grad_norm: 1.0000000212471467, iteration: 249652
loss: 1.0094316005706787,grad_norm: 0.9999991714396168, iteration: 249653
loss: 0.9802009463310242,grad_norm: 0.9199193552356673, iteration: 249654
loss: 1.0236097574234009,grad_norm: 0.9943181633825486, iteration: 249655
loss: 1.0117636919021606,grad_norm: 0.9999991334111089, iteration: 249656
loss: 0.9916554689407349,grad_norm: 0.9999990400436688, iteration: 249657
loss: 1.0060133934020996,grad_norm: 0.8958631638756013, iteration: 249658
loss: 0.9775980710983276,grad_norm: 0.981954265676556, iteration: 249659
loss: 1.0153007507324219,grad_norm: 0.8498552796513619, iteration: 249660
loss: 1.0157445669174194,grad_norm: 0.8620988743946202, iteration: 249661
loss: 0.9940052628517151,grad_norm: 0.8933515025658486, iteration: 249662
loss: 0.9933927655220032,grad_norm: 0.7432393026575176, iteration: 249663
loss: 0.9734575748443604,grad_norm: 0.7667863032342317, iteration: 249664
loss: 1.00544011592865,grad_norm: 0.8164990418985826, iteration: 249665
loss: 1.011193871498108,grad_norm: 0.9999991807110307, iteration: 249666
loss: 0.9826269745826721,grad_norm: 0.952762704012184, iteration: 249667
loss: 1.0315152406692505,grad_norm: 0.9682449492021525, iteration: 249668
loss: 1.0448263883590698,grad_norm: 0.9615366024709331, iteration: 249669
loss: 0.975090742111206,grad_norm: 0.8823899742367316, iteration: 249670
loss: 1.0483641624450684,grad_norm: 0.9999999920570247, iteration: 249671
loss: 1.0249111652374268,grad_norm: 0.9999992108554934, iteration: 249672
loss: 1.0399185419082642,grad_norm: 0.9999990107234211, iteration: 249673
loss: 0.9636747241020203,grad_norm: 0.8111814804033126, iteration: 249674
loss: 1.0433274507522583,grad_norm: 0.999999452984496, iteration: 249675
loss: 1.0460503101348877,grad_norm: 0.7208902952085433, iteration: 249676
loss: 0.9599238038063049,grad_norm: 0.9999990078063702, iteration: 249677
loss: 0.993916392326355,grad_norm: 0.7964610827020108, iteration: 249678
loss: 1.0280793905258179,grad_norm: 0.7783902550458366, iteration: 249679
loss: 1.0306507349014282,grad_norm: 0.9999990867297507, iteration: 249680
loss: 0.9593833088874817,grad_norm: 0.7333269292361623, iteration: 249681
loss: 0.985486626625061,grad_norm: 0.9197120525316365, iteration: 249682
loss: 1.0073586702346802,grad_norm: 0.9999990066967286, iteration: 249683
loss: 0.9936601519584656,grad_norm: 0.9412671032490109, iteration: 249684
loss: 0.983492910861969,grad_norm: 0.9999991027523552, iteration: 249685
loss: 1.004249095916748,grad_norm: 0.9653740435929837, iteration: 249686
loss: 0.9896848797798157,grad_norm: 0.9882021351572089, iteration: 249687
loss: 0.996213436126709,grad_norm: 0.8572314466649053, iteration: 249688
loss: 1.015337347984314,grad_norm: 0.9999991462656296, iteration: 249689
loss: 0.950526237487793,grad_norm: 0.9999992490297269, iteration: 249690
loss: 0.9918946623802185,grad_norm: 0.9215676360831186, iteration: 249691
loss: 0.9662929177284241,grad_norm: 0.7626552363372879, iteration: 249692
loss: 0.9770541191101074,grad_norm: 0.9631110403228306, iteration: 249693
loss: 1.0145915746688843,grad_norm: 0.9999991948577733, iteration: 249694
loss: 1.013933777809143,grad_norm: 0.9212072731007106, iteration: 249695
loss: 0.9949344396591187,grad_norm: 0.9865922219345836, iteration: 249696
loss: 0.9726718664169312,grad_norm: 0.8924004149503227, iteration: 249697
loss: 1.0184217691421509,grad_norm: 0.9591881142497694, iteration: 249698
loss: 1.0638848543167114,grad_norm: 0.9999998422245412, iteration: 249699
loss: 0.9923813343048096,grad_norm: 0.9718994869872073, iteration: 249700
loss: 1.0575897693634033,grad_norm: 0.8160323064146074, iteration: 249701
loss: 0.9808357954025269,grad_norm: 0.9105533653921714, iteration: 249702
loss: 0.9670758843421936,grad_norm: 0.7966127638224144, iteration: 249703
loss: 1.0378005504608154,grad_norm: 0.9793620732193968, iteration: 249704
loss: 1.0139209032058716,grad_norm: 0.9786565075890667, iteration: 249705
loss: 0.9660872220993042,grad_norm: 0.9108677091696614, iteration: 249706
loss: 1.0411090850830078,grad_norm: 0.9999993965731271, iteration: 249707
loss: 1.0000022649765015,grad_norm: 0.9345364042873237, iteration: 249708
loss: 1.0025019645690918,grad_norm: 0.8552401957144307, iteration: 249709
loss: 1.035375714302063,grad_norm: 0.8103821225951973, iteration: 249710
loss: 1.0079268217086792,grad_norm: 0.9543004648608564, iteration: 249711
loss: 0.9820306301116943,grad_norm: 0.8707002879259805, iteration: 249712
loss: 0.9877178072929382,grad_norm: 0.8866725563067698, iteration: 249713
loss: 1.1312980651855469,grad_norm: 0.999999772315616, iteration: 249714
loss: 0.9841400384902954,grad_norm: 0.9401504747962985, iteration: 249715
loss: 1.0040969848632812,grad_norm: 0.9999990768019086, iteration: 249716
loss: 1.0098646879196167,grad_norm: 0.9999992385123464, iteration: 249717
loss: 0.9712382555007935,grad_norm: 0.9073317949393224, iteration: 249718
loss: 1.0090725421905518,grad_norm: 0.9675375625529522, iteration: 249719
loss: 0.9994745850563049,grad_norm: 0.8089266494968697, iteration: 249720
loss: 0.9961255192756653,grad_norm: 0.9227562256663464, iteration: 249721
loss: 1.0028611421585083,grad_norm: 0.9098977532245335, iteration: 249722
loss: 0.9558561444282532,grad_norm: 0.8847920674320604, iteration: 249723
loss: 0.9915516972541809,grad_norm: 0.8110778166890706, iteration: 249724
loss: 0.9766285419464111,grad_norm: 0.7783916641434979, iteration: 249725
loss: 0.9822768568992615,grad_norm: 0.9999991788033122, iteration: 249726
loss: 1.0159300565719604,grad_norm: 0.8023602689261987, iteration: 249727
loss: 0.9981985092163086,grad_norm: 0.8941496525034509, iteration: 249728
loss: 0.9952215552330017,grad_norm: 0.8536490224290999, iteration: 249729
loss: 0.9992890954017639,grad_norm: 0.9999990357612201, iteration: 249730
loss: 1.0156291723251343,grad_norm: 0.8262110948325914, iteration: 249731
loss: 0.9414727687835693,grad_norm: 0.8469847267469323, iteration: 249732
loss: 0.9990415573120117,grad_norm: 0.832285132678747, iteration: 249733
loss: 1.0099875926971436,grad_norm: 0.9999993698919705, iteration: 249734
loss: 1.0401724576950073,grad_norm: 0.9999992867672961, iteration: 249735
loss: 1.0002678632736206,grad_norm: 0.9531055161673637, iteration: 249736
loss: 1.0079100131988525,grad_norm: 0.9999991877932549, iteration: 249737
loss: 1.0279146432876587,grad_norm: 0.8425997398277539, iteration: 249738
loss: 1.0076091289520264,grad_norm: 0.999999096876617, iteration: 249739
loss: 0.9632684588432312,grad_norm: 0.952958936805153, iteration: 249740
loss: 1.0170944929122925,grad_norm: 0.7816488361328107, iteration: 249741
loss: 0.9826285243034363,grad_norm: 0.8612708431981952, iteration: 249742
loss: 1.0121811628341675,grad_norm: 0.9027705361076857, iteration: 249743
loss: 1.1233965158462524,grad_norm: 0.9999998627228318, iteration: 249744
loss: 1.0184152126312256,grad_norm: 0.9047526783033052, iteration: 249745
loss: 0.9895268678665161,grad_norm: 0.9724047812556671, iteration: 249746
loss: 1.0373835563659668,grad_norm: 0.9999992726305982, iteration: 249747
loss: 0.983715295791626,grad_norm: 0.9999992702143058, iteration: 249748
loss: 0.9872545599937439,grad_norm: 0.9999992530821121, iteration: 249749
loss: 1.0002670288085938,grad_norm: 0.8824262861601503, iteration: 249750
loss: 1.2376166582107544,grad_norm: 0.9999992878308468, iteration: 249751
loss: 0.9779511094093323,grad_norm: 0.8206216640687627, iteration: 249752
loss: 0.9735566973686218,grad_norm: 0.9287169195832888, iteration: 249753
loss: 1.0104368925094604,grad_norm: 0.9113293162167753, iteration: 249754
loss: 0.9864482879638672,grad_norm: 0.9999990109189009, iteration: 249755
loss: 1.0012422800064087,grad_norm: 0.9999992464895874, iteration: 249756
loss: 0.9659203886985779,grad_norm: 0.9721121470826967, iteration: 249757
loss: 1.0174556970596313,grad_norm: 0.7892660770296935, iteration: 249758
loss: 1.0437525510787964,grad_norm: 0.9757372478337628, iteration: 249759
loss: 1.0220494270324707,grad_norm: 0.9999991285453875, iteration: 249760
loss: 1.0630069971084595,grad_norm: 0.999998978311238, iteration: 249761
loss: 1.0086334943771362,grad_norm: 0.999999141169282, iteration: 249762
loss: 0.9812222719192505,grad_norm: 0.9543188687055952, iteration: 249763
loss: 1.0798403024673462,grad_norm: 0.999999098330651, iteration: 249764
loss: 0.9866409301757812,grad_norm: 0.9854749160126332, iteration: 249765
loss: 1.001309871673584,grad_norm: 0.9999994914367815, iteration: 249766
loss: 1.0649020671844482,grad_norm: 0.9999999051480146, iteration: 249767
loss: 1.0680420398712158,grad_norm: 0.9478168743839833, iteration: 249768
loss: 0.9844478964805603,grad_norm: 0.887869324793739, iteration: 249769
loss: 1.3491036891937256,grad_norm: 0.9999996508908978, iteration: 249770
loss: 0.9822442531585693,grad_norm: 0.9237102109731671, iteration: 249771
loss: 1.0065873861312866,grad_norm: 0.8705345501400921, iteration: 249772
loss: 0.9919501543045044,grad_norm: 0.9999990136322954, iteration: 249773
loss: 1.070540428161621,grad_norm: 0.8811624866609308, iteration: 249774
loss: 1.01047682762146,grad_norm: 0.8384597644368021, iteration: 249775
loss: 1.034756064414978,grad_norm: 0.8881304569618218, iteration: 249776
loss: 1.1586617231369019,grad_norm: 0.9999996552048169, iteration: 249777
loss: 1.0387682914733887,grad_norm: 0.9999993284319221, iteration: 249778
loss: 1.1992084980010986,grad_norm: 0.9999999491794805, iteration: 249779
loss: 0.9920952916145325,grad_norm: 0.8011262292011738, iteration: 249780
loss: 1.0621289014816284,grad_norm: 0.8256850508220306, iteration: 249781
loss: 1.0163859128952026,grad_norm: 0.9999990833481289, iteration: 249782
loss: 0.9966840744018555,grad_norm: 0.9140932267179133, iteration: 249783
loss: 1.1301121711730957,grad_norm: 0.999999764575114, iteration: 249784
loss: 1.0800906419754028,grad_norm: 0.9721102199499974, iteration: 249785
loss: 1.1416372060775757,grad_norm: 0.9670402464497518, iteration: 249786
loss: 0.9853920340538025,grad_norm: 0.8041431890518943, iteration: 249787
loss: 1.056287407875061,grad_norm: 0.9999993667662253, iteration: 249788
loss: 1.0590336322784424,grad_norm: 0.9999992622125459, iteration: 249789
loss: 1.0997121334075928,grad_norm: 0.999999435978588, iteration: 249790
loss: 1.0267893075942993,grad_norm: 0.9554541614056389, iteration: 249791
loss: 1.083146333694458,grad_norm: 0.8675434402748429, iteration: 249792
loss: 1.238629698753357,grad_norm: 0.9999999681123927, iteration: 249793
loss: 1.3397189378738403,grad_norm: 0.9999997465576544, iteration: 249794
loss: 1.0836931467056274,grad_norm: 0.9999990688232587, iteration: 249795
loss: 1.0871009826660156,grad_norm: 0.9999992442165474, iteration: 249796
loss: 1.0580757856369019,grad_norm: 0.999999097907715, iteration: 249797
loss: 1.1027839183807373,grad_norm: 0.9999999016705962, iteration: 249798
loss: 0.9931832551956177,grad_norm: 0.8557346935076803, iteration: 249799
loss: 1.101912498474121,grad_norm: 0.9999997412730719, iteration: 249800
loss: 0.9863516688346863,grad_norm: 0.9369664471725407, iteration: 249801
loss: 0.9884427189826965,grad_norm: 0.9252022863680217, iteration: 249802
loss: 1.1725181341171265,grad_norm: 0.9999991825468943, iteration: 249803
loss: 1.036080002784729,grad_norm: 0.9999994536540715, iteration: 249804
loss: 0.9925848841667175,grad_norm: 0.9728661947872242, iteration: 249805
loss: 1.0068659782409668,grad_norm: 0.979479931121664, iteration: 249806
loss: 0.9941538572311401,grad_norm: 0.9488035416953764, iteration: 249807
loss: 0.9702155590057373,grad_norm: 0.8537073060783362, iteration: 249808
loss: 1.0062161684036255,grad_norm: 0.9999990619005925, iteration: 249809
loss: 1.0029315948486328,grad_norm: 0.898501433646734, iteration: 249810
loss: 0.9671524167060852,grad_norm: 0.9999991034979968, iteration: 249811
loss: 0.9733376502990723,grad_norm: 0.7699584103232423, iteration: 249812
loss: 0.9621031284332275,grad_norm: 0.9999992123891217, iteration: 249813
loss: 0.9876990914344788,grad_norm: 0.9999997846294838, iteration: 249814
loss: 1.01389741897583,grad_norm: 0.999999506184174, iteration: 249815
loss: 0.9802910685539246,grad_norm: 0.9999994710605202, iteration: 249816
loss: 1.0299980640411377,grad_norm: 0.9999994094626324, iteration: 249817
loss: 1.0082809925079346,grad_norm: 0.7578780374862787, iteration: 249818
loss: 1.0156188011169434,grad_norm: 0.8224504893280953, iteration: 249819
loss: 0.9840851426124573,grad_norm: 0.9134059413399113, iteration: 249820
loss: 1.0109418630599976,grad_norm: 0.913441095170979, iteration: 249821
loss: 1.0394420623779297,grad_norm: 0.9999991707237331, iteration: 249822
loss: 1.0525233745574951,grad_norm: 0.9999997303848066, iteration: 249823
loss: 0.9723507165908813,grad_norm: 0.9999991646609708, iteration: 249824
loss: 0.9890786409378052,grad_norm: 0.9952148099195517, iteration: 249825
loss: 1.0043773651123047,grad_norm: 0.9999996770500156, iteration: 249826
loss: 1.004879117012024,grad_norm: 0.9605069319833717, iteration: 249827
loss: 1.0103131532669067,grad_norm: 0.8620718724471002, iteration: 249828
loss: 1.0179580450057983,grad_norm: 0.9769543534902939, iteration: 249829
loss: 0.9980436563491821,grad_norm: 0.9302361537175284, iteration: 249830
loss: 0.9617822170257568,grad_norm: 0.8763028800203136, iteration: 249831
loss: 1.0574792623519897,grad_norm: 0.9999989046686101, iteration: 249832
loss: 1.0194252729415894,grad_norm: 0.971897645403681, iteration: 249833
loss: 0.9733863472938538,grad_norm: 0.9999991289546232, iteration: 249834
loss: 1.0138415098190308,grad_norm: 0.8970189816253111, iteration: 249835
loss: 0.992636501789093,grad_norm: 0.9999991361542176, iteration: 249836
loss: 1.0046323537826538,grad_norm: 0.8491962059055813, iteration: 249837
loss: 1.0357481241226196,grad_norm: 0.9999993363151457, iteration: 249838
loss: 0.9923657774925232,grad_norm: 0.9999990140674003, iteration: 249839
loss: 0.9801663160324097,grad_norm: 0.8036779490924056, iteration: 249840
loss: 1.0069819688796997,grad_norm: 0.9484422119886914, iteration: 249841
loss: 1.0381591320037842,grad_norm: 0.9999990832503605, iteration: 249842
loss: 1.1555655002593994,grad_norm: 0.9999996320824966, iteration: 249843
loss: 0.9712266325950623,grad_norm: 0.9217942071246005, iteration: 249844
loss: 0.9917736649513245,grad_norm: 0.9999990428012053, iteration: 249845
loss: 1.0089563131332397,grad_norm: 0.8978171835767683, iteration: 249846
loss: 1.150844931602478,grad_norm: 0.9368323416731623, iteration: 249847
loss: 1.0183042287826538,grad_norm: 0.8828783920010838, iteration: 249848
loss: 0.9904592037200928,grad_norm: 0.7855848424366793, iteration: 249849
loss: 1.0234401226043701,grad_norm: 0.8151415494710919, iteration: 249850
loss: 1.0121933221817017,grad_norm: 0.970282802321523, iteration: 249851
loss: 0.9986245036125183,grad_norm: 0.9049120337550877, iteration: 249852
loss: 1.0522432327270508,grad_norm: 0.9809568078191259, iteration: 249853
loss: 1.0212335586547852,grad_norm: 0.913028267045942, iteration: 249854
loss: 0.9869645833969116,grad_norm: 0.8616009744827337, iteration: 249855
loss: 1.003627896308899,grad_norm: 0.9999992039242097, iteration: 249856
loss: 1.0190070867538452,grad_norm: 0.9999996679431545, iteration: 249857
loss: 0.9703738689422607,grad_norm: 0.8695026550083925, iteration: 249858
loss: 0.9712802767753601,grad_norm: 0.8920262577233768, iteration: 249859
loss: 1.0169826745986938,grad_norm: 0.8053849388326522, iteration: 249860
loss: 1.0065122842788696,grad_norm: 0.8507616105601167, iteration: 249861
loss: 0.9945258498191833,grad_norm: 0.8793506610274294, iteration: 249862
loss: 1.035682201385498,grad_norm: 0.816137604604251, iteration: 249863
loss: 1.0042871236801147,grad_norm: 0.9999989534330451, iteration: 249864
loss: 0.984758734703064,grad_norm: 0.8961981014778073, iteration: 249865
loss: 0.9940115213394165,grad_norm: 0.8674462466571167, iteration: 249866
loss: 1.0043693780899048,grad_norm: 0.9999991320493662, iteration: 249867
loss: 0.995674192905426,grad_norm: 0.9006489415441367, iteration: 249868
loss: 0.9543753862380981,grad_norm: 0.7677095722262233, iteration: 249869
loss: 0.9904664158821106,grad_norm: 0.8045494210536804, iteration: 249870
loss: 1.0031096935272217,grad_norm: 0.93483426903666, iteration: 249871
loss: 1.0034633874893188,grad_norm: 0.9158142950897786, iteration: 249872
loss: 1.0470085144042969,grad_norm: 0.9409898073804666, iteration: 249873
loss: 1.0247149467468262,grad_norm: 0.8570540196458443, iteration: 249874
loss: 0.9954370856285095,grad_norm: 0.8853827696317585, iteration: 249875
loss: 1.0372463464736938,grad_norm: 0.9999992252094182, iteration: 249876
loss: 1.0019582509994507,grad_norm: 0.8914358104382573, iteration: 249877
loss: 1.0403457880020142,grad_norm: 0.9999994861185073, iteration: 249878
loss: 1.0259090662002563,grad_norm: 0.8375280023401168, iteration: 249879
loss: 1.0142405033111572,grad_norm: 0.9954074153819148, iteration: 249880
loss: 0.9805371761322021,grad_norm: 0.8321491575596591, iteration: 249881
loss: 1.0689083337783813,grad_norm: 0.8617742462651921, iteration: 249882
loss: 1.0182150602340698,grad_norm: 0.9999991661948476, iteration: 249883
loss: 1.0172855854034424,grad_norm: 0.9769809498908758, iteration: 249884
loss: 1.0047937631607056,grad_norm: 0.9563267657323017, iteration: 249885
loss: 0.9984390735626221,grad_norm: 0.8064013881568974, iteration: 249886
loss: 1.0150829553604126,grad_norm: 0.8889977470568649, iteration: 249887
loss: 0.9791901111602783,grad_norm: 0.8982777936400089, iteration: 249888
loss: 1.0279420614242554,grad_norm: 0.8351734983585234, iteration: 249889
loss: 1.0370078086853027,grad_norm: 0.9582280516948987, iteration: 249890
loss: 0.9794268608093262,grad_norm: 0.9536448616882045, iteration: 249891
loss: 0.985593318939209,grad_norm: 0.9861272723891773, iteration: 249892
loss: 0.970496416091919,grad_norm: 0.8929229784882261, iteration: 249893
loss: 0.9920471906661987,grad_norm: 0.8434395911320514, iteration: 249894
loss: 1.0515140295028687,grad_norm: 0.9999998703209089, iteration: 249895
loss: 0.9991980791091919,grad_norm: 0.8211254694048622, iteration: 249896
loss: 1.0208293199539185,grad_norm: 0.9999990844167047, iteration: 249897
loss: 1.1019693613052368,grad_norm: 0.9999998458509182, iteration: 249898
loss: 0.981512725353241,grad_norm: 0.8888859411093883, iteration: 249899
loss: 1.0045244693756104,grad_norm: 0.8662700090650264, iteration: 249900
loss: 1.0241522789001465,grad_norm: 0.9999994733072076, iteration: 249901
loss: 0.9904502630233765,grad_norm: 0.8268671542216254, iteration: 249902
loss: 0.9661838412284851,grad_norm: 0.93913015215893, iteration: 249903
loss: 1.0109854936599731,grad_norm: 0.9778134401239236, iteration: 249904
loss: 1.0510567426681519,grad_norm: 1.0000000171416492, iteration: 249905
loss: 1.0094114542007446,grad_norm: 0.8496335401048014, iteration: 249906
loss: 0.9997648596763611,grad_norm: 0.9592184830100009, iteration: 249907
loss: 0.9501389265060425,grad_norm: 0.9891838497627893, iteration: 249908
loss: 0.9853132963180542,grad_norm: 0.9999991328201273, iteration: 249909
loss: 0.9867701530456543,grad_norm: 0.9188777888809098, iteration: 249910
loss: 1.0360987186431885,grad_norm: 0.8325274692171447, iteration: 249911
loss: 1.1625512838363647,grad_norm: 0.9999997216618257, iteration: 249912
loss: 0.9908502101898193,grad_norm: 0.8860720946745394, iteration: 249913
loss: 1.0204267501831055,grad_norm: 0.8869715013631156, iteration: 249914
loss: 1.0004796981811523,grad_norm: 0.9673841326871953, iteration: 249915
loss: 1.0530229806900024,grad_norm: 0.9999997154441341, iteration: 249916
loss: 0.9967882633209229,grad_norm: 0.8727197643462341, iteration: 249917
loss: 1.0112993717193604,grad_norm: 0.8674719362286295, iteration: 249918
loss: 1.0178112983703613,grad_norm: 0.8534812759512012, iteration: 249919
loss: 1.0032262802124023,grad_norm: 0.8260743629817069, iteration: 249920
loss: 1.0213782787322998,grad_norm: 0.9455372897892161, iteration: 249921
loss: 1.007712960243225,grad_norm: 0.862026962224096, iteration: 249922
loss: 1.0097835063934326,grad_norm: 0.7884958638797606, iteration: 249923
loss: 1.015554428100586,grad_norm: 0.9999992046111338, iteration: 249924
loss: 0.984290361404419,grad_norm: 0.8280017840970081, iteration: 249925
loss: 1.0020467042922974,grad_norm: 1.0000000471857302, iteration: 249926
loss: 0.9947271943092346,grad_norm: 0.863824268275134, iteration: 249927
loss: 1.040817379951477,grad_norm: 0.9999991446715915, iteration: 249928
loss: 1.0271539688110352,grad_norm: 0.998251193508215, iteration: 249929
loss: 0.9815981388092041,grad_norm: 0.802827621166311, iteration: 249930
loss: 1.0004276037216187,grad_norm: 0.7964968053746024, iteration: 249931
loss: 0.9814277291297913,grad_norm: 0.8651732185983787, iteration: 249932
loss: 1.0128388404846191,grad_norm: 0.8555169844865538, iteration: 249933
loss: 1.0554172992706299,grad_norm: 0.999999544058451, iteration: 249934
loss: 0.9846516847610474,grad_norm: 0.9999991870709702, iteration: 249935
loss: 0.9725629091262817,grad_norm: 0.9435148485670566, iteration: 249936
loss: 1.0027064085006714,grad_norm: 0.9999991339541049, iteration: 249937
loss: 1.0032784938812256,grad_norm: 0.9999994137398934, iteration: 249938
loss: 1.008723258972168,grad_norm: 0.864116218265957, iteration: 249939
loss: 1.0086321830749512,grad_norm: 0.9999990097337094, iteration: 249940
loss: 0.9871804118156433,grad_norm: 0.931719342786519, iteration: 249941
loss: 0.9851100444793701,grad_norm: 0.9999991573533148, iteration: 249942
loss: 1.0284628868103027,grad_norm: 0.8579524215286748, iteration: 249943
loss: 0.995043933391571,grad_norm: 0.8613176157046236, iteration: 249944
loss: 0.9824912548065186,grad_norm: 0.7607169718918733, iteration: 249945
loss: 1.05083167552948,grad_norm: 0.9999997648776376, iteration: 249946
loss: 0.9829452633857727,grad_norm: 0.999999165136065, iteration: 249947
loss: 1.0732173919677734,grad_norm: 0.9999999194670878, iteration: 249948
loss: 0.9934017658233643,grad_norm: 0.7986310544107479, iteration: 249949
loss: 1.035701036453247,grad_norm: 0.9999989435168036, iteration: 249950
loss: 1.004178762435913,grad_norm: 0.9597601482932719, iteration: 249951
loss: 0.987794816493988,grad_norm: 0.8857764216669017, iteration: 249952
loss: 0.9838507175445557,grad_norm: 0.9073379769617352, iteration: 249953
loss: 1.0151170492172241,grad_norm: 0.9999991444152871, iteration: 249954
loss: 1.0074220895767212,grad_norm: 0.9019171235952417, iteration: 249955
loss: 1.0300159454345703,grad_norm: 0.9099077978200324, iteration: 249956
loss: 0.9848718047142029,grad_norm: 0.7957982179313114, iteration: 249957
loss: 1.0059624910354614,grad_norm: 0.788785861343058, iteration: 249958
loss: 1.0416431427001953,grad_norm: 0.8340388849137585, iteration: 249959
loss: 1.0321567058563232,grad_norm: 0.9999991323694413, iteration: 249960
loss: 1.0063555240631104,grad_norm: 0.9707387611364828, iteration: 249961
loss: 1.025033712387085,grad_norm: 0.9999991851297831, iteration: 249962
loss: 1.0187207460403442,grad_norm: 0.9330227157601845, iteration: 249963
loss: 0.9963604807853699,grad_norm: 0.8712749762185491, iteration: 249964
loss: 0.97102290391922,grad_norm: 0.9737660600133505, iteration: 249965
loss: 0.9712872505187988,grad_norm: 0.8972712787675423, iteration: 249966
loss: 0.9992191195487976,grad_norm: 0.925568426589622, iteration: 249967
loss: 1.0154179334640503,grad_norm: 0.9999990847843467, iteration: 249968
loss: 1.003503680229187,grad_norm: 0.9999990562150811, iteration: 249969
loss: 0.9944314360618591,grad_norm: 0.999999030900295, iteration: 249970
loss: 0.9934511184692383,grad_norm: 0.8750395802854076, iteration: 249971
loss: 1.012815237045288,grad_norm: 0.9967525793869788, iteration: 249972
loss: 0.9901683926582336,grad_norm: 0.9999991658618421, iteration: 249973
loss: 1.0154554843902588,grad_norm: 0.9390576146668436, iteration: 249974
loss: 1.0247759819030762,grad_norm: 0.8816322652971406, iteration: 249975
loss: 0.9996203184127808,grad_norm: 0.9999990347149352, iteration: 249976
loss: 0.9855878353118896,grad_norm: 0.9892791305684835, iteration: 249977
loss: 1.0080277919769287,grad_norm: 0.7741103862450135, iteration: 249978
loss: 0.982319176197052,grad_norm: 0.9999992418017063, iteration: 249979
loss: 0.9988555312156677,grad_norm: 0.929300256815003, iteration: 249980
loss: 0.9959754943847656,grad_norm: 0.9999989722773076, iteration: 249981
loss: 0.9916266798973083,grad_norm: 0.9625717207587854, iteration: 249982
loss: 1.0082828998565674,grad_norm: 0.9123437705336073, iteration: 249983
loss: 1.066670536994934,grad_norm: 0.9999990969693047, iteration: 249984
loss: 1.0914607048034668,grad_norm: 0.9999993233215391, iteration: 249985
loss: 1.0355298519134521,grad_norm: 0.9999991119966367, iteration: 249986
loss: 1.0071310997009277,grad_norm: 0.9512978495235774, iteration: 249987
loss: 0.9927489161491394,grad_norm: 0.9999991708596143, iteration: 249988
loss: 0.9816102981567383,grad_norm: 0.9099812032410418, iteration: 249989
loss: 1.1725516319274902,grad_norm: 0.9999995908044664, iteration: 249990
loss: 1.022390365600586,grad_norm: 0.9999993184375995, iteration: 249991
loss: 1.0081721544265747,grad_norm: 0.9999989345767096, iteration: 249992
loss: 1.0045723915100098,grad_norm: 0.9016235922008475, iteration: 249993
loss: 0.9757621884346008,grad_norm: 0.9105550385533344, iteration: 249994
loss: 0.9716169238090515,grad_norm: 0.8495737816325375, iteration: 249995
loss: 0.9891812801361084,grad_norm: 0.9999990982081849, iteration: 249996
loss: 1.0143492221832275,grad_norm: 0.8886599438820995, iteration: 249997
loss: 0.9389330744743347,grad_norm: 0.9999991073737652, iteration: 249998
loss: 1.0630141496658325,grad_norm: 0.9999995376523304, iteration: 249999
loss: 1.039579153060913,grad_norm: 0.7728668275880872, iteration: 250000
Evaluating at step 250000
{'val': 1.0007062908262014, 'test': 2.1622698265142715}
loss: 0.9823727011680603,grad_norm: 0.9999994799150224, iteration: 250001
loss: 0.9570387005805969,grad_norm: 0.9319708999062329, iteration: 250002
loss: 1.0383949279785156,grad_norm: 0.999998950517507, iteration: 250003
loss: 1.2761869430541992,grad_norm: 0.9999998273523828, iteration: 250004
loss: 0.9798243045806885,grad_norm: 0.9478596839799877, iteration: 250005
loss: 1.0007658004760742,grad_norm: 0.9999990377467403, iteration: 250006
loss: 1.0704528093338013,grad_norm: 0.9999991562112988, iteration: 250007
loss: 0.9989970326423645,grad_norm: 0.9999990458915365, iteration: 250008
loss: 0.9780803322792053,grad_norm: 0.8367965175982128, iteration: 250009
loss: 1.031086802482605,grad_norm: 0.9999991373108078, iteration: 250010
loss: 1.0304186344146729,grad_norm: 0.9999989313896288, iteration: 250011
loss: 1.0316119194030762,grad_norm: 0.9999991226298518, iteration: 250012
loss: 1.033705472946167,grad_norm: 0.8946238281746646, iteration: 250013
loss: 0.9969032406806946,grad_norm: 0.8034015199896253, iteration: 250014
loss: 0.9708929061889648,grad_norm: 0.9476190077587793, iteration: 250015
loss: 1.046231985092163,grad_norm: 1.0000000781986471, iteration: 250016
loss: 0.9917178750038147,grad_norm: 0.8665875903240279, iteration: 250017
loss: 0.9985644817352295,grad_norm: 0.9945777803976203, iteration: 250018
loss: 0.9936603903770447,grad_norm: 0.8122359688301012, iteration: 250019
loss: 0.9615625739097595,grad_norm: 0.9999993108611852, iteration: 250020
loss: 0.9632965922355652,grad_norm: 0.9999991146967457, iteration: 250021
loss: 1.0156114101409912,grad_norm: 0.8769420629570148, iteration: 250022
loss: 1.0685393810272217,grad_norm: 0.9999991493604975, iteration: 250023
loss: 1.1267942190170288,grad_norm: 0.9999991277111185, iteration: 250024
loss: 1.0043632984161377,grad_norm: 0.8192569723421953, iteration: 250025
loss: 1.0022944211959839,grad_norm: 0.9148496106773231, iteration: 250026
loss: 0.9868059158325195,grad_norm: 0.9999994313439234, iteration: 250027
loss: 0.9994078278541565,grad_norm: 0.9999993843978632, iteration: 250028
loss: 1.0148744583129883,grad_norm: 0.9999991001429442, iteration: 250029
loss: 0.9826486706733704,grad_norm: 0.8011082299152763, iteration: 250030
loss: 1.011681079864502,grad_norm: 0.8793352031834137, iteration: 250031
loss: 1.1345957517623901,grad_norm: 0.999999628467403, iteration: 250032
loss: 1.008969783782959,grad_norm: 0.9243018263556013, iteration: 250033
loss: 1.0127699375152588,grad_norm: 0.9342885000103723, iteration: 250034
loss: 0.987677812576294,grad_norm: 0.8540127831105587, iteration: 250035
loss: 1.0091038942337036,grad_norm: 0.8719136180573426, iteration: 250036
loss: 1.0830484628677368,grad_norm: 0.9999998717838684, iteration: 250037
loss: 1.1457675695419312,grad_norm: 0.9999995842288215, iteration: 250038
loss: 1.2092736959457397,grad_norm: 0.9999997701880657, iteration: 250039
loss: 1.035677194595337,grad_norm: 0.9999999132914981, iteration: 250040
loss: 1.3785228729248047,grad_norm: 0.9999997420997604, iteration: 250041
loss: 0.9488874673843384,grad_norm: 0.8889005183247688, iteration: 250042
loss: 0.9995755553245544,grad_norm: 0.8185601966812588, iteration: 250043
loss: 1.0077710151672363,grad_norm: 0.9862005971887732, iteration: 250044
loss: 1.0089495182037354,grad_norm: 0.9999990804321139, iteration: 250045
loss: 0.9903166890144348,grad_norm: 0.8376461607316339, iteration: 250046
loss: 0.9786317944526672,grad_norm: 0.9999992665350932, iteration: 250047
loss: 1.0439417362213135,grad_norm: 0.9999995568648107, iteration: 250048
loss: 1.0001977682113647,grad_norm: 0.9999993250428799, iteration: 250049
loss: 1.0079559087753296,grad_norm: 0.82633073111581, iteration: 250050
loss: 1.0321892499923706,grad_norm: 0.9999993809300698, iteration: 250051
loss: 1.0467305183410645,grad_norm: 0.93242246776759, iteration: 250052
loss: 1.0521678924560547,grad_norm: 0.9999991804448821, iteration: 250053
loss: 1.017052173614502,grad_norm: 0.9999996517384411, iteration: 250054
loss: 1.0250110626220703,grad_norm: 0.9938341419373996, iteration: 250055
loss: 0.9969032406806946,grad_norm: 0.999999100603333, iteration: 250056
loss: 1.0044816732406616,grad_norm: 0.9748538479588749, iteration: 250057
loss: 0.9612292051315308,grad_norm: 0.9999990713797373, iteration: 250058
loss: 1.1602030992507935,grad_norm: 0.9999996253711706, iteration: 250059
loss: 1.0273324251174927,grad_norm: 0.9139827820875494, iteration: 250060
loss: 0.997231125831604,grad_norm: 0.8445916567737749, iteration: 250061
loss: 1.0153677463531494,grad_norm: 0.9999995926814123, iteration: 250062
loss: 0.958471417427063,grad_norm: 0.9027224697436806, iteration: 250063
loss: 0.9859505891799927,grad_norm: 0.9999991313822215, iteration: 250064
loss: 0.9896061420440674,grad_norm: 0.8464779633998938, iteration: 250065
loss: 1.105871319770813,grad_norm: 0.9580040279577708, iteration: 250066
loss: 1.0102614164352417,grad_norm: 0.7174635600485657, iteration: 250067
loss: 0.9997264742851257,grad_norm: 0.9999990678521795, iteration: 250068
loss: 0.9933590292930603,grad_norm: 0.9999999470762391, iteration: 250069
loss: 1.014697551727295,grad_norm: 0.9163861540962731, iteration: 250070
loss: 1.0597519874572754,grad_norm: 0.8300312687160675, iteration: 250071
loss: 1.0004209280014038,grad_norm: 0.8705697157040797, iteration: 250072
loss: 0.9691790342330933,grad_norm: 0.9999991955644071, iteration: 250073
loss: 0.9993615746498108,grad_norm: 0.8783464176986996, iteration: 250074
loss: 0.9838854670524597,grad_norm: 0.9446263130488524, iteration: 250075
loss: 0.9688919186592102,grad_norm: 0.7827435077291464, iteration: 250076
loss: 0.9984817504882812,grad_norm: 0.8881511263062163, iteration: 250077
loss: 1.000166654586792,grad_norm: 0.9905034449180125, iteration: 250078
loss: 1.0037744045257568,grad_norm: 0.9999991463323634, iteration: 250079
loss: 1.0108466148376465,grad_norm: 0.8224330044712084, iteration: 250080
loss: 1.0199583768844604,grad_norm: 0.8860020241953284, iteration: 250081
loss: 0.987951934337616,grad_norm: 0.9759761940617291, iteration: 250082
loss: 1.0202085971832275,grad_norm: 0.9999991336955192, iteration: 250083
loss: 0.9733848571777344,grad_norm: 0.7587971723760117, iteration: 250084
loss: 1.0134165287017822,grad_norm: 0.9311293948776311, iteration: 250085
loss: 1.2193341255187988,grad_norm: 0.9999997049691158, iteration: 250086
loss: 1.0757826566696167,grad_norm: 0.9844429541610953, iteration: 250087
loss: 0.9820867776870728,grad_norm: 0.8327685986233005, iteration: 250088
loss: 1.009302020072937,grad_norm: 0.8217447875949665, iteration: 250089
loss: 0.9815851449966431,grad_norm: 0.9870988914609042, iteration: 250090
loss: 0.9708465337753296,grad_norm: 0.999999108538376, iteration: 250091
loss: 0.9700459241867065,grad_norm: 0.9445145959309644, iteration: 250092
loss: 1.0146323442459106,grad_norm: 0.9999995120713171, iteration: 250093
loss: 0.9719350337982178,grad_norm: 0.9079502209655343, iteration: 250094
loss: 1.0044327974319458,grad_norm: 0.8731297368386582, iteration: 250095
loss: 1.021741509437561,grad_norm: 0.9999989968174766, iteration: 250096
loss: 1.0051159858703613,grad_norm: 0.999999313523902, iteration: 250097
loss: 1.0369079113006592,grad_norm: 0.9133280039270669, iteration: 250098
loss: 0.9901552200317383,grad_norm: 0.8489906040938012, iteration: 250099
loss: 1.0541319847106934,grad_norm: 0.9999989939727941, iteration: 250100
loss: 1.011816143989563,grad_norm: 0.8676440031891429, iteration: 250101
loss: 1.0208431482315063,grad_norm: 0.8035270310315975, iteration: 250102
loss: 0.9928430914878845,grad_norm: 0.9940674986538247, iteration: 250103
loss: 0.9869664311408997,grad_norm: 0.9118261122956022, iteration: 250104
loss: 0.9825966358184814,grad_norm: 0.906643966567918, iteration: 250105
loss: 0.9895427227020264,grad_norm: 0.9999991781161405, iteration: 250106
loss: 1.1358317136764526,grad_norm: 0.999999750067937, iteration: 250107
loss: 1.0296870470046997,grad_norm: 0.9999994324608356, iteration: 250108
loss: 1.0094600915908813,grad_norm: 0.9999992713368856, iteration: 250109
loss: 1.0109081268310547,grad_norm: 0.91813069743111, iteration: 250110
loss: 1.0082377195358276,grad_norm: 0.7953706401873719, iteration: 250111
loss: 1.017695665359497,grad_norm: 0.999999020708199, iteration: 250112
loss: 1.0221643447875977,grad_norm: 0.9999996423086582, iteration: 250113
loss: 0.9915204644203186,grad_norm: 0.9901502678314358, iteration: 250114
loss: 0.9910826086997986,grad_norm: 0.999999329925732, iteration: 250115
loss: 1.0676337480545044,grad_norm: 1.000000054375646, iteration: 250116
loss: 1.1251851320266724,grad_norm: 0.999999779549083, iteration: 250117
loss: 0.9696473479270935,grad_norm: 0.9999998985732637, iteration: 250118
loss: 1.0203559398651123,grad_norm: 0.999999348331313, iteration: 250119
loss: 0.9740691184997559,grad_norm: 0.6972747699986708, iteration: 250120
loss: 0.9889746308326721,grad_norm: 0.999999209753137, iteration: 250121
loss: 0.9985958337783813,grad_norm: 0.9050139509433524, iteration: 250122
loss: 1.0249475240707397,grad_norm: 0.8202470014075398, iteration: 250123
loss: 0.98957759141922,grad_norm: 0.8734818613578812, iteration: 250124
loss: 1.0484962463378906,grad_norm: 0.9999993500544041, iteration: 250125
loss: 0.9934224486351013,grad_norm: 0.8883728124925943, iteration: 250126
loss: 0.9793734550476074,grad_norm: 0.9999990423789359, iteration: 250127
loss: 1.001709222793579,grad_norm: 0.999999106882668, iteration: 250128
loss: 1.0784578323364258,grad_norm: 0.9999995643908715, iteration: 250129
loss: 1.2949970960617065,grad_norm: 0.9999998239862994, iteration: 250130
loss: 1.0214210748672485,grad_norm: 0.9999992608977903, iteration: 250131
loss: 1.1295442581176758,grad_norm: 0.9999993551733178, iteration: 250132
loss: 1.0923645496368408,grad_norm: 0.9999999972048018, iteration: 250133
loss: 1.1497336626052856,grad_norm: 0.9999993988849317, iteration: 250134
loss: 1.1168032884597778,grad_norm: 0.9999993566135873, iteration: 250135
loss: 0.980947732925415,grad_norm: 0.8969984883194947, iteration: 250136
loss: 1.0010360479354858,grad_norm: 0.8773662883116313, iteration: 250137
loss: 1.028866171836853,grad_norm: 0.9999994922141086, iteration: 250138
loss: 1.0392829179763794,grad_norm: 0.9999998681786578, iteration: 250139
loss: 1.0537713766098022,grad_norm: 0.9999999172009327, iteration: 250140
loss: 0.9458123445510864,grad_norm: 0.932023888059734, iteration: 250141
loss: 0.9988827109336853,grad_norm: 0.999999106912477, iteration: 250142
loss: 0.9781352281570435,grad_norm: 0.8914629715407846, iteration: 250143
loss: 0.9944329261779785,grad_norm: 0.8724638067429697, iteration: 250144
loss: 0.9774425029754639,grad_norm: 0.8505968967601493, iteration: 250145
loss: 1.0233350992202759,grad_norm: 0.9662190402487012, iteration: 250146
loss: 0.9991235733032227,grad_norm: 0.7920834831466049, iteration: 250147
loss: 0.983294665813446,grad_norm: 0.8599879423225353, iteration: 250148
loss: 1.0211255550384521,grad_norm: 0.9999999230464668, iteration: 250149
loss: 1.0413614511489868,grad_norm: 0.8815811590660823, iteration: 250150
loss: 1.0680259466171265,grad_norm: 0.8949227475630213, iteration: 250151
loss: 1.131148099899292,grad_norm: 0.9999994445777362, iteration: 250152
loss: 1.0039490461349487,grad_norm: 0.8997787760147102, iteration: 250153
loss: 1.0001634359359741,grad_norm: 0.9999989511726233, iteration: 250154
loss: 1.0539056062698364,grad_norm: 0.9999991291750837, iteration: 250155
loss: 0.9372597932815552,grad_norm: 0.8924131722917854, iteration: 250156
loss: 1.1427931785583496,grad_norm: 0.9999997711509845, iteration: 250157
loss: 1.0293292999267578,grad_norm: 0.999999231946216, iteration: 250158
loss: 1.0579239130020142,grad_norm: 0.9727008515351502, iteration: 250159
loss: 1.02579927444458,grad_norm: 0.9999991358731558, iteration: 250160
loss: 1.0888636112213135,grad_norm: 0.9422924621892023, iteration: 250161
loss: 0.997515082359314,grad_norm: 0.9999991519899026, iteration: 250162
loss: 1.129041314125061,grad_norm: 0.9999998528256936, iteration: 250163
loss: 1.085429310798645,grad_norm: 0.999999906538771, iteration: 250164
loss: 1.0128690004348755,grad_norm: 0.9999990295800955, iteration: 250165
loss: 1.051896572113037,grad_norm: 0.9999995472788558, iteration: 250166
loss: 1.1480995416641235,grad_norm: 0.9999997619399273, iteration: 250167
loss: 1.1013462543487549,grad_norm: 0.9999996920337373, iteration: 250168
loss: 1.075979471206665,grad_norm: 0.9999992866249376, iteration: 250169
loss: 1.0435112714767456,grad_norm: 0.8929664766553331, iteration: 250170
loss: 1.250929832458496,grad_norm: 0.9999994935382741, iteration: 250171
loss: 0.9688773155212402,grad_norm: 0.999999001826561, iteration: 250172
loss: 1.004630446434021,grad_norm: 0.9999990275641529, iteration: 250173
loss: 1.0210237503051758,grad_norm: 0.7612434825291182, iteration: 250174
loss: 1.0776019096374512,grad_norm: 0.99999928551529, iteration: 250175
loss: 0.9993435144424438,grad_norm: 0.9727690445155778, iteration: 250176
loss: 1.0701204538345337,grad_norm: 0.9999999151120953, iteration: 250177
loss: 0.9528067708015442,grad_norm: 0.934882315402848, iteration: 250178
loss: 1.0441738367080688,grad_norm: 0.7465515488979261, iteration: 250179
loss: 1.0554084777832031,grad_norm: 0.9999998409783897, iteration: 250180
loss: 1.0231269598007202,grad_norm: 0.9999996751855365, iteration: 250181
loss: 0.9776197671890259,grad_norm: 0.999999472680561, iteration: 250182
loss: 0.9993579387664795,grad_norm: 0.9999989502276398, iteration: 250183
loss: 0.993009090423584,grad_norm: 0.9999996818763333, iteration: 250184
loss: 1.015693187713623,grad_norm: 0.9359322716367211, iteration: 250185
loss: 1.075154423713684,grad_norm: 0.9999996530039958, iteration: 250186
loss: 1.0102155208587646,grad_norm: 0.9462514571596292, iteration: 250187
loss: 0.9930188059806824,grad_norm: 0.9999996540866491, iteration: 250188
loss: 1.0195300579071045,grad_norm: 0.9999990678615552, iteration: 250189
loss: 1.061617374420166,grad_norm: 0.9999998983621285, iteration: 250190
loss: 0.9631497859954834,grad_norm: 0.8440561589837743, iteration: 250191
loss: 1.0829315185546875,grad_norm: 0.9999998078585255, iteration: 250192
loss: 0.9962189197540283,grad_norm: 0.9999990801194772, iteration: 250193
loss: 1.0650004148483276,grad_norm: 0.9733078657135406, iteration: 250194
loss: 1.0424383878707886,grad_norm: 0.9999997909263111, iteration: 250195
loss: 1.025777816772461,grad_norm: 0.8302098807328443, iteration: 250196
loss: 1.0225626230239868,grad_norm: 0.9999994040000586, iteration: 250197
loss: 0.9692988395690918,grad_norm: 0.9769659916001016, iteration: 250198
loss: 1.0295339822769165,grad_norm: 0.7943809230198908, iteration: 250199
loss: 1.0319774150848389,grad_norm: 0.8717934686292885, iteration: 250200
loss: 0.958710253238678,grad_norm: 0.9999990720430737, iteration: 250201
loss: 1.017470359802246,grad_norm: 0.7489572492301944, iteration: 250202
loss: 1.0287392139434814,grad_norm: 0.9999989169511391, iteration: 250203
loss: 0.9858124852180481,grad_norm: 0.9988243523957658, iteration: 250204
loss: 1.0650395154953003,grad_norm: 0.9999996881611342, iteration: 250205
loss: 1.04949152469635,grad_norm: 0.9357297135179087, iteration: 250206
loss: 1.068058967590332,grad_norm: 0.9999992506759089, iteration: 250207
loss: 1.0058789253234863,grad_norm: 0.9386487675423419, iteration: 250208
loss: 1.00477135181427,grad_norm: 0.8951741150577798, iteration: 250209
loss: 1.0532829761505127,grad_norm: 0.9999998555850113, iteration: 250210
loss: 0.9729891419410706,grad_norm: 0.9872063661102335, iteration: 250211
loss: 1.0016127824783325,grad_norm: 0.9999990173031885, iteration: 250212
loss: 1.0127781629562378,grad_norm: 0.9999989683323703, iteration: 250213
loss: 0.993750810623169,grad_norm: 0.9999994784271259, iteration: 250214
loss: 1.1122007369995117,grad_norm: 0.9999999732159258, iteration: 250215
loss: 0.9868597388267517,grad_norm: 0.9559539018649431, iteration: 250216
loss: 0.9755362868309021,grad_norm: 0.9999990296404961, iteration: 250217
loss: 1.0331388711929321,grad_norm: 0.99999917604815, iteration: 250218
loss: 0.9892688989639282,grad_norm: 0.9605138972762369, iteration: 250219
loss: 0.9721183776855469,grad_norm: 0.9164800437057113, iteration: 250220
loss: 0.994562566280365,grad_norm: 0.8459176648852372, iteration: 250221
loss: 1.005193829536438,grad_norm: 0.836621342636459, iteration: 250222
loss: 1.0386667251586914,grad_norm: 0.999999041720968, iteration: 250223
loss: 0.9725321531295776,grad_norm: 0.8357141623436196, iteration: 250224
loss: 1.0690251588821411,grad_norm: 0.9999992186496718, iteration: 250225
loss: 1.025834321975708,grad_norm: 0.8757083198322272, iteration: 250226
loss: 1.016603708267212,grad_norm: 0.8809130746794729, iteration: 250227
loss: 1.0676559209823608,grad_norm: 0.9999990847668544, iteration: 250228
loss: 0.9915849566459656,grad_norm: 0.8856050159513236, iteration: 250229
loss: 1.0236390829086304,grad_norm: 0.9999994504878267, iteration: 250230
loss: 1.0318326950073242,grad_norm: 0.9999991159021577, iteration: 250231
loss: 1.0173182487487793,grad_norm: 0.800121472006497, iteration: 250232
loss: 0.9974824786186218,grad_norm: 0.9999991557762399, iteration: 250233
loss: 1.2666280269622803,grad_norm: 0.9999995681885048, iteration: 250234
loss: 1.0228497982025146,grad_norm: 0.9999994277823523, iteration: 250235
loss: 0.9880836606025696,grad_norm: 0.9999991046333091, iteration: 250236
loss: 0.9967360496520996,grad_norm: 0.7386630963972772, iteration: 250237
loss: 0.9970411658287048,grad_norm: 0.9999989915499504, iteration: 250238
loss: 1.0045911073684692,grad_norm: 0.996921646923211, iteration: 250239
loss: 1.0149188041687012,grad_norm: 0.9999995727595361, iteration: 250240
loss: 1.060734748840332,grad_norm: 0.9999992943701361, iteration: 250241
loss: 0.9759467840194702,grad_norm: 0.8648408358477898, iteration: 250242
loss: 0.9650818705558777,grad_norm: 0.8170227055661746, iteration: 250243
loss: 1.019840955734253,grad_norm: 0.999999962499006, iteration: 250244
loss: 1.015495777130127,grad_norm: 0.8748897285823948, iteration: 250245
loss: 0.9910421371459961,grad_norm: 0.9999992931935514, iteration: 250246
loss: 1.2422211170196533,grad_norm: 0.9999998764517761, iteration: 250247
loss: 1.0572388172149658,grad_norm: 0.999999248983848, iteration: 250248
loss: 1.197371006011963,grad_norm: 0.9999996039734644, iteration: 250249
loss: 1.1044228076934814,grad_norm: 0.9999998994597703, iteration: 250250
loss: 1.0104576349258423,grad_norm: 0.8572367528546466, iteration: 250251
loss: 1.1108226776123047,grad_norm: 0.9999998558927214, iteration: 250252
loss: 1.0026601552963257,grad_norm: 0.9306261795154889, iteration: 250253
loss: 0.9817919731140137,grad_norm: 0.8736477600827954, iteration: 250254
loss: 0.9939433336257935,grad_norm: 0.8674591061539431, iteration: 250255
loss: 1.0283688306808472,grad_norm: 0.8420744183340857, iteration: 250256
loss: 1.1212379932403564,grad_norm: 0.9999992052976997, iteration: 250257
loss: 1.039345622062683,grad_norm: 0.8789091387547806, iteration: 250258
loss: 1.061562418937683,grad_norm: 0.999999352780718, iteration: 250259
loss: 1.0260273218154907,grad_norm: 0.999999320181291, iteration: 250260
loss: 1.0379353761672974,grad_norm: 0.9999999107372264, iteration: 250261
loss: 0.9985463619232178,grad_norm: 0.9999997122406038, iteration: 250262
loss: 0.9664850831031799,grad_norm: 0.8468009508719306, iteration: 250263
loss: 0.9910252094268799,grad_norm: 0.8938003286580471, iteration: 250264
loss: 1.0288275480270386,grad_norm: 0.999999156961133, iteration: 250265
loss: 1.0824329853057861,grad_norm: 0.9999999076430431, iteration: 250266
loss: 0.9841188788414001,grad_norm: 0.9310924587197046, iteration: 250267
loss: 1.1568676233291626,grad_norm: 0.9999999668786705, iteration: 250268
loss: 1.042423963546753,grad_norm: 0.9999995541364225, iteration: 250269
loss: 1.103345513343811,grad_norm: 0.9999992390761323, iteration: 250270
loss: 1.0482255220413208,grad_norm: 0.9999991121420412, iteration: 250271
loss: 0.9859194159507751,grad_norm: 0.9999989705916374, iteration: 250272
loss: 1.0075814723968506,grad_norm: 0.922795608762017, iteration: 250273
loss: 1.0433825254440308,grad_norm: 0.9999997523156894, iteration: 250274
loss: 1.0371990203857422,grad_norm: 0.9999991283870275, iteration: 250275
loss: 1.035090684890747,grad_norm: 0.9423197385989229, iteration: 250276
loss: 1.0187450647354126,grad_norm: 0.7270195385608645, iteration: 250277
loss: 0.9808538556098938,grad_norm: 0.8131682256881869, iteration: 250278
loss: 0.9733686447143555,grad_norm: 0.9999996097841151, iteration: 250279
loss: 0.959736704826355,grad_norm: 0.8610795201634078, iteration: 250280
loss: 1.0113550424575806,grad_norm: 0.8846761836567423, iteration: 250281
loss: 0.9814335107803345,grad_norm: 0.9519752599134137, iteration: 250282
loss: 1.0075310468673706,grad_norm: 0.8885904356490698, iteration: 250283
loss: 0.9950054883956909,grad_norm: 0.9579213367191651, iteration: 250284
loss: 1.0214636325836182,grad_norm: 0.9999992725105648, iteration: 250285
loss: 0.9972203373908997,grad_norm: 0.8429725334465327, iteration: 250286
loss: 1.0180305242538452,grad_norm: 0.8793542248963756, iteration: 250287
loss: 1.0793575048446655,grad_norm: 0.9999993763113147, iteration: 250288
loss: 1.0106345415115356,grad_norm: 0.9999995965084483, iteration: 250289
loss: 1.2456371784210205,grad_norm: 0.9999992749397362, iteration: 250290
loss: 1.0114781856536865,grad_norm: 0.813076568534856, iteration: 250291
loss: 0.9970241785049438,grad_norm: 0.845937364026289, iteration: 250292
loss: 1.0132770538330078,grad_norm: 0.9999991516121844, iteration: 250293
loss: 1.2075092792510986,grad_norm: 0.999999616007042, iteration: 250294
loss: 0.953294575214386,grad_norm: 0.9996013819344708, iteration: 250295
loss: 0.9811396598815918,grad_norm: 0.8352817459945061, iteration: 250296
loss: 0.9664447903633118,grad_norm: 0.8426047436513506, iteration: 250297
loss: 1.046643853187561,grad_norm: 0.9999994524097509, iteration: 250298
loss: 0.9930210113525391,grad_norm: 0.9280483225894788, iteration: 250299
loss: 0.9977179169654846,grad_norm: 0.858252628960334, iteration: 250300
loss: 1.001203179359436,grad_norm: 0.9999993011034758, iteration: 250301
loss: 1.087003231048584,grad_norm: 0.9999996235695103, iteration: 250302
loss: 0.9629803895950317,grad_norm: 0.9212044555973561, iteration: 250303
loss: 0.9772489070892334,grad_norm: 0.7965688811121827, iteration: 250304
loss: 0.9685764908790588,grad_norm: 0.878717965868498, iteration: 250305
loss: 0.9792312383651733,grad_norm: 0.9058641597705276, iteration: 250306
loss: 0.9731451869010925,grad_norm: 0.999999076848662, iteration: 250307
loss: 0.998495876789093,grad_norm: 0.9061573395876559, iteration: 250308
loss: 1.079807996749878,grad_norm: 0.9999991304661873, iteration: 250309
loss: 0.9992093443870544,grad_norm: 0.9543031530495637, iteration: 250310
loss: 1.0386656522750854,grad_norm: 0.9999991783233112, iteration: 250311
loss: 1.0146867036819458,grad_norm: 0.8779605023223477, iteration: 250312
loss: 1.0527712106704712,grad_norm: 0.9999997997235524, iteration: 250313
loss: 1.0851202011108398,grad_norm: 0.9999993416021734, iteration: 250314
loss: 1.0152815580368042,grad_norm: 0.9999991337442783, iteration: 250315
loss: 1.0478200912475586,grad_norm: 0.9999990215493989, iteration: 250316
loss: 1.0535244941711426,grad_norm: 0.909841163853384, iteration: 250317
loss: 1.0498241186141968,grad_norm: 0.9999994905221755, iteration: 250318
loss: 0.9529282450675964,grad_norm: 0.9999990243800451, iteration: 250319
loss: 1.0079283714294434,grad_norm: 0.7321138156026584, iteration: 250320
loss: 1.0370903015136719,grad_norm: 0.7301916122985828, iteration: 250321
loss: 0.9869189858436584,grad_norm: 0.999999788566148, iteration: 250322
loss: 1.0524336099624634,grad_norm: 0.946858738023866, iteration: 250323
loss: 0.9782577753067017,grad_norm: 0.8361374438277331, iteration: 250324
loss: 1.0191278457641602,grad_norm: 0.9999989634968152, iteration: 250325
loss: 1.0176763534545898,grad_norm: 0.7242803741625183, iteration: 250326
loss: 1.0026609897613525,grad_norm: 0.9438779590657195, iteration: 250327
loss: 1.0214385986328125,grad_norm: 0.9999990221477723, iteration: 250328
loss: 0.9925079345703125,grad_norm: 0.999999623012063, iteration: 250329
loss: 1.0219122171401978,grad_norm: 0.9999991768992088, iteration: 250330
loss: 0.9808436036109924,grad_norm: 0.8911293863864383, iteration: 250331
loss: 1.3506876230239868,grad_norm: 0.9999999018029909, iteration: 250332
loss: 0.978352427482605,grad_norm: 0.9253061653355619, iteration: 250333
loss: 0.9748812913894653,grad_norm: 0.9999990090837988, iteration: 250334
loss: 0.9846866130828857,grad_norm: 0.8841517373410492, iteration: 250335
loss: 1.035961389541626,grad_norm: 0.8698778668297967, iteration: 250336
loss: 1.019386887550354,grad_norm: 0.8656535455790728, iteration: 250337
loss: 1.017812728881836,grad_norm: 0.7745851134050009, iteration: 250338
loss: 1.0339704751968384,grad_norm: 0.8665895582323382, iteration: 250339
loss: 1.0572493076324463,grad_norm: 0.9999994532969178, iteration: 250340
loss: 0.989348292350769,grad_norm: 0.7656582275812666, iteration: 250341
loss: 0.9991702437400818,grad_norm: 0.9999993604686915, iteration: 250342
loss: 1.0428928136825562,grad_norm: 0.9999998650692634, iteration: 250343
loss: 1.011966586112976,grad_norm: 0.9999991807903014, iteration: 250344
loss: 0.9723888635635376,grad_norm: 0.8166169785687213, iteration: 250345
loss: 1.0301196575164795,grad_norm: 0.942667984949909, iteration: 250346
loss: 1.0362753868103027,grad_norm: 0.92458349192979, iteration: 250347
loss: 1.12327241897583,grad_norm: 0.9999999014224533, iteration: 250348
loss: 1.0087798833847046,grad_norm: 0.8508408822003356, iteration: 250349
loss: 1.0662614107131958,grad_norm: 0.9999994821414209, iteration: 250350
loss: 1.0209561586380005,grad_norm: 0.8661002960748405, iteration: 250351
loss: 1.0653125047683716,grad_norm: 0.9999998633477206, iteration: 250352
loss: 0.9886144399642944,grad_norm: 0.9411862069386703, iteration: 250353
loss: 0.9741693139076233,grad_norm: 0.8636453853802829, iteration: 250354
loss: 1.0275229215621948,grad_norm: 0.808552313703789, iteration: 250355
loss: 1.0259969234466553,grad_norm: 0.9999990014976768, iteration: 250356
loss: 0.9952871203422546,grad_norm: 0.9999997761649265, iteration: 250357
loss: 1.1775310039520264,grad_norm: 0.9999997199910973, iteration: 250358
loss: 1.011865258216858,grad_norm: 0.8570393495655135, iteration: 250359
loss: 1.1289734840393066,grad_norm: 0.9999997511334704, iteration: 250360
loss: 0.9850819110870361,grad_norm: 0.7889921347282296, iteration: 250361
loss: 1.0043262243270874,grad_norm: 0.9999990039655248, iteration: 250362
loss: 1.025486707687378,grad_norm: 0.925662026252507, iteration: 250363
loss: 0.9854125380516052,grad_norm: 0.7177195748342299, iteration: 250364
loss: 0.9918275475502014,grad_norm: 0.8303360304758233, iteration: 250365
loss: 1.0367350578308105,grad_norm: 0.9999996494249205, iteration: 250366
loss: 1.1472834348678589,grad_norm: 0.99999973611507, iteration: 250367
loss: 1.131980061531067,grad_norm: 0.9999996923415435, iteration: 250368
loss: 0.9922155737876892,grad_norm: 0.8408592332759184, iteration: 250369
loss: 0.9757005572319031,grad_norm: 0.8546000275294326, iteration: 250370
loss: 1.0066859722137451,grad_norm: 0.9999990447032473, iteration: 250371
loss: 1.0171722173690796,grad_norm: 0.9999993247714164, iteration: 250372
loss: 1.1315571069717407,grad_norm: 0.9999991911854822, iteration: 250373
loss: 1.0857785940170288,grad_norm: 0.9999999472674518, iteration: 250374
loss: 1.0894147157669067,grad_norm: 0.9999996609942933, iteration: 250375
loss: 1.0123600959777832,grad_norm: 0.9859301945759987, iteration: 250376
loss: 1.0313085317611694,grad_norm: 0.9414087713324538, iteration: 250377
loss: 1.0114721059799194,grad_norm: 0.9999991398402923, iteration: 250378
loss: 1.0404266119003296,grad_norm: 0.966461540984346, iteration: 250379
loss: 1.0161586999893188,grad_norm: 0.9999990990037712, iteration: 250380
loss: 0.9767143130302429,grad_norm: 0.8316032712993934, iteration: 250381
loss: 0.9918633103370667,grad_norm: 0.8200333507969919, iteration: 250382
loss: 1.0793383121490479,grad_norm: 0.9999995296958816, iteration: 250383
loss: 0.9878467917442322,grad_norm: 0.9826546515771855, iteration: 250384
loss: 0.9851850867271423,grad_norm: 0.8087719697388001, iteration: 250385
loss: 1.004807472229004,grad_norm: 0.9999990688328327, iteration: 250386
loss: 1.0477386713027954,grad_norm: 0.9999996462278347, iteration: 250387
loss: 1.0066769123077393,grad_norm: 0.8215148742877492, iteration: 250388
loss: 1.0103214979171753,grad_norm: 0.8259417403139903, iteration: 250389
loss: 0.9785317778587341,grad_norm: 0.7265458368867134, iteration: 250390
loss: 1.0244438648223877,grad_norm: 0.8108189480208726, iteration: 250391
loss: 0.9968657493591309,grad_norm: 0.9999990832147798, iteration: 250392
loss: 1.419608473777771,grad_norm: 0.9999997559954953, iteration: 250393
loss: 1.025634765625,grad_norm: 0.9999992888961956, iteration: 250394
loss: 1.1242570877075195,grad_norm: 0.9999991972725157, iteration: 250395
loss: 0.9952995777130127,grad_norm: 0.9409228455389556, iteration: 250396
loss: 1.0916393995285034,grad_norm: 0.9999995535774799, iteration: 250397
loss: 1.0358089208602905,grad_norm: 0.886639325333149, iteration: 250398
loss: 1.1665836572647095,grad_norm: 0.9999999729912161, iteration: 250399
loss: 1.0080629587173462,grad_norm: 0.9377185325822989, iteration: 250400
loss: 1.0202480554580688,grad_norm: 0.9999990916887903, iteration: 250401
loss: 1.0131360292434692,grad_norm: 0.8961028984459312, iteration: 250402
loss: 1.0098941326141357,grad_norm: 0.8690051093372233, iteration: 250403
loss: 1.0392262935638428,grad_norm: 0.9999995897857793, iteration: 250404
loss: 0.9619421362876892,grad_norm: 0.7730137185540339, iteration: 250405
loss: 1.0310062170028687,grad_norm: 0.9665249386007552, iteration: 250406
loss: 0.9855340123176575,grad_norm: 0.9975164022324073, iteration: 250407
loss: 0.9986377954483032,grad_norm: 0.9144519628419681, iteration: 250408
loss: 1.016911506652832,grad_norm: 0.9999992199080603, iteration: 250409
loss: 1.0226036310195923,grad_norm: 0.8947321411121245, iteration: 250410
loss: 0.9707221984863281,grad_norm: 0.9950209610807439, iteration: 250411
loss: 0.9943345785140991,grad_norm: 0.7625013849688048, iteration: 250412
loss: 0.9951945543289185,grad_norm: 0.9999991206876242, iteration: 250413
loss: 0.9973337054252625,grad_norm: 0.9999991427748688, iteration: 250414
loss: 1.143929123878479,grad_norm: 0.9999998193267097, iteration: 250415
loss: 1.03954017162323,grad_norm: 0.9999993764963602, iteration: 250416
loss: 0.9818436503410339,grad_norm: 0.7284812902031422, iteration: 250417
loss: 1.0136253833770752,grad_norm: 0.9568107248428801, iteration: 250418
loss: 1.0278140306472778,grad_norm: 0.9241281560081891, iteration: 250419
loss: 1.0241068601608276,grad_norm: 0.9999999518859803, iteration: 250420
loss: 1.014418601989746,grad_norm: 0.8883268124027021, iteration: 250421
loss: 0.9989175200462341,grad_norm: 0.9244414262332071, iteration: 250422
loss: 1.1592562198638916,grad_norm: 0.999999122248273, iteration: 250423
loss: 0.9836146235466003,grad_norm: 0.9999994789444142, iteration: 250424
loss: 1.0132349729537964,grad_norm: 0.99999985840428, iteration: 250425
loss: 1.003073811531067,grad_norm: 0.9447640704911349, iteration: 250426
loss: 1.0030477046966553,grad_norm: 0.9999990367170745, iteration: 250427
loss: 1.0012418031692505,grad_norm: 0.8239711192064552, iteration: 250428
loss: 1.006961703300476,grad_norm: 0.978144377942815, iteration: 250429
loss: 1.0217394828796387,grad_norm: 0.9999995610697342, iteration: 250430
loss: 1.0145384073257446,grad_norm: 0.9286460759770654, iteration: 250431
loss: 0.9846998453140259,grad_norm: 0.9879134220821257, iteration: 250432
loss: 0.9861566424369812,grad_norm: 0.8361622680230867, iteration: 250433
loss: 1.011866569519043,grad_norm: 0.9800686883872761, iteration: 250434
loss: 1.0403456687927246,grad_norm: 0.9999992050346983, iteration: 250435
loss: 1.0694724321365356,grad_norm: 0.9658154436127054, iteration: 250436
loss: 1.0736591815948486,grad_norm: 0.9999991171278988, iteration: 250437
loss: 1.0776907205581665,grad_norm: 0.9999993331918936, iteration: 250438
loss: 0.9780731201171875,grad_norm: 0.8005872883669158, iteration: 250439
loss: 1.0322661399841309,grad_norm: 0.9999992740733534, iteration: 250440
loss: 0.9821532368659973,grad_norm: 0.9360386030044924, iteration: 250441
loss: 0.9904057383537292,grad_norm: 0.9156676144605634, iteration: 250442
loss: 0.9905282855033875,grad_norm: 0.9185470300125731, iteration: 250443
loss: 1.041238784790039,grad_norm: 0.9999998965998661, iteration: 250444
loss: 1.0154578685760498,grad_norm: 0.975976196268084, iteration: 250445
loss: 1.018604040145874,grad_norm: 0.8236628839934484, iteration: 250446
loss: 1.0205597877502441,grad_norm: 0.910125904729277, iteration: 250447
loss: 0.9909077286720276,grad_norm: 0.99999981463388, iteration: 250448
loss: 1.0601106882095337,grad_norm: 0.9999995033406355, iteration: 250449
loss: 1.0076267719268799,grad_norm: 0.8954409277791618, iteration: 250450
loss: 0.992205023765564,grad_norm: 0.8928499302762256, iteration: 250451
loss: 1.0023577213287354,grad_norm: 0.8969861118815952, iteration: 250452
loss: 1.0029984712600708,grad_norm: 0.9495898873515117, iteration: 250453
loss: 1.125027060508728,grad_norm: 0.9999998678965428, iteration: 250454
loss: 1.0013514757156372,grad_norm: 0.8774144172204129, iteration: 250455
loss: 1.1752738952636719,grad_norm: 0.9999996712811423, iteration: 250456
loss: 1.0278366804122925,grad_norm: 0.8473734858600952, iteration: 250457
loss: 1.0012158155441284,grad_norm: 0.989320081644704, iteration: 250458
loss: 1.079843282699585,grad_norm: 0.9999998786851814, iteration: 250459
loss: 0.9980217814445496,grad_norm: 0.8336990774851168, iteration: 250460
loss: 1.0094407796859741,grad_norm: 0.8789156604755901, iteration: 250461
loss: 1.0236934423446655,grad_norm: 0.770142880078715, iteration: 250462
loss: 1.0104550123214722,grad_norm: 0.8622898115242302, iteration: 250463
loss: 1.0390334129333496,grad_norm: 0.9999994555655277, iteration: 250464
loss: 0.9974962472915649,grad_norm: 0.9022146485265482, iteration: 250465
loss: 0.9590949416160583,grad_norm: 0.9999991651504196, iteration: 250466
loss: 1.001199722290039,grad_norm: 0.8661978697971813, iteration: 250467
loss: 0.98090660572052,grad_norm: 0.9999989603186943, iteration: 250468
loss: 1.0752564668655396,grad_norm: 0.9999991500312448, iteration: 250469
loss: 1.0170317888259888,grad_norm: 0.7789357583930361, iteration: 250470
loss: 1.026555061340332,grad_norm: 0.977623627517578, iteration: 250471
loss: 1.12138032913208,grad_norm: 0.9999995331559627, iteration: 250472
loss: 0.9861290454864502,grad_norm: 0.9557541558148204, iteration: 250473
loss: 0.9989150762557983,grad_norm: 0.7945676324135631, iteration: 250474
loss: 1.0460456609725952,grad_norm: 0.857177559147785, iteration: 250475
loss: 0.9832343459129333,grad_norm: 0.9141196639397838, iteration: 250476
loss: 0.9827407598495483,grad_norm: 0.8442151236312806, iteration: 250477
loss: 1.0096158981323242,grad_norm: 0.7630922405743146, iteration: 250478
loss: 1.0550739765167236,grad_norm: 0.9999993646725581, iteration: 250479
loss: 0.9956669807434082,grad_norm: 0.9999994186121254, iteration: 250480
loss: 1.0058910846710205,grad_norm: 0.9399351764142867, iteration: 250481
loss: 1.0187489986419678,grad_norm: 0.9904000540228489, iteration: 250482
loss: 0.9995520710945129,grad_norm: 0.999999213255154, iteration: 250483
loss: 1.024178147315979,grad_norm: 0.999999139015122, iteration: 250484
loss: 1.0238640308380127,grad_norm: 0.9999990560795404, iteration: 250485
loss: 1.1570448875427246,grad_norm: 0.9999997381847928, iteration: 250486
loss: 1.0665228366851807,grad_norm: 0.8193206903900897, iteration: 250487
loss: 1.0176211595535278,grad_norm: 0.9999989874803765, iteration: 250488
loss: 1.0052317380905151,grad_norm: 0.8859724562082529, iteration: 250489
loss: 1.0054253339767456,grad_norm: 0.8743615927508236, iteration: 250490
loss: 1.026842713356018,grad_norm: 0.9999997889106167, iteration: 250491
loss: 1.0513211488723755,grad_norm: 0.9999994637643681, iteration: 250492
loss: 1.030163288116455,grad_norm: 0.9430914001116176, iteration: 250493
loss: 1.0405211448669434,grad_norm: 0.9999990264439358, iteration: 250494
loss: 1.0106302499771118,grad_norm: 0.9538293800069292, iteration: 250495
loss: 1.0461896657943726,grad_norm: 0.9999993168253304, iteration: 250496
loss: 1.005427360534668,grad_norm: 0.869708413809152, iteration: 250497
loss: 1.0196495056152344,grad_norm: 0.9999991115870882, iteration: 250498
loss: 0.9828028082847595,grad_norm: 0.9043840375476425, iteration: 250499
loss: 1.001880407333374,grad_norm: 0.999999111888571, iteration: 250500
loss: 0.9957890510559082,grad_norm: 0.9999991842735853, iteration: 250501
loss: 0.9692129492759705,grad_norm: 0.9999992734783132, iteration: 250502
loss: 1.0030648708343506,grad_norm: 0.9999992587058196, iteration: 250503
loss: 1.0102040767669678,grad_norm: 0.7575719688921595, iteration: 250504
loss: 0.9985790252685547,grad_norm: 0.9347875140427009, iteration: 250505
loss: 1.010068416595459,grad_norm: 0.9999992632151229, iteration: 250506
loss: 0.996935248374939,grad_norm: 0.9999994352185687, iteration: 250507
loss: 1.0160577297210693,grad_norm: 0.9195031665567825, iteration: 250508
loss: 1.0230194330215454,grad_norm: 0.8523865026052689, iteration: 250509
loss: 0.9715563654899597,grad_norm: 0.999999451546893, iteration: 250510
loss: 1.0111900568008423,grad_norm: 0.880788630301949, iteration: 250511
loss: 1.0078965425491333,grad_norm: 0.8782884886229528, iteration: 250512
loss: 0.9909479022026062,grad_norm: 0.8228896938785186, iteration: 250513
loss: 0.9940580725669861,grad_norm: 0.9088643159326133, iteration: 250514
loss: 1.0265233516693115,grad_norm: 0.8787341489950564, iteration: 250515
loss: 1.0121707916259766,grad_norm: 0.9751623115244936, iteration: 250516
loss: 0.9660094976425171,grad_norm: 0.8862561044962647, iteration: 250517
loss: 1.0201773643493652,grad_norm: 0.999999179644282, iteration: 250518
loss: 0.955464243888855,grad_norm: 0.8391320366178344, iteration: 250519
loss: 1.0463168621063232,grad_norm: 0.9624247641473923, iteration: 250520
loss: 1.0962486267089844,grad_norm: 0.999999599695774, iteration: 250521
loss: 0.9867216348648071,grad_norm: 0.8431602885931658, iteration: 250522
loss: 1.0161501169204712,grad_norm: 0.9461053102186979, iteration: 250523
loss: 0.9957371950149536,grad_norm: 0.9999990489029262, iteration: 250524
loss: 1.0031850337982178,grad_norm: 0.9301421624352545, iteration: 250525
loss: 1.0145124197006226,grad_norm: 0.9999997295280859, iteration: 250526
loss: 1.0030558109283447,grad_norm: 0.9490720829513913, iteration: 250527
loss: 1.0481632947921753,grad_norm: 0.8686287901160321, iteration: 250528
loss: 0.9961705207824707,grad_norm: 0.9318705125771026, iteration: 250529
loss: 1.0455865859985352,grad_norm: 0.9999998170835769, iteration: 250530
loss: 1.0897109508514404,grad_norm: 0.9999996158472253, iteration: 250531
loss: 0.9920526742935181,grad_norm: 0.9999992474649171, iteration: 250532
loss: 1.000738501548767,grad_norm: 0.8150846796708775, iteration: 250533
loss: 1.0122511386871338,grad_norm: 0.8062998371832546, iteration: 250534
loss: 1.0043236017227173,grad_norm: 0.9962615195910541, iteration: 250535
loss: 0.9880983829498291,grad_norm: 0.9999992533488522, iteration: 250536
loss: 1.0168083906173706,grad_norm: 0.8641403759382326, iteration: 250537
loss: 0.9673416614532471,grad_norm: 0.9306801489647807, iteration: 250538
loss: 0.9806520938873291,grad_norm: 0.9296731024649296, iteration: 250539
loss: 0.9727780222892761,grad_norm: 0.8324578981880766, iteration: 250540
loss: 1.0361874103546143,grad_norm: 0.9069865201065277, iteration: 250541
loss: 0.9924832582473755,grad_norm: 0.7972505649426306, iteration: 250542
loss: 0.9881210327148438,grad_norm: 0.9246775198112903, iteration: 250543
loss: 1.1649205684661865,grad_norm: 0.9999996853086555, iteration: 250544
loss: 1.0126028060913086,grad_norm: 0.9429171811310169, iteration: 250545
loss: 1.0151389837265015,grad_norm: 0.9118837657586324, iteration: 250546
loss: 0.9959776997566223,grad_norm: 0.9605522812202019, iteration: 250547
loss: 0.9836187362670898,grad_norm: 0.8929763320094168, iteration: 250548
loss: 1.046960473060608,grad_norm: 0.9289598251362317, iteration: 250549
loss: 1.004597783088684,grad_norm: 0.9987959348245108, iteration: 250550
loss: 0.965423047542572,grad_norm: 0.8491945362423028, iteration: 250551
loss: 1.0267300605773926,grad_norm: 0.999999116996885, iteration: 250552
loss: 1.0018837451934814,grad_norm: 0.9999997340348398, iteration: 250553
loss: 0.9927006363868713,grad_norm: 0.8291774496550552, iteration: 250554
loss: 1.0275551080703735,grad_norm: 0.9999995630417801, iteration: 250555
loss: 1.0062330961227417,grad_norm: 0.9985892487083755, iteration: 250556
loss: 0.987596869468689,grad_norm: 0.948005559592048, iteration: 250557
loss: 1.0015844106674194,grad_norm: 0.8087476946742133, iteration: 250558
loss: 1.0035096406936646,grad_norm: 0.9999989629423655, iteration: 250559
loss: 0.9647585153579712,grad_norm: 0.8825077253047724, iteration: 250560
loss: 1.0316916704177856,grad_norm: 0.9999997958469147, iteration: 250561
loss: 0.9992188215255737,grad_norm: 0.9999990909986686, iteration: 250562
loss: 1.0244005918502808,grad_norm: 0.8697072500724126, iteration: 250563
loss: 0.9901889562606812,grad_norm: 0.8017439370912885, iteration: 250564
loss: 1.0005582571029663,grad_norm: 0.9999998270239625, iteration: 250565
loss: 1.0098176002502441,grad_norm: 0.7936079141484639, iteration: 250566
loss: 0.972084641456604,grad_norm: 0.9999990587941227, iteration: 250567
loss: 0.976087749004364,grad_norm: 0.8566564546910549, iteration: 250568
loss: 1.0133670568466187,grad_norm: 0.9999994036042508, iteration: 250569
loss: 1.0259627103805542,grad_norm: 0.9999989941016486, iteration: 250570
loss: 1.011892318725586,grad_norm: 0.8873989913880131, iteration: 250571
loss: 1.0280818939208984,grad_norm: 0.8462988996074132, iteration: 250572
loss: 0.9796690344810486,grad_norm: 0.9999992031276727, iteration: 250573
loss: 1.0124499797821045,grad_norm: 0.9999990411483297, iteration: 250574
loss: 0.9520819783210754,grad_norm: 0.9464560813615992, iteration: 250575
loss: 0.9821071028709412,grad_norm: 0.9963626176749614, iteration: 250576
loss: 0.9873536825180054,grad_norm: 0.8989481366457001, iteration: 250577
loss: 0.9999351501464844,grad_norm: 0.9999998844331034, iteration: 250578
loss: 1.0105788707733154,grad_norm: 0.7047477059583488, iteration: 250579
loss: 1.0067369937896729,grad_norm: 0.9999991297816246, iteration: 250580
loss: 0.9862931370735168,grad_norm: 0.9762993684247893, iteration: 250581
loss: 0.9864898324012756,grad_norm: 0.8947953229528262, iteration: 250582
loss: 0.9929501414299011,grad_norm: 0.7816776835459155, iteration: 250583
loss: 0.9688466191291809,grad_norm: 0.9999989765978109, iteration: 250584
loss: 1.010379433631897,grad_norm: 0.9328647225595761, iteration: 250585
loss: 0.9864187836647034,grad_norm: 0.9078338534554387, iteration: 250586
loss: 1.029013991355896,grad_norm: 0.8660018698796902, iteration: 250587
loss: 1.0134495496749878,grad_norm: 0.803798338288463, iteration: 250588
loss: 1.0055649280548096,grad_norm: 0.9904944230412414, iteration: 250589
loss: 1.0207464694976807,grad_norm: 0.9999991128688982, iteration: 250590
loss: 0.9872794151306152,grad_norm: 0.9999990609577072, iteration: 250591
loss: 0.9943777918815613,grad_norm: 0.7860663549981242, iteration: 250592
loss: 1.027077317237854,grad_norm: 0.8736829665968946, iteration: 250593
loss: 1.0510222911834717,grad_norm: 0.9999991520634861, iteration: 250594
loss: 0.98521888256073,grad_norm: 0.9999427247529462, iteration: 250595
loss: 1.0026025772094727,grad_norm: 0.9061983638572034, iteration: 250596
loss: 1.0618674755096436,grad_norm: 0.9999990755305618, iteration: 250597
loss: 1.0126055479049683,grad_norm: 0.9405999396243107, iteration: 250598
loss: 1.0378018617630005,grad_norm: 0.8997764909255588, iteration: 250599
loss: 1.0078258514404297,grad_norm: 0.8494493732903996, iteration: 250600
loss: 0.9888603091239929,grad_norm: 0.9999989545971429, iteration: 250601
loss: 1.045602798461914,grad_norm: 0.9999992653992177, iteration: 250602
loss: 0.9911121129989624,grad_norm: 0.9999991277158515, iteration: 250603
loss: 0.9966049790382385,grad_norm: 0.9184535362786186, iteration: 250604
loss: 1.009117603302002,grad_norm: 0.9999990459860139, iteration: 250605
loss: 1.0681817531585693,grad_norm: 0.9999996623327619, iteration: 250606
loss: 0.9922863841056824,grad_norm: 0.8835985842105248, iteration: 250607
loss: 1.0139580965042114,grad_norm: 0.8367363825670487, iteration: 250608
loss: 1.016811728477478,grad_norm: 0.7403761762365817, iteration: 250609
loss: 0.9800671339035034,grad_norm: 0.7596730892732989, iteration: 250610
loss: 1.017878532409668,grad_norm: 0.9055497967642208, iteration: 250611
loss: 1.0089751482009888,grad_norm: 0.9643455197690746, iteration: 250612
loss: 0.9862440824508667,grad_norm: 0.9885168831891714, iteration: 250613
loss: 1.2545334100723267,grad_norm: 0.9999992192733765, iteration: 250614
loss: 1.0224462747573853,grad_norm: 0.7579131474860963, iteration: 250615
loss: 1.0152250528335571,grad_norm: 0.9193212702744148, iteration: 250616
loss: 1.0011391639709473,grad_norm: 0.9863412051555533, iteration: 250617
loss: 1.0024852752685547,grad_norm: 0.9500361591178813, iteration: 250618
loss: 1.044427752494812,grad_norm: 0.8767606391895617, iteration: 250619
loss: 1.1001081466674805,grad_norm: 0.9999996298582584, iteration: 250620
loss: 0.9645960927009583,grad_norm: 0.9947934004018026, iteration: 250621
loss: 0.9739768505096436,grad_norm: 0.8478743836594035, iteration: 250622
loss: 0.9849171042442322,grad_norm: 0.9046791187240899, iteration: 250623
loss: 1.000077486038208,grad_norm: 0.8129950460763194, iteration: 250624
loss: 1.0023137331008911,grad_norm: 0.9097932712181419, iteration: 250625
loss: 0.9964327216148376,grad_norm: 0.8688271325840138, iteration: 250626
loss: 1.0057077407836914,grad_norm: 0.9617709270785959, iteration: 250627
loss: 1.0536376237869263,grad_norm: 0.9999993240503383, iteration: 250628
loss: 0.9860473871231079,grad_norm: 0.8755986397133331, iteration: 250629
loss: 1.0016413927078247,grad_norm: 0.9999990543188023, iteration: 250630
loss: 1.0123118162155151,grad_norm: 0.8342946870845916, iteration: 250631
loss: 1.0105128288269043,grad_norm: 0.9999991260944427, iteration: 250632
loss: 1.0020197629928589,grad_norm: 0.8180865366665424, iteration: 250633
loss: 1.0135936737060547,grad_norm: 0.8947411789189995, iteration: 250634
loss: 0.9791130423545837,grad_norm: 0.9059007602052723, iteration: 250635
loss: 0.9542481303215027,grad_norm: 0.7700093824862254, iteration: 250636
loss: 0.9804672598838806,grad_norm: 0.863478876582946, iteration: 250637
loss: 1.0030455589294434,grad_norm: 0.8950651514973861, iteration: 250638
loss: 1.0103869438171387,grad_norm: 0.8158777013336995, iteration: 250639
loss: 1.0190287828445435,grad_norm: 0.9999991247991086, iteration: 250640
loss: 1.0239418745040894,grad_norm: 0.8974896428948946, iteration: 250641
loss: 1.006603479385376,grad_norm: 0.8584205230820646, iteration: 250642
loss: 1.0657660961151123,grad_norm: 0.9999990930166379, iteration: 250643
loss: 1.0035518407821655,grad_norm: 0.9999990509905644, iteration: 250644
loss: 0.9760912656784058,grad_norm: 0.9054652885094526, iteration: 250645
loss: 1.0049965381622314,grad_norm: 0.9999998534149485, iteration: 250646
loss: 0.9780098795890808,grad_norm: 0.9999990440729665, iteration: 250647
loss: 1.0164101123809814,grad_norm: 0.9934080566315875, iteration: 250648
loss: 1.0481313467025757,grad_norm: 0.9999991698591909, iteration: 250649
loss: 1.0032093524932861,grad_norm: 0.9254040124823996, iteration: 250650
loss: 1.0532948970794678,grad_norm: 0.8577438673475273, iteration: 250651
loss: 1.0378124713897705,grad_norm: 0.8171675908099169, iteration: 250652
loss: 1.0095608234405518,grad_norm: 0.7818967509548521, iteration: 250653
loss: 0.9942615628242493,grad_norm: 0.743386643981932, iteration: 250654
loss: 1.0182002782821655,grad_norm: 0.9010726782209845, iteration: 250655
loss: 1.0940945148468018,grad_norm: 0.8633705834154004, iteration: 250656
loss: 1.0386873483657837,grad_norm: 0.86713732123274, iteration: 250657
loss: 0.9948108196258545,grad_norm: 0.8497944380798774, iteration: 250658
loss: 1.030521035194397,grad_norm: 0.9999993286345218, iteration: 250659
loss: 1.097838044166565,grad_norm: 0.9999996319710092, iteration: 250660
loss: 0.9749863147735596,grad_norm: 0.7986755135562198, iteration: 250661
loss: 0.9854336977005005,grad_norm: 0.8697438847847968, iteration: 250662
loss: 0.9659322500228882,grad_norm: 0.938137200149311, iteration: 250663
loss: 1.0082839727401733,grad_norm: 0.9179058054134461, iteration: 250664
loss: 1.0268396139144897,grad_norm: 0.8219061412198884, iteration: 250665
loss: 0.9986438751220703,grad_norm: 0.8822077658481806, iteration: 250666
loss: 1.0171767473220825,grad_norm: 0.8565193790275494, iteration: 250667
loss: 1.0402603149414062,grad_norm: 0.9891273276744305, iteration: 250668
loss: 1.0132514238357544,grad_norm: 0.9999990925337333, iteration: 250669
loss: 0.9900834560394287,grad_norm: 0.9139844325295322, iteration: 250670
loss: 0.9762541055679321,grad_norm: 0.9310208763902, iteration: 250671
loss: 0.9675227403640747,grad_norm: 0.8557584648411136, iteration: 250672
loss: 1.0353121757507324,grad_norm: 0.8414503235512734, iteration: 250673
loss: 1.0829015970230103,grad_norm: 1.00000000743905, iteration: 250674
loss: 1.0168436765670776,grad_norm: 0.8971496599120258, iteration: 250675
loss: 1.080454707145691,grad_norm: 0.9999992699456577, iteration: 250676
loss: 1.0033210515975952,grad_norm: 0.8742834620980157, iteration: 250677
loss: 0.9742089509963989,grad_norm: 0.9999991592711123, iteration: 250678
loss: 0.9770157933235168,grad_norm: 0.772110407913617, iteration: 250679
loss: 0.972619891166687,grad_norm: 0.8094708171111844, iteration: 250680
loss: 1.0125763416290283,grad_norm: 0.9023343845229562, iteration: 250681
loss: 0.9659886956214905,grad_norm: 0.9561081448117553, iteration: 250682
loss: 1.0271432399749756,grad_norm: 0.9999998971948973, iteration: 250683
loss: 0.9799814820289612,grad_norm: 0.7781759999587904, iteration: 250684
loss: 0.990073561668396,grad_norm: 0.8109889599362021, iteration: 250685
loss: 0.9877583384513855,grad_norm: 0.9443365711126699, iteration: 250686
loss: 0.9896267056465149,grad_norm: 0.8471553486696419, iteration: 250687
loss: 0.9921635389328003,grad_norm: 0.9749863170122121, iteration: 250688
loss: 0.9847573637962341,grad_norm: 0.7856085266519963, iteration: 250689
loss: 1.000649333000183,grad_norm: 0.8639003831711526, iteration: 250690
loss: 1.0124337673187256,grad_norm: 0.8825346832971004, iteration: 250691
loss: 0.9599502086639404,grad_norm: 0.8272428374786207, iteration: 250692
loss: 0.953449547290802,grad_norm: 0.8324226852347185, iteration: 250693
loss: 0.9903624057769775,grad_norm: 0.8912346806260608, iteration: 250694
loss: 1.0235258340835571,grad_norm: 0.9999992222405377, iteration: 250695
loss: 0.9710544347763062,grad_norm: 0.8580900597388664, iteration: 250696
loss: 1.0071840286254883,grad_norm: 0.8712511567018991, iteration: 250697
loss: 1.020669937133789,grad_norm: 0.9999990953115364, iteration: 250698
loss: 1.0055102109909058,grad_norm: 0.9999991000485562, iteration: 250699
loss: 1.0019387006759644,grad_norm: 0.9494672394980966, iteration: 250700
loss: 1.0249571800231934,grad_norm: 0.8471065667287219, iteration: 250701
loss: 0.9601820707321167,grad_norm: 0.9434952788830144, iteration: 250702
loss: 1.1410456895828247,grad_norm: 0.9999996920166445, iteration: 250703
loss: 1.0085275173187256,grad_norm: 0.9999989450460823, iteration: 250704
loss: 1.0234135389328003,grad_norm: 0.999999112585898, iteration: 250705
loss: 0.9498074650764465,grad_norm: 0.9125163800847911, iteration: 250706
loss: 1.0047004222869873,grad_norm: 0.920027715034178, iteration: 250707
loss: 0.967358410358429,grad_norm: 0.8106888679897524, iteration: 250708
loss: 0.9875101447105408,grad_norm: 0.8584331610288359, iteration: 250709
loss: 0.9953625798225403,grad_norm: 0.8923488636627112, iteration: 250710
loss: 0.9980777502059937,grad_norm: 0.999999685210843, iteration: 250711
loss: 1.0429601669311523,grad_norm: 0.8586105384071362, iteration: 250712
loss: 1.0273925065994263,grad_norm: 0.9442538312275593, iteration: 250713
loss: 0.9981725215911865,grad_norm: 0.9999990674088266, iteration: 250714
loss: 1.0495002269744873,grad_norm: 0.9999998562599713, iteration: 250715
loss: 1.0099821090698242,grad_norm: 0.8358261490712044, iteration: 250716
loss: 1.0422230958938599,grad_norm: 1.0000000297306468, iteration: 250717
loss: 1.0106263160705566,grad_norm: 0.7928848642669819, iteration: 250718
loss: 0.9690747261047363,grad_norm: 0.9796878156278235, iteration: 250719
loss: 0.9785416722297668,grad_norm: 0.9795394885012665, iteration: 250720
loss: 0.9799824357032776,grad_norm: 0.8806335225114916, iteration: 250721
loss: 1.0009950399398804,grad_norm: 0.9959050421193116, iteration: 250722
loss: 1.0291639566421509,grad_norm: 0.999999461262084, iteration: 250723
loss: 0.9984594583511353,grad_norm: 0.8721905794159908, iteration: 250724
loss: 0.9483295679092407,grad_norm: 0.9311176774846983, iteration: 250725
loss: 0.9954812526702881,grad_norm: 0.8861067046607982, iteration: 250726
loss: 1.1375668048858643,grad_norm: 0.9999994471359377, iteration: 250727
loss: 0.993286669254303,grad_norm: 0.9513693919408338, iteration: 250728
loss: 0.9762199521064758,grad_norm: 0.9329298968327361, iteration: 250729
loss: 0.9589698314666748,grad_norm: 0.9552287000129153, iteration: 250730
loss: 1.010125756263733,grad_norm: 0.9999990895249689, iteration: 250731
loss: 0.995713472366333,grad_norm: 0.8952939570535189, iteration: 250732
loss: 0.9833531379699707,grad_norm: 0.9999990741854183, iteration: 250733
loss: 0.9932800531387329,grad_norm: 0.9138880363174776, iteration: 250734
loss: 0.9901173114776611,grad_norm: 0.9999993985160255, iteration: 250735
loss: 1.008129358291626,grad_norm: 0.7744404491814803, iteration: 250736
loss: 1.0201740264892578,grad_norm: 0.8618675260967927, iteration: 250737
loss: 0.9699375629425049,grad_norm: 0.9999992695930154, iteration: 250738
loss: 0.9891824722290039,grad_norm: 0.7702559979090933, iteration: 250739
loss: 0.98597252368927,grad_norm: 0.9578192510392812, iteration: 250740
loss: 1.0139342546463013,grad_norm: 0.9912815271135643, iteration: 250741
loss: 0.966996431350708,grad_norm: 0.878508657542113, iteration: 250742
loss: 0.9898163080215454,grad_norm: 0.9999996961823262, iteration: 250743
loss: 1.0157618522644043,grad_norm: 0.8685589022024165, iteration: 250744
loss: 1.0082777738571167,grad_norm: 0.8625712346735411, iteration: 250745
loss: 0.9824039340019226,grad_norm: 0.8318084214129322, iteration: 250746
loss: 1.0349524021148682,grad_norm: 0.9999998177208225, iteration: 250747
loss: 0.9526391625404358,grad_norm: 0.8842779593492832, iteration: 250748
loss: 1.0198122262954712,grad_norm: 0.9999990140857614, iteration: 250749
loss: 0.9981226921081543,grad_norm: 0.932255321675583, iteration: 250750
loss: 1.0241791009902954,grad_norm: 0.8448759391777868, iteration: 250751
loss: 1.0203040838241577,grad_norm: 0.8784656971925789, iteration: 250752
loss: 1.017318844795227,grad_norm: 0.9999992051809543, iteration: 250753
loss: 0.9537607431411743,grad_norm: 0.9999993182606773, iteration: 250754
loss: 1.0032764673233032,grad_norm: 0.8462002677526779, iteration: 250755
loss: 1.0132747888565063,grad_norm: 0.7731171093593361, iteration: 250756
loss: 0.999887228012085,grad_norm: 0.8859618417752237, iteration: 250757
loss: 1.0148208141326904,grad_norm: 0.913315198129084, iteration: 250758
loss: 0.9870308637619019,grad_norm: 0.9999990958228874, iteration: 250759
loss: 0.9856424331665039,grad_norm: 0.9158073738800107, iteration: 250760
loss: 1.0021722316741943,grad_norm: 0.9999991448681119, iteration: 250761
loss: 0.9918422698974609,grad_norm: 0.9944103989316448, iteration: 250762
loss: 0.9842819571495056,grad_norm: 0.9999990804651032, iteration: 250763
loss: 1.0270733833312988,grad_norm: 0.9078521744100276, iteration: 250764
loss: 0.984748125076294,grad_norm: 0.97023433471256, iteration: 250765
loss: 1.0365192890167236,grad_norm: 0.8401668710842872, iteration: 250766
loss: 0.9664182662963867,grad_norm: 0.8166168284983623, iteration: 250767
loss: 0.9872837066650391,grad_norm: 0.875122767720253, iteration: 250768
loss: 0.9440172910690308,grad_norm: 0.9096946941584813, iteration: 250769
loss: 0.9725444316864014,grad_norm: 0.8656877282752778, iteration: 250770
loss: 0.9932151436805725,grad_norm: 0.8624398233413072, iteration: 250771
loss: 0.983694314956665,grad_norm: 0.9111476889627833, iteration: 250772
loss: 0.9808794856071472,grad_norm: 0.9999990187462966, iteration: 250773
loss: 0.9639468193054199,grad_norm: 0.8749861966992022, iteration: 250774
loss: 0.9900322556495667,grad_norm: 0.7988600399102693, iteration: 250775
loss: 0.9940879344940186,grad_norm: 0.8853085896368368, iteration: 250776
loss: 1.0046674013137817,grad_norm: 0.9548402754877857, iteration: 250777
loss: 1.003758192062378,grad_norm: 0.9806905501071709, iteration: 250778
loss: 1.0255825519561768,grad_norm: 0.9998142869831899, iteration: 250779
loss: 0.9996235966682434,grad_norm: 0.9999994636294475, iteration: 250780
loss: 1.004853367805481,grad_norm: 0.873508391972848, iteration: 250781
loss: 0.9726126194000244,grad_norm: 0.9613860445630263, iteration: 250782
loss: 1.011419653892517,grad_norm: 0.8979802256319771, iteration: 250783
loss: 1.0203163623809814,grad_norm: 0.8411985460858246, iteration: 250784
loss: 0.9986211657524109,grad_norm: 0.8469305146088315, iteration: 250785
loss: 0.9873120784759521,grad_norm: 0.7767912670392649, iteration: 250786
loss: 1.0124038457870483,grad_norm: 0.8973055469317492, iteration: 250787
loss: 0.9985399842262268,grad_norm: 0.830361261887111, iteration: 250788
loss: 1.0470046997070312,grad_norm: 0.9999990618960019, iteration: 250789
loss: 0.9964775443077087,grad_norm: 0.8268245796480022, iteration: 250790
loss: 0.9670774936676025,grad_norm: 0.999999285489325, iteration: 250791
loss: 0.9577623009681702,grad_norm: 0.87485692835545, iteration: 250792
loss: 0.9766698479652405,grad_norm: 0.8670896197831386, iteration: 250793
loss: 0.9976024627685547,grad_norm: 0.8325296360052199, iteration: 250794
loss: 1.0457128286361694,grad_norm: 0.9999989680536545, iteration: 250795
loss: 0.9900678992271423,grad_norm: 0.9478295790525124, iteration: 250796
loss: 1.0891221761703491,grad_norm: 0.9999992164086242, iteration: 250797
loss: 1.1089617013931274,grad_norm: 0.8830068575872232, iteration: 250798
loss: 1.035046100616455,grad_norm: 0.9418193057687595, iteration: 250799
loss: 0.9838505387306213,grad_norm: 0.867810442917135, iteration: 250800
loss: 1.012489676475525,grad_norm: 0.8748990118352297, iteration: 250801
loss: 1.023976445198059,grad_norm: 0.9785539823729635, iteration: 250802
loss: 0.9992005228996277,grad_norm: 0.9079054300165669, iteration: 250803
loss: 1.0146790742874146,grad_norm: 0.8748711976386319, iteration: 250804
loss: 0.9967095255851746,grad_norm: 0.9707042406664831, iteration: 250805
loss: 0.9749446511268616,grad_norm: 0.9999995882399152, iteration: 250806
loss: 1.0039284229278564,grad_norm: 0.9999991097161488, iteration: 250807
loss: 1.0038129091262817,grad_norm: 0.931943912686869, iteration: 250808
loss: 1.014762043952942,grad_norm: 0.7014146514970468, iteration: 250809
loss: 0.9565285444259644,grad_norm: 0.9610622094818285, iteration: 250810
loss: 1.0343515872955322,grad_norm: 0.9999991891421689, iteration: 250811
loss: 0.9791480302810669,grad_norm: 0.9486257948798578, iteration: 250812
loss: 0.9887112975120544,grad_norm: 0.8963571545340987, iteration: 250813
loss: 0.969730019569397,grad_norm: 0.8978236494886409, iteration: 250814
loss: 0.9951705932617188,grad_norm: 0.808136382573175, iteration: 250815
loss: 0.9728081226348877,grad_norm: 0.9999991636257031, iteration: 250816
loss: 1.0392980575561523,grad_norm: 0.9999991812744051, iteration: 250817
loss: 1.0354629755020142,grad_norm: 0.8314125357128862, iteration: 250818
loss: 0.9854236245155334,grad_norm: 0.790660808668949, iteration: 250819
loss: 1.0354633331298828,grad_norm: 0.9999990953147961, iteration: 250820
loss: 0.9535355567932129,grad_norm: 0.8962839184316683, iteration: 250821
loss: 0.9901869297027588,grad_norm: 0.999998997074044, iteration: 250822
loss: 1.0055079460144043,grad_norm: 0.9037687925721893, iteration: 250823
loss: 1.0082534551620483,grad_norm: 0.9165736990219174, iteration: 250824
loss: 1.0133137702941895,grad_norm: 0.9999990525508327, iteration: 250825
loss: 1.0043725967407227,grad_norm: 0.7419704092869919, iteration: 250826
loss: 1.0466642379760742,grad_norm: 0.8491436416984061, iteration: 250827
loss: 1.0381667613983154,grad_norm: 0.8767493562577664, iteration: 250828
loss: 1.0265114307403564,grad_norm: 0.9999990362274999, iteration: 250829
loss: 0.9851635694503784,grad_norm: 0.9999990464729145, iteration: 250830
loss: 0.9632698893547058,grad_norm: 0.9693507421562612, iteration: 250831
loss: 1.017602801322937,grad_norm: 0.9415005618483661, iteration: 250832
loss: 1.0660258531570435,grad_norm: 0.9999993501367598, iteration: 250833
loss: 1.013412356376648,grad_norm: 0.9999994882212656, iteration: 250834
loss: 1.0147589445114136,grad_norm: 0.9999990829548524, iteration: 250835
loss: 0.9690217971801758,grad_norm: 0.9906866772250236, iteration: 250836
loss: 0.9981243014335632,grad_norm: 0.8117100038977162, iteration: 250837
loss: 1.0113030672073364,grad_norm: 0.9999992541438886, iteration: 250838
loss: 1.0001312494277954,grad_norm: 0.9999990589014912, iteration: 250839
loss: 0.9721673130989075,grad_norm: 0.7165735598161553, iteration: 250840
loss: 1.0003457069396973,grad_norm: 0.8564574939822752, iteration: 250841
loss: 0.9973330497741699,grad_norm: 0.9685700404837009, iteration: 250842
loss: 0.9705721139907837,grad_norm: 0.9802665415944822, iteration: 250843
loss: 0.9652528166770935,grad_norm: 0.8906299919198518, iteration: 250844
loss: 0.9784161448478699,grad_norm: 0.92707866700432, iteration: 250845
loss: 0.9880959987640381,grad_norm: 0.7827356684156108, iteration: 250846
loss: 1.0069202184677124,grad_norm: 0.9999991301551487, iteration: 250847
loss: 0.9993910193443298,grad_norm: 0.8114105040022214, iteration: 250848
loss: 1.0308619737625122,grad_norm: 0.9999999725250297, iteration: 250849
loss: 0.9844277501106262,grad_norm: 0.7942463190459534, iteration: 250850
loss: 1.0256261825561523,grad_norm: 0.9001702604157138, iteration: 250851
loss: 0.9628819823265076,grad_norm: 0.8302478817333815, iteration: 250852
loss: 1.0035080909729004,grad_norm: 0.9999990406163118, iteration: 250853
loss: 1.0230815410614014,grad_norm: 0.7772532046515493, iteration: 250854
loss: 0.9811642169952393,grad_norm: 0.9999993519662944, iteration: 250855
loss: 1.045098900794983,grad_norm: 0.9999991314323746, iteration: 250856
loss: 1.013997197151184,grad_norm: 0.9407631207446946, iteration: 250857
loss: 1.021241545677185,grad_norm: 0.9768031362834195, iteration: 250858
loss: 1.0010093450546265,grad_norm: 0.9130951916137832, iteration: 250859
loss: 1.056553840637207,grad_norm: 0.9999998591652826, iteration: 250860
loss: 1.025102138519287,grad_norm: 0.9740388061929782, iteration: 250861
loss: 1.004960536956787,grad_norm: 0.8793111763906548, iteration: 250862
loss: 0.9931655526161194,grad_norm: 0.8584302697722422, iteration: 250863
loss: 0.9705814719200134,grad_norm: 0.8127903037422838, iteration: 250864
loss: 0.9623061418533325,grad_norm: 0.8298010885110024, iteration: 250865
loss: 1.0467205047607422,grad_norm: 0.9570706723836702, iteration: 250866
loss: 1.0167958736419678,grad_norm: 0.9446715934147045, iteration: 250867
loss: 1.0199379920959473,grad_norm: 0.8085605374314976, iteration: 250868
loss: 0.99599289894104,grad_norm: 0.8002302638971028, iteration: 250869
loss: 1.0170446634292603,grad_norm: 0.9452454324649885, iteration: 250870
loss: 0.9929804801940918,grad_norm: 0.8159877120068669, iteration: 250871
loss: 0.9897761940956116,grad_norm: 0.8106438400930042, iteration: 250872
loss: 0.9975481033325195,grad_norm: 0.8316668743346448, iteration: 250873
loss: 0.9978957176208496,grad_norm: 0.9850364594242264, iteration: 250874
loss: 1.0306622982025146,grad_norm: 0.8503830860659531, iteration: 250875
loss: 1.0055651664733887,grad_norm: 0.7754033893974607, iteration: 250876
loss: 0.9834180474281311,grad_norm: 0.7904078831018919, iteration: 250877
loss: 0.985075056552887,grad_norm: 0.7935980043950908, iteration: 250878
loss: 0.9892746210098267,grad_norm: 0.9359822868749212, iteration: 250879
loss: 0.9676500558853149,grad_norm: 0.9175195482244496, iteration: 250880
loss: 1.0092655420303345,grad_norm: 0.8278826222096439, iteration: 250881
loss: 1.0278483629226685,grad_norm: 0.822012118789268, iteration: 250882
loss: 1.0280404090881348,grad_norm: 0.9999989376951899, iteration: 250883
loss: 1.0247136354446411,grad_norm: 0.9999997887579376, iteration: 250884
loss: 1.0007013082504272,grad_norm: 0.7783376898270141, iteration: 250885
loss: 1.0016382932662964,grad_norm: 0.8495735225185482, iteration: 250886
loss: 1.0012898445129395,grad_norm: 0.7882916370850649, iteration: 250887
loss: 1.0017590522766113,grad_norm: 0.9552988226151756, iteration: 250888
loss: 1.0299005508422852,grad_norm: 0.9704761337875822, iteration: 250889
loss: 1.0375155210494995,grad_norm: 0.999998976982877, iteration: 250890
loss: 1.0465092658996582,grad_norm: 0.9999991149798609, iteration: 250891
loss: 0.9544364809989929,grad_norm: 0.8333869785566775, iteration: 250892
loss: 1.011184811592102,grad_norm: 0.8604634982782271, iteration: 250893
loss: 1.0018209218978882,grad_norm: 0.8729973766821634, iteration: 250894
loss: 1.003755807876587,grad_norm: 0.9999992611381522, iteration: 250895
loss: 0.9709186553955078,grad_norm: 0.9999990478113056, iteration: 250896
loss: 1.0301789045333862,grad_norm: 0.835582920267606, iteration: 250897
loss: 0.9725745916366577,grad_norm: 0.9052233289700186, iteration: 250898
loss: 0.9945663809776306,grad_norm: 0.9999990465850044, iteration: 250899
loss: 0.9889087677001953,grad_norm: 0.9202926788165711, iteration: 250900
loss: 1.0055875778198242,grad_norm: 0.830433880848638, iteration: 250901
loss: 0.9989778995513916,grad_norm: 0.9703365657656672, iteration: 250902
loss: 1.0018612146377563,grad_norm: 0.9556697501128041, iteration: 250903
loss: 0.9943896532058716,grad_norm: 0.9999990539814807, iteration: 250904
loss: 1.032036542892456,grad_norm: 0.9999998376318403, iteration: 250905
loss: 1.0191192626953125,grad_norm: 0.7629700394176625, iteration: 250906
loss: 1.0206856727600098,grad_norm: 0.8007732665663116, iteration: 250907
loss: 0.986329972743988,grad_norm: 0.9486891841447571, iteration: 250908
loss: 1.0223312377929688,grad_norm: 0.9284178835173954, iteration: 250909
loss: 0.985925555229187,grad_norm: 0.8610044639203144, iteration: 250910
loss: 1.0166125297546387,grad_norm: 0.9701068974618614, iteration: 250911
loss: 0.9988124370574951,grad_norm: 0.9098278764042924, iteration: 250912
loss: 1.034582495689392,grad_norm: 0.8802082891734694, iteration: 250913
loss: 1.014733910560608,grad_norm: 0.8292343964579065, iteration: 250914
loss: 1.0289028882980347,grad_norm: 0.9999992392383344, iteration: 250915
loss: 1.03913152217865,grad_norm: 0.9999997455980693, iteration: 250916
loss: 1.0329631567001343,grad_norm: 0.9999995693800664, iteration: 250917
loss: 0.9705114960670471,grad_norm: 0.7416482105677519, iteration: 250918
loss: 0.9908386468887329,grad_norm: 0.749022999819291, iteration: 250919
loss: 1.0136818885803223,grad_norm: 0.9999991198594315, iteration: 250920
loss: 1.0079689025878906,grad_norm: 0.9090846394414606, iteration: 250921
loss: 0.9884116649627686,grad_norm: 0.9264786788028845, iteration: 250922
loss: 0.9955291152000427,grad_norm: 0.8543954967345297, iteration: 250923
loss: 0.9745157361030579,grad_norm: 0.9999989945384126, iteration: 250924
loss: 1.0064727067947388,grad_norm: 0.936911382281795, iteration: 250925
loss: 0.9623420238494873,grad_norm: 0.8681715360039864, iteration: 250926
loss: 0.9848225116729736,grad_norm: 0.8709738371259356, iteration: 250927
loss: 0.9551243782043457,grad_norm: 0.9999994688489768, iteration: 250928
loss: 0.9615537524223328,grad_norm: 0.9999991403553206, iteration: 250929
loss: 0.9930395483970642,grad_norm: 0.8376264374230532, iteration: 250930
loss: 0.9992315173149109,grad_norm: 0.9292975627586769, iteration: 250931
loss: 1.0287905931472778,grad_norm: 0.838143709548782, iteration: 250932
loss: 1.019067645072937,grad_norm: 0.9999993500671691, iteration: 250933
loss: 1.0250574350357056,grad_norm: 0.885715965954641, iteration: 250934
loss: 1.0186140537261963,grad_norm: 0.8774568984715335, iteration: 250935
loss: 1.0150635242462158,grad_norm: 0.9999989328908104, iteration: 250936
loss: 0.999075174331665,grad_norm: 0.9237545112914638, iteration: 250937
loss: 0.9878901839256287,grad_norm: 0.999999111876866, iteration: 250938
loss: 1.0069419145584106,grad_norm: 0.9251573887045793, iteration: 250939
loss: 1.0031630992889404,grad_norm: 0.8853652475948754, iteration: 250940
loss: 0.9775259494781494,grad_norm: 0.6843342509535142, iteration: 250941
loss: 0.98450767993927,grad_norm: 0.8804494640464775, iteration: 250942
loss: 1.0092217922210693,grad_norm: 0.8919841096455139, iteration: 250943
loss: 0.9512079358100891,grad_norm: 0.961192715519054, iteration: 250944
loss: 0.9797834157943726,grad_norm: 0.9237608179681546, iteration: 250945
loss: 1.0277175903320312,grad_norm: 0.9999990014370039, iteration: 250946
loss: 0.9966500997543335,grad_norm: 0.8938363010311474, iteration: 250947
loss: 0.9700700640678406,grad_norm: 0.8355272962742443, iteration: 250948
loss: 1.0088226795196533,grad_norm: 0.9756331356809472, iteration: 250949
loss: 1.0482827425003052,grad_norm: 0.9999992261821432, iteration: 250950
loss: 0.9837930202484131,grad_norm: 0.8856055468783319, iteration: 250951
loss: 1.004622220993042,grad_norm: 0.9999990866775998, iteration: 250952
loss: 1.023863673210144,grad_norm: 0.9999990717567646, iteration: 250953
loss: 1.0035747289657593,grad_norm: 0.9999990678937456, iteration: 250954
loss: 1.0006357431411743,grad_norm: 0.8675763466872304, iteration: 250955
loss: 0.9974197149276733,grad_norm: 0.9999995062112799, iteration: 250956
loss: 1.0338534116744995,grad_norm: 0.999999876957414, iteration: 250957
loss: 0.9907172322273254,grad_norm: 0.9409121809480246, iteration: 250958
loss: 1.0336246490478516,grad_norm: 0.9999994136313767, iteration: 250959
loss: 1.027357578277588,grad_norm: 0.9999991894447094, iteration: 250960
loss: 1.0005979537963867,grad_norm: 0.7660734035596268, iteration: 250961
loss: 1.0118416547775269,grad_norm: 0.9999992036839045, iteration: 250962
loss: 1.0001479387283325,grad_norm: 0.98561109202466, iteration: 250963
loss: 0.9960552453994751,grad_norm: 0.7350556647298359, iteration: 250964
loss: 1.0241495370864868,grad_norm: 0.938999105505484, iteration: 250965
loss: 0.9877923130989075,grad_norm: 0.8005633969045599, iteration: 250966
loss: 1.0291402339935303,grad_norm: 0.9898848652310729, iteration: 250967
loss: 1.0397619009017944,grad_norm: 0.9999990099474068, iteration: 250968
loss: 1.0323371887207031,grad_norm: 0.8645106525541693, iteration: 250969
loss: 0.9582539200782776,grad_norm: 0.9977027954449997, iteration: 250970
loss: 1.0228118896484375,grad_norm: 0.767879437343394, iteration: 250971
loss: 0.9912891983985901,grad_norm: 0.8456033698172172, iteration: 250972
loss: 0.9794681072235107,grad_norm: 0.7907463964846267, iteration: 250973
loss: 1.0061792135238647,grad_norm: 0.8227655423615469, iteration: 250974
loss: 0.9983241558074951,grad_norm: 0.941331205993316, iteration: 250975
loss: 0.9719427227973938,grad_norm: 0.9999991477141416, iteration: 250976
loss: 0.9996303915977478,grad_norm: 0.6874694486067902, iteration: 250977
loss: 1.0283783674240112,grad_norm: 0.9533804692227495, iteration: 250978
loss: 0.9723497033119202,grad_norm: 0.9152899985056849, iteration: 250979
loss: 1.0223206281661987,grad_norm: 0.827119363340441, iteration: 250980
loss: 1.032813549041748,grad_norm: 0.7479340505814547, iteration: 250981
loss: 0.9839115738868713,grad_norm: 0.9872238499914681, iteration: 250982
loss: 0.9732566475868225,grad_norm: 0.6950417374986948, iteration: 250983
loss: 0.9963622689247131,grad_norm: 0.8856377591789303, iteration: 250984
loss: 0.9848152995109558,grad_norm: 0.9999990145335896, iteration: 250985
loss: 1.0268027782440186,grad_norm: 0.9416061413905336, iteration: 250986
loss: 0.9665353298187256,grad_norm: 0.9999991654919973, iteration: 250987
loss: 1.0135259628295898,grad_norm: 0.8479546024408023, iteration: 250988
loss: 0.9983109831809998,grad_norm: 0.9288495402260991, iteration: 250989
loss: 1.0116764307022095,grad_norm: 0.9652917218971764, iteration: 250990
loss: 1.0021083354949951,grad_norm: 0.9999990046429074, iteration: 250991
loss: 1.0105030536651611,grad_norm: 0.8638515595095136, iteration: 250992
loss: 1.007249355316162,grad_norm: 0.9040188345541419, iteration: 250993
loss: 1.0112390518188477,grad_norm: 0.9178114230036044, iteration: 250994
loss: 1.031267762184143,grad_norm: 0.9275377213022232, iteration: 250995
loss: 1.0364667177200317,grad_norm: 0.9769363411651227, iteration: 250996
loss: 0.9709581136703491,grad_norm: 0.8979763076803849, iteration: 250997
loss: 1.0130599737167358,grad_norm: 0.9999990572701691, iteration: 250998
loss: 1.0105271339416504,grad_norm: 0.9667860715352112, iteration: 250999
loss: 0.9766142964363098,grad_norm: 0.9179239866566469, iteration: 251000
loss: 0.9683754444122314,grad_norm: 0.9072817009644127, iteration: 251001
loss: 0.9933920502662659,grad_norm: 0.9458895154018466, iteration: 251002
loss: 1.006856083869934,grad_norm: 0.7787284981030175, iteration: 251003
loss: 0.9959220886230469,grad_norm: 0.8740795832183406, iteration: 251004
loss: 1.0361570119857788,grad_norm: 0.8300239003420665, iteration: 251005
loss: 0.9700913429260254,grad_norm: 0.9999991222221685, iteration: 251006
loss: 0.978712797164917,grad_norm: 0.8392536668918968, iteration: 251007
loss: 0.9836615920066833,grad_norm: 0.8733934095847494, iteration: 251008
loss: 0.9643309116363525,grad_norm: 0.8310781292469276, iteration: 251009
loss: 1.0209003686904907,grad_norm: 0.785003849591944, iteration: 251010
loss: 1.0286058187484741,grad_norm: 0.9447233270100718, iteration: 251011
loss: 0.9820612072944641,grad_norm: 0.9999991237423561, iteration: 251012
loss: 1.0221917629241943,grad_norm: 0.8818326913090457, iteration: 251013
loss: 0.9909205436706543,grad_norm: 0.9075906075437821, iteration: 251014
loss: 1.0049211978912354,grad_norm: 0.8214167584746057, iteration: 251015
loss: 1.0168628692626953,grad_norm: 0.9849066517936221, iteration: 251016
loss: 1.0349431037902832,grad_norm: 0.9925350037744312, iteration: 251017
loss: 0.9947870373725891,grad_norm: 0.8651515734230137, iteration: 251018
loss: 1.0333737134933472,grad_norm: 0.99999942379132, iteration: 251019
loss: 1.0609095096588135,grad_norm: 0.9294462128416023, iteration: 251020
loss: 1.0067282915115356,grad_norm: 0.9999990824828876, iteration: 251021
loss: 1.000073790550232,grad_norm: 0.9510890844594628, iteration: 251022
loss: 1.0260347127914429,grad_norm: 0.9999991836987399, iteration: 251023
loss: 1.0189064741134644,grad_norm: 0.9625821527320698, iteration: 251024
loss: 0.9790863394737244,grad_norm: 0.8346337712386415, iteration: 251025
loss: 0.9926102757453918,grad_norm: 0.9726039918884449, iteration: 251026
loss: 1.0297982692718506,grad_norm: 0.9999991383151549, iteration: 251027
loss: 1.0214436054229736,grad_norm: 0.9999990090603328, iteration: 251028
loss: 0.9725569486618042,grad_norm: 0.9257451454151462, iteration: 251029
loss: 0.9964778423309326,grad_norm: 0.928535288518948, iteration: 251030
loss: 0.9712914824485779,grad_norm: 0.743592284522076, iteration: 251031
loss: 1.0395524501800537,grad_norm: 0.7084192844581946, iteration: 251032
loss: 0.9972891211509705,grad_norm: 0.9627048572596781, iteration: 251033
loss: 1.0146918296813965,grad_norm: 0.8029153639333029, iteration: 251034
loss: 0.9581518173217773,grad_norm: 0.916400650739434, iteration: 251035
loss: 1.0078445672988892,grad_norm: 0.9342908486580848, iteration: 251036
loss: 1.013185739517212,grad_norm: 0.9228901866746262, iteration: 251037
loss: 1.0250498056411743,grad_norm: 0.9999998639539227, iteration: 251038
loss: 0.9995061159133911,grad_norm: 0.8453584139151437, iteration: 251039
loss: 1.0183080434799194,grad_norm: 0.8490291217027786, iteration: 251040
loss: 1.0132282972335815,grad_norm: 0.902300669119411, iteration: 251041
loss: 1.0143685340881348,grad_norm: 0.8646516981266532, iteration: 251042
loss: 1.0180264711380005,grad_norm: 0.9493498403724142, iteration: 251043
loss: 0.9834162592887878,grad_norm: 0.8962517363367423, iteration: 251044
loss: 1.0043339729309082,grad_norm: 0.8877759121552824, iteration: 251045
loss: 1.029335856437683,grad_norm: 0.8686670005878231, iteration: 251046
loss: 0.9793713688850403,grad_norm: 0.7811366562210256, iteration: 251047
loss: 1.0442768335342407,grad_norm: 0.9534734359272153, iteration: 251048
loss: 0.9753884673118591,grad_norm: 0.9999991361801406, iteration: 251049
loss: 0.9697833061218262,grad_norm: 0.9002477692237011, iteration: 251050
loss: 0.9714781641960144,grad_norm: 0.9588665659025041, iteration: 251051
loss: 1.0059951543807983,grad_norm: 0.9169417316362901, iteration: 251052
loss: 1.010291337966919,grad_norm: 0.8768463710075648, iteration: 251053
loss: 1.0042619705200195,grad_norm: 0.8591396295050837, iteration: 251054
loss: 1.0149734020233154,grad_norm: 0.925214493984672, iteration: 251055
loss: 1.5126535892486572,grad_norm: 0.9999996427812035, iteration: 251056
loss: 0.976671040058136,grad_norm: 0.7969290660614717, iteration: 251057
loss: 0.9953279495239258,grad_norm: 0.8319443060507994, iteration: 251058
loss: 1.0260623693466187,grad_norm: 0.9999997325921324, iteration: 251059
loss: 1.0205610990524292,grad_norm: 0.9907212841190851, iteration: 251060
loss: 0.9782409071922302,grad_norm: 0.9311153144065033, iteration: 251061
loss: 0.9860922694206238,grad_norm: 0.9321775936312452, iteration: 251062
loss: 0.9965967535972595,grad_norm: 0.8475932532647209, iteration: 251063
loss: 0.983858048915863,grad_norm: 0.8921303114972888, iteration: 251064
loss: 1.0109145641326904,grad_norm: 0.9084766558263961, iteration: 251065
loss: 1.0141024589538574,grad_norm: 0.9999990365831208, iteration: 251066
loss: 0.9929624199867249,grad_norm: 0.8253205590435071, iteration: 251067
loss: 0.9601778388023376,grad_norm: 0.9999991346278622, iteration: 251068
loss: 0.9937398433685303,grad_norm: 0.9999992485156668, iteration: 251069
loss: 1.0419807434082031,grad_norm: 0.9999990313275657, iteration: 251070
loss: 1.0218031406402588,grad_norm: 0.9739416711660617, iteration: 251071
loss: 0.9803948998451233,grad_norm: 0.8770026017182319, iteration: 251072
loss: 0.9971362948417664,grad_norm: 0.900760654772318, iteration: 251073
loss: 1.0187381505966187,grad_norm: 0.7967521830362075, iteration: 251074
loss: 0.9776396155357361,grad_norm: 0.8980617508292196, iteration: 251075
loss: 0.9872268438339233,grad_norm: 0.913293446547752, iteration: 251076
loss: 1.003452181816101,grad_norm: 0.931395889573352, iteration: 251077
loss: 0.9839783906936646,grad_norm: 0.9350663794287147, iteration: 251078
loss: 0.9767056703567505,grad_norm: 0.9999990215722812, iteration: 251079
loss: 0.9879592061042786,grad_norm: 0.8579555460832418, iteration: 251080
loss: 0.9970678091049194,grad_norm: 0.9999990696539238, iteration: 251081
loss: 1.001070499420166,grad_norm: 0.8807475987658355, iteration: 251082
loss: 1.0117741823196411,grad_norm: 0.9999993915987817, iteration: 251083
loss: 0.9893958568572998,grad_norm: 0.8726184650267064, iteration: 251084
loss: 1.0339610576629639,grad_norm: 0.898244735954609, iteration: 251085
loss: 0.9803510904312134,grad_norm: 0.9976035418755222, iteration: 251086
loss: 1.0040415525436401,grad_norm: 0.7042275662136969, iteration: 251087
loss: 0.9888015985488892,grad_norm: 0.9441066988885904, iteration: 251088
loss: 1.01736319065094,grad_norm: 0.9313830040852678, iteration: 251089
loss: 0.9922449588775635,grad_norm: 0.9229532950523397, iteration: 251090
loss: 1.0253186225891113,grad_norm: 0.9619272184719566, iteration: 251091
loss: 0.9919950366020203,grad_norm: 0.9999991167775412, iteration: 251092
loss: 0.9948726296424866,grad_norm: 0.7962611822757194, iteration: 251093
loss: 0.988639771938324,grad_norm: 0.9999998966040624, iteration: 251094
loss: 0.9729289412498474,grad_norm: 0.7355734144741438, iteration: 251095
loss: 0.9953357577323914,grad_norm: 0.8036809748470741, iteration: 251096
loss: 0.9857357144355774,grad_norm: 0.8912692763294422, iteration: 251097
loss: 0.9901326894760132,grad_norm: 0.9999993237168782, iteration: 251098
loss: 1.0069061517715454,grad_norm: 0.8051111345564969, iteration: 251099
loss: 1.0213929414749146,grad_norm: 0.9999990360319534, iteration: 251100
loss: 1.01908278465271,grad_norm: 0.7820655091503027, iteration: 251101
loss: 1.0083829164505005,grad_norm: 0.928251799231131, iteration: 251102
loss: 0.960321307182312,grad_norm: 0.999999108890244, iteration: 251103
loss: 0.9592465758323669,grad_norm: 0.9254761495324463, iteration: 251104
loss: 0.9915759563446045,grad_norm: 0.8004543872757923, iteration: 251105
loss: 0.9983863234519958,grad_norm: 0.752186864099073, iteration: 251106
loss: 0.9810977578163147,grad_norm: 0.6827897780619591, iteration: 251107
loss: 1.0080171823501587,grad_norm: 0.8697947091676372, iteration: 251108
loss: 0.9986903667449951,grad_norm: 0.9999992194863642, iteration: 251109
loss: 0.9878674149513245,grad_norm: 0.9814902816660283, iteration: 251110
loss: 0.9774778485298157,grad_norm: 0.9999992301606282, iteration: 251111
loss: 0.9879979491233826,grad_norm: 0.9999991763263604, iteration: 251112
loss: 0.9859322905540466,grad_norm: 0.9626841618494631, iteration: 251113
loss: 1.0176819562911987,grad_norm: 0.8700362741809006, iteration: 251114
loss: 1.0127143859863281,grad_norm: 0.9999991426278357, iteration: 251115
loss: 0.9982272386550903,grad_norm: 0.9847361882160042, iteration: 251116
loss: 0.9983339905738831,grad_norm: 0.9672162354844355, iteration: 251117
loss: 0.9993232488632202,grad_norm: 0.9999996381585976, iteration: 251118
loss: 0.9787687063217163,grad_norm: 0.9999991501426938, iteration: 251119
loss: 1.0065045356750488,grad_norm: 0.7032631219540416, iteration: 251120
loss: 0.9854492545127869,grad_norm: 0.8333040052149766, iteration: 251121
loss: 0.9884224534034729,grad_norm: 0.8710239185633053, iteration: 251122
loss: 1.003637671470642,grad_norm: 0.9637085303649057, iteration: 251123
loss: 0.9952328205108643,grad_norm: 0.8287149262935531, iteration: 251124
loss: 1.0418685674667358,grad_norm: 0.7452410750746494, iteration: 251125
loss: 1.0074176788330078,grad_norm: 0.800025351656221, iteration: 251126
loss: 0.9914084076881409,grad_norm: 0.9999990994950703, iteration: 251127
loss: 0.9888653755187988,grad_norm: 0.7983672932711366, iteration: 251128
loss: 0.9855507016181946,grad_norm: 0.9999990613984132, iteration: 251129
loss: 0.973038911819458,grad_norm: 0.9146011520451279, iteration: 251130
loss: 1.004050374031067,grad_norm: 0.8860309799092614, iteration: 251131
loss: 1.0391520261764526,grad_norm: 0.999999143918344, iteration: 251132
loss: 0.9939061403274536,grad_norm: 0.9999990624142987, iteration: 251133
loss: 1.010161280632019,grad_norm: 0.9833784625914833, iteration: 251134
loss: 0.9904654622077942,grad_norm: 0.9570935107752798, iteration: 251135
loss: 0.9860075116157532,grad_norm: 0.8763762668817385, iteration: 251136
loss: 1.0091108083724976,grad_norm: 0.9259351947728571, iteration: 251137
loss: 0.9804033637046814,grad_norm: 0.9098220759099239, iteration: 251138
loss: 1.0055052042007446,grad_norm: 0.8741350626310745, iteration: 251139
loss: 0.9767171740531921,grad_norm: 0.9807516514364161, iteration: 251140
loss: 1.0136401653289795,grad_norm: 0.8924539526231914, iteration: 251141
loss: 0.935063898563385,grad_norm: 0.799140371384105, iteration: 251142
loss: 0.9950181245803833,grad_norm: 0.8639459172644599, iteration: 251143
loss: 1.0082581043243408,grad_norm: 0.9138876380749544, iteration: 251144
loss: 0.9884647727012634,grad_norm: 0.9999991010968644, iteration: 251145
loss: 0.9834860563278198,grad_norm: 0.9625512712290337, iteration: 251146
loss: 1.0004212856292725,grad_norm: 0.9999991619570109, iteration: 251147
loss: 0.9821196794509888,grad_norm: 0.9999991049974147, iteration: 251148
loss: 1.025084376335144,grad_norm: 0.8102837804446087, iteration: 251149
loss: 0.9945542812347412,grad_norm: 0.8978418712300656, iteration: 251150
loss: 0.9766198396682739,grad_norm: 0.8373580696877351, iteration: 251151
loss: 0.9921770691871643,grad_norm: 0.8212678135736495, iteration: 251152
loss: 1.0019735097885132,grad_norm: 0.7978071220555094, iteration: 251153
loss: 1.0448822975158691,grad_norm: 0.8019504761176292, iteration: 251154
loss: 0.9873052835464478,grad_norm: 0.8395388985460578, iteration: 251155
loss: 0.971913754940033,grad_norm: 0.9779763709762627, iteration: 251156
loss: 1.0123027563095093,grad_norm: 0.7565799533493759, iteration: 251157
loss: 1.0100890398025513,grad_norm: 0.8985895845747338, iteration: 251158
loss: 1.0228856801986694,grad_norm: 0.999999876793789, iteration: 251159
loss: 0.9730145931243896,grad_norm: 0.8757814607526152, iteration: 251160
loss: 0.9854796528816223,grad_norm: 0.7147167465054365, iteration: 251161
loss: 0.9684097766876221,grad_norm: 0.8472843749880166, iteration: 251162
loss: 1.009645700454712,grad_norm: 0.8861211320382198, iteration: 251163
loss: 1.0026323795318604,grad_norm: 0.8222228214594365, iteration: 251164
loss: 0.987680971622467,grad_norm: 0.9632996984472381, iteration: 251165
loss: 0.9849874377250671,grad_norm: 0.8962883322019198, iteration: 251166
loss: 0.9745523929595947,grad_norm: 0.9646475598691157, iteration: 251167
loss: 0.9573574662208557,grad_norm: 0.8141732429220749, iteration: 251168
loss: 0.997229278087616,grad_norm: 0.8495103898490746, iteration: 251169
loss: 1.0015066862106323,grad_norm: 0.9009498385330629, iteration: 251170
loss: 0.9859961271286011,grad_norm: 0.8671886250159744, iteration: 251171
loss: 1.000122308731079,grad_norm: 0.817218136137818, iteration: 251172
loss: 0.9914533495903015,grad_norm: 0.8362865727797657, iteration: 251173
loss: 1.0286810398101807,grad_norm: 0.8853115541030669, iteration: 251174
loss: 0.988603949546814,grad_norm: 0.9234077052141203, iteration: 251175
loss: 0.9684662222862244,grad_norm: 0.9999991060190806, iteration: 251176
loss: 1.0155563354492188,grad_norm: 0.9999993498053474, iteration: 251177
loss: 1.0163761377334595,grad_norm: 0.823025928235965, iteration: 251178
loss: 0.9946271777153015,grad_norm: 0.9396702836342609, iteration: 251179
loss: 1.0136045217514038,grad_norm: 0.9152400808034193, iteration: 251180
loss: 1.0123780965805054,grad_norm: 0.9838280744444512, iteration: 251181
loss: 0.990143358707428,grad_norm: 0.8774796638806682, iteration: 251182
loss: 1.0119282007217407,grad_norm: 0.9423581427164688, iteration: 251183
loss: 0.982604444026947,grad_norm: 0.9758879484832068, iteration: 251184
loss: 1.0405884981155396,grad_norm: 0.9999991346580093, iteration: 251185
loss: 0.9920224547386169,grad_norm: 0.9480853950190317, iteration: 251186
loss: 0.9772346615791321,grad_norm: 0.9999991021629393, iteration: 251187
loss: 0.9884147644042969,grad_norm: 0.9999990170611018, iteration: 251188
loss: 1.0276761054992676,grad_norm: 0.8509396671301069, iteration: 251189
loss: 1.0045676231384277,grad_norm: 0.9183902736999169, iteration: 251190
loss: 1.0208700895309448,grad_norm: 0.6078796713993115, iteration: 251191
loss: 0.9897793531417847,grad_norm: 0.8962083020043667, iteration: 251192
loss: 1.008643388748169,grad_norm: 0.97819182925202, iteration: 251193
loss: 0.9528033137321472,grad_norm: 0.7865760479898467, iteration: 251194
loss: 1.0224686861038208,grad_norm: 0.9999994207436925, iteration: 251195
loss: 1.0198134183883667,grad_norm: 0.9999995085725012, iteration: 251196
loss: 0.999891459941864,grad_norm: 0.8737883720102622, iteration: 251197
loss: 1.0077166557312012,grad_norm: 0.9295198433834154, iteration: 251198
loss: 0.9657063484191895,grad_norm: 0.922637751498386, iteration: 251199
loss: 0.9600374102592468,grad_norm: 0.9999992025714302, iteration: 251200
loss: 1.0385339260101318,grad_norm: 0.9313257546115216, iteration: 251201
loss: 0.9973065257072449,grad_norm: 0.9206317807879648, iteration: 251202
loss: 0.9946970343589783,grad_norm: 0.8437435352483362, iteration: 251203
loss: 0.96272873878479,grad_norm: 0.9052113444452711, iteration: 251204
loss: 0.9945568442344666,grad_norm: 0.8437883056581297, iteration: 251205
loss: 0.9905511140823364,grad_norm: 0.7171734184372991, iteration: 251206
loss: 0.9666725993156433,grad_norm: 0.8984591989435544, iteration: 251207
loss: 1.0839579105377197,grad_norm: 0.9455906725039712, iteration: 251208
loss: 1.0182818174362183,grad_norm: 0.9803282094975029, iteration: 251209
loss: 0.9662933945655823,grad_norm: 0.9999991581446486, iteration: 251210
loss: 1.0052872896194458,grad_norm: 0.7112125027250557, iteration: 251211
loss: 1.0057204961776733,grad_norm: 0.8756408301594, iteration: 251212
loss: 1.0164551734924316,grad_norm: 0.9999996856303288, iteration: 251213
loss: 0.9793264865875244,grad_norm: 0.9999990394718781, iteration: 251214
loss: 0.9751763939857483,grad_norm: 0.768820810078468, iteration: 251215
loss: 1.0188179016113281,grad_norm: 0.9999991149734633, iteration: 251216
loss: 1.019731879234314,grad_norm: 0.853950248105872, iteration: 251217
loss: 1.0111240148544312,grad_norm: 0.8792440474317634, iteration: 251218
loss: 0.9717946648597717,grad_norm: 0.9442317876035541, iteration: 251219
loss: 0.9823769927024841,grad_norm: 0.8808927665189011, iteration: 251220
loss: 1.0234501361846924,grad_norm: 0.9999993068211125, iteration: 251221
loss: 0.9722790718078613,grad_norm: 0.9999990872172162, iteration: 251222
loss: 1.0093801021575928,grad_norm: 0.8392533515787252, iteration: 251223
loss: 0.9932971596717834,grad_norm: 0.8272125824855667, iteration: 251224
loss: 1.0203306674957275,grad_norm: 0.9999991798792724, iteration: 251225
loss: 1.0091944932937622,grad_norm: 0.9999989331535841, iteration: 251226
loss: 0.9782083034515381,grad_norm: 0.9607357227942618, iteration: 251227
loss: 1.0341465473175049,grad_norm: 0.8271745348502222, iteration: 251228
loss: 0.996701180934906,grad_norm: 0.9478199213359614, iteration: 251229
loss: 1.0040524005889893,grad_norm: 0.9999990613044577, iteration: 251230
loss: 0.9950003027915955,grad_norm: 0.7552431585608121, iteration: 251231
loss: 1.0326701402664185,grad_norm: 0.9999998610993506, iteration: 251232
loss: 1.022988200187683,grad_norm: 0.9195639026577466, iteration: 251233
loss: 1.0103187561035156,grad_norm: 0.8707980415281915, iteration: 251234
loss: 0.9878285527229309,grad_norm: 0.9999991185923734, iteration: 251235
loss: 1.0273065567016602,grad_norm: 0.7711997141426419, iteration: 251236
loss: 0.9720237851142883,grad_norm: 0.999999006433384, iteration: 251237
loss: 0.9482574462890625,grad_norm: 0.9110003911744807, iteration: 251238
loss: 1.037207007408142,grad_norm: 0.9999991428973021, iteration: 251239
loss: 0.9970932006835938,grad_norm: 0.9999989979687943, iteration: 251240
loss: 0.9786615371704102,grad_norm: 0.7389159233844895, iteration: 251241
loss: 0.9954203367233276,grad_norm: 0.9999997419791019, iteration: 251242
loss: 1.0477502346038818,grad_norm: 0.9999991623946124, iteration: 251243
loss: 1.0790560245513916,grad_norm: 0.9999991806312531, iteration: 251244
loss: 0.9790820479393005,grad_norm: 0.9999994780502464, iteration: 251245
loss: 1.0360708236694336,grad_norm: 0.9999991003256091, iteration: 251246
loss: 0.9639537930488586,grad_norm: 0.815919438606568, iteration: 251247
loss: 1.0196259021759033,grad_norm: 0.8168903528025886, iteration: 251248
loss: 1.0069422721862793,grad_norm: 0.9351717648431185, iteration: 251249
loss: 1.0298372507095337,grad_norm: 0.8735917770746467, iteration: 251250
loss: 0.9907435178756714,grad_norm: 0.9162920279354325, iteration: 251251
loss: 1.0159085988998413,grad_norm: 0.999999016777877, iteration: 251252
loss: 0.9702308773994446,grad_norm: 0.8465815254134132, iteration: 251253
loss: 1.0194809436798096,grad_norm: 0.7956682418364862, iteration: 251254
loss: 0.9914969205856323,grad_norm: 0.8182333138476963, iteration: 251255
loss: 0.9532442092895508,grad_norm: 0.746871762529078, iteration: 251256
loss: 1.0427297353744507,grad_norm: 0.9999998711565629, iteration: 251257
loss: 0.9815686345100403,grad_norm: 0.8070655891339166, iteration: 251258
loss: 0.9896330833435059,grad_norm: 0.8101846904972994, iteration: 251259
loss: 0.9660615921020508,grad_norm: 0.9109934326898184, iteration: 251260
loss: 1.019066333770752,grad_norm: 0.9999991589808799, iteration: 251261
loss: 0.9850839376449585,grad_norm: 0.9201745092809915, iteration: 251262
loss: 1.0037221908569336,grad_norm: 0.807688527516829, iteration: 251263
loss: 0.9935651421546936,grad_norm: 0.9041730519867477, iteration: 251264
loss: 0.9935392141342163,grad_norm: 0.9950363899116169, iteration: 251265
loss: 0.9643581509590149,grad_norm: 0.9585524004189415, iteration: 251266
loss: 1.0012940168380737,grad_norm: 0.7807944652451405, iteration: 251267
loss: 1.0574406385421753,grad_norm: 0.9467799652834442, iteration: 251268
loss: 0.9850542545318604,grad_norm: 0.8507010946444891, iteration: 251269
loss: 1.013411521911621,grad_norm: 0.9398922713965079, iteration: 251270
loss: 0.9777808785438538,grad_norm: 0.9999992293527314, iteration: 251271
loss: 0.9757342338562012,grad_norm: 0.9254353431881973, iteration: 251272
loss: 1.0325489044189453,grad_norm: 0.9999992153789846, iteration: 251273
loss: 1.0177141427993774,grad_norm: 0.9999992671598624, iteration: 251274
loss: 1.0038925409317017,grad_norm: 0.7957687759325163, iteration: 251275
loss: 1.016533613204956,grad_norm: 0.8285861820222553, iteration: 251276
loss: 1.037859320640564,grad_norm: 0.9246740914281943, iteration: 251277
loss: 1.0098084211349487,grad_norm: 0.9999989874533551, iteration: 251278
loss: 1.0758262872695923,grad_norm: 0.9999992787143627, iteration: 251279
loss: 0.990716814994812,grad_norm: 0.8351683862582536, iteration: 251280
loss: 1.006164789199829,grad_norm: 0.8622054991024276, iteration: 251281
loss: 0.9552715420722961,grad_norm: 0.7680780405704265, iteration: 251282
loss: 1.014054298400879,grad_norm: 0.9099455587034035, iteration: 251283
loss: 0.9798611998558044,grad_norm: 0.7930062901164631, iteration: 251284
loss: 1.0269460678100586,grad_norm: 0.8556827055444333, iteration: 251285
loss: 0.9840375781059265,grad_norm: 0.9429626181884626, iteration: 251286
loss: 0.9948142766952515,grad_norm: 0.9246540177332109, iteration: 251287
loss: 1.021758794784546,grad_norm: 0.8605335646400948, iteration: 251288
loss: 1.024468183517456,grad_norm: 0.8777012992019307, iteration: 251289
loss: 1.0307008028030396,grad_norm: 0.9349028753247332, iteration: 251290
loss: 0.9968239068984985,grad_norm: 0.8557187450681955, iteration: 251291
loss: 0.9742807149887085,grad_norm: 0.9999990237209513, iteration: 251292
loss: 1.0104601383209229,grad_norm: 0.9999990391549918, iteration: 251293
loss: 0.9770823121070862,grad_norm: 0.798220252150923, iteration: 251294
loss: 0.9993302226066589,grad_norm: 0.9843998428435866, iteration: 251295
loss: 1.008154034614563,grad_norm: 0.8644010711860203, iteration: 251296
loss: 1.0180535316467285,grad_norm: 0.8702862358642366, iteration: 251297
loss: 0.972720742225647,grad_norm: 0.9163126436152786, iteration: 251298
loss: 0.9755277037620544,grad_norm: 0.7543657020325519, iteration: 251299
loss: 0.9698355197906494,grad_norm: 0.9289439803724916, iteration: 251300
loss: 1.0226402282714844,grad_norm: 0.8529607857637852, iteration: 251301
loss: 0.9754869937896729,grad_norm: 0.7361335633016703, iteration: 251302
loss: 0.9948588609695435,grad_norm: 0.9251816319685942, iteration: 251303
loss: 1.0350576639175415,grad_norm: 0.7823962575342533, iteration: 251304
loss: 0.9927501678466797,grad_norm: 0.8891478272465995, iteration: 251305
loss: 1.0068496465682983,grad_norm: 0.9842052398654486, iteration: 251306
loss: 1.0044500827789307,grad_norm: 0.9886160399264733, iteration: 251307
loss: 1.0180933475494385,grad_norm: 0.8762514823303882, iteration: 251308
loss: 0.9947495460510254,grad_norm: 0.9388035942108435, iteration: 251309
loss: 0.9959277510643005,grad_norm: 0.9111073751446962, iteration: 251310
loss: 0.9812943339347839,grad_norm: 0.8545321373974166, iteration: 251311
loss: 0.9891240000724792,grad_norm: 0.8251132188064172, iteration: 251312
loss: 1.0759538412094116,grad_norm: 0.9999991900204582, iteration: 251313
loss: 0.9762556552886963,grad_norm: 0.8687883260597479, iteration: 251314
loss: 1.0247273445129395,grad_norm: 0.9657157286206011, iteration: 251315
loss: 1.0503332614898682,grad_norm: 0.8533217700631438, iteration: 251316
loss: 0.9776822924613953,grad_norm: 0.8492914989015814, iteration: 251317
loss: 0.9962033033370972,grad_norm: 0.7979116018540894, iteration: 251318
loss: 1.0321186780929565,grad_norm: 0.9999992725485806, iteration: 251319
loss: 0.9863998293876648,grad_norm: 0.905029337321725, iteration: 251320
loss: 0.9822536706924438,grad_norm: 0.9510635363476923, iteration: 251321
loss: 1.0471173524856567,grad_norm: 0.8906997008092888, iteration: 251322
loss: 0.9881855845451355,grad_norm: 0.9999990482181003, iteration: 251323
loss: 1.016634225845337,grad_norm: 0.9234384609870032, iteration: 251324
loss: 0.9842473864555359,grad_norm: 0.974306251479109, iteration: 251325
loss: 1.008753776550293,grad_norm: 0.9999992527291691, iteration: 251326
loss: 1.0120526552200317,grad_norm: 0.8086469070453871, iteration: 251327
loss: 0.9848811030387878,grad_norm: 0.7943813770721986, iteration: 251328
loss: 0.9734153747558594,grad_norm: 0.7924353906877768, iteration: 251329
loss: 1.071050763130188,grad_norm: 0.8513270921974279, iteration: 251330
loss: 1.1036310195922852,grad_norm: 0.9999992784671358, iteration: 251331
loss: 0.9749106764793396,grad_norm: 0.892014202371951, iteration: 251332
loss: 1.0100401639938354,grad_norm: 0.9691262105197527, iteration: 251333
loss: 0.9922564029693604,grad_norm: 0.9232185504204736, iteration: 251334
loss: 1.0203967094421387,grad_norm: 0.8702489481774005, iteration: 251335
loss: 1.0124025344848633,grad_norm: 0.999999072232173, iteration: 251336
loss: 0.9711049199104309,grad_norm: 0.7699418143696015, iteration: 251337
loss: 0.9877269864082336,grad_norm: 0.7678705497849451, iteration: 251338
loss: 1.0262036323547363,grad_norm: 0.9386120213545603, iteration: 251339
loss: 1.0489667654037476,grad_norm: 0.9828721857488101, iteration: 251340
loss: 0.9861174821853638,grad_norm: 0.8595682726047236, iteration: 251341
loss: 0.9926547408103943,grad_norm: 0.9041196383054343, iteration: 251342
loss: 1.0141396522521973,grad_norm: 0.8591349103145606, iteration: 251343
loss: 0.9954493641853333,grad_norm: 0.9999992451248796, iteration: 251344
loss: 1.0429165363311768,grad_norm: 0.999999135222098, iteration: 251345
loss: 0.989824652671814,grad_norm: 0.8383517595886231, iteration: 251346
loss: 0.9849963188171387,grad_norm: 0.7094729866655114, iteration: 251347
loss: 1.0505975484848022,grad_norm: 0.9999999113663476, iteration: 251348
loss: 0.9971106648445129,grad_norm: 0.9999991603926609, iteration: 251349
loss: 1.0072675943374634,grad_norm: 0.8306463806712338, iteration: 251350
loss: 0.9941686987876892,grad_norm: 0.9116191198975802, iteration: 251351
loss: 1.0150043964385986,grad_norm: 0.8117436094192378, iteration: 251352
loss: 1.0225706100463867,grad_norm: 0.7852551609125309, iteration: 251353
loss: 0.9900203943252563,grad_norm: 0.8679758385120501, iteration: 251354
loss: 1.014925479888916,grad_norm: 0.8915704853020227, iteration: 251355
loss: 1.0540674924850464,grad_norm: 0.9999992170933395, iteration: 251356
loss: 0.9708184599876404,grad_norm: 0.8477307997529497, iteration: 251357
loss: 0.9946468472480774,grad_norm: 0.8489326796084193, iteration: 251358
loss: 0.9541640281677246,grad_norm: 0.9389471895048832, iteration: 251359
loss: 1.015025019645691,grad_norm: 0.9672700268233515, iteration: 251360
loss: 0.9946919679641724,grad_norm: 0.927488908956349, iteration: 251361
loss: 0.9737659096717834,grad_norm: 0.9173389213277283, iteration: 251362
loss: 0.9912725687026978,grad_norm: 0.9891701493723981, iteration: 251363
loss: 0.9841444492340088,grad_norm: 0.7167145073794662, iteration: 251364
loss: 0.9949889779090881,grad_norm: 0.8407648868649228, iteration: 251365
loss: 1.0039832592010498,grad_norm: 0.8839952516139918, iteration: 251366
loss: 0.9643129110336304,grad_norm: 0.798843526695448, iteration: 251367
loss: 0.9807597994804382,grad_norm: 0.8241276064113715, iteration: 251368
loss: 0.9944101572036743,grad_norm: 0.9817857513050111, iteration: 251369
loss: 1.0140960216522217,grad_norm: 0.976160791448734, iteration: 251370
loss: 1.0136843919754028,grad_norm: 0.7887678762947334, iteration: 251371
loss: 1.0074141025543213,grad_norm: 0.9548123299382878, iteration: 251372
loss: 0.9951679110527039,grad_norm: 0.8040007391930545, iteration: 251373
loss: 1.022646427154541,grad_norm: 0.9999990578180427, iteration: 251374
loss: 1.0305665731430054,grad_norm: 0.9833378567705593, iteration: 251375
loss: 0.9908349514007568,grad_norm: 0.9803824930811561, iteration: 251376
loss: 0.9843987822532654,grad_norm: 0.9971594264217326, iteration: 251377
loss: 0.9767530560493469,grad_norm: 0.7830027799187924, iteration: 251378
loss: 1.0275238752365112,grad_norm: 0.9999992266376696, iteration: 251379
loss: 0.9739252924919128,grad_norm: 0.8660358430778091, iteration: 251380
loss: 0.9695865511894226,grad_norm: 0.9248835354004712, iteration: 251381
loss: 1.0147356986999512,grad_norm: 0.8069606042682975, iteration: 251382
loss: 0.9763551950454712,grad_norm: 0.8958714509125865, iteration: 251383
loss: 0.9997422695159912,grad_norm: 0.9099895743064897, iteration: 251384
loss: 1.0313726663589478,grad_norm: 0.9841296274399324, iteration: 251385
loss: 1.0075732469558716,grad_norm: 0.9184244623483734, iteration: 251386
loss: 0.9623435735702515,grad_norm: 0.792078860675031, iteration: 251387
loss: 0.9546365737915039,grad_norm: 0.8347440784356506, iteration: 251388
loss: 1.0198181867599487,grad_norm: 0.999999013100723, iteration: 251389
loss: 1.0117301940917969,grad_norm: 0.8864547429366199, iteration: 251390
loss: 1.0203121900558472,grad_norm: 0.8480990880218428, iteration: 251391
loss: 0.9959427714347839,grad_norm: 0.8838284975657917, iteration: 251392
loss: 0.9871134161949158,grad_norm: 0.8075601632843855, iteration: 251393
loss: 0.9739093780517578,grad_norm: 0.8987911736632844, iteration: 251394
loss: 1.0258289575576782,grad_norm: 0.8583038150085028, iteration: 251395
loss: 1.0056772232055664,grad_norm: 0.8969435670065354, iteration: 251396
loss: 1.0070503950119019,grad_norm: 0.8041970913509471, iteration: 251397
loss: 1.0221896171569824,grad_norm: 0.9817640756605668, iteration: 251398
loss: 0.9814307689666748,grad_norm: 0.8170487445403862, iteration: 251399
loss: 1.0027581453323364,grad_norm: 0.934636043643599, iteration: 251400
loss: 1.0291650295257568,grad_norm: 0.926808188513279, iteration: 251401
loss: 0.9742345213890076,grad_norm: 0.9999990743979786, iteration: 251402
loss: 0.9572152495384216,grad_norm: 0.8957870426650955, iteration: 251403
loss: 0.9979374408721924,grad_norm: 0.820142783557857, iteration: 251404
loss: 1.0142806768417358,grad_norm: 0.7301378024656097, iteration: 251405
loss: 0.9579175710678101,grad_norm: 0.8507023212801781, iteration: 251406
loss: 0.9662349224090576,grad_norm: 0.8428474593501089, iteration: 251407
loss: 0.9837753772735596,grad_norm: 0.7937710783265673, iteration: 251408
loss: 1.003128170967102,grad_norm: 0.9999991087558482, iteration: 251409
loss: 0.9907429814338684,grad_norm: 0.9432389001599283, iteration: 251410
loss: 0.9877099394798279,grad_norm: 0.7641518996239186, iteration: 251411
loss: 1.0009080171585083,grad_norm: 0.8688335955186155, iteration: 251412
loss: 0.9766310453414917,grad_norm: 0.8806898676368187, iteration: 251413
loss: 0.9912088513374329,grad_norm: 0.843551596613042, iteration: 251414
loss: 0.9816383719444275,grad_norm: 0.9452103952992662, iteration: 251415
loss: 0.976901113986969,grad_norm: 0.8794496885693022, iteration: 251416
loss: 0.9656256437301636,grad_norm: 0.8563935430343563, iteration: 251417
loss: 1.0200834274291992,grad_norm: 0.8942920101740642, iteration: 251418
loss: 0.9886935353279114,grad_norm: 0.795515914598782, iteration: 251419
loss: 0.9939714074134827,grad_norm: 0.955178100155156, iteration: 251420
loss: 1.0471655130386353,grad_norm: 0.916739910324442, iteration: 251421
loss: 1.0241899490356445,grad_norm: 0.9999992047574062, iteration: 251422
loss: 0.9712669849395752,grad_norm: 0.9418712347297957, iteration: 251423
loss: 0.9697118997573853,grad_norm: 0.9315060873064244, iteration: 251424
loss: 0.9705848693847656,grad_norm: 0.9999990456463431, iteration: 251425
loss: 0.9963812232017517,grad_norm: 0.9999990233474084, iteration: 251426
loss: 0.9793866872787476,grad_norm: 0.9999990195624855, iteration: 251427
loss: 0.9444957971572876,grad_norm: 0.999999092802891, iteration: 251428
loss: 1.012572169303894,grad_norm: 0.8793195756821922, iteration: 251429
loss: 0.9972896575927734,grad_norm: 0.7899791265331381, iteration: 251430
loss: 1.0053467750549316,grad_norm: 0.9041071376730779, iteration: 251431
loss: 0.9854684472084045,grad_norm: 0.7914857689762487, iteration: 251432
loss: 0.9921261668205261,grad_norm: 0.8497429042395315, iteration: 251433
loss: 0.9996773600578308,grad_norm: 0.7509192604169938, iteration: 251434
loss: 1.010369062423706,grad_norm: 0.996776667902788, iteration: 251435
loss: 1.0338314771652222,grad_norm: 0.8295378039703813, iteration: 251436
loss: 0.9879036545753479,grad_norm: 0.999999123374487, iteration: 251437
loss: 0.954789400100708,grad_norm: 0.7916514115908854, iteration: 251438
loss: 0.9833579659461975,grad_norm: 0.9144414708520208, iteration: 251439
loss: 1.0140165090560913,grad_norm: 0.7360439510639877, iteration: 251440
loss: 0.9935535788536072,grad_norm: 0.7851081956705188, iteration: 251441
loss: 1.0310016870498657,grad_norm: 0.8643783329233149, iteration: 251442
loss: 1.0480338335037231,grad_norm: 0.9999991231978046, iteration: 251443
loss: 0.9643691182136536,grad_norm: 0.8761615331819685, iteration: 251444
loss: 1.023476481437683,grad_norm: 0.8485851316940531, iteration: 251445
loss: 0.9819120168685913,grad_norm: 0.8926720343917507, iteration: 251446
loss: 1.0021718740463257,grad_norm: 0.9491410112444405, iteration: 251447
loss: 1.0195271968841553,grad_norm: 0.953941225678167, iteration: 251448
loss: 1.0007026195526123,grad_norm: 0.8704689394605178, iteration: 251449
loss: 1.0208221673965454,grad_norm: 0.9822327534673567, iteration: 251450
loss: 1.0025891065597534,grad_norm: 0.9999990616944874, iteration: 251451
loss: 0.9750914573669434,grad_norm: 0.7814242604220587, iteration: 251452
loss: 0.9834740161895752,grad_norm: 0.8729147239912076, iteration: 251453
loss: 0.9904096126556396,grad_norm: 0.8470327307350943, iteration: 251454
loss: 1.0073587894439697,grad_norm: 0.9228729601540021, iteration: 251455
loss: 0.9849157929420471,grad_norm: 0.898827175547445, iteration: 251456
loss: 0.9631302952766418,grad_norm: 0.923196753467518, iteration: 251457
loss: 0.9860596656799316,grad_norm: 0.9999991519659343, iteration: 251458
loss: 1.000411033630371,grad_norm: 0.8967685515215914, iteration: 251459
loss: 1.019558072090149,grad_norm: 0.9266240220358025, iteration: 251460
loss: 1.065466284751892,grad_norm: 0.8045025325048527, iteration: 251461
loss: 0.9978430271148682,grad_norm: 0.9289582177502018, iteration: 251462
loss: 1.003879427909851,grad_norm: 0.99999925926096, iteration: 251463
loss: 0.9639087915420532,grad_norm: 0.9999991304896448, iteration: 251464
loss: 0.9517712593078613,grad_norm: 0.8776119820379015, iteration: 251465
loss: 1.0240670442581177,grad_norm: 0.8469111712823975, iteration: 251466
loss: 1.0002408027648926,grad_norm: 0.9999990793187984, iteration: 251467
loss: 1.0115208625793457,grad_norm: 0.8056396656080319, iteration: 251468
loss: 0.9981415271759033,grad_norm: 0.7946803550447228, iteration: 251469
loss: 1.0030796527862549,grad_norm: 0.8747044943095481, iteration: 251470
loss: 1.0113385915756226,grad_norm: 0.999999125196017, iteration: 251471
loss: 1.0281254053115845,grad_norm: 0.9999989823449372, iteration: 251472
loss: 0.9954855442047119,grad_norm: 0.9999993312521248, iteration: 251473
loss: 0.9926494359970093,grad_norm: 0.951651879105195, iteration: 251474
loss: 1.0091590881347656,grad_norm: 0.8582181035291161, iteration: 251475
loss: 0.9885157346725464,grad_norm: 0.9020302158950803, iteration: 251476
loss: 0.9867099523544312,grad_norm: 0.7685349468553803, iteration: 251477
loss: 0.9809325337409973,grad_norm: 0.9693655257317643, iteration: 251478
loss: 0.9659701585769653,grad_norm: 0.8838846825829877, iteration: 251479
loss: 1.0304971933364868,grad_norm: 0.9999989804118723, iteration: 251480
loss: 0.9894446730613708,grad_norm: 0.9583883215053832, iteration: 251481
loss: 1.026051640510559,grad_norm: 0.8422393966298416, iteration: 251482
loss: 1.0134202241897583,grad_norm: 0.8108998154539244, iteration: 251483
loss: 0.9874241948127747,grad_norm: 0.7904056385328607, iteration: 251484
loss: 0.9861220121383667,grad_norm: 0.9785845124440764, iteration: 251485
loss: 0.9899852275848389,grad_norm: 0.803109234756554, iteration: 251486
loss: 1.0065231323242188,grad_norm: 0.975202865562093, iteration: 251487
loss: 0.9972970485687256,grad_norm: 0.9131772005326572, iteration: 251488
loss: 1.0621294975280762,grad_norm: 0.8431493236990114, iteration: 251489
loss: 0.9742472171783447,grad_norm: 0.9290208229774598, iteration: 251490
loss: 1.0197319984436035,grad_norm: 0.7856062094102458, iteration: 251491
loss: 0.9851799607276917,grad_norm: 0.9999990436126951, iteration: 251492
loss: 0.9383992552757263,grad_norm: 0.8178755671481036, iteration: 251493
loss: 1.0080560445785522,grad_norm: 0.9023012456015155, iteration: 251494
loss: 1.0400141477584839,grad_norm: 0.904837291570362, iteration: 251495
loss: 1.0032316446304321,grad_norm: 0.8809679978240982, iteration: 251496
loss: 1.0190547704696655,grad_norm: 0.9874317212163151, iteration: 251497
loss: 1.0213351249694824,grad_norm: 0.9786779246237494, iteration: 251498
loss: 0.9802546501159668,grad_norm: 0.76734453500833, iteration: 251499
loss: 0.9665756225585938,grad_norm: 0.9999990426475114, iteration: 251500
loss: 1.0027741193771362,grad_norm: 0.9185639751009503, iteration: 251501
loss: 1.0127232074737549,grad_norm: 0.9999992329931107, iteration: 251502
loss: 1.004572868347168,grad_norm: 0.8693754551738812, iteration: 251503
loss: 1.0044479370117188,grad_norm: 0.8769235107697446, iteration: 251504
loss: 0.9840936064720154,grad_norm: 0.8247795209992279, iteration: 251505
loss: 0.9841054081916809,grad_norm: 0.8334725843104701, iteration: 251506
loss: 0.9725167155265808,grad_norm: 0.8298448582726281, iteration: 251507
loss: 1.036089301109314,grad_norm: 0.9999991884738733, iteration: 251508
loss: 0.9694070219993591,grad_norm: 0.9546286766887913, iteration: 251509
loss: 0.9776507616043091,grad_norm: 0.9999998293945762, iteration: 251510
loss: 1.0132954120635986,grad_norm: 0.763026375245314, iteration: 251511
loss: 1.0053778886795044,grad_norm: 0.9233746165228162, iteration: 251512
loss: 0.9554205536842346,grad_norm: 0.9820536567060623, iteration: 251513
loss: 0.9866192936897278,grad_norm: 0.844309412360234, iteration: 251514
loss: 1.0271508693695068,grad_norm: 0.7704349377266468, iteration: 251515
loss: 0.9878852367401123,grad_norm: 0.99999922360641, iteration: 251516
loss: 0.9866545796394348,grad_norm: 0.8372930410482488, iteration: 251517
loss: 1.0413204431533813,grad_norm: 0.9999990134463126, iteration: 251518
loss: 1.0951582193374634,grad_norm: 0.9277917735846881, iteration: 251519
loss: 1.0253448486328125,grad_norm: 0.9217088997799423, iteration: 251520
loss: 0.9660943150520325,grad_norm: 0.9030409133746615, iteration: 251521
loss: 0.9549336433410645,grad_norm: 0.8783552031615874, iteration: 251522
loss: 1.0020256042480469,grad_norm: 0.8737019458615223, iteration: 251523
loss: 1.0236835479736328,grad_norm: 0.9172826160152847, iteration: 251524
loss: 1.0424481630325317,grad_norm: 0.999999935267338, iteration: 251525
loss: 1.0161995887756348,grad_norm: 0.9999990676791315, iteration: 251526
loss: 0.9952126741409302,grad_norm: 0.8944457995025062, iteration: 251527
loss: 0.99397212266922,grad_norm: 0.9670344989173179, iteration: 251528
loss: 0.962149441242218,grad_norm: 0.7834488199918431, iteration: 251529
loss: 0.9830133318901062,grad_norm: 0.9999997139422835, iteration: 251530
loss: 0.9779040813446045,grad_norm: 0.8265219408788199, iteration: 251531
loss: 0.9722296595573425,grad_norm: 0.848228570826723, iteration: 251532
loss: 0.979679524898529,grad_norm: 0.9558129211535686, iteration: 251533
loss: 1.0095086097717285,grad_norm: 0.7549307734450283, iteration: 251534
loss: 1.0327646732330322,grad_norm: 0.9606430659279437, iteration: 251535
loss: 0.9765063524246216,grad_norm: 0.9999990427430793, iteration: 251536
loss: 0.9930227398872375,grad_norm: 0.955084606584303, iteration: 251537
loss: 0.9769119024276733,grad_norm: 0.997170777196454, iteration: 251538
loss: 1.0007421970367432,grad_norm: 0.8850680789092026, iteration: 251539
loss: 1.0368887186050415,grad_norm: 0.9999990887364923, iteration: 251540
loss: 0.9661166667938232,grad_norm: 0.9772918965197078, iteration: 251541
loss: 1.0291435718536377,grad_norm: 0.8885115083280452, iteration: 251542
loss: 0.9844032526016235,grad_norm: 0.9999990653716629, iteration: 251543
loss: 1.0001401901245117,grad_norm: 0.9219232567667675, iteration: 251544
loss: 1.0217232704162598,grad_norm: 0.8726875697031606, iteration: 251545
loss: 1.0124778747558594,grad_norm: 0.9999991534207817, iteration: 251546
loss: 0.9857460856437683,grad_norm: 0.9024067492441539, iteration: 251547
loss: 0.9802752733230591,grad_norm: 0.9999991353706678, iteration: 251548
loss: 0.9916924834251404,grad_norm: 0.9307697699538289, iteration: 251549
loss: 0.9723408222198486,grad_norm: 0.8179198714813846, iteration: 251550
loss: 0.9807231426239014,grad_norm: 0.7809956661558572, iteration: 251551
loss: 1.0004435777664185,grad_norm: 0.8452632993749619, iteration: 251552
loss: 0.9584122896194458,grad_norm: 0.9999991731174909, iteration: 251553
loss: 0.9627374410629272,grad_norm: 0.8089139700125604, iteration: 251554
loss: 0.9707084894180298,grad_norm: 0.9294875731858041, iteration: 251555
loss: 1.01504385471344,grad_norm: 0.7708738319921826, iteration: 251556
loss: 1.0033674240112305,grad_norm: 0.9572908666557677, iteration: 251557
loss: 0.9885058999061584,grad_norm: 0.8510175759044605, iteration: 251558
loss: 0.9697040915489197,grad_norm: 0.9999992823633865, iteration: 251559
loss: 0.9811825156211853,grad_norm: 0.9725308843107661, iteration: 251560
loss: 1.0039311647415161,grad_norm: 0.9680373014340252, iteration: 251561
loss: 1.0034791231155396,grad_norm: 0.790487489384188, iteration: 251562
loss: 0.9569022059440613,grad_norm: 0.9999991729056932, iteration: 251563
loss: 1.0093414783477783,grad_norm: 0.8798659737936072, iteration: 251564
loss: 0.9880919456481934,grad_norm: 0.838588383690902, iteration: 251565
loss: 0.9745053052902222,grad_norm: 0.9999990560476666, iteration: 251566
loss: 1.0166164636611938,grad_norm: 0.8579142477643804, iteration: 251567
loss: 1.02632737159729,grad_norm: 0.9769093200246883, iteration: 251568
loss: 1.0290437936782837,grad_norm: 0.8456309454189723, iteration: 251569
loss: 0.9647884964942932,grad_norm: 0.9999991687563394, iteration: 251570
loss: 1.0112249851226807,grad_norm: 0.944879466147293, iteration: 251571
loss: 1.008927822113037,grad_norm: 0.9999991044152065, iteration: 251572
loss: 0.9932724833488464,grad_norm: 0.999999028834782, iteration: 251573
loss: 0.9986196160316467,grad_norm: 0.9444905983772941, iteration: 251574
loss: 1.0136362314224243,grad_norm: 0.9041222604379302, iteration: 251575
loss: 0.9542712569236755,grad_norm: 0.893219713027714, iteration: 251576
loss: 1.0040264129638672,grad_norm: 0.9999990374741436, iteration: 251577
loss: 1.0076396465301514,grad_norm: 0.9403243876093749, iteration: 251578
loss: 0.9935838580131531,grad_norm: 0.9365364224210476, iteration: 251579
loss: 0.922406017780304,grad_norm: 0.9555306969447503, iteration: 251580
loss: 1.0184941291809082,grad_norm: 0.8625165083581723, iteration: 251581
loss: 1.0030193328857422,grad_norm: 0.9130283148446988, iteration: 251582
loss: 1.0470380783081055,grad_norm: 0.9999995638347284, iteration: 251583
loss: 1.0344876050949097,grad_norm: 0.9253499369270698, iteration: 251584
loss: 0.9965089559555054,grad_norm: 0.7653659201470654, iteration: 251585
loss: 1.0333629846572876,grad_norm: 0.675348797330262, iteration: 251586
loss: 1.0106428861618042,grad_norm: 0.9999990160900579, iteration: 251587
loss: 1.018035888671875,grad_norm: 0.843299716518321, iteration: 251588
loss: 0.9586546421051025,grad_norm: 0.9138838912218015, iteration: 251589
loss: 0.9929016828536987,grad_norm: 0.999999076732983, iteration: 251590
loss: 1.0242105722427368,grad_norm: 0.9999993794207388, iteration: 251591
loss: 1.030509352684021,grad_norm: 0.999999072588356, iteration: 251592
loss: 0.9732677340507507,grad_norm: 0.8256070013379301, iteration: 251593
loss: 1.0474176406860352,grad_norm: 0.9999993643904128, iteration: 251594
loss: 0.9987389445304871,grad_norm: 0.808374351913895, iteration: 251595
loss: 0.9478680491447449,grad_norm: 0.8211663244152034, iteration: 251596
loss: 0.9470477104187012,grad_norm: 0.9337654537693901, iteration: 251597
loss: 0.9770238995552063,grad_norm: 0.9999991182430694, iteration: 251598
loss: 0.9962525963783264,grad_norm: 0.8714888975357962, iteration: 251599
loss: 0.99713134765625,grad_norm: 0.8141620920828398, iteration: 251600
loss: 1.0032905340194702,grad_norm: 0.8081471879724919, iteration: 251601
loss: 0.9618226885795593,grad_norm: 0.8911430797169166, iteration: 251602
loss: 0.9858741760253906,grad_norm: 0.8127444969092692, iteration: 251603
loss: 1.0160735845565796,grad_norm: 0.9999991140692639, iteration: 251604
loss: 0.9830539226531982,grad_norm: 0.7997055871135694, iteration: 251605
loss: 0.9690672159194946,grad_norm: 0.999999047099275, iteration: 251606
loss: 1.0034637451171875,grad_norm: 0.8753941978038758, iteration: 251607
loss: 0.9660764932632446,grad_norm: 0.9901115943407642, iteration: 251608
loss: 0.9877845048904419,grad_norm: 0.8356309467302903, iteration: 251609
loss: 1.0205320119857788,grad_norm: 0.966121582827533, iteration: 251610
loss: 1.0297263860702515,grad_norm: 0.8718609644890053, iteration: 251611
loss: 1.0042141675949097,grad_norm: 0.9320800013802502, iteration: 251612
loss: 0.98678058385849,grad_norm: 0.6987275485020532, iteration: 251613
loss: 1.0291558504104614,grad_norm: 0.9546678863642944, iteration: 251614
loss: 0.9748319983482361,grad_norm: 0.9999991552886591, iteration: 251615
loss: 0.9949426651000977,grad_norm: 0.8102177988039533, iteration: 251616
loss: 0.9953935742378235,grad_norm: 0.8103733449086514, iteration: 251617
loss: 1.0562267303466797,grad_norm: 0.9062151919291875, iteration: 251618
loss: 1.0017482042312622,grad_norm: 0.9572875532496383, iteration: 251619
loss: 1.0154056549072266,grad_norm: 0.9999994508888176, iteration: 251620
loss: 1.0058296918869019,grad_norm: 0.8778652790354753, iteration: 251621
loss: 0.9719117283821106,grad_norm: 0.9999991078266177, iteration: 251622
loss: 1.0361043214797974,grad_norm: 0.983915153161312, iteration: 251623
loss: 1.0089740753173828,grad_norm: 0.893281143724176, iteration: 251624
loss: 1.0124753713607788,grad_norm: 0.793962237236736, iteration: 251625
loss: 0.9949806928634644,grad_norm: 0.8515397761773474, iteration: 251626
loss: 1.02815580368042,grad_norm: 0.9999990340526884, iteration: 251627
loss: 1.0050102472305298,grad_norm: 0.9420102472707664, iteration: 251628
loss: 1.0695037841796875,grad_norm: 0.9769132667087627, iteration: 251629
loss: 1.0187256336212158,grad_norm: 0.9723109249572909, iteration: 251630
loss: 0.9797927141189575,grad_norm: 0.924675713837295, iteration: 251631
loss: 0.9842254519462585,grad_norm: 0.9691639248546718, iteration: 251632
loss: 1.033695101737976,grad_norm: 0.8731059932777087, iteration: 251633
loss: 1.002951741218567,grad_norm: 0.9480406517044334, iteration: 251634
loss: 1.0153908729553223,grad_norm: 0.9928528771153412, iteration: 251635
loss: 1.0073281526565552,grad_norm: 0.9999991125692297, iteration: 251636
loss: 0.9870963096618652,grad_norm: 0.934662337146716, iteration: 251637
loss: 0.9822922945022583,grad_norm: 0.9641069132426976, iteration: 251638
loss: 0.9673620462417603,grad_norm: 0.9999990814613454, iteration: 251639
loss: 1.026278018951416,grad_norm: 0.9999991093302286, iteration: 251640
loss: 1.0047216415405273,grad_norm: 0.8277026599279652, iteration: 251641
loss: 1.0077834129333496,grad_norm: 0.9999991082009982, iteration: 251642
loss: 0.9608017206192017,grad_norm: 0.9392068300003055, iteration: 251643
loss: 1.0074551105499268,grad_norm: 0.8606742113603502, iteration: 251644
loss: 0.99161297082901,grad_norm: 0.999999010647217, iteration: 251645
loss: 0.9689369797706604,grad_norm: 0.9999991056256117, iteration: 251646
loss: 1.0423797369003296,grad_norm: 0.9999998809395706, iteration: 251647
loss: 1.0381232500076294,grad_norm: 0.9362490557345469, iteration: 251648
loss: 0.9849414825439453,grad_norm: 0.8634187645566774, iteration: 251649
loss: 0.9746655821800232,grad_norm: 0.762596164742011, iteration: 251650
loss: 1.0113331079483032,grad_norm: 0.9999997658505189, iteration: 251651
loss: 1.0018843412399292,grad_norm: 0.9854233246932659, iteration: 251652
loss: 0.9775272607803345,grad_norm: 0.8183039010669955, iteration: 251653
loss: 1.0068128108978271,grad_norm: 0.9890766845884643, iteration: 251654
loss: 0.9719588756561279,grad_norm: 0.8916094952993113, iteration: 251655
loss: 0.9422379732131958,grad_norm: 0.9999992081231486, iteration: 251656
loss: 0.9812871217727661,grad_norm: 0.8181692710331061, iteration: 251657
loss: 0.9740278124809265,grad_norm: 0.7833162667497878, iteration: 251658
loss: 1.0460799932479858,grad_norm: 0.9999991047709171, iteration: 251659
loss: 1.0136301517486572,grad_norm: 0.8665842515658811, iteration: 251660
loss: 1.0218698978424072,grad_norm: 0.9999992569462686, iteration: 251661
loss: 1.0054006576538086,grad_norm: 0.8481949759850226, iteration: 251662
loss: 1.0217580795288086,grad_norm: 0.839147931369935, iteration: 251663
loss: 0.9826391339302063,grad_norm: 0.964463953415722, iteration: 251664
loss: 1.0148957967758179,grad_norm: 0.8521803681726628, iteration: 251665
loss: 1.0726145505905151,grad_norm: 0.9999991056639163, iteration: 251666
loss: 0.9975928068161011,grad_norm: 0.9229280073383086, iteration: 251667
loss: 1.021864652633667,grad_norm: 0.8800423954020109, iteration: 251668
loss: 1.0088797807693481,grad_norm: 0.9999998031370518, iteration: 251669
loss: 0.9793397188186646,grad_norm: 0.9153652393548498, iteration: 251670
loss: 0.9536716341972351,grad_norm: 0.9234677593495169, iteration: 251671
loss: 1.0569031238555908,grad_norm: 0.7875395998834941, iteration: 251672
loss: 0.9829801321029663,grad_norm: 0.7603226126657103, iteration: 251673
loss: 0.9645653963088989,grad_norm: 0.9999990402720416, iteration: 251674
loss: 1.0204370021820068,grad_norm: 0.9057882171557372, iteration: 251675
loss: 1.0258467197418213,grad_norm: 0.9099219490056101, iteration: 251676
loss: 0.9675350189208984,grad_norm: 0.9318534288782062, iteration: 251677
loss: 0.967522382736206,grad_norm: 0.9301337884508522, iteration: 251678
loss: 1.0074728727340698,grad_norm: 0.9057665255418506, iteration: 251679
loss: 1.1269460916519165,grad_norm: 0.9450439919253907, iteration: 251680
loss: 1.0377933979034424,grad_norm: 0.9999993534484045, iteration: 251681
loss: 0.9798287749290466,grad_norm: 0.9179113696606077, iteration: 251682
loss: 1.0179296731948853,grad_norm: 0.9999990902811232, iteration: 251683
loss: 1.0954612493515015,grad_norm: 0.9999994520900105, iteration: 251684
loss: 0.9999738335609436,grad_norm: 0.79976296062318, iteration: 251685
loss: 1.0143810510635376,grad_norm: 0.82029101734737, iteration: 251686
loss: 0.9730231761932373,grad_norm: 0.8415638944130792, iteration: 251687
loss: 1.0470954179763794,grad_norm: 0.8783724851112624, iteration: 251688
loss: 0.9717258810997009,grad_norm: 0.9161868497984648, iteration: 251689
loss: 1.0210262537002563,grad_norm: 0.9999997321132644, iteration: 251690
loss: 1.001434326171875,grad_norm: 0.8115588861316032, iteration: 251691
loss: 1.0822618007659912,grad_norm: 0.8550436083507833, iteration: 251692
loss: 0.9922609329223633,grad_norm: 0.9999991868170481, iteration: 251693
loss: 0.9865100979804993,grad_norm: 0.9242194742739397, iteration: 251694
loss: 1.0098583698272705,grad_norm: 0.8010205226635745, iteration: 251695
loss: 0.9965643286705017,grad_norm: 0.8244094455431727, iteration: 251696
loss: 1.0023016929626465,grad_norm: 0.9999990877513523, iteration: 251697
loss: 1.014038324356079,grad_norm: 0.9999990670495047, iteration: 251698
loss: 1.0235782861709595,grad_norm: 0.9999990700269248, iteration: 251699
loss: 1.0291236639022827,grad_norm: 0.9493252647943226, iteration: 251700
loss: 0.9950450658798218,grad_norm: 0.9999996604044953, iteration: 251701
loss: 1.0332261323928833,grad_norm: 0.9886042603090482, iteration: 251702
loss: 1.0319404602050781,grad_norm: 0.8859548995321641, iteration: 251703
loss: 1.0036860704421997,grad_norm: 0.99999937793698, iteration: 251704
loss: 1.0086034536361694,grad_norm: 0.9110922692353114, iteration: 251705
loss: 1.0655229091644287,grad_norm: 0.999999324816169, iteration: 251706
loss: 0.9613323211669922,grad_norm: 0.7984196774034704, iteration: 251707
loss: 0.989609956741333,grad_norm: 0.9912754823231613, iteration: 251708
loss: 0.9894453287124634,grad_norm: 0.901948665177753, iteration: 251709
loss: 1.0048893690109253,grad_norm: 0.9273181011532244, iteration: 251710
loss: 1.0096873044967651,grad_norm: 0.8570805490347491, iteration: 251711
loss: 1.0468682050704956,grad_norm: 0.9999996388392334, iteration: 251712
loss: 1.0166693925857544,grad_norm: 0.9759058236997374, iteration: 251713
loss: 0.9994378089904785,grad_norm: 0.8016751288352733, iteration: 251714
loss: 1.012488842010498,grad_norm: 0.9338763242551072, iteration: 251715
loss: 1.0043103694915771,grad_norm: 0.8337553718535172, iteration: 251716
loss: 1.038256287574768,grad_norm: 0.9999994916251927, iteration: 251717
loss: 0.9852505326271057,grad_norm: 0.9102465543376776, iteration: 251718
loss: 1.0242104530334473,grad_norm: 0.8467580742823231, iteration: 251719
loss: 1.0062026977539062,grad_norm: 0.9999992230451444, iteration: 251720
loss: 0.9791397452354431,grad_norm: 0.9307524497908044, iteration: 251721
loss: 1.0132800340652466,grad_norm: 0.9999989126823733, iteration: 251722
loss: 0.9620235562324524,grad_norm: 0.9999991660760871, iteration: 251723
loss: 1.0196245908737183,grad_norm: 0.9999990471656307, iteration: 251724
loss: 0.9678571224212646,grad_norm: 0.8540073760258184, iteration: 251725
loss: 1.023553729057312,grad_norm: 0.9999998055443737, iteration: 251726
loss: 1.0021635293960571,grad_norm: 0.9999991615753056, iteration: 251727
loss: 1.010872483253479,grad_norm: 0.8283405232665406, iteration: 251728
loss: 1.0184398889541626,grad_norm: 0.9999991669918384, iteration: 251729
loss: 1.0025979280471802,grad_norm: 0.9999992166571342, iteration: 251730
loss: 0.9970001578330994,grad_norm: 0.7786437263367864, iteration: 251731
loss: 1.0387152433395386,grad_norm: 0.9999991563532686, iteration: 251732
loss: 1.038408875465393,grad_norm: 0.9999997040145909, iteration: 251733
loss: 0.9852714538574219,grad_norm: 0.8866327648834273, iteration: 251734
loss: 1.00333833694458,grad_norm: 0.9999999034273088, iteration: 251735
loss: 1.0063964128494263,grad_norm: 0.9999990944508724, iteration: 251736
loss: 1.014143943786621,grad_norm: 0.8846546448534076, iteration: 251737
loss: 0.9959286451339722,grad_norm: 0.9999992298568627, iteration: 251738
loss: 1.0002644062042236,grad_norm: 0.9492878604727187, iteration: 251739
loss: 1.0098763704299927,grad_norm: 0.8373609816495923, iteration: 251740
loss: 1.0022307634353638,grad_norm: 0.9999991670306477, iteration: 251741
loss: 1.0400447845458984,grad_norm: 0.8866127783321843, iteration: 251742
loss: 1.0170031785964966,grad_norm: 0.9999990078030884, iteration: 251743
loss: 0.950337827205658,grad_norm: 0.9864747688384962, iteration: 251744
loss: 1.0206416845321655,grad_norm: 0.8332850823848073, iteration: 251745
loss: 0.9911807179450989,grad_norm: 0.8868721667210357, iteration: 251746
loss: 1.0053685903549194,grad_norm: 0.9999992111490644, iteration: 251747
loss: 0.9631045460700989,grad_norm: 0.9999996443146127, iteration: 251748
loss: 1.0356637239456177,grad_norm: 0.9999994772621124, iteration: 251749
loss: 1.007932424545288,grad_norm: 0.8187870962004989, iteration: 251750
loss: 1.03444504737854,grad_norm: 0.9566527720873833, iteration: 251751
loss: 0.9618757367134094,grad_norm: 0.8840418788564106, iteration: 251752
loss: 1.00199556350708,grad_norm: 0.9335333304748713, iteration: 251753
loss: 0.980299174785614,grad_norm: 0.8868032326037738, iteration: 251754
loss: 1.0065997838974,grad_norm: 0.881111343915996, iteration: 251755
loss: 0.9932438731193542,grad_norm: 0.9038709820193203, iteration: 251756
loss: 1.000230312347412,grad_norm: 0.9999989190884455, iteration: 251757
loss: 1.0204983949661255,grad_norm: 0.8675578149080145, iteration: 251758
loss: 1.0194026231765747,grad_norm: 0.8599297964543747, iteration: 251759
loss: 0.9333949089050293,grad_norm: 0.9756803227943388, iteration: 251760
loss: 0.9781263470649719,grad_norm: 0.9999990370586607, iteration: 251761
loss: 1.0053118467330933,grad_norm: 0.9799690982835544, iteration: 251762
loss: 0.9968603253364563,grad_norm: 0.7657667566916768, iteration: 251763
loss: 1.0198309421539307,grad_norm: 0.9561599812102883, iteration: 251764
loss: 0.9881329536437988,grad_norm: 0.8496312950110511, iteration: 251765
loss: 1.075216293334961,grad_norm: 0.9999994113281618, iteration: 251766
loss: 0.9945688247680664,grad_norm: 0.9999991268987672, iteration: 251767
loss: 1.0176315307617188,grad_norm: 0.9999990186086748, iteration: 251768
loss: 1.0341955423355103,grad_norm: 0.8224159844593416, iteration: 251769
loss: 0.9931530952453613,grad_norm: 0.8545790663951015, iteration: 251770
loss: 0.9643753170967102,grad_norm: 0.9794874396345064, iteration: 251771
loss: 0.9937943816184998,grad_norm: 0.9999999117035666, iteration: 251772
loss: 0.995951235294342,grad_norm: 0.8690853135306906, iteration: 251773
loss: 1.021215558052063,grad_norm: 0.8865647298324105, iteration: 251774
loss: 0.987062931060791,grad_norm: 0.9070054597758768, iteration: 251775
loss: 0.9917402863502502,grad_norm: 0.8236016711340463, iteration: 251776
loss: 1.0256551504135132,grad_norm: 0.8950502240397733, iteration: 251777
loss: 0.9817284941673279,grad_norm: 0.7929474273589797, iteration: 251778
loss: 0.9969210028648376,grad_norm: 0.9860653179378562, iteration: 251779
loss: 0.9662652611732483,grad_norm: 0.9511323575524833, iteration: 251780
loss: 1.0992966890335083,grad_norm: 0.9999991391006869, iteration: 251781
loss: 0.9959168434143066,grad_norm: 0.8711056652487775, iteration: 251782
loss: 1.0125696659088135,grad_norm: 0.9405850669496619, iteration: 251783
loss: 0.9872399568557739,grad_norm: 0.9996286745362654, iteration: 251784
loss: 1.0269362926483154,grad_norm: 0.8585186555374135, iteration: 251785
loss: 1.0190094709396362,grad_norm: 0.9501213587470445, iteration: 251786
loss: 0.9786112308502197,grad_norm: 0.9211683038637487, iteration: 251787
loss: 0.9961932301521301,grad_norm: 0.8405866184234511, iteration: 251788
loss: 1.0145407915115356,grad_norm: 0.9999991950036599, iteration: 251789
loss: 0.9979923963546753,grad_norm: 0.9102027969866776, iteration: 251790
loss: 0.9736747145652771,grad_norm: 0.9999995186024262, iteration: 251791
loss: 1.0111125707626343,grad_norm: 0.9143723679112339, iteration: 251792
loss: 1.0084820985794067,grad_norm: 0.999999234396817, iteration: 251793
loss: 0.9976498484611511,grad_norm: 0.9110273639055544, iteration: 251794
loss: 0.9949779510498047,grad_norm: 0.9999993009889085, iteration: 251795
loss: 1.015945315361023,grad_norm: 0.9930090443721322, iteration: 251796
loss: 0.975853443145752,grad_norm: 0.9121453733415777, iteration: 251797
loss: 0.9674623608589172,grad_norm: 0.9999992854853418, iteration: 251798
loss: 1.0243992805480957,grad_norm: 0.959988315502017, iteration: 251799
loss: 1.004008412361145,grad_norm: 0.9999993541319456, iteration: 251800
loss: 1.0132575035095215,grad_norm: 0.9999990535529606, iteration: 251801
loss: 1.020166039466858,grad_norm: 0.9681606998714521, iteration: 251802
loss: 0.9921702742576599,grad_norm: 0.8086611792435356, iteration: 251803
loss: 0.9653705954551697,grad_norm: 0.9115392788301311, iteration: 251804
loss: 1.0106933116912842,grad_norm: 0.9145854314468713, iteration: 251805
loss: 1.0072966814041138,grad_norm: 0.9903182444119478, iteration: 251806
loss: 1.0256030559539795,grad_norm: 0.9621474598939792, iteration: 251807
loss: 0.9932509660720825,grad_norm: 0.864776446649439, iteration: 251808
loss: 1.0045421123504639,grad_norm: 0.8302712224756703, iteration: 251809
loss: 1.0094496011734009,grad_norm: 0.8945265804658392, iteration: 251810
loss: 0.9811469316482544,grad_norm: 0.8978031606894065, iteration: 251811
loss: 0.9971518516540527,grad_norm: 0.9999990783757149, iteration: 251812
loss: 1.0244603157043457,grad_norm: 0.8522533630077923, iteration: 251813
loss: 1.0126512050628662,grad_norm: 0.8037913313464409, iteration: 251814
loss: 1.0506325960159302,grad_norm: 0.8393642412392472, iteration: 251815
loss: 0.9830592274665833,grad_norm: 0.9726319208615966, iteration: 251816
loss: 1.0041325092315674,grad_norm: 0.8361352452186328, iteration: 251817
loss: 1.0006991624832153,grad_norm: 0.8841142532360494, iteration: 251818
loss: 1.0019021034240723,grad_norm: 0.999998986641839, iteration: 251819
loss: 0.9794019460678101,grad_norm: 0.9999993235640039, iteration: 251820
loss: 1.026466965675354,grad_norm: 0.7498751266857618, iteration: 251821
loss: 1.0250139236450195,grad_norm: 0.7393817994715713, iteration: 251822
loss: 0.9491229057312012,grad_norm: 0.9999992371845025, iteration: 251823
loss: 0.9972316026687622,grad_norm: 0.892192026163637, iteration: 251824
loss: 1.1144263744354248,grad_norm: 0.9999999665690822, iteration: 251825
loss: 0.9833877682685852,grad_norm: 0.9999992357248616, iteration: 251826
loss: 1.0124624967575073,grad_norm: 0.8755651597391417, iteration: 251827
loss: 0.9696945548057556,grad_norm: 0.8278681712461541, iteration: 251828
loss: 0.985435962677002,grad_norm: 0.833047702414483, iteration: 251829
loss: 1.0306551456451416,grad_norm: 0.9999991792514052, iteration: 251830
loss: 0.9884286522865295,grad_norm: 0.9642695499409715, iteration: 251831
loss: 1.005013108253479,grad_norm: 0.8491418703044674, iteration: 251832
loss: 0.9868856072425842,grad_norm: 0.8569188690626822, iteration: 251833
loss: 1.052420735359192,grad_norm: 0.9999991072487785, iteration: 251834
loss: 1.0066661834716797,grad_norm: 0.9999991320555047, iteration: 251835
loss: 0.9798450469970703,grad_norm: 0.9999990627137801, iteration: 251836
loss: 1.0215706825256348,grad_norm: 0.8500755335266065, iteration: 251837
loss: 1.0063871145248413,grad_norm: 0.8840640893107186, iteration: 251838
loss: 0.9882328510284424,grad_norm: 0.9445349745757434, iteration: 251839
loss: 1.000342607498169,grad_norm: 0.9373910968630655, iteration: 251840
loss: 1.0133048295974731,grad_norm: 0.8995501398069159, iteration: 251841
loss: 0.9769459366798401,grad_norm: 0.8958979031521067, iteration: 251842
loss: 0.9981653690338135,grad_norm: 0.9358515874557168, iteration: 251843
loss: 1.0263493061065674,grad_norm: 0.9782727455254085, iteration: 251844
loss: 1.0068873167037964,grad_norm: 0.9845477027918245, iteration: 251845
loss: 0.9904341101646423,grad_norm: 0.9999990626425013, iteration: 251846
loss: 0.9782102704048157,grad_norm: 0.9999989956806359, iteration: 251847
loss: 0.9599120616912842,grad_norm: 0.8565837936509207, iteration: 251848
loss: 1.012227177619934,grad_norm: 0.8641730750205933, iteration: 251849
loss: 1.0088627338409424,grad_norm: 0.9492532132142711, iteration: 251850
loss: 1.022127389907837,grad_norm: 0.9999989281531357, iteration: 251851
loss: 0.9896929264068604,grad_norm: 0.9343367893458907, iteration: 251852
loss: 1.034986138343811,grad_norm: 0.999999088357726, iteration: 251853
loss: 0.970809817314148,grad_norm: 0.8894484720450232, iteration: 251854
loss: 1.0717579126358032,grad_norm: 0.9999993579246794, iteration: 251855
loss: 0.9761146306991577,grad_norm: 0.9068404147408538, iteration: 251856
loss: 0.9799898266792297,grad_norm: 0.8848438638987265, iteration: 251857
loss: 0.9879652261734009,grad_norm: 0.796295303637111, iteration: 251858
loss: 1.0044559240341187,grad_norm: 0.8434226810339321, iteration: 251859
loss: 0.9751563668251038,grad_norm: 0.9385377031711997, iteration: 251860
loss: 0.9937214851379395,grad_norm: 0.8165359802509562, iteration: 251861
loss: 1.0345407724380493,grad_norm: 0.8195911115259479, iteration: 251862
loss: 0.9955277442932129,grad_norm: 0.9999989766950831, iteration: 251863
loss: 0.9998880624771118,grad_norm: 0.8876678891632548, iteration: 251864
loss: 0.9730559587478638,grad_norm: 0.7985338753576889, iteration: 251865
loss: 1.0034101009368896,grad_norm: 0.9999995463824675, iteration: 251866
loss: 1.0062042474746704,grad_norm: 0.8384956959237675, iteration: 251867
loss: 0.9914966225624084,grad_norm: 0.8492756246775492, iteration: 251868
loss: 1.0060150623321533,grad_norm: 0.8541633879370136, iteration: 251869
loss: 1.0028475522994995,grad_norm: 0.9206148852719611, iteration: 251870
loss: 0.9838927388191223,grad_norm: 0.7682582051514688, iteration: 251871
loss: 0.9881174564361572,grad_norm: 0.8333070155526847, iteration: 251872
loss: 1.0223876237869263,grad_norm: 0.7788134145647982, iteration: 251873
loss: 0.9527363181114197,grad_norm: 0.9731069388924204, iteration: 251874
loss: 1.0172696113586426,grad_norm: 0.8546261593250406, iteration: 251875
loss: 1.0581727027893066,grad_norm: 0.999999120125087, iteration: 251876
loss: 1.0179246664047241,grad_norm: 0.9999989920439113, iteration: 251877
loss: 0.9938046336174011,grad_norm: 0.9251326610456299, iteration: 251878
loss: 0.9892628788948059,grad_norm: 0.9059041952095136, iteration: 251879
loss: 0.99832683801651,grad_norm: 0.775221558986298, iteration: 251880
loss: 0.9478501081466675,grad_norm: 0.7922245331514264, iteration: 251881
loss: 0.9888356924057007,grad_norm: 0.9999991446748856, iteration: 251882
loss: 0.9867876768112183,grad_norm: 0.999999615176836, iteration: 251883
loss: 1.002816915512085,grad_norm: 0.9999995658415534, iteration: 251884
loss: 0.9760470390319824,grad_norm: 0.7510895389246162, iteration: 251885
loss: 1.0155653953552246,grad_norm: 0.8080291088276117, iteration: 251886
loss: 1.0323143005371094,grad_norm: 0.9211285948189689, iteration: 251887
loss: 0.9917706847190857,grad_norm: 0.7914499720932712, iteration: 251888
loss: 1.0429532527923584,grad_norm: 0.8650916894025406, iteration: 251889
loss: 0.9940151572227478,grad_norm: 0.8963146009759971, iteration: 251890
loss: 0.9937019348144531,grad_norm: 0.9641114571915514, iteration: 251891
loss: 0.9981955885887146,grad_norm: 0.8544793911746779, iteration: 251892
loss: 0.9890539646148682,grad_norm: 0.9567523643184431, iteration: 251893
loss: 1.0085591077804565,grad_norm: 0.8309432514514806, iteration: 251894
loss: 1.018620252609253,grad_norm: 0.9457716886677043, iteration: 251895
loss: 1.0045690536499023,grad_norm: 0.7370348655508074, iteration: 251896
loss: 0.9872121810913086,grad_norm: 0.8360617652359151, iteration: 251897
loss: 1.0026155710220337,grad_norm: 0.9999990231183359, iteration: 251898
loss: 1.0277785062789917,grad_norm: 0.8360649406307388, iteration: 251899
loss: 1.0276877880096436,grad_norm: 0.8350666481951794, iteration: 251900
loss: 1.0080699920654297,grad_norm: 0.7684882256262555, iteration: 251901
loss: 0.9644144773483276,grad_norm: 0.8209599913047484, iteration: 251902
loss: 1.0381635427474976,grad_norm: 0.9999997507676076, iteration: 251903
loss: 1.0029336214065552,grad_norm: 0.7594708933512542, iteration: 251904
loss: 1.0150567293167114,grad_norm: 0.9999994657238062, iteration: 251905
loss: 0.9879769682884216,grad_norm: 0.9275499078476196, iteration: 251906
loss: 0.9758183360099792,grad_norm: 0.9999991811454876, iteration: 251907
loss: 1.0047569274902344,grad_norm: 0.8980012801965146, iteration: 251908
loss: 0.9832543730735779,grad_norm: 0.9373872909251235, iteration: 251909
loss: 1.0092748403549194,grad_norm: 0.9525860471372406, iteration: 251910
loss: 1.0249966382980347,grad_norm: 0.8763743252356856, iteration: 251911
loss: 1.011817216873169,grad_norm: 0.9471798926069434, iteration: 251912
loss: 1.0201027393341064,grad_norm: 0.999999076800235, iteration: 251913
loss: 0.9631190896034241,grad_norm: 0.8243093723575107, iteration: 251914
loss: 1.1264125108718872,grad_norm: 0.9999996744139726, iteration: 251915
loss: 0.9683916568756104,grad_norm: 0.8142418230362143, iteration: 251916
loss: 0.9896846413612366,grad_norm: 0.7909865405528791, iteration: 251917
loss: 1.0309520959854126,grad_norm: 0.7254422854626166, iteration: 251918
loss: 0.98442542552948,grad_norm: 0.8134754997611608, iteration: 251919
loss: 1.0032529830932617,grad_norm: 0.953414533086021, iteration: 251920
loss: 1.0109695196151733,grad_norm: 0.9545024169986577, iteration: 251921
loss: 1.0312806367874146,grad_norm: 0.9526332457814711, iteration: 251922
loss: 0.9811528325080872,grad_norm: 0.8098961494483632, iteration: 251923
loss: 0.9495664834976196,grad_norm: 0.8830468945402308, iteration: 251924
loss: 1.0789412260055542,grad_norm: 0.9999994238624877, iteration: 251925
loss: 1.0200947523117065,grad_norm: 0.884037697489351, iteration: 251926
loss: 0.997331976890564,grad_norm: 0.7840899637139727, iteration: 251927
loss: 0.9557262063026428,grad_norm: 0.994905869002419, iteration: 251928
loss: 1.0010422468185425,grad_norm: 0.9999990899188902, iteration: 251929
loss: 0.996543824672699,grad_norm: 0.9999990189900607, iteration: 251930
loss: 1.0128170251846313,grad_norm: 0.8518611309666866, iteration: 251931
loss: 0.9923458695411682,grad_norm: 0.9999999067074523, iteration: 251932
loss: 1.008918046951294,grad_norm: 0.9999991713259151, iteration: 251933
loss: 0.9799058437347412,grad_norm: 0.9999991272955616, iteration: 251934
loss: 0.9870427846908569,grad_norm: 0.9956287795388258, iteration: 251935
loss: 1.0301657915115356,grad_norm: 0.9091299042812256, iteration: 251936
loss: 0.985868513584137,grad_norm: 0.8451400067310593, iteration: 251937
loss: 0.994803249835968,grad_norm: 0.8225191784566418, iteration: 251938
loss: 1.029999852180481,grad_norm: 0.9288585005809321, iteration: 251939
loss: 1.0041202306747437,grad_norm: 0.9939367126348918, iteration: 251940
loss: 0.9940215945243835,grad_norm: 0.8670318544539565, iteration: 251941
loss: 1.029226303100586,grad_norm: 0.8444705308318403, iteration: 251942
loss: 1.13216233253479,grad_norm: 0.9999990318306586, iteration: 251943
loss: 0.937648355960846,grad_norm: 0.9216803206636162, iteration: 251944
loss: 0.9695337414741516,grad_norm: 0.7672072215899305, iteration: 251945
loss: 1.0195848941802979,grad_norm: 0.7940139916761072, iteration: 251946
loss: 0.9974948167800903,grad_norm: 0.8497272096508326, iteration: 251947
loss: 1.0082809925079346,grad_norm: 0.7646133735222613, iteration: 251948
loss: 0.9839988350868225,grad_norm: 0.8035739935934173, iteration: 251949
loss: 1.0144551992416382,grad_norm: 0.8601131940582186, iteration: 251950
loss: 1.017119288444519,grad_norm: 0.9999990781168384, iteration: 251951
loss: 1.010044813156128,grad_norm: 0.7996062919275078, iteration: 251952
loss: 0.9800928831100464,grad_norm: 0.8426024763796853, iteration: 251953
loss: 0.9830346703529358,grad_norm: 0.9999991751999644, iteration: 251954
loss: 1.0440257787704468,grad_norm: 0.9999998075040765, iteration: 251955
loss: 0.9853777885437012,grad_norm: 0.9442963744396439, iteration: 251956
loss: 0.9890908598899841,grad_norm: 0.957022630591714, iteration: 251957
loss: 1.0147199630737305,grad_norm: 0.999999059009099, iteration: 251958
loss: 1.0953936576843262,grad_norm: 0.9999993921845942, iteration: 251959
loss: 1.036729335784912,grad_norm: 0.8076441815629113, iteration: 251960
loss: 1.0959360599517822,grad_norm: 0.9999994723203139, iteration: 251961
loss: 1.009069561958313,grad_norm: 0.8933098215665002, iteration: 251962
loss: 0.9709274768829346,grad_norm: 0.9895391664620493, iteration: 251963
loss: 0.968039870262146,grad_norm: 0.9675179022667526, iteration: 251964
loss: 1.0194166898727417,grad_norm: 0.9980615676923643, iteration: 251965
loss: 1.0123504400253296,grad_norm: 0.9265608154040926, iteration: 251966
loss: 0.9844499230384827,grad_norm: 0.9242529218833746, iteration: 251967
loss: 1.0097252130508423,grad_norm: 0.8930726416621153, iteration: 251968
loss: 1.0346271991729736,grad_norm: 0.7997161391677318, iteration: 251969
loss: 1.0018914937973022,grad_norm: 0.9447516153351602, iteration: 251970
loss: 1.0078946352005005,grad_norm: 0.8286026794654457, iteration: 251971
loss: 1.0402662754058838,grad_norm: 0.8505768611184271, iteration: 251972
loss: 1.0162684917449951,grad_norm: 0.9734898347404265, iteration: 251973
loss: 1.0184532403945923,grad_norm: 0.9245536541742077, iteration: 251974
loss: 1.021222710609436,grad_norm: 0.7183698208570047, iteration: 251975
loss: 0.9941485524177551,grad_norm: 0.8475301021592676, iteration: 251976
loss: 0.9651753306388855,grad_norm: 0.8271500681285451, iteration: 251977
loss: 1.0281816720962524,grad_norm: 0.9999990949043968, iteration: 251978
loss: 1.0101231336593628,grad_norm: 0.9999996315536432, iteration: 251979
loss: 1.0080828666687012,grad_norm: 0.9087131394995303, iteration: 251980
loss: 0.9988911747932434,grad_norm: 0.999999262021464, iteration: 251981
loss: 1.0176377296447754,grad_norm: 0.9064069561538013, iteration: 251982
loss: 1.0077065229415894,grad_norm: 0.9513059974869692, iteration: 251983
loss: 1.0061057806015015,grad_norm: 0.999998984105288, iteration: 251984
loss: 1.0298815965652466,grad_norm: 0.9279035493303677, iteration: 251985
loss: 1.0047672986984253,grad_norm: 0.7738503725525686, iteration: 251986
loss: 0.9674343466758728,grad_norm: 0.8529887729933341, iteration: 251987
loss: 0.9942119717597961,grad_norm: 0.9999990085982405, iteration: 251988
loss: 0.9764797687530518,grad_norm: 0.8124944475201104, iteration: 251989
loss: 0.9747743606567383,grad_norm: 0.9999998625111343, iteration: 251990
loss: 0.988002359867096,grad_norm: 0.7826986099877697, iteration: 251991
loss: 1.0418342351913452,grad_norm: 0.9999992311337697, iteration: 251992
loss: 0.9539308547973633,grad_norm: 0.986253789446378, iteration: 251993
loss: 1.0219825506210327,grad_norm: 0.9744107729449227, iteration: 251994
loss: 0.9990763068199158,grad_norm: 0.8644065663064859, iteration: 251995
loss: 0.9949808716773987,grad_norm: 0.9281689142163552, iteration: 251996
loss: 0.9838009476661682,grad_norm: 0.8695076815337845, iteration: 251997
loss: 0.9836074709892273,grad_norm: 0.8611013518360909, iteration: 251998
loss: 1.0073288679122925,grad_norm: 0.8664552433083024, iteration: 251999
loss: 0.9806046485900879,grad_norm: 0.852123061238789, iteration: 252000
loss: 1.0456767082214355,grad_norm: 0.9999993709868331, iteration: 252001
loss: 0.9998793601989746,grad_norm: 0.8441940942516716, iteration: 252002
loss: 0.990074098110199,grad_norm: 0.983191682166305, iteration: 252003
loss: 0.9446117877960205,grad_norm: 0.8518633903185373, iteration: 252004
loss: 0.9845014810562134,grad_norm: 0.9001106797610633, iteration: 252005
loss: 1.0004469156265259,grad_norm: 0.9999992193563271, iteration: 252006
loss: 1.0225163698196411,grad_norm: 0.999998966323329, iteration: 252007
loss: 0.9693205952644348,grad_norm: 0.9206249302799824, iteration: 252008
loss: 1.0249059200286865,grad_norm: 0.9999990290915947, iteration: 252009
loss: 1.1642351150512695,grad_norm: 0.9999995865866037, iteration: 252010
loss: 1.0464006662368774,grad_norm: 0.9999999155330207, iteration: 252011
loss: 0.9748477935791016,grad_norm: 0.9671985892658591, iteration: 252012
loss: 0.999691903591156,grad_norm: 0.8454119156924104, iteration: 252013
loss: 1.019968867301941,grad_norm: 0.7910057933462905, iteration: 252014
loss: 0.991729199886322,grad_norm: 0.999999267024802, iteration: 252015
loss: 1.0122675895690918,grad_norm: 0.9958331025545659, iteration: 252016
loss: 0.9856594204902649,grad_norm: 0.9999991487749265, iteration: 252017
loss: 0.9793457984924316,grad_norm: 0.9283697237646497, iteration: 252018
loss: 1.0116063356399536,grad_norm: 0.9067113383201891, iteration: 252019
loss: 1.0040297508239746,grad_norm: 0.9999993073236297, iteration: 252020
loss: 0.9724117517471313,grad_norm: 0.9394028527729724, iteration: 252021
loss: 0.9756618738174438,grad_norm: 0.8871380059658464, iteration: 252022
loss: 1.0377708673477173,grad_norm: 0.9999993099387651, iteration: 252023
loss: 0.9808251857757568,grad_norm: 0.9923078137398303, iteration: 252024
loss: 1.021910548210144,grad_norm: 0.979867102046549, iteration: 252025
loss: 1.0482345819473267,grad_norm: 0.9999991191860116, iteration: 252026
loss: 1.0063542127609253,grad_norm: 0.9121721127168257, iteration: 252027
loss: 1.0538767576217651,grad_norm: 0.9999991183273689, iteration: 252028
loss: 0.9763437509536743,grad_norm: 0.7765770174622384, iteration: 252029
loss: 0.9982985258102417,grad_norm: 0.9782127560549141, iteration: 252030
loss: 0.9988040924072266,grad_norm: 0.9999993273360516, iteration: 252031
loss: 0.9812554717063904,grad_norm: 0.8448901488003329, iteration: 252032
loss: 1.0339586734771729,grad_norm: 0.9637132250399568, iteration: 252033
loss: 0.9930362105369568,grad_norm: 0.8407195110875894, iteration: 252034
loss: 1.0012233257293701,grad_norm: 0.8148207841890088, iteration: 252035
loss: 0.9894457459449768,grad_norm: 0.9999990681759489, iteration: 252036
loss: 1.0057802200317383,grad_norm: 0.9999990973247683, iteration: 252037
loss: 1.0237287282943726,grad_norm: 0.8200759840366555, iteration: 252038
loss: 0.9799332618713379,grad_norm: 0.8818573135217015, iteration: 252039
loss: 1.0074079036712646,grad_norm: 0.8843841414805895, iteration: 252040
loss: 1.0130517482757568,grad_norm: 0.8519277105119268, iteration: 252041
loss: 0.9754130840301514,grad_norm: 0.8172242537908215, iteration: 252042
loss: 0.993473470211029,grad_norm: 0.9669666015229853, iteration: 252043
loss: 1.1453357934951782,grad_norm: 0.9949625396314026, iteration: 252044
loss: 0.9727111458778381,grad_norm: 0.8707651392771976, iteration: 252045
loss: 1.0259770154953003,grad_norm: 0.9999991525138705, iteration: 252046
loss: 0.9825154542922974,grad_norm: 0.999999379095368, iteration: 252047
loss: 1.0299391746520996,grad_norm: 0.9203507445504403, iteration: 252048
loss: 1.046540379524231,grad_norm: 0.9999995507703343, iteration: 252049
loss: 0.9744437336921692,grad_norm: 0.9302997745475456, iteration: 252050
loss: 0.9823762774467468,grad_norm: 0.7755688418832115, iteration: 252051
loss: 1.0121270418167114,grad_norm: 0.9181377823074729, iteration: 252052
loss: 0.9523071050643921,grad_norm: 0.9617257085034455, iteration: 252053
loss: 1.0152984857559204,grad_norm: 0.9174694277063254, iteration: 252054
loss: 0.9846030473709106,grad_norm: 0.9252426338698699, iteration: 252055
loss: 0.9800267219543457,grad_norm: 0.8679865955064228, iteration: 252056
loss: 1.0301300287246704,grad_norm: 0.9331424492696336, iteration: 252057
loss: 0.992892861366272,grad_norm: 0.9999991113772441, iteration: 252058
loss: 0.992179274559021,grad_norm: 0.8973407822920207, iteration: 252059
loss: 0.9826831221580505,grad_norm: 0.8377842897180646, iteration: 252060
loss: 1.0586466789245605,grad_norm: 0.9999995520430597, iteration: 252061
loss: 1.0335314273834229,grad_norm: 0.8476962824702536, iteration: 252062
loss: 1.0159245729446411,grad_norm: 0.6787109762551782, iteration: 252063
loss: 1.0186846256256104,grad_norm: 0.893991124175491, iteration: 252064
loss: 0.9768034815788269,grad_norm: 0.8448142446288458, iteration: 252065
loss: 1.0112996101379395,grad_norm: 0.9711022467812909, iteration: 252066
loss: 0.9817611575126648,grad_norm: 0.9999990473890138, iteration: 252067
loss: 0.9803690910339355,grad_norm: 0.7615655002397617, iteration: 252068
loss: 0.9629467129707336,grad_norm: 0.8588327784881002, iteration: 252069
loss: 1.0098488330841064,grad_norm: 0.9782623138565087, iteration: 252070
loss: 0.9810170531272888,grad_norm: 0.9999994208317411, iteration: 252071
loss: 0.9722627997398376,grad_norm: 0.741262686658104, iteration: 252072
loss: 0.9798622131347656,grad_norm: 0.88280201824004, iteration: 252073
loss: 1.006731629371643,grad_norm: 0.8451212434065389, iteration: 252074
loss: 0.9962658882141113,grad_norm: 0.9999992489798082, iteration: 252075
loss: 1.0061581134796143,grad_norm: 0.892078340570092, iteration: 252076
loss: 0.9704514145851135,grad_norm: 0.9419775631747488, iteration: 252077
loss: 0.9997576475143433,grad_norm: 0.6731120669584639, iteration: 252078
loss: 1.0051119327545166,grad_norm: 0.9999989797594593, iteration: 252079
loss: 1.0291869640350342,grad_norm: 0.906386645454882, iteration: 252080
loss: 1.0066182613372803,grad_norm: 0.9999992188582443, iteration: 252081
loss: 1.0255931615829468,grad_norm: 0.9999990316782499, iteration: 252082
loss: 0.9826039671897888,grad_norm: 0.9083535622208729, iteration: 252083
loss: 0.9538150429725647,grad_norm: 0.9177055466106164, iteration: 252084
loss: 1.0005568265914917,grad_norm: 0.9471167666326022, iteration: 252085
loss: 1.010962724685669,grad_norm: 0.8473250743901307, iteration: 252086
loss: 0.97243332862854,grad_norm: 0.7973465464865144, iteration: 252087
loss: 0.9740529656410217,grad_norm: 0.8108114659712388, iteration: 252088
loss: 1.1160622835159302,grad_norm: 0.9999990405656792, iteration: 252089
loss: 0.9760982990264893,grad_norm: 0.8921290112602496, iteration: 252090
loss: 1.012645959854126,grad_norm: 0.8334872702474018, iteration: 252091
loss: 0.9653177261352539,grad_norm: 0.8853272222292664, iteration: 252092
loss: 1.0560309886932373,grad_norm: 0.9067232432057017, iteration: 252093
loss: 1.02774178981781,grad_norm: 0.8303096588964607, iteration: 252094
loss: 1.0039894580841064,grad_norm: 0.8902307770305018, iteration: 252095
loss: 0.968537449836731,grad_norm: 0.9999990727830351, iteration: 252096
loss: 0.9684702754020691,grad_norm: 0.913513204057642, iteration: 252097
loss: 0.9535576105117798,grad_norm: 0.9999990652329386, iteration: 252098
loss: 0.9738813042640686,grad_norm: 0.9999997056812291, iteration: 252099
loss: 1.0221582651138306,grad_norm: 0.9830845616529316, iteration: 252100
loss: 0.9790005683898926,grad_norm: 0.820388119343522, iteration: 252101
loss: 1.0118513107299805,grad_norm: 0.89440001181441, iteration: 252102
loss: 1.0011383295059204,grad_norm: 0.9514631141986402, iteration: 252103
loss: 1.0072184801101685,grad_norm: 0.9999990998349146, iteration: 252104
loss: 1.056929349899292,grad_norm: 0.999999037230883, iteration: 252105
loss: 0.9987719058990479,grad_norm: 0.8389205542150482, iteration: 252106
loss: 1.03364098072052,grad_norm: 0.9999991385877364, iteration: 252107
loss: 1.0047011375427246,grad_norm: 0.999999844528075, iteration: 252108
loss: 0.9822213649749756,grad_norm: 0.9999991126956272, iteration: 252109
loss: 0.9782682061195374,grad_norm: 0.7790305868960368, iteration: 252110
loss: 1.067183494567871,grad_norm: 0.9999995285309142, iteration: 252111
loss: 0.9520413875579834,grad_norm: 0.9104651055946575, iteration: 252112
loss: 1.0141732692718506,grad_norm: 0.9652257738297918, iteration: 252113
loss: 1.0255086421966553,grad_norm: 0.9860742537636156, iteration: 252114
loss: 1.002118468284607,grad_norm: 0.9646335045820925, iteration: 252115
loss: 1.0039674043655396,grad_norm: 0.9999990885366841, iteration: 252116
loss: 1.130118727684021,grad_norm: 0.9999991804088735, iteration: 252117
loss: 1.0138566493988037,grad_norm: 0.8634851687457002, iteration: 252118
loss: 1.011789083480835,grad_norm: 0.8137591141831649, iteration: 252119
loss: 0.9981350302696228,grad_norm: 0.9999991235471897, iteration: 252120
loss: 0.9995188117027283,grad_norm: 0.9715365833665416, iteration: 252121
loss: 1.0052366256713867,grad_norm: 0.8770442028987232, iteration: 252122
loss: 0.9995802044868469,grad_norm: 0.7800381753916171, iteration: 252123
loss: 0.9877060055732727,grad_norm: 0.7173020989416994, iteration: 252124
loss: 1.0152719020843506,grad_norm: 0.9999992570830747, iteration: 252125
loss: 0.9889518618583679,grad_norm: 0.7539898563039582, iteration: 252126
loss: 0.9453260898590088,grad_norm: 0.9999992689031895, iteration: 252127
loss: 0.9528207778930664,grad_norm: 0.9072207850892243, iteration: 252128
loss: 0.98050856590271,grad_norm: 0.8972923803761239, iteration: 252129
loss: 0.9838387966156006,grad_norm: 0.8079871368062338, iteration: 252130
loss: 1.013210654258728,grad_norm: 0.9741636147717941, iteration: 252131
loss: 0.9887884855270386,grad_norm: 0.9812369007381975, iteration: 252132
loss: 0.9998474717140198,grad_norm: 0.9218128759173967, iteration: 252133
loss: 0.9945721626281738,grad_norm: 0.8529698348909933, iteration: 252134
loss: 0.9826396703720093,grad_norm: 0.8309213805536274, iteration: 252135
loss: 0.9819011092185974,grad_norm: 0.8701953841699225, iteration: 252136
loss: 0.9961791038513184,grad_norm: 0.858243938049747, iteration: 252137
loss: 1.022426724433899,grad_norm: 0.9299577931210872, iteration: 252138
loss: 0.9903154969215393,grad_norm: 0.7907632625870453, iteration: 252139
loss: 0.9907590746879578,grad_norm: 0.9421366825258776, iteration: 252140
loss: 0.9875967502593994,grad_norm: 0.8199028969714639, iteration: 252141
loss: 0.9905459880828857,grad_norm: 0.8074120480241023, iteration: 252142
loss: 1.0111085176467896,grad_norm: 0.9736197509488995, iteration: 252143
loss: 1.0046427249908447,grad_norm: 0.8267363807708473, iteration: 252144
loss: 0.990841805934906,grad_norm: 0.9999991373816409, iteration: 252145
loss: 1.003284215927124,grad_norm: 0.9237099956995571, iteration: 252146
loss: 1.0032401084899902,grad_norm: 0.8678334799190287, iteration: 252147
loss: 0.9809281826019287,grad_norm: 0.9235607798318839, iteration: 252148
loss: 0.9994294047355652,grad_norm: 0.865387919613442, iteration: 252149
loss: 1.0602048635482788,grad_norm: 0.9999992896832686, iteration: 252150
loss: 0.9984527826309204,grad_norm: 0.9884968448810656, iteration: 252151
loss: 1.0151252746582031,grad_norm: 0.9797467921465159, iteration: 252152
loss: 1.0459551811218262,grad_norm: 0.9999995412513607, iteration: 252153
loss: 1.0300729274749756,grad_norm: 0.999999008323684, iteration: 252154
loss: 1.0028785467147827,grad_norm: 0.999999059209567, iteration: 252155
loss: 1.0178091526031494,grad_norm: 0.9999992845988601, iteration: 252156
loss: 1.0108541250228882,grad_norm: 0.9999991263291595, iteration: 252157
loss: 0.956819474697113,grad_norm: 0.8050325002844252, iteration: 252158
loss: 1.0193732976913452,grad_norm: 0.8980870812369254, iteration: 252159
loss: 1.0190272331237793,grad_norm: 0.8322445174993184, iteration: 252160
loss: 1.0682960748672485,grad_norm: 0.9598880560429579, iteration: 252161
loss: 0.9982538819313049,grad_norm: 0.9257115980160612, iteration: 252162
loss: 1.001095175743103,grad_norm: 0.8244966607994487, iteration: 252163
loss: 0.9831939339637756,grad_norm: 0.9818968137842369, iteration: 252164
loss: 0.9995537996292114,grad_norm: 0.8476688334786224, iteration: 252165
loss: 0.9986364841461182,grad_norm: 0.8475370165147377, iteration: 252166
loss: 1.163962483406067,grad_norm: 0.9999990443846597, iteration: 252167
loss: 0.9734097123146057,grad_norm: 0.8888931507462371, iteration: 252168
loss: 0.9781505465507507,grad_norm: 0.8407212300148578, iteration: 252169
loss: 1.011383295059204,grad_norm: 0.9999990001748726, iteration: 252170
loss: 0.9991409182548523,grad_norm: 0.7139441556226551, iteration: 252171
loss: 1.0207685232162476,grad_norm: 0.999999094838145, iteration: 252172
loss: 1.019335150718689,grad_norm: 0.8400824471644287, iteration: 252173
loss: 1.007411241531372,grad_norm: 0.8812413445266077, iteration: 252174
loss: 1.0339443683624268,grad_norm: 0.8807416357155744, iteration: 252175
loss: 0.9608002305030823,grad_norm: 0.8979064833327086, iteration: 252176
loss: 0.9953909516334534,grad_norm: 0.9430906767602522, iteration: 252177
loss: 1.0386320352554321,grad_norm: 0.8858215798180495, iteration: 252178
loss: 1.00702702999115,grad_norm: 0.7665977198027821, iteration: 252179
loss: 1.0103929042816162,grad_norm: 0.999999019641066, iteration: 252180
loss: 0.9998998641967773,grad_norm: 0.8639102926449921, iteration: 252181
loss: 0.9980083703994751,grad_norm: 0.9289525866658551, iteration: 252182
loss: 1.0100558996200562,grad_norm: 0.8992406747237494, iteration: 252183
loss: 1.0423654317855835,grad_norm: 0.9999989680843983, iteration: 252184
loss: 1.0114061832427979,grad_norm: 0.899758927082491, iteration: 252185
loss: 0.9756348133087158,grad_norm: 0.8368564355012392, iteration: 252186
loss: 1.023463487625122,grad_norm: 0.9999997274265392, iteration: 252187
loss: 0.9784021973609924,grad_norm: 0.999998957433883, iteration: 252188
loss: 1.0114226341247559,grad_norm: 0.772268572154403, iteration: 252189
loss: 0.9866936802864075,grad_norm: 0.9696717151524726, iteration: 252190
loss: 0.9795745611190796,grad_norm: 0.9999993639985241, iteration: 252191
loss: 1.0008294582366943,grad_norm: 0.9120500713708443, iteration: 252192
loss: 0.9814300537109375,grad_norm: 0.8696005336425453, iteration: 252193
loss: 1.0049186944961548,grad_norm: 0.8756555792608546, iteration: 252194
loss: 0.9812805652618408,grad_norm: 0.974770273655783, iteration: 252195
loss: 1.0104179382324219,grad_norm: 0.9605250161273678, iteration: 252196
loss: 0.9769136309623718,grad_norm: 0.8080571837312621, iteration: 252197
loss: 0.9902938008308411,grad_norm: 0.7942712709702886, iteration: 252198
loss: 1.0326735973358154,grad_norm: 0.988843460180857, iteration: 252199
loss: 0.986480712890625,grad_norm: 0.8381119144509347, iteration: 252200
loss: 0.9752423763275146,grad_norm: 0.8256590495861095, iteration: 252201
loss: 1.015446424484253,grad_norm: 0.8678872800657847, iteration: 252202
loss: 1.0039172172546387,grad_norm: 0.9294484535688198, iteration: 252203
loss: 1.0031616687774658,grad_norm: 0.9547722039012548, iteration: 252204
loss: 0.9879568815231323,grad_norm: 0.8694150881599156, iteration: 252205
loss: 0.9893547296524048,grad_norm: 0.9215177668947115, iteration: 252206
loss: 1.0168650150299072,grad_norm: 0.9558399738522636, iteration: 252207
loss: 0.9997358918190002,grad_norm: 0.7886440538036644, iteration: 252208
loss: 1.003096342086792,grad_norm: 0.9026403408848633, iteration: 252209
loss: 1.0062843561172485,grad_norm: 0.8258423265458249, iteration: 252210
loss: 0.9842360615730286,grad_norm: 0.8293528555432049, iteration: 252211
loss: 1.0154379606246948,grad_norm: 0.9789280700741486, iteration: 252212
loss: 0.9918414950370789,grad_norm: 0.852549322958158, iteration: 252213
loss: 0.993973970413208,grad_norm: 0.7662717379091225, iteration: 252214
loss: 1.014733910560608,grad_norm: 0.8086831581118124, iteration: 252215
loss: 1.0159698724746704,grad_norm: 0.9999996471285385, iteration: 252216
loss: 1.020991563796997,grad_norm: 0.9356644925183044, iteration: 252217
loss: 1.0042613744735718,grad_norm: 0.8514745704260896, iteration: 252218
loss: 0.9823548793792725,grad_norm: 0.8757548911979081, iteration: 252219
loss: 0.9474498629570007,grad_norm: 0.8173161411853924, iteration: 252220
loss: 1.0002061128616333,grad_norm: 0.9348630000633876, iteration: 252221
loss: 0.9931486248970032,grad_norm: 0.8460198037676513, iteration: 252222
loss: 0.9913012981414795,grad_norm: 0.8032817973313994, iteration: 252223
loss: 0.9881059527397156,grad_norm: 0.7960760356355215, iteration: 252224
loss: 1.0686007738113403,grad_norm: 1.0000000046587283, iteration: 252225
loss: 0.9913380742073059,grad_norm: 0.8226643673293134, iteration: 252226
loss: 1.0805575847625732,grad_norm: 0.9999992413236639, iteration: 252227
loss: 0.9657118320465088,grad_norm: 0.9999996631449418, iteration: 252228
loss: 1.1107038259506226,grad_norm: 0.9999989258306671, iteration: 252229
loss: 1.0083916187286377,grad_norm: 0.9002768438572109, iteration: 252230
loss: 0.9901384711265564,grad_norm: 0.7044957636518129, iteration: 252231
loss: 0.9819034337997437,grad_norm: 0.9708361145473958, iteration: 252232
loss: 1.1200510263442993,grad_norm: 0.9999992341398908, iteration: 252233
loss: 0.9446509480476379,grad_norm: 0.7675613045851643, iteration: 252234
loss: 1.001031756401062,grad_norm: 0.9971622860291736, iteration: 252235
loss: 0.9593964219093323,grad_norm: 0.9251641235770505, iteration: 252236
loss: 0.9897075295448303,grad_norm: 0.8656923719108838, iteration: 252237
loss: 1.0131322145462036,grad_norm: 0.9023172824614433, iteration: 252238
loss: 1.0003553628921509,grad_norm: 0.7890717189998019, iteration: 252239
loss: 1.0121203660964966,grad_norm: 0.8765110314120229, iteration: 252240
loss: 0.9869653582572937,grad_norm: 0.9151733695627342, iteration: 252241
loss: 1.0118921995162964,grad_norm: 0.8895591677049134, iteration: 252242
loss: 1.0199633836746216,grad_norm: 0.7587836461289678, iteration: 252243
loss: 1.0190858840942383,grad_norm: 0.8764519222301049, iteration: 252244
loss: 1.0133386850357056,grad_norm: 0.8560851868005278, iteration: 252245
loss: 1.0679672956466675,grad_norm: 0.9484574118695754, iteration: 252246
loss: 1.0034255981445312,grad_norm: 0.9004778136314816, iteration: 252247
loss: 0.9891030788421631,grad_norm: 0.999998997574126, iteration: 252248
loss: 0.9580716490745544,grad_norm: 0.8657447011794204, iteration: 252249
loss: 0.9611948132514954,grad_norm: 0.9999990074201731, iteration: 252250
loss: 1.0332838296890259,grad_norm: 0.921094931206114, iteration: 252251
loss: 1.012657880783081,grad_norm: 0.9999990740434871, iteration: 252252
loss: 1.003305435180664,grad_norm: 0.8656750567896748, iteration: 252253
loss: 1.0240281820297241,grad_norm: 0.9999993249842375, iteration: 252254
loss: 1.0243754386901855,grad_norm: 0.9999995919476461, iteration: 252255
loss: 0.9995062947273254,grad_norm: 0.9303532579015549, iteration: 252256
loss: 1.0301815271377563,grad_norm: 0.7900804570410622, iteration: 252257
loss: 1.0037932395935059,grad_norm: 0.7372586696431317, iteration: 252258
loss: 1.0190702676773071,grad_norm: 0.9136612026949478, iteration: 252259
loss: 0.9715640544891357,grad_norm: 0.8626818013360542, iteration: 252260
loss: 1.0483694076538086,grad_norm: 0.9845580311873937, iteration: 252261
loss: 0.9855198264122009,grad_norm: 0.7937190887333667, iteration: 252262
loss: 1.008508563041687,grad_norm: 0.9999991157789695, iteration: 252263
loss: 0.960296630859375,grad_norm: 0.9999990098952916, iteration: 252264
loss: 0.9738634824752808,grad_norm: 0.9999990550494187, iteration: 252265
loss: 1.0301955938339233,grad_norm: 0.9757771500266599, iteration: 252266
loss: 1.0256774425506592,grad_norm: 0.8377673017728574, iteration: 252267
loss: 1.125196933746338,grad_norm: 0.9999991971412169, iteration: 252268
loss: 1.0531480312347412,grad_norm: 1.0000000590842693, iteration: 252269
loss: 1.0820379257202148,grad_norm: 0.9937134539267689, iteration: 252270
loss: 1.0274215936660767,grad_norm: 0.9999990227198478, iteration: 252271
loss: 1.0037871599197388,grad_norm: 0.9999992324785331, iteration: 252272
loss: 0.9693403244018555,grad_norm: 0.9481321977388716, iteration: 252273
loss: 1.001456618309021,grad_norm: 0.7754983126063653, iteration: 252274
loss: 1.1006863117218018,grad_norm: 0.9999991433994565, iteration: 252275
loss: 1.0302939414978027,grad_norm: 0.9999990467688681, iteration: 252276
loss: 1.0159964561462402,grad_norm: 0.8344739492819131, iteration: 252277
loss: 1.0212578773498535,grad_norm: 0.8340265375619706, iteration: 252278
loss: 0.9874798059463501,grad_norm: 0.8192926283765267, iteration: 252279
loss: 1.0208886861801147,grad_norm: 0.9999998373218453, iteration: 252280
loss: 1.0387520790100098,grad_norm: 0.8034996400429393, iteration: 252281
loss: 1.0205931663513184,grad_norm: 0.8087477347880019, iteration: 252282
loss: 0.9914757609367371,grad_norm: 0.9999993264184875, iteration: 252283
loss: 0.997485876083374,grad_norm: 0.9999990491240504, iteration: 252284
loss: 1.0180809497833252,grad_norm: 0.9382194375494197, iteration: 252285
loss: 1.0562446117401123,grad_norm: 0.9689203131984792, iteration: 252286
loss: 0.9951526522636414,grad_norm: 0.7745694753033282, iteration: 252287
loss: 1.0133328437805176,grad_norm: 0.955646601998739, iteration: 252288
loss: 1.0654202699661255,grad_norm: 0.872367638170806, iteration: 252289
loss: 1.0113486051559448,grad_norm: 0.8872203607371444, iteration: 252290
loss: 1.0236539840698242,grad_norm: 0.874062696371961, iteration: 252291
loss: 0.9869171380996704,grad_norm: 0.9724659452081502, iteration: 252292
loss: 0.9999973177909851,grad_norm: 0.9439727981917532, iteration: 252293
loss: 1.0076004266738892,grad_norm: 0.9999990111280487, iteration: 252294
loss: 1.0087542533874512,grad_norm: 0.8828309040926663, iteration: 252295
loss: 1.0019094944000244,grad_norm: 0.9611584739472918, iteration: 252296
loss: 1.0019493103027344,grad_norm: 0.9703753167655988, iteration: 252297
loss: 1.0319902896881104,grad_norm: 0.810852718742953, iteration: 252298
loss: 1.0111865997314453,grad_norm: 0.9999990798313252, iteration: 252299
loss: 1.0081019401550293,grad_norm: 0.9116771433666782, iteration: 252300
loss: 1.007009506225586,grad_norm: 0.800515270144591, iteration: 252301
loss: 1.0199034214019775,grad_norm: 0.9237863433038569, iteration: 252302
loss: 0.9869898557662964,grad_norm: 0.8980841709943567, iteration: 252303
loss: 0.991100549697876,grad_norm: 0.999999227199549, iteration: 252304
loss: 0.9738447070121765,grad_norm: 0.846129673421628, iteration: 252305
loss: 0.9798983335494995,grad_norm: 0.8213815970857825, iteration: 252306
loss: 1.1672567129135132,grad_norm: 1.0000000037991201, iteration: 252307
loss: 0.9942803978919983,grad_norm: 0.8352879696813127, iteration: 252308
loss: 0.9654272198677063,grad_norm: 0.8995016797970343, iteration: 252309
loss: 0.9988694190979004,grad_norm: 0.8665931461257305, iteration: 252310
loss: 1.029786467552185,grad_norm: 0.7826835247841566, iteration: 252311
loss: 1.0392370223999023,grad_norm: 0.9999990038944575, iteration: 252312
loss: 0.9662699699401855,grad_norm: 0.8778790510333686, iteration: 252313
loss: 0.9864873290061951,grad_norm: 0.831864629022445, iteration: 252314
loss: 0.9802747368812561,grad_norm: 0.8791758514789978, iteration: 252315
loss: 1.0101923942565918,grad_norm: 0.9999993940901782, iteration: 252316
loss: 0.9866723418235779,grad_norm: 0.9498063350905601, iteration: 252317
loss: 0.99566650390625,grad_norm: 0.937956440885019, iteration: 252318
loss: 0.9758111834526062,grad_norm: 0.9277261959613545, iteration: 252319
loss: 0.9834708571434021,grad_norm: 0.8719779810135505, iteration: 252320
loss: 0.9947521686553955,grad_norm: 0.8468499846049996, iteration: 252321
loss: 0.9891418218612671,grad_norm: 0.8670444327932557, iteration: 252322
loss: 1.0054864883422852,grad_norm: 0.9590198067431536, iteration: 252323
loss: 0.988028883934021,grad_norm: 0.7403457167197378, iteration: 252324
loss: 1.0125396251678467,grad_norm: 0.9604035973531341, iteration: 252325
loss: 0.9974422454833984,grad_norm: 0.982835623863574, iteration: 252326
loss: 1.103227138519287,grad_norm: 0.9999992043030829, iteration: 252327
loss: 0.9923315048217773,grad_norm: 0.8368395151487367, iteration: 252328
loss: 0.9926570653915405,grad_norm: 0.9999991095179964, iteration: 252329
loss: 0.9812822341918945,grad_norm: 0.9083566832704965, iteration: 252330
loss: 0.9822113513946533,grad_norm: 0.9999990813725257, iteration: 252331
loss: 1.0040148496627808,grad_norm: 0.8369932194873425, iteration: 252332
loss: 0.976298451423645,grad_norm: 0.8257285290364534, iteration: 252333
loss: 0.982753336429596,grad_norm: 0.9023774506514387, iteration: 252334
loss: 0.979327917098999,grad_norm: 0.8382820107562926, iteration: 252335
loss: 1.0142204761505127,grad_norm: 0.9999990647394124, iteration: 252336
loss: 1.0236389636993408,grad_norm: 0.8320127263649266, iteration: 252337
loss: 0.980520486831665,grad_norm: 0.9999991429908079, iteration: 252338
loss: 0.9632014036178589,grad_norm: 0.9864569135159311, iteration: 252339
loss: 1.0170679092407227,grad_norm: 0.999999626709725, iteration: 252340
loss: 1.0185257196426392,grad_norm: 0.9999992449702788, iteration: 252341
loss: 1.0356166362762451,grad_norm: 0.9999993657428843, iteration: 252342
loss: 0.939293622970581,grad_norm: 0.9999991460596835, iteration: 252343
loss: 1.0548408031463623,grad_norm: 0.9999995974708098, iteration: 252344
loss: 0.9971637725830078,grad_norm: 0.9999990824622806, iteration: 252345
loss: 1.0185389518737793,grad_norm: 0.9999990884959324, iteration: 252346
loss: 1.0349771976470947,grad_norm: 0.9999996923595892, iteration: 252347
loss: 1.0007846355438232,grad_norm: 0.9544794377834551, iteration: 252348
loss: 0.9877784848213196,grad_norm: 0.7776041237594565, iteration: 252349
loss: 0.9942176938056946,grad_norm: 0.8302816048395211, iteration: 252350
loss: 0.9849603176116943,grad_norm: 0.9087331376188282, iteration: 252351
loss: 1.018386960029602,grad_norm: 0.8289751987185956, iteration: 252352
loss: 0.9688655734062195,grad_norm: 0.9090034068392698, iteration: 252353
loss: 1.0363247394561768,grad_norm: 0.9319271720946704, iteration: 252354
loss: 0.9722453355789185,grad_norm: 0.9335257442423662, iteration: 252355
loss: 1.0425505638122559,grad_norm: 0.9955198441138579, iteration: 252356
loss: 1.0089404582977295,grad_norm: 0.9221072048301154, iteration: 252357
loss: 0.9861749410629272,grad_norm: 0.8267919597023851, iteration: 252358
loss: 1.029492974281311,grad_norm: 0.8014110463196313, iteration: 252359
loss: 1.0396496057510376,grad_norm: 0.8991733061488381, iteration: 252360
loss: 1.0145421028137207,grad_norm: 1.0000000329389802, iteration: 252361
loss: 0.9563747644424438,grad_norm: 0.8822860658802383, iteration: 252362
loss: 0.9798448085784912,grad_norm: 0.8962761665201406, iteration: 252363
loss: 1.043930172920227,grad_norm: 0.9999997445200091, iteration: 252364
loss: 1.023376226425171,grad_norm: 0.9054331086431304, iteration: 252365
loss: 0.9901162385940552,grad_norm: 0.8473317041501779, iteration: 252366
loss: 0.9792662262916565,grad_norm: 0.7089009620630542, iteration: 252367
loss: 1.0183385610580444,grad_norm: 0.9999991098640589, iteration: 252368
loss: 1.0025559663772583,grad_norm: 0.8328161835310834, iteration: 252369
loss: 0.9849467873573303,grad_norm: 0.9835198595912775, iteration: 252370
loss: 0.9853957295417786,grad_norm: 0.8491409992299379, iteration: 252371
loss: 0.9789273738861084,grad_norm: 0.8858210707880299, iteration: 252372
loss: 1.0010716915130615,grad_norm: 0.99999926007649, iteration: 252373
loss: 0.9793579578399658,grad_norm: 0.9824973258744155, iteration: 252374
loss: 1.023040533065796,grad_norm: 0.9922495851795741, iteration: 252375
loss: 0.9899695515632629,grad_norm: 0.8836680491987033, iteration: 252376
loss: 1.009445071220398,grad_norm: 0.8697634502998323, iteration: 252377
loss: 0.955953061580658,grad_norm: 0.8147030521631068, iteration: 252378
loss: 0.9642040729522705,grad_norm: 0.9271056198040982, iteration: 252379
loss: 1.0197745561599731,grad_norm: 0.976639325252872, iteration: 252380
loss: 1.0142650604248047,grad_norm: 0.7375664914473797, iteration: 252381
loss: 1.0176692008972168,grad_norm: 0.8092793810966533, iteration: 252382
loss: 0.9649840593338013,grad_norm: 0.9999989026126223, iteration: 252383
loss: 0.9614253044128418,grad_norm: 0.9262510272194144, iteration: 252384
loss: 0.9496637582778931,grad_norm: 0.9611343527118091, iteration: 252385
loss: 1.01776123046875,grad_norm: 0.8003048097308803, iteration: 252386
loss: 1.0305918455123901,grad_norm: 0.8616809628904515, iteration: 252387
loss: 1.0154286623001099,grad_norm: 0.8288017536068324, iteration: 252388
loss: 0.9943015575408936,grad_norm: 0.8633023324496694, iteration: 252389
loss: 1.0038995742797852,grad_norm: 0.9201729665126328, iteration: 252390
loss: 1.0738517045974731,grad_norm: 0.9999997816919153, iteration: 252391
loss: 1.0110912322998047,grad_norm: 0.746925410669558, iteration: 252392
loss: 0.9683337211608887,grad_norm: 0.9999991229753677, iteration: 252393
loss: 0.9869986772537231,grad_norm: 0.9627898005254968, iteration: 252394
loss: 0.9611729979515076,grad_norm: 0.99999901354426, iteration: 252395
loss: 1.0101672410964966,grad_norm: 0.8966156355270033, iteration: 252396
loss: 0.9916473627090454,grad_norm: 0.7873655732334984, iteration: 252397
loss: 0.9961955547332764,grad_norm: 0.7730502761127312, iteration: 252398
loss: 1.0107793807983398,grad_norm: 0.8746778555901995, iteration: 252399
loss: 0.9957392811775208,grad_norm: 0.9378488558995333, iteration: 252400
loss: 1.0538033246994019,grad_norm: 0.9999998026190483, iteration: 252401
loss: 0.9885360598564148,grad_norm: 0.9999991290065169, iteration: 252402
loss: 1.0032603740692139,grad_norm: 0.8527891030699539, iteration: 252403
loss: 1.0115818977355957,grad_norm: 0.9244255418706112, iteration: 252404
loss: 1.0553613901138306,grad_norm: 0.9999990151620687, iteration: 252405
loss: 1.019229531288147,grad_norm: 0.8638525881590238, iteration: 252406
loss: 1.0406643152236938,grad_norm: 0.8957213615616838, iteration: 252407
loss: 1.0201787948608398,grad_norm: 0.7829956714596368, iteration: 252408
loss: 0.9856781363487244,grad_norm: 0.9223630146340706, iteration: 252409
loss: 0.9949001669883728,grad_norm: 0.9707719300884079, iteration: 252410
loss: 0.9926341772079468,grad_norm: 0.9999988640024049, iteration: 252411
loss: 1.0121405124664307,grad_norm: 0.9999990740692911, iteration: 252412
loss: 0.9784836173057556,grad_norm: 0.8222740773917852, iteration: 252413
loss: 1.0134197473526,grad_norm: 0.8010830941339366, iteration: 252414
loss: 0.9858601093292236,grad_norm: 0.9999990296200756, iteration: 252415
loss: 0.9389269351959229,grad_norm: 0.9483102419591187, iteration: 252416
loss: 1.0165940523147583,grad_norm: 0.9192113167979008, iteration: 252417
loss: 1.0258084535598755,grad_norm: 0.9999990520874659, iteration: 252418
loss: 1.0028858184814453,grad_norm: 0.8954750721164484, iteration: 252419
loss: 0.9833739399909973,grad_norm: 0.937969055533725, iteration: 252420
loss: 1.0219087600708008,grad_norm: 0.9326630367462004, iteration: 252421
loss: 0.9724191427230835,grad_norm: 0.9999990409464732, iteration: 252422
loss: 0.9477863907814026,grad_norm: 0.9491296756736004, iteration: 252423
loss: 1.0043089389801025,grad_norm: 0.9999990502262902, iteration: 252424
loss: 0.9954696893692017,grad_norm: 0.9999989193809952, iteration: 252425
loss: 0.9910967946052551,grad_norm: 0.8063293773240374, iteration: 252426
loss: 0.9836224913597107,grad_norm: 0.9023808915637909, iteration: 252427
loss: 0.979621171951294,grad_norm: 0.948772402599594, iteration: 252428
loss: 0.9986225366592407,grad_norm: 0.9074589609763246, iteration: 252429
loss: 1.0615253448486328,grad_norm: 0.9871549055265879, iteration: 252430
loss: 1.0137650966644287,grad_norm: 0.7305304222229924, iteration: 252431
loss: 1.0192468166351318,grad_norm: 0.9999998786408772, iteration: 252432
loss: 1.0053106546401978,grad_norm: 0.9999989755189933, iteration: 252433
loss: 1.0024553537368774,grad_norm: 0.7990742882908646, iteration: 252434
loss: 0.9848418235778809,grad_norm: 0.9821581049893309, iteration: 252435
loss: 0.9582953453063965,grad_norm: 0.9651753184260367, iteration: 252436
loss: 1.0131131410598755,grad_norm: 0.9189676587393939, iteration: 252437
loss: 1.0004584789276123,grad_norm: 0.9999989896012758, iteration: 252438
loss: 1.0005022287368774,grad_norm: 0.8014121208731905, iteration: 252439
loss: 1.0001659393310547,grad_norm: 0.9483874774778963, iteration: 252440
loss: 0.9816828370094299,grad_norm: 0.7024331540349362, iteration: 252441
loss: 1.0100655555725098,grad_norm: 0.8365438735608418, iteration: 252442
loss: 1.0135408639907837,grad_norm: 0.9999992130368303, iteration: 252443
loss: 1.0147335529327393,grad_norm: 0.8661837225168832, iteration: 252444
loss: 0.9899954199790955,grad_norm: 0.854186012517296, iteration: 252445
loss: 0.958643913269043,grad_norm: 0.8230734104837667, iteration: 252446
loss: 1.0728845596313477,grad_norm: 0.9999994221064414, iteration: 252447
loss: 0.9730073809623718,grad_norm: 0.8519634833028819, iteration: 252448
loss: 1.0436121225357056,grad_norm: 0.9999997811648373, iteration: 252449
loss: 0.9805772304534912,grad_norm: 0.8318625195862135, iteration: 252450
loss: 1.0192145109176636,grad_norm: 0.9367627285373858, iteration: 252451
loss: 0.9903442859649658,grad_norm: 0.8725150694089744, iteration: 252452
loss: 1.011374831199646,grad_norm: 0.8950450285065196, iteration: 252453
loss: 0.9716052412986755,grad_norm: 0.9515212120777454, iteration: 252454
loss: 1.0202648639678955,grad_norm: 0.9999994273956659, iteration: 252455
loss: 0.9583425521850586,grad_norm: 0.8877384720395517, iteration: 252456
loss: 1.0234060287475586,grad_norm: 0.9999993789439626, iteration: 252457
loss: 1.00920569896698,grad_norm: 0.9292702434559034, iteration: 252458
loss: 1.104557752609253,grad_norm: 0.9999999200595374, iteration: 252459
loss: 1.0590014457702637,grad_norm: 0.999999136551347, iteration: 252460
loss: 0.9832715392112732,grad_norm: 0.8965562511914597, iteration: 252461
loss: 0.9957357048988342,grad_norm: 0.925494486458217, iteration: 252462
loss: 0.9805853366851807,grad_norm: 0.8126192071764925, iteration: 252463
loss: 1.0148913860321045,grad_norm: 0.9184831421000326, iteration: 252464
loss: 0.9670675992965698,grad_norm: 0.9393495850457019, iteration: 252465
loss: 1.0019347667694092,grad_norm: 0.9659901134934755, iteration: 252466
loss: 1.0224156379699707,grad_norm: 0.9999990353578452, iteration: 252467
loss: 0.9792589545249939,grad_norm: 0.9999989869886654, iteration: 252468
loss: 1.0009881258010864,grad_norm: 0.7224594983620376, iteration: 252469
loss: 0.9672582745552063,grad_norm: 0.9018577880951686, iteration: 252470
loss: 1.0050296783447266,grad_norm: 0.8134085641010956, iteration: 252471
loss: 0.9705773591995239,grad_norm: 0.851726527183406, iteration: 252472
loss: 1.042716145515442,grad_norm: 0.82835120860498, iteration: 252473
loss: 1.0228112936019897,grad_norm: 0.7523411671001364, iteration: 252474
loss: 0.981041431427002,grad_norm: 0.9389649211628414, iteration: 252475
loss: 0.990891695022583,grad_norm: 0.8338783525788437, iteration: 252476
loss: 0.9919789433479309,grad_norm: 0.9999990779316011, iteration: 252477
loss: 1.0273125171661377,grad_norm: 0.9977513835619668, iteration: 252478
loss: 1.078334093093872,grad_norm: 0.9999994597128118, iteration: 252479
loss: 1.0059558153152466,grad_norm: 0.9699336321617796, iteration: 252480
loss: 1.0181026458740234,grad_norm: 0.9277997103923386, iteration: 252481
loss: 1.0075219869613647,grad_norm: 0.7834164508512361, iteration: 252482
loss: 1.0599124431610107,grad_norm: 0.9999995634589625, iteration: 252483
loss: 0.9745299816131592,grad_norm: 0.999999459862487, iteration: 252484
loss: 0.9911043643951416,grad_norm: 0.7403253692747714, iteration: 252485
loss: 1.0616759061813354,grad_norm: 0.999999707850162, iteration: 252486
loss: 1.005001425743103,grad_norm: 0.8233519236801173, iteration: 252487
loss: 1.0196105241775513,grad_norm: 0.9999991081281394, iteration: 252488
loss: 1.0195528268814087,grad_norm: 0.8420036457891069, iteration: 252489
loss: 1.018183708190918,grad_norm: 0.9999991790774211, iteration: 252490
loss: 1.0054876804351807,grad_norm: 0.9999991842064083, iteration: 252491
loss: 0.9884299635887146,grad_norm: 0.9999990100972261, iteration: 252492
loss: 1.0036113262176514,grad_norm: 0.9683962358494321, iteration: 252493
loss: 0.9808064699172974,grad_norm: 0.9111061641995001, iteration: 252494
loss: 1.0042906999588013,grad_norm: 0.8424685434055235, iteration: 252495
loss: 0.9767040014266968,grad_norm: 0.8398580925497009, iteration: 252496
loss: 0.9901717901229858,grad_norm: 0.9999990789906095, iteration: 252497
loss: 0.9843041300773621,grad_norm: 0.8870697599443814, iteration: 252498
loss: 1.0046091079711914,grad_norm: 0.9190259123407504, iteration: 252499
loss: 1.0096999406814575,grad_norm: 0.8422152212330937, iteration: 252500
loss: 0.9954808354377747,grad_norm: 0.9281233181526237, iteration: 252501
loss: 1.0287657976150513,grad_norm: 0.9083723975034889, iteration: 252502
loss: 0.9648791551589966,grad_norm: 0.9055422852513206, iteration: 252503
loss: 0.9778836369514465,grad_norm: 0.9999991057376132, iteration: 252504
loss: 0.9804351329803467,grad_norm: 0.8926634155809925, iteration: 252505
loss: 0.9799327850341797,grad_norm: 0.9399164441879739, iteration: 252506
loss: 1.00542414188385,grad_norm: 0.8212013752869632, iteration: 252507
loss: 0.9946386814117432,grad_norm: 0.8981311060512285, iteration: 252508
loss: 0.9786328673362732,grad_norm: 0.8133147649719427, iteration: 252509
loss: 1.016922116279602,grad_norm: 0.8013722531245289, iteration: 252510
loss: 1.0128657817840576,grad_norm: 0.8694452828453741, iteration: 252511
loss: 1.0128328800201416,grad_norm: 0.9530288382944602, iteration: 252512
loss: 0.9538782238960266,grad_norm: 0.754876568311745, iteration: 252513
loss: 0.9959653615951538,grad_norm: 0.7382604854289903, iteration: 252514
loss: 1.0481928586959839,grad_norm: 0.9550486571949006, iteration: 252515
loss: 1.0437475442886353,grad_norm: 0.8049488718096116, iteration: 252516
loss: 0.9916309118270874,grad_norm: 0.8242206589591812, iteration: 252517
loss: 0.9823102951049805,grad_norm: 0.8092710153204687, iteration: 252518
loss: 0.9590522646903992,grad_norm: 0.9109948149033233, iteration: 252519
loss: 1.023270845413208,grad_norm: 0.9788912573504353, iteration: 252520
loss: 0.9719454646110535,grad_norm: 0.9874992921505769, iteration: 252521
loss: 0.9978052973747253,grad_norm: 0.888510584500395, iteration: 252522
loss: 0.9513226747512817,grad_norm: 0.7709087623966305, iteration: 252523
loss: 1.016371726989746,grad_norm: 0.893910223057985, iteration: 252524
loss: 0.9756723642349243,grad_norm: 0.951948330655799, iteration: 252525
loss: 1.0302155017852783,grad_norm: 0.9999991105647865, iteration: 252526
loss: 1.0092265605926514,grad_norm: 0.9444592257720044, iteration: 252527
loss: 1.018049955368042,grad_norm: 0.8489184009597368, iteration: 252528
loss: 0.9783918857574463,grad_norm: 0.9254366694738183, iteration: 252529
loss: 1.0578281879425049,grad_norm: 0.9999996008792993, iteration: 252530
loss: 0.9923114776611328,grad_norm: 0.8877814333521403, iteration: 252531
loss: 0.9830729365348816,grad_norm: 0.8926218463160668, iteration: 252532
loss: 1.0237281322479248,grad_norm: 0.9999992797312728, iteration: 252533
loss: 1.0056941509246826,grad_norm: 0.7502965451010574, iteration: 252534
loss: 0.9848453402519226,grad_norm: 0.8088723937174446, iteration: 252535
loss: 0.9923146367073059,grad_norm: 0.9742424445375332, iteration: 252536
loss: 1.0005311965942383,grad_norm: 0.8827936813851818, iteration: 252537
loss: 1.0450990200042725,grad_norm: 0.8494644934636463, iteration: 252538
loss: 1.0159374475479126,grad_norm: 0.9908572246161818, iteration: 252539
loss: 0.9932781457901001,grad_norm: 0.9020094375142629, iteration: 252540
loss: 1.1798653602600098,grad_norm: 0.9999999078303503, iteration: 252541
loss: 1.0240997076034546,grad_norm: 0.9729733877562534, iteration: 252542
loss: 1.0214112997055054,grad_norm: 0.9659408177256853, iteration: 252543
loss: 0.9958937168121338,grad_norm: 0.8770724884221475, iteration: 252544
loss: 0.9995701909065247,grad_norm: 0.811257671348657, iteration: 252545
loss: 0.9919965863227844,grad_norm: 0.912164216673927, iteration: 252546
loss: 1.008430004119873,grad_norm: 0.949897298454047, iteration: 252547
loss: 0.9945414066314697,grad_norm: 0.9220362795166328, iteration: 252548
loss: 1.013165831565857,grad_norm: 0.7796684414392843, iteration: 252549
loss: 1.0219887495040894,grad_norm: 0.8764015177118752, iteration: 252550
loss: 0.9875713586807251,grad_norm: 0.8388020586092534, iteration: 252551
loss: 0.9880546927452087,grad_norm: 0.8045201912625195, iteration: 252552
loss: 0.9489665031433105,grad_norm: 0.9417902708457492, iteration: 252553
loss: 0.9870506525039673,grad_norm: 0.9284369638934781, iteration: 252554
loss: 1.0121777057647705,grad_norm: 0.9999992039006661, iteration: 252555
loss: 1.0154552459716797,grad_norm: 0.8339326050828263, iteration: 252556
loss: 0.9958375692367554,grad_norm: 0.8928554826825417, iteration: 252557
loss: 0.9944697618484497,grad_norm: 0.7441059847885467, iteration: 252558
loss: 1.0036474466323853,grad_norm: 0.7830681933255121, iteration: 252559
loss: 0.9999003410339355,grad_norm: 0.8738514316416565, iteration: 252560
loss: 0.9831950068473816,grad_norm: 0.9613458368719964, iteration: 252561
loss: 1.0031601190567017,grad_norm: 0.8500052709998488, iteration: 252562
loss: 0.9969388842582703,grad_norm: 0.8392882019462417, iteration: 252563
loss: 0.9961459040641785,grad_norm: 0.7796472172086356, iteration: 252564
loss: 1.0261352062225342,grad_norm: 0.9999996087702361, iteration: 252565
loss: 0.9813727140426636,grad_norm: 0.957708934238558, iteration: 252566
loss: 0.9970620274543762,grad_norm: 0.9345210966692329, iteration: 252567
loss: 1.0203860998153687,grad_norm: 0.841041386666017, iteration: 252568
loss: 0.9708161354064941,grad_norm: 0.8377433171286437, iteration: 252569
loss: 0.9974109530448914,grad_norm: 0.9130760163733763, iteration: 252570
loss: 0.9817617535591125,grad_norm: 0.7092145454895282, iteration: 252571
loss: 0.9754273891448975,grad_norm: 0.8267689031514132, iteration: 252572
loss: 0.9985218644142151,grad_norm: 0.9665870675345059, iteration: 252573
loss: 0.9901106357574463,grad_norm: 0.8580850078872049, iteration: 252574
loss: 0.9961220622062683,grad_norm: 0.880912615321107, iteration: 252575
loss: 0.9772630333900452,grad_norm: 0.941860328061901, iteration: 252576
loss: 0.9954917430877686,grad_norm: 0.9999991132676951, iteration: 252577
loss: 0.974998950958252,grad_norm: 0.9929156966861981, iteration: 252578
loss: 1.0170522928237915,grad_norm: 0.8250790369143294, iteration: 252579
loss: 1.0188360214233398,grad_norm: 0.7742148198128365, iteration: 252580
loss: 0.9448615312576294,grad_norm: 0.8315258151135528, iteration: 252581
loss: 1.0074329376220703,grad_norm: 0.8154741949602186, iteration: 252582
loss: 0.9951404333114624,grad_norm: 0.9999998176708594, iteration: 252583
loss: 0.950655996799469,grad_norm: 0.6999512620678024, iteration: 252584
loss: 1.016174554824829,grad_norm: 0.8744806152075082, iteration: 252585
loss: 0.9910517334938049,grad_norm: 0.8891125477602553, iteration: 252586
loss: 0.9664265513420105,grad_norm: 0.8590942788429783, iteration: 252587
loss: 0.9705245494842529,grad_norm: 0.7922111909770184, iteration: 252588
loss: 1.0300633907318115,grad_norm: 0.9999999179419867, iteration: 252589
loss: 0.976513147354126,grad_norm: 0.938500688825161, iteration: 252590
loss: 0.9711896181106567,grad_norm: 0.8760246434382826, iteration: 252591
loss: 1.0044587850570679,grad_norm: 0.9258693969532523, iteration: 252592
loss: 1.0393213033676147,grad_norm: 0.911415211592293, iteration: 252593
loss: 1.0185567140579224,grad_norm: 0.8957551955452671, iteration: 252594
loss: 0.977690577507019,grad_norm: 0.8733199449215506, iteration: 252595
loss: 0.9778581857681274,grad_norm: 0.844432473024689, iteration: 252596
loss: 0.993724524974823,grad_norm: 0.9349762359172309, iteration: 252597
loss: 0.990358293056488,grad_norm: 0.8170625513782666, iteration: 252598
loss: 1.0203969478607178,grad_norm: 0.9999989971468005, iteration: 252599
loss: 0.9603734612464905,grad_norm: 0.817732217645075, iteration: 252600
loss: 1.003922462463379,grad_norm: 0.8582629481215964, iteration: 252601
loss: 1.0030734539031982,grad_norm: 0.8410180727916599, iteration: 252602
loss: 1.0448603630065918,grad_norm: 0.8696776122739438, iteration: 252603
loss: 1.027089238166809,grad_norm: 0.9266532091196337, iteration: 252604
loss: 1.0476717948913574,grad_norm: 0.9999990374119964, iteration: 252605
loss: 0.9747198820114136,grad_norm: 0.8635490686351649, iteration: 252606
loss: 0.9909939169883728,grad_norm: 0.936425521297283, iteration: 252607
loss: 1.0218133926391602,grad_norm: 0.999999016458765, iteration: 252608
loss: 1.0516934394836426,grad_norm: 0.9885903423489087, iteration: 252609
loss: 0.9982065558433533,grad_norm: 0.9598010399571082, iteration: 252610
loss: 1.0556074380874634,grad_norm: 0.9999991023032564, iteration: 252611
loss: 0.9724472761154175,grad_norm: 0.9461440080451787, iteration: 252612
loss: 1.0381847620010376,grad_norm: 0.9309605809504788, iteration: 252613
loss: 0.9508135318756104,grad_norm: 0.9044331387497085, iteration: 252614
loss: 1.0216256380081177,grad_norm: 0.9034709013299473, iteration: 252615
loss: 1.031111717224121,grad_norm: 0.9999991589169781, iteration: 252616
loss: 0.9949097633361816,grad_norm: 0.8822712435677892, iteration: 252617
loss: 1.0362279415130615,grad_norm: 0.9999991284897102, iteration: 252618
loss: 0.9931525588035583,grad_norm: 0.829812484815428, iteration: 252619
loss: 1.0336978435516357,grad_norm: 0.7442794040475216, iteration: 252620
loss: 0.9942777752876282,grad_norm: 0.8002343973387204, iteration: 252621
loss: 1.0045418739318848,grad_norm: 0.9344307508046951, iteration: 252622
loss: 1.0107591152191162,grad_norm: 0.9999991173694033, iteration: 252623
loss: 1.0081006288528442,grad_norm: 0.8646812965181484, iteration: 252624
loss: 1.0196832418441772,grad_norm: 0.999999160354158, iteration: 252625
loss: 1.0130457878112793,grad_norm: 0.9834514943413185, iteration: 252626
loss: 1.012660264968872,grad_norm: 0.9595079583022836, iteration: 252627
loss: 0.9892035126686096,grad_norm: 0.8306312887450071, iteration: 252628
loss: 1.0280529260635376,grad_norm: 0.9946987226914953, iteration: 252629
loss: 1.025493860244751,grad_norm: 0.862824184635395, iteration: 252630
loss: 0.9939499497413635,grad_norm: 0.9999990385813013, iteration: 252631
loss: 1.0315237045288086,grad_norm: 0.9999995082398676, iteration: 252632
loss: 1.0373491048812866,grad_norm: 0.959316479804855, iteration: 252633
loss: 1.00102961063385,grad_norm: 0.9999993387060305, iteration: 252634
loss: 1.0085862874984741,grad_norm: 0.7194986237446361, iteration: 252635
loss: 0.9774104952812195,grad_norm: 0.8396160857014711, iteration: 252636
loss: 0.9825608134269714,grad_norm: 0.9999992516431981, iteration: 252637
loss: 0.9563108682632446,grad_norm: 0.9532520429653187, iteration: 252638
loss: 0.9851837754249573,grad_norm: 0.9999990563045634, iteration: 252639
loss: 1.008510947227478,grad_norm: 0.9985528755670552, iteration: 252640
loss: 0.993389368057251,grad_norm: 0.7587639092896318, iteration: 252641
loss: 1.079264760017395,grad_norm: 0.9999996909373601, iteration: 252642
loss: 1.028821349143982,grad_norm: 0.9206555134654525, iteration: 252643
loss: 1.0036637783050537,grad_norm: 0.9042311893972229, iteration: 252644
loss: 1.0086162090301514,grad_norm: 0.9700119848632769, iteration: 252645
loss: 0.9937803745269775,grad_norm: 0.9999991682459536, iteration: 252646
loss: 1.026557445526123,grad_norm: 0.8454960147359832, iteration: 252647
loss: 0.9824239611625671,grad_norm: 0.9999990865981878, iteration: 252648
loss: 1.0093566179275513,grad_norm: 0.8389394711529129, iteration: 252649
loss: 1.0024503469467163,grad_norm: 0.8221987315009605, iteration: 252650
loss: 1.0133353471755981,grad_norm: 0.9999991341452671, iteration: 252651
loss: 0.9647875428199768,grad_norm: 0.8900965714043895, iteration: 252652
loss: 0.9989440441131592,grad_norm: 0.8180641100077879, iteration: 252653
loss: 1.0274163484573364,grad_norm: 0.826897095019754, iteration: 252654
loss: 0.9898716807365417,grad_norm: 0.7562722695870387, iteration: 252655
loss: 1.0055252313613892,grad_norm: 0.7415735211485112, iteration: 252656
loss: 0.9907839894294739,grad_norm: 0.8594988334133432, iteration: 252657
loss: 0.9648148417472839,grad_norm: 0.8655143961022942, iteration: 252658
loss: 0.9861524105072021,grad_norm: 0.8483985770483099, iteration: 252659
loss: 0.9810053706169128,grad_norm: 0.7584095581373568, iteration: 252660
loss: 1.0022245645523071,grad_norm: 0.8384937689730003, iteration: 252661
loss: 1.0264849662780762,grad_norm: 0.999999126644778, iteration: 252662
loss: 0.9996541142463684,grad_norm: 0.9999990560761468, iteration: 252663
loss: 1.009660243988037,grad_norm: 0.9823869810554663, iteration: 252664
loss: 0.9904747605323792,grad_norm: 0.7749130483099023, iteration: 252665
loss: 1.004313349723816,grad_norm: 0.95289015080926, iteration: 252666
loss: 1.0135823488235474,grad_norm: 0.7417914160820134, iteration: 252667
loss: 1.0032649040222168,grad_norm: 0.999999204129102, iteration: 252668
loss: 1.009547472000122,grad_norm: 0.7821212649421421, iteration: 252669
loss: 1.0203536748886108,grad_norm: 0.9999999352378706, iteration: 252670
loss: 1.0267221927642822,grad_norm: 0.9523837855106189, iteration: 252671
loss: 0.9928022027015686,grad_norm: 0.851806715394607, iteration: 252672
loss: 1.0289112329483032,grad_norm: 0.9262734409704335, iteration: 252673
loss: 0.9925309419631958,grad_norm: 0.9028214523613187, iteration: 252674
loss: 0.9874007701873779,grad_norm: 0.9115747754570889, iteration: 252675
loss: 1.0092995166778564,grad_norm: 0.808577168692365, iteration: 252676
loss: 0.9960298538208008,grad_norm: 0.9028757396502922, iteration: 252677
loss: 1.0082156658172607,grad_norm: 0.8949638603291072, iteration: 252678
loss: 1.0366820096969604,grad_norm: 0.9999991932555853, iteration: 252679
loss: 1.0260974168777466,grad_norm: 0.8329285953960914, iteration: 252680
loss: 0.9751540422439575,grad_norm: 0.8612377807594287, iteration: 252681
loss: 1.0207533836364746,grad_norm: 0.910233867367681, iteration: 252682
loss: 1.0005236864089966,grad_norm: 0.993039072137233, iteration: 252683
loss: 0.9989778399467468,grad_norm: 0.8585324682503322, iteration: 252684
loss: 1.012231469154358,grad_norm: 0.9999990927790089, iteration: 252685
loss: 0.980231761932373,grad_norm: 0.8456271731259299, iteration: 252686
loss: 1.043474793434143,grad_norm: 0.9189378735527333, iteration: 252687
loss: 0.9548702836036682,grad_norm: 0.8813539052114339, iteration: 252688
loss: 1.0172367095947266,grad_norm: 0.8969366375542205, iteration: 252689
loss: 1.030738353729248,grad_norm: 0.8806998172001865, iteration: 252690
loss: 0.9900903701782227,grad_norm: 0.999999036002131, iteration: 252691
loss: 1.0272929668426514,grad_norm: 0.999999674419762, iteration: 252692
loss: 0.9848691821098328,grad_norm: 0.7892421197835259, iteration: 252693
loss: 1.0200047492980957,grad_norm: 0.9133361508207944, iteration: 252694
loss: 1.015802025794983,grad_norm: 0.9069051411516545, iteration: 252695
loss: 1.0178444385528564,grad_norm: 0.9999990921222037, iteration: 252696
loss: 0.9856588244438171,grad_norm: 0.9999990581183067, iteration: 252697
loss: 1.0094059705734253,grad_norm: 0.9999993239110944, iteration: 252698
loss: 1.018674373626709,grad_norm: 0.7525564819649361, iteration: 252699
loss: 0.9507299661636353,grad_norm: 0.8810497582172117, iteration: 252700
loss: 1.0142778158187866,grad_norm: 0.7791435056567579, iteration: 252701
loss: 0.9600771069526672,grad_norm: 0.98112953612173, iteration: 252702
loss: 1.0062323808670044,grad_norm: 0.9999992857895299, iteration: 252703
loss: 1.0283565521240234,grad_norm: 0.9583481229321525, iteration: 252704
loss: 0.9902759194374084,grad_norm: 0.8679253934176111, iteration: 252705
loss: 0.9791733622550964,grad_norm: 0.9189974905398621, iteration: 252706
loss: 1.272117257118225,grad_norm: 0.9999991223901935, iteration: 252707
loss: 1.0547600984573364,grad_norm: 1.0000000255139194, iteration: 252708
loss: 0.9783204793930054,grad_norm: 0.8722360015187712, iteration: 252709
loss: 1.0155853033065796,grad_norm: 0.7931407869944112, iteration: 252710
loss: 1.015944004058838,grad_norm: 0.7745147270569895, iteration: 252711
loss: 1.0331463813781738,grad_norm: 0.801630890998396, iteration: 252712
loss: 0.9494726657867432,grad_norm: 0.8508616238365224, iteration: 252713
loss: 0.9902974367141724,grad_norm: 0.9517638987377611, iteration: 252714
loss: 1.0065568685531616,grad_norm: 0.8401398395798327, iteration: 252715
loss: 1.025788426399231,grad_norm: 0.9953741678624229, iteration: 252716
loss: 0.9778894186019897,grad_norm: 0.9017778138677965, iteration: 252717
loss: 0.9628846049308777,grad_norm: 0.9999991133469022, iteration: 252718
loss: 1.0206058025360107,grad_norm: 0.8471998377107127, iteration: 252719
loss: 1.0410710573196411,grad_norm: 0.9006860482822235, iteration: 252720
loss: 0.9878089427947998,grad_norm: 0.8264308701078056, iteration: 252721
loss: 0.9796305894851685,grad_norm: 0.7778001926181316, iteration: 252722
loss: 1.0150736570358276,grad_norm: 0.8934529114844982, iteration: 252723
loss: 1.004717469215393,grad_norm: 0.9244455520903028, iteration: 252724
loss: 1.0589641332626343,grad_norm: 0.9999990605641025, iteration: 252725
loss: 0.9753686189651489,grad_norm: 0.9832776255533158, iteration: 252726
loss: 1.019437313079834,grad_norm: 0.9999990370341288, iteration: 252727
loss: 1.0008385181427002,grad_norm: 0.8888843999410669, iteration: 252728
loss: 0.970167338848114,grad_norm: 0.9999991492652169, iteration: 252729
loss: 0.9750224947929382,grad_norm: 0.7892237591741607, iteration: 252730
loss: 1.0092109441757202,grad_norm: 0.99999909234789, iteration: 252731
loss: 0.9658197164535522,grad_norm: 0.9999991157758663, iteration: 252732
loss: 1.0057607889175415,grad_norm: 0.8235761621510286, iteration: 252733
loss: 1.0057536363601685,grad_norm: 0.9999991902262931, iteration: 252734
loss: 0.9945522546768188,grad_norm: 0.7332234793815895, iteration: 252735
loss: 1.0184015035629272,grad_norm: 0.720810689249123, iteration: 252736
loss: 0.9901926517486572,grad_norm: 0.9468483173589641, iteration: 252737
loss: 1.007077932357788,grad_norm: 0.7051477742688134, iteration: 252738
loss: 0.9848071336746216,grad_norm: 0.9999989853507488, iteration: 252739
loss: 0.9764066934585571,grad_norm: 0.7824892095930199, iteration: 252740
loss: 0.995528519153595,grad_norm: 0.7070303417925241, iteration: 252741
loss: 1.0214592218399048,grad_norm: 0.9999990964685737, iteration: 252742
loss: 0.9788572788238525,grad_norm: 0.953681129308641, iteration: 252743
loss: 0.9819180369377136,grad_norm: 0.915743276145608, iteration: 252744
loss: 1.0394927263259888,grad_norm: 0.740731759474022, iteration: 252745
loss: 1.031757116317749,grad_norm: 0.8860993061017645, iteration: 252746
loss: 0.970146119594574,grad_norm: 0.9607599324554501, iteration: 252747
loss: 1.0142862796783447,grad_norm: 0.8157488850515495, iteration: 252748
loss: 1.0063917636871338,grad_norm: 0.9999994380016525, iteration: 252749
loss: 0.9928532242774963,grad_norm: 0.7200410540249521, iteration: 252750
loss: 1.0038093328475952,grad_norm: 0.8926039824767283, iteration: 252751
loss: 0.9650274515151978,grad_norm: 0.8405258176409989, iteration: 252752
loss: 1.0052834749221802,grad_norm: 0.9168445911822853, iteration: 252753
loss: 1.0096176862716675,grad_norm: 0.891744731228076, iteration: 252754
loss: 1.000085711479187,grad_norm: 0.8394835621373364, iteration: 252755
loss: 1.0102084875106812,grad_norm: 0.8556100159437513, iteration: 252756
loss: 0.977333128452301,grad_norm: 0.8001331356409618, iteration: 252757
loss: 1.0130140781402588,grad_norm: 0.9615087512858327, iteration: 252758
loss: 0.9598258137702942,grad_norm: 0.7887285342060484, iteration: 252759
loss: 1.0467828512191772,grad_norm: 0.8998875358319904, iteration: 252760
loss: 0.9779830574989319,grad_norm: 0.9637893260955307, iteration: 252761
loss: 0.9978988766670227,grad_norm: 0.9601353097602012, iteration: 252762
loss: 1.0484341382980347,grad_norm: 0.9113442114332657, iteration: 252763
loss: 0.9825695157051086,grad_norm: 0.9999991685471447, iteration: 252764
loss: 0.998584508895874,grad_norm: 0.7156455496412064, iteration: 252765
loss: 0.9743536710739136,grad_norm: 0.9190473338321313, iteration: 252766
loss: 0.995137631893158,grad_norm: 0.9999999195342012, iteration: 252767
loss: 1.0160472393035889,grad_norm: 0.9999991866240023, iteration: 252768
loss: 1.0019166469573975,grad_norm: 0.7704611948342952, iteration: 252769
loss: 1.0063132047653198,grad_norm: 0.8376685365830121, iteration: 252770
loss: 1.0251320600509644,grad_norm: 0.9999997320551659, iteration: 252771
loss: 0.9873961806297302,grad_norm: 0.7470248234478208, iteration: 252772
loss: 0.9760553240776062,grad_norm: 0.7989909003706746, iteration: 252773
loss: 1.0057387351989746,grad_norm: 0.9002472924403222, iteration: 252774
loss: 1.0137828588485718,grad_norm: 0.7784812487380318, iteration: 252775
loss: 1.0142992734909058,grad_norm: 0.9999997031564292, iteration: 252776
loss: 1.0102896690368652,grad_norm: 0.9999989729063314, iteration: 252777
loss: 1.0084751844406128,grad_norm: 0.9999993021011563, iteration: 252778
loss: 1.0716813802719116,grad_norm: 0.999999166847903, iteration: 252779
loss: 0.9727171063423157,grad_norm: 0.8779451577579499, iteration: 252780
loss: 0.9847471714019775,grad_norm: 0.9999990597110321, iteration: 252781
loss: 1.0000462532043457,grad_norm: 0.8508210572240524, iteration: 252782
loss: 1.0197418928146362,grad_norm: 0.9999996778807021, iteration: 252783
loss: 0.9624624848365784,grad_norm: 0.9163573571918047, iteration: 252784
loss: 1.022736668586731,grad_norm: 0.9999990675288148, iteration: 252785
loss: 0.9940211176872253,grad_norm: 0.9036085178075137, iteration: 252786
loss: 0.9956600069999695,grad_norm: 0.9902561458066612, iteration: 252787
loss: 1.002834677696228,grad_norm: 0.9999991334015278, iteration: 252788
loss: 0.9531021118164062,grad_norm: 0.9402811720766122, iteration: 252789
loss: 1.0309993028640747,grad_norm: 0.9999991386607173, iteration: 252790
loss: 0.9993494153022766,grad_norm: 0.9940842432360846, iteration: 252791
loss: 1.0041403770446777,grad_norm: 0.8891816978766414, iteration: 252792
loss: 1.0079659223556519,grad_norm: 0.9177424281066044, iteration: 252793
loss: 1.0358977317810059,grad_norm: 0.9068971330312865, iteration: 252794
loss: 1.0010912418365479,grad_norm: 0.8724473126224142, iteration: 252795
loss: 0.9874792695045471,grad_norm: 0.9080751177280559, iteration: 252796
loss: 1.016263723373413,grad_norm: 0.9999991654410002, iteration: 252797
loss: 1.0265308618545532,grad_norm: 0.9999993862848191, iteration: 252798
loss: 0.9936317801475525,grad_norm: 0.9410184591064819, iteration: 252799
loss: 0.9867178797721863,grad_norm: 0.8254195084092154, iteration: 252800
loss: 1.0152934789657593,grad_norm: 0.7804944730195057, iteration: 252801
loss: 1.0096789598464966,grad_norm: 0.8671472733785113, iteration: 252802
loss: 1.0060701370239258,grad_norm: 0.8775850062508993, iteration: 252803
loss: 1.0210063457489014,grad_norm: 0.9999991621275548, iteration: 252804
loss: 0.9893436431884766,grad_norm: 0.9487302384043063, iteration: 252805
loss: 0.995878279209137,grad_norm: 0.8872744548742529, iteration: 252806
loss: 1.0050971508026123,grad_norm: 0.9052117082669551, iteration: 252807
loss: 0.976687490940094,grad_norm: 0.9999990238966733, iteration: 252808
loss: 0.9944779872894287,grad_norm: 0.9158534084371567, iteration: 252809
loss: 0.9990270137786865,grad_norm: 0.8152481459988514, iteration: 252810
loss: 0.9793809056282043,grad_norm: 0.9159870134623768, iteration: 252811
loss: 1.0076531171798706,grad_norm: 0.7531084414883278, iteration: 252812
loss: 1.0249664783477783,grad_norm: 0.8355715975066365, iteration: 252813
loss: 0.9675636887550354,grad_norm: 0.9834483269004413, iteration: 252814
loss: 1.0315605401992798,grad_norm: 0.9800777238728572, iteration: 252815
loss: 0.9822123050689697,grad_norm: 0.86033377849406, iteration: 252816
loss: 1.0027371644973755,grad_norm: 0.913517953503687, iteration: 252817
loss: 0.9784110188484192,grad_norm: 0.972686653428445, iteration: 252818
loss: 1.0284396409988403,grad_norm: 0.9999992273407328, iteration: 252819
loss: 0.967043936252594,grad_norm: 0.9361697360412964, iteration: 252820
loss: 0.9946390390396118,grad_norm: 0.9338223042486978, iteration: 252821
loss: 1.0254464149475098,grad_norm: 0.9999990824425447, iteration: 252822
loss: 1.0094189643859863,grad_norm: 0.9453114181919008, iteration: 252823
loss: 0.9797207713127136,grad_norm: 0.9346900398283489, iteration: 252824
loss: 1.128182291984558,grad_norm: 0.9999991547948128, iteration: 252825
loss: 0.9631556868553162,grad_norm: 0.8628068767778435, iteration: 252826
loss: 0.9657716155052185,grad_norm: 0.7601337591295977, iteration: 252827
loss: 1.008016586303711,grad_norm: 0.9999991517967838, iteration: 252828
loss: 1.0103263854980469,grad_norm: 0.806617279117093, iteration: 252829
loss: 1.0234642028808594,grad_norm: 0.8924964191337394, iteration: 252830
loss: 0.9854229092597961,grad_norm: 0.9129772362817186, iteration: 252831
loss: 1.0164130926132202,grad_norm: 0.8658378675449996, iteration: 252832
loss: 0.9712529182434082,grad_norm: 0.8851713075946228, iteration: 252833
loss: 0.9779338836669922,grad_norm: 0.6956275530478372, iteration: 252834
loss: 1.0001568794250488,grad_norm: 0.9081901979407457, iteration: 252835
loss: 0.9891608357429504,grad_norm: 0.8888702673758713, iteration: 252836
loss: 0.9957639575004578,grad_norm: 0.7911438274784215, iteration: 252837
loss: 0.9867263436317444,grad_norm: 0.8547823474258526, iteration: 252838
loss: 1.0486786365509033,grad_norm: 0.9946598590752205, iteration: 252839
loss: 0.9894508719444275,grad_norm: 0.863282974152869, iteration: 252840
loss: 1.0246951580047607,grad_norm: 0.99999900657438, iteration: 252841
loss: 0.9807417988777161,grad_norm: 0.9042677839411674, iteration: 252842
loss: 0.9937288165092468,grad_norm: 0.7366678960820673, iteration: 252843
loss: 0.961254358291626,grad_norm: 0.9542147291610613, iteration: 252844
loss: 1.0909745693206787,grad_norm: 0.9999993444744838, iteration: 252845
loss: 1.0019840002059937,grad_norm: 0.9304649721115744, iteration: 252846
loss: 1.024483561515808,grad_norm: 0.8559858780620206, iteration: 252847
loss: 1.0155755281448364,grad_norm: 0.9999997766884772, iteration: 252848
loss: 0.9598444700241089,grad_norm: 0.833618730095293, iteration: 252849
loss: 1.0149182081222534,grad_norm: 0.9999992334733978, iteration: 252850
loss: 1.0452450513839722,grad_norm: 0.9106443361448091, iteration: 252851
loss: 0.9952501654624939,grad_norm: 0.7948668399896222, iteration: 252852
loss: 1.005475640296936,grad_norm: 0.9999994098261278, iteration: 252853
loss: 1.0151718854904175,grad_norm: 0.8252214672458481, iteration: 252854
loss: 1.010117530822754,grad_norm: 0.7943997852293753, iteration: 252855
loss: 0.9689056873321533,grad_norm: 0.7670169924211736, iteration: 252856
loss: 1.0127923488616943,grad_norm: 0.9465548329560967, iteration: 252857
loss: 1.029588222503662,grad_norm: 0.8317098307564085, iteration: 252858
loss: 1.013270378112793,grad_norm: 0.7895384130153231, iteration: 252859
loss: 1.032174825668335,grad_norm: 0.9999990008265202, iteration: 252860
loss: 1.007589340209961,grad_norm: 0.9322272500429895, iteration: 252861
loss: 0.98579341173172,grad_norm: 0.9955834655580023, iteration: 252862
loss: 1.011465311050415,grad_norm: 0.853075604832202, iteration: 252863
loss: 0.9744086861610413,grad_norm: 0.7884172593452603, iteration: 252864
loss: 0.9505582451820374,grad_norm: 0.9053818620222533, iteration: 252865
loss: 1.0547475814819336,grad_norm: 0.999999101134843, iteration: 252866
loss: 1.0486189126968384,grad_norm: 0.9999994672842131, iteration: 252867
loss: 0.9793444871902466,grad_norm: 0.7355651267066722, iteration: 252868
loss: 1.0044188499450684,grad_norm: 0.8313695316747127, iteration: 252869
loss: 0.991310179233551,grad_norm: 0.8495984568727236, iteration: 252870
loss: 0.9945665001869202,grad_norm: 0.805980081109523, iteration: 252871
loss: 1.0112011432647705,grad_norm: 0.8638491758255663, iteration: 252872
loss: 0.977525532245636,grad_norm: 0.9999989583605885, iteration: 252873
loss: 1.055876612663269,grad_norm: 0.9999990990178326, iteration: 252874
loss: 1.0101743936538696,grad_norm: 0.9229473307314199, iteration: 252875
loss: 1.0008931159973145,grad_norm: 0.9394423552701785, iteration: 252876
loss: 1.0041117668151855,grad_norm: 0.9315662281729725, iteration: 252877
loss: 1.0172518491744995,grad_norm: 0.7860396793966042, iteration: 252878
loss: 1.013594388961792,grad_norm: 0.9794557183321927, iteration: 252879
loss: 1.0176008939743042,grad_norm: 0.9809367176416278, iteration: 252880
loss: 1.019948959350586,grad_norm: 0.7753659770106455, iteration: 252881
loss: 1.0052599906921387,grad_norm: 0.999999061524614, iteration: 252882
loss: 1.0322984457015991,grad_norm: 0.7591448953746409, iteration: 252883
loss: 0.9949146509170532,grad_norm: 0.999999081413852, iteration: 252884
loss: 1.0000219345092773,grad_norm: 0.8536107936504131, iteration: 252885
loss: 0.9790109992027283,grad_norm: 0.704144380437542, iteration: 252886
loss: 0.963226318359375,grad_norm: 0.8693819206222155, iteration: 252887
loss: 1.0304524898529053,grad_norm: 0.9999994786146341, iteration: 252888
loss: 0.9780896902084351,grad_norm: 0.9289335070819159, iteration: 252889
loss: 1.0280077457427979,grad_norm: 0.999999003528832, iteration: 252890
loss: 1.0011979341506958,grad_norm: 0.9957251215322089, iteration: 252891
loss: 1.0101537704467773,grad_norm: 0.7685507153093508, iteration: 252892
loss: 1.0051021575927734,grad_norm: 0.9639865303585077, iteration: 252893
loss: 0.9885256886482239,grad_norm: 0.9999997007950757, iteration: 252894
loss: 1.000556230545044,grad_norm: 0.8334069985451523, iteration: 252895
loss: 1.00823175907135,grad_norm: 0.7962537580520163, iteration: 252896
loss: 1.0042330026626587,grad_norm: 0.9999992803280764, iteration: 252897
loss: 0.997468888759613,grad_norm: 0.7897199680805227, iteration: 252898
loss: 0.9533494114875793,grad_norm: 0.95651977895632, iteration: 252899
loss: 0.9873437881469727,grad_norm: 0.7633123933033046, iteration: 252900
loss: 0.9937450885772705,grad_norm: 0.8949475247064302, iteration: 252901
loss: 0.9993664026260376,grad_norm: 0.8101251891358351, iteration: 252902
loss: 0.9433692693710327,grad_norm: 0.9999991725125321, iteration: 252903
loss: 1.004928469657898,grad_norm: 0.9999991043334108, iteration: 252904
loss: 0.984944224357605,grad_norm: 0.9999992332840736, iteration: 252905
loss: 1.0137040615081787,grad_norm: 0.9547572601583196, iteration: 252906
loss: 1.0258382558822632,grad_norm: 0.8632163294066139, iteration: 252907
loss: 0.9974892139434814,grad_norm: 0.912596040842999, iteration: 252908
loss: 0.9713344573974609,grad_norm: 0.8842183415772477, iteration: 252909
loss: 1.0211846828460693,grad_norm: 0.7891197492879188, iteration: 252910
loss: 1.022495985031128,grad_norm: 0.8845254156016243, iteration: 252911
loss: 1.014761209487915,grad_norm: 0.9404914190739877, iteration: 252912
loss: 0.9816195368766785,grad_norm: 0.8397473927447342, iteration: 252913
loss: 0.9541495442390442,grad_norm: 0.9656903104875343, iteration: 252914
loss: 0.9934623837471008,grad_norm: 0.8886577853857321, iteration: 252915
loss: 1.009863257408142,grad_norm: 0.9820347196840719, iteration: 252916
loss: 1.0255074501037598,grad_norm: 0.809677668776978, iteration: 252917
loss: 1.0089412927627563,grad_norm: 0.7682053067055604, iteration: 252918
loss: 1.0133832693099976,grad_norm: 0.9038097081277366, iteration: 252919
loss: 1.0226577520370483,grad_norm: 0.9999990047964041, iteration: 252920
loss: 1.062813401222229,grad_norm: 0.9999991998069556, iteration: 252921
loss: 1.0403106212615967,grad_norm: 0.9999995626656502, iteration: 252922
loss: 0.9796427488327026,grad_norm: 0.8771941904807651, iteration: 252923
loss: 0.983298659324646,grad_norm: 0.8239799192809785, iteration: 252924
loss: 0.9876587986946106,grad_norm: 0.8513450308959915, iteration: 252925
loss: 0.9738980531692505,grad_norm: 0.9999989519905366, iteration: 252926
loss: 1.0156804323196411,grad_norm: 0.8500105017440072, iteration: 252927
loss: 1.025282621383667,grad_norm: 0.8463046022704173, iteration: 252928
loss: 1.0115958452224731,grad_norm: 0.9999992799923739, iteration: 252929
loss: 0.9916794300079346,grad_norm: 0.9974154428696432, iteration: 252930
loss: 0.9976327419281006,grad_norm: 0.8114025635616189, iteration: 252931
loss: 1.001442313194275,grad_norm: 0.8606293923667547, iteration: 252932
loss: 0.9695792198181152,grad_norm: 0.834827481516575, iteration: 252933
loss: 0.979934811592102,grad_norm: 0.777968738080151, iteration: 252934
loss: 0.9973961710929871,grad_norm: 0.8893912450383542, iteration: 252935
loss: 1.0057331323623657,grad_norm: 0.7982074185017759, iteration: 252936
loss: 0.9816336035728455,grad_norm: 0.805814631813732, iteration: 252937
loss: 1.017560601234436,grad_norm: 0.9311285142070169, iteration: 252938
loss: 0.9851384162902832,grad_norm: 0.7820838526552835, iteration: 252939
loss: 0.984808623790741,grad_norm: 0.999999295114809, iteration: 252940
loss: 1.0048283338546753,grad_norm: 0.792543152376654, iteration: 252941
loss: 0.9908502697944641,grad_norm: 0.8290921366217996, iteration: 252942
loss: 1.0123320817947388,grad_norm: 0.9541994126149445, iteration: 252943
loss: 0.9603250622749329,grad_norm: 0.912822110463739, iteration: 252944
loss: 1.0406303405761719,grad_norm: 0.9999994466926975, iteration: 252945
loss: 0.9789358973503113,grad_norm: 0.9982824007985984, iteration: 252946
loss: 1.02021324634552,grad_norm: 0.9403979827010722, iteration: 252947
loss: 1.0203535556793213,grad_norm: 0.824891411671736, iteration: 252948
loss: 1.0009344816207886,grad_norm: 0.8779078626235901, iteration: 252949
loss: 0.9809527397155762,grad_norm: 0.857218909989172, iteration: 252950
loss: 1.0089836120605469,grad_norm: 0.8959519444486996, iteration: 252951
loss: 0.957438051700592,grad_norm: 0.9937572082660985, iteration: 252952
loss: 0.9857370853424072,grad_norm: 0.9437966486428383, iteration: 252953
loss: 0.9935232996940613,grad_norm: 0.9175837968491823, iteration: 252954
loss: 1.0501465797424316,grad_norm: 0.9999990549559095, iteration: 252955
loss: 1.0077261924743652,grad_norm: 0.862684006175896, iteration: 252956
loss: 0.9969956278800964,grad_norm: 0.843534001630507, iteration: 252957
loss: 0.9847381711006165,grad_norm: 0.8217809935521687, iteration: 252958
loss: 1.0452054738998413,grad_norm: 0.9926794243007596, iteration: 252959
loss: 0.9990639686584473,grad_norm: 0.8978366590441468, iteration: 252960
loss: 1.0033214092254639,grad_norm: 0.8038471976207646, iteration: 252961
loss: 0.9563278555870056,grad_norm: 0.9556668345630208, iteration: 252962
loss: 1.0167299509048462,grad_norm: 0.9999990612298403, iteration: 252963
loss: 0.9963022470474243,grad_norm: 0.9999991261784668, iteration: 252964
loss: 1.0049774646759033,grad_norm: 0.9699199101690243, iteration: 252965
loss: 1.0239382982254028,grad_norm: 0.8897233461350633, iteration: 252966
loss: 1.0259207487106323,grad_norm: 0.9999991457754013, iteration: 252967
loss: 1.0506943464279175,grad_norm: 0.9237557804552811, iteration: 252968
loss: 1.0063748359680176,grad_norm: 0.8892727495500686, iteration: 252969
loss: 1.0819555521011353,grad_norm: 0.8486918193896593, iteration: 252970
loss: 0.9923804998397827,grad_norm: 0.8916780088317965, iteration: 252971
loss: 0.9815618395805359,grad_norm: 0.9999991367396225, iteration: 252972
loss: 1.0546995401382446,grad_norm: 0.999999326082034, iteration: 252973
loss: 1.0222907066345215,grad_norm: 0.9087080998531968, iteration: 252974
loss: 1.0206193923950195,grad_norm: 0.8710208482166839, iteration: 252975
loss: 0.9728174209594727,grad_norm: 0.8731868221161511, iteration: 252976
loss: 1.0095800161361694,grad_norm: 0.9999990192808683, iteration: 252977
loss: 0.9634962677955627,grad_norm: 0.9405623667574804, iteration: 252978
loss: 1.0176119804382324,grad_norm: 0.9999992077056759, iteration: 252979
loss: 0.9793423414230347,grad_norm: 0.8443916047155938, iteration: 252980
loss: 1.0389845371246338,grad_norm: 0.8987449415372694, iteration: 252981
loss: 0.9912253618240356,grad_norm: 0.9438821456339288, iteration: 252982
loss: 0.9995913505554199,grad_norm: 0.9562141074124585, iteration: 252983
loss: 1.015863060951233,grad_norm: 0.9981105867918513, iteration: 252984
loss: 0.9978232383728027,grad_norm: 0.8257869136208094, iteration: 252985
loss: 1.0231361389160156,grad_norm: 0.9999995329052118, iteration: 252986
loss: 0.9866136908531189,grad_norm: 0.8666056462924667, iteration: 252987
loss: 0.9902211427688599,grad_norm: 0.9251328945129307, iteration: 252988
loss: 1.0247392654418945,grad_norm: 0.9118606591840113, iteration: 252989
loss: 0.9967158436775208,grad_norm: 0.9214998967689162, iteration: 252990
loss: 0.9727615118026733,grad_norm: 0.8174699580222452, iteration: 252991
loss: 1.0125867128372192,grad_norm: 0.8000246417855043, iteration: 252992
loss: 1.0213147401809692,grad_norm: 0.9999991526418635, iteration: 252993
loss: 0.9676623940467834,grad_norm: 0.973493418827869, iteration: 252994
loss: 1.0081431865692139,grad_norm: 0.7947225540323785, iteration: 252995
loss: 0.9955833554267883,grad_norm: 0.9085892535929648, iteration: 252996
loss: 0.9984942078590393,grad_norm: 0.8088204206328975, iteration: 252997
loss: 0.9960199594497681,grad_norm: 0.9701373359798048, iteration: 252998
loss: 1.027079701423645,grad_norm: 0.9999991911606457, iteration: 252999
loss: 0.9862436056137085,grad_norm: 0.8482488568350358, iteration: 253000
loss: 1.014657735824585,grad_norm: 0.9061230727990839, iteration: 253001
loss: 1.0040405988693237,grad_norm: 0.9710929174217705, iteration: 253002
loss: 0.9900256395339966,grad_norm: 0.999999180598919, iteration: 253003
loss: 0.9720562696456909,grad_norm: 0.8886248427027916, iteration: 253004
loss: 1.008243203163147,grad_norm: 0.9497572898921063, iteration: 253005
loss: 1.0223898887634277,grad_norm: 0.9151606788150894, iteration: 253006
loss: 1.009957194328308,grad_norm: 0.9999992422806168, iteration: 253007
loss: 1.0168207883834839,grad_norm: 0.855734386762947, iteration: 253008
loss: 1.004922866821289,grad_norm: 0.8834244372819231, iteration: 253009
loss: 0.9523167014122009,grad_norm: 0.9427390162220742, iteration: 253010
loss: 0.9477315545082092,grad_norm: 0.8550695516733586, iteration: 253011
loss: 1.008394718170166,grad_norm: 0.8720903031911363, iteration: 253012
loss: 0.9988372325897217,grad_norm: 0.906042217821744, iteration: 253013
loss: 1.0156974792480469,grad_norm: 0.8960055183649613, iteration: 253014
loss: 0.985410213470459,grad_norm: 0.9942928288309146, iteration: 253015
loss: 0.997703492641449,grad_norm: 0.8776368961977525, iteration: 253016
loss: 0.9831459522247314,grad_norm: 0.797426743153589, iteration: 253017
loss: 1.006598711013794,grad_norm: 0.9146194577408504, iteration: 253018
loss: 0.9895681142807007,grad_norm: 0.9271682754303172, iteration: 253019
loss: 1.02001953125,grad_norm: 0.8521322368339567, iteration: 253020
loss: 1.031589150428772,grad_norm: 0.9086432233523511, iteration: 253021
loss: 1.0260512828826904,grad_norm: 0.8758876072836939, iteration: 253022
loss: 0.9826886057853699,grad_norm: 0.9821596927752495, iteration: 253023
loss: 1.0218532085418701,grad_norm: 0.7830719372784641, iteration: 253024
loss: 0.9950230717658997,grad_norm: 0.8667366987272633, iteration: 253025
loss: 1.0193337202072144,grad_norm: 0.9999992018176679, iteration: 253026
loss: 0.9692596793174744,grad_norm: 0.9999990058472755, iteration: 253027
loss: 0.9887661337852478,grad_norm: 0.9999989340594921, iteration: 253028
loss: 1.0395437479019165,grad_norm: 0.7945009744871986, iteration: 253029
loss: 0.9762681126594543,grad_norm: 0.9999992464593581, iteration: 253030
loss: 0.9984827041625977,grad_norm: 0.9999990307966017, iteration: 253031
loss: 0.9954898953437805,grad_norm: 0.753276067498133, iteration: 253032
loss: 0.9654110074043274,grad_norm: 0.9999991678218734, iteration: 253033
loss: 0.978927731513977,grad_norm: 0.9153943262217786, iteration: 253034
loss: 0.9846693873405457,grad_norm: 0.8542594573816704, iteration: 253035
loss: 0.9948160648345947,grad_norm: 0.8919784250698874, iteration: 253036
loss: 1.008241057395935,grad_norm: 0.9569665471033171, iteration: 253037
loss: 0.9852550625801086,grad_norm: 0.8370236704938996, iteration: 253038
loss: 0.9499568939208984,grad_norm: 0.9985373480437523, iteration: 253039
loss: 1.0371819734573364,grad_norm: 0.9506558678383931, iteration: 253040
loss: 1.0055595636367798,grad_norm: 0.9999990636555688, iteration: 253041
loss: 1.010573387145996,grad_norm: 0.8345154858216779, iteration: 253042
loss: 1.047574758529663,grad_norm: 0.8589389855654905, iteration: 253043
loss: 0.9813005924224854,grad_norm: 0.9999990469400023, iteration: 253044
loss: 1.0235207080841064,grad_norm: 0.9483077609701106, iteration: 253045
loss: 0.9647709131240845,grad_norm: 0.7504724809378888, iteration: 253046
loss: 0.9815434813499451,grad_norm: 0.9086680866320265, iteration: 253047
loss: 1.0042294263839722,grad_norm: 0.9054888632976944, iteration: 253048
loss: 1.0574028491973877,grad_norm: 0.8084684161520155, iteration: 253049
loss: 0.9877647757530212,grad_norm: 0.8340414452297996, iteration: 253050
loss: 0.966446042060852,grad_norm: 0.956548936262284, iteration: 253051
loss: 1.0375620126724243,grad_norm: 0.9230375134311681, iteration: 253052
loss: 1.0820001363754272,grad_norm: 0.8546393172288587, iteration: 253053
loss: 1.0360002517700195,grad_norm: 0.8660314415765141, iteration: 253054
loss: 1.0062894821166992,grad_norm: 0.7739160367709734, iteration: 253055
loss: 0.9767836928367615,grad_norm: 0.7729007109133454, iteration: 253056
loss: 1.006366491317749,grad_norm: 0.8322186702816629, iteration: 253057
loss: 1.0057108402252197,grad_norm: 0.9214251014217367, iteration: 253058
loss: 0.9885475039482117,grad_norm: 0.8736347937381143, iteration: 253059
loss: 1.0102221965789795,grad_norm: 0.9999996025564813, iteration: 253060
loss: 1.0273782014846802,grad_norm: 0.7835214193153759, iteration: 253061
loss: 0.9589588046073914,grad_norm: 0.9179664545188135, iteration: 253062
loss: 0.9894351959228516,grad_norm: 0.9999989851460037, iteration: 253063
loss: 0.9911010265350342,grad_norm: 0.9999990423473851, iteration: 253064
loss: 1.004647970199585,grad_norm: 0.9327234774324766, iteration: 253065
loss: 0.998285710811615,grad_norm: 0.9999990864677606, iteration: 253066
loss: 0.9783328771591187,grad_norm: 0.8906217216724216, iteration: 253067
loss: 1.0584927797317505,grad_norm: 0.9999993529840302, iteration: 253068
loss: 0.9936323165893555,grad_norm: 0.9303400513293443, iteration: 253069
loss: 0.9816885590553284,grad_norm: 0.9999990366523336, iteration: 253070
loss: 0.9622796773910522,grad_norm: 0.8363353043319225, iteration: 253071
loss: 0.9769085049629211,grad_norm: 0.8430970198574567, iteration: 253072
loss: 0.9748199582099915,grad_norm: 0.9076538751985512, iteration: 253073
loss: 1.0344160795211792,grad_norm: 0.8053791489927429, iteration: 253074
loss: 0.9905679821968079,grad_norm: 0.8431109060025493, iteration: 253075
loss: 1.0165574550628662,grad_norm: 0.8984286934635253, iteration: 253076
loss: 1.0116468667984009,grad_norm: 0.9999990934957137, iteration: 253077
loss: 1.0330051183700562,grad_norm: 0.9520911931654944, iteration: 253078
loss: 1.034868836402893,grad_norm: 0.9919698033621286, iteration: 253079
loss: 1.0040112733840942,grad_norm: 0.914568671995066, iteration: 253080
loss: 0.9741374850273132,grad_norm: 0.8495139495505519, iteration: 253081
loss: 1.012116551399231,grad_norm: 0.9377223630129778, iteration: 253082
loss: 1.014744520187378,grad_norm: 0.9272442457967873, iteration: 253083
loss: 0.9913179874420166,grad_norm: 0.9745321520783532, iteration: 253084
loss: 1.0080369710922241,grad_norm: 0.8741069048620099, iteration: 253085
loss: 1.0101741552352905,grad_norm: 0.9509705726669018, iteration: 253086
loss: 1.0062958002090454,grad_norm: 0.9343514714131093, iteration: 253087
loss: 1.0011582374572754,grad_norm: 0.774969516963107, iteration: 253088
loss: 1.0164165496826172,grad_norm: 0.9999992137330221, iteration: 253089
loss: 1.0186189413070679,grad_norm: 0.794457697550736, iteration: 253090
loss: 1.0127079486846924,grad_norm: 0.9999990940016982, iteration: 253091
loss: 1.006612777709961,grad_norm: 0.9640980670219236, iteration: 253092
loss: 1.0753121376037598,grad_norm: 0.8258455648186802, iteration: 253093
loss: 1.126990795135498,grad_norm: 0.9999999336546778, iteration: 253094
loss: 1.002007246017456,grad_norm: 0.99999914031982, iteration: 253095
loss: 1.0118426084518433,grad_norm: 0.9197319997251889, iteration: 253096
loss: 0.9852449297904968,grad_norm: 0.8400648200374908, iteration: 253097
loss: 1.00693678855896,grad_norm: 0.999999020029415, iteration: 253098
loss: 1.0064666271209717,grad_norm: 0.8322690140129997, iteration: 253099
loss: 1.0264793634414673,grad_norm: 0.9999990357618839, iteration: 253100
loss: 0.9995813965797424,grad_norm: 0.8581079557885366, iteration: 253101
loss: 1.0019989013671875,grad_norm: 0.9992282472240319, iteration: 253102
loss: 0.9924057722091675,grad_norm: 0.8197378128528312, iteration: 253103
loss: 1.0044829845428467,grad_norm: 0.9221858419886905, iteration: 253104
loss: 1.0985794067382812,grad_norm: 0.9999992592440589, iteration: 253105
loss: 1.0428980588912964,grad_norm: 0.9999994991160986, iteration: 253106
loss: 0.9471825957298279,grad_norm: 0.8860313177138095, iteration: 253107
loss: 0.9619107842445374,grad_norm: 0.8987565144031276, iteration: 253108
loss: 1.0151909589767456,grad_norm: 0.8210288892338424, iteration: 253109
loss: 1.0238393545150757,grad_norm: 0.9999996652901235, iteration: 253110
loss: 0.950555682182312,grad_norm: 0.9482769817588537, iteration: 253111
loss: 0.9882645606994629,grad_norm: 0.8663271556636698, iteration: 253112
loss: 1.008597731590271,grad_norm: 0.9796536109573922, iteration: 253113
loss: 1.0177772045135498,grad_norm: 0.7510816544988101, iteration: 253114
loss: 1.0049206018447876,grad_norm: 0.7547534837217946, iteration: 253115
loss: 1.0240964889526367,grad_norm: 0.9999989989501712, iteration: 253116
loss: 1.0157076120376587,grad_norm: 0.9422611565872439, iteration: 253117
loss: 0.9674028158187866,grad_norm: 0.9287264854938985, iteration: 253118
loss: 0.9696109890937805,grad_norm: 0.9188732474393861, iteration: 253119
loss: 0.9754806160926819,grad_norm: 0.9018244310428538, iteration: 253120
loss: 1.032096028327942,grad_norm: 0.8893889875383316, iteration: 253121
loss: 0.9818798303604126,grad_norm: 0.8389406546810986, iteration: 253122
loss: 1.0009273290634155,grad_norm: 0.7823426871126826, iteration: 253123
loss: 1.0133470296859741,grad_norm: 0.8312327839455693, iteration: 253124
loss: 1.0698328018188477,grad_norm: 0.9999998443187637, iteration: 253125
loss: 0.9963552951812744,grad_norm: 0.9073125784596454, iteration: 253126
loss: 0.9944162368774414,grad_norm: 0.893523305955894, iteration: 253127
loss: 1.0033479928970337,grad_norm: 0.9999991561659431, iteration: 253128
loss: 1.0261286497116089,grad_norm: 0.9176137975152515, iteration: 253129
loss: 1.037309169769287,grad_norm: 0.8915953073338168, iteration: 253130
loss: 1.0066733360290527,grad_norm: 0.8100459319934411, iteration: 253131
loss: 1.0130754709243774,grad_norm: 0.9459476152504189, iteration: 253132
loss: 0.9855327010154724,grad_norm: 0.8819918195604941, iteration: 253133
loss: 0.9939375519752502,grad_norm: 0.863164538916232, iteration: 253134
loss: 0.9886349439620972,grad_norm: 0.7123747189927156, iteration: 253135
loss: 0.9649656414985657,grad_norm: 0.9056352833133932, iteration: 253136
loss: 1.016589879989624,grad_norm: 0.9999990751695852, iteration: 253137
loss: 1.0009815692901611,grad_norm: 0.9999989793445963, iteration: 253138
loss: 0.9893982410430908,grad_norm: 0.9999991073639475, iteration: 253139
loss: 1.0195544958114624,grad_norm: 0.999999132142094, iteration: 253140
loss: 0.9862127304077148,grad_norm: 0.9539548767091091, iteration: 253141
loss: 1.0081677436828613,grad_norm: 0.9999991565193179, iteration: 253142
loss: 1.054295539855957,grad_norm: 0.999999243963382, iteration: 253143
loss: 0.9914599657058716,grad_norm: 0.7817008694299307, iteration: 253144
loss: 1.015089750289917,grad_norm: 0.7879695415283716, iteration: 253145
loss: 1.010818600654602,grad_norm: 0.9999991925566283, iteration: 253146
loss: 0.9971301555633545,grad_norm: 0.9999991117524337, iteration: 253147
loss: 1.1100797653198242,grad_norm: 0.9999998723066175, iteration: 253148
loss: 1.0687891244888306,grad_norm: 0.9999998440173347, iteration: 253149
loss: 1.031653881072998,grad_norm: 0.8986513705209273, iteration: 253150
loss: 0.9703252911567688,grad_norm: 0.9391205301877632, iteration: 253151
loss: 0.9961797595024109,grad_norm: 0.7451662645681163, iteration: 253152
loss: 0.9685256481170654,grad_norm: 0.9325270210709619, iteration: 253153
loss: 0.9568191170692444,grad_norm: 0.9244844769416047, iteration: 253154
loss: 1.004683494567871,grad_norm: 0.9310825651508349, iteration: 253155
loss: 1.0245025157928467,grad_norm: 0.8785477949681126, iteration: 253156
loss: 1.0083726644515991,grad_norm: 0.9999997493987346, iteration: 253157
loss: 0.9735859036445618,grad_norm: 0.8563193618181154, iteration: 253158
loss: 0.9592630863189697,grad_norm: 0.9999990524706954, iteration: 253159
loss: 0.9760499000549316,grad_norm: 0.9999990773320497, iteration: 253160
loss: 1.010583758354187,grad_norm: 0.8124340293470681, iteration: 253161
loss: 1.0131512880325317,grad_norm: 0.9999992062033547, iteration: 253162
loss: 0.9760816693305969,grad_norm: 0.8433811890239495, iteration: 253163
loss: 1.0006744861602783,grad_norm: 0.9999991619430941, iteration: 253164
loss: 0.9939268231391907,grad_norm: 0.902844018243505, iteration: 253165
loss: 1.0195904970169067,grad_norm: 0.858031483084653, iteration: 253166
loss: 1.0075433254241943,grad_norm: 0.9679062205634067, iteration: 253167
loss: 1.001930594444275,grad_norm: 0.9999991926483248, iteration: 253168
loss: 1.0067951679229736,grad_norm: 0.9166328441746004, iteration: 253169
loss: 0.9778714179992676,grad_norm: 0.999999290383171, iteration: 253170
loss: 0.9690577387809753,grad_norm: 0.8217318791960487, iteration: 253171
loss: 1.157744288444519,grad_norm: 0.9999998006481711, iteration: 253172
loss: 1.026589035987854,grad_norm: 0.9999989906606509, iteration: 253173
loss: 0.9957498908042908,grad_norm: 0.9999990908782137, iteration: 253174
loss: 1.0225075483322144,grad_norm: 0.7520418771759663, iteration: 253175
loss: 1.0056359767913818,grad_norm: 0.7253239471638407, iteration: 253176
loss: 0.9838827252388,grad_norm: 0.9639591691816833, iteration: 253177
loss: 1.0187417268753052,grad_norm: 0.875271518502024, iteration: 253178
loss: 1.0157084465026855,grad_norm: 0.768219516107342, iteration: 253179
loss: 1.0128986835479736,grad_norm: 0.9006871260752369, iteration: 253180
loss: 0.9939115047454834,grad_norm: 0.9707708680522855, iteration: 253181
loss: 1.015326976776123,grad_norm: 0.8151412045052793, iteration: 253182
loss: 1.0091078281402588,grad_norm: 0.756698089885395, iteration: 253183
loss: 0.9909107089042664,grad_norm: 0.7225863874257785, iteration: 253184
loss: 1.0094776153564453,grad_norm: 0.9435381136706331, iteration: 253185
loss: 1.0213184356689453,grad_norm: 0.9045245612718474, iteration: 253186
loss: 0.9995661377906799,grad_norm: 0.856939872505225, iteration: 253187
loss: 1.001865267753601,grad_norm: 0.7375429021644875, iteration: 253188
loss: 0.9771389961242676,grad_norm: 0.9999992676646269, iteration: 253189
loss: 0.9939184784889221,grad_norm: 0.9999990551159088, iteration: 253190
loss: 1.0355154275894165,grad_norm: 0.9999993961555644, iteration: 253191
loss: 1.0041325092315674,grad_norm: 0.9999991818477526, iteration: 253192
loss: 1.0197068452835083,grad_norm: 0.9999994194012836, iteration: 253193
loss: 1.0128875970840454,grad_norm: 0.9295479241827009, iteration: 253194
loss: 0.9778620004653931,grad_norm: 0.999999079880087, iteration: 253195
loss: 1.0240817070007324,grad_norm: 0.8275259571477692, iteration: 253196
loss: 0.999059796333313,grad_norm: 0.9999990586515194, iteration: 253197
loss: 0.9929083585739136,grad_norm: 0.87118844255527, iteration: 253198
loss: 0.9985390305519104,grad_norm: 0.977089543798413, iteration: 253199
loss: 1.030397891998291,grad_norm: 0.9999993953573217, iteration: 253200
loss: 1.044472098350525,grad_norm: 0.7947600845542068, iteration: 253201
loss: 0.9976316690444946,grad_norm: 0.8732500559967723, iteration: 253202
loss: 1.0150808095932007,grad_norm: 0.8817199411809539, iteration: 253203
loss: 0.9565772414207458,grad_norm: 0.7900394493940827, iteration: 253204
loss: 1.039581298828125,grad_norm: 0.896959482512827, iteration: 253205
loss: 0.9908479452133179,grad_norm: 0.8042826041169095, iteration: 253206
loss: 0.9938603043556213,grad_norm: 0.9606834182322723, iteration: 253207
loss: 0.9710045456886292,grad_norm: 0.8169279485001976, iteration: 253208
loss: 1.0093717575073242,grad_norm: 0.9999991127050818, iteration: 253209
loss: 1.1159192323684692,grad_norm: 0.9999991751986276, iteration: 253210
loss: 0.9515285491943359,grad_norm: 0.8936156952710598, iteration: 253211
loss: 1.029233455657959,grad_norm: 0.9999991064266753, iteration: 253212
loss: 1.0495353937149048,grad_norm: 0.823589343623618, iteration: 253213
loss: 1.020837426185608,grad_norm: 0.9539944174865419, iteration: 253214
loss: 0.983900249004364,grad_norm: 0.7771855449475439, iteration: 253215
loss: 1.0322043895721436,grad_norm: 0.9999991200488512, iteration: 253216
loss: 1.0341733694076538,grad_norm: 0.9999991883401562, iteration: 253217
loss: 1.0266295671463013,grad_norm: 0.9999998249844121, iteration: 253218
loss: 0.9695265293121338,grad_norm: 0.9999990925115113, iteration: 253219
loss: 0.9936621785163879,grad_norm: 0.860703854818227, iteration: 253220
loss: 1.0185792446136475,grad_norm: 0.9442953611500258, iteration: 253221
loss: 0.994893491268158,grad_norm: 0.7301017649510626, iteration: 253222
loss: 1.0204159021377563,grad_norm: 0.9999995906289109, iteration: 253223
loss: 1.0214958190917969,grad_norm: 0.8710526396942938, iteration: 253224
loss: 1.0062154531478882,grad_norm: 0.9160381771055791, iteration: 253225
loss: 0.9518670439720154,grad_norm: 0.9768926845490521, iteration: 253226
loss: 0.9993597269058228,grad_norm: 0.8356127357189942, iteration: 253227
loss: 0.9841264486312866,grad_norm: 0.9999990763162424, iteration: 253228
loss: 0.9534913897514343,grad_norm: 0.8884627242510242, iteration: 253229
loss: 0.9472955465316772,grad_norm: 0.8749606005733548, iteration: 253230
loss: 1.0141575336456299,grad_norm: 0.7886335064855813, iteration: 253231
loss: 1.0231878757476807,grad_norm: 0.9999991140579914, iteration: 253232
loss: 0.9740258455276489,grad_norm: 0.9999990689511247, iteration: 253233
loss: 0.9879215955734253,grad_norm: 0.8370819499228171, iteration: 253234
loss: 1.006096601486206,grad_norm: 0.8770107748959669, iteration: 253235
loss: 0.9918121695518494,grad_norm: 0.8706490285358623, iteration: 253236
loss: 1.0047763586044312,grad_norm: 0.9434594448735573, iteration: 253237
loss: 0.9886036515235901,grad_norm: 0.9021905216678306, iteration: 253238
loss: 1.0493791103363037,grad_norm: 0.9999998507781406, iteration: 253239
loss: 1.0158648490905762,grad_norm: 0.8488434673255965, iteration: 253240
loss: 1.0146546363830566,grad_norm: 0.9671368639364534, iteration: 253241
loss: 1.0109312534332275,grad_norm: 0.8747586940305416, iteration: 253242
loss: 1.0111125707626343,grad_norm: 0.9999992919189222, iteration: 253243
loss: 1.0166139602661133,grad_norm: 0.798931397349614, iteration: 253244
loss: 1.0083577632904053,grad_norm: 0.944176436975718, iteration: 253245
loss: 0.9808371663093567,grad_norm: 0.9715517335456657, iteration: 253246
loss: 0.9758980870246887,grad_norm: 0.9983049625131456, iteration: 253247
loss: 0.9751714468002319,grad_norm: 0.9712814992464055, iteration: 253248
loss: 0.9687913656234741,grad_norm: 0.9999991172182825, iteration: 253249
loss: 0.9935584664344788,grad_norm: 0.9028819377831623, iteration: 253250
loss: 0.9690666198730469,grad_norm: 0.8571839497588043, iteration: 253251
loss: 1.0091893672943115,grad_norm: 0.9999990082783723, iteration: 253252
loss: 1.0076112747192383,grad_norm: 0.9999991320132099, iteration: 253253
loss: 0.9781060218811035,grad_norm: 0.8437519443586603, iteration: 253254
loss: 1.0111234188079834,grad_norm: 0.9999990654444583, iteration: 253255
loss: 1.0123021602630615,grad_norm: 0.8670494579886798, iteration: 253256
loss: 1.031596064567566,grad_norm: 0.877149360134541, iteration: 253257
loss: 1.033301591873169,grad_norm: 0.8683459198557562, iteration: 253258
loss: 0.9948074221611023,grad_norm: 0.8209576332262155, iteration: 253259
loss: 0.9891169667243958,grad_norm: 0.8985174238185827, iteration: 253260
loss: 1.0132813453674316,grad_norm: 0.9999998386920099, iteration: 253261
loss: 1.011526107788086,grad_norm: 0.7028767101871037, iteration: 253262
loss: 1.1265207529067993,grad_norm: 0.9813688815938335, iteration: 253263
loss: 0.9410306215286255,grad_norm: 0.9622874761804384, iteration: 253264
loss: 1.0125738382339478,grad_norm: 0.9999991658729281, iteration: 253265
loss: 0.9717305898666382,grad_norm: 0.9079756279350462, iteration: 253266
loss: 1.0165627002716064,grad_norm: 0.9999992016552964, iteration: 253267
loss: 1.025992512702942,grad_norm: 0.9999992222049949, iteration: 253268
loss: 1.0363173484802246,grad_norm: 0.7911581793918547, iteration: 253269
loss: 1.0361918210983276,grad_norm: 0.8513794685853147, iteration: 253270
loss: 1.0050185918807983,grad_norm: 0.8184307714000076, iteration: 253271
loss: 1.107041358947754,grad_norm: 0.9999995163718772, iteration: 253272
loss: 0.9916672706604004,grad_norm: 0.7735348722247204, iteration: 253273
loss: 1.0178735256195068,grad_norm: 0.8945468394097982, iteration: 253274
loss: 0.9944096207618713,grad_norm: 0.7699859622480855, iteration: 253275
loss: 1.0262612104415894,grad_norm: 0.9999996573153179, iteration: 253276
loss: 1.0021196603775024,grad_norm: 0.7473317476923254, iteration: 253277
loss: 1.002286434173584,grad_norm: 0.9547903214480615, iteration: 253278
loss: 0.9886302351951599,grad_norm: 0.8433888129792062, iteration: 253279
loss: 0.9746906757354736,grad_norm: 0.9999990050989135, iteration: 253280
loss: 1.0058327913284302,grad_norm: 0.7166889791155168, iteration: 253281
loss: 0.9894302487373352,grad_norm: 0.9999990238770118, iteration: 253282
loss: 0.9854759573936462,grad_norm: 0.999999277219399, iteration: 253283
loss: 0.9985314607620239,grad_norm: 0.9531690266864171, iteration: 253284
loss: 1.0169531106948853,grad_norm: 0.9115609357770659, iteration: 253285
loss: 0.9752392172813416,grad_norm: 0.9709296751273663, iteration: 253286
loss: 0.9932454228401184,grad_norm: 0.9999989633528206, iteration: 253287
loss: 1.0127495527267456,grad_norm: 0.9999990329886745, iteration: 253288
loss: 0.9952535629272461,grad_norm: 0.9085658119780499, iteration: 253289
loss: 1.0217138528823853,grad_norm: 0.9947214792885234, iteration: 253290
loss: 0.9797345995903015,grad_norm: 0.8803787460995957, iteration: 253291
loss: 1.0180237293243408,grad_norm: 0.9082561685640843, iteration: 253292
loss: 1.1365838050842285,grad_norm: 0.9999990999298297, iteration: 253293
loss: 1.0227571725845337,grad_norm: 0.9413989687250207, iteration: 253294
loss: 1.0031379461288452,grad_norm: 0.9108183782042901, iteration: 253295
loss: 1.0019489526748657,grad_norm: 0.9214211149898419, iteration: 253296
loss: 1.008566975593567,grad_norm: 0.7783743414332858, iteration: 253297
loss: 1.0204367637634277,grad_norm: 0.8530342723003146, iteration: 253298
loss: 0.9678992629051208,grad_norm: 0.8013031761779891, iteration: 253299
loss: 1.0215308666229248,grad_norm: 0.9680495534338074, iteration: 253300
loss: 1.0345990657806396,grad_norm: 0.8530409125458889, iteration: 253301
loss: 0.9937135577201843,grad_norm: 0.8961458489299419, iteration: 253302
loss: 1.0028105974197388,grad_norm: 0.8669618235253268, iteration: 253303
loss: 0.9920705556869507,grad_norm: 0.8511346810777378, iteration: 253304
loss: 1.0014995336532593,grad_norm: 0.9440049452610006, iteration: 253305
loss: 0.9833508729934692,grad_norm: 0.8068661812414835, iteration: 253306
loss: 1.0253252983093262,grad_norm: 0.9999991041912286, iteration: 253307
loss: 1.0241307020187378,grad_norm: 0.8811600871417038, iteration: 253308
loss: 0.9861907958984375,grad_norm: 0.839662405468603, iteration: 253309
loss: 0.9994134902954102,grad_norm: 0.9243273269590219, iteration: 253310
loss: 0.9963748455047607,grad_norm: 0.9999991291153085, iteration: 253311
loss: 1.0008143186569214,grad_norm: 0.999999099448064, iteration: 253312
loss: 1.0186281204223633,grad_norm: 0.8923435058543805, iteration: 253313
loss: 0.9955052137374878,grad_norm: 0.9999995000108002, iteration: 253314
loss: 1.041229009628296,grad_norm: 0.986128360368515, iteration: 253315
loss: 1.0416789054870605,grad_norm: 0.8704497762176338, iteration: 253316
loss: 1.0172475576400757,grad_norm: 0.9160087165777157, iteration: 253317
loss: 1.0247818231582642,grad_norm: 0.9999996070336895, iteration: 253318
loss: 0.9842098355293274,grad_norm: 0.9999991228087441, iteration: 253319
loss: 0.9730202555656433,grad_norm: 0.9999991811652954, iteration: 253320
loss: 0.9820497632026672,grad_norm: 0.8925251448714846, iteration: 253321
loss: 0.9848518967628479,grad_norm: 0.8182295471048586, iteration: 253322
loss: 0.9885438680648804,grad_norm: 0.9999992210941279, iteration: 253323
loss: 1.017980694770813,grad_norm: 0.6150601607735888, iteration: 253324
loss: 1.0135763883590698,grad_norm: 0.8034040793840238, iteration: 253325
loss: 0.9689695835113525,grad_norm: 0.9192133526345179, iteration: 253326
loss: 0.9562172293663025,grad_norm: 0.9116981731005803, iteration: 253327
loss: 0.977993905544281,grad_norm: 0.9389639856396305, iteration: 253328
loss: 1.0104480981826782,grad_norm: 0.9272094009341313, iteration: 253329
loss: 1.0075366497039795,grad_norm: 0.9818891308874884, iteration: 253330
loss: 0.9850301146507263,grad_norm: 0.9882508958717876, iteration: 253331
loss: 0.9998247623443604,grad_norm: 0.9263779033280666, iteration: 253332
loss: 0.9664688110351562,grad_norm: 0.9009576324105075, iteration: 253333
loss: 1.0110619068145752,grad_norm: 0.8032344157408041, iteration: 253334
loss: 1.1318345069885254,grad_norm: 0.9999997762153728, iteration: 253335
loss: 0.9568420648574829,grad_norm: 0.9999992324992598, iteration: 253336
loss: 0.9740559458732605,grad_norm: 0.8194861374318322, iteration: 253337
loss: 1.0203393697738647,grad_norm: 0.9114436330831132, iteration: 253338
loss: 1.0032352209091187,grad_norm: 0.999999650640647, iteration: 253339
loss: 0.9763861298561096,grad_norm: 0.9871366146282777, iteration: 253340
loss: 1.0168230533599854,grad_norm: 0.9999989486735665, iteration: 253341
loss: 1.0675339698791504,grad_norm: 0.9999991191548154, iteration: 253342
loss: 0.9859936833381653,grad_norm: 0.8981504623972284, iteration: 253343
loss: 0.9992821216583252,grad_norm: 0.9563369119949862, iteration: 253344
loss: 0.9985893368721008,grad_norm: 0.8098510791141795, iteration: 253345
loss: 0.9663718938827515,grad_norm: 0.8045281522795652, iteration: 253346
loss: 1.0147801637649536,grad_norm: 0.999999256624498, iteration: 253347
loss: 1.0143903493881226,grad_norm: 0.9999991213723156, iteration: 253348
loss: 1.0015192031860352,grad_norm: 0.9050529523476084, iteration: 253349
loss: 0.9873954653739929,grad_norm: 0.9485351789824186, iteration: 253350
loss: 1.0400817394256592,grad_norm: 0.8840114240343213, iteration: 253351
loss: 1.0077593326568604,grad_norm: 0.9279227600896031, iteration: 253352
loss: 1.0063115358352661,grad_norm: 0.8624340459193383, iteration: 253353
loss: 0.9478350281715393,grad_norm: 0.9999991038715142, iteration: 253354
loss: 0.9931988716125488,grad_norm: 0.9426839001682031, iteration: 253355
loss: 0.9887807369232178,grad_norm: 0.9999992748038917, iteration: 253356
loss: 0.9480774998664856,grad_norm: 0.8650684158622296, iteration: 253357
loss: 0.9724021553993225,grad_norm: 0.8824109340743228, iteration: 253358
loss: 1.0006033182144165,grad_norm: 0.825644405712517, iteration: 253359
loss: 1.0094290971755981,grad_norm: 0.9795225929412334, iteration: 253360
loss: 1.0236586332321167,grad_norm: 0.8263790452375963, iteration: 253361
loss: 1.0114344358444214,grad_norm: 0.9999990586892168, iteration: 253362
loss: 1.0160585641860962,grad_norm: 0.9999990009573683, iteration: 253363
loss: 1.0212749242782593,grad_norm: 0.9575406998148247, iteration: 253364
loss: 1.0709679126739502,grad_norm: 0.8998797496940442, iteration: 253365
loss: 1.0069694519042969,grad_norm: 0.8101508928951815, iteration: 253366
loss: 1.023436188697815,grad_norm: 0.8653748599693174, iteration: 253367
loss: 0.9841474890708923,grad_norm: 0.8793340661811432, iteration: 253368
loss: 0.9692093729972839,grad_norm: 0.8447333898550705, iteration: 253369
loss: 0.9977198243141174,grad_norm: 0.737519403336867, iteration: 253370
loss: 1.0202513933181763,grad_norm: 0.9094829870853902, iteration: 253371
loss: 1.0425506830215454,grad_norm: 0.9999992459577751, iteration: 253372
loss: 1.011776089668274,grad_norm: 0.7927924478323027, iteration: 253373
loss: 1.0401363372802734,grad_norm: 0.9999995911027388, iteration: 253374
loss: 1.0021321773529053,grad_norm: 0.9756253760764236, iteration: 253375
loss: 0.9888420104980469,grad_norm: 0.9999989937611771, iteration: 253376
loss: 1.0274826288223267,grad_norm: 0.9615033576760089, iteration: 253377
loss: 0.9922741651535034,grad_norm: 0.9623660958519711, iteration: 253378
loss: 0.9961349368095398,grad_norm: 0.8107333589448309, iteration: 253379
loss: 1.0100843906402588,grad_norm: 0.8544571010753806, iteration: 253380
loss: 1.0295062065124512,grad_norm: 0.9999999048114365, iteration: 253381
loss: 1.0139222145080566,grad_norm: 0.8797058325184223, iteration: 253382
loss: 1.0602974891662598,grad_norm: 0.8752419520323006, iteration: 253383
loss: 0.9908624887466431,grad_norm: 0.8670485573182974, iteration: 253384
loss: 0.9898983240127563,grad_norm: 0.9363729755618495, iteration: 253385
loss: 0.9880048036575317,grad_norm: 0.7941216762973337, iteration: 253386
loss: 0.9982221722602844,grad_norm: 0.9607787415536175, iteration: 253387
loss: 1.029219388961792,grad_norm: 0.8365191878316913, iteration: 253388
loss: 1.0091172456741333,grad_norm: 0.831692958569132, iteration: 253389
loss: 0.980317234992981,grad_norm: 0.8684150619643309, iteration: 253390
loss: 1.0201101303100586,grad_norm: 0.9695083507251271, iteration: 253391
loss: 0.9679104089736938,grad_norm: 0.8384646764648109, iteration: 253392
loss: 1.0462851524353027,grad_norm: 0.874927789671502, iteration: 253393
loss: 1.0315741300582886,grad_norm: 0.8811782929140646, iteration: 253394
loss: 1.008411169052124,grad_norm: 0.8443835233200524, iteration: 253395
loss: 1.0127813816070557,grad_norm: 0.9488242705777234, iteration: 253396
loss: 1.0139001607894897,grad_norm: 0.9999990497114434, iteration: 253397
loss: 0.9722174406051636,grad_norm: 0.9999990388446378, iteration: 253398
loss: 1.0542088747024536,grad_norm: 0.910613801386786, iteration: 253399
loss: 1.035409688949585,grad_norm: 0.9107449708097664, iteration: 253400
loss: 1.0308465957641602,grad_norm: 0.9999992373481228, iteration: 253401
loss: 1.0092122554779053,grad_norm: 0.9273586813848417, iteration: 253402
loss: 1.04275381565094,grad_norm: 1.0000000503837259, iteration: 253403
loss: 0.978923499584198,grad_norm: 0.9999991524894348, iteration: 253404
loss: 1.0043224096298218,grad_norm: 0.9319220500251112, iteration: 253405
loss: 0.9811744093894958,grad_norm: 0.8922348006252034, iteration: 253406
loss: 1.0540484189987183,grad_norm: 0.9999996376601764, iteration: 253407
loss: 1.0325456857681274,grad_norm: 0.8683057221305804, iteration: 253408
loss: 1.004631519317627,grad_norm: 0.9666073031750706, iteration: 253409
loss: 1.2360643148422241,grad_norm: 0.9999999243278406, iteration: 253410
loss: 0.985396146774292,grad_norm: 0.8397466046407696, iteration: 253411
loss: 1.0662614107131958,grad_norm: 0.9999999802686605, iteration: 253412
loss: 1.0723366737365723,grad_norm: 0.9999999552220266, iteration: 253413
loss: 0.9707796573638916,grad_norm: 0.9999992054987276, iteration: 253414
loss: 1.002687692642212,grad_norm: 0.8748580986250515, iteration: 253415
loss: 1.0118924379348755,grad_norm: 0.8200263763465001, iteration: 253416
loss: 0.9782760739326477,grad_norm: 0.9243404222697875, iteration: 253417
loss: 1.0369683504104614,grad_norm: 0.7804452165298442, iteration: 253418
loss: 1.00424325466156,grad_norm: 0.9188264061604652, iteration: 253419
loss: 1.0920652151107788,grad_norm: 0.9020287001270068, iteration: 253420
loss: 1.0535298585891724,grad_norm: 0.9748762581294536, iteration: 253421
loss: 0.9858438372612,grad_norm: 0.7485163478174496, iteration: 253422
loss: 1.0373235940933228,grad_norm: 0.8977264712097182, iteration: 253423
loss: 1.0428845882415771,grad_norm: 0.99999918341428, iteration: 253424
loss: 1.0445955991744995,grad_norm: 0.9999990581776838, iteration: 253425
loss: 1.0093441009521484,grad_norm: 0.8083840102908034, iteration: 253426
loss: 1.0114277601242065,grad_norm: 0.8879084280175805, iteration: 253427
loss: 1.0027388334274292,grad_norm: 0.8235526399506893, iteration: 253428
loss: 0.9946351647377014,grad_norm: 0.9019398398373409, iteration: 253429
loss: 1.0723038911819458,grad_norm: 0.8177103896487321, iteration: 253430
loss: 1.0026768445968628,grad_norm: 0.8083703809758039, iteration: 253431
loss: 1.0429264307022095,grad_norm: 0.9999991143551209, iteration: 253432
loss: 0.9649414420127869,grad_norm: 0.819681784266704, iteration: 253433
loss: 0.9803977608680725,grad_norm: 0.8079264956298879, iteration: 253434
loss: 1.0528960227966309,grad_norm: 0.8776160002914848, iteration: 253435
loss: 1.0743372440338135,grad_norm: 0.9999993841827377, iteration: 253436
loss: 1.0122514963150024,grad_norm: 0.9819439475654005, iteration: 253437
loss: 1.0005834102630615,grad_norm: 0.7285677321785938, iteration: 253438
loss: 1.0388036966323853,grad_norm: 0.9999992478102299, iteration: 253439
loss: 0.9830936193466187,grad_norm: 0.9714724872877257, iteration: 253440
loss: 0.9566025733947754,grad_norm: 0.8221908693925237, iteration: 253441
loss: 1.0109227895736694,grad_norm: 0.840389090361448, iteration: 253442
loss: 0.9662095904350281,grad_norm: 0.9999992579777224, iteration: 253443
loss: 0.9870163202285767,grad_norm: 0.999999086932059, iteration: 253444
loss: 0.9418906569480896,grad_norm: 0.8880267309448739, iteration: 253445
loss: 0.968235433101654,grad_norm: 0.835538884437959, iteration: 253446
loss: 1.0261776447296143,grad_norm: 0.9999992327846833, iteration: 253447
loss: 1.0059940814971924,grad_norm: 0.9212624986552234, iteration: 253448
loss: 0.9557350873947144,grad_norm: 0.8967401499239872, iteration: 253449
loss: 1.0121519565582275,grad_norm: 0.9999991356066994, iteration: 253450
loss: 0.9990307688713074,grad_norm: 0.9999990948304718, iteration: 253451
loss: 1.006726861000061,grad_norm: 0.9721270727265751, iteration: 253452
loss: 1.0027799606323242,grad_norm: 0.9999993894677214, iteration: 253453
loss: 1.0341824293136597,grad_norm: 0.7102352092925062, iteration: 253454
loss: 0.9673988223075867,grad_norm: 0.8214460286939625, iteration: 253455
loss: 0.9730463027954102,grad_norm: 0.8025416647197173, iteration: 253456
loss: 1.0212815999984741,grad_norm: 0.9301573447074237, iteration: 253457
loss: 0.9427387714385986,grad_norm: 0.9999993170407477, iteration: 253458
loss: 1.0447639226913452,grad_norm: 0.9667375567464153, iteration: 253459
loss: 1.035195231437683,grad_norm: 0.9231700548357583, iteration: 253460
loss: 1.0027464628219604,grad_norm: 0.9310359366880002, iteration: 253461
loss: 0.9627339243888855,grad_norm: 0.8027794069208919, iteration: 253462
loss: 0.9855684041976929,grad_norm: 0.8292959005044727, iteration: 253463
loss: 1.0332001447677612,grad_norm: 0.9136325062799693, iteration: 253464
loss: 0.9934797883033752,grad_norm: 0.9999991496431149, iteration: 253465
loss: 1.0074710845947266,grad_norm: 0.9273608106587568, iteration: 253466
loss: 0.9669976830482483,grad_norm: 0.9822755104856018, iteration: 253467
loss: 0.9955150485038757,grad_norm: 0.9653920260992563, iteration: 253468
loss: 0.994600772857666,grad_norm: 0.8672120103668333, iteration: 253469
loss: 0.9762290716171265,grad_norm: 0.9999995728082183, iteration: 253470
loss: 1.0034390687942505,grad_norm: 0.9081915964304886, iteration: 253471
loss: 0.9682462811470032,grad_norm: 0.9999990756290742, iteration: 253472
loss: 0.9942042231559753,grad_norm: 0.855833827884415, iteration: 253473
loss: 1.0019773244857788,grad_norm: 0.9999991507893908, iteration: 253474
loss: 1.0098105669021606,grad_norm: 0.9999991830324149, iteration: 253475
loss: 1.029977560043335,grad_norm: 0.9585012282303506, iteration: 253476
loss: 0.9830501079559326,grad_norm: 0.9767668178133553, iteration: 253477
loss: 1.0186783075332642,grad_norm: 0.8756694138825402, iteration: 253478
loss: 1.0220084190368652,grad_norm: 0.9999991914643154, iteration: 253479
loss: 0.997524082660675,grad_norm: 0.9295710432410073, iteration: 253480
loss: 1.0074195861816406,grad_norm: 0.9648053146058201, iteration: 253481
loss: 1.0168704986572266,grad_norm: 0.8688468708434617, iteration: 253482
loss: 1.0430952310562134,grad_norm: 0.9999990979177189, iteration: 253483
loss: 1.030473232269287,grad_norm: 0.8802621358336413, iteration: 253484
loss: 0.9687655568122864,grad_norm: 0.7705066053711899, iteration: 253485
loss: 1.0454401969909668,grad_norm: 0.9561746990728173, iteration: 253486
loss: 0.9988113641738892,grad_norm: 0.99999925759681, iteration: 253487
loss: 0.9852604269981384,grad_norm: 0.9402589442732812, iteration: 253488
loss: 1.0220856666564941,grad_norm: 0.8632161593545951, iteration: 253489
loss: 1.0072720050811768,grad_norm: 0.8588699545382931, iteration: 253490
loss: 1.0441290140151978,grad_norm: 0.9688219384463002, iteration: 253491
loss: 1.009972095489502,grad_norm: 0.9999997726042897, iteration: 253492
loss: 1.0324994325637817,grad_norm: 0.9999991243672713, iteration: 253493
loss: 0.9988712668418884,grad_norm: 0.9999989680412896, iteration: 253494
loss: 0.986445963382721,grad_norm: 0.7996047704685418, iteration: 253495
loss: 1.0273315906524658,grad_norm: 0.8995787846823745, iteration: 253496
loss: 1.0089010000228882,grad_norm: 0.9999991768854922, iteration: 253497
loss: 0.9987025260925293,grad_norm: 0.9999992928283898, iteration: 253498
loss: 1.0268585681915283,grad_norm: 0.9999991281281528, iteration: 253499
loss: 0.9697494506835938,grad_norm: 0.7799249147219991, iteration: 253500
loss: 1.0185781717300415,grad_norm: 0.999999039824612, iteration: 253501
loss: 0.9778602123260498,grad_norm: 0.7735574170448766, iteration: 253502
loss: 1.0157744884490967,grad_norm: 0.9999992513074484, iteration: 253503
loss: 0.9753210544586182,grad_norm: 0.999999078602725, iteration: 253504
loss: 0.9901466965675354,grad_norm: 0.9999991355168278, iteration: 253505
loss: 0.9990988373756409,grad_norm: 0.8467268568519744, iteration: 253506
loss: 1.0548757314682007,grad_norm: 0.844787389717647, iteration: 253507
loss: 0.980077862739563,grad_norm: 0.7942069208345568, iteration: 253508
loss: 0.9765198826789856,grad_norm: 0.9604138630665672, iteration: 253509
loss: 0.9489444494247437,grad_norm: 0.9504667695337469, iteration: 253510
loss: 1.0158586502075195,grad_norm: 0.9999990512927197, iteration: 253511
loss: 0.9878175258636475,grad_norm: 0.8847943851666868, iteration: 253512
loss: 1.034165382385254,grad_norm: 0.9002141701696703, iteration: 253513
loss: 0.978242039680481,grad_norm: 0.9727690190162682, iteration: 253514
loss: 0.9909969568252563,grad_norm: 0.9114219514295631, iteration: 253515
loss: 0.9908357858657837,grad_norm: 0.9760921005700582, iteration: 253516
loss: 0.9883518218994141,grad_norm: 0.9999990736796761, iteration: 253517
loss: 0.9683936238288879,grad_norm: 0.9147674420691388, iteration: 253518
loss: 0.9881078004837036,grad_norm: 0.8723272275436099, iteration: 253519
loss: 0.9874722361564636,grad_norm: 0.8456348943552717, iteration: 253520
loss: 1.0493398904800415,grad_norm: 0.9999990457947429, iteration: 253521
loss: 1.0042940378189087,grad_norm: 0.9999991066006345, iteration: 253522
loss: 0.968859076499939,grad_norm: 0.9999990011007369, iteration: 253523
loss: 0.9831223487854004,grad_norm: 0.9999991503006669, iteration: 253524
loss: 0.9927002191543579,grad_norm: 0.8932542158225384, iteration: 253525
loss: 1.0266324281692505,grad_norm: 0.8624488370994104, iteration: 253526
loss: 0.9747267961502075,grad_norm: 0.9999991132422057, iteration: 253527
loss: 1.0259710550308228,grad_norm: 0.8054877542292977, iteration: 253528
loss: 1.0219775438308716,grad_norm: 0.971365706111901, iteration: 253529
loss: 0.9714201092720032,grad_norm: 0.7704811652579873, iteration: 253530
loss: 1.008960485458374,grad_norm: 0.9004823174071318, iteration: 253531
loss: 1.0046519041061401,grad_norm: 0.9999991937362671, iteration: 253532
loss: 1.0088703632354736,grad_norm: 0.9306621162612628, iteration: 253533
loss: 0.9781954884529114,grad_norm: 0.7880219300011115, iteration: 253534
loss: 0.9678923487663269,grad_norm: 0.8506717382112549, iteration: 253535
loss: 0.9586734771728516,grad_norm: 0.9999991856299344, iteration: 253536
loss: 1.0016597509384155,grad_norm: 0.999998897681175, iteration: 253537
loss: 0.9916073083877563,grad_norm: 0.9662307942127063, iteration: 253538
loss: 1.0150399208068848,grad_norm: 0.9869546158492206, iteration: 253539
loss: 0.958739697933197,grad_norm: 0.9999997741703128, iteration: 253540
loss: 1.0310207605361938,grad_norm: 0.9999991176449158, iteration: 253541
loss: 1.0101157426834106,grad_norm: 0.9984527738634267, iteration: 253542
loss: 1.019812822341919,grad_norm: 0.9737400613026641, iteration: 253543
loss: 1.11616051197052,grad_norm: 0.9999992595468197, iteration: 253544
loss: 1.0012480020523071,grad_norm: 0.9432855742349116, iteration: 253545
loss: 0.9961312413215637,grad_norm: 0.8496359331360418, iteration: 253546
loss: 1.0535531044006348,grad_norm: 0.9999990881091922, iteration: 253547
loss: 0.9703168869018555,grad_norm: 0.9003818822037728, iteration: 253548
loss: 1.0395475625991821,grad_norm: 0.9999997134150416, iteration: 253549
loss: 1.1231062412261963,grad_norm: 0.9084932236312909, iteration: 253550
loss: 0.9885119795799255,grad_norm: 0.8733903339849386, iteration: 253551
loss: 1.0122220516204834,grad_norm: 0.8405801718153442, iteration: 253552
loss: 0.9976760745048523,grad_norm: 0.8141091025687162, iteration: 253553
loss: 0.9751365780830383,grad_norm: 0.9345311403847594, iteration: 253554
loss: 1.007539987564087,grad_norm: 0.9999990472813491, iteration: 253555
loss: 0.9812296032905579,grad_norm: 0.7743413734272102, iteration: 253556
loss: 1.0239639282226562,grad_norm: 0.7299603780934408, iteration: 253557
loss: 0.9665974378585815,grad_norm: 0.7574600125130834, iteration: 253558
loss: 0.9692454934120178,grad_norm: 0.9668701539481137, iteration: 253559
loss: 1.0566496849060059,grad_norm: 0.9999995018203787, iteration: 253560
loss: 1.007954478263855,grad_norm: 0.9999994400502894, iteration: 253561
loss: 0.9773259162902832,grad_norm: 0.9999991509542279, iteration: 253562
loss: 0.9899171590805054,grad_norm: 0.9048179161455187, iteration: 253563
loss: 1.0136666297912598,grad_norm: 0.839278610218057, iteration: 253564
loss: 1.0135136842727661,grad_norm: 0.8881070003286592, iteration: 253565
loss: 0.9873036742210388,grad_norm: 0.9383687697662887, iteration: 253566
loss: 1.0485607385635376,grad_norm: 0.9999991091705388, iteration: 253567
loss: 1.0157675743103027,grad_norm: 0.9895667442105668, iteration: 253568
loss: 0.9854762554168701,grad_norm: 0.9999992982491217, iteration: 253569
loss: 0.9958648085594177,grad_norm: 0.8686784542055129, iteration: 253570
loss: 1.0018870830535889,grad_norm: 0.8228801459544434, iteration: 253571
loss: 0.9770016074180603,grad_norm: 0.9911535642019251, iteration: 253572
loss: 1.0065199136734009,grad_norm: 0.9999989791515225, iteration: 253573
loss: 0.9703946709632874,grad_norm: 0.9330091185019554, iteration: 253574
loss: 1.0208008289337158,grad_norm: 0.7849235528762999, iteration: 253575
loss: 1.0325974225997925,grad_norm: 0.9999991582020192, iteration: 253576
loss: 1.0300872325897217,grad_norm: 0.8906532958735771, iteration: 253577
loss: 1.0230258703231812,grad_norm: 0.9999991764297782, iteration: 253578
loss: 0.9866898059844971,grad_norm: 0.7971539435575092, iteration: 253579
loss: 0.9997712969779968,grad_norm: 0.9029648305993482, iteration: 253580
loss: 0.9924507737159729,grad_norm: 0.9526736910749761, iteration: 253581
loss: 0.9889301657676697,grad_norm: 0.8705556153391595, iteration: 253582
loss: 0.9805417656898499,grad_norm: 0.8132566102305362, iteration: 253583
loss: 0.9887176156044006,grad_norm: 0.9462979396277829, iteration: 253584
loss: 1.002333164215088,grad_norm: 0.9999989540004455, iteration: 253585
loss: 0.9952747821807861,grad_norm: 0.9999991965268846, iteration: 253586
loss: 1.022757887840271,grad_norm: 0.9999991802670363, iteration: 253587
loss: 0.9985869526863098,grad_norm: 0.8921030058578997, iteration: 253588
loss: 1.0208154916763306,grad_norm: 0.9223952470440797, iteration: 253589
loss: 1.0278949737548828,grad_norm: 1.000000017737568, iteration: 253590
loss: 1.017529845237732,grad_norm: 0.9299727828242905, iteration: 253591
loss: 0.9719470739364624,grad_norm: 0.9314148574509749, iteration: 253592
loss: 1.034832239151001,grad_norm: 0.9970830190322031, iteration: 253593
loss: 1.0732944011688232,grad_norm: 0.9315698194492448, iteration: 253594
loss: 0.9818384051322937,grad_norm: 0.7671011815783291, iteration: 253595
loss: 1.0075658559799194,grad_norm: 0.8871444181135939, iteration: 253596
loss: 1.03938889503479,grad_norm: 0.9290003997247319, iteration: 253597
loss: 1.0112717151641846,grad_norm: 0.9999990440838548, iteration: 253598
loss: 0.9597850441932678,grad_norm: 0.9999991705390685, iteration: 253599
loss: 0.9789091944694519,grad_norm: 0.9095266957939327, iteration: 253600
loss: 0.9893022179603577,grad_norm: 0.9582317698043645, iteration: 253601
loss: 1.087165117263794,grad_norm: 0.9999991596291247, iteration: 253602
loss: 1.0336111783981323,grad_norm: 0.9059211790400288, iteration: 253603
loss: 1.0452266931533813,grad_norm: 0.8791581132580307, iteration: 253604
loss: 1.0040391683578491,grad_norm: 0.9528328218074573, iteration: 253605
loss: 0.9737006425857544,grad_norm: 0.8592857383603515, iteration: 253606
loss: 0.9963352680206299,grad_norm: 0.8443943660474388, iteration: 253607
loss: 0.9953368902206421,grad_norm: 0.9999991676511153, iteration: 253608
loss: 0.9859174489974976,grad_norm: 0.9999991555680215, iteration: 253609
loss: 1.0039345026016235,grad_norm: 0.8888916848899527, iteration: 253610
loss: 0.9887233376502991,grad_norm: 0.7812824397991756, iteration: 253611
loss: 1.004061222076416,grad_norm: 0.9497449761376887, iteration: 253612
loss: 1.022808313369751,grad_norm: 0.9086982039019882, iteration: 253613
loss: 1.0092402696609497,grad_norm: 0.9321475925256116, iteration: 253614
loss: 1.0154805183410645,grad_norm: 0.9620134251563652, iteration: 253615
loss: 0.9924198389053345,grad_norm: 0.751475265568411, iteration: 253616
loss: 0.9977717995643616,grad_norm: 0.9816056874627229, iteration: 253617
loss: 1.031532645225525,grad_norm: 0.9999998923706037, iteration: 253618
loss: 1.0020322799682617,grad_norm: 0.7975565523123767, iteration: 253619
loss: 0.9784484505653381,grad_norm: 0.9049964594106416, iteration: 253620
loss: 0.981701672077179,grad_norm: 0.9999991478506987, iteration: 253621
loss: 1.0024001598358154,grad_norm: 0.9774804589464937, iteration: 253622
loss: 0.9521613121032715,grad_norm: 0.9426975697372953, iteration: 253623
loss: 1.0153529644012451,grad_norm: 0.8274622038044093, iteration: 253624
loss: 1.0290287733078003,grad_norm: 0.8669772586852864, iteration: 253625
loss: 0.9878082275390625,grad_norm: 0.9668469564407279, iteration: 253626
loss: 1.0434067249298096,grad_norm: 0.9705705166627995, iteration: 253627
loss: 0.9804214835166931,grad_norm: 0.9999991305010955, iteration: 253628
loss: 1.007063388824463,grad_norm: 0.9999992414748514, iteration: 253629
loss: 0.9860532879829407,grad_norm: 0.9999991714474271, iteration: 253630
loss: 1.0000015497207642,grad_norm: 0.9313801108802091, iteration: 253631
loss: 1.0018616914749146,grad_norm: 0.8583185363642613, iteration: 253632
loss: 1.0033454895019531,grad_norm: 0.8037822941709966, iteration: 253633
loss: 0.9773156642913818,grad_norm: 0.8090490793243159, iteration: 253634
loss: 1.0409362316131592,grad_norm: 0.9999991585394217, iteration: 253635
loss: 1.006883978843689,grad_norm: 0.9999993209816844, iteration: 253636
loss: 1.0076192617416382,grad_norm: 0.8156648855744465, iteration: 253637
loss: 1.0680618286132812,grad_norm: 0.9476437179129651, iteration: 253638
loss: 1.0115066766738892,grad_norm: 0.8752696311508157, iteration: 253639
loss: 0.9670364260673523,grad_norm: 0.9999990592832371, iteration: 253640
loss: 1.0316450595855713,grad_norm: 0.9999996932639738, iteration: 253641
loss: 0.9901453256607056,grad_norm: 0.9478326202756454, iteration: 253642
loss: 1.0198363065719604,grad_norm: 0.9039325161835632, iteration: 253643
loss: 0.990212082862854,grad_norm: 0.8112779120685207, iteration: 253644
loss: 1.0109320878982544,grad_norm: 0.9999993618639902, iteration: 253645
loss: 0.9870566129684448,grad_norm: 0.9999992663905929, iteration: 253646
loss: 0.9906259775161743,grad_norm: 0.8793198231337593, iteration: 253647
loss: 1.0218786001205444,grad_norm: 0.9375868625933901, iteration: 253648
loss: 0.9767739772796631,grad_norm: 0.9999990819127293, iteration: 253649
loss: 0.9890089631080627,grad_norm: 0.9999991514866827, iteration: 253650
loss: 0.9985288977622986,grad_norm: 0.9611125828949594, iteration: 253651
loss: 0.9792684316635132,grad_norm: 0.9503398428950747, iteration: 253652
loss: 1.0230839252471924,grad_norm: 0.9111634338376846, iteration: 253653
loss: 1.0073177814483643,grad_norm: 0.8249649767862376, iteration: 253654
loss: 1.007441759109497,grad_norm: 0.9999989579775913, iteration: 253655
loss: 0.9738104939460754,grad_norm: 0.9594053623524249, iteration: 253656
loss: 0.9964693188667297,grad_norm: 0.8790124696951976, iteration: 253657
loss: 1.00154447555542,grad_norm: 0.8408108032233104, iteration: 253658
loss: 0.9704241156578064,grad_norm: 0.999999003147476, iteration: 253659
loss: 1.004775881767273,grad_norm: 0.9999991818885848, iteration: 253660
loss: 0.9920611381530762,grad_norm: 0.8428968149504579, iteration: 253661
loss: 1.0228804349899292,grad_norm: 0.9029821219954107, iteration: 253662
loss: 0.9901823401451111,grad_norm: 0.8635229651588113, iteration: 253663
loss: 0.954383909702301,grad_norm: 0.9999999358522271, iteration: 253664
loss: 1.0149365663528442,grad_norm: 0.9002709327826431, iteration: 253665
loss: 1.0512754917144775,grad_norm: 0.9999998084042612, iteration: 253666
loss: 1.0089157819747925,grad_norm: 0.7642229450998437, iteration: 253667
loss: 0.9629347920417786,grad_norm: 0.9999989983918531, iteration: 253668
loss: 0.984413743019104,grad_norm: 0.7892795749173283, iteration: 253669
loss: 0.9852930307388306,grad_norm: 0.940784217135085, iteration: 253670
loss: 1.0142089128494263,grad_norm: 0.9999991174003269, iteration: 253671
loss: 1.0236101150512695,grad_norm: 0.9999991066881981, iteration: 253672
loss: 0.9920179843902588,grad_norm: 0.9583156781625826, iteration: 253673
loss: 0.9678634405136108,grad_norm: 0.8232792385664676, iteration: 253674
loss: 1.0130382776260376,grad_norm: 0.9424266521735526, iteration: 253675
loss: 0.9556993246078491,grad_norm: 0.9999991539111538, iteration: 253676
loss: 0.9909224510192871,grad_norm: 0.798779062708938, iteration: 253677
loss: 0.9648323655128479,grad_norm: 0.9042638615092404, iteration: 253678
loss: 1.0147900581359863,grad_norm: 0.9999990206386771, iteration: 253679
loss: 1.0167433023452759,grad_norm: 0.8305450465201634, iteration: 253680
loss: 1.0783268213272095,grad_norm: 0.9999995633491255, iteration: 253681
loss: 1.0162396430969238,grad_norm: 0.8937874127848953, iteration: 253682
loss: 1.0174705982208252,grad_norm: 0.8658348327149086, iteration: 253683
loss: 1.0103588104248047,grad_norm: 0.9720171619410666, iteration: 253684
loss: 1.0218254327774048,grad_norm: 0.9999994636464851, iteration: 253685
loss: 0.9935101270675659,grad_norm: 0.9681685649174265, iteration: 253686
loss: 0.9905003309249878,grad_norm: 0.7294776273450251, iteration: 253687
loss: 1.0172191858291626,grad_norm: 0.9999991075685569, iteration: 253688
loss: 1.0365395545959473,grad_norm: 0.8980511513000198, iteration: 253689
loss: 0.984596848487854,grad_norm: 0.9155330766485249, iteration: 253690
loss: 0.9829412698745728,grad_norm: 0.8909466942514261, iteration: 253691
loss: 1.005867600440979,grad_norm: 0.9250516541542557, iteration: 253692
loss: 1.0355539321899414,grad_norm: 0.8609262016303253, iteration: 253693
loss: 0.9722648859024048,grad_norm: 0.9124666770807596, iteration: 253694
loss: 0.9895229935646057,grad_norm: 0.9999991403753721, iteration: 253695
loss: 0.9987573027610779,grad_norm: 0.9311683714449441, iteration: 253696
loss: 1.0250847339630127,grad_norm: 0.983900454652893, iteration: 253697
loss: 0.9955973029136658,grad_norm: 0.875590122561772, iteration: 253698
loss: 0.993357241153717,grad_norm: 0.9589546990694652, iteration: 253699
loss: 0.9579951763153076,grad_norm: 0.9465395284486514, iteration: 253700
loss: 1.0191913843154907,grad_norm: 0.7535557699151315, iteration: 253701
loss: 1.013685941696167,grad_norm: 0.9605283413250405, iteration: 253702
loss: 0.9842060804367065,grad_norm: 0.886314154393837, iteration: 253703
loss: 1.0348927974700928,grad_norm: 0.9999993624191185, iteration: 253704
loss: 0.997255265712738,grad_norm: 0.9999991027297201, iteration: 253705
loss: 1.0079389810562134,grad_norm: 0.9418361534131604, iteration: 253706
loss: 0.9950886368751526,grad_norm: 0.9646806557751472, iteration: 253707
loss: 1.0091129541397095,grad_norm: 0.8135133267805869, iteration: 253708
loss: 0.995120644569397,grad_norm: 0.9558606332678404, iteration: 253709
loss: 0.9802731275558472,grad_norm: 0.8199508102625465, iteration: 253710
loss: 0.9997929930686951,grad_norm: 0.9042393186020934, iteration: 253711
loss: 0.984001874923706,grad_norm: 0.8144063402581084, iteration: 253712
loss: 0.9967650175094604,grad_norm: 0.9815918399132401, iteration: 253713
loss: 1.0114185810089111,grad_norm: 0.8603800566673647, iteration: 253714
loss: 0.9843257069587708,grad_norm: 0.9999989631941111, iteration: 253715
loss: 0.9974725842475891,grad_norm: 0.9628307178236053, iteration: 253716
loss: 1.00523042678833,grad_norm: 0.9197345245720379, iteration: 253717
loss: 0.980492353439331,grad_norm: 0.9999992169694406, iteration: 253718
loss: 0.9850874543190002,grad_norm: 0.9999990123482695, iteration: 253719
loss: 0.9982215762138367,grad_norm: 0.9794593786262538, iteration: 253720
loss: 1.0175285339355469,grad_norm: 0.7825850055739894, iteration: 253721
loss: 1.0091444253921509,grad_norm: 0.9777246374462836, iteration: 253722
loss: 0.9975634813308716,grad_norm: 0.9999994019391959, iteration: 253723
loss: 1.0455031394958496,grad_norm: 0.8550571121311538, iteration: 253724
loss: 0.9934837222099304,grad_norm: 0.8737929962495826, iteration: 253725
loss: 0.9750585556030273,grad_norm: 0.8062089486205424, iteration: 253726
loss: 1.0074492692947388,grad_norm: 0.809903159062086, iteration: 253727
loss: 0.9637336730957031,grad_norm: 0.8948526044916495, iteration: 253728
loss: 1.1337357759475708,grad_norm: 0.9999996996284788, iteration: 253729
loss: 0.9785122275352478,grad_norm: 0.8679696534326303, iteration: 253730
loss: 0.9750897288322449,grad_norm: 0.9998731251839313, iteration: 253731
loss: 1.0026776790618896,grad_norm: 0.798828258468451, iteration: 253732
loss: 1.0005710124969482,grad_norm: 0.8849659156884261, iteration: 253733
loss: 1.0088846683502197,grad_norm: 0.9019952503698211, iteration: 253734
loss: 1.039924144744873,grad_norm: 0.9999990250230912, iteration: 253735
loss: 0.99831622838974,grad_norm: 0.8421702763285038, iteration: 253736
loss: 1.0231438875198364,grad_norm: 0.9999992641639113, iteration: 253737
loss: 0.9886975884437561,grad_norm: 0.8123452997310493, iteration: 253738
loss: 0.9681315422058105,grad_norm: 0.9999990866169153, iteration: 253739
loss: 0.955553412437439,grad_norm: 0.9919953356027084, iteration: 253740
loss: 0.9658322930335999,grad_norm: 0.9731523680272134, iteration: 253741
loss: 0.9939711093902588,grad_norm: 0.9999994490501715, iteration: 253742
loss: 0.9953835606575012,grad_norm: 0.8542780931103143, iteration: 253743
loss: 1.03958261013031,grad_norm: 0.9999999624300808, iteration: 253744
loss: 0.9763005375862122,grad_norm: 0.904350645316837, iteration: 253745
loss: 0.9862926602363586,grad_norm: 0.8562540835072538, iteration: 253746
loss: 0.9783852100372314,grad_norm: 0.961606084776233, iteration: 253747
loss: 0.98196941614151,grad_norm: 0.889671078831939, iteration: 253748
loss: 0.9687250256538391,grad_norm: 0.799889540743946, iteration: 253749
loss: 0.997188150882721,grad_norm: 0.8792821320097937, iteration: 253750
loss: 1.0326908826828003,grad_norm: 0.84714518957608, iteration: 253751
loss: 1.0149078369140625,grad_norm: 0.7573806334949574, iteration: 253752
loss: 0.9956371188163757,grad_norm: 0.952275514410376, iteration: 253753
loss: 0.9974616169929504,grad_norm: 0.7332150892097619, iteration: 253754
loss: 1.0054500102996826,grad_norm: 0.9999990511584105, iteration: 253755
loss: 0.9952305555343628,grad_norm: 0.8532251516308722, iteration: 253756
loss: 1.0033864974975586,grad_norm: 0.888872493579105, iteration: 253757
loss: 1.0011013746261597,grad_norm: 0.8504511432988827, iteration: 253758
loss: 1.0958935022354126,grad_norm: 0.9999991647852898, iteration: 253759
loss: 1.0063685178756714,grad_norm: 0.9582463823511, iteration: 253760
loss: 0.9813188314437866,grad_norm: 0.8240367983617713, iteration: 253761
loss: 1.0172995328903198,grad_norm: 0.999999139022491, iteration: 253762
loss: 1.0211174488067627,grad_norm: 0.9658095652165631, iteration: 253763
loss: 0.9903841614723206,grad_norm: 0.9999991993647641, iteration: 253764
loss: 1.0181678533554077,grad_norm: 0.8418965429825127, iteration: 253765
loss: 0.9880101084709167,grad_norm: 0.8641734333617644, iteration: 253766
loss: 1.0056430101394653,grad_norm: 0.842867214688764, iteration: 253767
loss: 0.9911179542541504,grad_norm: 0.8645298508507749, iteration: 253768
loss: 0.9801168441772461,grad_norm: 0.9999990696671832, iteration: 253769
loss: 1.0423922538757324,grad_norm: 0.8804594929052812, iteration: 253770
loss: 0.9813445806503296,grad_norm: 0.9527539657380637, iteration: 253771
loss: 0.9781327843666077,grad_norm: 0.8503216274133599, iteration: 253772
loss: 1.0099207162857056,grad_norm: 0.8943362938857042, iteration: 253773
loss: 0.9842904806137085,grad_norm: 0.8359017263153975, iteration: 253774
loss: 0.9710254073143005,grad_norm: 0.9447131107038214, iteration: 253775
loss: 0.9893189668655396,grad_norm: 0.8683333254632196, iteration: 253776
loss: 1.0908180475234985,grad_norm: 0.8397028746470523, iteration: 253777
loss: 1.0091153383255005,grad_norm: 0.7811009226679605, iteration: 253778
loss: 1.033468246459961,grad_norm: 0.9999991436167132, iteration: 253779
loss: 0.9748393297195435,grad_norm: 0.8996930446053578, iteration: 253780
loss: 1.0222774744033813,grad_norm: 0.828012916445964, iteration: 253781
loss: 1.0906085968017578,grad_norm: 0.8441969216870217, iteration: 253782
loss: 1.0980385541915894,grad_norm: 0.9999990255670882, iteration: 253783
loss: 1.0256229639053345,grad_norm: 0.999999226328167, iteration: 253784
loss: 1.0381649732589722,grad_norm: 0.9999994740312038, iteration: 253785
loss: 0.9672767519950867,grad_norm: 0.9582081039692915, iteration: 253786
loss: 1.0339596271514893,grad_norm: 0.9512163229138065, iteration: 253787
loss: 0.9653763175010681,grad_norm: 0.8543312428791201, iteration: 253788
loss: 0.9787256121635437,grad_norm: 0.9153253119752736, iteration: 253789
loss: 0.9911016225814819,grad_norm: 0.9930124078080004, iteration: 253790
loss: 1.004751443862915,grad_norm: 0.8576640290633641, iteration: 253791
loss: 0.989893913269043,grad_norm: 0.7301172282313857, iteration: 253792
loss: 0.9675301909446716,grad_norm: 0.8933703534774491, iteration: 253793
loss: 1.0229300260543823,grad_norm: 0.9239114964994191, iteration: 253794
loss: 0.9850175380706787,grad_norm: 0.9999991731601121, iteration: 253795
loss: 0.9962508082389832,grad_norm: 0.9999991281361857, iteration: 253796
loss: 0.9773322939872742,grad_norm: 0.9506804721262513, iteration: 253797
loss: 0.983727753162384,grad_norm: 0.8092671146185912, iteration: 253798
loss: 1.0131206512451172,grad_norm: 0.9999990879443871, iteration: 253799
loss: 1.0002968311309814,grad_norm: 0.9634937905869003, iteration: 253800
loss: 0.9862406253814697,grad_norm: 0.8085749623390844, iteration: 253801
loss: 1.011741042137146,grad_norm: 0.9999989992254853, iteration: 253802
loss: 1.012985348701477,grad_norm: 0.8033340470895055, iteration: 253803
loss: 0.9760279655456543,grad_norm: 0.9731673257644385, iteration: 253804
loss: 0.9471822381019592,grad_norm: 0.8934685730681405, iteration: 253805
loss: 1.0005699396133423,grad_norm: 0.9071765951214941, iteration: 253806
loss: 1.013522982597351,grad_norm: 0.9999990599040675, iteration: 253807
loss: 1.022101640701294,grad_norm: 0.9999995572759348, iteration: 253808
loss: 0.9850964546203613,grad_norm: 0.8782549254948869, iteration: 253809
loss: 0.9866278767585754,grad_norm: 0.8150074561927363, iteration: 253810
loss: 1.0451432466506958,grad_norm: 0.9342355537992725, iteration: 253811
loss: 0.9944563508033752,grad_norm: 0.8433000969157327, iteration: 253812
loss: 0.9861308336257935,grad_norm: 0.7567286757033783, iteration: 253813
loss: 1.0064843893051147,grad_norm: 0.7436696108058871, iteration: 253814
loss: 0.9881787896156311,grad_norm: 0.7212463420446155, iteration: 253815
loss: 0.9990535974502563,grad_norm: 0.9435029030643562, iteration: 253816
loss: 0.9954994916915894,grad_norm: 0.8737086764628111, iteration: 253817
loss: 1.0862181186676025,grad_norm: 0.9999994069992071, iteration: 253818
loss: 1.0038622617721558,grad_norm: 0.8283381824666558, iteration: 253819
loss: 1.0009238719940186,grad_norm: 0.8749824347289902, iteration: 253820
loss: 0.9982017278671265,grad_norm: 0.90446074648036, iteration: 253821
loss: 0.9925282001495361,grad_norm: 0.6878664691385251, iteration: 253822
loss: 1.022623062133789,grad_norm: 0.8748083653694033, iteration: 253823
loss: 1.3602194786071777,grad_norm: 0.9318021637969235, iteration: 253824
loss: 0.9987972974777222,grad_norm: 0.9958357131194653, iteration: 253825
loss: 1.083537220954895,grad_norm: 0.9999991913861459, iteration: 253826
loss: 1.098270297050476,grad_norm: 0.9999993099571683, iteration: 253827
loss: 1.1463369131088257,grad_norm: 0.9999989255803311, iteration: 253828
loss: 1.1861929893493652,grad_norm: 0.9999991029992894, iteration: 253829
loss: 1.0370416641235352,grad_norm: 0.9999991000132957, iteration: 253830
loss: 1.2254204750061035,grad_norm: 0.9999992850591973, iteration: 253831
loss: 1.141878366470337,grad_norm: 0.9999994260647268, iteration: 253832
loss: 1.044333815574646,grad_norm: 0.9999994340888337, iteration: 253833
loss: 1.0902377367019653,grad_norm: 0.8841536490386962, iteration: 253834
loss: 1.0022252798080444,grad_norm: 0.8449651421915814, iteration: 253835
loss: 1.0212002992630005,grad_norm: 0.9757383677717337, iteration: 253836
loss: 0.9937975406646729,grad_norm: 0.9999992040050998, iteration: 253837
loss: 1.0422719717025757,grad_norm: 1.0000000205344284, iteration: 253838
loss: 1.0316855907440186,grad_norm: 0.9999989983754756, iteration: 253839
loss: 0.9977564811706543,grad_norm: 0.959070338713833, iteration: 253840
loss: 1.082856297492981,grad_norm: 0.9999999005377784, iteration: 253841
loss: 1.0153520107269287,grad_norm: 0.9591304215632431, iteration: 253842
loss: 1.000017762184143,grad_norm: 0.8998433776766722, iteration: 253843
loss: 1.0183687210083008,grad_norm: 0.8423385438366058, iteration: 253844
loss: 1.0298433303833008,grad_norm: 0.999999445211145, iteration: 253845
loss: 1.0054837465286255,grad_norm: 0.999999200990319, iteration: 253846
loss: 0.9771574139595032,grad_norm: 0.8740166118660324, iteration: 253847
loss: 1.1316958665847778,grad_norm: 0.9999993608122693, iteration: 253848
loss: 0.9738640189170837,grad_norm: 0.8832055329027136, iteration: 253849
loss: 0.9992875456809998,grad_norm: 0.8190522387813183, iteration: 253850
loss: 0.9926030039787292,grad_norm: 0.8502194961575587, iteration: 253851
loss: 1.0074735879898071,grad_norm: 0.9999992638787227, iteration: 253852
loss: 1.0514389276504517,grad_norm: 0.9999990611662524, iteration: 253853
loss: 1.0187443494796753,grad_norm: 0.9999999174131697, iteration: 253854
loss: 0.9951490163803101,grad_norm: 0.9999993458631361, iteration: 253855
loss: 0.9935792684555054,grad_norm: 0.8821169955314219, iteration: 253856
loss: 1.0027915239334106,grad_norm: 0.9999991374260089, iteration: 253857
loss: 1.0056381225585938,grad_norm: 0.9861126223915747, iteration: 253858
loss: 0.998257040977478,grad_norm: 0.9999992053811474, iteration: 253859
loss: 1.0564370155334473,grad_norm: 0.9999997996341594, iteration: 253860
loss: 0.9991844296455383,grad_norm: 0.8587051760686024, iteration: 253861
loss: 1.0199048519134521,grad_norm: 0.7507427993887356, iteration: 253862
loss: 0.9998033046722412,grad_norm: 0.8171427314272143, iteration: 253863
loss: 0.9967440962791443,grad_norm: 0.8153561780962223, iteration: 253864
loss: 0.9869697093963623,grad_norm: 0.9857184700268333, iteration: 253865
loss: 1.0601401329040527,grad_norm: 0.9999995303422737, iteration: 253866
loss: 1.0136196613311768,grad_norm: 0.8925633945972843, iteration: 253867
loss: 1.014570713043213,grad_norm: 0.8010601257043439, iteration: 253868
loss: 0.9849039316177368,grad_norm: 0.9826537898963817, iteration: 253869
loss: 1.0317615270614624,grad_norm: 0.8216382433888598, iteration: 253870
loss: 1.0077141523361206,grad_norm: 0.999998981050232, iteration: 253871
loss: 1.0251766443252563,grad_norm: 0.8043993327739196, iteration: 253872
loss: 1.010390281677246,grad_norm: 0.9999996218061534, iteration: 253873
loss: 1.0149388313293457,grad_norm: 0.9807013844381628, iteration: 253874
loss: 1.0004767179489136,grad_norm: 0.9999992552135296, iteration: 253875
loss: 1.0544456243515015,grad_norm: 0.9580686702741197, iteration: 253876
loss: 1.0708781480789185,grad_norm: 0.8456621104179459, iteration: 253877
loss: 1.1578048467636108,grad_norm: 0.9999995397855217, iteration: 253878
loss: 1.1087398529052734,grad_norm: 0.9999993467176826, iteration: 253879
loss: 1.0598182678222656,grad_norm: 0.9999992583247905, iteration: 253880
loss: 1.0146825313568115,grad_norm: 0.9999990547920142, iteration: 253881
loss: 1.0095914602279663,grad_norm: 0.8742659401771821, iteration: 253882
loss: 1.0357229709625244,grad_norm: 0.9999997215660054, iteration: 253883
loss: 1.0319972038269043,grad_norm: 0.9284622148011289, iteration: 253884
loss: 1.0216854810714722,grad_norm: 0.9196028989486403, iteration: 253885
loss: 1.0068706274032593,grad_norm: 0.968916405694959, iteration: 253886
loss: 1.026530385017395,grad_norm: 0.9999990331181733, iteration: 253887
loss: 0.9889001846313477,grad_norm: 0.9028720765878652, iteration: 253888
loss: 0.9973083734512329,grad_norm: 0.9999996773591102, iteration: 253889
loss: 0.9883090257644653,grad_norm: 0.8262152282983185, iteration: 253890
loss: 1.062101125717163,grad_norm: 0.999999355421232, iteration: 253891
loss: 1.0035936832427979,grad_norm: 0.9400488108820098, iteration: 253892
loss: 1.0313758850097656,grad_norm: 0.9999998535035937, iteration: 253893
loss: 0.9670596718788147,grad_norm: 0.7579175471118795, iteration: 253894
loss: 1.0158683061599731,grad_norm: 0.9063232537944101, iteration: 253895
loss: 1.052019715309143,grad_norm: 0.9999998284729762, iteration: 253896
loss: 1.0279104709625244,grad_norm: 0.8530500385348675, iteration: 253897
loss: 1.0166616439819336,grad_norm: 0.9234825951842123, iteration: 253898
loss: 1.0095516443252563,grad_norm: 0.9999990838211731, iteration: 253899
loss: 0.9741642475128174,grad_norm: 0.9834534443072481, iteration: 253900
loss: 1.010258674621582,grad_norm: 0.9428649441618746, iteration: 253901
loss: 0.981448769569397,grad_norm: 0.8182171578426788, iteration: 253902
loss: 1.1084680557250977,grad_norm: 0.999999870100093, iteration: 253903
loss: 1.079052209854126,grad_norm: 0.999999798646229, iteration: 253904
loss: 0.9725715517997742,grad_norm: 0.9836608955863533, iteration: 253905
loss: 0.9902567267417908,grad_norm: 0.773926624280312, iteration: 253906
loss: 1.026839017868042,grad_norm: 0.9175500050181669, iteration: 253907
loss: 1.0201386213302612,grad_norm: 0.9999990594825521, iteration: 253908
loss: 1.084905743598938,grad_norm: 0.9999991774276347, iteration: 253909
loss: 1.0952646732330322,grad_norm: 0.9062855225484452, iteration: 253910
loss: 1.0147650241851807,grad_norm: 0.9524911571625724, iteration: 253911
loss: 0.9766352772712708,grad_norm: 0.9999990518762948, iteration: 253912
loss: 1.0033875703811646,grad_norm: 0.801867594111916, iteration: 253913
loss: 1.0303518772125244,grad_norm: 0.8013598246052593, iteration: 253914
loss: 1.0243983268737793,grad_norm: 0.9999994137561823, iteration: 253915
loss: 0.9831938147544861,grad_norm: 0.9022999397375479, iteration: 253916
loss: 1.100456714630127,grad_norm: 0.9999994493572882, iteration: 253917
loss: 1.0396840572357178,grad_norm: 0.9999997399470338, iteration: 253918
loss: 0.97244793176651,grad_norm: 0.8141331067386663, iteration: 253919
loss: 0.9782074093818665,grad_norm: 0.9999995683329382, iteration: 253920
loss: 1.0161486864089966,grad_norm: 0.9999997762676832, iteration: 253921
loss: 0.9826869368553162,grad_norm: 0.9270078436856019, iteration: 253922
loss: 1.023779034614563,grad_norm: 0.9999992731799063, iteration: 253923
loss: 1.0319441556930542,grad_norm: 0.9999993513835013, iteration: 253924
loss: 1.0351388454437256,grad_norm: 0.9057437273112886, iteration: 253925
loss: 1.1091163158416748,grad_norm: 0.9999991488619742, iteration: 253926
loss: 1.0592492818832397,grad_norm: 0.9999992305903276, iteration: 253927
loss: 0.9815562963485718,grad_norm: 0.9158196934172999, iteration: 253928
loss: 0.9968938231468201,grad_norm: 0.9322788830956897, iteration: 253929
loss: 1.007164478302002,grad_norm: 0.9999993951344385, iteration: 253930
loss: 1.034549355506897,grad_norm: 0.9999994386419763, iteration: 253931
loss: 1.0290111303329468,grad_norm: 0.8809465095048236, iteration: 253932
loss: 1.0217307806015015,grad_norm: 0.9601820595448682, iteration: 253933
loss: 0.9907591342926025,grad_norm: 0.8419147075977709, iteration: 253934
loss: 0.9746705889701843,grad_norm: 0.9999989988033163, iteration: 253935
loss: 1.0865226984024048,grad_norm: 0.9999991988306748, iteration: 253936
loss: 1.0354712009429932,grad_norm: 0.926607365010878, iteration: 253937
loss: 1.0815728902816772,grad_norm: 0.99999987371362, iteration: 253938
loss: 1.0207390785217285,grad_norm: 0.8068287409384148, iteration: 253939
loss: 0.9888792634010315,grad_norm: 0.82735398975104, iteration: 253940
loss: 0.9552285671234131,grad_norm: 0.9163118755231475, iteration: 253941
loss: 1.043218731880188,grad_norm: 0.9058363203886342, iteration: 253942
loss: 0.9725348353385925,grad_norm: 0.918561733421595, iteration: 253943
loss: 0.987591028213501,grad_norm: 0.8896359506204019, iteration: 253944
loss: 0.9833781123161316,grad_norm: 0.8873834076643498, iteration: 253945
loss: 0.982742965221405,grad_norm: 0.9481189373915153, iteration: 253946
loss: 0.9866406917572021,grad_norm: 0.9792211374237324, iteration: 253947
loss: 0.9773190021514893,grad_norm: 0.8099437579624994, iteration: 253948
loss: 1.0078306198120117,grad_norm: 0.7988618087849069, iteration: 253949
loss: 1.0038018226623535,grad_norm: 0.8309696327221712, iteration: 253950
loss: 1.0183039903640747,grad_norm: 0.9843658139137677, iteration: 253951
loss: 1.0199165344238281,grad_norm: 0.9627772373811628, iteration: 253952
loss: 1.0003178119659424,grad_norm: 0.9315126387805619, iteration: 253953
loss: 1.0278706550598145,grad_norm: 0.9999998189465297, iteration: 253954
loss: 1.0510032176971436,grad_norm: 0.9999993017182364, iteration: 253955
loss: 1.088916540145874,grad_norm: 0.9999990957372604, iteration: 253956
loss: 0.9862489700317383,grad_norm: 0.9805044516066422, iteration: 253957
loss: 1.0185333490371704,grad_norm: 0.7916839831977636, iteration: 253958
loss: 1.0512548685073853,grad_norm: 0.9999995616351731, iteration: 253959
loss: 1.0148481130599976,grad_norm: 0.9999991068327998, iteration: 253960
loss: 1.0292080640792847,grad_norm: 0.9999997019989302, iteration: 253961
loss: 1.018033504486084,grad_norm: 0.9999992460019163, iteration: 253962
loss: 1.0029157400131226,grad_norm: 0.8443833497313851, iteration: 253963
loss: 0.981408417224884,grad_norm: 0.9296760305618568, iteration: 253964
loss: 1.016276478767395,grad_norm: 0.9675242737616511, iteration: 253965
loss: 1.0060688257217407,grad_norm: 0.9044810810722798, iteration: 253966
loss: 0.9820897579193115,grad_norm: 0.9739058627205286, iteration: 253967
loss: 0.9810327291488647,grad_norm: 0.9999991319113208, iteration: 253968
loss: 0.9989449977874756,grad_norm: 0.9999993088696878, iteration: 253969
loss: 0.9933628439903259,grad_norm: 0.9404363057320514, iteration: 253970
loss: 1.0026625394821167,grad_norm: 0.8564166328486246, iteration: 253971
loss: 0.9995508790016174,grad_norm: 0.8026694898029345, iteration: 253972
loss: 1.028072476387024,grad_norm: 0.8331174252047859, iteration: 253973
loss: 1.003113865852356,grad_norm: 0.8490720391395403, iteration: 253974
loss: 0.9826890826225281,grad_norm: 0.9620589757245546, iteration: 253975
loss: 1.005431890487671,grad_norm: 0.8891042254907137, iteration: 253976
loss: 1.0859713554382324,grad_norm: 0.9141209263078831, iteration: 253977
loss: 1.0284724235534668,grad_norm: 0.9999990058045001, iteration: 253978
loss: 1.0110710859298706,grad_norm: 0.8954856730811244, iteration: 253979
loss: 1.0154354572296143,grad_norm: 0.9999990394130944, iteration: 253980
loss: 1.0722055435180664,grad_norm: 0.999999206067989, iteration: 253981
loss: 1.0375165939331055,grad_norm: 0.9999992624009914, iteration: 253982
loss: 1.0505667924880981,grad_norm: 0.9999990547650335, iteration: 253983
loss: 1.0793287754058838,grad_norm: 1.0000001050068916, iteration: 253984
loss: 1.0216902494430542,grad_norm: 0.8858278513400204, iteration: 253985
loss: 0.999732494354248,grad_norm: 0.8666029404492045, iteration: 253986
loss: 0.9880343675613403,grad_norm: 0.8436837880679231, iteration: 253987
loss: 1.0023859739303589,grad_norm: 0.897490011013129, iteration: 253988
loss: 0.9997202754020691,grad_norm: 0.7585424137701438, iteration: 253989
loss: 0.9809793829917908,grad_norm: 0.9999990717439321, iteration: 253990
loss: 1.048760175704956,grad_norm: 0.9241751101759055, iteration: 253991
loss: 0.9931811690330505,grad_norm: 0.9999997698067242, iteration: 253992
loss: 1.087223768234253,grad_norm: 0.8380666664371387, iteration: 253993
loss: 0.9935297966003418,grad_norm: 0.8477608779428114, iteration: 253994
loss: 1.0104484558105469,grad_norm: 0.9093150284871321, iteration: 253995
loss: 1.006468653678894,grad_norm: 0.7388476328155842, iteration: 253996
loss: 0.9968904852867126,grad_norm: 0.8885806206068212, iteration: 253997
loss: 1.0048969984054565,grad_norm: 0.9999990034552779, iteration: 253998
loss: 0.9939089417457581,grad_norm: 0.9219917793224198, iteration: 253999
loss: 0.9748420119285583,grad_norm: 0.8572864206987981, iteration: 254000
loss: 0.9536482691764832,grad_norm: 0.9999992178013742, iteration: 254001
loss: 0.9849894046783447,grad_norm: 0.9048105700025852, iteration: 254002
loss: 1.0762178897857666,grad_norm: 0.9999990873172965, iteration: 254003
loss: 1.0160603523254395,grad_norm: 0.9999991852606331, iteration: 254004
loss: 0.9738338589668274,grad_norm: 0.9768073942349473, iteration: 254005
loss: 0.9944993853569031,grad_norm: 0.9813394221879745, iteration: 254006
loss: 1.214429497718811,grad_norm: 0.9999994901588256, iteration: 254007
loss: 1.0291166305541992,grad_norm: 0.9999995169802657, iteration: 254008
loss: 1.0126596689224243,grad_norm: 0.9512191250467212, iteration: 254009
loss: 1.0210566520690918,grad_norm: 0.9663950263923595, iteration: 254010
loss: 1.0916250944137573,grad_norm: 0.999999429806981, iteration: 254011
loss: 0.9810463786125183,grad_norm: 0.8041305913879088, iteration: 254012
loss: 0.9959776401519775,grad_norm: 0.8204491818215913, iteration: 254013
loss: 1.0228543281555176,grad_norm: 0.9460636519264259, iteration: 254014
loss: 0.9917641282081604,grad_norm: 0.7980220877600177, iteration: 254015
loss: 1.0366865396499634,grad_norm: 0.8834161451422253, iteration: 254016
loss: 1.0571202039718628,grad_norm: 0.907125389024292, iteration: 254017
loss: 1.005752682685852,grad_norm: 0.9999990646646781, iteration: 254018
loss: 1.0167372226715088,grad_norm: 0.8647324892233196, iteration: 254019
loss: 0.9930828809738159,grad_norm: 0.8260475788349824, iteration: 254020
loss: 1.0293318033218384,grad_norm: 0.9999991842717448, iteration: 254021
loss: 1.0280956029891968,grad_norm: 0.7914470404892161, iteration: 254022
loss: 1.0054736137390137,grad_norm: 0.9999992465732153, iteration: 254023
loss: 1.0517569780349731,grad_norm: 0.9999999571005536, iteration: 254024
loss: 1.0004491806030273,grad_norm: 0.9477680585299275, iteration: 254025
loss: 0.9950092434883118,grad_norm: 0.9289740574463426, iteration: 254026
loss: 0.9981689453125,grad_norm: 0.8675238091069345, iteration: 254027
loss: 1.0043630599975586,grad_norm: 0.9046133084661733, iteration: 254028
loss: 1.1168147325515747,grad_norm: 0.9999992236902323, iteration: 254029
loss: 0.983722984790802,grad_norm: 0.9802253867384433, iteration: 254030
loss: 1.0249899625778198,grad_norm: 0.8840047819845623, iteration: 254031
loss: 1.058762788772583,grad_norm: 0.9999997275559954, iteration: 254032
loss: 1.3034653663635254,grad_norm: 1.000000015266657, iteration: 254033
loss: 1.0190483331680298,grad_norm: 0.9488702370341895, iteration: 254034
loss: 0.9999374747276306,grad_norm: 0.9999991723344981, iteration: 254035
loss: 1.0450650453567505,grad_norm: 0.7936474904398578, iteration: 254036
loss: 0.9892143607139587,grad_norm: 0.9428087887463439, iteration: 254037
loss: 1.0207922458648682,grad_norm: 0.9999991231923446, iteration: 254038
loss: 0.9969097375869751,grad_norm: 0.9342830590514939, iteration: 254039
loss: 0.9855837225914001,grad_norm: 0.9999991605911839, iteration: 254040
loss: 0.9937658905982971,grad_norm: 0.889751329145719, iteration: 254041
loss: 1.0902345180511475,grad_norm: 0.9999991328508205, iteration: 254042
loss: 1.0337131023406982,grad_norm: 0.8011634561078232, iteration: 254043
loss: 1.1011786460876465,grad_norm: 0.9342499140982361, iteration: 254044
loss: 1.0571434497833252,grad_norm: 0.9999994915286421, iteration: 254045
loss: 0.9676116108894348,grad_norm: 0.8577268572477246, iteration: 254046
loss: 1.0413620471954346,grad_norm: 0.99999903197768, iteration: 254047
loss: 1.0383424758911133,grad_norm: 0.8358550218772448, iteration: 254048
loss: 1.01461660861969,grad_norm: 0.8837327646425157, iteration: 254049
loss: 0.9963491559028625,grad_norm: 0.9865610444433014, iteration: 254050
loss: 1.1079652309417725,grad_norm: 0.999999257509414, iteration: 254051
loss: 1.1418471336364746,grad_norm: 0.9999992791081316, iteration: 254052
loss: 0.9953815937042236,grad_norm: 0.9999991030491243, iteration: 254053
loss: 1.0118545293807983,grad_norm: 0.8900494895705409, iteration: 254054
loss: 1.0096921920776367,grad_norm: 0.7100448275952934, iteration: 254055
loss: 1.039509654045105,grad_norm: 0.764964762709777, iteration: 254056
loss: 1.1428769826889038,grad_norm: 0.9999995600800091, iteration: 254057
loss: 0.9728898406028748,grad_norm: 0.999999106574158, iteration: 254058
loss: 1.0502508878707886,grad_norm: 0.9999991416840581, iteration: 254059
loss: 0.9690653085708618,grad_norm: 0.9893640120031525, iteration: 254060
loss: 0.9929938316345215,grad_norm: 0.8690655761357498, iteration: 254061
loss: 0.9949451684951782,grad_norm: 0.6957459428713486, iteration: 254062
loss: 1.0161787271499634,grad_norm: 0.9263971502881321, iteration: 254063
loss: 0.9853609204292297,grad_norm: 0.8237352146967798, iteration: 254064
loss: 1.0843968391418457,grad_norm: 0.9999992983237271, iteration: 254065
loss: 1.0998709201812744,grad_norm: 0.9839198622699403, iteration: 254066
loss: 1.0188853740692139,grad_norm: 0.994100709906282, iteration: 254067
loss: 1.031664252281189,grad_norm: 0.883314444516256, iteration: 254068
loss: 1.0203243494033813,grad_norm: 0.7450063186608464, iteration: 254069
loss: 1.0064493417739868,grad_norm: 0.8848426917193046, iteration: 254070
loss: 0.9911872744560242,grad_norm: 0.8043282648626723, iteration: 254071
loss: 0.9998528957366943,grad_norm: 0.8030023867292252, iteration: 254072
loss: 0.9820116758346558,grad_norm: 0.8793698361665883, iteration: 254073
loss: 1.007061243057251,grad_norm: 0.9999989533701351, iteration: 254074
loss: 0.9752070307731628,grad_norm: 0.9999992604788465, iteration: 254075
loss: 1.0492987632751465,grad_norm: 0.9570164825545214, iteration: 254076
loss: 1.0086681842803955,grad_norm: 0.7802167060376743, iteration: 254077
loss: 1.0191441774368286,grad_norm: 0.9999993060556903, iteration: 254078
loss: 1.0788410902023315,grad_norm: 0.9999997404725885, iteration: 254079
loss: 1.0781880617141724,grad_norm: 0.9999997454812094, iteration: 254080
loss: 1.0016753673553467,grad_norm: 0.9999991378013323, iteration: 254081
loss: 1.1174845695495605,grad_norm: 0.9999992925764266, iteration: 254082
loss: 0.9630411267280579,grad_norm: 0.9999992353740249, iteration: 254083
loss: 1.0225505828857422,grad_norm: 0.999999096660043, iteration: 254084
loss: 1.0994048118591309,grad_norm: 0.9999999883854823, iteration: 254085
loss: 1.0282061100006104,grad_norm: 0.8437397393224223, iteration: 254086
loss: 0.9796485900878906,grad_norm: 0.9999992717035635, iteration: 254087
loss: 0.9876136183738708,grad_norm: 0.9999999967828557, iteration: 254088
loss: 1.064778447151184,grad_norm: 0.9999994794192038, iteration: 254089
loss: 1.0298488140106201,grad_norm: 0.9999992134575064, iteration: 254090
loss: 1.0817817449569702,grad_norm: 0.9999994341181526, iteration: 254091
loss: 0.9921601414680481,grad_norm: 0.9999994281434814, iteration: 254092
loss: 1.018481731414795,grad_norm: 0.8518571113917826, iteration: 254093
loss: 1.0990244150161743,grad_norm: 0.8700596555752189, iteration: 254094
loss: 1.1148133277893066,grad_norm: 1.0000000348949813, iteration: 254095
loss: 1.0125430822372437,grad_norm: 0.9908125602008028, iteration: 254096
loss: 1.0938994884490967,grad_norm: 0.9999998488350179, iteration: 254097
loss: 0.9632989764213562,grad_norm: 0.9913025614453868, iteration: 254098
loss: 0.9777457118034363,grad_norm: 0.82273780074187, iteration: 254099
loss: 1.2127634286880493,grad_norm: 0.9999999926924719, iteration: 254100
loss: 0.9732290506362915,grad_norm: 0.8984695667320814, iteration: 254101
loss: 0.9874919056892395,grad_norm: 0.7548189254162446, iteration: 254102
loss: 1.0045751333236694,grad_norm: 0.9590464432911704, iteration: 254103
loss: 1.0711560249328613,grad_norm: 0.9999992868964239, iteration: 254104
loss: 0.9764404296875,grad_norm: 0.9999991980369979, iteration: 254105
loss: 0.9981573224067688,grad_norm: 0.9331377025396966, iteration: 254106
loss: 0.9980530142784119,grad_norm: 0.9167936861480959, iteration: 254107
loss: 0.9850480556488037,grad_norm: 0.9999990836706618, iteration: 254108
loss: 0.9818007946014404,grad_norm: 0.9091801266025163, iteration: 254109
loss: 1.0123568773269653,grad_norm: 0.9999991226005075, iteration: 254110
loss: 1.0044845342636108,grad_norm: 0.9999992587733717, iteration: 254111
loss: 1.0091941356658936,grad_norm: 0.872339550360351, iteration: 254112
loss: 1.001665472984314,grad_norm: 0.9242080538341731, iteration: 254113
loss: 0.9992138147354126,grad_norm: 0.9999992435126406, iteration: 254114
loss: 1.0157818794250488,grad_norm: 0.8538453321486593, iteration: 254115
loss: 1.0135618448257446,grad_norm: 0.999999472683805, iteration: 254116
loss: 1.0522260665893555,grad_norm: 0.871038222336398, iteration: 254117
loss: 0.9927026033401489,grad_norm: 0.9999990294598584, iteration: 254118
loss: 0.9925797581672668,grad_norm: 0.9999991465925255, iteration: 254119
loss: 0.9959551692008972,grad_norm: 0.9440873623989254, iteration: 254120
loss: 0.9777860045433044,grad_norm: 0.9999994582431346, iteration: 254121
loss: 0.9625124335289001,grad_norm: 0.9999995913665191, iteration: 254122
loss: 0.9739632606506348,grad_norm: 0.9074909892560412, iteration: 254123
loss: 1.0311976671218872,grad_norm: 0.9395148153341464, iteration: 254124
loss: 1.0437194108963013,grad_norm: 0.9999992698242882, iteration: 254125
loss: 1.0413891077041626,grad_norm: 0.9192787166251543, iteration: 254126
loss: 1.0278936624526978,grad_norm: 0.8674745073165688, iteration: 254127
loss: 0.9793128371238708,grad_norm: 0.85516119911847, iteration: 254128
loss: 1.0268603563308716,grad_norm: 0.9999989887200501, iteration: 254129
loss: 0.995524525642395,grad_norm: 0.9999997226281258, iteration: 254130
loss: 0.9782354831695557,grad_norm: 0.8527066674875475, iteration: 254131
loss: 0.9888396263122559,grad_norm: 0.9999990137884043, iteration: 254132
loss: 1.0033234357833862,grad_norm: 0.9491197938772614, iteration: 254133
loss: 0.9875032901763916,grad_norm: 0.9999990344005143, iteration: 254134
loss: 0.9800595045089722,grad_norm: 0.8969986824703722, iteration: 254135
loss: 1.0189547538757324,grad_norm: 0.9999995017035352, iteration: 254136
loss: 1.000091314315796,grad_norm: 0.9842898027042261, iteration: 254137
loss: 1.0229018926620483,grad_norm: 0.8577811803501608, iteration: 254138
loss: 1.005320429801941,grad_norm: 0.895757252600396, iteration: 254139
loss: 1.0524957180023193,grad_norm: 0.9999997558166055, iteration: 254140
loss: 0.9738522171974182,grad_norm: 0.9182574192832941, iteration: 254141
loss: 1.0318588018417358,grad_norm: 0.9999992709237912, iteration: 254142
loss: 1.020039439201355,grad_norm: 0.9999991628859386, iteration: 254143
loss: 1.0325360298156738,grad_norm: 0.9411214200050813, iteration: 254144
loss: 0.971574604511261,grad_norm: 0.7395044390765223, iteration: 254145
loss: 0.9739698171615601,grad_norm: 0.9710178135534949, iteration: 254146
loss: 1.0074371099472046,grad_norm: 0.9999990093613337, iteration: 254147
loss: 0.9902015328407288,grad_norm: 0.8974450930238442, iteration: 254148
loss: 1.043681263923645,grad_norm: 0.9999992223131617, iteration: 254149
loss: 1.0022653341293335,grad_norm: 0.9999992132234972, iteration: 254150
loss: 1.000699758529663,grad_norm: 0.9999992083142655, iteration: 254151
loss: 1.016851544380188,grad_norm: 0.9999996882830356, iteration: 254152
loss: 1.0072299242019653,grad_norm: 0.9864237331393128, iteration: 254153
loss: 0.9967373609542847,grad_norm: 0.9999991404378666, iteration: 254154
loss: 1.012635588645935,grad_norm: 0.9999991936870396, iteration: 254155
loss: 1.0400704145431519,grad_norm: 0.9999991858275756, iteration: 254156
loss: 1.0943701267242432,grad_norm: 0.9999992365933684, iteration: 254157
loss: 1.0027207136154175,grad_norm: 0.9381375151562779, iteration: 254158
loss: 0.9956830739974976,grad_norm: 0.9999995782821758, iteration: 254159
loss: 0.9571858644485474,grad_norm: 0.9999992155511667, iteration: 254160
loss: 0.9932604432106018,grad_norm: 0.8886442209325361, iteration: 254161
loss: 1.0205317735671997,grad_norm: 0.9206294605870636, iteration: 254162
loss: 1.047149658203125,grad_norm: 0.9128147703959062, iteration: 254163
loss: 1.134279727935791,grad_norm: 0.8624807795919321, iteration: 254164
loss: 1.0177751779556274,grad_norm: 0.9999992662322791, iteration: 254165
loss: 1.0725064277648926,grad_norm: 0.83698823072312, iteration: 254166
loss: 1.021989107131958,grad_norm: 0.9060463385224693, iteration: 254167
loss: 1.0106819868087769,grad_norm: 0.852375485736181, iteration: 254168
loss: 1.0291850566864014,grad_norm: 0.9005074740494236, iteration: 254169
loss: 0.9757589101791382,grad_norm: 0.8580527115510861, iteration: 254170
loss: 0.974683940410614,grad_norm: 0.8909204466290428, iteration: 254171
loss: 0.9808424711227417,grad_norm: 0.7908216786508693, iteration: 254172
loss: 1.0224026441574097,grad_norm: 0.9999990380332655, iteration: 254173
loss: 0.9744116067886353,grad_norm: 0.8674208068977673, iteration: 254174
loss: 1.0312426090240479,grad_norm: 0.9999990856548987, iteration: 254175
loss: 0.9978503584861755,grad_norm: 0.9999994461240945, iteration: 254176
loss: 0.9629316926002502,grad_norm: 0.7941810927482217, iteration: 254177
loss: 1.0749114751815796,grad_norm: 0.8065248541183055, iteration: 254178
loss: 1.0133039951324463,grad_norm: 0.8290204884256829, iteration: 254179
loss: 1.0122658014297485,grad_norm: 0.8951202942096647, iteration: 254180
loss: 1.0278785228729248,grad_norm: 0.9999992027687854, iteration: 254181
loss: 0.9949941039085388,grad_norm: 0.9560766153388145, iteration: 254182
loss: 1.045066475868225,grad_norm: 0.9999997355500867, iteration: 254183
loss: 1.009030818939209,grad_norm: 0.9999995441041178, iteration: 254184
loss: 0.9980804920196533,grad_norm: 0.8593865483407205, iteration: 254185
loss: 1.0212889909744263,grad_norm: 0.8903698690637694, iteration: 254186
loss: 0.9824544191360474,grad_norm: 0.9999993584352187, iteration: 254187
loss: 1.0249992609024048,grad_norm: 0.9999994116946263, iteration: 254188
loss: 1.0453345775604248,grad_norm: 0.9999994214461716, iteration: 254189
loss: 0.982406735420227,grad_norm: 0.8549794867467674, iteration: 254190
loss: 1.033009648323059,grad_norm: 0.8732290333925785, iteration: 254191
loss: 0.9997637867927551,grad_norm: 0.7980422513586366, iteration: 254192
loss: 0.998145341873169,grad_norm: 0.9999990555360966, iteration: 254193
loss: 1.033112645149231,grad_norm: 0.9999993741396007, iteration: 254194
loss: 0.9563631415367126,grad_norm: 0.9753058197491309, iteration: 254195
loss: 1.0058858394622803,grad_norm: 0.8579995287257942, iteration: 254196
loss: 1.005838394165039,grad_norm: 0.8535166518361097, iteration: 254197
loss: 1.060896873474121,grad_norm: 0.999999279415776, iteration: 254198
loss: 0.9915220737457275,grad_norm: 0.7947786085922712, iteration: 254199
loss: 1.0043963193893433,grad_norm: 0.7859021958174218, iteration: 254200
loss: 0.9642185568809509,grad_norm: 0.8022861273905596, iteration: 254201
loss: 1.0927289724349976,grad_norm: 0.9999999476006566, iteration: 254202
loss: 1.0304027795791626,grad_norm: 0.9535763994853433, iteration: 254203
loss: 0.9646622538566589,grad_norm: 0.9999990879234517, iteration: 254204
loss: 1.0628643035888672,grad_norm: 0.9999996092423943, iteration: 254205
loss: 1.0084941387176514,grad_norm: 0.9253797185473517, iteration: 254206
loss: 1.0012928247451782,grad_norm: 0.8537713848113727, iteration: 254207
loss: 1.0279359817504883,grad_norm: 0.999999965752886, iteration: 254208
loss: 1.1617441177368164,grad_norm: 0.9999992668003389, iteration: 254209
loss: 0.9995367527008057,grad_norm: 0.9999994275683816, iteration: 254210
loss: 1.028943419456482,grad_norm: 0.999999227420787, iteration: 254211
loss: 1.0353214740753174,grad_norm: 0.8791830413863752, iteration: 254212
loss: 1.02492356300354,grad_norm: 0.9999990834443011, iteration: 254213
loss: 1.0020333528518677,grad_norm: 0.9028378527161941, iteration: 254214
loss: 0.9819474220275879,grad_norm: 0.883093820024303, iteration: 254215
loss: 0.9678251147270203,grad_norm: 0.9176217151669402, iteration: 254216
loss: 0.9897465109825134,grad_norm: 0.7415040698433393, iteration: 254217
loss: 0.9935161471366882,grad_norm: 0.9999990599441692, iteration: 254218
loss: 1.0798085927963257,grad_norm: 0.999999927812018, iteration: 254219
loss: 0.9852160811424255,grad_norm: 0.9999991647155463, iteration: 254220
loss: 0.9803557395935059,grad_norm: 0.7993081974864499, iteration: 254221
loss: 1.001435399055481,grad_norm: 0.8737359588151812, iteration: 254222
loss: 1.2022624015808105,grad_norm: 0.9999996345966728, iteration: 254223
loss: 1.0036383867263794,grad_norm: 0.9999996122202983, iteration: 254224
loss: 0.9877048134803772,grad_norm: 0.8699656443607152, iteration: 254225
loss: 1.0209587812423706,grad_norm: 0.7519979723914483, iteration: 254226
loss: 1.0489054918289185,grad_norm: 0.9999994880455589, iteration: 254227
loss: 1.1561880111694336,grad_norm: 0.9563698538052215, iteration: 254228
loss: 1.0952022075653076,grad_norm: 0.9999990470124366, iteration: 254229
loss: 1.0416104793548584,grad_norm: 0.9999997108392499, iteration: 254230
loss: 0.9888269901275635,grad_norm: 0.9010594719429303, iteration: 254231
loss: 0.997890293598175,grad_norm: 0.9999996505459632, iteration: 254232
loss: 1.0454343557357788,grad_norm: 0.9230570643210069, iteration: 254233
loss: 0.9829941391944885,grad_norm: 0.8543945361535076, iteration: 254234
loss: 0.9753243327140808,grad_norm: 0.8814571364854249, iteration: 254235
loss: 0.9558088183403015,grad_norm: 0.9863181131397084, iteration: 254236
loss: 1.090226173400879,grad_norm: 0.9598764293549737, iteration: 254237
loss: 1.0277849435806274,grad_norm: 0.9999992782460962, iteration: 254238
loss: 1.0079929828643799,grad_norm: 0.9510107364933176, iteration: 254239
loss: 1.0779144763946533,grad_norm: 0.9999991773992348, iteration: 254240
loss: 1.1353017091751099,grad_norm: 0.999999486017208, iteration: 254241
loss: 1.0590113401412964,grad_norm: 0.9999996429482114, iteration: 254242
loss: 0.9817327260971069,grad_norm: 0.7946143338885161, iteration: 254243
loss: 0.9756901264190674,grad_norm: 0.7892486182864368, iteration: 254244
loss: 1.0033535957336426,grad_norm: 0.825859199501563, iteration: 254245
loss: 0.9876375198364258,grad_norm: 0.9498835611232114, iteration: 254246
loss: 1.0279444456100464,grad_norm: 0.9999993067246146, iteration: 254247
loss: 0.9931910037994385,grad_norm: 0.9999991709708345, iteration: 254248
loss: 0.9907167553901672,grad_norm: 0.7699300590105993, iteration: 254249
loss: 1.0511536598205566,grad_norm: 0.9999995274088052, iteration: 254250
loss: 0.9940084218978882,grad_norm: 0.999999139203035, iteration: 254251
loss: 1.006541132926941,grad_norm: 0.8577512870501008, iteration: 254252
loss: 1.0199377536773682,grad_norm: 0.8055661776417111, iteration: 254253
loss: 1.0374544858932495,grad_norm: 0.9999996246419177, iteration: 254254
loss: 1.034209132194519,grad_norm: 0.9539029409611194, iteration: 254255
loss: 0.986606776714325,grad_norm: 0.9999990364819372, iteration: 254256
loss: 0.9651927947998047,grad_norm: 0.8033138592599578, iteration: 254257
loss: 1.0128742456436157,grad_norm: 0.9999995868498215, iteration: 254258
loss: 1.1045488119125366,grad_norm: 0.9999993391375559, iteration: 254259
loss: 1.0269659757614136,grad_norm: 0.8540537127099527, iteration: 254260
loss: 1.0489815473556519,grad_norm: 0.999999654176119, iteration: 254261
loss: 1.0019301176071167,grad_norm: 0.9999990633721715, iteration: 254262
loss: 1.0000381469726562,grad_norm: 0.8332299983791127, iteration: 254263
loss: 1.0139676332473755,grad_norm: 0.9999990760661392, iteration: 254264
loss: 1.0464335680007935,grad_norm: 0.9999993929526159, iteration: 254265
loss: 1.0312669277191162,grad_norm: 0.8505529140166722, iteration: 254266
loss: 1.0646610260009766,grad_norm: 0.9999999956173571, iteration: 254267
loss: 1.0400115251541138,grad_norm: 0.7886352968898225, iteration: 254268
loss: 1.1252156496047974,grad_norm: 1.0000000045356212, iteration: 254269
loss: 0.9817851781845093,grad_norm: 0.9654801887332678, iteration: 254270
loss: 1.0120693445205688,grad_norm: 0.7684274908917663, iteration: 254271
loss: 0.9694913625717163,grad_norm: 0.8931891589103227, iteration: 254272
loss: 1.0089470148086548,grad_norm: 0.8324522755122145, iteration: 254273
loss: 1.0043261051177979,grad_norm: 0.8188080554964744, iteration: 254274
loss: 1.0087416172027588,grad_norm: 0.9999991245501637, iteration: 254275
loss: 0.9732471108436584,grad_norm: 0.8006611446046641, iteration: 254276
loss: 1.0426939725875854,grad_norm: 0.9999990550378685, iteration: 254277
loss: 1.010103464126587,grad_norm: 0.9999994155825598, iteration: 254278
loss: 0.9798796772956848,grad_norm: 0.9981817035390714, iteration: 254279
loss: 1.0104970932006836,grad_norm: 0.9291739014440379, iteration: 254280
loss: 0.9857720136642456,grad_norm: 0.7732438817002543, iteration: 254281
loss: 1.0196207761764526,grad_norm: 0.9999990676547562, iteration: 254282
loss: 0.9906033277511597,grad_norm: 0.8393523112974762, iteration: 254283
loss: 1.0047738552093506,grad_norm: 0.9999991410420799, iteration: 254284
loss: 1.0157703161239624,grad_norm: 0.999999428184849, iteration: 254285
loss: 1.0346797704696655,grad_norm: 0.8693061381192189, iteration: 254286
loss: 1.0096005201339722,grad_norm: 0.8143730220285565, iteration: 254287
loss: 0.9937726259231567,grad_norm: 0.9999991016140041, iteration: 254288
loss: 0.9912487864494324,grad_norm: 0.7851396472913228, iteration: 254289
loss: 1.0365753173828125,grad_norm: 0.874104356338339, iteration: 254290
loss: 1.0062764883041382,grad_norm: 0.8430605029705096, iteration: 254291
loss: 1.0299267768859863,grad_norm: 0.8111269745486278, iteration: 254292
loss: 1.0296571254730225,grad_norm: 0.7596947328710868, iteration: 254293
loss: 0.9981811046600342,grad_norm: 0.9699516941526579, iteration: 254294
loss: 0.9954996109008789,grad_norm: 0.9999995139549535, iteration: 254295
loss: 0.9914612174034119,grad_norm: 0.8665034343648077, iteration: 254296
loss: 0.9657105803489685,grad_norm: 0.8399531508827812, iteration: 254297
loss: 0.9672402739524841,grad_norm: 0.8188522611503432, iteration: 254298
loss: 1.0495713949203491,grad_norm: 0.9192074550893602, iteration: 254299
loss: 0.9584058523178101,grad_norm: 0.7376437757540913, iteration: 254300
loss: 1.0046710968017578,grad_norm: 0.7209944186811554, iteration: 254301
loss: 1.0131374597549438,grad_norm: 0.9999990733001151, iteration: 254302
loss: 0.9795676469802856,grad_norm: 0.9999989352136731, iteration: 254303
loss: 0.9902055263519287,grad_norm: 0.9297003661018456, iteration: 254304
loss: 0.9853813648223877,grad_norm: 0.9999991745314027, iteration: 254305
loss: 1.0257261991500854,grad_norm: 0.9999996247141443, iteration: 254306
loss: 1.0290920734405518,grad_norm: 0.8367454644877085, iteration: 254307
loss: 1.0520106554031372,grad_norm: 0.999999224510513, iteration: 254308
loss: 0.9911059737205505,grad_norm: 0.8263960655207965, iteration: 254309
loss: 1.0423280000686646,grad_norm: 0.9999991845915505, iteration: 254310
loss: 1.0313856601715088,grad_norm: 0.999999200523712, iteration: 254311
loss: 0.9997007250785828,grad_norm: 0.9999990511418716, iteration: 254312
loss: 1.0489985942840576,grad_norm: 0.8997842912064689, iteration: 254313
loss: 0.9754543900489807,grad_norm: 0.7845144348985168, iteration: 254314
loss: 1.0436604022979736,grad_norm: 0.9376457780489222, iteration: 254315
loss: 0.9878777265548706,grad_norm: 0.9622898587935529, iteration: 254316
loss: 1.0197240114212036,grad_norm: 0.9999990198064085, iteration: 254317
loss: 1.061920166015625,grad_norm: 0.9999997697031296, iteration: 254318
loss: 1.0214232206344604,grad_norm: 0.9999991206571773, iteration: 254319
loss: 1.0719859600067139,grad_norm: 0.9999996804048146, iteration: 254320
loss: 1.0130882263183594,grad_norm: 0.9148146949386597, iteration: 254321
loss: 1.0583736896514893,grad_norm: 0.9999991080516368, iteration: 254322
loss: 1.0198417901992798,grad_norm: 0.8657427939715991, iteration: 254323
loss: 1.0274547338485718,grad_norm: 0.9203572333690901, iteration: 254324
loss: 1.0420879125595093,grad_norm: 0.9999995852351623, iteration: 254325
loss: 0.9996744394302368,grad_norm: 0.9999991297502584, iteration: 254326
loss: 0.9747615456581116,grad_norm: 0.821417466699225, iteration: 254327
loss: 1.0079352855682373,grad_norm: 0.9358175023164802, iteration: 254328
loss: 1.0143461227416992,grad_norm: 0.9999991564393473, iteration: 254329
loss: 1.0389961004257202,grad_norm: 0.9004473494598119, iteration: 254330
loss: 1.0146268606185913,grad_norm: 0.9320101712022217, iteration: 254331
loss: 1.0354291200637817,grad_norm: 0.9999995393604137, iteration: 254332
loss: 0.973973274230957,grad_norm: 0.9691175898047054, iteration: 254333
loss: 1.010189175605774,grad_norm: 0.9999991649462407, iteration: 254334
loss: 1.0881435871124268,grad_norm: 0.9999998603351438, iteration: 254335
loss: 1.0017672777175903,grad_norm: 0.9999990670652885, iteration: 254336
loss: 1.0143754482269287,grad_norm: 0.999999153259316, iteration: 254337
loss: 0.9753063917160034,grad_norm: 0.8772904421328038, iteration: 254338
loss: 1.0657457113265991,grad_norm: 0.8561882934260926, iteration: 254339
loss: 1.0330913066864014,grad_norm: 0.9999990809689788, iteration: 254340
loss: 0.9788564443588257,grad_norm: 0.999999210242674, iteration: 254341
loss: 1.057987928390503,grad_norm: 0.9123878747767791, iteration: 254342
loss: 0.9937999844551086,grad_norm: 0.9999992246728797, iteration: 254343
loss: 1.0393811464309692,grad_norm: 0.9999996390630281, iteration: 254344
loss: 0.9780299663543701,grad_norm: 0.893052234354732, iteration: 254345
loss: 0.9917012453079224,grad_norm: 0.7649068626999806, iteration: 254346
loss: 1.0237566232681274,grad_norm: 0.7563798978310757, iteration: 254347
loss: 0.9765869975090027,grad_norm: 0.8366519947593968, iteration: 254348
loss: 1.1486891508102417,grad_norm: 0.9999990347816293, iteration: 254349
loss: 1.0537081956863403,grad_norm: 0.9999992072411197, iteration: 254350
loss: 1.0060412883758545,grad_norm: 0.8516016482624, iteration: 254351
loss: 1.0163275003433228,grad_norm: 0.8087718546363029, iteration: 254352
loss: 1.0419243574142456,grad_norm: 0.9999991153783692, iteration: 254353
loss: 0.9898250102996826,grad_norm: 0.9517255425921557, iteration: 254354
loss: 0.9976090788841248,grad_norm: 0.9999992441201749, iteration: 254355
loss: 1.1042832136154175,grad_norm: 0.9999997401039237, iteration: 254356
loss: 1.00600004196167,grad_norm: 0.9999990017832066, iteration: 254357
loss: 0.9566171169281006,grad_norm: 0.928957903893252, iteration: 254358
loss: 1.0518486499786377,grad_norm: 0.9999994226449573, iteration: 254359
loss: 0.9905118346214294,grad_norm: 0.8349081185638696, iteration: 254360
loss: 0.9549731612205505,grad_norm: 0.9265253566036872, iteration: 254361
loss: 1.0063726902008057,grad_norm: 0.8828301285430927, iteration: 254362
loss: 1.069053292274475,grad_norm: 0.8754976460302983, iteration: 254363
loss: 0.9882951974868774,grad_norm: 0.8109379678433519, iteration: 254364
loss: 1.0067297220230103,grad_norm: 0.85050639993202, iteration: 254365
loss: 1.034821629524231,grad_norm: 0.9430034332087499, iteration: 254366
loss: 1.019309163093567,grad_norm: 0.8488693330091407, iteration: 254367
loss: 1.0107859373092651,grad_norm: 0.8894109805766344, iteration: 254368
loss: 1.0025025606155396,grad_norm: 0.8987786443088988, iteration: 254369
loss: 1.0209614038467407,grad_norm: 0.8620461271267962, iteration: 254370
loss: 0.9910528659820557,grad_norm: 0.876905045039838, iteration: 254371
loss: 1.0855796337127686,grad_norm: 0.999999112828135, iteration: 254372
loss: 0.9917027950286865,grad_norm: 0.8667985611166552, iteration: 254373
loss: 1.0372366905212402,grad_norm: 0.9109713350928004, iteration: 254374
loss: 1.0422661304473877,grad_norm: 0.9033300676422443, iteration: 254375
loss: 1.0445955991744995,grad_norm: 0.9102541464736258, iteration: 254376
loss: 1.0207910537719727,grad_norm: 0.980107094931879, iteration: 254377
loss: 0.9604101777076721,grad_norm: 0.8793528309203247, iteration: 254378
loss: 1.0082740783691406,grad_norm: 0.9462799592199024, iteration: 254379
loss: 1.0154262781143188,grad_norm: 0.9999992745495426, iteration: 254380
loss: 0.984158992767334,grad_norm: 0.7366143369834767, iteration: 254381
loss: 1.019582748413086,grad_norm: 0.9468816791260367, iteration: 254382
loss: 0.9821732044219971,grad_norm: 0.8321647920422571, iteration: 254383
loss: 0.964404284954071,grad_norm: 0.8405606463699146, iteration: 254384
loss: 0.9926455616950989,grad_norm: 0.8333992382065286, iteration: 254385
loss: 1.0021727085113525,grad_norm: 0.9581849903831328, iteration: 254386
loss: 0.9856908917427063,grad_norm: 0.9999991980487214, iteration: 254387
loss: 1.004784107208252,grad_norm: 0.8642039546266781, iteration: 254388
loss: 0.9714930057525635,grad_norm: 0.8783383815009335, iteration: 254389
loss: 1.0005720853805542,grad_norm: 0.9776936830614386, iteration: 254390
loss: 1.0051625967025757,grad_norm: 0.8666767738137421, iteration: 254391
loss: 1.0223052501678467,grad_norm: 0.8321821245090073, iteration: 254392
loss: 0.9843975901603699,grad_norm: 0.9999990635576073, iteration: 254393
loss: 1.0711764097213745,grad_norm: 0.9999991605696827, iteration: 254394
loss: 1.0017443895339966,grad_norm: 0.9999992244532377, iteration: 254395
loss: 0.9937821626663208,grad_norm: 0.9496197322524385, iteration: 254396
loss: 1.004653811454773,grad_norm: 0.8398729212701729, iteration: 254397
loss: 1.0030410289764404,grad_norm: 0.9717887536446692, iteration: 254398
loss: 1.0095902681350708,grad_norm: 0.9605679043095198, iteration: 254399
loss: 0.9853808879852295,grad_norm: 0.9976533105990256, iteration: 254400
loss: 0.9762255549430847,grad_norm: 0.9999990469007352, iteration: 254401
loss: 1.015077829360962,grad_norm: 0.9999993035348566, iteration: 254402
loss: 1.006125569343567,grad_norm: 0.7586151089640056, iteration: 254403
loss: 1.0086872577667236,grad_norm: 0.9386400291189757, iteration: 254404
loss: 0.9912838339805603,grad_norm: 0.9999992124844378, iteration: 254405
loss: 0.9631245136260986,grad_norm: 0.8848554770636656, iteration: 254406
loss: 1.013219952583313,grad_norm: 0.9999990866327304, iteration: 254407
loss: 1.005833625793457,grad_norm: 0.9999993105746853, iteration: 254408
loss: 1.017555594444275,grad_norm: 0.9470113233858869, iteration: 254409
loss: 0.9825677871704102,grad_norm: 0.9571901853831664, iteration: 254410
loss: 0.9854833483695984,grad_norm: 0.7492288987700408, iteration: 254411
loss: 0.9998955130577087,grad_norm: 0.8509946769496994, iteration: 254412
loss: 1.0411152839660645,grad_norm: 0.9073638092804277, iteration: 254413
loss: 1.000958800315857,grad_norm: 0.999999405180378, iteration: 254414
loss: 0.9772242903709412,grad_norm: 0.892255137724535, iteration: 254415
loss: 0.9853076934814453,grad_norm: 0.7693411517747368, iteration: 254416
loss: 0.9713020920753479,grad_norm: 0.8773515584918631, iteration: 254417
loss: 1.0663690567016602,grad_norm: 0.9999993495998581, iteration: 254418
loss: 0.9863225817680359,grad_norm: 0.9999991625760041, iteration: 254419
loss: 1.00716233253479,grad_norm: 0.9999991587205097, iteration: 254420
loss: 1.0584505796432495,grad_norm: 0.9999992925773453, iteration: 254421
loss: 0.9790830016136169,grad_norm: 0.9999995250499863, iteration: 254422
loss: 1.0188294649124146,grad_norm: 0.9999991258204088, iteration: 254423
loss: 0.9886925220489502,grad_norm: 0.9128424994677253, iteration: 254424
loss: 0.9861612319946289,grad_norm: 0.9901529616217802, iteration: 254425
loss: 1.057167649269104,grad_norm: 0.85824750478064, iteration: 254426
loss: 0.9776774644851685,grad_norm: 0.8509098887147681, iteration: 254427
loss: 0.9937814474105835,grad_norm: 0.9325882467806541, iteration: 254428
loss: 0.9950718879699707,grad_norm: 0.7357912819800878, iteration: 254429
loss: 1.015054702758789,grad_norm: 0.7859893096914482, iteration: 254430
loss: 0.9849793910980225,grad_norm: 0.999999131789837, iteration: 254431
loss: 1.034554123878479,grad_norm: 0.9964355616877766, iteration: 254432
loss: 0.9870388507843018,grad_norm: 0.8181255881493222, iteration: 254433
loss: 1.00227952003479,grad_norm: 0.9999990664054791, iteration: 254434
loss: 1.0217816829681396,grad_norm: 0.9999991067962044, iteration: 254435
loss: 0.9797592759132385,grad_norm: 0.9474806157822772, iteration: 254436
loss: 1.0021027326583862,grad_norm: 0.999999438069809, iteration: 254437
loss: 1.030387043952942,grad_norm: 0.9252034611178797, iteration: 254438
loss: 0.991274893283844,grad_norm: 0.9283218431741943, iteration: 254439
loss: 1.0479220151901245,grad_norm: 0.9999990200501351, iteration: 254440
loss: 1.011899471282959,grad_norm: 0.8061773582026676, iteration: 254441
loss: 0.969299852848053,grad_norm: 0.9654629151291261, iteration: 254442
loss: 0.9843353629112244,grad_norm: 0.856257783477781, iteration: 254443
loss: 1.0091311931610107,grad_norm: 0.9999995065162588, iteration: 254444
loss: 1.0211255550384521,grad_norm: 0.9858603707989764, iteration: 254445
loss: 1.0297214984893799,grad_norm: 0.9371662490899887, iteration: 254446
loss: 0.9787189960479736,grad_norm: 0.8120214474935116, iteration: 254447
loss: 0.9944154620170593,grad_norm: 0.834771934102887, iteration: 254448
loss: 1.0057512521743774,grad_norm: 0.8685174075191376, iteration: 254449
loss: 1.0041431188583374,grad_norm: 0.712948917505532, iteration: 254450
loss: 0.9784400463104248,grad_norm: 0.9999991631704682, iteration: 254451
loss: 1.042759895324707,grad_norm: 0.9999991502696305, iteration: 254452
loss: 1.0360745191574097,grad_norm: 0.8729512075511319, iteration: 254453
loss: 0.9887712597846985,grad_norm: 0.8186996232541196, iteration: 254454
loss: 1.0155212879180908,grad_norm: 0.9734640028293887, iteration: 254455
loss: 0.9631199836730957,grad_norm: 0.8548180535821425, iteration: 254456
loss: 1.0257869958877563,grad_norm: 0.8364009861387807, iteration: 254457
loss: 1.0087889432907104,grad_norm: 0.848730867115299, iteration: 254458
loss: 1.0228476524353027,grad_norm: 0.8304667329761805, iteration: 254459
loss: 1.0018457174301147,grad_norm: 0.8409684376573616, iteration: 254460
loss: 0.9985161423683167,grad_norm: 0.9999996172512502, iteration: 254461
loss: 1.0127629041671753,grad_norm: 0.9066841130496249, iteration: 254462
loss: 0.9930353760719299,grad_norm: 0.8391105170671023, iteration: 254463
loss: 1.015121579170227,grad_norm: 0.9999998178430785, iteration: 254464
loss: 1.0390912294387817,grad_norm: 0.8946843915577627, iteration: 254465
loss: 1.003431797027588,grad_norm: 0.9006265675649706, iteration: 254466
loss: 0.9945048093795776,grad_norm: 0.736431867423698, iteration: 254467
loss: 0.9688321352005005,grad_norm: 0.8544924519415327, iteration: 254468
loss: 1.0310925245285034,grad_norm: 0.9999997490841105, iteration: 254469
loss: 0.9895192384719849,grad_norm: 0.952917407531187, iteration: 254470
loss: 1.0131256580352783,grad_norm: 0.9590494902532388, iteration: 254471
loss: 1.0143219232559204,grad_norm: 0.9856368235784261, iteration: 254472
loss: 1.026803731918335,grad_norm: 0.9870400967635513, iteration: 254473
loss: 0.9926881790161133,grad_norm: 0.8458893151249969, iteration: 254474
loss: 1.0545403957366943,grad_norm: 0.939628687586881, iteration: 254475
loss: 1.0136480331420898,grad_norm: 0.9291167750048758, iteration: 254476
loss: 1.037146806716919,grad_norm: 0.8181882251734417, iteration: 254477
loss: 1.0001492500305176,grad_norm: 0.9999990989796997, iteration: 254478
loss: 1.0032800436019897,grad_norm: 0.785286776375031, iteration: 254479
loss: 1.033706545829773,grad_norm: 0.9999992547919302, iteration: 254480
loss: 0.9870381951332092,grad_norm: 0.9999991858985152, iteration: 254481
loss: 0.9920910596847534,grad_norm: 0.8016549426348654, iteration: 254482
loss: 1.015522837638855,grad_norm: 0.9999998170534978, iteration: 254483
loss: 0.9875311255455017,grad_norm: 0.8205719077567042, iteration: 254484
loss: 0.9771835803985596,grad_norm: 0.9017304277843483, iteration: 254485
loss: 0.996449887752533,grad_norm: 0.9999999292554972, iteration: 254486
loss: 1.0656627416610718,grad_norm: 0.886674133387613, iteration: 254487
loss: 0.9876450300216675,grad_norm: 0.8993954428721336, iteration: 254488
loss: 0.9865831732749939,grad_norm: 0.8344851484988133, iteration: 254489
loss: 1.0304632186889648,grad_norm: 0.9999998490657697, iteration: 254490
loss: 0.9853186011314392,grad_norm: 0.9999991088723869, iteration: 254491
loss: 1.0066561698913574,grad_norm: 0.8463474104647494, iteration: 254492
loss: 0.9733701944351196,grad_norm: 0.9176998410879248, iteration: 254493
loss: 1.0054335594177246,grad_norm: 0.7839983971518486, iteration: 254494
loss: 1.026543140411377,grad_norm: 0.9999991414876377, iteration: 254495
loss: 1.0087231397628784,grad_norm: 0.8550688940235561, iteration: 254496
loss: 0.993658185005188,grad_norm: 0.8786644771849188, iteration: 254497
loss: 1.004522442817688,grad_norm: 0.8893337400245857, iteration: 254498
loss: 0.9782326221466064,grad_norm: 0.8304700163090886, iteration: 254499
loss: 0.9645172953605652,grad_norm: 0.8450293669031574, iteration: 254500
loss: 0.9957699775695801,grad_norm: 0.897271691992666, iteration: 254501
loss: 0.9694010615348816,grad_norm: 0.9175139961843531, iteration: 254502
loss: 1.026568055152893,grad_norm: 0.9999999039611726, iteration: 254503
loss: 1.0003260374069214,grad_norm: 0.9604056720416208, iteration: 254504
loss: 1.0181260108947754,grad_norm: 0.9999991017130172, iteration: 254505
loss: 0.987223207950592,grad_norm: 0.723927141878681, iteration: 254506
loss: 0.9696328639984131,grad_norm: 0.8410099880691901, iteration: 254507
loss: 1.0815685987472534,grad_norm: 0.9999992328592767, iteration: 254508
loss: 0.9794018864631653,grad_norm: 0.9863981447498492, iteration: 254509
loss: 0.9915189146995544,grad_norm: 0.8311302808238282, iteration: 254510
loss: 0.9716475605964661,grad_norm: 0.9515073383319038, iteration: 254511
loss: 1.000095009803772,grad_norm: 0.9999991144394019, iteration: 254512
loss: 0.9966250061988831,grad_norm: 0.7878887152859128, iteration: 254513
loss: 0.9988367557525635,grad_norm: 0.9999991570014383, iteration: 254514
loss: 0.9578103423118591,grad_norm: 0.9999991401810444, iteration: 254515
loss: 1.0150307416915894,grad_norm: 0.7798948979625315, iteration: 254516
loss: 0.9935539960861206,grad_norm: 0.9533671176724339, iteration: 254517
loss: 0.9704769253730774,grad_norm: 0.9533871438781788, iteration: 254518
loss: 0.9978473782539368,grad_norm: 0.9999989776008085, iteration: 254519
loss: 1.1250147819519043,grad_norm: 0.9999997568664352, iteration: 254520
loss: 0.9803546667098999,grad_norm: 0.8124418669058907, iteration: 254521
loss: 0.999622642993927,grad_norm: 0.885961973786502, iteration: 254522
loss: 1.029271125793457,grad_norm: 0.8899892015701173, iteration: 254523
loss: 1.0332763195037842,grad_norm: 0.872868077395413, iteration: 254524
loss: 0.9726985096931458,grad_norm: 0.9065167802413158, iteration: 254525
loss: 0.9705142378807068,grad_norm: 0.9606171664185562, iteration: 254526
loss: 0.9939119219779968,grad_norm: 0.9999997876361664, iteration: 254527
loss: 1.0149363279342651,grad_norm: 0.9660730107701552, iteration: 254528
loss: 1.0252301692962646,grad_norm: 0.9999999297692908, iteration: 254529
loss: 0.992366373538971,grad_norm: 0.827133893715635, iteration: 254530
loss: 0.9488580226898193,grad_norm: 0.7689941949167445, iteration: 254531
loss: 1.0213141441345215,grad_norm: 0.9999990834143418, iteration: 254532
loss: 0.9868478178977966,grad_norm: 0.945331893753648, iteration: 254533
loss: 0.9998558163642883,grad_norm: 0.8005662980611326, iteration: 254534
loss: 1.01388418674469,grad_norm: 0.9237213256737726, iteration: 254535
loss: 1.0146316289901733,grad_norm: 0.9999994973863228, iteration: 254536
loss: 1.0292530059814453,grad_norm: 0.9999991249881488, iteration: 254537
loss: 0.9732027649879456,grad_norm: 0.855060106963123, iteration: 254538
loss: 1.016719937324524,grad_norm: 0.9999993405846725, iteration: 254539
loss: 1.0141363143920898,grad_norm: 0.8501031942047481, iteration: 254540
loss: 0.9962926506996155,grad_norm: 0.699346933854168, iteration: 254541
loss: 1.0019502639770508,grad_norm: 0.9999994497126466, iteration: 254542
loss: 1.0197629928588867,grad_norm: 0.7080619291414847, iteration: 254543
loss: 0.9639487862586975,grad_norm: 0.8647096760558316, iteration: 254544
loss: 1.0059089660644531,grad_norm: 0.9999990672288486, iteration: 254545
loss: 0.9697568416595459,grad_norm: 0.8718885723254701, iteration: 254546
loss: 0.9731362462043762,grad_norm: 0.9999990881746288, iteration: 254547
loss: 0.9960924386978149,grad_norm: 0.8837020773644145, iteration: 254548
loss: 0.9647331833839417,grad_norm: 0.8305975561017952, iteration: 254549
loss: 1.124315857887268,grad_norm: 0.999999009790443, iteration: 254550
loss: 0.9931455850601196,grad_norm: 0.9086705378971471, iteration: 254551
loss: 0.9770593643188477,grad_norm: 0.9128882706200846, iteration: 254552
loss: 1.0001105070114136,grad_norm: 0.9306244494900758, iteration: 254553
loss: 1.0376065969467163,grad_norm: 0.9347350385863187, iteration: 254554
loss: 1.175815463066101,grad_norm: 0.9999993803825864, iteration: 254555
loss: 1.0469046831130981,grad_norm: 0.9999991988657971, iteration: 254556
loss: 0.9763566851615906,grad_norm: 0.9467381708243426, iteration: 254557
loss: 1.1100420951843262,grad_norm: 0.9999995770015457, iteration: 254558
loss: 0.9968011379241943,grad_norm: 0.9999990841875203, iteration: 254559
loss: 0.995637834072113,grad_norm: 0.9999990488517724, iteration: 254560
loss: 1.0083231925964355,grad_norm: 0.9685267825615381, iteration: 254561
loss: 1.0023635625839233,grad_norm: 0.99999913408592, iteration: 254562
loss: 1.0067918300628662,grad_norm: 0.9999996073084519, iteration: 254563
loss: 1.0671138763427734,grad_norm: 0.9999993603416844, iteration: 254564
loss: 1.0209481716156006,grad_norm: 0.9999990205866411, iteration: 254565
loss: 0.9619802236557007,grad_norm: 0.9605447558064191, iteration: 254566
loss: 0.977125346660614,grad_norm: 0.9999993618882763, iteration: 254567
loss: 0.9840685725212097,grad_norm: 0.856263051504157, iteration: 254568
loss: 1.0176000595092773,grad_norm: 0.783754406894331, iteration: 254569
loss: 1.0024213790893555,grad_norm: 0.8219951569323738, iteration: 254570
loss: 1.026142954826355,grad_norm: 0.8133321677014882, iteration: 254571
loss: 0.9871459603309631,grad_norm: 0.9478437662258078, iteration: 254572
loss: 1.016053557395935,grad_norm: 0.9999998992074349, iteration: 254573
loss: 0.9597997665405273,grad_norm: 0.9885094117972211, iteration: 254574
loss: 0.9720426201820374,grad_norm: 0.9270395957780433, iteration: 254575
loss: 1.0675524473190308,grad_norm: 0.9999990927313954, iteration: 254576
loss: 1.0498793125152588,grad_norm: 0.9999991221194412, iteration: 254577
loss: 0.9755100011825562,grad_norm: 0.9999992501298524, iteration: 254578
loss: 0.9618631601333618,grad_norm: 0.8301941955875821, iteration: 254579
loss: 1.0486533641815186,grad_norm: 0.9999997816034103, iteration: 254580
loss: 1.014754295349121,grad_norm: 0.9999991334321061, iteration: 254581
loss: 1.0060545206069946,grad_norm: 0.7978849237658184, iteration: 254582
loss: 0.9805243015289307,grad_norm: 0.9103193590136226, iteration: 254583
loss: 0.9904743432998657,grad_norm: 0.8038737400612879, iteration: 254584
loss: 1.0446209907531738,grad_norm: 0.9173369520265218, iteration: 254585
loss: 0.9656526446342468,grad_norm: 0.8929244972071003, iteration: 254586
loss: 1.028380274772644,grad_norm: 0.9999993330558208, iteration: 254587
loss: 0.9752904176712036,grad_norm: 0.9183531385844163, iteration: 254588
loss: 0.9968793392181396,grad_norm: 0.9999991180442755, iteration: 254589
loss: 1.0095698833465576,grad_norm: 0.8504965379019821, iteration: 254590
loss: 0.9774174690246582,grad_norm: 0.9376581466887774, iteration: 254591
loss: 0.9943438172340393,grad_norm: 0.9525569316837892, iteration: 254592
loss: 1.0050779581069946,grad_norm: 0.9999991171254144, iteration: 254593
loss: 0.9872593283653259,grad_norm: 0.8240173722693219, iteration: 254594
loss: 1.0107715129852295,grad_norm: 0.8991821360552884, iteration: 254595
loss: 0.9949899911880493,grad_norm: 0.7779878094028597, iteration: 254596
loss: 0.9455798268318176,grad_norm: 0.8549785498458983, iteration: 254597
loss: 0.9645958542823792,grad_norm: 0.999999112349064, iteration: 254598
loss: 0.9969134330749512,grad_norm: 0.796039173774291, iteration: 254599
loss: 1.012082815170288,grad_norm: 0.904029162852103, iteration: 254600
loss: 1.0738319158554077,grad_norm: 0.9999990644740987, iteration: 254601
loss: 1.0087857246398926,grad_norm: 0.999999150382409, iteration: 254602
loss: 1.0113435983657837,grad_norm: 0.9999997489114276, iteration: 254603
loss: 0.9746605157852173,grad_norm: 0.9999990129912262, iteration: 254604
loss: 0.9459323883056641,grad_norm: 0.9044011169893044, iteration: 254605
loss: 1.0214228630065918,grad_norm: 0.928142549693678, iteration: 254606
loss: 0.9656575918197632,grad_norm: 0.9226449186953184, iteration: 254607
loss: 0.9808256030082703,grad_norm: 0.9999999003710611, iteration: 254608
loss: 1.0157709121704102,grad_norm: 0.9571383335227945, iteration: 254609
loss: 0.9567891359329224,grad_norm: 0.7925340586973845, iteration: 254610
loss: 1.0140562057495117,grad_norm: 0.817539388573264, iteration: 254611
loss: 1.0480372905731201,grad_norm: 0.813171769885134, iteration: 254612
loss: 1.022616982460022,grad_norm: 0.7812866507770607, iteration: 254613
loss: 1.0081067085266113,grad_norm: 0.9999999229402878, iteration: 254614
loss: 1.0390465259552002,grad_norm: 0.9492212432947671, iteration: 254615
loss: 0.9550804495811462,grad_norm: 0.9999991454480531, iteration: 254616
loss: 1.0837303400039673,grad_norm: 0.9717571818288085, iteration: 254617
loss: 1.0063022375106812,grad_norm: 0.8638390385779167, iteration: 254618
loss: 1.2702611684799194,grad_norm: 0.9999996783805943, iteration: 254619
loss: 1.0015883445739746,grad_norm: 0.9993409261455264, iteration: 254620
loss: 1.0198301076889038,grad_norm: 0.8263340421393425, iteration: 254621
loss: 1.0032906532287598,grad_norm: 0.8779047311823047, iteration: 254622
loss: 0.9659190773963928,grad_norm: 0.9999993652728505, iteration: 254623
loss: 1.0177088975906372,grad_norm: 0.8195841404827654, iteration: 254624
loss: 1.151286005973816,grad_norm: 0.9999994404409502, iteration: 254625
loss: 0.9988862872123718,grad_norm: 0.8523549765559286, iteration: 254626
loss: 1.0055453777313232,grad_norm: 0.9852464777356401, iteration: 254627
loss: 1.0215675830841064,grad_norm: 0.9999990918711665, iteration: 254628
loss: 1.0127975940704346,grad_norm: 0.9999990907996194, iteration: 254629
loss: 1.204764485359192,grad_norm: 0.9189308482669528, iteration: 254630
loss: 1.0352195501327515,grad_norm: 0.7762611439914262, iteration: 254631
loss: 1.0308856964111328,grad_norm: 0.8702806085266166, iteration: 254632
loss: 1.0195955038070679,grad_norm: 0.9999990767116186, iteration: 254633
loss: 0.9539000391960144,grad_norm: 0.8633623650711854, iteration: 254634
loss: 1.0234057903289795,grad_norm: 0.8365421715085011, iteration: 254635
loss: 0.9707778096199036,grad_norm: 0.869184358546137, iteration: 254636
loss: 0.9500396847724915,grad_norm: 0.9999991453237781, iteration: 254637
loss: 0.9660691618919373,grad_norm: 0.8891427485017269, iteration: 254638
loss: 0.9838305711746216,grad_norm: 0.7164034710228099, iteration: 254639
loss: 1.015705943107605,grad_norm: 0.7877597355951761, iteration: 254640
loss: 0.9695261716842651,grad_norm: 0.8315929999190834, iteration: 254641
loss: 1.0175344944000244,grad_norm: 0.8953105833410692, iteration: 254642
loss: 1.0110573768615723,grad_norm: 0.7576785357480046, iteration: 254643
loss: 1.0149883031845093,grad_norm: 0.9999993939919622, iteration: 254644
loss: 1.0015912055969238,grad_norm: 0.9373017887251279, iteration: 254645
loss: 0.9958334565162659,grad_norm: 0.9999990211091085, iteration: 254646
loss: 1.0438048839569092,grad_norm: 0.9447840603327052, iteration: 254647
loss: 1.023396611213684,grad_norm: 0.7972889751099046, iteration: 254648
loss: 0.966046929359436,grad_norm: 0.8987913781209604, iteration: 254649
loss: 0.9960927963256836,grad_norm: 0.9556121875701887, iteration: 254650
loss: 1.0140295028686523,grad_norm: 0.8432722773374263, iteration: 254651
loss: 0.9926623702049255,grad_norm: 0.8915294852120509, iteration: 254652
loss: 1.1473884582519531,grad_norm: 0.9999996190532083, iteration: 254653
loss: 0.9729839563369751,grad_norm: 0.7579853610803781, iteration: 254654
loss: 1.0634639263153076,grad_norm: 0.9999997313079575, iteration: 254655
loss: 0.9656035304069519,grad_norm: 0.9319102243779025, iteration: 254656
loss: 1.003943681716919,grad_norm: 0.9999999453673315, iteration: 254657
loss: 0.9852091073989868,grad_norm: 0.9999998673058114, iteration: 254658
loss: 1.0155924558639526,grad_norm: 0.9813832091865, iteration: 254659
loss: 0.9983019232749939,grad_norm: 0.892586703223714, iteration: 254660
loss: 1.0028294324874878,grad_norm: 0.9999990712616267, iteration: 254661
loss: 1.0305298566818237,grad_norm: 0.9999991077374342, iteration: 254662
loss: 1.0330085754394531,grad_norm: 0.7563292098904328, iteration: 254663
loss: 1.0027812719345093,grad_norm: 0.999999079642504, iteration: 254664
loss: 1.0099409818649292,grad_norm: 0.8910548043205452, iteration: 254665
loss: 1.0624161958694458,grad_norm: 0.9999995181704748, iteration: 254666
loss: 1.0987133979797363,grad_norm: 0.999999445877882, iteration: 254667
loss: 1.068657636642456,grad_norm: 0.9151855811380261, iteration: 254668
loss: 0.952094554901123,grad_norm: 0.7992481285069087, iteration: 254669
loss: 1.0213103294372559,grad_norm: 0.9894421943886202, iteration: 254670
loss: 1.0913407802581787,grad_norm: 0.999999228601209, iteration: 254671
loss: 1.048441767692566,grad_norm: 0.8712446550088577, iteration: 254672
loss: 0.9675018191337585,grad_norm: 0.8955318504784523, iteration: 254673
loss: 0.971876323223114,grad_norm: 0.8440135439002769, iteration: 254674
loss: 1.0472016334533691,grad_norm: 0.9999992369032201, iteration: 254675
loss: 1.1350771188735962,grad_norm: 1.0000000051339142, iteration: 254676
loss: 0.9781777262687683,grad_norm: 0.9999990316726918, iteration: 254677
loss: 1.175366759300232,grad_norm: 0.9999992819091446, iteration: 254678
loss: 1.003013014793396,grad_norm: 0.9796079166873707, iteration: 254679
loss: 1.0073939561843872,grad_norm: 0.858888009022891, iteration: 254680
loss: 1.1110361814498901,grad_norm: 0.9999996726877963, iteration: 254681
loss: 0.967643141746521,grad_norm: 0.8261099906357573, iteration: 254682
loss: 1.0185143947601318,grad_norm: 0.999999202674868, iteration: 254683
loss: 0.9850093722343445,grad_norm: 0.9999989350781522, iteration: 254684
loss: 0.9991925954818726,grad_norm: 0.9637584047584575, iteration: 254685
loss: 1.0310828685760498,grad_norm: 0.9999992804758192, iteration: 254686
loss: 0.9980408549308777,grad_norm: 0.9083275895829035, iteration: 254687
loss: 0.9832406044006348,grad_norm: 0.9999991061455786, iteration: 254688
loss: 1.0210292339324951,grad_norm: 0.9999991251832465, iteration: 254689
loss: 1.0113006830215454,grad_norm: 0.8804489027542947, iteration: 254690
loss: 0.9887816309928894,grad_norm: 0.9933807200177566, iteration: 254691
loss: 1.0029155015945435,grad_norm: 0.8041923145820434, iteration: 254692
loss: 1.0214238166809082,grad_norm: 0.9999989693088026, iteration: 254693
loss: 1.0125926733016968,grad_norm: 0.9850956358870544, iteration: 254694
loss: 1.0200567245483398,grad_norm: 0.9158968019679832, iteration: 254695
loss: 1.0078561305999756,grad_norm: 0.8308268920883051, iteration: 254696
loss: 1.009242057800293,grad_norm: 0.830711104046091, iteration: 254697
loss: 1.020158290863037,grad_norm: 0.8877663378951508, iteration: 254698
loss: 1.044870138168335,grad_norm: 0.9999993058519914, iteration: 254699
loss: 0.9892991781234741,grad_norm: 0.9321083200457168, iteration: 254700
loss: 1.131363868713379,grad_norm: 0.999999167193303, iteration: 254701
loss: 1.0513012409210205,grad_norm: 0.9382424158379711, iteration: 254702
loss: 1.0457913875579834,grad_norm: 0.9999994863945105, iteration: 254703
loss: 1.0209205150604248,grad_norm: 0.878423203135434, iteration: 254704
loss: 1.045161485671997,grad_norm: 0.9368864671930675, iteration: 254705
loss: 0.9912624359130859,grad_norm: 0.7882342104346535, iteration: 254706
loss: 0.9820393919944763,grad_norm: 0.8826969418085484, iteration: 254707
loss: 1.012535572052002,grad_norm: 0.9402865126777548, iteration: 254708
loss: 0.969879150390625,grad_norm: 0.873428876121821, iteration: 254709
loss: 0.9846153855323792,grad_norm: 0.9603666599559967, iteration: 254710
loss: 1.016055941581726,grad_norm: 0.939960095663077, iteration: 254711
loss: 0.9715811014175415,grad_norm: 0.9999991582977356, iteration: 254712
loss: 1.0175294876098633,grad_norm: 0.9936775361828555, iteration: 254713
loss: 1.0564950704574585,grad_norm: 0.8703375505958929, iteration: 254714
loss: 1.0025449991226196,grad_norm: 0.9999993015192126, iteration: 254715
loss: 1.1267597675323486,grad_norm: 0.999999729802231, iteration: 254716
loss: 1.023696780204773,grad_norm: 0.9999990848965773, iteration: 254717
loss: 1.0862159729003906,grad_norm: 0.9372426163842769, iteration: 254718
loss: 1.0213308334350586,grad_norm: 0.9999990628225761, iteration: 254719
loss: 1.060955286026001,grad_norm: 0.9915343806273269, iteration: 254720
loss: 1.0148853063583374,grad_norm: 0.8604680936388452, iteration: 254721
loss: 1.0002686977386475,grad_norm: 0.8988143004980065, iteration: 254722
loss: 1.0027077198028564,grad_norm: 0.9999997397899558, iteration: 254723
loss: 0.9985207319259644,grad_norm: 0.8096703536369508, iteration: 254724
loss: 1.0388545989990234,grad_norm: 0.8078117839618386, iteration: 254725
loss: 1.0226236581802368,grad_norm: 0.7656049380182767, iteration: 254726
loss: 1.068830966949463,grad_norm: 0.9999990921965666, iteration: 254727
loss: 1.0240999460220337,grad_norm: 0.9709369130221864, iteration: 254728
loss: 0.9919620156288147,grad_norm: 0.8625053966867441, iteration: 254729
loss: 1.0111474990844727,grad_norm: 0.9999991451913376, iteration: 254730
loss: 1.0131316184997559,grad_norm: 0.9999990683782941, iteration: 254731
loss: 0.9994763135910034,grad_norm: 0.9673228619051183, iteration: 254732
loss: 0.9794518351554871,grad_norm: 0.865308278806658, iteration: 254733
loss: 1.0122711658477783,grad_norm: 0.8271387412720858, iteration: 254734
loss: 0.9554711580276489,grad_norm: 0.8413500502170075, iteration: 254735
loss: 0.9978888630867004,grad_norm: 0.9751696310635793, iteration: 254736
loss: 0.9958384037017822,grad_norm: 0.8263787253570046, iteration: 254737
loss: 1.0152515172958374,grad_norm: 0.999999101499116, iteration: 254738
loss: 0.9837701320648193,grad_norm: 0.9686266301508693, iteration: 254739
loss: 1.0186362266540527,grad_norm: 0.8886704508162663, iteration: 254740
loss: 1.02488112449646,grad_norm: 0.8231327265301783, iteration: 254741
loss: 0.9771884679794312,grad_norm: 0.9999993775584658, iteration: 254742
loss: 1.0013468265533447,grad_norm: 0.8714842093979708, iteration: 254743
loss: 0.9623991847038269,grad_norm: 0.9999992796811077, iteration: 254744
loss: 1.0309956073760986,grad_norm: 0.9439363729199696, iteration: 254745
loss: 1.02361261844635,grad_norm: 0.9999990054175784, iteration: 254746
loss: 0.9954445362091064,grad_norm: 0.8927919130009309, iteration: 254747
loss: 0.9959599375724792,grad_norm: 0.9999993613835655, iteration: 254748
loss: 1.0225530862808228,grad_norm: 0.8339958315507863, iteration: 254749
loss: 1.0010040998458862,grad_norm: 0.917838478913937, iteration: 254750
loss: 1.0133148431777954,grad_norm: 0.8189763607691312, iteration: 254751
loss: 0.9645019173622131,grad_norm: 0.7438807686306529, iteration: 254752
loss: 1.0003806352615356,grad_norm: 0.9105566110353778, iteration: 254753
loss: 1.006169080734253,grad_norm: 0.9999991091489818, iteration: 254754
loss: 1.049045443534851,grad_norm: 0.999999659414568, iteration: 254755
loss: 1.050823450088501,grad_norm: 0.9999991152635412, iteration: 254756
loss: 0.9706276655197144,grad_norm: 0.9609250348379366, iteration: 254757
loss: 1.06769859790802,grad_norm: 0.9999996178376084, iteration: 254758
loss: 1.0263985395431519,grad_norm: 0.9671570486364566, iteration: 254759
loss: 1.0186059474945068,grad_norm: 0.9999989737580888, iteration: 254760
loss: 1.0016047954559326,grad_norm: 0.851646846244653, iteration: 254761
loss: 1.0351866483688354,grad_norm: 0.834108568319114, iteration: 254762
loss: 0.9737268090248108,grad_norm: 0.9826052771202906, iteration: 254763
loss: 0.9839485287666321,grad_norm: 0.7289356685366069, iteration: 254764
loss: 0.9904122948646545,grad_norm: 0.9969022035412851, iteration: 254765
loss: 1.011216163635254,grad_norm: 0.9261420653224376, iteration: 254766
loss: 1.0437445640563965,grad_norm: 0.9999996796860505, iteration: 254767
loss: 1.0234209299087524,grad_norm: 0.8122621061248104, iteration: 254768
loss: 1.0107094049453735,grad_norm: 0.6994088902303923, iteration: 254769
loss: 1.0249158143997192,grad_norm: 0.7570713619539501, iteration: 254770
loss: 0.9824121594429016,grad_norm: 0.8637153213460426, iteration: 254771
loss: 1.0020207166671753,grad_norm: 0.9999993849922336, iteration: 254772
loss: 1.3070498704910278,grad_norm: 0.9999991650716418, iteration: 254773
loss: 1.0846304893493652,grad_norm: 0.9999999352384478, iteration: 254774
loss: 0.9811112880706787,grad_norm: 0.8697486410435955, iteration: 254775
loss: 1.1602075099945068,grad_norm: 0.9999994742666437, iteration: 254776
loss: 1.0409443378448486,grad_norm: 0.9999990690598545, iteration: 254777
loss: 1.0671181678771973,grad_norm: 0.9999990984832473, iteration: 254778
loss: 0.9808563590049744,grad_norm: 0.9999992811908618, iteration: 254779
loss: 0.978752076625824,grad_norm: 0.7810343250904579, iteration: 254780
loss: 1.017741322517395,grad_norm: 0.901973066488084, iteration: 254781
loss: 1.0048729181289673,grad_norm: 0.9999991167723835, iteration: 254782
loss: 1.0050318241119385,grad_norm: 0.9072761864785835, iteration: 254783
loss: 1.0212342739105225,grad_norm: 0.7451727121996331, iteration: 254784
loss: 1.0500032901763916,grad_norm: 0.9999991728654829, iteration: 254785
loss: 1.005681037902832,grad_norm: 0.8792856996279941, iteration: 254786
loss: 1.0093683004379272,grad_norm: 0.9689868754760759, iteration: 254787
loss: 0.9860697388648987,grad_norm: 0.9397391795964328, iteration: 254788
loss: 0.981197714805603,grad_norm: 0.6984253145545924, iteration: 254789
loss: 0.9974754452705383,grad_norm: 0.8474808355879713, iteration: 254790
loss: 1.0086445808410645,grad_norm: 0.8162726478549878, iteration: 254791
loss: 1.0116742849349976,grad_norm: 0.9916680597557942, iteration: 254792
loss: 0.9957966208457947,grad_norm: 0.7641356239158624, iteration: 254793
loss: 0.9902434945106506,grad_norm: 0.8572422302302586, iteration: 254794
loss: 1.0544519424438477,grad_norm: 0.9999994460905033, iteration: 254795
loss: 0.9735488891601562,grad_norm: 0.7973170920089031, iteration: 254796
loss: 1.0203272104263306,grad_norm: 0.7481044231143776, iteration: 254797
loss: 0.9974193572998047,grad_norm: 0.9999992900435196, iteration: 254798
loss: 0.9559839367866516,grad_norm: 0.8309307889928393, iteration: 254799
loss: 0.9974539875984192,grad_norm: 0.9999992195320133, iteration: 254800
loss: 0.9773287177085876,grad_norm: 0.8139767952383808, iteration: 254801
loss: 1.0285664796829224,grad_norm: 0.9999991451032338, iteration: 254802
loss: 0.9966167211532593,grad_norm: 0.7921034481016938, iteration: 254803
loss: 1.0263086557388306,grad_norm: 0.9999990156627723, iteration: 254804
loss: 1.0092315673828125,grad_norm: 0.8957829679168579, iteration: 254805
loss: 1.0260086059570312,grad_norm: 0.9088157334071836, iteration: 254806
loss: 0.9829988479614258,grad_norm: 0.7847221071346143, iteration: 254807
loss: 0.9854867458343506,grad_norm: 0.7653245512035468, iteration: 254808
loss: 0.9618205428123474,grad_norm: 0.9999992115835352, iteration: 254809
loss: 0.9923449158668518,grad_norm: 0.9240619717236299, iteration: 254810
loss: 1.026221513748169,grad_norm: 0.9510595199861639, iteration: 254811
loss: 0.9495810866355896,grad_norm: 0.8387942787224616, iteration: 254812
loss: 0.9847674369812012,grad_norm: 0.8920411897961253, iteration: 254813
loss: 0.9644237756729126,grad_norm: 0.8551512963924924, iteration: 254814
loss: 0.9812138080596924,grad_norm: 0.9688264335067105, iteration: 254815
loss: 1.0092524290084839,grad_norm: 0.8382865624588008, iteration: 254816
loss: 1.0070134401321411,grad_norm: 0.9157556452834009, iteration: 254817
loss: 0.9883059859275818,grad_norm: 0.7758836501718903, iteration: 254818
loss: 0.976711094379425,grad_norm: 0.9483479835697363, iteration: 254819
loss: 1.0519452095031738,grad_norm: 0.9999993222222098, iteration: 254820
loss: 0.9957915544509888,grad_norm: 0.8936651496783147, iteration: 254821
loss: 0.9610878825187683,grad_norm: 0.7897900330181693, iteration: 254822
loss: 1.0495989322662354,grad_norm: 0.9999994400903905, iteration: 254823
loss: 0.9906646609306335,grad_norm: 0.9060093265147143, iteration: 254824
loss: 1.0418174266815186,grad_norm: 0.9693083809557698, iteration: 254825
loss: 0.980116605758667,grad_norm: 0.822648022475089, iteration: 254826
loss: 0.9861741662025452,grad_norm: 0.7689194117764688, iteration: 254827
loss: 0.9673651456832886,grad_norm: 0.820345342675055, iteration: 254828
loss: 1.0076066255569458,grad_norm: 0.8308291597712516, iteration: 254829
loss: 1.0109463930130005,grad_norm: 0.8426332906852623, iteration: 254830
loss: 1.0277314186096191,grad_norm: 0.8252151224659002, iteration: 254831
loss: 1.0052565336227417,grad_norm: 0.7153920034646594, iteration: 254832
loss: 0.9689280986785889,grad_norm: 0.9728942210288056, iteration: 254833
loss: 0.995512068271637,grad_norm: 0.9999991440664888, iteration: 254834
loss: 0.97515869140625,grad_norm: 0.9999993373108751, iteration: 254835
loss: 1.0260649919509888,grad_norm: 0.999999105234482, iteration: 254836
loss: 1.0111581087112427,grad_norm: 0.8673721162093205, iteration: 254837
loss: 1.0286872386932373,grad_norm: 0.8242438596919897, iteration: 254838
loss: 1.0067243576049805,grad_norm: 0.999999021890963, iteration: 254839
loss: 0.9977471232414246,grad_norm: 0.8813835524446714, iteration: 254840
loss: 1.0347926616668701,grad_norm: 0.8074572809714241, iteration: 254841
loss: 0.985358715057373,grad_norm: 0.8488087822540757, iteration: 254842
loss: 1.0338467359542847,grad_norm: 0.7878177929490583, iteration: 254843
loss: 0.983904242515564,grad_norm: 0.8635766712127907, iteration: 254844
loss: 1.0148099660873413,grad_norm: 0.9016228969001747, iteration: 254845
loss: 1.005442500114441,grad_norm: 0.8948277395680764, iteration: 254846
loss: 0.9912073016166687,grad_norm: 0.9368310768488523, iteration: 254847
loss: 0.9992402791976929,grad_norm: 0.8460189906244924, iteration: 254848
loss: 1.0133779048919678,grad_norm: 0.8857649150821479, iteration: 254849
loss: 0.9763063788414001,grad_norm: 0.9999998588181301, iteration: 254850
loss: 0.9810684323310852,grad_norm: 0.9999989839292818, iteration: 254851
loss: 0.9753643274307251,grad_norm: 0.9981827609023748, iteration: 254852
loss: 1.0426028966903687,grad_norm: 0.9999989838999706, iteration: 254853
loss: 1.004503607749939,grad_norm: 0.9262476757181014, iteration: 254854
loss: 0.9870430827140808,grad_norm: 0.88595254593608, iteration: 254855
loss: 1.0088746547698975,grad_norm: 0.8380378703861195, iteration: 254856
loss: 1.0064977407455444,grad_norm: 0.9274596794978394, iteration: 254857
loss: 1.0123814344406128,grad_norm: 0.8776563272827562, iteration: 254858
loss: 1.0095431804656982,grad_norm: 0.9999990981708774, iteration: 254859
loss: 1.0221755504608154,grad_norm: 0.7756904516063543, iteration: 254860
loss: 0.958077609539032,grad_norm: 0.7972821392092971, iteration: 254861
loss: 0.9768943190574646,grad_norm: 0.7458873385210472, iteration: 254862
loss: 1.0046498775482178,grad_norm: 0.7762356447527322, iteration: 254863
loss: 1.0407145023345947,grad_norm: 0.8529045018509354, iteration: 254864
loss: 1.0006306171417236,grad_norm: 0.8459907219601808, iteration: 254865
loss: 1.0149621963500977,grad_norm: 0.7172265200286925, iteration: 254866
loss: 1.0536670684814453,grad_norm: 0.9999990479292898, iteration: 254867
loss: 1.0110222101211548,grad_norm: 0.8158118171920246, iteration: 254868
loss: 0.9840047955513,grad_norm: 0.9999990530056632, iteration: 254869
loss: 1.0117043256759644,grad_norm: 0.9395235094673868, iteration: 254870
loss: 0.962054431438446,grad_norm: 0.914277265234063, iteration: 254871
loss: 0.9992862939834595,grad_norm: 0.8623784076362646, iteration: 254872
loss: 1.0243558883666992,grad_norm: 0.8814574611345702, iteration: 254873
loss: 0.9951472878456116,grad_norm: 0.8152274643471925, iteration: 254874
loss: 0.9852403998374939,grad_norm: 0.9999990882269799, iteration: 254875
loss: 1.0283899307250977,grad_norm: 0.9999991193254857, iteration: 254876
loss: 0.9769351482391357,grad_norm: 0.86678066144434, iteration: 254877
loss: 0.9467608332633972,grad_norm: 0.9380475712131717, iteration: 254878
loss: 1.0102001428604126,grad_norm: 0.8210453325724734, iteration: 254879
loss: 1.0010780096054077,grad_norm: 0.8869671642796805, iteration: 254880
loss: 0.9914531111717224,grad_norm: 0.9254009046265731, iteration: 254881
loss: 0.9849652051925659,grad_norm: 0.8916903031574024, iteration: 254882
loss: 1.0040521621704102,grad_norm: 0.9999995498007044, iteration: 254883
loss: 0.9998698830604553,grad_norm: 0.9999990390024578, iteration: 254884
loss: 0.9786949753761292,grad_norm: 0.9065104828931638, iteration: 254885
loss: 1.0100839138031006,grad_norm: 0.7833387004475449, iteration: 254886
loss: 0.9993371367454529,grad_norm: 0.9055347977168335, iteration: 254887
loss: 1.0054636001586914,grad_norm: 0.8228293666700593, iteration: 254888
loss: 0.9967362284660339,grad_norm: 0.9999990323627848, iteration: 254889
loss: 0.9506258964538574,grad_norm: 0.8532776610931134, iteration: 254890
loss: 1.0175812244415283,grad_norm: 0.9473944918659206, iteration: 254891
loss: 1.0084381103515625,grad_norm: 0.8576565606877139, iteration: 254892
loss: 1.001592993736267,grad_norm: 0.9999993587914835, iteration: 254893
loss: 0.9818673133850098,grad_norm: 0.8171870287484527, iteration: 254894
loss: 0.9679427742958069,grad_norm: 0.999999193897443, iteration: 254895
loss: 1.0126603841781616,grad_norm: 0.8000527472982237, iteration: 254896
loss: 0.9986265301704407,grad_norm: 0.8168368415424132, iteration: 254897
loss: 0.9685229063034058,grad_norm: 0.7949064040611671, iteration: 254898
loss: 0.9694042205810547,grad_norm: 0.9046110165778313, iteration: 254899
loss: 0.9867193102836609,grad_norm: 0.9999991731410871, iteration: 254900
loss: 1.0366064310073853,grad_norm: 0.9999994952047404, iteration: 254901
loss: 0.9676569104194641,grad_norm: 0.8420790325194455, iteration: 254902
loss: 1.0213816165924072,grad_norm: 0.9999991485459957, iteration: 254903
loss: 1.028347134590149,grad_norm: 0.7622255326462644, iteration: 254904
loss: 0.9920482635498047,grad_norm: 0.8239257091646819, iteration: 254905
loss: 0.9914669990539551,grad_norm: 0.9889673022640952, iteration: 254906
loss: 0.9939813613891602,grad_norm: 0.8431043758262277, iteration: 254907
loss: 0.9926923513412476,grad_norm: 0.9767071806141844, iteration: 254908
loss: 0.9745131731033325,grad_norm: 0.9999991295965074, iteration: 254909
loss: 1.0128309726715088,grad_norm: 0.9999989782583902, iteration: 254910
loss: 0.9929975867271423,grad_norm: 0.8006417193926805, iteration: 254911
loss: 1.0181080102920532,grad_norm: 0.9542715767834097, iteration: 254912
loss: 1.0077641010284424,grad_norm: 0.7668980562453429, iteration: 254913
loss: 1.0432147979736328,grad_norm: 0.8325328430630695, iteration: 254914
loss: 1.00997793674469,grad_norm: 0.9999989978126455, iteration: 254915
loss: 0.9725325703620911,grad_norm: 0.8408132706689647, iteration: 254916
loss: 1.0706623792648315,grad_norm: 0.9487858063809005, iteration: 254917
loss: 0.9689604043960571,grad_norm: 0.9999992552570756, iteration: 254918
loss: 0.9891645908355713,grad_norm: 0.8772448211800543, iteration: 254919
loss: 0.9746017456054688,grad_norm: 0.9446122756042867, iteration: 254920
loss: 1.0050361156463623,grad_norm: 0.9427486986875082, iteration: 254921
loss: 1.0104106664657593,grad_norm: 0.8301425583841693, iteration: 254922
loss: 1.0194863080978394,grad_norm: 0.8140580685666697, iteration: 254923
loss: 1.0869793891906738,grad_norm: 1.0000000316154625, iteration: 254924
loss: 1.0659762620925903,grad_norm: 0.9999999264663096, iteration: 254925
loss: 1.0144050121307373,grad_norm: 0.9298670127868682, iteration: 254926
loss: 1.0088011026382446,grad_norm: 0.8482517827385188, iteration: 254927
loss: 0.991312563419342,grad_norm: 0.7973652657316126, iteration: 254928
loss: 0.9875823259353638,grad_norm: 0.9733993164349998, iteration: 254929
loss: 0.9967436790466309,grad_norm: 0.9915784972514201, iteration: 254930
loss: 0.9730049967765808,grad_norm: 0.8063958015531091, iteration: 254931
loss: 1.0024986267089844,grad_norm: 0.915477676035444, iteration: 254932
loss: 0.9756473898887634,grad_norm: 0.8265672334633045, iteration: 254933
loss: 0.9926468729972839,grad_norm: 0.9383939765765518, iteration: 254934
loss: 1.017665982246399,grad_norm: 0.9356413765678522, iteration: 254935
loss: 0.9954356551170349,grad_norm: 0.8158686567313085, iteration: 254936
loss: 0.9720306396484375,grad_norm: 0.9917263588589967, iteration: 254937
loss: 1.0588405132293701,grad_norm: 0.9999989947907258, iteration: 254938
loss: 1.004665493965149,grad_norm: 0.8426547430211269, iteration: 254939
loss: 1.0008822679519653,grad_norm: 0.9999991372121886, iteration: 254940
loss: 1.0047293901443481,grad_norm: 0.9173098750839245, iteration: 254941
loss: 1.0038042068481445,grad_norm: 0.8259175796001187, iteration: 254942
loss: 0.9934435486793518,grad_norm: 0.878986229197407, iteration: 254943
loss: 1.0315265655517578,grad_norm: 0.8585748777661533, iteration: 254944
loss: 1.0262601375579834,grad_norm: 0.7965876201785405, iteration: 254945
loss: 1.0345200300216675,grad_norm: 0.9148442609270492, iteration: 254946
loss: 1.0093328952789307,grad_norm: 0.9186871610563352, iteration: 254947
loss: 1.0230809450149536,grad_norm: 0.8274196616998918, iteration: 254948
loss: 0.9563513398170471,grad_norm: 0.848680525004689, iteration: 254949
loss: 0.9826135635375977,grad_norm: 0.8174430623930422, iteration: 254950
loss: 1.0417622327804565,grad_norm: 0.9999991892999645, iteration: 254951
loss: 0.9983840584754944,grad_norm: 0.8889110904976814, iteration: 254952
loss: 1.0044864416122437,grad_norm: 0.8500345629825414, iteration: 254953
loss: 1.0156863927841187,grad_norm: 0.8068125174254837, iteration: 254954
loss: 0.9841304421424866,grad_norm: 0.9999990020506866, iteration: 254955
loss: 0.9784101247787476,grad_norm: 0.9163921710844702, iteration: 254956
loss: 0.9970282912254333,grad_norm: 0.8337696081221188, iteration: 254957
loss: 1.0191186666488647,grad_norm: 0.73956960000015, iteration: 254958
loss: 0.985858678817749,grad_norm: 0.7542202926149916, iteration: 254959
loss: 1.0327566862106323,grad_norm: 0.9999993442033124, iteration: 254960
loss: 0.9867653250694275,grad_norm: 0.8241033814827599, iteration: 254961
loss: 0.991121232509613,grad_norm: 0.875254534157387, iteration: 254962
loss: 0.9770244359970093,grad_norm: 0.8954766945300514, iteration: 254963
loss: 0.9571857452392578,grad_norm: 0.9318808836244015, iteration: 254964
loss: 1.035874366760254,grad_norm: 0.9070517253611811, iteration: 254965
loss: 0.9982322454452515,grad_norm: 0.9999990705870683, iteration: 254966
loss: 1.050188660621643,grad_norm: 0.999999162692912, iteration: 254967
loss: 1.014577865600586,grad_norm: 0.8431274833667468, iteration: 254968
loss: 0.9697102308273315,grad_norm: 0.8568569807574217, iteration: 254969
loss: 1.0207158327102661,grad_norm: 0.9999989774254291, iteration: 254970
loss: 1.0144346952438354,grad_norm: 0.7102696009514022, iteration: 254971
loss: 1.0195053815841675,grad_norm: 0.9999990346957487, iteration: 254972
loss: 0.9957908987998962,grad_norm: 0.9130857183669417, iteration: 254973
loss: 1.046849012374878,grad_norm: 0.8383764043664512, iteration: 254974
loss: 0.9897288680076599,grad_norm: 0.7902843676016051, iteration: 254975
loss: 1.0169577598571777,grad_norm: 0.9402247410558883, iteration: 254976
loss: 1.0030405521392822,grad_norm: 0.9174319387852868, iteration: 254977
loss: 0.9947969317436218,grad_norm: 0.748527151541827, iteration: 254978
loss: 1.0065683126449585,grad_norm: 0.8917457200197692, iteration: 254979
loss: 0.9954859614372253,grad_norm: 0.7001212351304096, iteration: 254980
loss: 0.9765025973320007,grad_norm: 0.7587504053589781, iteration: 254981
loss: 1.031327247619629,grad_norm: 0.999999280564365, iteration: 254982
loss: 0.9922375082969666,grad_norm: 0.8339493768961791, iteration: 254983
loss: 0.9820008277893066,grad_norm: 0.8765394871077294, iteration: 254984
loss: 1.0141762495040894,grad_norm: 0.9042627122783746, iteration: 254985
loss: 0.9997192025184631,grad_norm: 0.8058790636494413, iteration: 254986
loss: 1.0288084745407104,grad_norm: 0.8505838247623861, iteration: 254987
loss: 1.002381443977356,grad_norm: 0.9776512780500198, iteration: 254988
loss: 1.0177106857299805,grad_norm: 0.9362024337243943, iteration: 254989
loss: 1.002333402633667,grad_norm: 0.820104209796608, iteration: 254990
loss: 0.9772622585296631,grad_norm: 0.8804837994216681, iteration: 254991
loss: 0.960976243019104,grad_norm: 0.9894097540345058, iteration: 254992
loss: 0.9543206691741943,grad_norm: 0.9440921361030976, iteration: 254993
loss: 1.0315663814544678,grad_norm: 0.9999993230122675, iteration: 254994
loss: 1.0260645151138306,grad_norm: 0.9999996938246963, iteration: 254995
loss: 1.0043405294418335,grad_norm: 0.9666325215773582, iteration: 254996
loss: 0.9742421507835388,grad_norm: 0.8103474194091146, iteration: 254997
loss: 0.9790331125259399,grad_norm: 0.8216393320517564, iteration: 254998
loss: 1.0079721212387085,grad_norm: 0.9999990688758056, iteration: 254999
loss: 1.0121506452560425,grad_norm: 0.6630611813823255, iteration: 255000
loss: 1.0288009643554688,grad_norm: 0.7504735452941053, iteration: 255001
loss: 0.9890086054801941,grad_norm: 0.9999994683783743, iteration: 255002
loss: 0.9847227931022644,grad_norm: 0.9999991600237879, iteration: 255003
loss: 1.0061254501342773,grad_norm: 0.8547111896709276, iteration: 255004
loss: 1.056273102760315,grad_norm: 0.999999293758068, iteration: 255005
loss: 1.0154004096984863,grad_norm: 0.9566593363065686, iteration: 255006
loss: 1.0282999277114868,grad_norm: 0.8346763779180508, iteration: 255007
loss: 1.0761983394622803,grad_norm: 0.9999990341391979, iteration: 255008
loss: 1.0097182989120483,grad_norm: 0.9260893425550224, iteration: 255009
loss: 1.1691310405731201,grad_norm: 0.9999992949387653, iteration: 255010
loss: 1.0403610467910767,grad_norm: 0.9999991046707851, iteration: 255011
loss: 1.035601019859314,grad_norm: 0.9999991647598814, iteration: 255012
loss: 1.056152105331421,grad_norm: 0.929574848502828, iteration: 255013
loss: 0.9934223890304565,grad_norm: 0.9999991890586604, iteration: 255014
loss: 1.0671238899230957,grad_norm: 0.999999872134706, iteration: 255015
loss: 1.0168046951293945,grad_norm: 0.7872125308851685, iteration: 255016
loss: 1.1283069849014282,grad_norm: 0.8556164005991895, iteration: 255017
loss: 1.0279346704483032,grad_norm: 0.999999684892404, iteration: 255018
loss: 1.0502010583877563,grad_norm: 0.955626218046576, iteration: 255019
loss: 0.9636891484260559,grad_norm: 0.7597991728473321, iteration: 255020
loss: 0.9667781591415405,grad_norm: 0.8073256105794228, iteration: 255021
loss: 0.9765745997428894,grad_norm: 0.7643418401393945, iteration: 255022
loss: 1.1032164096832275,grad_norm: 0.901449891465879, iteration: 255023
loss: 1.0296510457992554,grad_norm: 0.9999990716562696, iteration: 255024
loss: 1.0254896879196167,grad_norm: 0.9999994092262938, iteration: 255025
loss: 0.995678722858429,grad_norm: 0.9056511718674697, iteration: 255026
loss: 1.0030368566513062,grad_norm: 0.8967338610686277, iteration: 255027
loss: 0.9910667538642883,grad_norm: 0.9999994359267038, iteration: 255028
loss: 1.0503991842269897,grad_norm: 0.9999994522723489, iteration: 255029
loss: 1.0063557624816895,grad_norm: 0.8387497721799336, iteration: 255030
loss: 0.9749780893325806,grad_norm: 0.9999991167399116, iteration: 255031
loss: 1.1168818473815918,grad_norm: 0.9999990897583925, iteration: 255032
loss: 1.1679201126098633,grad_norm: 0.9999992008569213, iteration: 255033
loss: 0.9935132265090942,grad_norm: 0.9205009638599578, iteration: 255034
loss: 1.0046162605285645,grad_norm: 0.8677991608772337, iteration: 255035
loss: 0.9876593351364136,grad_norm: 0.9171787835892246, iteration: 255036
loss: 1.025519847869873,grad_norm: 0.8465329779636687, iteration: 255037
loss: 0.9912158250808716,grad_norm: 0.9999998224465738, iteration: 255038
loss: 1.036057949066162,grad_norm: 0.9999997669292593, iteration: 255039
loss: 1.0413093566894531,grad_norm: 0.9430791319820085, iteration: 255040
loss: 1.0023548603057861,grad_norm: 0.9444703581529265, iteration: 255041
loss: 1.0665313005447388,grad_norm: 0.999999861800263, iteration: 255042
loss: 1.0278170108795166,grad_norm: 0.9999999371411807, iteration: 255043
loss: 1.0694438219070435,grad_norm: 0.9999999761452354, iteration: 255044
loss: 1.0056599378585815,grad_norm: 0.9972201945318491, iteration: 255045
loss: 1.0104283094406128,grad_norm: 0.7982095577815155, iteration: 255046
loss: 1.2099976539611816,grad_norm: 0.9999996275986929, iteration: 255047
loss: 1.0370296239852905,grad_norm: 0.9231626027125837, iteration: 255048
loss: 1.015356421470642,grad_norm: 0.9999992177925776, iteration: 255049
loss: 1.0855424404144287,grad_norm: 0.9999991925174694, iteration: 255050
loss: 0.9788814783096313,grad_norm: 0.8495795040759576, iteration: 255051
loss: 1.0050336122512817,grad_norm: 0.9999990912657798, iteration: 255052
loss: 1.0139665603637695,grad_norm: 0.9004511520753438, iteration: 255053
loss: 1.0928924083709717,grad_norm: 0.9999992929566552, iteration: 255054
loss: 0.9923675656318665,grad_norm: 0.9760441216653943, iteration: 255055
loss: 0.9986903071403503,grad_norm: 0.9918994326395437, iteration: 255056
loss: 1.094132900238037,grad_norm: 0.999999959996783, iteration: 255057
loss: 0.9903415441513062,grad_norm: 0.8860868697277167, iteration: 255058
loss: 1.0246031284332275,grad_norm: 0.9999992076328343, iteration: 255059
loss: 0.9997758865356445,grad_norm: 0.8363928560118542, iteration: 255060
loss: 1.084094524383545,grad_norm: 0.9999999645410445, iteration: 255061
loss: 1.062210202217102,grad_norm: 1.0000000119917043, iteration: 255062
loss: 0.9610922932624817,grad_norm: 0.8261915657671641, iteration: 255063
loss: 0.9978072047233582,grad_norm: 0.9288272238532379, iteration: 255064
loss: 1.0278606414794922,grad_norm: 0.8651926954732685, iteration: 255065
loss: 1.033623218536377,grad_norm: 0.8234934848739471, iteration: 255066
loss: 0.991381824016571,grad_norm: 0.842752958180366, iteration: 255067
loss: 0.9900510907173157,grad_norm: 0.9999990597768267, iteration: 255068
loss: 1.0152881145477295,grad_norm: 0.8586313755772412, iteration: 255069
loss: 0.9986071586608887,grad_norm: 0.9583063083619683, iteration: 255070
loss: 0.9974817633628845,grad_norm: 0.9063080987135732, iteration: 255071
loss: 1.0408940315246582,grad_norm: 0.999999213904841, iteration: 255072
loss: 1.0001572370529175,grad_norm: 0.8746191660509676, iteration: 255073
loss: 1.0124870538711548,grad_norm: 0.9254159723505495, iteration: 255074
loss: 0.9910385012626648,grad_norm: 0.9419420410753784, iteration: 255075
loss: 1.0142678022384644,grad_norm: 0.8349405923033887, iteration: 255076
loss: 1.0228979587554932,grad_norm: 0.8401057757703977, iteration: 255077
loss: 0.9984537363052368,grad_norm: 0.9999989892636372, iteration: 255078
loss: 0.9958602786064148,grad_norm: 0.999999145778066, iteration: 255079
loss: 0.959703266620636,grad_norm: 0.9359322690093578, iteration: 255080
loss: 0.9988508224487305,grad_norm: 0.9999991936104725, iteration: 255081
loss: 0.9535604119300842,grad_norm: 0.8658987477900639, iteration: 255082
loss: 1.0238388776779175,grad_norm: 0.864086680298636, iteration: 255083
loss: 0.9948534369468689,grad_norm: 0.8392011761480294, iteration: 255084
loss: 1.0114996433258057,grad_norm: 0.8816585713343085, iteration: 255085
loss: 1.003204107284546,grad_norm: 0.7717934900638357, iteration: 255086
loss: 1.02547287940979,grad_norm: 0.9999996556197562, iteration: 255087
loss: 0.9826748371124268,grad_norm: 0.9999990610040417, iteration: 255088
loss: 0.9698143601417542,grad_norm: 0.8656762028372333, iteration: 255089
loss: 0.9821667671203613,grad_norm: 0.9999991505919771, iteration: 255090
loss: 0.984161376953125,grad_norm: 0.9512692604921049, iteration: 255091
loss: 1.0219697952270508,grad_norm: 0.8710549854975775, iteration: 255092
loss: 0.9964212775230408,grad_norm: 0.994908639496517, iteration: 255093
loss: 0.9812584519386292,grad_norm: 0.8994272533200055, iteration: 255094
loss: 0.9821475744247437,grad_norm: 0.8478866758060019, iteration: 255095
loss: 0.9996243119239807,grad_norm: 0.8003971028180019, iteration: 255096
loss: 0.9892627000808716,grad_norm: 0.9999990386660518, iteration: 255097
loss: 1.0295358896255493,grad_norm: 0.99999907005176, iteration: 255098
loss: 0.9832905530929565,grad_norm: 0.9999991467579712, iteration: 255099
loss: 0.9999553561210632,grad_norm: 0.7463236479232773, iteration: 255100
loss: 1.0684012174606323,grad_norm: 0.999999176639788, iteration: 255101
loss: 0.9909035563468933,grad_norm: 0.9999991202184254, iteration: 255102
loss: 1.042887568473816,grad_norm: 0.9999997840327849, iteration: 255103
loss: 0.9965654611587524,grad_norm: 0.8470594038010263, iteration: 255104
loss: 1.0210078954696655,grad_norm: 0.9999991122711955, iteration: 255105
loss: 1.0156524181365967,grad_norm: 0.8754721528238048, iteration: 255106
loss: 1.014605164527893,grad_norm: 0.8548511495888509, iteration: 255107
loss: 0.9919959306716919,grad_norm: 0.9622879659994558, iteration: 255108
loss: 0.9781923294067383,grad_norm: 0.9999990671913978, iteration: 255109
loss: 0.9900152087211609,grad_norm: 0.8581660483797816, iteration: 255110
loss: 1.0066508054733276,grad_norm: 0.8915755675487159, iteration: 255111
loss: 1.0055044889450073,grad_norm: 0.9653957828084436, iteration: 255112
loss: 1.023788332939148,grad_norm: 0.999999818180814, iteration: 255113
loss: 0.9707028269767761,grad_norm: 0.9502607984379821, iteration: 255114
loss: 0.9764216542243958,grad_norm: 0.9999990988956682, iteration: 255115
loss: 1.0011540651321411,grad_norm: 0.839069146025937, iteration: 255116
loss: 1.0336741209030151,grad_norm: 0.9999990957449265, iteration: 255117
loss: 1.0056238174438477,grad_norm: 0.8222947200050906, iteration: 255118
loss: 0.9926046133041382,grad_norm: 0.9415980351454591, iteration: 255119
loss: 0.9668245315551758,grad_norm: 0.7745146251586763, iteration: 255120
loss: 1.041731834411621,grad_norm: 0.9413342350090126, iteration: 255121
loss: 0.9799791574478149,grad_norm: 0.8857985089150148, iteration: 255122
loss: 1.0028951168060303,grad_norm: 0.9999998914792683, iteration: 255123
loss: 0.9907063841819763,grad_norm: 0.8686351850294367, iteration: 255124
loss: 1.0785186290740967,grad_norm: 0.9999999289446069, iteration: 255125
loss: 1.0033502578735352,grad_norm: 0.9999991901362445, iteration: 255126
loss: 0.9991607666015625,grad_norm: 0.9544237922232782, iteration: 255127
loss: 1.007358431816101,grad_norm: 0.8161368658257339, iteration: 255128
loss: 1.0115798711776733,grad_norm: 0.9999996962896839, iteration: 255129
loss: 0.9879444241523743,grad_norm: 0.924809896281528, iteration: 255130
loss: 0.9519394040107727,grad_norm: 0.9336829975270717, iteration: 255131
loss: 0.9968515634536743,grad_norm: 0.8588001568662722, iteration: 255132
loss: 0.9986791014671326,grad_norm: 0.8658794089955774, iteration: 255133
loss: 1.1317741870880127,grad_norm: 0.9999990707428982, iteration: 255134
loss: 0.9624010920524597,grad_norm: 0.9999998080378232, iteration: 255135
loss: 1.001594066619873,grad_norm: 0.8795041170567083, iteration: 255136
loss: 1.0390301942825317,grad_norm: 0.9999991286365891, iteration: 255137
loss: 1.1063181161880493,grad_norm: 0.9999992451087104, iteration: 255138
loss: 1.0880632400512695,grad_norm: 0.9999994582224048, iteration: 255139
loss: 1.0007212162017822,grad_norm: 0.9999991398925764, iteration: 255140
loss: 1.0007745027542114,grad_norm: 0.9549745193319085, iteration: 255141
loss: 1.0199153423309326,grad_norm: 0.9999994562484571, iteration: 255142
loss: 1.0082576274871826,grad_norm: 0.9611344312959066, iteration: 255143
loss: 1.0099667310714722,grad_norm: 0.999999822378798, iteration: 255144
loss: 1.2120747566223145,grad_norm: 1.0000000023358737, iteration: 255145
loss: 1.006284475326538,grad_norm: 0.8435595023937617, iteration: 255146
loss: 1.0510284900665283,grad_norm: 0.8926187973075034, iteration: 255147
loss: 0.9752858281135559,grad_norm: 0.8288577840383017, iteration: 255148
loss: 1.064408779144287,grad_norm: 0.9631721703283805, iteration: 255149
loss: 0.9815621972084045,grad_norm: 0.8406878585730003, iteration: 255150
loss: 1.026978850364685,grad_norm: 0.9999990826271944, iteration: 255151
loss: 0.9840986132621765,grad_norm: 0.8951049947216456, iteration: 255152
loss: 1.0488349199295044,grad_norm: 0.999999277445553, iteration: 255153
loss: 0.9943762421607971,grad_norm: 0.9999994884641723, iteration: 255154
loss: 1.237920880317688,grad_norm: 0.9999989902615088, iteration: 255155
loss: 1.075400471687317,grad_norm: 0.8985083194267133, iteration: 255156
loss: 0.9887951612472534,grad_norm: 0.9863897067081042, iteration: 255157
loss: 1.0016862154006958,grad_norm: 0.9999991330880257, iteration: 255158
loss: 1.0149258375167847,grad_norm: 0.919190760376678, iteration: 255159
loss: 1.090256929397583,grad_norm: 0.9999993776332606, iteration: 255160
loss: 1.0565768480300903,grad_norm: 0.9999998415440269, iteration: 255161
loss: 1.0322935581207275,grad_norm: 0.9999994808258961, iteration: 255162
loss: 1.1045750379562378,grad_norm: 0.999999945534089, iteration: 255163
loss: 0.9792622923851013,grad_norm: 0.8838017744965404, iteration: 255164
loss: 1.0296382904052734,grad_norm: 0.7862572831939674, iteration: 255165
loss: 1.0118603706359863,grad_norm: 0.9999989936863154, iteration: 255166
loss: 1.0213311910629272,grad_norm: 0.9717460654304998, iteration: 255167
loss: 0.9941151142120361,grad_norm: 0.9385967142162249, iteration: 255168
loss: 0.962834358215332,grad_norm: 0.9999990806469798, iteration: 255169
loss: 1.0290305614471436,grad_norm: 0.9898069746269331, iteration: 255170
loss: 0.9968825578689575,grad_norm: 0.9999989909152639, iteration: 255171
loss: 1.03219735622406,grad_norm: 0.894480233054957, iteration: 255172
loss: 0.9826709032058716,grad_norm: 0.9999994690274316, iteration: 255173
loss: 1.0482265949249268,grad_norm: 0.8385467750813969, iteration: 255174
loss: 1.0065913200378418,grad_norm: 0.9999991952753274, iteration: 255175
loss: 0.9754554629325867,grad_norm: 0.9999995811274882, iteration: 255176
loss: 0.9889510273933411,grad_norm: 0.8653580972753042, iteration: 255177
loss: 1.0235716104507446,grad_norm: 0.8903419099230181, iteration: 255178
loss: 0.9983710050582886,grad_norm: 0.7716599290869333, iteration: 255179
loss: 1.107082486152649,grad_norm: 0.9999991690483073, iteration: 255180
loss: 0.9982196688652039,grad_norm: 0.9999990915503949, iteration: 255181
loss: 1.0272190570831299,grad_norm: 0.9999997314326733, iteration: 255182
loss: 1.0319074392318726,grad_norm: 0.982613313804884, iteration: 255183
loss: 0.9847701191902161,grad_norm: 0.8852558911671438, iteration: 255184
loss: 1.0067170858383179,grad_norm: 0.9443460555036669, iteration: 255185
loss: 1.0137768983840942,grad_norm: 0.9999992563821822, iteration: 255186
loss: 0.9634551405906677,grad_norm: 0.8064540950498669, iteration: 255187
loss: 0.9877831935882568,grad_norm: 0.9345588396626009, iteration: 255188
loss: 0.9445052146911621,grad_norm: 0.9973681077784535, iteration: 255189
loss: 0.989952564239502,grad_norm: 0.7778120142974682, iteration: 255190
loss: 1.0096708536148071,grad_norm: 0.999999169821813, iteration: 255191
loss: 1.025187373161316,grad_norm: 0.8227390003041912, iteration: 255192
loss: 0.9773756265640259,grad_norm: 0.9178592457043142, iteration: 255193
loss: 1.059361457824707,grad_norm: 0.8702787027895991, iteration: 255194
loss: 0.9984105229377747,grad_norm: 0.8216257220806242, iteration: 255195
loss: 0.9770514965057373,grad_norm: 0.9604186751780124, iteration: 255196
loss: 0.9740684628486633,grad_norm: 0.7284936078152963, iteration: 255197
loss: 1.030451774597168,grad_norm: 0.9999996245293163, iteration: 255198
loss: 1.0406545400619507,grad_norm: 0.8812455882465946, iteration: 255199
loss: 1.0011656284332275,grad_norm: 0.9504587751865633, iteration: 255200
loss: 1.0002331733703613,grad_norm: 0.8667139246820316, iteration: 255201
loss: 0.9911109209060669,grad_norm: 0.7992433466686555, iteration: 255202
loss: 0.9958495497703552,grad_norm: 0.9850819965998153, iteration: 255203
loss: 1.013883113861084,grad_norm: 0.9132932751021908, iteration: 255204
loss: 1.012681245803833,grad_norm: 0.9444419567745588, iteration: 255205
loss: 1.0055395364761353,grad_norm: 0.9999991705946735, iteration: 255206
loss: 1.0455734729766846,grad_norm: 0.927364470521898, iteration: 255207
loss: 1.049086093902588,grad_norm: 0.9322901956385519, iteration: 255208
loss: 1.0116872787475586,grad_norm: 0.9999990993380485, iteration: 255209
loss: 0.9821510314941406,grad_norm: 0.9999989440815871, iteration: 255210
loss: 1.0250991582870483,grad_norm: 0.9755226199174193, iteration: 255211
loss: 0.9514323472976685,grad_norm: 0.776447345881769, iteration: 255212
loss: 1.0114731788635254,grad_norm: 0.7872427743986349, iteration: 255213
loss: 0.9957242012023926,grad_norm: 0.8834991685767544, iteration: 255214
loss: 1.0262070894241333,grad_norm: 0.9838100717684932, iteration: 255215
loss: 1.1070950031280518,grad_norm: 0.9999993732688777, iteration: 255216
loss: 1.1288198232650757,grad_norm: 0.9999993176788962, iteration: 255217
loss: 0.952816903591156,grad_norm: 0.9999992005334373, iteration: 255218
loss: 0.9865049719810486,grad_norm: 0.8664799978436917, iteration: 255219
loss: 1.0596596002578735,grad_norm: 0.9999990918407046, iteration: 255220
loss: 0.9970118999481201,grad_norm: 0.8167443198559855, iteration: 255221
loss: 1.0108424425125122,grad_norm: 0.8608022072723497, iteration: 255222
loss: 1.0108246803283691,grad_norm: 0.8911237084541245, iteration: 255223
loss: 0.9722035527229309,grad_norm: 0.8373683896927075, iteration: 255224
loss: 0.9673684239387512,grad_norm: 0.8812464000922147, iteration: 255225
loss: 1.098760962486267,grad_norm: 0.9999997356912591, iteration: 255226
loss: 0.9981479644775391,grad_norm: 0.9276978086692703, iteration: 255227
loss: 0.997886061668396,grad_norm: 0.9999990739465595, iteration: 255228
loss: 1.0036652088165283,grad_norm: 0.9332358706442538, iteration: 255229
loss: 0.9624660611152649,grad_norm: 0.7889101949207789, iteration: 255230
loss: 0.988921046257019,grad_norm: 0.9865005553799696, iteration: 255231
loss: 0.9800488948822021,grad_norm: 0.9009835131780831, iteration: 255232
loss: 0.9813694953918457,grad_norm: 0.8474971474489683, iteration: 255233
loss: 1.0064845085144043,grad_norm: 0.8071626686828179, iteration: 255234
loss: 0.9526546597480774,grad_norm: 0.826915810400462, iteration: 255235
loss: 1.0582915544509888,grad_norm: 0.9514025723252081, iteration: 255236
loss: 1.1223739385604858,grad_norm: 0.9999990063953785, iteration: 255237
loss: 0.9932853579521179,grad_norm: 0.9453769622627216, iteration: 255238
loss: 0.9886768460273743,grad_norm: 0.8651337619886778, iteration: 255239
loss: 1.0165551900863647,grad_norm: 0.768943838351055, iteration: 255240
loss: 1.0846898555755615,grad_norm: 0.999999365195819, iteration: 255241
loss: 1.016802430152893,grad_norm: 0.9433829903605383, iteration: 255242
loss: 1.0203241109848022,grad_norm: 0.9006269142085342, iteration: 255243
loss: 1.113251805305481,grad_norm: 0.9999994058947717, iteration: 255244
loss: 1.0377404689788818,grad_norm: 0.8068363533064584, iteration: 255245
loss: 1.0032726526260376,grad_norm: 0.9559098904865632, iteration: 255246
loss: 1.0025043487548828,grad_norm: 0.8539420936104603, iteration: 255247
loss: 1.0196418762207031,grad_norm: 0.9999992208545926, iteration: 255248
loss: 0.9651893377304077,grad_norm: 0.9999991287788172, iteration: 255249
loss: 1.0720199346542358,grad_norm: 0.9096112198823874, iteration: 255250
loss: 0.9790627360343933,grad_norm: 0.8868081148116715, iteration: 255251
loss: 1.0903353691101074,grad_norm: 0.9999995560558715, iteration: 255252
loss: 1.0128370523452759,grad_norm: 0.9238432822072973, iteration: 255253
loss: 0.9962819814682007,grad_norm: 0.9999995889501591, iteration: 255254
loss: 1.0321505069732666,grad_norm: 0.9999990428834169, iteration: 255255
loss: 1.005557656288147,grad_norm: 0.93332330720579, iteration: 255256
loss: 0.9791490435600281,grad_norm: 0.9999992969598733, iteration: 255257
loss: 1.0551246404647827,grad_norm: 0.9999994911447138, iteration: 255258
loss: 1.0282111167907715,grad_norm: 0.9999989044893314, iteration: 255259
loss: 1.072806715965271,grad_norm: 0.8090609430407527, iteration: 255260
loss: 1.1816534996032715,grad_norm: 0.999999268354089, iteration: 255261
loss: 1.0099250078201294,grad_norm: 0.993320629814791, iteration: 255262
loss: 0.9891070127487183,grad_norm: 0.8918774616818581, iteration: 255263
loss: 1.0041788816452026,grad_norm: 0.9241227481179533, iteration: 255264
loss: 1.1168051958084106,grad_norm: 0.9999999894230843, iteration: 255265
loss: 1.124207615852356,grad_norm: 0.999999019802641, iteration: 255266
loss: 1.0098587274551392,grad_norm: 0.8116863807220182, iteration: 255267
loss: 0.9952146410942078,grad_norm: 0.9999992226257003, iteration: 255268
loss: 1.0055787563323975,grad_norm: 0.931425061361064, iteration: 255269
loss: 0.954586923122406,grad_norm: 0.9999992326728329, iteration: 255270
loss: 0.9604675769805908,grad_norm: 0.8302871706976841, iteration: 255271
loss: 1.0083510875701904,grad_norm: 0.9999995361831012, iteration: 255272
loss: 1.040597915649414,grad_norm: 0.9255280482148426, iteration: 255273
loss: 1.0150057077407837,grad_norm: 0.9999995779240535, iteration: 255274
loss: 1.2137460708618164,grad_norm: 0.9999995361120873, iteration: 255275
loss: 0.9949030876159668,grad_norm: 0.7977806704228817, iteration: 255276
loss: 0.970690131187439,grad_norm: 0.9999990353113787, iteration: 255277
loss: 1.0198802947998047,grad_norm: 0.8906838094394276, iteration: 255278
loss: 1.0705056190490723,grad_norm: 1.0000000791013732, iteration: 255279
loss: 1.0144741535186768,grad_norm: 0.7708793857419323, iteration: 255280
loss: 1.0849851369857788,grad_norm: 0.9556496850626206, iteration: 255281
loss: 1.005691647529602,grad_norm: 0.9999997018542952, iteration: 255282
loss: 1.190293312072754,grad_norm: 0.9999996951896806, iteration: 255283
loss: 1.0170270204544067,grad_norm: 0.9649220595394996, iteration: 255284
loss: 0.9925414323806763,grad_norm: 0.900692427546791, iteration: 255285
loss: 1.011576533317566,grad_norm: 0.8162698542542275, iteration: 255286
loss: 0.9795686602592468,grad_norm: 0.8356297825949224, iteration: 255287
loss: 1.1542099714279175,grad_norm: 0.9999990613346645, iteration: 255288
loss: 0.9705175161361694,grad_norm: 0.9362003621298636, iteration: 255289
loss: 1.0028181076049805,grad_norm: 0.9759838802732073, iteration: 255290
loss: 0.9684705138206482,grad_norm: 0.740977694217914, iteration: 255291
loss: 1.0441566705703735,grad_norm: 0.9017212859495342, iteration: 255292
loss: 1.2882071733474731,grad_norm: 0.9999995612786823, iteration: 255293
loss: 1.0161052942276,grad_norm: 0.9948427285030997, iteration: 255294
loss: 1.0051352977752686,grad_norm: 0.9999994097780576, iteration: 255295
loss: 0.9856048226356506,grad_norm: 0.9999991276043664, iteration: 255296
loss: 1.0524578094482422,grad_norm: 0.9999997704797621, iteration: 255297
loss: 1.0389243364334106,grad_norm: 0.9647072689718761, iteration: 255298
loss: 0.986513078212738,grad_norm: 0.9443787989839403, iteration: 255299
loss: 1.0047796964645386,grad_norm: 0.9999994753913749, iteration: 255300
loss: 1.0107942819595337,grad_norm: 0.9258188324734312, iteration: 255301
loss: 1.1208503246307373,grad_norm: 0.9465597915076506, iteration: 255302
loss: 0.9781138896942139,grad_norm: 0.984259625387382, iteration: 255303
loss: 1.0992597341537476,grad_norm: 0.9999994755619909, iteration: 255304
loss: 1.0125144720077515,grad_norm: 0.9531814222585675, iteration: 255305
loss: 0.9949313998222351,grad_norm: 0.9713130680718995, iteration: 255306
loss: 0.9869926571846008,grad_norm: 0.8928358131283674, iteration: 255307
loss: 0.9964712858200073,grad_norm: 0.9589590452405627, iteration: 255308
loss: 0.9600991606712341,grad_norm: 0.8308631712566462, iteration: 255309
loss: 1.0969353914260864,grad_norm: 0.9934457498811454, iteration: 255310
loss: 1.0075032711029053,grad_norm: 0.9999991130982503, iteration: 255311
loss: 1.032831072807312,grad_norm: 0.9999990639824268, iteration: 255312
loss: 0.9920175671577454,grad_norm: 0.7552768830701287, iteration: 255313
loss: 0.9987969994544983,grad_norm: 0.9999993431467052, iteration: 255314
loss: 1.007563591003418,grad_norm: 0.9089847386469393, iteration: 255315
loss: 1.0146818161010742,grad_norm: 0.6773347994023086, iteration: 255316
loss: 0.9877661466598511,grad_norm: 0.7662327934240238, iteration: 255317
loss: 1.0024999380111694,grad_norm: 0.9596752468595385, iteration: 255318
loss: 0.9767109751701355,grad_norm: 0.9999991643261896, iteration: 255319
loss: 1.0574636459350586,grad_norm: 0.9731126901975797, iteration: 255320
loss: 1.028361201286316,grad_norm: 0.9999991339690297, iteration: 255321
loss: 1.0142821073532104,grad_norm: 0.9121602135081481, iteration: 255322
loss: 1.0211912393569946,grad_norm: 0.8123853803480784, iteration: 255323
loss: 1.030165195465088,grad_norm: 0.8217322773352895, iteration: 255324
loss: 0.9784554839134216,grad_norm: 0.7595612449080564, iteration: 255325
loss: 1.0426169633865356,grad_norm: 0.9999995546496285, iteration: 255326
loss: 1.0178275108337402,grad_norm: 0.9999992064986815, iteration: 255327
loss: 1.0150878429412842,grad_norm: 0.9950212350992087, iteration: 255328
loss: 0.9946330189704895,grad_norm: 0.9999990860141788, iteration: 255329
loss: 1.0252372026443481,grad_norm: 0.9999994922034123, iteration: 255330
loss: 0.9970855116844177,grad_norm: 0.9999990551447309, iteration: 255331
loss: 0.9335859417915344,grad_norm: 0.8727860571525937, iteration: 255332
loss: 1.070483922958374,grad_norm: 0.7671957333042166, iteration: 255333
loss: 1.0378338098526,grad_norm: 0.8710204263594248, iteration: 255334
loss: 1.010178565979004,grad_norm: 0.9999996382964857, iteration: 255335
loss: 1.0315362215042114,grad_norm: 0.7930408728680203, iteration: 255336
loss: 1.017516851425171,grad_norm: 0.9345186647850612, iteration: 255337
loss: 0.9681350588798523,grad_norm: 0.9999990728112496, iteration: 255338
loss: 1.095719575881958,grad_norm: 0.9999991803373878, iteration: 255339
loss: 0.9930625557899475,grad_norm: 0.999999170281639, iteration: 255340
loss: 1.0036869049072266,grad_norm: 0.7248000434212105, iteration: 255341
loss: 0.9999487400054932,grad_norm: 0.8850136846476321, iteration: 255342
loss: 0.9624271988868713,grad_norm: 0.81289462867508, iteration: 255343
loss: 1.0089761018753052,grad_norm: 0.9999991471594859, iteration: 255344
loss: 0.9757009744644165,grad_norm: 0.8525099635913534, iteration: 255345
loss: 0.9658384323120117,grad_norm: 0.9999992381021362, iteration: 255346
loss: 0.9929381012916565,grad_norm: 0.9234072665546241, iteration: 255347
loss: 1.0321192741394043,grad_norm: 0.813857309399637, iteration: 255348
loss: 0.9819484949111938,grad_norm: 0.7396092448449871, iteration: 255349
loss: 1.0317171812057495,grad_norm: 0.9999991592539539, iteration: 255350
loss: 0.9977750182151794,grad_norm: 0.9999998623139252, iteration: 255351
loss: 1.0257452726364136,grad_norm: 0.9999996433854983, iteration: 255352
loss: 0.9914295077323914,grad_norm: 0.7487289552134943, iteration: 255353
loss: 1.0331608057022095,grad_norm: 0.9537720517953244, iteration: 255354
loss: 1.0254830121994019,grad_norm: 0.9999991712030134, iteration: 255355
loss: 1.0249676704406738,grad_norm: 0.8896065098010836, iteration: 255356
loss: 1.0427277088165283,grad_norm: 0.940184583384325, iteration: 255357
loss: 0.9575082063674927,grad_norm: 0.9500960325206635, iteration: 255358
loss: 1.014560580253601,grad_norm: 0.8131262888532824, iteration: 255359
loss: 0.9901736974716187,grad_norm: 0.8361949067299027, iteration: 255360
loss: 1.0580995082855225,grad_norm: 0.9999991702701045, iteration: 255361
loss: 1.0545226335525513,grad_norm: 0.9999991031659955, iteration: 255362
loss: 0.9685243368148804,grad_norm: 0.98455914794188, iteration: 255363
loss: 0.9830711483955383,grad_norm: 0.8719599698997296, iteration: 255364
loss: 1.0050784349441528,grad_norm: 0.9999999194913147, iteration: 255365
loss: 1.0229681730270386,grad_norm: 0.999999808073232, iteration: 255366
loss: 1.0085793733596802,grad_norm: 0.8828259736786042, iteration: 255367
loss: 1.2029428482055664,grad_norm: 0.9915695930299513, iteration: 255368
loss: 0.9325239062309265,grad_norm: 0.9170152550751974, iteration: 255369
loss: 0.9753794074058533,grad_norm: 0.8726618228185604, iteration: 255370
loss: 1.0967738628387451,grad_norm: 0.9999998381591834, iteration: 255371
loss: 0.9800810217857361,grad_norm: 0.936975658819149, iteration: 255372
loss: 1.0712718963623047,grad_norm: 0.9999999682640377, iteration: 255373
loss: 1.043681263923645,grad_norm: 0.9357033797514307, iteration: 255374
loss: 0.9898630976676941,grad_norm: 0.9999999270931712, iteration: 255375
loss: 1.0030369758605957,grad_norm: 0.7730370420185554, iteration: 255376
loss: 1.0085734128952026,grad_norm: 0.950024535468148, iteration: 255377
loss: 1.0395292043685913,grad_norm: 0.9999990705200974, iteration: 255378
loss: 0.9655406475067139,grad_norm: 0.8859975849254013, iteration: 255379
loss: 1.000097632408142,grad_norm: 0.8465755305610347, iteration: 255380
loss: 0.9890662431716919,grad_norm: 0.9999994841983284, iteration: 255381
loss: 0.981182336807251,grad_norm: 0.7958407617314207, iteration: 255382
loss: 1.0069127082824707,grad_norm: 0.9999991026400108, iteration: 255383
loss: 1.0253808498382568,grad_norm: 0.847922230409683, iteration: 255384
loss: 0.9996875524520874,grad_norm: 0.9603682888652916, iteration: 255385
loss: 1.0149281024932861,grad_norm: 0.9919924785146392, iteration: 255386
loss: 0.9912407994270325,grad_norm: 0.8790653971704925, iteration: 255387
loss: 1.0028241872787476,grad_norm: 0.9234130513877465, iteration: 255388
loss: 1.0893445014953613,grad_norm: 0.9999991527700909, iteration: 255389
loss: 1.008154034614563,grad_norm: 0.9484825696482153, iteration: 255390
loss: 0.99797123670578,grad_norm: 0.880801630819159, iteration: 255391
loss: 1.0256903171539307,grad_norm: 0.9999993497176805, iteration: 255392
loss: 1.0161916017532349,grad_norm: 0.9174683978367665, iteration: 255393
loss: 1.0037609338760376,grad_norm: 0.8520637852920697, iteration: 255394
loss: 1.0674434900283813,grad_norm: 0.999999336431167, iteration: 255395
loss: 0.9970206618309021,grad_norm: 0.9999989959127421, iteration: 255396
loss: 1.0440800189971924,grad_norm: 0.9008746428132234, iteration: 255397
loss: 0.9966694712638855,grad_norm: 0.8834172846156644, iteration: 255398
loss: 1.010854721069336,grad_norm: 0.7377741489355175, iteration: 255399
loss: 1.0321125984191895,grad_norm: 0.8278657349903398, iteration: 255400
loss: 0.9927073121070862,grad_norm: 0.7483059750243924, iteration: 255401
loss: 0.9760721325874329,grad_norm: 0.8599107107658601, iteration: 255402
loss: 0.9872549176216125,grad_norm: 0.8186401143104077, iteration: 255403
loss: 1.0356429815292358,grad_norm: 0.9999999095121629, iteration: 255404
loss: 1.0350184440612793,grad_norm: 0.8667587736744692, iteration: 255405
loss: 0.9966932535171509,grad_norm: 0.9738184712995136, iteration: 255406
loss: 1.0422356128692627,grad_norm: 0.9065342087585051, iteration: 255407
loss: 0.9935238361358643,grad_norm: 0.9996829098005042, iteration: 255408
loss: 0.9975019693374634,grad_norm: 0.8262058600467219, iteration: 255409
loss: 1.066510796546936,grad_norm: 0.9999994724441715, iteration: 255410
loss: 1.0501796007156372,grad_norm: 0.8950815894445883, iteration: 255411
loss: 0.9756376147270203,grad_norm: 0.8152332515179037, iteration: 255412
loss: 1.0082193613052368,grad_norm: 0.909243264516977, iteration: 255413
loss: 1.1314297914505005,grad_norm: 0.9999992214467666, iteration: 255414
loss: 0.9948505759239197,grad_norm: 0.9442808809412065, iteration: 255415
loss: 0.9739681482315063,grad_norm: 0.9420161765708873, iteration: 255416
loss: 0.9731643199920654,grad_norm: 0.8882654892183175, iteration: 255417
loss: 1.0993118286132812,grad_norm: 0.9999997813373875, iteration: 255418
loss: 0.9827256798744202,grad_norm: 0.8972281758045495, iteration: 255419
loss: 0.9945851564407349,grad_norm: 0.9453896711252818, iteration: 255420
loss: 0.9976993203163147,grad_norm: 0.8017831604186301, iteration: 255421
loss: 0.997933566570282,grad_norm: 0.8642333124490793, iteration: 255422
loss: 1.0497289896011353,grad_norm: 0.9548065541388846, iteration: 255423
loss: 1.0275444984436035,grad_norm: 0.7285829799665957, iteration: 255424
loss: 1.0162622928619385,grad_norm: 0.9999995433290579, iteration: 255425
loss: 0.9642374515533447,grad_norm: 0.8852502750650805, iteration: 255426
loss: 1.0410505533218384,grad_norm: 0.9999992022857348, iteration: 255427
loss: 0.9582315683364868,grad_norm: 0.8678378348804351, iteration: 255428
loss: 1.0223006010055542,grad_norm: 0.9999990710653859, iteration: 255429
loss: 0.9610357880592346,grad_norm: 0.9999990989473542, iteration: 255430
loss: 0.9747204780578613,grad_norm: 0.8959160328544353, iteration: 255431
loss: 1.0002548694610596,grad_norm: 0.9063129022054197, iteration: 255432
loss: 1.0690768957138062,grad_norm: 0.886693764378164, iteration: 255433
loss: 1.040185809135437,grad_norm: 0.9999990767639668, iteration: 255434
loss: 0.9886179566383362,grad_norm: 0.9067386243763542, iteration: 255435
loss: 1.0306216478347778,grad_norm: 0.9573732206324718, iteration: 255436
loss: 1.084004521369934,grad_norm: 0.9932536857656663, iteration: 255437
loss: 1.0112453699111938,grad_norm: 0.9999991411103517, iteration: 255438
loss: 1.0136984586715698,grad_norm: 0.9852734065803564, iteration: 255439
loss: 0.9685061573982239,grad_norm: 0.9177528810889117, iteration: 255440
loss: 0.9843875169754028,grad_norm: 0.9999989997621535, iteration: 255441
loss: 1.1150652170181274,grad_norm: 0.9999997266694386, iteration: 255442
loss: 0.9900697469711304,grad_norm: 0.8597461358729166, iteration: 255443
loss: 1.0169323682785034,grad_norm: 0.8832339195676431, iteration: 255444
loss: 0.9931995272636414,grad_norm: 0.9999991746082952, iteration: 255445
loss: 1.046176791191101,grad_norm: 0.9995778071061878, iteration: 255446
loss: 1.0130935907363892,grad_norm: 0.825549667018157, iteration: 255447
loss: 0.9990710616111755,grad_norm: 0.9729606079308744, iteration: 255448
loss: 0.9855494499206543,grad_norm: 0.9318360295021876, iteration: 255449
loss: 1.0093311071395874,grad_norm: 0.999999700246682, iteration: 255450
loss: 1.0656201839447021,grad_norm: 0.8888306335460748, iteration: 255451
loss: 0.9847946763038635,grad_norm: 0.829917743000202, iteration: 255452
loss: 1.1713740825653076,grad_norm: 0.9999993602385414, iteration: 255453
loss: 1.0358998775482178,grad_norm: 0.9999997252287657, iteration: 255454
loss: 1.0557218790054321,grad_norm: 0.9999990085174637, iteration: 255455
loss: 1.0087796449661255,grad_norm: 0.7608188574023588, iteration: 255456
loss: 1.0557737350463867,grad_norm: 0.9999991084951783, iteration: 255457
loss: 0.9836590886116028,grad_norm: 0.8939044259011484, iteration: 255458
loss: 1.1286946535110474,grad_norm: 0.99999943238739, iteration: 255459
loss: 0.988697350025177,grad_norm: 0.9999990797862516, iteration: 255460
loss: 0.9946780800819397,grad_norm: 0.8471667326120086, iteration: 255461
loss: 1.0625011920928955,grad_norm: 0.8620464164982561, iteration: 255462
loss: 1.0839210748672485,grad_norm: 0.9999991247101762, iteration: 255463
loss: 1.077562689781189,grad_norm: 0.9999997371813346, iteration: 255464
loss: 1.0648791790008545,grad_norm: 0.9155092393059933, iteration: 255465
loss: 1.025469183921814,grad_norm: 0.9999991639400538, iteration: 255466
loss: 1.0169117450714111,grad_norm: 0.9504669289587685, iteration: 255467
loss: 1.0215308666229248,grad_norm: 0.9999999794547245, iteration: 255468
loss: 0.9975156784057617,grad_norm: 0.708725820177157, iteration: 255469
loss: 1.0924091339111328,grad_norm: 0.9344604422092159, iteration: 255470
loss: 1.0105749368667603,grad_norm: 0.9999990769561512, iteration: 255471
loss: 1.038397192955017,grad_norm: 0.8928294644137481, iteration: 255472
loss: 1.000422477722168,grad_norm: 0.9101518543764983, iteration: 255473
loss: 0.950099527835846,grad_norm: 0.847012065704406, iteration: 255474
loss: 1.0317201614379883,grad_norm: 0.9999993493745148, iteration: 255475
loss: 1.0203133821487427,grad_norm: 0.7520088591764813, iteration: 255476
loss: 0.9899551272392273,grad_norm: 0.8832926581195193, iteration: 255477
loss: 1.0349183082580566,grad_norm: 0.9265186946217888, iteration: 255478
loss: 0.9802692532539368,grad_norm: 0.8328590903650119, iteration: 255479
loss: 1.0034334659576416,grad_norm: 0.8200942687534146, iteration: 255480
loss: 0.9956837892532349,grad_norm: 0.9021157008078126, iteration: 255481
loss: 1.0265510082244873,grad_norm: 0.9999991359325408, iteration: 255482
loss: 0.9889404773712158,grad_norm: 0.8927955191059079, iteration: 255483
loss: 0.9871402978897095,grad_norm: 0.9999995069152768, iteration: 255484
loss: 1.039963722229004,grad_norm: 0.9999991515740941, iteration: 255485
loss: 1.018174648284912,grad_norm: 0.7080808883675999, iteration: 255486
loss: 1.1248685121536255,grad_norm: 0.9999992925785883, iteration: 255487
loss: 0.9813339114189148,grad_norm: 0.8486057249010068, iteration: 255488
loss: 0.9623314142227173,grad_norm: 0.8550304321648856, iteration: 255489
loss: 1.0090956687927246,grad_norm: 0.9999990104792957, iteration: 255490
loss: 1.0450551509857178,grad_norm: 0.9999991796352924, iteration: 255491
loss: 0.972800076007843,grad_norm: 0.9082798354880464, iteration: 255492
loss: 0.9673855304718018,grad_norm: 0.9999990112310732, iteration: 255493
loss: 1.0059781074523926,grad_norm: 0.9999991011470344, iteration: 255494
loss: 0.9932880997657776,grad_norm: 0.9999995378189893, iteration: 255495
loss: 1.0365041494369507,grad_norm: 0.8867052555199163, iteration: 255496
loss: 1.026695728302002,grad_norm: 0.9033205997407806, iteration: 255497
loss: 1.023252248764038,grad_norm: 0.9816348410763355, iteration: 255498
loss: 1.052559494972229,grad_norm: 0.9999998106309381, iteration: 255499
loss: 0.9779780507087708,grad_norm: 0.834574430304095, iteration: 255500
loss: 0.9930419921875,grad_norm: 0.8796710460886412, iteration: 255501
loss: 1.045724868774414,grad_norm: 0.9999991454592413, iteration: 255502
loss: 0.994034469127655,grad_norm: 0.891399301768366, iteration: 255503
loss: 0.9921128749847412,grad_norm: 0.887075908991694, iteration: 255504
loss: 1.0198277235031128,grad_norm: 0.9486217325254137, iteration: 255505
loss: 0.9996752738952637,grad_norm: 0.9999996979292851, iteration: 255506
loss: 0.9972773194313049,grad_norm: 0.8328253101732119, iteration: 255507
loss: 0.971221923828125,grad_norm: 0.9999990190682135, iteration: 255508
loss: 1.01237154006958,grad_norm: 0.7910484070546273, iteration: 255509
loss: 1.0188742876052856,grad_norm: 0.9999992451131378, iteration: 255510
loss: 1.0234057903289795,grad_norm: 0.8923539033669583, iteration: 255511
loss: 1.0013645887374878,grad_norm: 0.8052743755881531, iteration: 255512
loss: 0.9877355694770813,grad_norm: 0.9453639738350226, iteration: 255513
loss: 1.0122520923614502,grad_norm: 0.7920211738148036, iteration: 255514
loss: 0.9993818402290344,grad_norm: 0.999999140991509, iteration: 255515
loss: 1.0198636054992676,grad_norm: 0.8093022870769673, iteration: 255516
loss: 0.955902636051178,grad_norm: 0.9999991444357496, iteration: 255517
loss: 1.0289695262908936,grad_norm: 0.9999992170254951, iteration: 255518
loss: 0.9988603591918945,grad_norm: 0.9999990727065651, iteration: 255519
loss: 0.9802744388580322,grad_norm: 0.8281360835997377, iteration: 255520
loss: 1.0229225158691406,grad_norm: 0.9999991012910328, iteration: 255521
loss: 0.9680542945861816,grad_norm: 0.9999991270035833, iteration: 255522
loss: 1.0161625146865845,grad_norm: 0.9999990170469398, iteration: 255523
loss: 1.0117613077163696,grad_norm: 0.9999991019161484, iteration: 255524
loss: 1.0976002216339111,grad_norm: 0.9999991396476184, iteration: 255525
loss: 1.1133207082748413,grad_norm: 0.9999999335504743, iteration: 255526
loss: 0.9865713715553284,grad_norm: 0.8246865615195923, iteration: 255527
loss: 1.0050992965698242,grad_norm: 0.9999990358707543, iteration: 255528
loss: 0.985138475894928,grad_norm: 0.9482132072795421, iteration: 255529
loss: 1.0246895551681519,grad_norm: 0.8706387039284618, iteration: 255530
loss: 0.988736629486084,grad_norm: 0.9999992186900337, iteration: 255531
loss: 1.0054614543914795,grad_norm: 0.7783406374287577, iteration: 255532
loss: 1.0142123699188232,grad_norm: 0.9999997410600514, iteration: 255533
loss: 1.000048279762268,grad_norm: 0.8653320954837437, iteration: 255534
loss: 0.9725412130355835,grad_norm: 0.8893722223233241, iteration: 255535
loss: 0.9535534977912903,grad_norm: 0.8979014572958269, iteration: 255536
loss: 1.00444757938385,grad_norm: 0.9594695567059427, iteration: 255537
loss: 0.9751686453819275,grad_norm: 0.9999990987257966, iteration: 255538
loss: 0.9764716029167175,grad_norm: 0.9185247588291128, iteration: 255539
loss: 1.4588291645050049,grad_norm: 0.9999999356175677, iteration: 255540
loss: 1.0170977115631104,grad_norm: 0.826686680945521, iteration: 255541
loss: 0.9911385178565979,grad_norm: 0.950018193633427, iteration: 255542
loss: 0.9823009967803955,grad_norm: 0.8929554526315975, iteration: 255543
loss: 1.0258612632751465,grad_norm: 0.8387965934341889, iteration: 255544
loss: 1.0160634517669678,grad_norm: 0.9999989393842103, iteration: 255545
loss: 1.0056945085525513,grad_norm: 0.770313320328439, iteration: 255546
loss: 1.0604816675186157,grad_norm: 0.9984063450403132, iteration: 255547
loss: 0.9655413031578064,grad_norm: 0.7759475686622467, iteration: 255548
loss: 1.0344265699386597,grad_norm: 0.9128117892534011, iteration: 255549
loss: 0.9903366565704346,grad_norm: 0.9090087173476656, iteration: 255550
loss: 0.9743767380714417,grad_norm: 0.9205385905699879, iteration: 255551
loss: 1.0046887397766113,grad_norm: 0.9279337980574714, iteration: 255552
loss: 0.9983373284339905,grad_norm: 0.9999999968034007, iteration: 255553
loss: 1.1870664358139038,grad_norm: 0.9999996070521526, iteration: 255554
loss: 0.9595140814781189,grad_norm: 0.7559649739131336, iteration: 255555
loss: 1.0045922994613647,grad_norm: 0.9174019793676765, iteration: 255556
loss: 0.9699240922927856,grad_norm: 0.8432030365543921, iteration: 255557
loss: 1.0101081132888794,grad_norm: 0.8820611078330745, iteration: 255558
loss: 1.0133286714553833,grad_norm: 0.7984993265528761, iteration: 255559
loss: 1.0083589553833008,grad_norm: 0.8851907503300719, iteration: 255560
loss: 1.009734869003296,grad_norm: 0.9999991293150274, iteration: 255561
loss: 0.9933192133903503,grad_norm: 0.8994173600977994, iteration: 255562
loss: 0.9693313837051392,grad_norm: 0.8291538577077284, iteration: 255563
loss: 0.9926939606666565,grad_norm: 0.8859101167360074, iteration: 255564
loss: 0.9792540669441223,grad_norm: 0.883205496640973, iteration: 255565
loss: 0.988512396812439,grad_norm: 0.8249568773082353, iteration: 255566
loss: 0.9732226133346558,grad_norm: 0.9065819957604263, iteration: 255567
loss: 0.9536836743354797,grad_norm: 0.7496250054378728, iteration: 255568
loss: 1.0053563117980957,grad_norm: 0.9805295498042775, iteration: 255569
loss: 1.007806420326233,grad_norm: 0.9999990040566229, iteration: 255570
loss: 1.0127981901168823,grad_norm: 0.8003497496559348, iteration: 255571
loss: 1.026583194732666,grad_norm: 0.8433537119297069, iteration: 255572
loss: 1.0622060298919678,grad_norm: 0.9999992902552668, iteration: 255573
loss: 0.9891698360443115,grad_norm: 0.8386921587673544, iteration: 255574
loss: 0.995681881904602,grad_norm: 0.7907020649623876, iteration: 255575
loss: 0.992539644241333,grad_norm: 0.9999991155995631, iteration: 255576
loss: 0.990084171295166,grad_norm: 0.8849762863011126, iteration: 255577
loss: 1.021106243133545,grad_norm: 0.8426837328200355, iteration: 255578
loss: 1.0182673931121826,grad_norm: 0.9999989981542701, iteration: 255579
loss: 1.002829909324646,grad_norm: 0.9751992807032843, iteration: 255580
loss: 1.0232195854187012,grad_norm: 0.8328212328871015, iteration: 255581
loss: 0.9914907217025757,grad_norm: 0.8138805701682065, iteration: 255582
loss: 0.9957056641578674,grad_norm: 0.7421040785053359, iteration: 255583
loss: 1.0420254468917847,grad_norm: 0.9999996080972354, iteration: 255584
loss: 1.1160621643066406,grad_norm: 0.9999989762418082, iteration: 255585
loss: 0.9924134612083435,grad_norm: 0.9763512076724074, iteration: 255586
loss: 1.033333659172058,grad_norm: 0.8998961587537813, iteration: 255587
loss: 0.9688568711280823,grad_norm: 0.8863920954054637, iteration: 255588
loss: 1.0130720138549805,grad_norm: 0.7900967048735184, iteration: 255589
loss: 1.001583456993103,grad_norm: 0.999999489185424, iteration: 255590
loss: 0.9902392029762268,grad_norm: 0.9005926022134702, iteration: 255591
loss: 1.0318882465362549,grad_norm: 0.9999989980327492, iteration: 255592
loss: 0.9935765266418457,grad_norm: 0.78702187802729, iteration: 255593
loss: 0.9803551435470581,grad_norm: 0.9252155031170051, iteration: 255594
loss: 1.0664185285568237,grad_norm: 0.9999999986773722, iteration: 255595
loss: 0.9832109212875366,grad_norm: 0.984913827686899, iteration: 255596
loss: 1.0010708570480347,grad_norm: 0.8684782659234943, iteration: 255597
loss: 0.9895991086959839,grad_norm: 0.856141204724135, iteration: 255598
loss: 1.104954719543457,grad_norm: 0.99999916886327, iteration: 255599
loss: 0.9938182234764099,grad_norm: 0.8848906756630426, iteration: 255600
loss: 0.9883002638816833,grad_norm: 0.8876116472790181, iteration: 255601
loss: 1.028439998626709,grad_norm: 0.8703539970663168, iteration: 255602
loss: 0.9898964762687683,grad_norm: 0.9783206338907562, iteration: 255603
loss: 0.9965665340423584,grad_norm: 0.7980827752208969, iteration: 255604
loss: 1.0222355127334595,grad_norm: 0.8798479991060326, iteration: 255605
loss: 1.0263707637786865,grad_norm: 0.9999990059646438, iteration: 255606
loss: 1.0263131856918335,grad_norm: 0.8844508372250441, iteration: 255607
loss: 1.0213570594787598,grad_norm: 0.9762400859162984, iteration: 255608
loss: 0.985050618648529,grad_norm: 0.8515954022645994, iteration: 255609
loss: 0.9809125065803528,grad_norm: 0.9999989425841125, iteration: 255610
loss: 1.0147970914840698,grad_norm: 0.8935830904811677, iteration: 255611
loss: 1.0170713663101196,grad_norm: 0.9420141262421269, iteration: 255612
loss: 1.0364855527877808,grad_norm: 0.9449819319239428, iteration: 255613
loss: 0.9650728106498718,grad_norm: 0.8164158009055518, iteration: 255614
loss: 1.0000132322311401,grad_norm: 0.9280124979525265, iteration: 255615
loss: 1.0108264684677124,grad_norm: 0.9487608949117026, iteration: 255616
loss: 0.9642751812934875,grad_norm: 0.9999990424159644, iteration: 255617
loss: 1.0193557739257812,grad_norm: 0.9639523780426634, iteration: 255618
loss: 0.9632899761199951,grad_norm: 0.9723042464880144, iteration: 255619
loss: 1.0232303142547607,grad_norm: 0.9124059749428997, iteration: 255620
loss: 0.9873766303062439,grad_norm: 0.8263409775353968, iteration: 255621
loss: 1.054256558418274,grad_norm: 0.9999990273205318, iteration: 255622
loss: 0.957789957523346,grad_norm: 0.9454330260181638, iteration: 255623
loss: 0.987396776676178,grad_norm: 0.9999991567416884, iteration: 255624
loss: 1.0081936120986938,grad_norm: 0.8180736261742143, iteration: 255625
loss: 1.0834118127822876,grad_norm: 0.9999996102351087, iteration: 255626
loss: 1.0430550575256348,grad_norm: 0.8886898774777516, iteration: 255627
loss: 1.0258108377456665,grad_norm: 0.8876390490267769, iteration: 255628
loss: 0.9616844058036804,grad_norm: 0.9707630614943805, iteration: 255629
loss: 1.0097941160202026,grad_norm: 0.7099859014092001, iteration: 255630
loss: 0.9911937117576599,grad_norm: 0.777160670346976, iteration: 255631
loss: 1.0256038904190063,grad_norm: 0.9999989764464284, iteration: 255632
loss: 1.0009236335754395,grad_norm: 0.7942373715046707, iteration: 255633
loss: 0.9895651340484619,grad_norm: 0.8770155190709117, iteration: 255634
loss: 0.9783000349998474,grad_norm: 0.8880079751585638, iteration: 255635
loss: 0.9616333842277527,grad_norm: 0.9999991623979333, iteration: 255636
loss: 1.0673251152038574,grad_norm: 0.9427091055164426, iteration: 255637
loss: 1.0334508419036865,grad_norm: 0.9873184975614242, iteration: 255638
loss: 0.9730968475341797,grad_norm: 0.9075397610365907, iteration: 255639
loss: 1.0106486082077026,grad_norm: 0.8873139685725264, iteration: 255640
loss: 0.9943368434906006,grad_norm: 0.9296329880566709, iteration: 255641
loss: 1.052354097366333,grad_norm: 0.9999996653103898, iteration: 255642
loss: 1.0112749338150024,grad_norm: 0.9999990990997297, iteration: 255643
loss: 0.9891055822372437,grad_norm: 0.9754979689996551, iteration: 255644
loss: 1.0385323762893677,grad_norm: 0.8520740645461204, iteration: 255645
loss: 1.0012822151184082,grad_norm: 0.8262296253756022, iteration: 255646
loss: 0.9465062022209167,grad_norm: 0.9208785412671492, iteration: 255647
loss: 0.962780773639679,grad_norm: 0.9635413195246992, iteration: 255648
loss: 1.0134152173995972,grad_norm: 0.8719184973572294, iteration: 255649
loss: 1.0006593465805054,grad_norm: 0.916576744058355, iteration: 255650
loss: 1.0084980726242065,grad_norm: 0.8991658265216983, iteration: 255651
loss: 1.1620458364486694,grad_norm: 0.9999994938359548, iteration: 255652
loss: 0.9919701814651489,grad_norm: 0.9069439883978437, iteration: 255653
loss: 0.9840114712715149,grad_norm: 0.922120778532193, iteration: 255654
loss: 1.0056726932525635,grad_norm: 0.8915544617850939, iteration: 255655
loss: 1.0040686130523682,grad_norm: 0.8815362245348224, iteration: 255656
loss: 0.9959850311279297,grad_norm: 0.9999990844909199, iteration: 255657
loss: 1.0246526002883911,grad_norm: 0.9999992111295755, iteration: 255658
loss: 0.9967520833015442,grad_norm: 0.9999991590704479, iteration: 255659
loss: 0.9861361384391785,grad_norm: 0.9397429099907247, iteration: 255660
loss: 1.0143531560897827,grad_norm: 0.9789800081202531, iteration: 255661
loss: 1.0278937816619873,grad_norm: 0.9999995094451802, iteration: 255662
loss: 0.9981861114501953,grad_norm: 0.7770311057332046, iteration: 255663
loss: 0.9908757209777832,grad_norm: 0.7815307801709314, iteration: 255664
loss: 0.9919928908348083,grad_norm: 0.999999024402667, iteration: 255665
loss: 0.9893385767936707,grad_norm: 0.9999992696185708, iteration: 255666
loss: 1.0136802196502686,grad_norm: 0.9669568663147449, iteration: 255667
loss: 1.048889398574829,grad_norm: 0.932820188341941, iteration: 255668
loss: 0.9950100183486938,grad_norm: 0.8643968258972065, iteration: 255669
loss: 0.9973370432853699,grad_norm: 0.9182835376534937, iteration: 255670
loss: 0.9838314652442932,grad_norm: 0.9485955164285729, iteration: 255671
loss: 0.9920117259025574,grad_norm: 0.9490030257839843, iteration: 255672
loss: 0.9944244623184204,grad_norm: 0.8466636211389946, iteration: 255673
loss: 0.9749534130096436,grad_norm: 0.8969507518890038, iteration: 255674
loss: 1.0031664371490479,grad_norm: 0.8537182086825028, iteration: 255675
loss: 1.0122284889221191,grad_norm: 0.9999991313692602, iteration: 255676
loss: 1.0011639595031738,grad_norm: 0.8222720349486674, iteration: 255677
loss: 1.0485937595367432,grad_norm: 0.946649873383793, iteration: 255678
loss: 0.9883885383605957,grad_norm: 0.8324318159299481, iteration: 255679
loss: 1.0246895551681519,grad_norm: 0.9060906459306167, iteration: 255680
loss: 0.9551923274993896,grad_norm: 0.9999991384623115, iteration: 255681
loss: 1.0446155071258545,grad_norm: 0.9999991754524477, iteration: 255682
loss: 0.9729931950569153,grad_norm: 0.8630813237829501, iteration: 255683
loss: 1.0136231184005737,grad_norm: 0.8273968677095828, iteration: 255684
loss: 1.0220774412155151,grad_norm: 0.9999991340337643, iteration: 255685
loss: 0.9763363599777222,grad_norm: 0.9845748816253358, iteration: 255686
loss: 1.0846861600875854,grad_norm: 0.9999991694227592, iteration: 255687
loss: 0.9864959716796875,grad_norm: 0.929865223069639, iteration: 255688
loss: 1.0177794694900513,grad_norm: 0.9874933291047056, iteration: 255689
loss: 1.0073857307434082,grad_norm: 0.9917448058364763, iteration: 255690
loss: 0.9874303936958313,grad_norm: 0.9434232307095823, iteration: 255691
loss: 0.9797813892364502,grad_norm: 0.8034414512491019, iteration: 255692
loss: 1.0609402656555176,grad_norm: 0.999999004447709, iteration: 255693
loss: 1.0168882608413696,grad_norm: 0.904782113416087, iteration: 255694
loss: 1.0041494369506836,grad_norm: 0.7730158329075137, iteration: 255695
loss: 1.1363096237182617,grad_norm: 0.9999990116150714, iteration: 255696
loss: 1.0199178457260132,grad_norm: 0.9999992459248169, iteration: 255697
loss: 0.9530187249183655,grad_norm: 0.9571335367853288, iteration: 255698
loss: 1.056538701057434,grad_norm: 0.8369813649472946, iteration: 255699
loss: 1.0221123695373535,grad_norm: 0.9264206257777887, iteration: 255700
loss: 0.9898552298545837,grad_norm: 0.7647904078258722, iteration: 255701
loss: 1.0919476747512817,grad_norm: 0.999999956832552, iteration: 255702
loss: 1.0785439014434814,grad_norm: 0.9999990391066416, iteration: 255703
loss: 0.9995457530021667,grad_norm: 0.7559611708267265, iteration: 255704
loss: 1.02415931224823,grad_norm: 0.9999990624203806, iteration: 255705
loss: 1.0191547870635986,grad_norm: 0.8866726600917765, iteration: 255706
loss: 0.9673666954040527,grad_norm: 0.9999991348303267, iteration: 255707
loss: 1.0069656372070312,grad_norm: 0.8488306001007696, iteration: 255708
loss: 1.0117666721343994,grad_norm: 0.9999991899137888, iteration: 255709
loss: 1.0081875324249268,grad_norm: 0.9113301551143721, iteration: 255710
loss: 1.0035392045974731,grad_norm: 0.7943839725221392, iteration: 255711
loss: 0.9977412819862366,grad_norm: 0.9696792264795145, iteration: 255712
loss: 0.9852364659309387,grad_norm: 0.9683060905726928, iteration: 255713
loss: 1.0081149339675903,grad_norm: 0.9019271674296357, iteration: 255714
loss: 1.0267843008041382,grad_norm: 0.9999991228046158, iteration: 255715
loss: 1.011257529258728,grad_norm: 0.8537759319058666, iteration: 255716
loss: 1.000651478767395,grad_norm: 0.9181170666671576, iteration: 255717
loss: 1.055708646774292,grad_norm: 0.8392181289557717, iteration: 255718
loss: 0.9653946757316589,grad_norm: 0.9194184833563241, iteration: 255719
loss: 0.9557910561561584,grad_norm: 0.8187572255335386, iteration: 255720
loss: 1.0347654819488525,grad_norm: 0.9999990738789221, iteration: 255721
loss: 1.009029507637024,grad_norm: 0.9661015790394707, iteration: 255722
loss: 0.9925556182861328,grad_norm: 0.8725573887418199, iteration: 255723
loss: 1.1755504608154297,grad_norm: 0.9999998563753075, iteration: 255724
loss: 1.004165530204773,grad_norm: 0.8455974973763853, iteration: 255725
loss: 1.0021953582763672,grad_norm: 0.7952708735902648, iteration: 255726
loss: 0.9735091924667358,grad_norm: 0.9028231651101856, iteration: 255727
loss: 1.0164809226989746,grad_norm: 0.8523253700086028, iteration: 255728
loss: 1.0534125566482544,grad_norm: 0.9999995546062477, iteration: 255729
loss: 0.9574217796325684,grad_norm: 0.9999989896093003, iteration: 255730
loss: 0.9944504499435425,grad_norm: 0.7349979761066927, iteration: 255731
loss: 1.0028924942016602,grad_norm: 0.9999991089472747, iteration: 255732
loss: 0.9916701316833496,grad_norm: 0.8349148621215923, iteration: 255733
loss: 0.9983099102973938,grad_norm: 0.8468295123322035, iteration: 255734
loss: 1.0256989002227783,grad_norm: 0.9357999742865067, iteration: 255735
loss: 0.9961491823196411,grad_norm: 0.8289274528705195, iteration: 255736
loss: 1.0254002809524536,grad_norm: 0.9999992030896327, iteration: 255737
loss: 0.9815518260002136,grad_norm: 0.8054768191888126, iteration: 255738
loss: 0.999157190322876,grad_norm: 0.9717235572337687, iteration: 255739
loss: 0.9997363686561584,grad_norm: 0.9094248270407435, iteration: 255740
loss: 0.9999313354492188,grad_norm: 0.8079517007249534, iteration: 255741
loss: 1.012636661529541,grad_norm: 0.8270533435375923, iteration: 255742
loss: 1.0505082607269287,grad_norm: 0.846145545351438, iteration: 255743
loss: 1.0000126361846924,grad_norm: 0.880958779502653, iteration: 255744
loss: 1.0551207065582275,grad_norm: 0.9999997707762043, iteration: 255745
loss: 1.0252344608306885,grad_norm: 0.7711595612800947, iteration: 255746
loss: 1.0087717771530151,grad_norm: 0.8796177830872502, iteration: 255747
loss: 1.0068244934082031,grad_norm: 0.8873155671136589, iteration: 255748
loss: 0.9671735763549805,grad_norm: 0.847919851263011, iteration: 255749
loss: 0.982499897480011,grad_norm: 0.8382923887888041, iteration: 255750
loss: 0.9944957494735718,grad_norm: 0.7767833794533684, iteration: 255751
loss: 1.0030460357666016,grad_norm: 0.9469591034612007, iteration: 255752
loss: 0.9550951719284058,grad_norm: 0.8709959109570968, iteration: 255753
loss: 1.001118779182434,grad_norm: 0.8525131688272407, iteration: 255754
loss: 1.0075721740722656,grad_norm: 0.9999990090909293, iteration: 255755
loss: 0.9951156377792358,grad_norm: 0.7373336866299215, iteration: 255756
loss: 0.953961193561554,grad_norm: 0.9102888631402193, iteration: 255757
loss: 1.0062345266342163,grad_norm: 0.8676032670861592, iteration: 255758
loss: 1.0992995500564575,grad_norm: 0.9999991419410599, iteration: 255759
loss: 1.0032633543014526,grad_norm: 0.7911259127504714, iteration: 255760
loss: 1.001344084739685,grad_norm: 0.6824666051187126, iteration: 255761
loss: 0.9781877398490906,grad_norm: 0.9999989708742371, iteration: 255762
loss: 0.9778640866279602,grad_norm: 0.8257706512711636, iteration: 255763
loss: 1.0243067741394043,grad_norm: 0.771685145810833, iteration: 255764
loss: 0.9866940975189209,grad_norm: 0.9496714551699192, iteration: 255765
loss: 0.9783220291137695,grad_norm: 0.8236172521646011, iteration: 255766
loss: 1.0170918703079224,grad_norm: 0.9999989573314559, iteration: 255767
loss: 0.9959837794303894,grad_norm: 0.9999990419845274, iteration: 255768
loss: 1.0104026794433594,grad_norm: 0.7354490214686656, iteration: 255769
loss: 0.9866001009941101,grad_norm: 0.9999996935104982, iteration: 255770
loss: 0.9938587546348572,grad_norm: 0.8170479808950972, iteration: 255771
loss: 0.9840173125267029,grad_norm: 0.8299791120933196, iteration: 255772
loss: 1.0419836044311523,grad_norm: 0.9359011019855381, iteration: 255773
loss: 1.044570803642273,grad_norm: 0.9999992507289167, iteration: 255774
loss: 0.9914386868476868,grad_norm: 0.9999990408507676, iteration: 255775
loss: 0.9709148406982422,grad_norm: 0.9467059646689888, iteration: 255776
loss: 0.9850053787231445,grad_norm: 0.8570833290852592, iteration: 255777
loss: 0.954557478427887,grad_norm: 0.7931050755418028, iteration: 255778
loss: 1.0067840814590454,grad_norm: 0.9135818653291734, iteration: 255779
loss: 1.0049539804458618,grad_norm: 0.8606468225640549, iteration: 255780
loss: 1.0114386081695557,grad_norm: 0.898176740356499, iteration: 255781
loss: 1.039717674255371,grad_norm: 0.7639411448366763, iteration: 255782
loss: 1.0212388038635254,grad_norm: 0.7389025706027212, iteration: 255783
loss: 0.9814098477363586,grad_norm: 0.8846766133461628, iteration: 255784
loss: 0.9827774167060852,grad_norm: 0.9695424599827477, iteration: 255785
loss: 1.0010626316070557,grad_norm: 0.9438654062731948, iteration: 255786
loss: 1.022228717803955,grad_norm: 0.9999991827854795, iteration: 255787
loss: 0.9925476312637329,grad_norm: 0.9214404999032447, iteration: 255788
loss: 1.00734543800354,grad_norm: 0.9999989256581079, iteration: 255789
loss: 0.9944286942481995,grad_norm: 0.9999990832092105, iteration: 255790
loss: 1.038487434387207,grad_norm: 0.999999112421985, iteration: 255791
loss: 1.010290503501892,grad_norm: 0.9865890851819891, iteration: 255792
loss: 1.0070998668670654,grad_norm: 0.8674825926633317, iteration: 255793
loss: 1.0096957683563232,grad_norm: 0.9999989874617302, iteration: 255794
loss: 0.9720009565353394,grad_norm: 0.8105225637071685, iteration: 255795
loss: 1.0165334939956665,grad_norm: 0.9319241502027173, iteration: 255796
loss: 1.0164027214050293,grad_norm: 0.8866397924560695, iteration: 255797
loss: 1.001042127609253,grad_norm: 0.923025504509772, iteration: 255798
loss: 1.0605171918869019,grad_norm: 0.9999995175526389, iteration: 255799
loss: 0.999116837978363,grad_norm: 0.7953819223203691, iteration: 255800
loss: 1.0189950466156006,grad_norm: 0.7077538819564088, iteration: 255801
loss: 0.974555492401123,grad_norm: 0.8363338029603575, iteration: 255802
loss: 0.9877923727035522,grad_norm: 0.769700722848144, iteration: 255803
loss: 1.004359245300293,grad_norm: 0.7784680535335222, iteration: 255804
loss: 0.9953900575637817,grad_norm: 0.9973484849485608, iteration: 255805
loss: 1.0109987258911133,grad_norm: 0.9865464588082518, iteration: 255806
loss: 0.9851247668266296,grad_norm: 0.8491369508916787, iteration: 255807
loss: 1.0341392755508423,grad_norm: 0.9989425801174019, iteration: 255808
loss: 0.9943602681159973,grad_norm: 0.908493890218362, iteration: 255809
loss: 0.9783219695091248,grad_norm: 0.8529011642500356, iteration: 255810
loss: 1.0988001823425293,grad_norm: 0.9883149135535914, iteration: 255811
loss: 1.0014926195144653,grad_norm: 0.9982950299304197, iteration: 255812
loss: 1.006672739982605,grad_norm: 0.8138225019690698, iteration: 255813
loss: 1.0135999917984009,grad_norm: 0.9600307278075844, iteration: 255814
loss: 0.9599263668060303,grad_norm: 0.8237300283759672, iteration: 255815
loss: 1.036863088607788,grad_norm: 0.9999996182658996, iteration: 255816
loss: 1.001474380493164,grad_norm: 0.9999989252008878, iteration: 255817
loss: 0.9859539866447449,grad_norm: 0.9006800834077937, iteration: 255818
loss: 1.0282639265060425,grad_norm: 0.9999989614181832, iteration: 255819
loss: 1.0353646278381348,grad_norm: 0.9999991495892633, iteration: 255820
loss: 1.0207479000091553,grad_norm: 0.7679365285977129, iteration: 255821
loss: 0.9873195886611938,grad_norm: 0.9061028137756599, iteration: 255822
loss: 1.0117806196212769,grad_norm: 0.87119995720376, iteration: 255823
loss: 1.009217381477356,grad_norm: 0.9424452278548824, iteration: 255824
loss: 0.9966908097267151,grad_norm: 0.9867819247020716, iteration: 255825
loss: 0.9789953231811523,grad_norm: 0.91124608436462, iteration: 255826
loss: 0.960837721824646,grad_norm: 0.9983194057133039, iteration: 255827
loss: 1.0250836610794067,grad_norm: 0.9265657083432579, iteration: 255828
loss: 1.0069808959960938,grad_norm: 0.9137599514315133, iteration: 255829
loss: 0.9953835606575012,grad_norm: 0.941418256632002, iteration: 255830
loss: 1.0599253177642822,grad_norm: 0.9999990100950458, iteration: 255831
loss: 1.0298599004745483,grad_norm: 0.9212482486028577, iteration: 255832
loss: 0.9910163283348083,grad_norm: 0.9313202407091162, iteration: 255833
loss: 0.9953709840774536,grad_norm: 0.90103136328726, iteration: 255834
loss: 0.9930887222290039,grad_norm: 0.99999915890009, iteration: 255835
loss: 1.0157297849655151,grad_norm: 0.9333343161230498, iteration: 255836
loss: 1.0112934112548828,grad_norm: 0.9999989075703515, iteration: 255837
loss: 0.9999924302101135,grad_norm: 0.9814875589516099, iteration: 255838
loss: 1.0090652704238892,grad_norm: 0.9855384152223654, iteration: 255839
loss: 1.0158940553665161,grad_norm: 0.8135042265660736, iteration: 255840
loss: 1.023900032043457,grad_norm: 0.9858148526908461, iteration: 255841
loss: 1.0342189073562622,grad_norm: 0.999999034168009, iteration: 255842
loss: 1.0705301761627197,grad_norm: 0.999999069648183, iteration: 255843
loss: 1.0822950601577759,grad_norm: 0.9927491592395399, iteration: 255844
loss: 1.0015074014663696,grad_norm: 0.9999997008775343, iteration: 255845
loss: 0.9672451019287109,grad_norm: 0.8068269346321216, iteration: 255846
loss: 0.9712872505187988,grad_norm: 0.9347947854450812, iteration: 255847
loss: 0.9969421625137329,grad_norm: 0.9999991293767305, iteration: 255848
loss: 0.9734729528427124,grad_norm: 0.7818592785287252, iteration: 255849
loss: 1.0436344146728516,grad_norm: 0.9999991231603771, iteration: 255850
loss: 0.993156373500824,grad_norm: 0.873773756420664, iteration: 255851
loss: 1.020801067352295,grad_norm: 0.9999992314901026, iteration: 255852
loss: 0.99888014793396,grad_norm: 0.8576298038420014, iteration: 255853
loss: 0.9909263253211975,grad_norm: 0.7783612236856349, iteration: 255854
loss: 0.9578911662101746,grad_norm: 0.7738158462016386, iteration: 255855
loss: 0.9763013124465942,grad_norm: 0.8751688562844538, iteration: 255856
loss: 0.9990308284759521,grad_norm: 0.7886085548196063, iteration: 255857
loss: 1.0126608610153198,grad_norm: 0.8297896179124946, iteration: 255858
loss: 0.9741728901863098,grad_norm: 0.9001727077714607, iteration: 255859
loss: 0.9812821745872498,grad_norm: 0.7208672582870894, iteration: 255860
loss: 1.012420892715454,grad_norm: 0.802318928643713, iteration: 255861
loss: 1.0123302936553955,grad_norm: 0.8980890464166388, iteration: 255862
loss: 1.0136774778366089,grad_norm: 0.9835183606004699, iteration: 255863
loss: 0.9686305522918701,grad_norm: 0.9999991670062179, iteration: 255864
loss: 1.0292534828186035,grad_norm: 0.9281436658439578, iteration: 255865
loss: 1.0389442443847656,grad_norm: 0.9999991442454929, iteration: 255866
loss: 1.038460373878479,grad_norm: 0.946916916042014, iteration: 255867
loss: 1.0264748334884644,grad_norm: 0.9999990655953584, iteration: 255868
loss: 0.9985958933830261,grad_norm: 0.9191266916645912, iteration: 255869
loss: 0.9918755888938904,grad_norm: 0.8645801787299495, iteration: 255870
loss: 1.0600394010543823,grad_norm: 0.8694515938218982, iteration: 255871
loss: 0.9588004946708679,grad_norm: 0.7970873428711255, iteration: 255872
loss: 1.0056610107421875,grad_norm: 0.7701412872829154, iteration: 255873
loss: 1.0305497646331787,grad_norm: 0.971959277319777, iteration: 255874
loss: 1.0098289251327515,grad_norm: 0.9999991261137463, iteration: 255875
loss: 0.9980490207672119,grad_norm: 0.9265414089979689, iteration: 255876
loss: 1.0149728059768677,grad_norm: 0.9999990313026155, iteration: 255877
loss: 1.0872212648391724,grad_norm: 0.9999997426112704, iteration: 255878
loss: 0.9844658970832825,grad_norm: 0.7980452926141208, iteration: 255879
loss: 0.9529972076416016,grad_norm: 0.9999999200815398, iteration: 255880
loss: 0.9971460103988647,grad_norm: 0.8800120640891196, iteration: 255881
loss: 1.0375016927719116,grad_norm: 0.9091126154679648, iteration: 255882
loss: 1.025460958480835,grad_norm: 0.9639210111432593, iteration: 255883
loss: 0.9960646629333496,grad_norm: 0.988874750869396, iteration: 255884
loss: 0.9842144846916199,grad_norm: 0.9687101055422657, iteration: 255885
loss: 1.0371992588043213,grad_norm: 0.8611913461994333, iteration: 255886
loss: 0.9954190850257874,grad_norm: 0.9262719561984709, iteration: 255887
loss: 0.9948457479476929,grad_norm: 0.9630872546654221, iteration: 255888
loss: 1.025508165359497,grad_norm: 0.9511553819221268, iteration: 255889
loss: 0.9741418361663818,grad_norm: 0.7215271266913114, iteration: 255890
loss: 0.9853528738021851,grad_norm: 0.8833109900819668, iteration: 255891
loss: 0.9706567525863647,grad_norm: 0.9397176075138821, iteration: 255892
loss: 1.0115362405776978,grad_norm: 0.8877203201569932, iteration: 255893
loss: 0.9967832565307617,grad_norm: 0.9840236273140145, iteration: 255894
loss: 0.9696416258811951,grad_norm: 0.9670986460346661, iteration: 255895
loss: 0.9927879571914673,grad_norm: 0.8706495520805024, iteration: 255896
loss: 1.0140453577041626,grad_norm: 0.9539011577299054, iteration: 255897
loss: 1.0026065111160278,grad_norm: 0.749586996998579, iteration: 255898
loss: 0.9913493394851685,grad_norm: 0.771503101421972, iteration: 255899
loss: 0.9861180186271667,grad_norm: 0.8850878727226968, iteration: 255900
loss: 1.0111757516860962,grad_norm: 0.7579008179778408, iteration: 255901
loss: 0.9995074272155762,grad_norm: 0.8999373648333663, iteration: 255902
loss: 0.9745787978172302,grad_norm: 0.8578460149375445, iteration: 255903
loss: 0.9557145237922668,grad_norm: 0.7824925752591784, iteration: 255904
loss: 1.0035144090652466,grad_norm: 0.7691444389545671, iteration: 255905
loss: 1.0031189918518066,grad_norm: 0.8969826503292474, iteration: 255906
loss: 0.9920085668563843,grad_norm: 0.973006475177291, iteration: 255907
loss: 0.9740434288978577,grad_norm: 0.9644522565440653, iteration: 255908
loss: 1.0112301111221313,grad_norm: 0.9532768505914385, iteration: 255909
loss: 1.011722207069397,grad_norm: 0.8823630921741842, iteration: 255910
loss: 1.0010803937911987,grad_norm: 0.9165097789242352, iteration: 255911
loss: 1.046484112739563,grad_norm: 0.999999176387633, iteration: 255912
loss: 0.9751409888267517,grad_norm: 0.8238555115205126, iteration: 255913
loss: 0.9988012909889221,grad_norm: 0.9999992093718035, iteration: 255914
loss: 0.9886445999145508,grad_norm: 0.774458706198214, iteration: 255915
loss: 1.0052154064178467,grad_norm: 0.8343057606790117, iteration: 255916
loss: 0.9904680252075195,grad_norm: 0.8008449327789202, iteration: 255917
loss: 1.009400486946106,grad_norm: 0.9999991232124265, iteration: 255918
loss: 0.9953095316886902,grad_norm: 0.868101530078747, iteration: 255919
loss: 0.9901043176651001,grad_norm: 0.9999990209446674, iteration: 255920
loss: 1.0256062746047974,grad_norm: 0.9999991920925175, iteration: 255921
loss: 1.0310778617858887,grad_norm: 0.7992911411998863, iteration: 255922
loss: 0.9684789180755615,grad_norm: 0.880638681554141, iteration: 255923
loss: 1.003487467765808,grad_norm: 0.9999991480605827, iteration: 255924
loss: 1.0449250936508179,grad_norm: 0.9999992985349063, iteration: 255925
loss: 1.018707275390625,grad_norm: 0.9664674845151585, iteration: 255926
loss: 1.0104308128356934,grad_norm: 0.9229378744471044, iteration: 255927
loss: 0.938307523727417,grad_norm: 0.7771832165383705, iteration: 255928
loss: 1.0447205305099487,grad_norm: 0.9002731219761932, iteration: 255929
loss: 1.001686453819275,grad_norm: 0.9999993040605977, iteration: 255930
loss: 1.0220246315002441,grad_norm: 0.9999990984168283, iteration: 255931
loss: 1.025617241859436,grad_norm: 0.9945559520248705, iteration: 255932
loss: 0.9931402802467346,grad_norm: 0.9270165409543921, iteration: 255933
loss: 0.9932091236114502,grad_norm: 0.9139036207729769, iteration: 255934
loss: 0.9698846340179443,grad_norm: 0.9285500964345815, iteration: 255935
loss: 0.9877307415008545,grad_norm: 0.7558652433377566, iteration: 255936
loss: 0.9906867146492004,grad_norm: 0.7777121301444365, iteration: 255937
loss: 1.000426173210144,grad_norm: 0.9669922553347627, iteration: 255938
loss: 0.9903841614723206,grad_norm: 0.9999991629739671, iteration: 255939
loss: 0.9863707423210144,grad_norm: 0.7530680709040853, iteration: 255940
loss: 0.9848634600639343,grad_norm: 0.9999991345972501, iteration: 255941
loss: 0.9968960285186768,grad_norm: 0.7931338313824954, iteration: 255942
loss: 0.9956322908401489,grad_norm: 0.9894444888298896, iteration: 255943
loss: 1.037348747253418,grad_norm: 0.9729896102377985, iteration: 255944
loss: 1.000240683555603,grad_norm: 0.8477272875443843, iteration: 255945
loss: 1.0141805410385132,grad_norm: 0.9297660856805783, iteration: 255946
loss: 1.0122575759887695,grad_norm: 0.893838691025895, iteration: 255947
loss: 0.9879443049430847,grad_norm: 0.8570161062131673, iteration: 255948
loss: 0.9822596311569214,grad_norm: 0.9510014124414791, iteration: 255949
loss: 0.9812814593315125,grad_norm: 0.9065185475846971, iteration: 255950
loss: 1.0155009031295776,grad_norm: 0.9999991324977002, iteration: 255951
loss: 1.0426865816116333,grad_norm: 0.9697829085600763, iteration: 255952
loss: 0.9994197487831116,grad_norm: 0.8520022323440848, iteration: 255953
loss: 0.9910673499107361,grad_norm: 0.9194312811053661, iteration: 255954
loss: 1.0819343328475952,grad_norm: 0.9999991299005647, iteration: 255955
loss: 1.0013691186904907,grad_norm: 0.9503180688593533, iteration: 255956
loss: 0.9814943075180054,grad_norm: 0.9999989843450516, iteration: 255957
loss: 1.0028862953186035,grad_norm: 0.9837713597601039, iteration: 255958
loss: 0.9938645958900452,grad_norm: 0.9999992663778109, iteration: 255959
loss: 0.971315324306488,grad_norm: 0.8756818095839288, iteration: 255960
loss: 0.992879331111908,grad_norm: 0.9534684178135744, iteration: 255961
loss: 1.0027153491973877,grad_norm: 0.9504906881260949, iteration: 255962
loss: 1.0087242126464844,grad_norm: 0.8967574149194208, iteration: 255963
loss: 0.9710358381271362,grad_norm: 0.9270008062843301, iteration: 255964
loss: 1.0334999561309814,grad_norm: 0.9563995917499545, iteration: 255965
loss: 0.9595703482627869,grad_norm: 0.8739316257196155, iteration: 255966
loss: 0.9865266680717468,grad_norm: 0.8531061982982499, iteration: 255967
loss: 0.9858293533325195,grad_norm: 0.9999991121646465, iteration: 255968
loss: 1.0008927583694458,grad_norm: 0.9533848408027322, iteration: 255969
loss: 0.9599745869636536,grad_norm: 0.9147047357823576, iteration: 255970
loss: 0.9840005040168762,grad_norm: 0.8918668665303493, iteration: 255971
loss: 1.0351862907409668,grad_norm: 0.8157801785402206, iteration: 255972
loss: 0.9947850108146667,grad_norm: 0.9999994693095441, iteration: 255973
loss: 1.0194416046142578,grad_norm: 0.7693857066741224, iteration: 255974
loss: 1.0190809965133667,grad_norm: 0.8318091963365888, iteration: 255975
loss: 0.9990525841712952,grad_norm: 0.9999991873026045, iteration: 255976
loss: 1.028212308883667,grad_norm: 0.7760709403153316, iteration: 255977
loss: 1.0347379446029663,grad_norm: 0.9999991672297064, iteration: 255978
loss: 1.0300344228744507,grad_norm: 0.9249834432994736, iteration: 255979
loss: 1.011210322380066,grad_norm: 0.8685839730901483, iteration: 255980
loss: 0.9812058210372925,grad_norm: 0.8690948786964802, iteration: 255981
loss: 1.0169543027877808,grad_norm: 0.9818365445375652, iteration: 255982
loss: 1.0354647636413574,grad_norm: 0.9999992496862333, iteration: 255983
loss: 0.9921573400497437,grad_norm: 0.8855454028691951, iteration: 255984
loss: 0.9598527550697327,grad_norm: 0.9558516593631532, iteration: 255985
loss: 0.9897041320800781,grad_norm: 0.7559553247812537, iteration: 255986
loss: 1.0175843238830566,grad_norm: 0.8841570841753442, iteration: 255987
loss: 0.9937452673912048,grad_norm: 0.8070325371857334, iteration: 255988
loss: 0.9654394388198853,grad_norm: 0.8697441397547225, iteration: 255989
loss: 0.9958842396736145,grad_norm: 0.8972663929914833, iteration: 255990
loss: 1.0109527111053467,grad_norm: 0.8681152677997882, iteration: 255991
loss: 1.0330160856246948,grad_norm: 0.9999992505828179, iteration: 255992
loss: 0.9907916784286499,grad_norm: 0.9999990484040234, iteration: 255993
loss: 0.9832028150558472,grad_norm: 0.9999992426571488, iteration: 255994
loss: 0.9973132014274597,grad_norm: 0.9999990486197429, iteration: 255995
loss: 0.9670998454093933,grad_norm: 0.8942254057807839, iteration: 255996
loss: 1.0208942890167236,grad_norm: 0.9754151880700148, iteration: 255997
loss: 1.0275558233261108,grad_norm: 0.9244075784734282, iteration: 255998
loss: 1.034099817276001,grad_norm: 0.9846686263175765, iteration: 255999
loss: 1.0188850164413452,grad_norm: 0.9999997263682566, iteration: 256000
loss: 1.0031530857086182,grad_norm: 0.9999990264711796, iteration: 256001
loss: 1.0054075717926025,grad_norm: 0.8503630827252555, iteration: 256002
loss: 0.959943950176239,grad_norm: 0.9999991806254562, iteration: 256003
loss: 0.9787896275520325,grad_norm: 0.7828727885593662, iteration: 256004
loss: 1.007041096687317,grad_norm: 0.8155230152886691, iteration: 256005
loss: 0.9947345852851868,grad_norm: 0.829474539741059, iteration: 256006
loss: 1.0609687566757202,grad_norm: 0.8313954845924642, iteration: 256007
loss: 0.9886801838874817,grad_norm: 0.834594383976849, iteration: 256008
loss: 1.0008981227874756,grad_norm: 0.9999993619311146, iteration: 256009
loss: 1.0182610750198364,grad_norm: 0.8886657315879547, iteration: 256010
loss: 1.0012694597244263,grad_norm: 0.8797355185294755, iteration: 256011
loss: 1.000740885734558,grad_norm: 0.8335232189728102, iteration: 256012
loss: 0.9919250011444092,grad_norm: 0.9213256721070137, iteration: 256013
loss: 0.996512234210968,grad_norm: 0.8266258902959714, iteration: 256014
loss: 0.9971878528594971,grad_norm: 0.7656693147247995, iteration: 256015
loss: 0.998465359210968,grad_norm: 0.7873839286836002, iteration: 256016
loss: 0.9706003069877625,grad_norm: 0.900846792639009, iteration: 256017
loss: 1.0396305322647095,grad_norm: 0.844674915113334, iteration: 256018
loss: 1.0115429162979126,grad_norm: 0.9999990784362622, iteration: 256019
loss: 0.9972920417785645,grad_norm: 0.9440023040057585, iteration: 256020
loss: 0.9969028234481812,grad_norm: 0.808562992120024, iteration: 256021
loss: 1.008363962173462,grad_norm: 0.8089047445765417, iteration: 256022
loss: 1.0387884378433228,grad_norm: 0.9999990409002698, iteration: 256023
loss: 1.0282628536224365,grad_norm: 0.9999990105828191, iteration: 256024
loss: 1.0171815156936646,grad_norm: 0.8308319527108892, iteration: 256025
loss: 1.0174683332443237,grad_norm: 0.862744182936727, iteration: 256026
loss: 1.0293668508529663,grad_norm: 0.8584781762969953, iteration: 256027
loss: 0.9933812618255615,grad_norm: 0.808904894820901, iteration: 256028
loss: 1.0257757902145386,grad_norm: 0.786144002311429, iteration: 256029
loss: 0.9888020753860474,grad_norm: 0.7168730999061979, iteration: 256030
loss: 1.0118370056152344,grad_norm: 0.9999991384910607, iteration: 256031
loss: 1.0102910995483398,grad_norm: 0.9999990944453087, iteration: 256032
loss: 0.9776546955108643,grad_norm: 0.999999116939093, iteration: 256033
loss: 0.9814805388450623,grad_norm: 0.9417602452263821, iteration: 256034
loss: 1.021294116973877,grad_norm: 0.9394222211358086, iteration: 256035
loss: 1.0258314609527588,grad_norm: 0.9008561929957793, iteration: 256036
loss: 0.9930872917175293,grad_norm: 0.8688905238473991, iteration: 256037
loss: 0.9951720833778381,grad_norm: 0.8525654111102218, iteration: 256038
loss: 1.0223619937896729,grad_norm: 0.8507319795571833, iteration: 256039
loss: 1.0244288444519043,grad_norm: 0.7566340769108557, iteration: 256040
loss: 0.9649653434753418,grad_norm: 0.8809004695495485, iteration: 256041
loss: 0.9939242601394653,grad_norm: 0.8501242992385699, iteration: 256042
loss: 1.0368657112121582,grad_norm: 0.9999999956711132, iteration: 256043
loss: 1.0011177062988281,grad_norm: 0.86985742109459, iteration: 256044
loss: 1.0247995853424072,grad_norm: 0.8813435899776124, iteration: 256045
loss: 0.9995845556259155,grad_norm: 0.8199440296467858, iteration: 256046
loss: 0.9975653886795044,grad_norm: 0.8940630419976885, iteration: 256047
loss: 1.0933713912963867,grad_norm: 0.9999992953419435, iteration: 256048
loss: 1.01534903049469,grad_norm: 0.999999286136699, iteration: 256049
loss: 1.0073415040969849,grad_norm: 0.853112672490138, iteration: 256050
loss: 0.9654168486595154,grad_norm: 0.9999991273307668, iteration: 256051
loss: 1.0264575481414795,grad_norm: 0.8716531661892112, iteration: 256052
loss: 0.9902579188346863,grad_norm: 0.8709572917737715, iteration: 256053
loss: 0.9622873663902283,grad_norm: 0.9999990885383564, iteration: 256054
loss: 0.9702506065368652,grad_norm: 0.7427139287338829, iteration: 256055
loss: 1.011212706565857,grad_norm: 0.968075223120478, iteration: 256056
loss: 1.0054675340652466,grad_norm: 0.7707761853210815, iteration: 256057
loss: 1.0049515962600708,grad_norm: 0.9400794938159128, iteration: 256058
loss: 0.9871025085449219,grad_norm: 0.7252366683926389, iteration: 256059
loss: 0.954889714717865,grad_norm: 0.9115226667583967, iteration: 256060
loss: 0.9986276626586914,grad_norm: 0.9053651531222918, iteration: 256061
loss: 0.9662961959838867,grad_norm: 0.8868234824722566, iteration: 256062
loss: 0.9752064347267151,grad_norm: 0.8539447155674204, iteration: 256063
loss: 1.0039012432098389,grad_norm: 0.7571760630092905, iteration: 256064
loss: 1.014581561088562,grad_norm: 0.8906561502150661, iteration: 256065
loss: 1.0113792419433594,grad_norm: 0.9230989828592205, iteration: 256066
loss: 0.9664168357849121,grad_norm: 0.9007973682988085, iteration: 256067
loss: 0.9987558722496033,grad_norm: 0.8475285095235167, iteration: 256068
loss: 1.0206595659255981,grad_norm: 0.9999990429736539, iteration: 256069
loss: 1.0067280530929565,grad_norm: 0.9964534938237073, iteration: 256070
loss: 1.00333833694458,grad_norm: 0.969586920578159, iteration: 256071
loss: 0.9575070142745972,grad_norm: 0.9611459800597341, iteration: 256072
loss: 1.0266238451004028,grad_norm: 0.7590742941779217, iteration: 256073
loss: 0.9893922805786133,grad_norm: 0.7960301868534129, iteration: 256074
loss: 0.9839752316474915,grad_norm: 0.7281975978424995, iteration: 256075
loss: 0.9186158776283264,grad_norm: 0.8041622551637421, iteration: 256076
loss: 1.0729856491088867,grad_norm: 0.954856498309208, iteration: 256077
loss: 1.073034405708313,grad_norm: 0.93882504576685, iteration: 256078
loss: 0.9638320207595825,grad_norm: 0.9427848363810164, iteration: 256079
loss: 1.0376919507980347,grad_norm: 0.9513258431027342, iteration: 256080
loss: 0.9723435640335083,grad_norm: 0.9863302740873532, iteration: 256081
loss: 0.9803012013435364,grad_norm: 0.7705911287421313, iteration: 256082
loss: 0.9847127795219421,grad_norm: 0.822660502635707, iteration: 256083
loss: 1.0084881782531738,grad_norm: 0.9459711538294553, iteration: 256084
loss: 0.9985806941986084,grad_norm: 0.9106425992517947, iteration: 256085
loss: 0.9482452273368835,grad_norm: 0.9133579169022938, iteration: 256086
loss: 1.0263270139694214,grad_norm: 0.9431973378936414, iteration: 256087
loss: 0.978767454624176,grad_norm: 0.8139007902535539, iteration: 256088
loss: 0.9789069890975952,grad_norm: 0.817534053707417, iteration: 256089
loss: 0.9903815388679504,grad_norm: 0.7125612260841555, iteration: 256090
loss: 1.0496747493743896,grad_norm: 0.8789232947188855, iteration: 256091
loss: 0.9865144491195679,grad_norm: 0.7990556610445712, iteration: 256092
loss: 1.0068763494491577,grad_norm: 0.8841528611626194, iteration: 256093
loss: 1.0335239171981812,grad_norm: 0.9999992157973036, iteration: 256094
loss: 1.0868300199508667,grad_norm: 0.8464068079068648, iteration: 256095
loss: 1.0026723146438599,grad_norm: 0.9248020814235305, iteration: 256096
loss: 0.9603142142295837,grad_norm: 0.7734543151706396, iteration: 256097
loss: 0.999159038066864,grad_norm: 0.8251483887338497, iteration: 256098
loss: 1.0236482620239258,grad_norm: 0.8959031111486656, iteration: 256099
loss: 0.9862412810325623,grad_norm: 0.8539136906984416, iteration: 256100
loss: 0.96751868724823,grad_norm: 0.8810074000251098, iteration: 256101
loss: 0.9768531918525696,grad_norm: 0.8803061986445069, iteration: 256102
loss: 0.9746155738830566,grad_norm: 0.863297037696529, iteration: 256103
loss: 0.9964033961296082,grad_norm: 0.8393195274635951, iteration: 256104
loss: 0.9801434278488159,grad_norm: 0.8646979625502944, iteration: 256105
loss: 1.0258946418762207,grad_norm: 0.9999991461178964, iteration: 256106
loss: 0.9870075583457947,grad_norm: 0.9003476262402235, iteration: 256107
loss: 0.9849027991294861,grad_norm: 0.9098781278358884, iteration: 256108
loss: 0.9774923920631409,grad_norm: 0.9438226217106784, iteration: 256109
loss: 1.09675931930542,grad_norm: 0.999999396496089, iteration: 256110
loss: 1.0280429124832153,grad_norm: 0.8523916843159866, iteration: 256111
loss: 0.9958068132400513,grad_norm: 0.7747159792727344, iteration: 256112
loss: 1.0570279359817505,grad_norm: 0.9999995927020369, iteration: 256113
loss: 1.0058187246322632,grad_norm: 0.9999997007441849, iteration: 256114
loss: 1.000542402267456,grad_norm: 0.7589363476366551, iteration: 256115
loss: 1.0096485614776611,grad_norm: 0.8343402067823772, iteration: 256116
loss: 0.9562631249427795,grad_norm: 0.9999992664901908, iteration: 256117
loss: 1.0127710103988647,grad_norm: 0.9061843305455491, iteration: 256118
loss: 1.0119420289993286,grad_norm: 0.9999990864771637, iteration: 256119
loss: 1.0056079626083374,grad_norm: 0.9232145583880762, iteration: 256120
loss: 0.9961612224578857,grad_norm: 0.7548922122291413, iteration: 256121
loss: 0.9589621424674988,grad_norm: 0.9158415899787671, iteration: 256122
loss: 1.017990231513977,grad_norm: 0.8707057785345987, iteration: 256123
loss: 0.9849433302879333,grad_norm: 0.8521016540874677, iteration: 256124
loss: 0.9912892580032349,grad_norm: 0.9707942866305254, iteration: 256125
loss: 1.0098876953125,grad_norm: 0.8823417552132259, iteration: 256126
loss: 1.142094373703003,grad_norm: 0.999999704458246, iteration: 256127
loss: 1.0148329734802246,grad_norm: 0.9624341106406122, iteration: 256128
loss: 0.9808043837547302,grad_norm: 0.6785222002701317, iteration: 256129
loss: 1.094760775566101,grad_norm: 0.9666189954846021, iteration: 256130
loss: 0.9960965514183044,grad_norm: 0.8299311435864559, iteration: 256131
loss: 1.0161223411560059,grad_norm: 0.9388998137318295, iteration: 256132
loss: 1.0198593139648438,grad_norm: 0.906613417532895, iteration: 256133
loss: 0.977927565574646,grad_norm: 0.986173801170142, iteration: 256134
loss: 0.9703620672225952,grad_norm: 0.9873687065167652, iteration: 256135
loss: 0.9990785121917725,grad_norm: 0.9690366964025314, iteration: 256136
loss: 0.988426685333252,grad_norm: 0.9124841500911659, iteration: 256137
loss: 0.9637938141822815,grad_norm: 0.8728324906464899, iteration: 256138
loss: 0.9794831275939941,grad_norm: 0.8619898363923517, iteration: 256139
loss: 0.995398998260498,grad_norm: 0.9999995198076944, iteration: 256140
loss: 1.0694568157196045,grad_norm: 0.974620459990598, iteration: 256141
loss: 1.0016601085662842,grad_norm: 0.9622326550542896, iteration: 256142
loss: 1.0052350759506226,grad_norm: 0.8637040351933057, iteration: 256143
loss: 1.0240013599395752,grad_norm: 0.9999994101545046, iteration: 256144
loss: 0.99057537317276,grad_norm: 0.8698038992366036, iteration: 256145
loss: 1.0224361419677734,grad_norm: 0.9999991943138524, iteration: 256146
loss: 0.999119758605957,grad_norm: 0.9320606094000516, iteration: 256147
loss: 1.0461359024047852,grad_norm: 0.8933262736256881, iteration: 256148
loss: 0.9956860542297363,grad_norm: 0.849731556955617, iteration: 256149
loss: 1.0201116800308228,grad_norm: 0.8606984583914469, iteration: 256150
loss: 1.0174332857131958,grad_norm: 0.9999992144311236, iteration: 256151
loss: 1.0063979625701904,grad_norm: 0.9732982485245252, iteration: 256152
loss: 0.9831061959266663,grad_norm: 0.9999995716300754, iteration: 256153
loss: 1.003121018409729,grad_norm: 0.9335633867709632, iteration: 256154
loss: 0.9982810020446777,grad_norm: 0.9006815969825692, iteration: 256155
loss: 1.0247899293899536,grad_norm: 0.8449709838982989, iteration: 256156
loss: 0.9566870331764221,grad_norm: 0.999999121509275, iteration: 256157
loss: 1.0111461877822876,grad_norm: 0.9088262231558426, iteration: 256158
loss: 0.9880117177963257,grad_norm: 0.8732898749043396, iteration: 256159
loss: 0.9918308258056641,grad_norm: 0.9179113551662651, iteration: 256160
loss: 1.0507092475891113,grad_norm: 0.918794618790459, iteration: 256161
loss: 0.9846785664558411,grad_norm: 0.8221246059179668, iteration: 256162
loss: 1.0323792695999146,grad_norm: 0.9356013091747802, iteration: 256163
loss: 1.0707976818084717,grad_norm: 0.9999998101776914, iteration: 256164
loss: 0.9869019985198975,grad_norm: 0.9999995242851555, iteration: 256165
loss: 0.9902847409248352,grad_norm: 0.9181187841830184, iteration: 256166
loss: 0.9977710843086243,grad_norm: 0.783757082152016, iteration: 256167
loss: 0.9829874634742737,grad_norm: 0.9293081601024217, iteration: 256168
loss: 0.9896562695503235,grad_norm: 0.9857741858498282, iteration: 256169
loss: 0.9594399333000183,grad_norm: 0.8582149850018305, iteration: 256170
loss: 1.0814425945281982,grad_norm: 0.7708375770837645, iteration: 256171
loss: 1.0470325946807861,grad_norm: 0.9999991096690086, iteration: 256172
loss: 0.948448121547699,grad_norm: 0.7779984345794915, iteration: 256173
loss: 0.9829127788543701,grad_norm: 0.8602433360513115, iteration: 256174
loss: 0.9888239502906799,grad_norm: 0.8643913117857868, iteration: 256175
loss: 0.9875448942184448,grad_norm: 0.9999989700185636, iteration: 256176
loss: 1.0216796398162842,grad_norm: 0.9999990429659604, iteration: 256177
loss: 1.013799786567688,grad_norm: 0.9056011731342022, iteration: 256178
loss: 1.00679612159729,grad_norm: 0.9472143733436806, iteration: 256179
loss: 1.1315547227859497,grad_norm: 0.8699408897021249, iteration: 256180
loss: 0.9771898984909058,grad_norm: 0.9166052464001705, iteration: 256181
loss: 1.0277657508850098,grad_norm: 0.999999077475719, iteration: 256182
loss: 1.0219446420669556,grad_norm: 0.9999992245877775, iteration: 256183
loss: 0.98599773645401,grad_norm: 0.88498122990943, iteration: 256184
loss: 0.9928803443908691,grad_norm: 0.7579772855006617, iteration: 256185
loss: 0.9929057955741882,grad_norm: 0.999999463715185, iteration: 256186
loss: 1.0625160932540894,grad_norm: 0.9999990298952761, iteration: 256187
loss: 0.9878431558609009,grad_norm: 0.868147267371194, iteration: 256188
loss: 1.0094033479690552,grad_norm: 0.729011859111948, iteration: 256189
loss: 0.9556354880332947,grad_norm: 0.935608253540399, iteration: 256190
loss: 1.0067282915115356,grad_norm: 0.9005249717157731, iteration: 256191
loss: 0.9848355650901794,grad_norm: 0.9999991467086987, iteration: 256192
loss: 1.0039663314819336,grad_norm: 0.7235277236806618, iteration: 256193
loss: 0.9976059794425964,grad_norm: 0.9393237682192314, iteration: 256194
loss: 0.9890011548995972,grad_norm: 0.9624862356971955, iteration: 256195
loss: 1.1325422525405884,grad_norm: 0.9999997813129733, iteration: 256196
loss: 1.025435447692871,grad_norm: 0.8521751527824258, iteration: 256197
loss: 1.0474387407302856,grad_norm: 0.9999995005069983, iteration: 256198
loss: 0.9883368611335754,grad_norm: 0.8539329192310783, iteration: 256199
loss: 1.075309157371521,grad_norm: 0.7957413970203502, iteration: 256200
loss: 1.008569598197937,grad_norm: 0.9999991943944887, iteration: 256201
loss: 0.9902333617210388,grad_norm: 0.9425708349060589, iteration: 256202
loss: 0.9989270567893982,grad_norm: 0.8678899716229364, iteration: 256203
loss: 0.9970414042472839,grad_norm: 0.7613312323217616, iteration: 256204
loss: 0.9897168278694153,grad_norm: 0.9673260946128197, iteration: 256205
loss: 1.0399045944213867,grad_norm: 0.9999993380099269, iteration: 256206
loss: 1.0020747184753418,grad_norm: 0.998021915510222, iteration: 256207
loss: 0.9827215671539307,grad_norm: 0.9422362105787154, iteration: 256208
loss: 0.974698007106781,grad_norm: 0.7118641860619987, iteration: 256209
loss: 0.9983096718788147,grad_norm: 0.8579373115430144, iteration: 256210
loss: 1.0658694505691528,grad_norm: 0.9999997805721493, iteration: 256211
loss: 1.017063856124878,grad_norm: 0.9478265441795408, iteration: 256212
loss: 0.9837306141853333,grad_norm: 0.7960172942762529, iteration: 256213
loss: 1.0238112211227417,grad_norm: 0.8866266137986916, iteration: 256214
loss: 1.002305269241333,grad_norm: 0.9427581070639492, iteration: 256215
loss: 1.0432173013687134,grad_norm: 0.9999992761182771, iteration: 256216
loss: 0.9832525849342346,grad_norm: 0.9999990904164289, iteration: 256217
loss: 0.9900863766670227,grad_norm: 0.999999672414861, iteration: 256218
loss: 0.982816755771637,grad_norm: 0.7837946339003277, iteration: 256219
loss: 0.9522171020507812,grad_norm: 0.9951289037588414, iteration: 256220
loss: 0.9849613308906555,grad_norm: 0.9999990806445523, iteration: 256221
loss: 1.1399810314178467,grad_norm: 0.9999999158464089, iteration: 256222
loss: 1.1189113855361938,grad_norm: 0.9999991910493156, iteration: 256223
loss: 0.9834805130958557,grad_norm: 0.9212539484784794, iteration: 256224
loss: 0.9920423030853271,grad_norm: 0.8379865345065736, iteration: 256225
loss: 0.9814953207969666,grad_norm: 0.8766334033005243, iteration: 256226
loss: 1.0202629566192627,grad_norm: 0.999999248319436, iteration: 256227
loss: 1.0115373134613037,grad_norm: 0.8402903827663727, iteration: 256228
loss: 0.9828195571899414,grad_norm: 0.9040523773315283, iteration: 256229
loss: 0.9980844259262085,grad_norm: 0.9050102724412187, iteration: 256230
loss: 1.0076427459716797,grad_norm: 0.9999991235723728, iteration: 256231
loss: 1.000786304473877,grad_norm: 0.8825625725228892, iteration: 256232
loss: 1.062416911125183,grad_norm: 0.9999990537685857, iteration: 256233
loss: 0.9873548150062561,grad_norm: 0.8737333406978751, iteration: 256234
loss: 0.9744898080825806,grad_norm: 0.8969298437843138, iteration: 256235
loss: 1.0179383754730225,grad_norm: 0.8661757351752829, iteration: 256236
loss: 0.9968549609184265,grad_norm: 0.8780408393214203, iteration: 256237
loss: 1.0224906206130981,grad_norm: 1.0000000174395394, iteration: 256238
loss: 1.0204135179519653,grad_norm: 0.99999899609071, iteration: 256239
loss: 0.979572594165802,grad_norm: 0.999999162637222, iteration: 256240
loss: 1.0129640102386475,grad_norm: 0.9999992706888025, iteration: 256241
loss: 0.9585005640983582,grad_norm: 0.9837050532525716, iteration: 256242
loss: 0.9901123046875,grad_norm: 0.8763507823147005, iteration: 256243
loss: 1.023293375968933,grad_norm: 0.9735043286665273, iteration: 256244
loss: 0.9871363043785095,grad_norm: 0.9436234576818244, iteration: 256245
loss: 0.9906133413314819,grad_norm: 0.9336994842281305, iteration: 256246
loss: 1.0018534660339355,grad_norm: 0.753433512889769, iteration: 256247
loss: 1.0156681537628174,grad_norm: 0.9589721852496612, iteration: 256248
loss: 1.0081768035888672,grad_norm: 0.9347666201590038, iteration: 256249
loss: 0.9647188782691956,grad_norm: 0.8100830435108216, iteration: 256250
loss: 1.0095363855361938,grad_norm: 0.9119448276648185, iteration: 256251
loss: 1.0222026109695435,grad_norm: 0.9999997867420907, iteration: 256252
loss: 0.9268012642860413,grad_norm: 0.8916194840121118, iteration: 256253
loss: 1.0299369096755981,grad_norm: 0.8765539423419585, iteration: 256254
loss: 1.0290995836257935,grad_norm: 0.9227346324238256, iteration: 256255
loss: 1.0266748666763306,grad_norm: 0.9411606011095034, iteration: 256256
loss: 1.0264867544174194,grad_norm: 0.9999990188101199, iteration: 256257
loss: 0.9871951937675476,grad_norm: 0.7929221777617507, iteration: 256258
loss: 1.0116764307022095,grad_norm: 0.9662147932671838, iteration: 256259
loss: 0.9725431799888611,grad_norm: 0.9999989893771403, iteration: 256260
loss: 1.1930502653121948,grad_norm: 0.9999998432568882, iteration: 256261
loss: 1.001207947731018,grad_norm: 0.7904179033694484, iteration: 256262
loss: 1.017113208770752,grad_norm: 0.996112877685474, iteration: 256263
loss: 1.0273994207382202,grad_norm: 0.8717829908348931, iteration: 256264
loss: 0.9786955714225769,grad_norm: 0.8150914706406237, iteration: 256265
loss: 0.9851522445678711,grad_norm: 0.8268081590329086, iteration: 256266
loss: 1.0121979713439941,grad_norm: 0.8570313611050151, iteration: 256267
loss: 1.0112500190734863,grad_norm: 0.8891766754500361, iteration: 256268
loss: 1.015068531036377,grad_norm: 0.8851830403984425, iteration: 256269
loss: 1.0093055963516235,grad_norm: 0.9206302311638224, iteration: 256270
loss: 1.0081417560577393,grad_norm: 0.9733421056057078, iteration: 256271
loss: 1.0001400709152222,grad_norm: 0.9276484736492753, iteration: 256272
loss: 1.0052330493927002,grad_norm: 0.9999991182791254, iteration: 256273
loss: 1.017250657081604,grad_norm: 0.8534907457094361, iteration: 256274
loss: 0.9973046183586121,grad_norm: 0.908221101412127, iteration: 256275
loss: 1.0125669240951538,grad_norm: 0.8206803683156535, iteration: 256276
loss: 0.991628885269165,grad_norm: 0.7798778738474933, iteration: 256277
loss: 1.0093673467636108,grad_norm: 0.8974069071101892, iteration: 256278
loss: 0.9699084162712097,grad_norm: 0.9522154942175481, iteration: 256279
loss: 1.030801773071289,grad_norm: 0.999999831357762, iteration: 256280
loss: 1.0347751379013062,grad_norm: 0.7894964497475884, iteration: 256281
loss: 1.0209753513336182,grad_norm: 0.9999997575200984, iteration: 256282
loss: 0.9482062458992004,grad_norm: 0.9899891792363027, iteration: 256283
loss: 1.1486961841583252,grad_norm: 0.9999997941163572, iteration: 256284
loss: 1.0088874101638794,grad_norm: 0.8731988030959544, iteration: 256285
loss: 1.0162060260772705,grad_norm: 0.8502228726267645, iteration: 256286
loss: 0.9809417128562927,grad_norm: 0.9056917615832153, iteration: 256287
loss: 1.0318028926849365,grad_norm: 0.881493180263978, iteration: 256288
loss: 0.9866945743560791,grad_norm: 0.8304322578663201, iteration: 256289
loss: 0.974151611328125,grad_norm: 0.9999991174200357, iteration: 256290
loss: 1.0064078569412231,grad_norm: 0.8963848205546822, iteration: 256291
loss: 1.1143866777420044,grad_norm: 0.9999992222869387, iteration: 256292
loss: 1.0075995922088623,grad_norm: 0.7897564132375966, iteration: 256293
loss: 0.9941627979278564,grad_norm: 0.8741388658480593, iteration: 256294
loss: 0.9965561032295227,grad_norm: 0.8699897618031589, iteration: 256295
loss: 1.0273586511611938,grad_norm: 0.7368275060762239, iteration: 256296
loss: 0.9943540692329407,grad_norm: 0.7201158077812312, iteration: 256297
loss: 0.9790536165237427,grad_norm: 0.7429011432420913, iteration: 256298
loss: 1.0086252689361572,grad_norm: 0.874994042682069, iteration: 256299
loss: 1.0224223136901855,grad_norm: 0.8541809000653886, iteration: 256300
loss: 1.0049060583114624,grad_norm: 0.9999991059555949, iteration: 256301
loss: 0.9989562630653381,grad_norm: 0.8105175093491297, iteration: 256302
loss: 1.0103753805160522,grad_norm: 0.8931467325280247, iteration: 256303
loss: 0.9927797317504883,grad_norm: 0.9306500116842202, iteration: 256304
loss: 0.9911887049674988,grad_norm: 0.9844540445561173, iteration: 256305
loss: 1.0578464269638062,grad_norm: 0.9999997301863209, iteration: 256306
loss: 0.9752175211906433,grad_norm: 0.9999990343589906, iteration: 256307
loss: 1.0327694416046143,grad_norm: 0.8519720602614134, iteration: 256308
loss: 0.9658844470977783,grad_norm: 0.9055946493295273, iteration: 256309
loss: 0.9712724685668945,grad_norm: 0.8298476203705061, iteration: 256310
loss: 0.9896678924560547,grad_norm: 0.9999991236457046, iteration: 256311
loss: 1.111753225326538,grad_norm: 0.8339648129615788, iteration: 256312
loss: 1.0805768966674805,grad_norm: 0.9999998949376204, iteration: 256313
loss: 1.0251840353012085,grad_norm: 0.9891643364504357, iteration: 256314
loss: 1.0006310939788818,grad_norm: 0.8322101891680358, iteration: 256315
loss: 0.9870865345001221,grad_norm: 0.852279919464694, iteration: 256316
loss: 0.9984309077262878,grad_norm: 0.9999991002246876, iteration: 256317
loss: 0.994558572769165,grad_norm: 0.8417654300862337, iteration: 256318
loss: 1.031416893005371,grad_norm: 0.9999993108796501, iteration: 256319
loss: 0.9639169573783875,grad_norm: 0.999999038194265, iteration: 256320
loss: 0.9592115879058838,grad_norm: 0.8167190667412244, iteration: 256321
loss: 0.9753208160400391,grad_norm: 0.8713845439777752, iteration: 256322
loss: 1.021813988685608,grad_norm: 0.9999991171392781, iteration: 256323
loss: 1.0250537395477295,grad_norm: 0.9999992479744809, iteration: 256324
loss: 1.0006638765335083,grad_norm: 0.9787655552361199, iteration: 256325
loss: 0.9851104617118835,grad_norm: 0.7969826322001675, iteration: 256326
loss: 0.9888859391212463,grad_norm: 0.8939996640847326, iteration: 256327
loss: 1.0167173147201538,grad_norm: 0.8625317950807225, iteration: 256328
loss: 1.1349743604660034,grad_norm: 0.9999991719493728, iteration: 256329
loss: 1.1343640089035034,grad_norm: 0.9999995137360101, iteration: 256330
loss: 1.0334930419921875,grad_norm: 0.9624299230983231, iteration: 256331
loss: 0.9927066564559937,grad_norm: 0.9999991029555996, iteration: 256332
loss: 1.1815128326416016,grad_norm: 0.9999991637662474, iteration: 256333
loss: 1.0178714990615845,grad_norm: 0.9999996114584697, iteration: 256334
loss: 0.9818139672279358,grad_norm: 0.8245573234816196, iteration: 256335
loss: 0.9668921232223511,grad_norm: 0.8404940271418665, iteration: 256336
loss: 0.9882442951202393,grad_norm: 0.999999747587095, iteration: 256337
loss: 1.0223661661148071,grad_norm: 0.9999990204332899, iteration: 256338
loss: 1.0665045976638794,grad_norm: 0.9999997400500206, iteration: 256339
loss: 1.0062257051467896,grad_norm: 0.9999988896284021, iteration: 256340
loss: 1.0792258977890015,grad_norm: 0.9999991941954377, iteration: 256341
loss: 1.026625156402588,grad_norm: 0.9999996366178784, iteration: 256342
loss: 1.0170130729675293,grad_norm: 0.7351907556670497, iteration: 256343
loss: 1.0005735158920288,grad_norm: 0.9847589262379978, iteration: 256344
loss: 1.0146726369857788,grad_norm: 0.8085591422235382, iteration: 256345
loss: 0.9957849979400635,grad_norm: 0.9397642664162248, iteration: 256346
loss: 1.0003795623779297,grad_norm: 0.8210065155306806, iteration: 256347
loss: 0.9916673898696899,grad_norm: 0.9999990100676109, iteration: 256348
loss: 1.0619564056396484,grad_norm: 0.9999992552649256, iteration: 256349
loss: 1.0398218631744385,grad_norm: 0.8422043733366571, iteration: 256350
loss: 1.0171083211898804,grad_norm: 0.8735664757316994, iteration: 256351
loss: 1.0022566318511963,grad_norm: 0.7765386050256657, iteration: 256352
loss: 0.958206832408905,grad_norm: 0.9271516075980235, iteration: 256353
loss: 1.0720494985580444,grad_norm: 0.961583195603778, iteration: 256354
loss: 1.0122652053833008,grad_norm: 0.9999989998884478, iteration: 256355
loss: 1.009916067123413,grad_norm: 0.9405478392098868, iteration: 256356
loss: 1.0807499885559082,grad_norm: 0.9999997572256709, iteration: 256357
loss: 1.013867735862732,grad_norm: 0.9999990212662287, iteration: 256358
loss: 1.071531891822815,grad_norm: 0.9754464270185482, iteration: 256359
loss: 0.9941471219062805,grad_norm: 0.9999998013211123, iteration: 256360
loss: 1.3406857252120972,grad_norm: 0.9999997637817678, iteration: 256361
loss: 1.0497157573699951,grad_norm: 0.999999115971809, iteration: 256362
loss: 1.0561405420303345,grad_norm: 0.986572146431489, iteration: 256363
loss: 1.0001354217529297,grad_norm: 0.9999996537638538, iteration: 256364
loss: 1.0859616994857788,grad_norm: 0.9299979065308336, iteration: 256365
loss: 1.0075503587722778,grad_norm: 1.000000014202313, iteration: 256366
loss: 1.0009371042251587,grad_norm: 0.8459627383630638, iteration: 256367
loss: 1.0115904808044434,grad_norm: 0.8941156496513377, iteration: 256368
loss: 1.0733988285064697,grad_norm: 0.9999997191959256, iteration: 256369
loss: 0.9980278015136719,grad_norm: 0.9111208197762465, iteration: 256370
loss: 0.9991251230239868,grad_norm: 0.830760859604158, iteration: 256371
loss: 0.9994416236877441,grad_norm: 0.7945460621545799, iteration: 256372
loss: 0.9680042862892151,grad_norm: 0.8640178748605339, iteration: 256373
loss: 1.0108013153076172,grad_norm: 0.8879638278613944, iteration: 256374
loss: 1.0051151514053345,grad_norm: 0.9999992742731227, iteration: 256375
loss: 0.9914726614952087,grad_norm: 0.8407257699774868, iteration: 256376
loss: 0.9975521564483643,grad_norm: 0.780472688345931, iteration: 256377
loss: 1.0744775533676147,grad_norm: 0.9999997210550382, iteration: 256378
loss: 1.08299720287323,grad_norm: 0.9999996031331632, iteration: 256379
loss: 0.9811750054359436,grad_norm: 0.999999092291773, iteration: 256380
loss: 0.9980522990226746,grad_norm: 0.9999989924824167, iteration: 256381
loss: 0.9940782785415649,grad_norm: 0.9868215951918664, iteration: 256382
loss: 1.0286483764648438,grad_norm: 0.8917342572417133, iteration: 256383
loss: 1.0125632286071777,grad_norm: 0.9999998023361522, iteration: 256384
loss: 0.9757660031318665,grad_norm: 0.9961600272105515, iteration: 256385
loss: 0.9651697278022766,grad_norm: 0.7841400218444301, iteration: 256386
loss: 0.9735351204872131,grad_norm: 0.8200940083561351, iteration: 256387
loss: 1.0025347471237183,grad_norm: 0.9344214173625041, iteration: 256388
loss: 0.9977887272834778,grad_norm: 0.9999991431589105, iteration: 256389
loss: 1.0193159580230713,grad_norm: 0.7901346660203954, iteration: 256390
loss: 1.0110646486282349,grad_norm: 0.7698920154137577, iteration: 256391
loss: 1.0075279474258423,grad_norm: 0.9999990947223913, iteration: 256392
loss: 0.9753895998001099,grad_norm: 0.7669643228334789, iteration: 256393
loss: 1.0211658477783203,grad_norm: 0.9715522945772856, iteration: 256394
loss: 1.0068992376327515,grad_norm: 0.9165479446403302, iteration: 256395
loss: 0.98224276304245,grad_norm: 0.9999990196245786, iteration: 256396
loss: 0.9781140089035034,grad_norm: 0.8858104714048496, iteration: 256397
loss: 1.0017958879470825,grad_norm: 0.8079472821000093, iteration: 256398
loss: 1.0023478269577026,grad_norm: 0.9752030517179707, iteration: 256399
loss: 0.9792922735214233,grad_norm: 0.9999989440732346, iteration: 256400
loss: 0.967965304851532,grad_norm: 0.9662706648488335, iteration: 256401
loss: 0.9989929795265198,grad_norm: 0.7529492566575116, iteration: 256402
loss: 1.002874732017517,grad_norm: 0.7995034214886758, iteration: 256403
loss: 1.015344262123108,grad_norm: 0.8584329166980967, iteration: 256404
loss: 0.9818131327629089,grad_norm: 0.9125892869421414, iteration: 256405
loss: 1.005357027053833,grad_norm: 0.7269776983061147, iteration: 256406
loss: 0.9790279865264893,grad_norm: 0.8161776732682248, iteration: 256407
loss: 1.0209752321243286,grad_norm: 0.9999991882072689, iteration: 256408
loss: 1.0142098665237427,grad_norm: 0.7484090967942909, iteration: 256409
loss: 1.0083268880844116,grad_norm: 0.7223880330492607, iteration: 256410
loss: 0.9910054206848145,grad_norm: 0.8937402032472748, iteration: 256411
loss: 0.9896904230117798,grad_norm: 0.8252759306227235, iteration: 256412
loss: 1.0412007570266724,grad_norm: 0.9999994163310879, iteration: 256413
loss: 0.9915810227394104,grad_norm: 0.8057939173014641, iteration: 256414
loss: 1.0240871906280518,grad_norm: 0.8849185371407117, iteration: 256415
loss: 0.952126681804657,grad_norm: 0.9581549688664529, iteration: 256416
loss: 1.0140900611877441,grad_norm: 0.9936772397308316, iteration: 256417
loss: 1.0778950452804565,grad_norm: 0.8938849551677333, iteration: 256418
loss: 1.0480067729949951,grad_norm: 0.8590405955360909, iteration: 256419
loss: 1.066596508026123,grad_norm: 0.9999996127586881, iteration: 256420
loss: 0.9895654916763306,grad_norm: 0.999999092491072, iteration: 256421
loss: 1.058412790298462,grad_norm: 0.9999990897017188, iteration: 256422
loss: 1.0021476745605469,grad_norm: 0.999999046832245, iteration: 256423
loss: 1.0171295404434204,grad_norm: 0.9300173749861261, iteration: 256424
loss: 0.981760561466217,grad_norm: 0.9999989813858119, iteration: 256425
loss: 0.9995070695877075,grad_norm: 0.9590714587111434, iteration: 256426
loss: 1.0163545608520508,grad_norm: 0.8196181683526635, iteration: 256427
loss: 0.997134268283844,grad_norm: 0.8507737099219489, iteration: 256428
loss: 1.0035300254821777,grad_norm: 0.7797085840473907, iteration: 256429
loss: 0.9929782748222351,grad_norm: 0.8195165306170709, iteration: 256430
loss: 1.0117419958114624,grad_norm: 0.8404674956726785, iteration: 256431
loss: 1.068176031112671,grad_norm: 0.999999806604974, iteration: 256432
loss: 0.9856338500976562,grad_norm: 0.9202150040154667, iteration: 256433
loss: 0.9930624961853027,grad_norm: 0.7429660552428929, iteration: 256434
loss: 0.9634062647819519,grad_norm: 0.9193637164934108, iteration: 256435
loss: 1.0042179822921753,grad_norm: 0.9216171100872186, iteration: 256436
loss: 0.9852465391159058,grad_norm: 0.7863761227636045, iteration: 256437
loss: 1.0203607082366943,grad_norm: 0.8909003039100911, iteration: 256438
loss: 0.9900593757629395,grad_norm: 0.8739041652191116, iteration: 256439
loss: 0.9886186718940735,grad_norm: 0.9310396390549833, iteration: 256440
loss: 1.0533474683761597,grad_norm: 0.97453964702995, iteration: 256441
loss: 0.9803402423858643,grad_norm: 0.7395330534769059, iteration: 256442
loss: 0.9716632962226868,grad_norm: 0.9348883686338367, iteration: 256443
loss: 0.9972114562988281,grad_norm: 0.9413035796994816, iteration: 256444
loss: 0.9848983883857727,grad_norm: 0.9999990992134591, iteration: 256445
loss: 0.9916133880615234,grad_norm: 0.9270826885359794, iteration: 256446
loss: 1.0106706619262695,grad_norm: 0.9999989771730605, iteration: 256447
loss: 0.997298538684845,grad_norm: 0.999999011420782, iteration: 256448
loss: 0.9897569417953491,grad_norm: 0.9092462327565809, iteration: 256449
loss: 0.9936087131500244,grad_norm: 0.8471752502485383, iteration: 256450
loss: 0.9985467195510864,grad_norm: 0.7617486090350686, iteration: 256451
loss: 1.0348637104034424,grad_norm: 0.8376602400604228, iteration: 256452
loss: 0.9984959959983826,grad_norm: 0.9469628002141572, iteration: 256453
loss: 0.986751914024353,grad_norm: 0.8226349037045294, iteration: 256454
loss: 1.029725193977356,grad_norm: 0.9101440918387647, iteration: 256455
loss: 1.0013118982315063,grad_norm: 0.9906396177970161, iteration: 256456
loss: 1.0364079475402832,grad_norm: 0.7819454683741095, iteration: 256457
loss: 0.9850660562515259,grad_norm: 0.9999989602075546, iteration: 256458
loss: 1.0427160263061523,grad_norm: 0.9999996110320385, iteration: 256459
loss: 1.0253301858901978,grad_norm: 0.878761820746779, iteration: 256460
loss: 0.9941933155059814,grad_norm: 0.8502652792995412, iteration: 256461
loss: 1.041426420211792,grad_norm: 0.9873110445141519, iteration: 256462
loss: 1.0070806741714478,grad_norm: 0.9999991312915648, iteration: 256463
loss: 1.03762686252594,grad_norm: 0.9006073479419223, iteration: 256464
loss: 1.0131045579910278,grad_norm: 0.8375602265686914, iteration: 256465
loss: 0.9770812392234802,grad_norm: 0.7663166825741512, iteration: 256466
loss: 0.9866806268692017,grad_norm: 0.7884294399923838, iteration: 256467
loss: 0.9743387699127197,grad_norm: 0.8547616305263633, iteration: 256468
loss: 1.0171979665756226,grad_norm: 0.9999993283448801, iteration: 256469
loss: 1.011736273765564,grad_norm: 0.9090512499996347, iteration: 256470
loss: 0.9933833479881287,grad_norm: 0.811257752600841, iteration: 256471
loss: 1.0041981935501099,grad_norm: 0.9999991573225065, iteration: 256472
loss: 1.010424017906189,grad_norm: 0.9637087120608875, iteration: 256473
loss: 0.9698397517204285,grad_norm: 0.9714309579960007, iteration: 256474
loss: 1.0229358673095703,grad_norm: 0.7840315302690022, iteration: 256475
loss: 1.0002409219741821,grad_norm: 0.8074469907207024, iteration: 256476
loss: 0.9887422919273376,grad_norm: 0.7725582793470261, iteration: 256477
loss: 0.9913232922554016,grad_norm: 0.9338575779344452, iteration: 256478
loss: 0.9729803204536438,grad_norm: 0.9043702249233534, iteration: 256479
loss: 0.9768198132514954,grad_norm: 0.853837607604842, iteration: 256480
loss: 1.0168428421020508,grad_norm: 0.9832990135220582, iteration: 256481
loss: 0.9718315601348877,grad_norm: 0.787770131947682, iteration: 256482
loss: 0.9726736545562744,grad_norm: 0.94226247080293, iteration: 256483
loss: 0.9670284986495972,grad_norm: 0.8531525227891287, iteration: 256484
loss: 0.976553201675415,grad_norm: 0.8556018706508807, iteration: 256485
loss: 1.0567820072174072,grad_norm: 0.9999991790173883, iteration: 256486
loss: 0.9972608089447021,grad_norm: 0.7973982658263278, iteration: 256487
loss: 1.0089563131332397,grad_norm: 0.9462626327896815, iteration: 256488
loss: 1.0023871660232544,grad_norm: 0.9671266859619758, iteration: 256489
loss: 0.9887220859527588,grad_norm: 0.7457380978930461, iteration: 256490
loss: 0.9811383485794067,grad_norm: 0.7615088206583008, iteration: 256491
loss: 1.006956934928894,grad_norm: 0.9207321599232839, iteration: 256492
loss: 1.0665684938430786,grad_norm: 0.9999992052013622, iteration: 256493
loss: 1.0097925662994385,grad_norm: 0.9999990489969054, iteration: 256494
loss: 0.9830119013786316,grad_norm: 0.9999992131684579, iteration: 256495
loss: 1.0015184879302979,grad_norm: 0.9950892626162469, iteration: 256496
loss: 1.0072499513626099,grad_norm: 0.820716252436645, iteration: 256497
loss: 1.0060889720916748,grad_norm: 0.9999991240496341, iteration: 256498
loss: 1.032263994216919,grad_norm: 0.8617147563564631, iteration: 256499
loss: 1.0361475944519043,grad_norm: 0.999999178871756, iteration: 256500
loss: 0.9885820746421814,grad_norm: 0.9999992101539265, iteration: 256501
loss: 1.0067428350448608,grad_norm: 0.962059184869032, iteration: 256502
loss: 0.9614179134368896,grad_norm: 0.7278635206615213, iteration: 256503
loss: 1.0020525455474854,grad_norm: 0.9450876436033897, iteration: 256504
loss: 0.9976040124893188,grad_norm: 0.9999990356420256, iteration: 256505
loss: 1.0143619775772095,grad_norm: 0.8379334645576473, iteration: 256506
loss: 0.9941437244415283,grad_norm: 0.8495034569102018, iteration: 256507
loss: 0.9915685057640076,grad_norm: 0.8995473773842085, iteration: 256508
loss: 0.9787260293960571,grad_norm: 0.931562538513713, iteration: 256509
loss: 1.0372675657272339,grad_norm: 0.9999991147377136, iteration: 256510
loss: 1.0008647441864014,grad_norm: 0.9783483050755718, iteration: 256511
loss: 1.03899085521698,grad_norm: 0.7888034322976787, iteration: 256512
loss: 1.0225625038146973,grad_norm: 0.8711257809569747, iteration: 256513
loss: 0.992135226726532,grad_norm: 0.8046440321209062, iteration: 256514
loss: 1.0100632905960083,grad_norm: 0.999999049283248, iteration: 256515
loss: 1.0005383491516113,grad_norm: 0.8430401897495433, iteration: 256516
loss: 1.044769525527954,grad_norm: 0.9999992026966973, iteration: 256517
loss: 1.0601799488067627,grad_norm: 0.9999997783290636, iteration: 256518
loss: 1.022805094718933,grad_norm: 0.9018247188728897, iteration: 256519
loss: 1.0431487560272217,grad_norm: 0.8925802670472017, iteration: 256520
loss: 1.003576636314392,grad_norm: 0.8270452886749561, iteration: 256521
loss: 1.0124601125717163,grad_norm: 0.97943308013184, iteration: 256522
loss: 1.0140210390090942,grad_norm: 0.8930755022276563, iteration: 256523
loss: 0.9685440063476562,grad_norm: 0.89297556865468, iteration: 256524
loss: 0.9802408814430237,grad_norm: 0.9999990277947086, iteration: 256525
loss: 1.0012516975402832,grad_norm: 0.9999990973962746, iteration: 256526
loss: 1.0074613094329834,grad_norm: 0.999999138931343, iteration: 256527
loss: 0.9623329043388367,grad_norm: 0.9830882962694726, iteration: 256528
loss: 1.0106598138809204,grad_norm: 0.9293961086944228, iteration: 256529
loss: 0.9911901354789734,grad_norm: 0.9381591050933126, iteration: 256530
loss: 1.0050151348114014,grad_norm: 0.8183378641547363, iteration: 256531
loss: 1.0258718729019165,grad_norm: 0.9644584385461724, iteration: 256532
loss: 0.9703589081764221,grad_norm: 0.822932008727411, iteration: 256533
loss: 1.0040147304534912,grad_norm: 0.9999991649841941, iteration: 256534
loss: 0.9511523246765137,grad_norm: 0.9999991685773774, iteration: 256535
loss: 1.0369093418121338,grad_norm: 0.9999993787787048, iteration: 256536
loss: 1.0194369554519653,grad_norm: 0.8816848721469719, iteration: 256537
loss: 1.0144261121749878,grad_norm: 0.8333847128843392, iteration: 256538
loss: 1.0594913959503174,grad_norm: 0.9999991469136753, iteration: 256539
loss: 1.0041559934616089,grad_norm: 0.9285045160120983, iteration: 256540
loss: 0.975553572177887,grad_norm: 0.9999990759101016, iteration: 256541
loss: 1.0588377714157104,grad_norm: 0.9999993011310436, iteration: 256542
loss: 1.0255205631256104,grad_norm: 0.8535708834464246, iteration: 256543
loss: 1.046943187713623,grad_norm: 0.9936833842204857, iteration: 256544
loss: 1.037061095237732,grad_norm: 0.931035658931871, iteration: 256545
loss: 1.0373635292053223,grad_norm: 0.7609542123592099, iteration: 256546
loss: 1.0290563106536865,grad_norm: 0.8736651274001981, iteration: 256547
loss: 1.0835264921188354,grad_norm: 0.9999994338780996, iteration: 256548
loss: 1.038920283317566,grad_norm: 0.7803236398113451, iteration: 256549
loss: 1.0145288705825806,grad_norm: 0.999999047269764, iteration: 256550
loss: 1.0506101846694946,grad_norm: 0.9999991398122161, iteration: 256551
loss: 0.9792545437812805,grad_norm: 0.847425565340425, iteration: 256552
loss: 1.0597846508026123,grad_norm: 0.8292210191190241, iteration: 256553
loss: 0.9699104428291321,grad_norm: 0.9999990926926678, iteration: 256554
loss: 1.067893147468567,grad_norm: 0.9905230934699619, iteration: 256555
loss: 1.0240498781204224,grad_norm: 0.792519132564195, iteration: 256556
loss: 0.989336371421814,grad_norm: 0.9929634931738727, iteration: 256557
loss: 1.0328941345214844,grad_norm: 0.9216041390260841, iteration: 256558
loss: 0.9808138012886047,grad_norm: 0.9567266562547035, iteration: 256559
loss: 0.9897692799568176,grad_norm: 0.9999992302177283, iteration: 256560
loss: 1.0230355262756348,grad_norm: 0.8336931211659013, iteration: 256561
loss: 0.9647269248962402,grad_norm: 0.9999989659960207, iteration: 256562
loss: 0.9875653386116028,grad_norm: 0.8851458980109056, iteration: 256563
loss: 1.028689980506897,grad_norm: 0.9999990366198244, iteration: 256564
loss: 0.9857064485549927,grad_norm: 0.999999249145658, iteration: 256565
loss: 0.9654245972633362,grad_norm: 0.8170421056311258, iteration: 256566
loss: 0.9991238117218018,grad_norm: 0.9518206984976271, iteration: 256567
loss: 1.0277488231658936,grad_norm: 0.9698091942756388, iteration: 256568
loss: 1.024982213973999,grad_norm: 0.9999998382123908, iteration: 256569
loss: 1.0304136276245117,grad_norm: 0.8247791416275418, iteration: 256570
loss: 1.010764718055725,grad_norm: 0.9719455392756079, iteration: 256571
loss: 1.0079772472381592,grad_norm: 0.7556515027442067, iteration: 256572
loss: 1.0034345388412476,grad_norm: 0.9317595970529634, iteration: 256573
loss: 1.0110230445861816,grad_norm: 0.9999990977456877, iteration: 256574
loss: 1.1435714960098267,grad_norm: 0.9999994736986988, iteration: 256575
loss: 0.9898613691329956,grad_norm: 0.7775643632645897, iteration: 256576
loss: 1.0196611881256104,grad_norm: 0.9382374113648959, iteration: 256577
loss: 1.2050427198410034,grad_norm: 0.9999996615094244, iteration: 256578
loss: 1.005051851272583,grad_norm: 0.9999991515292705, iteration: 256579
loss: 1.0170378684997559,grad_norm: 0.999998949723846, iteration: 256580
loss: 0.9612103700637817,grad_norm: 0.9999989150490962, iteration: 256581
loss: 0.9922029376029968,grad_norm: 0.7105487025018206, iteration: 256582
loss: 0.968935489654541,grad_norm: 0.9999992103049663, iteration: 256583
loss: 0.9786695241928101,grad_norm: 0.9999996193544952, iteration: 256584
loss: 1.0442204475402832,grad_norm: 0.9999994269403205, iteration: 256585
loss: 0.9843920469284058,grad_norm: 0.9328498382189563, iteration: 256586
loss: 1.001929521560669,grad_norm: 0.8199047728292003, iteration: 256587
loss: 0.988580584526062,grad_norm: 0.7953561325555631, iteration: 256588
loss: 1.0014221668243408,grad_norm: 0.8711611952726145, iteration: 256589
loss: 1.0188922882080078,grad_norm: 0.796479841949329, iteration: 256590
loss: 1.027542233467102,grad_norm: 0.9999991466719681, iteration: 256591
loss: 0.9996688961982727,grad_norm: 0.8677633114968151, iteration: 256592
loss: 0.9908329248428345,grad_norm: 0.9039189029204999, iteration: 256593
loss: 1.0318551063537598,grad_norm: 0.9046175639812428, iteration: 256594
loss: 0.988149106502533,grad_norm: 0.9419836197731304, iteration: 256595
loss: 0.985194206237793,grad_norm: 0.8266190458108253, iteration: 256596
loss: 0.9410849809646606,grad_norm: 0.9422287701471067, iteration: 256597
loss: 0.9724437594413757,grad_norm: 0.9218201674011215, iteration: 256598
loss: 0.97586590051651,grad_norm: 0.9276530559614151, iteration: 256599
loss: 1.0102336406707764,grad_norm: 0.8891747815543629, iteration: 256600
loss: 1.0058530569076538,grad_norm: 0.9827670059559224, iteration: 256601
loss: 1.0098755359649658,grad_norm: 0.9999991193333636, iteration: 256602
loss: 1.0270370244979858,grad_norm: 0.8051218191411178, iteration: 256603
loss: 0.9899091720581055,grad_norm: 0.8923797404472764, iteration: 256604
loss: 1.026790976524353,grad_norm: 0.8977351418534194, iteration: 256605
loss: 1.0157438516616821,grad_norm: 0.7947795885988345, iteration: 256606
loss: 1.033838152885437,grad_norm: 0.9984730669691149, iteration: 256607
loss: 1.0200306177139282,grad_norm: 0.9164095037823384, iteration: 256608
loss: 0.9949609637260437,grad_norm: 0.9208037137477587, iteration: 256609
loss: 1.0778721570968628,grad_norm: 0.9999990263182637, iteration: 256610
loss: 0.9936789870262146,grad_norm: 0.9999991905458926, iteration: 256611
loss: 1.0180845260620117,grad_norm: 0.9974322531669261, iteration: 256612
loss: 1.0842174291610718,grad_norm: 0.999999897269711, iteration: 256613
loss: 1.005050539970398,grad_norm: 0.6901905961046307, iteration: 256614
loss: 1.0392948389053345,grad_norm: 0.878754520190874, iteration: 256615
loss: 1.022316336631775,grad_norm: 0.9999990730554171, iteration: 256616
loss: 0.9972758889198303,grad_norm: 0.8503727869172106, iteration: 256617
loss: 1.0013136863708496,grad_norm: 0.8195749696997475, iteration: 256618
loss: 0.9718629717826843,grad_norm: 0.9118436631452947, iteration: 256619
loss: 1.013088583946228,grad_norm: 0.9999991796589622, iteration: 256620
loss: 1.0419437885284424,grad_norm: 0.9999999773968766, iteration: 256621
loss: 1.0151373147964478,grad_norm: 0.8203425441729802, iteration: 256622
loss: 0.9663404822349548,grad_norm: 0.987233091475966, iteration: 256623
loss: 1.0023802518844604,grad_norm: 0.8544586539556441, iteration: 256624
loss: 1.0179860591888428,grad_norm: 0.9034539924657303, iteration: 256625
loss: 1.0301655530929565,grad_norm: 0.7918872900706717, iteration: 256626
loss: 0.9867635369300842,grad_norm: 0.8995416117060886, iteration: 256627
loss: 0.9730941653251648,grad_norm: 0.8531051307027013, iteration: 256628
loss: 1.0433036088943481,grad_norm: 0.9046033998250679, iteration: 256629
loss: 1.0011849403381348,grad_norm: 0.9350906018153127, iteration: 256630
loss: 0.9985938668251038,grad_norm: 0.81421305977183, iteration: 256631
loss: 0.9727185964584351,grad_norm: 0.8964392668888297, iteration: 256632
loss: 1.0307456254959106,grad_norm: 1.000000012591796, iteration: 256633
loss: 1.0423591136932373,grad_norm: 0.9133538351513844, iteration: 256634
loss: 1.0081373453140259,grad_norm: 0.9999992270980812, iteration: 256635
loss: 1.0466713905334473,grad_norm: 0.999999027421036, iteration: 256636
loss: 1.0318092107772827,grad_norm: 0.9999991254460571, iteration: 256637
loss: 1.0085952281951904,grad_norm: 0.8491134077477243, iteration: 256638
loss: 1.0190205574035645,grad_norm: 0.9867824443022389, iteration: 256639
loss: 1.009596824645996,grad_norm: 0.9065200622636465, iteration: 256640
loss: 0.9887716174125671,grad_norm: 0.8626712911704266, iteration: 256641
loss: 1.0170718431472778,grad_norm: 0.9999991348743149, iteration: 256642
loss: 1.0041460990905762,grad_norm: 0.9484189546707755, iteration: 256643
loss: 1.015092372894287,grad_norm: 0.8727459254287852, iteration: 256644
loss: 0.9996955990791321,grad_norm: 0.999999264452331, iteration: 256645
loss: 1.0220414400100708,grad_norm: 0.899565078480484, iteration: 256646
loss: 0.9956986904144287,grad_norm: 0.7903461941307465, iteration: 256647
loss: 1.0215129852294922,grad_norm: 0.9238131735735978, iteration: 256648
loss: 1.0236672163009644,grad_norm: 0.8195708362669987, iteration: 256649
loss: 0.9764338731765747,grad_norm: 0.9466754575035337, iteration: 256650
loss: 1.0547832250595093,grad_norm: 0.9999999887699299, iteration: 256651
loss: 1.0410020351409912,grad_norm: 0.9132535498891484, iteration: 256652
loss: 0.9886977076530457,grad_norm: 0.8169960449548082, iteration: 256653
loss: 1.017962098121643,grad_norm: 0.7641741796366405, iteration: 256654
loss: 1.0212265253067017,grad_norm: 0.7780150246820713, iteration: 256655
loss: 0.9976552128791809,grad_norm: 0.9999990363542512, iteration: 256656
loss: 1.0029184818267822,grad_norm: 0.999999113640764, iteration: 256657
loss: 0.9770886301994324,grad_norm: 0.889591257611374, iteration: 256658
loss: 1.0198066234588623,grad_norm: 0.9999990637047136, iteration: 256659
loss: 0.9743804931640625,grad_norm: 0.8724041885805757, iteration: 256660
loss: 1.031178593635559,grad_norm: 0.9999991063208543, iteration: 256661
loss: 1.0116022825241089,grad_norm: 0.7929189464406425, iteration: 256662
loss: 0.9530145525932312,grad_norm: 0.8776505084466029, iteration: 256663
loss: 0.9790695905685425,grad_norm: 0.8192264759450347, iteration: 256664
loss: 0.9855440855026245,grad_norm: 0.7040184092402347, iteration: 256665
loss: 1.0134496688842773,grad_norm: 0.8702950591871003, iteration: 256666
loss: 1.0318995714187622,grad_norm: 0.9077676175734709, iteration: 256667
loss: 1.0143916606903076,grad_norm: 0.8894188287422006, iteration: 256668
loss: 1.0171536207199097,grad_norm: 0.8380562545434891, iteration: 256669
loss: 1.0398558378219604,grad_norm: 0.9999992468068267, iteration: 256670
loss: 1.0086346864700317,grad_norm: 0.9638907318835938, iteration: 256671
loss: 1.0230952501296997,grad_norm: 0.9999992385673029, iteration: 256672
loss: 1.039202094078064,grad_norm: 0.9999997172129139, iteration: 256673
loss: 0.9989938139915466,grad_norm: 0.8793223580906893, iteration: 256674
loss: 0.9877409338951111,grad_norm: 0.7453873570796447, iteration: 256675
loss: 1.002616286277771,grad_norm: 0.9999989827874707, iteration: 256676
loss: 0.9986984133720398,grad_norm: 0.919614539923585, iteration: 256677
loss: 1.013633370399475,grad_norm: 0.9999995514147695, iteration: 256678
loss: 0.9963566660881042,grad_norm: 0.9999991213628052, iteration: 256679
loss: 1.012731671333313,grad_norm: 0.7889037629699295, iteration: 256680
loss: 0.9902338981628418,grad_norm: 0.8854905375646542, iteration: 256681
loss: 1.0131767988204956,grad_norm: 0.8270272597404106, iteration: 256682
loss: 0.9975244998931885,grad_norm: 0.9999991390570342, iteration: 256683
loss: 0.9816645979881287,grad_norm: 0.7270504063541807, iteration: 256684
loss: 0.9709752798080444,grad_norm: 0.9999992424037774, iteration: 256685
loss: 0.9662618637084961,grad_norm: 0.8107240449914261, iteration: 256686
loss: 0.9493252038955688,grad_norm: 0.9079313257034981, iteration: 256687
loss: 0.9881830215454102,grad_norm: 0.9999992059157761, iteration: 256688
loss: 0.9935455322265625,grad_norm: 0.8568502836184173, iteration: 256689
loss: 0.998396098613739,grad_norm: 0.9382479124187532, iteration: 256690
loss: 1.020727515220642,grad_norm: 0.8511206423423123, iteration: 256691
loss: 1.0407036542892456,grad_norm: 0.9999990609644147, iteration: 256692
loss: 0.9691846966743469,grad_norm: 0.9440031932633306, iteration: 256693
loss: 1.0251296758651733,grad_norm: 0.8282759541796917, iteration: 256694
loss: 1.118887186050415,grad_norm: 0.9999997473525083, iteration: 256695
loss: 1.000910758972168,grad_norm: 0.9101240672510987, iteration: 256696
loss: 1.0116060972213745,grad_norm: 0.7673139198414884, iteration: 256697
loss: 0.9965313076972961,grad_norm: 0.9637279187783621, iteration: 256698
loss: 1.007028341293335,grad_norm: 0.9288600717014687, iteration: 256699
loss: 1.0322909355163574,grad_norm: 0.999999731078346, iteration: 256700
loss: 0.9669111967086792,grad_norm: 0.7772801479219454, iteration: 256701
loss: 0.9987948536872864,grad_norm: 0.8404444001616266, iteration: 256702
loss: 0.9834226965904236,grad_norm: 0.819201800599998, iteration: 256703
loss: 0.9745599031448364,grad_norm: 0.9999990565494682, iteration: 256704
loss: 0.9801768064498901,grad_norm: 0.9999990814753065, iteration: 256705
loss: 1.0228806734085083,grad_norm: 0.9770578478785543, iteration: 256706
loss: 1.0140892267227173,grad_norm: 0.9055774478034891, iteration: 256707
loss: 0.984685480594635,grad_norm: 0.8790345240723848, iteration: 256708
loss: 0.9955487847328186,grad_norm: 0.9335056820474575, iteration: 256709
loss: 1.0036827325820923,grad_norm: 0.9096060970954359, iteration: 256710
loss: 1.0332528352737427,grad_norm: 0.9999990713837593, iteration: 256711
loss: 1.0169695615768433,grad_norm: 0.999999106348321, iteration: 256712
loss: 0.9927141070365906,grad_norm: 0.9088324065400692, iteration: 256713
loss: 1.0402640104293823,grad_norm: 0.9023398992149173, iteration: 256714
loss: 1.0197250843048096,grad_norm: 0.9999992093784383, iteration: 256715
loss: 0.9950632452964783,grad_norm: 0.9837133959088499, iteration: 256716
loss: 1.0403757095336914,grad_norm: 0.8215433152483245, iteration: 256717
loss: 1.0252728462219238,grad_norm: 0.7766728886017613, iteration: 256718
loss: 0.9922919273376465,grad_norm: 0.8409352146197251, iteration: 256719
loss: 1.020325779914856,grad_norm: 0.7804056154267439, iteration: 256720
loss: 0.989037811756134,grad_norm: 0.9767957397588426, iteration: 256721
loss: 0.9879528880119324,grad_norm: 0.7509223059568767, iteration: 256722
loss: 1.0065827369689941,grad_norm: 0.9784828564988531, iteration: 256723
loss: 1.0352367162704468,grad_norm: 0.9669678480571234, iteration: 256724
loss: 0.9960486888885498,grad_norm: 0.751957015363495, iteration: 256725
loss: 0.9905802011489868,grad_norm: 0.9367021639464901, iteration: 256726
loss: 1.0448081493377686,grad_norm: 0.9351688014622132, iteration: 256727
loss: 1.0184299945831299,grad_norm: 0.9001599732711779, iteration: 256728
loss: 0.9925236701965332,grad_norm: 0.857491231098498, iteration: 256729
loss: 1.0124115943908691,grad_norm: 0.8437788393672264, iteration: 256730
loss: 0.9818189740180969,grad_norm: 0.8436788933769758, iteration: 256731
loss: 1.1084707975387573,grad_norm: 0.999999942140871, iteration: 256732
loss: 1.0238769054412842,grad_norm: 0.8763967459477844, iteration: 256733
loss: 1.0201518535614014,grad_norm: 0.8230541037933912, iteration: 256734
loss: 1.012948751449585,grad_norm: 0.999999877445948, iteration: 256735
loss: 1.0066134929656982,grad_norm: 0.8696517783749494, iteration: 256736
loss: 1.017720341682434,grad_norm: 0.8583260747832873, iteration: 256737
loss: 1.0169644355773926,grad_norm: 0.9999991048925048, iteration: 256738
loss: 1.001598834991455,grad_norm: 0.8816046164340025, iteration: 256739
loss: 0.9704629778862,grad_norm: 0.9105514334624004, iteration: 256740
loss: 0.9870084524154663,grad_norm: 0.7218413212589249, iteration: 256741
loss: 0.9681591987609863,grad_norm: 0.8704793618911818, iteration: 256742
loss: 0.999496340751648,grad_norm: 0.7400814140552207, iteration: 256743
loss: 1.0159926414489746,grad_norm: 0.8961959084372293, iteration: 256744
loss: 1.024897813796997,grad_norm: 0.8247935602314808, iteration: 256745
loss: 0.9822966456413269,grad_norm: 0.8176583959795539, iteration: 256746
loss: 1.0713343620300293,grad_norm: 0.9999995533932786, iteration: 256747
loss: 1.0283217430114746,grad_norm: 0.8757577605063331, iteration: 256748
loss: 0.9824774265289307,grad_norm: 0.8208299006178827, iteration: 256749
loss: 0.9937350749969482,grad_norm: 0.719933172615655, iteration: 256750
loss: 1.0103758573532104,grad_norm: 0.999998977415676, iteration: 256751
loss: 0.969890296459198,grad_norm: 0.9255921775066374, iteration: 256752
loss: 1.0056465864181519,grad_norm: 0.9795762093461262, iteration: 256753
loss: 0.983877420425415,grad_norm: 0.9999990978940803, iteration: 256754
loss: 1.133543848991394,grad_norm: 0.9050145888318049, iteration: 256755
loss: 0.9641138315200806,grad_norm: 0.8214689022966463, iteration: 256756
loss: 1.0138195753097534,grad_norm: 0.7596853531794602, iteration: 256757
loss: 0.9865196943283081,grad_norm: 0.8247180711644796, iteration: 256758
loss: 1.1068421602249146,grad_norm: 0.999999512682797, iteration: 256759
loss: 1.0056339502334595,grad_norm: 0.7989734179419733, iteration: 256760
loss: 0.9732544422149658,grad_norm: 0.9621605741824035, iteration: 256761
loss: 0.986272394657135,grad_norm: 0.8294655549217588, iteration: 256762
loss: 0.9826051592826843,grad_norm: 0.9999990294593036, iteration: 256763
loss: 1.011204481124878,grad_norm: 0.9047712455065773, iteration: 256764
loss: 0.9965012669563293,grad_norm: 0.99999900477454, iteration: 256765
loss: 0.9997331500053406,grad_norm: 0.8983373436129481, iteration: 256766
loss: 1.00570809841156,grad_norm: 0.8394120201559304, iteration: 256767
loss: 0.9750165343284607,grad_norm: 0.9574647375562635, iteration: 256768
loss: 1.0332443714141846,grad_norm: 0.9999990935228213, iteration: 256769
loss: 1.043554425239563,grad_norm: 0.9285921405979458, iteration: 256770
loss: 1.01157546043396,grad_norm: 0.7628803025295097, iteration: 256771
loss: 0.9779871106147766,grad_norm: 0.755195069584209, iteration: 256772
loss: 0.9814673066139221,grad_norm: 0.8897080670308363, iteration: 256773
loss: 0.9623306393623352,grad_norm: 0.8000649775222944, iteration: 256774
loss: 1.0148766040802002,grad_norm: 0.8936396990078461, iteration: 256775
loss: 1.002213716506958,grad_norm: 0.7757766265781671, iteration: 256776
loss: 1.0250612497329712,grad_norm: 0.8639126340224855, iteration: 256777
loss: 1.0236835479736328,grad_norm: 0.9999989945470622, iteration: 256778
loss: 0.9487758278846741,grad_norm: 0.9999992212191046, iteration: 256779
loss: 1.010633111000061,grad_norm: 0.7835327983005077, iteration: 256780
loss: 0.984444797039032,grad_norm: 0.9999990213268688, iteration: 256781
loss: 1.0194867849349976,grad_norm: 0.8342654274843536, iteration: 256782
loss: 0.9802846312522888,grad_norm: 0.8973257574785686, iteration: 256783
loss: 1.0069230794906616,grad_norm: 0.9706328544330308, iteration: 256784
loss: 0.9500853419303894,grad_norm: 0.8594984235165576, iteration: 256785
loss: 1.0656663179397583,grad_norm: 0.9268982644320508, iteration: 256786
loss: 0.992255449295044,grad_norm: 0.8008700085517706, iteration: 256787
loss: 1.004675269126892,grad_norm: 0.9185389131051189, iteration: 256788
loss: 0.9855543375015259,grad_norm: 0.8841573900315194, iteration: 256789
loss: 1.0197991132736206,grad_norm: 0.9306887420691605, iteration: 256790
loss: 1.0125527381896973,grad_norm: 0.8343272300168841, iteration: 256791
loss: 0.975149929523468,grad_norm: 0.9999998500246025, iteration: 256792
loss: 1.0239053964614868,grad_norm: 0.6667155504778711, iteration: 256793
loss: 0.9921370148658752,grad_norm: 0.9999990296025097, iteration: 256794
loss: 0.987969696521759,grad_norm: 0.8467224839483511, iteration: 256795
loss: 0.9748103022575378,grad_norm: 0.8221306252019152, iteration: 256796
loss: 0.9678698778152466,grad_norm: 0.8261131403117514, iteration: 256797
loss: 0.9780599474906921,grad_norm: 0.8953302332301936, iteration: 256798
loss: 0.995800256729126,grad_norm: 0.9999992290275951, iteration: 256799
loss: 0.9790852069854736,grad_norm: 0.9103600001411092, iteration: 256800
loss: 1.0349094867706299,grad_norm: 0.949093520874679, iteration: 256801
loss: 0.9836076498031616,grad_norm: 0.8221459790901685, iteration: 256802
loss: 0.9770942330360413,grad_norm: 0.9377379113277298, iteration: 256803
loss: 1.0280250310897827,grad_norm: 0.80986877495032, iteration: 256804
loss: 0.9917685389518738,grad_norm: 0.759341815534316, iteration: 256805
loss: 0.9646939039230347,grad_norm: 0.8527143228865443, iteration: 256806
loss: 0.9883280992507935,grad_norm: 0.7301225772711215, iteration: 256807
loss: 0.9940455555915833,grad_norm: 0.9999990186732562, iteration: 256808
loss: 1.0194429159164429,grad_norm: 0.9542916954980712, iteration: 256809
loss: 0.9962379932403564,grad_norm: 0.9540843115191106, iteration: 256810
loss: 0.9786531925201416,grad_norm: 0.7734413683489955, iteration: 256811
loss: 1.0342966318130493,grad_norm: 0.999999321358917, iteration: 256812
loss: 0.9844351410865784,grad_norm: 0.8882342604450626, iteration: 256813
loss: 0.9893094897270203,grad_norm: 0.8971113476630048, iteration: 256814
loss: 0.9603143334388733,grad_norm: 0.9999989055771756, iteration: 256815
loss: 0.9655259847640991,grad_norm: 0.9749043221397771, iteration: 256816
loss: 1.001334309577942,grad_norm: 0.7575115154178967, iteration: 256817
loss: 1.0217913389205933,grad_norm: 0.9962048658106406, iteration: 256818
loss: 1.0753159523010254,grad_norm: 0.9999997834058041, iteration: 256819
loss: 0.9712591171264648,grad_norm: 0.936234852254172, iteration: 256820
loss: 0.9710347056388855,grad_norm: 0.9999991152048152, iteration: 256821
loss: 0.9812872409820557,grad_norm: 0.9626694530839843, iteration: 256822
loss: 1.0015976428985596,grad_norm: 0.769231959047132, iteration: 256823
loss: 0.9837455749511719,grad_norm: 0.9209602199944616, iteration: 256824
loss: 0.9917054176330566,grad_norm: 0.8765318341179641, iteration: 256825
loss: 1.0061074495315552,grad_norm: 0.8840667994312538, iteration: 256826
loss: 0.9809738993644714,grad_norm: 0.7537248319846882, iteration: 256827
loss: 1.0185648202896118,grad_norm: 0.9022186447767828, iteration: 256828
loss: 1.0462539196014404,grad_norm: 0.9999990634499714, iteration: 256829
loss: 0.9805835485458374,grad_norm: 0.9473370821066234, iteration: 256830
loss: 0.989852249622345,grad_norm: 0.8826611570482183, iteration: 256831
loss: 1.0023137331008911,grad_norm: 0.9787214026941611, iteration: 256832
loss: 1.0042765140533447,grad_norm: 0.8772271434520139, iteration: 256833
loss: 1.0039114952087402,grad_norm: 0.850422568223773, iteration: 256834
loss: 1.0199295282363892,grad_norm: 0.9999992257620932, iteration: 256835
loss: 0.987805187702179,grad_norm: 0.803275589163149, iteration: 256836
loss: 0.9697187542915344,grad_norm: 0.987318634403055, iteration: 256837
loss: 1.0225245952606201,grad_norm: 0.9118960681664923, iteration: 256838
loss: 0.9954548478126526,grad_norm: 0.8040135235960059, iteration: 256839
loss: 1.0346606969833374,grad_norm: 0.7835188827500253, iteration: 256840
loss: 0.9840260744094849,grad_norm: 0.7909602373997732, iteration: 256841
loss: 0.9850653409957886,grad_norm: 0.9499940172349551, iteration: 256842
loss: 0.9855939745903015,grad_norm: 0.8266494325147848, iteration: 256843
loss: 1.0291746854782104,grad_norm: 0.8520575803830063, iteration: 256844
loss: 0.9929929971694946,grad_norm: 0.9514620362518627, iteration: 256845
loss: 0.9861240983009338,grad_norm: 0.7902538888109335, iteration: 256846
loss: 0.9789220690727234,grad_norm: 0.7818025650156305, iteration: 256847
loss: 1.0012766122817993,grad_norm: 0.9999990780910473, iteration: 256848
loss: 1.0044292211532593,grad_norm: 0.9205407399490418, iteration: 256849
loss: 0.999821126461029,grad_norm: 0.878702519420424, iteration: 256850
loss: 0.9917771816253662,grad_norm: 0.8302112989422631, iteration: 256851
loss: 1.0099284648895264,grad_norm: 0.793580948300961, iteration: 256852
loss: 1.0362091064453125,grad_norm: 0.8259712607044777, iteration: 256853
loss: 1.010677695274353,grad_norm: 0.9999989916507227, iteration: 256854
loss: 1.00725257396698,grad_norm: 0.9347927836465738, iteration: 256855
loss: 0.9798871278762817,grad_norm: 0.8960958013670993, iteration: 256856
loss: 1.074350118637085,grad_norm: 0.9526211108781758, iteration: 256857
loss: 1.0327410697937012,grad_norm: 0.8396062550800464, iteration: 256858
loss: 0.9995301365852356,grad_norm: 0.9999990748884633, iteration: 256859
loss: 1.0083749294281006,grad_norm: 0.9679213582052907, iteration: 256860
loss: 1.0077463388442993,grad_norm: 0.8300828995494723, iteration: 256861
loss: 0.980613648891449,grad_norm: 0.808597805942893, iteration: 256862
loss: 1.0026072263717651,grad_norm: 0.9999990364867691, iteration: 256863
loss: 1.1215872764587402,grad_norm: 0.9778457629733035, iteration: 256864
loss: 0.9900176525115967,grad_norm: 0.7942448503215612, iteration: 256865
loss: 1.0087273120880127,grad_norm: 0.8750638556716245, iteration: 256866
loss: 1.0029538869857788,grad_norm: 0.7937131655089613, iteration: 256867
loss: 1.0529990196228027,grad_norm: 0.9584262670084823, iteration: 256868
loss: 0.9860303997993469,grad_norm: 0.999999124469485, iteration: 256869
loss: 0.95697021484375,grad_norm: 0.919224839038455, iteration: 256870
loss: 1.003848671913147,grad_norm: 0.9999990702489698, iteration: 256871
loss: 0.997468888759613,grad_norm: 0.804503988917659, iteration: 256872
loss: 1.013755440711975,grad_norm: 0.8596881461589708, iteration: 256873
loss: 0.9794647097587585,grad_norm: 0.8814287252695261, iteration: 256874
loss: 1.0029171705245972,grad_norm: 0.9562386849609705, iteration: 256875
loss: 0.9918099641799927,grad_norm: 0.7839326434420872, iteration: 256876
loss: 1.0376639366149902,grad_norm: 0.9999990865634815, iteration: 256877
loss: 0.9829959869384766,grad_norm: 0.8157406384827166, iteration: 256878
loss: 1.0092273950576782,grad_norm: 0.9999991591423092, iteration: 256879
loss: 0.9595998525619507,grad_norm: 0.9341563506200361, iteration: 256880
loss: 0.9737616777420044,grad_norm: 0.7725317916222051, iteration: 256881
loss: 1.0222457647323608,grad_norm: 0.8420532454619633, iteration: 256882
loss: 1.003289818763733,grad_norm: 0.8168672818348076, iteration: 256883
loss: 1.1160436868667603,grad_norm: 0.9999993667156195, iteration: 256884
loss: 0.9809392094612122,grad_norm: 0.8423815168820896, iteration: 256885
loss: 0.9940701723098755,grad_norm: 0.8277131918586763, iteration: 256886
loss: 1.0187820196151733,grad_norm: 0.8472141739783016, iteration: 256887
loss: 1.014928936958313,grad_norm: 0.7861038848914257, iteration: 256888
loss: 0.9979220032691956,grad_norm: 0.9999993964243048, iteration: 256889
loss: 1.03240966796875,grad_norm: 0.9254508955735872, iteration: 256890
loss: 0.9777758121490479,grad_norm: 0.7930149032864933, iteration: 256891
loss: 1.0492303371429443,grad_norm: 0.9891698844905134, iteration: 256892
loss: 1.0040408372879028,grad_norm: 0.9999992125253437, iteration: 256893
loss: 0.9945095777511597,grad_norm: 0.9999990918494811, iteration: 256894
loss: 1.008904218673706,grad_norm: 0.9066667040540906, iteration: 256895
loss: 1.0345438718795776,grad_norm: 0.9999997110312, iteration: 256896
loss: 0.9888168573379517,grad_norm: 0.8432589571758788, iteration: 256897
loss: 1.0200772285461426,grad_norm: 0.999999639158658, iteration: 256898
loss: 1.0041958093643188,grad_norm: 0.8285176066106571, iteration: 256899
loss: 0.9919108748435974,grad_norm: 0.8128944050375455, iteration: 256900
loss: 1.0444978475570679,grad_norm: 0.9711757837424744, iteration: 256901
loss: 1.0145344734191895,grad_norm: 0.9999992435254317, iteration: 256902
loss: 0.9964104294776917,grad_norm: 0.8181519382706578, iteration: 256903
loss: 1.1296544075012207,grad_norm: 0.9999991586426212, iteration: 256904
loss: 1.0338807106018066,grad_norm: 0.9999999528744293, iteration: 256905
loss: 0.9947785139083862,grad_norm: 0.9999990839085066, iteration: 256906
loss: 0.9623952507972717,grad_norm: 0.9999989379155378, iteration: 256907
loss: 0.9898284077644348,grad_norm: 0.8991449235090089, iteration: 256908
loss: 0.9931786060333252,grad_norm: 0.8706277086178241, iteration: 256909
loss: 1.0201503038406372,grad_norm: 0.9999991183179116, iteration: 256910
loss: 1.0279144048690796,grad_norm: 0.9999991982625206, iteration: 256911
loss: 1.0421615839004517,grad_norm: 0.9756294184591799, iteration: 256912
loss: 0.97173672914505,grad_norm: 0.9671059664698485, iteration: 256913
loss: 0.9917771816253662,grad_norm: 0.8640163464547153, iteration: 256914
loss: 1.0405304431915283,grad_norm: 0.887307032368724, iteration: 256915
loss: 1.0019463300704956,grad_norm: 0.9308730918497882, iteration: 256916
loss: 0.9863378405570984,grad_norm: 0.8055506606876596, iteration: 256917
loss: 0.9977778792381287,grad_norm: 0.797873876768791, iteration: 256918
loss: 1.04075288772583,grad_norm: 0.8930605687628491, iteration: 256919
loss: 1.1331173181533813,grad_norm: 0.9999999964734483, iteration: 256920
loss: 0.9900321364402771,grad_norm: 0.8999002611358875, iteration: 256921
loss: 0.9786991477012634,grad_norm: 0.9631404823025942, iteration: 256922
loss: 1.0284956693649292,grad_norm: 0.9999989487719071, iteration: 256923
loss: 1.0608903169631958,grad_norm: 0.9999997371802078, iteration: 256924
loss: 1.018761157989502,grad_norm: 0.9999999385463548, iteration: 256925
loss: 1.0360441207885742,grad_norm: 0.9999999102952569, iteration: 256926
loss: 0.9805159568786621,grad_norm: 0.81224789539771, iteration: 256927
loss: 1.016581654548645,grad_norm: 0.8286453130429701, iteration: 256928
loss: 1.0616880655288696,grad_norm: 0.9097556414441347, iteration: 256929
loss: 0.9855880737304688,grad_norm: 0.7883180240729729, iteration: 256930
loss: 1.1002799272537231,grad_norm: 0.9999994395384924, iteration: 256931
loss: 1.0214335918426514,grad_norm: 0.9999990049458679, iteration: 256932
loss: 1.012558102607727,grad_norm: 0.982926062946845, iteration: 256933
loss: 0.9628040194511414,grad_norm: 0.9999991022363214, iteration: 256934
loss: 1.059639811515808,grad_norm: 0.9999997381822631, iteration: 256935
loss: 0.9824090600013733,grad_norm: 0.9321937376906664, iteration: 256936
loss: 0.9926164746284485,grad_norm: 0.9720136754606383, iteration: 256937
loss: 0.9957723021507263,grad_norm: 0.8613381008189765, iteration: 256938
loss: 0.9798184037208557,grad_norm: 0.8463311037688902, iteration: 256939
loss: 1.0046042203903198,grad_norm: 0.8535912076974079, iteration: 256940
loss: 1.0205652713775635,grad_norm: 0.9999994874620584, iteration: 256941
loss: 1.0068508386611938,grad_norm: 0.8793439620652171, iteration: 256942
loss: 0.9934539794921875,grad_norm: 0.7035380582204142, iteration: 256943
loss: 0.9728902578353882,grad_norm: 0.8192482030262874, iteration: 256944
loss: 0.9728083610534668,grad_norm: 0.6838967905580149, iteration: 256945
loss: 0.9847020506858826,grad_norm: 0.8836756097751491, iteration: 256946
loss: 0.9521589875221252,grad_norm: 0.7959183070845256, iteration: 256947
loss: 1.068022608757019,grad_norm: 0.9999999635627744, iteration: 256948
loss: 0.9864909052848816,grad_norm: 0.999998999762927, iteration: 256949
loss: 1.0110840797424316,grad_norm: 0.999999322047461, iteration: 256950
loss: 0.9918241500854492,grad_norm: 0.8561279254621769, iteration: 256951
loss: 0.9694435000419617,grad_norm: 0.7088301202527283, iteration: 256952
loss: 0.9722498655319214,grad_norm: 0.9331926415024362, iteration: 256953
loss: 1.0158722400665283,grad_norm: 0.8526473312119123, iteration: 256954
loss: 1.0155857801437378,grad_norm: 0.8740462974109661, iteration: 256955
loss: 0.9887651205062866,grad_norm: 0.9999999907796889, iteration: 256956
loss: 0.9691295027732849,grad_norm: 0.8910098571884784, iteration: 256957
loss: 0.9863260984420776,grad_norm: 0.838455136568207, iteration: 256958
loss: 1.0599637031555176,grad_norm: 0.9999991996655625, iteration: 256959
loss: 0.9899578094482422,grad_norm: 0.9015492794434259, iteration: 256960
loss: 1.0094960927963257,grad_norm: 0.9999990429048006, iteration: 256961
loss: 1.0253995656967163,grad_norm: 0.9999993436517505, iteration: 256962
loss: 1.0090537071228027,grad_norm: 0.8081156659386147, iteration: 256963
loss: 1.009630560874939,grad_norm: 0.9090291928240326, iteration: 256964
loss: 1.008489727973938,grad_norm: 0.999999165652862, iteration: 256965
loss: 1.0427356958389282,grad_norm: 0.8468947064131636, iteration: 256966
loss: 1.0231757164001465,grad_norm: 0.999999125721328, iteration: 256967
loss: 0.9917641282081604,grad_norm: 0.8055673074583366, iteration: 256968
loss: 1.0056571960449219,grad_norm: 0.9337506323195832, iteration: 256969
loss: 1.0229841470718384,grad_norm: 0.7647365201995798, iteration: 256970
loss: 0.9868660569190979,grad_norm: 0.9122190257019337, iteration: 256971
loss: 1.0778604745864868,grad_norm: 0.9600860336232329, iteration: 256972
loss: 0.947924792766571,grad_norm: 0.8967984684951569, iteration: 256973
loss: 1.0404831171035767,grad_norm: 0.9445735334690957, iteration: 256974
loss: 0.9988303184509277,grad_norm: 0.9815251737917852, iteration: 256975
loss: 0.9975746870040894,grad_norm: 0.7523546608451297, iteration: 256976
loss: 1.0176352262496948,grad_norm: 0.7825823295583741, iteration: 256977
loss: 1.0134795904159546,grad_norm: 0.7901786315019075, iteration: 256978
loss: 0.9972280859947205,grad_norm: 0.7829506216805281, iteration: 256979
loss: 1.0045478343963623,grad_norm: 0.8791321686666209, iteration: 256980
loss: 0.9809603691101074,grad_norm: 0.954150891999778, iteration: 256981
loss: 1.080562710762024,grad_norm: 0.9999995041559793, iteration: 256982
loss: 0.9527929425239563,grad_norm: 0.7119825147234503, iteration: 256983
loss: 1.0036420822143555,grad_norm: 0.9417939998084791, iteration: 256984
loss: 1.0210661888122559,grad_norm: 0.8818917821566171, iteration: 256985
loss: 1.006761074066162,grad_norm: 0.9888443983942676, iteration: 256986
loss: 1.0006226301193237,grad_norm: 0.999999069497987, iteration: 256987
loss: 0.9782665967941284,grad_norm: 0.847955595008863, iteration: 256988
loss: 1.0006346702575684,grad_norm: 0.9059567468826744, iteration: 256989
loss: 1.0879868268966675,grad_norm: 0.9999990730771154, iteration: 256990
loss: 1.0559815168380737,grad_norm: 0.9999991215575955, iteration: 256991
loss: 0.978347659111023,grad_norm: 0.9999990730702262, iteration: 256992
loss: 0.9977785348892212,grad_norm: 0.9999998133606364, iteration: 256993
loss: 0.9739794135093689,grad_norm: 0.9460286609632516, iteration: 256994
loss: 0.9698547720909119,grad_norm: 0.8140776860215145, iteration: 256995
loss: 0.9978622794151306,grad_norm: 0.8583731793164887, iteration: 256996
loss: 1.0088688135147095,grad_norm: 0.9999992637936005, iteration: 256997
loss: 1.047265887260437,grad_norm: 0.9654143143186252, iteration: 256998
loss: 1.0560380220413208,grad_norm: 0.9721575287771153, iteration: 256999
loss: 1.0845638513565063,grad_norm: 0.999999337509011, iteration: 257000
loss: 0.9476061463356018,grad_norm: 0.7576310731258571, iteration: 257001
loss: 1.031484603881836,grad_norm: 0.9113860703215758, iteration: 257002
loss: 1.0083750486373901,grad_norm: 0.9480914606208115, iteration: 257003
loss: 0.9761104583740234,grad_norm: 0.8139808856422664, iteration: 257004
loss: 1.1058944463729858,grad_norm: 0.9999998912779652, iteration: 257005
loss: 0.9664158225059509,grad_norm: 0.8854021673408172, iteration: 257006
loss: 1.0414166450500488,grad_norm: 0.9999993462355329, iteration: 257007
loss: 0.9699320793151855,grad_norm: 0.7625036567642557, iteration: 257008
loss: 0.9523658752441406,grad_norm: 0.9999992330877575, iteration: 257009
loss: 0.9998826384544373,grad_norm: 0.8855491430072462, iteration: 257010
loss: 1.0286905765533447,grad_norm: 0.9733584000019773, iteration: 257011
loss: 1.0155750513076782,grad_norm: 0.9445092156006647, iteration: 257012
loss: 1.0099036693572998,grad_norm: 0.7666823583290421, iteration: 257013
loss: 1.0168453454971313,grad_norm: 0.8528327520216862, iteration: 257014
loss: 0.9886483550071716,grad_norm: 0.9999991405135681, iteration: 257015
loss: 0.9552976489067078,grad_norm: 0.9999992494890477, iteration: 257016
loss: 0.9778480529785156,grad_norm: 0.9786624695515774, iteration: 257017
loss: 0.9708092212677002,grad_norm: 0.8994193764871499, iteration: 257018
loss: 1.000360131263733,grad_norm: 0.7128130141381366, iteration: 257019
loss: 0.9752060770988464,grad_norm: 0.9999999096568009, iteration: 257020
loss: 1.011602759361267,grad_norm: 0.7535017008760729, iteration: 257021
loss: 1.0010379552841187,grad_norm: 0.9757259094167557, iteration: 257022
loss: 1.019171953201294,grad_norm: 0.9999991770894081, iteration: 257023
loss: 1.0206385850906372,grad_norm: 0.9469669266008164, iteration: 257024
loss: 0.9980053901672363,grad_norm: 0.8236962268219189, iteration: 257025
loss: 0.9952526092529297,grad_norm: 0.899702374711767, iteration: 257026
loss: 1.020849347114563,grad_norm: 0.8950767741612601, iteration: 257027
loss: 0.9812961220741272,grad_norm: 0.999999150201153, iteration: 257028
loss: 1.0038970708847046,grad_norm: 0.9999991198468403, iteration: 257029
loss: 1.0238195657730103,grad_norm: 0.9123774936330129, iteration: 257030
loss: 1.0264756679534912,grad_norm: 0.9999990788728573, iteration: 257031
loss: 1.0248662233352661,grad_norm: 0.8368417475090411, iteration: 257032
loss: 0.9866471290588379,grad_norm: 0.8092138676128237, iteration: 257033
loss: 1.065671682357788,grad_norm: 0.9999992144379335, iteration: 257034
loss: 1.0211400985717773,grad_norm: 0.8540378556777954, iteration: 257035
loss: 0.9956398606300354,grad_norm: 0.999999948810236, iteration: 257036
loss: 0.9946029782295227,grad_norm: 0.9999995414015469, iteration: 257037
loss: 1.0704052448272705,grad_norm: 0.999999180537117, iteration: 257038
loss: 0.9876577258110046,grad_norm: 0.8971330407478387, iteration: 257039
loss: 1.0209455490112305,grad_norm: 0.898922601334511, iteration: 257040
loss: 0.9984953999519348,grad_norm: 0.8962505271403566, iteration: 257041
loss: 1.0081886053085327,grad_norm: 0.9999992857171336, iteration: 257042
loss: 1.0004140138626099,grad_norm: 0.9080205512442032, iteration: 257043
loss: 1.017470121383667,grad_norm: 0.9922683081761757, iteration: 257044
loss: 0.9785565137863159,grad_norm: 0.9999990342748593, iteration: 257045
loss: 1.0040953159332275,grad_norm: 0.9184792973526773, iteration: 257046
loss: 0.976876437664032,grad_norm: 0.9999989241392865, iteration: 257047
loss: 1.0223088264465332,grad_norm: 0.9087934580938836, iteration: 257048
loss: 0.9711453318595886,grad_norm: 0.9600948206210339, iteration: 257049
loss: 1.042509913444519,grad_norm: 0.8902179525908677, iteration: 257050
loss: 1.012324571609497,grad_norm: 0.7983629434842585, iteration: 257051
loss: 1.031825065612793,grad_norm: 0.9454880279691985, iteration: 257052
loss: 0.9982375502586365,grad_norm: 0.8264123026837754, iteration: 257053
loss: 0.9746717214584351,grad_norm: 0.820295907207235, iteration: 257054
loss: 0.9938088059425354,grad_norm: 0.9176776190783711, iteration: 257055
loss: 0.9905555248260498,grad_norm: 0.9933690745891948, iteration: 257056
loss: 0.9977135062217712,grad_norm: 0.9811019424485633, iteration: 257057
loss: 0.9956724643707275,grad_norm: 0.8075623397420946, iteration: 257058
loss: 0.9949432015419006,grad_norm: 0.8215893606560807, iteration: 257059
loss: 0.9890111684799194,grad_norm: 0.9205204486724812, iteration: 257060
loss: 0.9935404658317566,grad_norm: 0.9688249088646591, iteration: 257061
loss: 1.018110990524292,grad_norm: 0.8882520536099301, iteration: 257062
loss: 1.0132306814193726,grad_norm: 0.9999999855497561, iteration: 257063
loss: 1.0791881084442139,grad_norm: 0.9999992600462755, iteration: 257064
loss: 0.9996280670166016,grad_norm: 0.9743187409817026, iteration: 257065
loss: 1.010562539100647,grad_norm: 0.9838240995485307, iteration: 257066
loss: 0.9785463809967041,grad_norm: 0.9494243536452257, iteration: 257067
loss: 0.993004322052002,grad_norm: 0.8855288116772464, iteration: 257068
loss: 0.9654254913330078,grad_norm: 0.8919532136789076, iteration: 257069
loss: 1.0569236278533936,grad_norm: 0.9999998697955197, iteration: 257070
loss: 0.9927477836608887,grad_norm: 0.7985499303933578, iteration: 257071
loss: 0.994739294052124,grad_norm: 0.9999992402365572, iteration: 257072
loss: 1.0067328214645386,grad_norm: 0.9999991666429926, iteration: 257073
loss: 0.9573546648025513,grad_norm: 0.999999090730437, iteration: 257074
loss: 0.9825547337532043,grad_norm: 0.9398442590944341, iteration: 257075
loss: 0.9846077561378479,grad_norm: 0.9802589301915381, iteration: 257076
loss: 1.01670503616333,grad_norm: 0.866472225948173, iteration: 257077
loss: 0.9723641872406006,grad_norm: 0.9513051828602535, iteration: 257078
loss: 0.9700214862823486,grad_norm: 0.9053623396901668, iteration: 257079
loss: 0.9863764643669128,grad_norm: 0.9999991890976269, iteration: 257080
loss: 0.9479706287384033,grad_norm: 0.8092147646044568, iteration: 257081
loss: 0.9979795217514038,grad_norm: 0.7239274691253837, iteration: 257082
loss: 0.9831838011741638,grad_norm: 0.999999036772896, iteration: 257083
loss: 1.0170997381210327,grad_norm: 0.8332410228465886, iteration: 257084
loss: 0.9976620078086853,grad_norm: 0.8778957460817819, iteration: 257085
loss: 0.9946738481521606,grad_norm: 0.8522412885004037, iteration: 257086
loss: 0.9923135638237,grad_norm: 0.8589646890234262, iteration: 257087
loss: 0.9935797452926636,grad_norm: 0.8844150596105846, iteration: 257088
loss: 0.9665650725364685,grad_norm: 0.9999991874015475, iteration: 257089
loss: 0.9777083992958069,grad_norm: 0.871534126216082, iteration: 257090
loss: 1.015655279159546,grad_norm: 0.7042217781573578, iteration: 257091
loss: 0.9770832061767578,grad_norm: 0.8191747249167026, iteration: 257092
loss: 0.9715259671211243,grad_norm: 0.7234821747549486, iteration: 257093
loss: 0.9696901440620422,grad_norm: 0.9177739496750875, iteration: 257094
loss: 1.0420249700546265,grad_norm: 0.8027823159030283, iteration: 257095
loss: 0.9976270198822021,grad_norm: 0.7575159093212994, iteration: 257096
loss: 1.0158714056015015,grad_norm: 0.9999990459138093, iteration: 257097
loss: 0.9856561422348022,grad_norm: 0.8833999322385742, iteration: 257098
loss: 0.9826545119285583,grad_norm: 0.8370733382100816, iteration: 257099
loss: 0.9952856302261353,grad_norm: 0.8870036486232519, iteration: 257100
loss: 0.9599175453186035,grad_norm: 0.9876942391530785, iteration: 257101
loss: 0.9823837280273438,grad_norm: 0.761927372335373, iteration: 257102
loss: 1.0013091564178467,grad_norm: 0.8365072612530973, iteration: 257103
loss: 1.0217843055725098,grad_norm: 0.9903891387543629, iteration: 257104
loss: 0.9840124249458313,grad_norm: 0.8190340194672034, iteration: 257105
loss: 0.9753937721252441,grad_norm: 0.8611552437191226, iteration: 257106
loss: 1.0141804218292236,grad_norm: 0.9564221953806997, iteration: 257107
loss: 0.9864611625671387,grad_norm: 0.8067986510433844, iteration: 257108
loss: 1.001433253288269,grad_norm: 0.7641274000663624, iteration: 257109
loss: 1.0058784484863281,grad_norm: 0.7712709761305094, iteration: 257110
loss: 1.0580278635025024,grad_norm: 0.9999997094899269, iteration: 257111
loss: 1.0126395225524902,grad_norm: 0.862972156736268, iteration: 257112
loss: 0.9982897043228149,grad_norm: 0.999999462375823, iteration: 257113
loss: 1.0252690315246582,grad_norm: 0.9999997370529241, iteration: 257114
loss: 1.0208461284637451,grad_norm: 0.9736048754178753, iteration: 257115
loss: 1.0292115211486816,grad_norm: 0.9999990734304254, iteration: 257116
loss: 1.0378504991531372,grad_norm: 0.9999990101407396, iteration: 257117
loss: 0.9765730500221252,grad_norm: 0.9310598024302473, iteration: 257118
loss: 1.0001966953277588,grad_norm: 0.9727512678419259, iteration: 257119
loss: 0.9779579043388367,grad_norm: 0.7940661793759805, iteration: 257120
loss: 1.007083535194397,grad_norm: 0.8372068600264364, iteration: 257121
loss: 1.0017857551574707,grad_norm: 0.9184494101707312, iteration: 257122
loss: 0.9980677962303162,grad_norm: 0.7791828459594139, iteration: 257123
loss: 0.9833716154098511,grad_norm: 0.895131785755554, iteration: 257124
loss: 1.0129992961883545,grad_norm: 0.9999989520582881, iteration: 257125
loss: 0.99064701795578,grad_norm: 0.9530607009782842, iteration: 257126
loss: 1.0297833681106567,grad_norm: 0.9999993372057299, iteration: 257127
loss: 1.0081264972686768,grad_norm: 0.9908235777143163, iteration: 257128
loss: 0.9827965497970581,grad_norm: 0.8344625844662825, iteration: 257129
loss: 1.0494639873504639,grad_norm: 0.9051679771032004, iteration: 257130
loss: 0.9985744953155518,grad_norm: 0.9541534240428493, iteration: 257131
loss: 0.9992268085479736,grad_norm: 0.9999991408257514, iteration: 257132
loss: 0.99861079454422,grad_norm: 0.7472202270400501, iteration: 257133
loss: 0.9885823130607605,grad_norm: 0.829614402884313, iteration: 257134
loss: 0.9769253134727478,grad_norm: 0.752656394934389, iteration: 257135
loss: 1.0211645364761353,grad_norm: 0.8057046267705423, iteration: 257136
loss: 0.9902136325836182,grad_norm: 0.8161458387221305, iteration: 257137
loss: 1.0178170204162598,grad_norm: 0.9922579930196406, iteration: 257138
loss: 1.052740216255188,grad_norm: 0.9999989458740893, iteration: 257139
loss: 0.9736519455909729,grad_norm: 0.9068807207181815, iteration: 257140
loss: 0.9570347666740417,grad_norm: 0.8202280863549856, iteration: 257141
loss: 0.9730729460716248,grad_norm: 0.9996576804485834, iteration: 257142
loss: 0.96178138256073,grad_norm: 0.9999990870743528, iteration: 257143
loss: 0.9908637404441833,grad_norm: 0.9651994641712285, iteration: 257144
loss: 0.9723642468452454,grad_norm: 0.7774888060405553, iteration: 257145
loss: 1.0227898359298706,grad_norm: 0.7829046978140638, iteration: 257146
loss: 1.0309120416641235,grad_norm: 0.9999991736422474, iteration: 257147
loss: 1.003299593925476,grad_norm: 0.887903809192966, iteration: 257148
loss: 0.9882899522781372,grad_norm: 0.9769404087232033, iteration: 257149
loss: 1.0705842971801758,grad_norm: 0.9999992422407077, iteration: 257150
loss: 0.9748726487159729,grad_norm: 0.9139137205363538, iteration: 257151
loss: 0.9776900410652161,grad_norm: 0.9630034052533889, iteration: 257152
loss: 1.075365662574768,grad_norm: 0.999999232351349, iteration: 257153
loss: 1.0346044301986694,grad_norm: 0.9999989451149401, iteration: 257154
loss: 1.000428318977356,grad_norm: 0.999999162373477, iteration: 257155
loss: 1.0779560804367065,grad_norm: 0.9999997582201177, iteration: 257156
loss: 0.9837270975112915,grad_norm: 0.8258645527770693, iteration: 257157
loss: 0.9807236194610596,grad_norm: 0.999999104610882, iteration: 257158
loss: 0.9796450138092041,grad_norm: 0.8039069034381167, iteration: 257159
loss: 1.0212565660476685,grad_norm: 0.845840184067476, iteration: 257160
loss: 1.03510582447052,grad_norm: 0.9999989237373486, iteration: 257161
loss: 1.030089259147644,grad_norm: 0.990385304139685, iteration: 257162
loss: 0.9890990853309631,grad_norm: 0.9999996415401408, iteration: 257163
loss: 0.9932641983032227,grad_norm: 0.9339497410402754, iteration: 257164
loss: 1.0136466026306152,grad_norm: 0.7843110214978362, iteration: 257165
loss: 0.9809253215789795,grad_norm: 0.9453110638226253, iteration: 257166
loss: 1.0087261199951172,grad_norm: 0.9067837745665288, iteration: 257167
loss: 1.0200438499450684,grad_norm: 0.9999991847978952, iteration: 257168
loss: 1.004251480102539,grad_norm: 0.9999992565300792, iteration: 257169
loss: 1.1225024461746216,grad_norm: 0.9999997459039459, iteration: 257170
loss: 1.0109426975250244,grad_norm: 0.9730425564898033, iteration: 257171
loss: 1.1009093523025513,grad_norm: 0.9999995444495469, iteration: 257172
loss: 0.9919338822364807,grad_norm: 0.8061212122732259, iteration: 257173
loss: 1.0159187316894531,grad_norm: 0.7822816198496558, iteration: 257174
loss: 1.005213975906372,grad_norm: 0.8727814435237339, iteration: 257175
loss: 0.9853466749191284,grad_norm: 0.9295907981085789, iteration: 257176
loss: 1.0210802555084229,grad_norm: 0.8960508841433283, iteration: 257177
loss: 1.0037250518798828,grad_norm: 0.9688333621875325, iteration: 257178
loss: 0.9590063095092773,grad_norm: 0.8758696735618812, iteration: 257179
loss: 1.0100995302200317,grad_norm: 0.9179220115218387, iteration: 257180
loss: 0.9951992034912109,grad_norm: 0.8678359454122749, iteration: 257181
loss: 1.0162838697433472,grad_norm: 0.8702094565661342, iteration: 257182
loss: 1.0190271139144897,grad_norm: 0.9999990486949799, iteration: 257183
loss: 1.009113073348999,grad_norm: 0.8140203040837332, iteration: 257184
loss: 1.1487137079238892,grad_norm: 0.9999997116955831, iteration: 257185
loss: 0.9962934851646423,grad_norm: 0.8638633427730649, iteration: 257186
loss: 0.9656042456626892,grad_norm: 0.9670058194428, iteration: 257187
loss: 1.0085216760635376,grad_norm: 0.7621237194078145, iteration: 257188
loss: 1.0084950923919678,grad_norm: 0.7990186070406967, iteration: 257189
loss: 0.9979643821716309,grad_norm: 0.9135581980445964, iteration: 257190
loss: 1.012996792793274,grad_norm: 0.9999990681843131, iteration: 257191
loss: 0.9904841184616089,grad_norm: 0.9472879413909904, iteration: 257192
loss: 1.0385626554489136,grad_norm: 0.9999995039665165, iteration: 257193
loss: 1.1118834018707275,grad_norm: 0.9999991201553491, iteration: 257194
loss: 1.0322211980819702,grad_norm: 0.9999990605815803, iteration: 257195
loss: 0.979381799697876,grad_norm: 0.8574317892193166, iteration: 257196
loss: 1.0123978853225708,grad_norm: 0.9363814197896527, iteration: 257197
loss: 1.0116534233093262,grad_norm: 0.7782231839344983, iteration: 257198
loss: 1.0234705209732056,grad_norm: 0.8410335344128629, iteration: 257199
loss: 1.0453516244888306,grad_norm: 0.8267811566759129, iteration: 257200
loss: 1.0260010957717896,grad_norm: 0.9142612996432331, iteration: 257201
loss: 0.9811891913414001,grad_norm: 0.9999996351263032, iteration: 257202
loss: 0.9966545701026917,grad_norm: 0.9955769628259873, iteration: 257203
loss: 0.9342169761657715,grad_norm: 0.8525638579304506, iteration: 257204
loss: 0.9955576062202454,grad_norm: 0.9808214837113693, iteration: 257205
loss: 1.0083688497543335,grad_norm: 0.8059159947417929, iteration: 257206
loss: 0.9821299910545349,grad_norm: 0.8987601772177114, iteration: 257207
loss: 1.001129150390625,grad_norm: 0.9999991258009322, iteration: 257208
loss: 1.1805115938186646,grad_norm: 0.9999999338929758, iteration: 257209
loss: 0.980543315410614,grad_norm: 0.8190995329071995, iteration: 257210
loss: 1.0082627534866333,grad_norm: 0.9355700285139206, iteration: 257211
loss: 0.9918686151504517,grad_norm: 0.9999990019314381, iteration: 257212
loss: 0.9797588586807251,grad_norm: 0.8640899685083243, iteration: 257213
loss: 0.9818733930587769,grad_norm: 0.7977681456645762, iteration: 257214
loss: 1.2272510528564453,grad_norm: 0.9999991758777622, iteration: 257215
loss: 1.0331941843032837,grad_norm: 0.93429987899206, iteration: 257216
loss: 0.9975066781044006,grad_norm: 0.8748568966388087, iteration: 257217
loss: 1.0458502769470215,grad_norm: 0.9999990908056903, iteration: 257218
loss: 1.037047028541565,grad_norm: 0.8917862857471907, iteration: 257219
loss: 1.0186307430267334,grad_norm: 0.9080584180703837, iteration: 257220
loss: 0.9906703233718872,grad_norm: 0.7780530286777394, iteration: 257221
loss: 0.9761897921562195,grad_norm: 0.9270227668317275, iteration: 257222
loss: 0.9863432049751282,grad_norm: 0.9999997249777887, iteration: 257223
loss: 0.9668244123458862,grad_norm: 0.839635537766665, iteration: 257224
loss: 0.9634256362915039,grad_norm: 0.9581947040525784, iteration: 257225
loss: 1.1560168266296387,grad_norm: 0.999999917434142, iteration: 257226
loss: 1.0004299879074097,grad_norm: 0.9043222540815709, iteration: 257227
loss: 0.9657301306724548,grad_norm: 0.987290144799331, iteration: 257228
loss: 1.0400434732437134,grad_norm: 0.7207901716091183, iteration: 257229
loss: 1.001285195350647,grad_norm: 0.9999990129117532, iteration: 257230
loss: 0.996070146560669,grad_norm: 0.9528607187400792, iteration: 257231
loss: 0.9950185418128967,grad_norm: 0.848787478026625, iteration: 257232
loss: 1.0119459629058838,grad_norm: 0.8220300323483662, iteration: 257233
loss: 1.000927448272705,grad_norm: 0.9619120047065938, iteration: 257234
loss: 1.0075688362121582,grad_norm: 0.8025671481980515, iteration: 257235
loss: 0.9886147379875183,grad_norm: 0.8379882000800151, iteration: 257236
loss: 0.9799760580062866,grad_norm: 0.9999991662276978, iteration: 257237
loss: 0.9751737713813782,grad_norm: 0.8138484168979065, iteration: 257238
loss: 1.0041354894638062,grad_norm: 0.8510039781006163, iteration: 257239
loss: 0.9444537162780762,grad_norm: 0.8890475376227456, iteration: 257240
loss: 1.052690029144287,grad_norm: 0.8551874682733005, iteration: 257241
loss: 1.0173825025558472,grad_norm: 0.9999998388272409, iteration: 257242
loss: 0.9980136156082153,grad_norm: 0.9050660746728357, iteration: 257243
loss: 1.035132646560669,grad_norm: 0.7860018021545487, iteration: 257244
loss: 1.003006100654602,grad_norm: 0.9282654574567758, iteration: 257245
loss: 1.076664686203003,grad_norm: 0.9999992307815112, iteration: 257246
loss: 0.974784255027771,grad_norm: 0.9674088980274591, iteration: 257247
loss: 0.9477291107177734,grad_norm: 0.8263625706207407, iteration: 257248
loss: 0.9818676114082336,grad_norm: 0.9999991783592191, iteration: 257249
loss: 1.0168602466583252,grad_norm: 0.8676769293644022, iteration: 257250
loss: 1.006049633026123,grad_norm: 0.9999993166378193, iteration: 257251
loss: 1.0335739850997925,grad_norm: 0.9068950026211334, iteration: 257252
loss: 0.9998222589492798,grad_norm: 0.8903659006847485, iteration: 257253
loss: 0.9639505743980408,grad_norm: 0.6969317312717107, iteration: 257254
loss: 1.0182479619979858,grad_norm: 0.9999991099131927, iteration: 257255
loss: 0.9678884744644165,grad_norm: 0.993437117294022, iteration: 257256
loss: 1.0104668140411377,grad_norm: 0.9166299676027948, iteration: 257257
loss: 1.0004314184188843,grad_norm: 0.9756306862179114, iteration: 257258
loss: 1.0161552429199219,grad_norm: 0.7988016552347014, iteration: 257259
loss: 0.9709011316299438,grad_norm: 0.9204683291405658, iteration: 257260
loss: 1.0288240909576416,grad_norm: 0.9325068678642333, iteration: 257261
loss: 1.0143922567367554,grad_norm: 0.9048845285809862, iteration: 257262
loss: 0.9889842867851257,grad_norm: 0.9632618002162813, iteration: 257263
loss: 0.9427309632301331,grad_norm: 0.7976594981989581, iteration: 257264
loss: 1.0015997886657715,grad_norm: 0.869579304575656, iteration: 257265
loss: 1.023374080657959,grad_norm: 0.9741634196282845, iteration: 257266
loss: 0.9545029401779175,grad_norm: 0.884137237160996, iteration: 257267
loss: 1.0132609605789185,grad_norm: 0.9999995462510477, iteration: 257268
loss: 0.9891071319580078,grad_norm: 0.8367291296639611, iteration: 257269
loss: 1.0613731145858765,grad_norm: 0.999998989183604, iteration: 257270
loss: 1.046852469444275,grad_norm: 0.9999997175633998, iteration: 257271
loss: 1.0145795345306396,grad_norm: 0.871062599084736, iteration: 257272
loss: 0.9933717846870422,grad_norm: 0.9395186428418462, iteration: 257273
loss: 0.9527022242546082,grad_norm: 0.9009539062166942, iteration: 257274
loss: 1.0616114139556885,grad_norm: 0.9999992464275862, iteration: 257275
loss: 0.975714921951294,grad_norm: 0.763891758109406, iteration: 257276
loss: 1.0315736532211304,grad_norm: 0.9999991030501227, iteration: 257277
loss: 0.989990234375,grad_norm: 0.8354801831991212, iteration: 257278
loss: 1.0586236715316772,grad_norm: 0.9430371508647463, iteration: 257279
loss: 1.0167412757873535,grad_norm: 0.8700557664190256, iteration: 257280
loss: 0.9830110669136047,grad_norm: 0.9117852502171373, iteration: 257281
loss: 1.0184427499771118,grad_norm: 0.9119831414696982, iteration: 257282
loss: 0.983144223690033,grad_norm: 0.7701357152283405, iteration: 257283
loss: 1.006001591682434,grad_norm: 0.9134737405388943, iteration: 257284
loss: 1.0187431573867798,grad_norm: 0.9999997065431854, iteration: 257285
loss: 0.9849910736083984,grad_norm: 0.954998835056061, iteration: 257286
loss: 1.0264415740966797,grad_norm: 0.9999990384372116, iteration: 257287
loss: 1.0168052911758423,grad_norm: 0.9999991642198991, iteration: 257288
loss: 1.0183080434799194,grad_norm: 0.8231511180447728, iteration: 257289
loss: 0.9952954053878784,grad_norm: 0.7890008148337715, iteration: 257290
loss: 1.0322808027267456,grad_norm: 0.9999999539227344, iteration: 257291
loss: 1.033228874206543,grad_norm: 0.8633892095121541, iteration: 257292
loss: 1.039486050605774,grad_norm: 0.9947982986812611, iteration: 257293
loss: 1.0114372968673706,grad_norm: 0.9313047373090189, iteration: 257294
loss: 1.0585923194885254,grad_norm: 0.9999994526358841, iteration: 257295
loss: 1.1049442291259766,grad_norm: 0.8765775034346343, iteration: 257296
loss: 1.0177128314971924,grad_norm: 0.9928982448435588, iteration: 257297
loss: 0.9966657757759094,grad_norm: 0.9999990853046209, iteration: 257298
loss: 1.0152623653411865,grad_norm: 0.9999992365762981, iteration: 257299
loss: 1.0426594018936157,grad_norm: 0.999999364565833, iteration: 257300
loss: 0.9910582900047302,grad_norm: 0.82088178162841, iteration: 257301
loss: 0.9774270057678223,grad_norm: 0.8601730626304276, iteration: 257302
loss: 1.000863790512085,grad_norm: 0.9999994736854413, iteration: 257303
loss: 0.9832249879837036,grad_norm: 0.844773087646829, iteration: 257304
loss: 1.0314695835113525,grad_norm: 0.9799857625765495, iteration: 257305
loss: 1.0641839504241943,grad_norm: 0.9999992090926214, iteration: 257306
loss: 0.9972749948501587,grad_norm: 0.9999995530005791, iteration: 257307
loss: 1.0521554946899414,grad_norm: 0.9999989249461941, iteration: 257308
loss: 1.0239490270614624,grad_norm: 0.9999998633905264, iteration: 257309
loss: 1.0241639614105225,grad_norm: 0.9999991389533807, iteration: 257310
loss: 0.9919571876525879,grad_norm: 0.9999992986885229, iteration: 257311
loss: 1.0001405477523804,grad_norm: 0.931664531428051, iteration: 257312
loss: 1.1589486598968506,grad_norm: 0.9999999865988753, iteration: 257313
loss: 1.0303635597229004,grad_norm: 0.999999605303553, iteration: 257314
loss: 1.020329475402832,grad_norm: 0.9613058593195477, iteration: 257315
loss: 1.0728198289871216,grad_norm: 0.9526686979768905, iteration: 257316
loss: 1.031652808189392,grad_norm: 0.9144238691989852, iteration: 257317
loss: 0.9862819910049438,grad_norm: 0.8839071029063463, iteration: 257318
loss: 0.9974858164787292,grad_norm: 0.84850895362004, iteration: 257319
loss: 0.9853766560554504,grad_norm: 0.8900314559809355, iteration: 257320
loss: 1.0090100765228271,grad_norm: 0.9427581873762257, iteration: 257321
loss: 0.9685788154602051,grad_norm: 0.8880544221057263, iteration: 257322
loss: 0.9809882640838623,grad_norm: 0.9999996401160788, iteration: 257323
loss: 0.9995616674423218,grad_norm: 0.9999996416704986, iteration: 257324
loss: 1.022788405418396,grad_norm: 0.9999990403072462, iteration: 257325
loss: 1.0939717292785645,grad_norm: 0.8143202892634062, iteration: 257326
loss: 1.0456271171569824,grad_norm: 0.8303226643566847, iteration: 257327
loss: 1.0465725660324097,grad_norm: 0.9315382796017144, iteration: 257328
loss: 1.0293800830841064,grad_norm: 0.9430685123511746, iteration: 257329
loss: 0.9977248907089233,grad_norm: 0.9947811435723511, iteration: 257330
loss: 0.993964672088623,grad_norm: 0.7977610300860485, iteration: 257331
loss: 1.0061475038528442,grad_norm: 0.988427610448836, iteration: 257332
loss: 1.007256031036377,grad_norm: 0.8631111159294999, iteration: 257333
loss: 1.216848611831665,grad_norm: 0.9999994839324863, iteration: 257334
loss: 0.9733344316482544,grad_norm: 0.999999182410245, iteration: 257335
loss: 1.0049999952316284,grad_norm: 0.8308996451483945, iteration: 257336
loss: 1.0305612087249756,grad_norm: 0.9999995894510155, iteration: 257337
loss: 1.0373908281326294,grad_norm: 0.9999990223071323, iteration: 257338
loss: 0.9791964292526245,grad_norm: 0.7810368112127155, iteration: 257339
loss: 1.00897216796875,grad_norm: 0.9828862275973778, iteration: 257340
loss: 1.1720081567764282,grad_norm: 0.9999999644045217, iteration: 257341
loss: 1.0021462440490723,grad_norm: 0.9148278098000249, iteration: 257342
loss: 1.0533759593963623,grad_norm: 0.9999991637716523, iteration: 257343
loss: 1.054748773574829,grad_norm: 0.9999997607919614, iteration: 257344
loss: 1.0758572816848755,grad_norm: 0.907529009906339, iteration: 257345
loss: 1.023154616355896,grad_norm: 0.9999995047212522, iteration: 257346
loss: 1.0114084482192993,grad_norm: 0.8532455496142792, iteration: 257347
loss: 1.0109480619430542,grad_norm: 0.8120761312955153, iteration: 257348
loss: 1.0100740194320679,grad_norm: 0.9999990180279604, iteration: 257349
loss: 1.004876971244812,grad_norm: 0.8770267084397634, iteration: 257350
loss: 0.9729116559028625,grad_norm: 0.8023117285102329, iteration: 257351
loss: 1.012925148010254,grad_norm: 0.9273179545958118, iteration: 257352
loss: 0.9587222933769226,grad_norm: 0.86544093236919, iteration: 257353
loss: 1.0003114938735962,grad_norm: 0.960189126889905, iteration: 257354
loss: 1.0024898052215576,grad_norm: 0.9048391897624428, iteration: 257355
loss: 1.0022037029266357,grad_norm: 0.9189556809571975, iteration: 257356
loss: 1.0005310773849487,grad_norm: 0.9662924676315388, iteration: 257357
loss: 0.9579170346260071,grad_norm: 0.985670949473282, iteration: 257358
loss: 1.0319768190383911,grad_norm: 0.9999992144742578, iteration: 257359
loss: 0.9921812415122986,grad_norm: 0.959979661706928, iteration: 257360
loss: 0.9845277667045593,grad_norm: 0.8474833383206201, iteration: 257361
loss: 1.0282925367355347,grad_norm: 0.9999991735370456, iteration: 257362
loss: 1.0078761577606201,grad_norm: 0.8825389897454035, iteration: 257363
loss: 1.0140653848648071,grad_norm: 0.9037679114506797, iteration: 257364
loss: 1.008276104927063,grad_norm: 0.7879160550444491, iteration: 257365
loss: 1.0114315748214722,grad_norm: 0.9999998969066992, iteration: 257366
loss: 1.0090298652648926,grad_norm: 0.9817842658159054, iteration: 257367
loss: 0.971928060054779,grad_norm: 0.8704674015936545, iteration: 257368
loss: 1.026029348373413,grad_norm: 0.9362182301251366, iteration: 257369
loss: 1.012478232383728,grad_norm: 0.9999989148996591, iteration: 257370
loss: 0.9989019632339478,grad_norm: 0.8602303899172127, iteration: 257371
loss: 1.0252641439437866,grad_norm: 0.9360898447750042, iteration: 257372
loss: 0.9952226281166077,grad_norm: 0.9999991311346742, iteration: 257373
loss: 1.0039403438568115,grad_norm: 0.969376756898009, iteration: 257374
loss: 1.0102092027664185,grad_norm: 0.9999993041822367, iteration: 257375
loss: 1.0074894428253174,grad_norm: 0.9385044012221623, iteration: 257376
loss: 1.0016789436340332,grad_norm: 0.9999990735298833, iteration: 257377
loss: 1.0189776420593262,grad_norm: 0.9999990746752524, iteration: 257378
loss: 1.0091526508331299,grad_norm: 0.767529897870174, iteration: 257379
loss: 0.9799529910087585,grad_norm: 0.9858270532010723, iteration: 257380
loss: 1.0220876932144165,grad_norm: 0.939320630891535, iteration: 257381
loss: 0.9556472301483154,grad_norm: 0.8882508822701918, iteration: 257382
loss: 0.9895527362823486,grad_norm: 0.9077500747546757, iteration: 257383
loss: 0.9901643395423889,grad_norm: 0.917676277521254, iteration: 257384
loss: 1.0154807567596436,grad_norm: 0.892868311901113, iteration: 257385
loss: 0.9949330687522888,grad_norm: 0.9630307973648091, iteration: 257386
loss: 1.0033353567123413,grad_norm: 0.7653799925019928, iteration: 257387
loss: 1.035257339477539,grad_norm: 0.999999884936648, iteration: 257388
loss: 1.013970136642456,grad_norm: 0.8532558109784163, iteration: 257389
loss: 0.9723171591758728,grad_norm: 0.8892518805005638, iteration: 257390
loss: 1.0581555366516113,grad_norm: 0.9999992163396663, iteration: 257391
loss: 0.9872196912765503,grad_norm: 0.9304206463497012, iteration: 257392
loss: 1.0029488801956177,grad_norm: 0.9788791983524476, iteration: 257393
loss: 1.0217416286468506,grad_norm: 0.9999995396017152, iteration: 257394
loss: 0.9911404848098755,grad_norm: 0.7891355980881143, iteration: 257395
loss: 1.0146435499191284,grad_norm: 0.7210772866337799, iteration: 257396
loss: 0.9543842077255249,grad_norm: 0.8551571946711891, iteration: 257397
loss: 0.9913907051086426,grad_norm: 0.8144909229882525, iteration: 257398
loss: 0.9922189712524414,grad_norm: 0.9999991223664513, iteration: 257399
loss: 1.0482975244522095,grad_norm: 0.9271813968769916, iteration: 257400
loss: 1.020121693611145,grad_norm: 0.9108992538083999, iteration: 257401
loss: 0.9719485640525818,grad_norm: 0.999999484280381, iteration: 257402
loss: 1.0758134126663208,grad_norm: 0.9999991113204314, iteration: 257403
loss: 1.0367205142974854,grad_norm: 0.985822479088496, iteration: 257404
loss: 1.004277229309082,grad_norm: 0.9999991281476623, iteration: 257405
loss: 1.0032891035079956,grad_norm: 0.9999990891989688, iteration: 257406
loss: 1.0329725742340088,grad_norm: 0.8775925298771633, iteration: 257407
loss: 1.1734832525253296,grad_norm: 1.0000001019581455, iteration: 257408
loss: 0.9985322952270508,grad_norm: 0.9525560747214941, iteration: 257409
loss: 0.9850261211395264,grad_norm: 0.9316775118766661, iteration: 257410
loss: 0.9837759733200073,grad_norm: 0.889102911906224, iteration: 257411
loss: 1.1999074220657349,grad_norm: 0.9999998769024784, iteration: 257412
loss: 1.026107907295227,grad_norm: 0.8490001073053961, iteration: 257413
loss: 1.0364686250686646,grad_norm: 0.7433997435296786, iteration: 257414
loss: 1.0480821132659912,grad_norm: 0.7823746884523153, iteration: 257415
loss: 1.0062955617904663,grad_norm: 0.8445353634496384, iteration: 257416
loss: 0.982127845287323,grad_norm: 0.7421386259672257, iteration: 257417
loss: 1.0121020078659058,grad_norm: 0.8963332310744715, iteration: 257418
loss: 1.02427339553833,grad_norm: 0.8560672309174311, iteration: 257419
loss: 0.9554369449615479,grad_norm: 0.7712101932575945, iteration: 257420
loss: 1.0504213571548462,grad_norm: 0.9999991622485724, iteration: 257421
loss: 1.0251071453094482,grad_norm: 0.9999996972061699, iteration: 257422
loss: 1.0474755764007568,grad_norm: 0.9999990738272375, iteration: 257423
loss: 1.0421830415725708,grad_norm: 0.6959279991691616, iteration: 257424
loss: 0.9730565547943115,grad_norm: 0.874350349999306, iteration: 257425
loss: 1.028264045715332,grad_norm: 0.9999991810447163, iteration: 257426
loss: 0.9806345105171204,grad_norm: 0.9145148410470554, iteration: 257427
loss: 1.0036629438400269,grad_norm: 0.8702802549998047, iteration: 257428
loss: 0.9750264286994934,grad_norm: 0.9505050472095193, iteration: 257429
loss: 0.9811077117919922,grad_norm: 0.9999993955192311, iteration: 257430
loss: 1.140717625617981,grad_norm: 0.9999993852275105, iteration: 257431
loss: 0.9771212339401245,grad_norm: 0.999999374894656, iteration: 257432
loss: 1.0616514682769775,grad_norm: 0.9999998602639371, iteration: 257433
loss: 0.9755754470825195,grad_norm: 0.9999990346865206, iteration: 257434
loss: 1.0048284530639648,grad_norm: 0.8589975442526955, iteration: 257435
loss: 1.1167216300964355,grad_norm: 0.9999994152763607, iteration: 257436
loss: 1.0722699165344238,grad_norm: 0.9999997746051545, iteration: 257437
loss: 0.9461179375648499,grad_norm: 0.8355379857178679, iteration: 257438
loss: 1.0730944871902466,grad_norm: 0.9999998998295037, iteration: 257439
loss: 1.0866549015045166,grad_norm: 0.9955140618450231, iteration: 257440
loss: 1.000170111656189,grad_norm: 0.7893575440948453, iteration: 257441
loss: 0.9862291216850281,grad_norm: 0.9558222312714888, iteration: 257442
loss: 1.0061229467391968,grad_norm: 0.9999998098017464, iteration: 257443
loss: 1.020339846611023,grad_norm: 0.9996100469738887, iteration: 257444
loss: 1.079908847808838,grad_norm: 0.9750022010543278, iteration: 257445
loss: 0.9868643283843994,grad_norm: 0.7957643279362436, iteration: 257446
loss: 0.9999122619628906,grad_norm: 0.9458287795200484, iteration: 257447
loss: 1.018272042274475,grad_norm: 0.9153885003781297, iteration: 257448
loss: 1.0394935607910156,grad_norm: 0.9032420320275405, iteration: 257449
loss: 0.9720597863197327,grad_norm: 0.9533575553394529, iteration: 257450
loss: 1.0296763181686401,grad_norm: 0.9999990438567593, iteration: 257451
loss: 1.0031483173370361,grad_norm: 0.9999991187717803, iteration: 257452
loss: 1.0013874769210815,grad_norm: 0.9999990763103879, iteration: 257453
loss: 0.9836342334747314,grad_norm: 0.9999994125371386, iteration: 257454
loss: 0.9806079268455505,grad_norm: 0.7861604060023505, iteration: 257455
loss: 1.0166339874267578,grad_norm: 0.8978430600690672, iteration: 257456
loss: 1.0052032470703125,grad_norm: 0.9999990595000536, iteration: 257457
loss: 1.032081127166748,grad_norm: 0.9999990686188285, iteration: 257458
loss: 1.0186916589736938,grad_norm: 0.8106605584355006, iteration: 257459
loss: 1.0180025100708008,grad_norm: 0.7761642522443651, iteration: 257460
loss: 1.0812129974365234,grad_norm: 0.8916881998500489, iteration: 257461
loss: 0.9826820492744446,grad_norm: 0.97870801790609, iteration: 257462
loss: 0.9917227029800415,grad_norm: 0.9370648640091201, iteration: 257463
loss: 0.999864399433136,grad_norm: 0.7937572973153697, iteration: 257464
loss: 1.0208600759506226,grad_norm: 0.9999990485823346, iteration: 257465
loss: 0.990439236164093,grad_norm: 0.9281498309828409, iteration: 257466
loss: 1.0493510961532593,grad_norm: 0.9999991249102015, iteration: 257467
loss: 1.009346842765808,grad_norm: 0.7796499825386791, iteration: 257468
loss: 1.0166912078857422,grad_norm: 0.9681402508081186, iteration: 257469
loss: 1.0081400871276855,grad_norm: 0.8592680082821484, iteration: 257470
loss: 1.0182031393051147,grad_norm: 0.7905927318133809, iteration: 257471
loss: 0.9808303117752075,grad_norm: 0.9328293209506316, iteration: 257472
loss: 1.0162149667739868,grad_norm: 0.7832682101067067, iteration: 257473
loss: 0.9639227986335754,grad_norm: 0.7935628385654826, iteration: 257474
loss: 0.9890999794006348,grad_norm: 0.9999993132248846, iteration: 257475
loss: 1.0224735736846924,grad_norm: 0.8987346842340278, iteration: 257476
loss: 0.9716150760650635,grad_norm: 0.9111412380556136, iteration: 257477
loss: 1.0278137922286987,grad_norm: 0.9999990814417592, iteration: 257478
loss: 0.9890649914741516,grad_norm: 0.8397995626119582, iteration: 257479
loss: 1.0148755311965942,grad_norm: 0.999999837475069, iteration: 257480
loss: 1.053215503692627,grad_norm: 0.9651048000244591, iteration: 257481
loss: 1.0157614946365356,grad_norm: 0.9999990036666291, iteration: 257482
loss: 1.0164562463760376,grad_norm: 0.8402373563571807, iteration: 257483
loss: 1.0218911170959473,grad_norm: 0.9999998485211082, iteration: 257484
loss: 1.0173958539962769,grad_norm: 0.8843212788085497, iteration: 257485
loss: 1.0055714845657349,grad_norm: 0.8161660925718024, iteration: 257486
loss: 0.9885411262512207,grad_norm: 0.7714796199398017, iteration: 257487
loss: 0.9878063201904297,grad_norm: 0.9999990840315374, iteration: 257488
loss: 1.0469175577163696,grad_norm: 0.9126236941292958, iteration: 257489
loss: 0.9969671368598938,grad_norm: 0.9999992456020073, iteration: 257490
loss: 0.9868130087852478,grad_norm: 0.9566493777958899, iteration: 257491
loss: 0.9936551451683044,grad_norm: 0.7940115746136909, iteration: 257492
loss: 1.0276259183883667,grad_norm: 0.8523469312039653, iteration: 257493
loss: 1.0033398866653442,grad_norm: 0.999999235499017, iteration: 257494
loss: 1.0152784585952759,grad_norm: 0.8305689732824545, iteration: 257495
loss: 1.0264716148376465,grad_norm: 0.9999990504823477, iteration: 257496
loss: 1.023921012878418,grad_norm: 0.9999996295285593, iteration: 257497
loss: 0.9966470003128052,grad_norm: 0.999998988579321, iteration: 257498
loss: 0.993126630783081,grad_norm: 0.9111328276960377, iteration: 257499
loss: 0.9664428234100342,grad_norm: 0.9999992076646222, iteration: 257500
loss: 0.9866113066673279,grad_norm: 0.8633347729058412, iteration: 257501
loss: 1.0553414821624756,grad_norm: 0.9999994935904101, iteration: 257502
loss: 0.9993553757667542,grad_norm: 0.9649067813039299, iteration: 257503
loss: 1.0278257131576538,grad_norm: 0.9999996171511965, iteration: 257504
loss: 1.0294363498687744,grad_norm: 0.9999999996875809, iteration: 257505
loss: 1.0113308429718018,grad_norm: 0.8996991133919735, iteration: 257506
loss: 1.127727746963501,grad_norm: 0.9999998351321033, iteration: 257507
loss: 0.9832800030708313,grad_norm: 0.8831957403676194, iteration: 257508
loss: 0.9809685349464417,grad_norm: 0.943625992208369, iteration: 257509
loss: 0.9984705448150635,grad_norm: 0.9093219318887176, iteration: 257510
loss: 0.9923574328422546,grad_norm: 0.9807498679384538, iteration: 257511
loss: 1.0884194374084473,grad_norm: 0.8474820380925233, iteration: 257512
loss: 0.9383681416511536,grad_norm: 0.9255912928856546, iteration: 257513
loss: 0.9925673604011536,grad_norm: 0.8117338350913007, iteration: 257514
loss: 1.0354580879211426,grad_norm: 0.9999999348389825, iteration: 257515
loss: 1.059554934501648,grad_norm: 0.9999994733229214, iteration: 257516
loss: 1.0227677822113037,grad_norm: 0.9999991930880782, iteration: 257517
loss: 1.008317470550537,grad_norm: 0.7493714108176315, iteration: 257518
loss: 1.0046815872192383,grad_norm: 0.9999992409699835, iteration: 257519
loss: 0.9806877374649048,grad_norm: 0.9493940627793703, iteration: 257520
loss: 1.0266354084014893,grad_norm: 0.9999991019235582, iteration: 257521
loss: 1.0037113428115845,grad_norm: 0.8573537752079865, iteration: 257522
loss: 0.9912095069885254,grad_norm: 0.8135368103502583, iteration: 257523
loss: 1.0214307308197021,grad_norm: 0.9999991492507151, iteration: 257524
loss: 1.0028436183929443,grad_norm: 0.8216170600334074, iteration: 257525
loss: 1.0213022232055664,grad_norm: 0.8257722884604771, iteration: 257526
loss: 0.9972203373908997,grad_norm: 0.8774559630246778, iteration: 257527
loss: 0.9694859981536865,grad_norm: 0.9601334012076604, iteration: 257528
loss: 0.9882274866104126,grad_norm: 0.7575315624348836, iteration: 257529
loss: 1.1306672096252441,grad_norm: 0.9999994298571314, iteration: 257530
loss: 1.0205998420715332,grad_norm: 0.9999990484073026, iteration: 257531
loss: 0.9886879324913025,grad_norm: 0.8977361802095113, iteration: 257532
loss: 0.9959298372268677,grad_norm: 0.9108134843702962, iteration: 257533
loss: 1.0100646018981934,grad_norm: 0.9923921126041535, iteration: 257534
loss: 1.0359959602355957,grad_norm: 0.9999992737527879, iteration: 257535
loss: 0.9878215789794922,grad_norm: 0.884908462630797, iteration: 257536
loss: 1.0069690942764282,grad_norm: 0.8244799269661216, iteration: 257537
loss: 1.0063011646270752,grad_norm: 0.8518968264387291, iteration: 257538
loss: 1.039246678352356,grad_norm: 0.8661758266511562, iteration: 257539
loss: 1.005367398262024,grad_norm: 0.7869626468056132, iteration: 257540
loss: 1.0106028318405151,grad_norm: 0.7602497403425341, iteration: 257541
loss: 1.0069876909255981,grad_norm: 0.9999991004879298, iteration: 257542
loss: 0.9957943558692932,grad_norm: 0.8389367969397606, iteration: 257543
loss: 0.9883585572242737,grad_norm: 0.999999168317289, iteration: 257544
loss: 1.0211902856826782,grad_norm: 0.8326852731722509, iteration: 257545
loss: 0.997833251953125,grad_norm: 0.8314526593588959, iteration: 257546
loss: 1.022828459739685,grad_norm: 0.9999993094280427, iteration: 257547
loss: 0.9955214858055115,grad_norm: 0.9999991673824151, iteration: 257548
loss: 0.9771727323532104,grad_norm: 0.837146077014029, iteration: 257549
loss: 0.9658972024917603,grad_norm: 0.9713132586263129, iteration: 257550
loss: 1.041555643081665,grad_norm: 0.9999994762267486, iteration: 257551
loss: 1.0250877141952515,grad_norm: 0.892606625373366, iteration: 257552
loss: 0.9672123789787292,grad_norm: 0.9999989040614944, iteration: 257553
loss: 1.0360225439071655,grad_norm: 0.9999999502786334, iteration: 257554
loss: 1.006860375404358,grad_norm: 0.8248299253830678, iteration: 257555
loss: 1.0324901342391968,grad_norm: 0.916220907013839, iteration: 257556
loss: 0.9960204362869263,grad_norm: 0.858877272626752, iteration: 257557
loss: 1.0190324783325195,grad_norm: 0.9006491838246093, iteration: 257558
loss: 1.0540555715560913,grad_norm: 0.9779539245939912, iteration: 257559
loss: 0.9677178263664246,grad_norm: 0.9795557321169569, iteration: 257560
loss: 1.0399925708770752,grad_norm: 0.9999995066432757, iteration: 257561
loss: 1.0093433856964111,grad_norm: 0.999999037856051, iteration: 257562
loss: 0.9706321954727173,grad_norm: 0.9047929827695587, iteration: 257563
loss: 0.9982012510299683,grad_norm: 0.808228173026975, iteration: 257564
loss: 0.9967427849769592,grad_norm: 0.9107526789768656, iteration: 257565
loss: 0.978811502456665,grad_norm: 0.9818695137103429, iteration: 257566
loss: 0.9992682337760925,grad_norm: 0.8196061923386577, iteration: 257567
loss: 1.0095198154449463,grad_norm: 0.8087128511441561, iteration: 257568
loss: 0.9734522104263306,grad_norm: 0.8157594119937916, iteration: 257569
loss: 0.9637073278427124,grad_norm: 0.9999990755580643, iteration: 257570
loss: 1.05408775806427,grad_norm: 0.908858658365588, iteration: 257571
loss: 1.1013684272766113,grad_norm: 0.9999999186923821, iteration: 257572
loss: 1.055095911026001,grad_norm: 0.9044094486218174, iteration: 257573
loss: 1.2242755889892578,grad_norm: 0.999999665697314, iteration: 257574
loss: 1.066426396369934,grad_norm: 0.9999996703165299, iteration: 257575
loss: 1.009933352470398,grad_norm: 0.8115989802953607, iteration: 257576
loss: 1.0687018632888794,grad_norm: 0.9999995089666529, iteration: 257577
loss: 0.9621301889419556,grad_norm: 0.9231164264750874, iteration: 257578
loss: 1.0012938976287842,grad_norm: 0.9494210242034571, iteration: 257579
loss: 1.0033705234527588,grad_norm: 0.8597544509875825, iteration: 257580
loss: 1.0339919328689575,grad_norm: 0.999998927949548, iteration: 257581
loss: 0.9768267869949341,grad_norm: 0.9772534417513877, iteration: 257582
loss: 1.0205193758010864,grad_norm: 0.9252968626969834, iteration: 257583
loss: 1.0104914903640747,grad_norm: 0.8584296865540877, iteration: 257584
loss: 1.0850605964660645,grad_norm: 0.9999989433981039, iteration: 257585
loss: 1.0510838031768799,grad_norm: 0.9999991118827931, iteration: 257586
loss: 0.9895826578140259,grad_norm: 0.8670930499628627, iteration: 257587
loss: 1.010292649269104,grad_norm: 0.9147208126046026, iteration: 257588
loss: 0.9873887896537781,grad_norm: 0.8068068935062678, iteration: 257589
loss: 0.9933133721351624,grad_norm: 0.8383374376699787, iteration: 257590
loss: 0.994683027267456,grad_norm: 0.8942110830899982, iteration: 257591
loss: 1.0066235065460205,grad_norm: 0.9022454240724168, iteration: 257592
loss: 1.018121600151062,grad_norm: 0.9999991773107251, iteration: 257593
loss: 1.0262898206710815,grad_norm: 1.0000000201061559, iteration: 257594
loss: 0.9788649678230286,grad_norm: 0.9534625414511928, iteration: 257595
loss: 0.9803063869476318,grad_norm: 0.8744250084903619, iteration: 257596
loss: 0.9535685777664185,grad_norm: 0.9189135953982174, iteration: 257597
loss: 0.9635990262031555,grad_norm: 0.789206314305841, iteration: 257598
loss: 1.021376132965088,grad_norm: 0.9106157004059603, iteration: 257599
loss: 1.0621658563613892,grad_norm: 0.9061863698213418, iteration: 257600
loss: 0.9890473484992981,grad_norm: 0.9999992355879764, iteration: 257601
loss: 1.0192646980285645,grad_norm: 0.9999992799708118, iteration: 257602
loss: 0.9784797430038452,grad_norm: 0.9278001598611185, iteration: 257603
loss: 1.0034301280975342,grad_norm: 0.999999088445284, iteration: 257604
loss: 0.9629876613616943,grad_norm: 0.8637467476841013, iteration: 257605
loss: 1.0295443534851074,grad_norm: 0.9277295172154046, iteration: 257606
loss: 1.046318769454956,grad_norm: 0.9999993112264436, iteration: 257607
loss: 0.9998922348022461,grad_norm: 0.9999991201950953, iteration: 257608
loss: 0.9954025149345398,grad_norm: 0.9999989504107164, iteration: 257609
loss: 1.0997878313064575,grad_norm: 0.9999997192197226, iteration: 257610
loss: 1.0043420791625977,grad_norm: 0.9252804715700432, iteration: 257611
loss: 0.9897587299346924,grad_norm: 0.9999998398180416, iteration: 257612
loss: 1.02943754196167,grad_norm: 0.9999993701340455, iteration: 257613
loss: 1.012019395828247,grad_norm: 0.9999990873560296, iteration: 257614
loss: 0.9828354716300964,grad_norm: 0.7795319940968033, iteration: 257615
loss: 0.9892467856407166,grad_norm: 0.8111055970876334, iteration: 257616
loss: 1.0135208368301392,grad_norm: 0.99999910434599, iteration: 257617
loss: 1.0026438236236572,grad_norm: 0.9999998593591003, iteration: 257618
loss: 1.0251818895339966,grad_norm: 0.9999989918338229, iteration: 257619
loss: 1.0202338695526123,grad_norm: 0.9089748728817733, iteration: 257620
loss: 1.0182963609695435,grad_norm: 0.8459507082781491, iteration: 257621
loss: 1.076905369758606,grad_norm: 0.9361858423167736, iteration: 257622
loss: 0.9853252172470093,grad_norm: 0.9999992269102425, iteration: 257623
loss: 1.0937312841415405,grad_norm: 0.999999876696552, iteration: 257624
loss: 1.0143734216690063,grad_norm: 0.7685653835880617, iteration: 257625
loss: 0.984585702419281,grad_norm: 0.9353824282634181, iteration: 257626
loss: 1.0174225568771362,grad_norm: 0.9136455829809269, iteration: 257627
loss: 1.0089006423950195,grad_norm: 0.9999990599795819, iteration: 257628
loss: 1.1146730184555054,grad_norm: 0.9999991422175636, iteration: 257629
loss: 1.0333720445632935,grad_norm: 0.8397335505693484, iteration: 257630
loss: 0.9973117113113403,grad_norm: 0.930276382025848, iteration: 257631
loss: 0.9833098649978638,grad_norm: 0.9681695924394911, iteration: 257632
loss: 1.0400800704956055,grad_norm: 0.9176559155374905, iteration: 257633
loss: 0.9913064241409302,grad_norm: 0.9244132403157611, iteration: 257634
loss: 1.005720615386963,grad_norm: 0.8882629119886808, iteration: 257635
loss: 1.0510451793670654,grad_norm: 0.9999996829341533, iteration: 257636
loss: 1.0189974308013916,grad_norm: 0.8422341356940592, iteration: 257637
loss: 0.9896804690361023,grad_norm: 0.9338173692389691, iteration: 257638
loss: 1.112318992614746,grad_norm: 0.9999993567815451, iteration: 257639
loss: 1.0063344240188599,grad_norm: 0.8891547646047185, iteration: 257640
loss: 1.037814974784851,grad_norm: 0.9999991178875628, iteration: 257641
loss: 0.9882810711860657,grad_norm: 0.9999991344527757, iteration: 257642
loss: 1.0033082962036133,grad_norm: 0.9561650047909819, iteration: 257643
loss: 0.9922065138816833,grad_norm: 0.9999991546918952, iteration: 257644
loss: 1.0156594514846802,grad_norm: 0.8328281585280546, iteration: 257645
loss: 0.984747052192688,grad_norm: 0.7178046847187186, iteration: 257646
loss: 0.9726539254188538,grad_norm: 0.8270576135120958, iteration: 257647
loss: 1.039421558380127,grad_norm: 0.986025689394968, iteration: 257648
loss: 1.0008912086486816,grad_norm: 0.9999996834214216, iteration: 257649
loss: 0.9920713901519775,grad_norm: 0.999999271967497, iteration: 257650
loss: 1.059412956237793,grad_norm: 0.9674300189256707, iteration: 257651
loss: 1.0141066312789917,grad_norm: 0.9999992946819207, iteration: 257652
loss: 1.008242130279541,grad_norm: 0.8771844889767522, iteration: 257653
loss: 1.0127100944519043,grad_norm: 0.934948196208973, iteration: 257654
loss: 1.0232386589050293,grad_norm: 0.9414074566264162, iteration: 257655
loss: 0.9936214089393616,grad_norm: 1.0000000315725532, iteration: 257656
loss: 1.0042574405670166,grad_norm: 0.9419367548835426, iteration: 257657
loss: 0.9960933923721313,grad_norm: 0.9582497444939657, iteration: 257658
loss: 1.0105626583099365,grad_norm: 0.7490032924776829, iteration: 257659
loss: 1.0344914197921753,grad_norm: 0.8671073450594046, iteration: 257660
loss: 1.1198559999465942,grad_norm: 0.9999994467131, iteration: 257661
loss: 0.971821665763855,grad_norm: 0.8133731907868526, iteration: 257662
loss: 0.9668334722518921,grad_norm: 0.8562411129427833, iteration: 257663
loss: 1.015600562095642,grad_norm: 0.9999998232568688, iteration: 257664
loss: 1.0139458179473877,grad_norm: 0.9866441924210356, iteration: 257665
loss: 0.989844024181366,grad_norm: 0.8383347567997724, iteration: 257666
loss: 0.9839884042739868,grad_norm: 0.7994853183674868, iteration: 257667
loss: 1.0017229318618774,grad_norm: 0.8810962032404018, iteration: 257668
loss: 1.0091378688812256,grad_norm: 0.9290521031527716, iteration: 257669
loss: 1.0245455503463745,grad_norm: 0.9999995135519274, iteration: 257670
loss: 0.9817689061164856,grad_norm: 0.9999991119954427, iteration: 257671
loss: 1.01202392578125,grad_norm: 0.9999998320367844, iteration: 257672
loss: 1.0139296054840088,grad_norm: 0.9310039117860195, iteration: 257673
loss: 1.024407982826233,grad_norm: 0.9270719563941581, iteration: 257674
loss: 0.9796072840690613,grad_norm: 0.9068880739457486, iteration: 257675
loss: 1.0111525058746338,grad_norm: 0.8133029621068901, iteration: 257676
loss: 0.9986734390258789,grad_norm: 0.9035994679913745, iteration: 257677
loss: 1.0074678659439087,grad_norm: 0.9999990810440766, iteration: 257678
loss: 0.9823265671730042,grad_norm: 0.9293843320980236, iteration: 257679
loss: 1.002282738685608,grad_norm: 0.8720731157007872, iteration: 257680
loss: 0.9656204581260681,grad_norm: 0.8258043886378544, iteration: 257681
loss: 1.0134893655776978,grad_norm: 0.9230774095720721, iteration: 257682
loss: 1.0377432107925415,grad_norm: 0.9538968698815525, iteration: 257683
loss: 1.0157150030136108,grad_norm: 0.9999990454413651, iteration: 257684
loss: 0.9521529674530029,grad_norm: 0.8855320541070654, iteration: 257685
loss: 0.9790496826171875,grad_norm: 0.864349963901524, iteration: 257686
loss: 1.026046872138977,grad_norm: 0.9999997258502069, iteration: 257687
loss: 1.0147029161453247,grad_norm: 0.7597508379169685, iteration: 257688
loss: 1.0323805809020996,grad_norm: 0.904268101321316, iteration: 257689
loss: 1.051490306854248,grad_norm: 0.9314404137751562, iteration: 257690
loss: 1.0234184265136719,grad_norm: 0.8933611761802464, iteration: 257691
loss: 1.0119487047195435,grad_norm: 0.9048627739413169, iteration: 257692
loss: 1.0120528936386108,grad_norm: 0.7376031921973951, iteration: 257693
loss: 1.00394606590271,grad_norm: 0.9640452941526918, iteration: 257694
loss: 0.9927861094474792,grad_norm: 0.9999991711369614, iteration: 257695
loss: 1.0163366794586182,grad_norm: 0.8904017414757938, iteration: 257696
loss: 0.9678325057029724,grad_norm: 0.9999990834289635, iteration: 257697
loss: 0.9711111187934875,grad_norm: 0.7953596376259972, iteration: 257698
loss: 0.9678548574447632,grad_norm: 0.919848278524112, iteration: 257699
loss: 1.0081690549850464,grad_norm: 0.8731479723545962, iteration: 257700
loss: 1.0077035427093506,grad_norm: 0.8511464511153615, iteration: 257701
loss: 0.977971613407135,grad_norm: 0.7898981798763415, iteration: 257702
loss: 1.0012892484664917,grad_norm: 0.8111099616430912, iteration: 257703
loss: 1.017345905303955,grad_norm: 0.8923544489921407, iteration: 257704
loss: 1.0093199014663696,grad_norm: 0.9999992239587954, iteration: 257705
loss: 0.996900200843811,grad_norm: 0.9068009656174985, iteration: 257706
loss: 1.0051630735397339,grad_norm: 0.8075526768517826, iteration: 257707
loss: 0.942604660987854,grad_norm: 0.81887623574776, iteration: 257708
loss: 1.0043715238571167,grad_norm: 0.9999992134568326, iteration: 257709
loss: 1.0478999614715576,grad_norm: 0.9999999675177454, iteration: 257710
loss: 1.0354558229446411,grad_norm: 0.83858434399268, iteration: 257711
loss: 0.9966139197349548,grad_norm: 0.9258795570810486, iteration: 257712
loss: 1.0013763904571533,grad_norm: 0.9999995855658345, iteration: 257713
loss: 0.9939518570899963,grad_norm: 0.8028189125861229, iteration: 257714
loss: 1.0222193002700806,grad_norm: 0.856973308339447, iteration: 257715
loss: 0.989966094493866,grad_norm: 0.8412979124810533, iteration: 257716
loss: 0.9632650017738342,grad_norm: 0.8851655944003546, iteration: 257717
loss: 0.9959381818771362,grad_norm: 0.8511291917659642, iteration: 257718
loss: 1.0045920610427856,grad_norm: 0.9999994984572999, iteration: 257719
loss: 0.9970163702964783,grad_norm: 0.8474456928057422, iteration: 257720
loss: 1.061204195022583,grad_norm: 0.999999679943168, iteration: 257721
loss: 1.0328335762023926,grad_norm: 0.9999994216365915, iteration: 257722
loss: 1.0156917572021484,grad_norm: 0.9895180232785143, iteration: 257723
loss: 0.9910987615585327,grad_norm: 0.7549096071215897, iteration: 257724
loss: 0.9831337928771973,grad_norm: 0.8822378971692724, iteration: 257725
loss: 1.0014564990997314,grad_norm: 0.9999997712467529, iteration: 257726
loss: 1.0340629816055298,grad_norm: 0.937185310343983, iteration: 257727
loss: 0.9924083948135376,grad_norm: 0.7920431030074054, iteration: 257728
loss: 1.108743667602539,grad_norm: 0.9999997062353112, iteration: 257729
loss: 0.9993484616279602,grad_norm: 0.9273835821403358, iteration: 257730
loss: 1.022409200668335,grad_norm: 0.9999996378484698, iteration: 257731
loss: 0.986677348613739,grad_norm: 0.8872005600556127, iteration: 257732
loss: 0.9878949522972107,grad_norm: 0.8300439232342078, iteration: 257733
loss: 0.991021454334259,grad_norm: 0.7156376731633218, iteration: 257734
loss: 0.9560664296150208,grad_norm: 0.9999990768791068, iteration: 257735
loss: 1.026145577430725,grad_norm: 0.9999998394391582, iteration: 257736
loss: 1.0050584077835083,grad_norm: 0.9750514952211404, iteration: 257737
loss: 1.098521113395691,grad_norm: 0.9999992977457743, iteration: 257738
loss: 1.0255036354064941,grad_norm: 0.9741276656065471, iteration: 257739
loss: 1.023375391960144,grad_norm: 0.9999999537707157, iteration: 257740
loss: 0.9828678965568542,grad_norm: 0.8583939716865829, iteration: 257741
loss: 0.963043212890625,grad_norm: 0.9999990949142258, iteration: 257742
loss: 0.9925030469894409,grad_norm: 0.9999990673586773, iteration: 257743
loss: 0.9944040775299072,grad_norm: 0.8954235983571713, iteration: 257744
loss: 0.9945972561836243,grad_norm: 0.9305174737658761, iteration: 257745
loss: 1.0525871515274048,grad_norm: 0.9999994723529482, iteration: 257746
loss: 0.9927234053611755,grad_norm: 0.9999992223129008, iteration: 257747
loss: 0.9504556655883789,grad_norm: 0.9999996704156594, iteration: 257748
loss: 0.966927170753479,grad_norm: 0.8629050579540012, iteration: 257749
loss: 1.0246886014938354,grad_norm: 0.9999992742168934, iteration: 257750
loss: 0.9852161407470703,grad_norm: 0.8037098317621725, iteration: 257751
loss: 1.008321762084961,grad_norm: 0.8764861501690199, iteration: 257752
loss: 1.0330922603607178,grad_norm: 0.999999007012195, iteration: 257753
loss: 1.014132022857666,grad_norm: 0.9999990839547344, iteration: 257754
loss: 1.0181427001953125,grad_norm: 0.9999992636558869, iteration: 257755
loss: 1.0151437520980835,grad_norm: 0.8867192517649086, iteration: 257756
loss: 1.0289257764816284,grad_norm: 0.7503133690050476, iteration: 257757
loss: 1.0339808464050293,grad_norm: 0.999999631204405, iteration: 257758
loss: 0.9905520677566528,grad_norm: 0.821983306954827, iteration: 257759
loss: 0.9602518677711487,grad_norm: 0.805634568332765, iteration: 257760
loss: 1.0351450443267822,grad_norm: 0.8080562899849287, iteration: 257761
loss: 0.9839276671409607,grad_norm: 0.9853576022471284, iteration: 257762
loss: 1.0034325122833252,grad_norm: 0.8514532395370319, iteration: 257763
loss: 0.9979633092880249,grad_norm: 0.8554203397728583, iteration: 257764
loss: 1.0063695907592773,grad_norm: 0.8184473276887717, iteration: 257765
loss: 1.0053133964538574,grad_norm: 0.9216477794179693, iteration: 257766
loss: 0.9730421304702759,grad_norm: 0.9999993929616208, iteration: 257767
loss: 1.0041452646255493,grad_norm: 0.8549801053440783, iteration: 257768
loss: 0.9835897088050842,grad_norm: 0.8005301204895168, iteration: 257769
loss: 1.070855975151062,grad_norm: 0.9999999874358456, iteration: 257770
loss: 1.0099157094955444,grad_norm: 0.9544095706005219, iteration: 257771
loss: 1.0394752025604248,grad_norm: 0.8903650463265484, iteration: 257772
loss: 0.9859654307365417,grad_norm: 0.8835806580313819, iteration: 257773
loss: 1.0144809484481812,grad_norm: 0.9999992294705324, iteration: 257774
loss: 0.9639123678207397,grad_norm: 0.806855985377299, iteration: 257775
loss: 0.9643954634666443,grad_norm: 0.8679027714899031, iteration: 257776
loss: 0.9955790042877197,grad_norm: 0.7827985996254743, iteration: 257777
loss: 1.0179619789123535,grad_norm: 0.9100216343634052, iteration: 257778
loss: 1.0269169807434082,grad_norm: 0.9999991735879717, iteration: 257779
loss: 1.0222378969192505,grad_norm: 0.9246694946845183, iteration: 257780
loss: 1.0075141191482544,grad_norm: 0.8044268341919855, iteration: 257781
loss: 0.9993654489517212,grad_norm: 0.9991798602453098, iteration: 257782
loss: 0.9774285554885864,grad_norm: 0.8993933843410691, iteration: 257783
loss: 1.0151745080947876,grad_norm: 0.8123240108044093, iteration: 257784
loss: 0.9855108261108398,grad_norm: 0.8422897305638182, iteration: 257785
loss: 1.0100778341293335,grad_norm: 0.9999990987704765, iteration: 257786
loss: 1.0060895681381226,grad_norm: 0.8309508528230017, iteration: 257787
loss: 1.0154162645339966,grad_norm: 0.8870806838955111, iteration: 257788
loss: 1.0151498317718506,grad_norm: 0.723119824422145, iteration: 257789
loss: 0.9725643992424011,grad_norm: 0.9999990794549422, iteration: 257790
loss: 0.9862717986106873,grad_norm: 0.9492500279154237, iteration: 257791
loss: 0.9856606721878052,grad_norm: 0.9999996661728747, iteration: 257792
loss: 0.9893977046012878,grad_norm: 0.89453764557168, iteration: 257793
loss: 1.0009077787399292,grad_norm: 0.8276458193667412, iteration: 257794
loss: 1.0209619998931885,grad_norm: 0.9999996968674998, iteration: 257795
loss: 1.02060866355896,grad_norm: 0.9925829000240296, iteration: 257796
loss: 0.9922274947166443,grad_norm: 0.8576686434374132, iteration: 257797
loss: 0.9884798526763916,grad_norm: 0.9999991419331202, iteration: 257798
loss: 0.9925222396850586,grad_norm: 0.8962086336453402, iteration: 257799
loss: 1.0270916223526,grad_norm: 0.9999991456635939, iteration: 257800
loss: 1.00933837890625,grad_norm: 0.9999991654953625, iteration: 257801
loss: 0.9909018278121948,grad_norm: 0.9999992384667205, iteration: 257802
loss: 1.0289478302001953,grad_norm: 0.8309194737085158, iteration: 257803
loss: 1.017198920249939,grad_norm: 0.8746520247911226, iteration: 257804
loss: 0.9839180707931519,grad_norm: 0.8777194909616772, iteration: 257805
loss: 1.1093453168869019,grad_norm: 0.9999998652833408, iteration: 257806
loss: 1.024557113647461,grad_norm: 0.901028846077955, iteration: 257807
loss: 0.9746150374412537,grad_norm: 0.9999992111077451, iteration: 257808
loss: 1.0136712789535522,grad_norm: 0.8269222775368873, iteration: 257809
loss: 0.9923927187919617,grad_norm: 0.9450630575710185, iteration: 257810
loss: 1.0160273313522339,grad_norm: 0.9999995625913766, iteration: 257811
loss: 0.9978907704353333,grad_norm: 0.9999995867251834, iteration: 257812
loss: 1.126020073890686,grad_norm: 0.9999992929027754, iteration: 257813
loss: 0.9750524759292603,grad_norm: 0.9034659400139392, iteration: 257814
loss: 0.9870567917823792,grad_norm: 0.9007829614406189, iteration: 257815
loss: 0.9891669750213623,grad_norm: 0.9788817663389361, iteration: 257816
loss: 1.016837239265442,grad_norm: 0.948694300644321, iteration: 257817
loss: 1.0205597877502441,grad_norm: 0.9999991341074413, iteration: 257818
loss: 1.0153999328613281,grad_norm: 0.7746024543671229, iteration: 257819
loss: 0.9585113525390625,grad_norm: 0.8233628192986204, iteration: 257820
loss: 1.0296987295150757,grad_norm: 0.9774436308934913, iteration: 257821
loss: 0.9917805194854736,grad_norm: 0.8838667009492112, iteration: 257822
loss: 1.0048073530197144,grad_norm: 0.8706109522847013, iteration: 257823
loss: 0.9728548526763916,grad_norm: 0.9999994569193751, iteration: 257824
loss: 1.0044894218444824,grad_norm: 0.8350152004786794, iteration: 257825
loss: 0.9889194965362549,grad_norm: 0.8085648986765399, iteration: 257826
loss: 1.0241504907608032,grad_norm: 0.999999145439561, iteration: 257827
loss: 1.0158987045288086,grad_norm: 0.9999989568023867, iteration: 257828
loss: 0.9609655737876892,grad_norm: 0.8972431725238424, iteration: 257829
loss: 1.017151117324829,grad_norm: 0.9999991646341325, iteration: 257830
loss: 0.9959705471992493,grad_norm: 0.7321342167519664, iteration: 257831
loss: 1.0288290977478027,grad_norm: 0.9999991157208207, iteration: 257832
loss: 1.0015976428985596,grad_norm: 0.8769052077153197, iteration: 257833
loss: 0.996171772480011,grad_norm: 0.9999998417719396, iteration: 257834
loss: 1.0135159492492676,grad_norm: 0.8723420983461745, iteration: 257835
loss: 0.9906718730926514,grad_norm: 0.9999990969797979, iteration: 257836
loss: 0.9991961121559143,grad_norm: 0.9813918856697135, iteration: 257837
loss: 1.061816692352295,grad_norm: 0.9204094375963139, iteration: 257838
loss: 1.1494745016098022,grad_norm: 0.9999990974966763, iteration: 257839
loss: 0.9966257810592651,grad_norm: 0.999999177476508, iteration: 257840
loss: 0.9787148237228394,grad_norm: 0.8857904992126138, iteration: 257841
loss: 0.9783212542533875,grad_norm: 0.8882144165904056, iteration: 257842
loss: 1.1548881530761719,grad_norm: 0.9999990129970125, iteration: 257843
loss: 0.9942752718925476,grad_norm: 0.9525235616618919, iteration: 257844
loss: 1.073738694190979,grad_norm: 0.8814923167120261, iteration: 257845
loss: 1.0264970064163208,grad_norm: 0.8615743457428329, iteration: 257846
loss: 1.0499142408370972,grad_norm: 0.9999996399625974, iteration: 257847
loss: 1.0307210683822632,grad_norm: 0.9999997704741984, iteration: 257848
loss: 1.049209475517273,grad_norm: 0.9510591559480159, iteration: 257849
loss: 1.0217881202697754,grad_norm: 0.9999989813595848, iteration: 257850
loss: 0.9877429008483887,grad_norm: 0.9999992348055333, iteration: 257851
loss: 1.0389333963394165,grad_norm: 0.9999995185444265, iteration: 257852
loss: 0.997164785861969,grad_norm: 0.7903047693609783, iteration: 257853
loss: 0.9770904779434204,grad_norm: 0.9999998186426892, iteration: 257854
loss: 0.9999646544456482,grad_norm: 0.8430831415486147, iteration: 257855
loss: 0.9936732649803162,grad_norm: 0.9999998221066995, iteration: 257856
loss: 0.9990660548210144,grad_norm: 0.8791064640436, iteration: 257857
loss: 1.019774079322815,grad_norm: 0.8475635642509818, iteration: 257858
loss: 0.9920031428337097,grad_norm: 0.9259796560178137, iteration: 257859
loss: 1.0145379304885864,grad_norm: 0.964319755781261, iteration: 257860
loss: 0.9968336820602417,grad_norm: 0.8791341303470795, iteration: 257861
loss: 0.9868205785751343,grad_norm: 0.9999992228460276, iteration: 257862
loss: 1.1318145990371704,grad_norm: 0.9999992759604351, iteration: 257863
loss: 1.0300403833389282,grad_norm: 0.9022280010298137, iteration: 257864
loss: 0.9953403472900391,grad_norm: 0.9999990250582705, iteration: 257865
loss: 1.0715605020523071,grad_norm: 0.9943817977162159, iteration: 257866
loss: 1.0174145698547363,grad_norm: 0.7830151378603145, iteration: 257867
loss: 0.9750492572784424,grad_norm: 0.9999991371579449, iteration: 257868
loss: 0.9702909588813782,grad_norm: 0.8448631702601912, iteration: 257869
loss: 0.9534915685653687,grad_norm: 0.7538675373401802, iteration: 257870
loss: 1.0413585901260376,grad_norm: 0.9999994632716872, iteration: 257871
loss: 1.0004072189331055,grad_norm: 0.8387156551882802, iteration: 257872
loss: 0.9866994023323059,grad_norm: 0.9999991627353085, iteration: 257873
loss: 0.9659032821655273,grad_norm: 0.9264380197354958, iteration: 257874
loss: 1.052525520324707,grad_norm: 0.9753803548049373, iteration: 257875
loss: 1.0533345937728882,grad_norm: 0.9999998358165423, iteration: 257876
loss: 1.012973427772522,grad_norm: 0.9999992787367851, iteration: 257877
loss: 0.9901357293128967,grad_norm: 0.9340807128061116, iteration: 257878
loss: 1.0150917768478394,grad_norm: 0.9914722294179243, iteration: 257879
loss: 1.0054324865341187,grad_norm: 0.9315490819019019, iteration: 257880
loss: 1.0205838680267334,grad_norm: 0.9999991450975825, iteration: 257881
loss: 1.0327433347702026,grad_norm: 0.9999991921508155, iteration: 257882
loss: 1.0355042219161987,grad_norm: 0.9961296711281485, iteration: 257883
loss: 0.9846587181091309,grad_norm: 0.8866954817120165, iteration: 257884
loss: 1.000864028930664,grad_norm: 0.9799461605976524, iteration: 257885
loss: 0.9976750016212463,grad_norm: 0.9999995586026145, iteration: 257886
loss: 1.0285122394561768,grad_norm: 0.9999992359322429, iteration: 257887
loss: 1.0168132781982422,grad_norm: 0.9484921007928596, iteration: 257888
loss: 1.0192267894744873,grad_norm: 0.9999991374462562, iteration: 257889
loss: 0.9931640028953552,grad_norm: 0.8073560784132991, iteration: 257890
loss: 0.9919564723968506,grad_norm: 0.9326412472299319, iteration: 257891
loss: 1.042854905128479,grad_norm: 0.999999774264108, iteration: 257892
loss: 1.0256075859069824,grad_norm: 0.9999995855392635, iteration: 257893
loss: 1.0028884410858154,grad_norm: 0.8853043156685592, iteration: 257894
loss: 1.11725652217865,grad_norm: 0.999999486028016, iteration: 257895
loss: 0.984825611114502,grad_norm: 0.9999990908994975, iteration: 257896
loss: 1.0203360319137573,grad_norm: 0.9999992777437495, iteration: 257897
loss: 1.0266904830932617,grad_norm: 0.9999990349185264, iteration: 257898
loss: 1.0169391632080078,grad_norm: 0.8552363731796153, iteration: 257899
loss: 1.0357791185379028,grad_norm: 0.771557897479001, iteration: 257900
loss: 1.0099815130233765,grad_norm: 0.9128555885021165, iteration: 257901
loss: 1.0267413854599,grad_norm: 0.9999989961138447, iteration: 257902
loss: 1.1673412322998047,grad_norm: 0.999999074145709, iteration: 257903
loss: 0.9840490221977234,grad_norm: 0.852015882986155, iteration: 257904
loss: 1.038436770439148,grad_norm: 0.9999992561766193, iteration: 257905
loss: 1.0628185272216797,grad_norm: 0.8565076642358266, iteration: 257906
loss: 0.9847766160964966,grad_norm: 0.9723154427385368, iteration: 257907
loss: 1.0197242498397827,grad_norm: 0.8628588914660537, iteration: 257908
loss: 1.0260553359985352,grad_norm: 0.9999990697644808, iteration: 257909
loss: 1.1305986642837524,grad_norm: 0.9999992682104923, iteration: 257910
loss: 1.0934566259384155,grad_norm: 0.9999991783226361, iteration: 257911
loss: 1.0749461650848389,grad_norm: 0.9999994702121081, iteration: 257912
loss: 0.9811042547225952,grad_norm: 0.8128299439125998, iteration: 257913
loss: 0.9772130846977234,grad_norm: 0.9507326191808324, iteration: 257914
loss: 1.1437121629714966,grad_norm: 0.9999996838150059, iteration: 257915
loss: 1.024910807609558,grad_norm: 0.999999369423811, iteration: 257916
loss: 1.0155552625656128,grad_norm: 0.8233596230408471, iteration: 257917
loss: 1.0259978771209717,grad_norm: 0.9999993667633744, iteration: 257918
loss: 1.0506715774536133,grad_norm: 0.9554765430028779, iteration: 257919
loss: 1.022794485092163,grad_norm: 0.9999994641646708, iteration: 257920
loss: 0.9974826574325562,grad_norm: 0.8756307146448519, iteration: 257921
loss: 1.026707410812378,grad_norm: 0.9634187297699295, iteration: 257922
loss: 1.0518319606781006,grad_norm: 0.9999990303955122, iteration: 257923
loss: 1.0678480863571167,grad_norm: 0.9999990804128862, iteration: 257924
loss: 1.0308798551559448,grad_norm: 0.9771693730406107, iteration: 257925
loss: 0.9882713556289673,grad_norm: 0.9999992387257552, iteration: 257926
loss: 1.1555732488632202,grad_norm: 0.9999992713037367, iteration: 257927
loss: 1.1038897037506104,grad_norm: 0.9999996852046137, iteration: 257928
loss: 1.0084015130996704,grad_norm: 0.8251193599487974, iteration: 257929
loss: 1.0775326490402222,grad_norm: 0.9999995902318359, iteration: 257930
loss: 1.1352006196975708,grad_norm: 0.9999996994873214, iteration: 257931
loss: 0.9907171726226807,grad_norm: 0.9999996930490027, iteration: 257932
loss: 1.0001988410949707,grad_norm: 0.9793174352382317, iteration: 257933
loss: 1.0221010446548462,grad_norm: 0.9706145585131392, iteration: 257934
loss: 1.086389183998108,grad_norm: 0.9999991800234231, iteration: 257935
loss: 0.9905687570571899,grad_norm: 0.9999990361555877, iteration: 257936
loss: 0.9948977828025818,grad_norm: 0.9999991270289847, iteration: 257937
loss: 1.0202792882919312,grad_norm: 0.8592448542165232, iteration: 257938
loss: 0.9968706965446472,grad_norm: 0.930062949338246, iteration: 257939
loss: 1.0568124055862427,grad_norm: 0.9999998978065183, iteration: 257940
loss: 0.9763261675834656,grad_norm: 0.8471864278958413, iteration: 257941
loss: 1.0147428512573242,grad_norm: 0.9744940163399275, iteration: 257942
loss: 1.0116150379180908,grad_norm: 0.9999990722505382, iteration: 257943
loss: 1.0293700695037842,grad_norm: 0.9999991628719341, iteration: 257944
loss: 1.0821912288665771,grad_norm: 0.9999999668738516, iteration: 257945
loss: 1.0597953796386719,grad_norm: 0.9999997389315383, iteration: 257946
loss: 1.002712607383728,grad_norm: 0.9999991412472302, iteration: 257947
loss: 1.0027496814727783,grad_norm: 0.7417597782211769, iteration: 257948
loss: 0.984448254108429,grad_norm: 0.7354354503313331, iteration: 257949
loss: 1.1156721115112305,grad_norm: 0.9999999638720325, iteration: 257950
loss: 1.0277111530303955,grad_norm: 0.9999989836837606, iteration: 257951
loss: 1.0211467742919922,grad_norm: 0.9999993581937001, iteration: 257952
loss: 1.0251871347427368,grad_norm: 0.8647188462166283, iteration: 257953
loss: 1.0526726245880127,grad_norm: 0.9999991799907866, iteration: 257954
loss: 1.0290850400924683,grad_norm: 0.999999211082425, iteration: 257955
loss: 1.0144973993301392,grad_norm: 0.9999997490134, iteration: 257956
loss: 1.0339330434799194,grad_norm: 0.84174871262821, iteration: 257957
loss: 0.9810349941253662,grad_norm: 0.8817858487808384, iteration: 257958
loss: 0.9762873649597168,grad_norm: 0.9999999506351347, iteration: 257959
loss: 1.018051266670227,grad_norm: 0.9999992188732173, iteration: 257960
loss: 1.0487604141235352,grad_norm: 0.999999352005688, iteration: 257961
loss: 1.0167691707611084,grad_norm: 0.9999995523865298, iteration: 257962
loss: 0.9771629571914673,grad_norm: 0.9999996767768138, iteration: 257963
loss: 0.9930722117424011,grad_norm: 0.8827969280601875, iteration: 257964
loss: 0.9998994469642639,grad_norm: 0.8214314268034181, iteration: 257965
loss: 1.0473582744598389,grad_norm: 0.9999994917952889, iteration: 257966
loss: 1.0910980701446533,grad_norm: 0.8712736831134131, iteration: 257967
loss: 0.9689368605613708,grad_norm: 0.8719362693898421, iteration: 257968
loss: 1.0608304738998413,grad_norm: 0.9999991348246304, iteration: 257969
loss: 0.994574785232544,grad_norm: 0.9450533343810922, iteration: 257970
loss: 0.9902011156082153,grad_norm: 0.8729539606637432, iteration: 257971
loss: 1.0751460790634155,grad_norm: 0.9999996692541456, iteration: 257972
loss: 1.2421401739120483,grad_norm: 0.9999995337679646, iteration: 257973
loss: 1.027217984199524,grad_norm: 0.9198664944168626, iteration: 257974
loss: 0.9758124351501465,grad_norm: 0.8302096422984299, iteration: 257975
loss: 1.0002394914627075,grad_norm: 0.9645826154849035, iteration: 257976
loss: 1.0276850461959839,grad_norm: 0.9342842581141164, iteration: 257977
loss: 1.02386474609375,grad_norm: 0.907484056998131, iteration: 257978
loss: 0.9809699058532715,grad_norm: 0.8858561855429742, iteration: 257979
loss: 1.0344486236572266,grad_norm: 0.9999990514002629, iteration: 257980
loss: 0.9850667715072632,grad_norm: 0.9999990936307294, iteration: 257981
loss: 1.155220866203308,grad_norm: 0.9999993374258848, iteration: 257982
loss: 1.1232013702392578,grad_norm: 0.9999989877805902, iteration: 257983
loss: 1.0012508630752563,grad_norm: 0.7995570691564123, iteration: 257984
loss: 1.0135753154754639,grad_norm: 0.7631597531522257, iteration: 257985
loss: 1.0240769386291504,grad_norm: 0.7778362002721443, iteration: 257986
loss: 1.0005030632019043,grad_norm: 0.8213935374396186, iteration: 257987
loss: 0.992957592010498,grad_norm: 0.8930219323872137, iteration: 257988
loss: 0.9891433119773865,grad_norm: 0.9999999212800482, iteration: 257989
loss: 1.046632170677185,grad_norm: 0.6795003632554267, iteration: 257990
loss: 1.0228866338729858,grad_norm: 0.8708909188532135, iteration: 257991
loss: 0.9873088598251343,grad_norm: 0.8950332699112803, iteration: 257992
loss: 0.9713461995124817,grad_norm: 0.9999991409789183, iteration: 257993
loss: 0.9865314364433289,grad_norm: 0.9376551734989163, iteration: 257994
loss: 1.0133849382400513,grad_norm: 0.9579819938362963, iteration: 257995
loss: 1.0530489683151245,grad_norm: 0.9627424670783588, iteration: 257996
loss: 1.0139482021331787,grad_norm: 0.9999991146949072, iteration: 257997
loss: 0.9707876443862915,grad_norm: 0.8596169003147511, iteration: 257998
loss: 1.0139544010162354,grad_norm: 0.9451706073274001, iteration: 257999
loss: 1.0085327625274658,grad_norm: 0.9999991757201727, iteration: 258000
loss: 1.0294454097747803,grad_norm: 0.7117465994930903, iteration: 258001
loss: 1.0050837993621826,grad_norm: 0.8669258897055716, iteration: 258002
loss: 1.0204774141311646,grad_norm: 0.7465721962439307, iteration: 258003
loss: 0.9901003837585449,grad_norm: 0.9967174474876226, iteration: 258004
loss: 1.0845216512680054,grad_norm: 0.9999994043575785, iteration: 258005
loss: 0.9930020570755005,grad_norm: 0.8505731392163052, iteration: 258006
loss: 0.958979070186615,grad_norm: 0.9999990265814994, iteration: 258007
loss: 0.9971020221710205,grad_norm: 0.9988410726222391, iteration: 258008
loss: 1.0485689640045166,grad_norm: 0.9999997035813807, iteration: 258009
loss: 1.0185067653656006,grad_norm: 0.87835860658762, iteration: 258010
loss: 0.9984734654426575,grad_norm: 0.7418131944456783, iteration: 258011
loss: 1.0162793397903442,grad_norm: 0.9999990203000795, iteration: 258012
loss: 0.9898702502250671,grad_norm: 0.8234535641985185, iteration: 258013
loss: 0.9570456743240356,grad_norm: 0.8203837201972367, iteration: 258014
loss: 1.018601655960083,grad_norm: 0.9207695768935744, iteration: 258015
loss: 1.000718116760254,grad_norm: 0.9155888026211385, iteration: 258016
loss: 1.0284316539764404,grad_norm: 0.9999992404570288, iteration: 258017
loss: 1.003158688545227,grad_norm: 0.9999992176286541, iteration: 258018
loss: 1.1567689180374146,grad_norm: 0.9999995540547416, iteration: 258019
loss: 0.9778364896774292,grad_norm: 0.9208211331773302, iteration: 258020
loss: 0.9833717346191406,grad_norm: 0.9999993357376143, iteration: 258021
loss: 1.0131322145462036,grad_norm: 0.8328400527673008, iteration: 258022
loss: 0.977752685546875,grad_norm: 0.9999990212425139, iteration: 258023
loss: 1.0062229633331299,grad_norm: 0.8861871503641184, iteration: 258024
loss: 1.003678560256958,grad_norm: 0.8721173306847905, iteration: 258025
loss: 1.0609272718429565,grad_norm: 0.9999990406772592, iteration: 258026
loss: 1.001997470855713,grad_norm: 0.999998977621177, iteration: 258027
loss: 1.0204919576644897,grad_norm: 0.9031934912560634, iteration: 258028
loss: 1.0017414093017578,grad_norm: 0.8786004978887297, iteration: 258029
loss: 0.9949818849563599,grad_norm: 0.8166507153707797, iteration: 258030
loss: 0.9754709005355835,grad_norm: 0.8528936684715877, iteration: 258031
loss: 1.011765956878662,grad_norm: 0.9743224844531989, iteration: 258032
loss: 0.9909878969192505,grad_norm: 0.8897124251245109, iteration: 258033
loss: 1.0010520219802856,grad_norm: 0.9999991425344192, iteration: 258034
loss: 1.017850399017334,grad_norm: 0.9999997533002116, iteration: 258035
loss: 1.1047613620758057,grad_norm: 0.9999992754286142, iteration: 258036
loss: 1.0565359592437744,grad_norm: 0.9999992401924646, iteration: 258037
loss: 1.0219169855117798,grad_norm: 0.9541517627338774, iteration: 258038
loss: 1.0041837692260742,grad_norm: 0.9999995019953896, iteration: 258039
loss: 0.9913071393966675,grad_norm: 0.832102346264479, iteration: 258040
loss: 1.0122750997543335,grad_norm: 0.9182097011073147, iteration: 258041
loss: 0.9941411018371582,grad_norm: 0.9681417735697085, iteration: 258042
loss: 1.022005558013916,grad_norm: 0.9124382331356902, iteration: 258043
loss: 1.0324535369873047,grad_norm: 0.9999990976622821, iteration: 258044
loss: 1.0216749906539917,grad_norm: 0.7706470868118079, iteration: 258045
loss: 1.0183839797973633,grad_norm: 0.9792735624747689, iteration: 258046
loss: 1.0372240543365479,grad_norm: 0.8693723845119166, iteration: 258047
loss: 0.9973151087760925,grad_norm: 0.9999990400707129, iteration: 258048
loss: 0.9774376749992371,grad_norm: 0.8841085960956996, iteration: 258049
loss: 1.0136473178863525,grad_norm: 0.9999995019224368, iteration: 258050
loss: 1.0214896202087402,grad_norm: 0.9999991181834336, iteration: 258051
loss: 1.0406289100646973,grad_norm: 0.9999995467145865, iteration: 258052
loss: 0.9766368865966797,grad_norm: 0.8978267545681785, iteration: 258053
loss: 0.9661411643028259,grad_norm: 0.8933363049714061, iteration: 258054
loss: 0.9640789031982422,grad_norm: 0.7823791774499903, iteration: 258055
loss: 1.0169429779052734,grad_norm: 0.9999995526856105, iteration: 258056
loss: 1.0634928941726685,grad_norm: 0.9999991618723583, iteration: 258057
loss: 1.0463894605636597,grad_norm: 0.9999998927752516, iteration: 258058
loss: 0.9657042622566223,grad_norm: 0.9999992015813369, iteration: 258059
loss: 1.0526728630065918,grad_norm: 0.840214193832655, iteration: 258060
loss: 1.013749361038208,grad_norm: 0.9999991020689681, iteration: 258061
loss: 1.029042363166809,grad_norm: 0.7662290958472414, iteration: 258062
loss: 1.022473931312561,grad_norm: 0.8832088025380228, iteration: 258063
loss: 1.0145214796066284,grad_norm: 0.9999992338268708, iteration: 258064
loss: 1.0198392868041992,grad_norm: 0.9068816784686297, iteration: 258065
loss: 0.9832435250282288,grad_norm: 0.8818020809124733, iteration: 258066
loss: 1.063106894493103,grad_norm: 0.9999991855974213, iteration: 258067
loss: 0.9966259598731995,grad_norm: 0.9999991975516377, iteration: 258068
loss: 1.0030688047409058,grad_norm: 0.9999990819120697, iteration: 258069
loss: 0.9965532422065735,grad_norm: 0.9999999664918963, iteration: 258070
loss: 0.9776347875595093,grad_norm: 0.9300579347290868, iteration: 258071
loss: 1.0558576583862305,grad_norm: 0.8827172433148953, iteration: 258072
loss: 0.9937122464179993,grad_norm: 0.8442078340468572, iteration: 258073
loss: 1.0316081047058105,grad_norm: 0.9044653324173366, iteration: 258074
loss: 1.0294902324676514,grad_norm: 0.7595491121270549, iteration: 258075
loss: 0.9829897284507751,grad_norm: 0.9999989907828709, iteration: 258076
loss: 1.0243796110153198,grad_norm: 0.8004243225530993, iteration: 258077
loss: 1.0315735340118408,grad_norm: 0.9999990967305499, iteration: 258078
loss: 1.0228265523910522,grad_norm: 0.8663778894213127, iteration: 258079
loss: 0.9926340579986572,grad_norm: 0.8577368580339573, iteration: 258080
loss: 0.9740797877311707,grad_norm: 0.8974523327956082, iteration: 258081
loss: 0.9792826771736145,grad_norm: 0.8609483233277212, iteration: 258082
loss: 0.9804375767707825,grad_norm: 0.7361358030498463, iteration: 258083
loss: 1.0347248315811157,grad_norm: 0.9185138307262187, iteration: 258084
loss: 1.0147916078567505,grad_norm: 0.9058209033918735, iteration: 258085
loss: 1.0181888341903687,grad_norm: 0.8313751006871648, iteration: 258086
loss: 1.0152664184570312,grad_norm: 0.9999996160769473, iteration: 258087
loss: 1.0040137767791748,grad_norm: 0.7317168711230965, iteration: 258088
loss: 1.1222953796386719,grad_norm: 0.9999998318907038, iteration: 258089
loss: 0.9707998037338257,grad_norm: 0.8914774187818859, iteration: 258090
loss: 0.9933757781982422,grad_norm: 0.9657309055374302, iteration: 258091
loss: 0.9891332387924194,grad_norm: 0.7538353724478422, iteration: 258092
loss: 1.0136408805847168,grad_norm: 0.9999991065298326, iteration: 258093
loss: 1.0115551948547363,grad_norm: 0.8957241179936493, iteration: 258094
loss: 1.0297352075576782,grad_norm: 0.8647970071959817, iteration: 258095
loss: 1.046263575553894,grad_norm: 0.9999992009748934, iteration: 258096
loss: 0.9958164095878601,grad_norm: 0.9999990497913382, iteration: 258097
loss: 0.9908631443977356,grad_norm: 0.8941823550421145, iteration: 258098
loss: 1.0511527061462402,grad_norm: 0.9999991574777477, iteration: 258099
loss: 1.016801357269287,grad_norm: 0.9041798982670203, iteration: 258100
loss: 1.0162603855133057,grad_norm: 0.9539822894174793, iteration: 258101
loss: 0.9904109239578247,grad_norm: 0.9251325697487581, iteration: 258102
loss: 1.0384653806686401,grad_norm: 0.8598910841403298, iteration: 258103
loss: 1.0242607593536377,grad_norm: 0.8665943592388617, iteration: 258104
loss: 1.0186117887496948,grad_norm: 0.897143284646637, iteration: 258105
loss: 0.9949204325675964,grad_norm: 0.8263037989784869, iteration: 258106
loss: 1.0438899993896484,grad_norm: 0.952489374183049, iteration: 258107
loss: 1.0068503618240356,grad_norm: 0.8640049791800855, iteration: 258108
loss: 1.0380396842956543,grad_norm: 0.9999989759650206, iteration: 258109
loss: 1.0179803371429443,grad_norm: 0.8934524925312525, iteration: 258110
loss: 1.002446174621582,grad_norm: 0.7969570046911533, iteration: 258111
loss: 1.0258809328079224,grad_norm: 0.9685312309417333, iteration: 258112
loss: 1.0127776861190796,grad_norm: 0.8278505687566058, iteration: 258113
loss: 1.0070459842681885,grad_norm: 0.9122213783366805, iteration: 258114
loss: 1.0167858600616455,grad_norm: 0.7219921351419816, iteration: 258115
loss: 1.010117769241333,grad_norm: 0.9999990621037681, iteration: 258116
loss: 0.9658201336860657,grad_norm: 0.8599234818315263, iteration: 258117
loss: 1.063940167427063,grad_norm: 0.8218823587029908, iteration: 258118
loss: 1.056567668914795,grad_norm: 0.9999991007670267, iteration: 258119
loss: 1.0026943683624268,grad_norm: 0.7766716986428469, iteration: 258120
loss: 1.0006006956100464,grad_norm: 0.8789332382964115, iteration: 258121
loss: 1.0493170022964478,grad_norm: 0.9279259562410814, iteration: 258122
loss: 0.9965003132820129,grad_norm: 0.9999992033064296, iteration: 258123
loss: 1.0107574462890625,grad_norm: 0.997522325011098, iteration: 258124
loss: 0.9951879978179932,grad_norm: 0.9999993803903044, iteration: 258125
loss: 1.04881751537323,grad_norm: 0.9999991305674061, iteration: 258126
loss: 0.9811856746673584,grad_norm: 0.9610906748414239, iteration: 258127
loss: 1.0101715326309204,grad_norm: 0.896784831167809, iteration: 258128
loss: 1.0989084243774414,grad_norm: 0.9499891521952707, iteration: 258129
loss: 0.9830489158630371,grad_norm: 0.7622356531959138, iteration: 258130
loss: 1.0184415578842163,grad_norm: 0.8976361313055312, iteration: 258131
loss: 0.9758197665214539,grad_norm: 0.8953906400727407, iteration: 258132
loss: 0.9683746099472046,grad_norm: 0.8896921524901154, iteration: 258133
loss: 1.0021721124649048,grad_norm: 0.8966488139502151, iteration: 258134
loss: 0.9956221580505371,grad_norm: 0.9060588027114299, iteration: 258135
loss: 1.0205844640731812,grad_norm: 0.8469774778421348, iteration: 258136
loss: 1.011007308959961,grad_norm: 0.8052576701329973, iteration: 258137
loss: 1.0154215097427368,grad_norm: 0.8572029568531808, iteration: 258138
loss: 0.9793965220451355,grad_norm: 0.8694364287072571, iteration: 258139
loss: 1.0104167461395264,grad_norm: 0.9999993058190116, iteration: 258140
loss: 1.013637661933899,grad_norm: 0.9999992333540977, iteration: 258141
loss: 1.0055140256881714,grad_norm: 0.9512961958193665, iteration: 258142
loss: 1.0121055841445923,grad_norm: 0.9999989891261485, iteration: 258143
loss: 1.0364726781845093,grad_norm: 0.9999990677891532, iteration: 258144
loss: 1.0024477243423462,grad_norm: 0.8670275392007155, iteration: 258145
loss: 1.0153573751449585,grad_norm: 0.9978645183547641, iteration: 258146
loss: 1.0227603912353516,grad_norm: 0.9999996890423931, iteration: 258147
loss: 1.1328729391098022,grad_norm: 0.9999996663568711, iteration: 258148
loss: 0.9932506680488586,grad_norm: 0.936598121377271, iteration: 258149
loss: 0.9741360545158386,grad_norm: 0.9325679362996019, iteration: 258150
loss: 1.0308003425598145,grad_norm: 0.9999990477663463, iteration: 258151
loss: 0.9896026849746704,grad_norm: 0.9999990406597379, iteration: 258152
loss: 0.9955683350563049,grad_norm: 0.9999990680597819, iteration: 258153
loss: 1.03860342502594,grad_norm: 0.8024159306720914, iteration: 258154
loss: 0.990006148815155,grad_norm: 0.8587622542838811, iteration: 258155
loss: 0.9900049567222595,grad_norm: 0.9762983563195814, iteration: 258156
loss: 1.0698151588439941,grad_norm: 0.9999991323874797, iteration: 258157
loss: 1.0409996509552002,grad_norm: 0.9999991910328084, iteration: 258158
loss: 1.0391536951065063,grad_norm: 0.9999996059330012, iteration: 258159
loss: 1.0318056344985962,grad_norm: 0.8252128651292863, iteration: 258160
loss: 1.006926417350769,grad_norm: 0.8908137550675488, iteration: 258161
loss: 1.0086848735809326,grad_norm: 0.8670992209840848, iteration: 258162
loss: 0.9918798208236694,grad_norm: 0.9999989068162821, iteration: 258163
loss: 1.0660035610198975,grad_norm: 0.9999996559124664, iteration: 258164
loss: 1.025231122970581,grad_norm: 0.9698134040557091, iteration: 258165
loss: 1.032814621925354,grad_norm: 0.9999994642285958, iteration: 258166
loss: 0.9852339625358582,grad_norm: 0.9999990662050476, iteration: 258167
loss: 0.9872169494628906,grad_norm: 0.9082130039410802, iteration: 258168
loss: 1.0195642709732056,grad_norm: 0.9999993324534613, iteration: 258169
loss: 0.9887508749961853,grad_norm: 0.8864511544299737, iteration: 258170
loss: 1.0172351598739624,grad_norm: 0.9852227221168672, iteration: 258171
loss: 1.1466294527053833,grad_norm: 0.9999991858621942, iteration: 258172
loss: 1.0074182748794556,grad_norm: 0.9999998116776324, iteration: 258173
loss: 1.0308679342269897,grad_norm: 0.9999991439704606, iteration: 258174
loss: 1.0548124313354492,grad_norm: 0.8301589353897691, iteration: 258175
loss: 1.0372039079666138,grad_norm: 0.9999992162437246, iteration: 258176
loss: 1.0881575345993042,grad_norm: 0.9999996554504403, iteration: 258177
loss: 1.0415596961975098,grad_norm: 0.9999991948528432, iteration: 258178
loss: 1.0265955924987793,grad_norm: 0.9949148271348265, iteration: 258179
loss: 1.1224063634872437,grad_norm: 0.9999993287670437, iteration: 258180
loss: 1.0902700424194336,grad_norm: 0.999999585840849, iteration: 258181
loss: 1.0706589221954346,grad_norm: 0.999999743375076, iteration: 258182
loss: 1.0013642311096191,grad_norm: 0.870506124707446, iteration: 258183
loss: 1.0193202495574951,grad_norm: 0.9511074906201586, iteration: 258184
loss: 1.0865944623947144,grad_norm: 0.9999993157759716, iteration: 258185
loss: 1.0875909328460693,grad_norm: 0.999999134917711, iteration: 258186
loss: 0.9985960125923157,grad_norm: 0.9022162656588238, iteration: 258187
loss: 1.0322643518447876,grad_norm: 0.9999998532506233, iteration: 258188
loss: 1.0758942365646362,grad_norm: 0.9999993033879166, iteration: 258189
loss: 1.05684494972229,grad_norm: 0.9999991532307304, iteration: 258190
loss: 1.0086115598678589,grad_norm: 0.894017848580025, iteration: 258191
loss: 1.1409778594970703,grad_norm: 0.9999999368339829, iteration: 258192
loss: 0.990034282207489,grad_norm: 0.7397819637926865, iteration: 258193
loss: 0.9910677075386047,grad_norm: 0.91886020700908, iteration: 258194
loss: 1.0922259092330933,grad_norm: 0.9999991112393647, iteration: 258195
loss: 1.0176432132720947,grad_norm: 0.9999993380565292, iteration: 258196
loss: 0.974687933921814,grad_norm: 0.977411568468796, iteration: 258197
loss: 0.9728947877883911,grad_norm: 0.8158852674036493, iteration: 258198
loss: 1.0930896997451782,grad_norm: 0.9999998391127854, iteration: 258199
loss: 1.1197938919067383,grad_norm: 0.9999996293816079, iteration: 258200
loss: 1.0107556581497192,grad_norm: 0.9999998979467497, iteration: 258201
loss: 1.0090891122817993,grad_norm: 0.9191821596975224, iteration: 258202
loss: 1.070483684539795,grad_norm: 0.9999990429846728, iteration: 258203
loss: 1.0134997367858887,grad_norm: 0.9999992752149536, iteration: 258204
loss: 1.0058517456054688,grad_norm: 0.9999990326080751, iteration: 258205
loss: 0.9952834248542786,grad_norm: 0.8395894006088291, iteration: 258206
loss: 1.066663384437561,grad_norm: 0.9999989684072447, iteration: 258207
loss: 1.0493324995040894,grad_norm: 0.7527701416052145, iteration: 258208
loss: 1.0047510862350464,grad_norm: 0.9469618373673834, iteration: 258209
loss: 1.0787556171417236,grad_norm: 0.9999997727735693, iteration: 258210
loss: 1.0118123292922974,grad_norm: 0.9816156798641327, iteration: 258211
loss: 1.0265034437179565,grad_norm: 0.9999992587545219, iteration: 258212
loss: 1.0267971754074097,grad_norm: 0.9726584391845072, iteration: 258213
loss: 0.9930320978164673,grad_norm: 0.9484925587409545, iteration: 258214
loss: 1.0430155992507935,grad_norm: 0.9999995025163109, iteration: 258215
loss: 1.0352212190628052,grad_norm: 0.9999990754760976, iteration: 258216
loss: 1.0520046949386597,grad_norm: 0.9999996763699799, iteration: 258217
loss: 1.0012528896331787,grad_norm: 0.999999021689849, iteration: 258218
loss: 1.0100115537643433,grad_norm: 0.8622005204396086, iteration: 258219
loss: 1.0177654027938843,grad_norm: 0.9999989808292751, iteration: 258220
loss: 1.0125142335891724,grad_norm: 0.8595024222745022, iteration: 258221
loss: 1.0006238222122192,grad_norm: 0.9999994192903591, iteration: 258222
loss: 1.1161664724349976,grad_norm: 0.9999996576473457, iteration: 258223
loss: 1.0544230937957764,grad_norm: 0.999999653946354, iteration: 258224
loss: 1.0204567909240723,grad_norm: 0.9840834009373002, iteration: 258225
loss: 0.9847745299339294,grad_norm: 0.9828922227528331, iteration: 258226
loss: 1.0398012399673462,grad_norm: 0.8678050399409264, iteration: 258227
loss: 0.9596173763275146,grad_norm: 0.8457244931011668, iteration: 258228
loss: 1.0103334188461304,grad_norm: 0.8777127497772317, iteration: 258229
loss: 1.0146851539611816,grad_norm: 0.9999990293914119, iteration: 258230
loss: 0.9954039454460144,grad_norm: 0.9999998221594207, iteration: 258231
loss: 1.0296084880828857,grad_norm: 0.999999231824911, iteration: 258232
loss: 1.0054718255996704,grad_norm: 0.88342112145606, iteration: 258233
loss: 0.990505039691925,grad_norm: 0.9999989950047651, iteration: 258234
loss: 1.0115633010864258,grad_norm: 0.8338845291762024, iteration: 258235
loss: 1.0969997644424438,grad_norm: 0.9999999374343891, iteration: 258236
loss: 0.9540220499038696,grad_norm: 0.8947208622606886, iteration: 258237
loss: 1.0607236623764038,grad_norm: 0.8631721384426236, iteration: 258238
loss: 1.1074565649032593,grad_norm: 0.9999991337042086, iteration: 258239
loss: 0.9907152056694031,grad_norm: 0.9288415968646249, iteration: 258240
loss: 1.0185860395431519,grad_norm: 0.9999993158769308, iteration: 258241
loss: 1.0504823923110962,grad_norm: 0.8550144663367317, iteration: 258242
loss: 1.0221866369247437,grad_norm: 0.999999241580214, iteration: 258243
loss: 1.1057947874069214,grad_norm: 0.9999999791163217, iteration: 258244
loss: 1.0013184547424316,grad_norm: 0.9485566595386365, iteration: 258245
loss: 1.092706322669983,grad_norm: 0.999999281680308, iteration: 258246
loss: 0.9815316796302795,grad_norm: 0.9375887253813656, iteration: 258247
loss: 0.9652119874954224,grad_norm: 0.8093550750400922, iteration: 258248
loss: 1.095558762550354,grad_norm: 0.9999991560976579, iteration: 258249
loss: 1.0308959484100342,grad_norm: 0.9470134301363586, iteration: 258250
loss: 1.0132789611816406,grad_norm: 0.9999989619826624, iteration: 258251
loss: 0.9963459372520447,grad_norm: 0.9999992785456022, iteration: 258252
loss: 1.0149965286254883,grad_norm: 0.9999994314427278, iteration: 258253
loss: 1.0489600896835327,grad_norm: 0.9999998109402013, iteration: 258254
loss: 0.9885175824165344,grad_norm: 0.9999992061610659, iteration: 258255
loss: 1.1500977277755737,grad_norm: 0.9999998531795425, iteration: 258256
loss: 0.9861258268356323,grad_norm: 0.9999994114074249, iteration: 258257
loss: 1.0630900859832764,grad_norm: 0.9999998358943469, iteration: 258258
loss: 1.005497932434082,grad_norm: 0.9999996920647257, iteration: 258259
loss: 0.9683108925819397,grad_norm: 0.9622620020661005, iteration: 258260
loss: 1.000792384147644,grad_norm: 0.9999999230499416, iteration: 258261
loss: 0.9920424818992615,grad_norm: 0.9999996926136175, iteration: 258262
loss: 1.045111060142517,grad_norm: 0.8058850322890222, iteration: 258263
loss: 1.0642741918563843,grad_norm: 0.9999996366693262, iteration: 258264
loss: 1.0042952299118042,grad_norm: 0.9999991937615867, iteration: 258265
loss: 1.0088634490966797,grad_norm: 0.957945947696577, iteration: 258266
loss: 1.112286925315857,grad_norm: 0.9999991891375621, iteration: 258267
loss: 1.0118544101715088,grad_norm: 0.9594736348378808, iteration: 258268
loss: 1.073980450630188,grad_norm: 0.9999994354694025, iteration: 258269
loss: 1.0600459575653076,grad_norm: 0.9999996255675325, iteration: 258270
loss: 1.0812047719955444,grad_norm: 0.9999999064661473, iteration: 258271
loss: 1.0584970712661743,grad_norm: 0.9219691885353493, iteration: 258272
loss: 1.0036542415618896,grad_norm: 0.9076319287405656, iteration: 258273
loss: 1.0146374702453613,grad_norm: 0.7741099298026747, iteration: 258274
loss: 1.2438327074050903,grad_norm: 0.999999654218337, iteration: 258275
loss: 1.0671579837799072,grad_norm: 0.9383573762358257, iteration: 258276
loss: 1.0402910709381104,grad_norm: 0.9999991337298118, iteration: 258277
loss: 1.003474473953247,grad_norm: 0.9999990913424627, iteration: 258278
loss: 0.9944532513618469,grad_norm: 0.8185523150342144, iteration: 258279
loss: 0.9926759004592896,grad_norm: 0.969888272672725, iteration: 258280
loss: 1.147689700126648,grad_norm: 0.999999773764355, iteration: 258281
loss: 1.0345937013626099,grad_norm: 0.999999151452479, iteration: 258282
loss: 1.1595717668533325,grad_norm: 0.9999991425990067, iteration: 258283
loss: 0.9975131154060364,grad_norm: 0.8596037577612488, iteration: 258284
loss: 0.9826595783233643,grad_norm: 0.8919554481858881, iteration: 258285
loss: 0.9755544066429138,grad_norm: 0.9792572012618753, iteration: 258286
loss: 1.035025954246521,grad_norm: 0.999999069557256, iteration: 258287
loss: 0.9907241463661194,grad_norm: 0.8440166338991593, iteration: 258288
loss: 0.9902371764183044,grad_norm: 0.7852637206217757, iteration: 258289
loss: 1.0133944749832153,grad_norm: 0.9567630340524134, iteration: 258290
loss: 1.0727051496505737,grad_norm: 0.8141148766010471, iteration: 258291
loss: 1.0406990051269531,grad_norm: 0.9070408133541632, iteration: 258292
loss: 1.066426396369934,grad_norm: 0.9482988780897359, iteration: 258293
loss: 1.030577301979065,grad_norm: 0.9999996976581412, iteration: 258294
loss: 1.0023115873336792,grad_norm: 0.9999990520409145, iteration: 258295
loss: 1.0406845808029175,grad_norm: 0.8213534123506789, iteration: 258296
loss: 1.0083142518997192,grad_norm: 0.9999992009178343, iteration: 258297
loss: 1.1041291952133179,grad_norm: 0.9999998834604684, iteration: 258298
loss: 1.0302592515945435,grad_norm: 0.9999996845562931, iteration: 258299
loss: 1.0802252292633057,grad_norm: 0.9478102454784368, iteration: 258300
loss: 1.0114004611968994,grad_norm: 0.9999990947601959, iteration: 258301
loss: 1.017915964126587,grad_norm: 0.9811038526271477, iteration: 258302
loss: 0.9747571349143982,grad_norm: 0.9999992804669087, iteration: 258303
loss: 0.9846438765525818,grad_norm: 0.9828124372789234, iteration: 258304
loss: 1.050715684890747,grad_norm: 0.964826489858782, iteration: 258305
loss: 1.0372700691223145,grad_norm: 0.8465798139121252, iteration: 258306
loss: 1.0557535886764526,grad_norm: 0.9999996706715483, iteration: 258307
loss: 1.0170233249664307,grad_norm: 0.8781584354450265, iteration: 258308
loss: 0.9932932257652283,grad_norm: 0.9300457635346459, iteration: 258309
loss: 1.0264302492141724,grad_norm: 0.9700939149242963, iteration: 258310
loss: 1.0719271898269653,grad_norm: 0.9999993166741068, iteration: 258311
loss: 0.9886603951454163,grad_norm: 0.9445770134542416, iteration: 258312
loss: 0.9837703704833984,grad_norm: 0.8405058057409015, iteration: 258313
loss: 0.9593036770820618,grad_norm: 0.9604243957967863, iteration: 258314
loss: 0.9911581873893738,grad_norm: 0.9999991426001392, iteration: 258315
loss: 1.0309447050094604,grad_norm: 0.9999990565986562, iteration: 258316
loss: 0.9932323694229126,grad_norm: 0.9999991688390725, iteration: 258317
loss: 0.9889550805091858,grad_norm: 0.8445793856015443, iteration: 258318
loss: 1.014999508857727,grad_norm: 0.997069197362866, iteration: 258319
loss: 1.0079028606414795,grad_norm: 0.9999991716894558, iteration: 258320
loss: 1.028460144996643,grad_norm: 0.90794863091033, iteration: 258321
loss: 1.0112687349319458,grad_norm: 0.999999237107778, iteration: 258322
loss: 1.0253969430923462,grad_norm: 0.9999989781673797, iteration: 258323
loss: 1.0051279067993164,grad_norm: 0.8802200221915945, iteration: 258324
loss: 0.9732977151870728,grad_norm: 0.8142601460086873, iteration: 258325
loss: 1.0099256038665771,grad_norm: 0.7422323698471675, iteration: 258326
loss: 0.9856038689613342,grad_norm: 0.9703380256978257, iteration: 258327
loss: 0.9674187898635864,grad_norm: 0.8812255160886954, iteration: 258328
loss: 1.0278925895690918,grad_norm: 0.9999995752996869, iteration: 258329
loss: 0.9987457394599915,grad_norm: 0.957913781024524, iteration: 258330
loss: 1.0033313035964966,grad_norm: 0.895559616037433, iteration: 258331
loss: 0.9766603112220764,grad_norm: 0.9999991667337508, iteration: 258332
loss: 0.9943262338638306,grad_norm: 0.8172664843927294, iteration: 258333
loss: 0.9733067154884338,grad_norm: 0.9999991445566171, iteration: 258334
loss: 1.0021133422851562,grad_norm: 0.9494429356218349, iteration: 258335
loss: 1.008857250213623,grad_norm: 0.9999996283875255, iteration: 258336
loss: 0.9976226687431335,grad_norm: 0.9999989690675939, iteration: 258337
loss: 0.9967514276504517,grad_norm: 0.999999002674303, iteration: 258338
loss: 0.9680625796318054,grad_norm: 0.9839030087904885, iteration: 258339
loss: 1.0177704095840454,grad_norm: 0.9036637327114037, iteration: 258340
loss: 1.0198707580566406,grad_norm: 0.8639265684192041, iteration: 258341
loss: 0.994353711605072,grad_norm: 0.78392997170234, iteration: 258342
loss: 1.028478980064392,grad_norm: 0.8266236772632427, iteration: 258343
loss: 1.0818078517913818,grad_norm: 0.9999994921009099, iteration: 258344
loss: 1.0261144638061523,grad_norm: 0.9999993048878861, iteration: 258345
loss: 1.0883677005767822,grad_norm: 0.8195819436520712, iteration: 258346
loss: 0.9819046854972839,grad_norm: 0.987020020917298, iteration: 258347
loss: 0.9906472563743591,grad_norm: 0.9588558579717817, iteration: 258348
loss: 1.0462430715560913,grad_norm: 0.8074878086670914, iteration: 258349
loss: 0.9628165364265442,grad_norm: 0.9683676484832997, iteration: 258350
loss: 0.9830560684204102,grad_norm: 0.8944678234867831, iteration: 258351
loss: 0.9794951677322388,grad_norm: 0.871265030723354, iteration: 258352
loss: 1.0045844316482544,grad_norm: 0.8373581828835802, iteration: 258353
loss: 0.9872173070907593,grad_norm: 0.9009339239578168, iteration: 258354
loss: 0.9943649172782898,grad_norm: 0.9811541949565754, iteration: 258355
loss: 0.9924232363700867,grad_norm: 0.939465576350354, iteration: 258356
loss: 1.013415813446045,grad_norm: 0.9982625016958255, iteration: 258357
loss: 1.151142954826355,grad_norm: 0.9999994958525403, iteration: 258358
loss: 1.0760979652404785,grad_norm: 0.9999996681255865, iteration: 258359
loss: 0.9704939126968384,grad_norm: 0.9999990342447802, iteration: 258360
loss: 1.0619224309921265,grad_norm: 0.9999992294165523, iteration: 258361
loss: 0.9848886728286743,grad_norm: 0.7918034487986813, iteration: 258362
loss: 1.021976351737976,grad_norm: 0.896855029337605, iteration: 258363
loss: 0.9655234217643738,grad_norm: 0.894490520589215, iteration: 258364
loss: 1.1175763607025146,grad_norm: 0.9999997053129063, iteration: 258365
loss: 1.0091816186904907,grad_norm: 0.999999004402529, iteration: 258366
loss: 1.009606957435608,grad_norm: 0.9999989857087211, iteration: 258367
loss: 1.00577974319458,grad_norm: 0.850078215469171, iteration: 258368
loss: 0.9812044501304626,grad_norm: 0.860399122405828, iteration: 258369
loss: 1.0441962480545044,grad_norm: 0.8546406611811785, iteration: 258370
loss: 0.9921172857284546,grad_norm: 0.8905416111516523, iteration: 258371
loss: 1.0083606243133545,grad_norm: 0.9999989608121345, iteration: 258372
loss: 1.0215654373168945,grad_norm: 0.8260888420173782, iteration: 258373
loss: 0.9970290660858154,grad_norm: 0.9078866592161162, iteration: 258374
loss: 0.9946203827857971,grad_norm: 0.9453769246769195, iteration: 258375
loss: 1.0082433223724365,grad_norm: 0.9999998381306655, iteration: 258376
loss: 0.9919827580451965,grad_norm: 0.8634710412882409, iteration: 258377
loss: 1.002927541732788,grad_norm: 0.7276322414017531, iteration: 258378
loss: 0.9881541728973389,grad_norm: 0.8938382163669066, iteration: 258379
loss: 1.1271305084228516,grad_norm: 0.9999998930933122, iteration: 258380
loss: 1.0008503198623657,grad_norm: 0.9999990973473329, iteration: 258381
loss: 0.990415096282959,grad_norm: 0.8566299720682953, iteration: 258382
loss: 0.9750630259513855,grad_norm: 0.7408387390782277, iteration: 258383
loss: 1.051176905632019,grad_norm: 0.9999998934402706, iteration: 258384
loss: 1.0025514364242554,grad_norm: 0.8146351402359318, iteration: 258385
loss: 1.0511291027069092,grad_norm: 0.9999995256736489, iteration: 258386
loss: 1.0111383199691772,grad_norm: 0.8440399489475641, iteration: 258387
loss: 0.9818928241729736,grad_norm: 0.9311695981758432, iteration: 258388
loss: 0.978417694568634,grad_norm: 0.9213207098393708, iteration: 258389
loss: 0.9616671204566956,grad_norm: 0.9999993076207745, iteration: 258390
loss: 0.9933063983917236,grad_norm: 0.9999994723818478, iteration: 258391
loss: 1.008095622062683,grad_norm: 0.8589596195612851, iteration: 258392
loss: 0.9775553345680237,grad_norm: 0.9362182514461431, iteration: 258393
loss: 0.9805735349655151,grad_norm: 0.9367002277166949, iteration: 258394
loss: 0.9691262245178223,grad_norm: 0.7728620672716489, iteration: 258395
loss: 0.9119244813919067,grad_norm: 0.89235000708186, iteration: 258396
loss: 1.002482533454895,grad_norm: 0.8450079411846912, iteration: 258397
loss: 1.0126992464065552,grad_norm: 0.8684196365673817, iteration: 258398
loss: 0.969296395778656,grad_norm: 0.9172024182236387, iteration: 258399
loss: 1.022558331489563,grad_norm: 0.8623927155046949, iteration: 258400
loss: 0.9598293304443359,grad_norm: 0.9344379786985955, iteration: 258401
loss: 0.9983781576156616,grad_norm: 0.9999993150172196, iteration: 258402
loss: 0.9794952273368835,grad_norm: 0.9972673785986111, iteration: 258403
loss: 0.9743685722351074,grad_norm: 0.9028874354898089, iteration: 258404
loss: 1.049371361732483,grad_norm: 0.9214895953897666, iteration: 258405
loss: 0.950218141078949,grad_norm: 0.8698912948322941, iteration: 258406
loss: 1.0152055025100708,grad_norm: 0.9999991440693033, iteration: 258407
loss: 0.9952588677406311,grad_norm: 0.9999991688976871, iteration: 258408
loss: 1.1345704793930054,grad_norm: 0.9672989368692732, iteration: 258409
loss: 0.979636013507843,grad_norm: 0.9999997229488411, iteration: 258410
loss: 0.9979766607284546,grad_norm: 0.9999990324685305, iteration: 258411
loss: 1.0148308277130127,grad_norm: 0.9381016936573298, iteration: 258412
loss: 1.1941901445388794,grad_norm: 0.9999999242639045, iteration: 258413
loss: 0.9966005682945251,grad_norm: 0.999999095456194, iteration: 258414
loss: 0.9753685593605042,grad_norm: 0.9999990149746002, iteration: 258415
loss: 0.9933274388313293,grad_norm: 0.8887308308394648, iteration: 258416
loss: 1.0044734477996826,grad_norm: 0.9682622215305852, iteration: 258417
loss: 0.975898027420044,grad_norm: 0.9121594134179264, iteration: 258418
loss: 0.9828921556472778,grad_norm: 0.7881409361008681, iteration: 258419
loss: 0.9949858784675598,grad_norm: 0.9999992864319195, iteration: 258420
loss: 1.0096428394317627,grad_norm: 0.9161696985496129, iteration: 258421
loss: 0.9850786924362183,grad_norm: 0.8660022321048214, iteration: 258422
loss: 0.9959743022918701,grad_norm: 0.8113003979046327, iteration: 258423
loss: 1.000893473625183,grad_norm: 0.866766055848917, iteration: 258424
loss: 0.9828082919120789,grad_norm: 0.9421561231137777, iteration: 258425
loss: 1.0143636465072632,grad_norm: 0.9999990314199269, iteration: 258426
loss: 1.1293092966079712,grad_norm: 0.99999955636334, iteration: 258427
loss: 1.03043794631958,grad_norm: 0.9349199461144077, iteration: 258428
loss: 0.9921512007713318,grad_norm: 0.999999397418479, iteration: 258429
loss: 0.9788388609886169,grad_norm: 0.8234402722566866, iteration: 258430
loss: 1.026414155960083,grad_norm: 0.9181783272621856, iteration: 258431
loss: 1.0460972785949707,grad_norm: 0.9999998529574717, iteration: 258432
loss: 0.9904693365097046,grad_norm: 0.8428378202877488, iteration: 258433
loss: 1.0056394338607788,grad_norm: 0.8973412306966397, iteration: 258434
loss: 1.0561769008636475,grad_norm: 0.9999997246882764, iteration: 258435
loss: 1.0081520080566406,grad_norm: 0.9006407649751459, iteration: 258436
loss: 1.010751724243164,grad_norm: 0.7994979710430531, iteration: 258437
loss: 0.9485182166099548,grad_norm: 0.934996452630521, iteration: 258438
loss: 0.9994648694992065,grad_norm: 0.8074282049733233, iteration: 258439
loss: 0.9988865852355957,grad_norm: 0.9189168572950294, iteration: 258440
loss: 0.9771299362182617,grad_norm: 0.9999991549582594, iteration: 258441
loss: 1.0365618467330933,grad_norm: 0.8971407629221156, iteration: 258442
loss: 0.984607994556427,grad_norm: 0.9578020671646836, iteration: 258443
loss: 1.0042150020599365,grad_norm: 0.8575199138122533, iteration: 258444
loss: 0.9943327307701111,grad_norm: 0.8463337495047807, iteration: 258445
loss: 0.9932262897491455,grad_norm: 0.999999149055427, iteration: 258446
loss: 0.980452835559845,grad_norm: 0.9999991331563556, iteration: 258447
loss: 1.0154695510864258,grad_norm: 0.7354890769893139, iteration: 258448
loss: 1.002064824104309,grad_norm: 0.927369681183809, iteration: 258449
loss: 0.9768196940422058,grad_norm: 0.7660857799221721, iteration: 258450
loss: 0.994598388671875,grad_norm: 0.8698176647796282, iteration: 258451
loss: 1.0250977277755737,grad_norm: 0.9168405569501296, iteration: 258452
loss: 1.0307120084762573,grad_norm: 0.9415560327766758, iteration: 258453
loss: 1.0298364162445068,grad_norm: 0.8030896692927495, iteration: 258454
loss: 1.027856707572937,grad_norm: 0.9999991687056801, iteration: 258455
loss: 1.0007151365280151,grad_norm: 0.9991635505505727, iteration: 258456
loss: 1.0085734128952026,grad_norm: 0.9804142502589713, iteration: 258457
loss: 0.9917639493942261,grad_norm: 0.9200828307448148, iteration: 258458
loss: 1.0173115730285645,grad_norm: 0.9999998800977967, iteration: 258459
loss: 1.3379913568496704,grad_norm: 0.9999994314567378, iteration: 258460
loss: 0.9952441453933716,grad_norm: 0.9999989858363837, iteration: 258461
loss: 1.0221304893493652,grad_norm: 0.8372939930953095, iteration: 258462
loss: 0.9672262072563171,grad_norm: 0.9999989700626895, iteration: 258463
loss: 1.1385544538497925,grad_norm: 0.9999991564959396, iteration: 258464
loss: 1.0838289260864258,grad_norm: 0.9999998758886386, iteration: 258465
loss: 1.0219610929489136,grad_norm: 0.9999990834326087, iteration: 258466
loss: 1.0362322330474854,grad_norm: 0.8313050402864282, iteration: 258467
loss: 0.985852062702179,grad_norm: 0.8068714402663283, iteration: 258468
loss: 0.9956483840942383,grad_norm: 0.9999990466425374, iteration: 258469
loss: 0.9940981864929199,grad_norm: 0.9999993943135699, iteration: 258470
loss: 0.9849924445152283,grad_norm: 0.9999990231133333, iteration: 258471
loss: 0.9658697247505188,grad_norm: 0.9568782931910088, iteration: 258472
loss: 1.0236130952835083,grad_norm: 0.7630736984734191, iteration: 258473
loss: 1.257436752319336,grad_norm: 0.9999998378744434, iteration: 258474
loss: 1.0089634656906128,grad_norm: 0.9999998436570137, iteration: 258475
loss: 0.9772593379020691,grad_norm: 0.8532317102111521, iteration: 258476
loss: 0.992681086063385,grad_norm: 0.9999991668461946, iteration: 258477
loss: 1.0093759298324585,grad_norm: 0.9399868225036196, iteration: 258478
loss: 0.9869336485862732,grad_norm: 0.8654075373107589, iteration: 258479
loss: 0.9759482145309448,grad_norm: 0.891437700024903, iteration: 258480
loss: 1.0274790525436401,grad_norm: 0.902031734585884, iteration: 258481
loss: 0.9978077411651611,grad_norm: 0.7750006640693987, iteration: 258482
loss: 1.2861695289611816,grad_norm: 0.9999995727359388, iteration: 258483
loss: 0.99221271276474,grad_norm: 0.9422177746267963, iteration: 258484
loss: 1.0227677822113037,grad_norm: 0.9999991574328698, iteration: 258485
loss: 1.0079349279403687,grad_norm: 0.9999991176247678, iteration: 258486
loss: 1.0164519548416138,grad_norm: 0.8567359436779507, iteration: 258487
loss: 0.9985559582710266,grad_norm: 0.9205486738659384, iteration: 258488
loss: 1.0725877285003662,grad_norm: 0.999999373958244, iteration: 258489
loss: 1.02274489402771,grad_norm: 0.9999992363129246, iteration: 258490
loss: 1.0097988843917847,grad_norm: 0.7465083335624472, iteration: 258491
loss: 0.9977597594261169,grad_norm: 0.9541565913673279, iteration: 258492
loss: 0.9942601323127747,grad_norm: 0.8189916420507962, iteration: 258493
loss: 1.013613224029541,grad_norm: 0.963320685854191, iteration: 258494
loss: 0.9829197525978088,grad_norm: 0.9092895793576146, iteration: 258495
loss: 1.0261725187301636,grad_norm: 0.9999991272018107, iteration: 258496
loss: 1.029597520828247,grad_norm: 0.9245563295997995, iteration: 258497
loss: 1.0152851343154907,grad_norm: 0.8554088538182251, iteration: 258498
loss: 1.0894523859024048,grad_norm: 0.9013199160831041, iteration: 258499
loss: 0.9861430525779724,grad_norm: 0.7458411696889145, iteration: 258500
loss: 0.9535275101661682,grad_norm: 0.9999991886462146, iteration: 258501
loss: 0.9887741804122925,grad_norm: 0.9999991565308632, iteration: 258502
loss: 1.0041611194610596,grad_norm: 0.999999193683502, iteration: 258503
loss: 1.0031927824020386,grad_norm: 0.9910555282747603, iteration: 258504
loss: 1.0010608434677124,grad_norm: 0.8054537602202005, iteration: 258505
loss: 1.0029139518737793,grad_norm: 0.9999996600562132, iteration: 258506
loss: 1.0210747718811035,grad_norm: 0.8837644892205024, iteration: 258507
loss: 0.9817296862602234,grad_norm: 0.9999991209561515, iteration: 258508
loss: 0.99197918176651,grad_norm: 0.9398885665351657, iteration: 258509
loss: 0.9972184300422668,grad_norm: 0.8223137257684149, iteration: 258510
loss: 0.9812227487564087,grad_norm: 0.9999997479966041, iteration: 258511
loss: 0.9812504649162292,grad_norm: 0.8165689107487296, iteration: 258512
loss: 1.0018683671951294,grad_norm: 0.7844624589876324, iteration: 258513
loss: 1.0482804775238037,grad_norm: 0.9999995964292461, iteration: 258514
loss: 0.9991787075996399,grad_norm: 0.8534232378999509, iteration: 258515
loss: 0.9823516011238098,grad_norm: 0.9141070308799454, iteration: 258516
loss: 0.9816952347755432,grad_norm: 0.906842443068828, iteration: 258517
loss: 1.0239779949188232,grad_norm: 0.8258509720916447, iteration: 258518
loss: 0.946658730506897,grad_norm: 0.9999991709278152, iteration: 258519
loss: 0.981151819229126,grad_norm: 0.8195542704082783, iteration: 258520
loss: 1.0113639831542969,grad_norm: 0.9118013063819008, iteration: 258521
loss: 0.9646863341331482,grad_norm: 0.8471323582694952, iteration: 258522
loss: 1.0308281183242798,grad_norm: 0.9999990948448535, iteration: 258523
loss: 0.9739335775375366,grad_norm: 0.8506784991448182, iteration: 258524
loss: 0.9761319160461426,grad_norm: 0.9999992710876509, iteration: 258525
loss: 0.9824454188346863,grad_norm: 0.8551841519662712, iteration: 258526
loss: 0.9905818700790405,grad_norm: 0.9999990600114298, iteration: 258527
loss: 1.0282734632492065,grad_norm: 0.9229400911013776, iteration: 258528
loss: 0.96589595079422,grad_norm: 0.8793641923763906, iteration: 258529
loss: 0.9697354435920715,grad_norm: 0.8698077946306624, iteration: 258530
loss: 1.0170762538909912,grad_norm: 0.7821361752315549, iteration: 258531
loss: 0.9875625967979431,grad_norm: 0.9999991137629969, iteration: 258532
loss: 0.9954257011413574,grad_norm: 0.879402240033816, iteration: 258533
loss: 1.0211409330368042,grad_norm: 0.8994006416048845, iteration: 258534
loss: 1.0214754343032837,grad_norm: 0.7857090209038436, iteration: 258535
loss: 1.0231717824935913,grad_norm: 0.9149955760107192, iteration: 258536
loss: 0.9990023970603943,grad_norm: 0.9999997751463485, iteration: 258537
loss: 1.0409626960754395,grad_norm: 0.9999997898023308, iteration: 258538
loss: 1.0064356327056885,grad_norm: 0.8837383108216174, iteration: 258539
loss: 1.0057568550109863,grad_norm: 0.7496307749328742, iteration: 258540
loss: 1.0331162214279175,grad_norm: 0.9616951028006834, iteration: 258541
loss: 0.9728954434394836,grad_norm: 0.9489050829670377, iteration: 258542
loss: 1.0142316818237305,grad_norm: 0.9120384825696971, iteration: 258543
loss: 0.9753668904304504,grad_norm: 0.9225521312006462, iteration: 258544
loss: 1.0540639162063599,grad_norm: 0.9597136245799802, iteration: 258545
loss: 0.9953876733779907,grad_norm: 0.8094363594657497, iteration: 258546
loss: 1.0156017541885376,grad_norm: 0.7848046095345346, iteration: 258547
loss: 1.0450986623764038,grad_norm: 0.8427877428339065, iteration: 258548
loss: 0.9656224846839905,grad_norm: 0.8583392842817038, iteration: 258549
loss: 0.9620819091796875,grad_norm: 0.8155994119948602, iteration: 258550
loss: 1.0359994173049927,grad_norm: 0.9676303434098088, iteration: 258551
loss: 1.005259394645691,grad_norm: 0.9999991358478305, iteration: 258552
loss: 1.040401816368103,grad_norm: 0.8200633325994657, iteration: 258553
loss: 1.0040961503982544,grad_norm: 0.8652535012814474, iteration: 258554
loss: 1.070356011390686,grad_norm: 0.9999995679038723, iteration: 258555
loss: 1.0252478122711182,grad_norm: 0.9541606990210166, iteration: 258556
loss: 1.0649996995925903,grad_norm: 0.9999992555561288, iteration: 258557
loss: 0.9861872792243958,grad_norm: 0.8661014659314876, iteration: 258558
loss: 1.0243629217147827,grad_norm: 0.8457084897901803, iteration: 258559
loss: 0.9516890048980713,grad_norm: 0.8924810267215817, iteration: 258560
loss: 1.049890160560608,grad_norm: 0.999999570897924, iteration: 258561
loss: 1.0922188758850098,grad_norm: 0.9999991954094012, iteration: 258562
loss: 1.0117453336715698,grad_norm: 0.8994354099478947, iteration: 258563
loss: 1.009191632270813,grad_norm: 0.966185271617063, iteration: 258564
loss: 1.0045148134231567,grad_norm: 0.9827333227463066, iteration: 258565
loss: 1.0710718631744385,grad_norm: 0.9999992316448572, iteration: 258566
loss: 0.9896377921104431,grad_norm: 0.8107935084856746, iteration: 258567
loss: 1.0133917331695557,grad_norm: 0.9401817105663209, iteration: 258568
loss: 1.1240627765655518,grad_norm: 0.9999994107935009, iteration: 258569
loss: 1.0812407732009888,grad_norm: 0.9999992764967037, iteration: 258570
loss: 0.9966951012611389,grad_norm: 0.8200991150433523, iteration: 258571
loss: 1.0341379642486572,grad_norm: 0.9801022129310981, iteration: 258572
loss: 1.0101135969161987,grad_norm: 0.8097484703105106, iteration: 258573
loss: 1.3456165790557861,grad_norm: 0.9999996311481768, iteration: 258574
loss: 1.007459282875061,grad_norm: 0.8787446264710193, iteration: 258575
loss: 0.9963775873184204,grad_norm: 0.9999992423897657, iteration: 258576
loss: 0.9636650681495667,grad_norm: 0.9877824160380998, iteration: 258577
loss: 1.0125285387039185,grad_norm: 0.9667349575879335, iteration: 258578
loss: 0.9982782006263733,grad_norm: 0.9999991515464653, iteration: 258579
loss: 1.0364993810653687,grad_norm: 0.9999991014430949, iteration: 258580
loss: 1.0016709566116333,grad_norm: 0.7396397182412552, iteration: 258581
loss: 1.0164076089859009,grad_norm: 0.9958538224573088, iteration: 258582
loss: 1.0785789489746094,grad_norm: 0.9999998718997436, iteration: 258583
loss: 1.000350832939148,grad_norm: 0.8519968374783342, iteration: 258584
loss: 1.1726187467575073,grad_norm: 0.9999999665383827, iteration: 258585
loss: 1.0108283758163452,grad_norm: 0.8255730177201077, iteration: 258586
loss: 0.9700499176979065,grad_norm: 0.880486502953443, iteration: 258587
loss: 1.018776535987854,grad_norm: 0.9999990563098075, iteration: 258588
loss: 0.9864991307258606,grad_norm: 0.8597237627371671, iteration: 258589
loss: 1.0005112886428833,grad_norm: 0.9216397385633294, iteration: 258590
loss: 1.0172706842422485,grad_norm: 0.999999118616414, iteration: 258591
loss: 1.0199623107910156,grad_norm: 0.9999992764432036, iteration: 258592
loss: 0.9867023229598999,grad_norm: 0.9999990199451556, iteration: 258593
loss: 1.0170984268188477,grad_norm: 0.9999991956614356, iteration: 258594
loss: 1.0120735168457031,grad_norm: 0.9999990698195079, iteration: 258595
loss: 0.9947559237480164,grad_norm: 0.9999991935087346, iteration: 258596
loss: 0.988479495048523,grad_norm: 0.8575830004176168, iteration: 258597
loss: 1.109381914138794,grad_norm: 0.9999994255354762, iteration: 258598
loss: 1.0054497718811035,grad_norm: 0.9999990400581275, iteration: 258599
loss: 1.014628291130066,grad_norm: 0.7917450522897371, iteration: 258600
loss: 0.9851858019828796,grad_norm: 0.9999991728839096, iteration: 258601
loss: 1.003250002861023,grad_norm: 0.8109936018782683, iteration: 258602
loss: 0.9809263348579407,grad_norm: 0.8886217897706781, iteration: 258603
loss: 1.0568293333053589,grad_norm: 0.9694093715644916, iteration: 258604
loss: 0.9881200194358826,grad_norm: 0.815299219975817, iteration: 258605
loss: 1.005785584449768,grad_norm: 0.8194128390626402, iteration: 258606
loss: 1.1010795831680298,grad_norm: 0.9999993225285944, iteration: 258607
loss: 1.1752643585205078,grad_norm: 0.999999708253974, iteration: 258608
loss: 0.9970639944076538,grad_norm: 0.9098903891625273, iteration: 258609
loss: 1.0067124366760254,grad_norm: 0.7878486328084189, iteration: 258610
loss: 0.9733568429946899,grad_norm: 0.8954908916665848, iteration: 258611
loss: 0.990097165107727,grad_norm: 0.7756208398026859, iteration: 258612
loss: 1.0274379253387451,grad_norm: 0.9999999919343551, iteration: 258613
loss: 1.0647544860839844,grad_norm: 0.9476767427490305, iteration: 258614
loss: 1.0326006412506104,grad_norm: 0.965581854005097, iteration: 258615
loss: 1.0047868490219116,grad_norm: 0.8894355134551843, iteration: 258616
loss: 1.0013564825057983,grad_norm: 0.9999992639233042, iteration: 258617
loss: 1.0152788162231445,grad_norm: 0.7484304387176551, iteration: 258618
loss: 1.0564104318618774,grad_norm: 0.9009025217580124, iteration: 258619
loss: 1.008007287979126,grad_norm: 0.9999990906064505, iteration: 258620
loss: 1.0068117380142212,grad_norm: 0.9999991967342555, iteration: 258621
loss: 0.9941678047180176,grad_norm: 0.9979385014897117, iteration: 258622
loss: 1.238623857498169,grad_norm: 0.9999999824315208, iteration: 258623
loss: 0.9839275479316711,grad_norm: 0.9999990453102442, iteration: 258624
loss: 0.9998378753662109,grad_norm: 0.9999992195047523, iteration: 258625
loss: 1.0202629566192627,grad_norm: 0.9999992052098635, iteration: 258626
loss: 1.002727746963501,grad_norm: 0.9415123354881905, iteration: 258627
loss: 1.027373194694519,grad_norm: 0.999999320844319, iteration: 258628
loss: 1.014924168586731,grad_norm: 0.8608544363095025, iteration: 258629
loss: 1.0794873237609863,grad_norm: 0.8869639621233255, iteration: 258630
loss: 0.9866834878921509,grad_norm: 0.9466638565369566, iteration: 258631
loss: 1.0321301221847534,grad_norm: 0.816730276695156, iteration: 258632
loss: 0.967816948890686,grad_norm: 0.7694488706464911, iteration: 258633
loss: 1.009057641029358,grad_norm: 0.905613123167037, iteration: 258634
loss: 1.038246750831604,grad_norm: 0.9999994455596072, iteration: 258635
loss: 0.9714770913124084,grad_norm: 0.7477932009837264, iteration: 258636
loss: 1.0023603439331055,grad_norm: 0.7331173646254447, iteration: 258637
loss: 1.0069936513900757,grad_norm: 0.9999990170931449, iteration: 258638
loss: 0.9930064082145691,grad_norm: 0.9999997475081849, iteration: 258639
loss: 1.0056129693984985,grad_norm: 0.9999990335420547, iteration: 258640
loss: 1.0096054077148438,grad_norm: 0.9999990381661308, iteration: 258641
loss: 0.9673095345497131,grad_norm: 0.786609704703881, iteration: 258642
loss: 1.0051511526107788,grad_norm: 0.969220047355823, iteration: 258643
loss: 1.109555721282959,grad_norm: 0.9999989727667248, iteration: 258644
loss: 1.087472915649414,grad_norm: 0.8596699347856883, iteration: 258645
loss: 1.1294875144958496,grad_norm: 0.9999993589907537, iteration: 258646
loss: 1.0459380149841309,grad_norm: 0.9999991513060106, iteration: 258647
loss: 1.0238910913467407,grad_norm: 0.9852497216731244, iteration: 258648
loss: 1.103061318397522,grad_norm: 0.9999997439848718, iteration: 258649
loss: 1.0083179473876953,grad_norm: 0.8305267059310198, iteration: 258650
loss: 1.098488688468933,grad_norm: 0.9999994157708145, iteration: 258651
loss: 0.9880774617195129,grad_norm: 0.8999187278670061, iteration: 258652
loss: 1.0091968774795532,grad_norm: 0.9999991500455055, iteration: 258653
loss: 0.9464014768600464,grad_norm: 0.8767959306792871, iteration: 258654
loss: 0.9800460338592529,grad_norm: 0.9379992915502757, iteration: 258655
loss: 0.9943715333938599,grad_norm: 0.854831250067262, iteration: 258656
loss: 1.014557123184204,grad_norm: 0.866910186114107, iteration: 258657
loss: 1.050218105316162,grad_norm: 0.9999990887523086, iteration: 258658
loss: 0.9634644389152527,grad_norm: 0.7853268160588409, iteration: 258659
loss: 0.9946057200431824,grad_norm: 0.8480007844030945, iteration: 258660
loss: 0.9762856364250183,grad_norm: 0.7625262119197235, iteration: 258661
loss: 0.9924138188362122,grad_norm: 0.8305017226474163, iteration: 258662
loss: 1.0119484663009644,grad_norm: 0.9999993364657292, iteration: 258663
loss: 0.9871233105659485,grad_norm: 0.9999989774481102, iteration: 258664
loss: 0.9861502051353455,grad_norm: 0.9594702122707982, iteration: 258665
loss: 1.0882322788238525,grad_norm: 0.9999990802023918, iteration: 258666
loss: 0.9670356512069702,grad_norm: 0.7354039757660105, iteration: 258667
loss: 1.0085159540176392,grad_norm: 0.9335732510930261, iteration: 258668
loss: 1.0429452657699585,grad_norm: 0.9999990879887948, iteration: 258669
loss: 1.067367434501648,grad_norm: 0.9999994325416046, iteration: 258670
loss: 0.9931052327156067,grad_norm: 0.9299630467457216, iteration: 258671
loss: 1.0213576555252075,grad_norm: 0.7878977138942441, iteration: 258672
loss: 1.030198335647583,grad_norm: 0.9329331282471879, iteration: 258673
loss: 1.0115573406219482,grad_norm: 0.8390435870964355, iteration: 258674
loss: 1.0226154327392578,grad_norm: 0.8488400610878107, iteration: 258675
loss: 1.0181121826171875,grad_norm: 0.9999991497484432, iteration: 258676
loss: 1.0144556760787964,grad_norm: 0.938503721165463, iteration: 258677
loss: 0.9606882333755493,grad_norm: 0.8542802772911495, iteration: 258678
loss: 0.9925166964530945,grad_norm: 0.9432308143983142, iteration: 258679
loss: 1.049747109413147,grad_norm: 0.99999911793026, iteration: 258680
loss: 0.9807525873184204,grad_norm: 0.9999994879160647, iteration: 258681
loss: 1.031322717666626,grad_norm: 0.9999995055703738, iteration: 258682
loss: 1.0717514753341675,grad_norm: 0.9999992258991651, iteration: 258683
loss: 1.0261709690093994,grad_norm: 0.9999997570975945, iteration: 258684
loss: 0.9934943914413452,grad_norm: 0.9650229775809637, iteration: 258685
loss: 0.9929717779159546,grad_norm: 0.9999991745610665, iteration: 258686
loss: 0.9880771636962891,grad_norm: 0.8477722966555021, iteration: 258687
loss: 0.9762429594993591,grad_norm: 0.9999993640518073, iteration: 258688
loss: 0.9901342391967773,grad_norm: 0.8595490496404957, iteration: 258689
loss: 1.013311505317688,grad_norm: 0.9193619884155323, iteration: 258690
loss: 0.9547698497772217,grad_norm: 0.7783188765175243, iteration: 258691
loss: 1.049585223197937,grad_norm: 0.9999990934066318, iteration: 258692
loss: 1.021510362625122,grad_norm: 0.9999994529487054, iteration: 258693
loss: 1.062414288520813,grad_norm: 0.8491883796904944, iteration: 258694
loss: 1.0000911951065063,grad_norm: 0.865924653311267, iteration: 258695
loss: 0.9866282939910889,grad_norm: 0.9069389424390135, iteration: 258696
loss: 0.9830877184867859,grad_norm: 0.956854447798107, iteration: 258697
loss: 0.9541131258010864,grad_norm: 0.9688854756977142, iteration: 258698
loss: 0.9986804127693176,grad_norm: 0.9999989569314064, iteration: 258699
loss: 1.018513560295105,grad_norm: 0.8475395086057008, iteration: 258700
loss: 1.0083214044570923,grad_norm: 0.9999990707575312, iteration: 258701
loss: 1.0164859294891357,grad_norm: 0.8288243685198893, iteration: 258702
loss: 1.0355950593948364,grad_norm: 0.8961493631278173, iteration: 258703
loss: 0.9870535135269165,grad_norm: 0.795827965733622, iteration: 258704
loss: 1.0098236799240112,grad_norm: 0.8062787995278089, iteration: 258705
loss: 0.980390191078186,grad_norm: 0.879218573999769, iteration: 258706
loss: 0.9997454881668091,grad_norm: 0.9749832103349577, iteration: 258707
loss: 0.9831435680389404,grad_norm: 0.999999253089624, iteration: 258708
loss: 0.9990307092666626,grad_norm: 0.8385734428794756, iteration: 258709
loss: 0.9924994111061096,grad_norm: 0.8574372863308384, iteration: 258710
loss: 1.016061782836914,grad_norm: 0.9577401822673153, iteration: 258711
loss: 1.0947374105453491,grad_norm: 0.9999995742111218, iteration: 258712
loss: 1.0033553838729858,grad_norm: 0.784046643746098, iteration: 258713
loss: 1.1213613748550415,grad_norm: 0.9999992371384906, iteration: 258714
loss: 1.1030826568603516,grad_norm: 0.9999998670038135, iteration: 258715
loss: 1.0276061296463013,grad_norm: 0.9999990258931237, iteration: 258716
loss: 1.0545403957366943,grad_norm: 0.8648528082200905, iteration: 258717
loss: 0.9886412620544434,grad_norm: 0.7467879398509389, iteration: 258718
loss: 1.0653027296066284,grad_norm: 0.9999999868506018, iteration: 258719
loss: 0.9920058250427246,grad_norm: 0.9999990483334751, iteration: 258720
loss: 1.1097198724746704,grad_norm: 0.9999998336298898, iteration: 258721
loss: 0.9840555191040039,grad_norm: 0.9422503515052858, iteration: 258722
loss: 1.0309268236160278,grad_norm: 0.9613372225325653, iteration: 258723
loss: 1.0423946380615234,grad_norm: 0.9999996377466411, iteration: 258724
loss: 0.9921767711639404,grad_norm: 0.83051346890884, iteration: 258725
loss: 0.9649901986122131,grad_norm: 0.8616112097760572, iteration: 258726
loss: 0.9751750826835632,grad_norm: 0.9999995013092977, iteration: 258727
loss: 0.97991544008255,grad_norm: 0.8421490932616169, iteration: 258728
loss: 1.0010414123535156,grad_norm: 0.999999142037299, iteration: 258729
loss: 0.9994691610336304,grad_norm: 0.9388431430923445, iteration: 258730
loss: 1.015400767326355,grad_norm: 0.9009108991053434, iteration: 258731
loss: 0.9784249067306519,grad_norm: 0.9999990665513665, iteration: 258732
loss: 1.0076274871826172,grad_norm: 0.9999991157449146, iteration: 258733
loss: 1.0112311840057373,grad_norm: 0.9999989841379133, iteration: 258734
loss: 1.031427025794983,grad_norm: 0.7932261336838882, iteration: 258735
loss: 0.9794079065322876,grad_norm: 0.999998914310245, iteration: 258736
loss: 1.1392014026641846,grad_norm: 0.8440914356886882, iteration: 258737
loss: 1.0001498460769653,grad_norm: 0.793103711843943, iteration: 258738
loss: 1.033020257949829,grad_norm: 0.999999536341811, iteration: 258739
loss: 1.0005892515182495,grad_norm: 0.9999990608529791, iteration: 258740
loss: 1.033143401145935,grad_norm: 0.9999996452602112, iteration: 258741
loss: 1.092438817024231,grad_norm: 0.9999996238802362, iteration: 258742
loss: 0.980651319026947,grad_norm: 0.9984551055233322, iteration: 258743
loss: 1.0642898082733154,grad_norm: 0.9999991995905886, iteration: 258744
loss: 1.094096302986145,grad_norm: 0.999999664841871, iteration: 258745
loss: 1.0234532356262207,grad_norm: 0.9794666178400407, iteration: 258746
loss: 1.0719382762908936,grad_norm: 0.9999993138323323, iteration: 258747
loss: 1.0289232730865479,grad_norm: 0.9064587993197688, iteration: 258748
loss: 1.0580744743347168,grad_norm: 0.9999998224511286, iteration: 258749
loss: 1.0942881107330322,grad_norm: 0.9492196812217656, iteration: 258750
loss: 1.0864745378494263,grad_norm: 0.9999998418277026, iteration: 258751
loss: 0.9847361445426941,grad_norm: 0.7814484571675109, iteration: 258752
loss: 0.9688979983329773,grad_norm: 0.9999991466855548, iteration: 258753
loss: 1.0110704898834229,grad_norm: 0.9524126711403378, iteration: 258754
loss: 1.0266001224517822,grad_norm: 0.7519105513902296, iteration: 258755
loss: 1.0112030506134033,grad_norm: 0.7735806510873886, iteration: 258756
loss: 1.0442334413528442,grad_norm: 0.8312853386738752, iteration: 258757
loss: 1.0131667852401733,grad_norm: 0.8982747928188641, iteration: 258758
loss: 1.0496548414230347,grad_norm: 0.9945024544949754, iteration: 258759
loss: 0.9985589385032654,grad_norm: 0.8285437263928205, iteration: 258760
loss: 1.1727391481399536,grad_norm: 0.9999992677762041, iteration: 258761
loss: 1.0480799674987793,grad_norm: 0.9999994141190165, iteration: 258762
loss: 1.0966122150421143,grad_norm: 0.9999991252172434, iteration: 258763
loss: 1.0715675354003906,grad_norm: 0.999999117844986, iteration: 258764
loss: 0.9943440556526184,grad_norm: 0.8700500836831712, iteration: 258765
loss: 1.0004554986953735,grad_norm: 0.9999990308115965, iteration: 258766
loss: 1.069786787033081,grad_norm: 0.9999993158164673, iteration: 258767
loss: 0.9705449938774109,grad_norm: 0.9404919643098503, iteration: 258768
loss: 1.0270930528640747,grad_norm: 0.9999992378567006, iteration: 258769
loss: 0.978976309299469,grad_norm: 0.8779838984185854, iteration: 258770
loss: 1.0060566663742065,grad_norm: 0.9982349599818171, iteration: 258771
loss: 1.014208436012268,grad_norm: 0.9999991021264357, iteration: 258772
loss: 1.067559838294983,grad_norm: 0.9999996284859624, iteration: 258773
loss: 1.0668776035308838,grad_norm: 0.9999995227783465, iteration: 258774
loss: 0.9876795411109924,grad_norm: 0.8086082579153925, iteration: 258775
loss: 1.0036736726760864,grad_norm: 0.8886437703316721, iteration: 258776
loss: 0.9885092377662659,grad_norm: 0.9340952661645472, iteration: 258777
loss: 1.0655437707901,grad_norm: 1.0000001325430208, iteration: 258778
loss: 1.023391604423523,grad_norm: 0.999999158084595, iteration: 258779
loss: 0.995781660079956,grad_norm: 0.9999992076060862, iteration: 258780
loss: 1.046891450881958,grad_norm: 0.8789815308175771, iteration: 258781
loss: 0.9907543659210205,grad_norm: 0.9099542958486536, iteration: 258782
loss: 1.0568912029266357,grad_norm: 0.780982231427342, iteration: 258783
loss: 1.0057135820388794,grad_norm: 0.9999991129750883, iteration: 258784
loss: 0.986003041267395,grad_norm: 0.9999997153883545, iteration: 258785
loss: 0.9596163034439087,grad_norm: 0.7595485570624441, iteration: 258786
loss: 0.9951438903808594,grad_norm: 0.9999991551108375, iteration: 258787
loss: 1.0482558012008667,grad_norm: 0.9999992463509625, iteration: 258788
loss: 0.9702669978141785,grad_norm: 0.7168809463651122, iteration: 258789
loss: 1.0183111429214478,grad_norm: 0.9999997475509287, iteration: 258790
loss: 1.005355715751648,grad_norm: 0.9999998637071043, iteration: 258791
loss: 0.9937578439712524,grad_norm: 0.9999993017126576, iteration: 258792
loss: 1.0292302370071411,grad_norm: 0.7868411992165988, iteration: 258793
loss: 0.9876031279563904,grad_norm: 0.9999991119528995, iteration: 258794
loss: 1.0110725164413452,grad_norm: 0.9999991871661773, iteration: 258795
loss: 1.0542783737182617,grad_norm: 0.7456447312490634, iteration: 258796
loss: 1.048898696899414,grad_norm: 0.999999188737776, iteration: 258797
loss: 1.0175120830535889,grad_norm: 0.9999993763241851, iteration: 258798
loss: 1.0398972034454346,grad_norm: 0.8873974429550511, iteration: 258799
loss: 1.2712684869766235,grad_norm: 0.9999998368601933, iteration: 258800
loss: 1.1132937669754028,grad_norm: 0.9999993969056772, iteration: 258801
loss: 0.9702651500701904,grad_norm: 0.8011336382140335, iteration: 258802
loss: 1.1890387535095215,grad_norm: 0.9999999636752845, iteration: 258803
loss: 1.0138152837753296,grad_norm: 0.999999197050409, iteration: 258804
loss: 1.0532697439193726,grad_norm: 0.8987406278560452, iteration: 258805
loss: 1.2702082395553589,grad_norm: 0.9999991497055312, iteration: 258806
loss: 1.2743643522262573,grad_norm: 0.9999996459107963, iteration: 258807
loss: 0.985802948474884,grad_norm: 0.9081339559126773, iteration: 258808
loss: 1.0857118368148804,grad_norm: 1.0000000768806645, iteration: 258809
loss: 1.100041389465332,grad_norm: 0.9999991033520818, iteration: 258810
loss: 1.296661138534546,grad_norm: 0.999999388976537, iteration: 258811
loss: 1.224129319190979,grad_norm: 0.999999268578817, iteration: 258812
loss: 1.6476547718048096,grad_norm: 0.9999995546151166, iteration: 258813
loss: 1.330247402191162,grad_norm: 0.999999799216601, iteration: 258814
loss: 1.0471324920654297,grad_norm: 0.9999998746046723, iteration: 258815
loss: 1.4630268812179565,grad_norm: 0.9999995636128359, iteration: 258816
loss: 1.0129051208496094,grad_norm: 0.839144717389031, iteration: 258817
loss: 1.0457746982574463,grad_norm: 0.9999992370905904, iteration: 258818
loss: 1.0678458213806152,grad_norm: 0.9999991183819857, iteration: 258819
loss: 1.2328811883926392,grad_norm: 0.9999996631323013, iteration: 258820
loss: 1.2697222232818604,grad_norm: 0.9999996261499602, iteration: 258821
loss: 1.3955161571502686,grad_norm: 0.9999997859340931, iteration: 258822
loss: 1.1531498432159424,grad_norm: 0.9335549532984763, iteration: 258823
loss: 1.059369444847107,grad_norm: 0.9999994752976645, iteration: 258824
loss: 1.1162300109863281,grad_norm: 0.9999993500537331, iteration: 258825
loss: 1.1497067213058472,grad_norm: 0.9999997948382473, iteration: 258826
loss: 1.3923996686935425,grad_norm: 0.9999998606445996, iteration: 258827
loss: 1.1695432662963867,grad_norm: 0.9999998674773874, iteration: 258828
loss: 1.0209370851516724,grad_norm: 0.9999992145351291, iteration: 258829
loss: 1.1594988107681274,grad_norm: 0.9999996093137732, iteration: 258830
loss: 1.1190593242645264,grad_norm: 0.9999999063226113, iteration: 258831
loss: 1.1113476753234863,grad_norm: 0.9999998844902283, iteration: 258832
loss: 1.216350793838501,grad_norm: 0.9999996785688474, iteration: 258833
loss: 1.03400456905365,grad_norm: 0.9999995093957313, iteration: 258834
loss: 1.093700647354126,grad_norm: 0.9999992285982404, iteration: 258835
loss: 1.0457618236541748,grad_norm: 0.9999991398903249, iteration: 258836
loss: 1.00313138961792,grad_norm: 0.9969119290476456, iteration: 258837
loss: 1.0661808252334595,grad_norm: 0.9999996844341923, iteration: 258838
loss: 1.130246639251709,grad_norm: 0.9999996238515969, iteration: 258839
loss: 1.2269337177276611,grad_norm: 0.9999998635994151, iteration: 258840
loss: 1.1203515529632568,grad_norm: 0.9999999812687251, iteration: 258841
loss: 1.0536787509918213,grad_norm: 0.999999590081776, iteration: 258842
loss: 1.0925209522247314,grad_norm: 0.9999998416046455, iteration: 258843
loss: 1.0775501728057861,grad_norm: 0.9999989837054424, iteration: 258844
loss: 1.087679386138916,grad_norm: 0.9999992007465273, iteration: 258845
loss: 0.9937898516654968,grad_norm: 0.9999996632135036, iteration: 258846
loss: 1.0378156900405884,grad_norm: 0.9999994967871246, iteration: 258847
loss: 1.0614153146743774,grad_norm: 0.8441635144081371, iteration: 258848
loss: 1.0427871942520142,grad_norm: 0.9999990443703066, iteration: 258849
loss: 1.0678991079330444,grad_norm: 0.9999993193943753, iteration: 258850
loss: 1.100492000579834,grad_norm: 0.9999999601565881, iteration: 258851
loss: 1.0683423280715942,grad_norm: 0.9999993200486919, iteration: 258852
loss: 0.9881476759910583,grad_norm: 0.8245724898228486, iteration: 258853
loss: 1.0138171911239624,grad_norm: 0.8212198728551438, iteration: 258854
loss: 1.0242754220962524,grad_norm: 0.9875178473846584, iteration: 258855
loss: 1.0335381031036377,grad_norm: 0.8321081808855121, iteration: 258856
loss: 1.0290101766586304,grad_norm: 0.9999991601423748, iteration: 258857
loss: 0.9722181558609009,grad_norm: 0.8986021478301676, iteration: 258858
loss: 1.0368106365203857,grad_norm: 0.999999892051561, iteration: 258859
loss: 0.9769418835639954,grad_norm: 0.9999990317964249, iteration: 258860
loss: 1.0111552476882935,grad_norm: 0.7815572425528583, iteration: 258861
loss: 0.9923120141029358,grad_norm: 0.9769772288044132, iteration: 258862
loss: 1.1871689558029175,grad_norm: 0.9999995990610092, iteration: 258863
loss: 0.9802458882331848,grad_norm: 0.9999992659788137, iteration: 258864
loss: 1.0303376913070679,grad_norm: 0.9999994508608527, iteration: 258865
loss: 1.01662278175354,grad_norm: 0.832888387035684, iteration: 258866
loss: 1.1299954652786255,grad_norm: 0.9999995929375349, iteration: 258867
loss: 1.0049654245376587,grad_norm: 0.9271844424255247, iteration: 258868
loss: 1.0434588193893433,grad_norm: 0.9999996087120601, iteration: 258869
loss: 1.020127296447754,grad_norm: 0.9999991669297607, iteration: 258870
loss: 0.9922422170639038,grad_norm: 0.8917285856849808, iteration: 258871
loss: 1.0225656032562256,grad_norm: 0.9999994344050243, iteration: 258872
loss: 1.090779423713684,grad_norm: 0.9019863647950845, iteration: 258873
loss: 0.9833585619926453,grad_norm: 0.9999992397815779, iteration: 258874
loss: 1.044617772102356,grad_norm: 0.9999990071654783, iteration: 258875
loss: 0.9830979108810425,grad_norm: 0.8254106802304245, iteration: 258876
loss: 1.0030370950698853,grad_norm: 0.9247143796569653, iteration: 258877
loss: 0.9921226501464844,grad_norm: 0.8877407878284553, iteration: 258878
loss: 0.985813319683075,grad_norm: 0.8904996073292392, iteration: 258879
loss: 1.1192493438720703,grad_norm: 0.971548225502091, iteration: 258880
loss: 0.9663198590278625,grad_norm: 0.7972586228832304, iteration: 258881
loss: 0.987061083316803,grad_norm: 0.9711545753075785, iteration: 258882
loss: 1.0026763677597046,grad_norm: 0.9387324149258679, iteration: 258883
loss: 0.9865651726722717,grad_norm: 0.9888420536274106, iteration: 258884
loss: 0.996608316898346,grad_norm: 0.9999991982800442, iteration: 258885
loss: 0.995849072933197,grad_norm: 0.8114401588575259, iteration: 258886
loss: 1.0079368352890015,grad_norm: 0.8855522182200638, iteration: 258887
loss: 1.0159401893615723,grad_norm: 0.9428946243011278, iteration: 258888
loss: 1.0170531272888184,grad_norm: 0.9842472287500517, iteration: 258889
loss: 0.9591189026832581,grad_norm: 0.9509480240281155, iteration: 258890
loss: 0.9501950144767761,grad_norm: 0.817173099377543, iteration: 258891
loss: 1.012622356414795,grad_norm: 0.8237796330968367, iteration: 258892
loss: 1.0063667297363281,grad_norm: 0.9999993833818012, iteration: 258893
loss: 1.0029609203338623,grad_norm: 0.9204852068951185, iteration: 258894
loss: 1.0071992874145508,grad_norm: 0.9999995254754235, iteration: 258895
loss: 1.014448881149292,grad_norm: 0.8457493879648661, iteration: 258896
loss: 0.9876359105110168,grad_norm: 0.9365429681010371, iteration: 258897
loss: 1.0020514726638794,grad_norm: 0.9999992153188278, iteration: 258898
loss: 0.957909882068634,grad_norm: 0.8502465400374407, iteration: 258899
loss: 1.0116983652114868,grad_norm: 0.8592435834292478, iteration: 258900
loss: 0.9853785037994385,grad_norm: 0.8398551689042845, iteration: 258901
loss: 0.989143967628479,grad_norm: 0.8859622566832531, iteration: 258902
loss: 1.0878318548202515,grad_norm: 0.9999998744460158, iteration: 258903
loss: 1.0071004629135132,grad_norm: 0.9999992620918928, iteration: 258904
loss: 1.0253403186798096,grad_norm: 0.9999993720131278, iteration: 258905
loss: 1.0147194862365723,grad_norm: 0.9999990189441703, iteration: 258906
loss: 0.990476667881012,grad_norm: 0.9941909925322902, iteration: 258907
loss: 1.0505666732788086,grad_norm: 0.8589712949970191, iteration: 258908
loss: 1.0196841955184937,grad_norm: 0.9999990880941748, iteration: 258909
loss: 0.9987884163856506,grad_norm: 0.99780490129467, iteration: 258910
loss: 1.023950457572937,grad_norm: 0.8579194371116551, iteration: 258911
loss: 1.0415599346160889,grad_norm: 0.78849659616445, iteration: 258912
loss: 1.025761604309082,grad_norm: 0.8097329656975499, iteration: 258913
loss: 0.9934644103050232,grad_norm: 0.8761156853873866, iteration: 258914
loss: 1.0663366317749023,grad_norm: 0.9004078767202915, iteration: 258915
loss: 1.0251595973968506,grad_norm: 0.9999990988348338, iteration: 258916
loss: 0.9860540628433228,grad_norm: 0.8868534179749633, iteration: 258917
loss: 0.9702857732772827,grad_norm: 0.8725230788128312, iteration: 258918
loss: 1.028178095817566,grad_norm: 0.9999998697593444, iteration: 258919
loss: 1.001079797744751,grad_norm: 0.999999087558327, iteration: 258920
loss: 0.9616773724555969,grad_norm: 0.9261798615021084, iteration: 258921
loss: 0.9895336627960205,grad_norm: 0.9143362277557345, iteration: 258922
loss: 0.9695091247558594,grad_norm: 0.7875105501940867, iteration: 258923
loss: 1.0083049535751343,grad_norm: 0.8404259702530782, iteration: 258924
loss: 0.9799957871437073,grad_norm: 0.9330467651230727, iteration: 258925
loss: 1.0192004442214966,grad_norm: 0.9098384353949849, iteration: 258926
loss: 1.0214850902557373,grad_norm: 0.80916162406246, iteration: 258927
loss: 1.020795226097107,grad_norm: 0.8834797927223278, iteration: 258928
loss: 1.037185549736023,grad_norm: 0.8929344246985645, iteration: 258929
loss: 1.0176657438278198,grad_norm: 0.8769284667866536, iteration: 258930
loss: 0.9810585975646973,grad_norm: 0.9013362408473995, iteration: 258931
loss: 0.9892926812171936,grad_norm: 0.885900040986024, iteration: 258932
loss: 1.021025538444519,grad_norm: 0.99999935899289, iteration: 258933
loss: 1.006278395652771,grad_norm: 0.9999990092342264, iteration: 258934
loss: 1.0052881240844727,grad_norm: 0.7631182646266439, iteration: 258935
loss: 0.9976370334625244,grad_norm: 0.9999991673263979, iteration: 258936
loss: 1.0312488079071045,grad_norm: 0.9999991492379408, iteration: 258937
loss: 1.0388619899749756,grad_norm: 0.8642883903936565, iteration: 258938
loss: 0.9765788912773132,grad_norm: 0.8913473265543982, iteration: 258939
loss: 1.008546233177185,grad_norm: 0.9999991593578637, iteration: 258940
loss: 1.031799077987671,grad_norm: 0.9999995616951064, iteration: 258941
loss: 0.9823386669158936,grad_norm: 0.911010664255516, iteration: 258942
loss: 1.0082257986068726,grad_norm: 0.8750284764988632, iteration: 258943
loss: 1.0044710636138916,grad_norm: 0.9194236831693305, iteration: 258944
loss: 0.963338315486908,grad_norm: 0.7548183221872972, iteration: 258945
loss: 0.9937649965286255,grad_norm: 0.9886949138583583, iteration: 258946
loss: 1.0047085285186768,grad_norm: 0.9999990787076413, iteration: 258947
loss: 1.0014846324920654,grad_norm: 0.8093683337393582, iteration: 258948
loss: 0.9715813398361206,grad_norm: 0.9999989904551897, iteration: 258949
loss: 1.128056287765503,grad_norm: 0.999999555374483, iteration: 258950
loss: 0.9958290457725525,grad_norm: 0.958674266537961, iteration: 258951
loss: 1.0140233039855957,grad_norm: 0.9747898466988753, iteration: 258952
loss: 1.055827260017395,grad_norm: 0.9999999159838175, iteration: 258953
loss: 1.0017937421798706,grad_norm: 0.9999990983390883, iteration: 258954
loss: 1.017333984375,grad_norm: 0.9999993942385369, iteration: 258955
loss: 1.0135208368301392,grad_norm: 0.7887967075992984, iteration: 258956
loss: 0.9882122278213501,grad_norm: 0.9999990493576671, iteration: 258957
loss: 0.9889476895332336,grad_norm: 0.9999991867887668, iteration: 258958
loss: 0.9773246049880981,grad_norm: 0.9008355984047957, iteration: 258959
loss: 0.9731201529502869,grad_norm: 0.887686685578978, iteration: 258960
loss: 0.987417459487915,grad_norm: 0.8352231038283308, iteration: 258961
loss: 0.9784414768218994,grad_norm: 0.8786457891130399, iteration: 258962
loss: 0.9681118130683899,grad_norm: 0.8853188594229042, iteration: 258963
loss: 0.9989234209060669,grad_norm: 0.9999992794267831, iteration: 258964
loss: 1.0517538785934448,grad_norm: 0.9171359994497124, iteration: 258965
loss: 0.9995431303977966,grad_norm: 0.9613791168936332, iteration: 258966
loss: 0.9980165362358093,grad_norm: 0.9093779534341717, iteration: 258967
loss: 1.0161632299423218,grad_norm: 0.9806214434004977, iteration: 258968
loss: 1.0347764492034912,grad_norm: 0.9999990126927414, iteration: 258969
loss: 0.9974997043609619,grad_norm: 0.7303179279999659, iteration: 258970
loss: 1.0280531644821167,grad_norm: 0.9999990957509394, iteration: 258971
loss: 0.9884824752807617,grad_norm: 0.999999873944075, iteration: 258972
loss: 1.0213191509246826,grad_norm: 0.999999313298907, iteration: 258973
loss: 1.0279057025909424,grad_norm: 0.9999991115819387, iteration: 258974
loss: 1.0093132257461548,grad_norm: 0.9999995413295694, iteration: 258975
loss: 0.9690661430358887,grad_norm: 0.9581983987371517, iteration: 258976
loss: 0.9591265916824341,grad_norm: 0.9227993187489316, iteration: 258977
loss: 0.9797368049621582,grad_norm: 0.7865056407396095, iteration: 258978
loss: 0.9438422918319702,grad_norm: 0.9999990306141369, iteration: 258979
loss: 1.0618810653686523,grad_norm: 0.899990125143216, iteration: 258980
loss: 1.0147874355316162,grad_norm: 0.8631562970841327, iteration: 258981
loss: 1.0028908252716064,grad_norm: 0.9999995741683816, iteration: 258982
loss: 1.017144799232483,grad_norm: 0.9604378308021115, iteration: 258983
loss: 0.976190447807312,grad_norm: 0.9215929064674364, iteration: 258984
loss: 1.0507217645645142,grad_norm: 0.9999991442247449, iteration: 258985
loss: 0.9919382929801941,grad_norm: 0.8065123729070428, iteration: 258986
loss: 1.0060632228851318,grad_norm: 0.9999998541932961, iteration: 258987
loss: 1.0165667533874512,grad_norm: 0.9729126966187057, iteration: 258988
loss: 1.0219460725784302,grad_norm: 0.9741942451739445, iteration: 258989
loss: 1.0001198053359985,grad_norm: 0.7665987887288409, iteration: 258990
loss: 1.023173451423645,grad_norm: 0.8294322319910727, iteration: 258991
loss: 1.0090833902359009,grad_norm: 0.832479850392273, iteration: 258992
loss: 1.0162346363067627,grad_norm: 0.9301586252747432, iteration: 258993
loss: 1.0187071561813354,grad_norm: 0.8859501097697465, iteration: 258994
loss: 0.9910231828689575,grad_norm: 0.8309507761147896, iteration: 258995
loss: 1.0178115367889404,grad_norm: 0.953326189806277, iteration: 258996
loss: 0.9908146858215332,grad_norm: 0.9999990129077803, iteration: 258997
loss: 1.0636861324310303,grad_norm: 0.8679694690590083, iteration: 258998
loss: 1.1562687158584595,grad_norm: 0.9999997906253227, iteration: 258999
loss: 1.0748270750045776,grad_norm: 0.999999942804905, iteration: 259000
loss: 0.9993128180503845,grad_norm: 0.7561752373792396, iteration: 259001
loss: 1.002320647239685,grad_norm: 0.9999993618458696, iteration: 259002
loss: 1.0337116718292236,grad_norm: 0.9999991605828116, iteration: 259003
loss: 1.0161360502243042,grad_norm: 0.9999988663336438, iteration: 259004
loss: 0.9825212359428406,grad_norm: 0.9428729111697743, iteration: 259005
loss: 1.1885240077972412,grad_norm: 0.9999998481585628, iteration: 259006
loss: 0.9815539121627808,grad_norm: 0.8291016626934663, iteration: 259007
loss: 0.9864067435264587,grad_norm: 0.9999991321864403, iteration: 259008
loss: 1.0100983381271362,grad_norm: 0.9999992086288217, iteration: 259009
loss: 0.993877649307251,grad_norm: 0.9600881454968299, iteration: 259010
loss: 0.9812951683998108,grad_norm: 0.8674996311444262, iteration: 259011
loss: 0.9857879877090454,grad_norm: 0.9999990472793426, iteration: 259012
loss: 1.0237501859664917,grad_norm: 0.999999252385539, iteration: 259013
loss: 0.9881607890129089,grad_norm: 0.9897162053488536, iteration: 259014
loss: 0.9583795070648193,grad_norm: 0.8504872539386816, iteration: 259015
loss: 1.0077327489852905,grad_norm: 0.7878952233498046, iteration: 259016
loss: 0.9610462188720703,grad_norm: 0.9079796631609961, iteration: 259017
loss: 0.9679386615753174,grad_norm: 0.8243325491157951, iteration: 259018
loss: 0.9999838471412659,grad_norm: 0.9946363025068894, iteration: 259019
loss: 0.9833840131759644,grad_norm: 0.7986193778287034, iteration: 259020
loss: 1.0055837631225586,grad_norm: 0.9999988836337078, iteration: 259021
loss: 1.0580387115478516,grad_norm: 0.9592989979100973, iteration: 259022
loss: 1.0035085678100586,grad_norm: 0.9122789594426274, iteration: 259023
loss: 1.0339125394821167,grad_norm: 0.9999998140665336, iteration: 259024
loss: 0.9671226143836975,grad_norm: 0.8130678636515082, iteration: 259025
loss: 0.9791962504386902,grad_norm: 0.8859184937359149, iteration: 259026
loss: 0.9949935674667358,grad_norm: 0.7908338725243128, iteration: 259027
loss: 1.0205374956130981,grad_norm: 0.8334237935616208, iteration: 259028
loss: 1.035642385482788,grad_norm: 0.9277210609377478, iteration: 259029
loss: 1.0373305082321167,grad_norm: 0.8363324075008207, iteration: 259030
loss: 1.0366820096969604,grad_norm: 0.9909029938120386, iteration: 259031
loss: 0.9910240173339844,grad_norm: 0.7650041670678851, iteration: 259032
loss: 1.0226856470108032,grad_norm: 0.9999990509950438, iteration: 259033
loss: 1.0444414615631104,grad_norm: 0.9999992727537542, iteration: 259034
loss: 1.0089244842529297,grad_norm: 0.9999991627359055, iteration: 259035
loss: 1.017063856124878,grad_norm: 0.8776797933490962, iteration: 259036
loss: 0.9891193509101868,grad_norm: 0.8230823925892544, iteration: 259037
loss: 1.035763144493103,grad_norm: 0.9867462450111504, iteration: 259038
loss: 1.2035390138626099,grad_norm: 0.9999990249823244, iteration: 259039
loss: 1.020654559135437,grad_norm: 0.8620884821911062, iteration: 259040
loss: 1.017939805984497,grad_norm: 0.9999989361270598, iteration: 259041
loss: 0.9916097521781921,grad_norm: 0.9936316402686891, iteration: 259042
loss: 1.0026443004608154,grad_norm: 0.9301157781284536, iteration: 259043
loss: 1.0172148942947388,grad_norm: 0.8520582527188924, iteration: 259044
loss: 1.0066100358963013,grad_norm: 0.9605824076376038, iteration: 259045
loss: 1.0247260332107544,grad_norm: 0.8832452838081212, iteration: 259046
loss: 1.0744414329528809,grad_norm: 0.9999997937288646, iteration: 259047
loss: 0.9950268864631653,grad_norm: 0.9533368075907259, iteration: 259048
loss: 1.027422308921814,grad_norm: 0.8022560753730119, iteration: 259049
loss: 1.0134785175323486,grad_norm: 0.9325634056490308, iteration: 259050
loss: 1.0695768594741821,grad_norm: 0.9999999444509908, iteration: 259051
loss: 1.0436598062515259,grad_norm: 0.9999996543339337, iteration: 259052
loss: 1.034774661064148,grad_norm: 0.9214572576939749, iteration: 259053
loss: 0.990135908126831,grad_norm: 0.9857873680168404, iteration: 259054
loss: 1.041480541229248,grad_norm: 0.8623743648565578, iteration: 259055
loss: 0.9755342602729797,grad_norm: 0.9470069827862398, iteration: 259056
loss: 1.0499991178512573,grad_norm: 0.9627071130501546, iteration: 259057
loss: 0.9946198463439941,grad_norm: 0.8322176499528963, iteration: 259058
loss: 0.9781777858734131,grad_norm: 0.824899012279962, iteration: 259059
loss: 0.9840916395187378,grad_norm: 0.9999991626746968, iteration: 259060
loss: 1.0258498191833496,grad_norm: 0.9999993036681926, iteration: 259061
loss: 1.0576131343841553,grad_norm: 0.8696101114085147, iteration: 259062
loss: 1.1054348945617676,grad_norm: 0.999999253118208, iteration: 259063
loss: 1.046982765197754,grad_norm: 0.9999997728887179, iteration: 259064
loss: 0.9988542199134827,grad_norm: 0.8759389944450157, iteration: 259065
loss: 1.043104887008667,grad_norm: 0.9853657556446995, iteration: 259066
loss: 0.9870620965957642,grad_norm: 0.986284648497582, iteration: 259067
loss: 0.9698788523674011,grad_norm: 0.7757575016577185, iteration: 259068
loss: 0.9807701110839844,grad_norm: 0.9891030678003891, iteration: 259069
loss: 0.9785236716270447,grad_norm: 0.7279604702256528, iteration: 259070
loss: 1.0656417608261108,grad_norm: 0.9407743935866475, iteration: 259071
loss: 1.0837106704711914,grad_norm: 0.9999995505341011, iteration: 259072
loss: 1.038918375968933,grad_norm: 0.9999990254040774, iteration: 259073
loss: 0.9902411103248596,grad_norm: 0.9493063285482976, iteration: 259074
loss: 1.0397197008132935,grad_norm: 0.9999998706218658, iteration: 259075
loss: 0.9787370562553406,grad_norm: 0.8965330607442781, iteration: 259076
loss: 1.0379985570907593,grad_norm: 0.8233696451427566, iteration: 259077
loss: 1.014198660850525,grad_norm: 0.8724857153690109, iteration: 259078
loss: 1.001277208328247,grad_norm: 0.9999998091287281, iteration: 259079
loss: 1.0146600008010864,grad_norm: 0.9623614400656975, iteration: 259080
loss: 1.0669991970062256,grad_norm: 0.8890177217128714, iteration: 259081
loss: 0.9848093390464783,grad_norm: 0.7931836708556435, iteration: 259082
loss: 0.9920664429664612,grad_norm: 0.9112527727101464, iteration: 259083
loss: 0.987104058265686,grad_norm: 0.949443459016225, iteration: 259084
loss: 1.0115753412246704,grad_norm: 0.7326490712527279, iteration: 259085
loss: 1.0308376550674438,grad_norm: 0.8381004457767116, iteration: 259086
loss: 1.0035598278045654,grad_norm: 0.9999990671631027, iteration: 259087
loss: 0.9965620636940002,grad_norm: 0.8311150172511388, iteration: 259088
loss: 0.9997121691703796,grad_norm: 0.9166691803022364, iteration: 259089
loss: 1.0240064859390259,grad_norm: 0.9822990469586692, iteration: 259090
loss: 0.9874062538146973,grad_norm: 0.9626667429497054, iteration: 259091
loss: 1.0379116535186768,grad_norm: 0.9999992137988101, iteration: 259092
loss: 1.0395578145980835,grad_norm: 0.9999990218812074, iteration: 259093
loss: 0.9866603016853333,grad_norm: 0.8479607687707902, iteration: 259094
loss: 1.0083143711090088,grad_norm: 0.9283325205668382, iteration: 259095
loss: 0.9867983460426331,grad_norm: 0.9193735685708888, iteration: 259096
loss: 1.0603739023208618,grad_norm: 0.9999993332803326, iteration: 259097
loss: 1.00334894657135,grad_norm: 0.9105274559862432, iteration: 259098
loss: 1.1636145114898682,grad_norm: 0.9999996358480088, iteration: 259099
loss: 0.9941311478614807,grad_norm: 0.9999992256211948, iteration: 259100
loss: 1.0070935487747192,grad_norm: 0.8879301532369122, iteration: 259101
loss: 1.0238910913467407,grad_norm: 0.9540794625586049, iteration: 259102
loss: 1.0095255374908447,grad_norm: 0.8658492479182641, iteration: 259103
loss: 1.0150617361068726,grad_norm: 0.8685110307099985, iteration: 259104
loss: 1.1105858087539673,grad_norm: 0.9999999259220316, iteration: 259105
loss: 0.9992542266845703,grad_norm: 0.8212043947789305, iteration: 259106
loss: 1.0249918699264526,grad_norm: 0.8041959525225024, iteration: 259107
loss: 0.9880585670471191,grad_norm: 0.9999993073262057, iteration: 259108
loss: 1.0255600214004517,grad_norm: 0.8661303068460232, iteration: 259109
loss: 0.9912963509559631,grad_norm: 0.8353854538297238, iteration: 259110
loss: 0.991628110408783,grad_norm: 0.8093132928161934, iteration: 259111
loss: 0.970302164554596,grad_norm: 0.7682088113260497, iteration: 259112
loss: 1.041670799255371,grad_norm: 0.9999995128445233, iteration: 259113
loss: 1.0130716562271118,grad_norm: 0.9925159181203494, iteration: 259114
loss: 0.9936053156852722,grad_norm: 0.8976715064510687, iteration: 259115
loss: 1.0082173347473145,grad_norm: 0.9856664377062959, iteration: 259116
loss: 1.0189425945281982,grad_norm: 0.9999990886071004, iteration: 259117
loss: 0.9781455397605896,grad_norm: 0.9999996859780789, iteration: 259118
loss: 0.9964385628700256,grad_norm: 0.9999991107478956, iteration: 259119
loss: 1.0517938137054443,grad_norm: 0.9999989895548991, iteration: 259120
loss: 1.0070074796676636,grad_norm: 0.9114174160014387, iteration: 259121
loss: 1.0121506452560425,grad_norm: 0.8283244603641198, iteration: 259122
loss: 1.0033419132232666,grad_norm: 0.9293215438299034, iteration: 259123
loss: 1.0093014240264893,grad_norm: 0.8426318722244651, iteration: 259124
loss: 0.9983882308006287,grad_norm: 0.9433907708995336, iteration: 259125
loss: 0.9805601239204407,grad_norm: 0.7974883782863453, iteration: 259126
loss: 1.0014235973358154,grad_norm: 0.7622512294723318, iteration: 259127
loss: 0.9880313873291016,grad_norm: 0.8267704280204111, iteration: 259128
loss: 1.0208266973495483,grad_norm: 0.8422956161045391, iteration: 259129
loss: 1.0875744819641113,grad_norm: 0.999999261774597, iteration: 259130
loss: 0.9748128652572632,grad_norm: 0.8103109443105438, iteration: 259131
loss: 1.0096689462661743,grad_norm: 0.9010124381772464, iteration: 259132
loss: 1.001650333404541,grad_norm: 0.7562162960413779, iteration: 259133
loss: 0.9971909523010254,grad_norm: 0.7814562454314326, iteration: 259134
loss: 1.005707859992981,grad_norm: 0.9999993159939602, iteration: 259135
loss: 1.0176396369934082,grad_norm: 0.9518645685262356, iteration: 259136
loss: 1.0042192935943604,grad_norm: 0.9999991095489745, iteration: 259137
loss: 1.0057973861694336,grad_norm: 0.7989428119601241, iteration: 259138
loss: 1.0189518928527832,grad_norm: 0.8831509668013139, iteration: 259139
loss: 0.922547459602356,grad_norm: 0.8679518363112961, iteration: 259140
loss: 1.037635326385498,grad_norm: 0.9833900263921949, iteration: 259141
loss: 1.026829719543457,grad_norm: 0.9539118889922522, iteration: 259142
loss: 1.031600832939148,grad_norm: 0.8017435892273164, iteration: 259143
loss: 1.0080626010894775,grad_norm: 0.8854167707667229, iteration: 259144
loss: 1.011373519897461,grad_norm: 0.9999995847820609, iteration: 259145
loss: 1.0004863739013672,grad_norm: 0.9456917733720561, iteration: 259146
loss: 1.0054391622543335,grad_norm: 0.859934739132372, iteration: 259147
loss: 0.9823474287986755,grad_norm: 0.9999990359324782, iteration: 259148
loss: 1.0549931526184082,grad_norm: 0.9569926886697756, iteration: 259149
loss: 0.9962016344070435,grad_norm: 0.9148469570340051, iteration: 259150
loss: 1.0202462673187256,grad_norm: 0.8853314048678426, iteration: 259151
loss: 0.970382809638977,grad_norm: 0.860331362870559, iteration: 259152
loss: 0.995945930480957,grad_norm: 0.9152258633088989, iteration: 259153
loss: 0.997505247592926,grad_norm: 0.8898461970302416, iteration: 259154
loss: 0.9894533157348633,grad_norm: 0.7792319171983954, iteration: 259155
loss: 1.0304092168807983,grad_norm: 0.9226785132848324, iteration: 259156
loss: 1.0223113298416138,grad_norm: 0.8125404253132921, iteration: 259157
loss: 1.022914171218872,grad_norm: 0.9484998741146131, iteration: 259158
loss: 0.9653635025024414,grad_norm: 0.9999991807312487, iteration: 259159
loss: 1.013822078704834,grad_norm: 0.7736030597785847, iteration: 259160
loss: 0.9814154505729675,grad_norm: 0.8695930773659338, iteration: 259161
loss: 0.9726241230964661,grad_norm: 0.9026003323131805, iteration: 259162
loss: 1.0202468633651733,grad_norm: 0.8860318721689583, iteration: 259163
loss: 1.034448504447937,grad_norm: 0.9999990810013257, iteration: 259164
loss: 1.1011381149291992,grad_norm: 0.992796256339463, iteration: 259165
loss: 1.0056393146514893,grad_norm: 0.8638023335129723, iteration: 259166
loss: 0.9848584532737732,grad_norm: 0.9839268557813448, iteration: 259167
loss: 1.02472984790802,grad_norm: 0.8120873501677633, iteration: 259168
loss: 1.0194472074508667,grad_norm: 0.9193758664268371, iteration: 259169
loss: 0.9943708777427673,grad_norm: 0.8254349693002176, iteration: 259170
loss: 1.016233205795288,grad_norm: 0.9999990869745229, iteration: 259171
loss: 1.0070687532424927,grad_norm: 0.8122972192187464, iteration: 259172
loss: 0.9950915575027466,grad_norm: 0.8994419964573978, iteration: 259173
loss: 0.9947981238365173,grad_norm: 0.7690857937527714, iteration: 259174
loss: 0.9876251816749573,grad_norm: 0.9379183207874578, iteration: 259175
loss: 0.9999118447303772,grad_norm: 0.7556925337062499, iteration: 259176
loss: 1.0624747276306152,grad_norm: 0.9999998725281086, iteration: 259177
loss: 1.0057567358016968,grad_norm: 0.7842829926590488, iteration: 259178
loss: 1.0132817029953003,grad_norm: 0.9448625638370559, iteration: 259179
loss: 1.1543338298797607,grad_norm: 0.9604013441647603, iteration: 259180
loss: 1.014051079750061,grad_norm: 0.9999996541213527, iteration: 259181
loss: 0.9846345782279968,grad_norm: 0.7690837308571473, iteration: 259182
loss: 1.0088372230529785,grad_norm: 0.898789515539083, iteration: 259183
loss: 1.0168284177780151,grad_norm: 0.9999995208495285, iteration: 259184
loss: 1.0135631561279297,grad_norm: 0.8248697420299663, iteration: 259185
loss: 0.9950487017631531,grad_norm: 0.8964561950731134, iteration: 259186
loss: 1.02902352809906,grad_norm: 0.9999990138120262, iteration: 259187
loss: 0.9964801073074341,grad_norm: 0.8334649439559945, iteration: 259188
loss: 1.0709314346313477,grad_norm: 0.8704253801016023, iteration: 259189
loss: 1.0180137157440186,grad_norm: 0.9999991200534933, iteration: 259190
loss: 1.0164698362350464,grad_norm: 0.8243766438565772, iteration: 259191
loss: 0.9859410524368286,grad_norm: 0.9491583627475719, iteration: 259192
loss: 1.0341919660568237,grad_norm: 0.9375651126935738, iteration: 259193
loss: 0.9893513321876526,grad_norm: 0.9245568630305906, iteration: 259194
loss: 1.013107419013977,grad_norm: 0.8028331081442602, iteration: 259195
loss: 0.9909439086914062,grad_norm: 0.9609583965449997, iteration: 259196
loss: 1.0473283529281616,grad_norm: 0.9999991668884015, iteration: 259197
loss: 1.0374268293380737,grad_norm: 0.8895844151783577, iteration: 259198
loss: 0.9838218092918396,grad_norm: 0.9535635659586789, iteration: 259199
loss: 1.0794832706451416,grad_norm: 0.9307083426943986, iteration: 259200
loss: 1.0147335529327393,grad_norm: 0.999999793395261, iteration: 259201
loss: 0.9847597479820251,grad_norm: 0.8219929229489187, iteration: 259202
loss: 1.0053571462631226,grad_norm: 0.8979112156937252, iteration: 259203
loss: 1.004868745803833,grad_norm: 0.745923365952792, iteration: 259204
loss: 0.9904939532279968,grad_norm: 0.8010256233522134, iteration: 259205
loss: 1.0473610162734985,grad_norm: 0.9100257802307179, iteration: 259206
loss: 1.0048141479492188,grad_norm: 0.9999990564745914, iteration: 259207
loss: 1.0148741006851196,grad_norm: 0.9999992807424687, iteration: 259208
loss: 1.024652361869812,grad_norm: 0.7900422957699821, iteration: 259209
loss: 1.0030851364135742,grad_norm: 0.8171703523955276, iteration: 259210
loss: 1.0360989570617676,grad_norm: 0.9190193790840816, iteration: 259211
loss: 1.0056509971618652,grad_norm: 0.8221672443452841, iteration: 259212
loss: 0.9929150342941284,grad_norm: 0.8779722634772422, iteration: 259213
loss: 0.9880070090293884,grad_norm: 0.8556268822740402, iteration: 259214
loss: 0.9772224426269531,grad_norm: 0.7641739199621023, iteration: 259215
loss: 1.0057260990142822,grad_norm: 0.9999993473180913, iteration: 259216
loss: 1.0091290473937988,grad_norm: 0.8147717268341553, iteration: 259217
loss: 0.9914190173149109,grad_norm: 0.7869349130007247, iteration: 259218
loss: 1.0011838674545288,grad_norm: 0.7544788827061083, iteration: 259219
loss: 1.0594011545181274,grad_norm: 0.9999997023743653, iteration: 259220
loss: 1.119809865951538,grad_norm: 0.9089892504974006, iteration: 259221
loss: 1.005370855331421,grad_norm: 0.9308457910984764, iteration: 259222
loss: 1.0144578218460083,grad_norm: 0.8811031166685422, iteration: 259223
loss: 1.058029055595398,grad_norm: 0.9999991968250704, iteration: 259224
loss: 1.0167598724365234,grad_norm: 0.9160418683562926, iteration: 259225
loss: 0.9712360501289368,grad_norm: 0.949079442137509, iteration: 259226
loss: 0.9810785055160522,grad_norm: 0.9104361149785667, iteration: 259227
loss: 0.980449378490448,grad_norm: 0.8959532084571246, iteration: 259228
loss: 0.9732988476753235,grad_norm: 0.7971833116202925, iteration: 259229
loss: 0.9855108857154846,grad_norm: 0.9304731364780444, iteration: 259230
loss: 0.9774875640869141,grad_norm: 0.8734401228960109, iteration: 259231
loss: 1.0123361349105835,grad_norm: 0.7987805061563541, iteration: 259232
loss: 0.9650489091873169,grad_norm: 0.9984650877203941, iteration: 259233
loss: 1.0131818056106567,grad_norm: 0.9999990621591078, iteration: 259234
loss: 0.9742290377616882,grad_norm: 0.8028350568337473, iteration: 259235
loss: 1.0419974327087402,grad_norm: 0.9999993707403014, iteration: 259236
loss: 0.9739692211151123,grad_norm: 0.6759925019166921, iteration: 259237
loss: 0.9461690783500671,grad_norm: 0.8401332383669611, iteration: 259238
loss: 1.0138920545578003,grad_norm: 0.9415317339512694, iteration: 259239
loss: 1.0195742845535278,grad_norm: 0.9999995750939478, iteration: 259240
loss: 1.0213667154312134,grad_norm: 0.9999995781676005, iteration: 259241
loss: 1.016231894493103,grad_norm: 0.7906259465638327, iteration: 259242
loss: 1.0461926460266113,grad_norm: 0.9999991045701058, iteration: 259243
loss: 1.011810541152954,grad_norm: 0.7685922541635828, iteration: 259244
loss: 1.0018545389175415,grad_norm: 0.9999990826003374, iteration: 259245
loss: 0.9782589673995972,grad_norm: 0.8847756542180826, iteration: 259246
loss: 1.0063807964324951,grad_norm: 0.7856572568860205, iteration: 259247
loss: 0.9995549917221069,grad_norm: 0.8633404102439595, iteration: 259248
loss: 0.9882722496986389,grad_norm: 0.8187922350575627, iteration: 259249
loss: 1.0172760486602783,grad_norm: 0.9113132286606755, iteration: 259250
loss: 1.027066707611084,grad_norm: 0.8336551859589659, iteration: 259251
loss: 0.9797943234443665,grad_norm: 0.9999993054828371, iteration: 259252
loss: 0.9758318662643433,grad_norm: 0.9999990247057412, iteration: 259253
loss: 1.032840371131897,grad_norm: 0.999999075802581, iteration: 259254
loss: 1.060860514640808,grad_norm: 0.9555954120155984, iteration: 259255
loss: 1.14431893825531,grad_norm: 0.9999997783159984, iteration: 259256
loss: 1.0186301469802856,grad_norm: 0.8291485521944262, iteration: 259257
loss: 0.9718725681304932,grad_norm: 0.8916350206417779, iteration: 259258
loss: 0.9903778433799744,grad_norm: 0.8049421007685281, iteration: 259259
loss: 0.9904552102088928,grad_norm: 0.8599978389890499, iteration: 259260
loss: 0.9977372288703918,grad_norm: 0.8821248001476046, iteration: 259261
loss: 1.0165166854858398,grad_norm: 0.99999915658936, iteration: 259262
loss: 1.008317470550537,grad_norm: 0.899429762886037, iteration: 259263
loss: 1.0141006708145142,grad_norm: 0.8959968038942032, iteration: 259264
loss: 1.0066399574279785,grad_norm: 0.8668676734482879, iteration: 259265
loss: 1.0870869159698486,grad_norm: 0.8785908000903981, iteration: 259266
loss: 0.9789704084396362,grad_norm: 0.7796601083875876, iteration: 259267
loss: 0.9752632975578308,grad_norm: 0.8887397928908228, iteration: 259268
loss: 0.9813605546951294,grad_norm: 0.8968588049066994, iteration: 259269
loss: 0.9931485652923584,grad_norm: 0.9830152351177815, iteration: 259270
loss: 0.9461290240287781,grad_norm: 0.8144301830759805, iteration: 259271
loss: 1.0413941144943237,grad_norm: 0.9999995069971672, iteration: 259272
loss: 0.9981397390365601,grad_norm: 0.8625711585782233, iteration: 259273
loss: 1.0238980054855347,grad_norm: 0.8299291717204254, iteration: 259274
loss: 1.0254343748092651,grad_norm: 0.8430139862065049, iteration: 259275
loss: 1.0111662149429321,grad_norm: 0.9999999426034991, iteration: 259276
loss: 1.0157910585403442,grad_norm: 0.9999990877711888, iteration: 259277
loss: 0.993521511554718,grad_norm: 0.9999990119150104, iteration: 259278
loss: 0.9689618349075317,grad_norm: 0.9348007549060119, iteration: 259279
loss: 1.012027621269226,grad_norm: 0.7936737559154874, iteration: 259280
loss: 0.9967969655990601,grad_norm: 0.7811440524164623, iteration: 259281
loss: 1.0194700956344604,grad_norm: 0.8891172138523086, iteration: 259282
loss: 1.0025787353515625,grad_norm: 0.8413327452521876, iteration: 259283
loss: 1.0609750747680664,grad_norm: 0.9999996314774906, iteration: 259284
loss: 0.9920722842216492,grad_norm: 0.9999993902261147, iteration: 259285
loss: 0.9774561524391174,grad_norm: 0.8632801746577156, iteration: 259286
loss: 1.002243161201477,grad_norm: 0.9999997934060009, iteration: 259287
loss: 1.0262010097503662,grad_norm: 0.9816192814381549, iteration: 259288
loss: 0.9899885654449463,grad_norm: 0.9136566059737985, iteration: 259289
loss: 0.9672691226005554,grad_norm: 0.7742641445232963, iteration: 259290
loss: 0.9810969233512878,grad_norm: 0.9999990150778283, iteration: 259291
loss: 0.9997027516365051,grad_norm: 0.8002224636364308, iteration: 259292
loss: 1.0537899732589722,grad_norm: 0.9347923504385396, iteration: 259293
loss: 1.0017790794372559,grad_norm: 0.76770950305983, iteration: 259294
loss: 0.9960823059082031,grad_norm: 0.7098639323202712, iteration: 259295
loss: 0.9592162370681763,grad_norm: 0.9285219905352843, iteration: 259296
loss: 0.9988407492637634,grad_norm: 0.9999994594035733, iteration: 259297
loss: 1.0826475620269775,grad_norm: 0.7591241491820926, iteration: 259298
loss: 1.0156476497650146,grad_norm: 0.9999992446149237, iteration: 259299
loss: 0.9986030459403992,grad_norm: 0.9623461816642074, iteration: 259300
loss: 0.9719646573066711,grad_norm: 0.8518401758593496, iteration: 259301
loss: 0.9669257998466492,grad_norm: 0.9999991704348824, iteration: 259302
loss: 0.9962000250816345,grad_norm: 0.906201838189562, iteration: 259303
loss: 0.9667580723762512,grad_norm: 0.9999990716607006, iteration: 259304
loss: 1.014466404914856,grad_norm: 0.8009736425252457, iteration: 259305
loss: 0.9928974509239197,grad_norm: 0.8205814715650794, iteration: 259306
loss: 1.0089174509048462,grad_norm: 0.9999990462427333, iteration: 259307
loss: 0.9873027801513672,grad_norm: 0.7553576703421774, iteration: 259308
loss: 1.0041213035583496,grad_norm: 0.9377519449255097, iteration: 259309
loss: 1.031667947769165,grad_norm: 0.9606275081288124, iteration: 259310
loss: 0.978425920009613,grad_norm: 0.9238366885028004, iteration: 259311
loss: 1.0285422801971436,grad_norm: 0.8256730570570211, iteration: 259312
loss: 0.992266833782196,grad_norm: 0.7937217869645715, iteration: 259313
loss: 1.0184720754623413,grad_norm: 0.9999991804442675, iteration: 259314
loss: 1.0869841575622559,grad_norm: 1.000000055925853, iteration: 259315
loss: 1.0582752227783203,grad_norm: 0.9999992647418574, iteration: 259316
loss: 1.0318673849105835,grad_norm: 0.8765536373876285, iteration: 259317
loss: 0.9975455403327942,grad_norm: 0.8685019527845605, iteration: 259318
loss: 0.9534139633178711,grad_norm: 0.8922687257062073, iteration: 259319
loss: 1.0037462711334229,grad_norm: 0.9490087214642543, iteration: 259320
loss: 1.0059419870376587,grad_norm: 0.9844018965019541, iteration: 259321
loss: 0.9572376012802124,grad_norm: 0.8250279354457132, iteration: 259322
loss: 1.0211236476898193,grad_norm: 0.9251281270728386, iteration: 259323
loss: 0.9716461300849915,grad_norm: 0.9637470559525713, iteration: 259324
loss: 0.9975972771644592,grad_norm: 0.7835080961448877, iteration: 259325
loss: 1.0436592102050781,grad_norm: 0.99999903846271, iteration: 259326
loss: 1.0513272285461426,grad_norm: 0.9888001278068965, iteration: 259327
loss: 1.0090619325637817,grad_norm: 0.9999990679279561, iteration: 259328
loss: 1.0500199794769287,grad_norm: 0.9999990658822645, iteration: 259329
loss: 0.981112539768219,grad_norm: 0.9972531254974649, iteration: 259330
loss: 1.0225383043289185,grad_norm: 0.8405112474572677, iteration: 259331
loss: 1.0198233127593994,grad_norm: 0.9170414575708018, iteration: 259332
loss: 0.9932782649993896,grad_norm: 0.8518052433186397, iteration: 259333
loss: 0.9548885226249695,grad_norm: 0.9999990754722634, iteration: 259334
loss: 0.993804931640625,grad_norm: 0.9999991872976249, iteration: 259335
loss: 1.0375617742538452,grad_norm: 0.9608946476378997, iteration: 259336
loss: 1.0136067867279053,grad_norm: 0.8834402620425915, iteration: 259337
loss: 1.0224425792694092,grad_norm: 0.8257398337924431, iteration: 259338
loss: 0.9646316170692444,grad_norm: 0.8847262436333617, iteration: 259339
loss: 1.0772358179092407,grad_norm: 0.9999990209580784, iteration: 259340
loss: 1.0160387754440308,grad_norm: 0.8411987783883098, iteration: 259341
loss: 1.0045524835586548,grad_norm: 0.891232560990172, iteration: 259342
loss: 0.9889087677001953,grad_norm: 0.7702867643644584, iteration: 259343
loss: 1.0186235904693604,grad_norm: 0.9165779152994495, iteration: 259344
loss: 1.0036654472351074,grad_norm: 0.8950032186817026, iteration: 259345
loss: 1.0424596071243286,grad_norm: 0.7999152308638616, iteration: 259346
loss: 1.0125399827957153,grad_norm: 0.999999038035856, iteration: 259347
loss: 0.9894287586212158,grad_norm: 0.9563382116619977, iteration: 259348
loss: 0.9724059104919434,grad_norm: 0.999998993014049, iteration: 259349
loss: 0.9764994978904724,grad_norm: 0.8532454058399043, iteration: 259350
loss: 1.0175809860229492,grad_norm: 0.999999095470471, iteration: 259351
loss: 0.988921582698822,grad_norm: 0.7676001736911244, iteration: 259352
loss: 1.0022472143173218,grad_norm: 0.9374556229296446, iteration: 259353
loss: 1.027406930923462,grad_norm: 0.7844033289509122, iteration: 259354
loss: 0.9886592030525208,grad_norm: 0.7001447567816595, iteration: 259355
loss: 0.9770321249961853,grad_norm: 0.9744802831711318, iteration: 259356
loss: 1.001865267753601,grad_norm: 0.8999066755595013, iteration: 259357
loss: 1.085565447807312,grad_norm: 0.7272165680942804, iteration: 259358
loss: 0.9819275140762329,grad_norm: 0.814516371627024, iteration: 259359
loss: 1.0648798942565918,grad_norm: 0.8809716417686638, iteration: 259360
loss: 1.0010509490966797,grad_norm: 0.8251790173333879, iteration: 259361
loss: 1.0226556062698364,grad_norm: 0.9999999939078293, iteration: 259362
loss: 0.9894022941589355,grad_norm: 0.8246760835470205, iteration: 259363
loss: 1.0274596214294434,grad_norm: 0.9999990295019413, iteration: 259364
loss: 1.0057423114776611,grad_norm: 0.907105096535351, iteration: 259365
loss: 1.0083760023117065,grad_norm: 0.9999992371852356, iteration: 259366
loss: 1.0244107246398926,grad_norm: 0.9007691156174744, iteration: 259367
loss: 1.004064679145813,grad_norm: 0.9505704464028172, iteration: 259368
loss: 0.9739048480987549,grad_norm: 0.743958973753082, iteration: 259369
loss: 0.9843786954879761,grad_norm: 0.8138508873765185, iteration: 259370
loss: 1.0584006309509277,grad_norm: 0.999999199678301, iteration: 259371
loss: 1.0183534622192383,grad_norm: 0.9543303519037912, iteration: 259372
loss: 0.9439876079559326,grad_norm: 0.7456231412144892, iteration: 259373
loss: 1.0314013957977295,grad_norm: 0.8624689037751725, iteration: 259374
loss: 1.013490915298462,grad_norm: 0.8077775687177539, iteration: 259375
loss: 0.9999178051948547,grad_norm: 0.8485023191005405, iteration: 259376
loss: 1.0308541059494019,grad_norm: 0.9999990965151655, iteration: 259377
loss: 0.994310200214386,grad_norm: 0.8617136366229803, iteration: 259378
loss: 1.0047845840454102,grad_norm: 0.9999991260857818, iteration: 259379
loss: 1.0199223756790161,grad_norm: 0.885650731024458, iteration: 259380
loss: 0.9489050507545471,grad_norm: 0.9077712506639222, iteration: 259381
loss: 1.0123250484466553,grad_norm: 0.7304919896065144, iteration: 259382
loss: 0.9992285966873169,grad_norm: 0.857153661435737, iteration: 259383
loss: 0.9800422191619873,grad_norm: 0.8967035077750145, iteration: 259384
loss: 1.0082294940948486,grad_norm: 0.9999993909185921, iteration: 259385
loss: 0.9925786256790161,grad_norm: 0.9999990978298037, iteration: 259386
loss: 0.9752026796340942,grad_norm: 0.8994547109428974, iteration: 259387
loss: 0.9957087635993958,grad_norm: 0.9225094714569456, iteration: 259388
loss: 1.0100452899932861,grad_norm: 0.8804906677324948, iteration: 259389
loss: 0.993710458278656,grad_norm: 0.9765786716140153, iteration: 259390
loss: 0.9732975959777832,grad_norm: 0.9194111571384515, iteration: 259391
loss: 0.9991807341575623,grad_norm: 0.8855813170120962, iteration: 259392
loss: 0.9979075193405151,grad_norm: 0.8706349272608719, iteration: 259393
loss: 0.998213529586792,grad_norm: 0.7898527405964333, iteration: 259394
loss: 1.0064092874526978,grad_norm: 0.8728166343261676, iteration: 259395
loss: 1.0005669593811035,grad_norm: 0.8990744598310322, iteration: 259396
loss: 1.0061341524124146,grad_norm: 0.9999991938929692, iteration: 259397
loss: 1.0383365154266357,grad_norm: 0.9999991408487683, iteration: 259398
loss: 0.9982602000236511,grad_norm: 0.8073588736588049, iteration: 259399
loss: 1.0358408689498901,grad_norm: 0.9780944008312844, iteration: 259400
loss: 1.0001825094223022,grad_norm: 0.8363389382422984, iteration: 259401
loss: 1.0028761625289917,grad_norm: 0.8896234428079863, iteration: 259402
loss: 1.0066732168197632,grad_norm: 0.9154490348122394, iteration: 259403
loss: 1.0401278734207153,grad_norm: 0.9999997576097072, iteration: 259404
loss: 0.981463611125946,grad_norm: 0.7573118880922187, iteration: 259405
loss: 1.0032787322998047,grad_norm: 0.9232853895802036, iteration: 259406
loss: 1.0222902297973633,grad_norm: 0.8607216555042486, iteration: 259407
loss: 1.096166968345642,grad_norm: 0.9999997510246831, iteration: 259408
loss: 0.9909881949424744,grad_norm: 0.8186985736355543, iteration: 259409
loss: 0.9651036858558655,grad_norm: 0.7836987815836396, iteration: 259410
loss: 0.9761254191398621,grad_norm: 0.9999994851174725, iteration: 259411
loss: 1.0294119119644165,grad_norm: 0.871489928541231, iteration: 259412
loss: 0.9765040278434753,grad_norm: 0.999999129146167, iteration: 259413
loss: 1.0055402517318726,grad_norm: 0.8830913853336082, iteration: 259414
loss: 1.0076497793197632,grad_norm: 0.7511777740905788, iteration: 259415
loss: 0.9917595982551575,grad_norm: 0.8182537841851891, iteration: 259416
loss: 1.0016673803329468,grad_norm: 0.8025539546577123, iteration: 259417
loss: 1.0111050605773926,grad_norm: 0.9999991103857221, iteration: 259418
loss: 1.0401555299758911,grad_norm: 0.737949007286952, iteration: 259419
loss: 0.9666245579719543,grad_norm: 0.9999990745766453, iteration: 259420
loss: 0.9786438345909119,grad_norm: 0.8866876766714253, iteration: 259421
loss: 1.0702886581420898,grad_norm: 0.9999996431469157, iteration: 259422
loss: 0.9672449231147766,grad_norm: 0.7578266378771404, iteration: 259423
loss: 1.1166867017745972,grad_norm: 0.9999991664396771, iteration: 259424
loss: 0.9960660934448242,grad_norm: 0.760952808678809, iteration: 259425
loss: 0.9789851307868958,grad_norm: 0.9208789098613116, iteration: 259426
loss: 1.0008771419525146,grad_norm: 0.7891429490579795, iteration: 259427
loss: 1.0213967561721802,grad_norm: 0.999999735201797, iteration: 259428
loss: 1.000914216041565,grad_norm: 0.6945323948229918, iteration: 259429
loss: 0.9814813137054443,grad_norm: 0.942101885561909, iteration: 259430
loss: 0.9801726341247559,grad_norm: 0.9782065684510243, iteration: 259431
loss: 0.9911767244338989,grad_norm: 0.8907058201370179, iteration: 259432
loss: 1.023234248161316,grad_norm: 0.8619734165947095, iteration: 259433
loss: 1.024997353553772,grad_norm: 0.9999991152374308, iteration: 259434
loss: 1.026047706604004,grad_norm: 0.8244660904194007, iteration: 259435
loss: 0.9861516356468201,grad_norm: 0.84971087523651, iteration: 259436
loss: 1.0176950693130493,grad_norm: 0.8878730445592334, iteration: 259437
loss: 0.99388188123703,grad_norm: 0.9481800345646687, iteration: 259438
loss: 1.013794183731079,grad_norm: 0.7184572828756148, iteration: 259439
loss: 0.9885715246200562,grad_norm: 0.8854337675190183, iteration: 259440
loss: 1.0321853160858154,grad_norm: 0.9215089566195059, iteration: 259441
loss: 1.08877432346344,grad_norm: 0.9999996631146865, iteration: 259442
loss: 1.0483927726745605,grad_norm: 0.9719060423077807, iteration: 259443
loss: 0.9856716990470886,grad_norm: 0.914915517651522, iteration: 259444
loss: 1.012779712677002,grad_norm: 0.7927625600832965, iteration: 259445
loss: 1.0023384094238281,grad_norm: 0.9999990352637949, iteration: 259446
loss: 0.9982587695121765,grad_norm: 0.8654162004581893, iteration: 259447
loss: 1.013899326324463,grad_norm: 0.9980792358061147, iteration: 259448
loss: 1.0105409622192383,grad_norm: 0.8954836154303208, iteration: 259449
loss: 1.026350736618042,grad_norm: 0.9118412716305964, iteration: 259450
loss: 0.9923915863037109,grad_norm: 0.9501882788632808, iteration: 259451
loss: 0.9747982025146484,grad_norm: 0.8043625734018334, iteration: 259452
loss: 1.0351738929748535,grad_norm: 0.924329369989042, iteration: 259453
loss: 0.9880142211914062,grad_norm: 0.9358784449035095, iteration: 259454
loss: 1.0020426511764526,grad_norm: 0.7919313029668382, iteration: 259455
loss: 1.0111483335494995,grad_norm: 0.999999168355598, iteration: 259456
loss: 0.9880880117416382,grad_norm: 0.8857872667174206, iteration: 259457
loss: 1.0356498956680298,grad_norm: 0.8742141327021378, iteration: 259458
loss: 0.9875513911247253,grad_norm: 0.7013994605286503, iteration: 259459
loss: 1.0216022729873657,grad_norm: 0.8675442055787737, iteration: 259460
loss: 0.9739036560058594,grad_norm: 0.9999991575608557, iteration: 259461
loss: 0.944611668586731,grad_norm: 0.9406105334656832, iteration: 259462
loss: 1.0311750173568726,grad_norm: 0.9205018098252801, iteration: 259463
loss: 0.9714881777763367,grad_norm: 0.8410568004774439, iteration: 259464
loss: 0.9915229678153992,grad_norm: 0.9025179701592595, iteration: 259465
loss: 0.9974556565284729,grad_norm: 0.8657838595149373, iteration: 259466
loss: 1.0744823217391968,grad_norm: 0.9999999263853001, iteration: 259467
loss: 0.9919290542602539,grad_norm: 0.8169683936818947, iteration: 259468
loss: 1.0198041200637817,grad_norm: 0.942070996830492, iteration: 259469
loss: 1.038892388343811,grad_norm: 0.9207834205432675, iteration: 259470
loss: 1.0076433420181274,grad_norm: 0.8723129137221366, iteration: 259471
loss: 0.9721758365631104,grad_norm: 0.8673660637149704, iteration: 259472
loss: 1.0074808597564697,grad_norm: 0.9999992802517398, iteration: 259473
loss: 1.001277208328247,grad_norm: 0.9032049555183131, iteration: 259474
loss: 1.0202916860580444,grad_norm: 0.9039404276650027, iteration: 259475
loss: 1.0078423023223877,grad_norm: 0.9999990561198078, iteration: 259476
loss: 1.0358885526657104,grad_norm: 0.9999998171216297, iteration: 259477
loss: 0.9871121644973755,grad_norm: 0.9765228882126352, iteration: 259478
loss: 1.022589921951294,grad_norm: 0.9999992413076555, iteration: 259479
loss: 1.0208760499954224,grad_norm: 0.7962046142061839, iteration: 259480
loss: 1.0055303573608398,grad_norm: 0.9732234742069814, iteration: 259481
loss: 1.0135116577148438,grad_norm: 0.8677321879846738, iteration: 259482
loss: 0.9934910535812378,grad_norm: 0.8375973010601603, iteration: 259483
loss: 1.0235003232955933,grad_norm: 0.7956044379725442, iteration: 259484
loss: 0.982965886592865,grad_norm: 0.8216753351062709, iteration: 259485
loss: 1.0005035400390625,grad_norm: 0.7977305713521796, iteration: 259486
loss: 0.9820257425308228,grad_norm: 0.9106891693447535, iteration: 259487
loss: 0.9618483185768127,grad_norm: 0.9670259625002552, iteration: 259488
loss: 1.004608392715454,grad_norm: 0.8596860790285573, iteration: 259489
loss: 0.9958078265190125,grad_norm: 0.7395995557956395, iteration: 259490
loss: 1.0028080940246582,grad_norm: 0.7933614899315036, iteration: 259491
loss: 0.9952988624572754,grad_norm: 0.8646490698378221, iteration: 259492
loss: 0.9958242774009705,grad_norm: 0.802250050780102, iteration: 259493
loss: 1.0291519165039062,grad_norm: 0.868222811598294, iteration: 259494
loss: 1.003462553024292,grad_norm: 0.8229606470743063, iteration: 259495
loss: 0.9880117177963257,grad_norm: 0.7944553801074143, iteration: 259496
loss: 0.9822445511817932,grad_norm: 0.9410441532872655, iteration: 259497
loss: 1.0027669668197632,grad_norm: 0.8216022955747632, iteration: 259498
loss: 1.0764292478561401,grad_norm: 0.7958090347001003, iteration: 259499
loss: 1.016594648361206,grad_norm: 0.731909951899751, iteration: 259500
loss: 1.021625280380249,grad_norm: 0.9490162896262331, iteration: 259501
loss: 0.9859817028045654,grad_norm: 0.8977385455494559, iteration: 259502
loss: 1.0450187921524048,grad_norm: 0.9999995322025419, iteration: 259503
loss: 1.0073236227035522,grad_norm: 0.7729990558308586, iteration: 259504
loss: 0.97816002368927,grad_norm: 0.9936326770355552, iteration: 259505
loss: 1.0033880472183228,grad_norm: 0.9595717848074583, iteration: 259506
loss: 0.9967889189720154,grad_norm: 0.9952267649191213, iteration: 259507
loss: 1.040094017982483,grad_norm: 0.9943188147496055, iteration: 259508
loss: 0.9759242534637451,grad_norm: 0.9447819095548604, iteration: 259509
loss: 0.9996923804283142,grad_norm: 0.839848747199583, iteration: 259510
loss: 1.0325223207473755,grad_norm: 0.999999852556307, iteration: 259511
loss: 1.047060251235962,grad_norm: 0.8595699457309361, iteration: 259512
loss: 0.9712463021278381,grad_norm: 0.8049409511065604, iteration: 259513
loss: 0.9999470710754395,grad_norm: 0.886454296034056, iteration: 259514
loss: 0.9976683259010315,grad_norm: 0.9468126069944027, iteration: 259515
loss: 0.9995036721229553,grad_norm: 0.9001051674567883, iteration: 259516
loss: 1.0137825012207031,grad_norm: 0.9238384784062398, iteration: 259517
loss: 0.9899197220802307,grad_norm: 0.7487921059065442, iteration: 259518
loss: 0.9758495092391968,grad_norm: 0.8940128087620921, iteration: 259519
loss: 0.9793169498443604,grad_norm: 0.879510524255407, iteration: 259520
loss: 0.9942837357521057,grad_norm: 0.9999996955565964, iteration: 259521
loss: 1.0012096166610718,grad_norm: 0.9889239021038888, iteration: 259522
loss: 1.0340349674224854,grad_norm: 0.8654554276836687, iteration: 259523
loss: 1.0023066997528076,grad_norm: 0.7153330799959245, iteration: 259524
loss: 0.996173083782196,grad_norm: 0.9896583070974635, iteration: 259525
loss: 1.0631822347640991,grad_norm: 0.9199915277954046, iteration: 259526
loss: 0.997228741645813,grad_norm: 0.8600722843044686, iteration: 259527
loss: 1.014119029045105,grad_norm: 0.8928870604831647, iteration: 259528
loss: 1.0038176774978638,grad_norm: 0.9871520968811487, iteration: 259529
loss: 0.9874416589736938,grad_norm: 0.9999991756148285, iteration: 259530
loss: 1.0080844163894653,grad_norm: 0.9790643376583774, iteration: 259531
loss: 1.0412167310714722,grad_norm: 0.827115708181168, iteration: 259532
loss: 0.9536135792732239,grad_norm: 0.783399949203546, iteration: 259533
loss: 1.0019097328186035,grad_norm: 0.7716881493454302, iteration: 259534
loss: 1.0249826908111572,grad_norm: 1.0000000553655892, iteration: 259535
loss: 0.997065544128418,grad_norm: 0.9999989461748716, iteration: 259536
loss: 0.9819005131721497,grad_norm: 0.782382815197344, iteration: 259537
loss: 0.9511364698410034,grad_norm: 0.9277987482083352, iteration: 259538
loss: 1.028388500213623,grad_norm: 0.9999993307906201, iteration: 259539
loss: 1.010201334953308,grad_norm: 0.9999993091193509, iteration: 259540
loss: 0.9811877012252808,grad_norm: 0.9027009985108885, iteration: 259541
loss: 1.00736403465271,grad_norm: 0.8541435841028706, iteration: 259542
loss: 1.0018724203109741,grad_norm: 0.999999734149753, iteration: 259543
loss: 0.9603384733200073,grad_norm: 0.9769541936115261, iteration: 259544
loss: 1.0272883176803589,grad_norm: 0.8035135963910292, iteration: 259545
loss: 1.0095653533935547,grad_norm: 0.9999990628038113, iteration: 259546
loss: 0.9800699949264526,grad_norm: 0.9632952909260256, iteration: 259547
loss: 0.9756628274917603,grad_norm: 0.9225283140343085, iteration: 259548
loss: 1.0379159450531006,grad_norm: 0.8036122459328879, iteration: 259549
loss: 1.02430260181427,grad_norm: 0.8876886417285399, iteration: 259550
loss: 1.0517479181289673,grad_norm: 0.9999991636381182, iteration: 259551
loss: 1.0173392295837402,grad_norm: 0.869911971225492, iteration: 259552
loss: 0.9940162897109985,grad_norm: 0.999999261342254, iteration: 259553
loss: 0.9611244797706604,grad_norm: 0.8811082722729996, iteration: 259554
loss: 0.9656835198402405,grad_norm: 0.9783762251157045, iteration: 259555
loss: 0.9777350425720215,grad_norm: 0.9535066189142171, iteration: 259556
loss: 1.0487487316131592,grad_norm: 0.7341657096182583, iteration: 259557
loss: 1.0142652988433838,grad_norm: 0.9094602908422008, iteration: 259558
loss: 0.995683491230011,grad_norm: 0.9999992121946373, iteration: 259559
loss: 0.9991044402122498,grad_norm: 0.9999991007505663, iteration: 259560
loss: 1.0026369094848633,grad_norm: 0.9339266624926853, iteration: 259561
loss: 0.9907341599464417,grad_norm: 0.8982969390201199, iteration: 259562
loss: 0.9906361103057861,grad_norm: 0.779914687442907, iteration: 259563
loss: 1.0245082378387451,grad_norm: 0.952940858260185, iteration: 259564
loss: 1.0237940549850464,grad_norm: 0.9999988780399979, iteration: 259565
loss: 0.9930116534233093,grad_norm: 0.8838936552822595, iteration: 259566
loss: 1.0122522115707397,grad_norm: 0.9564779460737961, iteration: 259567
loss: 0.9751161336898804,grad_norm: 0.8806509080104148, iteration: 259568
loss: 0.9964973330497742,grad_norm: 0.7470779588687284, iteration: 259569
loss: 0.9949495196342468,grad_norm: 0.8310090994725968, iteration: 259570
loss: 1.027817726135254,grad_norm: 0.8321924251499462, iteration: 259571
loss: 0.9993922114372253,grad_norm: 0.7992602524701717, iteration: 259572
loss: 0.9973242878913879,grad_norm: 0.9999990538279503, iteration: 259573
loss: 0.9968091249465942,grad_norm: 0.9999991200376779, iteration: 259574
loss: 0.9917596578598022,grad_norm: 0.9841882003189995, iteration: 259575
loss: 1.0477098226547241,grad_norm: 0.9006861437191102, iteration: 259576
loss: 1.0259759426116943,grad_norm: 0.8712880344493559, iteration: 259577
loss: 1.022923469543457,grad_norm: 0.9024040093460629, iteration: 259578
loss: 0.944052517414093,grad_norm: 0.9684984443015023, iteration: 259579
loss: 1.0392873287200928,grad_norm: 0.9221353250998349, iteration: 259580
loss: 1.0488157272338867,grad_norm: 0.9451905281436933, iteration: 259581
loss: 0.976478099822998,grad_norm: 0.7805973162378832, iteration: 259582
loss: 0.9972999095916748,grad_norm: 0.7861990150777791, iteration: 259583
loss: 1.005268931388855,grad_norm: 0.8844050250994002, iteration: 259584
loss: 0.9974794387817383,grad_norm: 0.7834869955997967, iteration: 259585
loss: 1.0059678554534912,grad_norm: 0.9846891347001275, iteration: 259586
loss: 0.9937109351158142,grad_norm: 0.9999991553754674, iteration: 259587
loss: 0.9675805568695068,grad_norm: 0.820067922141448, iteration: 259588
loss: 0.9813327193260193,grad_norm: 0.7922408518094827, iteration: 259589
loss: 1.0334560871124268,grad_norm: 0.9033488672890807, iteration: 259590
loss: 1.0062057971954346,grad_norm: 0.8785024544324995, iteration: 259591
loss: 1.037258505821228,grad_norm: 0.8795919082894946, iteration: 259592
loss: 0.962761640548706,grad_norm: 0.960052752381127, iteration: 259593
loss: 1.003511905670166,grad_norm: 0.8813442921148273, iteration: 259594
loss: 0.9949827194213867,grad_norm: 0.9999992086887918, iteration: 259595
loss: 1.016371726989746,grad_norm: 0.9999990998891666, iteration: 259596
loss: 0.9607347249984741,grad_norm: 0.9544925051437, iteration: 259597
loss: 0.9988258481025696,grad_norm: 0.9028768709358107, iteration: 259598
loss: 0.9629785418510437,grad_norm: 0.8658446638062124, iteration: 259599
loss: 0.9934128522872925,grad_norm: 0.8602958250778195, iteration: 259600
loss: 0.9800434708595276,grad_norm: 0.885349903379507, iteration: 259601
loss: 0.9704717397689819,grad_norm: 0.820798686089622, iteration: 259602
loss: 0.9880242943763733,grad_norm: 0.8709979300734398, iteration: 259603
loss: 1.0620980262756348,grad_norm: 0.7853386243724528, iteration: 259604
loss: 0.9735432863235474,grad_norm: 0.9243186057744052, iteration: 259605
loss: 1.0199434757232666,grad_norm: 0.9999991027395742, iteration: 259606
loss: 0.9242265224456787,grad_norm: 0.8741144112927071, iteration: 259607
loss: 0.9550729393959045,grad_norm: 0.8306069753541399, iteration: 259608
loss: 1.0102930068969727,grad_norm: 0.999999127497196, iteration: 259609
loss: 1.0038959980010986,grad_norm: 0.8493478456402994, iteration: 259610
loss: 0.9921032786369324,grad_norm: 0.797685533578694, iteration: 259611
loss: 1.04586660861969,grad_norm: 0.9916079310656274, iteration: 259612
loss: 0.998460054397583,grad_norm: 0.9202857844011026, iteration: 259613
loss: 0.9757167100906372,grad_norm: 0.7995529764572614, iteration: 259614
loss: 1.0096501111984253,grad_norm: 0.844104975306773, iteration: 259615
loss: 1.021785020828247,grad_norm: 0.9807044450165406, iteration: 259616
loss: 1.1127971410751343,grad_norm: 0.9659829581765297, iteration: 259617
loss: 1.0240004062652588,grad_norm: 0.848241795215479, iteration: 259618
loss: 0.9617181420326233,grad_norm: 0.9611177853779522, iteration: 259619
loss: 0.9960069060325623,grad_norm: 0.8965104410292603, iteration: 259620
loss: 0.956861138343811,grad_norm: 0.9199428647353446, iteration: 259621
loss: 0.9973607063293457,grad_norm: 0.7808965421865851, iteration: 259622
loss: 0.9797249436378479,grad_norm: 0.8784789887575428, iteration: 259623
loss: 1.0051347017288208,grad_norm: 0.8783583280109006, iteration: 259624
loss: 0.9798754453659058,grad_norm: 0.9026014798271038, iteration: 259625
loss: 1.001290202140808,grad_norm: 0.8332035306407235, iteration: 259626
loss: 1.020018458366394,grad_norm: 0.8399075609432701, iteration: 259627
loss: 0.9487224817276001,grad_norm: 0.9999990877935377, iteration: 259628
loss: 0.9915946125984192,grad_norm: 0.9999990717859539, iteration: 259629
loss: 1.0312844514846802,grad_norm: 0.9411532285081234, iteration: 259630
loss: 1.0179258584976196,grad_norm: 0.9999998795677015, iteration: 259631
loss: 1.0157805681228638,grad_norm: 0.9999995133046296, iteration: 259632
loss: 0.9943804144859314,grad_norm: 0.882201790927636, iteration: 259633
loss: 0.993346631526947,grad_norm: 0.8280265230791555, iteration: 259634
loss: 1.006925344467163,grad_norm: 0.9999991294139733, iteration: 259635
loss: 0.9954037666320801,grad_norm: 0.8659057909573243, iteration: 259636
loss: 0.993797779083252,grad_norm: 0.9999991947905836, iteration: 259637
loss: 0.9690380692481995,grad_norm: 0.7671159466012691, iteration: 259638
loss: 1.0502666234970093,grad_norm: 0.9999990270753012, iteration: 259639
loss: 0.96515291929245,grad_norm: 0.8443374994610343, iteration: 259640
loss: 0.9823490381240845,grad_norm: 0.9718570178878495, iteration: 259641
loss: 1.0998625755310059,grad_norm: 0.9999998511405921, iteration: 259642
loss: 1.0764501094818115,grad_norm: 0.9999997477080536, iteration: 259643
loss: 0.9678709506988525,grad_norm: 0.8648468921574436, iteration: 259644
loss: 1.006912112236023,grad_norm: 0.7873098100357668, iteration: 259645
loss: 0.987870991230011,grad_norm: 0.999999061785666, iteration: 259646
loss: 1.0596104860305786,grad_norm: 0.9999996067381505, iteration: 259647
loss: 1.0625371932983398,grad_norm: 0.9999990033517052, iteration: 259648
loss: 1.0167670249938965,grad_norm: 0.9999990847311774, iteration: 259649
loss: 0.983782172203064,grad_norm: 0.900367149195553, iteration: 259650
loss: 0.9684134125709534,grad_norm: 0.9999993044994099, iteration: 259651
loss: 1.0402837991714478,grad_norm: 0.9471376479925581, iteration: 259652
loss: 1.0837434530258179,grad_norm: 0.999998977866601, iteration: 259653
loss: 1.003792643547058,grad_norm: 0.8798665413082549, iteration: 259654
loss: 1.0254061222076416,grad_norm: 0.899853164592521, iteration: 259655
loss: 1.065115213394165,grad_norm: 0.9999997286374684, iteration: 259656
loss: 1.008111834526062,grad_norm: 0.8287845673174377, iteration: 259657
loss: 1.05078125,grad_norm: 0.9419983934760816, iteration: 259658
loss: 1.0232012271881104,grad_norm: 0.8268160145730681, iteration: 259659
loss: 0.9535369873046875,grad_norm: 0.9999990927833499, iteration: 259660
loss: 0.9705100655555725,grad_norm: 0.9999993072681334, iteration: 259661
loss: 1.0112147331237793,grad_norm: 0.9999992746170122, iteration: 259662
loss: 1.0655256509780884,grad_norm: 0.9999996833231858, iteration: 259663
loss: 0.9734329581260681,grad_norm: 0.8725900372597039, iteration: 259664
loss: 1.0290440320968628,grad_norm: 0.8096900979595696, iteration: 259665
loss: 0.9827092289924622,grad_norm: 0.9676879193216193, iteration: 259666
loss: 1.0299370288848877,grad_norm: 0.9999990563937027, iteration: 259667
loss: 0.951744556427002,grad_norm: 0.8829234093197385, iteration: 259668
loss: 0.9654890894889832,grad_norm: 0.9058097368543492, iteration: 259669
loss: 0.9817734360694885,grad_norm: 0.9999989648526817, iteration: 259670
loss: 1.0296381711959839,grad_norm: 0.9999991368843932, iteration: 259671
loss: 1.0160417556762695,grad_norm: 0.8128584852836613, iteration: 259672
loss: 1.0307294130325317,grad_norm: 0.9107872992459753, iteration: 259673
loss: 0.9932903051376343,grad_norm: 0.82610889234003, iteration: 259674
loss: 0.9748355150222778,grad_norm: 0.9999990689263755, iteration: 259675
loss: 0.9867392778396606,grad_norm: 0.8602401113999787, iteration: 259676
loss: 1.0154746770858765,grad_norm: 0.8778556329913543, iteration: 259677
loss: 1.0557940006256104,grad_norm: 0.9999996548113815, iteration: 259678
loss: 1.000878930091858,grad_norm: 0.9999990686504249, iteration: 259679
loss: 1.0211790800094604,grad_norm: 0.9999991652083845, iteration: 259680
loss: 1.001632571220398,grad_norm: 0.9999996446460344, iteration: 259681
loss: 1.0283896923065186,grad_norm: 0.9999991016139033, iteration: 259682
loss: 1.0131410360336304,grad_norm: 0.8540918142457392, iteration: 259683
loss: 1.0169460773468018,grad_norm: 0.9999996447920235, iteration: 259684
loss: 1.0048810243606567,grad_norm: 0.9530546078165022, iteration: 259685
loss: 1.0044959783554077,grad_norm: 0.9655692966322161, iteration: 259686
loss: 0.9861778616905212,grad_norm: 0.7416202868008375, iteration: 259687
loss: 0.9482258558273315,grad_norm: 0.7939902233570185, iteration: 259688
loss: 1.0037344694137573,grad_norm: 0.7578445761616488, iteration: 259689
loss: 0.9965566396713257,grad_norm: 0.7767993371210273, iteration: 259690
loss: 0.9252534508705139,grad_norm: 0.9056497428333152, iteration: 259691
loss: 1.0032025575637817,grad_norm: 0.8645285105867598, iteration: 259692
loss: 1.0259898900985718,grad_norm: 0.8198418151231919, iteration: 259693
loss: 0.9951822757720947,grad_norm: 0.8378769077861836, iteration: 259694
loss: 0.9958125352859497,grad_norm: 0.9999991274887838, iteration: 259695
loss: 1.0244388580322266,grad_norm: 0.9999991801696391, iteration: 259696
loss: 1.0011389255523682,grad_norm: 0.8509368255853997, iteration: 259697
loss: 0.973629355430603,grad_norm: 0.9999991265496209, iteration: 259698
loss: 1.0179461240768433,grad_norm: 0.7554387586593434, iteration: 259699
loss: 1.0191211700439453,grad_norm: 0.8340351712923066, iteration: 259700
loss: 0.9881715774536133,grad_norm: 0.8640529292636209, iteration: 259701
loss: 1.0184980630874634,grad_norm: 0.9796684816712813, iteration: 259702
loss: 1.0234895944595337,grad_norm: 0.8063386678692587, iteration: 259703
loss: 0.9813106060028076,grad_norm: 0.8378525527665914, iteration: 259704
loss: 0.9865183234214783,grad_norm: 0.9208342580411156, iteration: 259705
loss: 1.0193709135055542,grad_norm: 0.9999990956953572, iteration: 259706
loss: 1.0140560865402222,grad_norm: 0.8605395438443423, iteration: 259707
loss: 0.9760324954986572,grad_norm: 0.7837466087676567, iteration: 259708
loss: 0.9943737983703613,grad_norm: 0.8993727681758067, iteration: 259709
loss: 0.9424473643302917,grad_norm: 0.9110617411325798, iteration: 259710
loss: 0.969144344329834,grad_norm: 0.9815044053492097, iteration: 259711
loss: 0.9695570468902588,grad_norm: 0.9597869048272507, iteration: 259712
loss: 1.0584312677383423,grad_norm: 0.9999989982430831, iteration: 259713
loss: 0.980006992816925,grad_norm: 0.9347321673577723, iteration: 259714
loss: 1.0029551982879639,grad_norm: 0.8769874773089277, iteration: 259715
loss: 0.9856888055801392,grad_norm: 0.7815822955934292, iteration: 259716
loss: 0.9852516055107117,grad_norm: 0.9999990955155253, iteration: 259717
loss: 0.9630923867225647,grad_norm: 0.999999612271818, iteration: 259718
loss: 1.0009348392486572,grad_norm: 0.7431757433436041, iteration: 259719
loss: 0.9943723678588867,grad_norm: 0.9359846384741654, iteration: 259720
loss: 0.988136887550354,grad_norm: 0.8677645302944668, iteration: 259721
loss: 0.9868600964546204,grad_norm: 0.923202065579314, iteration: 259722
loss: 1.0334105491638184,grad_norm: 0.9999991741415338, iteration: 259723
loss: 1.0108839273452759,grad_norm: 0.9140478224495494, iteration: 259724
loss: 1.0084621906280518,grad_norm: 0.7251937366243324, iteration: 259725
loss: 1.0240293741226196,grad_norm: 0.9377574390436952, iteration: 259726
loss: 0.9822520613670349,grad_norm: 0.8820010155917398, iteration: 259727
loss: 1.0108695030212402,grad_norm: 0.9064987244534972, iteration: 259728
loss: 0.994328498840332,grad_norm: 0.8718966120244995, iteration: 259729
loss: 0.9566860795021057,grad_norm: 0.8408586124752504, iteration: 259730
loss: 1.016090989112854,grad_norm: 0.9999991572685093, iteration: 259731
loss: 1.016584873199463,grad_norm: 0.9317758863960262, iteration: 259732
loss: 1.0062505006790161,grad_norm: 0.8333770893860133, iteration: 259733
loss: 1.0026347637176514,grad_norm: 0.9999999861235908, iteration: 259734
loss: 1.0302646160125732,grad_norm: 0.7750903093161114, iteration: 259735
loss: 1.0334025621414185,grad_norm: 0.9875768632198084, iteration: 259736
loss: 1.0407586097717285,grad_norm: 0.9999998252457023, iteration: 259737
loss: 0.9950894117355347,grad_norm: 0.8098674804528497, iteration: 259738
loss: 1.008327841758728,grad_norm: 0.7072466582966924, iteration: 259739
loss: 0.9906056523323059,grad_norm: 0.9999991555816724, iteration: 259740
loss: 0.9959923028945923,grad_norm: 0.7181705221479828, iteration: 259741
loss: 0.9836021661758423,grad_norm: 0.8495173821468694, iteration: 259742
loss: 1.0098884105682373,grad_norm: 0.8861108089798587, iteration: 259743
loss: 1.0156594514846802,grad_norm: 0.8735484039085643, iteration: 259744
loss: 0.9862722754478455,grad_norm: 0.8607835355734114, iteration: 259745
loss: 1.0165817737579346,grad_norm: 0.7817677378005428, iteration: 259746
loss: 0.9994418025016785,grad_norm: 0.8248044537127606, iteration: 259747
loss: 1.002506971359253,grad_norm: 0.9999990936934835, iteration: 259748
loss: 0.9828439354896545,grad_norm: 0.9999992315294628, iteration: 259749
loss: 1.021362543106079,grad_norm: 0.8515368451814739, iteration: 259750
loss: 0.9519490599632263,grad_norm: 0.7796504166108184, iteration: 259751
loss: 0.983582615852356,grad_norm: 0.9999991765510503, iteration: 259752
loss: 1.012188196182251,grad_norm: 0.8491211606001673, iteration: 259753
loss: 1.0229982137680054,grad_norm: 0.9999990726091685, iteration: 259754
loss: 1.137437343597412,grad_norm: 0.9999993569930262, iteration: 259755
loss: 1.024855136871338,grad_norm: 0.9707729940515645, iteration: 259756
loss: 0.9343035817146301,grad_norm: 0.9599214138845332, iteration: 259757
loss: 1.0055198669433594,grad_norm: 0.7612062463248183, iteration: 259758
loss: 0.9777147173881531,grad_norm: 0.9999989356822649, iteration: 259759
loss: 1.054464340209961,grad_norm: 0.8332073771888949, iteration: 259760
loss: 1.01420259475708,grad_norm: 0.7970944128524542, iteration: 259761
loss: 0.9659233689308167,grad_norm: 0.8536067413841949, iteration: 259762
loss: 1.0500057935714722,grad_norm: 0.9181387173196203, iteration: 259763
loss: 0.9825379848480225,grad_norm: 0.9999990112168707, iteration: 259764
loss: 0.9928570985794067,grad_norm: 0.9999991002111295, iteration: 259765
loss: 0.9768254160881042,grad_norm: 0.9999991594171543, iteration: 259766
loss: 1.0057893991470337,grad_norm: 0.9375633375313596, iteration: 259767
loss: 0.9804651737213135,grad_norm: 0.8015014339188279, iteration: 259768
loss: 0.9889782071113586,grad_norm: 0.9474215742928516, iteration: 259769
loss: 1.0154987573623657,grad_norm: 0.9885857062247552, iteration: 259770
loss: 1.021990180015564,grad_norm: 0.9999991771304999, iteration: 259771
loss: 1.0141167640686035,grad_norm: 0.9236791222061028, iteration: 259772
loss: 0.9977861642837524,grad_norm: 0.8647383662231881, iteration: 259773
loss: 0.966759443283081,grad_norm: 0.830905012782418, iteration: 259774
loss: 1.0274055004119873,grad_norm: 0.889012761552775, iteration: 259775
loss: 0.9675921201705933,grad_norm: 0.7849810734303461, iteration: 259776
loss: 0.9738776087760925,grad_norm: 0.889149889372787, iteration: 259777
loss: 1.0028811693191528,grad_norm: 0.9384638800641705, iteration: 259778
loss: 1.0611419677734375,grad_norm: 0.9999993692962332, iteration: 259779
loss: 1.029393196105957,grad_norm: 0.9999994887801994, iteration: 259780
loss: 0.9808209538459778,grad_norm: 0.8480736658282301, iteration: 259781
loss: 1.003161907196045,grad_norm: 0.8241240949555007, iteration: 259782
loss: 1.010650634765625,grad_norm: 0.9999991233842707, iteration: 259783
loss: 1.0244171619415283,grad_norm: 0.9999997206118628, iteration: 259784
loss: 1.0269100666046143,grad_norm: 0.8268197310686358, iteration: 259785
loss: 0.9963735342025757,grad_norm: 0.9592085912377872, iteration: 259786
loss: 1.0108848810195923,grad_norm: 0.7542512000088111, iteration: 259787
loss: 1.0186339616775513,grad_norm: 0.7847894184164067, iteration: 259788
loss: 1.0127958059310913,grad_norm: 0.9999991773469646, iteration: 259789
loss: 1.0049203634262085,grad_norm: 0.9544773133586114, iteration: 259790
loss: 0.9908236861228943,grad_norm: 0.8226254132929945, iteration: 259791
loss: 1.0175496339797974,grad_norm: 0.8099068308699132, iteration: 259792
loss: 0.9668759107589722,grad_norm: 0.7939959571595431, iteration: 259793
loss: 0.9932266473770142,grad_norm: 0.9568260108984104, iteration: 259794
loss: 1.0134748220443726,grad_norm: 0.9999991331095999, iteration: 259795
loss: 1.0005953311920166,grad_norm: 0.892548338863785, iteration: 259796
loss: 1.0549873113632202,grad_norm: 0.9999995236213081, iteration: 259797
loss: 0.9972705245018005,grad_norm: 0.8502879610581501, iteration: 259798
loss: 1.019029140472412,grad_norm: 0.7604691916303478, iteration: 259799
loss: 1.012981653213501,grad_norm: 0.9999991689330776, iteration: 259800
loss: 0.972339928150177,grad_norm: 0.8310693879933244, iteration: 259801
loss: 1.022955060005188,grad_norm: 0.8563074952541156, iteration: 259802
loss: 0.9785432815551758,grad_norm: 0.8065924443161973, iteration: 259803
loss: 1.0231995582580566,grad_norm: 0.7353604660417512, iteration: 259804
loss: 1.0099694728851318,grad_norm: 0.961335295825438, iteration: 259805
loss: 0.9698930382728577,grad_norm: 0.7883817360311135, iteration: 259806
loss: 0.9918262362480164,grad_norm: 0.9999992078700118, iteration: 259807
loss: 1.0155954360961914,grad_norm: 0.7613312010490638, iteration: 259808
loss: 0.983860969543457,grad_norm: 0.999999070316266, iteration: 259809
loss: 0.9918352365493774,grad_norm: 0.8983561923896022, iteration: 259810
loss: 0.9913486838340759,grad_norm: 0.9464054422443411, iteration: 259811
loss: 1.0142465829849243,grad_norm: 0.7959875595609707, iteration: 259812
loss: 1.019091248512268,grad_norm: 0.9999991212965904, iteration: 259813
loss: 1.0017311573028564,grad_norm: 0.8372313545925092, iteration: 259814
loss: 1.0465503931045532,grad_norm: 0.8521833231569446, iteration: 259815
loss: 0.9980261325836182,grad_norm: 0.9934138865926565, iteration: 259816
loss: 1.1014565229415894,grad_norm: 0.9999996857183671, iteration: 259817
loss: 1.243444800376892,grad_norm: 0.9999997900293862, iteration: 259818
loss: 1.0076792240142822,grad_norm: 0.9160511709071248, iteration: 259819
loss: 0.9886622428894043,grad_norm: 0.9667647589608486, iteration: 259820
loss: 1.0064406394958496,grad_norm: 0.9965182642574137, iteration: 259821
loss: 0.9770076274871826,grad_norm: 0.7747883196139932, iteration: 259822
loss: 1.0018750429153442,grad_norm: 0.8655772677467077, iteration: 259823
loss: 1.0134433507919312,grad_norm: 0.9668780805451541, iteration: 259824
loss: 1.0304630994796753,grad_norm: 0.9999993170301346, iteration: 259825
loss: 1.0050697326660156,grad_norm: 0.8679418505683579, iteration: 259826
loss: 0.9866077303886414,grad_norm: 0.7124961751141704, iteration: 259827
loss: 0.9865528345108032,grad_norm: 0.9477289034327271, iteration: 259828
loss: 1.0156781673431396,grad_norm: 0.9999997319754517, iteration: 259829
loss: 1.0213329792022705,grad_norm: 0.8617749461053567, iteration: 259830
loss: 1.014325737953186,grad_norm: 0.8229851978484563, iteration: 259831
loss: 0.9543124437332153,grad_norm: 0.8589009556994791, iteration: 259832
loss: 0.9866102337837219,grad_norm: 0.9500430886596731, iteration: 259833
loss: 1.009260654449463,grad_norm: 0.9426310751948656, iteration: 259834
loss: 1.0036259889602661,grad_norm: 0.8509056502073102, iteration: 259835
loss: 1.0230786800384521,grad_norm: 0.8827549167278692, iteration: 259836
loss: 0.986690104007721,grad_norm: 0.8321387885119366, iteration: 259837
loss: 0.9980646967887878,grad_norm: 0.9430113239571664, iteration: 259838
loss: 1.024039387702942,grad_norm: 0.9458894956374045, iteration: 259839
loss: 1.0203689336776733,grad_norm: 0.9365078796141694, iteration: 259840
loss: 1.0007078647613525,grad_norm: 0.9752807497186748, iteration: 259841
loss: 0.9910356998443604,grad_norm: 0.7938020667504175, iteration: 259842
loss: 0.9620309472084045,grad_norm: 0.8548745595765136, iteration: 259843
loss: 1.0303447246551514,grad_norm: 0.9999991333164356, iteration: 259844
loss: 1.0293588638305664,grad_norm: 0.8400583310177951, iteration: 259845
loss: 1.003954529762268,grad_norm: 0.9128244245464138, iteration: 259846
loss: 0.9740043878555298,grad_norm: 0.7953159097715212, iteration: 259847
loss: 0.9839213490486145,grad_norm: 0.7999820077024439, iteration: 259848
loss: 1.0196363925933838,grad_norm: 0.8121383058178306, iteration: 259849
loss: 1.0331171751022339,grad_norm: 0.9999990136203294, iteration: 259850
loss: 0.9652238488197327,grad_norm: 0.9210007065956494, iteration: 259851
loss: 1.0108294486999512,grad_norm: 0.736180966103839, iteration: 259852
loss: 0.9770464301109314,grad_norm: 0.8491398431800025, iteration: 259853
loss: 0.970192551612854,grad_norm: 0.9999990714048157, iteration: 259854
loss: 1.01019287109375,grad_norm: 0.8099285331680551, iteration: 259855
loss: 1.019382119178772,grad_norm: 0.8964891117898509, iteration: 259856
loss: 0.9946281909942627,grad_norm: 0.9999990159090099, iteration: 259857
loss: 1.0256948471069336,grad_norm: 0.9999993277808987, iteration: 259858
loss: 1.0042475461959839,grad_norm: 0.9910845269032997, iteration: 259859
loss: 1.011949062347412,grad_norm: 0.7685970089262353, iteration: 259860
loss: 1.0213298797607422,grad_norm: 0.9027066851436489, iteration: 259861
loss: 1.0325841903686523,grad_norm: 0.9999989435290385, iteration: 259862
loss: 0.9910669922828674,grad_norm: 0.7667553190970455, iteration: 259863
loss: 0.9945963621139526,grad_norm: 0.9999993642137982, iteration: 259864
loss: 1.0209254026412964,grad_norm: 0.8608812744529977, iteration: 259865
loss: 0.9989583492279053,grad_norm: 0.9461185579946728, iteration: 259866
loss: 1.0072251558303833,grad_norm: 0.8761415010473862, iteration: 259867
loss: 1.001024603843689,grad_norm: 0.9999990011330585, iteration: 259868
loss: 1.0130902528762817,grad_norm: 0.807361428559113, iteration: 259869
loss: 1.0612764358520508,grad_norm: 0.899152803782026, iteration: 259870
loss: 1.025513768196106,grad_norm: 0.8957938224497789, iteration: 259871
loss: 0.9944608807563782,grad_norm: 0.9999991553956457, iteration: 259872
loss: 0.9613905549049377,grad_norm: 0.7700706695266283, iteration: 259873
loss: 1.0721004009246826,grad_norm: 0.9999989784420796, iteration: 259874
loss: 1.0530744791030884,grad_norm: 0.9999995189079145, iteration: 259875
loss: 1.058220624923706,grad_norm: 0.9999994032117839, iteration: 259876
loss: 1.009010672569275,grad_norm: 0.9261986980624817, iteration: 259877
loss: 1.0149171352386475,grad_norm: 0.9999989763547575, iteration: 259878
loss: 0.994867205619812,grad_norm: 0.8192768957169223, iteration: 259879
loss: 1.0054261684417725,grad_norm: 0.9170985631959551, iteration: 259880
loss: 1.0271397829055786,grad_norm: 0.8305519383230489, iteration: 259881
loss: 0.9947181940078735,grad_norm: 0.9648227448545603, iteration: 259882
loss: 0.9995473027229309,grad_norm: 0.7734684024551018, iteration: 259883
loss: 0.9885855317115784,grad_norm: 0.7669720738472474, iteration: 259884
loss: 1.0279613733291626,grad_norm: 0.8653922506201174, iteration: 259885
loss: 1.0273427963256836,grad_norm: 0.9653186927709521, iteration: 259886
loss: 1.0078214406967163,grad_norm: 0.7803315238896472, iteration: 259887
loss: 0.9972321391105652,grad_norm: 0.8079050333950543, iteration: 259888
loss: 0.9914834499359131,grad_norm: 0.8967688919076235, iteration: 259889
loss: 0.9913069009780884,grad_norm: 0.9999992159780948, iteration: 259890
loss: 0.9865915179252625,grad_norm: 0.809005280568515, iteration: 259891
loss: 1.042518973350525,grad_norm: 0.9999990994828765, iteration: 259892
loss: 1.0172004699707031,grad_norm: 0.9428102647494009, iteration: 259893
loss: 0.9934871196746826,grad_norm: 0.8435492553186951, iteration: 259894
loss: 0.9576355218887329,grad_norm: 0.9218780643091263, iteration: 259895
loss: 0.988189697265625,grad_norm: 0.7916178581335415, iteration: 259896
loss: 0.9951788187026978,grad_norm: 0.7951211782552431, iteration: 259897
loss: 1.0015530586242676,grad_norm: 0.9999991317349531, iteration: 259898
loss: 1.0052708387374878,grad_norm: 0.8659435249547324, iteration: 259899
loss: 0.9728273153305054,grad_norm: 0.9257190355167179, iteration: 259900
loss: 0.9892230033874512,grad_norm: 0.7123132210420494, iteration: 259901
loss: 1.0436168909072876,grad_norm: 0.8306669882950406, iteration: 259902
loss: 0.9739854335784912,grad_norm: 0.9999990569094643, iteration: 259903
loss: 0.9901599884033203,grad_norm: 0.8135307378527946, iteration: 259904
loss: 0.9779472351074219,grad_norm: 0.9999991369659975, iteration: 259905
loss: 1.002799153327942,grad_norm: 0.8579865437029255, iteration: 259906
loss: 1.0924187898635864,grad_norm: 0.9999992410502668, iteration: 259907
loss: 1.018272042274475,grad_norm: 0.9480441510232334, iteration: 259908
loss: 0.9712467193603516,grad_norm: 0.9469952510747619, iteration: 259909
loss: 1.0156164169311523,grad_norm: 0.8822235114258458, iteration: 259910
loss: 1.0812554359436035,grad_norm: 0.9999996070076811, iteration: 259911
loss: 0.9902058243751526,grad_norm: 0.7566247980041311, iteration: 259912
loss: 1.0206117630004883,grad_norm: 0.9127667612183076, iteration: 259913
loss: 0.9709324240684509,grad_norm: 0.752347315186671, iteration: 259914
loss: 1.0087766647338867,grad_norm: 0.8830627766878669, iteration: 259915
loss: 0.9809355735778809,grad_norm: 0.8708525015759756, iteration: 259916
loss: 1.1707262992858887,grad_norm: 0.9999998960254285, iteration: 259917
loss: 1.023707628250122,grad_norm: 0.9763231758789562, iteration: 259918
loss: 1.0081870555877686,grad_norm: 0.8779906544849593, iteration: 259919
loss: 1.0204113721847534,grad_norm: 0.9999995019447483, iteration: 259920
loss: 1.0135047435760498,grad_norm: 0.8830749364665916, iteration: 259921
loss: 1.0038625001907349,grad_norm: 0.8277400477066681, iteration: 259922
loss: 1.015752911567688,grad_norm: 0.9999991217321431, iteration: 259923
loss: 0.9965121746063232,grad_norm: 0.8025473784644992, iteration: 259924
loss: 0.9892448782920837,grad_norm: 0.9999993416440501, iteration: 259925
loss: 1.0303248167037964,grad_norm: 0.8127406745609029, iteration: 259926
loss: 1.0085941553115845,grad_norm: 0.8853357023543607, iteration: 259927
loss: 0.9940136075019836,grad_norm: 0.8577336799281717, iteration: 259928
loss: 1.0008000135421753,grad_norm: 0.7650763041662276, iteration: 259929
loss: 1.003373384475708,grad_norm: 0.807492348847365, iteration: 259930
loss: 0.997479259967804,grad_norm: 0.9268582222084369, iteration: 259931
loss: 0.9741300344467163,grad_norm: 0.8045677507806803, iteration: 259932
loss: 0.9957512617111206,grad_norm: 0.9999992070972873, iteration: 259933
loss: 1.008700966835022,grad_norm: 0.8812118805017763, iteration: 259934
loss: 0.9720256328582764,grad_norm: 0.8803237128372564, iteration: 259935
loss: 1.0022448301315308,grad_norm: 0.9471435991547469, iteration: 259936
loss: 0.9928135275840759,grad_norm: 0.8762011163864923, iteration: 259937
loss: 1.006554365158081,grad_norm: 0.8064995012687417, iteration: 259938
loss: 0.9887399673461914,grad_norm: 0.9928917401607267, iteration: 259939
loss: 1.0068020820617676,grad_norm: 0.9968047554049063, iteration: 259940
loss: 0.9913275837898254,grad_norm: 0.8758056314960981, iteration: 259941
loss: 1.0054118633270264,grad_norm: 0.8153652967707471, iteration: 259942
loss: 1.0265121459960938,grad_norm: 0.9999991778384858, iteration: 259943
loss: 0.9878544807434082,grad_norm: 0.9449640214203587, iteration: 259944
loss: 0.9831488132476807,grad_norm: 0.7557484428590806, iteration: 259945
loss: 1.0058884620666504,grad_norm: 0.8609838395585648, iteration: 259946
loss: 1.0050729513168335,grad_norm: 0.919257826602874, iteration: 259947
loss: 1.011587142944336,grad_norm: 0.8136277663060804, iteration: 259948
loss: 0.9763083457946777,grad_norm: 0.8370663832532093, iteration: 259949
loss: 0.9949336051940918,grad_norm: 0.7610423791279105, iteration: 259950
loss: 1.0069849491119385,grad_norm: 0.942012685317203, iteration: 259951
loss: 0.9808739423751831,grad_norm: 0.9999994065308054, iteration: 259952
loss: 0.9824134707450867,grad_norm: 0.999999349133652, iteration: 259953
loss: 1.0311261415481567,grad_norm: 0.9985673714298194, iteration: 259954
loss: 1.2027990818023682,grad_norm: 1.0000000133813514, iteration: 259955
loss: 0.9813507795333862,grad_norm: 0.9429718036103434, iteration: 259956
loss: 1.0207889080047607,grad_norm: 0.8000428576877504, iteration: 259957
loss: 1.0350613594055176,grad_norm: 0.753729832530852, iteration: 259958
loss: 0.9704087376594543,grad_norm: 0.8280991322526509, iteration: 259959
loss: 1.0061852931976318,grad_norm: 0.9999992659913097, iteration: 259960
loss: 0.9872851967811584,grad_norm: 0.8717323800169823, iteration: 259961
loss: 1.0210644006729126,grad_norm: 0.9694591611670414, iteration: 259962
loss: 0.9862070679664612,grad_norm: 0.9519829269482742, iteration: 259963
loss: 1.0212655067443848,grad_norm: 0.9999990439136491, iteration: 259964
loss: 1.0188467502593994,grad_norm: 0.9284615387608265, iteration: 259965
loss: 1.0680314302444458,grad_norm: 0.999999341458261, iteration: 259966
loss: 1.0087625980377197,grad_norm: 0.8060198381771673, iteration: 259967
loss: 0.9906312823295593,grad_norm: 0.921670851603783, iteration: 259968
loss: 0.9889403581619263,grad_norm: 0.8528343507057929, iteration: 259969
loss: 1.0492662191390991,grad_norm: 0.8644643533201597, iteration: 259970
loss: 1.0268689393997192,grad_norm: 0.9999993617498666, iteration: 259971
loss: 1.0040439367294312,grad_norm: 0.8926250320988913, iteration: 259972
loss: 1.0358877182006836,grad_norm: 0.8109084279307642, iteration: 259973
loss: 0.9900212287902832,grad_norm: 0.8876900925013607, iteration: 259974
loss: 1.0222841501235962,grad_norm: 0.818850322283063, iteration: 259975
loss: 1.0155106782913208,grad_norm: 0.9999990473404348, iteration: 259976
loss: 1.0409826040267944,grad_norm: 0.9131179727012988, iteration: 259977
loss: 1.0162725448608398,grad_norm: 0.9999992379035078, iteration: 259978
loss: 1.030780553817749,grad_norm: 0.9999997483350662, iteration: 259979
loss: 0.9993100762367249,grad_norm: 0.8716163789276956, iteration: 259980
loss: 1.0264382362365723,grad_norm: 0.9985726904228626, iteration: 259981
loss: 0.9938312768936157,grad_norm: 0.7509012018361618, iteration: 259982
loss: 1.0143539905548096,grad_norm: 0.7555206990460633, iteration: 259983
loss: 1.0132038593292236,grad_norm: 0.9999991824152084, iteration: 259984
loss: 1.045495867729187,grad_norm: 0.9335825582600061, iteration: 259985
loss: 1.0021260976791382,grad_norm: 0.9437012545218953, iteration: 259986
loss: 1.0103188753128052,grad_norm: 0.9999991370933917, iteration: 259987
loss: 0.9965048432350159,grad_norm: 0.9200757475992699, iteration: 259988
loss: 0.9913097023963928,grad_norm: 0.8755722814990711, iteration: 259989
loss: 1.0425814390182495,grad_norm: 0.904642707682612, iteration: 259990
loss: 1.0026317834854126,grad_norm: 0.999999876779731, iteration: 259991
loss: 1.0013470649719238,grad_norm: 0.9999990612721182, iteration: 259992
loss: 0.9897120594978333,grad_norm: 0.8370819678776911, iteration: 259993
loss: 0.979236364364624,grad_norm: 0.8955931827594162, iteration: 259994
loss: 0.9873270392417908,grad_norm: 0.9967306334668776, iteration: 259995
loss: 1.0279978513717651,grad_norm: 0.8908590641597135, iteration: 259996
loss: 0.9746848344802856,grad_norm: 0.8909221518650728, iteration: 259997
loss: 1.026108741760254,grad_norm: 0.9246196679647017, iteration: 259998
loss: 1.0003299713134766,grad_norm: 0.7940775750711196, iteration: 259999
loss: 1.0139250755310059,grad_norm: 0.9999991359045776, iteration: 260000
Evaluating at step 260000
{'val': 0.9944373574107885, 'test': 2.127087973938963}
loss: 1.0059170722961426,grad_norm: 0.8469049863397848, iteration: 260001
loss: 1.0188947916030884,grad_norm: 0.8304741260327891, iteration: 260002
loss: 0.9527018666267395,grad_norm: 0.874279099395194, iteration: 260003
loss: 1.0035501718521118,grad_norm: 0.999999009614287, iteration: 260004
loss: 1.0028760433197021,grad_norm: 0.8979128782942346, iteration: 260005
loss: 0.9711018800735474,grad_norm: 0.9999994099334409, iteration: 260006
loss: 1.028828501701355,grad_norm: 0.9342183878453463, iteration: 260007
loss: 0.9889194369316101,grad_norm: 0.8891654838780395, iteration: 260008
loss: 1.0204236507415771,grad_norm: 0.9999990610088642, iteration: 260009
loss: 1.0063579082489014,grad_norm: 0.8245186712597818, iteration: 260010
loss: 0.9316026568412781,grad_norm: 0.8831617586234521, iteration: 260011
loss: 0.9902875423431396,grad_norm: 0.7808253060323067, iteration: 260012
loss: 1.0081264972686768,grad_norm: 0.9738419006381074, iteration: 260013
loss: 1.025510549545288,grad_norm: 0.9053327356394938, iteration: 260014
loss: 1.0208393335342407,grad_norm: 0.9999997366399808, iteration: 260015
loss: 0.9882327318191528,grad_norm: 0.9528205134741068, iteration: 260016
loss: 1.0209301710128784,grad_norm: 0.7357348652492461, iteration: 260017
loss: 1.0682225227355957,grad_norm: 0.9999999202797648, iteration: 260018
loss: 0.9779034852981567,grad_norm: 0.8579974280471563, iteration: 260019
loss: 0.9909897446632385,grad_norm: 0.9696277728041358, iteration: 260020
loss: 0.9891405701637268,grad_norm: 0.9368833308992758, iteration: 260021
loss: 0.9787390232086182,grad_norm: 0.8220232358830597, iteration: 260022
loss: 1.0203059911727905,grad_norm: 0.9999990981753144, iteration: 260023
loss: 1.039514183998108,grad_norm: 0.9999991189366763, iteration: 260024
loss: 0.9726567268371582,grad_norm: 0.8958147674749656, iteration: 260025
loss: 0.9999210238456726,grad_norm: 0.999999170248178, iteration: 260026
loss: 1.1676230430603027,grad_norm: 0.999999891257003, iteration: 260027
loss: 1.039954662322998,grad_norm: 0.9999996464200078, iteration: 260028
loss: 1.0175713300704956,grad_norm: 0.7623163777024353, iteration: 260029
loss: 0.9961764812469482,grad_norm: 0.9845354646627177, iteration: 260030
loss: 1.0018525123596191,grad_norm: 0.9999992218395883, iteration: 260031
loss: 1.0198991298675537,grad_norm: 0.8100547642075256, iteration: 260032
loss: 0.9827646613121033,grad_norm: 0.930150909898217, iteration: 260033
loss: 0.9849702715873718,grad_norm: 0.8202889403141392, iteration: 260034
loss: 0.9945268630981445,grad_norm: 0.7695768910271081, iteration: 260035
loss: 0.9951350092887878,grad_norm: 0.8056001076473063, iteration: 260036
loss: 1.0005611181259155,grad_norm: 0.9523301814747598, iteration: 260037
loss: 0.9981508851051331,grad_norm: 0.8450938546507275, iteration: 260038
loss: 1.0115890502929688,grad_norm: 0.818195747230344, iteration: 260039
loss: 1.0191090106964111,grad_norm: 0.9855975724199166, iteration: 260040
loss: 0.9977637529373169,grad_norm: 0.7493899078762791, iteration: 260041
loss: 0.9829287528991699,grad_norm: 0.8029088667050083, iteration: 260042
loss: 1.0497126579284668,grad_norm: 0.9999991012526276, iteration: 260043
loss: 1.0286837816238403,grad_norm: 0.9999999449175433, iteration: 260044
loss: 1.138069987297058,grad_norm: 0.9999991401408578, iteration: 260045
loss: 0.9835744500160217,grad_norm: 0.9018203984036844, iteration: 260046
loss: 0.9803884029388428,grad_norm: 0.889484676750471, iteration: 260047
loss: 1.0127638578414917,grad_norm: 0.7508395197421828, iteration: 260048
loss: 0.9929447174072266,grad_norm: 0.9288087670445718, iteration: 260049
loss: 1.023371934890747,grad_norm: 0.9999998296305669, iteration: 260050
loss: 1.0070910453796387,grad_norm: 0.8979340241916723, iteration: 260051
loss: 0.9378246665000916,grad_norm: 0.8521166755527336, iteration: 260052
loss: 1.024633526802063,grad_norm: 0.9999999807666973, iteration: 260053
loss: 1.0223588943481445,grad_norm: 0.999999274712407, iteration: 260054
loss: 0.9934017062187195,grad_norm: 0.9671789032435294, iteration: 260055
loss: 0.9866023659706116,grad_norm: 0.9854379473346708, iteration: 260056
loss: 0.9920918345451355,grad_norm: 0.8006569738078352, iteration: 260057
loss: 0.9787330627441406,grad_norm: 0.8038730205498058, iteration: 260058
loss: 0.9972749948501587,grad_norm: 0.8528357502873607, iteration: 260059
loss: 0.9826154708862305,grad_norm: 0.8373901121401467, iteration: 260060
loss: 1.016514778137207,grad_norm: 1.0000000232497221, iteration: 260061
loss: 0.9895951747894287,grad_norm: 0.807632207925932, iteration: 260062
loss: 0.9718207716941833,grad_norm: 0.7974765759301133, iteration: 260063
loss: 0.9858397245407104,grad_norm: 0.9999990156772144, iteration: 260064
loss: 1.0145460367202759,grad_norm: 0.9295833731522289, iteration: 260065
loss: 0.9889408349990845,grad_norm: 0.7344919902545146, iteration: 260066
loss: 0.9569433331489563,grad_norm: 0.8233223258135747, iteration: 260067
loss: 0.9643324613571167,grad_norm: 0.9999990871351826, iteration: 260068
loss: 1.0089306831359863,grad_norm: 0.8064502136337761, iteration: 260069
loss: 0.9940850138664246,grad_norm: 0.8150999164977992, iteration: 260070
loss: 0.97242271900177,grad_norm: 0.8943979461649709, iteration: 260071
loss: 1.030239224433899,grad_norm: 0.9416451799387923, iteration: 260072
loss: 1.0066519975662231,grad_norm: 0.9385523710747283, iteration: 260073
loss: 1.027355670928955,grad_norm: 0.835241222092415, iteration: 260074
loss: 0.9774314165115356,grad_norm: 0.905896931740458, iteration: 260075
loss: 0.992603600025177,grad_norm: 0.9402507447904029, iteration: 260076
loss: 1.0422254800796509,grad_norm: 0.79828579883886, iteration: 260077
loss: 1.0085736513137817,grad_norm: 0.730115095354186, iteration: 260078
loss: 1.1245187520980835,grad_norm: 0.9999994594212581, iteration: 260079
loss: 1.0113970041275024,grad_norm: 0.8032931304691106, iteration: 260080
loss: 0.9963908195495605,grad_norm: 0.9656845913286133, iteration: 260081
loss: 1.0201377868652344,grad_norm: 0.9999993257981187, iteration: 260082
loss: 0.9999220967292786,grad_norm: 0.9026047757933053, iteration: 260083
loss: 1.0144259929656982,grad_norm: 0.9484484332337899, iteration: 260084
loss: 0.9859074354171753,grad_norm: 0.8018403945025402, iteration: 260085
loss: 1.0286779403686523,grad_norm: 0.869692258023307, iteration: 260086
loss: 1.005850911140442,grad_norm: 0.8678530403170601, iteration: 260087
loss: 0.9810982346534729,grad_norm: 0.8342856657981438, iteration: 260088
loss: 0.9790401458740234,grad_norm: 0.9999994423458429, iteration: 260089
loss: 0.9817312359809875,grad_norm: 0.9999992062959245, iteration: 260090
loss: 1.00126051902771,grad_norm: 0.9999991830981627, iteration: 260091
loss: 1.015735149383545,grad_norm: 0.9999991447067729, iteration: 260092
loss: 1.0068721771240234,grad_norm: 0.9052403326347256, iteration: 260093
loss: 1.0220228433609009,grad_norm: 0.8712237398119574, iteration: 260094
loss: 1.0446784496307373,grad_norm: 0.8806876469393525, iteration: 260095
loss: 0.9651313424110413,grad_norm: 0.999999076918942, iteration: 260096
loss: 0.9872274398803711,grad_norm: 0.9999996456406279, iteration: 260097
loss: 1.0270756483078003,grad_norm: 0.9564669786421264, iteration: 260098
loss: 0.994624674320221,grad_norm: 0.9692293540335107, iteration: 260099
loss: 0.9950733780860901,grad_norm: 0.8125724497456103, iteration: 260100
loss: 0.9647365808486938,grad_norm: 0.7629204666616599, iteration: 260101
loss: 0.9900203347206116,grad_norm: 0.9999991043257402, iteration: 260102
loss: 0.9821802973747253,grad_norm: 0.9485035496746435, iteration: 260103
loss: 0.9862093329429626,grad_norm: 0.970511965191036, iteration: 260104
loss: 1.0123753547668457,grad_norm: 0.690024051173098, iteration: 260105
loss: 0.9634078741073608,grad_norm: 0.9231083330418991, iteration: 260106
loss: 0.9922491312026978,grad_norm: 0.9720311356563498, iteration: 260107
loss: 1.0044260025024414,grad_norm: 0.951946588986784, iteration: 260108
loss: 1.0325883626937866,grad_norm: 0.9999991291927656, iteration: 260109
loss: 0.9928878545761108,grad_norm: 0.9999997930238371, iteration: 260110
loss: 0.9782825112342834,grad_norm: 0.8751049825451368, iteration: 260111
loss: 1.145440697669983,grad_norm: 0.9999991418776314, iteration: 260112
loss: 0.9900814294815063,grad_norm: 0.7680834939434633, iteration: 260113
loss: 0.9831126928329468,grad_norm: 0.8923763124760017, iteration: 260114
loss: 0.986506462097168,grad_norm: 0.97823638005136, iteration: 260115
loss: 0.9760394096374512,grad_norm: 0.8106087434362593, iteration: 260116
loss: 0.9818846583366394,grad_norm: 0.9999990838377132, iteration: 260117
loss: 1.0148910284042358,grad_norm: 0.9144208078309133, iteration: 260118
loss: 0.983899712562561,grad_norm: 0.9540048061366426, iteration: 260119
loss: 0.9574541449546814,grad_norm: 0.7186674466704098, iteration: 260120
loss: 1.0049339532852173,grad_norm: 0.8569187515678289, iteration: 260121
loss: 1.010822057723999,grad_norm: 0.9163733985108736, iteration: 260122
loss: 0.986560583114624,grad_norm: 0.827851283623771, iteration: 260123
loss: 0.9843302369117737,grad_norm: 0.9191958212669356, iteration: 260124
loss: 1.046037197113037,grad_norm: 0.9103011975452907, iteration: 260125
loss: 0.9764942526817322,grad_norm: 0.9999991496655107, iteration: 260126
loss: 0.988933265209198,grad_norm: 0.8736898685741052, iteration: 260127
loss: 0.9797855615615845,grad_norm: 0.9455775973360084, iteration: 260128
loss: 0.9915710687637329,grad_norm: 0.9151068719245202, iteration: 260129
loss: 1.0379307270050049,grad_norm: 0.883344212485751, iteration: 260130
loss: 0.9941507577896118,grad_norm: 0.9065449240955393, iteration: 260131
loss: 0.9783874154090881,grad_norm: 0.9743936408111802, iteration: 260132
loss: 1.0047385692596436,grad_norm: 0.8772583790671478, iteration: 260133
loss: 0.9811936616897583,grad_norm: 0.984466940976498, iteration: 260134
loss: 0.9823665022850037,grad_norm: 0.8076247364048367, iteration: 260135
loss: 1.0307390689849854,grad_norm: 0.8972060628081199, iteration: 260136
loss: 0.9745626449584961,grad_norm: 0.9349664624564645, iteration: 260137
loss: 1.0105875730514526,grad_norm: 0.9486664978852598, iteration: 260138
loss: 1.0035463571548462,grad_norm: 0.8603171574672631, iteration: 260139
loss: 0.9879365563392639,grad_norm: 0.8363700226072702, iteration: 260140
loss: 1.0315618515014648,grad_norm: 0.9270711354530733, iteration: 260141
loss: 0.9988841414451599,grad_norm: 0.8680637017164542, iteration: 260142
loss: 0.9841634035110474,grad_norm: 0.8773952609574062, iteration: 260143
loss: 0.975395917892456,grad_norm: 0.8384598712402942, iteration: 260144
loss: 0.960710346698761,grad_norm: 0.8225504503435191, iteration: 260145
loss: 0.9872208833694458,grad_norm: 0.999999247312411, iteration: 260146
loss: 1.0002521276474,grad_norm: 0.9999990310024559, iteration: 260147
loss: 1.035874366760254,grad_norm: 0.9460445007179414, iteration: 260148
loss: 1.0040674209594727,grad_norm: 0.8786508805207548, iteration: 260149
loss: 1.0056840181350708,grad_norm: 0.9064040024663841, iteration: 260150
loss: 1.0106555223464966,grad_norm: 0.8325328782101593, iteration: 260151
loss: 0.9895955324172974,grad_norm: 0.9999992232219577, iteration: 260152
loss: 0.9989529848098755,grad_norm: 0.888772127467885, iteration: 260153
loss: 0.998274564743042,grad_norm: 0.7405218582859916, iteration: 260154
loss: 1.036632776260376,grad_norm: 0.9999991483442624, iteration: 260155
loss: 0.9746851325035095,grad_norm: 0.9243319195574192, iteration: 260156
loss: 1.0016409158706665,grad_norm: 0.8383246518364117, iteration: 260157
loss: 1.0010515451431274,grad_norm: 0.9999992819869168, iteration: 260158
loss: 0.9821723103523254,grad_norm: 0.789606551125147, iteration: 260159
loss: 1.0104963779449463,grad_norm: 0.7287606367989092, iteration: 260160
loss: 0.9810628294944763,grad_norm: 0.9999990473888537, iteration: 260161
loss: 1.0219422578811646,grad_norm: 0.9522455408361713, iteration: 260162
loss: 1.0345203876495361,grad_norm: 0.9999988922924886, iteration: 260163
loss: 0.978667676448822,grad_norm: 0.927892229766542, iteration: 260164
loss: 0.9801478385925293,grad_norm: 0.9293361544970574, iteration: 260165
loss: 0.9695246815681458,grad_norm: 0.7688944359617984, iteration: 260166
loss: 0.9857310056686401,grad_norm: 0.9513040920499259, iteration: 260167
loss: 0.9633098244667053,grad_norm: 0.9999991242339783, iteration: 260168
loss: 1.005480170249939,grad_norm: 0.8969943500439874, iteration: 260169
loss: 1.0737534761428833,grad_norm: 0.9999991530099505, iteration: 260170
loss: 1.0052028894424438,grad_norm: 0.8149466163894269, iteration: 260171
loss: 1.1339114904403687,grad_norm: 0.8568256371073729, iteration: 260172
loss: 1.0110418796539307,grad_norm: 0.8520640863219593, iteration: 260173
loss: 1.0034254789352417,grad_norm: 0.8898471160275218, iteration: 260174
loss: 1.0170692205429077,grad_norm: 0.8548473262911972, iteration: 260175
loss: 0.9655822515487671,grad_norm: 0.8871382972148814, iteration: 260176
loss: 0.9903278946876526,grad_norm: 0.9095179936431951, iteration: 260177
loss: 0.9740007519721985,grad_norm: 0.8646458389172599, iteration: 260178
loss: 0.9400430917739868,grad_norm: 0.999999144470087, iteration: 260179
loss: 0.97682785987854,grad_norm: 0.7335746712405314, iteration: 260180
loss: 1.0184845924377441,grad_norm: 0.9999991399993207, iteration: 260181
loss: 1.0313255786895752,grad_norm: 0.9453366832436496, iteration: 260182
loss: 1.0131865739822388,grad_norm: 0.8856287364131179, iteration: 260183
loss: 0.995363175868988,grad_norm: 0.9999990297307859, iteration: 260184
loss: 0.9809927940368652,grad_norm: 0.9152449949117449, iteration: 260185
loss: 0.9761559367179871,grad_norm: 0.8991303801328724, iteration: 260186
loss: 1.0497671365737915,grad_norm: 0.9999997690436524, iteration: 260187
loss: 0.9811901450157166,grad_norm: 0.7580570089147377, iteration: 260188
loss: 1.004195213317871,grad_norm: 0.9344766823353529, iteration: 260189
loss: 0.9923500418663025,grad_norm: 0.9999991721245708, iteration: 260190
loss: 0.9704457521438599,grad_norm: 0.9008137821548988, iteration: 260191
loss: 1.0287063121795654,grad_norm: 0.9999995282895257, iteration: 260192
loss: 1.0148613452911377,grad_norm: 0.9999993924126157, iteration: 260193
loss: 0.9808534383773804,grad_norm: 0.9999990576322306, iteration: 260194
loss: 0.9895967841148376,grad_norm: 0.8235578958753955, iteration: 260195
loss: 0.9898104071617126,grad_norm: 0.7774727975156891, iteration: 260196
loss: 1.0377694368362427,grad_norm: 0.92156863471903, iteration: 260197
loss: 1.0109295845031738,grad_norm: 0.8179372361482689, iteration: 260198
loss: 0.9991999268531799,grad_norm: 0.8687540080830185, iteration: 260199
loss: 0.9735273122787476,grad_norm: 0.9112070781258963, iteration: 260200
loss: 1.0051754713058472,grad_norm: 0.8662398348295035, iteration: 260201
loss: 0.9852393269538879,grad_norm: 0.9999995543164354, iteration: 260202
loss: 0.9858837127685547,grad_norm: 0.8554027287273266, iteration: 260203
loss: 0.9868959188461304,grad_norm: 0.8752035924783118, iteration: 260204
loss: 0.9736009836196899,grad_norm: 0.8192583645296649, iteration: 260205
loss: 1.008901596069336,grad_norm: 0.8713620320293178, iteration: 260206
loss: 0.9871343970298767,grad_norm: 0.7829960668893288, iteration: 260207
loss: 1.060650110244751,grad_norm: 0.8486675550453114, iteration: 260208
loss: 1.0234142541885376,grad_norm: 0.978859438126527, iteration: 260209
loss: 1.012674331665039,grad_norm: 0.9750562324864374, iteration: 260210
loss: 1.0840744972229004,grad_norm: 0.8548472637920493, iteration: 260211
loss: 0.9903407692909241,grad_norm: 0.999999040439908, iteration: 260212
loss: 1.0106029510498047,grad_norm: 0.917916718574828, iteration: 260213
loss: 1.1088173389434814,grad_norm: 0.9999990883567099, iteration: 260214
loss: 0.9801037907600403,grad_norm: 0.9958874599922819, iteration: 260215
loss: 0.9901694655418396,grad_norm: 0.9999989382315989, iteration: 260216
loss: 1.0196077823638916,grad_norm: 0.8514556089874944, iteration: 260217
loss: 0.9690181016921997,grad_norm: 0.8761206157684484, iteration: 260218
loss: 0.9970154762268066,grad_norm: 0.8573257443536485, iteration: 260219
loss: 1.010377049446106,grad_norm: 0.7665997907840175, iteration: 260220
loss: 0.9744709134101868,grad_norm: 0.7875518539563385, iteration: 260221
loss: 1.0170950889587402,grad_norm: 0.9223609424957874, iteration: 260222
loss: 0.9949102997779846,grad_norm: 0.8119119552234713, iteration: 260223
loss: 0.9872502088546753,grad_norm: 0.9404721406042675, iteration: 260224
loss: 1.0276482105255127,grad_norm: 0.9826115896255968, iteration: 260225
loss: 1.0164145231246948,grad_norm: 0.8672728021560983, iteration: 260226
loss: 1.0306891202926636,grad_norm: 0.7818709382503347, iteration: 260227
loss: 0.9716495275497437,grad_norm: 0.9999991557299327, iteration: 260228
loss: 0.9464218020439148,grad_norm: 0.9999991874681547, iteration: 260229
loss: 1.0699832439422607,grad_norm: 0.9999992743019701, iteration: 260230
loss: 0.9939414262771606,grad_norm: 0.802117422019323, iteration: 260231
loss: 1.026133418083191,grad_norm: 0.9999989188790608, iteration: 260232
loss: 0.9638480544090271,grad_norm: 0.8644187691336968, iteration: 260233
loss: 0.9889854788780212,grad_norm: 0.8643216077099767, iteration: 260234
loss: 1.0184941291809082,grad_norm: 0.9521074637133112, iteration: 260235
loss: 1.0000202655792236,grad_norm: 0.9975662805335329, iteration: 260236
loss: 0.9979244470596313,grad_norm: 0.93164022274982, iteration: 260237
loss: 0.9666040539741516,grad_norm: 0.8977188557310654, iteration: 260238
loss: 1.002316951751709,grad_norm: 0.769417622783367, iteration: 260239
loss: 1.000711441040039,grad_norm: 0.9075683168045859, iteration: 260240
loss: 1.0184030532836914,grad_norm: 0.9831854650237861, iteration: 260241
loss: 1.0135202407836914,grad_norm: 0.999999213978893, iteration: 260242
loss: 0.9847395420074463,grad_norm: 0.878236359305237, iteration: 260243
loss: 0.9564539194107056,grad_norm: 0.9999991462949702, iteration: 260244
loss: 0.989484965801239,grad_norm: 0.9999991913352859, iteration: 260245
loss: 0.9659183025360107,grad_norm: 0.9595131537308804, iteration: 260246
loss: 0.991990864276886,grad_norm: 0.9290780922708872, iteration: 260247
loss: 1.1298753023147583,grad_norm: 0.884247289897369, iteration: 260248
loss: 1.123096227645874,grad_norm: 0.9999998299367834, iteration: 260249
loss: 1.0035068988800049,grad_norm: 0.8919896961767152, iteration: 260250
loss: 0.9756922125816345,grad_norm: 0.7490174835325798, iteration: 260251
loss: 1.0367673635482788,grad_norm: 0.97090312974262, iteration: 260252
loss: 1.0210661888122559,grad_norm: 0.9154781726791675, iteration: 260253
loss: 0.9967144727706909,grad_norm: 0.9999990535411226, iteration: 260254
loss: 1.0176966190338135,grad_norm: 0.722388423149036, iteration: 260255
loss: 0.9829261302947998,grad_norm: 0.9999991286744981, iteration: 260256
loss: 1.0098397731781006,grad_norm: 0.8098313747741207, iteration: 260257
loss: 0.9581369161605835,grad_norm: 0.9954457438351753, iteration: 260258
loss: 0.9840279817581177,grad_norm: 0.9999990994324175, iteration: 260259
loss: 0.9961020350456238,grad_norm: 0.8705519700642323, iteration: 260260
loss: 0.9752257466316223,grad_norm: 0.8631933813095123, iteration: 260261
loss: 1.0216292142868042,grad_norm: 0.999999779406223, iteration: 260262
loss: 0.9795348644256592,grad_norm: 0.9999989991518882, iteration: 260263
loss: 1.0034148693084717,grad_norm: 0.7938853291452872, iteration: 260264
loss: 1.1251745223999023,grad_norm: 0.9999996570033317, iteration: 260265
loss: 0.9715189933776855,grad_norm: 0.8236355826422987, iteration: 260266
loss: 0.9918941855430603,grad_norm: 0.9603822694141789, iteration: 260267
loss: 0.969939112663269,grad_norm: 0.9173263883647887, iteration: 260268
loss: 1.0351346731185913,grad_norm: 0.949052601351478, iteration: 260269
loss: 1.073786973953247,grad_norm: 0.9999991029975763, iteration: 260270
loss: 0.9479550719261169,grad_norm: 0.8593194790209908, iteration: 260271
loss: 1.0756396055221558,grad_norm: 0.9999994220168527, iteration: 260272
loss: 1.014086365699768,grad_norm: 0.8823337377786208, iteration: 260273
loss: 1.0287812948226929,grad_norm: 0.926880793431731, iteration: 260274
loss: 0.9863068461418152,grad_norm: 0.7576938760131381, iteration: 260275
loss: 0.9976711869239807,grad_norm: 0.999999112199275, iteration: 260276
loss: 0.9792860150337219,grad_norm: 0.7961979742091598, iteration: 260277
loss: 0.9832212924957275,grad_norm: 0.7514073616926127, iteration: 260278
loss: 0.9872896075248718,grad_norm: 0.8520034884785124, iteration: 260279
loss: 1.010542631149292,grad_norm: 0.9327620129001402, iteration: 260280
loss: 0.9863813519477844,grad_norm: 0.9204935785964773, iteration: 260281
loss: 1.0158076286315918,grad_norm: 0.930359838711006, iteration: 260282
loss: 0.9843502044677734,grad_norm: 0.6479771214452914, iteration: 260283
loss: 0.9817749261856079,grad_norm: 0.8269887761607112, iteration: 260284
loss: 0.989005982875824,grad_norm: 0.859855275972041, iteration: 260285
loss: 0.9841678738594055,grad_norm: 0.9688210280019143, iteration: 260286
loss: 0.9923659563064575,grad_norm: 0.9999993503775558, iteration: 260287
loss: 0.9993525743484497,grad_norm: 0.7500779551180926, iteration: 260288
loss: 0.9904230237007141,grad_norm: 0.9796400168621903, iteration: 260289
loss: 1.0302338600158691,grad_norm: 0.9378573301921512, iteration: 260290
loss: 0.9920278191566467,grad_norm: 0.9406549062012861, iteration: 260291
loss: 1.06062912940979,grad_norm: 0.9565796381017914, iteration: 260292
loss: 0.9767457842826843,grad_norm: 0.7737779570426263, iteration: 260293
loss: 1.0176764726638794,grad_norm: 0.834256508998763, iteration: 260294
loss: 1.0162073373794556,grad_norm: 0.8517285330590914, iteration: 260295
loss: 1.0318849086761475,grad_norm: 0.9929168710917334, iteration: 260296
loss: 0.9893731474876404,grad_norm: 0.8081885615989195, iteration: 260297
loss: 1.0208157300949097,grad_norm: 0.8004620092581612, iteration: 260298
loss: 1.0051617622375488,grad_norm: 0.7680359376279426, iteration: 260299
loss: 1.0067353248596191,grad_norm: 0.902095411425394, iteration: 260300
loss: 1.0401637554168701,grad_norm: 0.9999993824517829, iteration: 260301
loss: 0.968546986579895,grad_norm: 0.9999989283020544, iteration: 260302
loss: 0.9941161870956421,grad_norm: 0.9539813811276759, iteration: 260303
loss: 0.963999330997467,grad_norm: 0.9999990682500022, iteration: 260304
loss: 1.0244046449661255,grad_norm: 0.7874374220613302, iteration: 260305
loss: 1.0004143714904785,grad_norm: 0.9479054843624058, iteration: 260306
loss: 1.0589768886566162,grad_norm: 0.8764534161182532, iteration: 260307
loss: 0.9853065013885498,grad_norm: 0.8264872198887803, iteration: 260308
loss: 1.003434658050537,grad_norm: 0.9178210434006804, iteration: 260309
loss: 1.014087200164795,grad_norm: 0.9440343931921675, iteration: 260310
loss: 1.0084577798843384,grad_norm: 0.8985429043908921, iteration: 260311
loss: 0.986136794090271,grad_norm: 0.7913010081467068, iteration: 260312
loss: 1.0330283641815186,grad_norm: 0.9472918480983332, iteration: 260313
loss: 0.9954851269721985,grad_norm: 0.9036726636754056, iteration: 260314
loss: 1.0225688219070435,grad_norm: 0.8842868056184113, iteration: 260315
loss: 0.9582772850990295,grad_norm: 0.8513364902049021, iteration: 260316
loss: 0.9948692917823792,grad_norm: 0.7347883917763729, iteration: 260317
loss: 1.0612304210662842,grad_norm: 0.9999999562036261, iteration: 260318
loss: 0.966376006603241,grad_norm: 0.7734871504451969, iteration: 260319
loss: 1.026079535484314,grad_norm: 0.9999991017505958, iteration: 260320
loss: 0.9565989971160889,grad_norm: 0.9999990634925695, iteration: 260321
loss: 1.0154690742492676,grad_norm: 0.9999993546082325, iteration: 260322
loss: 1.04438054561615,grad_norm: 0.9999996337048489, iteration: 260323
loss: 0.9928786754608154,grad_norm: 0.8266777134133846, iteration: 260324
loss: 0.9943357706069946,grad_norm: 0.8613475821659975, iteration: 260325
loss: 0.9870659708976746,grad_norm: 0.894536688709568, iteration: 260326
loss: 0.998181164264679,grad_norm: 0.788995690073184, iteration: 260327
loss: 0.9783114194869995,grad_norm: 0.7501859127761948, iteration: 260328
loss: 1.015718936920166,grad_norm: 0.9036714299087701, iteration: 260329
loss: 1.0554420948028564,grad_norm: 0.9999998718871035, iteration: 260330
loss: 1.0060335397720337,grad_norm: 0.953876398843247, iteration: 260331
loss: 0.9777795672416687,grad_norm: 0.7999521621135001, iteration: 260332
loss: 0.9975634217262268,grad_norm: 0.8923890099700992, iteration: 260333
loss: 0.9687506556510925,grad_norm: 0.9137227316413866, iteration: 260334
loss: 1.0072786808013916,grad_norm: 0.7913801619738069, iteration: 260335
loss: 0.9932742118835449,grad_norm: 0.9632322901024664, iteration: 260336
loss: 0.9814000129699707,grad_norm: 0.9761039702443796, iteration: 260337
loss: 1.0026761293411255,grad_norm: 0.9999990962641271, iteration: 260338
loss: 0.9808175563812256,grad_norm: 0.8581689047735297, iteration: 260339
loss: 1.0352386236190796,grad_norm: 0.9781811948944088, iteration: 260340
loss: 0.9985343813896179,grad_norm: 0.9999996017379741, iteration: 260341
loss: 1.0079551935195923,grad_norm: 0.7292652325030167, iteration: 260342
loss: 1.0327311754226685,grad_norm: 0.8515643008208124, iteration: 260343
loss: 1.032046914100647,grad_norm: 0.999999140838894, iteration: 260344
loss: 1.0008912086486816,grad_norm: 0.8565371011074357, iteration: 260345
loss: 1.121526837348938,grad_norm: 0.9999991195832582, iteration: 260346
loss: 1.0664806365966797,grad_norm: 0.8290446297508577, iteration: 260347
loss: 0.9958559274673462,grad_norm: 0.7809647175516743, iteration: 260348
loss: 1.0189321041107178,grad_norm: 0.999999598822031, iteration: 260349
loss: 1.0317213535308838,grad_norm: 0.7839811044016519, iteration: 260350
loss: 0.9560703039169312,grad_norm: 0.9999992210931906, iteration: 260351
loss: 0.9933125376701355,grad_norm: 0.999999107413548, iteration: 260352
loss: 0.9942188858985901,grad_norm: 0.7782796598285148, iteration: 260353
loss: 0.9614791870117188,grad_norm: 0.9802796974805347, iteration: 260354
loss: 1.0359196662902832,grad_norm: 0.9999991932529065, iteration: 260355
loss: 1.0284068584442139,grad_norm: 0.9584439220888553, iteration: 260356
loss: 1.0242046117782593,grad_norm: 0.8990643727167982, iteration: 260357
loss: 1.0079007148742676,grad_norm: 0.8722914690639494, iteration: 260358
loss: 0.9957073330879211,grad_norm: 0.7743781657977549, iteration: 260359
loss: 1.0063244104385376,grad_norm: 0.8305672760126082, iteration: 260360
loss: 0.9704002737998962,grad_norm: 0.9192700516632035, iteration: 260361
loss: 1.018683671951294,grad_norm: 0.9648361068748647, iteration: 260362
loss: 0.9899159073829651,grad_norm: 0.8938752140315316, iteration: 260363
loss: 1.006102442741394,grad_norm: 0.9999992137528755, iteration: 260364
loss: 0.9911824464797974,grad_norm: 0.9999989937926991, iteration: 260365
loss: 1.0136719942092896,grad_norm: 0.9999991926642203, iteration: 260366
loss: 1.007062315940857,grad_norm: 0.9999991073406492, iteration: 260367
loss: 1.0338941812515259,grad_norm: 0.9445760898998296, iteration: 260368
loss: 0.9888109564781189,grad_norm: 0.8236541460779699, iteration: 260369
loss: 0.9838517904281616,grad_norm: 0.8127625357424558, iteration: 260370
loss: 0.9549102187156677,grad_norm: 0.9689037231866614, iteration: 260371
loss: 1.011389136314392,grad_norm: 0.860876923532032, iteration: 260372
loss: 1.0363374948501587,grad_norm: 0.9999991857786847, iteration: 260373
loss: 1.019102692604065,grad_norm: 0.9606871458788596, iteration: 260374
loss: 0.9718105792999268,grad_norm: 0.9127847775833877, iteration: 260375
loss: 1.0196799039840698,grad_norm: 0.9442834753130939, iteration: 260376
loss: 1.0277981758117676,grad_norm: 0.9999991333441222, iteration: 260377
loss: 0.9475356340408325,grad_norm: 0.7880148842413521, iteration: 260378
loss: 1.0151766538619995,grad_norm: 0.8912524055592885, iteration: 260379
loss: 0.9674447774887085,grad_norm: 0.9440020917256464, iteration: 260380
loss: 1.024474024772644,grad_norm: 0.9999994416334852, iteration: 260381
loss: 1.0813649892807007,grad_norm: 0.9999992758691841, iteration: 260382
loss: 1.024573564529419,grad_norm: 0.9999991476091636, iteration: 260383
loss: 0.9961619973182678,grad_norm: 0.9202685562171008, iteration: 260384
loss: 1.0263880491256714,grad_norm: 0.8142142059092662, iteration: 260385
loss: 0.9984241127967834,grad_norm: 0.961090728967641, iteration: 260386
loss: 1.0171380043029785,grad_norm: 0.8722732337784415, iteration: 260387
loss: 1.0200295448303223,grad_norm: 0.873187531656441, iteration: 260388
loss: 1.0102479457855225,grad_norm: 0.7836363122680818, iteration: 260389
loss: 0.9780157208442688,grad_norm: 0.9890190403217651, iteration: 260390
loss: 1.019960641860962,grad_norm: 0.6766098262578316, iteration: 260391
loss: 1.0196471214294434,grad_norm: 0.9032275732613029, iteration: 260392
loss: 0.9991530776023865,grad_norm: 0.9999991697474269, iteration: 260393
loss: 1.051655888557434,grad_norm: 0.9361174219866738, iteration: 260394
loss: 0.9990504384040833,grad_norm: 0.9999990493059847, iteration: 260395
loss: 0.9957099556922913,grad_norm: 0.8208576744865425, iteration: 260396
loss: 0.9608681797981262,grad_norm: 0.9172034590166395, iteration: 260397
loss: 1.0228126049041748,grad_norm: 0.9999990137971572, iteration: 260398
loss: 1.0174262523651123,grad_norm: 0.966282953419638, iteration: 260399
loss: 0.9431623220443726,grad_norm: 0.9144211670968788, iteration: 260400
loss: 0.9861039519309998,grad_norm: 0.8960267563879919, iteration: 260401
loss: 1.0045748949050903,grad_norm: 0.9494376104745814, iteration: 260402
loss: 1.001831293106079,grad_norm: 0.8496425097358256, iteration: 260403
loss: 0.975972056388855,grad_norm: 0.8834274664930376, iteration: 260404
loss: 1.030276894569397,grad_norm: 0.8786041571148067, iteration: 260405
loss: 0.9647098183631897,grad_norm: 0.9216402710726468, iteration: 260406
loss: 0.9827882647514343,grad_norm: 0.7722051896154112, iteration: 260407
loss: 0.9842215180397034,grad_norm: 0.8918914650267133, iteration: 260408
loss: 1.0154489278793335,grad_norm: 0.8358185019486394, iteration: 260409
loss: 1.0325528383255005,grad_norm: 0.9999993563305689, iteration: 260410
loss: 1.0006814002990723,grad_norm: 0.8955168234792972, iteration: 260411
loss: 0.9752374887466431,grad_norm: 0.9549401043248467, iteration: 260412
loss: 1.0164990425109863,grad_norm: 0.9999990658871414, iteration: 260413
loss: 1.0222985744476318,grad_norm: 0.8957959591183566, iteration: 260414
loss: 1.0090312957763672,grad_norm: 0.8369476129952834, iteration: 260415
loss: 0.9976879358291626,grad_norm: 0.9124249254831356, iteration: 260416
loss: 0.9918617010116577,grad_norm: 0.9545870274088677, iteration: 260417
loss: 1.0481072664260864,grad_norm: 0.863188714836113, iteration: 260418
loss: 0.9939372539520264,grad_norm: 0.8460359212534153, iteration: 260419
loss: 1.0161126852035522,grad_norm: 0.9284717256866843, iteration: 260420
loss: 1.0150578022003174,grad_norm: 0.8043904945257768, iteration: 260421
loss: 0.9853408932685852,grad_norm: 0.8244571089840109, iteration: 260422
loss: 0.9550817012786865,grad_norm: 0.98576744828619, iteration: 260423
loss: 0.9864994883537292,grad_norm: 0.8325837019425317, iteration: 260424
loss: 0.9877338409423828,grad_norm: 0.969069960887172, iteration: 260425
loss: 0.9962725043296814,grad_norm: 0.902551680538677, iteration: 260426
loss: 1.0179767608642578,grad_norm: 0.921009075167827, iteration: 260427
loss: 1.001837968826294,grad_norm: 0.7388237263283464, iteration: 260428
loss: 0.9640142321586609,grad_norm: 0.8456221372661119, iteration: 260429
loss: 0.9856923222541809,grad_norm: 0.8014929635082217, iteration: 260430
loss: 0.9770933389663696,grad_norm: 0.9999992661843974, iteration: 260431
loss: 0.9885557889938354,grad_norm: 0.9999991242293526, iteration: 260432
loss: 0.984386146068573,grad_norm: 0.9753820718362415, iteration: 260433
loss: 1.0156880617141724,grad_norm: 0.8578599365658839, iteration: 260434
loss: 0.9907082319259644,grad_norm: 0.9999991957947885, iteration: 260435
loss: 1.020350694656372,grad_norm: 0.8800427902847779, iteration: 260436
loss: 0.9982857704162598,grad_norm: 0.979995902363719, iteration: 260437
loss: 0.9683131575584412,grad_norm: 0.9671543769038453, iteration: 260438
loss: 0.9662099480628967,grad_norm: 0.8857248462537135, iteration: 260439
loss: 1.0345609188079834,grad_norm: 0.9999993898294959, iteration: 260440
loss: 0.9993951320648193,grad_norm: 0.9999991193125362, iteration: 260441
loss: 0.9975290298461914,grad_norm: 0.8013361301730196, iteration: 260442
loss: 0.9986274838447571,grad_norm: 0.9999991050377458, iteration: 260443
loss: 0.9723832607269287,grad_norm: 0.8909219203220119, iteration: 260444
loss: 0.9819327592849731,grad_norm: 0.7747218768666597, iteration: 260445
loss: 0.9790945053100586,grad_norm: 0.8206202143929863, iteration: 260446
loss: 1.0066606998443604,grad_norm: 0.9306409683389397, iteration: 260447
loss: 0.9837721586227417,grad_norm: 0.8951487832919507, iteration: 260448
loss: 0.9627454876899719,grad_norm: 0.9635242060085057, iteration: 260449
loss: 1.1357393264770508,grad_norm: 0.9999993735412217, iteration: 260450
loss: 0.9923939108848572,grad_norm: 0.8537987674064342, iteration: 260451
loss: 0.988274335861206,grad_norm: 0.93315958603549, iteration: 260452
loss: 1.019307017326355,grad_norm: 0.7306380324019652, iteration: 260453
loss: 1.0030139684677124,grad_norm: 0.8621645140566763, iteration: 260454
loss: 0.9822696447372437,grad_norm: 0.9310230846494303, iteration: 260455
loss: 1.0109350681304932,grad_norm: 0.8093871894901778, iteration: 260456
loss: 0.9599748253822327,grad_norm: 0.7212719592211679, iteration: 260457
loss: 0.9780318737030029,grad_norm: 0.8264352883769798, iteration: 260458
loss: 1.0117226839065552,grad_norm: 0.833696358098759, iteration: 260459
loss: 0.9974989295005798,grad_norm: 0.9999992502534002, iteration: 260460
loss: 1.0099167823791504,grad_norm: 0.7781931317287875, iteration: 260461
loss: 1.0833426713943481,grad_norm: 0.9999993496773459, iteration: 260462
loss: 0.9703119993209839,grad_norm: 0.9156231584806146, iteration: 260463
loss: 0.990375280380249,grad_norm: 0.9999991255782819, iteration: 260464
loss: 0.9818936586380005,grad_norm: 0.8963139984090205, iteration: 260465
loss: 1.0029959678649902,grad_norm: 0.6500549699398284, iteration: 260466
loss: 0.9629563689231873,grad_norm: 0.9999991463797888, iteration: 260467
loss: 1.006438136100769,grad_norm: 0.9229799172210922, iteration: 260468
loss: 1.019748330116272,grad_norm: 0.8699139613232395, iteration: 260469
loss: 0.9973239898681641,grad_norm: 0.9999992063886634, iteration: 260470
loss: 1.0126044750213623,grad_norm: 0.9710134624582869, iteration: 260471
loss: 0.9905562996864319,grad_norm: 0.9503916631918553, iteration: 260472
loss: 0.9813659191131592,grad_norm: 0.9193385745464857, iteration: 260473
loss: 1.019180178642273,grad_norm: 0.8013086611497253, iteration: 260474
loss: 1.0299988985061646,grad_norm: 0.9999991610838007, iteration: 260475
loss: 0.9838910102844238,grad_norm: 0.8989590060670348, iteration: 260476
loss: 0.9968010187149048,grad_norm: 0.9350862350726383, iteration: 260477
loss: 1.0336893796920776,grad_norm: 0.8687872370973868, iteration: 260478
loss: 0.9838723540306091,grad_norm: 0.8679586761694562, iteration: 260479
loss: 1.0072544813156128,grad_norm: 0.9818629096577424, iteration: 260480
loss: 1.012311339378357,grad_norm: 0.9043998334538292, iteration: 260481
loss: 0.9732441902160645,grad_norm: 0.8338620673655779, iteration: 260482
loss: 1.0078177452087402,grad_norm: 0.9999991769849035, iteration: 260483
loss: 1.0042263269424438,grad_norm: 0.9841116913851372, iteration: 260484
loss: 1.0278584957122803,grad_norm: 0.9999990958097578, iteration: 260485
loss: 0.9514158368110657,grad_norm: 0.8874941914608991, iteration: 260486
loss: 1.0242308378219604,grad_norm: 0.8364448281980627, iteration: 260487
loss: 0.9827791452407837,grad_norm: 0.8764365701302023, iteration: 260488
loss: 1.009647250175476,grad_norm: 0.8674935398934425, iteration: 260489
loss: 1.0649250745773315,grad_norm: 0.9999993065013563, iteration: 260490
loss: 0.9924732446670532,grad_norm: 0.99999918397605, iteration: 260491
loss: 0.9880536794662476,grad_norm: 0.8131127529118222, iteration: 260492
loss: 0.9786533117294312,grad_norm: 0.8490676321607649, iteration: 260493
loss: 0.9974663257598877,grad_norm: 0.7650812707260481, iteration: 260494
loss: 0.9907979369163513,grad_norm: 0.8221342894156326, iteration: 260495
loss: 1.0039888620376587,grad_norm: 0.8369080291256825, iteration: 260496
loss: 1.0130687952041626,grad_norm: 0.9671627337955448, iteration: 260497
loss: 0.9981083869934082,grad_norm: 0.8478976710873117, iteration: 260498
loss: 1.0371724367141724,grad_norm: 0.9126075920659409, iteration: 260499
loss: 1.0125811100006104,grad_norm: 0.7935519982114075, iteration: 260500
loss: 1.006671667098999,grad_norm: 0.8988414604139829, iteration: 260501
loss: 1.0077182054519653,grad_norm: 0.9885994600561103, iteration: 260502
loss: 1.0021696090698242,grad_norm: 0.7495287355158413, iteration: 260503
loss: 1.0238230228424072,grad_norm: 0.9318159425443866, iteration: 260504
loss: 1.0057402849197388,grad_norm: 0.6904310478048005, iteration: 260505
loss: 1.0169401168823242,grad_norm: 0.9472145599804663, iteration: 260506
loss: 1.0708080530166626,grad_norm: 0.9999996724395167, iteration: 260507
loss: 1.0051192045211792,grad_norm: 0.7717015885963804, iteration: 260508
loss: 1.047961711883545,grad_norm: 0.9999998366192419, iteration: 260509
loss: 0.9912256598472595,grad_norm: 0.831792539284176, iteration: 260510
loss: 0.9920538663864136,grad_norm: 0.9964358563628881, iteration: 260511
loss: 0.9979398846626282,grad_norm: 0.9789086794891523, iteration: 260512
loss: 1.0218851566314697,grad_norm: 0.7989816797141929, iteration: 260513
loss: 1.0047862529754639,grad_norm: 0.9946985748950177, iteration: 260514
loss: 0.9602329134941101,grad_norm: 0.8376151012828605, iteration: 260515
loss: 0.9908602237701416,grad_norm: 0.6940817777647941, iteration: 260516
loss: 0.9723518490791321,grad_norm: 0.8746989313532315, iteration: 260517
loss: 1.0253134965896606,grad_norm: 0.9999991419878012, iteration: 260518
loss: 0.9935069680213928,grad_norm: 0.9999991918896117, iteration: 260519
loss: 0.9920458793640137,grad_norm: 0.9177019319611354, iteration: 260520
loss: 0.9929642081260681,grad_norm: 0.9320034657596725, iteration: 260521
loss: 0.9760000705718994,grad_norm: 0.9028376518022148, iteration: 260522
loss: 1.0038840770721436,grad_norm: 0.9999991466873608, iteration: 260523
loss: 0.9965341687202454,grad_norm: 0.9842802519560966, iteration: 260524
loss: 0.9980694651603699,grad_norm: 0.8835651907751272, iteration: 260525
loss: 1.0034633874893188,grad_norm: 0.9182583057339669, iteration: 260526
loss: 0.9709287285804749,grad_norm: 0.8953913914129504, iteration: 260527
loss: 0.9827812910079956,grad_norm: 0.8803365781046504, iteration: 260528
loss: 1.0228215456008911,grad_norm: 0.7389205837749956, iteration: 260529
loss: 0.9832463264465332,grad_norm: 0.860189823780235, iteration: 260530
loss: 0.9924380779266357,grad_norm: 0.7844813845675204, iteration: 260531
loss: 0.9907576441764832,grad_norm: 0.8849323209003428, iteration: 260532
loss: 0.9635903239250183,grad_norm: 0.9188496147023274, iteration: 260533
loss: 1.0365455150604248,grad_norm: 0.8644041915380181, iteration: 260534
loss: 1.0197495222091675,grad_norm: 0.8349855329434049, iteration: 260535
loss: 0.9701460003852844,grad_norm: 0.9862165071780249, iteration: 260536
loss: 0.9984057545661926,grad_norm: 0.8949681956616753, iteration: 260537
loss: 0.9824132323265076,grad_norm: 0.9999991783558071, iteration: 260538
loss: 1.0244393348693848,grad_norm: 0.980991684270733, iteration: 260539
loss: 1.0388332605361938,grad_norm: 0.8599154022871517, iteration: 260540
loss: 0.9981133937835693,grad_norm: 0.7822530633443735, iteration: 260541
loss: 1.005839467048645,grad_norm: 0.8388930458200634, iteration: 260542
loss: 0.9899062514305115,grad_norm: 0.8972134062952002, iteration: 260543
loss: 0.9853331446647644,grad_norm: 0.9999993424258039, iteration: 260544
loss: 0.9464458227157593,grad_norm: 0.8919462121356938, iteration: 260545
loss: 1.0222601890563965,grad_norm: 0.8945416219262423, iteration: 260546
loss: 1.0078356266021729,grad_norm: 0.8458730908447117, iteration: 260547
loss: 1.020449161529541,grad_norm: 0.8899301418987062, iteration: 260548
loss: 0.9742723703384399,grad_norm: 0.8899138207214269, iteration: 260549
loss: 1.000383973121643,grad_norm: 0.9999991170242281, iteration: 260550
loss: 1.0189145803451538,grad_norm: 0.716554648453118, iteration: 260551
loss: 1.0033906698226929,grad_norm: 0.9085808547574639, iteration: 260552
loss: 0.991458535194397,grad_norm: 0.7267642785372875, iteration: 260553
loss: 0.9873350858688354,grad_norm: 0.890908679583508, iteration: 260554
loss: 0.9768484234809875,grad_norm: 0.7747296884913842, iteration: 260555
loss: 0.9447347521781921,grad_norm: 0.8529747241392899, iteration: 260556
loss: 0.9700886011123657,grad_norm: 0.9999990414926756, iteration: 260557
loss: 1.017966628074646,grad_norm: 0.9742369368063234, iteration: 260558
loss: 1.0061171054840088,grad_norm: 0.8429700169962457, iteration: 260559
loss: 0.9571847319602966,grad_norm: 0.9254633604585477, iteration: 260560
loss: 0.9956505298614502,grad_norm: 0.8734860466377122, iteration: 260561
loss: 0.9724680781364441,grad_norm: 0.899571122968421, iteration: 260562
loss: 1.0055248737335205,grad_norm: 0.9789841759905458, iteration: 260563
loss: 0.9972286224365234,grad_norm: 0.9999990619320497, iteration: 260564
loss: 1.002600908279419,grad_norm: 0.9956475069191211, iteration: 260565
loss: 1.0325028896331787,grad_norm: 0.9268412387424201, iteration: 260566
loss: 1.0292423963546753,grad_norm: 0.7895666189416758, iteration: 260567
loss: 1.0202674865722656,grad_norm: 0.9999990779214921, iteration: 260568
loss: 0.9769841432571411,grad_norm: 0.9769322598064579, iteration: 260569
loss: 1.0050290822982788,grad_norm: 0.7705968728686531, iteration: 260570
loss: 1.0362894535064697,grad_norm: 0.9999992974867807, iteration: 260571
loss: 1.0053648948669434,grad_norm: 0.90733872770003, iteration: 260572
loss: 1.0436463356018066,grad_norm: 0.9963242712614253, iteration: 260573
loss: 0.9962764382362366,grad_norm: 0.8775329833242314, iteration: 260574
loss: 1.0702506303787231,grad_norm: 0.9239640654003859, iteration: 260575
loss: 1.1604351997375488,grad_norm: 0.9999992832053437, iteration: 260576
loss: 1.0393058061599731,grad_norm: 0.944876835386514, iteration: 260577
loss: 1.012704849243164,grad_norm: 0.9999992842890925, iteration: 260578
loss: 0.999356210231781,grad_norm: 0.9999991539086092, iteration: 260579
loss: 0.9642279148101807,grad_norm: 0.9999990246486036, iteration: 260580
loss: 1.00197172164917,grad_norm: 0.9999990323442419, iteration: 260581
loss: 1.0078911781311035,grad_norm: 0.9999989688506381, iteration: 260582
loss: 1.0100748538970947,grad_norm: 0.7759677241786692, iteration: 260583
loss: 0.986373245716095,grad_norm: 0.8992229697324959, iteration: 260584
loss: 1.0034853219985962,grad_norm: 0.9435851817652992, iteration: 260585
loss: 0.9998266696929932,grad_norm: 0.8580664429563932, iteration: 260586
loss: 0.9657180309295654,grad_norm: 0.8262366755239263, iteration: 260587
loss: 0.9859200119972229,grad_norm: 0.8446287588996839, iteration: 260588
loss: 1.0095257759094238,grad_norm: 0.940362270922474, iteration: 260589
loss: 0.9864978790283203,grad_norm: 0.9999991919888365, iteration: 260590
loss: 0.9639044404029846,grad_norm: 0.8495208631327772, iteration: 260591
loss: 0.9981210231781006,grad_norm: 0.999999150697144, iteration: 260592
loss: 0.981886088848114,grad_norm: 0.7499238179838863, iteration: 260593
loss: 0.9995911717414856,grad_norm: 0.9999992648196526, iteration: 260594
loss: 0.9930603504180908,grad_norm: 0.8036723472753876, iteration: 260595
loss: 1.0243167877197266,grad_norm: 0.8555531130848968, iteration: 260596
loss: 1.005674958229065,grad_norm: 0.999999410660195, iteration: 260597
loss: 1.0422239303588867,grad_norm: 0.8187622712479152, iteration: 260598
loss: 1.0073529481887817,grad_norm: 0.8345604488379816, iteration: 260599
loss: 1.04121994972229,grad_norm: 0.9999991243437247, iteration: 260600
loss: 1.0059595108032227,grad_norm: 0.8968998642073416, iteration: 260601
loss: 1.0924410820007324,grad_norm: 0.9443370366807433, iteration: 260602
loss: 0.9802004098892212,grad_norm: 0.9260920566932427, iteration: 260603
loss: 1.0793161392211914,grad_norm: 0.9999994090670884, iteration: 260604
loss: 0.9870165586471558,grad_norm: 0.8743182350404987, iteration: 260605
loss: 1.004582405090332,grad_norm: 0.9999994382532635, iteration: 260606
loss: 1.0052058696746826,grad_norm: 0.7985545899374126, iteration: 260607
loss: 1.0099848508834839,grad_norm: 0.9999990825669391, iteration: 260608
loss: 1.011995553970337,grad_norm: 0.9196514710558162, iteration: 260609
loss: 1.1004441976547241,grad_norm: 0.9999992493945387, iteration: 260610
loss: 1.083717703819275,grad_norm: 0.9999990735589782, iteration: 260611
loss: 0.9698441028594971,grad_norm: 0.9903874783244849, iteration: 260612
loss: 1.0405296087265015,grad_norm: 0.8536376270259534, iteration: 260613
loss: 0.9690699577331543,grad_norm: 0.8497917878376469, iteration: 260614
loss: 0.9714232087135315,grad_norm: 0.9419386139412202, iteration: 260615
loss: 0.9768998026847839,grad_norm: 0.8293979764600025, iteration: 260616
loss: 0.9703712463378906,grad_norm: 0.9012414080557668, iteration: 260617
loss: 1.0180273056030273,grad_norm: 0.9999989405108736, iteration: 260618
loss: 0.981562077999115,grad_norm: 0.7866575298811704, iteration: 260619
loss: 0.9903387427330017,grad_norm: 0.9999990154749216, iteration: 260620
loss: 1.0004075765609741,grad_norm: 0.9496254938147848, iteration: 260621
loss: 1.0338587760925293,grad_norm: 0.9284658533991249, iteration: 260622
loss: 1.0018209218978882,grad_norm: 0.8597466986360256, iteration: 260623
loss: 0.9580734372138977,grad_norm: 0.9156230739536464, iteration: 260624
loss: 0.9928661584854126,grad_norm: 0.755365490559678, iteration: 260625
loss: 0.9719339609146118,grad_norm: 0.9557454631996719, iteration: 260626
loss: 1.003275990486145,grad_norm: 0.826416841604251, iteration: 260627
loss: 1.0017060041427612,grad_norm: 0.9999991540632126, iteration: 260628
loss: 0.9643939733505249,grad_norm: 0.9114575905907979, iteration: 260629
loss: 1.0139293670654297,grad_norm: 0.7720335892832143, iteration: 260630
loss: 1.0019352436065674,grad_norm: 0.8530705081039576, iteration: 260631
loss: 1.0176504850387573,grad_norm: 0.7648590048064504, iteration: 260632
loss: 1.0061314105987549,grad_norm: 0.7488984063605758, iteration: 260633
loss: 1.018371820449829,grad_norm: 0.9720719806749122, iteration: 260634
loss: 1.0195592641830444,grad_norm: 0.9999991245262757, iteration: 260635
loss: 1.0013047456741333,grad_norm: 0.9973465593940741, iteration: 260636
loss: 0.9999437928199768,grad_norm: 0.9596125063508512, iteration: 260637
loss: 1.0114893913269043,grad_norm: 0.9126014744934319, iteration: 260638
loss: 1.0191940069198608,grad_norm: 0.7962218729686813, iteration: 260639
loss: 0.9915598630905151,grad_norm: 0.7226217394967461, iteration: 260640
loss: 0.9806888699531555,grad_norm: 0.893758576308566, iteration: 260641
loss: 1.0178049802780151,grad_norm: 0.859901721936452, iteration: 260642
loss: 1.0233913660049438,grad_norm: 0.9081043334452978, iteration: 260643
loss: 0.9892253875732422,grad_norm: 0.8434896427353662, iteration: 260644
loss: 0.960943341255188,grad_norm: 0.9336533001848926, iteration: 260645
loss: 1.1155128479003906,grad_norm: 0.999999883587357, iteration: 260646
loss: 1.0275975465774536,grad_norm: 0.9117414760962195, iteration: 260647
loss: 1.0168564319610596,grad_norm: 0.9999990838682836, iteration: 260648
loss: 0.979124128818512,grad_norm: 0.9030883865555279, iteration: 260649
loss: 1.0055632591247559,grad_norm: 0.803873235106242, iteration: 260650
loss: 0.9972912669181824,grad_norm: 0.8539534268025721, iteration: 260651
loss: 0.9999641180038452,grad_norm: 0.8853963621986017, iteration: 260652
loss: 1.0064109563827515,grad_norm: 0.8081805718750558, iteration: 260653
loss: 1.0546075105667114,grad_norm: 0.9999996142245753, iteration: 260654
loss: 1.1120580434799194,grad_norm: 0.999999504652115, iteration: 260655
loss: 0.9792026281356812,grad_norm: 0.871321615440705, iteration: 260656
loss: 0.9846558570861816,grad_norm: 0.9999991548299981, iteration: 260657
loss: 1.0713380575180054,grad_norm: 0.9999998782043285, iteration: 260658
loss: 1.023247480392456,grad_norm: 0.8525597707901207, iteration: 260659
loss: 0.9655284285545349,grad_norm: 0.7881296559975648, iteration: 260660
loss: 0.9668924808502197,grad_norm: 0.8897992363817071, iteration: 260661
loss: 1.0723707675933838,grad_norm: 0.7876377127328688, iteration: 260662
loss: 0.9632726907730103,grad_norm: 0.843177959511032, iteration: 260663
loss: 1.0107170343399048,grad_norm: 0.8518715314096544, iteration: 260664
loss: 1.0014677047729492,grad_norm: 0.8948033085780622, iteration: 260665
loss: 1.0080386400222778,grad_norm: 0.8508778842945475, iteration: 260666
loss: 1.0818605422973633,grad_norm: 0.8315061516094978, iteration: 260667
loss: 0.9671517014503479,grad_norm: 0.8627984617951875, iteration: 260668
loss: 1.1126723289489746,grad_norm: 0.9999996197450607, iteration: 260669
loss: 1.0186984539031982,grad_norm: 0.8879759759837139, iteration: 260670
loss: 1.0320544242858887,grad_norm: 0.9999989761517656, iteration: 260671
loss: 1.0149273872375488,grad_norm: 0.909188890763679, iteration: 260672
loss: 1.0013940334320068,grad_norm: 0.9927403215849171, iteration: 260673
loss: 0.9962916970252991,grad_norm: 0.8422998593794737, iteration: 260674
loss: 0.9975535273551941,grad_norm: 0.9989184487074428, iteration: 260675
loss: 1.0232127904891968,grad_norm: 0.9999992234260547, iteration: 260676
loss: 1.1672272682189941,grad_norm: 0.9999996161792076, iteration: 260677
loss: 0.9949498176574707,grad_norm: 0.7471203775111379, iteration: 260678
loss: 1.066332221031189,grad_norm: 0.9999999199999334, iteration: 260679
loss: 0.9615231156349182,grad_norm: 0.8139223927702759, iteration: 260680
loss: 0.9549641013145447,grad_norm: 0.8651908611477973, iteration: 260681
loss: 1.0299842357635498,grad_norm: 0.9650283979300551, iteration: 260682
loss: 0.9926518797874451,grad_norm: 0.9336752332497146, iteration: 260683
loss: 1.0241172313690186,grad_norm: 0.9172821059185026, iteration: 260684
loss: 1.0043413639068604,grad_norm: 0.9631481641168342, iteration: 260685
loss: 1.0622385740280151,grad_norm: 0.9999998361160626, iteration: 260686
loss: 1.0005992650985718,grad_norm: 0.961876954024939, iteration: 260687
loss: 1.0044524669647217,grad_norm: 0.8787184504465684, iteration: 260688
loss: 1.0327152013778687,grad_norm: 0.9283601897420187, iteration: 260689
loss: 1.0052138566970825,grad_norm: 0.8286684915337077, iteration: 260690
loss: 1.0205655097961426,grad_norm: 0.9488971128766215, iteration: 260691
loss: 1.0113518238067627,grad_norm: 0.9999992417388996, iteration: 260692
loss: 1.007384181022644,grad_norm: 0.9999991610220696, iteration: 260693
loss: 1.0135610103607178,grad_norm: 0.8338649629986504, iteration: 260694
loss: 1.054819941520691,grad_norm: 0.9999991441244674, iteration: 260695
loss: 0.9826756715774536,grad_norm: 0.9819448650593677, iteration: 260696
loss: 0.9835943579673767,grad_norm: 0.937508618752655, iteration: 260697
loss: 0.991554856300354,grad_norm: 0.9226321668376224, iteration: 260698
loss: 0.9582772850990295,grad_norm: 0.9999989850099675, iteration: 260699
loss: 0.9789816737174988,grad_norm: 0.9692720723966232, iteration: 260700
loss: 0.9887908697128296,grad_norm: 0.9999989836474552, iteration: 260701
loss: 0.9987215995788574,grad_norm: 0.9999991097597175, iteration: 260702
loss: 1.0036404132843018,grad_norm: 0.8956555787576225, iteration: 260703
loss: 0.9845467805862427,grad_norm: 0.9999991616992389, iteration: 260704
loss: 0.9706060886383057,grad_norm: 0.8782198635085017, iteration: 260705
loss: 1.0226225852966309,grad_norm: 1.0000000594810745, iteration: 260706
loss: 1.003082513809204,grad_norm: 0.8987846986714073, iteration: 260707
loss: 1.0417131185531616,grad_norm: 0.9999991497499086, iteration: 260708
loss: 0.9787631034851074,grad_norm: 0.958014361338358, iteration: 260709
loss: 0.9859132766723633,grad_norm: 0.7458043928175364, iteration: 260710
loss: 1.0311152935028076,grad_norm: 0.9176225723101364, iteration: 260711
loss: 1.0277314186096191,grad_norm: 0.9999992187575759, iteration: 260712
loss: 1.0099849700927734,grad_norm: 0.8532096474568773, iteration: 260713
loss: 0.9739755988121033,grad_norm: 0.8721173730284685, iteration: 260714
loss: 0.9710930585861206,grad_norm: 0.9276908401535308, iteration: 260715
loss: 0.9715854525566101,grad_norm: 0.8978611494369665, iteration: 260716
loss: 1.0380016565322876,grad_norm: 0.8684220083684757, iteration: 260717
loss: 1.0048012733459473,grad_norm: 0.9999990458984389, iteration: 260718
loss: 0.9701719284057617,grad_norm: 0.8292093913676517, iteration: 260719
loss: 1.0210801362991333,grad_norm: 0.6912434165033519, iteration: 260720
loss: 0.9934173822402954,grad_norm: 0.9072007319135955, iteration: 260721
loss: 1.0018833875656128,grad_norm: 0.9999989704219324, iteration: 260722
loss: 0.9908859729766846,grad_norm: 0.8177981597287429, iteration: 260723
loss: 0.9873824715614319,grad_norm: 0.9621794179585113, iteration: 260724
loss: 1.0014773607254028,grad_norm: 0.908575963894595, iteration: 260725
loss: 1.0377588272094727,grad_norm: 0.7814885609853871, iteration: 260726
loss: 0.9628798365592957,grad_norm: 0.9165420922917932, iteration: 260727
loss: 0.9894590377807617,grad_norm: 0.8689134200115184, iteration: 260728
loss: 1.0188757181167603,grad_norm: 0.9999991594803839, iteration: 260729
loss: 0.990900456905365,grad_norm: 0.9999991090493198, iteration: 260730
loss: 0.9953874945640564,grad_norm: 0.8792087272586205, iteration: 260731
loss: 0.9930728673934937,grad_norm: 0.9016619552448798, iteration: 260732
loss: 1.008705496788025,grad_norm: 0.8553820662633163, iteration: 260733
loss: 1.0239545106887817,grad_norm: 0.8816738393848069, iteration: 260734
loss: 1.033501386642456,grad_norm: 0.999998978729203, iteration: 260735
loss: 1.0224063396453857,grad_norm: 0.9243207443828406, iteration: 260736
loss: 0.9659250974655151,grad_norm: 0.8808246848074254, iteration: 260737
loss: 0.9734424352645874,grad_norm: 0.8722264354997261, iteration: 260738
loss: 1.0144832134246826,grad_norm: 0.980580105007047, iteration: 260739
loss: 1.0062661170959473,grad_norm: 0.999999134217096, iteration: 260740
loss: 1.0137985944747925,grad_norm: 0.9103548688648001, iteration: 260741
loss: 0.9687701463699341,grad_norm: 0.9999991283362478, iteration: 260742
loss: 1.0081583261489868,grad_norm: 0.9462810400037379, iteration: 260743
loss: 1.0069981813430786,grad_norm: 0.9999990092561567, iteration: 260744
loss: 1.0071607828140259,grad_norm: 0.9276392047362431, iteration: 260745
loss: 1.0065574645996094,grad_norm: 0.999999086866085, iteration: 260746
loss: 0.9918560981750488,grad_norm: 0.8228977864232456, iteration: 260747
loss: 0.9778923988342285,grad_norm: 0.8585370652892507, iteration: 260748
loss: 1.0042152404785156,grad_norm: 0.8997297749755895, iteration: 260749
loss: 1.0017009973526,grad_norm: 0.7920565094686386, iteration: 260750
loss: 0.9873793125152588,grad_norm: 0.7202200000952984, iteration: 260751
loss: 1.0220891237258911,grad_norm: 0.9999994007331742, iteration: 260752
loss: 1.0261787176132202,grad_norm: 0.9999996111601623, iteration: 260753
loss: 0.9965232610702515,grad_norm: 0.932520498629902, iteration: 260754
loss: 1.0208498239517212,grad_norm: 0.8869173353465075, iteration: 260755
loss: 1.0154935121536255,grad_norm: 0.8074472353971386, iteration: 260756
loss: 1.018445611000061,grad_norm: 0.707993001541608, iteration: 260757
loss: 0.9980553984642029,grad_norm: 0.965739347012425, iteration: 260758
loss: 0.9984139204025269,grad_norm: 0.9388916179424643, iteration: 260759
loss: 0.9971470832824707,grad_norm: 0.7393965447099523, iteration: 260760
loss: 1.032027006149292,grad_norm: 0.9977082194520226, iteration: 260761
loss: 1.0311505794525146,grad_norm: 0.938622125782223, iteration: 260762
loss: 1.0026527643203735,grad_norm: 0.9999991344204932, iteration: 260763
loss: 0.9948415756225586,grad_norm: 0.8531233396440573, iteration: 260764
loss: 0.9941800832748413,grad_norm: 0.7828507546341693, iteration: 260765
loss: 1.0051047801971436,grad_norm: 0.8906123289666829, iteration: 260766
loss: 0.9853711724281311,grad_norm: 0.9999992063137788, iteration: 260767
loss: 0.9860993027687073,grad_norm: 0.9463203058154852, iteration: 260768
loss: 1.0098881721496582,grad_norm: 0.9665746586468945, iteration: 260769
loss: 1.0065757036209106,grad_norm: 0.9281313331768032, iteration: 260770
loss: 0.9857006072998047,grad_norm: 0.8169839616985674, iteration: 260771
loss: 0.9912241101264954,grad_norm: 0.8513946075711277, iteration: 260772
loss: 0.9989510178565979,grad_norm: 0.8596607400498157, iteration: 260773
loss: 1.004075288772583,grad_norm: 0.8376344467440229, iteration: 260774
loss: 1.0030158758163452,grad_norm: 0.8202737557227144, iteration: 260775
loss: 0.979162871837616,grad_norm: 0.9999989993410987, iteration: 260776
loss: 1.007105827331543,grad_norm: 0.7942706410237266, iteration: 260777
loss: 1.0692135095596313,grad_norm: 0.9524491174870001, iteration: 260778
loss: 0.9722532033920288,grad_norm: 0.7781828082662577, iteration: 260779
loss: 0.9918980002403259,grad_norm: 0.9999992536565906, iteration: 260780
loss: 0.9887183904647827,grad_norm: 0.9999991400584073, iteration: 260781
loss: 0.9778628349304199,grad_norm: 0.8785414547931432, iteration: 260782
loss: 1.0058751106262207,grad_norm: 0.9497481217431175, iteration: 260783
loss: 1.0025427341461182,grad_norm: 0.8961808082311267, iteration: 260784
loss: 1.0076395273208618,grad_norm: 0.8779692841692346, iteration: 260785
loss: 0.9646021723747253,grad_norm: 0.7353062560688359, iteration: 260786
loss: 1.001530408859253,grad_norm: 0.7281023872107218, iteration: 260787
loss: 1.034837007522583,grad_norm: 0.9999996316929106, iteration: 260788
loss: 0.9836633205413818,grad_norm: 0.8046273103219577, iteration: 260789
loss: 0.9673636555671692,grad_norm: 0.9278408548496982, iteration: 260790
loss: 1.0228362083435059,grad_norm: 0.9045503790328024, iteration: 260791
loss: 1.0025385618209839,grad_norm: 0.9261597174848619, iteration: 260792
loss: 0.9833217859268188,grad_norm: 0.8578418257916058, iteration: 260793
loss: 1.017773985862732,grad_norm: 0.9942497278609355, iteration: 260794
loss: 0.9959166646003723,grad_norm: 0.8591321378875165, iteration: 260795
loss: 1.0294020175933838,grad_norm: 0.9352658904547776, iteration: 260796
loss: 1.0443240404129028,grad_norm: 0.9300429120046819, iteration: 260797
loss: 0.9728993773460388,grad_norm: 0.8354368867737115, iteration: 260798
loss: 0.9963157176971436,grad_norm: 0.9714383947680931, iteration: 260799
loss: 1.0317437648773193,grad_norm: 0.7309955099598604, iteration: 260800
loss: 1.004029393196106,grad_norm: 0.9391509694487334, iteration: 260801
loss: 1.0211232900619507,grad_norm: 0.8624281619945477, iteration: 260802
loss: 1.043434500694275,grad_norm: 0.9999992823000429, iteration: 260803
loss: 1.021188735961914,grad_norm: 0.8882605842598251, iteration: 260804
loss: 1.0266236066818237,grad_norm: 0.8470506631613359, iteration: 260805
loss: 1.017562985420227,grad_norm: 0.8524629483658045, iteration: 260806
loss: 1.0027705430984497,grad_norm: 0.8324328668376066, iteration: 260807
loss: 0.9964845776557922,grad_norm: 0.9566058729537312, iteration: 260808
loss: 0.9930548667907715,grad_norm: 0.8569537957586152, iteration: 260809
loss: 1.030009388923645,grad_norm: 0.9999990701203783, iteration: 260810
loss: 1.0033904314041138,grad_norm: 0.7449385142661772, iteration: 260811
loss: 1.0198699235916138,grad_norm: 0.8353444765429935, iteration: 260812
loss: 1.024247407913208,grad_norm: 0.912956048831631, iteration: 260813
loss: 0.995053231716156,grad_norm: 0.999999213140964, iteration: 260814
loss: 0.9885621666908264,grad_norm: 0.7855630460692069, iteration: 260815
loss: 0.9907861948013306,grad_norm: 0.9999991748822603, iteration: 260816
loss: 1.0465797185897827,grad_norm: 0.9213949110428473, iteration: 260817
loss: 1.0051765441894531,grad_norm: 0.999999162446577, iteration: 260818
loss: 0.9982345700263977,grad_norm: 0.8911746360159276, iteration: 260819
loss: 0.9851164817810059,grad_norm: 0.8228727621900357, iteration: 260820
loss: 1.0183860063552856,grad_norm: 0.9999997279519504, iteration: 260821
loss: 1.0045666694641113,grad_norm: 0.9025968552780262, iteration: 260822
loss: 0.9809204339981079,grad_norm: 0.9476811175807838, iteration: 260823
loss: 1.011107087135315,grad_norm: 0.9600392977006099, iteration: 260824
loss: 0.9974308609962463,grad_norm: 0.8347250526787696, iteration: 260825
loss: 0.9796353578567505,grad_norm: 0.8800087700692438, iteration: 260826
loss: 0.970924437046051,grad_norm: 0.8833042866641945, iteration: 260827
loss: 0.9946002960205078,grad_norm: 0.9999990010274241, iteration: 260828
loss: 1.0016344785690308,grad_norm: 0.8651912346371126, iteration: 260829
loss: 1.0004324913024902,grad_norm: 0.77026354777328, iteration: 260830
loss: 0.9893003106117249,grad_norm: 0.8390922614000024, iteration: 260831
loss: 0.9801327586174011,grad_norm: 0.9999992058570341, iteration: 260832
loss: 0.9904384613037109,grad_norm: 0.9480409170858253, iteration: 260833
loss: 1.0346015691757202,grad_norm: 0.9999995504875461, iteration: 260834
loss: 1.0209184885025024,grad_norm: 0.8007229021435236, iteration: 260835
loss: 0.9907861351966858,grad_norm: 0.9999990777783461, iteration: 260836
loss: 1.0156428813934326,grad_norm: 0.7485409857026075, iteration: 260837
loss: 1.024483561515808,grad_norm: 0.7098574402926208, iteration: 260838
loss: 1.0123167037963867,grad_norm: 0.9999991502300848, iteration: 260839
loss: 1.0131381750106812,grad_norm: 0.8588865518207323, iteration: 260840
loss: 0.9967364072799683,grad_norm: 0.8716066200276364, iteration: 260841
loss: 0.9996910095214844,grad_norm: 0.9528762254100316, iteration: 260842
loss: 1.0028362274169922,grad_norm: 0.9999999576430966, iteration: 260843
loss: 0.998016357421875,grad_norm: 0.8632032613540525, iteration: 260844
loss: 1.0090655088424683,grad_norm: 0.8943778928292325, iteration: 260845
loss: 1.0351524353027344,grad_norm: 0.9999990681126878, iteration: 260846
loss: 1.00578773021698,grad_norm: 0.9999989661520994, iteration: 260847
loss: 1.0040990114212036,grad_norm: 0.7260840540452085, iteration: 260848
loss: 1.0047187805175781,grad_norm: 0.9999991630273034, iteration: 260849
loss: 1.0113004446029663,grad_norm: 0.9628314811244193, iteration: 260850
loss: 1.008825659751892,grad_norm: 0.9371745669798522, iteration: 260851
loss: 1.0060349702835083,grad_norm: 0.9999989915970394, iteration: 260852
loss: 1.0194729566574097,grad_norm: 0.9686529916784997, iteration: 260853
loss: 0.9723397493362427,grad_norm: 0.9658576061712463, iteration: 260854
loss: 1.0034806728363037,grad_norm: 0.9999989522031012, iteration: 260855
loss: 0.9777631759643555,grad_norm: 0.9999990545665565, iteration: 260856
loss: 1.0187796354293823,grad_norm: 0.9508168225250284, iteration: 260857
loss: 0.9747897982597351,grad_norm: 0.8727562588305356, iteration: 260858
loss: 1.0181007385253906,grad_norm: 0.9529368585738851, iteration: 260859
loss: 1.0358760356903076,grad_norm: 0.9835547054009798, iteration: 260860
loss: 1.0154101848602295,grad_norm: 0.9999991566757523, iteration: 260861
loss: 1.014526128768921,grad_norm: 0.7297300578338237, iteration: 260862
loss: 0.991808295249939,grad_norm: 0.7955402265370475, iteration: 260863
loss: 0.986349880695343,grad_norm: 0.8556795284650376, iteration: 260864
loss: 1.005444884300232,grad_norm: 0.9999990054951573, iteration: 260865
loss: 0.9842884540557861,grad_norm: 0.921840317039421, iteration: 260866
loss: 1.0047314167022705,grad_norm: 0.8406677561705143, iteration: 260867
loss: 1.0927066802978516,grad_norm: 0.9999997460474834, iteration: 260868
loss: 1.002970576286316,grad_norm: 0.9693274983867035, iteration: 260869
loss: 0.9885364770889282,grad_norm: 0.9121574022094928, iteration: 260870
loss: 0.9417892098426819,grad_norm: 0.8665223261944239, iteration: 260871
loss: 0.979957103729248,grad_norm: 0.9409426071254435, iteration: 260872
loss: 0.9930276274681091,grad_norm: 0.8561474316895606, iteration: 260873
loss: 0.9945785999298096,grad_norm: 0.7371268807233899, iteration: 260874
loss: 1.0016303062438965,grad_norm: 0.9999991457453719, iteration: 260875
loss: 0.9391212463378906,grad_norm: 0.9193251357056269, iteration: 260876
loss: 0.9906700849533081,grad_norm: 0.9999990889132547, iteration: 260877
loss: 1.0095750093460083,grad_norm: 0.9593769279377801, iteration: 260878
loss: 1.005832314491272,grad_norm: 0.9999989584044797, iteration: 260879
loss: 1.024341344833374,grad_norm: 0.8736228645109394, iteration: 260880
loss: 0.9756990075111389,grad_norm: 0.8789923865659707, iteration: 260881
loss: 1.0145423412322998,grad_norm: 0.8741431188706376, iteration: 260882
loss: 0.9858891367912292,grad_norm: 0.8532483855359125, iteration: 260883
loss: 1.0104700326919556,grad_norm: 0.7969316124103246, iteration: 260884
loss: 1.0006356239318848,grad_norm: 0.7383795412420161, iteration: 260885
loss: 1.0149059295654297,grad_norm: 0.9999994810531262, iteration: 260886
loss: 1.0077197551727295,grad_norm: 0.881097450580812, iteration: 260887
loss: 0.9760516881942749,grad_norm: 0.9596695923184775, iteration: 260888
loss: 0.9874372482299805,grad_norm: 0.8337269075917841, iteration: 260889
loss: 1.0059221982955933,grad_norm: 0.9450759920807053, iteration: 260890
loss: 1.059931755065918,grad_norm: 0.9126718007996225, iteration: 260891
loss: 0.9789942502975464,grad_norm: 0.9758930080220138, iteration: 260892
loss: 1.021063208580017,grad_norm: 0.8148430418246282, iteration: 260893
loss: 1.0068809986114502,grad_norm: 0.9999991902757378, iteration: 260894
loss: 0.9752447605133057,grad_norm: 0.8144039561076124, iteration: 260895
loss: 0.9784976243972778,grad_norm: 0.9508281631249018, iteration: 260896
loss: 0.9780706763267517,grad_norm: 0.9999991402733325, iteration: 260897
loss: 0.9855546951293945,grad_norm: 0.8106420886257804, iteration: 260898
loss: 1.0072572231292725,grad_norm: 0.9789548656892255, iteration: 260899
loss: 0.9947598576545715,grad_norm: 0.9796394878879452, iteration: 260900
loss: 1.0320788621902466,grad_norm: 0.7548014015084398, iteration: 260901
loss: 0.9658467173576355,grad_norm: 0.8542435257110607, iteration: 260902
loss: 1.0261034965515137,grad_norm: 0.9999994263128021, iteration: 260903
loss: 0.9866950511932373,grad_norm: 0.7746304060982627, iteration: 260904
loss: 0.9698246121406555,grad_norm: 0.9954782448523604, iteration: 260905
loss: 0.9577564001083374,grad_norm: 0.9604161886710514, iteration: 260906
loss: 0.9910807609558105,grad_norm: 0.8114713125934234, iteration: 260907
loss: 0.9926654696464539,grad_norm: 0.9555118758343255, iteration: 260908
loss: 1.0131696462631226,grad_norm: 0.9944532754166032, iteration: 260909
loss: 1.0002110004425049,grad_norm: 0.8082937267371032, iteration: 260910
loss: 1.0078374147415161,grad_norm: 0.9999990098689847, iteration: 260911
loss: 0.9877260327339172,grad_norm: 0.9594503020673091, iteration: 260912
loss: 0.9927446246147156,grad_norm: 0.9444126870890168, iteration: 260913
loss: 0.9669629335403442,grad_norm: 0.9999990202740843, iteration: 260914
loss: 0.9878281950950623,grad_norm: 0.9890018829341637, iteration: 260915
loss: 1.022396445274353,grad_norm: 0.7862201607870123, iteration: 260916
loss: 0.9817283153533936,grad_norm: 0.8693135422575132, iteration: 260917
loss: 1.0168834924697876,grad_norm: 0.9141250649109337, iteration: 260918
loss: 1.0159443616867065,grad_norm: 0.8174991985457212, iteration: 260919
loss: 1.0135494470596313,grad_norm: 0.941258945572647, iteration: 260920
loss: 1.015098214149475,grad_norm: 0.83147284596865, iteration: 260921
loss: 0.9730619788169861,grad_norm: 0.9423032901192985, iteration: 260922
loss: 1.0094473361968994,grad_norm: 0.9161836007133156, iteration: 260923
loss: 0.987074077129364,grad_norm: 0.8821699778866328, iteration: 260924
loss: 0.9629684090614319,grad_norm: 0.9999990100240947, iteration: 260925
loss: 0.982996940612793,grad_norm: 0.9661632714159384, iteration: 260926
loss: 1.028207540512085,grad_norm: 0.9999991120452449, iteration: 260927
loss: 0.995002806186676,grad_norm: 0.9997124361575483, iteration: 260928
loss: 0.9958527684211731,grad_norm: 0.9407371166771038, iteration: 260929
loss: 0.9945856928825378,grad_norm: 0.9311078354651751, iteration: 260930
loss: 1.0384563207626343,grad_norm: 0.9802171532958277, iteration: 260931
loss: 1.0062849521636963,grad_norm: 0.9999991734274271, iteration: 260932
loss: 1.0062888860702515,grad_norm: 0.8484115699472403, iteration: 260933
loss: 1.023630976676941,grad_norm: 0.9395692643107529, iteration: 260934
loss: 0.9631720185279846,grad_norm: 0.884583087733544, iteration: 260935
loss: 0.982122004032135,grad_norm: 0.9999990332479678, iteration: 260936
loss: 1.0149060487747192,grad_norm: 0.8401293948375248, iteration: 260937
loss: 0.9916318655014038,grad_norm: 0.7310687008222547, iteration: 260938
loss: 1.003466248512268,grad_norm: 0.8739201972554891, iteration: 260939
loss: 0.9936733841896057,grad_norm: 0.9562863660975646, iteration: 260940
loss: 1.052107334136963,grad_norm: 0.9999990419700479, iteration: 260941
loss: 1.0108873844146729,grad_norm: 0.9222916717479701, iteration: 260942
loss: 1.0739474296569824,grad_norm: 0.8985162115147091, iteration: 260943
loss: 0.9730949401855469,grad_norm: 0.7853791591882937, iteration: 260944
loss: 0.9987592697143555,grad_norm: 0.9571678850775357, iteration: 260945
loss: 1.022778868675232,grad_norm: 0.9999994763492406, iteration: 260946
loss: 0.9859053492546082,grad_norm: 0.8274660980827028, iteration: 260947
loss: 0.9983944296836853,grad_norm: 0.9999994982756658, iteration: 260948
loss: 0.9811249375343323,grad_norm: 0.8708906169446359, iteration: 260949
loss: 0.990800142288208,grad_norm: 0.8661589566014297, iteration: 260950
loss: 0.9789047837257385,grad_norm: 0.9999991082729989, iteration: 260951
loss: 0.9891123175621033,grad_norm: 0.9801208917981783, iteration: 260952
loss: 0.957933783531189,grad_norm: 0.977437569562155, iteration: 260953
loss: 0.9976800680160522,grad_norm: 0.999999114990993, iteration: 260954
loss: 1.0077197551727295,grad_norm: 0.999999696788146, iteration: 260955
loss: 1.0093969106674194,grad_norm: 0.9999989442339364, iteration: 260956
loss: 0.9339135885238647,grad_norm: 0.9385222155073303, iteration: 260957
loss: 1.0175552368164062,grad_norm: 0.9999991023070345, iteration: 260958
loss: 0.9751555919647217,grad_norm: 0.7626978198037208, iteration: 260959
loss: 1.011939287185669,grad_norm: 0.8202206018009536, iteration: 260960
loss: 1.013648509979248,grad_norm: 0.9999997866395933, iteration: 260961
loss: 1.0398144721984863,grad_norm: 0.9999992163082607, iteration: 260962
loss: 0.9829016327857971,grad_norm: 0.7678053617533341, iteration: 260963
loss: 0.9963986277580261,grad_norm: 0.9999990570960978, iteration: 260964
loss: 0.9984512329101562,grad_norm: 0.908677593243153, iteration: 260965
loss: 1.0141887664794922,grad_norm: 0.8708603938916202, iteration: 260966
loss: 1.0065609216690063,grad_norm: 0.9305264666710987, iteration: 260967
loss: 0.9974265098571777,grad_norm: 0.7181187059075151, iteration: 260968
loss: 0.9761332273483276,grad_norm: 0.9999991514678228, iteration: 260969
loss: 1.0062885284423828,grad_norm: 0.9813881423655116, iteration: 260970
loss: 0.9969344735145569,grad_norm: 0.9999991764846763, iteration: 260971
loss: 0.9849532842636108,grad_norm: 0.8235397663071691, iteration: 260972
loss: 0.9654063582420349,grad_norm: 0.8351593388432517, iteration: 260973
loss: 1.0747743844985962,grad_norm: 0.9999991621299733, iteration: 260974
loss: 1.0098544359207153,grad_norm: 0.9999992265075244, iteration: 260975
loss: 0.9899574518203735,grad_norm: 0.9670831307161367, iteration: 260976
loss: 0.9716631174087524,grad_norm: 0.999999816407815, iteration: 260977
loss: 0.975400447845459,grad_norm: 0.8132697832688094, iteration: 260978
loss: 0.9773958325386047,grad_norm: 0.9236905699945027, iteration: 260979
loss: 1.0129165649414062,grad_norm: 0.9799164365135685, iteration: 260980
loss: 0.9930335283279419,grad_norm: 0.9195410806546689, iteration: 260981
loss: 1.0344446897506714,grad_norm: 0.6251044664104186, iteration: 260982
loss: 0.9966957569122314,grad_norm: 0.8549863796952797, iteration: 260983
loss: 1.065649390220642,grad_norm: 0.9790044995242911, iteration: 260984
loss: 1.024517297744751,grad_norm: 0.8265977885920217, iteration: 260985
loss: 1.0038728713989258,grad_norm: 0.9999991038806457, iteration: 260986
loss: 0.9762083888053894,grad_norm: 0.768417734455725, iteration: 260987
loss: 1.0133737325668335,grad_norm: 0.9733909610790824, iteration: 260988
loss: 0.9789825677871704,grad_norm: 0.7927136181891314, iteration: 260989
loss: 1.0840110778808594,grad_norm: 0.9999991422987798, iteration: 260990
loss: 0.9589845538139343,grad_norm: 0.8914674777636299, iteration: 260991
loss: 0.9780429601669312,grad_norm: 0.9999992794233233, iteration: 260992
loss: 1.029349684715271,grad_norm: 0.9999990401655688, iteration: 260993
loss: 1.0134519338607788,grad_norm: 0.9999993440663139, iteration: 260994
loss: 1.000942587852478,grad_norm: 0.9999990840235856, iteration: 260995
loss: 1.0271366834640503,grad_norm: 0.9285206433678611, iteration: 260996
loss: 1.0315049886703491,grad_norm: 0.9357211608486867, iteration: 260997
loss: 1.0013096332550049,grad_norm: 0.8895350529330065, iteration: 260998
loss: 1.0212880373001099,grad_norm: 0.7699608888399484, iteration: 260999
loss: 1.0059809684753418,grad_norm: 0.7400075754298797, iteration: 261000
loss: 1.0244855880737305,grad_norm: 0.9999992572608656, iteration: 261001
loss: 0.9939342141151428,grad_norm: 0.8830829066772837, iteration: 261002
loss: 1.081386923789978,grad_norm: 0.9999991456729598, iteration: 261003
loss: 0.9923877716064453,grad_norm: 0.953586908255972, iteration: 261004
loss: 0.9969260692596436,grad_norm: 0.9999990569702858, iteration: 261005
loss: 0.9968019127845764,grad_norm: 0.9563444152159605, iteration: 261006
loss: 1.0285301208496094,grad_norm: 0.996615583955536, iteration: 261007
loss: 0.9657625555992126,grad_norm: 0.8548606706334774, iteration: 261008
loss: 0.9661055207252502,grad_norm: 0.8515254003288285, iteration: 261009
loss: 1.0254071950912476,grad_norm: 0.8993460914262086, iteration: 261010
loss: 1.001009225845337,grad_norm: 0.8347595792088394, iteration: 261011
loss: 0.9666734337806702,grad_norm: 0.940173393241474, iteration: 261012
loss: 0.983929455280304,grad_norm: 0.8582467759791141, iteration: 261013
loss: 0.9776424765586853,grad_norm: 0.872682079136481, iteration: 261014
loss: 0.9889432787895203,grad_norm: 0.8141059947145876, iteration: 261015
loss: 1.0132867097854614,grad_norm: 0.9416903314851657, iteration: 261016
loss: 0.9709752798080444,grad_norm: 0.9929130144373916, iteration: 261017
loss: 0.9793952107429504,grad_norm: 0.9999995268838079, iteration: 261018
loss: 0.9956941604614258,grad_norm: 0.9646921529405622, iteration: 261019
loss: 1.0457220077514648,grad_norm: 0.8010478827577909, iteration: 261020
loss: 1.0329886674880981,grad_norm: 0.9647892035519204, iteration: 261021
loss: 1.0144201517105103,grad_norm: 0.8476649239546519, iteration: 261022
loss: 0.9967444539070129,grad_norm: 0.9041366860116115, iteration: 261023
loss: 1.01283860206604,grad_norm: 0.9471085604061216, iteration: 261024
loss: 1.0909156799316406,grad_norm: 0.9999999369238614, iteration: 261025
loss: 0.9652528762817383,grad_norm: 0.9254427335486831, iteration: 261026
loss: 1.0004479885101318,grad_norm: 0.978881379666288, iteration: 261027
loss: 1.0008968114852905,grad_norm: 0.9999991948670229, iteration: 261028
loss: 0.9790361523628235,grad_norm: 0.8865550322055518, iteration: 261029
loss: 0.9673336148262024,grad_norm: 0.8555647782385344, iteration: 261030
loss: 1.0275863409042358,grad_norm: 0.7986761148290983, iteration: 261031
loss: 1.0243744850158691,grad_norm: 0.812000552913053, iteration: 261032
loss: 0.9984972476959229,grad_norm: 0.7552810098419653, iteration: 261033
loss: 1.043106198310852,grad_norm: 0.9999992295464766, iteration: 261034
loss: 1.0001285076141357,grad_norm: 0.9160002711660686, iteration: 261035
loss: 1.011547327041626,grad_norm: 0.9462756222785844, iteration: 261036
loss: 1.049232840538025,grad_norm: 0.9339895090208412, iteration: 261037
loss: 1.0227071046829224,grad_norm: 0.9045479031766395, iteration: 261038
loss: 0.9882549047470093,grad_norm: 0.84770789745875, iteration: 261039
loss: 1.0121902227401733,grad_norm: 0.8372689647412069, iteration: 261040
loss: 1.1205857992172241,grad_norm: 0.9999994592846077, iteration: 261041
loss: 0.9644946455955505,grad_norm: 0.8830471852764495, iteration: 261042
loss: 1.0315531492233276,grad_norm: 0.9230329205314743, iteration: 261043
loss: 0.996943473815918,grad_norm: 0.8481045754566165, iteration: 261044
loss: 0.9995607137680054,grad_norm: 0.9056964203047567, iteration: 261045
loss: 1.0008916854858398,grad_norm: 0.8278245214073839, iteration: 261046
loss: 0.9869369268417358,grad_norm: 0.7984760045579556, iteration: 261047
loss: 1.0085440874099731,grad_norm: 0.9917091256647126, iteration: 261048
loss: 1.0064630508422852,grad_norm: 0.8511073273691772, iteration: 261049
loss: 0.9971855282783508,grad_norm: 0.8462721149658351, iteration: 261050
loss: 1.0050504207611084,grad_norm: 0.9999992942755969, iteration: 261051
loss: 1.0206769704818726,grad_norm: 0.9327876521210762, iteration: 261052
loss: 1.0137875080108643,grad_norm: 0.9999990567946254, iteration: 261053
loss: 1.1812381744384766,grad_norm: 0.9999998978909981, iteration: 261054
loss: 1.0190812349319458,grad_norm: 0.8273758459701261, iteration: 261055
loss: 1.0186357498168945,grad_norm: 0.999999728727717, iteration: 261056
loss: 0.9906948208808899,grad_norm: 0.8248837579018087, iteration: 261057
loss: 0.9880226254463196,grad_norm: 0.965756139892573, iteration: 261058
loss: 0.9587547779083252,grad_norm: 0.7679669818723519, iteration: 261059
loss: 0.9753690958023071,grad_norm: 0.9593885806167594, iteration: 261060
loss: 0.977501392364502,grad_norm: 0.8051185069679688, iteration: 261061
loss: 1.0046778917312622,grad_norm: 0.8148124453220695, iteration: 261062
loss: 1.0188719034194946,grad_norm: 0.7867197772821469, iteration: 261063
loss: 1.0088927745819092,grad_norm: 0.8780665282644919, iteration: 261064
loss: 1.0223084688186646,grad_norm: 0.9762944902958964, iteration: 261065
loss: 0.9941547513008118,grad_norm: 0.8627279100305781, iteration: 261066
loss: 0.9910067915916443,grad_norm: 0.9274777593244825, iteration: 261067
loss: 1.0092787742614746,grad_norm: 0.8941395666107712, iteration: 261068
loss: 0.9595890641212463,grad_norm: 0.7787619655348145, iteration: 261069
loss: 0.9822880029678345,grad_norm: 0.7324598346376637, iteration: 261070
loss: 1.0070395469665527,grad_norm: 0.999999125448581, iteration: 261071
loss: 0.9983767867088318,grad_norm: 0.965779243276371, iteration: 261072
loss: 1.0171921253204346,grad_norm: 0.8118092426179747, iteration: 261073
loss: 1.0222288370132446,grad_norm: 0.8489340955710204, iteration: 261074
loss: 1.0364394187927246,grad_norm: 0.8517441995347421, iteration: 261075
loss: 1.0303713083267212,grad_norm: 0.7424456971053682, iteration: 261076
loss: 0.976830244064331,grad_norm: 0.8053536221823179, iteration: 261077
loss: 0.9905335903167725,grad_norm: 0.8262243406028139, iteration: 261078
loss: 1.0542902946472168,grad_norm: 0.9200548223630406, iteration: 261079
loss: 1.0083491802215576,grad_norm: 0.9727584640342337, iteration: 261080
loss: 1.0029147863388062,grad_norm: 0.9999992413344473, iteration: 261081
loss: 0.9835731387138367,grad_norm: 0.9861918239111617, iteration: 261082
loss: 1.023959994316101,grad_norm: 0.9999992263001142, iteration: 261083
loss: 1.0026414394378662,grad_norm: 0.9999993701388497, iteration: 261084
loss: 0.9804000854492188,grad_norm: 0.9999991100462006, iteration: 261085
loss: 0.9616491794586182,grad_norm: 0.9656457530121689, iteration: 261086
loss: 1.0043506622314453,grad_norm: 0.999999175963967, iteration: 261087
loss: 1.0116876363754272,grad_norm: 0.8111628359213817, iteration: 261088
loss: 1.0239068269729614,grad_norm: 0.9999994436157041, iteration: 261089
loss: 1.1222258806228638,grad_norm: 0.999999959783878, iteration: 261090
loss: 0.9719682335853577,grad_norm: 0.9029585245820982, iteration: 261091
loss: 1.033861517906189,grad_norm: 0.8685902935556039, iteration: 261092
loss: 1.0944432020187378,grad_norm: 0.999999208837339, iteration: 261093
loss: 1.010036826133728,grad_norm: 0.8274227233574442, iteration: 261094
loss: 0.9680531024932861,grad_norm: 0.940008589681924, iteration: 261095
loss: 1.0047701597213745,grad_norm: 0.9999990942695187, iteration: 261096
loss: 0.9817487597465515,grad_norm: 0.7741810759051225, iteration: 261097
loss: 1.011246919631958,grad_norm: 0.772509448861345, iteration: 261098
loss: 1.0121573209762573,grad_norm: 0.8431180223834897, iteration: 261099
loss: 1.028651475906372,grad_norm: 0.9999990577031329, iteration: 261100
loss: 0.9724034667015076,grad_norm: 0.8733718477441318, iteration: 261101
loss: 0.9863118529319763,grad_norm: 0.9999990859313173, iteration: 261102
loss: 1.0118992328643799,grad_norm: 0.8390491918421493, iteration: 261103
loss: 1.0344878435134888,grad_norm: 0.9999991228782519, iteration: 261104
loss: 1.009876012802124,grad_norm: 0.9093686607703152, iteration: 261105
loss: 0.9924954771995544,grad_norm: 0.7870169536706482, iteration: 261106
loss: 0.9703056812286377,grad_norm: 0.8908040464214732, iteration: 261107
loss: 0.9768285751342773,grad_norm: 0.8650804307569712, iteration: 261108
loss: 1.0008236169815063,grad_norm: 0.9871367706948043, iteration: 261109
loss: 1.0215749740600586,grad_norm: 0.8982155093631725, iteration: 261110
loss: 1.0399597883224487,grad_norm: 0.9999993047777272, iteration: 261111
loss: 0.9610772132873535,grad_norm: 0.9769333357228482, iteration: 261112
loss: 1.0088353157043457,grad_norm: 0.999999154592744, iteration: 261113
loss: 1.0276830196380615,grad_norm: 0.9999991094634726, iteration: 261114
loss: 0.9683539867401123,grad_norm: 0.8790521773208199, iteration: 261115
loss: 1.0048861503601074,grad_norm: 0.9999991565960393, iteration: 261116
loss: 0.9979192018508911,grad_norm: 0.996361439375756, iteration: 261117
loss: 0.9838096499443054,grad_norm: 0.8799906910238562, iteration: 261118
loss: 1.0309845209121704,grad_norm: 0.9999995510463545, iteration: 261119
loss: 0.9687421321868896,grad_norm: 0.8334556338923046, iteration: 261120
loss: 1.0305815935134888,grad_norm: 0.8314581227958443, iteration: 261121
loss: 0.9741666913032532,grad_norm: 0.9602024652100226, iteration: 261122
loss: 1.0397640466690063,grad_norm: 0.9999991089357715, iteration: 261123
loss: 1.0169671773910522,grad_norm: 0.9585440930827777, iteration: 261124
loss: 1.0290817022323608,grad_norm: 0.9999990705279213, iteration: 261125
loss: 1.0169323682785034,grad_norm: 0.9999996629686944, iteration: 261126
loss: 1.0027071237564087,grad_norm: 0.8528091703390981, iteration: 261127
loss: 1.0010087490081787,grad_norm: 0.9999991787556476, iteration: 261128
loss: 1.0312144756317139,grad_norm: 0.8900825361351585, iteration: 261129
loss: 0.9646259546279907,grad_norm: 0.9744847622797134, iteration: 261130
loss: 1.0196475982666016,grad_norm: 0.932497837974155, iteration: 261131
loss: 0.9973382949829102,grad_norm: 0.7714761742964099, iteration: 261132
loss: 0.9743300080299377,grad_norm: 0.9627853613935125, iteration: 261133
loss: 0.9837309718132019,grad_norm: 0.8089393982820131, iteration: 261134
loss: 0.9861913323402405,grad_norm: 0.7758608713159454, iteration: 261135
loss: 0.9976589679718018,grad_norm: 0.8878234842756482, iteration: 261136
loss: 1.0141040086746216,grad_norm: 0.8660535679417913, iteration: 261137
loss: 0.9979878067970276,grad_norm: 0.9999992560513137, iteration: 261138
loss: 1.0069482326507568,grad_norm: 0.9999992258901431, iteration: 261139
loss: 0.9961895942687988,grad_norm: 0.8903701390486628, iteration: 261140
loss: 0.9447087049484253,grad_norm: 0.937694845433606, iteration: 261141
loss: 1.0005624294281006,grad_norm: 0.8162893396652827, iteration: 261142
loss: 0.9792929291725159,grad_norm: 0.749685153727232, iteration: 261143
loss: 0.9846905469894409,grad_norm: 0.7699583982314449, iteration: 261144
loss: 0.9950693249702454,grad_norm: 0.9785223786763638, iteration: 261145
loss: 1.0163638591766357,grad_norm: 0.9005768056266009, iteration: 261146
loss: 1.013693928718567,grad_norm: 0.9999991320660944, iteration: 261147
loss: 1.007624864578247,grad_norm: 0.7788504701110823, iteration: 261148
loss: 0.996153712272644,grad_norm: 0.967904135426157, iteration: 261149
loss: 1.0259140729904175,grad_norm: 0.9383331830323449, iteration: 261150
loss: 1.0002628564834595,grad_norm: 0.9102612647483245, iteration: 261151
loss: 0.9953411817550659,grad_norm: 0.9434202508505852, iteration: 261152
loss: 0.9838345646858215,grad_norm: 0.8721936390921932, iteration: 261153
loss: 0.972425103187561,grad_norm: 0.7828195881347315, iteration: 261154
loss: 0.9882010221481323,grad_norm: 0.8852706137148281, iteration: 261155
loss: 0.9970982670783997,grad_norm: 0.754864739079476, iteration: 261156
loss: 1.013424038887024,grad_norm: 0.9802722612486929, iteration: 261157
loss: 0.9950829148292542,grad_norm: 0.9999989325971737, iteration: 261158
loss: 0.9711683392524719,grad_norm: 0.9999993709001964, iteration: 261159
loss: 0.9816129207611084,grad_norm: 0.9999991093964788, iteration: 261160
loss: 1.0128164291381836,grad_norm: 0.9839999975766156, iteration: 261161
loss: 1.0093048810958862,grad_norm: 0.9999997009630867, iteration: 261162
loss: 1.0109597444534302,grad_norm: 0.9999996664850279, iteration: 261163
loss: 1.0542188882827759,grad_norm: 0.8183223180867327, iteration: 261164
loss: 1.0046579837799072,grad_norm: 0.9327069443009545, iteration: 261165
loss: 1.0442132949829102,grad_norm: 0.8391738104561008, iteration: 261166
loss: 1.0286093950271606,grad_norm: 0.9170103131410191, iteration: 261167
loss: 1.009645938873291,grad_norm: 0.9515469454063685, iteration: 261168
loss: 0.9953157305717468,grad_norm: 0.9725552580436574, iteration: 261169
loss: 1.0062049627304077,grad_norm: 0.9999991283513023, iteration: 261170
loss: 0.9941868782043457,grad_norm: 0.8692761564762349, iteration: 261171
loss: 1.0157140493392944,grad_norm: 0.8318923952709717, iteration: 261172
loss: 1.06259024143219,grad_norm: 0.9999989397084277, iteration: 261173
loss: 0.9977380633354187,grad_norm: 0.7358748301467806, iteration: 261174
loss: 1.0006589889526367,grad_norm: 0.8308001759891444, iteration: 261175
loss: 0.9800513982772827,grad_norm: 0.84956678567513, iteration: 261176
loss: 1.0169591903686523,grad_norm: 0.9999991490878286, iteration: 261177
loss: 1.019493818283081,grad_norm: 0.9178831320035507, iteration: 261178
loss: 1.036091685295105,grad_norm: 0.8840556195356944, iteration: 261179
loss: 1.0118577480316162,grad_norm: 0.8127714586081467, iteration: 261180
loss: 0.9940639734268188,grad_norm: 0.9008069714503285, iteration: 261181
loss: 0.9914024472236633,grad_norm: 0.931096474463143, iteration: 261182
loss: 1.0108733177185059,grad_norm: 0.9445969322701565, iteration: 261183
loss: 0.9638380408287048,grad_norm: 0.7938534288440022, iteration: 261184
loss: 0.9749931693077087,grad_norm: 0.899776154297051, iteration: 261185
loss: 0.970302402973175,grad_norm: 0.9674183805596758, iteration: 261186
loss: 0.986471951007843,grad_norm: 0.699764915092766, iteration: 261187
loss: 0.9838482141494751,grad_norm: 0.7679290010324755, iteration: 261188
loss: 1.0078564882278442,grad_norm: 0.9999990279554494, iteration: 261189
loss: 0.9683099985122681,grad_norm: 0.9384412621793637, iteration: 261190
loss: 0.984168529510498,grad_norm: 0.7504805365553081, iteration: 261191
loss: 1.0048457384109497,grad_norm: 0.8233896886135441, iteration: 261192
loss: 1.0423656702041626,grad_norm: 0.9578567996813792, iteration: 261193
loss: 0.9914409518241882,grad_norm: 0.7531674608360495, iteration: 261194
loss: 1.0094884634017944,grad_norm: 0.931953855738629, iteration: 261195
loss: 0.9701789021492004,grad_norm: 0.8926165488528839, iteration: 261196
loss: 1.0291646718978882,grad_norm: 0.7915365908416663, iteration: 261197
loss: 0.9826873540878296,grad_norm: 0.8147535745383275, iteration: 261198
loss: 0.9589002728462219,grad_norm: 0.814077159775329, iteration: 261199
loss: 1.0070452690124512,grad_norm: 0.9208907981258209, iteration: 261200
loss: 0.9926297068595886,grad_norm: 0.9036762966839675, iteration: 261201
loss: 1.013493299484253,grad_norm: 0.8111219917583119, iteration: 261202
loss: 1.0546793937683105,grad_norm: 0.9999998046224968, iteration: 261203
loss: 1.065775752067566,grad_norm: 0.9999996014603555, iteration: 261204
loss: 1.0054079294204712,grad_norm: 0.8583561935603351, iteration: 261205
loss: 0.991948127746582,grad_norm: 0.7869695085957351, iteration: 261206
loss: 0.9792243838310242,grad_norm: 0.9999990904505089, iteration: 261207
loss: 1.0615335702896118,grad_norm: 0.9999991632889887, iteration: 261208
loss: 1.0052841901779175,grad_norm: 0.8655853355530398, iteration: 261209
loss: 1.034818410873413,grad_norm: 0.9344777411059799, iteration: 261210
loss: 1.0190528631210327,grad_norm: 0.9274472481339295, iteration: 261211
loss: 1.023633599281311,grad_norm: 0.921041772448994, iteration: 261212
loss: 1.091936469078064,grad_norm: 0.9999991880733721, iteration: 261213
loss: 1.0213699340820312,grad_norm: 0.8935407234353053, iteration: 261214
loss: 0.9692946076393127,grad_norm: 0.8545344597413529, iteration: 261215
loss: 1.03107488155365,grad_norm: 0.9643346690755744, iteration: 261216
loss: 1.0078736543655396,grad_norm: 0.8782545225828737, iteration: 261217
loss: 0.9914085865020752,grad_norm: 0.7702044775065373, iteration: 261218
loss: 0.9934879541397095,grad_norm: 0.7695897830020203, iteration: 261219
loss: 1.0111297369003296,grad_norm: 0.8625531852156135, iteration: 261220
loss: 1.0147957801818848,grad_norm: 0.9223197942376998, iteration: 261221
loss: 0.9799185991287231,grad_norm: 0.8973251261591974, iteration: 261222
loss: 0.9902148842811584,grad_norm: 0.8562888066773824, iteration: 261223
loss: 0.9912608861923218,grad_norm: 0.9014823221128138, iteration: 261224
loss: 0.9998343586921692,grad_norm: 0.9160361702571432, iteration: 261225
loss: 1.006537675857544,grad_norm: 0.8321749142276884, iteration: 261226
loss: 1.0279333591461182,grad_norm: 0.9999990931229054, iteration: 261227
loss: 0.9408664703369141,grad_norm: 0.8546168887847406, iteration: 261228
loss: 0.9875102043151855,grad_norm: 0.9083013515597598, iteration: 261229
loss: 0.9970988035202026,grad_norm: 0.999999125785044, iteration: 261230
loss: 1.004153847694397,grad_norm: 0.9999991679123547, iteration: 261231
loss: 1.0197099447250366,grad_norm: 0.9021241452503707, iteration: 261232
loss: 0.9945003986358643,grad_norm: 0.8887469198756717, iteration: 261233
loss: 0.9812093377113342,grad_norm: 0.9575498667223066, iteration: 261234
loss: 0.9999063611030579,grad_norm: 0.9526406499079341, iteration: 261235
loss: 1.014632225036621,grad_norm: 0.8158509967131683, iteration: 261236
loss: 1.0201631784439087,grad_norm: 0.9876708084545128, iteration: 261237
loss: 0.9972463250160217,grad_norm: 0.8109134391973651, iteration: 261238
loss: 0.9805882573127747,grad_norm: 0.8610334904347324, iteration: 261239
loss: 1.00588858127594,grad_norm: 0.9282370320122524, iteration: 261240
loss: 0.9907276034355164,grad_norm: 0.9171359429105512, iteration: 261241
loss: 1.0284556150436401,grad_norm: 0.9221711139225142, iteration: 261242
loss: 0.9875260591506958,grad_norm: 0.9748168877547365, iteration: 261243
loss: 0.9939241409301758,grad_norm: 0.778395779374763, iteration: 261244
loss: 0.9881070852279663,grad_norm: 0.9168150312916239, iteration: 261245
loss: 1.0285087823867798,grad_norm: 0.9999992336773535, iteration: 261246
loss: 0.9913501739501953,grad_norm: 0.7525703379745106, iteration: 261247
loss: 1.0015140771865845,grad_norm: 0.931221306124977, iteration: 261248
loss: 1.0145595073699951,grad_norm: 0.963062596309905, iteration: 261249
loss: 1.0060404539108276,grad_norm: 0.9999991134635923, iteration: 261250
loss: 1.000717282295227,grad_norm: 0.8867510569251144, iteration: 261251
loss: 1.0781418085098267,grad_norm: 0.9999990255614533, iteration: 261252
loss: 0.9882969260215759,grad_norm: 0.9830535455978099, iteration: 261253
loss: 0.9948915243148804,grad_norm: 0.8595787752713212, iteration: 261254
loss: 0.9955368638038635,grad_norm: 0.718049466125532, iteration: 261255
loss: 0.9930927753448486,grad_norm: 0.9033618424027268, iteration: 261256
loss: 1.0104488134384155,grad_norm: 0.9017467776685634, iteration: 261257
loss: 1.0124452114105225,grad_norm: 0.940113226464916, iteration: 261258
loss: 0.9831914901733398,grad_norm: 0.9385312119255873, iteration: 261259
loss: 0.967909038066864,grad_norm: 0.9346393156704388, iteration: 261260
loss: 0.9933693408966064,grad_norm: 0.976595326218415, iteration: 261261
loss: 1.0138537883758545,grad_norm: 0.8052461092278211, iteration: 261262
loss: 1.0006211996078491,grad_norm: 0.862526648706307, iteration: 261263
loss: 0.9968892335891724,grad_norm: 0.9288572089376705, iteration: 261264
loss: 1.0157883167266846,grad_norm: 0.9735229211823935, iteration: 261265
loss: 1.0308998823165894,grad_norm: 0.8713723573144597, iteration: 261266
loss: 0.9953277707099915,grad_norm: 0.8599554316047324, iteration: 261267
loss: 0.985801637172699,grad_norm: 0.9009298299247942, iteration: 261268
loss: 0.9950215816497803,grad_norm: 0.8727151098755855, iteration: 261269
loss: 1.0277113914489746,grad_norm: 0.9999990489874826, iteration: 261270
loss: 1.0218349695205688,grad_norm: 0.8979056303218307, iteration: 261271
loss: 1.0113056898117065,grad_norm: 0.9602812199957796, iteration: 261272
loss: 1.0478376150131226,grad_norm: 0.6841807291562803, iteration: 261273
loss: 1.0012383460998535,grad_norm: 0.7616360687994688, iteration: 261274
loss: 0.9797202944755554,grad_norm: 0.7877388355660904, iteration: 261275
loss: 1.0147912502288818,grad_norm: 0.9871614193180402, iteration: 261276
loss: 0.9884092807769775,grad_norm: 0.9194608617889793, iteration: 261277
loss: 1.0109622478485107,grad_norm: 0.9371805589527582, iteration: 261278
loss: 0.9512969255447388,grad_norm: 0.7370441576947008, iteration: 261279
loss: 1.0039926767349243,grad_norm: 0.8731611384809196, iteration: 261280
loss: 0.9887591004371643,grad_norm: 0.895084820065303, iteration: 261281
loss: 0.9769517779350281,grad_norm: 0.8190573463314917, iteration: 261282
loss: 0.9917929768562317,grad_norm: 0.8273596946962462, iteration: 261283
loss: 0.9865434169769287,grad_norm: 0.9899261585134334, iteration: 261284
loss: 1.0142992734909058,grad_norm: 0.8569365013806608, iteration: 261285
loss: 0.991527259349823,grad_norm: 0.9999991770912869, iteration: 261286
loss: 0.9986417889595032,grad_norm: 0.8610144199259089, iteration: 261287
loss: 1.0039575099945068,grad_norm: 0.9331547048330705, iteration: 261288
loss: 0.9793484210968018,grad_norm: 0.9999991369859893, iteration: 261289
loss: 1.0074609518051147,grad_norm: 0.8988857250703871, iteration: 261290
loss: 0.9941171407699585,grad_norm: 0.8059421842879118, iteration: 261291
loss: 1.0124810934066772,grad_norm: 0.8144056181988794, iteration: 261292
loss: 1.0057326555252075,grad_norm: 0.7831775955998191, iteration: 261293
loss: 1.0179423093795776,grad_norm: 0.9569098008411993, iteration: 261294
loss: 0.9964543581008911,grad_norm: 0.7934491003847464, iteration: 261295
loss: 1.0097562074661255,grad_norm: 0.9029286586004613, iteration: 261296
loss: 0.999188244342804,grad_norm: 0.795752670530766, iteration: 261297
loss: 1.0340111255645752,grad_norm: 0.8477654074822843, iteration: 261298
loss: 1.0139782428741455,grad_norm: 0.8691722295100542, iteration: 261299
loss: 0.9676316976547241,grad_norm: 0.9041830395379026, iteration: 261300
loss: 0.9997665286064148,grad_norm: 0.9999991832195867, iteration: 261301
loss: 0.9889912009239197,grad_norm: 0.6604296096361894, iteration: 261302
loss: 0.9866706728935242,grad_norm: 0.8774967412929465, iteration: 261303
loss: 0.9854643940925598,grad_norm: 0.7581117030324562, iteration: 261304
loss: 1.0074070692062378,grad_norm: 0.9999992570330677, iteration: 261305
loss: 0.9498724937438965,grad_norm: 0.8377210897248861, iteration: 261306
loss: 1.0211217403411865,grad_norm: 0.9730422619563709, iteration: 261307
loss: 1.0220615863800049,grad_norm: 0.9276351937738002, iteration: 261308
loss: 1.03827965259552,grad_norm: 0.9768816310865623, iteration: 261309
loss: 0.9525233507156372,grad_norm: 0.9881501821521805, iteration: 261310
loss: 0.9967197179794312,grad_norm: 0.9999990817587663, iteration: 261311
loss: 0.9836511015892029,grad_norm: 0.9085981202177278, iteration: 261312
loss: 1.0068638324737549,grad_norm: 0.7986553278925431, iteration: 261313
loss: 0.9919329285621643,grad_norm: 0.728139891153972, iteration: 261314
loss: 0.9887257814407349,grad_norm: 0.7153237489196721, iteration: 261315
loss: 1.0141382217407227,grad_norm: 0.9243320781941464, iteration: 261316
loss: 0.9792563319206238,grad_norm: 0.9399490262945863, iteration: 261317
loss: 0.9476144909858704,grad_norm: 0.8225595212210833, iteration: 261318
loss: 0.9987479448318481,grad_norm: 0.8524039007693106, iteration: 261319
loss: 1.0505465269088745,grad_norm: 0.9223629015895399, iteration: 261320
loss: 1.0851373672485352,grad_norm: 0.91301207459539, iteration: 261321
loss: 1.0086126327514648,grad_norm: 0.8867313521402652, iteration: 261322
loss: 0.9920438528060913,grad_norm: 0.9999989873454896, iteration: 261323
loss: 0.958646833896637,grad_norm: 0.880673388024038, iteration: 261324
loss: 0.9804447293281555,grad_norm: 0.904151676882012, iteration: 261325
loss: 0.970583438873291,grad_norm: 0.7931229447517552, iteration: 261326
loss: 0.9884327054023743,grad_norm: 0.9737001494589597, iteration: 261327
loss: 0.9901912212371826,grad_norm: 0.9529713640541297, iteration: 261328
loss: 0.9802185893058777,grad_norm: 0.8946943279620955, iteration: 261329
loss: 1.084521770477295,grad_norm: 0.9999997364618392, iteration: 261330
loss: 0.9994310140609741,grad_norm: 0.9781535994306656, iteration: 261331
loss: 0.9790471196174622,grad_norm: 0.9071182501504083, iteration: 261332
loss: 0.9919815063476562,grad_norm: 0.8891051066324475, iteration: 261333
loss: 0.9941031336784363,grad_norm: 0.8910579424972855, iteration: 261334
loss: 1.0171515941619873,grad_norm: 0.8487467567001185, iteration: 261335
loss: 1.031369924545288,grad_norm: 0.8672514605431232, iteration: 261336
loss: 0.984941303730011,grad_norm: 0.8671747861269833, iteration: 261337
loss: 1.0029270648956299,grad_norm: 0.9999990216181609, iteration: 261338
loss: 1.0357732772827148,grad_norm: 0.8240435827942689, iteration: 261339
loss: 0.9924376606941223,grad_norm: 0.9931248582116238, iteration: 261340
loss: 1.0568501949310303,grad_norm: 0.9999991500718207, iteration: 261341
loss: 0.9817574620246887,grad_norm: 0.8637096302958965, iteration: 261342
loss: 0.9905250072479248,grad_norm: 0.9014749165263288, iteration: 261343
loss: 1.005769968032837,grad_norm: 0.7530395564862162, iteration: 261344
loss: 1.008622407913208,grad_norm: 0.9999999705802728, iteration: 261345
loss: 0.9890672564506531,grad_norm: 0.9999990592665272, iteration: 261346
loss: 0.9979326725006104,grad_norm: 0.7667463377982855, iteration: 261347
loss: 1.0196243524551392,grad_norm: 0.9334772378366429, iteration: 261348
loss: 0.9930559396743774,grad_norm: 0.9194721097061386, iteration: 261349
loss: 0.9780093431472778,grad_norm: 0.8182134309400184, iteration: 261350
loss: 0.9824174046516418,grad_norm: 0.9999990652550512, iteration: 261351
loss: 0.9584808349609375,grad_norm: 0.8119187716734404, iteration: 261352
loss: 0.9350030422210693,grad_norm: 0.8885893460245827, iteration: 261353
loss: 1.0191805362701416,grad_norm: 0.9999992542045938, iteration: 261354
loss: 1.008703351020813,grad_norm: 0.9999990936321553, iteration: 261355
loss: 0.9963006973266602,grad_norm: 0.7729059458022097, iteration: 261356
loss: 0.9900915622711182,grad_norm: 0.8897589065582769, iteration: 261357
loss: 1.2495607137680054,grad_norm: 0.9999997890374251, iteration: 261358
loss: 1.0227198600769043,grad_norm: 0.9999998222797364, iteration: 261359
loss: 1.0163755416870117,grad_norm: 0.9999993350368801, iteration: 261360
loss: 1.009100079536438,grad_norm: 0.8790347562420305, iteration: 261361
loss: 1.0126421451568604,grad_norm: 0.8856679637081654, iteration: 261362
loss: 1.0016021728515625,grad_norm: 0.9874981280782279, iteration: 261363
loss: 0.9673003554344177,grad_norm: 0.9353066768581856, iteration: 261364
loss: 0.9915465712547302,grad_norm: 0.805131899514166, iteration: 261365
loss: 0.9922621250152588,grad_norm: 0.7888658347517737, iteration: 261366
loss: 1.0157462358474731,grad_norm: 0.8480839983876204, iteration: 261367
loss: 1.0141202211380005,grad_norm: 0.9999990043158348, iteration: 261368
loss: 1.0300158262252808,grad_norm: 0.9999991896312501, iteration: 261369
loss: 1.0306898355484009,grad_norm: 0.9999991467632221, iteration: 261370
loss: 1.018257737159729,grad_norm: 0.8482187188923407, iteration: 261371
loss: 1.019084095954895,grad_norm: 0.8827539619431756, iteration: 261372
loss: 1.0500503778457642,grad_norm: 0.9659272327641355, iteration: 261373
loss: 1.0291926860809326,grad_norm: 0.8024314174567142, iteration: 261374
loss: 1.0176893472671509,grad_norm: 0.9007677411320149, iteration: 261375
loss: 1.0157757997512817,grad_norm: 0.8363540293556311, iteration: 261376
loss: 1.0362052917480469,grad_norm: 0.8329760915700015, iteration: 261377
loss: 0.9691221117973328,grad_norm: 0.7573079763639318, iteration: 261378
loss: 0.9890316724777222,grad_norm: 0.9999990501642874, iteration: 261379
loss: 1.0165319442749023,grad_norm: 0.8757745883950556, iteration: 261380
loss: 1.0475729703903198,grad_norm: 0.9031680676837278, iteration: 261381
loss: 0.9970583915710449,grad_norm: 0.9016067106736608, iteration: 261382
loss: 0.9789615869522095,grad_norm: 0.8579402042235644, iteration: 261383
loss: 1.0315673351287842,grad_norm: 0.9999992218675171, iteration: 261384
loss: 1.000287413597107,grad_norm: 0.8994493742184624, iteration: 261385
loss: 1.0123279094696045,grad_norm: 0.8837212170019265, iteration: 261386
loss: 1.0239685773849487,grad_norm: 0.7905827381016639, iteration: 261387
loss: 1.0018327236175537,grad_norm: 0.9999991050498691, iteration: 261388
loss: 0.9836509227752686,grad_norm: 0.7738103831133856, iteration: 261389
loss: 0.978208601474762,grad_norm: 0.7861760323743181, iteration: 261390
loss: 0.9936649203300476,grad_norm: 0.9234410651868361, iteration: 261391
loss: 0.97064208984375,grad_norm: 0.98417083089639, iteration: 261392
loss: 0.9571989178657532,grad_norm: 0.9999990196648519, iteration: 261393
loss: 0.9742968678474426,grad_norm: 0.9791408767231846, iteration: 261394
loss: 0.9909927845001221,grad_norm: 0.7825711229344537, iteration: 261395
loss: 0.9752910137176514,grad_norm: 0.8709349845793494, iteration: 261396
loss: 0.9901902675628662,grad_norm: 0.8762749129584101, iteration: 261397
loss: 1.016798973083496,grad_norm: 0.7047077152567859, iteration: 261398
loss: 0.992682158946991,grad_norm: 0.9453825230317793, iteration: 261399
loss: 1.017397403717041,grad_norm: 0.9229100824648943, iteration: 261400
loss: 1.0366787910461426,grad_norm: 0.9999993135175432, iteration: 261401
loss: 0.9796340465545654,grad_norm: 0.8094630885120723, iteration: 261402
loss: 0.9665253758430481,grad_norm: 0.9999991153267541, iteration: 261403
loss: 0.989020586013794,grad_norm: 0.847314777967661, iteration: 261404
loss: 1.0146243572235107,grad_norm: 0.9999992402348028, iteration: 261405
loss: 1.0159717798233032,grad_norm: 0.9238654051436784, iteration: 261406
loss: 1.0123292207717896,grad_norm: 0.7847982742054559, iteration: 261407
loss: 1.0203548669815063,grad_norm: 0.9620055142262359, iteration: 261408
loss: 0.9770621657371521,grad_norm: 0.7299296149613865, iteration: 261409
loss: 0.9884580373764038,grad_norm: 0.839530074994242, iteration: 261410
loss: 1.0309686660766602,grad_norm: 0.9328241503613298, iteration: 261411
loss: 0.9905254244804382,grad_norm: 0.907477848457256, iteration: 261412
loss: 0.9709740877151489,grad_norm: 0.9999990662683462, iteration: 261413
loss: 0.9968212246894836,grad_norm: 0.9963797994260678, iteration: 261414
loss: 0.937004029750824,grad_norm: 0.999999065851826, iteration: 261415
loss: 0.9806656837463379,grad_norm: 0.8162099838440525, iteration: 261416
loss: 1.0020171403884888,grad_norm: 0.8424111417132552, iteration: 261417
loss: 0.9918237924575806,grad_norm: 0.7882433977699477, iteration: 261418
loss: 1.0010569095611572,grad_norm: 0.7983915694553324, iteration: 261419
loss: 1.009950876235962,grad_norm: 0.7906652625755551, iteration: 261420
loss: 1.020495891571045,grad_norm: 0.7962441656630426, iteration: 261421
loss: 0.9902106523513794,grad_norm: 0.9901342682066926, iteration: 261422
loss: 1.0196106433868408,grad_norm: 0.8781115556170155, iteration: 261423
loss: 1.0062360763549805,grad_norm: 0.8200941316377904, iteration: 261424
loss: 1.003365159034729,grad_norm: 0.833215381989356, iteration: 261425
loss: 1.019625186920166,grad_norm: 0.684296704074519, iteration: 261426
loss: 0.988350510597229,grad_norm: 0.9999991821440322, iteration: 261427
loss: 1.012668251991272,grad_norm: 0.7764641912970786, iteration: 261428
loss: 0.9812610745429993,grad_norm: 0.9205541854562693, iteration: 261429
loss: 1.0128068923950195,grad_norm: 0.9883569124186127, iteration: 261430
loss: 0.9681563973426819,grad_norm: 0.8997438162191573, iteration: 261431
loss: 0.9873377680778503,grad_norm: 0.8873122219610065, iteration: 261432
loss: 1.013446569442749,grad_norm: 0.9999991703668929, iteration: 261433
loss: 1.0321773290634155,grad_norm: 0.9999990254455431, iteration: 261434
loss: 1.0095868110656738,grad_norm: 0.8233707201188066, iteration: 261435
loss: 1.0101478099822998,grad_norm: 0.7771537664280237, iteration: 261436
loss: 0.9979483485221863,grad_norm: 0.9535888389575844, iteration: 261437
loss: 1.0049736499786377,grad_norm: 0.862222842655389, iteration: 261438
loss: 0.9862167239189148,grad_norm: 0.9426856304769892, iteration: 261439
loss: 1.013247013092041,grad_norm: 0.874921635194704, iteration: 261440
loss: 1.0447230339050293,grad_norm: 0.8843143430204073, iteration: 261441
loss: 0.9798091053962708,grad_norm: 0.9999989711596406, iteration: 261442
loss: 0.9875935912132263,grad_norm: 0.7639707960314949, iteration: 261443
loss: 1.0013805627822876,grad_norm: 0.8578199099153815, iteration: 261444
loss: 0.9939979910850525,grad_norm: 0.8339374690101518, iteration: 261445
loss: 1.0071172714233398,grad_norm: 0.853024159018515, iteration: 261446
loss: 0.9959543347358704,grad_norm: 0.9999990688640182, iteration: 261447
loss: 0.9717807173728943,grad_norm: 0.8856713619274523, iteration: 261448
loss: 0.960357129573822,grad_norm: 0.9813457250113337, iteration: 261449
loss: 1.0467396974563599,grad_norm: 0.9999993334486031, iteration: 261450
loss: 0.9891274571418762,grad_norm: 0.9233313133702739, iteration: 261451
loss: 1.0198206901550293,grad_norm: 0.8384177752028849, iteration: 261452
loss: 0.9683067202568054,grad_norm: 0.7575397845433891, iteration: 261453
loss: 1.0311627388000488,grad_norm: 0.8408093450368095, iteration: 261454
loss: 0.9793316721916199,grad_norm: 0.967159420912679, iteration: 261455
loss: 0.9988627433776855,grad_norm: 0.788419822101373, iteration: 261456
loss: 1.0039081573486328,grad_norm: 0.9836533116061756, iteration: 261457
loss: 1.0082110166549683,grad_norm: 0.88153045424751, iteration: 261458
loss: 0.9634889364242554,grad_norm: 0.812459844696508, iteration: 261459
loss: 0.9586454033851624,grad_norm: 0.835195356288555, iteration: 261460
loss: 1.0034626722335815,grad_norm: 0.8364226264741583, iteration: 261461
loss: 1.000133991241455,grad_norm: 0.9999991855668778, iteration: 261462
loss: 0.9937873482704163,grad_norm: 0.8140076270010508, iteration: 261463
loss: 1.0295056104660034,grad_norm: 0.9999991797605173, iteration: 261464
loss: 0.979360818862915,grad_norm: 0.9815570877311527, iteration: 261465
loss: 1.015316367149353,grad_norm: 0.9999992190082884, iteration: 261466
loss: 1.052558183670044,grad_norm: 0.9999994344189576, iteration: 261467
loss: 1.0539441108703613,grad_norm: 0.9999991013656009, iteration: 261468
loss: 0.9939038753509521,grad_norm: 0.9999989765202688, iteration: 261469
loss: 0.9800683259963989,grad_norm: 0.9999989549884674, iteration: 261470
loss: 0.9954195618629456,grad_norm: 0.8635717417230926, iteration: 261471
loss: 1.0386710166931152,grad_norm: 0.7807119204757543, iteration: 261472
loss: 0.9690870046615601,grad_norm: 0.8507504257261389, iteration: 261473
loss: 0.968097984790802,grad_norm: 0.7175876118746841, iteration: 261474
loss: 1.0140048265457153,grad_norm: 0.9201241251829186, iteration: 261475
loss: 1.0406908988952637,grad_norm: 0.9142047412804513, iteration: 261476
loss: 0.9764369130134583,grad_norm: 0.91885048698465, iteration: 261477
loss: 1.0035210847854614,grad_norm: 0.999999055394874, iteration: 261478
loss: 1.0035709142684937,grad_norm: 0.9999990915473983, iteration: 261479
loss: 0.9868975877761841,grad_norm: 0.9999991309898488, iteration: 261480
loss: 1.029131293296814,grad_norm: 0.9999993261989784, iteration: 261481
loss: 1.038503885269165,grad_norm: 0.9072801046146619, iteration: 261482
loss: 1.0159196853637695,grad_norm: 0.9999992099645332, iteration: 261483
loss: 0.9969326257705688,grad_norm: 0.9387849413526124, iteration: 261484
loss: 0.9864278435707092,grad_norm: 0.9901908216300258, iteration: 261485
loss: 0.9991489052772522,grad_norm: 0.8604452331314155, iteration: 261486
loss: 0.9824380874633789,grad_norm: 0.9072008097677651, iteration: 261487
loss: 0.992450475692749,grad_norm: 0.9573290408463242, iteration: 261488
loss: 1.003987431526184,grad_norm: 0.9088275433406358, iteration: 261489
loss: 0.9811611175537109,grad_norm: 0.8966970964171904, iteration: 261490
loss: 1.0316988229751587,grad_norm: 0.9999990473873243, iteration: 261491
loss: 0.9689542055130005,grad_norm: 0.8585876694461749, iteration: 261492
loss: 0.9843176007270813,grad_norm: 0.9999991000343946, iteration: 261493
loss: 0.9836084842681885,grad_norm: 0.9196249066466524, iteration: 261494
loss: 1.0182678699493408,grad_norm: 0.9999990089777866, iteration: 261495
loss: 0.9797345995903015,grad_norm: 0.8095550035179914, iteration: 261496
loss: 0.9908748865127563,grad_norm: 0.8657226355556238, iteration: 261497
loss: 0.997828483581543,grad_norm: 0.9544125234179452, iteration: 261498
loss: 0.9909597635269165,grad_norm: 0.8475788200977892, iteration: 261499
loss: 0.9991540908813477,grad_norm: 0.7152552562343834, iteration: 261500
loss: 0.981580913066864,grad_norm: 0.9999990921608398, iteration: 261501
loss: 1.0176920890808105,grad_norm: 0.8147184926422647, iteration: 261502
loss: 1.0552043914794922,grad_norm: 0.883614161807087, iteration: 261503
loss: 1.0028839111328125,grad_norm: 0.8946300846840575, iteration: 261504
loss: 1.0068905353546143,grad_norm: 0.9999990813664166, iteration: 261505
loss: 0.9654306173324585,grad_norm: 0.8099459037695804, iteration: 261506
loss: 0.933233380317688,grad_norm: 0.8438463447846076, iteration: 261507
loss: 0.987238883972168,grad_norm: 0.9352070000470211, iteration: 261508
loss: 0.9748227596282959,grad_norm: 0.9780190770252766, iteration: 261509
loss: 0.9811506867408752,grad_norm: 0.7502959701658807, iteration: 261510
loss: 0.9954747557640076,grad_norm: 0.9999991910966141, iteration: 261511
loss: 0.9860354065895081,grad_norm: 0.9999989949157594, iteration: 261512
loss: 1.0027741193771362,grad_norm: 0.9999992191410495, iteration: 261513
loss: 1.01870596408844,grad_norm: 0.9499001978145402, iteration: 261514
loss: 1.0114665031433105,grad_norm: 0.9880411442635954, iteration: 261515
loss: 1.0259695053100586,grad_norm: 0.8472875821895486, iteration: 261516
loss: 0.9843663573265076,grad_norm: 0.840273947117969, iteration: 261517
loss: 0.9402932524681091,grad_norm: 0.8591043805895606, iteration: 261518
loss: 1.0106347799301147,grad_norm: 0.8936925349570909, iteration: 261519
loss: 0.9900271892547607,grad_norm: 0.9999990246218715, iteration: 261520
loss: 1.0001192092895508,grad_norm: 0.832418154915986, iteration: 261521
loss: 0.9579067230224609,grad_norm: 0.9573204773266473, iteration: 261522
loss: 0.980660617351532,grad_norm: 0.9999989852783695, iteration: 261523
loss: 0.9718915820121765,grad_norm: 0.9682336059963395, iteration: 261524
loss: 0.9841989874839783,grad_norm: 0.9491723107037919, iteration: 261525
loss: 1.008451223373413,grad_norm: 0.9763908703510124, iteration: 261526
loss: 1.012442708015442,grad_norm: 0.7605739086519568, iteration: 261527
loss: 1.0046147108078003,grad_norm: 0.9999990893718627, iteration: 261528
loss: 1.0027046203613281,grad_norm: 0.9854813163290466, iteration: 261529
loss: 1.0022681951522827,grad_norm: 0.7442584387871635, iteration: 261530
loss: 0.9945948719978333,grad_norm: 0.9980210735000452, iteration: 261531
loss: 1.015592098236084,grad_norm: 0.8527671811521669, iteration: 261532
loss: 1.0177922248840332,grad_norm: 0.8139213786867093, iteration: 261533
loss: 1.0129311084747314,grad_norm: 0.999999155790699, iteration: 261534
loss: 0.9700719714164734,grad_norm: 0.9281246359127319, iteration: 261535
loss: 0.9981119632720947,grad_norm: 0.90647694715219, iteration: 261536
loss: 1.0821624994277954,grad_norm: 0.9999991609237733, iteration: 261537
loss: 0.946503221988678,grad_norm: 0.8566305685079517, iteration: 261538
loss: 1.0054123401641846,grad_norm: 0.9630213131101272, iteration: 261539
loss: 1.0335500240325928,grad_norm: 0.9999991466745285, iteration: 261540
loss: 1.003833532333374,grad_norm: 0.9999990801430106, iteration: 261541
loss: 0.9444748759269714,grad_norm: 0.9999991442095361, iteration: 261542
loss: 0.9721629619598389,grad_norm: 0.9999991406306885, iteration: 261543
loss: 0.9860560297966003,grad_norm: 0.9428311972252756, iteration: 261544
loss: 0.9933880567550659,grad_norm: 0.8485709464038393, iteration: 261545
loss: 1.0514034032821655,grad_norm: 0.9999992220218745, iteration: 261546
loss: 1.0132770538330078,grad_norm: 0.9999990268011718, iteration: 261547
loss: 1.0444952249526978,grad_norm: 0.9575581818929951, iteration: 261548
loss: 0.9857580661773682,grad_norm: 0.9999992507213094, iteration: 261549
loss: 1.058903694152832,grad_norm: 0.9104422746134605, iteration: 261550
loss: 1.0019985437393188,grad_norm: 0.8903657789387883, iteration: 261551
loss: 0.9607138633728027,grad_norm: 0.942979182945353, iteration: 261552
loss: 1.0291993618011475,grad_norm: 0.9999998776687167, iteration: 261553
loss: 0.9548966288566589,grad_norm: 0.8092607643974307, iteration: 261554
loss: 0.971359133720398,grad_norm: 0.9999990197379086, iteration: 261555
loss: 1.0342309474945068,grad_norm: 0.999998979460803, iteration: 261556
loss: 0.9606080055236816,grad_norm: 0.9999989885318024, iteration: 261557
loss: 0.9756912589073181,grad_norm: 0.8484805442702003, iteration: 261558
loss: 0.9951751232147217,grad_norm: 0.8861011630610436, iteration: 261559
loss: 1.0205656290054321,grad_norm: 0.8035603306462664, iteration: 261560
loss: 0.985769510269165,grad_norm: 0.8510377251373938, iteration: 261561
loss: 0.9719943404197693,grad_norm: 0.9999990701591566, iteration: 261562
loss: 1.0381686687469482,grad_norm: 0.9167287177589598, iteration: 261563
loss: 0.992955207824707,grad_norm: 0.8329061730900591, iteration: 261564
loss: 1.013942003250122,grad_norm: 0.9999999622485023, iteration: 261565
loss: 0.9598411917686462,grad_norm: 0.7324240277185945, iteration: 261566
loss: 0.9800297617912292,grad_norm: 0.879403102306137, iteration: 261567
loss: 0.990071713924408,grad_norm: 0.8250249436816621, iteration: 261568
loss: 1.0037851333618164,grad_norm: 0.9217058691665526, iteration: 261569
loss: 0.957041323184967,grad_norm: 0.840129091803617, iteration: 261570
loss: 1.0115200281143188,grad_norm: 0.8396351897137411, iteration: 261571
loss: 1.0203568935394287,grad_norm: 0.7557887116714699, iteration: 261572
loss: 1.017008662223816,grad_norm: 0.8742392775004405, iteration: 261573
loss: 1.0281040668487549,grad_norm: 0.8631797847309366, iteration: 261574
loss: 0.9670994877815247,grad_norm: 0.7851942570268383, iteration: 261575
loss: 1.0567067861557007,grad_norm: 0.8583205409166826, iteration: 261576
loss: 1.030486822128296,grad_norm: 0.9999991406894696, iteration: 261577
loss: 0.9986099004745483,grad_norm: 0.9336680231409357, iteration: 261578
loss: 1.035407543182373,grad_norm: 0.9356185368204245, iteration: 261579
loss: 1.0012251138687134,grad_norm: 0.9999997747280176, iteration: 261580
loss: 1.0053256750106812,grad_norm: 0.9999993286943586, iteration: 261581
loss: 0.9734513163566589,grad_norm: 0.8905029815578859, iteration: 261582
loss: 0.9995046854019165,grad_norm: 0.9999994398467356, iteration: 261583
loss: 1.0023350715637207,grad_norm: 0.9999990885704171, iteration: 261584
loss: 1.0154207944869995,grad_norm: 0.9625453182760146, iteration: 261585
loss: 0.9952502250671387,grad_norm: 0.8259176060075046, iteration: 261586
loss: 1.0691335201263428,grad_norm: 0.9999991289439509, iteration: 261587
loss: 1.011450171470642,grad_norm: 0.9999990971200298, iteration: 261588
loss: 1.001103162765503,grad_norm: 0.8199692910858565, iteration: 261589
loss: 0.9520126581192017,grad_norm: 0.8711051528666354, iteration: 261590
loss: 1.0094542503356934,grad_norm: 0.8293198692654227, iteration: 261591
loss: 1.0507996082305908,grad_norm: 0.9999997024617105, iteration: 261592
loss: 1.0152804851531982,grad_norm: 0.6919752977688368, iteration: 261593
loss: 0.9813491702079773,grad_norm: 0.8975094823426789, iteration: 261594
loss: 0.9469085335731506,grad_norm: 0.8940280083996544, iteration: 261595
loss: 0.9985595345497131,grad_norm: 0.9597230001457677, iteration: 261596
loss: 1.0241446495056152,grad_norm: 0.8055254015276113, iteration: 261597
loss: 0.9839857220649719,grad_norm: 0.8634604056332681, iteration: 261598
loss: 0.9871682524681091,grad_norm: 0.8776161747270949, iteration: 261599
loss: 1.0096557140350342,grad_norm: 0.9999998900372735, iteration: 261600
loss: 0.9877219796180725,grad_norm: 0.9091834101348477, iteration: 261601
loss: 1.0011177062988281,grad_norm: 0.9087194323346488, iteration: 261602
loss: 1.0054699182510376,grad_norm: 0.9618447438189575, iteration: 261603
loss: 0.9753802418708801,grad_norm: 0.8699711745845727, iteration: 261604
loss: 1.0128860473632812,grad_norm: 0.8521185791834867, iteration: 261605
loss: 0.986603856086731,grad_norm: 0.9859291237747342, iteration: 261606
loss: 0.9916698336601257,grad_norm: 0.9042553368835737, iteration: 261607
loss: 0.9775259494781494,grad_norm: 0.9999989594965094, iteration: 261608
loss: 0.9683365225791931,grad_norm: 0.7029636842288344, iteration: 261609
loss: 1.0332655906677246,grad_norm: 0.9162102446846271, iteration: 261610
loss: 1.007063865661621,grad_norm: 0.9027407719808331, iteration: 261611
loss: 1.0033828020095825,grad_norm: 0.9994269985134264, iteration: 261612
loss: 1.0045852661132812,grad_norm: 0.7632437960785372, iteration: 261613
loss: 1.028009295463562,grad_norm: 0.8230074782163795, iteration: 261614
loss: 0.9616697430610657,grad_norm: 0.8569151965240781, iteration: 261615
loss: 0.9808251857757568,grad_norm: 0.7574960405003517, iteration: 261616
loss: 1.0371508598327637,grad_norm: 0.7769978641461288, iteration: 261617
loss: 0.9858176708221436,grad_norm: 0.9212398980036157, iteration: 261618
loss: 1.0157686471939087,grad_norm: 0.9169225482687401, iteration: 261619
loss: 0.9738634824752808,grad_norm: 0.9351871291464129, iteration: 261620
loss: 1.0268182754516602,grad_norm: 0.9999991905060832, iteration: 261621
loss: 1.0003081560134888,grad_norm: 0.816729522877209, iteration: 261622
loss: 0.990750789642334,grad_norm: 0.9586182677124502, iteration: 261623
loss: 1.0045887231826782,grad_norm: 0.932131632209985, iteration: 261624
loss: 0.9880537986755371,grad_norm: 0.9742351906015353, iteration: 261625
loss: 1.0191123485565186,grad_norm: 0.8988116369522301, iteration: 261626
loss: 1.0098365545272827,grad_norm: 0.9490371990913873, iteration: 261627
loss: 0.9871330857276917,grad_norm: 0.970321703353931, iteration: 261628
loss: 1.0263882875442505,grad_norm: 0.9999993376146585, iteration: 261629
loss: 1.0231698751449585,grad_norm: 0.9999990837963407, iteration: 261630
loss: 0.9802961945533752,grad_norm: 0.9111286710335317, iteration: 261631
loss: 1.0060774087905884,grad_norm: 0.9150937872055872, iteration: 261632
loss: 1.0202559232711792,grad_norm: 0.9999995499251878, iteration: 261633
loss: 1.069609522819519,grad_norm: 0.9999995538374125, iteration: 261634
loss: 0.9978737831115723,grad_norm: 0.870927327445865, iteration: 261635
loss: 1.009830355644226,grad_norm: 0.9999990219826721, iteration: 261636
loss: 0.9684300422668457,grad_norm: 0.9489201129592866, iteration: 261637
loss: 0.9942557215690613,grad_norm: 0.946162689497404, iteration: 261638
loss: 0.9937376379966736,grad_norm: 0.8795190326509924, iteration: 261639
loss: 0.9720514416694641,grad_norm: 0.8862215887844889, iteration: 261640
loss: 1.021112322807312,grad_norm: 0.9999991678616164, iteration: 261641
loss: 0.9980871081352234,grad_norm: 0.999999113555092, iteration: 261642
loss: 1.0251541137695312,grad_norm: 0.9999994857797536, iteration: 261643
loss: 1.0063750743865967,grad_norm: 0.9999989957185365, iteration: 261644
loss: 1.0120512247085571,grad_norm: 0.8157291984860715, iteration: 261645
loss: 0.9707401394844055,grad_norm: 1.000000059203657, iteration: 261646
loss: 0.9790666103363037,grad_norm: 0.875792054190187, iteration: 261647
loss: 1.019809603691101,grad_norm: 0.9999991633010273, iteration: 261648
loss: 0.9712996482849121,grad_norm: 0.843158610848094, iteration: 261649
loss: 0.9848065972328186,grad_norm: 0.8922895938708624, iteration: 261650
loss: 1.0256255865097046,grad_norm: 0.8802954094829601, iteration: 261651
loss: 0.9906733632087708,grad_norm: 0.9639786594067743, iteration: 261652
loss: 1.0019772052764893,grad_norm: 0.9216681373563448, iteration: 261653
loss: 0.9824564456939697,grad_norm: 0.7566638250149607, iteration: 261654
loss: 1.0103769302368164,grad_norm: 0.7785532259328805, iteration: 261655
loss: 0.997535228729248,grad_norm: 0.8594519489154575, iteration: 261656
loss: 1.0539265871047974,grad_norm: 0.9683968032751795, iteration: 261657
loss: 1.0286167860031128,grad_norm: 0.9967917254010199, iteration: 261658
loss: 1.0220909118652344,grad_norm: 0.96840306503379, iteration: 261659
loss: 0.9806410074234009,grad_norm: 0.9999991169046902, iteration: 261660
loss: 0.9863377809524536,grad_norm: 0.7639043317972156, iteration: 261661
loss: 1.0072425603866577,grad_norm: 0.9855720959200589, iteration: 261662
loss: 1.0260446071624756,grad_norm: 0.8605575937555627, iteration: 261663
loss: 0.9836523532867432,grad_norm: 0.902818817070942, iteration: 261664
loss: 1.0219401121139526,grad_norm: 0.8199284451200134, iteration: 261665
loss: 1.0093929767608643,grad_norm: 0.8615988713498066, iteration: 261666
loss: 1.005621075630188,grad_norm: 0.9968797515240295, iteration: 261667
loss: 0.9900202751159668,grad_norm: 0.9284666951465566, iteration: 261668
loss: 1.0404480695724487,grad_norm: 0.7789055530247291, iteration: 261669
loss: 0.9663501381874084,grad_norm: 0.9782719477565187, iteration: 261670
loss: 0.9968112111091614,grad_norm: 0.9999997865194908, iteration: 261671
loss: 1.009886384010315,grad_norm: 0.9950649340359541, iteration: 261672
loss: 0.9761980772018433,grad_norm: 0.9999990436433513, iteration: 261673
loss: 1.0116498470306396,grad_norm: 0.9999992672188805, iteration: 261674
loss: 1.0087684392929077,grad_norm: 0.9999989658002637, iteration: 261675
loss: 1.0599040985107422,grad_norm: 0.9999993028715138, iteration: 261676
loss: 0.9973703622817993,grad_norm: 0.8307452446806638, iteration: 261677
loss: 1.0036088228225708,grad_norm: 0.8091668427861709, iteration: 261678
loss: 0.9816039800643921,grad_norm: 0.9999992303122717, iteration: 261679
loss: 0.9916694760322571,grad_norm: 0.945622859553779, iteration: 261680
loss: 1.027718424797058,grad_norm: 0.9877186727578937, iteration: 261681
loss: 0.9950399398803711,grad_norm: 0.944394798420783, iteration: 261682
loss: 1.0095155239105225,grad_norm: 0.7426701231550789, iteration: 261683
loss: 1.0242449045181274,grad_norm: 0.7603980114827505, iteration: 261684
loss: 0.9573049545288086,grad_norm: 0.7849931042719386, iteration: 261685
loss: 1.021154761314392,grad_norm: 0.9945923067535664, iteration: 261686
loss: 1.0029667615890503,grad_norm: 0.9999991604820602, iteration: 261687
loss: 0.9766813516616821,grad_norm: 0.9505714008522052, iteration: 261688
loss: 0.9896451234817505,grad_norm: 0.6627742162763111, iteration: 261689
loss: 1.0658646821975708,grad_norm: 0.9999991161135098, iteration: 261690
loss: 1.0163700580596924,grad_norm: 0.8805292524541541, iteration: 261691
loss: 1.0310264825820923,grad_norm: 0.8244949549653535, iteration: 261692
loss: 0.9755093455314636,grad_norm: 0.9999995175255171, iteration: 261693
loss: 0.964928150177002,grad_norm: 0.9999994458370847, iteration: 261694
loss: 1.0374891757965088,grad_norm: 0.8727545399483954, iteration: 261695
loss: 1.048709750175476,grad_norm: 0.7654484476613449, iteration: 261696
loss: 0.9676251411437988,grad_norm: 0.8542347569836259, iteration: 261697
loss: 1.0439790487289429,grad_norm: 0.9999990803263161, iteration: 261698
loss: 1.0199087858200073,grad_norm: 0.9128685872499995, iteration: 261699
loss: 0.9951896071434021,grad_norm: 0.9180950347214588, iteration: 261700
loss: 1.0236165523529053,grad_norm: 0.9999991186241732, iteration: 261701
loss: 0.97882080078125,grad_norm: 0.9494473456028456, iteration: 261702
loss: 0.9991940855979919,grad_norm: 0.8794904095198586, iteration: 261703
loss: 1.005959153175354,grad_norm: 0.8018578679768847, iteration: 261704
loss: 1.0126274824142456,grad_norm: 0.9561582249215561, iteration: 261705
loss: 0.9841359257698059,grad_norm: 0.8436355879625821, iteration: 261706
loss: 0.9869381785392761,grad_norm: 0.8115111957050849, iteration: 261707
loss: 1.0177515745162964,grad_norm: 0.9996824304217402, iteration: 261708
loss: 1.0141183137893677,grad_norm: 0.8764656000747593, iteration: 261709
loss: 1.0056239366531372,grad_norm: 0.8209366304847637, iteration: 261710
loss: 0.9805512428283691,grad_norm: 0.9544353318423131, iteration: 261711
loss: 1.0158851146697998,grad_norm: 0.9097311812319827, iteration: 261712
loss: 0.9859554767608643,grad_norm: 0.8121464627250718, iteration: 261713
loss: 0.9776341915130615,grad_norm: 0.9126334027123417, iteration: 261714
loss: 0.9837372899055481,grad_norm: 0.9999990216375019, iteration: 261715
loss: 1.0078333616256714,grad_norm: 0.740694999314779, iteration: 261716
loss: 0.9987273812294006,grad_norm: 0.7501304427474648, iteration: 261717
loss: 0.9779577255249023,grad_norm: 0.7601062516872653, iteration: 261718
loss: 0.984348714351654,grad_norm: 0.9067647809606737, iteration: 261719
loss: 0.9913719296455383,grad_norm: 0.8325151070134714, iteration: 261720
loss: 0.9835763573646545,grad_norm: 0.999999101206669, iteration: 261721
loss: 0.9781134128570557,grad_norm: 0.8069918693942045, iteration: 261722
loss: 1.000757098197937,grad_norm: 0.897426712971695, iteration: 261723
loss: 0.9961867928504944,grad_norm: 0.9999991151521457, iteration: 261724
loss: 1.002961277961731,grad_norm: 0.9999990179090914, iteration: 261725
loss: 0.9973012208938599,grad_norm: 0.9999990780777911, iteration: 261726
loss: 1.0053743124008179,grad_norm: 0.8796886624727581, iteration: 261727
loss: 1.033935785293579,grad_norm: 0.875635415536419, iteration: 261728
loss: 1.031353235244751,grad_norm: 0.9999994324914998, iteration: 261729
loss: 1.0103427171707153,grad_norm: 0.981909672199726, iteration: 261730
loss: 1.017410159111023,grad_norm: 0.9999991119930524, iteration: 261731
loss: 0.9945889711380005,grad_norm: 0.9393356681208007, iteration: 261732
loss: 0.9852449297904968,grad_norm: 0.9217336281801254, iteration: 261733
loss: 1.0355826616287231,grad_norm: 0.9186485727156201, iteration: 261734
loss: 1.0050712823867798,grad_norm: 0.8320855638514427, iteration: 261735
loss: 1.000720739364624,grad_norm: 0.863408012701273, iteration: 261736
loss: 0.994105339050293,grad_norm: 0.9567074023929708, iteration: 261737
loss: 0.9711737036705017,grad_norm: 0.9163200652334168, iteration: 261738
loss: 0.9821152091026306,grad_norm: 0.8998143634640571, iteration: 261739
loss: 1.0591132640838623,grad_norm: 0.8700947068804994, iteration: 261740
loss: 1.0029618740081787,grad_norm: 0.9172129068820895, iteration: 261741
loss: 1.0105904340744019,grad_norm: 0.9828460142172675, iteration: 261742
loss: 1.0257322788238525,grad_norm: 0.7562491883815797, iteration: 261743
loss: 1.0349522829055786,grad_norm: 0.9999990219665124, iteration: 261744
loss: 0.9721505641937256,grad_norm: 0.7384868219088295, iteration: 261745
loss: 1.030519962310791,grad_norm: 0.8228532215976948, iteration: 261746
loss: 1.0184394121170044,grad_norm: 0.8958350181940553, iteration: 261747
loss: 0.996090292930603,grad_norm: 0.7767711105274504, iteration: 261748
loss: 1.0004078149795532,grad_norm: 0.8058594023929598, iteration: 261749
loss: 0.9775800108909607,grad_norm: 0.9302982739669907, iteration: 261750
loss: 1.0295521020889282,grad_norm: 0.9999999086093514, iteration: 261751
loss: 1.002999186515808,grad_norm: 0.9090833904685975, iteration: 261752
loss: 1.0141725540161133,grad_norm: 0.9999990651970575, iteration: 261753
loss: 1.0284565687179565,grad_norm: 0.9281807214123897, iteration: 261754
loss: 0.9882427453994751,grad_norm: 0.8521394580888204, iteration: 261755
loss: 1.004384160041809,grad_norm: 0.9839340281940887, iteration: 261756
loss: 1.032493233680725,grad_norm: 0.9999997007366156, iteration: 261757
loss: 0.9897506237030029,grad_norm: 0.7845487293005443, iteration: 261758
loss: 1.0022239685058594,grad_norm: 0.8559515953948513, iteration: 261759
loss: 0.996718168258667,grad_norm: 0.8248039188710826, iteration: 261760
loss: 1.009892225265503,grad_norm: 0.7780939218819332, iteration: 261761
loss: 1.0090309381484985,grad_norm: 0.8079196316751622, iteration: 261762
loss: 1.0063700675964355,grad_norm: 0.905432247467745, iteration: 261763
loss: 1.016038179397583,grad_norm: 0.9999990515128134, iteration: 261764
loss: 1.0061454772949219,grad_norm: 0.9915693134590987, iteration: 261765
loss: 0.9901931285858154,grad_norm: 0.842296500934729, iteration: 261766
loss: 0.9989349246025085,grad_norm: 0.8115023820245274, iteration: 261767
loss: 1.0029420852661133,grad_norm: 0.9006873257188458, iteration: 261768
loss: 0.9985010623931885,grad_norm: 0.8505654803172976, iteration: 261769
loss: 1.0224231481552124,grad_norm: 0.9999992147492294, iteration: 261770
loss: 1.0111992359161377,grad_norm: 0.9109970874143237, iteration: 261771
loss: 1.010866403579712,grad_norm: 0.9999991645585157, iteration: 261772
loss: 1.028846263885498,grad_norm: 0.9999988501281671, iteration: 261773
loss: 1.0056437253952026,grad_norm: 0.9934129926592404, iteration: 261774
loss: 1.022663950920105,grad_norm: 0.9999992997265368, iteration: 261775
loss: 0.9972132444381714,grad_norm: 0.9999990703802701, iteration: 261776
loss: 0.9646707773208618,grad_norm: 0.8452830071408806, iteration: 261777
loss: 0.9676463007926941,grad_norm: 0.999999166832403, iteration: 261778
loss: 1.0022610425949097,grad_norm: 0.9073200530023892, iteration: 261779
loss: 0.9919057488441467,grad_norm: 0.880918154726106, iteration: 261780
loss: 1.0147366523742676,grad_norm: 0.9074217606017791, iteration: 261781
loss: 0.9886292219161987,grad_norm: 0.9018814817633423, iteration: 261782
loss: 1.0096794366836548,grad_norm: 0.9999995824316408, iteration: 261783
loss: 1.0138740539550781,grad_norm: 0.8446430792155318, iteration: 261784
loss: 0.9760615825653076,grad_norm: 0.9549866978905344, iteration: 261785
loss: 0.9926607012748718,grad_norm: 0.8720599622023896, iteration: 261786
loss: 1.0083703994750977,grad_norm: 0.9999991598507593, iteration: 261787
loss: 1.0178121328353882,grad_norm: 0.8702864776577021, iteration: 261788
loss: 0.9995031356811523,grad_norm: 0.8294033953758576, iteration: 261789
loss: 1.0141596794128418,grad_norm: 0.9268598707125916, iteration: 261790
loss: 1.0122544765472412,grad_norm: 0.9900233879200829, iteration: 261791
loss: 0.9929128289222717,grad_norm: 0.9748325684602858, iteration: 261792
loss: 0.9872824549674988,grad_norm: 0.7759805484534574, iteration: 261793
loss: 1.0017306804656982,grad_norm: 0.999999217133012, iteration: 261794
loss: 1.040629506111145,grad_norm: 0.7745563602006476, iteration: 261795
loss: 1.0247470140457153,grad_norm: 0.8291512971673898, iteration: 261796
loss: 0.9779267907142639,grad_norm: 0.9163736510724023, iteration: 261797
loss: 0.9628366231918335,grad_norm: 0.8104141660127123, iteration: 261798
loss: 1.0130317211151123,grad_norm: 0.946588184791906, iteration: 261799
loss: 0.9674777984619141,grad_norm: 0.9999989927743669, iteration: 261800
loss: 0.9972450137138367,grad_norm: 0.9999991338492509, iteration: 261801
loss: 0.9926846027374268,grad_norm: 0.9999990642451503, iteration: 261802
loss: 1.0085924863815308,grad_norm: 0.9999991409971993, iteration: 261803
loss: 0.9930576086044312,grad_norm: 0.9083002140052129, iteration: 261804
loss: 0.9962027668952942,grad_norm: 0.9999990956648174, iteration: 261805
loss: 0.9772561192512512,grad_norm: 0.9167880551600857, iteration: 261806
loss: 0.9649844169616699,grad_norm: 0.9189229491070166, iteration: 261807
loss: 1.0103777647018433,grad_norm: 0.8875261361832686, iteration: 261808
loss: 1.0435585975646973,grad_norm: 0.8635353648551178, iteration: 261809
loss: 1.067271113395691,grad_norm: 0.9999993564970815, iteration: 261810
loss: 0.9800610542297363,grad_norm: 0.9717853447007526, iteration: 261811
loss: 1.0310219526290894,grad_norm: 0.8873998855090421, iteration: 261812
loss: 0.9857353568077087,grad_norm: 0.9565824262897415, iteration: 261813
loss: 1.0087745189666748,grad_norm: 0.8307975615600409, iteration: 261814
loss: 1.0689001083374023,grad_norm: 0.9999998594650742, iteration: 261815
loss: 1.0240936279296875,grad_norm: 0.8778609070942414, iteration: 261816
loss: 1.0003219842910767,grad_norm: 0.8621189237219492, iteration: 261817
loss: 1.0039154291152954,grad_norm: 0.9999994083167221, iteration: 261818
loss: 1.0434443950653076,grad_norm: 0.7725750069455403, iteration: 261819
loss: 0.9927114248275757,grad_norm: 0.9218529296034712, iteration: 261820
loss: 1.0156794786453247,grad_norm: 0.9999991114285781, iteration: 261821
loss: 0.994370698928833,grad_norm: 0.8964914501664987, iteration: 261822
loss: 1.1141088008880615,grad_norm: 0.9999996440280926, iteration: 261823
loss: 0.9962607622146606,grad_norm: 0.8304371817298091, iteration: 261824
loss: 0.949321448802948,grad_norm: 0.9708819529593421, iteration: 261825
loss: 0.996408224105835,grad_norm: 0.9875166417667505, iteration: 261826
loss: 0.971211314201355,grad_norm: 0.9114777692506507, iteration: 261827
loss: 0.9949647784233093,grad_norm: 0.9999997530981533, iteration: 261828
loss: 1.0086582899093628,grad_norm: 0.8591946142465297, iteration: 261829
loss: 1.019066572189331,grad_norm: 0.8248786708617436, iteration: 261830
loss: 0.9598575234413147,grad_norm: 0.8812639999757081, iteration: 261831
loss: 0.9562593698501587,grad_norm: 0.9891441918049793, iteration: 261832
loss: 0.98573237657547,grad_norm: 0.9999997562716576, iteration: 261833
loss: 1.0062075853347778,grad_norm: 0.9133541493516175, iteration: 261834
loss: 0.9896733164787292,grad_norm: 0.9999993106609351, iteration: 261835
loss: 0.9949706196784973,grad_norm: 0.8809481996803319, iteration: 261836
loss: 0.9883162379264832,grad_norm: 0.9560601038088615, iteration: 261837
loss: 0.982621967792511,grad_norm: 0.8580142514222725, iteration: 261838
loss: 0.9606059789657593,grad_norm: 0.9999990813892022, iteration: 261839
loss: 0.9932898283004761,grad_norm: 0.9289716617563475, iteration: 261840
loss: 1.0634348392486572,grad_norm: 0.999999734706118, iteration: 261841
loss: 1.008093237876892,grad_norm: 0.7899776993736842, iteration: 261842
loss: 0.9963585734367371,grad_norm: 0.9999992797788374, iteration: 261843
loss: 1.0845615863800049,grad_norm: 0.9999996011219198, iteration: 261844
loss: 0.9625393152236938,grad_norm: 0.9701146642116234, iteration: 261845
loss: 0.9969269633293152,grad_norm: 0.7258059292207638, iteration: 261846
loss: 1.018225073814392,grad_norm: 0.9366465587151055, iteration: 261847
loss: 1.0160419940948486,grad_norm: 0.8630085460995511, iteration: 261848
loss: 1.0143163204193115,grad_norm: 0.8806933446116293, iteration: 261849
loss: 0.989683210849762,grad_norm: 0.9527025387363086, iteration: 261850
loss: 1.03485107421875,grad_norm: 0.9999997700561409, iteration: 261851
loss: 0.9703285694122314,grad_norm: 0.8242381465792431, iteration: 261852
loss: 0.9684149622917175,grad_norm: 0.839627871451245, iteration: 261853
loss: 0.9976651072502136,grad_norm: 0.845083921361763, iteration: 261854
loss: 0.9925609827041626,grad_norm: 0.9892227842555857, iteration: 261855
loss: 1.0014879703521729,grad_norm: 0.7892711220902774, iteration: 261856
loss: 0.9916026592254639,grad_norm: 0.9530378257310748, iteration: 261857
loss: 0.9862802624702454,grad_norm: 0.8401728917297719, iteration: 261858
loss: 0.9987072348594666,grad_norm: 0.7915739850549395, iteration: 261859
loss: 1.012587070465088,grad_norm: 0.9999990332961136, iteration: 261860
loss: 1.0073184967041016,grad_norm: 0.7620437216396877, iteration: 261861
loss: 1.0023967027664185,grad_norm: 0.9063547506997953, iteration: 261862
loss: 0.9897148013114929,grad_norm: 0.7505233851204621, iteration: 261863
loss: 0.9952338337898254,grad_norm: 0.822910431117454, iteration: 261864
loss: 0.9983759522438049,grad_norm: 0.9999991839067373, iteration: 261865
loss: 1.1911581754684448,grad_norm: 0.9999997293629701, iteration: 261866
loss: 0.9922685027122498,grad_norm: 0.8080380765757068, iteration: 261867
loss: 1.0028963088989258,grad_norm: 0.8352052511574013, iteration: 261868
loss: 0.9909282922744751,grad_norm: 0.9888591332963446, iteration: 261869
loss: 0.9811288118362427,grad_norm: 0.8661486367985195, iteration: 261870
loss: 1.1265149116516113,grad_norm: 0.999999699350165, iteration: 261871
loss: 1.0087708234786987,grad_norm: 0.9021795016120299, iteration: 261872
loss: 0.9733614325523376,grad_norm: 0.8676823444017099, iteration: 261873
loss: 1.010191798210144,grad_norm: 0.8838570877672984, iteration: 261874
loss: 1.026229977607727,grad_norm: 0.9825989536572407, iteration: 261875
loss: 1.0117392539978027,grad_norm: 0.8444707128470618, iteration: 261876
loss: 1.0339852571487427,grad_norm: 0.9999990074522845, iteration: 261877
loss: 1.0080838203430176,grad_norm: 0.9686264532777885, iteration: 261878
loss: 0.981789767742157,grad_norm: 0.9341763403132035, iteration: 261879
loss: 0.9870733618736267,grad_norm: 0.8935466856530415, iteration: 261880
loss: 0.956627607345581,grad_norm: 0.8921060508219522, iteration: 261881
loss: 0.9864383935928345,grad_norm: 0.999999036525973, iteration: 261882
loss: 1.089730143547058,grad_norm: 0.9999999878764602, iteration: 261883
loss: 0.9973347187042236,grad_norm: 0.9058446635578411, iteration: 261884
loss: 1.0177333354949951,grad_norm: 0.8419962038739818, iteration: 261885
loss: 0.9801105856895447,grad_norm: 0.7294087812998469, iteration: 261886
loss: 0.9980730414390564,grad_norm: 0.8083762996707524, iteration: 261887
loss: 0.9675933122634888,grad_norm: 0.7719337801730745, iteration: 261888
loss: 0.9964392185211182,grad_norm: 0.8533650180904303, iteration: 261889
loss: 0.9679347276687622,grad_norm: 0.8804517369144236, iteration: 261890
loss: 0.9670745134353638,grad_norm: 0.8720409500269544, iteration: 261891
loss: 0.9662632942199707,grad_norm: 0.8466590351242492, iteration: 261892
loss: 0.9913721680641174,grad_norm: 0.9334203817705858, iteration: 261893
loss: 1.0435768365859985,grad_norm: 0.9999990375792458, iteration: 261894
loss: 0.9714738726615906,grad_norm: 0.8739337199032466, iteration: 261895
loss: 0.998634934425354,grad_norm: 0.8704250464874191, iteration: 261896
loss: 1.0005778074264526,grad_norm: 0.7885722671549407, iteration: 261897
loss: 0.9803307056427002,grad_norm: 0.9668642152888851, iteration: 261898
loss: 0.9892427325248718,grad_norm: 0.7857005875002235, iteration: 261899
loss: 1.0233160257339478,grad_norm: 0.7719593062735166, iteration: 261900
loss: 1.0157713890075684,grad_norm: 0.9999991767506198, iteration: 261901
loss: 1.0031830072402954,grad_norm: 0.830413810676116, iteration: 261902
loss: 0.9966607689857483,grad_norm: 0.7890139912235248, iteration: 261903
loss: 1.025219440460205,grad_norm: 0.8529443857135813, iteration: 261904
loss: 1.015586018562317,grad_norm: 0.6737500930017515, iteration: 261905
loss: 1.00806725025177,grad_norm: 0.9999992193599001, iteration: 261906
loss: 0.9844492077827454,grad_norm: 0.8331832251875242, iteration: 261907
loss: 1.030086636543274,grad_norm: 0.986955318516654, iteration: 261908
loss: 0.9946117401123047,grad_norm: 0.8673679796390982, iteration: 261909
loss: 0.9881393313407898,grad_norm: 0.997720374057204, iteration: 261910
loss: 1.0270880460739136,grad_norm: 0.9999994988806532, iteration: 261911
loss: 1.0092275142669678,grad_norm: 0.9999991895376751, iteration: 261912
loss: 0.9650785326957703,grad_norm: 0.9964061443936572, iteration: 261913
loss: 1.0085244178771973,grad_norm: 0.789557765441855, iteration: 261914
loss: 1.0139693021774292,grad_norm: 0.8588275034518535, iteration: 261915
loss: 0.9972375631332397,grad_norm: 0.8422025437681562, iteration: 261916
loss: 1.0030957460403442,grad_norm: 0.7352973575594872, iteration: 261917
loss: 0.9912696480751038,grad_norm: 0.824037003436818, iteration: 261918
loss: 1.031795620918274,grad_norm: 0.9999992173060653, iteration: 261919
loss: 1.0215178728103638,grad_norm: 0.7694263092877779, iteration: 261920
loss: 1.0032455921173096,grad_norm: 0.999999492489789, iteration: 261921
loss: 1.0216130018234253,grad_norm: 0.9520548852378357, iteration: 261922
loss: 1.0179585218429565,grad_norm: 0.9948602044174335, iteration: 261923
loss: 1.0002497434616089,grad_norm: 0.9999991837704925, iteration: 261924
loss: 0.9700124263763428,grad_norm: 0.9267395354758463, iteration: 261925
loss: 0.9752805829048157,grad_norm: 0.999999154931077, iteration: 261926
loss: 1.0304324626922607,grad_norm: 0.9999991182488535, iteration: 261927
loss: 1.115914225578308,grad_norm: 0.9999996972336264, iteration: 261928
loss: 0.9507362842559814,grad_norm: 0.922255009231106, iteration: 261929
loss: 0.9226392507553101,grad_norm: 0.8984708632691999, iteration: 261930
loss: 0.9942022562026978,grad_norm: 0.9089029464474133, iteration: 261931
loss: 1.0027912855148315,grad_norm: 0.9816069141255966, iteration: 261932
loss: 0.9726552367210388,grad_norm: 0.9803588915773341, iteration: 261933
loss: 0.996204137802124,grad_norm: 0.919991118227653, iteration: 261934
loss: 1.034101128578186,grad_norm: 0.8475477372485556, iteration: 261935
loss: 1.0365543365478516,grad_norm: 0.9999992453998644, iteration: 261936
loss: 1.0410521030426025,grad_norm: 0.8278942660580658, iteration: 261937
loss: 0.9884477853775024,grad_norm: 0.8583440152946067, iteration: 261938
loss: 0.9867359399795532,grad_norm: 0.8514405183875883, iteration: 261939
loss: 1.0230920314788818,grad_norm: 0.9811949329744606, iteration: 261940
loss: 1.0367664098739624,grad_norm: 0.9999989376662044, iteration: 261941
loss: 0.9906803369522095,grad_norm: 0.9363375686877761, iteration: 261942
loss: 0.9867613911628723,grad_norm: 0.9999992071009289, iteration: 261943
loss: 1.0173945426940918,grad_norm: 0.9299200823768909, iteration: 261944
loss: 0.9777989387512207,grad_norm: 0.9999990508305977, iteration: 261945
loss: 0.9938510060310364,grad_norm: 0.9999991220181604, iteration: 261946
loss: 0.9871017932891846,grad_norm: 0.8468546557774981, iteration: 261947
loss: 0.9901044964790344,grad_norm: 0.9757311439805213, iteration: 261948
loss: 0.9898401498794556,grad_norm: 0.8847652526778581, iteration: 261949
loss: 1.011854648590088,grad_norm: 0.9999990986909341, iteration: 261950
loss: 1.038326621055603,grad_norm: 0.8507606450972681, iteration: 261951
loss: 0.9720078110694885,grad_norm: 0.8843178079457763, iteration: 261952
loss: 1.0252007246017456,grad_norm: 0.8998374717274303, iteration: 261953
loss: 1.0090038776397705,grad_norm: 0.8154605253530852, iteration: 261954
loss: 1.0390944480895996,grad_norm: 0.9308499608259949, iteration: 261955
loss: 0.9676720499992371,grad_norm: 0.7961074286071365, iteration: 261956
loss: 1.0289051532745361,grad_norm: 0.8929250682148002, iteration: 261957
loss: 1.0226157903671265,grad_norm: 0.8176959520090835, iteration: 261958
loss: 0.9878355264663696,grad_norm: 0.9013575436692151, iteration: 261959
loss: 1.0065418481826782,grad_norm: 0.9255199718456121, iteration: 261960
loss: 0.9951799511909485,grad_norm: 0.841195809179037, iteration: 261961
loss: 1.0535832643508911,grad_norm: 0.9999992015217466, iteration: 261962
loss: 0.9843283295631409,grad_norm: 0.8939081944963969, iteration: 261963
loss: 1.0081130266189575,grad_norm: 0.8464651163911037, iteration: 261964
loss: 0.9730093479156494,grad_norm: 0.9999990711381856, iteration: 261965
loss: 0.9806033968925476,grad_norm: 0.7895305410586839, iteration: 261966
loss: 1.0049017667770386,grad_norm: 0.7989197982476253, iteration: 261967
loss: 0.9917035698890686,grad_norm: 0.8588920359063751, iteration: 261968
loss: 0.9744067788124084,grad_norm: 0.959778817350849, iteration: 261969
loss: 1.000662088394165,grad_norm: 0.9222280865422849, iteration: 261970
loss: 0.9953451752662659,grad_norm: 0.9999995306892306, iteration: 261971
loss: 1.0167654752731323,grad_norm: 0.999999199513961, iteration: 261972
loss: 0.9745144844055176,grad_norm: 0.9279759143115155, iteration: 261973
loss: 1.0005823373794556,grad_norm: 0.8446299142002853, iteration: 261974
loss: 1.007638692855835,grad_norm: 0.8623522657162918, iteration: 261975
loss: 0.973322331905365,grad_norm: 0.7875241920178498, iteration: 261976
loss: 0.9732723832130432,grad_norm: 0.8689650601869247, iteration: 261977
loss: 1.0392314195632935,grad_norm: 0.9126550062890995, iteration: 261978
loss: 1.015299916267395,grad_norm: 0.8397660565623551, iteration: 261979
loss: 1.031672716140747,grad_norm: 0.9999994751352778, iteration: 261980
loss: 0.9690130949020386,grad_norm: 0.8135933345065406, iteration: 261981
loss: 1.0041171312332153,grad_norm: 0.8447588148250225, iteration: 261982
loss: 0.9891980290412903,grad_norm: 0.8981024672818229, iteration: 261983
loss: 0.9971129298210144,grad_norm: 0.8597386939261596, iteration: 261984
loss: 1.00948965549469,grad_norm: 0.9999991573226661, iteration: 261985
loss: 0.9769734144210815,grad_norm: 0.8352705935984156, iteration: 261986
loss: 1.0321788787841797,grad_norm: 0.999999148151298, iteration: 261987
loss: 0.9344233274459839,grad_norm: 0.9999992518885071, iteration: 261988
loss: 0.9969726800918579,grad_norm: 0.9999991374539237, iteration: 261989
loss: 0.9974128603935242,grad_norm: 0.9600257772436078, iteration: 261990
loss: 1.014801263809204,grad_norm: 0.999998903999595, iteration: 261991
loss: 1.0065332651138306,grad_norm: 0.9760563366908356, iteration: 261992
loss: 0.9777419567108154,grad_norm: 0.7062316929514165, iteration: 261993
loss: 1.008493185043335,grad_norm: 0.7900567729042846, iteration: 261994
loss: 1.0261099338531494,grad_norm: 0.8307068694787927, iteration: 261995
loss: 1.0038460493087769,grad_norm: 0.8190287937109207, iteration: 261996
loss: 0.975111186504364,grad_norm: 0.8201775964970259, iteration: 261997
loss: 0.988042950630188,grad_norm: 0.7757428351694649, iteration: 261998
loss: 0.9650056958198547,grad_norm: 0.9009871263373267, iteration: 261999
loss: 0.9415210485458374,grad_norm: 0.9077054247902813, iteration: 262000
loss: 0.9907598495483398,grad_norm: 0.9999991463760395, iteration: 262001
loss: 1.0006762742996216,grad_norm: 0.773766303656058, iteration: 262002
loss: 1.0330777168273926,grad_norm: 0.7910305944171748, iteration: 262003
loss: 0.9753002524375916,grad_norm: 0.7051975869941056, iteration: 262004
loss: 1.0068252086639404,grad_norm: 0.9229585607957005, iteration: 262005
loss: 0.9805042147636414,grad_norm: 0.8967674764331393, iteration: 262006
loss: 0.987416684627533,grad_norm: 0.9643017599930932, iteration: 262007
loss: 0.9954196214675903,grad_norm: 0.9999991193222645, iteration: 262008
loss: 0.9834362268447876,grad_norm: 0.9999991990198762, iteration: 262009
loss: 0.9875837564468384,grad_norm: 0.9600313966966718, iteration: 262010
loss: 1.0175700187683105,grad_norm: 0.9999990954901423, iteration: 262011
loss: 0.9856880903244019,grad_norm: 0.7834576482472111, iteration: 262012
loss: 1.0356380939483643,grad_norm: 0.9999991809567429, iteration: 262013
loss: 1.0076887607574463,grad_norm: 0.8609795414957486, iteration: 262014
loss: 0.997525691986084,grad_norm: 0.9978706744111138, iteration: 262015
loss: 0.9423160552978516,grad_norm: 0.8309408204462777, iteration: 262016
loss: 1.0182936191558838,grad_norm: 0.8488708901360824, iteration: 262017
loss: 0.9986674189567566,grad_norm: 0.9999991939620422, iteration: 262018
loss: 1.0310635566711426,grad_norm: 0.9999991833923955, iteration: 262019
loss: 0.9679731130599976,grad_norm: 0.9047361985593732, iteration: 262020
loss: 1.0074737071990967,grad_norm: 0.8896036685598546, iteration: 262021
loss: 0.9861062169075012,grad_norm: 0.9999992865899896, iteration: 262022
loss: 1.0089775323867798,grad_norm: 0.9335351816001608, iteration: 262023
loss: 0.9887490272521973,grad_norm: 0.9999990832598223, iteration: 262024
loss: 1.000264286994934,grad_norm: 0.9980394485612264, iteration: 262025
loss: 0.9325922727584839,grad_norm: 0.8615970392457627, iteration: 262026
loss: 1.0218499898910522,grad_norm: 0.9570399996414535, iteration: 262027
loss: 1.0244176387786865,grad_norm: 0.7710639357768069, iteration: 262028
loss: 1.0231901407241821,grad_norm: 0.8639732913723993, iteration: 262029
loss: 0.9949828386306763,grad_norm: 0.9565911168137994, iteration: 262030
loss: 0.991824209690094,grad_norm: 0.8630448902864584, iteration: 262031
loss: 1.0336579084396362,grad_norm: 0.8705052407061095, iteration: 262032
loss: 0.9626727104187012,grad_norm: 0.9071642341932991, iteration: 262033
loss: 1.019461750984192,grad_norm: 0.9748330940508928, iteration: 262034
loss: 1.0036497116088867,grad_norm: 0.8606403085901686, iteration: 262035
loss: 1.0104150772094727,grad_norm: 0.8476425780271415, iteration: 262036
loss: 0.9809333086013794,grad_norm: 0.8845100161885003, iteration: 262037
loss: 0.995342493057251,grad_norm: 0.7883068927048997, iteration: 262038
loss: 1.0445486307144165,grad_norm: 0.9642178793876214, iteration: 262039
loss: 1.0235936641693115,grad_norm: 0.9896311292296978, iteration: 262040
loss: 0.9896211624145508,grad_norm: 0.7419754944317376, iteration: 262041
loss: 1.0112093687057495,grad_norm: 0.8903012864380541, iteration: 262042
loss: 1.0107544660568237,grad_norm: 0.9702387492366018, iteration: 262043
loss: 1.0149781703948975,grad_norm: 0.9522368525642136, iteration: 262044
loss: 0.9912230968475342,grad_norm: 0.9078244887990073, iteration: 262045
loss: 0.9786732792854309,grad_norm: 0.8951600002106431, iteration: 262046
loss: 1.0242547988891602,grad_norm: 0.9999992982578878, iteration: 262047
loss: 1.0127456188201904,grad_norm: 0.7647943356369394, iteration: 262048
loss: 0.9735031127929688,grad_norm: 0.892727317381164, iteration: 262049
loss: 0.9726987481117249,grad_norm: 0.936931923521225, iteration: 262050
loss: 0.9944685101509094,grad_norm: 0.8731932636303944, iteration: 262051
loss: 0.9958965182304382,grad_norm: 0.9999991639233679, iteration: 262052
loss: 1.0103482007980347,grad_norm: 0.9999991394911818, iteration: 262053
loss: 0.9767131805419922,grad_norm: 0.9341962708811457, iteration: 262054
loss: 0.9769671559333801,grad_norm: 0.7974122280595412, iteration: 262055
loss: 1.040008306503296,grad_norm: 0.9322044512670671, iteration: 262056
loss: 0.9975050091743469,grad_norm: 0.8903672203409679, iteration: 262057
loss: 1.0286942720413208,grad_norm: 0.7829034912852006, iteration: 262058
loss: 0.9629436731338501,grad_norm: 0.8389521315263846, iteration: 262059
loss: 0.9988502860069275,grad_norm: 0.8286218166703443, iteration: 262060
loss: 0.9900231957435608,grad_norm: 0.8117608260319664, iteration: 262061
loss: 1.0053480863571167,grad_norm: 0.7647690873574984, iteration: 262062
loss: 1.0344372987747192,grad_norm: 0.9999991214798852, iteration: 262063
loss: 0.9643710851669312,grad_norm: 0.9827589245432312, iteration: 262064
loss: 1.0145567655563354,grad_norm: 0.999999049190516, iteration: 262065
loss: 0.9800269603729248,grad_norm: 0.8410679874761621, iteration: 262066
loss: 1.0299450159072876,grad_norm: 0.8457160140221819, iteration: 262067
loss: 0.9966744780540466,grad_norm: 0.9505408057888757, iteration: 262068
loss: 1.018332600593567,grad_norm: 0.8961039126745037, iteration: 262069
loss: 0.9779965281486511,grad_norm: 0.9421306327713953, iteration: 262070
loss: 1.001717448234558,grad_norm: 0.7823776090584323, iteration: 262071
loss: 0.9756290912628174,grad_norm: 0.8190743486788974, iteration: 262072
loss: 1.0155589580535889,grad_norm: 0.9276966746926802, iteration: 262073
loss: 1.0760096311569214,grad_norm: 0.999999167784243, iteration: 262074
loss: 1.083957314491272,grad_norm: 0.9045533456679898, iteration: 262075
loss: 0.9919201135635376,grad_norm: 0.8832851931902282, iteration: 262076
loss: 0.9548259973526001,grad_norm: 0.9324774250697371, iteration: 262077
loss: 1.002873182296753,grad_norm: 0.8353816391555621, iteration: 262078
loss: 1.015934705734253,grad_norm: 0.9999993028037149, iteration: 262079
loss: 1.0161755084991455,grad_norm: 0.9999992141986573, iteration: 262080
loss: 1.0200119018554688,grad_norm: 0.7176196292292477, iteration: 262081
loss: 0.9995970129966736,grad_norm: 0.9410764776098882, iteration: 262082
loss: 0.9729610085487366,grad_norm: 0.8773698816943319, iteration: 262083
loss: 1.0185446739196777,grad_norm: 0.8283241967156721, iteration: 262084
loss: 1.0252765417099,grad_norm: 0.8190681781379654, iteration: 262085
loss: 1.0029617547988892,grad_norm: 0.7728923110702601, iteration: 262086
loss: 0.9926417469978333,grad_norm: 0.9253024979758016, iteration: 262087
loss: 0.9934439659118652,grad_norm: 0.7873989884589854, iteration: 262088
loss: 0.9726680517196655,grad_norm: 0.9017992801382007, iteration: 262089
loss: 0.9778192043304443,grad_norm: 0.8978482274310429, iteration: 262090
loss: 0.9722680449485779,grad_norm: 0.9799065116204704, iteration: 262091
loss: 0.9846668243408203,grad_norm: 0.7643703384685262, iteration: 262092
loss: 1.0981197357177734,grad_norm: 0.8961679083088699, iteration: 262093
loss: 0.9940537810325623,grad_norm: 0.8726808101365062, iteration: 262094
loss: 0.975093424320221,grad_norm: 0.8991087583454505, iteration: 262095
loss: 0.9966168403625488,grad_norm: 0.9999991603765799, iteration: 262096
loss: 1.0042638778686523,grad_norm: 0.9699145833998782, iteration: 262097
loss: 0.9783854484558105,grad_norm: 0.8597344409868443, iteration: 262098
loss: 1.0305161476135254,grad_norm: 0.8734206560232469, iteration: 262099
loss: 0.9501226544380188,grad_norm: 0.9577896568814824, iteration: 262100
loss: 0.9889531135559082,grad_norm: 0.8737614364459565, iteration: 262101
loss: 0.9764159321784973,grad_norm: 0.8732199939384037, iteration: 262102
loss: 0.9971895813941956,grad_norm: 0.9999992301338004, iteration: 262103
loss: 0.9778628945350647,grad_norm: 0.8629448580698875, iteration: 262104
loss: 1.0217931270599365,grad_norm: 0.8077222936001671, iteration: 262105
loss: 0.983134388923645,grad_norm: 0.8064580847367843, iteration: 262106
loss: 1.0005426406860352,grad_norm: 0.9999998637959769, iteration: 262107
loss: 0.9851233959197998,grad_norm: 0.7314278597480618, iteration: 262108
loss: 0.9925107359886169,grad_norm: 0.8199492679580003, iteration: 262109
loss: 0.9844642877578735,grad_norm: 0.7234978836771546, iteration: 262110
loss: 0.9787226915359497,grad_norm: 0.8961968967917224, iteration: 262111
loss: 1.029325008392334,grad_norm: 0.734558564638715, iteration: 262112
loss: 1.0965259075164795,grad_norm: 1.0000000372256075, iteration: 262113
loss: 1.0022566318511963,grad_norm: 0.9999999699882091, iteration: 262114
loss: 1.005956768989563,grad_norm: 0.8050425681171496, iteration: 262115
loss: 1.0054295063018799,grad_norm: 0.8028522042781648, iteration: 262116
loss: 0.9892465472221375,grad_norm: 0.9127044372322194, iteration: 262117
loss: 1.0083789825439453,grad_norm: 0.7799640656104189, iteration: 262118
loss: 1.020728349685669,grad_norm: 0.8816530361176368, iteration: 262119
loss: 0.9831628799438477,grad_norm: 0.8018625401373481, iteration: 262120
loss: 0.9585468769073486,grad_norm: 0.8182780699757759, iteration: 262121
loss: 0.999877393245697,grad_norm: 0.9257942801781337, iteration: 262122
loss: 1.0193027257919312,grad_norm: 0.7761603513023372, iteration: 262123
loss: 1.036781907081604,grad_norm: 0.9766984664317829, iteration: 262124
loss: 0.9689855575561523,grad_norm: 0.9402953234489121, iteration: 262125
loss: 0.9821824431419373,grad_norm: 0.9959357560266511, iteration: 262126
loss: 0.9754431843757629,grad_norm: 0.9999990913453276, iteration: 262127
loss: 0.9904935956001282,grad_norm: 0.999999399655656, iteration: 262128
loss: 0.9925687909126282,grad_norm: 0.9824118737550831, iteration: 262129
loss: 0.9991770386695862,grad_norm: 0.7862221977154974, iteration: 262130
loss: 0.9981734752655029,grad_norm: 0.9999990295607323, iteration: 262131
loss: 1.0111008882522583,grad_norm: 0.9682492685410204, iteration: 262132
loss: 0.9767402410507202,grad_norm: 0.981924921488869, iteration: 262133
loss: 1.003372311592102,grad_norm: 0.9999992247704801, iteration: 262134
loss: 1.0057963132858276,grad_norm: 0.9999991937390446, iteration: 262135
loss: 0.9871121644973755,grad_norm: 0.7596917716040587, iteration: 262136
loss: 0.9620534777641296,grad_norm: 0.7889248228641121, iteration: 262137
loss: 0.9991535544395447,grad_norm: 0.7806134993232589, iteration: 262138
loss: 0.9995501637458801,grad_norm: 0.9095275775482452, iteration: 262139
loss: 1.0181689262390137,grad_norm: 0.9836500725757028, iteration: 262140
loss: 1.0147899389266968,grad_norm: 0.9952938040600883, iteration: 262141
loss: 0.9897584319114685,grad_norm: 0.8609307506223884, iteration: 262142
loss: 0.973362386226654,grad_norm: 0.9561839659610781, iteration: 262143
loss: 0.9917012453079224,grad_norm: 0.8979285907066176, iteration: 262144
loss: 1.0075770616531372,grad_norm: 0.9999989094992543, iteration: 262145
loss: 1.0121594667434692,grad_norm: 0.8626888514950928, iteration: 262146
loss: 0.980097234249115,grad_norm: 0.8450730678500259, iteration: 262147
loss: 1.0042845010757446,grad_norm: 0.9107319998886101, iteration: 262148
loss: 0.9584551453590393,grad_norm: 0.9999990320192731, iteration: 262149
loss: 0.981888473033905,grad_norm: 0.9158910673247386, iteration: 262150
loss: 1.0478352308273315,grad_norm: 0.981638468749869, iteration: 262151
loss: 0.9955704808235168,grad_norm: 0.8345402568077662, iteration: 262152
loss: 0.9939259886741638,grad_norm: 0.9305363409277316, iteration: 262153
loss: 0.992384135723114,grad_norm: 0.9288498598096402, iteration: 262154
loss: 0.9966394901275635,grad_norm: 0.8677438729101903, iteration: 262155
loss: 0.9936083555221558,grad_norm: 0.855887773240749, iteration: 262156
loss: 1.0292047262191772,grad_norm: 0.9999991268273694, iteration: 262157
loss: 0.9874325394630432,grad_norm: 0.9999993576574141, iteration: 262158
loss: 0.9753531813621521,grad_norm: 0.9984288223702881, iteration: 262159
loss: 0.9743874073028564,grad_norm: 0.9999990900275224, iteration: 262160
loss: 1.0045994520187378,grad_norm: 0.769254822091243, iteration: 262161
loss: 0.9880034327507019,grad_norm: 0.8764748485745979, iteration: 262162
loss: 1.0241730213165283,grad_norm: 0.7041927792483614, iteration: 262163
loss: 1.007637619972229,grad_norm: 0.9999992892618672, iteration: 262164
loss: 1.002353310585022,grad_norm: 0.8090506137798346, iteration: 262165
loss: 0.9935123920440674,grad_norm: 0.8266732231585707, iteration: 262166
loss: 1.0252785682678223,grad_norm: 0.8992001149548786, iteration: 262167
loss: 0.9877532720565796,grad_norm: 0.7821771427879729, iteration: 262168
loss: 1.0178881883621216,grad_norm: 0.8039801317186342, iteration: 262169
loss: 1.034936547279358,grad_norm: 0.9339069852470814, iteration: 262170
loss: 0.9768992066383362,grad_norm: 0.8367138704433158, iteration: 262171
loss: 0.9601379036903381,grad_norm: 0.8836592087749966, iteration: 262172
loss: 1.0022135972976685,grad_norm: 0.9999990056193913, iteration: 262173
loss: 0.9437386393547058,grad_norm: 0.9465696884085607, iteration: 262174
loss: 1.0030142068862915,grad_norm: 0.9999992165044651, iteration: 262175
loss: 0.9742069840431213,grad_norm: 0.9076722800244099, iteration: 262176
loss: 1.003360629081726,grad_norm: 0.8625401774557897, iteration: 262177
loss: 1.0056875944137573,grad_norm: 0.8986732193200251, iteration: 262178
loss: 0.9702224731445312,grad_norm: 0.7958149654717207, iteration: 262179
loss: 0.9794421792030334,grad_norm: 0.9212086184196931, iteration: 262180
loss: 1.015125036239624,grad_norm: 0.9999993460222358, iteration: 262181
loss: 0.9995664954185486,grad_norm: 0.8915106170333791, iteration: 262182
loss: 1.026979923248291,grad_norm: 0.7952251083848368, iteration: 262183
loss: 1.018257975578308,grad_norm: 0.9055938841445244, iteration: 262184
loss: 0.9775272607803345,grad_norm: 0.9999991469538883, iteration: 262185
loss: 1.0525145530700684,grad_norm: 0.9999999444903455, iteration: 262186
loss: 1.0080420970916748,grad_norm: 0.9999998236928059, iteration: 262187
loss: 1.0045690536499023,grad_norm: 0.9047081804543912, iteration: 262188
loss: 0.9943945407867432,grad_norm: 0.9999991210601029, iteration: 262189
loss: 0.9513275027275085,grad_norm: 0.8388140170694418, iteration: 262190
loss: 1.0118741989135742,grad_norm: 0.9006289965914003, iteration: 262191
loss: 1.0113316774368286,grad_norm: 0.9999990854724767, iteration: 262192
loss: 1.0051456689834595,grad_norm: 0.9217502843920148, iteration: 262193
loss: 0.9801583886146545,grad_norm: 0.9044855209498426, iteration: 262194
loss: 0.9777606725692749,grad_norm: 0.9895319436094645, iteration: 262195
loss: 1.0471713542938232,grad_norm: 0.8661393474638476, iteration: 262196
loss: 0.9687521457672119,grad_norm: 0.9999990165379165, iteration: 262197
loss: 1.011940360069275,grad_norm: 0.9382930846153681, iteration: 262198
loss: 0.9818292856216431,grad_norm: 0.9255315095280386, iteration: 262199
loss: 1.0189367532730103,grad_norm: 0.8849471571278702, iteration: 262200
loss: 1.0397038459777832,grad_norm: 0.9999998369778087, iteration: 262201
loss: 1.0247209072113037,grad_norm: 0.867397093188757, iteration: 262202
loss: 0.9594972133636475,grad_norm: 0.8854559423894991, iteration: 262203
loss: 0.9796024560928345,grad_norm: 0.8246958959379878, iteration: 262204
loss: 0.9873424768447876,grad_norm: 0.7772509026887819, iteration: 262205
loss: 0.9748397469520569,grad_norm: 0.7843157261698785, iteration: 262206
loss: 1.022913932800293,grad_norm: 0.9031257749405781, iteration: 262207
loss: 1.0239218473434448,grad_norm: 0.8178185313630447, iteration: 262208
loss: 1.009476900100708,grad_norm: 0.9121284902345141, iteration: 262209
loss: 1.0624405145645142,grad_norm: 0.9999991360447064, iteration: 262210
loss: 0.9721948504447937,grad_norm: 0.9834104844752596, iteration: 262211
loss: 0.9385043978691101,grad_norm: 0.8437330885427324, iteration: 262212
loss: 0.9849529266357422,grad_norm: 0.9999991581171763, iteration: 262213
loss: 1.0166031122207642,grad_norm: 0.9095322733586977, iteration: 262214
loss: 0.9789493083953857,grad_norm: 0.8916501260104664, iteration: 262215
loss: 0.9869544506072998,grad_norm: 0.9999990679980187, iteration: 262216
loss: 1.0268805027008057,grad_norm: 0.7874934948748146, iteration: 262217
loss: 0.9916577339172363,grad_norm: 0.9999990621184375, iteration: 262218
loss: 1.0362519025802612,grad_norm: 0.9495252955319932, iteration: 262219
loss: 1.0141557455062866,grad_norm: 0.9773828334778942, iteration: 262220
loss: 0.9926161170005798,grad_norm: 0.8340951730939078, iteration: 262221
loss: 0.9892933964729309,grad_norm: 0.9581479952925664, iteration: 262222
loss: 0.9863795638084412,grad_norm: 0.8035024182016066, iteration: 262223
loss: 1.0122262239456177,grad_norm: 0.9334633626035014, iteration: 262224
loss: 1.0027717351913452,grad_norm: 0.7575009293963221, iteration: 262225
loss: 1.0324536561965942,grad_norm: 0.99999899970777, iteration: 262226
loss: 0.9919098615646362,grad_norm: 0.8680156070709536, iteration: 262227
loss: 0.9869116544723511,grad_norm: 0.8775111206736916, iteration: 262228
loss: 0.9977532029151917,grad_norm: 0.7777682277273231, iteration: 262229
loss: 1.014908790588379,grad_norm: 0.9999991533655119, iteration: 262230
loss: 1.0241050720214844,grad_norm: 0.9366676053842387, iteration: 262231
loss: 0.9804674386978149,grad_norm: 0.810110047883204, iteration: 262232
loss: 0.9631684422492981,grad_norm: 0.9588909269803749, iteration: 262233
loss: 0.9401847720146179,grad_norm: 0.9999990323823671, iteration: 262234
loss: 1.0176444053649902,grad_norm: 0.9176295332973382, iteration: 262235
loss: 1.0280364751815796,grad_norm: 0.9999989701313624, iteration: 262236
loss: 0.9819223284721375,grad_norm: 0.8245641649084038, iteration: 262237
loss: 1.0625617504119873,grad_norm: 0.9552053743291866, iteration: 262238
loss: 0.9934921264648438,grad_norm: 0.819297790409021, iteration: 262239
loss: 1.0282981395721436,grad_norm: 0.9268201105097732, iteration: 262240
loss: 0.9978544116020203,grad_norm: 0.999999263425663, iteration: 262241
loss: 1.015633463859558,grad_norm: 0.9152302736337141, iteration: 262242
loss: 1.0136338472366333,grad_norm: 0.9019417477105651, iteration: 262243
loss: 0.9906510710716248,grad_norm: 0.9047905790347951, iteration: 262244
loss: 1.0004311800003052,grad_norm: 0.9999994067511625, iteration: 262245
loss: 1.0239044427871704,grad_norm: 0.9866500693218275, iteration: 262246
loss: 1.0097503662109375,grad_norm: 0.9979193193737385, iteration: 262247
loss: 0.9798954129219055,grad_norm: 0.9944906546099431, iteration: 262248
loss: 0.9904985427856445,grad_norm: 0.9311880069670116, iteration: 262249
loss: 0.9989432096481323,grad_norm: 0.9999991315241262, iteration: 262250
loss: 1.0090785026550293,grad_norm: 0.9999992016284562, iteration: 262251
loss: 1.0145399570465088,grad_norm: 0.8655283723871002, iteration: 262252
loss: 1.0053094625473022,grad_norm: 0.8681232972983778, iteration: 262253
loss: 1.0013872385025024,grad_norm: 0.9999991184966118, iteration: 262254
loss: 0.9889237284660339,grad_norm: 0.7443571167677664, iteration: 262255
loss: 0.9593440294265747,grad_norm: 0.8197009448358954, iteration: 262256
loss: 1.0119383335113525,grad_norm: 0.8340789451680588, iteration: 262257
loss: 1.024824619293213,grad_norm: 0.9999990857475157, iteration: 262258
loss: 1.002137303352356,grad_norm: 0.8235396507243005, iteration: 262259
loss: 0.9886795878410339,grad_norm: 0.9150686326040692, iteration: 262260
loss: 1.007404088973999,grad_norm: 0.8788998776260144, iteration: 262261
loss: 1.0278663635253906,grad_norm: 0.8392794877319177, iteration: 262262
loss: 1.0093797445297241,grad_norm: 0.9176867124736237, iteration: 262263
loss: 0.9839658737182617,grad_norm: 0.8040683100367217, iteration: 262264
loss: 0.9771537780761719,grad_norm: 0.7960006630237854, iteration: 262265
loss: 0.9877603650093079,grad_norm: 0.9145718495077763, iteration: 262266
loss: 0.9657129645347595,grad_norm: 0.8277999855215036, iteration: 262267
loss: 1.0267984867095947,grad_norm: 0.9355402433135037, iteration: 262268
loss: 0.9854975938796997,grad_norm: 0.9483270595878901, iteration: 262269
loss: 1.0094873905181885,grad_norm: 0.7965907396918541, iteration: 262270
loss: 0.9573279023170471,grad_norm: 0.8606059930271056, iteration: 262271
loss: 0.9967718124389648,grad_norm: 0.9845395607883176, iteration: 262272
loss: 0.9941926598548889,grad_norm: 0.737371114260635, iteration: 262273
loss: 1.007545828819275,grad_norm: 0.9194166564241902, iteration: 262274
loss: 1.0124170780181885,grad_norm: 0.6742213102251536, iteration: 262275
loss: 1.0231781005859375,grad_norm: 0.8876589751925764, iteration: 262276
loss: 0.9956969618797302,grad_norm: 0.9067714041488184, iteration: 262277
loss: 0.93105149269104,grad_norm: 0.8944536898043534, iteration: 262278
loss: 1.0081616640090942,grad_norm: 0.8552798909746592, iteration: 262279
loss: 0.9999523758888245,grad_norm: 0.7909964207943134, iteration: 262280
loss: 1.0491626262664795,grad_norm: 0.9267615008036394, iteration: 262281
loss: 1.0320134162902832,grad_norm: 0.9999991635449629, iteration: 262282
loss: 0.9913129806518555,grad_norm: 0.9830879679968435, iteration: 262283
loss: 0.9986434578895569,grad_norm: 0.8828994504209309, iteration: 262284
loss: 0.9795392155647278,grad_norm: 0.994110708708193, iteration: 262285
loss: 1.0216245651245117,grad_norm: 0.921266525877319, iteration: 262286
loss: 1.0542218685150146,grad_norm: 0.9097729862330786, iteration: 262287
loss: 0.9965493679046631,grad_norm: 0.7757460232969035, iteration: 262288
loss: 0.9724019169807434,grad_norm: 0.890447400890316, iteration: 262289
loss: 1.0217399597167969,grad_norm: 0.829689095109371, iteration: 262290
loss: 0.9492975473403931,grad_norm: 0.8859141083267181, iteration: 262291
loss: 1.0094146728515625,grad_norm: 0.9438039867123902, iteration: 262292
loss: 1.0238651037216187,grad_norm: 0.6970330755076803, iteration: 262293
loss: 1.0048640966415405,grad_norm: 0.8765808545383321, iteration: 262294
loss: 1.0259381532669067,grad_norm: 0.9999998184249608, iteration: 262295
loss: 0.9802406430244446,grad_norm: 0.8791797554673316, iteration: 262296
loss: 0.9596651792526245,grad_norm: 0.9058000801417666, iteration: 262297
loss: 1.006086826324463,grad_norm: 0.7052196594341259, iteration: 262298
loss: 0.9952715039253235,grad_norm: 0.8909021379626243, iteration: 262299
loss: 1.0036194324493408,grad_norm: 0.8620291505469574, iteration: 262300
loss: 1.0230703353881836,grad_norm: 0.9159017926728924, iteration: 262301
loss: 0.9868587255477905,grad_norm: 0.7799468245025046, iteration: 262302
loss: 1.0602717399597168,grad_norm: 0.9999994860316487, iteration: 262303
loss: 1.0092172622680664,grad_norm: 0.9187206139220898, iteration: 262304
loss: 1.0183693170547485,grad_norm: 0.9723529972730205, iteration: 262305
loss: 0.9923632144927979,grad_norm: 0.7962914537966536, iteration: 262306
loss: 1.0533382892608643,grad_norm: 0.8344504350930496, iteration: 262307
loss: 1.0095733404159546,grad_norm: 0.7850430620248346, iteration: 262308
loss: 0.9938406348228455,grad_norm: 0.9636808578387768, iteration: 262309
loss: 0.9976378083229065,grad_norm: 0.9999992035111535, iteration: 262310
loss: 0.9789384603500366,grad_norm: 0.7721046691504079, iteration: 262311
loss: 0.9726868271827698,grad_norm: 0.7791123047751127, iteration: 262312
loss: 1.0201011896133423,grad_norm: 0.8523039182352605, iteration: 262313
loss: 0.9852433800697327,grad_norm: 0.8383268323478476, iteration: 262314
loss: 0.9551352858543396,grad_norm: 0.9999990633361597, iteration: 262315
loss: 0.9747068285942078,grad_norm: 0.9818175951175449, iteration: 262316
loss: 0.993740439414978,grad_norm: 0.9003265508830314, iteration: 262317
loss: 1.0015952587127686,grad_norm: 0.8375043785721605, iteration: 262318
loss: 1.0083613395690918,grad_norm: 0.9095852388388234, iteration: 262319
loss: 1.0247641801834106,grad_norm: 0.9999992522556821, iteration: 262320
loss: 0.967244565486908,grad_norm: 0.7645863512460387, iteration: 262321
loss: 1.0623159408569336,grad_norm: 0.999999392138425, iteration: 262322
loss: 0.9962640404701233,grad_norm: 0.8946795067606047, iteration: 262323
loss: 0.9714339971542358,grad_norm: 0.8608220644529909, iteration: 262324
loss: 0.9627383351325989,grad_norm: 0.9161118959696182, iteration: 262325
loss: 0.9900450706481934,grad_norm: 0.9999991885749099, iteration: 262326
loss: 1.0102697610855103,grad_norm: 0.8512969118607566, iteration: 262327
loss: 1.0298891067504883,grad_norm: 0.9999991245837986, iteration: 262328
loss: 0.9827345013618469,grad_norm: 0.8967993523750944, iteration: 262329
loss: 1.0607519149780273,grad_norm: 0.999999076868955, iteration: 262330
loss: 1.0360978841781616,grad_norm: 0.9999990384642578, iteration: 262331
loss: 1.0235369205474854,grad_norm: 0.8227334638492562, iteration: 262332
loss: 0.9548638463020325,grad_norm: 0.7349062475383162, iteration: 262333
loss: 0.9920322895050049,grad_norm: 0.8472755657814911, iteration: 262334
loss: 1.0018339157104492,grad_norm: 0.9999990792087647, iteration: 262335
loss: 1.025896668434143,grad_norm: 0.9599109187768572, iteration: 262336
loss: 1.0423107147216797,grad_norm: 0.7853004130770543, iteration: 262337
loss: 0.9984503388404846,grad_norm: 0.8370014317543242, iteration: 262338
loss: 0.9879130721092224,grad_norm: 0.8756431456420317, iteration: 262339
loss: 1.0378669500350952,grad_norm: 0.999999845640133, iteration: 262340
loss: 1.0262309312820435,grad_norm: 0.8941209282837539, iteration: 262341
loss: 0.9864113330841064,grad_norm: 0.945719072275817, iteration: 262342
loss: 1.0321722030639648,grad_norm: 0.9840968118617359, iteration: 262343
loss: 0.9499621987342834,grad_norm: 0.8599772409773139, iteration: 262344
loss: 1.0182033777236938,grad_norm: 0.8590978851524175, iteration: 262345
loss: 0.9866787195205688,grad_norm: 0.8731385647054806, iteration: 262346
loss: 1.0284186601638794,grad_norm: 0.9999990751710078, iteration: 262347
loss: 0.9894163012504578,grad_norm: 0.8127934484790879, iteration: 262348
loss: 0.992936909198761,grad_norm: 0.91819358649922, iteration: 262349
loss: 0.9782961010932922,grad_norm: 0.9782918402809037, iteration: 262350
loss: 0.9573154449462891,grad_norm: 0.9754180818109569, iteration: 262351
loss: 0.9598858952522278,grad_norm: 0.8679674692664772, iteration: 262352
loss: 1.0162707567214966,grad_norm: 0.828520555161804, iteration: 262353
loss: 0.9969928860664368,grad_norm: 0.9999990933305231, iteration: 262354
loss: 1.0049546957015991,grad_norm: 0.882505094468453, iteration: 262355
loss: 1.010846495628357,grad_norm: 0.7906247287603833, iteration: 262356
loss: 0.9778297543525696,grad_norm: 0.7375814105323331, iteration: 262357
loss: 1.028016448020935,grad_norm: 0.999999163640895, iteration: 262358
loss: 0.9806137681007385,grad_norm: 0.9840997455815204, iteration: 262359
loss: 1.0026546716690063,grad_norm: 0.9162910413047646, iteration: 262360
loss: 1.111238956451416,grad_norm: 0.9999996113786968, iteration: 262361
loss: 0.975615918636322,grad_norm: 0.9999990777880957, iteration: 262362
loss: 1.0125675201416016,grad_norm: 0.8698160627431333, iteration: 262363
loss: 1.0079652070999146,grad_norm: 0.8140883120481611, iteration: 262364
loss: 0.9820869565010071,grad_norm: 0.8549047154171691, iteration: 262365
loss: 1.1009914875030518,grad_norm: 0.9999994703912143, iteration: 262366
loss: 0.9975350499153137,grad_norm: 0.901665008000472, iteration: 262367
loss: 0.9854509234428406,grad_norm: 0.9016957865016715, iteration: 262368
loss: 0.9819425344467163,grad_norm: 0.8382080846945631, iteration: 262369
loss: 0.9709882736206055,grad_norm: 0.9999991283785524, iteration: 262370
loss: 1.0003679990768433,grad_norm: 0.801452479032373, iteration: 262371
loss: 1.0228567123413086,grad_norm: 0.999999166284232, iteration: 262372
loss: 0.9711716175079346,grad_norm: 0.9650419093958923, iteration: 262373
loss: 1.0062611103057861,grad_norm: 0.9867621403043775, iteration: 262374
loss: 0.9680812358856201,grad_norm: 0.9999990098319981, iteration: 262375
loss: 1.0099345445632935,grad_norm: 0.800799952433865, iteration: 262376
loss: 1.0105994939804077,grad_norm: 0.9999989546951669, iteration: 262377
loss: 1.005974531173706,grad_norm: 0.8992269332313233, iteration: 262378
loss: 1.0473681688308716,grad_norm: 0.8862391306113739, iteration: 262379
loss: 1.00676429271698,grad_norm: 0.9247465437983513, iteration: 262380
loss: 1.0278406143188477,grad_norm: 0.9067429137623721, iteration: 262381
loss: 1.002634048461914,grad_norm: 0.7996672101924297, iteration: 262382
loss: 1.0130820274353027,grad_norm: 0.999999035758104, iteration: 262383
loss: 1.0021029710769653,grad_norm: 0.8896181638206906, iteration: 262384
loss: 0.9504291415214539,grad_norm: 0.9119786984231063, iteration: 262385
loss: 0.9567335844039917,grad_norm: 0.9175037604796538, iteration: 262386
loss: 1.0232855081558228,grad_norm: 0.999999098980376, iteration: 262387
loss: 1.1107906103134155,grad_norm: 0.9999995622036344, iteration: 262388
loss: 1.0296154022216797,grad_norm: 0.9099766471427122, iteration: 262389
loss: 0.9479671120643616,grad_norm: 0.9697341705088045, iteration: 262390
loss: 1.0061241388320923,grad_norm: 0.9989853361676914, iteration: 262391
loss: 1.0771756172180176,grad_norm: 0.9999990700460887, iteration: 262392
loss: 1.0507322549819946,grad_norm: 0.7857152402270423, iteration: 262393
loss: 0.9847297668457031,grad_norm: 0.8059470355531684, iteration: 262394
loss: 1.0055757761001587,grad_norm: 0.9763086377883228, iteration: 262395
loss: 0.987528920173645,grad_norm: 0.8632914998454695, iteration: 262396
loss: 0.9533933401107788,grad_norm: 0.8562559664507422, iteration: 262397
loss: 1.0361210107803345,grad_norm: 0.9999998573304039, iteration: 262398
loss: 1.0417462587356567,grad_norm: 0.9999992848500734, iteration: 262399
loss: 1.0007545948028564,grad_norm: 0.9999991138021235, iteration: 262400
loss: 0.9605226516723633,grad_norm: 0.8761250807892405, iteration: 262401
loss: 0.9966425895690918,grad_norm: 0.9252005495220235, iteration: 262402
loss: 1.0118731260299683,grad_norm: 0.7927677363642587, iteration: 262403
loss: 1.017417073249817,grad_norm: 0.6773903034478, iteration: 262404
loss: 1.0249451398849487,grad_norm: 0.9601206009900588, iteration: 262405
loss: 1.034793496131897,grad_norm: 0.9521645831620811, iteration: 262406
loss: 1.0141793489456177,grad_norm: 0.9999990158149997, iteration: 262407
loss: 1.0493499040603638,grad_norm: 0.7582027706630391, iteration: 262408
loss: 0.9852012991905212,grad_norm: 0.8211615292653437, iteration: 262409
loss: 1.0065534114837646,grad_norm: 0.9304515861393886, iteration: 262410
loss: 0.9590854644775391,grad_norm: 0.7943883954068546, iteration: 262411
loss: 0.9654208421707153,grad_norm: 0.8101653330186106, iteration: 262412
loss: 1.0119554996490479,grad_norm: 0.9999992056672568, iteration: 262413
loss: 0.9969489574432373,grad_norm: 0.9999990537191096, iteration: 262414
loss: 0.9657609462738037,grad_norm: 0.9382387877950538, iteration: 262415
loss: 1.0680824518203735,grad_norm: 0.9614151475859782, iteration: 262416
loss: 1.0062779188156128,grad_norm: 0.8430900617073339, iteration: 262417
loss: 1.033333659172058,grad_norm: 0.845185329544288, iteration: 262418
loss: 0.968811571598053,grad_norm: 0.9999991768370715, iteration: 262419
loss: 0.999840497970581,grad_norm: 0.9026536635549282, iteration: 262420
loss: 1.0014243125915527,grad_norm: 0.9999991233923872, iteration: 262421
loss: 0.987795352935791,grad_norm: 0.9063351072157891, iteration: 262422
loss: 1.0025880336761475,grad_norm: 0.9197255994977579, iteration: 262423
loss: 0.9475888013839722,grad_norm: 0.8974924961495997, iteration: 262424
loss: 1.0118037462234497,grad_norm: 0.9999990813995212, iteration: 262425
loss: 0.9960625767707825,grad_norm: 0.8470358627172487, iteration: 262426
loss: 0.9946934580802917,grad_norm: 0.9999992573861809, iteration: 262427
loss: 1.0159934759140015,grad_norm: 0.8915047229027638, iteration: 262428
loss: 1.023234486579895,grad_norm: 0.9999995377221118, iteration: 262429
loss: 1.0306161642074585,grad_norm: 0.9333043441182802, iteration: 262430
loss: 0.9862778782844543,grad_norm: 0.6322358321200251, iteration: 262431
loss: 1.0208708047866821,grad_norm: 0.9751260995601592, iteration: 262432
loss: 1.0618267059326172,grad_norm: 0.9999994834993967, iteration: 262433
loss: 1.0208702087402344,grad_norm: 0.8714156886966103, iteration: 262434
loss: 0.9756519198417664,grad_norm: 0.8544365391551574, iteration: 262435
loss: 1.0119246244430542,grad_norm: 0.8692183288607571, iteration: 262436
loss: 1.021401286125183,grad_norm: 0.9999998061062078, iteration: 262437
loss: 0.9943749904632568,grad_norm: 0.7749296907356883, iteration: 262438
loss: 1.0055177211761475,grad_norm: 0.9999995491088202, iteration: 262439
loss: 1.01438570022583,grad_norm: 0.9999991013967902, iteration: 262440
loss: 0.9989573359489441,grad_norm: 0.9275643398668386, iteration: 262441
loss: 1.018140196800232,grad_norm: 0.9823427794073226, iteration: 262442
loss: 1.0154213905334473,grad_norm: 0.9999991789598093, iteration: 262443
loss: 0.9885589480400085,grad_norm: 0.9999991523977264, iteration: 262444
loss: 0.9846287965774536,grad_norm: 0.873866770197063, iteration: 262445
loss: 0.9729230999946594,grad_norm: 0.8637928435308111, iteration: 262446
loss: 1.000013828277588,grad_norm: 0.8149795072916247, iteration: 262447
loss: 0.9910357594490051,grad_norm: 0.9999993500712802, iteration: 262448
loss: 0.9845714569091797,grad_norm: 0.999999005091414, iteration: 262449
loss: 0.94500732421875,grad_norm: 0.8153651849592458, iteration: 262450
loss: 1.001667857170105,grad_norm: 0.9999991060565806, iteration: 262451
loss: 1.0087060928344727,grad_norm: 0.8653665233914976, iteration: 262452
loss: 1.0081548690795898,grad_norm: 0.7256492405071041, iteration: 262453
loss: 1.0239254236221313,grad_norm: 0.9999993233953353, iteration: 262454
loss: 0.970933735370636,grad_norm: 0.9999990975285765, iteration: 262455
loss: 0.9828064441680908,grad_norm: 0.9053942978414398, iteration: 262456
loss: 0.9739161729812622,grad_norm: 0.7943400252973755, iteration: 262457
loss: 1.0145072937011719,grad_norm: 0.8376665433731251, iteration: 262458
loss: 0.9783567190170288,grad_norm: 0.8134266994557198, iteration: 262459
loss: 0.9925199151039124,grad_norm: 0.7595985770117715, iteration: 262460
loss: 0.975740373134613,grad_norm: 0.7958635700077168, iteration: 262461
loss: 0.9993025660514832,grad_norm: 0.845735226518444, iteration: 262462
loss: 0.9983997344970703,grad_norm: 0.883041542858536, iteration: 262463
loss: 0.993461012840271,grad_norm: 0.7558736252688607, iteration: 262464
loss: 0.9966806769371033,grad_norm: 0.8921649375472093, iteration: 262465
loss: 1.0083544254302979,grad_norm: 0.9999994173357328, iteration: 262466
loss: 0.9549756050109863,grad_norm: 0.7854059137525564, iteration: 262467
loss: 1.0064762830734253,grad_norm: 0.7894456636176216, iteration: 262468
loss: 0.9360377192497253,grad_norm: 0.944341402666559, iteration: 262469
loss: 1.0081660747528076,grad_norm: 0.8508052192826516, iteration: 262470
loss: 1.0315182209014893,grad_norm: 0.9999990944257607, iteration: 262471
loss: 0.9616886377334595,grad_norm: 0.7036564143222253, iteration: 262472
loss: 1.0367767810821533,grad_norm: 0.7646673868304917, iteration: 262473
loss: 0.999701976776123,grad_norm: 0.9939496422162899, iteration: 262474
loss: 1.0098145008087158,grad_norm: 0.9999992707744576, iteration: 262475
loss: 1.0240411758422852,grad_norm: 0.7710352223148078, iteration: 262476
loss: 1.028767466545105,grad_norm: 0.8132725716328194, iteration: 262477
loss: 0.9876123666763306,grad_norm: 0.9999990383466244, iteration: 262478
loss: 1.0022072792053223,grad_norm: 0.7923494274597499, iteration: 262479
loss: 1.0282350778579712,grad_norm: 0.8681297848497628, iteration: 262480
loss: 1.021829605102539,grad_norm: 0.8926267528810624, iteration: 262481
loss: 1.0066310167312622,grad_norm: 0.816568212510607, iteration: 262482
loss: 0.9772018790245056,grad_norm: 0.8731532277092598, iteration: 262483
loss: 1.0084633827209473,grad_norm: 0.7724455673585827, iteration: 262484
loss: 1.0140471458435059,grad_norm: 0.9046873513675883, iteration: 262485
loss: 1.0027283430099487,grad_norm: 0.8957862455751117, iteration: 262486
loss: 0.9950189590454102,grad_norm: 0.9609493004584869, iteration: 262487
loss: 1.0417143106460571,grad_norm: 0.9999996488435392, iteration: 262488
loss: 1.0248448848724365,grad_norm: 0.9999991734699533, iteration: 262489
loss: 0.9842627644538879,grad_norm: 0.9413007942234293, iteration: 262490
loss: 1.0138723850250244,grad_norm: 0.9999991370548221, iteration: 262491
loss: 1.0421777963638306,grad_norm: 0.9999998293802528, iteration: 262492
loss: 0.994169294834137,grad_norm: 0.7954487983254478, iteration: 262493
loss: 0.9787164926528931,grad_norm: 0.7993031302142537, iteration: 262494
loss: 0.9606100916862488,grad_norm: 0.873461185285407, iteration: 262495
loss: 0.9924196004867554,grad_norm: 0.9999998300325654, iteration: 262496
loss: 0.9258617758750916,grad_norm: 0.8546718411427519, iteration: 262497
loss: 0.9841620922088623,grad_norm: 0.9197544446031394, iteration: 262498
loss: 1.0103676319122314,grad_norm: 0.9448084302733015, iteration: 262499
loss: 0.9992479085922241,grad_norm: 0.9999992407166333, iteration: 262500
loss: 1.0437546968460083,grad_norm: 0.9301952061681081, iteration: 262501
loss: 1.0010277032852173,grad_norm: 0.9138138985632827, iteration: 262502
loss: 1.0213748216629028,grad_norm: 0.8977359416472849, iteration: 262503
loss: 0.9822121858596802,grad_norm: 0.9986390640186575, iteration: 262504
loss: 0.9718105792999268,grad_norm: 0.7115902018849691, iteration: 262505
loss: 0.981783926486969,grad_norm: 0.9999990583099573, iteration: 262506
loss: 1.0085177421569824,grad_norm: 0.9999991120208744, iteration: 262507
loss: 0.9752365946769714,grad_norm: 0.9136554801093244, iteration: 262508
loss: 0.9707097411155701,grad_norm: 0.9999990185197903, iteration: 262509
loss: 1.10616934299469,grad_norm: 0.930061534485722, iteration: 262510
loss: 0.9816631078720093,grad_norm: 0.9999991064202414, iteration: 262511
loss: 1.0239216089248657,grad_norm: 0.7812332158109848, iteration: 262512
loss: 1.0009115934371948,grad_norm: 0.7847864112188, iteration: 262513
loss: 1.019213080406189,grad_norm: 0.9999989619318602, iteration: 262514
loss: 0.9803158044815063,grad_norm: 0.8895776366334346, iteration: 262515
loss: 1.0267254114151,grad_norm: 0.8605968827663415, iteration: 262516
loss: 0.9747485518455505,grad_norm: 0.9999990976365056, iteration: 262517
loss: 0.9423062801361084,grad_norm: 0.7525973785049428, iteration: 262518
loss: 0.9903261065483093,grad_norm: 0.7746124324261807, iteration: 262519
loss: 1.001855731010437,grad_norm: 0.980723734368019, iteration: 262520
loss: 1.0024958848953247,grad_norm: 0.999999217150197, iteration: 262521
loss: 1.0024926662445068,grad_norm: 0.8341563810160745, iteration: 262522
loss: 1.0040913820266724,grad_norm: 0.9999992542014333, iteration: 262523
loss: 1.0209087133407593,grad_norm: 0.9195138025728551, iteration: 262524
loss: 1.0097745656967163,grad_norm: 0.9999990824969637, iteration: 262525
loss: 0.9922666549682617,grad_norm: 0.808267929165398, iteration: 262526
loss: 1.0191470384597778,grad_norm: 0.999999082322987, iteration: 262527
loss: 1.0258718729019165,grad_norm: 0.9327319243873625, iteration: 262528
loss: 0.9976846575737,grad_norm: 0.7486407336180425, iteration: 262529
loss: 1.0301042795181274,grad_norm: 0.9999993880672524, iteration: 262530
loss: 1.009710431098938,grad_norm: 0.8892089396437054, iteration: 262531
loss: 1.0342841148376465,grad_norm: 0.8608774003973101, iteration: 262532
loss: 1.0036745071411133,grad_norm: 0.9341771069251922, iteration: 262533
loss: 1.025510549545288,grad_norm: 0.7769521337332593, iteration: 262534
loss: 0.9996668696403503,grad_norm: 0.7740573980008026, iteration: 262535
loss: 1.0331467390060425,grad_norm: 0.9999996078552342, iteration: 262536
loss: 0.9799452424049377,grad_norm: 0.9257886477929719, iteration: 262537
loss: 1.0065202713012695,grad_norm: 0.7890049412026298, iteration: 262538
loss: 1.0563236474990845,grad_norm: 0.9999992613109705, iteration: 262539
loss: 1.0298782587051392,grad_norm: 0.8382725242445255, iteration: 262540
loss: 1.020385503768921,grad_norm: 0.8647008217905501, iteration: 262541
loss: 1.1035561561584473,grad_norm: 0.8383281841308808, iteration: 262542
loss: 1.029702067375183,grad_norm: 0.8880509539124233, iteration: 262543
loss: 1.0134820938110352,grad_norm: 0.8941117053130854, iteration: 262544
loss: 0.9936899542808533,grad_norm: 0.9729281177215319, iteration: 262545
loss: 1.0165876150131226,grad_norm: 0.9999991610221648, iteration: 262546
loss: 1.1496477127075195,grad_norm: 0.9999999093310327, iteration: 262547
loss: 0.9720685482025146,grad_norm: 0.966059805569913, iteration: 262548
loss: 1.0373080968856812,grad_norm: 0.9999990298932244, iteration: 262549
loss: 1.1317980289459229,grad_norm: 0.9073345232115684, iteration: 262550
loss: 1.0106998682022095,grad_norm: 0.8076257463063427, iteration: 262551
loss: 1.0195456743240356,grad_norm: 0.8808013884820188, iteration: 262552
loss: 1.0201133489608765,grad_norm: 0.9999993024174048, iteration: 262553
loss: 1.261435866355896,grad_norm: 0.9999997072092799, iteration: 262554
loss: 1.0470417737960815,grad_norm: 0.9999991229498233, iteration: 262555
loss: 1.0593631267547607,grad_norm: 0.8720747857782798, iteration: 262556
loss: 1.007466197013855,grad_norm: 0.9988699689747561, iteration: 262557
loss: 1.0412232875823975,grad_norm: 0.9999997482249869, iteration: 262558
loss: 0.9908010363578796,grad_norm: 0.8417792723694058, iteration: 262559
loss: 1.0081819295883179,grad_norm: 0.9008201481243118, iteration: 262560
loss: 0.9961704015731812,grad_norm: 0.9379027307223607, iteration: 262561
loss: 0.9614840745925903,grad_norm: 0.8506231214702282, iteration: 262562
loss: 1.0378137826919556,grad_norm: 0.9999990775544674, iteration: 262563
loss: 0.986856997013092,grad_norm: 0.85733015603844, iteration: 262564
loss: 1.0116641521453857,grad_norm: 0.9319481974325619, iteration: 262565
loss: 0.9776272773742676,grad_norm: 0.9392612808239156, iteration: 262566
loss: 1.0144686698913574,grad_norm: 0.8355694529822035, iteration: 262567
loss: 1.0482745170593262,grad_norm: 0.8036362978939093, iteration: 262568
loss: 0.9934902191162109,grad_norm: 0.999999159367524, iteration: 262569
loss: 0.9980501532554626,grad_norm: 0.9999990178016772, iteration: 262570
loss: 1.0518532991409302,grad_norm: 0.9999998091901473, iteration: 262571
loss: 0.9859879612922668,grad_norm: 0.9522273307471398, iteration: 262572
loss: 1.0140557289123535,grad_norm: 0.9534150559819373, iteration: 262573
loss: 0.9710976481437683,grad_norm: 0.9048404998565693, iteration: 262574
loss: 1.0032938718795776,grad_norm: 0.8739727231664011, iteration: 262575
loss: 0.984018862247467,grad_norm: 0.9344216716450511, iteration: 262576
loss: 1.014636516571045,grad_norm: 0.8692725503522828, iteration: 262577
loss: 0.9560217261314392,grad_norm: 0.8614108116835487, iteration: 262578
loss: 0.9949196577072144,grad_norm: 0.7462877891582442, iteration: 262579
loss: 1.0187891721725464,grad_norm: 0.95167869527247, iteration: 262580
loss: 1.0358734130859375,grad_norm: 0.9999991749515459, iteration: 262581
loss: 1.031360387802124,grad_norm: 0.9999991876537359, iteration: 262582
loss: 1.0014537572860718,grad_norm: 0.9304870550317365, iteration: 262583
loss: 1.0193880796432495,grad_norm: 0.9999999591278974, iteration: 262584
loss: 0.9716832041740417,grad_norm: 0.7856545334386215, iteration: 262585
loss: 0.9612306356430054,grad_norm: 0.8360342392420793, iteration: 262586
loss: 1.0099157094955444,grad_norm: 0.999999304000031, iteration: 262587
loss: 0.9851018786430359,grad_norm: 0.9922755433240602, iteration: 262588
loss: 1.017569661140442,grad_norm: 0.918180605760965, iteration: 262589
loss: 1.0172460079193115,grad_norm: 0.9061589655146697, iteration: 262590
loss: 1.0043545961380005,grad_norm: 0.9999990532964641, iteration: 262591
loss: 1.0146793127059937,grad_norm: 0.8228102056497267, iteration: 262592
loss: 0.958596408367157,grad_norm: 0.9020065392687819, iteration: 262593
loss: 1.0286154747009277,grad_norm: 0.9999992950764809, iteration: 262594
loss: 0.9681405425071716,grad_norm: 0.9999993581807624, iteration: 262595
loss: 0.9935055375099182,grad_norm: 0.9999991511669558, iteration: 262596
loss: 0.9859270453453064,grad_norm: 0.9526103164278842, iteration: 262597
loss: 1.0072617530822754,grad_norm: 0.8775071205365577, iteration: 262598
loss: 0.9800369143486023,grad_norm: 0.9999991323655295, iteration: 262599
loss: 1.0185450315475464,grad_norm: 0.9999989794985288, iteration: 262600
loss: 1.0458014011383057,grad_norm: 0.8346348042519115, iteration: 262601
loss: 1.0168896913528442,grad_norm: 0.9050503560942611, iteration: 262602
loss: 0.9963854551315308,grad_norm: 0.9999991053082457, iteration: 262603
loss: 0.9615743160247803,grad_norm: 0.8911752698099724, iteration: 262604
loss: 0.9860512614250183,grad_norm: 0.9999990465649745, iteration: 262605
loss: 1.1339671611785889,grad_norm: 0.9999995626572669, iteration: 262606
loss: 0.970628559589386,grad_norm: 0.7776215503545252, iteration: 262607
loss: 0.985557496547699,grad_norm: 0.8046375949857391, iteration: 262608
loss: 1.0067516565322876,grad_norm: 0.8538146496679129, iteration: 262609
loss: 1.0400891304016113,grad_norm: 0.8617884899171729, iteration: 262610
loss: 0.9890308380126953,grad_norm: 0.9596628943664083, iteration: 262611
loss: 1.0319017171859741,grad_norm: 0.9999994368733413, iteration: 262612
loss: 1.0382945537567139,grad_norm: 0.9999999353308957, iteration: 262613
loss: 1.074231743812561,grad_norm: 0.9440460831299591, iteration: 262614
loss: 1.0011353492736816,grad_norm: 0.9218816873693599, iteration: 262615
loss: 1.0208154916763306,grad_norm: 0.8483770990624229, iteration: 262616
loss: 0.9641247391700745,grad_norm: 0.897234751489255, iteration: 262617
loss: 0.9921713471412659,grad_norm: 0.9999989544540371, iteration: 262618
loss: 0.9977347254753113,grad_norm: 0.7936349565976064, iteration: 262619
loss: 1.0523836612701416,grad_norm: 0.864044739291683, iteration: 262620
loss: 0.9732329845428467,grad_norm: 0.9040473638109617, iteration: 262621
loss: 1.0160731077194214,grad_norm: 0.9508126078842267, iteration: 262622
loss: 1.0208783149719238,grad_norm: 0.8936582315474276, iteration: 262623
loss: 1.041695237159729,grad_norm: 0.8233829393723483, iteration: 262624
loss: 0.9686239361763,grad_norm: 0.8059317190325108, iteration: 262625
loss: 0.9999192953109741,grad_norm: 0.9686244963112888, iteration: 262626
loss: 0.9721766710281372,grad_norm: 0.9515665410675093, iteration: 262627
loss: 1.0697733163833618,grad_norm: 0.9999997338999626, iteration: 262628
loss: 0.9982731342315674,grad_norm: 0.9057629968560269, iteration: 262629
loss: 0.977057158946991,grad_norm: 0.7701619245400825, iteration: 262630
loss: 0.9998131394386292,grad_norm: 0.9999992305080855, iteration: 262631
loss: 1.0311418771743774,grad_norm: 0.9477088822161105, iteration: 262632
loss: 0.9957424402236938,grad_norm: 0.7661140094387466, iteration: 262633
loss: 0.9663241505622864,grad_norm: 0.8472277180963045, iteration: 262634
loss: 1.020159125328064,grad_norm: 0.999999558926057, iteration: 262635
loss: 0.969415009021759,grad_norm: 0.9046802049809761, iteration: 262636
loss: 0.9966819286346436,grad_norm: 0.8880447698624057, iteration: 262637
loss: 1.0528289079666138,grad_norm: 0.8159607155877717, iteration: 262638
loss: 0.9657140970230103,grad_norm: 0.9999991011624239, iteration: 262639
loss: 1.0067399740219116,grad_norm: 0.9999990699655494, iteration: 262640
loss: 0.9988418817520142,grad_norm: 0.9999991523684435, iteration: 262641
loss: 1.0094122886657715,grad_norm: 0.8332070025576206, iteration: 262642
loss: 1.0373831987380981,grad_norm: 0.764561784865386, iteration: 262643
loss: 0.9626258015632629,grad_norm: 0.9396650039052142, iteration: 262644
loss: 1.0299263000488281,grad_norm: 0.9999998379011713, iteration: 262645
loss: 0.9869535565376282,grad_norm: 0.78108675004321, iteration: 262646
loss: 0.9853765368461609,grad_norm: 0.932687024549585, iteration: 262647
loss: 0.991396427154541,grad_norm: 0.9579693882249717, iteration: 262648
loss: 1.00230872631073,grad_norm: 0.9335137678207136, iteration: 262649
loss: 0.9674211740493774,grad_norm: 0.7916046263156075, iteration: 262650
loss: 0.9945538640022278,grad_norm: 0.9566941123403664, iteration: 262651
loss: 1.0160927772521973,grad_norm: 0.8861133421386375, iteration: 262652
loss: 0.9843671321868896,grad_norm: 0.8705778451347527, iteration: 262653
loss: 1.0089433193206787,grad_norm: 0.9159667516222612, iteration: 262654
loss: 1.0140446424484253,grad_norm: 0.9999993207852492, iteration: 262655
loss: 1.00038480758667,grad_norm: 0.8812609365906666, iteration: 262656
loss: 0.9769654273986816,grad_norm: 0.8477943685956585, iteration: 262657
loss: 0.9914923906326294,grad_norm: 0.9526699430688028, iteration: 262658
loss: 0.9937424063682556,grad_norm: 0.7767717784978447, iteration: 262659
loss: 0.9580172300338745,grad_norm: 0.8340664718352586, iteration: 262660
loss: 0.9846502542495728,grad_norm: 0.9162328712280652, iteration: 262661
loss: 0.9928869605064392,grad_norm: 0.9881497892440401, iteration: 262662
loss: 1.0118874311447144,grad_norm: 0.999999205959288, iteration: 262663
loss: 1.0080446004867554,grad_norm: 0.6934892151227611, iteration: 262664
loss: 0.9646017551422119,grad_norm: 0.9999995688598101, iteration: 262665
loss: 1.070476770401001,grad_norm: 0.9999995475136563, iteration: 262666
loss: 1.008004069328308,grad_norm: 0.7652165477887756, iteration: 262667
loss: 1.0391491651535034,grad_norm: 0.9999991165497116, iteration: 262668
loss: 0.9826486110687256,grad_norm: 0.999999060393655, iteration: 262669
loss: 0.991521954536438,grad_norm: 0.9999989919776588, iteration: 262670
loss: 1.0220588445663452,grad_norm: 0.8138049010190564, iteration: 262671
loss: 0.9947437047958374,grad_norm: 0.9730548970581053, iteration: 262672
loss: 0.9949377775192261,grad_norm: 0.8231942570514308, iteration: 262673
loss: 0.9907469153404236,grad_norm: 0.9103347865825079, iteration: 262674
loss: 1.0010156631469727,grad_norm: 0.9999989828192798, iteration: 262675
loss: 0.9910111427307129,grad_norm: 0.8171472170420506, iteration: 262676
loss: 0.9973185658454895,grad_norm: 0.9999999582205582, iteration: 262677
loss: 1.01373291015625,grad_norm: 0.9320549047601986, iteration: 262678
loss: 1.0045925378799438,grad_norm: 0.9999990038961679, iteration: 262679
loss: 1.0349971055984497,grad_norm: 0.9448255794652118, iteration: 262680
loss: 0.989091157913208,grad_norm: 0.9305681066038384, iteration: 262681
loss: 0.9865537285804749,grad_norm: 0.9999990656497235, iteration: 262682
loss: 1.014060378074646,grad_norm: 0.7643897713418398, iteration: 262683
loss: 1.0002950429916382,grad_norm: 0.9999991608559335, iteration: 262684
loss: 1.000657320022583,grad_norm: 0.9999989778681511, iteration: 262685
loss: 1.098222017288208,grad_norm: 0.9999996665397479, iteration: 262686
loss: 0.9770979285240173,grad_norm: 0.9160475334545946, iteration: 262687
loss: 0.9874935746192932,grad_norm: 0.9830470241725823, iteration: 262688
loss: 1.133285641670227,grad_norm: 0.8854331726305807, iteration: 262689
loss: 1.0457948446273804,grad_norm: 0.8262963686015922, iteration: 262690
loss: 0.9955564737319946,grad_norm: 0.9999996629884923, iteration: 262691
loss: 1.0128815174102783,grad_norm: 0.9999990480061989, iteration: 262692
loss: 0.9727161526679993,grad_norm: 0.8596499996693763, iteration: 262693
loss: 1.0169261693954468,grad_norm: 0.9917282279920163, iteration: 262694
loss: 1.039648175239563,grad_norm: 0.999999144713007, iteration: 262695
loss: 1.0326296091079712,grad_norm: 0.7559930435845624, iteration: 262696
loss: 0.9884098172187805,grad_norm: 0.8468330176945167, iteration: 262697
loss: 1.0169273614883423,grad_norm: 0.8970250346879651, iteration: 262698
loss: 0.9913409352302551,grad_norm: 0.9999992669463234, iteration: 262699
loss: 0.9962571263313293,grad_norm: 0.926884613437219, iteration: 262700
loss: 0.9503881931304932,grad_norm: 0.9999991345829903, iteration: 262701
loss: 0.9929667115211487,grad_norm: 0.8296121769979024, iteration: 262702
loss: 0.9757617712020874,grad_norm: 0.9139494298327585, iteration: 262703
loss: 0.9658004641532898,grad_norm: 0.838639978220145, iteration: 262704
loss: 1.0599356889724731,grad_norm: 0.9999998862569499, iteration: 262705
loss: 0.9957311153411865,grad_norm: 0.9101371676712573, iteration: 262706
loss: 1.005373239517212,grad_norm: 0.973727235774909, iteration: 262707
loss: 1.0789852142333984,grad_norm: 0.9999998352167224, iteration: 262708
loss: 1.0115054845809937,grad_norm: 0.9999994001492865, iteration: 262709
loss: 1.0205022096633911,grad_norm: 0.8881540433123309, iteration: 262710
loss: 1.0085747241973877,grad_norm: 0.8753692916554388, iteration: 262711
loss: 0.9855428338050842,grad_norm: 0.7895061923875561, iteration: 262712
loss: 1.0179733037948608,grad_norm: 0.8929077944596224, iteration: 262713
loss: 1.032975435256958,grad_norm: 0.9648463159692792, iteration: 262714
loss: 0.9878974556922913,grad_norm: 0.8338611455368846, iteration: 262715
loss: 0.980811357498169,grad_norm: 0.703867126077554, iteration: 262716
loss: 0.9964370727539062,grad_norm: 0.8830513789246003, iteration: 262717
loss: 0.9925738573074341,grad_norm: 0.8238998602221601, iteration: 262718
loss: 1.0032275915145874,grad_norm: 0.9965067433890534, iteration: 262719
loss: 1.023506999015808,grad_norm: 0.9015818041801218, iteration: 262720
loss: 0.9993299841880798,grad_norm: 0.9861089099197146, iteration: 262721
loss: 0.9841894507408142,grad_norm: 0.9999989422501122, iteration: 262722
loss: 0.9609747529029846,grad_norm: 0.9999990576732032, iteration: 262723
loss: 0.9858194589614868,grad_norm: 0.8666730719581889, iteration: 262724
loss: 1.0682567358016968,grad_norm: 0.7636277387024317, iteration: 262725
loss: 1.0090044736862183,grad_norm: 0.7508672015975799, iteration: 262726
loss: 1.0000203847885132,grad_norm: 0.9999992322203795, iteration: 262727
loss: 1.0122523307800293,grad_norm: 0.6917620242596172, iteration: 262728
loss: 0.9970245957374573,grad_norm: 0.8941573892218166, iteration: 262729
loss: 1.1221749782562256,grad_norm: 0.9442650869112016, iteration: 262730
loss: 1.0659676790237427,grad_norm: 0.9999991200092561, iteration: 262731
loss: 0.9795131087303162,grad_norm: 0.9999991127130128, iteration: 262732
loss: 0.9712082147598267,grad_norm: 0.979265869311906, iteration: 262733
loss: 0.9888960123062134,grad_norm: 0.9113537572039742, iteration: 262734
loss: 0.9907670021057129,grad_norm: 0.9374306148345151, iteration: 262735
loss: 1.040643572807312,grad_norm: 0.8769264496197112, iteration: 262736
loss: 1.0039044618606567,grad_norm: 0.8454609037200789, iteration: 262737
loss: 1.0204318761825562,grad_norm: 0.9999991134115889, iteration: 262738
loss: 0.995823323726654,grad_norm: 0.9999990797240813, iteration: 262739
loss: 1.0341331958770752,grad_norm: 0.8667110996741231, iteration: 262740
loss: 1.0110926628112793,grad_norm: 0.9846686888156863, iteration: 262741
loss: 1.0854843854904175,grad_norm: 0.9999992424678135, iteration: 262742
loss: 0.965825617313385,grad_norm: 0.8888675047903956, iteration: 262743
loss: 1.0155373811721802,grad_norm: 0.9999989432802785, iteration: 262744
loss: 0.9882617592811584,grad_norm: 0.8035801137462559, iteration: 262745
loss: 0.9974086880683899,grad_norm: 0.9999991414539523, iteration: 262746
loss: 1.0101526975631714,grad_norm: 0.9392811247459516, iteration: 262747
loss: 1.0507235527038574,grad_norm: 0.8887923265550605, iteration: 262748
loss: 1.0977448225021362,grad_norm: 0.9999989891112314, iteration: 262749
loss: 1.0093141794204712,grad_norm: 0.9999993556570357, iteration: 262750
loss: 0.9625500440597534,grad_norm: 0.9999990708190711, iteration: 262751
loss: 1.009809970855713,grad_norm: 0.9081267873589448, iteration: 262752
loss: 1.0055829286575317,grad_norm: 0.8515244693548659, iteration: 262753
loss: 1.0045095682144165,grad_norm: 0.9999989908657324, iteration: 262754
loss: 1.013701319694519,grad_norm: 0.9999991731134892, iteration: 262755
loss: 1.0212435722351074,grad_norm: 0.9999991552525195, iteration: 262756
loss: 1.0038295984268188,grad_norm: 0.8567960684691943, iteration: 262757
loss: 0.9870520234107971,grad_norm: 0.879600203716505, iteration: 262758
loss: 0.9816306233406067,grad_norm: 0.7958877409671701, iteration: 262759
loss: 1.0081183910369873,grad_norm: 0.9170235626681968, iteration: 262760
loss: 0.9842637181282043,grad_norm: 0.8590817093048597, iteration: 262761
loss: 1.0948708057403564,grad_norm: 0.9999998423538302, iteration: 262762
loss: 0.9925840497016907,grad_norm: 0.8603374410180901, iteration: 262763
loss: 1.0244883298873901,grad_norm: 0.8820310487149436, iteration: 262764
loss: 0.9716087579727173,grad_norm: 0.9999991976191996, iteration: 262765
loss: 0.9931010603904724,grad_norm: 0.8676246217787139, iteration: 262766
loss: 1.01249098777771,grad_norm: 0.9009899915400619, iteration: 262767
loss: 1.009342074394226,grad_norm: 0.8267014918021294, iteration: 262768
loss: 1.0545554161071777,grad_norm: 0.9177231925538146, iteration: 262769
loss: 0.9754311442375183,grad_norm: 0.7357026353754272, iteration: 262770
loss: 1.0133662223815918,grad_norm: 0.999999190926583, iteration: 262771
loss: 0.9800487160682678,grad_norm: 0.8114907939360281, iteration: 262772
loss: 1.0958176851272583,grad_norm: 0.9899950114560997, iteration: 262773
loss: 1.1195546388626099,grad_norm: 0.9999991240065215, iteration: 262774
loss: 1.037136435508728,grad_norm: 0.8282403749207762, iteration: 262775
loss: 1.038581371307373,grad_norm: 0.9999993315491303, iteration: 262776
loss: 1.0860810279846191,grad_norm: 0.9783537900642855, iteration: 262777
loss: 1.0126152038574219,grad_norm: 0.9999999502884767, iteration: 262778
loss: 1.0599159002304077,grad_norm: 0.9999991863715652, iteration: 262779
loss: 1.0901298522949219,grad_norm: 0.999999308002062, iteration: 262780
loss: 1.0241023302078247,grad_norm: 0.8496068759795282, iteration: 262781
loss: 1.0090126991271973,grad_norm: 0.9999991237730785, iteration: 262782
loss: 1.0373984575271606,grad_norm: 0.9999993010899557, iteration: 262783
loss: 0.9766748547554016,grad_norm: 0.9999991059128892, iteration: 262784
loss: 0.9995085000991821,grad_norm: 0.814720387344569, iteration: 262785
loss: 1.0455375909805298,grad_norm: 0.8361359875221335, iteration: 262786
loss: 1.0378427505493164,grad_norm: 0.8274426093117165, iteration: 262787
loss: 0.9989894032478333,grad_norm: 0.804956979653293, iteration: 262788
loss: 1.0823607444763184,grad_norm: 0.999999901940719, iteration: 262789
loss: 0.9659777283668518,grad_norm: 0.96311940842958, iteration: 262790
loss: 0.9644572734832764,grad_norm: 0.8517380923916774, iteration: 262791
loss: 0.9937752485275269,grad_norm: 0.7438732571102208, iteration: 262792
loss: 0.9959027171134949,grad_norm: 0.9999990171034693, iteration: 262793
loss: 0.9969075918197632,grad_norm: 0.9627913405623788, iteration: 262794
loss: 1.0893524885177612,grad_norm: 0.9430534590907438, iteration: 262795
loss: 1.0347391366958618,grad_norm: 0.9999993507648574, iteration: 262796
loss: 1.0120468139648438,grad_norm: 0.937773265860163, iteration: 262797
loss: 0.9896837472915649,grad_norm: 0.9836487714579034, iteration: 262798
loss: 1.013834834098816,grad_norm: 0.9215165941690168, iteration: 262799
loss: 0.9581383466720581,grad_norm: 0.7866820117710046, iteration: 262800
loss: 1.0210453271865845,grad_norm: 0.9999996774452059, iteration: 262801
loss: 0.9778571128845215,grad_norm: 0.8184272429534598, iteration: 262802
loss: 1.008887767791748,grad_norm: 0.8367844739860639, iteration: 262803
loss: 0.9855436682701111,grad_norm: 0.9999996836888233, iteration: 262804
loss: 0.9708338379859924,grad_norm: 0.9999991769266192, iteration: 262805
loss: 0.9966350197792053,grad_norm: 0.9999998922858855, iteration: 262806
loss: 1.0035667419433594,grad_norm: 0.9178913621432255, iteration: 262807
loss: 1.0267122983932495,grad_norm: 0.896515798936202, iteration: 262808
loss: 1.0762358903884888,grad_norm: 0.9999989349277457, iteration: 262809
loss: 1.0132651329040527,grad_norm: 0.8223271889854209, iteration: 262810
loss: 1.001715898513794,grad_norm: 0.8544858818026636, iteration: 262811
loss: 1.0237829685211182,grad_norm: 0.9999991566501254, iteration: 262812
loss: 1.0564579963684082,grad_norm: 0.9241928565014651, iteration: 262813
loss: 1.0526237487792969,grad_norm: 0.9999991028363853, iteration: 262814
loss: 1.0099457502365112,grad_norm: 0.8415322509610199, iteration: 262815
loss: 0.9977142214775085,grad_norm: 0.9440836841748926, iteration: 262816
loss: 1.0289757251739502,grad_norm: 0.7908130331814989, iteration: 262817
loss: 0.9997867941856384,grad_norm: 0.9999992841141223, iteration: 262818
loss: 0.9901676177978516,grad_norm: 0.9030894256988939, iteration: 262819
loss: 0.9827703237533569,grad_norm: 0.8912157123186568, iteration: 262820
loss: 1.0121910572052002,grad_norm: 0.9999997609938512, iteration: 262821
loss: 0.9905311465263367,grad_norm: 0.8172371627201038, iteration: 262822
loss: 0.9970319271087646,grad_norm: 0.887692517850565, iteration: 262823
loss: 0.9559327960014343,grad_norm: 0.9596225633543274, iteration: 262824
loss: 0.9774373173713684,grad_norm: 0.9999993522494324, iteration: 262825
loss: 1.0294321775436401,grad_norm: 0.9555207564992201, iteration: 262826
loss: 0.9949546456336975,grad_norm: 0.9812186964490829, iteration: 262827
loss: 1.0530107021331787,grad_norm: 0.9678959233405602, iteration: 262828
loss: 1.0552293062210083,grad_norm: 0.9999997314484114, iteration: 262829
loss: 0.9835460186004639,grad_norm: 0.9455099893025823, iteration: 262830
loss: 1.0159367322921753,grad_norm: 0.9543272146013256, iteration: 262831
loss: 0.9763012528419495,grad_norm: 0.8561256941724904, iteration: 262832
loss: 1.0036139488220215,grad_norm: 0.9999990554209074, iteration: 262833
loss: 1.0093330144882202,grad_norm: 0.8083763337810991, iteration: 262834
loss: 0.9803627729415894,grad_norm: 0.8866194346838056, iteration: 262835
loss: 1.004137396812439,grad_norm: 0.9999998627814451, iteration: 262836
loss: 1.0371742248535156,grad_norm: 0.9788640472595207, iteration: 262837
loss: 1.0194672346115112,grad_norm: 0.9999999434950588, iteration: 262838
loss: 1.0755776166915894,grad_norm: 0.9999993263454037, iteration: 262839
loss: 0.9939576983451843,grad_norm: 0.9999991117603154, iteration: 262840
loss: 1.004004716873169,grad_norm: 0.9685431674582832, iteration: 262841
loss: 0.9915629029273987,grad_norm: 0.8398672326635689, iteration: 262842
loss: 0.9936935305595398,grad_norm: 0.9014791683586756, iteration: 262843
loss: 0.9749124646186829,grad_norm: 0.987891504532848, iteration: 262844
loss: 0.9832200407981873,grad_norm: 0.8021417402062778, iteration: 262845
loss: 0.9976288676261902,grad_norm: 0.8866721831451126, iteration: 262846
loss: 0.9926700592041016,grad_norm: 0.8242852215647463, iteration: 262847
loss: 1.001969337463379,grad_norm: 0.9409282733802397, iteration: 262848
loss: 0.9990085363388062,grad_norm: 0.9999998734586534, iteration: 262849
loss: 1.0299161672592163,grad_norm: 0.8603633200274088, iteration: 262850
loss: 1.007163166999817,grad_norm: 0.9408948079703743, iteration: 262851
loss: 1.0471657514572144,grad_norm: 0.9999993933659099, iteration: 262852
loss: 1.0299733877182007,grad_norm: 0.9290020790439494, iteration: 262853
loss: 0.9914265275001526,grad_norm: 0.9999994672088403, iteration: 262854
loss: 1.0191524028778076,grad_norm: 0.9571218673376924, iteration: 262855
loss: 0.978523313999176,grad_norm: 0.8630714281392656, iteration: 262856
loss: 1.024623990058899,grad_norm: 0.9999993948646976, iteration: 262857
loss: 1.005689024925232,grad_norm: 0.9999992564895026, iteration: 262858
loss: 0.9837956428527832,grad_norm: 0.9376744309325837, iteration: 262859
loss: 1.0103918313980103,grad_norm: 0.9780177175953759, iteration: 262860
loss: 1.0116864442825317,grad_norm: 0.9999992458019338, iteration: 262861
loss: 0.9779154062271118,grad_norm: 0.999999135840013, iteration: 262862
loss: 1.0283094644546509,grad_norm: 0.8356589120273863, iteration: 262863
loss: 1.0248019695281982,grad_norm: 0.8956399021800018, iteration: 262864
loss: 1.0011194944381714,grad_norm: 0.9999998072674114, iteration: 262865
loss: 1.016292691230774,grad_norm: 0.9007937932788617, iteration: 262866
loss: 1.0722582340240479,grad_norm: 0.9999992347692284, iteration: 262867
loss: 1.046287178993225,grad_norm: 0.9999992953468925, iteration: 262868
loss: 0.9843625426292419,grad_norm: 0.7965880196409283, iteration: 262869
loss: 1.0186339616775513,grad_norm: 0.9999991082203079, iteration: 262870
loss: 1.0749181509017944,grad_norm: 0.9941075141619523, iteration: 262871
loss: 0.9651344418525696,grad_norm: 0.9580083800220016, iteration: 262872
loss: 0.9519763588905334,grad_norm: 0.7043048712841837, iteration: 262873
loss: 1.0295666456222534,grad_norm: 0.8502923989718655, iteration: 262874
loss: 1.0943878889083862,grad_norm: 0.9999994185682073, iteration: 262875
loss: 0.9912146925926208,grad_norm: 0.9999991618701681, iteration: 262876
loss: 1.0244293212890625,grad_norm: 0.9801066737271991, iteration: 262877
loss: 0.9973994493484497,grad_norm: 0.9506098001682739, iteration: 262878
loss: 1.011695384979248,grad_norm: 0.9999992709570891, iteration: 262879
loss: 0.9500873684883118,grad_norm: 0.8102198848266657, iteration: 262880
loss: 1.040067195892334,grad_norm: 0.9999990206389965, iteration: 262881
loss: 1.021662950515747,grad_norm: 0.9226523791557124, iteration: 262882
loss: 1.0184584856033325,grad_norm: 0.9954659870337166, iteration: 262883
loss: 1.0178629159927368,grad_norm: 0.8753688235010124, iteration: 262884
loss: 1.012526273727417,grad_norm: 0.8235647998905962, iteration: 262885
loss: 1.0284500122070312,grad_norm: 0.8785458535132905, iteration: 262886
loss: 0.9913273453712463,grad_norm: 0.9747647597720638, iteration: 262887
loss: 1.034391164779663,grad_norm: 0.9410815524566136, iteration: 262888
loss: 1.0468121767044067,grad_norm: 1.0000000012576225, iteration: 262889
loss: 1.0185545682907104,grad_norm: 0.8286310508167141, iteration: 262890
loss: 0.9982883334159851,grad_norm: 0.9999991767946648, iteration: 262891
loss: 0.9921968579292297,grad_norm: 0.99999938177686, iteration: 262892
loss: 1.0252931118011475,grad_norm: 0.7904358478493997, iteration: 262893
loss: 0.9959988594055176,grad_norm: 0.8698283011031581, iteration: 262894
loss: 1.0679867267608643,grad_norm: 0.9999992669525006, iteration: 262895
loss: 0.9977961182594299,grad_norm: 0.9999998270730976, iteration: 262896
loss: 1.0160470008850098,grad_norm: 0.999999105232754, iteration: 262897
loss: 0.9602615237236023,grad_norm: 0.8443750381663823, iteration: 262898
loss: 0.976775050163269,grad_norm: 0.8995888731445132, iteration: 262899
loss: 0.9735442996025085,grad_norm: 0.9999998907420421, iteration: 262900
loss: 1.007154941558838,grad_norm: 0.918026563543049, iteration: 262901
loss: 1.0097366571426392,grad_norm: 0.9999989924421666, iteration: 262902
loss: 0.992367684841156,grad_norm: 0.9683084041027753, iteration: 262903
loss: 1.0533767938613892,grad_norm: 0.9999990278180549, iteration: 262904
loss: 0.9561123251914978,grad_norm: 0.9999992145207481, iteration: 262905
loss: 0.9839649200439453,grad_norm: 0.9093569743642036, iteration: 262906
loss: 0.9938725233078003,grad_norm: 0.9498784986744443, iteration: 262907
loss: 0.9945315718650818,grad_norm: 0.8374652847162822, iteration: 262908
loss: 0.9966294169425964,grad_norm: 0.9192039440253933, iteration: 262909
loss: 1.000102162361145,grad_norm: 0.8080649522779827, iteration: 262910
loss: 1.1809756755828857,grad_norm: 0.9999995596994541, iteration: 262911
loss: 0.9723584055900574,grad_norm: 0.8321074428139024, iteration: 262912
loss: 0.9762215614318848,grad_norm: 0.765386985946828, iteration: 262913
loss: 0.9945142865180969,grad_norm: 0.8455439620516619, iteration: 262914
loss: 1.0119637250900269,grad_norm: 0.9233335387534218, iteration: 262915
loss: 0.960284948348999,grad_norm: 0.9642904571841171, iteration: 262916
loss: 1.0258097648620605,grad_norm: 0.9056517191155965, iteration: 262917
loss: 0.9844329357147217,grad_norm: 0.9999993782094979, iteration: 262918
loss: 1.0118664503097534,grad_norm: 0.9876182293761239, iteration: 262919
loss: 0.9965959191322327,grad_norm: 0.9145383141555108, iteration: 262920
loss: 1.0037389993667603,grad_norm: 0.9999994786052636, iteration: 262921
loss: 0.9948962330818176,grad_norm: 0.8770327816491948, iteration: 262922
loss: 1.0174565315246582,grad_norm: 0.9999990515243823, iteration: 262923
loss: 0.9752845764160156,grad_norm: 0.7508155718162487, iteration: 262924
loss: 0.9738085865974426,grad_norm: 0.9760121242847901, iteration: 262925
loss: 0.9989140033721924,grad_norm: 0.8904155341818761, iteration: 262926
loss: 1.0221360921859741,grad_norm: 0.9250331527578394, iteration: 262927
loss: 1.0205100774765015,grad_norm: 0.9475933110561136, iteration: 262928
loss: 0.9836196303367615,grad_norm: 0.6615635400222137, iteration: 262929
loss: 1.101545810699463,grad_norm: 0.9999993276471746, iteration: 262930
loss: 1.0000042915344238,grad_norm: 0.9999989095456786, iteration: 262931
loss: 1.0955677032470703,grad_norm: 0.9999991789358545, iteration: 262932
loss: 1.0162428617477417,grad_norm: 0.9530049129926086, iteration: 262933
loss: 0.9818053841590881,grad_norm: 0.9999990389236687, iteration: 262934
loss: 1.0246527194976807,grad_norm: 0.999999399809677, iteration: 262935
loss: 1.007132649421692,grad_norm: 0.999999153857609, iteration: 262936
loss: 0.9639520049095154,grad_norm: 0.8164000088771832, iteration: 262937
loss: 1.0001323223114014,grad_norm: 0.9710277813418916, iteration: 262938
loss: 1.0209578275680542,grad_norm: 0.8354176068404382, iteration: 262939
loss: 0.9643044471740723,grad_norm: 0.9661347723364455, iteration: 262940
loss: 0.9960781335830688,grad_norm: 0.7920292617009814, iteration: 262941
loss: 1.001726508140564,grad_norm: 0.9012119838252864, iteration: 262942
loss: 0.978171169757843,grad_norm: 0.9999991003398245, iteration: 262943
loss: 0.9455759525299072,grad_norm: 0.892683262159878, iteration: 262944
loss: 0.9738789796829224,grad_norm: 0.8251173251746765, iteration: 262945
loss: 1.0053194761276245,grad_norm: 0.8126503034643838, iteration: 262946
loss: 0.9519194960594177,grad_norm: 0.9999990596812026, iteration: 262947
loss: 1.0143098831176758,grad_norm: 0.9806822296506336, iteration: 262948
loss: 1.0673987865447998,grad_norm: 0.9999992142549281, iteration: 262949
loss: 0.97248774766922,grad_norm: 0.9083913882289248, iteration: 262950
loss: 0.9654791951179504,grad_norm: 0.9999991571141403, iteration: 262951
loss: 1.017160177230835,grad_norm: 0.8411529812993624, iteration: 262952
loss: 0.9512031078338623,grad_norm: 0.8835848600328214, iteration: 262953
loss: 0.967782199382782,grad_norm: 0.9999996693642197, iteration: 262954
loss: 0.9829103350639343,grad_norm: 0.999999180198784, iteration: 262955
loss: 0.9855117797851562,grad_norm: 0.9234491665800334, iteration: 262956
loss: 1.0385037660598755,grad_norm: 0.8302648174450207, iteration: 262957
loss: 1.0052757263183594,grad_norm: 0.9999992347885669, iteration: 262958
loss: 0.9982563853263855,grad_norm: 0.8519128220656464, iteration: 262959
loss: 1.0008788108825684,grad_norm: 0.9999991711493627, iteration: 262960
loss: 0.9805644750595093,grad_norm: 0.8726119122870108, iteration: 262961
loss: 0.9938015341758728,grad_norm: 0.9926368315740635, iteration: 262962
loss: 0.9841352701187134,grad_norm: 0.8081056613437674, iteration: 262963
loss: 0.9866535663604736,grad_norm: 0.8449630621170798, iteration: 262964
loss: 1.0375720262527466,grad_norm: 0.9523615620132406, iteration: 262965
loss: 0.9865196943283081,grad_norm: 0.999999209269042, iteration: 262966
loss: 1.0171315670013428,grad_norm: 0.8853480129543436, iteration: 262967
loss: 1.0295957326889038,grad_norm: 0.9090695870685208, iteration: 262968
loss: 0.9933658242225647,grad_norm: 0.9999992468525091, iteration: 262969
loss: 1.0808851718902588,grad_norm: 0.9999994546465872, iteration: 262970
loss: 1.016054391860962,grad_norm: 0.8893754016703648, iteration: 262971
loss: 1.0305230617523193,grad_norm: 0.9999997118218923, iteration: 262972
loss: 1.0114840269088745,grad_norm: 0.9559920580991026, iteration: 262973
loss: 0.9915706515312195,grad_norm: 0.9999994314970593, iteration: 262974
loss: 1.0077292919158936,grad_norm: 0.7608295670287678, iteration: 262975
loss: 1.0053690671920776,grad_norm: 0.8619068084539954, iteration: 262976
loss: 1.0117872953414917,grad_norm: 0.9999994224386171, iteration: 262977
loss: 0.979642927646637,grad_norm: 0.9787471314881178, iteration: 262978
loss: 0.9361000657081604,grad_norm: 0.8409876864397496, iteration: 262979
loss: 1.0569357872009277,grad_norm: 0.9999990184382188, iteration: 262980
loss: 0.9996069073677063,grad_norm: 0.9999990759906137, iteration: 262981
loss: 1.036587119102478,grad_norm: 0.9676401721383805, iteration: 262982
loss: 0.9654640555381775,grad_norm: 0.9999990990605497, iteration: 262983
loss: 0.9849063158035278,grad_norm: 0.9906372593966473, iteration: 262984
loss: 1.0599212646484375,grad_norm: 0.9999993653343886, iteration: 262985
loss: 0.9658731818199158,grad_norm: 0.8079277685768381, iteration: 262986
loss: 0.9715730547904968,grad_norm: 0.9336414970637746, iteration: 262987
loss: 1.0103665590286255,grad_norm: 0.9999994541667604, iteration: 262988
loss: 1.013761281967163,grad_norm: 0.928689990491671, iteration: 262989
loss: 1.0105606317520142,grad_norm: 0.9476835891358737, iteration: 262990
loss: 1.020226001739502,grad_norm: 0.873683828286444, iteration: 262991
loss: 1.008102297782898,grad_norm: 0.7783042262874238, iteration: 262992
loss: 0.9670318961143494,grad_norm: 0.8634114840845007, iteration: 262993
loss: 1.0585688352584839,grad_norm: 0.7645492154678454, iteration: 262994
loss: 0.9951595664024353,grad_norm: 0.9727597808885365, iteration: 262995
loss: 1.0153487920761108,grad_norm: 0.8128288750256919, iteration: 262996
loss: 1.0036698579788208,grad_norm: 0.9770645853197827, iteration: 262997
loss: 0.9953741431236267,grad_norm: 0.9167913131127041, iteration: 262998
loss: 0.962046205997467,grad_norm: 0.9332869396588661, iteration: 262999
loss: 0.9861496686935425,grad_norm: 0.8750715674046455, iteration: 263000
loss: 0.9641422033309937,grad_norm: 0.8960862344634674, iteration: 263001
loss: 0.9925294518470764,grad_norm: 0.8804295891395135, iteration: 263002
loss: 1.0273146629333496,grad_norm: 0.9255931836302546, iteration: 263003
loss: 1.0055898427963257,grad_norm: 0.9997683484731598, iteration: 263004
loss: 1.0518343448638916,grad_norm: 0.8334431911512941, iteration: 263005
loss: 1.0333017110824585,grad_norm: 0.925711428775306, iteration: 263006
loss: 1.022568702697754,grad_norm: 0.999999097185308, iteration: 263007
loss: 1.0279079675674438,grad_norm: 0.9999998599890944, iteration: 263008
loss: 0.989742636680603,grad_norm: 0.8029153419759191, iteration: 263009
loss: 1.026400089263916,grad_norm: 0.9999991604561488, iteration: 263010
loss: 0.9949028491973877,grad_norm: 0.9999993274485988, iteration: 263011
loss: 1.0107992887496948,grad_norm: 0.9534666434368585, iteration: 263012
loss: 1.000991940498352,grad_norm: 0.9999991132536348, iteration: 263013
loss: 1.0230672359466553,grad_norm: 0.9999997106321914, iteration: 263014
loss: 0.9691091775894165,grad_norm: 0.834483596198777, iteration: 263015
loss: 1.0344135761260986,grad_norm: 0.9999997027954282, iteration: 263016
loss: 1.0067445039749146,grad_norm: 0.8020275659834607, iteration: 263017
loss: 0.9892258048057556,grad_norm: 0.9999991603374924, iteration: 263018
loss: 1.0120582580566406,grad_norm: 0.9999990864209796, iteration: 263019
loss: 1.021563172340393,grad_norm: 0.9999996693749464, iteration: 263020
loss: 1.0442997217178345,grad_norm: 0.9999991950389482, iteration: 263021
loss: 0.973551332950592,grad_norm: 0.8361916703583198, iteration: 263022
loss: 0.9751966595649719,grad_norm: 0.999999504281503, iteration: 263023
loss: 0.9785512089729309,grad_norm: 0.9464855242698296, iteration: 263024
loss: 0.973685622215271,grad_norm: 0.8757809443528863, iteration: 263025
loss: 1.011431097984314,grad_norm: 0.9999991262087566, iteration: 263026
loss: 1.0192726850509644,grad_norm: 0.8799585292829646, iteration: 263027
loss: 0.994254469871521,grad_norm: 0.8473943093853997, iteration: 263028
loss: 1.0057430267333984,grad_norm: 0.8513442491108076, iteration: 263029
loss: 1.0018131732940674,grad_norm: 0.9999991249213593, iteration: 263030
loss: 0.9943484663963318,grad_norm: 0.8652463882664395, iteration: 263031
loss: 1.0122442245483398,grad_norm: 0.9999992593498703, iteration: 263032
loss: 1.0056160688400269,grad_norm: 0.8949392796883713, iteration: 263033
loss: 1.0345834493637085,grad_norm: 0.9114643572872909, iteration: 263034
loss: 0.9898512363433838,grad_norm: 0.9999991747499254, iteration: 263035
loss: 0.9606837630271912,grad_norm: 0.7517140117648591, iteration: 263036
loss: 1.0267566442489624,grad_norm: 0.8731577433757649, iteration: 263037
loss: 1.0158919095993042,grad_norm: 0.9340999048617704, iteration: 263038
loss: 0.9565073251724243,grad_norm: 0.9276783379561628, iteration: 263039
loss: 0.9907788634300232,grad_norm: 0.874487441464443, iteration: 263040
loss: 1.021112322807312,grad_norm: 0.9335934854709746, iteration: 263041
loss: 1.0059089660644531,grad_norm: 0.9555483101753096, iteration: 263042
loss: 1.026529312133789,grad_norm: 0.8095221873598016, iteration: 263043
loss: 1.0390454530715942,grad_norm: 0.9999991912804965, iteration: 263044
loss: 1.3440110683441162,grad_norm: 0.9999994848252781, iteration: 263045
loss: 1.0089093446731567,grad_norm: 0.8271480706391123, iteration: 263046
loss: 0.9892483353614807,grad_norm: 0.8564095138600327, iteration: 263047
loss: 0.9865562915802002,grad_norm: 0.8388912383258201, iteration: 263048
loss: 0.990464985370636,grad_norm: 0.997587448557874, iteration: 263049
loss: 1.0498884916305542,grad_norm: 0.7570591034889556, iteration: 263050
loss: 1.0260683298110962,grad_norm: 0.8717208370457655, iteration: 263051
loss: 0.9863480925559998,grad_norm: 0.9439353079451599, iteration: 263052
loss: 0.9864919781684875,grad_norm: 0.9337100688248926, iteration: 263053
loss: 1.0044775009155273,grad_norm: 0.8895032393310447, iteration: 263054
loss: 1.0344074964523315,grad_norm: 0.8077893870482531, iteration: 263055
loss: 0.9722978472709656,grad_norm: 0.890380194435807, iteration: 263056
loss: 0.989048421382904,grad_norm: 0.9359177136877647, iteration: 263057
loss: 1.0062659978866577,grad_norm: 0.9007436505741859, iteration: 263058
loss: 1.0070070028305054,grad_norm: 0.795475920960416, iteration: 263059
loss: 1.0248874425888062,grad_norm: 0.9999991766591937, iteration: 263060
loss: 0.9948309659957886,grad_norm: 0.9999996129842701, iteration: 263061
loss: 1.0290924310684204,grad_norm: 0.9999991224255473, iteration: 263062
loss: 0.9756160378456116,grad_norm: 0.8667338888530255, iteration: 263063
loss: 1.0274139642715454,grad_norm: 0.9999992382044758, iteration: 263064
loss: 1.0394009351730347,grad_norm: 0.9999991947179503, iteration: 263065
loss: 1.0055509805679321,grad_norm: 0.8119212994333446, iteration: 263066
loss: 1.0128428936004639,grad_norm: 0.9158600582773001, iteration: 263067
loss: 0.9940585494041443,grad_norm: 0.864802224631378, iteration: 263068
loss: 1.018691062927246,grad_norm: 0.9999990446602025, iteration: 263069
loss: 1.0188994407653809,grad_norm: 0.9758661530593629, iteration: 263070
loss: 0.9807352423667908,grad_norm: 0.8589404936260527, iteration: 263071
loss: 0.9783701300621033,grad_norm: 0.9747352395979934, iteration: 263072
loss: 1.012243390083313,grad_norm: 0.9422175482715826, iteration: 263073
loss: 0.9867531657218933,grad_norm: 0.9609713551427304, iteration: 263074
loss: 1.0335841178894043,grad_norm: 0.9685911214553985, iteration: 263075
loss: 0.9926381707191467,grad_norm: 0.9999990723014628, iteration: 263076
loss: 1.0261411666870117,grad_norm: 0.8873014629436456, iteration: 263077
loss: 1.0137534141540527,grad_norm: 0.905950838975407, iteration: 263078
loss: 1.0091010332107544,grad_norm: 0.9999996736384008, iteration: 263079
loss: 1.0035974979400635,grad_norm: 0.9472398721238262, iteration: 263080
loss: 1.086749792098999,grad_norm: 0.9999997607612359, iteration: 263081
loss: 1.0269817113876343,grad_norm: 0.874057675181758, iteration: 263082
loss: 0.9796268939971924,grad_norm: 0.8666427800673696, iteration: 263083
loss: 1.033611536026001,grad_norm: 0.999999791928552, iteration: 263084
loss: 0.9973837733268738,grad_norm: 0.9868200590936861, iteration: 263085
loss: 1.0201869010925293,grad_norm: 0.8369585091135348, iteration: 263086
loss: 1.0291142463684082,grad_norm: 0.9972048527104271, iteration: 263087
loss: 0.9984890818595886,grad_norm: 0.8026432496080513, iteration: 263088
loss: 0.9763860106468201,grad_norm: 0.9499763684204047, iteration: 263089
loss: 0.9931216239929199,grad_norm: 0.9999990088572676, iteration: 263090
loss: 1.0129996538162231,grad_norm: 0.9999993361962162, iteration: 263091
loss: 1.0187692642211914,grad_norm: 0.9999993603869866, iteration: 263092
loss: 1.0098302364349365,grad_norm: 0.9999990188543989, iteration: 263093
loss: 1.0367599725723267,grad_norm: 0.962557598984261, iteration: 263094
loss: 0.9967105984687805,grad_norm: 0.7988096660849422, iteration: 263095
loss: 1.0191755294799805,grad_norm: 0.9511151096611559, iteration: 263096
loss: 0.9816288948059082,grad_norm: 0.9999991193206991, iteration: 263097
loss: 0.9964428544044495,grad_norm: 0.8665761970880778, iteration: 263098
loss: 0.9953137040138245,grad_norm: 0.7624591494002096, iteration: 263099
loss: 1.0412706136703491,grad_norm: 0.9786369704771755, iteration: 263100
loss: 1.0215462446212769,grad_norm: 0.9120627315148336, iteration: 263101
loss: 1.011939287185669,grad_norm: 0.9429389206720236, iteration: 263102
loss: 1.013826608657837,grad_norm: 0.9999993340385291, iteration: 263103
loss: 1.0072826147079468,grad_norm: 0.8935917738667652, iteration: 263104
loss: 0.9925971627235413,grad_norm: 0.9999990507616864, iteration: 263105
loss: 0.9495670795440674,grad_norm: 0.7436379315660708, iteration: 263106
loss: 1.0113788843154907,grad_norm: 0.8538734164621694, iteration: 263107
loss: 1.012618899345398,grad_norm: 0.8037247446681373, iteration: 263108
loss: 1.0010851621627808,grad_norm: 0.8791782456250412, iteration: 263109
loss: 1.003343939781189,grad_norm: 0.9897804716019388, iteration: 263110
loss: 1.004920482635498,grad_norm: 0.8461751891837671, iteration: 263111
loss: 0.9797500967979431,grad_norm: 0.8710271135858721, iteration: 263112
loss: 1.0442476272583008,grad_norm: 0.8743940402231221, iteration: 263113
loss: 0.9987223744392395,grad_norm: 0.9323001351281012, iteration: 263114
loss: 1.0360361337661743,grad_norm: 0.8432971140806551, iteration: 263115
loss: 1.0614349842071533,grad_norm: 0.9999998593262556, iteration: 263116
loss: 1.0051167011260986,grad_norm: 0.8969321144342938, iteration: 263117
loss: 1.014510989189148,grad_norm: 0.999999572737978, iteration: 263118
loss: 1.0051718950271606,grad_norm: 0.9770483795681597, iteration: 263119
loss: 1.026563048362732,grad_norm: 0.959377061207116, iteration: 263120
loss: 0.9797313213348389,grad_norm: 0.977531217694842, iteration: 263121
loss: 1.0609124898910522,grad_norm: 0.999999064016698, iteration: 263122
loss: 1.0034937858581543,grad_norm: 0.9999991493266022, iteration: 263123
loss: 1.1129833459854126,grad_norm: 0.9999998149110662, iteration: 263124
loss: 0.9848986268043518,grad_norm: 0.9786090127084721, iteration: 263125
loss: 0.9979318380355835,grad_norm: 0.875872528498121, iteration: 263126
loss: 0.985881507396698,grad_norm: 0.8473073825459364, iteration: 263127
loss: 0.9964357018470764,grad_norm: 0.7551351705018128, iteration: 263128
loss: 1.0086445808410645,grad_norm: 0.7631173309702182, iteration: 263129
loss: 1.0267666578292847,grad_norm: 0.9999992507769337, iteration: 263130
loss: 1.0243420600891113,grad_norm: 0.8014198433083985, iteration: 263131
loss: 0.991443395614624,grad_norm: 0.999999197196316, iteration: 263132
loss: 0.9981838464736938,grad_norm: 0.9999996171838158, iteration: 263133
loss: 0.9571490287780762,grad_norm: 0.9999989093460124, iteration: 263134
loss: 1.0384085178375244,grad_norm: 0.9590650210470556, iteration: 263135
loss: 1.0073754787445068,grad_norm: 0.9999995218837073, iteration: 263136
loss: 0.9846146702766418,grad_norm: 0.9999991082187182, iteration: 263137
loss: 0.9900491833686829,grad_norm: 0.8763457225159531, iteration: 263138
loss: 0.9399309158325195,grad_norm: 0.773283606814299, iteration: 263139
loss: 1.0232995748519897,grad_norm: 0.999999524763114, iteration: 263140
loss: 1.0010411739349365,grad_norm: 0.9999991685367098, iteration: 263141
loss: 0.9520170092582703,grad_norm: 0.9995858095785963, iteration: 263142
loss: 0.9591851830482483,grad_norm: 0.8412010450608742, iteration: 263143
loss: 0.9808456897735596,grad_norm: 0.9999990211806781, iteration: 263144
loss: 1.0183645486831665,grad_norm: 0.9388195599045391, iteration: 263145
loss: 1.0491899251937866,grad_norm: 0.9999990763104186, iteration: 263146
loss: 1.0160282850265503,grad_norm: 0.946938712620758, iteration: 263147
loss: 1.014963150024414,grad_norm: 0.9999992533790071, iteration: 263148
loss: 1.0604791641235352,grad_norm: 0.999998930537904, iteration: 263149
loss: 0.947853147983551,grad_norm: 0.9872829309280543, iteration: 263150
loss: 0.9914613366127014,grad_norm: 0.9999991583991787, iteration: 263151
loss: 0.9949053525924683,grad_norm: 0.9518882310427592, iteration: 263152
loss: 1.0186573266983032,grad_norm: 0.8800618869146307, iteration: 263153
loss: 1.0755358934402466,grad_norm: 0.9999997622585597, iteration: 263154
loss: 1.0102884769439697,grad_norm: 0.99999906021912, iteration: 263155
loss: 1.010278582572937,grad_norm: 0.8881406666155016, iteration: 263156
loss: 1.016530990600586,grad_norm: 0.9436337981416906, iteration: 263157
loss: 1.087742805480957,grad_norm: 0.9066906070377456, iteration: 263158
loss: 1.0145183801651,grad_norm: 0.9999993040347533, iteration: 263159
loss: 0.9969268441200256,grad_norm: 0.9690792570063758, iteration: 263160
loss: 0.987968921661377,grad_norm: 0.7498777698645349, iteration: 263161
loss: 1.0531748533248901,grad_norm: 0.994436828520488, iteration: 263162
loss: 0.9718890190124512,grad_norm: 0.9732538780346973, iteration: 263163
loss: 0.9931470155715942,grad_norm: 0.9999990823153266, iteration: 263164
loss: 1.017359733581543,grad_norm: 0.7642861997670399, iteration: 263165
loss: 1.0400829315185547,grad_norm: 0.9999998347613445, iteration: 263166
loss: 0.949133038520813,grad_norm: 0.8154051347466198, iteration: 263167
loss: 1.00015127658844,grad_norm: 0.8892259608861245, iteration: 263168
loss: 1.0019327402114868,grad_norm: 0.7494831860287356, iteration: 263169
loss: 1.0100475549697876,grad_norm: 0.90932179804485, iteration: 263170
loss: 0.9829155206680298,grad_norm: 0.918949298391537, iteration: 263171
loss: 1.0062711238861084,grad_norm: 0.8080064461343094, iteration: 263172
loss: 0.9742677211761475,grad_norm: 0.8565349810206577, iteration: 263173
loss: 0.9923756122589111,grad_norm: 0.7975422914345205, iteration: 263174
loss: 0.9977701306343079,grad_norm: 0.830306804414554, iteration: 263175
loss: 1.0203372240066528,grad_norm: 0.9066125105054164, iteration: 263176
loss: 1.005215048789978,grad_norm: 0.9999990158649027, iteration: 263177
loss: 1.044919729232788,grad_norm: 0.999999646972216, iteration: 263178
loss: 1.0023664236068726,grad_norm: 0.9999994661369699, iteration: 263179
loss: 0.9703710675239563,grad_norm: 0.9710678850168223, iteration: 263180
loss: 1.0361628532409668,grad_norm: 0.883665906338764, iteration: 263181
loss: 0.9897032380104065,grad_norm: 0.8597141934037045, iteration: 263182
loss: 1.0236899852752686,grad_norm: 0.9607846109385851, iteration: 263183
loss: 1.0799953937530518,grad_norm: 0.9999993693613546, iteration: 263184
loss: 0.9781665205955505,grad_norm: 0.9999992948229096, iteration: 263185
loss: 0.9751900434494019,grad_norm: 0.8498619437963503, iteration: 263186
loss: 1.0046182870864868,grad_norm: 0.7858298561254101, iteration: 263187
loss: 1.013046145439148,grad_norm: 0.9999995633153529, iteration: 263188
loss: 1.0249173641204834,grad_norm: 0.9688144633872977, iteration: 263189
loss: 0.9936155676841736,grad_norm: 0.9510526171796353, iteration: 263190
loss: 0.9887696504592896,grad_norm: 0.9739383356392963, iteration: 263191
loss: 1.0031930208206177,grad_norm: 0.8331538794347532, iteration: 263192
loss: 0.9880046844482422,grad_norm: 0.9383900156673135, iteration: 263193
loss: 0.9779956340789795,grad_norm: 0.8641584528275315, iteration: 263194
loss: 1.0245779752731323,grad_norm: 0.8609315221658768, iteration: 263195
loss: 1.0231481790542603,grad_norm: 0.8246090897857064, iteration: 263196
loss: 1.0116294622421265,grad_norm: 0.8386127677272441, iteration: 263197
loss: 1.074118971824646,grad_norm: 0.9999994195911289, iteration: 263198
loss: 1.0749839544296265,grad_norm: 0.9999999569024203, iteration: 263199
loss: 0.993574857711792,grad_norm: 0.8250518131429088, iteration: 263200
loss: 1.0196385383605957,grad_norm: 0.8973385480322364, iteration: 263201
loss: 1.0030630826950073,grad_norm: 0.9123189503441016, iteration: 263202
loss: 1.0002079010009766,grad_norm: 0.9562767628057671, iteration: 263203
loss: 0.992655873298645,grad_norm: 0.845264514104471, iteration: 263204
loss: 0.9919772744178772,grad_norm: 0.7918699769506478, iteration: 263205
loss: 0.9952247738838196,grad_norm: 0.9866279755735646, iteration: 263206
loss: 1.0077223777770996,grad_norm: 0.8767302698918132, iteration: 263207
loss: 1.004539132118225,grad_norm: 0.9999989697804819, iteration: 263208
loss: 0.9893380403518677,grad_norm: 0.8744242247588591, iteration: 263209
loss: 1.0280144214630127,grad_norm: 0.9999990614468145, iteration: 263210
loss: 1.0231322050094604,grad_norm: 0.8711735305653014, iteration: 263211
loss: 1.03759765625,grad_norm: 1.0000000047194093, iteration: 263212
loss: 0.9953402280807495,grad_norm: 0.8626566199064741, iteration: 263213
loss: 0.9771767854690552,grad_norm: 0.8529774255105128, iteration: 263214
loss: 0.9786627292633057,grad_norm: 0.9065938820810235, iteration: 263215
loss: 1.0757088661193848,grad_norm: 0.9999997274014407, iteration: 263216
loss: 1.0224844217300415,grad_norm: 0.9336744439346241, iteration: 263217
loss: 1.0350470542907715,grad_norm: 0.9999993352659907, iteration: 263218
loss: 1.0324523448944092,grad_norm: 0.8617943331359498, iteration: 263219
loss: 0.991813063621521,grad_norm: 0.809551354564686, iteration: 263220
loss: 1.0014945268630981,grad_norm: 0.9999991080345618, iteration: 263221
loss: 0.9805142283439636,grad_norm: 0.7350161933143413, iteration: 263222
loss: 1.010761022567749,grad_norm: 0.7718179936886415, iteration: 263223
loss: 0.9922955632209778,grad_norm: 0.8613557449620785, iteration: 263224
loss: 1.0127228498458862,grad_norm: 0.999999759338077, iteration: 263225
loss: 1.017922282218933,grad_norm: 0.9999990848230793, iteration: 263226
loss: 0.9902629852294922,grad_norm: 0.941400663568257, iteration: 263227
loss: 0.9950428009033203,grad_norm: 0.771762512241684, iteration: 263228
loss: 0.9954447746276855,grad_norm: 0.9589346596670933, iteration: 263229
loss: 0.9798639416694641,grad_norm: 0.9838625730806273, iteration: 263230
loss: 1.010154128074646,grad_norm: 0.8584622489384252, iteration: 263231
loss: 0.9681435227394104,grad_norm: 0.7903286217417372, iteration: 263232
loss: 1.0806647539138794,grad_norm: 0.9999992857923755, iteration: 263233
loss: 1.0907444953918457,grad_norm: 0.9999997168021374, iteration: 263234
loss: 1.0137587785720825,grad_norm: 0.8092259291485409, iteration: 263235
loss: 1.0056381225585938,grad_norm: 0.8747626016530781, iteration: 263236
loss: 0.9667839407920837,grad_norm: 0.8460951212238983, iteration: 263237
loss: 1.087885856628418,grad_norm: 0.9999993391687179, iteration: 263238
loss: 1.0178532600402832,grad_norm: 0.9999994134563212, iteration: 263239
loss: 1.0144214630126953,grad_norm: 0.9095127378298031, iteration: 263240
loss: 0.9977194666862488,grad_norm: 0.9181116148104869, iteration: 263241
loss: 0.999258279800415,grad_norm: 0.966962546203321, iteration: 263242
loss: 0.9916208386421204,grad_norm: 0.8377913724354737, iteration: 263243
loss: 1.0307022333145142,grad_norm: 0.9999992586494586, iteration: 263244
loss: 0.9779945611953735,grad_norm: 0.8602810335163895, iteration: 263245
loss: 0.9793740510940552,grad_norm: 0.9999993045316508, iteration: 263246
loss: 1.0252817869186401,grad_norm: 0.8935277216364884, iteration: 263247
loss: 0.9996006488800049,grad_norm: 0.8974652809013836, iteration: 263248
loss: 0.9712791442871094,grad_norm: 0.9876008730198873, iteration: 263249
loss: 0.9393479824066162,grad_norm: 0.9195479432345455, iteration: 263250
loss: 1.0138061046600342,grad_norm: 0.8491155155259387, iteration: 263251
loss: 1.0165081024169922,grad_norm: 0.7288614498806163, iteration: 263252
loss: 1.0249320268630981,grad_norm: 0.9598405438900897, iteration: 263253
loss: 1.0559433698654175,grad_norm: 0.8647074682451686, iteration: 263254
loss: 0.9825913906097412,grad_norm: 0.930245775557273, iteration: 263255
loss: 0.9891104102134705,grad_norm: 0.9999995485053599, iteration: 263256
loss: 1.0140273571014404,grad_norm: 0.9728753552851964, iteration: 263257
loss: 1.0617836713790894,grad_norm: 0.9482625279889965, iteration: 263258
loss: 1.0150115489959717,grad_norm: 0.9661416488498314, iteration: 263259
loss: 1.031651496887207,grad_norm: 0.9999992553435606, iteration: 263260
loss: 1.0104527473449707,grad_norm: 0.8899851574991634, iteration: 263261
loss: 1.080257773399353,grad_norm: 0.9999993485491229, iteration: 263262
loss: 1.0502654314041138,grad_norm: 0.9999992176681524, iteration: 263263
loss: 0.9997876882553101,grad_norm: 0.9672506826396543, iteration: 263264
loss: 0.996350109577179,grad_norm: 0.7872427483976944, iteration: 263265
loss: 1.004220962524414,grad_norm: 0.9999990486544765, iteration: 263266
loss: 0.9730736613273621,grad_norm: 0.8644884946021041, iteration: 263267
loss: 0.9868369102478027,grad_norm: 0.9999993114915607, iteration: 263268
loss: 0.988913893699646,grad_norm: 0.7890845299036494, iteration: 263269
loss: 1.0067477226257324,grad_norm: 0.9999992467061571, iteration: 263270
loss: 1.001250982284546,grad_norm: 0.8456276569287671, iteration: 263271
loss: 1.0519664287567139,grad_norm: 0.9349165685213044, iteration: 263272
loss: 1.0012786388397217,grad_norm: 0.91705188258258, iteration: 263273
loss: 1.0204178094863892,grad_norm: 0.819548103479879, iteration: 263274
loss: 1.003791093826294,grad_norm: 0.7755391702926683, iteration: 263275
loss: 1.0328700542449951,grad_norm: 0.9999992498561315, iteration: 263276
loss: 0.97127765417099,grad_norm: 0.8736768184908469, iteration: 263277
loss: 1.060135841369629,grad_norm: 0.9999992132930824, iteration: 263278
loss: 0.9910879135131836,grad_norm: 0.7719451065663888, iteration: 263279
loss: 1.0387190580368042,grad_norm: 0.99999912768327, iteration: 263280
loss: 1.05415940284729,grad_norm: 0.9626615328449174, iteration: 263281
loss: 1.0005438327789307,grad_norm: 0.9999991647511993, iteration: 263282
loss: 0.9874957799911499,grad_norm: 0.8195142627639935, iteration: 263283
loss: 1.0117124319076538,grad_norm: 0.9999992080411602, iteration: 263284
loss: 1.1266381740570068,grad_norm: 0.9999990861490707, iteration: 263285
loss: 0.9823446869850159,grad_norm: 0.999999152543384, iteration: 263286
loss: 0.9850180149078369,grad_norm: 0.9387232294388532, iteration: 263287
loss: 0.9841588139533997,grad_norm: 0.9011597812891455, iteration: 263288
loss: 0.9750881791114807,grad_norm: 0.8518017963604878, iteration: 263289
loss: 1.070266604423523,grad_norm: 0.9999997391308826, iteration: 263290
loss: 0.9988644123077393,grad_norm: 0.7263767666670063, iteration: 263291
loss: 1.0424604415893555,grad_norm: 0.9238640704213292, iteration: 263292
loss: 0.9838475584983826,grad_norm: 0.9848362672831568, iteration: 263293
loss: 0.969177782535553,grad_norm: 0.7637399112894344, iteration: 263294
loss: 0.9883838891983032,grad_norm: 0.9999993893624474, iteration: 263295
loss: 1.0115172863006592,grad_norm: 0.9999993602220104, iteration: 263296
loss: 1.086788296699524,grad_norm: 0.8318335959179196, iteration: 263297
loss: 0.9820996522903442,grad_norm: 0.7629130967558744, iteration: 263298
loss: 1.0031777620315552,grad_norm: 0.7481433675687832, iteration: 263299
loss: 1.0213594436645508,grad_norm: 0.921607478851801, iteration: 263300
loss: 0.9893902540206909,grad_norm: 0.9999989618663532, iteration: 263301
loss: 0.9996268153190613,grad_norm: 0.804506379133582, iteration: 263302
loss: 0.9915992021560669,grad_norm: 0.886109155032598, iteration: 263303
loss: 0.9728056788444519,grad_norm: 0.9397147045908378, iteration: 263304
loss: 1.0076804161071777,grad_norm: 0.9999993088512475, iteration: 263305
loss: 1.0003870725631714,grad_norm: 0.9100673626286196, iteration: 263306
loss: 1.0300023555755615,grad_norm: 0.9316736053658554, iteration: 263307
loss: 0.9676725268363953,grad_norm: 0.9999997112965475, iteration: 263308
loss: 0.997837245464325,grad_norm: 0.9894565175054941, iteration: 263309
loss: 0.9691284894943237,grad_norm: 0.7896594898078517, iteration: 263310
loss: 1.052474856376648,grad_norm: 0.9999991106031426, iteration: 263311
loss: 0.9954522252082825,grad_norm: 0.8142661371453062, iteration: 263312
loss: 1.0207539796829224,grad_norm: 0.938013329385152, iteration: 263313
loss: 1.0065748691558838,grad_norm: 0.9999991788592099, iteration: 263314
loss: 1.0109894275665283,grad_norm: 0.7570639501471454, iteration: 263315
loss: 0.9855379462242126,grad_norm: 0.9999990554694121, iteration: 263316
loss: 1.0344312191009521,grad_norm: 0.935425584733229, iteration: 263317
loss: 0.9762386679649353,grad_norm: 0.9999990456533219, iteration: 263318
loss: 1.0141608715057373,grad_norm: 0.9999991650595361, iteration: 263319
loss: 1.0052094459533691,grad_norm: 0.8284105229467216, iteration: 263320
loss: 0.9717916250228882,grad_norm: 0.999999220504279, iteration: 263321
loss: 1.009534478187561,grad_norm: 0.9999990993118234, iteration: 263322
loss: 1.0708727836608887,grad_norm: 0.9999992171618111, iteration: 263323
loss: 1.019577145576477,grad_norm: 0.9873110619201241, iteration: 263324
loss: 0.9639481902122498,grad_norm: 0.8553326823214749, iteration: 263325
loss: 0.9947552680969238,grad_norm: 0.9999991431614831, iteration: 263326
loss: 0.9838245511054993,grad_norm: 0.9126267410861295, iteration: 263327
loss: 1.1655943393707275,grad_norm: 0.9999995067979576, iteration: 263328
loss: 1.007278561592102,grad_norm: 0.9706718135821972, iteration: 263329
loss: 0.9826599955558777,grad_norm: 0.9698302571807293, iteration: 263330
loss: 1.0434656143188477,grad_norm: 0.9611070602296544, iteration: 263331
loss: 1.01826012134552,grad_norm: 0.8949418891812383, iteration: 263332
loss: 0.9715345501899719,grad_norm: 0.9109896819540102, iteration: 263333
loss: 0.9770994782447815,grad_norm: 0.9999990866678569, iteration: 263334
loss: 1.034419059753418,grad_norm: 0.9999998925929787, iteration: 263335
loss: 1.015868067741394,grad_norm: 0.9383803137609286, iteration: 263336
loss: 0.9886225461959839,grad_norm: 0.9437968450858351, iteration: 263337
loss: 0.9759895205497742,grad_norm: 0.9999992031920216, iteration: 263338
loss: 0.9803207516670227,grad_norm: 0.998150922469727, iteration: 263339
loss: 1.0039891004562378,grad_norm: 0.7424910786629237, iteration: 263340
loss: 1.0033289194107056,grad_norm: 0.9248750959235574, iteration: 263341
loss: 0.9884341359138489,grad_norm: 0.816734627657109, iteration: 263342
loss: 0.9747844934463501,grad_norm: 0.8444279462781044, iteration: 263343
loss: 0.9928522706031799,grad_norm: 0.9999990045254418, iteration: 263344
loss: 1.027289867401123,grad_norm: 0.844879755905918, iteration: 263345
loss: 0.979299783706665,grad_norm: 0.9999990518997911, iteration: 263346
loss: 1.0333023071289062,grad_norm: 0.7474509764813809, iteration: 263347
loss: 0.9661707282066345,grad_norm: 0.9895527717654038, iteration: 263348
loss: 1.0560145378112793,grad_norm: 0.984163002984715, iteration: 263349
loss: 0.9645316004753113,grad_norm: 0.8969398479640938, iteration: 263350
loss: 1.1177197694778442,grad_norm: 0.9999989645693645, iteration: 263351
loss: 1.0877264738082886,grad_norm: 0.999999095161224, iteration: 263352
loss: 0.9824111461639404,grad_norm: 0.8283730748739029, iteration: 263353
loss: 0.9605455994606018,grad_norm: 0.8621382813717824, iteration: 263354
loss: 1.008980631828308,grad_norm: 0.8483362041462799, iteration: 263355
loss: 0.9786481261253357,grad_norm: 0.9999990077483923, iteration: 263356
loss: 0.9473423361778259,grad_norm: 0.9712350744365492, iteration: 263357
loss: 0.9731419086456299,grad_norm: 0.8166813244800162, iteration: 263358
loss: 1.010185718536377,grad_norm: 0.8173666791586902, iteration: 263359
loss: 0.9792366623878479,grad_norm: 0.9100811456672179, iteration: 263360
loss: 1.0157817602157593,grad_norm: 0.9969089180648478, iteration: 263361
loss: 1.0010322332382202,grad_norm: 0.9601500002832511, iteration: 263362
loss: 0.973191499710083,grad_norm: 0.811710219334828, iteration: 263363
loss: 1.0044618844985962,grad_norm: 0.8434361548609197, iteration: 263364
loss: 0.9889465570449829,grad_norm: 0.8187867261051774, iteration: 263365
loss: 1.0135596990585327,grad_norm: 0.8530092899883152, iteration: 263366
loss: 0.9719898700714111,grad_norm: 0.7928200774275348, iteration: 263367
loss: 0.9864549040794373,grad_norm: 0.7839321379494371, iteration: 263368
loss: 0.9956616759300232,grad_norm: 0.6641551346238366, iteration: 263369
loss: 1.0190385580062866,grad_norm: 0.9999992145368527, iteration: 263370
loss: 1.0128263235092163,grad_norm: 0.9999997046151143, iteration: 263371
loss: 1.00089430809021,grad_norm: 0.9999989942189063, iteration: 263372
loss: 0.9715017080307007,grad_norm: 0.8488993110487825, iteration: 263373
loss: 1.0166003704071045,grad_norm: 0.9413439841228539, iteration: 263374
loss: 0.994895339012146,grad_norm: 0.8988872101497764, iteration: 263375
loss: 1.0047250986099243,grad_norm: 0.9164675349043352, iteration: 263376
loss: 0.9656470417976379,grad_norm: 0.8174301030287603, iteration: 263377
loss: 1.024983525276184,grad_norm: 0.7904790056467781, iteration: 263378
loss: 0.9963805675506592,grad_norm: 0.9999991043209423, iteration: 263379
loss: 1.0132509469985962,grad_norm: 0.9999990456537972, iteration: 263380
loss: 0.9790958166122437,grad_norm: 0.9999991276835404, iteration: 263381
loss: 0.9935732483863831,grad_norm: 0.9937401540002121, iteration: 263382
loss: 1.0000109672546387,grad_norm: 0.9936995109795795, iteration: 263383
loss: 1.0464341640472412,grad_norm: 0.9999992791801428, iteration: 263384
loss: 0.9987699389457703,grad_norm: 0.7849466785793611, iteration: 263385
loss: 1.0190153121948242,grad_norm: 0.9959175048607385, iteration: 263386
loss: 1.0425597429275513,grad_norm: 0.7835066211095749, iteration: 263387
loss: 1.0080969333648682,grad_norm: 0.9999991963202588, iteration: 263388
loss: 0.9801290035247803,grad_norm: 0.884004244511253, iteration: 263389
loss: 1.0172390937805176,grad_norm: 0.8630660380782377, iteration: 263390
loss: 1.024690866470337,grad_norm: 0.8542629871511451, iteration: 263391
loss: 0.998015284538269,grad_norm: 0.9534014644545986, iteration: 263392
loss: 1.0034794807434082,grad_norm: 0.8373852084211406, iteration: 263393
loss: 0.9783790111541748,grad_norm: 0.9537174643719869, iteration: 263394
loss: 0.9600768089294434,grad_norm: 0.9999991826627393, iteration: 263395
loss: 0.9940755367279053,grad_norm: 0.9400993493216513, iteration: 263396
loss: 1.0014442205429077,grad_norm: 0.9238703650785625, iteration: 263397
loss: 1.1462888717651367,grad_norm: 0.9693384084703183, iteration: 263398
loss: 1.0204572677612305,grad_norm: 0.9491418352653778, iteration: 263399
loss: 1.0034782886505127,grad_norm: 0.9327545333496139, iteration: 263400
loss: 0.9746558666229248,grad_norm: 0.9009743829614356, iteration: 263401
loss: 0.9985572695732117,grad_norm: 0.948695638771794, iteration: 263402
loss: 1.001218318939209,grad_norm: 0.8320066491212023, iteration: 263403
loss: 0.95864337682724,grad_norm: 0.7991929702020861, iteration: 263404
loss: 0.9918612837791443,grad_norm: 0.999999152864682, iteration: 263405
loss: 1.0089203119277954,grad_norm: 0.9999990875516831, iteration: 263406
loss: 0.9847444891929626,grad_norm: 0.9326380279155576, iteration: 263407
loss: 1.0299171209335327,grad_norm: 0.8639889360797917, iteration: 263408
loss: 0.9904699921607971,grad_norm: 0.8759812653841671, iteration: 263409
loss: 0.9496076107025146,grad_norm: 0.9521523587041238, iteration: 263410
loss: 1.0135722160339355,grad_norm: 0.9999993035760756, iteration: 263411
loss: 0.960972011089325,grad_norm: 0.8136523190280864, iteration: 263412
loss: 0.9927401542663574,grad_norm: 0.8199754409239115, iteration: 263413
loss: 0.9653695225715637,grad_norm: 0.9999992311505206, iteration: 263414
loss: 1.0006190538406372,grad_norm: 0.9782427524831745, iteration: 263415
loss: 1.0017443895339966,grad_norm: 0.9302596595522759, iteration: 263416
loss: 0.9983127117156982,grad_norm: 0.8300391302616527, iteration: 263417
loss: 0.98592609167099,grad_norm: 0.8970875153887528, iteration: 263418
loss: 0.9861082434654236,grad_norm: 0.8373894297216796, iteration: 263419
loss: 0.9939344525337219,grad_norm: 0.9475306885273999, iteration: 263420
loss: 0.9891760349273682,grad_norm: 0.8927136556184891, iteration: 263421
loss: 0.9620894193649292,grad_norm: 0.9958042437888844, iteration: 263422
loss: 1.009905219078064,grad_norm: 0.9487521136775667, iteration: 263423
loss: 1.0313770771026611,grad_norm: 0.9999990835052807, iteration: 263424
loss: 0.9972982406616211,grad_norm: 0.9430068993190999, iteration: 263425
loss: 1.00420343875885,grad_norm: 0.7754123777001621, iteration: 263426
loss: 1.019995093345642,grad_norm: 0.9153946406049079, iteration: 263427
loss: 1.0286765098571777,grad_norm: 0.999999058072823, iteration: 263428
loss: 0.9746224284172058,grad_norm: 0.9836022928098894, iteration: 263429
loss: 0.9985722899436951,grad_norm: 0.9999990395197367, iteration: 263430
loss: 0.965922474861145,grad_norm: 0.8229960605540985, iteration: 263431
loss: 0.9878856539726257,grad_norm: 0.8819952851462934, iteration: 263432
loss: 1.0206794738769531,grad_norm: 0.9433687594006195, iteration: 263433
loss: 1.0047800540924072,grad_norm: 0.980304882863399, iteration: 263434
loss: 0.9893317222595215,grad_norm: 0.9908426719197759, iteration: 263435
loss: 0.9787801504135132,grad_norm: 0.8419482875490004, iteration: 263436
loss: 1.0052008628845215,grad_norm: 0.9763055032224706, iteration: 263437
loss: 1.0336416959762573,grad_norm: 0.7213776290957102, iteration: 263438
loss: 1.081346869468689,grad_norm: 0.9122307559956261, iteration: 263439
loss: 0.9824310541152954,grad_norm: 0.807018054242171, iteration: 263440
loss: 0.9807490110397339,grad_norm: 0.7882597675259919, iteration: 263441
loss: 0.9957846999168396,grad_norm: 0.842429378420282, iteration: 263442
loss: 1.0178097486495972,grad_norm: 0.8687622439508639, iteration: 263443
loss: 1.0268242359161377,grad_norm: 0.8655542793681976, iteration: 263444
loss: 0.9964876174926758,grad_norm: 0.885180113110815, iteration: 263445
loss: 0.9725547432899475,grad_norm: 0.7881240819607698, iteration: 263446
loss: 0.993267834186554,grad_norm: 0.7181161192779898, iteration: 263447
loss: 1.0069622993469238,grad_norm: 0.849855229272738, iteration: 263448
loss: 1.0848008394241333,grad_norm: 0.9999992145974695, iteration: 263449
loss: 1.0616800785064697,grad_norm: 0.9999990514130456, iteration: 263450
loss: 1.0333592891693115,grad_norm: 0.9999990168510159, iteration: 263451
loss: 1.015219807624817,grad_norm: 0.8303150490734751, iteration: 263452
loss: 0.9991983771324158,grad_norm: 0.7768469027328336, iteration: 263453
loss: 1.0244301557540894,grad_norm: 0.9759576911048599, iteration: 263454
loss: 1.0558626651763916,grad_norm: 0.999998906629956, iteration: 263455
loss: 1.0301458835601807,grad_norm: 0.8456840431517241, iteration: 263456
loss: 0.9919490814208984,grad_norm: 0.9322708045893464, iteration: 263457
loss: 1.1219428777694702,grad_norm: 0.9999998268276707, iteration: 263458
loss: 0.95366370677948,grad_norm: 0.9999991620177542, iteration: 263459
loss: 0.9970695972442627,grad_norm: 0.8202434776725896, iteration: 263460
loss: 0.9855366349220276,grad_norm: 0.8020559382105102, iteration: 263461
loss: 0.9912015199661255,grad_norm: 0.9345867477496769, iteration: 263462
loss: 1.0156422853469849,grad_norm: 0.9999992741967193, iteration: 263463
loss: 1.023820400238037,grad_norm: 0.9260700146972937, iteration: 263464
loss: 1.010840654373169,grad_norm: 0.9999990653512326, iteration: 263465
loss: 0.9958620667457581,grad_norm: 0.8071687648737729, iteration: 263466
loss: 1.0052125453948975,grad_norm: 0.7816347976549647, iteration: 263467
loss: 1.0200308561325073,grad_norm: 0.9612462695506373, iteration: 263468
loss: 1.0052253007888794,grad_norm: 0.9999993366733716, iteration: 263469
loss: 1.0004249811172485,grad_norm: 0.7757147033567688, iteration: 263470
loss: 1.0018516778945923,grad_norm: 0.9999996964286954, iteration: 263471
loss: 0.9802097678184509,grad_norm: 0.9149251804128632, iteration: 263472
loss: 1.0331629514694214,grad_norm: 0.9999991176627439, iteration: 263473
loss: 1.0272427797317505,grad_norm: 0.910402743403758, iteration: 263474
loss: 0.9800019860267639,grad_norm: 0.942970028343671, iteration: 263475
loss: 1.0323306322097778,grad_norm: 0.9475407048124196, iteration: 263476
loss: 1.0204963684082031,grad_norm: 0.7947670469277719, iteration: 263477
loss: 1.0122265815734863,grad_norm: 0.9216042128646953, iteration: 263478
loss: 0.9724658131599426,grad_norm: 0.9999991456268426, iteration: 263479
loss: 1.0244724750518799,grad_norm: 0.7881611173458192, iteration: 263480
loss: 1.0138323307037354,grad_norm: 0.7996429021403617, iteration: 263481
loss: 1.0022058486938477,grad_norm: 0.9999990001161466, iteration: 263482
loss: 0.9761401414871216,grad_norm: 0.9999988740081913, iteration: 263483
loss: 1.004017949104309,grad_norm: 0.8225281095247527, iteration: 263484
loss: 1.001697301864624,grad_norm: 0.9909764555580701, iteration: 263485
loss: 1.01971435546875,grad_norm: 0.8511220763548372, iteration: 263486
loss: 1.0212098360061646,grad_norm: 0.8995415519719071, iteration: 263487
loss: 1.0232809782028198,grad_norm: 0.7244956998283724, iteration: 263488
loss: 0.9923282861709595,grad_norm: 0.8657671343977099, iteration: 263489
loss: 1.0311927795410156,grad_norm: 0.8887822528606573, iteration: 263490
loss: 1.0120306015014648,grad_norm: 0.982868589167073, iteration: 263491
loss: 1.0663939714431763,grad_norm: 0.9999992552874155, iteration: 263492
loss: 1.0641778707504272,grad_norm: 0.9999997029343158, iteration: 263493
loss: 1.0258538722991943,grad_norm: 0.9175868197600872, iteration: 263494
loss: 0.9777342677116394,grad_norm: 0.9072989287876898, iteration: 263495
loss: 1.0278410911560059,grad_norm: 0.780188312895552, iteration: 263496
loss: 1.0498329401016235,grad_norm: 0.9999996729118317, iteration: 263497
loss: 1.0382585525512695,grad_norm: 0.8892421465046914, iteration: 263498
loss: 0.9898496866226196,grad_norm: 0.8434926511124873, iteration: 263499
loss: 0.9846619367599487,grad_norm: 0.8815076880306951, iteration: 263500
loss: 1.0107688903808594,grad_norm: 0.9999992177206065, iteration: 263501
loss: 1.0230295658111572,grad_norm: 0.7678152099344987, iteration: 263502
loss: 1.0095809698104858,grad_norm: 0.8161937115455679, iteration: 263503
loss: 1.0475741624832153,grad_norm: 0.9999990585127101, iteration: 263504
loss: 1.0297187566757202,grad_norm: 0.7140877077833051, iteration: 263505
loss: 0.9962817430496216,grad_norm: 0.9999998268603463, iteration: 263506
loss: 1.0234973430633545,grad_norm: 0.8665163761287865, iteration: 263507
loss: 1.054341435432434,grad_norm: 0.8530766265537603, iteration: 263508
loss: 0.9892702698707581,grad_norm: 0.9078016720655088, iteration: 263509
loss: 0.9973174929618835,grad_norm: 0.8924088341128815, iteration: 263510
loss: 0.9941481947898865,grad_norm: 0.852204565833317, iteration: 263511
loss: 1.0054092407226562,grad_norm: 0.9351696391961695, iteration: 263512
loss: 0.9936684370040894,grad_norm: 0.7749497488963459, iteration: 263513
loss: 0.9919767379760742,grad_norm: 0.8882976534876693, iteration: 263514
loss: 0.981508731842041,grad_norm: 0.9053245049065177, iteration: 263515
loss: 1.0326474905014038,grad_norm: 0.8525603762585301, iteration: 263516
loss: 1.0332871675491333,grad_norm: 0.9999991033141379, iteration: 263517
loss: 0.98483806848526,grad_norm: 0.9896984497365082, iteration: 263518
loss: 0.9742307662963867,grad_norm: 0.7097112685878366, iteration: 263519
loss: 0.9884269833564758,grad_norm: 0.6093713429992944, iteration: 263520
loss: 1.0224642753601074,grad_norm: 0.9808864186276344, iteration: 263521
loss: 1.002121925354004,grad_norm: 0.758327251311519, iteration: 263522
loss: 0.988727867603302,grad_norm: 0.7655483206481629, iteration: 263523
loss: 0.9657657146453857,grad_norm: 0.915588008454578, iteration: 263524
loss: 0.9930199980735779,grad_norm: 0.7505636593343572, iteration: 263525
loss: 1.0336201190948486,grad_norm: 0.9732611907017821, iteration: 263526
loss: 0.9923094511032104,grad_norm: 0.9922628478982956, iteration: 263527
loss: 1.0297694206237793,grad_norm: 0.9999990953880029, iteration: 263528
loss: 0.9984843134880066,grad_norm: 0.9024276977870539, iteration: 263529
loss: 1.006142258644104,grad_norm: 0.9999990626980324, iteration: 263530
loss: 1.001526951789856,grad_norm: 0.929063733564413, iteration: 263531
loss: 1.0145082473754883,grad_norm: 0.8852698039513934, iteration: 263532
loss: 1.002854585647583,grad_norm: 0.9999991960743932, iteration: 263533
loss: 1.0371694564819336,grad_norm: 0.8865076950765278, iteration: 263534
loss: 0.9889025688171387,grad_norm: 0.7496643646210737, iteration: 263535
loss: 0.9906108379364014,grad_norm: 0.9202261057360361, iteration: 263536
loss: 1.029848337173462,grad_norm: 0.9999992011595605, iteration: 263537
loss: 1.0488619804382324,grad_norm: 0.9999990822751219, iteration: 263538
loss: 1.0050233602523804,grad_norm: 0.8818845678009481, iteration: 263539
loss: 0.9757392406463623,grad_norm: 0.9189788663941222, iteration: 263540
loss: 1.0083391666412354,grad_norm: 0.9999998520199056, iteration: 263541
loss: 0.9913493990898132,grad_norm: 0.8475378118629516, iteration: 263542
loss: 1.0070255994796753,grad_norm: 0.8637836494140217, iteration: 263543
loss: 1.01885986328125,grad_norm: 0.9999992432397741, iteration: 263544
loss: 0.9828733205795288,grad_norm: 0.8601646001507905, iteration: 263545
loss: 1.004004716873169,grad_norm: 0.8421648665507959, iteration: 263546
loss: 0.9967049956321716,grad_norm: 0.9855921079383891, iteration: 263547
loss: 1.0270092487335205,grad_norm: 0.8112253384907616, iteration: 263548
loss: 1.0213969945907593,grad_norm: 0.9164893718573125, iteration: 263549
loss: 1.0172479152679443,grad_norm: 0.8412327894905769, iteration: 263550
loss: 0.9676080942153931,grad_norm: 0.8743427596914267, iteration: 263551
loss: 1.0192508697509766,grad_norm: 0.9632414117574011, iteration: 263552
loss: 1.020077109336853,grad_norm: 0.9780968823152522, iteration: 263553
loss: 1.0803169012069702,grad_norm: 0.9721524975869816, iteration: 263554
loss: 1.072053074836731,grad_norm: 0.9999996314766145, iteration: 263555
loss: 0.9944251179695129,grad_norm: 0.9606547092324731, iteration: 263556
loss: 1.257315754890442,grad_norm: 0.9999992455180794, iteration: 263557
loss: 0.992231547832489,grad_norm: 0.8366795697255118, iteration: 263558
loss: 1.007623553276062,grad_norm: 0.8469121694746986, iteration: 263559
loss: 0.9878316521644592,grad_norm: 0.9475531017722133, iteration: 263560
loss: 1.0273369550704956,grad_norm: 0.880720905131943, iteration: 263561
loss: 1.1383261680603027,grad_norm: 0.9999995987335749, iteration: 263562
loss: 0.9759004712104797,grad_norm: 0.9567363668163816, iteration: 263563
loss: 0.9961795210838318,grad_norm: 0.8128601967140372, iteration: 263564
loss: 0.9952426552772522,grad_norm: 0.8591724723157472, iteration: 263565
loss: 0.9797576665878296,grad_norm: 0.8684172388164915, iteration: 263566
loss: 0.9924038648605347,grad_norm: 0.7228586154998559, iteration: 263567
loss: 0.9962310194969177,grad_norm: 0.8166252023057279, iteration: 263568
loss: 0.9900549054145813,grad_norm: 0.8857124113313753, iteration: 263569
loss: 1.1752322912216187,grad_norm: 0.9999997195987563, iteration: 263570
loss: 0.9917253851890564,grad_norm: 0.919375989126345, iteration: 263571
loss: 0.9913030862808228,grad_norm: 0.8109696131050758, iteration: 263572
loss: 1.0020403861999512,grad_norm: 0.8906456760959792, iteration: 263573
loss: 0.9810636043548584,grad_norm: 0.8831460333795765, iteration: 263574
loss: 1.0435220003128052,grad_norm: 0.9999991996145203, iteration: 263575
loss: 1.0088683366775513,grad_norm: 0.8526310401426069, iteration: 263576
loss: 1.0361377000808716,grad_norm: 0.9999991316563407, iteration: 263577
loss: 1.0646892786026,grad_norm: 0.9450347381304722, iteration: 263578
loss: 1.0982880592346191,grad_norm: 0.9031934906459693, iteration: 263579
loss: 1.0088551044464111,grad_norm: 0.8971133788971847, iteration: 263580
loss: 1.0370166301727295,grad_norm: 0.9999991517880619, iteration: 263581
loss: 1.0276812314987183,grad_norm: 0.7436715422432932, iteration: 263582
loss: 1.0061299800872803,grad_norm: 0.7954306094866385, iteration: 263583
loss: 1.0070743560791016,grad_norm: 0.8427357844472442, iteration: 263584
loss: 1.0193531513214111,grad_norm: 0.8301619131587397, iteration: 263585
loss: 1.017612338066101,grad_norm: 0.9450209773157298, iteration: 263586
loss: 1.0135029554367065,grad_norm: 0.9770691252106019, iteration: 263587
loss: 1.0009844303131104,grad_norm: 0.7438571057634735, iteration: 263588
loss: 0.9977914094924927,grad_norm: 0.9425740088561931, iteration: 263589
loss: 1.0033725500106812,grad_norm: 0.8394349244228309, iteration: 263590
loss: 1.028476357460022,grad_norm: 0.9719900953211793, iteration: 263591
loss: 1.0134726762771606,grad_norm: 0.8126105595740784, iteration: 263592
loss: 1.0044978857040405,grad_norm: 0.7088519947295998, iteration: 263593
loss: 1.0170252323150635,grad_norm: 0.9999992301792303, iteration: 263594
loss: 1.0047643184661865,grad_norm: 0.9020320974827254, iteration: 263595
loss: 1.0140968561172485,grad_norm: 0.9999991663818651, iteration: 263596
loss: 0.9810439944267273,grad_norm: 0.7748551708564003, iteration: 263597
loss: 0.9959800243377686,grad_norm: 0.8703073018004056, iteration: 263598
loss: 1.0249260663986206,grad_norm: 0.9145568974318469, iteration: 263599
loss: 1.0181478261947632,grad_norm: 0.8542640002056129, iteration: 263600
loss: 0.992005467414856,grad_norm: 0.8587533566937303, iteration: 263601
loss: 1.0091466903686523,grad_norm: 0.8884285832052021, iteration: 263602
loss: 1.021982192993164,grad_norm: 0.9999991230558128, iteration: 263603
loss: 1.0458906888961792,grad_norm: 0.9999997128380733, iteration: 263604
loss: 1.0078215599060059,grad_norm: 0.973088610643984, iteration: 263605
loss: 0.9981517195701599,grad_norm: 0.9999992193279292, iteration: 263606
loss: 0.9925875663757324,grad_norm: 0.7730293773029605, iteration: 263607
loss: 0.9973235130310059,grad_norm: 0.9999991658664061, iteration: 263608
loss: 1.0344129800796509,grad_norm: 0.9999999045626813, iteration: 263609
loss: 1.0008032321929932,grad_norm: 0.943719765037199, iteration: 263610
loss: 0.98841792345047,grad_norm: 0.9999989938723929, iteration: 263611
loss: 1.0027087926864624,grad_norm: 0.9187162722641864, iteration: 263612
loss: 0.9734928607940674,grad_norm: 0.872384791882559, iteration: 263613
loss: 1.0238590240478516,grad_norm: 0.9104901121538846, iteration: 263614
loss: 0.9856960773468018,grad_norm: 0.7890182797972105, iteration: 263615
loss: 0.9784852862358093,grad_norm: 0.7343766918726786, iteration: 263616
loss: 1.0115859508514404,grad_norm: 0.7889986247747366, iteration: 263617
loss: 1.0147372484207153,grad_norm: 0.8696540254668248, iteration: 263618
loss: 0.9983271956443787,grad_norm: 0.9999991627242284, iteration: 263619
loss: 1.0179564952850342,grad_norm: 0.9082439479275923, iteration: 263620
loss: 0.9941681623458862,grad_norm: 0.8154140580462321, iteration: 263621
loss: 1.0224398374557495,grad_norm: 0.8620247411156248, iteration: 263622
loss: 0.9949915409088135,grad_norm: 0.9999992273737728, iteration: 263623
loss: 1.0135854482650757,grad_norm: 0.8395195109812935, iteration: 263624
loss: 1.0162315368652344,grad_norm: 0.7728932255991723, iteration: 263625
loss: 0.980566680431366,grad_norm: 0.9391132482863382, iteration: 263626
loss: 0.9877244830131531,grad_norm: 0.861901703146878, iteration: 263627
loss: 1.0328454971313477,grad_norm: 0.8414982066992701, iteration: 263628
loss: 0.997774064540863,grad_norm: 0.9580902040441159, iteration: 263629
loss: 0.9931075572967529,grad_norm: 0.9999990398868008, iteration: 263630
loss: 1.022927165031433,grad_norm: 0.7621493020178397, iteration: 263631
loss: 1.002641201019287,grad_norm: 0.7527834663487415, iteration: 263632
loss: 0.9611768126487732,grad_norm: 0.8234009862647431, iteration: 263633
loss: 1.0259513854980469,grad_norm: 0.7867326948844584, iteration: 263634
loss: 1.0021116733551025,grad_norm: 0.9059794858110389, iteration: 263635
loss: 0.9772796630859375,grad_norm: 0.8734585206007106, iteration: 263636
loss: 1.0265214443206787,grad_norm: 0.9999991718037534, iteration: 263637
loss: 1.003231406211853,grad_norm: 0.9348654829045757, iteration: 263638
loss: 0.9741709232330322,grad_norm: 0.99999905996848, iteration: 263639
loss: 0.9865448474884033,grad_norm: 0.8422064689013723, iteration: 263640
loss: 0.9987753629684448,grad_norm: 0.7893953017820396, iteration: 263641
loss: 0.9551011919975281,grad_norm: 0.7900504361696042, iteration: 263642
loss: 0.9956248998641968,grad_norm: 0.8609893417123112, iteration: 263643
loss: 1.0806081295013428,grad_norm: 0.9999991724392951, iteration: 263644
loss: 0.9902117848396301,grad_norm: 0.8816091879785254, iteration: 263645
loss: 1.0241937637329102,grad_norm: 0.9103139094714575, iteration: 263646
loss: 0.9643054008483887,grad_norm: 0.9525423421835352, iteration: 263647
loss: 0.9798110127449036,grad_norm: 0.9999991074750344, iteration: 263648
loss: 0.9698936939239502,grad_norm: 0.9999991428850691, iteration: 263649
loss: 1.013137936592102,grad_norm: 0.9999990249162841, iteration: 263650
loss: 0.9891369938850403,grad_norm: 0.8701557767711982, iteration: 263651
loss: 1.031350016593933,grad_norm: 0.8596335948526, iteration: 263652
loss: 0.9923712015151978,grad_norm: 0.8338784305779671, iteration: 263653
loss: 1.0197086334228516,grad_norm: 0.9999989548281747, iteration: 263654
loss: 0.9931893348693848,grad_norm: 0.8205458894859956, iteration: 263655
loss: 1.0630850791931152,grad_norm: 0.8477647285339587, iteration: 263656
loss: 0.9812778234481812,grad_norm: 0.9759757407913773, iteration: 263657
loss: 1.024199366569519,grad_norm: 0.8444108533746286, iteration: 263658
loss: 0.9899486303329468,grad_norm: 0.8632970803017017, iteration: 263659
loss: 1.0054744482040405,grad_norm: 0.8373890609554515, iteration: 263660
loss: 1.018344521522522,grad_norm: 0.9885301108558233, iteration: 263661
loss: 0.9997383952140808,grad_norm: 0.8973772550665055, iteration: 263662
loss: 1.0030676126480103,grad_norm: 0.9436592295004369, iteration: 263663
loss: 0.9617397785186768,grad_norm: 0.7820188594685075, iteration: 263664
loss: 1.033787727355957,grad_norm: 0.8553701696473348, iteration: 263665
loss: 1.022943377494812,grad_norm: 0.7943372468245379, iteration: 263666
loss: 0.9711779952049255,grad_norm: 0.8077476814865088, iteration: 263667
loss: 1.001216173171997,grad_norm: 0.9561246231488506, iteration: 263668
loss: 0.9774684906005859,grad_norm: 0.9999991205671837, iteration: 263669
loss: 1.0938501358032227,grad_norm: 0.999999142708769, iteration: 263670
loss: 1.007886290550232,grad_norm: 0.999999092837983, iteration: 263671
loss: 1.0036619901657104,grad_norm: 0.9367774546927128, iteration: 263672
loss: 1.0654057264328003,grad_norm: 0.9999995073915172, iteration: 263673
loss: 1.0029412508010864,grad_norm: 0.8766136770380799, iteration: 263674
loss: 1.0543211698532104,grad_norm: 0.9999991431068205, iteration: 263675
loss: 1.0180186033248901,grad_norm: 0.862716452119704, iteration: 263676
loss: 1.0332427024841309,grad_norm: 0.9282566159765124, iteration: 263677
loss: 1.0978895425796509,grad_norm: 0.9624125573736236, iteration: 263678
loss: 0.9858871698379517,grad_norm: 0.9123930794235732, iteration: 263679
loss: 1.0072011947631836,grad_norm: 0.8922414153136794, iteration: 263680
loss: 1.0089133977890015,grad_norm: 0.900335336852738, iteration: 263681
loss: 1.0002801418304443,grad_norm: 0.8935865062472129, iteration: 263682
loss: 1.0456345081329346,grad_norm: 0.9999999936338024, iteration: 263683
loss: 1.0270150899887085,grad_norm: 0.9734230650863075, iteration: 263684
loss: 1.003104567527771,grad_norm: 0.831543104046675, iteration: 263685
loss: 0.991534411907196,grad_norm: 0.7797011582336436, iteration: 263686
loss: 0.9932087659835815,grad_norm: 0.8335119371617297, iteration: 263687
loss: 0.9714080095291138,grad_norm: 0.7904509307470322, iteration: 263688
loss: 0.9959068894386292,grad_norm: 0.9019703287135168, iteration: 263689
loss: 1.0105046033859253,grad_norm: 0.8937647595791232, iteration: 263690
loss: 1.0167862176895142,grad_norm: 0.9999991805289211, iteration: 263691
loss: 1.0103111267089844,grad_norm: 0.9538447200298075, iteration: 263692
loss: 0.97420734167099,grad_norm: 0.9999991171366522, iteration: 263693
loss: 0.9895435571670532,grad_norm: 0.896040901411308, iteration: 263694
loss: 1.0208956003189087,grad_norm: 0.9879400193711569, iteration: 263695
loss: 0.9883350729942322,grad_norm: 0.99529305620288, iteration: 263696
loss: 0.9588269591331482,grad_norm: 0.8212708694470247, iteration: 263697
loss: 0.9791170358657837,grad_norm: 0.8838432171792816, iteration: 263698
loss: 1.0775675773620605,grad_norm: 0.9358208450811695, iteration: 263699
loss: 1.086916446685791,grad_norm: 0.9999995732322003, iteration: 263700
loss: 0.951305091381073,grad_norm: 0.9196252045115175, iteration: 263701
loss: 0.9709226489067078,grad_norm: 0.7392055246206335, iteration: 263702
loss: 1.0116453170776367,grad_norm: 0.8303423132223191, iteration: 263703
loss: 1.0066465139389038,grad_norm: 0.9999991005880178, iteration: 263704
loss: 0.9829380512237549,grad_norm: 0.999999059810894, iteration: 263705
loss: 1.025010108947754,grad_norm: 0.6887795334933465, iteration: 263706
loss: 0.9809311032295227,grad_norm: 0.9634122062692942, iteration: 263707
loss: 0.9660238027572632,grad_norm: 0.8493873679816162, iteration: 263708
loss: 0.9910954236984253,grad_norm: 0.8856436095903296, iteration: 263709
loss: 1.014148235321045,grad_norm: 0.7031279828483344, iteration: 263710
loss: 1.0221030712127686,grad_norm: 0.9999991557124052, iteration: 263711
loss: 0.9880189299583435,grad_norm: 0.854124257533949, iteration: 263712
loss: 1.0428274869918823,grad_norm: 0.999999067770996, iteration: 263713
loss: 1.0634797811508179,grad_norm: 0.9999994507677401, iteration: 263714
loss: 0.9748703837394714,grad_norm: 0.8486906245832094, iteration: 263715
loss: 1.0151251554489136,grad_norm: 0.9999990608085217, iteration: 263716
loss: 1.0050419569015503,grad_norm: 0.9673581524791512, iteration: 263717
loss: 0.9959641098976135,grad_norm: 0.8112530000443513, iteration: 263718
loss: 1.0650484561920166,grad_norm: 0.999999134247157, iteration: 263719
loss: 0.9871990084648132,grad_norm: 0.8617198986655087, iteration: 263720
loss: 1.0123562812805176,grad_norm: 0.7551306992969865, iteration: 263721
loss: 1.0278035402297974,grad_norm: 0.8895574273933897, iteration: 263722
loss: 1.0001155138015747,grad_norm: 0.8697216749898614, iteration: 263723
loss: 0.9392813444137573,grad_norm: 0.909818783045556, iteration: 263724
loss: 1.0240243673324585,grad_norm: 0.9999990626429569, iteration: 263725
loss: 1.044471025466919,grad_norm: 0.855202840921383, iteration: 263726
loss: 1.0020666122436523,grad_norm: 0.8953333933328558, iteration: 263727
loss: 0.9935721755027771,grad_norm: 0.872868420839366, iteration: 263728
loss: 1.001266360282898,grad_norm: 0.8694742482122492, iteration: 263729
loss: 0.9588465094566345,grad_norm: 0.8695205881692989, iteration: 263730
loss: 0.9735642075538635,grad_norm: 0.7952434011559779, iteration: 263731
loss: 1.0328788757324219,grad_norm: 0.972650965473517, iteration: 263732
loss: 1.0056909322738647,grad_norm: 0.9999991668959207, iteration: 263733
loss: 1.0488258600234985,grad_norm: 0.978777470823612, iteration: 263734
loss: 1.0047962665557861,grad_norm: 0.8261019296353678, iteration: 263735
loss: 1.0217909812927246,grad_norm: 0.7854110243470582, iteration: 263736
loss: 1.0229772329330444,grad_norm: 0.8284325805249563, iteration: 263737
loss: 0.9933963418006897,grad_norm: 0.8814182212249685, iteration: 263738
loss: 0.9919982552528381,grad_norm: 0.9999996374499635, iteration: 263739
loss: 1.0031722784042358,grad_norm: 0.9818998091346345, iteration: 263740
loss: 1.0211594104766846,grad_norm: 0.762407849450825, iteration: 263741
loss: 1.0916868448257446,grad_norm: 1.00000002621711, iteration: 263742
loss: 1.0125924348831177,grad_norm: 0.9999989872412365, iteration: 263743
loss: 1.0756651163101196,grad_norm: 0.9999993653073241, iteration: 263744
loss: 1.049875259399414,grad_norm: 0.9999999189134231, iteration: 263745
loss: 0.9782647490501404,grad_norm: 0.8680884880570555, iteration: 263746
loss: 0.9945399165153503,grad_norm: 0.8490500661863037, iteration: 263747
loss: 0.9781209826469421,grad_norm: 0.9392397063100109, iteration: 263748
loss: 0.9631284475326538,grad_norm: 0.8689575516090229, iteration: 263749
loss: 0.9856482744216919,grad_norm: 0.9062521946944196, iteration: 263750
loss: 0.9876356720924377,grad_norm: 0.8248142011064643, iteration: 263751
loss: 1.0035005807876587,grad_norm: 0.766007343292406, iteration: 263752
loss: 0.9916591048240662,grad_norm: 0.9999990496111016, iteration: 263753
loss: 0.9860658049583435,grad_norm: 0.9999990370286769, iteration: 263754
loss: 1.0012481212615967,grad_norm: 0.9999991816215552, iteration: 263755
loss: 0.9630979299545288,grad_norm: 0.8572101523756923, iteration: 263756
loss: 1.0049227476119995,grad_norm: 0.9075856212373816, iteration: 263757
loss: 1.005291223526001,grad_norm: 0.948231086167225, iteration: 263758
loss: 0.9905452728271484,grad_norm: 0.898038881155278, iteration: 263759
loss: 0.9885627031326294,grad_norm: 0.8407529192098547, iteration: 263760
loss: 1.0055369138717651,grad_norm: 0.9999989272209312, iteration: 263761
loss: 0.9952962398529053,grad_norm: 0.9999994928358757, iteration: 263762
loss: 0.9896615147590637,grad_norm: 0.8608432209965207, iteration: 263763
loss: 1.0019426345825195,grad_norm: 0.9999989552309971, iteration: 263764
loss: 1.0022332668304443,grad_norm: 0.8623774960055653, iteration: 263765
loss: 0.9961952567100525,grad_norm: 0.8912933216825814, iteration: 263766
loss: 1.0058598518371582,grad_norm: 0.9999990630682523, iteration: 263767
loss: 1.0111554861068726,grad_norm: 0.9999997301649234, iteration: 263768
loss: 0.9674333333969116,grad_norm: 0.7895776620484702, iteration: 263769
loss: 0.9385513663291931,grad_norm: 0.9652471356014207, iteration: 263770
loss: 1.0509520769119263,grad_norm: 0.9638010566074315, iteration: 263771
loss: 1.0485502481460571,grad_norm: 0.9999992484443188, iteration: 263772
loss: 0.9863469004631042,grad_norm: 0.8905635161255954, iteration: 263773
loss: 1.0170133113861084,grad_norm: 0.9017034034598154, iteration: 263774
loss: 1.0102241039276123,grad_norm: 0.87820287572, iteration: 263775
loss: 1.000038504600525,grad_norm: 0.999999291856308, iteration: 263776
loss: 0.9899333119392395,grad_norm: 0.9928260228812069, iteration: 263777
loss: 0.9940536618232727,grad_norm: 0.9179862635472184, iteration: 263778
loss: 1.0215502977371216,grad_norm: 0.7766588115895815, iteration: 263779
loss: 1.0345443487167358,grad_norm: 0.8306445647524601, iteration: 263780
loss: 1.0146465301513672,grad_norm: 0.9446040567170041, iteration: 263781
loss: 1.0012177228927612,grad_norm: 0.9999992119964602, iteration: 263782
loss: 1.0149167776107788,grad_norm: 0.8561529673270425, iteration: 263783
loss: 0.9986890554428101,grad_norm: 0.9999991699945562, iteration: 263784
loss: 1.0441721677780151,grad_norm: 0.8641954821857482, iteration: 263785
loss: 1.0083658695220947,grad_norm: 0.8450365984473984, iteration: 263786
loss: 1.04896879196167,grad_norm: 0.9999995916442777, iteration: 263787
loss: 0.984192967414856,grad_norm: 0.8750183999644133, iteration: 263788
loss: 1.0287487506866455,grad_norm: 0.9456104420433441, iteration: 263789
loss: 0.9829333424568176,grad_norm: 0.8093302509471817, iteration: 263790
loss: 1.003801703453064,grad_norm: 0.82700094303627, iteration: 263791
loss: 1.0685330629348755,grad_norm: 0.9999999637335586, iteration: 263792
loss: 0.99613356590271,grad_norm: 0.8904786177805974, iteration: 263793
loss: 0.9610731601715088,grad_norm: 0.8341849093670569, iteration: 263794
loss: 0.970504105091095,grad_norm: 0.7770480054441282, iteration: 263795
loss: 0.9858102798461914,grad_norm: 0.9999992146178909, iteration: 263796
loss: 0.9960145354270935,grad_norm: 0.9294495302556806, iteration: 263797
loss: 1.0073585510253906,grad_norm: 0.9999991662153895, iteration: 263798
loss: 0.9913846850395203,grad_norm: 0.837296133362, iteration: 263799
loss: 0.9834016561508179,grad_norm: 0.8563537497685704, iteration: 263800
loss: 0.9834380745887756,grad_norm: 0.977196575099618, iteration: 263801
loss: 1.0115150213241577,grad_norm: 0.8389328014152089, iteration: 263802
loss: 1.0451463460922241,grad_norm: 0.9373714673788469, iteration: 263803
loss: 0.9733336567878723,grad_norm: 0.8465232026258896, iteration: 263804
loss: 1.017337441444397,grad_norm: 0.9999992419109045, iteration: 263805
loss: 0.9899651408195496,grad_norm: 0.888941736841336, iteration: 263806
loss: 1.13405442237854,grad_norm: 0.9999992734297211, iteration: 263807
loss: 0.9941221475601196,grad_norm: 0.8905534632659527, iteration: 263808
loss: 0.9828538298606873,grad_norm: 0.9999992412381783, iteration: 263809
loss: 1.0213639736175537,grad_norm: 0.9693006830259401, iteration: 263810
loss: 1.0166338682174683,grad_norm: 0.9999991472884303, iteration: 263811
loss: 1.0013136863708496,grad_norm: 0.999999090026754, iteration: 263812
loss: 0.995909571647644,grad_norm: 0.99999914006779, iteration: 263813
loss: 0.995221734046936,grad_norm: 0.7713516253851738, iteration: 263814
loss: 0.9734745621681213,grad_norm: 0.9245145559085162, iteration: 263815
loss: 0.9835987687110901,grad_norm: 0.9999991258791666, iteration: 263816
loss: 1.0001987218856812,grad_norm: 0.9500770760997588, iteration: 263817
loss: 1.020569086074829,grad_norm: 0.8575906176876026, iteration: 263818
loss: 1.0037811994552612,grad_norm: 0.895139128756273, iteration: 263819
loss: 1.0007349252700806,grad_norm: 0.7860682047598478, iteration: 263820
loss: 1.0092881917953491,grad_norm: 0.9999990881334548, iteration: 263821
loss: 0.9907081127166748,grad_norm: 0.8917180354483246, iteration: 263822
loss: 0.945914089679718,grad_norm: 0.8363758661605858, iteration: 263823
loss: 0.9697063565254211,grad_norm: 0.8352800405830882, iteration: 263824
loss: 0.9976689219474792,grad_norm: 0.8470856231496635, iteration: 263825
loss: 1.0627939701080322,grad_norm: 0.9999994694605305, iteration: 263826
loss: 1.103798747062683,grad_norm: 0.9999998667887133, iteration: 263827
loss: 1.026406168937683,grad_norm: 0.9999991528149675, iteration: 263828
loss: 0.9862766861915588,grad_norm: 0.9999991622005113, iteration: 263829
loss: 0.9993313550949097,grad_norm: 0.8861586017642633, iteration: 263830
loss: 0.9881159663200378,grad_norm: 0.8386148146069871, iteration: 263831
loss: 1.0459399223327637,grad_norm: 0.9787223291958785, iteration: 263832
loss: 0.980859637260437,grad_norm: 0.8996774935868389, iteration: 263833
loss: 0.9777592420578003,grad_norm: 0.8147601745757379, iteration: 263834
loss: 1.0071614980697632,grad_norm: 0.7562438527519794, iteration: 263835
loss: 1.021130084991455,grad_norm: 0.9982731654974231, iteration: 263836
loss: 1.0261445045471191,grad_norm: 0.9999990255500518, iteration: 263837
loss: 0.9906638264656067,grad_norm: 0.7748325351731129, iteration: 263838
loss: 1.0008659362792969,grad_norm: 0.9999990160869697, iteration: 263839
loss: 0.958376944065094,grad_norm: 0.8383031400842896, iteration: 263840
loss: 1.0297589302062988,grad_norm: 0.8261693854158437, iteration: 263841
loss: 0.9520794153213501,grad_norm: 0.8068139572862788, iteration: 263842
loss: 1.0233632326126099,grad_norm: 0.999999483016013, iteration: 263843
loss: 1.0295993089675903,grad_norm: 0.7842035873037219, iteration: 263844
loss: 1.0026830434799194,grad_norm: 0.9999989544409342, iteration: 263845
loss: 0.9762048125267029,grad_norm: 0.9999996613546829, iteration: 263846
loss: 0.9977554082870483,grad_norm: 0.7954870801946464, iteration: 263847
loss: 1.039687991142273,grad_norm: 0.9999990901416516, iteration: 263848
loss: 0.9978704452514648,grad_norm: 0.7299921298618954, iteration: 263849
loss: 0.9728763699531555,grad_norm: 0.8707506076814723, iteration: 263850
loss: 1.1019366979599,grad_norm: 0.9999998713782701, iteration: 263851
loss: 0.9650145173072815,grad_norm: 0.983027499560413, iteration: 263852
loss: 1.0745435953140259,grad_norm: 0.9999992494315009, iteration: 263853
loss: 1.0241637229919434,grad_norm: 0.9999991876333941, iteration: 263854
loss: 0.9752035140991211,grad_norm: 0.8624991442975095, iteration: 263855
loss: 1.0051287412643433,grad_norm: 0.9999991064874726, iteration: 263856
loss: 0.9701639413833618,grad_norm: 0.9999990967825679, iteration: 263857
loss: 1.0136163234710693,grad_norm: 0.9787076256031921, iteration: 263858
loss: 1.0098828077316284,grad_norm: 0.9999993023657255, iteration: 263859
loss: 1.041824460029602,grad_norm: 0.8499155690541604, iteration: 263860
loss: 1.0248481035232544,grad_norm: 0.8508436955343511, iteration: 263861
loss: 1.03513765335083,grad_norm: 0.9999999576762936, iteration: 263862
loss: 1.0186004638671875,grad_norm: 0.9999990363183479, iteration: 263863
loss: 0.9982245564460754,grad_norm: 0.9529488146840995, iteration: 263864
loss: 1.0037407875061035,grad_norm: 0.8650417053409922, iteration: 263865
loss: 1.0302743911743164,grad_norm: 0.9999994185891841, iteration: 263866
loss: 0.9628221392631531,grad_norm: 0.9430579157735673, iteration: 263867
loss: 0.9757564663887024,grad_norm: 0.8074535588009315, iteration: 263868
loss: 0.9855881333351135,grad_norm: 0.9036206282840377, iteration: 263869
loss: 1.0347647666931152,grad_norm: 0.9146104846972116, iteration: 263870
loss: 0.9691634774208069,grad_norm: 0.9359100159130842, iteration: 263871
loss: 1.0735538005828857,grad_norm: 0.9999992268466273, iteration: 263872
loss: 0.9790002107620239,grad_norm: 0.9503025859212099, iteration: 263873
loss: 0.9847756624221802,grad_norm: 0.7511456175725589, iteration: 263874
loss: 1.0324856042861938,grad_norm: 0.999999375774302, iteration: 263875
loss: 0.986179769039154,grad_norm: 0.9703682254743785, iteration: 263876
loss: 1.04075288772583,grad_norm: 0.9999990691604133, iteration: 263877
loss: 1.0084481239318848,grad_norm: 0.7669676386228751, iteration: 263878
loss: 1.000423789024353,grad_norm: 0.7910092665753241, iteration: 263879
loss: 0.9701735973358154,grad_norm: 0.8903414527116049, iteration: 263880
loss: 1.0295735597610474,grad_norm: 0.9999991949892189, iteration: 263881
loss: 0.97120201587677,grad_norm: 0.8264679350310182, iteration: 263882
loss: 0.9550033807754517,grad_norm: 0.9754274057068738, iteration: 263883
loss: 1.0319575071334839,grad_norm: 0.837864479013414, iteration: 263884
loss: 1.0173394680023193,grad_norm: 0.9999995109259331, iteration: 263885
loss: 0.9598444700241089,grad_norm: 0.8925638720297855, iteration: 263886
loss: 0.9937748908996582,grad_norm: 0.9256191737178824, iteration: 263887
loss: 1.0055274963378906,grad_norm: 0.9086608465043553, iteration: 263888
loss: 0.9892739653587341,grad_norm: 0.8445730548262601, iteration: 263889
loss: 1.0149143934249878,grad_norm: 0.9999998745246025, iteration: 263890
loss: 0.9995954632759094,grad_norm: 0.9999995945214721, iteration: 263891
loss: 0.9848523736000061,grad_norm: 0.8133944804446517, iteration: 263892
loss: 0.9788105487823486,grad_norm: 0.7959294704041728, iteration: 263893
loss: 1.0057454109191895,grad_norm: 0.999999898873706, iteration: 263894
loss: 1.0187739133834839,grad_norm: 0.9480096511416789, iteration: 263895
loss: 1.032696008682251,grad_norm: 0.9999995145285858, iteration: 263896
loss: 1.0241427421569824,grad_norm: 0.7475171092904334, iteration: 263897
loss: 1.018261194229126,grad_norm: 0.8452959039686059, iteration: 263898
loss: 1.0077012777328491,grad_norm: 0.9994498493247327, iteration: 263899
loss: 0.9917030930519104,grad_norm: 0.7406425136356527, iteration: 263900
loss: 1.028143286705017,grad_norm: 0.9999992858328665, iteration: 263901
loss: 0.9647790193557739,grad_norm: 0.965575912601443, iteration: 263902
loss: 0.999786376953125,grad_norm: 0.9013992321958441, iteration: 263903
loss: 0.9622577428817749,grad_norm: 0.745557563212391, iteration: 263904
loss: 0.9773057103157043,grad_norm: 0.7563931562895868, iteration: 263905
loss: 1.0120458602905273,grad_norm: 0.9468519382650552, iteration: 263906
loss: 0.9814866781234741,grad_norm: 0.7942913572897784, iteration: 263907
loss: 1.0378419160842896,grad_norm: 0.9999993659561883, iteration: 263908
loss: 1.0153309106826782,grad_norm: 0.9999991115385638, iteration: 263909
loss: 1.0059430599212646,grad_norm: 0.9999992000561176, iteration: 263910
loss: 0.9789830446243286,grad_norm: 0.9269151182037254, iteration: 263911
loss: 1.0086549520492554,grad_norm: 0.9255803483651657, iteration: 263912
loss: 1.0038390159606934,grad_norm: 0.9999991489538327, iteration: 263913
loss: 1.0135769844055176,grad_norm: 0.8738156650715085, iteration: 263914
loss: 1.0153008699417114,grad_norm: 0.8631711465747328, iteration: 263915
loss: 0.9667225480079651,grad_norm: 0.8571373483713856, iteration: 263916
loss: 1.0011711120605469,grad_norm: 0.9097254063797355, iteration: 263917
loss: 1.0085906982421875,grad_norm: 0.7538893988445958, iteration: 263918
loss: 1.0635911226272583,grad_norm: 0.9999998377845435, iteration: 263919
loss: 1.0600948333740234,grad_norm: 0.9999991352857234, iteration: 263920
loss: 0.9866224527359009,grad_norm: 0.8589101701219994, iteration: 263921
loss: 0.9927852749824524,grad_norm: 0.9999993130206191, iteration: 263922
loss: 0.9831308722496033,grad_norm: 0.9999990646233737, iteration: 263923
loss: 1.0258079767227173,grad_norm: 0.9999992507944466, iteration: 263924
loss: 1.014298677444458,grad_norm: 0.9672971011123184, iteration: 263925
loss: 1.0264003276824951,grad_norm: 0.8592849206842306, iteration: 263926
loss: 1.0000801086425781,grad_norm: 0.9999990957725278, iteration: 263927
loss: 0.9700220227241516,grad_norm: 0.9258048971597065, iteration: 263928
loss: 1.0138832330703735,grad_norm: 0.9492035594076359, iteration: 263929
loss: 0.9781013131141663,grad_norm: 0.9999996036784347, iteration: 263930
loss: 0.9721821546554565,grad_norm: 0.9999992778502236, iteration: 263931
loss: 0.9596269130706787,grad_norm: 0.9479933200414427, iteration: 263932
loss: 1.008888602256775,grad_norm: 0.8976461785809573, iteration: 263933
loss: 0.9779570698738098,grad_norm: 0.9999990128589156, iteration: 263934
loss: 1.0015789270401,grad_norm: 0.9999993835649322, iteration: 263935
loss: 1.0031938552856445,grad_norm: 0.8996137181861202, iteration: 263936
loss: 1.0334925651550293,grad_norm: 0.9999993083435296, iteration: 263937
loss: 1.0148658752441406,grad_norm: 0.9999989847509101, iteration: 263938
loss: 0.9542086720466614,grad_norm: 0.9457928933808795, iteration: 263939
loss: 0.9731253981590271,grad_norm: 0.7734145265127598, iteration: 263940
loss: 0.9688789248466492,grad_norm: 0.9545189376479422, iteration: 263941
loss: 0.9829235076904297,grad_norm: 0.9351989068801397, iteration: 263942
loss: 1.002529263496399,grad_norm: 0.9979893472299878, iteration: 263943
loss: 1.0136507749557495,grad_norm: 0.999999110184108, iteration: 263944
loss: 0.9861310720443726,grad_norm: 0.8724010541708755, iteration: 263945
loss: 1.0247012376785278,grad_norm: 0.9999993945931187, iteration: 263946
loss: 1.0514596700668335,grad_norm: 0.9999990584792794, iteration: 263947
loss: 1.0061075687408447,grad_norm: 0.9999991610089037, iteration: 263948
loss: 0.9736889600753784,grad_norm: 0.9999996060616437, iteration: 263949
loss: 1.0018818378448486,grad_norm: 0.7915918414850172, iteration: 263950
loss: 0.9940965175628662,grad_norm: 0.9999994078695583, iteration: 263951
loss: 0.9357019662857056,grad_norm: 0.931078798896646, iteration: 263952
loss: 1.0078946352005005,grad_norm: 0.9978070345899273, iteration: 263953
loss: 1.038468599319458,grad_norm: 0.9427729076554198, iteration: 263954
loss: 0.9880619645118713,grad_norm: 0.9898159700671031, iteration: 263955
loss: 1.015964150428772,grad_norm: 0.8679763536120298, iteration: 263956
loss: 1.0570378303527832,grad_norm: 0.9999994919185607, iteration: 263957
loss: 1.0026153326034546,grad_norm: 0.8546104625758408, iteration: 263958
loss: 0.9780616760253906,grad_norm: 0.8481862089219692, iteration: 263959
loss: 1.057568073272705,grad_norm: 0.9231311887852385, iteration: 263960
loss: 1.0294421911239624,grad_norm: 0.8015504380483403, iteration: 263961
loss: 1.0185015201568604,grad_norm: 0.8752081402808013, iteration: 263962
loss: 1.021619439125061,grad_norm: 0.8914823856299573, iteration: 263963
loss: 0.9810203313827515,grad_norm: 0.9999991215641678, iteration: 263964
loss: 0.9955245852470398,grad_norm: 0.9809669550770116, iteration: 263965
loss: 1.0584759712219238,grad_norm: 0.8390007900947352, iteration: 263966
loss: 0.9905184507369995,grad_norm: 0.9141384273160428, iteration: 263967
loss: 0.9654667377471924,grad_norm: 0.7409721777498622, iteration: 263968
loss: 1.0696412324905396,grad_norm: 0.9999992232403573, iteration: 263969
loss: 1.011589765548706,grad_norm: 0.9135929468751232, iteration: 263970
loss: 1.0284985303878784,grad_norm: 0.9999996020693096, iteration: 263971
loss: 0.9913021326065063,grad_norm: 0.9136909139051279, iteration: 263972
loss: 1.0028727054595947,grad_norm: 0.8707470447904978, iteration: 263973
loss: 0.9960833191871643,grad_norm: 0.9999990729721567, iteration: 263974
loss: 0.9965521097183228,grad_norm: 0.8830098491151007, iteration: 263975
loss: 0.9806658029556274,grad_norm: 0.8394438312630633, iteration: 263976
loss: 1.086506724357605,grad_norm: 0.9999992162137297, iteration: 263977
loss: 1.0134834051132202,grad_norm: 0.7732165926399424, iteration: 263978
loss: 1.0350927114486694,grad_norm: 0.9999994945264332, iteration: 263979
loss: 0.9780989289283752,grad_norm: 0.8671577231075231, iteration: 263980
loss: 1.008996844291687,grad_norm: 0.9999991783534495, iteration: 263981
loss: 0.9966843724250793,grad_norm: 0.9028495249962397, iteration: 263982
loss: 0.9940280318260193,grad_norm: 0.9090403665870571, iteration: 263983
loss: 1.0093899965286255,grad_norm: 0.9885814127541437, iteration: 263984
loss: 0.9939639568328857,grad_norm: 0.8341089692964618, iteration: 263985
loss: 1.0546425580978394,grad_norm: 0.8410571949344383, iteration: 263986
loss: 0.9805198907852173,grad_norm: 0.7712226855103893, iteration: 263987
loss: 1.0245178937911987,grad_norm: 0.9321746055785445, iteration: 263988
loss: 1.0943875312805176,grad_norm: 0.9999990864218442, iteration: 263989
loss: 0.9965006113052368,grad_norm: 0.794064345908904, iteration: 263990
loss: 0.9987768530845642,grad_norm: 0.729364551176332, iteration: 263991
loss: 0.9661522507667542,grad_norm: 0.9848063968571874, iteration: 263992
loss: 1.0210142135620117,grad_norm: 0.9999989745445577, iteration: 263993
loss: 1.0219260454177856,grad_norm: 0.8319588416918746, iteration: 263994
loss: 0.979522705078125,grad_norm: 0.997531910622513, iteration: 263995
loss: 1.013247013092041,grad_norm: 0.9092979660941521, iteration: 263996
loss: 0.9303295016288757,grad_norm: 0.96397752564926, iteration: 263997
loss: 1.0279958248138428,grad_norm: 0.985461940640223, iteration: 263998
loss: 1.0258055925369263,grad_norm: 0.865564644831767, iteration: 263999
loss: 0.9884551763534546,grad_norm: 0.7962648597914012, iteration: 264000
loss: 0.990552544593811,grad_norm: 0.8893740081255281, iteration: 264001
loss: 1.0245256423950195,grad_norm: 0.9839273557852757, iteration: 264002
loss: 0.9934577941894531,grad_norm: 0.9168779025088676, iteration: 264003
loss: 1.030835747718811,grad_norm: 0.9999995197147953, iteration: 264004
loss: 0.987865686416626,grad_norm: 0.859219600335987, iteration: 264005
loss: 1.0046645402908325,grad_norm: 0.7875580736544856, iteration: 264006
loss: 1.08254873752594,grad_norm: 0.9999991884410931, iteration: 264007
loss: 0.9812758564949036,grad_norm: 0.9999991138532571, iteration: 264008
loss: 1.013533592224121,grad_norm: 0.9392667773352578, iteration: 264009
loss: 0.9953833222389221,grad_norm: 0.9999992956197102, iteration: 264010
loss: 1.0774272680282593,grad_norm: 0.999999085360867, iteration: 264011
loss: 1.019338607788086,grad_norm: 0.9999989980446846, iteration: 264012
loss: 0.9852861166000366,grad_norm: 0.7522266051855514, iteration: 264013
loss: 1.027036428451538,grad_norm: 0.9999990965403254, iteration: 264014
loss: 0.9952349662780762,grad_norm: 0.9991544577473117, iteration: 264015
loss: 1.0511350631713867,grad_norm: 0.8671011867940263, iteration: 264016
loss: 1.0129575729370117,grad_norm: 0.8695480153486356, iteration: 264017
loss: 1.0011159181594849,grad_norm: 0.9478947282621384, iteration: 264018
loss: 1.0061407089233398,grad_norm: 0.9267693600515032, iteration: 264019
loss: 1.0097181797027588,grad_norm: 0.8722725079843244, iteration: 264020
loss: 0.9889925122261047,grad_norm: 0.7990954053191818, iteration: 264021
loss: 0.9813863039016724,grad_norm: 0.890841630961342, iteration: 264022
loss: 1.0235135555267334,grad_norm: 0.9999997620252239, iteration: 264023
loss: 0.9847886562347412,grad_norm: 0.7986832705721795, iteration: 264024
loss: 0.9882950186729431,grad_norm: 0.7529463228400722, iteration: 264025
loss: 1.0336500406265259,grad_norm: 0.8941544538439045, iteration: 264026
loss: 0.9992797374725342,grad_norm: 0.9999997750750815, iteration: 264027
loss: 0.9769389033317566,grad_norm: 0.8499938625194028, iteration: 264028
loss: 0.9888303875923157,grad_norm: 0.9226945684089977, iteration: 264029
loss: 0.9993542432785034,grad_norm: 0.9817843885534646, iteration: 264030
loss: 1.0052040815353394,grad_norm: 0.9868865183650409, iteration: 264031
loss: 1.004123330116272,grad_norm: 0.9977444650890943, iteration: 264032
loss: 0.9858301281929016,grad_norm: 0.8310992723579863, iteration: 264033
loss: 1.0183987617492676,grad_norm: 0.861677992360622, iteration: 264034
loss: 1.0122073888778687,grad_norm: 0.7208605628542044, iteration: 264035
loss: 0.9781109094619751,grad_norm: 0.9999991410337944, iteration: 264036
loss: 1.009141206741333,grad_norm: 0.9192887377882693, iteration: 264037
loss: 1.002286672592163,grad_norm: 0.7657706944847928, iteration: 264038
loss: 1.0243887901306152,grad_norm: 0.9999994815377274, iteration: 264039
loss: 0.9736611247062683,grad_norm: 0.689505366273951, iteration: 264040
loss: 0.9973024129867554,grad_norm: 0.8213956825831882, iteration: 264041
loss: 1.0771180391311646,grad_norm: 0.9999991173985867, iteration: 264042
loss: 1.0295805931091309,grad_norm: 0.9722388864203816, iteration: 264043
loss: 1.0005565881729126,grad_norm: 0.8928457888509193, iteration: 264044
loss: 0.9587951898574829,grad_norm: 0.7748161850277416, iteration: 264045
loss: 1.018103837966919,grad_norm: 0.9905379781239344, iteration: 264046
loss: 1.015142798423767,grad_norm: 0.922822621031907, iteration: 264047
loss: 0.9880603551864624,grad_norm: 0.9208931905723668, iteration: 264048
loss: 1.01613450050354,grad_norm: 0.9302148638176369, iteration: 264049
loss: 0.9642710089683533,grad_norm: 0.8880737974674678, iteration: 264050
loss: 1.0312596559524536,grad_norm: 0.9335848361628712, iteration: 264051
loss: 1.0536624193191528,grad_norm: 0.7907117597832447, iteration: 264052
loss: 1.0456719398498535,grad_norm: 0.9825903243207716, iteration: 264053
loss: 1.005086898803711,grad_norm: 0.9999990707674248, iteration: 264054
loss: 1.0204999446868896,grad_norm: 0.9999993648366654, iteration: 264055
loss: 1.0822911262512207,grad_norm: 0.999999607328697, iteration: 264056
loss: 0.9914879202842712,grad_norm: 0.8691566125761521, iteration: 264057
loss: 1.4296941757202148,grad_norm: 0.9999996894636694, iteration: 264058
loss: 1.000348687171936,grad_norm: 0.8740221875248693, iteration: 264059
loss: 0.9938322305679321,grad_norm: 0.8824703039954486, iteration: 264060
loss: 0.9939497113227844,grad_norm: 0.9999992851852619, iteration: 264061
loss: 1.0240774154663086,grad_norm: 0.8086509039038362, iteration: 264062
loss: 1.0065258741378784,grad_norm: 0.9999990025932809, iteration: 264063
loss: 1.0331058502197266,grad_norm: 0.9587012530979232, iteration: 264064
loss: 0.9926984310150146,grad_norm: 0.8838231246810768, iteration: 264065
loss: 0.9986160397529602,grad_norm: 0.8154885056998381, iteration: 264066
loss: 1.00320303440094,grad_norm: 0.9999991040492704, iteration: 264067
loss: 1.009955883026123,grad_norm: 0.9484342938063404, iteration: 264068
loss: 0.9921574592590332,grad_norm: 0.8389622176750801, iteration: 264069
loss: 1.0490171909332275,grad_norm: 0.9968559405238249, iteration: 264070
loss: 1.0122036933898926,grad_norm: 0.9539057668537989, iteration: 264071
loss: 1.0298938751220703,grad_norm: 0.9999990341805032, iteration: 264072
loss: 0.9983452558517456,grad_norm: 0.8495127437095626, iteration: 264073
loss: 0.9743247032165527,grad_norm: 0.9999989647100934, iteration: 264074
loss: 1.0097488164901733,grad_norm: 0.7665500476851553, iteration: 264075
loss: 0.9947121739387512,grad_norm: 0.8248758391683632, iteration: 264076
loss: 0.97831791639328,grad_norm: 0.8924747912729438, iteration: 264077
loss: 1.0761446952819824,grad_norm: 0.9999992135907091, iteration: 264078
loss: 0.9819599390029907,grad_norm: 0.9302673493335656, iteration: 264079
loss: 1.0009493827819824,grad_norm: 0.9188493558358316, iteration: 264080
loss: 0.9763520956039429,grad_norm: 0.9319543156840995, iteration: 264081
loss: 1.0288469791412354,grad_norm: 0.9910998335354233, iteration: 264082
loss: 0.9943681955337524,grad_norm: 0.8972196771827669, iteration: 264083
loss: 0.9772489070892334,grad_norm: 0.7054835829560813, iteration: 264084
loss: 0.9371857643127441,grad_norm: 0.9190040659840978, iteration: 264085
loss: 0.9785159230232239,grad_norm: 0.8950476634626097, iteration: 264086
loss: 1.002446174621582,grad_norm: 0.8590310940982013, iteration: 264087
loss: 0.9609339833259583,grad_norm: 0.999999065324648, iteration: 264088
loss: 1.0475560426712036,grad_norm: 0.8128363534931721, iteration: 264089
loss: 0.9810952544212341,grad_norm: 0.9800783883975054, iteration: 264090
loss: 1.0174119472503662,grad_norm: 0.8632688341988572, iteration: 264091
loss: 0.9876689910888672,grad_norm: 0.9146725613896961, iteration: 264092
loss: 0.9825593829154968,grad_norm: 0.8219413110632529, iteration: 264093
loss: 0.9887263178825378,grad_norm: 0.833769804539416, iteration: 264094
loss: 0.9962963461875916,grad_norm: 0.8091041980687244, iteration: 264095
loss: 1.016375184059143,grad_norm: 0.9086023217476459, iteration: 264096
loss: 1.007285237312317,grad_norm: 0.9999990181974815, iteration: 264097
loss: 1.0160953998565674,grad_norm: 0.8265453820434191, iteration: 264098
loss: 1.0103365182876587,grad_norm: 0.8820833944689409, iteration: 264099
loss: 1.0067418813705444,grad_norm: 0.9062362786252478, iteration: 264100
loss: 1.0089027881622314,grad_norm: 0.8786783017108427, iteration: 264101
loss: 1.0205605030059814,grad_norm: 0.9155049558752794, iteration: 264102
loss: 0.9723844528198242,grad_norm: 0.9999991636977564, iteration: 264103
loss: 1.0091660022735596,grad_norm: 0.8312601703245304, iteration: 264104
loss: 1.0153625011444092,grad_norm: 0.9256299428248047, iteration: 264105
loss: 1.0399471521377563,grad_norm: 0.8174086255868842, iteration: 264106
loss: 0.9629412889480591,grad_norm: 0.9999990244257301, iteration: 264107
loss: 1.0237289667129517,grad_norm: 0.9445703728187597, iteration: 264108
loss: 1.01492440700531,grad_norm: 0.9999991417061428, iteration: 264109
loss: 1.0234495401382446,grad_norm: 0.883276020686458, iteration: 264110
loss: 0.9862501621246338,grad_norm: 0.9999990904130155, iteration: 264111
loss: 1.0379246473312378,grad_norm: 0.9300808220354484, iteration: 264112
loss: 0.9991022944450378,grad_norm: 0.8851666678408481, iteration: 264113
loss: 0.9823909997940063,grad_norm: 0.9788425009699593, iteration: 264114
loss: 1.0161648988723755,grad_norm: 0.8537675859949504, iteration: 264115
loss: 0.9991453289985657,grad_norm: 0.8440300862097996, iteration: 264116
loss: 0.9881178736686707,grad_norm: 0.7803534143063057, iteration: 264117
loss: 0.9800407290458679,grad_norm: 0.8959115378150156, iteration: 264118
loss: 1.0015528202056885,grad_norm: 0.927673123933213, iteration: 264119
loss: 0.9949076175689697,grad_norm: 0.7718928571335568, iteration: 264120
loss: 0.989014744758606,grad_norm: 0.9236551045131068, iteration: 264121
loss: 0.994092583656311,grad_norm: 0.9315314890979295, iteration: 264122
loss: 1.0099889039993286,grad_norm: 0.9999990962606212, iteration: 264123
loss: 0.9804739356040955,grad_norm: 0.9999991091676289, iteration: 264124
loss: 1.0193367004394531,grad_norm: 0.9999991150552447, iteration: 264125
loss: 1.016577959060669,grad_norm: 0.8039072585322097, iteration: 264126
loss: 0.9945428967475891,grad_norm: 0.8436388103837806, iteration: 264127
loss: 0.9833223223686218,grad_norm: 0.956269705063466, iteration: 264128
loss: 1.0181794166564941,grad_norm: 0.9614765300650375, iteration: 264129
loss: 0.958739697933197,grad_norm: 0.983787623736406, iteration: 264130
loss: 1.007705569267273,grad_norm: 0.773587176447074, iteration: 264131
loss: 0.9929932355880737,grad_norm: 0.903255668881146, iteration: 264132
loss: 1.0234980583190918,grad_norm: 0.9630554858004877, iteration: 264133
loss: 1.1016032695770264,grad_norm: 0.9372532673677598, iteration: 264134
loss: 0.9940471649169922,grad_norm: 0.9500149298414085, iteration: 264135
loss: 0.9956454634666443,grad_norm: 0.8262705716114037, iteration: 264136
loss: 0.9603891968727112,grad_norm: 0.9163419311000293, iteration: 264137
loss: 0.9837281703948975,grad_norm: 0.9214167877348447, iteration: 264138
loss: 0.9860484600067139,grad_norm: 0.9999996204016713, iteration: 264139
loss: 1.0280756950378418,grad_norm: 0.8464549665631043, iteration: 264140
loss: 1.040781855583191,grad_norm: 0.9459604377349907, iteration: 264141
loss: 0.9782113432884216,grad_norm: 0.8261711066369714, iteration: 264142
loss: 0.9750123023986816,grad_norm: 0.8992960218014349, iteration: 264143
loss: 0.9848549365997314,grad_norm: 0.9999990787815911, iteration: 264144
loss: 1.0121043920516968,grad_norm: 0.7509412019571714, iteration: 264145
loss: 0.9946763515472412,grad_norm: 0.9137481273868822, iteration: 264146
loss: 0.9656851291656494,grad_norm: 0.8314556338739758, iteration: 264147
loss: 1.0089242458343506,grad_norm: 0.9392068466669751, iteration: 264148
loss: 1.001091480255127,grad_norm: 0.9473734976345524, iteration: 264149
loss: 1.0275673866271973,grad_norm: 0.9738901374867942, iteration: 264150
loss: 0.972847044467926,grad_norm: 0.7362274923152347, iteration: 264151
loss: 1.029628038406372,grad_norm: 0.8844691694436463, iteration: 264152
loss: 1.007015585899353,grad_norm: 0.8423446952353075, iteration: 264153
loss: 1.0219078063964844,grad_norm: 0.9999993981970648, iteration: 264154
loss: 1.0024986267089844,grad_norm: 0.8153658872880548, iteration: 264155
loss: 0.9794557094573975,grad_norm: 0.8427951358799113, iteration: 264156
loss: 0.9926960468292236,grad_norm: 0.7962179491784762, iteration: 264157
loss: 0.9804282188415527,grad_norm: 0.8149248905920422, iteration: 264158
loss: 0.9943282008171082,grad_norm: 0.8973856309523242, iteration: 264159
loss: 0.9964168667793274,grad_norm: 0.882573580108103, iteration: 264160
loss: 1.006901502609253,grad_norm: 0.9764285826995924, iteration: 264161
loss: 1.0257951021194458,grad_norm: 0.9056201340488175, iteration: 264162
loss: 1.0035793781280518,grad_norm: 0.915604243942717, iteration: 264163
loss: 1.0410964488983154,grad_norm: 0.9999997196992295, iteration: 264164
loss: 1.0228464603424072,grad_norm: 0.8589968358119081, iteration: 264165
loss: 0.9846985936164856,grad_norm: 0.9671867948600873, iteration: 264166
loss: 1.0024174451828003,grad_norm: 0.7338255441556409, iteration: 264167
loss: 0.9886860847473145,grad_norm: 0.9999996438985113, iteration: 264168
loss: 1.0047835111618042,grad_norm: 0.8452965587333857, iteration: 264169
loss: 0.9999815225601196,grad_norm: 0.788609696835761, iteration: 264170
loss: 1.0178182125091553,grad_norm: 0.7728286578281219, iteration: 264171
loss: 0.9799025058746338,grad_norm: 0.7907789247293197, iteration: 264172
loss: 1.0722509622573853,grad_norm: 0.9999992185432862, iteration: 264173
loss: 1.014290690422058,grad_norm: 0.8876912771007447, iteration: 264174
loss: 1.0298631191253662,grad_norm: 0.8868371332679226, iteration: 264175
loss: 1.0070308446884155,grad_norm: 0.9695489458068456, iteration: 264176
loss: 0.9868819713592529,grad_norm: 0.8032123369523132, iteration: 264177
loss: 0.9775624871253967,grad_norm: 0.8460008576666375, iteration: 264178
loss: 1.0014930963516235,grad_norm: 0.9952018658119173, iteration: 264179
loss: 1.0139092206954956,grad_norm: 0.9999991567827211, iteration: 264180
loss: 1.032241702079773,grad_norm: 0.8245238031434274, iteration: 264181
loss: 1.0071172714233398,grad_norm: 0.829248732901093, iteration: 264182
loss: 0.9852822422981262,grad_norm: 0.9304501002859399, iteration: 264183
loss: 0.9786771535873413,grad_norm: 0.8518541012332803, iteration: 264184
loss: 0.9753307700157166,grad_norm: 0.9662770945316111, iteration: 264185
loss: 1.2522854804992676,grad_norm: 0.9999994849203966, iteration: 264186
loss: 0.9965922832489014,grad_norm: 0.8609496679451671, iteration: 264187
loss: 1.034183382987976,grad_norm: 0.9999990895848642, iteration: 264188
loss: 0.9898372292518616,grad_norm: 0.8859400260685649, iteration: 264189
loss: 1.0373762845993042,grad_norm: 0.9999996878439992, iteration: 264190
loss: 1.0258713960647583,grad_norm: 0.8545137699502109, iteration: 264191
loss: 1.0994073152542114,grad_norm: 0.9999994847972333, iteration: 264192
loss: 1.005946159362793,grad_norm: 0.909164184222359, iteration: 264193
loss: 1.0326194763183594,grad_norm: 0.8826867314982846, iteration: 264194
loss: 0.9918976426124573,grad_norm: 0.7834809373967383, iteration: 264195
loss: 1.006339192390442,grad_norm: 0.9201409829242811, iteration: 264196
loss: 1.1208025217056274,grad_norm: 0.9999992255330006, iteration: 264197
loss: 0.9718318581581116,grad_norm: 0.9073134783151943, iteration: 264198
loss: 0.9922643899917603,grad_norm: 0.8655383771101771, iteration: 264199
loss: 0.963307797908783,grad_norm: 0.8431268126648984, iteration: 264200
loss: 0.9870392680168152,grad_norm: 0.8185494887113327, iteration: 264201
loss: 1.0000144243240356,grad_norm: 0.9575841261729013, iteration: 264202
loss: 0.9792636036872864,grad_norm: 0.9999990409429167, iteration: 264203
loss: 1.0201376676559448,grad_norm: 0.9999990519624219, iteration: 264204
loss: 1.0011709928512573,grad_norm: 0.8786852280068268, iteration: 264205
loss: 0.9848003387451172,grad_norm: 0.9195347883890419, iteration: 264206
loss: 1.0204651355743408,grad_norm: 0.920060278736263, iteration: 264207
loss: 0.962200939655304,grad_norm: 0.9452326374644012, iteration: 264208
loss: 0.980057954788208,grad_norm: 0.8889711592293511, iteration: 264209
loss: 1.0259469747543335,grad_norm: 0.8317399400506454, iteration: 264210
loss: 0.931995153427124,grad_norm: 0.7738169543616679, iteration: 264211
loss: 0.9792453050613403,grad_norm: 0.9053876430224869, iteration: 264212
loss: 0.9805814623832703,grad_norm: 0.8445877511599325, iteration: 264213
loss: 1.0116609334945679,grad_norm: 0.7674636109832733, iteration: 264214
loss: 1.02143394947052,grad_norm: 0.8432965591555706, iteration: 264215
loss: 0.958115816116333,grad_norm: 0.8826484889507055, iteration: 264216
loss: 1.0191774368286133,grad_norm: 0.9999993364249166, iteration: 264217
loss: 1.0145392417907715,grad_norm: 0.8465146501776776, iteration: 264218
loss: 1.0783510208129883,grad_norm: 0.9999996407628273, iteration: 264219
loss: 1.0147038698196411,grad_norm: 0.8388615664275224, iteration: 264220
loss: 0.9668846130371094,grad_norm: 0.8416334309341946, iteration: 264221
loss: 1.003499150276184,grad_norm: 0.8094657021931839, iteration: 264222
loss: 1.051076054573059,grad_norm: 0.9999994065748424, iteration: 264223
loss: 0.9897677302360535,grad_norm: 0.8381451765793113, iteration: 264224
loss: 0.9699323773384094,grad_norm: 0.8301622845252359, iteration: 264225
loss: 1.0043854713439941,grad_norm: 0.9609660315676325, iteration: 264226
loss: 1.0169029235839844,grad_norm: 0.8525549204633747, iteration: 264227
loss: 1.0684216022491455,grad_norm: 0.9999997120623639, iteration: 264228
loss: 1.018049955368042,grad_norm: 0.9999995452497958, iteration: 264229
loss: 0.9510467052459717,grad_norm: 0.8550828825006831, iteration: 264230
loss: 1.213206171989441,grad_norm: 0.9999994116249094, iteration: 264231
loss: 0.9923941493034363,grad_norm: 0.8519987252068824, iteration: 264232
loss: 1.0059674978256226,grad_norm: 0.7516349210416476, iteration: 264233
loss: 1.014907956123352,grad_norm: 0.8408948544982068, iteration: 264234
loss: 0.9895137548446655,grad_norm: 0.829437397504763, iteration: 264235
loss: 1.0263804197311401,grad_norm: 0.9999990587913942, iteration: 264236
loss: 0.9924858212471008,grad_norm: 0.7954073550155597, iteration: 264237
loss: 0.9894278645515442,grad_norm: 0.9049292122735689, iteration: 264238
loss: 1.003325343132019,grad_norm: 0.9743435270865493, iteration: 264239
loss: 0.9760303497314453,grad_norm: 0.9097113583320838, iteration: 264240
loss: 0.9738940596580505,grad_norm: 0.9476217964584667, iteration: 264241
loss: 0.9699704051017761,grad_norm: 0.9414683733978092, iteration: 264242
loss: 1.0351386070251465,grad_norm: 0.7194801988041137, iteration: 264243
loss: 0.9828264117240906,grad_norm: 0.7550719511433719, iteration: 264244
loss: 1.0101568698883057,grad_norm: 0.952237214830278, iteration: 264245
loss: 1.0109251737594604,grad_norm: 0.7305672402155335, iteration: 264246
loss: 1.0157973766326904,grad_norm: 0.9999992369106884, iteration: 264247
loss: 0.990119218826294,grad_norm: 0.9999990642865205, iteration: 264248
loss: 1.0148948431015015,grad_norm: 0.8949912702366423, iteration: 264249
loss: 1.014290452003479,grad_norm: 0.9310812406333312, iteration: 264250
loss: 0.9778987169265747,grad_norm: 0.7833102313032023, iteration: 264251
loss: 1.0555224418640137,grad_norm: 0.9999991864607176, iteration: 264252
loss: 1.034865379333496,grad_norm: 0.9552420332739654, iteration: 264253
loss: 1.048473834991455,grad_norm: 0.9999997363385081, iteration: 264254
loss: 1.0027399063110352,grad_norm: 0.8700428718652439, iteration: 264255
loss: 1.0310966968536377,grad_norm: 0.8804804841823549, iteration: 264256
loss: 1.009917140007019,grad_norm: 0.9999993322501691, iteration: 264257
loss: 1.0028764009475708,grad_norm: 0.7629559310636763, iteration: 264258
loss: 0.9985564947128296,grad_norm: 0.9202992536215161, iteration: 264259
loss: 1.0339100360870361,grad_norm: 0.8017923490286599, iteration: 264260
loss: 1.045334815979004,grad_norm: 0.9064284310357867, iteration: 264261
loss: 0.9755324721336365,grad_norm: 0.8159907746704745, iteration: 264262
loss: 1.12532377243042,grad_norm: 0.99999953836108, iteration: 264263
loss: 0.9899080395698547,grad_norm: 0.9061005812123525, iteration: 264264
loss: 0.9882272481918335,grad_norm: 0.8847730579377674, iteration: 264265
loss: 0.9626424908638,grad_norm: 0.9999995116815843, iteration: 264266
loss: 1.0043679475784302,grad_norm: 0.7879877870466181, iteration: 264267
loss: 0.9734586477279663,grad_norm: 0.8333319863288404, iteration: 264268
loss: 0.9773702025413513,grad_norm: 0.9469879869804837, iteration: 264269
loss: 1.0135295391082764,grad_norm: 0.9322205531057083, iteration: 264270
loss: 0.9989564418792725,grad_norm: 0.8368307584619953, iteration: 264271
loss: 0.9808435440063477,grad_norm: 0.9955772940143368, iteration: 264272
loss: 1.0209468603134155,grad_norm: 0.7783903375594913, iteration: 264273
loss: 0.988416314125061,grad_norm: 0.845538730282367, iteration: 264274
loss: 0.9868776798248291,grad_norm: 0.954841556162671, iteration: 264275
loss: 0.9947974681854248,grad_norm: 0.8591418199597556, iteration: 264276
loss: 1.0055389404296875,grad_norm: 0.9270732674375124, iteration: 264277
loss: 1.0095760822296143,grad_norm: 0.9091477689475725, iteration: 264278
loss: 1.0230512619018555,grad_norm: 0.9999994624662976, iteration: 264279
loss: 1.0044912099838257,grad_norm: 0.9999989058590243, iteration: 264280
loss: 1.0196740627288818,grad_norm: 0.8452648036329562, iteration: 264281
loss: 0.9738003611564636,grad_norm: 0.9502616727947433, iteration: 264282
loss: 1.0383509397506714,grad_norm: 0.9999994163854811, iteration: 264283
loss: 1.0206868648529053,grad_norm: 0.9999991075212125, iteration: 264284
loss: 1.0196032524108887,grad_norm: 0.937633341712361, iteration: 264285
loss: 0.992897093296051,grad_norm: 0.7712034788556736, iteration: 264286
loss: 0.9889259338378906,grad_norm: 0.8330768499255677, iteration: 264287
loss: 1.0012524127960205,grad_norm: 0.9999998520864629, iteration: 264288
loss: 1.0218908786773682,grad_norm: 0.9999997259575031, iteration: 264289
loss: 1.0157184600830078,grad_norm: 0.9645709477164282, iteration: 264290
loss: 1.0297859907150269,grad_norm: 0.9523744112009328, iteration: 264291
loss: 1.0115444660186768,grad_norm: 0.8514925627985837, iteration: 264292
loss: 0.991966187953949,grad_norm: 0.8409538648635605, iteration: 264293
loss: 1.0223861932754517,grad_norm: 0.9999994977795996, iteration: 264294
loss: 0.9957000017166138,grad_norm: 0.8513402102135139, iteration: 264295
loss: 0.95939701795578,grad_norm: 0.9238861027783429, iteration: 264296
loss: 0.9985795021057129,grad_norm: 0.9999991136036953, iteration: 264297
loss: 1.0064382553100586,grad_norm: 0.9125265249649492, iteration: 264298
loss: 0.9620242714881897,grad_norm: 0.8567734649871915, iteration: 264299
loss: 0.9828712940216064,grad_norm: 0.9086241866527299, iteration: 264300
loss: 0.9971914291381836,grad_norm: 0.855328453709223, iteration: 264301
loss: 0.9554639458656311,grad_norm: 0.9012531921047295, iteration: 264302
loss: 1.0087980031967163,grad_norm: 0.7569566625096747, iteration: 264303
loss: 1.0324679613113403,grad_norm: 0.9999991540501254, iteration: 264304
loss: 1.0081672668457031,grad_norm: 0.7913085029848911, iteration: 264305
loss: 1.0015192031860352,grad_norm: 0.780899643536752, iteration: 264306
loss: 1.0258970260620117,grad_norm: 0.8581176499446053, iteration: 264307
loss: 1.0119199752807617,grad_norm: 0.7529064248581183, iteration: 264308
loss: 1.0452395677566528,grad_norm: 0.9999990535966918, iteration: 264309
loss: 1.0169992446899414,grad_norm: 0.8530922932530253, iteration: 264310
loss: 1.0022999048233032,grad_norm: 0.9999990810874481, iteration: 264311
loss: 1.0161467790603638,grad_norm: 0.8098368127287754, iteration: 264312
loss: 0.9554824233055115,grad_norm: 0.8167943636076779, iteration: 264313
loss: 0.9825991988182068,grad_norm: 0.8703799317618769, iteration: 264314
loss: 1.0744563341140747,grad_norm: 0.9999993461481103, iteration: 264315
loss: 1.0138678550720215,grad_norm: 0.8234095635921647, iteration: 264316
loss: 0.9946251511573792,grad_norm: 0.9999989416097409, iteration: 264317
loss: 0.9935253262519836,grad_norm: 0.9999992302139179, iteration: 264318
loss: 1.0093640089035034,grad_norm: 0.7340422861894121, iteration: 264319
loss: 0.9976097345352173,grad_norm: 0.9999994853761174, iteration: 264320
loss: 1.0260734558105469,grad_norm: 0.9999991807679117, iteration: 264321
loss: 1.0316227674484253,grad_norm: 0.9999990435515242, iteration: 264322
loss: 1.1027257442474365,grad_norm: 0.9999992696717764, iteration: 264323
loss: 1.0195564031600952,grad_norm: 0.9999991065834957, iteration: 264324
loss: 1.045129656791687,grad_norm: 0.9999990880599892, iteration: 264325
loss: 1.0441564321517944,grad_norm: 0.9999996507715864, iteration: 264326
loss: 1.0320497751235962,grad_norm: 0.9999990132899018, iteration: 264327
loss: 0.9887107014656067,grad_norm: 0.8312630246339677, iteration: 264328
loss: 0.9862925410270691,grad_norm: 0.99999916388196, iteration: 264329
loss: 1.0346367359161377,grad_norm: 0.999999574893529, iteration: 264330
loss: 0.9629500508308411,grad_norm: 0.7405605924584906, iteration: 264331
loss: 0.9571933746337891,grad_norm: 0.8464675969096976, iteration: 264332
loss: 0.9927650094032288,grad_norm: 0.9499415628622053, iteration: 264333
loss: 1.0002020597457886,grad_norm: 0.9999991470135663, iteration: 264334
loss: 0.9910008311271667,grad_norm: 0.8865700063097857, iteration: 264335
loss: 0.9582979083061218,grad_norm: 0.829597339546421, iteration: 264336
loss: 1.0329869985580444,grad_norm: 0.9999992130168434, iteration: 264337
loss: 1.0195239782333374,grad_norm: 0.9198468312704448, iteration: 264338
loss: 0.9548460245132446,grad_norm: 0.962949840766214, iteration: 264339
loss: 0.9929342865943909,grad_norm: 0.9999990650965996, iteration: 264340
loss: 0.9805877208709717,grad_norm: 0.8799649974386882, iteration: 264341
loss: 1.0194745063781738,grad_norm: 0.999999064520165, iteration: 264342
loss: 1.0088543891906738,grad_norm: 0.8614470203241201, iteration: 264343
loss: 1.0243803262710571,grad_norm: 0.8296591752979551, iteration: 264344
loss: 1.015099287033081,grad_norm: 0.9594907527133303, iteration: 264345
loss: 1.0019116401672363,grad_norm: 0.8015686616727343, iteration: 264346
loss: 0.9961646795272827,grad_norm: 0.9543927034139273, iteration: 264347
loss: 0.9841198921203613,grad_norm: 0.8610738748944222, iteration: 264348
loss: 0.9999663233757019,grad_norm: 0.9481817185712219, iteration: 264349
loss: 1.0019663572311401,grad_norm: 0.9999992300697789, iteration: 264350
loss: 0.9890472888946533,grad_norm: 0.9400974586977361, iteration: 264351
loss: 0.9975334405899048,grad_norm: 0.7707889647533455, iteration: 264352
loss: 1.0111949443817139,grad_norm: 0.9999991380390615, iteration: 264353
loss: 0.9814592003822327,grad_norm: 0.9536959182266317, iteration: 264354
loss: 1.0035946369171143,grad_norm: 0.837536716020989, iteration: 264355
loss: 0.998558521270752,grad_norm: 0.9999990417405105, iteration: 264356
loss: 1.0704143047332764,grad_norm: 0.9999999421653263, iteration: 264357
loss: 0.9869467616081238,grad_norm: 0.7297970225520876, iteration: 264358
loss: 0.9654086828231812,grad_norm: 0.908486481929005, iteration: 264359
loss: 0.9909963607788086,grad_norm: 0.854464600966741, iteration: 264360
loss: 0.9452146887779236,grad_norm: 0.9259934509258919, iteration: 264361
loss: 1.0153181552886963,grad_norm: 0.7128343976138499, iteration: 264362
loss: 1.019867181777954,grad_norm: 0.9430141494556259, iteration: 264363
loss: 1.0510891675949097,grad_norm: 0.889443260631404, iteration: 264364
loss: 0.999233067035675,grad_norm: 0.9999992300095667, iteration: 264365
loss: 0.9963701367378235,grad_norm: 0.7678765362513611, iteration: 264366
loss: 0.9973071813583374,grad_norm: 0.7767288445529912, iteration: 264367
loss: 1.0630772113800049,grad_norm: 0.9999997439824515, iteration: 264368
loss: 1.0229089260101318,grad_norm: 0.9999991104844824, iteration: 264369
loss: 1.0394554138183594,grad_norm: 0.8522019645027651, iteration: 264370
loss: 1.0112248659133911,grad_norm: 0.9074679352382016, iteration: 264371
loss: 0.9564436078071594,grad_norm: 0.834720350766455, iteration: 264372
loss: 1.0520288944244385,grad_norm: 0.9614184430050331, iteration: 264373
loss: 1.0026593208312988,grad_norm: 0.9999995073272357, iteration: 264374
loss: 1.0488429069519043,grad_norm: 0.9845032022375769, iteration: 264375
loss: 1.0064802169799805,grad_norm: 0.9092351776789895, iteration: 264376
loss: 1.0071669816970825,grad_norm: 0.9706405031852892, iteration: 264377
loss: 1.0066555738449097,grad_norm: 0.983939230316557, iteration: 264378
loss: 0.9655919671058655,grad_norm: 0.8477799644723274, iteration: 264379
loss: 1.0041505098342896,grad_norm: 0.868411117925146, iteration: 264380
loss: 1.001193881034851,grad_norm: 0.7894835828005299, iteration: 264381
loss: 0.9710768461227417,grad_norm: 0.9999991486954986, iteration: 264382
loss: 1.0306499004364014,grad_norm: 0.7876674380906282, iteration: 264383
loss: 0.9983339309692383,grad_norm: 0.9207774300527171, iteration: 264384
loss: 0.9969611167907715,grad_norm: 0.9695394464387466, iteration: 264385
loss: 1.005925178527832,grad_norm: 0.9268636338181618, iteration: 264386
loss: 0.9940394759178162,grad_norm: 0.9328947020813575, iteration: 264387
loss: 1.0235875844955444,grad_norm: 0.9999998274005598, iteration: 264388
loss: 0.9743145108222961,grad_norm: 0.9999997941501992, iteration: 264389
loss: 1.0868322849273682,grad_norm: 0.9999999048889515, iteration: 264390
loss: 1.0051827430725098,grad_norm: 0.9999990425568177, iteration: 264391
loss: 1.032884955406189,grad_norm: 0.9999993201733659, iteration: 264392
loss: 1.0085381269454956,grad_norm: 0.994667384456942, iteration: 264393
loss: 1.0053774118423462,grad_norm: 0.9999991516752661, iteration: 264394
loss: 1.026521921157837,grad_norm: 0.8196049288828409, iteration: 264395
loss: 1.2583433389663696,grad_norm: 0.9999992355379247, iteration: 264396
loss: 1.0051912069320679,grad_norm: 0.9252711820465739, iteration: 264397
loss: 0.9942122101783752,grad_norm: 0.9079151519285747, iteration: 264398
loss: 1.017421841621399,grad_norm: 0.8526292687678123, iteration: 264399
loss: 1.000187873840332,grad_norm: 0.8161394609661915, iteration: 264400
loss: 0.9911148548126221,grad_norm: 0.937707625960788, iteration: 264401
loss: 0.9886174201965332,grad_norm: 0.9775343908371923, iteration: 264402
loss: 1.0108023881912231,grad_norm: 0.8700438815553425, iteration: 264403
loss: 1.000840425491333,grad_norm: 0.8976541229518701, iteration: 264404
loss: 1.008514165878296,grad_norm: 0.8735593033935596, iteration: 264405
loss: 0.9755657911300659,grad_norm: 0.8022222285783539, iteration: 264406
loss: 1.0196131467819214,grad_norm: 0.9036482785945735, iteration: 264407
loss: 0.9440184235572815,grad_norm: 0.8936714648750134, iteration: 264408
loss: 0.9512603878974915,grad_norm: 0.9876175796728824, iteration: 264409
loss: 0.9869712591171265,grad_norm: 0.9999990386694236, iteration: 264410
loss: 1.1542237997055054,grad_norm: 0.9999990662795233, iteration: 264411
loss: 1.1450968980789185,grad_norm: 0.9999991837769525, iteration: 264412
loss: 1.0279635190963745,grad_norm: 0.8153203132066097, iteration: 264413
loss: 1.0010677576065063,grad_norm: 0.8401606118448417, iteration: 264414
loss: 0.9828358292579651,grad_norm: 0.8777114791594369, iteration: 264415
loss: 1.0344719886779785,grad_norm: 0.9999996654838391, iteration: 264416
loss: 0.9906626343727112,grad_norm: 0.8326282668765886, iteration: 264417
loss: 0.9875198602676392,grad_norm: 0.8801338728659842, iteration: 264418
loss: 1.0156022310256958,grad_norm: 0.6817795941935686, iteration: 264419
loss: 0.9993419647216797,grad_norm: 0.9625473586477047, iteration: 264420
loss: 0.9510062336921692,grad_norm: 0.9695761285550201, iteration: 264421
loss: 0.9959524869918823,grad_norm: 0.9999990998466721, iteration: 264422
loss: 0.978507399559021,grad_norm: 0.974940628030822, iteration: 264423
loss: 1.032963514328003,grad_norm: 0.8963628816401215, iteration: 264424
loss: 0.986781120300293,grad_norm: 0.7701011673348626, iteration: 264425
loss: 0.999865710735321,grad_norm: 0.9999990955501893, iteration: 264426
loss: 1.0231781005859375,grad_norm: 0.8271048715487259, iteration: 264427
loss: 1.0035533905029297,grad_norm: 0.9700092996939199, iteration: 264428
loss: 0.9802813529968262,grad_norm: 0.9999992037750779, iteration: 264429
loss: 1.0089781284332275,grad_norm: 0.9999999715855754, iteration: 264430
loss: 0.9972936511039734,grad_norm: 0.7437642171953678, iteration: 264431
loss: 1.0106993913650513,grad_norm: 0.9833720067223638, iteration: 264432
loss: 1.0222820043563843,grad_norm: 0.8020754969944714, iteration: 264433
loss: 1.0201776027679443,grad_norm: 0.8389471619854477, iteration: 264434
loss: 0.9628517031669617,grad_norm: 0.9942405267728981, iteration: 264435
loss: 1.0123612880706787,grad_norm: 0.8934464781956131, iteration: 264436
loss: 1.074392557144165,grad_norm: 1.0000000081807305, iteration: 264437
loss: 0.9310738444328308,grad_norm: 0.8702500657188859, iteration: 264438
loss: 1.0036975145339966,grad_norm: 0.8496683738560138, iteration: 264439
loss: 1.0644266605377197,grad_norm: 0.7729290904462742, iteration: 264440
loss: 0.9771780371665955,grad_norm: 0.7344507543160329, iteration: 264441
loss: 1.1661558151245117,grad_norm: 0.9999992469621956, iteration: 264442
loss: 1.111452341079712,grad_norm: 0.9999995343099903, iteration: 264443
loss: 1.0380871295928955,grad_norm: 0.9999992157390966, iteration: 264444
loss: 1.0869908332824707,grad_norm: 0.9999991098803781, iteration: 264445
loss: 1.0237623453140259,grad_norm: 0.9999991333535436, iteration: 264446
loss: 1.0613220930099487,grad_norm: 0.9999989684093548, iteration: 264447
loss: 0.9633943438529968,grad_norm: 0.8408248573863396, iteration: 264448
loss: 1.0439748764038086,grad_norm: 0.9999990144124438, iteration: 264449
loss: 0.9964171051979065,grad_norm: 0.9424643194130903, iteration: 264450
loss: 1.0297489166259766,grad_norm: 0.9999995374261524, iteration: 264451
loss: 1.0673668384552002,grad_norm: 0.875363608448609, iteration: 264452
loss: 1.0177446603775024,grad_norm: 0.7412421082129602, iteration: 264453
loss: 1.123815655708313,grad_norm: 0.8963499412218291, iteration: 264454
loss: 1.0523781776428223,grad_norm: 0.8386578939006649, iteration: 264455
loss: 1.050317406654358,grad_norm: 0.9371672594570141, iteration: 264456
loss: 1.0630747079849243,grad_norm: 0.9999999828902849, iteration: 264457
loss: 1.018418788909912,grad_norm: 0.9150201045805247, iteration: 264458
loss: 0.994374692440033,grad_norm: 0.9496006938851668, iteration: 264459
loss: 1.011548399925232,grad_norm: 0.9999991646862159, iteration: 264460
loss: 1.0000699758529663,grad_norm: 0.9616704188182472, iteration: 264461
loss: 0.9983430504798889,grad_norm: 0.770238174463565, iteration: 264462
loss: 0.9970653057098389,grad_norm: 0.8440555411021788, iteration: 264463
loss: 1.0657821893692017,grad_norm: 0.8393562849132492, iteration: 264464
loss: 0.9767752289772034,grad_norm: 0.7299993386148113, iteration: 264465
loss: 1.0196727514266968,grad_norm: 0.9999996590817343, iteration: 264466
loss: 1.007951021194458,grad_norm: 0.8362987502128209, iteration: 264467
loss: 1.0481950044631958,grad_norm: 0.9534339814507335, iteration: 264468
loss: 1.0106074810028076,grad_norm: 0.7805251729593733, iteration: 264469
loss: 1.0526930093765259,grad_norm: 1.000000000362169, iteration: 264470
loss: 1.0030503273010254,grad_norm: 0.9999990005038983, iteration: 264471
loss: 0.9857915639877319,grad_norm: 0.9999990171930747, iteration: 264472
loss: 1.0145708322525024,grad_norm: 0.9999991410447462, iteration: 264473
loss: 1.0524014234542847,grad_norm: 0.9999991409666872, iteration: 264474
loss: 0.9897415041923523,grad_norm: 0.8928834105339443, iteration: 264475
loss: 1.0925816297531128,grad_norm: 0.988408094386879, iteration: 264476
loss: 1.0159803628921509,grad_norm: 0.9543113442619611, iteration: 264477
loss: 1.0637118816375732,grad_norm: 0.9999997617128997, iteration: 264478
loss: 1.092812180519104,grad_norm: 0.9999994293682599, iteration: 264479
loss: 1.043173909187317,grad_norm: 0.9216578463080195, iteration: 264480
loss: 1.0565365552902222,grad_norm: 0.9999999776213241, iteration: 264481
loss: 1.021142840385437,grad_norm: 0.8755510813093988, iteration: 264482
loss: 1.0268898010253906,grad_norm: 0.8712484701823178, iteration: 264483
loss: 1.0113130807876587,grad_norm: 0.9999993030681806, iteration: 264484
loss: 0.9986670017242432,grad_norm: 0.9999990435791851, iteration: 264485
loss: 1.0093674659729004,grad_norm: 0.9999991683470265, iteration: 264486
loss: 1.0616731643676758,grad_norm: 0.9999993817367955, iteration: 264487
loss: 0.973177969455719,grad_norm: 0.9999991406543267, iteration: 264488
loss: 1.243411898612976,grad_norm: 0.9999993700225537, iteration: 264489
loss: 1.0092803239822388,grad_norm: 0.7719106421168958, iteration: 264490
loss: 0.9963237047195435,grad_norm: 0.9999989935572047, iteration: 264491
loss: 1.0109084844589233,grad_norm: 0.862479563563917, iteration: 264492
loss: 0.9775586724281311,grad_norm: 0.9312191095083234, iteration: 264493
loss: 1.0514765977859497,grad_norm: 0.9999996384536171, iteration: 264494
loss: 0.9788649082183838,grad_norm: 0.847062957543374, iteration: 264495
loss: 0.9958149790763855,grad_norm: 0.8381621572957706, iteration: 264496
loss: 0.9726307988166809,grad_norm: 0.999998976586115, iteration: 264497
loss: 0.9657571911811829,grad_norm: 0.8645266918186866, iteration: 264498
loss: 0.9835661053657532,grad_norm: 0.8840306461610272, iteration: 264499
loss: 1.0096549987792969,grad_norm: 0.90925743684965, iteration: 264500
loss: 0.9863151907920837,grad_norm: 0.9485798196277055, iteration: 264501
loss: 1.0128117799758911,grad_norm: 0.779793476146062, iteration: 264502
loss: 1.0319669246673584,grad_norm: 0.8678617463633482, iteration: 264503
loss: 0.9910007119178772,grad_norm: 0.8901503697938632, iteration: 264504
loss: 1.0572484731674194,grad_norm: 0.6918876377579833, iteration: 264505
loss: 1.0314903259277344,grad_norm: 0.8202081557700049, iteration: 264506
loss: 1.0347434282302856,grad_norm: 0.904879016373345, iteration: 264507
loss: 0.9907289147377014,grad_norm: 0.9969159536388058, iteration: 264508
loss: 1.016953468322754,grad_norm: 0.794566076010411, iteration: 264509
loss: 1.0219007730484009,grad_norm: 0.8368839543985144, iteration: 264510
loss: 0.989215075969696,grad_norm: 0.9999991652958091, iteration: 264511
loss: 1.0575640201568604,grad_norm: 0.9999994422417567, iteration: 264512
loss: 1.0011827945709229,grad_norm: 0.8865909880031171, iteration: 264513
loss: 0.9923296570777893,grad_norm: 0.7928342825288826, iteration: 264514
loss: 1.0149179697036743,grad_norm: 0.8486942216320427, iteration: 264515
loss: 1.0191301107406616,grad_norm: 0.882112923584581, iteration: 264516
loss: 0.9927955865859985,grad_norm: 0.8721100850548784, iteration: 264517
loss: 1.0449713468551636,grad_norm: 0.9402689497765523, iteration: 264518
loss: 1.0262449979782104,grad_norm: 0.9372112549400926, iteration: 264519
loss: 1.0142921209335327,grad_norm: 0.8570579011582219, iteration: 264520
loss: 1.1241278648376465,grad_norm: 0.9999993772776117, iteration: 264521
loss: 0.9987916350364685,grad_norm: 0.9219895633834172, iteration: 264522
loss: 1.0102806091308594,grad_norm: 0.9999994181257968, iteration: 264523
loss: 1.0129475593566895,grad_norm: 0.8241433702792377, iteration: 264524
loss: 0.9621886014938354,grad_norm: 0.9999990361811629, iteration: 264525
loss: 1.005754828453064,grad_norm: 0.9992736595032651, iteration: 264526
loss: 1.0235730409622192,grad_norm: 0.9999995792561069, iteration: 264527
loss: 0.9944396018981934,grad_norm: 0.8775192435057743, iteration: 264528
loss: 1.0273610353469849,grad_norm: 0.7899996641970558, iteration: 264529
loss: 1.00521719455719,grad_norm: 0.8923641571416223, iteration: 264530
loss: 1.0032868385314941,grad_norm: 0.8991791145912602, iteration: 264531
loss: 1.0854482650756836,grad_norm: 0.9999990929992002, iteration: 264532
loss: 1.0038083791732788,grad_norm: 0.8376091211524818, iteration: 264533
loss: 0.9894302487373352,grad_norm: 0.815099155579772, iteration: 264534
loss: 1.0330101251602173,grad_norm: 0.9999990458140673, iteration: 264535
loss: 1.0628883838653564,grad_norm: 0.875146829573173, iteration: 264536
loss: 0.9886575937271118,grad_norm: 0.8272519057909486, iteration: 264537
loss: 0.9751031398773193,grad_norm: 0.9999991382699785, iteration: 264538
loss: 1.0075128078460693,grad_norm: 0.8205706987136605, iteration: 264539
loss: 0.9610744714736938,grad_norm: 0.776100074335355, iteration: 264540
loss: 1.0303046703338623,grad_norm: 0.850128686257847, iteration: 264541
loss: 0.969391405582428,grad_norm: 0.9829023049404061, iteration: 264542
loss: 0.9756062030792236,grad_norm: 0.9999991809137387, iteration: 264543
loss: 1.0044276714324951,grad_norm: 0.7440314286271363, iteration: 264544
loss: 1.0092434883117676,grad_norm: 0.8922192031203984, iteration: 264545
loss: 1.00923490524292,grad_norm: 0.8180731694670164, iteration: 264546
loss: 1.0089010000228882,grad_norm: 0.8640458408049211, iteration: 264547
loss: 1.0254679918289185,grad_norm: 0.821493729692041, iteration: 264548
loss: 0.9946864247322083,grad_norm: 0.7443756886017028, iteration: 264549
loss: 0.9812155365943909,grad_norm: 0.7239670653281918, iteration: 264550
loss: 1.0170115232467651,grad_norm: 0.8811458471817972, iteration: 264551
loss: 0.9908803701400757,grad_norm: 0.9479042490428929, iteration: 264552
loss: 1.0056568384170532,grad_norm: 0.7842748481370881, iteration: 264553
loss: 0.9773380756378174,grad_norm: 0.878466681748581, iteration: 264554
loss: 1.0063503980636597,grad_norm: 0.9067014663494807, iteration: 264555
loss: 1.0000630617141724,grad_norm: 0.9431595310321181, iteration: 264556
loss: 1.018923044204712,grad_norm: 0.8326554696901524, iteration: 264557
loss: 0.9889968037605286,grad_norm: 0.999999121223641, iteration: 264558
loss: 1.0129772424697876,grad_norm: 0.8124484069271252, iteration: 264559
loss: 0.9892948865890503,grad_norm: 0.999999082590517, iteration: 264560
loss: 1.0552457571029663,grad_norm: 0.9999992922099593, iteration: 264561
loss: 1.0315998792648315,grad_norm: 0.9999995331272162, iteration: 264562
loss: 1.0069364309310913,grad_norm: 0.9025914276491332, iteration: 264563
loss: 1.0084038972854614,grad_norm: 0.8639083658624317, iteration: 264564
loss: 0.9567659497261047,grad_norm: 0.9999991517587358, iteration: 264565
loss: 0.9957019686698914,grad_norm: 0.8895044552147706, iteration: 264566
loss: 0.9956075549125671,grad_norm: 0.8244177815750875, iteration: 264567
loss: 0.9970425367355347,grad_norm: 0.8855616906459247, iteration: 264568
loss: 1.0658893585205078,grad_norm: 0.9999994973314501, iteration: 264569
loss: 1.0138516426086426,grad_norm: 0.9999993424290147, iteration: 264570
loss: 0.9750259518623352,grad_norm: 0.9999990491366942, iteration: 264571
loss: 1.043910264968872,grad_norm: 0.9999991744102823, iteration: 264572
loss: 1.0410488843917847,grad_norm: 0.9279548940734423, iteration: 264573
loss: 0.9804545044898987,grad_norm: 0.8398330343620766, iteration: 264574
loss: 1.0023521184921265,grad_norm: 0.9160247393934527, iteration: 264575
loss: 0.9656288027763367,grad_norm: 0.8410465478483252, iteration: 264576
loss: 1.070967435836792,grad_norm: 0.9999998470748984, iteration: 264577
loss: 0.9663576483726501,grad_norm: 0.9448636606448901, iteration: 264578
loss: 0.982072651386261,grad_norm: 0.798939695792316, iteration: 264579
loss: 1.0145535469055176,grad_norm: 0.90125945635572, iteration: 264580
loss: 0.9609264135360718,grad_norm: 0.9546481449539812, iteration: 264581
loss: 1.0202327966690063,grad_norm: 0.754357856562286, iteration: 264582
loss: 0.9890661239624023,grad_norm: 0.8216176442129173, iteration: 264583
loss: 1.0240241289138794,grad_norm: 0.9999994136364438, iteration: 264584
loss: 0.9899118542671204,grad_norm: 0.8406940525458643, iteration: 264585
loss: 0.9833815693855286,grad_norm: 0.9999991085814216, iteration: 264586
loss: 1.0156254768371582,grad_norm: 0.9999991083404637, iteration: 264587
loss: 0.9802679419517517,grad_norm: 0.8889855563326374, iteration: 264588
loss: 1.0761919021606445,grad_norm: 0.849547263803777, iteration: 264589
loss: 1.0201475620269775,grad_norm: 0.78715525910829, iteration: 264590
loss: 1.0078179836273193,grad_norm: 0.757533465947458, iteration: 264591
loss: 1.0367246866226196,grad_norm: 0.8534291798226101, iteration: 264592
loss: 1.0184752941131592,grad_norm: 0.8974792449774036, iteration: 264593
loss: 1.176971673965454,grad_norm: 0.9999994706126012, iteration: 264594
loss: 1.0614948272705078,grad_norm: 0.9999998185093472, iteration: 264595
loss: 1.0166178941726685,grad_norm: 0.9211991584365093, iteration: 264596
loss: 1.075535535812378,grad_norm: 0.8537458522417569, iteration: 264597
loss: 1.0567013025283813,grad_norm: 0.9999997672274865, iteration: 264598
loss: 0.9991623163223267,grad_norm: 0.7114140466061157, iteration: 264599
loss: 1.042455792427063,grad_norm: 0.8603642669670831, iteration: 264600
loss: 1.0985054969787598,grad_norm: 0.999999809416083, iteration: 264601
loss: 0.9808338284492493,grad_norm: 0.8828330641197204, iteration: 264602
loss: 1.0615782737731934,grad_norm: 0.9999991062924268, iteration: 264603
loss: 1.0471513271331787,grad_norm: 0.9999991825714013, iteration: 264604
loss: 0.9995784759521484,grad_norm: 0.8240431625457596, iteration: 264605
loss: 1.1182312965393066,grad_norm: 0.9999994200726262, iteration: 264606
loss: 0.9665159583091736,grad_norm: 0.8534691088720852, iteration: 264607
loss: 1.1262702941894531,grad_norm: 0.9999994828668322, iteration: 264608
loss: 0.9402830600738525,grad_norm: 0.9746536505179652, iteration: 264609
loss: 1.0498820543289185,grad_norm: 0.717354864107745, iteration: 264610
loss: 1.0509358644485474,grad_norm: 0.9999996302040411, iteration: 264611
loss: 1.0172345638275146,grad_norm: 0.9072801221650457, iteration: 264612
loss: 1.1025961637496948,grad_norm: 0.9999992412684008, iteration: 264613
loss: 1.0014301538467407,grad_norm: 0.9070538836742521, iteration: 264614
loss: 1.0915178060531616,grad_norm: 0.937266576371803, iteration: 264615
loss: 0.9803552627563477,grad_norm: 0.8440264501809307, iteration: 264616
loss: 1.079068660736084,grad_norm: 0.9999995797989595, iteration: 264617
loss: 1.100570797920227,grad_norm: 0.9433791661286584, iteration: 264618
loss: 1.0572547912597656,grad_norm: 0.9999993284210966, iteration: 264619
loss: 0.9964227080345154,grad_norm: 0.9999991174915074, iteration: 264620
loss: 0.9819998145103455,grad_norm: 0.9812581958850995, iteration: 264621
loss: 1.1238640546798706,grad_norm: 0.9999998644921892, iteration: 264622
loss: 0.9694697856903076,grad_norm: 0.9418032374987593, iteration: 264623
loss: 1.0285507440567017,grad_norm: 0.9999990972210175, iteration: 264624
loss: 1.0358707904815674,grad_norm: 0.9999999329021995, iteration: 264625
loss: 1.0879216194152832,grad_norm: 0.9999999022519962, iteration: 264626
loss: 0.981130063533783,grad_norm: 0.8233561884855164, iteration: 264627
loss: 1.0426267385482788,grad_norm: 0.9999993265570984, iteration: 264628
loss: 1.0173180103302002,grad_norm: 0.7272194292327692, iteration: 264629
loss: 1.0365575551986694,grad_norm: 0.8365681182259798, iteration: 264630
loss: 1.0127670764923096,grad_norm: 0.9999994910617154, iteration: 264631
loss: 0.9911264777183533,grad_norm: 0.9669076170995534, iteration: 264632
loss: 1.0678627490997314,grad_norm: 0.9999994123011515, iteration: 264633
loss: 0.9827818870544434,grad_norm: 0.7185632039743364, iteration: 264634
loss: 1.099787712097168,grad_norm: 0.9999994786684829, iteration: 264635
loss: 1.0584349632263184,grad_norm: 0.9999998725531073, iteration: 264636
loss: 0.9457908868789673,grad_norm: 0.8383120493539966, iteration: 264637
loss: 0.9625401496887207,grad_norm: 0.8394356250507012, iteration: 264638
loss: 1.0899641513824463,grad_norm: 0.7940891390157154, iteration: 264639
loss: 1.1086868047714233,grad_norm: 0.9999997431211438, iteration: 264640
loss: 0.9874114394187927,grad_norm: 0.8049299527344536, iteration: 264641
loss: 1.0256867408752441,grad_norm: 0.9999998698825856, iteration: 264642
loss: 1.091423749923706,grad_norm: 0.9999998630773622, iteration: 264643
loss: 1.0413708686828613,grad_norm: 0.9999996911683692, iteration: 264644
loss: 0.9878396391868591,grad_norm: 0.9259260027566879, iteration: 264645
loss: 0.9861257076263428,grad_norm: 0.9999993791093909, iteration: 264646
loss: 1.021194577217102,grad_norm: 0.8032264113029465, iteration: 264647
loss: 1.0293736457824707,grad_norm: 0.8661220307729854, iteration: 264648
loss: 1.0095276832580566,grad_norm: 0.8962598060181407, iteration: 264649
loss: 1.0133917331695557,grad_norm: 0.8026125483507875, iteration: 264650
loss: 0.9598627686500549,grad_norm: 0.9314574498390967, iteration: 264651
loss: 1.0696996450424194,grad_norm: 0.9999999379403592, iteration: 264652
loss: 1.2422820329666138,grad_norm: 0.9999998945101359, iteration: 264653
loss: 1.0186190605163574,grad_norm: 0.9397601409490565, iteration: 264654
loss: 1.0348421335220337,grad_norm: 0.9521554912816899, iteration: 264655
loss: 1.0027378797531128,grad_norm: 0.999999183581908, iteration: 264656
loss: 1.0068389177322388,grad_norm: 0.9918665457983314, iteration: 264657
loss: 1.0317363739013672,grad_norm: 0.9999990234546606, iteration: 264658
loss: 1.1598516702651978,grad_norm: 0.9999995721051445, iteration: 264659
loss: 1.049518346786499,grad_norm: 0.8964217001600968, iteration: 264660
loss: 0.9882356524467468,grad_norm: 0.901447918827556, iteration: 264661
loss: 1.0209418535232544,grad_norm: 0.8809951346604982, iteration: 264662
loss: 1.1005948781967163,grad_norm: 0.9999991459360381, iteration: 264663
loss: 0.9895404577255249,grad_norm: 0.9999991747689821, iteration: 264664
loss: 1.222110629081726,grad_norm: 0.9999998090465846, iteration: 264665
loss: 1.0688488483428955,grad_norm: 0.9999992852913803, iteration: 264666
loss: 1.188435435295105,grad_norm: 0.9999999424624629, iteration: 264667
loss: 1.074676752090454,grad_norm: 0.871922976267697, iteration: 264668
loss: 1.3963358402252197,grad_norm: 0.9999994916836837, iteration: 264669
loss: 1.2731573581695557,grad_norm: 0.9999994538437842, iteration: 264670
loss: 1.0982600450515747,grad_norm: 0.9999990732423361, iteration: 264671
loss: 1.318044900894165,grad_norm: 0.9999993186504258, iteration: 264672
loss: 1.2286741733551025,grad_norm: 0.999999663923084, iteration: 264673
loss: 1.1757127046585083,grad_norm: 0.9999993456138593, iteration: 264674
loss: 1.2648231983184814,grad_norm: 0.9999999066135329, iteration: 264675
loss: 1.3481025695800781,grad_norm: 0.9999998487132773, iteration: 264676
loss: 1.0585579872131348,grad_norm: 0.999999998492928, iteration: 264677
loss: 1.4095516204833984,grad_norm: 1.0000000408197351, iteration: 264678
loss: 1.491956353187561,grad_norm: 0.9999997985815667, iteration: 264679
loss: 1.315536379814148,grad_norm: 0.9999992928544343, iteration: 264680
loss: 1.2132582664489746,grad_norm: 0.9999992036818129, iteration: 264681
loss: 1.040025234222412,grad_norm: 0.9375190159986018, iteration: 264682
loss: 1.3877240419387817,grad_norm: 0.9999996103080238, iteration: 264683
loss: 1.9341533184051514,grad_norm: 0.9999996616227081, iteration: 264684
loss: 1.1507362127304077,grad_norm: 0.9659411415821909, iteration: 264685
loss: 1.3043270111083984,grad_norm: 0.9999997906220758, iteration: 264686
loss: 1.0502315759658813,grad_norm: 0.99999973494569, iteration: 264687
loss: 1.0179470777511597,grad_norm: 0.9667382700476753, iteration: 264688
loss: 1.0907126665115356,grad_norm: 0.9772108221983697, iteration: 264689
loss: 1.0292942523956299,grad_norm: 0.9999992987908829, iteration: 264690
loss: 1.0550062656402588,grad_norm: 0.9819798554019413, iteration: 264691
loss: 1.1072721481323242,grad_norm: 0.9999998855746343, iteration: 264692
loss: 1.0768393278121948,grad_norm: 0.9999990284966356, iteration: 264693
loss: 1.103842854499817,grad_norm: 0.9999992470552104, iteration: 264694
loss: 1.0876741409301758,grad_norm: 0.9482762330952469, iteration: 264695
loss: 1.007849097251892,grad_norm: 0.9063104444297557, iteration: 264696
loss: 1.2071224451065063,grad_norm: 1.000000028055678, iteration: 264697
loss: 0.9829301834106445,grad_norm: 0.9626740794497605, iteration: 264698
loss: 1.0069321393966675,grad_norm: 0.9267631111196368, iteration: 264699
loss: 1.0430852174758911,grad_norm: 0.8973873385147167, iteration: 264700
loss: 1.040067195892334,grad_norm: 0.7415276855235492, iteration: 264701
loss: 1.0161056518554688,grad_norm: 0.9999991576773037, iteration: 264702
loss: 1.1446737051010132,grad_norm: 0.9999999217918948, iteration: 264703
loss: 0.9728198647499084,grad_norm: 0.8321069886226214, iteration: 264704
loss: 1.1154077053070068,grad_norm: 0.9999999558239984, iteration: 264705
loss: 1.0658546686172485,grad_norm: 0.9999990537644567, iteration: 264706
loss: 1.0876410007476807,grad_norm: 0.9999997433452424, iteration: 264707
loss: 1.053401231765747,grad_norm: 0.9999990135141044, iteration: 264708
loss: 0.9900327324867249,grad_norm: 0.8355738015235296, iteration: 264709
loss: 1.0947177410125732,grad_norm: 0.9999991649115844, iteration: 264710
loss: 0.9868859052658081,grad_norm: 0.8190060561463305, iteration: 264711
loss: 1.0312888622283936,grad_norm: 0.9999993602929552, iteration: 264712
loss: 1.0562394857406616,grad_norm: 0.9999990848985754, iteration: 264713
loss: 0.9937111735343933,grad_norm: 0.9650491474200702, iteration: 264714
loss: 1.028708577156067,grad_norm: 0.9845348282992024, iteration: 264715
loss: 1.124310851097107,grad_norm: 0.9999992104870314, iteration: 264716
loss: 1.0142443180084229,grad_norm: 0.7770882042287136, iteration: 264717
loss: 0.9835036396980286,grad_norm: 0.994165754418759, iteration: 264718
loss: 1.0032459497451782,grad_norm: 0.9131057342415049, iteration: 264719
loss: 0.9818809628486633,grad_norm: 0.9999993613049734, iteration: 264720
loss: 1.1972287893295288,grad_norm: 0.9999996846293707, iteration: 264721
loss: 1.009496808052063,grad_norm: 0.9999990517478949, iteration: 264722
loss: 1.0841745138168335,grad_norm: 0.9999998852328689, iteration: 264723
loss: 1.0552085638046265,grad_norm: 0.8808901502277742, iteration: 264724
loss: 1.0863438844680786,grad_norm: 0.9293554391804064, iteration: 264725
loss: 1.0872198343276978,grad_norm: 0.9999993212184218, iteration: 264726
loss: 1.0360852479934692,grad_norm: 0.8574747350626809, iteration: 264727
loss: 1.0950511693954468,grad_norm: 0.9887430114595247, iteration: 264728
loss: 1.0453168153762817,grad_norm: 0.9999999204394014, iteration: 264729
loss: 1.0512568950653076,grad_norm: 0.9999990443807293, iteration: 264730
loss: 0.9889233708381653,grad_norm: 0.8395464006769211, iteration: 264731
loss: 0.9637269973754883,grad_norm: 0.867348643288997, iteration: 264732
loss: 1.025228500366211,grad_norm: 0.9199138824597722, iteration: 264733
loss: 1.0166393518447876,grad_norm: 0.8260727421311427, iteration: 264734
loss: 1.0415372848510742,grad_norm: 0.9999997497208338, iteration: 264735
loss: 1.007278561592102,grad_norm: 0.8678601083696471, iteration: 264736
loss: 1.0358480215072632,grad_norm: 0.9999991258461984, iteration: 264737
loss: 1.0557440519332886,grad_norm: 0.8573924870660455, iteration: 264738
loss: 1.0172579288482666,grad_norm: 0.9438082737727356, iteration: 264739
loss: 0.9931074380874634,grad_norm: 0.9999990050296229, iteration: 264740
loss: 1.1013027429580688,grad_norm: 0.9130351850908461, iteration: 264741
loss: 1.027482271194458,grad_norm: 0.8615073743583096, iteration: 264742
loss: 1.058321475982666,grad_norm: 0.7682931547585772, iteration: 264743
loss: 1.0063354969024658,grad_norm: 0.8313734721040272, iteration: 264744
loss: 1.0258355140686035,grad_norm: 0.8910141104049452, iteration: 264745
loss: 1.1036262512207031,grad_norm: 0.9423531025501248, iteration: 264746
loss: 1.0364879369735718,grad_norm: 0.9151400416569765, iteration: 264747
loss: 0.9714091420173645,grad_norm: 0.9999991029032087, iteration: 264748
loss: 1.0236016511917114,grad_norm: 0.9999993262998065, iteration: 264749
loss: 1.046163558959961,grad_norm: 0.9239002190759966, iteration: 264750
loss: 0.9784196019172668,grad_norm: 0.9999991758053126, iteration: 264751
loss: 1.0634520053863525,grad_norm: 0.864361458984151, iteration: 264752
loss: 1.0269232988357544,grad_norm: 0.9999996543746871, iteration: 264753
loss: 1.16184401512146,grad_norm: 0.9999997413657226, iteration: 264754
loss: 1.1099319458007812,grad_norm: 0.998584357799863, iteration: 264755
loss: 0.9834505319595337,grad_norm: 0.7834512406114833, iteration: 264756
loss: 1.0191588401794434,grad_norm: 0.9449482435615347, iteration: 264757
loss: 0.9855772256851196,grad_norm: 0.9999993715131295, iteration: 264758
loss: 1.0318115949630737,grad_norm: 0.9606042716465747, iteration: 264759
loss: 1.0646413564682007,grad_norm: 0.9934272463527621, iteration: 264760
loss: 0.9926511645317078,grad_norm: 0.9742448820381912, iteration: 264761
loss: 0.9925870895385742,grad_norm: 0.7704182829779269, iteration: 264762
loss: 1.0283862352371216,grad_norm: 0.884601870174812, iteration: 264763
loss: 1.0411988496780396,grad_norm: 0.9069988484148167, iteration: 264764
loss: 1.034618854522705,grad_norm: 0.9243873287718347, iteration: 264765
loss: 1.019660234451294,grad_norm: 0.9130767758717003, iteration: 264766
loss: 0.9882144927978516,grad_norm: 0.9999990825465359, iteration: 264767
loss: 1.0108174085617065,grad_norm: 0.9077035950967285, iteration: 264768
loss: 0.981587827205658,grad_norm: 0.793343540965019, iteration: 264769
loss: 0.9695712924003601,grad_norm: 0.9734836891723301, iteration: 264770
loss: 1.0794039964675903,grad_norm: 0.9999992227755237, iteration: 264771
loss: 1.0277613401412964,grad_norm: 0.7578591981916543, iteration: 264772
loss: 0.9741638898849487,grad_norm: 0.8741314867729114, iteration: 264773
loss: 1.0132371187210083,grad_norm: 0.8421709119231082, iteration: 264774
loss: 1.0819884538650513,grad_norm: 0.9999989966521508, iteration: 264775
loss: 0.9748464822769165,grad_norm: 0.914481229936829, iteration: 264776
loss: 1.0444774627685547,grad_norm: 0.9164372252341291, iteration: 264777
loss: 1.0505166053771973,grad_norm: 0.9999990939477498, iteration: 264778
loss: 0.9911913275718689,grad_norm: 0.8651689452198574, iteration: 264779
loss: 0.9638488292694092,grad_norm: 0.8161224939402723, iteration: 264780
loss: 0.9941521286964417,grad_norm: 0.9688917633447963, iteration: 264781
loss: 1.0340831279754639,grad_norm: 0.8713777502681016, iteration: 264782
loss: 1.005953311920166,grad_norm: 0.9052046548229833, iteration: 264783
loss: 1.073155164718628,grad_norm: 0.8988520752382095, iteration: 264784
loss: 1.0076369047164917,grad_norm: 0.999999381950721, iteration: 264785
loss: 1.008364200592041,grad_norm: 0.7596399959696432, iteration: 264786
loss: 0.989067792892456,grad_norm: 0.9999991002565831, iteration: 264787
loss: 1.0053483247756958,grad_norm: 0.8253935335439416, iteration: 264788
loss: 0.9926353693008423,grad_norm: 0.8279168031657577, iteration: 264789
loss: 1.0510900020599365,grad_norm: 0.929779300568025, iteration: 264790
loss: 1.0218344926834106,grad_norm: 0.8769107972893035, iteration: 264791
loss: 1.0336726903915405,grad_norm: 0.9999990958349644, iteration: 264792
loss: 1.0356348752975464,grad_norm: 0.8733802460164812, iteration: 264793
loss: 1.0688984394073486,grad_norm: 0.8830306756767422, iteration: 264794
loss: 1.0054959058761597,grad_norm: 0.9999991919840558, iteration: 264795
loss: 1.010464072227478,grad_norm: 0.8406679130112703, iteration: 264796
loss: 1.156388282775879,grad_norm: 0.8854125804672847, iteration: 264797
loss: 0.9994553923606873,grad_norm: 0.9497326719568043, iteration: 264798
loss: 0.9950103759765625,grad_norm: 0.9471125474742058, iteration: 264799
loss: 1.0031787157058716,grad_norm: 0.9999990249148584, iteration: 264800
loss: 1.032720685005188,grad_norm: 0.9999990399534869, iteration: 264801
loss: 1.0825271606445312,grad_norm: 0.9999990875542644, iteration: 264802
loss: 0.9567880034446716,grad_norm: 0.9461654581156181, iteration: 264803
loss: 1.0034856796264648,grad_norm: 0.9920179894831971, iteration: 264804
loss: 1.020275592803955,grad_norm: 0.9999991614476368, iteration: 264805
loss: 1.0205613374710083,grad_norm: 0.8644983477513778, iteration: 264806
loss: 0.9943346977233887,grad_norm: 0.9999990421971288, iteration: 264807
loss: 1.0019054412841797,grad_norm: 0.8315715311651756, iteration: 264808
loss: 0.9905530214309692,grad_norm: 0.7540133510522486, iteration: 264809
loss: 0.9798583984375,grad_norm: 0.826830517842597, iteration: 264810
loss: 1.0005989074707031,grad_norm: 0.7548919073472551, iteration: 264811
loss: 0.9995059967041016,grad_norm: 0.9095760412340392, iteration: 264812
loss: 0.9746525287628174,grad_norm: 0.9034426065707108, iteration: 264813
loss: 1.0355873107910156,grad_norm: 0.9923540828039052, iteration: 264814
loss: 1.0074245929718018,grad_norm: 0.9396394636941963, iteration: 264815
loss: 1.1351383924484253,grad_norm: 0.9999993878836929, iteration: 264816
loss: 1.011441946029663,grad_norm: 0.8109595417867659, iteration: 264817
loss: 1.0155805349349976,grad_norm: 0.8708779537956592, iteration: 264818
loss: 1.0197087526321411,grad_norm: 0.9150825571622334, iteration: 264819
loss: 1.024088740348816,grad_norm: 0.999999866168697, iteration: 264820
loss: 1.0267679691314697,grad_norm: 0.7527028580463179, iteration: 264821
loss: 1.0024688243865967,grad_norm: 0.9999989922341664, iteration: 264822
loss: 1.0150113105773926,grad_norm: 0.8782119669483718, iteration: 264823
loss: 0.9995589852333069,grad_norm: 0.8913600337162974, iteration: 264824
loss: 0.9939793944358826,grad_norm: 0.8350350817980594, iteration: 264825
loss: 1.0151622295379639,grad_norm: 0.8263794129721551, iteration: 264826
loss: 0.9875174164772034,grad_norm: 0.7870236980567337, iteration: 264827
loss: 0.992178201675415,grad_norm: 0.9059151261217888, iteration: 264828
loss: 1.0149800777435303,grad_norm: 0.8827393665611645, iteration: 264829
loss: 1.0472592115402222,grad_norm: 0.8039527439286284, iteration: 264830
loss: 0.9904015064239502,grad_norm: 0.9714205355387268, iteration: 264831
loss: 1.063907265663147,grad_norm: 0.8581932945905605, iteration: 264832
loss: 0.975726306438446,grad_norm: 0.7705278465309171, iteration: 264833
loss: 1.0068291425704956,grad_norm: 0.9999992296377357, iteration: 264834
loss: 1.0103397369384766,grad_norm: 0.9999990856112331, iteration: 264835
loss: 1.0195231437683105,grad_norm: 0.7702762183917942, iteration: 264836
loss: 0.9854256510734558,grad_norm: 0.9270339350709462, iteration: 264837
loss: 1.0013148784637451,grad_norm: 0.7752525004203438, iteration: 264838
loss: 0.9669225215911865,grad_norm: 0.9825969281705442, iteration: 264839
loss: 1.0306670665740967,grad_norm: 0.773482784990321, iteration: 264840
loss: 1.0138027667999268,grad_norm: 0.9999990138348119, iteration: 264841
loss: 1.0133717060089111,grad_norm: 0.8673638125591491, iteration: 264842
loss: 0.990861713886261,grad_norm: 0.9443107844837157, iteration: 264843
loss: 0.9746588468551636,grad_norm: 0.8853375922926399, iteration: 264844
loss: 1.0313119888305664,grad_norm: 0.8463218224981928, iteration: 264845
loss: 0.9550245404243469,grad_norm: 0.9007135150724186, iteration: 264846
loss: 0.9555608034133911,grad_norm: 0.9569207796183443, iteration: 264847
loss: 1.0453150272369385,grad_norm: 0.999998991467237, iteration: 264848
loss: 1.0546053647994995,grad_norm: 0.9999991636612644, iteration: 264849
loss: 1.0157572031021118,grad_norm: 0.9176339819393253, iteration: 264850
loss: 0.9805904626846313,grad_norm: 0.8315109413300059, iteration: 264851
loss: 0.9613943696022034,grad_norm: 0.9387340253355126, iteration: 264852
loss: 1.0807538032531738,grad_norm: 0.9985406123428588, iteration: 264853
loss: 0.9910483360290527,grad_norm: 0.9300526194055905, iteration: 264854
loss: 0.9711580872535706,grad_norm: 0.9999990518992392, iteration: 264855
loss: 0.9752969741821289,grad_norm: 0.9348199559734046, iteration: 264856
loss: 0.9973031282424927,grad_norm: 0.8911103036275507, iteration: 264857
loss: 0.9572717547416687,grad_norm: 0.90385483048452, iteration: 264858
loss: 1.0085231065750122,grad_norm: 0.85841409195477, iteration: 264859
loss: 0.9849662184715271,grad_norm: 0.8587176208752111, iteration: 264860
loss: 1.0234752893447876,grad_norm: 0.8884786806063413, iteration: 264861
loss: 1.0013973712921143,grad_norm: 0.7530911848101691, iteration: 264862
loss: 0.9945816397666931,grad_norm: 0.9208500254854353, iteration: 264863
loss: 1.0044883489608765,grad_norm: 0.8319234388412741, iteration: 264864
loss: 0.9804797172546387,grad_norm: 0.8599834587423264, iteration: 264865
loss: 0.9915987253189087,grad_norm: 0.8204138146815972, iteration: 264866
loss: 1.1212207078933716,grad_norm: 0.9999994229580769, iteration: 264867
loss: 0.9916282296180725,grad_norm: 0.9999993037178878, iteration: 264868
loss: 1.00571608543396,grad_norm: 0.9064348210449201, iteration: 264869
loss: 0.9891684651374817,grad_norm: 0.9057798463639934, iteration: 264870
loss: 1.0214498043060303,grad_norm: 0.8994520274693183, iteration: 264871
loss: 1.0444691181182861,grad_norm: 0.9999994669505774, iteration: 264872
loss: 1.0036611557006836,grad_norm: 0.906847182437011, iteration: 264873
loss: 1.0224882364273071,grad_norm: 0.9999990724937989, iteration: 264874
loss: 1.033660888671875,grad_norm: 0.783969657499905, iteration: 264875
loss: 1.0281472206115723,grad_norm: 0.8021267265608878, iteration: 264876
loss: 1.0149283409118652,grad_norm: 0.8735200054521229, iteration: 264877
loss: 1.001507043838501,grad_norm: 0.8579267883809625, iteration: 264878
loss: 1.0138882398605347,grad_norm: 0.9251196556393262, iteration: 264879
loss: 1.010623574256897,grad_norm: 0.6567187835109508, iteration: 264880
loss: 1.007742166519165,grad_norm: 0.9999994950746132, iteration: 264881
loss: 0.9969652891159058,grad_norm: 0.9999991161198345, iteration: 264882
loss: 0.9978624582290649,grad_norm: 0.7316013927451965, iteration: 264883
loss: 0.9828828573226929,grad_norm: 0.8733835298634782, iteration: 264884
loss: 1.022713303565979,grad_norm: 0.9999990906132384, iteration: 264885
loss: 1.0033485889434814,grad_norm: 0.8917253361605522, iteration: 264886
loss: 0.9853177666664124,grad_norm: 0.9280333702854222, iteration: 264887
loss: 1.0035237073898315,grad_norm: 0.9534537087238683, iteration: 264888
loss: 1.0478559732437134,grad_norm: 0.7495373327895133, iteration: 264889
loss: 0.9724858999252319,grad_norm: 0.7907244215731267, iteration: 264890
loss: 0.9792382121086121,grad_norm: 0.7956772378774268, iteration: 264891
loss: 0.9809834957122803,grad_norm: 0.8979925814502092, iteration: 264892
loss: 1.0228086709976196,grad_norm: 0.9281239891498387, iteration: 264893
loss: 0.9959062337875366,grad_norm: 0.8198840995665028, iteration: 264894
loss: 1.015673041343689,grad_norm: 0.7930872860231197, iteration: 264895
loss: 1.0191748142242432,grad_norm: 0.8987149025824664, iteration: 264896
loss: 1.0027848482131958,grad_norm: 0.8668386163714467, iteration: 264897
loss: 1.018383264541626,grad_norm: 0.8464844001578349, iteration: 264898
loss: 1.0519160032272339,grad_norm: 0.8609002856949411, iteration: 264899
loss: 1.00972580909729,grad_norm: 0.9077803849707666, iteration: 264900
loss: 1.005566954612732,grad_norm: 0.9999990813771227, iteration: 264901
loss: 1.0160140991210938,grad_norm: 0.9999997366081002, iteration: 264902
loss: 1.0072718858718872,grad_norm: 0.9932078457481044, iteration: 264903
loss: 0.9919766783714294,grad_norm: 0.8251208835853472, iteration: 264904
loss: 0.970164954662323,grad_norm: 0.8799352168357062, iteration: 264905
loss: 0.9853706955909729,grad_norm: 0.8201157737785038, iteration: 264906
loss: 1.0265991687774658,grad_norm: 0.8745059013064179, iteration: 264907
loss: 1.0008673667907715,grad_norm: 0.8854790223582957, iteration: 264908
loss: 1.0260262489318848,grad_norm: 0.7302634012925271, iteration: 264909
loss: 1.0068106651306152,grad_norm: 0.9223247537574382, iteration: 264910
loss: 1.0059601068496704,grad_norm: 0.8525614787293809, iteration: 264911
loss: 1.0173770189285278,grad_norm: 0.986706863487992, iteration: 264912
loss: 1.0416163206100464,grad_norm: 0.8475444254722756, iteration: 264913
loss: 0.9901710748672485,grad_norm: 0.9229432485956626, iteration: 264914
loss: 1.0163558721542358,grad_norm: 0.8868820089543155, iteration: 264915
loss: 1.0441257953643799,grad_norm: 0.9999994353747811, iteration: 264916
loss: 0.993493378162384,grad_norm: 0.9701831597280118, iteration: 264917
loss: 1.0782047510147095,grad_norm: 0.9999999523616718, iteration: 264918
loss: 0.979762852191925,grad_norm: 0.9515984774863749, iteration: 264919
loss: 1.0175248384475708,grad_norm: 0.9015062675967781, iteration: 264920
loss: 0.9610971808433533,grad_norm: 0.6921946334498776, iteration: 264921
loss: 1.0341696739196777,grad_norm: 0.9999995596197392, iteration: 264922
loss: 1.0559799671173096,grad_norm: 0.9999990892038143, iteration: 264923
loss: 1.020444393157959,grad_norm: 0.999999268434174, iteration: 264924
loss: 1.0026665925979614,grad_norm: 0.9861569075499895, iteration: 264925
loss: 1.0450948476791382,grad_norm: 0.9341933310570638, iteration: 264926
loss: 0.9908754229545593,grad_norm: 0.7842173987261616, iteration: 264927
loss: 1.0030587911605835,grad_norm: 0.9999989528391103, iteration: 264928
loss: 1.0067867040634155,grad_norm: 0.8412531972099502, iteration: 264929
loss: 1.0049750804901123,grad_norm: 0.999999421288663, iteration: 264930
loss: 0.9808679223060608,grad_norm: 0.8303912245693785, iteration: 264931
loss: 0.9844418168067932,grad_norm: 0.8625391596229987, iteration: 264932
loss: 1.0017898082733154,grad_norm: 0.9999991548088459, iteration: 264933
loss: 0.9588479399681091,grad_norm: 0.9999990798458424, iteration: 264934
loss: 1.0685988664627075,grad_norm: 0.8089442188133764, iteration: 264935
loss: 0.9859848022460938,grad_norm: 0.89809993782265, iteration: 264936
loss: 1.0147745609283447,grad_norm: 0.8388917317762538, iteration: 264937
loss: 1.071761131286621,grad_norm: 0.9999999379641097, iteration: 264938
loss: 0.9727530479431152,grad_norm: 0.91018041484259, iteration: 264939
loss: 1.01618230342865,grad_norm: 0.9999990776186787, iteration: 264940
loss: 0.9994942545890808,grad_norm: 0.9999990194296052, iteration: 264941
loss: 1.0221710205078125,grad_norm: 0.8447824810092922, iteration: 264942
loss: 1.0066763162612915,grad_norm: 0.999999285087578, iteration: 264943
loss: 1.0341193675994873,grad_norm: 0.7954308324870947, iteration: 264944
loss: 1.0303566455841064,grad_norm: 0.8211861516668238, iteration: 264945
loss: 0.9954198002815247,grad_norm: 0.9579134732996841, iteration: 264946
loss: 0.9894813299179077,grad_norm: 0.7581980914072287, iteration: 264947
loss: 1.0067543983459473,grad_norm: 0.9999991481636092, iteration: 264948
loss: 0.9842498302459717,grad_norm: 0.9999992023953168, iteration: 264949
loss: 1.0431034564971924,grad_norm: 0.9024000187678622, iteration: 264950
loss: 1.0429203510284424,grad_norm: 0.9017312545479239, iteration: 264951
loss: 0.9865908622741699,grad_norm: 0.8500405221502064, iteration: 264952
loss: 1.0214589834213257,grad_norm: 0.9999990846607602, iteration: 264953
loss: 0.9892520308494568,grad_norm: 0.9999989092426657, iteration: 264954
loss: 0.9816325902938843,grad_norm: 0.9999992908116672, iteration: 264955
loss: 0.9743196368217468,grad_norm: 0.7336792528636121, iteration: 264956
loss: 0.9745357632637024,grad_norm: 0.8116500201891337, iteration: 264957
loss: 1.0190916061401367,grad_norm: 0.8617649157992806, iteration: 264958
loss: 0.9707351922988892,grad_norm: 0.9999996024009945, iteration: 264959
loss: 1.0064826011657715,grad_norm: 0.8762700816905308, iteration: 264960
loss: 1.0281888246536255,grad_norm: 0.999999114285129, iteration: 264961
loss: 0.996041476726532,grad_norm: 0.9567595137453683, iteration: 264962
loss: 0.9678145051002502,grad_norm: 0.970001626693673, iteration: 264963
loss: 1.0470942258834839,grad_norm: 0.8735702444788278, iteration: 264964
loss: 0.995643138885498,grad_norm: 0.938875454103881, iteration: 264965
loss: 0.9754169583320618,grad_norm: 0.8171087360575829, iteration: 264966
loss: 0.9876758456230164,grad_norm: 0.9169649561044159, iteration: 264967
loss: 1.0029112100601196,grad_norm: 0.9999991545575799, iteration: 264968
loss: 0.9916410446166992,grad_norm: 0.9817050054280951, iteration: 264969
loss: 1.016167163848877,grad_norm: 0.9172757293012317, iteration: 264970
loss: 1.0189619064331055,grad_norm: 0.9999994617464075, iteration: 264971
loss: 1.0871336460113525,grad_norm: 0.9999992475624927, iteration: 264972
loss: 0.987543523311615,grad_norm: 0.9641631258759463, iteration: 264973
loss: 1.0478473901748657,grad_norm: 0.9522085608307114, iteration: 264974
loss: 0.9364467859268188,grad_norm: 0.9733676388749181, iteration: 264975
loss: 1.0114638805389404,grad_norm: 0.9289184998488277, iteration: 264976
loss: 1.1246103048324585,grad_norm: 0.9999998353384768, iteration: 264977
loss: 1.0130943059921265,grad_norm: 0.8258096283612087, iteration: 264978
loss: 0.9747475981712341,grad_norm: 0.9201940407237189, iteration: 264979
loss: 0.992919921875,grad_norm: 0.8806033354655056, iteration: 264980
loss: 0.9798464179039001,grad_norm: 0.9999992228325308, iteration: 264981
loss: 1.0176196098327637,grad_norm: 0.9990274686188468, iteration: 264982
loss: 1.0008869171142578,grad_norm: 0.999999132967714, iteration: 264983
loss: 0.9914608001708984,grad_norm: 0.9202393963401089, iteration: 264984
loss: 1.09605073928833,grad_norm: 0.9999995744375114, iteration: 264985
loss: 0.9890447854995728,grad_norm: 0.8887994272707103, iteration: 264986
loss: 0.9827417135238647,grad_norm: 0.9999991006921214, iteration: 264987
loss: 0.9930222630500793,grad_norm: 0.8783109152179455, iteration: 264988
loss: 1.022781252861023,grad_norm: 0.9999991670803156, iteration: 264989
loss: 1.0046017169952393,grad_norm: 0.9536728107793685, iteration: 264990
loss: 0.9832537770271301,grad_norm: 0.9786911603262731, iteration: 264991
loss: 1.0772687196731567,grad_norm: 0.9999995919351897, iteration: 264992
loss: 1.0019888877868652,grad_norm: 0.9318310982099746, iteration: 264993
loss: 1.0336917638778687,grad_norm: 0.8475505028200282, iteration: 264994
loss: 1.018109679222107,grad_norm: 0.9390635777626882, iteration: 264995
loss: 1.0724161863327026,grad_norm: 0.9999992382738984, iteration: 264996
loss: 0.9807909727096558,grad_norm: 0.8288383484402326, iteration: 264997
loss: 0.9896669387817383,grad_norm: 0.886321740172824, iteration: 264998
loss: 0.9771759510040283,grad_norm: 0.9501147145372554, iteration: 264999
loss: 0.9960649013519287,grad_norm: 0.8778984122869934, iteration: 265000
loss: 0.9794116616249084,grad_norm: 0.843912561521353, iteration: 265001
loss: 0.9780223369598389,grad_norm: 0.8516136221851143, iteration: 265002
loss: 0.9877068996429443,grad_norm: 0.8701545716833141, iteration: 265003
loss: 0.9960355758666992,grad_norm: 0.90371711286178, iteration: 265004
loss: 1.0167449712753296,grad_norm: 0.8611916709400629, iteration: 265005
loss: 0.9888496398925781,grad_norm: 0.8792434989362766, iteration: 265006
loss: 1.0091160535812378,grad_norm: 0.9515061349128761, iteration: 265007
loss: 1.0198918581008911,grad_norm: 0.9527112959465616, iteration: 265008
loss: 0.976921021938324,grad_norm: 0.821456762803295, iteration: 265009
loss: 1.0143728256225586,grad_norm: 0.9373775657632425, iteration: 265010
loss: 0.9749792218208313,grad_norm: 0.9038862899360425, iteration: 265011
loss: 1.0340580940246582,grad_norm: 0.999998974228918, iteration: 265012
loss: 0.9942189455032349,grad_norm: 0.7745387793366271, iteration: 265013
loss: 1.1273874044418335,grad_norm: 0.9393742006185192, iteration: 265014
loss: 1.0310364961624146,grad_norm: 0.999999220946031, iteration: 265015
loss: 0.9787840843200684,grad_norm: 0.7459523016402371, iteration: 265016
loss: 1.016607642173767,grad_norm: 0.9999994186396977, iteration: 265017
loss: 0.9773726463317871,grad_norm: 0.946205636497132, iteration: 265018
loss: 0.9926977157592773,grad_norm: 0.9854086319609456, iteration: 265019
loss: 0.9760966300964355,grad_norm: 0.9919823363312373, iteration: 265020
loss: 1.0127323865890503,grad_norm: 0.9477689653316514, iteration: 265021
loss: 1.0192747116088867,grad_norm: 0.9999996939593497, iteration: 265022
loss: 1.0169992446899414,grad_norm: 0.784182504359836, iteration: 265023
loss: 1.0327579975128174,grad_norm: 0.8829973566151887, iteration: 265024
loss: 0.9978092312812805,grad_norm: 0.8721777699832353, iteration: 265025
loss: 1.0027016401290894,grad_norm: 0.9295645840747505, iteration: 265026
loss: 0.9896339774131775,grad_norm: 0.9999991682040995, iteration: 265027
loss: 0.9844743609428406,grad_norm: 0.85684521456163, iteration: 265028
loss: 1.021657943725586,grad_norm: 0.864105244676385, iteration: 265029
loss: 0.9919952750205994,grad_norm: 0.7999087340530603, iteration: 265030
loss: 1.0795693397521973,grad_norm: 0.9999992134203933, iteration: 265031
loss: 1.0032981634140015,grad_norm: 0.8805398932791474, iteration: 265032
loss: 1.0035948753356934,grad_norm: 0.9949533373939731, iteration: 265033
loss: 0.9747277498245239,grad_norm: 0.8590685473600834, iteration: 265034
loss: 1.0127990245819092,grad_norm: 0.7573707905189199, iteration: 265035
loss: 1.0846383571624756,grad_norm: 0.9830542584087365, iteration: 265036
loss: 1.1269179582595825,grad_norm: 0.999999861332594, iteration: 265037
loss: 0.9752550721168518,grad_norm: 0.9568443083644779, iteration: 265038
loss: 1.0328099727630615,grad_norm: 0.99999933230084, iteration: 265039
loss: 0.9672456383705139,grad_norm: 0.9002924442422939, iteration: 265040
loss: 1.0375615358352661,grad_norm: 0.9999990268283163, iteration: 265041
loss: 1.0267783403396606,grad_norm: 0.8651364089880803, iteration: 265042
loss: 0.9811329245567322,grad_norm: 0.8472861054431733, iteration: 265043
loss: 0.9597961902618408,grad_norm: 0.9999990659288436, iteration: 265044
loss: 0.9753445982933044,grad_norm: 0.8920813552434671, iteration: 265045
loss: 1.0839803218841553,grad_norm: 0.9999992346235825, iteration: 265046
loss: 1.0328842401504517,grad_norm: 0.9778676229859234, iteration: 265047
loss: 1.0665602684020996,grad_norm: 0.8344609002409983, iteration: 265048
loss: 0.9837360382080078,grad_norm: 0.9999992643625487, iteration: 265049
loss: 1.0114725828170776,grad_norm: 0.9788488835610719, iteration: 265050
loss: 1.0807980298995972,grad_norm: 0.999999136122128, iteration: 265051
loss: 1.0179387331008911,grad_norm: 0.9999992293743872, iteration: 265052
loss: 0.9912758469581604,grad_norm: 0.8378936162370083, iteration: 265053
loss: 1.0821874141693115,grad_norm: 0.8846982539239986, iteration: 265054
loss: 0.9859475493431091,grad_norm: 0.7425578537143371, iteration: 265055
loss: 1.0160139799118042,grad_norm: 0.8166897108615195, iteration: 265056
loss: 1.0136239528656006,grad_norm: 0.7963671982785666, iteration: 265057
loss: 1.0262701511383057,grad_norm: 0.9999995268629948, iteration: 265058
loss: 1.0660468339920044,grad_norm: 0.9999993928263938, iteration: 265059
loss: 1.0534611940383911,grad_norm: 0.9999990826227957, iteration: 265060
loss: 1.0747923851013184,grad_norm: 0.974163619268635, iteration: 265061
loss: 1.011581301689148,grad_norm: 0.9858121981382713, iteration: 265062
loss: 0.9894263744354248,grad_norm: 0.8992588543983677, iteration: 265063
loss: 1.2799248695373535,grad_norm: 0.9999998610007761, iteration: 265064
loss: 1.0088123083114624,grad_norm: 0.9999993336034437, iteration: 265065
loss: 0.9725735187530518,grad_norm: 0.7660977167372393, iteration: 265066
loss: 1.0358662605285645,grad_norm: 0.8527213238463803, iteration: 265067
loss: 1.048289179801941,grad_norm: 0.9999999944967681, iteration: 265068
loss: 1.0056871175765991,grad_norm: 0.9999992290231278, iteration: 265069
loss: 1.0003046989440918,grad_norm: 0.7582657257669165, iteration: 265070
loss: 0.9887607097625732,grad_norm: 0.9051411123863579, iteration: 265071
loss: 1.0155421495437622,grad_norm: 0.7113569438519277, iteration: 265072
loss: 0.9792652130126953,grad_norm: 0.9999993450409116, iteration: 265073
loss: 0.9966667294502258,grad_norm: 0.8608942548066381, iteration: 265074
loss: 0.987595796585083,grad_norm: 0.7774699358904995, iteration: 265075
loss: 1.0556340217590332,grad_norm: 0.9999992826285585, iteration: 265076
loss: 0.9924445152282715,grad_norm: 0.922449303018004, iteration: 265077
loss: 0.9668163657188416,grad_norm: 0.9795456472408354, iteration: 265078
loss: 1.0246262550354004,grad_norm: 0.7939887596862489, iteration: 265079
loss: 1.1160247325897217,grad_norm: 0.9999991221494101, iteration: 265080
loss: 1.0001442432403564,grad_norm: 0.9621470553339606, iteration: 265081
loss: 1.0072983503341675,grad_norm: 0.8142557127658614, iteration: 265082
loss: 1.0125149488449097,grad_norm: 0.9626806893724157, iteration: 265083
loss: 0.9671614170074463,grad_norm: 0.8373426818602203, iteration: 265084
loss: 0.9815236330032349,grad_norm: 0.7457314192504919, iteration: 265085
loss: 1.1747914552688599,grad_norm: 0.9999997746188702, iteration: 265086
loss: 0.9722359776496887,grad_norm: 0.8676516563639696, iteration: 265087
loss: 1.016465425491333,grad_norm: 0.9572704982210869, iteration: 265088
loss: 0.9910717010498047,grad_norm: 0.8703071730421965, iteration: 265089
loss: 1.0034658908843994,grad_norm: 0.8822433449488428, iteration: 265090
loss: 1.1022971868515015,grad_norm: 0.9999993512121018, iteration: 265091
loss: 0.9905517101287842,grad_norm: 0.9999989178388008, iteration: 265092
loss: 1.0025811195373535,grad_norm: 0.800602222385724, iteration: 265093
loss: 1.007569670677185,grad_norm: 0.9257192409552915, iteration: 265094
loss: 1.0241400003433228,grad_norm: 0.9999996387299657, iteration: 265095
loss: 0.9593201279640198,grad_norm: 0.7642619847507196, iteration: 265096
loss: 0.9961616396903992,grad_norm: 0.8786758479357746, iteration: 265097
loss: 0.9874473214149475,grad_norm: 0.8967086813111265, iteration: 265098
loss: 1.098982572555542,grad_norm: 0.9999992167153543, iteration: 265099
loss: 1.0025192499160767,grad_norm: 0.9999992230844572, iteration: 265100
loss: 0.9997761845588684,grad_norm: 0.9999990690516755, iteration: 265101
loss: 0.9782836437225342,grad_norm: 0.8980926390497831, iteration: 265102
loss: 1.0030509233474731,grad_norm: 0.7511525165681396, iteration: 265103
loss: 1.0111818313598633,grad_norm: 0.8955718981354582, iteration: 265104
loss: 1.0080260038375854,grad_norm: 0.8900900216855284, iteration: 265105
loss: 1.0290520191192627,grad_norm: 0.9168632101460886, iteration: 265106
loss: 0.9860513210296631,grad_norm: 0.8661384197488446, iteration: 265107
loss: 1.022058367729187,grad_norm: 0.9999990506368523, iteration: 265108
loss: 1.0070425271987915,grad_norm: 0.8823070434690143, iteration: 265109
loss: 0.9841228723526001,grad_norm: 0.9110322549236474, iteration: 265110
loss: 1.0024302005767822,grad_norm: 0.8683167339373727, iteration: 265111
loss: 1.0475209951400757,grad_norm: 0.9999991079948587, iteration: 265112
loss: 0.9970977306365967,grad_norm: 0.7970423204968002, iteration: 265113
loss: 1.017831802368164,grad_norm: 0.9323281049850457, iteration: 265114
loss: 1.0860220193862915,grad_norm: 0.9786562587866824, iteration: 265115
loss: 1.000575304031372,grad_norm: 0.999999573665473, iteration: 265116
loss: 1.00281822681427,grad_norm: 0.8293250668379402, iteration: 265117
loss: 1.012770652770996,grad_norm: 0.7892922848846287, iteration: 265118
loss: 1.0153976678848267,grad_norm: 0.973901584732973, iteration: 265119
loss: 1.0052406787872314,grad_norm: 0.8646211426935053, iteration: 265120
loss: 1.0110200643539429,grad_norm: 0.8414671877075189, iteration: 265121
loss: 1.0099583864212036,grad_norm: 0.7656751797112421, iteration: 265122
loss: 1.0832390785217285,grad_norm: 0.8883311674247589, iteration: 265123
loss: 1.2246736288070679,grad_norm: 0.9999990707767895, iteration: 265124
loss: 1.0502384901046753,grad_norm: 0.9999990900026349, iteration: 265125
loss: 1.0092358589172363,grad_norm: 0.9033931339310755, iteration: 265126
loss: 1.0116262435913086,grad_norm: 0.8876065098455453, iteration: 265127
loss: 1.010006308555603,grad_norm: 0.9565949267502144, iteration: 265128
loss: 0.9912729859352112,grad_norm: 0.7756458918701202, iteration: 265129
loss: 1.0182267427444458,grad_norm: 0.8058404500403156, iteration: 265130
loss: 1.011061429977417,grad_norm: 0.9999990623100388, iteration: 265131
loss: 0.9949173331260681,grad_norm: 0.8423641954180239, iteration: 265132
loss: 1.0372753143310547,grad_norm: 0.9760382818653905, iteration: 265133
loss: 1.0074410438537598,grad_norm: 0.8220343459171806, iteration: 265134
loss: 1.0140730142593384,grad_norm: 0.7860219773901104, iteration: 265135
loss: 1.0203616619110107,grad_norm: 0.9999995674243753, iteration: 265136
loss: 1.0359355211257935,grad_norm: 0.9592740093396525, iteration: 265137
loss: 1.053806185722351,grad_norm: 0.8663997456879508, iteration: 265138
loss: 1.004315733909607,grad_norm: 0.7280618446028875, iteration: 265139
loss: 1.1527700424194336,grad_norm: 0.9999996580506686, iteration: 265140
loss: 0.9817436337471008,grad_norm: 0.8945838669808303, iteration: 265141
loss: 1.037263035774231,grad_norm: 0.9999998783904616, iteration: 265142
loss: 1.0299168825149536,grad_norm: 0.9999992536111687, iteration: 265143
loss: 1.013986587524414,grad_norm: 0.9493138935324213, iteration: 265144
loss: 1.04244065284729,grad_norm: 0.9999995994655602, iteration: 265145
loss: 1.0898061990737915,grad_norm: 0.897871168741415, iteration: 265146
loss: 0.9958609938621521,grad_norm: 0.85315486941474, iteration: 265147
loss: 0.9693120121955872,grad_norm: 0.9090476863201037, iteration: 265148
loss: 1.0106149911880493,grad_norm: 0.916281036598291, iteration: 265149
loss: 1.0056700706481934,grad_norm: 0.7823751417769524, iteration: 265150
loss: 1.0823918581008911,grad_norm: 0.9999990478951091, iteration: 265151
loss: 1.047023057937622,grad_norm: 0.9999992950037357, iteration: 265152
loss: 1.059084415435791,grad_norm: 0.9999999208906897, iteration: 265153
loss: 1.0989289283752441,grad_norm: 0.9999995621319341, iteration: 265154
loss: 1.0054831504821777,grad_norm: 0.8860511139379689, iteration: 265155
loss: 1.1269710063934326,grad_norm: 0.9999990624213143, iteration: 265156
loss: 1.0020062923431396,grad_norm: 0.9190287849167418, iteration: 265157
loss: 1.0685874223709106,grad_norm: 0.999999740413033, iteration: 265158
loss: 1.085986614227295,grad_norm: 0.9832912549630162, iteration: 265159
loss: 1.0212763547897339,grad_norm: 0.9999993066974802, iteration: 265160
loss: 1.1042733192443848,grad_norm: 0.9999993605829014, iteration: 265161
loss: 0.9790470600128174,grad_norm: 0.9154483845253707, iteration: 265162
loss: 1.0312116146087646,grad_norm: 0.9590497206233893, iteration: 265163
loss: 0.9829422235488892,grad_norm: 0.8276075654860641, iteration: 265164
loss: 1.0918259620666504,grad_norm: 0.7910224446167176, iteration: 265165
loss: 1.0326915979385376,grad_norm: 0.8440147027493986, iteration: 265166
loss: 1.1427714824676514,grad_norm: 0.9689807533149003, iteration: 265167
loss: 1.0574085712432861,grad_norm: 0.9999993350138769, iteration: 265168
loss: 1.0728358030319214,grad_norm: 0.9999996838577293, iteration: 265169
loss: 0.9972965121269226,grad_norm: 0.8702898809289388, iteration: 265170
loss: 1.1100335121154785,grad_norm: 0.9999996317039334, iteration: 265171
loss: 1.1446441411972046,grad_norm: 0.9999999470350511, iteration: 265172
loss: 1.083713412284851,grad_norm: 0.9999990694143407, iteration: 265173
loss: 1.0428897142410278,grad_norm: 0.9950754757638097, iteration: 265174
loss: 1.0675575733184814,grad_norm: 0.9999997950743957, iteration: 265175
loss: 1.0320727825164795,grad_norm: 0.9051711768845871, iteration: 265176
loss: 1.044830083847046,grad_norm: 0.8043492278198955, iteration: 265177
loss: 1.1253288984298706,grad_norm: 0.9999992120071591, iteration: 265178
loss: 1.0100359916687012,grad_norm: 0.9999992020766828, iteration: 265179
loss: 1.0178992748260498,grad_norm: 0.9820369924567273, iteration: 265180
loss: 1.058536410331726,grad_norm: 0.9999991048446383, iteration: 265181
loss: 1.0177966356277466,grad_norm: 0.9262339744565677, iteration: 265182
loss: 1.0055171251296997,grad_norm: 0.9999992352707068, iteration: 265183
loss: 1.075334072113037,grad_norm: 0.9999994940883822, iteration: 265184
loss: 1.0130376815795898,grad_norm: 0.9558831919284995, iteration: 265185
loss: 1.0034170150756836,grad_norm: 0.9563249732239707, iteration: 265186
loss: 1.0630805492401123,grad_norm: 0.9999999025080861, iteration: 265187
loss: 0.9703218340873718,grad_norm: 0.8427831901084619, iteration: 265188
loss: 0.9805855751037598,grad_norm: 0.999999838147398, iteration: 265189
loss: 1.039786458015442,grad_norm: 0.9999994480083239, iteration: 265190
loss: 1.089046597480774,grad_norm: 0.9999991815557315, iteration: 265191
loss: 1.015147089958191,grad_norm: 0.999999280270801, iteration: 265192
loss: 1.064757227897644,grad_norm: 0.9096625826694789, iteration: 265193
loss: 1.0196309089660645,grad_norm: 0.8897863151279658, iteration: 265194
loss: 1.02626633644104,grad_norm: 0.9345539705131781, iteration: 265195
loss: 1.054541826248169,grad_norm: 0.7813832135197512, iteration: 265196
loss: 1.0538095235824585,grad_norm: 0.9847060080213432, iteration: 265197
loss: 1.1078801155090332,grad_norm: 0.8237019514235377, iteration: 265198
loss: 1.0025441646575928,grad_norm: 0.8562703519688679, iteration: 265199
loss: 1.0070700645446777,grad_norm: 0.9126869371428993, iteration: 265200
loss: 1.0314773321151733,grad_norm: 0.999999682041389, iteration: 265201
loss: 1.0172432661056519,grad_norm: 1.0000000682473376, iteration: 265202
loss: 1.0455161333084106,grad_norm: 0.8080141031580804, iteration: 265203
loss: 1.0163633823394775,grad_norm: 0.8135642584003742, iteration: 265204
loss: 1.0623736381530762,grad_norm: 0.9999992515519673, iteration: 265205
loss: 1.0382381677627563,grad_norm: 0.843393919652568, iteration: 265206
loss: 1.0535272359848022,grad_norm: 0.9999992154275591, iteration: 265207
loss: 1.080613136291504,grad_norm: 0.9730064881822578, iteration: 265208
loss: 0.9911910891532898,grad_norm: 0.9178102232939604, iteration: 265209
loss: 0.9941747784614563,grad_norm: 0.9999991843929595, iteration: 265210
loss: 0.963595449924469,grad_norm: 0.9207776300024397, iteration: 265211
loss: 1.1225022077560425,grad_norm: 0.9999991098074044, iteration: 265212
loss: 0.9806017279624939,grad_norm: 0.8482209836987642, iteration: 265213
loss: 1.0266063213348389,grad_norm: 0.9999992190493371, iteration: 265214
loss: 0.9942135214805603,grad_norm: 0.932160788671759, iteration: 265215
loss: 1.0036331415176392,grad_norm: 0.8684201888375613, iteration: 265216
loss: 1.0087257623672485,grad_norm: 0.7125615102219423, iteration: 265217
loss: 0.9920177459716797,grad_norm: 0.7541835595284659, iteration: 265218
loss: 1.104552984237671,grad_norm: 0.9999993654289157, iteration: 265219
loss: 0.9859080910682678,grad_norm: 0.9422447800739632, iteration: 265220
loss: 1.0045627355575562,grad_norm: 0.8429133049016404, iteration: 265221
loss: 1.1840400695800781,grad_norm: 0.9999998223052875, iteration: 265222
loss: 1.0091232061386108,grad_norm: 0.9999991925439059, iteration: 265223
loss: 1.0546293258666992,grad_norm: 0.9102692794240625, iteration: 265224
loss: 1.0381546020507812,grad_norm: 0.9999990797867552, iteration: 265225
loss: 1.0626565217971802,grad_norm: 0.9999998275908689, iteration: 265226
loss: 1.0810433626174927,grad_norm: 0.8121763950933234, iteration: 265227
loss: 1.0681148767471313,grad_norm: 0.906554688622155, iteration: 265228
loss: 1.0024975538253784,grad_norm: 0.8403838265860955, iteration: 265229
loss: 1.0173742771148682,grad_norm: 0.999999067473425, iteration: 265230
loss: 1.156603455543518,grad_norm: 0.9999993367678317, iteration: 265231
loss: 1.0170336961746216,grad_norm: 0.9065311376790314, iteration: 265232
loss: 1.0678863525390625,grad_norm: 0.8617849364325547, iteration: 265233
loss: 0.9959849119186401,grad_norm: 0.9078961741541917, iteration: 265234
loss: 1.0232412815093994,grad_norm: 0.9858691504037773, iteration: 265235
loss: 0.9868389964103699,grad_norm: 0.6969649842875205, iteration: 265236
loss: 1.0338213443756104,grad_norm: 0.9999996657560172, iteration: 265237
loss: 1.0128509998321533,grad_norm: 0.9175387490013046, iteration: 265238
loss: 1.0815515518188477,grad_norm: 0.8213866993195875, iteration: 265239
loss: 1.0091876983642578,grad_norm: 0.9999997965388879, iteration: 265240
loss: 0.9703109860420227,grad_norm: 0.9779263044631226, iteration: 265241
loss: 1.0519988536834717,grad_norm: 0.9455373528153441, iteration: 265242
loss: 1.0222042798995972,grad_norm: 0.876300545161668, iteration: 265243
loss: 1.0919066667556763,grad_norm: 0.8276644923252802, iteration: 265244
loss: 1.0370323657989502,grad_norm: 0.9999991716025467, iteration: 265245
loss: 1.0393022298812866,grad_norm: 0.9823466014849579, iteration: 265246
loss: 1.076807975769043,grad_norm: 0.9427487310386271, iteration: 265247
loss: 0.9778554439544678,grad_norm: 0.9999990834092214, iteration: 265248
loss: 0.9902040362358093,grad_norm: 0.8517869901945014, iteration: 265249
loss: 0.9709434509277344,grad_norm: 0.7486235516704577, iteration: 265250
loss: 1.008972406387329,grad_norm: 0.9999992089921055, iteration: 265251
loss: 1.0257772207260132,grad_norm: 0.891709784298893, iteration: 265252
loss: 1.04232656955719,grad_norm: 0.9999998103607247, iteration: 265253
loss: 1.0266103744506836,grad_norm: 0.9999997168561021, iteration: 265254
loss: 0.997819721698761,grad_norm: 0.9999992235138667, iteration: 265255
loss: 1.0328500270843506,grad_norm: 0.9999932312598422, iteration: 265256
loss: 1.0089292526245117,grad_norm: 0.999656756383583, iteration: 265257
loss: 1.0158064365386963,grad_norm: 0.8851538560260085, iteration: 265258
loss: 0.991352915763855,grad_norm: 0.8816370136491078, iteration: 265259
loss: 1.0935561656951904,grad_norm: 0.9999989037852309, iteration: 265260
loss: 0.9505277872085571,grad_norm: 0.9999991494526222, iteration: 265261
loss: 0.9872560501098633,grad_norm: 0.903234049448436, iteration: 265262
loss: 0.994117259979248,grad_norm: 0.7122109071509695, iteration: 265263
loss: 0.9896388053894043,grad_norm: 0.8685334067445385, iteration: 265264
loss: 0.9492233991622925,grad_norm: 0.9524899468970863, iteration: 265265
loss: 1.0238935947418213,grad_norm: 0.8054803619868234, iteration: 265266
loss: 0.9867011904716492,grad_norm: 0.8635011088796075, iteration: 265267
loss: 1.0475139617919922,grad_norm: 0.8494925377578044, iteration: 265268
loss: 0.98880934715271,grad_norm: 0.9113067988523588, iteration: 265269
loss: 1.0975868701934814,grad_norm: 0.9242548979823587, iteration: 265270
loss: 1.0935612916946411,grad_norm: 0.9999989754073187, iteration: 265271
loss: 0.9998518228530884,grad_norm: 0.9191637977388667, iteration: 265272
loss: 1.0108789205551147,grad_norm: 0.8704639754629698, iteration: 265273
loss: 0.9850651621818542,grad_norm: 0.999999957122407, iteration: 265274
loss: 1.0414702892303467,grad_norm: 0.9999997428488653, iteration: 265275
loss: 0.9630976915359497,grad_norm: 0.7848393011069761, iteration: 265276
loss: 0.990205705165863,grad_norm: 0.9999990543930812, iteration: 265277
loss: 1.0636849403381348,grad_norm: 0.8427692902105323, iteration: 265278
loss: 1.0139943361282349,grad_norm: 0.8258694325242238, iteration: 265279
loss: 0.9891801476478577,grad_norm: 0.9999991639421202, iteration: 265280
loss: 1.0135912895202637,grad_norm: 0.99999934004291, iteration: 265281
loss: 1.0067939758300781,grad_norm: 0.9847411596858383, iteration: 265282
loss: 1.08311927318573,grad_norm: 0.9233856160224714, iteration: 265283
loss: 0.9637536406517029,grad_norm: 0.8929770150211852, iteration: 265284
loss: 0.987188994884491,grad_norm: 0.8805659305861114, iteration: 265285
loss: 1.0100436210632324,grad_norm: 0.9724959855790782, iteration: 265286
loss: 0.986521303653717,grad_norm: 0.9069044638283269, iteration: 265287
loss: 1.049839973449707,grad_norm: 0.9999995299037515, iteration: 265288
loss: 0.9898022413253784,grad_norm: 0.9999989918021499, iteration: 265289
loss: 0.9973047971725464,grad_norm: 0.9440810512919675, iteration: 265290
loss: 0.9813190698623657,grad_norm: 0.9244842866360625, iteration: 265291
loss: 1.0223767757415771,grad_norm: 0.8289136932512409, iteration: 265292
loss: 1.0621671676635742,grad_norm: 0.9999996242150607, iteration: 265293
loss: 1.0019168853759766,grad_norm: 0.8275791804180849, iteration: 265294
loss: 0.9946698546409607,grad_norm: 0.9999995093760244, iteration: 265295
loss: 1.0253636837005615,grad_norm: 0.803991495504844, iteration: 265296
loss: 1.0226026773452759,grad_norm: 0.7485180707428306, iteration: 265297
loss: 1.0206390619277954,grad_norm: 0.8896881094076537, iteration: 265298
loss: 1.039710521697998,grad_norm: 0.8921014771474911, iteration: 265299
loss: 0.9793672561645508,grad_norm: 0.7484383391771012, iteration: 265300
loss: 0.9884306788444519,grad_norm: 0.9999990673006515, iteration: 265301
loss: 0.96509850025177,grad_norm: 0.8795608768691026, iteration: 265302
loss: 0.9845834970474243,grad_norm: 0.8730702599390859, iteration: 265303
loss: 1.0770230293273926,grad_norm: 0.8438359006707791, iteration: 265304
loss: 1.085329294204712,grad_norm: 0.9999990804861804, iteration: 265305
loss: 0.9983798861503601,grad_norm: 0.9379764089993731, iteration: 265306
loss: 1.0051093101501465,grad_norm: 0.999999168022982, iteration: 265307
loss: 1.001932144165039,grad_norm: 0.7722190873751998, iteration: 265308
loss: 1.0002599954605103,grad_norm: 0.9170792216858694, iteration: 265309
loss: 1.0194129943847656,grad_norm: 0.8021676230571438, iteration: 265310
loss: 0.9846296310424805,grad_norm: 0.92575001226607, iteration: 265311
loss: 1.0486234426498413,grad_norm: 0.9112012788555991, iteration: 265312
loss: 1.0862452983856201,grad_norm: 0.9999993355807268, iteration: 265313
loss: 1.020289421081543,grad_norm: 0.8711495201812469, iteration: 265314
loss: 1.065631628036499,grad_norm: 0.9084926792625451, iteration: 265315
loss: 0.9866788387298584,grad_norm: 0.7738243290435955, iteration: 265316
loss: 1.0028952360153198,grad_norm: 0.82073536820313, iteration: 265317
loss: 0.9686362147331238,grad_norm: 0.9744312623347224, iteration: 265318
loss: 1.0042861700057983,grad_norm: 0.9999990659671131, iteration: 265319
loss: 1.064132809638977,grad_norm: 0.9537473509932028, iteration: 265320
loss: 0.9681561589241028,grad_norm: 0.8840939326139535, iteration: 265321
loss: 0.9633389115333557,grad_norm: 0.9999990373764175, iteration: 265322
loss: 1.0197322368621826,grad_norm: 0.9999993933591255, iteration: 265323
loss: 0.995018482208252,grad_norm: 0.7887961756058349, iteration: 265324
loss: 1.0745818614959717,grad_norm: 0.9437109421503228, iteration: 265325
loss: 0.9925535917282104,grad_norm: 0.9621980580507374, iteration: 265326
loss: 1.000257134437561,grad_norm: 0.999999462263426, iteration: 265327
loss: 0.9685928225517273,grad_norm: 0.86292330894586, iteration: 265328
loss: 1.0663034915924072,grad_norm: 0.9999995960640448, iteration: 265329
loss: 1.0449000597000122,grad_norm: 0.9660680972250947, iteration: 265330
loss: 1.0228012800216675,grad_norm: 0.8432366538164126, iteration: 265331
loss: 1.0323230028152466,grad_norm: 0.999999667589597, iteration: 265332
loss: 1.0563805103302002,grad_norm: 0.7942853457960789, iteration: 265333
loss: 0.9878830313682556,grad_norm: 0.9541988072052208, iteration: 265334
loss: 0.9766979217529297,grad_norm: 0.7554256216523345, iteration: 265335
loss: 0.9777256846427917,grad_norm: 0.9526765031735008, iteration: 265336
loss: 0.9976937174797058,grad_norm: 0.8263949482917476, iteration: 265337
loss: 1.0115716457366943,grad_norm: 0.9999992487749505, iteration: 265338
loss: 0.9797378182411194,grad_norm: 0.9854773599166258, iteration: 265339
loss: 0.9752108454704285,grad_norm: 0.7695498075136431, iteration: 265340
loss: 1.017109990119934,grad_norm: 0.9147043877371793, iteration: 265341
loss: 1.021474838256836,grad_norm: 0.9473873017540382, iteration: 265342
loss: 1.0614060163497925,grad_norm: 0.9999990965245437, iteration: 265343
loss: 1.0093525648117065,grad_norm: 0.7986444583942672, iteration: 265344
loss: 1.007263422012329,grad_norm: 0.9999991423616279, iteration: 265345
loss: 0.9985602498054504,grad_norm: 0.7302741724198143, iteration: 265346
loss: 1.0083353519439697,grad_norm: 0.8503265644821597, iteration: 265347
loss: 1.0010133981704712,grad_norm: 0.9999990962289708, iteration: 265348
loss: 0.9694368839263916,grad_norm: 0.999998908201375, iteration: 265349
loss: 1.018731713294983,grad_norm: 0.7750506434239539, iteration: 265350
loss: 1.0285260677337646,grad_norm: 0.8131427540138918, iteration: 265351
loss: 1.0159568786621094,grad_norm: 0.8597154483480891, iteration: 265352
loss: 1.013680338859558,grad_norm: 0.7623235519825797, iteration: 265353
loss: 0.9917708039283752,grad_norm: 0.849859788836909, iteration: 265354
loss: 1.0247300863265991,grad_norm: 0.8717698012051787, iteration: 265355
loss: 1.0212935209274292,grad_norm: 0.922984481242844, iteration: 265356
loss: 0.9709293842315674,grad_norm: 0.8968066431593813, iteration: 265357
loss: 1.0058752298355103,grad_norm: 0.9704546446489292, iteration: 265358
loss: 1.026206612586975,grad_norm: 0.9175346369581024, iteration: 265359
loss: 0.9798158407211304,grad_norm: 0.9641660940934788, iteration: 265360
loss: 0.9940429329872131,grad_norm: 0.9999990327350105, iteration: 265361
loss: 1.001369833946228,grad_norm: 0.9742479165661785, iteration: 265362
loss: 1.0133484601974487,grad_norm: 0.9999991230452789, iteration: 265363
loss: 0.9769233465194702,grad_norm: 0.9037257143504314, iteration: 265364
loss: 1.057249903678894,grad_norm: 0.9781650949444584, iteration: 265365
loss: 1.0208444595336914,grad_norm: 0.999999541557573, iteration: 265366
loss: 1.0193445682525635,grad_norm: 0.9155324387790565, iteration: 265367
loss: 1.0098466873168945,grad_norm: 0.7715386175540805, iteration: 265368
loss: 1.0109502077102661,grad_norm: 0.6819440452905466, iteration: 265369
loss: 1.0056815147399902,grad_norm: 0.949490688240645, iteration: 265370
loss: 1.029289722442627,grad_norm: 0.9265715566749264, iteration: 265371
loss: 1.0076686143875122,grad_norm: 0.884252726129213, iteration: 265372
loss: 0.978293776512146,grad_norm: 0.7518269010157775, iteration: 265373
loss: 1.023951530456543,grad_norm: 0.9160638394925749, iteration: 265374
loss: 1.0258440971374512,grad_norm: 0.9999991602551922, iteration: 265375
loss: 1.0274039506912231,grad_norm: 0.9189072210801947, iteration: 265376
loss: 1.0055166482925415,grad_norm: 0.8277097704018496, iteration: 265377
loss: 1.0062745809555054,grad_norm: 0.9999991175835949, iteration: 265378
loss: 1.0670745372772217,grad_norm: 0.9999991027308813, iteration: 265379
loss: 0.9761005640029907,grad_norm: 0.7665106891881862, iteration: 265380
loss: 1.0834423303604126,grad_norm: 0.8329130491839579, iteration: 265381
loss: 0.9683875441551208,grad_norm: 0.9999989878347393, iteration: 265382
loss: 1.0015108585357666,grad_norm: 0.8817545164066788, iteration: 265383
loss: 1.056565761566162,grad_norm: 0.9956316849779961, iteration: 265384
loss: 1.0422166585922241,grad_norm: 0.8675678977768422, iteration: 265385
loss: 0.9965729117393494,grad_norm: 0.8702743460560105, iteration: 265386
loss: 1.0320217609405518,grad_norm: 0.8326129159232473, iteration: 265387
loss: 1.0242830514907837,grad_norm: 0.7957184469181785, iteration: 265388
loss: 0.9896730780601501,grad_norm: 0.7275964281341838, iteration: 265389
loss: 0.9533159136772156,grad_norm: 0.9877328300065805, iteration: 265390
loss: 1.0668272972106934,grad_norm: 0.9999998039092339, iteration: 265391
loss: 0.9847785830497742,grad_norm: 0.9435861934137854, iteration: 265392
loss: 1.0258159637451172,grad_norm: 0.7889961322046994, iteration: 265393
loss: 0.9755916595458984,grad_norm: 0.9999991920511767, iteration: 265394
loss: 1.0009084939956665,grad_norm: 0.9376830566105153, iteration: 265395
loss: 0.9685416221618652,grad_norm: 0.9180425562056178, iteration: 265396
loss: 0.9795553088188171,grad_norm: 0.764982091685458, iteration: 265397
loss: 0.9809865951538086,grad_norm: 0.8238093247485447, iteration: 265398
loss: 1.011183261871338,grad_norm: 0.841025922250355, iteration: 265399
loss: 0.9990097284317017,grad_norm: 0.9999997321052168, iteration: 265400
loss: 1.0082635879516602,grad_norm: 0.8061559394811163, iteration: 265401
loss: 1.0124191045761108,grad_norm: 0.8689030486948424, iteration: 265402
loss: 0.9776281118392944,grad_norm: 0.8823700719421599, iteration: 265403
loss: 1.0334750413894653,grad_norm: 0.8576459677274009, iteration: 265404
loss: 0.9923046231269836,grad_norm: 0.933061610278699, iteration: 265405
loss: 1.0115994215011597,grad_norm: 0.9677842088112315, iteration: 265406
loss: 1.0148624181747437,grad_norm: 0.8874359840840217, iteration: 265407
loss: 0.9783316850662231,grad_norm: 0.8786070025440629, iteration: 265408
loss: 0.9661139845848083,grad_norm: 0.756934934881204, iteration: 265409
loss: 1.053604245185852,grad_norm: 0.8425007005809647, iteration: 265410
loss: 0.9940299391746521,grad_norm: 0.9346682684226832, iteration: 265411
loss: 0.9525492787361145,grad_norm: 0.7679894391531554, iteration: 265412
loss: 0.9917927980422974,grad_norm: 0.9366144030836241, iteration: 265413
loss: 1.0107876062393188,grad_norm: 0.8630446583666707, iteration: 265414
loss: 1.0080186128616333,grad_norm: 0.7626576307515908, iteration: 265415
loss: 0.9768088459968567,grad_norm: 0.9800411793129924, iteration: 265416
loss: 1.0543177127838135,grad_norm: 0.9661829323362994, iteration: 265417
loss: 1.0155320167541504,grad_norm: 0.9357212843512361, iteration: 265418
loss: 0.9605249762535095,grad_norm: 0.7017103195861966, iteration: 265419
loss: 1.02625572681427,grad_norm: 0.9380044748137719, iteration: 265420
loss: 1.0189119577407837,grad_norm: 0.8127656064406319, iteration: 265421
loss: 1.029881238937378,grad_norm: 0.9999990958203111, iteration: 265422
loss: 1.0219531059265137,grad_norm: 0.786212529005372, iteration: 265423
loss: 1.0040843486785889,grad_norm: 0.8741098252693825, iteration: 265424
loss: 1.032370686531067,grad_norm: 0.9999992092881413, iteration: 265425
loss: 0.9865499138832092,grad_norm: 0.8909914678710941, iteration: 265426
loss: 1.0159728527069092,grad_norm: 0.9497930817615152, iteration: 265427
loss: 1.054614543914795,grad_norm: 0.8053720154322651, iteration: 265428
loss: 1.0082098245620728,grad_norm: 0.8318869442225719, iteration: 265429
loss: 0.9820428490638733,grad_norm: 0.9999995668939775, iteration: 265430
loss: 0.983516275882721,grad_norm: 0.8152024696783906, iteration: 265431
loss: 1.0057562589645386,grad_norm: 0.8414699918488201, iteration: 265432
loss: 1.016390323638916,grad_norm: 0.999999880732222, iteration: 265433
loss: 1.0907853841781616,grad_norm: 0.9362682943735127, iteration: 265434
loss: 1.018667459487915,grad_norm: 0.9452785989644618, iteration: 265435
loss: 0.9831514358520508,grad_norm: 0.8568997491561808, iteration: 265436
loss: 0.9947373867034912,grad_norm: 0.9718241675221946, iteration: 265437
loss: 0.997927188873291,grad_norm: 0.9266175916574145, iteration: 265438
loss: 1.0366883277893066,grad_norm: 0.9377631119709591, iteration: 265439
loss: 1.0073704719543457,grad_norm: 0.8236177880826715, iteration: 265440
loss: 0.9909525513648987,grad_norm: 0.848008996984508, iteration: 265441
loss: 0.9945409893989563,grad_norm: 0.8384576490714937, iteration: 265442
loss: 1.0384900569915771,grad_norm: 0.9229545380911196, iteration: 265443
loss: 1.0230411291122437,grad_norm: 0.946974895396462, iteration: 265444
loss: 0.9912957549095154,grad_norm: 0.9098569178674423, iteration: 265445
loss: 1.0382684469223022,grad_norm: 0.9763273531925176, iteration: 265446
loss: 1.0330941677093506,grad_norm: 0.9999989825480284, iteration: 265447
loss: 1.1157995462417603,grad_norm: 0.9999996667189218, iteration: 265448
loss: 1.0208505392074585,grad_norm: 0.8539379141506745, iteration: 265449
loss: 0.9843995571136475,grad_norm: 0.7739047670704811, iteration: 265450
loss: 1.0199261903762817,grad_norm: 0.8585728226844602, iteration: 265451
loss: 1.0372462272644043,grad_norm: 0.8538721513931457, iteration: 265452
loss: 0.994394063949585,grad_norm: 0.7173178128060353, iteration: 265453
loss: 1.0243488550186157,grad_norm: 0.7817877692306705, iteration: 265454
loss: 0.9779542088508606,grad_norm: 0.9999989917571342, iteration: 265455
loss: 1.0147515535354614,grad_norm: 0.971117788907879, iteration: 265456
loss: 0.989353358745575,grad_norm: 0.9999992180376333, iteration: 265457
loss: 1.0022119283676147,grad_norm: 0.9999991233086011, iteration: 265458
loss: 0.980779767036438,grad_norm: 0.9999994012342496, iteration: 265459
loss: 0.9830641746520996,grad_norm: 0.9999990362216262, iteration: 265460
loss: 0.9731088280677795,grad_norm: 0.9649652223355606, iteration: 265461
loss: 0.9836490750312805,grad_norm: 0.8728441280670699, iteration: 265462
loss: 0.9941278100013733,grad_norm: 0.9999990878374896, iteration: 265463
loss: 0.9878910183906555,grad_norm: 0.9136762154577435, iteration: 265464
loss: 0.9908062815666199,grad_norm: 0.9191035175792223, iteration: 265465
loss: 1.0314462184906006,grad_norm: 0.7533958975638082, iteration: 265466
loss: 1.004359483718872,grad_norm: 0.8653738521616876, iteration: 265467
loss: 1.0023163557052612,grad_norm: 0.8676551699458219, iteration: 265468
loss: 1.024643063545227,grad_norm: 0.871067850620519, iteration: 265469
loss: 0.9888031482696533,grad_norm: 0.885897497195965, iteration: 265470
loss: 1.3050576448440552,grad_norm: 0.9999997038894796, iteration: 265471
loss: 1.0431820154190063,grad_norm: 0.9999992470794756, iteration: 265472
loss: 1.012387990951538,grad_norm: 0.9999989935372897, iteration: 265473
loss: 1.0114551782608032,grad_norm: 0.9280800019354262, iteration: 265474
loss: 0.9909510016441345,grad_norm: 0.7985885724832973, iteration: 265475
loss: 0.9831412434577942,grad_norm: 0.7315782755703092, iteration: 265476
loss: 1.001471996307373,grad_norm: 0.8317315940244002, iteration: 265477
loss: 1.0049102306365967,grad_norm: 0.9457826152429702, iteration: 265478
loss: 1.0685900449752808,grad_norm: 0.9723622674762459, iteration: 265479
loss: 0.982661247253418,grad_norm: 0.7539029626961166, iteration: 265480
loss: 1.0167165994644165,grad_norm: 0.8303494874088507, iteration: 265481
loss: 1.0488907098770142,grad_norm: 0.8517958852941415, iteration: 265482
loss: 1.031779170036316,grad_norm: 0.9999993300036506, iteration: 265483
loss: 0.9815292954444885,grad_norm: 0.7997611838565789, iteration: 265484
loss: 0.9881978631019592,grad_norm: 0.999999112538536, iteration: 265485
loss: 1.0126627683639526,grad_norm: 0.9207365801377021, iteration: 265486
loss: 0.999332845211029,grad_norm: 0.9921364925948348, iteration: 265487
loss: 0.9918387532234192,grad_norm: 0.8050422698520241, iteration: 265488
loss: 1.1520529985427856,grad_norm: 0.9999991071252693, iteration: 265489
loss: 0.9818152189254761,grad_norm: 0.7244302990207085, iteration: 265490
loss: 0.9777292013168335,grad_norm: 0.995907900002245, iteration: 265491
loss: 0.9645679593086243,grad_norm: 0.8598886831326636, iteration: 265492
loss: 0.9617676138877869,grad_norm: 0.9145375833250295, iteration: 265493
loss: 1.0096991062164307,grad_norm: 0.8471779505492012, iteration: 265494
loss: 0.9723237752914429,grad_norm: 0.8349401391742548, iteration: 265495
loss: 1.009383201599121,grad_norm: 0.9052266334801661, iteration: 265496
loss: 1.0242656469345093,grad_norm: 0.7854251955070215, iteration: 265497
loss: 0.9756453037261963,grad_norm: 0.840770814448057, iteration: 265498
loss: 0.9818075299263,grad_norm: 0.9110384484677471, iteration: 265499
loss: 0.989289402961731,grad_norm: 0.8855167334079667, iteration: 265500
loss: 1.0076714754104614,grad_norm: 0.8679162907134899, iteration: 265501
loss: 1.0769155025482178,grad_norm: 0.7517789729873475, iteration: 265502
loss: 1.0297452211380005,grad_norm: 0.8020607212137031, iteration: 265503
loss: 0.9926278591156006,grad_norm: 0.7612164016722938, iteration: 265504
loss: 1.0167878866195679,grad_norm: 0.9105431117545293, iteration: 265505
loss: 1.0640544891357422,grad_norm: 0.999999192062988, iteration: 265506
loss: 1.029674768447876,grad_norm: 0.9559144200551598, iteration: 265507
loss: 1.0328190326690674,grad_norm: 0.97775115587738, iteration: 265508
loss: 1.0189176797866821,grad_norm: 0.7445159432126403, iteration: 265509
loss: 1.010176658630371,grad_norm: 0.8748737523576138, iteration: 265510
loss: 0.9857812523841858,grad_norm: 0.9999990197686519, iteration: 265511
loss: 0.9861379265785217,grad_norm: 0.9845281035427096, iteration: 265512
loss: 0.947635293006897,grad_norm: 0.8281866957209788, iteration: 265513
loss: 0.9661992192268372,grad_norm: 0.9954785178175226, iteration: 265514
loss: 0.9787310361862183,grad_norm: 0.8304294706941693, iteration: 265515
loss: 0.9889464378356934,grad_norm: 0.8785184764249401, iteration: 265516
loss: 0.9949765205383301,grad_norm: 0.8476564668654931, iteration: 265517
loss: 1.0138880014419556,grad_norm: 0.9999995562683832, iteration: 265518
loss: 1.020350694656372,grad_norm: 0.808914382268723, iteration: 265519
loss: 1.0209022760391235,grad_norm: 0.8357981584026033, iteration: 265520
loss: 0.9954162836074829,grad_norm: 0.9999991310413434, iteration: 265521
loss: 1.07063627243042,grad_norm: 0.999999833797291, iteration: 265522
loss: 1.0120645761489868,grad_norm: 0.7901161410541586, iteration: 265523
loss: 0.9562118649482727,grad_norm: 0.9328444051873784, iteration: 265524
loss: 1.0646618604660034,grad_norm: 0.9999991048497092, iteration: 265525
loss: 0.9946715235710144,grad_norm: 0.8973922154856528, iteration: 265526
loss: 1.007283329963684,grad_norm: 0.8514543803050577, iteration: 265527
loss: 0.9513242840766907,grad_norm: 0.9999993732558887, iteration: 265528
loss: 1.0047391653060913,grad_norm: 0.9999990569867006, iteration: 265529
loss: 1.0322365760803223,grad_norm: 0.9999995625564354, iteration: 265530
loss: 1.0236965417861938,grad_norm: 0.9999990617426538, iteration: 265531
loss: 1.0094612836837769,grad_norm: 0.8145513720331363, iteration: 265532
loss: 0.9945792555809021,grad_norm: 0.9020160479995852, iteration: 265533
loss: 1.0182338953018188,grad_norm: 0.8880265232439415, iteration: 265534
loss: 0.9928329586982727,grad_norm: 0.9999992460812519, iteration: 265535
loss: 1.00830078125,grad_norm: 0.8936858894463533, iteration: 265536
loss: 1.0188066959381104,grad_norm: 0.9843067603324624, iteration: 265537
loss: 0.9834584593772888,grad_norm: 0.9061269152195682, iteration: 265538
loss: 1.023120641708374,grad_norm: 0.9999990257529301, iteration: 265539
loss: 1.0382369756698608,grad_norm: 0.9155403840602063, iteration: 265540
loss: 1.0955560207366943,grad_norm: 0.8178313285350859, iteration: 265541
loss: 1.01278817653656,grad_norm: 0.9688806906197232, iteration: 265542
loss: 1.0149712562561035,grad_norm: 0.8830903091698515, iteration: 265543
loss: 1.0148056745529175,grad_norm: 0.9265361459227863, iteration: 265544
loss: 0.9791561961174011,grad_norm: 0.8695025378350906, iteration: 265545
loss: 0.9697392582893372,grad_norm: 0.7374941530653581, iteration: 265546
loss: 0.9895323514938354,grad_norm: 0.9785011222463224, iteration: 265547
loss: 1.0333199501037598,grad_norm: 0.7251879857276272, iteration: 265548
loss: 1.1054434776306152,grad_norm: 0.999999405749432, iteration: 265549
loss: 0.9826374053955078,grad_norm: 0.909936077728213, iteration: 265550
loss: 1.05018150806427,grad_norm: 0.9999991500140257, iteration: 265551
loss: 1.033394455909729,grad_norm: 0.9999992063440287, iteration: 265552
loss: 1.0158884525299072,grad_norm: 0.8937484342026293, iteration: 265553
loss: 1.0154392719268799,grad_norm: 0.9883857745719379, iteration: 265554
loss: 0.9722191095352173,grad_norm: 0.9077098770648708, iteration: 265555
loss: 0.9928941130638123,grad_norm: 0.9999995490175387, iteration: 265556
loss: 0.9988548755645752,grad_norm: 0.7240398065941368, iteration: 265557
loss: 0.9821467399597168,grad_norm: 0.8196845416258561, iteration: 265558
loss: 1.0182850360870361,grad_norm: 0.9999992702192415, iteration: 265559
loss: 1.042333722114563,grad_norm: 0.9433719271778626, iteration: 265560
loss: 1.0284658670425415,grad_norm: 0.9999991197760622, iteration: 265561
loss: 0.9542050361633301,grad_norm: 0.9354352000518255, iteration: 265562
loss: 1.006817102432251,grad_norm: 0.999999865699471, iteration: 265563
loss: 1.0054351091384888,grad_norm: 0.9128834549378262, iteration: 265564
loss: 1.0388476848602295,grad_norm: 0.9999996694398908, iteration: 265565
loss: 0.9904355406761169,grad_norm: 0.8576205488180156, iteration: 265566
loss: 1.041548490524292,grad_norm: 1.00000011167706, iteration: 265567
loss: 0.9900819659233093,grad_norm: 0.8152226975062729, iteration: 265568
loss: 1.0902873277664185,grad_norm: 0.9999998495716352, iteration: 265569
loss: 1.010926604270935,grad_norm: 0.9999992884998655, iteration: 265570
loss: 0.9982168078422546,grad_norm: 0.8568462444046117, iteration: 265571
loss: 0.9966657161712646,grad_norm: 0.8197602355868452, iteration: 265572
loss: 1.0373066663742065,grad_norm: 0.9999989612245263, iteration: 265573
loss: 0.9684156179428101,grad_norm: 0.8083435889204044, iteration: 265574
loss: 0.9959923624992371,grad_norm: 0.8712889592170819, iteration: 265575
loss: 1.015419840812683,grad_norm: 0.8410077808732013, iteration: 265576
loss: 0.9815343022346497,grad_norm: 0.9999989761218075, iteration: 265577
loss: 0.9890726208686829,grad_norm: 0.8888768562796727, iteration: 265578
loss: 1.0076404809951782,grad_norm: 0.9644565176868332, iteration: 265579
loss: 0.9867030382156372,grad_norm: 0.9566786605237337, iteration: 265580
loss: 1.0102777481079102,grad_norm: 0.99999917605341, iteration: 265581
loss: 0.9886722564697266,grad_norm: 0.9833605070725148, iteration: 265582
loss: 1.0352228879928589,grad_norm: 0.8024769587011624, iteration: 265583
loss: 0.9932302832603455,grad_norm: 0.9689684170459695, iteration: 265584
loss: 0.9855688810348511,grad_norm: 0.8436054110229643, iteration: 265585
loss: 0.9810771942138672,grad_norm: 0.8877950998486169, iteration: 265586
loss: 1.006384253501892,grad_norm: 0.8194783932589464, iteration: 265587
loss: 1.0374141931533813,grad_norm: 0.6937918354950187, iteration: 265588
loss: 1.0091971158981323,grad_norm: 0.734742699271011, iteration: 265589
loss: 0.9799361228942871,grad_norm: 0.8071164279001564, iteration: 265590
loss: 1.0549473762512207,grad_norm: 0.8938228456978866, iteration: 265591
loss: 0.9978693723678589,grad_norm: 0.8788742936976432, iteration: 265592
loss: 1.0204277038574219,grad_norm: 0.999999688108952, iteration: 265593
loss: 0.9816005825996399,grad_norm: 0.9999990250877275, iteration: 265594
loss: 1.013991117477417,grad_norm: 0.8784672523563908, iteration: 265595
loss: 0.9897209405899048,grad_norm: 0.8549808805595708, iteration: 265596
loss: 1.0760408639907837,grad_norm: 0.999999719494121, iteration: 265597
loss: 1.0137042999267578,grad_norm: 0.9419080053620951, iteration: 265598
loss: 1.0100796222686768,grad_norm: 0.9999990387150136, iteration: 265599
loss: 1.0153179168701172,grad_norm: 0.9564972043269608, iteration: 265600
loss: 1.0214143991470337,grad_norm: 0.9041601614083425, iteration: 265601
loss: 0.9630988836288452,grad_norm: 0.9999992169935387, iteration: 265602
loss: 0.9738743305206299,grad_norm: 0.999998917432109, iteration: 265603
loss: 1.0023939609527588,grad_norm: 0.9767755275646598, iteration: 265604
loss: 0.9742078185081482,grad_norm: 0.8908181726750053, iteration: 265605
loss: 1.0116736888885498,grad_norm: 0.9412005446259649, iteration: 265606
loss: 0.9823010563850403,grad_norm: 0.9999992117177482, iteration: 265607
loss: 1.025668740272522,grad_norm: 0.8662564463617551, iteration: 265608
loss: 1.0374715328216553,grad_norm: 0.9354276543157221, iteration: 265609
loss: 0.9937313795089722,grad_norm: 0.9999990031773407, iteration: 265610
loss: 0.9880762696266174,grad_norm: 0.9088260745175883, iteration: 265611
loss: 0.9765497446060181,grad_norm: 0.8403895450385774, iteration: 265612
loss: 0.9982033967971802,grad_norm: 0.8069025188634146, iteration: 265613
loss: 1.0101133584976196,grad_norm: 0.7358823385148241, iteration: 265614
loss: 0.9873694181442261,grad_norm: 0.9999990011541202, iteration: 265615
loss: 0.9886486530303955,grad_norm: 0.8977732398297857, iteration: 265616
loss: 1.0194129943847656,grad_norm: 0.9720313589334273, iteration: 265617
loss: 0.9883348345756531,grad_norm: 0.8166751593771778, iteration: 265618
loss: 0.9828161001205444,grad_norm: 0.9681726402643627, iteration: 265619
loss: 1.0267001390457153,grad_norm: 0.7912456131303849, iteration: 265620
loss: 1.0129109621047974,grad_norm: 0.9163677097466303, iteration: 265621
loss: 1.1084516048431396,grad_norm: 0.9999994284023713, iteration: 265622
loss: 1.0190811157226562,grad_norm: 0.9999990873652691, iteration: 265623
loss: 0.9515939354896545,grad_norm: 0.9051602142929441, iteration: 265624
loss: 1.0200560092926025,grad_norm: 0.9085182598106184, iteration: 265625
loss: 0.9645978808403015,grad_norm: 0.9047998313872418, iteration: 265626
loss: 1.0020487308502197,grad_norm: 0.9753490192102025, iteration: 265627
loss: 1.008298635482788,grad_norm: 0.9703760971703954, iteration: 265628
loss: 1.0223203897476196,grad_norm: 0.9999999543081264, iteration: 265629
loss: 1.0439437627792358,grad_norm: 0.9999990579189572, iteration: 265630
loss: 1.000078797340393,grad_norm: 0.9338798030639937, iteration: 265631
loss: 1.0374410152435303,grad_norm: 0.847543818831651, iteration: 265632
loss: 1.0954951047897339,grad_norm: 0.9999991619682308, iteration: 265633
loss: 1.0044199228286743,grad_norm: 0.9842608543231374, iteration: 265634
loss: 1.0034067630767822,grad_norm: 0.9781964623951899, iteration: 265635
loss: 1.0003764629364014,grad_norm: 0.7612248777658585, iteration: 265636
loss: 1.0204344987869263,grad_norm: 0.8430737756830059, iteration: 265637
loss: 1.0186872482299805,grad_norm: 0.9442618951137928, iteration: 265638
loss: 1.0204578638076782,grad_norm: 0.7291661883232524, iteration: 265639
loss: 0.9935389161109924,grad_norm: 0.9999991039646279, iteration: 265640
loss: 1.0145878791809082,grad_norm: 0.795027036887831, iteration: 265641
loss: 1.0134650468826294,grad_norm: 0.9263435076868697, iteration: 265642
loss: 1.0362200736999512,grad_norm: 0.9320547949797081, iteration: 265643
loss: 1.0328084230422974,grad_norm: 0.8958158612293369, iteration: 265644
loss: 1.0018799304962158,grad_norm: 0.9746633386047576, iteration: 265645
loss: 0.9939319491386414,grad_norm: 0.9999992037882098, iteration: 265646
loss: 1.0044134855270386,grad_norm: 0.9999990779335342, iteration: 265647
loss: 0.989432156085968,grad_norm: 0.9625504451084147, iteration: 265648
loss: 0.9840539693832397,grad_norm: 0.8853654961528512, iteration: 265649
loss: 1.0363444089889526,grad_norm: 0.9999997160029364, iteration: 265650
loss: 0.993252158164978,grad_norm: 0.9035440496730683, iteration: 265651
loss: 1.0017564296722412,grad_norm: 0.8043308878186051, iteration: 265652
loss: 1.0029535293579102,grad_norm: 0.9999991783107187, iteration: 265653
loss: 1.0051296949386597,grad_norm: 0.8978426437284889, iteration: 265654
loss: 1.0192667245864868,grad_norm: 0.9966802901719923, iteration: 265655
loss: 1.0149294137954712,grad_norm: 0.9327381492328108, iteration: 265656
loss: 1.0454342365264893,grad_norm: 0.9999998765001046, iteration: 265657
loss: 1.0062530040740967,grad_norm: 0.9396134816701566, iteration: 265658
loss: 1.031795620918274,grad_norm: 0.8069603656177046, iteration: 265659
loss: 1.0007967948913574,grad_norm: 0.8748088804910831, iteration: 265660
loss: 1.0123894214630127,grad_norm: 0.8507336455946406, iteration: 265661
loss: 1.027854084968567,grad_norm: 0.826434888515169, iteration: 265662
loss: 1.0082718133926392,grad_norm: 0.9346759573538399, iteration: 265663
loss: 1.0000065565109253,grad_norm: 0.9132797802952148, iteration: 265664
loss: 0.9783928990364075,grad_norm: 0.7311102032752735, iteration: 265665
loss: 0.9864802956581116,grad_norm: 0.9895317137902219, iteration: 265666
loss: 0.9719730019569397,grad_norm: 0.8706838339528591, iteration: 265667
loss: 0.9892258048057556,grad_norm: 0.8491857673810466, iteration: 265668
loss: 0.993217408657074,grad_norm: 0.9227980602415407, iteration: 265669
loss: 0.9327933192253113,grad_norm: 0.8415597645384423, iteration: 265670
loss: 1.0133589506149292,grad_norm: 0.9060614412782239, iteration: 265671
loss: 0.9949856400489807,grad_norm: 0.7960103866971492, iteration: 265672
loss: 1.0077556371688843,grad_norm: 0.9470184416763783, iteration: 265673
loss: 1.0642565488815308,grad_norm: 0.9999997859433766, iteration: 265674
loss: 1.002285122871399,grad_norm: 0.9930751199842545, iteration: 265675
loss: 0.9713439345359802,grad_norm: 0.8903371102758215, iteration: 265676
loss: 1.042577862739563,grad_norm: 0.9999989654465722, iteration: 265677
loss: 0.9992825984954834,grad_norm: 0.9353889860820611, iteration: 265678
loss: 1.0066041946411133,grad_norm: 0.99999906789246, iteration: 265679
loss: 0.9833494424819946,grad_norm: 0.9735109242561821, iteration: 265680
loss: 0.9794791340827942,grad_norm: 0.8883191958380242, iteration: 265681
loss: 1.0036779642105103,grad_norm: 0.8901796712648904, iteration: 265682
loss: 0.993272602558136,grad_norm: 0.8828185827498781, iteration: 265683
loss: 1.0181427001953125,grad_norm: 0.9999991278450396, iteration: 265684
loss: 1.0099880695343018,grad_norm: 0.7402866286402031, iteration: 265685
loss: 1.0140362977981567,grad_norm: 0.9392080860333295, iteration: 265686
loss: 1.006057620048523,grad_norm: 1.0000000154108926, iteration: 265687
loss: 1.031736135482788,grad_norm: 0.999999033464609, iteration: 265688
loss: 0.9813482761383057,grad_norm: 0.8415882667400112, iteration: 265689
loss: 0.9946328997612,grad_norm: 0.9115280632485661, iteration: 265690
loss: 1.0074114799499512,grad_norm: 0.7737195386686222, iteration: 265691
loss: 0.9995627403259277,grad_norm: 0.9999990977083296, iteration: 265692
loss: 1.0109570026397705,grad_norm: 0.8715250021740425, iteration: 265693
loss: 0.982226550579071,grad_norm: 0.7487120998489787, iteration: 265694
loss: 1.0706840753555298,grad_norm: 0.9999992375294768, iteration: 265695
loss: 0.9952008724212646,grad_norm: 0.9170357454801904, iteration: 265696
loss: 0.9946020841598511,grad_norm: 0.9999991300497032, iteration: 265697
loss: 0.9671744108200073,grad_norm: 0.9747169554333047, iteration: 265698
loss: 0.9958475828170776,grad_norm: 0.999999096363707, iteration: 265699
loss: 0.9950417876243591,grad_norm: 0.9999991122575872, iteration: 265700
loss: 0.9631580114364624,grad_norm: 0.9308547178924442, iteration: 265701
loss: 1.0303465127944946,grad_norm: 0.8636862007724958, iteration: 265702
loss: 1.0374237298965454,grad_norm: 0.9999997452409985, iteration: 265703
loss: 0.9789337515830994,grad_norm: 0.9999990570951718, iteration: 265704
loss: 1.0050514936447144,grad_norm: 0.8314232628513403, iteration: 265705
loss: 1.0006014108657837,grad_norm: 0.9835681126434058, iteration: 265706
loss: 0.9980454444885254,grad_norm: 0.8640197743422223, iteration: 265707
loss: 0.9526832103729248,grad_norm: 0.8935185843114956, iteration: 265708
loss: 1.0037113428115845,grad_norm: 0.9317429851941323, iteration: 265709
loss: 0.9962692260742188,grad_norm: 0.8673399025792717, iteration: 265710
loss: 0.9828163385391235,grad_norm: 0.8044958658165304, iteration: 265711
loss: 1.0155713558197021,grad_norm: 0.7765502337901319, iteration: 265712
loss: 1.020753264427185,grad_norm: 0.917369141692783, iteration: 265713
loss: 0.9853174686431885,grad_norm: 0.7596652355904471, iteration: 265714
loss: 1.0108619928359985,grad_norm: 0.9769092401102748, iteration: 265715
loss: 1.007222056388855,grad_norm: 0.958757104800314, iteration: 265716
loss: 0.9453521966934204,grad_norm: 0.8930715171765927, iteration: 265717
loss: 0.992900013923645,grad_norm: 0.9999991302774998, iteration: 265718
loss: 1.0444246530532837,grad_norm: 0.9120653778286474, iteration: 265719
loss: 1.0005244016647339,grad_norm: 0.9999991800979097, iteration: 265720
loss: 1.0138664245605469,grad_norm: 0.9573032658063265, iteration: 265721
loss: 1.0062776803970337,grad_norm: 0.9309923723173075, iteration: 265722
loss: 1.0162715911865234,grad_norm: 0.8857378437356417, iteration: 265723
loss: 0.9610911011695862,grad_norm: 0.7224447690523684, iteration: 265724
loss: 1.0719841718673706,grad_norm: 0.8414890169115347, iteration: 265725
loss: 0.9979242086410522,grad_norm: 0.8420521205650656, iteration: 265726
loss: 1.047865867614746,grad_norm: 0.9999995101261085, iteration: 265727
loss: 1.037673830986023,grad_norm: 0.8958387584877674, iteration: 265728
loss: 0.9878893494606018,grad_norm: 0.9444412880512175, iteration: 265729
loss: 0.968601644039154,grad_norm: 0.9814763474836401, iteration: 265730
loss: 0.9447804689407349,grad_norm: 0.8758813583831214, iteration: 265731
loss: 0.9476196765899658,grad_norm: 0.8434857849187942, iteration: 265732
loss: 1.0204272270202637,grad_norm: 0.8755562755021388, iteration: 265733
loss: 1.007508397102356,grad_norm: 0.8335487344026964, iteration: 265734
loss: 0.9857640266418457,grad_norm: 0.9999990343051712, iteration: 265735
loss: 0.9998443722724915,grad_norm: 0.9112601521670122, iteration: 265736
loss: 0.9356614351272583,grad_norm: 0.9999990746987102, iteration: 265737
loss: 0.980038583278656,grad_norm: 0.9580183874702152, iteration: 265738
loss: 1.0005443096160889,grad_norm: 0.9999991443074046, iteration: 265739
loss: 1.0209027528762817,grad_norm: 0.9261022111938998, iteration: 265740
loss: 0.998572826385498,grad_norm: 0.9999990739648, iteration: 265741
loss: 1.028282642364502,grad_norm: 0.933555025844778, iteration: 265742
loss: 1.0125056505203247,grad_norm: 0.8298322269587669, iteration: 265743
loss: 1.0063632726669312,grad_norm: 0.8464954472489968, iteration: 265744
loss: 1.0328794717788696,grad_norm: 0.9449288139227185, iteration: 265745
loss: 1.0093072652816772,grad_norm: 0.80716153983213, iteration: 265746
loss: 1.0459433794021606,grad_norm: 0.9999989778356237, iteration: 265747
loss: 0.9692943692207336,grad_norm: 0.8957892365401465, iteration: 265748
loss: 0.9928405284881592,grad_norm: 0.9339706256766054, iteration: 265749
loss: 0.9865298867225647,grad_norm: 0.7418690307939805, iteration: 265750
loss: 0.9737514853477478,grad_norm: 0.8402086889179713, iteration: 265751
loss: 0.9655280709266663,grad_norm: 0.9504772711919479, iteration: 265752
loss: 1.0055235624313354,grad_norm: 0.904756208101742, iteration: 265753
loss: 1.0020272731781006,grad_norm: 0.8139398080534815, iteration: 265754
loss: 1.0501466989517212,grad_norm: 0.9999993923441232, iteration: 265755
loss: 0.9528778195381165,grad_norm: 0.999999123221258, iteration: 265756
loss: 1.0310630798339844,grad_norm: 0.9999996353472939, iteration: 265757
loss: 1.055528998374939,grad_norm: 0.9517407744268428, iteration: 265758
loss: 1.0191404819488525,grad_norm: 0.8248712106161615, iteration: 265759
loss: 0.964925229549408,grad_norm: 0.9435741838972929, iteration: 265760
loss: 1.0004054307937622,grad_norm: 0.9158449115206038, iteration: 265761
loss: 1.0234454870224,grad_norm: 0.8413629836600724, iteration: 265762
loss: 0.985964298248291,grad_norm: 0.8436905353277101, iteration: 265763
loss: 1.0307214260101318,grad_norm: 0.931248296481594, iteration: 265764
loss: 1.088737964630127,grad_norm: 0.9228378265906599, iteration: 265765
loss: 1.1391254663467407,grad_norm: 0.9999999211805177, iteration: 265766
loss: 1.0007706880569458,grad_norm: 0.8194184928224926, iteration: 265767
loss: 1.0101158618927002,grad_norm: 0.9999991833981202, iteration: 265768
loss: 0.9826894402503967,grad_norm: 0.8795783921690434, iteration: 265769
loss: 1.0861464738845825,grad_norm: 0.9999999368044724, iteration: 265770
loss: 0.9999860525131226,grad_norm: 0.9256323739484313, iteration: 265771
loss: 1.0273497104644775,grad_norm: 0.9932351645319458, iteration: 265772
loss: 0.9983993768692017,grad_norm: 0.7932702357802247, iteration: 265773
loss: 1.0205734968185425,grad_norm: 0.9116597636611172, iteration: 265774
loss: 0.9804807305335999,grad_norm: 0.8917909368820344, iteration: 265775
loss: 1.0243921279907227,grad_norm: 0.9999991031434101, iteration: 265776
loss: 0.985939621925354,grad_norm: 0.8009618493927368, iteration: 265777
loss: 0.9806166291236877,grad_norm: 0.9160839569567147, iteration: 265778
loss: 0.9623085856437683,grad_norm: 0.8497864470526617, iteration: 265779
loss: 1.0030142068862915,grad_norm: 0.9999992041284337, iteration: 265780
loss: 0.9985295534133911,grad_norm: 0.9999990888126418, iteration: 265781
loss: 0.9914527535438538,grad_norm: 0.9278307266155228, iteration: 265782
loss: 1.0048636198043823,grad_norm: 0.8381343810147363, iteration: 265783
loss: 0.9737876653671265,grad_norm: 0.862598824525661, iteration: 265784
loss: 1.0279321670532227,grad_norm: 0.9999991886885312, iteration: 265785
loss: 0.9482353329658508,grad_norm: 0.7933284232271767, iteration: 265786
loss: 1.114684820175171,grad_norm: 0.9488414402973471, iteration: 265787
loss: 0.9800784587860107,grad_norm: 0.8690999548084044, iteration: 265788
loss: 0.9667107462882996,grad_norm: 0.858712856526343, iteration: 265789
loss: 1.0263574123382568,grad_norm: 0.8688698102574589, iteration: 265790
loss: 0.992314875125885,grad_norm: 0.9999991155113123, iteration: 265791
loss: 1.01120924949646,grad_norm: 1.0000001238975875, iteration: 265792
loss: 1.0438458919525146,grad_norm: 0.897273830208788, iteration: 265793
loss: 1.00574791431427,grad_norm: 0.9050244484332196, iteration: 265794
loss: 1.0001003742218018,grad_norm: 0.9460049872243688, iteration: 265795
loss: 0.9764168858528137,grad_norm: 0.885081190217884, iteration: 265796
loss: 1.0061588287353516,grad_norm: 0.9999990532366624, iteration: 265797
loss: 1.0058636665344238,grad_norm: 0.9999993195545122, iteration: 265798
loss: 0.9856729507446289,grad_norm: 0.987675459694559, iteration: 265799
loss: 1.0091670751571655,grad_norm: 0.8703667729177792, iteration: 265800
loss: 1.0187946557998657,grad_norm: 0.9999992197152363, iteration: 265801
loss: 1.0091785192489624,grad_norm: 0.8522046956515279, iteration: 265802
loss: 0.9832852482795715,grad_norm: 0.832880639694285, iteration: 265803
loss: 1.011680006980896,grad_norm: 0.8630305224126612, iteration: 265804
loss: 1.0103763341903687,grad_norm: 0.9122665625338287, iteration: 265805
loss: 1.0158389806747437,grad_norm: 0.9070775573337819, iteration: 265806
loss: 0.9952585697174072,grad_norm: 0.8203482800439805, iteration: 265807
loss: 0.9866006970405579,grad_norm: 0.9530656674680886, iteration: 265808
loss: 1.0061248540878296,grad_norm: 0.8861003958519755, iteration: 265809
loss: 1.0264936685562134,grad_norm: 0.7966306570943784, iteration: 265810
loss: 1.0085684061050415,grad_norm: 0.7981900632186791, iteration: 265811
loss: 0.9553384184837341,grad_norm: 0.8732972380174003, iteration: 265812
loss: 1.0142103433609009,grad_norm: 0.8737048650506642, iteration: 265813
loss: 1.0184520483016968,grad_norm: 0.846087807729445, iteration: 265814
loss: 0.9798654317855835,grad_norm: 0.9695437126428877, iteration: 265815
loss: 1.0205284357070923,grad_norm: 0.8075592724877226, iteration: 265816
loss: 1.0056520700454712,grad_norm: 0.8517147885691142, iteration: 265817
loss: 0.9645566344261169,grad_norm: 0.897178361095584, iteration: 265818
loss: 0.9669339060783386,grad_norm: 0.9357204779646608, iteration: 265819
loss: 0.9975132942199707,grad_norm: 0.8721743025424108, iteration: 265820
loss: 1.021089792251587,grad_norm: 0.9422058694375898, iteration: 265821
loss: 1.0132851600646973,grad_norm: 0.9128622038637657, iteration: 265822
loss: 0.9809215068817139,grad_norm: 0.9477172959072483, iteration: 265823
loss: 1.0107061862945557,grad_norm: 0.8025840804874212, iteration: 265824
loss: 0.9781818389892578,grad_norm: 0.9999996416743696, iteration: 265825
loss: 1.0752780437469482,grad_norm: 0.7989979614206765, iteration: 265826
loss: 1.0049759149551392,grad_norm: 0.8474858146287756, iteration: 265827
loss: 1.031142234802246,grad_norm: 0.9810785298155461, iteration: 265828
loss: 0.9885691404342651,grad_norm: 0.8088511787893746, iteration: 265829
loss: 1.0196493864059448,grad_norm: 0.7853601134394788, iteration: 265830
loss: 1.0164850950241089,grad_norm: 0.8472294701417495, iteration: 265831
loss: 1.0075526237487793,grad_norm: 0.9999991916109023, iteration: 265832
loss: 1.0156360864639282,grad_norm: 0.8362666273046923, iteration: 265833
loss: 0.9894453883171082,grad_norm: 0.9929930087001646, iteration: 265834
loss: 1.0059691667556763,grad_norm: 0.9677191261226888, iteration: 265835
loss: 1.0168938636779785,grad_norm: 0.8335186941812321, iteration: 265836
loss: 0.9900439381599426,grad_norm: 0.8676288527005077, iteration: 265837
loss: 1.0010181665420532,grad_norm: 1.0000000100730126, iteration: 265838
loss: 1.055259346961975,grad_norm: 0.9999990963913433, iteration: 265839
loss: 0.9677878022193909,grad_norm: 0.7640503946903323, iteration: 265840
loss: 0.9958255290985107,grad_norm: 0.9023631880776437, iteration: 265841
loss: 1.0517185926437378,grad_norm: 0.9999991580296415, iteration: 265842
loss: 1.0220000743865967,grad_norm: 0.8026242031345011, iteration: 265843
loss: 0.9754543304443359,grad_norm: 0.9673431083752975, iteration: 265844
loss: 0.9863659739494324,grad_norm: 0.8382553577136046, iteration: 265845
loss: 1.0011178255081177,grad_norm: 0.7785241095582192, iteration: 265846
loss: 1.0280200242996216,grad_norm: 0.9747210118574867, iteration: 265847
loss: 1.0467180013656616,grad_norm: 0.9999989367749613, iteration: 265848
loss: 1.0063207149505615,grad_norm: 0.713376291004504, iteration: 265849
loss: 1.0463658571243286,grad_norm: 0.9999991539088403, iteration: 265850
loss: 0.9938474297523499,grad_norm: 0.8873478091361038, iteration: 265851
loss: 0.9874147772789001,grad_norm: 0.9684055406311559, iteration: 265852
loss: 0.9815379977226257,grad_norm: 0.9356400068611294, iteration: 265853
loss: 1.0194634199142456,grad_norm: 0.8774992813247948, iteration: 265854
loss: 0.9834743738174438,grad_norm: 0.8784158004067962, iteration: 265855
loss: 1.025277018547058,grad_norm: 0.9121857422027492, iteration: 265856
loss: 0.990536630153656,grad_norm: 0.7440040550397023, iteration: 265857
loss: 0.9756651520729065,grad_norm: 0.9999991463189668, iteration: 265858
loss: 0.9916248321533203,grad_norm: 0.8784439870483909, iteration: 265859
loss: 0.9923391342163086,grad_norm: 0.77840727823779, iteration: 265860
loss: 1.0056257247924805,grad_norm: 0.9436205113634896, iteration: 265861
loss: 0.9989008903503418,grad_norm: 0.9999990919090914, iteration: 265862
loss: 0.9801188111305237,grad_norm: 0.9894301727367396, iteration: 265863
loss: 1.0631279945373535,grad_norm: 0.9999993529550357, iteration: 265864
loss: 1.0057538747787476,grad_norm: 0.8540857400747105, iteration: 265865
loss: 1.0192726850509644,grad_norm: 0.9999991601681469, iteration: 265866
loss: 1.0028246641159058,grad_norm: 0.7642680263526459, iteration: 265867
loss: 0.9859040975570679,grad_norm: 0.935931677148786, iteration: 265868
loss: 0.959858775138855,grad_norm: 0.883469763689883, iteration: 265869
loss: 1.000186800956726,grad_norm: 0.8011415815498079, iteration: 265870
loss: 0.9577771425247192,grad_norm: 0.9999998437857328, iteration: 265871
loss: 0.9803533554077148,grad_norm: 0.9999991192983864, iteration: 265872
loss: 1.0003553628921509,grad_norm: 0.8190384708733641, iteration: 265873
loss: 0.9776380658149719,grad_norm: 0.8624134009333851, iteration: 265874
loss: 1.0085183382034302,grad_norm: 0.9578293058722841, iteration: 265875
loss: 1.0502759218215942,grad_norm: 0.9810382102344662, iteration: 265876
loss: 1.0253390073776245,grad_norm: 0.8486545581887553, iteration: 265877
loss: 0.9967302680015564,grad_norm: 0.8177283589192649, iteration: 265878
loss: 1.0149461030960083,grad_norm: 0.8019188506483512, iteration: 265879
loss: 1.0119962692260742,grad_norm: 0.9003597083850099, iteration: 265880
loss: 0.9906972646713257,grad_norm: 0.9484839184059467, iteration: 265881
loss: 0.9744306206703186,grad_norm: 0.86143575620957, iteration: 265882
loss: 0.9898058176040649,grad_norm: 0.9451049768025899, iteration: 265883
loss: 0.9923174381256104,grad_norm: 0.9999997258915019, iteration: 265884
loss: 0.9793959856033325,grad_norm: 0.7071436754733436, iteration: 265885
loss: 1.0338928699493408,grad_norm: 0.9594600640634231, iteration: 265886
loss: 1.0231354236602783,grad_norm: 0.9120060153802971, iteration: 265887
loss: 1.0143342018127441,grad_norm: 0.933147542998447, iteration: 265888
loss: 1.0314325094223022,grad_norm: 0.9999990820841128, iteration: 265889
loss: 1.0336614847183228,grad_norm: 0.9385899036416424, iteration: 265890
loss: 1.1066744327545166,grad_norm: 0.935163997548424, iteration: 265891
loss: 0.9811937212944031,grad_norm: 0.8700309867809677, iteration: 265892
loss: 1.0002237558364868,grad_norm: 0.9183521700720287, iteration: 265893
loss: 0.9941854476928711,grad_norm: 0.8421633578343435, iteration: 265894
loss: 0.9526214599609375,grad_norm: 0.9999989898294467, iteration: 265895
loss: 0.9629386067390442,grad_norm: 0.9498611213310819, iteration: 265896
loss: 0.9667518138885498,grad_norm: 0.7860672428963189, iteration: 265897
loss: 0.9935246706008911,grad_norm: 0.8008351326849154, iteration: 265898
loss: 0.985852062702179,grad_norm: 0.9499217885856623, iteration: 265899
loss: 0.9840469360351562,grad_norm: 0.8852115710775, iteration: 265900
loss: 1.0300400257110596,grad_norm: 0.9136252102527208, iteration: 265901
loss: 1.005031943321228,grad_norm: 0.9999992010336016, iteration: 265902
loss: 0.9831336140632629,grad_norm: 0.7695324634155264, iteration: 265903
loss: 1.0861554145812988,grad_norm: 0.9999996066563692, iteration: 265904
loss: 0.9929214715957642,grad_norm: 0.9078920836894053, iteration: 265905
loss: 1.002967357635498,grad_norm: 0.9999990919189489, iteration: 265906
loss: 1.0069886445999146,grad_norm: 0.7627103949360918, iteration: 265907
loss: 0.9660736918449402,grad_norm: 0.8389131789785246, iteration: 265908
loss: 0.9922234416007996,grad_norm: 0.8011674178166069, iteration: 265909
loss: 1.0922008752822876,grad_norm: 0.9999997824648506, iteration: 265910
loss: 0.9860509634017944,grad_norm: 0.9999991958218345, iteration: 265911
loss: 0.9790538549423218,grad_norm: 0.8066391906343467, iteration: 265912
loss: 1.0076810121536255,grad_norm: 0.8465470470549307, iteration: 265913
loss: 1.0251699686050415,grad_norm: 0.8364017625470674, iteration: 265914
loss: 1.0103412866592407,grad_norm: 0.9999991198245458, iteration: 265915
loss: 0.9838191270828247,grad_norm: 0.9012709712036127, iteration: 265916
loss: 0.9841341376304626,grad_norm: 0.9367368929090718, iteration: 265917
loss: 1.0341312885284424,grad_norm: 0.9999992346471192, iteration: 265918
loss: 0.9758199453353882,grad_norm: 0.8170496057846127, iteration: 265919
loss: 1.0031806230545044,grad_norm: 0.87672022415898, iteration: 265920
loss: 1.0222405195236206,grad_norm: 0.7795896057897476, iteration: 265921
loss: 0.9849802255630493,grad_norm: 0.7425189265860898, iteration: 265922
loss: 0.9680973291397095,grad_norm: 0.8340630407078209, iteration: 265923
loss: 1.0028859376907349,grad_norm: 0.8341004763517602, iteration: 265924
loss: 0.9769862294197083,grad_norm: 0.8585871384952256, iteration: 265925
loss: 1.000160813331604,grad_norm: 0.7988417493573238, iteration: 265926
loss: 1.0189546346664429,grad_norm: 0.9999990944360051, iteration: 265927
loss: 0.9732056260108948,grad_norm: 0.8343284555641507, iteration: 265928
loss: 0.9889296293258667,grad_norm: 0.846273390568812, iteration: 265929
loss: 1.003767728805542,grad_norm: 0.9485298572107842, iteration: 265930
loss: 1.0242005586624146,grad_norm: 0.8062800884847583, iteration: 265931
loss: 1.0005548000335693,grad_norm: 0.9999994814226855, iteration: 265932
loss: 1.0539464950561523,grad_norm: 0.9552514682527363, iteration: 265933
loss: 0.9920288324356079,grad_norm: 0.7621416217679144, iteration: 265934
loss: 1.0585509538650513,grad_norm: 0.947407748804459, iteration: 265935
loss: 0.9668551683425903,grad_norm: 0.81342109110629, iteration: 265936
loss: 1.0110514163970947,grad_norm: 0.8714328794633781, iteration: 265937
loss: 0.9662685990333557,grad_norm: 0.9999992035932823, iteration: 265938
loss: 1.0269674062728882,grad_norm: 0.8829188834565388, iteration: 265939
loss: 0.9791354537010193,grad_norm: 0.9999992546826998, iteration: 265940
loss: 1.0059012174606323,grad_norm: 0.7909505607582276, iteration: 265941
loss: 0.959732174873352,grad_norm: 0.9746776391760088, iteration: 265942
loss: 1.0997824668884277,grad_norm: 0.9999992567355247, iteration: 265943
loss: 1.0049207210540771,grad_norm: 0.875037046357342, iteration: 265944
loss: 0.9779054522514343,grad_norm: 0.8172754942109098, iteration: 265945
loss: 0.999297559261322,grad_norm: 0.9271790462222987, iteration: 265946
loss: 1.030293583869934,grad_norm: 0.7997565360292386, iteration: 265947
loss: 1.000123381614685,grad_norm: 0.999999159561334, iteration: 265948
loss: 0.9975168108940125,grad_norm: 0.9903835748979285, iteration: 265949
loss: 1.010028600692749,grad_norm: 0.8075130642609063, iteration: 265950
loss: 1.0450687408447266,grad_norm: 0.8245421854399383, iteration: 265951
loss: 1.0244518518447876,grad_norm: 0.7678717267718241, iteration: 265952
loss: 1.0262945890426636,grad_norm: 0.999999251000507, iteration: 265953
loss: 1.0115278959274292,grad_norm: 0.9999991625088134, iteration: 265954
loss: 1.0303288698196411,grad_norm: 0.9999991392669664, iteration: 265955
loss: 1.0435349941253662,grad_norm: 0.886427859259857, iteration: 265956
loss: 0.9717399477958679,grad_norm: 0.9445860371746355, iteration: 265957
loss: 0.9996525049209595,grad_norm: 0.9999990470416708, iteration: 265958
loss: 1.0071696043014526,grad_norm: 0.8624851019123179, iteration: 265959
loss: 1.0197776556015015,grad_norm: 0.8918261411001461, iteration: 265960
loss: 0.996604323387146,grad_norm: 0.9488437852235189, iteration: 265961
loss: 0.989086389541626,grad_norm: 0.930109233532408, iteration: 265962
loss: 1.0049384832382202,grad_norm: 0.881712428752991, iteration: 265963
loss: 0.993251383304596,grad_norm: 0.8853772315240919, iteration: 265964
loss: 0.9608356356620789,grad_norm: 0.862756704635964, iteration: 265965
loss: 0.9781258702278137,grad_norm: 0.7260857644519237, iteration: 265966
loss: 0.9877468943595886,grad_norm: 0.936037250748873, iteration: 265967
loss: 0.9846078157424927,grad_norm: 0.8231750465509879, iteration: 265968
loss: 1.0492277145385742,grad_norm: 0.9969393667610819, iteration: 265969
loss: 1.0006800889968872,grad_norm: 0.8503709727200275, iteration: 265970
loss: 0.9580356478691101,grad_norm: 0.97352985886356, iteration: 265971
loss: 0.9847411513328552,grad_norm: 0.9869518091524487, iteration: 265972
loss: 0.9904618263244629,grad_norm: 0.8057038708010182, iteration: 265973
loss: 0.9954684972763062,grad_norm: 0.7915541990803551, iteration: 265974
loss: 1.0137839317321777,grad_norm: 0.9999991709045686, iteration: 265975
loss: 0.9987927675247192,grad_norm: 0.9999991287421606, iteration: 265976
loss: 1.0194196701049805,grad_norm: 0.8774995548704945, iteration: 265977
loss: 0.9973265528678894,grad_norm: 0.8107644838433284, iteration: 265978
loss: 1.0211085081100464,grad_norm: 0.7456853850384966, iteration: 265979
loss: 1.0001715421676636,grad_norm: 0.8518718298671454, iteration: 265980
loss: 0.9992964863777161,grad_norm: 0.799278720072168, iteration: 265981
loss: 0.957929253578186,grad_norm: 0.9852732356052043, iteration: 265982
loss: 1.028115153312683,grad_norm: 0.8622778858870705, iteration: 265983
loss: 0.9485946297645569,grad_norm: 0.8869201029293422, iteration: 265984
loss: 0.9931856393814087,grad_norm: 0.8066119010284447, iteration: 265985
loss: 0.9798874855041504,grad_norm: 0.9364928001473243, iteration: 265986
loss: 0.992861270904541,grad_norm: 0.9576415235916418, iteration: 265987
loss: 1.0148802995681763,grad_norm: 0.9905623383615382, iteration: 265988
loss: 0.9971098899841309,grad_norm: 0.7122138493903462, iteration: 265989
loss: 0.9740066528320312,grad_norm: 0.7523060226484278, iteration: 265990
loss: 1.032589077949524,grad_norm: 0.9999991009608562, iteration: 265991
loss: 1.0312092304229736,grad_norm: 0.7368317876763583, iteration: 265992
loss: 0.9630424976348877,grad_norm: 0.9999991233884132, iteration: 265993
loss: 0.9740544557571411,grad_norm: 0.8803982541458465, iteration: 265994
loss: 1.022096872329712,grad_norm: 0.8175988772771615, iteration: 265995
loss: 1.0096416473388672,grad_norm: 0.9602793254919423, iteration: 265996
loss: 1.01198410987854,grad_norm: 0.868171474488308, iteration: 265997
loss: 1.0008673667907715,grad_norm: 0.9510462912959984, iteration: 265998
loss: 0.9936324954032898,grad_norm: 0.8954986860576387, iteration: 265999
loss: 1.1256189346313477,grad_norm: 0.9999998595385581, iteration: 266000
loss: 1.0086088180541992,grad_norm: 0.8765144929627766, iteration: 266001
loss: 0.9991093277931213,grad_norm: 0.9891674778178791, iteration: 266002
loss: 1.0281500816345215,grad_norm: 0.9418773175094386, iteration: 266003
loss: 1.012589931488037,grad_norm: 0.752350138243129, iteration: 266004
loss: 0.9857252240180969,grad_norm: 0.8967886081667535, iteration: 266005
loss: 0.9645331501960754,grad_norm: 0.7973818978814083, iteration: 266006
loss: 1.008331537246704,grad_norm: 0.8595440737326672, iteration: 266007
loss: 1.0435762405395508,grad_norm: 0.9999991425476057, iteration: 266008
loss: 0.9802694916725159,grad_norm: 0.9098946696988158, iteration: 266009
loss: 1.0336076021194458,grad_norm: 0.9405585672534068, iteration: 266010
loss: 0.9740747213363647,grad_norm: 0.8375300020366643, iteration: 266011
loss: 0.9940937161445618,grad_norm: 0.933181087725022, iteration: 266012
loss: 0.9695685505867004,grad_norm: 0.8888856108655824, iteration: 266013
loss: 1.0132958889007568,grad_norm: 0.8807110829683156, iteration: 266014
loss: 1.0245352983474731,grad_norm: 0.8346415641904364, iteration: 266015
loss: 1.0102941989898682,grad_norm: 0.8124345144564122, iteration: 266016
loss: 1.010573387145996,grad_norm: 0.999999042338173, iteration: 266017
loss: 0.9597219228744507,grad_norm: 0.9999990982778971, iteration: 266018
loss: 1.0424965620040894,grad_norm: 0.9579533181583907, iteration: 266019
loss: 0.9744543433189392,grad_norm: 0.9999991774904536, iteration: 266020
loss: 0.9798018932342529,grad_norm: 0.9661813669799326, iteration: 266021
loss: 1.0007566213607788,grad_norm: 0.8982871043905246, iteration: 266022
loss: 1.0145307779312134,grad_norm: 0.7274641545883629, iteration: 266023
loss: 0.9870635867118835,grad_norm: 0.8089702674169248, iteration: 266024
loss: 0.9992730617523193,grad_norm: 0.9923434805919529, iteration: 266025
loss: 0.9603959321975708,grad_norm: 0.984111216210339, iteration: 266026
loss: 1.0125027894973755,grad_norm: 0.9084550340904228, iteration: 266027
loss: 0.9977791905403137,grad_norm: 0.8137773959101636, iteration: 266028
loss: 0.9884328246116638,grad_norm: 0.9999991183532362, iteration: 266029
loss: 0.9970672130584717,grad_norm: 0.9633202511152156, iteration: 266030
loss: 1.0249334573745728,grad_norm: 0.8998709701530294, iteration: 266031
loss: 1.0416239500045776,grad_norm: 0.9999989990628431, iteration: 266032
loss: 1.0191015005111694,grad_norm: 0.9769066632171246, iteration: 266033
loss: 1.0251786708831787,grad_norm: 0.8481088181219426, iteration: 266034
loss: 1.0230472087860107,grad_norm: 0.9999995957101503, iteration: 266035
loss: 0.9930893182754517,grad_norm: 0.8308586475051578, iteration: 266036
loss: 1.017418622970581,grad_norm: 0.9050219583237773, iteration: 266037
loss: 1.006699562072754,grad_norm: 0.8551520478952043, iteration: 266038
loss: 1.0635706186294556,grad_norm: 0.9886303832638484, iteration: 266039
loss: 1.0085607767105103,grad_norm: 0.8225628423481464, iteration: 266040
loss: 0.9927161335945129,grad_norm: 0.693145308617458, iteration: 266041
loss: 0.9876964688301086,grad_norm: 0.8092515802129144, iteration: 266042
loss: 0.9804248213768005,grad_norm: 0.9999990915621851, iteration: 266043
loss: 1.0320919752120972,grad_norm: 0.9999990990760415, iteration: 266044
loss: 0.9610812664031982,grad_norm: 0.8749751856580475, iteration: 266045
loss: 0.9990750551223755,grad_norm: 0.7979002374091609, iteration: 266046
loss: 1.01763916015625,grad_norm: 0.9677614919105733, iteration: 266047
loss: 0.9868220090866089,grad_norm: 0.9004597429466686, iteration: 266048
loss: 1.0139081478118896,grad_norm: 0.9430781943871037, iteration: 266049
loss: 1.0186282396316528,grad_norm: 0.9999989373422102, iteration: 266050
loss: 1.0063756704330444,grad_norm: 0.8420242437500102, iteration: 266051
loss: 1.0125349760055542,grad_norm: 0.8322034679410879, iteration: 266052
loss: 1.0128098726272583,grad_norm: 0.7923566703196545, iteration: 266053
loss: 0.9634022116661072,grad_norm: 0.7585510014521077, iteration: 266054
loss: 1.0070489645004272,grad_norm: 0.7603460796358492, iteration: 266055
loss: 1.0521767139434814,grad_norm: 0.9999998089307086, iteration: 266056
loss: 1.148018717765808,grad_norm: 0.9999994370268461, iteration: 266057
loss: 1.0872050523757935,grad_norm: 0.9999990768862576, iteration: 266058
loss: 0.9601292610168457,grad_norm: 0.9727987333200758, iteration: 266059
loss: 0.9855150580406189,grad_norm: 0.8268480363253651, iteration: 266060
loss: 1.0368469953536987,grad_norm: 0.9783131370527397, iteration: 266061
loss: 1.003505825996399,grad_norm: 0.7544411273202163, iteration: 266062
loss: 1.0085628032684326,grad_norm: 0.7718272543377331, iteration: 266063
loss: 0.9929762482643127,grad_norm: 0.8565106199519019, iteration: 266064
loss: 0.9936496615409851,grad_norm: 0.9734398170699802, iteration: 266065
loss: 1.0115214586257935,grad_norm: 0.8998785433877726, iteration: 266066
loss: 0.9966959357261658,grad_norm: 0.999999146399015, iteration: 266067
loss: 0.9741078615188599,grad_norm: 0.7624244889237847, iteration: 266068
loss: 1.0190303325653076,grad_norm: 0.82415112290651, iteration: 266069
loss: 0.9893478751182556,grad_norm: 0.914931825250306, iteration: 266070
loss: 1.0205953121185303,grad_norm: 0.686191371432075, iteration: 266071
loss: 1.0076344013214111,grad_norm: 0.9535619088880507, iteration: 266072
loss: 0.9943747520446777,grad_norm: 0.9660573350918689, iteration: 266073
loss: 1.0072709321975708,grad_norm: 0.8495751609363743, iteration: 266074
loss: 0.9856012463569641,grad_norm: 0.9550034603645349, iteration: 266075
loss: 1.0009138584136963,grad_norm: 0.9505470579524868, iteration: 266076
loss: 1.012597680091858,grad_norm: 0.9740132834595784, iteration: 266077
loss: 0.9556563496589661,grad_norm: 0.8760450769608547, iteration: 266078
loss: 0.9930217862129211,grad_norm: 0.9999991900026919, iteration: 266079
loss: 0.9824833869934082,grad_norm: 0.9999992139016642, iteration: 266080
loss: 1.021921992301941,grad_norm: 0.9614855984017026, iteration: 266081
loss: 0.98442143201828,grad_norm: 0.941860352016885, iteration: 266082
loss: 1.014725923538208,grad_norm: 0.8572440843188324, iteration: 266083
loss: 1.0095171928405762,grad_norm: 0.9551534905686431, iteration: 266084
loss: 1.003600835800171,grad_norm: 0.9999991017364047, iteration: 266085
loss: 0.9982773661613464,grad_norm: 0.8867951524068982, iteration: 266086
loss: 1.0152455568313599,grad_norm: 0.9999992084170435, iteration: 266087
loss: 0.9932593703269958,grad_norm: 0.8976614367293737, iteration: 266088
loss: 1.1778231859207153,grad_norm: 0.9999990477911983, iteration: 266089
loss: 0.9837514758110046,grad_norm: 0.9081919942269404, iteration: 266090
loss: 0.9941080808639526,grad_norm: 0.8888301685722592, iteration: 266091
loss: 1.097636103630066,grad_norm: 0.8014055772864404, iteration: 266092
loss: 0.9738585352897644,grad_norm: 0.9049486538537813, iteration: 266093
loss: 1.0208715200424194,grad_norm: 0.9088501442476805, iteration: 266094
loss: 0.9823381304740906,grad_norm: 0.9213498545858815, iteration: 266095
loss: 1.0209414958953857,grad_norm: 0.855049659247053, iteration: 266096
loss: 1.0136492252349854,grad_norm: 0.9290528977356397, iteration: 266097
loss: 1.0070912837982178,grad_norm: 0.999999002599344, iteration: 266098
loss: 1.0303207635879517,grad_norm: 0.8939059036734681, iteration: 266099
loss: 0.9651902914047241,grad_norm: 0.9263381027417422, iteration: 266100
loss: 0.9668052196502686,grad_norm: 0.9999991122850457, iteration: 266101
loss: 0.9991998672485352,grad_norm: 0.9440367397825536, iteration: 266102
loss: 0.9932463765144348,grad_norm: 0.7685749834942338, iteration: 266103
loss: 0.9706649780273438,grad_norm: 0.9999992667949551, iteration: 266104
loss: 1.021073579788208,grad_norm: 0.9059078271611924, iteration: 266105
loss: 1.0664511919021606,grad_norm: 0.9999990878016703, iteration: 266106
loss: 0.9692395329475403,grad_norm: 0.9710642011954048, iteration: 266107
loss: 1.0147496461868286,grad_norm: 0.962634693986064, iteration: 266108
loss: 1.0264039039611816,grad_norm: 0.9806670851500718, iteration: 266109
loss: 0.9872883558273315,grad_norm: 0.9999994696678773, iteration: 266110
loss: 0.9977079033851624,grad_norm: 0.8449779159017963, iteration: 266111
loss: 0.9752312898635864,grad_norm: 0.761993775611409, iteration: 266112
loss: 0.9884425401687622,grad_norm: 0.9355414740105773, iteration: 266113
loss: 0.9991576075553894,grad_norm: 0.8026519306572635, iteration: 266114
loss: 0.9718746542930603,grad_norm: 0.9999993400642508, iteration: 266115
loss: 0.9993993043899536,grad_norm: 0.6986070404157247, iteration: 266116
loss: 0.9705718755722046,grad_norm: 0.8354576082968297, iteration: 266117
loss: 0.9803073406219482,grad_norm: 0.8006312155519133, iteration: 266118
loss: 1.0442014932632446,grad_norm: 0.9874923729099879, iteration: 266119
loss: 0.9963279366493225,grad_norm: 0.9999991765604276, iteration: 266120
loss: 1.0269609689712524,grad_norm: 0.9999990523427981, iteration: 266121
loss: 0.9656822085380554,grad_norm: 0.8611025681155718, iteration: 266122
loss: 0.970234751701355,grad_norm: 0.8473376097115302, iteration: 266123
loss: 0.9952161908149719,grad_norm: 0.9999991215698072, iteration: 266124
loss: 0.9966897964477539,grad_norm: 0.9234695223613111, iteration: 266125
loss: 0.9957713484764099,grad_norm: 0.9999998778482889, iteration: 266126
loss: 1.003707766532898,grad_norm: 0.9999991634415895, iteration: 266127
loss: 1.1770892143249512,grad_norm: 0.9999992648049313, iteration: 266128
loss: 1.0296958684921265,grad_norm: 0.9381716788195931, iteration: 266129
loss: 0.9834504723548889,grad_norm: 0.9999990573209546, iteration: 266130
loss: 1.0141746997833252,grad_norm: 0.9999991750764378, iteration: 266131
loss: 0.999420166015625,grad_norm: 0.8217236155619663, iteration: 266132
loss: 0.9999945759773254,grad_norm: 0.8443572782293627, iteration: 266133
loss: 0.9737206697463989,grad_norm: 0.8362692840789326, iteration: 266134
loss: 1.0554964542388916,grad_norm: 0.9999998964584955, iteration: 266135
loss: 0.9792251586914062,grad_norm: 0.8512094111256268, iteration: 266136
loss: 0.9864761233329773,grad_norm: 0.998965241826211, iteration: 266137
loss: 0.9832274317741394,grad_norm: 0.8570581298777785, iteration: 266138
loss: 1.0133087635040283,grad_norm: 0.9432539176765934, iteration: 266139
loss: 0.9944522380828857,grad_norm: 0.8824450311367222, iteration: 266140
loss: 1.0152838230133057,grad_norm: 0.8927632354540054, iteration: 266141
loss: 0.9469911456108093,grad_norm: 0.8893742237627293, iteration: 266142
loss: 0.9796518087387085,grad_norm: 0.9380240584834779, iteration: 266143
loss: 1.000489592552185,grad_norm: 0.9105480874617581, iteration: 266144
loss: 1.0272376537322998,grad_norm: 0.9999993492325195, iteration: 266145
loss: 0.9880044460296631,grad_norm: 0.819726301100585, iteration: 266146
loss: 0.9884178638458252,grad_norm: 0.9999994426585705, iteration: 266147
loss: 0.9781312346458435,grad_norm: 0.8367666054620915, iteration: 266148
loss: 0.9887570738792419,grad_norm: 0.8195025770125928, iteration: 266149
loss: 0.9767112731933594,grad_norm: 0.9999992306734319, iteration: 266150
loss: 0.996063768863678,grad_norm: 0.9605161225512873, iteration: 266151
loss: 1.025956153869629,grad_norm: 0.9999989515355393, iteration: 266152
loss: 1.0596038103103638,grad_norm: 0.7617830982022841, iteration: 266153
loss: 0.9837067127227783,grad_norm: 0.9685580865057476, iteration: 266154
loss: 1.0041147470474243,grad_norm: 0.8014389419764791, iteration: 266155
loss: 1.005915880203247,grad_norm: 0.818969080610762, iteration: 266156
loss: 0.9736284017562866,grad_norm: 0.7952534377543345, iteration: 266157
loss: 1.0008927583694458,grad_norm: 0.8890611641929226, iteration: 266158
loss: 0.9713832139968872,grad_norm: 0.8354022524525984, iteration: 266159
loss: 0.989906370639801,grad_norm: 0.7885236083746961, iteration: 266160
loss: 0.9826174378395081,grad_norm: 0.8449526778446252, iteration: 266161
loss: 0.9945720434188843,grad_norm: 0.8187954681132872, iteration: 266162
loss: 1.0097670555114746,grad_norm: 0.818113695461376, iteration: 266163
loss: 1.0045490264892578,grad_norm: 0.7842331836889493, iteration: 266164
loss: 0.994627833366394,grad_norm: 0.8465496133499075, iteration: 266165
loss: 1.0051343441009521,grad_norm: 0.9162047174297975, iteration: 266166
loss: 0.9946244955062866,grad_norm: 0.7645921549274174, iteration: 266167
loss: 1.0160839557647705,grad_norm: 0.8441644946044119, iteration: 266168
loss: 1.0131255388259888,grad_norm: 0.8616834247966154, iteration: 266169
loss: 0.9928685426712036,grad_norm: 0.999999069527712, iteration: 266170
loss: 0.9923050999641418,grad_norm: 0.9362376035574436, iteration: 266171
loss: 1.0184937715530396,grad_norm: 0.9868437992993055, iteration: 266172
loss: 1.0309877395629883,grad_norm: 0.9394036886393138, iteration: 266173
loss: 1.019068717956543,grad_norm: 0.8805232627088815, iteration: 266174
loss: 1.0193536281585693,grad_norm: 0.8750386681670342, iteration: 266175
loss: 1.0074824094772339,grad_norm: 0.849409932918411, iteration: 266176
loss: 0.97859126329422,grad_norm: 0.9999991339658172, iteration: 266177
loss: 1.0132745504379272,grad_norm: 0.9153123619413128, iteration: 266178
loss: 0.9764118194580078,grad_norm: 0.99999989454851, iteration: 266179
loss: 0.9956119656562805,grad_norm: 0.9274682849969719, iteration: 266180
loss: 0.9941829442977905,grad_norm: 0.9999990757384226, iteration: 266181
loss: 0.9727351665496826,grad_norm: 0.8971095280196666, iteration: 266182
loss: 1.0160106420516968,grad_norm: 0.9999991103725931, iteration: 266183
loss: 0.9809326529502869,grad_norm: 0.8203800886992699, iteration: 266184
loss: 0.9874137043952942,grad_norm: 0.7105173829824354, iteration: 266185
loss: 1.0277504920959473,grad_norm: 0.9333521188423991, iteration: 266186
loss: 0.9964998960494995,grad_norm: 0.9155231145640836, iteration: 266187
loss: 0.9979822635650635,grad_norm: 0.9084376647471529, iteration: 266188
loss: 0.997525691986084,grad_norm: 0.9124793279157798, iteration: 266189
loss: 1.0002813339233398,grad_norm: 0.9999993614424518, iteration: 266190
loss: 1.037030577659607,grad_norm: 0.9393811022985928, iteration: 266191
loss: 0.9915449619293213,grad_norm: 0.901903475793825, iteration: 266192
loss: 0.9656645059585571,grad_norm: 0.9134570376621534, iteration: 266193
loss: 0.9825018048286438,grad_norm: 0.858890045059395, iteration: 266194
loss: 1.001076102256775,grad_norm: 0.8841704641764105, iteration: 266195
loss: 0.9726249575614929,grad_norm: 0.999999160488232, iteration: 266196
loss: 1.0122618675231934,grad_norm: 0.9713516648242869, iteration: 266197
loss: 0.9904384613037109,grad_norm: 0.8706990885281558, iteration: 266198
loss: 1.0824060440063477,grad_norm: 0.9999992641223979, iteration: 266199
loss: 1.010218858718872,grad_norm: 0.9999991378251702, iteration: 266200
loss: 1.0130445957183838,grad_norm: 0.8628713264394985, iteration: 266201
loss: 0.9987301826477051,grad_norm: 0.7437888340901445, iteration: 266202
loss: 0.9737018942832947,grad_norm: 0.9588035641194202, iteration: 266203
loss: 1.054465889930725,grad_norm: 0.9999992565656147, iteration: 266204
loss: 0.9955915808677673,grad_norm: 0.7953415307061121, iteration: 266205
loss: 1.0117146968841553,grad_norm: 0.9999991510811402, iteration: 266206
loss: 1.029615044593811,grad_norm: 0.9478193669177144, iteration: 266207
loss: 1.018624186515808,grad_norm: 0.905812090206639, iteration: 266208
loss: 1.0140540599822998,grad_norm: 0.8751512869364422, iteration: 266209
loss: 0.9433581829071045,grad_norm: 0.8589792293168642, iteration: 266210
loss: 1.0010508298873901,grad_norm: 0.8786134164415076, iteration: 266211
loss: 1.0756640434265137,grad_norm: 0.9999996851646621, iteration: 266212
loss: 1.004634141921997,grad_norm: 0.9464745992468153, iteration: 266213
loss: 1.0209388732910156,grad_norm: 0.9999991946504149, iteration: 266214
loss: 1.0104706287384033,grad_norm: 0.9999992458368192, iteration: 266215
loss: 1.0362931489944458,grad_norm: 0.7496558702055767, iteration: 266216
loss: 1.0870726108551025,grad_norm: 0.8959615152404518, iteration: 266217
loss: 1.0117623805999756,grad_norm: 0.8209002594717885, iteration: 266218
loss: 0.9828416705131531,grad_norm: 0.7924292799843886, iteration: 266219
loss: 1.0639452934265137,grad_norm: 0.9999992535025719, iteration: 266220
loss: 0.9679500460624695,grad_norm: 0.9535430262452517, iteration: 266221
loss: 0.9927840828895569,grad_norm: 0.8795866613093185, iteration: 266222
loss: 0.9912344813346863,grad_norm: 0.9128840285145565, iteration: 266223
loss: 1.0047320127487183,grad_norm: 0.9276182665988757, iteration: 266224
loss: 0.9927273392677307,grad_norm: 0.8195909183883034, iteration: 266225
loss: 1.0072205066680908,grad_norm: 0.8268708243145781, iteration: 266226
loss: 0.956139326095581,grad_norm: 0.9999990703450131, iteration: 266227
loss: 1.0045377016067505,grad_norm: 0.6916632476804774, iteration: 266228
loss: 0.9876739978790283,grad_norm: 0.99999959548024, iteration: 266229
loss: 1.0002554655075073,grad_norm: 0.842885773698894, iteration: 266230
loss: 0.982398271560669,grad_norm: 0.9999990026067559, iteration: 266231
loss: 0.9932670593261719,grad_norm: 0.8774785649646347, iteration: 266232
loss: 0.9902642965316772,grad_norm: 0.754991603900505, iteration: 266233
loss: 0.9987580180168152,grad_norm: 0.7330043848843053, iteration: 266234
loss: 1.0166245698928833,grad_norm: 0.999999101346404, iteration: 266235
loss: 0.983357846736908,grad_norm: 0.9999991616097949, iteration: 266236
loss: 1.026571273803711,grad_norm: 0.9999989678032035, iteration: 266237
loss: 1.0065423250198364,grad_norm: 0.9398476179175417, iteration: 266238
loss: 1.004754662513733,grad_norm: 0.7945665346354387, iteration: 266239
loss: 1.0048874616622925,grad_norm: 0.999999038141976, iteration: 266240
loss: 1.0049006938934326,grad_norm: 0.7165303837440545, iteration: 266241
loss: 1.0174311399459839,grad_norm: 0.9999990744397763, iteration: 266242
loss: 0.9849535822868347,grad_norm: 0.8711161241550156, iteration: 266243
loss: 0.9908197522163391,grad_norm: 0.830234201063023, iteration: 266244
loss: 1.0014369487762451,grad_norm: 0.8361023486727157, iteration: 266245
loss: 1.0166902542114258,grad_norm: 0.8721910897154159, iteration: 266246
loss: 0.9396274089813232,grad_norm: 0.9419518459250318, iteration: 266247
loss: 1.029684066772461,grad_norm: 0.9800983840343264, iteration: 266248
loss: 0.9685656428337097,grad_norm: 0.8235811576325123, iteration: 266249
loss: 1.0154485702514648,grad_norm: 0.8316925601818668, iteration: 266250
loss: 1.0264661312103271,grad_norm: 0.8944281586588337, iteration: 266251
loss: 0.9847429990768433,grad_norm: 0.8987965110460455, iteration: 266252
loss: 0.9585452079772949,grad_norm: 0.7887951280201164, iteration: 266253
loss: 1.0067614316940308,grad_norm: 0.9622812210342411, iteration: 266254
loss: 1.0859888792037964,grad_norm: 0.8152297238924529, iteration: 266255
loss: 0.9761244058609009,grad_norm: 0.8628870430033801, iteration: 266256
loss: 1.0022258758544922,grad_norm: 0.9173856585109877, iteration: 266257
loss: 0.9977860450744629,grad_norm: 0.9818776885790773, iteration: 266258
loss: 1.0138155221939087,grad_norm: 0.8188342088728786, iteration: 266259
loss: 1.063012719154358,grad_norm: 0.9927580435364322, iteration: 266260
loss: 1.072229027748108,grad_norm: 0.9462847992564178, iteration: 266261
loss: 1.0085599422454834,grad_norm: 0.9999990731002675, iteration: 266262
loss: 1.0312550067901611,grad_norm: 0.7889451522542924, iteration: 266263
loss: 1.021619200706482,grad_norm: 0.9851697096272376, iteration: 266264
loss: 0.9840818643569946,grad_norm: 0.8683673297680453, iteration: 266265
loss: 0.9819037318229675,grad_norm: 0.9893172028891152, iteration: 266266
loss: 0.9935200214385986,grad_norm: 0.7368439055714785, iteration: 266267
loss: 1.0014084577560425,grad_norm: 0.9443608201397881, iteration: 266268
loss: 0.9690779447555542,grad_norm: 0.8080671794566926, iteration: 266269
loss: 1.0488592386245728,grad_norm: 0.9999994719388886, iteration: 266270
loss: 0.9847978949546814,grad_norm: 0.872274349690458, iteration: 266271
loss: 0.9914895296096802,grad_norm: 0.9999990423082498, iteration: 266272
loss: 0.9847027659416199,grad_norm: 0.9170215637051994, iteration: 266273
loss: 1.0753661394119263,grad_norm: 0.9999991657417533, iteration: 266274
loss: 0.9923995733261108,grad_norm: 0.8727690079143647, iteration: 266275
loss: 0.9780769348144531,grad_norm: 0.924712448810682, iteration: 266276
loss: 1.0181612968444824,grad_norm: 0.9999996040392858, iteration: 266277
loss: 0.996208906173706,grad_norm: 0.9510799477267903, iteration: 266278
loss: 0.9890400171279907,grad_norm: 0.8513992084986505, iteration: 266279
loss: 0.9899505972862244,grad_norm: 0.7789802361331547, iteration: 266280
loss: 0.9578299522399902,grad_norm: 0.869316355997269, iteration: 266281
loss: 1.0110692977905273,grad_norm: 0.8473425608132996, iteration: 266282
loss: 0.986184298992157,grad_norm: 0.9999990419820286, iteration: 266283
loss: 1.0003353357315063,grad_norm: 0.8484172373621228, iteration: 266284
loss: 0.9776644706726074,grad_norm: 0.7642411344432799, iteration: 266285
loss: 1.0250462293624878,grad_norm: 0.8899853207025096, iteration: 266286
loss: 0.9670115113258362,grad_norm: 0.7294678451412229, iteration: 266287
loss: 1.00171959400177,grad_norm: 0.8998975490917346, iteration: 266288
loss: 1.000523328781128,grad_norm: 0.9811436777841624, iteration: 266289
loss: 1.0409435033798218,grad_norm: 0.8953052889041151, iteration: 266290
loss: 1.0000851154327393,grad_norm: 0.9999992526434308, iteration: 266291
loss: 1.0131783485412598,grad_norm: 0.7789154441534898, iteration: 266292
loss: 1.0206913948059082,grad_norm: 0.7860181611339269, iteration: 266293
loss: 1.0366722345352173,grad_norm: 0.9394772858313002, iteration: 266294
loss: 0.9991448521614075,grad_norm: 0.9315341914353159, iteration: 266295
loss: 0.9802478551864624,grad_norm: 0.999999654364621, iteration: 266296
loss: 1.0213669538497925,grad_norm: 0.9999990356799959, iteration: 266297
loss: 0.98055100440979,grad_norm: 0.9246593321720452, iteration: 266298
loss: 0.9782968759536743,grad_norm: 0.7650494498651267, iteration: 266299
loss: 1.0134482383728027,grad_norm: 0.8137241013431318, iteration: 266300
loss: 1.0255060195922852,grad_norm: 0.9999990120828333, iteration: 266301
loss: 0.9956532716751099,grad_norm: 0.8663894617905397, iteration: 266302
loss: 1.0328530073165894,grad_norm: 0.9092863033942603, iteration: 266303
loss: 0.9757596254348755,grad_norm: 0.7921196318617895, iteration: 266304
loss: 0.993380069732666,grad_norm: 0.8105582488059514, iteration: 266305
loss: 0.9875595569610596,grad_norm: 0.9157663276286878, iteration: 266306
loss: 0.9919599294662476,grad_norm: 0.9999991453344895, iteration: 266307
loss: 0.9591309428215027,grad_norm: 0.9791865716724264, iteration: 266308
loss: 1.1513060331344604,grad_norm: 0.9999992165702172, iteration: 266309
loss: 0.9948456883430481,grad_norm: 0.9529364560101115, iteration: 266310
loss: 1.0059576034545898,grad_norm: 0.8809753538308259, iteration: 266311
loss: 1.0215997695922852,grad_norm: 0.9717053400502875, iteration: 266312
loss: 1.004930019378662,grad_norm: 0.9142254820184855, iteration: 266313
loss: 1.0237749814987183,grad_norm: 0.9999991506583417, iteration: 266314
loss: 1.0226444005966187,grad_norm: 0.9999996765123896, iteration: 266315
loss: 0.9819463491439819,grad_norm: 0.8744046660892056, iteration: 266316
loss: 1.0459349155426025,grad_norm: 0.9429109485483567, iteration: 266317
loss: 0.9765822291374207,grad_norm: 0.9066796284532823, iteration: 266318
loss: 0.9745342135429382,grad_norm: 0.8579963296364904, iteration: 266319
loss: 0.9718232154846191,grad_norm: 0.8760906157322753, iteration: 266320
loss: 1.0159670114517212,grad_norm: 0.8761296149883497, iteration: 266321
loss: 0.9929868578910828,grad_norm: 0.8583953115527589, iteration: 266322
loss: 1.0120974779129028,grad_norm: 0.8967684375717672, iteration: 266323
loss: 1.019667387008667,grad_norm: 0.7319664056541826, iteration: 266324
loss: 0.9887257814407349,grad_norm: 0.999999102712663, iteration: 266325
loss: 0.9961941838264465,grad_norm: 0.8409436591599427, iteration: 266326
loss: 0.9905760884284973,grad_norm: 0.875550392820495, iteration: 266327
loss: 1.0277682542800903,grad_norm: 0.9070197502183858, iteration: 266328
loss: 0.9844428896903992,grad_norm: 0.7777651684765587, iteration: 266329
loss: 0.9992746710777283,grad_norm: 0.7551620678315893, iteration: 266330
loss: 1.0276509523391724,grad_norm: 0.7315009866133655, iteration: 266331
loss: 0.9669954180717468,grad_norm: 0.9999991259124693, iteration: 266332
loss: 1.0217833518981934,grad_norm: 0.9404889175999973, iteration: 266333
loss: 0.975700855255127,grad_norm: 0.8402756964015705, iteration: 266334
loss: 0.9807586669921875,grad_norm: 0.9999990369220975, iteration: 266335
loss: 0.9995296001434326,grad_norm: 0.9605904387967977, iteration: 266336
loss: 0.9922545552253723,grad_norm: 0.9999994348020141, iteration: 266337
loss: 0.9915568232536316,grad_norm: 0.947227746067275, iteration: 266338
loss: 0.9961902499198914,grad_norm: 0.8663414128969393, iteration: 266339
loss: 1.0085095167160034,grad_norm: 0.9180476564363125, iteration: 266340
loss: 0.9984551072120667,grad_norm: 0.9035865072011254, iteration: 266341
loss: 1.01913583278656,grad_norm: 0.999999183527495, iteration: 266342
loss: 1.0003405809402466,grad_norm: 0.9999990909222196, iteration: 266343
loss: 1.0098363161087036,grad_norm: 0.8559415942860824, iteration: 266344
loss: 0.9855124950408936,grad_norm: 0.8384160965329197, iteration: 266345
loss: 0.9866077303886414,grad_norm: 0.8652136367944275, iteration: 266346
loss: 1.002202033996582,grad_norm: 0.8670018834617597, iteration: 266347
loss: 0.9865953326225281,grad_norm: 0.8265103042767946, iteration: 266348
loss: 1.0588229894638062,grad_norm: 0.9347385709856657, iteration: 266349
loss: 1.0051696300506592,grad_norm: 0.8837158386566667, iteration: 266350
loss: 1.0158270597457886,grad_norm: 0.9999992344030795, iteration: 266351
loss: 0.979793906211853,grad_norm: 0.9054849014632383, iteration: 266352
loss: 0.9777951836585999,grad_norm: 0.8372252595064615, iteration: 266353
loss: 1.0298795700073242,grad_norm: 0.9171538344204189, iteration: 266354
loss: 0.9626579284667969,grad_norm: 0.9999991496056269, iteration: 266355
loss: 1.0010637044906616,grad_norm: 0.7490649204134842, iteration: 266356
loss: 1.0068639516830444,grad_norm: 0.8975495752131359, iteration: 266357
loss: 0.9861880540847778,grad_norm: 0.9999991266003403, iteration: 266358
loss: 1.0859966278076172,grad_norm: 0.9999993599196831, iteration: 266359
loss: 0.990536630153656,grad_norm: 0.9586803511016138, iteration: 266360
loss: 0.985450804233551,grad_norm: 0.7760403595247537, iteration: 266361
loss: 0.9923593997955322,grad_norm: 0.8853365148755389, iteration: 266362
loss: 1.0259748697280884,grad_norm: 0.9999992073032152, iteration: 266363
loss: 0.9667723178863525,grad_norm: 0.9999991512655007, iteration: 266364
loss: 1.0316370725631714,grad_norm: 0.8928428797415812, iteration: 266365
loss: 0.9773151874542236,grad_norm: 0.9246835462244029, iteration: 266366
loss: 1.0246460437774658,grad_norm: 0.8426016345763228, iteration: 266367
loss: 1.0149800777435303,grad_norm: 0.8353661920126274, iteration: 266368
loss: 1.0158272981643677,grad_norm: 0.8634979398571863, iteration: 266369
loss: 0.9598010182380676,grad_norm: 0.9082781333461386, iteration: 266370
loss: 0.999731183052063,grad_norm: 0.7824795315837301, iteration: 266371
loss: 0.9707872271537781,grad_norm: 0.9341790266361689, iteration: 266372
loss: 1.0077793598175049,grad_norm: 0.8157222397160884, iteration: 266373
loss: 0.9994567036628723,grad_norm: 0.8408194741143735, iteration: 266374
loss: 0.9963150024414062,grad_norm: 0.9821687032233288, iteration: 266375
loss: 1.0183134078979492,grad_norm: 0.8622030582935714, iteration: 266376
loss: 1.0034115314483643,grad_norm: 0.8871700296780285, iteration: 266377
loss: 1.0276092290878296,grad_norm: 0.9999991555176984, iteration: 266378
loss: 0.994110107421875,grad_norm: 0.9999991794419725, iteration: 266379
loss: 0.9811044931411743,grad_norm: 0.9020076073797761, iteration: 266380
loss: 1.003747582435608,grad_norm: 0.9999990721088755, iteration: 266381
loss: 0.9969404935836792,grad_norm: 0.8771139768709745, iteration: 266382
loss: 1.0222615003585815,grad_norm: 0.9324593738100873, iteration: 266383
loss: 0.9923573732376099,grad_norm: 0.9999990837906352, iteration: 266384
loss: 0.9779844284057617,grad_norm: 0.8378372253561601, iteration: 266385
loss: 1.0036914348602295,grad_norm: 0.9999990826649392, iteration: 266386
loss: 1.003904938697815,grad_norm: 0.9779042039665359, iteration: 266387
loss: 1.0117541551589966,grad_norm: 0.7961450791740169, iteration: 266388
loss: 1.021668791770935,grad_norm: 0.9940299189732673, iteration: 266389
loss: 1.0395941734313965,grad_norm: 0.9754344864395487, iteration: 266390
loss: 1.0733036994934082,grad_norm: 0.9999990579281629, iteration: 266391
loss: 1.0099012851715088,grad_norm: 0.8845022136239078, iteration: 266392
loss: 0.9807015657424927,grad_norm: 0.9999993100310323, iteration: 266393
loss: 1.0852895975112915,grad_norm: 0.9110039983864713, iteration: 266394
loss: 1.0150178670883179,grad_norm: 0.9314938962339243, iteration: 266395
loss: 1.0072555541992188,grad_norm: 0.864320130081956, iteration: 266396
loss: 0.9639354944229126,grad_norm: 0.8970773299130421, iteration: 266397
loss: 1.013282060623169,grad_norm: 0.943136901456983, iteration: 266398
loss: 0.9828760027885437,grad_norm: 0.7851295085922027, iteration: 266399
loss: 0.969369113445282,grad_norm: 0.8398459020943913, iteration: 266400
loss: 1.0044395923614502,grad_norm: 0.8238075794559648, iteration: 266401
loss: 1.0115078687667847,grad_norm: 0.7737225600022716, iteration: 266402
loss: 1.0024428367614746,grad_norm: 0.8959374721056362, iteration: 266403
loss: 0.9889746308326721,grad_norm: 0.839764576412299, iteration: 266404
loss: 1.030552625656128,grad_norm: 0.9999990071775688, iteration: 266405
loss: 1.0146807432174683,grad_norm: 0.9550985659059359, iteration: 266406
loss: 1.0351026058197021,grad_norm: 0.9999990073857536, iteration: 266407
loss: 1.0034898519515991,grad_norm: 0.8863908210573227, iteration: 266408
loss: 1.002190113067627,grad_norm: 0.8754207909474102, iteration: 266409
loss: 1.0018539428710938,grad_norm: 0.9182274506675384, iteration: 266410
loss: 1.0189622640609741,grad_norm: 0.83664950481275, iteration: 266411
loss: 0.9994745850563049,grad_norm: 0.9417310120206225, iteration: 266412
loss: 1.0136429071426392,grad_norm: 0.9465342631979572, iteration: 266413
loss: 0.9580420851707458,grad_norm: 0.8243046973882119, iteration: 266414
loss: 0.9777299761772156,grad_norm: 0.9230194566088081, iteration: 266415
loss: 1.0082899332046509,grad_norm: 0.817748158048901, iteration: 266416
loss: 1.0384806394577026,grad_norm: 0.8834094204854893, iteration: 266417
loss: 0.9872586727142334,grad_norm: 0.9999992715109296, iteration: 266418
loss: 1.010080337524414,grad_norm: 0.9793016547724114, iteration: 266419
loss: 0.9721839427947998,grad_norm: 0.8450387225409534, iteration: 266420
loss: 1.0442818403244019,grad_norm: 0.9102351733917905, iteration: 266421
loss: 0.9913365244865417,grad_norm: 0.881107728740902, iteration: 266422
loss: 1.002548336982727,grad_norm: 0.8835370404162656, iteration: 266423
loss: 1.0188610553741455,grad_norm: 0.9999990594097934, iteration: 266424
loss: 1.0397043228149414,grad_norm: 0.7837880637254317, iteration: 266425
loss: 0.9844330549240112,grad_norm: 0.9999994089358872, iteration: 266426
loss: 0.960959255695343,grad_norm: 0.8916825795813744, iteration: 266427
loss: 0.9700883626937866,grad_norm: 0.8867215310744779, iteration: 266428
loss: 1.0352015495300293,grad_norm: 0.9999990301229906, iteration: 266429
loss: 0.9877438545227051,grad_norm: 0.8556001133440657, iteration: 266430
loss: 1.0042479038238525,grad_norm: 0.7814593853301314, iteration: 266431
loss: 0.9996516704559326,grad_norm: 0.7625530272471881, iteration: 266432
loss: 1.002508282661438,grad_norm: 0.8933054301551729, iteration: 266433
loss: 1.0300500392913818,grad_norm: 0.8614666113096846, iteration: 266434
loss: 0.9806227087974548,grad_norm: 0.7913668457783914, iteration: 266435
loss: 1.017944574356079,grad_norm: 0.8292780462901171, iteration: 266436
loss: 1.022044062614441,grad_norm: 0.9890257958299207, iteration: 266437
loss: 0.9849932193756104,grad_norm: 0.7296822297711905, iteration: 266438
loss: 0.9946966767311096,grad_norm: 0.9351994428818082, iteration: 266439
loss: 0.9860386252403259,grad_norm: 0.8410828515887381, iteration: 266440
loss: 1.0108754634857178,grad_norm: 0.8081621477317069, iteration: 266441
loss: 1.001187801361084,grad_norm: 0.9107377468789729, iteration: 266442
loss: 1.029173731803894,grad_norm: 0.900955577226841, iteration: 266443
loss: 0.9776415228843689,grad_norm: 0.917877451604597, iteration: 266444
loss: 1.0149585008621216,grad_norm: 0.9884047189518825, iteration: 266445
loss: 0.9999198317527771,grad_norm: 0.9033580278616665, iteration: 266446
loss: 1.0231192111968994,grad_norm: 0.9999991027856774, iteration: 266447
loss: 1.0070302486419678,grad_norm: 0.8623193853239371, iteration: 266448
loss: 1.1632754802703857,grad_norm: 1.0000000307682912, iteration: 266449
loss: 0.9870222806930542,grad_norm: 0.9100924395704297, iteration: 266450
loss: 0.9973331689834595,grad_norm: 0.8694796289735389, iteration: 266451
loss: 1.0102002620697021,grad_norm: 0.7410538225951219, iteration: 266452
loss: 1.0030150413513184,grad_norm: 0.9678181728649795, iteration: 266453
loss: 0.9930074214935303,grad_norm: 0.9999989525393596, iteration: 266454
loss: 1.029268741607666,grad_norm: 0.8814980034951911, iteration: 266455
loss: 1.0178301334381104,grad_norm: 0.8904953848570066, iteration: 266456
loss: 0.99791020154953,grad_norm: 0.9618080169365143, iteration: 266457
loss: 0.9893272519111633,grad_norm: 0.851745522741461, iteration: 266458
loss: 1.0070991516113281,grad_norm: 0.8148705908164698, iteration: 266459
loss: 1.0070228576660156,grad_norm: 0.714965079576311, iteration: 266460
loss: 0.988314151763916,grad_norm: 0.8986774992913228, iteration: 266461
loss: 1.0310496091842651,grad_norm: 0.9789051792074468, iteration: 266462
loss: 1.0068881511688232,grad_norm: 0.9999993979050672, iteration: 266463
loss: 1.0045291185379028,grad_norm: 0.795729942845985, iteration: 266464
loss: 1.013550043106079,grad_norm: 0.919322894918615, iteration: 266465
loss: 0.992016613483429,grad_norm: 0.7355563667126588, iteration: 266466
loss: 1.0760178565979004,grad_norm: 1.000000021280927, iteration: 266467
loss: 1.0155807733535767,grad_norm: 0.9999991185369034, iteration: 266468
loss: 1.0201690196990967,grad_norm: 0.8011427340462222, iteration: 266469
loss: 0.9865002036094666,grad_norm: 0.8439917826925916, iteration: 266470
loss: 1.0205273628234863,grad_norm: 0.9400505340425745, iteration: 266471
loss: 0.9760032892227173,grad_norm: 0.7989371272200012, iteration: 266472
loss: 1.0532433986663818,grad_norm: 0.9304970592844245, iteration: 266473
loss: 1.0197254419326782,grad_norm: 0.9999998111863463, iteration: 266474
loss: 1.0682635307312012,grad_norm: 0.8641022204394287, iteration: 266475
loss: 0.9880003929138184,grad_norm: 0.8616017535234993, iteration: 266476
loss: 0.9905621409416199,grad_norm: 0.8414899790287345, iteration: 266477
loss: 1.0051066875457764,grad_norm: 0.8859134241815952, iteration: 266478
loss: 1.0120681524276733,grad_norm: 0.8466655515952276, iteration: 266479
loss: 0.9918886423110962,grad_norm: 0.9133699877116999, iteration: 266480
loss: 1.0179344415664673,grad_norm: 0.9999995871365216, iteration: 266481
loss: 0.9818501472473145,grad_norm: 0.765893848672827, iteration: 266482
loss: 0.9965606927871704,grad_norm: 0.8312326893241448, iteration: 266483
loss: 0.9920440912246704,grad_norm: 0.9586168006122057, iteration: 266484
loss: 1.0376051664352417,grad_norm: 0.999999262767474, iteration: 266485
loss: 0.9751561284065247,grad_norm: 0.8954221130317508, iteration: 266486
loss: 1.0326249599456787,grad_norm: 0.9999994878185624, iteration: 266487
loss: 1.0090473890304565,grad_norm: 0.8578328453622567, iteration: 266488
loss: 1.0076030492782593,grad_norm: 0.9251576572295254, iteration: 266489
loss: 0.96397864818573,grad_norm: 0.8552376366155154, iteration: 266490
loss: 0.9906171560287476,grad_norm: 0.8840195507517941, iteration: 266491
loss: 1.0277632474899292,grad_norm: 0.9301295895495285, iteration: 266492
loss: 1.0177569389343262,grad_norm: 0.7738535980791088, iteration: 266493
loss: 0.9688469171524048,grad_norm: 0.7241547599374469, iteration: 266494
loss: 1.004044532775879,grad_norm: 0.9786594181441086, iteration: 266495
loss: 0.9960359930992126,grad_norm: 0.9999989867603222, iteration: 266496
loss: 0.9913730025291443,grad_norm: 0.9199342887387347, iteration: 266497
loss: 0.9958687424659729,grad_norm: 0.9383897696726009, iteration: 266498
loss: 0.9808598756790161,grad_norm: 0.8706850527364857, iteration: 266499
loss: 1.0308837890625,grad_norm: 0.9999994664589273, iteration: 266500
loss: 0.9742109179496765,grad_norm: 0.8147804282616814, iteration: 266501
loss: 1.0071995258331299,grad_norm: 0.9999989294853986, iteration: 266502
loss: 0.9944953918457031,grad_norm: 0.955223887682001, iteration: 266503
loss: 1.0167040824890137,grad_norm: 0.9999994225434935, iteration: 266504
loss: 1.04081130027771,grad_norm: 0.9999999841311902, iteration: 266505
loss: 1.0296282768249512,grad_norm: 0.9381317507912761, iteration: 266506
loss: 1.0097891092300415,grad_norm: 0.78590214103013, iteration: 266507
loss: 1.1286201477050781,grad_norm: 0.9999990567942425, iteration: 266508
loss: 0.9771963357925415,grad_norm: 0.8172997757171037, iteration: 266509
loss: 1.0281322002410889,grad_norm: 0.7485476444811865, iteration: 266510
loss: 0.9953152537345886,grad_norm: 0.8196588546245045, iteration: 266511
loss: 0.954214334487915,grad_norm: 0.7522489404022324, iteration: 266512
loss: 0.992943286895752,grad_norm: 0.9999990707393216, iteration: 266513
loss: 0.9558250308036804,grad_norm: 0.9387794607242794, iteration: 266514
loss: 1.0226716995239258,grad_norm: 0.8752949585581881, iteration: 266515
loss: 1.010859727859497,grad_norm: 0.999999127788013, iteration: 266516
loss: 0.9908378720283508,grad_norm: 0.8802683164971397, iteration: 266517
loss: 0.9764208197593689,grad_norm: 0.8688434807484474, iteration: 266518
loss: 1.0080294609069824,grad_norm: 0.812032945344529, iteration: 266519
loss: 1.0114986896514893,grad_norm: 0.9135897671797806, iteration: 266520
loss: 1.0341466665267944,grad_norm: 0.7801653621948755, iteration: 266521
loss: 0.9980003237724304,grad_norm: 0.8264815220021776, iteration: 266522
loss: 1.0076136589050293,grad_norm: 0.9076737390347535, iteration: 266523
loss: 0.9755076766014099,grad_norm: 0.8230308889677485, iteration: 266524
loss: 1.0249279737472534,grad_norm: 0.9999990322046057, iteration: 266525
loss: 1.0097752809524536,grad_norm: 0.9317163588710762, iteration: 266526
loss: 1.0012534856796265,grad_norm: 0.8343558223058606, iteration: 266527
loss: 0.998030960559845,grad_norm: 0.8523712403055377, iteration: 266528
loss: 0.9904806017875671,grad_norm: 0.818890510130965, iteration: 266529
loss: 1.05844247341156,grad_norm: 0.989063727830833, iteration: 266530
loss: 1.0155516862869263,grad_norm: 0.7590869153522373, iteration: 266531
loss: 1.083648443222046,grad_norm: 0.9999995428366181, iteration: 266532
loss: 0.9968662261962891,grad_norm: 0.9999990355844437, iteration: 266533
loss: 0.9812149405479431,grad_norm: 0.9931112324134557, iteration: 266534
loss: 0.9916850328445435,grad_norm: 0.8868558526087182, iteration: 266535
loss: 1.0222938060760498,grad_norm: 0.999999127833723, iteration: 266536
loss: 1.0169172286987305,grad_norm: 0.9368149603702945, iteration: 266537
loss: 1.0149005651474,grad_norm: 0.8240455115032326, iteration: 266538
loss: 1.0432099103927612,grad_norm: 0.9999995805121442, iteration: 266539
loss: 1.0203733444213867,grad_norm: 0.9535078633467187, iteration: 266540
loss: 1.016636848449707,grad_norm: 0.7635724989987727, iteration: 266541
loss: 0.9632116556167603,grad_norm: 0.8573077882714676, iteration: 266542
loss: 0.9897022247314453,grad_norm: 0.8600121813579468, iteration: 266543
loss: 1.0805950164794922,grad_norm: 0.9999994213326602, iteration: 266544
loss: 1.0009329319000244,grad_norm: 0.9999997037468134, iteration: 266545
loss: 1.0182993412017822,grad_norm: 0.7281022037745168, iteration: 266546
loss: 1.0741485357284546,grad_norm: 0.9745596912721703, iteration: 266547
loss: 1.011202096939087,grad_norm: 0.8066473778385151, iteration: 266548
loss: 0.9991417527198792,grad_norm: 0.8105771187423382, iteration: 266549
loss: 1.0026791095733643,grad_norm: 0.8285311287843949, iteration: 266550
loss: 1.0140337944030762,grad_norm: 0.9999991115187773, iteration: 266551
loss: 1.0126010179519653,grad_norm: 0.8969793957721809, iteration: 266552
loss: 0.9624167680740356,grad_norm: 0.8895870572490362, iteration: 266553
loss: 0.9922491908073425,grad_norm: 0.8439745712498792, iteration: 266554
loss: 1.0202122926712036,grad_norm: 0.8536812898355813, iteration: 266555
loss: 1.0247164964675903,grad_norm: 0.8757862078303325, iteration: 266556
loss: 0.9790239930152893,grad_norm: 0.8715844705191402, iteration: 266557
loss: 1.0024033784866333,grad_norm: 0.99999911239385, iteration: 266558
loss: 1.0029959678649902,grad_norm: 0.8704208243095068, iteration: 266559
loss: 1.0255839824676514,grad_norm: 0.7682963543600586, iteration: 266560
loss: 0.9984211921691895,grad_norm: 0.9793868421732548, iteration: 266561
loss: 0.9836425185203552,grad_norm: 0.7628207159389876, iteration: 266562
loss: 1.1026784181594849,grad_norm: 0.9999995418317716, iteration: 266563
loss: 1.0058178901672363,grad_norm: 0.8315191282276698, iteration: 266564
loss: 1.0092705488204956,grad_norm: 0.8721118250946168, iteration: 266565
loss: 0.9916003346443176,grad_norm: 0.869805965090814, iteration: 266566
loss: 1.0194607973098755,grad_norm: 0.7900855668148223, iteration: 266567
loss: 0.9878682494163513,grad_norm: 0.999999106732313, iteration: 266568
loss: 1.0189526081085205,grad_norm: 0.9120304241159568, iteration: 266569
loss: 1.0080146789550781,grad_norm: 0.8998987400221053, iteration: 266570
loss: 1.0025031566619873,grad_norm: 0.8812967847667765, iteration: 266571
loss: 1.115200161933899,grad_norm: 0.9758899415458835, iteration: 266572
loss: 1.000503420829773,grad_norm: 0.8990215676912716, iteration: 266573
loss: 0.9810827374458313,grad_norm: 0.9999990075637768, iteration: 266574
loss: 1.0600563287734985,grad_norm: 0.999999816701008, iteration: 266575
loss: 0.9978001117706299,grad_norm: 0.8894749251445729, iteration: 266576
loss: 1.0426812171936035,grad_norm: 0.7867330099923616, iteration: 266577
loss: 0.9645654559135437,grad_norm: 0.9196094969727895, iteration: 266578
loss: 0.996386706829071,grad_norm: 0.7823421418541784, iteration: 266579
loss: 0.9653462171554565,grad_norm: 0.7592395794708002, iteration: 266580
loss: 1.0076385736465454,grad_norm: 0.949539648213428, iteration: 266581
loss: 0.9697092175483704,grad_norm: 0.9343222235674208, iteration: 266582
loss: 1.063970923423767,grad_norm: 0.8721223443241273, iteration: 266583
loss: 1.0235965251922607,grad_norm: 0.797453469039235, iteration: 266584
loss: 0.9879674315452576,grad_norm: 0.8097885502521571, iteration: 266585
loss: 1.0232101678848267,grad_norm: 0.9218307097103997, iteration: 266586
loss: 1.0011811256408691,grad_norm: 0.7703317801842482, iteration: 266587
loss: 1.0315732955932617,grad_norm: 0.9999996051961781, iteration: 266588
loss: 1.0374298095703125,grad_norm: 0.8158670738030096, iteration: 266589
loss: 1.004604458808899,grad_norm: 0.86423929295766, iteration: 266590
loss: 1.0338740348815918,grad_norm: 0.8452875902091307, iteration: 266591
loss: 1.000270962715149,grad_norm: 0.8342192395510002, iteration: 266592
loss: 0.9979276061058044,grad_norm: 0.9999994789151763, iteration: 266593
loss: 1.0020617246627808,grad_norm: 0.8655878402632735, iteration: 266594
loss: 1.0126649141311646,grad_norm: 0.9999990852749981, iteration: 266595
loss: 1.0837115049362183,grad_norm: 0.9958602466195009, iteration: 266596
loss: 0.953389585018158,grad_norm: 0.9581291502529065, iteration: 266597
loss: 1.0049842596054077,grad_norm: 0.8501665976539838, iteration: 266598
loss: 1.0076665878295898,grad_norm: 0.8026100526292959, iteration: 266599
loss: 1.0228781700134277,grad_norm: 0.9226787224713405, iteration: 266600
loss: 1.0163627862930298,grad_norm: 0.9527630889736243, iteration: 266601
loss: 0.9983001947402954,grad_norm: 0.7639565004789537, iteration: 266602
loss: 1.0004513263702393,grad_norm: 0.9578687369671081, iteration: 266603
loss: 1.0205637216567993,grad_norm: 0.9953493044067815, iteration: 266604
loss: 0.9995132684707642,grad_norm: 0.9626086788549139, iteration: 266605
loss: 1.0029629468917847,grad_norm: 0.9047192988664544, iteration: 266606
loss: 1.0375412702560425,grad_norm: 0.9999998015009868, iteration: 266607
loss: 1.0468553304672241,grad_norm: 0.999999768902291, iteration: 266608
loss: 0.9907211661338806,grad_norm: 0.9670673927636098, iteration: 266609
loss: 1.0114814043045044,grad_norm: 0.8850049022795887, iteration: 266610
loss: 0.9842478036880493,grad_norm: 0.8605010772772705, iteration: 266611
loss: 0.9910292029380798,grad_norm: 0.8170729831279155, iteration: 266612
loss: 1.0111825466156006,grad_norm: 0.7880810797169453, iteration: 266613
loss: 0.9725539088249207,grad_norm: 0.9333103637245611, iteration: 266614
loss: 0.98736172914505,grad_norm: 0.8382419127027871, iteration: 266615
loss: 1.0038259029388428,grad_norm: 0.8584540226415807, iteration: 266616
loss: 0.964712381362915,grad_norm: 0.9314426480850293, iteration: 266617
loss: 1.0014718770980835,grad_norm: 0.9999991685376454, iteration: 266618
loss: 1.0018806457519531,grad_norm: 0.8313795191640848, iteration: 266619
loss: 1.0052074193954468,grad_norm: 0.8080205177920082, iteration: 266620
loss: 0.977666437625885,grad_norm: 0.8159492591708456, iteration: 266621
loss: 1.0444509983062744,grad_norm: 0.9999992738276356, iteration: 266622
loss: 1.1689869165420532,grad_norm: 0.9999990619024111, iteration: 266623
loss: 0.9754643440246582,grad_norm: 0.7878841542452838, iteration: 266624
loss: 1.0319197177886963,grad_norm: 0.9999993381270682, iteration: 266625
loss: 0.9930007457733154,grad_norm: 0.8073687641686109, iteration: 266626
loss: 1.0153601169586182,grad_norm: 0.7241986664202856, iteration: 266627
loss: 0.9747630953788757,grad_norm: 0.8781047001821739, iteration: 266628
loss: 1.030981183052063,grad_norm: 0.9708256016506075, iteration: 266629
loss: 0.9838650226593018,grad_norm: 0.9999990772183247, iteration: 266630
loss: 1.0082989931106567,grad_norm: 0.9999991449547766, iteration: 266631
loss: 1.0049799680709839,grad_norm: 0.841034033166349, iteration: 266632
loss: 1.0061990022659302,grad_norm: 0.8998261659837155, iteration: 266633
loss: 1.002146601676941,grad_norm: 0.9999991832475306, iteration: 266634
loss: 0.9832409620285034,grad_norm: 0.7752065933682131, iteration: 266635
loss: 0.9931997060775757,grad_norm: 0.8885302616489278, iteration: 266636
loss: 1.0521056652069092,grad_norm: 0.9999998267064093, iteration: 266637
loss: 1.0279213190078735,grad_norm: 0.9287362761726666, iteration: 266638
loss: 1.0049338340759277,grad_norm: 0.799565285264091, iteration: 266639
loss: 1.0041121244430542,grad_norm: 0.7940074273839635, iteration: 266640
loss: 1.0064711570739746,grad_norm: 0.8036902434106105, iteration: 266641
loss: 0.9922114014625549,grad_norm: 0.9999991776190025, iteration: 266642
loss: 1.0491304397583008,grad_norm: 0.9999994807816874, iteration: 266643
loss: 1.1018939018249512,grad_norm: 0.9999996264838207, iteration: 266644
loss: 1.0201947689056396,grad_norm: 0.999999213573754, iteration: 266645
loss: 0.9819109439849854,grad_norm: 0.9643961227691348, iteration: 266646
loss: 0.9987049102783203,grad_norm: 0.8376686258137086, iteration: 266647
loss: 0.9262955784797668,grad_norm: 0.9999992647337685, iteration: 266648
loss: 0.9981715679168701,grad_norm: 0.9183685388081491, iteration: 266649
loss: 1.0381065607070923,grad_norm: 0.9303141657012992, iteration: 266650
loss: 1.0126667022705078,grad_norm: 0.9999994547834568, iteration: 266651
loss: 0.9984006881713867,grad_norm: 0.836824882781566, iteration: 266652
loss: 1.0040143728256226,grad_norm: 0.8979102888767311, iteration: 266653
loss: 0.987301766872406,grad_norm: 0.8095399381202946, iteration: 266654
loss: 0.9957384467124939,grad_norm: 0.9999999776250144, iteration: 266655
loss: 1.009352684020996,grad_norm: 0.8547456294789835, iteration: 266656
loss: 0.9801015853881836,grad_norm: 0.9731656098954398, iteration: 266657
loss: 0.9591895937919617,grad_norm: 0.8673654635736505, iteration: 266658
loss: 1.1505498886108398,grad_norm: 0.999999205801226, iteration: 266659
loss: 0.989842414855957,grad_norm: 0.9999998235574628, iteration: 266660
loss: 0.9731632471084595,grad_norm: 0.9300218620783602, iteration: 266661
loss: 1.0082957744598389,grad_norm: 0.804202595056774, iteration: 266662
loss: 1.053138017654419,grad_norm: 0.9999992294487093, iteration: 266663
loss: 0.998290479183197,grad_norm: 0.8033886562471773, iteration: 266664
loss: 0.9941004514694214,grad_norm: 0.9952471766130363, iteration: 266665
loss: 0.9739629626274109,grad_norm: 0.8455292166729661, iteration: 266666
loss: 0.9967482686042786,grad_norm: 0.72842178279121, iteration: 266667
loss: 1.0126979351043701,grad_norm: 0.9573511063501636, iteration: 266668
loss: 1.017250895500183,grad_norm: 0.9999991107673459, iteration: 266669
loss: 0.9975539445877075,grad_norm: 0.8358376539959836, iteration: 266670
loss: 1.060133934020996,grad_norm: 0.9999990350305363, iteration: 266671
loss: 1.0062345266342163,grad_norm: 0.9395625556325009, iteration: 266672
loss: 1.016813039779663,grad_norm: 0.8765488203493287, iteration: 266673
loss: 1.0423526763916016,grad_norm: 0.8478533731405719, iteration: 266674
loss: 1.0480250120162964,grad_norm: 0.999999745876095, iteration: 266675
loss: 1.0757286548614502,grad_norm: 0.9999997268844785, iteration: 266676
loss: 0.9802358150482178,grad_norm: 0.8415470115486371, iteration: 266677
loss: 1.0436122417449951,grad_norm: 0.9999999468830532, iteration: 266678
loss: 0.9884868860244751,grad_norm: 0.9999991498293579, iteration: 266679
loss: 1.014305591583252,grad_norm: 0.9999995273456296, iteration: 266680
loss: 0.9948641061782837,grad_norm: 0.8808077124784721, iteration: 266681
loss: 0.9990905523300171,grad_norm: 0.7688857572434866, iteration: 266682
loss: 1.0055668354034424,grad_norm: 0.8802005243271284, iteration: 266683
loss: 0.9799601435661316,grad_norm: 0.8475306764709429, iteration: 266684
loss: 1.0241869688034058,grad_norm: 0.8998688864564467, iteration: 266685
loss: 0.9981536865234375,grad_norm: 0.9734309564787492, iteration: 266686
loss: 1.0096685886383057,grad_norm: 0.7662824455506919, iteration: 266687
loss: 1.0037410259246826,grad_norm: 0.8468674468580001, iteration: 266688
loss: 0.993644654750824,grad_norm: 0.8317761824384778, iteration: 266689
loss: 0.992540180683136,grad_norm: 0.853332523810489, iteration: 266690
loss: 0.9850196838378906,grad_norm: 0.9940155306799181, iteration: 266691
loss: 1.0195976495742798,grad_norm: 0.9723861566774993, iteration: 266692
loss: 0.972751796245575,grad_norm: 0.9999992175645557, iteration: 266693
loss: 1.0266026258468628,grad_norm: 0.9522827145466259, iteration: 266694
loss: 0.9789040684700012,grad_norm: 0.9347394620191737, iteration: 266695
loss: 0.9811853170394897,grad_norm: 0.8578690699988767, iteration: 266696
loss: 0.9952594041824341,grad_norm: 0.9999994431931208, iteration: 266697
loss: 0.9612388014793396,grad_norm: 0.9319783295389643, iteration: 266698
loss: 1.0228865146636963,grad_norm: 0.8843089379013244, iteration: 266699
loss: 0.9833267331123352,grad_norm: 0.7778067073984017, iteration: 266700
loss: 0.9912300109863281,grad_norm: 0.9999992738184347, iteration: 266701
loss: 0.9921299815177917,grad_norm: 0.9954892692584464, iteration: 266702
loss: 0.9910898804664612,grad_norm: 0.9750801363692478, iteration: 266703
loss: 0.9653164744377136,grad_norm: 0.8415052242565161, iteration: 266704
loss: 0.986874520778656,grad_norm: 0.7677961025744224, iteration: 266705
loss: 0.9668933153152466,grad_norm: 0.8990082387035553, iteration: 266706
loss: 0.9897740483283997,grad_norm: 0.7600141831167876, iteration: 266707
loss: 0.9858623743057251,grad_norm: 0.978575137557202, iteration: 266708
loss: 0.9597426652908325,grad_norm: 0.9200463212631421, iteration: 266709
loss: 0.9839001297950745,grad_norm: 0.7938306574147336, iteration: 266710
loss: 1.0202391147613525,grad_norm: 0.6923194454535733, iteration: 266711
loss: 0.9874807596206665,grad_norm: 0.9999990414652598, iteration: 266712
loss: 0.9785540699958801,grad_norm: 0.9050833174693699, iteration: 266713
loss: 0.9783250689506531,grad_norm: 0.8722073258679808, iteration: 266714
loss: 1.0150288343429565,grad_norm: 0.7395134325912724, iteration: 266715
loss: 1.0559382438659668,grad_norm: 0.8793791912216802, iteration: 266716
loss: 1.0273810625076294,grad_norm: 0.9999990379201249, iteration: 266717
loss: 1.0039340257644653,grad_norm: 0.911274261908153, iteration: 266718
loss: 0.9642627239227295,grad_norm: 0.8766567720928723, iteration: 266719
loss: 0.9786449074745178,grad_norm: 0.8215685395185852, iteration: 266720
loss: 1.0152971744537354,grad_norm: 0.8692815192341482, iteration: 266721
loss: 1.0059605836868286,grad_norm: 0.8748724403778056, iteration: 266722
loss: 0.9862553477287292,grad_norm: 0.999999548842635, iteration: 266723
loss: 0.9912614226341248,grad_norm: 0.8841635991116485, iteration: 266724
loss: 1.0026686191558838,grad_norm: 0.8684016435316071, iteration: 266725
loss: 0.9667783975601196,grad_norm: 0.8594577019724249, iteration: 266726
loss: 0.9604457020759583,grad_norm: 0.8726572792946069, iteration: 266727
loss: 1.0273797512054443,grad_norm: 0.8419899417749749, iteration: 266728
loss: 0.9794550538063049,grad_norm: 0.8176927359082421, iteration: 266729
loss: 1.01564359664917,grad_norm: 0.8842188033094408, iteration: 266730
loss: 0.9672749042510986,grad_norm: 0.9503919649696564, iteration: 266731
loss: 0.9864064455032349,grad_norm: 0.8846130003664912, iteration: 266732
loss: 0.9662964940071106,grad_norm: 0.8318820601602749, iteration: 266733
loss: 0.9844779968261719,grad_norm: 0.7523896013379625, iteration: 266734
loss: 0.9928011894226074,grad_norm: 0.8008000295875413, iteration: 266735
loss: 0.9570012092590332,grad_norm: 0.7306247526243207, iteration: 266736
loss: 1.0044561624526978,grad_norm: 0.9999989628391178, iteration: 266737
loss: 1.002607822418213,grad_norm: 0.9999996268391702, iteration: 266738
loss: 1.0080935955047607,grad_norm: 0.7363435211162642, iteration: 266739
loss: 0.982616662979126,grad_norm: 0.8553400419500272, iteration: 266740
loss: 1.013764500617981,grad_norm: 0.9207709272455777, iteration: 266741
loss: 0.9880032539367676,grad_norm: 0.832180742387783, iteration: 266742
loss: 1.019260048866272,grad_norm: 0.9999992781204453, iteration: 266743
loss: 1.0040507316589355,grad_norm: 0.9999991967008383, iteration: 266744
loss: 0.9603772163391113,grad_norm: 0.8830400609198618, iteration: 266745
loss: 1.005434274673462,grad_norm: 0.923162178869762, iteration: 266746
loss: 1.0172889232635498,grad_norm: 0.8409923901121032, iteration: 266747
loss: 0.9537770748138428,grad_norm: 0.9999989081436222, iteration: 266748
loss: 1.0033844709396362,grad_norm: 0.8946161259659662, iteration: 266749
loss: 0.9804435968399048,grad_norm: 0.988101165114482, iteration: 266750
loss: 1.0334686040878296,grad_norm: 0.8829827926771512, iteration: 266751
loss: 0.9845298528671265,grad_norm: 0.8531271362823006, iteration: 266752
loss: 1.0120863914489746,grad_norm: 0.9999994276106032, iteration: 266753
loss: 0.9893891215324402,grad_norm: 0.9999991684857992, iteration: 266754
loss: 0.9935881495475769,grad_norm: 0.9080622183545723, iteration: 266755
loss: 0.9672513008117676,grad_norm: 0.8912651730432397, iteration: 266756
loss: 0.9934644103050232,grad_norm: 0.6808888830689196, iteration: 266757
loss: 1.0030289888381958,grad_norm: 0.8251701032215807, iteration: 266758
loss: 0.9795911312103271,grad_norm: 0.8676612981625662, iteration: 266759
loss: 1.020135521888733,grad_norm: 0.9798069950374643, iteration: 266760
loss: 1.024673581123352,grad_norm: 0.9999991293402463, iteration: 266761
loss: 0.9990907311439514,grad_norm: 0.9999991992559835, iteration: 266762
loss: 1.031766414642334,grad_norm: 0.9996742600060922, iteration: 266763
loss: 0.9768807888031006,grad_norm: 0.8394824828266966, iteration: 266764
loss: 1.05745530128479,grad_norm: 0.9999989559821069, iteration: 266765
loss: 1.0085073709487915,grad_norm: 0.9988826798690127, iteration: 266766
loss: 1.014327049255371,grad_norm: 0.8593632405205679, iteration: 266767
loss: 0.9894967079162598,grad_norm: 0.9999992142083224, iteration: 266768
loss: 0.9643019437789917,grad_norm: 0.8846835117107316, iteration: 266769
loss: 0.981706976890564,grad_norm: 0.8698036233231676, iteration: 266770
loss: 0.9983323216438293,grad_norm: 0.8648055499762856, iteration: 266771
loss: 1.1181763410568237,grad_norm: 0.884844759032711, iteration: 266772
loss: 0.9788579344749451,grad_norm: 0.999999345704001, iteration: 266773
loss: 1.0133343935012817,grad_norm: 0.9999990554636768, iteration: 266774
loss: 1.0416396856307983,grad_norm: 0.9999992556018993, iteration: 266775
loss: 0.9932923913002014,grad_norm: 0.9312368570662076, iteration: 266776
loss: 1.0295164585113525,grad_norm: 0.9999999301977014, iteration: 266777
loss: 0.9744568467140198,grad_norm: 0.9331289897781662, iteration: 266778
loss: 1.0075665712356567,grad_norm: 0.9697473512529905, iteration: 266779
loss: 1.0204904079437256,grad_norm: 0.9034443462286266, iteration: 266780
loss: 1.004245638847351,grad_norm: 0.6946114294175635, iteration: 266781
loss: 1.0268573760986328,grad_norm: 0.9999992031974831, iteration: 266782
loss: 0.9383647441864014,grad_norm: 0.9999990690538835, iteration: 266783
loss: 0.9870773553848267,grad_norm: 0.7253500272232593, iteration: 266784
loss: 1.014169692993164,grad_norm: 0.7688006242295136, iteration: 266785
loss: 1.029059648513794,grad_norm: 0.8987426152166106, iteration: 266786
loss: 1.0061752796173096,grad_norm: 0.8967142746981434, iteration: 266787
loss: 1.0189770460128784,grad_norm: 0.9659339024272863, iteration: 266788
loss: 0.9939696192741394,grad_norm: 0.9303980947826767, iteration: 266789
loss: 1.0056480169296265,grad_norm: 0.9375440788430558, iteration: 266790
loss: 0.9991595149040222,grad_norm: 0.7481883882004681, iteration: 266791
loss: 0.9746570587158203,grad_norm: 0.8718696579150482, iteration: 266792
loss: 1.043468713760376,grad_norm: 0.8228347213433134, iteration: 266793
loss: 1.0477887392044067,grad_norm: 0.9520080549435279, iteration: 266794
loss: 1.0154298543930054,grad_norm: 0.7729083022090838, iteration: 266795
loss: 0.9950457215309143,grad_norm: 0.7979068523900118, iteration: 266796
loss: 0.9925159811973572,grad_norm: 0.9999989952586343, iteration: 266797
loss: 0.9734771847724915,grad_norm: 0.7652123467142645, iteration: 266798
loss: 0.9843921661376953,grad_norm: 0.8026848112586968, iteration: 266799
loss: 0.9897321462631226,grad_norm: 0.9543914986409178, iteration: 266800
loss: 1.0030021667480469,grad_norm: 0.9999995567708322, iteration: 266801
loss: 0.987986147403717,grad_norm: 0.8173912398047991, iteration: 266802
loss: 0.9671633839607239,grad_norm: 0.8976851570006628, iteration: 266803
loss: 0.9768359065055847,grad_norm: 0.9999996055507315, iteration: 266804
loss: 0.9891768097877502,grad_norm: 0.7777501173841531, iteration: 266805
loss: 1.0049210786819458,grad_norm: 0.8966690892163789, iteration: 266806
loss: 1.0127044916152954,grad_norm: 0.9428479753637199, iteration: 266807
loss: 0.9992826581001282,grad_norm: 0.9989857000179614, iteration: 266808
loss: 0.986956775188446,grad_norm: 0.8383987431326133, iteration: 266809
loss: 1.0425448417663574,grad_norm: 0.9748685194756684, iteration: 266810
loss: 1.026540756225586,grad_norm: 0.8334273804247365, iteration: 266811
loss: 0.9938730001449585,grad_norm: 0.8836420854544232, iteration: 266812
loss: 1.0152268409729004,grad_norm: 0.7988049929955625, iteration: 266813
loss: 1.0291662216186523,grad_norm: 0.9999993886581073, iteration: 266814
loss: 0.951201856136322,grad_norm: 0.7949341182074159, iteration: 266815
loss: 1.0102708339691162,grad_norm: 0.9101957412405131, iteration: 266816
loss: 0.9678093791007996,grad_norm: 0.8260168304157102, iteration: 266817
loss: 0.9649166464805603,grad_norm: 0.8154818950338235, iteration: 266818
loss: 0.9860990643501282,grad_norm: 0.8557714723746332, iteration: 266819
loss: 0.962117612361908,grad_norm: 0.8623187056654001, iteration: 266820
loss: 0.9989270567893982,grad_norm: 0.9999991666164398, iteration: 266821
loss: 1.0432994365692139,grad_norm: 0.9708999094089849, iteration: 266822
loss: 1.0535129308700562,grad_norm: 0.9999995213663306, iteration: 266823
loss: 0.9875117540359497,grad_norm: 0.8584307944517677, iteration: 266824
loss: 0.9598524570465088,grad_norm: 0.9535540098687553, iteration: 266825
loss: 1.0176035165786743,grad_norm: 0.9999990771509506, iteration: 266826
loss: 1.0074400901794434,grad_norm: 0.8468936006479079, iteration: 266827
loss: 0.9958385229110718,grad_norm: 0.9999990494720679, iteration: 266828
loss: 1.0046098232269287,grad_norm: 0.9353879305878965, iteration: 266829
loss: 0.9749042987823486,grad_norm: 0.8809791326513361, iteration: 266830
loss: 0.9592691659927368,grad_norm: 0.9235251445178775, iteration: 266831
loss: 0.9713379740715027,grad_norm: 0.9999991186792748, iteration: 266832
loss: 1.0377329587936401,grad_norm: 0.8700754052892155, iteration: 266833
loss: 1.1631057262420654,grad_norm: 0.9999996434746876, iteration: 266834
loss: 0.9619404673576355,grad_norm: 0.9999990782968394, iteration: 266835
loss: 0.9626950621604919,grad_norm: 0.779416211024785, iteration: 266836
loss: 0.9999160170555115,grad_norm: 0.9999993988316541, iteration: 266837
loss: 0.9787856340408325,grad_norm: 0.9577720981892616, iteration: 266838
loss: 1.0073556900024414,grad_norm: 0.9999990318763752, iteration: 266839
loss: 0.9969968199729919,grad_norm: 0.9999998138637761, iteration: 266840
loss: 0.9949901103973389,grad_norm: 0.8421313320747614, iteration: 266841
loss: 0.9833827018737793,grad_norm: 0.999999148372326, iteration: 266842
loss: 1.0123240947723389,grad_norm: 0.8659276024446041, iteration: 266843
loss: 1.0693268775939941,grad_norm: 0.9999996460801965, iteration: 266844
loss: 1.0381475687026978,grad_norm: 0.845085085702898, iteration: 266845
loss: 1.0019103288650513,grad_norm: 0.8782572678686177, iteration: 266846
loss: 1.0095577239990234,grad_norm: 0.9301389990298826, iteration: 266847
loss: 0.9718747735023499,grad_norm: 0.9374595439765698, iteration: 266848
loss: 0.9963204860687256,grad_norm: 0.8477217581461771, iteration: 266849
loss: 0.9987530708312988,grad_norm: 0.9652280495200161, iteration: 266850
loss: 0.9736256003379822,grad_norm: 0.8723511401787925, iteration: 266851
loss: 0.9853442311286926,grad_norm: 0.7637104710783849, iteration: 266852
loss: 1.0087189674377441,grad_norm: 0.9760810810401269, iteration: 266853
loss: 1.015715479850769,grad_norm: 0.8264302949726923, iteration: 266854
loss: 0.9986841082572937,grad_norm: 0.8837446106975051, iteration: 266855
loss: 0.9950532913208008,grad_norm: 0.9696611046038512, iteration: 266856
loss: 1.010188102722168,grad_norm: 0.7191187784363955, iteration: 266857
loss: 1.129007339477539,grad_norm: 0.9999994590255938, iteration: 266858
loss: 0.9631698727607727,grad_norm: 0.9345955142788884, iteration: 266859
loss: 1.006996989250183,grad_norm: 0.8758472158596448, iteration: 266860
loss: 1.0145186185836792,grad_norm: 0.7323719361595754, iteration: 266861
loss: 0.9818065762519836,grad_norm: 0.9999990481042251, iteration: 266862
loss: 1.0281122922897339,grad_norm: 0.9391232101354438, iteration: 266863
loss: 0.9955925345420837,grad_norm: 0.9999993169618242, iteration: 266864
loss: 1.001967191696167,grad_norm: 0.8969117842045574, iteration: 266865
loss: 0.9953166842460632,grad_norm: 0.9999991182375655, iteration: 266866
loss: 1.0265264511108398,grad_norm: 0.883511206035907, iteration: 266867
loss: 1.0025312900543213,grad_norm: 0.8353732459257147, iteration: 266868
loss: 1.001894474029541,grad_norm: 0.8091005380981254, iteration: 266869
loss: 1.001673698425293,grad_norm: 0.7136886838788006, iteration: 266870
loss: 1.0243110656738281,grad_norm: 0.9869552688345402, iteration: 266871
loss: 0.9810706973075867,grad_norm: 0.804805943114976, iteration: 266872
loss: 1.074784755706787,grad_norm: 0.9999991012695976, iteration: 266873
loss: 1.1071958541870117,grad_norm: 0.9404383282134962, iteration: 266874
loss: 1.0454201698303223,grad_norm: 0.9999998428342354, iteration: 266875
loss: 0.988538920879364,grad_norm: 0.8429470130002005, iteration: 266876
loss: 1.0084254741668701,grad_norm: 0.9999991434165953, iteration: 266877
loss: 0.9959478378295898,grad_norm: 0.76383786561526, iteration: 266878
loss: 0.9593610763549805,grad_norm: 0.8212810238844512, iteration: 266879
loss: 1.0384290218353271,grad_norm: 0.9999996116904334, iteration: 266880
loss: 0.9623391032218933,grad_norm: 0.9090525442481004, iteration: 266881
loss: 1.0344821214675903,grad_norm: 0.9946080731352978, iteration: 266882
loss: 1.0035847425460815,grad_norm: 0.8009111667801219, iteration: 266883
loss: 0.9896920919418335,grad_norm: 0.9511234275469925, iteration: 266884
loss: 1.09182870388031,grad_norm: 0.9999997481443949, iteration: 266885
loss: 0.9930253624916077,grad_norm: 0.8410307439288108, iteration: 266886
loss: 1.0074447393417358,grad_norm: 0.8905749316475464, iteration: 266887
loss: 1.0101394653320312,grad_norm: 0.8127375690366917, iteration: 266888
loss: 1.01915442943573,grad_norm: 0.9999991208272988, iteration: 266889
loss: 0.9888182878494263,grad_norm: 0.7654842769849678, iteration: 266890
loss: 0.9600748419761658,grad_norm: 0.7380452348113442, iteration: 266891
loss: 1.0660090446472168,grad_norm: 0.9557983262733993, iteration: 266892
loss: 1.0282158851623535,grad_norm: 0.9999990151681246, iteration: 266893
loss: 0.9912332892417908,grad_norm: 0.9273776335833205, iteration: 266894
loss: 0.9988666772842407,grad_norm: 0.9999996456662925, iteration: 266895
loss: 1.0302772521972656,grad_norm: 0.8812670744265861, iteration: 266896
loss: 1.0013532638549805,grad_norm: 0.8593438180418211, iteration: 266897
loss: 1.0073323249816895,grad_norm: 0.7697785386753676, iteration: 266898
loss: 1.0189831256866455,grad_norm: 0.8487189762352941, iteration: 266899
loss: 0.9964792728424072,grad_norm: 0.9999990546613184, iteration: 266900
loss: 1.0885330438613892,grad_norm: 0.8715721180137922, iteration: 266901
loss: 1.0077582597732544,grad_norm: 0.9999990521932092, iteration: 266902
loss: 1.0252982378005981,grad_norm: 0.8358889144989512, iteration: 266903
loss: 0.9869904518127441,grad_norm: 0.9999992241436101, iteration: 266904
loss: 1.0071693658828735,grad_norm: 0.8393191019201811, iteration: 266905
loss: 1.0163973569869995,grad_norm: 0.906691835346771, iteration: 266906
loss: 0.9695882201194763,grad_norm: 0.8365728789547752, iteration: 266907
loss: 0.9572927355766296,grad_norm: 0.9999991833678279, iteration: 266908
loss: 0.9699848890304565,grad_norm: 0.8383037593188325, iteration: 266909
loss: 0.9587485790252686,grad_norm: 0.913429281033072, iteration: 266910
loss: 1.0780686140060425,grad_norm: 0.9999992517194372, iteration: 266911
loss: 1.0035076141357422,grad_norm: 0.7515692830735735, iteration: 266912
loss: 1.0747896432876587,grad_norm: 0.9999993157077722, iteration: 266913
loss: 0.9665610790252686,grad_norm: 0.9999990037837478, iteration: 266914
loss: 0.9828826189041138,grad_norm: 0.9059382408596898, iteration: 266915
loss: 1.0574703216552734,grad_norm: 0.99999943834599, iteration: 266916
loss: 1.060706615447998,grad_norm: 0.8433454207186903, iteration: 266917
loss: 1.0118292570114136,grad_norm: 0.7900011538168551, iteration: 266918
loss: 1.0409940481185913,grad_norm: 0.8995217626172269, iteration: 266919
loss: 1.0062153339385986,grad_norm: 0.8843735485154356, iteration: 266920
loss: 1.0445985794067383,grad_norm: 0.8726842109491643, iteration: 266921
loss: 0.9822188019752502,grad_norm: 0.7938544883140631, iteration: 266922
loss: 0.9719838500022888,grad_norm: 0.9999990854763922, iteration: 266923
loss: 0.9881007075309753,grad_norm: 0.9053912465647972, iteration: 266924
loss: 0.9551512002944946,grad_norm: 0.8805042378778717, iteration: 266925
loss: 0.9476726651191711,grad_norm: 0.8945095403899203, iteration: 266926
loss: 1.0235612392425537,grad_norm: 0.9999991421864038, iteration: 266927
loss: 1.0146135091781616,grad_norm: 0.9999992052668822, iteration: 266928
loss: 0.9905518293380737,grad_norm: 0.8888219808693794, iteration: 266929
loss: 1.0063908100128174,grad_norm: 0.9999993574035844, iteration: 266930
loss: 1.000596284866333,grad_norm: 0.8182203267072015, iteration: 266931
loss: 0.9862106442451477,grad_norm: 0.9612607047189722, iteration: 266932
loss: 0.9820109009742737,grad_norm: 0.7363645703143853, iteration: 266933
loss: 0.970044732093811,grad_norm: 0.7718605002806096, iteration: 266934
loss: 0.9650780558586121,grad_norm: 0.8543770836031643, iteration: 266935
loss: 0.993428647518158,grad_norm: 0.8175779875312862, iteration: 266936
loss: 1.0659477710723877,grad_norm: 0.8273483041487004, iteration: 266937
loss: 1.0105184316635132,grad_norm: 0.7919915788538661, iteration: 266938
loss: 1.0048362016677856,grad_norm: 0.7178311837152347, iteration: 266939
loss: 1.0747720003128052,grad_norm: 0.9999996106598941, iteration: 266940
loss: 1.0179483890533447,grad_norm: 0.9777931468811217, iteration: 266941
loss: 1.0086071491241455,grad_norm: 0.8967775486409062, iteration: 266942
loss: 0.9984979033470154,grad_norm: 0.8161673007367448, iteration: 266943
loss: 0.9595260620117188,grad_norm: 0.9999990762150024, iteration: 266944
loss: 0.9941848516464233,grad_norm: 0.7678511756948599, iteration: 266945
loss: 0.9963422417640686,grad_norm: 0.9237080562429627, iteration: 266946
loss: 1.031220555305481,grad_norm: 0.9999990052915667, iteration: 266947
loss: 0.955087423324585,grad_norm: 0.9094550818942618, iteration: 266948
loss: 0.9659861326217651,grad_norm: 0.9491200985607815, iteration: 266949
loss: 1.0011807680130005,grad_norm: 0.8347436680376855, iteration: 266950
loss: 1.0118850469589233,grad_norm: 0.9926476830860603, iteration: 266951
loss: 0.9992437958717346,grad_norm: 0.8602976479369323, iteration: 266952
loss: 1.0971342325210571,grad_norm: 0.9999991559398521, iteration: 266953
loss: 1.009238362312317,grad_norm: 0.8639091154445322, iteration: 266954
loss: 0.9735849499702454,grad_norm: 0.7774892283066408, iteration: 266955
loss: 0.9946334362030029,grad_norm: 0.9999989708496211, iteration: 266956
loss: 0.9810226559638977,grad_norm: 0.8202665140941969, iteration: 266957
loss: 0.9744147062301636,grad_norm: 0.8708021470900136, iteration: 266958
loss: 1.0264379978179932,grad_norm: 0.9999990719403277, iteration: 266959
loss: 1.0247364044189453,grad_norm: 0.9999994136684636, iteration: 266960
loss: 1.0456870794296265,grad_norm: 0.9999990832338743, iteration: 266961
loss: 0.974114179611206,grad_norm: 0.9999992103577853, iteration: 266962
loss: 1.0203570127487183,grad_norm: 0.9999991219376261, iteration: 266963
loss: 0.9858501553535461,grad_norm: 0.9774455894720965, iteration: 266964
loss: 0.9994150400161743,grad_norm: 0.9471947918012918, iteration: 266965
loss: 0.9922629594802856,grad_norm: 0.8749762237688645, iteration: 266966
loss: 0.9975854158401489,grad_norm: 0.9999990572425247, iteration: 266967
loss: 1.0274598598480225,grad_norm: 0.9223022120150602, iteration: 266968
loss: 0.9894960522651672,grad_norm: 0.8169152146928893, iteration: 266969
loss: 1.01738703250885,grad_norm: 0.7991167211588442, iteration: 266970
loss: 1.040584683418274,grad_norm: 0.9999991437738573, iteration: 266971
loss: 1.0151565074920654,grad_norm: 0.7982385250534523, iteration: 266972
loss: 0.9809871315956116,grad_norm: 0.8201010735678758, iteration: 266973
loss: 1.0567374229431152,grad_norm: 0.9999992454044806, iteration: 266974
loss: 1.0890886783599854,grad_norm: 0.9999992597561406, iteration: 266975
loss: 0.9726850986480713,grad_norm: 0.8936445680518592, iteration: 266976
loss: 1.016570806503296,grad_norm: 0.8478198870706669, iteration: 266977
loss: 1.0330270528793335,grad_norm: 0.9020426242794031, iteration: 266978
loss: 0.9970682263374329,grad_norm: 0.998837631873662, iteration: 266979
loss: 0.9767659306526184,grad_norm: 0.8317676602450947, iteration: 266980
loss: 0.9942794442176819,grad_norm: 0.9225082510304694, iteration: 266981
loss: 0.9769344925880432,grad_norm: 0.876832475202185, iteration: 266982
loss: 0.9950038194656372,grad_norm: 0.8287164897145679, iteration: 266983
loss: 0.9950074553489685,grad_norm: 0.754484591426012, iteration: 266984
loss: 1.0065886974334717,grad_norm: 0.8876565646878101, iteration: 266985
loss: 0.9971643090248108,grad_norm: 0.8704430075684748, iteration: 266986
loss: 0.9926131963729858,grad_norm: 0.9999992257105494, iteration: 266987
loss: 0.9559717178344727,grad_norm: 0.9398775114816359, iteration: 266988
loss: 1.0077295303344727,grad_norm: 0.8351972073171955, iteration: 266989
loss: 1.009085774421692,grad_norm: 0.8387339136816905, iteration: 266990
loss: 1.0435410737991333,grad_norm: 0.9999990852995261, iteration: 266991
loss: 0.9474074840545654,grad_norm: 0.8997227103509114, iteration: 266992
loss: 1.0462455749511719,grad_norm: 0.9163659883027012, iteration: 266993
loss: 0.9823125004768372,grad_norm: 0.9028783167293023, iteration: 266994
loss: 1.039304256439209,grad_norm: 0.8530517691694615, iteration: 266995
loss: 1.082983136177063,grad_norm: 0.9996091046888163, iteration: 266996
loss: 0.9971957802772522,grad_norm: 0.9999992607212671, iteration: 266997
loss: 0.9849476218223572,grad_norm: 0.9999992158763708, iteration: 266998
loss: 0.94806307554245,grad_norm: 0.7717904612451653, iteration: 266999
loss: 0.9796796441078186,grad_norm: 0.7042507827548312, iteration: 267000
loss: 1.017938256263733,grad_norm: 0.9999990799025712, iteration: 267001
loss: 0.9866117835044861,grad_norm: 0.9999989888825456, iteration: 267002
loss: 1.031455397605896,grad_norm: 0.86222909247991, iteration: 267003
loss: 0.971415638923645,grad_norm: 0.8452055212234769, iteration: 267004
loss: 0.991929829120636,grad_norm: 0.801440913390745, iteration: 267005
loss: 0.9808334112167358,grad_norm: 0.999999078139423, iteration: 267006
loss: 0.9715521931648254,grad_norm: 0.8734601499001701, iteration: 267007
loss: 0.9774149060249329,grad_norm: 0.77515464208554, iteration: 267008
loss: 0.9895191192626953,grad_norm: 0.9009849462810373, iteration: 267009
loss: 0.9734011292457581,grad_norm: 0.9351982491697188, iteration: 267010
loss: 1.000345230102539,grad_norm: 0.8963995235995836, iteration: 267011
loss: 0.9832423329353333,grad_norm: 0.7971364844001613, iteration: 267012
loss: 1.0035227537155151,grad_norm: 0.9999752316030113, iteration: 267013
loss: 1.0297795534133911,grad_norm: 0.9999995851716194, iteration: 267014
loss: 1.0317693948745728,grad_norm: 0.748520388707536, iteration: 267015
loss: 1.0259805917739868,grad_norm: 0.8006976888656743, iteration: 267016
loss: 0.9763685464859009,grad_norm: 0.8121695062975989, iteration: 267017
loss: 0.9758177399635315,grad_norm: 0.8072212653835567, iteration: 267018
loss: 0.9911049008369446,grad_norm: 0.8459976859487804, iteration: 267019
loss: 1.0363885164260864,grad_norm: 0.9999998732398846, iteration: 267020
loss: 1.012337565422058,grad_norm: 0.7775030165481527, iteration: 267021
loss: 1.0603998899459839,grad_norm: 0.8868102548110323, iteration: 267022
loss: 1.008716106414795,grad_norm: 0.9584016105715051, iteration: 267023
loss: 1.0070761442184448,grad_norm: 0.9999991952003762, iteration: 267024
loss: 0.9994809627532959,grad_norm: 0.777826350019962, iteration: 267025
loss: 1.0128570795059204,grad_norm: 0.9999990923046852, iteration: 267026
loss: 0.9913142919540405,grad_norm: 0.8968733722468402, iteration: 267027
loss: 0.975184440612793,grad_norm: 0.7691918511662199, iteration: 267028
loss: 0.9866818785667419,grad_norm: 0.9999992020468694, iteration: 267029
loss: 0.9644299745559692,grad_norm: 0.8479878662303573, iteration: 267030
loss: 1.0599716901779175,grad_norm: 0.999999800060372, iteration: 267031
loss: 0.9590075016021729,grad_norm: 0.7956374377569267, iteration: 267032
loss: 0.9733620285987854,grad_norm: 0.8516590719086329, iteration: 267033
loss: 1.024977207183838,grad_norm: 0.9962564543204098, iteration: 267034
loss: 1.0078188180923462,grad_norm: 0.8519684045955114, iteration: 267035
loss: 0.957899272441864,grad_norm: 0.7925895250401171, iteration: 267036
loss: 0.9577474594116211,grad_norm: 0.9009117806275051, iteration: 267037
loss: 0.9931665658950806,grad_norm: 0.7745150552429055, iteration: 267038
loss: 1.0023045539855957,grad_norm: 0.9632324788239915, iteration: 267039
loss: 1.0333940982818604,grad_norm: 0.9999991362210543, iteration: 267040
loss: 0.981010913848877,grad_norm: 0.8252859649840126, iteration: 267041
loss: 1.022465467453003,grad_norm: 0.9999989781458912, iteration: 267042
loss: 0.963640570640564,grad_norm: 0.9375779498743767, iteration: 267043
loss: 1.017951250076294,grad_norm: 0.8275975796705187, iteration: 267044
loss: 0.9866957664489746,grad_norm: 0.7458092617147136, iteration: 267045
loss: 0.9941760897636414,grad_norm: 0.8173454228752813, iteration: 267046
loss: 0.9471062421798706,grad_norm: 0.9999990455416281, iteration: 267047
loss: 1.1626999378204346,grad_norm: 0.9999998976926273, iteration: 267048
loss: 1.0830212831497192,grad_norm: 0.8242146689791812, iteration: 267049
loss: 0.9928866028785706,grad_norm: 0.8081311893495375, iteration: 267050
loss: 0.9980295300483704,grad_norm: 0.9337538481823916, iteration: 267051
loss: 1.0146589279174805,grad_norm: 0.914276656997947, iteration: 267052
loss: 0.9972996115684509,grad_norm: 0.997530388840839, iteration: 267053
loss: 1.0244789123535156,grad_norm: 0.9507543344386497, iteration: 267054
loss: 1.0047118663787842,grad_norm: 0.8249438349915781, iteration: 267055
loss: 0.9852398037910461,grad_norm: 0.8537738933970979, iteration: 267056
loss: 1.0330252647399902,grad_norm: 0.9439035280784323, iteration: 267057
loss: 0.9885125756263733,grad_norm: 0.9999990345783218, iteration: 267058
loss: 0.9631893038749695,grad_norm: 0.9999991087498938, iteration: 267059
loss: 1.098262906074524,grad_norm: 0.9999990901448637, iteration: 267060
loss: 1.0187379121780396,grad_norm: 0.8472729954027781, iteration: 267061
loss: 1.003495454788208,grad_norm: 0.9662002703972261, iteration: 267062
loss: 0.9708512425422668,grad_norm: 0.9999993365283119, iteration: 267063
loss: 1.009230375289917,grad_norm: 0.9034857822149838, iteration: 267064
loss: 1.0006273984909058,grad_norm: 0.9146734314224684, iteration: 267065
loss: 0.9910024404525757,grad_norm: 0.7911179809698281, iteration: 267066
loss: 1.0356119871139526,grad_norm: 0.9942803494767221, iteration: 267067
loss: 1.0366030931472778,grad_norm: 0.9999995264111188, iteration: 267068
loss: 1.0548807382583618,grad_norm: 0.9999992050745171, iteration: 267069
loss: 1.0238951444625854,grad_norm: 0.9589900451611114, iteration: 267070
loss: 1.0726338624954224,grad_norm: 0.9999999021038078, iteration: 267071
loss: 0.9807281494140625,grad_norm: 0.9999992812813864, iteration: 267072
loss: 1.039018988609314,grad_norm: 0.8500476360249547, iteration: 267073
loss: 0.9968187808990479,grad_norm: 0.7885471582311923, iteration: 267074
loss: 0.9993972182273865,grad_norm: 0.8597465297221423, iteration: 267075
loss: 0.9788352847099304,grad_norm: 0.8498785789390333, iteration: 267076
loss: 0.974985659122467,grad_norm: 0.7925657606245815, iteration: 267077
loss: 1.0198113918304443,grad_norm: 0.9999996512017185, iteration: 267078
loss: 1.013075828552246,grad_norm: 0.9999997727340799, iteration: 267079
loss: 0.9994275569915771,grad_norm: 0.9999990289422424, iteration: 267080
loss: 1.0086578130722046,grad_norm: 0.7855288525834029, iteration: 267081
loss: 1.01008939743042,grad_norm: 0.8579686053299754, iteration: 267082
loss: 1.0391356945037842,grad_norm: 0.991014873581453, iteration: 267083
loss: 0.9532788991928101,grad_norm: 0.8702050976665728, iteration: 267084
loss: 0.9933520555496216,grad_norm: 0.8531027264687059, iteration: 267085
loss: 1.0083391666412354,grad_norm: 0.8162337124369109, iteration: 267086
loss: 0.9793571829795837,grad_norm: 0.9069098973576082, iteration: 267087
loss: 1.0056869983673096,grad_norm: 0.749419103602746, iteration: 267088
loss: 1.0546073913574219,grad_norm: 0.9999991540891925, iteration: 267089
loss: 1.0067625045776367,grad_norm: 0.9999991853065897, iteration: 267090
loss: 0.9985087513923645,grad_norm: 0.9658500940088286, iteration: 267091
loss: 0.9892497658729553,grad_norm: 0.7117642349626819, iteration: 267092
loss: 1.0038779973983765,grad_norm: 0.8283880422755214, iteration: 267093
loss: 1.0059062242507935,grad_norm: 0.9829053608546857, iteration: 267094
loss: 1.004212737083435,grad_norm: 0.8940526859132694, iteration: 267095
loss: 0.9788040518760681,grad_norm: 0.9137670389035057, iteration: 267096
loss: 1.055094599723816,grad_norm: 0.9377856644699614, iteration: 267097
loss: 1.0159673690795898,grad_norm: 0.9999993884386599, iteration: 267098
loss: 0.9850984215736389,grad_norm: 0.89910111428949, iteration: 267099
loss: 1.0325654745101929,grad_norm: 0.9999990655151808, iteration: 267100
loss: 1.0202254056930542,grad_norm: 0.9999991955850679, iteration: 267101
loss: 0.9976177215576172,grad_norm: 0.8642175582063893, iteration: 267102
loss: 0.978027880191803,grad_norm: 0.8954805743870807, iteration: 267103
loss: 0.9989591836929321,grad_norm: 0.8055233715731341, iteration: 267104
loss: 1.1160686016082764,grad_norm: 0.9999997346920888, iteration: 267105
loss: 1.0057412385940552,grad_norm: 0.9999992720912719, iteration: 267106
loss: 0.998751699924469,grad_norm: 0.6851196788074924, iteration: 267107
loss: 0.9569767117500305,grad_norm: 0.9259777028021264, iteration: 267108
loss: 1.017249584197998,grad_norm: 0.843622778376492, iteration: 267109
loss: 0.9881216883659363,grad_norm: 0.9969312092884753, iteration: 267110
loss: 0.9682555794715881,grad_norm: 0.9999991597247941, iteration: 267111
loss: 0.9835492372512817,grad_norm: 0.8236269081011952, iteration: 267112
loss: 1.0246342420578003,grad_norm: 0.9999989445959271, iteration: 267113
loss: 1.0480802059173584,grad_norm: 0.9158494966333864, iteration: 267114
loss: 1.0001641511917114,grad_norm: 0.871900035274267, iteration: 267115
loss: 1.1423841714859009,grad_norm: 0.99999915973367, iteration: 267116
loss: 1.0283257961273193,grad_norm: 0.8468482892235305, iteration: 267117
loss: 0.9966727495193481,grad_norm: 0.8369545349717027, iteration: 267118
loss: 0.9947132468223572,grad_norm: 0.8366232851345274, iteration: 267119
loss: 1.0993824005126953,grad_norm: 0.8906779727792267, iteration: 267120
loss: 0.9990331530570984,grad_norm: 0.9760003487503564, iteration: 267121
loss: 1.053938865661621,grad_norm: 0.889836863073683, iteration: 267122
loss: 1.0753374099731445,grad_norm: 0.8730583654206829, iteration: 267123
loss: 0.9800817370414734,grad_norm: 0.9175855213178172, iteration: 267124
loss: 1.0702424049377441,grad_norm: 0.9189719163938672, iteration: 267125
loss: 1.0739741325378418,grad_norm: 0.8842651823946691, iteration: 267126
loss: 1.0607616901397705,grad_norm: 0.9772650512354168, iteration: 267127
loss: 1.0300637483596802,grad_norm: 0.9886401042937957, iteration: 267128
loss: 1.0840510129928589,grad_norm: 0.850938024540958, iteration: 267129
loss: 1.108009696006775,grad_norm: 0.9999999250391394, iteration: 267130
loss: 1.0085046291351318,grad_norm: 0.7411603178099363, iteration: 267131
loss: 1.0389657020568848,grad_norm: 0.8456147670208546, iteration: 267132
loss: 0.9770268201828003,grad_norm: 0.9999993864648464, iteration: 267133
loss: 1.0338248014450073,grad_norm: 0.8153072981337811, iteration: 267134
loss: 1.0999515056610107,grad_norm: 0.888965344519768, iteration: 267135
loss: 1.032762885093689,grad_norm: 0.8027381852229876, iteration: 267136
loss: 0.995880126953125,grad_norm: 0.7257979429300185, iteration: 267137
loss: 0.9970726370811462,grad_norm: 0.9261545319005301, iteration: 267138
loss: 0.992027997970581,grad_norm: 0.7162569312083378, iteration: 267139
loss: 0.9964588284492493,grad_norm: 0.8059056126641033, iteration: 267140
loss: 1.0336494445800781,grad_norm: 0.7507814741443377, iteration: 267141
loss: 0.9929341077804565,grad_norm: 0.8924761754151327, iteration: 267142
loss: 0.9870855808258057,grad_norm: 0.8060811249019534, iteration: 267143
loss: 1.0145386457443237,grad_norm: 0.8777679238440871, iteration: 267144
loss: 1.1354621648788452,grad_norm: 0.9999997583858435, iteration: 267145
loss: 1.0070401430130005,grad_norm: 0.9049910321275754, iteration: 267146
loss: 1.000736117362976,grad_norm: 0.7697061835419898, iteration: 267147
loss: 0.9864369034767151,grad_norm: 0.8620382145402878, iteration: 267148
loss: 1.0300570726394653,grad_norm: 0.8427215013961569, iteration: 267149
loss: 0.9941117167472839,grad_norm: 0.9435926708286319, iteration: 267150
loss: 1.0196298360824585,grad_norm: 0.9004032373478897, iteration: 267151
loss: 0.9429871439933777,grad_norm: 0.9999992325033722, iteration: 267152
loss: 0.9888879060745239,grad_norm: 0.880179938629343, iteration: 267153
loss: 1.0194278955459595,grad_norm: 0.8103374457308865, iteration: 267154
loss: 1.002504825592041,grad_norm: 0.954259142040212, iteration: 267155
loss: 0.9894802570343018,grad_norm: 0.8007444147752314, iteration: 267156
loss: 1.0830795764923096,grad_norm: 0.8882958386657898, iteration: 267157
loss: 0.9764907956123352,grad_norm: 0.830247170749369, iteration: 267158
loss: 0.9982430934906006,grad_norm: 0.99999906427542, iteration: 267159
loss: 1.0057767629623413,grad_norm: 0.8119676691885591, iteration: 267160
loss: 0.9995769262313843,grad_norm: 0.99999902593905, iteration: 267161
loss: 1.0310295820236206,grad_norm: 0.999999712022479, iteration: 267162
loss: 0.9991064071655273,grad_norm: 0.9325863302866767, iteration: 267163
loss: 0.9889559149742126,grad_norm: 0.8124739623134282, iteration: 267164
loss: 0.9820343255996704,grad_norm: 0.9427189502454023, iteration: 267165
loss: 1.0171860456466675,grad_norm: 0.8716817771970441, iteration: 267166
loss: 1.0168341398239136,grad_norm: 0.8592173695048324, iteration: 267167
loss: 1.0251176357269287,grad_norm: 0.7711477885500325, iteration: 267168
loss: 0.9502083659172058,grad_norm: 0.8945337720404203, iteration: 267169
loss: 0.9867920279502869,grad_norm: 0.8638734595665286, iteration: 267170
loss: 1.0602716207504272,grad_norm: 0.999999030222097, iteration: 267171
loss: 1.0014785528182983,grad_norm: 0.8988039652053079, iteration: 267172
loss: 1.002151370048523,grad_norm: 0.9302596352918001, iteration: 267173
loss: 1.032766580581665,grad_norm: 0.8167007964468062, iteration: 267174
loss: 1.0060433149337769,grad_norm: 0.9999990259890419, iteration: 267175
loss: 1.0486092567443848,grad_norm: 0.999999421351924, iteration: 267176
loss: 0.948627233505249,grad_norm: 0.8576251985583347, iteration: 267177
loss: 1.0069513320922852,grad_norm: 0.9999991608646482, iteration: 267178
loss: 0.9950868487358093,grad_norm: 0.9999992193735592, iteration: 267179
loss: 1.0677663087844849,grad_norm: 0.9999992840655162, iteration: 267180
loss: 0.9979945421218872,grad_norm: 0.8578298128212091, iteration: 267181
loss: 1.006283164024353,grad_norm: 0.7324545023906684, iteration: 267182
loss: 0.9931879639625549,grad_norm: 0.9999990196121288, iteration: 267183
loss: 0.993106484413147,grad_norm: 0.9473645128024584, iteration: 267184
loss: 1.0129280090332031,grad_norm: 0.9480265718306291, iteration: 267185
loss: 1.0098997354507446,grad_norm: 0.7783440166686613, iteration: 267186
loss: 0.9697872996330261,grad_norm: 0.7727500492867819, iteration: 267187
loss: 0.995776355266571,grad_norm: 0.8092501924066496, iteration: 267188
loss: 1.00674307346344,grad_norm: 0.8773661540819441, iteration: 267189
loss: 1.0127949714660645,grad_norm: 0.9999994784792079, iteration: 267190
loss: 0.9945241212844849,grad_norm: 0.8748213951041406, iteration: 267191
loss: 1.0430034399032593,grad_norm: 0.9423793975511442, iteration: 267192
loss: 0.9951086044311523,grad_norm: 0.8928381080015952, iteration: 267193
loss: 0.9658744931221008,grad_norm: 0.8651707451520801, iteration: 267194
loss: 0.9699956774711609,grad_norm: 0.9428691006832679, iteration: 267195
loss: 0.9782518148422241,grad_norm: 0.9121282605551937, iteration: 267196
loss: 1.0152605772018433,grad_norm: 0.9999992254089932, iteration: 267197
loss: 1.003752589225769,grad_norm: 0.7138118523777384, iteration: 267198
loss: 0.9958986639976501,grad_norm: 0.9384259860216773, iteration: 267199
loss: 1.0365171432495117,grad_norm: 0.8402005595758946, iteration: 267200
loss: 0.9940690398216248,grad_norm: 0.915561647763905, iteration: 267201
loss: 1.0019044876098633,grad_norm: 0.9777833566154365, iteration: 267202
loss: 0.9761026501655579,grad_norm: 0.8100346651271804, iteration: 267203
loss: 1.0069161653518677,grad_norm: 0.9999992537395174, iteration: 267204
loss: 0.9915934205055237,grad_norm: 0.8479726335894477, iteration: 267205
loss: 0.9925721883773804,grad_norm: 0.804354884836116, iteration: 267206
loss: 1.0095229148864746,grad_norm: 0.8326291325745381, iteration: 267207
loss: 0.9808621406555176,grad_norm: 0.8320172803376673, iteration: 267208
loss: 1.0050048828125,grad_norm: 0.9127159446626687, iteration: 267209
loss: 1.0211516618728638,grad_norm: 0.9414046543314842, iteration: 267210
loss: 0.9859285950660706,grad_norm: 0.8352139548571558, iteration: 267211
loss: 0.9920017123222351,grad_norm: 0.9208135945841605, iteration: 267212
loss: 1.0241175889968872,grad_norm: 0.9999991861552223, iteration: 267213
loss: 1.0088764429092407,grad_norm: 0.9999990087034363, iteration: 267214
loss: 0.9966443181037903,grad_norm: 0.850770958580391, iteration: 267215
loss: 1.0089867115020752,grad_norm: 0.9228856706686805, iteration: 267216
loss: 1.0290887355804443,grad_norm: 0.8249772848162955, iteration: 267217
loss: 0.9747130870819092,grad_norm: 0.8185738806599099, iteration: 267218
loss: 1.001761555671692,grad_norm: 0.8389617412004963, iteration: 267219
loss: 1.0232590436935425,grad_norm: 0.999999466152998, iteration: 267220
loss: 1.0128449201583862,grad_norm: 0.7773740816715898, iteration: 267221
loss: 1.0404983758926392,grad_norm: 0.9999997678114855, iteration: 267222
loss: 0.9817235469818115,grad_norm: 0.8681315744791083, iteration: 267223
loss: 0.9919257760047913,grad_norm: 0.9856848014911872, iteration: 267224
loss: 1.030969500541687,grad_norm: 0.8582967843350382, iteration: 267225
loss: 1.001763939857483,grad_norm: 0.9232526954193468, iteration: 267226
loss: 0.9958588480949402,grad_norm: 0.8162642203590168, iteration: 267227
loss: 1.0219378471374512,grad_norm: 0.9063194635106736, iteration: 267228
loss: 1.0348882675170898,grad_norm: 0.8924904560668002, iteration: 267229
loss: 1.065058946609497,grad_norm: 0.9999991894424866, iteration: 267230
loss: 1.0515162944793701,grad_norm: 0.8230393573372261, iteration: 267231
loss: 1.0007942914962769,grad_norm: 0.883917593866132, iteration: 267232
loss: 0.9836376309394836,grad_norm: 0.7737026281908675, iteration: 267233
loss: 1.0212010145187378,grad_norm: 0.9999995476437009, iteration: 267234
loss: 0.9541105031967163,grad_norm: 0.7818583787656471, iteration: 267235
loss: 0.9738479256629944,grad_norm: 0.8439125301654078, iteration: 267236
loss: 0.965509831905365,grad_norm: 0.8713734135445735, iteration: 267237
loss: 1.0006861686706543,grad_norm: 0.9999991013105624, iteration: 267238
loss: 1.007381796836853,grad_norm: 0.9349820356921393, iteration: 267239
loss: 0.9923819303512573,grad_norm: 0.9999990569534222, iteration: 267240
loss: 1.0079156160354614,grad_norm: 0.7467647180285026, iteration: 267241
loss: 1.003966212272644,grad_norm: 0.9079601048609739, iteration: 267242
loss: 0.9634345173835754,grad_norm: 0.7993605317712499, iteration: 267243
loss: 1.0133557319641113,grad_norm: 0.8785304052883891, iteration: 267244
loss: 0.9826144576072693,grad_norm: 0.8167377030926934, iteration: 267245
loss: 0.9938861727714539,grad_norm: 0.9999991246538824, iteration: 267246
loss: 0.9852132201194763,grad_norm: 0.726481655114638, iteration: 267247
loss: 1.0100351572036743,grad_norm: 0.8803172197279371, iteration: 267248
loss: 1.0196088552474976,grad_norm: 0.9999994609274038, iteration: 267249
loss: 0.977383553981781,grad_norm: 0.9127315031027408, iteration: 267250
loss: 0.9729354381561279,grad_norm: 0.9999990704149893, iteration: 267251
loss: 0.9802703857421875,grad_norm: 0.8820774981740541, iteration: 267252
loss: 1.0603694915771484,grad_norm: 0.929171294641819, iteration: 267253
loss: 0.9902383685112,grad_norm: 0.9999991720270096, iteration: 267254
loss: 1.1864286661148071,grad_norm: 0.9999994727529311, iteration: 267255
loss: 0.9900250434875488,grad_norm: 0.8982407791569972, iteration: 267256
loss: 1.0027227401733398,grad_norm: 0.829177905860904, iteration: 267257
loss: 0.9942321181297302,grad_norm: 0.9999992575929321, iteration: 267258
loss: 0.967628002166748,grad_norm: 0.9999991040580755, iteration: 267259
loss: 0.9732720255851746,grad_norm: 0.9645009986029566, iteration: 267260
loss: 0.9827483892440796,grad_norm: 0.7808869333280819, iteration: 267261
loss: 1.0082921981811523,grad_norm: 0.9999990994903677, iteration: 267262
loss: 0.9592417478561401,grad_norm: 0.9686409482846117, iteration: 267263
loss: 0.9672217965126038,grad_norm: 0.8684257755498824, iteration: 267264
loss: 0.9820801019668579,grad_norm: 0.7932338243743943, iteration: 267265
loss: 0.9855597019195557,grad_norm: 0.9045991154548593, iteration: 267266
loss: 0.9906090497970581,grad_norm: 0.9719050679098022, iteration: 267267
loss: 0.9906399846076965,grad_norm: 0.7990483767475752, iteration: 267268
loss: 0.9768184423446655,grad_norm: 0.819883761572661, iteration: 267269
loss: 0.9411870837211609,grad_norm: 0.9268734375358314, iteration: 267270
loss: 0.9905774593353271,grad_norm: 0.8713493418735008, iteration: 267271
loss: 1.016324758529663,grad_norm: 0.9999991886237779, iteration: 267272
loss: 0.9775453209877014,grad_norm: 0.8535981680021527, iteration: 267273
loss: 1.029291033744812,grad_norm: 0.7756283420326847, iteration: 267274
loss: 0.979709804058075,grad_norm: 0.7170611265865003, iteration: 267275
loss: 1.000282883644104,grad_norm: 0.942664987924906, iteration: 267276
loss: 0.9708949327468872,grad_norm: 0.8322421341649318, iteration: 267277
loss: 1.0358079671859741,grad_norm: 0.999999244505808, iteration: 267278
loss: 0.9923005700111389,grad_norm: 0.8748536743606586, iteration: 267279
loss: 1.0323879718780518,grad_norm: 0.961835247037073, iteration: 267280
loss: 1.0421329736709595,grad_norm: 0.8059807578848219, iteration: 267281
loss: 0.9935670495033264,grad_norm: 0.9030595503106963, iteration: 267282
loss: 0.98586505651474,grad_norm: 0.8386841713628489, iteration: 267283
loss: 0.9858160018920898,grad_norm: 0.9610834138187153, iteration: 267284
loss: 1.0229452848434448,grad_norm: 0.9474205080950663, iteration: 267285
loss: 0.9828521013259888,grad_norm: 0.9045740201561605, iteration: 267286
loss: 1.0079731941223145,grad_norm: 0.9999997570236391, iteration: 267287
loss: 1.0524168014526367,grad_norm: 0.9999999669531856, iteration: 267288
loss: 0.967266857624054,grad_norm: 0.9999992255419878, iteration: 267289
loss: 1.0322821140289307,grad_norm: 0.9999990206160595, iteration: 267290
loss: 1.0495274066925049,grad_norm: 0.8335830776113955, iteration: 267291
loss: 0.9865557551383972,grad_norm: 0.9166720234304687, iteration: 267292
loss: 1.0088192224502563,grad_norm: 0.7657837694997175, iteration: 267293
loss: 1.024969458580017,grad_norm: 0.8240902467433213, iteration: 267294
loss: 1.0253161191940308,grad_norm: 0.8982726534201014, iteration: 267295
loss: 1.0127019882202148,grad_norm: 0.9999992603454122, iteration: 267296
loss: 0.9678502082824707,grad_norm: 0.7890800789883664, iteration: 267297
loss: 0.942570149898529,grad_norm: 0.7609206282983885, iteration: 267298
loss: 1.024896264076233,grad_norm: 0.9999991222798834, iteration: 267299
loss: 1.0184904336929321,grad_norm: 0.799991927871628, iteration: 267300
loss: 0.9788177013397217,grad_norm: 0.8300663661695012, iteration: 267301
loss: 1.0103120803833008,grad_norm: 0.6935204390710628, iteration: 267302
loss: 0.971824586391449,grad_norm: 0.8264400131854933, iteration: 267303
loss: 1.033135175704956,grad_norm: 0.8864046355615447, iteration: 267304
loss: 0.9686937928199768,grad_norm: 0.9256026641866003, iteration: 267305
loss: 0.9802423119544983,grad_norm: 0.8254067701927693, iteration: 267306
loss: 0.988563597202301,grad_norm: 0.9999990517126042, iteration: 267307
loss: 1.021362543106079,grad_norm: 0.999999039046119, iteration: 267308
loss: 0.9869294762611389,grad_norm: 0.8175288157109389, iteration: 267309
loss: 0.9719024896621704,grad_norm: 0.8242969956164096, iteration: 267310
loss: 1.0305793285369873,grad_norm: 0.9999999263850645, iteration: 267311
loss: 1.0143808126449585,grad_norm: 0.9999992267481825, iteration: 267312
loss: 0.9753948450088501,grad_norm: 0.9999997859281806, iteration: 267313
loss: 0.9889007806777954,grad_norm: 0.8962811750885019, iteration: 267314
loss: 1.003536343574524,grad_norm: 0.8314384464941484, iteration: 267315
loss: 0.9540531635284424,grad_norm: 0.9999991013243864, iteration: 267316
loss: 0.9825446605682373,grad_norm: 0.9743074940393394, iteration: 267317
loss: 0.992068350315094,grad_norm: 0.9999991983861856, iteration: 267318
loss: 0.9769694805145264,grad_norm: 0.762295600039987, iteration: 267319
loss: 0.9942987561225891,grad_norm: 0.872294077892817, iteration: 267320
loss: 1.0272501707077026,grad_norm: 0.8010727792726491, iteration: 267321
loss: 0.9864957332611084,grad_norm: 0.8919694975864476, iteration: 267322
loss: 0.940289318561554,grad_norm: 0.8897326364778048, iteration: 267323
loss: 1.037476897239685,grad_norm: 0.9264236694201595, iteration: 267324
loss: 0.9905080199241638,grad_norm: 0.9999990003819692, iteration: 267325
loss: 1.0168501138687134,grad_norm: 0.8200366753714645, iteration: 267326
loss: 1.0027215480804443,grad_norm: 0.8015337165972087, iteration: 267327
loss: 0.9925591945648193,grad_norm: 0.8301711137301483, iteration: 267328
loss: 1.0121735334396362,grad_norm: 0.8014496164098078, iteration: 267329
loss: 1.0109446048736572,grad_norm: 0.9710645455237558, iteration: 267330
loss: 1.074998378753662,grad_norm: 0.8308396874909636, iteration: 267331
loss: 0.9912272691726685,grad_norm: 0.8818053182268019, iteration: 267332
loss: 1.0343431234359741,grad_norm: 0.9251800313756278, iteration: 267333
loss: 1.003503441810608,grad_norm: 0.8204741964365794, iteration: 267334
loss: 0.9912555813789368,grad_norm: 0.934864420685456, iteration: 267335
loss: 0.9949842095375061,grad_norm: 0.7615135412812591, iteration: 267336
loss: 0.9825296401977539,grad_norm: 0.6729498994372402, iteration: 267337
loss: 1.0116868019104004,grad_norm: 0.828867355497063, iteration: 267338
loss: 1.008173942565918,grad_norm: 0.886096819457312, iteration: 267339
loss: 1.1620715856552124,grad_norm: 0.9999995640340977, iteration: 267340
loss: 0.9926656484603882,grad_norm: 0.8787295016221889, iteration: 267341
loss: 1.078689694404602,grad_norm: 0.999999311968202, iteration: 267342
loss: 0.9978771209716797,grad_norm: 0.7047028938183312, iteration: 267343
loss: 1.0544195175170898,grad_norm: 0.8514592329242944, iteration: 267344
loss: 1.0118792057037354,grad_norm: 0.8747236370351601, iteration: 267345
loss: 1.0133270025253296,grad_norm: 0.9625935367190714, iteration: 267346
loss: 1.0567357540130615,grad_norm: 0.9999993135095188, iteration: 267347
loss: 1.0308560132980347,grad_norm: 0.9999997020366941, iteration: 267348
loss: 0.9834163784980774,grad_norm: 0.8669051577033622, iteration: 267349
loss: 0.9884480237960815,grad_norm: 0.912328823857502, iteration: 267350
loss: 1.199359655380249,grad_norm: 0.9999996925701691, iteration: 267351
loss: 0.9833257794380188,grad_norm: 0.9354632799899805, iteration: 267352
loss: 1.1502281427383423,grad_norm: 0.9999990206265208, iteration: 267353
loss: 0.9890499114990234,grad_norm: 0.9247083722063966, iteration: 267354
loss: 0.9809970259666443,grad_norm: 0.7749693389980393, iteration: 267355
loss: 0.9892017841339111,grad_norm: 0.79792038496483, iteration: 267356
loss: 1.1643092632293701,grad_norm: 0.8994592428332251, iteration: 267357
loss: 1.02315354347229,grad_norm: 0.8325355819228039, iteration: 267358
loss: 1.0381971597671509,grad_norm: 0.852737674119119, iteration: 267359
loss: 1.0147912502288818,grad_norm: 0.8957982907169108, iteration: 267360
loss: 0.9974460601806641,grad_norm: 0.8512841217866353, iteration: 267361
loss: 0.9879624843597412,grad_norm: 0.9610613192971456, iteration: 267362
loss: 1.0126668214797974,grad_norm: 0.9999997903025835, iteration: 267363
loss: 0.9829584360122681,grad_norm: 0.8851742629974135, iteration: 267364
loss: 1.0260467529296875,grad_norm: 0.9999990889379082, iteration: 267365
loss: 1.0716824531555176,grad_norm: 0.9287590583645461, iteration: 267366
loss: 0.9960384368896484,grad_norm: 0.905538406224366, iteration: 267367
loss: 0.9837153553962708,grad_norm: 0.7867681942873819, iteration: 267368
loss: 1.0145593881607056,grad_norm: 0.9033970221342248, iteration: 267369
loss: 1.017668604850769,grad_norm: 0.9999989465942722, iteration: 267370
loss: 0.9940953850746155,grad_norm: 0.8831730388274479, iteration: 267371
loss: 1.1822361946105957,grad_norm: 0.9999992833067535, iteration: 267372
loss: 1.4884220361709595,grad_norm: 0.999999456250109, iteration: 267373
loss: 0.9965170621871948,grad_norm: 0.9999992884776656, iteration: 267374
loss: 0.9963232278823853,grad_norm: 0.8262555155564294, iteration: 267375
loss: 1.0235445499420166,grad_norm: 0.9482818403895557, iteration: 267376
loss: 1.0775307416915894,grad_norm: 0.9999991376087647, iteration: 267377
loss: 1.0074657201766968,grad_norm: 0.8520216367809875, iteration: 267378
loss: 1.0230305194854736,grad_norm: 0.840666912252393, iteration: 267379
loss: 0.9901582598686218,grad_norm: 0.7257412723389228, iteration: 267380
loss: 0.9813792109489441,grad_norm: 0.8586861471596073, iteration: 267381
loss: 1.3015241622924805,grad_norm: 0.9999995198386851, iteration: 267382
loss: 1.0134886503219604,grad_norm: 0.9999991820799334, iteration: 267383
loss: 1.0034332275390625,grad_norm: 0.8860606113182757, iteration: 267384
loss: 1.028273582458496,grad_norm: 0.7184583145735894, iteration: 267385
loss: 1.0329123735427856,grad_norm: 0.8808625088164261, iteration: 267386
loss: 1.0169597864151,grad_norm: 0.7806514869116346, iteration: 267387
loss: 1.0274690389633179,grad_norm: 0.999999200775822, iteration: 267388
loss: 1.0752118825912476,grad_norm: 0.9283525647077855, iteration: 267389
loss: 0.95244961977005,grad_norm: 0.8633374581366529, iteration: 267390
loss: 1.0128262042999268,grad_norm: 0.9999990839536345, iteration: 267391
loss: 1.085951328277588,grad_norm: 0.9999996409358053, iteration: 267392
loss: 1.0275031328201294,grad_norm: 0.9206915707873543, iteration: 267393
loss: 0.9755068421363831,grad_norm: 0.8129838154500808, iteration: 267394
loss: 1.006790041923523,grad_norm: 0.8682780124107219, iteration: 267395
loss: 1.075530767440796,grad_norm: 0.9999992052517775, iteration: 267396
loss: 1.0240471363067627,grad_norm: 0.8761805595615064, iteration: 267397
loss: 1.1885651350021362,grad_norm: 0.999999192744915, iteration: 267398
loss: 0.972454845905304,grad_norm: 0.9371910929701157, iteration: 267399
loss: 0.9763501882553101,grad_norm: 0.9553249298147413, iteration: 267400
loss: 1.0133780241012573,grad_norm: 0.999999108275138, iteration: 267401
loss: 1.0004997253417969,grad_norm: 0.8580160135417402, iteration: 267402
loss: 1.0549664497375488,grad_norm: 0.8544971305526119, iteration: 267403
loss: 1.6485391855239868,grad_norm: 0.9999994085966544, iteration: 267404
loss: 1.0106157064437866,grad_norm: 0.926821572272752, iteration: 267405
loss: 1.075630784034729,grad_norm: 0.919475814497874, iteration: 267406
loss: 1.0809184312820435,grad_norm: 0.9999996258830439, iteration: 267407
loss: 1.2966991662979126,grad_norm: 0.9999992669992571, iteration: 267408
loss: 1.0219861268997192,grad_norm: 0.9999990521815398, iteration: 267409
loss: 0.9957311153411865,grad_norm: 0.8473071987991243, iteration: 267410
loss: 1.0273029804229736,grad_norm: 0.6844026094724875, iteration: 267411
loss: 1.0503069162368774,grad_norm: 0.9999998970546639, iteration: 267412
loss: 0.9836845993995667,grad_norm: 0.8763427373952398, iteration: 267413
loss: 0.9906607270240784,grad_norm: 0.9999991249171205, iteration: 267414
loss: 1.0003547668457031,grad_norm: 0.8840050571891093, iteration: 267415
loss: 0.9937325119972229,grad_norm: 0.9171896568290461, iteration: 267416
loss: 0.9859133362770081,grad_norm: 0.8884067884918468, iteration: 267417
loss: 1.111865758895874,grad_norm: 0.9999990987174199, iteration: 267418
loss: 1.0153597593307495,grad_norm: 0.9999991503264977, iteration: 267419
loss: 1.0130325555801392,grad_norm: 0.8806767513745475, iteration: 267420
loss: 0.9954509735107422,grad_norm: 0.9232820381693132, iteration: 267421
loss: 1.073284387588501,grad_norm: 0.9009125222492801, iteration: 267422
loss: 1.0263142585754395,grad_norm: 0.8737681119442616, iteration: 267423
loss: 0.9854371547698975,grad_norm: 0.8987557705521823, iteration: 267424
loss: 1.05374276638031,grad_norm: 0.999999806449857, iteration: 267425
loss: 1.064606785774231,grad_norm: 0.8128773519374689, iteration: 267426
loss: 1.0230627059936523,grad_norm: 0.9680902955646302, iteration: 267427
loss: 1.0862215757369995,grad_norm: 0.9335478414916466, iteration: 267428
loss: 1.0159999132156372,grad_norm: 0.7769445856342794, iteration: 267429
loss: 0.9704501628875732,grad_norm: 0.9178531484490161, iteration: 267430
loss: 1.018131971359253,grad_norm: 0.9999992929060677, iteration: 267431
loss: 1.0201596021652222,grad_norm: 0.8286079115050659, iteration: 267432
loss: 1.0031155347824097,grad_norm: 0.9025149788603258, iteration: 267433
loss: 0.9599155783653259,grad_norm: 0.8857042333245783, iteration: 267434
loss: 1.1118030548095703,grad_norm: 0.9999999977043348, iteration: 267435
loss: 0.9890235662460327,grad_norm: 0.7027824767155536, iteration: 267436
loss: 1.0346943140029907,grad_norm: 0.9999992998453162, iteration: 267437
loss: 1.0868675708770752,grad_norm: 0.9999994107835527, iteration: 267438
loss: 1.0029079914093018,grad_norm: 0.8066742565529538, iteration: 267439
loss: 1.2367143630981445,grad_norm: 0.9999996530597008, iteration: 267440
loss: 1.0241377353668213,grad_norm: 0.8874919399048771, iteration: 267441
loss: 1.117016315460205,grad_norm: 0.9999991238292011, iteration: 267442
loss: 1.1464263200759888,grad_norm: 0.9999997124123434, iteration: 267443
loss: 1.1880072355270386,grad_norm: 0.9999995865222505, iteration: 267444
loss: 1.010618805885315,grad_norm: 0.8486635766281473, iteration: 267445
loss: 1.1524592638015747,grad_norm: 0.9999997135036407, iteration: 267446
loss: 1.1283329725265503,grad_norm: 0.9999999398811628, iteration: 267447
loss: 1.1988164186477661,grad_norm: 0.9586522933853057, iteration: 267448
loss: 1.0404443740844727,grad_norm: 0.846469932751986, iteration: 267449
loss: 1.0460442304611206,grad_norm: 0.9516643574996967, iteration: 267450
loss: 0.9900978803634644,grad_norm: 0.9004213613430052, iteration: 267451
loss: 1.0435725450515747,grad_norm: 0.999999016874163, iteration: 267452
loss: 1.1527029275894165,grad_norm: 0.9999998811541965, iteration: 267453
loss: 1.1282153129577637,grad_norm: 0.9999996301161956, iteration: 267454
loss: 1.101721167564392,grad_norm: 0.9999998015785136, iteration: 267455
loss: 1.116149663925171,grad_norm: 0.9999994820963471, iteration: 267456
loss: 1.049681544303894,grad_norm: 0.9220159015850942, iteration: 267457
loss: 1.0218254327774048,grad_norm: 0.9999989891733924, iteration: 267458
loss: 1.0237455368041992,grad_norm: 0.9999996557563959, iteration: 267459
loss: 1.1275473833084106,grad_norm: 0.9999999558043419, iteration: 267460
loss: 1.1917530298233032,grad_norm: 0.9999998140970123, iteration: 267461
loss: 1.1171931028366089,grad_norm: 0.9999990065863873, iteration: 267462
loss: 1.0974547863006592,grad_norm: 0.9999998905707569, iteration: 267463
loss: 1.101338267326355,grad_norm: 0.9999994789792062, iteration: 267464
loss: 1.3373682498931885,grad_norm: 0.9999993433853019, iteration: 267465
loss: 1.3045446872711182,grad_norm: 0.9999994488503794, iteration: 267466
loss: 1.2029547691345215,grad_norm: 0.9999996619427554, iteration: 267467
loss: 1.3074924945831299,grad_norm: 0.9999997012375912, iteration: 267468
loss: 1.3171072006225586,grad_norm: 0.9999992767199959, iteration: 267469
loss: 1.1974263191223145,grad_norm: 0.9999992991959009, iteration: 267470
loss: 1.3406333923339844,grad_norm: 0.9999997543170577, iteration: 267471
loss: 1.386323094367981,grad_norm: 0.9999997788896416, iteration: 267472
loss: 1.4564059972763062,grad_norm: 0.9999994162747357, iteration: 267473
loss: 1.3071739673614502,grad_norm: 0.9999995351150024, iteration: 267474
loss: 1.2135307788848877,grad_norm: 0.9999990644678465, iteration: 267475
loss: 1.2197691202163696,grad_norm: 0.9999997204866451, iteration: 267476
loss: 1.2126718759536743,grad_norm: 0.9999996222227601, iteration: 267477
loss: 1.2737770080566406,grad_norm: 0.9999992728252447, iteration: 267478
loss: 1.2062771320343018,grad_norm: 0.9999994310477058, iteration: 267479
loss: 0.9997844099998474,grad_norm: 0.9999990753843215, iteration: 267480
loss: 1.3409583568572998,grad_norm: 0.9999997227586387, iteration: 267481
loss: 1.209408164024353,grad_norm: 0.9999996754260049, iteration: 267482
loss: 1.0796661376953125,grad_norm: 0.847857975143158, iteration: 267483
loss: 1.0330086946487427,grad_norm: 0.9999990692177106, iteration: 267484
loss: 1.3760077953338623,grad_norm: 0.9999999283319041, iteration: 267485
loss: 1.106465220451355,grad_norm: 0.9999992381416924, iteration: 267486
loss: 1.0773003101348877,grad_norm: 0.9514369695230503, iteration: 267487
loss: 1.0584684610366821,grad_norm: 0.9999991996046486, iteration: 267488
loss: 1.037476897239685,grad_norm: 0.9894780735687213, iteration: 267489
loss: 1.040282964706421,grad_norm: 0.9999997912408051, iteration: 267490
loss: 1.1760215759277344,grad_norm: 0.9999997060097504, iteration: 267491
loss: 1.1097005605697632,grad_norm: 0.9999990990319215, iteration: 267492
loss: 0.9999280571937561,grad_norm: 0.9999990825495865, iteration: 267493
loss: 1.0276739597320557,grad_norm: 0.8305052945811936, iteration: 267494
loss: 1.101611614227295,grad_norm: 0.999999521358624, iteration: 267495
loss: 0.9899089336395264,grad_norm: 0.8719335497917327, iteration: 267496
loss: 1.0344786643981934,grad_norm: 0.9999992581421907, iteration: 267497
loss: 0.975256085395813,grad_norm: 0.9967856059828785, iteration: 267498
loss: 1.0321223735809326,grad_norm: 0.9538755065328249, iteration: 267499
loss: 1.0007251501083374,grad_norm: 0.8688169656133918, iteration: 267500
loss: 1.057239294052124,grad_norm: 0.8745854163845342, iteration: 267501
loss: 1.0232913494110107,grad_norm: 0.7974653442442154, iteration: 267502
loss: 1.0431404113769531,grad_norm: 0.9999992671286373, iteration: 267503
loss: 1.0321294069290161,grad_norm: 0.8517709568130138, iteration: 267504
loss: 1.0563712120056152,grad_norm: 0.9809297316356884, iteration: 267505
loss: 1.0761960744857788,grad_norm: 0.9999995176386931, iteration: 267506
loss: 1.1524863243103027,grad_norm: 1.0000000169261447, iteration: 267507
loss: 1.0510343313217163,grad_norm: 0.9999996132604579, iteration: 267508
loss: 1.189872145652771,grad_norm: 0.9999995897884635, iteration: 267509
loss: 1.0885061025619507,grad_norm: 0.9999992818389615, iteration: 267510
loss: 1.0050745010375977,grad_norm: 0.9999992189678586, iteration: 267511
loss: 1.062168836593628,grad_norm: 0.9999996128431581, iteration: 267512
loss: 1.0375338792800903,grad_norm: 0.9999993492436726, iteration: 267513
loss: 1.1380444765090942,grad_norm: 0.9999994951819609, iteration: 267514
loss: 1.0869245529174805,grad_norm: 0.7632763310626607, iteration: 267515
loss: 1.2099655866622925,grad_norm: 0.9999998503407995, iteration: 267516
loss: 1.0773409605026245,grad_norm: 0.9999995481981594, iteration: 267517
loss: 1.2122561931610107,grad_norm: 0.9999990624984211, iteration: 267518
loss: 1.113186240196228,grad_norm: 0.9999994073502559, iteration: 267519
loss: 0.9983431696891785,grad_norm: 0.9999996704090057, iteration: 267520
loss: 1.104344367980957,grad_norm: 0.9999994129861192, iteration: 267521
loss: 1.1166491508483887,grad_norm: 0.9999996842456003, iteration: 267522
loss: 1.242436408996582,grad_norm: 0.9999997417957874, iteration: 267523
loss: 1.3692280054092407,grad_norm: 0.9999999932420609, iteration: 267524
loss: 1.2638264894485474,grad_norm: 0.9999996754470069, iteration: 267525
loss: 1.1580774784088135,grad_norm: 0.9999997528868386, iteration: 267526
loss: 1.246090054512024,grad_norm: 0.9999990741425184, iteration: 267527
loss: 1.1409354209899902,grad_norm: 0.9999996992770945, iteration: 267528
loss: 1.354408621788025,grad_norm: 0.9999999583241965, iteration: 267529
loss: 1.2017914056777954,grad_norm: 0.9999996858661832, iteration: 267530
loss: 1.2963316440582275,grad_norm: 0.999999890719782, iteration: 267531
loss: 1.1020394563674927,grad_norm: 0.9999998248550068, iteration: 267532
loss: 1.0288060903549194,grad_norm: 0.999999854432987, iteration: 267533
loss: 1.1382874250411987,grad_norm: 0.9999993103408394, iteration: 267534
loss: 1.513329267501831,grad_norm: 0.9999998953308024, iteration: 267535
loss: 1.2192468643188477,grad_norm: 0.9999993273980713, iteration: 267536
loss: 1.1072030067443848,grad_norm: 0.9999992692550719, iteration: 267537
loss: 1.6291717290878296,grad_norm: 0.999999924016329, iteration: 267538
loss: 1.387369155883789,grad_norm: 0.9999996404033772, iteration: 267539
loss: 1.7004252672195435,grad_norm: 0.9999996086265217, iteration: 267540
loss: 1.4903141260147095,grad_norm: 1.000000108296669, iteration: 267541
loss: 1.5893269777297974,grad_norm: 0.9999997408315201, iteration: 267542
loss: 1.8879245519638062,grad_norm: 0.9999998591808467, iteration: 267543
loss: 1.5326496362686157,grad_norm: 0.9999999467775637, iteration: 267544
loss: 1.2715048789978027,grad_norm: 0.9999998201128074, iteration: 267545
loss: 1.4844094514846802,grad_norm: 0.9999998030629027, iteration: 267546
loss: 1.4265669584274292,grad_norm: 0.9999998616206541, iteration: 267547
loss: 1.4589821100234985,grad_norm: 0.9999995878300937, iteration: 267548
loss: 1.4367836713790894,grad_norm: 0.9999998743206234, iteration: 267549
loss: 1.3478397130966187,grad_norm: 0.9999998811203176, iteration: 267550
loss: 1.3822484016418457,grad_norm: 0.9999995134817251, iteration: 267551
loss: 1.5367372035980225,grad_norm: 0.9999996117014343, iteration: 267552
loss: 1.598099708557129,grad_norm: 0.9999999131060601, iteration: 267553
loss: 1.322497844696045,grad_norm: 0.9999996855419324, iteration: 267554
loss: 1.2905300855636597,grad_norm: 0.9999998946304116, iteration: 267555
loss: 1.5303618907928467,grad_norm: 0.9999998129343004, iteration: 267556
loss: 1.5169663429260254,grad_norm: 0.9999999945962106, iteration: 267557
loss: 1.507765293121338,grad_norm: 1.000000013108751, iteration: 267558
loss: 1.4366244077682495,grad_norm: 1.0000000336400068, iteration: 267559
loss: 1.4014123678207397,grad_norm: 0.9999994839888586, iteration: 267560
loss: 1.5491231679916382,grad_norm: 0.9999998665355218, iteration: 267561
loss: 1.3296056985855103,grad_norm: 0.9999996243220541, iteration: 267562
loss: 1.639432430267334,grad_norm: 0.9999995542870226, iteration: 267563
loss: 1.4490352869033813,grad_norm: 0.999999894384967, iteration: 267564
loss: 1.625007152557373,grad_norm: 0.9999998312831434, iteration: 267565
loss: 1.5230047702789307,grad_norm: 0.9999996777871396, iteration: 267566
loss: 1.457735538482666,grad_norm: 0.9999996158339983, iteration: 267567
loss: 1.7538342475891113,grad_norm: 0.999999776508926, iteration: 267568
loss: 1.7127829790115356,grad_norm: 0.9999999177907558, iteration: 267569
loss: 1.4893841743469238,grad_norm: 0.9999998667787274, iteration: 267570
loss: 1.7057462930679321,grad_norm: 0.99999960345362, iteration: 267571
loss: 1.2786208391189575,grad_norm: 0.9999998135146644, iteration: 267572
loss: 1.3106948137283325,grad_norm: 0.9999994082418444, iteration: 267573
loss: 1.4461631774902344,grad_norm: 1.0000000937628328, iteration: 267574
loss: 1.4088213443756104,grad_norm: 0.999999323427355, iteration: 267575
loss: 1.6379756927490234,grad_norm: 0.9999997494306704, iteration: 267576
loss: 1.391858696937561,grad_norm: 0.9999995399840403, iteration: 267577
loss: 1.6074044704437256,grad_norm: 0.9999999237546957, iteration: 267578
loss: 1.473703384399414,grad_norm: 0.9999998991836984, iteration: 267579
loss: 1.541693925857544,grad_norm: 0.9999996435595487, iteration: 267580
loss: 1.2628118991851807,grad_norm: 0.9999995379009708, iteration: 267581
loss: 1.5236096382141113,grad_norm: 0.9999999115400219, iteration: 267582
loss: 1.4577971696853638,grad_norm: 0.999999980006125, iteration: 267583
loss: 1.3867309093475342,grad_norm: 0.9999994299569336, iteration: 267584
loss: 1.5515661239624023,grad_norm: 0.9999995363936232, iteration: 267585
loss: 1.541340947151184,grad_norm: 1.0000000324631415, iteration: 267586
loss: 1.5756525993347168,grad_norm: 0.9999999610024026, iteration: 267587
loss: 1.5178660154342651,grad_norm: 0.9999996355210922, iteration: 267588
loss: 1.5226942300796509,grad_norm: 0.9999998922338303, iteration: 267589
loss: 1.31522536277771,grad_norm: 0.9999996152629174, iteration: 267590
loss: 1.552106499671936,grad_norm: 0.9999997070523455, iteration: 267591
loss: 1.6827727556228638,grad_norm: 0.9999995688036588, iteration: 267592
loss: 1.5682741403579712,grad_norm: 0.9999998151940362, iteration: 267593
loss: 1.4897962808609009,grad_norm: 0.9999998344815069, iteration: 267594
loss: 1.6205195188522339,grad_norm: 1.0000000382026348, iteration: 267595
loss: 1.4746507406234741,grad_norm: 0.9999998349214811, iteration: 267596
loss: 1.61802339553833,grad_norm: 0.9999999501386989, iteration: 267597
loss: 1.6300095319747925,grad_norm: 0.999999989637456, iteration: 267598
loss: 1.6733733415603638,grad_norm: 0.9999999869972239, iteration: 267599
loss: 1.6039862632751465,grad_norm: 1.0000000218380116, iteration: 267600
loss: 1.8293702602386475,grad_norm: 1.0000000073274982, iteration: 267601
loss: 1.5622414350509644,grad_norm: 0.9999999658429118, iteration: 267602
loss: 1.5882688760757446,grad_norm: 0.9999996564841629, iteration: 267603
loss: 1.5526034832000732,grad_norm: 0.9999998354871797, iteration: 267604
loss: 1.4571088552474976,grad_norm: 0.9999997387185564, iteration: 267605
loss: 1.590237021446228,grad_norm: 0.9999997876889747, iteration: 267606
loss: 1.8354413509368896,grad_norm: 0.9999999793530343, iteration: 267607
loss: 1.6537636518478394,grad_norm: 1.0000000120836994, iteration: 267608
loss: 1.6263880729675293,grad_norm: 0.9999999789380857, iteration: 267609
loss: 1.791982889175415,grad_norm: 0.999999901897764, iteration: 267610
loss: 1.301774501800537,grad_norm: 0.9999997885700651, iteration: 267611
loss: 1.5422627925872803,grad_norm: 0.999999962088806, iteration: 267612
loss: 1.9951921701431274,grad_norm: 0.9999999091902534, iteration: 267613
loss: 1.780902624130249,grad_norm: 0.9999998137061541, iteration: 267614
loss: 1.6243594884872437,grad_norm: 0.9999999925212204, iteration: 267615
loss: 1.6630454063415527,grad_norm: 1.0000000113990133, iteration: 267616
loss: 1.6521714925765991,grad_norm: 0.9999998432388858, iteration: 267617
loss: 1.8402562141418457,grad_norm: 0.9999997464146213, iteration: 267618
loss: 1.5199062824249268,grad_norm: 0.9999998696548844, iteration: 267619
loss: 1.7306879758834839,grad_norm: 0.999999934479595, iteration: 267620
loss: 1.5183714628219604,grad_norm: 0.9999998310217073, iteration: 267621
loss: 1.8707536458969116,grad_norm: 0.9999999695340054, iteration: 267622
loss: 2.1091272830963135,grad_norm: 0.999999894366183, iteration: 267623
loss: 1.5745389461517334,grad_norm: 0.9999997559557406, iteration: 267624
loss: 1.5602879524230957,grad_norm: 0.9999998426440899, iteration: 267625
loss: 1.9315608739852905,grad_norm: 0.9999999603570207, iteration: 267626
loss: 2.0183708667755127,grad_norm: 1.000000017202503, iteration: 267627
loss: 2.093963384628296,grad_norm: 0.9999999793080179, iteration: 267628
loss: 1.6852390766143799,grad_norm: 1.0000000199016406, iteration: 267629
loss: 1.771429419517517,grad_norm: 0.999999852620833, iteration: 267630
loss: 1.5714178085327148,grad_norm: 1.0000000724832727, iteration: 267631
loss: 1.5375311374664307,grad_norm: 1.0000000329604855, iteration: 267632
loss: 1.7739648818969727,grad_norm: 0.9999999180249366, iteration: 267633
loss: 2.2453742027282715,grad_norm: 1.0000000351018807, iteration: 267634
loss: 1.6628068685531616,grad_norm: 1.0000000287105149, iteration: 267635
loss: 1.5800660848617554,grad_norm: 0.9999998986267933, iteration: 267636
loss: 2.021986246109009,grad_norm: 0.9999999456439561, iteration: 267637
loss: 1.6392570734024048,grad_norm: 1.000000005779895, iteration: 267638
loss: 1.7589352130889893,grad_norm: 0.9999998870659454, iteration: 267639
loss: 1.6319738626480103,grad_norm: 0.9999999204110688, iteration: 267640
loss: 1.8575944900512695,grad_norm: 0.9999999561676874, iteration: 267641
loss: 1.5774823427200317,grad_norm: 1.0000000678546925, iteration: 267642
loss: 1.587868332862854,grad_norm: 1.000000001377045, iteration: 267643
loss: 1.5139260292053223,grad_norm: 0.999999882138563, iteration: 267644
loss: 1.6160773038864136,grad_norm: 1.000000000916973, iteration: 267645
loss: 1.6549246311187744,grad_norm: 1.0000000566205614, iteration: 267646
loss: 1.7861058712005615,grad_norm: 1.00000000506639, iteration: 267647
loss: 1.7585173845291138,grad_norm: 0.9999999010010305, iteration: 267648
loss: 1.8176155090332031,grad_norm: 0.9999998027621874, iteration: 267649
loss: 1.8337467908859253,grad_norm: 0.9999999538619604, iteration: 267650
loss: 1.8145642280578613,grad_norm: 0.9999998498336057, iteration: 267651
loss: 1.7778136730194092,grad_norm: 0.9999997867899066, iteration: 267652
loss: 1.8015332221984863,grad_norm: 0.9999998825489095, iteration: 267653
loss: 1.572995901107788,grad_norm: 0.9999999608011042, iteration: 267654
loss: 1.6477940082550049,grad_norm: 0.9999997787743298, iteration: 267655
loss: 1.5896259546279907,grad_norm: 0.9999999595764933, iteration: 267656
loss: 1.8281774520874023,grad_norm: 0.9999998729828145, iteration: 267657
loss: 1.8801954984664917,grad_norm: 0.9999999991127437, iteration: 267658
loss: 1.6641359329223633,grad_norm: 1.0000000859502074, iteration: 267659
loss: 1.5728331804275513,grad_norm: 0.9999998469435458, iteration: 267660
loss: 1.6598231792449951,grad_norm: 0.9999999017485649, iteration: 267661
loss: 1.6402467489242554,grad_norm: 0.99999988972997, iteration: 267662
loss: 1.5879889726638794,grad_norm: 0.999999938316677, iteration: 267663
loss: 1.756233811378479,grad_norm: 0.9999997260445129, iteration: 267664
loss: 1.799380898475647,grad_norm: 1.0000000369279527, iteration: 267665
loss: 1.650614619255066,grad_norm: 0.9999999216409707, iteration: 267666
loss: 1.5422828197479248,grad_norm: 0.9999998796130181, iteration: 267667
loss: 1.6976535320281982,grad_norm: 0.9999999242440357, iteration: 267668
loss: 1.8232073783874512,grad_norm: 0.9999999798932628, iteration: 267669
loss: 1.8216099739074707,grad_norm: 0.9999999271131838, iteration: 267670
loss: 1.7336314916610718,grad_norm: 0.9999997957913113, iteration: 267671
loss: 1.6712826490402222,grad_norm: 0.9999998747971469, iteration: 267672
loss: 1.5674234628677368,grad_norm: 0.999999926549423, iteration: 267673
loss: 1.8807733058929443,grad_norm: 0.999999741215926, iteration: 267674
loss: 1.6834567785263062,grad_norm: 0.9999999754697241, iteration: 267675
loss: 1.6691383123397827,grad_norm: 0.9999999662544558, iteration: 267676
loss: 1.5908668041229248,grad_norm: 0.9999999677475648, iteration: 267677
loss: 1.6046146154403687,grad_norm: 1.0000000057765694, iteration: 267678
loss: 1.7794976234436035,grad_norm: 1.00000001498744, iteration: 267679
loss: 1.4724653959274292,grad_norm: 0.9999998543407856, iteration: 267680
loss: 1.5829920768737793,grad_norm: 0.999999878028708, iteration: 267681
loss: 1.7284773588180542,grad_norm: 0.9999999451991747, iteration: 267682
loss: 1.6042723655700684,grad_norm: 0.9999998372519401, iteration: 267683
loss: 1.6872711181640625,grad_norm: 0.9999999968732503, iteration: 267684
loss: 1.4468212127685547,grad_norm: 0.9999998683743525, iteration: 267685
loss: 1.5165456533432007,grad_norm: 0.9999999572052095, iteration: 267686
loss: 1.5675348043441772,grad_norm: 1.0000000026444922, iteration: 267687
loss: 1.781109094619751,grad_norm: 0.9999997751480656, iteration: 267688
loss: 1.638228178024292,grad_norm: 1.0000000172058967, iteration: 267689
loss: 1.7273023128509521,grad_norm: 1.0000000387436485, iteration: 267690
loss: 1.8070205450057983,grad_norm: 0.9999998776426042, iteration: 267691
loss: 1.3677934408187866,grad_norm: 0.9999995358382625, iteration: 267692
loss: 1.4375572204589844,grad_norm: 0.999999933290985, iteration: 267693
loss: 1.7808916568756104,grad_norm: 0.999999801512666, iteration: 267694
loss: 1.6428087949752808,grad_norm: 0.9999998716298987, iteration: 267695
loss: 1.4718157052993774,grad_norm: 0.9999999009720568, iteration: 267696
loss: 1.7990968227386475,grad_norm: 1.0000000217608984, iteration: 267697
loss: 1.944337248802185,grad_norm: 0.9999999114690331, iteration: 267698
loss: 1.5620498657226562,grad_norm: 0.9999999436558167, iteration: 267699
loss: 1.9299190044403076,grad_norm: 0.9999998710629434, iteration: 267700
loss: 1.6552232503890991,grad_norm: 0.9999998572132843, iteration: 267701
loss: 1.910672664642334,grad_norm: 1.0000000067694272, iteration: 267702
loss: 1.664085030555725,grad_norm: 0.9999994998713083, iteration: 267703
loss: 1.9897148609161377,grad_norm: 1.000000027296961, iteration: 267704
loss: 1.7021279335021973,grad_norm: 0.9999999622576871, iteration: 267705
loss: 1.5314656496047974,grad_norm: 0.9999998706732584, iteration: 267706
loss: 1.6256425380706787,grad_norm: 0.999999930351466, iteration: 267707
loss: 1.7850457429885864,grad_norm: 0.99999986994568, iteration: 267708
loss: 1.5484964847564697,grad_norm: 0.9999998695762345, iteration: 267709
loss: 1.8676795959472656,grad_norm: 1.0000000593875435, iteration: 267710
loss: 1.7524282932281494,grad_norm: 0.9999999148788707, iteration: 267711
loss: 1.7857096195220947,grad_norm: 0.9999997074804847, iteration: 267712
loss: 1.770798683166504,grad_norm: 0.9999995563757332, iteration: 267713
loss: 1.3720389604568481,grad_norm: 0.9999998875944507, iteration: 267714
loss: 1.8528602123260498,grad_norm: 0.9999999159721881, iteration: 267715
loss: 1.7588521242141724,grad_norm: 0.9999999357618427, iteration: 267716
loss: 1.7165021896362305,grad_norm: 0.9999999504050268, iteration: 267717
loss: 1.5102688074111938,grad_norm: 1.0000000113024088, iteration: 267718
loss: 1.861639142036438,grad_norm: 1.0000000397569613, iteration: 267719
loss: 1.876490592956543,grad_norm: 0.999999854856504, iteration: 267720
loss: 1.7565429210662842,grad_norm: 1.0000000543254874, iteration: 267721
loss: 1.7207237482070923,grad_norm: 0.999999997600924, iteration: 267722
loss: 1.542549729347229,grad_norm: 0.9999997258090206, iteration: 267723
loss: 1.5836418867111206,grad_norm: 0.9999995928204124, iteration: 267724
loss: 1.5895576477050781,grad_norm: 0.9999999359422117, iteration: 267725
loss: 1.5727412700653076,grad_norm: 0.9999998692750522, iteration: 267726
loss: 1.805617332458496,grad_norm: 0.9999998110505081, iteration: 267727
loss: 1.4847326278686523,grad_norm: 0.9999996170917739, iteration: 267728
loss: 1.618586778640747,grad_norm: 1.0000000395686035, iteration: 267729
loss: 1.6074042320251465,grad_norm: 1.0000000379350065, iteration: 267730
loss: 1.4730042219161987,grad_norm: 0.9999996474000411, iteration: 267731
loss: 1.58353853225708,grad_norm: 0.9999998232333113, iteration: 267732
loss: 1.5984833240509033,grad_norm: 0.9999998497786489, iteration: 267733
loss: 1.7839863300323486,grad_norm: 0.9999998866989782, iteration: 267734
loss: 1.4013205766677856,grad_norm: 1.000000006899199, iteration: 267735
loss: 1.382704496383667,grad_norm: 0.9999993935264849, iteration: 267736
loss: 1.4463534355163574,grad_norm: 0.9999995289613343, iteration: 267737
loss: 1.6276177167892456,grad_norm: 0.999999829534253, iteration: 267738
loss: 1.5776129961013794,grad_norm: 0.9999996672735217, iteration: 267739
loss: 1.5794283151626587,grad_norm: 0.9999999022327936, iteration: 267740
loss: 1.5764635801315308,grad_norm: 0.9999998814860573, iteration: 267741
loss: 1.6722798347473145,grad_norm: 0.9999999189856377, iteration: 267742
loss: 1.8940033912658691,grad_norm: 0.9999999239958423, iteration: 267743
loss: 1.8000231981277466,grad_norm: 0.9999994615986479, iteration: 267744
loss: 1.5134767293930054,grad_norm: 0.9999999021556618, iteration: 267745
loss: 1.7502033710479736,grad_norm: 0.999999873968871, iteration: 267746
loss: 1.7203418016433716,grad_norm: 0.9999998373893808, iteration: 267747
loss: 1.468833088874817,grad_norm: 0.9999997034318971, iteration: 267748
loss: 1.6648340225219727,grad_norm: 0.9999999704150946, iteration: 267749
loss: 1.5168559551239014,grad_norm: 0.999999793296225, iteration: 267750
loss: 1.4294464588165283,grad_norm: 0.9999998875311147, iteration: 267751
loss: 1.7824052572250366,grad_norm: 0.9999999725504749, iteration: 267752
loss: 1.4139790534973145,grad_norm: 0.9999996781047773, iteration: 267753
loss: 1.62191641330719,grad_norm: 0.9999996798052043, iteration: 267754
loss: 1.5888888835906982,grad_norm: 0.9999999043281755, iteration: 267755
loss: 1.5895901918411255,grad_norm: 0.9999998247178645, iteration: 267756
loss: 1.6270331144332886,grad_norm: 0.9999998653980302, iteration: 267757
loss: 2.0124523639678955,grad_norm: 0.9999999259507462, iteration: 267758
loss: 1.2091490030288696,grad_norm: 0.9999998199339, iteration: 267759
loss: 1.5366649627685547,grad_norm: 0.9999999058983878, iteration: 267760
loss: 1.4461027383804321,grad_norm: 0.9999997915803966, iteration: 267761
loss: 1.7565438747406006,grad_norm: 0.9999998653295677, iteration: 267762
loss: 1.5816627740859985,grad_norm: 0.9999998225273057, iteration: 267763
loss: 1.675354242324829,grad_norm: 0.9999998961629233, iteration: 267764
loss: 1.5077505111694336,grad_norm: 0.9999998300320796, iteration: 267765
loss: 1.7599494457244873,grad_norm: 0.9999999112248625, iteration: 267766
loss: 1.7134037017822266,grad_norm: 0.9999999083216738, iteration: 267767
loss: 1.3633524179458618,grad_norm: 0.9999998050885613, iteration: 267768
loss: 1.4751726388931274,grad_norm: 0.9999999127914978, iteration: 267769
loss: 1.639222264289856,grad_norm: 0.9999999200176586, iteration: 267770
loss: 1.37645423412323,grad_norm: 0.9999998455554681, iteration: 267771
loss: 1.5149312019348145,grad_norm: 0.9999999355154837, iteration: 267772
loss: 1.4526863098144531,grad_norm: 0.999999936589969, iteration: 267773
loss: 1.4981746673583984,grad_norm: 0.9999999423177324, iteration: 267774
loss: 1.4628887176513672,grad_norm: 0.9999998029316574, iteration: 267775
loss: 1.5122772455215454,grad_norm: 0.9999998554679606, iteration: 267776
loss: 1.462436556816101,grad_norm: 0.9999998717835017, iteration: 267777
loss: 1.7361253499984741,grad_norm: 1.0000000984795312, iteration: 267778
loss: 1.8464512825012207,grad_norm: 0.9999998522759376, iteration: 267779
loss: 1.913184642791748,grad_norm: 0.9999998660888724, iteration: 267780
loss: 1.6144888401031494,grad_norm: 0.9999999879016582, iteration: 267781
loss: 1.5898360013961792,grad_norm: 1.0000000131264895, iteration: 267782
loss: 1.5775490999221802,grad_norm: 0.9999998275416078, iteration: 267783
loss: 1.4925681352615356,grad_norm: 0.9999997719582534, iteration: 267784
loss: 1.6317728757858276,grad_norm: 0.9999999269822184, iteration: 267785
loss: 1.6746174097061157,grad_norm: 0.999999846151802, iteration: 267786
loss: 1.6277786493301392,grad_norm: 0.9999999293148187, iteration: 267787
loss: 1.7176520824432373,grad_norm: 1.000000034127443, iteration: 267788
loss: 1.2547059059143066,grad_norm: 0.9999996758844824, iteration: 267789
loss: 1.8057866096496582,grad_norm: 0.9999998671297919, iteration: 267790
loss: 1.6046538352966309,grad_norm: 0.999999897055173, iteration: 267791
loss: 1.5973498821258545,grad_norm: 0.999999940302373, iteration: 267792
loss: 1.3857343196868896,grad_norm: 0.9999997890568091, iteration: 267793
loss: 1.489505410194397,grad_norm: 0.9999998325104229, iteration: 267794
loss: 1.4953114986419678,grad_norm: 0.9999998595600047, iteration: 267795
loss: 1.5966728925704956,grad_norm: 1.0000000329349807, iteration: 267796
loss: 1.5245721340179443,grad_norm: 1.000000006646426, iteration: 267797
loss: 1.389809489250183,grad_norm: 0.9999996022729986, iteration: 267798
loss: 1.4847017526626587,grad_norm: 0.9999999256364268, iteration: 267799
loss: 1.3840346336364746,grad_norm: 0.9999999906611466, iteration: 267800
loss: 1.3830465078353882,grad_norm: 0.9999998813123113, iteration: 267801
loss: 1.3923349380493164,grad_norm: 0.9999999162356552, iteration: 267802
loss: 1.3529646396636963,grad_norm: 0.9999995299312269, iteration: 267803
loss: 1.3447887897491455,grad_norm: 0.9999999637769226, iteration: 267804
loss: 1.4644619226455688,grad_norm: 0.9999999179549065, iteration: 267805
loss: 1.4232783317565918,grad_norm: 0.9999996574592902, iteration: 267806
loss: 1.48773992061615,grad_norm: 0.9999997873968616, iteration: 267807
loss: 1.4134423732757568,grad_norm: 0.9999999302905228, iteration: 267808
loss: 1.427100419998169,grad_norm: 0.9999997418246207, iteration: 267809
loss: 1.2639609575271606,grad_norm: 0.9999998659367283, iteration: 267810
loss: 1.2266149520874023,grad_norm: 0.9999999207050243, iteration: 267811
loss: 1.9537032842636108,grad_norm: 0.9999999012876691, iteration: 267812
loss: 1.316946268081665,grad_norm: 0.9999999218982764, iteration: 267813
loss: 1.6686468124389648,grad_norm: 0.9999999001881912, iteration: 267814
loss: 1.5678813457489014,grad_norm: 0.9999998008862466, iteration: 267815
loss: 1.2465832233428955,grad_norm: 0.9999995009399161, iteration: 267816
loss: 1.3842402696609497,grad_norm: 0.9999997756554169, iteration: 267817
loss: 1.2369091510772705,grad_norm: 0.9999998219924734, iteration: 267818
loss: 1.5052688121795654,grad_norm: 1.0000000738749735, iteration: 267819
loss: 1.4572070837020874,grad_norm: 0.9999997733734735, iteration: 267820
loss: 1.1953569650650024,grad_norm: 0.9999999587998383, iteration: 267821
loss: 1.3578649759292603,grad_norm: 0.9999999973816271, iteration: 267822
loss: 1.3328810930252075,grad_norm: 0.9999998297588792, iteration: 267823
loss: 1.4872241020202637,grad_norm: 0.9999998641339418, iteration: 267824
loss: 1.5112299919128418,grad_norm: 0.9999999877844985, iteration: 267825
loss: 1.4461636543273926,grad_norm: 0.9999999077208257, iteration: 267826
loss: 1.2998756170272827,grad_norm: 0.9999999198610806, iteration: 267827
loss: 1.2655614614486694,grad_norm: 0.9999999162928199, iteration: 267828
loss: 1.3918923139572144,grad_norm: 0.9999999574134282, iteration: 267829
loss: 1.4306316375732422,grad_norm: 0.9999997755800304, iteration: 267830
loss: 1.4417251348495483,grad_norm: 0.9999999261905013, iteration: 267831
loss: 1.4311091899871826,grad_norm: 0.99999986307435, iteration: 267832
loss: 1.3397216796875,grad_norm: 1.000000040931338, iteration: 267833
loss: 1.368470311164856,grad_norm: 0.9999999637991955, iteration: 267834
loss: 1.4061256647109985,grad_norm: 0.9999997806855936, iteration: 267835
loss: 1.1780853271484375,grad_norm: 0.9999996475604664, iteration: 267836
loss: 1.2440141439437866,grad_norm: 0.9999996899995478, iteration: 267837
loss: 1.4770631790161133,grad_norm: 0.9999999542483631, iteration: 267838
loss: 1.5176305770874023,grad_norm: 0.999999856537871, iteration: 267839
loss: 1.3893707990646362,grad_norm: 0.9999997988161102, iteration: 267840
loss: 1.455708384513855,grad_norm: 0.9999998994778468, iteration: 267841
loss: 1.3475195169448853,grad_norm: 0.9999998313462956, iteration: 267842
loss: 1.4229708909988403,grad_norm: 0.999999873587126, iteration: 267843
loss: 1.6068006753921509,grad_norm: 0.9999999574730463, iteration: 267844
loss: 1.5911169052124023,grad_norm: 0.999999630810987, iteration: 267845
loss: 1.316611647605896,grad_norm: 0.9999998270477352, iteration: 267846
loss: 1.2921762466430664,grad_norm: 0.999999928127966, iteration: 267847
loss: 1.3382728099822998,grad_norm: 0.9999998529621967, iteration: 267848
loss: 1.473055362701416,grad_norm: 0.9999999714404897, iteration: 267849
loss: 1.363157868385315,grad_norm: 0.9999998877077698, iteration: 267850
loss: 1.3335846662521362,grad_norm: 0.9999997173488644, iteration: 267851
loss: 1.352712631225586,grad_norm: 1.0000000038584378, iteration: 267852
loss: 1.34413480758667,grad_norm: 0.9999998719407399, iteration: 267853
loss: 1.2049717903137207,grad_norm: 0.9999998861248671, iteration: 267854
loss: 1.1936566829681396,grad_norm: 0.9999999310288737, iteration: 267855
loss: 1.3381378650665283,grad_norm: 0.9999998619309265, iteration: 267856
loss: 1.12809157371521,grad_norm: 0.9999997914878411, iteration: 267857
loss: 1.348429799079895,grad_norm: 0.999999859841632, iteration: 267858
loss: 1.2059853076934814,grad_norm: 0.9999997775581783, iteration: 267859
loss: 1.2231227159500122,grad_norm: 0.9999996798407862, iteration: 267860
loss: 1.0809720754623413,grad_norm: 0.9999990803004851, iteration: 267861
loss: 1.048195719718933,grad_norm: 0.9999994452795957, iteration: 267862
loss: 1.2919018268585205,grad_norm: 0.999999718938872, iteration: 267863
loss: 1.1477344036102295,grad_norm: 0.9999999198790819, iteration: 267864
loss: 1.169722080230713,grad_norm: 0.9999994613789571, iteration: 267865
loss: 1.3938822746276855,grad_norm: 0.9999999187884425, iteration: 267866
loss: 1.2344143390655518,grad_norm: 0.9999999375270082, iteration: 267867
loss: 1.2732447385787964,grad_norm: 0.9999998943077781, iteration: 267868
loss: 1.146863341331482,grad_norm: 0.9999990659946093, iteration: 267869
loss: 1.3242535591125488,grad_norm: 0.9999998020987427, iteration: 267870
loss: 1.1606541872024536,grad_norm: 0.9999996881165415, iteration: 267871
loss: 1.3482067584991455,grad_norm: 0.999999697347335, iteration: 267872
loss: 1.1201388835906982,grad_norm: 0.9999992822259157, iteration: 267873
loss: 1.6492592096328735,grad_norm: 0.9999997698735698, iteration: 267874
loss: 1.2147843837738037,grad_norm: 0.9999995929010198, iteration: 267875
loss: 1.0853043794631958,grad_norm: 0.9999999210237092, iteration: 267876
loss: 1.178330421447754,grad_norm: 0.9999998061004697, iteration: 267877
loss: 1.1728476285934448,grad_norm: 0.9999994980621182, iteration: 267878
loss: 1.3380507230758667,grad_norm: 0.9999994549529586, iteration: 267879
loss: 1.1836599111557007,grad_norm: 0.9999994729365808, iteration: 267880
loss: 1.1263928413391113,grad_norm: 0.9999995824895173, iteration: 267881
loss: 1.280653715133667,grad_norm: 0.9999997304943985, iteration: 267882
loss: 1.2089811563491821,grad_norm: 0.9999997904256577, iteration: 267883
loss: 1.4071522951126099,grad_norm: 0.9999998075894808, iteration: 267884
loss: 1.307551383972168,grad_norm: 0.99999970548112, iteration: 267885
loss: 1.317991852760315,grad_norm: 0.9999996658827387, iteration: 267886
loss: 1.125443458557129,grad_norm: 0.9999992544385737, iteration: 267887
loss: 1.1307648420333862,grad_norm: 0.9999999527476138, iteration: 267888
loss: 1.1862207651138306,grad_norm: 0.9999991712710162, iteration: 267889
loss: 1.2107874155044556,grad_norm: 0.9999994321172394, iteration: 267890
loss: 1.3709328174591064,grad_norm: 0.9999995486062245, iteration: 267891
loss: 1.2475063800811768,grad_norm: 0.9999992107503147, iteration: 267892
loss: 1.2053585052490234,grad_norm: 0.9999998683639737, iteration: 267893
loss: 1.1261118650436401,grad_norm: 0.9999991091306498, iteration: 267894
loss: 1.1786905527114868,grad_norm: 0.9999996138211993, iteration: 267895
loss: 1.1812628507614136,grad_norm: 0.9999995533484413, iteration: 267896
loss: 1.1736905574798584,grad_norm: 0.9999998892986216, iteration: 267897
loss: 1.1747902631759644,grad_norm: 0.9999995083924057, iteration: 267898
loss: 1.2300447225570679,grad_norm: 0.9999995389880534, iteration: 267899
loss: 1.5558749437332153,grad_norm: 0.9999996166937666, iteration: 267900
loss: 1.1354663372039795,grad_norm: 0.9999996522849082, iteration: 267901
loss: 1.1633703708648682,grad_norm: 1.0000000161496299, iteration: 267902
loss: 1.2797127962112427,grad_norm: 0.9999993240794424, iteration: 267903
loss: 1.1923550367355347,grad_norm: 0.9999993719363386, iteration: 267904
loss: 1.1738048791885376,grad_norm: 0.9999996190996193, iteration: 267905
loss: 1.2525755167007446,grad_norm: 0.9999994492674967, iteration: 267906
loss: 1.083578109741211,grad_norm: 0.9999998842875781, iteration: 267907
loss: 1.1379040479660034,grad_norm: 0.9999997168141993, iteration: 267908
loss: 1.1021192073822021,grad_norm: 0.999999310701643, iteration: 267909
loss: 1.2034857273101807,grad_norm: 0.9999996523417098, iteration: 267910
loss: 1.193678855895996,grad_norm: 0.9999991091866258, iteration: 267911
loss: 1.1952259540557861,grad_norm: 0.9999997298425248, iteration: 267912
loss: 1.2732447385787964,grad_norm: 0.9999999203649031, iteration: 267913
loss: 1.0871036052703857,grad_norm: 0.9999994236094384, iteration: 267914
loss: 1.135884404182434,grad_norm: 0.999999363439922, iteration: 267915
loss: 1.1795322895050049,grad_norm: 0.999999819363524, iteration: 267916
loss: 1.1696288585662842,grad_norm: 0.9999993078339021, iteration: 267917
loss: 1.0956006050109863,grad_norm: 0.9999996697914145, iteration: 267918
loss: 1.187363862991333,grad_norm: 0.9999995484456319, iteration: 267919
loss: 1.2209835052490234,grad_norm: 0.9999994638492388, iteration: 267920
loss: 1.2778432369232178,grad_norm: 0.9999998075580555, iteration: 267921
loss: 1.0908440351486206,grad_norm: 0.9999995029651458, iteration: 267922
loss: 1.200778841972351,grad_norm: 0.9999991809154266, iteration: 267923
loss: 1.2818611860275269,grad_norm: 0.9999999124938624, iteration: 267924
loss: 1.160544991493225,grad_norm: 1.0000000426581952, iteration: 267925
loss: 1.1073503494262695,grad_norm: 0.9999997037305243, iteration: 267926
loss: 1.2757163047790527,grad_norm: 0.999999809415536, iteration: 267927
loss: 1.1812562942504883,grad_norm: 0.9999991132080572, iteration: 267928
loss: 1.1990212202072144,grad_norm: 0.9999994758046024, iteration: 267929
loss: 1.1961121559143066,grad_norm: 0.9999998599913514, iteration: 267930
loss: 1.1102358102798462,grad_norm: 0.9999999188627616, iteration: 267931
loss: 1.1838181018829346,grad_norm: 0.9999997072893431, iteration: 267932
loss: 1.1761161088943481,grad_norm: 0.9999996366913919, iteration: 267933
loss: 1.161690354347229,grad_norm: 0.9999996194016424, iteration: 267934
loss: 1.0636277198791504,grad_norm: 0.9999992319559944, iteration: 267935
loss: 1.1201915740966797,grad_norm: 0.9999994123414098, iteration: 267936
loss: 1.162024974822998,grad_norm: 0.9999996860824193, iteration: 267937
loss: 1.3083465099334717,grad_norm: 0.9999997175553983, iteration: 267938
loss: 1.06361985206604,grad_norm: 0.933903915091985, iteration: 267939
loss: 1.1221072673797607,grad_norm: 0.9999991789075678, iteration: 267940
loss: 1.0916178226470947,grad_norm: 0.9999998039823437, iteration: 267941
loss: 1.029241681098938,grad_norm: 0.9999992221735118, iteration: 267942
loss: 1.3123233318328857,grad_norm: 0.9999995333413707, iteration: 267943
loss: 1.0799295902252197,grad_norm: 0.9999996745953169, iteration: 267944
loss: 1.1374510526657104,grad_norm: 0.9999997639358258, iteration: 267945
loss: 0.9896199703216553,grad_norm: 0.999999024626868, iteration: 267946
loss: 1.1385973691940308,grad_norm: 0.9999996228483861, iteration: 267947
loss: 1.242343783378601,grad_norm: 0.9999994524404289, iteration: 267948
loss: 1.2066258192062378,grad_norm: 0.999999575532808, iteration: 267949
loss: 1.1468669176101685,grad_norm: 0.9999991477201154, iteration: 267950
loss: 1.1195013523101807,grad_norm: 0.9999993200885152, iteration: 267951
loss: 1.1542764902114868,grad_norm: 0.9999999614767002, iteration: 267952
loss: 1.337857961654663,grad_norm: 0.9999993961905165, iteration: 267953
loss: 1.090247631072998,grad_norm: 0.9999995586805253, iteration: 267954
loss: 1.271249532699585,grad_norm: 0.999999908234189, iteration: 267955
loss: 1.1184332370758057,grad_norm: 0.9999998386101225, iteration: 267956
loss: 1.0616066455841064,grad_norm: 0.9999995348670668, iteration: 267957
loss: 1.1444683074951172,grad_norm: 0.9999993183002869, iteration: 267958
loss: 1.0413070917129517,grad_norm: 0.9999997999295951, iteration: 267959
loss: 1.1289122104644775,grad_norm: 0.9999997273544663, iteration: 267960
loss: 1.1119964122772217,grad_norm: 0.9694660133730746, iteration: 267961
loss: 1.0852546691894531,grad_norm: 0.9999998560699664, iteration: 267962
loss: 1.1092503070831299,grad_norm: 0.9999998324088192, iteration: 267963
loss: 1.0310527086257935,grad_norm: 0.9999991612144409, iteration: 267964
loss: 1.0982284545898438,grad_norm: 0.9999995563491288, iteration: 267965
loss: 1.1547168493270874,grad_norm: 0.999999342713678, iteration: 267966
loss: 1.050588607788086,grad_norm: 0.9480248429854803, iteration: 267967
loss: 1.1450111865997314,grad_norm: 0.9999998285703111, iteration: 267968
loss: 1.068772554397583,grad_norm: 0.999999839772774, iteration: 267969
loss: 1.2163537740707397,grad_norm: 0.9999997440035908, iteration: 267970
loss: 1.1035634279251099,grad_norm: 0.9999995361554322, iteration: 267971
loss: 1.0943726301193237,grad_norm: 0.9999996784995487, iteration: 267972
loss: 1.0684765577316284,grad_norm: 0.9999997486054049, iteration: 267973
loss: 1.1483020782470703,grad_norm: 0.9999994377360814, iteration: 267974
loss: 1.0634437799453735,grad_norm: 0.9999996205919608, iteration: 267975
loss: 1.008712649345398,grad_norm: 0.9999997363026705, iteration: 267976
loss: 1.1098461151123047,grad_norm: 0.9999996975895896, iteration: 267977
loss: 1.0618627071380615,grad_norm: 0.9999993458385078, iteration: 267978
loss: 1.0610613822937012,grad_norm: 0.9999993588719065, iteration: 267979
loss: 1.1016782522201538,grad_norm: 0.9999995947111542, iteration: 267980
loss: 1.1822980642318726,grad_norm: 0.9999996089944745, iteration: 267981
loss: 1.071722149848938,grad_norm: 0.9999991824740733, iteration: 267982
loss: 1.0810590982437134,grad_norm: 0.9999991933582685, iteration: 267983
loss: 1.1325180530548096,grad_norm: 0.9999998197192624, iteration: 267984
loss: 1.0278719663619995,grad_norm: 0.9999996986530263, iteration: 267985
loss: 1.1462122201919556,grad_norm: 0.9999991129670015, iteration: 267986
loss: 1.230876088142395,grad_norm: 0.9999999025916939, iteration: 267987
loss: 1.013841152191162,grad_norm: 0.999999235020741, iteration: 267988
loss: 1.11489999294281,grad_norm: 0.9999991975007443, iteration: 267989
loss: 1.1559560298919678,grad_norm: 0.9999994819115476, iteration: 267990
loss: 1.0891610383987427,grad_norm: 0.9999990681012648, iteration: 267991
loss: 1.089562177658081,grad_norm: 0.999999842003706, iteration: 267992
loss: 1.1180752515792847,grad_norm: 0.9999996501198151, iteration: 267993
loss: 1.0761617422103882,grad_norm: 0.9999999053021215, iteration: 267994
loss: 1.111748456954956,grad_norm: 0.9999994819508525, iteration: 267995
loss: 1.1274349689483643,grad_norm: 0.9999996433385163, iteration: 267996
loss: 1.1351903676986694,grad_norm: 0.9999998741516349, iteration: 267997
loss: 1.2734090089797974,grad_norm: 0.9999998113008463, iteration: 267998
loss: 1.2014248371124268,grad_norm: 1.0000000248544152, iteration: 267999
loss: 1.0367401838302612,grad_norm: 0.9999992309233804, iteration: 268000
loss: 1.0740509033203125,grad_norm: 0.9999998914562772, iteration: 268001
loss: 0.996151864528656,grad_norm: 0.9120610671511602, iteration: 268002
loss: 1.067844033241272,grad_norm: 0.9999997308996669, iteration: 268003
loss: 1.1487149000167847,grad_norm: 0.9999998352000198, iteration: 268004
loss: 1.0423341989517212,grad_norm: 0.9999996397942379, iteration: 268005
loss: 1.0390747785568237,grad_norm: 0.9847425017350184, iteration: 268006
loss: 1.3157721757888794,grad_norm: 0.9999999888757685, iteration: 268007
loss: 1.0843747854232788,grad_norm: 0.9999992530234637, iteration: 268008
loss: 1.1260566711425781,grad_norm: 0.9999993513197268, iteration: 268009
loss: 1.1601102352142334,grad_norm: 0.9999992915890141, iteration: 268010
loss: 1.172890305519104,grad_norm: 0.999999650676037, iteration: 268011
loss: 1.013094186782837,grad_norm: 0.9999990842350547, iteration: 268012
loss: 1.039442539215088,grad_norm: 0.9999998919532763, iteration: 268013
loss: 1.1564958095550537,grad_norm: 0.9200895242195679, iteration: 268014
loss: 1.1591238975524902,grad_norm: 0.999999388437793, iteration: 268015
loss: 1.0276609659194946,grad_norm: 0.999999979317309, iteration: 268016
loss: 1.0771762132644653,grad_norm: 0.9999991214171009, iteration: 268017
loss: 0.968116283416748,grad_norm: 0.9999992875505345, iteration: 268018
loss: 1.1341882944107056,grad_norm: 0.9999997879891837, iteration: 268019
loss: 1.0382896661758423,grad_norm: 0.9999998578307305, iteration: 268020
loss: 1.065566897392273,grad_norm: 0.9999989824166318, iteration: 268021
loss: 1.000986933708191,grad_norm: 0.9999996205662008, iteration: 268022
loss: 1.079315185546875,grad_norm: 0.9999998689713587, iteration: 268023
loss: 1.081794261932373,grad_norm: 0.9999993284913036, iteration: 268024
loss: 1.1248221397399902,grad_norm: 0.9999992735421821, iteration: 268025
loss: 1.0552020072937012,grad_norm: 0.9999997650281935, iteration: 268026
loss: 1.1194857358932495,grad_norm: 0.9999998811366123, iteration: 268027
loss: 1.128792405128479,grad_norm: 0.999999350282161, iteration: 268028
loss: 1.213194727897644,grad_norm: 0.9999993358622601, iteration: 268029
loss: 1.041805386543274,grad_norm: 0.8802310095972623, iteration: 268030
loss: 1.1255908012390137,grad_norm: 0.9999996608741788, iteration: 268031
loss: 1.031066656112671,grad_norm: 0.9999992429300087, iteration: 268032
loss: 1.1465193033218384,grad_norm: 0.9999990270268881, iteration: 268033
loss: 1.0233240127563477,grad_norm: 0.9999996341091713, iteration: 268034
loss: 1.1624226570129395,grad_norm: 0.9999991674240232, iteration: 268035
loss: 0.9773063063621521,grad_norm: 0.9999991233381894, iteration: 268036
loss: 1.1370607614517212,grad_norm: 0.9999991878108613, iteration: 268037
loss: 1.0775474309921265,grad_norm: 0.9999996930859263, iteration: 268038
loss: 1.024799108505249,grad_norm: 0.9527686274601411, iteration: 268039
loss: 1.0476535558700562,grad_norm: 0.9999996511078291, iteration: 268040
loss: 1.0927942991256714,grad_norm: 0.9999990936475639, iteration: 268041
loss: 1.1261646747589111,grad_norm: 0.9999997348301431, iteration: 268042
loss: 1.084284782409668,grad_norm: 0.9999991608522447, iteration: 268043
loss: 0.9824293255805969,grad_norm: 0.999999759206713, iteration: 268044
loss: 1.0589090585708618,grad_norm: 0.9999993453985144, iteration: 268045
loss: 1.1593244075775146,grad_norm: 0.9999993468339712, iteration: 268046
loss: 1.1159526109695435,grad_norm: 0.9999995278731623, iteration: 268047
loss: 1.105127215385437,grad_norm: 0.9857781886785603, iteration: 268048
loss: 1.1413242816925049,grad_norm: 0.9999991892326097, iteration: 268049
loss: 1.063551425933838,grad_norm: 0.9999999342575723, iteration: 268050
loss: 1.0698504447937012,grad_norm: 0.9999992021026933, iteration: 268051
loss: 1.0840729475021362,grad_norm: 0.9999991251656105, iteration: 268052
loss: 1.0217152833938599,grad_norm: 0.972157451327397, iteration: 268053
loss: 1.1450068950653076,grad_norm: 0.999999609057444, iteration: 268054
loss: 1.0023865699768066,grad_norm: 0.9999993740173478, iteration: 268055
loss: 1.1021188497543335,grad_norm: 0.9999995545132545, iteration: 268056
loss: 1.1842061281204224,grad_norm: 0.9999996142422347, iteration: 268057
loss: 1.007631778717041,grad_norm: 0.8851260715148679, iteration: 268058
loss: 1.005244255065918,grad_norm: 0.9999991073549438, iteration: 268059
loss: 1.0930366516113281,grad_norm: 0.9999992217638626, iteration: 268060
loss: 1.2669771909713745,grad_norm: 0.9999996158315524, iteration: 268061
loss: 1.1763650178909302,grad_norm: 0.9999997235691573, iteration: 268062
loss: 1.0181474685668945,grad_norm: 0.9999991848220078, iteration: 268063
loss: 1.2155120372772217,grad_norm: 0.9999997741915474, iteration: 268064
loss: 1.1437828540802002,grad_norm: 0.9999994849007631, iteration: 268065
loss: 1.014874815940857,grad_norm: 0.9999992185517418, iteration: 268066
loss: 1.2328749895095825,grad_norm: 1.000000097845888, iteration: 268067
loss: 1.0801829099655151,grad_norm: 0.9999997617189662, iteration: 268068
loss: 1.1820335388183594,grad_norm: 0.8689144917000816, iteration: 268069
loss: 1.0829777717590332,grad_norm: 0.999999669413417, iteration: 268070
loss: 1.0594968795776367,grad_norm: 0.9999992811250841, iteration: 268071
loss: 1.1381945610046387,grad_norm: 0.9999992809834637, iteration: 268072
loss: 1.2436782121658325,grad_norm: 0.9999997213039935, iteration: 268073
loss: 1.0848084688186646,grad_norm: 0.8723799670786448, iteration: 268074
loss: 1.1397334337234497,grad_norm: 0.9999996466322546, iteration: 268075
loss: 1.2155052423477173,grad_norm: 0.9999997535048337, iteration: 268076
loss: 0.9917545318603516,grad_norm: 0.9999996013959068, iteration: 268077
loss: 1.162177324295044,grad_norm: 0.999999122131161, iteration: 268078
loss: 1.2038817405700684,grad_norm: 0.9999996957301177, iteration: 268079
loss: 1.1325206756591797,grad_norm: 0.9999995200422933, iteration: 268080
loss: 1.0491141080856323,grad_norm: 0.9491617639946814, iteration: 268081
loss: 1.1467863321304321,grad_norm: 0.9999993673528265, iteration: 268082
loss: 1.0904206037521362,grad_norm: 0.9999991702091424, iteration: 268083
loss: 1.055658221244812,grad_norm: 0.9999992610923673, iteration: 268084
loss: 1.0760419368743896,grad_norm: 0.999999992069257, iteration: 268085
loss: 1.2169384956359863,grad_norm: 0.9999996473454036, iteration: 268086
loss: 1.1047815084457397,grad_norm: 0.9999999734333662, iteration: 268087
loss: 0.9948064684867859,grad_norm: 0.8473978859230469, iteration: 268088
loss: 1.0341449975967407,grad_norm: 0.9999991321546522, iteration: 268089
loss: 1.0337594747543335,grad_norm: 0.9999990073893608, iteration: 268090
loss: 1.01127290725708,grad_norm: 0.8821673975381052, iteration: 268091
loss: 1.0224372148513794,grad_norm: 0.9999996400131236, iteration: 268092
loss: 1.0262478590011597,grad_norm: 0.9999995111764559, iteration: 268093
loss: 1.033597469329834,grad_norm: 0.9999994203716679, iteration: 268094
loss: 1.0738961696624756,grad_norm: 0.9999991474133862, iteration: 268095
loss: 1.037224531173706,grad_norm: 0.9999997797414281, iteration: 268096
loss: 1.0514987707138062,grad_norm: 0.9849975504572962, iteration: 268097
loss: 1.0904344320297241,grad_norm: 0.9850634771505516, iteration: 268098
loss: 1.1864349842071533,grad_norm: 0.9999999171284569, iteration: 268099
loss: 1.1380234956741333,grad_norm: 0.9999999335382979, iteration: 268100
loss: 1.0136051177978516,grad_norm: 0.9934798253374232, iteration: 268101
loss: 1.0114071369171143,grad_norm: 0.9999991337156978, iteration: 268102
loss: 1.027267575263977,grad_norm: 0.9056182912664126, iteration: 268103
loss: 1.1434011459350586,grad_norm: 0.9999998829383783, iteration: 268104
loss: 1.013867974281311,grad_norm: 0.8610655253563326, iteration: 268105
loss: 1.0823875665664673,grad_norm: 0.9999990804412655, iteration: 268106
loss: 1.0182496309280396,grad_norm: 0.9408877541050631, iteration: 268107
loss: 1.0400408506393433,grad_norm: 0.9928282949689522, iteration: 268108
loss: 1.2235774993896484,grad_norm: 0.9999993386414332, iteration: 268109
loss: 1.0916128158569336,grad_norm: 0.9999993210321921, iteration: 268110
loss: 1.1468924283981323,grad_norm: 0.9999998555195061, iteration: 268111
loss: 1.0257618427276611,grad_norm: 0.8732593667829551, iteration: 268112
loss: 1.1410045623779297,grad_norm: 0.9999999635943727, iteration: 268113
loss: 1.165582299232483,grad_norm: 0.9999990375256201, iteration: 268114
loss: 0.9952850341796875,grad_norm: 0.8643708273184638, iteration: 268115
loss: 1.0178771018981934,grad_norm: 0.9999992304118105, iteration: 268116
loss: 1.051637887954712,grad_norm: 0.9999992181881227, iteration: 268117
loss: 0.9834733009338379,grad_norm: 0.9425564103581194, iteration: 268118
loss: 1.0300593376159668,grad_norm: 0.990207121374644, iteration: 268119
loss: 1.028543472290039,grad_norm: 0.9999993222468905, iteration: 268120
loss: 0.9830085635185242,grad_norm: 0.8733545592357036, iteration: 268121
loss: 1.1390825510025024,grad_norm: 0.9999999327354524, iteration: 268122
loss: 1.1307481527328491,grad_norm: 0.9999995649328942, iteration: 268123
loss: 1.0349446535110474,grad_norm: 0.9999996900910323, iteration: 268124
loss: 1.0668728351593018,grad_norm: 0.9248611061203433, iteration: 268125
loss: 1.0084385871887207,grad_norm: 0.8106288116475546, iteration: 268126
loss: 1.0159192085266113,grad_norm: 0.8067846333577451, iteration: 268127
loss: 1.004792332649231,grad_norm: 0.9175042628816731, iteration: 268128
loss: 1.1940205097198486,grad_norm: 0.9999991588740607, iteration: 268129
loss: 1.1270943880081177,grad_norm: 0.9999995564706229, iteration: 268130
loss: 1.1235963106155396,grad_norm: 0.9999992453631408, iteration: 268131
loss: 1.1471177339553833,grad_norm: 0.9999998855259514, iteration: 268132
loss: 1.0235247611999512,grad_norm: 0.8611686828648784, iteration: 268133
loss: 1.2304985523223877,grad_norm: 0.9999994273869306, iteration: 268134
loss: 1.0134122371673584,grad_norm: 0.9999999691140284, iteration: 268135
loss: 1.0696972608566284,grad_norm: 0.9999995241793133, iteration: 268136
loss: 1.1141480207443237,grad_norm: 0.9999995567425922, iteration: 268137
loss: 1.1675411462783813,grad_norm: 0.8959499543979041, iteration: 268138
loss: 1.1727781295776367,grad_norm: 1.0000000092876944, iteration: 268139
loss: 1.0484516620635986,grad_norm: 0.928819989793872, iteration: 268140
loss: 1.1097638607025146,grad_norm: 0.9999990956493254, iteration: 268141
loss: 1.0099297761917114,grad_norm: 0.9999990980694661, iteration: 268142
loss: 1.0256507396697998,grad_norm: 0.9999997691558514, iteration: 268143
loss: 1.0632485151290894,grad_norm: 0.9999997225202774, iteration: 268144
loss: 1.1166117191314697,grad_norm: 0.9999991772657034, iteration: 268145
loss: 1.088869333267212,grad_norm: 0.999999141881347, iteration: 268146
loss: 1.0672262907028198,grad_norm: 0.9999992528165944, iteration: 268147
loss: 1.0338307619094849,grad_norm: 0.9999991690936693, iteration: 268148
loss: 1.0480103492736816,grad_norm: 0.999999183354565, iteration: 268149
loss: 1.0202945470809937,grad_norm: 0.8220609913131822, iteration: 268150
loss: 1.070522427558899,grad_norm: 0.9999991124390252, iteration: 268151
loss: 1.0424131155014038,grad_norm: 0.9999996844615465, iteration: 268152
loss: 1.0047045946121216,grad_norm: 0.9999991177191183, iteration: 268153
loss: 1.102041482925415,grad_norm: 0.9999992018322392, iteration: 268154
loss: 1.0046741962432861,grad_norm: 0.8666276594059085, iteration: 268155
loss: 1.026535987854004,grad_norm: 0.9999990559414013, iteration: 268156
loss: 1.1310545206069946,grad_norm: 0.9999994691138936, iteration: 268157
loss: 1.1471463441848755,grad_norm: 0.9999994598052522, iteration: 268158
loss: 0.9818035960197449,grad_norm: 0.9999992000359196, iteration: 268159
loss: 1.1207622289657593,grad_norm: 0.9999991721411098, iteration: 268160
loss: 1.0342543125152588,grad_norm: 0.9999989923404072, iteration: 268161
loss: 1.0622613430023193,grad_norm: 0.999999304454048, iteration: 268162
loss: 1.0784051418304443,grad_norm: 0.9999997823571867, iteration: 268163
loss: 1.0835851430892944,grad_norm: 0.9324745221105584, iteration: 268164
loss: 1.059641718864441,grad_norm: 0.9999999559820717, iteration: 268165
loss: 1.0520902872085571,grad_norm: 0.9999991366859796, iteration: 268166
loss: 1.0797702074050903,grad_norm: 0.9999992736571167, iteration: 268167
loss: 1.1386579275131226,grad_norm: 0.9999998561849255, iteration: 268168
loss: 1.0359607934951782,grad_norm: 0.9999990816692721, iteration: 268169
loss: 1.0578904151916504,grad_norm: 0.8059648930514086, iteration: 268170
loss: 0.9978482723236084,grad_norm: 0.8466756919453926, iteration: 268171
loss: 1.0393413305282593,grad_norm: 0.9797585418017853, iteration: 268172
loss: 1.0904335975646973,grad_norm: 0.8720875938396063, iteration: 268173
loss: 1.1328060626983643,grad_norm: 1.0000000489616292, iteration: 268174
loss: 1.0027509927749634,grad_norm: 0.8918000983242628, iteration: 268175
loss: 1.0268112421035767,grad_norm: 0.9999992896624897, iteration: 268176
loss: 1.0179883241653442,grad_norm: 0.8911117283288599, iteration: 268177
loss: 1.0845307111740112,grad_norm: 0.9999996213661582, iteration: 268178
loss: 1.0177342891693115,grad_norm: 0.9675091477720589, iteration: 268179
loss: 1.0645277500152588,grad_norm: 0.9999992684749353, iteration: 268180
loss: 1.090753197669983,grad_norm: 0.9999993726465707, iteration: 268181
loss: 1.0383108854293823,grad_norm: 0.9999991740831481, iteration: 268182
loss: 1.006420612335205,grad_norm: 0.9999997491850813, iteration: 268183
loss: 1.07256281375885,grad_norm: 0.7821238058125324, iteration: 268184
loss: 1.0271106958389282,grad_norm: 0.8012600875112486, iteration: 268185
loss: 1.0340062379837036,grad_norm: 0.9430019523114294, iteration: 268186
loss: 1.1317883729934692,grad_norm: 0.9999999680992283, iteration: 268187
loss: 1.0558552742004395,grad_norm: 0.999999974122289, iteration: 268188
loss: 1.0475445985794067,grad_norm: 0.9999994321569429, iteration: 268189
loss: 1.0243632793426514,grad_norm: 0.8477714021851595, iteration: 268190
loss: 1.0600876808166504,grad_norm: 0.9999994140686179, iteration: 268191
loss: 1.0293605327606201,grad_norm: 0.9999992794502045, iteration: 268192
loss: 1.057784914970398,grad_norm: 0.9999994848535254, iteration: 268193
loss: 1.0708492994308472,grad_norm: 0.9999991198664794, iteration: 268194
loss: 1.0267361402511597,grad_norm: 0.8895081097980632, iteration: 268195
loss: 1.01060152053833,grad_norm: 0.7703367949925527, iteration: 268196
loss: 1.0618406534194946,grad_norm: 0.9071515814608152, iteration: 268197
loss: 1.0093834400177002,grad_norm: 0.9907017805554555, iteration: 268198
loss: 0.9909595847129822,grad_norm: 0.9982072808631931, iteration: 268199
loss: 1.0957586765289307,grad_norm: 0.9999999934801367, iteration: 268200
loss: 1.0414979457855225,grad_norm: 0.9999994709650819, iteration: 268201
loss: 1.0515296459197998,grad_norm: 0.9286208895276342, iteration: 268202
loss: 1.0548491477966309,grad_norm: 0.9999996181217676, iteration: 268203
loss: 0.9826816320419312,grad_norm: 0.9999993564694476, iteration: 268204
loss: 0.989816427230835,grad_norm: 0.8409932898297994, iteration: 268205
loss: 1.0565167665481567,grad_norm: 0.9999990605893397, iteration: 268206
loss: 1.087701678276062,grad_norm: 0.855877918758819, iteration: 268207
loss: 1.0608817338943481,grad_norm: 0.9999992667246012, iteration: 268208
loss: 1.074449062347412,grad_norm: 0.9999993837205301, iteration: 268209
loss: 1.022147536277771,grad_norm: 0.9765756557962548, iteration: 268210
loss: 1.019761323928833,grad_norm: 0.9298029087227728, iteration: 268211
loss: 1.0623540878295898,grad_norm: 0.9999991670378878, iteration: 268212
loss: 1.0160142183303833,grad_norm: 0.7808679202723291, iteration: 268213
loss: 1.088462471961975,grad_norm: 0.999999677553347, iteration: 268214
loss: 1.0220648050308228,grad_norm: 0.9999997535864524, iteration: 268215
loss: 1.0227181911468506,grad_norm: 0.9999991503543177, iteration: 268216
loss: 1.0652471780776978,grad_norm: 0.9999990658612625, iteration: 268217
loss: 1.0465208292007446,grad_norm: 0.9404698328672457, iteration: 268218
loss: 1.0164340734481812,grad_norm: 0.9210355120737264, iteration: 268219
loss: 1.0804109573364258,grad_norm: 0.8270177629141879, iteration: 268220
loss: 1.0123988389968872,grad_norm: 0.9999998233236478, iteration: 268221
loss: 0.9971503615379333,grad_norm: 0.999999660692401, iteration: 268222
loss: 1.0160229206085205,grad_norm: 0.9999991156853948, iteration: 268223
loss: 1.1033499240875244,grad_norm: 1.0000000438153553, iteration: 268224
loss: 1.0140546560287476,grad_norm: 0.785647625866729, iteration: 268225
loss: 1.1043556928634644,grad_norm: 0.9999991756661261, iteration: 268226
loss: 1.102463722229004,grad_norm: 0.9058161356478536, iteration: 268227
loss: 0.9872663617134094,grad_norm: 0.9609085131360613, iteration: 268228
loss: 1.0533664226531982,grad_norm: 0.9999993044348935, iteration: 268229
loss: 1.087009072303772,grad_norm: 0.9999999257984767, iteration: 268230
loss: 1.0641833543777466,grad_norm: 0.8732665731553927, iteration: 268231
loss: 0.9966875314712524,grad_norm: 0.8447052466068011, iteration: 268232
loss: 1.1362743377685547,grad_norm: 0.9999994454102314, iteration: 268233
loss: 1.104265809059143,grad_norm: 0.9999990078317783, iteration: 268234
loss: 1.0480387210845947,grad_norm: 0.9999991106371539, iteration: 268235
loss: 1.0717235803604126,grad_norm: 0.9999990957401405, iteration: 268236
loss: 0.9764537811279297,grad_norm: 0.8598448786227184, iteration: 268237
loss: 1.0844346284866333,grad_norm: 0.9999994240117409, iteration: 268238
loss: 1.032833218574524,grad_norm: 0.9999999382229231, iteration: 268239
loss: 1.0140153169631958,grad_norm: 0.6940498401723394, iteration: 268240
loss: 1.1617134809494019,grad_norm: 0.9999990222954738, iteration: 268241
loss: 1.0024198293685913,grad_norm: 0.6893038798813017, iteration: 268242
loss: 1.015754222869873,grad_norm: 0.9999996789763593, iteration: 268243
loss: 1.1641727685928345,grad_norm: 0.9999996238475354, iteration: 268244
loss: 1.0323408842086792,grad_norm: 0.759701384352497, iteration: 268245
loss: 1.080106258392334,grad_norm: 0.9999994773603794, iteration: 268246
loss: 1.0376547574996948,grad_norm: 0.9999992157406044, iteration: 268247
loss: 0.9733514189720154,grad_norm: 0.9999992741225459, iteration: 268248
loss: 1.008163571357727,grad_norm: 0.8159680042273872, iteration: 268249
loss: 1.052842617034912,grad_norm: 0.9999990470906144, iteration: 268250
loss: 1.0404707193374634,grad_norm: 0.9999992866139795, iteration: 268251
loss: 1.4408565759658813,grad_norm: 0.9999999636953685, iteration: 268252
loss: 1.0840458869934082,grad_norm: 0.9999999092606185, iteration: 268253
loss: 1.017031192779541,grad_norm: 0.9999999808814505, iteration: 268254
loss: 1.1200568675994873,grad_norm: 0.9999996773614455, iteration: 268255
loss: 1.060661792755127,grad_norm: 0.9999990568223421, iteration: 268256
loss: 0.9596882462501526,grad_norm: 0.9999994192353675, iteration: 268257
loss: 1.2245838642120361,grad_norm: 0.9999994235234898, iteration: 268258
loss: 1.0575467348098755,grad_norm: 0.9999994102252256, iteration: 268259
loss: 1.0739248991012573,grad_norm: 1.0000001013492776, iteration: 268260
loss: 1.101187825202942,grad_norm: 0.999999886541905, iteration: 268261
loss: 1.0746338367462158,grad_norm: 0.899841622563266, iteration: 268262
loss: 0.9723371267318726,grad_norm: 0.9999992279080437, iteration: 268263
loss: 1.056688904762268,grad_norm: 0.9183771024210354, iteration: 268264
loss: 1.0484464168548584,grad_norm: 0.8674039049848014, iteration: 268265
loss: 1.0738756656646729,grad_norm: 0.9119324786495473, iteration: 268266
loss: 1.0387630462646484,grad_norm: 0.9999996674538824, iteration: 268267
loss: 1.1563364267349243,grad_norm: 0.9999994904937518, iteration: 268268
loss: 1.0668381452560425,grad_norm: 0.9999999270621562, iteration: 268269
loss: 1.0887290239334106,grad_norm: 0.9999992892134554, iteration: 268270
loss: 1.0062052011489868,grad_norm: 0.8789706984484871, iteration: 268271
loss: 1.218969702720642,grad_norm: 0.9999999274948563, iteration: 268272
loss: 1.1115249395370483,grad_norm: 0.9722551380668198, iteration: 268273
loss: 1.0819414854049683,grad_norm: 0.8718368112836943, iteration: 268274
loss: 1.1053640842437744,grad_norm: 0.9999992668463837, iteration: 268275
loss: 1.0848671197891235,grad_norm: 0.9999993265545709, iteration: 268276
loss: 1.063257098197937,grad_norm: 0.9999998157186295, iteration: 268277
loss: 1.0393825769424438,grad_norm: 0.8564759106906633, iteration: 268278
loss: 1.0080214738845825,grad_norm: 0.6791238278934492, iteration: 268279
loss: 1.0532386302947998,grad_norm: 0.9667321679733429, iteration: 268280
loss: 0.9772084355354309,grad_norm: 0.9570665058421725, iteration: 268281
loss: 1.023597240447998,grad_norm: 0.9999997230144254, iteration: 268282
loss: 1.0422972440719604,grad_norm: 0.9285105307940519, iteration: 268283
loss: 1.0182210206985474,grad_norm: 0.926157779476464, iteration: 268284
loss: 1.0503467321395874,grad_norm: 0.8901224137691832, iteration: 268285
loss: 1.0380817651748657,grad_norm: 0.8288343782637092, iteration: 268286
loss: 1.0963257551193237,grad_norm: 0.9999999443023898, iteration: 268287
loss: 1.0200018882751465,grad_norm: 0.9359251636922173, iteration: 268288
loss: 1.034440517425537,grad_norm: 0.7559097948758176, iteration: 268289
loss: 1.0039174556732178,grad_norm: 0.8841441840601602, iteration: 268290
loss: 1.0368876457214355,grad_norm: 0.999999240360989, iteration: 268291
loss: 0.990456759929657,grad_norm: 0.9354805238455753, iteration: 268292
loss: 1.0274431705474854,grad_norm: 0.8712399661029643, iteration: 268293
loss: 1.244010329246521,grad_norm: 0.9999991475357058, iteration: 268294
loss: 0.974150538444519,grad_norm: 0.8463370750613373, iteration: 268295
loss: 1.0536352396011353,grad_norm: 0.843912469584955, iteration: 268296
loss: 0.9847623705863953,grad_norm: 0.9999991167678419, iteration: 268297
loss: 1.154502511024475,grad_norm: 0.9999990273601383, iteration: 268298
loss: 1.0832115411758423,grad_norm: 0.9999992800493955, iteration: 268299
loss: 1.0433869361877441,grad_norm: 0.9999992065571964, iteration: 268300
loss: 1.016912817955017,grad_norm: 0.9075936057136024, iteration: 268301
loss: 1.009697675704956,grad_norm: 0.9966691584085767, iteration: 268302
loss: 1.02610182762146,grad_norm: 0.9999996517140183, iteration: 268303
loss: 1.0684962272644043,grad_norm: 0.9999991723882148, iteration: 268304
loss: 1.0541743040084839,grad_norm: 0.9999998921519881, iteration: 268305
loss: 1.1480929851531982,grad_norm: 0.9999995234150473, iteration: 268306
loss: 1.0396586656570435,grad_norm: 0.9179988428002553, iteration: 268307
loss: 0.9741283655166626,grad_norm: 0.8625995092146104, iteration: 268308
loss: 1.0243617296218872,grad_norm: 0.9999998623505983, iteration: 268309
loss: 0.9865991473197937,grad_norm: 0.7961759127694903, iteration: 268310
loss: 0.9729633927345276,grad_norm: 0.8559853445320132, iteration: 268311
loss: 1.0301727056503296,grad_norm: 0.9999996834792343, iteration: 268312
loss: 1.0803011655807495,grad_norm: 0.9999991817613749, iteration: 268313
loss: 1.1035923957824707,grad_norm: 1.0000000479360316, iteration: 268314
loss: 1.0535434484481812,grad_norm: 0.9999995187071608, iteration: 268315
loss: 1.0694063901901245,grad_norm: 0.9966465614080902, iteration: 268316
loss: 1.0775619745254517,grad_norm: 0.9999997844665892, iteration: 268317
loss: 1.022559404373169,grad_norm: 0.9571556485860002, iteration: 268318
loss: 1.0226709842681885,grad_norm: 0.9999993894984867, iteration: 268319
loss: 1.039463758468628,grad_norm: 0.999999310680711, iteration: 268320
loss: 1.1200661659240723,grad_norm: 0.9999993834283655, iteration: 268321
loss: 0.9971565008163452,grad_norm: 0.9999993886831996, iteration: 268322
loss: 1.0924978256225586,grad_norm: 0.8466357056055884, iteration: 268323
loss: 0.9885419011116028,grad_norm: 0.880501363386053, iteration: 268324
loss: 1.0062130689620972,grad_norm: 0.9999990666414766, iteration: 268325
loss: 0.9727175831794739,grad_norm: 0.7946755738105621, iteration: 268326
loss: 0.993743896484375,grad_norm: 0.8648105486778479, iteration: 268327
loss: 1.0225346088409424,grad_norm: 0.9999991155449062, iteration: 268328
loss: 0.988377571105957,grad_norm: 0.9999990390408894, iteration: 268329
loss: 0.9976678490638733,grad_norm: 0.7915068184880713, iteration: 268330
loss: 1.1692899465560913,grad_norm: 0.9999994630520759, iteration: 268331
loss: 0.9881420135498047,grad_norm: 0.938608670291354, iteration: 268332
loss: 0.9950710535049438,grad_norm: 0.8216080144708745, iteration: 268333
loss: 1.0121186971664429,grad_norm: 0.9040468678025557, iteration: 268334
loss: 1.0835261344909668,grad_norm: 0.999999249300744, iteration: 268335
loss: 1.001742959022522,grad_norm: 0.9818029715143115, iteration: 268336
loss: 1.000894546508789,grad_norm: 0.9999991558978077, iteration: 268337
loss: 1.0233145952224731,grad_norm: 0.9999992951962027, iteration: 268338
loss: 1.083025336265564,grad_norm: 0.999999106769448, iteration: 268339
loss: 1.0087134838104248,grad_norm: 0.8050343064490509, iteration: 268340
loss: 0.9897409081459045,grad_norm: 0.800055472801057, iteration: 268341
loss: 1.0824109315872192,grad_norm: 0.9999996271031257, iteration: 268342
loss: 1.0339683294296265,grad_norm: 0.9999993362013143, iteration: 268343
loss: 1.0152127742767334,grad_norm: 0.818631225608253, iteration: 268344
loss: 1.0022484064102173,grad_norm: 0.8345742633290519, iteration: 268345
loss: 1.0679163932800293,grad_norm: 0.8533829032322837, iteration: 268346
loss: 1.1085338592529297,grad_norm: 0.999999074488177, iteration: 268347
loss: 1.0389198064804077,grad_norm: 0.8092292285509409, iteration: 268348
loss: 1.0205923318862915,grad_norm: 0.9999989751349043, iteration: 268349
loss: 1.0096553564071655,grad_norm: 0.9999989948121201, iteration: 268350
loss: 1.0439554452896118,grad_norm: 0.9999990595014959, iteration: 268351
loss: 1.077399730682373,grad_norm: 0.9999991264099022, iteration: 268352
loss: 1.0094125270843506,grad_norm: 0.9564421270059359, iteration: 268353
loss: 1.065899133682251,grad_norm: 0.9870515882276077, iteration: 268354
loss: 1.0082426071166992,grad_norm: 0.9699870244935328, iteration: 268355
loss: 1.0672537088394165,grad_norm: 0.9999990529152298, iteration: 268356
loss: 1.0426157712936401,grad_norm: 0.9999992111622908, iteration: 268357
loss: 1.0146734714508057,grad_norm: 0.8366420740918554, iteration: 268358
loss: 1.099543571472168,grad_norm: 0.9999994923312231, iteration: 268359
loss: 1.1243168115615845,grad_norm: 0.999999767904331, iteration: 268360
loss: 1.0467082262039185,grad_norm: 0.733784658161635, iteration: 268361
loss: 1.036860466003418,grad_norm: 0.842821211795447, iteration: 268362
loss: 1.0640459060668945,grad_norm: 0.9999990986248353, iteration: 268363
loss: 0.9905859231948853,grad_norm: 0.8182524457097926, iteration: 268364
loss: 1.0168753862380981,grad_norm: 0.8910261668419522, iteration: 268365
loss: 0.9784548282623291,grad_norm: 0.7857794950404583, iteration: 268366
loss: 1.0726795196533203,grad_norm: 0.8514828439771137, iteration: 268367
loss: 1.0714396238327026,grad_norm: 0.9999993023185669, iteration: 268368
loss: 1.0140858888626099,grad_norm: 0.9999991762967203, iteration: 268369
loss: 1.0241904258728027,grad_norm: 0.8322424158693571, iteration: 268370
loss: 1.0307122468948364,grad_norm: 0.9099587059392779, iteration: 268371
loss: 0.9510700106620789,grad_norm: 0.9999992070746451, iteration: 268372
loss: 1.015778660774231,grad_norm: 0.9999990942229925, iteration: 268373
loss: 1.0888679027557373,grad_norm: 0.9999990854141105, iteration: 268374
loss: 1.0667530298233032,grad_norm: 0.8849411454880972, iteration: 268375
loss: 1.1094601154327393,grad_norm: 0.9999995104832088, iteration: 268376
loss: 0.9871311187744141,grad_norm: 0.9110513513002719, iteration: 268377
loss: 1.026253581047058,grad_norm: 0.8733164296950122, iteration: 268378
loss: 1.0457220077514648,grad_norm: 0.9999992110472772, iteration: 268379
loss: 1.0331698656082153,grad_norm: 0.9165394896502448, iteration: 268380
loss: 1.0104992389678955,grad_norm: 0.9094200243379859, iteration: 268381
loss: 1.1966631412506104,grad_norm: 0.9987271847845902, iteration: 268382
loss: 1.0385234355926514,grad_norm: 0.8349099106894724, iteration: 268383
loss: 1.0759745836257935,grad_norm: 0.9999990810249549, iteration: 268384
loss: 1.0254698991775513,grad_norm: 0.9999991891877691, iteration: 268385
loss: 1.0963231325149536,grad_norm: 0.9999994587914642, iteration: 268386
loss: 0.9927178025245667,grad_norm: 0.8178945606549866, iteration: 268387
loss: 0.9820412397384644,grad_norm: 0.9428886372093659, iteration: 268388
loss: 1.064699649810791,grad_norm: 0.9511118295584629, iteration: 268389
loss: 1.0174975395202637,grad_norm: 0.9102486911480718, iteration: 268390
loss: 1.1188607215881348,grad_norm: 0.8237940777668046, iteration: 268391
loss: 1.038956642150879,grad_norm: 0.9999991758941086, iteration: 268392
loss: 1.0891367197036743,grad_norm: 0.9999991335226417, iteration: 268393
loss: 1.0532327890396118,grad_norm: 0.999999775257512, iteration: 268394
loss: 1.0763349533081055,grad_norm: 0.9999989667285099, iteration: 268395
loss: 0.9871214032173157,grad_norm: 0.8205846879675003, iteration: 268396
loss: 1.015074610710144,grad_norm: 0.9999993211949466, iteration: 268397
loss: 0.9791061282157898,grad_norm: 0.8796239354967144, iteration: 268398
loss: 1.0193451642990112,grad_norm: 0.9771955548173289, iteration: 268399
loss: 0.9963175654411316,grad_norm: 0.9999989395817674, iteration: 268400
loss: 1.109570860862732,grad_norm: 0.9999993372137747, iteration: 268401
loss: 1.0437932014465332,grad_norm: 0.9999992028624053, iteration: 268402
loss: 1.0187660455703735,grad_norm: 0.9068346652542664, iteration: 268403
loss: 1.026091456413269,grad_norm: 0.9397677825381198, iteration: 268404
loss: 1.0862971544265747,grad_norm: 0.9999992277165198, iteration: 268405
loss: 1.0035269260406494,grad_norm: 0.9692000531197503, iteration: 268406
loss: 1.011164903640747,grad_norm: 0.8272862961998338, iteration: 268407
loss: 1.1435365676879883,grad_norm: 0.9349172486183981, iteration: 268408
loss: 0.9532005190849304,grad_norm: 0.9362927924971524, iteration: 268409
loss: 1.078255295753479,grad_norm: 0.9935687491322275, iteration: 268410
loss: 1.040080189704895,grad_norm: 0.8127482319886933, iteration: 268411
loss: 1.094278335571289,grad_norm: 0.9832378656500741, iteration: 268412
loss: 1.1101819276809692,grad_norm: 0.9999997865263862, iteration: 268413
loss: 1.0803056955337524,grad_norm: 0.99999919334787, iteration: 268414
loss: 1.2065702676773071,grad_norm: 1.0000000080972584, iteration: 268415
loss: 1.0817219018936157,grad_norm: 0.9154876491720685, iteration: 268416
loss: 1.0026086568832397,grad_norm: 0.9999990913619626, iteration: 268417
loss: 1.0818172693252563,grad_norm: 0.9543035429239772, iteration: 268418
loss: 0.9642556309700012,grad_norm: 0.9839728992090342, iteration: 268419
loss: 1.1802282333374023,grad_norm: 0.9999992451546631, iteration: 268420
loss: 0.9809901118278503,grad_norm: 0.9057943367585042, iteration: 268421
loss: 1.0254064798355103,grad_norm: 0.7800911880649327, iteration: 268422
loss: 1.0020661354064941,grad_norm: 0.9999991001474413, iteration: 268423
loss: 1.0067956447601318,grad_norm: 0.8953174129668593, iteration: 268424
loss: 1.046916127204895,grad_norm: 0.7840994835300412, iteration: 268425
loss: 1.6389559507369995,grad_norm: 0.9999996058665925, iteration: 268426
loss: 1.030688762664795,grad_norm: 0.9395980162242948, iteration: 268427
loss: 1.0371779203414917,grad_norm: 0.7907289225625967, iteration: 268428
loss: 1.110729694366455,grad_norm: 0.9999991056830998, iteration: 268429
loss: 0.9997203350067139,grad_norm: 0.9325958384733896, iteration: 268430
loss: 1.0337634086608887,grad_norm: 0.9208385699084551, iteration: 268431
loss: 0.9942466616630554,grad_norm: 0.8446110599198537, iteration: 268432
loss: 1.0651377439498901,grad_norm: 0.9999998920764345, iteration: 268433
loss: 1.0094226598739624,grad_norm: 1.0000000028919918, iteration: 268434
loss: 1.0387401580810547,grad_norm: 0.9999995111137557, iteration: 268435
loss: 1.0555694103240967,grad_norm: 0.9999991257810213, iteration: 268436
loss: 1.0418325662612915,grad_norm: 0.8242017266653759, iteration: 268437
loss: 1.0121495723724365,grad_norm: 0.8252083300248638, iteration: 268438
loss: 1.1084553003311157,grad_norm: 0.9064493311430599, iteration: 268439
loss: 1.0637394189834595,grad_norm: 0.9999998068299816, iteration: 268440
loss: 1.0035213232040405,grad_norm: 0.999999199631915, iteration: 268441
loss: 1.0939282178878784,grad_norm: 0.9999991684266802, iteration: 268442
loss: 1.2260169982910156,grad_norm: 0.9999999384540434, iteration: 268443
loss: 1.1267343759536743,grad_norm: 0.9999996108332229, iteration: 268444
loss: 1.1232013702392578,grad_norm: 0.9999995008762821, iteration: 268445
loss: 1.09565269947052,grad_norm: 0.800478299092933, iteration: 268446
loss: 1.0340334177017212,grad_norm: 0.8628079586914744, iteration: 268447
loss: 0.9952760338783264,grad_norm: 0.9954556331893957, iteration: 268448
loss: 0.9901485443115234,grad_norm: 0.9103709024777304, iteration: 268449
loss: 1.017544150352478,grad_norm: 0.9999995352321615, iteration: 268450
loss: 1.0305577516555786,grad_norm: 0.9999991002680781, iteration: 268451
loss: 1.0188003778457642,grad_norm: 0.8155268262378279, iteration: 268452
loss: 1.0353620052337646,grad_norm: 0.8782724687397236, iteration: 268453
loss: 1.0518991947174072,grad_norm: 0.9999992524641454, iteration: 268454
loss: 1.0248384475708008,grad_norm: 0.8718971170687951, iteration: 268455
loss: 0.9952439069747925,grad_norm: 0.9315619939760758, iteration: 268456
loss: 1.060506820678711,grad_norm: 0.9999999006759206, iteration: 268457
loss: 1.3513875007629395,grad_norm: 0.999999867534309, iteration: 268458
loss: 1.1260128021240234,grad_norm: 0.9999996270169217, iteration: 268459
loss: 1.0335065126419067,grad_norm: 0.9177728517236061, iteration: 268460
loss: 1.0212191343307495,grad_norm: 0.9999991594903863, iteration: 268461
loss: 1.0135741233825684,grad_norm: 0.793464226421414, iteration: 268462
loss: 1.0491341352462769,grad_norm: 0.9017571094560032, iteration: 268463
loss: 1.116932988166809,grad_norm: 0.9999993592177389, iteration: 268464
loss: 1.104429006576538,grad_norm: 1.000000057125694, iteration: 268465
loss: 1.0573396682739258,grad_norm: 0.9999992098324191, iteration: 268466
loss: 1.2252202033996582,grad_norm: 0.9999995270715172, iteration: 268467
loss: 1.236891508102417,grad_norm: 0.999999964128837, iteration: 268468
loss: 1.155836582183838,grad_norm: 0.9999999136504747, iteration: 268469
loss: 0.9827851057052612,grad_norm: 0.9999993177650759, iteration: 268470
loss: 1.0148413181304932,grad_norm: 0.7670990801036139, iteration: 268471
loss: 0.9972993731498718,grad_norm: 0.9999992299816751, iteration: 268472
loss: 1.0566341876983643,grad_norm: 0.9249260971811959, iteration: 268473
loss: 1.1018263101577759,grad_norm: 0.9999992946919503, iteration: 268474
loss: 1.031180739402771,grad_norm: 0.9282764728501982, iteration: 268475
loss: 1.0625450611114502,grad_norm: 0.7519089073441693, iteration: 268476
loss: 1.1440430879592896,grad_norm: 0.9999992765029075, iteration: 268477
loss: 1.0188963413238525,grad_norm: 0.9394404819531931, iteration: 268478
loss: 1.011168122291565,grad_norm: 0.9360940357527774, iteration: 268479
loss: 1.0835461616516113,grad_norm: 0.8600981650172683, iteration: 268480
loss: 1.0329362154006958,grad_norm: 0.9999991304323849, iteration: 268481
loss: 1.0017738342285156,grad_norm: 0.8920414512685213, iteration: 268482
loss: 1.0341893434524536,grad_norm: 0.999999798322427, iteration: 268483
loss: 1.1473500728607178,grad_norm: 0.9999993781490089, iteration: 268484
loss: 1.1231281757354736,grad_norm: 0.9999995360981307, iteration: 268485
loss: 1.0536088943481445,grad_norm: 0.9999998869932583, iteration: 268486
loss: 1.0579952001571655,grad_norm: 0.9999991140627956, iteration: 268487
loss: 1.0121403932571411,grad_norm: 0.7735987566285639, iteration: 268488
loss: 1.0809826850891113,grad_norm: 0.9130216692765951, iteration: 268489
loss: 1.025327205657959,grad_norm: 0.9999990763415749, iteration: 268490
loss: 1.0769973993301392,grad_norm: 0.9999991030717705, iteration: 268491
loss: 1.0882521867752075,grad_norm: 0.9999991332663657, iteration: 268492
loss: 0.994900107383728,grad_norm: 0.9986391859701675, iteration: 268493
loss: 1.0928095579147339,grad_norm: 0.9999991447451316, iteration: 268494
loss: 1.0269416570663452,grad_norm: 0.9999997480395416, iteration: 268495
loss: 1.0081450939178467,grad_norm: 0.9065013198174785, iteration: 268496
loss: 0.9862036108970642,grad_norm: 0.9999994734835711, iteration: 268497
loss: 1.0358515977859497,grad_norm: 0.7960176411059461, iteration: 268498
loss: 1.0685014724731445,grad_norm: 0.8283603961952488, iteration: 268499
loss: 1.0136830806732178,grad_norm: 0.9629061964573651, iteration: 268500
loss: 1.0492916107177734,grad_norm: 0.9999993715381305, iteration: 268501
loss: 1.001673936843872,grad_norm: 0.9999991237422536, iteration: 268502
loss: 1.0852049589157104,grad_norm: 0.9999992058519501, iteration: 268503
loss: 0.9979228973388672,grad_norm: 0.8607578888291053, iteration: 268504
loss: 1.0074903964996338,grad_norm: 0.7831887825240201, iteration: 268505
loss: 1.006949782371521,grad_norm: 0.9999992014800765, iteration: 268506
loss: 1.0526968240737915,grad_norm: 0.910748710316652, iteration: 268507
loss: 1.0883501768112183,grad_norm: 0.9999997648392795, iteration: 268508
loss: 1.0054545402526855,grad_norm: 0.9436985521013325, iteration: 268509
loss: 1.0151740312576294,grad_norm: 0.9558070767105566, iteration: 268510
loss: 1.0265628099441528,grad_norm: 0.9122084124814026, iteration: 268511
loss: 0.9738047122955322,grad_norm: 0.6663538891125738, iteration: 268512
loss: 0.9882833957672119,grad_norm: 0.9999993117649212, iteration: 268513
loss: 1.0041128396987915,grad_norm: 0.9305223498383985, iteration: 268514
loss: 1.017964243888855,grad_norm: 0.905313602669389, iteration: 268515
loss: 1.0913052558898926,grad_norm: 0.9999996769226942, iteration: 268516
loss: 1.0208181142807007,grad_norm: 0.9999990601574095, iteration: 268517
loss: 0.9961426258087158,grad_norm: 0.8412640461677322, iteration: 268518
loss: 1.056860327720642,grad_norm: 0.87330009253155, iteration: 268519
loss: 1.0424494743347168,grad_norm: 0.9999994744299447, iteration: 268520
loss: 0.9729834794998169,grad_norm: 0.7849559821888364, iteration: 268521
loss: 1.0307062864303589,grad_norm: 0.7844777609421312, iteration: 268522
loss: 1.1683884859085083,grad_norm: 0.9593889646228013, iteration: 268523
loss: 1.2221235036849976,grad_norm: 0.9999999203914373, iteration: 268524
loss: 1.0829981565475464,grad_norm: 0.9999994430783978, iteration: 268525
loss: 1.0061225891113281,grad_norm: 0.8639715581093342, iteration: 268526
loss: 1.104311466217041,grad_norm: 0.9972879918356504, iteration: 268527
loss: 1.0280375480651855,grad_norm: 0.9999999607877796, iteration: 268528
loss: 1.0868433713912964,grad_norm: 0.9999992014700482, iteration: 268529
loss: 0.9931680560112,grad_norm: 0.7225607387994226, iteration: 268530
loss: 1.0177700519561768,grad_norm: 0.9999995000830972, iteration: 268531
loss: 1.0218865871429443,grad_norm: 0.9999999305579367, iteration: 268532
loss: 1.095259189605713,grad_norm: 0.9999995549551545, iteration: 268533
loss: 1.0187113285064697,grad_norm: 0.9999992191746024, iteration: 268534
loss: 1.0195858478546143,grad_norm: 0.7990563579621539, iteration: 268535
loss: 1.0274134874343872,grad_norm: 0.9999998144348113, iteration: 268536
loss: 1.0931065082550049,grad_norm: 0.9999995045044933, iteration: 268537
loss: 0.9524856209754944,grad_norm: 0.8311780762558605, iteration: 268538
loss: 1.0065176486968994,grad_norm: 0.9269559834803209, iteration: 268539
loss: 1.0161309242248535,grad_norm: 0.9548143034093133, iteration: 268540
loss: 1.0152291059494019,grad_norm: 0.8817378717276497, iteration: 268541
loss: 1.3151267766952515,grad_norm: 0.9999997112712129, iteration: 268542
loss: 1.0059616565704346,grad_norm: 0.8556283828157053, iteration: 268543
loss: 1.0045839548110962,grad_norm: 0.9999992023776182, iteration: 268544
loss: 0.9858353734016418,grad_norm: 0.9999990055928231, iteration: 268545
loss: 0.9804763793945312,grad_norm: 0.88367473188792, iteration: 268546
loss: 1.0662189722061157,grad_norm: 0.9999996391631305, iteration: 268547
loss: 1.0320048332214355,grad_norm: 0.8344219764678867, iteration: 268548
loss: 1.0490213632583618,grad_norm: 0.9653352236783578, iteration: 268549
loss: 0.9721498489379883,grad_norm: 0.9878884104395008, iteration: 268550
loss: 1.0042139291763306,grad_norm: 0.9999991351808148, iteration: 268551
loss: 1.1015839576721191,grad_norm: 0.9371603379813646, iteration: 268552
loss: 0.9535611867904663,grad_norm: 0.8474321222827826, iteration: 268553
loss: 1.0628291368484497,grad_norm: 0.9999992609558418, iteration: 268554
loss: 1.0559977293014526,grad_norm: 0.9999995790307137, iteration: 268555
loss: 0.9982160329818726,grad_norm: 0.9863453267052157, iteration: 268556
loss: 1.0148199796676636,grad_norm: 0.8012879465103473, iteration: 268557
loss: 1.069735050201416,grad_norm: 0.9999998731017323, iteration: 268558
loss: 1.0155237913131714,grad_norm: 0.9999990739018649, iteration: 268559
loss: 1.152645230293274,grad_norm: 0.916419048100012, iteration: 268560
loss: 1.0263304710388184,grad_norm: 0.9999992142168108, iteration: 268561
loss: 1.040917158126831,grad_norm: 0.9999998007731306, iteration: 268562
loss: 0.9666849970817566,grad_norm: 0.9160470526118898, iteration: 268563
loss: 1.0582557916641235,grad_norm: 0.8454369398969165, iteration: 268564
loss: 1.136514663696289,grad_norm: 0.9999998325246523, iteration: 268565
loss: 1.6311482191085815,grad_norm: 0.99999958698826, iteration: 268566
loss: 0.9708386063575745,grad_norm: 0.8729002545503117, iteration: 268567
loss: 0.9894059896469116,grad_norm: 0.8464568090437983, iteration: 268568
loss: 1.1319708824157715,grad_norm: 0.9999992909056039, iteration: 268569
loss: 1.0149750709533691,grad_norm: 0.9073074968878362, iteration: 268570
loss: 1.0026819705963135,grad_norm: 0.9999990612965491, iteration: 268571
loss: 1.0143554210662842,grad_norm: 0.9827490723378756, iteration: 268572
loss: 0.9950025081634521,grad_norm: 0.7907602931599678, iteration: 268573
loss: 1.338746190071106,grad_norm: 0.9999998161816667, iteration: 268574
loss: 1.013891577720642,grad_norm: 0.9347228495165357, iteration: 268575
loss: 1.07314133644104,grad_norm: 0.9999990713317579, iteration: 268576
loss: 1.0151984691619873,grad_norm: 0.8750598606860179, iteration: 268577
loss: 1.083784818649292,grad_norm: 0.999999307469384, iteration: 268578
loss: 0.980532169342041,grad_norm: 0.933297315538181, iteration: 268579
loss: 0.9696425795555115,grad_norm: 0.9999995648908115, iteration: 268580
loss: 1.0103737115859985,grad_norm: 0.9999991628618111, iteration: 268581
loss: 1.019483208656311,grad_norm: 0.8099024807770764, iteration: 268582
loss: 0.9865899085998535,grad_norm: 0.7945240328763078, iteration: 268583
loss: 1.0370539426803589,grad_norm: 0.9023294728716106, iteration: 268584
loss: 1.0495259761810303,grad_norm: 0.9999991928994035, iteration: 268585
loss: 0.9913637042045593,grad_norm: 0.7922401493930599, iteration: 268586
loss: 1.0225505828857422,grad_norm: 0.8335476322736618, iteration: 268587
loss: 1.0904499292373657,grad_norm: 0.9999997390174435, iteration: 268588
loss: 0.9617099761962891,grad_norm: 0.7428065723470817, iteration: 268589
loss: 1.087591290473938,grad_norm: 0.8583361949560192, iteration: 268590
loss: 1.01731538772583,grad_norm: 0.9829146169950863, iteration: 268591
loss: 0.9875465035438538,grad_norm: 0.9197634487227646, iteration: 268592
loss: 0.9668841361999512,grad_norm: 0.8466459610640525, iteration: 268593
loss: 1.0400015115737915,grad_norm: 0.9999991995058537, iteration: 268594
loss: 0.9788740277290344,grad_norm: 0.8303938258546181, iteration: 268595
loss: 1.0145788192749023,grad_norm: 0.9761257760448531, iteration: 268596
loss: 0.9849700331687927,grad_norm: 0.9999994625267445, iteration: 268597
loss: 1.0328636169433594,grad_norm: 0.8888786305454044, iteration: 268598
loss: 1.0076303482055664,grad_norm: 0.9999993145982865, iteration: 268599
loss: 1.0042688846588135,grad_norm: 0.8705193646496137, iteration: 268600
loss: 1.0826553106307983,grad_norm: 0.798719610722968, iteration: 268601
loss: 0.9837513566017151,grad_norm: 0.9988046061843435, iteration: 268602
loss: 1.0444939136505127,grad_norm: 0.9999992630125899, iteration: 268603
loss: 1.0103256702423096,grad_norm: 0.9493902131439043, iteration: 268604
loss: 1.038558840751648,grad_norm: 0.9999998373083787, iteration: 268605
loss: 1.0446233749389648,grad_norm: 0.8750706003986802, iteration: 268606
loss: 1.0152684450149536,grad_norm: 0.8140510997503003, iteration: 268607
loss: 1.0769137144088745,grad_norm: 0.9999996160422376, iteration: 268608
loss: 1.05534029006958,grad_norm: 0.99460623654105, iteration: 268609
loss: 1.201122760772705,grad_norm: 0.9999999669485857, iteration: 268610
loss: 0.9774031043052673,grad_norm: 0.8866032754719155, iteration: 268611
loss: 0.9381945729255676,grad_norm: 0.9127252891648402, iteration: 268612
loss: 0.9778442978858948,grad_norm: 0.9089056330769418, iteration: 268613
loss: 1.047049641609192,grad_norm: 0.8751865136918305, iteration: 268614
loss: 0.9914279580116272,grad_norm: 0.9928023674891486, iteration: 268615
loss: 0.9891668558120728,grad_norm: 0.8344198728169174, iteration: 268616
loss: 0.9847797751426697,grad_norm: 0.8208708307012733, iteration: 268617
loss: 1.0322871208190918,grad_norm: 0.9999994003655188, iteration: 268618
loss: 0.9987056255340576,grad_norm: 0.9999990270319915, iteration: 268619
loss: 1.022173285484314,grad_norm: 0.9999991338921108, iteration: 268620
loss: 1.0233793258666992,grad_norm: 0.9999997709467927, iteration: 268621
loss: 1.0119022130966187,grad_norm: 0.9999991378600687, iteration: 268622
loss: 1.1408302783966064,grad_norm: 0.9999992419362709, iteration: 268623
loss: 1.0124692916870117,grad_norm: 0.9878708198671661, iteration: 268624
loss: 0.9644075036048889,grad_norm: 0.9087225386807746, iteration: 268625
loss: 1.0411708354949951,grad_norm: 0.9999990159423948, iteration: 268626
loss: 0.9678648114204407,grad_norm: 0.9111897745359181, iteration: 268627
loss: 0.9835806488990784,grad_norm: 0.9999995422637598, iteration: 268628
loss: 1.041791558265686,grad_norm: 0.8862495862551324, iteration: 268629
loss: 1.1831868886947632,grad_norm: 0.9999998762929756, iteration: 268630
loss: 1.0592576265335083,grad_norm: 0.9447945090920966, iteration: 268631
loss: 1.05209481716156,grad_norm: 0.8820253609217015, iteration: 268632
loss: 1.0384399890899658,grad_norm: 0.9039367346204791, iteration: 268633
loss: 1.0433143377304077,grad_norm: 0.8589970966935114, iteration: 268634
loss: 0.941580593585968,grad_norm: 0.9881486318881215, iteration: 268635
loss: 1.0410418510437012,grad_norm: 0.850967385537362, iteration: 268636
loss: 0.9568268060684204,grad_norm: 0.811414973954155, iteration: 268637
loss: 0.9987108707427979,grad_norm: 0.8488744069753295, iteration: 268638
loss: 1.0404002666473389,grad_norm: 0.9999990705802903, iteration: 268639
loss: 0.9994924068450928,grad_norm: 0.9999997284157671, iteration: 268640
loss: 1.028275489807129,grad_norm: 0.9999998278969845, iteration: 268641
loss: 1.0157147645950317,grad_norm: 0.7681643795219341, iteration: 268642
loss: 0.9947546124458313,grad_norm: 0.999999155650626, iteration: 268643
loss: 1.1049730777740479,grad_norm: 0.9265945067698675, iteration: 268644
loss: 0.9480795860290527,grad_norm: 0.7228162954852745, iteration: 268645
loss: 0.9727999567985535,grad_norm: 0.8388707495887774, iteration: 268646
loss: 0.9843107461929321,grad_norm: 0.8030439835705943, iteration: 268647
loss: 1.0065069198608398,grad_norm: 0.8824424461105138, iteration: 268648
loss: 0.9934882521629333,grad_norm: 0.8887013054951257, iteration: 268649
loss: 1.041906714439392,grad_norm: 0.8811027057817556, iteration: 268650
loss: 1.0019440650939941,grad_norm: 0.9999989972217195, iteration: 268651
loss: 1.0701462030410767,grad_norm: 0.9999994212410586, iteration: 268652
loss: 1.0055214166641235,grad_norm: 0.9999991636518407, iteration: 268653
loss: 1.0213861465454102,grad_norm: 0.8152888190455102, iteration: 268654
loss: 0.9780847430229187,grad_norm: 0.8346624496679182, iteration: 268655
loss: 1.0121201276779175,grad_norm: 0.999999811441097, iteration: 268656
loss: 1.0037791728973389,grad_norm: 0.9479911345597569, iteration: 268657
loss: 1.0477452278137207,grad_norm: 0.9999991719637314, iteration: 268658
loss: 0.9929863810539246,grad_norm: 0.8341783536958491, iteration: 268659
loss: 0.9886772632598877,grad_norm: 0.9999993352210601, iteration: 268660
loss: 1.0359410047531128,grad_norm: 0.9999999420359563, iteration: 268661
loss: 1.000862717628479,grad_norm: 0.7840945534182925, iteration: 268662
loss: 0.9859414100646973,grad_norm: 0.9640742714259904, iteration: 268663
loss: 0.977681040763855,grad_norm: 0.8430627975010941, iteration: 268664
loss: 0.9990665912628174,grad_norm: 0.9999995254954378, iteration: 268665
loss: 1.0187015533447266,grad_norm: 0.7603273860489771, iteration: 268666
loss: 0.9490841031074524,grad_norm: 0.8101765020810716, iteration: 268667
loss: 1.054081916809082,grad_norm: 0.9999990477310307, iteration: 268668
loss: 0.9828418493270874,grad_norm: 0.9089872971562832, iteration: 268669
loss: 1.0305918455123901,grad_norm: 0.9999990999611056, iteration: 268670
loss: 1.025377869606018,grad_norm: 0.9173646464462057, iteration: 268671
loss: 1.045408010482788,grad_norm: 0.9999991942880646, iteration: 268672
loss: 1.020020842552185,grad_norm: 0.7358256971811807, iteration: 268673
loss: 1.0015960931777954,grad_norm: 0.7813913349866884, iteration: 268674
loss: 0.9967483282089233,grad_norm: 0.9019582099978304, iteration: 268675
loss: 1.0145031213760376,grad_norm: 0.942860960786431, iteration: 268676
loss: 1.049566626548767,grad_norm: 0.8002493244197451, iteration: 268677
loss: 0.9883159399032593,grad_norm: 0.8081375362402866, iteration: 268678
loss: 1.003820538520813,grad_norm: 0.844105053398332, iteration: 268679
loss: 0.9642043113708496,grad_norm: 0.9999990340635381, iteration: 268680
loss: 0.9937137365341187,grad_norm: 0.9999991226637105, iteration: 268681
loss: 1.112786054611206,grad_norm: 0.9999996500397339, iteration: 268682
loss: 1.003348708152771,grad_norm: 0.9999991518850394, iteration: 268683
loss: 1.083143949508667,grad_norm: 0.9879990479175644, iteration: 268684
loss: 1.033094048500061,grad_norm: 0.8941927317530349, iteration: 268685
loss: 1.0026284456253052,grad_norm: 0.9752555711080337, iteration: 268686
loss: 1.0446397066116333,grad_norm: 0.9999990860844514, iteration: 268687
loss: 1.1057913303375244,grad_norm: 0.943085668878948, iteration: 268688
loss: 0.9706446528434753,grad_norm: 0.850906578078693, iteration: 268689
loss: 0.9780029654502869,grad_norm: 0.9881390725282927, iteration: 268690
loss: 1.037920355796814,grad_norm: 0.9401971782819124, iteration: 268691
loss: 1.033487319946289,grad_norm: 0.9999994083436937, iteration: 268692
loss: 1.0218498706817627,grad_norm: 0.8464783123184076, iteration: 268693
loss: 1.011289358139038,grad_norm: 0.8789722279288201, iteration: 268694
loss: 1.1715539693832397,grad_norm: 0.9999999027221284, iteration: 268695
loss: 1.0348718166351318,grad_norm: 0.8317871420556189, iteration: 268696
loss: 1.0035072565078735,grad_norm: 0.9964946087962806, iteration: 268697
loss: 0.986078679561615,grad_norm: 0.7284977713822939, iteration: 268698
loss: 1.019718050956726,grad_norm: 0.8983450393382272, iteration: 268699
loss: 1.0162321329116821,grad_norm: 0.8821716327191763, iteration: 268700
loss: 1.0192031860351562,grad_norm: 0.9999994512194118, iteration: 268701
loss: 1.0178258419036865,grad_norm: 0.9999990437383526, iteration: 268702
loss: 0.9787704348564148,grad_norm: 0.8966972353797965, iteration: 268703
loss: 1.0898722410202026,grad_norm: 0.8643117882251014, iteration: 268704
loss: 1.0055533647537231,grad_norm: 0.9999992203325284, iteration: 268705
loss: 1.1263352632522583,grad_norm: 0.9999992680062713, iteration: 268706
loss: 1.0077258348464966,grad_norm: 0.999999922438236, iteration: 268707
loss: 0.9719661474227905,grad_norm: 0.9426134844587221, iteration: 268708
loss: 1.0321542024612427,grad_norm: 0.9999991331386849, iteration: 268709
loss: 1.0212225914001465,grad_norm: 0.9568000243068449, iteration: 268710
loss: 0.9991976618766785,grad_norm: 0.7233782614722607, iteration: 268711
loss: 0.9949668049812317,grad_norm: 0.8835266977137495, iteration: 268712
loss: 0.9411172270774841,grad_norm: 0.827059096086436, iteration: 268713
loss: 1.0273652076721191,grad_norm: 0.9271081966646182, iteration: 268714
loss: 1.0068038702011108,grad_norm: 0.8844641470712133, iteration: 268715
loss: 1.0019956827163696,grad_norm: 0.9141064555232592, iteration: 268716
loss: 0.9807963371276855,grad_norm: 0.7994407277857949, iteration: 268717
loss: 0.9757695198059082,grad_norm: 0.8886403352519804, iteration: 268718
loss: 1.0293105840682983,grad_norm: 0.9150660322783061, iteration: 268719
loss: 0.9648444056510925,grad_norm: 0.8569421563053129, iteration: 268720
loss: 1.0929490327835083,grad_norm: 0.9999997419298742, iteration: 268721
loss: 1.018754243850708,grad_norm: 0.7867204413053973, iteration: 268722
loss: 1.0803897380828857,grad_norm: 0.9999996992143444, iteration: 268723
loss: 1.0389515161514282,grad_norm: 0.9046887773598736, iteration: 268724
loss: 0.9835383296012878,grad_norm: 0.7419383067971707, iteration: 268725
loss: 0.9849401116371155,grad_norm: 0.9245538083190545, iteration: 268726
loss: 0.9627521634101868,grad_norm: 0.973338964387791, iteration: 268727
loss: 1.1652064323425293,grad_norm: 0.999999962692406, iteration: 268728
loss: 1.0241705179214478,grad_norm: 0.9347633001958503, iteration: 268729
loss: 1.0809444189071655,grad_norm: 0.9999993984639024, iteration: 268730
loss: 0.9910901784896851,grad_norm: 0.999999618855976, iteration: 268731
loss: 1.1019961833953857,grad_norm: 0.9999994238064847, iteration: 268732
loss: 1.0499190092086792,grad_norm: 0.9999999597759054, iteration: 268733
loss: 1.0121296644210815,grad_norm: 0.8376448118820806, iteration: 268734
loss: 0.9861199855804443,grad_norm: 0.9821553318915921, iteration: 268735
loss: 1.083390712738037,grad_norm: 0.9999994558665303, iteration: 268736
loss: 1.0104429721832275,grad_norm: 0.9047646573926581, iteration: 268737
loss: 1.0287203788757324,grad_norm: 0.7817639947192698, iteration: 268738
loss: 1.1874910593032837,grad_norm: 0.9999999791345717, iteration: 268739
loss: 1.052391767501831,grad_norm: 0.8773533955176702, iteration: 268740
loss: 0.9822872281074524,grad_norm: 0.6995195466633078, iteration: 268741
loss: 1.0583254098892212,grad_norm: 0.9999994045988201, iteration: 268742
loss: 1.0290801525115967,grad_norm: 0.8969775163794275, iteration: 268743
loss: 0.9874730706214905,grad_norm: 0.9027077985442793, iteration: 268744
loss: 1.011071801185608,grad_norm: 0.9999991316482607, iteration: 268745
loss: 1.0289993286132812,grad_norm: 0.9999995843158925, iteration: 268746
loss: 0.9923679828643799,grad_norm: 0.8103229453818329, iteration: 268747
loss: 1.0182216167449951,grad_norm: 0.8014618863703691, iteration: 268748
loss: 1.0460566282272339,grad_norm: 0.8098236117636042, iteration: 268749
loss: 1.033251166343689,grad_norm: 0.9999996976241854, iteration: 268750
loss: 1.0049914121627808,grad_norm: 0.9999991241282201, iteration: 268751
loss: 0.9817425608634949,grad_norm: 0.7831096749352231, iteration: 268752
loss: 1.064294457435608,grad_norm: 0.9999999517771727, iteration: 268753
loss: 0.9284423589706421,grad_norm: 0.8101080767817128, iteration: 268754
loss: 1.0418179035186768,grad_norm: 0.8646052637264966, iteration: 268755
loss: 1.0376319885253906,grad_norm: 0.8278671807119158, iteration: 268756
loss: 1.1081029176712036,grad_norm: 0.9999998693390383, iteration: 268757
loss: 1.022657871246338,grad_norm: 0.9641688414774456, iteration: 268758
loss: 0.9784918427467346,grad_norm: 0.7508478985161835, iteration: 268759
loss: 1.0302647352218628,grad_norm: 0.902235459879374, iteration: 268760
loss: 1.070985198020935,grad_norm: 0.9716857921414878, iteration: 268761
loss: 1.0037308931350708,grad_norm: 0.9999991288853437, iteration: 268762
loss: 1.0667392015457153,grad_norm: 0.8964068731308877, iteration: 268763
loss: 1.0092607736587524,grad_norm: 0.999999043289391, iteration: 268764
loss: 1.0786341428756714,grad_norm: 0.8965558904236582, iteration: 268765
loss: 0.9960407018661499,grad_norm: 0.9999997991119204, iteration: 268766
loss: 1.0169930458068848,grad_norm: 0.8256167409789764, iteration: 268767
loss: 0.9837241172790527,grad_norm: 0.9999990934165023, iteration: 268768
loss: 1.2954174280166626,grad_norm: 0.9999996911323192, iteration: 268769
loss: 1.0474401712417603,grad_norm: 0.9999997651028122, iteration: 268770
loss: 0.9916409254074097,grad_norm: 0.9429692307052246, iteration: 268771
loss: 1.0211764574050903,grad_norm: 0.9999990616680753, iteration: 268772
loss: 1.0128045082092285,grad_norm: 0.999999799425173, iteration: 268773
loss: 1.0229772329330444,grad_norm: 0.9155940307041041, iteration: 268774
loss: 0.9969196915626526,grad_norm: 0.8345701043461548, iteration: 268775
loss: 1.0262616872787476,grad_norm: 0.9214303871923764, iteration: 268776
loss: 1.0300143957138062,grad_norm: 0.9999990195393031, iteration: 268777
loss: 0.967401921749115,grad_norm: 0.9606167574055944, iteration: 268778
loss: 1.0021864175796509,grad_norm: 0.6993551275115187, iteration: 268779
loss: 0.9848806262016296,grad_norm: 0.9999991425385949, iteration: 268780
loss: 1.210761308670044,grad_norm: 0.999999876180525, iteration: 268781
loss: 1.0577270984649658,grad_norm: 0.9999991282145698, iteration: 268782
loss: 1.1481313705444336,grad_norm: 0.9999994587800762, iteration: 268783
loss: 1.0791081190109253,grad_norm: 0.9999990935750059, iteration: 268784
loss: 1.03371000289917,grad_norm: 0.8180531693482347, iteration: 268785
loss: 1.0062631368637085,grad_norm: 0.9484721601960251, iteration: 268786
loss: 0.9668123126029968,grad_norm: 0.9435785149107992, iteration: 268787
loss: 1.023341417312622,grad_norm: 0.999999581681851, iteration: 268788
loss: 1.2623921632766724,grad_norm: 0.9999991563692918, iteration: 268789
loss: 1.1348674297332764,grad_norm: 0.9999998016313605, iteration: 268790
loss: 0.9993529319763184,grad_norm: 0.920383814470983, iteration: 268791
loss: 1.151587724685669,grad_norm: 0.9999999382636446, iteration: 268792
loss: 1.2548750638961792,grad_norm: 0.9999995157976286, iteration: 268793
loss: 1.0423345565795898,grad_norm: 0.9999992425583483, iteration: 268794
loss: 1.0374054908752441,grad_norm: 0.9999989967491448, iteration: 268795
loss: 1.152144432067871,grad_norm: 0.9999999206189788, iteration: 268796
loss: 1.0180349349975586,grad_norm: 0.9822110662431159, iteration: 268797
loss: 1.1246014833450317,grad_norm: 0.9999992261698808, iteration: 268798
loss: 1.0263526439666748,grad_norm: 0.8444725494939187, iteration: 268799
loss: 1.1090432405471802,grad_norm: 0.9999998503687221, iteration: 268800
loss: 1.1809155941009521,grad_norm: 0.9999994423312903, iteration: 268801
loss: 1.1923184394836426,grad_norm: 0.9999996948508904, iteration: 268802
loss: 1.0859166383743286,grad_norm: 0.9999994560335331, iteration: 268803
loss: 1.2378216981887817,grad_norm: 0.999999981085937, iteration: 268804
loss: 1.0301765203475952,grad_norm: 0.882393659356393, iteration: 268805
loss: 1.0139390230178833,grad_norm: 0.9072160209145214, iteration: 268806
loss: 1.0623141527175903,grad_norm: 0.9999996731376368, iteration: 268807
loss: 1.0198103189468384,grad_norm: 0.9999994598993773, iteration: 268808
loss: 1.0792500972747803,grad_norm: 0.9999999279012325, iteration: 268809
loss: 1.2079074382781982,grad_norm: 0.9999998715051837, iteration: 268810
loss: 1.1123805046081543,grad_norm: 0.9670544230594723, iteration: 268811
loss: 1.017930269241333,grad_norm: 0.9999994364727854, iteration: 268812
loss: 0.9970375299453735,grad_norm: 0.8416336435143248, iteration: 268813
loss: 1.085395097732544,grad_norm: 0.9999992734189846, iteration: 268814
loss: 1.0176998376846313,grad_norm: 0.9379395141949397, iteration: 268815
loss: 1.083166480064392,grad_norm: 0.9999992477791183, iteration: 268816
loss: 1.1305426359176636,grad_norm: 0.999999138916122, iteration: 268817
loss: 1.0173779726028442,grad_norm: 0.8651525270486184, iteration: 268818
loss: 1.0004773139953613,grad_norm: 0.858302382401569, iteration: 268819
loss: 1.2585945129394531,grad_norm: 0.999999220896211, iteration: 268820
loss: 1.085755467414856,grad_norm: 0.9999992575320035, iteration: 268821
loss: 1.0349138975143433,grad_norm: 0.9999990075884094, iteration: 268822
loss: 1.1856776475906372,grad_norm: 0.9999992391628373, iteration: 268823
loss: 1.0723505020141602,grad_norm: 0.765692558254549, iteration: 268824
loss: 1.0090970993041992,grad_norm: 0.9999991529230501, iteration: 268825
loss: 1.016050100326538,grad_norm: 0.9999996168650677, iteration: 268826
loss: 0.9905635714530945,grad_norm: 0.883258324303249, iteration: 268827
loss: 1.0326088666915894,grad_norm: 0.9005413941286627, iteration: 268828
loss: 1.204606533050537,grad_norm: 0.9639241336315093, iteration: 268829
loss: 1.0776708126068115,grad_norm: 0.9999990447044986, iteration: 268830
loss: 1.0205737352371216,grad_norm: 0.8159029362616845, iteration: 268831
loss: 1.0986639261245728,grad_norm: 0.999999710189056, iteration: 268832
loss: 1.044926643371582,grad_norm: 0.9999997392541908, iteration: 268833
loss: 0.9607536792755127,grad_norm: 0.9726400742720901, iteration: 268834
loss: 1.0178128480911255,grad_norm: 0.9999997010190099, iteration: 268835
loss: 1.0026741027832031,grad_norm: 0.999999345512428, iteration: 268836
loss: 1.03239905834198,grad_norm: 0.9670510033700428, iteration: 268837
loss: 1.1628434658050537,grad_norm: 0.9999992660779604, iteration: 268838
loss: 1.0201605558395386,grad_norm: 0.9999994096232366, iteration: 268839
loss: 1.0901298522949219,grad_norm: 0.9999992396944474, iteration: 268840
loss: 1.059120774269104,grad_norm: 0.9999994812171739, iteration: 268841
loss: 1.0442250967025757,grad_norm: 0.862881145256258, iteration: 268842
loss: 1.049172282218933,grad_norm: 0.9061139384177245, iteration: 268843
loss: 0.995309591293335,grad_norm: 0.9024071967340959, iteration: 268844
loss: 1.0125384330749512,grad_norm: 0.9999992026430852, iteration: 268845
loss: 0.9992753863334656,grad_norm: 0.9676586326602886, iteration: 268846
loss: 1.0982328653335571,grad_norm: 0.821393822506157, iteration: 268847
loss: 1.079211711883545,grad_norm: 0.9999999392187335, iteration: 268848
loss: 1.0178146362304688,grad_norm: 0.8240329165677421, iteration: 268849
loss: 1.1815136671066284,grad_norm: 0.9505393645445654, iteration: 268850
loss: 1.0753928422927856,grad_norm: 0.9999991249827562, iteration: 268851
loss: 0.9868378639221191,grad_norm: 0.8861799860615509, iteration: 268852
loss: 1.0354702472686768,grad_norm: 0.9259666002916388, iteration: 268853
loss: 1.0632983446121216,grad_norm: 0.999999216384548, iteration: 268854
loss: 1.059372901916504,grad_norm: 0.9999991866774259, iteration: 268855
loss: 1.002629280090332,grad_norm: 0.7853067226273218, iteration: 268856
loss: 1.0941691398620605,grad_norm: 0.9999991080135798, iteration: 268857
loss: 1.031253457069397,grad_norm: 0.8136260377161846, iteration: 268858
loss: 1.071618676185608,grad_norm: 0.9999993934615116, iteration: 268859
loss: 0.9854691028594971,grad_norm: 0.927572763350304, iteration: 268860
loss: 1.042955756187439,grad_norm: 0.9999996694197039, iteration: 268861
loss: 1.068396806716919,grad_norm: 0.9426574923582378, iteration: 268862
loss: 1.118333339691162,grad_norm: 0.9999989968423952, iteration: 268863
loss: 1.0824426412582397,grad_norm: 0.9999993041595828, iteration: 268864
loss: 1.0613642930984497,grad_norm: 0.9999995647819743, iteration: 268865
loss: 1.0322444438934326,grad_norm: 0.999999178261191, iteration: 268866
loss: 1.0310873985290527,grad_norm: 0.8654913611701293, iteration: 268867
loss: 1.0476301908493042,grad_norm: 0.9999991710920466, iteration: 268868
loss: 1.0242177248001099,grad_norm: 0.8097342316796661, iteration: 268869
loss: 1.0325944423675537,grad_norm: 0.9999991225195134, iteration: 268870
loss: 1.0087827444076538,grad_norm: 0.9061578924227757, iteration: 268871
loss: 0.9905834197998047,grad_norm: 0.9999992238283714, iteration: 268872
loss: 1.0382670164108276,grad_norm: 0.99999909011366, iteration: 268873
loss: 1.090041995048523,grad_norm: 0.9911238308941298, iteration: 268874
loss: 1.238028645515442,grad_norm: 0.9999994913905614, iteration: 268875
loss: 1.0333564281463623,grad_norm: 0.9999990464584487, iteration: 268876
loss: 1.0893436670303345,grad_norm: 0.9999992231645354, iteration: 268877
loss: 1.0533753633499146,grad_norm: 0.9999991383128632, iteration: 268878
loss: 1.055558681488037,grad_norm: 0.9235428914114313, iteration: 268879
loss: 1.0211235284805298,grad_norm: 0.999999969144101, iteration: 268880
loss: 1.0688409805297852,grad_norm: 0.8678825547579819, iteration: 268881
loss: 1.0014680624008179,grad_norm: 0.9340159831586343, iteration: 268882
loss: 1.0101518630981445,grad_norm: 0.9827158754087235, iteration: 268883
loss: 1.0090510845184326,grad_norm: 0.9999991218830482, iteration: 268884
loss: 1.0777732133865356,grad_norm: 0.9999994245127496, iteration: 268885
loss: 1.0261693000793457,grad_norm: 0.8813449587170272, iteration: 268886
loss: 1.0000637769699097,grad_norm: 0.8674503184320266, iteration: 268887
loss: 1.0048811435699463,grad_norm: 0.8469230201500839, iteration: 268888
loss: 1.0939606428146362,grad_norm: 0.9999991380569253, iteration: 268889
loss: 1.025666356086731,grad_norm: 0.999999159962023, iteration: 268890
loss: 0.9809563755989075,grad_norm: 0.9999990424135822, iteration: 268891
loss: 1.1073864698410034,grad_norm: 0.8386461692963274, iteration: 268892
loss: 1.0661362409591675,grad_norm: 0.9305032926981278, iteration: 268893
loss: 1.0240755081176758,grad_norm: 0.7872470110236269, iteration: 268894
loss: 1.0001107454299927,grad_norm: 0.9947603803081287, iteration: 268895
loss: 1.0067365169525146,grad_norm: 0.9999993847628106, iteration: 268896
loss: 1.0832345485687256,grad_norm: 0.999999501043237, iteration: 268897
loss: 0.9932776093482971,grad_norm: 0.9999996227019903, iteration: 268898
loss: 1.012045979499817,grad_norm: 0.8040545482949871, iteration: 268899
loss: 1.0221316814422607,grad_norm: 0.9485204670764479, iteration: 268900
loss: 1.0810205936431885,grad_norm: 0.7854123180323723, iteration: 268901
loss: 1.00508451461792,grad_norm: 0.9999991906699531, iteration: 268902
loss: 1.1426928043365479,grad_norm: 0.9999990917585851, iteration: 268903
loss: 0.9995962381362915,grad_norm: 0.9332610939543008, iteration: 268904
loss: 0.9897926449775696,grad_norm: 0.881887437220114, iteration: 268905
loss: 1.0487664937973022,grad_norm: 0.9999992647905805, iteration: 268906
loss: 1.1127310991287231,grad_norm: 0.9999992574979555, iteration: 268907
loss: 1.0172462463378906,grad_norm: 0.8769633753325284, iteration: 268908
loss: 1.0463638305664062,grad_norm: 0.9999993118298667, iteration: 268909
loss: 1.0041216611862183,grad_norm: 0.939581589555864, iteration: 268910
loss: 1.0045967102050781,grad_norm: 0.9530798034820502, iteration: 268911
loss: 1.0354301929473877,grad_norm: 0.8308581900050219, iteration: 268912
loss: 1.1024025678634644,grad_norm: 0.999999091314454, iteration: 268913
loss: 1.1983972787857056,grad_norm: 0.9999994227944778, iteration: 268914
loss: 1.016139268875122,grad_norm: 0.8375025998132084, iteration: 268915
loss: 1.1027686595916748,grad_norm: 0.9999995990362847, iteration: 268916
loss: 0.9904287457466125,grad_norm: 0.867738053084106, iteration: 268917
loss: 1.0610976219177246,grad_norm: 0.8605875343034324, iteration: 268918
loss: 1.021978497505188,grad_norm: 0.8340970458395315, iteration: 268919
loss: 1.0255653858184814,grad_norm: 0.9999989948567121, iteration: 268920
loss: 1.041822075843811,grad_norm: 0.9827851315866136, iteration: 268921
loss: 1.0101523399353027,grad_norm: 0.9999991634125485, iteration: 268922
loss: 1.0028358697891235,grad_norm: 0.8787977580332491, iteration: 268923
loss: 0.9873553514480591,grad_norm: 0.8803834408020075, iteration: 268924
loss: 0.9925570487976074,grad_norm: 0.7695354329189247, iteration: 268925
loss: 1.0540887117385864,grad_norm: 0.9515228351947331, iteration: 268926
loss: 1.0445235967636108,grad_norm: 0.9990452159339523, iteration: 268927
loss: 1.0117051601409912,grad_norm: 0.9287636210588038, iteration: 268928
loss: 0.9819062352180481,grad_norm: 0.7355930368355911, iteration: 268929
loss: 1.1037797927856445,grad_norm: 0.9501392552534161, iteration: 268930
loss: 1.0028325319290161,grad_norm: 0.8997726765274882, iteration: 268931
loss: 0.9971562027931213,grad_norm: 0.8571865954055291, iteration: 268932
loss: 1.020203948020935,grad_norm: 0.9929837020624772, iteration: 268933
loss: 1.0456660985946655,grad_norm: 0.8846045641972338, iteration: 268934
loss: 1.0128570795059204,grad_norm: 0.9555869075510145, iteration: 268935
loss: 0.9788976907730103,grad_norm: 0.8917142008419658, iteration: 268936
loss: 1.0055811405181885,grad_norm: 0.9999995319678555, iteration: 268937
loss: 1.0060676336288452,grad_norm: 0.7515113044629061, iteration: 268938
loss: 1.034861445426941,grad_norm: 0.8850845835420552, iteration: 268939
loss: 0.9823929071426392,grad_norm: 0.9734913947753203, iteration: 268940
loss: 0.9909102916717529,grad_norm: 0.843068665317656, iteration: 268941
loss: 0.9875078797340393,grad_norm: 0.9362181403330788, iteration: 268942
loss: 1.0092401504516602,grad_norm: 0.9621621102383908, iteration: 268943
loss: 1.011676549911499,grad_norm: 0.9999991443685783, iteration: 268944
loss: 0.9607115387916565,grad_norm: 0.963408913113683, iteration: 268945
loss: 1.0574250221252441,grad_norm: 0.9143367310932583, iteration: 268946
loss: 1.083294153213501,grad_norm: 0.999999050575407, iteration: 268947
loss: 1.004936695098877,grad_norm: 0.8563801863175561, iteration: 268948
loss: 0.9995742440223694,grad_norm: 0.8107995229099328, iteration: 268949
loss: 1.0314807891845703,grad_norm: 0.9999990209178578, iteration: 268950
loss: 0.9944481253623962,grad_norm: 0.891130568244522, iteration: 268951
loss: 1.0470454692840576,grad_norm: 0.891622904674866, iteration: 268952
loss: 1.0060044527053833,grad_norm: 0.9999998319605833, iteration: 268953
loss: 1.044440746307373,grad_norm: 0.9235755944052862, iteration: 268954
loss: 1.0291011333465576,grad_norm: 0.9999997498518114, iteration: 268955
loss: 0.9811633825302124,grad_norm: 0.8410720811463294, iteration: 268956
loss: 1.0370945930480957,grad_norm: 0.8007858562744851, iteration: 268957
loss: 1.0005528926849365,grad_norm: 0.8660098175222072, iteration: 268958
loss: 0.9875673055648804,grad_norm: 0.7961313841829419, iteration: 268959
loss: 0.9843884706497192,grad_norm: 0.8415748564591217, iteration: 268960
loss: 1.0531725883483887,grad_norm: 0.9267765708384974, iteration: 268961
loss: 0.9941343069076538,grad_norm: 0.9999992137121179, iteration: 268962
loss: 0.9503082036972046,grad_norm: 0.945719210037777, iteration: 268963
loss: 1.015419602394104,grad_norm: 0.7999312241845147, iteration: 268964
loss: 0.9955564737319946,grad_norm: 0.7978222479498467, iteration: 268965
loss: 1.070542573928833,grad_norm: 0.991818228208462, iteration: 268966
loss: 1.0380489826202393,grad_norm: 0.9846020911317537, iteration: 268967
loss: 1.012464165687561,grad_norm: 0.8478917348971458, iteration: 268968
loss: 1.0006240606307983,grad_norm: 0.9476405348254421, iteration: 268969
loss: 1.0190171003341675,grad_norm: 0.7649006545246645, iteration: 268970
loss: 1.0014961957931519,grad_norm: 0.9999995795655401, iteration: 268971
loss: 1.1642730236053467,grad_norm: 0.999999262361844, iteration: 268972
loss: 0.9987286925315857,grad_norm: 0.7562500411730559, iteration: 268973
loss: 0.9951959252357483,grad_norm: 0.999999088944445, iteration: 268974
loss: 1.0041801929473877,grad_norm: 0.9181683410950408, iteration: 268975
loss: 1.0340009927749634,grad_norm: 0.9094819829183012, iteration: 268976
loss: 1.0460245609283447,grad_norm: 0.8128186930483459, iteration: 268977
loss: 1.030349612236023,grad_norm: 0.9999991970132456, iteration: 268978
loss: 0.9974249601364136,grad_norm: 0.7821676599674182, iteration: 268979
loss: 1.0519477128982544,grad_norm: 0.999999101135637, iteration: 268980
loss: 1.138079047203064,grad_norm: 0.9581726643137242, iteration: 268981
loss: 0.9759511351585388,grad_norm: 0.9999989804752314, iteration: 268982
loss: 0.9961077570915222,grad_norm: 0.8976973718743698, iteration: 268983
loss: 1.0730772018432617,grad_norm: 0.8343781801659225, iteration: 268984
loss: 0.996852457523346,grad_norm: 0.8512238059625068, iteration: 268985
loss: 1.0229045152664185,grad_norm: 0.7837481665493087, iteration: 268986
loss: 1.0242091417312622,grad_norm: 0.7623261194328244, iteration: 268987
loss: 0.9667964577674866,grad_norm: 0.9999989760453124, iteration: 268988
loss: 0.996374785900116,grad_norm: 0.8459742543964738, iteration: 268989
loss: 1.0933955907821655,grad_norm: 0.9999996175757615, iteration: 268990
loss: 0.9659271240234375,grad_norm: 0.7965971023308486, iteration: 268991
loss: 0.9815945029258728,grad_norm: 0.9999992232316778, iteration: 268992
loss: 1.0098751783370972,grad_norm: 0.8672663212498188, iteration: 268993
loss: 1.022939682006836,grad_norm: 0.944181599649144, iteration: 268994
loss: 0.9937460422515869,grad_norm: 0.8767044031993623, iteration: 268995
loss: 1.0448256731033325,grad_norm: 0.9999993883084645, iteration: 268996
loss: 0.978640079498291,grad_norm: 0.8388315715389417, iteration: 268997
loss: 1.0307047367095947,grad_norm: 0.8370742591709233, iteration: 268998
loss: 0.9927673935890198,grad_norm: 0.9537395504204943, iteration: 268999
loss: 0.9661024808883667,grad_norm: 0.9999246727489306, iteration: 269000
loss: 1.0246098041534424,grad_norm: 0.7900828741216597, iteration: 269001
loss: 0.9623615145683289,grad_norm: 0.9999993438524651, iteration: 269002
loss: 1.0321842432022095,grad_norm: 0.9467856531326311, iteration: 269003
loss: 1.0101594924926758,grad_norm: 0.8372825150437694, iteration: 269004
loss: 1.1368526220321655,grad_norm: 0.9650570901276497, iteration: 269005
loss: 0.9906101226806641,grad_norm: 0.9779969883424628, iteration: 269006
loss: 1.0171382427215576,grad_norm: 0.877608634655701, iteration: 269007
loss: 1.0404987335205078,grad_norm: 0.9571351355908978, iteration: 269008
loss: 1.0168306827545166,grad_norm: 0.8242145102205118, iteration: 269009
loss: 0.9811449646949768,grad_norm: 0.9572029280286032, iteration: 269010
loss: 1.0027371644973755,grad_norm: 0.7961692750341444, iteration: 269011
loss: 1.1107491254806519,grad_norm: 0.9999999153661949, iteration: 269012
loss: 1.0403727293014526,grad_norm: 0.925715362503916, iteration: 269013
loss: 0.9921000599861145,grad_norm: 0.897278782911264, iteration: 269014
loss: 1.012272596359253,grad_norm: 0.9535352078252629, iteration: 269015
loss: 1.0658519268035889,grad_norm: 0.9999991744764066, iteration: 269016
loss: 0.9972232580184937,grad_norm: 0.9595347561409641, iteration: 269017
loss: 0.9828690886497498,grad_norm: 0.7878204568081045, iteration: 269018
loss: 0.9775725603103638,grad_norm: 0.8716218678409393, iteration: 269019
loss: 0.9948981404304504,grad_norm: 0.742823112001568, iteration: 269020
loss: 1.0691807270050049,grad_norm: 0.999999027127732, iteration: 269021
loss: 1.0301740169525146,grad_norm: 0.8371382553136482, iteration: 269022
loss: 1.0348589420318604,grad_norm: 0.9999992600989535, iteration: 269023
loss: 1.0706857442855835,grad_norm: 0.9999996045391492, iteration: 269024
loss: 1.006875991821289,grad_norm: 0.8061317736991613, iteration: 269025
loss: 0.9942446947097778,grad_norm: 0.7616592183400598, iteration: 269026
loss: 1.012495517730713,grad_norm: 0.9051704634670271, iteration: 269027
loss: 0.9973997473716736,grad_norm: 0.9999989596174653, iteration: 269028
loss: 1.0510281324386597,grad_norm: 0.9416024260675802, iteration: 269029
loss: 1.0322173833847046,grad_norm: 0.9184232914391859, iteration: 269030
loss: 1.0076721906661987,grad_norm: 0.8219336663009167, iteration: 269031
loss: 0.974640429019928,grad_norm: 0.8165532358640094, iteration: 269032
loss: 1.0378360748291016,grad_norm: 0.9134874405891592, iteration: 269033
loss: 0.9974350333213806,grad_norm: 0.7430953928649838, iteration: 269034
loss: 0.9737868309020996,grad_norm: 0.999999838711966, iteration: 269035
loss: 1.0537415742874146,grad_norm: 0.9999990559761147, iteration: 269036
loss: 1.0292973518371582,grad_norm: 0.8071320761899056, iteration: 269037
loss: 1.033345103263855,grad_norm: 0.8068305120828265, iteration: 269038
loss: 1.0011991262435913,grad_norm: 0.9999989408574385, iteration: 269039
loss: 1.018886685371399,grad_norm: 0.7906627828505294, iteration: 269040
loss: 1.0128387212753296,grad_norm: 0.8038019443631192, iteration: 269041
loss: 1.010429859161377,grad_norm: 0.9127538722104768, iteration: 269042
loss: 1.0106390714645386,grad_norm: 0.9999990711906458, iteration: 269043
loss: 1.0396616458892822,grad_norm: 0.9999995369324329, iteration: 269044
loss: 1.0184557437896729,grad_norm: 0.7843529237399239, iteration: 269045
loss: 0.9881439208984375,grad_norm: 0.8911294235363209, iteration: 269046
loss: 0.9837334156036377,grad_norm: 0.8527569848311108, iteration: 269047
loss: 1.0127778053283691,grad_norm: 0.9999991032052844, iteration: 269048
loss: 1.0002248287200928,grad_norm: 0.9065430973853593, iteration: 269049
loss: 0.9912451505661011,grad_norm: 0.81106345275491, iteration: 269050
loss: 1.0228911638259888,grad_norm: 0.9999990897632258, iteration: 269051
loss: 0.9901264309883118,grad_norm: 0.8183871859294435, iteration: 269052
loss: 0.988179087638855,grad_norm: 0.7238596835314574, iteration: 269053
loss: 0.9719707369804382,grad_norm: 0.8027283120126916, iteration: 269054
loss: 0.9709734916687012,grad_norm: 0.8804368813686965, iteration: 269055
loss: 1.0178508758544922,grad_norm: 0.8348676164431775, iteration: 269056
loss: 1.0190134048461914,grad_norm: 0.9551412366576505, iteration: 269057
loss: 0.9644407033920288,grad_norm: 0.8397883265984927, iteration: 269058
loss: 1.003844141960144,grad_norm: 0.9999995331042915, iteration: 269059
loss: 0.996734619140625,grad_norm: 0.9858241944283312, iteration: 269060
loss: 1.0076863765716553,grad_norm: 0.9999990540776194, iteration: 269061
loss: 0.9995133876800537,grad_norm: 0.9999990795076367, iteration: 269062
loss: 1.0252848863601685,grad_norm: 0.8896537758620536, iteration: 269063
loss: 1.0305485725402832,grad_norm: 0.7331913635654136, iteration: 269064
loss: 0.996044397354126,grad_norm: 0.9466935693202477, iteration: 269065
loss: 1.0555100440979004,grad_norm: 0.9854695222815253, iteration: 269066
loss: 0.9924113750457764,grad_norm: 0.8698397882669788, iteration: 269067
loss: 0.9861533641815186,grad_norm: 0.8644460061168455, iteration: 269068
loss: 0.9900350570678711,grad_norm: 0.7587303555687274, iteration: 269069
loss: 0.9603269696235657,grad_norm: 0.9999992303590626, iteration: 269070
loss: 1.031516194343567,grad_norm: 0.9166073557351543, iteration: 269071
loss: 1.022942304611206,grad_norm: 0.8807509156497101, iteration: 269072
loss: 0.9955411553382874,grad_norm: 0.8143001111051554, iteration: 269073
loss: 1.025884985923767,grad_norm: 0.7433237443453631, iteration: 269074
loss: 1.0651335716247559,grad_norm: 0.9999991542913985, iteration: 269075
loss: 0.9787034392356873,grad_norm: 0.7742157477445979, iteration: 269076
loss: 1.0520676374435425,grad_norm: 0.9999992110275541, iteration: 269077
loss: 1.0453786849975586,grad_norm: 0.8031317408403247, iteration: 269078
loss: 1.0039255619049072,grad_norm: 0.9999991638423804, iteration: 269079
loss: 1.023452639579773,grad_norm: 0.9621504565163018, iteration: 269080
loss: 1.017013669013977,grad_norm: 0.99999976342392, iteration: 269081
loss: 0.9842901825904846,grad_norm: 0.7560833773397597, iteration: 269082
loss: 0.9958805441856384,grad_norm: 0.9999993705418373, iteration: 269083
loss: 0.989579975605011,grad_norm: 0.9326100227924196, iteration: 269084
loss: 0.9907726645469666,grad_norm: 0.750870270050296, iteration: 269085
loss: 1.0190232992172241,grad_norm: 0.9058911404382195, iteration: 269086
loss: 1.004915475845337,grad_norm: 0.9036084878563618, iteration: 269087
loss: 1.040213704109192,grad_norm: 0.8267336111369785, iteration: 269088
loss: 0.9842522740364075,grad_norm: 0.851434980478583, iteration: 269089
loss: 1.028647780418396,grad_norm: 0.9999990753408172, iteration: 269090
loss: 0.9874032735824585,grad_norm: 0.7941075109933763, iteration: 269091
loss: 0.9980546832084656,grad_norm: 0.8696854721650364, iteration: 269092
loss: 1.0001388788223267,grad_norm: 0.9999992423573159, iteration: 269093
loss: 0.9752614498138428,grad_norm: 0.8734923267424344, iteration: 269094
loss: 1.0014578104019165,grad_norm: 0.7807220323239901, iteration: 269095
loss: 1.025283932685852,grad_norm: 0.852853844714018, iteration: 269096
loss: 1.049628496170044,grad_norm: 0.754296950629042, iteration: 269097
loss: 1.0360980033874512,grad_norm: 0.8120598479293717, iteration: 269098
loss: 1.006544589996338,grad_norm: 0.8342069471919367, iteration: 269099
loss: 0.9638598561286926,grad_norm: 0.9544282179689402, iteration: 269100
loss: 0.9900876879692078,grad_norm: 0.9274497565826926, iteration: 269101
loss: 0.9999816417694092,grad_norm: 0.9999992164864869, iteration: 269102
loss: 0.9962926506996155,grad_norm: 0.8682181382383177, iteration: 269103
loss: 1.044628381729126,grad_norm: 0.7382761950604448, iteration: 269104
loss: 1.0670251846313477,grad_norm: 0.9999991079524843, iteration: 269105
loss: 1.0209171772003174,grad_norm: 0.8898286644138849, iteration: 269106
loss: 1.0045592784881592,grad_norm: 0.999999176074263, iteration: 269107
loss: 0.9808763861656189,grad_norm: 0.896530795655315, iteration: 269108
loss: 1.0026298761367798,grad_norm: 0.820655937513912, iteration: 269109
loss: 1.0329489707946777,grad_norm: 0.9999991001577228, iteration: 269110
loss: 1.021736979484558,grad_norm: 0.9082754203457168, iteration: 269111
loss: 1.0156742334365845,grad_norm: 0.9647928524271333, iteration: 269112
loss: 0.9993029832839966,grad_norm: 0.9083344009041358, iteration: 269113
loss: 0.9792701601982117,grad_norm: 0.8711678734258911, iteration: 269114
loss: 1.0235388278961182,grad_norm: 0.895669527561468, iteration: 269115
loss: 1.0158683061599731,grad_norm: 0.8211281058355933, iteration: 269116
loss: 0.9628279805183411,grad_norm: 0.9029573402067189, iteration: 269117
loss: 0.98264479637146,grad_norm: 0.9785415642812929, iteration: 269118
loss: 0.988023042678833,grad_norm: 0.931970089275704, iteration: 269119
loss: 1.031212568283081,grad_norm: 0.9899903265058259, iteration: 269120
loss: 1.0044947862625122,grad_norm: 0.7607817406360834, iteration: 269121
loss: 1.0383192300796509,grad_norm: 0.8659089964378424, iteration: 269122
loss: 1.0144374370574951,grad_norm: 0.903260706277107, iteration: 269123
loss: 1.0236904621124268,grad_norm: 0.9266441717191485, iteration: 269124
loss: 1.0500704050064087,grad_norm: 0.9999993906765473, iteration: 269125
loss: 1.0134313106536865,grad_norm: 0.9302859020634668, iteration: 269126
loss: 1.012217402458191,grad_norm: 0.8268272344439188, iteration: 269127
loss: 0.9647672772407532,grad_norm: 0.9372786509219884, iteration: 269128
loss: 0.9517173767089844,grad_norm: 0.8529625759957011, iteration: 269129
loss: 0.9902242422103882,grad_norm: 0.8284174596929489, iteration: 269130
loss: 0.9875999093055725,grad_norm: 0.7978560340802848, iteration: 269131
loss: 0.9551416039466858,grad_norm: 0.9553727277967892, iteration: 269132
loss: 1.1271744966506958,grad_norm: 0.999999241298758, iteration: 269133
loss: 1.0236135721206665,grad_norm: 0.8379954736199743, iteration: 269134
loss: 1.0095293521881104,grad_norm: 0.9999991128658777, iteration: 269135
loss: 1.008546233177185,grad_norm: 0.7612426384189184, iteration: 269136
loss: 0.9869369864463806,grad_norm: 0.7176864840647567, iteration: 269137
loss: 0.9634690284729004,grad_norm: 0.9208980304854315, iteration: 269138
loss: 0.9841474294662476,grad_norm: 0.8601365125686848, iteration: 269139
loss: 0.9990476965904236,grad_norm: 0.9633843933844032, iteration: 269140
loss: 0.976813018321991,grad_norm: 0.9999990403150684, iteration: 269141
loss: 0.9997966885566711,grad_norm: 0.9048457741245445, iteration: 269142
loss: 1.0170732736587524,grad_norm: 0.8278944947701844, iteration: 269143
loss: 1.0166295766830444,grad_norm: 0.8631489352736957, iteration: 269144
loss: 0.972899317741394,grad_norm: 0.8437083256321051, iteration: 269145
loss: 0.9769873023033142,grad_norm: 0.8370900550003132, iteration: 269146
loss: 0.9659199118614197,grad_norm: 0.7579171673212726, iteration: 269147
loss: 0.9908742308616638,grad_norm: 0.8875904941953753, iteration: 269148
loss: 0.9867250323295593,grad_norm: 0.8765362371743952, iteration: 269149
loss: 1.0273865461349487,grad_norm: 0.7769176399446395, iteration: 269150
loss: 1.004781723022461,grad_norm: 0.9879173689420907, iteration: 269151
loss: 0.9765434861183167,grad_norm: 0.8840786310769775, iteration: 269152
loss: 1.005690574645996,grad_norm: 0.9764347337795979, iteration: 269153
loss: 1.0466885566711426,grad_norm: 0.888128444425744, iteration: 269154
loss: 0.9994855523109436,grad_norm: 0.7206400787595447, iteration: 269155
loss: 0.9990736842155457,grad_norm: 0.9275204141608482, iteration: 269156
loss: 1.0061001777648926,grad_norm: 0.770996073355265, iteration: 269157
loss: 1.116331934928894,grad_norm: 0.9999991460132778, iteration: 269158
loss: 0.9918940663337708,grad_norm: 0.8122310734627782, iteration: 269159
loss: 1.047065258026123,grad_norm: 0.9465867166228026, iteration: 269160
loss: 1.0453791618347168,grad_norm: 0.9185941094803065, iteration: 269161
loss: 1.0010548830032349,grad_norm: 0.8663399178344531, iteration: 269162
loss: 1.0065228939056396,grad_norm: 0.9999990087832811, iteration: 269163
loss: 1.0056848526000977,grad_norm: 0.9999990670009173, iteration: 269164
loss: 1.0137122869491577,grad_norm: 0.9604674335792105, iteration: 269165
loss: 0.9933760762214661,grad_norm: 0.8846182729594827, iteration: 269166
loss: 0.9699227809906006,grad_norm: 0.8414978000430651, iteration: 269167
loss: 0.9703971743583679,grad_norm: 0.8184974348367748, iteration: 269168
loss: 0.9923092722892761,grad_norm: 0.7758339044850043, iteration: 269169
loss: 0.9936601519584656,grad_norm: 0.9053409619017523, iteration: 269170
loss: 1.004595398902893,grad_norm: 0.7490051268240029, iteration: 269171
loss: 1.0007634162902832,grad_norm: 0.973786064705283, iteration: 269172
loss: 1.0255719423294067,grad_norm: 0.9132787368373174, iteration: 269173
loss: 0.9983809590339661,grad_norm: 0.8323568798537688, iteration: 269174
loss: 1.018874168395996,grad_norm: 0.829767173713558, iteration: 269175
loss: 0.9734683036804199,grad_norm: 0.8413656830461953, iteration: 269176
loss: 1.0016067028045654,grad_norm: 0.8906143053016475, iteration: 269177
loss: 1.016770362854004,grad_norm: 0.9999991050177535, iteration: 269178
loss: 1.018958330154419,grad_norm: 0.9536709491266256, iteration: 269179
loss: 1.0647391080856323,grad_norm: 0.9608488648317393, iteration: 269180
loss: 0.9985502362251282,grad_norm: 0.889633299966935, iteration: 269181
loss: 1.0780668258666992,grad_norm: 0.8748217403404506, iteration: 269182
loss: 1.099793553352356,grad_norm: 0.9999992874861587, iteration: 269183
loss: 1.016350269317627,grad_norm: 0.838372374722696, iteration: 269184
loss: 0.9849371314048767,grad_norm: 0.9634978147607162, iteration: 269185
loss: 1.010927677154541,grad_norm: 0.8510072379456207, iteration: 269186
loss: 0.9564338326454163,grad_norm: 0.9703047105043587, iteration: 269187
loss: 0.98271244764328,grad_norm: 0.9124025193949421, iteration: 269188
loss: 0.9918529391288757,grad_norm: 0.843280069714649, iteration: 269189
loss: 1.020085334777832,grad_norm: 0.9999991509639375, iteration: 269190
loss: 0.9683791995048523,grad_norm: 0.936373444929078, iteration: 269191
loss: 1.0159701108932495,grad_norm: 0.8135000625287742, iteration: 269192
loss: 1.035670280456543,grad_norm: 0.9453365605858988, iteration: 269193
loss: 1.0066523551940918,grad_norm: 0.9428858078114161, iteration: 269194
loss: 1.0405457019805908,grad_norm: 0.7900707991136281, iteration: 269195
loss: 1.0143661499023438,grad_norm: 0.9999997551252725, iteration: 269196
loss: 1.0505949258804321,grad_norm: 0.999999902599736, iteration: 269197
loss: 1.0217105150222778,grad_norm: 0.9999990159465514, iteration: 269198
loss: 0.9616298079490662,grad_norm: 0.7759202383689245, iteration: 269199
loss: 1.0378752946853638,grad_norm: 0.9999991648060954, iteration: 269200
loss: 1.009714961051941,grad_norm: 0.999999129951339, iteration: 269201
loss: 0.9957935810089111,grad_norm: 0.9999989156059774, iteration: 269202
loss: 0.9893546104431152,grad_norm: 0.9999991720636562, iteration: 269203
loss: 0.9878855347633362,grad_norm: 0.805413438212379, iteration: 269204
loss: 1.0081692934036255,grad_norm: 0.9192760997798272, iteration: 269205
loss: 1.0224065780639648,grad_norm: 0.829471213322714, iteration: 269206
loss: 1.0205063819885254,grad_norm: 0.96960770861489, iteration: 269207
loss: 0.9444866180419922,grad_norm: 0.9129911476505089, iteration: 269208
loss: 1.0131571292877197,grad_norm: 0.8719503929076051, iteration: 269209
loss: 0.9970355033874512,grad_norm: 0.8907755263025345, iteration: 269210
loss: 1.0626599788665771,grad_norm: 0.9999993665168297, iteration: 269211
loss: 1.0624568462371826,grad_norm: 0.9999990898382604, iteration: 269212
loss: 0.9777576327323914,grad_norm: 0.76040939149521, iteration: 269213
loss: 0.9864558577537537,grad_norm: 0.9324094408718425, iteration: 269214
loss: 0.996035635471344,grad_norm: 0.9577080151115457, iteration: 269215
loss: 1.0082374811172485,grad_norm: 0.9999990382282166, iteration: 269216
loss: 1.0051223039627075,grad_norm: 0.7896873212398529, iteration: 269217
loss: 0.9650416374206543,grad_norm: 0.7741306132720907, iteration: 269218
loss: 1.0407295227050781,grad_norm: 0.9800849060690818, iteration: 269219
loss: 0.9882848262786865,grad_norm: 0.8432960626671568, iteration: 269220
loss: 1.1171542406082153,grad_norm: 0.9999995939112132, iteration: 269221
loss: 0.9785117506980896,grad_norm: 0.9999996981205124, iteration: 269222
loss: 1.0309110879898071,grad_norm: 0.9999990598219851, iteration: 269223
loss: 0.9928292036056519,grad_norm: 0.7686372279264779, iteration: 269224
loss: 1.0017011165618896,grad_norm: 0.847168521016967, iteration: 269225
loss: 1.011870265007019,grad_norm: 0.7956492703793829, iteration: 269226
loss: 0.9907213449478149,grad_norm: 0.7852106448899923, iteration: 269227
loss: 1.000167727470398,grad_norm: 0.7348681284198103, iteration: 269228
loss: 1.0220320224761963,grad_norm: 0.9999990064592574, iteration: 269229
loss: 1.0232961177825928,grad_norm: 0.8630784880842356, iteration: 269230
loss: 1.0225956439971924,grad_norm: 0.9999998301522597, iteration: 269231
loss: 0.9614946246147156,grad_norm: 0.8709846767633569, iteration: 269232
loss: 1.0217366218566895,grad_norm: 0.7437556918833941, iteration: 269233
loss: 0.9984065294265747,grad_norm: 0.9957194387125898, iteration: 269234
loss: 0.9902563095092773,grad_norm: 0.9743864522663245, iteration: 269235
loss: 1.0216301679611206,grad_norm: 0.8732930699865954, iteration: 269236
loss: 0.9994031190872192,grad_norm: 0.7258601571933575, iteration: 269237
loss: 0.9750168919563293,grad_norm: 0.8320332423745465, iteration: 269238
loss: 0.9784558415412903,grad_norm: 0.8095374448669655, iteration: 269239
loss: 0.9908916354179382,grad_norm: 0.8227928701540196, iteration: 269240
loss: 1.017720341682434,grad_norm: 0.9404313546631607, iteration: 269241
loss: 0.982810914516449,grad_norm: 0.7198958958894615, iteration: 269242
loss: 1.1992733478546143,grad_norm: 0.9999999072431346, iteration: 269243
loss: 1.0099941492080688,grad_norm: 0.9210044046198421, iteration: 269244
loss: 0.955757200717926,grad_norm: 0.8568555934706454, iteration: 269245
loss: 1.1735248565673828,grad_norm: 0.9674421879110662, iteration: 269246
loss: 0.9831020832061768,grad_norm: 0.882741946736307, iteration: 269247
loss: 1.0026516914367676,grad_norm: 0.7330422678519318, iteration: 269248
loss: 1.0381814241409302,grad_norm: 0.8978676352675378, iteration: 269249
loss: 1.0240797996520996,grad_norm: 0.9941053637214066, iteration: 269250
loss: 0.9623705148696899,grad_norm: 0.9194948405304055, iteration: 269251
loss: 1.0059258937835693,grad_norm: 0.7994164779695727, iteration: 269252
loss: 1.061887264251709,grad_norm: 0.8571252564901388, iteration: 269253
loss: 0.9879563450813293,grad_norm: 0.7491308465654456, iteration: 269254
loss: 0.9820832014083862,grad_norm: 0.7424681151291349, iteration: 269255
loss: 0.9935063719749451,grad_norm: 0.7468195980267732, iteration: 269256
loss: 0.9703260660171509,grad_norm: 0.8325568948786035, iteration: 269257
loss: 1.0180771350860596,grad_norm: 0.9180536722513674, iteration: 269258
loss: 0.9990840554237366,grad_norm: 0.9270830163744808, iteration: 269259
loss: 1.0153722763061523,grad_norm: 0.9029416469370564, iteration: 269260
loss: 0.9872496128082275,grad_norm: 0.8195923898448981, iteration: 269261
loss: 0.9804591536521912,grad_norm: 0.8660645692084342, iteration: 269262
loss: 0.9810683727264404,grad_norm: 0.948964123052218, iteration: 269263
loss: 1.0142449140548706,grad_norm: 0.920854703480825, iteration: 269264
loss: 1.0017977952957153,grad_norm: 0.8965923961184322, iteration: 269265
loss: 0.988193690776825,grad_norm: 0.99512533034564, iteration: 269266
loss: 0.9961451888084412,grad_norm: 0.9999991754362528, iteration: 269267
loss: 1.0231751203536987,grad_norm: 0.8099650635899289, iteration: 269268
loss: 1.087365984916687,grad_norm: 0.9999996393768796, iteration: 269269
loss: 0.9685555100440979,grad_norm: 0.8499939161376001, iteration: 269270
loss: 1.0146363973617554,grad_norm: 0.8962799539655631, iteration: 269271
loss: 1.0332915782928467,grad_norm: 0.8256892205533655, iteration: 269272
loss: 0.9850664138793945,grad_norm: 0.8861534964163805, iteration: 269273
loss: 1.0077518224716187,grad_norm: 0.7183772559068956, iteration: 269274
loss: 1.036850094795227,grad_norm: 0.9999992398014025, iteration: 269275
loss: 1.0007654428482056,grad_norm: 0.8853022215188812, iteration: 269276
loss: 1.0046229362487793,grad_norm: 0.7732267798390579, iteration: 269277
loss: 0.9585077166557312,grad_norm: 0.8358531875256556, iteration: 269278
loss: 0.977986752986908,grad_norm: 0.9582634050079414, iteration: 269279
loss: 1.0099267959594727,grad_norm: 0.8302481547742183, iteration: 269280
loss: 0.978736937046051,grad_norm: 0.9999990941099022, iteration: 269281
loss: 1.0345538854599,grad_norm: 0.7494947459574718, iteration: 269282
loss: 1.0251020193099976,grad_norm: 0.9999998325202201, iteration: 269283
loss: 0.994211733341217,grad_norm: 0.8518332272457734, iteration: 269284
loss: 0.9942761659622192,grad_norm: 0.9999991381813189, iteration: 269285
loss: 1.0004767179489136,grad_norm: 0.9999990478759994, iteration: 269286
loss: 0.9828926920890808,grad_norm: 0.8714826019508864, iteration: 269287
loss: 1.0991859436035156,grad_norm: 0.9471368350823012, iteration: 269288
loss: 0.9793638586997986,grad_norm: 0.7481891585733091, iteration: 269289
loss: 1.029691457748413,grad_norm: 0.7781100095560171, iteration: 269290
loss: 0.9962382316589355,grad_norm: 0.9479636357453642, iteration: 269291
loss: 1.0016108751296997,grad_norm: 0.9032700044943075, iteration: 269292
loss: 1.0105377435684204,grad_norm: 0.8708401670339885, iteration: 269293
loss: 1.01094388961792,grad_norm: 0.9999994014741943, iteration: 269294
loss: 1.0378190279006958,grad_norm: 0.8458598600493623, iteration: 269295
loss: 1.0515940189361572,grad_norm: 0.9549357414192433, iteration: 269296
loss: 0.9754670262336731,grad_norm: 0.7603091159913894, iteration: 269297
loss: 1.0188112258911133,grad_norm: 0.8494826362849913, iteration: 269298
loss: 1.00979745388031,grad_norm: 0.9999996122983977, iteration: 269299
loss: 1.0054757595062256,grad_norm: 0.7107162561530606, iteration: 269300
loss: 0.9874584078788757,grad_norm: 0.7728511807147777, iteration: 269301
loss: 0.9938403367996216,grad_norm: 0.9200851739838036, iteration: 269302
loss: 0.9998842477798462,grad_norm: 0.9632720739741267, iteration: 269303
loss: 1.027364730834961,grad_norm: 0.7851799130048871, iteration: 269304
loss: 1.018182635307312,grad_norm: 0.999999307085583, iteration: 269305
loss: 1.0137028694152832,grad_norm: 0.9921342849802256, iteration: 269306
loss: 0.9927756190299988,grad_norm: 0.9999991257661746, iteration: 269307
loss: 1.006582260131836,grad_norm: 0.999999945002825, iteration: 269308
loss: 1.0159180164337158,grad_norm: 0.8990899594028993, iteration: 269309
loss: 1.024823784828186,grad_norm: 0.8548780693942192, iteration: 269310
loss: 1.0314747095108032,grad_norm: 0.9999991168096742, iteration: 269311
loss: 0.9979858994483948,grad_norm: 0.9999989852537877, iteration: 269312
loss: 1.0056955814361572,grad_norm: 0.8025939034028385, iteration: 269313
loss: 0.9827334880828857,grad_norm: 0.8608801355293959, iteration: 269314
loss: 1.091460108757019,grad_norm: 0.9999994197078251, iteration: 269315
loss: 0.9786132574081421,grad_norm: 0.8956362141410459, iteration: 269316
loss: 0.9812029600143433,grad_norm: 0.8536517855931085, iteration: 269317
loss: 0.9708273410797119,grad_norm: 0.9001635747534025, iteration: 269318
loss: 0.9715308547019958,grad_norm: 0.8621796855794022, iteration: 269319
loss: 1.0146136283874512,grad_norm: 0.923016924439539, iteration: 269320
loss: 0.9866660237312317,grad_norm: 0.9265673184740384, iteration: 269321
loss: 0.992600679397583,grad_norm: 0.953333592718932, iteration: 269322
loss: 1.0539504289627075,grad_norm: 1.0000000312732398, iteration: 269323
loss: 0.9679316878318787,grad_norm: 0.8916757647622965, iteration: 269324
loss: 1.0565896034240723,grad_norm: 0.9789654957889119, iteration: 269325
loss: 1.0002045631408691,grad_norm: 0.7703942440813709, iteration: 269326
loss: 0.9779388308525085,grad_norm: 0.9999993361452679, iteration: 269327
loss: 1.0148859024047852,grad_norm: 0.9039628901007599, iteration: 269328
loss: 0.9769260883331299,grad_norm: 0.8622994797128647, iteration: 269329
loss: 0.9880689382553101,grad_norm: 0.7857955212803893, iteration: 269330
loss: 0.9732326865196228,grad_norm: 0.9880603617206732, iteration: 269331
loss: 1.0350316762924194,grad_norm: 0.9999992022058146, iteration: 269332
loss: 1.0445568561553955,grad_norm: 0.999999206508906, iteration: 269333
loss: 0.9856167435646057,grad_norm: 0.9213446674594946, iteration: 269334
loss: 0.9795858263969421,grad_norm: 0.9999992660329241, iteration: 269335
loss: 1.0790799856185913,grad_norm: 0.7585569604928624, iteration: 269336
loss: 0.9509133696556091,grad_norm: 0.7209536808339422, iteration: 269337
loss: 1.0184121131896973,grad_norm: 0.8649543890870467, iteration: 269338
loss: 0.9840276837348938,grad_norm: 0.9606423428335084, iteration: 269339
loss: 1.019346833229065,grad_norm: 0.9943371389385235, iteration: 269340
loss: 0.9703274965286255,grad_norm: 0.7895935492463214, iteration: 269341
loss: 1.0342164039611816,grad_norm: 0.9999999245508903, iteration: 269342
loss: 1.0405795574188232,grad_norm: 0.9999991894841437, iteration: 269343
loss: 0.9533687829971313,grad_norm: 0.9503221690415901, iteration: 269344
loss: 0.9823959469795227,grad_norm: 0.8730661926634368, iteration: 269345
loss: 0.9384772181510925,grad_norm: 0.8255734853847426, iteration: 269346
loss: 1.0098179578781128,grad_norm: 0.8871916842501212, iteration: 269347
loss: 1.0236541032791138,grad_norm: 0.9999993667438384, iteration: 269348
loss: 0.993495523929596,grad_norm: 0.911916092076368, iteration: 269349
loss: 1.02089262008667,grad_norm: 0.83397655534877, iteration: 269350
loss: 0.9896157383918762,grad_norm: 0.7659144828223888, iteration: 269351
loss: 1.0360537767410278,grad_norm: 0.96459931649829, iteration: 269352
loss: 1.0656437873840332,grad_norm: 0.9999992739106168, iteration: 269353
loss: 1.0010839700698853,grad_norm: 0.9999994934590805, iteration: 269354
loss: 0.996209979057312,grad_norm: 0.7839875265072697, iteration: 269355
loss: 0.9672543406486511,grad_norm: 0.8699453844485897, iteration: 269356
loss: 1.0083744525909424,grad_norm: 0.9999991294019828, iteration: 269357
loss: 0.9764136672019958,grad_norm: 0.9505414931757307, iteration: 269358
loss: 1.060158610343933,grad_norm: 0.847402095135048, iteration: 269359
loss: 0.9881637096405029,grad_norm: 0.9150226294751537, iteration: 269360
loss: 0.9881567358970642,grad_norm: 0.9480817243354159, iteration: 269361
loss: 0.9705044627189636,grad_norm: 0.7038062972891534, iteration: 269362
loss: 1.0625698566436768,grad_norm: 0.9999998759866993, iteration: 269363
loss: 0.993760883808136,grad_norm: 0.9999990427757737, iteration: 269364
loss: 0.9660940170288086,grad_norm: 0.8819902573094859, iteration: 269365
loss: 0.9813435077667236,grad_norm: 0.8577254322200284, iteration: 269366
loss: 0.9991704821586609,grad_norm: 0.8715656207821526, iteration: 269367
loss: 1.0200846195220947,grad_norm: 0.7758196710598545, iteration: 269368
loss: 0.9751402735710144,grad_norm: 0.8319927787921851, iteration: 269369
loss: 1.0566987991333008,grad_norm: 0.9999992326341128, iteration: 269370
loss: 0.9951249957084656,grad_norm: 0.8991752435493215, iteration: 269371
loss: 1.0109840631484985,grad_norm: 0.999999238125108, iteration: 269372
loss: 0.9829390645027161,grad_norm: 0.839979346860559, iteration: 269373
loss: 0.9960923790931702,grad_norm: 0.9081737052541355, iteration: 269374
loss: 0.9522910714149475,grad_norm: 0.8223756976222638, iteration: 269375
loss: 0.9906398057937622,grad_norm: 0.9309148806678075, iteration: 269376
loss: 0.9870597124099731,grad_norm: 0.9114065950821209, iteration: 269377
loss: 0.9917327761650085,grad_norm: 0.9938417160081096, iteration: 269378
loss: 1.0000197887420654,grad_norm: 0.8885276908938934, iteration: 269379
loss: 1.01337730884552,grad_norm: 0.8198190502714049, iteration: 269380
loss: 0.9819303154945374,grad_norm: 0.8252167385189204, iteration: 269381
loss: 0.9686146378517151,grad_norm: 0.9081633607948919, iteration: 269382
loss: 0.9593438506126404,grad_norm: 0.95566918509044, iteration: 269383
loss: 0.9981059432029724,grad_norm: 0.9999998198069151, iteration: 269384
loss: 0.9906277060508728,grad_norm: 0.7685029742257647, iteration: 269385
loss: 1.0223135948181152,grad_norm: 0.982951088732347, iteration: 269386
loss: 1.0137884616851807,grad_norm: 0.9820171110562533, iteration: 269387
loss: 0.9678772687911987,grad_norm: 0.9999991898446778, iteration: 269388
loss: 0.966067373752594,grad_norm: 0.8592597905639507, iteration: 269389
loss: 0.9696022272109985,grad_norm: 0.8582682887891964, iteration: 269390
loss: 1.022024393081665,grad_norm: 0.8549216591094537, iteration: 269391
loss: 0.961401641368866,grad_norm: 0.9999989542525743, iteration: 269392
loss: 1.005469560623169,grad_norm: 0.7925372274391465, iteration: 269393
loss: 1.0728338956832886,grad_norm: 0.9999992233023319, iteration: 269394
loss: 1.0025190114974976,grad_norm: 0.999999113900343, iteration: 269395
loss: 1.0183476209640503,grad_norm: 0.8348020312017291, iteration: 269396
loss: 1.0170058012008667,grad_norm: 0.9737201606382374, iteration: 269397
loss: 0.9894171357154846,grad_norm: 0.7847816314728747, iteration: 269398
loss: 0.983241617679596,grad_norm: 0.8680153428347015, iteration: 269399
loss: 1.0989469289779663,grad_norm: 1.0000000974594956, iteration: 269400
loss: 0.9933496117591858,grad_norm: 0.8392988292726136, iteration: 269401
loss: 0.9986206889152527,grad_norm: 0.9837290490112846, iteration: 269402
loss: 0.9939897656440735,grad_norm: 0.853833864980166, iteration: 269403
loss: 0.9978272914886475,grad_norm: 0.8816324336688703, iteration: 269404
loss: 1.0506309270858765,grad_norm: 0.9999998207104827, iteration: 269405
loss: 0.9975031018257141,grad_norm: 0.850707151225724, iteration: 269406
loss: 1.1362419128417969,grad_norm: 0.9999993138434665, iteration: 269407
loss: 0.9633212685585022,grad_norm: 0.8515679904261637, iteration: 269408
loss: 1.0253595113754272,grad_norm: 0.8817123105077881, iteration: 269409
loss: 1.020320177078247,grad_norm: 0.9999993179061355, iteration: 269410
loss: 0.9913556575775146,grad_norm: 0.9999995562594974, iteration: 269411
loss: 0.9823306202888489,grad_norm: 0.7889110776916285, iteration: 269412
loss: 1.0317306518554688,grad_norm: 0.7467219333569791, iteration: 269413
loss: 0.9957887530326843,grad_norm: 0.9226913898565909, iteration: 269414
loss: 1.0051599740982056,grad_norm: 0.8856543398266296, iteration: 269415
loss: 0.9963710904121399,grad_norm: 0.9999993244111677, iteration: 269416
loss: 0.9858046174049377,grad_norm: 0.7385452377051375, iteration: 269417
loss: 1.011438250541687,grad_norm: 0.8401991644848683, iteration: 269418
loss: 0.9976621866226196,grad_norm: 0.8203109815045642, iteration: 269419
loss: 1.0167557001113892,grad_norm: 0.8987082569961838, iteration: 269420
loss: 0.9777122139930725,grad_norm: 0.8431874509789352, iteration: 269421
loss: 0.9813558459281921,grad_norm: 0.8393124412489946, iteration: 269422
loss: 0.9778693318367004,grad_norm: 0.8190031444535746, iteration: 269423
loss: 0.9926077723503113,grad_norm: 0.6372646106662196, iteration: 269424
loss: 1.0280957221984863,grad_norm: 0.8790501764752001, iteration: 269425
loss: 0.9979364275932312,grad_norm: 0.8855986277234433, iteration: 269426
loss: 1.0000531673431396,grad_norm: 0.89342188520983, iteration: 269427
loss: 1.0980024337768555,grad_norm: 0.9999993740206827, iteration: 269428
loss: 1.0083454847335815,grad_norm: 0.9999991375811005, iteration: 269429
loss: 1.017749547958374,grad_norm: 0.88576605094166, iteration: 269430
loss: 0.9985014796257019,grad_norm: 0.9248792484057876, iteration: 269431
loss: 1.003037691116333,grad_norm: 0.9247886457170509, iteration: 269432
loss: 1.0273176431655884,grad_norm: 0.7461028864561464, iteration: 269433
loss: 1.0099010467529297,grad_norm: 0.9355327957779105, iteration: 269434
loss: 0.9594506621360779,grad_norm: 0.9034828522765067, iteration: 269435
loss: 1.0204025506973267,grad_norm: 0.92429005227712, iteration: 269436
loss: 1.0163010358810425,grad_norm: 0.9999990653843023, iteration: 269437
loss: 1.0085663795471191,grad_norm: 0.8995139719387958, iteration: 269438
loss: 0.9732587337493896,grad_norm: 0.8441891443999273, iteration: 269439
loss: 0.9910432696342468,grad_norm: 0.818283411648, iteration: 269440
loss: 0.9895552396774292,grad_norm: 0.8475787558792223, iteration: 269441
loss: 1.0081558227539062,grad_norm: 0.9999998143211746, iteration: 269442
loss: 0.9940920472145081,grad_norm: 0.892889091158116, iteration: 269443
loss: 0.9852272272109985,grad_norm: 0.8429435687081487, iteration: 269444
loss: 1.0148811340332031,grad_norm: 0.9143896962980598, iteration: 269445
loss: 0.9905748963356018,grad_norm: 0.8953319041211673, iteration: 269446
loss: 1.0678136348724365,grad_norm: 0.9477877221461785, iteration: 269447
loss: 1.005785584449768,grad_norm: 0.8786905787207276, iteration: 269448
loss: 1.0061135292053223,grad_norm: 0.9999992289697794, iteration: 269449
loss: 1.0169073343276978,grad_norm: 0.8808932183305345, iteration: 269450
loss: 1.006219744682312,grad_norm: 0.8180818893228787, iteration: 269451
loss: 0.9632834792137146,grad_norm: 0.8485223428077552, iteration: 269452
loss: 1.0180939435958862,grad_norm: 0.999999327806927, iteration: 269453
loss: 0.98752361536026,grad_norm: 0.8014370486134216, iteration: 269454
loss: 0.9660190343856812,grad_norm: 0.9999999906161084, iteration: 269455
loss: 0.9890876412391663,grad_norm: 0.818880859064005, iteration: 269456
loss: 1.023201584815979,grad_norm: 0.8326944607927724, iteration: 269457
loss: 0.9881471991539001,grad_norm: 0.9999989947024259, iteration: 269458
loss: 0.9801425337791443,grad_norm: 0.7554855600057745, iteration: 269459
loss: 0.9899511337280273,grad_norm: 0.7781503956436756, iteration: 269460
loss: 0.9990506768226624,grad_norm: 0.9999989285542211, iteration: 269461
loss: 1.0805139541625977,grad_norm: 0.9640741322405968, iteration: 269462
loss: 0.9859232902526855,grad_norm: 0.9999991157738448, iteration: 269463
loss: 1.0436593294143677,grad_norm: 0.9696337293624165, iteration: 269464
loss: 0.9711068272590637,grad_norm: 0.8318423219162006, iteration: 269465
loss: 0.9796732664108276,grad_norm: 0.9657597339017664, iteration: 269466
loss: 0.9873208403587341,grad_norm: 0.9705716858137977, iteration: 269467
loss: 0.9890904426574707,grad_norm: 0.7885001144408765, iteration: 269468
loss: 1.1341668367385864,grad_norm: 0.9999996501826579, iteration: 269469
loss: 0.9867371916770935,grad_norm: 0.793284736193653, iteration: 269470
loss: 1.0449328422546387,grad_norm: 0.8635827509860105, iteration: 269471
loss: 1.040286660194397,grad_norm: 0.9171285604689022, iteration: 269472
loss: 1.0384013652801514,grad_norm: 0.8830152101520169, iteration: 269473
loss: 1.010144829750061,grad_norm: 0.8278697019440663, iteration: 269474
loss: 0.987602174282074,grad_norm: 0.7572960611754735, iteration: 269475
loss: 1.0362091064453125,grad_norm: 0.7050651315765912, iteration: 269476
loss: 1.0327985286712646,grad_norm: 0.9314866442115894, iteration: 269477
loss: 0.980823814868927,grad_norm: 0.9298130911104072, iteration: 269478
loss: 0.982048511505127,grad_norm: 0.8299266006058978, iteration: 269479
loss: 0.9735757112503052,grad_norm: 0.9367087736531913, iteration: 269480
loss: 1.02460515499115,grad_norm: 0.8776324306976238, iteration: 269481
loss: 1.022168517112732,grad_norm: 0.9945597099070179, iteration: 269482
loss: 1.0230902433395386,grad_norm: 0.8230731981543189, iteration: 269483
loss: 1.0358295440673828,grad_norm: 0.8011017021640021, iteration: 269484
loss: 1.0054614543914795,grad_norm: 0.9851124931434303, iteration: 269485
loss: 0.989083468914032,grad_norm: 0.8404418784880883, iteration: 269486
loss: 1.0028127431869507,grad_norm: 0.9999993062719557, iteration: 269487
loss: 0.97956383228302,grad_norm: 0.9999998560545553, iteration: 269488
loss: 0.9823753237724304,grad_norm: 0.9204237566019242, iteration: 269489
loss: 1.0455772876739502,grad_norm: 0.999999300638044, iteration: 269490
loss: 1.0018924474716187,grad_norm: 0.8228803183345662, iteration: 269491
loss: 0.9790496230125427,grad_norm: 0.999118455190101, iteration: 269492
loss: 1.0394302606582642,grad_norm: 0.999998988686828, iteration: 269493
loss: 1.0063295364379883,grad_norm: 0.9887272976784507, iteration: 269494
loss: 1.0313103199005127,grad_norm: 0.9999995690628315, iteration: 269495
loss: 0.9501218199729919,grad_norm: 0.7633790160205811, iteration: 269496
loss: 0.9972955584526062,grad_norm: 0.8007689711842321, iteration: 269497
loss: 1.0086638927459717,grad_norm: 0.7983966024673194, iteration: 269498
loss: 0.9795457720756531,grad_norm: 0.8603921196388754, iteration: 269499
loss: 1.0208195447921753,grad_norm: 0.9999990973109717, iteration: 269500
loss: 0.9926395416259766,grad_norm: 0.8348147241805428, iteration: 269501
loss: 0.9745715856552124,grad_norm: 0.8384928114952646, iteration: 269502
loss: 1.003952980041504,grad_norm: 0.9999991175514599, iteration: 269503
loss: 1.0384204387664795,grad_norm: 0.7032153242892704, iteration: 269504
loss: 0.9709367156028748,grad_norm: 0.9999990720088454, iteration: 269505
loss: 1.0149587392807007,grad_norm: 0.9009548382186449, iteration: 269506
loss: 1.0110342502593994,grad_norm: 0.7694542296200728, iteration: 269507
loss: 1.0019210577011108,grad_norm: 0.8731941611706373, iteration: 269508
loss: 0.9949432611465454,grad_norm: 0.8724720611997766, iteration: 269509
loss: 0.9734321236610413,grad_norm: 0.8970139040257274, iteration: 269510
loss: 1.0263172388076782,grad_norm: 0.7134833552806517, iteration: 269511
loss: 0.9886643886566162,grad_norm: 0.9999990324674134, iteration: 269512
loss: 1.0049054622650146,grad_norm: 0.894464122609774, iteration: 269513
loss: 0.9900404214859009,grad_norm: 0.8891680254257381, iteration: 269514
loss: 0.9950100183486938,grad_norm: 0.9294376645913632, iteration: 269515
loss: 1.0362614393234253,grad_norm: 0.9396685308567275, iteration: 269516
loss: 1.0203813314437866,grad_norm: 0.9999990887607356, iteration: 269517
loss: 0.9887992739677429,grad_norm: 0.9088618581124474, iteration: 269518
loss: 1.0238876342773438,grad_norm: 0.9588370447554085, iteration: 269519
loss: 1.0550318956375122,grad_norm: 0.8479706923310303, iteration: 269520
loss: 0.9952734112739563,grad_norm: 0.859190750156616, iteration: 269521
loss: 1.0540717840194702,grad_norm: 0.9245847087890292, iteration: 269522
loss: 1.0145992040634155,grad_norm: 0.9999992333177905, iteration: 269523
loss: 0.9753425121307373,grad_norm: 0.9999989920731673, iteration: 269524
loss: 1.0036171674728394,grad_norm: 0.8735151019050111, iteration: 269525
loss: 0.9250797033309937,grad_norm: 0.9999993614895871, iteration: 269526
loss: 0.9941472411155701,grad_norm: 0.936897314748212, iteration: 269527
loss: 1.0299978256225586,grad_norm: 0.8186655768634565, iteration: 269528
loss: 0.9601927399635315,grad_norm: 0.9157151987203026, iteration: 269529
loss: 0.9978899359703064,grad_norm: 0.9999989905477952, iteration: 269530
loss: 1.0243269205093384,grad_norm: 0.9999992993688405, iteration: 269531
loss: 1.000731348991394,grad_norm: 0.6721489715198719, iteration: 269532
loss: 0.9613561630249023,grad_norm: 0.8533540331534105, iteration: 269533
loss: 1.0559426546096802,grad_norm: 0.957969442039545, iteration: 269534
loss: 0.9764183163642883,grad_norm: 0.8349200792169873, iteration: 269535
loss: 0.9922690391540527,grad_norm: 0.8084433674708015, iteration: 269536
loss: 1.0794508457183838,grad_norm: 0.8047689870527457, iteration: 269537
loss: 1.0422364473342896,grad_norm: 0.9999991372864507, iteration: 269538
loss: 0.985196053981781,grad_norm: 0.8308934165464383, iteration: 269539
loss: 1.0549933910369873,grad_norm: 0.9999991482265262, iteration: 269540
loss: 0.9806209802627563,grad_norm: 0.920594082323776, iteration: 269541
loss: 0.9974820017814636,grad_norm: 0.8093597231380542, iteration: 269542
loss: 1.0057731866836548,grad_norm: 0.8407927645337926, iteration: 269543
loss: 1.005326509475708,grad_norm: 0.8445213885172267, iteration: 269544
loss: 0.9879047870635986,grad_norm: 0.8649747085965418, iteration: 269545
loss: 0.9857192039489746,grad_norm: 0.7702117916734071, iteration: 269546
loss: 0.9867414832115173,grad_norm: 0.9514122660327095, iteration: 269547
loss: 0.9845175743103027,grad_norm: 0.9557462799377798, iteration: 269548
loss: 1.0332152843475342,grad_norm: 0.9043352049949872, iteration: 269549
loss: 0.9560270309448242,grad_norm: 0.9440571545724757, iteration: 269550
loss: 1.0440199375152588,grad_norm: 0.999999266020411, iteration: 269551
loss: 1.0186023712158203,grad_norm: 0.6963657675803462, iteration: 269552
loss: 1.0253090858459473,grad_norm: 0.7953890890313127, iteration: 269553
loss: 1.0161077976226807,grad_norm: 0.8213038310585933, iteration: 269554
loss: 1.018135905265808,grad_norm: 0.9999991366321228, iteration: 269555
loss: 1.0035003423690796,grad_norm: 0.8760802254347774, iteration: 269556
loss: 0.9703662395477295,grad_norm: 0.8384463284066458, iteration: 269557
loss: 1.0001568794250488,grad_norm: 0.7069676805040257, iteration: 269558
loss: 0.9962218999862671,grad_norm: 0.9369141324473503, iteration: 269559
loss: 1.0055289268493652,grad_norm: 0.8522291936345948, iteration: 269560
loss: 1.0381895303726196,grad_norm: 0.9999996288683276, iteration: 269561
loss: 0.9591798782348633,grad_norm: 0.9999999565062825, iteration: 269562
loss: 1.0310832262039185,grad_norm: 0.8987615190758876, iteration: 269563
loss: 1.02145254611969,grad_norm: 0.7806310940007075, iteration: 269564
loss: 1.0072401762008667,grad_norm: 0.7876601890580202, iteration: 269565
loss: 1.042525291442871,grad_norm: 0.9424653952796646, iteration: 269566
loss: 0.9860266447067261,grad_norm: 0.9709523181336156, iteration: 269567
loss: 0.9868230223655701,grad_norm: 0.8571245612119748, iteration: 269568
loss: 1.0271767377853394,grad_norm: 0.8863011856801671, iteration: 269569
loss: 1.0838416814804077,grad_norm: 0.9999998708371263, iteration: 269570
loss: 0.956951916217804,grad_norm: 0.8662909118721338, iteration: 269571
loss: 0.9827181696891785,grad_norm: 0.9398273949130471, iteration: 269572
loss: 1.0095261335372925,grad_norm: 0.757361725481835, iteration: 269573
loss: 1.0361263751983643,grad_norm: 0.9999996209287608, iteration: 269574
loss: 1.0119065046310425,grad_norm: 0.9999990943889033, iteration: 269575
loss: 1.0814018249511719,grad_norm: 0.9999992018540457, iteration: 269576
loss: 1.1286319494247437,grad_norm: 0.999999090448675, iteration: 269577
loss: 0.9684485197067261,grad_norm: 0.9747014102974897, iteration: 269578
loss: 0.9646887183189392,grad_norm: 0.7645382565334268, iteration: 269579
loss: 0.9837910532951355,grad_norm: 0.8016555102788075, iteration: 269580
loss: 0.9888923168182373,grad_norm: 0.837988462883196, iteration: 269581
loss: 1.0109074115753174,grad_norm: 0.8227639953289819, iteration: 269582
loss: 0.971598207950592,grad_norm: 0.804613028538963, iteration: 269583
loss: 1.0564086437225342,grad_norm: 0.9999992686050648, iteration: 269584
loss: 1.0203138589859009,grad_norm: 0.9999990087334015, iteration: 269585
loss: 0.9723078608512878,grad_norm: 0.9219401177705796, iteration: 269586
loss: 1.0667444467544556,grad_norm: 0.9999994703696717, iteration: 269587
loss: 1.0191899538040161,grad_norm: 0.8056235913647373, iteration: 269588
loss: 1.0106441974639893,grad_norm: 0.9999990668900446, iteration: 269589
loss: 0.9629896283149719,grad_norm: 0.8665650288925681, iteration: 269590
loss: 0.9937979578971863,grad_norm: 0.7608920865872731, iteration: 269591
loss: 1.0068817138671875,grad_norm: 0.8419530978713553, iteration: 269592
loss: 0.9975739121437073,grad_norm: 0.7411108941889332, iteration: 269593
loss: 1.0075033903121948,grad_norm: 0.9085588304983503, iteration: 269594
loss: 0.9869601130485535,grad_norm: 0.9999991102057398, iteration: 269595
loss: 1.0553115606307983,grad_norm: 0.8804498878436873, iteration: 269596
loss: 1.019697904586792,grad_norm: 0.8157211562010008, iteration: 269597
loss: 1.0368784666061401,grad_norm: 0.8278162628108281, iteration: 269598
loss: 0.9790481925010681,grad_norm: 0.8307853746789844, iteration: 269599
loss: 1.0332818031311035,grad_norm: 0.8371156112315634, iteration: 269600
loss: 1.0313373804092407,grad_norm: 0.850064613060282, iteration: 269601
loss: 1.0404953956604004,grad_norm: 0.9999993807563854, iteration: 269602
loss: 0.9782300591468811,grad_norm: 0.7326416852339382, iteration: 269603
loss: 1.0313626527786255,grad_norm: 0.9232768638744161, iteration: 269604
loss: 0.9976145029067993,grad_norm: 0.8317048590530729, iteration: 269605
loss: 1.0038689374923706,grad_norm: 0.9142768241758387, iteration: 269606
loss: 1.0803786516189575,grad_norm: 0.9999990580611188, iteration: 269607
loss: 1.0074207782745361,grad_norm: 0.9973399852623359, iteration: 269608
loss: 1.021235704421997,grad_norm: 0.9999992195146554, iteration: 269609
loss: 1.0316752195358276,grad_norm: 0.9200475401580535, iteration: 269610
loss: 1.047226905822754,grad_norm: 0.9999990541022713, iteration: 269611
loss: 1.0049349069595337,grad_norm: 0.9999991086415305, iteration: 269612
loss: 1.021499752998352,grad_norm: 0.9223818491916771, iteration: 269613
loss: 0.9974185824394226,grad_norm: 0.9111202559774834, iteration: 269614
loss: 1.069691777229309,grad_norm: 0.9999997433538595, iteration: 269615
loss: 0.9666138887405396,grad_norm: 0.7794323892114168, iteration: 269616
loss: 1.0078414678573608,grad_norm: 0.8625144272379881, iteration: 269617
loss: 0.9823210835456848,grad_norm: 0.8491156007171816, iteration: 269618
loss: 0.9908915758132935,grad_norm: 0.813654167311063, iteration: 269619
loss: 1.014360785484314,grad_norm: 0.9586408100523595, iteration: 269620
loss: 0.9968152046203613,grad_norm: 0.8929155487276096, iteration: 269621
loss: 1.0430389642715454,grad_norm: 0.9999992905227625, iteration: 269622
loss: 1.0523908138275146,grad_norm: 0.8592115252801035, iteration: 269623
loss: 1.1187100410461426,grad_norm: 0.9999998832630349, iteration: 269624
loss: 1.0161362886428833,grad_norm: 0.9999989949627918, iteration: 269625
loss: 1.0138689279556274,grad_norm: 0.7995795879069759, iteration: 269626
loss: 0.9712434411048889,grad_norm: 0.8029834694171067, iteration: 269627
loss: 0.9743503928184509,grad_norm: 0.9199659418888642, iteration: 269628
loss: 1.0222694873809814,grad_norm: 0.9999999522611236, iteration: 269629
loss: 0.9859205484390259,grad_norm: 0.837390755818289, iteration: 269630
loss: 1.0314109325408936,grad_norm: 0.9999989769538229, iteration: 269631
loss: 0.9758433699607849,grad_norm: 0.8432332164469094, iteration: 269632
loss: 1.0098048448562622,grad_norm: 0.8708860894610253, iteration: 269633
loss: 0.9954149127006531,grad_norm: 0.8229485084751614, iteration: 269634
loss: 1.1150115728378296,grad_norm: 0.9999990124431223, iteration: 269635
loss: 1.0027960538864136,grad_norm: 0.8595915360405676, iteration: 269636
loss: 1.0026334524154663,grad_norm: 0.7573531746750108, iteration: 269637
loss: 0.9895696043968201,grad_norm: 0.8915327454978829, iteration: 269638
loss: 1.0019237995147705,grad_norm: 0.946744728104682, iteration: 269639
loss: 1.0069624185562134,grad_norm: 0.9999990076169218, iteration: 269640
loss: 0.9881163239479065,grad_norm: 0.839890796186315, iteration: 269641
loss: 1.0565762519836426,grad_norm: 0.9682730591249827, iteration: 269642
loss: 1.0525128841400146,grad_norm: 0.8592708733459419, iteration: 269643
loss: 0.9874985814094543,grad_norm: 0.9999989052371523, iteration: 269644
loss: 1.0890635251998901,grad_norm: 0.9999995225986527, iteration: 269645
loss: 1.0337448120117188,grad_norm: 0.718692059627773, iteration: 269646
loss: 1.0399563312530518,grad_norm: 0.9999990458985741, iteration: 269647
loss: 0.9876596927642822,grad_norm: 0.7780001055765238, iteration: 269648
loss: 1.0353364944458008,grad_norm: 0.9236949414444177, iteration: 269649
loss: 1.0098010301589966,grad_norm: 0.8517452575582766, iteration: 269650
loss: 0.9880117177963257,grad_norm: 0.9977232611925244, iteration: 269651
loss: 1.027932047843933,grad_norm: 0.9557803291681025, iteration: 269652
loss: 0.997784435749054,grad_norm: 0.8604242501905065, iteration: 269653
loss: 1.0256025791168213,grad_norm: 0.7256274723580378, iteration: 269654
loss: 1.0081418752670288,grad_norm: 0.8984047475662118, iteration: 269655
loss: 1.0097838640213013,grad_norm: 0.9816831636824839, iteration: 269656
loss: 1.0439279079437256,grad_norm: 0.752672906983239, iteration: 269657
loss: 1.0056002140045166,grad_norm: 0.8352253119387266, iteration: 269658
loss: 1.0310275554656982,grad_norm: 0.8410218128177112, iteration: 269659
loss: 1.045396089553833,grad_norm: 0.9999999231553174, iteration: 269660
loss: 0.9844297766685486,grad_norm: 0.8629265949119324, iteration: 269661
loss: 0.9831295609474182,grad_norm: 0.9999993579928961, iteration: 269662
loss: 0.9853519797325134,grad_norm: 0.8676653285345469, iteration: 269663
loss: 0.9996800422668457,grad_norm: 0.9118202431510365, iteration: 269664
loss: 1.007283329963684,grad_norm: 0.9999992479539491, iteration: 269665
loss: 1.0509499311447144,grad_norm: 0.9999999731838927, iteration: 269666
loss: 0.9658365845680237,grad_norm: 0.7931420211264142, iteration: 269667
loss: 1.0030308961868286,grad_norm: 0.8190406790818321, iteration: 269668
loss: 0.979331910610199,grad_norm: 0.6885670251320567, iteration: 269669
loss: 0.9792143106460571,grad_norm: 0.9664997638402011, iteration: 269670
loss: 0.9920763969421387,grad_norm: 0.9999991590643145, iteration: 269671
loss: 1.0970922708511353,grad_norm: 0.9668450125052701, iteration: 269672
loss: 0.9796862602233887,grad_norm: 0.860695815142354, iteration: 269673
loss: 0.9970825910568237,grad_norm: 0.8908852376606614, iteration: 269674
loss: 0.9954330921173096,grad_norm: 0.9999989618896848, iteration: 269675
loss: 1.0323567390441895,grad_norm: 0.9353591289394674, iteration: 269676
loss: 0.9734428524971008,grad_norm: 0.9653804893838132, iteration: 269677
loss: 1.0205713510513306,grad_norm: 0.8710291602727875, iteration: 269678
loss: 1.005337119102478,grad_norm: 0.9999991625932279, iteration: 269679
loss: 0.987402081489563,grad_norm: 0.7986257088021943, iteration: 269680
loss: 0.9788813591003418,grad_norm: 0.8161949931263238, iteration: 269681
loss: 0.9947741031646729,grad_norm: 0.9999991565211328, iteration: 269682
loss: 0.9932353496551514,grad_norm: 0.9701322211035965, iteration: 269683
loss: 1.0832830667495728,grad_norm: 0.9999998453538131, iteration: 269684
loss: 1.0050454139709473,grad_norm: 0.7689346852898565, iteration: 269685
loss: 0.9510214328765869,grad_norm: 0.8664540322610478, iteration: 269686
loss: 0.982681393623352,grad_norm: 0.7278442247107872, iteration: 269687
loss: 0.9937773942947388,grad_norm: 0.7517113737517751, iteration: 269688
loss: 1.0423672199249268,grad_norm: 0.9999996336288194, iteration: 269689
loss: 1.012994408607483,grad_norm: 0.999999307420211, iteration: 269690
loss: 0.9706687331199646,grad_norm: 0.8231015824503798, iteration: 269691
loss: 0.986129105091095,grad_norm: 0.6957078795922751, iteration: 269692
loss: 1.1636719703674316,grad_norm: 0.9999994323654183, iteration: 269693
loss: 1.0010737180709839,grad_norm: 0.9737259308236185, iteration: 269694
loss: 1.0048941373825073,grad_norm: 0.9067226096898043, iteration: 269695
loss: 1.0142353773117065,grad_norm: 0.9999991591752069, iteration: 269696
loss: 1.0492472648620605,grad_norm: 0.9402246477567465, iteration: 269697
loss: 1.016083002090454,grad_norm: 0.9250232975787885, iteration: 269698
loss: 1.0091749429702759,grad_norm: 0.8880513466605869, iteration: 269699
loss: 0.9792229533195496,grad_norm: 0.9868313495690003, iteration: 269700
loss: 1.0068919658660889,grad_norm: 0.9255100114604746, iteration: 269701
loss: 1.0306907892227173,grad_norm: 0.9999991445560937, iteration: 269702
loss: 1.0361803770065308,grad_norm: 0.9795480599879977, iteration: 269703
loss: 0.9937215447425842,grad_norm: 0.7990705464232112, iteration: 269704
loss: 0.9834020733833313,grad_norm: 0.7826556923497168, iteration: 269705
loss: 1.1500176191329956,grad_norm: 1.0000000257924873, iteration: 269706
loss: 1.0389163494110107,grad_norm: 0.9499526565654852, iteration: 269707
loss: 1.0272209644317627,grad_norm: 0.9089468114504398, iteration: 269708
loss: 1.0389399528503418,grad_norm: 0.9999996079540789, iteration: 269709
loss: 0.9589406251907349,grad_norm: 0.9143350107236411, iteration: 269710
loss: 0.988222599029541,grad_norm: 0.9127525896481856, iteration: 269711
loss: 1.0174859762191772,grad_norm: 0.9999991440038487, iteration: 269712
loss: 0.98757004737854,grad_norm: 0.9999991544663879, iteration: 269713
loss: 1.0001235008239746,grad_norm: 0.8928926946940476, iteration: 269714
loss: 0.9891821146011353,grad_norm: 0.7172571697843215, iteration: 269715
loss: 1.0111088752746582,grad_norm: 0.9782418823409703, iteration: 269716
loss: 1.0335394144058228,grad_norm: 0.8276258696986384, iteration: 269717
loss: 1.0067241191864014,grad_norm: 0.9044417288186104, iteration: 269718
loss: 0.9759230613708496,grad_norm: 0.9999997020066402, iteration: 269719
loss: 1.0440824031829834,grad_norm: 0.9999992391228307, iteration: 269720
loss: 1.0339680910110474,grad_norm: 0.9999992615856179, iteration: 269721
loss: 1.0473366975784302,grad_norm: 0.9999991931095159, iteration: 269722
loss: 0.9631495475769043,grad_norm: 0.9487038692782496, iteration: 269723
loss: 0.956546425819397,grad_norm: 0.99999958533368, iteration: 269724
loss: 0.9691275954246521,grad_norm: 0.9999999886914593, iteration: 269725
loss: 1.0199719667434692,grad_norm: 0.999999167531039, iteration: 269726
loss: 1.032817006111145,grad_norm: 0.9999990554873857, iteration: 269727
loss: 1.0134230852127075,grad_norm: 0.9677186420555418, iteration: 269728
loss: 1.0024337768554688,grad_norm: 0.9999991455588672, iteration: 269729
loss: 0.9901285767555237,grad_norm: 0.8469146627753554, iteration: 269730
loss: 1.0054162740707397,grad_norm: 0.9999995003925801, iteration: 269731
loss: 1.019112467765808,grad_norm: 0.9999993359495212, iteration: 269732
loss: 0.9471166133880615,grad_norm: 0.9983693775973387, iteration: 269733
loss: 1.0113742351531982,grad_norm: 0.8651961515061437, iteration: 269734
loss: 0.9971511363983154,grad_norm: 0.9999989807774734, iteration: 269735
loss: 0.9401414394378662,grad_norm: 0.8684586097183891, iteration: 269736
loss: 1.0063832998275757,grad_norm: 0.9999990803345441, iteration: 269737
loss: 0.955320417881012,grad_norm: 0.8778027665322136, iteration: 269738
loss: 1.0194580554962158,grad_norm: 0.8821809440934563, iteration: 269739
loss: 1.0038232803344727,grad_norm: 0.8630442478905761, iteration: 269740
loss: 0.9906924962997437,grad_norm: 0.9075346140614658, iteration: 269741
loss: 1.0043151378631592,grad_norm: 0.9999992012436422, iteration: 269742
loss: 0.9827941060066223,grad_norm: 0.8790450599246336, iteration: 269743
loss: 0.9851698875427246,grad_norm: 0.8806470207572314, iteration: 269744
loss: 1.0093671083450317,grad_norm: 0.9999990898110966, iteration: 269745
loss: 1.0464506149291992,grad_norm: 0.9751623642225257, iteration: 269746
loss: 0.9697927832603455,grad_norm: 0.7928156474058092, iteration: 269747
loss: 1.0335289239883423,grad_norm: 0.9917836144511171, iteration: 269748
loss: 0.9831169843673706,grad_norm: 0.7868834030646887, iteration: 269749
loss: 1.0319260358810425,grad_norm: 0.9999995147228398, iteration: 269750
loss: 1.171436071395874,grad_norm: 0.9999999620455291, iteration: 269751
loss: 1.0038981437683105,grad_norm: 0.7376844784729315, iteration: 269752
loss: 0.991310715675354,grad_norm: 0.7119856989581144, iteration: 269753
loss: 0.985201895236969,grad_norm: 0.8029693416072679, iteration: 269754
loss: 1.020790696144104,grad_norm: 0.8060399406461028, iteration: 269755
loss: 1.00230872631073,grad_norm: 0.9999998200551061, iteration: 269756
loss: 0.9572554230690002,grad_norm: 0.9087715090692258, iteration: 269757
loss: 1.0367786884307861,grad_norm: 0.9700246108237909, iteration: 269758
loss: 0.9911073446273804,grad_norm: 0.7926342691162307, iteration: 269759
loss: 0.936053991317749,grad_norm: 0.9999990560077111, iteration: 269760
loss: 1.0055524110794067,grad_norm: 0.9452125951126229, iteration: 269761
loss: 0.9885941743850708,grad_norm: 0.7843335725622251, iteration: 269762
loss: 1.0094048976898193,grad_norm: 0.9999998200093803, iteration: 269763
loss: 1.0438250303268433,grad_norm: 0.8831760734841189, iteration: 269764
loss: 1.0030364990234375,grad_norm: 0.9999989731773011, iteration: 269765
loss: 0.9936807751655579,grad_norm: 0.9883534814417423, iteration: 269766
loss: 1.0143890380859375,grad_norm: 0.8687380641273174, iteration: 269767
loss: 1.0047929286956787,grad_norm: 0.9133826733820551, iteration: 269768
loss: 1.0435526371002197,grad_norm: 1.0000000020466235, iteration: 269769
loss: 1.0080434083938599,grad_norm: 0.7997040825575215, iteration: 269770
loss: 1.103693962097168,grad_norm: 0.9999998926237905, iteration: 269771
loss: 1.0674688816070557,grad_norm: 0.9999998180951073, iteration: 269772
loss: 1.2741984128952026,grad_norm: 0.9999993879451723, iteration: 269773
loss: 1.0112980604171753,grad_norm: 0.9999994244560115, iteration: 269774
loss: 0.9915955662727356,grad_norm: 0.811359493856428, iteration: 269775
loss: 1.0425838232040405,grad_norm: 0.9999990146527761, iteration: 269776
loss: 1.150524616241455,grad_norm: 0.9409139612732219, iteration: 269777
loss: 0.9811009764671326,grad_norm: 0.8960233850731074, iteration: 269778
loss: 1.057835340499878,grad_norm: 1.000000054682568, iteration: 269779
loss: 1.133653163909912,grad_norm: 0.9999999355748638, iteration: 269780
loss: 1.0104179382324219,grad_norm: 0.7792533608187483, iteration: 269781
loss: 1.0916848182678223,grad_norm: 0.9999999521759846, iteration: 269782
loss: 1.085220217704773,grad_norm: 0.9999997635933134, iteration: 269783
loss: 0.9672574400901794,grad_norm: 0.7237715877667804, iteration: 269784
loss: 0.9812753200531006,grad_norm: 0.9999992711515395, iteration: 269785
loss: 1.0080918073654175,grad_norm: 0.9526973904773912, iteration: 269786
loss: 1.0331130027770996,grad_norm: 0.9261480385525674, iteration: 269787
loss: 1.0226151943206787,grad_norm: 0.9999990260315992, iteration: 269788
loss: 0.9589147567749023,grad_norm: 0.7671439214314154, iteration: 269789
loss: 1.0835769176483154,grad_norm: 0.8874604083871772, iteration: 269790
loss: 0.962506115436554,grad_norm: 0.8097333466965991, iteration: 269791
loss: 1.073868751525879,grad_norm: 0.8146530049317297, iteration: 269792
loss: 1.0101191997528076,grad_norm: 0.9999997841603774, iteration: 269793
loss: 0.9950864911079407,grad_norm: 0.9999997951751084, iteration: 269794
loss: 0.9955893158912659,grad_norm: 0.8823965864045712, iteration: 269795
loss: 0.9911577701568604,grad_norm: 0.7743058527499626, iteration: 269796
loss: 0.9952911734580994,grad_norm: 0.7627200796563594, iteration: 269797
loss: 0.993592381477356,grad_norm: 0.9999991171341892, iteration: 269798
loss: 0.9971718192100525,grad_norm: 0.9999990554096091, iteration: 269799
loss: 1.0149297714233398,grad_norm: 0.8896658653631022, iteration: 269800
loss: 0.9908744096755981,grad_norm: 0.9920294552938016, iteration: 269801
loss: 1.0295252799987793,grad_norm: 0.9999993081697077, iteration: 269802
loss: 1.1874240636825562,grad_norm: 0.9999998611262951, iteration: 269803
loss: 1.0226837396621704,grad_norm: 0.8724900185139483, iteration: 269804
loss: 1.0279769897460938,grad_norm: 0.9999992800250487, iteration: 269805
loss: 0.9818317890167236,grad_norm: 0.9952506615731094, iteration: 269806
loss: 1.0145221948623657,grad_norm: 0.9999998727097438, iteration: 269807
loss: 1.0624085664749146,grad_norm: 0.9312493489741418, iteration: 269808
loss: 0.9732845425605774,grad_norm: 0.765375962347299, iteration: 269809
loss: 1.0216732025146484,grad_norm: 0.7890426875768901, iteration: 269810
loss: 0.9726552963256836,grad_norm: 0.8357090505623116, iteration: 269811
loss: 1.0351120233535767,grad_norm: 0.9999997564475086, iteration: 269812
loss: 0.993596613407135,grad_norm: 0.9999990835571405, iteration: 269813
loss: 1.0503782033920288,grad_norm: 0.9999998465078872, iteration: 269814
loss: 1.068095326423645,grad_norm: 0.9999992678739807, iteration: 269815
loss: 1.0087792873382568,grad_norm: 0.8653264785014032, iteration: 269816
loss: 0.9843432903289795,grad_norm: 0.9999990813578844, iteration: 269817
loss: 0.976426362991333,grad_norm: 0.9999993332185506, iteration: 269818
loss: 1.0886610746383667,grad_norm: 0.9999997938394932, iteration: 269819
loss: 1.0255032777786255,grad_norm: 0.8364429198514493, iteration: 269820
loss: 0.989884078502655,grad_norm: 0.999999045052422, iteration: 269821
loss: 0.9791668653488159,grad_norm: 0.7458526212993092, iteration: 269822
loss: 1.0075112581253052,grad_norm: 0.8471004258734421, iteration: 269823
loss: 1.0033962726593018,grad_norm: 0.7918915637775099, iteration: 269824
loss: 1.0495166778564453,grad_norm: 0.9999991386987932, iteration: 269825
loss: 1.0636322498321533,grad_norm: 0.9999995358948537, iteration: 269826
loss: 1.0265566110610962,grad_norm: 0.9999992723534314, iteration: 269827
loss: 1.0007444620132446,grad_norm: 0.8940154684408789, iteration: 269828
loss: 1.0490922927856445,grad_norm: 0.9999992700106591, iteration: 269829
loss: 1.1661491394042969,grad_norm: 1.000000036060619, iteration: 269830
loss: 0.9863768815994263,grad_norm: 0.7464015692061501, iteration: 269831
loss: 1.0116020441055298,grad_norm: 0.8614440622289657, iteration: 269832
loss: 1.0275455713272095,grad_norm: 0.7909757983899137, iteration: 269833
loss: 1.0040619373321533,grad_norm: 0.8389151556606212, iteration: 269834
loss: 1.0032058954238892,grad_norm: 0.9043680693289081, iteration: 269835
loss: 1.0489351749420166,grad_norm: 0.8752139521174871, iteration: 269836
loss: 1.0099835395812988,grad_norm: 0.8142959268836941, iteration: 269837
loss: 0.995538055896759,grad_norm: 0.9999990852260394, iteration: 269838
loss: 1.0016299486160278,grad_norm: 0.9330697907215473, iteration: 269839
loss: 0.9842312932014465,grad_norm: 0.8965030685833374, iteration: 269840
loss: 0.9999561309814453,grad_norm: 0.9556402583331809, iteration: 269841
loss: 0.9822033643722534,grad_norm: 0.8011813252701405, iteration: 269842
loss: 1.0142066478729248,grad_norm: 0.949118492494596, iteration: 269843
loss: 0.992651641368866,grad_norm: 0.8511877509371166, iteration: 269844
loss: 1.039618730545044,grad_norm: 0.8827497228537943, iteration: 269845
loss: 1.0083022117614746,grad_norm: 0.8957330213353334, iteration: 269846
loss: 0.9933539628982544,grad_norm: 0.9022451420412544, iteration: 269847
loss: 1.0071393251419067,grad_norm: 0.9084111149024464, iteration: 269848
loss: 1.0759150981903076,grad_norm: 0.9999997418480073, iteration: 269849
loss: 1.0646541118621826,grad_norm: 0.9999992860636727, iteration: 269850
loss: 1.0185433626174927,grad_norm: 0.9999993955544315, iteration: 269851
loss: 0.9924314022064209,grad_norm: 0.9999990773495134, iteration: 269852
loss: 0.9949660301208496,grad_norm: 0.8598029817591792, iteration: 269853
loss: 0.9886892437934875,grad_norm: 0.9999990314635657, iteration: 269854
loss: 0.9725655317306519,grad_norm: 0.7139368221489114, iteration: 269855
loss: 1.0083918571472168,grad_norm: 0.8921094118042763, iteration: 269856
loss: 1.0320285558700562,grad_norm: 0.9999996344255154, iteration: 269857
loss: 0.9846662282943726,grad_norm: 0.8106752938936778, iteration: 269858
loss: 1.0153981447219849,grad_norm: 0.9841427110864788, iteration: 269859
loss: 1.0299073457717896,grad_norm: 0.8494127447327067, iteration: 269860
loss: 1.0132160186767578,grad_norm: 0.8968403796571741, iteration: 269861
loss: 0.9832663536071777,grad_norm: 0.89713270596596, iteration: 269862
loss: 1.0260065793991089,grad_norm: 0.8849957285139173, iteration: 269863
loss: 1.0059159994125366,grad_norm: 0.999999097128964, iteration: 269864
loss: 0.9879172444343567,grad_norm: 0.8960533580721982, iteration: 269865
loss: 1.0109174251556396,grad_norm: 0.9267373105076816, iteration: 269866
loss: 0.9997865557670593,grad_norm: 0.7753112413685291, iteration: 269867
loss: 0.975346565246582,grad_norm: 0.9999990836664022, iteration: 269868
loss: 0.9934037923812866,grad_norm: 0.7984515647515866, iteration: 269869
loss: 1.0159220695495605,grad_norm: 0.97045873106879, iteration: 269870
loss: 0.9963541626930237,grad_norm: 0.9999991648750528, iteration: 269871
loss: 1.006009817123413,grad_norm: 0.8807410122392064, iteration: 269872
loss: 0.9774782061576843,grad_norm: 0.9985425586710304, iteration: 269873
loss: 0.9862045645713806,grad_norm: 0.8872122864083669, iteration: 269874
loss: 1.0081977844238281,grad_norm: 0.999999283513861, iteration: 269875
loss: 1.0395781993865967,grad_norm: 0.9555636831343544, iteration: 269876
loss: 1.0065147876739502,grad_norm: 0.999999167089551, iteration: 269877
loss: 1.0133013725280762,grad_norm: 0.82844747932752, iteration: 269878
loss: 1.0185649394989014,grad_norm: 0.9999995516901289, iteration: 269879
loss: 1.0038628578186035,grad_norm: 0.7424388030700511, iteration: 269880
loss: 0.9639198780059814,grad_norm: 0.9999991080321012, iteration: 269881
loss: 1.2101686000823975,grad_norm: 0.9999996423302803, iteration: 269882
loss: 0.9787325263023376,grad_norm: 0.806829596382204, iteration: 269883
loss: 0.9665724039077759,grad_norm: 0.7879840929178021, iteration: 269884
loss: 0.9783433675765991,grad_norm: 0.889921902679587, iteration: 269885
loss: 1.040053129196167,grad_norm: 0.8828242528930945, iteration: 269886
loss: 0.9791417717933655,grad_norm: 0.9999992715064184, iteration: 269887
loss: 0.9449279308319092,grad_norm: 0.9241852869204739, iteration: 269888
loss: 0.9991135597229004,grad_norm: 0.9620248351015733, iteration: 269889
loss: 1.1043394804000854,grad_norm: 0.9999991566777444, iteration: 269890
loss: 1.0042177438735962,grad_norm: 0.9999995524377016, iteration: 269891
loss: 0.9855925440788269,grad_norm: 0.7898944998561443, iteration: 269892
loss: 1.017561674118042,grad_norm: 0.999999142286801, iteration: 269893
loss: 0.9760343432426453,grad_norm: 0.8184737450908222, iteration: 269894
loss: 0.9865151643753052,grad_norm: 0.8757767208573773, iteration: 269895
loss: 1.003127932548523,grad_norm: 0.8305747133276624, iteration: 269896
loss: 1.0337127447128296,grad_norm: 0.9999997171731327, iteration: 269897
loss: 1.0405700206756592,grad_norm: 0.9999990309383745, iteration: 269898
loss: 1.0005979537963867,grad_norm: 0.8469994189461165, iteration: 269899
loss: 1.2218703031539917,grad_norm: 0.999999877996904, iteration: 269900
loss: 1.0278375148773193,grad_norm: 0.9177581953947344, iteration: 269901
loss: 0.9844141602516174,grad_norm: 0.9979641071767764, iteration: 269902
loss: 0.9875799417495728,grad_norm: 0.7526228797311614, iteration: 269903
loss: 0.9489555358886719,grad_norm: 0.8005334862474893, iteration: 269904
loss: 0.9883167147636414,grad_norm: 0.9531265798007207, iteration: 269905
loss: 1.010414481163025,grad_norm: 0.9525503316338527, iteration: 269906
loss: 1.0405117273330688,grad_norm: 0.8814655869240378, iteration: 269907
loss: 0.9761282205581665,grad_norm: 0.7688784658551406, iteration: 269908
loss: 1.0157414674758911,grad_norm: 0.6606070011993093, iteration: 269909
loss: 0.9480628967285156,grad_norm: 0.7137148730864471, iteration: 269910
loss: 1.0729503631591797,grad_norm: 0.9999993988235056, iteration: 269911
loss: 0.9763820767402649,grad_norm: 0.8019696760422582, iteration: 269912
loss: 1.0133144855499268,grad_norm: 0.7134650990721411, iteration: 269913
loss: 1.0639100074768066,grad_norm: 0.9999994415881185, iteration: 269914
loss: 0.9955729842185974,grad_norm: 0.9999989106197964, iteration: 269915
loss: 1.0171126127243042,grad_norm: 0.983748854145089, iteration: 269916
loss: 1.0153248310089111,grad_norm: 0.9084009682361037, iteration: 269917
loss: 1.0528085231781006,grad_norm: 0.8889843815602473, iteration: 269918
loss: 1.022494912147522,grad_norm: 0.6934115424556374, iteration: 269919
loss: 0.9815385341644287,grad_norm: 0.8064426343791983, iteration: 269920
loss: 1.0123684406280518,grad_norm: 0.8607031349478914, iteration: 269921
loss: 0.9791467785835266,grad_norm: 0.8699333300825323, iteration: 269922
loss: 1.0962677001953125,grad_norm: 0.9999993043575759, iteration: 269923
loss: 1.0146723985671997,grad_norm: 0.8077335050571115, iteration: 269924
loss: 0.9840261936187744,grad_norm: 0.9999992112601372, iteration: 269925
loss: 1.0594648122787476,grad_norm: 0.9999995921656181, iteration: 269926
loss: 0.9972731471061707,grad_norm: 0.9843979930238156, iteration: 269927
loss: 0.996672511100769,grad_norm: 0.9999999634407106, iteration: 269928
loss: 1.0262031555175781,grad_norm: 0.955417817364036, iteration: 269929
loss: 0.9816215634346008,grad_norm: 0.999999045818591, iteration: 269930
loss: 1.00059974193573,grad_norm: 0.9999997667283715, iteration: 269931
loss: 1.024792194366455,grad_norm: 0.9526752477518883, iteration: 269932
loss: 1.077233910560608,grad_norm: 0.9999994381538676, iteration: 269933
loss: 1.022939682006836,grad_norm: 0.9999989808246658, iteration: 269934
loss: 1.036881923675537,grad_norm: 0.9999993349846285, iteration: 269935
loss: 1.0245726108551025,grad_norm: 0.8320626884902572, iteration: 269936
loss: 0.9987051486968994,grad_norm: 0.8915272366203294, iteration: 269937
loss: 1.0150293111801147,grad_norm: 0.9395281869204721, iteration: 269938
loss: 0.9976063966751099,grad_norm: 0.8946392158188294, iteration: 269939
loss: 1.0154292583465576,grad_norm: 0.9836990625455417, iteration: 269940
loss: 1.08012056350708,grad_norm: 0.999999546470828, iteration: 269941
loss: 1.0474742650985718,grad_norm: 0.9999997083927579, iteration: 269942
loss: 1.0433239936828613,grad_norm: 0.9999991527880898, iteration: 269943
loss: 0.9849817752838135,grad_norm: 0.8990820135534082, iteration: 269944
loss: 1.0206060409545898,grad_norm: 0.8981491814342555, iteration: 269945
loss: 1.0923465490341187,grad_norm: 0.9754357276996739, iteration: 269946
loss: 0.9978250861167908,grad_norm: 0.9756991701309401, iteration: 269947
loss: 1.0253443717956543,grad_norm: 0.9999998862786984, iteration: 269948
loss: 1.0025689601898193,grad_norm: 0.856178596291426, iteration: 269949
loss: 0.9925533533096313,grad_norm: 0.8379621744608188, iteration: 269950
loss: 0.9755239486694336,grad_norm: 0.9049209636029684, iteration: 269951
loss: 1.0342704057693481,grad_norm: 0.9744418372361315, iteration: 269952
loss: 0.9676470160484314,grad_norm: 0.8868542602516745, iteration: 269953
loss: 1.0150872468948364,grad_norm: 0.9999994722386027, iteration: 269954
loss: 1.0095020532608032,grad_norm: 0.7806983841518451, iteration: 269955
loss: 1.0242761373519897,grad_norm: 0.7759263806380027, iteration: 269956
loss: 1.0030765533447266,grad_norm: 0.999999055667047, iteration: 269957
loss: 1.0415985584259033,grad_norm: 0.9999998520906163, iteration: 269958
loss: 0.9714630246162415,grad_norm: 0.9999998806275997, iteration: 269959
loss: 1.0035122632980347,grad_norm: 0.9085396271857475, iteration: 269960
loss: 0.974008321762085,grad_norm: 0.7955078952791411, iteration: 269961
loss: 1.0641404390335083,grad_norm: 0.8548188960613121, iteration: 269962
loss: 1.0276435613632202,grad_norm: 0.9999991757921314, iteration: 269963
loss: 1.0039300918579102,grad_norm: 0.9999990520703304, iteration: 269964
loss: 1.012062668800354,grad_norm: 0.9087746028198003, iteration: 269965
loss: 1.0818932056427002,grad_norm: 0.9999999146329334, iteration: 269966
loss: 0.9725414514541626,grad_norm: 0.8631413397175753, iteration: 269967
loss: 1.0243463516235352,grad_norm: 0.863685667372389, iteration: 269968
loss: 1.0462111234664917,grad_norm: 0.7985023744390196, iteration: 269969
loss: 0.977398157119751,grad_norm: 0.8948219551471522, iteration: 269970
loss: 0.9685430526733398,grad_norm: 0.9846073752403796, iteration: 269971
loss: 1.002152442932129,grad_norm: 0.8414929872051019, iteration: 269972
loss: 0.9850491881370544,grad_norm: 0.913794948007318, iteration: 269973
loss: 1.0364505052566528,grad_norm: 0.815394536920298, iteration: 269974
loss: 0.9975051283836365,grad_norm: 0.9999992008952491, iteration: 269975
loss: 1.0195986032485962,grad_norm: 0.999999744401529, iteration: 269976
loss: 1.053735613822937,grad_norm: 0.9999993213770403, iteration: 269977
loss: 0.9696568846702576,grad_norm: 0.9674116529486452, iteration: 269978
loss: 0.970705509185791,grad_norm: 0.7948178458137858, iteration: 269979
loss: 1.0222151279449463,grad_norm: 0.8961990188449247, iteration: 269980
loss: 1.0337759256362915,grad_norm: 0.8511734498455881, iteration: 269981
loss: 1.081654667854309,grad_norm: 0.9566372321271787, iteration: 269982
loss: 0.9918733835220337,grad_norm: 0.8425510855333316, iteration: 269983
loss: 0.9977614879608154,grad_norm: 0.8234351599976131, iteration: 269984
loss: 1.013922095298767,grad_norm: 0.8636005882801514, iteration: 269985
loss: 1.0024665594100952,grad_norm: 0.9660763411466687, iteration: 269986
loss: 1.0381383895874023,grad_norm: 0.9167073991680542, iteration: 269987
loss: 0.9950737953186035,grad_norm: 0.9999990596402364, iteration: 269988
loss: 1.0032768249511719,grad_norm: 0.8906237559966381, iteration: 269989
loss: 1.0265004634857178,grad_norm: 0.9999998554205076, iteration: 269990
loss: 0.9927536845207214,grad_norm: 0.9999993860796148, iteration: 269991
loss: 0.9975830912590027,grad_norm: 0.9536551375940407, iteration: 269992
loss: 0.9954458475112915,grad_norm: 0.9999991494898913, iteration: 269993
loss: 1.0054214000701904,grad_norm: 0.8803852154326345, iteration: 269994
loss: 1.1046922206878662,grad_norm: 0.9999995654575781, iteration: 269995
loss: 0.9897695183753967,grad_norm: 0.8421993352782015, iteration: 269996
loss: 0.9844234585762024,grad_norm: 0.8232828060751113, iteration: 269997
loss: 1.010303020477295,grad_norm: 0.9144207572920071, iteration: 269998
loss: 1.021299958229065,grad_norm: 0.9999996333183799, iteration: 269999
loss: 0.9875698685646057,grad_norm: 0.7342862533215292, iteration: 270000
Evaluating at step 270000
{'val': 0.995684128254652, 'test': 2.3704409736413927}
loss: 1.0127592086791992,grad_norm: 0.9999992996381124, iteration: 270001
loss: 0.9775061011314392,grad_norm: 0.8148895351075218, iteration: 270002
loss: 0.9933575391769409,grad_norm: 0.8545425327494839, iteration: 270003
loss: 0.9981074333190918,grad_norm: 0.771711971939112, iteration: 270004
loss: 1.0024651288986206,grad_norm: 0.8655380146688927, iteration: 270005
loss: 0.9711651802062988,grad_norm: 0.7684814320042302, iteration: 270006
loss: 0.979742705821991,grad_norm: 0.9791293922967326, iteration: 270007
loss: 1.0249360799789429,grad_norm: 0.8612667852721981, iteration: 270008
loss: 1.0077861547470093,grad_norm: 0.9558212713908515, iteration: 270009
loss: 0.9996339678764343,grad_norm: 0.999999139296031, iteration: 270010
loss: 0.9977819323539734,grad_norm: 0.9999991519130548, iteration: 270011
loss: 1.0025101900100708,grad_norm: 0.7594689987506709, iteration: 270012
loss: 0.9934776425361633,grad_norm: 0.934610025271132, iteration: 270013
loss: 1.0472122430801392,grad_norm: 0.8725137607984176, iteration: 270014
loss: 1.0111057758331299,grad_norm: 0.7887635331274634, iteration: 270015
loss: 0.994311511516571,grad_norm: 0.7242530160557725, iteration: 270016
loss: 1.0345118045806885,grad_norm: 0.999999094134027, iteration: 270017
loss: 0.9947908520698547,grad_norm: 0.9118008918151704, iteration: 270018
loss: 1.011696457862854,grad_norm: 0.9999992561218739, iteration: 270019
loss: 0.9873936176300049,grad_norm: 0.9699269887090154, iteration: 270020
loss: 1.0763524770736694,grad_norm: 0.9999992510282196, iteration: 270021
loss: 0.9734458923339844,grad_norm: 0.8932426653001133, iteration: 270022
loss: 0.9862604737281799,grad_norm: 0.8296869169214379, iteration: 270023
loss: 1.012624740600586,grad_norm: 0.9999999422310062, iteration: 270024
loss: 0.9913851022720337,grad_norm: 0.9635914146843954, iteration: 270025
loss: 0.9894513487815857,grad_norm: 0.9785533700262462, iteration: 270026
loss: 1.00203275680542,grad_norm: 0.8201957073227053, iteration: 270027
loss: 1.0387656688690186,grad_norm: 0.7548261787167947, iteration: 270028
loss: 1.0015251636505127,grad_norm: 0.8130206945655939, iteration: 270029
loss: 1.002502679824829,grad_norm: 0.8805264070873442, iteration: 270030
loss: 0.9812209010124207,grad_norm: 0.8303007198646006, iteration: 270031
loss: 1.020382285118103,grad_norm: 0.9047592629979794, iteration: 270032
loss: 0.9903296828269958,grad_norm: 0.9784237179438225, iteration: 270033
loss: 0.9760487079620361,grad_norm: 0.9999991437418877, iteration: 270034
loss: 0.9848421812057495,grad_norm: 0.7790501728190441, iteration: 270035
loss: 1.0027287006378174,grad_norm: 0.7934836264609612, iteration: 270036
loss: 0.9667588472366333,grad_norm: 0.9999990328594224, iteration: 270037
loss: 1.0554025173187256,grad_norm: 0.7936076499670427, iteration: 270038
loss: 1.0599159002304077,grad_norm: 0.9999994507842366, iteration: 270039
loss: 0.9941075444221497,grad_norm: 0.8395134840736206, iteration: 270040
loss: 1.0248219966888428,grad_norm: 0.9238800883103573, iteration: 270041
loss: 1.0462161302566528,grad_norm: 0.9999994780287079, iteration: 270042
loss: 0.9924390316009521,grad_norm: 0.8145364057295053, iteration: 270043
loss: 1.0162585973739624,grad_norm: 0.9331507878905784, iteration: 270044
loss: 1.01233971118927,grad_norm: 0.8266731420298002, iteration: 270045
loss: 0.9920838475227356,grad_norm: 0.8179611911078748, iteration: 270046
loss: 0.9614076614379883,grad_norm: 0.9335537852908196, iteration: 270047
loss: 0.9877324104309082,grad_norm: 0.8992793344007743, iteration: 270048
loss: 1.0410875082015991,grad_norm: 0.9753106598659957, iteration: 270049
loss: 0.9999459981918335,grad_norm: 0.9384018865831739, iteration: 270050
loss: 0.9992302656173706,grad_norm: 0.8331847562742193, iteration: 270051
loss: 0.9774653315544128,grad_norm: 0.758684886720123, iteration: 270052
loss: 0.9621103405952454,grad_norm: 0.8625032338040063, iteration: 270053
loss: 0.9866441488265991,grad_norm: 0.9574358652783278, iteration: 270054
loss: 1.0324597358703613,grad_norm: 0.9999990505001718, iteration: 270055
loss: 1.0558761358261108,grad_norm: 0.9459966361772498, iteration: 270056
loss: 1.064149260520935,grad_norm: 0.9944433742117741, iteration: 270057
loss: 1.0258861780166626,grad_norm: 0.9999999494766582, iteration: 270058
loss: 0.9957644939422607,grad_norm: 0.9677141465451928, iteration: 270059
loss: 1.0272833108901978,grad_norm: 0.9039768556628067, iteration: 270060
loss: 1.029810905456543,grad_norm: 0.9999999265701115, iteration: 270061
loss: 1.0103143453598022,grad_norm: 0.9051997665953521, iteration: 270062
loss: 0.9534600377082825,grad_norm: 0.7928524058251303, iteration: 270063
loss: 1.061996579170227,grad_norm: 0.9791917707451034, iteration: 270064
loss: 0.9507753849029541,grad_norm: 0.8072988085549536, iteration: 270065
loss: 1.0356254577636719,grad_norm: 0.8673918649986881, iteration: 270066
loss: 1.0112366676330566,grad_norm: 0.8984549657629697, iteration: 270067
loss: 1.0139588117599487,grad_norm: 0.8867493129388417, iteration: 270068
loss: 0.9869871139526367,grad_norm: 0.9669146025175971, iteration: 270069
loss: 1.1303211450576782,grad_norm: 0.9999998321351031, iteration: 270070
loss: 1.0093894004821777,grad_norm: 0.9330475267308096, iteration: 270071
loss: 1.0002919435501099,grad_norm: 0.942803672052387, iteration: 270072
loss: 1.0110265016555786,grad_norm: 0.9999992205980536, iteration: 270073
loss: 1.0179309844970703,grad_norm: 0.9999990892101476, iteration: 270074
loss: 1.0195348262786865,grad_norm: 0.7590775998918855, iteration: 270075
loss: 1.0086536407470703,grad_norm: 0.9776441598114434, iteration: 270076
loss: 0.9907081127166748,grad_norm: 0.8354558125004006, iteration: 270077
loss: 1.0032910108566284,grad_norm: 0.8067912048726015, iteration: 270078
loss: 1.038252592086792,grad_norm: 0.9999991168190699, iteration: 270079
loss: 1.1236330270767212,grad_norm: 0.9999990345805074, iteration: 270080
loss: 0.9877060055732727,grad_norm: 0.6563003044151338, iteration: 270081
loss: 1.007571816444397,grad_norm: 0.8109660519487157, iteration: 270082
loss: 0.9933512806892395,grad_norm: 0.9999990885680613, iteration: 270083
loss: 0.9934468865394592,grad_norm: 0.9337863543955056, iteration: 270084
loss: 1.0202001333236694,grad_norm: 0.9184664709067827, iteration: 270085
loss: 1.0833170413970947,grad_norm: 0.999999418920849, iteration: 270086
loss: 0.9960863590240479,grad_norm: 0.7910442783775201, iteration: 270087
loss: 0.9932951331138611,grad_norm: 0.9999997240871196, iteration: 270088
loss: 0.992938220500946,grad_norm: 0.9999991805070662, iteration: 270089
loss: 0.987737774848938,grad_norm: 0.8831670853998927, iteration: 270090
loss: 1.0182372331619263,grad_norm: 0.999999764542478, iteration: 270091
loss: 1.013025164604187,grad_norm: 0.9362246245730419, iteration: 270092
loss: 1.0058435201644897,grad_norm: 0.8969182484965682, iteration: 270093
loss: 1.0016216039657593,grad_norm: 0.9332139308886018, iteration: 270094
loss: 1.211884617805481,grad_norm: 0.9999991420017474, iteration: 270095
loss: 1.0654058456420898,grad_norm: 0.8665447838668743, iteration: 270096
loss: 1.0461273193359375,grad_norm: 0.9999996605603979, iteration: 270097
loss: 1.0079134702682495,grad_norm: 0.829826212812421, iteration: 270098
loss: 0.9672874808311462,grad_norm: 0.7708227622753283, iteration: 270099
loss: 1.0049875974655151,grad_norm: 0.8143306954000855, iteration: 270100
loss: 0.9833784103393555,grad_norm: 0.9999992634157163, iteration: 270101
loss: 0.9823333024978638,grad_norm: 0.8429456554274539, iteration: 270102
loss: 1.0313196182250977,grad_norm: 0.9999990641792093, iteration: 270103
loss: 1.086893081665039,grad_norm: 0.999999297506495, iteration: 270104
loss: 1.0393186807632446,grad_norm: 0.79938254573118, iteration: 270105
loss: 0.9791445732116699,grad_norm: 0.9999991429549647, iteration: 270106
loss: 1.014646053314209,grad_norm: 0.8866532513522573, iteration: 270107
loss: 1.0337684154510498,grad_norm: 0.9066882204373315, iteration: 270108
loss: 1.095965027809143,grad_norm: 0.9999992741613964, iteration: 270109
loss: 1.0486575365066528,grad_norm: 0.9999993793149321, iteration: 270110
loss: 0.9989778399467468,grad_norm: 0.8369759442456833, iteration: 270111
loss: 0.9997259974479675,grad_norm: 0.8153723453687608, iteration: 270112
loss: 1.0085456371307373,grad_norm: 0.9643724059731758, iteration: 270113
loss: 1.041075587272644,grad_norm: 0.833853838231978, iteration: 270114
loss: 1.0216227769851685,grad_norm: 0.9999996638925289, iteration: 270115
loss: 0.9920747876167297,grad_norm: 0.9589785962837853, iteration: 270116
loss: 0.9678010940551758,grad_norm: 0.9056567992040969, iteration: 270117
loss: 1.003483772277832,grad_norm: 0.910136268459065, iteration: 270118
loss: 1.021726131439209,grad_norm: 0.9175505719448381, iteration: 270119
loss: 1.0243401527404785,grad_norm: 0.903107308602868, iteration: 270120
loss: 0.9845749139785767,grad_norm: 0.8070878878679174, iteration: 270121
loss: 1.047323226928711,grad_norm: 0.9971346097741806, iteration: 270122
loss: 0.9714497327804565,grad_norm: 0.7795933054475334, iteration: 270123
loss: 1.0429480075836182,grad_norm: 0.9999991084376734, iteration: 270124
loss: 0.9998323321342468,grad_norm: 0.729985217911643, iteration: 270125
loss: 1.0238600969314575,grad_norm: 0.9755077833559077, iteration: 270126
loss: 0.9610782265663147,grad_norm: 0.8316413209022023, iteration: 270127
loss: 1.0352400541305542,grad_norm: 0.8767280165905813, iteration: 270128
loss: 1.0088125467300415,grad_norm: 0.8419422402300446, iteration: 270129
loss: 1.0085773468017578,grad_norm: 0.7802375242549701, iteration: 270130
loss: 0.9904613494873047,grad_norm: 0.8670181873053769, iteration: 270131
loss: 1.0103213787078857,grad_norm: 0.9999991887683394, iteration: 270132
loss: 1.027698278427124,grad_norm: 0.8746026483374236, iteration: 270133
loss: 0.9872124791145325,grad_norm: 0.7848217034373488, iteration: 270134
loss: 1.0251877307891846,grad_norm: 0.999999093386322, iteration: 270135
loss: 1.0159119367599487,grad_norm: 0.7478275279822841, iteration: 270136
loss: 0.9920690059661865,grad_norm: 0.8398633997880508, iteration: 270137
loss: 1.0354236364364624,grad_norm: 0.8355308014523795, iteration: 270138
loss: 0.9691736698150635,grad_norm: 0.8728129249138115, iteration: 270139
loss: 0.990216076374054,grad_norm: 0.8163225205658822, iteration: 270140
loss: 0.9789661169052124,grad_norm: 0.8725393249633049, iteration: 270141
loss: 0.9764224290847778,grad_norm: 0.8142757115908252, iteration: 270142
loss: 0.983042299747467,grad_norm: 0.7483461933798911, iteration: 270143
loss: 0.9868723750114441,grad_norm: 0.9917340129979635, iteration: 270144
loss: 1.006523847579956,grad_norm: 0.8366528220684468, iteration: 270145
loss: 1.013464331626892,grad_norm: 0.8973255388376951, iteration: 270146
loss: 0.9941216707229614,grad_norm: 0.8213624904956072, iteration: 270147
loss: 0.9722105264663696,grad_norm: 0.8640575053390616, iteration: 270148
loss: 1.0189363956451416,grad_norm: 0.9297730073546457, iteration: 270149
loss: 0.9934892058372498,grad_norm: 0.8396934333601753, iteration: 270150
loss: 0.9785829782485962,grad_norm: 0.9999991413417645, iteration: 270151
loss: 1.025680422782898,grad_norm: 0.7227197734864715, iteration: 270152
loss: 1.0168675184249878,grad_norm: 0.9999991627231551, iteration: 270153
loss: 1.0139620304107666,grad_norm: 0.9277789375355525, iteration: 270154
loss: 1.129986047744751,grad_norm: 0.9999996817956794, iteration: 270155
loss: 0.9921542406082153,grad_norm: 0.8117873564873808, iteration: 270156
loss: 0.9299565553665161,grad_norm: 0.8564331511160723, iteration: 270157
loss: 1.0138870477676392,grad_norm: 0.9813371317803535, iteration: 270158
loss: 0.9766873121261597,grad_norm: 0.9137409799772569, iteration: 270159
loss: 0.9894023537635803,grad_norm: 0.8592306987662109, iteration: 270160
loss: 1.013031005859375,grad_norm: 0.9999997649320517, iteration: 270161
loss: 0.9673089981079102,grad_norm: 0.9718809968199739, iteration: 270162
loss: 0.9887241721153259,grad_norm: 0.7859697143151033, iteration: 270163
loss: 1.0216100215911865,grad_norm: 0.9999993310798281, iteration: 270164
loss: 1.0181480646133423,grad_norm: 0.8016724894696523, iteration: 270165
loss: 1.0308144092559814,grad_norm: 0.9999994328386744, iteration: 270166
loss: 0.9845978617668152,grad_norm: 0.903056652200808, iteration: 270167
loss: 1.0241496562957764,grad_norm: 0.9999996030449583, iteration: 270168
loss: 1.0279393196105957,grad_norm: 0.799789150783812, iteration: 270169
loss: 1.0186609029769897,grad_norm: 0.9534988740609418, iteration: 270170
loss: 0.9981478452682495,grad_norm: 0.709263108244103, iteration: 270171
loss: 0.9963799118995667,grad_norm: 0.8685934244793269, iteration: 270172
loss: 1.1217001676559448,grad_norm: 0.9999994468759944, iteration: 270173
loss: 0.9882628321647644,grad_norm: 0.8712440011066606, iteration: 270174
loss: 0.997393786907196,grad_norm: 0.8073693956423702, iteration: 270175
loss: 0.9577404856681824,grad_norm: 0.7694718631956972, iteration: 270176
loss: 1.0483986139297485,grad_norm: 0.9999998255732082, iteration: 270177
loss: 0.9710493683815002,grad_norm: 0.7754503680061171, iteration: 270178
loss: 0.9766703844070435,grad_norm: 0.7916480031573021, iteration: 270179
loss: 1.005688190460205,grad_norm: 0.7796289290925299, iteration: 270180
loss: 1.0033862590789795,grad_norm: 0.9999991894466689, iteration: 270181
loss: 1.0070617198944092,grad_norm: 0.8326414386548523, iteration: 270182
loss: 1.0145549774169922,grad_norm: 0.9999992095016093, iteration: 270183
loss: 1.09452223777771,grad_norm: 0.999999763486817, iteration: 270184
loss: 1.0080291032791138,grad_norm: 0.6772312578869327, iteration: 270185
loss: 1.012895941734314,grad_norm: 0.8978279835803623, iteration: 270186
loss: 1.047268271446228,grad_norm: 0.8644818751788194, iteration: 270187
loss: 1.0198525190353394,grad_norm: 0.9037973006600625, iteration: 270188
loss: 0.9839800596237183,grad_norm: 0.8904134464368186, iteration: 270189
loss: 0.9983689785003662,grad_norm: 0.9999998229731701, iteration: 270190
loss: 1.0116102695465088,grad_norm: 0.9996945809557705, iteration: 270191
loss: 0.9826920628547668,grad_norm: 0.8662402255912073, iteration: 270192
loss: 1.0127344131469727,grad_norm: 0.9558033761388891, iteration: 270193
loss: 0.9841947555541992,grad_norm: 0.951955534768318, iteration: 270194
loss: 0.9731506705284119,grad_norm: 0.9349988549744938, iteration: 270195
loss: 0.9649966955184937,grad_norm: 0.8782926338979802, iteration: 270196
loss: 1.0026707649230957,grad_norm: 0.8914597765845579, iteration: 270197
loss: 1.056017518043518,grad_norm: 0.8804552919700834, iteration: 270198
loss: 1.017508625984192,grad_norm: 0.9343364700087369, iteration: 270199
loss: 1.0450760126113892,grad_norm: 0.8824390791235894, iteration: 270200
loss: 0.9871364831924438,grad_norm: 0.9999992439048063, iteration: 270201
loss: 1.0304404497146606,grad_norm: 0.7408807295621581, iteration: 270202
loss: 0.9806680083274841,grad_norm: 0.9999990389931412, iteration: 270203
loss: 1.0190032720565796,grad_norm: 0.9999991784177219, iteration: 270204
loss: 0.9810276031494141,grad_norm: 0.7419561370342267, iteration: 270205
loss: 1.0230967998504639,grad_norm: 0.9040943418872497, iteration: 270206
loss: 0.9874314665794373,grad_norm: 0.9999990985159227, iteration: 270207
loss: 0.9813938736915588,grad_norm: 0.8588287472831094, iteration: 270208
loss: 1.0487899780273438,grad_norm: 0.8996086897119244, iteration: 270209
loss: 0.9897134900093079,grad_norm: 0.9999990071207405, iteration: 270210
loss: 1.0068910121917725,grad_norm: 0.9999992307932332, iteration: 270211
loss: 1.0062536001205444,grad_norm: 0.7749943921176624, iteration: 270212
loss: 0.9942964315414429,grad_norm: 0.8272495471722329, iteration: 270213
loss: 1.0320994853973389,grad_norm: 0.9999991785321872, iteration: 270214
loss: 1.0029957294464111,grad_norm: 0.999999989628198, iteration: 270215
loss: 0.9764140844345093,grad_norm: 0.8882938079572712, iteration: 270216
loss: 0.9699632525444031,grad_norm: 0.8874755270259794, iteration: 270217
loss: 1.068910837173462,grad_norm: 0.8800105645112456, iteration: 270218
loss: 1.0206700563430786,grad_norm: 0.86274838385588, iteration: 270219
loss: 0.9967331886291504,grad_norm: 0.7586008530619133, iteration: 270220
loss: 0.9921085238456726,grad_norm: 0.7235769721393402, iteration: 270221
loss: 1.0185610055923462,grad_norm: 0.8847545888936724, iteration: 270222
loss: 0.9967888593673706,grad_norm: 0.8872540370849223, iteration: 270223
loss: 1.0195664167404175,grad_norm: 0.999999620110149, iteration: 270224
loss: 1.0642353296279907,grad_norm: 0.9999991508977774, iteration: 270225
loss: 1.102669358253479,grad_norm: 0.9999990472701631, iteration: 270226
loss: 0.9895457625389099,grad_norm: 0.999999021232721, iteration: 270227
loss: 1.0071207284927368,grad_norm: 0.9724063075354994, iteration: 270228
loss: 0.9793508052825928,grad_norm: 0.854305470879136, iteration: 270229
loss: 1.021904706954956,grad_norm: 0.8362878305728058, iteration: 270230
loss: 1.0200754404067993,grad_norm: 0.7589383343704076, iteration: 270231
loss: 1.0367404222488403,grad_norm: 0.9999993826625818, iteration: 270232
loss: 1.0301233530044556,grad_norm: 0.9999991681067053, iteration: 270233
loss: 1.102958083152771,grad_norm: 0.8789276396189266, iteration: 270234
loss: 1.0332434177398682,grad_norm: 0.9999992533702361, iteration: 270235
loss: 1.0489249229431152,grad_norm: 0.9999991723932778, iteration: 270236
loss: 0.9601539969444275,grad_norm: 0.9080328536881288, iteration: 270237
loss: 0.9969075322151184,grad_norm: 0.9999995961482635, iteration: 270238
loss: 1.0409592390060425,grad_norm: 0.9999998920291366, iteration: 270239
loss: 0.9853351712226868,grad_norm: 0.9999991061214322, iteration: 270240
loss: 0.975644588470459,grad_norm: 0.8227037856612514, iteration: 270241
loss: 0.9767380356788635,grad_norm: 0.8596593931927226, iteration: 270242
loss: 1.011275291442871,grad_norm: 0.7937918732995883, iteration: 270243
loss: 0.9841723442077637,grad_norm: 0.7910992503665966, iteration: 270244
loss: 1.048996925354004,grad_norm: 0.8259996222791585, iteration: 270245
loss: 0.9978771209716797,grad_norm: 0.8788079209552082, iteration: 270246
loss: 1.019540548324585,grad_norm: 0.8840548436615243, iteration: 270247
loss: 1.0187996625900269,grad_norm: 0.7696310707113267, iteration: 270248
loss: 0.9793097972869873,grad_norm: 0.9565057947869328, iteration: 270249
loss: 1.0178807973861694,grad_norm: 0.8769224433371382, iteration: 270250
loss: 0.9960117340087891,grad_norm: 0.8663335231531709, iteration: 270251
loss: 0.9751477837562561,grad_norm: 0.960000174743971, iteration: 270252
loss: 0.9890448451042175,grad_norm: 0.9182523991076776, iteration: 270253
loss: 0.9919365644454956,grad_norm: 0.830055916746124, iteration: 270254
loss: 0.9768414497375488,grad_norm: 0.9078903932214866, iteration: 270255
loss: 1.0317602157592773,grad_norm: 0.9999993969297613, iteration: 270256
loss: 1.0669238567352295,grad_norm: 0.9999995707626477, iteration: 270257
loss: 0.997894287109375,grad_norm: 0.8011952488515101, iteration: 270258
loss: 1.0817549228668213,grad_norm: 0.9999995684217868, iteration: 270259
loss: 1.0139812231063843,grad_norm: 0.9999997696519805, iteration: 270260
loss: 1.0112684965133667,grad_norm: 0.7790376411170488, iteration: 270261
loss: 1.0250849723815918,grad_norm: 0.9999992113480961, iteration: 270262
loss: 0.973885715007782,grad_norm: 0.8492801890965282, iteration: 270263
loss: 0.9880790114402771,grad_norm: 0.9999989894160778, iteration: 270264
loss: 0.987398087978363,grad_norm: 0.8696016213521313, iteration: 270265
loss: 1.0126949548721313,grad_norm: 0.9285750728509902, iteration: 270266
loss: 1.009833574295044,grad_norm: 0.999999052105038, iteration: 270267
loss: 1.000771403312683,grad_norm: 0.8697282963250382, iteration: 270268
loss: 0.9933700561523438,grad_norm: 0.8623682786431823, iteration: 270269
loss: 1.0394601821899414,grad_norm: 0.9143289685725501, iteration: 270270
loss: 1.052903175354004,grad_norm: 0.9999992371647208, iteration: 270271
loss: 1.0117056369781494,grad_norm: 0.8886239820688223, iteration: 270272
loss: 0.9672930240631104,grad_norm: 0.7701419463313105, iteration: 270273
loss: 1.007675051689148,grad_norm: 0.8680947597757119, iteration: 270274
loss: 0.9908117651939392,grad_norm: 0.86262467172789, iteration: 270275
loss: 1.0342658758163452,grad_norm: 0.9999998280686819, iteration: 270276
loss: 0.9437433481216431,grad_norm: 0.9999992668108799, iteration: 270277
loss: 0.994731068611145,grad_norm: 0.7645431595540019, iteration: 270278
loss: 0.9828492999076843,grad_norm: 0.6816063942134837, iteration: 270279
loss: 0.9969326853752136,grad_norm: 0.9309529024946573, iteration: 270280
loss: 1.0244543552398682,grad_norm: 0.9094952315804387, iteration: 270281
loss: 0.9602653384208679,grad_norm: 0.7292497993514139, iteration: 270282
loss: 1.0035977363586426,grad_norm: 0.8930992605885479, iteration: 270283
loss: 0.9981716871261597,grad_norm: 0.9566569614543041, iteration: 270284
loss: 0.9890841245651245,grad_norm: 0.8434417385577858, iteration: 270285
loss: 1.0372297763824463,grad_norm: 0.9999990657469651, iteration: 270286
loss: 0.9627143740653992,grad_norm: 0.7592408705199487, iteration: 270287
loss: 1.0629136562347412,grad_norm: 0.8773078917896721, iteration: 270288
loss: 1.02273428440094,grad_norm: 0.9999990339818429, iteration: 270289
loss: 1.008313775062561,grad_norm: 0.9332138457525073, iteration: 270290
loss: 1.013310432434082,grad_norm: 0.9999990748412025, iteration: 270291
loss: 0.9679787755012512,grad_norm: 0.890552104764668, iteration: 270292
loss: 0.9907254576683044,grad_norm: 0.862915882896173, iteration: 270293
loss: 0.990705668926239,grad_norm: 0.9999990585125319, iteration: 270294
loss: 1.0712467432022095,grad_norm: 0.9999995025401136, iteration: 270295
loss: 1.0309022665023804,grad_norm: 0.8677534738624509, iteration: 270296
loss: 1.0043829679489136,grad_norm: 0.9999991736736068, iteration: 270297
loss: 1.0015908479690552,grad_norm: 0.8880929374483872, iteration: 270298
loss: 1.0026582479476929,grad_norm: 0.8172569766079018, iteration: 270299
loss: 1.000895619392395,grad_norm: 0.7622030982546184, iteration: 270300
loss: 0.9861367344856262,grad_norm: 0.8444968733047762, iteration: 270301
loss: 0.9842032790184021,grad_norm: 0.9651198765646208, iteration: 270302
loss: 0.9759186506271362,grad_norm: 0.8685443049289792, iteration: 270303
loss: 0.9567374587059021,grad_norm: 0.8048663770326621, iteration: 270304
loss: 0.9894571304321289,grad_norm: 0.9299658285050104, iteration: 270305
loss: 0.9865864515304565,grad_norm: 0.806585246483106, iteration: 270306
loss: 1.0150164365768433,grad_norm: 0.9999990348920712, iteration: 270307
loss: 1.0104787349700928,grad_norm: 0.9127871191991513, iteration: 270308
loss: 0.99533611536026,grad_norm: 0.9999990602625068, iteration: 270309
loss: 1.0026047229766846,grad_norm: 0.9132307864657444, iteration: 270310
loss: 1.0098086595535278,grad_norm: 0.8267532527948466, iteration: 270311
loss: 1.014203667640686,grad_norm: 0.8594315156366752, iteration: 270312
loss: 0.9794763922691345,grad_norm: 0.9241584372162722, iteration: 270313
loss: 1.029947280883789,grad_norm: 0.7808074753372053, iteration: 270314
loss: 1.0099490880966187,grad_norm: 0.9999995596806183, iteration: 270315
loss: 0.9969854354858398,grad_norm: 0.8696678175751916, iteration: 270316
loss: 1.0164856910705566,grad_norm: 0.9999991136281842, iteration: 270317
loss: 0.9779908061027527,grad_norm: 0.887014353694447, iteration: 270318
loss: 1.005082368850708,grad_norm: 0.8989763457658034, iteration: 270319
loss: 0.9875490069389343,grad_norm: 0.9999990756107217, iteration: 270320
loss: 0.9875240921974182,grad_norm: 0.8955503382048898, iteration: 270321
loss: 1.0113844871520996,grad_norm: 0.7912146982353983, iteration: 270322
loss: 1.0179916620254517,grad_norm: 0.9999991936100454, iteration: 270323
loss: 1.000894546508789,grad_norm: 0.6799434610863105, iteration: 270324
loss: 1.0109652280807495,grad_norm: 0.6935550686813883, iteration: 270325
loss: 1.002657413482666,grad_norm: 0.9444378397108211, iteration: 270326
loss: 0.9608151912689209,grad_norm: 0.9668043821642357, iteration: 270327
loss: 1.0142756700515747,grad_norm: 0.8433727662219753, iteration: 270328
loss: 1.0039865970611572,grad_norm: 0.8772871535825943, iteration: 270329
loss: 0.9912970066070557,grad_norm: 0.9180454649306339, iteration: 270330
loss: 1.0613542795181274,grad_norm: 0.99999922678164, iteration: 270331
loss: 0.990502119064331,grad_norm: 0.7937696568921426, iteration: 270332
loss: 0.9805458188056946,grad_norm: 0.9999992105603734, iteration: 270333
loss: 1.026218056678772,grad_norm: 0.9021734081427474, iteration: 270334
loss: 1.0304858684539795,grad_norm: 0.9740151033993881, iteration: 270335
loss: 0.9573038816452026,grad_norm: 0.823424507424577, iteration: 270336
loss: 1.0200684070587158,grad_norm: 0.8761899932094745, iteration: 270337
loss: 1.0123404264450073,grad_norm: 0.8226228884966925, iteration: 270338
loss: 0.9903945326805115,grad_norm: 0.8851535178117329, iteration: 270339
loss: 1.0313690900802612,grad_norm: 0.9999999963736579, iteration: 270340
loss: 1.0391165018081665,grad_norm: 0.9999994938658718, iteration: 270341
loss: 0.9914969205856323,grad_norm: 0.9080311386170979, iteration: 270342
loss: 0.9581444263458252,grad_norm: 0.879437297110981, iteration: 270343
loss: 1.0124601125717163,grad_norm: 0.9999991308048599, iteration: 270344
loss: 0.9795929789543152,grad_norm: 0.7838887556489249, iteration: 270345
loss: 1.039507508277893,grad_norm: 0.9919906813772541, iteration: 270346
loss: 0.9370174407958984,grad_norm: 0.8995586987222872, iteration: 270347
loss: 0.972911536693573,grad_norm: 0.8179643448921152, iteration: 270348
loss: 0.9826862812042236,grad_norm: 0.8156312873308778, iteration: 270349
loss: 0.9888531565666199,grad_norm: 0.8495842712154104, iteration: 270350
loss: 1.0409438610076904,grad_norm: 0.9999991179710416, iteration: 270351
loss: 1.0168797969818115,grad_norm: 0.7291501629904023, iteration: 270352
loss: 1.0097885131835938,grad_norm: 0.7744640484930281, iteration: 270353
loss: 1.0276211500167847,grad_norm: 0.8965894737423235, iteration: 270354
loss: 1.0147112607955933,grad_norm: 0.8065304634326057, iteration: 270355
loss: 1.0089516639709473,grad_norm: 0.8204381757358565, iteration: 270356
loss: 0.9927978515625,grad_norm: 0.9113105199682733, iteration: 270357
loss: 0.9763432741165161,grad_norm: 0.9482355882605341, iteration: 270358
loss: 1.0390903949737549,grad_norm: 0.930800892820362, iteration: 270359
loss: 1.0260363817214966,grad_norm: 0.8834508763880198, iteration: 270360
loss: 0.9944785833358765,grad_norm: 0.79600132113216, iteration: 270361
loss: 1.0182613134384155,grad_norm: 0.906452110781387, iteration: 270362
loss: 0.9814113974571228,grad_norm: 0.9333262200552626, iteration: 270363
loss: 0.9967594742774963,grad_norm: 0.7541131752519518, iteration: 270364
loss: 0.998698353767395,grad_norm: 0.8070209714536469, iteration: 270365
loss: 1.0019261837005615,grad_norm: 0.9720622151125841, iteration: 270366
loss: 1.0229650735855103,grad_norm: 0.8028600028587389, iteration: 270367
loss: 1.0407320261001587,grad_norm: 0.9999994452481734, iteration: 270368
loss: 0.9857794046401978,grad_norm: 0.9999992608151317, iteration: 270369
loss: 1.0287818908691406,grad_norm: 0.9999990945711699, iteration: 270370
loss: 0.9925476312637329,grad_norm: 0.8261230400400077, iteration: 270371
loss: 0.9921683669090271,grad_norm: 0.9829701904609781, iteration: 270372
loss: 0.9763208627700806,grad_norm: 0.8624595127844469, iteration: 270373
loss: 0.9896471500396729,grad_norm: 0.9999990828704965, iteration: 270374
loss: 0.9918070435523987,grad_norm: 0.899192923107445, iteration: 270375
loss: 1.014247179031372,grad_norm: 0.9760133295147377, iteration: 270376
loss: 0.9927431344985962,grad_norm: 0.9379751851985375, iteration: 270377
loss: 1.012213945388794,grad_norm: 0.9995307529099994, iteration: 270378
loss: 1.0137826204299927,grad_norm: 0.9075036696118222, iteration: 270379
loss: 1.007075309753418,grad_norm: 0.9730235410806278, iteration: 270380
loss: 0.9963598251342773,grad_norm: 0.9748327690900602, iteration: 270381
loss: 1.0180243253707886,grad_norm: 0.9299154914610986, iteration: 270382
loss: 1.0028858184814453,grad_norm: 0.7609795433656478, iteration: 270383
loss: 1.0559123754501343,grad_norm: 0.8839924728655443, iteration: 270384
loss: 1.0162339210510254,grad_norm: 0.8565193254413872, iteration: 270385
loss: 1.0057834386825562,grad_norm: 0.9647421985425362, iteration: 270386
loss: 0.9808286428451538,grad_norm: 0.9061273018880428, iteration: 270387
loss: 1.0062884092330933,grad_norm: 0.8008686217374552, iteration: 270388
loss: 1.00836181640625,grad_norm: 0.8667661978838287, iteration: 270389
loss: 0.9954123497009277,grad_norm: 0.9819805804611053, iteration: 270390
loss: 0.967946469783783,grad_norm: 0.8823052818497593, iteration: 270391
loss: 0.9983366131782532,grad_norm: 0.8547657118847263, iteration: 270392
loss: 1.0107245445251465,grad_norm: 0.912998977620933, iteration: 270393
loss: 1.0048720836639404,grad_norm: 0.9213724803118278, iteration: 270394
loss: 1.0373347997665405,grad_norm: 0.8643448278384459, iteration: 270395
loss: 1.0128451585769653,grad_norm: 0.8103149842853256, iteration: 270396
loss: 0.988411009311676,grad_norm: 0.7839820942573302, iteration: 270397
loss: 0.9946033358573914,grad_norm: 0.934168016861187, iteration: 270398
loss: 0.991790771484375,grad_norm: 0.9999992816646581, iteration: 270399
loss: 0.9563758969306946,grad_norm: 0.9265249849616042, iteration: 270400
loss: 0.9985873699188232,grad_norm: 0.9093953803491758, iteration: 270401
loss: 1.0299041271209717,grad_norm: 0.9999991585229018, iteration: 270402
loss: 1.0763438940048218,grad_norm: 0.9999995971601018, iteration: 270403
loss: 1.0001678466796875,grad_norm: 0.9999990141855146, iteration: 270404
loss: 0.9913488030433655,grad_norm: 0.7524423904311631, iteration: 270405
loss: 1.0043283700942993,grad_norm: 0.8264693498899938, iteration: 270406
loss: 1.0180206298828125,grad_norm: 0.8416811734988301, iteration: 270407
loss: 0.9972580671310425,grad_norm: 0.74047194251235, iteration: 270408
loss: 1.0025304555892944,grad_norm: 0.9999990145231094, iteration: 270409
loss: 1.1030774116516113,grad_norm: 0.9999996340898424, iteration: 270410
loss: 0.9973561763763428,grad_norm: 0.8606790219647515, iteration: 270411
loss: 1.0243586301803589,grad_norm: 0.9999994415604126, iteration: 270412
loss: 0.9822354316711426,grad_norm: 0.7349885477895702, iteration: 270413
loss: 1.0238381624221802,grad_norm: 0.9999996851877683, iteration: 270414
loss: 1.0333492755889893,grad_norm: 0.9148400559170112, iteration: 270415
loss: 1.0049852132797241,grad_norm: 0.9012462314052821, iteration: 270416
loss: 0.9932663440704346,grad_norm: 0.9055692775204314, iteration: 270417
loss: 1.0904704332351685,grad_norm: 0.9999998786866634, iteration: 270418
loss: 0.9886817336082458,grad_norm: 0.9498307005798384, iteration: 270419
loss: 1.0548161268234253,grad_norm: 0.9999990943858996, iteration: 270420
loss: 1.0148544311523438,grad_norm: 0.9999990641867615, iteration: 270421
loss: 0.9786327481269836,grad_norm: 0.9999989827600116, iteration: 270422
loss: 1.0273135900497437,grad_norm: 0.8351828895502006, iteration: 270423
loss: 1.0112050771713257,grad_norm: 0.8803438820005757, iteration: 270424
loss: 1.0088375806808472,grad_norm: 0.8213362618274408, iteration: 270425
loss: 0.9811504483222961,grad_norm: 0.8565669765011701, iteration: 270426
loss: 1.0409060716629028,grad_norm: 0.9999991680286394, iteration: 270427
loss: 1.0011435747146606,grad_norm: 0.994799908659698, iteration: 270428
loss: 0.9736918210983276,grad_norm: 0.8730734647299454, iteration: 270429
loss: 0.9853442907333374,grad_norm: 0.9999991437661541, iteration: 270430
loss: 1.0200295448303223,grad_norm: 0.9396371850655549, iteration: 270431
loss: 1.0082582235336304,grad_norm: 0.7104316573307938, iteration: 270432
loss: 1.0201488733291626,grad_norm: 0.9878017746375444, iteration: 270433
loss: 0.9952630400657654,grad_norm: 0.8972648876294906, iteration: 270434
loss: 0.9974773526191711,grad_norm: 0.999999285629385, iteration: 270435
loss: 1.00286865234375,grad_norm: 0.7092368677284495, iteration: 270436
loss: 0.9795711636543274,grad_norm: 0.8215362531072397, iteration: 270437
loss: 0.9915462732315063,grad_norm: 0.8589381008672943, iteration: 270438
loss: 0.9705946445465088,grad_norm: 0.9684761466166932, iteration: 270439
loss: 1.039996862411499,grad_norm: 0.9456621933229716, iteration: 270440
loss: 0.9340540766716003,grad_norm: 0.9410833963451495, iteration: 270441
loss: 1.0212465524673462,grad_norm: 0.999999483378, iteration: 270442
loss: 0.9702953100204468,grad_norm: 0.8527880980493142, iteration: 270443
loss: 1.0225354433059692,grad_norm: 0.999999084792095, iteration: 270444
loss: 1.1020100116729736,grad_norm: 0.9999999278851208, iteration: 270445
loss: 1.0311782360076904,grad_norm: 0.745094448110625, iteration: 270446
loss: 1.0233516693115234,grad_norm: 0.9025671763548934, iteration: 270447
loss: 0.9464786052703857,grad_norm: 0.8703102711446633, iteration: 270448
loss: 1.0960711240768433,grad_norm: 1.0000000344409357, iteration: 270449
loss: 1.001855492591858,grad_norm: 0.8442929881691187, iteration: 270450
loss: 1.0326966047286987,grad_norm: 0.9999994113579018, iteration: 270451
loss: 1.0238089561462402,grad_norm: 0.853808154403532, iteration: 270452
loss: 1.002402663230896,grad_norm: 0.8040873702933808, iteration: 270453
loss: 1.0682320594787598,grad_norm: 0.9727668065570507, iteration: 270454
loss: 1.0209598541259766,grad_norm: 0.8392730467619236, iteration: 270455
loss: 0.9931904077529907,grad_norm: 0.99468250232874, iteration: 270456
loss: 1.0269657373428345,grad_norm: 0.8139874157475527, iteration: 270457
loss: 1.0714486837387085,grad_norm: 0.9999997457451716, iteration: 270458
loss: 1.01068913936615,grad_norm: 0.7479229409343431, iteration: 270459
loss: 1.0015766620635986,grad_norm: 0.8850079963519597, iteration: 270460
loss: 1.0046745538711548,grad_norm: 0.9822296921726161, iteration: 270461
loss: 0.9970036745071411,grad_norm: 0.876591179234533, iteration: 270462
loss: 1.0332382917404175,grad_norm: 0.9999998768774904, iteration: 270463
loss: 1.0160133838653564,grad_norm: 0.9124298569366385, iteration: 270464
loss: 1.0515077114105225,grad_norm: 0.9616218624557458, iteration: 270465
loss: 1.0298844575881958,grad_norm: 0.99999915882033, iteration: 270466
loss: 0.9962210059165955,grad_norm: 0.9807932202724782, iteration: 270467
loss: 0.9893044829368591,grad_norm: 0.9999996950812889, iteration: 270468
loss: 0.9534429907798767,grad_norm: 0.8219606622949746, iteration: 270469
loss: 1.0655367374420166,grad_norm: 0.7579477543115247, iteration: 270470
loss: 1.029750108718872,grad_norm: 0.9339406910770764, iteration: 270471
loss: 0.9922313690185547,grad_norm: 0.8869890975108177, iteration: 270472
loss: 0.966101884841919,grad_norm: 0.9392196561761851, iteration: 270473
loss: 0.9897924065589905,grad_norm: 0.9999990220113071, iteration: 270474
loss: 0.9807530045509338,grad_norm: 0.9999990507772977, iteration: 270475
loss: 0.9741078615188599,grad_norm: 0.866503606747813, iteration: 270476
loss: 0.9885329008102417,grad_norm: 0.8702088899619309, iteration: 270477
loss: 1.0262640714645386,grad_norm: 0.890584431645317, iteration: 270478
loss: 1.013039469718933,grad_norm: 0.8650302945035717, iteration: 270479
loss: 1.0092291831970215,grad_norm: 0.9999994675050746, iteration: 270480
loss: 1.0181349515914917,grad_norm: 0.8454514591839442, iteration: 270481
loss: 1.0109418630599976,grad_norm: 0.9043304032534584, iteration: 270482
loss: 1.0177708864212036,grad_norm: 0.9760132382971264, iteration: 270483
loss: 0.9766814112663269,grad_norm: 0.675465356527924, iteration: 270484
loss: 1.0181857347488403,grad_norm: 0.9327854194791938, iteration: 270485
loss: 0.9830653667449951,grad_norm: 0.999999343748614, iteration: 270486
loss: 1.0184917449951172,grad_norm: 0.999999021091581, iteration: 270487
loss: 1.0279146432876587,grad_norm: 0.9999990282227506, iteration: 270488
loss: 1.0114635229110718,grad_norm: 0.775952366389958, iteration: 270489
loss: 1.0119696855545044,grad_norm: 0.9999993034648809, iteration: 270490
loss: 1.0235553979873657,grad_norm: 0.9999990814517601, iteration: 270491
loss: 1.00129234790802,grad_norm: 0.8768992453842744, iteration: 270492
loss: 1.0044101476669312,grad_norm: 0.740790694477653, iteration: 270493
loss: 0.9885589480400085,grad_norm: 0.9244072951812539, iteration: 270494
loss: 0.9822975993156433,grad_norm: 0.9999991916655859, iteration: 270495
loss: 0.9780535101890564,grad_norm: 0.9048764779419536, iteration: 270496
loss: 0.9686002135276794,grad_norm: 0.8139983094612447, iteration: 270497
loss: 1.0186349153518677,grad_norm: 0.7729039572351302, iteration: 270498
loss: 0.9646326303482056,grad_norm: 0.8844089820833074, iteration: 270499
loss: 1.018001914024353,grad_norm: 0.7970669350225023, iteration: 270500
loss: 1.0580694675445557,grad_norm: 0.8870280628965683, iteration: 270501
loss: 1.004469394683838,grad_norm: 0.9629435884719671, iteration: 270502
loss: 0.9821715354919434,grad_norm: 0.9999991867818456, iteration: 270503
loss: 0.9634688496589661,grad_norm: 0.9223019350771516, iteration: 270504
loss: 1.0272691249847412,grad_norm: 0.9121685251214495, iteration: 270505
loss: 1.025895118713379,grad_norm: 0.8816412023612802, iteration: 270506
loss: 0.9820317029953003,grad_norm: 0.8023242784330646, iteration: 270507
loss: 1.0302393436431885,grad_norm: 0.9999989625659323, iteration: 270508
loss: 1.0224874019622803,grad_norm: 0.8626548388415279, iteration: 270509
loss: 1.0687026977539062,grad_norm: 0.9999998757279848, iteration: 270510
loss: 1.0275427103042603,grad_norm: 0.8813789624455094, iteration: 270511
loss: 1.0082005262374878,grad_norm: 0.8691321874146392, iteration: 270512
loss: 0.9878240823745728,grad_norm: 0.9481479834487463, iteration: 270513
loss: 1.0188409090042114,grad_norm: 0.9862547580522726, iteration: 270514
loss: 1.0054097175598145,grad_norm: 0.9361949951787556, iteration: 270515
loss: 1.0159380435943604,grad_norm: 0.9999998611138102, iteration: 270516
loss: 0.9700492024421692,grad_norm: 0.8305417861160195, iteration: 270517
loss: 1.0066838264465332,grad_norm: 0.8195186743414794, iteration: 270518
loss: 0.9832817912101746,grad_norm: 0.9999991140504964, iteration: 270519
loss: 1.018003225326538,grad_norm: 0.7964716358388492, iteration: 270520
loss: 0.9944143891334534,grad_norm: 0.9063851421210407, iteration: 270521
loss: 1.0101258754730225,grad_norm: 0.918392328813638, iteration: 270522
loss: 0.9677732586860657,grad_norm: 0.8647944560290639, iteration: 270523
loss: 0.9893100261688232,grad_norm: 0.8225070291098081, iteration: 270524
loss: 1.0530678033828735,grad_norm: 0.9999992257317085, iteration: 270525
loss: 0.9662298560142517,grad_norm: 0.8001720537118954, iteration: 270526
loss: 1.016910195350647,grad_norm: 0.8601732715613513, iteration: 270527
loss: 0.9949628710746765,grad_norm: 0.8284246355782748, iteration: 270528
loss: 0.992494523525238,grad_norm: 0.9218414942127728, iteration: 270529
loss: 0.9961865544319153,grad_norm: 0.9462198867572781, iteration: 270530
loss: 0.9605100750923157,grad_norm: 0.9174850420473089, iteration: 270531
loss: 0.9547601938247681,grad_norm: 0.8151912894869775, iteration: 270532
loss: 1.0187957286834717,grad_norm: 0.8287850357665176, iteration: 270533
loss: 1.0099668502807617,grad_norm: 0.999999197199345, iteration: 270534
loss: 0.9974192380905151,grad_norm: 0.8279702053792843, iteration: 270535
loss: 1.0002572536468506,grad_norm: 0.9240962411186091, iteration: 270536
loss: 0.9932262897491455,grad_norm: 0.8851083836161988, iteration: 270537
loss: 1.0005815029144287,grad_norm: 0.8726279726503163, iteration: 270538
loss: 1.0896645784378052,grad_norm: 0.9999993156298496, iteration: 270539
loss: 1.002492070198059,grad_norm: 0.9435815648070092, iteration: 270540
loss: 1.0325112342834473,grad_norm: 0.9999995783363841, iteration: 270541
loss: 0.9894044995307922,grad_norm: 0.7506130457858653, iteration: 270542
loss: 1.034140944480896,grad_norm: 0.9021500916857047, iteration: 270543
loss: 0.972900927066803,grad_norm: 0.8045976064609073, iteration: 270544
loss: 0.9758111238479614,grad_norm: 0.9637043046744701, iteration: 270545
loss: 1.1324431896209717,grad_norm: 0.999999254496701, iteration: 270546
loss: 0.9849494099617004,grad_norm: 0.9120644251624552, iteration: 270547
loss: 0.9988786578178406,grad_norm: 0.8737818451303718, iteration: 270548
loss: 1.0084456205368042,grad_norm: 0.9443421469313635, iteration: 270549
loss: 0.9797135591506958,grad_norm: 0.9431665450730378, iteration: 270550
loss: 1.0135782957077026,grad_norm: 0.8355337484241823, iteration: 270551
loss: 0.944564163684845,grad_norm: 0.8684795575420846, iteration: 270552
loss: 0.9813533425331116,grad_norm: 0.9935231273535188, iteration: 270553
loss: 0.9827156066894531,grad_norm: 0.814958434922953, iteration: 270554
loss: 0.9685181975364685,grad_norm: 0.9370604383480726, iteration: 270555
loss: 1.0196526050567627,grad_norm: 0.8111928936571678, iteration: 270556
loss: 0.9936065673828125,grad_norm: 0.9999990453193078, iteration: 270557
loss: 0.9916346073150635,grad_norm: 0.9999994319625886, iteration: 270558
loss: 1.0639725923538208,grad_norm: 0.9999990716428809, iteration: 270559
loss: 1.007025957107544,grad_norm: 0.997260709579449, iteration: 270560
loss: 1.0376074314117432,grad_norm: 0.9158145138899162, iteration: 270561
loss: 1.0133366584777832,grad_norm: 0.8075850551809385, iteration: 270562
loss: 1.0172616243362427,grad_norm: 0.9999990348869414, iteration: 270563
loss: 1.059806227684021,grad_norm: 0.8203226922044932, iteration: 270564
loss: 0.9758303165435791,grad_norm: 0.9635082291635764, iteration: 270565
loss: 0.9890975952148438,grad_norm: 0.8704494504248229, iteration: 270566
loss: 1.0038352012634277,grad_norm: 0.9999992550081706, iteration: 270567
loss: 0.9957000017166138,grad_norm: 0.9999991470244807, iteration: 270568
loss: 0.9866371154785156,grad_norm: 0.9257197936169752, iteration: 270569
loss: 1.012909173965454,grad_norm: 0.9999993076219722, iteration: 270570
loss: 0.9746960401535034,grad_norm: 0.9229290397489008, iteration: 270571
loss: 0.9790307879447937,grad_norm: 0.9999991625428282, iteration: 270572
loss: 0.9610264301300049,grad_norm: 0.9186228960139303, iteration: 270573
loss: 1.0223031044006348,grad_norm: 0.9253193050062063, iteration: 270574
loss: 0.9826176762580872,grad_norm: 0.8861087810780971, iteration: 270575
loss: 1.021062970161438,grad_norm: 0.7984388594724428, iteration: 270576
loss: 1.0033442974090576,grad_norm: 0.9815178514029342, iteration: 270577
loss: 1.0219284296035767,grad_norm: 0.8040332482228957, iteration: 270578
loss: 1.0061362981796265,grad_norm: 0.7632564356297766, iteration: 270579
loss: 0.9983912706375122,grad_norm: 0.9999992219290781, iteration: 270580
loss: 1.0310229063034058,grad_norm: 0.8718356044562604, iteration: 270581
loss: 0.9753760695457458,grad_norm: 0.7659319209335508, iteration: 270582
loss: 1.05007803440094,grad_norm: 0.9999990519541843, iteration: 270583
loss: 1.0077672004699707,grad_norm: 0.7713600930422463, iteration: 270584
loss: 0.9936509728431702,grad_norm: 0.999999124593353, iteration: 270585
loss: 0.9635050296783447,grad_norm: 0.9029645433880626, iteration: 270586
loss: 1.0136878490447998,grad_norm: 0.7735105316182885, iteration: 270587
loss: 0.9746291637420654,grad_norm: 0.9486935485123906, iteration: 270588
loss: 0.986501932144165,grad_norm: 0.9218581528818833, iteration: 270589
loss: 1.0309221744537354,grad_norm: 0.9777488724572777, iteration: 270590
loss: 1.0208605527877808,grad_norm: 0.735183900750384, iteration: 270591
loss: 1.0085629224777222,grad_norm: 0.9508932000859007, iteration: 270592
loss: 0.9915105700492859,grad_norm: 0.9747547765810035, iteration: 270593
loss: 1.0017273426055908,grad_norm: 0.9999995372691776, iteration: 270594
loss: 1.008528709411621,grad_norm: 0.8665711248145888, iteration: 270595
loss: 0.9803516864776611,grad_norm: 0.796470956120172, iteration: 270596
loss: 1.0167099237442017,grad_norm: 0.9999991194327726, iteration: 270597
loss: 0.9926181435585022,grad_norm: 0.8328296155524367, iteration: 270598
loss: 0.9970311522483826,grad_norm: 0.9999991953892471, iteration: 270599
loss: 0.9935427308082581,grad_norm: 0.8811718298636858, iteration: 270600
loss: 1.02003014087677,grad_norm: 0.9612689159625537, iteration: 270601
loss: 0.9819344878196716,grad_norm: 0.7808100141274563, iteration: 270602
loss: 1.0280860662460327,grad_norm: 0.9999990045686117, iteration: 270603
loss: 0.9744271039962769,grad_norm: 0.9200502657488859, iteration: 270604
loss: 1.0082969665527344,grad_norm: 0.999999440170992, iteration: 270605
loss: 1.0345826148986816,grad_norm: 0.7402766567657406, iteration: 270606
loss: 1.1420923471450806,grad_norm: 0.9999997503342132, iteration: 270607
loss: 0.9486578106880188,grad_norm: 0.9196750590279559, iteration: 270608
loss: 0.98939049243927,grad_norm: 0.9999991783967455, iteration: 270609
loss: 1.0167008638381958,grad_norm: 0.9999992034520879, iteration: 270610
loss: 0.9934009909629822,grad_norm: 0.8921050427097962, iteration: 270611
loss: 0.9994364976882935,grad_norm: 0.934017469361034, iteration: 270612
loss: 1.0065786838531494,grad_norm: 0.9999991756293163, iteration: 270613
loss: 0.951215386390686,grad_norm: 0.7204556330787931, iteration: 270614
loss: 1.0261222124099731,grad_norm: 0.9716365845243342, iteration: 270615
loss: 0.9786704778671265,grad_norm: 0.9218140030583315, iteration: 270616
loss: 1.0365351438522339,grad_norm: 0.8138002768855711, iteration: 270617
loss: 0.9980617761611938,grad_norm: 0.8264054224592484, iteration: 270618
loss: 1.014667272567749,grad_norm: 0.7851348073715833, iteration: 270619
loss: 0.9619463086128235,grad_norm: 0.938240639625203, iteration: 270620
loss: 1.0022671222686768,grad_norm: 0.9999994611699753, iteration: 270621
loss: 1.0600959062576294,grad_norm: 0.9088438166146336, iteration: 270622
loss: 0.9773170948028564,grad_norm: 0.815563567449265, iteration: 270623
loss: 0.9829437732696533,grad_norm: 0.954347781343073, iteration: 270624
loss: 1.0302895307540894,grad_norm: 0.8110045453351981, iteration: 270625
loss: 0.9742910265922546,grad_norm: 0.8644993834340268, iteration: 270626
loss: 1.000240683555603,grad_norm: 0.8597896007404955, iteration: 270627
loss: 1.0065606832504272,grad_norm: 0.9680156715807157, iteration: 270628
loss: 0.998609721660614,grad_norm: 0.8247538112165601, iteration: 270629
loss: 1.0071145296096802,grad_norm: 0.8228834617099056, iteration: 270630
loss: 1.0024991035461426,grad_norm: 0.7687139402096691, iteration: 270631
loss: 1.0142803192138672,grad_norm: 0.9847100885535298, iteration: 270632
loss: 0.9817578196525574,grad_norm: 0.8097516736342322, iteration: 270633
loss: 1.0073652267456055,grad_norm: 0.9999991617644912, iteration: 270634
loss: 1.017972469329834,grad_norm: 0.8705204636674737, iteration: 270635
loss: 0.9921467304229736,grad_norm: 0.7398242303972469, iteration: 270636
loss: 1.0382261276245117,grad_norm: 0.9999991015121381, iteration: 270637
loss: 1.0506129264831543,grad_norm: 0.9999990265688932, iteration: 270638
loss: 1.0032483339309692,grad_norm: 0.8908131051406087, iteration: 270639
loss: 1.026055097579956,grad_norm: 0.9999994441189127, iteration: 270640
loss: 1.0045421123504639,grad_norm: 0.9649139759430381, iteration: 270641
loss: 1.0015925168991089,grad_norm: 0.7457327240887532, iteration: 270642
loss: 0.9749429821968079,grad_norm: 0.774212466138079, iteration: 270643
loss: 0.9832845330238342,grad_norm: 0.8585185078918092, iteration: 270644
loss: 1.0295733213424683,grad_norm: 0.7788442983577966, iteration: 270645
loss: 1.013173222541809,grad_norm: 0.8676203970560578, iteration: 270646
loss: 0.9687842130661011,grad_norm: 0.8629398257248648, iteration: 270647
loss: 0.9817475080490112,grad_norm: 0.9403381658092818, iteration: 270648
loss: 1.0196237564086914,grad_norm: 0.9999999501008591, iteration: 270649
loss: 1.0184361934661865,grad_norm: 0.9999990404449961, iteration: 270650
loss: 1.035267949104309,grad_norm: 0.9999991610952932, iteration: 270651
loss: 1.0079307556152344,grad_norm: 0.9999990928900555, iteration: 270652
loss: 0.9817111492156982,grad_norm: 0.9999993367901627, iteration: 270653
loss: 1.0109111070632935,grad_norm: 0.9999990205404483, iteration: 270654
loss: 1.0236517190933228,grad_norm: 0.9367313168050254, iteration: 270655
loss: 1.1121913194656372,grad_norm: 0.9999998090402532, iteration: 270656
loss: 1.0075421333312988,grad_norm: 0.9999999614815904, iteration: 270657
loss: 0.9933048486709595,grad_norm: 0.7482453116110498, iteration: 270658
loss: 0.9737488031387329,grad_norm: 0.9175712789719807, iteration: 270659
loss: 0.9797599911689758,grad_norm: 0.7772216345843512, iteration: 270660
loss: 0.9748346209526062,grad_norm: 0.8861873371236272, iteration: 270661
loss: 1.0430920124053955,grad_norm: 0.9580965148542123, iteration: 270662
loss: 0.9873484373092651,grad_norm: 0.8982961111884826, iteration: 270663
loss: 1.0136550664901733,grad_norm: 0.9999991085374786, iteration: 270664
loss: 1.1925095319747925,grad_norm: 0.9999992203118836, iteration: 270665
loss: 0.9867379665374756,grad_norm: 0.8820996436240165, iteration: 270666
loss: 0.9981694221496582,grad_norm: 0.7118595186037484, iteration: 270667
loss: 1.0238497257232666,grad_norm: 0.9526106198853911, iteration: 270668
loss: 1.0210211277008057,grad_norm: 0.7854632704129335, iteration: 270669
loss: 0.9712940454483032,grad_norm: 0.827751181854641, iteration: 270670
loss: 1.0313773155212402,grad_norm: 0.9448011481322474, iteration: 270671
loss: 0.9854990243911743,grad_norm: 0.849498748146614, iteration: 270672
loss: 1.0055073499679565,grad_norm: 0.990388690738105, iteration: 270673
loss: 1.0315651893615723,grad_norm: 0.9026143536952153, iteration: 270674
loss: 0.9800554513931274,grad_norm: 0.9583518859865534, iteration: 270675
loss: 1.0115300416946411,grad_norm: 0.8635515904930634, iteration: 270676
loss: 1.0008057355880737,grad_norm: 0.8457722240177102, iteration: 270677
loss: 0.950369119644165,grad_norm: 0.8152702027574736, iteration: 270678
loss: 1.0002894401550293,grad_norm: 0.9999991883661633, iteration: 270679
loss: 0.9954321384429932,grad_norm: 0.7342240519865493, iteration: 270680
loss: 1.0013941526412964,grad_norm: 0.9067459307225741, iteration: 270681
loss: 1.0046210289001465,grad_norm: 0.9330999885413632, iteration: 270682
loss: 1.0116325616836548,grad_norm: 0.9999997684382319, iteration: 270683
loss: 1.0078428983688354,grad_norm: 0.9002011917426581, iteration: 270684
loss: 0.9508357048034668,grad_norm: 0.7730280417828033, iteration: 270685
loss: 1.0270566940307617,grad_norm: 0.7984056102563911, iteration: 270686
loss: 1.087519884109497,grad_norm: 0.8897318928109824, iteration: 270687
loss: 0.9877610802650452,grad_norm: 0.9999989912253994, iteration: 270688
loss: 0.9585881233215332,grad_norm: 0.7523088955075764, iteration: 270689
loss: 0.9874605536460876,grad_norm: 0.9999994895542901, iteration: 270690
loss: 0.967078685760498,grad_norm: 0.8835143739791446, iteration: 270691
loss: 1.015736699104309,grad_norm: 0.9264895461353709, iteration: 270692
loss: 0.9845354557037354,grad_norm: 0.8032095518916311, iteration: 270693
loss: 1.0355613231658936,grad_norm: 0.9999991998060745, iteration: 270694
loss: 1.0306049585342407,grad_norm: 0.9146751001938532, iteration: 270695
loss: 1.0096272230148315,grad_norm: 0.8370201965995047, iteration: 270696
loss: 1.0071020126342773,grad_norm: 0.9774358532774705, iteration: 270697
loss: 1.0181407928466797,grad_norm: 0.7430897339070418, iteration: 270698
loss: 1.0554805994033813,grad_norm: 0.9999998894972164, iteration: 270699
loss: 0.98426353931427,grad_norm: 0.7410876547392075, iteration: 270700
loss: 1.0253911018371582,grad_norm: 0.9999994531802752, iteration: 270701
loss: 0.998996376991272,grad_norm: 0.8072715572823227, iteration: 270702
loss: 0.9873824119567871,grad_norm: 0.7804427008363832, iteration: 270703
loss: 0.9920046925544739,grad_norm: 0.9999990431738913, iteration: 270704
loss: 1.0699397325515747,grad_norm: 0.9999994026188366, iteration: 270705
loss: 1.0296186208724976,grad_norm: 0.9999991462025755, iteration: 270706
loss: 1.0096251964569092,grad_norm: 0.999999020414693, iteration: 270707
loss: 0.9803050756454468,grad_norm: 0.9315566806795856, iteration: 270708
loss: 1.019777774810791,grad_norm: 0.6689005133923931, iteration: 270709
loss: 0.9980781674385071,grad_norm: 0.8187669109656226, iteration: 270710
loss: 1.012939453125,grad_norm: 0.8697626675621766, iteration: 270711
loss: 0.98922199010849,grad_norm: 0.8238813726465882, iteration: 270712
loss: 0.9856922626495361,grad_norm: 0.9999996574171941, iteration: 270713
loss: 0.9784352779388428,grad_norm: 0.7185656454425841, iteration: 270714
loss: 0.9829670190811157,grad_norm: 0.9748210690954014, iteration: 270715
loss: 0.9651104807853699,grad_norm: 0.8630684522660899, iteration: 270716
loss: 1.0354543924331665,grad_norm: 0.9768241731591724, iteration: 270717
loss: 1.0706284046173096,grad_norm: 0.9808619259890592, iteration: 270718
loss: 1.0032141208648682,grad_norm: 0.9999990885631536, iteration: 270719
loss: 1.025849461555481,grad_norm: 0.9999997381595326, iteration: 270720
loss: 1.028746247291565,grad_norm: 0.7665205507823026, iteration: 270721
loss: 1.0129499435424805,grad_norm: 0.9999995638313437, iteration: 270722
loss: 1.058603286743164,grad_norm: 0.9999992505825944, iteration: 270723
loss: 1.0125614404678345,grad_norm: 0.8683811370767412, iteration: 270724
loss: 1.0227527618408203,grad_norm: 0.7656994196932996, iteration: 270725
loss: 1.0172817707061768,grad_norm: 0.7982996703750121, iteration: 270726
loss: 1.0067960023880005,grad_norm: 0.9999990349410257, iteration: 270727
loss: 0.9850669503211975,grad_norm: 0.7337668753447674, iteration: 270728
loss: 1.0333963632583618,grad_norm: 0.8425611811403958, iteration: 270729
loss: 0.9983776211738586,grad_norm: 0.7993712563266977, iteration: 270730
loss: 1.0179284811019897,grad_norm: 0.8725628417569481, iteration: 270731
loss: 0.9910986423492432,grad_norm: 0.8309583438806567, iteration: 270732
loss: 0.9710861444473267,grad_norm: 0.7774543998111084, iteration: 270733
loss: 0.9925400018692017,grad_norm: 0.9999992029634541, iteration: 270734
loss: 1.021370768547058,grad_norm: 0.9999991657533223, iteration: 270735
loss: 1.000587821006775,grad_norm: 0.9498832962486713, iteration: 270736
loss: 1.043370246887207,grad_norm: 0.9999992311046972, iteration: 270737
loss: 1.0101085901260376,grad_norm: 0.8039662438144721, iteration: 270738
loss: 1.039833903312683,grad_norm: 0.9999996962872042, iteration: 270739
loss: 0.98423171043396,grad_norm: 0.8186643327698728, iteration: 270740
loss: 0.9962590932846069,grad_norm: 0.9999991173905068, iteration: 270741
loss: 0.9851177930831909,grad_norm: 0.8529501345500731, iteration: 270742
loss: 1.042872667312622,grad_norm: 0.896904692519052, iteration: 270743
loss: 1.0103187561035156,grad_norm: 0.9862738000066683, iteration: 270744
loss: 1.04316246509552,grad_norm: 0.8646189961386217, iteration: 270745
loss: 0.9988529682159424,grad_norm: 0.7769586689099544, iteration: 270746
loss: 0.9906681776046753,grad_norm: 0.7632204556267392, iteration: 270747
loss: 0.9766210317611694,grad_norm: 0.879943335914668, iteration: 270748
loss: 1.020383358001709,grad_norm: 0.921541516658397, iteration: 270749
loss: 0.9666829705238342,grad_norm: 0.7683059018879581, iteration: 270750
loss: 0.9952592253684998,grad_norm: 0.7984208054799617, iteration: 270751
loss: 0.9693151712417603,grad_norm: 0.9281048410789635, iteration: 270752
loss: 1.001071810722351,grad_norm: 0.8391353836813483, iteration: 270753
loss: 0.99204421043396,grad_norm: 0.9176983504531155, iteration: 270754
loss: 1.0099316835403442,grad_norm: 0.7480152561759966, iteration: 270755
loss: 1.0005519390106201,grad_norm: 0.8666645413140217, iteration: 270756
loss: 1.0110481977462769,grad_norm: 0.7913488065697432, iteration: 270757
loss: 1.0408644676208496,grad_norm: 0.8749477381188474, iteration: 270758
loss: 1.0020899772644043,grad_norm: 0.9324547201785957, iteration: 270759
loss: 1.0372836589813232,grad_norm: 0.9999990555903406, iteration: 270760
loss: 1.0300743579864502,grad_norm: 0.9146600655847917, iteration: 270761
loss: 0.9812913537025452,grad_norm: 0.7628640998760362, iteration: 270762
loss: 1.0893908739089966,grad_norm: 0.9999993570922036, iteration: 270763
loss: 0.9784526228904724,grad_norm: 0.9999990039051829, iteration: 270764
loss: 0.971893310546875,grad_norm: 0.8401957203511686, iteration: 270765
loss: 1.0549143552780151,grad_norm: 0.999999233175301, iteration: 270766
loss: 1.0169960260391235,grad_norm: 0.9241027017179707, iteration: 270767
loss: 1.0051474571228027,grad_norm: 0.9202474405082769, iteration: 270768
loss: 0.9907505512237549,grad_norm: 0.9431984163473341, iteration: 270769
loss: 0.9671837687492371,grad_norm: 0.7676081051178453, iteration: 270770
loss: 0.9803001284599304,grad_norm: 0.9320963228355561, iteration: 270771
loss: 1.0068718194961548,grad_norm: 0.9239103344972468, iteration: 270772
loss: 1.0236639976501465,grad_norm: 0.8448496168755527, iteration: 270773
loss: 1.0495328903198242,grad_norm: 0.9999997023484736, iteration: 270774
loss: 0.9746687412261963,grad_norm: 0.7502253164934483, iteration: 270775
loss: 0.9402596950531006,grad_norm: 0.9939325720859642, iteration: 270776
loss: 1.0068583488464355,grad_norm: 0.9999991127940668, iteration: 270777
loss: 1.0118606090545654,grad_norm: 0.9612081152829862, iteration: 270778
loss: 1.0067168474197388,grad_norm: 0.8544848446819001, iteration: 270779
loss: 0.996550977230072,grad_norm: 0.8906578205436296, iteration: 270780
loss: 0.9516534209251404,grad_norm: 0.9835010685042465, iteration: 270781
loss: 1.110411286354065,grad_norm: 0.9267222976528893, iteration: 270782
loss: 0.9575774669647217,grad_norm: 0.9999990581340323, iteration: 270783
loss: 1.0702978372573853,grad_norm: 0.8051562057482252, iteration: 270784
loss: 0.9989892244338989,grad_norm: 0.8469510324014844, iteration: 270785
loss: 0.9700968861579895,grad_norm: 0.8231418242114952, iteration: 270786
loss: 0.9717004299163818,grad_norm: 0.7797162103051588, iteration: 270787
loss: 1.0324972867965698,grad_norm: 0.8387313910493541, iteration: 270788
loss: 1.0616068840026855,grad_norm: 0.9999990581317451, iteration: 270789
loss: 1.0044796466827393,grad_norm: 0.9327369667870183, iteration: 270790
loss: 1.0073720216751099,grad_norm: 0.9999991180311082, iteration: 270791
loss: 1.0192400217056274,grad_norm: 0.948133595965865, iteration: 270792
loss: 0.9987419843673706,grad_norm: 0.7123934541922707, iteration: 270793
loss: 0.993958055973053,grad_norm: 0.9454205626713915, iteration: 270794
loss: 1.0386793613433838,grad_norm: 0.8584180793133741, iteration: 270795
loss: 0.9844303727149963,grad_norm: 0.9673269260870748, iteration: 270796
loss: 1.0071368217468262,grad_norm: 0.8226324782269222, iteration: 270797
loss: 1.016916036605835,grad_norm: 0.8980427320636406, iteration: 270798
loss: 0.9858735799789429,grad_norm: 0.8061543815973394, iteration: 270799
loss: 1.0112721920013428,grad_norm: 0.8056770394223585, iteration: 270800
loss: 0.9998853802680969,grad_norm: 0.9999989804646117, iteration: 270801
loss: 0.9679180979728699,grad_norm: 0.7817487677673787, iteration: 270802
loss: 1.0080935955047607,grad_norm: 0.6682499162299756, iteration: 270803
loss: 0.9931403398513794,grad_norm: 0.9999991111857035, iteration: 270804
loss: 1.0170643329620361,grad_norm: 0.9999990453038942, iteration: 270805
loss: 1.0125311613082886,grad_norm: 0.9999990457969166, iteration: 270806
loss: 0.9695260524749756,grad_norm: 0.8545666050876451, iteration: 270807
loss: 1.0168486833572388,grad_norm: 0.9224957065257479, iteration: 270808
loss: 1.0015802383422852,grad_norm: 0.8608903670881827, iteration: 270809
loss: 1.0005455017089844,grad_norm: 0.8218619963069789, iteration: 270810
loss: 0.9711601138114929,grad_norm: 0.9038342794001675, iteration: 270811
loss: 1.023553729057312,grad_norm: 0.9999992712843973, iteration: 270812
loss: 1.0123884677886963,grad_norm: 0.9491282362259409, iteration: 270813
loss: 0.9805076122283936,grad_norm: 0.8483586482103598, iteration: 270814
loss: 0.988002598285675,grad_norm: 0.8123670418778093, iteration: 270815
loss: 1.0069868564605713,grad_norm: 0.7348009104016949, iteration: 270816
loss: 1.0207403898239136,grad_norm: 0.9766161498165228, iteration: 270817
loss: 0.9993460774421692,grad_norm: 0.9999990140981244, iteration: 270818
loss: 0.9843462705612183,grad_norm: 0.726324213533579, iteration: 270819
loss: 0.9967028498649597,grad_norm: 0.8616974271317231, iteration: 270820
loss: 0.9777625203132629,grad_norm: 0.7901698487643752, iteration: 270821
loss: 0.9649859070777893,grad_norm: 0.8373866170892383, iteration: 270822
loss: 1.057839274406433,grad_norm: 0.9632723331704538, iteration: 270823
loss: 1.0686004161834717,grad_norm: 0.9133289763949227, iteration: 270824
loss: 1.0005854368209839,grad_norm: 0.80957990527559, iteration: 270825
loss: 1.009924292564392,grad_norm: 0.9727811149500807, iteration: 270826
loss: 1.0113956928253174,grad_norm: 0.8350657173186343, iteration: 270827
loss: 1.0119646787643433,grad_norm: 0.8194920648323587, iteration: 270828
loss: 1.0090548992156982,grad_norm: 0.8581810728846018, iteration: 270829
loss: 0.973947286605835,grad_norm: 0.9989762552014225, iteration: 270830
loss: 0.9863431453704834,grad_norm: 0.9066014368113926, iteration: 270831
loss: 1.0164425373077393,grad_norm: 0.7372512046346775, iteration: 270832
loss: 1.001330852508545,grad_norm: 0.8869109607711831, iteration: 270833
loss: 1.0134272575378418,grad_norm: 0.9278162283620363, iteration: 270834
loss: 0.9716966152191162,grad_norm: 0.9379788293430431, iteration: 270835
loss: 0.9927855134010315,grad_norm: 0.9770720331512509, iteration: 270836
loss: 0.9984341859817505,grad_norm: 0.7850187720611256, iteration: 270837
loss: 1.014804482460022,grad_norm: 0.9023296823820187, iteration: 270838
loss: 0.9453371167182922,grad_norm: 0.8352221590522889, iteration: 270839
loss: 1.0263370275497437,grad_norm: 0.7762511529064703, iteration: 270840
loss: 0.9837102293968201,grad_norm: 0.999999079851843, iteration: 270841
loss: 1.03225576877594,grad_norm: 0.8595059924698852, iteration: 270842
loss: 1.072609782218933,grad_norm: 0.9999993029079121, iteration: 270843
loss: 1.0248581171035767,grad_norm: 0.6863360460698906, iteration: 270844
loss: 1.009022831916809,grad_norm: 0.8171320195515059, iteration: 270845
loss: 0.9997320771217346,grad_norm: 0.9349755321817007, iteration: 270846
loss: 1.0222523212432861,grad_norm: 0.8366141252663155, iteration: 270847
loss: 1.025944709777832,grad_norm: 0.813846773801271, iteration: 270848
loss: 1.0242927074432373,grad_norm: 0.7407288772063152, iteration: 270849
loss: 0.9863795638084412,grad_norm: 0.9448185473581342, iteration: 270850
loss: 1.0052968263626099,grad_norm: 0.8124193038842745, iteration: 270851
loss: 1.0280369520187378,grad_norm: 0.8886751581197273, iteration: 270852
loss: 1.0045711994171143,grad_norm: 0.7968167875218498, iteration: 270853
loss: 1.0081483125686646,grad_norm: 0.9999993031920035, iteration: 270854
loss: 1.0105867385864258,grad_norm: 0.9520319455297097, iteration: 270855
loss: 0.9751196503639221,grad_norm: 0.9333621687051834, iteration: 270856
loss: 0.9946095943450928,grad_norm: 0.9086571066622694, iteration: 270857
loss: 0.993432879447937,grad_norm: 0.7918242028136331, iteration: 270858
loss: 0.9725457429885864,grad_norm: 0.9999990686498719, iteration: 270859
loss: 1.0193861722946167,grad_norm: 0.6971847894924974, iteration: 270860
loss: 1.0111693143844604,grad_norm: 0.8558349241532025, iteration: 270861
loss: 0.9751994609832764,grad_norm: 0.799800182910716, iteration: 270862
loss: 0.9670056104660034,grad_norm: 0.9912628557328018, iteration: 270863
loss: 0.9697414636611938,grad_norm: 0.8578579591532163, iteration: 270864
loss: 1.0160129070281982,grad_norm: 0.9834295533101446, iteration: 270865
loss: 0.9762142300605774,grad_norm: 0.9431444703962343, iteration: 270866
loss: 1.004338264465332,grad_norm: 0.8226436169208144, iteration: 270867
loss: 1.0099540948867798,grad_norm: 0.9040360480364917, iteration: 270868
loss: 1.0194711685180664,grad_norm: 0.9069133622233465, iteration: 270869
loss: 0.9950094223022461,grad_norm: 0.9335166885018177, iteration: 270870
loss: 1.0370420217514038,grad_norm: 0.9999994086129005, iteration: 270871
loss: 0.997434675693512,grad_norm: 0.8617763148145413, iteration: 270872
loss: 0.9948171377182007,grad_norm: 0.8847000244145614, iteration: 270873
loss: 0.991098165512085,grad_norm: 0.8529212231589534, iteration: 270874
loss: 1.0858442783355713,grad_norm: 0.9999992631070457, iteration: 270875
loss: 1.011584758758545,grad_norm: 0.8749860169846977, iteration: 270876
loss: 0.9838191270828247,grad_norm: 0.8108228357762572, iteration: 270877
loss: 1.0293699502944946,grad_norm: 0.9396887575106099, iteration: 270878
loss: 1.011181354522705,grad_norm: 0.9999996225460268, iteration: 270879
loss: 1.0178604125976562,grad_norm: 0.9551900316127933, iteration: 270880
loss: 1.0111981630325317,grad_norm: 0.8402089562545436, iteration: 270881
loss: 1.0084651708602905,grad_norm: 0.804587214684832, iteration: 270882
loss: 0.9884799122810364,grad_norm: 0.9843142190439161, iteration: 270883
loss: 1.0031840801239014,grad_norm: 0.8830203663531206, iteration: 270884
loss: 1.0075969696044922,grad_norm: 0.9278673399931749, iteration: 270885
loss: 1.0879321098327637,grad_norm: 0.9999990920898262, iteration: 270886
loss: 1.2003883123397827,grad_norm: 0.99999992927097, iteration: 270887
loss: 1.004409670829773,grad_norm: 0.9999993871757435, iteration: 270888
loss: 1.0102722644805908,grad_norm: 0.7409892522720283, iteration: 270889
loss: 0.9886734485626221,grad_norm: 0.9594378871285353, iteration: 270890
loss: 0.9856228828430176,grad_norm: 0.7963119236358421, iteration: 270891
loss: 0.9868899583816528,grad_norm: 0.9999998282053671, iteration: 270892
loss: 0.974850058555603,grad_norm: 0.9955162150088175, iteration: 270893
loss: 0.9975118041038513,grad_norm: 0.6817443489407167, iteration: 270894
loss: 1.062119960784912,grad_norm: 0.8128490882892884, iteration: 270895
loss: 0.989423930644989,grad_norm: 0.8124666651099534, iteration: 270896
loss: 0.9796367287635803,grad_norm: 0.8596561410773359, iteration: 270897
loss: 1.0099377632141113,grad_norm: 0.9021262602089992, iteration: 270898
loss: 1.0322437286376953,grad_norm: 0.8684686170154687, iteration: 270899
loss: 0.975955605506897,grad_norm: 0.9623100814497398, iteration: 270900
loss: 1.0168367624282837,grad_norm: 0.8726274688678061, iteration: 270901
loss: 1.079529047012329,grad_norm: 0.8388341989453186, iteration: 270902
loss: 1.0235975980758667,grad_norm: 0.9356241880179402, iteration: 270903
loss: 1.0423986911773682,grad_norm: 0.9851167198824158, iteration: 270904
loss: 1.0008151531219482,grad_norm: 0.7801272089845209, iteration: 270905
loss: 1.0053132772445679,grad_norm: 0.9999992382315529, iteration: 270906
loss: 0.9999399185180664,grad_norm: 0.8257406380452992, iteration: 270907
loss: 1.0235209465026855,grad_norm: 0.9210897055834728, iteration: 270908
loss: 0.9948959350585938,grad_norm: 0.8811433355837005, iteration: 270909
loss: 1.0116076469421387,grad_norm: 0.77845188162259, iteration: 270910
loss: 0.9793143272399902,grad_norm: 0.8500171568097954, iteration: 270911
loss: 0.9908762574195862,grad_norm: 0.9580936886534036, iteration: 270912
loss: 0.9869564175605774,grad_norm: 0.9999991815465544, iteration: 270913
loss: 0.9845237135887146,grad_norm: 0.9587255165363932, iteration: 270914
loss: 0.998002290725708,grad_norm: 0.7490442366893354, iteration: 270915
loss: 0.9978922009468079,grad_norm: 0.9493160547918641, iteration: 270916
loss: 0.9660845398902893,grad_norm: 0.8742306301829391, iteration: 270917
loss: 1.0243492126464844,grad_norm: 0.9123367618420071, iteration: 270918
loss: 1.0161001682281494,grad_norm: 0.9545064433600632, iteration: 270919
loss: 0.999895453453064,grad_norm: 0.9999991367321839, iteration: 270920
loss: 0.9875534176826477,grad_norm: 0.7795211344315605, iteration: 270921
loss: 1.0505344867706299,grad_norm: 0.7879739258830177, iteration: 270922
loss: 0.9752496480941772,grad_norm: 0.7933414913664294, iteration: 270923
loss: 0.9936544299125671,grad_norm: 0.9006611988679432, iteration: 270924
loss: 1.0019315481185913,grad_norm: 0.8952253617268118, iteration: 270925
loss: 1.001284122467041,grad_norm: 0.977010076718441, iteration: 270926
loss: 1.0218479633331299,grad_norm: 0.7475066089433567, iteration: 270927
loss: 1.0427552461624146,grad_norm: 0.8399116209844671, iteration: 270928
loss: 1.0219167470932007,grad_norm: 0.8229517754151958, iteration: 270929
loss: 1.0168724060058594,grad_norm: 0.9043240879620674, iteration: 270930
loss: 1.0036324262619019,grad_norm: 0.9128732552107077, iteration: 270931
loss: 0.9984593391418457,grad_norm: 0.9788512143650502, iteration: 270932
loss: 1.0381544828414917,grad_norm: 0.9167317020398347, iteration: 270933
loss: 0.9802674055099487,grad_norm: 0.8384475283681992, iteration: 270934
loss: 0.9967013001441956,grad_norm: 0.810871802475398, iteration: 270935
loss: 1.0145134925842285,grad_norm: 0.9977379286333797, iteration: 270936
loss: 1.0185081958770752,grad_norm: 0.908844772316774, iteration: 270937
loss: 1.035385012626648,grad_norm: 0.9611436608978524, iteration: 270938
loss: 0.9695845246315002,grad_norm: 0.7761214522608519, iteration: 270939
loss: 0.9954812526702881,grad_norm: 0.8694207070758642, iteration: 270940
loss: 1.0006442070007324,grad_norm: 0.9050487649108857, iteration: 270941
loss: 1.0290019512176514,grad_norm: 0.9231191468482514, iteration: 270942
loss: 1.000475287437439,grad_norm: 0.9412451210703809, iteration: 270943
loss: 0.9921069741249084,grad_norm: 0.8436711267626005, iteration: 270944
loss: 1.0401060581207275,grad_norm: 0.9431235888655546, iteration: 270945
loss: 1.0071958303451538,grad_norm: 0.9367880616206374, iteration: 270946
loss: 1.0062437057495117,grad_norm: 0.7801142606679031, iteration: 270947
loss: 1.0117473602294922,grad_norm: 0.9400686055445188, iteration: 270948
loss: 1.00306236743927,grad_norm: 0.7437388657580462, iteration: 270949
loss: 1.2720528841018677,grad_norm: 0.9999992757326619, iteration: 270950
loss: 0.9926995635032654,grad_norm: 0.9356942368562682, iteration: 270951
loss: 1.0184649229049683,grad_norm: 0.8144212210611557, iteration: 270952
loss: 0.9954212307929993,grad_norm: 0.9185700738585493, iteration: 270953
loss: 0.9653005003929138,grad_norm: 0.999999147777273, iteration: 270954
loss: 0.987457811832428,grad_norm: 0.7954897508097648, iteration: 270955
loss: 0.9884268641471863,grad_norm: 0.7815803429767352, iteration: 270956
loss: 0.9853305220603943,grad_norm: 0.8312601898391367, iteration: 270957
loss: 0.9972618818283081,grad_norm: 0.9538486654225549, iteration: 270958
loss: 1.0094411373138428,grad_norm: 0.79792810008871, iteration: 270959
loss: 0.9694684743881226,grad_norm: 0.8678986745704392, iteration: 270960
loss: 0.9598831534385681,grad_norm: 0.908426917716319, iteration: 270961
loss: 0.9750166535377502,grad_norm: 0.8462480483817368, iteration: 270962
loss: 1.0063363313674927,grad_norm: 0.9503044292316001, iteration: 270963
loss: 1.0154601335525513,grad_norm: 0.804605715095919, iteration: 270964
loss: 0.9865328669548035,grad_norm: 0.8294838641970526, iteration: 270965
loss: 0.9836910963058472,grad_norm: 0.9011837458042155, iteration: 270966
loss: 0.9742463231086731,grad_norm: 0.9966520510232624, iteration: 270967
loss: 1.0235472917556763,grad_norm: 0.8013911299479138, iteration: 270968
loss: 1.0188004970550537,grad_norm: 0.7244428037639135, iteration: 270969
loss: 0.9829016923904419,grad_norm: 0.919361354111163, iteration: 270970
loss: 0.9965293407440186,grad_norm: 0.9999990510456715, iteration: 270971
loss: 0.9776308536529541,grad_norm: 0.8281679802200678, iteration: 270972
loss: 0.9847072958946228,grad_norm: 0.9765417409176061, iteration: 270973
loss: 1.0119147300720215,grad_norm: 0.9107779605875993, iteration: 270974
loss: 1.0185319185256958,grad_norm: 0.7759367550817912, iteration: 270975
loss: 1.0593245029449463,grad_norm: 0.9999996137767344, iteration: 270976
loss: 1.0076385736465454,grad_norm: 0.7181271012548427, iteration: 270977
loss: 1.0073291063308716,grad_norm: 0.8627197131013105, iteration: 270978
loss: 0.9743390083312988,grad_norm: 1.0000000214013123, iteration: 270979
loss: 0.9861928820610046,grad_norm: 0.9619665870072517, iteration: 270980
loss: 1.0077176094055176,grad_norm: 0.8133827636446308, iteration: 270981
loss: 0.9726693034172058,grad_norm: 0.7863088996771348, iteration: 270982
loss: 1.028951644897461,grad_norm: 0.9999990482645597, iteration: 270983
loss: 1.0324851274490356,grad_norm: 0.8629752673347703, iteration: 270984
loss: 0.9997879266738892,grad_norm: 0.9609213498974866, iteration: 270985
loss: 1.0269267559051514,grad_norm: 0.8153234667774673, iteration: 270986
loss: 1.0011249780654907,grad_norm: 0.9999992006090664, iteration: 270987
loss: 1.0126885175704956,grad_norm: 0.9999995879079017, iteration: 270988
loss: 1.0553909540176392,grad_norm: 0.999999438951024, iteration: 270989
loss: 1.015489101409912,grad_norm: 0.9999993485380235, iteration: 270990
loss: 0.9794121980667114,grad_norm: 0.9346044004568874, iteration: 270991
loss: 1.0079658031463623,grad_norm: 0.9811541675168447, iteration: 270992
loss: 0.9861725568771362,grad_norm: 0.8673257656226584, iteration: 270993
loss: 1.008607268333435,grad_norm: 0.9147761964517884, iteration: 270994
loss: 1.0108380317687988,grad_norm: 0.9473094445854465, iteration: 270995
loss: 1.012224555015564,grad_norm: 0.9451069952933902, iteration: 270996
loss: 0.9976844191551208,grad_norm: 0.9295323926237788, iteration: 270997
loss: 1.012319564819336,grad_norm: 0.7735647438048501, iteration: 270998
loss: 1.0042641162872314,grad_norm: 0.9878870400052426, iteration: 270999
loss: 0.9640309810638428,grad_norm: 0.8877534738097483, iteration: 271000
loss: 0.9937261939048767,grad_norm: 0.9999989998116403, iteration: 271001
loss: 0.9860069751739502,grad_norm: 0.9560094226590877, iteration: 271002
loss: 1.0121160745620728,grad_norm: 0.7387844027279667, iteration: 271003
loss: 1.003119707107544,grad_norm: 0.9603360499082858, iteration: 271004
loss: 1.0131382942199707,grad_norm: 0.9999991590697507, iteration: 271005
loss: 0.9837927222251892,grad_norm: 0.9671553887282556, iteration: 271006
loss: 0.9887690544128418,grad_norm: 0.9943912663360631, iteration: 271007
loss: 0.9792731404304504,grad_norm: 0.792303689811812, iteration: 271008
loss: 0.9974960088729858,grad_norm: 0.823533973434294, iteration: 271009
loss: 0.9973024129867554,grad_norm: 0.8434529068704502, iteration: 271010
loss: 1.0468841791152954,grad_norm: 0.9182852037894484, iteration: 271011
loss: 0.9784827828407288,grad_norm: 0.7687918974120063, iteration: 271012
loss: 0.9901001453399658,grad_norm: 0.8290289730510685, iteration: 271013
loss: 1.009031057357788,grad_norm: 0.8305644124255765, iteration: 271014
loss: 0.9910535216331482,grad_norm: 0.9052623051213468, iteration: 271015
loss: 0.9832684397697449,grad_norm: 0.8075824251150783, iteration: 271016
loss: 0.9896111488342285,grad_norm: 0.945506671158541, iteration: 271017
loss: 1.0118334293365479,grad_norm: 0.9999991087376925, iteration: 271018
loss: 1.0167927742004395,grad_norm: 0.7577055214088356, iteration: 271019
loss: 1.0146126747131348,grad_norm: 0.8091909171667544, iteration: 271020
loss: 1.0224416255950928,grad_norm: 0.8462758097982913, iteration: 271021
loss: 1.025481104850769,grad_norm: 0.8480932299422642, iteration: 271022
loss: 0.9691290855407715,grad_norm: 0.8281237040139094, iteration: 271023
loss: 1.002484679222107,grad_norm: 0.7977348728198753, iteration: 271024
loss: 0.991068959236145,grad_norm: 0.9999992290386621, iteration: 271025
loss: 1.0102397203445435,grad_norm: 0.9999992074307109, iteration: 271026
loss: 0.9994097352027893,grad_norm: 0.950994163099102, iteration: 271027
loss: 0.9944813251495361,grad_norm: 0.7476354062669867, iteration: 271028
loss: 1.0020594596862793,grad_norm: 0.8535491318928106, iteration: 271029
loss: 0.9832094311714172,grad_norm: 0.9999991222974769, iteration: 271030
loss: 1.006073236465454,grad_norm: 0.8405777969809962, iteration: 271031
loss: 1.018100619316101,grad_norm: 0.9758628063545972, iteration: 271032
loss: 0.9976814389228821,grad_norm: 0.914925865160321, iteration: 271033
loss: 1.053414225578308,grad_norm: 0.9999991618170567, iteration: 271034
loss: 1.0235849618911743,grad_norm: 0.8862831607114903, iteration: 271035
loss: 0.9840555191040039,grad_norm: 0.8559491445725383, iteration: 271036
loss: 1.0194402933120728,grad_norm: 0.6977786107738041, iteration: 271037
loss: 0.9967449903488159,grad_norm: 0.7691669793935435, iteration: 271038
loss: 0.9925358295440674,grad_norm: 0.8426296857340353, iteration: 271039
loss: 1.0010409355163574,grad_norm: 0.8607135414053069, iteration: 271040
loss: 1.0135469436645508,grad_norm: 0.6997898508889987, iteration: 271041
loss: 0.9975783824920654,grad_norm: 0.9999999154237988, iteration: 271042
loss: 1.000960350036621,grad_norm: 0.9539714302225727, iteration: 271043
loss: 1.0372003316879272,grad_norm: 0.9999992118599662, iteration: 271044
loss: 1.0043784379959106,grad_norm: 0.99999933124516, iteration: 271045
loss: 0.9872857928276062,grad_norm: 0.9999992509253967, iteration: 271046
loss: 1.0250582695007324,grad_norm: 0.9999996220706489, iteration: 271047
loss: 0.9741519093513489,grad_norm: 0.8986650428814784, iteration: 271048
loss: 0.9770684838294983,grad_norm: 0.8525334078143993, iteration: 271049
loss: 0.9887182712554932,grad_norm: 0.8518169199346931, iteration: 271050
loss: 1.0058773756027222,grad_norm: 0.9999992718129306, iteration: 271051
loss: 1.0435864925384521,grad_norm: 0.8588491780229175, iteration: 271052
loss: 1.029725193977356,grad_norm: 0.8086728208571775, iteration: 271053
loss: 0.9875216484069824,grad_norm: 0.910853146078186, iteration: 271054
loss: 0.9783139228820801,grad_norm: 0.8365751731758195, iteration: 271055
loss: 1.0865678787231445,grad_norm: 0.9999995471129822, iteration: 271056
loss: 0.9945756793022156,grad_norm: 0.7668970846610876, iteration: 271057
loss: 0.9839656949043274,grad_norm: 0.97130765230512, iteration: 271058
loss: 1.0047680139541626,grad_norm: 0.7632615359834044, iteration: 271059
loss: 0.952669084072113,grad_norm: 0.7676966077029518, iteration: 271060
loss: 0.980631947517395,grad_norm: 0.7554240970486245, iteration: 271061
loss: 1.0115984678268433,grad_norm: 0.9736045511054489, iteration: 271062
loss: 1.0080245733261108,grad_norm: 0.8218946118063282, iteration: 271063
loss: 0.9975800514221191,grad_norm: 0.8881579897627132, iteration: 271064
loss: 1.0033562183380127,grad_norm: 0.7131644863680885, iteration: 271065
loss: 0.9847747683525085,grad_norm: 0.7555031616745745, iteration: 271066
loss: 0.9796335697174072,grad_norm: 0.9305719372781441, iteration: 271067
loss: 0.9926174879074097,grad_norm: 0.8735090296173967, iteration: 271068
loss: 0.9672021865844727,grad_norm: 0.9665159161898627, iteration: 271069
loss: 1.019330382347107,grad_norm: 0.8657163134523411, iteration: 271070
loss: 1.030813217163086,grad_norm: 0.8748977107556906, iteration: 271071
loss: 0.9788430333137512,grad_norm: 0.76013572735712, iteration: 271072
loss: 1.0050418376922607,grad_norm: 0.9181042579272681, iteration: 271073
loss: 0.9929501414299011,grad_norm: 0.828043315590295, iteration: 271074
loss: 0.9982722401618958,grad_norm: 0.8640730651476389, iteration: 271075
loss: 1.008973479270935,grad_norm: 0.7657322492178804, iteration: 271076
loss: 0.9883156418800354,grad_norm: 0.8503411818686682, iteration: 271077
loss: 0.9823013544082642,grad_norm: 0.8604805413879284, iteration: 271078
loss: 1.0088021755218506,grad_norm: 0.8339487636656753, iteration: 271079
loss: 0.9787876009941101,grad_norm: 0.9999990472955086, iteration: 271080
loss: 1.012614369392395,grad_norm: 0.8078464171607335, iteration: 271081
loss: 1.0361746549606323,grad_norm: 0.9337414614926626, iteration: 271082
loss: 0.976214587688446,grad_norm: 0.9999990785834763, iteration: 271083
loss: 0.9921509027481079,grad_norm: 0.9186921601205261, iteration: 271084
loss: 1.054995059967041,grad_norm: 0.8818371200057811, iteration: 271085
loss: 1.0095674991607666,grad_norm: 0.8985379945458759, iteration: 271086
loss: 0.9928238391876221,grad_norm: 0.7516250347916709, iteration: 271087
loss: 0.999590277671814,grad_norm: 0.9142465263172727, iteration: 271088
loss: 1.0144124031066895,grad_norm: 0.8734938644120593, iteration: 271089
loss: 0.9954675436019897,grad_norm: 0.8373850238965287, iteration: 271090
loss: 0.9937375783920288,grad_norm: 0.895545934473469, iteration: 271091
loss: 1.0557377338409424,grad_norm: 0.9999991434955471, iteration: 271092
loss: 1.0048950910568237,grad_norm: 0.7143682115942165, iteration: 271093
loss: 0.9944854378700256,grad_norm: 0.9383739119682274, iteration: 271094
loss: 0.979742705821991,grad_norm: 0.9271737332876607, iteration: 271095
loss: 0.9821324944496155,grad_norm: 0.9222165447459388, iteration: 271096
loss: 0.9617774486541748,grad_norm: 0.8569319470577206, iteration: 271097
loss: 1.0030720233917236,grad_norm: 0.8384882781049452, iteration: 271098
loss: 1.0552536249160767,grad_norm: 0.9999998676542406, iteration: 271099
loss: 0.9831249117851257,grad_norm: 0.9537484727760672, iteration: 271100
loss: 1.0238699913024902,grad_norm: 0.9999990861678443, iteration: 271101
loss: 1.1005613803863525,grad_norm: 0.9999991094403533, iteration: 271102
loss: 1.0196627378463745,grad_norm: 0.955684455950891, iteration: 271103
loss: 1.0215390920639038,grad_norm: 0.9999990703012116, iteration: 271104
loss: 1.0817962884902954,grad_norm: 0.9999998226831994, iteration: 271105
loss: 1.0027289390563965,grad_norm: 0.7807426935934277, iteration: 271106
loss: 1.03615140914917,grad_norm: 0.7239236800460379, iteration: 271107
loss: 0.9393942356109619,grad_norm: 0.9943459410654025, iteration: 271108
loss: 0.9823211431503296,grad_norm: 0.9999997439896535, iteration: 271109
loss: 1.0385302305221558,grad_norm: 0.7254500627888784, iteration: 271110
loss: 1.0170261859893799,grad_norm: 0.916530553850853, iteration: 271111
loss: 0.9944881200790405,grad_norm: 0.99999930749642, iteration: 271112
loss: 0.9943419694900513,grad_norm: 0.8881124443444836, iteration: 271113
loss: 0.9885281920433044,grad_norm: 0.9620955251474199, iteration: 271114
loss: 0.9920660257339478,grad_norm: 0.785500197569344, iteration: 271115
loss: 0.9689425230026245,grad_norm: 0.8966663964161619, iteration: 271116
loss: 0.9938925504684448,grad_norm: 0.8989573510792411, iteration: 271117
loss: 1.002755880355835,grad_norm: 0.9337576595694103, iteration: 271118
loss: 0.9849985837936401,grad_norm: 0.8339295712309105, iteration: 271119
loss: 1.0168625116348267,grad_norm: 0.689795278030731, iteration: 271120
loss: 1.0194836854934692,grad_norm: 0.9999994494162483, iteration: 271121
loss: 1.0049506425857544,grad_norm: 0.9049897223022598, iteration: 271122
loss: 1.069139838218689,grad_norm: 0.846351866087136, iteration: 271123
loss: 0.9662050604820251,grad_norm: 0.8921465358695576, iteration: 271124
loss: 0.9902181029319763,grad_norm: 0.9085033803816854, iteration: 271125
loss: 1.0760105848312378,grad_norm: 0.9999997696515324, iteration: 271126
loss: 1.0203481912612915,grad_norm: 0.7798825524327523, iteration: 271127
loss: 1.0185213088989258,grad_norm: 0.9153132043491305, iteration: 271128
loss: 1.0097349882125854,grad_norm: 0.8715177839909048, iteration: 271129
loss: 0.9953504204750061,grad_norm: 0.782839971490149, iteration: 271130
loss: 0.9908888936042786,grad_norm: 0.9999989785083757, iteration: 271131
loss: 1.0188510417938232,grad_norm: 0.9730087774040522, iteration: 271132
loss: 1.0082515478134155,grad_norm: 0.7747956189381163, iteration: 271133
loss: 1.0047568082809448,grad_norm: 0.8873984353779429, iteration: 271134
loss: 1.019684910774231,grad_norm: 0.999999061141572, iteration: 271135
loss: 1.0042297840118408,grad_norm: 0.9999998024843938, iteration: 271136
loss: 1.0034537315368652,grad_norm: 0.9083619905738937, iteration: 271137
loss: 1.0072444677352905,grad_norm: 0.8728439801913619, iteration: 271138
loss: 1.0291681289672852,grad_norm: 0.8486656542909625, iteration: 271139
loss: 0.9462739825248718,grad_norm: 0.9840257015847451, iteration: 271140
loss: 0.979229211807251,grad_norm: 0.8249614245919218, iteration: 271141
loss: 0.9983330965042114,grad_norm: 0.83235290325405, iteration: 271142
loss: 1.008687138557434,grad_norm: 0.8451330880058561, iteration: 271143
loss: 0.9812704920768738,grad_norm: 0.913912752154201, iteration: 271144
loss: 0.9781559705734253,grad_norm: 0.7585793832570806, iteration: 271145
loss: 1.0136730670928955,grad_norm: 0.9478555368146746, iteration: 271146
loss: 0.9916631579399109,grad_norm: 0.8069848802491263, iteration: 271147
loss: 1.0380146503448486,grad_norm: 0.9943951796942618, iteration: 271148
loss: 1.0099077224731445,grad_norm: 0.8395333785350194, iteration: 271149
loss: 0.9750543832778931,grad_norm: 0.7737530812817711, iteration: 271150
loss: 0.9882881045341492,grad_norm: 0.9999990073301301, iteration: 271151
loss: 0.9816752076148987,grad_norm: 0.7990866529366596, iteration: 271152
loss: 1.0118358135223389,grad_norm: 0.8586332436867387, iteration: 271153
loss: 0.9495750069618225,grad_norm: 0.8742527142960218, iteration: 271154
loss: 1.037675142288208,grad_norm: 0.9466264954528567, iteration: 271155
loss: 0.9760097861289978,grad_norm: 0.8621799182549369, iteration: 271156
loss: 1.0133298635482788,grad_norm: 0.9414834475538922, iteration: 271157
loss: 0.9941799640655518,grad_norm: 0.9999990581173589, iteration: 271158
loss: 0.9386547207832336,grad_norm: 0.9527018667002877, iteration: 271159
loss: 0.9883477687835693,grad_norm: 0.8921305622625461, iteration: 271160
loss: 1.0139738321304321,grad_norm: 0.8234070463219367, iteration: 271161
loss: 0.9843900799751282,grad_norm: 0.9641168428361706, iteration: 271162
loss: 1.0323909521102905,grad_norm: 0.8528108016303797, iteration: 271163
loss: 1.0129671096801758,grad_norm: 0.88032048092508, iteration: 271164
loss: 1.103351354598999,grad_norm: 0.8175924832394895, iteration: 271165
loss: 0.981421947479248,grad_norm: 0.9624262267486335, iteration: 271166
loss: 1.0142467021942139,grad_norm: 0.8938475854731258, iteration: 271167
loss: 1.0104808807373047,grad_norm: 0.7917820643663163, iteration: 271168
loss: 1.0217113494873047,grad_norm: 0.8422091146482606, iteration: 271169
loss: 1.0295761823654175,grad_norm: 0.828710978005038, iteration: 271170
loss: 0.9911397695541382,grad_norm: 0.9251999755937563, iteration: 271171
loss: 0.9890178442001343,grad_norm: 0.8335195450312279, iteration: 271172
loss: 0.9897950291633606,grad_norm: 0.8648053948961283, iteration: 271173
loss: 0.9688798785209656,grad_norm: 0.8879983073137081, iteration: 271174
loss: 0.9493126273155212,grad_norm: 0.8824465286642871, iteration: 271175
loss: 1.018068552017212,grad_norm: 0.8927487870340637, iteration: 271176
loss: 0.9909889101982117,grad_norm: 0.8012840281122426, iteration: 271177
loss: 1.0294371843338013,grad_norm: 0.8490963989014676, iteration: 271178
loss: 0.996990442276001,grad_norm: 0.7900521858089596, iteration: 271179
loss: 1.039085865020752,grad_norm: 0.9210300171340555, iteration: 271180
loss: 1.0186001062393188,grad_norm: 0.9815463656784569, iteration: 271181
loss: 1.0023236274719238,grad_norm: 0.8203087726506744, iteration: 271182
loss: 0.98286372423172,grad_norm: 0.9110929768895337, iteration: 271183
loss: 1.009299874305725,grad_norm: 0.8393670815388647, iteration: 271184
loss: 1.0122514963150024,grad_norm: 0.9999996335887267, iteration: 271185
loss: 1.0032838582992554,grad_norm: 0.7491936753260291, iteration: 271186
loss: 1.0064667463302612,grad_norm: 0.7434197498206018, iteration: 271187
loss: 0.9724711179733276,grad_norm: 0.7371189509013256, iteration: 271188
loss: 1.1499747037887573,grad_norm: 0.9877680555328877, iteration: 271189
loss: 1.0229991674423218,grad_norm: 0.9551398844158729, iteration: 271190
loss: 1.0328561067581177,grad_norm: 0.8559877316637365, iteration: 271191
loss: 0.9895433187484741,grad_norm: 0.8501026620420705, iteration: 271192
loss: 1.0221072435379028,grad_norm: 0.9999991067028866, iteration: 271193
loss: 1.0069385766983032,grad_norm: 0.8163414021214733, iteration: 271194
loss: 1.0241940021514893,grad_norm: 0.980124451401263, iteration: 271195
loss: 0.9941844940185547,grad_norm: 0.7527540068929033, iteration: 271196
loss: 1.0116124153137207,grad_norm: 0.9999991199446971, iteration: 271197
loss: 1.0304757356643677,grad_norm: 0.9568881362702557, iteration: 271198
loss: 0.9609456658363342,grad_norm: 0.8825788557630135, iteration: 271199
loss: 0.9685798287391663,grad_norm: 0.9520133975429853, iteration: 271200
loss: 1.009844422340393,grad_norm: 0.7471597279811609, iteration: 271201
loss: 1.0005080699920654,grad_norm: 0.9999994421764646, iteration: 271202
loss: 1.0207102298736572,grad_norm: 0.8569875758450134, iteration: 271203
loss: 1.0333333015441895,grad_norm: 0.9999998395048668, iteration: 271204
loss: 1.0181020498275757,grad_norm: 0.9999993647631882, iteration: 271205
loss: 1.0156961679458618,grad_norm: 0.8559683052103294, iteration: 271206
loss: 0.9874381422996521,grad_norm: 0.931509218260463, iteration: 271207
loss: 1.0269665718078613,grad_norm: 0.9999990803325944, iteration: 271208
loss: 1.0010294914245605,grad_norm: 0.790672583356099, iteration: 271209
loss: 1.0054353475570679,grad_norm: 0.9752662500281413, iteration: 271210
loss: 1.0019257068634033,grad_norm: 0.8285539031176421, iteration: 271211
loss: 1.00977623462677,grad_norm: 0.810955534531365, iteration: 271212
loss: 1.1178216934204102,grad_norm: 0.9264916461326435, iteration: 271213
loss: 0.9992268681526184,grad_norm: 0.9810250612698146, iteration: 271214
loss: 0.9568778872489929,grad_norm: 0.8229665681067927, iteration: 271215
loss: 0.9966795444488525,grad_norm: 0.9193458427739146, iteration: 271216
loss: 1.022932767868042,grad_norm: 0.9552298112016033, iteration: 271217
loss: 1.0022261142730713,grad_norm: 0.8687858375076132, iteration: 271218
loss: 1.0145922899246216,grad_norm: 0.9030387280175116, iteration: 271219
loss: 0.9938438534736633,grad_norm: 0.8983455415851993, iteration: 271220
loss: 1.0065619945526123,grad_norm: 0.8836170056936997, iteration: 271221
loss: 0.9819036722183228,grad_norm: 0.9999991323090629, iteration: 271222
loss: 0.9921263456344604,grad_norm: 0.9999990652260604, iteration: 271223
loss: 0.9822916984558105,grad_norm: 0.8388326044209673, iteration: 271224
loss: 0.9864723086357117,grad_norm: 0.9149346542011645, iteration: 271225
loss: 0.9565026164054871,grad_norm: 0.7750717425953276, iteration: 271226
loss: 1.0014480352401733,grad_norm: 0.7761399054702379, iteration: 271227
loss: 0.9721397757530212,grad_norm: 0.9686393420453998, iteration: 271228
loss: 1.063753604888916,grad_norm: 0.9999996194184058, iteration: 271229
loss: 0.99448162317276,grad_norm: 0.9999991093850624, iteration: 271230
loss: 1.0014622211456299,grad_norm: 0.901297890206508, iteration: 271231
loss: 0.9977272748947144,grad_norm: 0.999999052164566, iteration: 271232
loss: 0.9933624267578125,grad_norm: 0.9279363104621653, iteration: 271233
loss: 1.0345250368118286,grad_norm: 0.7626537803530512, iteration: 271234
loss: 1.0135383605957031,grad_norm: 0.999999173873041, iteration: 271235
loss: 0.9849505424499512,grad_norm: 0.8725546567322954, iteration: 271236
loss: 0.9859519004821777,grad_norm: 0.8152047243407955, iteration: 271237
loss: 0.9876092076301575,grad_norm: 0.8599557097910936, iteration: 271238
loss: 1.0154606103897095,grad_norm: 0.828957889502133, iteration: 271239
loss: 1.0166163444519043,grad_norm: 0.8495594705693756, iteration: 271240
loss: 1.0626875162124634,grad_norm: 0.8082597082611922, iteration: 271241
loss: 1.0058538913726807,grad_norm: 0.789465064823689, iteration: 271242
loss: 1.136340618133545,grad_norm: 0.9999996170554484, iteration: 271243
loss: 0.9895914793014526,grad_norm: 0.7218417800609146, iteration: 271244
loss: 0.9932783246040344,grad_norm: 0.9999996771468498, iteration: 271245
loss: 0.9979562163352966,grad_norm: 0.9180523599841085, iteration: 271246
loss: 0.9989950656890869,grad_norm: 0.838435752711879, iteration: 271247
loss: 1.0064224004745483,grad_norm: 0.9529867616362703, iteration: 271248
loss: 1.0029804706573486,grad_norm: 0.9297274753354553, iteration: 271249
loss: 1.0174801349639893,grad_norm: 0.9999991320111224, iteration: 271250
loss: 1.0315645933151245,grad_norm: 0.8907297579872502, iteration: 271251
loss: 0.9955286979675293,grad_norm: 0.8085691464170525, iteration: 271252
loss: 0.9694815874099731,grad_norm: 0.8477602069549671, iteration: 271253
loss: 0.9588712453842163,grad_norm: 0.9046480151265399, iteration: 271254
loss: 1.0182560682296753,grad_norm: 0.7951100463808016, iteration: 271255
loss: 1.0488914251327515,grad_norm: 0.9467439224677131, iteration: 271256
loss: 0.9957813024520874,grad_norm: 0.9653513785495741, iteration: 271257
loss: 0.9981186389923096,grad_norm: 0.7393364688125144, iteration: 271258
loss: 0.9927054047584534,grad_norm: 0.9999995649880667, iteration: 271259
loss: 1.0021283626556396,grad_norm: 0.9999990631642675, iteration: 271260
loss: 1.0126508474349976,grad_norm: 0.9298591246993781, iteration: 271261
loss: 1.009434461593628,grad_norm: 0.9282965971849974, iteration: 271262
loss: 0.9775720238685608,grad_norm: 0.8294831871845963, iteration: 271263
loss: 1.0144236087799072,grad_norm: 0.9026005351897501, iteration: 271264
loss: 0.9967933297157288,grad_norm: 0.9999998732841382, iteration: 271265
loss: 0.9893109202384949,grad_norm: 0.9441288986044646, iteration: 271266
loss: 1.0049678087234497,grad_norm: 0.9380173750691877, iteration: 271267
loss: 1.0088168382644653,grad_norm: 0.9260296769841002, iteration: 271268
loss: 0.976947546005249,grad_norm: 0.8905974788574154, iteration: 271269
loss: 1.0574461221694946,grad_norm: 0.7117335494988942, iteration: 271270
loss: 0.9830833077430725,grad_norm: 0.999999136448969, iteration: 271271
loss: 0.9982798099517822,grad_norm: 0.8062126170575897, iteration: 271272
loss: 0.9640988707542419,grad_norm: 0.7824291644035759, iteration: 271273
loss: 1.022262454032898,grad_norm: 0.9385612816812989, iteration: 271274
loss: 1.000512957572937,grad_norm: 0.85896986156139, iteration: 271275
loss: 0.9514910578727722,grad_norm: 0.7560612790552766, iteration: 271276
loss: 1.0297503471374512,grad_norm: 0.8026360111226832, iteration: 271277
loss: 0.9901387691497803,grad_norm: 0.863056269757343, iteration: 271278
loss: 1.0601086616516113,grad_norm: 0.9999998685218456, iteration: 271279
loss: 0.9882670044898987,grad_norm: 0.8559546849846129, iteration: 271280
loss: 0.9946752190589905,grad_norm: 0.8493254289750816, iteration: 271281
loss: 0.9690812230110168,grad_norm: 0.7636520909845094, iteration: 271282
loss: 0.9942593574523926,grad_norm: 0.9061478497627072, iteration: 271283
loss: 0.9970604181289673,grad_norm: 0.96659643684837, iteration: 271284
loss: 1.0681248903274536,grad_norm: 0.9999993918719837, iteration: 271285
loss: 1.0161851644515991,grad_norm: 0.9999991450036761, iteration: 271286
loss: 1.0921616554260254,grad_norm: 0.9999998608122486, iteration: 271287
loss: 1.0072516202926636,grad_norm: 0.8922125701456259, iteration: 271288
loss: 0.9839891195297241,grad_norm: 0.9999990447938021, iteration: 271289
loss: 1.1522575616836548,grad_norm: 0.9999999069880171, iteration: 271290
loss: 1.175876259803772,grad_norm: 0.9999996528683556, iteration: 271291
loss: 1.0065785646438599,grad_norm: 0.8946989537145006, iteration: 271292
loss: 1.0276778936386108,grad_norm: 0.8507453452457092, iteration: 271293
loss: 0.9723106622695923,grad_norm: 0.7755468304062394, iteration: 271294
loss: 0.990768551826477,grad_norm: 0.7668304678631963, iteration: 271295
loss: 1.0581133365631104,grad_norm: 0.940382009152758, iteration: 271296
loss: 1.0480024814605713,grad_norm: 0.9999994677389473, iteration: 271297
loss: 1.0649617910385132,grad_norm: 0.9999993082471602, iteration: 271298
loss: 0.9599865078926086,grad_norm: 0.8654343522296319, iteration: 271299
loss: 1.036605715751648,grad_norm: 0.8357085363777128, iteration: 271300
loss: 0.9542677402496338,grad_norm: 0.9028796001878535, iteration: 271301
loss: 0.986285388469696,grad_norm: 0.7167415908624083, iteration: 271302
loss: 1.0008405447006226,grad_norm: 0.9999991311328997, iteration: 271303
loss: 1.0338776111602783,grad_norm: 0.8800461108710498, iteration: 271304
loss: 1.188320279121399,grad_norm: 0.9999996774873894, iteration: 271305
loss: 0.9700362086296082,grad_norm: 0.935314417316682, iteration: 271306
loss: 1.0205278396606445,grad_norm: 0.8536268279230141, iteration: 271307
loss: 0.9775317311286926,grad_norm: 0.9012926481674354, iteration: 271308
loss: 0.9739253520965576,grad_norm: 0.8657284485599489, iteration: 271309
loss: 0.9942255020141602,grad_norm: 0.9341641125273086, iteration: 271310
loss: 0.9475225806236267,grad_norm: 0.8215038141581374, iteration: 271311
loss: 1.0384405851364136,grad_norm: 0.8012046165575722, iteration: 271312
loss: 1.003966212272644,grad_norm: 0.9246116243311301, iteration: 271313
loss: 1.1897802352905273,grad_norm: 0.9999998831439517, iteration: 271314
loss: 0.9781614542007446,grad_norm: 0.9999989625902067, iteration: 271315
loss: 1.047194480895996,grad_norm: 0.9537042916642753, iteration: 271316
loss: 0.9847785234451294,grad_norm: 0.8175434218449734, iteration: 271317
loss: 0.9910563826560974,grad_norm: 0.8292807189616008, iteration: 271318
loss: 1.0469735860824585,grad_norm: 0.9999999877147965, iteration: 271319
loss: 0.9629281163215637,grad_norm: 0.9757584822490559, iteration: 271320
loss: 1.0249600410461426,grad_norm: 0.9077613773546228, iteration: 271321
loss: 1.0026617050170898,grad_norm: 0.9355717369757199, iteration: 271322
loss: 1.0408234596252441,grad_norm: 0.9999991623586786, iteration: 271323
loss: 0.9828006625175476,grad_norm: 0.999999562300587, iteration: 271324
loss: 1.0018624067306519,grad_norm: 0.9999993793407914, iteration: 271325
loss: 1.0513378381729126,grad_norm: 0.9556783871744691, iteration: 271326
loss: 0.9427338242530823,grad_norm: 0.8212815735265734, iteration: 271327
loss: 1.001444935798645,grad_norm: 0.8557789474469218, iteration: 271328
loss: 1.005347490310669,grad_norm: 0.9999991061420349, iteration: 271329
loss: 1.030962347984314,grad_norm: 0.9238168687902102, iteration: 271330
loss: 0.9786468148231506,grad_norm: 0.9999991531546845, iteration: 271331
loss: 1.0933738946914673,grad_norm: 0.999999508707485, iteration: 271332
loss: 1.0061979293823242,grad_norm: 0.9999991014456411, iteration: 271333
loss: 0.9616219997406006,grad_norm: 0.9999990961159015, iteration: 271334
loss: 1.038130760192871,grad_norm: 0.8908864409109472, iteration: 271335
loss: 1.101000428199768,grad_norm: 0.9999993786603597, iteration: 271336
loss: 0.9907512664794922,grad_norm: 0.7731888813671063, iteration: 271337
loss: 1.0070579051971436,grad_norm: 0.8353513369029836, iteration: 271338
loss: 0.9925105571746826,grad_norm: 0.8425861777297076, iteration: 271339
loss: 0.9632224440574646,grad_norm: 0.8103363408980159, iteration: 271340
loss: 1.0022484064102173,grad_norm: 0.9565027850858205, iteration: 271341
loss: 0.9919008016586304,grad_norm: 0.9999991895627267, iteration: 271342
loss: 1.0535459518432617,grad_norm: 0.9039690494191781, iteration: 271343
loss: 0.9794825315475464,grad_norm: 0.7819376546836806, iteration: 271344
loss: 0.9979546070098877,grad_norm: 0.7779915367085232, iteration: 271345
loss: 0.9823400974273682,grad_norm: 0.9155287357603996, iteration: 271346
loss: 1.0107367038726807,grad_norm: 0.8483316675251173, iteration: 271347
loss: 1.0519143342971802,grad_norm: 0.9999994064836744, iteration: 271348
loss: 1.0160117149353027,grad_norm: 0.7391733363225094, iteration: 271349
loss: 1.0128730535507202,grad_norm: 0.8069105412476553, iteration: 271350
loss: 1.0895278453826904,grad_norm: 0.9999998434541421, iteration: 271351
loss: 0.9943647384643555,grad_norm: 0.8250401111638438, iteration: 271352
loss: 0.9831566214561462,grad_norm: 0.8715895337730447, iteration: 271353
loss: 1.0458844900131226,grad_norm: 0.9999992185335753, iteration: 271354
loss: 0.9610642790794373,grad_norm: 0.9164523537397853, iteration: 271355
loss: 1.0952705144882202,grad_norm: 1.0000000121682406, iteration: 271356
loss: 1.010772943496704,grad_norm: 0.9999991598721699, iteration: 271357
loss: 1.0055747032165527,grad_norm: 0.8292775076188047, iteration: 271358
loss: 0.9535515904426575,grad_norm: 0.9999990292305336, iteration: 271359
loss: 1.1676132678985596,grad_norm: 0.9999992032628228, iteration: 271360
loss: 0.9918519854545593,grad_norm: 0.7272942655778971, iteration: 271361
loss: 1.0447657108306885,grad_norm: 0.7944663738939786, iteration: 271362
loss: 0.9715380668640137,grad_norm: 0.7775816089563057, iteration: 271363
loss: 1.0254899263381958,grad_norm: 0.7571455705900644, iteration: 271364
loss: 0.9802225232124329,grad_norm: 0.8721857302012858, iteration: 271365
loss: 1.0229154825210571,grad_norm: 0.8610027169150196, iteration: 271366
loss: 0.9784060716629028,grad_norm: 0.8651904355938786, iteration: 271367
loss: 1.0904971361160278,grad_norm: 0.9999992923063984, iteration: 271368
loss: 1.0370765924453735,grad_norm: 0.9999998924890492, iteration: 271369
loss: 1.0260474681854248,grad_norm: 0.9999991439663991, iteration: 271370
loss: 1.0256491899490356,grad_norm: 0.9802763301310442, iteration: 271371
loss: 0.955211877822876,grad_norm: 0.73800476300536, iteration: 271372
loss: 1.02236807346344,grad_norm: 0.9261724231992498, iteration: 271373
loss: 0.9766740202903748,grad_norm: 0.8947187007248022, iteration: 271374
loss: 0.9747530221939087,grad_norm: 0.9285781856368727, iteration: 271375
loss: 1.0047225952148438,grad_norm: 0.916906185270642, iteration: 271376
loss: 1.0541589260101318,grad_norm: 0.8901064940136533, iteration: 271377
loss: 1.004932165145874,grad_norm: 0.9999991982314091, iteration: 271378
loss: 1.0161402225494385,grad_norm: 0.9691015710500257, iteration: 271379
loss: 1.0374171733856201,grad_norm: 0.9999998679520657, iteration: 271380
loss: 1.0055800676345825,grad_norm: 0.8356853695859566, iteration: 271381
loss: 1.0397406816482544,grad_norm: 0.7810369024453057, iteration: 271382
loss: 0.9971321821212769,grad_norm: 0.8528168919869512, iteration: 271383
loss: 1.013702630996704,grad_norm: 0.9100639366973111, iteration: 271384
loss: 0.989116907119751,grad_norm: 0.9999989874019929, iteration: 271385
loss: 1.0165075063705444,grad_norm: 0.8942983421129299, iteration: 271386
loss: 0.9880813360214233,grad_norm: 0.8823113673217291, iteration: 271387
loss: 1.0108453035354614,grad_norm: 0.8390422224342055, iteration: 271388
loss: 1.0071443319320679,grad_norm: 0.8851695604659655, iteration: 271389
loss: 0.9909605979919434,grad_norm: 0.8220715826837126, iteration: 271390
loss: 0.9713780879974365,grad_norm: 0.9999990546117838, iteration: 271391
loss: 0.9513813257217407,grad_norm: 0.8504049914347244, iteration: 271392
loss: 0.9900073409080505,grad_norm: 0.9999990104705193, iteration: 271393
loss: 0.996830940246582,grad_norm: 0.935286071566255, iteration: 271394
loss: 1.0170822143554688,grad_norm: 0.9947113047700341, iteration: 271395
loss: 1.0149940252304077,grad_norm: 0.7375054245264958, iteration: 271396
loss: 0.9982324838638306,grad_norm: 0.8557593722351908, iteration: 271397
loss: 1.003976821899414,grad_norm: 0.9708970025776291, iteration: 271398
loss: 1.0156008005142212,grad_norm: 0.8921055569840118, iteration: 271399
loss: 1.0056018829345703,grad_norm: 0.8827802056432228, iteration: 271400
loss: 1.0244148969650269,grad_norm: 0.9999993190244403, iteration: 271401
loss: 0.9953322410583496,grad_norm: 0.9512287645835695, iteration: 271402
loss: 1.0731449127197266,grad_norm: 0.9999996320995344, iteration: 271403
loss: 0.9896162152290344,grad_norm: 0.8111464256842085, iteration: 271404
loss: 0.9813430905342102,grad_norm: 0.9839449803269321, iteration: 271405
loss: 0.9847100377082825,grad_norm: 0.7758861162945897, iteration: 271406
loss: 1.083196997642517,grad_norm: 0.787703398056921, iteration: 271407
loss: 1.0438357591629028,grad_norm: 0.9999991355905097, iteration: 271408
loss: 1.0301352739334106,grad_norm: 0.8742390702821576, iteration: 271409
loss: 1.055065631866455,grad_norm: 0.9999995054373632, iteration: 271410
loss: 0.9913222193717957,grad_norm: 0.9999990546842542, iteration: 271411
loss: 0.9729692935943604,grad_norm: 0.859994194799504, iteration: 271412
loss: 0.9869413375854492,grad_norm: 0.8859983284384562, iteration: 271413
loss: 1.0164061784744263,grad_norm: 0.9857848821916174, iteration: 271414
loss: 0.980804443359375,grad_norm: 0.9135838490989225, iteration: 271415
loss: 0.9789295196533203,grad_norm: 0.913275296735412, iteration: 271416
loss: 0.9472526907920837,grad_norm: 0.8640304630435791, iteration: 271417
loss: 1.1811878681182861,grad_norm: 0.9999994688986578, iteration: 271418
loss: 0.9760224223136902,grad_norm: 0.9535961420030354, iteration: 271419
loss: 1.0544627904891968,grad_norm: 0.9999990663500753, iteration: 271420
loss: 1.0112521648406982,grad_norm: 0.9688065378884612, iteration: 271421
loss: 1.005094289779663,grad_norm: 0.8579829372223641, iteration: 271422
loss: 0.9862574934959412,grad_norm: 0.9248091682776555, iteration: 271423
loss: 1.0759835243225098,grad_norm: 0.9999992078224891, iteration: 271424
loss: 1.0448896884918213,grad_norm: 0.9999996457636722, iteration: 271425
loss: 0.9905758500099182,grad_norm: 0.999999742184576, iteration: 271426
loss: 1.0153740644454956,grad_norm: 0.9999994422522952, iteration: 271427
loss: 1.0396360158920288,grad_norm: 0.8724352561283596, iteration: 271428
loss: 1.045413851737976,grad_norm: 0.9999993929794008, iteration: 271429
loss: 0.9924155473709106,grad_norm: 0.7908859909925612, iteration: 271430
loss: 1.0239331722259521,grad_norm: 0.9999994921534875, iteration: 271431
loss: 1.001313328742981,grad_norm: 0.8541722530492655, iteration: 271432
loss: 1.0015361309051514,grad_norm: 0.9999989657763118, iteration: 271433
loss: 1.017574667930603,grad_norm: 0.9999990912062782, iteration: 271434
loss: 0.9964502453804016,grad_norm: 0.7829999326921967, iteration: 271435
loss: 0.9778797626495361,grad_norm: 0.7826106557661584, iteration: 271436
loss: 1.01471745967865,grad_norm: 0.8759262465752078, iteration: 271437
loss: 1.001785397529602,grad_norm: 0.8474237706453156, iteration: 271438
loss: 1.0412088632583618,grad_norm: 0.9999995665111797, iteration: 271439
loss: 1.0179632902145386,grad_norm: 0.9140362684007155, iteration: 271440
loss: 0.9965469837188721,grad_norm: 0.8511944435535543, iteration: 271441
loss: 1.019422173500061,grad_norm: 0.9999991064992853, iteration: 271442
loss: 1.029380202293396,grad_norm: 0.9999993310971304, iteration: 271443
loss: 1.0082582235336304,grad_norm: 0.884731476779778, iteration: 271444
loss: 1.0004931688308716,grad_norm: 0.7840549728751541, iteration: 271445
loss: 1.013552188873291,grad_norm: 0.8771273399615691, iteration: 271446
loss: 1.0842100381851196,grad_norm: 0.9999995453074213, iteration: 271447
loss: 1.0133707523345947,grad_norm: 0.7810529829550382, iteration: 271448
loss: 1.0392913818359375,grad_norm: 0.9999991657791872, iteration: 271449
loss: 0.9792987108230591,grad_norm: 0.988232151934525, iteration: 271450
loss: 1.0084308385849,grad_norm: 0.8223772093229028, iteration: 271451
loss: 1.0596288442611694,grad_norm: 0.9999992535359621, iteration: 271452
loss: 1.002313494682312,grad_norm: 0.7312908967393814, iteration: 271453
loss: 1.0458834171295166,grad_norm: 0.999999390373088, iteration: 271454
loss: 1.0019406080245972,grad_norm: 0.8328193893909295, iteration: 271455
loss: 0.9687671661376953,grad_norm: 0.7651617653451006, iteration: 271456
loss: 1.0054124593734741,grad_norm: 0.789225187123034, iteration: 271457
loss: 0.983730673789978,grad_norm: 0.8706070216707863, iteration: 271458
loss: 1.0106170177459717,grad_norm: 0.9556391925267371, iteration: 271459
loss: 1.0046007633209229,grad_norm: 0.8439478411166029, iteration: 271460
loss: 1.0171016454696655,grad_norm: 0.9364770785428892, iteration: 271461
loss: 1.0186909437179565,grad_norm: 0.9034801956286996, iteration: 271462
loss: 1.0140775442123413,grad_norm: 0.8160004509127213, iteration: 271463
loss: 1.1027804613113403,grad_norm: 0.999999270338503, iteration: 271464
loss: 1.0010849237442017,grad_norm: 0.7891077739193781, iteration: 271465
loss: 1.025511384010315,grad_norm: 0.9999996438688377, iteration: 271466
loss: 0.9925895929336548,grad_norm: 0.9422625448532893, iteration: 271467
loss: 1.0031695365905762,grad_norm: 0.999999402345577, iteration: 271468
loss: 1.0122872591018677,grad_norm: 0.9999990513508153, iteration: 271469
loss: 1.0115249156951904,grad_norm: 0.8619712046606288, iteration: 271470
loss: 0.9787566065788269,grad_norm: 0.8689075740104137, iteration: 271471
loss: 0.9995867609977722,grad_norm: 0.9999992673473084, iteration: 271472
loss: 1.1649315357208252,grad_norm: 0.9999994254644153, iteration: 271473
loss: 0.9660672545433044,grad_norm: 0.9135529246141939, iteration: 271474
loss: 0.993506908416748,grad_norm: 0.9307679632814536, iteration: 271475
loss: 0.9810783267021179,grad_norm: 0.7085736457507351, iteration: 271476
loss: 1.0085318088531494,grad_norm: 0.9999995030369249, iteration: 271477
loss: 0.9833092093467712,grad_norm: 0.9999994812131398, iteration: 271478
loss: 0.9937276840209961,grad_norm: 0.940993221395772, iteration: 271479
loss: 1.1533757448196411,grad_norm: 0.9999995776330437, iteration: 271480
loss: 0.9739675521850586,grad_norm: 0.9753096955292081, iteration: 271481
loss: 1.0325798988342285,grad_norm: 0.8056406087563659, iteration: 271482
loss: 1.0646089315414429,grad_norm: 0.9999991100125872, iteration: 271483
loss: 1.0216565132141113,grad_norm: 0.9999993733265853, iteration: 271484
loss: 1.0531141757965088,grad_norm: 0.9999994158019616, iteration: 271485
loss: 1.0086417198181152,grad_norm: 0.8132136049152523, iteration: 271486
loss: 0.9958749413490295,grad_norm: 0.936889294125574, iteration: 271487
loss: 1.0306284427642822,grad_norm: 0.8604456536525417, iteration: 271488
loss: 0.9878252744674683,grad_norm: 0.9204862523270305, iteration: 271489
loss: 0.9975664615631104,grad_norm: 0.888057801018851, iteration: 271490
loss: 1.0390805006027222,grad_norm: 0.9999997757901038, iteration: 271491
loss: 1.024412751197815,grad_norm: 0.8874267488148718, iteration: 271492
loss: 1.3043841123580933,grad_norm: 0.9999998095844681, iteration: 271493
loss: 0.9754719138145447,grad_norm: 0.9760128930400815, iteration: 271494
loss: 1.0025783777236938,grad_norm: 0.7925376042073705, iteration: 271495
loss: 0.9904751181602478,grad_norm: 0.9999996067840656, iteration: 271496
loss: 1.1088007688522339,grad_norm: 0.9999998137791073, iteration: 271497
loss: 0.9991909861564636,grad_norm: 0.9305307799990561, iteration: 271498
loss: 0.9749858379364014,grad_norm: 0.9999992595496748, iteration: 271499
loss: 1.0146764516830444,grad_norm: 0.9393376370649046, iteration: 271500
loss: 1.0161449909210205,grad_norm: 0.9347474283493993, iteration: 271501
loss: 1.0036797523498535,grad_norm: 0.8439961031228145, iteration: 271502
loss: 1.014588713645935,grad_norm: 0.7723948810211299, iteration: 271503
loss: 0.984656035900116,grad_norm: 0.8049437020178782, iteration: 271504
loss: 0.97654789686203,grad_norm: 0.968716976056963, iteration: 271505
loss: 1.024969220161438,grad_norm: 0.9999997513570956, iteration: 271506
loss: 1.000459909439087,grad_norm: 0.9999991004939972, iteration: 271507
loss: 1.0053106546401978,grad_norm: 0.7869520028469633, iteration: 271508
loss: 1.0099200010299683,grad_norm: 0.9999997246198097, iteration: 271509
loss: 1.0321745872497559,grad_norm: 0.9999995179065595, iteration: 271510
loss: 1.0713402032852173,grad_norm: 0.9999992103356996, iteration: 271511
loss: 0.9746915102005005,grad_norm: 0.8468932247629772, iteration: 271512
loss: 0.989321768283844,grad_norm: 0.8066020369958833, iteration: 271513
loss: 1.0219439268112183,grad_norm: 0.8362258308453825, iteration: 271514
loss: 1.0284709930419922,grad_norm: 0.8422372464479773, iteration: 271515
loss: 1.0256383419036865,grad_norm: 0.8976429006656615, iteration: 271516
loss: 1.0077400207519531,grad_norm: 0.7608075199671529, iteration: 271517
loss: 1.0173990726470947,grad_norm: 0.9999992185311009, iteration: 271518
loss: 1.0277395248413086,grad_norm: 0.9999990961040656, iteration: 271519
loss: 0.9733489155769348,grad_norm: 0.9608983067358838, iteration: 271520
loss: 0.9672037959098816,grad_norm: 0.999999742210463, iteration: 271521
loss: 1.027685523033142,grad_norm: 0.8747499533243883, iteration: 271522
loss: 1.0271598100662231,grad_norm: 0.9999995312821984, iteration: 271523
loss: 0.9971679449081421,grad_norm: 0.914712347784438, iteration: 271524
loss: 1.0007723569869995,grad_norm: 0.999999320013057, iteration: 271525
loss: 0.9638559818267822,grad_norm: 0.9249653790673856, iteration: 271526
loss: 0.9734435677528381,grad_norm: 0.8438581550204165, iteration: 271527
loss: 0.9879096746444702,grad_norm: 0.8394219970614333, iteration: 271528
loss: 1.0279805660247803,grad_norm: 0.7841673036414357, iteration: 271529
loss: 1.0241016149520874,grad_norm: 0.9129265353217737, iteration: 271530
loss: 1.0192787647247314,grad_norm: 0.9999995416199827, iteration: 271531
loss: 0.9864169955253601,grad_norm: 0.8480135704739808, iteration: 271532
loss: 1.0213700532913208,grad_norm: 0.807401111063358, iteration: 271533
loss: 1.0418633222579956,grad_norm: 0.9999996607516176, iteration: 271534
loss: 0.9700165390968323,grad_norm: 0.9574156401415155, iteration: 271535
loss: 1.0164690017700195,grad_norm: 0.9338904339848736, iteration: 271536
loss: 0.9759737849235535,grad_norm: 0.7359442914246023, iteration: 271537
loss: 1.0132499933242798,grad_norm: 0.8009572481759194, iteration: 271538
loss: 0.9920950531959534,grad_norm: 0.8282660165022834, iteration: 271539
loss: 1.0175342559814453,grad_norm: 0.8535442843499328, iteration: 271540
loss: 0.9860298037528992,grad_norm: 0.7493711395850315, iteration: 271541
loss: 1.0279896259307861,grad_norm: 0.837621806277157, iteration: 271542
loss: 1.0017324686050415,grad_norm: 0.8189017145010999, iteration: 271543
loss: 1.0525041818618774,grad_norm: 0.999999534220259, iteration: 271544
loss: 1.0948117971420288,grad_norm: 0.9999997155707839, iteration: 271545
loss: 1.0232542753219604,grad_norm: 0.8856074221934304, iteration: 271546
loss: 0.9822124242782593,grad_norm: 0.9999991452370204, iteration: 271547
loss: 0.9691619873046875,grad_norm: 0.8255666600266793, iteration: 271548
loss: 1.2059348821640015,grad_norm: 0.9999997539550624, iteration: 271549
loss: 0.9991629719734192,grad_norm: 0.9159867916324202, iteration: 271550
loss: 1.0307426452636719,grad_norm: 0.9424136193567713, iteration: 271551
loss: 0.9773421883583069,grad_norm: 0.8251085277093597, iteration: 271552
loss: 0.9960931539535522,grad_norm: 0.8621289424142199, iteration: 271553
loss: 0.9964022636413574,grad_norm: 0.7872430284491078, iteration: 271554
loss: 1.0346180200576782,grad_norm: 0.9999991598223775, iteration: 271555
loss: 1.0123951435089111,grad_norm: 0.9361029863339054, iteration: 271556
loss: 1.0054750442504883,grad_norm: 0.8841977663645068, iteration: 271557
loss: 1.0087181329727173,grad_norm: 0.9064045011816432, iteration: 271558
loss: 0.999904453754425,grad_norm: 0.9133591045664148, iteration: 271559
loss: 1.1777397394180298,grad_norm: 1.000000007583862, iteration: 271560
loss: 1.0100739002227783,grad_norm: 0.8327391149425419, iteration: 271561
loss: 1.0552473068237305,grad_norm: 0.9427116631036898, iteration: 271562
loss: 1.0086060762405396,grad_norm: 0.8802193257525417, iteration: 271563
loss: 1.0093789100646973,grad_norm: 0.9142213288165872, iteration: 271564
loss: 0.988537073135376,grad_norm: 0.8380849969676726, iteration: 271565
loss: 0.9547042846679688,grad_norm: 0.8223273654382639, iteration: 271566
loss: 1.0480053424835205,grad_norm: 0.9999998728683793, iteration: 271567
loss: 1.0119043588638306,grad_norm: 0.8337048029650856, iteration: 271568
loss: 0.9815334677696228,grad_norm: 0.9999990472621717, iteration: 271569
loss: 1.098817229270935,grad_norm: 0.8924982497493047, iteration: 271570
loss: 0.9761061668395996,grad_norm: 0.783542255546079, iteration: 271571
loss: 1.0200144052505493,grad_norm: 0.8594906049743316, iteration: 271572
loss: 0.9941499829292297,grad_norm: 0.9999997926460503, iteration: 271573
loss: 1.0143986940383911,grad_norm: 0.9303575763182333, iteration: 271574
loss: 0.9852737188339233,grad_norm: 0.9125361006959893, iteration: 271575
loss: 1.2482248544692993,grad_norm: 0.9999992337935788, iteration: 271576
loss: 1.009461522102356,grad_norm: 0.893428297155896, iteration: 271577
loss: 0.9959031939506531,grad_norm: 0.8590387088072556, iteration: 271578
loss: 1.0117912292480469,grad_norm: 0.8719827262815055, iteration: 271579
loss: 0.994922935962677,grad_norm: 0.8647108309536164, iteration: 271580
loss: 1.0047852993011475,grad_norm: 0.8867063577118146, iteration: 271581
loss: 0.9739744663238525,grad_norm: 0.8555022277666763, iteration: 271582
loss: 1.0329303741455078,grad_norm: 0.8294487521920229, iteration: 271583
loss: 0.9722261428833008,grad_norm: 0.7775565173452818, iteration: 271584
loss: 0.9982852935791016,grad_norm: 0.9243152497007022, iteration: 271585
loss: 0.9956790804862976,grad_norm: 0.8234565112452046, iteration: 271586
loss: 1.2237677574157715,grad_norm: 0.9999997601240806, iteration: 271587
loss: 1.001202940940857,grad_norm: 0.8880822570236324, iteration: 271588
loss: 1.1359976530075073,grad_norm: 0.9999991415643933, iteration: 271589
loss: 0.9979431629180908,grad_norm: 0.9999990520611662, iteration: 271590
loss: 1.041239857673645,grad_norm: 0.9999992211915357, iteration: 271591
loss: 1.000320553779602,grad_norm: 0.8576790290935664, iteration: 271592
loss: 1.024246335029602,grad_norm: 0.7658605688537892, iteration: 271593
loss: 1.008718729019165,grad_norm: 0.8246907572463769, iteration: 271594
loss: 0.9966598153114319,grad_norm: 0.8419719491980597, iteration: 271595
loss: 1.0078791379928589,grad_norm: 0.8771737860467739, iteration: 271596
loss: 0.9984518885612488,grad_norm: 0.8819486363319615, iteration: 271597
loss: 1.0253547430038452,grad_norm: 0.8574409219063066, iteration: 271598
loss: 1.015999436378479,grad_norm: 0.8720916664419024, iteration: 271599
loss: 1.0268558263778687,grad_norm: 0.9078403889187898, iteration: 271600
loss: 1.0024560689926147,grad_norm: 0.9179119376859616, iteration: 271601
loss: 1.0006964206695557,grad_norm: 0.999999453548038, iteration: 271602
loss: 0.9769812226295471,grad_norm: 0.8830507591755938, iteration: 271603
loss: 0.9922574758529663,grad_norm: 0.9079882662646317, iteration: 271604
loss: 0.9597945213317871,grad_norm: 0.8987274136813947, iteration: 271605
loss: 1.000867486000061,grad_norm: 0.999999035977647, iteration: 271606
loss: 0.9830357432365417,grad_norm: 0.9102661847345653, iteration: 271607
loss: 0.9948657155036926,grad_norm: 0.9999989889979986, iteration: 271608
loss: 1.0083380937576294,grad_norm: 0.7438247670176786, iteration: 271609
loss: 0.979868471622467,grad_norm: 0.9999992196107306, iteration: 271610
loss: 0.963350772857666,grad_norm: 0.8674306805206231, iteration: 271611
loss: 0.9837100505828857,grad_norm: 0.7636153557109491, iteration: 271612
loss: 0.9914876222610474,grad_norm: 0.9999992196632324, iteration: 271613
loss: 1.019512414932251,grad_norm: 0.999999199007483, iteration: 271614
loss: 1.0489630699157715,grad_norm: 0.999999759774071, iteration: 271615
loss: 0.9903972744941711,grad_norm: 0.820058034233777, iteration: 271616
loss: 1.0454918146133423,grad_norm: 0.8855907280367084, iteration: 271617
loss: 0.9823463559150696,grad_norm: 0.8686425171156475, iteration: 271618
loss: 1.022414207458496,grad_norm: 0.8403173550792391, iteration: 271619
loss: 1.0190504789352417,grad_norm: 0.9117523937148095, iteration: 271620
loss: 1.0159308910369873,grad_norm: 0.8061279860652174, iteration: 271621
loss: 0.9770969152450562,grad_norm: 0.8581543588626921, iteration: 271622
loss: 0.9986570477485657,grad_norm: 0.9474301762458015, iteration: 271623
loss: 0.9872132539749146,grad_norm: 0.947008043833713, iteration: 271624
loss: 1.0711042881011963,grad_norm: 0.9999996250140077, iteration: 271625
loss: 1.0099347829818726,grad_norm: 0.9148562424979079, iteration: 271626
loss: 1.012970209121704,grad_norm: 0.9330379187295763, iteration: 271627
loss: 1.0375739336013794,grad_norm: 0.8438411716014802, iteration: 271628
loss: 0.9859321117401123,grad_norm: 0.9633342967879576, iteration: 271629
loss: 1.0056514739990234,grad_norm: 0.9999992031480204, iteration: 271630
loss: 0.9961749315261841,grad_norm: 0.9388355744455248, iteration: 271631
loss: 0.9665852785110474,grad_norm: 0.9999990583738657, iteration: 271632
loss: 0.9727180600166321,grad_norm: 0.9999990861859955, iteration: 271633
loss: 0.9676582217216492,grad_norm: 0.7442930022607557, iteration: 271634
loss: 0.9914290308952332,grad_norm: 0.7985377158131786, iteration: 271635
loss: 1.034622311592102,grad_norm: 0.7828222311548674, iteration: 271636
loss: 1.0467017889022827,grad_norm: 0.8048364021054398, iteration: 271637
loss: 1.0321614742279053,grad_norm: 0.9258877332471404, iteration: 271638
loss: 1.019166111946106,grad_norm: 0.7782214017716139, iteration: 271639
loss: 1.0065795183181763,grad_norm: 0.9128295704883594, iteration: 271640
loss: 0.9518831968307495,grad_norm: 0.8329030192622304, iteration: 271641
loss: 1.0196162462234497,grad_norm: 0.7881283140657076, iteration: 271642
loss: 1.0295989513397217,grad_norm: 0.8432576502103185, iteration: 271643
loss: 0.9988614320755005,grad_norm: 0.8790850298821443, iteration: 271644
loss: 1.0121797323226929,grad_norm: 0.8206053210115372, iteration: 271645
loss: 0.9992179274559021,grad_norm: 0.876087100825861, iteration: 271646
loss: 0.993493378162384,grad_norm: 0.865968106293609, iteration: 271647
loss: 0.9887786507606506,grad_norm: 0.8401954632079195, iteration: 271648
loss: 1.028883695602417,grad_norm: 0.9678344753338511, iteration: 271649
loss: 0.9715165495872498,grad_norm: 0.9866281733617142, iteration: 271650
loss: 1.016170620918274,grad_norm: 0.8219243004222198, iteration: 271651
loss: 1.023598313331604,grad_norm: 0.951598504471282, iteration: 271652
loss: 1.0106216669082642,grad_norm: 0.9197705699994866, iteration: 271653
loss: 1.0130974054336548,grad_norm: 0.8921479979527517, iteration: 271654
loss: 0.9760311245918274,grad_norm: 0.9935222202253814, iteration: 271655
loss: 0.980742871761322,grad_norm: 0.9999991553373014, iteration: 271656
loss: 1.0060200691223145,grad_norm: 0.9326843665130619, iteration: 271657
loss: 0.9993457794189453,grad_norm: 0.9650385226749946, iteration: 271658
loss: 1.0160189867019653,grad_norm: 0.920646751771718, iteration: 271659
loss: 0.9900262951850891,grad_norm: 0.8775461408209836, iteration: 271660
loss: 0.9851784110069275,grad_norm: 0.9465393136080846, iteration: 271661
loss: 1.00138258934021,grad_norm: 0.7399297369722255, iteration: 271662
loss: 0.9941195249557495,grad_norm: 0.7789236290475985, iteration: 271663
loss: 1.0430030822753906,grad_norm: 0.9999991267408304, iteration: 271664
loss: 0.9975934028625488,grad_norm: 0.8345505051802474, iteration: 271665
loss: 0.9787235260009766,grad_norm: 0.7804791962123486, iteration: 271666
loss: 0.9574416279792786,grad_norm: 0.8248183949496259, iteration: 271667
loss: 1.033021330833435,grad_norm: 0.6628239938688409, iteration: 271668
loss: 1.0184743404388428,grad_norm: 0.8334658370593998, iteration: 271669
loss: 1.0248937606811523,grad_norm: 0.9351502362915444, iteration: 271670
loss: 0.9793501496315002,grad_norm: 0.9999989736945485, iteration: 271671
loss: 1.0071367025375366,grad_norm: 0.9192292489354004, iteration: 271672
loss: 0.9940975308418274,grad_norm: 0.7871660911306906, iteration: 271673
loss: 1.000665307044983,grad_norm: 0.853393282026062, iteration: 271674
loss: 1.0129265785217285,grad_norm: 0.6687185491302304, iteration: 271675
loss: 1.006587028503418,grad_norm: 0.7877078008721394, iteration: 271676
loss: 1.0078710317611694,grad_norm: 0.9999994418847395, iteration: 271677
loss: 0.9881649017333984,grad_norm: 0.9008933386915304, iteration: 271678
loss: 0.99019455909729,grad_norm: 0.9999991696397456, iteration: 271679
loss: 0.9843026995658875,grad_norm: 0.793222903493389, iteration: 271680
loss: 0.9884907007217407,grad_norm: 0.7676367896995477, iteration: 271681
loss: 0.9772648811340332,grad_norm: 0.840979132389391, iteration: 271682
loss: 1.0212914943695068,grad_norm: 0.8378862880143746, iteration: 271683
loss: 1.0066953897476196,grad_norm: 0.8896905273860711, iteration: 271684
loss: 1.0095192193984985,grad_norm: 0.999999180475985, iteration: 271685
loss: 1.0213570594787598,grad_norm: 0.9999990767340461, iteration: 271686
loss: 0.9838827252388,grad_norm: 0.9954048565374021, iteration: 271687
loss: 1.0505834817886353,grad_norm: 0.9999993460913253, iteration: 271688
loss: 1.0016016960144043,grad_norm: 0.9175723333996106, iteration: 271689
loss: 0.9645238518714905,grad_norm: 0.9227866815838319, iteration: 271690
loss: 0.9926303625106812,grad_norm: 0.8352156913461496, iteration: 271691
loss: 0.9881793260574341,grad_norm: 0.8305064472641684, iteration: 271692
loss: 1.0316925048828125,grad_norm: 0.9999994159166307, iteration: 271693
loss: 1.0100393295288086,grad_norm: 0.8378820036190363, iteration: 271694
loss: 0.9995262026786804,grad_norm: 0.9783917147071315, iteration: 271695
loss: 1.0280747413635254,grad_norm: 0.7932746027818449, iteration: 271696
loss: 1.0482796430587769,grad_norm: 0.8727395475940465, iteration: 271697
loss: 1.0547471046447754,grad_norm: 0.9999994518412914, iteration: 271698
loss: 0.981975793838501,grad_norm: 0.8475078246271951, iteration: 271699
loss: 1.0140031576156616,grad_norm: 0.9999991383121548, iteration: 271700
loss: 0.9575685262680054,grad_norm: 0.8495520677277419, iteration: 271701
loss: 0.9776738882064819,grad_norm: 0.862947364129933, iteration: 271702
loss: 1.011671543121338,grad_norm: 0.914040427958801, iteration: 271703
loss: 1.0082885026931763,grad_norm: 0.8384868967160548, iteration: 271704
loss: 0.9729228019714355,grad_norm: 0.822275819249684, iteration: 271705
loss: 0.99104905128479,grad_norm: 0.8178583872990002, iteration: 271706
loss: 0.9924030900001526,grad_norm: 0.8640141058818919, iteration: 271707
loss: 0.9702549576759338,grad_norm: 0.9260644903643037, iteration: 271708
loss: 1.0165095329284668,grad_norm: 0.9544334195703832, iteration: 271709
loss: 0.9930527806282043,grad_norm: 0.8617558594807995, iteration: 271710
loss: 1.0233997106552124,grad_norm: 0.9729182016728057, iteration: 271711
loss: 0.9430543780326843,grad_norm: 0.8540374857166495, iteration: 271712
loss: 0.9996013045310974,grad_norm: 0.924610559348247, iteration: 271713
loss: 1.0612809658050537,grad_norm: 0.9402421419825273, iteration: 271714
loss: 1.0417777299880981,grad_norm: 0.9664957852282915, iteration: 271715
loss: 0.9886625409126282,grad_norm: 0.9999991296194422, iteration: 271716
loss: 0.9897106289863586,grad_norm: 0.8241666046962374, iteration: 271717
loss: 0.99882972240448,grad_norm: 0.8627464515454255, iteration: 271718
loss: 0.9986525774002075,grad_norm: 0.9446616671589468, iteration: 271719
loss: 1.0177208185195923,grad_norm: 0.9999991389684874, iteration: 271720
loss: 0.9857942461967468,grad_norm: 0.7712504040382728, iteration: 271721
loss: 1.006730556488037,grad_norm: 0.787808999585591, iteration: 271722
loss: 1.0068727731704712,grad_norm: 0.8880725303498951, iteration: 271723
loss: 1.0131046772003174,grad_norm: 0.7650279136501538, iteration: 271724
loss: 1.0083192586898804,grad_norm: 0.8879221403528574, iteration: 271725
loss: 0.9988889098167419,grad_norm: 0.9871886646826505, iteration: 271726
loss: 1.001970648765564,grad_norm: 0.7872608038265295, iteration: 271727
loss: 1.0294742584228516,grad_norm: 0.9333937927553001, iteration: 271728
loss: 1.0503196716308594,grad_norm: 0.8667430345238114, iteration: 271729
loss: 1.0246453285217285,grad_norm: 0.9999992767036678, iteration: 271730
loss: 0.9990132451057434,grad_norm: 0.7865783360255161, iteration: 271731
loss: 0.9855641722679138,grad_norm: 0.8653400313026526, iteration: 271732
loss: 0.9948318600654602,grad_norm: 0.9486355603636163, iteration: 271733
loss: 0.9951368570327759,grad_norm: 0.9959298704842298, iteration: 271734
loss: 1.0005738735198975,grad_norm: 0.6941742255997717, iteration: 271735
loss: 0.9743008017539978,grad_norm: 0.8180455972392693, iteration: 271736
loss: 0.9922987818717957,grad_norm: 0.8127715151624695, iteration: 271737
loss: 1.006341814994812,grad_norm: 0.7704332225837452, iteration: 271738
loss: 0.9759324789047241,grad_norm: 0.7522268325330812, iteration: 271739
loss: 1.0358327627182007,grad_norm: 0.7670544730891632, iteration: 271740
loss: 1.0164191722869873,grad_norm: 0.889997799607382, iteration: 271741
loss: 1.0222214460372925,grad_norm: 0.9075192589514133, iteration: 271742
loss: 1.1735535860061646,grad_norm: 0.999999144668863, iteration: 271743
loss: 0.9876790046691895,grad_norm: 0.9999991083934158, iteration: 271744
loss: 1.0242693424224854,grad_norm: 0.9162338436337444, iteration: 271745
loss: 0.9666580557823181,grad_norm: 0.8570575683914133, iteration: 271746
loss: 0.9932975172996521,grad_norm: 0.8222821915629622, iteration: 271747
loss: 1.0242869853973389,grad_norm: 0.9081637178225759, iteration: 271748
loss: 0.9851227402687073,grad_norm: 0.6895020912439963, iteration: 271749
loss: 0.9669236540794373,grad_norm: 0.9240304767281333, iteration: 271750
loss: 1.0001709461212158,grad_norm: 0.9999990078383382, iteration: 271751
loss: 1.0792778730392456,grad_norm: 0.7740913817646475, iteration: 271752
loss: 1.004306435585022,grad_norm: 0.8349994915476145, iteration: 271753
loss: 0.9996813535690308,grad_norm: 0.9999992562208972, iteration: 271754
loss: 1.0300509929656982,grad_norm: 0.8750624738223528, iteration: 271755
loss: 0.9853311777114868,grad_norm: 0.983910777411224, iteration: 271756
loss: 1.0156521797180176,grad_norm: 0.6716077113416912, iteration: 271757
loss: 1.040102481842041,grad_norm: 0.9999990930003675, iteration: 271758
loss: 0.9763651490211487,grad_norm: 0.9015243426403255, iteration: 271759
loss: 1.0021556615829468,grad_norm: 0.9999990073884144, iteration: 271760
loss: 1.0064342021942139,grad_norm: 0.8897588547110389, iteration: 271761
loss: 0.9916711449623108,grad_norm: 0.999999416540934, iteration: 271762
loss: 1.018420696258545,grad_norm: 0.7634637882069932, iteration: 271763
loss: 1.0083885192871094,grad_norm: 0.9398773062653326, iteration: 271764
loss: 1.0031975507736206,grad_norm: 0.9999996969815833, iteration: 271765
loss: 1.022004246711731,grad_norm: 0.9506642803492542, iteration: 271766
loss: 0.9953109622001648,grad_norm: 0.985592495737725, iteration: 271767
loss: 0.9871086478233337,grad_norm: 0.9278036100979096, iteration: 271768
loss: 1.0648211240768433,grad_norm: 0.9999993879149514, iteration: 271769
loss: 1.0260696411132812,grad_norm: 0.8546150922272241, iteration: 271770
loss: 1.0721062421798706,grad_norm: 0.9827301953098099, iteration: 271771
loss: 1.023136019706726,grad_norm: 0.9056455750222824, iteration: 271772
loss: 1.0314606428146362,grad_norm: 0.9999992328940837, iteration: 271773
loss: 0.9517881274223328,grad_norm: 0.9047131585228874, iteration: 271774
loss: 1.0001821517944336,grad_norm: 0.9686336494356864, iteration: 271775
loss: 0.9939977526664734,grad_norm: 0.8635029142733059, iteration: 271776
loss: 1.0437700748443604,grad_norm: 0.9979327216630535, iteration: 271777
loss: 0.9813476204872131,grad_norm: 0.8279111786872879, iteration: 271778
loss: 1.0402100086212158,grad_norm: 0.7688371797814412, iteration: 271779
loss: 1.0116029977798462,grad_norm: 0.839859432555951, iteration: 271780
loss: 0.970272958278656,grad_norm: 0.872310922042531, iteration: 271781
loss: 1.0055651664733887,grad_norm: 0.7953889176517189, iteration: 271782
loss: 1.0175896883010864,grad_norm: 0.99999894170381, iteration: 271783
loss: 1.0004621744155884,grad_norm: 0.9845708695088428, iteration: 271784
loss: 0.9967775940895081,grad_norm: 0.9999991041308699, iteration: 271785
loss: 0.9672317504882812,grad_norm: 0.9999992085398536, iteration: 271786
loss: 1.000800371170044,grad_norm: 0.8854097944909951, iteration: 271787
loss: 0.9827494025230408,grad_norm: 0.8198099254330478, iteration: 271788
loss: 0.9851590991020203,grad_norm: 0.9579951408037123, iteration: 271789
loss: 1.0333002805709839,grad_norm: 0.9999991357719161, iteration: 271790
loss: 0.9712724685668945,grad_norm: 0.7535947791733426, iteration: 271791
loss: 1.0244126319885254,grad_norm: 0.9999989702348319, iteration: 271792
loss: 0.9980258941650391,grad_norm: 0.9550977827190479, iteration: 271793
loss: 1.0045216083526611,grad_norm: 0.786655169888168, iteration: 271794
loss: 0.9978470802307129,grad_norm: 0.7697843568909202, iteration: 271795
loss: 0.9666294455528259,grad_norm: 0.7454682668844992, iteration: 271796
loss: 1.0395163297653198,grad_norm: 0.8413360736927696, iteration: 271797
loss: 1.0119577646255493,grad_norm: 0.7021427147847049, iteration: 271798
loss: 1.0041718482971191,grad_norm: 0.8127987636892043, iteration: 271799
loss: 0.9841263890266418,grad_norm: 0.9354756384128222, iteration: 271800
loss: 0.9778631329536438,grad_norm: 0.9820922991803479, iteration: 271801
loss: 1.0175025463104248,grad_norm: 0.8391615948100807, iteration: 271802
loss: 1.000756859779358,grad_norm: 0.9999990584965309, iteration: 271803
loss: 1.0030403137207031,grad_norm: 0.7082044876359933, iteration: 271804
loss: 0.992456316947937,grad_norm: 0.8905524485044596, iteration: 271805
loss: 1.0595811605453491,grad_norm: 0.9999990962845992, iteration: 271806
loss: 1.0181008577346802,grad_norm: 0.8300599710408497, iteration: 271807
loss: 0.9544574618339539,grad_norm: 0.9999992149759315, iteration: 271808
loss: 0.9798517823219299,grad_norm: 0.7828608517342031, iteration: 271809
loss: 0.980238676071167,grad_norm: 0.8103099356381579, iteration: 271810
loss: 1.00799560546875,grad_norm: 0.9412856422157186, iteration: 271811
loss: 0.9859732985496521,grad_norm: 0.812662020158486, iteration: 271812
loss: 1.0400032997131348,grad_norm: 0.8662260851218716, iteration: 271813
loss: 1.0084213018417358,grad_norm: 0.8228351678454285, iteration: 271814
loss: 1.0166703462600708,grad_norm: 0.8177276024103871, iteration: 271815
loss: 0.9969727993011475,grad_norm: 0.7964720920516485, iteration: 271816
loss: 0.9705013632774353,grad_norm: 0.9400367954935283, iteration: 271817
loss: 1.0078885555267334,grad_norm: 0.8850231873555732, iteration: 271818
loss: 1.010398507118225,grad_norm: 0.8696756708061415, iteration: 271819
loss: 0.9838783144950867,grad_norm: 0.7907918414761043, iteration: 271820
loss: 0.9995114803314209,grad_norm: 0.7787199420815567, iteration: 271821
loss: 0.9970722198486328,grad_norm: 0.7923399919090919, iteration: 271822
loss: 1.0111982822418213,grad_norm: 0.8849928775796952, iteration: 271823
loss: 0.9682836532592773,grad_norm: 0.8512780273850533, iteration: 271824
loss: 0.9618358016014099,grad_norm: 0.83985196912817, iteration: 271825
loss: 0.982170581817627,grad_norm: 0.9999991502477774, iteration: 271826
loss: 1.003642201423645,grad_norm: 0.8105944295470321, iteration: 271827
loss: 0.9950233101844788,grad_norm: 0.7753549032741416, iteration: 271828
loss: 1.0598411560058594,grad_norm: 0.9999991932033421, iteration: 271829
loss: 0.9946931600570679,grad_norm: 0.8922664720174505, iteration: 271830
loss: 1.0040606260299683,grad_norm: 0.9319454743231476, iteration: 271831
loss: 1.0016136169433594,grad_norm: 0.8069027067775585, iteration: 271832
loss: 1.0033607482910156,grad_norm: 0.889896950915403, iteration: 271833
loss: 0.9868676662445068,grad_norm: 0.84067789732876, iteration: 271834
loss: 1.018310546875,grad_norm: 0.8966038675056855, iteration: 271835
loss: 1.0057506561279297,grad_norm: 0.9999990758939852, iteration: 271836
loss: 1.019157886505127,grad_norm: 0.8227508663360122, iteration: 271837
loss: 1.0606871843338013,grad_norm: 0.9384084242389368, iteration: 271838
loss: 1.0211049318313599,grad_norm: 0.9287235791695365, iteration: 271839
loss: 0.9704349637031555,grad_norm: 0.8825623365493184, iteration: 271840
loss: 0.9982739090919495,grad_norm: 0.999999183659176, iteration: 271841
loss: 1.0112377405166626,grad_norm: 0.9999990345773518, iteration: 271842
loss: 0.9718182682991028,grad_norm: 0.9999990094624116, iteration: 271843
loss: 0.9507163166999817,grad_norm: 0.9036728229739514, iteration: 271844
loss: 1.021736741065979,grad_norm: 0.999999768744826, iteration: 271845
loss: 1.0278254747390747,grad_norm: 0.8939236566881791, iteration: 271846
loss: 1.0056977272033691,grad_norm: 0.9999989691073912, iteration: 271847
loss: 1.0100761651992798,grad_norm: 0.8234735276120774, iteration: 271848
loss: 1.0618364810943604,grad_norm: 0.9999991401001795, iteration: 271849
loss: 1.031562089920044,grad_norm: 0.9066357246099908, iteration: 271850
loss: 0.9862995147705078,grad_norm: 0.7703734681684843, iteration: 271851
loss: 0.997198760509491,grad_norm: 0.9014570973184073, iteration: 271852
loss: 1.0177336931228638,grad_norm: 0.9999989522265691, iteration: 271853
loss: 0.9957242012023926,grad_norm: 0.9999992147873035, iteration: 271854
loss: 1.0040286779403687,grad_norm: 0.9587917448948388, iteration: 271855
loss: 1.0382182598114014,grad_norm: 0.8653331595700987, iteration: 271856
loss: 1.0720094442367554,grad_norm: 0.9999990671066473, iteration: 271857
loss: 1.0018516778945923,grad_norm: 0.9999991677445016, iteration: 271858
loss: 0.9800736308097839,grad_norm: 0.6905378405979674, iteration: 271859
loss: 1.0009657144546509,grad_norm: 0.8691895661234784, iteration: 271860
loss: 0.9865269660949707,grad_norm: 0.9819520123370779, iteration: 271861
loss: 1.0106381177902222,grad_norm: 0.9327064066375932, iteration: 271862
loss: 0.9953112006187439,grad_norm: 0.9999992496825747, iteration: 271863
loss: 1.024382472038269,grad_norm: 0.8984122985711792, iteration: 271864
loss: 0.9807859659194946,grad_norm: 0.8517052440679811, iteration: 271865
loss: 0.983491837978363,grad_norm: 0.8208599173556121, iteration: 271866
loss: 1.0087779760360718,grad_norm: 0.7428888919331297, iteration: 271867
loss: 1.0210916996002197,grad_norm: 0.9990725854319621, iteration: 271868
loss: 1.030251383781433,grad_norm: 0.9485707806949689, iteration: 271869
loss: 1.0009124279022217,grad_norm: 0.6784827170669292, iteration: 271870
loss: 0.977412760257721,grad_norm: 0.9999990965542284, iteration: 271871
loss: 1.0020158290863037,grad_norm: 0.8076257785076038, iteration: 271872
loss: 0.9874604344367981,grad_norm: 0.753870303806795, iteration: 271873
loss: 1.0697108507156372,grad_norm: 0.8234748835551539, iteration: 271874
loss: 0.9807541966438293,grad_norm: 0.9999996856703237, iteration: 271875
loss: 0.9969420433044434,grad_norm: 0.999998987880195, iteration: 271876
loss: 0.9943299293518066,grad_norm: 0.9999989511923956, iteration: 271877
loss: 0.9987056851387024,grad_norm: 0.9403498628713816, iteration: 271878
loss: 1.0074894428253174,grad_norm: 0.8945699485478124, iteration: 271879
loss: 0.9668622016906738,grad_norm: 0.8352477086230705, iteration: 271880
loss: 0.992431104183197,grad_norm: 0.9270083883814173, iteration: 271881
loss: 0.9958487749099731,grad_norm: 0.8675779299331167, iteration: 271882
loss: 0.9879379272460938,grad_norm: 0.7856677418288578, iteration: 271883
loss: 0.9731648564338684,grad_norm: 0.8274051154722837, iteration: 271884
loss: 1.0077248811721802,grad_norm: 0.8545937428807397, iteration: 271885
loss: 1.0310465097427368,grad_norm: 0.9999991748805755, iteration: 271886
loss: 1.0565214157104492,grad_norm: 0.9509489477967935, iteration: 271887
loss: 0.9999164938926697,grad_norm: 0.9032865185228734, iteration: 271888
loss: 0.9930397868156433,grad_norm: 0.9999991764080558, iteration: 271889
loss: 0.9961855411529541,grad_norm: 0.8364434761266885, iteration: 271890
loss: 1.01685631275177,grad_norm: 0.7245649383790151, iteration: 271891
loss: 1.049764633178711,grad_norm: 0.968568047817904, iteration: 271892
loss: 1.0088783502578735,grad_norm: 0.8455219981405065, iteration: 271893
loss: 1.001218557357788,grad_norm: 0.8163899886344265, iteration: 271894
loss: 1.004019021987915,grad_norm: 0.9999990961610168, iteration: 271895
loss: 1.0123262405395508,grad_norm: 0.90645465942589, iteration: 271896
loss: 0.9874613285064697,grad_norm: 0.9999997210405732, iteration: 271897
loss: 0.9728376269340515,grad_norm: 0.868567900479309, iteration: 271898
loss: 1.0563393831253052,grad_norm: 0.9535620488838247, iteration: 271899
loss: 0.9615929126739502,grad_norm: 0.9389609265669597, iteration: 271900
loss: 0.9834244847297668,grad_norm: 0.8163508065362974, iteration: 271901
loss: 0.9957031011581421,grad_norm: 0.8518894690063045, iteration: 271902
loss: 1.0408920049667358,grad_norm: 0.7858267678713804, iteration: 271903
loss: 1.0426793098449707,grad_norm: 0.8728125105062309, iteration: 271904
loss: 1.0357459783554077,grad_norm: 0.9999990883083466, iteration: 271905
loss: 1.0261791944503784,grad_norm: 0.9577991028500026, iteration: 271906
loss: 1.0288922786712646,grad_norm: 0.9999989971789304, iteration: 271907
loss: 1.0230857133865356,grad_norm: 0.9069393608934616, iteration: 271908
loss: 1.0312907695770264,grad_norm: 0.8503049353589774, iteration: 271909
loss: 0.9992746114730835,grad_norm: 0.8094422748846499, iteration: 271910
loss: 0.9643256068229675,grad_norm: 0.9181720330441274, iteration: 271911
loss: 0.9718621373176575,grad_norm: 0.8224275635192092, iteration: 271912
loss: 1.0082125663757324,grad_norm: 0.9999990278342225, iteration: 271913
loss: 0.9882773756980896,grad_norm: 0.9513464392514881, iteration: 271914
loss: 0.989926815032959,grad_norm: 0.8289777835538622, iteration: 271915
loss: 0.9953981041908264,grad_norm: 0.7649670562775386, iteration: 271916
loss: 1.0161906480789185,grad_norm: 0.8288932378232179, iteration: 271917
loss: 1.0920236110687256,grad_norm: 0.9999998638060429, iteration: 271918
loss: 1.0031847953796387,grad_norm: 0.797298260874296, iteration: 271919
loss: 0.9874984622001648,grad_norm: 0.8738973592861387, iteration: 271920
loss: 0.9762932062149048,grad_norm: 0.9707061730215041, iteration: 271921
loss: 0.9860392212867737,grad_norm: 0.9774832220785599, iteration: 271922
loss: 0.9869073629379272,grad_norm: 0.864569093877518, iteration: 271923
loss: 0.9778786301612854,grad_norm: 0.8486941288760771, iteration: 271924
loss: 1.0109984874725342,grad_norm: 0.8088057249032455, iteration: 271925
loss: 0.9908386468887329,grad_norm: 0.7852723180674217, iteration: 271926
loss: 0.9797983169555664,grad_norm: 0.9379234838448295, iteration: 271927
loss: 0.9636675119400024,grad_norm: 0.9999990341642034, iteration: 271928
loss: 1.0107488632202148,grad_norm: 0.8876305323612744, iteration: 271929
loss: 0.9701516628265381,grad_norm: 0.9999992951302406, iteration: 271930
loss: 1.019352674484253,grad_norm: 0.8607936038056159, iteration: 271931
loss: 1.0160963535308838,grad_norm: 0.9534204224758651, iteration: 271932
loss: 1.035847544670105,grad_norm: 0.902968060903798, iteration: 271933
loss: 0.9803286194801331,grad_norm: 0.771542258014022, iteration: 271934
loss: 0.9936158657073975,grad_norm: 0.7001918487643176, iteration: 271935
loss: 1.0105735063552856,grad_norm: 0.8610118282675567, iteration: 271936
loss: 0.9630545377731323,grad_norm: 0.9949949607952658, iteration: 271937
loss: 0.9889205098152161,grad_norm: 0.9999989948497688, iteration: 271938
loss: 1.0352473258972168,grad_norm: 0.8656786782316279, iteration: 271939
loss: 0.9929231405258179,grad_norm: 0.8676176884930049, iteration: 271940
loss: 1.0325746536254883,grad_norm: 0.8309404637180314, iteration: 271941
loss: 1.03213369846344,grad_norm: 0.7793659782094813, iteration: 271942
loss: 1.0213278532028198,grad_norm: 0.8723838931863417, iteration: 271943
loss: 1.003896713256836,grad_norm: 0.9234492608063581, iteration: 271944
loss: 0.9755439162254333,grad_norm: 0.8132670032652635, iteration: 271945
loss: 1.0111452341079712,grad_norm: 0.9999990017390766, iteration: 271946
loss: 0.9649521112442017,grad_norm: 0.8429131906854083, iteration: 271947
loss: 1.0054042339324951,grad_norm: 0.9999992258582362, iteration: 271948
loss: 1.0233545303344727,grad_norm: 0.9999997360607581, iteration: 271949
loss: 1.0037202835083008,grad_norm: 0.9005011563733114, iteration: 271950
loss: 1.015854001045227,grad_norm: 0.8033902496355796, iteration: 271951
loss: 0.9651660323143005,grad_norm: 0.9999990200434355, iteration: 271952
loss: 0.9822180867195129,grad_norm: 0.8058307513257021, iteration: 271953
loss: 1.0530165433883667,grad_norm: 0.9999998667296358, iteration: 271954
loss: 1.0217934846878052,grad_norm: 0.8686283817785013, iteration: 271955
loss: 0.9629096388816833,grad_norm: 0.9104050627888176, iteration: 271956
loss: 0.9617852568626404,grad_norm: 0.7827768091391334, iteration: 271957
loss: 0.964958667755127,grad_norm: 0.8919230270218758, iteration: 271958
loss: 0.9888186454772949,grad_norm: 0.9246946128718329, iteration: 271959
loss: 1.041925072669983,grad_norm: 0.9999990157776758, iteration: 271960
loss: 0.9953075647354126,grad_norm: 0.9231985798855715, iteration: 271961
loss: 1.0515317916870117,grad_norm: 0.9999991542152338, iteration: 271962
loss: 1.0119915008544922,grad_norm: 0.9687121954402665, iteration: 271963
loss: 1.019144058227539,grad_norm: 0.8730509778307424, iteration: 271964
loss: 1.0216127634048462,grad_norm: 0.863297090893765, iteration: 271965
loss: 0.9894336462020874,grad_norm: 0.8989434859371517, iteration: 271966
loss: 0.9963059425354004,grad_norm: 0.9911350548753038, iteration: 271967
loss: 1.0046435594558716,grad_norm: 0.8300463266498518, iteration: 271968
loss: 0.9902514219284058,grad_norm: 0.8113033926663551, iteration: 271969
loss: 0.9979413151741028,grad_norm: 0.8605943257161247, iteration: 271970
loss: 0.9935435652732849,grad_norm: 0.8106479765885874, iteration: 271971
loss: 1.0230810642242432,grad_norm: 0.8809391683568445, iteration: 271972
loss: 0.9700024724006653,grad_norm: 0.9999992076962633, iteration: 271973
loss: 1.0506978034973145,grad_norm: 0.9999993490247802, iteration: 271974
loss: 1.017281413078308,grad_norm: 0.74914894094162, iteration: 271975
loss: 1.0125575065612793,grad_norm: 0.7822496727239336, iteration: 271976
loss: 0.9936351180076599,grad_norm: 0.9999992615402231, iteration: 271977
loss: 0.9856289029121399,grad_norm: 0.7701675391762532, iteration: 271978
loss: 1.0030462741851807,grad_norm: 0.9702243252037834, iteration: 271979
loss: 1.0485590696334839,grad_norm: 0.9999997365177822, iteration: 271980
loss: 1.0164896249771118,grad_norm: 0.9999993395234087, iteration: 271981
loss: 0.9806056022644043,grad_norm: 0.7843057751954928, iteration: 271982
loss: 1.0153945684432983,grad_norm: 0.9143811607239261, iteration: 271983
loss: 0.9838573336601257,grad_norm: 0.7789948935635957, iteration: 271984
loss: 1.0228992700576782,grad_norm: 0.8605710432825855, iteration: 271985
loss: 1.0162104368209839,grad_norm: 0.9999990685658544, iteration: 271986
loss: 1.011938452720642,grad_norm: 0.8811136212422339, iteration: 271987
loss: 1.0243068933486938,grad_norm: 0.7046468430326331, iteration: 271988
loss: 1.0269654989242554,grad_norm: 0.9305536942232001, iteration: 271989
loss: 1.0011128187179565,grad_norm: 0.9999993536098666, iteration: 271990
loss: 0.9982277750968933,grad_norm: 0.7447905841225397, iteration: 271991
loss: 0.9952266216278076,grad_norm: 0.9999991306006459, iteration: 271992
loss: 0.9628078937530518,grad_norm: 0.9113139678501564, iteration: 271993
loss: 0.9805319309234619,grad_norm: 0.9393479679494677, iteration: 271994
loss: 0.9993204474449158,grad_norm: 0.9118602401224861, iteration: 271995
loss: 1.0084573030471802,grad_norm: 0.8929199691280657, iteration: 271996
loss: 1.0138392448425293,grad_norm: 0.7715505112934871, iteration: 271997
loss: 0.9767438769340515,grad_norm: 0.7454949268265237, iteration: 271998
loss: 0.9839841723442078,grad_norm: 0.7836008098480881, iteration: 271999
loss: 0.9993792176246643,grad_norm: 0.802947772301608, iteration: 272000
loss: 0.960260272026062,grad_norm: 0.7150964520319678, iteration: 272001
loss: 0.9700285792350769,grad_norm: 0.9999992310216972, iteration: 272002
loss: 0.991905689239502,grad_norm: 0.914363431902762, iteration: 272003
loss: 1.0146371126174927,grad_norm: 0.8005578276267373, iteration: 272004
loss: 1.0247137546539307,grad_norm: 0.911030179324364, iteration: 272005
loss: 0.9705879092216492,grad_norm: 0.7953092009514566, iteration: 272006
loss: 0.994265079498291,grad_norm: 0.8961952707797425, iteration: 272007
loss: 1.0204992294311523,grad_norm: 0.7574091995455433, iteration: 272008
loss: 1.0057183504104614,grad_norm: 0.9495613885414218, iteration: 272009
loss: 1.0015348196029663,grad_norm: 0.7675805155628144, iteration: 272010
loss: 1.0013476610183716,grad_norm: 0.8262719886106373, iteration: 272011
loss: 1.0455554723739624,grad_norm: 0.9999996102285335, iteration: 272012
loss: 1.037126898765564,grad_norm: 0.9999998463572507, iteration: 272013
loss: 1.0009161233901978,grad_norm: 0.9461269892322814, iteration: 272014
loss: 1.0137684345245361,grad_norm: 0.9999990553592254, iteration: 272015
loss: 0.9788593053817749,grad_norm: 0.8739837074516336, iteration: 272016
loss: 1.0042321681976318,grad_norm: 0.8976782208210426, iteration: 272017
loss: 1.0108070373535156,grad_norm: 0.7214664729927731, iteration: 272018
loss: 0.9983434081077576,grad_norm: 0.9999995352567339, iteration: 272019
loss: 1.0114197731018066,grad_norm: 0.8217315902698392, iteration: 272020
loss: 0.9451181292533875,grad_norm: 0.9999991780193013, iteration: 272021
loss: 0.9714433550834656,grad_norm: 0.9326497278590034, iteration: 272022
loss: 0.9966772794723511,grad_norm: 0.9461147053727138, iteration: 272023
loss: 0.9891595840454102,grad_norm: 0.7514522867818054, iteration: 272024
loss: 1.0110670328140259,grad_norm: 0.8412220112909606, iteration: 272025
loss: 1.0186342000961304,grad_norm: 0.99999978772341, iteration: 272026
loss: 1.0141059160232544,grad_norm: 0.7822817012788742, iteration: 272027
loss: 0.9837918877601624,grad_norm: 0.9075359454547468, iteration: 272028
loss: 0.9726598858833313,grad_norm: 0.9999999409513228, iteration: 272029
loss: 1.0056875944137573,grad_norm: 0.7818108221564305, iteration: 272030
loss: 1.0077494382858276,grad_norm: 0.80680659329746, iteration: 272031
loss: 1.0230525732040405,grad_norm: 0.9999990952701681, iteration: 272032
loss: 0.9684579968452454,grad_norm: 0.7883172280444904, iteration: 272033
loss: 0.995581328868866,grad_norm: 0.9421926483572075, iteration: 272034
loss: 0.9900004267692566,grad_norm: 0.7184066152714846, iteration: 272035
loss: 0.9918184876441956,grad_norm: 0.9193093422808442, iteration: 272036
loss: 1.0293954610824585,grad_norm: 0.7640966816907869, iteration: 272037
loss: 0.9686629176139832,grad_norm: 0.9075114501885705, iteration: 272038
loss: 1.0220273733139038,grad_norm: 0.8349818972991461, iteration: 272039
loss: 0.986717939376831,grad_norm: 0.9478882878350626, iteration: 272040
loss: 0.993938684463501,grad_norm: 0.9999990029003928, iteration: 272041
loss: 0.9273874759674072,grad_norm: 0.8750927920144828, iteration: 272042
loss: 1.0065010786056519,grad_norm: 0.9499400338282977, iteration: 272043
loss: 1.0185916423797607,grad_norm: 0.9527757098813543, iteration: 272044
loss: 0.9740660190582275,grad_norm: 0.7643403247930897, iteration: 272045
loss: 1.00309157371521,grad_norm: 0.9999991118408591, iteration: 272046
loss: 0.9607985019683838,grad_norm: 0.7906739441022905, iteration: 272047
loss: 1.0366777181625366,grad_norm: 0.9999991346235543, iteration: 272048
loss: 0.9610171914100647,grad_norm: 0.9334146384054411, iteration: 272049
loss: 1.016018033027649,grad_norm: 0.9999992951557004, iteration: 272050
loss: 1.006257176399231,grad_norm: 0.8644965809631894, iteration: 272051
loss: 0.9573692679405212,grad_norm: 0.9999991180914711, iteration: 272052
loss: 1.1760214567184448,grad_norm: 0.9999995285012448, iteration: 272053
loss: 1.0327094793319702,grad_norm: 0.8516952718205303, iteration: 272054
loss: 1.014709711074829,grad_norm: 0.9483684045139096, iteration: 272055
loss: 1.0197476148605347,grad_norm: 0.8089044831379123, iteration: 272056
loss: 1.2083346843719482,grad_norm: 0.9999999299006055, iteration: 272057
loss: 1.0123748779296875,grad_norm: 0.7574052469395809, iteration: 272058
loss: 0.9894086718559265,grad_norm: 0.8482881988316232, iteration: 272059
loss: 0.97087162733078,grad_norm: 0.9318019092143417, iteration: 272060
loss: 1.0513733625411987,grad_norm: 0.7494063851121774, iteration: 272061
loss: 1.015605092048645,grad_norm: 0.9554719048800423, iteration: 272062
loss: 1.006348967552185,grad_norm: 0.9291022907094796, iteration: 272063
loss: 1.0024664402008057,grad_norm: 0.9777683640546774, iteration: 272064
loss: 1.008124589920044,grad_norm: 0.9380991198561264, iteration: 272065
loss: 1.030220866203308,grad_norm: 0.9668311998935071, iteration: 272066
loss: 1.0272035598754883,grad_norm: 0.9999990810957373, iteration: 272067
loss: 0.9803310632705688,grad_norm: 0.9196012403534243, iteration: 272068
loss: 1.0515094995498657,grad_norm: 0.9999989013097004, iteration: 272069
loss: 1.0033382177352905,grad_norm: 0.8354637156818178, iteration: 272070
loss: 1.0075863599777222,grad_norm: 0.8561130244592463, iteration: 272071
loss: 0.9979193806648254,grad_norm: 0.8818017879762349, iteration: 272072
loss: 0.9867514371871948,grad_norm: 0.9275122264379186, iteration: 272073
loss: 0.958443284034729,grad_norm: 0.9999998208771528, iteration: 272074
loss: 1.0167462825775146,grad_norm: 0.9999991510180715, iteration: 272075
loss: 1.0029175281524658,grad_norm: 0.7816224418839417, iteration: 272076
loss: 1.0013641119003296,grad_norm: 0.8132249519401568, iteration: 272077
loss: 0.998084306716919,grad_norm: 0.8861091368366117, iteration: 272078
loss: 1.0712699890136719,grad_norm: 0.9751203843400108, iteration: 272079
loss: 0.9950665831565857,grad_norm: 0.9735489571106147, iteration: 272080
loss: 1.0948584079742432,grad_norm: 0.9999990828471365, iteration: 272081
loss: 1.0053153038024902,grad_norm: 0.9053506501399644, iteration: 272082
loss: 1.097027063369751,grad_norm: 0.9101331611255684, iteration: 272083
loss: 1.0101847648620605,grad_norm: 0.770297931817114, iteration: 272084
loss: 1.0180221796035767,grad_norm: 0.9002328330527423, iteration: 272085
loss: 1.0049551725387573,grad_norm: 0.9999998639677062, iteration: 272086
loss: 0.9728151559829712,grad_norm: 0.8918614647247621, iteration: 272087
loss: 0.9983044862747192,grad_norm: 0.8998700207442039, iteration: 272088
loss: 1.0004268884658813,grad_norm: 0.9888099645765018, iteration: 272089
loss: 0.997163712978363,grad_norm: 0.8453347135935713, iteration: 272090
loss: 1.006264090538025,grad_norm: 0.8831975719196213, iteration: 272091
loss: 1.0653092861175537,grad_norm: 0.9634957529994481, iteration: 272092
loss: 1.0463060140609741,grad_norm: 0.9999989122721227, iteration: 272093
loss: 1.0035264492034912,grad_norm: 0.849038741258557, iteration: 272094
loss: 1.0636818408966064,grad_norm: 0.9153197327482729, iteration: 272095
loss: 1.1005477905273438,grad_norm: 0.7982149684370214, iteration: 272096
loss: 1.05413019657135,grad_norm: 0.7256433985394578, iteration: 272097
loss: 0.9617434740066528,grad_norm: 0.9999993784422427, iteration: 272098
loss: 0.9750363826751709,grad_norm: 0.9714511152715481, iteration: 272099
loss: 0.9951237440109253,grad_norm: 0.7364028226186359, iteration: 272100
loss: 0.9760841727256775,grad_norm: 0.883044818839695, iteration: 272101
loss: 1.2358250617980957,grad_norm: 0.9999990469106539, iteration: 272102
loss: 0.9974124431610107,grad_norm: 0.813205364608393, iteration: 272103
loss: 0.9828688502311707,grad_norm: 0.9183613742568, iteration: 272104
loss: 0.9800089001655579,grad_norm: 0.9999990646173266, iteration: 272105
loss: 1.1608470678329468,grad_norm: 0.9999992098493117, iteration: 272106
loss: 1.0167237520217896,grad_norm: 0.9882761956557948, iteration: 272107
loss: 1.1106841564178467,grad_norm: 0.9654938779702105, iteration: 272108
loss: 1.045919418334961,grad_norm: 0.9999998533335106, iteration: 272109
loss: 0.9544025659561157,grad_norm: 0.8330239274471033, iteration: 272110
loss: 1.007149577140808,grad_norm: 0.9258327735459339, iteration: 272111
loss: 1.0057052373886108,grad_norm: 0.9011326404066125, iteration: 272112
loss: 0.9927014708518982,grad_norm: 0.8677319380913225, iteration: 272113
loss: 1.0016072988510132,grad_norm: 0.8302797002353401, iteration: 272114
loss: 0.9762043356895447,grad_norm: 0.8603337573935422, iteration: 272115
loss: 1.030806541442871,grad_norm: 0.9041440993363207, iteration: 272116
loss: 0.995827853679657,grad_norm: 0.8733883984374055, iteration: 272117
loss: 1.0146760940551758,grad_norm: 0.9447790346859243, iteration: 272118
loss: 1.0518003702163696,grad_norm: 0.8743947331145137, iteration: 272119
loss: 1.0528377294540405,grad_norm: 0.9999990883479277, iteration: 272120
loss: 1.0405877828598022,grad_norm: 0.8256923101948205, iteration: 272121
loss: 0.9975234866142273,grad_norm: 0.7960365218872472, iteration: 272122
loss: 1.0219700336456299,grad_norm: 0.7739435641915485, iteration: 272123
loss: 0.9997339248657227,grad_norm: 0.8530025904432533, iteration: 272124
loss: 0.9729828834533691,grad_norm: 0.861291731430969, iteration: 272125
loss: 0.9964996576309204,grad_norm: 0.8014280977831115, iteration: 272126
loss: 1.0608595609664917,grad_norm: 0.9469411266058125, iteration: 272127
loss: 1.0266531705856323,grad_norm: 0.7551056338736809, iteration: 272128
loss: 0.9982384443283081,grad_norm: 0.9999992272145645, iteration: 272129
loss: 0.9834319949150085,grad_norm: 0.8558009131116316, iteration: 272130
loss: 1.0512179136276245,grad_norm: 0.7750620794212586, iteration: 272131
loss: 0.9762561321258545,grad_norm: 0.9999995424012997, iteration: 272132
loss: 0.9909868240356445,grad_norm: 0.9527044328778816, iteration: 272133
loss: 0.9964441061019897,grad_norm: 0.7530126761570719, iteration: 272134
loss: 1.055853247642517,grad_norm: 0.99999922289234, iteration: 272135
loss: 0.9902820587158203,grad_norm: 0.8567651960821023, iteration: 272136
loss: 0.9744149446487427,grad_norm: 0.8468007056214145, iteration: 272137
loss: 1.0006319284439087,grad_norm: 0.9999993305974076, iteration: 272138
loss: 0.9806957244873047,grad_norm: 0.8412702480620021, iteration: 272139
loss: 0.9902520775794983,grad_norm: 0.6871576238756282, iteration: 272140
loss: 1.0016651153564453,grad_norm: 0.9106962811688598, iteration: 272141
loss: 1.009097695350647,grad_norm: 0.9257525380558757, iteration: 272142
loss: 1.005602240562439,grad_norm: 0.9999991266730387, iteration: 272143
loss: 0.9948817491531372,grad_norm: 0.9999989604733063, iteration: 272144
loss: 1.0031635761260986,grad_norm: 0.9204858456565104, iteration: 272145
loss: 1.0455968379974365,grad_norm: 0.9999998653885306, iteration: 272146
loss: 1.039690613746643,grad_norm: 0.926101769175211, iteration: 272147
loss: 0.999975860118866,grad_norm: 0.785098766730817, iteration: 272148
loss: 0.9903002381324768,grad_norm: 0.9999999343209759, iteration: 272149
loss: 0.9829495549201965,grad_norm: 0.8892253852752683, iteration: 272150
loss: 0.9821321368217468,grad_norm: 0.7853952862798734, iteration: 272151
loss: 1.0040780305862427,grad_norm: 0.8189190201573253, iteration: 272152
loss: 1.012012004852295,grad_norm: 0.9999993339469347, iteration: 272153
loss: 1.0063949823379517,grad_norm: 0.9999992328391673, iteration: 272154
loss: 1.0099462270736694,grad_norm: 0.9738430718293187, iteration: 272155
loss: 0.9950084686279297,grad_norm: 0.918120546006949, iteration: 272156
loss: 1.0232418775558472,grad_norm: 0.9232525977779793, iteration: 272157
loss: 1.0223848819732666,grad_norm: 0.9738332564410552, iteration: 272158
loss: 1.0994888544082642,grad_norm: 0.9999999134554448, iteration: 272159
loss: 1.0081533193588257,grad_norm: 0.8185018742386926, iteration: 272160
loss: 1.0129625797271729,grad_norm: 0.9450832372989384, iteration: 272161
loss: 1.0102653503417969,grad_norm: 0.9999991548616666, iteration: 272162
loss: 0.9735201001167297,grad_norm: 0.8401563986359564, iteration: 272163
loss: 1.0621380805969238,grad_norm: 0.9999991903477913, iteration: 272164
loss: 0.9933184385299683,grad_norm: 0.7631594771244292, iteration: 272165
loss: 1.0220850706100464,grad_norm: 0.8643307489405979, iteration: 272166
loss: 0.9879720211029053,grad_norm: 0.9171236276296465, iteration: 272167
loss: 1.0194658041000366,grad_norm: 0.9999992157169573, iteration: 272168
loss: 0.9803974032402039,grad_norm: 0.727256653400409, iteration: 272169
loss: 0.9818264245986938,grad_norm: 0.74370939589121, iteration: 272170
loss: 1.045015573501587,grad_norm: 0.9517073562937196, iteration: 272171
loss: 1.0048606395721436,grad_norm: 0.7962830889388278, iteration: 272172
loss: 1.090192437171936,grad_norm: 0.9920210023840602, iteration: 272173
loss: 1.0051501989364624,grad_norm: 0.8642556512511528, iteration: 272174
loss: 0.9834383130073547,grad_norm: 0.9895745745379844, iteration: 272175
loss: 0.9733638763427734,grad_norm: 0.7035707602111545, iteration: 272176
loss: 1.018231987953186,grad_norm: 0.8056347112709376, iteration: 272177
loss: 0.9969977140426636,grad_norm: 0.8712803467116803, iteration: 272178
loss: 0.9736332297325134,grad_norm: 0.9691146027593209, iteration: 272179
loss: 1.076648235321045,grad_norm: 0.9999993176640023, iteration: 272180
loss: 0.9872928857803345,grad_norm: 0.8361946228639725, iteration: 272181
loss: 1.0281234979629517,grad_norm: 0.9999992045437229, iteration: 272182
loss: 1.0476371049880981,grad_norm: 0.8760481454099267, iteration: 272183
loss: 0.9881100058555603,grad_norm: 0.780644989484127, iteration: 272184
loss: 0.9914966821670532,grad_norm: 0.7899827821140442, iteration: 272185
loss: 0.9864787459373474,grad_norm: 0.8959283122192073, iteration: 272186
loss: 1.014803409576416,grad_norm: 0.9015184696379941, iteration: 272187
loss: 0.9955217838287354,grad_norm: 0.8504744345450316, iteration: 272188
loss: 1.001379370689392,grad_norm: 0.7840936717186766, iteration: 272189
loss: 1.0501645803451538,grad_norm: 0.8412410423743427, iteration: 272190
loss: 0.9687474370002747,grad_norm: 0.8155659184309769, iteration: 272191
loss: 1.0352972745895386,grad_norm: 0.9855522488560308, iteration: 272192
loss: 0.9922565221786499,grad_norm: 0.9999991768925922, iteration: 272193
loss: 0.9778063297271729,grad_norm: 0.8497155793164255, iteration: 272194
loss: 1.0165799856185913,grad_norm: 0.8298103884745669, iteration: 272195
loss: 0.9882329702377319,grad_norm: 0.9546225110918749, iteration: 272196
loss: 1.020655632019043,grad_norm: 0.6623417845129412, iteration: 272197
loss: 0.9863342642784119,grad_norm: 0.8125704717937514, iteration: 272198
loss: 1.0346518754959106,grad_norm: 0.9939741945948951, iteration: 272199
loss: 0.9899113178253174,grad_norm: 0.9215051847795419, iteration: 272200
loss: 1.0040956735610962,grad_norm: 0.8829321537499717, iteration: 272201
loss: 1.01486074924469,grad_norm: 0.9071253904041212, iteration: 272202
loss: 0.9831316471099854,grad_norm: 0.9919179331741378, iteration: 272203
loss: 0.9987632632255554,grad_norm: 0.9296806544537626, iteration: 272204
loss: 0.9718629121780396,grad_norm: 0.756319235501867, iteration: 272205
loss: 1.0678699016571045,grad_norm: 0.9999995051270373, iteration: 272206
loss: 0.999458372592926,grad_norm: 0.7200512026868628, iteration: 272207
loss: 0.9958526492118835,grad_norm: 0.8622124528380086, iteration: 272208
loss: 0.9917881488800049,grad_norm: 0.8723347359686702, iteration: 272209
loss: 0.9914330244064331,grad_norm: 0.920732193148211, iteration: 272210
loss: 0.9799252152442932,grad_norm: 0.8208629436616789, iteration: 272211
loss: 1.0139628648757935,grad_norm: 0.9999993280991293, iteration: 272212
loss: 1.0266183614730835,grad_norm: 0.9727606911218489, iteration: 272213
loss: 1.0326015949249268,grad_norm: 0.9999991546101408, iteration: 272214
loss: 1.0150777101516724,grad_norm: 0.9705265374463996, iteration: 272215
loss: 1.0165895223617554,grad_norm: 0.9879699321124827, iteration: 272216
loss: 0.9826838970184326,grad_norm: 0.7878863335773597, iteration: 272217
loss: 0.968617856502533,grad_norm: 0.9999990379763607, iteration: 272218
loss: 1.0082972049713135,grad_norm: 0.9999993751851173, iteration: 272219
loss: 0.97383052110672,grad_norm: 0.7979561542870176, iteration: 272220
loss: 1.01241135597229,grad_norm: 0.7832588764206875, iteration: 272221
loss: 0.9955608248710632,grad_norm: 0.872919540990601, iteration: 272222
loss: 1.0631605386734009,grad_norm: 0.9047205215017893, iteration: 272223
loss: 1.000587821006775,grad_norm: 0.82549211963688, iteration: 272224
loss: 1.0159907341003418,grad_norm: 0.849534876854652, iteration: 272225
loss: 0.9837381839752197,grad_norm: 0.8331042869092075, iteration: 272226
loss: 1.0312708616256714,grad_norm: 0.999998955916429, iteration: 272227
loss: 1.0602887868881226,grad_norm: 0.9999993201890848, iteration: 272228
loss: 1.0560075044631958,grad_norm: 0.8677218082225605, iteration: 272229
loss: 1.0030980110168457,grad_norm: 0.8613147462747852, iteration: 272230
loss: 1.0170331001281738,grad_norm: 0.9418546539968274, iteration: 272231
loss: 1.0352494716644287,grad_norm: 0.8839028698127912, iteration: 272232
loss: 0.9717477560043335,grad_norm: 0.9776461914901878, iteration: 272233
loss: 1.0258864164352417,grad_norm: 0.8195961405005067, iteration: 272234
loss: 0.9853733777999878,grad_norm: 0.9500299534135025, iteration: 272235
loss: 0.9895119667053223,grad_norm: 0.9999991460469232, iteration: 272236
loss: 0.982343852519989,grad_norm: 0.9999989688711253, iteration: 272237
loss: 1.033578872680664,grad_norm: 0.9999991677174694, iteration: 272238
loss: 0.9968616366386414,grad_norm: 0.9420088765416812, iteration: 272239
loss: 0.9775589108467102,grad_norm: 0.8231432759980545, iteration: 272240
loss: 1.0319229364395142,grad_norm: 0.9937563707795605, iteration: 272241
loss: 1.070469856262207,grad_norm: 0.8163436022543605, iteration: 272242
loss: 0.9866101741790771,grad_norm: 0.7674795670434554, iteration: 272243
loss: 1.0558663606643677,grad_norm: 0.7392447241954769, iteration: 272244
loss: 1.0487629175186157,grad_norm: 1.000000016323867, iteration: 272245
loss: 0.9968756437301636,grad_norm: 0.9163202799835786, iteration: 272246
loss: 1.0070347785949707,grad_norm: 0.9228250208209939, iteration: 272247
loss: 0.9715737700462341,grad_norm: 0.8472751423739611, iteration: 272248
loss: 1.0454219579696655,grad_norm: 0.8330669767739495, iteration: 272249
loss: 1.0878617763519287,grad_norm: 0.9337135548276579, iteration: 272250
loss: 0.9817246198654175,grad_norm: 0.9823202102796873, iteration: 272251
loss: 1.0140935182571411,grad_norm: 0.9680746441363215, iteration: 272252
loss: 0.9856047630310059,grad_norm: 0.9431394231125043, iteration: 272253
loss: 1.0449681282043457,grad_norm: 0.7539664629350493, iteration: 272254
loss: 1.0434257984161377,grad_norm: 0.9554697113428638, iteration: 272255
loss: 1.094191551208496,grad_norm: 0.9999995773075868, iteration: 272256
loss: 1.0279313325881958,grad_norm: 0.9965128863640845, iteration: 272257
loss: 1.0228049755096436,grad_norm: 0.8430357711521331, iteration: 272258
loss: 1.0709362030029297,grad_norm: 0.9113051795356885, iteration: 272259
loss: 1.0359715223312378,grad_norm: 0.9999999355239944, iteration: 272260
loss: 0.9849249124526978,grad_norm: 0.9999990139047977, iteration: 272261
loss: 1.0016649961471558,grad_norm: 0.9999990245094011, iteration: 272262
loss: 0.9902853965759277,grad_norm: 0.8883431806802372, iteration: 272263
loss: 0.9748954772949219,grad_norm: 0.9305647854566278, iteration: 272264
loss: 1.011162281036377,grad_norm: 0.8426138261056325, iteration: 272265
loss: 0.9848455190658569,grad_norm: 0.9999991327190472, iteration: 272266
loss: 1.1033580303192139,grad_norm: 0.9999992524346456, iteration: 272267
loss: 1.0293877124786377,grad_norm: 0.978288204162031, iteration: 272268
loss: 1.0430344343185425,grad_norm: 0.9999990667485003, iteration: 272269
loss: 1.0182911157608032,grad_norm: 0.7188304928931495, iteration: 272270
loss: 1.0127968788146973,grad_norm: 0.9355756263206825, iteration: 272271
loss: 1.0223312377929688,grad_norm: 0.8475893460292276, iteration: 272272
loss: 0.9996964931488037,grad_norm: 0.7904034725439092, iteration: 272273
loss: 1.0086079835891724,grad_norm: 0.8170902564567584, iteration: 272274
loss: 0.9949402213096619,grad_norm: 0.9999996805963328, iteration: 272275
loss: 0.9283132553100586,grad_norm: 0.9186082809486416, iteration: 272276
loss: 1.0367162227630615,grad_norm: 0.999999918071553, iteration: 272277
loss: 1.0199652910232544,grad_norm: 0.999999866208462, iteration: 272278
loss: 1.047014832496643,grad_norm: 0.8474266447743424, iteration: 272279
loss: 0.9876394867897034,grad_norm: 0.9147902219186853, iteration: 272280
loss: 1.032715916633606,grad_norm: 0.9449112802960609, iteration: 272281
loss: 1.012381672859192,grad_norm: 0.747555351769933, iteration: 272282
loss: 0.9816797971725464,grad_norm: 0.87961783927659, iteration: 272283
loss: 1.0268398523330688,grad_norm: 0.9999998658552479, iteration: 272284
loss: 1.0205936431884766,grad_norm: 0.9362628975871969, iteration: 272285
loss: 1.0272639989852905,grad_norm: 0.9999994248274087, iteration: 272286
loss: 1.0154856443405151,grad_norm: 0.8904354384552552, iteration: 272287
loss: 1.0177576541900635,grad_norm: 0.999999017588951, iteration: 272288
loss: 1.0345615148544312,grad_norm: 0.8677384506344613, iteration: 272289
loss: 1.0144171714782715,grad_norm: 0.9999991232314526, iteration: 272290
loss: 0.9660444855690002,grad_norm: 0.9969539732992768, iteration: 272291
loss: 1.0655956268310547,grad_norm: 0.9999991436389714, iteration: 272292
loss: 0.9983840584754944,grad_norm: 0.7199489328393569, iteration: 272293
loss: 0.9892697930335999,grad_norm: 0.8338401059306214, iteration: 272294
loss: 0.9803048372268677,grad_norm: 0.9251667549269482, iteration: 272295
loss: 1.043243646621704,grad_norm: 0.8105434366644202, iteration: 272296
loss: 1.0084736347198486,grad_norm: 0.999999541015845, iteration: 272297
loss: 0.977721095085144,grad_norm: 0.880030905493509, iteration: 272298
loss: 1.194766879081726,grad_norm: 0.9999990913846525, iteration: 272299
loss: 1.0397052764892578,grad_norm: 0.9999990099914727, iteration: 272300
loss: 1.022339940071106,grad_norm: 0.9999996526782335, iteration: 272301
loss: 0.9980896711349487,grad_norm: 0.8729950210885745, iteration: 272302
loss: 1.0428550243377686,grad_norm: 0.8339843900984116, iteration: 272303
loss: 1.000797986984253,grad_norm: 0.8848504527284045, iteration: 272304
loss: 1.1226060390472412,grad_norm: 0.9999992260774621, iteration: 272305
loss: 1.090886116027832,grad_norm: 0.999999232445425, iteration: 272306
loss: 1.0015182495117188,grad_norm: 0.846425619491234, iteration: 272307
loss: 1.000575065612793,grad_norm: 0.9999995659499032, iteration: 272308
loss: 1.1119085550308228,grad_norm: 0.9999995954943344, iteration: 272309
loss: 0.951988697052002,grad_norm: 0.845476510612315, iteration: 272310
loss: 1.080214023590088,grad_norm: 0.9999992601117927, iteration: 272311
loss: 1.0139522552490234,grad_norm: 0.9999992076581081, iteration: 272312
loss: 1.0985219478607178,grad_norm: 0.9999991562943288, iteration: 272313
loss: 1.1107484102249146,grad_norm: 0.999999335714947, iteration: 272314
loss: 1.148607611656189,grad_norm: 0.9999993069152041, iteration: 272315
loss: 0.9834690093994141,grad_norm: 0.8374938216536623, iteration: 272316
loss: 1.015802264213562,grad_norm: 0.9590740355565832, iteration: 272317
loss: 0.9926146864891052,grad_norm: 0.9520712555063271, iteration: 272318
loss: 1.139204978942871,grad_norm: 0.9999997305645997, iteration: 272319
loss: 1.0085079669952393,grad_norm: 0.9999994616295683, iteration: 272320
loss: 1.0083701610565186,grad_norm: 0.8446273710457328, iteration: 272321
loss: 0.9835790395736694,grad_norm: 0.8002269986939282, iteration: 272322
loss: 0.9677672982215881,grad_norm: 0.8608108283732838, iteration: 272323
loss: 0.9944981336593628,grad_norm: 0.9251261397610857, iteration: 272324
loss: 0.9583879709243774,grad_norm: 0.8061281198740816, iteration: 272325
loss: 1.002440333366394,grad_norm: 0.8557923195829603, iteration: 272326
loss: 1.050424575805664,grad_norm: 0.9999992568067932, iteration: 272327
loss: 1.215039610862732,grad_norm: 0.9999993016693156, iteration: 272328
loss: 1.031334638595581,grad_norm: 0.8604489975919695, iteration: 272329
loss: 1.0610615015029907,grad_norm: 0.9999996160537659, iteration: 272330
loss: 1.042018175125122,grad_norm: 0.825266437992398, iteration: 272331
loss: 1.1415303945541382,grad_norm: 0.9999991987489184, iteration: 272332
loss: 1.06489098072052,grad_norm: 0.8304023409324126, iteration: 272333
loss: 1.1309678554534912,grad_norm: 0.9999998616294072, iteration: 272334
loss: 0.9922400712966919,grad_norm: 0.9999992332881646, iteration: 272335
loss: 1.0589901208877563,grad_norm: 0.9999991697813977, iteration: 272336
loss: 0.9945120215415955,grad_norm: 0.9264892682045153, iteration: 272337
loss: 1.0303984880447388,grad_norm: 0.9999990882520946, iteration: 272338
loss: 1.0294800996780396,grad_norm: 0.8482928740781627, iteration: 272339
loss: 1.0161327123641968,grad_norm: 0.9300807571798495, iteration: 272340
loss: 1.0563080310821533,grad_norm: 0.999999146972745, iteration: 272341
loss: 1.006898045539856,grad_norm: 0.9999992750304364, iteration: 272342
loss: 1.1112748384475708,grad_norm: 0.9999992154761297, iteration: 272343
loss: 1.0288335084915161,grad_norm: 0.9999998808508324, iteration: 272344
loss: 1.0016868114471436,grad_norm: 0.9999990681879528, iteration: 272345
loss: 1.048611044883728,grad_norm: 0.9999991085288945, iteration: 272346
loss: 1.003420114517212,grad_norm: 0.7390491922426439, iteration: 272347
loss: 0.9697363972663879,grad_norm: 0.9637594466344627, iteration: 272348
loss: 1.0429900884628296,grad_norm: 0.9999999144381058, iteration: 272349
loss: 1.0107206106185913,grad_norm: 0.8915060568974498, iteration: 272350
loss: 1.0120140314102173,grad_norm: 0.9999991580493355, iteration: 272351
loss: 1.0447312593460083,grad_norm: 0.9999998170910913, iteration: 272352
loss: 0.9980182647705078,grad_norm: 0.8696516187516026, iteration: 272353
loss: 1.0107909440994263,grad_norm: 0.8957320324835419, iteration: 272354
loss: 1.0045816898345947,grad_norm: 0.9999992950930799, iteration: 272355
loss: 1.0062074661254883,grad_norm: 0.9552818614727637, iteration: 272356
loss: 1.0659453868865967,grad_norm: 0.9723503334111949, iteration: 272357
loss: 1.1218184232711792,grad_norm: 0.9999989326805856, iteration: 272358
loss: 0.9907111525535583,grad_norm: 0.9999990659935283, iteration: 272359
loss: 1.0967624187469482,grad_norm: 0.9999992618871981, iteration: 272360
loss: 0.971749484539032,grad_norm: 0.9999992981913186, iteration: 272361
loss: 1.008586049079895,grad_norm: 0.9029546683058641, iteration: 272362
loss: 1.0863468647003174,grad_norm: 0.9999993376475752, iteration: 272363
loss: 1.1250927448272705,grad_norm: 0.9999994171376971, iteration: 272364
loss: 1.0146101713180542,grad_norm: 0.8897325522045092, iteration: 272365
loss: 1.0189425945281982,grad_norm: 0.9073316011637144, iteration: 272366
loss: 1.0286089181900024,grad_norm: 0.9999992405776262, iteration: 272367
loss: 1.1100655794143677,grad_norm: 1.0000000040064243, iteration: 272368
loss: 1.1039764881134033,grad_norm: 0.9999999875219422, iteration: 272369
loss: 0.9691015481948853,grad_norm: 0.999999225113117, iteration: 272370
loss: 1.025244951248169,grad_norm: 0.8720425883912107, iteration: 272371
loss: 1.0913054943084717,grad_norm: 0.9999990902374345, iteration: 272372
loss: 1.1558871269226074,grad_norm: 0.9999998752417644, iteration: 272373
loss: 1.0966558456420898,grad_norm: 0.9999995207143817, iteration: 272374
loss: 1.0829452276229858,grad_norm: 0.9999993545590996, iteration: 272375
loss: 1.0477067232131958,grad_norm: 0.9999996142183828, iteration: 272376
loss: 0.997056245803833,grad_norm: 0.9999991953351549, iteration: 272377
loss: 1.0429155826568604,grad_norm: 0.9009573684507874, iteration: 272378
loss: 1.0210131406784058,grad_norm: 0.8920260506883737, iteration: 272379
loss: 0.9911459684371948,grad_norm: 0.8004145705605535, iteration: 272380
loss: 1.0339401960372925,grad_norm: 0.9999996711806349, iteration: 272381
loss: 1.0266555547714233,grad_norm: 0.9357450852608405, iteration: 272382
loss: 1.0060213804244995,grad_norm: 0.849695261772716, iteration: 272383
loss: 0.9644345641136169,grad_norm: 0.9959610340039325, iteration: 272384
loss: 1.012568473815918,grad_norm: 0.9999992517473049, iteration: 272385
loss: 1.0456594228744507,grad_norm: 0.7774205720505164, iteration: 272386
loss: 1.0036189556121826,grad_norm: 0.839733502872093, iteration: 272387
loss: 0.9881957173347473,grad_norm: 0.8392627936377001, iteration: 272388
loss: 1.0179542303085327,grad_norm: 0.8148670187592452, iteration: 272389
loss: 1.0480291843414307,grad_norm: 0.9999990571079382, iteration: 272390
loss: 1.063562035560608,grad_norm: 0.9999998242130185, iteration: 272391
loss: 1.0075498819351196,grad_norm: 0.9170875239999045, iteration: 272392
loss: 1.0127360820770264,grad_norm: 0.9104993910602488, iteration: 272393
loss: 1.0422815084457397,grad_norm: 0.9999994092637837, iteration: 272394
loss: 0.9913975596427917,grad_norm: 0.987486112836995, iteration: 272395
loss: 1.0266305208206177,grad_norm: 0.9999995932776116, iteration: 272396
loss: 1.0486657619476318,grad_norm: 0.9999997534285817, iteration: 272397
loss: 1.0020859241485596,grad_norm: 0.7863996281662534, iteration: 272398
loss: 0.9690933227539062,grad_norm: 0.7793655863611431, iteration: 272399
loss: 1.0030461549758911,grad_norm: 0.9971913570863534, iteration: 272400
loss: 1.0191508531570435,grad_norm: 0.9999996194591014, iteration: 272401
loss: 1.0275036096572876,grad_norm: 0.9766380120540258, iteration: 272402
loss: 1.0314476490020752,grad_norm: 0.9294458450886468, iteration: 272403
loss: 1.0052121877670288,grad_norm: 0.8830318378490103, iteration: 272404
loss: 0.9580712914466858,grad_norm: 0.9370173073339212, iteration: 272405
loss: 1.0742497444152832,grad_norm: 0.9999993845747323, iteration: 272406
loss: 1.0248485803604126,grad_norm: 0.7396487737694264, iteration: 272407
loss: 1.0390838384628296,grad_norm: 0.9681636114714889, iteration: 272408
loss: 0.9989585280418396,grad_norm: 0.7449612232088817, iteration: 272409
loss: 1.0316579341888428,grad_norm: 0.9011330459180479, iteration: 272410
loss: 0.9946692585945129,grad_norm: 0.993129085207795, iteration: 272411
loss: 1.0186721086502075,grad_norm: 0.9999992938751575, iteration: 272412
loss: 0.9887702465057373,grad_norm: 0.965516225467099, iteration: 272413
loss: 0.9670002460479736,grad_norm: 0.8039622073674725, iteration: 272414
loss: 1.026546835899353,grad_norm: 0.9572085286389974, iteration: 272415
loss: 1.0226551294326782,grad_norm: 0.8238377338584381, iteration: 272416
loss: 0.9793340563774109,grad_norm: 0.891880423641539, iteration: 272417
loss: 0.9565889239311218,grad_norm: 0.8673915522918456, iteration: 272418
loss: 1.0009452104568481,grad_norm: 0.8622230952180643, iteration: 272419
loss: 1.0254087448120117,grad_norm: 0.8020871737372964, iteration: 272420
loss: 0.9946110248565674,grad_norm: 0.9314224414736088, iteration: 272421
loss: 1.030465841293335,grad_norm: 0.8731030545975799, iteration: 272422
loss: 1.016775369644165,grad_norm: 0.7981008166967838, iteration: 272423
loss: 0.9736418724060059,grad_norm: 0.9043915655444417, iteration: 272424
loss: 0.9877708554267883,grad_norm: 0.8190212641805531, iteration: 272425
loss: 0.9958277344703674,grad_norm: 0.7846432473763633, iteration: 272426
loss: 1.08347487449646,grad_norm: 0.9999989281394316, iteration: 272427
loss: 0.9949465394020081,grad_norm: 0.8999338494572141, iteration: 272428
loss: 0.9988937973976135,grad_norm: 0.8381470377290937, iteration: 272429
loss: 1.014115810394287,grad_norm: 0.8077281246353422, iteration: 272430
loss: 1.0309199094772339,grad_norm: 0.8374409725874638, iteration: 272431
loss: 1.0203241109848022,grad_norm: 0.9782979844755606, iteration: 272432
loss: 1.0019527673721313,grad_norm: 0.8351809737285006, iteration: 272433
loss: 1.100191593170166,grad_norm: 0.9999995574534988, iteration: 272434
loss: 1.0174624919891357,grad_norm: 0.9999989668569976, iteration: 272435
loss: 1.0156302452087402,grad_norm: 0.9952519683374601, iteration: 272436
loss: 1.0124603509902954,grad_norm: 0.7758848698005731, iteration: 272437
loss: 1.051923155784607,grad_norm: 0.9999994409704143, iteration: 272438
loss: 1.0177857875823975,grad_norm: 0.9706992025721153, iteration: 272439
loss: 1.0205512046813965,grad_norm: 0.8507816513073004, iteration: 272440
loss: 1.0056071281433105,grad_norm: 0.7621127106785514, iteration: 272441
loss: 1.0080500841140747,grad_norm: 0.8435314595140109, iteration: 272442
loss: 0.9925257563591003,grad_norm: 0.9999991921194783, iteration: 272443
loss: 1.032795786857605,grad_norm: 0.9999991070219512, iteration: 272444
loss: 1.0606426000595093,grad_norm: 0.9999995557236686, iteration: 272445
loss: 0.9767292141914368,grad_norm: 0.7707546424815123, iteration: 272446
loss: 1.0385704040527344,grad_norm: 0.9999993309699086, iteration: 272447
loss: 1.034123420715332,grad_norm: 0.9999993805309076, iteration: 272448
loss: 0.9896791577339172,grad_norm: 0.8798494983664691, iteration: 272449
loss: 1.0153275728225708,grad_norm: 0.835975436545005, iteration: 272450
loss: 1.011900782585144,grad_norm: 0.8351250617556244, iteration: 272451
loss: 0.9760488867759705,grad_norm: 0.9999991392327312, iteration: 272452
loss: 1.000475525856018,grad_norm: 0.9372492242787104, iteration: 272453
loss: 1.025337815284729,grad_norm: 0.837776877481633, iteration: 272454
loss: 0.9847387671470642,grad_norm: 0.9382890977143932, iteration: 272455
loss: 0.9768030643463135,grad_norm: 0.7593126876766393, iteration: 272456
loss: 0.9886462092399597,grad_norm: 0.9541176271043214, iteration: 272457
loss: 1.083156704902649,grad_norm: 0.9999999033643264, iteration: 272458
loss: 0.9711766242980957,grad_norm: 0.8766679037919018, iteration: 272459
loss: 0.9979540705680847,grad_norm: 0.8556078405766515, iteration: 272460
loss: 0.9872722029685974,grad_norm: 0.8341706411339199, iteration: 272461
loss: 1.0716028213500977,grad_norm: 0.6963351883353337, iteration: 272462
loss: 1.0085594654083252,grad_norm: 0.9999996306796047, iteration: 272463
loss: 1.0143879652023315,grad_norm: 0.9241521980230786, iteration: 272464
loss: 1.0229392051696777,grad_norm: 0.8666225633879345, iteration: 272465
loss: 1.01607346534729,grad_norm: 0.9999994621088425, iteration: 272466
loss: 0.998863160610199,grad_norm: 0.9118997510509909, iteration: 272467
loss: 0.9691008925437927,grad_norm: 0.9360202909764649, iteration: 272468
loss: 1.0211933851242065,grad_norm: 0.9656160533475411, iteration: 272469
loss: 1.0366194248199463,grad_norm: 0.9999988619440248, iteration: 272470
loss: 1.0267530679702759,grad_norm: 0.8930160913740509, iteration: 272471
loss: 1.0010323524475098,grad_norm: 0.9545256109773098, iteration: 272472
loss: 0.9947754740715027,grad_norm: 0.9400441764089945, iteration: 272473
loss: 1.0140018463134766,grad_norm: 0.9008292752510332, iteration: 272474
loss: 0.9954735636711121,grad_norm: 0.8450038720861812, iteration: 272475
loss: 1.0128670930862427,grad_norm: 0.9999990378462144, iteration: 272476
loss: 0.9637272953987122,grad_norm: 0.8087934893604546, iteration: 272477
loss: 0.9835296869277954,grad_norm: 0.999999070087525, iteration: 272478
loss: 1.0381489992141724,grad_norm: 0.9124600313257826, iteration: 272479
loss: 1.0037198066711426,grad_norm: 0.8722837723270674, iteration: 272480
loss: 0.9921859502792358,grad_norm: 0.8028040152202917, iteration: 272481
loss: 0.9532716274261475,grad_norm: 0.9771362050700937, iteration: 272482
loss: 0.9953376650810242,grad_norm: 0.7669679437885903, iteration: 272483
loss: 0.9823076725006104,grad_norm: 0.9969942303720302, iteration: 272484
loss: 1.010302186012268,grad_norm: 0.94492488417499, iteration: 272485
loss: 1.0290608406066895,grad_norm: 0.939899932960894, iteration: 272486
loss: 1.0707749128341675,grad_norm: 0.9999997973497589, iteration: 272487
loss: 1.035495400428772,grad_norm: 0.9024661302537627, iteration: 272488
loss: 0.9885812401771545,grad_norm: 0.8891547866449517, iteration: 272489
loss: 1.0219703912734985,grad_norm: 0.999999967914231, iteration: 272490
loss: 1.0228486061096191,grad_norm: 0.9534177610645912, iteration: 272491
loss: 1.071582555770874,grad_norm: 0.8165997406680872, iteration: 272492
loss: 1.00007164478302,grad_norm: 0.9999991398617467, iteration: 272493
loss: 1.0139540433883667,grad_norm: 0.9999990961869112, iteration: 272494
loss: 0.9659047722816467,grad_norm: 0.8180773785247917, iteration: 272495
loss: 0.9783016443252563,grad_norm: 0.9999991390825438, iteration: 272496
loss: 0.995079517364502,grad_norm: 0.9032219338047758, iteration: 272497
loss: 1.004839301109314,grad_norm: 0.9999991088534559, iteration: 272498
loss: 1.0147240161895752,grad_norm: 0.7683299557333813, iteration: 272499
loss: 1.1564826965332031,grad_norm: 0.9999991767572772, iteration: 272500
loss: 1.0184686183929443,grad_norm: 0.7672728813442377, iteration: 272501
loss: 1.0131876468658447,grad_norm: 0.9999996079580636, iteration: 272502
loss: 1.022248387336731,grad_norm: 0.777341253059732, iteration: 272503
loss: 0.9888396859169006,grad_norm: 0.9985623283556486, iteration: 272504
loss: 1.0293374061584473,grad_norm: 0.9999991229181429, iteration: 272505
loss: 0.9918342232704163,grad_norm: 0.8746598476496409, iteration: 272506
loss: 1.0233190059661865,grad_norm: 0.7854632858820482, iteration: 272507
loss: 1.0140964984893799,grad_norm: 0.7838837641218263, iteration: 272508
loss: 1.0014128684997559,grad_norm: 0.8246894773436236, iteration: 272509
loss: 0.9749785661697388,grad_norm: 0.9999989988257698, iteration: 272510
loss: 0.985950231552124,grad_norm: 0.9999990248182603, iteration: 272511
loss: 1.0798895359039307,grad_norm: 0.9999993193928223, iteration: 272512
loss: 0.979581892490387,grad_norm: 0.9630030594877651, iteration: 272513
loss: 1.005535364151001,grad_norm: 0.9987231517082371, iteration: 272514
loss: 1.190092921257019,grad_norm: 0.9999996342033876, iteration: 272515
loss: 1.0065938234329224,grad_norm: 0.912650785301047, iteration: 272516
loss: 0.9812492728233337,grad_norm: 0.9331065040966222, iteration: 272517
loss: 1.0578410625457764,grad_norm: 0.999999393560602, iteration: 272518
loss: 1.061667561531067,grad_norm: 0.9999991930266215, iteration: 272519
loss: 0.987992525100708,grad_norm: 0.9445745569568014, iteration: 272520
loss: 0.9929631948471069,grad_norm: 0.8910679187136052, iteration: 272521
loss: 0.9978014230728149,grad_norm: 0.7705507474292218, iteration: 272522
loss: 1.0364142656326294,grad_norm: 0.8682304790841457, iteration: 272523
loss: 1.057520866394043,grad_norm: 0.9999997990952436, iteration: 272524
loss: 0.948723554611206,grad_norm: 0.8576393875899553, iteration: 272525
loss: 0.999388575553894,grad_norm: 0.9030105100780882, iteration: 272526
loss: 0.9879850149154663,grad_norm: 0.9999996435593169, iteration: 272527
loss: 1.0052103996276855,grad_norm: 0.9999991919053516, iteration: 272528
loss: 0.9560198783874512,grad_norm: 0.8914268816333214, iteration: 272529
loss: 1.0018831491470337,grad_norm: 0.9713384716756669, iteration: 272530
loss: 1.0106741189956665,grad_norm: 0.8997108291882124, iteration: 272531
loss: 0.9619819521903992,grad_norm: 0.9999991387285849, iteration: 272532
loss: 0.9965175986289978,grad_norm: 0.895718735745218, iteration: 272533
loss: 1.0230618715286255,grad_norm: 0.9999990654015575, iteration: 272534
loss: 1.0721120834350586,grad_norm: 0.8907990614411433, iteration: 272535
loss: 1.01551353931427,grad_norm: 0.8625535423928611, iteration: 272536
loss: 1.0107191801071167,grad_norm: 0.9633415574206664, iteration: 272537
loss: 1.0709471702575684,grad_norm: 0.7777078581082414, iteration: 272538
loss: 1.0098843574523926,grad_norm: 0.8984343025090671, iteration: 272539
loss: 0.9976889491081238,grad_norm: 0.9968450832480126, iteration: 272540
loss: 0.9753066301345825,grad_norm: 0.7573449344425863, iteration: 272541
loss: 1.003129482269287,grad_norm: 0.9999992171696126, iteration: 272542
loss: 1.0280205011367798,grad_norm: 0.9999996588267522, iteration: 272543
loss: 1.0534664392471313,grad_norm: 0.95902943752043, iteration: 272544
loss: 1.066080927848816,grad_norm: 0.9727242191479892, iteration: 272545
loss: 1.0038987398147583,grad_norm: 0.7622311821547819, iteration: 272546
loss: 1.0409893989562988,grad_norm: 0.8883177336932431, iteration: 272547
loss: 1.1144161224365234,grad_norm: 0.9999994747104038, iteration: 272548
loss: 1.0036118030548096,grad_norm: 0.9999991005794862, iteration: 272549
loss: 1.1269785165786743,grad_norm: 0.9999996489335148, iteration: 272550
loss: 1.0223063230514526,grad_norm: 0.774484503723439, iteration: 272551
loss: 1.0334805250167847,grad_norm: 0.8710216209837066, iteration: 272552
loss: 1.0173708200454712,grad_norm: 0.9398444564928408, iteration: 272553
loss: 0.9972440600395203,grad_norm: 0.9526793353968589, iteration: 272554
loss: 1.0011096000671387,grad_norm: 0.9017347944799943, iteration: 272555
loss: 1.1709380149841309,grad_norm: 0.9999997246907602, iteration: 272556
loss: 1.0118566751480103,grad_norm: 0.7991613214188626, iteration: 272557
loss: 0.9952476024627686,grad_norm: 0.9999990245524226, iteration: 272558
loss: 0.997894287109375,grad_norm: 0.8070445111776207, iteration: 272559
loss: 1.0741556882858276,grad_norm: 0.9999998894321108, iteration: 272560
loss: 1.005538821220398,grad_norm: 0.9251697077300686, iteration: 272561
loss: 1.082629919052124,grad_norm: 0.9999991348492249, iteration: 272562
loss: 0.9700279235839844,grad_norm: 0.9461655578169942, iteration: 272563
loss: 1.0284711122512817,grad_norm: 0.9999990728375294, iteration: 272564
loss: 1.0271530151367188,grad_norm: 0.979876400798052, iteration: 272565
loss: 0.975627064704895,grad_norm: 0.8652607452168064, iteration: 272566
loss: 0.9910025000572205,grad_norm: 0.6615463753049406, iteration: 272567
loss: 0.9994596838951111,grad_norm: 0.8656438727394595, iteration: 272568
loss: 0.9879676103591919,grad_norm: 0.9999992529809528, iteration: 272569
loss: 1.0423012971878052,grad_norm: 0.9203094488995776, iteration: 272570
loss: 0.9842994809150696,grad_norm: 0.6930381297250511, iteration: 272571
loss: 0.9611022472381592,grad_norm: 0.8205819582551287, iteration: 272572
loss: 1.0357869863510132,grad_norm: 0.999999857999582, iteration: 272573
loss: 0.9562802314758301,grad_norm: 0.840979514426764, iteration: 272574
loss: 1.0804791450500488,grad_norm: 0.8415527213197259, iteration: 272575
loss: 1.0015467405319214,grad_norm: 0.8367776925108795, iteration: 272576
loss: 0.989750862121582,grad_norm: 0.9648055834462426, iteration: 272577
loss: 0.984627902507782,grad_norm: 0.9639876858429813, iteration: 272578
loss: 0.9919933080673218,grad_norm: 0.9158023448178967, iteration: 272579
loss: 1.0306638479232788,grad_norm: 0.9999991843946693, iteration: 272580
loss: 0.9806427359580994,grad_norm: 0.8266360041913985, iteration: 272581
loss: 0.9764938354492188,grad_norm: 0.9999990485691148, iteration: 272582
loss: 1.000296711921692,grad_norm: 0.9040381203510447, iteration: 272583
loss: 1.0287542343139648,grad_norm: 0.8831580227798493, iteration: 272584
loss: 0.9920415878295898,grad_norm: 0.8524966832042871, iteration: 272585
loss: 1.0617989301681519,grad_norm: 0.9999997857659086, iteration: 272586
loss: 1.0620946884155273,grad_norm: 1.0000000081543665, iteration: 272587
loss: 0.9670546054840088,grad_norm: 0.9999990869632462, iteration: 272588
loss: 1.0433191061019897,grad_norm: 0.7142196747120376, iteration: 272589
loss: 1.0063185691833496,grad_norm: 0.7338390360477727, iteration: 272590
loss: 0.9775455594062805,grad_norm: 0.8439776149103921, iteration: 272591
loss: 1.0108563899993896,grad_norm: 0.9999991960592893, iteration: 272592
loss: 1.0338784456253052,grad_norm: 0.8700596881591987, iteration: 272593
loss: 1.0195637941360474,grad_norm: 0.9211125061431091, iteration: 272594
loss: 1.0179924964904785,grad_norm: 0.9999990574164603, iteration: 272595
loss: 1.0168424844741821,grad_norm: 0.7857491290344935, iteration: 272596
loss: 1.045918345451355,grad_norm: 0.9903435264955317, iteration: 272597
loss: 0.9723910093307495,grad_norm: 0.815201829942399, iteration: 272598
loss: 1.0209921598434448,grad_norm: 0.6947816771898889, iteration: 272599
loss: 0.9962581396102905,grad_norm: 0.8053488922564569, iteration: 272600
loss: 1.0165451765060425,grad_norm: 0.9562559038040653, iteration: 272601
loss: 1.0306285619735718,grad_norm: 0.999999173059901, iteration: 272602
loss: 0.9886234402656555,grad_norm: 0.9352751545804034, iteration: 272603
loss: 1.0431461334228516,grad_norm: 0.7859513770330234, iteration: 272604
loss: 1.0316168069839478,grad_norm: 0.9303896059307392, iteration: 272605
loss: 1.00835382938385,grad_norm: 0.9749582327626409, iteration: 272606
loss: 1.0320138931274414,grad_norm: 0.8617255474728162, iteration: 272607
loss: 1.0515639781951904,grad_norm: 0.9999998024508119, iteration: 272608
loss: 1.0160292387008667,grad_norm: 0.9122567171622594, iteration: 272609
loss: 1.0098758935928345,grad_norm: 0.8337640511668787, iteration: 272610
loss: 1.0059651136398315,grad_norm: 0.8995835253401578, iteration: 272611
loss: 1.0276824235916138,grad_norm: 0.8334286049771454, iteration: 272612
loss: 1.03495192527771,grad_norm: 0.8661518390078009, iteration: 272613
loss: 0.9909361004829407,grad_norm: 0.9162633992624051, iteration: 272614
loss: 0.9910190105438232,grad_norm: 0.9460678909542616, iteration: 272615
loss: 1.0106561183929443,grad_norm: 0.9567658164367451, iteration: 272616
loss: 0.9891403317451477,grad_norm: 0.9999990651310633, iteration: 272617
loss: 1.0072124004364014,grad_norm: 0.9999991820608753, iteration: 272618
loss: 0.9924958348274231,grad_norm: 0.7908120112034283, iteration: 272619
loss: 1.0136966705322266,grad_norm: 0.8016751850816491, iteration: 272620
loss: 1.0193959474563599,grad_norm: 0.9999989383318377, iteration: 272621
loss: 1.0115935802459717,grad_norm: 0.9263178542850568, iteration: 272622
loss: 1.028480887413025,grad_norm: 0.7293981644439724, iteration: 272623
loss: 1.026808500289917,grad_norm: 0.9970230987656825, iteration: 272624
loss: 1.0313175916671753,grad_norm: 0.999999963714948, iteration: 272625
loss: 1.021278977394104,grad_norm: 0.9999992455260529, iteration: 272626
loss: 1.037851333618164,grad_norm: 0.8927592272417814, iteration: 272627
loss: 0.9917712211608887,grad_norm: 0.9256408515778277, iteration: 272628
loss: 1.0098628997802734,grad_norm: 0.9093412956813296, iteration: 272629
loss: 1.013831615447998,grad_norm: 0.9090282564179937, iteration: 272630
loss: 0.9971773624420166,grad_norm: 0.8481235920852033, iteration: 272631
loss: 0.9664166569709778,grad_norm: 0.8465338707841741, iteration: 272632
loss: 1.0242879390716553,grad_norm: 0.9999998983969158, iteration: 272633
loss: 0.9825574159622192,grad_norm: 0.7326379593196433, iteration: 272634
loss: 0.9577852487564087,grad_norm: 0.9999991829505493, iteration: 272635
loss: 1.0099754333496094,grad_norm: 0.8553376945298211, iteration: 272636
loss: 1.0322059392929077,grad_norm: 0.9999990297154726, iteration: 272637
loss: 0.9797643423080444,grad_norm: 0.9999992065780848, iteration: 272638
loss: 1.0182396173477173,grad_norm: 0.9179147151806287, iteration: 272639
loss: 0.9949898719787598,grad_norm: 0.8802796479334966, iteration: 272640
loss: 1.0228914022445679,grad_norm: 0.8947127417419292, iteration: 272641
loss: 0.9589014053344727,grad_norm: 0.8599649067726473, iteration: 272642
loss: 0.9979367256164551,grad_norm: 0.8835035494272904, iteration: 272643
loss: 0.9969598054885864,grad_norm: 0.9329621021679281, iteration: 272644
loss: 0.9786185622215271,grad_norm: 0.988844018505354, iteration: 272645
loss: 0.9979236721992493,grad_norm: 0.9999992041955318, iteration: 272646
loss: 0.9913116693496704,grad_norm: 0.9811723162617803, iteration: 272647
loss: 1.0421037673950195,grad_norm: 0.9214856732609125, iteration: 272648
loss: 1.0127919912338257,grad_norm: 0.8961618640999006, iteration: 272649
loss: 1.0559273958206177,grad_norm: 0.9999994948398748, iteration: 272650
loss: 1.0181580781936646,grad_norm: 0.9868260514113126, iteration: 272651
loss: 0.9833090901374817,grad_norm: 0.6966470369181477, iteration: 272652
loss: 1.0282018184661865,grad_norm: 0.8278114721598818, iteration: 272653
loss: 1.1126161813735962,grad_norm: 0.9999993505845518, iteration: 272654
loss: 0.9953349232673645,grad_norm: 0.9999990254222471, iteration: 272655
loss: 1.015773892402649,grad_norm: 0.8822888336909965, iteration: 272656
loss: 0.9980098009109497,grad_norm: 0.7516425220410359, iteration: 272657
loss: 1.024271011352539,grad_norm: 0.9999992913517107, iteration: 272658
loss: 0.962375819683075,grad_norm: 0.9558719710373319, iteration: 272659
loss: 0.9909968376159668,grad_norm: 0.7993381030506058, iteration: 272660
loss: 0.9770048260688782,grad_norm: 0.9426538099826034, iteration: 272661
loss: 1.0233851671218872,grad_norm: 0.8264267036454956, iteration: 272662
loss: 1.0118887424468994,grad_norm: 0.9261624596322371, iteration: 272663
loss: 0.9921695590019226,grad_norm: 0.8254385140427342, iteration: 272664
loss: 0.9808977842330933,grad_norm: 0.7753102210609771, iteration: 272665
loss: 0.9838542938232422,grad_norm: 0.8767527234572415, iteration: 272666
loss: 1.0090234279632568,grad_norm: 0.951470305083996, iteration: 272667
loss: 0.9711238741874695,grad_norm: 0.834001570149161, iteration: 272668
loss: 0.9888531565666199,grad_norm: 0.9999995536141203, iteration: 272669
loss: 0.9836071133613586,grad_norm: 0.9473254264909965, iteration: 272670
loss: 0.9906407594680786,grad_norm: 0.811026475385207, iteration: 272671
loss: 1.017966866493225,grad_norm: 0.8203481357333187, iteration: 272672
loss: 1.039969563484192,grad_norm: 0.7681494414130039, iteration: 272673
loss: 0.9841119647026062,grad_norm: 0.9255814834185889, iteration: 272674
loss: 0.9871683716773987,grad_norm: 0.9287764958409926, iteration: 272675
loss: 0.9978537559509277,grad_norm: 0.9999993603185366, iteration: 272676
loss: 1.0034629106521606,grad_norm: 0.9123474300321008, iteration: 272677
loss: 0.9919494390487671,grad_norm: 0.7965672104019437, iteration: 272678
loss: 0.979534924030304,grad_norm: 0.7830879755955109, iteration: 272679
loss: 1.0341068506240845,grad_norm: 0.9999998586752068, iteration: 272680
loss: 1.0203235149383545,grad_norm: 0.8454020161296418, iteration: 272681
loss: 1.0243364572525024,grad_norm: 0.7695081646540307, iteration: 272682
loss: 0.9600213170051575,grad_norm: 0.9999990859413177, iteration: 272683
loss: 0.976993978023529,grad_norm: 0.8994074449952163, iteration: 272684
loss: 1.057799220085144,grad_norm: 0.8882617995997132, iteration: 272685
loss: 1.0249067544937134,grad_norm: 0.9265647125716907, iteration: 272686
loss: 1.065565586090088,grad_norm: 0.8759804593925885, iteration: 272687
loss: 1.004886507987976,grad_norm: 0.9265170221442569, iteration: 272688
loss: 1.0189626216888428,grad_norm: 0.863238671232655, iteration: 272689
loss: 0.9866529107093811,grad_norm: 0.9377554364276768, iteration: 272690
loss: 1.0105116367340088,grad_norm: 0.9999992139507211, iteration: 272691
loss: 0.9747703075408936,grad_norm: 0.9767648162520661, iteration: 272692
loss: 0.9607030749320984,grad_norm: 0.9999989931494191, iteration: 272693
loss: 1.011406421661377,grad_norm: 0.7744987560860627, iteration: 272694
loss: 1.0192325115203857,grad_norm: 0.9999992344167763, iteration: 272695
loss: 1.003781795501709,grad_norm: 0.862298212779024, iteration: 272696
loss: 0.9846093654632568,grad_norm: 0.8728733461304016, iteration: 272697
loss: 0.9962940812110901,grad_norm: 0.9999991001079082, iteration: 272698
loss: 0.9763191342353821,grad_norm: 0.852088378135207, iteration: 272699
loss: 1.0193336009979248,grad_norm: 0.9775113900208925, iteration: 272700
loss: 1.0368574857711792,grad_norm: 0.9999995942445201, iteration: 272701
loss: 0.9921008348464966,grad_norm: 0.734929209000258, iteration: 272702
loss: 0.9955026507377625,grad_norm: 0.9999992452782853, iteration: 272703
loss: 1.0313308238983154,grad_norm: 0.8430432802043957, iteration: 272704
loss: 0.9815346002578735,grad_norm: 0.8521407867630957, iteration: 272705
loss: 1.0178179740905762,grad_norm: 0.8810320660182691, iteration: 272706
loss: 0.9854679107666016,grad_norm: 0.9742572477540474, iteration: 272707
loss: 1.014765739440918,grad_norm: 0.9999992659173583, iteration: 272708
loss: 0.9852191209793091,grad_norm: 0.999999246015522, iteration: 272709
loss: 1.0421980619430542,grad_norm: 0.9999997996807459, iteration: 272710
loss: 1.0048741102218628,grad_norm: 0.9674475307255375, iteration: 272711
loss: 0.9978030323982239,grad_norm: 0.9003723716605313, iteration: 272712
loss: 1.103098750114441,grad_norm: 0.9999998537341944, iteration: 272713
loss: 0.9967068433761597,grad_norm: 0.7966869654494622, iteration: 272714
loss: 1.0038374662399292,grad_norm: 0.8304979477638563, iteration: 272715
loss: 1.024837851524353,grad_norm: 0.832668706828897, iteration: 272716
loss: 1.0300685167312622,grad_norm: 0.8339943864734172, iteration: 272717
loss: 0.9722437858581543,grad_norm: 0.9999991173429653, iteration: 272718
loss: 1.0786556005477905,grad_norm: 0.9999993395193983, iteration: 272719
loss: 1.0358062982559204,grad_norm: 0.9999997027160337, iteration: 272720
loss: 0.9722870588302612,grad_norm: 0.9402561693195666, iteration: 272721
loss: 1.0043269395828247,grad_norm: 0.8272352381554466, iteration: 272722
loss: 0.9973210096359253,grad_norm: 0.9999989739661259, iteration: 272723
loss: 0.9670355319976807,grad_norm: 0.8046866909097881, iteration: 272724
loss: 1.0465483665466309,grad_norm: 0.9999996032474142, iteration: 272725
loss: 0.9945676922798157,grad_norm: 0.9999991701969115, iteration: 272726
loss: 0.9950400590896606,grad_norm: 0.9166473768382187, iteration: 272727
loss: 1.0273476839065552,grad_norm: 0.9999991976083942, iteration: 272728
loss: 1.0022388696670532,grad_norm: 0.9999990893141254, iteration: 272729
loss: 0.9716650247573853,grad_norm: 0.9323475932362263, iteration: 272730
loss: 0.9743818044662476,grad_norm: 0.8738875004364687, iteration: 272731
loss: 1.0279659032821655,grad_norm: 0.9999990452008172, iteration: 272732
loss: 1.015756368637085,grad_norm: 0.8336914191190595, iteration: 272733
loss: 0.9956961870193481,grad_norm: 0.8156158946726454, iteration: 272734
loss: 1.0523329973220825,grad_norm: 0.9999997155442262, iteration: 272735
loss: 0.9661654829978943,grad_norm: 0.9596823801957095, iteration: 272736
loss: 1.0031098127365112,grad_norm: 0.6794268815680438, iteration: 272737
loss: 0.9893831014633179,grad_norm: 0.8647984566839523, iteration: 272738
loss: 1.0233184099197388,grad_norm: 0.725005920209038, iteration: 272739
loss: 0.9793040156364441,grad_norm: 0.8075755126544554, iteration: 272740
loss: 1.0058902502059937,grad_norm: 0.8922155031266665, iteration: 272741
loss: 1.0269216299057007,grad_norm: 0.7424526325813847, iteration: 272742
loss: 0.9944815635681152,grad_norm: 0.7958469493002186, iteration: 272743
loss: 1.0014978647232056,grad_norm: 0.9076166814097465, iteration: 272744
loss: 0.9985519051551819,grad_norm: 0.9999991400654688, iteration: 272745
loss: 0.9964799284934998,grad_norm: 0.731400220696964, iteration: 272746
loss: 0.9825302958488464,grad_norm: 0.9683599112547457, iteration: 272747
loss: 1.016958475112915,grad_norm: 0.9999995852975585, iteration: 272748
loss: 1.0031777620315552,grad_norm: 0.9999995634940425, iteration: 272749
loss: 0.9903663396835327,grad_norm: 0.8265823814116088, iteration: 272750
loss: 1.0232564210891724,grad_norm: 0.8447282011522936, iteration: 272751
loss: 0.9944409728050232,grad_norm: 0.8668111125763719, iteration: 272752
loss: 0.9704509377479553,grad_norm: 0.7869639396910183, iteration: 272753
loss: 1.006468653678894,grad_norm: 0.9200226937313937, iteration: 272754
loss: 1.0310916900634766,grad_norm: 0.9296524313626446, iteration: 272755
loss: 1.0085607767105103,grad_norm: 0.9999992440791661, iteration: 272756
loss: 1.0304750204086304,grad_norm: 0.8255097554958921, iteration: 272757
loss: 1.025050163269043,grad_norm: 0.8828031057216271, iteration: 272758
loss: 0.9593914747238159,grad_norm: 0.7551678088194642, iteration: 272759
loss: 1.0224131345748901,grad_norm: 0.777235590256863, iteration: 272760
loss: 1.0509408712387085,grad_norm: 0.9452316600821478, iteration: 272761
loss: 0.991381049156189,grad_norm: 0.7830329804468324, iteration: 272762
loss: 0.9609881639480591,grad_norm: 0.9612132826949452, iteration: 272763
loss: 0.9933680891990662,grad_norm: 0.8314048200260673, iteration: 272764
loss: 0.9841236472129822,grad_norm: 0.7328875921378897, iteration: 272765
loss: 0.9583019018173218,grad_norm: 0.7632173620853563, iteration: 272766
loss: 0.9813858270645142,grad_norm: 0.8454152150804464, iteration: 272767
loss: 0.9732140898704529,grad_norm: 0.8126043721186639, iteration: 272768
loss: 0.93510502576828,grad_norm: 0.7717875980620289, iteration: 272769
loss: 1.0201334953308105,grad_norm: 0.9999991753520152, iteration: 272770
loss: 0.9980034828186035,grad_norm: 0.8102536113535004, iteration: 272771
loss: 0.9889665246009827,grad_norm: 0.86351646300352, iteration: 272772
loss: 1.02973473072052,grad_norm: 0.8306118045888936, iteration: 272773
loss: 0.9801886081695557,grad_norm: 0.7946452669732048, iteration: 272774
loss: 1.0058848857879639,grad_norm: 0.94429489952228, iteration: 272775
loss: 0.984871506690979,grad_norm: 0.8742615010058568, iteration: 272776
loss: 1.0381313562393188,grad_norm: 0.8911220714037289, iteration: 272777
loss: 0.9947425723075867,grad_norm: 0.999999059492088, iteration: 272778
loss: 1.0069235563278198,grad_norm: 0.9403913990302049, iteration: 272779
loss: 0.9575819969177246,grad_norm: 0.8445689275527296, iteration: 272780
loss: 0.9870743751525879,grad_norm: 0.999999533231748, iteration: 272781
loss: 0.9905369281768799,grad_norm: 0.7770960156418569, iteration: 272782
loss: 1.00803804397583,grad_norm: 0.8980698274723606, iteration: 272783
loss: 0.9633111953735352,grad_norm: 0.8080804840774182, iteration: 272784
loss: 0.9893929362297058,grad_norm: 0.772534327052702, iteration: 272785
loss: 1.0951554775238037,grad_norm: 0.9914189288492975, iteration: 272786
loss: 1.0000308752059937,grad_norm: 0.8597180231642645, iteration: 272787
loss: 0.9915622472763062,grad_norm: 0.8117755222813172, iteration: 272788
loss: 0.9573895931243896,grad_norm: 0.7609353855048014, iteration: 272789
loss: 1.004096508026123,grad_norm: 0.999998905488557, iteration: 272790
loss: 0.9826003313064575,grad_norm: 0.7984454900220563, iteration: 272791
loss: 0.9997107982635498,grad_norm: 0.9999991282987706, iteration: 272792
loss: 1.0024157762527466,grad_norm: 0.9507984311188292, iteration: 272793
loss: 1.1185065507888794,grad_norm: 0.9537536739583907, iteration: 272794
loss: 1.0030534267425537,grad_norm: 0.7836139066239867, iteration: 272795
loss: 0.9600141644477844,grad_norm: 0.9999990755501481, iteration: 272796
loss: 1.0006070137023926,grad_norm: 0.9444804430678758, iteration: 272797
loss: 0.9933477640151978,grad_norm: 0.7922276237354571, iteration: 272798
loss: 0.9889455437660217,grad_norm: 0.9197262027206512, iteration: 272799
loss: 0.9942038059234619,grad_norm: 0.8532904327564023, iteration: 272800
loss: 1.005063772201538,grad_norm: 0.816125018388936, iteration: 272801
loss: 0.9840935468673706,grad_norm: 0.9999993046084563, iteration: 272802
loss: 1.0210870504379272,grad_norm: 0.8226300385079429, iteration: 272803
loss: 1.009562611579895,grad_norm: 0.9395020114822307, iteration: 272804
loss: 0.9911175966262817,grad_norm: 0.8433399913645419, iteration: 272805
loss: 1.0115458965301514,grad_norm: 0.9514801509606666, iteration: 272806
loss: 1.0031192302703857,grad_norm: 0.797042567428315, iteration: 272807
loss: 0.9938197135925293,grad_norm: 0.818340010219246, iteration: 272808
loss: 1.0764613151550293,grad_norm: 0.8509382166112347, iteration: 272809
loss: 1.0066765546798706,grad_norm: 0.8726510952353468, iteration: 272810
loss: 1.0172574520111084,grad_norm: 0.9093762282686589, iteration: 272811
loss: 1.050260305404663,grad_norm: 0.979034489601693, iteration: 272812
loss: 0.9865353107452393,grad_norm: 0.8148061673463985, iteration: 272813
loss: 1.0057284832000732,grad_norm: 0.9515125163710798, iteration: 272814
loss: 1.0104154348373413,grad_norm: 0.7335080034262583, iteration: 272815
loss: 1.0064849853515625,grad_norm: 0.8552767916306766, iteration: 272816
loss: 1.0238066911697388,grad_norm: 0.851941113111183, iteration: 272817
loss: 0.9930334687232971,grad_norm: 0.7799152692736049, iteration: 272818
loss: 1.0089151859283447,grad_norm: 0.7648651822292613, iteration: 272819
loss: 0.9818302989006042,grad_norm: 0.8280397121862804, iteration: 272820
loss: 1.0097649097442627,grad_norm: 0.9999991629278797, iteration: 272821
loss: 1.0600844621658325,grad_norm: 0.7791240765949643, iteration: 272822
loss: 1.0187283754348755,grad_norm: 0.9226011996826955, iteration: 272823
loss: 0.9352079629898071,grad_norm: 0.8580293911593916, iteration: 272824
loss: 0.997991144657135,grad_norm: 0.7300420125580813, iteration: 272825
loss: 1.0143815279006958,grad_norm: 0.9999990511412397, iteration: 272826
loss: 1.0271031856536865,grad_norm: 0.8388166598520712, iteration: 272827
loss: 1.034450888633728,grad_norm: 0.9817862016329286, iteration: 272828
loss: 1.0099818706512451,grad_norm: 0.838250004825636, iteration: 272829
loss: 1.0558568239212036,grad_norm: 0.8332439672425055, iteration: 272830
loss: 1.0329582691192627,grad_norm: 0.7467543663305304, iteration: 272831
loss: 1.0177358388900757,grad_norm: 0.9154904930619636, iteration: 272832
loss: 0.9763119220733643,grad_norm: 0.9999992105278642, iteration: 272833
loss: 1.0148735046386719,grad_norm: 0.7540505959262804, iteration: 272834
loss: 1.002633810043335,grad_norm: 0.8924885721466681, iteration: 272835
loss: 0.9890508055686951,grad_norm: 0.7523877528143025, iteration: 272836
loss: 1.0042147636413574,grad_norm: 0.7725000337523841, iteration: 272837
loss: 0.9629979729652405,grad_norm: 0.8145560149170141, iteration: 272838
loss: 0.9708296060562134,grad_norm: 0.8554707615620372, iteration: 272839
loss: 1.0065913200378418,grad_norm: 0.9999990359213049, iteration: 272840
loss: 0.9859363436698914,grad_norm: 0.9488861106567386, iteration: 272841
loss: 1.0065922737121582,grad_norm: 0.8424675398229521, iteration: 272842
loss: 1.0079689025878906,grad_norm: 0.8034050894112719, iteration: 272843
loss: 0.955227255821228,grad_norm: 0.7324096765452874, iteration: 272844
loss: 0.9873625040054321,grad_norm: 0.9948343313499366, iteration: 272845
loss: 1.0152422189712524,grad_norm: 0.9999996871253539, iteration: 272846
loss: 0.989728569984436,grad_norm: 0.8700600226073948, iteration: 272847
loss: 1.012851595878601,grad_norm: 0.8721875608925812, iteration: 272848
loss: 1.0526388883590698,grad_norm: 0.8779032931648689, iteration: 272849
loss: 0.9811651110649109,grad_norm: 0.8160367580159085, iteration: 272850
loss: 0.9960901141166687,grad_norm: 0.7686803899508518, iteration: 272851
loss: 1.0201349258422852,grad_norm: 0.9999992402596256, iteration: 272852
loss: 0.9990236759185791,grad_norm: 0.9999989917118686, iteration: 272853
loss: 1.043125033378601,grad_norm: 0.7618684766949733, iteration: 272854
loss: 1.0033053159713745,grad_norm: 0.8674063275324841, iteration: 272855
loss: 0.9918828010559082,grad_norm: 0.9609442631071188, iteration: 272856
loss: 0.9907189607620239,grad_norm: 0.9999993406750833, iteration: 272857
loss: 0.9918108582496643,grad_norm: 0.8469691445646051, iteration: 272858
loss: 0.9666393399238586,grad_norm: 0.9999990750986701, iteration: 272859
loss: 1.0180243253707886,grad_norm: 0.7036892097045427, iteration: 272860
loss: 1.0200610160827637,grad_norm: 0.9999992191541136, iteration: 272861
loss: 0.9899202585220337,grad_norm: 0.7613832581103838, iteration: 272862
loss: 1.0305002927780151,grad_norm: 0.867616238208384, iteration: 272863
loss: 0.991690993309021,grad_norm: 0.7692468420180953, iteration: 272864
loss: 1.0030701160430908,grad_norm: 0.9301743812932589, iteration: 272865
loss: 0.9817935824394226,grad_norm: 0.8353467792855356, iteration: 272866
loss: 1.0055850744247437,grad_norm: 0.8412138783758413, iteration: 272867
loss: 1.0628421306610107,grad_norm: 0.8454896328923218, iteration: 272868
loss: 1.0159152746200562,grad_norm: 0.9999996159650557, iteration: 272869
loss: 1.0063529014587402,grad_norm: 0.8521467909177686, iteration: 272870
loss: 0.9976288080215454,grad_norm: 0.8921731143783046, iteration: 272871
loss: 0.9970864057540894,grad_norm: 0.8745230687163965, iteration: 272872
loss: 1.004597544670105,grad_norm: 0.8985292912946735, iteration: 272873
loss: 1.0137890577316284,grad_norm: 0.9433312023768172, iteration: 272874
loss: 0.9996488690376282,grad_norm: 0.7676297905304373, iteration: 272875
loss: 0.9512872099876404,grad_norm: 0.8838264449686157, iteration: 272876
loss: 0.9793190360069275,grad_norm: 0.8012854433780807, iteration: 272877
loss: 1.0448920726776123,grad_norm: 0.7697989879207758, iteration: 272878
loss: 0.9903803467750549,grad_norm: 0.8354736567247031, iteration: 272879
loss: 1.0152596235275269,grad_norm: 0.7740121484721711, iteration: 272880
loss: 0.9744752645492554,grad_norm: 0.9056639446511937, iteration: 272881
loss: 0.9856628179550171,grad_norm: 0.7283839799449272, iteration: 272882
loss: 0.9913445115089417,grad_norm: 0.8566655791309409, iteration: 272883
loss: 0.9678610563278198,grad_norm: 0.8576058589443629, iteration: 272884
loss: 1.0316005945205688,grad_norm: 0.9999999184965697, iteration: 272885
loss: 0.9981825947761536,grad_norm: 0.9999991343214003, iteration: 272886
loss: 1.0416680574417114,grad_norm: 0.9383055699352804, iteration: 272887
loss: 0.9627472758293152,grad_norm: 0.9125456383717447, iteration: 272888
loss: 0.9759914875030518,grad_norm: 0.8712575564275249, iteration: 272889
loss: 1.0003924369812012,grad_norm: 0.9444966155735626, iteration: 272890
loss: 1.0813568830490112,grad_norm: 0.9999991748460356, iteration: 272891
loss: 0.9736508131027222,grad_norm: 0.835089943127484, iteration: 272892
loss: 1.000274419784546,grad_norm: 0.8221845041738177, iteration: 272893
loss: 0.9751781225204468,grad_norm: 0.7534908516622083, iteration: 272894
loss: 0.9841025471687317,grad_norm: 0.8764207463434838, iteration: 272895
loss: 0.997424840927124,grad_norm: 0.8366351401492803, iteration: 272896
loss: 0.9833852648735046,grad_norm: 0.9999990597010933, iteration: 272897
loss: 0.9862251281738281,grad_norm: 0.9999996900166679, iteration: 272898
loss: 1.039717435836792,grad_norm: 0.9324393864060289, iteration: 272899
loss: 0.9783223867416382,grad_norm: 0.76882795035105, iteration: 272900
loss: 0.9870172142982483,grad_norm: 0.8566477018174233, iteration: 272901
loss: 0.9635855555534363,grad_norm: 0.918236353831136, iteration: 272902
loss: 0.9686476588249207,grad_norm: 0.9985749270778005, iteration: 272903
loss: 0.9969146251678467,grad_norm: 0.7880066201218126, iteration: 272904
loss: 0.9875221252441406,grad_norm: 0.8365611698294414, iteration: 272905
loss: 1.0069830417633057,grad_norm: 0.8596491361909702, iteration: 272906
loss: 1.020420789718628,grad_norm: 0.8902998669762788, iteration: 272907
loss: 1.019083857536316,grad_norm: 0.9633418674002892, iteration: 272908
loss: 1.019363284111023,grad_norm: 0.9999998018076882, iteration: 272909
loss: 0.9695408940315247,grad_norm: 0.8144845778177716, iteration: 272910
loss: 1.0051571130752563,grad_norm: 0.8169520988887898, iteration: 272911
loss: 0.9868674278259277,grad_norm: 0.8332966311506892, iteration: 272912
loss: 0.9894883036613464,grad_norm: 0.9652553172485654, iteration: 272913
loss: 0.9921345710754395,grad_norm: 0.8484155713666174, iteration: 272914
loss: 0.9587621688842773,grad_norm: 0.8691788599103407, iteration: 272915
loss: 1.0138063430786133,grad_norm: 0.9999990316837046, iteration: 272916
loss: 0.9832208156585693,grad_norm: 0.9106515675005685, iteration: 272917
loss: 0.9901509881019592,grad_norm: 0.8602826268685249, iteration: 272918
loss: 0.995373010635376,grad_norm: 0.8307522904659427, iteration: 272919
loss: 0.9660835862159729,grad_norm: 0.8827266845482944, iteration: 272920
loss: 1.0039575099945068,grad_norm: 0.6771313518071638, iteration: 272921
loss: 1.0087109804153442,grad_norm: 0.995133596048322, iteration: 272922
loss: 0.9851007461547852,grad_norm: 0.8938354235180297, iteration: 272923
loss: 0.9641293883323669,grad_norm: 0.9813691094300285, iteration: 272924
loss: 0.9490538835525513,grad_norm: 0.8817893728286333, iteration: 272925
loss: 1.0030497312545776,grad_norm: 0.8117629392454406, iteration: 272926
loss: 1.0061415433883667,grad_norm: 0.952219714803562, iteration: 272927
loss: 1.038676142692566,grad_norm: 0.7911919009789948, iteration: 272928
loss: 1.000646710395813,grad_norm: 0.8250626734608983, iteration: 272929
loss: 1.011100172996521,grad_norm: 0.8684417676062886, iteration: 272930
loss: 1.022524356842041,grad_norm: 0.9951750180046174, iteration: 272931
loss: 0.9934611320495605,grad_norm: 0.7316969474682119, iteration: 272932
loss: 0.9986919164657593,grad_norm: 0.7623001356956464, iteration: 272933
loss: 1.024376392364502,grad_norm: 0.9423358534155067, iteration: 272934
loss: 1.0068930387496948,grad_norm: 0.9999992869822981, iteration: 272935
loss: 1.0100833177566528,grad_norm: 0.8479113106401924, iteration: 272936
loss: 1.0093371868133545,grad_norm: 0.9999991431864025, iteration: 272937
loss: 0.9561275243759155,grad_norm: 0.9326451631863613, iteration: 272938
loss: 1.0078648328781128,grad_norm: 0.9168145630553497, iteration: 272939
loss: 0.9980583786964417,grad_norm: 0.9186856292719601, iteration: 272940
loss: 1.0342211723327637,grad_norm: 0.8325494940773938, iteration: 272941
loss: 0.9982649087905884,grad_norm: 0.9999992291752348, iteration: 272942
loss: 1.016605019569397,grad_norm: 0.7985934241743305, iteration: 272943
loss: 0.9926538467407227,grad_norm: 0.9999991318145298, iteration: 272944
loss: 1.0158336162567139,grad_norm: 0.885976167847285, iteration: 272945
loss: 0.9723373055458069,grad_norm: 0.7583620565573908, iteration: 272946
loss: 0.9649076461791992,grad_norm: 0.811495506530552, iteration: 272947
loss: 0.9598302841186523,grad_norm: 0.8055790123905957, iteration: 272948
loss: 0.9776926636695862,grad_norm: 0.8861959056130283, iteration: 272949
loss: 1.0218154191970825,grad_norm: 0.8340407945452767, iteration: 272950
loss: 1.0010586977005005,grad_norm: 0.9475881451341763, iteration: 272951
loss: 1.0366475582122803,grad_norm: 0.7855425904173214, iteration: 272952
loss: 0.9759740233421326,grad_norm: 0.9802841877394997, iteration: 272953
loss: 1.057176113128662,grad_norm: 0.9999998597197466, iteration: 272954
loss: 0.9546855688095093,grad_norm: 0.9999992528666574, iteration: 272955
loss: 1.0371860265731812,grad_norm: 0.9999994017828622, iteration: 272956
loss: 1.1145908832550049,grad_norm: 0.9999991545517596, iteration: 272957
loss: 0.9838698506355286,grad_norm: 0.9400350619428701, iteration: 272958
loss: 0.9945100545883179,grad_norm: 0.8678482005692445, iteration: 272959
loss: 0.9571332931518555,grad_norm: 0.9190896498295515, iteration: 272960
loss: 1.073561668395996,grad_norm: 0.9999992658014019, iteration: 272961
loss: 1.015510082244873,grad_norm: 0.8759249422594984, iteration: 272962
loss: 1.0067836046218872,grad_norm: 0.9664981023709428, iteration: 272963
loss: 0.989302396774292,grad_norm: 0.9999990446101876, iteration: 272964
loss: 1.002199649810791,grad_norm: 0.9969942126150815, iteration: 272965
loss: 1.0912055969238281,grad_norm: 0.9999991228048927, iteration: 272966
loss: 1.0136271715164185,grad_norm: 0.9836042737804714, iteration: 272967
loss: 1.0109777450561523,grad_norm: 0.9412545617148629, iteration: 272968
loss: 1.0100055932998657,grad_norm: 0.8851586248816147, iteration: 272969
loss: 1.0189169645309448,grad_norm: 0.8091910416868779, iteration: 272970
loss: 0.9949119091033936,grad_norm: 0.7976453981399159, iteration: 272971
loss: 0.991176426410675,grad_norm: 0.9254171730825527, iteration: 272972
loss: 0.9716847538948059,grad_norm: 0.9081168706874899, iteration: 272973
loss: 1.0067237615585327,grad_norm: 0.8798412685468758, iteration: 272974
loss: 0.9542889595031738,grad_norm: 0.9620859526080403, iteration: 272975
loss: 0.9805587530136108,grad_norm: 0.798845615101348, iteration: 272976
loss: 1.029759168624878,grad_norm: 0.9999990933024996, iteration: 272977
loss: 0.9794309735298157,grad_norm: 0.8969136241330015, iteration: 272978
loss: 0.986003041267395,grad_norm: 0.9999991557251219, iteration: 272979
loss: 1.0165733098983765,grad_norm: 0.9024265793088347, iteration: 272980
loss: 1.0494163036346436,grad_norm: 0.9999992165663997, iteration: 272981
loss: 0.9658527970314026,grad_norm: 0.8246674591222197, iteration: 272982
loss: 0.9980666041374207,grad_norm: 0.8930705326579779, iteration: 272983
loss: 0.9985045790672302,grad_norm: 0.9818238278528043, iteration: 272984
loss: 0.9883517622947693,grad_norm: 0.9322064677713336, iteration: 272985
loss: 0.9831619262695312,grad_norm: 0.7631679631299249, iteration: 272986
loss: 1.010082483291626,grad_norm: 0.8268582428138486, iteration: 272987
loss: 0.9538632035255432,grad_norm: 0.8168509519629189, iteration: 272988
loss: 1.0024446249008179,grad_norm: 0.8982468497085756, iteration: 272989
loss: 1.0432155132293701,grad_norm: 0.9999990210788408, iteration: 272990
loss: 0.9960753917694092,grad_norm: 0.9033705441462621, iteration: 272991
loss: 1.0073493719100952,grad_norm: 0.8325407586375351, iteration: 272992
loss: 0.9999468326568604,grad_norm: 0.9999990358085797, iteration: 272993
loss: 0.9855053424835205,grad_norm: 0.6955709871951085, iteration: 272994
loss: 0.9874330163002014,grad_norm: 0.8711610474208933, iteration: 272995
loss: 0.956475555896759,grad_norm: 0.9999990191602143, iteration: 272996
loss: 0.986035168170929,grad_norm: 0.7827374713593218, iteration: 272997
loss: 1.0058480501174927,grad_norm: 0.9999990553698267, iteration: 272998
loss: 0.9969139695167542,grad_norm: 0.8420995160958061, iteration: 272999
loss: 1.000317931175232,grad_norm: 0.9359361764804405, iteration: 273000
loss: 0.9960776567459106,grad_norm: 0.9068715960316168, iteration: 273001
loss: 0.9899408221244812,grad_norm: 0.926416113176186, iteration: 273002
loss: 0.9990373849868774,grad_norm: 0.9999990399664831, iteration: 273003
loss: 1.01394522190094,grad_norm: 0.8565073661564238, iteration: 273004
loss: 1.01841402053833,grad_norm: 0.8263965476348768, iteration: 273005
loss: 0.9863725304603577,grad_norm: 0.8114780999857656, iteration: 273006
loss: 1.0006842613220215,grad_norm: 0.9374851748936259, iteration: 273007
loss: 1.013344645500183,grad_norm: 0.826134339048422, iteration: 273008
loss: 1.0026146173477173,grad_norm: 0.999704351896186, iteration: 273009
loss: 1.0211563110351562,grad_norm: 0.772666939152117, iteration: 273010
loss: 1.0075860023498535,grad_norm: 0.9999993330508069, iteration: 273011
loss: 0.9621055722236633,grad_norm: 0.8821986132962143, iteration: 273012
loss: 0.984620213508606,grad_norm: 0.793132401666463, iteration: 273013
loss: 0.9668874144554138,grad_norm: 0.8144577929676498, iteration: 273014
loss: 1.0307289361953735,grad_norm: 0.8683584713375501, iteration: 273015
loss: 1.0783981084823608,grad_norm: 0.9999998458839685, iteration: 273016
loss: 0.9782540202140808,grad_norm: 0.894263268899294, iteration: 273017
loss: 0.9820383787155151,grad_norm: 0.9999996997973376, iteration: 273018
loss: 0.9941832423210144,grad_norm: 0.8302587455841788, iteration: 273019
loss: 1.0097543001174927,grad_norm: 0.9072701343829246, iteration: 273020
loss: 0.9573554396629333,grad_norm: 0.8970034705535846, iteration: 273021
loss: 0.974250853061676,grad_norm: 0.7359133239069641, iteration: 273022
loss: 0.9922811985015869,grad_norm: 0.8777828135551086, iteration: 273023
loss: 0.9942145943641663,grad_norm: 0.8782776104733662, iteration: 273024
loss: 0.9825360774993896,grad_norm: 0.8907669024274594, iteration: 273025
loss: 1.008224368095398,grad_norm: 0.9114640404065386, iteration: 273026
loss: 0.9678829908370972,grad_norm: 0.8705130231382104, iteration: 273027
loss: 1.0083481073379517,grad_norm: 0.7714188556599358, iteration: 273028
loss: 1.0181312561035156,grad_norm: 0.905684120580038, iteration: 273029
loss: 0.9914385080337524,grad_norm: 0.9813207067793104, iteration: 273030
loss: 1.0502067804336548,grad_norm: 0.9458850368036633, iteration: 273031
loss: 0.9861224889755249,grad_norm: 0.9643343256724695, iteration: 273032
loss: 0.9962087869644165,grad_norm: 0.9103356366128181, iteration: 273033
loss: 1.011372685432434,grad_norm: 0.9999991246022835, iteration: 273034
loss: 1.0085670948028564,grad_norm: 0.9143658880508628, iteration: 273035
loss: 1.057084321975708,grad_norm: 0.9999991286501009, iteration: 273036
loss: 1.0415910482406616,grad_norm: 0.7565341310387581, iteration: 273037
loss: 0.9891055226325989,grad_norm: 0.9999990727507649, iteration: 273038
loss: 0.9918494820594788,grad_norm: 0.8188871514899206, iteration: 273039
loss: 1.030439019203186,grad_norm: 0.8513648877786292, iteration: 273040
loss: 1.021144986152649,grad_norm: 0.8346542457918467, iteration: 273041
loss: 0.9748868346214294,grad_norm: 0.8243694002814332, iteration: 273042
loss: 1.0086392164230347,grad_norm: 0.8334350416631294, iteration: 273043
loss: 0.999745786190033,grad_norm: 0.9164746479741824, iteration: 273044
loss: 1.0184955596923828,grad_norm: 0.9777735775407861, iteration: 273045
loss: 0.962942361831665,grad_norm: 0.9284293010399373, iteration: 273046
loss: 1.0374852418899536,grad_norm: 0.9999997898063088, iteration: 273047
loss: 0.9577453136444092,grad_norm: 0.8454285027864072, iteration: 273048
loss: 0.992651641368866,grad_norm: 0.7762234279635729, iteration: 273049
loss: 1.0003870725631714,grad_norm: 0.9435537367958001, iteration: 273050
loss: 1.0020277500152588,grad_norm: 0.9447999706001574, iteration: 273051
loss: 1.09888756275177,grad_norm: 0.9999990713370331, iteration: 273052
loss: 0.9893335103988647,grad_norm: 0.999999176414087, iteration: 273053
loss: 1.0377317667007446,grad_norm: 0.8475132477112454, iteration: 273054
loss: 1.0198490619659424,grad_norm: 0.9464140160367333, iteration: 273055
loss: 1.0072450637817383,grad_norm: 0.9999990056072405, iteration: 273056
loss: 1.0226904153823853,grad_norm: 0.8990433912693221, iteration: 273057
loss: 0.9805060029029846,grad_norm: 0.8246216298346333, iteration: 273058
loss: 0.987189769744873,grad_norm: 0.8758438139486775, iteration: 273059
loss: 1.048689603805542,grad_norm: 0.9999995335597149, iteration: 273060
loss: 1.024623155593872,grad_norm: 0.999999269042552, iteration: 273061
loss: 1.0222151279449463,grad_norm: 0.9999993533320181, iteration: 273062
loss: 1.0521219968795776,grad_norm: 0.7533554993852997, iteration: 273063
loss: 0.9782164692878723,grad_norm: 0.8484623586690934, iteration: 273064
loss: 0.9606685638427734,grad_norm: 0.9442956226616233, iteration: 273065
loss: 0.997162401676178,grad_norm: 0.8567268171924731, iteration: 273066
loss: 1.0064184665679932,grad_norm: 0.7851494463722122, iteration: 273067
loss: 0.9915758371353149,grad_norm: 0.9999990367588452, iteration: 273068
loss: 1.0056064128875732,grad_norm: 0.9026337218354448, iteration: 273069
loss: 0.9941620826721191,grad_norm: 0.7988934725390812, iteration: 273070
loss: 0.987467348575592,grad_norm: 0.9999992613846117, iteration: 273071
loss: 0.9910285472869873,grad_norm: 0.8838875046854914, iteration: 273072
loss: 1.0187647342681885,grad_norm: 0.8426225645563186, iteration: 273073
loss: 0.9849202632904053,grad_norm: 0.8244969099027599, iteration: 273074
loss: 0.9704841375350952,grad_norm: 0.9911285153919893, iteration: 273075
loss: 0.991430401802063,grad_norm: 0.9999995191647663, iteration: 273076
loss: 1.0061312913894653,grad_norm: 0.9999995575929401, iteration: 273077
loss: 0.9767996072769165,grad_norm: 0.8169763245860698, iteration: 273078
loss: 0.9994966387748718,grad_norm: 0.9160193396897056, iteration: 273079
loss: 1.0118050575256348,grad_norm: 0.6842126899509245, iteration: 273080
loss: 0.9783010482788086,grad_norm: 0.8240308171690882, iteration: 273081
loss: 1.0072517395019531,grad_norm: 0.8214024050898872, iteration: 273082
loss: 0.9823340177536011,grad_norm: 0.9941799117081571, iteration: 273083
loss: 0.9866147041320801,grad_norm: 0.897310289449892, iteration: 273084
loss: 0.9853346347808838,grad_norm: 0.8880465957042786, iteration: 273085
loss: 1.0094308853149414,grad_norm: 0.7887251173045686, iteration: 273086
loss: 0.9890711307525635,grad_norm: 0.8792933806695766, iteration: 273087
loss: 1.001233458518982,grad_norm: 0.8433909021341495, iteration: 273088
loss: 0.9564001560211182,grad_norm: 0.9999992349951953, iteration: 273089
loss: 1.012511968612671,grad_norm: 0.7541208622262056, iteration: 273090
loss: 0.9823904037475586,grad_norm: 0.8131799954482588, iteration: 273091
loss: 1.0182456970214844,grad_norm: 0.7798081932313697, iteration: 273092
loss: 1.0053296089172363,grad_norm: 0.9999991309963829, iteration: 273093
loss: 1.0507547855377197,grad_norm: 0.9999998230123262, iteration: 273094
loss: 0.979983389377594,grad_norm: 0.835229381056152, iteration: 273095
loss: 0.9946661591529846,grad_norm: 0.8737644924385296, iteration: 273096
loss: 1.0175668001174927,grad_norm: 0.9999994932812412, iteration: 273097
loss: 0.996263325214386,grad_norm: 0.9999991271578405, iteration: 273098
loss: 0.9767221808433533,grad_norm: 0.9717960319348433, iteration: 273099
loss: 0.9714825749397278,grad_norm: 0.979894078670247, iteration: 273100
loss: 0.9993686079978943,grad_norm: 0.999999246796776, iteration: 273101
loss: 1.019921064376831,grad_norm: 0.9172684424660706, iteration: 273102
loss: 1.0017656087875366,grad_norm: 0.9702149377423794, iteration: 273103
loss: 1.03908109664917,grad_norm: 0.8236438297518252, iteration: 273104
loss: 1.0975366830825806,grad_norm: 0.9999990797978713, iteration: 273105
loss: 1.0405536890029907,grad_norm: 0.8786696454408153, iteration: 273106
loss: 1.0303856134414673,grad_norm: 0.9569412191158685, iteration: 273107
loss: 1.0492541790008545,grad_norm: 0.9999990420279925, iteration: 273108
loss: 1.0824930667877197,grad_norm: 0.9999991138010921, iteration: 273109
loss: 0.9908508062362671,grad_norm: 0.8206978955527063, iteration: 273110
loss: 0.9902117848396301,grad_norm: 0.8208219877620041, iteration: 273111
loss: 1.0208839178085327,grad_norm: 0.7547492337294592, iteration: 273112
loss: 0.9932292699813843,grad_norm: 0.8824704037238164, iteration: 273113
loss: 1.0220247507095337,grad_norm: 0.8821550354819804, iteration: 273114
loss: 1.0060979127883911,grad_norm: 0.9999992158624772, iteration: 273115
loss: 0.9641901254653931,grad_norm: 0.9202785121287405, iteration: 273116
loss: 1.0034557580947876,grad_norm: 0.8855473520648821, iteration: 273117
loss: 1.0020941495895386,grad_norm: 0.8278195848158758, iteration: 273118
loss: 1.0098685026168823,grad_norm: 0.8381008404091645, iteration: 273119
loss: 0.9953801035881042,grad_norm: 0.9072829603747811, iteration: 273120
loss: 1.0106160640716553,grad_norm: 0.7988860432323446, iteration: 273121
loss: 0.9976597428321838,grad_norm: 0.9110251209932799, iteration: 273122
loss: 0.9921655058860779,grad_norm: 0.7047621850231441, iteration: 273123
loss: 0.9943893551826477,grad_norm: 0.7606208282476299, iteration: 273124
loss: 0.9807818531990051,grad_norm: 0.8784612688416725, iteration: 273125
loss: 1.0116214752197266,grad_norm: 0.7844684303045938, iteration: 273126
loss: 1.0189509391784668,grad_norm: 0.9976666233154101, iteration: 273127
loss: 1.0054396390914917,grad_norm: 0.9126928822461359, iteration: 273128
loss: 0.9814152717590332,grad_norm: 0.9999990880083468, iteration: 273129
loss: 0.9895114302635193,grad_norm: 0.7369093760962278, iteration: 273130
loss: 1.0898348093032837,grad_norm: 0.9999997315395147, iteration: 273131
loss: 1.0195955038070679,grad_norm: 0.9999992328031627, iteration: 273132
loss: 0.9714546203613281,grad_norm: 0.9967092197442675, iteration: 273133
loss: 0.9794078469276428,grad_norm: 0.7626285449432779, iteration: 273134
loss: 1.0098297595977783,grad_norm: 0.8795141371449677, iteration: 273135
loss: 0.9743272662162781,grad_norm: 0.9999991921299807, iteration: 273136
loss: 1.02133309841156,grad_norm: 0.818334990460175, iteration: 273137
loss: 1.0232664346694946,grad_norm: 0.8348653645776788, iteration: 273138
loss: 1.0015203952789307,grad_norm: 0.9735232833777261, iteration: 273139
loss: 0.9770082831382751,grad_norm: 0.8738863698209899, iteration: 273140
loss: 0.9785199165344238,grad_norm: 0.9999991482656897, iteration: 273141
loss: 0.9863279461860657,grad_norm: 0.8545740422538219, iteration: 273142
loss: 0.9813845753669739,grad_norm: 0.8032448210154619, iteration: 273143
loss: 0.9914097189903259,grad_norm: 0.8841374898036443, iteration: 273144
loss: 1.0054680109024048,grad_norm: 0.898017657255992, iteration: 273145
loss: 1.0174870491027832,grad_norm: 0.9999991660221303, iteration: 273146
loss: 0.9560362100601196,grad_norm: 0.8760685529315101, iteration: 273147
loss: 1.225783348083496,grad_norm: 0.9999993271033765, iteration: 273148
loss: 1.086794137954712,grad_norm: 0.9999996004629788, iteration: 273149
loss: 1.0222136974334717,grad_norm: 0.8178097712835171, iteration: 273150
loss: 1.0201141834259033,grad_norm: 0.8573790200360207, iteration: 273151
loss: 0.992267370223999,grad_norm: 0.8693426114287459, iteration: 273152
loss: 1.0672721862792969,grad_norm: 0.8148388131874846, iteration: 273153
loss: 1.1051506996154785,grad_norm: 0.9999991052624481, iteration: 273154
loss: 1.0787854194641113,grad_norm: 0.999999266048399, iteration: 273155
loss: 0.9993601441383362,grad_norm: 0.9079151131055957, iteration: 273156
loss: 1.0532703399658203,grad_norm: 0.9246574181624887, iteration: 273157
loss: 0.9690617322921753,grad_norm: 0.8653899002018635, iteration: 273158
loss: 1.020706057548523,grad_norm: 0.7874762997377924, iteration: 273159
loss: 1.0025964975357056,grad_norm: 0.7720915529401771, iteration: 273160
loss: 0.978604793548584,grad_norm: 0.9999991664711928, iteration: 273161
loss: 0.9684103727340698,grad_norm: 0.8807876639128842, iteration: 273162
loss: 1.0101592540740967,grad_norm: 0.8574445029415184, iteration: 273163
loss: 0.9737445712089539,grad_norm: 0.8880304633361071, iteration: 273164
loss: 1.0420924425125122,grad_norm: 0.9999990442421339, iteration: 273165
loss: 0.9399953484535217,grad_norm: 0.847292928957328, iteration: 273166
loss: 1.18454909324646,grad_norm: 0.9917921367356732, iteration: 273167
loss: 1.0156975984573364,grad_norm: 0.9999997420860985, iteration: 273168
loss: 1.00321364402771,grad_norm: 0.9329217703798709, iteration: 273169
loss: 0.9877943396568298,grad_norm: 0.8280415570595347, iteration: 273170
loss: 1.1292222738265991,grad_norm: 0.9999995975741413, iteration: 273171
loss: 0.9631093144416809,grad_norm: 0.8184079148807752, iteration: 273172
loss: 1.0152968168258667,grad_norm: 0.9416452629466546, iteration: 273173
loss: 1.1743640899658203,grad_norm: 0.9999996410629345, iteration: 273174
loss: 1.0637588500976562,grad_norm: 0.9999992845692582, iteration: 273175
loss: 1.0742383003234863,grad_norm: 0.9045367795825312, iteration: 273176
loss: 0.9609359502792358,grad_norm: 0.8722344954988978, iteration: 273177
loss: 1.0055949687957764,grad_norm: 0.9494375501399624, iteration: 273178
loss: 1.002657413482666,grad_norm: 0.90785750923023, iteration: 273179
loss: 0.9800437688827515,grad_norm: 0.6774221553284518, iteration: 273180
loss: 1.0138486623764038,grad_norm: 0.9999991038971413, iteration: 273181
loss: 1.022111177444458,grad_norm: 0.8025791580213929, iteration: 273182
loss: 0.9961369633674622,grad_norm: 0.6671226731843196, iteration: 273183
loss: 1.0145800113677979,grad_norm: 0.9999998934265956, iteration: 273184
loss: 1.0294179916381836,grad_norm: 0.9054304353798536, iteration: 273185
loss: 0.9975550174713135,grad_norm: 0.8730477166865754, iteration: 273186
loss: 1.0210151672363281,grad_norm: 0.9999990848394026, iteration: 273187
loss: 1.007602334022522,grad_norm: 0.8674919032423685, iteration: 273188
loss: 1.002240777015686,grad_norm: 0.7261370474963106, iteration: 273189
loss: 1.0069488286972046,grad_norm: 0.9999993136607904, iteration: 273190
loss: 1.0012677907943726,grad_norm: 0.9999991923516306, iteration: 273191
loss: 1.020107626914978,grad_norm: 0.8018953984758834, iteration: 273192
loss: 1.0382791757583618,grad_norm: 0.9999996642220245, iteration: 273193
loss: 0.9630252718925476,grad_norm: 0.9999990114585996, iteration: 273194
loss: 1.077294945716858,grad_norm: 0.9999992626670394, iteration: 273195
loss: 0.9971598386764526,grad_norm: 0.8031292404179333, iteration: 273196
loss: 1.0140045881271362,grad_norm: 0.9999990345546578, iteration: 273197
loss: 0.9927219152450562,grad_norm: 0.8549851061664342, iteration: 273198
loss: 1.0131293535232544,grad_norm: 0.7970636433756719, iteration: 273199
loss: 1.0294990539550781,grad_norm: 0.828240568063158, iteration: 273200
loss: 0.9821125268936157,grad_norm: 0.9999993078244888, iteration: 273201
loss: 0.9558948278427124,grad_norm: 0.8172070069775792, iteration: 273202
loss: 1.0097723007202148,grad_norm: 0.7985012466873719, iteration: 273203
loss: 0.997306227684021,grad_norm: 0.7660120239857171, iteration: 273204
loss: 0.9623517990112305,grad_norm: 0.9999993671916394, iteration: 273205
loss: 0.9931681156158447,grad_norm: 0.9999997680711488, iteration: 273206
loss: 1.0026453733444214,grad_norm: 0.9350042936705542, iteration: 273207
loss: 0.9691097140312195,grad_norm: 0.9999992862487539, iteration: 273208
loss: 0.976715087890625,grad_norm: 0.7944791158786745, iteration: 273209
loss: 1.012105941772461,grad_norm: 0.7655141635306483, iteration: 273210
loss: 0.94242924451828,grad_norm: 0.7998933931454871, iteration: 273211
loss: 0.9743475317955017,grad_norm: 0.9426106619438372, iteration: 273212
loss: 1.0390379428863525,grad_norm: 0.9999995377575737, iteration: 273213
loss: 1.083618402481079,grad_norm: 0.999998985171927, iteration: 273214
loss: 1.0157233476638794,grad_norm: 0.910420439532104, iteration: 273215
loss: 1.0197292566299438,grad_norm: 0.9960972085876263, iteration: 273216
loss: 1.028383731842041,grad_norm: 0.9999997970640393, iteration: 273217
loss: 1.025701642036438,grad_norm: 0.9999992039384207, iteration: 273218
loss: 0.9966575503349304,grad_norm: 0.826802772227193, iteration: 273219
loss: 1.0223822593688965,grad_norm: 0.679899879532149, iteration: 273220
loss: 0.9629305005073547,grad_norm: 0.9234444362627313, iteration: 273221
loss: 0.9839091897010803,grad_norm: 0.7458447025947337, iteration: 273222
loss: 1.0225099325180054,grad_norm: 0.9999991122851991, iteration: 273223
loss: 1.1026901006698608,grad_norm: 0.9999990526766354, iteration: 273224
loss: 0.9970765113830566,grad_norm: 0.7792880402330474, iteration: 273225
loss: 1.0326374769210815,grad_norm: 0.8245367754768048, iteration: 273226
loss: 1.0214557647705078,grad_norm: 0.7532558721654307, iteration: 273227
loss: 1.0152777433395386,grad_norm: 0.9999997422953288, iteration: 273228
loss: 1.0178875923156738,grad_norm: 0.9999991109187424, iteration: 273229
loss: 1.0083144903182983,grad_norm: 0.8658106362947902, iteration: 273230
loss: 1.0458177328109741,grad_norm: 0.9848076066273587, iteration: 273231
loss: 0.9691739678382874,grad_norm: 0.810972418444625, iteration: 273232
loss: 1.025132656097412,grad_norm: 0.7331544855766069, iteration: 273233
loss: 0.9845877885818481,grad_norm: 0.7711971204819555, iteration: 273234
loss: 1.0041037797927856,grad_norm: 0.7679796091991289, iteration: 273235
loss: 0.9969151020050049,grad_norm: 0.9285750331719562, iteration: 273236
loss: 1.0023093223571777,grad_norm: 0.9864414157621069, iteration: 273237
loss: 0.981503963470459,grad_norm: 0.7772783223881473, iteration: 273238
loss: 0.9803623557090759,grad_norm: 0.7503122085039649, iteration: 273239
loss: 1.0067169666290283,grad_norm: 0.9999998367268991, iteration: 273240
loss: 1.0333240032196045,grad_norm: 0.7146145143636176, iteration: 273241
loss: 0.9613537192344666,grad_norm: 0.949707252105208, iteration: 273242
loss: 0.9930107593536377,grad_norm: 0.8347332635682431, iteration: 273243
loss: 1.0315983295440674,grad_norm: 0.7230230975389103, iteration: 273244
loss: 0.9946010708808899,grad_norm: 0.808618458671067, iteration: 273245
loss: 1.0399671792984009,grad_norm: 0.784762054099095, iteration: 273246
loss: 0.9852333664894104,grad_norm: 0.9010246693884185, iteration: 273247
loss: 1.0720164775848389,grad_norm: 0.8820770171898278, iteration: 273248
loss: 0.9629896283149719,grad_norm: 0.776353231703611, iteration: 273249
loss: 1.029568076133728,grad_norm: 0.8715127517986041, iteration: 273250
loss: 1.053004503250122,grad_norm: 0.9082460789664117, iteration: 273251
loss: 1.0438082218170166,grad_norm: 0.9999994506766706, iteration: 273252
loss: 0.9613929986953735,grad_norm: 0.7968511651843976, iteration: 273253
loss: 1.0063692331314087,grad_norm: 0.999999129203943, iteration: 273254
loss: 0.9834651350975037,grad_norm: 0.809175346004058, iteration: 273255
loss: 1.0219208002090454,grad_norm: 0.9999992990241855, iteration: 273256
loss: 1.0178686380386353,grad_norm: 0.7964883760534145, iteration: 273257
loss: 1.010514497756958,grad_norm: 0.8637031644948547, iteration: 273258
loss: 1.0350370407104492,grad_norm: 0.9999999667160265, iteration: 273259
loss: 1.014415979385376,grad_norm: 0.9999991450719441, iteration: 273260
loss: 1.0260541439056396,grad_norm: 0.9782766908258194, iteration: 273261
loss: 0.9941055178642273,grad_norm: 0.8828555580053998, iteration: 273262
loss: 0.9782460927963257,grad_norm: 0.8282350437956817, iteration: 273263
loss: 0.9622299075126648,grad_norm: 0.9068851930922273, iteration: 273264
loss: 1.037955403327942,grad_norm: 0.8099601522068124, iteration: 273265
loss: 1.0026558637619019,grad_norm: 0.9970715094147626, iteration: 273266
loss: 1.016330599784851,grad_norm: 0.8131430962440471, iteration: 273267
loss: 0.9872334599494934,grad_norm: 0.8031499408213765, iteration: 273268
loss: 1.0114367008209229,grad_norm: 0.9005029391483187, iteration: 273269
loss: 1.0354478359222412,grad_norm: 0.9245556679934809, iteration: 273270
loss: 0.9836129546165466,grad_norm: 0.9999992179625112, iteration: 273271
loss: 1.1472835540771484,grad_norm: 0.999999870870738, iteration: 273272
loss: 0.9993391036987305,grad_norm: 0.981162997012062, iteration: 273273
loss: 1.0256946086883545,grad_norm: 0.9261125404430367, iteration: 273274
loss: 1.0042107105255127,grad_norm: 0.9628480130206963, iteration: 273275
loss: 0.9601935744285583,grad_norm: 0.9999989120413445, iteration: 273276
loss: 1.0099129676818848,grad_norm: 0.9402465529240048, iteration: 273277
loss: 1.0142847299575806,grad_norm: 0.8905764670759483, iteration: 273278
loss: 1.0231575965881348,grad_norm: 0.8443553323250198, iteration: 273279
loss: 0.9789955615997314,grad_norm: 0.9277523548786096, iteration: 273280
loss: 1.0486823320388794,grad_norm: 0.9999992473771145, iteration: 273281
loss: 1.0235657691955566,grad_norm: 0.6751817349742817, iteration: 273282
loss: 1.0081746578216553,grad_norm: 0.8466862283771922, iteration: 273283
loss: 1.006068229675293,grad_norm: 0.8416115450459294, iteration: 273284
loss: 0.992911159992218,grad_norm: 0.8480107410543607, iteration: 273285
loss: 1.011747121810913,grad_norm: 0.806493893541675, iteration: 273286
loss: 1.0293397903442383,grad_norm: 0.8570385368988397, iteration: 273287
loss: 0.9797603487968445,grad_norm: 0.90457953548224, iteration: 273288
loss: 1.0428509712219238,grad_norm: 0.9460633794767465, iteration: 273289
loss: 0.9997978806495667,grad_norm: 0.9999993528063572, iteration: 273290
loss: 0.9983362555503845,grad_norm: 0.9791371059385238, iteration: 273291
loss: 0.9871465563774109,grad_norm: 0.924838474024162, iteration: 273292
loss: 1.0302586555480957,grad_norm: 0.9380213901992864, iteration: 273293
loss: 0.9829282164573669,grad_norm: 0.9999997866002533, iteration: 273294
loss: 1.0029462575912476,grad_norm: 0.9094154558774055, iteration: 273295
loss: 0.9681337475776672,grad_norm: 0.9989000075531059, iteration: 273296
loss: 0.9632771015167236,grad_norm: 0.9666817829267971, iteration: 273297
loss: 0.9782567620277405,grad_norm: 0.9243464068024801, iteration: 273298
loss: 1.1524226665496826,grad_norm: 0.9999996730251663, iteration: 273299
loss: 1.0066906213760376,grad_norm: 0.9240280247412292, iteration: 273300
loss: 1.0044413805007935,grad_norm: 0.7713900170543811, iteration: 273301
loss: 0.9749906063079834,grad_norm: 0.9174593449952669, iteration: 273302
loss: 0.9792482852935791,grad_norm: 0.8889945293389158, iteration: 273303
loss: 0.9586367607116699,grad_norm: 0.9999991084340856, iteration: 273304
loss: 1.0248761177062988,grad_norm: 0.9335897571931632, iteration: 273305
loss: 0.9836645722389221,grad_norm: 0.9840828100551245, iteration: 273306
loss: 1.0161241292953491,grad_norm: 0.8535421476398709, iteration: 273307
loss: 0.9616158604621887,grad_norm: 0.9249223943593647, iteration: 273308
loss: 0.973400354385376,grad_norm: 0.8869469046307606, iteration: 273309
loss: 1.0227272510528564,grad_norm: 0.988439273963227, iteration: 273310
loss: 0.9949641227722168,grad_norm: 0.9212322921290308, iteration: 273311
loss: 1.0592458248138428,grad_norm: 0.9999994454866737, iteration: 273312
loss: 0.9978272914886475,grad_norm: 0.7913440220588772, iteration: 273313
loss: 0.9903993010520935,grad_norm: 0.877577777738265, iteration: 273314
loss: 0.967282235622406,grad_norm: 0.7712150449775519, iteration: 273315
loss: 0.9905651211738586,grad_norm: 0.8624912950855096, iteration: 273316
loss: 1.0406335592269897,grad_norm: 0.8651020125376745, iteration: 273317
loss: 0.9823799133300781,grad_norm: 0.9108814029113855, iteration: 273318
loss: 0.999845564365387,grad_norm: 0.8012020977410591, iteration: 273319
loss: 0.983717679977417,grad_norm: 0.9595482496333664, iteration: 273320
loss: 1.011123538017273,grad_norm: 0.9999992566251921, iteration: 273321
loss: 0.9884546399116516,grad_norm: 0.8611445174412802, iteration: 273322
loss: 1.0038384199142456,grad_norm: 0.8118967450526057, iteration: 273323
loss: 1.0174658298492432,grad_norm: 0.9283784314482341, iteration: 273324
loss: 1.0390704870224,grad_norm: 0.9999991919609428, iteration: 273325
loss: 1.009186863899231,grad_norm: 0.8307125121236804, iteration: 273326
loss: 1.0482971668243408,grad_norm: 0.850873717854893, iteration: 273327
loss: 0.989573061466217,grad_norm: 0.9587308880888644, iteration: 273328
loss: 1.0170631408691406,grad_norm: 0.8502110639560072, iteration: 273329
loss: 1.008873701095581,grad_norm: 0.9779775786119375, iteration: 273330
loss: 0.9935172200202942,grad_norm: 0.8463932974173151, iteration: 273331
loss: 1.06566321849823,grad_norm: 0.999999053758249, iteration: 273332
loss: 0.9930781126022339,grad_norm: 0.9999991322939258, iteration: 273333
loss: 0.9992261528968811,grad_norm: 0.915565561713352, iteration: 273334
loss: 0.9848170280456543,grad_norm: 0.9999993842250489, iteration: 273335
loss: 1.033839464187622,grad_norm: 0.9999996439937332, iteration: 273336
loss: 1.0435892343521118,grad_norm: 0.876627577137634, iteration: 273337
loss: 0.9540072679519653,grad_norm: 0.9471548297743074, iteration: 273338
loss: 1.0028105974197388,grad_norm: 0.8995891695504344, iteration: 273339
loss: 1.0267646312713623,grad_norm: 0.7956613960683577, iteration: 273340
loss: 0.9953858852386475,grad_norm: 0.8595848941624741, iteration: 273341
loss: 0.989997923374176,grad_norm: 0.9008401201376524, iteration: 273342
loss: 1.0456650257110596,grad_norm: 0.999999413008934, iteration: 273343
loss: 0.9946766495704651,grad_norm: 0.903337396920934, iteration: 273344
loss: 1.0151827335357666,grad_norm: 0.9327111148473685, iteration: 273345
loss: 1.010948657989502,grad_norm: 0.999998949592531, iteration: 273346
loss: 0.9912904500961304,grad_norm: 0.7209530317519572, iteration: 273347
loss: 0.9757177233695984,grad_norm: 0.9999990270069384, iteration: 273348
loss: 0.9816873073577881,grad_norm: 0.9654029198753974, iteration: 273349
loss: 0.9876505136489868,grad_norm: 0.7349646437599129, iteration: 273350
loss: 1.0193015336990356,grad_norm: 0.994203916112063, iteration: 273351
loss: 1.005297064781189,grad_norm: 0.8206844390473034, iteration: 273352
loss: 0.9986830949783325,grad_norm: 0.7860531349563045, iteration: 273353
loss: 1.036198377609253,grad_norm: 0.9999992062560252, iteration: 273354
loss: 0.9976840615272522,grad_norm: 0.8920189726265749, iteration: 273355
loss: 1.0230402946472168,grad_norm: 0.8449568102332549, iteration: 273356
loss: 0.9793557524681091,grad_norm: 0.9999991211561399, iteration: 273357
loss: 0.9915199875831604,grad_norm: 0.8912651036975048, iteration: 273358
loss: 1.026018738746643,grad_norm: 0.8398616937274194, iteration: 273359
loss: 0.9610809087753296,grad_norm: 0.9746640315794305, iteration: 273360
loss: 1.0129543542861938,grad_norm: 0.9999991037678849, iteration: 273361
loss: 1.0022777318954468,grad_norm: 0.7633510223671338, iteration: 273362
loss: 1.0106611251831055,grad_norm: 0.9933565491249823, iteration: 273363
loss: 0.9533488154411316,grad_norm: 0.8429736642621039, iteration: 273364
loss: 0.9885146021842957,grad_norm: 0.9321811764082962, iteration: 273365
loss: 0.9849002957344055,grad_norm: 0.9999992765643106, iteration: 273366
loss: 0.9885824918746948,grad_norm: 0.9349353613634727, iteration: 273367
loss: 0.9745343327522278,grad_norm: 0.9520999188712208, iteration: 273368
loss: 1.0085690021514893,grad_norm: 0.9789543841722309, iteration: 273369
loss: 1.0028271675109863,grad_norm: 0.9160069314364828, iteration: 273370
loss: 0.9947837591171265,grad_norm: 0.8334947035753644, iteration: 273371
loss: 1.0164958238601685,grad_norm: 0.9999991084875758, iteration: 273372
loss: 0.9785237908363342,grad_norm: 0.7574092885637691, iteration: 273373
loss: 1.0024436712265015,grad_norm: 0.8438170957018132, iteration: 273374
loss: 1.0065124034881592,grad_norm: 0.8043486614733689, iteration: 273375
loss: 1.0017168521881104,grad_norm: 0.7897124468822522, iteration: 273376
loss: 1.0097476243972778,grad_norm: 0.8515667167991642, iteration: 273377
loss: 0.9632785320281982,grad_norm: 0.9338324848851791, iteration: 273378
loss: 1.0000033378601074,grad_norm: 0.8267271980553812, iteration: 273379
loss: 1.0433292388916016,grad_norm: 0.8618602316065779, iteration: 273380
loss: 0.9717731475830078,grad_norm: 0.9444482438088083, iteration: 273381
loss: 0.9706332087516785,grad_norm: 0.8982648175095073, iteration: 273382
loss: 0.9897974729537964,grad_norm: 0.8313023293170525, iteration: 273383
loss: 1.005713939666748,grad_norm: 0.732768186691818, iteration: 273384
loss: 1.006395936012268,grad_norm: 0.9014980900237415, iteration: 273385
loss: 0.9817121028900146,grad_norm: 0.9903501402660942, iteration: 273386
loss: 1.0097044706344604,grad_norm: 0.8019103737713794, iteration: 273387
loss: 0.9749125242233276,grad_norm: 0.914720333279593, iteration: 273388
loss: 0.9921185970306396,grad_norm: 0.8843377086204438, iteration: 273389
loss: 1.0131131410598755,grad_norm: 0.8178197512245603, iteration: 273390
loss: 0.9840726256370544,grad_norm: 0.9795229900305221, iteration: 273391
loss: 1.0136222839355469,grad_norm: 0.8868703409347801, iteration: 273392
loss: 0.9421048164367676,grad_norm: 0.9301635471511108, iteration: 273393
loss: 1.0734539031982422,grad_norm: 0.9999998698242649, iteration: 273394
loss: 0.9832088947296143,grad_norm: 0.8314613681080667, iteration: 273395
loss: 1.5309338569641113,grad_norm: 1.0000000618165499, iteration: 273396
loss: 0.9860246777534485,grad_norm: 0.999999049369096, iteration: 273397
loss: 1.0390545129776,grad_norm: 0.8287807316490311, iteration: 273398
loss: 0.9947605729103088,grad_norm: 0.760325400096228, iteration: 273399
loss: 1.0312482118606567,grad_norm: 0.999999752205704, iteration: 273400
loss: 1.021651029586792,grad_norm: 0.9999995655124821, iteration: 273401
loss: 1.0533883571624756,grad_norm: 0.8852143859167899, iteration: 273402
loss: 1.052538514137268,grad_norm: 0.999999748876905, iteration: 273403
loss: 1.0109111070632935,grad_norm: 0.9496473964726337, iteration: 273404
loss: 1.1673232316970825,grad_norm: 0.9999996055015612, iteration: 273405
loss: 0.9785075783729553,grad_norm: 0.9415001873890072, iteration: 273406
loss: 0.9919764399528503,grad_norm: 0.9999992691431061, iteration: 273407
loss: 1.0086642503738403,grad_norm: 0.9333666274802452, iteration: 273408
loss: 1.0060226917266846,grad_norm: 0.9999999333435106, iteration: 273409
loss: 1.0018157958984375,grad_norm: 0.9435938000155758, iteration: 273410
loss: 1.0646787881851196,grad_norm: 0.9999993165634008, iteration: 273411
loss: 1.006415843963623,grad_norm: 0.7249361136772693, iteration: 273412
loss: 1.0239027738571167,grad_norm: 0.9053610936315478, iteration: 273413
loss: 1.0533409118652344,grad_norm: 0.9757859608943756, iteration: 273414
loss: 1.0035830736160278,grad_norm: 0.9999998001389903, iteration: 273415
loss: 0.9878355860710144,grad_norm: 0.8950997663818319, iteration: 273416
loss: 1.043569564819336,grad_norm: 0.9663702174061661, iteration: 273417
loss: 0.9803379774093628,grad_norm: 0.8305253367008735, iteration: 273418
loss: 0.9857426285743713,grad_norm: 0.9999992139540005, iteration: 273419
loss: 1.0116535425186157,grad_norm: 1.0000000855426103, iteration: 273420
loss: 1.0070254802703857,grad_norm: 0.9413210221561933, iteration: 273421
loss: 0.9932066798210144,grad_norm: 0.9999989742736802, iteration: 273422
loss: 1.036203145980835,grad_norm: 0.9765668555327475, iteration: 273423
loss: 1.0021055936813354,grad_norm: 0.8228141584380546, iteration: 273424
loss: 0.9938965439796448,grad_norm: 0.9861017719822917, iteration: 273425
loss: 1.0092929601669312,grad_norm: 0.8866578943646349, iteration: 273426
loss: 0.9880499839782715,grad_norm: 0.8320165990295376, iteration: 273427
loss: 0.9669784307479858,grad_norm: 0.6919744231322005, iteration: 273428
loss: 1.0087085962295532,grad_norm: 0.9999990793786808, iteration: 273429
loss: 1.044118046760559,grad_norm: 0.9999991676243462, iteration: 273430
loss: 0.9937407374382019,grad_norm: 0.9461841079275284, iteration: 273431
loss: 1.0032200813293457,grad_norm: 0.8078623396340316, iteration: 273432
loss: 0.9784914255142212,grad_norm: 0.8203272224644216, iteration: 273433
loss: 0.9974275827407837,grad_norm: 0.7590749802114786, iteration: 273434
loss: 0.9894419312477112,grad_norm: 0.8951113153038034, iteration: 273435
loss: 1.0081263780593872,grad_norm: 0.8616619511571966, iteration: 273436
loss: 0.9981021285057068,grad_norm: 0.7292841287969711, iteration: 273437
loss: 0.9827110171318054,grad_norm: 0.8790684970073509, iteration: 273438
loss: 1.0797394514083862,grad_norm: 0.9999991838221237, iteration: 273439
loss: 1.0011003017425537,grad_norm: 0.8692535741350673, iteration: 273440
loss: 0.966392993927002,grad_norm: 0.846007360254677, iteration: 273441
loss: 1.0102043151855469,grad_norm: 0.9123806298907553, iteration: 273442
loss: 0.9977469444274902,grad_norm: 0.726714299685779, iteration: 273443
loss: 1.0158665180206299,grad_norm: 0.8327380108306083, iteration: 273444
loss: 1.0220787525177002,grad_norm: 0.8909873061379081, iteration: 273445
loss: 1.0491410493850708,grad_norm: 0.9999998813922082, iteration: 273446
loss: 1.0090315341949463,grad_norm: 0.7347243019654659, iteration: 273447
loss: 0.9907674789428711,grad_norm: 0.8386664769015085, iteration: 273448
loss: 1.025370478630066,grad_norm: 0.9541242087824873, iteration: 273449
loss: 0.955374002456665,grad_norm: 0.7911302289113384, iteration: 273450
loss: 0.9787839651107788,grad_norm: 0.8009194622217407, iteration: 273451
loss: 1.003693699836731,grad_norm: 0.8931615135791582, iteration: 273452
loss: 1.017812728881836,grad_norm: 0.7857464384964771, iteration: 273453
loss: 0.9975301027297974,grad_norm: 0.8834884033305892, iteration: 273454
loss: 1.0248757600784302,grad_norm: 0.8456535444787284, iteration: 273455
loss: 1.0144373178482056,grad_norm: 0.7977270511199054, iteration: 273456
loss: 0.9736643433570862,grad_norm: 0.9958414673085159, iteration: 273457
loss: 1.052422285079956,grad_norm: 0.9999991691295572, iteration: 273458
loss: 1.0112202167510986,grad_norm: 0.652525573776688, iteration: 273459
loss: 0.9835759997367859,grad_norm: 0.9475808102761736, iteration: 273460
loss: 1.0200514793395996,grad_norm: 0.9438693637692296, iteration: 273461
loss: 1.0099040269851685,grad_norm: 0.999999433296978, iteration: 273462
loss: 1.0064302682876587,grad_norm: 0.9029720950991933, iteration: 273463
loss: 0.9964972138404846,grad_norm: 0.9037936788449527, iteration: 273464
loss: 1.0052000284194946,grad_norm: 0.8604151550760871, iteration: 273465
loss: 1.0012704133987427,grad_norm: 0.9160656497225768, iteration: 273466
loss: 0.9989404678344727,grad_norm: 0.7718509453358616, iteration: 273467
loss: 1.0201666355133057,grad_norm: 0.9085264752941532, iteration: 273468
loss: 1.0214359760284424,grad_norm: 0.8838477747998953, iteration: 273469
loss: 1.0054173469543457,grad_norm: 0.7998827883297902, iteration: 273470
loss: 1.068796157836914,grad_norm: 0.9999991284748567, iteration: 273471
loss: 1.004815936088562,grad_norm: 0.9999993183086815, iteration: 273472
loss: 0.9788833856582642,grad_norm: 0.7848870005140384, iteration: 273473
loss: 0.9764704704284668,grad_norm: 0.7545022195838373, iteration: 273474
loss: 1.0230382680892944,grad_norm: 0.9094856080747769, iteration: 273475
loss: 0.9998534917831421,grad_norm: 0.8293434372809371, iteration: 273476
loss: 0.9979161620140076,grad_norm: 0.9897078059763064, iteration: 273477
loss: 0.9748312830924988,grad_norm: 0.8955103859428107, iteration: 273478
loss: 1.042393684387207,grad_norm: 0.9029280374585068, iteration: 273479
loss: 1.000553846359253,grad_norm: 0.8539559610312272, iteration: 273480
loss: 1.0013645887374878,grad_norm: 0.8393840727499114, iteration: 273481
loss: 0.9933766722679138,grad_norm: 0.8011091138523299, iteration: 273482
loss: 1.0186150074005127,grad_norm: 0.7374996279205597, iteration: 273483
loss: 1.0147508382797241,grad_norm: 0.9611066815986201, iteration: 273484
loss: 1.0514360666275024,grad_norm: 0.9999999258337816, iteration: 273485
loss: 1.0044764280319214,grad_norm: 0.9888761293285668, iteration: 273486
loss: 0.9845282435417175,grad_norm: 0.9962608853731996, iteration: 273487
loss: 0.9739810824394226,grad_norm: 0.8282822224730574, iteration: 273488
loss: 1.02833092212677,grad_norm: 0.9999991545032144, iteration: 273489
loss: 0.9843781590461731,grad_norm: 0.8738571809463982, iteration: 273490
loss: 1.0092778205871582,grad_norm: 0.9473897149684214, iteration: 273491
loss: 1.0151923894882202,grad_norm: 0.7787955035738808, iteration: 273492
loss: 1.039455771446228,grad_norm: 0.8326015262991802, iteration: 273493
loss: 0.9791940450668335,grad_norm: 0.8480622449430191, iteration: 273494
loss: 0.9790450930595398,grad_norm: 0.8827494056804377, iteration: 273495
loss: 0.9940511584281921,grad_norm: 0.9999989144846024, iteration: 273496
loss: 0.9702965617179871,grad_norm: 0.8880399401240177, iteration: 273497
loss: 1.023481845855713,grad_norm: 0.946547522017716, iteration: 273498
loss: 0.9895693063735962,grad_norm: 0.8449069117899434, iteration: 273499
loss: 0.9907467365264893,grad_norm: 0.7625609240281805, iteration: 273500
loss: 1.0009851455688477,grad_norm: 0.9999997333159906, iteration: 273501
loss: 0.9648804068565369,grad_norm: 0.8270463148201865, iteration: 273502
loss: 1.0212734937667847,grad_norm: 0.9999991276642032, iteration: 273503
loss: 0.9935981631278992,grad_norm: 0.8305364130865357, iteration: 273504
loss: 1.0056369304656982,grad_norm: 0.9999991414783097, iteration: 273505
loss: 1.0519341230392456,grad_norm: 0.9999990148410224, iteration: 273506
loss: 1.0146353244781494,grad_norm: 0.8364728524936564, iteration: 273507
loss: 1.0240931510925293,grad_norm: 0.9722087910250553, iteration: 273508
loss: 1.0889389514923096,grad_norm: 0.9999991684637021, iteration: 273509
loss: 1.0186479091644287,grad_norm: 0.7672877802127914, iteration: 273510
loss: 1.0638339519500732,grad_norm: 0.9999994657930235, iteration: 273511
loss: 0.9507201313972473,grad_norm: 0.7860484331742026, iteration: 273512
loss: 1.0201853513717651,grad_norm: 0.852878705254998, iteration: 273513
loss: 0.9884628653526306,grad_norm: 0.9059247771565656, iteration: 273514
loss: 1.0148825645446777,grad_norm: 0.8452614991810629, iteration: 273515
loss: 1.045958399772644,grad_norm: 0.8432908193844415, iteration: 273516
loss: 0.9971563816070557,grad_norm: 0.999999300563329, iteration: 273517
loss: 0.9810992479324341,grad_norm: 0.9999990864236941, iteration: 273518
loss: 1.0040833950042725,grad_norm: 0.9150986682486182, iteration: 273519
loss: 0.9619765281677246,grad_norm: 0.9999998407390078, iteration: 273520
loss: 0.984034538269043,grad_norm: 0.9261964318362249, iteration: 273521
loss: 1.0025736093521118,grad_norm: 0.7545401365426848, iteration: 273522
loss: 1.0524513721466064,grad_norm: 0.8364498124580787, iteration: 273523
loss: 1.0396428108215332,grad_norm: 0.9999998136367401, iteration: 273524
loss: 1.0028223991394043,grad_norm: 0.743936628178463, iteration: 273525
loss: 0.971310019493103,grad_norm: 0.9355772801942135, iteration: 273526
loss: 0.9842671751976013,grad_norm: 0.8207565723684936, iteration: 273527
loss: 1.1689120531082153,grad_norm: 0.9999990623698528, iteration: 273528
loss: 1.0908395051956177,grad_norm: 0.9999994856665598, iteration: 273529
loss: 0.9866204857826233,grad_norm: 0.8364711998889955, iteration: 273530
loss: 0.9850140810012817,grad_norm: 0.880602347396474, iteration: 273531
loss: 1.024381160736084,grad_norm: 0.6988131035843573, iteration: 273532
loss: 0.9944798946380615,grad_norm: 0.7857844112545506, iteration: 273533
loss: 0.9886188507080078,grad_norm: 0.7837767479984199, iteration: 273534
loss: 1.0120221376419067,grad_norm: 0.8936393791883542, iteration: 273535
loss: 0.9757193922996521,grad_norm: 0.9999990353518037, iteration: 273536
loss: 1.0126330852508545,grad_norm: 0.8203809975595944, iteration: 273537
loss: 1.0460247993469238,grad_norm: 0.9999995760899113, iteration: 273538
loss: 1.0345009565353394,grad_norm: 0.9103414020771642, iteration: 273539
loss: 0.9851382374763489,grad_norm: 0.912786446724736, iteration: 273540
loss: 0.995673418045044,grad_norm: 0.7946098968976353, iteration: 273541
loss: 0.982893705368042,grad_norm: 0.8932881822557653, iteration: 273542
loss: 0.9778602123260498,grad_norm: 0.9999992235809182, iteration: 273543
loss: 0.9874969720840454,grad_norm: 0.9199290762427308, iteration: 273544
loss: 0.9936584234237671,grad_norm: 0.7560091426896806, iteration: 273545
loss: 1.001867413520813,grad_norm: 0.8879069366524202, iteration: 273546
loss: 1.0270841121673584,grad_norm: 0.9108161370268764, iteration: 273547
loss: 1.000834584236145,grad_norm: 0.9303826334995473, iteration: 273548
loss: 1.0355308055877686,grad_norm: 0.9999993020595574, iteration: 273549
loss: 0.9586671590805054,grad_norm: 0.8116147102946428, iteration: 273550
loss: 1.008139729499817,grad_norm: 0.7959828534132214, iteration: 273551
loss: 0.9948341250419617,grad_norm: 0.9999991660058758, iteration: 273552
loss: 0.9754619598388672,grad_norm: 0.8853794092208165, iteration: 273553
loss: 1.0137219429016113,grad_norm: 0.8478868285475809, iteration: 273554
loss: 1.0156011581420898,grad_norm: 0.9999991819636258, iteration: 273555
loss: 0.9889561533927917,grad_norm: 0.7917644351295028, iteration: 273556
loss: 0.9817358255386353,grad_norm: 0.8041635102864285, iteration: 273557
loss: 1.0025333166122437,grad_norm: 0.940879586302633, iteration: 273558
loss: 1.0004494190216064,grad_norm: 0.7868954113554011, iteration: 273559
loss: 0.9930835962295532,grad_norm: 0.8364905985223983, iteration: 273560
loss: 1.0101263523101807,grad_norm: 0.9999990053244584, iteration: 273561
loss: 1.0350697040557861,grad_norm: 0.8709850179649797, iteration: 273562
loss: 1.02988862991333,grad_norm: 0.9999991002545853, iteration: 273563
loss: 0.9669613242149353,grad_norm: 0.8457317806062966, iteration: 273564
loss: 0.9620791077613831,grad_norm: 0.6834980623590464, iteration: 273565
loss: 1.0489615201950073,grad_norm: 0.757584971046937, iteration: 273566
loss: 1.0197575092315674,grad_norm: 0.9891705438259067, iteration: 273567
loss: 1.0294886827468872,grad_norm: 0.7749291553076257, iteration: 273568
loss: 1.0436729192733765,grad_norm: 0.9339899320057489, iteration: 273569
loss: 1.0141408443450928,grad_norm: 0.8884590602862434, iteration: 273570
loss: 1.005787968635559,grad_norm: 0.916634936859422, iteration: 273571
loss: 1.0153453350067139,grad_norm: 0.8475031691816818, iteration: 273572
loss: 0.9693989753723145,grad_norm: 0.893665687394625, iteration: 273573
loss: 0.9908565282821655,grad_norm: 0.9134959818850985, iteration: 273574
loss: 1.0001939535140991,grad_norm: 0.9999991601384806, iteration: 273575
loss: 0.9914731383323669,grad_norm: 0.9346265822577616, iteration: 273576
loss: 1.0167702436447144,grad_norm: 0.8401764212946634, iteration: 273577
loss: 1.0086756944656372,grad_norm: 0.8925513582324149, iteration: 273578
loss: 0.9775733351707458,grad_norm: 0.6896845628592099, iteration: 273579
loss: 0.9589957594871521,grad_norm: 0.8866144296040587, iteration: 273580
loss: 1.0320872068405151,grad_norm: 0.7752254547315905, iteration: 273581
loss: 0.9577898383140564,grad_norm: 0.9631506046732677, iteration: 273582
loss: 1.0019121170043945,grad_norm: 0.8682387151206286, iteration: 273583
loss: 1.0016052722930908,grad_norm: 0.9087496562638548, iteration: 273584
loss: 0.9764357805252075,grad_norm: 0.9316166903739, iteration: 273585
loss: 0.9834608435630798,grad_norm: 0.7908040109338269, iteration: 273586
loss: 1.0000602006912231,grad_norm: 0.8648474626360672, iteration: 273587
loss: 0.9581053256988525,grad_norm: 0.9514902765567178, iteration: 273588
loss: 1.0217219591140747,grad_norm: 0.7838722429839939, iteration: 273589
loss: 0.9801226258277893,grad_norm: 0.903188657529014, iteration: 273590
loss: 0.9563981890678406,grad_norm: 0.9566099252931076, iteration: 273591
loss: 1.0617378950119019,grad_norm: 0.6958307413149426, iteration: 273592
loss: 1.0204781293869019,grad_norm: 0.836268463512344, iteration: 273593
loss: 0.9711732268333435,grad_norm: 0.9022601178114715, iteration: 273594
loss: 0.9817075133323669,grad_norm: 0.8644157668032773, iteration: 273595
loss: 0.9970300793647766,grad_norm: 0.9999991835011127, iteration: 273596
loss: 1.0444194078445435,grad_norm: 0.8798382330012711, iteration: 273597
loss: 1.0029126405715942,grad_norm: 0.7923867662663455, iteration: 273598
loss: 0.9751708507537842,grad_norm: 0.9254070849072045, iteration: 273599
loss: 1.0583713054656982,grad_norm: 0.9178591326115308, iteration: 273600
loss: 1.0784276723861694,grad_norm: 0.9999994572812837, iteration: 273601
loss: 1.0086596012115479,grad_norm: 0.9999991849691536, iteration: 273602
loss: 0.9740062952041626,grad_norm: 0.9999991459239972, iteration: 273603
loss: 0.9969201683998108,grad_norm: 0.9474789136178471, iteration: 273604
loss: 1.005123257637024,grad_norm: 0.8801315243606439, iteration: 273605
loss: 1.0254621505737305,grad_norm: 0.8620446486318003, iteration: 273606
loss: 1.0167793035507202,grad_norm: 0.9207351996178977, iteration: 273607
loss: 1.0142290592193604,grad_norm: 0.9441751956443815, iteration: 273608
loss: 1.0072507858276367,grad_norm: 0.920892738759155, iteration: 273609
loss: 0.9959061145782471,grad_norm: 0.9999989846364833, iteration: 273610
loss: 1.010735034942627,grad_norm: 0.8832064216684535, iteration: 273611
loss: 1.0079491138458252,grad_norm: 0.7984424370389428, iteration: 273612
loss: 0.9529599547386169,grad_norm: 0.850315417984057, iteration: 273613
loss: 1.0003232955932617,grad_norm: 0.839890226123221, iteration: 273614
loss: 0.9873725175857544,grad_norm: 0.7982112078871116, iteration: 273615
loss: 0.9634435176849365,grad_norm: 0.8171260719046239, iteration: 273616
loss: 0.9577776193618774,grad_norm: 0.9999989602857726, iteration: 273617
loss: 0.989392876625061,grad_norm: 0.894871139490337, iteration: 273618
loss: 1.0049943923950195,grad_norm: 0.9677754359258955, iteration: 273619
loss: 0.9586796164512634,grad_norm: 0.8843148116028748, iteration: 273620
loss: 0.9947879910469055,grad_norm: 0.9792676279135869, iteration: 273621
loss: 1.0011473894119263,grad_norm: 0.8482865088166828, iteration: 273622
loss: 1.0082234144210815,grad_norm: 0.868778247073668, iteration: 273623
loss: 0.9857208728790283,grad_norm: 0.8384079989271609, iteration: 273624
loss: 0.9874763488769531,grad_norm: 0.999999146485877, iteration: 273625
loss: 1.004010796546936,grad_norm: 0.9297877018767039, iteration: 273626
loss: 1.06967031955719,grad_norm: 0.9999995802591501, iteration: 273627
loss: 0.9970694184303284,grad_norm: 0.999999895905082, iteration: 273628
loss: 1.006866216659546,grad_norm: 0.8271206628620011, iteration: 273629
loss: 0.992608368396759,grad_norm: 0.772458462844894, iteration: 273630
loss: 1.0650317668914795,grad_norm: 0.9999998047079198, iteration: 273631
loss: 0.9906054735183716,grad_norm: 0.8795335434126921, iteration: 273632
loss: 0.9724799394607544,grad_norm: 0.8178828049975067, iteration: 273633
loss: 0.9947285652160645,grad_norm: 0.9213780713672586, iteration: 273634
loss: 0.9650314450263977,grad_norm: 0.8711063906747343, iteration: 273635
loss: 1.007849097251892,grad_norm: 0.9574945930558517, iteration: 273636
loss: 1.0043268203735352,grad_norm: 0.786912257580347, iteration: 273637
loss: 1.0214070081710815,grad_norm: 0.8487027176336726, iteration: 273638
loss: 1.0080575942993164,grad_norm: 0.9014307621750314, iteration: 273639
loss: 0.9556999802589417,grad_norm: 0.8621038858636052, iteration: 273640
loss: 1.0223536491394043,grad_norm: 0.99999895676484, iteration: 273641
loss: 1.0086771249771118,grad_norm: 0.9999992184590617, iteration: 273642
loss: 1.0210542678833008,grad_norm: 0.9999990667747237, iteration: 273643
loss: 1.0400210618972778,grad_norm: 0.846454830359973, iteration: 273644
loss: 1.0294570922851562,grad_norm: 0.667479108388614, iteration: 273645
loss: 0.9770585298538208,grad_norm: 0.9999991537168784, iteration: 273646
loss: 1.0027638673782349,grad_norm: 0.9999992164711966, iteration: 273647
loss: 1.0149000883102417,grad_norm: 0.9999991383353504, iteration: 273648
loss: 0.9976616501808167,grad_norm: 0.7600129332304102, iteration: 273649
loss: 0.9786072373390198,grad_norm: 0.7835959495359135, iteration: 273650
loss: 1.0109796524047852,grad_norm: 0.999999888683752, iteration: 273651
loss: 1.0331701040267944,grad_norm: 0.8427116020951435, iteration: 273652
loss: 1.0011910200119019,grad_norm: 0.9546233910897921, iteration: 273653
loss: 1.0283123254776,grad_norm: 0.9331041565141456, iteration: 273654
loss: 1.026871919631958,grad_norm: 0.6853591827411425, iteration: 273655
loss: 0.9686030149459839,grad_norm: 0.8559150944410987, iteration: 273656
loss: 1.0125401020050049,grad_norm: 0.9999990869825459, iteration: 273657
loss: 0.9239286184310913,grad_norm: 0.8384391277289178, iteration: 273658
loss: 1.037764072418213,grad_norm: 0.9999990253244975, iteration: 273659
loss: 1.0183886289596558,grad_norm: 0.9999991632115646, iteration: 273660
loss: 1.0049678087234497,grad_norm: 0.9999990978307475, iteration: 273661
loss: 0.9828563928604126,grad_norm: 0.8105255089920663, iteration: 273662
loss: 0.9915714263916016,grad_norm: 0.9789462997029363, iteration: 273663
loss: 1.0002069473266602,grad_norm: 0.8405997483081206, iteration: 273664
loss: 0.9969207048416138,grad_norm: 0.8467203395622886, iteration: 273665
loss: 0.9999810457229614,grad_norm: 0.8487693524711742, iteration: 273666
loss: 1.0640268325805664,grad_norm: 0.8529721375267517, iteration: 273667
loss: 0.9829418063163757,grad_norm: 0.7899373429688432, iteration: 273668
loss: 1.0005745887756348,grad_norm: 0.8722701350487095, iteration: 273669
loss: 0.988989531993866,grad_norm: 0.8486099014552729, iteration: 273670
loss: 1.0087658166885376,grad_norm: 0.9999990992899529, iteration: 273671
loss: 1.0158498287200928,grad_norm: 0.9653035108691551, iteration: 273672
loss: 1.0091720819473267,grad_norm: 0.8051407646121439, iteration: 273673
loss: 1.0113240480422974,grad_norm: 0.9999990022934625, iteration: 273674
loss: 0.9977455735206604,grad_norm: 0.7839976833813832, iteration: 273675
loss: 0.9912830591201782,grad_norm: 0.8958854560625804, iteration: 273676
loss: 0.9932173490524292,grad_norm: 0.7911543368870311, iteration: 273677
loss: 0.9994001984596252,grad_norm: 0.897171043996843, iteration: 273678
loss: 1.040981411933899,grad_norm: 0.9499376680080863, iteration: 273679
loss: 1.0477558374404907,grad_norm: 0.9999997103106759, iteration: 273680
loss: 1.0066877603530884,grad_norm: 0.8684482629348554, iteration: 273681
loss: 0.984405517578125,grad_norm: 0.9999990410079422, iteration: 273682
loss: 1.0014663934707642,grad_norm: 0.864356620223288, iteration: 273683
loss: 0.9802579283714294,grad_norm: 0.8002309907361239, iteration: 273684
loss: 1.0414280891418457,grad_norm: 0.9999991185456086, iteration: 273685
loss: 1.0011028051376343,grad_norm: 0.9196034797799448, iteration: 273686
loss: 1.014358401298523,grad_norm: 0.9999992989580775, iteration: 273687
loss: 1.0587996244430542,grad_norm: 0.9999995945479638, iteration: 273688
loss: 0.9877541065216064,grad_norm: 0.9999992460231657, iteration: 273689
loss: 1.0368632078170776,grad_norm: 0.9326437122514091, iteration: 273690
loss: 0.980358898639679,grad_norm: 0.8329927910219852, iteration: 273691
loss: 1.0038795471191406,grad_norm: 0.8344219878564954, iteration: 273692
loss: 1.0336624383926392,grad_norm: 0.9999993314171496, iteration: 273693
loss: 0.9622756838798523,grad_norm: 0.9216714302531143, iteration: 273694
loss: 1.0158674716949463,grad_norm: 0.9999990007997777, iteration: 273695
loss: 1.0215890407562256,grad_norm: 0.9714048290129937, iteration: 273696
loss: 1.0229721069335938,grad_norm: 0.8911335215516527, iteration: 273697
loss: 0.9757546782493591,grad_norm: 0.8300268073293141, iteration: 273698
loss: 1.03276526927948,grad_norm: 0.8399896565988295, iteration: 273699
loss: 0.9955255389213562,grad_norm: 0.9999990416995622, iteration: 273700
loss: 0.9891089200973511,grad_norm: 0.8909009098180158, iteration: 273701
loss: 1.01797354221344,grad_norm: 0.8738771910481717, iteration: 273702
loss: 1.0104413032531738,grad_norm: 0.8369506540063492, iteration: 273703
loss: 0.9668335914611816,grad_norm: 0.9999990114167037, iteration: 273704
loss: 1.0420546531677246,grad_norm: 0.7997532620899415, iteration: 273705
loss: 1.0470117330551147,grad_norm: 0.9265570531492445, iteration: 273706
loss: 0.9933567643165588,grad_norm: 0.953607913317213, iteration: 273707
loss: 1.0103713274002075,grad_norm: 0.9999991390953042, iteration: 273708
loss: 1.0039387941360474,grad_norm: 0.8414404208120135, iteration: 273709
loss: 0.9827560186386108,grad_norm: 0.758058519160196, iteration: 273710
loss: 1.0046722888946533,grad_norm: 0.8688521137934532, iteration: 273711
loss: 1.0421730279922485,grad_norm: 0.9999990349921305, iteration: 273712
loss: 0.9906813502311707,grad_norm: 0.999999603960669, iteration: 273713
loss: 1.0132476091384888,grad_norm: 0.9999990787563074, iteration: 273714
loss: 1.021104335784912,grad_norm: 0.9999990490800119, iteration: 273715
loss: 0.9827681183815002,grad_norm: 0.999999587454347, iteration: 273716
loss: 0.9694054126739502,grad_norm: 0.9607771056900601, iteration: 273717
loss: 0.9676845073699951,grad_norm: 0.9999991023511671, iteration: 273718
loss: 0.9906955361366272,grad_norm: 0.9841249100858811, iteration: 273719
loss: 1.066940426826477,grad_norm: 0.9999991685595836, iteration: 273720
loss: 1.0105690956115723,grad_norm: 0.8657209378961795, iteration: 273721
loss: 0.9801328778266907,grad_norm: 0.7569353189522224, iteration: 273722
loss: 0.9950419664382935,grad_norm: 0.8650452939523009, iteration: 273723
loss: 0.9691402316093445,grad_norm: 0.9154743769344315, iteration: 273724
loss: 1.1042802333831787,grad_norm: 0.9999989986002237, iteration: 273725
loss: 1.0177892446517944,grad_norm: 0.7893380111324498, iteration: 273726
loss: 1.0088070631027222,grad_norm: 0.7898210361758162, iteration: 273727
loss: 0.9706576466560364,grad_norm: 0.8301493506055535, iteration: 273728
loss: 1.0226716995239258,grad_norm: 0.9226801353467012, iteration: 273729
loss: 0.9925780892372131,grad_norm: 0.9999991132310942, iteration: 273730
loss: 1.0114809274673462,grad_norm: 0.7953020054117205, iteration: 273731
loss: 0.9703659415245056,grad_norm: 0.977833155173763, iteration: 273732
loss: 0.9838820099830627,grad_norm: 0.9212106931673582, iteration: 273733
loss: 1.0182793140411377,grad_norm: 0.995120507075829, iteration: 273734
loss: 1.0092520713806152,grad_norm: 0.8736778654107599, iteration: 273735
loss: 1.0085927248001099,grad_norm: 0.9531138628249897, iteration: 273736
loss: 1.0248967409133911,grad_norm: 0.7499281000605375, iteration: 273737
loss: 1.0096975564956665,grad_norm: 0.7954541561477838, iteration: 273738
loss: 0.9898662567138672,grad_norm: 0.9999993553249322, iteration: 273739
loss: 0.971010684967041,grad_norm: 0.842848190882749, iteration: 273740
loss: 0.9875612854957581,grad_norm: 0.9259313966313613, iteration: 273741
loss: 1.0359506607055664,grad_norm: 0.7853827957517264, iteration: 273742
loss: 0.9927967190742493,grad_norm: 0.999999050086853, iteration: 273743
loss: 1.0474945306777954,grad_norm: 0.9999989858318388, iteration: 273744
loss: 0.9676094055175781,grad_norm: 0.873078926832766, iteration: 273745
loss: 1.014703392982483,grad_norm: 0.882296733362213, iteration: 273746
loss: 0.9609647989273071,grad_norm: 0.8146828343835713, iteration: 273747
loss: 0.9753520488739014,grad_norm: 0.8368614789234106, iteration: 273748
loss: 0.9999893307685852,grad_norm: 0.9348858658413436, iteration: 273749
loss: 1.0009346008300781,grad_norm: 0.8839073043785773, iteration: 273750
loss: 0.9947550892829895,grad_norm: 0.8912299727353011, iteration: 273751
loss: 0.9872173070907593,grad_norm: 0.9999990150642214, iteration: 273752
loss: 0.9894112348556519,grad_norm: 0.7920939675502906, iteration: 273753
loss: 1.0305845737457275,grad_norm: 0.8999113462630325, iteration: 273754
loss: 0.9891407489776611,grad_norm: 0.746610407365984, iteration: 273755
loss: 0.9572970271110535,grad_norm: 0.8714413865107006, iteration: 273756
loss: 1.0008442401885986,grad_norm: 0.9999991687085155, iteration: 273757
loss: 0.9634056091308594,grad_norm: 0.7675312281787829, iteration: 273758
loss: 1.044566035270691,grad_norm: 0.9999991028180283, iteration: 273759
loss: 1.0293728113174438,grad_norm: 0.9901097743124617, iteration: 273760
loss: 1.023519515991211,grad_norm: 0.773587450166662, iteration: 273761
loss: 1.0226502418518066,grad_norm: 0.8132359544052241, iteration: 273762
loss: 0.9994297623634338,grad_norm: 0.9926442303573207, iteration: 273763
loss: 1.0099308490753174,grad_norm: 0.7380765950616832, iteration: 273764
loss: 1.0360575914382935,grad_norm: 0.999999134130671, iteration: 273765
loss: 0.9650171399116516,grad_norm: 0.8228249032350887, iteration: 273766
loss: 0.9575710296630859,grad_norm: 0.9755071920771379, iteration: 273767
loss: 1.0107945203781128,grad_norm: 0.8798202076377268, iteration: 273768
loss: 1.0245589017868042,grad_norm: 0.9809814734419866, iteration: 273769
loss: 1.0388710498809814,grad_norm: 0.958942935085728, iteration: 273770
loss: 0.9873358607292175,grad_norm: 0.8423494274231513, iteration: 273771
loss: 0.9890859127044678,grad_norm: 0.8727648435963566, iteration: 273772
loss: 1.0587811470031738,grad_norm: 0.9999998071397005, iteration: 273773
loss: 1.0357962846755981,grad_norm: 0.9249873346086179, iteration: 273774
loss: 1.010555386543274,grad_norm: 0.9249420784742656, iteration: 273775
loss: 1.0262587070465088,grad_norm: 0.9368222953006817, iteration: 273776
loss: 1.0211371183395386,grad_norm: 0.9999991178476424, iteration: 273777
loss: 1.0077487230300903,grad_norm: 0.8872270181629989, iteration: 273778
loss: 1.029870867729187,grad_norm: 0.9999991134316422, iteration: 273779
loss: 1.0216885805130005,grad_norm: 0.8627331295637476, iteration: 273780
loss: 1.025239109992981,grad_norm: 0.8641390762559267, iteration: 273781
loss: 1.0127241611480713,grad_norm: 0.96503677058009, iteration: 273782
loss: 1.0034428834915161,grad_norm: 0.7100447369180118, iteration: 273783
loss: 1.018267273902893,grad_norm: 0.7649827852940023, iteration: 273784
loss: 0.9958704113960266,grad_norm: 0.7698846479584407, iteration: 273785
loss: 1.0129355192184448,grad_norm: 0.8775721937639787, iteration: 273786
loss: 1.0286641120910645,grad_norm: 0.9164543043951695, iteration: 273787
loss: 1.0269438028335571,grad_norm: 0.909602313206113, iteration: 273788
loss: 1.0230674743652344,grad_norm: 0.6905553642950077, iteration: 273789
loss: 1.010883092880249,grad_norm: 0.8761734015644651, iteration: 273790
loss: 0.9946151971817017,grad_norm: 0.8462167478875802, iteration: 273791
loss: 1.0049262046813965,grad_norm: 0.9438402190259755, iteration: 273792
loss: 1.0207749605178833,grad_norm: 0.8761046944140971, iteration: 273793
loss: 0.9716177582740784,grad_norm: 0.872330147040447, iteration: 273794
loss: 1.036379337310791,grad_norm: 0.9239772314221217, iteration: 273795
loss: 1.0104128122329712,grad_norm: 0.9949232091060577, iteration: 273796
loss: 0.9862639904022217,grad_norm: 0.9450139407490432, iteration: 273797
loss: 0.9785858988761902,grad_norm: 0.8811750293124455, iteration: 273798
loss: 0.9939903020858765,grad_norm: 0.81661482834445, iteration: 273799
loss: 1.013357162475586,grad_norm: 0.7832862512905403, iteration: 273800
loss: 0.9803224205970764,grad_norm: 0.9999990894830968, iteration: 273801
loss: 0.9669325947761536,grad_norm: 0.9052052335019592, iteration: 273802
loss: 0.9975314736366272,grad_norm: 0.9999990784618219, iteration: 273803
loss: 1.0048309564590454,grad_norm: 0.798529853419636, iteration: 273804
loss: 0.9815995097160339,grad_norm: 0.9999991994170406, iteration: 273805
loss: 1.0318217277526855,grad_norm: 0.8233965837005729, iteration: 273806
loss: 1.0202052593231201,grad_norm: 0.8110805113349507, iteration: 273807
loss: 0.989325225353241,grad_norm: 0.8485455577967723, iteration: 273808
loss: 1.0045597553253174,grad_norm: 0.9999989281292123, iteration: 273809
loss: 1.0338451862335205,grad_norm: 0.7915095049919989, iteration: 273810
loss: 1.021241307258606,grad_norm: 0.8399750275884718, iteration: 273811
loss: 0.9804104566574097,grad_norm: 0.9392939375964453, iteration: 273812
loss: 1.0463449954986572,grad_norm: 0.8271484076415108, iteration: 273813
loss: 1.0109586715698242,grad_norm: 0.97849331292479, iteration: 273814
loss: 0.9718590378761292,grad_norm: 0.9786022780983088, iteration: 273815
loss: 1.0047181844711304,grad_norm: 0.7805862178151673, iteration: 273816
loss: 0.9533341526985168,grad_norm: 0.8062468068881513, iteration: 273817
loss: 1.0212334394454956,grad_norm: 0.9999990689288688, iteration: 273818
loss: 1.022483229637146,grad_norm: 0.8464020524176546, iteration: 273819
loss: 1.0171526670455933,grad_norm: 0.9918576728669188, iteration: 273820
loss: 0.9991615414619446,grad_norm: 0.8524630999917463, iteration: 273821
loss: 0.9553691744804382,grad_norm: 0.9045688871263884, iteration: 273822
loss: 1.0236272811889648,grad_norm: 0.8551430512021261, iteration: 273823
loss: 1.0143336057662964,grad_norm: 0.9512232845946955, iteration: 273824
loss: 1.0183652639389038,grad_norm: 0.8445696717766731, iteration: 273825
loss: 1.014461874961853,grad_norm: 0.9670539416436862, iteration: 273826
loss: 0.9770480990409851,grad_norm: 0.8814447383727142, iteration: 273827
loss: 0.9985384345054626,grad_norm: 0.995177604240039, iteration: 273828
loss: 0.9917598962783813,grad_norm: 0.9824918272614319, iteration: 273829
loss: 0.9759174585342407,grad_norm: 0.7693175010812455, iteration: 273830
loss: 1.020811676979065,grad_norm: 0.8383070023513466, iteration: 273831
loss: 0.9869279265403748,grad_norm: 0.9394056529026803, iteration: 273832
loss: 0.9710925221443176,grad_norm: 0.9956454704015093, iteration: 273833
loss: 0.9804810285568237,grad_norm: 0.7995230063432525, iteration: 273834
loss: 0.9867496490478516,grad_norm: 0.8270338789007247, iteration: 273835
loss: 0.9787207245826721,grad_norm: 0.8727966934104056, iteration: 273836
loss: 1.0045002698898315,grad_norm: 0.8437566747078299, iteration: 273837
loss: 0.994659423828125,grad_norm: 0.9965427785659926, iteration: 273838
loss: 0.9875211119651794,grad_norm: 0.9999990283153035, iteration: 273839
loss: 0.976439893245697,grad_norm: 0.9245448320349923, iteration: 273840
loss: 1.035261631011963,grad_norm: 0.9797184547713075, iteration: 273841
loss: 1.0078504085540771,grad_norm: 0.8149685403924883, iteration: 273842
loss: 1.0392118692398071,grad_norm: 0.8242030364885539, iteration: 273843
loss: 1.0116913318634033,grad_norm: 0.7395082805739377, iteration: 273844
loss: 1.00757896900177,grad_norm: 0.7469137817909786, iteration: 273845
loss: 1.020150899887085,grad_norm: 0.9999998761808777, iteration: 273846
loss: 0.9948353171348572,grad_norm: 0.8261400649249574, iteration: 273847
loss: 1.023950219154358,grad_norm: 0.9999993147339962, iteration: 273848
loss: 1.0060006380081177,grad_norm: 0.8307826683499119, iteration: 273849
loss: 0.9773488640785217,grad_norm: 0.7473829067830637, iteration: 273850
loss: 1.00347101688385,grad_norm: 0.8987065274404028, iteration: 273851
loss: 0.9845970869064331,grad_norm: 0.8933140509126234, iteration: 273852
loss: 1.014139175415039,grad_norm: 0.8854645111968134, iteration: 273853
loss: 1.044632911682129,grad_norm: 0.8300591238778912, iteration: 273854
loss: 0.9974785447120667,grad_norm: 0.8626455027792227, iteration: 273855
loss: 1.0053887367248535,grad_norm: 0.8979739125396489, iteration: 273856
loss: 0.9799578785896301,grad_norm: 0.9555393521540906, iteration: 273857
loss: 1.0316342115402222,grad_norm: 0.9315128042810411, iteration: 273858
loss: 1.0003266334533691,grad_norm: 0.978557353053859, iteration: 273859
loss: 0.9848986268043518,grad_norm: 0.7892435027681399, iteration: 273860
loss: 1.0158374309539795,grad_norm: 0.8355168125958105, iteration: 273861
loss: 0.9806007742881775,grad_norm: 0.9322706286191301, iteration: 273862
loss: 0.9925042390823364,grad_norm: 0.914224593522002, iteration: 273863
loss: 1.0244718790054321,grad_norm: 0.7920890672118899, iteration: 273864
loss: 0.9692022204399109,grad_norm: 0.7972615842250935, iteration: 273865
loss: 1.0311318635940552,grad_norm: 0.8402534935278312, iteration: 273866
loss: 1.0391755104064941,grad_norm: 0.9765501034376688, iteration: 273867
loss: 1.005738377571106,grad_norm: 0.9271646502188025, iteration: 273868
loss: 0.9799384474754333,grad_norm: 0.7106630038010993, iteration: 273869
loss: 1.0427618026733398,grad_norm: 0.9846946780466556, iteration: 273870
loss: 0.9584290385246277,grad_norm: 0.8976485526111229, iteration: 273871
loss: 1.0257502794265747,grad_norm: 0.9185514233223994, iteration: 273872
loss: 0.9535637497901917,grad_norm: 0.8622794080900857, iteration: 273873
loss: 1.019642949104309,grad_norm: 0.8904605729665998, iteration: 273874
loss: 1.0056562423706055,grad_norm: 0.7825254658743998, iteration: 273875
loss: 1.0433744192123413,grad_norm: 0.9999991711395837, iteration: 273876
loss: 0.9950413107872009,grad_norm: 0.946332724727231, iteration: 273877
loss: 1.0172175168991089,grad_norm: 0.8453184731474733, iteration: 273878
loss: 0.9861973524093628,grad_norm: 0.745404391991329, iteration: 273879
loss: 1.0240317583084106,grad_norm: 0.9285380043585392, iteration: 273880
loss: 1.076749324798584,grad_norm: 0.9999991907871992, iteration: 273881
loss: 0.9930996298789978,grad_norm: 0.7761839762976833, iteration: 273882
loss: 0.9916577339172363,grad_norm: 0.7858859222006924, iteration: 273883
loss: 1.0074323415756226,grad_norm: 0.999999142467325, iteration: 273884
loss: 1.058514952659607,grad_norm: 0.9999998607348922, iteration: 273885
loss: 0.9772569537162781,grad_norm: 0.9700944982415819, iteration: 273886
loss: 1.0043562650680542,grad_norm: 0.8682340005136095, iteration: 273887
loss: 1.1160087585449219,grad_norm: 0.9999996243257525, iteration: 273888
loss: 0.9538770914077759,grad_norm: 0.8169555117300522, iteration: 273889
loss: 0.9941796660423279,grad_norm: 0.8426557019879845, iteration: 273890
loss: 1.1038103103637695,grad_norm: 0.999999845033615, iteration: 273891
loss: 0.9936351180076599,grad_norm: 0.9999990585437717, iteration: 273892
loss: 1.0232592821121216,grad_norm: 0.999998968603757, iteration: 273893
loss: 0.9770326018333435,grad_norm: 0.936298291879202, iteration: 273894
loss: 0.9897518754005432,grad_norm: 0.9999990229781625, iteration: 273895
loss: 1.0244919061660767,grad_norm: 0.999999790887619, iteration: 273896
loss: 0.9846777319908142,grad_norm: 0.8152396832283688, iteration: 273897
loss: 0.9599936604499817,grad_norm: 0.7909770320948663, iteration: 273898
loss: 1.0000556707382202,grad_norm: 0.9198697430073913, iteration: 273899
loss: 1.0003515481948853,grad_norm: 0.944166981268377, iteration: 273900
loss: 0.9967397451400757,grad_norm: 0.9461812078165921, iteration: 273901
loss: 0.9677712917327881,grad_norm: 0.7950703431807182, iteration: 273902
loss: 1.0046489238739014,grad_norm: 0.7175267301047338, iteration: 273903
loss: 1.0195485353469849,grad_norm: 0.8843862393653841, iteration: 273904
loss: 1.0071951150894165,grad_norm: 0.8097768048165851, iteration: 273905
loss: 1.0070973634719849,grad_norm: 0.8067714872582811, iteration: 273906
loss: 0.9615387916564941,grad_norm: 0.8346651689454214, iteration: 273907
loss: 1.0088856220245361,grad_norm: 0.8871216400062903, iteration: 273908
loss: 0.9518505334854126,grad_norm: 0.8521446005540858, iteration: 273909
loss: 1.0715113878250122,grad_norm: 0.9999995633166228, iteration: 273910
loss: 1.0171418190002441,grad_norm: 0.7671193517268331, iteration: 273911
loss: 0.9749445915222168,grad_norm: 0.9835213913739906, iteration: 273912
loss: 1.0123599767684937,grad_norm: 0.96479917253084, iteration: 273913
loss: 1.0167839527130127,grad_norm: 0.9999989973437576, iteration: 273914
loss: 1.0624748468399048,grad_norm: 0.8643778203009999, iteration: 273915
loss: 1.031031847000122,grad_norm: 0.8172816039749516, iteration: 273916
loss: 0.9773601293563843,grad_norm: 0.9999991233461205, iteration: 273917
loss: 0.9946812391281128,grad_norm: 0.9000444452781472, iteration: 273918
loss: 0.9927986860275269,grad_norm: 0.7149042809576137, iteration: 273919
loss: 0.9868861436843872,grad_norm: 0.9531657662138425, iteration: 273920
loss: 0.9873066544532776,grad_norm: 0.8838996974448706, iteration: 273921
loss: 1.0015652179718018,grad_norm: 0.9234976364372519, iteration: 273922
loss: 0.9825810790061951,grad_norm: 0.9611395330078842, iteration: 273923
loss: 0.9988231062889099,grad_norm: 0.7044121738149264, iteration: 273924
loss: 1.0048418045043945,grad_norm: 0.9554450104981658, iteration: 273925
loss: 0.980617344379425,grad_norm: 0.9556277152064969, iteration: 273926
loss: 0.9997476935386658,grad_norm: 0.9999990422893479, iteration: 273927
loss: 1.0152240991592407,grad_norm: 0.9999988960560295, iteration: 273928
loss: 1.0244864225387573,grad_norm: 0.9999990823579056, iteration: 273929
loss: 0.98589026927948,grad_norm: 0.9172527282776167, iteration: 273930
loss: 1.0099769830703735,grad_norm: 0.9082864770214171, iteration: 273931
loss: 1.0180795192718506,grad_norm: 0.9637818271146182, iteration: 273932
loss: 0.9901646971702576,grad_norm: 0.7743154579141119, iteration: 273933
loss: 1.0055797100067139,grad_norm: 0.6988673061935547, iteration: 273934
loss: 1.040083885192871,grad_norm: 0.8598707653165935, iteration: 273935
loss: 0.9800544381141663,grad_norm: 0.9297590401101761, iteration: 273936
loss: 0.9906570315361023,grad_norm: 0.778969271773535, iteration: 273937
loss: 1.0125665664672852,grad_norm: 0.8702391247528057, iteration: 273938
loss: 0.9592204689979553,grad_norm: 0.8466373563369004, iteration: 273939
loss: 0.9607403874397278,grad_norm: 0.7921410851821483, iteration: 273940
loss: 1.008988380432129,grad_norm: 0.7991533860191851, iteration: 273941
loss: 0.9796321392059326,grad_norm: 0.7592645605577502, iteration: 273942
loss: 0.9626677632331848,grad_norm: 0.9999990779043976, iteration: 273943
loss: 1.0177723169326782,grad_norm: 0.9999991874808588, iteration: 273944
loss: 0.969870924949646,grad_norm: 0.7440320862181352, iteration: 273945
loss: 0.9567949175834656,grad_norm: 0.8791002638096013, iteration: 273946
loss: 0.9972454905509949,grad_norm: 0.7331476336961041, iteration: 273947
loss: 1.0123153924942017,grad_norm: 0.9227783928507436, iteration: 273948
loss: 0.994238555431366,grad_norm: 0.7877444604399999, iteration: 273949
loss: 0.9870319962501526,grad_norm: 0.8058884324078656, iteration: 273950
loss: 0.9874710440635681,grad_norm: 0.9904733127020873, iteration: 273951
loss: 0.9925599694252014,grad_norm: 0.9083599588234728, iteration: 273952
loss: 1.0277639627456665,grad_norm: 0.9187051286575214, iteration: 273953
loss: 0.9743993282318115,grad_norm: 0.8734044633276586, iteration: 273954
loss: 1.0110467672348022,grad_norm: 0.9584470442300134, iteration: 273955
loss: 1.016649603843689,grad_norm: 0.9999991212510492, iteration: 273956
loss: 1.008330225944519,grad_norm: 0.9999995357803932, iteration: 273957
loss: 1.0397990942001343,grad_norm: 0.9999995047412619, iteration: 273958
loss: 0.9856230020523071,grad_norm: 0.977441472080728, iteration: 273959
loss: 0.9787617921829224,grad_norm: 0.9327659013804092, iteration: 273960
loss: 0.9868335127830505,grad_norm: 0.921091331971205, iteration: 273961
loss: 1.0339882373809814,grad_norm: 0.9326464747052094, iteration: 273962
loss: 0.993526816368103,grad_norm: 0.9365735833817017, iteration: 273963
loss: 0.9995177388191223,grad_norm: 0.7579662242375161, iteration: 273964
loss: 1.0523326396942139,grad_norm: 0.8412156260083219, iteration: 273965
loss: 0.9997541904449463,grad_norm: 0.8853695793385135, iteration: 273966
loss: 1.026389241218567,grad_norm: 0.9267958904366175, iteration: 273967
loss: 1.0321401357650757,grad_norm: 0.9999997022828105, iteration: 273968
loss: 0.9493442177772522,grad_norm: 0.8415664693898649, iteration: 273969
loss: 0.9359436631202698,grad_norm: 0.8528997047045563, iteration: 273970
loss: 0.9753426909446716,grad_norm: 0.8087949891201578, iteration: 273971
loss: 0.9696664810180664,grad_norm: 0.9614776673603754, iteration: 273972
loss: 1.012705683708191,grad_norm: 0.932656402601774, iteration: 273973
loss: 0.9993530511856079,grad_norm: 0.9436238471574535, iteration: 273974
loss: 1.017356038093567,grad_norm: 0.9725955575676533, iteration: 273975
loss: 1.1098198890686035,grad_norm: 0.9999998003919807, iteration: 273976
loss: 0.9981630444526672,grad_norm: 0.8285383381043225, iteration: 273977
loss: 0.9996658563613892,grad_norm: 0.9965214412446071, iteration: 273978
loss: 0.9760212302207947,grad_norm: 0.9666925089680214, iteration: 273979
loss: 1.0157909393310547,grad_norm: 0.9177571450035951, iteration: 273980
loss: 0.9805887341499329,grad_norm: 0.7840674902923088, iteration: 273981
loss: 0.9817886352539062,grad_norm: 0.9228863161791596, iteration: 273982
loss: 0.9661494493484497,grad_norm: 0.8711901888606122, iteration: 273983
loss: 0.9993683099746704,grad_norm: 0.813366166145674, iteration: 273984
loss: 0.9834893345832825,grad_norm: 0.9999990143881007, iteration: 273985
loss: 1.0040141344070435,grad_norm: 0.9999991472678224, iteration: 273986
loss: 1.0318714380264282,grad_norm: 0.9384784874249706, iteration: 273987
loss: 1.0788187980651855,grad_norm: 0.9063597279898479, iteration: 273988
loss: 1.0166983604431152,grad_norm: 0.8337542208753309, iteration: 273989
loss: 1.055412769317627,grad_norm: 0.9931663110408082, iteration: 273990
loss: 1.0094581842422485,grad_norm: 0.9010134523102654, iteration: 273991
loss: 0.9933479428291321,grad_norm: 0.9999990125673307, iteration: 273992
loss: 1.0281615257263184,grad_norm: 0.9673251091706989, iteration: 273993
loss: 0.9967004656791687,grad_norm: 0.9999990664463747, iteration: 273994
loss: 1.021956443786621,grad_norm: 0.635692463041205, iteration: 273995
loss: 0.9983311295509338,grad_norm: 0.7957991322291224, iteration: 273996
loss: 1.003169059753418,grad_norm: 0.8126777274446528, iteration: 273997
loss: 0.9959917068481445,grad_norm: 0.8074496210716725, iteration: 273998
loss: 0.9716964960098267,grad_norm: 0.911598129928314, iteration: 273999
loss: 1.0337005853652954,grad_norm: 0.999999640171095, iteration: 274000
loss: 1.0208048820495605,grad_norm: 0.8318224549322077, iteration: 274001
loss: 1.0186513662338257,grad_norm: 0.86175624744408, iteration: 274002
loss: 1.0014904737472534,grad_norm: 0.848380498721854, iteration: 274003
loss: 1.0192984342575073,grad_norm: 0.8462053219117652, iteration: 274004
loss: 1.0182642936706543,grad_norm: 0.9408323263427769, iteration: 274005
loss: 1.0339359045028687,grad_norm: 0.9193416404329476, iteration: 274006
loss: 0.984334409236908,grad_norm: 0.9019759311730158, iteration: 274007
loss: 0.9651021957397461,grad_norm: 0.783291933858055, iteration: 274008
loss: 0.9911766648292542,grad_norm: 0.8026673380618741, iteration: 274009
loss: 1.0079888105392456,grad_norm: 0.868685036614307, iteration: 274010
loss: 0.9843732118606567,grad_norm: 0.7656724362289885, iteration: 274011
loss: 1.0073943138122559,grad_norm: 0.9999991011228859, iteration: 274012
loss: 1.1126271486282349,grad_norm: 0.9287884580382165, iteration: 274013
loss: 1.0093170404434204,grad_norm: 0.7337078361409508, iteration: 274014
loss: 0.9763809442520142,grad_norm: 0.9652607963435774, iteration: 274015
loss: 0.9960097670555115,grad_norm: 0.8422798311525801, iteration: 274016
loss: 0.9898736476898193,grad_norm: 0.8101061388026003, iteration: 274017
loss: 0.9916278123855591,grad_norm: 0.69498223456968, iteration: 274018
loss: 1.003065586090088,grad_norm: 0.8985895549267144, iteration: 274019
loss: 1.0093204975128174,grad_norm: 0.9153028518546402, iteration: 274020
loss: 0.9945294260978699,grad_norm: 0.9999990964850349, iteration: 274021
loss: 0.9936058521270752,grad_norm: 0.8216250100376506, iteration: 274022
loss: 1.0324687957763672,grad_norm: 0.9154608476498404, iteration: 274023
loss: 0.995954155921936,grad_norm: 0.8960622189702963, iteration: 274024
loss: 0.9768256545066833,grad_norm: 0.8979122109403399, iteration: 274025
loss: 1.0026885271072388,grad_norm: 0.7511262240005256, iteration: 274026
loss: 0.9958549737930298,grad_norm: 0.8526518952131208, iteration: 274027
loss: 0.9834429621696472,grad_norm: 0.80464878833902, iteration: 274028
loss: 1.0152537822723389,grad_norm: 0.8167093372769919, iteration: 274029
loss: 0.9934913516044617,grad_norm: 0.9999990433815936, iteration: 274030
loss: 0.9851611256599426,grad_norm: 0.8405096183676749, iteration: 274031
loss: 1.0051766633987427,grad_norm: 0.7132092749200009, iteration: 274032
loss: 1.0488991737365723,grad_norm: 0.9999996947370831, iteration: 274033
loss: 1.0076195001602173,grad_norm: 0.8966561624022314, iteration: 274034
loss: 0.9454255104064941,grad_norm: 0.9671358373999192, iteration: 274035
loss: 0.9670606851577759,grad_norm: 0.8437424533112796, iteration: 274036
loss: 0.9994245767593384,grad_norm: 0.9949398933768608, iteration: 274037
loss: 0.9574097394943237,grad_norm: 0.7279667562492992, iteration: 274038
loss: 0.9687969088554382,grad_norm: 0.794930776272214, iteration: 274039
loss: 1.0304712057113647,grad_norm: 0.8523866725736079, iteration: 274040
loss: 0.9890050292015076,grad_norm: 0.8500833194426729, iteration: 274041
loss: 0.9726139903068542,grad_norm: 0.8230323941224349, iteration: 274042
loss: 0.9839235544204712,grad_norm: 0.9999992147175253, iteration: 274043
loss: 0.9880350232124329,grad_norm: 0.999999012191232, iteration: 274044
loss: 1.0005230903625488,grad_norm: 0.9880680702125939, iteration: 274045
loss: 1.0184085369110107,grad_norm: 0.8525588518972991, iteration: 274046
loss: 0.9932174682617188,grad_norm: 0.9999998188119725, iteration: 274047
loss: 1.0015132427215576,grad_norm: 0.8120344022236028, iteration: 274048
loss: 0.9786932468414307,grad_norm: 0.8337919480198654, iteration: 274049
loss: 0.9787958264350891,grad_norm: 0.9285120710096905, iteration: 274050
loss: 1.026082992553711,grad_norm: 0.8642218268787554, iteration: 274051
loss: 0.9941703081130981,grad_norm: 0.922291193851329, iteration: 274052
loss: 0.9868351817131042,grad_norm: 0.9999997797391017, iteration: 274053
loss: 0.9839692711830139,grad_norm: 0.861774791750939, iteration: 274054
loss: 0.9931317567825317,grad_norm: 0.7914171325409595, iteration: 274055
loss: 0.9826067090034485,grad_norm: 0.7806727129663927, iteration: 274056
loss: 0.9898506999015808,grad_norm: 0.7464958869329557, iteration: 274057
loss: 1.0034810304641724,grad_norm: 0.8972386747419228, iteration: 274058
loss: 0.9558025598526001,grad_norm: 0.8175578530112542, iteration: 274059
loss: 1.0016272068023682,grad_norm: 0.7130583838027287, iteration: 274060
loss: 0.9889063835144043,grad_norm: 0.9999991326253082, iteration: 274061
loss: 1.0069998502731323,grad_norm: 0.7727624249683902, iteration: 274062
loss: 1.011751651763916,grad_norm: 0.8718037987335391, iteration: 274063
loss: 1.044201374053955,grad_norm: 0.999999414280531, iteration: 274064
loss: 0.993287980556488,grad_norm: 0.8381220436905584, iteration: 274065
loss: 1.0283839702606201,grad_norm: 0.8224276790667513, iteration: 274066
loss: 0.9927726984024048,grad_norm: 0.8080893701416396, iteration: 274067
loss: 1.0099079608917236,grad_norm: 0.9427285664019334, iteration: 274068
loss: 1.0821720361709595,grad_norm: 0.8524526365461041, iteration: 274069
loss: 0.9601539373397827,grad_norm: 0.743336315300632, iteration: 274070
loss: 0.9992865920066833,grad_norm: 0.7144361815481712, iteration: 274071
loss: 0.9907782673835754,grad_norm: 0.8903909622682402, iteration: 274072
loss: 1.0208666324615479,grad_norm: 0.8085856467576975, iteration: 274073
loss: 1.024131417274475,grad_norm: 0.9999997897972785, iteration: 274074
loss: 0.9610132575035095,grad_norm: 0.8554139434522103, iteration: 274075
loss: 0.9825072288513184,grad_norm: 0.8177827507533137, iteration: 274076
loss: 0.981552243232727,grad_norm: 0.8741333228400427, iteration: 274077
loss: 1.0250588655471802,grad_norm: 0.8306969043296483, iteration: 274078
loss: 1.0296897888183594,grad_norm: 0.8718521759243587, iteration: 274079
loss: 1.0187963247299194,grad_norm: 0.9096965529673291, iteration: 274080
loss: 1.0222244262695312,grad_norm: 0.8320114377866271, iteration: 274081
loss: 0.9830922484397888,grad_norm: 0.950945293150643, iteration: 274082
loss: 1.0097402334213257,grad_norm: 0.8480688439747395, iteration: 274083
loss: 1.004235863685608,grad_norm: 0.851013363102783, iteration: 274084
loss: 0.9924762845039368,grad_norm: 0.7948675874381259, iteration: 274085
loss: 1.0456366539001465,grad_norm: 0.9999994529120818, iteration: 274086
loss: 1.013272762298584,grad_norm: 0.9179842903706306, iteration: 274087
loss: 0.9659332633018494,grad_norm: 0.8285100541869685, iteration: 274088
loss: 1.0390149354934692,grad_norm: 0.9225399163486935, iteration: 274089
loss: 0.9755761027336121,grad_norm: 0.8387918776805179, iteration: 274090
loss: 1.0102572441101074,grad_norm: 0.9999997743955237, iteration: 274091
loss: 0.9847758412361145,grad_norm: 0.8719592168725765, iteration: 274092
loss: 0.998214066028595,grad_norm: 0.8316089896942567, iteration: 274093
loss: 1.0217344760894775,grad_norm: 0.9999995730616976, iteration: 274094
loss: 0.9919601082801819,grad_norm: 0.8416389195165505, iteration: 274095
loss: 0.9888072609901428,grad_norm: 0.9487282047892142, iteration: 274096
loss: 0.9981831908226013,grad_norm: 0.8420156268046495, iteration: 274097
loss: 1.0190995931625366,grad_norm: 0.9399957064992928, iteration: 274098
loss: 1.0540379285812378,grad_norm: 0.8392995437399551, iteration: 274099
loss: 1.0078798532485962,grad_norm: 0.9999989907880763, iteration: 274100
loss: 0.9924205541610718,grad_norm: 0.8620443675498556, iteration: 274101
loss: 0.9587807059288025,grad_norm: 0.9465610747408335, iteration: 274102
loss: 1.00428307056427,grad_norm: 0.9999998321423316, iteration: 274103
loss: 1.0295171737670898,grad_norm: 0.9999993370985357, iteration: 274104
loss: 1.00213623046875,grad_norm: 0.8846348310397726, iteration: 274105
loss: 1.0232950448989868,grad_norm: 0.9499160973424615, iteration: 274106
loss: 0.9957079887390137,grad_norm: 0.8169599566994782, iteration: 274107
loss: 1.0012263059616089,grad_norm: 0.9999993504427644, iteration: 274108
loss: 1.026613473892212,grad_norm: 0.9537814839738792, iteration: 274109
loss: 0.994901180267334,grad_norm: 0.9384101188396562, iteration: 274110
loss: 1.0217503309249878,grad_norm: 0.8574342437198779, iteration: 274111
loss: 1.0045230388641357,grad_norm: 0.7904608211215955, iteration: 274112
loss: 0.9892794489860535,grad_norm: 0.7556743338747969, iteration: 274113
loss: 0.9909281730651855,grad_norm: 0.9999990256829744, iteration: 274114
loss: 1.0143567323684692,grad_norm: 0.8173147221196978, iteration: 274115
loss: 1.006433129310608,grad_norm: 0.9375225751669544, iteration: 274116
loss: 0.9907480478286743,grad_norm: 0.8919982301093773, iteration: 274117
loss: 0.9691129326820374,grad_norm: 0.9412574736878483, iteration: 274118
loss: 1.03557288646698,grad_norm: 0.9750910380172196, iteration: 274119
loss: 1.0264883041381836,grad_norm: 0.8628859950655489, iteration: 274120
loss: 1.0284194946289062,grad_norm: 0.7013551610689828, iteration: 274121
loss: 0.9835778474807739,grad_norm: 0.9080648511210457, iteration: 274122
loss: 1.0030711889266968,grad_norm: 0.8705047423469825, iteration: 274123
loss: 1.0325740575790405,grad_norm: 0.9933098788768469, iteration: 274124
loss: 1.0819900035858154,grad_norm: 0.8182875848901879, iteration: 274125
loss: 0.9880549907684326,grad_norm: 0.935382574576257, iteration: 274126
loss: 1.0454236268997192,grad_norm: 0.873985558483106, iteration: 274127
loss: 1.054276466369629,grad_norm: 0.6865257682438092, iteration: 274128
loss: 1.014244556427002,grad_norm: 0.8700214289251471, iteration: 274129
loss: 0.9584481716156006,grad_norm: 0.9999991113105097, iteration: 274130
loss: 0.9938958287239075,grad_norm: 0.9476840341271258, iteration: 274131
loss: 1.0204459428787231,grad_norm: 0.9947798012290394, iteration: 274132
loss: 0.991648256778717,grad_norm: 0.9799573022608536, iteration: 274133
loss: 0.99514240026474,grad_norm: 0.8175432826806699, iteration: 274134
loss: 0.9727322459220886,grad_norm: 0.8783494809625021, iteration: 274135
loss: 1.0034270286560059,grad_norm: 0.8751846963440049, iteration: 274136
loss: 0.9804657697677612,grad_norm: 0.7456807840802645, iteration: 274137
loss: 1.0056605339050293,grad_norm: 0.9999989552201669, iteration: 274138
loss: 0.9398255944252014,grad_norm: 0.8355061873138159, iteration: 274139
loss: 0.9724113345146179,grad_norm: 0.8395142726391299, iteration: 274140
loss: 1.020265817642212,grad_norm: 0.8202453601683843, iteration: 274141
loss: 1.0220038890838623,grad_norm: 0.7426020944317664, iteration: 274142
loss: 1.0180107355117798,grad_norm: 0.9999994265280145, iteration: 274143
loss: 0.9921051859855652,grad_norm: 0.9216782955085169, iteration: 274144
loss: 1.0271977186203003,grad_norm: 0.9999990595697605, iteration: 274145
loss: 1.0078603029251099,grad_norm: 0.9109508996273078, iteration: 274146
loss: 0.9975131750106812,grad_norm: 0.9764551506562227, iteration: 274147
loss: 1.0008587837219238,grad_norm: 0.780225012621228, iteration: 274148
loss: 1.029443621635437,grad_norm: 0.7779108059647953, iteration: 274149
loss: 0.9740485548973083,grad_norm: 0.9999990413564636, iteration: 274150
loss: 0.990608811378479,grad_norm: 0.7710445019656712, iteration: 274151
loss: 1.0643175840377808,grad_norm: 0.9999998128964918, iteration: 274152
loss: 1.0424607992172241,grad_norm: 0.9999991118531248, iteration: 274153
loss: 0.9724472761154175,grad_norm: 0.9738593781507525, iteration: 274154
loss: 1.0035574436187744,grad_norm: 0.8806979656998627, iteration: 274155
loss: 1.0255482196807861,grad_norm: 0.8958791569058003, iteration: 274156
loss: 0.9952120184898376,grad_norm: 0.9084340659076727, iteration: 274157
loss: 0.9837862849235535,grad_norm: 0.9413160174243775, iteration: 274158
loss: 0.985118567943573,grad_norm: 0.9092652140960422, iteration: 274159
loss: 0.9959428310394287,grad_norm: 0.8952617574329405, iteration: 274160
loss: 0.9942529797554016,grad_norm: 0.7760731835519412, iteration: 274161
loss: 0.9656655788421631,grad_norm: 0.941222819566256, iteration: 274162
loss: 0.9989556074142456,grad_norm: 0.84947000121295, iteration: 274163
loss: 0.9788275361061096,grad_norm: 0.9286336468294065, iteration: 274164
loss: 1.0080938339233398,grad_norm: 0.8915697108772893, iteration: 274165
loss: 1.023728609085083,grad_norm: 0.8478275542829854, iteration: 274166
loss: 1.0019320249557495,grad_norm: 0.9952828410121223, iteration: 274167
loss: 0.986528754234314,grad_norm: 0.8710046246442691, iteration: 274168
loss: 0.9894896149635315,grad_norm: 0.8081056559911072, iteration: 274169
loss: 1.1084246635437012,grad_norm: 0.9999999085324688, iteration: 274170
loss: 1.059599757194519,grad_norm: 0.9268492020238587, iteration: 274171
loss: 0.9768916964530945,grad_norm: 0.8119095405190496, iteration: 274172
loss: 1.0154087543487549,grad_norm: 0.885593093077903, iteration: 274173
loss: 0.9597141146659851,grad_norm: 0.7875548792333216, iteration: 274174
loss: 1.0070520639419556,grad_norm: 0.951810178999703, iteration: 274175
loss: 0.9784098267555237,grad_norm: 0.9469583797209995, iteration: 274176
loss: 0.9777935147285461,grad_norm: 0.9866724938266046, iteration: 274177
loss: 0.9910234808921814,grad_norm: 0.8508071528065047, iteration: 274178
loss: 0.9851385951042175,grad_norm: 0.9999992032971095, iteration: 274179
loss: 1.0069161653518677,grad_norm: 0.9999990043528937, iteration: 274180
loss: 0.9873952865600586,grad_norm: 0.7193310761099696, iteration: 274181
loss: 0.9785967469215393,grad_norm: 0.9999995855726006, iteration: 274182
loss: 0.9997243285179138,grad_norm: 0.8595088610090783, iteration: 274183
loss: 0.9899282455444336,grad_norm: 0.9894694234431657, iteration: 274184
loss: 1.0340479612350464,grad_norm: 0.9999991366643501, iteration: 274185
loss: 1.0300196409225464,grad_norm: 0.9601456610474921, iteration: 274186
loss: 1.0021322965621948,grad_norm: 0.7907836935054338, iteration: 274187
loss: 1.0093823671340942,grad_norm: 0.9407860300403023, iteration: 274188
loss: 1.019770860671997,grad_norm: 0.793604874272285, iteration: 274189
loss: 1.059710144996643,grad_norm: 0.9852105057287658, iteration: 274190
loss: 0.9908345937728882,grad_norm: 0.8199451299198954, iteration: 274191
loss: 1.0105924606323242,grad_norm: 0.8473845340077928, iteration: 274192
loss: 0.9938023686408997,grad_norm: 0.999999152604798, iteration: 274193
loss: 0.9898281693458557,grad_norm: 0.9353426588560272, iteration: 274194
loss: 0.968390941619873,grad_norm: 0.9273141441141298, iteration: 274195
loss: 0.9967582821846008,grad_norm: 0.8143210163505696, iteration: 274196
loss: 0.9996872544288635,grad_norm: 0.912347590488589, iteration: 274197
loss: 0.9801111817359924,grad_norm: 0.7244429342144241, iteration: 274198
loss: 1.0299780368804932,grad_norm: 0.9999992968942019, iteration: 274199
loss: 0.9848231077194214,grad_norm: 0.7255695842324486, iteration: 274200
loss: 1.0124245882034302,grad_norm: 0.8021528374683394, iteration: 274201
loss: 1.0892643928527832,grad_norm: 0.9999993176705481, iteration: 274202
loss: 1.0265671014785767,grad_norm: 0.999999180486878, iteration: 274203
loss: 0.9934227466583252,grad_norm: 0.8709340216973263, iteration: 274204
loss: 1.010768175125122,grad_norm: 0.8100404279924976, iteration: 274205
loss: 1.0854672193527222,grad_norm: 0.9964090815914093, iteration: 274206
loss: 1.0367079973220825,grad_norm: 0.823999065737508, iteration: 274207
loss: 1.0831800699234009,grad_norm: 0.9999990310926419, iteration: 274208
loss: 0.9558848142623901,grad_norm: 0.8961159541641152, iteration: 274209
loss: 1.029139757156372,grad_norm: 0.999999192313746, iteration: 274210
loss: 0.9985745549201965,grad_norm: 0.8359525613653145, iteration: 274211
loss: 0.9679189920425415,grad_norm: 0.9999992758676962, iteration: 274212
loss: 1.0841535329818726,grad_norm: 0.9113071460755129, iteration: 274213
loss: 0.9911695718765259,grad_norm: 0.8127730883894283, iteration: 274214
loss: 0.984666109085083,grad_norm: 0.9999991321834232, iteration: 274215
loss: 1.0362193584442139,grad_norm: 0.8558667650697956, iteration: 274216
loss: 1.024733543395996,grad_norm: 0.9999990764827826, iteration: 274217
loss: 1.0286400318145752,grad_norm: 0.8623440377052239, iteration: 274218
loss: 1.0482470989227295,grad_norm: 0.9529369928698408, iteration: 274219
loss: 1.0160623788833618,grad_norm: 0.8149028232410199, iteration: 274220
loss: 0.967893660068512,grad_norm: 0.8228817896509878, iteration: 274221
loss: 1.0097100734710693,grad_norm: 0.778719358266038, iteration: 274222
loss: 1.0051189661026,grad_norm: 0.7103493423524416, iteration: 274223
loss: 0.9916645288467407,grad_norm: 0.9344675200212254, iteration: 274224
loss: 0.9917713403701782,grad_norm: 0.952937373770298, iteration: 274225
loss: 0.9986519813537598,grad_norm: 0.8547331030676473, iteration: 274226
loss: 1.0134295225143433,grad_norm: 0.870066055093811, iteration: 274227
loss: 0.9655925035476685,grad_norm: 0.9999991598425709, iteration: 274228
loss: 0.9897241592407227,grad_norm: 0.9842710883165551, iteration: 274229
loss: 0.995033860206604,grad_norm: 0.8959995011790377, iteration: 274230
loss: 0.9910609126091003,grad_norm: 0.9967042944726182, iteration: 274231
loss: 1.0094807147979736,grad_norm: 0.9119261298907533, iteration: 274232
loss: 0.9484642744064331,grad_norm: 0.8882490586645587, iteration: 274233
loss: 1.0050171613693237,grad_norm: 0.836796867862938, iteration: 274234
loss: 0.9893736839294434,grad_norm: 0.7830318125903598, iteration: 274235
loss: 0.9909873008728027,grad_norm: 0.9999990894033904, iteration: 274236
loss: 0.9622786641120911,grad_norm: 0.8666385529065259, iteration: 274237
loss: 1.0070031881332397,grad_norm: 0.7667765637090528, iteration: 274238
loss: 1.0145305395126343,grad_norm: 0.999999065370235, iteration: 274239
loss: 1.0111749172210693,grad_norm: 0.9999990874944648, iteration: 274240
loss: 1.0029431581497192,grad_norm: 0.8355573768872039, iteration: 274241
loss: 1.0578254461288452,grad_norm: 0.9435508461677575, iteration: 274242
loss: 0.9842085242271423,grad_norm: 0.7670323514645131, iteration: 274243
loss: 0.955634593963623,grad_norm: 0.8820256473256639, iteration: 274244
loss: 0.9947057962417603,grad_norm: 0.9263133222178772, iteration: 274245
loss: 1.0517151355743408,grad_norm: 0.944403780888005, iteration: 274246
loss: 0.9815199971199036,grad_norm: 0.9715269795842176, iteration: 274247
loss: 1.0070390701293945,grad_norm: 0.820724657945911, iteration: 274248
loss: 1.0068413019180298,grad_norm: 0.793268670787406, iteration: 274249
loss: 0.9876425266265869,grad_norm: 0.7943208679208794, iteration: 274250
loss: 0.9845065474510193,grad_norm: 0.8707086045727547, iteration: 274251
loss: 1.029640793800354,grad_norm: 0.8062881487445809, iteration: 274252
loss: 1.024134874343872,grad_norm: 0.8501332762950029, iteration: 274253
loss: 0.9937090277671814,grad_norm: 0.9656109329683906, iteration: 274254
loss: 0.9909268617630005,grad_norm: 0.7700124275389763, iteration: 274255
loss: 1.0052133798599243,grad_norm: 0.8940750989830963, iteration: 274256
loss: 0.9662739038467407,grad_norm: 0.7758524033124569, iteration: 274257
loss: 1.0333971977233887,grad_norm: 0.91148465648428, iteration: 274258
loss: 0.992790937423706,grad_norm: 0.999999032581118, iteration: 274259
loss: 0.9634293913841248,grad_norm: 0.8814108539733193, iteration: 274260
loss: 0.9670253992080688,grad_norm: 0.7913115465748826, iteration: 274261
loss: 1.021254539489746,grad_norm: 0.8208399782766488, iteration: 274262
loss: 1.0126783847808838,grad_norm: 0.9999992219028896, iteration: 274263
loss: 1.0086408853530884,grad_norm: 0.9804880704587522, iteration: 274264
loss: 1.0214693546295166,grad_norm: 0.827164488521681, iteration: 274265
loss: 1.0042515993118286,grad_norm: 0.8990669589906156, iteration: 274266
loss: 0.9820659756660461,grad_norm: 0.8950523042498795, iteration: 274267
loss: 0.9506288766860962,grad_norm: 0.8667110087354908, iteration: 274268
loss: 1.0867849588394165,grad_norm: 0.7993907167405399, iteration: 274269
loss: 0.9429090619087219,grad_norm: 0.9999991671167465, iteration: 274270
loss: 0.9885507822036743,grad_norm: 0.8877450811435702, iteration: 274271
loss: 0.9980068802833557,grad_norm: 0.887103909415189, iteration: 274272
loss: 1.0503268241882324,grad_norm: 0.9999997837022206, iteration: 274273
loss: 0.9820262789726257,grad_norm: 0.8224059003945887, iteration: 274274
loss: 1.0344208478927612,grad_norm: 0.9999991591322653, iteration: 274275
loss: 1.0712169408798218,grad_norm: 0.9999997217603, iteration: 274276
loss: 0.9837154150009155,grad_norm: 0.8990612758131613, iteration: 274277
loss: 0.9950064420700073,grad_norm: 0.9217354555669015, iteration: 274278
loss: 0.986754298210144,grad_norm: 0.9512452163460321, iteration: 274279
loss: 0.9659867286682129,grad_norm: 0.7843803447752468, iteration: 274280
loss: 0.9792404174804688,grad_norm: 0.8061564598242592, iteration: 274281
loss: 1.0096423625946045,grad_norm: 0.9235348086759653, iteration: 274282
loss: 1.0514492988586426,grad_norm: 0.9999991368655324, iteration: 274283
loss: 1.0393458604812622,grad_norm: 0.932159115221607, iteration: 274284
loss: 0.9864428043365479,grad_norm: 0.8846371600470341, iteration: 274285
loss: 1.032612919807434,grad_norm: 0.9999991928309274, iteration: 274286
loss: 0.9517889618873596,grad_norm: 0.9973666277539835, iteration: 274287
loss: 1.0218371152877808,grad_norm: 0.8580087912266267, iteration: 274288
loss: 0.9662379622459412,grad_norm: 0.7553401026352443, iteration: 274289
loss: 1.00569748878479,grad_norm: 0.9401935584587837, iteration: 274290
loss: 1.0354090929031372,grad_norm: 0.9999990376842621, iteration: 274291
loss: 1.0180898904800415,grad_norm: 0.9999993156595066, iteration: 274292
loss: 0.969258189201355,grad_norm: 0.9802963434068128, iteration: 274293
loss: 0.9698142409324646,grad_norm: 0.9583086684817925, iteration: 274294
loss: 1.029700756072998,grad_norm: 0.8884507973705342, iteration: 274295
loss: 1.0084766149520874,grad_norm: 0.9999992408457515, iteration: 274296
loss: 0.9939830303192139,grad_norm: 0.7069708383182612, iteration: 274297
loss: 0.9724718928337097,grad_norm: 0.928727310700635, iteration: 274298
loss: 1.014716386795044,grad_norm: 0.7390249495780423, iteration: 274299
loss: 1.0225378274917603,grad_norm: 0.9999997188170862, iteration: 274300
loss: 0.9933578372001648,grad_norm: 0.8554344678109894, iteration: 274301
loss: 1.0194814205169678,grad_norm: 0.9195128934442679, iteration: 274302
loss: 0.997665524482727,grad_norm: 0.9582500188543988, iteration: 274303
loss: 0.9963236451148987,grad_norm: 0.9150157691303616, iteration: 274304
loss: 0.9969877600669861,grad_norm: 0.88069739525613, iteration: 274305
loss: 0.9905598759651184,grad_norm: 0.871073604495711, iteration: 274306
loss: 1.0037106275558472,grad_norm: 0.9999998164986496, iteration: 274307
loss: 0.989551842212677,grad_norm: 0.8077190808839531, iteration: 274308
loss: 1.004485845565796,grad_norm: 0.9510029674135093, iteration: 274309
loss: 1.0110902786254883,grad_norm: 0.9999992487528313, iteration: 274310
loss: 1.0250455141067505,grad_norm: 0.9999993085547162, iteration: 274311
loss: 0.9942113161087036,grad_norm: 0.9224479901770524, iteration: 274312
loss: 0.9885919690132141,grad_norm: 0.8104404622159196, iteration: 274313
loss: 0.9716395139694214,grad_norm: 0.8362325118791281, iteration: 274314
loss: 0.9865106344223022,grad_norm: 0.99999909038343, iteration: 274315
loss: 1.0229331254959106,grad_norm: 0.9510783040676379, iteration: 274316
loss: 1.020331621170044,grad_norm: 0.8274556014755605, iteration: 274317
loss: 1.0142797231674194,grad_norm: 0.9286103760314183, iteration: 274318
loss: 0.9770662188529968,grad_norm: 0.7695631937477453, iteration: 274319
loss: 0.9853304028511047,grad_norm: 0.9999991707645413, iteration: 274320
loss: 1.0252763032913208,grad_norm: 0.9853019955538298, iteration: 274321
loss: 0.9672338366508484,grad_norm: 0.8330877871496314, iteration: 274322
loss: 1.181084156036377,grad_norm: 0.9999998432842166, iteration: 274323
loss: 0.9470206499099731,grad_norm: 0.9999990960371141, iteration: 274324
loss: 1.0552881956100464,grad_norm: 0.9687636247818477, iteration: 274325
loss: 1.0160387754440308,grad_norm: 0.8161521704889824, iteration: 274326
loss: 1.0173773765563965,grad_norm: 0.981446322952371, iteration: 274327
loss: 0.99141925573349,grad_norm: 0.9652642120574152, iteration: 274328
loss: 1.0468930006027222,grad_norm: 0.7636793887501376, iteration: 274329
loss: 0.9917652010917664,grad_norm: 0.9614159557533934, iteration: 274330
loss: 0.9907238483428955,grad_norm: 0.8232089551255787, iteration: 274331
loss: 1.0020897388458252,grad_norm: 0.9417925137773693, iteration: 274332
loss: 1.0054899454116821,grad_norm: 0.6996716394976026, iteration: 274333
loss: 1.0558032989501953,grad_norm: 0.9999993396036694, iteration: 274334
loss: 0.9965845346450806,grad_norm: 0.7688588781975908, iteration: 274335
loss: 0.9944103956222534,grad_norm: 0.9999990605984344, iteration: 274336
loss: 1.055038332939148,grad_norm: 0.7563151820568976, iteration: 274337
loss: 1.256757378578186,grad_norm: 0.9999998129534651, iteration: 274338
loss: 0.9929286241531372,grad_norm: 0.7079216532556888, iteration: 274339
loss: 1.021794080734253,grad_norm: 0.8078333379219405, iteration: 274340
loss: 0.9653928875923157,grad_norm: 0.8051602505947064, iteration: 274341
loss: 1.0332165956497192,grad_norm: 0.8363619706595584, iteration: 274342
loss: 0.9997785687446594,grad_norm: 0.7844236547859454, iteration: 274343
loss: 0.9991968274116516,grad_norm: 0.947484751335572, iteration: 274344
loss: 0.9658154249191284,grad_norm: 0.9186976026176937, iteration: 274345
loss: 0.9583284258842468,grad_norm: 0.9999991139109023, iteration: 274346
loss: 1.003023386001587,grad_norm: 0.8498021222179579, iteration: 274347
loss: 1.0130373239517212,grad_norm: 0.8722873907177633, iteration: 274348
loss: 0.9943642616271973,grad_norm: 0.999999056385012, iteration: 274349
loss: 0.9683878421783447,grad_norm: 0.8633986191467238, iteration: 274350
loss: 1.0125889778137207,grad_norm: 0.8927248112366057, iteration: 274351
loss: 0.9971310496330261,grad_norm: 0.864727036959184, iteration: 274352
loss: 1.006350040435791,grad_norm: 0.9007098527294793, iteration: 274353
loss: 1.0552510023117065,grad_norm: 0.999999898823222, iteration: 274354
loss: 0.9998254179954529,grad_norm: 0.7595483889947465, iteration: 274355
loss: 0.9985907077789307,grad_norm: 0.9999989545839071, iteration: 274356
loss: 0.9934768676757812,grad_norm: 0.9844842578277023, iteration: 274357
loss: 1.001685619354248,grad_norm: 0.9040893426875117, iteration: 274358
loss: 0.9897416234016418,grad_norm: 0.8712482665709318, iteration: 274359
loss: 1.0184725522994995,grad_norm: 0.9974287437851383, iteration: 274360
loss: 1.048688530921936,grad_norm: 0.9999993802564695, iteration: 274361
loss: 0.9905485510826111,grad_norm: 0.8255222833075355, iteration: 274362
loss: 1.0000362396240234,grad_norm: 0.9046497100378103, iteration: 274363
loss: 0.9373117089271545,grad_norm: 0.85667316979809, iteration: 274364
loss: 1.0010275840759277,grad_norm: 0.7833696528338041, iteration: 274365
loss: 1.0208632946014404,grad_norm: 0.9999993047161109, iteration: 274366
loss: 0.9826974868774414,grad_norm: 0.9976124925714817, iteration: 274367
loss: 1.0205193758010864,grad_norm: 0.8376317652143852, iteration: 274368
loss: 1.0368133783340454,grad_norm: 0.9437649478197131, iteration: 274369
loss: 0.9644806981086731,grad_norm: 0.9634935745821861, iteration: 274370
loss: 0.9948254823684692,grad_norm: 0.9959151795604246, iteration: 274371
loss: 0.9978524446487427,grad_norm: 0.9236722725783993, iteration: 274372
loss: 1.00090754032135,grad_norm: 0.7982032459878461, iteration: 274373
loss: 1.027607798576355,grad_norm: 0.99999905442877, iteration: 274374
loss: 0.9917432069778442,grad_norm: 1.00000001951948, iteration: 274375
loss: 0.9752797484397888,grad_norm: 0.8559780531135865, iteration: 274376
loss: 0.9675719141960144,grad_norm: 0.7735047199422526, iteration: 274377
loss: 1.007420539855957,grad_norm: 0.9681989562731874, iteration: 274378
loss: 1.0338022708892822,grad_norm: 0.8281520083348152, iteration: 274379
loss: 0.9940060973167419,grad_norm: 0.7530441805688344, iteration: 274380
loss: 0.996600866317749,grad_norm: 0.9137307368904563, iteration: 274381
loss: 1.063348650932312,grad_norm: 0.9999990355414131, iteration: 274382
loss: 1.030871033668518,grad_norm: 0.7997643100568788, iteration: 274383
loss: 0.9598668217658997,grad_norm: 0.99999969918306, iteration: 274384
loss: 0.991963267326355,grad_norm: 0.999999057093047, iteration: 274385
loss: 0.9580808877944946,grad_norm: 0.9214386122771437, iteration: 274386
loss: 1.0158683061599731,grad_norm: 0.9296997283832658, iteration: 274387
loss: 1.0716652870178223,grad_norm: 0.999999593972969, iteration: 274388
loss: 1.014885663986206,grad_norm: 0.9913175835974011, iteration: 274389
loss: 1.004212737083435,grad_norm: 0.7872538980444773, iteration: 274390
loss: 1.013126015663147,grad_norm: 0.9659006627366434, iteration: 274391
loss: 1.006749153137207,grad_norm: 0.8334035920472839, iteration: 274392
loss: 1.0163487195968628,grad_norm: 0.9999992300109908, iteration: 274393
loss: 0.9723525047302246,grad_norm: 0.8215179237217295, iteration: 274394
loss: 0.9973195791244507,grad_norm: 0.826605161661703, iteration: 274395
loss: 0.9751108884811401,grad_norm: 0.757897383736664, iteration: 274396
loss: 0.9742812514305115,grad_norm: 0.8491542948650723, iteration: 274397
loss: 1.0032330751419067,grad_norm: 0.7866032007977227, iteration: 274398
loss: 1.003828525543213,grad_norm: 0.8912106360930272, iteration: 274399
loss: 0.9965798258781433,grad_norm: 0.9633564879025038, iteration: 274400
loss: 1.0199379920959473,grad_norm: 0.789494071159368, iteration: 274401
loss: 1.0357303619384766,grad_norm: 0.8629944330066103, iteration: 274402
loss: 0.9918901324272156,grad_norm: 0.7777515481450747, iteration: 274403
loss: 0.9955089092254639,grad_norm: 0.9999991044470572, iteration: 274404
loss: 1.1172881126403809,grad_norm: 0.9999993779105306, iteration: 274405
loss: 0.9959726929664612,grad_norm: 0.8719792115623932, iteration: 274406
loss: 1.0291379690170288,grad_norm: 0.9999997688465628, iteration: 274407
loss: 0.9781984090805054,grad_norm: 0.7834144931676837, iteration: 274408
loss: 0.9681664705276489,grad_norm: 0.930639261295326, iteration: 274409
loss: 1.025511384010315,grad_norm: 0.8766651987467192, iteration: 274410
loss: 0.9783846735954285,grad_norm: 0.899164456744739, iteration: 274411
loss: 1.019503116607666,grad_norm: 0.9417523852656159, iteration: 274412
loss: 0.9643409848213196,grad_norm: 0.92185641776874, iteration: 274413
loss: 1.0019571781158447,grad_norm: 0.8526180664027622, iteration: 274414
loss: 0.9950699210166931,grad_norm: 0.999999818854903, iteration: 274415
loss: 1.0201140642166138,grad_norm: 0.7752974810129788, iteration: 274416
loss: 0.9880053400993347,grad_norm: 0.9123017908971899, iteration: 274417
loss: 0.9861248135566711,grad_norm: 0.7710297740960397, iteration: 274418
loss: 1.0137076377868652,grad_norm: 0.8823312447174906, iteration: 274419
loss: 0.9832061529159546,grad_norm: 0.7743046007849623, iteration: 274420
loss: 0.9910999536514282,grad_norm: 0.8523047369022552, iteration: 274421
loss: 0.9995272755622864,grad_norm: 0.8984475889805128, iteration: 274422
loss: 0.9883649945259094,grad_norm: 0.8431328698099994, iteration: 274423
loss: 1.0148824453353882,grad_norm: 0.7655317647263987, iteration: 274424
loss: 0.9783058762550354,grad_norm: 0.9432881543511801, iteration: 274425
loss: 1.035495400428772,grad_norm: 0.9296989020354156, iteration: 274426
loss: 0.9719799757003784,grad_norm: 0.8517103824185777, iteration: 274427
loss: 0.9827647805213928,grad_norm: 0.8483714226056905, iteration: 274428
loss: 1.0363811254501343,grad_norm: 0.9756091369166622, iteration: 274429
loss: 1.0018839836120605,grad_norm: 0.884178901987031, iteration: 274430
loss: 0.992807149887085,grad_norm: 0.8050970685753798, iteration: 274431
loss: 1.0040982961654663,grad_norm: 0.899152487602729, iteration: 274432
loss: 0.9827303290367126,grad_norm: 0.8534757692417363, iteration: 274433
loss: 1.0276856422424316,grad_norm: 0.9999991404656, iteration: 274434
loss: 1.0020744800567627,grad_norm: 0.8256926384174156, iteration: 274435
loss: 1.0316601991653442,grad_norm: 0.9648212607885509, iteration: 274436
loss: 1.0300875902175903,grad_norm: 0.853196597278866, iteration: 274437
loss: 0.9891504645347595,grad_norm: 0.8825487908166398, iteration: 274438
loss: 0.9763304591178894,grad_norm: 0.9999991962605267, iteration: 274439
loss: 0.9960100054740906,grad_norm: 0.941023569077571, iteration: 274440
loss: 0.9695101976394653,grad_norm: 0.9554356489424837, iteration: 274441
loss: 0.9961410164833069,grad_norm: 0.8891555349748771, iteration: 274442
loss: 0.9959526658058167,grad_norm: 0.9999991425426854, iteration: 274443
loss: 0.9958746433258057,grad_norm: 0.906794748247345, iteration: 274444
loss: 1.0137704610824585,grad_norm: 0.994616276262556, iteration: 274445
loss: 0.9991051554679871,grad_norm: 0.9498840957496586, iteration: 274446
loss: 0.9811722636222839,grad_norm: 0.9710109856820097, iteration: 274447
loss: 0.9973831176757812,grad_norm: 0.8511166253886421, iteration: 274448
loss: 0.9701166749000549,grad_norm: 0.8961019175522964, iteration: 274449
loss: 1.0196021795272827,grad_norm: 0.9999993743390413, iteration: 274450
loss: 1.019385814666748,grad_norm: 0.8552660871454117, iteration: 274451
loss: 1.0217994451522827,grad_norm: 0.8725779566006272, iteration: 274452
loss: 1.0070806741714478,grad_norm: 0.8096670234571188, iteration: 274453
loss: 1.0145889520645142,grad_norm: 0.795357809641366, iteration: 274454
loss: 0.9888201951980591,grad_norm: 0.8198346347231428, iteration: 274455
loss: 1.0133038759231567,grad_norm: 0.8251220596292984, iteration: 274456
loss: 1.0076848268508911,grad_norm: 0.8758024863854128, iteration: 274457
loss: 0.9691562056541443,grad_norm: 0.9477585789387588, iteration: 274458
loss: 0.9761937856674194,grad_norm: 0.8445871881559703, iteration: 274459
loss: 0.9877422451972961,grad_norm: 0.6820132018072013, iteration: 274460
loss: 0.9799354672431946,grad_norm: 0.7805878694463374, iteration: 274461
loss: 0.9958401322364807,grad_norm: 0.7222469420238098, iteration: 274462
loss: 1.010042667388916,grad_norm: 0.7093087150570515, iteration: 274463
loss: 0.9769383668899536,grad_norm: 0.9953159303008555, iteration: 274464
loss: 1.0044914484024048,grad_norm: 0.9999990140798486, iteration: 274465
loss: 0.9518435597419739,grad_norm: 0.824489547311382, iteration: 274466
loss: 0.9378370642662048,grad_norm: 0.9218239126009692, iteration: 274467
loss: 1.0020514726638794,grad_norm: 0.9681468550710423, iteration: 274468
loss: 1.0086102485656738,grad_norm: 0.7747607779428186, iteration: 274469
loss: 0.97258460521698,grad_norm: 0.893991113156329, iteration: 274470
loss: 0.9478795528411865,grad_norm: 0.9999991523619867, iteration: 274471
loss: 1.0216121673583984,grad_norm: 0.751699939838838, iteration: 274472
loss: 0.9965918660163879,grad_norm: 0.9999991974531647, iteration: 274473
loss: 0.9910730719566345,grad_norm: 0.784080945710171, iteration: 274474
loss: 1.004090428352356,grad_norm: 0.8429921067134128, iteration: 274475
loss: 1.0277429819107056,grad_norm: 0.7466714219873806, iteration: 274476
loss: 1.003932237625122,grad_norm: 0.8718535624480946, iteration: 274477
loss: 1.0460994243621826,grad_norm: 0.9999992940995994, iteration: 274478
loss: 0.988480269908905,grad_norm: 0.9496883848732389, iteration: 274479
loss: 1.0231660604476929,grad_norm: 0.8784828042806344, iteration: 274480
loss: 0.9880911707878113,grad_norm: 0.8041427354972243, iteration: 274481
loss: 1.0159052610397339,grad_norm: 0.9259546518563025, iteration: 274482
loss: 1.0073432922363281,grad_norm: 0.7832502104515314, iteration: 274483
loss: 1.0049304962158203,grad_norm: 0.9156991643934308, iteration: 274484
loss: 1.0002018213272095,grad_norm: 0.9999991376376738, iteration: 274485
loss: 1.0302319526672363,grad_norm: 0.9999992838293079, iteration: 274486
loss: 1.0249028205871582,grad_norm: 0.9542783648054852, iteration: 274487
loss: 1.0189701318740845,grad_norm: 0.8938148812483019, iteration: 274488
loss: 1.0306904315948486,grad_norm: 0.9191092202286334, iteration: 274489
loss: 1.0443826913833618,grad_norm: 0.9999995354286616, iteration: 274490
loss: 0.9922242760658264,grad_norm: 0.7907453451923551, iteration: 274491
loss: 1.0049387216567993,grad_norm: 0.8302030063257004, iteration: 274492
loss: 0.9851637482643127,grad_norm: 0.9455234753745346, iteration: 274493
loss: 1.018845796585083,grad_norm: 0.7692788220795899, iteration: 274494
loss: 0.9607505202293396,grad_norm: 0.9150926295442132, iteration: 274495
loss: 0.9965669512748718,grad_norm: 0.8925757486919466, iteration: 274496
loss: 0.986555814743042,grad_norm: 0.8690470309279951, iteration: 274497
loss: 0.9861548542976379,grad_norm: 0.9999992280524217, iteration: 274498
loss: 0.9793043732643127,grad_norm: 0.8150182974945255, iteration: 274499
loss: 0.9826236963272095,grad_norm: 0.8013200813375975, iteration: 274500
loss: 0.9895462393760681,grad_norm: 0.7315689932531688, iteration: 274501
loss: 1.0135462284088135,grad_norm: 0.7510385027349628, iteration: 274502
loss: 1.0056287050247192,grad_norm: 0.8431351109014122, iteration: 274503
loss: 1.0402365922927856,grad_norm: 0.9999995545865071, iteration: 274504
loss: 0.9886710047721863,grad_norm: 0.7633366619444433, iteration: 274505
loss: 1.0174517631530762,grad_norm: 0.9999990409015134, iteration: 274506
loss: 0.986421525478363,grad_norm: 0.7851858602971209, iteration: 274507
loss: 0.9816179871559143,grad_norm: 0.7953738686437126, iteration: 274508
loss: 1.0096991062164307,grad_norm: 0.8465715475869541, iteration: 274509
loss: 0.9760600924491882,grad_norm: 0.9999991235668713, iteration: 274510
loss: 0.9892574548721313,grad_norm: 0.8053764479535863, iteration: 274511
loss: 1.0202571153640747,grad_norm: 0.9769354644563056, iteration: 274512
loss: 1.0289982557296753,grad_norm: 0.9999997636880059, iteration: 274513
loss: 0.9878276586532593,grad_norm: 0.9124934946045412, iteration: 274514
loss: 1.0555161237716675,grad_norm: 0.91710241174524, iteration: 274515
loss: 0.9835382699966431,grad_norm: 0.8963396447249854, iteration: 274516
loss: 0.9880104660987854,grad_norm: 0.8174843484733243, iteration: 274517
loss: 1.0318794250488281,grad_norm: 0.9669136534960278, iteration: 274518
loss: 0.9914471507072449,grad_norm: 0.9079174078160474, iteration: 274519
loss: 1.0002483129501343,grad_norm: 0.8201129729559902, iteration: 274520
loss: 0.9749702215194702,grad_norm: 0.9149785150873292, iteration: 274521
loss: 0.9656983017921448,grad_norm: 0.7706086001060296, iteration: 274522
loss: 0.9374122619628906,grad_norm: 0.8910182388867682, iteration: 274523
loss: 0.982774555683136,grad_norm: 0.7703692073453616, iteration: 274524
loss: 0.9858231544494629,grad_norm: 0.807326620062804, iteration: 274525
loss: 0.9998008608818054,grad_norm: 0.9466226574121559, iteration: 274526
loss: 0.9959913492202759,grad_norm: 0.9076711845628231, iteration: 274527
loss: 0.9872172474861145,grad_norm: 0.8991649952553867, iteration: 274528
loss: 0.9632459878921509,grad_norm: 0.8756614685249435, iteration: 274529
loss: 0.9769094586372375,grad_norm: 0.8570111448816436, iteration: 274530
loss: 1.019866943359375,grad_norm: 0.9661946131226424, iteration: 274531
loss: 1.0390691757202148,grad_norm: 0.9999999583197875, iteration: 274532
loss: 1.0002275705337524,grad_norm: 0.9376967754980354, iteration: 274533
loss: 0.9878442883491516,grad_norm: 0.8399594062445902, iteration: 274534
loss: 0.9731377959251404,grad_norm: 0.999999008919623, iteration: 274535
loss: 1.0246094465255737,grad_norm: 0.8637090259346182, iteration: 274536
loss: 0.9867085814476013,grad_norm: 0.9114050300798321, iteration: 274537
loss: 1.0090019702911377,grad_norm: 0.7737674238854511, iteration: 274538
loss: 1.034211277961731,grad_norm: 0.9999991106875769, iteration: 274539
loss: 0.9984422326087952,grad_norm: 0.7836955781006739, iteration: 274540
loss: 0.9755659699440002,grad_norm: 0.9837610074679002, iteration: 274541
loss: 1.0536102056503296,grad_norm: 0.9999991809231727, iteration: 274542
loss: 1.0222773551940918,grad_norm: 0.9999989348806876, iteration: 274543
loss: 1.0251357555389404,grad_norm: 0.7569900765533312, iteration: 274544
loss: 1.0161166191101074,grad_norm: 0.9999990662611719, iteration: 274545
loss: 1.002323031425476,grad_norm: 0.8791917556593758, iteration: 274546
loss: 0.9792991876602173,grad_norm: 0.9190490197383874, iteration: 274547
loss: 0.970306932926178,grad_norm: 0.9067048106294782, iteration: 274548
loss: 1.022702693939209,grad_norm: 0.7276186195433031, iteration: 274549
loss: 0.9855402708053589,grad_norm: 0.9999992511275745, iteration: 274550
loss: 1.0246362686157227,grad_norm: 0.8343289320378007, iteration: 274551
loss: 1.011522889137268,grad_norm: 0.9999991304852222, iteration: 274552
loss: 0.9994882941246033,grad_norm: 0.780006014232415, iteration: 274553
loss: 0.9708434343338013,grad_norm: 0.9999991821147108, iteration: 274554
loss: 0.9906278848648071,grad_norm: 0.8848774860949896, iteration: 274555
loss: 1.0283081531524658,grad_norm: 0.9999994093330266, iteration: 274556
loss: 1.0242974758148193,grad_norm: 0.999999079155935, iteration: 274557
loss: 1.0199319124221802,grad_norm: 0.7865449878613651, iteration: 274558
loss: 0.9907375574111938,grad_norm: 0.9140019769805215, iteration: 274559
loss: 1.007605791091919,grad_norm: 0.7837111480907081, iteration: 274560
loss: 1.029941439628601,grad_norm: 0.8938031356626766, iteration: 274561
loss: 1.0449525117874146,grad_norm: 0.8311709368418225, iteration: 274562
loss: 0.9517873525619507,grad_norm: 0.9365142368566418, iteration: 274563
loss: 0.9977624416351318,grad_norm: 0.7646541031104002, iteration: 274564
loss: 0.9622697830200195,grad_norm: 0.9387976701587217, iteration: 274565
loss: 1.0190691947937012,grad_norm: 0.9027368013861851, iteration: 274566
loss: 0.922926664352417,grad_norm: 0.9629954696007702, iteration: 274567
loss: 0.9934996366500854,grad_norm: 0.7350043064515127, iteration: 274568
loss: 0.9564012289047241,grad_norm: 0.89124159414022, iteration: 274569
loss: 0.974063515663147,grad_norm: 0.910942257162945, iteration: 274570
loss: 1.016494870185852,grad_norm: 0.9065624975152312, iteration: 274571
loss: 1.022491216659546,grad_norm: 0.8397947481638313, iteration: 274572
loss: 0.9988729953765869,grad_norm: 0.8710030613895129, iteration: 274573
loss: 1.0137158632278442,grad_norm: 0.8797108250900333, iteration: 274574
loss: 0.9892420172691345,grad_norm: 0.8289139648235536, iteration: 274575
loss: 0.9962995648384094,grad_norm: 0.9999990812214108, iteration: 274576
loss: 0.9775422215461731,grad_norm: 0.8091690929085137, iteration: 274577
loss: 1.018027663230896,grad_norm: 0.8376544915094658, iteration: 274578
loss: 1.0195133686065674,grad_norm: 0.9287116757878406, iteration: 274579
loss: 0.9990605711936951,grad_norm: 0.8814001268427799, iteration: 274580
loss: 0.975170910358429,grad_norm: 0.8579075388521398, iteration: 274581
loss: 0.982300877571106,grad_norm: 0.8886083009357, iteration: 274582
loss: 1.1026557683944702,grad_norm: 0.9999998867791714, iteration: 274583
loss: 0.993329644203186,grad_norm: 0.8433378100172482, iteration: 274584
loss: 1.027419924736023,grad_norm: 0.851716081186893, iteration: 274585
loss: 0.9612796902656555,grad_norm: 0.9074458979103425, iteration: 274586
loss: 0.9967520833015442,grad_norm: 0.9999991374366514, iteration: 274587
loss: 1.0483359098434448,grad_norm: 0.9557439070085518, iteration: 274588
loss: 0.9752689003944397,grad_norm: 0.9073586281616668, iteration: 274589
loss: 0.9714688062667847,grad_norm: 0.8968345719528299, iteration: 274590
loss: 0.9605796933174133,grad_norm: 0.9242141413003893, iteration: 274591
loss: 0.9763545989990234,grad_norm: 0.919382001370371, iteration: 274592
loss: 0.9894666075706482,grad_norm: 0.8362717388028138, iteration: 274593
loss: 0.9896199703216553,grad_norm: 0.9999989531050594, iteration: 274594
loss: 0.9752147197723389,grad_norm: 0.7316434593983798, iteration: 274595
loss: 0.984585702419281,grad_norm: 0.7088630738735804, iteration: 274596
loss: 0.9702228307723999,grad_norm: 0.8516776455111036, iteration: 274597
loss: 1.006234049797058,grad_norm: 0.9999990313626566, iteration: 274598
loss: 1.0149611234664917,grad_norm: 0.999999338528418, iteration: 274599
loss: 0.9675732254981995,grad_norm: 0.8739893556252522, iteration: 274600
loss: 1.0089638233184814,grad_norm: 0.7906028118813551, iteration: 274601
loss: 1.048609972000122,grad_norm: 0.8717382344041048, iteration: 274602
loss: 1.0235614776611328,grad_norm: 0.85551251454619, iteration: 274603
loss: 1.0304372310638428,grad_norm: 0.7971251885121986, iteration: 274604
loss: 1.0355074405670166,grad_norm: 0.9999991845405485, iteration: 274605
loss: 1.026960015296936,grad_norm: 0.9201605366163818, iteration: 274606
loss: 0.9808680415153503,grad_norm: 0.8557327592314277, iteration: 274607
loss: 1.0056225061416626,grad_norm: 0.9371038036829056, iteration: 274608
loss: 1.0114381313323975,grad_norm: 0.9361597568001192, iteration: 274609
loss: 1.0117570161819458,grad_norm: 0.7094250573215944, iteration: 274610
loss: 1.0162056684494019,grad_norm: 0.9189364201700166, iteration: 274611
loss: 1.0016732215881348,grad_norm: 0.7477579012432477, iteration: 274612
loss: 0.9682642817497253,grad_norm: 0.8418580666106554, iteration: 274613
loss: 0.9727938175201416,grad_norm: 0.7802582062319088, iteration: 274614
loss: 1.0299330949783325,grad_norm: 0.8817208344271996, iteration: 274615
loss: 0.9873644709587097,grad_norm: 0.7375079128673432, iteration: 274616
loss: 0.9376697540283203,grad_norm: 0.9999989680689942, iteration: 274617
loss: 1.0009069442749023,grad_norm: 0.8856217061846992, iteration: 274618
loss: 0.9563351273536682,grad_norm: 0.9999990261280118, iteration: 274619
loss: 0.9886331558227539,grad_norm: 0.9810311945814472, iteration: 274620
loss: 1.003904104232788,grad_norm: 0.8390996260827236, iteration: 274621
loss: 1.0013556480407715,grad_norm: 0.909782237201602, iteration: 274622
loss: 0.985661506652832,grad_norm: 0.9999990952217713, iteration: 274623
loss: 0.9988616108894348,grad_norm: 0.9701269485294832, iteration: 274624
loss: 1.0365976095199585,grad_norm: 0.9138670312200322, iteration: 274625
loss: 1.0472559928894043,grad_norm: 0.9999991379043142, iteration: 274626
loss: 0.9769617319107056,grad_norm: 0.8728372096318893, iteration: 274627
loss: 1.0344929695129395,grad_norm: 0.8424538433320753, iteration: 274628
loss: 0.990757405757904,grad_norm: 0.8742385506810468, iteration: 274629
loss: 0.9760594367980957,grad_norm: 0.9761759490864893, iteration: 274630
loss: 1.0063197612762451,grad_norm: 0.9999990845290219, iteration: 274631
loss: 1.0026202201843262,grad_norm: 0.7466972533370676, iteration: 274632
loss: 0.9760385751724243,grad_norm: 0.9206511878893554, iteration: 274633
loss: 1.0585601329803467,grad_norm: 0.9999992201027331, iteration: 274634
loss: 1.0025980472564697,grad_norm: 0.7889289490921064, iteration: 274635
loss: 1.0070505142211914,grad_norm: 0.8152814506220029, iteration: 274636
loss: 0.9743913412094116,grad_norm: 0.8016832212176845, iteration: 274637
loss: 1.007175087928772,grad_norm: 0.8472082446610875, iteration: 274638
loss: 0.997509777545929,grad_norm: 0.8071905310668205, iteration: 274639
loss: 0.9994540810585022,grad_norm: 0.8451645994989562, iteration: 274640
loss: 0.9918633699417114,grad_norm: 0.7494289419949324, iteration: 274641
loss: 1.0231493711471558,grad_norm: 0.7821528635419372, iteration: 274642
loss: 1.038406491279602,grad_norm: 0.7962555236614145, iteration: 274643
loss: 1.007431983947754,grad_norm: 0.9621543321645237, iteration: 274644
loss: 0.9994239211082458,grad_norm: 0.9194949219324527, iteration: 274645
loss: 1.0091501474380493,grad_norm: 0.9689409501387729, iteration: 274646
loss: 1.0121315717697144,grad_norm: 0.9054114139605305, iteration: 274647
loss: 1.0519344806671143,grad_norm: 0.9429570364096759, iteration: 274648
loss: 0.9784317016601562,grad_norm: 0.9681202607269506, iteration: 274649
loss: 1.00333571434021,grad_norm: 0.9999991431955534, iteration: 274650
loss: 1.0261512994766235,grad_norm: 0.7811815271243053, iteration: 274651
loss: 0.9798949360847473,grad_norm: 0.9999989816521428, iteration: 274652
loss: 1.0328370332717896,grad_norm: 0.8006031702702047, iteration: 274653
loss: 1.126650333404541,grad_norm: 0.9999990016614527, iteration: 274654
loss: 1.0111980438232422,grad_norm: 0.9999998721375482, iteration: 274655
loss: 0.9844358563423157,grad_norm: 0.9419704918710912, iteration: 274656
loss: 0.9915809035301208,grad_norm: 0.7980231276301821, iteration: 274657
loss: 1.0061924457550049,grad_norm: 0.8860640328968303, iteration: 274658
loss: 0.9592857360839844,grad_norm: 0.9356294722837761, iteration: 274659
loss: 0.9651038646697998,grad_norm: 0.7546538908759375, iteration: 274660
loss: 1.0098620653152466,grad_norm: 0.8922467559849295, iteration: 274661
loss: 1.0442323684692383,grad_norm: 0.8575851826938885, iteration: 274662
loss: 0.9736635684967041,grad_norm: 0.9999991281255631, iteration: 274663
loss: 1.027390480041504,grad_norm: 0.9156039819659374, iteration: 274664
loss: 0.9883455634117126,grad_norm: 0.920868828513753, iteration: 274665
loss: 0.9890552759170532,grad_norm: 0.8794808844633253, iteration: 274666
loss: 1.03612220287323,grad_norm: 0.8880216274129654, iteration: 274667
loss: 0.9729554653167725,grad_norm: 0.904378821632955, iteration: 274668
loss: 1.0113170146942139,grad_norm: 0.9999990615273202, iteration: 274669
loss: 1.0012423992156982,grad_norm: 0.7903817130307975, iteration: 274670
loss: 0.9767616391181946,grad_norm: 0.8522423063125109, iteration: 274671
loss: 0.989947497844696,grad_norm: 0.8788379163886787, iteration: 274672
loss: 0.9832088351249695,grad_norm: 0.9864807181941303, iteration: 274673
loss: 0.9891864061355591,grad_norm: 0.9077526613369452, iteration: 274674
loss: 0.984609842300415,grad_norm: 0.9999991491940889, iteration: 274675
loss: 1.0150494575500488,grad_norm: 0.733556462003587, iteration: 274676
loss: 1.0139683485031128,grad_norm: 0.976467546622534, iteration: 274677
loss: 1.0375341176986694,grad_norm: 0.78957072807993, iteration: 274678
loss: 0.9672293663024902,grad_norm: 0.9546491494234217, iteration: 274679
loss: 0.9898264408111572,grad_norm: 0.9999990927534512, iteration: 274680
loss: 1.0272934436798096,grad_norm: 0.9999990254643087, iteration: 274681
loss: 1.0013507604599,grad_norm: 0.9999993048274355, iteration: 274682
loss: 1.0128127336502075,grad_norm: 0.8729871486698128, iteration: 274683
loss: 0.9940032958984375,grad_norm: 0.7583112934754002, iteration: 274684
loss: 0.9795949459075928,grad_norm: 0.9240951978141964, iteration: 274685
loss: 1.0118368864059448,grad_norm: 0.7674814560653413, iteration: 274686
loss: 1.0016157627105713,grad_norm: 0.8317783177699701, iteration: 274687
loss: 0.990638256072998,grad_norm: 0.8278174381906473, iteration: 274688
loss: 0.9811837673187256,grad_norm: 0.82374142409102, iteration: 274689
loss: 0.9752967357635498,grad_norm: 0.9141968169452303, iteration: 274690
loss: 1.020079255104065,grad_norm: 0.8139531779187814, iteration: 274691
loss: 0.9722263216972351,grad_norm: 0.9999990313400906, iteration: 274692
loss: 0.9785583019256592,grad_norm: 0.8459415734162352, iteration: 274693
loss: 1.010507583618164,grad_norm: 0.9611000849608617, iteration: 274694
loss: 0.9580844640731812,grad_norm: 0.8620694983078265, iteration: 274695
loss: 1.0087946653366089,grad_norm: 0.7906916235361844, iteration: 274696
loss: 0.9913395047187805,grad_norm: 0.893021101872944, iteration: 274697
loss: 1.0426812171936035,grad_norm: 0.999999074026905, iteration: 274698
loss: 0.993712842464447,grad_norm: 0.8964297610395795, iteration: 274699
loss: 0.9686968922615051,grad_norm: 0.9999990577948578, iteration: 274700
loss: 1.0400278568267822,grad_norm: 0.8893375771617678, iteration: 274701
loss: 0.9788667559623718,grad_norm: 0.8506649423909446, iteration: 274702
loss: 0.9739032983779907,grad_norm: 0.7147440423576532, iteration: 274703
loss: 0.9963106513023376,grad_norm: 0.8858631086957046, iteration: 274704
loss: 0.962932288646698,grad_norm: 0.9072912974628293, iteration: 274705
loss: 1.006596326828003,grad_norm: 0.9585634963857718, iteration: 274706
loss: 0.9958404302597046,grad_norm: 0.9527949765789868, iteration: 274707
loss: 0.9984196424484253,grad_norm: 0.8406064159905896, iteration: 274708
loss: 1.0111517906188965,grad_norm: 0.8201228460465867, iteration: 274709
loss: 1.0035998821258545,grad_norm: 0.8183534355327128, iteration: 274710
loss: 0.9895620346069336,grad_norm: 0.9790704091331436, iteration: 274711
loss: 0.9922329187393188,grad_norm: 0.9370993749226332, iteration: 274712
loss: 0.9998219609260559,grad_norm: 0.9139027914103512, iteration: 274713
loss: 1.0075244903564453,grad_norm: 0.8008480614114861, iteration: 274714
loss: 0.9851949214935303,grad_norm: 0.7484217964397843, iteration: 274715
loss: 1.0428712368011475,grad_norm: 0.9313412136730738, iteration: 274716
loss: 0.9473040699958801,grad_norm: 0.8503913673817746, iteration: 274717
loss: 0.9669195413589478,grad_norm: 0.8389026140518705, iteration: 274718
loss: 0.9968918561935425,grad_norm: 0.9999990595113176, iteration: 274719
loss: 1.0050238370895386,grad_norm: 0.893728178001943, iteration: 274720
loss: 1.0016294717788696,grad_norm: 0.9373978987959751, iteration: 274721
loss: 0.9788530468940735,grad_norm: 0.9999990605097889, iteration: 274722
loss: 0.9896467328071594,grad_norm: 0.7060394747264621, iteration: 274723
loss: 0.9386659264564514,grad_norm: 0.8057582060004362, iteration: 274724
loss: 0.9824768900871277,grad_norm: 0.9999990264006267, iteration: 274725
loss: 0.9829460382461548,grad_norm: 0.7911237872596837, iteration: 274726
loss: 1.005782961845398,grad_norm: 0.9999991237645208, iteration: 274727
loss: 0.9714727997779846,grad_norm: 0.9999997186774071, iteration: 274728
loss: 1.000132441520691,grad_norm: 0.7601899743502402, iteration: 274729
loss: 1.0366932153701782,grad_norm: 0.9999991365622883, iteration: 274730
loss: 0.986667811870575,grad_norm: 0.8195253818697558, iteration: 274731
loss: 1.0130798816680908,grad_norm: 0.7448466283425541, iteration: 274732
loss: 1.0431554317474365,grad_norm: 0.9043737328388782, iteration: 274733
loss: 1.0223883390426636,grad_norm: 0.8785775105060628, iteration: 274734
loss: 0.9970569014549255,grad_norm: 0.8170083752566581, iteration: 274735
loss: 0.9984971284866333,grad_norm: 0.9162117469722205, iteration: 274736
loss: 0.979281485080719,grad_norm: 0.9423118139894249, iteration: 274737
loss: 1.0076700448989868,grad_norm: 0.8235868768851801, iteration: 274738
loss: 0.9992793798446655,grad_norm: 0.9637689967352879, iteration: 274739
loss: 1.001870036125183,grad_norm: 0.8272363192309251, iteration: 274740
loss: 1.011663556098938,grad_norm: 0.8410099607704961, iteration: 274741
loss: 0.9809844493865967,grad_norm: 0.8284782578462067, iteration: 274742
loss: 0.9870755076408386,grad_norm: 0.9938462523576501, iteration: 274743
loss: 0.9904143214225769,grad_norm: 0.9999991211089653, iteration: 274744
loss: 0.9880073666572571,grad_norm: 0.999999653985064, iteration: 274745
loss: 1.0225751399993896,grad_norm: 0.9652164280425386, iteration: 274746
loss: 1.002790927886963,grad_norm: 0.9999990337853849, iteration: 274747
loss: 0.9829838871955872,grad_norm: 0.9664483858919612, iteration: 274748
loss: 0.99765545129776,grad_norm: 0.921853191239336, iteration: 274749
loss: 0.9951435923576355,grad_norm: 0.6912228247534781, iteration: 274750
loss: 1.037948489189148,grad_norm: 0.8889179630042907, iteration: 274751
loss: 1.0022432804107666,grad_norm: 0.8603300685608185, iteration: 274752
loss: 0.9697571992874146,grad_norm: 0.7935271536689472, iteration: 274753
loss: 1.0337039232254028,grad_norm: 0.8949772475179126, iteration: 274754
loss: 1.020560622215271,grad_norm: 0.7754968604658559, iteration: 274755
loss: 1.0223866701126099,grad_norm: 0.8301001486281445, iteration: 274756
loss: 0.9788155555725098,grad_norm: 0.8125870294735265, iteration: 274757
loss: 0.9740062952041626,grad_norm: 0.7842960834223937, iteration: 274758
loss: 1.0039767026901245,grad_norm: 0.90802501243994, iteration: 274759
loss: 0.994615375995636,grad_norm: 0.9622842692600755, iteration: 274760
loss: 0.9903255701065063,grad_norm: 0.816420951639354, iteration: 274761
loss: 1.0074189901351929,grad_norm: 0.6921807135249141, iteration: 274762
loss: 1.1207854747772217,grad_norm: 0.9999991515331211, iteration: 274763
loss: 0.9885520339012146,grad_norm: 0.8000432870551519, iteration: 274764
loss: 0.9916777014732361,grad_norm: 0.8397267458033186, iteration: 274765
loss: 0.9768518209457397,grad_norm: 0.7676137647744332, iteration: 274766
loss: 0.9709373116493225,grad_norm: 0.8865351529008736, iteration: 274767
loss: 1.0615394115447998,grad_norm: 0.9999994413835842, iteration: 274768
loss: 0.9779160022735596,grad_norm: 0.8571941058109622, iteration: 274769
loss: 1.0215535163879395,grad_norm: 0.8672018523033356, iteration: 274770
loss: 0.9897946119308472,grad_norm: 0.8667358429952663, iteration: 274771
loss: 1.022162914276123,grad_norm: 0.9387463742808062, iteration: 274772
loss: 1.0133453607559204,grad_norm: 0.8740990814945695, iteration: 274773
loss: 0.9884434342384338,grad_norm: 0.7096158156185981, iteration: 274774
loss: 0.9826799035072327,grad_norm: 0.9608646676369896, iteration: 274775
loss: 0.9824142456054688,grad_norm: 0.7637836461447788, iteration: 274776
loss: 1.0342025756835938,grad_norm: 0.999999108008316, iteration: 274777
loss: 1.010237455368042,grad_norm: 0.938009792120242, iteration: 274778
loss: 1.0277924537658691,grad_norm: 0.9999991641481641, iteration: 274779
loss: 1.046557068824768,grad_norm: 0.9445251445604312, iteration: 274780
loss: 1.014968752861023,grad_norm: 0.7847938994098927, iteration: 274781
loss: 0.9931557774543762,grad_norm: 0.9999990734833252, iteration: 274782
loss: 1.0159202814102173,grad_norm: 0.8093714926399579, iteration: 274783
loss: 0.9787486791610718,grad_norm: 0.8800738372962792, iteration: 274784
loss: 1.014407753944397,grad_norm: 0.8369925017135075, iteration: 274785
loss: 1.0443637371063232,grad_norm: 0.9999998378986402, iteration: 274786
loss: 0.9837040901184082,grad_norm: 0.9319057196477902, iteration: 274787
loss: 0.99259352684021,grad_norm: 0.8812319611827962, iteration: 274788
loss: 1.013360857963562,grad_norm: 0.9334880332316392, iteration: 274789
loss: 1.020552396774292,grad_norm: 0.9335139106142017, iteration: 274790
loss: 0.9921537041664124,grad_norm: 0.927591202507081, iteration: 274791
loss: 0.9773225784301758,grad_norm: 0.7789773582229965, iteration: 274792
loss: 1.0162701606750488,grad_norm: 0.9793736721230271, iteration: 274793
loss: 1.0012494325637817,grad_norm: 0.8275807454467, iteration: 274794
loss: 0.9734362363815308,grad_norm: 0.8633632400448524, iteration: 274795
loss: 0.9858436584472656,grad_norm: 0.8996495582892462, iteration: 274796
loss: 1.0244202613830566,grad_norm: 0.9999990949942512, iteration: 274797
loss: 0.9698138236999512,grad_norm: 0.9208615020526943, iteration: 274798
loss: 0.9722285270690918,grad_norm: 0.8791202120644148, iteration: 274799
loss: 0.9865110516548157,grad_norm: 0.9999989904980919, iteration: 274800
loss: 1.0013591051101685,grad_norm: 0.9999991473730341, iteration: 274801
loss: 1.050767421722412,grad_norm: 0.999999015023285, iteration: 274802
loss: 0.9894173741340637,grad_norm: 0.9577888691632263, iteration: 274803
loss: 0.988808274269104,grad_norm: 0.6621843958657341, iteration: 274804
loss: 0.9835255742073059,grad_norm: 0.9999993450319177, iteration: 274805
loss: 0.9809032082557678,grad_norm: 0.8841831429262677, iteration: 274806
loss: 1.024566650390625,grad_norm: 0.8204014926311225, iteration: 274807
loss: 1.039726972579956,grad_norm: 0.8344574772516192, iteration: 274808
loss: 0.9514968991279602,grad_norm: 0.9055173013631046, iteration: 274809
loss: 1.0389409065246582,grad_norm: 0.9075756921043375, iteration: 274810
loss: 0.9966475367546082,grad_norm: 0.7693744820836929, iteration: 274811
loss: 0.9885190725326538,grad_norm: 0.9999991011057215, iteration: 274812
loss: 0.9848812818527222,grad_norm: 0.9999991446980254, iteration: 274813
loss: 1.0763229131698608,grad_norm: 0.9999990125726653, iteration: 274814
loss: 1.005530595779419,grad_norm: 0.8021440034442798, iteration: 274815
loss: 0.9473440647125244,grad_norm: 0.9748650289555089, iteration: 274816
loss: 0.9913212060928345,grad_norm: 0.8528793670906257, iteration: 274817
loss: 1.0092202425003052,grad_norm: 0.8956672645147149, iteration: 274818
loss: 0.9365994334220886,grad_norm: 0.9999989882514354, iteration: 274819
loss: 1.020622968673706,grad_norm: 0.941466686035104, iteration: 274820
loss: 1.0077412128448486,grad_norm: 0.8393962066340864, iteration: 274821
loss: 1.162584662437439,grad_norm: 0.999999826304944, iteration: 274822
loss: 1.06199049949646,grad_norm: 0.9999995135572802, iteration: 274823
loss: 1.1241090297698975,grad_norm: 0.8729494605476539, iteration: 274824
loss: 0.9565327167510986,grad_norm: 0.7598075665233524, iteration: 274825
loss: 1.0240641832351685,grad_norm: 0.9820730307410634, iteration: 274826
loss: 1.0098388195037842,grad_norm: 0.8852211560315041, iteration: 274827
loss: 0.9816218018531799,grad_norm: 0.9999991103459757, iteration: 274828
loss: 1.0375022888183594,grad_norm: 0.9999999895792494, iteration: 274829
loss: 1.0404078960418701,grad_norm: 0.9355028788167387, iteration: 274830
loss: 1.0982065200805664,grad_norm: 0.8544233798071571, iteration: 274831
loss: 1.0113555192947388,grad_norm: 0.7776061344880176, iteration: 274832
loss: 0.951652467250824,grad_norm: 0.7914182877210946, iteration: 274833
loss: 0.9731385707855225,grad_norm: 0.8432855176032096, iteration: 274834
loss: 0.9459590911865234,grad_norm: 0.8988978187130813, iteration: 274835
loss: 1.0358366966247559,grad_norm: 0.8429884845198855, iteration: 274836
loss: 0.9878767728805542,grad_norm: 0.8179881895187021, iteration: 274837
loss: 0.9755128622055054,grad_norm: 0.999999098603666, iteration: 274838
loss: 1.0257542133331299,grad_norm: 0.8235424732446841, iteration: 274839
loss: 1.0787714719772339,grad_norm: 0.9999997208413126, iteration: 274840
loss: 1.0731070041656494,grad_norm: 0.9694519270256712, iteration: 274841
loss: 1.0154224634170532,grad_norm: 0.8333485418411465, iteration: 274842
loss: 1.0030817985534668,grad_norm: 0.8140671227487049, iteration: 274843
loss: 1.006597638130188,grad_norm: 0.743908775825966, iteration: 274844
loss: 1.033189296722412,grad_norm: 0.9999992531252283, iteration: 274845
loss: 0.9866000413894653,grad_norm: 0.977806039754979, iteration: 274846
loss: 1.0022571086883545,grad_norm: 0.9375466581764734, iteration: 274847
loss: 1.0123540163040161,grad_norm: 0.9707471706844107, iteration: 274848
loss: 1.0371291637420654,grad_norm: 0.8258316596748355, iteration: 274849
loss: 0.9956766963005066,grad_norm: 0.7334981300452579, iteration: 274850
loss: 0.9950692653656006,grad_norm: 0.7980455729197152, iteration: 274851
loss: 1.0475478172302246,grad_norm: 0.9999994541128007, iteration: 274852
loss: 0.9738634824752808,grad_norm: 0.8044118446716547, iteration: 274853
loss: 1.107491374015808,grad_norm: 0.9999999554652773, iteration: 274854
loss: 1.002810001373291,grad_norm: 0.8226460110387485, iteration: 274855
loss: 1.000472903251648,grad_norm: 0.8870113188725856, iteration: 274856
loss: 0.9780617356300354,grad_norm: 0.7919556695641069, iteration: 274857
loss: 0.9885229468345642,grad_norm: 0.9397485917919121, iteration: 274858
loss: 1.0426567792892456,grad_norm: 0.9999990582520825, iteration: 274859
loss: 1.0193595886230469,grad_norm: 0.9999991043561577, iteration: 274860
loss: 1.0282020568847656,grad_norm: 0.8836590429960351, iteration: 274861
loss: 0.9985597133636475,grad_norm: 0.9715204647319321, iteration: 274862
loss: 1.0006916522979736,grad_norm: 0.9999990830638944, iteration: 274863
loss: 1.022234559059143,grad_norm: 0.7902364030504905, iteration: 274864
loss: 1.0006885528564453,grad_norm: 0.7832950601120309, iteration: 274865
loss: 0.9812456369400024,grad_norm: 0.9392186807491741, iteration: 274866
loss: 1.0406121015548706,grad_norm: 0.788869694685092, iteration: 274867
loss: 1.0196219682693481,grad_norm: 0.7640705381284079, iteration: 274868
loss: 0.998029351234436,grad_norm: 0.9999990614976391, iteration: 274869
loss: 1.0399229526519775,grad_norm: 0.9999989942273947, iteration: 274870
loss: 0.9851556420326233,grad_norm: 0.8121467322830251, iteration: 274871
loss: 1.026513695716858,grad_norm: 0.7831046170323906, iteration: 274872
loss: 1.0018742084503174,grad_norm: 0.9999990101284896, iteration: 274873
loss: 0.9601321816444397,grad_norm: 0.9789670811144519, iteration: 274874
loss: 1.000010371208191,grad_norm: 0.9700413293477841, iteration: 274875
loss: 1.0184807777404785,grad_norm: 0.9999991858354529, iteration: 274876
loss: 0.9572181105613708,grad_norm: 0.9999991377246894, iteration: 274877
loss: 0.9579339623451233,grad_norm: 0.9298707535216598, iteration: 274878
loss: 0.9696572422981262,grad_norm: 0.99999906241901, iteration: 274879
loss: 1.0634063482284546,grad_norm: 0.9999990275039015, iteration: 274880
loss: 0.9771857261657715,grad_norm: 0.8116217898708162, iteration: 274881
loss: 0.9570268988609314,grad_norm: 0.8487491505332045, iteration: 274882
loss: 1.000578761100769,grad_norm: 0.8882348607261625, iteration: 274883
loss: 1.0310657024383545,grad_norm: 0.9999991064226187, iteration: 274884
loss: 0.9752027988433838,grad_norm: 0.7806607703845492, iteration: 274885
loss: 1.0514013767242432,grad_norm: 1.0000000827897118, iteration: 274886
loss: 1.0075767040252686,grad_norm: 0.7765118565785947, iteration: 274887
loss: 1.091120719909668,grad_norm: 0.977799648519628, iteration: 274888
loss: 1.0369235277175903,grad_norm: 0.9999992190693933, iteration: 274889
loss: 0.9764056205749512,grad_norm: 0.8014941620877319, iteration: 274890
loss: 0.9587220549583435,grad_norm: 0.9999989836920243, iteration: 274891
loss: 0.9788357019424438,grad_norm: 0.8797469617608604, iteration: 274892
loss: 1.060288906097412,grad_norm: 0.8795038080226628, iteration: 274893
loss: 1.0604115724563599,grad_norm: 0.9999991151688699, iteration: 274894
loss: 1.132213830947876,grad_norm: 0.9999997517642901, iteration: 274895
loss: 1.0160341262817383,grad_norm: 0.9364272519749631, iteration: 274896
loss: 0.9729524850845337,grad_norm: 0.7851171794921995, iteration: 274897
loss: 1.0052863359451294,grad_norm: 0.9606680184869292, iteration: 274898
loss: 1.0187126398086548,grad_norm: 0.9296556077352365, iteration: 274899
loss: 0.9882663488388062,grad_norm: 0.932349465990913, iteration: 274900
loss: 1.0179723501205444,grad_norm: 0.8103138012989912, iteration: 274901
loss: 1.0325384140014648,grad_norm: 0.917265373262577, iteration: 274902
loss: 0.9898535013198853,grad_norm: 0.8952225306183131, iteration: 274903
loss: 1.0026462078094482,grad_norm: 0.72748545241406, iteration: 274904
loss: 1.0090034008026123,grad_norm: 0.9999991085948999, iteration: 274905
loss: 1.0699392557144165,grad_norm: 0.7212335412708605, iteration: 274906
loss: 0.9908584952354431,grad_norm: 0.8689540487421851, iteration: 274907
loss: 1.0163739919662476,grad_norm: 0.9999992075936975, iteration: 274908
loss: 0.998215913772583,grad_norm: 0.8630023792543464, iteration: 274909
loss: 1.0042757987976074,grad_norm: 0.7574951684563052, iteration: 274910
loss: 0.9643590450286865,grad_norm: 0.9083905442204632, iteration: 274911
loss: 1.0342520475387573,grad_norm: 0.9150763429459453, iteration: 274912
loss: 1.018336534500122,grad_norm: 0.7776365033637322, iteration: 274913
loss: 1.0157458782196045,grad_norm: 0.895571362655949, iteration: 274914
loss: 1.0326210260391235,grad_norm: 0.9333202177023228, iteration: 274915
loss: 0.9554292559623718,grad_norm: 0.9708492051545761, iteration: 274916
loss: 0.9846571087837219,grad_norm: 0.9385685077746262, iteration: 274917
loss: 1.0223801136016846,grad_norm: 0.8965503204246342, iteration: 274918
loss: 1.0097874402999878,grad_norm: 0.9522646634288644, iteration: 274919
loss: 0.9962241053581238,grad_norm: 0.8610023004488766, iteration: 274920
loss: 0.9679877758026123,grad_norm: 0.9429139143581179, iteration: 274921
loss: 1.0187979936599731,grad_norm: 0.8519938439145667, iteration: 274922
loss: 1.0168882608413696,grad_norm: 0.7623775983837494, iteration: 274923
loss: 0.9733366966247559,grad_norm: 0.8109925248399825, iteration: 274924
loss: 0.9860524535179138,grad_norm: 0.9999992324886219, iteration: 274925
loss: 0.9935747981071472,grad_norm: 0.9999990895380323, iteration: 274926
loss: 0.9761718511581421,grad_norm: 0.7585150354130674, iteration: 274927
loss: 0.9919254779815674,grad_norm: 0.9348360949370522, iteration: 274928
loss: 1.0275790691375732,grad_norm: 0.7854290163260054, iteration: 274929
loss: 1.0213894844055176,grad_norm: 0.739929480891227, iteration: 274930
loss: 0.9921280741691589,grad_norm: 0.9650211574594915, iteration: 274931
loss: 0.9630908966064453,grad_norm: 0.9027301687433337, iteration: 274932
loss: 0.9868465662002563,grad_norm: 0.939682805488476, iteration: 274933
loss: 1.0162376165390015,grad_norm: 0.7780785849100006, iteration: 274934
loss: 0.9896107912063599,grad_norm: 0.9650022548333018, iteration: 274935
loss: 1.034050464630127,grad_norm: 0.776684298187631, iteration: 274936
loss: 0.9652406573295593,grad_norm: 0.7861451872203498, iteration: 274937
loss: 1.0064311027526855,grad_norm: 0.6649879780448583, iteration: 274938
loss: 0.9773836135864258,grad_norm: 0.8419196088426592, iteration: 274939
loss: 1.005699634552002,grad_norm: 0.9564000040233451, iteration: 274940
loss: 0.956063985824585,grad_norm: 0.7516867219326833, iteration: 274941
loss: 0.9921932220458984,grad_norm: 0.8875497282409174, iteration: 274942
loss: 0.991669774055481,grad_norm: 0.9576037040750068, iteration: 274943
loss: 0.9877750277519226,grad_norm: 0.7431663711711197, iteration: 274944
loss: 1.0384409427642822,grad_norm: 0.9122042581980149, iteration: 274945
loss: 0.9692386388778687,grad_norm: 0.8230161838262918, iteration: 274946
loss: 1.0161279439926147,grad_norm: 0.9556440130926382, iteration: 274947
loss: 0.9997076392173767,grad_norm: 0.9542975922090672, iteration: 274948
loss: 0.9521908164024353,grad_norm: 0.9270668075489648, iteration: 274949
loss: 1.0858190059661865,grad_norm: 0.9999992126387145, iteration: 274950
loss: 1.0252113342285156,grad_norm: 0.7666243707825762, iteration: 274951
loss: 0.9815919399261475,grad_norm: 0.8716658353903928, iteration: 274952
loss: 1.0196154117584229,grad_norm: 0.9999991649247781, iteration: 274953
loss: 0.9935811758041382,grad_norm: 0.825912354013911, iteration: 274954
loss: 1.0148532390594482,grad_norm: 0.9993220099115866, iteration: 274955
loss: 1.0134767293930054,grad_norm: 0.7299202435981187, iteration: 274956
loss: 0.9818569421768188,grad_norm: 0.9817172621983725, iteration: 274957
loss: 0.9601143598556519,grad_norm: 0.9999991370915366, iteration: 274958
loss: 0.9902143478393555,grad_norm: 0.9887659362885435, iteration: 274959
loss: 0.9960744976997375,grad_norm: 0.7556884701841603, iteration: 274960
loss: 0.9799795150756836,grad_norm: 0.7725681814380675, iteration: 274961
loss: 1.0202631950378418,grad_norm: 0.9098858649078395, iteration: 274962
loss: 0.9940953254699707,grad_norm: 0.8834213715808142, iteration: 274963
loss: 1.0076262950897217,grad_norm: 0.8762005529721019, iteration: 274964
loss: 1.0249698162078857,grad_norm: 0.764336358055561, iteration: 274965
loss: 0.9866721630096436,grad_norm: 0.9999991437293382, iteration: 274966
loss: 1.0019621849060059,grad_norm: 0.7944124466210277, iteration: 274967
loss: 1.0190463066101074,grad_norm: 0.9488185476908938, iteration: 274968
loss: 0.9979192614555359,grad_norm: 0.7971726104854924, iteration: 274969
loss: 1.0236330032348633,grad_norm: 0.8557995403668848, iteration: 274970
loss: 0.9589601755142212,grad_norm: 0.964172573828258, iteration: 274971
loss: 1.0818921327590942,grad_norm: 0.8371299494076216, iteration: 274972
loss: 1.0101690292358398,grad_norm: 0.7978613803585537, iteration: 274973
loss: 1.0126166343688965,grad_norm: 0.8843279499730335, iteration: 274974
loss: 1.024113655090332,grad_norm: 0.724575373126566, iteration: 274975
loss: 0.9809361696243286,grad_norm: 0.749257439336471, iteration: 274976
loss: 0.9951618313789368,grad_norm: 0.8520759122824758, iteration: 274977
loss: 1.0033856630325317,grad_norm: 0.9551998898949716, iteration: 274978
loss: 1.007360816001892,grad_norm: 0.7549004578517639, iteration: 274979
loss: 0.9706131219863892,grad_norm: 0.9999991263889237, iteration: 274980
loss: 0.9719932079315186,grad_norm: 0.9305773090778807, iteration: 274981
loss: 1.0383859872817993,grad_norm: 0.9999990879501383, iteration: 274982
loss: 1.0232676267623901,grad_norm: 0.9515195536461153, iteration: 274983
loss: 1.0051565170288086,grad_norm: 0.691675910690626, iteration: 274984
loss: 1.0202316045761108,grad_norm: 0.8675131290574609, iteration: 274985
loss: 0.9877710342407227,grad_norm: 0.9531098493237857, iteration: 274986
loss: 1.0369176864624023,grad_norm: 0.9999994330155563, iteration: 274987
loss: 0.9471026062965393,grad_norm: 0.8960857961247153, iteration: 274988
loss: 0.976098895072937,grad_norm: 0.9507222416584605, iteration: 274989
loss: 0.9881213307380676,grad_norm: 0.7867491581701208, iteration: 274990
loss: 0.9782764911651611,grad_norm: 0.9561841889661199, iteration: 274991
loss: 0.9960172772407532,grad_norm: 0.7918983501251139, iteration: 274992
loss: 0.9942526817321777,grad_norm: 0.76152306567981, iteration: 274993
loss: 0.9982195496559143,grad_norm: 0.7885807772627175, iteration: 274994
loss: 1.042903184890747,grad_norm: 0.9999990643489415, iteration: 274995
loss: 0.9936511516571045,grad_norm: 0.878433480317547, iteration: 274996
loss: 0.9956215023994446,grad_norm: 0.7944059841282666, iteration: 274997
loss: 0.9853412508964539,grad_norm: 0.894335877856649, iteration: 274998
loss: 1.0174428224563599,grad_norm: 0.7809346226449362, iteration: 274999
loss: 0.9941580295562744,grad_norm: 0.7438675226041027, iteration: 275000
loss: 1.069631814956665,grad_norm: 0.9999990370154683, iteration: 275001
loss: 0.9798175096511841,grad_norm: 0.9563574952266657, iteration: 275002
loss: 0.9638736844062805,grad_norm: 0.9690983799674369, iteration: 275003
loss: 0.9679512977600098,grad_norm: 0.9498873127031104, iteration: 275004
loss: 1.0280274152755737,grad_norm: 0.9999990564431772, iteration: 275005
loss: 0.9965553879737854,grad_norm: 0.8068617506465764, iteration: 275006
loss: 1.0175200700759888,grad_norm: 0.8444566689974068, iteration: 275007
loss: 0.9871566295623779,grad_norm: 0.8555976007244357, iteration: 275008
loss: 0.9924633502960205,grad_norm: 0.8881611667790772, iteration: 275009
loss: 0.9869595170021057,grad_norm: 0.7731079572297049, iteration: 275010
loss: 1.0135655403137207,grad_norm: 0.9086039902840114, iteration: 275011
loss: 0.9841789603233337,grad_norm: 0.9087565785112678, iteration: 275012
loss: 0.9953562617301941,grad_norm: 0.9614224448749599, iteration: 275013
loss: 0.9634606242179871,grad_norm: 0.833657026107935, iteration: 275014
loss: 0.9737057089805603,grad_norm: 0.9999988537594888, iteration: 275015
loss: 1.0049195289611816,grad_norm: 0.7522205478145384, iteration: 275016
loss: 0.9853712916374207,grad_norm: 0.9476604192593558, iteration: 275017
loss: 0.969147801399231,grad_norm: 0.7316844543937439, iteration: 275018
loss: 0.9782714247703552,grad_norm: 0.8634472884101849, iteration: 275019
loss: 1.0144802331924438,grad_norm: 0.9999991764147577, iteration: 275020
loss: 0.9787682890892029,grad_norm: 0.911858633932054, iteration: 275021
loss: 0.9932870864868164,grad_norm: 0.7552502346978617, iteration: 275022
loss: 1.0596247911453247,grad_norm: 0.9894690116731326, iteration: 275023
loss: 0.9808186292648315,grad_norm: 0.9999990497515231, iteration: 275024
loss: 1.0493497848510742,grad_norm: 0.7966288913850421, iteration: 275025
loss: 1.017722487449646,grad_norm: 0.9999990682515081, iteration: 275026
loss: 0.9941628575325012,grad_norm: 0.8809668332707388, iteration: 275027
loss: 0.9744974374771118,grad_norm: 0.8641466435916988, iteration: 275028
loss: 0.9918674826622009,grad_norm: 0.9421400913126922, iteration: 275029
loss: 0.9932249188423157,grad_norm: 0.9999993564165681, iteration: 275030
loss: 1.0305750370025635,grad_norm: 0.8115149496315951, iteration: 275031
loss: 0.9960591793060303,grad_norm: 0.8145325599028904, iteration: 275032
loss: 0.9614580869674683,grad_norm: 0.9999991252142922, iteration: 275033
loss: 1.0172456502914429,grad_norm: 0.9999991672454216, iteration: 275034
loss: 0.9953502416610718,grad_norm: 0.8296301729469658, iteration: 275035
loss: 0.9684287905693054,grad_norm: 0.7397156553125593, iteration: 275036
loss: 1.0432425737380981,grad_norm: 0.7523903766746706, iteration: 275037
loss: 0.9966563582420349,grad_norm: 0.7867743186521339, iteration: 275038
loss: 0.9894289970397949,grad_norm: 0.9225841979709125, iteration: 275039
loss: 0.9974097609519958,grad_norm: 0.7537502193049059, iteration: 275040
loss: 1.0394319295883179,grad_norm: 0.99550013506834, iteration: 275041
loss: 1.0023084878921509,grad_norm: 0.8688451174047165, iteration: 275042
loss: 1.0208752155303955,grad_norm: 0.8990182578726141, iteration: 275043
loss: 0.9981850981712341,grad_norm: 0.8700747651837792, iteration: 275044
loss: 1.0157506465911865,grad_norm: 0.8341002677524173, iteration: 275045
loss: 1.0178353786468506,grad_norm: 0.8688519712092421, iteration: 275046
loss: 1.0006489753723145,grad_norm: 0.9999993332090811, iteration: 275047
loss: 0.9934018850326538,grad_norm: 0.9456371767515419, iteration: 275048
loss: 1.000859260559082,grad_norm: 0.9486612729237479, iteration: 275049
loss: 0.9772058725357056,grad_norm: 0.8521549603808121, iteration: 275050
loss: 0.9971785545349121,grad_norm: 0.7457210206903112, iteration: 275051
loss: 0.9928880333900452,grad_norm: 0.8552726658058323, iteration: 275052
loss: 0.98771733045578,grad_norm: 0.7549365874363255, iteration: 275053
loss: 0.9957753419876099,grad_norm: 0.8415326709632918, iteration: 275054
loss: 0.9716873168945312,grad_norm: 0.8618161520275086, iteration: 275055
loss: 1.0088056325912476,grad_norm: 0.9233641146513663, iteration: 275056
loss: 0.9864658117294312,grad_norm: 0.7234413300992159, iteration: 275057
loss: 0.988828182220459,grad_norm: 0.806807858389466, iteration: 275058
loss: 0.9744263291358948,grad_norm: 0.9999996754610336, iteration: 275059
loss: 0.9850494265556335,grad_norm: 0.9811019216490466, iteration: 275060
loss: 1.0575480461120605,grad_norm: 0.9999998761282384, iteration: 275061
loss: 1.040735125541687,grad_norm: 0.9999995324901267, iteration: 275062
loss: 1.005968451499939,grad_norm: 0.9999991495460198, iteration: 275063
loss: 0.9967712759971619,grad_norm: 0.786752936790161, iteration: 275064
loss: 1.01616370677948,grad_norm: 0.9999992096467929, iteration: 275065
loss: 1.1227742433547974,grad_norm: 0.9761653186650181, iteration: 275066
loss: 0.9815112352371216,grad_norm: 0.9824048334501927, iteration: 275067
loss: 0.9859135150909424,grad_norm: 0.7962134601234563, iteration: 275068
loss: 0.9881011247634888,grad_norm: 0.9999990836484969, iteration: 275069
loss: 1.0126439332962036,grad_norm: 0.8235983559482701, iteration: 275070
loss: 1.0221266746520996,grad_norm: 0.9999993551724138, iteration: 275071
loss: 1.099244236946106,grad_norm: 0.9999991789073059, iteration: 275072
loss: 1.0899171829223633,grad_norm: 0.9999991236716705, iteration: 275073
loss: 0.9981864094734192,grad_norm: 0.9999997966414265, iteration: 275074
loss: 0.9980108141899109,grad_norm: 0.9999992015571432, iteration: 275075
loss: 1.1003674268722534,grad_norm: 0.9999995536937136, iteration: 275076
loss: 0.9934107065200806,grad_norm: 0.9999989893807001, iteration: 275077
loss: 0.9632337689399719,grad_norm: 0.9111794710048015, iteration: 275078
loss: 1.0511634349822998,grad_norm: 1.0000000395309752, iteration: 275079
loss: 1.0042214393615723,grad_norm: 0.8108082675439502, iteration: 275080
loss: 1.1220818758010864,grad_norm: 0.999999529670025, iteration: 275081
loss: 0.9395797848701477,grad_norm: 0.7433888902446987, iteration: 275082
loss: 0.9889979958534241,grad_norm: 0.9030040202140387, iteration: 275083
loss: 0.9956523180007935,grad_norm: 0.7871413700970692, iteration: 275084
loss: 0.9652320742607117,grad_norm: 0.8800680513772646, iteration: 275085
loss: 0.9981381893157959,grad_norm: 0.8496464379386784, iteration: 275086
loss: 0.976861298084259,grad_norm: 0.8641595605649374, iteration: 275087
loss: 0.971538782119751,grad_norm: 0.8926550172268194, iteration: 275088
loss: 1.0184364318847656,grad_norm: 0.8014966807826982, iteration: 275089
loss: 1.0025875568389893,grad_norm: 0.9999990493900248, iteration: 275090
loss: 0.991030216217041,grad_norm: 0.9090350425915559, iteration: 275091
loss: 1.0242303609848022,grad_norm: 0.8267948436755654, iteration: 275092
loss: 1.0080631971359253,grad_norm: 0.876959810757251, iteration: 275093
loss: 0.989762544631958,grad_norm: 0.7659614548030304, iteration: 275094
loss: 0.9909635186195374,grad_norm: 0.9281723950306141, iteration: 275095
loss: 1.042953372001648,grad_norm: 0.9178890684114472, iteration: 275096
loss: 1.0198231935501099,grad_norm: 0.9142458279012369, iteration: 275097
loss: 0.9768932461738586,grad_norm: 0.9999991329539047, iteration: 275098
loss: 1.0048160552978516,grad_norm: 0.7896821870624032, iteration: 275099
loss: 0.9702649712562561,grad_norm: 0.8775967225024036, iteration: 275100
loss: 1.0750113725662231,grad_norm: 0.9513963879533099, iteration: 275101
loss: 0.9743275046348572,grad_norm: 0.9747929583514098, iteration: 275102
loss: 1.0213298797607422,grad_norm: 0.9340719826251462, iteration: 275103
loss: 0.9632432460784912,grad_norm: 0.6832420656401984, iteration: 275104
loss: 0.9839328527450562,grad_norm: 0.9011155094528089, iteration: 275105
loss: 0.9724169969558716,grad_norm: 0.8765123085405935, iteration: 275106
loss: 1.011904001235962,grad_norm: 0.7898632079693158, iteration: 275107
loss: 1.0411415100097656,grad_norm: 0.9999998016441469, iteration: 275108
loss: 0.9913036823272705,grad_norm: 0.803185487740435, iteration: 275109
loss: 1.0042163133621216,grad_norm: 0.8545738092110337, iteration: 275110
loss: 1.00259268283844,grad_norm: 0.9801405147333739, iteration: 275111
loss: 1.0272092819213867,grad_norm: 0.9999989609739157, iteration: 275112
loss: 1.010690689086914,grad_norm: 0.8253273694656909, iteration: 275113
loss: 1.0120691061019897,grad_norm: 0.8176676675198684, iteration: 275114
loss: 1.0282914638519287,grad_norm: 0.9999991042605935, iteration: 275115
loss: 1.0311226844787598,grad_norm: 0.9999999067972841, iteration: 275116
loss: 0.9666592478752136,grad_norm: 0.750283577394083, iteration: 275117
loss: 0.9799015522003174,grad_norm: 0.7669818528115118, iteration: 275118
loss: 1.0384944677352905,grad_norm: 0.8333592933988859, iteration: 275119
loss: 1.0105550289154053,grad_norm: 0.760179626994674, iteration: 275120
loss: 0.9669464826583862,grad_norm: 0.7762308068329656, iteration: 275121
loss: 0.9915406703948975,grad_norm: 0.8450151383646699, iteration: 275122
loss: 1.0209556818008423,grad_norm: 0.9738642509150848, iteration: 275123
loss: 0.9781490564346313,grad_norm: 0.7134919335537655, iteration: 275124
loss: 0.9750045537948608,grad_norm: 0.8946198397814791, iteration: 275125
loss: 0.9825951457023621,grad_norm: 0.9999990472543384, iteration: 275126
loss: 1.093907356262207,grad_norm: 0.9999999349783917, iteration: 275127
loss: 0.981156051158905,grad_norm: 0.8045786527801202, iteration: 275128
loss: 1.00131356716156,grad_norm: 0.7700073546399868, iteration: 275129
loss: 0.9827479124069214,grad_norm: 0.88927876332056, iteration: 275130
loss: 0.9925130605697632,grad_norm: 0.98841342957368, iteration: 275131
loss: 0.9832805395126343,grad_norm: 0.9233406592202106, iteration: 275132
loss: 1.0154454708099365,grad_norm: 0.9313478657190134, iteration: 275133
loss: 1.0601531267166138,grad_norm: 0.8878157780476672, iteration: 275134
loss: 1.0450822114944458,grad_norm: 0.9999992835490426, iteration: 275135
loss: 0.9998896718025208,grad_norm: 0.9201006027428449, iteration: 275136
loss: 1.0030326843261719,grad_norm: 0.9650703675877174, iteration: 275137
loss: 1.0314619541168213,grad_norm: 0.7888035106480377, iteration: 275138
loss: 0.9781844019889832,grad_norm: 0.8587935762350791, iteration: 275139
loss: 0.9860293865203857,grad_norm: 0.9999992505701252, iteration: 275140
loss: 1.0524544715881348,grad_norm: 0.9999992619744875, iteration: 275141
loss: 0.9645745754241943,grad_norm: 0.933564616336935, iteration: 275142
loss: 1.0911060571670532,grad_norm: 0.7905949781477841, iteration: 275143
loss: 1.0291060209274292,grad_norm: 0.8629100281090999, iteration: 275144
loss: 0.9892632365226746,grad_norm: 0.8317911032820391, iteration: 275145
loss: 1.0049372911453247,grad_norm: 0.8285642737946118, iteration: 275146
loss: 0.9813604950904846,grad_norm: 0.6758768385649118, iteration: 275147
loss: 1.0126824378967285,grad_norm: 0.8343764370059611, iteration: 275148
loss: 1.0198777914047241,grad_norm: 0.9999992313561668, iteration: 275149
loss: 1.057267189025879,grad_norm: 0.9999994922245, iteration: 275150
loss: 1.0275211334228516,grad_norm: 0.8877452655434629, iteration: 275151
loss: 0.9703172445297241,grad_norm: 0.7389880308024823, iteration: 275152
loss: 0.9789572358131409,grad_norm: 0.8631906957961051, iteration: 275153
loss: 1.011841058731079,grad_norm: 0.9999990803742497, iteration: 275154
loss: 0.9982593059539795,grad_norm: 0.8463545488856616, iteration: 275155
loss: 0.9633674621582031,grad_norm: 0.740927252496984, iteration: 275156
loss: 1.0070583820343018,grad_norm: 0.9083224036711812, iteration: 275157
loss: 1.0561891794204712,grad_norm: 0.8237465917337609, iteration: 275158
loss: 1.0061558485031128,grad_norm: 0.8618182094904355, iteration: 275159
loss: 0.988061249256134,grad_norm: 0.8293960135901195, iteration: 275160
loss: 1.0029493570327759,grad_norm: 0.8073964459231173, iteration: 275161
loss: 1.0065933465957642,grad_norm: 0.9050945295356246, iteration: 275162
loss: 0.999626636505127,grad_norm: 0.9536406363726251, iteration: 275163
loss: 1.020763874053955,grad_norm: 0.9781694301142053, iteration: 275164
loss: 0.9970001578330994,grad_norm: 0.9999990977541043, iteration: 275165
loss: 1.0315779447555542,grad_norm: 0.97999305697689, iteration: 275166
loss: 0.9871934652328491,grad_norm: 0.8289695035177921, iteration: 275167
loss: 1.041288137435913,grad_norm: 0.9824820859019721, iteration: 275168
loss: 1.0059775114059448,grad_norm: 0.9999990419622767, iteration: 275169
loss: 0.9895286560058594,grad_norm: 0.9999991162818932, iteration: 275170
loss: 1.010695457458496,grad_norm: 0.8458017836995538, iteration: 275171
loss: 0.9878586530685425,grad_norm: 0.9999991240354371, iteration: 275172
loss: 1.0020787715911865,grad_norm: 0.9114544749479535, iteration: 275173
loss: 1.022337555885315,grad_norm: 0.7588847373218437, iteration: 275174
loss: 1.0255869626998901,grad_norm: 0.9070960377414408, iteration: 275175
loss: 0.9763635396957397,grad_norm: 0.9494319798212977, iteration: 275176
loss: 0.9717063903808594,grad_norm: 0.7699014114156222, iteration: 275177
loss: 1.020868182182312,grad_norm: 0.8467657992694003, iteration: 275178
loss: 0.9951741695404053,grad_norm: 0.842804222785937, iteration: 275179
loss: 0.9706921577453613,grad_norm: 0.9623256014274053, iteration: 275180
loss: 1.0389233827590942,grad_norm: 0.8803630498928254, iteration: 275181
loss: 1.0201873779296875,grad_norm: 0.999999247441788, iteration: 275182
loss: 0.983042299747467,grad_norm: 0.9302307254634696, iteration: 275183
loss: 0.982541561126709,grad_norm: 0.9999993131168294, iteration: 275184
loss: 1.0280954837799072,grad_norm: 0.9452860012214866, iteration: 275185
loss: 0.9924439787864685,grad_norm: 0.9981947608972663, iteration: 275186
loss: 1.001663327217102,grad_norm: 0.8792238376195222, iteration: 275187
loss: 0.9835317730903625,grad_norm: 0.8879373328847401, iteration: 275188
loss: 0.9982787370681763,grad_norm: 0.9232554558801578, iteration: 275189
loss: 1.0144681930541992,grad_norm: 0.8621819806710742, iteration: 275190
loss: 1.0356619358062744,grad_norm: 0.9694421702607645, iteration: 275191
loss: 0.994518518447876,grad_norm: 0.9471809673728134, iteration: 275192
loss: 1.0269277095794678,grad_norm: 0.8573639774850347, iteration: 275193
loss: 0.9723370671272278,grad_norm: 0.9221018745459816, iteration: 275194
loss: 0.9895274639129639,grad_norm: 0.8161613242231035, iteration: 275195
loss: 1.0612869262695312,grad_norm: 0.9644064082529296, iteration: 275196
loss: 1.014432430267334,grad_norm: 0.9202881637265338, iteration: 275197
loss: 1.016011357307434,grad_norm: 0.9999989918426972, iteration: 275198
loss: 0.970372200012207,grad_norm: 0.7594672569063246, iteration: 275199
loss: 1.0019080638885498,grad_norm: 0.9078625563079488, iteration: 275200
loss: 0.9795300364494324,grad_norm: 0.9253368806797271, iteration: 275201
loss: 0.9874621033668518,grad_norm: 0.9999990060100395, iteration: 275202
loss: 0.9956694841384888,grad_norm: 0.8390849570773604, iteration: 275203
loss: 1.00637686252594,grad_norm: 0.7913893595983971, iteration: 275204
loss: 1.0150586366653442,grad_norm: 0.8511224122387328, iteration: 275205
loss: 1.044567584991455,grad_norm: 0.9453563496822159, iteration: 275206
loss: 0.9892975687980652,grad_norm: 0.846871153851872, iteration: 275207
loss: 1.0260027647018433,grad_norm: 0.9044515749483807, iteration: 275208
loss: 0.9975588917732239,grad_norm: 0.7711406437366375, iteration: 275209
loss: 1.0095361471176147,grad_norm: 0.8908797628334396, iteration: 275210
loss: 0.9773385524749756,grad_norm: 0.9329021960782053, iteration: 275211
loss: 1.0132570266723633,grad_norm: 0.970248050949822, iteration: 275212
loss: 0.9757979512214661,grad_norm: 0.9999990967059831, iteration: 275213
loss: 0.9825085997581482,grad_norm: 0.8665540856417322, iteration: 275214
loss: 0.9967440366744995,grad_norm: 0.8837621940647992, iteration: 275215
loss: 0.9982541799545288,grad_norm: 0.9999992502750538, iteration: 275216
loss: 0.9790738224983215,grad_norm: 0.9308947511018776, iteration: 275217
loss: 1.0379737615585327,grad_norm: 0.9999990234435606, iteration: 275218
loss: 1.0429058074951172,grad_norm: 0.9838553010652409, iteration: 275219
loss: 0.9767744541168213,grad_norm: 0.7804213762331196, iteration: 275220
loss: 0.9866876006126404,grad_norm: 0.9999991827362167, iteration: 275221
loss: 0.9871442317962646,grad_norm: 0.7587214559317346, iteration: 275222
loss: 1.0108437538146973,grad_norm: 0.7537838275910899, iteration: 275223
loss: 0.9922799468040466,grad_norm: 0.8192019784593904, iteration: 275224
loss: 1.0016874074935913,grad_norm: 0.7846493347973024, iteration: 275225
loss: 0.9993258714675903,grad_norm: 0.8282377908777537, iteration: 275226
loss: 0.9928110837936401,grad_norm: 0.7776075646723285, iteration: 275227
loss: 1.062136173248291,grad_norm: 0.9867223113919114, iteration: 275228
loss: 1.0923022031784058,grad_norm: 0.8236223991950172, iteration: 275229
loss: 1.0581644773483276,grad_norm: 0.9999991935015915, iteration: 275230
loss: 1.0396755933761597,grad_norm: 0.9104877656572367, iteration: 275231
loss: 0.99537593126297,grad_norm: 0.7930107241208461, iteration: 275232
loss: 0.9600961208343506,grad_norm: 0.9467992938393376, iteration: 275233
loss: 0.9978089332580566,grad_norm: 0.9468151745195713, iteration: 275234
loss: 0.96466463804245,grad_norm: 0.8546009237917268, iteration: 275235
loss: 1.0044214725494385,grad_norm: 0.9999992024456681, iteration: 275236
loss: 1.0160266160964966,grad_norm: 0.9676856193387261, iteration: 275237
loss: 0.9940018653869629,grad_norm: 0.9999991370519287, iteration: 275238
loss: 1.0144137144088745,grad_norm: 0.999999078156806, iteration: 275239
loss: 0.9929013252258301,grad_norm: 0.7257863962917859, iteration: 275240
loss: 0.9909588694572449,grad_norm: 0.9999992296389627, iteration: 275241
loss: 1.0380584001541138,grad_norm: 0.9999999091538361, iteration: 275242
loss: 0.9951282739639282,grad_norm: 0.8170576252728629, iteration: 275243
loss: 0.9502638578414917,grad_norm: 0.9999991364558123, iteration: 275244
loss: 0.9805391430854797,grad_norm: 0.9999991646444082, iteration: 275245
loss: 0.9762329459190369,grad_norm: 0.8655913937811902, iteration: 275246
loss: 0.9899384379386902,grad_norm: 0.8085871120197933, iteration: 275247
loss: 0.9764031767845154,grad_norm: 0.7612212619985343, iteration: 275248
loss: 0.9812506437301636,grad_norm: 0.7919812344136402, iteration: 275249
loss: 0.9892538189888,grad_norm: 0.9014602534056467, iteration: 275250
loss: 0.9919518232345581,grad_norm: 0.8428850836362578, iteration: 275251
loss: 0.9946194291114807,grad_norm: 0.7846179464197722, iteration: 275252
loss: 0.9871166944503784,grad_norm: 0.8241254073941799, iteration: 275253
loss: 1.0602407455444336,grad_norm: 0.9999994419865279, iteration: 275254
loss: 0.9537950158119202,grad_norm: 0.7126153220471121, iteration: 275255
loss: 1.0144755840301514,grad_norm: 0.8447386110368359, iteration: 275256
loss: 0.980596661567688,grad_norm: 0.8863106545083986, iteration: 275257
loss: 1.0445665121078491,grad_norm: 0.8269114450805525, iteration: 275258
loss: 1.025079369544983,grad_norm: 0.999999186799693, iteration: 275259
loss: 0.9866303205490112,grad_norm: 0.968005755134089, iteration: 275260
loss: 0.9702359437942505,grad_norm: 0.7684292359164319, iteration: 275261
loss: 1.0125126838684082,grad_norm: 0.923673302864295, iteration: 275262
loss: 1.0356196165084839,grad_norm: 0.9717537423996583, iteration: 275263
loss: 1.018022060394287,grad_norm: 0.9999991730227848, iteration: 275264
loss: 0.9771173596382141,grad_norm: 0.7946641160286992, iteration: 275265
loss: 0.9849390387535095,grad_norm: 0.9329712047135026, iteration: 275266
loss: 0.9918844103813171,grad_norm: 0.7752317144470592, iteration: 275267
loss: 1.0769182443618774,grad_norm: 0.9999995893053503, iteration: 275268
loss: 1.0245308876037598,grad_norm: 0.8913889878475995, iteration: 275269
loss: 0.9774264097213745,grad_norm: 0.9999990356906995, iteration: 275270
loss: 0.9566825032234192,grad_norm: 0.8731457230562562, iteration: 275271
loss: 0.9880384206771851,grad_norm: 0.7990202583858813, iteration: 275272
loss: 1.0086915493011475,grad_norm: 0.9389473495311377, iteration: 275273
loss: 1.0088130235671997,grad_norm: 0.880484148811503, iteration: 275274
loss: 1.0031358003616333,grad_norm: 0.8446543513122281, iteration: 275275
loss: 1.019730567932129,grad_norm: 0.9335160343446418, iteration: 275276
loss: 1.0199795961380005,grad_norm: 0.9999993649063028, iteration: 275277
loss: 1.0001821517944336,grad_norm: 0.8403198107165605, iteration: 275278
loss: 0.9948152899742126,grad_norm: 0.9999996534175739, iteration: 275279
loss: 1.0223294496536255,grad_norm: 0.8374922689676807, iteration: 275280
loss: 0.9745956063270569,grad_norm: 0.9999991254088293, iteration: 275281
loss: 0.9735490679740906,grad_norm: 0.8273413135467541, iteration: 275282
loss: 1.0058832168579102,grad_norm: 0.8118288495509899, iteration: 275283
loss: 1.0566383600234985,grad_norm: 0.9999990976689715, iteration: 275284
loss: 0.966893196105957,grad_norm: 0.6775035156797213, iteration: 275285
loss: 1.0231294631958008,grad_norm: 0.9673642704608874, iteration: 275286
loss: 0.9945344924926758,grad_norm: 0.9401959950216626, iteration: 275287
loss: 1.003125786781311,grad_norm: 0.9999990957301496, iteration: 275288
loss: 0.9775663614273071,grad_norm: 0.999999902870738, iteration: 275289
loss: 1.1769288778305054,grad_norm: 0.9999995944891678, iteration: 275290
loss: 1.0839067697525024,grad_norm: 0.9999996050030214, iteration: 275291
loss: 1.1802488565444946,grad_norm: 0.9999993738751792, iteration: 275292
loss: 1.2931314706802368,grad_norm: 0.9999995451596463, iteration: 275293
loss: 1.169097661972046,grad_norm: 0.9999998827900954, iteration: 275294
loss: 1.1939756870269775,grad_norm: 0.9999999218224016, iteration: 275295
loss: 1.1053677797317505,grad_norm: 0.9999994368861912, iteration: 275296
loss: 1.2510128021240234,grad_norm: 0.9999997862121334, iteration: 275297
loss: 1.0519251823425293,grad_norm: 0.99999971802603, iteration: 275298
loss: 1.0051923990249634,grad_norm: 0.7324255061694201, iteration: 275299
loss: 1.167807936668396,grad_norm: 0.9999998168030958, iteration: 275300
loss: 1.1513630151748657,grad_norm: 0.9999999657054947, iteration: 275301
loss: 1.2053340673446655,grad_norm: 0.999999518789006, iteration: 275302
loss: 1.0893319845199585,grad_norm: 1.0000000575923744, iteration: 275303
loss: 1.108400583267212,grad_norm: 0.9999998193225874, iteration: 275304
loss: 0.9670200943946838,grad_norm: 0.872500929170636, iteration: 275305
loss: 1.0559635162353516,grad_norm: 0.999999423080199, iteration: 275306
loss: 1.0230127573013306,grad_norm: 0.9999991115712146, iteration: 275307
loss: 1.182839035987854,grad_norm: 0.9999998892552757, iteration: 275308
loss: 1.1317224502563477,grad_norm: 0.9999999112627354, iteration: 275309
loss: 1.0411373376846313,grad_norm: 0.9999993651802915, iteration: 275310
loss: 1.1111189126968384,grad_norm: 0.999999338502244, iteration: 275311
loss: 1.1430737972259521,grad_norm: 0.999999850773265, iteration: 275312
loss: 1.1433335542678833,grad_norm: 0.9999996100817169, iteration: 275313
loss: 1.188780665397644,grad_norm: 0.9999996274099427, iteration: 275314
loss: 1.1932450532913208,grad_norm: 0.9999998750806204, iteration: 275315
loss: 1.173432469367981,grad_norm: 0.9999992923324257, iteration: 275316
loss: 1.0748945474624634,grad_norm: 0.8775163109612653, iteration: 275317
loss: 1.0206342935562134,grad_norm: 0.9999994823071425, iteration: 275318
loss: 1.1951547861099243,grad_norm: 0.9999998527911216, iteration: 275319
loss: 1.0512691736221313,grad_norm: 0.9999998227416608, iteration: 275320
loss: 1.102238655090332,grad_norm: 0.9999999033345377, iteration: 275321
loss: 1.0897711515426636,grad_norm: 0.9999996334980624, iteration: 275322
loss: 1.2009570598602295,grad_norm: 0.9999999660095251, iteration: 275323
loss: 1.1948686838150024,grad_norm: 0.9999998276015485, iteration: 275324
loss: 0.9996085166931152,grad_norm: 0.9999991525947922, iteration: 275325
loss: 1.2626183032989502,grad_norm: 0.9999996101848847, iteration: 275326
loss: 1.089974284172058,grad_norm: 0.9999993659116607, iteration: 275327
loss: 1.0782670974731445,grad_norm: 0.9999999939778408, iteration: 275328
loss: 1.2189748287200928,grad_norm: 0.9999999062363822, iteration: 275329
loss: 1.0019136667251587,grad_norm: 0.9999997246092276, iteration: 275330
loss: 1.0259835720062256,grad_norm: 0.9999995363974737, iteration: 275331
loss: 1.1247334480285645,grad_norm: 0.9999999578787565, iteration: 275332
loss: 0.9796007871627808,grad_norm: 0.8475576499183017, iteration: 275333
loss: 1.0119858980178833,grad_norm: 0.9255971687049311, iteration: 275334
loss: 1.0290945768356323,grad_norm: 0.9197681181808284, iteration: 275335
loss: 1.0747684240341187,grad_norm: 0.9999994041910629, iteration: 275336
loss: 0.9750078916549683,grad_norm: 0.7104437127549743, iteration: 275337
loss: 1.23130464553833,grad_norm: 0.9999996423855251, iteration: 275338
loss: 1.0086383819580078,grad_norm: 0.8259577830022024, iteration: 275339
loss: 1.000002145767212,grad_norm: 0.8628017057528489, iteration: 275340
loss: 1.224377989768982,grad_norm: 0.9999994623946574, iteration: 275341
loss: 1.0215942859649658,grad_norm: 0.9999991126910073, iteration: 275342
loss: 1.001834750175476,grad_norm: 0.9999996768217305, iteration: 275343
loss: 1.0469173192977905,grad_norm: 0.9119232616876376, iteration: 275344
loss: 1.0582575798034668,grad_norm: 0.9999991452399786, iteration: 275345
loss: 0.9937068819999695,grad_norm: 0.9999999339858103, iteration: 275346
loss: 0.99029940366745,grad_norm: 0.8604588035357831, iteration: 275347
loss: 1.049281358718872,grad_norm: 0.9999991804985429, iteration: 275348
loss: 0.9902229905128479,grad_norm: 0.987213415224413, iteration: 275349
loss: 1.122276782989502,grad_norm: 0.9999995537692704, iteration: 275350
loss: 0.9980638027191162,grad_norm: 0.9946692059701088, iteration: 275351
loss: 1.0069154500961304,grad_norm: 0.7514892568789389, iteration: 275352
loss: 1.0130738019943237,grad_norm: 0.9999992657129105, iteration: 275353
loss: 0.9965708255767822,grad_norm: 0.9615722443333753, iteration: 275354
loss: 1.1677260398864746,grad_norm: 0.9999993455878707, iteration: 275355
loss: 1.2105088233947754,grad_norm: 0.9999997188610621, iteration: 275356
loss: 1.0129534006118774,grad_norm: 0.9999998091919106, iteration: 275357
loss: 1.0091646909713745,grad_norm: 0.9999990557668572, iteration: 275358
loss: 0.9897375702857971,grad_norm: 0.7914914157890992, iteration: 275359
loss: 1.00962495803833,grad_norm: 0.9999996061636077, iteration: 275360
loss: 1.023511528968811,grad_norm: 0.9116602888805342, iteration: 275361
loss: 1.0314844846725464,grad_norm: 0.9537724482946391, iteration: 275362
loss: 0.9539292454719543,grad_norm: 0.8008018024809446, iteration: 275363
loss: 1.0204988718032837,grad_norm: 0.9999993551701274, iteration: 275364
loss: 1.2965751886367798,grad_norm: 0.9999999166579332, iteration: 275365
loss: 1.0894733667373657,grad_norm: 0.9999990297074564, iteration: 275366
loss: 1.0538336038589478,grad_norm: 0.9999995459900272, iteration: 275367
loss: 1.171064019203186,grad_norm: 0.9999990207771853, iteration: 275368
loss: 1.0080904960632324,grad_norm: 0.9999991310976183, iteration: 275369
loss: 1.0836790800094604,grad_norm: 0.9999998229228292, iteration: 275370
loss: 1.041099190711975,grad_norm: 0.9999990370199197, iteration: 275371
loss: 1.065877079963684,grad_norm: 0.8134569288148471, iteration: 275372
loss: 0.9515535235404968,grad_norm: 0.9174064649419066, iteration: 275373
loss: 1.0108418464660645,grad_norm: 0.9880606433397595, iteration: 275374
loss: 1.008470892906189,grad_norm: 0.9612221319011401, iteration: 275375
loss: 1.3335611820220947,grad_norm: 0.9999995731763757, iteration: 275376
loss: 1.0127233266830444,grad_norm: 0.9276219391084336, iteration: 275377
loss: 1.0136066675186157,grad_norm: 0.9424393591750588, iteration: 275378
loss: 1.0302512645721436,grad_norm: 0.9999990822823681, iteration: 275379
loss: 1.063375473022461,grad_norm: 0.9999992625742741, iteration: 275380
loss: 1.0160996913909912,grad_norm: 0.9191879332933602, iteration: 275381
loss: 0.998306930065155,grad_norm: 0.999999048139839, iteration: 275382
loss: 1.0306740999221802,grad_norm: 0.9999996958142906, iteration: 275383
loss: 1.0079686641693115,grad_norm: 1.0000001384552464, iteration: 275384
loss: 0.9865202903747559,grad_norm: 0.9999990732374184, iteration: 275385
loss: 0.9906882047653198,grad_norm: 0.9999994130764451, iteration: 275386
loss: 1.0034139156341553,grad_norm: 0.8214687812856217, iteration: 275387
loss: 1.0414128303527832,grad_norm: 0.8754604891621134, iteration: 275388
loss: 1.0094804763793945,grad_norm: 0.9999991094703518, iteration: 275389
loss: 1.1174407005310059,grad_norm: 0.9999993693788678, iteration: 275390
loss: 0.9761085510253906,grad_norm: 0.8159101484876183, iteration: 275391
loss: 1.0226035118103027,grad_norm: 0.999999109358508, iteration: 275392
loss: 1.152177095413208,grad_norm: 0.9999998722302886, iteration: 275393
loss: 1.0978549718856812,grad_norm: 0.982672788264139, iteration: 275394
loss: 0.974925696849823,grad_norm: 0.9999990283746576, iteration: 275395
loss: 1.12134850025177,grad_norm: 0.9999991417726136, iteration: 275396
loss: 0.9884225726127625,grad_norm: 0.8534549009844389, iteration: 275397
loss: 1.1547774076461792,grad_norm: 0.9999998945382245, iteration: 275398
loss: 1.0430505275726318,grad_norm: 0.9999998140402059, iteration: 275399
loss: 1.0423426628112793,grad_norm: 0.9079096195939563, iteration: 275400
loss: 1.0112128257751465,grad_norm: 0.7972324645504308, iteration: 275401
loss: 1.1544106006622314,grad_norm: 0.9999996771092909, iteration: 275402
loss: 1.0285264253616333,grad_norm: 0.999999049966116, iteration: 275403
loss: 0.9895505309104919,grad_norm: 0.9144407207554497, iteration: 275404
loss: 1.0101927518844604,grad_norm: 0.9999990491253917, iteration: 275405
loss: 1.0057783126831055,grad_norm: 0.9999992884587019, iteration: 275406
loss: 1.0292726755142212,grad_norm: 0.8271570722880601, iteration: 275407
loss: 1.0332481861114502,grad_norm: 0.7121853721610333, iteration: 275408
loss: 0.999407172203064,grad_norm: 0.9999990851066969, iteration: 275409
loss: 1.010515570640564,grad_norm: 0.9999994169829134, iteration: 275410
loss: 1.0024868249893188,grad_norm: 0.819958851888332, iteration: 275411
loss: 0.9796594977378845,grad_norm: 0.9610436243297688, iteration: 275412
loss: 1.029215931892395,grad_norm: 0.9165157536870515, iteration: 275413
loss: 0.9928438067436218,grad_norm: 0.8890260976924697, iteration: 275414
loss: 1.0793405771255493,grad_norm: 0.999999394984382, iteration: 275415
loss: 0.970306396484375,grad_norm: 0.9999991361654168, iteration: 275416
loss: 0.9997283816337585,grad_norm: 0.9999991670211908, iteration: 275417
loss: 0.9885737895965576,grad_norm: 0.8783632733503541, iteration: 275418
loss: 0.9758633375167847,grad_norm: 0.9999990414442935, iteration: 275419
loss: 0.9861811995506287,grad_norm: 0.9192975188821955, iteration: 275420
loss: 1.00064218044281,grad_norm: 0.9999991767606093, iteration: 275421
loss: 0.981988787651062,grad_norm: 0.8358019397361288, iteration: 275422
loss: 0.9693200588226318,grad_norm: 0.8544455367095083, iteration: 275423
loss: 1.0066719055175781,grad_norm: 0.9818617316353347, iteration: 275424
loss: 1.057366967201233,grad_norm: 0.9999991316644088, iteration: 275425
loss: 0.9969892501831055,grad_norm: 0.9999990847050773, iteration: 275426
loss: 1.00578773021698,grad_norm: 0.8278924228513396, iteration: 275427
loss: 1.016611933708191,grad_norm: 0.999999164704712, iteration: 275428
loss: 1.038348913192749,grad_norm: 0.9358393890090189, iteration: 275429
loss: 0.9922758936882019,grad_norm: 0.7892428331661262, iteration: 275430
loss: 1.0220822095870972,grad_norm: 0.9507719017818005, iteration: 275431
loss: 1.0021861791610718,grad_norm: 0.7844178518043211, iteration: 275432
loss: 0.9858826994895935,grad_norm: 0.7232533666039164, iteration: 275433
loss: 1.0437836647033691,grad_norm: 0.9999994005547966, iteration: 275434
loss: 1.0206869840621948,grad_norm: 0.802398542423367, iteration: 275435
loss: 0.9829631447792053,grad_norm: 0.8396972457513167, iteration: 275436
loss: 0.9717444777488708,grad_norm: 0.8004782116741226, iteration: 275437
loss: 1.046877145767212,grad_norm: 0.9999995592113258, iteration: 275438
loss: 1.0124808549880981,grad_norm: 0.99999915318008, iteration: 275439
loss: 0.9751582741737366,grad_norm: 0.9999990946647467, iteration: 275440
loss: 0.9846577644348145,grad_norm: 0.9188855138331683, iteration: 275441
loss: 1.087659478187561,grad_norm: 0.9999995029690354, iteration: 275442
loss: 1.0029473304748535,grad_norm: 0.8360436714134803, iteration: 275443
loss: 1.00848388671875,grad_norm: 0.803154258719699, iteration: 275444
loss: 1.018009901046753,grad_norm: 0.9178774215418743, iteration: 275445
loss: 1.0726886987686157,grad_norm: 0.9999992084275192, iteration: 275446
loss: 1.018120527267456,grad_norm: 0.7716527762271369, iteration: 275447
loss: 0.978061318397522,grad_norm: 0.80251292213513, iteration: 275448
loss: 1.0759211778640747,grad_norm: 0.9999995818700899, iteration: 275449
loss: 1.2012853622436523,grad_norm: 0.9999998117343593, iteration: 275450
loss: 0.9555624127388,grad_norm: 0.8337473605342675, iteration: 275451
loss: 1.0016499757766724,grad_norm: 0.9477895774551313, iteration: 275452
loss: 0.9653664827346802,grad_norm: 0.8659762670377903, iteration: 275453
loss: 0.99004727602005,grad_norm: 0.9999992128277565, iteration: 275454
loss: 1.0433074235916138,grad_norm: 0.9999999414037298, iteration: 275455
loss: 0.9986723065376282,grad_norm: 0.9623322102663912, iteration: 275456
loss: 1.0471692085266113,grad_norm: 0.7910061327594843, iteration: 275457
loss: 0.9951375722885132,grad_norm: 0.9134622420504633, iteration: 275458
loss: 1.0040560960769653,grad_norm: 0.8094170688829994, iteration: 275459
loss: 1.0053943395614624,grad_norm: 0.8134747862435898, iteration: 275460
loss: 0.9893879890441895,grad_norm: 0.950003127013097, iteration: 275461
loss: 1.0151360034942627,grad_norm: 0.9307586745454518, iteration: 275462
loss: 1.077337384223938,grad_norm: 0.9999991925566378, iteration: 275463
loss: 1.014096975326538,grad_norm: 0.9999991944211035, iteration: 275464
loss: 0.9481307864189148,grad_norm: 0.8943448401996491, iteration: 275465
loss: 1.1703073978424072,grad_norm: 0.9999998838346255, iteration: 275466
loss: 0.98731929063797,grad_norm: 0.9390499396018366, iteration: 275467
loss: 1.0187673568725586,grad_norm: 0.7897858380287549, iteration: 275468
loss: 1.002387285232544,grad_norm: 0.9347201212968259, iteration: 275469
loss: 0.9689223170280457,grad_norm: 0.9792575968725516, iteration: 275470
loss: 0.9942547678947449,grad_norm: 0.9999999087656966, iteration: 275471
loss: 1.0401285886764526,grad_norm: 0.9999990498029502, iteration: 275472
loss: 1.0660532712936401,grad_norm: 0.8521337085282926, iteration: 275473
loss: 1.0437830686569214,grad_norm: 0.9999994193843762, iteration: 275474
loss: 0.9865598082542419,grad_norm: 0.773920551831401, iteration: 275475
loss: 1.0262823104858398,grad_norm: 0.989620347649992, iteration: 275476
loss: 0.9656212329864502,grad_norm: 0.7539468484024269, iteration: 275477
loss: 1.0239665508270264,grad_norm: 0.8345602081343108, iteration: 275478
loss: 1.0123600959777832,grad_norm: 0.9999990022003722, iteration: 275479
loss: 1.007730484008789,grad_norm: 0.7679718934776518, iteration: 275480
loss: 0.9888485074043274,grad_norm: 0.9873887469941521, iteration: 275481
loss: 0.9925063252449036,grad_norm: 0.8982833381935155, iteration: 275482
loss: 1.0312424898147583,grad_norm: 0.9607385529357406, iteration: 275483
loss: 0.9973049759864807,grad_norm: 0.895777482525639, iteration: 275484
loss: 0.9955914616584778,grad_norm: 0.8002448062465122, iteration: 275485
loss: 0.9804356098175049,grad_norm: 0.9999992684482643, iteration: 275486
loss: 1.0063414573669434,grad_norm: 0.9754054410309555, iteration: 275487
loss: 0.9612972140312195,grad_norm: 0.9999990508867111, iteration: 275488
loss: 1.000954270362854,grad_norm: 0.8248669118310366, iteration: 275489
loss: 1.0001693964004517,grad_norm: 0.718182036833862, iteration: 275490
loss: 0.9565684199333191,grad_norm: 0.8932923793219737, iteration: 275491
loss: 1.0496907234191895,grad_norm: 0.9999998871139971, iteration: 275492
loss: 0.9853091239929199,grad_norm: 0.7954651859818266, iteration: 275493
loss: 0.956895649433136,grad_norm: 0.8713994592333058, iteration: 275494
loss: 1.046774983406067,grad_norm: 1.0000000912291611, iteration: 275495
loss: 1.053722620010376,grad_norm: 0.9999998505241302, iteration: 275496
loss: 0.9816562533378601,grad_norm: 0.8031498269461907, iteration: 275497
loss: 0.9937980771064758,grad_norm: 0.9999992629080738, iteration: 275498
loss: 1.0050357580184937,grad_norm: 0.8860614631220427, iteration: 275499
loss: 1.0440770387649536,grad_norm: 0.9999991337175804, iteration: 275500
loss: 1.0209237337112427,grad_norm: 0.9999998733798383, iteration: 275501
loss: 1.009831190109253,grad_norm: 0.8695679910506364, iteration: 275502
loss: 1.0087885856628418,grad_norm: 0.9008905327908924, iteration: 275503
loss: 1.0303659439086914,grad_norm: 0.8736517254567514, iteration: 275504
loss: 1.0154814720153809,grad_norm: 0.9082991148256965, iteration: 275505
loss: 0.9742716550827026,grad_norm: 0.8539802912893132, iteration: 275506
loss: 1.0503993034362793,grad_norm: 0.9999990045965909, iteration: 275507
loss: 1.054770827293396,grad_norm: 0.9999995707070406, iteration: 275508
loss: 1.0039868354797363,grad_norm: 0.8842336418431334, iteration: 275509
loss: 1.0013771057128906,grad_norm: 0.7880674018689355, iteration: 275510
loss: 1.0186330080032349,grad_norm: 0.9999991497591831, iteration: 275511
loss: 1.0299561023712158,grad_norm: 0.9999994409904877, iteration: 275512
loss: 0.9627719521522522,grad_norm: 0.8913021061235404, iteration: 275513
loss: 0.9981657862663269,grad_norm: 0.8928369902833803, iteration: 275514
loss: 0.9931833744049072,grad_norm: 0.7388711543361138, iteration: 275515
loss: 0.9932562708854675,grad_norm: 0.8896203380716653, iteration: 275516
loss: 1.0738080739974976,grad_norm: 0.9999993799084848, iteration: 275517
loss: 0.9782489538192749,grad_norm: 0.9999991742833529, iteration: 275518
loss: 0.9822516441345215,grad_norm: 0.7840706046619299, iteration: 275519
loss: 0.997414231300354,grad_norm: 0.9339548542623525, iteration: 275520
loss: 0.981741726398468,grad_norm: 0.8983490866680472, iteration: 275521
loss: 0.9743596315383911,grad_norm: 0.9999992162859932, iteration: 275522
loss: 1.0275095701217651,grad_norm: 0.8111284846181973, iteration: 275523
loss: 0.9718276858329773,grad_norm: 0.8418328086052048, iteration: 275524
loss: 1.0076900720596313,grad_norm: 0.808235220895311, iteration: 275525
loss: 1.050378441810608,grad_norm: 0.9999999016055311, iteration: 275526
loss: 0.9902418851852417,grad_norm: 0.999999131106916, iteration: 275527
loss: 1.0137871503829956,grad_norm: 0.6949641575678591, iteration: 275528
loss: 1.0458838939666748,grad_norm: 0.9999993281015783, iteration: 275529
loss: 1.0105754137039185,grad_norm: 0.7932021271898468, iteration: 275530
loss: 0.9892387986183167,grad_norm: 0.8223038634941647, iteration: 275531
loss: 0.9968530535697937,grad_norm: 0.8578880726245399, iteration: 275532
loss: 1.0365132093429565,grad_norm: 0.99999912168514, iteration: 275533
loss: 0.9694504141807556,grad_norm: 0.8169827121221732, iteration: 275534
loss: 1.0292441844940186,grad_norm: 0.9999991138095647, iteration: 275535
loss: 0.9875794053077698,grad_norm: 0.8702417065116999, iteration: 275536
loss: 1.0183125734329224,grad_norm: 0.9999992033286896, iteration: 275537
loss: 0.9942675232887268,grad_norm: 0.7835766051211156, iteration: 275538
loss: 1.030845046043396,grad_norm: 0.8797526555867293, iteration: 275539
loss: 0.9838184714317322,grad_norm: 0.735345841132255, iteration: 275540
loss: 1.1446092128753662,grad_norm: 0.9999991465112711, iteration: 275541
loss: 1.0008875131607056,grad_norm: 0.8200260508462651, iteration: 275542
loss: 1.0919114351272583,grad_norm: 0.9999991059425722, iteration: 275543
loss: 1.0714563131332397,grad_norm: 0.9999991325621277, iteration: 275544
loss: 0.9794161915779114,grad_norm: 0.9999994231516457, iteration: 275545
loss: 0.982879638671875,grad_norm: 0.8643822780020829, iteration: 275546
loss: 1.0769850015640259,grad_norm: 0.9999991869041192, iteration: 275547
loss: 1.0074563026428223,grad_norm: 0.9578703797302377, iteration: 275548
loss: 0.9758397340774536,grad_norm: 0.9649077125485556, iteration: 275549
loss: 1.0018879175186157,grad_norm: 0.8245346285312689, iteration: 275550
loss: 0.9503241777420044,grad_norm: 0.9106835687729453, iteration: 275551
loss: 0.9853023886680603,grad_norm: 0.9056164604846931, iteration: 275552
loss: 1.0237128734588623,grad_norm: 0.9423194444530208, iteration: 275553
loss: 1.064329743385315,grad_norm: 0.99999909665769, iteration: 275554
loss: 1.0082436800003052,grad_norm: 0.920768499008475, iteration: 275555
loss: 0.9817789196968079,grad_norm: 0.9999997821742219, iteration: 275556
loss: 1.0125527381896973,grad_norm: 0.7744860047249212, iteration: 275557
loss: 0.9650768041610718,grad_norm: 0.9425005450166332, iteration: 275558
loss: 1.032200574874878,grad_norm: 0.902363994213893, iteration: 275559
loss: 1.0290075540542603,grad_norm: 0.8738238207562399, iteration: 275560
loss: 0.9852950572967529,grad_norm: 0.9756307075602465, iteration: 275561
loss: 0.9952094554901123,grad_norm: 0.8334014827342301, iteration: 275562
loss: 1.0369470119476318,grad_norm: 0.8873640493329875, iteration: 275563
loss: 0.981982409954071,grad_norm: 0.76648819600311, iteration: 275564
loss: 0.9914661645889282,grad_norm: 0.8710163309925794, iteration: 275565
loss: 1.016746997833252,grad_norm: 1.0000000646927953, iteration: 275566
loss: 1.0274900197982788,grad_norm: 0.752375483359604, iteration: 275567
loss: 1.0070770978927612,grad_norm: 0.8757440125242941, iteration: 275568
loss: 1.003584861755371,grad_norm: 0.7626781820876322, iteration: 275569
loss: 0.9704808592796326,grad_norm: 0.831664422061959, iteration: 275570
loss: 1.0059784650802612,grad_norm: 0.9999992794204831, iteration: 275571
loss: 1.0492184162139893,grad_norm: 0.9999994415763706, iteration: 275572
loss: 1.035420298576355,grad_norm: 0.9999996162856647, iteration: 275573
loss: 1.0451287031173706,grad_norm: 0.9999995168263601, iteration: 275574
loss: 0.9781750440597534,grad_norm: 0.9761813818278228, iteration: 275575
loss: 0.9697081446647644,grad_norm: 0.9999993918553138, iteration: 275576
loss: 1.0426527261734009,grad_norm: 0.9173484785277878, iteration: 275577
loss: 1.03673255443573,grad_norm: 0.9717845886482213, iteration: 275578
loss: 1.0263406038284302,grad_norm: 0.9999991869913937, iteration: 275579
loss: 1.0096282958984375,grad_norm: 0.7080937218405885, iteration: 275580
loss: 1.16840660572052,grad_norm: 0.9999998702741899, iteration: 275581
loss: 1.0080795288085938,grad_norm: 0.7673115504525367, iteration: 275582
loss: 1.0047974586486816,grad_norm: 0.8854575788491457, iteration: 275583
loss: 1.0111247301101685,grad_norm: 0.850338216723783, iteration: 275584
loss: 1.0402958393096924,grad_norm: 0.9999990050019194, iteration: 275585
loss: 1.0339335203170776,grad_norm: 0.7831547869507492, iteration: 275586
loss: 0.9658975005149841,grad_norm: 0.9060255434340051, iteration: 275587
loss: 0.9771618247032166,grad_norm: 0.9227060961630957, iteration: 275588
loss: 1.020247220993042,grad_norm: 0.9693458065061171, iteration: 275589
loss: 1.0084229707717896,grad_norm: 0.9859260690272507, iteration: 275590
loss: 0.9752281308174133,grad_norm: 0.8159486383270093, iteration: 275591
loss: 0.9780014157295227,grad_norm: 0.999999023715736, iteration: 275592
loss: 1.0092718601226807,grad_norm: 0.8271932511735516, iteration: 275593
loss: 1.131029725074768,grad_norm: 0.9999999038279087, iteration: 275594
loss: 1.020288348197937,grad_norm: 0.9201790307485325, iteration: 275595
loss: 1.0020009279251099,grad_norm: 0.8995977081204057, iteration: 275596
loss: 0.9980322122573853,grad_norm: 0.9496072763621772, iteration: 275597
loss: 1.107110619544983,grad_norm: 0.9999993846668023, iteration: 275598
loss: 1.0398340225219727,grad_norm: 0.7647152921036545, iteration: 275599
loss: 0.9903486371040344,grad_norm: 0.9999990776538857, iteration: 275600
loss: 1.0015424489974976,grad_norm: 0.8664891193654244, iteration: 275601
loss: 0.9952352046966553,grad_norm: 0.9999992124485971, iteration: 275602
loss: 0.9629761576652527,grad_norm: 0.9713160577212567, iteration: 275603
loss: 0.967070460319519,grad_norm: 0.7244285898222979, iteration: 275604
loss: 0.9945733547210693,grad_norm: 0.8660712851305225, iteration: 275605
loss: 1.0162163972854614,grad_norm: 0.9999996005849309, iteration: 275606
loss: 0.9699143767356873,grad_norm: 0.9239928686677003, iteration: 275607
loss: 1.0356578826904297,grad_norm: 0.9999999357151088, iteration: 275608
loss: 1.0438716411590576,grad_norm: 0.750002642455563, iteration: 275609
loss: 0.978293240070343,grad_norm: 0.8424368811496521, iteration: 275610
loss: 0.9399382472038269,grad_norm: 0.8288365211155387, iteration: 275611
loss: 1.0185667276382446,grad_norm: 0.8199740626119411, iteration: 275612
loss: 1.0449891090393066,grad_norm: 0.999999286216589, iteration: 275613
loss: 1.1011847257614136,grad_norm: 0.9999993223116701, iteration: 275614
loss: 1.0632061958312988,grad_norm: 0.9999996037466221, iteration: 275615
loss: 1.2740998268127441,grad_norm: 0.9999994521405916, iteration: 275616
loss: 1.0659122467041016,grad_norm: 1.0000000139987135, iteration: 275617
loss: 1.0245693922042847,grad_norm: 0.9155112437103355, iteration: 275618
loss: 1.0512336492538452,grad_norm: 0.9999991562707388, iteration: 275619
loss: 1.0307317972183228,grad_norm: 0.9188904777995949, iteration: 275620
loss: 0.9982530474662781,grad_norm: 0.950273820095212, iteration: 275621
loss: 1.0081547498703003,grad_norm: 0.7881337411915565, iteration: 275622
loss: 0.988831102848053,grad_norm: 0.9788743102417182, iteration: 275623
loss: 1.1330554485321045,grad_norm: 0.9999994618464757, iteration: 275624
loss: 1.0178996324539185,grad_norm: 0.9999998542196785, iteration: 275625
loss: 0.9916349053382874,grad_norm: 0.9999995632535802, iteration: 275626
loss: 1.022587537765503,grad_norm: 0.9093915444055982, iteration: 275627
loss: 0.9968551993370056,grad_norm: 0.9260825190205897, iteration: 275628
loss: 1.041452169418335,grad_norm: 0.9326170072412683, iteration: 275629
loss: 0.9642513990402222,grad_norm: 0.7938172205149664, iteration: 275630
loss: 1.169738531112671,grad_norm: 0.9999996532800934, iteration: 275631
loss: 1.0595877170562744,grad_norm: 0.9999992335731919, iteration: 275632
loss: 1.0096192359924316,grad_norm: 0.8895628342769698, iteration: 275633
loss: 1.0453604459762573,grad_norm: 0.8167950921533794, iteration: 275634
loss: 0.9755532741546631,grad_norm: 0.6662706704212588, iteration: 275635
loss: 1.056739091873169,grad_norm: 0.9999991707810925, iteration: 275636
loss: 1.121110200881958,grad_norm: 0.9999995765573306, iteration: 275637
loss: 0.9844133853912354,grad_norm: 0.9999992196579686, iteration: 275638
loss: 1.037941336631775,grad_norm: 0.9741805959733162, iteration: 275639
loss: 1.039832353591919,grad_norm: 0.9691693994303243, iteration: 275640
loss: 1.1058214902877808,grad_norm: 0.9999991606958886, iteration: 275641
loss: 0.9906875491142273,grad_norm: 0.9183181626101191, iteration: 275642
loss: 0.9854631423950195,grad_norm: 0.8870203929140633, iteration: 275643
loss: 1.0217163562774658,grad_norm: 0.9999990959288798, iteration: 275644
loss: 0.9936888217926025,grad_norm: 0.8100482226926962, iteration: 275645
loss: 1.0606721639633179,grad_norm: 0.7943945389590913, iteration: 275646
loss: 1.0563290119171143,grad_norm: 0.9609711430542771, iteration: 275647
loss: 1.0810288190841675,grad_norm: 0.9186143307277663, iteration: 275648
loss: 0.9837663173675537,grad_norm: 0.9002159794168152, iteration: 275649
loss: 1.024031400680542,grad_norm: 0.9999996543182162, iteration: 275650
loss: 0.9814283847808838,grad_norm: 0.9069430151860921, iteration: 275651
loss: 1.0294396877288818,grad_norm: 0.878786958168229, iteration: 275652
loss: 1.0598458051681519,grad_norm: 0.999999203190992, iteration: 275653
loss: 1.0421732664108276,grad_norm: 0.8774445623053309, iteration: 275654
loss: 0.9828341603279114,grad_norm: 0.9120140361699521, iteration: 275655
loss: 1.014876365661621,grad_norm: 0.6524637871517988, iteration: 275656
loss: 0.9779741168022156,grad_norm: 0.8598966492941633, iteration: 275657
loss: 0.9948830008506775,grad_norm: 0.9334694125174957, iteration: 275658
loss: 1.041062831878662,grad_norm: 0.7816633722417916, iteration: 275659
loss: 1.0194379091262817,grad_norm: 0.7852710990597664, iteration: 275660
loss: 1.0040818452835083,grad_norm: 0.8401138152843955, iteration: 275661
loss: 1.0047272443771362,grad_norm: 0.8828819150693209, iteration: 275662
loss: 1.0314544439315796,grad_norm: 0.8959022363916491, iteration: 275663
loss: 1.0247650146484375,grad_norm: 0.9999991003805381, iteration: 275664
loss: 1.0835928916931152,grad_norm: 0.8235451482505092, iteration: 275665
loss: 1.0155774354934692,grad_norm: 0.7588010489433937, iteration: 275666
loss: 1.0063409805297852,grad_norm: 0.9999994842624882, iteration: 275667
loss: 0.9770460724830627,grad_norm: 0.9999991553384474, iteration: 275668
loss: 1.094315528869629,grad_norm: 0.9999996572795489, iteration: 275669
loss: 1.1072593927383423,grad_norm: 0.9999990623270808, iteration: 275670
loss: 1.0112805366516113,grad_norm: 0.9246925147928561, iteration: 275671
loss: 1.023141860961914,grad_norm: 0.9483549237223897, iteration: 275672
loss: 0.9684227108955383,grad_norm: 0.7945321225697161, iteration: 275673
loss: 0.9969494342803955,grad_norm: 0.9331310170250409, iteration: 275674
loss: 1.063664197921753,grad_norm: 0.9999998990194218, iteration: 275675
loss: 0.9621376991271973,grad_norm: 0.8235971735659356, iteration: 275676
loss: 1.0610761642456055,grad_norm: 0.999999076434038, iteration: 275677
loss: 1.0533607006072998,grad_norm: 0.9999993824957923, iteration: 275678
loss: 0.9727647304534912,grad_norm: 0.948865564812258, iteration: 275679
loss: 1.1296521425247192,grad_norm: 0.9999990329510176, iteration: 275680
loss: 1.0035642385482788,grad_norm: 0.8164671304910389, iteration: 275681
loss: 1.0129159688949585,grad_norm: 0.9999992119221598, iteration: 275682
loss: 1.0011601448059082,grad_norm: 0.9999991716907196, iteration: 275683
loss: 1.0122302770614624,grad_norm: 0.9600631044872278, iteration: 275684
loss: 1.0213556289672852,grad_norm: 0.9495476615607441, iteration: 275685
loss: 1.0152863264083862,grad_norm: 0.9981898157026712, iteration: 275686
loss: 1.037582278251648,grad_norm: 0.946201285434414, iteration: 275687
loss: 1.0120975971221924,grad_norm: 0.787147233931976, iteration: 275688
loss: 0.9863668084144592,grad_norm: 0.7622567623039981, iteration: 275689
loss: 1.0196001529693604,grad_norm: 0.9999991241309422, iteration: 275690
loss: 1.062730312347412,grad_norm: 0.999999387643532, iteration: 275691
loss: 1.0037872791290283,grad_norm: 0.890823586144171, iteration: 275692
loss: 1.0019534826278687,grad_norm: 0.9999992640373472, iteration: 275693
loss: 1.061012625694275,grad_norm: 0.9999995423673153, iteration: 275694
loss: 0.9953295588493347,grad_norm: 0.8763896523388623, iteration: 275695
loss: 1.0010945796966553,grad_norm: 0.938972146040323, iteration: 275696
loss: 1.017917275428772,grad_norm: 0.9999997077744692, iteration: 275697
loss: 0.9941128492355347,grad_norm: 0.9070205949044375, iteration: 275698
loss: 1.0251063108444214,grad_norm: 0.7354059288670527, iteration: 275699
loss: 1.048579216003418,grad_norm: 0.999999711702878, iteration: 275700
loss: 1.0292267799377441,grad_norm: 0.7939754047796316, iteration: 275701
loss: 0.9771656394004822,grad_norm: 0.8507378203812951, iteration: 275702
loss: 1.0751930475234985,grad_norm: 0.999998993702447, iteration: 275703
loss: 1.034867525100708,grad_norm: 0.9672025844877417, iteration: 275704
loss: 0.9729820489883423,grad_norm: 0.9999991513577983, iteration: 275705
loss: 1.0341371297836304,grad_norm: 0.999999604036092, iteration: 275706
loss: 1.1053282022476196,grad_norm: 0.9999997194031955, iteration: 275707
loss: 0.9991235733032227,grad_norm: 0.7681522853591877, iteration: 275708
loss: 1.026689052581787,grad_norm: 0.999999146301391, iteration: 275709
loss: 1.0254690647125244,grad_norm: 0.8243726595151696, iteration: 275710
loss: 1.0115578174591064,grad_norm: 0.9452441766362853, iteration: 275711
loss: 0.9744842052459717,grad_norm: 0.838012815261958, iteration: 275712
loss: 1.0097090005874634,grad_norm: 0.940423571958394, iteration: 275713
loss: 1.0902656316757202,grad_norm: 0.9999995890023088, iteration: 275714
loss: 1.0300803184509277,grad_norm: 0.9100796275848114, iteration: 275715
loss: 1.0815532207489014,grad_norm: 0.9999995579313024, iteration: 275716
loss: 0.9970040917396545,grad_norm: 0.6544515088750271, iteration: 275717
loss: 1.1100260019302368,grad_norm: 0.9999991720334261, iteration: 275718
loss: 1.1274763345718384,grad_norm: 0.9999997832839791, iteration: 275719
loss: 0.9936329126358032,grad_norm: 0.9999989910593088, iteration: 275720
loss: 1.028611183166504,grad_norm: 0.99999985710487, iteration: 275721
loss: 1.0012826919555664,grad_norm: 0.9999996582213039, iteration: 275722
loss: 1.0043264627456665,grad_norm: 0.8614329348176042, iteration: 275723
loss: 1.0523014068603516,grad_norm: 0.9999992838171943, iteration: 275724
loss: 1.0569514036178589,grad_norm: 0.8151618629225194, iteration: 275725
loss: 1.1201872825622559,grad_norm: 0.9095100050604055, iteration: 275726
loss: 1.0496784448623657,grad_norm: 0.9999998727660417, iteration: 275727
loss: 1.0908234119415283,grad_norm: 0.984716104155842, iteration: 275728
loss: 1.0403411388397217,grad_norm: 0.7833750569211236, iteration: 275729
loss: 1.0709483623504639,grad_norm: 0.9999993159377363, iteration: 275730
loss: 1.0404279232025146,grad_norm: 0.9999993237954803, iteration: 275731
loss: 1.0470367670059204,grad_norm: 0.8732333668225645, iteration: 275732
loss: 1.0517312288284302,grad_norm: 0.9999991587812391, iteration: 275733
loss: 1.010198712348938,grad_norm: 0.8579986445713327, iteration: 275734
loss: 1.0921130180358887,grad_norm: 0.9036264128918485, iteration: 275735
loss: 1.0271304845809937,grad_norm: 0.9999991545648235, iteration: 275736
loss: 1.0018582344055176,grad_norm: 0.7847425701521682, iteration: 275737
loss: 0.9677086472511292,grad_norm: 0.9281201297967098, iteration: 275738
loss: 1.0090358257293701,grad_norm: 0.9999995046624921, iteration: 275739
loss: 1.2177854776382446,grad_norm: 1.0000000182851005, iteration: 275740
loss: 1.0821222066879272,grad_norm: 0.9999993579160632, iteration: 275741
loss: 0.9764149188995361,grad_norm: 0.8457164272841341, iteration: 275742
loss: 0.987798273563385,grad_norm: 0.9247252019647755, iteration: 275743
loss: 1.057078242301941,grad_norm: 0.9999992035052061, iteration: 275744
loss: 1.020828127861023,grad_norm: 0.9999995167386672, iteration: 275745
loss: 1.0361301898956299,grad_norm: 0.9976028891773149, iteration: 275746
loss: 1.0827178955078125,grad_norm: 0.9999991300337326, iteration: 275747
loss: 1.1505342721939087,grad_norm: 0.9999995669053998, iteration: 275748
loss: 1.083254337310791,grad_norm: 0.9999999442574572, iteration: 275749
loss: 1.0302654504776,grad_norm: 0.7928090125076234, iteration: 275750
loss: 1.0697928667068481,grad_norm: 0.9999997195615241, iteration: 275751
loss: 1.0442991256713867,grad_norm: 0.99999966443936, iteration: 275752
loss: 1.1230963468551636,grad_norm: 0.999998945876848, iteration: 275753
loss: 1.05791437625885,grad_norm: 0.9999993524502875, iteration: 275754
loss: 0.9511971473693848,grad_norm: 0.9680880965544538, iteration: 275755
loss: 1.0088788270950317,grad_norm: 0.9272138616971357, iteration: 275756
loss: 1.0161510705947876,grad_norm: 0.9124900769882809, iteration: 275757
loss: 1.0096373558044434,grad_norm: 0.9999991036511264, iteration: 275758
loss: 1.0225074291229248,grad_norm: 0.9999997350873504, iteration: 275759
loss: 1.0164660215377808,grad_norm: 0.8756915118348818, iteration: 275760
loss: 1.023152470588684,grad_norm: 0.9767651762020756, iteration: 275761
loss: 1.0929539203643799,grad_norm: 0.9822195454998747, iteration: 275762
loss: 1.065820574760437,grad_norm: 0.9800507154351369, iteration: 275763
loss: 1.0137466192245483,grad_norm: 0.8724209355091103, iteration: 275764
loss: 1.0241572856903076,grad_norm: 0.904116931279896, iteration: 275765
loss: 1.0168981552124023,grad_norm: 0.999999065463066, iteration: 275766
loss: 1.0570167303085327,grad_norm: 0.9999996474957877, iteration: 275767
loss: 0.9949032068252563,grad_norm: 0.9975832297333971, iteration: 275768
loss: 1.008373498916626,grad_norm: 0.8854560032541698, iteration: 275769
loss: 1.0994707345962524,grad_norm: 0.9999991265244648, iteration: 275770
loss: 0.9903631806373596,grad_norm: 0.9999993206063453, iteration: 275771
loss: 0.9993283748626709,grad_norm: 0.8327426595405178, iteration: 275772
loss: 1.046311855316162,grad_norm: 0.9999996824456013, iteration: 275773
loss: 1.0614707469940186,grad_norm: 0.9047269553163291, iteration: 275774
loss: 1.0144851207733154,grad_norm: 0.9253305477541192, iteration: 275775
loss: 0.9857224822044373,grad_norm: 0.8414094829136993, iteration: 275776
loss: 1.049605131149292,grad_norm: 0.8137633070091695, iteration: 275777
loss: 1.1000285148620605,grad_norm: 1.0000000845985935, iteration: 275778
loss: 0.99837327003479,grad_norm: 0.9999996336437438, iteration: 275779
loss: 0.9950724840164185,grad_norm: 0.8573871552793415, iteration: 275780
loss: 1.0173715353012085,grad_norm: 0.9384642951202129, iteration: 275781
loss: 1.0384782552719116,grad_norm: 0.999999982670236, iteration: 275782
loss: 1.017242431640625,grad_norm: 0.9999990051285773, iteration: 275783
loss: 1.0196187496185303,grad_norm: 0.9988782330853834, iteration: 275784
loss: 1.012399673461914,grad_norm: 0.9999990465064372, iteration: 275785
loss: 1.101181149482727,grad_norm: 0.9999990861955531, iteration: 275786
loss: 1.008093237876892,grad_norm: 0.999999465198459, iteration: 275787
loss: 1.0195618867874146,grad_norm: 0.9999993674379627, iteration: 275788
loss: 1.0567188262939453,grad_norm: 0.9722067126038221, iteration: 275789
loss: 1.1679573059082031,grad_norm: 0.9999996439166202, iteration: 275790
loss: 1.0036629438400269,grad_norm: 0.8048303454801334, iteration: 275791
loss: 1.002217173576355,grad_norm: 0.9999994034218361, iteration: 275792
loss: 1.0507802963256836,grad_norm: 0.9999989722143664, iteration: 275793
loss: 1.052416443824768,grad_norm: 0.9765611671385256, iteration: 275794
loss: 1.0487120151519775,grad_norm: 0.9999992412704458, iteration: 275795
loss: 1.032753586769104,grad_norm: 0.9999994766965025, iteration: 275796
loss: 1.0272396802902222,grad_norm: 0.861229791605827, iteration: 275797
loss: 1.105332612991333,grad_norm: 0.9999996288888703, iteration: 275798
loss: 1.0300654172897339,grad_norm: 0.9999995522039963, iteration: 275799
loss: 1.0923618078231812,grad_norm: 0.9999992343999524, iteration: 275800
loss: 1.0491844415664673,grad_norm: 0.9999999364548786, iteration: 275801
loss: 1.0300523042678833,grad_norm: 0.999999143037936, iteration: 275802
loss: 0.9863805174827576,grad_norm: 0.8103558985893682, iteration: 275803
loss: 0.9984644651412964,grad_norm: 0.9282216554125947, iteration: 275804
loss: 0.9905277490615845,grad_norm: 0.999999149129706, iteration: 275805
loss: 1.0084004402160645,grad_norm: 0.9999991729133073, iteration: 275806
loss: 0.9813603162765503,grad_norm: 0.7951833415057579, iteration: 275807
loss: 1.0261412858963013,grad_norm: 0.8787681560474405, iteration: 275808
loss: 1.0293914079666138,grad_norm: 0.8343349839048169, iteration: 275809
loss: 1.012630820274353,grad_norm: 0.8973866279440227, iteration: 275810
loss: 0.9918393492698669,grad_norm: 0.999999939989719, iteration: 275811
loss: 1.0374456644058228,grad_norm: 0.9999992163832045, iteration: 275812
loss: 0.9560648798942566,grad_norm: 0.9999991236641695, iteration: 275813
loss: 1.022757649421692,grad_norm: 1.0000000311649444, iteration: 275814
loss: 1.2054862976074219,grad_norm: 0.9999998967445908, iteration: 275815
loss: 1.247257113456726,grad_norm: 0.9999997831709968, iteration: 275816
loss: 1.0953068733215332,grad_norm: 0.999999531966941, iteration: 275817
loss: 1.195183277130127,grad_norm: 0.9999999767645569, iteration: 275818
loss: 1.0642080307006836,grad_norm: 0.9999997272083726, iteration: 275819
loss: 1.0507653951644897,grad_norm: 0.9973927619218644, iteration: 275820
loss: 1.0982491970062256,grad_norm: 0.9999994985889167, iteration: 275821
loss: 1.0613088607788086,grad_norm: 0.9999991241554471, iteration: 275822
loss: 1.0846296548843384,grad_norm: 0.9999991976239319, iteration: 275823
loss: 1.0426034927368164,grad_norm: 0.9999996713042014, iteration: 275824
loss: 0.9889912605285645,grad_norm: 0.8960087425282517, iteration: 275825
loss: 1.075283169746399,grad_norm: 0.9999993499152817, iteration: 275826
loss: 1.1260600090026855,grad_norm: 0.9999990717875249, iteration: 275827
loss: 1.040614128112793,grad_norm: 0.9802646458308639, iteration: 275828
loss: 1.026983380317688,grad_norm: 0.999999081990209, iteration: 275829
loss: 1.0441263914108276,grad_norm: 0.997567080007892, iteration: 275830
loss: 1.1894408464431763,grad_norm: 0.9999995443574682, iteration: 275831
loss: 1.1753638982772827,grad_norm: 0.9999999735261779, iteration: 275832
loss: 1.0014228820800781,grad_norm: 0.9999993750372078, iteration: 275833
loss: 1.1397093534469604,grad_norm: 0.9999997256960803, iteration: 275834
loss: 1.090108871459961,grad_norm: 0.9999996023527337, iteration: 275835
loss: 1.1081328392028809,grad_norm: 0.999999135396119, iteration: 275836
loss: 1.0434414148330688,grad_norm: 0.9999991439572019, iteration: 275837
loss: 1.1845359802246094,grad_norm: 0.9999999434352573, iteration: 275838
loss: 1.0304032564163208,grad_norm: 0.9863289608190493, iteration: 275839
loss: 1.054003357887268,grad_norm: 0.9999995248780669, iteration: 275840
loss: 1.0344984531402588,grad_norm: 0.9999996890715696, iteration: 275841
loss: 0.9808772802352905,grad_norm: 0.927996053450517, iteration: 275842
loss: 1.0301332473754883,grad_norm: 0.9999996695978156, iteration: 275843
loss: 1.0018476247787476,grad_norm: 0.8037679036669221, iteration: 275844
loss: 1.0901120901107788,grad_norm: 0.9999989630736993, iteration: 275845
loss: 1.0243611335754395,grad_norm: 0.8403605946741426, iteration: 275846
loss: 0.9975828528404236,grad_norm: 0.9263010853305489, iteration: 275847
loss: 1.015109658241272,grad_norm: 0.7788982454145379, iteration: 275848
loss: 1.0572526454925537,grad_norm: 0.99999984924729, iteration: 275849
loss: 1.026925802230835,grad_norm: 0.8176141174738298, iteration: 275850
loss: 1.0419952869415283,grad_norm: 0.999999090825532, iteration: 275851
loss: 1.0021532773971558,grad_norm: 0.8587857011739075, iteration: 275852
loss: 1.017601728439331,grad_norm: 0.9999998576695966, iteration: 275853
loss: 0.984719455242157,grad_norm: 0.9999990664581861, iteration: 275854
loss: 0.990631103515625,grad_norm: 0.8954397958448744, iteration: 275855
loss: 1.0124969482421875,grad_norm: 0.999999685389561, iteration: 275856
loss: 1.019640326499939,grad_norm: 0.9999993016620475, iteration: 275857
loss: 0.994841456413269,grad_norm: 0.9809612564632504, iteration: 275858
loss: 1.0182183980941772,grad_norm: 0.8764009340546666, iteration: 275859
loss: 1.1421055793762207,grad_norm: 0.999999894465009, iteration: 275860
loss: 1.028764009475708,grad_norm: 0.8394077926547656, iteration: 275861
loss: 1.0020614862442017,grad_norm: 0.9999991264346716, iteration: 275862
loss: 1.026788592338562,grad_norm: 0.8606127665834439, iteration: 275863
loss: 1.0886845588684082,grad_norm: 0.9999996261652928, iteration: 275864
loss: 1.0060328245162964,grad_norm: 0.9008373984157754, iteration: 275865
loss: 1.022801160812378,grad_norm: 0.9999992177639176, iteration: 275866
loss: 1.0371845960617065,grad_norm: 0.7783250362734314, iteration: 275867
loss: 0.9839113354682922,grad_norm: 0.9267602211018435, iteration: 275868
loss: 1.0051947832107544,grad_norm: 0.9999997764008178, iteration: 275869
loss: 1.030055046081543,grad_norm: 0.9999990948105919, iteration: 275870
loss: 0.9932512640953064,grad_norm: 0.9822928339206083, iteration: 275871
loss: 0.9745987057685852,grad_norm: 0.8973769225485198, iteration: 275872
loss: 1.0030953884124756,grad_norm: 0.8112923208625386, iteration: 275873
loss: 1.0964152812957764,grad_norm: 0.9523001284289173, iteration: 275874
loss: 1.0491907596588135,grad_norm: 0.9999998495538203, iteration: 275875
loss: 0.9828946590423584,grad_norm: 0.9999990986994939, iteration: 275876
loss: 1.0666069984436035,grad_norm: 0.9999998996203898, iteration: 275877
loss: 1.0171732902526855,grad_norm: 0.9999994049292598, iteration: 275878
loss: 0.9990031123161316,grad_norm: 0.9999992046189335, iteration: 275879
loss: 0.9778922200202942,grad_norm: 0.9999991644563058, iteration: 275880
loss: 0.998759925365448,grad_norm: 0.9999991643688045, iteration: 275881
loss: 1.0919283628463745,grad_norm: 0.9999990991262213, iteration: 275882
loss: 1.0605628490447998,grad_norm: 0.9999993966298751, iteration: 275883
loss: 1.0967381000518799,grad_norm: 0.9999991726638813, iteration: 275884
loss: 0.9896692633628845,grad_norm: 0.9999996954168456, iteration: 275885
loss: 1.0510101318359375,grad_norm: 0.8878708906123046, iteration: 275886
loss: 1.0631235837936401,grad_norm: 0.999999175584069, iteration: 275887
loss: 1.0099154710769653,grad_norm: 0.9999991832007367, iteration: 275888
loss: 0.9707877039909363,grad_norm: 0.892755408134139, iteration: 275889
loss: 1.075690746307373,grad_norm: 0.9999990878237857, iteration: 275890
loss: 1.001173973083496,grad_norm: 0.8278094350498643, iteration: 275891
loss: 1.001859188079834,grad_norm: 0.8821443249303945, iteration: 275892
loss: 1.0195729732513428,grad_norm: 0.9217411603535905, iteration: 275893
loss: 0.9822230935096741,grad_norm: 0.8002159551607301, iteration: 275894
loss: 0.9690598249435425,grad_norm: 0.9999991977726242, iteration: 275895
loss: 0.9884517788887024,grad_norm: 0.9999991225078116, iteration: 275896
loss: 1.0072319507598877,grad_norm: 0.9213077134751307, iteration: 275897
loss: 1.001521110534668,grad_norm: 0.9999990533103033, iteration: 275898
loss: 1.007403016090393,grad_norm: 0.8503926125833163, iteration: 275899
loss: 1.0903129577636719,grad_norm: 0.9999994397757228, iteration: 275900
loss: 0.9965257048606873,grad_norm: 0.999999143795346, iteration: 275901
loss: 0.9897245764732361,grad_norm: 0.70864771527075, iteration: 275902
loss: 1.0092328786849976,grad_norm: 0.8717262405622094, iteration: 275903
loss: 1.019883632659912,grad_norm: 0.9999991163609455, iteration: 275904
loss: 0.9749107956886292,grad_norm: 0.9999991525490683, iteration: 275905
loss: 1.038595199584961,grad_norm: 0.948807094758923, iteration: 275906
loss: 1.0415711402893066,grad_norm: 0.9208261214814474, iteration: 275907
loss: 1.079410433769226,grad_norm: 0.9821963169264251, iteration: 275908
loss: 0.9504228830337524,grad_norm: 0.8511115596233741, iteration: 275909
loss: 0.9701405763626099,grad_norm: 0.9583519537473967, iteration: 275910
loss: 0.9859035611152649,grad_norm: 0.9999998367112342, iteration: 275911
loss: 0.9972821474075317,grad_norm: 0.7083667984063535, iteration: 275912
loss: 1.0832843780517578,grad_norm: 1.0000000431860239, iteration: 275913
loss: 1.0785889625549316,grad_norm: 0.9999998717279429, iteration: 275914
loss: 1.041711449623108,grad_norm: 0.7476360094047602, iteration: 275915
loss: 1.0006396770477295,grad_norm: 0.9650340473059772, iteration: 275916
loss: 1.0278240442276,grad_norm: 0.9347479871773178, iteration: 275917
loss: 1.0058730840682983,grad_norm: 0.8232725024773938, iteration: 275918
loss: 1.136392593383789,grad_norm: 0.7977965362580777, iteration: 275919
loss: 1.0284844636917114,grad_norm: 0.9690557678899363, iteration: 275920
loss: 1.0016565322875977,grad_norm: 0.853233531790817, iteration: 275921
loss: 1.2185555696487427,grad_norm: 0.9999994802878244, iteration: 275922
loss: 0.9849117398262024,grad_norm: 0.9999991906412318, iteration: 275923
loss: 1.007844090461731,grad_norm: 0.9999994435013695, iteration: 275924
loss: 0.997445285320282,grad_norm: 0.9999990616211859, iteration: 275925
loss: 0.9900985360145569,grad_norm: 0.8776466710267504, iteration: 275926
loss: 0.9880252480506897,grad_norm: 0.8782679169646186, iteration: 275927
loss: 0.9720672369003296,grad_norm: 0.9674458678054992, iteration: 275928
loss: 0.9761666059494019,grad_norm: 0.9999991504980723, iteration: 275929
loss: 0.9987120628356934,grad_norm: 0.8887624091219753, iteration: 275930
loss: 1.0480362176895142,grad_norm: 0.9999991786004001, iteration: 275931
loss: 1.043016791343689,grad_norm: 0.8170596831980004, iteration: 275932
loss: 1.0147532224655151,grad_norm: 0.8427426905326626, iteration: 275933
loss: 1.0006859302520752,grad_norm: 0.99999903858505, iteration: 275934
loss: 1.0210504531860352,grad_norm: 0.8214280985143776, iteration: 275935
loss: 1.063267469406128,grad_norm: 0.9999992091550036, iteration: 275936
loss: 0.9886709451675415,grad_norm: 0.772205729559438, iteration: 275937
loss: 1.1418811082839966,grad_norm: 0.9999991614280075, iteration: 275938
loss: 1.0930194854736328,grad_norm: 0.9999990848052629, iteration: 275939
loss: 0.9940645098686218,grad_norm: 0.7830791689996913, iteration: 275940
loss: 1.0500112771987915,grad_norm: 0.9999990910950884, iteration: 275941
loss: 0.9933782815933228,grad_norm: 0.9999993416647717, iteration: 275942
loss: 0.9890491366386414,grad_norm: 0.9999992336581749, iteration: 275943
loss: 0.9970822930335999,grad_norm: 0.8250594521585327, iteration: 275944
loss: 1.0292274951934814,grad_norm: 0.9887723062846212, iteration: 275945
loss: 1.0332121849060059,grad_norm: 0.9999999381834315, iteration: 275946
loss: 1.006354808807373,grad_norm: 0.8047700316633558, iteration: 275947
loss: 1.025255560874939,grad_norm: 0.9350092908173949, iteration: 275948
loss: 1.0179656744003296,grad_norm: 0.9999994053662395, iteration: 275949
loss: 1.0724544525146484,grad_norm: 0.9999991660129565, iteration: 275950
loss: 0.9741994142532349,grad_norm: 0.8511316868902394, iteration: 275951
loss: 1.0051183700561523,grad_norm: 0.799679779400908, iteration: 275952
loss: 0.9913263916969299,grad_norm: 0.8676301155460612, iteration: 275953
loss: 0.9747815132141113,grad_norm: 0.8514366954996783, iteration: 275954
loss: 0.9739740490913391,grad_norm: 0.999999650636737, iteration: 275955
loss: 1.0246233940124512,grad_norm: 0.8975273866431692, iteration: 275956
loss: 0.9750461578369141,grad_norm: 0.9927021735709144, iteration: 275957
loss: 1.0419799089431763,grad_norm: 0.999999264816337, iteration: 275958
loss: 1.1288396120071411,grad_norm: 0.9999994650165107, iteration: 275959
loss: 1.012230634689331,grad_norm: 0.8337238496475061, iteration: 275960
loss: 1.0065537691116333,grad_norm: 0.9059412944343582, iteration: 275961
loss: 1.0705901384353638,grad_norm: 0.9999994362725533, iteration: 275962
loss: 1.1060549020767212,grad_norm: 0.9999993955670061, iteration: 275963
loss: 0.970008909702301,grad_norm: 0.9999997781286547, iteration: 275964
loss: 1.041303277015686,grad_norm: 0.9999997795899653, iteration: 275965
loss: 0.9734125137329102,grad_norm: 0.9222130060824775, iteration: 275966
loss: 1.0347232818603516,grad_norm: 0.8438021495714715, iteration: 275967
loss: 1.0313998460769653,grad_norm: 0.9999998725353787, iteration: 275968
loss: 1.0860921144485474,grad_norm: 0.8689854607364927, iteration: 275969
loss: 1.035386323928833,grad_norm: 0.8312686997500598, iteration: 275970
loss: 1.0438100099563599,grad_norm: 0.8527433843896076, iteration: 275971
loss: 0.983485996723175,grad_norm: 0.9715966688715423, iteration: 275972
loss: 1.0254759788513184,grad_norm: 0.9071478721318579, iteration: 275973
loss: 1.0178779363632202,grad_norm: 0.9999990932755732, iteration: 275974
loss: 0.9977301359176636,grad_norm: 0.9823509315824305, iteration: 275975
loss: 0.981805682182312,grad_norm: 0.8017779860653583, iteration: 275976
loss: 1.0446537733078003,grad_norm: 0.9999991094337415, iteration: 275977
loss: 1.1375409364700317,grad_norm: 0.9999991442718704, iteration: 275978
loss: 1.0151365995407104,grad_norm: 0.9212758955659984, iteration: 275979
loss: 1.067104458808899,grad_norm: 0.9870171018880448, iteration: 275980
loss: 1.054843544960022,grad_norm: 0.9059695559459721, iteration: 275981
loss: 0.9925238490104675,grad_norm: 0.6999955898647643, iteration: 275982
loss: 0.9951343536376953,grad_norm: 0.8636100648348403, iteration: 275983
loss: 1.0423060655593872,grad_norm: 0.972327693104888, iteration: 275984
loss: 1.095388650894165,grad_norm: 0.8656931013488445, iteration: 275985
loss: 1.0739907026290894,grad_norm: 0.9351871592614531, iteration: 275986
loss: 1.0468233823776245,grad_norm: 0.9999999814644446, iteration: 275987
loss: 1.0632860660552979,grad_norm: 0.9999999186295346, iteration: 275988
loss: 1.0073603391647339,grad_norm: 0.7855263684149684, iteration: 275989
loss: 1.0712101459503174,grad_norm: 0.852991946445746, iteration: 275990
loss: 1.0828508138656616,grad_norm: 0.9999992374566978, iteration: 275991
loss: 1.063401222229004,grad_norm: 0.9999995902997739, iteration: 275992
loss: 1.0085595846176147,grad_norm: 0.9394776078879229, iteration: 275993
loss: 1.1758663654327393,grad_norm: 0.9999994064500622, iteration: 275994
loss: 1.0232168436050415,grad_norm: 0.999999181902607, iteration: 275995
loss: 1.0074824094772339,grad_norm: 0.9468720106936976, iteration: 275996
loss: 0.9758921265602112,grad_norm: 0.8829606222002317, iteration: 275997
loss: 1.0540854930877686,grad_norm: 0.9055246223451503, iteration: 275998
loss: 0.9918676018714905,grad_norm: 0.7787740512479578, iteration: 275999
loss: 1.172206997871399,grad_norm: 0.9999997188159442, iteration: 276000
loss: 0.9745503067970276,grad_norm: 0.9999992919557947, iteration: 276001
loss: 1.0601222515106201,grad_norm: 0.9999993415998326, iteration: 276002
loss: 1.0601191520690918,grad_norm: 0.99999921825686, iteration: 276003
loss: 1.096400260925293,grad_norm: 0.9999994223487916, iteration: 276004
loss: 1.0777801275253296,grad_norm: 0.8480887713448134, iteration: 276005
loss: 1.2036718130111694,grad_norm: 0.9999999376344264, iteration: 276006
loss: 1.0049508810043335,grad_norm: 0.9999991734195862, iteration: 276007
loss: 0.9980563521385193,grad_norm: 0.9999997076721738, iteration: 276008
loss: 1.0313475131988525,grad_norm: 0.9999995543834999, iteration: 276009
loss: 1.1461080312728882,grad_norm: 0.9999993174317315, iteration: 276010
loss: 0.9920209050178528,grad_norm: 0.931268002197989, iteration: 276011
loss: 1.07442045211792,grad_norm: 0.8396711950488925, iteration: 276012
loss: 0.9812101721763611,grad_norm: 0.8032230347723106, iteration: 276013
loss: 0.9743603467941284,grad_norm: 0.8096245607623254, iteration: 276014
loss: 1.0032845735549927,grad_norm: 0.999999340527593, iteration: 276015
loss: 1.00696861743927,grad_norm: 0.8817603933296678, iteration: 276016
loss: 0.9671255946159363,grad_norm: 0.8717047693907427, iteration: 276017
loss: 1.0006027221679688,grad_norm: 0.9999995286986649, iteration: 276018
loss: 0.9488483667373657,grad_norm: 0.8035288741259722, iteration: 276019
loss: 1.0354232788085938,grad_norm: 0.8826200425870337, iteration: 276020
loss: 0.9818145632743835,grad_norm: 0.8061845299685872, iteration: 276021
loss: 1.0054519176483154,grad_norm: 0.8791842270296876, iteration: 276022
loss: 1.0312788486480713,grad_norm: 0.7624523799738038, iteration: 276023
loss: 1.0685535669326782,grad_norm: 0.9999997362075335, iteration: 276024
loss: 0.9808749556541443,grad_norm: 0.7974518144496111, iteration: 276025
loss: 1.0122672319412231,grad_norm: 0.7639253095443492, iteration: 276026
loss: 0.9774518609046936,grad_norm: 0.9999992512718892, iteration: 276027
loss: 1.0117648839950562,grad_norm: 0.7756019135464122, iteration: 276028
loss: 1.0876516103744507,grad_norm: 0.9999993467208856, iteration: 276029
loss: 0.9784455895423889,grad_norm: 0.9999996447048126, iteration: 276030
loss: 1.0375645160675049,grad_norm: 0.9975509629819221, iteration: 276031
loss: 1.0767089128494263,grad_norm: 0.9999991277257979, iteration: 276032
loss: 1.0451592206954956,grad_norm: 0.999999034914363, iteration: 276033
loss: 1.0007356405258179,grad_norm: 0.9979982633575614, iteration: 276034
loss: 1.011182427406311,grad_norm: 0.8958441683398513, iteration: 276035
loss: 1.0016096830368042,grad_norm: 0.8332769989938338, iteration: 276036
loss: 1.0859824419021606,grad_norm: 0.9999995516157714, iteration: 276037
loss: 1.0175178050994873,grad_norm: 0.9999991909414934, iteration: 276038
loss: 0.9918013215065002,grad_norm: 0.9984931275347005, iteration: 276039
loss: 1.0126930475234985,grad_norm: 0.9999992676818651, iteration: 276040
loss: 1.0136057138442993,grad_norm: 0.9337147559616455, iteration: 276041
loss: 1.028447151184082,grad_norm: 0.9999993918102345, iteration: 276042
loss: 1.0141761302947998,grad_norm: 0.9999991845510914, iteration: 276043
loss: 1.0378150939941406,grad_norm: 0.9999999126760316, iteration: 276044
loss: 1.0525264739990234,grad_norm: 0.9999992861453016, iteration: 276045
loss: 1.032047152519226,grad_norm: 0.849957670437385, iteration: 276046
loss: 1.045644998550415,grad_norm: 0.9999999406706609, iteration: 276047
loss: 0.9986048340797424,grad_norm: 0.9999995276845903, iteration: 276048
loss: 1.0344696044921875,grad_norm: 0.9647298944847507, iteration: 276049
loss: 0.995319664478302,grad_norm: 0.9193441841178559, iteration: 276050
loss: 1.0194443464279175,grad_norm: 0.7284656615189283, iteration: 276051
loss: 1.0440112352371216,grad_norm: 0.999999878703719, iteration: 276052
loss: 1.0090442895889282,grad_norm: 0.9999996238652441, iteration: 276053
loss: 1.0821332931518555,grad_norm: 0.9999998175667189, iteration: 276054
loss: 1.0092569589614868,grad_norm: 0.9818152286081092, iteration: 276055
loss: 1.009444236755371,grad_norm: 0.9999991940679068, iteration: 276056
loss: 0.9765145182609558,grad_norm: 0.8910944487342544, iteration: 276057
loss: 0.9861319065093994,grad_norm: 0.8700657109506754, iteration: 276058
loss: 1.0093050003051758,grad_norm: 0.8664769602804505, iteration: 276059
loss: 1.0153720378875732,grad_norm: 0.8879736955737498, iteration: 276060
loss: 0.9599206447601318,grad_norm: 0.8076033165778596, iteration: 276061
loss: 1.0400193929672241,grad_norm: 0.9999991942395111, iteration: 276062
loss: 1.088387370109558,grad_norm: 0.9999993940016639, iteration: 276063
loss: 0.9800798296928406,grad_norm: 0.9999991143717581, iteration: 276064
loss: 1.0282405614852905,grad_norm: 0.9999990555489673, iteration: 276065
loss: 0.9701080322265625,grad_norm: 0.9658944025015207, iteration: 276066
loss: 1.0181297063827515,grad_norm: 0.8065462973745984, iteration: 276067
loss: 1.0018500089645386,grad_norm: 0.8951858324062837, iteration: 276068
loss: 1.0186511278152466,grad_norm: 0.7887037943741362, iteration: 276069
loss: 0.9825248122215271,grad_norm: 0.717479264422996, iteration: 276070
loss: 0.9903168678283691,grad_norm: 0.9999992028679557, iteration: 276071
loss: 1.0164318084716797,grad_norm: 0.9073413294536888, iteration: 276072
loss: 0.9875524640083313,grad_norm: 0.800731535576986, iteration: 276073
loss: 1.0397740602493286,grad_norm: 0.9999992219104216, iteration: 276074
loss: 1.0352469682693481,grad_norm: 0.9999998228283514, iteration: 276075
loss: 0.9973162412643433,grad_norm: 0.9999991834596986, iteration: 276076
loss: 0.9708742499351501,grad_norm: 0.8248109681308903, iteration: 276077
loss: 1.0086783170700073,grad_norm: 0.8238727299450618, iteration: 276078
loss: 1.0325473546981812,grad_norm: 0.9999990431097049, iteration: 276079
loss: 1.0215872526168823,grad_norm: 0.8785388450921968, iteration: 276080
loss: 0.9533111453056335,grad_norm: 0.8073307532137725, iteration: 276081
loss: 1.0139026641845703,grad_norm: 0.8152971679845235, iteration: 276082
loss: 1.0955886840820312,grad_norm: 0.9999996098296903, iteration: 276083
loss: 1.1096183061599731,grad_norm: 0.9999992888062856, iteration: 276084
loss: 1.0063475370407104,grad_norm: 0.8362366229395669, iteration: 276085
loss: 1.0224279165267944,grad_norm: 0.8916957551970957, iteration: 276086
loss: 1.0542110204696655,grad_norm: 0.999999420074135, iteration: 276087
loss: 0.9689515233039856,grad_norm: 0.9405359821447821, iteration: 276088
loss: 1.006049394607544,grad_norm: 0.9999992413350219, iteration: 276089
loss: 1.056459665298462,grad_norm: 0.978959181516071, iteration: 276090
loss: 1.0000724792480469,grad_norm: 0.9999992391917678, iteration: 276091
loss: 0.9981275200843811,grad_norm: 0.795560050430561, iteration: 276092
loss: 1.043946623802185,grad_norm: 0.9999992352904513, iteration: 276093
loss: 1.0108739137649536,grad_norm: 0.9245994656503845, iteration: 276094
loss: 1.028347373008728,grad_norm: 0.9917996746047466, iteration: 276095
loss: 1.0196404457092285,grad_norm: 0.9999992254118902, iteration: 276096
loss: 0.9866239428520203,grad_norm: 0.7808866437498172, iteration: 276097
loss: 0.9829233288764954,grad_norm: 0.9999990729049538, iteration: 276098
loss: 1.053505539894104,grad_norm: 0.8309790358633495, iteration: 276099
loss: 1.027924656867981,grad_norm: 0.7857083762883182, iteration: 276100
loss: 1.0047831535339355,grad_norm: 0.7315273141303651, iteration: 276101
loss: 1.0224626064300537,grad_norm: 0.8101061388105462, iteration: 276102
loss: 0.9896712899208069,grad_norm: 0.8632058034924308, iteration: 276103
loss: 0.997247576713562,grad_norm: 0.8661348426015113, iteration: 276104
loss: 0.990634024143219,grad_norm: 0.9640938232026082, iteration: 276105
loss: 1.008361577987671,grad_norm: 0.9999996331213281, iteration: 276106
loss: 1.0054305791854858,grad_norm: 0.9485032259795297, iteration: 276107
loss: 0.978731095790863,grad_norm: 0.9999991340683616, iteration: 276108
loss: 1.2253168821334839,grad_norm: 0.9999998969358074, iteration: 276109
loss: 1.0572456121444702,grad_norm: 0.9999999164726988, iteration: 276110
loss: 1.0179855823516846,grad_norm: 0.9999992390731721, iteration: 276111
loss: 1.0017162561416626,grad_norm: 0.9079541628800443, iteration: 276112
loss: 1.0401577949523926,grad_norm: 0.9999990865925761, iteration: 276113
loss: 1.1675816774368286,grad_norm: 1.0000000042734343, iteration: 276114
loss: 1.0094611644744873,grad_norm: 0.9999991962841571, iteration: 276115
loss: 1.0164097547531128,grad_norm: 0.8928224232933386, iteration: 276116
loss: 1.0206589698791504,grad_norm: 0.9586520134143657, iteration: 276117
loss: 0.9628815054893494,grad_norm: 0.999999090079955, iteration: 276118
loss: 1.048148512840271,grad_norm: 0.999999999395014, iteration: 276119
loss: 0.9976358413696289,grad_norm: 0.8461274196997981, iteration: 276120
loss: 0.9814190864562988,grad_norm: 0.9121379383832584, iteration: 276121
loss: 0.9644567966461182,grad_norm: 0.8135097508233402, iteration: 276122
loss: 1.008179783821106,grad_norm: 0.9316829656415161, iteration: 276123
loss: 1.025699257850647,grad_norm: 0.975782317494314, iteration: 276124
loss: 0.969230592250824,grad_norm: 0.910098973720099, iteration: 276125
loss: 1.0641967058181763,grad_norm: 0.9999999599308613, iteration: 276126
loss: 1.0679231882095337,grad_norm: 0.9397947977394824, iteration: 276127
loss: 1.0599617958068848,grad_norm: 0.907634536515154, iteration: 276128
loss: 1.0066026449203491,grad_norm: 0.8104817816907056, iteration: 276129
loss: 1.0098470449447632,grad_norm: 0.8150416824943165, iteration: 276130
loss: 0.9808279871940613,grad_norm: 0.9999990057223317, iteration: 276131
loss: 0.9672844409942627,grad_norm: 0.999999095899007, iteration: 276132
loss: 1.050885796546936,grad_norm: 0.9999990782179844, iteration: 276133
loss: 1.021816372871399,grad_norm: 0.7859061825026948, iteration: 276134
loss: 1.0326881408691406,grad_norm: 0.9999990819065285, iteration: 276135
loss: 0.9925355911254883,grad_norm: 0.7849394941893948, iteration: 276136
loss: 0.9806364178657532,grad_norm: 0.7007427584310142, iteration: 276137
loss: 1.0089445114135742,grad_norm: 0.9999991413474228, iteration: 276138
loss: 1.076003074645996,grad_norm: 0.9999990959455674, iteration: 276139
loss: 0.9992114305496216,grad_norm: 0.9912484073214936, iteration: 276140
loss: 0.9641979932785034,grad_norm: 0.9999990223401963, iteration: 276141
loss: 1.0459855794906616,grad_norm: 0.9999999481770764, iteration: 276142
loss: 1.0390278100967407,grad_norm: 0.9496188672163816, iteration: 276143
loss: 0.9956768155097961,grad_norm: 0.8509870622044393, iteration: 276144
loss: 0.9754046201705933,grad_norm: 0.8373378220046376, iteration: 276145
loss: 1.0343862771987915,grad_norm: 0.9263603658685663, iteration: 276146
loss: 0.9989547729492188,grad_norm: 0.945928596822369, iteration: 276147
loss: 0.9949859976768494,grad_norm: 0.8325661843763897, iteration: 276148
loss: 1.0317643880844116,grad_norm: 0.8632442739874804, iteration: 276149
loss: 0.9746816158294678,grad_norm: 0.8367322074342204, iteration: 276150
loss: 0.9972835779190063,grad_norm: 0.834191043237865, iteration: 276151
loss: 0.9984613656997681,grad_norm: 0.8790591867473513, iteration: 276152
loss: 1.008901834487915,grad_norm: 0.761081853096444, iteration: 276153
loss: 0.9943215847015381,grad_norm: 0.8377864662637614, iteration: 276154
loss: 0.978690505027771,grad_norm: 0.973167609733765, iteration: 276155
loss: 1.0133271217346191,grad_norm: 0.8292869614003767, iteration: 276156
loss: 1.0322303771972656,grad_norm: 0.9104979714056335, iteration: 276157
loss: 1.0049351453781128,grad_norm: 0.999999754081217, iteration: 276158
loss: 1.006034255027771,grad_norm: 0.8448564527072215, iteration: 276159
loss: 1.0229668617248535,grad_norm: 0.9999990600551827, iteration: 276160
loss: 0.9939818978309631,grad_norm: 0.8544717608978009, iteration: 276161
loss: 1.055334210395813,grad_norm: 1.0000000415613273, iteration: 276162
loss: 1.0123814344406128,grad_norm: 0.9819132110210781, iteration: 276163
loss: 1.0010876655578613,grad_norm: 0.797663418830671, iteration: 276164
loss: 1.1384003162384033,grad_norm: 0.9999991723036461, iteration: 276165
loss: 1.0109202861785889,grad_norm: 0.8879951795327251, iteration: 276166
loss: 1.1140403747558594,grad_norm: 0.9999994085855679, iteration: 276167
loss: 0.9743863344192505,grad_norm: 0.9999991165515166, iteration: 276168
loss: 1.015769362449646,grad_norm: 0.9982938882787599, iteration: 276169
loss: 0.9952582120895386,grad_norm: 0.8371346336423853, iteration: 276170
loss: 0.9837336540222168,grad_norm: 0.8011342949570979, iteration: 276171
loss: 0.9940055012702942,grad_norm: 0.7811203493355189, iteration: 276172
loss: 0.9483126997947693,grad_norm: 0.9999992134582618, iteration: 276173
loss: 1.0081762075424194,grad_norm: 0.9343441411502379, iteration: 276174
loss: 0.9955227971076965,grad_norm: 0.9336688457167686, iteration: 276175
loss: 1.0166959762573242,grad_norm: 0.9105910688485803, iteration: 276176
loss: 1.0514246225357056,grad_norm: 0.99999996076854, iteration: 276177
loss: 0.9976184964179993,grad_norm: 0.7696451191712798, iteration: 276178
loss: 1.001409888267517,grad_norm: 0.7463441534234215, iteration: 276179
loss: 0.997143030166626,grad_norm: 0.73473162332038, iteration: 276180
loss: 0.9903377890586853,grad_norm: 0.9999990523425056, iteration: 276181
loss: 0.9872431755065918,grad_norm: 0.7732326320822561, iteration: 276182
loss: 1.007176160812378,grad_norm: 0.8887914063845973, iteration: 276183
loss: 0.9959917068481445,grad_norm: 0.9545591187210797, iteration: 276184
loss: 1.0634745359420776,grad_norm: 0.9114305966463566, iteration: 276185
loss: 1.0135568380355835,grad_norm: 0.9544326461337617, iteration: 276186
loss: 1.0191153287887573,grad_norm: 0.8829747717910549, iteration: 276187
loss: 1.0117254257202148,grad_norm: 0.8406311559970945, iteration: 276188
loss: 0.9962102770805359,grad_norm: 0.9108230418619867, iteration: 276189
loss: 0.9889518618583679,grad_norm: 0.9031127672914903, iteration: 276190
loss: 0.9911428689956665,grad_norm: 0.9999992916059081, iteration: 276191
loss: 0.9932878613471985,grad_norm: 0.9152210847337477, iteration: 276192
loss: 1.004328966140747,grad_norm: 0.8957447253063806, iteration: 276193
loss: 1.0029715299606323,grad_norm: 0.8529388135071937, iteration: 276194
loss: 0.996086835861206,grad_norm: 0.8051665457369267, iteration: 276195
loss: 0.9989864230155945,grad_norm: 0.8901137246596766, iteration: 276196
loss: 1.0187016725540161,grad_norm: 0.6989593862596725, iteration: 276197
loss: 1.0332034826278687,grad_norm: 0.853028159477623, iteration: 276198
loss: 1.0172840356826782,grad_norm: 0.9870712682782926, iteration: 276199
loss: 1.0274707078933716,grad_norm: 0.9999997636893617, iteration: 276200
loss: 1.0158833265304565,grad_norm: 0.999999107906302, iteration: 276201
loss: 0.9865118265151978,grad_norm: 0.9999991741059355, iteration: 276202
loss: 1.0075939893722534,grad_norm: 0.9466393199262456, iteration: 276203
loss: 1.0240578651428223,grad_norm: 0.8398364845221024, iteration: 276204
loss: 1.02042555809021,grad_norm: 0.999999424935657, iteration: 276205
loss: 0.9985134601593018,grad_norm: 0.8658519562657767, iteration: 276206
loss: 1.0143113136291504,grad_norm: 0.8744940355092142, iteration: 276207
loss: 0.9815898537635803,grad_norm: 0.7436443836340757, iteration: 276208
loss: 1.0237733125686646,grad_norm: 0.8039474066817069, iteration: 276209
loss: 1.0096993446350098,grad_norm: 0.8593667675947402, iteration: 276210
loss: 0.9875417947769165,grad_norm: 0.7669275263575575, iteration: 276211
loss: 0.9779080152511597,grad_norm: 0.719105952612956, iteration: 276212
loss: 0.9813295006752014,grad_norm: 0.9034361705319873, iteration: 276213
loss: 1.0120867490768433,grad_norm: 0.9778153948203085, iteration: 276214
loss: 1.0109285116195679,grad_norm: 0.8518931620032189, iteration: 276215
loss: 0.9792140126228333,grad_norm: 0.8478206579731827, iteration: 276216
loss: 1.0070714950561523,grad_norm: 0.9027297540146151, iteration: 276217
loss: 1.0165766477584839,grad_norm: 0.913140106774098, iteration: 276218
loss: 0.9615198969841003,grad_norm: 0.9100143478798245, iteration: 276219
loss: 0.9728105068206787,grad_norm: 0.8596503805733094, iteration: 276220
loss: 0.9729551076889038,grad_norm: 0.8650902503801099, iteration: 276221
loss: 0.9748607873916626,grad_norm: 0.9512937330362256, iteration: 276222
loss: 0.9993261694908142,grad_norm: 0.945714720360929, iteration: 276223
loss: 1.006951928138733,grad_norm: 0.7632411532871497, iteration: 276224
loss: 1.0166069269180298,grad_norm: 0.9033940619995589, iteration: 276225
loss: 1.039021372795105,grad_norm: 0.9303343718042663, iteration: 276226
loss: 0.9541481733322144,grad_norm: 0.7375465819553851, iteration: 276227
loss: 0.9857890009880066,grad_norm: 0.9067389039446064, iteration: 276228
loss: 0.997538685798645,grad_norm: 0.9999990364311561, iteration: 276229
loss: 1.0262997150421143,grad_norm: 0.9778300748732242, iteration: 276230
loss: 1.0888398885726929,grad_norm: 0.9264789870150739, iteration: 276231
loss: 0.9728578329086304,grad_norm: 0.8286428441755356, iteration: 276232
loss: 1.0081201791763306,grad_norm: 0.945603344451343, iteration: 276233
loss: 0.9962045550346375,grad_norm: 0.7512230260271394, iteration: 276234
loss: 1.0101372003555298,grad_norm: 0.9069032574907777, iteration: 276235
loss: 0.9711933135986328,grad_norm: 0.8731951555985177, iteration: 276236
loss: 1.040121078491211,grad_norm: 0.9999991464830335, iteration: 276237
loss: 1.0033881664276123,grad_norm: 0.8129986717877499, iteration: 276238
loss: 0.9973152279853821,grad_norm: 0.920411771003519, iteration: 276239
loss: 0.9973127841949463,grad_norm: 0.9361591854570293, iteration: 276240
loss: 1.0020627975463867,grad_norm: 0.9999990768713015, iteration: 276241
loss: 1.0190292596817017,grad_norm: 0.8991241135093775, iteration: 276242
loss: 1.024867296218872,grad_norm: 0.8531121947854233, iteration: 276243
loss: 0.9631164073944092,grad_norm: 0.9411615546584337, iteration: 276244
loss: 0.9897903800010681,grad_norm: 0.8048442807071077, iteration: 276245
loss: 0.9980161786079407,grad_norm: 0.9159182062254971, iteration: 276246
loss: 0.9935780763626099,grad_norm: 0.9903489117243313, iteration: 276247
loss: 0.9787742495536804,grad_norm: 0.8932650885010321, iteration: 276248
loss: 1.0027347803115845,grad_norm: 0.7063528983155959, iteration: 276249
loss: 0.995201051235199,grad_norm: 0.9040778175596085, iteration: 276250
loss: 0.9952584505081177,grad_norm: 0.954508885699148, iteration: 276251
loss: 0.9721078276634216,grad_norm: 0.9101953866587363, iteration: 276252
loss: 0.9569624066352844,grad_norm: 0.7835801222406639, iteration: 276253
loss: 0.9774876832962036,grad_norm: 0.8602965725236689, iteration: 276254
loss: 0.9959495663642883,grad_norm: 0.8124135258458256, iteration: 276255
loss: 0.9699691534042358,grad_norm: 0.8238639505588164, iteration: 276256
loss: 0.9880907535552979,grad_norm: 0.9999989130790458, iteration: 276257
loss: 1.0106993913650513,grad_norm: 0.8362654942454479, iteration: 276258
loss: 1.0139563083648682,grad_norm: 0.8045175187274876, iteration: 276259
loss: 1.0137277841567993,grad_norm: 0.7986562363463579, iteration: 276260
loss: 0.9621700644493103,grad_norm: 0.915813999734576, iteration: 276261
loss: 1.0109498500823975,grad_norm: 0.9999995110431851, iteration: 276262
loss: 1.0009084939956665,grad_norm: 0.8991533659636102, iteration: 276263
loss: 1.133648157119751,grad_norm: 0.999999839541295, iteration: 276264
loss: 1.025726318359375,grad_norm: 0.8979066648008935, iteration: 276265
loss: 1.0291043519973755,grad_norm: 0.8515232070714418, iteration: 276266
loss: 0.9715963006019592,grad_norm: 0.8423744509616182, iteration: 276267
loss: 1.0225756168365479,grad_norm: 0.9079099849510515, iteration: 276268
loss: 0.9810418486595154,grad_norm: 0.9999995400977842, iteration: 276269
loss: 1.0187922716140747,grad_norm: 0.6907513738795878, iteration: 276270
loss: 1.0247013568878174,grad_norm: 0.8884669799874171, iteration: 276271
loss: 0.9879466891288757,grad_norm: 0.8679493458376145, iteration: 276272
loss: 1.0056401491165161,grad_norm: 0.9999991141192822, iteration: 276273
loss: 1.0480196475982666,grad_norm: 0.9656210382889101, iteration: 276274
loss: 1.0094417333602905,grad_norm: 0.8184264490617218, iteration: 276275
loss: 1.0307077169418335,grad_norm: 0.853431873518382, iteration: 276276
loss: 1.0203031301498413,grad_norm: 0.9999993546689792, iteration: 276277
loss: 1.0336111783981323,grad_norm: 0.9999991194345051, iteration: 276278
loss: 1.0208334922790527,grad_norm: 0.9698782615598235, iteration: 276279
loss: 1.0712482929229736,grad_norm: 0.8666602278608959, iteration: 276280
loss: 0.9900906085968018,grad_norm: 0.741956628496419, iteration: 276281
loss: 1.1197587251663208,grad_norm: 0.9549744142658726, iteration: 276282
loss: 0.9839625954627991,grad_norm: 0.7543822849283273, iteration: 276283
loss: 1.0814754962921143,grad_norm: 0.9465947222980914, iteration: 276284
loss: 1.0118533372879028,grad_norm: 0.9901026674253472, iteration: 276285
loss: 0.9577497839927673,grad_norm: 0.7998503479513, iteration: 276286
loss: 1.0288408994674683,grad_norm: 0.9363333030111443, iteration: 276287
loss: 1.0213656425476074,grad_norm: 0.8948785988534113, iteration: 276288
loss: 0.9523856043815613,grad_norm: 0.871715453456798, iteration: 276289
loss: 0.9768611192703247,grad_norm: 0.7935558489510107, iteration: 276290
loss: 0.9897122383117676,grad_norm: 0.8442412205050704, iteration: 276291
loss: 0.9701268076896667,grad_norm: 0.9999990281766278, iteration: 276292
loss: 1.0085586309432983,grad_norm: 0.8741419182477723, iteration: 276293
loss: 0.9627730250358582,grad_norm: 0.7825690414948284, iteration: 276294
loss: 0.9936931133270264,grad_norm: 0.8437540013057528, iteration: 276295
loss: 0.9826430082321167,grad_norm: 0.8159048353581849, iteration: 276296
loss: 0.9982295632362366,grad_norm: 0.7749779684437026, iteration: 276297
loss: 0.9804547429084778,grad_norm: 0.9276833077497233, iteration: 276298
loss: 0.9859021306037903,grad_norm: 0.8968314077561468, iteration: 276299
loss: 0.985529363155365,grad_norm: 0.8260205118488779, iteration: 276300
loss: 0.984974205493927,grad_norm: 0.9262540893764265, iteration: 276301
loss: 0.9770085215568542,grad_norm: 0.8546202992364542, iteration: 276302
loss: 1.0145854949951172,grad_norm: 0.827707679386361, iteration: 276303
loss: 0.9945505857467651,grad_norm: 0.9440453335850426, iteration: 276304
loss: 1.039672613143921,grad_norm: 0.7734829548580103, iteration: 276305
loss: 1.0604947805404663,grad_norm: 0.8888713717878155, iteration: 276306
loss: 0.9970815181732178,grad_norm: 0.8338872554863987, iteration: 276307
loss: 0.9812025427818298,grad_norm: 0.7837798201277866, iteration: 276308
loss: 1.0229769945144653,grad_norm: 0.9999992599391926, iteration: 276309
loss: 1.017393946647644,grad_norm: 0.802569890977595, iteration: 276310
loss: 1.0154160261154175,grad_norm: 0.7838831634215189, iteration: 276311
loss: 0.9973704814910889,grad_norm: 0.829839627554189, iteration: 276312
loss: 1.0204991102218628,grad_norm: 0.8335729385566176, iteration: 276313
loss: 1.0149364471435547,grad_norm: 0.999999096068375, iteration: 276314
loss: 0.9893639087677002,grad_norm: 0.8735688954213952, iteration: 276315
loss: 0.977783739566803,grad_norm: 0.9028879285303568, iteration: 276316
loss: 1.0043787956237793,grad_norm: 0.9999992347322346, iteration: 276317
loss: 1.0092113018035889,grad_norm: 0.9999993327768998, iteration: 276318
loss: 1.0014607906341553,grad_norm: 0.8750175142692014, iteration: 276319
loss: 0.990226149559021,grad_norm: 0.7572024195055312, iteration: 276320
loss: 1.0234436988830566,grad_norm: 0.8680843185877873, iteration: 276321
loss: 0.9764843583106995,grad_norm: 0.9999991125103657, iteration: 276322
loss: 1.0267410278320312,grad_norm: 0.8025802436418233, iteration: 276323
loss: 0.9899185299873352,grad_norm: 0.8892566016126083, iteration: 276324
loss: 1.004949688911438,grad_norm: 0.9999992028189816, iteration: 276325
loss: 0.9912598729133606,grad_norm: 0.9378813944717229, iteration: 276326
loss: 1.030592918395996,grad_norm: 0.9999992686176715, iteration: 276327
loss: 1.0397658348083496,grad_norm: 0.9999992730153727, iteration: 276328
loss: 1.0034217834472656,grad_norm: 0.8792046996879296, iteration: 276329
loss: 1.0610854625701904,grad_norm: 0.999999400251341, iteration: 276330
loss: 0.9701761603355408,grad_norm: 0.907753228755543, iteration: 276331
loss: 1.008334755897522,grad_norm: 0.869816222100362, iteration: 276332
loss: 0.9916549324989319,grad_norm: 0.7983957880192764, iteration: 276333
loss: 0.9611043334007263,grad_norm: 0.8995426300005679, iteration: 276334
loss: 0.9876359105110168,grad_norm: 0.9359635798218228, iteration: 276335
loss: 1.0061484575271606,grad_norm: 0.9999990615580636, iteration: 276336
loss: 0.9938970804214478,grad_norm: 0.824904919111776, iteration: 276337
loss: 0.9795005321502686,grad_norm: 0.8504204245364578, iteration: 276338
loss: 0.9870606660842896,grad_norm: 0.9793367511776199, iteration: 276339
loss: 1.0102673768997192,grad_norm: 0.8213992347057055, iteration: 276340
loss: 0.9603993892669678,grad_norm: 0.8806423989319206, iteration: 276341
loss: 1.005488395690918,grad_norm: 0.8906735241745084, iteration: 276342
loss: 0.9835646748542786,grad_norm: 0.9094949561846277, iteration: 276343
loss: 0.9857797026634216,grad_norm: 0.853962224469777, iteration: 276344
loss: 1.0156692266464233,grad_norm: 0.7713440178076413, iteration: 276345
loss: 0.9796545505523682,grad_norm: 0.7588604220197842, iteration: 276346
loss: 1.006808876991272,grad_norm: 0.951184816921122, iteration: 276347
loss: 1.0254920721054077,grad_norm: 0.937221990866075, iteration: 276348
loss: 1.0129482746124268,grad_norm: 0.9234377820949321, iteration: 276349
loss: 1.011408805847168,grad_norm: 0.9011309756441455, iteration: 276350
loss: 0.9973776936531067,grad_norm: 0.8198444509737188, iteration: 276351
loss: 1.0207487344741821,grad_norm: 0.8640921445877872, iteration: 276352
loss: 1.0103436708450317,grad_norm: 0.8061199663666159, iteration: 276353
loss: 1.098770260810852,grad_norm: 0.9999990925908337, iteration: 276354
loss: 1.007088541984558,grad_norm: 0.8948807231102006, iteration: 276355
loss: 0.9842128753662109,grad_norm: 0.9267394091765268, iteration: 276356
loss: 1.0409733057022095,grad_norm: 0.9147767381128631, iteration: 276357
loss: 0.9974526166915894,grad_norm: 0.9999996935236537, iteration: 276358
loss: 1.0056123733520508,grad_norm: 0.9677814428326487, iteration: 276359
loss: 1.0116504430770874,grad_norm: 0.8836975415061822, iteration: 276360
loss: 1.0373855829238892,grad_norm: 0.9999993468951556, iteration: 276361
loss: 0.9946488738059998,grad_norm: 0.8453722038757195, iteration: 276362
loss: 1.0309544801712036,grad_norm: 0.8952312861310074, iteration: 276363
loss: 0.9804468750953674,grad_norm: 0.8957811244492941, iteration: 276364
loss: 1.0305297374725342,grad_norm: 0.9915182138913509, iteration: 276365
loss: 0.9766196608543396,grad_norm: 0.9999992588243799, iteration: 276366
loss: 0.9961917400360107,grad_norm: 0.9546464494157629, iteration: 276367
loss: 1.0187206268310547,grad_norm: 0.8431818389739103, iteration: 276368
loss: 0.979840099811554,grad_norm: 0.9428904053391575, iteration: 276369
loss: 0.9797642827033997,grad_norm: 0.9999990722890517, iteration: 276370
loss: 0.983367383480072,grad_norm: 0.7994554102181881, iteration: 276371
loss: 1.0160855054855347,grad_norm: 0.7838941193590032, iteration: 276372
loss: 1.1823416948318481,grad_norm: 0.9999994027265035, iteration: 276373
loss: 1.0870888233184814,grad_norm: 0.9999990978758315, iteration: 276374
loss: 0.9860610365867615,grad_norm: 0.855639045152145, iteration: 276375
loss: 0.9334213137626648,grad_norm: 0.938962996849662, iteration: 276376
loss: 1.0446913242340088,grad_norm: 0.9999991458367409, iteration: 276377
loss: 0.9703313112258911,grad_norm: 0.8122244624181306, iteration: 276378
loss: 0.9997832179069519,grad_norm: 0.9787422051570256, iteration: 276379
loss: 1.0470716953277588,grad_norm: 0.867877409841303, iteration: 276380
loss: 1.0174241065979004,grad_norm: 0.8175841695756241, iteration: 276381
loss: 0.9621652364730835,grad_norm: 0.6905114818630764, iteration: 276382
loss: 1.0076735019683838,grad_norm: 0.9880609118535816, iteration: 276383
loss: 1.0046626329421997,grad_norm: 0.8167581349615625, iteration: 276384
loss: 1.1021287441253662,grad_norm: 0.9999997216088247, iteration: 276385
loss: 1.016725778579712,grad_norm: 0.9398635935524453, iteration: 276386
loss: 1.008419394493103,grad_norm: 0.9494264774333192, iteration: 276387
loss: 1.0030293464660645,grad_norm: 0.8550517774748468, iteration: 276388
loss: 0.995784342288971,grad_norm: 0.9256703290310915, iteration: 276389
loss: 1.0706987380981445,grad_norm: 0.9999990863948705, iteration: 276390
loss: 1.102025032043457,grad_norm: 0.9999994885385026, iteration: 276391
loss: 1.0181430578231812,grad_norm: 0.8239145981394341, iteration: 276392
loss: 0.994376540184021,grad_norm: 0.771916386456459, iteration: 276393
loss: 0.9638851284980774,grad_norm: 0.8318071735965733, iteration: 276394
loss: 1.043736457824707,grad_norm: 0.8743524315981502, iteration: 276395
loss: 1.040107011795044,grad_norm: 0.8133761259219685, iteration: 276396
loss: 1.0807301998138428,grad_norm: 0.8686544718813222, iteration: 276397
loss: 1.0653854608535767,grad_norm: 0.784143077380338, iteration: 276398
loss: 0.9878095388412476,grad_norm: 0.9999990672980557, iteration: 276399
loss: 1.0480862855911255,grad_norm: 0.9999991442348264, iteration: 276400
loss: 0.963499903678894,grad_norm: 0.8734105855251618, iteration: 276401
loss: 0.9995165467262268,grad_norm: 0.8961923790020067, iteration: 276402
loss: 1.007766842842102,grad_norm: 0.7599551164783486, iteration: 276403
loss: 1.0820250511169434,grad_norm: 0.9999992541632796, iteration: 276404
loss: 1.00722074508667,grad_norm: 0.9650472775461925, iteration: 276405
loss: 1.0022093057632446,grad_norm: 0.888768641130573, iteration: 276406
loss: 1.0044742822647095,grad_norm: 0.8382941225300029, iteration: 276407
loss: 1.006090760231018,grad_norm: 0.8583195891552251, iteration: 276408
loss: 0.9756768345832825,grad_norm: 0.9502986165459766, iteration: 276409
loss: 1.0212825536727905,grad_norm: 0.9999995846485326, iteration: 276410
loss: 1.0184879302978516,grad_norm: 0.8931419302577085, iteration: 276411
loss: 0.9969552159309387,grad_norm: 0.8679549132380957, iteration: 276412
loss: 1.0140479803085327,grad_norm: 0.9999991335042553, iteration: 276413
loss: 1.1061238050460815,grad_norm: 0.9999997753829969, iteration: 276414
loss: 1.002817988395691,grad_norm: 0.8798583859264875, iteration: 276415
loss: 1.0018737316131592,grad_norm: 0.9838790621010046, iteration: 276416
loss: 1.0365426540374756,grad_norm: 0.9675517995040779, iteration: 276417
loss: 1.003235936164856,grad_norm: 0.897158499058234, iteration: 276418
loss: 0.9911534190177917,grad_norm: 0.8566709796672469, iteration: 276419
loss: 1.10360586643219,grad_norm: 0.9999998805050392, iteration: 276420
loss: 0.9624404907226562,grad_norm: 0.9999990584373647, iteration: 276421
loss: 1.011934757232666,grad_norm: 0.8278057939223697, iteration: 276422
loss: 0.9937566518783569,grad_norm: 0.9345688314597891, iteration: 276423
loss: 0.9985479116439819,grad_norm: 0.9560795140612075, iteration: 276424
loss: 1.0207089185714722,grad_norm: 0.8199690493749205, iteration: 276425
loss: 0.9899429678916931,grad_norm: 0.7796870293332038, iteration: 276426
loss: 1.0180058479309082,grad_norm: 0.7996594454307829, iteration: 276427
loss: 1.0522098541259766,grad_norm: 0.9999991769100214, iteration: 276428
loss: 1.0048109292984009,grad_norm: 0.9628885431800693, iteration: 276429
loss: 0.9803695678710938,grad_norm: 0.99999944053787, iteration: 276430
loss: 1.0117889642715454,grad_norm: 0.927015680438051, iteration: 276431
loss: 0.9813017249107361,grad_norm: 0.9093052974844511, iteration: 276432
loss: 0.9462929964065552,grad_norm: 0.8680519923380878, iteration: 276433
loss: 0.9918218851089478,grad_norm: 0.8663650684609958, iteration: 276434
loss: 1.0197484493255615,grad_norm: 0.9999997741799164, iteration: 276435
loss: 0.9959213733673096,grad_norm: 0.7710499594480204, iteration: 276436
loss: 0.9714645743370056,grad_norm: 0.866405527765892, iteration: 276437
loss: 1.0269615650177002,grad_norm: 0.9246740650798722, iteration: 276438
loss: 1.0050103664398193,grad_norm: 0.9999993677361443, iteration: 276439
loss: 1.0485886335372925,grad_norm: 0.9999994020975526, iteration: 276440
loss: 1.0051192045211792,grad_norm: 0.8156080547843615, iteration: 276441
loss: 1.0271939039230347,grad_norm: 0.8736896830641729, iteration: 276442
loss: 1.0385562181472778,grad_norm: 0.9410696469614455, iteration: 276443
loss: 0.9987030029296875,grad_norm: 0.9100410272011444, iteration: 276444
loss: 0.9833400249481201,grad_norm: 0.7439227966488631, iteration: 276445
loss: 0.9622228741645813,grad_norm: 0.9203129482986789, iteration: 276446
loss: 1.0186712741851807,grad_norm: 0.8377614188727732, iteration: 276447
loss: 1.0323060750961304,grad_norm: 0.7688125157344387, iteration: 276448
loss: 0.971154510974884,grad_norm: 0.9155786573472097, iteration: 276449
loss: 0.9775486588478088,grad_norm: 0.8879595883481008, iteration: 276450
loss: 1.0100419521331787,grad_norm: 0.8002291802642828, iteration: 276451
loss: 0.988107442855835,grad_norm: 0.8937392461819512, iteration: 276452
loss: 0.986201822757721,grad_norm: 0.9331772110973335, iteration: 276453
loss: 1.0415984392166138,grad_norm: 0.7499280822100778, iteration: 276454
loss: 1.078128695487976,grad_norm: 0.9999992583531508, iteration: 276455
loss: 1.0168325901031494,grad_norm: 0.8692330207116115, iteration: 276456
loss: 0.9813491702079773,grad_norm: 0.9387165294447851, iteration: 276457
loss: 1.0017269849777222,grad_norm: 0.7469585155834546, iteration: 276458
loss: 1.0129503011703491,grad_norm: 0.9999991230568621, iteration: 276459
loss: 0.9768521189689636,grad_norm: 0.7389207605218382, iteration: 276460
loss: 1.0089679956436157,grad_norm: 0.9999991097834721, iteration: 276461
loss: 1.029146432876587,grad_norm: 0.7656680735103087, iteration: 276462
loss: 0.9681631326675415,grad_norm: 0.9162182055956087, iteration: 276463
loss: 1.0582362413406372,grad_norm: 0.999999237153578, iteration: 276464
loss: 1.0153964757919312,grad_norm: 0.7245490054838476, iteration: 276465
loss: 0.9840354919433594,grad_norm: 0.9119023995261122, iteration: 276466
loss: 0.98052978515625,grad_norm: 0.945494680553598, iteration: 276467
loss: 1.0788984298706055,grad_norm: 0.9999991803914973, iteration: 276468
loss: 1.1083499193191528,grad_norm: 0.9999999550883896, iteration: 276469
loss: 0.9499579071998596,grad_norm: 0.9036588446524264, iteration: 276470
loss: 1.0252710580825806,grad_norm: 0.7850767605432236, iteration: 276471
loss: 0.9817569255828857,grad_norm: 0.8383189352179409, iteration: 276472
loss: 0.9787849187850952,grad_norm: 0.9999990959579766, iteration: 276473
loss: 1.0143699645996094,grad_norm: 0.9999998755105527, iteration: 276474
loss: 1.0655040740966797,grad_norm: 0.9929026797502091, iteration: 276475
loss: 1.0169038772583008,grad_norm: 0.7654117923329697, iteration: 276476
loss: 1.0180305242538452,grad_norm: 0.9999996287557592, iteration: 276477
loss: 0.9916996359825134,grad_norm: 0.9451722783525579, iteration: 276478
loss: 1.0370125770568848,grad_norm: 0.9999998126131727, iteration: 276479
loss: 1.0053836107254028,grad_norm: 0.753379782813742, iteration: 276480
loss: 1.0041351318359375,grad_norm: 0.7744392911256305, iteration: 276481
loss: 0.997275710105896,grad_norm: 0.8441044752247866, iteration: 276482
loss: 1.076177716255188,grad_norm: 0.9999990937271354, iteration: 276483
loss: 0.991701602935791,grad_norm: 0.8408869119234624, iteration: 276484
loss: 1.0111830234527588,grad_norm: 0.7761458267362792, iteration: 276485
loss: 0.9999897480010986,grad_norm: 0.9999993548571162, iteration: 276486
loss: 1.0290104150772095,grad_norm: 0.7733471934956488, iteration: 276487
loss: 1.029801845550537,grad_norm: 0.999999048197508, iteration: 276488
loss: 1.0779868364334106,grad_norm: 0.9109228909615678, iteration: 276489
loss: 1.0781939029693604,grad_norm: 0.9200443921688726, iteration: 276490
loss: 1.0673149824142456,grad_norm: 0.9999994354113505, iteration: 276491
loss: 1.1085922718048096,grad_norm: 0.9999995577832234, iteration: 276492
loss: 1.011897325515747,grad_norm: 0.9109100004142114, iteration: 276493
loss: 0.9768050312995911,grad_norm: 0.832866288612151, iteration: 276494
loss: 1.0155233144760132,grad_norm: 0.9999990706719237, iteration: 276495
loss: 1.030794620513916,grad_norm: 0.8408589091183873, iteration: 276496
loss: 0.9931471347808838,grad_norm: 0.9632346041862655, iteration: 276497
loss: 0.9991386532783508,grad_norm: 0.917805812921803, iteration: 276498
loss: 0.9892947673797607,grad_norm: 0.8044803081731989, iteration: 276499
loss: 0.991227924823761,grad_norm: 0.9999993514125798, iteration: 276500
loss: 1.0309449434280396,grad_norm: 0.999999157587485, iteration: 276501
loss: 1.0328094959259033,grad_norm: 0.7805784669270003, iteration: 276502
loss: 0.9956719279289246,grad_norm: 0.8842326512328942, iteration: 276503
loss: 0.9930997490882874,grad_norm: 0.9916757673772141, iteration: 276504
loss: 0.9946153163909912,grad_norm: 0.8617216077774755, iteration: 276505
loss: 0.9881194233894348,grad_norm: 0.9999990024115036, iteration: 276506
loss: 0.9558410048484802,grad_norm: 0.9999497080635863, iteration: 276507
loss: 1.0299519300460815,grad_norm: 0.9999990686132038, iteration: 276508
loss: 1.0036664009094238,grad_norm: 0.8653130468350149, iteration: 276509
loss: 0.9614890813827515,grad_norm: 0.8222299436445947, iteration: 276510
loss: 0.9709694981575012,grad_norm: 0.8010117656610064, iteration: 276511
loss: 1.0103532075881958,grad_norm: 0.9547198050136967, iteration: 276512
loss: 0.9969677925109863,grad_norm: 0.777738912179242, iteration: 276513
loss: 1.0238851308822632,grad_norm: 0.9182506732246016, iteration: 276514
loss: 0.9997891187667847,grad_norm: 0.9999991070146882, iteration: 276515
loss: 1.0409700870513916,grad_norm: 0.8242590385711448, iteration: 276516
loss: 1.0374900102615356,grad_norm: 0.9766618310383696, iteration: 276517
loss: 1.1097103357315063,grad_norm: 0.9691608659940458, iteration: 276518
loss: 1.0329691171646118,grad_norm: 0.8773784996548664, iteration: 276519
loss: 1.060638427734375,grad_norm: 0.9999991248974601, iteration: 276520
loss: 1.0100232362747192,grad_norm: 0.9592048031791605, iteration: 276521
loss: 1.034630537033081,grad_norm: 0.9999990200403718, iteration: 276522
loss: 1.0954489707946777,grad_norm: 0.897434604602498, iteration: 276523
loss: 0.9903238415718079,grad_norm: 0.939832655832617, iteration: 276524
loss: 1.006885051727295,grad_norm: 0.8788677907896199, iteration: 276525
loss: 1.0187090635299683,grad_norm: 0.7388696045576904, iteration: 276526
loss: 1.0446038246154785,grad_norm: 0.999999996049092, iteration: 276527
loss: 0.9924570322036743,grad_norm: 0.7548467902298734, iteration: 276528
loss: 1.212291955947876,grad_norm: 0.9999997251343803, iteration: 276529
loss: 1.0265566110610962,grad_norm: 0.9018935536344446, iteration: 276530
loss: 1.01321280002594,grad_norm: 0.8674328965013708, iteration: 276531
loss: 1.0518205165863037,grad_norm: 0.9999997443283334, iteration: 276532
loss: 0.9916372895240784,grad_norm: 0.9549477625040738, iteration: 276533
loss: 0.9567629098892212,grad_norm: 0.9880949634258751, iteration: 276534
loss: 0.9798796772956848,grad_norm: 0.8662026921570857, iteration: 276535
loss: 0.9951791763305664,grad_norm: 0.8914775198200949, iteration: 276536
loss: 1.1430941820144653,grad_norm: 0.9999994876072132, iteration: 276537
loss: 0.9986929297447205,grad_norm: 0.8600427523987311, iteration: 276538
loss: 1.0184015035629272,grad_norm: 0.9999991185744667, iteration: 276539
loss: 1.1057755947113037,grad_norm: 0.9999990414597946, iteration: 276540
loss: 1.0098844766616821,grad_norm: 0.9999992108936872, iteration: 276541
loss: 1.077680230140686,grad_norm: 0.9999994429261441, iteration: 276542
loss: 0.9967604875564575,grad_norm: 0.7540360730785897, iteration: 276543
loss: 0.9921520948410034,grad_norm: 0.8232751255223989, iteration: 276544
loss: 0.9944595694541931,grad_norm: 0.9191150808481352, iteration: 276545
loss: 1.0307896137237549,grad_norm: 0.9524560975421951, iteration: 276546
loss: 0.9915268421173096,grad_norm: 0.8079423743375873, iteration: 276547
loss: 1.024239420890808,grad_norm: 0.9999995484793985, iteration: 276548
loss: 0.985413670539856,grad_norm: 0.9999990454034358, iteration: 276549
loss: 1.0229356288909912,grad_norm: 0.9085682339204418, iteration: 276550
loss: 1.0182753801345825,grad_norm: 0.9938899813161877, iteration: 276551
loss: 0.9948402643203735,grad_norm: 0.925169653454174, iteration: 276552
loss: 0.9997967481613159,grad_norm: 0.7964545667964551, iteration: 276553
loss: 0.9863797426223755,grad_norm: 0.7265527555035216, iteration: 276554
loss: 0.9605916738510132,grad_norm: 0.792703419347575, iteration: 276555
loss: 1.0168125629425049,grad_norm: 0.8487405596928326, iteration: 276556
loss: 1.0273551940917969,grad_norm: 0.9999990586172793, iteration: 276557
loss: 0.993830144405365,grad_norm: 0.8630371124158696, iteration: 276558
loss: 1.0072587728500366,grad_norm: 0.999999210667675, iteration: 276559
loss: 1.002667784690857,grad_norm: 0.9363334935116284, iteration: 276560
loss: 0.9947294592857361,grad_norm: 0.9999990975572581, iteration: 276561
loss: 0.9933347702026367,grad_norm: 0.9999990856102352, iteration: 276562
loss: 1.01052987575531,grad_norm: 0.9999991033062149, iteration: 276563
loss: 1.020132303237915,grad_norm: 0.9986118521643734, iteration: 276564
loss: 0.9731529355049133,grad_norm: 0.871504409579067, iteration: 276565
loss: 1.00140380859375,grad_norm: 0.9999991684971357, iteration: 276566
loss: 1.0078914165496826,grad_norm: 0.9382845753629268, iteration: 276567
loss: 1.0285502672195435,grad_norm: 0.7508647840642916, iteration: 276568
loss: 0.986674964427948,grad_norm: 0.9258930946797375, iteration: 276569
loss: 1.0166188478469849,grad_norm: 0.848153338566125, iteration: 276570
loss: 0.9967252612113953,grad_norm: 0.9999998982439269, iteration: 276571
loss: 1.020620346069336,grad_norm: 0.9028068882236273, iteration: 276572
loss: 0.9962347149848938,grad_norm: 0.915515021984138, iteration: 276573
loss: 0.946751058101654,grad_norm: 0.8131420770879938, iteration: 276574
loss: 1.0377365350723267,grad_norm: 0.9452748378499649, iteration: 276575
loss: 1.0857614278793335,grad_norm: 0.9999996930311653, iteration: 276576
loss: 1.0032882690429688,grad_norm: 0.7278939225037583, iteration: 276577
loss: 1.022188663482666,grad_norm: 0.99999927590407, iteration: 276578
loss: 1.0468438863754272,grad_norm: 0.999999093638714, iteration: 276579
loss: 1.005898118019104,grad_norm: 0.7318709458564296, iteration: 276580
loss: 1.0116734504699707,grad_norm: 0.9067578241220496, iteration: 276581
loss: 1.0143237113952637,grad_norm: 0.9544954100757086, iteration: 276582
loss: 1.0031523704528809,grad_norm: 0.8737187898563891, iteration: 276583
loss: 1.023106336593628,grad_norm: 0.9458531598233466, iteration: 276584
loss: 1.017019271850586,grad_norm: 0.8921424515329888, iteration: 276585
loss: 0.9713366627693176,grad_norm: 0.8512587707837875, iteration: 276586
loss: 0.9720836281776428,grad_norm: 0.8467626633814216, iteration: 276587
loss: 1.0048850774765015,grad_norm: 0.8696588584920142, iteration: 276588
loss: 1.007516622543335,grad_norm: 0.9247095777919994, iteration: 276589
loss: 1.0382673740386963,grad_norm: 0.9187630071769466, iteration: 276590
loss: 0.9874547123908997,grad_norm: 0.8725382961713155, iteration: 276591
loss: 1.1248277425765991,grad_norm: 0.9999999261199598, iteration: 276592
loss: 1.015304684638977,grad_norm: 0.9999996427392648, iteration: 276593
loss: 0.9904122948646545,grad_norm: 0.9230102019494899, iteration: 276594
loss: 1.0031179189682007,grad_norm: 0.9067995133876491, iteration: 276595
loss: 1.0076323747634888,grad_norm: 0.9999998235512522, iteration: 276596
loss: 1.0087053775787354,grad_norm: 0.7243029063329253, iteration: 276597
loss: 1.0386475324630737,grad_norm: 0.9999997576695234, iteration: 276598
loss: 1.0223768949508667,grad_norm: 0.7698935851300023, iteration: 276599
loss: 1.0083963871002197,grad_norm: 0.8558694585161496, iteration: 276600
loss: 0.9744461178779602,grad_norm: 0.9085374139174399, iteration: 276601
loss: 1.0136789083480835,grad_norm: 0.8355573410306549, iteration: 276602
loss: 1.0052093267440796,grad_norm: 0.8519528662009092, iteration: 276603
loss: 1.0042345523834229,grad_norm: 0.8864861394880428, iteration: 276604
loss: 1.0056949853897095,grad_norm: 0.999999059390571, iteration: 276605
loss: 0.9822943210601807,grad_norm: 0.9480527348296672, iteration: 276606
loss: 0.9672591090202332,grad_norm: 0.9982822842034902, iteration: 276607
loss: 1.0415356159210205,grad_norm: 0.999998990590314, iteration: 276608
loss: 0.9901265501976013,grad_norm: 0.9999991678104332, iteration: 276609
loss: 1.0347039699554443,grad_norm: 0.9419909692482878, iteration: 276610
loss: 1.0255064964294434,grad_norm: 0.7765376734128624, iteration: 276611
loss: 1.0045191049575806,grad_norm: 0.9999991165930213, iteration: 276612
loss: 0.9954190850257874,grad_norm: 0.885134249585233, iteration: 276613
loss: 1.0351296663284302,grad_norm: 0.9999991993957387, iteration: 276614
loss: 0.9681089520454407,grad_norm: 0.9796594228270026, iteration: 276615
loss: 0.996745228767395,grad_norm: 0.7689333697526949, iteration: 276616
loss: 1.034074068069458,grad_norm: 0.7462288816633894, iteration: 276617
loss: 1.0288290977478027,grad_norm: 0.8191387532134885, iteration: 276618
loss: 0.9798319935798645,grad_norm: 0.7612973702866682, iteration: 276619
loss: 0.9546263813972473,grad_norm: 0.8036614613501983, iteration: 276620
loss: 0.9680945873260498,grad_norm: 0.8439603692056812, iteration: 276621
loss: 1.0246508121490479,grad_norm: 0.9227114713668887, iteration: 276622
loss: 0.9779607653617859,grad_norm: 0.8838124001926724, iteration: 276623
loss: 1.036820411682129,grad_norm: 0.8858241131963165, iteration: 276624
loss: 0.9952178001403809,grad_norm: 0.9999993237591882, iteration: 276625
loss: 1.0198503732681274,grad_norm: 0.9999995235815013, iteration: 276626
loss: 0.9940003752708435,grad_norm: 0.878282119980924, iteration: 276627
loss: 1.0029536485671997,grad_norm: 0.8814514548716811, iteration: 276628
loss: 0.9995412826538086,grad_norm: 0.8505273258447814, iteration: 276629
loss: 1.0034750699996948,grad_norm: 0.8019924979494403, iteration: 276630
loss: 1.016352653503418,grad_norm: 0.800613297711706, iteration: 276631
loss: 0.9982708692550659,grad_norm: 0.9541796736089464, iteration: 276632
loss: 1.0643506050109863,grad_norm: 0.9999995031095673, iteration: 276633
loss: 0.9951242208480835,grad_norm: 0.9999996226391679, iteration: 276634
loss: 1.0072044134140015,grad_norm: 0.9999991361501341, iteration: 276635
loss: 0.969535231590271,grad_norm: 0.8929129140560924, iteration: 276636
loss: 1.0043548345565796,grad_norm: 0.7831933055404282, iteration: 276637
loss: 1.0321582555770874,grad_norm: 0.7956343570531861, iteration: 276638
loss: 0.9948634505271912,grad_norm: 0.8602834306116762, iteration: 276639
loss: 1.003464698791504,grad_norm: 0.8580860751318427, iteration: 276640
loss: 1.0141783952713013,grad_norm: 0.9999995373205308, iteration: 276641
loss: 1.0101377964019775,grad_norm: 0.9999990805464268, iteration: 276642
loss: 1.0125889778137207,grad_norm: 0.9999992008421799, iteration: 276643
loss: 0.9716649055480957,grad_norm: 0.8354398589017531, iteration: 276644
loss: 1.0064963102340698,grad_norm: 0.957120548478867, iteration: 276645
loss: 0.9913157224655151,grad_norm: 0.9546931003008129, iteration: 276646
loss: 0.9993081092834473,grad_norm: 0.7974803232291947, iteration: 276647
loss: 1.069005012512207,grad_norm: 0.9999996913455976, iteration: 276648
loss: 1.0138169527053833,grad_norm: 0.9964416928653353, iteration: 276649
loss: 1.031922459602356,grad_norm: 0.7508702083724347, iteration: 276650
loss: 0.9794632196426392,grad_norm: 0.9265605208207048, iteration: 276651
loss: 1.0360804796218872,grad_norm: 0.9999995022691308, iteration: 276652
loss: 0.9963536262512207,grad_norm: 0.8153153908484935, iteration: 276653
loss: 1.022375464439392,grad_norm: 0.9346556696486832, iteration: 276654
loss: 0.9878619313240051,grad_norm: 0.836284735474824, iteration: 276655
loss: 1.0297813415527344,grad_norm: 0.8119387053751516, iteration: 276656
loss: 1.0370370149612427,grad_norm: 0.8768505078462683, iteration: 276657
loss: 0.996808648109436,grad_norm: 0.8340214517016732, iteration: 276658
loss: 0.9819779396057129,grad_norm: 0.999999201949315, iteration: 276659
loss: 0.9646194577217102,grad_norm: 0.7767319220085439, iteration: 276660
loss: 0.9990602731704712,grad_norm: 0.9207312889391526, iteration: 276661
loss: 0.9811412692070007,grad_norm: 0.883097891509995, iteration: 276662
loss: 1.0025275945663452,grad_norm: 0.9417403242627695, iteration: 276663
loss: 1.0175434350967407,grad_norm: 0.8303150594860266, iteration: 276664
loss: 0.990725576877594,grad_norm: 0.9915012324341437, iteration: 276665
loss: 1.0746735334396362,grad_norm: 0.9999992226233471, iteration: 276666
loss: 1.0187796354293823,grad_norm: 0.9999991118812842, iteration: 276667
loss: 1.016496181488037,grad_norm: 0.7739704285380747, iteration: 276668
loss: 1.0140902996063232,grad_norm: 0.6719034527393764, iteration: 276669
loss: 1.029288649559021,grad_norm: 0.8207420015929195, iteration: 276670
loss: 1.140647053718567,grad_norm: 0.9999995926519869, iteration: 276671
loss: 0.9997283816337585,grad_norm: 0.9414420796181238, iteration: 276672
loss: 0.9695908427238464,grad_norm: 0.9999990147595957, iteration: 276673
loss: 0.9990230798721313,grad_norm: 0.9188795549122202, iteration: 276674
loss: 1.0039308071136475,grad_norm: 0.7233794534236321, iteration: 276675
loss: 0.9977538585662842,grad_norm: 0.999999042107264, iteration: 276676
loss: 1.0151474475860596,grad_norm: 0.7692726319905729, iteration: 276677
loss: 1.0234454870224,grad_norm: 0.8138247430611361, iteration: 276678
loss: 1.0073935985565186,grad_norm: 0.9999990981568813, iteration: 276679
loss: 0.9960402250289917,grad_norm: 0.8478718759658013, iteration: 276680
loss: 0.9938353896141052,grad_norm: 0.7190162938161643, iteration: 276681
loss: 0.9983730316162109,grad_norm: 0.999999082899702, iteration: 276682
loss: 0.9907644391059875,grad_norm: 0.883891292393951, iteration: 276683
loss: 1.0069642066955566,grad_norm: 0.8649217743982773, iteration: 276684
loss: 0.9907453656196594,grad_norm: 0.7715459471087251, iteration: 276685
loss: 0.9608060717582703,grad_norm: 0.974230838069671, iteration: 276686
loss: 0.9875139594078064,grad_norm: 0.7607735369486239, iteration: 276687
loss: 0.9957171082496643,grad_norm: 0.9999991094837973, iteration: 276688
loss: 1.020798921585083,grad_norm: 0.9999992108070377, iteration: 276689
loss: 1.0012578964233398,grad_norm: 0.9444724819639667, iteration: 276690
loss: 1.0363373756408691,grad_norm: 0.9999997384860414, iteration: 276691
loss: 0.9942677021026611,grad_norm: 0.7482268947454377, iteration: 276692
loss: 0.9990707039833069,grad_norm: 0.9703030328320597, iteration: 276693
loss: 1.0144366025924683,grad_norm: 0.7947688391673124, iteration: 276694
loss: 1.081344723701477,grad_norm: 0.9999991717900343, iteration: 276695
loss: 1.0021021366119385,grad_norm: 0.88251360801438, iteration: 276696
loss: 0.9843382835388184,grad_norm: 0.9999991237902669, iteration: 276697
loss: 1.012570858001709,grad_norm: 0.7886850330586068, iteration: 276698
loss: 1.0258866548538208,grad_norm: 0.8369455934881274, iteration: 276699
loss: 1.0338271856307983,grad_norm: 0.9121036294488801, iteration: 276700
loss: 0.9997318387031555,grad_norm: 0.999999571791311, iteration: 276701
loss: 0.995964527130127,grad_norm: 0.8147801170528576, iteration: 276702
loss: 0.9954738020896912,grad_norm: 0.9999995635390391, iteration: 276703
loss: 0.9938810467720032,grad_norm: 0.850199832867305, iteration: 276704
loss: 1.0033763647079468,grad_norm: 0.8844896536698992, iteration: 276705
loss: 1.0331214666366577,grad_norm: 0.7619345217855444, iteration: 276706
loss: 1.0153934955596924,grad_norm: 0.7551482805220302, iteration: 276707
loss: 1.0190367698669434,grad_norm: 0.8658728111961199, iteration: 276708
loss: 0.9637469053268433,grad_norm: 0.9172446340534571, iteration: 276709
loss: 0.9866970181465149,grad_norm: 0.8873424253199836, iteration: 276710
loss: 1.0028612613677979,grad_norm: 0.8688931327284084, iteration: 276711
loss: 1.0231019258499146,grad_norm: 0.9213149143646868, iteration: 276712
loss: 1.000102162361145,grad_norm: 0.9999990505492611, iteration: 276713
loss: 1.027596116065979,grad_norm: 0.962692571611065, iteration: 276714
loss: 1.025559902191162,grad_norm: 0.9999991425530517, iteration: 276715
loss: 1.0090245008468628,grad_norm: 0.9999990779310907, iteration: 276716
loss: 1.0445343255996704,grad_norm: 0.9999998705000326, iteration: 276717
loss: 1.0237349271774292,grad_norm: 0.9259222305032517, iteration: 276718
loss: 0.9690322875976562,grad_norm: 0.8876904132581421, iteration: 276719
loss: 0.990361750125885,grad_norm: 0.8882277058271254, iteration: 276720
loss: 0.9944567084312439,grad_norm: 0.9563875124041523, iteration: 276721
loss: 1.0493873357772827,grad_norm: 0.9999992062320991, iteration: 276722
loss: 0.9893054962158203,grad_norm: 0.8077255727086615, iteration: 276723
loss: 0.9899823665618896,grad_norm: 0.8188132254684274, iteration: 276724
loss: 0.9930275678634644,grad_norm: 0.8022246609161093, iteration: 276725
loss: 1.002030611038208,grad_norm: 0.9999992139327043, iteration: 276726
loss: 0.9723624587059021,grad_norm: 0.765001860004131, iteration: 276727
loss: 1.137752652168274,grad_norm: 0.9999995529506679, iteration: 276728
loss: 1.05514395236969,grad_norm: 0.8369242199883249, iteration: 276729
loss: 0.9971234798431396,grad_norm: 0.8406245140742251, iteration: 276730
loss: 1.0189062356948853,grad_norm: 0.8717355848399102, iteration: 276731
loss: 1.000654935836792,grad_norm: 0.8086324868690626, iteration: 276732
loss: 1.0633481740951538,grad_norm: 0.9999999414154984, iteration: 276733
loss: 1.0329452753067017,grad_norm: 0.8650057522523322, iteration: 276734
loss: 1.0037380456924438,grad_norm: 0.7569499433507422, iteration: 276735
loss: 0.9919732809066772,grad_norm: 0.8579312558852563, iteration: 276736
loss: 1.0322407484054565,grad_norm: 0.8714940480177562, iteration: 276737
loss: 0.9855180382728577,grad_norm: 0.975886088886112, iteration: 276738
loss: 1.0328221321105957,grad_norm: 0.9508831917955517, iteration: 276739
loss: 0.9915400147438049,grad_norm: 0.9999990532642157, iteration: 276740
loss: 1.1084645986557007,grad_norm: 0.9999997322686286, iteration: 276741
loss: 0.9860966801643372,grad_norm: 0.7944216272162246, iteration: 276742
loss: 0.9984877109527588,grad_norm: 0.8808747262880352, iteration: 276743
loss: 0.991677463054657,grad_norm: 0.9502423829827016, iteration: 276744
loss: 1.136641502380371,grad_norm: 0.9999999913371037, iteration: 276745
loss: 1.0011686086654663,grad_norm: 0.856457916148937, iteration: 276746
loss: 0.9939653873443604,grad_norm: 0.8387040572054327, iteration: 276747
loss: 1.0622007846832275,grad_norm: 0.9999995112606492, iteration: 276748
loss: 1.045092225074768,grad_norm: 0.7897442380492625, iteration: 276749
loss: 0.9827657341957092,grad_norm: 0.6864016222445628, iteration: 276750
loss: 1.124387502670288,grad_norm: 0.9999996176177129, iteration: 276751
loss: 1.0607647895812988,grad_norm: 0.9999992946473325, iteration: 276752
loss: 1.021445631980896,grad_norm: 0.8530053846237164, iteration: 276753
loss: 0.9906364679336548,grad_norm: 0.806492030644183, iteration: 276754
loss: 0.9979656338691711,grad_norm: 0.8498413518023081, iteration: 276755
loss: 0.9901160597801208,grad_norm: 0.9189841887084746, iteration: 276756
loss: 1.006508231163025,grad_norm: 0.8917038256585652, iteration: 276757
loss: 0.9651791453361511,grad_norm: 0.8095146400422834, iteration: 276758
loss: 0.9956074953079224,grad_norm: 0.9965107294508968, iteration: 276759
loss: 0.989845335483551,grad_norm: 0.8312752129728956, iteration: 276760
loss: 1.1199052333831787,grad_norm: 1.0000000340993342, iteration: 276761
loss: 1.0234159231185913,grad_norm: 0.8231702285549355, iteration: 276762
loss: 0.9861799478530884,grad_norm: 0.8122536690942622, iteration: 276763
loss: 0.9862002730369568,grad_norm: 0.8165260578635337, iteration: 276764
loss: 0.9440336227416992,grad_norm: 0.8037527882105329, iteration: 276765
loss: 1.0124938488006592,grad_norm: 0.8260851076819156, iteration: 276766
loss: 1.1530190706253052,grad_norm: 0.9999999953347409, iteration: 276767
loss: 1.0590156316757202,grad_norm: 0.9999991128449518, iteration: 276768
loss: 1.0341014862060547,grad_norm: 0.9999993195607245, iteration: 276769
loss: 1.0758347511291504,grad_norm: 0.9999995278701027, iteration: 276770
loss: 1.0770293474197388,grad_norm: 0.9450490245043588, iteration: 276771
loss: 1.034163475036621,grad_norm: 0.9999995598095349, iteration: 276772
loss: 1.021049976348877,grad_norm: 0.7108800703744814, iteration: 276773
loss: 0.981480062007904,grad_norm: 0.9880569057369919, iteration: 276774
loss: 1.1228928565979004,grad_norm: 0.9999999695066779, iteration: 276775
loss: 1.016786813735962,grad_norm: 0.9484310483514443, iteration: 276776
loss: 0.9734885096549988,grad_norm: 0.999999039426103, iteration: 276777
loss: 1.0642240047454834,grad_norm: 0.8446100874924293, iteration: 276778
loss: 0.9745415449142456,grad_norm: 0.7476898543558167, iteration: 276779
loss: 0.9966681599617004,grad_norm: 0.8063925893326023, iteration: 276780
loss: 0.9945851564407349,grad_norm: 0.9844718253610399, iteration: 276781
loss: 0.9927234649658203,grad_norm: 0.999999135122646, iteration: 276782
loss: 0.9738415479660034,grad_norm: 0.9071161229862524, iteration: 276783
loss: 1.0261086225509644,grad_norm: 0.8554595195491927, iteration: 276784
loss: 1.0835251808166504,grad_norm: 0.9999990515269336, iteration: 276785
loss: 0.9397003650665283,grad_norm: 0.8638458445864747, iteration: 276786
loss: 0.9845213294029236,grad_norm: 0.8828632492973236, iteration: 276787
loss: 0.9876168966293335,grad_norm: 0.8762565514895254, iteration: 276788
loss: 1.008689284324646,grad_norm: 0.9836437976741553, iteration: 276789
loss: 1.0522783994674683,grad_norm: 0.9832803574583029, iteration: 276790
loss: 0.9736093878746033,grad_norm: 0.8283062815171804, iteration: 276791
loss: 0.9785512685775757,grad_norm: 0.7694785752405989, iteration: 276792
loss: 1.001257300376892,grad_norm: 0.7623080616526272, iteration: 276793
loss: 0.9821624755859375,grad_norm: 0.8668347099268052, iteration: 276794
loss: 0.9811089634895325,grad_norm: 0.9995060406833113, iteration: 276795
loss: 1.0972403287887573,grad_norm: 0.9492534246846447, iteration: 276796
loss: 1.019545078277588,grad_norm: 0.9999988755458299, iteration: 276797
loss: 0.9777351021766663,grad_norm: 0.8758396559347366, iteration: 276798
loss: 0.985360860824585,grad_norm: 0.7921112085174709, iteration: 276799
loss: 0.9928576946258545,grad_norm: 0.9301139949848785, iteration: 276800
loss: 1.0322366952896118,grad_norm: 0.8819233901063155, iteration: 276801
loss: 1.002854824066162,grad_norm: 0.8762697867750395, iteration: 276802
loss: 0.9601536989212036,grad_norm: 0.8181060654837145, iteration: 276803
loss: 0.9966682195663452,grad_norm: 0.9988825190732601, iteration: 276804
loss: 1.0195616483688354,grad_norm: 0.9843094346865776, iteration: 276805
loss: 1.0199565887451172,grad_norm: 0.7874627285493537, iteration: 276806
loss: 1.0049852132797241,grad_norm: 0.9133011388862246, iteration: 276807
loss: 0.9913460612297058,grad_norm: 0.8471645262487854, iteration: 276808
loss: 1.0234267711639404,grad_norm: 0.8904409916072326, iteration: 276809
loss: 0.9993587732315063,grad_norm: 0.7609382130521597, iteration: 276810
loss: 0.9516555666923523,grad_norm: 0.9242906022664974, iteration: 276811
loss: 1.1144318580627441,grad_norm: 0.9999997404941599, iteration: 276812
loss: 0.9892346262931824,grad_norm: 0.8717004252918894, iteration: 276813
loss: 1.0259268283843994,grad_norm: 0.9746318790379974, iteration: 276814
loss: 1.002400279045105,grad_norm: 0.9641983257244018, iteration: 276815
loss: 0.9686894416809082,grad_norm: 0.7478758094814881, iteration: 276816
loss: 1.000413179397583,grad_norm: 0.9999998236927995, iteration: 276817
loss: 0.9592565298080444,grad_norm: 0.8315139363063412, iteration: 276818
loss: 1.0404425859451294,grad_norm: 0.8226363945812722, iteration: 276819
loss: 0.9781869053840637,grad_norm: 0.9308814727509501, iteration: 276820
loss: 1.0204732418060303,grad_norm: 0.8323272065941455, iteration: 276821
loss: 1.0197893381118774,grad_norm: 0.8955306863643289, iteration: 276822
loss: 0.9885801076889038,grad_norm: 0.999998971908851, iteration: 276823
loss: 1.002436637878418,grad_norm: 0.9101885916325385, iteration: 276824
loss: 1.0347706079483032,grad_norm: 0.9819000425125741, iteration: 276825
loss: 1.0495373010635376,grad_norm: 0.9010823238606231, iteration: 276826
loss: 1.1081242561340332,grad_norm: 0.8547393693585994, iteration: 276827
loss: 0.9736739993095398,grad_norm: 0.9999996903311857, iteration: 276828
loss: 0.9633439779281616,grad_norm: 0.9999990435235723, iteration: 276829
loss: 0.9816214442253113,grad_norm: 0.7822419011988081, iteration: 276830
loss: 0.9995796084403992,grad_norm: 0.8253910764281752, iteration: 276831
loss: 1.0165457725524902,grad_norm: 0.8776060559067902, iteration: 276832
loss: 0.9906657338142395,grad_norm: 0.9294524855990502, iteration: 276833
loss: 1.0040005445480347,grad_norm: 0.863865968093782, iteration: 276834
loss: 1.0473748445510864,grad_norm: 0.9999991901197475, iteration: 276835
loss: 1.0267601013183594,grad_norm: 0.9495278119687953, iteration: 276836
loss: 1.0307588577270508,grad_norm: 0.9752505471105335, iteration: 276837
loss: 1.026972770690918,grad_norm: 0.9148301045797679, iteration: 276838
loss: 1.0136823654174805,grad_norm: 0.9725841474435714, iteration: 276839
loss: 1.02406907081604,grad_norm: 0.9999990638554032, iteration: 276840
loss: 0.9672886729240417,grad_norm: 0.6860876306864306, iteration: 276841
loss: 0.9949949383735657,grad_norm: 0.7257812250574305, iteration: 276842
loss: 1.0052204132080078,grad_norm: 0.8586065651229957, iteration: 276843
loss: 0.9875961542129517,grad_norm: 0.8514292327476809, iteration: 276844
loss: 0.9531283974647522,grad_norm: 0.9137748457173244, iteration: 276845
loss: 0.9860218167304993,grad_norm: 0.9946286340564499, iteration: 276846
loss: 1.0156683921813965,grad_norm: 0.9615112705404271, iteration: 276847
loss: 0.9731169939041138,grad_norm: 0.9250849145377621, iteration: 276848
loss: 1.0114326477050781,grad_norm: 0.8488902538938786, iteration: 276849
loss: 0.9803950786590576,grad_norm: 0.9999989645153938, iteration: 276850
loss: 1.099868655204773,grad_norm: 0.9999999154109614, iteration: 276851
loss: 0.9842495918273926,grad_norm: 0.9520494376496536, iteration: 276852
loss: 0.9608579874038696,grad_norm: 0.7520587903384715, iteration: 276853
loss: 0.9933426976203918,grad_norm: 0.9401519996244666, iteration: 276854
loss: 1.0160095691680908,grad_norm: 0.9999993133160887, iteration: 276855
loss: 1.0147751569747925,grad_norm: 0.7026669809102212, iteration: 276856
loss: 0.9971729516983032,grad_norm: 0.8837576656023481, iteration: 276857
loss: 0.982673168182373,grad_norm: 0.8565695719273273, iteration: 276858
loss: 1.1398755311965942,grad_norm: 1.0000000282583954, iteration: 276859
loss: 1.0008741617202759,grad_norm: 0.9744881242538115, iteration: 276860
loss: 0.9967862367630005,grad_norm: 0.7756179383096293, iteration: 276861
loss: 1.0006794929504395,grad_norm: 0.7942772426811213, iteration: 276862
loss: 1.0185251235961914,grad_norm: 0.8393850444401706, iteration: 276863
loss: 0.981252908706665,grad_norm: 0.9999990818971408, iteration: 276864
loss: 1.0545928478240967,grad_norm: 0.9999991864819048, iteration: 276865
loss: 1.0101298093795776,grad_norm: 0.8653665819641274, iteration: 276866
loss: 0.971556544303894,grad_norm: 0.7905819300186673, iteration: 276867
loss: 0.9941239356994629,grad_norm: 0.7422800237864198, iteration: 276868
loss: 0.986456573009491,grad_norm: 0.9999991907460033, iteration: 276869
loss: 0.9928317070007324,grad_norm: 0.789877102255845, iteration: 276870
loss: 1.046273946762085,grad_norm: 0.9999990780405463, iteration: 276871
loss: 0.9761949777603149,grad_norm: 0.9625204586773572, iteration: 276872
loss: 1.0223413705825806,grad_norm: 0.9522647559880633, iteration: 276873
loss: 1.0238291025161743,grad_norm: 0.9657504034793782, iteration: 276874
loss: 1.0005114078521729,grad_norm: 0.9999999222170464, iteration: 276875
loss: 1.000535249710083,grad_norm: 0.7743405730852854, iteration: 276876
loss: 0.997915506362915,grad_norm: 0.8513480185267653, iteration: 276877
loss: 1.069427490234375,grad_norm: 0.9999994660370962, iteration: 276878
loss: 0.9894963502883911,grad_norm: 0.8705519213918663, iteration: 276879
loss: 0.9974039196968079,grad_norm: 0.9999989269313249, iteration: 276880
loss: 1.0182490348815918,grad_norm: 0.8915450560824342, iteration: 276881
loss: 0.9737293124198914,grad_norm: 0.9195173283183344, iteration: 276882
loss: 1.00502347946167,grad_norm: 0.8216307602485505, iteration: 276883
loss: 1.0242502689361572,grad_norm: 0.7819928570181661, iteration: 276884
loss: 1.0087590217590332,grad_norm: 0.8948705991802338, iteration: 276885
loss: 1.0083510875701904,grad_norm: 0.9381957756232365, iteration: 276886
loss: 1.0025219917297363,grad_norm: 0.8162760298602286, iteration: 276887
loss: 0.9852104783058167,grad_norm: 0.8860660064452764, iteration: 276888
loss: 1.0949699878692627,grad_norm: 0.9982265674891231, iteration: 276889
loss: 1.0081005096435547,grad_norm: 0.7874617931779686, iteration: 276890
loss: 1.0676268339157104,grad_norm: 0.9999993397616034, iteration: 276891
loss: 1.0009492635726929,grad_norm: 0.9495147319645193, iteration: 276892
loss: 0.9932836294174194,grad_norm: 0.8408839961357418, iteration: 276893
loss: 1.0044667720794678,grad_norm: 0.6938747618169735, iteration: 276894
loss: 1.0226469039916992,grad_norm: 0.7113066646810351, iteration: 276895
loss: 1.0266432762145996,grad_norm: 0.8212298910431446, iteration: 276896
loss: 1.0155571699142456,grad_norm: 0.7600583961692956, iteration: 276897
loss: 1.006293535232544,grad_norm: 0.8780917008370619, iteration: 276898
loss: 1.1900947093963623,grad_norm: 0.9999995272256865, iteration: 276899
loss: 0.9708462953567505,grad_norm: 0.8026157049361047, iteration: 276900
loss: 0.9966076612472534,grad_norm: 0.9182074472847399, iteration: 276901
loss: 0.9531161189079285,grad_norm: 0.8143408466295297, iteration: 276902
loss: 1.0059925317764282,grad_norm: 0.8150926052630797, iteration: 276903
loss: 0.985821008682251,grad_norm: 0.8882870688951978, iteration: 276904
loss: 0.9831303954124451,grad_norm: 0.9999991452277084, iteration: 276905
loss: 0.9929623603820801,grad_norm: 0.7272789191115636, iteration: 276906
loss: 0.9951974153518677,grad_norm: 0.7460813821051507, iteration: 276907
loss: 1.0346815586090088,grad_norm: 0.9999991719440421, iteration: 276908
loss: 0.9941627383232117,grad_norm: 0.9999992547096105, iteration: 276909
loss: 1.0046948194503784,grad_norm: 0.8323590728848971, iteration: 276910
loss: 1.007111668586731,grad_norm: 0.7420866650273027, iteration: 276911
loss: 1.042232632637024,grad_norm: 0.9999994994974067, iteration: 276912
loss: 1.011523962020874,grad_norm: 0.9999992553470421, iteration: 276913
loss: 1.01059889793396,grad_norm: 0.8232373992653951, iteration: 276914
loss: 1.0058122873306274,grad_norm: 0.8592638394797829, iteration: 276915
loss: 0.9461113214492798,grad_norm: 0.8248800292876444, iteration: 276916
loss: 0.9647014141082764,grad_norm: 0.7616833877917465, iteration: 276917
loss: 1.0300894975662231,grad_norm: 0.9999998818415899, iteration: 276918
loss: 1.0149321556091309,grad_norm: 0.8602860380063317, iteration: 276919
loss: 1.0067874193191528,grad_norm: 0.9999995630814053, iteration: 276920
loss: 1.0530551671981812,grad_norm: 0.9999992053008596, iteration: 276921
loss: 0.9841014742851257,grad_norm: 0.8322734184234528, iteration: 276922
loss: 1.0195119380950928,grad_norm: 0.9999989736235873, iteration: 276923
loss: 0.9903290271759033,grad_norm: 0.7202809861706854, iteration: 276924
loss: 0.9630207419395447,grad_norm: 0.8866479175045513, iteration: 276925
loss: 0.9743499755859375,grad_norm: 0.9861679665946148, iteration: 276926
loss: 0.9955793023109436,grad_norm: 0.6405294938112397, iteration: 276927
loss: 1.031017541885376,grad_norm: 0.9999991237019616, iteration: 276928
loss: 0.9698494076728821,grad_norm: 0.9465502881952883, iteration: 276929
loss: 1.0190281867980957,grad_norm: 0.9258617375605124, iteration: 276930
loss: 1.029157042503357,grad_norm: 0.8738153626083335, iteration: 276931
loss: 1.0480655431747437,grad_norm: 0.9215490642509222, iteration: 276932
loss: 1.0247069597244263,grad_norm: 0.9377447825594534, iteration: 276933
loss: 0.9990028738975525,grad_norm: 0.9999997021716096, iteration: 276934
loss: 0.9939536452293396,grad_norm: 0.7851325200632936, iteration: 276935
loss: 1.002366304397583,grad_norm: 0.9999991175742045, iteration: 276936
loss: 1.0236409902572632,grad_norm: 0.9445711362816032, iteration: 276937
loss: 1.001303791999817,grad_norm: 0.9999991857173893, iteration: 276938
loss: 1.019037127494812,grad_norm: 0.9391966122890797, iteration: 276939
loss: 1.0130586624145508,grad_norm: 0.8772350841031308, iteration: 276940
loss: 0.988242506980896,grad_norm: 0.8996920158091387, iteration: 276941
loss: 0.9688794612884521,grad_norm: 0.9851432779025755, iteration: 276942
loss: 0.9821406006813049,grad_norm: 0.8828769244069032, iteration: 276943
loss: 0.9959346652030945,grad_norm: 0.7915700743764154, iteration: 276944
loss: 0.9906027317047119,grad_norm: 0.7529429269777411, iteration: 276945
loss: 0.9982209801673889,grad_norm: 0.914000382105889, iteration: 276946
loss: 1.0064916610717773,grad_norm: 0.8429722340927295, iteration: 276947
loss: 1.001646876335144,grad_norm: 0.7971036999732005, iteration: 276948
loss: 1.006020188331604,grad_norm: 0.8902703163344518, iteration: 276949
loss: 1.064557433128357,grad_norm: 0.9999992592874273, iteration: 276950
loss: 1.0710006952285767,grad_norm: 0.9999997553974187, iteration: 276951
loss: 1.0416591167449951,grad_norm: 0.8836764742532479, iteration: 276952
loss: 1.0583347082138062,grad_norm: 0.8544176820526652, iteration: 276953
loss: 0.9912896156311035,grad_norm: 0.88706231449815, iteration: 276954
loss: 1.0075814723968506,grad_norm: 0.7229255089617026, iteration: 276955
loss: 1.0582664012908936,grad_norm: 0.9207978337824869, iteration: 276956
loss: 0.9672842025756836,grad_norm: 0.9000666865839059, iteration: 276957
loss: 1.077989935874939,grad_norm: 0.999999373319799, iteration: 276958
loss: 1.0488674640655518,grad_norm: 0.8586567965013503, iteration: 276959
loss: 1.0235130786895752,grad_norm: 0.9999995200405628, iteration: 276960
loss: 0.9885775446891785,grad_norm: 0.8495277943061457, iteration: 276961
loss: 0.9852469563484192,grad_norm: 0.8648907912696046, iteration: 276962
loss: 1.0164166688919067,grad_norm: 0.7333933952198549, iteration: 276963
loss: 0.989136815071106,grad_norm: 0.9229852788334252, iteration: 276964
loss: 1.0096453428268433,grad_norm: 0.8753754542343283, iteration: 276965
loss: 0.9786916971206665,grad_norm: 0.8451692403186684, iteration: 276966
loss: 1.0110851526260376,grad_norm: 0.9074575560880771, iteration: 276967
loss: 0.9950305223464966,grad_norm: 0.7765297299349945, iteration: 276968
loss: 0.9605750441551208,grad_norm: 0.8423224903320528, iteration: 276969
loss: 1.003379225730896,grad_norm: 0.9346723542090076, iteration: 276970
loss: 1.0023494958877563,grad_norm: 0.9999991341002422, iteration: 276971
loss: 1.0103200674057007,grad_norm: 0.9761419930032271, iteration: 276972
loss: 0.9903727769851685,grad_norm: 0.874226833638842, iteration: 276973
loss: 1.010829210281372,grad_norm: 0.9672316902649246, iteration: 276974
loss: 1.0506370067596436,grad_norm: 0.9999993056017384, iteration: 276975
loss: 0.9959213137626648,grad_norm: 0.738493930683313, iteration: 276976
loss: 1.0061125755310059,grad_norm: 0.716520220809121, iteration: 276977
loss: 0.9669397473335266,grad_norm: 0.8068067540129676, iteration: 276978
loss: 0.9954943060874939,grad_norm: 0.9272210800652252, iteration: 276979
loss: 0.9954008460044861,grad_norm: 0.8632990315727084, iteration: 276980
loss: 1.0272375345230103,grad_norm: 0.9434929372196891, iteration: 276981
loss: 1.0033756494522095,grad_norm: 0.7643995860102004, iteration: 276982
loss: 1.1377791166305542,grad_norm: 0.9999997289910415, iteration: 276983
loss: 1.0264828205108643,grad_norm: 0.9999995465516079, iteration: 276984
loss: 0.9815323948860168,grad_norm: 0.999999101275108, iteration: 276985
loss: 1.023440957069397,grad_norm: 0.8701013567519595, iteration: 276986
loss: 1.004241704940796,grad_norm: 0.9853575899413405, iteration: 276987
loss: 0.9933034777641296,grad_norm: 0.7044272983922568, iteration: 276988
loss: 1.0994120836257935,grad_norm: 0.8734399219591106, iteration: 276989
loss: 0.9654629230499268,grad_norm: 0.9024047831228194, iteration: 276990
loss: 1.0163804292678833,grad_norm: 0.8828213098258275, iteration: 276991
loss: 1.0228290557861328,grad_norm: 0.9999991261239498, iteration: 276992
loss: 0.9401328563690186,grad_norm: 0.9999991020200912, iteration: 276993
loss: 1.013600468635559,grad_norm: 0.8058064302351032, iteration: 276994
loss: 0.9945796728134155,grad_norm: 0.9999991937900169, iteration: 276995
loss: 1.0024988651275635,grad_norm: 0.9999989812342017, iteration: 276996
loss: 0.9946517944335938,grad_norm: 0.9999992025974238, iteration: 276997
loss: 0.9949297904968262,grad_norm: 0.926139818670101, iteration: 276998
loss: 0.9558545351028442,grad_norm: 0.8351567648626774, iteration: 276999
loss: 0.9827027320861816,grad_norm: 0.9525552568046483, iteration: 277000
loss: 1.0508882999420166,grad_norm: 0.9999998560570531, iteration: 277001
loss: 1.0368268489837646,grad_norm: 0.8048349077121143, iteration: 277002
loss: 0.9775233268737793,grad_norm: 0.7517215181492327, iteration: 277003
loss: 0.9807249903678894,grad_norm: 0.8211174536728142, iteration: 277004
loss: 0.9537911415100098,grad_norm: 0.8184048266770223, iteration: 277005
loss: 0.979130208492279,grad_norm: 0.988227716265022, iteration: 277006
loss: 0.9975965619087219,grad_norm: 0.9999990968768186, iteration: 277007
loss: 1.0047059059143066,grad_norm: 0.8230624425636595, iteration: 277008
loss: 1.0257333517074585,grad_norm: 0.767584299206101, iteration: 277009
loss: 1.0073097944259644,grad_norm: 0.946183482725666, iteration: 277010
loss: 0.9941082000732422,grad_norm: 0.8683823734963927, iteration: 277011
loss: 0.993308961391449,grad_norm: 0.9999989760087188, iteration: 277012
loss: 0.928168773651123,grad_norm: 0.8526172895637605, iteration: 277013
loss: 1.0184155702590942,grad_norm: 0.8946755459037721, iteration: 277014
loss: 0.9847962856292725,grad_norm: 0.8493491413368939, iteration: 277015
loss: 0.9840434789657593,grad_norm: 0.8635998043949951, iteration: 277016
loss: 1.0462092161178589,grad_norm: 0.9999990253594336, iteration: 277017
loss: 0.9584189653396606,grad_norm: 0.7841676463489541, iteration: 277018
loss: 0.9979605078697205,grad_norm: 0.8678735146598426, iteration: 277019
loss: 0.9694214463233948,grad_norm: 0.7798480024797593, iteration: 277020
loss: 1.0065093040466309,grad_norm: 0.9810746995323782, iteration: 277021
loss: 1.0115575790405273,grad_norm: 0.8436752294252445, iteration: 277022
loss: 1.0028516054153442,grad_norm: 0.8786451699701523, iteration: 277023
loss: 1.0270224809646606,grad_norm: 0.8764142772440544, iteration: 277024
loss: 0.9606813788414001,grad_norm: 0.9416771690168322, iteration: 277025
loss: 1.010770559310913,grad_norm: 0.8130571665843964, iteration: 277026
loss: 0.9905350804328918,grad_norm: 0.8996093965734153, iteration: 277027
loss: 1.0118598937988281,grad_norm: 0.9062569073607765, iteration: 277028
loss: 1.0270477533340454,grad_norm: 0.9999990524076415, iteration: 277029
loss: 0.9810929298400879,grad_norm: 0.7954227351649245, iteration: 277030
loss: 1.0041049718856812,grad_norm: 0.7803061687588785, iteration: 277031
loss: 1.0841422080993652,grad_norm: 0.8512023320018504, iteration: 277032
loss: 1.0315946340560913,grad_norm: 0.9291482692079801, iteration: 277033
loss: 0.9648572206497192,grad_norm: 0.8523627180580375, iteration: 277034
loss: 1.0050585269927979,grad_norm: 0.8963046868197798, iteration: 277035
loss: 0.9799365401268005,grad_norm: 0.8906381787939146, iteration: 277036
loss: 0.9732497930526733,grad_norm: 0.81726705143003, iteration: 277037
loss: 1.032449722290039,grad_norm: 0.8878821822849, iteration: 277038
loss: 1.0087224245071411,grad_norm: 0.9605585961332788, iteration: 277039
loss: 0.9670802354812622,grad_norm: 0.8280562631952193, iteration: 277040
loss: 1.0507124662399292,grad_norm: 0.7215015318051592, iteration: 277041
loss: 1.0141160488128662,grad_norm: 0.9999991124260693, iteration: 277042
loss: 1.057347297668457,grad_norm: 0.9168835194695006, iteration: 277043
loss: 0.9847099781036377,grad_norm: 0.9999990410898124, iteration: 277044
loss: 1.0076810121536255,grad_norm: 0.849282717665843, iteration: 277045
loss: 1.014526128768921,grad_norm: 0.8240579510679225, iteration: 277046
loss: 1.0036755800247192,grad_norm: 0.7405112310789791, iteration: 277047
loss: 1.0046420097351074,grad_norm: 0.7921880868815608, iteration: 277048
loss: 1.01972496509552,grad_norm: 0.9662462440282129, iteration: 277049
loss: 0.9690739512443542,grad_norm: 0.7699956738715031, iteration: 277050
loss: 1.0227423906326294,grad_norm: 0.9999996324302407, iteration: 277051
loss: 0.9935706853866577,grad_norm: 0.8610435299826544, iteration: 277052
loss: 1.053604006767273,grad_norm: 0.9923785672746084, iteration: 277053
loss: 0.9760754108428955,grad_norm: 0.7541908439906659, iteration: 277054
loss: 0.9914820790290833,grad_norm: 0.8225488167271042, iteration: 277055
loss: 1.0357375144958496,grad_norm: 0.8093734996000347, iteration: 277056
loss: 0.9994363188743591,grad_norm: 0.9429939088383543, iteration: 277057
loss: 1.1099073886871338,grad_norm: 0.999999270198038, iteration: 277058
loss: 1.0145952701568604,grad_norm: 0.7175838812951549, iteration: 277059
loss: 0.9937978982925415,grad_norm: 0.7961786998162933, iteration: 277060
loss: 1.0150718688964844,grad_norm: 0.9473477560195014, iteration: 277061
loss: 0.9851741790771484,grad_norm: 0.8739600526130114, iteration: 277062
loss: 1.1225616931915283,grad_norm: 0.9999991875766966, iteration: 277063
loss: 1.0135616064071655,grad_norm: 0.766379373178927, iteration: 277064
loss: 0.9711865186691284,grad_norm: 0.9999991353913935, iteration: 277065
loss: 0.9847666621208191,grad_norm: 0.9193187786719997, iteration: 277066
loss: 0.9759303331375122,grad_norm: 0.8532876242793986, iteration: 277067
loss: 1.1125158071517944,grad_norm: 0.9999991063606598, iteration: 277068
loss: 1.0072288513183594,grad_norm: 0.912742803250023, iteration: 277069
loss: 1.024942398071289,grad_norm: 0.753917705378582, iteration: 277070
loss: 1.0028557777404785,grad_norm: 0.8970776319294237, iteration: 277071
loss: 1.0005664825439453,grad_norm: 0.905688702456822, iteration: 277072
loss: 0.9844624996185303,grad_norm: 0.9012126001832931, iteration: 277073
loss: 1.0047569274902344,grad_norm: 0.9999990475266166, iteration: 277074
loss: 1.0290323495864868,grad_norm: 0.9999992166361692, iteration: 277075
loss: 1.0054179430007935,grad_norm: 0.7114407472386105, iteration: 277076
loss: 0.9803323149681091,grad_norm: 0.9781294667685705, iteration: 277077
loss: 0.9929358959197998,grad_norm: 0.8283640757135606, iteration: 277078
loss: 1.0187811851501465,grad_norm: 0.797489424056564, iteration: 277079
loss: 0.9592626094818115,grad_norm: 0.807022330611976, iteration: 277080
loss: 0.9919195771217346,grad_norm: 0.8793036942814562, iteration: 277081
loss: 0.973616898059845,grad_norm: 0.8196777644992314, iteration: 277082
loss: 1.0609805583953857,grad_norm: 0.8095527721068287, iteration: 277083
loss: 0.969778835773468,grad_norm: 0.9791461548605371, iteration: 277084
loss: 1.015474557876587,grad_norm: 0.9104330804871897, iteration: 277085
loss: 0.984853982925415,grad_norm: 0.859403742488151, iteration: 277086
loss: 0.9725980758666992,grad_norm: 0.7755642885581083, iteration: 277087
loss: 1.038087248802185,grad_norm: 0.9541819807654173, iteration: 277088
loss: 1.0212515592575073,grad_norm: 0.7593417055808845, iteration: 277089
loss: 1.0188369750976562,grad_norm: 0.8465876402764593, iteration: 277090
loss: 1.0708229541778564,grad_norm: 0.9999999561233653, iteration: 277091
loss: 1.139570951461792,grad_norm: 0.9999994284488793, iteration: 277092
loss: 0.9811293482780457,grad_norm: 0.8923535291134718, iteration: 277093
loss: 1.0592173337936401,grad_norm: 0.9999997277517133, iteration: 277094
loss: 0.9487249851226807,grad_norm: 0.8056193883372063, iteration: 277095
loss: 0.978018581867218,grad_norm: 0.8390433800669956, iteration: 277096
loss: 1.022681474685669,grad_norm: 0.9265968552274606, iteration: 277097
loss: 1.0074408054351807,grad_norm: 0.7550036771860958, iteration: 277098
loss: 1.0323646068572998,grad_norm: 0.7594663131720736, iteration: 277099
loss: 1.01275634765625,grad_norm: 0.9602819027637011, iteration: 277100
loss: 1.0215907096862793,grad_norm: 0.8143364325217975, iteration: 277101
loss: 0.9638188481330872,grad_norm: 0.8466793898372672, iteration: 277102
loss: 0.979727566242218,grad_norm: 0.8094510226545831, iteration: 277103
loss: 1.0148531198501587,grad_norm: 0.875041450865801, iteration: 277104
loss: 0.967298686504364,grad_norm: 0.9766779012577834, iteration: 277105
loss: 1.0460718870162964,grad_norm: 0.9999990578050204, iteration: 277106
loss: 1.0833102464675903,grad_norm: 0.998129880001564, iteration: 277107
loss: 1.0222595930099487,grad_norm: 0.9999990521702052, iteration: 277108
loss: 1.049796223640442,grad_norm: 0.9809397543826391, iteration: 277109
loss: 0.9885637164115906,grad_norm: 0.9999990039172778, iteration: 277110
loss: 1.0405213832855225,grad_norm: 0.9999999510119676, iteration: 277111
loss: 0.9976843595504761,grad_norm: 0.832403069978115, iteration: 277112
loss: 0.9778476357460022,grad_norm: 0.8438537753253247, iteration: 277113
loss: 1.0563981533050537,grad_norm: 0.9999991717188196, iteration: 277114
loss: 1.0994222164154053,grad_norm: 0.9999990120106684, iteration: 277115
loss: 1.0354849100112915,grad_norm: 0.8106574694538372, iteration: 277116
loss: 1.0089759826660156,grad_norm: 0.9999998373131195, iteration: 277117
loss: 1.0460151433944702,grad_norm: 0.8793079899098639, iteration: 277118
loss: 0.9691572785377502,grad_norm: 0.846690246049056, iteration: 277119
loss: 1.0268397331237793,grad_norm: 0.9555556674213496, iteration: 277120
loss: 1.0091782808303833,grad_norm: 0.9999993956650588, iteration: 277121
loss: 0.9983770251274109,grad_norm: 0.8994453725020845, iteration: 277122
loss: 0.9899163246154785,grad_norm: 0.996041301294377, iteration: 277123
loss: 0.993777871131897,grad_norm: 0.8965823838712206, iteration: 277124
loss: 1.0970346927642822,grad_norm: 0.771257341060035, iteration: 277125
loss: 1.0607444047927856,grad_norm: 0.9999991700806194, iteration: 277126
loss: 0.9871638417243958,grad_norm: 0.8091243634749641, iteration: 277127
loss: 1.0016649961471558,grad_norm: 0.9471276445251046, iteration: 277128
loss: 1.0681592226028442,grad_norm: 1.0000000326534964, iteration: 277129
loss: 1.0638818740844727,grad_norm: 0.8764373685379963, iteration: 277130
loss: 1.0616977214813232,grad_norm: 0.9999991037108303, iteration: 277131
loss: 1.1068944931030273,grad_norm: 0.9999992428188968, iteration: 277132
loss: 0.9786636233329773,grad_norm: 0.9821134374788083, iteration: 277133
loss: 1.0256580114364624,grad_norm: 0.7958720367880068, iteration: 277134
loss: 1.0249333381652832,grad_norm: 0.8828132978713953, iteration: 277135
loss: 1.013291835784912,grad_norm: 0.9912922894721452, iteration: 277136
loss: 1.041882038116455,grad_norm: 0.9191511796783085, iteration: 277137
loss: 1.081485390663147,grad_norm: 0.8923292391003294, iteration: 277138
loss: 1.0112662315368652,grad_norm: 0.893953389549033, iteration: 277139
loss: 1.0584189891815186,grad_norm: 0.9999991924936688, iteration: 277140
loss: 1.0778745412826538,grad_norm: 0.9999990562707554, iteration: 277141
loss: 1.0163381099700928,grad_norm: 0.9999991444181375, iteration: 277142
loss: 0.9695572257041931,grad_norm: 0.9999990650696138, iteration: 277143
loss: 0.9675489068031311,grad_norm: 0.9850965839991832, iteration: 277144
loss: 0.9703534841537476,grad_norm: 0.8430164862654044, iteration: 277145
loss: 0.9960600137710571,grad_norm: 0.8594941598237369, iteration: 277146
loss: 1.017107605934143,grad_norm: 0.8331444912094443, iteration: 277147
loss: 1.0696146488189697,grad_norm: 0.9999991055613958, iteration: 277148
loss: 1.015803337097168,grad_norm: 0.9090123496435341, iteration: 277149
loss: 0.984264612197876,grad_norm: 0.9436709471663286, iteration: 277150
loss: 1.1092774868011475,grad_norm: 0.9999990969895171, iteration: 277151
loss: 1.0481513738632202,grad_norm: 0.9999994254214666, iteration: 277152
loss: 1.001604676246643,grad_norm: 0.9999989153083546, iteration: 277153
loss: 1.017065167427063,grad_norm: 0.8676898506462318, iteration: 277154
loss: 1.0649570226669312,grad_norm: 0.9999998771536719, iteration: 277155
loss: 1.047653079032898,grad_norm: 0.8481351449909715, iteration: 277156
loss: 0.979026734828949,grad_norm: 0.8523505818593308, iteration: 277157
loss: 1.001798391342163,grad_norm: 0.8208939379680117, iteration: 277158
loss: 1.0706995725631714,grad_norm: 0.9999991089677025, iteration: 277159
loss: 1.0308096408843994,grad_norm: 0.9999994678551298, iteration: 277160
loss: 0.9965753555297852,grad_norm: 0.9943797744641943, iteration: 277161
loss: 1.0298243761062622,grad_norm: 0.9129108693145906, iteration: 277162
loss: 1.0549700260162354,grad_norm: 0.9999992150233865, iteration: 277163
loss: 0.936455249786377,grad_norm: 0.9753604992316933, iteration: 277164
loss: 0.9995785355567932,grad_norm: 0.9611093494279292, iteration: 277165
loss: 1.1014506816864014,grad_norm: 0.9999990691931457, iteration: 277166
loss: 1.0765708684921265,grad_norm: 0.999998987889805, iteration: 277167
loss: 0.9607798457145691,grad_norm: 0.8971372221654527, iteration: 277168
loss: 1.0398588180541992,grad_norm: 0.999999239325305, iteration: 277169
loss: 1.0209661722183228,grad_norm: 0.934996803598818, iteration: 277170
loss: 1.069050908088684,grad_norm: 0.9371967996157409, iteration: 277171
loss: 1.0010995864868164,grad_norm: 0.925650516250323, iteration: 277172
loss: 1.1161545515060425,grad_norm: 0.9999998467826328, iteration: 277173
loss: 0.9791098237037659,grad_norm: 0.8411273088906767, iteration: 277174
loss: 1.048504114151001,grad_norm: 0.832620311254646, iteration: 277175
loss: 1.035162329673767,grad_norm: 0.9999990162020218, iteration: 277176
loss: 1.0024499893188477,grad_norm: 0.9049562628707164, iteration: 277177
loss: 1.1064351797103882,grad_norm: 0.9999994920298554, iteration: 277178
loss: 0.93262779712677,grad_norm: 0.7243673536631766, iteration: 277179
loss: 1.0906096696853638,grad_norm: 0.9999990956552035, iteration: 277180
loss: 1.002957820892334,grad_norm: 0.9594420736796392, iteration: 277181
loss: 1.0658392906188965,grad_norm: 0.9999994652512677, iteration: 277182
loss: 0.9391651749610901,grad_norm: 0.999999224170841, iteration: 277183
loss: 1.0536726713180542,grad_norm: 0.9999998690971774, iteration: 277184
loss: 1.065137267112732,grad_norm: 0.9999991357496417, iteration: 277185
loss: 1.0211710929870605,grad_norm: 0.9999996382194085, iteration: 277186
loss: 1.0336122512817383,grad_norm: 0.7991260824073937, iteration: 277187
loss: 1.0156368017196655,grad_norm: 0.9369217760356687, iteration: 277188
loss: 0.9693638682365417,grad_norm: 0.8233720540076834, iteration: 277189
loss: 1.1640969514846802,grad_norm: 0.9999994750740858, iteration: 277190
loss: 1.0036181211471558,grad_norm: 0.9999990074622958, iteration: 277191
loss: 1.0117331743240356,grad_norm: 0.8845885814992498, iteration: 277192
loss: 1.0443013906478882,grad_norm: 0.9999999735257301, iteration: 277193
loss: 1.0315629243850708,grad_norm: 0.9999995694210784, iteration: 277194
loss: 0.9538978338241577,grad_norm: 0.8897533228512134, iteration: 277195
loss: 0.9945478439331055,grad_norm: 0.9999990419829141, iteration: 277196
loss: 1.1331537961959839,grad_norm: 0.9433460624411096, iteration: 277197
loss: 0.9884815216064453,grad_norm: 0.8336812605745821, iteration: 277198
loss: 0.966683030128479,grad_norm: 0.883478166969485, iteration: 277199
loss: 0.9829294085502625,grad_norm: 0.9755559761152997, iteration: 277200
loss: 0.9886713624000549,grad_norm: 0.9418027604062953, iteration: 277201
loss: 0.976830005645752,grad_norm: 0.8365271228429756, iteration: 277202
loss: 1.0524308681488037,grad_norm: 0.999999539866134, iteration: 277203
loss: 1.0486119985580444,grad_norm: 0.9999992201228763, iteration: 277204
loss: 1.004899263381958,grad_norm: 0.9288600346469239, iteration: 277205
loss: 0.9905808568000793,grad_norm: 0.8503359375059494, iteration: 277206
loss: 0.9823073148727417,grad_norm: 0.9437193695465924, iteration: 277207
loss: 1.0908950567245483,grad_norm: 0.8721078181721726, iteration: 277208
loss: 0.9814832806587219,grad_norm: 0.9110500471169722, iteration: 277209
loss: 1.0396907329559326,grad_norm: 0.8046759857495744, iteration: 277210
loss: 1.0216752290725708,grad_norm: 0.9473922253571992, iteration: 277211
loss: 1.0555026531219482,grad_norm: 0.8800862190043641, iteration: 277212
loss: 1.0621205568313599,grad_norm: 0.9999995007750822, iteration: 277213
loss: 0.9911474585533142,grad_norm: 0.9999995193830579, iteration: 277214
loss: 0.99517422914505,grad_norm: 0.7911007873809361, iteration: 277215
loss: 0.964824378490448,grad_norm: 0.8157418796572938, iteration: 277216
loss: 1.045819640159607,grad_norm: 0.9951958131353783, iteration: 277217
loss: 1.029415249824524,grad_norm: 0.8936688819135981, iteration: 277218
loss: 0.9963531494140625,grad_norm: 0.8673249795035122, iteration: 277219
loss: 1.0955349206924438,grad_norm: 0.9999994381358206, iteration: 277220
loss: 1.0247652530670166,grad_norm: 0.8774733134307546, iteration: 277221
loss: 1.0421240329742432,grad_norm: 0.7941357077572321, iteration: 277222
loss: 1.0507169961929321,grad_norm: 0.9481022282935953, iteration: 277223
loss: 1.107578158378601,grad_norm: 0.9999991106311813, iteration: 277224
loss: 1.005846619606018,grad_norm: 0.9999996596602264, iteration: 277225
loss: 1.0495959520339966,grad_norm: 0.999999150537456, iteration: 277226
loss: 1.0133780241012573,grad_norm: 0.9999991616143569, iteration: 277227
loss: 0.9988895058631897,grad_norm: 0.8103668281674951, iteration: 277228
loss: 1.014796495437622,grad_norm: 0.919283471078733, iteration: 277229
loss: 1.0775986909866333,grad_norm: 0.9518492966820729, iteration: 277230
loss: 0.9822790622711182,grad_norm: 0.840936778697078, iteration: 277231
loss: 1.0139981508255005,grad_norm: 0.7724737354588118, iteration: 277232
loss: 1.0205705165863037,grad_norm: 0.9999990791661717, iteration: 277233
loss: 0.9910370111465454,grad_norm: 0.9999991106639802, iteration: 277234
loss: 1.0416125059127808,grad_norm: 0.8673482691853521, iteration: 277235
loss: 1.0382030010223389,grad_norm: 0.9999993583256952, iteration: 277236
loss: 1.0679041147232056,grad_norm: 0.9999999441340536, iteration: 277237
loss: 1.0062400102615356,grad_norm: 0.9999989917143014, iteration: 277238
loss: 1.027225136756897,grad_norm: 0.9999990544447177, iteration: 277239
loss: 1.0181186199188232,grad_norm: 0.9999998107539861, iteration: 277240
loss: 1.0065569877624512,grad_norm: 0.9999991751395217, iteration: 277241
loss: 1.0035839080810547,grad_norm: 0.9999992283298282, iteration: 277242
loss: 0.9802566766738892,grad_norm: 0.8807043191785328, iteration: 277243
loss: 0.9927489161491394,grad_norm: 0.9999991653688994, iteration: 277244
loss: 1.0879194736480713,grad_norm: 0.9757370600864267, iteration: 277245
loss: 1.0527229309082031,grad_norm: 0.999999795497207, iteration: 277246
loss: 1.0576378107070923,grad_norm: 0.9999991116914909, iteration: 277247
loss: 0.982125997543335,grad_norm: 0.7465804019380798, iteration: 277248
loss: 0.9941726326942444,grad_norm: 0.8365569539487184, iteration: 277249
loss: 0.9782909154891968,grad_norm: 0.9530657760543054, iteration: 277250
loss: 1.0752005577087402,grad_norm: 0.9999998127365403, iteration: 277251
loss: 1.0598143339157104,grad_norm: 1.0000000470568855, iteration: 277252
loss: 1.0471618175506592,grad_norm: 0.9999999390572305, iteration: 277253
loss: 1.0263727903366089,grad_norm: 0.9999993807562383, iteration: 277254
loss: 1.038781762123108,grad_norm: 0.8309127533516711, iteration: 277255
loss: 1.0091018676757812,grad_norm: 0.9999991491223468, iteration: 277256
loss: 0.9894290566444397,grad_norm: 0.9999990714955485, iteration: 277257
loss: 0.9881439208984375,grad_norm: 0.7764308214017889, iteration: 277258
loss: 1.0133841037750244,grad_norm: 0.9999990870568803, iteration: 277259
loss: 1.0008715391159058,grad_norm: 0.9690562029090626, iteration: 277260
loss: 1.031141757965088,grad_norm: 0.8880370074383226, iteration: 277261
loss: 1.0556691884994507,grad_norm: 0.9999995048402677, iteration: 277262
loss: 1.0804970264434814,grad_norm: 0.9225586860252583, iteration: 277263
loss: 1.0269415378570557,grad_norm: 0.9999991132375953, iteration: 277264
loss: 1.0480973720550537,grad_norm: 0.8212983845987769, iteration: 277265
loss: 1.0305938720703125,grad_norm: 0.751004402902436, iteration: 277266
loss: 0.9945253133773804,grad_norm: 0.88919989506501, iteration: 277267
loss: 1.0143097639083862,grad_norm: 0.9999990811905793, iteration: 277268
loss: 1.0372079610824585,grad_norm: 0.999999411253276, iteration: 277269
loss: 0.961912989616394,grad_norm: 0.8203661950998007, iteration: 277270
loss: 1.0751140117645264,grad_norm: 0.9203500392974971, iteration: 277271
loss: 0.9676395654678345,grad_norm: 0.7926001023599915, iteration: 277272
loss: 1.2453669309616089,grad_norm: 0.9999994895583513, iteration: 277273
loss: 1.054240107536316,grad_norm: 1.0000000071894777, iteration: 277274
loss: 0.9793440103530884,grad_norm: 0.903323927610919, iteration: 277275
loss: 0.9943018555641174,grad_norm: 0.7851613398040679, iteration: 277276
loss: 0.9805962443351746,grad_norm: 0.8362068238651693, iteration: 277277
loss: 1.0544445514678955,grad_norm: 0.9999995656697658, iteration: 277278
loss: 1.0402392148971558,grad_norm: 0.864623781941471, iteration: 277279
loss: 0.9996330738067627,grad_norm: 0.9999994983465806, iteration: 277280
loss: 1.0083409547805786,grad_norm: 0.9999995905521335, iteration: 277281
loss: 1.0861456394195557,grad_norm: 0.754358866230706, iteration: 277282
loss: 1.003477692604065,grad_norm: 0.9914732174897414, iteration: 277283
loss: 0.9888157844543457,grad_norm: 0.7531667181992172, iteration: 277284
loss: 0.9689345955848694,grad_norm: 0.9999989399809214, iteration: 277285
loss: 1.2674604654312134,grad_norm: 0.9999993667731109, iteration: 277286
loss: 1.0167874097824097,grad_norm: 0.9999991587911415, iteration: 277287
loss: 1.0283305644989014,grad_norm: 0.9999990722421292, iteration: 277288
loss: 0.9934124946594238,grad_norm: 0.8147230243903092, iteration: 277289
loss: 0.9946576356887817,grad_norm: 0.8333098220911208, iteration: 277290
loss: 1.1040055751800537,grad_norm: 0.9999992055818712, iteration: 277291
loss: 1.0885365009307861,grad_norm: 0.9999994452162133, iteration: 277292
loss: 1.0656379461288452,grad_norm: 0.9553925904112905, iteration: 277293
loss: 1.0239049196243286,grad_norm: 0.9999998433548511, iteration: 277294
loss: 1.0274897813796997,grad_norm: 0.8550648818296899, iteration: 277295
loss: 1.0387426614761353,grad_norm: 0.9712626259737549, iteration: 277296
loss: 1.043102741241455,grad_norm: 0.8381632051294394, iteration: 277297
loss: 1.0198745727539062,grad_norm: 0.9999991471160854, iteration: 277298
loss: 1.0025142431259155,grad_norm: 0.9999991106961389, iteration: 277299
loss: 1.016796588897705,grad_norm: 0.9757560652134907, iteration: 277300
loss: 0.9888734221458435,grad_norm: 0.9999991642822043, iteration: 277301
loss: 0.9977883100509644,grad_norm: 0.9361675464502939, iteration: 277302
loss: 0.9721277356147766,grad_norm: 0.8103951817665805, iteration: 277303
loss: 1.052071452140808,grad_norm: 0.9999991431494448, iteration: 277304
loss: 1.0127155780792236,grad_norm: 0.9502123679155915, iteration: 277305
loss: 0.9912111163139343,grad_norm: 0.8348489361499022, iteration: 277306
loss: 1.0365184545516968,grad_norm: 0.9680022966270211, iteration: 277307
loss: 1.0168873071670532,grad_norm: 0.9999991534397994, iteration: 277308
loss: 1.0221707820892334,grad_norm: 0.9999995468928421, iteration: 277309
loss: 0.9907495379447937,grad_norm: 0.7639074764005659, iteration: 277310
loss: 1.056909441947937,grad_norm: 0.8658438327825692, iteration: 277311
loss: 1.00547194480896,grad_norm: 0.9344761823831538, iteration: 277312
loss: 1.0966384410858154,grad_norm: 0.9999992929068462, iteration: 277313
loss: 1.0379462242126465,grad_norm: 0.9548493048243394, iteration: 277314
loss: 1.0016366243362427,grad_norm: 0.98075573092338, iteration: 277315
loss: 1.08275306224823,grad_norm: 0.8673069887831224, iteration: 277316
loss: 0.9540248513221741,grad_norm: 0.8892699728200185, iteration: 277317
loss: 1.043643593788147,grad_norm: 0.8654868698069026, iteration: 277318
loss: 0.9967107772827148,grad_norm: 0.8876564994271009, iteration: 277319
loss: 1.1136820316314697,grad_norm: 0.9423931411344025, iteration: 277320
loss: 1.0371876955032349,grad_norm: 0.9197501052954608, iteration: 277321
loss: 0.9922048449516296,grad_norm: 0.9241291424228119, iteration: 277322
loss: 1.037466049194336,grad_norm: 0.774254602965731, iteration: 277323
loss: 1.0687174797058105,grad_norm: 0.9999991188033905, iteration: 277324
loss: 1.0160151720046997,grad_norm: 0.9999992282743877, iteration: 277325
loss: 1.0187747478485107,grad_norm: 0.9620555042623818, iteration: 277326
loss: 0.9976561069488525,grad_norm: 0.9999994138064631, iteration: 277327
loss: 1.0052645206451416,grad_norm: 0.9415371787890209, iteration: 277328
loss: 0.9892305135726929,grad_norm: 0.8458745709246368, iteration: 277329
loss: 0.9902624487876892,grad_norm: 0.7309483482739701, iteration: 277330
loss: 0.990100085735321,grad_norm: 0.9775859829504882, iteration: 277331
loss: 1.019687294960022,grad_norm: 0.874151089023686, iteration: 277332
loss: 0.9840456247329712,grad_norm: 0.9309474640256781, iteration: 277333
loss: 1.0404497385025024,grad_norm: 0.9999991143260448, iteration: 277334
loss: 0.9995672106742859,grad_norm: 0.9999996219635916, iteration: 277335
loss: 0.9566844701766968,grad_norm: 0.8742500983175059, iteration: 277336
loss: 0.9989479184150696,grad_norm: 0.930156534052443, iteration: 277337
loss: 1.029641032218933,grad_norm: 0.8319414362217854, iteration: 277338
loss: 1.012555480003357,grad_norm: 0.9718227376482738, iteration: 277339
loss: 1.0314687490463257,grad_norm: 0.9999989006175733, iteration: 277340
loss: 1.0390455722808838,grad_norm: 0.9999991166610503, iteration: 277341
loss: 1.0156617164611816,grad_norm: 0.9552922608019694, iteration: 277342
loss: 1.245007872581482,grad_norm: 0.9999999989096767, iteration: 277343
loss: 0.984210729598999,grad_norm: 0.9444809120265721, iteration: 277344
loss: 0.9660407304763794,grad_norm: 0.8177874536643112, iteration: 277345
loss: 0.9569132924079895,grad_norm: 0.9237578454769608, iteration: 277346
loss: 0.9991340637207031,grad_norm: 0.8345544904483064, iteration: 277347
loss: 1.071931004524231,grad_norm: 0.9999991682641882, iteration: 277348
loss: 0.9575107097625732,grad_norm: 0.8411277748985497, iteration: 277349
loss: 0.9848640561103821,grad_norm: 0.9181776582698031, iteration: 277350
loss: 1.0281240940093994,grad_norm: 0.8780384372289498, iteration: 277351
loss: 1.0486183166503906,grad_norm: 0.9001720638608184, iteration: 277352
loss: 1.0038799047470093,grad_norm: 0.9459581658950277, iteration: 277353
loss: 1.1115161180496216,grad_norm: 0.9999993471065675, iteration: 277354
loss: 1.0213730335235596,grad_norm: 0.8894107854681226, iteration: 277355
loss: 0.9955549240112305,grad_norm: 0.8495718045069938, iteration: 277356
loss: 1.0713831186294556,grad_norm: 0.9999996370465309, iteration: 277357
loss: 1.0088468790054321,grad_norm: 0.9115188490667787, iteration: 277358
loss: 1.0064204931259155,grad_norm: 0.8535124172361166, iteration: 277359
loss: 0.9609678983688354,grad_norm: 0.9071970108372532, iteration: 277360
loss: 1.0401768684387207,grad_norm: 0.6961659472987692, iteration: 277361
loss: 1.027793049812317,grad_norm: 0.8247630547679319, iteration: 277362
loss: 1.031882643699646,grad_norm: 0.921561439830772, iteration: 277363
loss: 0.9626370668411255,grad_norm: 0.8404252376586888, iteration: 277364
loss: 1.0003607273101807,grad_norm: 0.9332037799941384, iteration: 277365
loss: 0.9923952221870422,grad_norm: 0.7546187573298995, iteration: 277366
loss: 0.9753975868225098,grad_norm: 0.8168114592038039, iteration: 277367
loss: 1.1174544095993042,grad_norm: 0.9581209162385566, iteration: 277368
loss: 1.0074068307876587,grad_norm: 0.9999995560277972, iteration: 277369
loss: 0.9665036797523499,grad_norm: 0.9999992149369512, iteration: 277370
loss: 0.9999903440475464,grad_norm: 0.8412049088124522, iteration: 277371
loss: 0.9978266954421997,grad_norm: 0.9999991359891598, iteration: 277372
loss: 0.9817730784416199,grad_norm: 0.9020736446849891, iteration: 277373
loss: 0.9830114841461182,grad_norm: 0.963666363648174, iteration: 277374
loss: 1.0383793115615845,grad_norm: 0.9999996024055007, iteration: 277375
loss: 1.0924689769744873,grad_norm: 0.9999992981684386, iteration: 277376
loss: 0.9964967966079712,grad_norm: 0.8326195624347063, iteration: 277377
loss: 0.9811970591545105,grad_norm: 0.937920133605072, iteration: 277378
loss: 1.0003588199615479,grad_norm: 0.9315176374230617, iteration: 277379
loss: 1.032857060432434,grad_norm: 0.8270253505724618, iteration: 277380
loss: 0.9774826169013977,grad_norm: 0.9515102941874415, iteration: 277381
loss: 0.9988905787467957,grad_norm: 0.7798321028056923, iteration: 277382
loss: 1.0829370021820068,grad_norm: 0.9999994408564269, iteration: 277383
loss: 1.0444858074188232,grad_norm: 0.9999992754158908, iteration: 277384
loss: 1.0041896104812622,grad_norm: 0.8542093623138048, iteration: 277385
loss: 1.0178838968276978,grad_norm: 0.9999996115668537, iteration: 277386
loss: 0.9980875253677368,grad_norm: 0.7824576535181546, iteration: 277387
loss: 0.9996153712272644,grad_norm: 0.9463111771849734, iteration: 277388
loss: 0.9696921706199646,grad_norm: 0.8629493840799906, iteration: 277389
loss: 0.9990025758743286,grad_norm: 0.8699723520451837, iteration: 277390
loss: 0.9340641498565674,grad_norm: 0.9999990301335735, iteration: 277391
loss: 1.0062665939331055,grad_norm: 0.8477717387385869, iteration: 277392
loss: 0.9817149639129639,grad_norm: 0.7515324767205086, iteration: 277393
loss: 0.9605497121810913,grad_norm: 0.9077047978969784, iteration: 277394
loss: 1.0273422002792358,grad_norm: 0.9258800794174535, iteration: 277395
loss: 1.0140095949172974,grad_norm: 0.9999992693836176, iteration: 277396
loss: 1.0630966424942017,grad_norm: 0.8401164427662551, iteration: 277397
loss: 0.9970236420631409,grad_norm: 0.9701051452627103, iteration: 277398
loss: 0.9673100709915161,grad_norm: 0.9034274848578037, iteration: 277399
loss: 0.9920764565467834,grad_norm: 0.9031973013023585, iteration: 277400
loss: 1.0330946445465088,grad_norm: 0.9308810090418965, iteration: 277401
loss: 1.0045323371887207,grad_norm: 0.8495279017416251, iteration: 277402
loss: 1.0065363645553589,grad_norm: 0.7886721147313922, iteration: 277403
loss: 0.9808309078216553,grad_norm: 0.917651783024129, iteration: 277404
loss: 1.0280616283416748,grad_norm: 0.8421923117084187, iteration: 277405
loss: 1.033371925354004,grad_norm: 0.837389155713199, iteration: 277406
loss: 1.0291616916656494,grad_norm: 0.9999991074273124, iteration: 277407
loss: 0.9999517202377319,grad_norm: 0.896130414284635, iteration: 277408
loss: 1.0772768259048462,grad_norm: 0.9303690559300536, iteration: 277409
loss: 0.9906044602394104,grad_norm: 0.6852820990553826, iteration: 277410
loss: 1.0174152851104736,grad_norm: 0.8838195169956751, iteration: 277411
loss: 1.0248392820358276,grad_norm: 0.8324055894441137, iteration: 277412
loss: 1.0338689088821411,grad_norm: 0.9613050523002491, iteration: 277413
loss: 1.0214089155197144,grad_norm: 0.9982851392599664, iteration: 277414
loss: 1.050780177116394,grad_norm: 0.9672102577670145, iteration: 277415
loss: 1.0097676515579224,grad_norm: 0.996780810958658, iteration: 277416
loss: 0.9796845316886902,grad_norm: 0.9999991267275473, iteration: 277417
loss: 1.0015807151794434,grad_norm: 0.8849509744342416, iteration: 277418
loss: 0.9931251406669617,grad_norm: 0.9999995898226305, iteration: 277419
loss: 1.0401490926742554,grad_norm: 0.8109033887050354, iteration: 277420
loss: 0.9534308314323425,grad_norm: 0.9513616074814263, iteration: 277421
loss: 0.9906624555587769,grad_norm: 0.9802285849334127, iteration: 277422
loss: 0.9926411509513855,grad_norm: 0.9282287086866401, iteration: 277423
loss: 1.0025272369384766,grad_norm: 0.9223249874025184, iteration: 277424
loss: 1.079970121383667,grad_norm: 0.9999997308123035, iteration: 277425
loss: 0.9848505854606628,grad_norm: 0.7492384959258052, iteration: 277426
loss: 1.0331838130950928,grad_norm: 0.999999403619952, iteration: 277427
loss: 1.0022144317626953,grad_norm: 0.7665535332051951, iteration: 277428
loss: 0.9954035878181458,grad_norm: 0.907585145649235, iteration: 277429
loss: 0.9787338972091675,grad_norm: 0.9999990854543181, iteration: 277430
loss: 1.0337110757827759,grad_norm: 0.8876573392216747, iteration: 277431
loss: 0.9840186834335327,grad_norm: 0.999999014108108, iteration: 277432
loss: 0.9782907962799072,grad_norm: 0.79708551961225, iteration: 277433
loss: 0.9994931221008301,grad_norm: 0.8678302146323609, iteration: 277434
loss: 0.9888452887535095,grad_norm: 0.8364170432205765, iteration: 277435
loss: 0.9905844330787659,grad_norm: 0.9002796193132879, iteration: 277436
loss: 1.0379149913787842,grad_norm: 0.9999995341296849, iteration: 277437
loss: 0.966770589351654,grad_norm: 0.7891478781780267, iteration: 277438
loss: 1.0101048946380615,grad_norm: 0.999999239695962, iteration: 277439
loss: 1.0355184078216553,grad_norm: 0.8612718621501612, iteration: 277440
loss: 0.9938462376594543,grad_norm: 0.8387988031554668, iteration: 277441
loss: 1.0261056423187256,grad_norm: 0.8072855672942973, iteration: 277442
loss: 0.989274799823761,grad_norm: 0.9381337516959279, iteration: 277443
loss: 1.1027350425720215,grad_norm: 0.999999024842938, iteration: 277444
loss: 1.0261738300323486,grad_norm: 0.8399688383336971, iteration: 277445
loss: 1.016053557395935,grad_norm: 0.8114161737221783, iteration: 277446
loss: 1.17848539352417,grad_norm: 0.9999995263001944, iteration: 277447
loss: 1.0027689933776855,grad_norm: 0.9067286642655918, iteration: 277448
loss: 1.0273793935775757,grad_norm: 0.9999992763530442, iteration: 277449
loss: 0.9420886635780334,grad_norm: 0.9713022598875054, iteration: 277450
loss: 1.0283210277557373,grad_norm: 0.8569338935324015, iteration: 277451
loss: 1.001293420791626,grad_norm: 0.9655184133839955, iteration: 277452
loss: 1.0037082433700562,grad_norm: 0.828060554579611, iteration: 277453
loss: 0.9926083087921143,grad_norm: 0.814970951812253, iteration: 277454
loss: 1.1459790468215942,grad_norm: 0.9999997308364376, iteration: 277455
loss: 0.9807807803153992,grad_norm: 0.8570396151215902, iteration: 277456
loss: 1.0186266899108887,grad_norm: 0.9999990204930729, iteration: 277457
loss: 1.1819064617156982,grad_norm: 0.9999993726831676, iteration: 277458
loss: 1.0766383409500122,grad_norm: 0.9999999555707029, iteration: 277459
loss: 0.9862127900123596,grad_norm: 0.9714932044188301, iteration: 277460
loss: 1.0355393886566162,grad_norm: 0.9999994175285692, iteration: 277461
loss: 1.0496493577957153,grad_norm: 0.9999997550481274, iteration: 277462
loss: 0.9963371157646179,grad_norm: 0.785439459153997, iteration: 277463
loss: 1.1881221532821655,grad_norm: 0.9999999371862965, iteration: 277464
loss: 0.9953498244285583,grad_norm: 0.8038718220989257, iteration: 277465
loss: 0.9958648085594177,grad_norm: 0.9999992428865115, iteration: 277466
loss: 1.0384302139282227,grad_norm: 0.8373195171610146, iteration: 277467
loss: 0.9945454597473145,grad_norm: 0.9442115704030136, iteration: 277468
loss: 0.9872182011604309,grad_norm: 0.9785839988558747, iteration: 277469
loss: 0.9978063702583313,grad_norm: 0.9043943045722811, iteration: 277470
loss: 0.9941149950027466,grad_norm: 0.9999990555383745, iteration: 277471
loss: 1.0761666297912598,grad_norm: 0.9999992395678797, iteration: 277472
loss: 0.9635143280029297,grad_norm: 0.7968770182922545, iteration: 277473
loss: 0.9606255292892456,grad_norm: 0.8490321132950849, iteration: 277474
loss: 1.0482736825942993,grad_norm: 0.9999990366615519, iteration: 277475
loss: 1.0398157835006714,grad_norm: 0.9982551250983482, iteration: 277476
loss: 0.9881482124328613,grad_norm: 0.8844352177526831, iteration: 277477
loss: 1.024624228477478,grad_norm: 0.9108058391652759, iteration: 277478
loss: 0.9904597997665405,grad_norm: 0.8565180157449416, iteration: 277479
loss: 0.9846129417419434,grad_norm: 0.8564623642417335, iteration: 277480
loss: 1.0044662952423096,grad_norm: 0.9999990529648648, iteration: 277481
loss: 0.9954193234443665,grad_norm: 0.8832059878923858, iteration: 277482
loss: 0.9996956586837769,grad_norm: 0.9999992347369009, iteration: 277483
loss: 1.001296043395996,grad_norm: 0.8806796977288595, iteration: 277484
loss: 1.0484213829040527,grad_norm: 0.9215974966739803, iteration: 277485
loss: 1.0268882513046265,grad_norm: 0.8464031019614034, iteration: 277486
loss: 1.0446445941925049,grad_norm: 0.9604383111025115, iteration: 277487
loss: 0.9923934936523438,grad_norm: 0.9999991038877882, iteration: 277488
loss: 0.9869067668914795,grad_norm: 0.8472618468040942, iteration: 277489
loss: 0.9593468308448792,grad_norm: 0.7944803986086441, iteration: 277490
loss: 1.0213613510131836,grad_norm: 0.9999989988024577, iteration: 277491
loss: 0.9961004853248596,grad_norm: 0.8893360488969443, iteration: 277492
loss: 1.016191005706787,grad_norm: 0.8765012194434453, iteration: 277493
loss: 1.035028338432312,grad_norm: 0.7992727599789086, iteration: 277494
loss: 1.0031987428665161,grad_norm: 0.9999993283760753, iteration: 277495
loss: 1.002241611480713,grad_norm: 0.7803622145593511, iteration: 277496
loss: 1.0163170099258423,grad_norm: 0.7889445732847167, iteration: 277497
loss: 0.9893157482147217,grad_norm: 0.999999135267561, iteration: 277498
loss: 1.0048880577087402,grad_norm: 0.9999993007022365, iteration: 277499
loss: 0.9889768958091736,grad_norm: 0.8615499791247082, iteration: 277500
loss: 1.0403062105178833,grad_norm: 0.8374591496239673, iteration: 277501
loss: 0.9933900237083435,grad_norm: 0.8598358801470003, iteration: 277502
loss: 1.0550296306610107,grad_norm: 0.9999994338618611, iteration: 277503
loss: 0.9904744029045105,grad_norm: 0.9999989338446295, iteration: 277504
loss: 0.990513801574707,grad_norm: 0.781538789290994, iteration: 277505
loss: 1.0349817276000977,grad_norm: 0.9999992448942845, iteration: 277506
loss: 0.9902898669242859,grad_norm: 0.8493178521472765, iteration: 277507
loss: 1.0162451267242432,grad_norm: 0.9999991512280696, iteration: 277508
loss: 1.0269153118133545,grad_norm: 0.85491620521991, iteration: 277509
loss: 1.0040638446807861,grad_norm: 0.8984190884327334, iteration: 277510
loss: 1.0440231561660767,grad_norm: 0.8338597977317701, iteration: 277511
loss: 1.0116956233978271,grad_norm: 0.9183485136972793, iteration: 277512
loss: 0.9848909974098206,grad_norm: 0.9478066855631854, iteration: 277513
loss: 0.996684730052948,grad_norm: 0.9999991165996875, iteration: 277514
loss: 0.993269681930542,grad_norm: 0.9140730111803981, iteration: 277515
loss: 1.0592509508132935,grad_norm: 0.9046930808620203, iteration: 277516
loss: 0.9938300251960754,grad_norm: 0.8353378957414289, iteration: 277517
loss: 1.0060323476791382,grad_norm: 0.7828911001371338, iteration: 277518
loss: 1.0045067071914673,grad_norm: 0.9999995455519809, iteration: 277519
loss: 0.9698808193206787,grad_norm: 0.7790339613297683, iteration: 277520
loss: 1.0151394605636597,grad_norm: 0.9999991479855251, iteration: 277521
loss: 0.9852438569068909,grad_norm: 0.9999990271647002, iteration: 277522
loss: 1.036684274673462,grad_norm: 0.924551035356918, iteration: 277523
loss: 0.9825261235237122,grad_norm: 0.7743475244947459, iteration: 277524
loss: 0.9759459495544434,grad_norm: 0.999997908164878, iteration: 277525
loss: 1.0106717348098755,grad_norm: 0.7408460886461313, iteration: 277526
loss: 1.014662742614746,grad_norm: 0.9999989888564803, iteration: 277527
loss: 0.9877973794937134,grad_norm: 0.8922791639483789, iteration: 277528
loss: 1.030184268951416,grad_norm: 0.9999999196346719, iteration: 277529
loss: 1.036901831626892,grad_norm: 0.9101279329202607, iteration: 277530
loss: 1.004331111907959,grad_norm: 0.9999992159022525, iteration: 277531
loss: 1.0260612964630127,grad_norm: 0.8472276250060646, iteration: 277532
loss: 1.0258409976959229,grad_norm: 0.7500053082295342, iteration: 277533
loss: 0.9711998105049133,grad_norm: 0.7888693919544726, iteration: 277534
loss: 0.9652174711227417,grad_norm: 0.778075333489798, iteration: 277535
loss: 1.0209585428237915,grad_norm: 0.9302188100791631, iteration: 277536
loss: 1.0326484441757202,grad_norm: 0.9999998849955248, iteration: 277537
loss: 0.9766287803649902,grad_norm: 0.9999989694626301, iteration: 277538
loss: 1.0054779052734375,grad_norm: 0.9480824597938222, iteration: 277539
loss: 0.9992737770080566,grad_norm: 0.9999991199754154, iteration: 277540
loss: 1.031588077545166,grad_norm: 0.7977858799462519, iteration: 277541
loss: 1.0011006593704224,grad_norm: 0.8477270243104005, iteration: 277542
loss: 1.0070059299468994,grad_norm: 0.9130349762854119, iteration: 277543
loss: 1.0243126153945923,grad_norm: 0.9999994616355642, iteration: 277544
loss: 0.9906368255615234,grad_norm: 0.9489805440111511, iteration: 277545
loss: 0.9957753419876099,grad_norm: 0.8628294512939201, iteration: 277546
loss: 0.991346538066864,grad_norm: 0.9999995979442197, iteration: 277547
loss: 0.9813861846923828,grad_norm: 0.8465760988001048, iteration: 277548
loss: 0.9987096786499023,grad_norm: 0.7688601059386674, iteration: 277549
loss: 1.0680190324783325,grad_norm: 0.9356203976183238, iteration: 277550
loss: 1.0222671031951904,grad_norm: 0.9674660690900653, iteration: 277551
loss: 1.0037778615951538,grad_norm: 0.9999990664675491, iteration: 277552
loss: 0.966849684715271,grad_norm: 0.8439385210335634, iteration: 277553
loss: 0.976399838924408,grad_norm: 0.7632844846790992, iteration: 277554
loss: 0.9711378216743469,grad_norm: 0.9399933867073218, iteration: 277555
loss: 0.9893832206726074,grad_norm: 0.8157570077472626, iteration: 277556
loss: 0.9925583004951477,grad_norm: 0.9999990692094223, iteration: 277557
loss: 0.9947997331619263,grad_norm: 0.774526255953187, iteration: 277558
loss: 1.0083180665969849,grad_norm: 0.8650547830952217, iteration: 277559
loss: 1.0024820566177368,grad_norm: 0.874479770022039, iteration: 277560
loss: 0.9697769284248352,grad_norm: 0.7372545464046054, iteration: 277561
loss: 1.0000072717666626,grad_norm: 0.9999991647957649, iteration: 277562
loss: 0.9713175892829895,grad_norm: 0.9089200705425944, iteration: 277563
loss: 0.97417151927948,grad_norm: 0.7115929054693235, iteration: 277564
loss: 1.0164459943771362,grad_norm: 0.9999990174082255, iteration: 277565
loss: 0.9913865923881531,grad_norm: 0.9062354448849528, iteration: 277566
loss: 1.0191994905471802,grad_norm: 0.6895531585193297, iteration: 277567
loss: 1.0078144073486328,grad_norm: 0.8200790332722405, iteration: 277568
loss: 0.9951865077018738,grad_norm: 0.8195609383666307, iteration: 277569
loss: 0.9915375709533691,grad_norm: 0.9999991607788458, iteration: 277570
loss: 1.0064224004745483,grad_norm: 0.9254885460763581, iteration: 277571
loss: 0.995022177696228,grad_norm: 0.9999993818677123, iteration: 277572
loss: 1.0500388145446777,grad_norm: 0.9999992931523012, iteration: 277573
loss: 1.0339761972427368,grad_norm: 0.9364615487874803, iteration: 277574
loss: 0.9942830801010132,grad_norm: 0.8568824952259942, iteration: 277575
loss: 1.0114396810531616,grad_norm: 0.893335068524399, iteration: 277576
loss: 1.0048104524612427,grad_norm: 0.7191587963763632, iteration: 277577
loss: 1.0213571786880493,grad_norm: 0.8128828628383501, iteration: 277578
loss: 1.0130910873413086,grad_norm: 0.9720947915507278, iteration: 277579
loss: 0.9735512733459473,grad_norm: 0.9575234280825202, iteration: 277580
loss: 1.010424256324768,grad_norm: 0.8049221526835418, iteration: 277581
loss: 0.9845678210258484,grad_norm: 0.8949047247943727, iteration: 277582
loss: 0.9891706705093384,grad_norm: 0.8094403405696472, iteration: 277583
loss: 1.024735927581787,grad_norm: 0.864345312965665, iteration: 277584
loss: 1.0123018026351929,grad_norm: 0.7034147032858261, iteration: 277585
loss: 0.9690161943435669,grad_norm: 0.8722025173778416, iteration: 277586
loss: 1.0147117376327515,grad_norm: 0.8587645707008452, iteration: 277587
loss: 0.9727413654327393,grad_norm: 0.9999992258403624, iteration: 277588
loss: 0.9758411645889282,grad_norm: 0.9283242137540111, iteration: 277589
loss: 0.9946520328521729,grad_norm: 0.8542790242444327, iteration: 277590
loss: 0.9835900664329529,grad_norm: 0.7843269107824664, iteration: 277591
loss: 1.0222289562225342,grad_norm: 0.9697516126641947, iteration: 277592
loss: 0.9635627865791321,grad_norm: 0.9263915667928193, iteration: 277593
loss: 1.0069471597671509,grad_norm: 0.9610707085507144, iteration: 277594
loss: 1.0260981321334839,grad_norm: 0.9999995335544778, iteration: 277595
loss: 0.9982397556304932,grad_norm: 0.8095585359240208, iteration: 277596
loss: 0.994537889957428,grad_norm: 0.8329078444860152, iteration: 277597
loss: 0.98126220703125,grad_norm: 0.8947822541199819, iteration: 277598
loss: 0.9953787326812744,grad_norm: 0.9312196172269658, iteration: 277599
loss: 0.9682530164718628,grad_norm: 0.7833800720874136, iteration: 277600
loss: 1.0154346227645874,grad_norm: 0.8390363178335547, iteration: 277601
loss: 1.0901875495910645,grad_norm: 0.9999992076050233, iteration: 277602
loss: 0.9846646785736084,grad_norm: 0.830649391086471, iteration: 277603
loss: 0.9977952837944031,grad_norm: 0.9914459817559088, iteration: 277604
loss: 1.0160208940505981,grad_norm: 0.9999995030523067, iteration: 277605
loss: 1.01169753074646,grad_norm: 0.9999991540426455, iteration: 277606
loss: 0.9743596911430359,grad_norm: 0.7379597842028214, iteration: 277607
loss: 0.9801381230354309,grad_norm: 0.83686964210295, iteration: 277608
loss: 1.0083223581314087,grad_norm: 0.8987252508829301, iteration: 277609
loss: 0.9941377639770508,grad_norm: 0.9342886687461303, iteration: 277610
loss: 0.9798612594604492,grad_norm: 0.8509274261235756, iteration: 277611
loss: 1.0060817003250122,grad_norm: 0.8436325331329586, iteration: 277612
loss: 0.9807043075561523,grad_norm: 0.8629290138838079, iteration: 277613
loss: 1.0238233804702759,grad_norm: 0.9246710003032934, iteration: 277614
loss: 1.0280534029006958,grad_norm: 0.999999061243504, iteration: 277615
loss: 1.0186238288879395,grad_norm: 0.9999994645816068, iteration: 277616
loss: 0.9927189946174622,grad_norm: 0.9999991415107385, iteration: 277617
loss: 1.070077657699585,grad_norm: 0.8253767432103889, iteration: 277618
loss: 0.999112606048584,grad_norm: 0.672946961957089, iteration: 277619
loss: 1.0280804634094238,grad_norm: 0.9999991595863428, iteration: 277620
loss: 0.9969485402107239,grad_norm: 0.9999991052381092, iteration: 277621
loss: 1.0141925811767578,grad_norm: 0.9139889049112446, iteration: 277622
loss: 1.0337485074996948,grad_norm: 0.9537761706416068, iteration: 277623
loss: 1.0016320943832397,grad_norm: 0.8366386361739117, iteration: 277624
loss: 1.0635145902633667,grad_norm: 0.9593475443628758, iteration: 277625
loss: 1.0387847423553467,grad_norm: 0.9999990809712234, iteration: 277626
loss: 0.9799666404724121,grad_norm: 0.9901633657519019, iteration: 277627
loss: 0.9964672327041626,grad_norm: 0.9819158056013059, iteration: 277628
loss: 1.138664960861206,grad_norm: 0.9999993484767519, iteration: 277629
loss: 1.0047887563705444,grad_norm: 0.8803562572578274, iteration: 277630
loss: 0.9657391905784607,grad_norm: 0.9999990279232404, iteration: 277631
loss: 1.047008991241455,grad_norm: 0.9999990757443491, iteration: 277632
loss: 1.009294033050537,grad_norm: 0.9999990129933287, iteration: 277633
loss: 1.0009815692901611,grad_norm: 0.8610882326432583, iteration: 277634
loss: 1.0119295120239258,grad_norm: 0.9999996219046786, iteration: 277635
loss: 0.9921879768371582,grad_norm: 0.9330473463621991, iteration: 277636
loss: 0.984772264957428,grad_norm: 0.9720419307969927, iteration: 277637
loss: 1.0140141248703003,grad_norm: 0.9999992159627131, iteration: 277638
loss: 1.0318901538848877,grad_norm: 0.9999998672781946, iteration: 277639
loss: 1.034353256225586,grad_norm: 0.798172196183154, iteration: 277640
loss: 0.9793477654457092,grad_norm: 0.7987290957961399, iteration: 277641
loss: 1.0169121026992798,grad_norm: 0.922092945688423, iteration: 277642
loss: 1.0098085403442383,grad_norm: 0.7772972901382096, iteration: 277643
loss: 1.0023621320724487,grad_norm: 0.9999991076995755, iteration: 277644
loss: 0.9783322215080261,grad_norm: 0.8852926239597778, iteration: 277645
loss: 1.0376989841461182,grad_norm: 0.810157205320084, iteration: 277646
loss: 0.9878430962562561,grad_norm: 0.9697529383088838, iteration: 277647
loss: 0.985148012638092,grad_norm: 0.7894749171720911, iteration: 277648
loss: 0.9926266074180603,grad_norm: 0.8856586217853452, iteration: 277649
loss: 1.006058931350708,grad_norm: 0.805670402741518, iteration: 277650
loss: 1.0118434429168701,grad_norm: 0.999999627181059, iteration: 277651
loss: 0.9601463079452515,grad_norm: 0.8495372016063253, iteration: 277652
loss: 0.9643647074699402,grad_norm: 0.9999989399620625, iteration: 277653
loss: 1.0145150423049927,grad_norm: 0.8248580122108305, iteration: 277654
loss: 0.9897671341896057,grad_norm: 0.9087303060184875, iteration: 277655
loss: 0.9928727746009827,grad_norm: 0.9999991686336471, iteration: 277656
loss: 0.9899017810821533,grad_norm: 0.9999991880052734, iteration: 277657
loss: 0.9482517242431641,grad_norm: 0.9899384537418164, iteration: 277658
loss: 0.9958935379981995,grad_norm: 0.8434847031095946, iteration: 277659
loss: 0.9738268256187439,grad_norm: 0.7530620974798301, iteration: 277660
loss: 0.9796302318572998,grad_norm: 0.7305663066361132, iteration: 277661
loss: 0.9736806154251099,grad_norm: 0.9312475272176316, iteration: 277662
loss: 0.9873729944229126,grad_norm: 0.9177375238345155, iteration: 277663
loss: 0.9977124333381653,grad_norm: 0.9999992836904905, iteration: 277664
loss: 1.0561927556991577,grad_norm: 0.9999999235359152, iteration: 277665
loss: 1.0110734701156616,grad_norm: 0.9999992872737172, iteration: 277666
loss: 1.0147712230682373,grad_norm: 0.8912434249485494, iteration: 277667
loss: 1.03280508518219,grad_norm: 0.9293703842363307, iteration: 277668
loss: 1.0200347900390625,grad_norm: 0.9156570860798889, iteration: 277669
loss: 1.0214544534683228,grad_norm: 0.8455543348991349, iteration: 277670
loss: 0.961468517780304,grad_norm: 0.8510334143905249, iteration: 277671
loss: 1.0034632682800293,grad_norm: 0.9999989851936049, iteration: 277672
loss: 0.9813703894615173,grad_norm: 0.9999989704866638, iteration: 277673
loss: 1.0886399745941162,grad_norm: 0.9999990779101928, iteration: 277674
loss: 0.9869177937507629,grad_norm: 0.9531351618648601, iteration: 277675
loss: 1.0120997428894043,grad_norm: 0.919744251776627, iteration: 277676
loss: 0.9500903487205505,grad_norm: 0.7426026795746811, iteration: 277677
loss: 0.9990599155426025,grad_norm: 0.75987207825775, iteration: 277678
loss: 0.9741695523262024,grad_norm: 0.9151920497609305, iteration: 277679
loss: 0.9964653253555298,grad_norm: 0.895411188134309, iteration: 277680
loss: 1.0047770738601685,grad_norm: 0.9267559989897711, iteration: 277681
loss: 1.0290133953094482,grad_norm: 0.9999995671965569, iteration: 277682
loss: 1.0147842168807983,grad_norm: 0.9081508130641001, iteration: 277683
loss: 1.011666178703308,grad_norm: 0.9999991777389937, iteration: 277684
loss: 0.985400378704071,grad_norm: 0.8356113357104239, iteration: 277685
loss: 1.0295045375823975,grad_norm: 0.8760087093988312, iteration: 277686
loss: 1.0035468339920044,grad_norm: 0.8262291408851107, iteration: 277687
loss: 0.9858807325363159,grad_norm: 0.8640813871618664, iteration: 277688
loss: 1.0850087404251099,grad_norm: 0.9999992897454127, iteration: 277689
loss: 0.9609096646308899,grad_norm: 0.9072763353263485, iteration: 277690
loss: 0.9847702980041504,grad_norm: 0.8340929729373565, iteration: 277691
loss: 1.011845350265503,grad_norm: 0.850171607687674, iteration: 277692
loss: 1.0449326038360596,grad_norm: 0.999999741018848, iteration: 277693
loss: 0.980535626411438,grad_norm: 0.8162324815987059, iteration: 277694
loss: 1.0016965866088867,grad_norm: 0.8647522313729369, iteration: 277695
loss: 0.9504137635231018,grad_norm: 0.837905961592993, iteration: 277696
loss: 0.9686052799224854,grad_norm: 0.9830831871800203, iteration: 277697
loss: 0.9940657615661621,grad_norm: 0.7920680250132947, iteration: 277698
loss: 1.0097123384475708,grad_norm: 0.9999993384404422, iteration: 277699
loss: 1.0503480434417725,grad_norm: 0.9999992831409853, iteration: 277700
loss: 0.9988663196563721,grad_norm: 0.9969831990712064, iteration: 277701
loss: 1.0084617137908936,grad_norm: 0.9306321953922224, iteration: 277702
loss: 1.011731505393982,grad_norm: 0.9247357126769196, iteration: 277703
loss: 0.9911807179450989,grad_norm: 0.9936393000133877, iteration: 277704
loss: 0.996269166469574,grad_norm: 0.9999991019603263, iteration: 277705
loss: 0.975226640701294,grad_norm: 0.9650920827798883, iteration: 277706
loss: 0.9787363409996033,grad_norm: 0.8742340028155563, iteration: 277707
loss: 0.9875757694244385,grad_norm: 0.7947435883518289, iteration: 277708
loss: 0.9967759847640991,grad_norm: 0.862540729926301, iteration: 277709
loss: 0.9907739758491516,grad_norm: 0.8923809359878934, iteration: 277710
loss: 0.9991154074668884,grad_norm: 0.9999992365311425, iteration: 277711
loss: 1.0330740213394165,grad_norm: 0.8531050214688076, iteration: 277712
loss: 0.9904135465621948,grad_norm: 0.7958268861793288, iteration: 277713
loss: 1.0086013078689575,grad_norm: 0.746856324404542, iteration: 277714
loss: 1.022874116897583,grad_norm: 0.794583651821188, iteration: 277715
loss: 1.0335108041763306,grad_norm: 0.9999994390709762, iteration: 277716
loss: 1.0144764184951782,grad_norm: 0.9057216881278427, iteration: 277717
loss: 0.9999492168426514,grad_norm: 0.8004795682187145, iteration: 277718
loss: 0.9505780339241028,grad_norm: 0.716398082884118, iteration: 277719
loss: 1.0084682703018188,grad_norm: 0.781165607493027, iteration: 277720
loss: 0.9738138318061829,grad_norm: 0.7800030699805972, iteration: 277721
loss: 1.0007307529449463,grad_norm: 0.8497563574601023, iteration: 277722
loss: 1.004373550415039,grad_norm: 0.8058171372169458, iteration: 277723
loss: 1.003955602645874,grad_norm: 0.8435834514710957, iteration: 277724
loss: 1.000546932220459,grad_norm: 0.9999993763104155, iteration: 277725
loss: 0.9837611317634583,grad_norm: 0.8195224734390979, iteration: 277726
loss: 0.993227481842041,grad_norm: 0.7726718239727555, iteration: 277727
loss: 0.9959452152252197,grad_norm: 0.7281230821962343, iteration: 277728
loss: 1.0007144212722778,grad_norm: 0.9019811727264072, iteration: 277729
loss: 0.9912295341491699,grad_norm: 0.9999991545107382, iteration: 277730
loss: 1.011306881904602,grad_norm: 0.9106667811976912, iteration: 277731
loss: 1.0266568660736084,grad_norm: 0.9999993632914552, iteration: 277732
loss: 1.0098652839660645,grad_norm: 0.8219938315698023, iteration: 277733
loss: 1.0093038082122803,grad_norm: 0.9999995127922966, iteration: 277734
loss: 0.9871188998222351,grad_norm: 0.8206542504613694, iteration: 277735
loss: 0.9908093214035034,grad_norm: 0.9999990083567217, iteration: 277736
loss: 0.9860491156578064,grad_norm: 0.7665777974744973, iteration: 277737
loss: 0.9948642253875732,grad_norm: 0.9178912640653479, iteration: 277738
loss: 1.0223283767700195,grad_norm: 0.7762741295676383, iteration: 277739
loss: 1.0236774682998657,grad_norm: 0.8444200817546588, iteration: 277740
loss: 0.9768460392951965,grad_norm: 0.8304007916371561, iteration: 277741
loss: 1.0454813241958618,grad_norm: 0.9999995003872911, iteration: 277742
loss: 1.0467760562896729,grad_norm: 0.9999991703596697, iteration: 277743
loss: 0.9943476319313049,grad_norm: 0.9999991769586981, iteration: 277744
loss: 0.9978906512260437,grad_norm: 0.7714849431158657, iteration: 277745
loss: 1.0220811367034912,grad_norm: 0.9999992319962717, iteration: 277746
loss: 0.9944313764572144,grad_norm: 0.9999993809868064, iteration: 277747
loss: 0.9672735333442688,grad_norm: 0.9999993905996709, iteration: 277748
loss: 1.0435271263122559,grad_norm: 0.920786334355353, iteration: 277749
loss: 1.0199965238571167,grad_norm: 0.9304331661972246, iteration: 277750
loss: 1.0110152959823608,grad_norm: 0.9999999748172574, iteration: 277751
loss: 0.9741737246513367,grad_norm: 0.9301136116959285, iteration: 277752
loss: 1.021995186805725,grad_norm: 0.9999989984945948, iteration: 277753
loss: 1.0299838781356812,grad_norm: 0.857635582866338, iteration: 277754
loss: 1.0196622610092163,grad_norm: 0.7577922576327745, iteration: 277755
loss: 0.9861827492713928,grad_norm: 0.9999991228138928, iteration: 277756
loss: 1.028727650642395,grad_norm: 0.9999995485048874, iteration: 277757
loss: 0.9780539870262146,grad_norm: 0.8554123630047612, iteration: 277758
loss: 0.9654716849327087,grad_norm: 0.8885310946337321, iteration: 277759
loss: 1.0059659481048584,grad_norm: 0.8368974996509636, iteration: 277760
loss: 0.9988440275192261,grad_norm: 0.9999998609710088, iteration: 277761
loss: 0.9791258573532104,grad_norm: 0.938823382581541, iteration: 277762
loss: 1.025002360343933,grad_norm: 0.9999992162294012, iteration: 277763
loss: 1.0289033651351929,grad_norm: 0.8452089411224468, iteration: 277764
loss: 1.0247172117233276,grad_norm: 0.9999995347582266, iteration: 277765
loss: 1.0111500024795532,grad_norm: 0.9999991245842055, iteration: 277766
loss: 1.0033748149871826,grad_norm: 0.9999991224970594, iteration: 277767
loss: 0.9939956068992615,grad_norm: 0.8202223872994489, iteration: 277768
loss: 0.9482957720756531,grad_norm: 0.9055308013830925, iteration: 277769
loss: 0.9967401027679443,grad_norm: 0.8849224245940248, iteration: 277770
loss: 1.0379533767700195,grad_norm: 0.9999999064218434, iteration: 277771
loss: 1.053348422050476,grad_norm: 0.9130542142227129, iteration: 277772
loss: 1.0097259283065796,grad_norm: 0.8345279377611423, iteration: 277773
loss: 1.001481056213379,grad_norm: 0.8965609349050196, iteration: 277774
loss: 0.9722306728363037,grad_norm: 0.8470715990089922, iteration: 277775
loss: 1.065739631652832,grad_norm: 0.9999997681979348, iteration: 277776
loss: 1.0939539670944214,grad_norm: 0.9999991906579149, iteration: 277777
loss: 1.0493396520614624,grad_norm: 0.9999999131916448, iteration: 277778
loss: 1.0252381563186646,grad_norm: 0.865350594561378, iteration: 277779
loss: 1.0114823579788208,grad_norm: 0.8460735715559057, iteration: 277780
loss: 1.0045692920684814,grad_norm: 0.9454260911739868, iteration: 277781
loss: 0.9941820502281189,grad_norm: 0.9999994148238455, iteration: 277782
loss: 1.000232219696045,grad_norm: 0.9999992862320162, iteration: 277783
loss: 1.108838438987732,grad_norm: 0.9999992125868445, iteration: 277784
loss: 1.0225987434387207,grad_norm: 0.9999991562771952, iteration: 277785
loss: 1.036870002746582,grad_norm: 0.9999989457407409, iteration: 277786
loss: 1.0060155391693115,grad_norm: 0.9999992405355919, iteration: 277787
loss: 1.0195116996765137,grad_norm: 0.8632813642837124, iteration: 277788
loss: 0.9605926871299744,grad_norm: 0.8739030561123688, iteration: 277789
loss: 1.0282597541809082,grad_norm: 0.999999212115106, iteration: 277790
loss: 0.9933232069015503,grad_norm: 0.839503758480005, iteration: 277791
loss: 1.0373648405075073,grad_norm: 0.9081371970595188, iteration: 277792
loss: 1.0225203037261963,grad_norm: 0.8262500215889571, iteration: 277793
loss: 1.0032460689544678,grad_norm: 0.927696031967803, iteration: 277794
loss: 1.1587241888046265,grad_norm: 0.9999999090525178, iteration: 277795
loss: 1.0052313804626465,grad_norm: 0.7789710663492632, iteration: 277796
loss: 1.009252905845642,grad_norm: 0.9797006590725753, iteration: 277797
loss: 1.1822564601898193,grad_norm: 0.9999996036297488, iteration: 277798
loss: 1.0322781801223755,grad_norm: 0.999999157627043, iteration: 277799
loss: 1.017889380455017,grad_norm: 0.9999991617536539, iteration: 277800
loss: 1.0868302583694458,grad_norm: 0.9999992636131465, iteration: 277801
loss: 0.9997013807296753,grad_norm: 0.8513066599411308, iteration: 277802
loss: 1.0646347999572754,grad_norm: 0.9337739146189268, iteration: 277803
loss: 0.9872033596038818,grad_norm: 0.9999992032461744, iteration: 277804
loss: 1.0313940048217773,grad_norm: 0.9018564944320612, iteration: 277805
loss: 1.015411138534546,grad_norm: 0.9222773156079219, iteration: 277806
loss: 1.0572688579559326,grad_norm: 0.9318907677397998, iteration: 277807
loss: 0.9839045405387878,grad_norm: 0.9999997191803598, iteration: 277808
loss: 1.0668920278549194,grad_norm: 0.9999994484398226, iteration: 277809
loss: 0.9770928025245667,grad_norm: 0.8776459498705567, iteration: 277810
loss: 1.0663015842437744,grad_norm: 0.9999995302618145, iteration: 277811
loss: 0.9857804179191589,grad_norm: 0.8564163832440007, iteration: 277812
loss: 1.016481876373291,grad_norm: 0.9562139728380468, iteration: 277813
loss: 1.044756293296814,grad_norm: 0.8047223693566956, iteration: 277814
loss: 1.0211552381515503,grad_norm: 0.9999996875629075, iteration: 277815
loss: 0.9906589984893799,grad_norm: 0.8211911431283692, iteration: 277816
loss: 1.0081242322921753,grad_norm: 0.9385149161212316, iteration: 277817
loss: 1.008100986480713,grad_norm: 0.8490333924651533, iteration: 277818
loss: 0.99532151222229,grad_norm: 0.740189398301669, iteration: 277819
loss: 0.9858479499816895,grad_norm: 0.9366755210282474, iteration: 277820
loss: 1.027886152267456,grad_norm: 0.9268159449307993, iteration: 277821
loss: 0.9778880476951599,grad_norm: 0.8677129524062831, iteration: 277822
loss: 1.0279566049575806,grad_norm: 0.990014948341502, iteration: 277823
loss: 1.0543545484542847,grad_norm: 0.8890155682946491, iteration: 277824
loss: 1.0318715572357178,grad_norm: 0.8654763212696244, iteration: 277825
loss: 1.0045641660690308,grad_norm: 0.8119146654981078, iteration: 277826
loss: 0.9781359434127808,grad_norm: 0.9985297082259599, iteration: 277827
loss: 0.9894269704818726,grad_norm: 0.9867176968693283, iteration: 277828
loss: 1.0797837972640991,grad_norm: 0.9312624482807595, iteration: 277829
loss: 0.9805868864059448,grad_norm: 0.8366257379168383, iteration: 277830
loss: 1.0089033842086792,grad_norm: 0.9999991275140384, iteration: 277831
loss: 1.002545952796936,grad_norm: 0.8804626584280004, iteration: 277832
loss: 1.0117762088775635,grad_norm: 0.8917821044886016, iteration: 277833
loss: 0.9917625784873962,grad_norm: 0.999999292031669, iteration: 277834
loss: 0.9768431186676025,grad_norm: 0.9459506343428344, iteration: 277835
loss: 1.0205868482589722,grad_norm: 0.9999989942433604, iteration: 277836
loss: 0.9911020398139954,grad_norm: 0.789327842401218, iteration: 277837
loss: 0.99144047498703,grad_norm: 0.8250838654316528, iteration: 277838
loss: 0.9886286854743958,grad_norm: 0.7828100338813376, iteration: 277839
loss: 1.034301519393921,grad_norm: 0.827596010877082, iteration: 277840
loss: 1.0343444347381592,grad_norm: 0.9611396188052957, iteration: 277841
loss: 1.0095261335372925,grad_norm: 0.8465806438936524, iteration: 277842
loss: 0.9618663191795349,grad_norm: 0.845255043742258, iteration: 277843
loss: 0.9856781959533691,grad_norm: 0.6918685470174407, iteration: 277844
loss: 0.9820868372917175,grad_norm: 0.9999991711549211, iteration: 277845
loss: 1.138941764831543,grad_norm: 0.9999997017752171, iteration: 277846
loss: 1.0138946771621704,grad_norm: 0.9846013527109954, iteration: 277847
loss: 1.0156930685043335,grad_norm: 0.931120120473266, iteration: 277848
loss: 1.0058802366256714,grad_norm: 0.978054114216486, iteration: 277849
loss: 0.9682211875915527,grad_norm: 0.8911482357668148, iteration: 277850
loss: 1.0217761993408203,grad_norm: 0.9999990420517845, iteration: 277851
loss: 1.0306336879730225,grad_norm: 0.9999996620549443, iteration: 277852
loss: 1.0366203784942627,grad_norm: 0.9999993527173828, iteration: 277853
loss: 1.0058842897415161,grad_norm: 0.9047267995652265, iteration: 277854
loss: 1.0005995035171509,grad_norm: 0.999999303680055, iteration: 277855
loss: 0.9803456664085388,grad_norm: 0.9999993215661744, iteration: 277856
loss: 1.000183343887329,grad_norm: 0.9999991939099477, iteration: 277857
loss: 1.032511591911316,grad_norm: 0.9999991197311161, iteration: 277858
loss: 1.0284563302993774,grad_norm: 0.9999991611891655, iteration: 277859
loss: 1.0882976055145264,grad_norm: 0.9999990994888959, iteration: 277860
loss: 0.9971029162406921,grad_norm: 0.9999994968089811, iteration: 277861
loss: 0.9710758924484253,grad_norm: 0.7343924552816139, iteration: 277862
loss: 0.9721347093582153,grad_norm: 0.9999990130203362, iteration: 277863
loss: 1.0070953369140625,grad_norm: 0.7734203272998207, iteration: 277864
loss: 1.085675835609436,grad_norm: 0.9999989556991066, iteration: 277865
loss: 0.9712904095649719,grad_norm: 0.9225682875426966, iteration: 277866
loss: 1.0863065719604492,grad_norm: 0.999999865683901, iteration: 277867
loss: 1.0038596391677856,grad_norm: 0.7636125108544023, iteration: 277868
loss: 0.9979936480522156,grad_norm: 0.9999991278568424, iteration: 277869
loss: 0.9853590130805969,grad_norm: 0.9269181924776108, iteration: 277870
loss: 1.0352445840835571,grad_norm: 0.999999136277446, iteration: 277871
loss: 1.0007033348083496,grad_norm: 0.7901041426318003, iteration: 277872
loss: 1.018121361732483,grad_norm: 0.8944280112514491, iteration: 277873
loss: 0.9346514344215393,grad_norm: 0.8916480395983207, iteration: 277874
loss: 0.9792877435684204,grad_norm: 0.8832051457504901, iteration: 277875
loss: 1.0444175004959106,grad_norm: 1.00000000555115, iteration: 277876
loss: 1.028714895248413,grad_norm: 0.8570626645129868, iteration: 277877
loss: 1.0054867267608643,grad_norm: 0.9092700906008825, iteration: 277878
loss: 1.08100163936615,grad_norm: 0.9999993212904617, iteration: 277879
loss: 0.9992737770080566,grad_norm: 0.9117586342081072, iteration: 277880
loss: 1.0815801620483398,grad_norm: 0.9999991014115397, iteration: 277881
loss: 0.9974843263626099,grad_norm: 0.999999128750727, iteration: 277882
loss: 0.9477025270462036,grad_norm: 0.8866515508109089, iteration: 277883
loss: 1.071882963180542,grad_norm: 0.8652900968519168, iteration: 277884
loss: 1.139409065246582,grad_norm: 0.9999998738606395, iteration: 277885
loss: 0.9747847318649292,grad_norm: 0.8756852188036597, iteration: 277886
loss: 1.0280412435531616,grad_norm: 0.9829227260638922, iteration: 277887
loss: 0.9987501502037048,grad_norm: 0.7088586871186675, iteration: 277888
loss: 1.0995023250579834,grad_norm: 0.9004401462809721, iteration: 277889
loss: 0.9847496151924133,grad_norm: 0.871962979149922, iteration: 277890
loss: 1.0242825746536255,grad_norm: 0.9728764594702161, iteration: 277891
loss: 0.9922057390213013,grad_norm: 0.9532833802751317, iteration: 277892
loss: 1.0282155275344849,grad_norm: 0.939763458670753, iteration: 277893
loss: 1.1064578294754028,grad_norm: 0.9999994886880536, iteration: 277894
loss: 0.9926843047142029,grad_norm: 0.9999990425492351, iteration: 277895
loss: 0.9726836681365967,grad_norm: 0.8999257945430361, iteration: 277896
loss: 0.9800474643707275,grad_norm: 0.9497876618791965, iteration: 277897
loss: 1.0303003787994385,grad_norm: 0.9072957699033387, iteration: 277898
loss: 1.0169544219970703,grad_norm: 0.749066003671171, iteration: 277899
loss: 1.0185407400131226,grad_norm: 0.8779935386956352, iteration: 277900
loss: 1.036138892173767,grad_norm: 0.9968893862440751, iteration: 277901
loss: 1.0174192190170288,grad_norm: 0.7773350137781458, iteration: 277902
loss: 0.9955101609230042,grad_norm: 0.8398782744108855, iteration: 277903
loss: 0.9813351631164551,grad_norm: 0.9999996395815948, iteration: 277904
loss: 1.025325894355774,grad_norm: 0.9999998033057609, iteration: 277905
loss: 1.0184388160705566,grad_norm: 0.9148821843515217, iteration: 277906
loss: 1.0005040168762207,grad_norm: 0.8408470408293602, iteration: 277907
loss: 0.9895732998847961,grad_norm: 0.834767725523812, iteration: 277908
loss: 0.9756888151168823,grad_norm: 0.8663964162529944, iteration: 277909
loss: 0.995527982711792,grad_norm: 0.8421809228630142, iteration: 277910
loss: 1.0957300662994385,grad_norm: 0.999998944076496, iteration: 277911
loss: 0.9708245396614075,grad_norm: 0.7980765660520155, iteration: 277912
loss: 0.9955806732177734,grad_norm: 0.7936373445508929, iteration: 277913
loss: 0.9898325800895691,grad_norm: 0.8643050442094491, iteration: 277914
loss: 1.0434644222259521,grad_norm: 0.7835395228501194, iteration: 277915
loss: 1.0259841680526733,grad_norm: 0.8134998027627726, iteration: 277916
loss: 1.0281028747558594,grad_norm: 0.8272732613403355, iteration: 277917
loss: 1.0090042352676392,grad_norm: 0.9999992275974703, iteration: 277918
loss: 1.0295546054840088,grad_norm: 0.9999992375100669, iteration: 277919
loss: 1.024214267730713,grad_norm: 0.9999991473343824, iteration: 277920
loss: 1.0307974815368652,grad_norm: 0.8569091988186395, iteration: 277921
loss: 0.9623064398765564,grad_norm: 0.9999991867662916, iteration: 277922
loss: 0.9613412022590637,grad_norm: 0.8743377277862332, iteration: 277923
loss: 0.9938281774520874,grad_norm: 0.7474313324084783, iteration: 277924
loss: 1.0342051982879639,grad_norm: 0.7832964559040744, iteration: 277925
loss: 1.008245825767517,grad_norm: 0.8130147339142939, iteration: 277926
loss: 0.996962308883667,grad_norm: 0.8238620971741306, iteration: 277927
loss: 0.9786694645881653,grad_norm: 0.7139074626153359, iteration: 277928
loss: 1.0012911558151245,grad_norm: 0.7638720493244953, iteration: 277929
loss: 0.994689404964447,grad_norm: 0.9999993152976498, iteration: 277930
loss: 1.0130000114440918,grad_norm: 0.8374497971820037, iteration: 277931
loss: 1.0275232791900635,grad_norm: 0.999999982201609, iteration: 277932
loss: 1.0033403635025024,grad_norm: 0.8063925897239741, iteration: 277933
loss: 1.0012420415878296,grad_norm: 0.8248630920560297, iteration: 277934
loss: 1.008880615234375,grad_norm: 0.8948759206696077, iteration: 277935
loss: 0.9699220657348633,grad_norm: 0.781416362248471, iteration: 277936
loss: 0.9769325852394104,grad_norm: 0.9999991460751024, iteration: 277937
loss: 1.0327459573745728,grad_norm: 0.9407304317090989, iteration: 277938
loss: 1.0007084608078003,grad_norm: 0.8753853541443153, iteration: 277939
loss: 0.9533302187919617,grad_norm: 0.8684196058382132, iteration: 277940
loss: 1.0880374908447266,grad_norm: 0.99999989061344, iteration: 277941
loss: 1.0278633832931519,grad_norm: 0.9188617184349568, iteration: 277942
loss: 1.007788062095642,grad_norm: 0.9011643613856002, iteration: 277943
loss: 0.9959219098091125,grad_norm: 0.8295978742412089, iteration: 277944
loss: 0.9777423143386841,grad_norm: 0.6875015618182042, iteration: 277945
loss: 0.9675145149230957,grad_norm: 0.9999992093312385, iteration: 277946
loss: 1.0190280675888062,grad_norm: 0.7687576966208913, iteration: 277947
loss: 1.0350866317749023,grad_norm: 0.9999999202375667, iteration: 277948
loss: 1.055535078048706,grad_norm: 0.9999991083176982, iteration: 277949
loss: 0.9926811456680298,grad_norm: 0.8733986154663258, iteration: 277950
loss: 1.013336420059204,grad_norm: 0.9999991588747131, iteration: 277951
loss: 1.019987940788269,grad_norm: 0.8858333079008811, iteration: 277952
loss: 0.9770320653915405,grad_norm: 0.7293476611872739, iteration: 277953
loss: 0.9774941802024841,grad_norm: 0.89190851415655, iteration: 277954
loss: 0.9947768449783325,grad_norm: 0.999999651897387, iteration: 277955
loss: 1.0099740028381348,grad_norm: 0.8174221503640752, iteration: 277956
loss: 0.9844191074371338,grad_norm: 0.8059337858427418, iteration: 277957
loss: 1.0002689361572266,grad_norm: 0.8251365789309172, iteration: 277958
loss: 1.0003873109817505,grad_norm: 0.9302707461179679, iteration: 277959
loss: 1.0024043321609497,grad_norm: 0.8905580780792683, iteration: 277960
loss: 1.0064517259597778,grad_norm: 0.99999907430083, iteration: 277961
loss: 0.9951396584510803,grad_norm: 0.9723954469211652, iteration: 277962
loss: 1.0092006921768188,grad_norm: 0.9999992992785703, iteration: 277963
loss: 1.0031917095184326,grad_norm: 0.8671655625490974, iteration: 277964
loss: 1.068473219871521,grad_norm: 0.8537023356594755, iteration: 277965
loss: 0.9922588467597961,grad_norm: 0.8999826052939277, iteration: 277966
loss: 0.9788228869438171,grad_norm: 0.7662537756370307, iteration: 277967
loss: 0.9829407930374146,grad_norm: 0.8696860444805866, iteration: 277968
loss: 1.0172367095947266,grad_norm: 0.7989408016727838, iteration: 277969
loss: 0.9647541046142578,grad_norm: 0.6971895083301798, iteration: 277970
loss: 0.9986684322357178,grad_norm: 0.9999996703041376, iteration: 277971
loss: 1.038487434387207,grad_norm: 0.9999993157089699, iteration: 277972
loss: 0.983840823173523,grad_norm: 0.9415289862051563, iteration: 277973
loss: 0.9951322674751282,grad_norm: 0.8944740230980047, iteration: 277974
loss: 0.987311840057373,grad_norm: 0.9215943979514873, iteration: 277975
loss: 0.991707444190979,grad_norm: 0.7139783237014572, iteration: 277976
loss: 1.0742217302322388,grad_norm: 0.9999998465671474, iteration: 277977
loss: 1.0021439790725708,grad_norm: 0.7877013924980051, iteration: 277978
loss: 1.0256187915802002,grad_norm: 0.7565077960182808, iteration: 277979
loss: 0.994638979434967,grad_norm: 0.8901770379661156, iteration: 277980
loss: 1.0436995029449463,grad_norm: 0.8251482042238923, iteration: 277981
loss: 0.9829999208450317,grad_norm: 0.6947699455161935, iteration: 277982
loss: 0.975879967212677,grad_norm: 0.831326175463898, iteration: 277983
loss: 0.9866215586662292,grad_norm: 0.9694837141598701, iteration: 277984
loss: 1.0246158838272095,grad_norm: 0.935364521769219, iteration: 277985
loss: 1.0009576082229614,grad_norm: 0.9999992203882337, iteration: 277986
loss: 0.9581303000450134,grad_norm: 0.9658160350556534, iteration: 277987
loss: 1.0298378467559814,grad_norm: 0.9999995470045404, iteration: 277988
loss: 0.9593806266784668,grad_norm: 0.9999992239163328, iteration: 277989
loss: 1.026215672492981,grad_norm: 0.9887252945834817, iteration: 277990
loss: 1.031337022781372,grad_norm: 0.7341911394227859, iteration: 277991
loss: 0.9949321150779724,grad_norm: 0.7353489857065845, iteration: 277992
loss: 0.9955201148986816,grad_norm: 0.9272406193196351, iteration: 277993
loss: 0.9952465891838074,grad_norm: 0.8808210861181199, iteration: 277994
loss: 1.039765477180481,grad_norm: 0.9999990968232381, iteration: 277995
loss: 1.0607868432998657,grad_norm: 0.9999995234356888, iteration: 277996
loss: 1.0498584508895874,grad_norm: 0.9375656693594195, iteration: 277997
loss: 0.9705234169960022,grad_norm: 0.8702265282902668, iteration: 277998
loss: 0.9945751428604126,grad_norm: 0.7805783777408797, iteration: 277999
loss: 0.973792314529419,grad_norm: 0.9999992911428459, iteration: 278000
loss: 1.0521972179412842,grad_norm: 0.8772461227931462, iteration: 278001
loss: 1.071410894393921,grad_norm: 0.9999998400889079, iteration: 278002
loss: 0.9850137829780579,grad_norm: 0.8248213358246598, iteration: 278003
loss: 0.987775981426239,grad_norm: 0.8607301175693819, iteration: 278004
loss: 0.9936274290084839,grad_norm: 0.9057436046480887, iteration: 278005
loss: 1.0062685012817383,grad_norm: 0.9513695400997393, iteration: 278006
loss: 1.0724360942840576,grad_norm: 0.7969829437788535, iteration: 278007
loss: 1.0466116666793823,grad_norm: 0.9946721958034651, iteration: 278008
loss: 0.9960260987281799,grad_norm: 0.7831889058930855, iteration: 278009
loss: 1.0392018556594849,grad_norm: 0.9999991401876968, iteration: 278010
loss: 0.9810208678245544,grad_norm: 0.9875894443862511, iteration: 278011
loss: 1.0305438041687012,grad_norm: 0.7636767839800825, iteration: 278012
loss: 1.0381293296813965,grad_norm: 0.882922195003698, iteration: 278013
loss: 0.9977692365646362,grad_norm: 0.8929950616694755, iteration: 278014
loss: 0.9809884428977966,grad_norm: 0.9729288640696593, iteration: 278015
loss: 0.9987247586250305,grad_norm: 0.8638269564630857, iteration: 278016
loss: 0.9835057258605957,grad_norm: 0.9999991979417403, iteration: 278017
loss: 0.9832907915115356,grad_norm: 0.8598960660609645, iteration: 278018
loss: 1.0418975353240967,grad_norm: 0.9999992454718062, iteration: 278019
loss: 0.9950544238090515,grad_norm: 0.773029465022265, iteration: 278020
loss: 0.9748445749282837,grad_norm: 0.8991183303429469, iteration: 278021
loss: 0.993226170539856,grad_norm: 0.8048515146734773, iteration: 278022
loss: 0.9315409064292908,grad_norm: 0.8069044393948399, iteration: 278023
loss: 1.015488862991333,grad_norm: 0.9999991066814058, iteration: 278024
loss: 0.9539047479629517,grad_norm: 0.7823155984213905, iteration: 278025
loss: 0.9776375889778137,grad_norm: 0.7693452272013637, iteration: 278026
loss: 0.9883923530578613,grad_norm: 0.814003313562745, iteration: 278027
loss: 1.0688756704330444,grad_norm: 0.999999255634255, iteration: 278028
loss: 0.9671788215637207,grad_norm: 0.9038841987488441, iteration: 278029
loss: 1.0125696659088135,grad_norm: 0.7691810032038827, iteration: 278030
loss: 0.9853336811065674,grad_norm: 0.8252602599804735, iteration: 278031
loss: 1.0196681022644043,grad_norm: 0.8199225714231817, iteration: 278032
loss: 0.9880268573760986,grad_norm: 0.7759555011854283, iteration: 278033
loss: 1.0945217609405518,grad_norm: 0.974432427726677, iteration: 278034
loss: 1.0357484817504883,grad_norm: 0.7151838784289795, iteration: 278035
loss: 0.973791778087616,grad_norm: 0.9066532254609753, iteration: 278036
loss: 1.0447641611099243,grad_norm: 0.9731356897419257, iteration: 278037
loss: 1.0326017141342163,grad_norm: 0.7968004124258268, iteration: 278038
loss: 1.0411930084228516,grad_norm: 0.7729258320766064, iteration: 278039
loss: 0.9664894938468933,grad_norm: 0.7819758667199318, iteration: 278040
loss: 0.9944947957992554,grad_norm: 0.9289346150578208, iteration: 278041
loss: 0.9942229986190796,grad_norm: 0.8075791178665792, iteration: 278042
loss: 0.9828588962554932,grad_norm: 0.874628825085568, iteration: 278043
loss: 0.9886212944984436,grad_norm: 0.9069696809280182, iteration: 278044
loss: 0.9948360919952393,grad_norm: 0.8707164073442917, iteration: 278045
loss: 1.0460972785949707,grad_norm: 0.9999997487563353, iteration: 278046
loss: 1.0227771997451782,grad_norm: 0.8962872079918852, iteration: 278047
loss: 1.0080305337905884,grad_norm: 0.9104289186657049, iteration: 278048
loss: 1.1337835788726807,grad_norm: 0.9999999646835381, iteration: 278049
loss: 0.9988634586334229,grad_norm: 0.9161645053864003, iteration: 278050
loss: 1.0675071477890015,grad_norm: 0.9999992475431156, iteration: 278051
loss: 1.0006895065307617,grad_norm: 0.6997194836170101, iteration: 278052
loss: 0.9761828184127808,grad_norm: 0.8812101119477396, iteration: 278053
loss: 1.0259425640106201,grad_norm: 0.8977766991890921, iteration: 278054
loss: 0.9898916482925415,grad_norm: 0.9999989402009701, iteration: 278055
loss: 0.985141932964325,grad_norm: 0.7748857612707479, iteration: 278056
loss: 0.9973912835121155,grad_norm: 0.9575560701888401, iteration: 278057
loss: 0.9779981970787048,grad_norm: 0.7319731848533998, iteration: 278058
loss: 1.0162365436553955,grad_norm: 0.9999990870165054, iteration: 278059
loss: 1.0098676681518555,grad_norm: 0.7609319725668668, iteration: 278060
loss: 1.0141609907150269,grad_norm: 0.9266881897438827, iteration: 278061
loss: 1.051445484161377,grad_norm: 0.8958547572029977, iteration: 278062
loss: 1.0013787746429443,grad_norm: 0.8762461342833052, iteration: 278063
loss: 1.0852984189987183,grad_norm: 0.9999991688349729, iteration: 278064
loss: 1.0742053985595703,grad_norm: 0.941451693166428, iteration: 278065
loss: 1.0199368000030518,grad_norm: 0.9011198453952708, iteration: 278066
loss: 1.012920618057251,grad_norm: 0.9999998519002878, iteration: 278067
loss: 1.0493638515472412,grad_norm: 0.9999992378591427, iteration: 278068
loss: 1.3348023891448975,grad_norm: 0.9999999779989401, iteration: 278069
loss: 1.0223468542099,grad_norm: 0.9386005050467141, iteration: 278070
loss: 0.9931192994117737,grad_norm: 0.7223667763353062, iteration: 278071
loss: 0.9587304592132568,grad_norm: 0.9999992015596256, iteration: 278072
loss: 0.9669017195701599,grad_norm: 0.7915938431646731, iteration: 278073
loss: 1.0153770446777344,grad_norm: 0.9693841217947484, iteration: 278074
loss: 1.0372825860977173,grad_norm: 0.8605825252737299, iteration: 278075
loss: 1.0139367580413818,grad_norm: 0.8341084009947517, iteration: 278076
loss: 1.008243441581726,grad_norm: 0.8767755230538604, iteration: 278077
loss: 0.9954679012298584,grad_norm: 0.9213240284161031, iteration: 278078
loss: 1.0119719505310059,grad_norm: 0.7449807392823075, iteration: 278079
loss: 0.9978901147842407,grad_norm: 0.9425508323995851, iteration: 278080
loss: 1.014054775238037,grad_norm: 0.8842981834964088, iteration: 278081
loss: 0.9674574136734009,grad_norm: 0.9588749038479336, iteration: 278082
loss: 1.0361125469207764,grad_norm: 0.9999990711888024, iteration: 278083
loss: 1.0587910413742065,grad_norm: 0.8211290252480307, iteration: 278084
loss: 1.0153692960739136,grad_norm: 0.9999990467632436, iteration: 278085
loss: 1.0377342700958252,grad_norm: 0.9483217137311031, iteration: 278086
loss: 1.0582613945007324,grad_norm: 0.8965668332351602, iteration: 278087
loss: 0.9919784069061279,grad_norm: 0.8798980706989803, iteration: 278088
loss: 1.0102838277816772,grad_norm: 0.9999990372381033, iteration: 278089
loss: 0.9682005047798157,grad_norm: 0.9999990267865116, iteration: 278090
loss: 1.0020949840545654,grad_norm: 0.9999990719190841, iteration: 278091
loss: 0.9644176363945007,grad_norm: 0.8688598295271682, iteration: 278092
loss: 1.0158371925354004,grad_norm: 0.9999990349017767, iteration: 278093
loss: 1.0201289653778076,grad_norm: 0.9463283063157533, iteration: 278094
loss: 1.0075159072875977,grad_norm: 0.7994295493916427, iteration: 278095
loss: 1.0384414196014404,grad_norm: 0.7175940009066594, iteration: 278096
loss: 1.0151509046554565,grad_norm: 0.9158972248808086, iteration: 278097
loss: 0.9494725465774536,grad_norm: 0.7634163358461146, iteration: 278098
loss: 0.9967137575149536,grad_norm: 0.931602447499189, iteration: 278099
loss: 0.9684447050094604,grad_norm: 0.9543718446201588, iteration: 278100
loss: 1.0454833507537842,grad_norm: 0.7310345087466543, iteration: 278101
loss: 1.0091793537139893,grad_norm: 0.812982603700656, iteration: 278102
loss: 0.9743536114692688,grad_norm: 0.9999990302547109, iteration: 278103
loss: 1.0040535926818848,grad_norm: 0.8058265855598701, iteration: 278104
loss: 1.0447654724121094,grad_norm: 0.9999995112055604, iteration: 278105
loss: 0.9946379661560059,grad_norm: 0.9833350263873583, iteration: 278106
loss: 1.1332180500030518,grad_norm: 0.9858234062777761, iteration: 278107
loss: 0.9990139603614807,grad_norm: 0.8565713820332892, iteration: 278108
loss: 0.9914090037345886,grad_norm: 0.929327485546173, iteration: 278109
loss: 0.9752649068832397,grad_norm: 0.8192687956254486, iteration: 278110
loss: 1.0057357549667358,grad_norm: 0.9679901765118724, iteration: 278111
loss: 1.0274142026901245,grad_norm: 0.9165967635452846, iteration: 278112
loss: 0.9919036626815796,grad_norm: 0.9017283035052134, iteration: 278113
loss: 1.0273828506469727,grad_norm: 0.9272715948755845, iteration: 278114
loss: 1.0115453004837036,grad_norm: 0.7722148021036698, iteration: 278115
loss: 1.0084272623062134,grad_norm: 0.8363407989552991, iteration: 278116
loss: 1.0106452703475952,grad_norm: 0.7926125336466924, iteration: 278117
loss: 1.0349417924880981,grad_norm: 0.8953134887791561, iteration: 278118
loss: 1.0015465021133423,grad_norm: 0.8036965221718081, iteration: 278119
loss: 0.981622576713562,grad_norm: 0.9999991282404838, iteration: 278120
loss: 0.9960799813270569,grad_norm: 0.9999994771714108, iteration: 278121
loss: 1.0254199504852295,grad_norm: 0.9263976520710451, iteration: 278122
loss: 0.9855715036392212,grad_norm: 0.9664558775036504, iteration: 278123
loss: 0.9906999468803406,grad_norm: 0.8637161791513693, iteration: 278124
loss: 0.9886972904205322,grad_norm: 0.7870593759077292, iteration: 278125
loss: 1.0048073530197144,grad_norm: 0.8294146964914032, iteration: 278126
loss: 1.0342473983764648,grad_norm: 0.909177637752047, iteration: 278127
loss: 1.0061944723129272,grad_norm: 0.9688058128994509, iteration: 278128
loss: 0.9929594397544861,grad_norm: 0.709253836775766, iteration: 278129
loss: 1.082719326019287,grad_norm: 0.9999991013061118, iteration: 278130
loss: 0.9733974933624268,grad_norm: 0.9999989024555955, iteration: 278131
loss: 0.9961246252059937,grad_norm: 0.9534175875860647, iteration: 278132
loss: 0.9987313747406006,grad_norm: 0.8563228880457862, iteration: 278133
loss: 1.0061074495315552,grad_norm: 0.950373254745349, iteration: 278134
loss: 0.9804680943489075,grad_norm: 0.8909426975797238, iteration: 278135
loss: 0.9926083087921143,grad_norm: 0.9999999240147701, iteration: 278136
loss: 0.9876611828804016,grad_norm: 0.802566789534107, iteration: 278137
loss: 0.9778743386268616,grad_norm: 0.7866390976670975, iteration: 278138
loss: 1.0146117210388184,grad_norm: 0.9015871208807816, iteration: 278139
loss: 0.9499434232711792,grad_norm: 0.9062336229049686, iteration: 278140
loss: 1.0204224586486816,grad_norm: 0.9380346140502166, iteration: 278141
loss: 0.977352499961853,grad_norm: 0.8732569621973159, iteration: 278142
loss: 1.0512466430664062,grad_norm: 0.904503016252442, iteration: 278143
loss: 1.0156327486038208,grad_norm: 0.8946078266649483, iteration: 278144
loss: 0.9732915759086609,grad_norm: 0.7372294213655285, iteration: 278145
loss: 0.982864499092102,grad_norm: 0.8959675921520124, iteration: 278146
loss: 0.9782235622406006,grad_norm: 0.9493110517656566, iteration: 278147
loss: 1.0251485109329224,grad_norm: 0.8725276221938053, iteration: 278148
loss: 1.0235565900802612,grad_norm: 0.901025268992618, iteration: 278149
loss: 1.0554239749908447,grad_norm: 0.9994225969694551, iteration: 278150
loss: 1.010207176208496,grad_norm: 0.9223276061765747, iteration: 278151
loss: 1.0128885507583618,grad_norm: 0.9001819194754718, iteration: 278152
loss: 1.0009218454360962,grad_norm: 0.9747537496916269, iteration: 278153
loss: 1.0027117729187012,grad_norm: 0.9252111948375451, iteration: 278154
loss: 0.9915648698806763,grad_norm: 0.8987757328580837, iteration: 278155
loss: 1.022834062576294,grad_norm: 0.9862343703959277, iteration: 278156
loss: 1.0179082155227661,grad_norm: 0.7475388727951178, iteration: 278157
loss: 0.9538057446479797,grad_norm: 0.9805394718253846, iteration: 278158
loss: 1.024037480354309,grad_norm: 0.9999998119997748, iteration: 278159
loss: 0.9854809045791626,grad_norm: 0.9660582066147827, iteration: 278160
loss: 0.995401918888092,grad_norm: 0.7125243592270997, iteration: 278161
loss: 0.9906793236732483,grad_norm: 0.9551014702992676, iteration: 278162
loss: 1.04704749584198,grad_norm: 0.999999059063398, iteration: 278163
loss: 0.9984495043754578,grad_norm: 0.7416606283786562, iteration: 278164
loss: 1.036237120628357,grad_norm: 0.7241052038933166, iteration: 278165
loss: 1.0638877153396606,grad_norm: 0.9999994951667238, iteration: 278166
loss: 0.9685250520706177,grad_norm: 0.8870279618061403, iteration: 278167
loss: 0.9828759431838989,grad_norm: 0.940189837687912, iteration: 278168
loss: 1.0087759494781494,grad_norm: 0.8900301431132082, iteration: 278169
loss: 1.002813458442688,grad_norm: 0.954370875955892, iteration: 278170
loss: 0.9925857782363892,grad_norm: 0.7498538951165737, iteration: 278171
loss: 1.0116764307022095,grad_norm: 0.833448454839229, iteration: 278172
loss: 0.9766086339950562,grad_norm: 0.7546609089782991, iteration: 278173
loss: 1.0167951583862305,grad_norm: 0.990277335519525, iteration: 278174
loss: 0.9862627387046814,grad_norm: 0.9510880359051508, iteration: 278175
loss: 1.0285512208938599,grad_norm: 0.9035084071749305, iteration: 278176
loss: 1.0344349145889282,grad_norm: 0.9999991263192259, iteration: 278177
loss: 0.9880384802818298,grad_norm: 0.9999991339826431, iteration: 278178
loss: 0.9725041389465332,grad_norm: 0.8941324869925602, iteration: 278179
loss: 0.9761378169059753,grad_norm: 0.8984275782116138, iteration: 278180
loss: 1.0118571519851685,grad_norm: 0.9914024509865804, iteration: 278181
loss: 1.0103278160095215,grad_norm: 0.9862748243729496, iteration: 278182
loss: 1.0020959377288818,grad_norm: 0.7834919564842594, iteration: 278183
loss: 0.9456513524055481,grad_norm: 0.8252640653632665, iteration: 278184
loss: 0.9924622774124146,grad_norm: 0.9999992421601214, iteration: 278185
loss: 1.0041762590408325,grad_norm: 0.9120838134143666, iteration: 278186
loss: 0.9970954060554504,grad_norm: 0.999999301608996, iteration: 278187
loss: 1.023348331451416,grad_norm: 0.8423690640831968, iteration: 278188
loss: 0.987511932849884,grad_norm: 0.918817522927413, iteration: 278189
loss: 0.9716814756393433,grad_norm: 0.8844681026569239, iteration: 278190
loss: 1.0001140832901,grad_norm: 0.7591876997907973, iteration: 278191
loss: 1.0071656703948975,grad_norm: 0.9999991131988717, iteration: 278192
loss: 0.9520870447158813,grad_norm: 0.8881783558292479, iteration: 278193
loss: 0.9918946623802185,grad_norm: 0.9999999307568235, iteration: 278194
loss: 0.9827213883399963,grad_norm: 0.8693701477092257, iteration: 278195
loss: 1.0165385007858276,grad_norm: 0.9392979908066842, iteration: 278196
loss: 0.9878283739089966,grad_norm: 0.8253676878322652, iteration: 278197
loss: 0.9825212955474854,grad_norm: 0.9012447267292619, iteration: 278198
loss: 1.007546067237854,grad_norm: 0.7924061810130291, iteration: 278199
loss: 0.9545990228652954,grad_norm: 0.9999991468214828, iteration: 278200
loss: 0.9599750638008118,grad_norm: 0.8531625719284802, iteration: 278201
loss: 0.961802065372467,grad_norm: 0.8187582818400161, iteration: 278202
loss: 0.9664963483810425,grad_norm: 0.9046024467119577, iteration: 278203
loss: 1.0269817113876343,grad_norm: 0.7752759182897051, iteration: 278204
loss: 1.0125293731689453,grad_norm: 0.876558862166317, iteration: 278205
loss: 0.9976356029510498,grad_norm: 0.8942865768770372, iteration: 278206
loss: 0.9407403469085693,grad_norm: 0.8102533777370605, iteration: 278207
loss: 1.0068588256835938,grad_norm: 0.9361873938871267, iteration: 278208
loss: 1.0236295461654663,grad_norm: 0.9698624899699468, iteration: 278209
loss: 0.9984296560287476,grad_norm: 0.88518986824196, iteration: 278210
loss: 1.0422707796096802,grad_norm: 0.999999774941466, iteration: 278211
loss: 1.001912236213684,grad_norm: 0.7745734348703119, iteration: 278212
loss: 1.0237013101577759,grad_norm: 0.9328243543752004, iteration: 278213
loss: 1.0108301639556885,grad_norm: 0.8052760234879683, iteration: 278214
loss: 1.0243350267410278,grad_norm: 0.9999997612073831, iteration: 278215
loss: 1.0223908424377441,grad_norm: 0.9999997010854957, iteration: 278216
loss: 1.0290700197219849,grad_norm: 0.9999990348370115, iteration: 278217
loss: 0.9707961678504944,grad_norm: 0.858247338285501, iteration: 278218
loss: 0.9982430338859558,grad_norm: 0.8864431054485565, iteration: 278219
loss: 1.0168882608413696,grad_norm: 0.9999991836072152, iteration: 278220
loss: 1.0269168615341187,grad_norm: 0.9999996683324591, iteration: 278221
loss: 0.9799111485481262,grad_norm: 0.8527087842119743, iteration: 278222
loss: 1.0261751413345337,grad_norm: 0.8278378256919914, iteration: 278223
loss: 0.9805183410644531,grad_norm: 0.8871023066050386, iteration: 278224
loss: 0.9641357660293579,grad_norm: 0.9040516084039825, iteration: 278225
loss: 1.0317652225494385,grad_norm: 0.8104643588055893, iteration: 278226
loss: 0.9893895387649536,grad_norm: 0.8775854362406935, iteration: 278227
loss: 1.01803457736969,grad_norm: 0.9999992233114725, iteration: 278228
loss: 1.0236725807189941,grad_norm: 0.9310113577872156, iteration: 278229
loss: 1.0044512748718262,grad_norm: 0.9137449772721291, iteration: 278230
loss: 1.0651441812515259,grad_norm: 0.8726988712766841, iteration: 278231
loss: 1.0039682388305664,grad_norm: 0.8789461493010343, iteration: 278232
loss: 0.9505451917648315,grad_norm: 0.8637937167043559, iteration: 278233
loss: 0.9925627708435059,grad_norm: 0.808328696953914, iteration: 278234
loss: 1.077080488204956,grad_norm: 0.999999181113479, iteration: 278235
loss: 1.0102638006210327,grad_norm: 0.8276749783064445, iteration: 278236
loss: 1.010379672050476,grad_norm: 0.8962550059935164, iteration: 278237
loss: 1.0383440256118774,grad_norm: 0.8986521187906839, iteration: 278238
loss: 0.9791333079338074,grad_norm: 0.9016639674121738, iteration: 278239
loss: 1.2014164924621582,grad_norm: 0.9999997579393732, iteration: 278240
loss: 1.0402753353118896,grad_norm: 0.9999996507033514, iteration: 278241
loss: 1.020472764968872,grad_norm: 0.7775634008423276, iteration: 278242
loss: 1.0083123445510864,grad_norm: 0.7810070438633058, iteration: 278243
loss: 0.9821458458900452,grad_norm: 0.833919661798834, iteration: 278244
loss: 1.078739881515503,grad_norm: 0.9003349122706389, iteration: 278245
loss: 1.0115529298782349,grad_norm: 0.9999990839167432, iteration: 278246
loss: 0.9837804436683655,grad_norm: 0.9999998653503724, iteration: 278247
loss: 1.0240261554718018,grad_norm: 0.7429677042765618, iteration: 278248
loss: 1.0604937076568604,grad_norm: 0.8253200092857301, iteration: 278249
loss: 1.0014050006866455,grad_norm: 0.995189500126053, iteration: 278250
loss: 0.9862215518951416,grad_norm: 0.9272551459168203, iteration: 278251
loss: 1.0002691745758057,grad_norm: 0.9093943046608582, iteration: 278252
loss: 1.0163770914077759,grad_norm: 0.8748281351937819, iteration: 278253
loss: 0.987444281578064,grad_norm: 0.9574485491432774, iteration: 278254
loss: 1.0152883529663086,grad_norm: 0.8615994405426501, iteration: 278255
loss: 1.0358210802078247,grad_norm: 0.9999991135919194, iteration: 278256
loss: 0.987779438495636,grad_norm: 0.8459534642585119, iteration: 278257
loss: 1.0060467720031738,grad_norm: 0.7054362039464178, iteration: 278258
loss: 1.001264214515686,grad_norm: 0.9999989323873522, iteration: 278259
loss: 0.9980652928352356,grad_norm: 0.8197665483367905, iteration: 278260
loss: 0.9760564565658569,grad_norm: 0.77516522967348, iteration: 278261
loss: 1.0809500217437744,grad_norm: 0.7623703169232728, iteration: 278262
loss: 0.9913081526756287,grad_norm: 0.8570169211987242, iteration: 278263
loss: 0.9963080883026123,grad_norm: 0.9360938201145849, iteration: 278264
loss: 0.9884458780288696,grad_norm: 0.7316711992659363, iteration: 278265
loss: 0.9606037139892578,grad_norm: 0.7852517351518453, iteration: 278266
loss: 0.9897273778915405,grad_norm: 0.9355881244887131, iteration: 278267
loss: 0.9893457293510437,grad_norm: 0.7837001685421039, iteration: 278268
loss: 1.0666377544403076,grad_norm: 0.9999997301987261, iteration: 278269
loss: 1.048715591430664,grad_norm: 1.0000000567786715, iteration: 278270
loss: 0.986659824848175,grad_norm: 0.6809645832394645, iteration: 278271
loss: 0.9734649658203125,grad_norm: 0.9999992964631559, iteration: 278272
loss: 0.996245265007019,grad_norm: 0.9999991288078331, iteration: 278273
loss: 1.0928843021392822,grad_norm: 0.9999990495239286, iteration: 278274
loss: 1.0058189630508423,grad_norm: 0.7651227842873606, iteration: 278275
loss: 1.0717616081237793,grad_norm: 0.9999990349904984, iteration: 278276
loss: 1.0882210731506348,grad_norm: 0.9999999794172714, iteration: 278277
loss: 1.0328803062438965,grad_norm: 0.9152438752012821, iteration: 278278
loss: 1.0136936902999878,grad_norm: 0.9999990213937752, iteration: 278279
loss: 0.9738137722015381,grad_norm: 0.8803485804306959, iteration: 278280
loss: 0.9938521981239319,grad_norm: 0.7953022593324062, iteration: 278281
loss: 0.9692445993423462,grad_norm: 0.9083918722530819, iteration: 278282
loss: 0.9974004030227661,grad_norm: 0.7659675289464022, iteration: 278283
loss: 1.084218144416809,grad_norm: 0.8959603383850824, iteration: 278284
loss: 1.088340163230896,grad_norm: 0.9400673372791327, iteration: 278285
loss: 1.0123817920684814,grad_norm: 0.9467244271523123, iteration: 278286
loss: 1.0111130475997925,grad_norm: 0.8403024359994735, iteration: 278287
loss: 1.0539571046829224,grad_norm: 0.9154403278733676, iteration: 278288
loss: 1.0720428228378296,grad_norm: 0.8862852906840484, iteration: 278289
loss: 1.0102511644363403,grad_norm: 0.9999990447090731, iteration: 278290
loss: 1.0242352485656738,grad_norm: 0.8257421761250984, iteration: 278291
loss: 0.9994135499000549,grad_norm: 0.8942119704498245, iteration: 278292
loss: 1.017707347869873,grad_norm: 0.7170420679439112, iteration: 278293
loss: 0.9823458790779114,grad_norm: 0.855039763352756, iteration: 278294
loss: 0.9828951358795166,grad_norm: 0.7311588522537726, iteration: 278295
loss: 0.9818593859672546,grad_norm: 0.8357019111670645, iteration: 278296
loss: 1.0215929746627808,grad_norm: 0.9327371360679606, iteration: 278297
loss: 1.0531052350997925,grad_norm: 0.9999992674887237, iteration: 278298
loss: 1.0130723714828491,grad_norm: 0.9765060997568167, iteration: 278299
loss: 0.9894954562187195,grad_norm: 0.9999991303425783, iteration: 278300
loss: 1.0228465795516968,grad_norm: 0.9313180500492059, iteration: 278301
loss: 1.0037705898284912,grad_norm: 0.7227819533421629, iteration: 278302
loss: 1.0343741178512573,grad_norm: 0.8426424628122451, iteration: 278303
loss: 0.9864720702171326,grad_norm: 0.999999384609202, iteration: 278304
loss: 1.0053973197937012,grad_norm: 0.9700840834745469, iteration: 278305
loss: 1.0179394483566284,grad_norm: 0.8253065229156595, iteration: 278306
loss: 1.0000085830688477,grad_norm: 0.9999991648069176, iteration: 278307
loss: 1.0295568704605103,grad_norm: 0.7770614165740577, iteration: 278308
loss: 1.0269168615341187,grad_norm: 0.9056688416575537, iteration: 278309
loss: 0.978126585483551,grad_norm: 0.9975360828738877, iteration: 278310
loss: 1.0120769739151,grad_norm: 0.7412957165135606, iteration: 278311
loss: 1.0204687118530273,grad_norm: 0.9999990327382977, iteration: 278312
loss: 0.9814255237579346,grad_norm: 0.7926019334817446, iteration: 278313
loss: 0.9692237973213196,grad_norm: 0.7873564671690755, iteration: 278314
loss: 0.9992708563804626,grad_norm: 0.8055724900748015, iteration: 278315
loss: 0.9541051983833313,grad_norm: 0.9119691012095485, iteration: 278316
loss: 0.982563316822052,grad_norm: 0.9473975360339913, iteration: 278317
loss: 0.9843539595603943,grad_norm: 0.8786216382199875, iteration: 278318
loss: 0.9992760419845581,grad_norm: 0.865164680746893, iteration: 278319
loss: 0.972800612449646,grad_norm: 0.7972812864366607, iteration: 278320
loss: 1.0102423429489136,grad_norm: 0.7533214873247037, iteration: 278321
loss: 0.9724259376525879,grad_norm: 0.8242463822519646, iteration: 278322
loss: 0.9891518354415894,grad_norm: 0.7886749447116163, iteration: 278323
loss: 0.9655567407608032,grad_norm: 0.8576580612493091, iteration: 278324
loss: 1.0222550630569458,grad_norm: 0.7429344956404977, iteration: 278325
loss: 1.0495636463165283,grad_norm: 0.8827110409781457, iteration: 278326
loss: 1.1141544580459595,grad_norm: 0.9999992718096604, iteration: 278327
loss: 0.9676425457000732,grad_norm: 0.9283192932720814, iteration: 278328
loss: 0.9592553973197937,grad_norm: 0.9778562730044836, iteration: 278329
loss: 1.026013731956482,grad_norm: 0.857653868679812, iteration: 278330
loss: 0.9951028823852539,grad_norm: 0.8544800940491318, iteration: 278331
loss: 1.022265911102295,grad_norm: 0.9999990690826752, iteration: 278332
loss: 0.9943244457244873,grad_norm: 0.7343191868879441, iteration: 278333
loss: 1.0455682277679443,grad_norm: 0.8511683672360241, iteration: 278334
loss: 1.0143183469772339,grad_norm: 0.9520709886429012, iteration: 278335
loss: 0.9962899684906006,grad_norm: 0.9726016762876025, iteration: 278336
loss: 1.029159426689148,grad_norm: 0.7737186161241001, iteration: 278337
loss: 0.9778926372528076,grad_norm: 0.9999990584764102, iteration: 278338
loss: 0.9932326674461365,grad_norm: 0.7895847150642231, iteration: 278339
loss: 0.992887020111084,grad_norm: 0.7398222701621576, iteration: 278340
loss: 1.0107591152191162,grad_norm: 0.9999992483572537, iteration: 278341
loss: 1.0108590126037598,grad_norm: 0.8272719477080424, iteration: 278342
loss: 1.0093148946762085,grad_norm: 0.9381959375274274, iteration: 278343
loss: 0.982644259929657,grad_norm: 0.9999990126211301, iteration: 278344
loss: 1.030954122543335,grad_norm: 0.7918580455034264, iteration: 278345
loss: 1.1008728742599487,grad_norm: 0.9999993044897026, iteration: 278346
loss: 1.007513403892517,grad_norm: 0.9999991254889711, iteration: 278347
loss: 0.9576414227485657,grad_norm: 0.7670668137445475, iteration: 278348
loss: 0.9906755685806274,grad_norm: 0.9451612283395531, iteration: 278349
loss: 0.9949888586997986,grad_norm: 0.9904025962333717, iteration: 278350
loss: 1.0090913772583008,grad_norm: 0.8251371675003911, iteration: 278351
loss: 0.9622615575790405,grad_norm: 0.8154748191610665, iteration: 278352
loss: 1.0020241737365723,grad_norm: 0.9953169027811194, iteration: 278353
loss: 0.9834592342376709,grad_norm: 0.8840380613646305, iteration: 278354
loss: 0.963402271270752,grad_norm: 0.7626272642855336, iteration: 278355
loss: 1.0798877477645874,grad_norm: 0.869788445303102, iteration: 278356
loss: 0.997424840927124,grad_norm: 0.6855029733837146, iteration: 278357
loss: 1.025449275970459,grad_norm: 0.9999991259098969, iteration: 278358
loss: 0.9565555453300476,grad_norm: 0.7727051749480663, iteration: 278359
loss: 1.0793216228485107,grad_norm: 0.9999991711354612, iteration: 278360
loss: 1.0043022632598877,grad_norm: 0.8427398929042611, iteration: 278361
loss: 1.043860912322998,grad_norm: 0.8520093435174904, iteration: 278362
loss: 1.16922128200531,grad_norm: 0.9999996063367023, iteration: 278363
loss: 0.9675639867782593,grad_norm: 0.9244243167018921, iteration: 278364
loss: 1.0123552083969116,grad_norm: 0.909419393754874, iteration: 278365
loss: 0.9514110684394836,grad_norm: 0.8717155712372585, iteration: 278366
loss: 1.0059040784835815,grad_norm: 0.8438652399882283, iteration: 278367
loss: 1.009087324142456,grad_norm: 0.9790300798290813, iteration: 278368
loss: 1.0122424364089966,grad_norm: 0.7649747879985738, iteration: 278369
loss: 0.9940791726112366,grad_norm: 0.9999991379193564, iteration: 278370
loss: 1.0032391548156738,grad_norm: 0.9080760064073216, iteration: 278371
loss: 0.9933138489723206,grad_norm: 0.849113549612092, iteration: 278372
loss: 0.9697844982147217,grad_norm: 0.8903176672613298, iteration: 278373
loss: 1.0285418033599854,grad_norm: 0.8179541599158782, iteration: 278374
loss: 0.9854969382286072,grad_norm: 0.9676851630536261, iteration: 278375
loss: 1.0666160583496094,grad_norm: 0.9999991101610817, iteration: 278376
loss: 1.0031272172927856,grad_norm: 0.9123277921746752, iteration: 278377
loss: 1.0099821090698242,grad_norm: 0.8389503374753894, iteration: 278378
loss: 0.9967897534370422,grad_norm: 0.9010264577138422, iteration: 278379
loss: 1.1283758878707886,grad_norm: 0.9999990731997165, iteration: 278380
loss: 0.9966837167739868,grad_norm: 0.9999990547348999, iteration: 278381
loss: 1.0179035663604736,grad_norm: 0.9999991193336852, iteration: 278382
loss: 0.9707272052764893,grad_norm: 0.7629753101522644, iteration: 278383
loss: 1.0183783769607544,grad_norm: 0.9999996997095661, iteration: 278384
loss: 1.010538101196289,grad_norm: 0.9128403590932709, iteration: 278385
loss: 1.0490658283233643,grad_norm: 0.9786913668299249, iteration: 278386
loss: 1.0151546001434326,grad_norm: 0.8548664621176039, iteration: 278387
loss: 1.0524446964263916,grad_norm: 0.9999990617271671, iteration: 278388
loss: 1.0162831544876099,grad_norm: 0.8087698901581012, iteration: 278389
loss: 0.976771354675293,grad_norm: 0.9999991219844447, iteration: 278390
loss: 0.9924302697181702,grad_norm: 0.7049401745599825, iteration: 278391
loss: 0.9835949540138245,grad_norm: 0.914790430441375, iteration: 278392
loss: 0.9645020365715027,grad_norm: 0.9386141612939236, iteration: 278393
loss: 0.9690696597099304,grad_norm: 0.8820712565138424, iteration: 278394
loss: 0.9827409386634827,grad_norm: 0.9629901360294234, iteration: 278395
loss: 0.9782285094261169,grad_norm: 0.999999225445314, iteration: 278396
loss: 0.9966206550598145,grad_norm: 0.7888289994424785, iteration: 278397
loss: 0.9836766123771667,grad_norm: 0.999998904436569, iteration: 278398
loss: 1.0004125833511353,grad_norm: 0.7939283208599058, iteration: 278399
loss: 1.0764973163604736,grad_norm: 0.9999999461544207, iteration: 278400
loss: 0.9886787533760071,grad_norm: 0.9759497987118272, iteration: 278401
loss: 0.947498619556427,grad_norm: 0.9650062577365561, iteration: 278402
loss: 0.9681008458137512,grad_norm: 0.9999991730507776, iteration: 278403
loss: 0.9503054022789001,grad_norm: 0.9306305703254858, iteration: 278404
loss: 0.9862408638000488,grad_norm: 0.9223693651324996, iteration: 278405
loss: 1.075268030166626,grad_norm: 0.9999989870054097, iteration: 278406
loss: 1.1034773588180542,grad_norm: 0.9999993098755854, iteration: 278407
loss: 1.0082217454910278,grad_norm: 0.8845629504647092, iteration: 278408
loss: 0.9869993329048157,grad_norm: 0.9320649607147248, iteration: 278409
loss: 1.022110104560852,grad_norm: 0.908304179042511, iteration: 278410
loss: 0.99913489818573,grad_norm: 0.7599017371720298, iteration: 278411
loss: 1.0223217010498047,grad_norm: 0.9999996546782643, iteration: 278412
loss: 1.0072513818740845,grad_norm: 0.999999264840646, iteration: 278413
loss: 1.0206314325332642,grad_norm: 0.9999991619160398, iteration: 278414
loss: 0.9924447536468506,grad_norm: 0.7740893093003185, iteration: 278415
loss: 1.0443795919418335,grad_norm: 0.8267677950107272, iteration: 278416
loss: 0.9967593550682068,grad_norm: 0.8633748666200984, iteration: 278417
loss: 1.0020390748977661,grad_norm: 0.9668538587067408, iteration: 278418
loss: 0.9976789355278015,grad_norm: 0.7677200353119389, iteration: 278419
loss: 1.0012115240097046,grad_norm: 0.8252027694097316, iteration: 278420
loss: 0.9826736450195312,grad_norm: 0.8099339066455363, iteration: 278421
loss: 0.9948826432228088,grad_norm: 0.7846410169664244, iteration: 278422
loss: 0.9982554316520691,grad_norm: 0.7837284318260872, iteration: 278423
loss: 1.0031688213348389,grad_norm: 0.829064195198156, iteration: 278424
loss: 0.9994016289710999,grad_norm: 0.8593391639502715, iteration: 278425
loss: 0.9490178823471069,grad_norm: 0.9999994597398943, iteration: 278426
loss: 0.9705857038497925,grad_norm: 0.9016351008188946, iteration: 278427
loss: 1.0278654098510742,grad_norm: 0.8616214698119373, iteration: 278428
loss: 1.1126266717910767,grad_norm: 0.9999998898567908, iteration: 278429
loss: 1.1939995288848877,grad_norm: 0.9999999298933736, iteration: 278430
loss: 1.0607290267944336,grad_norm: 0.9999991663737603, iteration: 278431
loss: 1.0377140045166016,grad_norm: 0.9899236711722192, iteration: 278432
loss: 1.0283703804016113,grad_norm: 0.9999991378669085, iteration: 278433
loss: 1.0015788078308105,grad_norm: 0.8975935295980503, iteration: 278434
loss: 0.9879632592201233,grad_norm: 0.8182212968695739, iteration: 278435
loss: 0.97409987449646,grad_norm: 0.9737489981897677, iteration: 278436
loss: 0.9507970809936523,grad_norm: 0.9999991476460854, iteration: 278437
loss: 0.9821000099182129,grad_norm: 0.9685854895238966, iteration: 278438
loss: 0.9962118268013,grad_norm: 0.9999993011944417, iteration: 278439
loss: 0.976864755153656,grad_norm: 0.9999992879976468, iteration: 278440
loss: 0.9666520953178406,grad_norm: 0.9204297842800528, iteration: 278441
loss: 1.0057740211486816,grad_norm: 0.9794190104183471, iteration: 278442
loss: 0.9736315011978149,grad_norm: 0.999999774169814, iteration: 278443
loss: 1.0140376091003418,grad_norm: 0.9093199403857116, iteration: 278444
loss: 1.1059967279434204,grad_norm: 0.8656209276905834, iteration: 278445
loss: 0.973063588142395,grad_norm: 0.8590214752379749, iteration: 278446
loss: 1.0547077655792236,grad_norm: 0.9999991235362804, iteration: 278447
loss: 1.029313564300537,grad_norm: 0.8091094944185744, iteration: 278448
loss: 0.9977731704711914,grad_norm: 0.9293325778776365, iteration: 278449
loss: 1.0346848964691162,grad_norm: 0.7959666171423418, iteration: 278450
loss: 1.0456843376159668,grad_norm: 0.9383373368342711, iteration: 278451
loss: 0.9961399435997009,grad_norm: 0.9580379943768339, iteration: 278452
loss: 1.0533792972564697,grad_norm: 0.9999992670807009, iteration: 278453
loss: 0.9486175179481506,grad_norm: 0.9797358379169827, iteration: 278454
loss: 1.044409990310669,grad_norm: 0.9604318739770565, iteration: 278455
loss: 1.1299585103988647,grad_norm: 0.8678409151523224, iteration: 278456
loss: 0.9791221022605896,grad_norm: 0.7304307969925361, iteration: 278457
loss: 0.9552910923957825,grad_norm: 0.793818348101761, iteration: 278458
loss: 0.9871695637702942,grad_norm: 0.8079661517315757, iteration: 278459
loss: 0.9602458477020264,grad_norm: 0.8242471145681523, iteration: 278460
loss: 0.993518590927124,grad_norm: 0.8328939307735447, iteration: 278461
loss: 0.9876677393913269,grad_norm: 0.9036841100206183, iteration: 278462
loss: 1.1028138399124146,grad_norm: 0.9999991630279048, iteration: 278463
loss: 0.9821796417236328,grad_norm: 0.988844620064075, iteration: 278464
loss: 1.0038238763809204,grad_norm: 0.9147231315870938, iteration: 278465
loss: 0.9910512566566467,grad_norm: 0.9999992781478628, iteration: 278466
loss: 0.9949489831924438,grad_norm: 0.8257883212828827, iteration: 278467
loss: 1.0199663639068604,grad_norm: 0.9999990811510194, iteration: 278468
loss: 1.0133824348449707,grad_norm: 0.9999992303770943, iteration: 278469
loss: 1.0171833038330078,grad_norm: 0.9266521648485633, iteration: 278470
loss: 1.0218533277511597,grad_norm: 0.7151677668091229, iteration: 278471
loss: 0.9734492897987366,grad_norm: 0.9641258254185344, iteration: 278472
loss: 1.0377614498138428,grad_norm: 0.9999991914513955, iteration: 278473
loss: 1.012776255607605,grad_norm: 0.950417477243448, iteration: 278474
loss: 1.023891568183899,grad_norm: 0.9999990241306554, iteration: 278475
loss: 0.9852358102798462,grad_norm: 0.8564846139352701, iteration: 278476
loss: 1.0056403875350952,grad_norm: 0.8143974324088703, iteration: 278477
loss: 1.007913589477539,grad_norm: 0.8279995321250576, iteration: 278478
loss: 1.0740479230880737,grad_norm: 0.9132068796596823, iteration: 278479
loss: 1.162703275680542,grad_norm: 0.9999997356537694, iteration: 278480
loss: 1.0472544431686401,grad_norm: 0.978646774214861, iteration: 278481
loss: 0.954356849193573,grad_norm: 0.9860433354989937, iteration: 278482
loss: 1.0130306482315063,grad_norm: 0.8872001895265815, iteration: 278483
loss: 1.0231729745864868,grad_norm: 0.9999991628313062, iteration: 278484
loss: 1.0098525285720825,grad_norm: 0.9307564980413398, iteration: 278485
loss: 1.1710649728775024,grad_norm: 0.9999995246062495, iteration: 278486
loss: 1.0243611335754395,grad_norm: 0.8530444401643893, iteration: 278487
loss: 0.9831923842430115,grad_norm: 0.8037325409203617, iteration: 278488
loss: 1.0021175146102905,grad_norm: 0.9999990842046942, iteration: 278489
loss: 1.0184673070907593,grad_norm: 0.9529617201739705, iteration: 278490
loss: 1.026163101196289,grad_norm: 0.8205228270770122, iteration: 278491
loss: 1.0276551246643066,grad_norm: 0.9163718664920419, iteration: 278492
loss: 1.0069551467895508,grad_norm: 0.8989597511107388, iteration: 278493
loss: 1.0049372911453247,grad_norm: 0.978513401983518, iteration: 278494
loss: 1.0807725191116333,grad_norm: 0.9999991820192264, iteration: 278495
loss: 0.9995054006576538,grad_norm: 0.8623157347131624, iteration: 278496
loss: 0.9341403841972351,grad_norm: 0.9712688749850166, iteration: 278497
loss: 1.0411173105239868,grad_norm: 0.9999991586590521, iteration: 278498
loss: 1.0233373641967773,grad_norm: 0.8503049902974247, iteration: 278499
loss: 0.9985682368278503,grad_norm: 0.7809044438803247, iteration: 278500
loss: 1.00478994846344,grad_norm: 0.7491751061396908, iteration: 278501
loss: 1.0141650438308716,grad_norm: 0.9999989785886607, iteration: 278502
loss: 0.9607846736907959,grad_norm: 0.8029771650973392, iteration: 278503
loss: 1.0174494981765747,grad_norm: 0.7596657680141369, iteration: 278504
loss: 1.0262595415115356,grad_norm: 0.9703465168564636, iteration: 278505
loss: 0.9618895649909973,grad_norm: 0.6927998842698346, iteration: 278506
loss: 0.9686666131019592,grad_norm: 0.9197179915657414, iteration: 278507
loss: 0.9860778450965881,grad_norm: 0.9634296923674421, iteration: 278508
loss: 1.0562173128128052,grad_norm: 0.999999171457177, iteration: 278509
loss: 1.0044344663619995,grad_norm: 0.8140445017009996, iteration: 278510
loss: 0.9843353629112244,grad_norm: 0.9548498383562459, iteration: 278511
loss: 1.0580216646194458,grad_norm: 0.9999991450638088, iteration: 278512
loss: 0.9792788624763489,grad_norm: 0.8887628654267358, iteration: 278513
loss: 0.9910441040992737,grad_norm: 0.9186814529776095, iteration: 278514
loss: 0.939996600151062,grad_norm: 0.9999990324428053, iteration: 278515
loss: 0.9880285859107971,grad_norm: 0.9520559016405772, iteration: 278516
loss: 0.9890226125717163,grad_norm: 0.9975969498008275, iteration: 278517
loss: 1.0660046339035034,grad_norm: 0.9999992642516483, iteration: 278518
loss: 0.9576984643936157,grad_norm: 0.8044347760244343, iteration: 278519
loss: 0.9608734250068665,grad_norm: 0.7103500969841744, iteration: 278520
loss: 1.0301306247711182,grad_norm: 0.8561143083154874, iteration: 278521
loss: 1.0005065202713013,grad_norm: 0.8385629679435418, iteration: 278522
loss: 0.9938037395477295,grad_norm: 0.9999990523866895, iteration: 278523
loss: 0.9809544086456299,grad_norm: 0.8681159475631289, iteration: 278524
loss: 0.993662416934967,grad_norm: 0.9999989209710727, iteration: 278525
loss: 1.01816987991333,grad_norm: 0.999999221301204, iteration: 278526
loss: 0.981170117855072,grad_norm: 0.9999992908178795, iteration: 278527
loss: 1.066442847251892,grad_norm: 0.9999992147934987, iteration: 278528
loss: 1.0269134044647217,grad_norm: 0.7816479332379518, iteration: 278529
loss: 1.0475869178771973,grad_norm: 0.9999990368615362, iteration: 278530
loss: 0.9800618290901184,grad_norm: 0.7956043091284819, iteration: 278531
loss: 0.9713138937950134,grad_norm: 0.871314578283009, iteration: 278532
loss: 0.9906184673309326,grad_norm: 0.9999989180192618, iteration: 278533
loss: 0.996146559715271,grad_norm: 0.8838386400909816, iteration: 278534
loss: 0.9898178577423096,grad_norm: 0.9999998114393353, iteration: 278535
loss: 1.011436104774475,grad_norm: 0.8087660970864167, iteration: 278536
loss: 0.9909800887107849,grad_norm: 0.7549598926520997, iteration: 278537
loss: 0.9898596405982971,grad_norm: 0.848589980650942, iteration: 278538
loss: 0.9930779337882996,grad_norm: 0.6723498627314416, iteration: 278539
loss: 1.0147302150726318,grad_norm: 0.8782041878629904, iteration: 278540
loss: 1.0270001888275146,grad_norm: 0.9033370369555377, iteration: 278541
loss: 0.9819797873497009,grad_norm: 0.9948523683593229, iteration: 278542
loss: 0.9958909153938293,grad_norm: 0.8950532794560437, iteration: 278543
loss: 0.9938966035842896,grad_norm: 0.918816215884357, iteration: 278544
loss: 0.9967116117477417,grad_norm: 0.9753511704295179, iteration: 278545
loss: 0.9934161901473999,grad_norm: 0.8409947307551054, iteration: 278546
loss: 1.034196376800537,grad_norm: 0.8834193335991013, iteration: 278547
loss: 0.9771943688392639,grad_norm: 0.9633918629694318, iteration: 278548
loss: 1.0302026271820068,grad_norm: 0.6606414223233509, iteration: 278549
loss: 1.0184402465820312,grad_norm: 0.7870116531100158, iteration: 278550
loss: 0.9844079613685608,grad_norm: 0.8210557629161341, iteration: 278551
loss: 1.0111184120178223,grad_norm: 0.8971216596517904, iteration: 278552
loss: 1.0098952054977417,grad_norm: 0.8309772414578781, iteration: 278553
loss: 1.0048600435256958,grad_norm: 0.8990643018937293, iteration: 278554
loss: 1.0197408199310303,grad_norm: 0.8896745992872254, iteration: 278555
loss: 0.9652144312858582,grad_norm: 0.8668707604167945, iteration: 278556
loss: 1.06659996509552,grad_norm: 0.7878940299740246, iteration: 278557
loss: 1.0008963346481323,grad_norm: 0.8545956973594272, iteration: 278558
loss: 0.9883208274841309,grad_norm: 0.9285982376536532, iteration: 278559
loss: 1.1167864799499512,grad_norm: 0.9999993906541838, iteration: 278560
loss: 1.0138952732086182,grad_norm: 0.7996566946813102, iteration: 278561
loss: 0.9839131236076355,grad_norm: 0.8292713841093406, iteration: 278562
loss: 1.0114442110061646,grad_norm: 0.8964541122245561, iteration: 278563
loss: 1.040456771850586,grad_norm: 0.9999991029195268, iteration: 278564
loss: 0.961840808391571,grad_norm: 0.9040883059899172, iteration: 278565
loss: 1.0205076932907104,grad_norm: 0.9776585538389551, iteration: 278566
loss: 0.9929041862487793,grad_norm: 0.930169700064471, iteration: 278567
loss: 1.0226953029632568,grad_norm: 0.8986980638118413, iteration: 278568
loss: 0.9997802972793579,grad_norm: 0.9507753171796125, iteration: 278569
loss: 0.9846367835998535,grad_norm: 0.9999991335227031, iteration: 278570
loss: 0.9953981637954712,grad_norm: 0.8383684390680148, iteration: 278571
loss: 1.010163426399231,grad_norm: 0.9564226734186261, iteration: 278572
loss: 1.0168780088424683,grad_norm: 0.9156871792498069, iteration: 278573
loss: 1.0312459468841553,grad_norm: 0.9158622807791911, iteration: 278574
loss: 1.0080989599227905,grad_norm: 0.9999994281529908, iteration: 278575
loss: 0.9737000465393066,grad_norm: 0.8562074731174344, iteration: 278576
loss: 0.9721566438674927,grad_norm: 0.8316074810009331, iteration: 278577
loss: 1.0096664428710938,grad_norm: 0.9450512999121442, iteration: 278578
loss: 0.9871741533279419,grad_norm: 0.855558787354217, iteration: 278579
loss: 0.9817662835121155,grad_norm: 0.8896586078305004, iteration: 278580
loss: 1.004418969154358,grad_norm: 0.9531615582521599, iteration: 278581
loss: 0.9335922002792358,grad_norm: 0.9731742906880647, iteration: 278582
loss: 0.9914360046386719,grad_norm: 0.9999992519775891, iteration: 278583
loss: 0.9834520816802979,grad_norm: 0.8923589489834585, iteration: 278584
loss: 1.0180566310882568,grad_norm: 0.7264799548828428, iteration: 278585
loss: 1.035605788230896,grad_norm: 0.8147130244333814, iteration: 278586
loss: 0.9854334592819214,grad_norm: 0.8514955173457801, iteration: 278587
loss: 1.017892837524414,grad_norm: 0.8594159469766844, iteration: 278588
loss: 1.0049958229064941,grad_norm: 0.8342894596968119, iteration: 278589
loss: 1.1145539283752441,grad_norm: 0.9457680334503051, iteration: 278590
loss: 0.9936904311180115,grad_norm: 0.7090160355848424, iteration: 278591
loss: 1.007285714149475,grad_norm: 0.9378496740205351, iteration: 278592
loss: 0.996292769908905,grad_norm: 0.9207130243270845, iteration: 278593
loss: 0.9918667674064636,grad_norm: 0.7154237766919396, iteration: 278594
loss: 0.9745265245437622,grad_norm: 0.9436887494807249, iteration: 278595
loss: 0.9881104230880737,grad_norm: 0.7947535591570308, iteration: 278596
loss: 0.9941918253898621,grad_norm: 0.9999990711663884, iteration: 278597
loss: 0.9819692373275757,grad_norm: 0.9999991458834969, iteration: 278598
loss: 0.9862028956413269,grad_norm: 0.8256195266865145, iteration: 278599
loss: 1.0003955364227295,grad_norm: 0.7626398015289098, iteration: 278600
loss: 0.9768383502960205,grad_norm: 0.8283614774048345, iteration: 278601
loss: 1.0384244918823242,grad_norm: 0.9654199875741946, iteration: 278602
loss: 0.9804516434669495,grad_norm: 0.9159806015082629, iteration: 278603
loss: 1.0186680555343628,grad_norm: 0.9999990993479618, iteration: 278604
loss: 0.9989469647407532,grad_norm: 0.7525861404347671, iteration: 278605
loss: 0.9952998757362366,grad_norm: 0.8650945586191615, iteration: 278606
loss: 0.975053608417511,grad_norm: 0.9338636547891709, iteration: 278607
loss: 1.011804223060608,grad_norm: 0.9999993063465952, iteration: 278608
loss: 1.0081647634506226,grad_norm: 0.8821737538933472, iteration: 278609
loss: 0.9981164336204529,grad_norm: 0.7233575419617548, iteration: 278610
loss: 0.9664977192878723,grad_norm: 0.8979498115164342, iteration: 278611
loss: 1.0015980005264282,grad_norm: 0.980408448155825, iteration: 278612
loss: 0.9692422747612,grad_norm: 0.7915250118934681, iteration: 278613
loss: 0.9548397064208984,grad_norm: 0.9822548677147553, iteration: 278614
loss: 1.0217957496643066,grad_norm: 0.9799212390149697, iteration: 278615
loss: 0.9864506721496582,grad_norm: 0.9745849424871569, iteration: 278616
loss: 0.9749193787574768,grad_norm: 0.8763026174593148, iteration: 278617
loss: 1.0057899951934814,grad_norm: 0.9070691631488665, iteration: 278618
loss: 1.0569490194320679,grad_norm: 0.9137207762879428, iteration: 278619
loss: 0.9553219676017761,grad_norm: 0.7938260001131221, iteration: 278620
loss: 1.021086573600769,grad_norm: 0.8440017842296481, iteration: 278621
loss: 0.9775936007499695,grad_norm: 0.9677581386639185, iteration: 278622
loss: 0.9980893135070801,grad_norm: 0.9040460345237437, iteration: 278623
loss: 1.0247763395309448,grad_norm: 0.9999990685730629, iteration: 278624
loss: 1.0605189800262451,grad_norm: 0.9999992414948166, iteration: 278625
loss: 1.0244648456573486,grad_norm: 0.7475876456851925, iteration: 278626
loss: 1.0101977586746216,grad_norm: 0.8983522082332384, iteration: 278627
loss: 0.9830076694488525,grad_norm: 0.7388773368255792, iteration: 278628
loss: 1.023682713508606,grad_norm: 0.8172056946443799, iteration: 278629
loss: 1.0005286931991577,grad_norm: 0.9156490572809233, iteration: 278630
loss: 0.9953740239143372,grad_norm: 0.9024624611113855, iteration: 278631
loss: 1.007431983947754,grad_norm: 0.8150998935229984, iteration: 278632
loss: 0.9804677367210388,grad_norm: 0.8860298704646815, iteration: 278633
loss: 0.9965025186538696,grad_norm: 0.8045467677581978, iteration: 278634
loss: 1.0226218700408936,grad_norm: 0.9962875002029777, iteration: 278635
loss: 1.0241930484771729,grad_norm: 0.9766672093360755, iteration: 278636
loss: 0.9570212364196777,grad_norm: 0.9232614863083098, iteration: 278637
loss: 1.0316399335861206,grad_norm: 0.8064427266037658, iteration: 278638
loss: 0.9769976139068604,grad_norm: 0.9385787889050283, iteration: 278639
loss: 0.9848207235336304,grad_norm: 0.6731596800776316, iteration: 278640
loss: 0.9616392254829407,grad_norm: 0.8782332495735491, iteration: 278641
loss: 1.0702004432678223,grad_norm: 0.8436420685657963, iteration: 278642
loss: 0.9711803197860718,grad_norm: 0.7904332957106758, iteration: 278643
loss: 0.9784008264541626,grad_norm: 0.8560604371098964, iteration: 278644
loss: 1.0069516897201538,grad_norm: 0.7731391515829314, iteration: 278645
loss: 0.9779046773910522,grad_norm: 0.7218534765026268, iteration: 278646
loss: 1.0279932022094727,grad_norm: 0.9999992215017639, iteration: 278647
loss: 0.9815831780433655,grad_norm: 0.9380592339966083, iteration: 278648
loss: 1.0056321620941162,grad_norm: 0.9410294409895247, iteration: 278649
loss: 0.9633968472480774,grad_norm: 0.7613455950010299, iteration: 278650
loss: 0.9878942370414734,grad_norm: 0.9132153924847277, iteration: 278651
loss: 1.0002059936523438,grad_norm: 0.9007104012541642, iteration: 278652
loss: 1.026742935180664,grad_norm: 0.9999989884949305, iteration: 278653
loss: 1.0008727312088013,grad_norm: 0.9999989795088353, iteration: 278654
loss: 1.0131701231002808,grad_norm: 0.8889600699845802, iteration: 278655
loss: 1.047581434249878,grad_norm: 0.9999993619285537, iteration: 278656
loss: 1.0026285648345947,grad_norm: 0.7199947522089248, iteration: 278657
loss: 0.9899646639823914,grad_norm: 0.8742064756994239, iteration: 278658
loss: 1.0315929651260376,grad_norm: 0.8879105142037947, iteration: 278659
loss: 1.0147500038146973,grad_norm: 0.7392835577190591, iteration: 278660
loss: 0.9998452663421631,grad_norm: 0.8999770145204193, iteration: 278661
loss: 0.992825984954834,grad_norm: 0.793769120559167, iteration: 278662
loss: 1.0393821001052856,grad_norm: 0.9999999726119619, iteration: 278663
loss: 0.9895728230476379,grad_norm: 0.8455113657083237, iteration: 278664
loss: 1.0641710758209229,grad_norm: 0.9999990273615023, iteration: 278665
loss: 1.0052249431610107,grad_norm: 0.8244074220355432, iteration: 278666
loss: 0.9759120345115662,grad_norm: 0.9360544912074483, iteration: 278667
loss: 0.9876797795295715,grad_norm: 0.7530793152592417, iteration: 278668
loss: 1.0085736513137817,grad_norm: 0.7921816198880336, iteration: 278669
loss: 1.0143475532531738,grad_norm: 0.7756338245916186, iteration: 278670
loss: 1.0128986835479736,grad_norm: 0.9132497705969498, iteration: 278671
loss: 0.9624535441398621,grad_norm: 0.8283653994761166, iteration: 278672
loss: 1.0036301612854004,grad_norm: 0.7206669504766187, iteration: 278673
loss: 1.090370774269104,grad_norm: 0.9999996310740805, iteration: 278674
loss: 1.002321481704712,grad_norm: 0.8345834524097185, iteration: 278675
loss: 1.0279672145843506,grad_norm: 0.9738176332833401, iteration: 278676
loss: 0.987830638885498,grad_norm: 0.7223172216111029, iteration: 278677
loss: 1.062849998474121,grad_norm: 0.8892034588105953, iteration: 278678
loss: 1.0283396244049072,grad_norm: 0.935795818696502, iteration: 278679
loss: 1.0072964429855347,grad_norm: 0.8618220633302085, iteration: 278680
loss: 0.9793466925621033,grad_norm: 0.9150456301864608, iteration: 278681
loss: 1.0767346620559692,grad_norm: 0.999999209445579, iteration: 278682
loss: 1.0478262901306152,grad_norm: 0.9999996121209075, iteration: 278683
loss: 0.9879265427589417,grad_norm: 0.9999995489942392, iteration: 278684
loss: 0.946036696434021,grad_norm: 0.8455669183783701, iteration: 278685
loss: 1.0613903999328613,grad_norm: 0.9999994828473537, iteration: 278686
loss: 1.0088913440704346,grad_norm: 0.7465467011785365, iteration: 278687
loss: 0.9631069898605347,grad_norm: 0.9320939036220836, iteration: 278688
loss: 1.1133373975753784,grad_norm: 0.9999997832988675, iteration: 278689
loss: 1.0336190462112427,grad_norm: 0.9999994015803858, iteration: 278690
loss: 1.014050841331482,grad_norm: 0.9999995315575636, iteration: 278691
loss: 0.962152898311615,grad_norm: 0.9352866705649429, iteration: 278692
loss: 1.1156797409057617,grad_norm: 0.995168170960432, iteration: 278693
loss: 1.0094585418701172,grad_norm: 0.9084549647071213, iteration: 278694
loss: 1.036594033241272,grad_norm: 0.8325243118299674, iteration: 278695
loss: 1.0065864324569702,grad_norm: 0.9631736928194946, iteration: 278696
loss: 1.0224599838256836,grad_norm: 0.9999991190145544, iteration: 278697
loss: 0.9826463460922241,grad_norm: 0.7115609144617893, iteration: 278698
loss: 0.9826026558876038,grad_norm: 0.9229142628319917, iteration: 278699
loss: 0.9862843751907349,grad_norm: 0.7788719985768715, iteration: 278700
loss: 1.0016148090362549,grad_norm: 0.9003713613034003, iteration: 278701
loss: 0.9566465020179749,grad_norm: 0.7762684176237769, iteration: 278702
loss: 0.9990896582603455,grad_norm: 0.8458316550026005, iteration: 278703
loss: 0.9698657989501953,grad_norm: 0.8158724733707824, iteration: 278704
loss: 1.0133336782455444,grad_norm: 0.7793433474460153, iteration: 278705
loss: 1.0050595998764038,grad_norm: 0.9999993910821661, iteration: 278706
loss: 1.0527969598770142,grad_norm: 0.9330364031751602, iteration: 278707
loss: 1.0507307052612305,grad_norm: 0.8958233138988637, iteration: 278708
loss: 0.9975572824478149,grad_norm: 0.8490220307500511, iteration: 278709
loss: 0.9951573610305786,grad_norm: 0.8020034676682036, iteration: 278710
loss: 1.058963656425476,grad_norm: 0.9999991927755997, iteration: 278711
loss: 1.0346568822860718,grad_norm: 0.9999992935263559, iteration: 278712
loss: 1.0450098514556885,grad_norm: 0.9999992914143335, iteration: 278713
loss: 1.0242213010787964,grad_norm: 0.9999992519535638, iteration: 278714
loss: 1.0133886337280273,grad_norm: 0.8255893724217649, iteration: 278715
loss: 0.9932864904403687,grad_norm: 0.8357383798195751, iteration: 278716
loss: 0.9673236012458801,grad_norm: 0.9999993866000875, iteration: 278717
loss: 0.993362307548523,grad_norm: 0.9079856258566791, iteration: 278718
loss: 1.0217787027359009,grad_norm: 0.8648023825242491, iteration: 278719
loss: 1.0171095132827759,grad_norm: 0.9999993701314905, iteration: 278720
loss: 0.9654127955436707,grad_norm: 0.873989238144307, iteration: 278721
loss: 1.0471630096435547,grad_norm: 0.9833938621894934, iteration: 278722
loss: 1.0080622434616089,grad_norm: 0.9236221347713488, iteration: 278723
loss: 1.0223275423049927,grad_norm: 0.9109343380357422, iteration: 278724
loss: 1.0325146913528442,grad_norm: 0.9096206044920259, iteration: 278725
loss: 1.005010962486267,grad_norm: 0.9680960830086552, iteration: 278726
loss: 0.9796093106269836,grad_norm: 0.9441781196123933, iteration: 278727
loss: 1.028530478477478,grad_norm: 0.8795560072026514, iteration: 278728
loss: 1.0261520147323608,grad_norm: 0.8301921637342643, iteration: 278729
loss: 0.9699053764343262,grad_norm: 0.9516824485085812, iteration: 278730
loss: 1.111131191253662,grad_norm: 0.9999998714507798, iteration: 278731
loss: 0.9872944951057434,grad_norm: 0.8665560798739506, iteration: 278732
loss: 0.9845734238624573,grad_norm: 0.8343669097086311, iteration: 278733
loss: 1.0030428171157837,grad_norm: 0.999999219034732, iteration: 278734
loss: 1.0623191595077515,grad_norm: 0.9999991082437317, iteration: 278735
loss: 1.038571834564209,grad_norm: 0.974540932204771, iteration: 278736
loss: 1.0047426223754883,grad_norm: 0.853268094049106, iteration: 278737
loss: 1.0361816883087158,grad_norm: 0.8445377407523786, iteration: 278738
loss: 1.0151385068893433,grad_norm: 0.8796652355134149, iteration: 278739
loss: 1.018499732017517,grad_norm: 0.8153525814694212, iteration: 278740
loss: 0.9843812584877014,grad_norm: 0.9999991531787503, iteration: 278741
loss: 0.9397239089012146,grad_norm: 0.8991615505451405, iteration: 278742
loss: 1.0133299827575684,grad_norm: 0.9999992716860627, iteration: 278743
loss: 1.008029818534851,grad_norm: 0.7497497180959497, iteration: 278744
loss: 0.976325273513794,grad_norm: 0.8268968247358341, iteration: 278745
loss: 1.1224842071533203,grad_norm: 0.9999994171824076, iteration: 278746
loss: 1.0044519901275635,grad_norm: 0.9999995651548018, iteration: 278747
loss: 1.0114785432815552,grad_norm: 0.8515588423770086, iteration: 278748
loss: 1.0635372400283813,grad_norm: 0.9999998563675542, iteration: 278749
loss: 0.9834930896759033,grad_norm: 0.9283287258175171, iteration: 278750
loss: 0.9913694262504578,grad_norm: 0.9348998577434151, iteration: 278751
loss: 0.9575240612030029,grad_norm: 0.9999991828316168, iteration: 278752
loss: 1.0039002895355225,grad_norm: 0.8801343357539342, iteration: 278753
loss: 0.9870949387550354,grad_norm: 0.7817854186354287, iteration: 278754
loss: 1.0974605083465576,grad_norm: 0.9062042914489222, iteration: 278755
loss: 0.9806085824966431,grad_norm: 0.9799664962127388, iteration: 278756
loss: 1.0300973653793335,grad_norm: 0.9999990576930211, iteration: 278757
loss: 1.0180606842041016,grad_norm: 0.9999992471225071, iteration: 278758
loss: 0.969923734664917,grad_norm: 0.86459937200908, iteration: 278759
loss: 1.019456386566162,grad_norm: 0.8111459325622398, iteration: 278760
loss: 0.975444495677948,grad_norm: 0.8296348625923454, iteration: 278761
loss: 0.970282256603241,grad_norm: 0.9335948325546453, iteration: 278762
loss: 0.9912328124046326,grad_norm: 0.7627560889956363, iteration: 278763
loss: 0.9823957681655884,grad_norm: 0.9031199008084101, iteration: 278764
loss: 0.9878413677215576,grad_norm: 0.928568230687266, iteration: 278765
loss: 0.9947128891944885,grad_norm: 0.8524465226986389, iteration: 278766
loss: 1.0378292798995972,grad_norm: 0.7907131616516434, iteration: 278767
loss: 0.970744788646698,grad_norm: 0.7946848516474757, iteration: 278768
loss: 1.0409774780273438,grad_norm: 0.9999992797720137, iteration: 278769
loss: 0.9883220195770264,grad_norm: 0.8888838776316977, iteration: 278770
loss: 1.0328620672225952,grad_norm: 0.8664827188500837, iteration: 278771
loss: 0.9757965803146362,grad_norm: 0.8632315497162845, iteration: 278772
loss: 0.9881095290184021,grad_norm: 0.8845101926121757, iteration: 278773
loss: 1.006580114364624,grad_norm: 0.9999996939436069, iteration: 278774
loss: 0.9835359454154968,grad_norm: 0.9087637905671131, iteration: 278775
loss: 1.0028009414672852,grad_norm: 0.7065786010468419, iteration: 278776
loss: 1.004310131072998,grad_norm: 0.7522425791414319, iteration: 278777
loss: 1.0059013366699219,grad_norm: 0.9339407876101417, iteration: 278778
loss: 0.9891982674598694,grad_norm: 0.973885984632498, iteration: 278779
loss: 0.9901318550109863,grad_norm: 0.9999990645031106, iteration: 278780
loss: 1.0108518600463867,grad_norm: 0.857320673922749, iteration: 278781
loss: 1.0119467973709106,grad_norm: 0.909670104700524, iteration: 278782
loss: 1.0420825481414795,grad_norm: 0.9999994916453855, iteration: 278783
loss: 1.075730323791504,grad_norm: 0.9999991318446602, iteration: 278784
loss: 1.0088058710098267,grad_norm: 0.8152506418669503, iteration: 278785
loss: 1.006554365158081,grad_norm: 0.8337624814251428, iteration: 278786
loss: 1.0048632621765137,grad_norm: 0.910991686850676, iteration: 278787
loss: 1.0164577960968018,grad_norm: 0.9048531464792495, iteration: 278788
loss: 1.0024083852767944,grad_norm: 0.8969283803331068, iteration: 278789
loss: 1.0375596284866333,grad_norm: 0.9373876222805788, iteration: 278790
loss: 0.977539598941803,grad_norm: 0.8120143217280678, iteration: 278791
loss: 0.9971383810043335,grad_norm: 0.8932162303064869, iteration: 278792
loss: 1.0625072717666626,grad_norm: 0.9999992976333693, iteration: 278793
loss: 0.9843084812164307,grad_norm: 0.8284603810960961, iteration: 278794
loss: 1.0109727382659912,grad_norm: 0.9999996355494432, iteration: 278795
loss: 0.9971864819526672,grad_norm: 0.9999995811753726, iteration: 278796
loss: 0.9892467856407166,grad_norm: 0.8951482517895193, iteration: 278797
loss: 1.0274325609207153,grad_norm: 0.8217957471499736, iteration: 278798
loss: 0.9686915874481201,grad_norm: 0.9999990143513114, iteration: 278799
loss: 0.9954198002815247,grad_norm: 0.991143136146838, iteration: 278800
loss: 0.9992842078208923,grad_norm: 0.7840084584241714, iteration: 278801
loss: 0.9545121192932129,grad_norm: 0.8795105896534943, iteration: 278802
loss: 1.0147954225540161,grad_norm: 0.9999991863758944, iteration: 278803
loss: 1.0398913621902466,grad_norm: 0.9999994491756737, iteration: 278804
loss: 1.0036152601242065,grad_norm: 0.8359642529931208, iteration: 278805
loss: 1.0374871492385864,grad_norm: 0.9229191379373678, iteration: 278806
loss: 1.0221538543701172,grad_norm: 0.7666006498461754, iteration: 278807
loss: 0.9447351098060608,grad_norm: 0.9999991124625165, iteration: 278808
loss: 0.994834840297699,grad_norm: 0.9731246211804078, iteration: 278809
loss: 1.0630335807800293,grad_norm: 0.9999999048915612, iteration: 278810
loss: 0.9947525262832642,grad_norm: 0.8897138924520288, iteration: 278811
loss: 1.0337997674942017,grad_norm: 0.9999994347196394, iteration: 278812
loss: 0.9823386669158936,grad_norm: 0.7867705085805977, iteration: 278813
loss: 0.9858667254447937,grad_norm: 0.7507920788763087, iteration: 278814
loss: 1.0228859186172485,grad_norm: 0.9533199189517333, iteration: 278815
loss: 1.093340516090393,grad_norm: 0.9999997228511007, iteration: 278816
loss: 0.9887300133705139,grad_norm: 0.8837698679900873, iteration: 278817
loss: 0.9927513003349304,grad_norm: 0.7409960532280925, iteration: 278818
loss: 1.054764747619629,grad_norm: 0.9999995600258498, iteration: 278819
loss: 1.014388918876648,grad_norm: 0.9999990643931951, iteration: 278820
loss: 1.0564231872558594,grad_norm: 0.9999992262209314, iteration: 278821
loss: 0.9978765249252319,grad_norm: 0.7823768894177673, iteration: 278822
loss: 1.1167051792144775,grad_norm: 0.7923301872267798, iteration: 278823
loss: 0.9981464743614197,grad_norm: 0.7396989284552542, iteration: 278824
loss: 1.044566035270691,grad_norm: 0.9999995548521087, iteration: 278825
loss: 1.0173683166503906,grad_norm: 0.9999989763328668, iteration: 278826
loss: 0.9914358854293823,grad_norm: 0.9794833904373759, iteration: 278827
loss: 1.0115833282470703,grad_norm: 0.9537034852437675, iteration: 278828
loss: 1.0701602697372437,grad_norm: 0.9999991756314722, iteration: 278829
loss: 1.0967049598693848,grad_norm: 0.9999991929637302, iteration: 278830
loss: 1.018166184425354,grad_norm: 0.7457163800211529, iteration: 278831
loss: 1.0366253852844238,grad_norm: 0.8840481410375948, iteration: 278832
loss: 0.9960054755210876,grad_norm: 0.8948430609285957, iteration: 278833
loss: 1.0227417945861816,grad_norm: 0.9999991374174487, iteration: 278834
loss: 1.0301287174224854,grad_norm: 0.9544028395912321, iteration: 278835
loss: 0.9945616126060486,grad_norm: 0.9999990810589688, iteration: 278836
loss: 0.993376612663269,grad_norm: 0.8117128940208683, iteration: 278837
loss: 0.9888653755187988,grad_norm: 0.9903998136292227, iteration: 278838
loss: 0.9986419677734375,grad_norm: 0.7656331795998588, iteration: 278839
loss: 1.0131750106811523,grad_norm: 0.9273280552751879, iteration: 278840
loss: 0.9875869750976562,grad_norm: 0.8773954495171888, iteration: 278841
loss: 1.0123499631881714,grad_norm: 0.8431273315820675, iteration: 278842
loss: 1.0073782205581665,grad_norm: 0.9999997242099712, iteration: 278843
loss: 1.031148910522461,grad_norm: 0.9999994639718018, iteration: 278844
loss: 1.016628384590149,grad_norm: 0.99999914075144, iteration: 278845
loss: 1.0211355686187744,grad_norm: 0.9896840566766982, iteration: 278846
loss: 0.9945781230926514,grad_norm: 0.999999048141114, iteration: 278847
loss: 1.0115151405334473,grad_norm: 0.9770927820088167, iteration: 278848
loss: 1.0195232629776,grad_norm: 0.7722869974930192, iteration: 278849
loss: 0.9933334589004517,grad_norm: 0.8485986693924232, iteration: 278850
loss: 0.9907248616218567,grad_norm: 0.9473849861658341, iteration: 278851
loss: 0.983334481716156,grad_norm: 0.8593056290231402, iteration: 278852
loss: 1.0084457397460938,grad_norm: 0.8036201490554133, iteration: 278853
loss: 0.9609842896461487,grad_norm: 0.9198187717141728, iteration: 278854
loss: 0.9659962058067322,grad_norm: 0.854260234881453, iteration: 278855
loss: 0.9767116904258728,grad_norm: 0.8105855830584208, iteration: 278856
loss: 1.0100442171096802,grad_norm: 0.8635103935401434, iteration: 278857
loss: 1.0040405988693237,grad_norm: 0.9626433699914184, iteration: 278858
loss: 0.9759862422943115,grad_norm: 0.9195493943166259, iteration: 278859
loss: 0.9830865263938904,grad_norm: 0.8747540663455443, iteration: 278860
loss: 1.0896129608154297,grad_norm: 0.8909942349200916, iteration: 278861
loss: 1.000231385231018,grad_norm: 0.764016337075794, iteration: 278862
loss: 1.0181828737258911,grad_norm: 0.999999267841443, iteration: 278863
loss: 1.066418170928955,grad_norm: 0.9999994360457823, iteration: 278864
loss: 0.9845547080039978,grad_norm: 0.8273946799147736, iteration: 278865
loss: 1.005960464477539,grad_norm: 0.876666324483567, iteration: 278866
loss: 1.0265469551086426,grad_norm: 0.9684565251853929, iteration: 278867
loss: 0.9561928510665894,grad_norm: 0.9320360391971422, iteration: 278868
loss: 0.9635336995124817,grad_norm: 0.9999991430450225, iteration: 278869
loss: 1.018894076347351,grad_norm: 0.9667856411171978, iteration: 278870
loss: 1.0222630500793457,grad_norm: 0.8783973575346158, iteration: 278871
loss: 1.030682921409607,grad_norm: 0.9999991575935175, iteration: 278872
loss: 1.0199317932128906,grad_norm: 0.999999470654637, iteration: 278873
loss: 1.0293129682540894,grad_norm: 0.935432670079078, iteration: 278874
loss: 1.069324016571045,grad_norm: 0.9999989260370379, iteration: 278875
loss: 0.9741137027740479,grad_norm: 0.7656619369008862, iteration: 278876
loss: 1.0157824754714966,grad_norm: 0.8905182514469057, iteration: 278877
loss: 1.064750075340271,grad_norm: 0.9999999639902596, iteration: 278878
loss: 1.003627896308899,grad_norm: 0.980094197238057, iteration: 278879
loss: 0.9998422265052795,grad_norm: 0.6488941810489037, iteration: 278880
loss: 0.9715521931648254,grad_norm: 0.8713057602769828, iteration: 278881
loss: 0.9840316772460938,grad_norm: 0.9464599082244044, iteration: 278882
loss: 1.0346077680587769,grad_norm: 0.9636418737262568, iteration: 278883
loss: 0.9781734347343445,grad_norm: 0.8903486029391829, iteration: 278884
loss: 1.0203003883361816,grad_norm: 0.8654561387752362, iteration: 278885
loss: 1.007423758506775,grad_norm: 0.840005880607274, iteration: 278886
loss: 1.0809663534164429,grad_norm: 0.999999515538515, iteration: 278887
loss: 1.0632728338241577,grad_norm: 0.9274799611319327, iteration: 278888
loss: 1.0011502504348755,grad_norm: 0.843752506585113, iteration: 278889
loss: 1.0593746900558472,grad_norm: 0.880951148437301, iteration: 278890
loss: 1.0205695629119873,grad_norm: 0.9675060529983107, iteration: 278891
loss: 0.9790940284729004,grad_norm: 0.897532795950748, iteration: 278892
loss: 1.0222105979919434,grad_norm: 0.9999995470373806, iteration: 278893
loss: 1.0448532104492188,grad_norm: 0.8269683939161154, iteration: 278894
loss: 0.976261556148529,grad_norm: 0.8833111330208341, iteration: 278895
loss: 1.0025029182434082,grad_norm: 0.770324446240059, iteration: 278896
loss: 1.0175453424453735,grad_norm: 0.9999998568605464, iteration: 278897
loss: 1.0118874311447144,grad_norm: 0.7707486774368418, iteration: 278898
loss: 0.9918073415756226,grad_norm: 0.8524448246852755, iteration: 278899
loss: 1.0331469774246216,grad_norm: 0.7874777255518671, iteration: 278900
loss: 1.0310989618301392,grad_norm: 0.8101735400178266, iteration: 278901
loss: 0.9796518683433533,grad_norm: 0.9278455614786172, iteration: 278902
loss: 0.974358320236206,grad_norm: 0.8895464201304575, iteration: 278903
loss: 0.9963763952255249,grad_norm: 0.894285463344294, iteration: 278904
loss: 0.9664444923400879,grad_norm: 0.8100260428937169, iteration: 278905
loss: 0.9989244341850281,grad_norm: 0.90889588991319, iteration: 278906
loss: 0.9999710321426392,grad_norm: 0.685338233166728, iteration: 278907
loss: 1.027608036994934,grad_norm: 0.7575722528918599, iteration: 278908
loss: 1.0067716836929321,grad_norm: 0.9999992678386025, iteration: 278909
loss: 1.0203220844268799,grad_norm: 0.8325927045707036, iteration: 278910
loss: 0.9953158497810364,grad_norm: 0.8510741565498428, iteration: 278911
loss: 0.9993147253990173,grad_norm: 0.821487678712835, iteration: 278912
loss: 0.9879763722419739,grad_norm: 0.8208486505352801, iteration: 278913
loss: 0.9639150500297546,grad_norm: 0.8216682053590634, iteration: 278914
loss: 1.0185185670852661,grad_norm: 0.9577039064232068, iteration: 278915
loss: 1.0038028955459595,grad_norm: 0.7382440517544147, iteration: 278916
loss: 0.9958158731460571,grad_norm: 0.9024758240267002, iteration: 278917
loss: 1.0135533809661865,grad_norm: 0.9330735146404538, iteration: 278918
loss: 1.032106876373291,grad_norm: 0.86559607132049, iteration: 278919
loss: 0.9963277578353882,grad_norm: 0.9293343820549523, iteration: 278920
loss: 0.9922551512718201,grad_norm: 0.8231479657231099, iteration: 278921
loss: 0.9772332310676575,grad_norm: 0.8455668512544182, iteration: 278922
loss: 1.0076253414154053,grad_norm: 0.8587629834738975, iteration: 278923
loss: 0.9878228306770325,grad_norm: 0.888439050932478, iteration: 278924
loss: 1.0094949007034302,grad_norm: 0.8604845962074381, iteration: 278925
loss: 0.998370349407196,grad_norm: 0.8416730975834388, iteration: 278926
loss: 1.0164061784744263,grad_norm: 0.8896386719983895, iteration: 278927
loss: 0.9877819418907166,grad_norm: 0.8269545583672958, iteration: 278928
loss: 0.9945979118347168,grad_norm: 0.9999991394562053, iteration: 278929
loss: 0.9665934443473816,grad_norm: 0.9999991598727127, iteration: 278930
loss: 0.9827621579170227,grad_norm: 0.9335042786098627, iteration: 278931
loss: 1.0401538610458374,grad_norm: 0.9254781294864521, iteration: 278932
loss: 0.9737688899040222,grad_norm: 0.7546020617084077, iteration: 278933
loss: 1.0204339027404785,grad_norm: 0.9306268264559303, iteration: 278934
loss: 1.0313643217086792,grad_norm: 0.9999991601968019, iteration: 278935
loss: 1.1208405494689941,grad_norm: 0.9999996403136059, iteration: 278936
loss: 0.9589998722076416,grad_norm: 0.8458508779203938, iteration: 278937
loss: 0.9743756055831909,grad_norm: 0.999999350091336, iteration: 278938
loss: 1.0656654834747314,grad_norm: 0.9778145552667423, iteration: 278939
loss: 0.9788677096366882,grad_norm: 0.8345329163598065, iteration: 278940
loss: 1.0458463430404663,grad_norm: 0.9999992496429219, iteration: 278941
loss: 0.9806228876113892,grad_norm: 0.9722152072047606, iteration: 278942
loss: 0.9981704950332642,grad_norm: 0.8459942977892301, iteration: 278943
loss: 1.0037273168563843,grad_norm: 0.9223737977200586, iteration: 278944
loss: 1.0386484861373901,grad_norm: 0.9101685551334785, iteration: 278945
loss: 1.011234164237976,grad_norm: 0.9999991506419147, iteration: 278946
loss: 1.007392168045044,grad_norm: 0.7457712838609675, iteration: 278947
loss: 1.0029857158660889,grad_norm: 0.811116058124097, iteration: 278948
loss: 1.028090000152588,grad_norm: 0.9999992589650787, iteration: 278949
loss: 1.0481034517288208,grad_norm: 0.9157545224569463, iteration: 278950
loss: 0.9898087978363037,grad_norm: 0.9999995719833789, iteration: 278951
loss: 0.9897286295890808,grad_norm: 0.9999989438964024, iteration: 278952
loss: 1.0066888332366943,grad_norm: 0.9442989602469001, iteration: 278953
loss: 0.9780072569847107,grad_norm: 0.7891060510436649, iteration: 278954
loss: 1.0178563594818115,grad_norm: 0.8897985045985398, iteration: 278955
loss: 0.9836000204086304,grad_norm: 0.8332181881726368, iteration: 278956
loss: 1.0017855167388916,grad_norm: 0.9726578822028009, iteration: 278957
loss: 1.038748025894165,grad_norm: 0.8381095290159224, iteration: 278958
loss: 1.04989755153656,grad_norm: 0.9308479729892611, iteration: 278959
loss: 1.008926510810852,grad_norm: 0.8518134474972864, iteration: 278960
loss: 1.032866358757019,grad_norm: 0.8640377758777792, iteration: 278961
loss: 0.9684855937957764,grad_norm: 0.9790275016924794, iteration: 278962
loss: 0.9689286351203918,grad_norm: 0.8178035270522067, iteration: 278963
loss: 1.0472500324249268,grad_norm: 0.99999911864872, iteration: 278964
loss: 0.9691553115844727,grad_norm: 0.9444293424791258, iteration: 278965
loss: 1.033516764640808,grad_norm: 0.9999993630866705, iteration: 278966
loss: 1.022931694984436,grad_norm: 0.99999928198423, iteration: 278967
loss: 0.9870455861091614,grad_norm: 0.7896441722945268, iteration: 278968
loss: 1.016616702079773,grad_norm: 0.8638673124574239, iteration: 278969
loss: 0.9685357213020325,grad_norm: 0.7440371130017983, iteration: 278970
loss: 1.041098952293396,grad_norm: 0.9772411123872885, iteration: 278971
loss: 1.0129700899124146,grad_norm: 0.7980070486155889, iteration: 278972
loss: 0.9592275619506836,grad_norm: 0.8554687426767426, iteration: 278973
loss: 1.0164573192596436,grad_norm: 0.8764408842811416, iteration: 278974
loss: 1.003886103630066,grad_norm: 0.8046412539962005, iteration: 278975
loss: 0.9516537189483643,grad_norm: 0.7796582948259378, iteration: 278976
loss: 0.9948825240135193,grad_norm: 0.9999995542837012, iteration: 278977
loss: 0.9992650151252747,grad_norm: 0.8246779739748242, iteration: 278978
loss: 1.0022876262664795,grad_norm: 0.9084383664097091, iteration: 278979
loss: 0.9547916054725647,grad_norm: 0.8765816722496171, iteration: 278980
loss: 1.018876314163208,grad_norm: 0.9999990496361674, iteration: 278981
loss: 1.0064696073532104,grad_norm: 0.8046847618529922, iteration: 278982
loss: 0.9857389330863953,grad_norm: 0.8222404528140388, iteration: 278983
loss: 0.9912959337234497,grad_norm: 0.9999991494457976, iteration: 278984
loss: 1.0382826328277588,grad_norm: 0.8582471197784434, iteration: 278985
loss: 1.003436803817749,grad_norm: 0.9460966821120584, iteration: 278986
loss: 0.9626289010047913,grad_norm: 0.80440069927797, iteration: 278987
loss: 1.0284875631332397,grad_norm: 0.8350216878871835, iteration: 278988
loss: 0.9753468036651611,grad_norm: 0.8293647189471768, iteration: 278989
loss: 0.9863185882568359,grad_norm: 0.9084045535228482, iteration: 278990
loss: 0.9998838901519775,grad_norm: 0.983798345125908, iteration: 278991
loss: 0.9978217482566833,grad_norm: 0.9053344336867212, iteration: 278992
loss: 1.022965669631958,grad_norm: 0.7383051507287142, iteration: 278993
loss: 0.9825479388237,grad_norm: 0.8512609770316243, iteration: 278994
loss: 1.0552111864089966,grad_norm: 1.000000006282582, iteration: 278995
loss: 0.9814963936805725,grad_norm: 0.8539567512441452, iteration: 278996
loss: 1.0171260833740234,grad_norm: 0.7085221760543836, iteration: 278997
loss: 1.01459801197052,grad_norm: 0.9402145737299289, iteration: 278998
loss: 1.0412013530731201,grad_norm: 0.835281148130635, iteration: 278999
loss: 0.9767972826957703,grad_norm: 0.9404228789936775, iteration: 279000
loss: 1.0053514242172241,grad_norm: 0.918961899456708, iteration: 279001
loss: 0.9951848387718201,grad_norm: 0.9999990708680746, iteration: 279002
loss: 0.9950112700462341,grad_norm: 0.9955526026602483, iteration: 279003
loss: 1.0369246006011963,grad_norm: 0.8164597844294627, iteration: 279004
loss: 1.007840871810913,grad_norm: 0.7324924262246341, iteration: 279005
loss: 0.9971448183059692,grad_norm: 0.7493650762935203, iteration: 279006
loss: 0.9893006682395935,grad_norm: 0.679461562662945, iteration: 279007
loss: 1.0102983713150024,grad_norm: 0.8975789886520081, iteration: 279008
loss: 1.0264333486557007,grad_norm: 0.969483847271831, iteration: 279009
loss: 1.0065028667449951,grad_norm: 0.973211678671486, iteration: 279010
loss: 1.0298781394958496,grad_norm: 0.9999992344328836, iteration: 279011
loss: 1.0364794731140137,grad_norm: 0.7952637117668904, iteration: 279012
loss: 0.9879307150840759,grad_norm: 0.8872361107178988, iteration: 279013
loss: 0.9966478943824768,grad_norm: 0.8342713865399929, iteration: 279014
loss: 1.0159008502960205,grad_norm: 0.999999128945958, iteration: 279015
loss: 1.0118268728256226,grad_norm: 0.8886608973404908, iteration: 279016
loss: 0.9822826385498047,grad_norm: 0.9996286849453089, iteration: 279017
loss: 0.9956716895103455,grad_norm: 0.762684552236271, iteration: 279018
loss: 1.0026195049285889,grad_norm: 0.9999991118635679, iteration: 279019
loss: 0.9686254262924194,grad_norm: 0.7013514239129612, iteration: 279020
loss: 1.0159612894058228,grad_norm: 0.7082950702184471, iteration: 279021
loss: 0.9799941778182983,grad_norm: 0.9013340900622151, iteration: 279022
loss: 1.011840581893921,grad_norm: 0.836329557832229, iteration: 279023
loss: 0.9720504879951477,grad_norm: 0.8698245162768938, iteration: 279024
loss: 0.9867244958877563,grad_norm: 0.7157732843404404, iteration: 279025
loss: 1.0020595788955688,grad_norm: 0.9780462284633208, iteration: 279026
loss: 0.9751660823822021,grad_norm: 0.7892242721296616, iteration: 279027
loss: 1.004879355430603,grad_norm: 0.9454936194645522, iteration: 279028
loss: 0.9924389719963074,grad_norm: 0.999999202711286, iteration: 279029
loss: 1.02707839012146,grad_norm: 0.9999990504913816, iteration: 279030
loss: 0.984915554523468,grad_norm: 0.8062114916627836, iteration: 279031
loss: 1.017156720161438,grad_norm: 0.9999991853863531, iteration: 279032
loss: 0.9829496145248413,grad_norm: 0.8364419252189021, iteration: 279033
loss: 1.03223717212677,grad_norm: 0.9999990896541474, iteration: 279034
loss: 1.0434701442718506,grad_norm: 0.9128751739587936, iteration: 279035
loss: 0.9937912225723267,grad_norm: 0.8621012584125727, iteration: 279036
loss: 0.9832526445388794,grad_norm: 0.937712697396315, iteration: 279037
loss: 0.9675489664077759,grad_norm: 0.8805559053056063, iteration: 279038
loss: 0.9805814027786255,grad_norm: 0.9999991540748698, iteration: 279039
loss: 0.9801857471466064,grad_norm: 0.9157423397715458, iteration: 279040
loss: 1.0071816444396973,grad_norm: 0.9193403472342504, iteration: 279041
loss: 1.0107934474945068,grad_norm: 0.8506189596501662, iteration: 279042
loss: 1.0329056978225708,grad_norm: 0.7605792584984742, iteration: 279043
loss: 1.0039606094360352,grad_norm: 0.7966256424368858, iteration: 279044
loss: 0.9627612829208374,grad_norm: 0.8369780866882677, iteration: 279045
loss: 1.0244001150131226,grad_norm: 0.835063746269398, iteration: 279046
loss: 1.062132716178894,grad_norm: 0.9999998545637995, iteration: 279047
loss: 1.020374059677124,grad_norm: 0.8415691930196324, iteration: 279048
loss: 0.9754139184951782,grad_norm: 0.7607805646155749, iteration: 279049
loss: 0.9847736358642578,grad_norm: 0.861825416829054, iteration: 279050
loss: 1.0316003561019897,grad_norm: 0.7742649102606844, iteration: 279051
loss: 1.0110381841659546,grad_norm: 0.7606155479768231, iteration: 279052
loss: 0.9914592504501343,grad_norm: 0.9999991229062544, iteration: 279053
loss: 1.0170444250106812,grad_norm: 0.6850314942819209, iteration: 279054
loss: 1.0087103843688965,grad_norm: 0.8759704942703493, iteration: 279055
loss: 0.9859498739242554,grad_norm: 0.8512491603500522, iteration: 279056
loss: 1.006455898284912,grad_norm: 0.8925176801022879, iteration: 279057
loss: 0.9799121022224426,grad_norm: 0.7502687568808292, iteration: 279058
loss: 1.0332798957824707,grad_norm: 0.7871651576370509, iteration: 279059
loss: 1.0888261795043945,grad_norm: 0.9999991521302788, iteration: 279060
loss: 0.9868783354759216,grad_norm: 0.8871459017173811, iteration: 279061
loss: 0.998911440372467,grad_norm: 0.9999991741826127, iteration: 279062
loss: 1.018844723701477,grad_norm: 0.8017300890276463, iteration: 279063
loss: 0.991142213344574,grad_norm: 0.7891546474068378, iteration: 279064
loss: 0.9692814350128174,grad_norm: 0.9999991330361792, iteration: 279065
loss: 1.0339436531066895,grad_norm: 0.8665381772098875, iteration: 279066
loss: 0.9894446134567261,grad_norm: 0.688025268450874, iteration: 279067
loss: 1.0040019750595093,grad_norm: 0.8161684231859544, iteration: 279068
loss: 1.0259473323822021,grad_norm: 0.9857413802545576, iteration: 279069
loss: 0.9955524206161499,grad_norm: 0.7567933588851087, iteration: 279070
loss: 1.0056822299957275,grad_norm: 0.9619423215445408, iteration: 279071
loss: 0.9995216727256775,grad_norm: 0.9122582514812059, iteration: 279072
loss: 0.9744296669960022,grad_norm: 0.9999990313158685, iteration: 279073
loss: 1.0195393562316895,grad_norm: 0.7426639189061915, iteration: 279074
loss: 1.0046155452728271,grad_norm: 0.8881483453953232, iteration: 279075
loss: 1.012711763381958,grad_norm: 0.8903539303396579, iteration: 279076
loss: 1.0148142576217651,grad_norm: 0.8900954423293729, iteration: 279077
loss: 1.0126895904541016,grad_norm: 0.9131772951425506, iteration: 279078
loss: 1.008190393447876,grad_norm: 0.9256491492997667, iteration: 279079
loss: 0.9768644571304321,grad_norm: 0.9999992080016356, iteration: 279080
loss: 1.0085514783859253,grad_norm: 0.9999990589837652, iteration: 279081
loss: 1.0184859037399292,grad_norm: 0.8553652867708629, iteration: 279082
loss: 0.9842384457588196,grad_norm: 0.7748011222670516, iteration: 279083
loss: 0.9843518137931824,grad_norm: 0.8614530116628516, iteration: 279084
loss: 0.9770470857620239,grad_norm: 0.7848566298970064, iteration: 279085
loss: 0.9746789336204529,grad_norm: 0.7784674035398391, iteration: 279086
loss: 1.0077646970748901,grad_norm: 0.903565533407438, iteration: 279087
loss: 0.9813603162765503,grad_norm: 0.9999990284550646, iteration: 279088
loss: 0.970402181148529,grad_norm: 0.9220003004368662, iteration: 279089
loss: 0.955396294593811,grad_norm: 0.7950557002345601, iteration: 279090
loss: 0.9872526526451111,grad_norm: 0.8660416392633372, iteration: 279091
loss: 0.992264449596405,grad_norm: 0.823506646376243, iteration: 279092
loss: 0.9874139428138733,grad_norm: 0.7992911840903673, iteration: 279093
loss: 1.0058506727218628,grad_norm: 0.9827911413566052, iteration: 279094
loss: 0.9924392104148865,grad_norm: 0.9053601732531468, iteration: 279095
loss: 1.0209063291549683,grad_norm: 0.8235517874830254, iteration: 279096
loss: 0.974358081817627,grad_norm: 0.7879029371889601, iteration: 279097
loss: 1.0100656747817993,grad_norm: 0.7859666449843895, iteration: 279098
loss: 0.9724089503288269,grad_norm: 0.8131936662313263, iteration: 279099
loss: 1.0366666316986084,grad_norm: 0.851240501248273, iteration: 279100
loss: 1.034622073173523,grad_norm: 0.8639618805708247, iteration: 279101
loss: 1.0034035444259644,grad_norm: 0.7414960695911819, iteration: 279102
loss: 0.9975770711898804,grad_norm: 0.8362726226895631, iteration: 279103
loss: 1.0178052186965942,grad_norm: 0.7982804254371114, iteration: 279104
loss: 1.0583746433258057,grad_norm: 0.999999781596828, iteration: 279105
loss: 1.0063583850860596,grad_norm: 0.9999990615889766, iteration: 279106
loss: 1.0085840225219727,grad_norm: 0.8550240871668708, iteration: 279107
loss: 0.9868865609169006,grad_norm: 0.7914536973059708, iteration: 279108
loss: 1.0057096481323242,grad_norm: 0.7144611791723531, iteration: 279109
loss: 1.0068767070770264,grad_norm: 0.9598051408494312, iteration: 279110
loss: 0.9974629282951355,grad_norm: 0.9426343168482505, iteration: 279111
loss: 0.9939903616905212,grad_norm: 0.8512494729319445, iteration: 279112
loss: 1.0049833059310913,grad_norm: 0.7347391878339835, iteration: 279113
loss: 0.984386146068573,grad_norm: 0.880894138613058, iteration: 279114
loss: 1.0027397871017456,grad_norm: 0.914546272162205, iteration: 279115
loss: 0.9718238711357117,grad_norm: 0.9695355472155357, iteration: 279116
loss: 1.0248243808746338,grad_norm: 0.9662084860614916, iteration: 279117
loss: 1.0043021440505981,grad_norm: 0.834344940170027, iteration: 279118
loss: 0.9596721529960632,grad_norm: 0.8629399737833358, iteration: 279119
loss: 0.993038535118103,grad_norm: 0.7566396665847085, iteration: 279120
loss: 1.013076901435852,grad_norm: 0.7983595315644997, iteration: 279121
loss: 1.0615732669830322,grad_norm: 0.9999990057236661, iteration: 279122
loss: 1.0065163373947144,grad_norm: 0.7516034011677963, iteration: 279123
loss: 0.9690302014350891,grad_norm: 0.9572482612167426, iteration: 279124
loss: 0.9766085743904114,grad_norm: 0.9999993210607392, iteration: 279125
loss: 0.9986317157745361,grad_norm: 0.988921362571031, iteration: 279126
loss: 1.0065418481826782,grad_norm: 0.8455334370106306, iteration: 279127
loss: 1.0151273012161255,grad_norm: 0.9778691056039482, iteration: 279128
loss: 0.9934911131858826,grad_norm: 0.999998940836207, iteration: 279129
loss: 1.0169178247451782,grad_norm: 0.8511552162149838, iteration: 279130
loss: 1.0240020751953125,grad_norm: 0.9999992997098403, iteration: 279131
loss: 0.9962291121482849,grad_norm: 0.8309357664428718, iteration: 279132
loss: 0.9630409479141235,grad_norm: 0.9999992420185214, iteration: 279133
loss: 0.9894506335258484,grad_norm: 0.9999991372079737, iteration: 279134
loss: 1.0514227151870728,grad_norm: 0.9999992666381834, iteration: 279135
loss: 0.9689682722091675,grad_norm: 0.8281604742424603, iteration: 279136
loss: 0.9970837235450745,grad_norm: 0.8758579581005477, iteration: 279137
loss: 1.0023322105407715,grad_norm: 0.9745516706988395, iteration: 279138
loss: 1.0117567777633667,grad_norm: 0.999999076936544, iteration: 279139
loss: 1.0036351680755615,grad_norm: 0.8113763031449829, iteration: 279140
loss: 1.0146615505218506,grad_norm: 0.9999992086365654, iteration: 279141
loss: 1.020459771156311,grad_norm: 0.746675712470745, iteration: 279142
loss: 1.0274991989135742,grad_norm: 0.6878424076666796, iteration: 279143
loss: 1.0016493797302246,grad_norm: 0.848731160618021, iteration: 279144
loss: 0.9934465289115906,grad_norm: 0.9375433838275621, iteration: 279145
loss: 0.9840556979179382,grad_norm: 0.8144671706847337, iteration: 279146
loss: 0.9970413446426392,grad_norm: 0.8828788808858139, iteration: 279147
loss: 1.0289958715438843,grad_norm: 0.7616223755265581, iteration: 279148
loss: 1.0317901372909546,grad_norm: 0.8089958119405152, iteration: 279149
loss: 1.0004693269729614,grad_norm: 0.9960080098538825, iteration: 279150
loss: 1.0334365367889404,grad_norm: 0.9999991454087794, iteration: 279151
loss: 1.0286128520965576,grad_norm: 0.9999997242301327, iteration: 279152
loss: 1.001124620437622,grad_norm: 0.965190314604284, iteration: 279153
loss: 1.006340742111206,grad_norm: 0.8162747568822909, iteration: 279154
loss: 1.008123755455017,grad_norm: 0.7952119536227256, iteration: 279155
loss: 1.0207068920135498,grad_norm: 0.7512815086103242, iteration: 279156
loss: 1.0137321949005127,grad_norm: 0.9580621054967661, iteration: 279157
loss: 1.0014591217041016,grad_norm: 0.7168375316833446, iteration: 279158
loss: 0.9533725380897522,grad_norm: 0.7644512631268893, iteration: 279159
loss: 0.9739184379577637,grad_norm: 0.794020687963457, iteration: 279160
loss: 0.9633971452713013,grad_norm: 0.9999990555612289, iteration: 279161
loss: 1.0201219320297241,grad_norm: 0.8631884629012762, iteration: 279162
loss: 0.9906312823295593,grad_norm: 0.9322324350405182, iteration: 279163
loss: 0.9785781502723694,grad_norm: 0.9484461960161633, iteration: 279164
loss: 1.0879127979278564,grad_norm: 0.9999996637790707, iteration: 279165
loss: 0.9937173128128052,grad_norm: 0.9999990742946478, iteration: 279166
loss: 0.9809436798095703,grad_norm: 0.8734553783352742, iteration: 279167
loss: 0.9874722957611084,grad_norm: 0.6882849399605147, iteration: 279168
loss: 0.9901692271232605,grad_norm: 0.7187910062593544, iteration: 279169
loss: 0.9882128834724426,grad_norm: 0.9384445803407999, iteration: 279170
loss: 0.9676708579063416,grad_norm: 0.8669541072679252, iteration: 279171
loss: 1.0375568866729736,grad_norm: 0.9421629418393724, iteration: 279172
loss: 1.0321911573410034,grad_norm: 0.9943744234331887, iteration: 279173
loss: 0.9862563014030457,grad_norm: 0.9742349922573394, iteration: 279174
loss: 0.9807643294334412,grad_norm: 0.7400219041854211, iteration: 279175
loss: 1.0455265045166016,grad_norm: 0.999999780774087, iteration: 279176
loss: 1.0079818964004517,grad_norm: 0.9999989827610203, iteration: 279177
loss: 0.9909157156944275,grad_norm: 0.7273655449219091, iteration: 279178
loss: 1.0359125137329102,grad_norm: 0.9999990594615559, iteration: 279179
loss: 0.9946973323822021,grad_norm: 0.856950128553155, iteration: 279180
loss: 0.9744885563850403,grad_norm: 0.9494985620535468, iteration: 279181
loss: 0.9635046720504761,grad_norm: 0.9367415186880085, iteration: 279182
loss: 1.0078529119491577,grad_norm: 0.8949537364278607, iteration: 279183
loss: 0.9886312484741211,grad_norm: 0.8749979929013677, iteration: 279184
loss: 0.996874988079071,grad_norm: 0.9999990305693874, iteration: 279185
loss: 1.102123737335205,grad_norm: 0.8768033396684518, iteration: 279186
loss: 1.0243704319000244,grad_norm: 0.9999991306323482, iteration: 279187
loss: 1.048549771308899,grad_norm: 0.9999999760067191, iteration: 279188
loss: 1.036615252494812,grad_norm: 0.8917776873944971, iteration: 279189
loss: 1.02222740650177,grad_norm: 0.8201147249163876, iteration: 279190
loss: 1.0006479024887085,grad_norm: 0.9999991664999143, iteration: 279191
loss: 0.9793770909309387,grad_norm: 0.761983624702052, iteration: 279192
loss: 0.9550333023071289,grad_norm: 0.9756056637271436, iteration: 279193
loss: 1.0239050388336182,grad_norm: 0.8143977906943833, iteration: 279194
loss: 0.9854984283447266,grad_norm: 0.9291657406902118, iteration: 279195
loss: 1.035602331161499,grad_norm: 0.803871583061358, iteration: 279196
loss: 0.9758895039558411,grad_norm: 0.8925247538586469, iteration: 279197
loss: 1.0016213655471802,grad_norm: 0.8445307296449821, iteration: 279198
loss: 0.9659151434898376,grad_norm: 0.7809683177866479, iteration: 279199
loss: 1.0182392597198486,grad_norm: 0.8532633845740438, iteration: 279200
loss: 1.0069752931594849,grad_norm: 0.8236141349129131, iteration: 279201
loss: 1.0038719177246094,grad_norm: 0.9049347968667002, iteration: 279202
loss: 1.0046719312667847,grad_norm: 0.870173855115101, iteration: 279203
loss: 0.9747551679611206,grad_norm: 0.7459062987314451, iteration: 279204
loss: 1.0123569965362549,grad_norm: 0.9301384618072746, iteration: 279205
loss: 1.0113345384597778,grad_norm: 0.9999992358522538, iteration: 279206
loss: 0.9806368350982666,grad_norm: 0.808956168504384, iteration: 279207
loss: 1.0105867385864258,grad_norm: 0.7061609913427638, iteration: 279208
loss: 1.004955530166626,grad_norm: 0.6874281809600418, iteration: 279209
loss: 1.0291882753372192,grad_norm: 0.9827268902800006, iteration: 279210
loss: 1.0189098119735718,grad_norm: 0.9999990447301379, iteration: 279211
loss: 1.050458312034607,grad_norm: 0.9999991591505358, iteration: 279212
loss: 1.0045171976089478,grad_norm: 0.8804745891487337, iteration: 279213
loss: 1.0129483938217163,grad_norm: 0.9999993035952057, iteration: 279214
loss: 1.0943502187728882,grad_norm: 0.9999994291204758, iteration: 279215
loss: 1.0555096864700317,grad_norm: 0.9999995745845389, iteration: 279216
loss: 0.9925488829612732,grad_norm: 0.9999991737533165, iteration: 279217
loss: 1.0291718244552612,grad_norm: 0.9999999004808477, iteration: 279218
loss: 1.018886923789978,grad_norm: 0.9653932448254479, iteration: 279219
loss: 0.9797605872154236,grad_norm: 0.9766606396941013, iteration: 279220
loss: 0.9964038729667664,grad_norm: 0.8601441621994859, iteration: 279221
loss: 1.0068488121032715,grad_norm: 0.9053879380454073, iteration: 279222
loss: 1.0350964069366455,grad_norm: 0.9259644369129563, iteration: 279223
loss: 0.9664883613586426,grad_norm: 0.8844300787842888, iteration: 279224
loss: 1.0161911249160767,grad_norm: 0.7904779914570409, iteration: 279225
loss: 1.007662296295166,grad_norm: 0.8618078979744404, iteration: 279226
loss: 0.9807339906692505,grad_norm: 0.793359895737953, iteration: 279227
loss: 0.9999635815620422,grad_norm: 0.949060201397911, iteration: 279228
loss: 0.9806700944900513,grad_norm: 0.776825470902775, iteration: 279229
loss: 1.0591931343078613,grad_norm: 0.9999990681917523, iteration: 279230
loss: 1.0328232049942017,grad_norm: 0.9999989250322271, iteration: 279231
loss: 1.012656569480896,grad_norm: 0.9314913734709176, iteration: 279232
loss: 1.023664951324463,grad_norm: 0.9622665156171148, iteration: 279233
loss: 0.9985752701759338,grad_norm: 0.9999991202390693, iteration: 279234
loss: 0.9902617931365967,grad_norm: 0.9345389136476201, iteration: 279235
loss: 0.9782172441482544,grad_norm: 0.7823464061046397, iteration: 279236
loss: 0.9989597797393799,grad_norm: 0.9234477548568406, iteration: 279237
loss: 1.0005688667297363,grad_norm: 0.8324765931548035, iteration: 279238
loss: 0.9923902153968811,grad_norm: 0.948335310222725, iteration: 279239
loss: 0.9803667664527893,grad_norm: 0.8323016293928616, iteration: 279240
loss: 1.055789589881897,grad_norm: 0.9999990474233035, iteration: 279241
loss: 0.9680985808372498,grad_norm: 0.8028532849973108, iteration: 279242
loss: 0.9766983985900879,grad_norm: 0.8837702611296221, iteration: 279243
loss: 0.9994096159934998,grad_norm: 0.8977536007458925, iteration: 279244
loss: 1.0231852531433105,grad_norm: 0.8018859101162437, iteration: 279245
loss: 0.9983646273612976,grad_norm: 0.9999992064907923, iteration: 279246
loss: 1.0271621942520142,grad_norm: 0.9171042528291508, iteration: 279247
loss: 1.012081503868103,grad_norm: 0.7317593114033271, iteration: 279248
loss: 0.9891889095306396,grad_norm: 0.8965655047133093, iteration: 279249
loss: 0.9561575651168823,grad_norm: 0.9289030276662205, iteration: 279250
loss: 0.9700894355773926,grad_norm: 0.8476330061506128, iteration: 279251
loss: 1.0312174558639526,grad_norm: 0.9000051898483494, iteration: 279252
loss: 0.9685285091400146,grad_norm: 0.9751838625881432, iteration: 279253
loss: 0.9819148778915405,grad_norm: 0.7852529924804096, iteration: 279254
loss: 1.0312532186508179,grad_norm: 0.8846977188824385, iteration: 279255
loss: 0.9837825894355774,grad_norm: 0.856275729948913, iteration: 279256
loss: 1.1008278131484985,grad_norm: 0.9999998545743856, iteration: 279257
loss: 1.0015372037887573,grad_norm: 0.8957750437619346, iteration: 279258
loss: 0.9716126322746277,grad_norm: 0.8655021030484413, iteration: 279259
loss: 1.1144741773605347,grad_norm: 0.9999998194188451, iteration: 279260
loss: 0.9870531558990479,grad_norm: 0.6866486899855202, iteration: 279261
loss: 0.9981446862220764,grad_norm: 0.9999990926708332, iteration: 279262
loss: 0.9800814986228943,grad_norm: 0.969493390904799, iteration: 279263
loss: 0.9704298973083496,grad_norm: 0.8560428043352684, iteration: 279264
loss: 0.9936367273330688,grad_norm: 0.8826747242022301, iteration: 279265
loss: 1.102914810180664,grad_norm: 0.9999996108144575, iteration: 279266
loss: 1.026078462600708,grad_norm: 0.8165442391231125, iteration: 279267
loss: 1.0196537971496582,grad_norm: 0.9999990368158854, iteration: 279268
loss: 0.9496276378631592,grad_norm: 0.763244041132354, iteration: 279269
loss: 1.005704402923584,grad_norm: 0.7115652513229247, iteration: 279270
loss: 0.9720479846000671,grad_norm: 0.8715271044415638, iteration: 279271
loss: 0.9843921065330505,grad_norm: 0.7564724877737772, iteration: 279272
loss: 1.0183236598968506,grad_norm: 0.7612680068713018, iteration: 279273
loss: 1.0799049139022827,grad_norm: 0.9758020257255919, iteration: 279274
loss: 1.0152674913406372,grad_norm: 0.7295842815598377, iteration: 279275
loss: 1.0283653736114502,grad_norm: 0.9017573497312321, iteration: 279276
loss: 0.98249751329422,grad_norm: 0.7577736865002062, iteration: 279277
loss: 1.1430906057357788,grad_norm: 0.9999994179731179, iteration: 279278
loss: 1.0149450302124023,grad_norm: 0.9882930877515151, iteration: 279279
loss: 1.0176012516021729,grad_norm: 0.8428657303663138, iteration: 279280
loss: 1.0296201705932617,grad_norm: 0.6817799549390108, iteration: 279281
loss: 0.9821670055389404,grad_norm: 0.9745017141921353, iteration: 279282
loss: 1.015114426612854,grad_norm: 0.9999993155157844, iteration: 279283
loss: 1.0212091207504272,grad_norm: 0.8815627042185743, iteration: 279284
loss: 1.0144152641296387,grad_norm: 0.9999996568293495, iteration: 279285
loss: 0.9830865859985352,grad_norm: 0.8282080440869461, iteration: 279286
loss: 1.0044465065002441,grad_norm: 0.72200106933669, iteration: 279287
loss: 0.9840879440307617,grad_norm: 0.8163301667896194, iteration: 279288
loss: 1.0078294277191162,grad_norm: 0.9022878521899184, iteration: 279289
loss: 0.9812530875205994,grad_norm: 0.9301980121482697, iteration: 279290
loss: 1.0152226686477661,grad_norm: 0.9969015094117483, iteration: 279291
loss: 1.1070518493652344,grad_norm: 0.9999996493850142, iteration: 279292
loss: 1.0466222763061523,grad_norm: 0.9999997555739119, iteration: 279293
loss: 1.0078051090240479,grad_norm: 0.8792460173201591, iteration: 279294
loss: 0.9681422710418701,grad_norm: 0.9056071561372504, iteration: 279295
loss: 0.9873327612876892,grad_norm: 0.9999991361649468, iteration: 279296
loss: 0.9752470850944519,grad_norm: 0.8488129747359914, iteration: 279297
loss: 0.9998396039009094,grad_norm: 0.839099976626518, iteration: 279298
loss: 1.0238804817199707,grad_norm: 0.9999994960910741, iteration: 279299
loss: 0.9875199794769287,grad_norm: 0.8164661192694517, iteration: 279300
loss: 1.0352708101272583,grad_norm: 0.886221052177857, iteration: 279301
loss: 1.034147024154663,grad_norm: 0.9999990553440574, iteration: 279302
loss: 1.0541741847991943,grad_norm: 0.9999998069617148, iteration: 279303
loss: 0.9906332492828369,grad_norm: 0.9999997801684173, iteration: 279304
loss: 0.9735472798347473,grad_norm: 0.9695225626750676, iteration: 279305
loss: 0.960470974445343,grad_norm: 0.811285911227851, iteration: 279306
loss: 0.9957478642463684,grad_norm: 0.9437314884399889, iteration: 279307
loss: 0.9814927577972412,grad_norm: 0.8565279694533791, iteration: 279308
loss: 0.9838064908981323,grad_norm: 0.8596698504555771, iteration: 279309
loss: 1.010151982307434,grad_norm: 0.8214708683807167, iteration: 279310
loss: 0.9602940082550049,grad_norm: 0.9999994745056355, iteration: 279311
loss: 1.0109212398529053,grad_norm: 0.96013426989729, iteration: 279312
loss: 1.0031890869140625,grad_norm: 0.9999989222760368, iteration: 279313
loss: 1.0319008827209473,grad_norm: 0.9999991250336051, iteration: 279314
loss: 1.0103344917297363,grad_norm: 0.8769677671594466, iteration: 279315
loss: 1.0059677362442017,grad_norm: 0.9313734853427262, iteration: 279316
loss: 1.024186611175537,grad_norm: 0.8471893658032341, iteration: 279317
loss: 1.1597793102264404,grad_norm: 0.9999998394506177, iteration: 279318
loss: 0.9915618300437927,grad_norm: 0.8321586668928759, iteration: 279319
loss: 1.0002835988998413,grad_norm: 0.999999010948989, iteration: 279320
loss: 1.0355123281478882,grad_norm: 0.7988731820893981, iteration: 279321
loss: 0.9824256300926208,grad_norm: 0.9643267710825075, iteration: 279322
loss: 1.019814372062683,grad_norm: 0.791266601448168, iteration: 279323
loss: 1.010243535041809,grad_norm: 0.9999990112368607, iteration: 279324
loss: 0.9986223578453064,grad_norm: 0.7734818895445517, iteration: 279325
loss: 0.9912939667701721,grad_norm: 0.9999990069252973, iteration: 279326
loss: 1.000146508216858,grad_norm: 0.8640575920206276, iteration: 279327
loss: 1.01934814453125,grad_norm: 0.9999992428300004, iteration: 279328
loss: 0.9776127338409424,grad_norm: 0.8626666107969785, iteration: 279329
loss: 1.0025386810302734,grad_norm: 0.9999991494669977, iteration: 279330
loss: 0.9772769212722778,grad_norm: 0.8502859934702457, iteration: 279331
loss: 1.0111678838729858,grad_norm: 0.771644090439632, iteration: 279332
loss: 0.9717423915863037,grad_norm: 0.8622722196698605, iteration: 279333
loss: 1.0184977054595947,grad_norm: 0.7049666347977468, iteration: 279334
loss: 1.0407642126083374,grad_norm: 0.9999990037666523, iteration: 279335
loss: 0.9990213513374329,grad_norm: 0.7569100401114921, iteration: 279336
loss: 0.9834465384483337,grad_norm: 0.876606612314646, iteration: 279337
loss: 1.0611482858657837,grad_norm: 0.903006970705659, iteration: 279338
loss: 0.9852531552314758,grad_norm: 0.8587363990255099, iteration: 279339
loss: 0.9693863391876221,grad_norm: 0.829252517655785, iteration: 279340
loss: 1.0193634033203125,grad_norm: 0.8740332968307735, iteration: 279341
loss: 0.9844859838485718,grad_norm: 0.846250720645978, iteration: 279342
loss: 1.007750391960144,grad_norm: 0.9152104542170693, iteration: 279343
loss: 0.9974100589752197,grad_norm: 0.8663961959298797, iteration: 279344
loss: 0.9995235204696655,grad_norm: 0.9999992867388887, iteration: 279345
loss: 1.0329538583755493,grad_norm: 0.9999990571314437, iteration: 279346
loss: 1.0296052694320679,grad_norm: 0.8079807591971824, iteration: 279347
loss: 1.003693699836731,grad_norm: 0.7882377300905538, iteration: 279348
loss: 1.0578416585922241,grad_norm: 0.850459550241273, iteration: 279349
loss: 1.034944772720337,grad_norm: 0.999998966073119, iteration: 279350
loss: 0.9793716073036194,grad_norm: 0.9999991234602145, iteration: 279351
loss: 1.0206303596496582,grad_norm: 0.8903831719928553, iteration: 279352
loss: 1.0063509941101074,grad_norm: 0.9295066948252024, iteration: 279353
loss: 0.9995274543762207,grad_norm: 0.9911596955389024, iteration: 279354
loss: 1.0378402471542358,grad_norm: 0.8529455067420268, iteration: 279355
loss: 0.9868910908699036,grad_norm: 0.9319359784706366, iteration: 279356
loss: 1.0358632802963257,grad_norm: 0.8933174143807932, iteration: 279357
loss: 0.9998812675476074,grad_norm: 0.8269389010566838, iteration: 279358
loss: 1.0295217037200928,grad_norm: 0.8407850164239058, iteration: 279359
loss: 0.995844304561615,grad_norm: 0.8800574644823261, iteration: 279360
loss: 0.961307168006897,grad_norm: 0.9280808498585295, iteration: 279361
loss: 0.9756542444229126,grad_norm: 0.914627133743507, iteration: 279362
loss: 0.9674993753433228,grad_norm: 0.7674164917987166, iteration: 279363
loss: 0.9968355894088745,grad_norm: 0.9273927870642236, iteration: 279364
loss: 0.9709687232971191,grad_norm: 0.999999041540982, iteration: 279365
loss: 1.009265661239624,grad_norm: 0.9999996879524696, iteration: 279366
loss: 1.0200088024139404,grad_norm: 0.7123147708907022, iteration: 279367
loss: 1.0130529403686523,grad_norm: 0.9637683507834247, iteration: 279368
loss: 1.0155001878738403,grad_norm: 0.9939496236412847, iteration: 279369
loss: 0.9737849831581116,grad_norm: 0.8363323383227778, iteration: 279370
loss: 1.0132269859313965,grad_norm: 0.9999990624535448, iteration: 279371
loss: 0.9675843119621277,grad_norm: 0.9522728166129882, iteration: 279372
loss: 1.0070842504501343,grad_norm: 0.9638087726403923, iteration: 279373
loss: 1.0233098268508911,grad_norm: 0.8806920477400796, iteration: 279374
loss: 1.0076382160186768,grad_norm: 0.9368956194987749, iteration: 279375
loss: 0.9881663918495178,grad_norm: 0.7711277358287943, iteration: 279376
loss: 0.9865128397941589,grad_norm: 0.9999990405870289, iteration: 279377
loss: 1.0265923738479614,grad_norm: 0.8384872288539459, iteration: 279378
loss: 1.0042643547058105,grad_norm: 0.920063009692489, iteration: 279379
loss: 1.0067431926727295,grad_norm: 0.9485882577998266, iteration: 279380
loss: 1.0503535270690918,grad_norm: 0.9686854455349986, iteration: 279381
loss: 0.9533165097236633,grad_norm: 0.8658988930860558, iteration: 279382
loss: 1.0370515584945679,grad_norm: 0.8968879043912212, iteration: 279383
loss: 0.9775201678276062,grad_norm: 0.9999992445056074, iteration: 279384
loss: 1.0047645568847656,grad_norm: 0.8044721812521723, iteration: 279385
loss: 0.9546419382095337,grad_norm: 0.747142746908322, iteration: 279386
loss: 0.9931049346923828,grad_norm: 0.7880158752902054, iteration: 279387
loss: 0.9713718295097351,grad_norm: 0.8123495484862698, iteration: 279388
loss: 0.9821145534515381,grad_norm: 0.9907704030889043, iteration: 279389
loss: 0.9554126858711243,grad_norm: 0.7589952940095009, iteration: 279390
loss: 1.05503511428833,grad_norm: 0.8954399548626905, iteration: 279391
loss: 1.0155423879623413,grad_norm: 0.930078224373058, iteration: 279392
loss: 1.0139546394348145,grad_norm: 0.9442129413750397, iteration: 279393
loss: 1.023919939994812,grad_norm: 0.8864609209268759, iteration: 279394
loss: 0.9996169209480286,grad_norm: 0.9805716272035184, iteration: 279395
loss: 1.0211015939712524,grad_norm: 0.7627199766412572, iteration: 279396
loss: 1.0063161849975586,grad_norm: 0.9999991608589788, iteration: 279397
loss: 0.963603138923645,grad_norm: 0.758574119686784, iteration: 279398
loss: 1.0314512252807617,grad_norm: 0.897089542519057, iteration: 279399
loss: 1.0036015510559082,grad_norm: 0.9113967600128133, iteration: 279400
loss: 1.0492401123046875,grad_norm: 0.911310154549454, iteration: 279401
loss: 1.020411729812622,grad_norm: 0.9999992363814533, iteration: 279402
loss: 0.9993765354156494,grad_norm: 0.9212025786374313, iteration: 279403
loss: 1.0080972909927368,grad_norm: 0.8605326383798768, iteration: 279404
loss: 0.9837772846221924,grad_norm: 0.9999992031304747, iteration: 279405
loss: 0.9913898706436157,grad_norm: 0.6911692001923291, iteration: 279406
loss: 0.979634165763855,grad_norm: 0.8159142444324766, iteration: 279407
loss: 0.9610663652420044,grad_norm: 0.9317339607261095, iteration: 279408
loss: 1.001953125,grad_norm: 0.9999991420472331, iteration: 279409
loss: 1.011584758758545,grad_norm: 0.8822656721498073, iteration: 279410
loss: 1.0036391019821167,grad_norm: 0.8162254456460623, iteration: 279411
loss: 1.0005509853363037,grad_norm: 0.999999199442169, iteration: 279412
loss: 0.9849857091903687,grad_norm: 0.8458825941986194, iteration: 279413
loss: 1.0010390281677246,grad_norm: 0.8914529554911355, iteration: 279414
loss: 1.0020955801010132,grad_norm: 0.9717120511544434, iteration: 279415
loss: 1.0119820833206177,grad_norm: 0.8320464796935682, iteration: 279416
loss: 1.0023688077926636,grad_norm: 0.9156834698074249, iteration: 279417
loss: 0.9632039070129395,grad_norm: 0.9537279597601893, iteration: 279418
loss: 0.987195611000061,grad_norm: 0.9999992930478185, iteration: 279419
loss: 1.0085856914520264,grad_norm: 0.8292048959057525, iteration: 279420
loss: 0.9816207885742188,grad_norm: 0.8167167731037522, iteration: 279421
loss: 0.9806849956512451,grad_norm: 0.8080622629332203, iteration: 279422
loss: 1.0122616291046143,grad_norm: 0.8365250239601231, iteration: 279423
loss: 0.9918885231018066,grad_norm: 0.9589313837279905, iteration: 279424
loss: 0.9786391854286194,grad_norm: 0.820520819055568, iteration: 279425
loss: 1.007000207901001,grad_norm: 0.953239605785462, iteration: 279426
loss: 0.9887261390686035,grad_norm: 0.9235450823664848, iteration: 279427
loss: 1.0050239562988281,grad_norm: 0.9541712592855727, iteration: 279428
loss: 1.0016732215881348,grad_norm: 0.862337624665511, iteration: 279429
loss: 1.0115712881088257,grad_norm: 0.853708417536371, iteration: 279430
loss: 1.0127220153808594,grad_norm: 0.744426446285076, iteration: 279431
loss: 1.0037744045257568,grad_norm: 0.8705491119781441, iteration: 279432
loss: 1.007379412651062,grad_norm: 0.8574218013203672, iteration: 279433
loss: 1.0127431154251099,grad_norm: 0.9999991272762185, iteration: 279434
loss: 1.0147701501846313,grad_norm: 0.8635824548435107, iteration: 279435
loss: 0.9924104809761047,grad_norm: 0.9099490780994588, iteration: 279436
loss: 0.9803974032402039,grad_norm: 0.7063866554231474, iteration: 279437
loss: 1.0022579431533813,grad_norm: 0.8510301789340481, iteration: 279438
loss: 1.0527586936950684,grad_norm: 0.9999998298658184, iteration: 279439
loss: 1.006622076034546,grad_norm: 0.9085955044149879, iteration: 279440
loss: 0.9971198439598083,grad_norm: 0.7726902799103637, iteration: 279441
loss: 1.0431172847747803,grad_norm: 0.9999991837363441, iteration: 279442
loss: 0.9603424072265625,grad_norm: 0.9999991467888955, iteration: 279443
loss: 0.9777774810791016,grad_norm: 0.7411045329857646, iteration: 279444
loss: 0.9720842242240906,grad_norm: 0.8529621480517456, iteration: 279445
loss: 0.9846280217170715,grad_norm: 0.9258110294591623, iteration: 279446
loss: 1.0117286443710327,grad_norm: 0.9067512001581174, iteration: 279447
loss: 1.0213066339492798,grad_norm: 0.777556026295961, iteration: 279448
loss: 0.9782299399375916,grad_norm: 0.923148433999718, iteration: 279449
loss: 0.975928544998169,grad_norm: 0.8572438062486684, iteration: 279450
loss: 1.0068587064743042,grad_norm: 0.9554565716313728, iteration: 279451
loss: 1.00934898853302,grad_norm: 0.8366921472172862, iteration: 279452
loss: 1.0052274465560913,grad_norm: 0.9999991442456758, iteration: 279453
loss: 1.0151056051254272,grad_norm: 0.7824899993844164, iteration: 279454
loss: 1.0032027959823608,grad_norm: 0.9085721224912426, iteration: 279455
loss: 0.9891071915626526,grad_norm: 0.898270299919248, iteration: 279456
loss: 0.9955628514289856,grad_norm: 0.8657832440972495, iteration: 279457
loss: 0.9691311717033386,grad_norm: 0.805672796869947, iteration: 279458
loss: 0.9794676303863525,grad_norm: 0.8971260874099105, iteration: 279459
loss: 1.0315053462982178,grad_norm: 0.999999886566386, iteration: 279460
loss: 0.956404983997345,grad_norm: 0.8944988163675788, iteration: 279461
loss: 1.0058504343032837,grad_norm: 0.8167507802592688, iteration: 279462
loss: 1.0395115613937378,grad_norm: 0.7387460064889474, iteration: 279463
loss: 1.0722044706344604,grad_norm: 0.9999992561050949, iteration: 279464
loss: 0.987877368927002,grad_norm: 0.732717970559517, iteration: 279465
loss: 0.9766001105308533,grad_norm: 0.8622597766333643, iteration: 279466
loss: 1.0104707479476929,grad_norm: 0.9999991267275075, iteration: 279467
loss: 1.0133583545684814,grad_norm: 0.8188017209202975, iteration: 279468
loss: 0.9983081817626953,grad_norm: 0.8310492731110992, iteration: 279469
loss: 0.9490671753883362,grad_norm: 0.8419704028624487, iteration: 279470
loss: 0.9860485792160034,grad_norm: 0.8275079542640784, iteration: 279471
loss: 0.9938746094703674,grad_norm: 0.7799746648959711, iteration: 279472
loss: 0.9820200204849243,grad_norm: 0.8403668817481951, iteration: 279473
loss: 0.9806252121925354,grad_norm: 0.8156022702889472, iteration: 279474
loss: 0.9834710359573364,grad_norm: 0.8090910826842374, iteration: 279475
loss: 0.9848068356513977,grad_norm: 0.9244197716582968, iteration: 279476
loss: 1.0089852809906006,grad_norm: 0.8464584977743477, iteration: 279477
loss: 1.0192030668258667,grad_norm: 0.7840840019660644, iteration: 279478
loss: 0.9636828303337097,grad_norm: 0.9326323600803851, iteration: 279479
loss: 1.0428578853607178,grad_norm: 0.8887323764070568, iteration: 279480
loss: 1.0869580507278442,grad_norm: 0.7777568916782485, iteration: 279481
loss: 0.9914950728416443,grad_norm: 0.9999991880201665, iteration: 279482
loss: 0.989054262638092,grad_norm: 0.8783202606994307, iteration: 279483
loss: 1.0764291286468506,grad_norm: 0.9999997078803065, iteration: 279484
loss: 0.9709270000457764,grad_norm: 0.8418896439604592, iteration: 279485
loss: 0.9856144785881042,grad_norm: 0.9367236852925461, iteration: 279486
loss: 0.9706293344497681,grad_norm: 0.9999990938702683, iteration: 279487
loss: 1.0240973234176636,grad_norm: 0.7466691787121857, iteration: 279488
loss: 1.003231406211853,grad_norm: 0.8229395388033112, iteration: 279489
loss: 1.0627769231796265,grad_norm: 0.9999994627782867, iteration: 279490
loss: 1.0014524459838867,grad_norm: 0.8376497596135855, iteration: 279491
loss: 0.981361448764801,grad_norm: 0.8538723575243478, iteration: 279492
loss: 0.9792922139167786,grad_norm: 0.8795700868278522, iteration: 279493
loss: 1.0314650535583496,grad_norm: 0.9999990792002021, iteration: 279494
loss: 1.0400118827819824,grad_norm: 0.7573427520445524, iteration: 279495
loss: 0.9628096222877502,grad_norm: 0.8515537022238272, iteration: 279496
loss: 0.9986301064491272,grad_norm: 0.8870846393114807, iteration: 279497
loss: 0.9592510461807251,grad_norm: 0.9999990491824621, iteration: 279498
loss: 0.9532880783081055,grad_norm: 0.8486604846998601, iteration: 279499
loss: 1.0290356874465942,grad_norm: 0.8591707936414571, iteration: 279500
loss: 1.0045281648635864,grad_norm: 0.9031825308812972, iteration: 279501
loss: 1.0131958723068237,grad_norm: 0.8359854630292445, iteration: 279502
loss: 0.9901997447013855,grad_norm: 0.7736428535561317, iteration: 279503
loss: 0.9762158989906311,grad_norm: 0.8931134974491843, iteration: 279504
loss: 1.0077499151229858,grad_norm: 0.9797317594342361, iteration: 279505
loss: 0.9921253323554993,grad_norm: 0.8523956806497592, iteration: 279506
loss: 0.9731746912002563,grad_norm: 0.8867454846307702, iteration: 279507
loss: 0.9908373355865479,grad_norm: 0.9861586093709379, iteration: 279508
loss: 0.9901949763298035,grad_norm: 0.8494239020302607, iteration: 279509
loss: 1.01041579246521,grad_norm: 0.9098925142963398, iteration: 279510
loss: 1.007063388824463,grad_norm: 0.9999992781385658, iteration: 279511
loss: 1.0084056854248047,grad_norm: 0.8601124135424463, iteration: 279512
loss: 1.0192697048187256,grad_norm: 0.8062420548801087, iteration: 279513
loss: 1.024017572402954,grad_norm: 0.808991751946709, iteration: 279514
loss: 1.0279713869094849,grad_norm: 0.7989096462766744, iteration: 279515
loss: 1.0400482416152954,grad_norm: 0.7772348963716702, iteration: 279516
loss: 0.9776783585548401,grad_norm: 0.9236839694097886, iteration: 279517
loss: 0.9478126168251038,grad_norm: 0.78051295303277, iteration: 279518
loss: 1.0251420736312866,grad_norm: 0.7337998395380256, iteration: 279519
loss: 0.9638389945030212,grad_norm: 0.7400750818104841, iteration: 279520
loss: 1.0103553533554077,grad_norm: 0.9999990414472141, iteration: 279521
loss: 1.0012452602386475,grad_norm: 0.9999989995365051, iteration: 279522
loss: 0.9655405282974243,grad_norm: 0.9811928451235298, iteration: 279523
loss: 1.000793218612671,grad_norm: 0.683400970038216, iteration: 279524
loss: 0.9936140179634094,grad_norm: 0.8117399685561497, iteration: 279525
loss: 1.0006260871887207,grad_norm: 0.8163266526931822, iteration: 279526
loss: 0.9865010976791382,grad_norm: 0.8339501284940098, iteration: 279527
loss: 1.0096278190612793,grad_norm: 0.9301310368030348, iteration: 279528
loss: 0.9752946496009827,grad_norm: 0.7897304076424297, iteration: 279529
loss: 0.9855072498321533,grad_norm: 0.9327380147459395, iteration: 279530
loss: 1.0086822509765625,grad_norm: 0.786083039793086, iteration: 279531
loss: 0.9756996631622314,grad_norm: 0.7736250512771109, iteration: 279532
loss: 0.9565144777297974,grad_norm: 0.828270967914826, iteration: 279533
loss: 1.0272713899612427,grad_norm: 0.9923029625199276, iteration: 279534
loss: 1.020363688468933,grad_norm: 0.9538636854035546, iteration: 279535
loss: 1.030455231666565,grad_norm: 0.7642170931048522, iteration: 279536
loss: 0.9913060069084167,grad_norm: 0.7720776652803035, iteration: 279537
loss: 1.007957935333252,grad_norm: 0.9490197772524114, iteration: 279538
loss: 1.013171672821045,grad_norm: 0.7544759206931082, iteration: 279539
loss: 0.9936433434486389,grad_norm: 0.9452412652158944, iteration: 279540
loss: 0.9612545967102051,grad_norm: 0.9999991867221557, iteration: 279541
loss: 0.9641051888465881,grad_norm: 0.8631477981217168, iteration: 279542
loss: 1.0246341228485107,grad_norm: 0.7818026502338293, iteration: 279543
loss: 1.0220774412155151,grad_norm: 0.8839721388969444, iteration: 279544
loss: 0.9644597768783569,grad_norm: 0.9396677375637268, iteration: 279545
loss: 0.994830310344696,grad_norm: 0.9999991494533959, iteration: 279546
loss: 1.0039466619491577,grad_norm: 0.9999999490410147, iteration: 279547
loss: 0.9895103573799133,grad_norm: 0.8374201507227146, iteration: 279548
loss: 1.0257395505905151,grad_norm: 0.8243507746028281, iteration: 279549
loss: 1.0062261819839478,grad_norm: 0.8413486386515797, iteration: 279550
loss: 1.036299467086792,grad_norm: 0.999998950081471, iteration: 279551
loss: 1.0228638648986816,grad_norm: 0.8818756726407425, iteration: 279552
loss: 1.0089071989059448,grad_norm: 0.9086347215730232, iteration: 279553
loss: 0.9919344782829285,grad_norm: 0.7924778834201173, iteration: 279554
loss: 1.019789695739746,grad_norm: 0.9202904956265608, iteration: 279555
loss: 0.984826385974884,grad_norm: 0.8174547534676323, iteration: 279556
loss: 1.0105236768722534,grad_norm: 0.8412679270575621, iteration: 279557
loss: 0.9922168254852295,grad_norm: 0.8056813928497193, iteration: 279558
loss: 0.984691858291626,grad_norm: 0.8792210688749256, iteration: 279559
loss: 0.9851076006889343,grad_norm: 0.9423373109244965, iteration: 279560
loss: 0.9532784223556519,grad_norm: 0.9999989481973967, iteration: 279561
loss: 1.0021122694015503,grad_norm: 0.8102390544384191, iteration: 279562
loss: 1.0078587532043457,grad_norm: 0.9999992505225856, iteration: 279563
loss: 0.9809138774871826,grad_norm: 0.91788344310769, iteration: 279564
loss: 1.006138563156128,grad_norm: 0.9902519001926277, iteration: 279565
loss: 1.0189069509506226,grad_norm: 0.8360543890030535, iteration: 279566
loss: 1.011266827583313,grad_norm: 0.8139683381454229, iteration: 279567
loss: 0.9907478094100952,grad_norm: 0.94620008610078, iteration: 279568
loss: 1.0190250873565674,grad_norm: 0.83796008987459, iteration: 279569
loss: 1.0115370750427246,grad_norm: 0.9338700343479223, iteration: 279570
loss: 0.980917751789093,grad_norm: 0.9041101900085774, iteration: 279571
loss: 1.0204466581344604,grad_norm: 0.8850145836077887, iteration: 279572
loss: 0.9866837859153748,grad_norm: 0.8708958659975399, iteration: 279573
loss: 1.0188032388687134,grad_norm: 0.9104461210925577, iteration: 279574
loss: 1.0520143508911133,grad_norm: 0.8350513325112737, iteration: 279575
loss: 0.9808500409126282,grad_norm: 0.9301636102613093, iteration: 279576
loss: 0.9928097724914551,grad_norm: 0.7921433280914008, iteration: 279577
loss: 0.9788740873336792,grad_norm: 0.908392360901658, iteration: 279578
loss: 0.9863207936286926,grad_norm: 0.9999990439397626, iteration: 279579
loss: 0.9470480680465698,grad_norm: 0.9999991170903447, iteration: 279580
loss: 0.9647538065910339,grad_norm: 0.9999992130734786, iteration: 279581
loss: 0.9649726748466492,grad_norm: 0.983085562370058, iteration: 279582
loss: 0.9853469133377075,grad_norm: 0.9999988857738057, iteration: 279583
loss: 0.9960057139396667,grad_norm: 0.8335077107772044, iteration: 279584
loss: 0.9799508452415466,grad_norm: 0.8642161593686044, iteration: 279585
loss: 1.0154026746749878,grad_norm: 0.9351867462050333, iteration: 279586
loss: 1.0316276550292969,grad_norm: 0.8132828406190064, iteration: 279587
loss: 0.9492353796958923,grad_norm: 0.8681746277781828, iteration: 279588
loss: 1.0175501108169556,grad_norm: 0.8824792091904669, iteration: 279589
loss: 1.0391845703125,grad_norm: 0.9999991547321344, iteration: 279590
loss: 0.9914470911026001,grad_norm: 0.9999991176782893, iteration: 279591
loss: 0.9941439628601074,grad_norm: 0.9579680930504101, iteration: 279592
loss: 0.9718495607376099,grad_norm: 0.8287515242368089, iteration: 279593
loss: 1.0151700973510742,grad_norm: 0.7824817235357131, iteration: 279594
loss: 0.9882057905197144,grad_norm: 0.7476618543608611, iteration: 279595
loss: 1.0117695331573486,grad_norm: 0.9289415524829929, iteration: 279596
loss: 1.06526780128479,grad_norm: 0.946050047189412, iteration: 279597
loss: 1.0047869682312012,grad_norm: 0.8975795797581607, iteration: 279598
loss: 0.985098123550415,grad_norm: 0.7685472943407907, iteration: 279599
loss: 1.0441131591796875,grad_norm: 0.9999998064469297, iteration: 279600
loss: 0.9711549282073975,grad_norm: 0.7795076159813414, iteration: 279601
loss: 1.0127991437911987,grad_norm: 0.8410133654441554, iteration: 279602
loss: 0.972991406917572,grad_norm: 0.9513706738327932, iteration: 279603
loss: 1.0350302457809448,grad_norm: 0.7271659143755639, iteration: 279604
loss: 0.9887847900390625,grad_norm: 0.9720362700553138, iteration: 279605
loss: 1.0140399932861328,grad_norm: 0.9999991616975918, iteration: 279606
loss: 0.9668301343917847,grad_norm: 0.8592335822248623, iteration: 279607
loss: 0.9879441857337952,grad_norm: 0.9245526673342811, iteration: 279608
loss: 1.0480759143829346,grad_norm: 0.8073056816055864, iteration: 279609
loss: 1.007359504699707,grad_norm: 0.999999127787897, iteration: 279610
loss: 1.0102790594100952,grad_norm: 0.9495941116436295, iteration: 279611
loss: 1.015290379524231,grad_norm: 0.8122617773127572, iteration: 279612
loss: 0.9922534227371216,grad_norm: 0.8067825173113841, iteration: 279613
loss: 0.9741224050521851,grad_norm: 0.9503800550316722, iteration: 279614
loss: 1.0011855363845825,grad_norm: 0.9999992415430313, iteration: 279615
loss: 1.0044292211532593,grad_norm: 0.9999991484917794, iteration: 279616
loss: 0.986822783946991,grad_norm: 0.9999998247384114, iteration: 279617
loss: 0.9817620515823364,grad_norm: 0.965859054005828, iteration: 279618
loss: 0.9849477410316467,grad_norm: 0.8746653723413738, iteration: 279619
loss: 1.0107142925262451,grad_norm: 0.8335654595468662, iteration: 279620
loss: 0.9992800951004028,grad_norm: 0.8952520377361991, iteration: 279621
loss: 1.00091552734375,grad_norm: 0.8749964878308943, iteration: 279622
loss: 1.0078787803649902,grad_norm: 0.7484671445255893, iteration: 279623
loss: 1.0195456743240356,grad_norm: 0.9999997979079546, iteration: 279624
loss: 1.0219844579696655,grad_norm: 0.8387984770261084, iteration: 279625
loss: 1.0203031301498413,grad_norm: 0.9999995574584408, iteration: 279626
loss: 1.0371997356414795,grad_norm: 0.9939473487857057, iteration: 279627
loss: 1.0275272130966187,grad_norm: 0.7958479285943847, iteration: 279628
loss: 0.9449537992477417,grad_norm: 0.9999990605074263, iteration: 279629
loss: 0.989845871925354,grad_norm: 0.8525904455784855, iteration: 279630
loss: 1.020025372505188,grad_norm: 0.9999988308897794, iteration: 279631
loss: 1.0508533716201782,grad_norm: 0.9577180555477104, iteration: 279632
loss: 0.9929311275482178,grad_norm: 0.83438229208964, iteration: 279633
loss: 1.0951852798461914,grad_norm: 0.9999991842001409, iteration: 279634
loss: 1.0345155000686646,grad_norm: 0.7816029625125764, iteration: 279635
loss: 1.053268313407898,grad_norm: 0.9202558252914218, iteration: 279636
loss: 1.0274097919464111,grad_norm: 0.9999992757649575, iteration: 279637
loss: 0.9868528246879578,grad_norm: 0.926400505631523, iteration: 279638
loss: 0.9738016724586487,grad_norm: 0.9304844368918708, iteration: 279639
loss: 0.9683631062507629,grad_norm: 0.8716023185457002, iteration: 279640
loss: 1.0317186117172241,grad_norm: 0.9923343972375478, iteration: 279641
loss: 1.019850492477417,grad_norm: 0.7526591747074053, iteration: 279642
loss: 0.9894102215766907,grad_norm: 0.8686079482204254, iteration: 279643
loss: 0.9664093852043152,grad_norm: 0.7951398996196255, iteration: 279644
loss: 1.0284875631332397,grad_norm: 0.9999996637858025, iteration: 279645
loss: 0.9607968330383301,grad_norm: 0.9999991363867441, iteration: 279646
loss: 1.0182738304138184,grad_norm: 0.855475541099268, iteration: 279647
loss: 1.0082077980041504,grad_norm: 0.7924742726199944, iteration: 279648
loss: 1.0433564186096191,grad_norm: 0.8203891585777854, iteration: 279649
loss: 1.09922194480896,grad_norm: 0.8912880235848661, iteration: 279650
loss: 1.0161890983581543,grad_norm: 0.9850544355378512, iteration: 279651
loss: 1.0134905576705933,grad_norm: 0.9487558186487087, iteration: 279652
loss: 0.9957915544509888,grad_norm: 0.8654091558510776, iteration: 279653
loss: 0.9976429343223572,grad_norm: 0.9506115089760392, iteration: 279654
loss: 0.9915741086006165,grad_norm: 0.9999992838672117, iteration: 279655
loss: 1.0138063430786133,grad_norm: 0.7354165391300954, iteration: 279656
loss: 0.9910945296287537,grad_norm: 0.8069553808657655, iteration: 279657
loss: 0.9469641447067261,grad_norm: 0.9223324664699385, iteration: 279658
loss: 0.98758465051651,grad_norm: 0.7618848245906498, iteration: 279659
loss: 1.0101001262664795,grad_norm: 0.9019114928822648, iteration: 279660
loss: 0.9882470965385437,grad_norm: 0.9999990168158815, iteration: 279661
loss: 1.0228781700134277,grad_norm: 0.8389959259855169, iteration: 279662
loss: 0.9938585162162781,grad_norm: 0.9928545982558482, iteration: 279663
loss: 0.997891366481781,grad_norm: 0.8263989689512227, iteration: 279664
loss: 1.0101170539855957,grad_norm: 0.9999994997482077, iteration: 279665
loss: 0.9994930028915405,grad_norm: 0.7141509678623621, iteration: 279666
loss: 1.017380714416504,grad_norm: 0.8633601018133044, iteration: 279667
loss: 1.0002552270889282,grad_norm: 0.8435570666336204, iteration: 279668
loss: 0.9878122210502625,grad_norm: 0.934921993859643, iteration: 279669
loss: 1.0633591413497925,grad_norm: 0.999998950910713, iteration: 279670
loss: 1.033109188079834,grad_norm: 0.9999999671708272, iteration: 279671
loss: 1.0017869472503662,grad_norm: 0.8095053111875732, iteration: 279672
loss: 0.9919754862785339,grad_norm: 0.9763521390870932, iteration: 279673
loss: 1.0451102256774902,grad_norm: 0.8288010929122952, iteration: 279674
loss: 0.952086329460144,grad_norm: 0.9982108299008555, iteration: 279675
loss: 0.9743961691856384,grad_norm: 0.9396671248695114, iteration: 279676
loss: 0.9688689708709717,grad_norm: 0.74656402996096, iteration: 279677
loss: 1.0062930583953857,grad_norm: 0.8237094115210565, iteration: 279678
loss: 0.9922248125076294,grad_norm: 0.7554146276180536, iteration: 279679
loss: 0.9705643653869629,grad_norm: 0.8668385928357512, iteration: 279680
loss: 0.9716014862060547,grad_norm: 0.8679975627839704, iteration: 279681
loss: 0.9920545816421509,grad_norm: 0.7743632432160533, iteration: 279682
loss: 1.0083357095718384,grad_norm: 0.999999171301785, iteration: 279683
loss: 0.9813170433044434,grad_norm: 0.8003616756901587, iteration: 279684
loss: 0.9969028830528259,grad_norm: 0.8526379315189052, iteration: 279685
loss: 1.0151493549346924,grad_norm: 0.8160389258525105, iteration: 279686
loss: 0.973293662071228,grad_norm: 0.8904330754053573, iteration: 279687
loss: 1.0114831924438477,grad_norm: 0.8855697091603054, iteration: 279688
loss: 1.0083374977111816,grad_norm: 0.9403852391729016, iteration: 279689
loss: 0.9855889678001404,grad_norm: 0.7731010100254583, iteration: 279690
loss: 0.9961730241775513,grad_norm: 0.7354156751215166, iteration: 279691
loss: 0.9790241122245789,grad_norm: 0.9074985984335215, iteration: 279692
loss: 0.9588043093681335,grad_norm: 0.7515914867530884, iteration: 279693
loss: 1.0291943550109863,grad_norm: 0.8252995789023744, iteration: 279694
loss: 1.0153672695159912,grad_norm: 0.8526070023656468, iteration: 279695
loss: 1.0370159149169922,grad_norm: 0.990112622072155, iteration: 279696
loss: 0.9933602809906006,grad_norm: 0.8567315602409842, iteration: 279697
loss: 0.9635036587715149,grad_norm: 0.9024537760526835, iteration: 279698
loss: 0.9752289056777954,grad_norm: 0.9999990207717548, iteration: 279699
loss: 1.0081895589828491,grad_norm: 0.9999990963488412, iteration: 279700
loss: 0.9562606811523438,grad_norm: 0.8640193968447306, iteration: 279701
loss: 0.9829031825065613,grad_norm: 0.9999991010908397, iteration: 279702
loss: 0.9958592057228088,grad_norm: 0.8109107634892082, iteration: 279703
loss: 0.9936752915382385,grad_norm: 0.7031905750994907, iteration: 279704
loss: 1.0200268030166626,grad_norm: 0.6922372490154719, iteration: 279705
loss: 0.9826964735984802,grad_norm: 0.8060419105654818, iteration: 279706
loss: 1.006557822227478,grad_norm: 0.872735496418419, iteration: 279707
loss: 1.0227118730545044,grad_norm: 0.9999991431117703, iteration: 279708
loss: 1.0030689239501953,grad_norm: 0.9999994710537393, iteration: 279709
loss: 1.0113273859024048,grad_norm: 0.901282121148769, iteration: 279710
loss: 1.0463305711746216,grad_norm: 0.8909619317375106, iteration: 279711
loss: 0.9747753739356995,grad_norm: 0.7867577833573687, iteration: 279712
loss: 0.989741861820221,grad_norm: 0.7068248243273559, iteration: 279713
loss: 0.977679431438446,grad_norm: 0.8909568270027465, iteration: 279714
loss: 1.0069102048873901,grad_norm: 0.9999992431578533, iteration: 279715
loss: 0.9933263659477234,grad_norm: 0.8763927824274634, iteration: 279716
loss: 1.0063130855560303,grad_norm: 0.815158686270603, iteration: 279717
loss: 1.015334963798523,grad_norm: 0.7522205289666903, iteration: 279718
loss: 1.0124531984329224,grad_norm: 0.868737098341451, iteration: 279719
loss: 0.9846166968345642,grad_norm: 0.9076174762806013, iteration: 279720
loss: 0.9708204865455627,grad_norm: 0.8152898410659153, iteration: 279721
loss: 1.0638556480407715,grad_norm: 0.9807530869512735, iteration: 279722
loss: 0.9905486106872559,grad_norm: 0.8109358425207063, iteration: 279723
loss: 0.9846027493476868,grad_norm: 0.8744235019725533, iteration: 279724
loss: 1.006872534751892,grad_norm: 0.9087687609885379, iteration: 279725
loss: 1.046760082244873,grad_norm: 0.8234951127833819, iteration: 279726
loss: 1.0045888423919678,grad_norm: 0.7837614452338423, iteration: 279727
loss: 1.009785771369934,grad_norm: 0.9043227825785457, iteration: 279728
loss: 1.0255192518234253,grad_norm: 0.8857322911375339, iteration: 279729
loss: 0.9822016954421997,grad_norm: 0.8450676273807033, iteration: 279730
loss: 1.0061391592025757,grad_norm: 0.875060694782796, iteration: 279731
loss: 1.0068095922470093,grad_norm: 0.8991971757642157, iteration: 279732
loss: 0.9944918155670166,grad_norm: 0.9469749310661856, iteration: 279733
loss: 1.0260998010635376,grad_norm: 0.9999995197019451, iteration: 279734
loss: 1.0144554376602173,grad_norm: 0.874674415602573, iteration: 279735
loss: 1.0121177434921265,grad_norm: 0.9510495870345683, iteration: 279736
loss: 0.9874881505966187,grad_norm: 0.8601652293756956, iteration: 279737
loss: 0.9837661981582642,grad_norm: 0.819432421785615, iteration: 279738
loss: 0.9884024262428284,grad_norm: 0.9676141995682318, iteration: 279739
loss: 0.9690256714820862,grad_norm: 0.770320287722703, iteration: 279740
loss: 1.0379899740219116,grad_norm: 0.9999990448884043, iteration: 279741
loss: 0.9908154606819153,grad_norm: 0.9531588519316847, iteration: 279742
loss: 0.9778165221214294,grad_norm: 0.8677986393958402, iteration: 279743
loss: 1.0109004974365234,grad_norm: 0.8220602224793616, iteration: 279744
loss: 0.9958851337432861,grad_norm: 0.7938522671576317, iteration: 279745
loss: 1.0028387308120728,grad_norm: 0.7877109923856568, iteration: 279746
loss: 1.0170831680297852,grad_norm: 0.7526027327651408, iteration: 279747
loss: 0.9890536069869995,grad_norm: 0.8203451359041466, iteration: 279748
loss: 1.0014567375183105,grad_norm: 0.8838710358554634, iteration: 279749
loss: 0.9907719492912292,grad_norm: 0.8085444808811874, iteration: 279750
loss: 1.010740876197815,grad_norm: 0.9352846635995008, iteration: 279751
loss: 1.009312391281128,grad_norm: 0.9989616332420458, iteration: 279752
loss: 1.0114963054656982,grad_norm: 0.9999990864046334, iteration: 279753
loss: 1.026023507118225,grad_norm: 0.9999990672948541, iteration: 279754
loss: 0.9860494136810303,grad_norm: 0.8039826536261094, iteration: 279755
loss: 1.0012602806091309,grad_norm: 0.9269333146876129, iteration: 279756
loss: 1.0275529623031616,grad_norm: 0.8498291694647243, iteration: 279757
loss: 0.9754725694656372,grad_norm: 0.9088665919769365, iteration: 279758
loss: 1.00078547000885,grad_norm: 0.7828196795656583, iteration: 279759
loss: 1.0160484313964844,grad_norm: 0.938841662233901, iteration: 279760
loss: 1.00771164894104,grad_norm: 0.8586705413877554, iteration: 279761
loss: 1.0098485946655273,grad_norm: 0.9334467382820785, iteration: 279762
loss: 1.0405288934707642,grad_norm: 0.9999989590492885, iteration: 279763
loss: 1.0056110620498657,grad_norm: 0.9704346846507244, iteration: 279764
loss: 0.9908682703971863,grad_norm: 0.9999989642205396, iteration: 279765
loss: 0.9925116300582886,grad_norm: 0.9999990456828585, iteration: 279766
loss: 1.0661317110061646,grad_norm: 0.8664643235383922, iteration: 279767
loss: 0.9874222278594971,grad_norm: 0.921568583100551, iteration: 279768
loss: 0.994044840335846,grad_norm: 0.9999993966953882, iteration: 279769
loss: 0.9890665411949158,grad_norm: 0.8801967819693758, iteration: 279770
loss: 1.0063221454620361,grad_norm: 0.8282009925252768, iteration: 279771
loss: 1.020957350730896,grad_norm: 0.9370172799629096, iteration: 279772
loss: 1.0283557176589966,grad_norm: 0.8691675171164217, iteration: 279773
loss: 1.0221251249313354,grad_norm: 0.9964935745205197, iteration: 279774
loss: 1.0842130184173584,grad_norm: 0.9887025941318229, iteration: 279775
loss: 0.9905238151550293,grad_norm: 0.8583970101732575, iteration: 279776
loss: 0.9954237937927246,grad_norm: 0.8345123325965274, iteration: 279777
loss: 0.981902003288269,grad_norm: 0.8806054282648889, iteration: 279778
loss: 0.9526199698448181,grad_norm: 0.8217838818190343, iteration: 279779
loss: 1.0337491035461426,grad_norm: 0.8968742533730683, iteration: 279780
loss: 0.9790154695510864,grad_norm: 0.8047077473377687, iteration: 279781
loss: 0.9971464276313782,grad_norm: 0.9923991978755936, iteration: 279782
loss: 0.9995818138122559,grad_norm: 0.7681805751800357, iteration: 279783
loss: 0.9749364256858826,grad_norm: 0.9589308586243107, iteration: 279784
loss: 1.0055235624313354,grad_norm: 0.860059348075135, iteration: 279785
loss: 0.970306396484375,grad_norm: 0.9999991840907508, iteration: 279786
loss: 0.9924020767211914,grad_norm: 0.7346946795474393, iteration: 279787
loss: 0.9584290385246277,grad_norm: 0.8933186489792441, iteration: 279788
loss: 1.0207756757736206,grad_norm: 0.864549965247269, iteration: 279789
loss: 0.9629769921302795,grad_norm: 0.8696270593432355, iteration: 279790
loss: 0.9632576704025269,grad_norm: 0.8532663721715189, iteration: 279791
loss: 1.0071941614151,grad_norm: 0.866179366677441, iteration: 279792
loss: 1.0121235847473145,grad_norm: 0.7192135506304682, iteration: 279793
loss: 1.0200363397598267,grad_norm: 0.8344915696519863, iteration: 279794
loss: 0.9995465874671936,grad_norm: 0.8311932464304982, iteration: 279795
loss: 0.9673871397972107,grad_norm: 0.9999991731283937, iteration: 279796
loss: 0.951487123966217,grad_norm: 0.745792748170088, iteration: 279797
loss: 0.9695111513137817,grad_norm: 0.8339452705155059, iteration: 279798
loss: 1.0106699466705322,grad_norm: 0.8391165213559363, iteration: 279799
loss: 0.9683247804641724,grad_norm: 0.9543254258036672, iteration: 279800
loss: 1.013403058052063,grad_norm: 0.9374280504333007, iteration: 279801
loss: 1.0002342462539673,grad_norm: 0.826869343481328, iteration: 279802
loss: 1.0949561595916748,grad_norm: 0.9999997687407971, iteration: 279803
loss: 1.0011026859283447,grad_norm: 0.9699014999239086, iteration: 279804
loss: 0.9928337931632996,grad_norm: 0.7785148121008253, iteration: 279805
loss: 0.9869460463523865,grad_norm: 0.9709532677477823, iteration: 279806
loss: 1.0024446249008179,grad_norm: 0.9163828133116694, iteration: 279807
loss: 1.01664400100708,grad_norm: 0.876619940490686, iteration: 279808
loss: 0.9733636379241943,grad_norm: 0.8460517890055766, iteration: 279809
loss: 0.989952027797699,grad_norm: 0.793358962813873, iteration: 279810
loss: 1.0262175798416138,grad_norm: 0.9869871631459554, iteration: 279811
loss: 1.011547565460205,grad_norm: 0.9999991365893871, iteration: 279812
loss: 0.9797464609146118,grad_norm: 0.8890158276793312, iteration: 279813
loss: 0.990966796875,grad_norm: 0.9321194454388173, iteration: 279814
loss: 1.0033843517303467,grad_norm: 0.9019653507942257, iteration: 279815
loss: 1.0006237030029297,grad_norm: 0.9999990404371828, iteration: 279816
loss: 1.0065584182739258,grad_norm: 0.9022663118317072, iteration: 279817
loss: 1.0346845388412476,grad_norm: 0.7365400089544829, iteration: 279818
loss: 0.9881866574287415,grad_norm: 0.9523662226073656, iteration: 279819
loss: 0.9892938733100891,grad_norm: 0.9999991942639991, iteration: 279820
loss: 0.9697719216346741,grad_norm: 0.9963253019025046, iteration: 279821
loss: 1.0111194849014282,grad_norm: 0.8874271355085086, iteration: 279822
loss: 1.0525590181350708,grad_norm: 0.9999992799381682, iteration: 279823
loss: 1.020566463470459,grad_norm: 0.9999994711939109, iteration: 279824
loss: 0.9791879057884216,grad_norm: 0.8580814852588349, iteration: 279825
loss: 0.9998883605003357,grad_norm: 0.9185779618169058, iteration: 279826
loss: 0.9627206325531006,grad_norm: 0.8945250080237004, iteration: 279827
loss: 0.9972826242446899,grad_norm: 0.9782966550872124, iteration: 279828
loss: 1.002852201461792,grad_norm: 0.9999993305315841, iteration: 279829
loss: 0.9712552428245544,grad_norm: 0.9734009760269147, iteration: 279830
loss: 1.015944480895996,grad_norm: 0.9837425822186255, iteration: 279831
loss: 1.042992115020752,grad_norm: 0.8118427298040329, iteration: 279832
loss: 0.9954534769058228,grad_norm: 0.8282991691342532, iteration: 279833
loss: 1.0306226015090942,grad_norm: 0.9999997587938868, iteration: 279834
loss: 1.0120106935501099,grad_norm: 0.9999992723333438, iteration: 279835
loss: 1.0591083765029907,grad_norm: 0.9999995090451052, iteration: 279836
loss: 1.0066933631896973,grad_norm: 0.999999143077266, iteration: 279837
loss: 1.0338685512542725,grad_norm: 0.9824098936437559, iteration: 279838
loss: 1.0014503002166748,grad_norm: 0.8296944239530902, iteration: 279839
loss: 1.1223440170288086,grad_norm: 0.9999998979520317, iteration: 279840
loss: 1.0342862606048584,grad_norm: 0.9999994727152585, iteration: 279841
loss: 0.98919677734375,grad_norm: 0.9999994571793667, iteration: 279842
loss: 1.050001621246338,grad_norm: 0.9999998663935584, iteration: 279843
loss: 0.9925249218940735,grad_norm: 0.9188672797997598, iteration: 279844
loss: 0.9694770574569702,grad_norm: 0.793208497689594, iteration: 279845
loss: 1.051348090171814,grad_norm: 0.9999992558560561, iteration: 279846
loss: 1.016861081123352,grad_norm: 0.8182797805168388, iteration: 279847
loss: 1.0691261291503906,grad_norm: 0.9999991664624546, iteration: 279848
loss: 0.9561867713928223,grad_norm: 0.9004639792781121, iteration: 279849
loss: 1.009352445602417,grad_norm: 0.9398929429159736, iteration: 279850
loss: 0.9874154329299927,grad_norm: 0.7290283002944815, iteration: 279851
loss: 0.9743074178695679,grad_norm: 0.8764080630842276, iteration: 279852
loss: 1.0170952081680298,grad_norm: 0.8651714975324468, iteration: 279853
loss: 0.9984248280525208,grad_norm: 0.8987088553258175, iteration: 279854
loss: 0.9679198265075684,grad_norm: 0.7595894001313823, iteration: 279855
loss: 0.9763802886009216,grad_norm: 0.8541462851328612, iteration: 279856
loss: 1.012927770614624,grad_norm: 0.7714146082619577, iteration: 279857
loss: 0.9707688689231873,grad_norm: 0.937029859958106, iteration: 279858
loss: 1.035220980644226,grad_norm: 0.9425440740419618, iteration: 279859
loss: 0.9949380159378052,grad_norm: 0.8318368859079274, iteration: 279860
loss: 0.9716276526451111,grad_norm: 0.9804336815876991, iteration: 279861
loss: 1.006544589996338,grad_norm: 0.9999993498743243, iteration: 279862
loss: 1.0091174840927124,grad_norm: 0.9455760759713396, iteration: 279863
loss: 0.9871073961257935,grad_norm: 0.9991901434387331, iteration: 279864
loss: 0.9986623525619507,grad_norm: 0.999999003174539, iteration: 279865
loss: 0.9646362662315369,grad_norm: 0.9249580675313438, iteration: 279866
loss: 1.0045762062072754,grad_norm: 0.8931750803579318, iteration: 279867
loss: 0.9722185134887695,grad_norm: 0.9956476024041528, iteration: 279868
loss: 0.9635307192802429,grad_norm: 0.8750778142135505, iteration: 279869
loss: 0.9907503724098206,grad_norm: 0.8462427092897198, iteration: 279870
loss: 1.0157949924468994,grad_norm: 0.9412811479959055, iteration: 279871
loss: 0.9811168909072876,grad_norm: 0.9104442550475951, iteration: 279872
loss: 0.9999366998672485,grad_norm: 0.999999255033288, iteration: 279873
loss: 1.0134543180465698,grad_norm: 0.6928211204149923, iteration: 279874
loss: 0.9631849527359009,grad_norm: 0.997595690256208, iteration: 279875
loss: 1.018059253692627,grad_norm: 0.8097124743767518, iteration: 279876
loss: 1.0257205963134766,grad_norm: 0.9999995321615109, iteration: 279877
loss: 0.9603108763694763,grad_norm: 0.8594584214911833, iteration: 279878
loss: 0.9860559105873108,grad_norm: 0.8244158756440683, iteration: 279879
loss: 1.0078293085098267,grad_norm: 0.8137137046206774, iteration: 279880
loss: 1.0144610404968262,grad_norm: 0.9999990755208573, iteration: 279881
loss: 0.9843704700469971,grad_norm: 0.999999140613031, iteration: 279882
loss: 1.0259708166122437,grad_norm: 0.8976614376708324, iteration: 279883
loss: 0.9840636849403381,grad_norm: 0.9999992764555713, iteration: 279884
loss: 1.0310566425323486,grad_norm: 0.8035560548291434, iteration: 279885
loss: 1.0484548807144165,grad_norm: 0.8578728773204581, iteration: 279886
loss: 0.9922013878822327,grad_norm: 0.8396575390177812, iteration: 279887
loss: 0.967925488948822,grad_norm: 0.8749569197998428, iteration: 279888
loss: 0.990401566028595,grad_norm: 0.9173014659222656, iteration: 279889
loss: 1.014528512954712,grad_norm: 0.7238001985792589, iteration: 279890
loss: 0.985742449760437,grad_norm: 0.9755159843941676, iteration: 279891
loss: 0.9734653234481812,grad_norm: 0.8301326920598765, iteration: 279892
loss: 0.9766938090324402,grad_norm: 0.9029891238137188, iteration: 279893
loss: 1.061195731163025,grad_norm: 0.999999562664693, iteration: 279894
loss: 0.9629974365234375,grad_norm: 0.8814169052720737, iteration: 279895
loss: 0.9979930520057678,grad_norm: 0.8765783938530577, iteration: 279896
loss: 1.0038621425628662,grad_norm: 0.7126608759169374, iteration: 279897
loss: 1.0402977466583252,grad_norm: 0.9999995225213965, iteration: 279898
loss: 0.9939706921577454,grad_norm: 0.801222991188894, iteration: 279899
loss: 1.0201027393341064,grad_norm: 0.9999991920720961, iteration: 279900
loss: 1.0292779207229614,grad_norm: 0.999998990844524, iteration: 279901
loss: 0.9566343426704407,grad_norm: 0.8261023326300394, iteration: 279902
loss: 0.9602090120315552,grad_norm: 0.8735375994433777, iteration: 279903
loss: 0.996421217918396,grad_norm: 0.855986859752161, iteration: 279904
loss: 0.9948391914367676,grad_norm: 0.9999992028434722, iteration: 279905
loss: 0.9930786490440369,grad_norm: 0.9538396440305213, iteration: 279906
loss: 1.000489354133606,grad_norm: 0.8738184574126984, iteration: 279907
loss: 1.083816409111023,grad_norm: 0.9647686867158625, iteration: 279908
loss: 1.0016299486160278,grad_norm: 0.9999990770807597, iteration: 279909
loss: 0.9789851307868958,grad_norm: 0.8925594211483799, iteration: 279910
loss: 1.0594760179519653,grad_norm: 0.9625891890940179, iteration: 279911
loss: 0.99253910779953,grad_norm: 0.8893456639613109, iteration: 279912
loss: 1.000751256942749,grad_norm: 0.9999997586043741, iteration: 279913
loss: 1.0162495374679565,grad_norm: 0.8129651940970537, iteration: 279914
loss: 0.9951976537704468,grad_norm: 0.9999991798011059, iteration: 279915
loss: 1.072597622871399,grad_norm: 0.7865299415038131, iteration: 279916
loss: 1.0225276947021484,grad_norm: 0.8565787394146821, iteration: 279917
loss: 0.9987943172454834,grad_norm: 0.8376353572701879, iteration: 279918
loss: 1.0046137571334839,grad_norm: 0.8440486723760333, iteration: 279919
loss: 0.9876610636711121,grad_norm: 0.9999991284990231, iteration: 279920
loss: 0.9724056720733643,grad_norm: 0.7296012040147326, iteration: 279921
loss: 1.0740658044815063,grad_norm: 0.9999996303033856, iteration: 279922
loss: 1.0134445428848267,grad_norm: 0.9999989505444398, iteration: 279923
loss: 0.9583069682121277,grad_norm: 0.8250405469784265, iteration: 279924
loss: 0.9748710989952087,grad_norm: 0.999999264129492, iteration: 279925
loss: 1.0112996101379395,grad_norm: 0.961568739789975, iteration: 279926
loss: 1.0084043741226196,grad_norm: 0.6641714788171893, iteration: 279927
loss: 0.9799212217330933,grad_norm: 0.8313153011947145, iteration: 279928
loss: 1.005866527557373,grad_norm: 0.7663775346487763, iteration: 279929
loss: 0.9743590950965881,grad_norm: 0.8730980019390572, iteration: 279930
loss: 1.005739450454712,grad_norm: 0.8484512639441418, iteration: 279931
loss: 1.0042725801467896,grad_norm: 0.7595339347567095, iteration: 279932
loss: 1.0067611932754517,grad_norm: 0.8860831275253962, iteration: 279933
loss: 0.9811655879020691,grad_norm: 0.7589505744615237, iteration: 279934
loss: 1.0000165700912476,grad_norm: 0.9084084230167045, iteration: 279935
loss: 0.9795629978179932,grad_norm: 0.8124227284434364, iteration: 279936
loss: 1.0157476663589478,grad_norm: 0.8715936852436444, iteration: 279937
loss: 0.9757575988769531,grad_norm: 0.8443319771462031, iteration: 279938
loss: 1.0127818584442139,grad_norm: 0.7456044260833348, iteration: 279939
loss: 1.0183926820755005,grad_norm: 0.7820635502430885, iteration: 279940
loss: 1.0082510709762573,grad_norm: 0.9187297043708699, iteration: 279941
loss: 0.971234917640686,grad_norm: 0.7179998525884229, iteration: 279942
loss: 0.9800700545310974,grad_norm: 0.9999991750477146, iteration: 279943
loss: 0.9703658223152161,grad_norm: 0.9221181135919352, iteration: 279944
loss: 1.0146644115447998,grad_norm: 0.8537587132060409, iteration: 279945
loss: 1.0042330026626587,grad_norm: 0.9999996987905446, iteration: 279946
loss: 1.0220521688461304,grad_norm: 0.8269346989999636, iteration: 279947
loss: 1.0104970932006836,grad_norm: 0.9999996475534404, iteration: 279948
loss: 1.0294276475906372,grad_norm: 0.9999993505655312, iteration: 279949
loss: 0.960976779460907,grad_norm: 0.9351932806190193, iteration: 279950
loss: 0.9945874810218811,grad_norm: 0.9999991961292559, iteration: 279951
loss: 1.023551106452942,grad_norm: 0.8795643223276745, iteration: 279952
loss: 0.9519037008285522,grad_norm: 0.9999991419683828, iteration: 279953
loss: 0.9752640724182129,grad_norm: 0.9137875092669144, iteration: 279954
loss: 0.9948915243148804,grad_norm: 0.8727906953785721, iteration: 279955
loss: 0.9730388522148132,grad_norm: 0.9999990012353738, iteration: 279956
loss: 1.0308868885040283,grad_norm: 0.9999992216497327, iteration: 279957
loss: 1.0248836278915405,grad_norm: 0.8174607997808623, iteration: 279958
loss: 1.0402255058288574,grad_norm: 0.8433242176189613, iteration: 279959
loss: 1.0385030508041382,grad_norm: 0.9172720192209455, iteration: 279960
loss: 1.0038658380508423,grad_norm: 0.9015145023943881, iteration: 279961
loss: 1.000043511390686,grad_norm: 0.9999991911832273, iteration: 279962
loss: 1.0260984897613525,grad_norm: 0.7724471514724921, iteration: 279963
loss: 1.0333575010299683,grad_norm: 0.8735645310340341, iteration: 279964
loss: 0.9832310676574707,grad_norm: 0.7412070160152152, iteration: 279965
loss: 0.9860864877700806,grad_norm: 0.8722890975927785, iteration: 279966
loss: 1.0349128246307373,grad_norm: 0.902794854011413, iteration: 279967
loss: 1.013952612876892,grad_norm: 0.903661177729653, iteration: 279968
loss: 0.998530924320221,grad_norm: 0.9120719857043521, iteration: 279969
loss: 1.035119652748108,grad_norm: 0.9999990132993576, iteration: 279970
loss: 1.001623511314392,grad_norm: 0.9637955060367028, iteration: 279971
loss: 0.9836211204528809,grad_norm: 0.947078755401943, iteration: 279972
loss: 1.0256799459457397,grad_norm: 0.9999993224584948, iteration: 279973
loss: 0.9959695339202881,grad_norm: 0.7038155952379935, iteration: 279974
loss: 0.9887506365776062,grad_norm: 0.8218307611622937, iteration: 279975
loss: 0.9869536757469177,grad_norm: 0.9999992179724604, iteration: 279976
loss: 0.9705491065979004,grad_norm: 0.7562224049914285, iteration: 279977
loss: 0.9800125360488892,grad_norm: 0.8536811625191646, iteration: 279978
loss: 0.9723374843597412,grad_norm: 0.8985505052110024, iteration: 279979
loss: 1.0070874691009521,grad_norm: 0.999998937714399, iteration: 279980
loss: 0.9829160571098328,grad_norm: 0.8920905255216279, iteration: 279981
loss: 0.9661340713500977,grad_norm: 0.776328898158066, iteration: 279982
loss: 1.0279865264892578,grad_norm: 0.9737837096167435, iteration: 279983
loss: 0.9394039511680603,grad_norm: 0.7975811080914283, iteration: 279984
loss: 0.9885982275009155,grad_norm: 0.8155452929851598, iteration: 279985
loss: 1.0035871267318726,grad_norm: 0.8616117405768239, iteration: 279986
loss: 0.986262321472168,grad_norm: 0.8675075263240474, iteration: 279987
loss: 0.9945764541625977,grad_norm: 0.9355410003921645, iteration: 279988
loss: 0.9897595643997192,grad_norm: 0.867434784119072, iteration: 279989
loss: 1.0077217817306519,grad_norm: 0.7982644551142504, iteration: 279990
loss: 0.993671178817749,grad_norm: 0.9602547194680935, iteration: 279991
loss: 1.0176942348480225,grad_norm: 0.8722261172969817, iteration: 279992
loss: 0.9840313792228699,grad_norm: 0.9922897705920459, iteration: 279993
loss: 0.9654213190078735,grad_norm: 0.905378303205813, iteration: 279994
loss: 1.0048171281814575,grad_norm: 0.8909239593264024, iteration: 279995
loss: 0.9997913241386414,grad_norm: 0.80015879580312, iteration: 279996
loss: 1.0288646221160889,grad_norm: 0.8777577657969546, iteration: 279997
loss: 1.0107481479644775,grad_norm: 0.6751192132660742, iteration: 279998
loss: 1.107571005821228,grad_norm: 0.9999994232368462, iteration: 279999
loss: 0.9890881776809692,grad_norm: 0.8613711894216758, iteration: 280000
Evaluating at step 280000
{'val': 0.9952527023851871, 'test': 2.2099993404108114}
loss: 1.0212364196777344,grad_norm: 0.9571966269930254, iteration: 280001
loss: 1.04177725315094,grad_norm: 0.9382958239917848, iteration: 280002
loss: 1.0420386791229248,grad_norm: 0.9999991558504054, iteration: 280003
loss: 1.041815996170044,grad_norm: 0.9999991751712013, iteration: 280004
loss: 1.0154937505722046,grad_norm: 0.7168407848927357, iteration: 280005
loss: 0.9940018653869629,grad_norm: 0.9407089223141788, iteration: 280006
loss: 1.0157208442687988,grad_norm: 0.8818282750190236, iteration: 280007
loss: 1.0048811435699463,grad_norm: 0.9999990298367788, iteration: 280008
loss: 0.9811621308326721,grad_norm: 0.8681569525068134, iteration: 280009
loss: 0.9671881794929504,grad_norm: 0.7832084677841474, iteration: 280010
loss: 0.9855166077613831,grad_norm: 0.7528717528846416, iteration: 280011
loss: 1.0265483856201172,grad_norm: 0.8616042810969844, iteration: 280012
loss: 0.9769181609153748,grad_norm: 0.8922648125283118, iteration: 280013
loss: 0.9982885122299194,grad_norm: 0.7822203696259986, iteration: 280014
loss: 0.9688010215759277,grad_norm: 0.8862664001691739, iteration: 280015
loss: 0.9435632824897766,grad_norm: 0.9572609362771457, iteration: 280016
loss: 1.00019371509552,grad_norm: 0.8475435954549474, iteration: 280017
loss: 1.0630089044570923,grad_norm: 0.9999997558483671, iteration: 280018
loss: 0.9831973910331726,grad_norm: 0.9032160070891474, iteration: 280019
loss: 1.0181875228881836,grad_norm: 0.8642248455403421, iteration: 280020
loss: 0.9975748062133789,grad_norm: 0.8961665314702847, iteration: 280021
loss: 1.0036563873291016,grad_norm: 0.8614969276878421, iteration: 280022
loss: 0.9509432911872864,grad_norm: 0.8086760801522703, iteration: 280023
loss: 0.978678822517395,grad_norm: 0.9228050168003629, iteration: 280024
loss: 0.9960011839866638,grad_norm: 0.847456237628074, iteration: 280025
loss: 0.9820968508720398,grad_norm: 0.9999991382641449, iteration: 280026
loss: 0.9956359267234802,grad_norm: 0.8075154800398858, iteration: 280027
loss: 0.9888368844985962,grad_norm: 0.8527245005103623, iteration: 280028
loss: 1.0026044845581055,grad_norm: 0.9116077819639661, iteration: 280029
loss: 0.9563674926757812,grad_norm: 0.8245152507114866, iteration: 280030
loss: 1.035813570022583,grad_norm: 0.8378615877387164, iteration: 280031
loss: 0.9975668787956238,grad_norm: 0.9999991113528169, iteration: 280032
loss: 1.0757697820663452,grad_norm: 0.9989343281081553, iteration: 280033
loss: 0.9843131899833679,grad_norm: 0.8254755475680852, iteration: 280034
loss: 1.0343940258026123,grad_norm: 0.8940689291284281, iteration: 280035
loss: 1.032674789428711,grad_norm: 0.9886372110615829, iteration: 280036
loss: 0.9861137866973877,grad_norm: 0.8474921799984537, iteration: 280037
loss: 1.01266610622406,grad_norm: 0.8382781923739995, iteration: 280038
loss: 0.9831312298774719,grad_norm: 0.9931343330580492, iteration: 280039
loss: 1.011352777481079,grad_norm: 0.8118164638446236, iteration: 280040
loss: 0.9685073494911194,grad_norm: 0.7119673607469238, iteration: 280041
loss: 1.0309603214263916,grad_norm: 0.9999990693259074, iteration: 280042
loss: 0.9837492108345032,grad_norm: 0.722893110886613, iteration: 280043
loss: 1.005901575088501,grad_norm: 0.9999992015164171, iteration: 280044
loss: 0.9972199201583862,grad_norm: 0.9999992164519389, iteration: 280045
loss: 0.9996286034584045,grad_norm: 0.999999196269324, iteration: 280046
loss: 1.0023939609527588,grad_norm: 0.8911934795878311, iteration: 280047
loss: 1.0172410011291504,grad_norm: 0.8688254640723904, iteration: 280048
loss: 1.0261136293411255,grad_norm: 0.9999998919423013, iteration: 280049
loss: 0.9736402034759521,grad_norm: 0.7895104536048627, iteration: 280050
loss: 1.032292127609253,grad_norm: 0.9566408985616848, iteration: 280051
loss: 1.0111892223358154,grad_norm: 0.7483850989152199, iteration: 280052
loss: 0.9924206733703613,grad_norm: 0.7886857876631026, iteration: 280053
loss: 1.057297945022583,grad_norm: 0.8882212227625653, iteration: 280054
loss: 1.0104511976242065,grad_norm: 0.95200536350789, iteration: 280055
loss: 1.004220724105835,grad_norm: 0.9507852082221853, iteration: 280056
loss: 0.9906938076019287,grad_norm: 0.9999990105963003, iteration: 280057
loss: 1.0172909498214722,grad_norm: 0.8434359297153482, iteration: 280058
loss: 0.9868044257164001,grad_norm: 0.8381690038490701, iteration: 280059
loss: 0.9532870650291443,grad_norm: 0.8890560535302987, iteration: 280060
loss: 0.9986201524734497,grad_norm: 0.9999993544606568, iteration: 280061
loss: 1.0009032487869263,grad_norm: 0.9381533998886432, iteration: 280062
loss: 1.0134553909301758,grad_norm: 0.9999997507767169, iteration: 280063
loss: 0.980883002281189,grad_norm: 0.9567037710038331, iteration: 280064
loss: 1.0176247358322144,grad_norm: 0.8208756170642041, iteration: 280065
loss: 1.0014078617095947,grad_norm: 0.7733456342094209, iteration: 280066
loss: 1.0151093006134033,grad_norm: 0.795087546219777, iteration: 280067
loss: 1.009272813796997,grad_norm: 0.9396886844987317, iteration: 280068
loss: 0.9978728294372559,grad_norm: 0.8596071509424618, iteration: 280069
loss: 1.0188930034637451,grad_norm: 0.9999990154064295, iteration: 280070
loss: 1.0209898948669434,grad_norm: 0.919737727713907, iteration: 280071
loss: 1.0155688524246216,grad_norm: 0.8691094005982103, iteration: 280072
loss: 0.970673143863678,grad_norm: 0.8588681924158199, iteration: 280073
loss: 0.992742121219635,grad_norm: 0.7422038794851818, iteration: 280074
loss: 0.9959815144538879,grad_norm: 0.7978398176048903, iteration: 280075
loss: 0.989782989025116,grad_norm: 0.8176407197918761, iteration: 280076
loss: 0.9815475940704346,grad_norm: 0.8085583104792341, iteration: 280077
loss: 0.992868959903717,grad_norm: 0.8853995932273039, iteration: 280078
loss: 0.9705291390419006,grad_norm: 0.7822777939723777, iteration: 280079
loss: 0.9809097051620483,grad_norm: 0.9509518374758188, iteration: 280080
loss: 0.9574308395385742,grad_norm: 0.8759125108971029, iteration: 280081
loss: 0.9823707938194275,grad_norm: 0.9140081422915829, iteration: 280082
loss: 0.9504494667053223,grad_norm: 0.8427966997187456, iteration: 280083
loss: 1.0194153785705566,grad_norm: 0.9999990558492743, iteration: 280084
loss: 0.9953615069389343,grad_norm: 0.8496390985424975, iteration: 280085
loss: 1.056259274482727,grad_norm: 0.9999997950593545, iteration: 280086
loss: 0.9799129366874695,grad_norm: 0.6837107106486963, iteration: 280087
loss: 0.9898457527160645,grad_norm: 0.9027343578226407, iteration: 280088
loss: 1.0076086521148682,grad_norm: 0.8987132608166647, iteration: 280089
loss: 0.9737499952316284,grad_norm: 0.9109869237861561, iteration: 280090
loss: 0.9861801862716675,grad_norm: 0.8909329607161618, iteration: 280091
loss: 1.0030450820922852,grad_norm: 0.9999992042046542, iteration: 280092
loss: 0.9886894822120667,grad_norm: 0.8776127718269643, iteration: 280093
loss: 0.9746091365814209,grad_norm: 0.8107790414645518, iteration: 280094
loss: 1.0311825275421143,grad_norm: 0.8154965790892109, iteration: 280095
loss: 1.0597667694091797,grad_norm: 0.9999994455650241, iteration: 280096
loss: 0.9934670925140381,grad_norm: 0.8525614905433605, iteration: 280097
loss: 1.0256813764572144,grad_norm: 0.9999989586939848, iteration: 280098
loss: 1.0182338953018188,grad_norm: 0.6546698437959897, iteration: 280099
loss: 0.9832360148429871,grad_norm: 0.9999991223656375, iteration: 280100
loss: 0.9730426669120789,grad_norm: 0.9000381343772027, iteration: 280101
loss: 1.0035301446914673,grad_norm: 0.8556527783166579, iteration: 280102
loss: 1.0212278366088867,grad_norm: 0.9202167080279717, iteration: 280103
loss: 0.9563953280448914,grad_norm: 0.815154779472137, iteration: 280104
loss: 0.9956385493278503,grad_norm: 0.7293690714914813, iteration: 280105
loss: 0.973472535610199,grad_norm: 0.8422551445937251, iteration: 280106
loss: 0.9904494881629944,grad_norm: 0.8077961898686001, iteration: 280107
loss: 1.001498818397522,grad_norm: 0.8178815387018571, iteration: 280108
loss: 0.9852774739265442,grad_norm: 0.6895427541400043, iteration: 280109
loss: 1.0045318603515625,grad_norm: 0.6233392709964367, iteration: 280110
loss: 0.9535011649131775,grad_norm: 0.9999992573534697, iteration: 280111
loss: 0.9809786677360535,grad_norm: 0.8437786214587472, iteration: 280112
loss: 0.9840923547744751,grad_norm: 0.9318876316809978, iteration: 280113
loss: 1.0257917642593384,grad_norm: 0.7746651940492387, iteration: 280114
loss: 0.9967970848083496,grad_norm: 0.9709603382532415, iteration: 280115
loss: 0.9848729968070984,grad_norm: 0.7515294289487853, iteration: 280116
loss: 1.028100609779358,grad_norm: 0.8571957134010518, iteration: 280117
loss: 0.992306113243103,grad_norm: 0.6966563451496134, iteration: 280118
loss: 1.0388612747192383,grad_norm: 0.7377528660946183, iteration: 280119
loss: 1.0237481594085693,grad_norm: 0.7813115400323517, iteration: 280120
loss: 0.9674347639083862,grad_norm: 0.905303468983529, iteration: 280121
loss: 0.9863537549972534,grad_norm: 0.8493665054707144, iteration: 280122
loss: 1.0560216903686523,grad_norm: 0.9778817220746256, iteration: 280123
loss: 1.0356372594833374,grad_norm: 0.999999946530878, iteration: 280124
loss: 1.037522792816162,grad_norm: 0.8716425750418959, iteration: 280125
loss: 1.0515745878219604,grad_norm: 0.9999999715898458, iteration: 280126
loss: 0.9730303287506104,grad_norm: 0.7966301809168584, iteration: 280127
loss: 1.009217381477356,grad_norm: 0.8925712588469152, iteration: 280128
loss: 0.9835724234580994,grad_norm: 0.7900971787090029, iteration: 280129
loss: 1.0070563554763794,grad_norm: 0.8314797495994309, iteration: 280130
loss: 1.0456180572509766,grad_norm: 0.8832600243156923, iteration: 280131
loss: 1.044736385345459,grad_norm: 0.8252136068669036, iteration: 280132
loss: 0.9866361021995544,grad_norm: 0.9232124931987653, iteration: 280133
loss: 1.002470850944519,grad_norm: 0.9585473735973745, iteration: 280134
loss: 1.0038646459579468,grad_norm: 0.903646962424934, iteration: 280135
loss: 1.0280845165252686,grad_norm: 0.9699398358153679, iteration: 280136
loss: 0.9608176350593567,grad_norm: 0.8499083474225915, iteration: 280137
loss: 1.0043480396270752,grad_norm: 0.8368832149740382, iteration: 280138
loss: 1.0058000087738037,grad_norm: 0.999999158444174, iteration: 280139
loss: 0.9911867380142212,grad_norm: 0.9999997180799082, iteration: 280140
loss: 0.999059796333313,grad_norm: 0.691893577767099, iteration: 280141
loss: 0.9906712770462036,grad_norm: 0.8045237381985464, iteration: 280142
loss: 1.0117267370224,grad_norm: 0.9999991218053687, iteration: 280143
loss: 1.0049850940704346,grad_norm: 0.7675984714576254, iteration: 280144
loss: 1.0247089862823486,grad_norm: 0.7696390217290867, iteration: 280145
loss: 0.992129921913147,grad_norm: 0.9169110022449434, iteration: 280146
loss: 0.9869211316108704,grad_norm: 0.8772494845833165, iteration: 280147
loss: 0.9949501156806946,grad_norm: 0.8261870728111969, iteration: 280148
loss: 1.0132042169570923,grad_norm: 0.908847295218808, iteration: 280149
loss: 1.0165926218032837,grad_norm: 0.9999990826884656, iteration: 280150
loss: 1.0192015171051025,grad_norm: 0.9505766733769175, iteration: 280151
loss: 0.9861971735954285,grad_norm: 0.7518935497336485, iteration: 280152
loss: 0.9892795085906982,grad_norm: 0.7936581630143485, iteration: 280153
loss: 1.0419782400131226,grad_norm: 0.9999991851834809, iteration: 280154
loss: 1.0059696435928345,grad_norm: 0.897647482399867, iteration: 280155
loss: 1.007465124130249,grad_norm: 0.9300734817751979, iteration: 280156
loss: 0.9812080264091492,grad_norm: 0.8926472706891283, iteration: 280157
loss: 1.0163602828979492,grad_norm: 0.8397765622579224, iteration: 280158
loss: 1.021659016609192,grad_norm: 0.7923944632876395, iteration: 280159
loss: 1.037154197692871,grad_norm: 0.8660811790699521, iteration: 280160
loss: 0.9781689047813416,grad_norm: 0.8364509508008885, iteration: 280161
loss: 0.9828922748565674,grad_norm: 0.7543757802963633, iteration: 280162
loss: 0.9912907481193542,grad_norm: 0.8228618452392422, iteration: 280163
loss: 1.0506120920181274,grad_norm: 0.9999991029281486, iteration: 280164
loss: 1.0231856107711792,grad_norm: 0.996886368841897, iteration: 280165
loss: 0.9992704391479492,grad_norm: 0.8695540673362229, iteration: 280166
loss: 0.9787241220474243,grad_norm: 0.8389199838764686, iteration: 280167
loss: 0.9849315285682678,grad_norm: 0.8081487869415032, iteration: 280168
loss: 0.9956644177436829,grad_norm: 0.8823691290345793, iteration: 280169
loss: 1.0222375392913818,grad_norm: 0.789158927052434, iteration: 280170
loss: 1.0197550058364868,grad_norm: 0.6580051207298662, iteration: 280171
loss: 1.02692449092865,grad_norm: 0.8910542822434363, iteration: 280172
loss: 1.010717749595642,grad_norm: 0.9999990094509398, iteration: 280173
loss: 0.9692214131355286,grad_norm: 0.8886583450630775, iteration: 280174
loss: 0.9933268427848816,grad_norm: 0.8464261794940274, iteration: 280175
loss: 1.0153388977050781,grad_norm: 0.7256733608489039, iteration: 280176
loss: 1.0690197944641113,grad_norm: 0.9320473706998371, iteration: 280177
loss: 1.001023530960083,grad_norm: 0.9179242468825819, iteration: 280178
loss: 0.9764525294303894,grad_norm: 0.7305441309461939, iteration: 280179
loss: 1.0243264436721802,grad_norm: 0.9188713830968489, iteration: 280180
loss: 0.997497022151947,grad_norm: 0.9251582883368521, iteration: 280181
loss: 0.9676246047019958,grad_norm: 0.8328576635225734, iteration: 280182
loss: 0.9876505136489868,grad_norm: 0.9999991968565394, iteration: 280183
loss: 0.9950515627861023,grad_norm: 0.762495875797943, iteration: 280184
loss: 1.0256699323654175,grad_norm: 0.9999990663883795, iteration: 280185
loss: 0.9900670647621155,grad_norm: 0.9023492956949279, iteration: 280186
loss: 1.0119619369506836,grad_norm: 0.9304155236539557, iteration: 280187
loss: 1.030579686164856,grad_norm: 0.702921368073006, iteration: 280188
loss: 1.0407346487045288,grad_norm: 0.8947351388790571, iteration: 280189
loss: 0.9941692352294922,grad_norm: 0.7776381026908803, iteration: 280190
loss: 0.9563494324684143,grad_norm: 0.745562959068196, iteration: 280191
loss: 1.0734686851501465,grad_norm: 0.9999992189328697, iteration: 280192
loss: 0.9673609137535095,grad_norm: 0.9314145144097549, iteration: 280193
loss: 1.0012911558151245,grad_norm: 0.9170118485625193, iteration: 280194
loss: 0.9968492388725281,grad_norm: 0.7318097134685994, iteration: 280195
loss: 0.9867547154426575,grad_norm: 0.9460470489869025, iteration: 280196
loss: 1.0259047746658325,grad_norm: 0.8548569084806095, iteration: 280197
loss: 1.0109678506851196,grad_norm: 0.8250370875579494, iteration: 280198
loss: 1.0286812782287598,grad_norm: 0.999999001541036, iteration: 280199
loss: 1.0019934177398682,grad_norm: 0.8951619098497389, iteration: 280200
loss: 0.9488586187362671,grad_norm: 0.9882566531915871, iteration: 280201
loss: 1.0098525285720825,grad_norm: 0.9999994342963058, iteration: 280202
loss: 0.9881232976913452,grad_norm: 0.9999989714042186, iteration: 280203
loss: 0.9933197498321533,grad_norm: 0.8859137447072108, iteration: 280204
loss: 1.0219943523406982,grad_norm: 0.9282173391801901, iteration: 280205
loss: 1.0399953126907349,grad_norm: 0.8426986450045161, iteration: 280206
loss: 0.9806703925132751,grad_norm: 0.7589338721136869, iteration: 280207
loss: 0.9779865741729736,grad_norm: 0.7366571967782953, iteration: 280208
loss: 1.0028187036514282,grad_norm: 0.9427432959862296, iteration: 280209
loss: 0.9825260639190674,grad_norm: 0.8197946100466935, iteration: 280210
loss: 1.0139204263687134,grad_norm: 0.9056481196457615, iteration: 280211
loss: 0.9927712082862854,grad_norm: 0.949236675372747, iteration: 280212
loss: 0.9747536182403564,grad_norm: 0.8835364908589218, iteration: 280213
loss: 0.976449191570282,grad_norm: 0.7564050404208752, iteration: 280214
loss: 1.0487865209579468,grad_norm: 0.9999991135288182, iteration: 280215
loss: 0.9673284888267517,grad_norm: 0.974692465935408, iteration: 280216
loss: 0.9659839272499084,grad_norm: 0.9341883136760989, iteration: 280217
loss: 1.0189201831817627,grad_norm: 0.9999991058340416, iteration: 280218
loss: 1.1242258548736572,grad_norm: 0.9999990479141297, iteration: 280219
loss: 1.0274131298065186,grad_norm: 0.9778582314957721, iteration: 280220
loss: 0.9746081829071045,grad_norm: 0.9810005573092031, iteration: 280221
loss: 0.9938579797744751,grad_norm: 0.7443034340920656, iteration: 280222
loss: 1.0123963356018066,grad_norm: 0.9271803329173732, iteration: 280223
loss: 1.0407953262329102,grad_norm: 0.9999991908063396, iteration: 280224
loss: 1.0153692960739136,grad_norm: 0.8528716388790681, iteration: 280225
loss: 1.0035823583602905,grad_norm: 0.8746627791256123, iteration: 280226
loss: 0.9855073690414429,grad_norm: 0.999999179808062, iteration: 280227
loss: 0.9753816723823547,grad_norm: 0.8224413325734948, iteration: 280228
loss: 0.988286554813385,grad_norm: 0.7287462748934224, iteration: 280229
loss: 0.9604343771934509,grad_norm: 0.9360204668346443, iteration: 280230
loss: 1.0247479677200317,grad_norm: 0.7377814703639529, iteration: 280231
loss: 1.0052889585494995,grad_norm: 0.9999991170976041, iteration: 280232
loss: 1.0022608041763306,grad_norm: 0.8322764493126444, iteration: 280233
loss: 1.006778359413147,grad_norm: 0.8732757709801009, iteration: 280234
loss: 0.9524432420730591,grad_norm: 0.8220524186109041, iteration: 280235
loss: 0.9896635413169861,grad_norm: 0.8281961803333172, iteration: 280236
loss: 0.996280312538147,grad_norm: 0.6864505323229737, iteration: 280237
loss: 1.00375235080719,grad_norm: 0.8664414852070792, iteration: 280238
loss: 1.0111823081970215,grad_norm: 0.8290323409608402, iteration: 280239
loss: 1.001806378364563,grad_norm: 0.8347192467313799, iteration: 280240
loss: 1.0080066919326782,grad_norm: 0.7944599986346544, iteration: 280241
loss: 1.0278246402740479,grad_norm: 0.8629046676860566, iteration: 280242
loss: 1.1121755838394165,grad_norm: 0.9999990126986866, iteration: 280243
loss: 1.0209754705429077,grad_norm: 0.9518989765381765, iteration: 280244
loss: 0.9858989715576172,grad_norm: 0.9178314873118485, iteration: 280245
loss: 0.9929458498954773,grad_norm: 0.9517466601742088, iteration: 280246
loss: 1.0264008045196533,grad_norm: 0.9999992470484896, iteration: 280247
loss: 1.056167483329773,grad_norm: 0.9999998087331646, iteration: 280248
loss: 0.9919382333755493,grad_norm: 0.9008983312425399, iteration: 280249
loss: 0.9698202610015869,grad_norm: 0.8472984568677644, iteration: 280250
loss: 1.133603572845459,grad_norm: 0.9999991825305272, iteration: 280251
loss: 0.9740577936172485,grad_norm: 0.835992408214364, iteration: 280252
loss: 0.9701361060142517,grad_norm: 0.7229820808174748, iteration: 280253
loss: 1.0150822401046753,grad_norm: 0.8851215120276367, iteration: 280254
loss: 0.9789420366287231,grad_norm: 0.9890983833134744, iteration: 280255
loss: 1.0026710033416748,grad_norm: 0.8971478455971402, iteration: 280256
loss: 1.0026839971542358,grad_norm: 0.8366053348875758, iteration: 280257
loss: 0.9679974913597107,grad_norm: 0.914726347275656, iteration: 280258
loss: 1.0016030073165894,grad_norm: 0.7380417224808699, iteration: 280259
loss: 0.9689246416091919,grad_norm: 0.8298484669380958, iteration: 280260
loss: 0.9895884394645691,grad_norm: 0.8820799664575975, iteration: 280261
loss: 0.9676541090011597,grad_norm: 0.9999990643265662, iteration: 280262
loss: 1.0269854068756104,grad_norm: 0.8517110710671896, iteration: 280263
loss: 1.0105606317520142,grad_norm: 0.8940410780985769, iteration: 280264
loss: 1.0116249322891235,grad_norm: 0.9252891380098938, iteration: 280265
loss: 0.9843544363975525,grad_norm: 0.9999992086374829, iteration: 280266
loss: 1.1030189990997314,grad_norm: 0.9999995718086664, iteration: 280267
loss: 1.0304124355316162,grad_norm: 0.9582066223995912, iteration: 280268
loss: 1.0205026865005493,grad_norm: 0.8230429563216126, iteration: 280269
loss: 1.010369062423706,grad_norm: 0.9880507222402803, iteration: 280270
loss: 1.027458667755127,grad_norm: 0.9498875295153023, iteration: 280271
loss: 1.0849019289016724,grad_norm: 0.9999991959193498, iteration: 280272
loss: 0.9919468760490417,grad_norm: 0.9762138793022789, iteration: 280273
loss: 1.023134708404541,grad_norm: 0.8292389877932528, iteration: 280274
loss: 0.9983460307121277,grad_norm: 0.8096418477461097, iteration: 280275
loss: 0.9856703281402588,grad_norm: 0.8938932839073247, iteration: 280276
loss: 0.9984015822410583,grad_norm: 0.7975518802679731, iteration: 280277
loss: 1.0907726287841797,grad_norm: 0.9999993116701276, iteration: 280278
loss: 0.9997510313987732,grad_norm: 0.8607515442661239, iteration: 280279
loss: 1.029253363609314,grad_norm: 0.9999992144033747, iteration: 280280
loss: 0.9812027215957642,grad_norm: 0.9867737660784672, iteration: 280281
loss: 0.9765520095825195,grad_norm: 0.999999199251583, iteration: 280282
loss: 1.0092496871948242,grad_norm: 0.7354840799814577, iteration: 280283
loss: 0.99740070104599,grad_norm: 0.8335697629658257, iteration: 280284
loss: 1.0480743646621704,grad_norm: 0.9999995107182937, iteration: 280285
loss: 1.0090218782424927,grad_norm: 0.8017562420431023, iteration: 280286
loss: 0.9892140626907349,grad_norm: 0.8549206391366512, iteration: 280287
loss: 0.9768917560577393,grad_norm: 0.8579380484732959, iteration: 280288
loss: 0.9775390625,grad_norm: 0.993352784709998, iteration: 280289
loss: 1.0011494159698486,grad_norm: 0.9413246866557536, iteration: 280290
loss: 0.9761128425598145,grad_norm: 0.8012627949261515, iteration: 280291
loss: 1.010282278060913,grad_norm: 0.9010042270563525, iteration: 280292
loss: 0.9934273362159729,grad_norm: 0.721169408882965, iteration: 280293
loss: 1.0257964134216309,grad_norm: 0.9354901369459647, iteration: 280294
loss: 0.9982877373695374,grad_norm: 0.8176022732004334, iteration: 280295
loss: 0.978841245174408,grad_norm: 0.9823759419821025, iteration: 280296
loss: 1.0068042278289795,grad_norm: 0.8385072067609175, iteration: 280297
loss: 1.029725193977356,grad_norm: 0.8787038309901347, iteration: 280298
loss: 0.9648167490959167,grad_norm: 0.819061877263808, iteration: 280299
loss: 0.995190441608429,grad_norm: 0.9999993940054379, iteration: 280300
loss: 1.0495622158050537,grad_norm: 0.8555850422690843, iteration: 280301
loss: 0.9912731647491455,grad_norm: 0.937608125915449, iteration: 280302
loss: 1.0795152187347412,grad_norm: 0.9999990231134736, iteration: 280303
loss: 1.0344797372817993,grad_norm: 0.8261030682510803, iteration: 280304
loss: 1.0075932741165161,grad_norm: 0.812768646737809, iteration: 280305
loss: 0.9880547523498535,grad_norm: 0.6869325088994259, iteration: 280306
loss: 1.0070252418518066,grad_norm: 0.8766015186869801, iteration: 280307
loss: 0.9981640577316284,grad_norm: 0.7801518864707743, iteration: 280308
loss: 0.9887381196022034,grad_norm: 0.8247340373977536, iteration: 280309
loss: 0.98136967420578,grad_norm: 0.999999220658941, iteration: 280310
loss: 1.0007848739624023,grad_norm: 0.8962094627969287, iteration: 280311
loss: 1.0103992223739624,grad_norm: 0.8101413273739717, iteration: 280312
loss: 1.0130161046981812,grad_norm: 0.7625044242736522, iteration: 280313
loss: 0.9872148633003235,grad_norm: 0.9410649733304091, iteration: 280314
loss: 1.0322871208190918,grad_norm: 0.8524807552230353, iteration: 280315
loss: 0.9597098231315613,grad_norm: 0.7485233302874666, iteration: 280316
loss: 1.0214084386825562,grad_norm: 0.8641753546836055, iteration: 280317
loss: 0.9720966815948486,grad_norm: 0.7912679008527066, iteration: 280318
loss: 1.0313993692398071,grad_norm: 0.843356360336629, iteration: 280319
loss: 1.011565089225769,grad_norm: 0.813747735686891, iteration: 280320
loss: 0.9874281883239746,grad_norm: 0.7615863806610744, iteration: 280321
loss: 0.9946833252906799,grad_norm: 0.8482527626712314, iteration: 280322
loss: 1.0021363496780396,grad_norm: 0.787379720934365, iteration: 280323
loss: 1.0304828882217407,grad_norm: 0.8879055602132269, iteration: 280324
loss: 0.9931054711341858,grad_norm: 0.9416386587776386, iteration: 280325
loss: 1.003051519393921,grad_norm: 0.999999210807083, iteration: 280326
loss: 1.0228954553604126,grad_norm: 0.8779703778639434, iteration: 280327
loss: 1.0007388591766357,grad_norm: 0.9999990673019018, iteration: 280328
loss: 0.9842081069946289,grad_norm: 0.8321659258860854, iteration: 280329
loss: 0.9958969354629517,grad_norm: 0.7485066274419768, iteration: 280330
loss: 0.9762458205223083,grad_norm: 0.781839111031433, iteration: 280331
loss: 1.0094047784805298,grad_norm: 0.9043254993419988, iteration: 280332
loss: 1.0083746910095215,grad_norm: 0.9116142086411265, iteration: 280333
loss: 1.0177282094955444,grad_norm: 0.8758058525530747, iteration: 280334
loss: 1.0047916173934937,grad_norm: 0.7789176467006542, iteration: 280335
loss: 0.9586003422737122,grad_norm: 0.825986516795134, iteration: 280336
loss: 1.0291526317596436,grad_norm: 0.8264527081798625, iteration: 280337
loss: 1.0557044744491577,grad_norm: 0.9569384582317499, iteration: 280338
loss: 0.9826773405075073,grad_norm: 0.9686070074004821, iteration: 280339
loss: 1.0212308168411255,grad_norm: 0.8935286423560965, iteration: 280340
loss: 0.9947497844696045,grad_norm: 0.9378943480012726, iteration: 280341
loss: 1.0417664051055908,grad_norm: 0.9920556339942244, iteration: 280342
loss: 1.0291210412979126,grad_norm: 0.9192571001312604, iteration: 280343
loss: 0.9803212881088257,grad_norm: 0.99588787268856, iteration: 280344
loss: 1.0264256000518799,grad_norm: 0.9999990436967185, iteration: 280345
loss: 1.0388991832733154,grad_norm: 0.8178366775219064, iteration: 280346
loss: 1.0291937589645386,grad_norm: 0.9999993338674361, iteration: 280347
loss: 1.020329236984253,grad_norm: 0.8309405997879111, iteration: 280348
loss: 1.0025310516357422,grad_norm: 0.841119510362264, iteration: 280349
loss: 0.9857155680656433,grad_norm: 0.9967688033188646, iteration: 280350
loss: 1.0062155723571777,grad_norm: 0.8517648806752007, iteration: 280351
loss: 0.997872531414032,grad_norm: 0.8622694842759191, iteration: 280352
loss: 1.025742769241333,grad_norm: 0.9999993407104665, iteration: 280353
loss: 1.0167180299758911,grad_norm: 0.8584426548081886, iteration: 280354
loss: 0.9523558616638184,grad_norm: 0.8383891484055884, iteration: 280355
loss: 0.9843727946281433,grad_norm: 0.8341687847727187, iteration: 280356
loss: 0.9926393032073975,grad_norm: 0.840485249924335, iteration: 280357
loss: 1.0157924890518188,grad_norm: 0.7949502231700596, iteration: 280358
loss: 1.017403483390808,grad_norm: 0.872407112744006, iteration: 280359
loss: 0.9660338759422302,grad_norm: 0.8525956233145934, iteration: 280360
loss: 1.03434157371521,grad_norm: 0.9999991321521657, iteration: 280361
loss: 1.0481303930282593,grad_norm: 0.8839690988645897, iteration: 280362
loss: 1.0116046667099,grad_norm: 0.8462349545504174, iteration: 280363
loss: 0.9986217617988586,grad_norm: 0.7793805343919713, iteration: 280364
loss: 1.0228030681610107,grad_norm: 0.999999808501569, iteration: 280365
loss: 0.9772598147392273,grad_norm: 0.8926561861516438, iteration: 280366
loss: 0.9768767952919006,grad_norm: 0.8011514501427524, iteration: 280367
loss: 0.9983160495758057,grad_norm: 0.9999990005059263, iteration: 280368
loss: 0.9991549253463745,grad_norm: 0.8458430897034939, iteration: 280369
loss: 0.9905896782875061,grad_norm: 0.9144283688993012, iteration: 280370
loss: 1.0069769620895386,grad_norm: 0.6263227783801291, iteration: 280371
loss: 1.0820841789245605,grad_norm: 0.7884966821551327, iteration: 280372
loss: 1.0446486473083496,grad_norm: 0.999999627985852, iteration: 280373
loss: 1.0101721286773682,grad_norm: 0.9999993430059271, iteration: 280374
loss: 0.9893921613693237,grad_norm: 0.8021307454866433, iteration: 280375
loss: 1.006272792816162,grad_norm: 0.9999990384089923, iteration: 280376
loss: 1.0025603771209717,grad_norm: 0.9641203863252517, iteration: 280377
loss: 1.0009472370147705,grad_norm: 0.7490251576926112, iteration: 280378
loss: 0.9949618577957153,grad_norm: 0.9374786480245376, iteration: 280379
loss: 1.0260744094848633,grad_norm: 0.7663537544202691, iteration: 280380
loss: 1.0241965055465698,grad_norm: 0.9999992087546693, iteration: 280381
loss: 0.9885179996490479,grad_norm: 0.7663946874517653, iteration: 280382
loss: 1.0121833086013794,grad_norm: 0.8695557774947532, iteration: 280383
loss: 1.011530876159668,grad_norm: 0.9999996856859548, iteration: 280384
loss: 1.0122239589691162,grad_norm: 0.7569801320690879, iteration: 280385
loss: 1.0001639127731323,grad_norm: 0.9177215988695021, iteration: 280386
loss: 1.0215524435043335,grad_norm: 0.8640963960390472, iteration: 280387
loss: 0.9931157827377319,grad_norm: 0.8119363248225713, iteration: 280388
loss: 0.9889745712280273,grad_norm: 0.9020825886359649, iteration: 280389
loss: 1.004349708557129,grad_norm: 0.6501717489474557, iteration: 280390
loss: 1.0113403797149658,grad_norm: 0.8871300545991597, iteration: 280391
loss: 1.0197495222091675,grad_norm: 0.8480069559097442, iteration: 280392
loss: 0.994895875453949,grad_norm: 0.7447326691115018, iteration: 280393
loss: 0.9668591022491455,grad_norm: 0.7833577021510523, iteration: 280394
loss: 0.9991758465766907,grad_norm: 0.9089198670940629, iteration: 280395
loss: 0.9914869666099548,grad_norm: 0.8391683166531803, iteration: 280396
loss: 0.9601730108261108,grad_norm: 0.9344518793576646, iteration: 280397
loss: 1.0229707956314087,grad_norm: 0.7330844758352266, iteration: 280398
loss: 1.064066767692566,grad_norm: 0.968810009732101, iteration: 280399
loss: 1.0038042068481445,grad_norm: 0.9999990420128982, iteration: 280400
loss: 0.9713020324707031,grad_norm: 0.9999991013464904, iteration: 280401
loss: 0.9854596853256226,grad_norm: 0.8711033659917143, iteration: 280402
loss: 1.060708999633789,grad_norm: 0.7814628760281639, iteration: 280403
loss: 1.0163072347640991,grad_norm: 0.7530139499942868, iteration: 280404
loss: 1.0496717691421509,grad_norm: 0.9463265986254342, iteration: 280405
loss: 0.9937739968299866,grad_norm: 0.8490459090978345, iteration: 280406
loss: 0.9691717028617859,grad_norm: 0.8290271685400892, iteration: 280407
loss: 0.9827775955200195,grad_norm: 0.8277815823902314, iteration: 280408
loss: 0.9662136435508728,grad_norm: 0.8472894261431982, iteration: 280409
loss: 0.9984429478645325,grad_norm: 0.8255680617071959, iteration: 280410
loss: 0.9953219294548035,grad_norm: 0.8801437993083413, iteration: 280411
loss: 1.0291591882705688,grad_norm: 0.9999997355877777, iteration: 280412
loss: 1.025203824043274,grad_norm: 0.9999990813179505, iteration: 280413
loss: 1.0125176906585693,grad_norm: 0.8146032861487872, iteration: 280414
loss: 0.9790589809417725,grad_norm: 0.6831558783167408, iteration: 280415
loss: 0.9563928246498108,grad_norm: 0.7517364303870293, iteration: 280416
loss: 0.992688775062561,grad_norm: 0.8830939775053449, iteration: 280417
loss: 1.000997543334961,grad_norm: 0.7757979229121255, iteration: 280418
loss: 0.9861783385276794,grad_norm: 0.9024302407114964, iteration: 280419
loss: 0.994708240032196,grad_norm: 0.8347382289909191, iteration: 280420
loss: 1.0006322860717773,grad_norm: 0.7742096871194071, iteration: 280421
loss: 1.0088289976119995,grad_norm: 0.8296621053309292, iteration: 280422
loss: 0.9809907674789429,grad_norm: 0.8875071174744736, iteration: 280423
loss: 0.9812983274459839,grad_norm: 0.9159587635741427, iteration: 280424
loss: 0.9863387942314148,grad_norm: 0.8353461625070403, iteration: 280425
loss: 0.990309476852417,grad_norm: 0.7378011330538757, iteration: 280426
loss: 0.9667225480079651,grad_norm: 0.7843267565096047, iteration: 280427
loss: 1.0031992197036743,grad_norm: 0.9999990300636682, iteration: 280428
loss: 0.985740065574646,grad_norm: 0.8328210827210566, iteration: 280429
loss: 0.9996390342712402,grad_norm: 0.8256669913368144, iteration: 280430
loss: 0.9787730574607849,grad_norm: 0.7768066214261614, iteration: 280431
loss: 1.0169744491577148,grad_norm: 0.8033755362601007, iteration: 280432
loss: 0.9810361862182617,grad_norm: 0.8096617857563209, iteration: 280433
loss: 1.0239924192428589,grad_norm: 0.8784818910133594, iteration: 280434
loss: 0.9732285737991333,grad_norm: 0.8831221976562906, iteration: 280435
loss: 0.9908180236816406,grad_norm: 0.9071743733410397, iteration: 280436
loss: 0.9764373898506165,grad_norm: 0.7741724396636785, iteration: 280437
loss: 1.0155918598175049,grad_norm: 0.9999998786235792, iteration: 280438
loss: 0.9900035262107849,grad_norm: 0.7526374805512488, iteration: 280439
loss: 1.0086387395858765,grad_norm: 0.9724275862998034, iteration: 280440
loss: 1.003568410873413,grad_norm: 0.7437966828550147, iteration: 280441
loss: 1.0115852355957031,grad_norm: 0.9499325614523522, iteration: 280442
loss: 1.051784634590149,grad_norm: 0.99718760792369, iteration: 280443
loss: 0.9658447504043579,grad_norm: 0.8928838559581441, iteration: 280444
loss: 0.9919455647468567,grad_norm: 0.7511533543692248, iteration: 280445
loss: 0.9842506051063538,grad_norm: 0.8471139832941068, iteration: 280446
loss: 0.969273567199707,grad_norm: 0.7427995599671012, iteration: 280447
loss: 1.0306968688964844,grad_norm: 0.9999995400756423, iteration: 280448
loss: 0.9828700423240662,grad_norm: 0.914968518100784, iteration: 280449
loss: 1.0237520933151245,grad_norm: 0.855485531546697, iteration: 280450
loss: 1.0145695209503174,grad_norm: 0.7709393898985578, iteration: 280451
loss: 1.0048216581344604,grad_norm: 0.9939651442863562, iteration: 280452
loss: 1.037671685218811,grad_norm: 0.9999995959307239, iteration: 280453
loss: 0.9943687319755554,grad_norm: 0.8589301917086394, iteration: 280454
loss: 1.0012999773025513,grad_norm: 0.8654895894230976, iteration: 280455
loss: 0.9793501496315002,grad_norm: 0.8995670261450547, iteration: 280456
loss: 1.017365574836731,grad_norm: 0.7558946714506072, iteration: 280457
loss: 0.9746726155281067,grad_norm: 0.8355356832214544, iteration: 280458
loss: 1.016359567642212,grad_norm: 0.8117896457080023, iteration: 280459
loss: 1.0538020133972168,grad_norm: 0.8713502882134706, iteration: 280460
loss: 1.0627524852752686,grad_norm: 0.9168595791463435, iteration: 280461
loss: 1.012381911277771,grad_norm: 0.8794882549737125, iteration: 280462
loss: 0.9875472784042358,grad_norm: 0.7773274086979987, iteration: 280463
loss: 0.9587603211402893,grad_norm: 0.8751927193383562, iteration: 280464
loss: 1.010290265083313,grad_norm: 0.8338682329649826, iteration: 280465
loss: 0.9788097143173218,grad_norm: 0.6745341731829708, iteration: 280466
loss: 1.0566473007202148,grad_norm: 0.999999117089308, iteration: 280467
loss: 0.9996092915534973,grad_norm: 0.8768898443576986, iteration: 280468
loss: 1.0044794082641602,grad_norm: 0.8377449073006569, iteration: 280469
loss: 0.9943603277206421,grad_norm: 0.9047427138648543, iteration: 280470
loss: 1.0477129220962524,grad_norm: 0.9836598170846674, iteration: 280471
loss: 1.0120017528533936,grad_norm: 0.856697224659156, iteration: 280472
loss: 1.020268201828003,grad_norm: 0.9488826268205963, iteration: 280473
loss: 0.9528452157974243,grad_norm: 0.9999990121761059, iteration: 280474
loss: 0.954953134059906,grad_norm: 0.8187941305229564, iteration: 280475
loss: 0.9272206425666809,grad_norm: 0.7864439297637198, iteration: 280476
loss: 1.0158153772354126,grad_norm: 0.8590868787224688, iteration: 280477
loss: 1.027310848236084,grad_norm: 0.9348369621075887, iteration: 280478
loss: 0.9669528603553772,grad_norm: 0.9999991145800494, iteration: 280479
loss: 0.9903972148895264,grad_norm: 0.980475407415664, iteration: 280480
loss: 0.9836182594299316,grad_norm: 0.8975857519310835, iteration: 280481
loss: 1.0237059593200684,grad_norm: 0.9187374156244256, iteration: 280482
loss: 0.9880326986312866,grad_norm: 0.7654161540290995, iteration: 280483
loss: 0.9785067439079285,grad_norm: 0.8752951435714295, iteration: 280484
loss: 0.9786450862884521,grad_norm: 0.8577566255875629, iteration: 280485
loss: 0.9822800755500793,grad_norm: 0.9857609166798781, iteration: 280486
loss: 0.9827797412872314,grad_norm: 0.9428641128065012, iteration: 280487
loss: 1.0166163444519043,grad_norm: 0.72607106085276, iteration: 280488
loss: 0.9780757427215576,grad_norm: 0.8345907331156818, iteration: 280489
loss: 1.0019569396972656,grad_norm: 0.7575418763932175, iteration: 280490
loss: 1.082350492477417,grad_norm: 0.891974717270844, iteration: 280491
loss: 0.9355689883232117,grad_norm: 0.9999989795630373, iteration: 280492
loss: 1.0099284648895264,grad_norm: 0.999999140127322, iteration: 280493
loss: 0.9830234050750732,grad_norm: 0.9344419094765812, iteration: 280494
loss: 0.9963032007217407,grad_norm: 0.8216218644221787, iteration: 280495
loss: 1.0293607711791992,grad_norm: 0.8848681383513378, iteration: 280496
loss: 1.0455271005630493,grad_norm: 0.9999996295577733, iteration: 280497
loss: 0.9994398355484009,grad_norm: 0.7933758118628396, iteration: 280498
loss: 1.0000096559524536,grad_norm: 0.9265272309284575, iteration: 280499
loss: 0.9842426776885986,grad_norm: 0.9369968204819059, iteration: 280500
loss: 0.9991067051887512,grad_norm: 0.9425825153067043, iteration: 280501
loss: 1.1214964389801025,grad_norm: 0.999999621833788, iteration: 280502
loss: 0.9975193738937378,grad_norm: 0.7951565636911058, iteration: 280503
loss: 1.0100278854370117,grad_norm: 0.89807827327007, iteration: 280504
loss: 1.007356882095337,grad_norm: 0.9661584601058617, iteration: 280505
loss: 1.0258100032806396,grad_norm: 0.8407218083106682, iteration: 280506
loss: 1.0120329856872559,grad_norm: 0.9124612235630464, iteration: 280507
loss: 0.9932414293289185,grad_norm: 0.9999991045507503, iteration: 280508
loss: 0.9835699796676636,grad_norm: 0.8885485660907955, iteration: 280509
loss: 0.9803887009620667,grad_norm: 0.9429059116684949, iteration: 280510
loss: 0.9934301376342773,grad_norm: 0.9786333151784636, iteration: 280511
loss: 1.0325595140457153,grad_norm: 0.9999992411504053, iteration: 280512
loss: 0.9822130799293518,grad_norm: 0.9947549357512204, iteration: 280513
loss: 0.9834744334220886,grad_norm: 0.8004642387767575, iteration: 280514
loss: 1.0204850435256958,grad_norm: 0.7747792262740621, iteration: 280515
loss: 1.0186307430267334,grad_norm: 0.9999989815338199, iteration: 280516
loss: 1.0205953121185303,grad_norm: 0.7617774535428354, iteration: 280517
loss: 0.969501256942749,grad_norm: 0.7961837363753053, iteration: 280518
loss: 0.9984518885612488,grad_norm: 0.7942782754128095, iteration: 280519
loss: 1.0017552375793457,grad_norm: 0.8781854447193133, iteration: 280520
loss: 0.9921923875808716,grad_norm: 0.734070740853443, iteration: 280521
loss: 0.9891394376754761,grad_norm: 0.8597407946630043, iteration: 280522
loss: 0.9929561614990234,grad_norm: 0.8752848178803492, iteration: 280523
loss: 0.9857549071311951,grad_norm: 0.8667978090255516, iteration: 280524
loss: 1.0163663625717163,grad_norm: 0.8125300422626272, iteration: 280525
loss: 0.9794148802757263,grad_norm: 0.9999990958796954, iteration: 280526
loss: 1.0149298906326294,grad_norm: 0.985033887694455, iteration: 280527
loss: 0.9646497368812561,grad_norm: 0.8050526527995793, iteration: 280528
loss: 1.0062774419784546,grad_norm: 0.8442863069572402, iteration: 280529
loss: 0.9984414577484131,grad_norm: 0.8324513279870973, iteration: 280530
loss: 0.9932475686073303,grad_norm: 0.8940797813216126, iteration: 280531
loss: 1.0139659643173218,grad_norm: 0.8535049437266097, iteration: 280532
loss: 0.9637329578399658,grad_norm: 0.9891360647343493, iteration: 280533
loss: 1.0474786758422852,grad_norm: 0.9283989325070412, iteration: 280534
loss: 0.988742470741272,grad_norm: 0.745882387366933, iteration: 280535
loss: 1.0029072761535645,grad_norm: 0.9999991343759754, iteration: 280536
loss: 0.96906977891922,grad_norm: 0.8086409536329396, iteration: 280537
loss: 1.0068928003311157,grad_norm: 0.6472879002384452, iteration: 280538
loss: 0.9478932023048401,grad_norm: 0.8121506446013552, iteration: 280539
loss: 0.9983287453651428,grad_norm: 0.9168091372478919, iteration: 280540
loss: 0.9999858140945435,grad_norm: 0.9999999059513941, iteration: 280541
loss: 1.0753681659698486,grad_norm: 0.9999993048053617, iteration: 280542
loss: 0.9693000912666321,grad_norm: 0.8180057206265566, iteration: 280543
loss: 1.0194720029830933,grad_norm: 0.924038065919409, iteration: 280544
loss: 1.0134445428848267,grad_norm: 0.9226251452878466, iteration: 280545
loss: 0.9835904836654663,grad_norm: 0.7705275593883415, iteration: 280546
loss: 0.9761412739753723,grad_norm: 0.6960206500532786, iteration: 280547
loss: 1.030807614326477,grad_norm: 0.9999992697543765, iteration: 280548
loss: 0.9871785640716553,grad_norm: 0.8254022328478016, iteration: 280549
loss: 0.9994890689849854,grad_norm: 0.999999175176443, iteration: 280550
loss: 1.1046943664550781,grad_norm: 0.9999992038225308, iteration: 280551
loss: 0.995086669921875,grad_norm: 0.837075449978191, iteration: 280552
loss: 1.0178343057632446,grad_norm: 0.8420224241018531, iteration: 280553
loss: 0.9922918081283569,grad_norm: 0.8754669973784474, iteration: 280554
loss: 1.0025908946990967,grad_norm: 0.9999991378851849, iteration: 280555
loss: 1.012676477432251,grad_norm: 0.8518425963534293, iteration: 280556
loss: 0.9515960216522217,grad_norm: 0.9523796848836585, iteration: 280557
loss: 1.0183022022247314,grad_norm: 0.9999991096820406, iteration: 280558
loss: 1.0252940654754639,grad_norm: 0.8790810618371063, iteration: 280559
loss: 0.9831874370574951,grad_norm: 0.8631885102487625, iteration: 280560
loss: 0.9963022470474243,grad_norm: 0.748285860051339, iteration: 280561
loss: 0.9999588131904602,grad_norm: 0.7953141367273141, iteration: 280562
loss: 0.989102303981781,grad_norm: 0.7762047978492824, iteration: 280563
loss: 1.012892246246338,grad_norm: 0.7965815041513221, iteration: 280564
loss: 0.9973574280738831,grad_norm: 0.8957599775415318, iteration: 280565
loss: 0.9927706718444824,grad_norm: 0.8727087917808501, iteration: 280566
loss: 0.9831284880638123,grad_norm: 0.7191324490211826, iteration: 280567
loss: 0.9709637761116028,grad_norm: 0.9999991106449914, iteration: 280568
loss: 0.9965981841087341,grad_norm: 0.9999991019894452, iteration: 280569
loss: 1.015401005744934,grad_norm: 0.9999993050877801, iteration: 280570
loss: 0.9757737517356873,grad_norm: 0.8383926916968353, iteration: 280571
loss: 0.9700481295585632,grad_norm: 0.8868529077681936, iteration: 280572
loss: 1.0376001596450806,grad_norm: 0.8215157439199291, iteration: 280573
loss: 0.9948182106018066,grad_norm: 0.965635187822836, iteration: 280574
loss: 1.014728307723999,grad_norm: 0.7182520517514521, iteration: 280575
loss: 0.9978784322738647,grad_norm: 0.8654002651041722, iteration: 280576
loss: 1.0270644426345825,grad_norm: 0.9245294274887733, iteration: 280577
loss: 1.0106322765350342,grad_norm: 0.9215880983445461, iteration: 280578
loss: 0.9973886609077454,grad_norm: 0.7807049685689698, iteration: 280579
loss: 0.9947304129600525,grad_norm: 0.9438071947059992, iteration: 280580
loss: 0.9983605146408081,grad_norm: 0.8744312354800735, iteration: 280581
loss: 1.012993574142456,grad_norm: 0.7387163705680575, iteration: 280582
loss: 0.9526985883712769,grad_norm: 0.9385168652515302, iteration: 280583
loss: 1.0076158046722412,grad_norm: 0.8704709749148851, iteration: 280584
loss: 0.9641057848930359,grad_norm: 0.7976139306302279, iteration: 280585
loss: 1.01153564453125,grad_norm: 0.8905573254254344, iteration: 280586
loss: 0.9729276299476624,grad_norm: 0.9519694023656557, iteration: 280587
loss: 0.9950074553489685,grad_norm: 0.8574141223030852, iteration: 280588
loss: 1.0319277048110962,grad_norm: 0.851219204750221, iteration: 280589
loss: 0.9719029664993286,grad_norm: 0.7994194879378211, iteration: 280590
loss: 0.9792635440826416,grad_norm: 0.8697184031265748, iteration: 280591
loss: 0.9932283163070679,grad_norm: 0.7609002241514861, iteration: 280592
loss: 0.9786267280578613,grad_norm: 0.822338822561158, iteration: 280593
loss: 1.0230532884597778,grad_norm: 0.9142652084027046, iteration: 280594
loss: 1.0391390323638916,grad_norm: 0.9147981325660774, iteration: 280595
loss: 0.978626549243927,grad_norm: 0.9465276037529275, iteration: 280596
loss: 1.0350185632705688,grad_norm: 0.8318529507762058, iteration: 280597
loss: 0.9976339340209961,grad_norm: 0.9999996369281537, iteration: 280598
loss: 1.0138620138168335,grad_norm: 0.9092710591909948, iteration: 280599
loss: 1.012618064880371,grad_norm: 0.7183419311423785, iteration: 280600
loss: 1.0213756561279297,grad_norm: 0.8816720482163171, iteration: 280601
loss: 0.984192430973053,grad_norm: 0.946954002921279, iteration: 280602
loss: 0.988779604434967,grad_norm: 0.9071760815107913, iteration: 280603
loss: 1.0284677743911743,grad_norm: 0.74269529464367, iteration: 280604
loss: 0.9886797070503235,grad_norm: 0.8294598836850265, iteration: 280605
loss: 1.0637526512145996,grad_norm: 0.9999990638986958, iteration: 280606
loss: 0.9494444727897644,grad_norm: 0.940208803099025, iteration: 280607
loss: 0.955954372882843,grad_norm: 0.701309398128668, iteration: 280608
loss: 0.9707117676734924,grad_norm: 0.9999989279721143, iteration: 280609
loss: 1.0397987365722656,grad_norm: 0.8612140679618553, iteration: 280610
loss: 1.0056465864181519,grad_norm: 0.9730045559711373, iteration: 280611
loss: 0.9900126457214355,grad_norm: 0.8345908457583604, iteration: 280612
loss: 1.0440794229507446,grad_norm: 0.8791913800891639, iteration: 280613
loss: 1.007858395576477,grad_norm: 0.9101621928432791, iteration: 280614
loss: 1.0397909879684448,grad_norm: 0.872263488111985, iteration: 280615
loss: 1.0119010210037231,grad_norm: 0.9999992123290846, iteration: 280616
loss: 0.9916454553604126,grad_norm: 0.9620242539139824, iteration: 280617
loss: 0.9917824268341064,grad_norm: 0.8929040725705967, iteration: 280618
loss: 0.9919764995574951,grad_norm: 0.8045740705313461, iteration: 280619
loss: 0.9958515167236328,grad_norm: 0.9188948885453222, iteration: 280620
loss: 0.9947861433029175,grad_norm: 0.7378670123615158, iteration: 280621
loss: 0.9676323533058167,grad_norm: 0.9860772514576925, iteration: 280622
loss: 0.9501427412033081,grad_norm: 0.8951346354694062, iteration: 280623
loss: 0.9860755205154419,grad_norm: 0.7947917238109563, iteration: 280624
loss: 0.9853659272193909,grad_norm: 0.9999990829734767, iteration: 280625
loss: 1.0182548761367798,grad_norm: 0.8005769593319306, iteration: 280626
loss: 1.017075538635254,grad_norm: 0.9292057963924404, iteration: 280627
loss: 1.0129362344741821,grad_norm: 0.8939469912924085, iteration: 280628
loss: 0.9872071146965027,grad_norm: 0.7645835841141925, iteration: 280629
loss: 0.9959847331047058,grad_norm: 0.9999990984842495, iteration: 280630
loss: 1.0261220932006836,grad_norm: 0.8269528522146727, iteration: 280631
loss: 1.0148402452468872,grad_norm: 0.9950917430421795, iteration: 280632
loss: 1.011420726776123,grad_norm: 0.8704349408352232, iteration: 280633
loss: 0.9894008636474609,grad_norm: 0.9070477493336131, iteration: 280634
loss: 0.9727707505226135,grad_norm: 0.8307279900580967, iteration: 280635
loss: 1.0888242721557617,grad_norm: 0.9999990966944399, iteration: 280636
loss: 0.9789717197418213,grad_norm: 0.8828719607151313, iteration: 280637
loss: 0.9749329090118408,grad_norm: 0.8633709220864576, iteration: 280638
loss: 1.0013434886932373,grad_norm: 0.8750299673411205, iteration: 280639
loss: 0.9970529079437256,grad_norm: 0.818142036973084, iteration: 280640
loss: 0.9749116897583008,grad_norm: 0.999999057872573, iteration: 280641
loss: 0.9696059823036194,grad_norm: 0.9982399351480459, iteration: 280642
loss: 0.9858600497245789,grad_norm: 0.9478215980232909, iteration: 280643
loss: 1.0144081115722656,grad_norm: 0.9591786472479064, iteration: 280644
loss: 1.015580415725708,grad_norm: 0.770178550505336, iteration: 280645
loss: 1.0167937278747559,grad_norm: 0.8986685412380157, iteration: 280646
loss: 1.007996678352356,grad_norm: 0.8608647455740878, iteration: 280647
loss: 1.02362060546875,grad_norm: 0.9999996000084493, iteration: 280648
loss: 1.1290876865386963,grad_norm: 0.999999979677127, iteration: 280649
loss: 1.0035954713821411,grad_norm: 0.9999993283680485, iteration: 280650
loss: 1.0137230157852173,grad_norm: 0.9840949903598974, iteration: 280651
loss: 0.9717612862586975,grad_norm: 0.881817900362778, iteration: 280652
loss: 0.998225748538971,grad_norm: 0.9999990908794272, iteration: 280653
loss: 1.0249905586242676,grad_norm: 0.7586249949007795, iteration: 280654
loss: 0.9963811039924622,grad_norm: 0.9569740341895481, iteration: 280655
loss: 0.9990449547767639,grad_norm: 0.999999299081426, iteration: 280656
loss: 1.0362671613693237,grad_norm: 0.8894818675506689, iteration: 280657
loss: 0.9628307223320007,grad_norm: 0.7929208674602494, iteration: 280658
loss: 1.0420106649398804,grad_norm: 0.9999990527202903, iteration: 280659
loss: 1.005782961845398,grad_norm: 0.9671790484003895, iteration: 280660
loss: 1.0254765748977661,grad_norm: 0.8533030378908849, iteration: 280661
loss: 1.0042321681976318,grad_norm: 0.861670083982197, iteration: 280662
loss: 1.0294111967086792,grad_norm: 0.9572477566358061, iteration: 280663
loss: 0.9748743176460266,grad_norm: 0.936699502324955, iteration: 280664
loss: 0.99906325340271,grad_norm: 0.7988075098429743, iteration: 280665
loss: 0.9461687803268433,grad_norm: 0.8199146676231867, iteration: 280666
loss: 0.9927741885185242,grad_norm: 0.9999992069568291, iteration: 280667
loss: 1.0384232997894287,grad_norm: 0.8359452855955177, iteration: 280668
loss: 0.9976430535316467,grad_norm: 0.8621541780711065, iteration: 280669
loss: 0.9440332055091858,grad_norm: 0.7566660772628642, iteration: 280670
loss: 1.012848138809204,grad_norm: 0.863705001865612, iteration: 280671
loss: 1.1186004877090454,grad_norm: 0.9999994792635637, iteration: 280672
loss: 0.9997453093528748,grad_norm: 0.856303227685161, iteration: 280673
loss: 0.9568389058113098,grad_norm: 0.6971108133474778, iteration: 280674
loss: 1.0019761323928833,grad_norm: 0.6890383496269237, iteration: 280675
loss: 1.0000147819519043,grad_norm: 0.7341163820308839, iteration: 280676
loss: 0.9703155755996704,grad_norm: 0.9999992193915646, iteration: 280677
loss: 1.001123309135437,grad_norm: 0.7567354641961945, iteration: 280678
loss: 0.9824223518371582,grad_norm: 0.7595400823326851, iteration: 280679
loss: 1.003014326095581,grad_norm: 0.7858331675156411, iteration: 280680
loss: 1.1752629280090332,grad_norm: 0.9999994164349024, iteration: 280681
loss: 0.9819257855415344,grad_norm: 0.9999992609508904, iteration: 280682
loss: 1.0360534191131592,grad_norm: 0.9999991895483249, iteration: 280683
loss: 0.9964357614517212,grad_norm: 0.7118034692456087, iteration: 280684
loss: 1.1642359495162964,grad_norm: 0.999999122170504, iteration: 280685
loss: 1.0037479400634766,grad_norm: 0.9999991089446773, iteration: 280686
loss: 0.9964053630828857,grad_norm: 0.9999990439321139, iteration: 280687
loss: 0.9750162363052368,grad_norm: 0.7826317676847323, iteration: 280688
loss: 1.0112578868865967,grad_norm: 0.9999995333283993, iteration: 280689
loss: 0.9935377240180969,grad_norm: 0.7957558722279658, iteration: 280690
loss: 1.131211757659912,grad_norm: 0.999999357367441, iteration: 280691
loss: 0.9759313464164734,grad_norm: 0.8433379347940196, iteration: 280692
loss: 1.0048329830169678,grad_norm: 0.7711741447812637, iteration: 280693
loss: 1.0348669290542603,grad_norm: 0.8632648907136676, iteration: 280694
loss: 0.996679425239563,grad_norm: 0.9205021410574353, iteration: 280695
loss: 0.9737386107444763,grad_norm: 0.9173775671948315, iteration: 280696
loss: 1.0233911275863647,grad_norm: 0.9999995050606657, iteration: 280697
loss: 0.9923725128173828,grad_norm: 0.9024962419452773, iteration: 280698
loss: 1.0129029750823975,grad_norm: 0.8814161795612314, iteration: 280699
loss: 0.9904147982597351,grad_norm: 0.8847970974806969, iteration: 280700
loss: 0.9692930579185486,grad_norm: 0.9659218198702879, iteration: 280701
loss: 1.054072618484497,grad_norm: 0.99999984208165, iteration: 280702
loss: 1.0070124864578247,grad_norm: 0.7433259027971089, iteration: 280703
loss: 0.9874996542930603,grad_norm: 0.879518785096396, iteration: 280704
loss: 1.0137004852294922,grad_norm: 0.9999991242400986, iteration: 280705
loss: 0.9943711757659912,grad_norm: 0.9011551671415983, iteration: 280706
loss: 0.9632484316825867,grad_norm: 0.8178833029201987, iteration: 280707
loss: 1.1525752544403076,grad_norm: 0.9999999742139212, iteration: 280708
loss: 0.9544404745101929,grad_norm: 0.8527549055573268, iteration: 280709
loss: 0.9959069490432739,grad_norm: 0.8811816292248653, iteration: 280710
loss: 1.0000485181808472,grad_norm: 0.7043331747883778, iteration: 280711
loss: 1.0148712396621704,grad_norm: 0.9999991317756114, iteration: 280712
loss: 1.0297753810882568,grad_norm: 0.9999990232509884, iteration: 280713
loss: 1.0473753213882446,grad_norm: 0.9323061167022011, iteration: 280714
loss: 1.0237603187561035,grad_norm: 0.8553081391732342, iteration: 280715
loss: 1.033677577972412,grad_norm: 0.8818815080346223, iteration: 280716
loss: 0.983158528804779,grad_norm: 0.9999996194806692, iteration: 280717
loss: 0.9311673045158386,grad_norm: 0.9267727247741658, iteration: 280718
loss: 1.0457463264465332,grad_norm: 0.9999990853141518, iteration: 280719
loss: 1.016841173171997,grad_norm: 0.9999990213483161, iteration: 280720
loss: 0.994304358959198,grad_norm: 0.8886887197343785, iteration: 280721
loss: 1.0414376258850098,grad_norm: 0.9999995388831215, iteration: 280722
loss: 1.0168157815933228,grad_norm: 0.8041880087613358, iteration: 280723
loss: 0.9927807450294495,grad_norm: 0.8523760334165382, iteration: 280724
loss: 1.1337521076202393,grad_norm: 0.9999996543987159, iteration: 280725
loss: 1.076553463935852,grad_norm: 0.9999999010169527, iteration: 280726
loss: 1.002303957939148,grad_norm: 0.9999991278088379, iteration: 280727
loss: 0.9905180335044861,grad_norm: 0.851834461711786, iteration: 280728
loss: 0.9910928010940552,grad_norm: 0.9999990959201389, iteration: 280729
loss: 0.9852302670478821,grad_norm: 0.8673184932674514, iteration: 280730
loss: 1.017492651939392,grad_norm: 0.8746779000633801, iteration: 280731
loss: 1.0100376605987549,grad_norm: 0.9681876582938767, iteration: 280732
loss: 1.012852668762207,grad_norm: 0.9999991094613909, iteration: 280733
loss: 1.0020360946655273,grad_norm: 0.9755762944917986, iteration: 280734
loss: 1.0096404552459717,grad_norm: 0.8321924272168664, iteration: 280735
loss: 0.9954025149345398,grad_norm: 0.7894351995340371, iteration: 280736
loss: 0.98320472240448,grad_norm: 0.7965153995540569, iteration: 280737
loss: 1.0246917009353638,grad_norm: 0.9999989880842056, iteration: 280738
loss: 1.0372029542922974,grad_norm: 0.9999992052954152, iteration: 280739
loss: 0.9971227645874023,grad_norm: 0.7729750125194669, iteration: 280740
loss: 0.9688992500305176,grad_norm: 0.9640973955613356, iteration: 280741
loss: 1.098392367362976,grad_norm: 0.9999995203646919, iteration: 280742
loss: 1.0077530145645142,grad_norm: 0.7187079606572967, iteration: 280743
loss: 1.1704195737838745,grad_norm: 0.999999847315217, iteration: 280744
loss: 0.9851122498512268,grad_norm: 0.9999999265768074, iteration: 280745
loss: 0.9883660078048706,grad_norm: 0.9220824221057509, iteration: 280746
loss: 0.9808661937713623,grad_norm: 0.9331719578595603, iteration: 280747
loss: 0.9834012389183044,grad_norm: 0.8326351937651782, iteration: 280748
loss: 0.96550452709198,grad_norm: 0.8969892138921941, iteration: 280749
loss: 1.0163846015930176,grad_norm: 0.9182024860109711, iteration: 280750
loss: 1.0142613649368286,grad_norm: 0.9999991992236322, iteration: 280751
loss: 1.043898105621338,grad_norm: 0.999999301251862, iteration: 280752
loss: 1.049115538597107,grad_norm: 0.9999991942561495, iteration: 280753
loss: 0.9594564437866211,grad_norm: 0.8606156058119093, iteration: 280754
loss: 0.9923051595687866,grad_norm: 0.7413036693136293, iteration: 280755
loss: 0.9945627450942993,grad_norm: 0.8551678490863281, iteration: 280756
loss: 1.0042359828948975,grad_norm: 0.9999991078903869, iteration: 280757
loss: 0.9830694794654846,grad_norm: 0.9134928118560708, iteration: 280758
loss: 1.0223718881607056,grad_norm: 0.9746120414128325, iteration: 280759
loss: 1.028884768486023,grad_norm: 0.9999994267148428, iteration: 280760
loss: 0.9979111552238464,grad_norm: 0.7902889123194169, iteration: 280761
loss: 0.9964317083358765,grad_norm: 0.8966527684703561, iteration: 280762
loss: 1.0135802030563354,grad_norm: 0.8592222116631997, iteration: 280763
loss: 0.9933609962463379,grad_norm: 0.9999991681295061, iteration: 280764
loss: 1.0060639381408691,grad_norm: 0.9389766121046617, iteration: 280765
loss: 0.9759709239006042,grad_norm: 0.8423021036180375, iteration: 280766
loss: 0.9512579441070557,grad_norm: 0.9999991952243652, iteration: 280767
loss: 0.9706714749336243,grad_norm: 0.7869344222546919, iteration: 280768
loss: 1.018937587738037,grad_norm: 0.9999992464479308, iteration: 280769
loss: 0.986461341381073,grad_norm: 0.806834211635877, iteration: 280770
loss: 1.0322095155715942,grad_norm: 0.7853348042134949, iteration: 280771
loss: 0.9782819151878357,grad_norm: 0.9392221252362276, iteration: 280772
loss: 1.0068589448928833,grad_norm: 0.9999990745909333, iteration: 280773
loss: 0.9726652503013611,grad_norm: 0.9394579239881955, iteration: 280774
loss: 1.0282269716262817,grad_norm: 0.9999990995536, iteration: 280775
loss: 0.9834075570106506,grad_norm: 0.8712799480941089, iteration: 280776
loss: 1.0266270637512207,grad_norm: 0.9999996752561546, iteration: 280777
loss: 1.0584965944290161,grad_norm: 0.8702997802121349, iteration: 280778
loss: 1.0101217031478882,grad_norm: 0.8584749191519857, iteration: 280779
loss: 0.9914677739143372,grad_norm: 0.8821244969320294, iteration: 280780
loss: 0.9591460227966309,grad_norm: 0.8682583356366426, iteration: 280781
loss: 0.989955723285675,grad_norm: 0.8738626227443239, iteration: 280782
loss: 0.9892483949661255,grad_norm: 0.7618200007810336, iteration: 280783
loss: 0.9832035303115845,grad_norm: 0.8373385725793049, iteration: 280784
loss: 1.0103458166122437,grad_norm: 0.7572743833325116, iteration: 280785
loss: 0.9811848998069763,grad_norm: 0.8882606355075336, iteration: 280786
loss: 0.9583113789558411,grad_norm: 0.9306556463788488, iteration: 280787
loss: 1.0928934812545776,grad_norm: 0.9999994578399378, iteration: 280788
loss: 0.9927205443382263,grad_norm: 0.7770736333154902, iteration: 280789
loss: 0.9956287145614624,grad_norm: 0.8727113650760064, iteration: 280790
loss: 1.0054466724395752,grad_norm: 0.9121385368331557, iteration: 280791
loss: 0.9703038930892944,grad_norm: 0.8078312578304653, iteration: 280792
loss: 1.0228594541549683,grad_norm: 0.9999990970501842, iteration: 280793
loss: 1.0044573545455933,grad_norm: 0.8824090034438586, iteration: 280794
loss: 0.9963785409927368,grad_norm: 0.8594730293948314, iteration: 280795
loss: 0.9979767799377441,grad_norm: 0.9677824292836287, iteration: 280796
loss: 1.005062460899353,grad_norm: 0.8446337797812359, iteration: 280797
loss: 0.9769931435585022,grad_norm: 0.9999991270242299, iteration: 280798
loss: 1.0341864824295044,grad_norm: 0.8720837419014629, iteration: 280799
loss: 1.0819567441940308,grad_norm: 0.9999989705564876, iteration: 280800
loss: 1.0094902515411377,grad_norm: 0.7383370978360776, iteration: 280801
loss: 1.0958198308944702,grad_norm: 0.9999990669191551, iteration: 280802
loss: 1.0055618286132812,grad_norm: 0.9999990321519924, iteration: 280803
loss: 1.0020374059677124,grad_norm: 0.842915773122745, iteration: 280804
loss: 1.0207182168960571,grad_norm: 0.9999989745272416, iteration: 280805
loss: 1.0639936923980713,grad_norm: 0.9999992745697402, iteration: 280806
loss: 1.0078253746032715,grad_norm: 0.8393395122039756, iteration: 280807
loss: 0.995161235332489,grad_norm: 0.8650774583144442, iteration: 280808
loss: 1.0286436080932617,grad_norm: 0.7972412252929857, iteration: 280809
loss: 1.0067692995071411,grad_norm: 0.887043143818257, iteration: 280810
loss: 1.0446195602416992,grad_norm: 0.8366873616472996, iteration: 280811
loss: 1.0264352560043335,grad_norm: 0.9999990945154121, iteration: 280812
loss: 1.0496232509613037,grad_norm: 0.9127176590115825, iteration: 280813
loss: 1.016379475593567,grad_norm: 0.804707436313778, iteration: 280814
loss: 1.0007152557373047,grad_norm: 0.7855932143341885, iteration: 280815
loss: 0.980550229549408,grad_norm: 0.8827050770701539, iteration: 280816
loss: 1.0976496934890747,grad_norm: 0.9999993701708335, iteration: 280817
loss: 0.9938898682594299,grad_norm: 0.812919383267877, iteration: 280818
loss: 0.9847190976142883,grad_norm: 0.7829269927720675, iteration: 280819
loss: 1.0030479431152344,grad_norm: 0.768999891256052, iteration: 280820
loss: 1.067789077758789,grad_norm: 0.9999997966034778, iteration: 280821
loss: 0.9924989342689514,grad_norm: 0.9999999529640099, iteration: 280822
loss: 0.977638840675354,grad_norm: 0.9005218287682171, iteration: 280823
loss: 1.0065258741378784,grad_norm: 0.9999991113679161, iteration: 280824
loss: 1.0096992254257202,grad_norm: 0.7814299445706435, iteration: 280825
loss: 1.0458786487579346,grad_norm: 0.9536631645583341, iteration: 280826
loss: 1.0877493619918823,grad_norm: 0.9999997680313168, iteration: 280827
loss: 1.0027621984481812,grad_norm: 0.9459810713178046, iteration: 280828
loss: 1.0038542747497559,grad_norm: 0.9130678200225497, iteration: 280829
loss: 1.0391243696212769,grad_norm: 0.9999997546979894, iteration: 280830
loss: 1.0628372430801392,grad_norm: 0.7342104853284093, iteration: 280831
loss: 0.9953680634498596,grad_norm: 0.8335549541211312, iteration: 280832
loss: 1.0066040754318237,grad_norm: 0.9066721707387635, iteration: 280833
loss: 1.1525624990463257,grad_norm: 0.9999993585244097, iteration: 280834
loss: 0.9833765625953674,grad_norm: 0.8350005433396142, iteration: 280835
loss: 1.0004898309707642,grad_norm: 0.9999991655265849, iteration: 280836
loss: 1.0890475511550903,grad_norm: 0.9999991714333141, iteration: 280837
loss: 1.0062559843063354,grad_norm: 0.7793217897524588, iteration: 280838
loss: 1.2751544713974,grad_norm: 0.9999997638869231, iteration: 280839
loss: 1.1137458086013794,grad_norm: 0.9999991817026677, iteration: 280840
loss: 1.2144720554351807,grad_norm: 0.9999996418147206, iteration: 280841
loss: 1.0314632654190063,grad_norm: 0.9999992291625112, iteration: 280842
loss: 1.0176353454589844,grad_norm: 0.8125604505878689, iteration: 280843
loss: 1.0620204210281372,grad_norm: 0.9999999066447642, iteration: 280844
loss: 1.1280083656311035,grad_norm: 0.8983945110270103, iteration: 280845
loss: 1.0243386030197144,grad_norm: 0.7980741587850154, iteration: 280846
loss: 1.0243531465530396,grad_norm: 0.999999946968975, iteration: 280847
loss: 0.9684372544288635,grad_norm: 0.9380667876655887, iteration: 280848
loss: 1.007058024406433,grad_norm: 0.9899454709196707, iteration: 280849
loss: 1.0363394021987915,grad_norm: 0.9999990729779962, iteration: 280850
loss: 1.0012277364730835,grad_norm: 0.898295047707761, iteration: 280851
loss: 0.9647087454795837,grad_norm: 0.7757869070958634, iteration: 280852
loss: 1.020338773727417,grad_norm: 0.8877615810559449, iteration: 280853
loss: 0.9629430770874023,grad_norm: 0.852298976280724, iteration: 280854
loss: 0.9756122827529907,grad_norm: 0.8499184567143379, iteration: 280855
loss: 1.0264946222305298,grad_norm: 0.8076411016131342, iteration: 280856
loss: 1.020735740661621,grad_norm: 0.9999996201125552, iteration: 280857
loss: 1.0154544115066528,grad_norm: 0.9999991641712324, iteration: 280858
loss: 0.9799903631210327,grad_norm: 0.9310915137916996, iteration: 280859
loss: 0.9709373712539673,grad_norm: 0.9168716847461907, iteration: 280860
loss: 1.013933777809143,grad_norm: 0.9341611319947609, iteration: 280861
loss: 1.0052599906921387,grad_norm: 0.9999992458244822, iteration: 280862
loss: 1.0434893369674683,grad_norm: 0.9999998344508726, iteration: 280863
loss: 0.9926976561546326,grad_norm: 0.8477424606352475, iteration: 280864
loss: 1.0259400606155396,grad_norm: 0.983839330774293, iteration: 280865
loss: 1.0164802074432373,grad_norm: 0.8957804444005995, iteration: 280866
loss: 0.9551945924758911,grad_norm: 0.7979399177053801, iteration: 280867
loss: 1.0152925252914429,grad_norm: 0.8614856041629051, iteration: 280868
loss: 1.0813473463058472,grad_norm: 0.9523960600420375, iteration: 280869
loss: 1.060193419456482,grad_norm: 0.99999938353264, iteration: 280870
loss: 0.9957664608955383,grad_norm: 0.8501933396329704, iteration: 280871
loss: 1.0584884881973267,grad_norm: 0.9555590364386487, iteration: 280872
loss: 1.0086301565170288,grad_norm: 0.9187703162447671, iteration: 280873
loss: 1.0053738355636597,grad_norm: 0.9226432236511183, iteration: 280874
loss: 1.0103017091751099,grad_norm: 0.999999067502586, iteration: 280875
loss: 0.9910831451416016,grad_norm: 0.9999990284966358, iteration: 280876
loss: 1.075777292251587,grad_norm: 0.9999991012429212, iteration: 280877
loss: 0.9836223125457764,grad_norm: 0.8812967784851159, iteration: 280878
loss: 1.0286550521850586,grad_norm: 0.8971930519749591, iteration: 280879
loss: 1.0401835441589355,grad_norm: 0.8662580364474276, iteration: 280880
loss: 1.03083336353302,grad_norm: 0.9891162078831455, iteration: 280881
loss: 1.0713409185409546,grad_norm: 0.9999996646892259, iteration: 280882
loss: 1.0146111249923706,grad_norm: 0.9293953400454324, iteration: 280883
loss: 1.015217661857605,grad_norm: 0.7749091211611461, iteration: 280884
loss: 1.019088625907898,grad_norm: 0.9999991560877893, iteration: 280885
loss: 0.9846969842910767,grad_norm: 0.8733884663250842, iteration: 280886
loss: 1.0086950063705444,grad_norm: 0.9999998597908111, iteration: 280887
loss: 0.9781012535095215,grad_norm: 0.9706910362721721, iteration: 280888
loss: 0.9787309169769287,grad_norm: 0.9999990423643622, iteration: 280889
loss: 1.0742071866989136,grad_norm: 0.9999990329710429, iteration: 280890
loss: 1.0093848705291748,grad_norm: 0.972748209723127, iteration: 280891
loss: 1.0001447200775146,grad_norm: 0.7944928166480947, iteration: 280892
loss: 0.9846153855323792,grad_norm: 0.9428952414042675, iteration: 280893
loss: 1.0344374179840088,grad_norm: 0.8766637717354286, iteration: 280894
loss: 0.9752300381660461,grad_norm: 0.9999989725847502, iteration: 280895
loss: 1.0430742502212524,grad_norm: 0.9999991493691268, iteration: 280896
loss: 1.0129315853118896,grad_norm: 0.8831583660895558, iteration: 280897
loss: 0.9871225357055664,grad_norm: 0.9561541124876602, iteration: 280898
loss: 0.9952245354652405,grad_norm: 0.8999884352626523, iteration: 280899
loss: 0.9820563793182373,grad_norm: 0.9742226514549938, iteration: 280900
loss: 0.9829582571983337,grad_norm: 0.933566820042382, iteration: 280901
loss: 0.9963074326515198,grad_norm: 0.6975868240921342, iteration: 280902
loss: 1.0251890420913696,grad_norm: 0.8648422676080114, iteration: 280903
loss: 0.9946373701095581,grad_norm: 0.8504773905717298, iteration: 280904
loss: 0.9985714554786682,grad_norm: 0.8318923851171075, iteration: 280905
loss: 1.0044692754745483,grad_norm: 0.9999992975934032, iteration: 280906
loss: 0.9944978356361389,grad_norm: 0.8871049803202529, iteration: 280907
loss: 1.046047568321228,grad_norm: 0.8632200258796731, iteration: 280908
loss: 0.992689847946167,grad_norm: 0.9847722202514794, iteration: 280909
loss: 0.9659029841423035,grad_norm: 0.8142649619071551, iteration: 280910
loss: 1.0136362314224243,grad_norm: 0.9999991566442745, iteration: 280911
loss: 1.048411250114441,grad_norm: 0.8898310586337428, iteration: 280912
loss: 0.9827984571456909,grad_norm: 0.7800876960253587, iteration: 280913
loss: 0.9892701506614685,grad_norm: 0.9398189838879825, iteration: 280914
loss: 0.9980483651161194,grad_norm: 0.8794067212156268, iteration: 280915
loss: 0.9859960675239563,grad_norm: 0.8476561590382785, iteration: 280916
loss: 1.0287108421325684,grad_norm: 0.8525026034743652, iteration: 280917
loss: 0.957931399345398,grad_norm: 0.8678890912816826, iteration: 280918
loss: 0.9701880812644958,grad_norm: 0.7957226592860637, iteration: 280919
loss: 1.0268739461898804,grad_norm: 0.9924282986695407, iteration: 280920
loss: 0.9803837537765503,grad_norm: 0.8145780657802792, iteration: 280921
loss: 1.0036091804504395,grad_norm: 0.705150205426601, iteration: 280922
loss: 1.0345700979232788,grad_norm: 0.9018538195370865, iteration: 280923
loss: 1.0083374977111816,grad_norm: 0.8061480657766721, iteration: 280924
loss: 1.0541929006576538,grad_norm: 0.9999995657538736, iteration: 280925
loss: 0.9783824682235718,grad_norm: 0.9999995171747305, iteration: 280926
loss: 0.9944761395454407,grad_norm: 0.927952053647382, iteration: 280927
loss: 1.0133520364761353,grad_norm: 0.9483234605740444, iteration: 280928
loss: 1.0349311828613281,grad_norm: 0.9999996576190698, iteration: 280929
loss: 0.9952778220176697,grad_norm: 0.9999990044605176, iteration: 280930
loss: 0.9960935115814209,grad_norm: 0.876868246947551, iteration: 280931
loss: 1.0003502368927002,grad_norm: 0.8232684556319406, iteration: 280932
loss: 1.025341272354126,grad_norm: 0.9999992675856052, iteration: 280933
loss: 1.0022138357162476,grad_norm: 0.9999995862611352, iteration: 280934
loss: 1.0535366535186768,grad_norm: 0.7945300623590679, iteration: 280935
loss: 0.9841195344924927,grad_norm: 0.9228191962196679, iteration: 280936
loss: 0.9797773957252502,grad_norm: 0.9188429585821827, iteration: 280937
loss: 0.9639906883239746,grad_norm: 0.905797168131779, iteration: 280938
loss: 0.9821826219558716,grad_norm: 0.8774339944267427, iteration: 280939
loss: 0.9700446128845215,grad_norm: 0.8447176731114598, iteration: 280940
loss: 1.0165456533432007,grad_norm: 0.8415477972590337, iteration: 280941
loss: 1.0062990188598633,grad_norm: 0.999999260336838, iteration: 280942
loss: 1.0344955921173096,grad_norm: 0.9268237128211559, iteration: 280943
loss: 0.9831414222717285,grad_norm: 0.863568446299978, iteration: 280944
loss: 0.9915522933006287,grad_norm: 0.7986883457110479, iteration: 280945
loss: 1.0119106769561768,grad_norm: 0.9999997004653219, iteration: 280946
loss: 1.0087368488311768,grad_norm: 0.8542597335301629, iteration: 280947
loss: 1.0286074876785278,grad_norm: 0.9999990578723369, iteration: 280948
loss: 0.9838165044784546,grad_norm: 0.9999990938179673, iteration: 280949
loss: 0.993327796459198,grad_norm: 0.8728070570677955, iteration: 280950
loss: 1.029970645904541,grad_norm: 0.7393636073090178, iteration: 280951
loss: 1.0281418561935425,grad_norm: 0.9999991917295958, iteration: 280952
loss: 1.0086983442306519,grad_norm: 0.9999991109119051, iteration: 280953
loss: 1.0175408124923706,grad_norm: 0.9278416018740173, iteration: 280954
loss: 0.9809965491294861,grad_norm: 0.8230277769353315, iteration: 280955
loss: 0.9674264192581177,grad_norm: 0.9844054484359039, iteration: 280956
loss: 1.0234028100967407,grad_norm: 0.9203700707223764, iteration: 280957
loss: 1.0481362342834473,grad_norm: 0.9999998340740985, iteration: 280958
loss: 1.0229237079620361,grad_norm: 0.9999989502692571, iteration: 280959
loss: 1.0302449464797974,grad_norm: 0.7953856746190583, iteration: 280960
loss: 0.999006450176239,grad_norm: 0.7616889008908859, iteration: 280961
loss: 1.0150165557861328,grad_norm: 0.7066387922086275, iteration: 280962
loss: 1.0499517917633057,grad_norm: 0.9999990509591872, iteration: 280963
loss: 0.9858347773551941,grad_norm: 0.963676007516338, iteration: 280964
loss: 0.9972037076950073,grad_norm: 0.9999991080611361, iteration: 280965
loss: 1.0360172986984253,grad_norm: 0.9999991963758053, iteration: 280966
loss: 1.035560965538025,grad_norm: 0.9446953026198768, iteration: 280967
loss: 0.9655642509460449,grad_norm: 0.896676120172493, iteration: 280968
loss: 0.9811943769454956,grad_norm: 0.9999992124547673, iteration: 280969
loss: 1.0051558017730713,grad_norm: 0.9239188996526212, iteration: 280970
loss: 0.9817329049110413,grad_norm: 0.897219029767595, iteration: 280971
loss: 1.011682152748108,grad_norm: 0.9320736736910427, iteration: 280972
loss: 1.0236997604370117,grad_norm: 0.8206061139083848, iteration: 280973
loss: 1.0373681783676147,grad_norm: 0.9999991510661999, iteration: 280974
loss: 1.0129252672195435,grad_norm: 0.8527507741691294, iteration: 280975
loss: 1.0288991928100586,grad_norm: 0.8841256307009575, iteration: 280976
loss: 1.024491548538208,grad_norm: 0.9846855376085802, iteration: 280977
loss: 0.9679356217384338,grad_norm: 0.7633631189457853, iteration: 280978
loss: 0.9888497591018677,grad_norm: 0.7573964365781106, iteration: 280979
loss: 0.9707995653152466,grad_norm: 0.9999991595805233, iteration: 280980
loss: 0.992397665977478,grad_norm: 0.8844142010195956, iteration: 280981
loss: 0.9500781893730164,grad_norm: 0.8875151145445037, iteration: 280982
loss: 1.0163511037826538,grad_norm: 0.9999996021340292, iteration: 280983
loss: 0.9785966277122498,grad_norm: 0.8710363402136793, iteration: 280984
loss: 0.9838891625404358,grad_norm: 0.8387880363279592, iteration: 280985
loss: 1.0307261943817139,grad_norm: 0.8253124759152495, iteration: 280986
loss: 0.9925329685211182,grad_norm: 0.8970710868917626, iteration: 280987
loss: 1.0212582349777222,grad_norm: 0.8584317891185009, iteration: 280988
loss: 1.025830864906311,grad_norm: 0.9577948272616265, iteration: 280989
loss: 0.9906719326972961,grad_norm: 0.9999991121069765, iteration: 280990
loss: 0.9976603388786316,grad_norm: 0.8099817866712465, iteration: 280991
loss: 0.9859985113143921,grad_norm: 0.7408945070746231, iteration: 280992
loss: 0.991361677646637,grad_norm: 0.9212256390621893, iteration: 280993
loss: 1.0184834003448486,grad_norm: 0.9107679408749997, iteration: 280994
loss: 1.0034996271133423,grad_norm: 0.8378979979739142, iteration: 280995
loss: 1.0089590549468994,grad_norm: 0.962941620745128, iteration: 280996
loss: 0.9600046873092651,grad_norm: 0.8448485225179091, iteration: 280997
loss: 1.0038305521011353,grad_norm: 0.9803216983961078, iteration: 280998
loss: 1.023823618888855,grad_norm: 0.999999156987584, iteration: 280999
loss: 1.0376228094100952,grad_norm: 0.9999997879595225, iteration: 281000
loss: 1.0066742897033691,grad_norm: 0.9999991948346595, iteration: 281001
loss: 1.0069301128387451,grad_norm: 0.8838407030034906, iteration: 281002
loss: 0.9951436519622803,grad_norm: 0.9999990708418973, iteration: 281003
loss: 1.0003870725631714,grad_norm: 0.78086460394974, iteration: 281004
loss: 0.9935998916625977,grad_norm: 0.7481303967790602, iteration: 281005
loss: 0.9860519766807556,grad_norm: 0.8387356172272251, iteration: 281006
loss: 1.0005793571472168,grad_norm: 0.7986947017709272, iteration: 281007
loss: 0.9779133200645447,grad_norm: 0.94081566560333, iteration: 281008
loss: 1.0122065544128418,grad_norm: 0.8968002958157782, iteration: 281009
loss: 0.9881848096847534,grad_norm: 0.8720779071121598, iteration: 281010
loss: 1.0179044008255005,grad_norm: 0.8402838313585042, iteration: 281011
loss: 1.0216598510742188,grad_norm: 0.9999992805952308, iteration: 281012
loss: 1.022926688194275,grad_norm: 0.9999999072137113, iteration: 281013
loss: 1.0065799951553345,grad_norm: 0.8367311244705541, iteration: 281014
loss: 0.9759116768836975,grad_norm: 0.8275812103501621, iteration: 281015
loss: 1.040955662727356,grad_norm: 0.9999990320394113, iteration: 281016
loss: 1.0060677528381348,grad_norm: 0.8304103069117855, iteration: 281017
loss: 0.9733303189277649,grad_norm: 0.8213296751441956, iteration: 281018
loss: 0.9881405830383301,grad_norm: 0.9999996174515187, iteration: 281019
loss: 0.9897987246513367,grad_norm: 0.8975510692839811, iteration: 281020
loss: 0.9924253225326538,grad_norm: 0.821548802999343, iteration: 281021
loss: 0.9987019896507263,grad_norm: 0.8544155795172715, iteration: 281022
loss: 1.0057759284973145,grad_norm: 0.9684045651178375, iteration: 281023
loss: 1.0217804908752441,grad_norm: 0.9999999514997094, iteration: 281024
loss: 1.0329108238220215,grad_norm: 0.9999991310711595, iteration: 281025
loss: 1.0113039016723633,grad_norm: 0.8805583223909479, iteration: 281026
loss: 0.9963582754135132,grad_norm: 0.7887002500868653, iteration: 281027
loss: 1.010477900505066,grad_norm: 0.7913067285277363, iteration: 281028
loss: 1.0368226766586304,grad_norm: 0.8490202482700685, iteration: 281029
loss: 1.0050965547561646,grad_norm: 0.9113609638554946, iteration: 281030
loss: 1.0128991603851318,grad_norm: 0.9999999888847217, iteration: 281031
loss: 0.953133761882782,grad_norm: 0.7810077300078431, iteration: 281032
loss: 0.9951730370521545,grad_norm: 0.9036935333836825, iteration: 281033
loss: 1.0838390588760376,grad_norm: 0.9999991177350827, iteration: 281034
loss: 0.9987145066261292,grad_norm: 0.8873736705599304, iteration: 281035
loss: 1.0980045795440674,grad_norm: 0.9999992658603568, iteration: 281036
loss: 0.9942178726196289,grad_norm: 0.80773289235917, iteration: 281037
loss: 1.0171153545379639,grad_norm: 0.9634573026481187, iteration: 281038
loss: 0.971470832824707,grad_norm: 0.8458461379699153, iteration: 281039
loss: 1.0028300285339355,grad_norm: 0.9999995879481577, iteration: 281040
loss: 1.1075150966644287,grad_norm: 0.9999991972114971, iteration: 281041
loss: 1.052521824836731,grad_norm: 0.9999990854018922, iteration: 281042
loss: 1.0292903184890747,grad_norm: 0.9209311917213376, iteration: 281043
loss: 1.0007210969924927,grad_norm: 0.8740067982050549, iteration: 281044
loss: 1.029695749282837,grad_norm: 0.9999991849375278, iteration: 281045
loss: 1.0013551712036133,grad_norm: 0.6863713419380949, iteration: 281046
loss: 1.0330817699432373,grad_norm: 0.9999995327844174, iteration: 281047
loss: 0.9952723979949951,grad_norm: 0.7874270251523974, iteration: 281048
loss: 1.0011152029037476,grad_norm: 0.8963804897829594, iteration: 281049
loss: 1.0124285221099854,grad_norm: 0.9346747190386969, iteration: 281050
loss: 1.0943855047225952,grad_norm: 0.9999998184001834, iteration: 281051
loss: 1.0250754356384277,grad_norm: 0.9999990871657618, iteration: 281052
loss: 1.0148240327835083,grad_norm: 0.9323403783584066, iteration: 281053
loss: 0.9974880218505859,grad_norm: 0.9624959470336986, iteration: 281054
loss: 0.9763917922973633,grad_norm: 0.9999991319923238, iteration: 281055
loss: 1.1297885179519653,grad_norm: 0.9972892669854461, iteration: 281056
loss: 1.0045826435089111,grad_norm: 0.7720769630268264, iteration: 281057
loss: 1.0216772556304932,grad_norm: 0.9306805196986939, iteration: 281058
loss: 1.0029863119125366,grad_norm: 0.8133122266782644, iteration: 281059
loss: 1.0267750024795532,grad_norm: 0.9145341417595795, iteration: 281060
loss: 1.1034497022628784,grad_norm: 0.8857917081173073, iteration: 281061
loss: 0.9931938648223877,grad_norm: 0.8106619582004796, iteration: 281062
loss: 0.9902742505073547,grad_norm: 0.8141062427455115, iteration: 281063
loss: 0.987339198589325,grad_norm: 0.989683559018149, iteration: 281064
loss: 1.0009275674819946,grad_norm: 0.9747968480173157, iteration: 281065
loss: 0.9520609974861145,grad_norm: 0.9999991071189765, iteration: 281066
loss: 1.018220067024231,grad_norm: 0.8820883322539363, iteration: 281067
loss: 1.0170994997024536,grad_norm: 0.9242250676335132, iteration: 281068
loss: 1.023023009300232,grad_norm: 0.9999993360995373, iteration: 281069
loss: 1.0015650987625122,grad_norm: 0.9274046310351827, iteration: 281070
loss: 0.9837265014648438,grad_norm: 0.9550612938199109, iteration: 281071
loss: 0.9846962690353394,grad_norm: 0.9999989994860183, iteration: 281072
loss: 0.9942249059677124,grad_norm: 0.9999992696560193, iteration: 281073
loss: 1.0085761547088623,grad_norm: 0.9999990369203136, iteration: 281074
loss: 0.9908685684204102,grad_norm: 0.9291267754037542, iteration: 281075
loss: 1.06270170211792,grad_norm: 0.9999998509287975, iteration: 281076
loss: 0.9939385652542114,grad_norm: 0.8509115849711552, iteration: 281077
loss: 0.9948107600212097,grad_norm: 0.9107480701767185, iteration: 281078
loss: 1.012442946434021,grad_norm: 0.999999646666464, iteration: 281079
loss: 1.1012656688690186,grad_norm: 0.9999996612595443, iteration: 281080
loss: 1.0228278636932373,grad_norm: 0.7597860330161762, iteration: 281081
loss: 1.035263180732727,grad_norm: 0.9333600665503851, iteration: 281082
loss: 1.1022340059280396,grad_norm: 0.9999996276862028, iteration: 281083
loss: 1.016090750694275,grad_norm: 0.9999990378958146, iteration: 281084
loss: 1.0484124422073364,grad_norm: 0.9585914294414821, iteration: 281085
loss: 0.9965398907661438,grad_norm: 0.9002716077962081, iteration: 281086
loss: 0.9991764426231384,grad_norm: 0.8392934576109716, iteration: 281087
loss: 0.9699493646621704,grad_norm: 0.984688220414476, iteration: 281088
loss: 1.0419163703918457,grad_norm: 0.7793882492627324, iteration: 281089
loss: 1.2364190816879272,grad_norm: 0.9999997635500897, iteration: 281090
loss: 1.0038692951202393,grad_norm: 0.9435761145285666, iteration: 281091
loss: 1.093018889427185,grad_norm: 0.9999990862409404, iteration: 281092
loss: 1.0067839622497559,grad_norm: 0.8149638661503744, iteration: 281093
loss: 0.9891489148139954,grad_norm: 0.7814112313827112, iteration: 281094
loss: 0.9978271722793579,grad_norm: 0.969613739299682, iteration: 281095
loss: 1.0034972429275513,grad_norm: 0.9533402280835785, iteration: 281096
loss: 1.0093621015548706,grad_norm: 0.8561485065580269, iteration: 281097
loss: 1.0052282810211182,grad_norm: 0.9127359082277895, iteration: 281098
loss: 1.0191222429275513,grad_norm: 0.9405610624778242, iteration: 281099
loss: 0.9983199238777161,grad_norm: 0.9999991969318753, iteration: 281100
loss: 1.072962760925293,grad_norm: 0.9900256396398658, iteration: 281101
loss: 1.041804313659668,grad_norm: 0.9999990343089825, iteration: 281102
loss: 1.0791845321655273,grad_norm: 0.99999987411282, iteration: 281103
loss: 0.9815009832382202,grad_norm: 0.9930571690180158, iteration: 281104
loss: 0.9684640765190125,grad_norm: 0.7222851312637564, iteration: 281105
loss: 1.0159424543380737,grad_norm: 0.9082251533625795, iteration: 281106
loss: 0.9973248243331909,grad_norm: 0.9505758123686173, iteration: 281107
loss: 1.1098355054855347,grad_norm: 0.999999265085622, iteration: 281108
loss: 0.9976441860198975,grad_norm: 0.9364660675566487, iteration: 281109
loss: 1.0139997005462646,grad_norm: 0.8228039146780941, iteration: 281110
loss: 0.9970921874046326,grad_norm: 0.9023168735024684, iteration: 281111
loss: 1.0873125791549683,grad_norm: 0.9999992916552789, iteration: 281112
loss: 1.061922550201416,grad_norm: 0.9999997230462018, iteration: 281113
loss: 1.0492223501205444,grad_norm: 0.9999993519651638, iteration: 281114
loss: 1.008235216140747,grad_norm: 0.9881383092141424, iteration: 281115
loss: 0.9736387133598328,grad_norm: 0.7803836774894469, iteration: 281116
loss: 0.9853841662406921,grad_norm: 0.9474275028903048, iteration: 281117
loss: 0.9960064888000488,grad_norm: 0.9548728008726847, iteration: 281118
loss: 1.0027726888656616,grad_norm: 0.9999997475048231, iteration: 281119
loss: 1.008272647857666,grad_norm: 0.8626634242786584, iteration: 281120
loss: 1.0801966190338135,grad_norm: 0.9999991366075439, iteration: 281121
loss: 0.9686517119407654,grad_norm: 0.9389667158041325, iteration: 281122
loss: 1.0027602910995483,grad_norm: 0.910794646821039, iteration: 281123
loss: 0.9986386895179749,grad_norm: 0.9356769132351288, iteration: 281124
loss: 1.0370044708251953,grad_norm: 0.9999991777977953, iteration: 281125
loss: 0.9913323521614075,grad_norm: 0.9999990491835244, iteration: 281126
loss: 0.9816116094589233,grad_norm: 0.9847983797813399, iteration: 281127
loss: 1.019751787185669,grad_norm: 0.9275158241037614, iteration: 281128
loss: 1.0570491552352905,grad_norm: 0.9426161352381505, iteration: 281129
loss: 0.9963222742080688,grad_norm: 0.8046141892649569, iteration: 281130
loss: 1.0223605632781982,grad_norm: 0.8797980834955024, iteration: 281131
loss: 1.0048762559890747,grad_norm: 0.9420905805900961, iteration: 281132
loss: 1.000485897064209,grad_norm: 0.806108276720558, iteration: 281133
loss: 1.0457770824432373,grad_norm: 0.9999990860898854, iteration: 281134
loss: 1.0202747583389282,grad_norm: 0.9092319962009189, iteration: 281135
loss: 0.9666293263435364,grad_norm: 0.999999202974733, iteration: 281136
loss: 1.0283704996109009,grad_norm: 0.8651069801528655, iteration: 281137
loss: 0.9880880117416382,grad_norm: 0.9004668082353309, iteration: 281138
loss: 1.0263558626174927,grad_norm: 0.9521371578950969, iteration: 281139
loss: 1.0574095249176025,grad_norm: 0.8783205336082649, iteration: 281140
loss: 0.9988144040107727,grad_norm: 0.8551130551298702, iteration: 281141
loss: 1.0027738809585571,grad_norm: 0.8913888712923818, iteration: 281142
loss: 1.034746527671814,grad_norm: 0.9467488986783047, iteration: 281143
loss: 0.9909084439277649,grad_norm: 0.7796210594410387, iteration: 281144
loss: 0.9596756100654602,grad_norm: 0.8358798982626426, iteration: 281145
loss: 0.9911636710166931,grad_norm: 0.8638419087408813, iteration: 281146
loss: 0.9838587045669556,grad_norm: 0.9558745630456813, iteration: 281147
loss: 1.044412612915039,grad_norm: 0.9256564878984475, iteration: 281148
loss: 1.0248091220855713,grad_norm: 0.9999993000521316, iteration: 281149
loss: 1.03948175907135,grad_norm: 0.8716475986445265, iteration: 281150
loss: 0.9823200702667236,grad_norm: 0.9999990838267683, iteration: 281151
loss: 1.0332932472229004,grad_norm: 0.8864138639251843, iteration: 281152
loss: 1.005547046661377,grad_norm: 0.871801967009352, iteration: 281153
loss: 0.9973512887954712,grad_norm: 0.9999990686715091, iteration: 281154
loss: 1.0047740936279297,grad_norm: 0.8292713032143616, iteration: 281155
loss: 1.0172237157821655,grad_norm: 0.9493846337107152, iteration: 281156
loss: 1.0004992485046387,grad_norm: 1.0000000863166902, iteration: 281157
loss: 1.015653371810913,grad_norm: 0.9953703298289668, iteration: 281158
loss: 1.074049711227417,grad_norm: 0.999999212916797, iteration: 281159
loss: 1.092394232749939,grad_norm: 0.9999996450776037, iteration: 281160
loss: 1.0382165908813477,grad_norm: 0.9705443074035082, iteration: 281161
loss: 1.139024257659912,grad_norm: 0.9999994897279146, iteration: 281162
loss: 1.0179007053375244,grad_norm: 0.9002755811572154, iteration: 281163
loss: 1.0150032043457031,grad_norm: 0.814827906419946, iteration: 281164
loss: 1.0111489295959473,grad_norm: 0.8546729164970106, iteration: 281165
loss: 1.0673325061798096,grad_norm: 0.9999991015405676, iteration: 281166
loss: 1.008019208908081,grad_norm: 0.9999989935661813, iteration: 281167
loss: 1.0325568914413452,grad_norm: 0.779858376437107, iteration: 281168
loss: 1.0259037017822266,grad_norm: 0.9979937167621458, iteration: 281169
loss: 1.1603055000305176,grad_norm: 0.999999303545809, iteration: 281170
loss: 0.9694653749465942,grad_norm: 0.9843628073056905, iteration: 281171
loss: 0.961796224117279,grad_norm: 0.8619670062657485, iteration: 281172
loss: 1.105223298072815,grad_norm: 0.9999993843771624, iteration: 281173
loss: 0.9883784055709839,grad_norm: 0.7608749626838792, iteration: 281174
loss: 1.0248891115188599,grad_norm: 0.8549409042796472, iteration: 281175
loss: 1.050026535987854,grad_norm: 0.9999994995164162, iteration: 281176
loss: 0.9982084035873413,grad_norm: 0.7320058377431927, iteration: 281177
loss: 1.0331361293792725,grad_norm: 0.8556730088941176, iteration: 281178
loss: 1.023256778717041,grad_norm: 0.8609650710069601, iteration: 281179
loss: 1.005453109741211,grad_norm: 0.9633493389601595, iteration: 281180
loss: 0.9979940056800842,grad_norm: 0.8695466453818531, iteration: 281181
loss: 1.0026715993881226,grad_norm: 0.8712400735532236, iteration: 281182
loss: 1.0042850971221924,grad_norm: 0.8919893644876568, iteration: 281183
loss: 1.0310245752334595,grad_norm: 0.8469030963038715, iteration: 281184
loss: 0.9968823194503784,grad_norm: 0.7056601953585034, iteration: 281185
loss: 0.9776126742362976,grad_norm: 0.8372527881192491, iteration: 281186
loss: 1.0128438472747803,grad_norm: 0.9999991629060754, iteration: 281187
loss: 0.9939715266227722,grad_norm: 0.8372545378773102, iteration: 281188
loss: 1.114820122718811,grad_norm: 0.999999825475193, iteration: 281189
loss: 0.9852315783500671,grad_norm: 0.9625016923175017, iteration: 281190
loss: 1.0887904167175293,grad_norm: 0.8364164307461733, iteration: 281191
loss: 0.9942331314086914,grad_norm: 0.9346427374327935, iteration: 281192
loss: 1.0166089534759521,grad_norm: 0.8677211193451645, iteration: 281193
loss: 0.9937731623649597,grad_norm: 0.7947823464409757, iteration: 281194
loss: 0.991546630859375,grad_norm: 0.7581554691198953, iteration: 281195
loss: 0.9836136102676392,grad_norm: 0.8459647431612163, iteration: 281196
loss: 1.0336415767669678,grad_norm: 0.9929689749942605, iteration: 281197
loss: 0.9615366458892822,grad_norm: 0.8153199389632052, iteration: 281198
loss: 0.9794542789459229,grad_norm: 0.7695366246512684, iteration: 281199
loss: 0.9899196624755859,grad_norm: 0.9999992730348228, iteration: 281200
loss: 1.0426520109176636,grad_norm: 0.9999991523235319, iteration: 281201
loss: 1.0963726043701172,grad_norm: 0.999999586033994, iteration: 281202
loss: 1.0067716836929321,grad_norm: 0.9999990603034692, iteration: 281203
loss: 1.0531049966812134,grad_norm: 0.9999997495657301, iteration: 281204
loss: 0.9922329783439636,grad_norm: 0.99999910754124, iteration: 281205
loss: 1.0557221174240112,grad_norm: 0.9902162532045048, iteration: 281206
loss: 1.0267274379730225,grad_norm: 0.8221429100403219, iteration: 281207
loss: 0.9678986668586731,grad_norm: 0.7957881899952723, iteration: 281208
loss: 1.0153396129608154,grad_norm: 0.9999991972930024, iteration: 281209
loss: 1.0926485061645508,grad_norm: 0.9999991496971598, iteration: 281210
loss: 1.050580382347107,grad_norm: 0.8648561908019721, iteration: 281211
loss: 1.0170549154281616,grad_norm: 0.8827301082245479, iteration: 281212
loss: 1.0054984092712402,grad_norm: 0.7993084797222898, iteration: 281213
loss: 1.0644508600234985,grad_norm: 0.9999992410043211, iteration: 281214
loss: 0.9998933672904968,grad_norm: 0.846094049657728, iteration: 281215
loss: 0.9905088543891907,grad_norm: 0.7450325304568014, iteration: 281216
loss: 1.0118248462677002,grad_norm: 0.8271598800081154, iteration: 281217
loss: 1.0581488609313965,grad_norm: 0.8508083343130342, iteration: 281218
loss: 1.0584187507629395,grad_norm: 0.821378929416349, iteration: 281219
loss: 1.0301505327224731,grad_norm: 0.9512830319042155, iteration: 281220
loss: 1.103488564491272,grad_norm: 0.9999990601347567, iteration: 281221
loss: 0.9711971282958984,grad_norm: 0.8314460338882462, iteration: 281222
loss: 1.0406314134597778,grad_norm: 0.9722180699570253, iteration: 281223
loss: 1.088918924331665,grad_norm: 0.999999062172495, iteration: 281224
loss: 1.0878108739852905,grad_norm: 1.0000000163021432, iteration: 281225
loss: 1.0221223831176758,grad_norm: 0.9999991699163328, iteration: 281226
loss: 1.2108840942382812,grad_norm: 0.999999815136221, iteration: 281227
loss: 1.0615592002868652,grad_norm: 0.9907831251162738, iteration: 281228
loss: 1.0158880949020386,grad_norm: 0.8475277355912089, iteration: 281229
loss: 1.1017966270446777,grad_norm: 0.9763845648518263, iteration: 281230
loss: 1.0154374837875366,grad_norm: 0.9999997577663492, iteration: 281231
loss: 1.0413976907730103,grad_norm: 0.9999993993243536, iteration: 281232
loss: 1.106580138206482,grad_norm: 0.8874884309078699, iteration: 281233
loss: 1.0015151500701904,grad_norm: 0.9999998949804021, iteration: 281234
loss: 1.1128294467926025,grad_norm: 0.9999994495741373, iteration: 281235
loss: 1.1121349334716797,grad_norm: 0.851718966922573, iteration: 281236
loss: 0.9756750464439392,grad_norm: 0.817565481906836, iteration: 281237
loss: 1.1408804655075073,grad_norm: 0.9999990374236352, iteration: 281238
loss: 1.0489438772201538,grad_norm: 0.999999318765198, iteration: 281239
loss: 1.080621361732483,grad_norm: 0.999999151708628, iteration: 281240
loss: 1.0081324577331543,grad_norm: 0.8538794537931418, iteration: 281241
loss: 1.115929126739502,grad_norm: 0.999999757892753, iteration: 281242
loss: 1.0411266088485718,grad_norm: 0.9999990034050728, iteration: 281243
loss: 1.0847654342651367,grad_norm: 0.999999773190409, iteration: 281244
loss: 1.0351735353469849,grad_norm: 0.999999410009672, iteration: 281245
loss: 1.1124159097671509,grad_norm: 0.9999993436800345, iteration: 281246
loss: 1.1068896055221558,grad_norm: 0.9139267440448741, iteration: 281247
loss: 1.0671859979629517,grad_norm: 1.0000000394083721, iteration: 281248
loss: 1.0001158714294434,grad_norm: 0.8254699307080132, iteration: 281249
loss: 1.0998166799545288,grad_norm: 0.9286556926297367, iteration: 281250
loss: 1.0828152894973755,grad_norm: 0.999999557033783, iteration: 281251
loss: 1.0216237306594849,grad_norm: 0.8607416536812345, iteration: 281252
loss: 1.0234014987945557,grad_norm: 0.9999992082478824, iteration: 281253
loss: 1.0271912813186646,grad_norm: 0.8755779055807873, iteration: 281254
loss: 1.0064901113510132,grad_norm: 0.9329277106467393, iteration: 281255
loss: 1.14458429813385,grad_norm: 0.9999993089330442, iteration: 281256
loss: 0.9629536867141724,grad_norm: 0.9631638057527762, iteration: 281257
loss: 1.0157387256622314,grad_norm: 0.9999990050143001, iteration: 281258
loss: 1.0242218971252441,grad_norm: 0.9999995777869675, iteration: 281259
loss: 1.0057852268218994,grad_norm: 0.9343920555688643, iteration: 281260
loss: 1.0409094095230103,grad_norm: 0.999999233874539, iteration: 281261
loss: 1.0333280563354492,grad_norm: 0.9999992897551381, iteration: 281262
loss: 1.0063966512680054,grad_norm: 0.9999995884175951, iteration: 281263
loss: 0.9756882786750793,grad_norm: 0.9263017552820692, iteration: 281264
loss: 0.9976312518119812,grad_norm: 0.9011034804466999, iteration: 281265
loss: 0.9970318675041199,grad_norm: 0.8871374605212748, iteration: 281266
loss: 1.0738036632537842,grad_norm: 0.9999993114720439, iteration: 281267
loss: 1.0738000869750977,grad_norm: 0.9999996726847156, iteration: 281268
loss: 0.9968398213386536,grad_norm: 0.8834929024674766, iteration: 281269
loss: 1.0589596033096313,grad_norm: 0.9387531295164238, iteration: 281270
loss: 1.0550463199615479,grad_norm: 0.924058564294499, iteration: 281271
loss: 1.0068284273147583,grad_norm: 0.9999991341793701, iteration: 281272
loss: 1.010334849357605,grad_norm: 0.9999991309607843, iteration: 281273
loss: 1.0622503757476807,grad_norm: 0.999999107829686, iteration: 281274
loss: 1.0129704475402832,grad_norm: 0.8980614636871023, iteration: 281275
loss: 1.216934323310852,grad_norm: 0.9999998284680682, iteration: 281276
loss: 1.0265486240386963,grad_norm: 0.8570612047314745, iteration: 281277
loss: 1.0467170476913452,grad_norm: 0.9999990144815655, iteration: 281278
loss: 1.1090810298919678,grad_norm: 0.9999991541768274, iteration: 281279
loss: 0.9986352324485779,grad_norm: 0.9999990302176244, iteration: 281280
loss: 1.069096326828003,grad_norm: 0.9747400358603754, iteration: 281281
loss: 1.0729554891586304,grad_norm: 0.9999996810417435, iteration: 281282
loss: 1.113230586051941,grad_norm: 0.9999993253427375, iteration: 281283
loss: 1.0274747610092163,grad_norm: 0.9999992113113203, iteration: 281284
loss: 0.9897412061691284,grad_norm: 0.9999998208968817, iteration: 281285
loss: 1.040566086769104,grad_norm: 0.9999993380402951, iteration: 281286
loss: 1.0806149244308472,grad_norm: 0.9999991022470278, iteration: 281287
loss: 1.1324970722198486,grad_norm: 0.9999990466455263, iteration: 281288
loss: 1.095095157623291,grad_norm: 0.8778677873972681, iteration: 281289
loss: 1.0511032342910767,grad_norm: 0.9999990601602916, iteration: 281290
loss: 0.9999682903289795,grad_norm: 0.8650128512461914, iteration: 281291
loss: 1.0769667625427246,grad_norm: 0.9999991756861939, iteration: 281292
loss: 1.173265814781189,grad_norm: 0.9999990245358078, iteration: 281293
loss: 0.9708234071731567,grad_norm: 0.7654926753461656, iteration: 281294
loss: 1.0419912338256836,grad_norm: 0.8611240194030894, iteration: 281295
loss: 1.0240422487258911,grad_norm: 0.9999994418548795, iteration: 281296
loss: 1.285587191581726,grad_norm: 0.9999991107808834, iteration: 281297
loss: 1.2762625217437744,grad_norm: 0.9999997102429627, iteration: 281298
loss: 1.06024968624115,grad_norm: 0.9999992533561429, iteration: 281299
loss: 1.086320400238037,grad_norm: 0.9734358519273956, iteration: 281300
loss: 1.1425583362579346,grad_norm: 0.9999995322512407, iteration: 281301
loss: 1.1226743459701538,grad_norm: 0.9510392060805278, iteration: 281302
loss: 1.1841857433319092,grad_norm: 0.9999994224657146, iteration: 281303
loss: 1.088921070098877,grad_norm: 0.9999991052822975, iteration: 281304
loss: 1.078407645225525,grad_norm: 0.9938203842065909, iteration: 281305
loss: 1.2414755821228027,grad_norm: 0.9999996165080985, iteration: 281306
loss: 1.0605571269989014,grad_norm: 0.9999996331896007, iteration: 281307
loss: 1.1504639387130737,grad_norm: 0.9406963642279341, iteration: 281308
loss: 1.056897759437561,grad_norm: 0.8893034950129138, iteration: 281309
loss: 1.1874550580978394,grad_norm: 0.9999992757134173, iteration: 281310
loss: 1.0423665046691895,grad_norm: 0.9999998660327721, iteration: 281311
loss: 1.1010915040969849,grad_norm: 0.9578016164180746, iteration: 281312
loss: 1.031401515007019,grad_norm: 0.8649485436375072, iteration: 281313
loss: 1.1032482385635376,grad_norm: 0.999999255714151, iteration: 281314
loss: 1.0087175369262695,grad_norm: 0.8304856984975314, iteration: 281315
loss: 1.0499423742294312,grad_norm: 0.9999995777724934, iteration: 281316
loss: 1.0280673503875732,grad_norm: 0.9832102142268453, iteration: 281317
loss: 1.0641859769821167,grad_norm: 0.9999990494673505, iteration: 281318
loss: 1.0873289108276367,grad_norm: 0.9999992626830045, iteration: 281319
loss: 1.056836724281311,grad_norm: 0.999999085191542, iteration: 281320
loss: 1.0376132726669312,grad_norm: 0.9939545959092371, iteration: 281321
loss: 1.0074912309646606,grad_norm: 0.9651067506890433, iteration: 281322
loss: 1.0451186895370483,grad_norm: 0.8509554810483205, iteration: 281323
loss: 1.033428430557251,grad_norm: 0.9999992048033634, iteration: 281324
loss: 1.0378947257995605,grad_norm: 0.9161998616495675, iteration: 281325
loss: 0.9977723956108093,grad_norm: 0.9999993505978393, iteration: 281326
loss: 0.9931201934814453,grad_norm: 0.7658010870309756, iteration: 281327
loss: 1.0702167749404907,grad_norm: 1.0000000612517628, iteration: 281328
loss: 1.0486681461334229,grad_norm: 0.999999621469838, iteration: 281329
loss: 1.0042868852615356,grad_norm: 0.811531671070373, iteration: 281330
loss: 0.9928072690963745,grad_norm: 0.8778802137495384, iteration: 281331
loss: 0.9994680881500244,grad_norm: 0.9999992040187171, iteration: 281332
loss: 1.1108627319335938,grad_norm: 0.9600534475650466, iteration: 281333
loss: 1.101348876953125,grad_norm: 0.9999992886141909, iteration: 281334
loss: 0.9818527102470398,grad_norm: 0.9999989842376812, iteration: 281335
loss: 1.0549107789993286,grad_norm: 0.9209433026198951, iteration: 281336
loss: 1.115474820137024,grad_norm: 0.9999992201765786, iteration: 281337
loss: 1.0684096813201904,grad_norm: 0.9999997964788219, iteration: 281338
loss: 1.0623986721038818,grad_norm: 0.8708876124607806, iteration: 281339
loss: 1.0831999778747559,grad_norm: 0.9999993079626862, iteration: 281340
loss: 0.9970636367797852,grad_norm: 0.9239483992672244, iteration: 281341
loss: 1.0336748361587524,grad_norm: 0.9584688285458137, iteration: 281342
loss: 0.9788652062416077,grad_norm: 0.8045114198340796, iteration: 281343
loss: 0.9807753562927246,grad_norm: 0.9062964258253411, iteration: 281344
loss: 1.0013755559921265,grad_norm: 0.9999991154919358, iteration: 281345
loss: 1.0103192329406738,grad_norm: 0.9999992447818314, iteration: 281346
loss: 1.0768253803253174,grad_norm: 0.9999995408405378, iteration: 281347
loss: 1.0557304620742798,grad_norm: 0.87616106705256, iteration: 281348
loss: 1.0143624544143677,grad_norm: 0.9217026468574087, iteration: 281349
loss: 1.046626329421997,grad_norm: 0.9999992386524472, iteration: 281350
loss: 1.000540018081665,grad_norm: 0.954480593088136, iteration: 281351
loss: 0.9861454963684082,grad_norm: 0.8368371283407251, iteration: 281352
loss: 1.08790922164917,grad_norm: 0.9999997110314444, iteration: 281353
loss: 1.00557279586792,grad_norm: 0.8327302386704744, iteration: 281354
loss: 1.039996862411499,grad_norm: 0.9999996393847576, iteration: 281355
loss: 0.9781068563461304,grad_norm: 0.9999989917590234, iteration: 281356
loss: 1.0093774795532227,grad_norm: 0.9999991667689296, iteration: 281357
loss: 1.0003591775894165,grad_norm: 0.9945039758744647, iteration: 281358
loss: 1.0885350704193115,grad_norm: 0.9999993089714372, iteration: 281359
loss: 0.9799929857254028,grad_norm: 0.9999991783836956, iteration: 281360
loss: 1.1272239685058594,grad_norm: 0.999999168233722, iteration: 281361
loss: 0.9913931488990784,grad_norm: 0.8574728756772113, iteration: 281362
loss: 1.060860514640808,grad_norm: 0.9447322926245071, iteration: 281363
loss: 1.0974295139312744,grad_norm: 0.9999991575334607, iteration: 281364
loss: 1.0342592000961304,grad_norm: 0.9999999256695182, iteration: 281365
loss: 0.9985778331756592,grad_norm: 0.8210822010270676, iteration: 281366
loss: 1.043321132659912,grad_norm: 0.9581685817838498, iteration: 281367
loss: 1.1151494979858398,grad_norm: 0.9999997463285045, iteration: 281368
loss: 1.0636032819747925,grad_norm: 0.947925087294127, iteration: 281369
loss: 0.9856196641921997,grad_norm: 0.8666063445715115, iteration: 281370
loss: 0.9765346050262451,grad_norm: 0.8071281694297117, iteration: 281371
loss: 1.0158624649047852,grad_norm: 0.8526728454449258, iteration: 281372
loss: 0.9992229342460632,grad_norm: 0.8530670902235815, iteration: 281373
loss: 1.2144087553024292,grad_norm: 0.9999997740523756, iteration: 281374
loss: 0.9836673140525818,grad_norm: 0.797681028943874, iteration: 281375
loss: 1.0037763118743896,grad_norm: 0.8140216741802536, iteration: 281376
loss: 0.9875537157058716,grad_norm: 0.8459610149994433, iteration: 281377
loss: 1.0567413568496704,grad_norm: 0.9999992562534609, iteration: 281378
loss: 1.0161651372909546,grad_norm: 0.999999157635719, iteration: 281379
loss: 1.0157686471939087,grad_norm: 0.7381371109033883, iteration: 281380
loss: 1.006056547164917,grad_norm: 0.8160704802898279, iteration: 281381
loss: 0.9631015658378601,grad_norm: 0.9129694581467929, iteration: 281382
loss: 1.0026403665542603,grad_norm: 0.9063199836820656, iteration: 281383
loss: 0.97245854139328,grad_norm: 0.9999992469142917, iteration: 281384
loss: 1.091230869293213,grad_norm: 0.99999920214066, iteration: 281385
loss: 1.041054129600525,grad_norm: 0.9813935459672705, iteration: 281386
loss: 1.0127851963043213,grad_norm: 0.7237294949019683, iteration: 281387
loss: 1.0539909601211548,grad_norm: 0.9990617394608011, iteration: 281388
loss: 1.0369783639907837,grad_norm: 0.9999994192518259, iteration: 281389
loss: 0.9993435144424438,grad_norm: 0.8809798545312498, iteration: 281390
loss: 1.0413662195205688,grad_norm: 0.9999994707962864, iteration: 281391
loss: 1.0020776987075806,grad_norm: 0.86558778667562, iteration: 281392
loss: 0.9678761959075928,grad_norm: 0.7920570553133515, iteration: 281393
loss: 1.0295644998550415,grad_norm: 0.879199709664341, iteration: 281394
loss: 1.079113245010376,grad_norm: 0.9997282493520616, iteration: 281395
loss: 1.05899977684021,grad_norm: 0.8728968518201928, iteration: 281396
loss: 0.9865180253982544,grad_norm: 0.9999996023172547, iteration: 281397
loss: 1.0089082717895508,grad_norm: 0.9140275581651539, iteration: 281398
loss: 1.0445398092269897,grad_norm: 0.999999253190631, iteration: 281399
loss: 1.1377562284469604,grad_norm: 0.9999990885928232, iteration: 281400
loss: 0.9979448318481445,grad_norm: 0.9999990263850443, iteration: 281401
loss: 0.9478887915611267,grad_norm: 0.9999995865613996, iteration: 281402
loss: 0.990527331829071,grad_norm: 0.9999995151823573, iteration: 281403
loss: 0.9929819703102112,grad_norm: 0.8932562650326918, iteration: 281404
loss: 1.035937786102295,grad_norm: 0.8477543016004395, iteration: 281405
loss: 1.0231705904006958,grad_norm: 0.9671879081196425, iteration: 281406
loss: 1.0244015455245972,grad_norm: 0.8734133377840687, iteration: 281407
loss: 1.0524301528930664,grad_norm: 0.9999991752750723, iteration: 281408
loss: 1.0392392873764038,grad_norm: 0.907177690703278, iteration: 281409
loss: 1.0402309894561768,grad_norm: 0.7812513273601693, iteration: 281410
loss: 1.0446791648864746,grad_norm: 0.8354031758224231, iteration: 281411
loss: 1.0573657751083374,grad_norm: 0.9999994268038248, iteration: 281412
loss: 0.961174488067627,grad_norm: 0.9527197819148295, iteration: 281413
loss: 1.0272244215011597,grad_norm: 0.8229684106507179, iteration: 281414
loss: 0.9917929768562317,grad_norm: 0.8687845499457881, iteration: 281415
loss: 0.9892827272415161,grad_norm: 0.9909527884764494, iteration: 281416
loss: 1.0631003379821777,grad_norm: 0.9999997779169382, iteration: 281417
loss: 1.0383564233779907,grad_norm: 0.9999993016688943, iteration: 281418
loss: 1.0432831048965454,grad_norm: 0.9999992426116832, iteration: 281419
loss: 1.0298793315887451,grad_norm: 0.8186015174710207, iteration: 281420
loss: 1.0142810344696045,grad_norm: 0.9999992594313701, iteration: 281421
loss: 1.002935528755188,grad_norm: 0.9111866148584555, iteration: 281422
loss: 1.0548368692398071,grad_norm: 0.9999994565988706, iteration: 281423
loss: 1.0131065845489502,grad_norm: 0.7542916408586087, iteration: 281424
loss: 0.9921529293060303,grad_norm: 0.9999991360250835, iteration: 281425
loss: 0.954744279384613,grad_norm: 0.8601919508724316, iteration: 281426
loss: 1.0650132894515991,grad_norm: 0.9999994681575652, iteration: 281427
loss: 1.0098810195922852,grad_norm: 0.9999990617239942, iteration: 281428
loss: 0.9805735349655151,grad_norm: 0.8161433703309058, iteration: 281429
loss: 0.9763930439949036,grad_norm: 0.8894302695286602, iteration: 281430
loss: 0.9993039965629578,grad_norm: 0.999999392938553, iteration: 281431
loss: 1.025876760482788,grad_norm: 0.8741330496084253, iteration: 281432
loss: 0.9919911623001099,grad_norm: 0.8246951576258178, iteration: 281433
loss: 1.1366286277770996,grad_norm: 0.9999998132383833, iteration: 281434
loss: 1.0010391473770142,grad_norm: 0.8864641956474292, iteration: 281435
loss: 0.9850046634674072,grad_norm: 0.8600183482811407, iteration: 281436
loss: 0.9889333844184875,grad_norm: 0.9804219536519125, iteration: 281437
loss: 1.0579421520233154,grad_norm: 0.9999990752804245, iteration: 281438
loss: 0.9834738969802856,grad_norm: 0.9129272828991823, iteration: 281439
loss: 1.0343809127807617,grad_norm: 0.9246794964069555, iteration: 281440
loss: 1.0015790462493896,grad_norm: 0.8980632094075625, iteration: 281441
loss: 1.0388282537460327,grad_norm: 0.9707701057703312, iteration: 281442
loss: 0.9941514134407043,grad_norm: 0.8230541406647246, iteration: 281443
loss: 1.026121973991394,grad_norm: 0.8220644985862936, iteration: 281444
loss: 0.9691064953804016,grad_norm: 0.9999989991895541, iteration: 281445
loss: 1.0154718160629272,grad_norm: 0.9999991519060801, iteration: 281446
loss: 1.054071307182312,grad_norm: 0.9999991555198089, iteration: 281447
loss: 0.9982165098190308,grad_norm: 0.8668111475631184, iteration: 281448
loss: 1.0827126502990723,grad_norm: 0.8893807358825141, iteration: 281449
loss: 0.9879299998283386,grad_norm: 0.8440683432233156, iteration: 281450
loss: 0.9948418140411377,grad_norm: 0.8779989119750565, iteration: 281451
loss: 1.0111604928970337,grad_norm: 0.7764533491867955, iteration: 281452
loss: 1.0246069431304932,grad_norm: 0.9999991509919216, iteration: 281453
loss: 0.9977158308029175,grad_norm: 0.8281572570155333, iteration: 281454
loss: 0.9980868101119995,grad_norm: 0.7877529460422262, iteration: 281455
loss: 1.0046073198318481,grad_norm: 0.8421549950111714, iteration: 281456
loss: 1.0339934825897217,grad_norm: 0.9999991734266355, iteration: 281457
loss: 0.9766224026679993,grad_norm: 0.9759841660023697, iteration: 281458
loss: 0.9814475774765015,grad_norm: 0.9073588863173423, iteration: 281459
loss: 1.00750732421875,grad_norm: 0.8678729434824061, iteration: 281460
loss: 1.0429246425628662,grad_norm: 0.8911610312012491, iteration: 281461
loss: 1.0273363590240479,grad_norm: 0.8788513524173536, iteration: 281462
loss: 1.030042052268982,grad_norm: 0.9385908115293856, iteration: 281463
loss: 0.9800271391868591,grad_norm: 0.7591385822937792, iteration: 281464
loss: 0.9877054691314697,grad_norm: 0.9999991439688608, iteration: 281465
loss: 1.0281087160110474,grad_norm: 0.9999998259155362, iteration: 281466
loss: 1.0000399351119995,grad_norm: 0.8818591156279577, iteration: 281467
loss: 0.9831269383430481,grad_norm: 0.9999990434736713, iteration: 281468
loss: 0.9800357222557068,grad_norm: 0.8522937997988068, iteration: 281469
loss: 1.0002957582473755,grad_norm: 0.7902218408782854, iteration: 281470
loss: 0.9850687384605408,grad_norm: 0.8274084825337691, iteration: 281471
loss: 0.9591576457023621,grad_norm: 0.881996340939167, iteration: 281472
loss: 0.9971352219581604,grad_norm: 0.9047462494270497, iteration: 281473
loss: 1.0450963973999023,grad_norm: 0.9999999634635189, iteration: 281474
loss: 1.0160374641418457,grad_norm: 0.9556324924451408, iteration: 281475
loss: 1.0196056365966797,grad_norm: 0.9999991262109797, iteration: 281476
loss: 1.0695656538009644,grad_norm: 0.8892395993560597, iteration: 281477
loss: 1.000152826309204,grad_norm: 0.9999995804271562, iteration: 281478
loss: 1.010280728340149,grad_norm: 0.7254452651677333, iteration: 281479
loss: 1.0140522718429565,grad_norm: 0.7964952891288634, iteration: 281480
loss: 1.0127136707305908,grad_norm: 0.999999211405072, iteration: 281481
loss: 1.069230318069458,grad_norm: 0.9999999345264416, iteration: 281482
loss: 1.0481879711151123,grad_norm: 0.862223225769314, iteration: 281483
loss: 0.994239866733551,grad_norm: 0.8848274649793659, iteration: 281484
loss: 1.0148241519927979,grad_norm: 0.9999996509424428, iteration: 281485
loss: 0.9870449304580688,grad_norm: 0.8579895966093576, iteration: 281486
loss: 0.9687218070030212,grad_norm: 0.8635371683999606, iteration: 281487
loss: 0.9901415109634399,grad_norm: 0.894661789411844, iteration: 281488
loss: 0.9674078822135925,grad_norm: 0.7217827879576332, iteration: 281489
loss: 1.01906156539917,grad_norm: 0.9999990244055705, iteration: 281490
loss: 1.0650662183761597,grad_norm: 0.9999992239526335, iteration: 281491
loss: 1.0267834663391113,grad_norm: 0.8332859707682871, iteration: 281492
loss: 0.989780068397522,grad_norm: 0.8043736313939959, iteration: 281493
loss: 0.9934855699539185,grad_norm: 0.894535888395232, iteration: 281494
loss: 1.0247249603271484,grad_norm: 0.8393100628713708, iteration: 281495
loss: 0.9869702458381653,grad_norm: 0.6909692421047692, iteration: 281496
loss: 1.000496506690979,grad_norm: 0.9680893270262569, iteration: 281497
loss: 0.9778404831886292,grad_norm: 0.901998299465475, iteration: 281498
loss: 0.987167239189148,grad_norm: 0.9561317429944253, iteration: 281499
loss: 0.9953593015670776,grad_norm: 0.9343363615338022, iteration: 281500
loss: 1.003612756729126,grad_norm: 0.8452903116027257, iteration: 281501
loss: 1.054801106452942,grad_norm: 0.9999992452382807, iteration: 281502
loss: 1.0003238916397095,grad_norm: 0.9960123735151821, iteration: 281503
loss: 1.0219508409500122,grad_norm: 0.9986704078011498, iteration: 281504
loss: 0.9959587454795837,grad_norm: 0.9999991464274853, iteration: 281505
loss: 0.9972111582756042,grad_norm: 0.9999989530956617, iteration: 281506
loss: 1.0121995210647583,grad_norm: 0.8811931954818224, iteration: 281507
loss: 1.011863112449646,grad_norm: 0.830826893099021, iteration: 281508
loss: 1.0121006965637207,grad_norm: 0.9652650673083508, iteration: 281509
loss: 0.9883951544761658,grad_norm: 0.8219462520442334, iteration: 281510
loss: 0.9855980277061462,grad_norm: 0.8455012984948652, iteration: 281511
loss: 1.0620862245559692,grad_norm: 0.8569621849933003, iteration: 281512
loss: 1.039581060409546,grad_norm: 0.8742857839042021, iteration: 281513
loss: 0.9876661896705627,grad_norm: 0.8524157151722664, iteration: 281514
loss: 1.028972864151001,grad_norm: 0.9107125343511788, iteration: 281515
loss: 0.9994590282440186,grad_norm: 0.9377152478443886, iteration: 281516
loss: 0.9826875329017639,grad_norm: 0.8078299623343991, iteration: 281517
loss: 0.969758152961731,grad_norm: 0.9261636708579649, iteration: 281518
loss: 0.9703757166862488,grad_norm: 0.849198449700259, iteration: 281519
loss: 1.0101269483566284,grad_norm: 0.9167742540136355, iteration: 281520
loss: 0.9723263382911682,grad_norm: 0.8333806511830254, iteration: 281521
loss: 1.0263503789901733,grad_norm: 0.8968145250428169, iteration: 281522
loss: 1.153826117515564,grad_norm: 0.9999997977438185, iteration: 281523
loss: 1.0015820264816284,grad_norm: 0.9834612668731989, iteration: 281524
loss: 0.9646254777908325,grad_norm: 0.9999992525148679, iteration: 281525
loss: 0.9609895348548889,grad_norm: 0.9468796413944027, iteration: 281526
loss: 0.9821182489395142,grad_norm: 0.9305506083178611, iteration: 281527
loss: 1.0226649045944214,grad_norm: 0.8718573509912481, iteration: 281528
loss: 0.983184814453125,grad_norm: 0.7733213450224026, iteration: 281529
loss: 1.0188751220703125,grad_norm: 0.9672861033922499, iteration: 281530
loss: 1.0530208349227905,grad_norm: 1.0000000228172725, iteration: 281531
loss: 1.0175014734268188,grad_norm: 0.9999991419437017, iteration: 281532
loss: 1.137183427810669,grad_norm: 0.9999997241627474, iteration: 281533
loss: 1.002921223640442,grad_norm: 0.970830579495365, iteration: 281534
loss: 1.021957516670227,grad_norm: 0.9313103749944628, iteration: 281535
loss: 0.9818297624588013,grad_norm: 0.7465886558277895, iteration: 281536
loss: 1.051403284072876,grad_norm: 0.9999990422046688, iteration: 281537
loss: 1.0125824213027954,grad_norm: 0.8053677120616708, iteration: 281538
loss: 0.9966704249382019,grad_norm: 0.9527320269839261, iteration: 281539
loss: 0.996094822883606,grad_norm: 0.733353660647629, iteration: 281540
loss: 1.0017716884613037,grad_norm: 0.8400776967125788, iteration: 281541
loss: 0.9669179320335388,grad_norm: 0.9672034133004562, iteration: 281542
loss: 1.0089648962020874,grad_norm: 0.7736203611704603, iteration: 281543
loss: 1.000205397605896,grad_norm: 0.9016280806612839, iteration: 281544
loss: 1.0093843936920166,grad_norm: 0.8774763565214687, iteration: 281545
loss: 1.016647219657898,grad_norm: 0.9612942230264849, iteration: 281546
loss: 0.9998331665992737,grad_norm: 0.9999990447138636, iteration: 281547
loss: 0.9998564124107361,grad_norm: 0.9999991585107977, iteration: 281548
loss: 1.0387932062149048,grad_norm: 0.9999997764652623, iteration: 281549
loss: 1.0408143997192383,grad_norm: 0.7228336261610343, iteration: 281550
loss: 1.029537320137024,grad_norm: 0.9028624301978926, iteration: 281551
loss: 0.9679320454597473,grad_norm: 0.8657037272828815, iteration: 281552
loss: 1.0139769315719604,grad_norm: 0.8428998242920355, iteration: 281553
loss: 1.0027920007705688,grad_norm: 0.8155711809755735, iteration: 281554
loss: 0.9991034269332886,grad_norm: 0.9611101032303394, iteration: 281555
loss: 1.009902000427246,grad_norm: 0.8720646870815257, iteration: 281556
loss: 1.022530198097229,grad_norm: 0.8501033951722416, iteration: 281557
loss: 1.0521758794784546,grad_norm: 0.9999990741696678, iteration: 281558
loss: 1.067491888999939,grad_norm: 0.999999417270651, iteration: 281559
loss: 1.0374480485916138,grad_norm: 0.9999992873718475, iteration: 281560
loss: 1.001508355140686,grad_norm: 0.9999991949269575, iteration: 281561
loss: 0.9829727411270142,grad_norm: 0.8613220729777471, iteration: 281562
loss: 0.9944773316383362,grad_norm: 0.7325141521667811, iteration: 281563
loss: 0.9964335560798645,grad_norm: 0.7584678892066377, iteration: 281564
loss: 1.0365707874298096,grad_norm: 0.9999991598610588, iteration: 281565
loss: 0.9759843945503235,grad_norm: 0.936087348737111, iteration: 281566
loss: 1.0258477926254272,grad_norm: 0.9407375813087577, iteration: 281567
loss: 1.112897515296936,grad_norm: 0.8871422678449282, iteration: 281568
loss: 1.0584571361541748,grad_norm: 0.9999993996355953, iteration: 281569
loss: 1.0017738342285156,grad_norm: 0.7532587167817159, iteration: 281570
loss: 1.0101555585861206,grad_norm: 0.8587502491694281, iteration: 281571
loss: 0.974908709526062,grad_norm: 0.9999990243732401, iteration: 281572
loss: 0.9997425079345703,grad_norm: 0.7639869296247868, iteration: 281573
loss: 1.0214450359344482,grad_norm: 0.8522303366395204, iteration: 281574
loss: 0.9896060824394226,grad_norm: 0.9179780506887986, iteration: 281575
loss: 1.0104036331176758,grad_norm: 0.9999990090867287, iteration: 281576
loss: 0.9820399284362793,grad_norm: 0.9999991211504992, iteration: 281577
loss: 0.9870857000350952,grad_norm: 0.7691741619797106, iteration: 281578
loss: 0.9650021195411682,grad_norm: 0.9593202151706606, iteration: 281579
loss: 1.0262343883514404,grad_norm: 0.99999927027729, iteration: 281580
loss: 1.0043541193008423,grad_norm: 0.9999990564200721, iteration: 281581
loss: 0.9701673984527588,grad_norm: 0.877339325437154, iteration: 281582
loss: 1.0068970918655396,grad_norm: 0.9879906789155821, iteration: 281583
loss: 1.0786134004592896,grad_norm: 0.9999994767298699, iteration: 281584
loss: 0.975972592830658,grad_norm: 0.8049732356732376, iteration: 281585
loss: 1.0878353118896484,grad_norm: 0.9373771657926938, iteration: 281586
loss: 1.054678201675415,grad_norm: 0.9999996725154324, iteration: 281587
loss: 0.9922589063644409,grad_norm: 0.9999991208446489, iteration: 281588
loss: 0.9928807616233826,grad_norm: 0.7910117209295235, iteration: 281589
loss: 1.0043386220932007,grad_norm: 0.8426984857563854, iteration: 281590
loss: 0.9983699321746826,grad_norm: 0.7930308688962141, iteration: 281591
loss: 0.9773969650268555,grad_norm: 0.892718889182041, iteration: 281592
loss: 1.0338410139083862,grad_norm: 0.8532585941516309, iteration: 281593
loss: 1.032936692237854,grad_norm: 0.9999991282347042, iteration: 281594
loss: 0.9771822690963745,grad_norm: 0.8182230897305863, iteration: 281595
loss: 1.0090303421020508,grad_norm: 0.9369462518781219, iteration: 281596
loss: 1.021889567375183,grad_norm: 0.7739527908579795, iteration: 281597
loss: 1.0161865949630737,grad_norm: 0.9999991642559912, iteration: 281598
loss: 1.044087529182434,grad_norm: 0.9999991533957033, iteration: 281599
loss: 1.013439416885376,grad_norm: 0.9999992226475091, iteration: 281600
loss: 0.9794853329658508,grad_norm: 0.8299674015208062, iteration: 281601
loss: 0.9998743534088135,grad_norm: 0.8932439110026182, iteration: 281602
loss: 1.0981920957565308,grad_norm: 0.9999991355131773, iteration: 281603
loss: 1.0088976621627808,grad_norm: 0.7461229320293055, iteration: 281604
loss: 0.99439537525177,grad_norm: 0.8229505103144433, iteration: 281605
loss: 1.0223051309585571,grad_norm: 0.7997291816798785, iteration: 281606
loss: 1.0404211282730103,grad_norm: 0.9626169736965429, iteration: 281607
loss: 0.9913419485092163,grad_norm: 0.799914197017078, iteration: 281608
loss: 1.0295287370681763,grad_norm: 0.9999992856807279, iteration: 281609
loss: 1.0024909973144531,grad_norm: 0.9009759280889518, iteration: 281610
loss: 0.9533342719078064,grad_norm: 0.8629479068875774, iteration: 281611
loss: 0.9659663438796997,grad_norm: 0.9887748318455406, iteration: 281612
loss: 1.0712867975234985,grad_norm: 0.7854070036736578, iteration: 281613
loss: 1.004410743713379,grad_norm: 0.7950835535522456, iteration: 281614
loss: 0.9832709431648254,grad_norm: 0.9146796558290443, iteration: 281615
loss: 1.0582107305526733,grad_norm: 0.8799903645045446, iteration: 281616
loss: 1.0354046821594238,grad_norm: 0.9999990938493027, iteration: 281617
loss: 1.0119673013687134,grad_norm: 0.9999991732954534, iteration: 281618
loss: 1.2077776193618774,grad_norm: 0.9999997157877689, iteration: 281619
loss: 0.9910768270492554,grad_norm: 0.874625158474506, iteration: 281620
loss: 0.978919506072998,grad_norm: 0.819241448296251, iteration: 281621
loss: 1.0297205448150635,grad_norm: 0.8203489656188472, iteration: 281622
loss: 1.0453813076019287,grad_norm: 0.9039664837616945, iteration: 281623
loss: 0.9904648065567017,grad_norm: 0.8582465966528469, iteration: 281624
loss: 1.0346705913543701,grad_norm: 0.99999979821895, iteration: 281625
loss: 0.9830196499824524,grad_norm: 0.9999992017818446, iteration: 281626
loss: 0.9877920150756836,grad_norm: 0.8403866660150703, iteration: 281627
loss: 1.000184416770935,grad_norm: 0.9999989127806841, iteration: 281628
loss: 0.9979447722434998,grad_norm: 0.8211582608003273, iteration: 281629
loss: 1.0119086503982544,grad_norm: 0.8709655156514441, iteration: 281630
loss: 0.9671362042427063,grad_norm: 0.8620104679687973, iteration: 281631
loss: 0.9805148243904114,grad_norm: 0.9494163869458169, iteration: 281632
loss: 0.9997029304504395,grad_norm: 0.8052500261050941, iteration: 281633
loss: 1.0161799192428589,grad_norm: 0.9814947361391729, iteration: 281634
loss: 1.085240364074707,grad_norm: 0.9999991413233623, iteration: 281635
loss: 1.0511136054992676,grad_norm: 0.9999994820248718, iteration: 281636
loss: 0.9932503700256348,grad_norm: 0.9999994971324835, iteration: 281637
loss: 1.0686951875686646,grad_norm: 0.9999994414569723, iteration: 281638
loss: 1.0202535390853882,grad_norm: 0.9891958528337075, iteration: 281639
loss: 1.0387893915176392,grad_norm: 0.725424684429077, iteration: 281640
loss: 1.004478931427002,grad_norm: 0.8298192950017238, iteration: 281641
loss: 1.057468056678772,grad_norm: 0.9999991676593426, iteration: 281642
loss: 1.0450921058654785,grad_norm: 0.8739649764480443, iteration: 281643
loss: 0.9900080561637878,grad_norm: 0.9999997481509236, iteration: 281644
loss: 1.1219604015350342,grad_norm: 0.9999993980350353, iteration: 281645
loss: 0.9784430265426636,grad_norm: 0.6922619203119371, iteration: 281646
loss: 1.007912278175354,grad_norm: 0.99999936304027, iteration: 281647
loss: 0.9729108810424805,grad_norm: 0.9697996089191043, iteration: 281648
loss: 0.9935349822044373,grad_norm: 0.8806672673424897, iteration: 281649
loss: 1.0274466276168823,grad_norm: 0.999999364359293, iteration: 281650
loss: 0.95539790391922,grad_norm: 0.9999989854367244, iteration: 281651
loss: 1.0224082469940186,grad_norm: 0.851294547864396, iteration: 281652
loss: 0.9887890815734863,grad_norm: 0.9112471243721335, iteration: 281653
loss: 1.0403286218643188,grad_norm: 0.9999997133854989, iteration: 281654
loss: 1.0255192518234253,grad_norm: 0.9999993493754372, iteration: 281655
loss: 1.00483238697052,grad_norm: 0.8894409192478951, iteration: 281656
loss: 1.047143578529358,grad_norm: 0.8700954875851582, iteration: 281657
loss: 1.038735032081604,grad_norm: 0.9039359814688505, iteration: 281658
loss: 0.9907669425010681,grad_norm: 0.9999990006468126, iteration: 281659
loss: 1.0156311988830566,grad_norm: 0.8680291855844126, iteration: 281660
loss: 0.9909315705299377,grad_norm: 0.7563143688346066, iteration: 281661
loss: 0.9870291352272034,grad_norm: 0.9199666281942069, iteration: 281662
loss: 1.0333538055419922,grad_norm: 0.8040235981470687, iteration: 281663
loss: 1.0918642282485962,grad_norm: 0.9999996919738675, iteration: 281664
loss: 1.036948561668396,grad_norm: 0.9999990313816747, iteration: 281665
loss: 0.9845033288002014,grad_norm: 0.8932991199281552, iteration: 281666
loss: 0.9995325207710266,grad_norm: 0.7612119876252931, iteration: 281667
loss: 1.10819411277771,grad_norm: 0.999999025872921, iteration: 281668
loss: 1.0187644958496094,grad_norm: 0.8614339155298263, iteration: 281669
loss: 1.3110531568527222,grad_norm: 0.9999994974575731, iteration: 281670
loss: 1.067510962486267,grad_norm: 0.9999992601716032, iteration: 281671
loss: 1.068626880645752,grad_norm: 0.9999990046379158, iteration: 281672
loss: 1.0161620378494263,grad_norm: 0.8546851546610993, iteration: 281673
loss: 0.9800419807434082,grad_norm: 0.8314057497204127, iteration: 281674
loss: 1.0606998205184937,grad_norm: 0.9999992384541689, iteration: 281675
loss: 1.0160951614379883,grad_norm: 0.8572139517551951, iteration: 281676
loss: 0.9634718894958496,grad_norm: 0.948496345037729, iteration: 281677
loss: 1.0152783393859863,grad_norm: 0.99999926805513, iteration: 281678
loss: 1.06472909450531,grad_norm: 0.7537111946612476, iteration: 281679
loss: 1.0352493524551392,grad_norm: 0.9999991449136219, iteration: 281680
loss: 1.0162314176559448,grad_norm: 0.9999991424412892, iteration: 281681
loss: 1.0569137334823608,grad_norm: 0.9525469708153265, iteration: 281682
loss: 1.0071581602096558,grad_norm: 0.9506624697824541, iteration: 281683
loss: 1.0294833183288574,grad_norm: 0.9011448154688733, iteration: 281684
loss: 1.0925776958465576,grad_norm: 0.9999996416871414, iteration: 281685
loss: 0.9878939390182495,grad_norm: 0.85298035619311, iteration: 281686
loss: 0.9803029894828796,grad_norm: 0.8105427606366548, iteration: 281687
loss: 0.9887880086898804,grad_norm: 0.9233539125121001, iteration: 281688
loss: 0.9782422780990601,grad_norm: 0.8117704248351234, iteration: 281689
loss: 0.9684615731239319,grad_norm: 0.8892879945067921, iteration: 281690
loss: 1.0766799449920654,grad_norm: 0.9999995972334556, iteration: 281691
loss: 1.0461539030075073,grad_norm: 0.9227559557171121, iteration: 281692
loss: 1.0082471370697021,grad_norm: 0.9582937876887907, iteration: 281693
loss: 0.9490374326705933,grad_norm: 0.8746756323091884, iteration: 281694
loss: 1.009006142616272,grad_norm: 0.844203014040528, iteration: 281695
loss: 1.0326646566390991,grad_norm: 0.9999998423674045, iteration: 281696
loss: 1.044114589691162,grad_norm: 0.9029555719664607, iteration: 281697
loss: 0.9831386804580688,grad_norm: 0.9186982903640005, iteration: 281698
loss: 0.9951371550559998,grad_norm: 0.889145002052426, iteration: 281699
loss: 1.042463779449463,grad_norm: 0.8065943371747618, iteration: 281700
loss: 0.979805052280426,grad_norm: 0.8099351930289401, iteration: 281701
loss: 0.9987871646881104,grad_norm: 0.9999996747441449, iteration: 281702
loss: 0.9980526566505432,grad_norm: 0.9140605313976775, iteration: 281703
loss: 0.9925153255462646,grad_norm: 0.8204035575910468, iteration: 281704
loss: 0.9654254913330078,grad_norm: 0.806789021301266, iteration: 281705
loss: 0.9703546762466431,grad_norm: 0.999999149428682, iteration: 281706
loss: 1.0410633087158203,grad_norm: 0.7402938644690529, iteration: 281707
loss: 1.0362706184387207,grad_norm: 0.9004641962477987, iteration: 281708
loss: 1.0790108442306519,grad_norm: 0.9999998512560118, iteration: 281709
loss: 0.9946823120117188,grad_norm: 0.9179656630410479, iteration: 281710
loss: 1.0050244331359863,grad_norm: 0.99999972140009, iteration: 281711
loss: 1.014088749885559,grad_norm: 0.8515080838267933, iteration: 281712
loss: 1.0520495176315308,grad_norm: 0.8817244440323387, iteration: 281713
loss: 1.029457449913025,grad_norm: 0.8436002636332283, iteration: 281714
loss: 0.9831567406654358,grad_norm: 0.8178752881763496, iteration: 281715
loss: 1.0244377851486206,grad_norm: 0.999999361591726, iteration: 281716
loss: 1.009595513343811,grad_norm: 0.8346230900773052, iteration: 281717
loss: 1.125076413154602,grad_norm: 0.9999996867782688, iteration: 281718
loss: 1.098237156867981,grad_norm: 0.9999996940424187, iteration: 281719
loss: 1.0141369104385376,grad_norm: 0.9321855547906226, iteration: 281720
loss: 0.9651906490325928,grad_norm: 0.7193204725598529, iteration: 281721
loss: 1.1669946908950806,grad_norm: 0.9999999089895703, iteration: 281722
loss: 1.0002254247665405,grad_norm: 0.9952807627958447, iteration: 281723
loss: 1.0020803213119507,grad_norm: 0.8354458043561498, iteration: 281724
loss: 1.1461299657821655,grad_norm: 0.9999991495888529, iteration: 281725
loss: 1.0101066827774048,grad_norm: 0.9449725337048549, iteration: 281726
loss: 1.149965524673462,grad_norm: 0.9999992171174853, iteration: 281727
loss: 1.214079737663269,grad_norm: 0.9999996073244939, iteration: 281728
loss: 1.075281023979187,grad_norm: 0.9999991239759709, iteration: 281729
loss: 1.011345624923706,grad_norm: 0.9999991880382946, iteration: 281730
loss: 1.195096731185913,grad_norm: 0.999999596127063, iteration: 281731
loss: 1.0615354776382446,grad_norm: 0.9999991311701867, iteration: 281732
loss: 1.0046310424804688,grad_norm: 0.8747454373996841, iteration: 281733
loss: 1.0720595121383667,grad_norm: 0.9999993050945423, iteration: 281734
loss: 0.9527974128723145,grad_norm: 0.9295767709428532, iteration: 281735
loss: 0.9830611944198608,grad_norm: 0.8793093244261636, iteration: 281736
loss: 0.9838753342628479,grad_norm: 0.8448609204221896, iteration: 281737
loss: 1.005050539970398,grad_norm: 0.9999993746711632, iteration: 281738
loss: 0.959080696105957,grad_norm: 0.9450917391882206, iteration: 281739
loss: 1.062014102935791,grad_norm: 0.9999991183419271, iteration: 281740
loss: 1.1337931156158447,grad_norm: 0.9999990639643989, iteration: 281741
loss: 1.0175079107284546,grad_norm: 0.7265014849201213, iteration: 281742
loss: 1.0287991762161255,grad_norm: 0.9504014621546797, iteration: 281743
loss: 0.9879612922668457,grad_norm: 0.9999993004030008, iteration: 281744
loss: 1.0385345220565796,grad_norm: 0.8895464129386388, iteration: 281745
loss: 1.0249801874160767,grad_norm: 0.9999992161745124, iteration: 281746
loss: 0.970334529876709,grad_norm: 0.8613665260285913, iteration: 281747
loss: 1.0130738019943237,grad_norm: 0.9874314276263576, iteration: 281748
loss: 1.0262846946716309,grad_norm: 0.9010601911493594, iteration: 281749
loss: 1.0157498121261597,grad_norm: 0.9999992849529884, iteration: 281750
loss: 1.0230932235717773,grad_norm: 0.9999993334531428, iteration: 281751
loss: 1.1383966207504272,grad_norm: 0.9999992423519093, iteration: 281752
loss: 1.0572363138198853,grad_norm: 0.9999996058716383, iteration: 281753
loss: 0.9873157739639282,grad_norm: 0.9033372913531265, iteration: 281754
loss: 0.9983198046684265,grad_norm: 0.999999082504816, iteration: 281755
loss: 1.0087922811508179,grad_norm: 0.9999992798852636, iteration: 281756
loss: 1.007426142692566,grad_norm: 0.7808535612970599, iteration: 281757
loss: 1.0329887866973877,grad_norm: 0.7966995047081175, iteration: 281758
loss: 1.0023473501205444,grad_norm: 0.9141066195417241, iteration: 281759
loss: 1.0051262378692627,grad_norm: 0.7969841547431514, iteration: 281760
loss: 0.9929496049880981,grad_norm: 0.9360684991998576, iteration: 281761
loss: 1.0341767072677612,grad_norm: 0.9067734859848515, iteration: 281762
loss: 0.9679751992225647,grad_norm: 0.8952120294593447, iteration: 281763
loss: 1.0497280359268188,grad_norm: 0.9999990569573088, iteration: 281764
loss: 1.0183496475219727,grad_norm: 0.9999994700258753, iteration: 281765
loss: 1.074930191040039,grad_norm: 0.8127559409806965, iteration: 281766
loss: 1.0019891262054443,grad_norm: 0.9999996745230014, iteration: 281767
loss: 1.0110936164855957,grad_norm: 0.930510345086792, iteration: 281768
loss: 0.9983101487159729,grad_norm: 0.9295401144421405, iteration: 281769
loss: 0.9968116879463196,grad_norm: 0.877891841848027, iteration: 281770
loss: 1.013468623161316,grad_norm: 0.9999991787252359, iteration: 281771
loss: 1.0304123163223267,grad_norm: 0.8931037591163005, iteration: 281772
loss: 1.006461501121521,grad_norm: 0.9023970712472393, iteration: 281773
loss: 0.9795873761177063,grad_norm: 0.9999990987003646, iteration: 281774
loss: 1.022349238395691,grad_norm: 0.9999989629241334, iteration: 281775
loss: 0.9856306910514832,grad_norm: 0.9365139752818568, iteration: 281776
loss: 1.0714834928512573,grad_norm: 0.9999992097306437, iteration: 281777
loss: 0.9877686500549316,grad_norm: 0.981116761121187, iteration: 281778
loss: 1.0339345932006836,grad_norm: 0.999999818799452, iteration: 281779
loss: 0.9660835862159729,grad_norm: 0.8584290327839338, iteration: 281780
loss: 1.0380398035049438,grad_norm: 0.9999991397121675, iteration: 281781
loss: 1.0831332206726074,grad_norm: 0.9692950362500387, iteration: 281782
loss: 0.996020495891571,grad_norm: 0.925939958902536, iteration: 281783
loss: 1.0538394451141357,grad_norm: 0.9999991359134707, iteration: 281784
loss: 0.9836894869804382,grad_norm: 0.9358895262141407, iteration: 281785
loss: 0.9591047763824463,grad_norm: 0.7817351357293874, iteration: 281786
loss: 1.0483424663543701,grad_norm: 0.8861108446332796, iteration: 281787
loss: 0.9949724674224854,grad_norm: 0.9486963655301894, iteration: 281788
loss: 0.969323992729187,grad_norm: 0.999999276228709, iteration: 281789
loss: 0.9922429919242859,grad_norm: 0.9229552611104809, iteration: 281790
loss: 0.9955501556396484,grad_norm: 0.9999995201485219, iteration: 281791
loss: 0.9822153449058533,grad_norm: 0.9590422589278769, iteration: 281792
loss: 1.0400640964508057,grad_norm: 0.9212270622837453, iteration: 281793
loss: 1.0242689847946167,grad_norm: 0.9216393698643057, iteration: 281794
loss: 0.975197970867157,grad_norm: 0.9981162644860613, iteration: 281795
loss: 1.0426404476165771,grad_norm: 0.9999991453625421, iteration: 281796
loss: 1.0047783851623535,grad_norm: 0.9394101719966315, iteration: 281797
loss: 1.0461372137069702,grad_norm: 0.9863011519214504, iteration: 281798
loss: 0.9492731690406799,grad_norm: 0.9999990645212622, iteration: 281799
loss: 1.0275273323059082,grad_norm: 0.7697132090527167, iteration: 281800
loss: 1.0398486852645874,grad_norm: 0.8853810777359249, iteration: 281801
loss: 1.0217180252075195,grad_norm: 0.8459330575096253, iteration: 281802
loss: 1.0074836015701294,grad_norm: 0.8824613192627213, iteration: 281803
loss: 1.0429977178573608,grad_norm: 0.9999992699152932, iteration: 281804
loss: 0.9794407486915588,grad_norm: 0.7098152488826308, iteration: 281805
loss: 0.9862228035926819,grad_norm: 0.9999991676267197, iteration: 281806
loss: 1.011300802230835,grad_norm: 0.870626497136285, iteration: 281807
loss: 1.0386887788772583,grad_norm: 0.9999991984627657, iteration: 281808
loss: 0.969935953617096,grad_norm: 0.7874342473738132, iteration: 281809
loss: 1.0055867433547974,grad_norm: 0.7722092603687775, iteration: 281810
loss: 0.9947716593742371,grad_norm: 0.88828054506341, iteration: 281811
loss: 1.0045909881591797,grad_norm: 0.8987350756981997, iteration: 281812
loss: 1.218958854675293,grad_norm: 0.9999998625514696, iteration: 281813
loss: 1.0477912425994873,grad_norm: 0.9999991187006759, iteration: 281814
loss: 0.9812301993370056,grad_norm: 0.7559868979153436, iteration: 281815
loss: 1.0050411224365234,grad_norm: 0.9558244991882134, iteration: 281816
loss: 1.088726282119751,grad_norm: 0.9999995496491885, iteration: 281817
loss: 1.0346522331237793,grad_norm: 0.9815142182464811, iteration: 281818
loss: 1.0155402421951294,grad_norm: 0.8210340202908198, iteration: 281819
loss: 1.0126579999923706,grad_norm: 0.9084720167540484, iteration: 281820
loss: 1.0201561450958252,grad_norm: 0.9893231227029086, iteration: 281821
loss: 1.0429861545562744,grad_norm: 0.9999991625497682, iteration: 281822
loss: 0.9794682860374451,grad_norm: 0.8989676256278838, iteration: 281823
loss: 1.062829613685608,grad_norm: 0.8540890968377617, iteration: 281824
loss: 1.0117504596710205,grad_norm: 0.9166918202962024, iteration: 281825
loss: 1.051143765449524,grad_norm: 0.9466179355197198, iteration: 281826
loss: 1.0501763820648193,grad_norm: 0.9999989718575757, iteration: 281827
loss: 1.0619248151779175,grad_norm: 0.999999377132911, iteration: 281828
loss: 0.9919911623001099,grad_norm: 0.9925137661627543, iteration: 281829
loss: 1.004880428314209,grad_norm: 0.9131469001001004, iteration: 281830
loss: 0.9942874908447266,grad_norm: 0.8675760036422079, iteration: 281831
loss: 1.0604734420776367,grad_norm: 0.9999993317979985, iteration: 281832
loss: 0.9959118962287903,grad_norm: 0.9999991180753677, iteration: 281833
loss: 1.0102806091308594,grad_norm: 0.8950445275950442, iteration: 281834
loss: 0.9532569050788879,grad_norm: 0.8910618813892826, iteration: 281835
loss: 0.9863373637199402,grad_norm: 0.996664067037254, iteration: 281836
loss: 1.0573039054870605,grad_norm: 0.9858064458956737, iteration: 281837
loss: 1.0163530111312866,grad_norm: 0.9999990576295865, iteration: 281838
loss: 0.9629828929901123,grad_norm: 0.8234533942903818, iteration: 281839
loss: 1.0517257452011108,grad_norm: 0.999999152126638, iteration: 281840
loss: 1.0866490602493286,grad_norm: 0.8597331659681935, iteration: 281841
loss: 0.9927921891212463,grad_norm: 0.9007413402363448, iteration: 281842
loss: 1.091078519821167,grad_norm: 0.9999999161528889, iteration: 281843
loss: 1.0186699628829956,grad_norm: 0.9999998008362971, iteration: 281844
loss: 0.963320791721344,grad_norm: 0.8431372125723431, iteration: 281845
loss: 0.9643627405166626,grad_norm: 0.9999993705726192, iteration: 281846
loss: 1.0015511512756348,grad_norm: 0.7207270088819172, iteration: 281847
loss: 1.0030461549758911,grad_norm: 0.8481052685098369, iteration: 281848
loss: 1.033462405204773,grad_norm: 0.8987824149524866, iteration: 281849
loss: 1.0482420921325684,grad_norm: 0.9999993610751828, iteration: 281850
loss: 0.9965459108352661,grad_norm: 0.99809291878393, iteration: 281851
loss: 1.0787508487701416,grad_norm: 0.8742020022457478, iteration: 281852
loss: 1.01377534866333,grad_norm: 0.9999993526698611, iteration: 281853
loss: 1.054009199142456,grad_norm: 0.999999365647825, iteration: 281854
loss: 1.0226386785507202,grad_norm: 0.8214564252489001, iteration: 281855
loss: 1.0450688600540161,grad_norm: 0.9999991301639025, iteration: 281856
loss: 1.032689094543457,grad_norm: 0.8294942592398008, iteration: 281857
loss: 1.024023175239563,grad_norm: 0.8369391997292641, iteration: 281858
loss: 0.9678348302841187,grad_norm: 0.9185541442983368, iteration: 281859
loss: 0.9781381487846375,grad_norm: 0.8534077248581764, iteration: 281860
loss: 1.0255417823791504,grad_norm: 0.9999993875420355, iteration: 281861
loss: 0.9705291390419006,grad_norm: 0.896943815238358, iteration: 281862
loss: 0.9936556816101074,grad_norm: 0.9999990890706363, iteration: 281863
loss: 0.9947282671928406,grad_norm: 0.7810607451120765, iteration: 281864
loss: 0.9861369132995605,grad_norm: 0.7591901850721948, iteration: 281865
loss: 1.0544971227645874,grad_norm: 0.9485659755258224, iteration: 281866
loss: 1.0264936685562134,grad_norm: 0.803339283918528, iteration: 281867
loss: 1.0133613348007202,grad_norm: 0.8966114363263057, iteration: 281868
loss: 0.9892251491546631,grad_norm: 0.936053678370493, iteration: 281869
loss: 0.995959997177124,grad_norm: 0.9806928068758854, iteration: 281870
loss: 1.0243674516677856,grad_norm: 0.9273296259504503, iteration: 281871
loss: 1.0011757612228394,grad_norm: 0.7991394141467503, iteration: 281872
loss: 0.996564507484436,grad_norm: 0.963549455510838, iteration: 281873
loss: 0.9666779041290283,grad_norm: 0.945120718508729, iteration: 281874
loss: 0.9978150725364685,grad_norm: 0.758226037482822, iteration: 281875
loss: 1.0285488367080688,grad_norm: 0.9999990328842735, iteration: 281876
loss: 1.0424773693084717,grad_norm: 0.8371716745517658, iteration: 281877
loss: 0.9967454671859741,grad_norm: 0.8411321378220198, iteration: 281878
loss: 1.028368592262268,grad_norm: 0.8680496989754272, iteration: 281879
loss: 1.0018049478530884,grad_norm: 0.9701090925371436, iteration: 281880
loss: 1.030036211013794,grad_norm: 0.9999990813062813, iteration: 281881
loss: 1.0207271575927734,grad_norm: 0.7989589313248722, iteration: 281882
loss: 0.9852032661437988,grad_norm: 0.9489467734084063, iteration: 281883
loss: 1.0350322723388672,grad_norm: 0.9731986056837708, iteration: 281884
loss: 1.0127179622650146,grad_norm: 0.9547095444903826, iteration: 281885
loss: 1.017827033996582,grad_norm: 0.799600008743759, iteration: 281886
loss: 0.9755986332893372,grad_norm: 0.9580368109346841, iteration: 281887
loss: 0.9960635900497437,grad_norm: 0.7754351145451748, iteration: 281888
loss: 0.9998667240142822,grad_norm: 0.8836446013036791, iteration: 281889
loss: 0.9912847876548767,grad_norm: 0.9999992864813352, iteration: 281890
loss: 1.025675654411316,grad_norm: 0.8899033281621443, iteration: 281891
loss: 0.9944263696670532,grad_norm: 0.8296319285827392, iteration: 281892
loss: 0.9761718511581421,grad_norm: 0.8065264721077351, iteration: 281893
loss: 0.9966185688972473,grad_norm: 0.8461913389404913, iteration: 281894
loss: 1.0350556373596191,grad_norm: 0.8273584725984633, iteration: 281895
loss: 1.006470799446106,grad_norm: 0.8421096016474479, iteration: 281896
loss: 1.006845474243164,grad_norm: 0.9378322511656446, iteration: 281897
loss: 1.0074013471603394,grad_norm: 0.9999992006857338, iteration: 281898
loss: 0.9979973435401917,grad_norm: 0.8526824334055207, iteration: 281899
loss: 1.0118937492370605,grad_norm: 0.8998412753067115, iteration: 281900
loss: 1.1110243797302246,grad_norm: 0.9999998878490719, iteration: 281901
loss: 0.9724923372268677,grad_norm: 0.9999990972362631, iteration: 281902
loss: 1.000267505645752,grad_norm: 0.7866595804351677, iteration: 281903
loss: 1.0143539905548096,grad_norm: 0.8173881766187441, iteration: 281904
loss: 1.0754154920578003,grad_norm: 0.7522561252795988, iteration: 281905
loss: 1.0134438276290894,grad_norm: 0.8055047288273175, iteration: 281906
loss: 0.9841862916946411,grad_norm: 0.8458585706543722, iteration: 281907
loss: 1.0132447481155396,grad_norm: 0.9999993516113832, iteration: 281908
loss: 1.016038179397583,grad_norm: 0.8537907148362142, iteration: 281909
loss: 1.0158714056015015,grad_norm: 0.9448084638784533, iteration: 281910
loss: 0.9690196514129639,grad_norm: 0.9006153525518121, iteration: 281911
loss: 0.9813125133514404,grad_norm: 0.7375682321642296, iteration: 281912
loss: 1.0168572664260864,grad_norm: 0.9610515017961982, iteration: 281913
loss: 0.9748710989952087,grad_norm: 0.8610298425110705, iteration: 281914
loss: 0.9580639600753784,grad_norm: 0.9200636923501729, iteration: 281915
loss: 0.9785369634628296,grad_norm: 0.9646840067941336, iteration: 281916
loss: 0.9881552457809448,grad_norm: 0.8916869454958238, iteration: 281917
loss: 1.0790964365005493,grad_norm: 0.9999992452765074, iteration: 281918
loss: 0.9445524215698242,grad_norm: 0.8096088479295237, iteration: 281919
loss: 1.0744102001190186,grad_norm: 0.9395168630938412, iteration: 281920
loss: 1.0249333381652832,grad_norm: 0.9999992590873291, iteration: 281921
loss: 1.001071572303772,grad_norm: 0.9046040872920132, iteration: 281922
loss: 1.0128692388534546,grad_norm: 0.9175449033853101, iteration: 281923
loss: 1.0125983953475952,grad_norm: 0.9738858298099713, iteration: 281924
loss: 1.0095232725143433,grad_norm: 0.8602805299811545, iteration: 281925
loss: 0.9669119119644165,grad_norm: 0.9999990587334144, iteration: 281926
loss: 1.0286365747451782,grad_norm: 0.9850687112757844, iteration: 281927
loss: 0.980433464050293,grad_norm: 0.7127408822770822, iteration: 281928
loss: 1.0175708532333374,grad_norm: 0.8225720440966539, iteration: 281929
loss: 1.0180693864822388,grad_norm: 0.9999996255236187, iteration: 281930
loss: 1.0295926332473755,grad_norm: 0.9091920152119374, iteration: 281931
loss: 0.9937928915023804,grad_norm: 0.914662885833926, iteration: 281932
loss: 1.0091382265090942,grad_norm: 0.7975118724665923, iteration: 281933
loss: 0.9658553004264832,grad_norm: 0.906512211446872, iteration: 281934
loss: 1.0225366353988647,grad_norm: 0.8396232570431356, iteration: 281935
loss: 0.9985520243644714,grad_norm: 0.8850457662053073, iteration: 281936
loss: 0.9957605004310608,grad_norm: 0.7608082027416325, iteration: 281937
loss: 0.9840169548988342,grad_norm: 0.8026685493033712, iteration: 281938
loss: 0.9977076053619385,grad_norm: 0.795745084057674, iteration: 281939
loss: 1.0343672037124634,grad_norm: 0.8409217904377807, iteration: 281940
loss: 1.0179228782653809,grad_norm: 0.8412206580345007, iteration: 281941
loss: 1.018478512763977,grad_norm: 0.830353823039417, iteration: 281942
loss: 1.0041228532791138,grad_norm: 0.8346730573742212, iteration: 281943
loss: 0.9734688997268677,grad_norm: 0.9214206141658021, iteration: 281944
loss: 1.0098797082901,grad_norm: 0.8338692314503102, iteration: 281945
loss: 1.0038025379180908,grad_norm: 0.750894492770756, iteration: 281946
loss: 1.024614930152893,grad_norm: 0.7259083595381391, iteration: 281947
loss: 0.977094292640686,grad_norm: 0.9999989850157076, iteration: 281948
loss: 1.1479735374450684,grad_norm: 0.9999993451785583, iteration: 281949
loss: 1.0589003562927246,grad_norm: 0.8625352873717193, iteration: 281950
loss: 1.011398196220398,grad_norm: 0.6442712211008679, iteration: 281951
loss: 0.9807412028312683,grad_norm: 0.7797716589770638, iteration: 281952
loss: 0.9724308848381042,grad_norm: 0.8881540673140497, iteration: 281953
loss: 1.0106196403503418,grad_norm: 0.9450933738971399, iteration: 281954
loss: 1.0189324617385864,grad_norm: 0.892371196174095, iteration: 281955
loss: 1.0161131620407104,grad_norm: 0.822016387175483, iteration: 281956
loss: 1.036637783050537,grad_norm: 0.9999989968234282, iteration: 281957
loss: 1.0135859251022339,grad_norm: 0.979525253062804, iteration: 281958
loss: 1.015492558479309,grad_norm: 0.883656659248738, iteration: 281959
loss: 1.0246158838272095,grad_norm: 0.999999122655858, iteration: 281960
loss: 1.0033782720565796,grad_norm: 0.84817848434288, iteration: 281961
loss: 1.001509189605713,grad_norm: 0.7984052880593255, iteration: 281962
loss: 1.0163418054580688,grad_norm: 0.860569085224199, iteration: 281963
loss: 1.0269434452056885,grad_norm: 0.9302167429430384, iteration: 281964
loss: 1.077014684677124,grad_norm: 0.845963721093713, iteration: 281965
loss: 1.0278820991516113,grad_norm: 0.9999992656008012, iteration: 281966
loss: 0.993928074836731,grad_norm: 0.7455234041099539, iteration: 281967
loss: 1.0453741550445557,grad_norm: 0.9999998601265938, iteration: 281968
loss: 1.0005837678909302,grad_norm: 0.8674173043188326, iteration: 281969
loss: 0.9966664910316467,grad_norm: 0.9204137274300355, iteration: 281970
loss: 1.0056596994400024,grad_norm: 0.8472968936564773, iteration: 281971
loss: 1.0263850688934326,grad_norm: 0.8150129812980806, iteration: 281972
loss: 0.9935532808303833,grad_norm: 0.999099739154279, iteration: 281973
loss: 1.023728370666504,grad_norm: 0.9999991763927467, iteration: 281974
loss: 0.9675791263580322,grad_norm: 0.9313040032107692, iteration: 281975
loss: 1.033326268196106,grad_norm: 0.948974429311096, iteration: 281976
loss: 0.985656201839447,grad_norm: 0.9628366023062545, iteration: 281977
loss: 1.0046769380569458,grad_norm: 0.8840063341847959, iteration: 281978
loss: 1.0384573936462402,grad_norm: 0.9999991201865043, iteration: 281979
loss: 0.9857463836669922,grad_norm: 0.8656489145008646, iteration: 281980
loss: 1.0072181224822998,grad_norm: 0.8432392092728238, iteration: 281981
loss: 1.0003857612609863,grad_norm: 0.9369614259730457, iteration: 281982
loss: 1.0073550939559937,grad_norm: 0.8617748634272648, iteration: 281983
loss: 1.0786573886871338,grad_norm: 0.999999375280508, iteration: 281984
loss: 1.0203696489334106,grad_norm: 0.9999995220529614, iteration: 281985
loss: 0.9845454692840576,grad_norm: 0.7762866115392388, iteration: 281986
loss: 1.0068793296813965,grad_norm: 0.85259088203597, iteration: 281987
loss: 0.9975939393043518,grad_norm: 0.9501122135816137, iteration: 281988
loss: 1.0175570249557495,grad_norm: 0.8194086871128579, iteration: 281989
loss: 1.0848352909088135,grad_norm: 0.9620333411130125, iteration: 281990
loss: 1.0154825448989868,grad_norm: 0.9999997736956407, iteration: 281991
loss: 1.0097490549087524,grad_norm: 0.9664712025074813, iteration: 281992
loss: 1.0675945281982422,grad_norm: 0.9999995652555207, iteration: 281993
loss: 0.986372172832489,grad_norm: 0.8103378455955422, iteration: 281994
loss: 1.0000158548355103,grad_norm: 0.8860975193438939, iteration: 281995
loss: 1.0131676197052002,grad_norm: 0.8308765860357987, iteration: 281996
loss: 0.9932395219802856,grad_norm: 0.9461728754956121, iteration: 281997
loss: 1.0815221071243286,grad_norm: 0.9999998567918374, iteration: 281998
loss: 0.9999509453773499,grad_norm: 0.8941732310555368, iteration: 281999
loss: 1.0186197757720947,grad_norm: 0.7978533206698019, iteration: 282000
loss: 0.9644386172294617,grad_norm: 0.9277504768273046, iteration: 282001
loss: 1.0310064554214478,grad_norm: 0.8710376412426197, iteration: 282002
loss: 0.9987397789955139,grad_norm: 0.9182447940548142, iteration: 282003
loss: 1.0126935243606567,grad_norm: 0.896147591929276, iteration: 282004
loss: 0.9898688793182373,grad_norm: 0.9999991129394891, iteration: 282005
loss: 0.9781587719917297,grad_norm: 0.8386446865262803, iteration: 282006
loss: 0.9971341490745544,grad_norm: 0.8543844115868423, iteration: 282007
loss: 0.9844912886619568,grad_norm: 0.9838651381472653, iteration: 282008
loss: 0.9853863716125488,grad_norm: 0.790993305245408, iteration: 282009
loss: 1.0384703874588013,grad_norm: 0.9495115001868203, iteration: 282010
loss: 1.0061508417129517,grad_norm: 0.7905727239277519, iteration: 282011
loss: 1.0194135904312134,grad_norm: 0.7461600115627992, iteration: 282012
loss: 1.0166317224502563,grad_norm: 0.7476283844670941, iteration: 282013
loss: 1.01716947555542,grad_norm: 0.9999990844148053, iteration: 282014
loss: 0.9729295372962952,grad_norm: 0.8093646083344096, iteration: 282015
loss: 0.9965149164199829,grad_norm: 0.7332050079312856, iteration: 282016
loss: 1.0072821378707886,grad_norm: 0.8431694496056676, iteration: 282017
loss: 1.0333033800125122,grad_norm: 0.9409628056626046, iteration: 282018
loss: 1.0340089797973633,grad_norm: 0.9302750421457995, iteration: 282019
loss: 1.0379892587661743,grad_norm: 0.9728948287397372, iteration: 282020
loss: 0.981451690196991,grad_norm: 0.9999997151418514, iteration: 282021
loss: 0.9941961765289307,grad_norm: 0.9999996390504804, iteration: 282022
loss: 1.0309921503067017,grad_norm: 0.9287561737259864, iteration: 282023
loss: 1.0031707286834717,grad_norm: 0.8575294348807945, iteration: 282024
loss: 0.9891846179962158,grad_norm: 0.807475305216008, iteration: 282025
loss: 1.0061498880386353,grad_norm: 0.8765244378305008, iteration: 282026
loss: 1.0029881000518799,grad_norm: 0.8699155677486027, iteration: 282027
loss: 1.0089843273162842,grad_norm: 0.8396719913411629, iteration: 282028
loss: 1.0167591571807861,grad_norm: 0.8564786955261624, iteration: 282029
loss: 0.9934231042861938,grad_norm: 0.7090535163494877, iteration: 282030
loss: 1.0205357074737549,grad_norm: 0.8213714714230326, iteration: 282031
loss: 0.9543423652648926,grad_norm: 0.9147285097657409, iteration: 282032
loss: 0.987589955329895,grad_norm: 0.9999991515478884, iteration: 282033
loss: 1.018347144126892,grad_norm: 0.9999990507291031, iteration: 282034
loss: 0.9536279439926147,grad_norm: 0.9999997770504572, iteration: 282035
loss: 0.9517605304718018,grad_norm: 0.9680643729304625, iteration: 282036
loss: 0.9797511100769043,grad_norm: 0.8771785038629768, iteration: 282037
loss: 0.9907003045082092,grad_norm: 0.9985308752023473, iteration: 282038
loss: 0.9682332873344421,grad_norm: 0.9296412170604861, iteration: 282039
loss: 1.0040849447250366,grad_norm: 0.700331276813503, iteration: 282040
loss: 0.9783863425254822,grad_norm: 0.9568462087822597, iteration: 282041
loss: 0.9803321957588196,grad_norm: 0.8843542918940094, iteration: 282042
loss: 1.021929144859314,grad_norm: 0.9999990353826711, iteration: 282043
loss: 1.0718928575515747,grad_norm: 0.8996367563106459, iteration: 282044
loss: 0.9989191293716431,grad_norm: 0.8295023000884448, iteration: 282045
loss: 0.974236249923706,grad_norm: 0.8152975317752018, iteration: 282046
loss: 0.9771010875701904,grad_norm: 0.9999992819544857, iteration: 282047
loss: 0.9927093982696533,grad_norm: 0.6895118129117088, iteration: 282048
loss: 0.999417781829834,grad_norm: 0.9999996553919395, iteration: 282049
loss: 1.1151100397109985,grad_norm: 0.999999966585466, iteration: 282050
loss: 1.074692726135254,grad_norm: 0.9999990980114067, iteration: 282051
loss: 0.9683873057365417,grad_norm: 0.7919415994306062, iteration: 282052
loss: 0.9895980954170227,grad_norm: 0.9999998219239351, iteration: 282053
loss: 0.9722493886947632,grad_norm: 0.8990310000498374, iteration: 282054
loss: 1.0275713205337524,grad_norm: 0.9999993190112216, iteration: 282055
loss: 1.017472267150879,grad_norm: 0.9264762690527522, iteration: 282056
loss: 1.0713380575180054,grad_norm: 0.9999998737127723, iteration: 282057
loss: 0.9950035810470581,grad_norm: 0.8196142364854702, iteration: 282058
loss: 1.0692375898361206,grad_norm: 0.9999993871564942, iteration: 282059
loss: 1.0394484996795654,grad_norm: 0.9999991093206569, iteration: 282060
loss: 1.0056570768356323,grad_norm: 0.7465851017284738, iteration: 282061
loss: 0.9886909127235413,grad_norm: 0.8338617894118338, iteration: 282062
loss: 1.0024287700653076,grad_norm: 0.9655285287157941, iteration: 282063
loss: 1.009580135345459,grad_norm: 0.9999991359037115, iteration: 282064
loss: 0.9760671854019165,grad_norm: 0.7049611108251919, iteration: 282065
loss: 1.009614109992981,grad_norm: 0.8022612335210395, iteration: 282066
loss: 0.9807369709014893,grad_norm: 0.926670394173856, iteration: 282067
loss: 0.9858267307281494,grad_norm: 0.9999998083464212, iteration: 282068
loss: 1.0435606241226196,grad_norm: 0.9999994040389276, iteration: 282069
loss: 1.0208995342254639,grad_norm: 0.9999990791459504, iteration: 282070
loss: 0.9954248070716858,grad_norm: 0.9999990995985972, iteration: 282071
loss: 0.9665225148200989,grad_norm: 0.9275983680983289, iteration: 282072
loss: 0.9817352294921875,grad_norm: 0.8380114264890254, iteration: 282073
loss: 0.9948396682739258,grad_norm: 0.9034533857242525, iteration: 282074
loss: 1.0315231084823608,grad_norm: 0.9999998317157092, iteration: 282075
loss: 1.0038667917251587,grad_norm: 0.895502532411058, iteration: 282076
loss: 1.0136562585830688,grad_norm: 0.791508129908538, iteration: 282077
loss: 1.0173523426055908,grad_norm: 0.7955084545228719, iteration: 282078
loss: 0.9836812019348145,grad_norm: 0.8018344024617252, iteration: 282079
loss: 0.9723974466323853,grad_norm: 0.8691761344588549, iteration: 282080
loss: 0.9949592351913452,grad_norm: 0.8052580007021021, iteration: 282081
loss: 1.0059157609939575,grad_norm: 0.999999120133654, iteration: 282082
loss: 1.0972729921340942,grad_norm: 0.8046835183633348, iteration: 282083
loss: 1.0176724195480347,grad_norm: 0.7859874069243641, iteration: 282084
loss: 1.0349253416061401,grad_norm: 0.9556583976120326, iteration: 282085
loss: 1.0123828649520874,grad_norm: 0.7610262786508492, iteration: 282086
loss: 0.9923490285873413,grad_norm: 0.7946886067923398, iteration: 282087
loss: 0.9762924313545227,grad_norm: 0.853201478943932, iteration: 282088
loss: 1.0066533088684082,grad_norm: 0.7706603465789899, iteration: 282089
loss: 0.9876310229301453,grad_norm: 0.8843405132343407, iteration: 282090
loss: 1.10845148563385,grad_norm: 0.9999993794399906, iteration: 282091
loss: 0.9523274302482605,grad_norm: 0.8958817018147143, iteration: 282092
loss: 0.9751996994018555,grad_norm: 0.8095060766495206, iteration: 282093
loss: 1.0023863315582275,grad_norm: 0.8204330908125271, iteration: 282094
loss: 0.9487397074699402,grad_norm: 0.8675301543300649, iteration: 282095
loss: 0.9752790927886963,grad_norm: 0.8326622812386966, iteration: 282096
loss: 1.011430025100708,grad_norm: 0.9612814703235021, iteration: 282097
loss: 1.0207655429840088,grad_norm: 0.7489432036501839, iteration: 282098
loss: 0.9871706962585449,grad_norm: 0.9999990904228564, iteration: 282099
loss: 1.0045166015625,grad_norm: 0.9999989150867687, iteration: 282100
loss: 0.9974048733711243,grad_norm: 0.9418477943432898, iteration: 282101
loss: 1.0187302827835083,grad_norm: 0.8391100657237426, iteration: 282102
loss: 1.0060465335845947,grad_norm: 0.8508806736424935, iteration: 282103
loss: 0.9942308068275452,grad_norm: 0.999999241265723, iteration: 282104
loss: 0.9671637415885925,grad_norm: 0.7525209668556804, iteration: 282105
loss: 0.9583753347396851,grad_norm: 0.9999990829528702, iteration: 282106
loss: 0.9756553769111633,grad_norm: 0.9251104457919987, iteration: 282107
loss: 1.0107173919677734,grad_norm: 0.7910646217218289, iteration: 282108
loss: 0.9839643239974976,grad_norm: 0.9667896016334999, iteration: 282109
loss: 1.067956566810608,grad_norm: 0.9296917169793885, iteration: 282110
loss: 0.9668562412261963,grad_norm: 0.8255619525648377, iteration: 282111
loss: 0.9835522770881653,grad_norm: 0.911271913704251, iteration: 282112
loss: 0.9760888814926147,grad_norm: 0.7970790891738632, iteration: 282113
loss: 0.9763944745063782,grad_norm: 0.9999995528343576, iteration: 282114
loss: 0.9966155886650085,grad_norm: 0.9999991356781037, iteration: 282115
loss: 1.0006155967712402,grad_norm: 0.907793169113799, iteration: 282116
loss: 0.9916422367095947,grad_norm: 0.8416784293517144, iteration: 282117
loss: 1.0251216888427734,grad_norm: 0.7888382156127218, iteration: 282118
loss: 1.0857282876968384,grad_norm: 0.9809824803914883, iteration: 282119
loss: 0.941816508769989,grad_norm: 0.9999990554751133, iteration: 282120
loss: 0.9791895151138306,grad_norm: 0.7761363887077234, iteration: 282121
loss: 1.0256147384643555,grad_norm: 0.7603294078566359, iteration: 282122
loss: 1.0316722393035889,grad_norm: 0.8834834867017354, iteration: 282123
loss: 1.0387440919876099,grad_norm: 0.7727218961248491, iteration: 282124
loss: 1.023545265197754,grad_norm: 0.896021313635555, iteration: 282125
loss: 0.9536430835723877,grad_norm: 0.9199833011741305, iteration: 282126
loss: 1.0316694974899292,grad_norm: 0.9845012645585757, iteration: 282127
loss: 1.018373727798462,grad_norm: 0.9036290399897994, iteration: 282128
loss: 1.0908191204071045,grad_norm: 0.9999991401354467, iteration: 282129
loss: 0.9676985740661621,grad_norm: 0.8752580107855481, iteration: 282130
loss: 1.0597865581512451,grad_norm: 0.852269867882734, iteration: 282131
loss: 0.9999485611915588,grad_norm: 0.8708302559200378, iteration: 282132
loss: 1.0104345083236694,grad_norm: 0.9487522306180624, iteration: 282133
loss: 0.9806849360466003,grad_norm: 0.9652837470429902, iteration: 282134
loss: 1.0366541147232056,grad_norm: 0.9999990363602336, iteration: 282135
loss: 0.9600617289543152,grad_norm: 0.8510823798216484, iteration: 282136
loss: 1.008945107460022,grad_norm: 0.7611789585212297, iteration: 282137
loss: 1.009964108467102,grad_norm: 0.7723008254695273, iteration: 282138
loss: 0.9867232441902161,grad_norm: 0.7553206211687171, iteration: 282139
loss: 0.9821930527687073,grad_norm: 0.8002938581655041, iteration: 282140
loss: 1.0481387376785278,grad_norm: 0.9282239937978817, iteration: 282141
loss: 0.9922018647193909,grad_norm: 0.7923034946787331, iteration: 282142
loss: 1.0274301767349243,grad_norm: 0.8558043541682085, iteration: 282143
loss: 1.0099644660949707,grad_norm: 0.9067981777993394, iteration: 282144
loss: 0.9782377481460571,grad_norm: 0.6420293109525008, iteration: 282145
loss: 0.9962093234062195,grad_norm: 0.9942502453042187, iteration: 282146
loss: 1.0081392526626587,grad_norm: 0.8110476893777958, iteration: 282147
loss: 0.9731838703155518,grad_norm: 0.8969702534596539, iteration: 282148
loss: 1.010283350944519,grad_norm: 0.8144772906035878, iteration: 282149
loss: 1.0294511318206787,grad_norm: 0.995325958089733, iteration: 282150
loss: 1.0135637521743774,grad_norm: 0.9047778074863189, iteration: 282151
loss: 0.9986026287078857,grad_norm: 0.9254853279429146, iteration: 282152
loss: 0.9698247313499451,grad_norm: 0.838714565841692, iteration: 282153
loss: 0.987763524055481,grad_norm: 0.8768139174416604, iteration: 282154
loss: 1.011602759361267,grad_norm: 0.7326787962691943, iteration: 282155
loss: 1.0527458190917969,grad_norm: 0.9999997735483965, iteration: 282156
loss: 0.9915145635604858,grad_norm: 0.9999991323755857, iteration: 282157
loss: 1.015575647354126,grad_norm: 0.8920605965539046, iteration: 282158
loss: 1.0034502744674683,grad_norm: 0.7994179790104761, iteration: 282159
loss: 1.0416548252105713,grad_norm: 1.0000000291055788, iteration: 282160
loss: 1.009356141090393,grad_norm: 0.8231664692735634, iteration: 282161
loss: 1.0670047998428345,grad_norm: 0.7921462886970624, iteration: 282162
loss: 1.0051710605621338,grad_norm: 0.9213522566368463, iteration: 282163
loss: 1.0505579710006714,grad_norm: 0.999999484930323, iteration: 282164
loss: 1.0077530145645142,grad_norm: 0.7808286682775638, iteration: 282165
loss: 1.0047038793563843,grad_norm: 0.9005780410442776, iteration: 282166
loss: 1.0172139406204224,grad_norm: 0.9999992523424888, iteration: 282167
loss: 0.9783828854560852,grad_norm: 0.7237089500596595, iteration: 282168
loss: 1.078892707824707,grad_norm: 0.9999991923730134, iteration: 282169
loss: 1.102607011795044,grad_norm: 0.9999993064496713, iteration: 282170
loss: 1.0339282751083374,grad_norm: 0.8336920953379034, iteration: 282171
loss: 1.0871531963348389,grad_norm: 0.9745919384021818, iteration: 282172
loss: 0.99351966381073,grad_norm: 0.9171698736657296, iteration: 282173
loss: 0.9846956133842468,grad_norm: 0.7887880121480861, iteration: 282174
loss: 0.9911882877349854,grad_norm: 0.8169999739863567, iteration: 282175
loss: 1.0557383298873901,grad_norm: 0.9075479615536545, iteration: 282176
loss: 0.9910313487052917,grad_norm: 0.8914931461093747, iteration: 282177
loss: 1.007795810699463,grad_norm: 0.9999997631065406, iteration: 282178
loss: 0.9678508043289185,grad_norm: 0.8028849983309975, iteration: 282179
loss: 0.9977549910545349,grad_norm: 0.799426538039223, iteration: 282180
loss: 1.0048942565917969,grad_norm: 0.8527169237963301, iteration: 282181
loss: 0.9908908605575562,grad_norm: 0.999999295708171, iteration: 282182
loss: 1.0313539505004883,grad_norm: 0.9293197752299545, iteration: 282183
loss: 1.0538579225540161,grad_norm: 0.9999995137887099, iteration: 282184
loss: 1.0124591588974,grad_norm: 0.8590059298202347, iteration: 282185
loss: 1.08528470993042,grad_norm: 0.9999993462156136, iteration: 282186
loss: 1.0414427518844604,grad_norm: 0.9999997280610206, iteration: 282187
loss: 1.0019259452819824,grad_norm: 0.8523734881412118, iteration: 282188
loss: 0.9584940075874329,grad_norm: 0.8373213493485943, iteration: 282189
loss: 1.0041100978851318,grad_norm: 0.9679336820430072, iteration: 282190
loss: 1.0220171213150024,grad_norm: 0.8170550629960667, iteration: 282191
loss: 1.0102901458740234,grad_norm: 0.7864578815078612, iteration: 282192
loss: 1.0092462301254272,grad_norm: 0.9993038954490213, iteration: 282193
loss: 1.0640500783920288,grad_norm: 0.9999991922967169, iteration: 282194
loss: 1.076421856880188,grad_norm: 0.9819763734037792, iteration: 282195
loss: 1.0461558103561401,grad_norm: 0.9013931302271047, iteration: 282196
loss: 0.9919570684432983,grad_norm: 0.8845052263323916, iteration: 282197
loss: 0.9854938387870789,grad_norm: 0.7836458995081897, iteration: 282198
loss: 1.0052244663238525,grad_norm: 0.8742629130746739, iteration: 282199
loss: 1.0155540704727173,grad_norm: 0.7635926457775416, iteration: 282200
loss: 0.9984171390533447,grad_norm: 0.8761288752056156, iteration: 282201
loss: 1.0212491750717163,grad_norm: 0.999999210659938, iteration: 282202
loss: 1.0117769241333008,grad_norm: 0.987196222827825, iteration: 282203
loss: 1.0462901592254639,grad_norm: 0.8662697511015408, iteration: 282204
loss: 0.989250123500824,grad_norm: 0.9999992643642427, iteration: 282205
loss: 0.9768564701080322,grad_norm: 0.9999993527501528, iteration: 282206
loss: 1.0002025365829468,grad_norm: 0.8046001976374729, iteration: 282207
loss: 1.0087357759475708,grad_norm: 0.9999993934477289, iteration: 282208
loss: 1.0015150308609009,grad_norm: 0.9502543153653734, iteration: 282209
loss: 1.0527735948562622,grad_norm: 0.9828108026684054, iteration: 282210
loss: 1.0048514604568481,grad_norm: 0.999999696418911, iteration: 282211
loss: 0.9760531783103943,grad_norm: 0.8015660761095479, iteration: 282212
loss: 1.0108979940414429,grad_norm: 0.7644686946202397, iteration: 282213
loss: 0.9649484157562256,grad_norm: 0.7860418872765759, iteration: 282214
loss: 0.9948006272315979,grad_norm: 0.6188694505920583, iteration: 282215
loss: 1.0256143808364868,grad_norm: 0.9011835890254027, iteration: 282216
loss: 1.0043926239013672,grad_norm: 0.9432843078037083, iteration: 282217
loss: 0.9907537698745728,grad_norm: 0.8904302612842394, iteration: 282218
loss: 0.9940969347953796,grad_norm: 0.837904764303826, iteration: 282219
loss: 1.000361442565918,grad_norm: 0.8751646463289808, iteration: 282220
loss: 0.9633516073226929,grad_norm: 0.8851918960059657, iteration: 282221
loss: 1.0228216648101807,grad_norm: 0.9431344228594807, iteration: 282222
loss: 1.0007007122039795,grad_norm: 0.9999996978665631, iteration: 282223
loss: 1.0677006244659424,grad_norm: 0.8521592078550996, iteration: 282224
loss: 0.951707124710083,grad_norm: 0.8726869497610071, iteration: 282225
loss: 1.010571002960205,grad_norm: 0.999999170712792, iteration: 282226
loss: 1.0116523504257202,grad_norm: 0.999999699631693, iteration: 282227
loss: 1.0969969034194946,grad_norm: 0.9404476771950305, iteration: 282228
loss: 1.0547761917114258,grad_norm: 0.9999996039677889, iteration: 282229
loss: 1.0759191513061523,grad_norm: 0.9985847594091177, iteration: 282230
loss: 1.0031307935714722,grad_norm: 0.7389775470569785, iteration: 282231
loss: 1.0100125074386597,grad_norm: 0.9599278200849554, iteration: 282232
loss: 0.9710337519645691,grad_norm: 0.826757701503092, iteration: 282233
loss: 1.0410568714141846,grad_norm: 0.9999989235428619, iteration: 282234
loss: 1.0364025831222534,grad_norm: 0.9999991173497327, iteration: 282235
loss: 0.9817832708358765,grad_norm: 0.8034925701541769, iteration: 282236
loss: 1.0230262279510498,grad_norm: 0.9369268983665339, iteration: 282237
loss: 0.9815690517425537,grad_norm: 0.9014517211896815, iteration: 282238
loss: 1.0200860500335693,grad_norm: 0.9999995096269347, iteration: 282239
loss: 1.0022048950195312,grad_norm: 0.6599587571407022, iteration: 282240
loss: 0.9971677660942078,grad_norm: 0.8201972380238097, iteration: 282241
loss: 0.9886539578437805,grad_norm: 0.848838591822694, iteration: 282242
loss: 0.9805905818939209,grad_norm: 0.8995180013807965, iteration: 282243
loss: 0.9897557497024536,grad_norm: 0.752616268718285, iteration: 282244
loss: 1.0128815174102783,grad_norm: 0.8105859852072341, iteration: 282245
loss: 1.0702502727508545,grad_norm: 0.9999992162431848, iteration: 282246
loss: 1.0097076892852783,grad_norm: 0.9999993851435917, iteration: 282247
loss: 0.9977342486381531,grad_norm: 0.7927153486708626, iteration: 282248
loss: 0.9579103589057922,grad_norm: 0.7716561658313159, iteration: 282249
loss: 0.989445686340332,grad_norm: 0.7792908303767319, iteration: 282250
loss: 0.954948902130127,grad_norm: 0.9008853039535321, iteration: 282251
loss: 1.0590819120407104,grad_norm: 0.9999994213744996, iteration: 282252
loss: 0.9901574850082397,grad_norm: 0.838450618832342, iteration: 282253
loss: 1.013010025024414,grad_norm: 0.7854097853125552, iteration: 282254
loss: 1.0010223388671875,grad_norm: 0.9999992031493159, iteration: 282255
loss: 0.9944220185279846,grad_norm: 0.9991721284298811, iteration: 282256
loss: 0.9976502656936646,grad_norm: 0.8586014822159249, iteration: 282257
loss: 1.0331557989120483,grad_norm: 0.9920667466886425, iteration: 282258
loss: 0.9996060132980347,grad_norm: 0.8967176277573354, iteration: 282259
loss: 0.9990968704223633,grad_norm: 0.8475223211262334, iteration: 282260
loss: 1.0096430778503418,grad_norm: 0.8943843436421755, iteration: 282261
loss: 0.9907375574111938,grad_norm: 0.8299172319093001, iteration: 282262
loss: 1.0761991739273071,grad_norm: 0.9999992201743734, iteration: 282263
loss: 1.0199583768844604,grad_norm: 0.8834475882065184, iteration: 282264
loss: 1.0383042097091675,grad_norm: 0.8671256064991797, iteration: 282265
loss: 0.9922646284103394,grad_norm: 0.9999991690203112, iteration: 282266
loss: 0.976265013217926,grad_norm: 0.9557655865428376, iteration: 282267
loss: 0.9903542399406433,grad_norm: 0.7314820273096999, iteration: 282268
loss: 0.9933907389640808,grad_norm: 0.9393542024444237, iteration: 282269
loss: 0.9681747555732727,grad_norm: 0.7902382741778472, iteration: 282270
loss: 0.9845758080482483,grad_norm: 0.8279970829076675, iteration: 282271
loss: 0.9770928025245667,grad_norm: 0.9843682224414381, iteration: 282272
loss: 0.9734346866607666,grad_norm: 0.9660835669312113, iteration: 282273
loss: 0.9849929809570312,grad_norm: 0.8498243558419498, iteration: 282274
loss: 1.0609803199768066,grad_norm: 0.9999990870502635, iteration: 282275
loss: 1.057936668395996,grad_norm: 0.99999978377755, iteration: 282276
loss: 1.087999939918518,grad_norm: 0.978300356237751, iteration: 282277
loss: 1.0148646831512451,grad_norm: 0.7634386806491374, iteration: 282278
loss: 1.0063530206680298,grad_norm: 0.9705872663944093, iteration: 282279
loss: 0.9880218505859375,grad_norm: 0.8176626491766864, iteration: 282280
loss: 1.018301010131836,grad_norm: 0.9999991272444194, iteration: 282281
loss: 1.0491615533828735,grad_norm: 0.9999995599831694, iteration: 282282
loss: 0.9889928102493286,grad_norm: 0.9135542823770844, iteration: 282283
loss: 0.9827812314033508,grad_norm: 0.8967191978556074, iteration: 282284
loss: 0.9818951487541199,grad_norm: 0.9477648287187013, iteration: 282285
loss: 1.0066739320755005,grad_norm: 0.9465611736945321, iteration: 282286
loss: 1.0069438219070435,grad_norm: 0.9036423945999544, iteration: 282287
loss: 1.0901778936386108,grad_norm: 0.8591714013363653, iteration: 282288
loss: 1.036813497543335,grad_norm: 0.8927936806730594, iteration: 282289
loss: 0.9908084869384766,grad_norm: 0.9626202473583553, iteration: 282290
loss: 1.013414740562439,grad_norm: 0.8553151544459829, iteration: 282291
loss: 0.9859305620193481,grad_norm: 0.8284964780547516, iteration: 282292
loss: 0.9755023717880249,grad_norm: 0.8214367473985237, iteration: 282293
loss: 1.0372041463851929,grad_norm: 0.9999993098853074, iteration: 282294
loss: 1.0110341310501099,grad_norm: 0.8640795065202823, iteration: 282295
loss: 0.951995313167572,grad_norm: 0.7971982748861371, iteration: 282296
loss: 0.9954460859298706,grad_norm: 0.8113407157375009, iteration: 282297
loss: 1.0271111726760864,grad_norm: 0.9179507360828247, iteration: 282298
loss: 0.9811958074569702,grad_norm: 0.9816441423436744, iteration: 282299
loss: 0.9784753322601318,grad_norm: 0.8629057398925162, iteration: 282300
loss: 0.9991299510002136,grad_norm: 0.878373731008164, iteration: 282301
loss: 0.9696760773658752,grad_norm: 0.9999990630928945, iteration: 282302
loss: 1.062619924545288,grad_norm: 0.9999992215935549, iteration: 282303
loss: 0.9936773180961609,grad_norm: 0.9318613398682217, iteration: 282304
loss: 0.9842089414596558,grad_norm: 0.855946250485109, iteration: 282305
loss: 1.0505928993225098,grad_norm: 0.9999993994568982, iteration: 282306
loss: 0.992540180683136,grad_norm: 0.9097511846110645, iteration: 282307
loss: 1.0244132280349731,grad_norm: 0.9599567247223014, iteration: 282308
loss: 1.0192396640777588,grad_norm: 0.9999993874726664, iteration: 282309
loss: 0.9760721921920776,grad_norm: 0.8477409566925355, iteration: 282310
loss: 0.9789642095565796,grad_norm: 0.9999990903538449, iteration: 282311
loss: 1.0887161493301392,grad_norm: 0.999999761248196, iteration: 282312
loss: 1.0146585702896118,grad_norm: 0.9087065892923752, iteration: 282313
loss: 0.9903660416603088,grad_norm: 0.9419594674830871, iteration: 282314
loss: 0.9897585511207581,grad_norm: 0.9004567810735953, iteration: 282315
loss: 1.02896249294281,grad_norm: 0.9999995484968521, iteration: 282316
loss: 0.9881951808929443,grad_norm: 0.7610783912454595, iteration: 282317
loss: 1.026412844657898,grad_norm: 0.9999998148532403, iteration: 282318
loss: 1.005744457244873,grad_norm: 0.9999994796759036, iteration: 282319
loss: 1.0071086883544922,grad_norm: 0.8282958356582136, iteration: 282320
loss: 0.9595759510993958,grad_norm: 0.9938615514686555, iteration: 282321
loss: 1.0279688835144043,grad_norm: 0.8310564457508871, iteration: 282322
loss: 0.9949326515197754,grad_norm: 0.9291101229795443, iteration: 282323
loss: 1.030822992324829,grad_norm: 0.670040465524789, iteration: 282324
loss: 1.031206727027893,grad_norm: 0.928247388859702, iteration: 282325
loss: 1.0125409364700317,grad_norm: 0.9999997056627786, iteration: 282326
loss: 0.9954540133476257,grad_norm: 0.9999990990138411, iteration: 282327
loss: 1.047844648361206,grad_norm: 0.9999991309199595, iteration: 282328
loss: 1.1216354370117188,grad_norm: 0.9999998840860517, iteration: 282329
loss: 1.044962763786316,grad_norm: 0.8310951183313687, iteration: 282330
loss: 0.9674578309059143,grad_norm: 0.7700574674173013, iteration: 282331
loss: 1.0781601667404175,grad_norm: 0.9999997513093035, iteration: 282332
loss: 0.9816592931747437,grad_norm: 0.9999998821307091, iteration: 282333
loss: 0.9683161377906799,grad_norm: 0.8351526101004288, iteration: 282334
loss: 0.9790877103805542,grad_norm: 0.9159355373759294, iteration: 282335
loss: 1.0778776407241821,grad_norm: 0.9999998722502272, iteration: 282336
loss: 1.1313905715942383,grad_norm: 0.999999807866586, iteration: 282337
loss: 1.0384846925735474,grad_norm: 0.9702797357705124, iteration: 282338
loss: 1.0906808376312256,grad_norm: 0.9999991616798051, iteration: 282339
loss: 1.1029980182647705,grad_norm: 0.999999033163465, iteration: 282340
loss: 1.1039057970046997,grad_norm: 0.999999497328069, iteration: 282341
loss: 0.9976985454559326,grad_norm: 0.9999999247335618, iteration: 282342
loss: 1.069989800453186,grad_norm: 0.918468225363712, iteration: 282343
loss: 1.1903642416000366,grad_norm: 0.9999993599393798, iteration: 282344
loss: 1.014620304107666,grad_norm: 0.7758064981381637, iteration: 282345
loss: 1.0618159770965576,grad_norm: 0.8511411987277546, iteration: 282346
loss: 1.0203986167907715,grad_norm: 0.9999991499012888, iteration: 282347
loss: 1.22202467918396,grad_norm: 0.99999995301022, iteration: 282348
loss: 1.0209119319915771,grad_norm: 0.9629519116313445, iteration: 282349
loss: 0.9960688352584839,grad_norm: 0.8365608306310913, iteration: 282350
loss: 1.0158426761627197,grad_norm: 0.9145073456049048, iteration: 282351
loss: 1.006627082824707,grad_norm: 0.966981780815142, iteration: 282352
loss: 1.0244182348251343,grad_norm: 0.8272189415418431, iteration: 282353
loss: 1.0039644241333008,grad_norm: 0.8280306791374974, iteration: 282354
loss: 1.032553791999817,grad_norm: 0.9999993877743261, iteration: 282355
loss: 0.9924178123474121,grad_norm: 0.9999991269393472, iteration: 282356
loss: 1.0052649974822998,grad_norm: 0.90341212503014, iteration: 282357
loss: 0.9766660928726196,grad_norm: 0.9467925183911952, iteration: 282358
loss: 0.974442720413208,grad_norm: 0.999999505314298, iteration: 282359
loss: 1.071606993675232,grad_norm: 0.9999998318237199, iteration: 282360
loss: 1.0353854894638062,grad_norm: 0.9099854829538403, iteration: 282361
loss: 1.0001623630523682,grad_norm: 0.6679996068363099, iteration: 282362
loss: 1.0406503677368164,grad_norm: 0.9999994666122308, iteration: 282363
loss: 1.0252076387405396,grad_norm: 0.9999997521391023, iteration: 282364
loss: 1.017930030822754,grad_norm: 0.9278105540858458, iteration: 282365
loss: 0.9899856448173523,grad_norm: 0.861211864823228, iteration: 282366
loss: 1.1434028148651123,grad_norm: 0.999999484053263, iteration: 282367
loss: 1.0196110010147095,grad_norm: 0.9999997527947853, iteration: 282368
loss: 1.0280957221984863,grad_norm: 0.8208535275024712, iteration: 282369
loss: 0.9677952527999878,grad_norm: 0.948938372662394, iteration: 282370
loss: 1.0448973178863525,grad_norm: 0.8356510125312461, iteration: 282371
loss: 0.9814517498016357,grad_norm: 0.9282618290326918, iteration: 282372
loss: 1.0053167343139648,grad_norm: 0.8291636971295886, iteration: 282373
loss: 1.0046457052230835,grad_norm: 0.8338065263945696, iteration: 282374
loss: 0.9738555550575256,grad_norm: 0.9374905825409998, iteration: 282375
loss: 1.0143306255340576,grad_norm: 0.8515551435044133, iteration: 282376
loss: 1.0253214836120605,grad_norm: 0.8366559001699113, iteration: 282377
loss: 1.0265686511993408,grad_norm: 0.8921067710775418, iteration: 282378
loss: 1.0218943357467651,grad_norm: 0.9999990635679544, iteration: 282379
loss: 1.0118871927261353,grad_norm: 0.9999990735326688, iteration: 282380
loss: 1.0191742181777954,grad_norm: 0.8972841444492319, iteration: 282381
loss: 1.0228922367095947,grad_norm: 0.8326773534677189, iteration: 282382
loss: 1.0017673969268799,grad_norm: 0.9999995559214998, iteration: 282383
loss: 1.0349787473678589,grad_norm: 0.9061576578733641, iteration: 282384
loss: 1.012269377708435,grad_norm: 0.9488661943435057, iteration: 282385
loss: 1.0419572591781616,grad_norm: 0.9999995662737584, iteration: 282386
loss: 1.0289207696914673,grad_norm: 0.9999991546947297, iteration: 282387
loss: 1.0102189779281616,grad_norm: 0.9999997067684794, iteration: 282388
loss: 1.0691170692443848,grad_norm: 0.7748659389041631, iteration: 282389
loss: 0.9839027523994446,grad_norm: 0.9999993131069733, iteration: 282390
loss: 0.9804803729057312,grad_norm: 0.9836174157531774, iteration: 282391
loss: 1.0170282125473022,grad_norm: 0.8790547075963061, iteration: 282392
loss: 1.137522578239441,grad_norm: 0.9999990192380327, iteration: 282393
loss: 0.9962142109870911,grad_norm: 0.9999997781549842, iteration: 282394
loss: 1.0402930974960327,grad_norm: 0.9999991949107131, iteration: 282395
loss: 0.9722453951835632,grad_norm: 0.9999989893575868, iteration: 282396
loss: 1.0101745128631592,grad_norm: 0.99999939499252, iteration: 282397
loss: 0.975030243396759,grad_norm: 0.9999993475814314, iteration: 282398
loss: 0.9501522779464722,grad_norm: 0.8622109549621607, iteration: 282399
loss: 0.9873707890510559,grad_norm: 0.9999989888892483, iteration: 282400
loss: 0.9678475260734558,grad_norm: 0.8814303317053142, iteration: 282401
loss: 1.0703067779541016,grad_norm: 0.9999993926919704, iteration: 282402
loss: 0.9824458360671997,grad_norm: 0.8423417869615112, iteration: 282403
loss: 0.9534695148468018,grad_norm: 0.838675067706672, iteration: 282404
loss: 0.9854562282562256,grad_norm: 0.8834920278525681, iteration: 282405
loss: 1.007948875427246,grad_norm: 0.7906874186812021, iteration: 282406
loss: 1.0209909677505493,grad_norm: 0.8864123303701084, iteration: 282407
loss: 0.9857361316680908,grad_norm: 0.9999996725888909, iteration: 282408
loss: 0.9794517755508423,grad_norm: 0.7417342857741667, iteration: 282409
loss: 1.0131536722183228,grad_norm: 0.8674802337508718, iteration: 282410
loss: 0.9997444152832031,grad_norm: 0.9285804918895029, iteration: 282411
loss: 1.1082231998443604,grad_norm: 0.9999998446732463, iteration: 282412
loss: 1.0109912157058716,grad_norm: 0.9999996065757462, iteration: 282413
loss: 0.9808557629585266,grad_norm: 0.7715414914581242, iteration: 282414
loss: 0.9783235192298889,grad_norm: 0.9956419324506312, iteration: 282415
loss: 1.0221225023269653,grad_norm: 0.8996506141368047, iteration: 282416
loss: 1.0837031602859497,grad_norm: 0.9999995919117639, iteration: 282417
loss: 1.0310771465301514,grad_norm: 0.9330477445116926, iteration: 282418
loss: 0.988197386264801,grad_norm: 0.8292568876216374, iteration: 282419
loss: 0.9902780055999756,grad_norm: 0.9601975976218305, iteration: 282420
loss: 1.0036338567733765,grad_norm: 0.999999424975294, iteration: 282421
loss: 0.977564811706543,grad_norm: 0.9675581696482892, iteration: 282422
loss: 1.1834115982055664,grad_norm: 0.9999997318277539, iteration: 282423
loss: 0.9941158294677734,grad_norm: 0.8391593360619471, iteration: 282424
loss: 1.0035409927368164,grad_norm: 0.8665390559678309, iteration: 282425
loss: 1.0749545097351074,grad_norm: 0.9876310953162314, iteration: 282426
loss: 1.0200309753417969,grad_norm: 0.9999993986970477, iteration: 282427
loss: 1.0108425617218018,grad_norm: 0.9999997087552555, iteration: 282428
loss: 0.9800583124160767,grad_norm: 0.9999992474963856, iteration: 282429
loss: 1.0531195402145386,grad_norm: 0.879702575610994, iteration: 282430
loss: 0.992673397064209,grad_norm: 0.7752491680370174, iteration: 282431
loss: 1.0120346546173096,grad_norm: 0.9999997952790073, iteration: 282432
loss: 1.1202924251556396,grad_norm: 1.0000000293676006, iteration: 282433
loss: 1.021952509880066,grad_norm: 0.9999995030603637, iteration: 282434
loss: 1.0323808193206787,grad_norm: 0.8096668417996487, iteration: 282435
loss: 0.9960296750068665,grad_norm: 0.8901853081743102, iteration: 282436
loss: 1.0097763538360596,grad_norm: 0.9999999345832203, iteration: 282437
loss: 1.097479224205017,grad_norm: 0.9999998568242443, iteration: 282438
loss: 0.9693208336830139,grad_norm: 0.9999997296182662, iteration: 282439
loss: 1.004490852355957,grad_norm: 0.9941990248852564, iteration: 282440
loss: 0.971509575843811,grad_norm: 0.999999479748805, iteration: 282441
loss: 0.9622737765312195,grad_norm: 0.8596407824698328, iteration: 282442
loss: 0.9690107703208923,grad_norm: 0.9213036969588584, iteration: 282443
loss: 1.0533270835876465,grad_norm: 0.99999959189673, iteration: 282444
loss: 0.9650449752807617,grad_norm: 0.8961886747274436, iteration: 282445
loss: 1.0227155685424805,grad_norm: 0.7664857268976446, iteration: 282446
loss: 0.9955751299858093,grad_norm: 0.7860469796511358, iteration: 282447
loss: 1.0279322862625122,grad_norm: 0.9999992109640546, iteration: 282448
loss: 0.9788865447044373,grad_norm: 0.8127737208051917, iteration: 282449
loss: 0.9940789937973022,grad_norm: 0.8208807212835487, iteration: 282450
loss: 1.062360405921936,grad_norm: 0.9999998588084344, iteration: 282451
loss: 1.0334361791610718,grad_norm: 0.853393360492464, iteration: 282452
loss: 1.0358673334121704,grad_norm: 0.9999991444545429, iteration: 282453
loss: 0.9805378913879395,grad_norm: 0.9999989637808827, iteration: 282454
loss: 1.1178560256958008,grad_norm: 0.9999995809218452, iteration: 282455
loss: 1.0398025512695312,grad_norm: 0.999999844327075, iteration: 282456
loss: 0.998631477355957,grad_norm: 0.7441386804220199, iteration: 282457
loss: 1.018290638923645,grad_norm: 0.9999992910025306, iteration: 282458
loss: 1.009238839149475,grad_norm: 0.8019219249463623, iteration: 282459
loss: 1.0350215435028076,grad_norm: 0.9999989825062692, iteration: 282460
loss: 1.0493230819702148,grad_norm: 0.9999993741878425, iteration: 282461
loss: 1.0357128381729126,grad_norm: 0.9482764971978993, iteration: 282462
loss: 1.052678108215332,grad_norm: 0.8816410345475897, iteration: 282463
loss: 1.1087236404418945,grad_norm: 0.9999993257451871, iteration: 282464
loss: 1.0162811279296875,grad_norm: 0.7152844582883646, iteration: 282465
loss: 1.0232009887695312,grad_norm: 0.9906138186900648, iteration: 282466
loss: 0.9862420558929443,grad_norm: 0.8342987251822617, iteration: 282467
loss: 1.0311781167984009,grad_norm: 0.9999992720178795, iteration: 282468
loss: 1.0106937885284424,grad_norm: 0.7520225254356253, iteration: 282469
loss: 0.9863960146903992,grad_norm: 0.9374574547017526, iteration: 282470
loss: 0.9980486631393433,grad_norm: 0.944981761739151, iteration: 282471
loss: 1.0028619766235352,grad_norm: 0.98317278706313, iteration: 282472
loss: 1.0032135248184204,grad_norm: 0.7873093891499455, iteration: 282473
loss: 0.9874814748764038,grad_norm: 0.7943928357065203, iteration: 282474
loss: 0.9769338369369507,grad_norm: 0.8821783905816549, iteration: 282475
loss: 1.0970598459243774,grad_norm: 0.9999991209522932, iteration: 282476
loss: 1.0553035736083984,grad_norm: 0.9999991633104546, iteration: 282477
loss: 1.0195363759994507,grad_norm: 0.999999620669569, iteration: 282478
loss: 1.0601731538772583,grad_norm: 0.9999991498961027, iteration: 282479
loss: 0.9995566010475159,grad_norm: 0.7646011716195846, iteration: 282480
loss: 1.0226380825042725,grad_norm: 0.9999997874903017, iteration: 282481
loss: 0.990120530128479,grad_norm: 0.9999990016213774, iteration: 282482
loss: 0.9970775246620178,grad_norm: 0.8977321748358825, iteration: 282483
loss: 0.9536734223365784,grad_norm: 0.8264643194845559, iteration: 282484
loss: 1.1373430490493774,grad_norm: 0.9999997959030611, iteration: 282485
loss: 0.981929361820221,grad_norm: 0.9409360694984872, iteration: 282486
loss: 1.0451964139938354,grad_norm: 0.9999991318355339, iteration: 282487
loss: 0.9981526136398315,grad_norm: 0.9999995310752223, iteration: 282488
loss: 1.0545094013214111,grad_norm: 0.9999999223835667, iteration: 282489
loss: 1.1381115913391113,grad_norm: 0.9678249465668389, iteration: 282490
loss: 1.004999041557312,grad_norm: 0.999999131009696, iteration: 282491
loss: 1.0881431102752686,grad_norm: 0.9999990848133911, iteration: 282492
loss: 1.0829001665115356,grad_norm: 0.9999994863695506, iteration: 282493
loss: 1.1186017990112305,grad_norm: 0.9999998644053064, iteration: 282494
loss: 1.0777955055236816,grad_norm: 0.9858134521727595, iteration: 282495
loss: 1.039962649345398,grad_norm: 1.0000000021399136, iteration: 282496
loss: 0.9686962962150574,grad_norm: 0.9185550717771389, iteration: 282497
loss: 1.0428669452667236,grad_norm: 0.9999991264874001, iteration: 282498
loss: 1.113968014717102,grad_norm: 0.8744764763270448, iteration: 282499
loss: 1.2163463830947876,grad_norm: 0.9999995594344182, iteration: 282500
loss: 1.002869963645935,grad_norm: 0.8206389676332646, iteration: 282501
loss: 1.1070663928985596,grad_norm: 0.9999996900466959, iteration: 282502
loss: 1.1068875789642334,grad_norm: 0.8894715092260406, iteration: 282503
loss: 1.0955040454864502,grad_norm: 0.9999993297362706, iteration: 282504
loss: 1.0434410572052002,grad_norm: 0.8634142750812464, iteration: 282505
loss: 1.0065919160842896,grad_norm: 0.878044000587404, iteration: 282506
loss: 1.0093878507614136,grad_norm: 0.8694704487005308, iteration: 282507
loss: 1.0961570739746094,grad_norm: 0.9999997117991727, iteration: 282508
loss: 1.0547488927841187,grad_norm: 0.9677962734834522, iteration: 282509
loss: 0.9740971922874451,grad_norm: 0.8842362477808816, iteration: 282510
loss: 1.1438875198364258,grad_norm: 1.0000000505651916, iteration: 282511
loss: 1.005285620689392,grad_norm: 0.9999998573566727, iteration: 282512
loss: 0.9932796359062195,grad_norm: 0.9999991419670945, iteration: 282513
loss: 1.082905888557434,grad_norm: 0.9174775056906331, iteration: 282514
loss: 0.9813737869262695,grad_norm: 0.8087403764328307, iteration: 282515
loss: 1.0744035243988037,grad_norm: 0.892008572471534, iteration: 282516
loss: 1.002227783203125,grad_norm: 0.667102542186149, iteration: 282517
loss: 0.9602636098861694,grad_norm: 0.928038568384184, iteration: 282518
loss: 0.9916495680809021,grad_norm: 0.9422941240176058, iteration: 282519
loss: 0.9952635765075684,grad_norm: 0.8546691506294954, iteration: 282520
loss: 1.0048116445541382,grad_norm: 0.9660380942742887, iteration: 282521
loss: 1.02621328830719,grad_norm: 0.8813814724357677, iteration: 282522
loss: 1.0378470420837402,grad_norm: 0.8995692793214678, iteration: 282523
loss: 1.0283775329589844,grad_norm: 0.9999992730296005, iteration: 282524
loss: 1.003665804862976,grad_norm: 0.871134100466941, iteration: 282525
loss: 0.9411453008651733,grad_norm: 0.870872002343606, iteration: 282526
loss: 0.9802504777908325,grad_norm: 0.6607272526500843, iteration: 282527
loss: 1.0163559913635254,grad_norm: 0.8736892820345165, iteration: 282528
loss: 0.9894401431083679,grad_norm: 0.9952218711742258, iteration: 282529
loss: 1.0366533994674683,grad_norm: 0.9999993544294719, iteration: 282530
loss: 1.0491489171981812,grad_norm: 0.8524645400637213, iteration: 282531
loss: 0.9903045296669006,grad_norm: 0.999999667579492, iteration: 282532
loss: 0.9740802049636841,grad_norm: 0.9999996239730556, iteration: 282533
loss: 1.0006792545318604,grad_norm: 0.8582074559263224, iteration: 282534
loss: 1.0195140838623047,grad_norm: 0.9999990504058657, iteration: 282535
loss: 1.0390207767486572,grad_norm: 0.9999998130128522, iteration: 282536
loss: 0.9836878776550293,grad_norm: 0.9016267499470315, iteration: 282537
loss: 1.0120270252227783,grad_norm: 0.9999993868396324, iteration: 282538
loss: 1.0175905227661133,grad_norm: 0.8221679264930474, iteration: 282539
loss: 0.9910870790481567,grad_norm: 0.8152682616529693, iteration: 282540
loss: 1.063094139099121,grad_norm: 0.9999994757532269, iteration: 282541
loss: 0.9939296245574951,grad_norm: 0.8106312998710015, iteration: 282542
loss: 1.0367428064346313,grad_norm: 0.9999991461961573, iteration: 282543
loss: 0.9922738075256348,grad_norm: 0.7347457946420447, iteration: 282544
loss: 1.02024245262146,grad_norm: 0.9999999751690944, iteration: 282545
loss: 1.0119383335113525,grad_norm: 0.7565283184066199, iteration: 282546
loss: 1.0105786323547363,grad_norm: 0.8196278776362073, iteration: 282547
loss: 1.083674669265747,grad_norm: 0.8964516642600583, iteration: 282548
loss: 0.9868220090866089,grad_norm: 0.9999997581685299, iteration: 282549
loss: 1.0474672317504883,grad_norm: 0.9613345074796125, iteration: 282550
loss: 1.0304639339447021,grad_norm: 0.9333209682951028, iteration: 282551
loss: 0.9980321526527405,grad_norm: 0.8203253815578417, iteration: 282552
loss: 0.9947682023048401,grad_norm: 0.8708774144412891, iteration: 282553
loss: 0.9896442294120789,grad_norm: 0.733391330191814, iteration: 282554
loss: 0.9896810054779053,grad_norm: 0.8318378359148335, iteration: 282555
loss: 0.9748177528381348,grad_norm: 0.855768236569853, iteration: 282556
loss: 0.9861651659011841,grad_norm: 0.8170534025561376, iteration: 282557
loss: 1.0798124074935913,grad_norm: 0.9999998582945202, iteration: 282558
loss: 1.0264878273010254,grad_norm: 0.9999991179853339, iteration: 282559
loss: 1.014672040939331,grad_norm: 0.9372798636161265, iteration: 282560
loss: 0.9843264222145081,grad_norm: 0.8012054490588217, iteration: 282561
loss: 1.001120924949646,grad_norm: 0.8664032553306701, iteration: 282562
loss: 1.0060707330703735,grad_norm: 0.9999991325540327, iteration: 282563
loss: 0.9898949265480042,grad_norm: 0.8099014504247732, iteration: 282564
loss: 0.974907636642456,grad_norm: 0.8047098495678888, iteration: 282565
loss: 1.0902878046035767,grad_norm: 0.9999997667171707, iteration: 282566
loss: 1.0468658208847046,grad_norm: 0.9516409522612941, iteration: 282567
loss: 1.0103665590286255,grad_norm: 0.7697393685738538, iteration: 282568
loss: 1.0406074523925781,grad_norm: 0.999999959473493, iteration: 282569
loss: 0.9964760541915894,grad_norm: 0.8564436506028001, iteration: 282570
loss: 0.9773117303848267,grad_norm: 0.8381336012247502, iteration: 282571
loss: 1.0806493759155273,grad_norm: 0.9999999078287866, iteration: 282572
loss: 0.9933084845542908,grad_norm: 0.8778906961304135, iteration: 282573
loss: 1.025101661682129,grad_norm: 0.9999992367460918, iteration: 282574
loss: 1.0022315979003906,grad_norm: 0.9686458597003279, iteration: 282575
loss: 1.0599956512451172,grad_norm: 0.7110188955246949, iteration: 282576
loss: 0.9661790728569031,grad_norm: 0.8137305985564433, iteration: 282577
loss: 1.0086870193481445,grad_norm: 0.8096041117893386, iteration: 282578
loss: 1.0623866319656372,grad_norm: 0.9999997219685131, iteration: 282579
loss: 1.0709195137023926,grad_norm: 0.806604478577569, iteration: 282580
loss: 1.0003292560577393,grad_norm: 0.9243135570144579, iteration: 282581
loss: 1.0293645858764648,grad_norm: 0.9999994606574781, iteration: 282582
loss: 0.9771009087562561,grad_norm: 0.8690261912457226, iteration: 282583
loss: 0.9787026047706604,grad_norm: 0.9006880072992157, iteration: 282584
loss: 0.9738067388534546,grad_norm: 0.8403742344680126, iteration: 282585
loss: 1.0263798236846924,grad_norm: 0.8220646749992173, iteration: 282586
loss: 1.0168662071228027,grad_norm: 0.9392277057981107, iteration: 282587
loss: 0.9952945709228516,grad_norm: 0.7841895650595686, iteration: 282588
loss: 1.028035044670105,grad_norm: 1.000000019802298, iteration: 282589
loss: 0.9871732592582703,grad_norm: 0.8885665893957843, iteration: 282590
loss: 1.0211598873138428,grad_norm: 0.9207236651292122, iteration: 282591
loss: 0.9799792766571045,grad_norm: 0.816196388391861, iteration: 282592
loss: 1.0167865753173828,grad_norm: 0.9999998576077094, iteration: 282593
loss: 0.9799688458442688,grad_norm: 0.8512100022003688, iteration: 282594
loss: 1.0268840789794922,grad_norm: 0.999999877994734, iteration: 282595
loss: 0.9932249188423157,grad_norm: 0.8563150477539302, iteration: 282596
loss: 1.0078816413879395,grad_norm: 0.6929551043244144, iteration: 282597
loss: 1.127240777015686,grad_norm: 0.9999992189616184, iteration: 282598
loss: 0.9907853007316589,grad_norm: 0.9999991480673766, iteration: 282599
loss: 1.0925625562667847,grad_norm: 0.9999993206449429, iteration: 282600
loss: 0.9735896587371826,grad_norm: 0.9999990367058174, iteration: 282601
loss: 0.994951069355011,grad_norm: 0.896021338299462, iteration: 282602
loss: 1.0231133699417114,grad_norm: 0.9818885428846974, iteration: 282603
loss: 1.0123374462127686,grad_norm: 0.9999990085977379, iteration: 282604
loss: 1.0199048519134521,grad_norm: 0.9077285041968556, iteration: 282605
loss: 0.9840186238288879,grad_norm: 0.9999995304330285, iteration: 282606
loss: 0.9816022515296936,grad_norm: 0.8061365092399926, iteration: 282607
loss: 0.9904963374137878,grad_norm: 0.8493498514829257, iteration: 282608
loss: 1.0184054374694824,grad_norm: 0.910649559942429, iteration: 282609
loss: 1.0637913942337036,grad_norm: 0.9862395421111323, iteration: 282610
loss: 1.0085077285766602,grad_norm: 0.8384394590786296, iteration: 282611
loss: 0.9947579503059387,grad_norm: 0.8389879882868274, iteration: 282612
loss: 0.9826791286468506,grad_norm: 0.8684186155972105, iteration: 282613
loss: 0.9978567957878113,grad_norm: 0.7573678453620389, iteration: 282614
loss: 0.9911619424819946,grad_norm: 0.9613076358219511, iteration: 282615
loss: 1.0171383619308472,grad_norm: 0.7809127316593989, iteration: 282616
loss: 0.9836472272872925,grad_norm: 0.9756897505060761, iteration: 282617
loss: 1.046543002128601,grad_norm: 0.9999991948617999, iteration: 282618
loss: 1.1465569734573364,grad_norm: 1.0000000779674936, iteration: 282619
loss: 1.0711185932159424,grad_norm: 0.9568184974596537, iteration: 282620
loss: 0.9738166332244873,grad_norm: 0.7773868570810173, iteration: 282621
loss: 0.9885472655296326,grad_norm: 0.999999610142251, iteration: 282622
loss: 1.0315051078796387,grad_norm: 0.9999999320949283, iteration: 282623
loss: 1.0151187181472778,grad_norm: 0.8476205403613322, iteration: 282624
loss: 1.024119257926941,grad_norm: 0.8178580112182282, iteration: 282625
loss: 0.9776768684387207,grad_norm: 0.7704791583020734, iteration: 282626
loss: 1.0355182886123657,grad_norm: 0.9999991187822497, iteration: 282627
loss: 0.9914719462394714,grad_norm: 0.9430485967751662, iteration: 282628
loss: 1.005056381225586,grad_norm: 0.8964907561654722, iteration: 282629
loss: 1.0045698881149292,grad_norm: 0.8838014991545978, iteration: 282630
loss: 0.992954432964325,grad_norm: 0.8234316277395849, iteration: 282631
loss: 0.9972290992736816,grad_norm: 0.8143035968501706, iteration: 282632
loss: 0.9657531380653381,grad_norm: 0.9327509177674239, iteration: 282633
loss: 1.0264010429382324,grad_norm: 0.9936745495259997, iteration: 282634
loss: 1.0305993556976318,grad_norm: 0.7663844888485797, iteration: 282635
loss: 1.0385867357254028,grad_norm: 0.9999993620166989, iteration: 282636
loss: 0.9747393131256104,grad_norm: 0.6664198992733185, iteration: 282637
loss: 1.0362266302108765,grad_norm: 0.9999992859815385, iteration: 282638
loss: 1.0688515901565552,grad_norm: 0.999999679417987, iteration: 282639
loss: 1.0259888172149658,grad_norm: 0.9999992178770726, iteration: 282640
loss: 1.0131845474243164,grad_norm: 0.7301562905261868, iteration: 282641
loss: 1.0331145524978638,grad_norm: 0.9999996853399324, iteration: 282642
loss: 1.0217297077178955,grad_norm: 0.8627521796625188, iteration: 282643
loss: 0.9416500329971313,grad_norm: 0.9230279476709206, iteration: 282644
loss: 0.9819372892379761,grad_norm: 0.8418687596534902, iteration: 282645
loss: 0.9839162826538086,grad_norm: 0.8799375486789669, iteration: 282646
loss: 1.0021765232086182,grad_norm: 0.9827661159584966, iteration: 282647
loss: 1.0352461338043213,grad_norm: 0.8087717730251558, iteration: 282648
loss: 1.0234895944595337,grad_norm: 0.9066056732885026, iteration: 282649
loss: 0.9781133532524109,grad_norm: 0.8339632592627949, iteration: 282650
loss: 1.0253689289093018,grad_norm: 0.894182615747707, iteration: 282651
loss: 1.0056575536727905,grad_norm: 0.9999991534069398, iteration: 282652
loss: 0.9869416356086731,grad_norm: 0.9999993016261912, iteration: 282653
loss: 0.9965682029724121,grad_norm: 0.9220982117180357, iteration: 282654
loss: 0.9719567894935608,grad_norm: 0.8324156813135588, iteration: 282655
loss: 0.9993312358856201,grad_norm: 0.8642543179752911, iteration: 282656
loss: 0.9674516916275024,grad_norm: 0.9079830539887903, iteration: 282657
loss: 0.9714548587799072,grad_norm: 0.9958397016522944, iteration: 282658
loss: 1.0171364545822144,grad_norm: 0.9014861822636726, iteration: 282659
loss: 0.9991374015808105,grad_norm: 0.9999992267164166, iteration: 282660
loss: 1.1006858348846436,grad_norm: 0.9999991548625389, iteration: 282661
loss: 1.0165619850158691,grad_norm: 0.9999991345240157, iteration: 282662
loss: 1.0177098512649536,grad_norm: 0.8543034972605995, iteration: 282663
loss: 0.9802119135856628,grad_norm: 0.8668060003278394, iteration: 282664
loss: 0.9972572922706604,grad_norm: 0.9360159041306884, iteration: 282665
loss: 1.0114094018936157,grad_norm: 0.9999992263353864, iteration: 282666
loss: 0.9700336456298828,grad_norm: 0.9999990193025595, iteration: 282667
loss: 1.042812705039978,grad_norm: 0.9630719382539645, iteration: 282668
loss: 1.121626853942871,grad_norm: 0.9999997797121122, iteration: 282669
loss: 0.994394063949585,grad_norm: 0.9999992367886823, iteration: 282670
loss: 0.9821928143501282,grad_norm: 0.8735421733743083, iteration: 282671
loss: 1.0585448741912842,grad_norm: 0.9999998399737067, iteration: 282672
loss: 1.043143391609192,grad_norm: 0.8758198580952268, iteration: 282673
loss: 1.0238325595855713,grad_norm: 0.9999990926364662, iteration: 282674
loss: 1.0644267797470093,grad_norm: 0.9999994341720855, iteration: 282675
loss: 0.9980530738830566,grad_norm: 0.7812023646555954, iteration: 282676
loss: 1.0088738203048706,grad_norm: 0.8178749052856336, iteration: 282677
loss: 1.0327014923095703,grad_norm: 0.9646235517577663, iteration: 282678
loss: 0.9843370914459229,grad_norm: 0.9999990566728955, iteration: 282679
loss: 0.9665990471839905,grad_norm: 0.9146516502432137, iteration: 282680
loss: 1.0048444271087646,grad_norm: 0.9105797195749348, iteration: 282681
loss: 1.0086228847503662,grad_norm: 0.8406500605815134, iteration: 282682
loss: 0.9730124473571777,grad_norm: 0.8986116881210066, iteration: 282683
loss: 0.9831083416938782,grad_norm: 0.8071717077379144, iteration: 282684
loss: 1.0152581930160522,grad_norm: 0.8783993396627211, iteration: 282685
loss: 1.025575041770935,grad_norm: 0.9177392427158542, iteration: 282686
loss: 1.0163018703460693,grad_norm: 0.9999995491349728, iteration: 282687
loss: 1.0480979681015015,grad_norm: 0.9999993793035467, iteration: 282688
loss: 1.0083863735198975,grad_norm: 0.8681462582377752, iteration: 282689
loss: 0.9957939386367798,grad_norm: 0.9999992221228605, iteration: 282690
loss: 0.9711616039276123,grad_norm: 0.9286990559238678, iteration: 282691
loss: 0.9883102774620056,grad_norm: 0.9354929726835718, iteration: 282692
loss: 1.0456520318984985,grad_norm: 0.9446993941589884, iteration: 282693
loss: 1.2850227355957031,grad_norm: 0.999999991248809, iteration: 282694
loss: 1.0086292028427124,grad_norm: 0.8752017617362493, iteration: 282695
loss: 1.0015054941177368,grad_norm: 0.7752816522881824, iteration: 282696
loss: 0.9732270240783691,grad_norm: 0.8730072932912966, iteration: 282697
loss: 1.0015296936035156,grad_norm: 0.8619380255197814, iteration: 282698
loss: 0.9956823587417603,grad_norm: 0.9819552023390918, iteration: 282699
loss: 1.012391448020935,grad_norm: 0.9331606688397687, iteration: 282700
loss: 0.9745235443115234,grad_norm: 0.9319353180564596, iteration: 282701
loss: 1.024529218673706,grad_norm: 0.8068238443633143, iteration: 282702
loss: 0.9729759097099304,grad_norm: 0.9293140771903675, iteration: 282703
loss: 0.9935005307197571,grad_norm: 0.9186248029241735, iteration: 282704
loss: 0.9613674879074097,grad_norm: 0.9346803879864841, iteration: 282705
loss: 1.006506085395813,grad_norm: 0.8126010934457615, iteration: 282706
loss: 0.9766321182250977,grad_norm: 0.8206221065513338, iteration: 282707
loss: 1.0048656463623047,grad_norm: 0.9312428628178219, iteration: 282708
loss: 1.0408002138137817,grad_norm: 0.8891237948197119, iteration: 282709
loss: 1.065977692604065,grad_norm: 0.9191824234540061, iteration: 282710
loss: 0.9999130964279175,grad_norm: 0.8301120277640904, iteration: 282711
loss: 0.9626064896583557,grad_norm: 0.9999992028827213, iteration: 282712
loss: 1.0653984546661377,grad_norm: 0.8123045638128242, iteration: 282713
loss: 0.9951555132865906,grad_norm: 0.801557832561705, iteration: 282714
loss: 1.0206505060195923,grad_norm: 0.999999253121863, iteration: 282715
loss: 0.9989141225814819,grad_norm: 0.9892448926874381, iteration: 282716
loss: 1.0107101202011108,grad_norm: 0.8759053532268775, iteration: 282717
loss: 0.9983870387077332,grad_norm: 0.960845133692363, iteration: 282718
loss: 1.0286657810211182,grad_norm: 0.7302139167751159, iteration: 282719
loss: 1.0195101499557495,grad_norm: 0.9482242350026373, iteration: 282720
loss: 1.021964192390442,grad_norm: 0.976035949251628, iteration: 282721
loss: 0.9756704568862915,grad_norm: 0.7061536923902529, iteration: 282722
loss: 1.0080504417419434,grad_norm: 0.8302154563463428, iteration: 282723
loss: 1.0981128215789795,grad_norm: 0.9999999390184173, iteration: 282724
loss: 0.9769639372825623,grad_norm: 0.999999155650529, iteration: 282725
loss: 1.0118108987808228,grad_norm: 0.9999996270731346, iteration: 282726
loss: 1.0052469968795776,grad_norm: 0.7449333960220911, iteration: 282727
loss: 0.9544164538383484,grad_norm: 0.9999989929533071, iteration: 282728
loss: 1.0350090265274048,grad_norm: 0.9364857705849754, iteration: 282729
loss: 0.9959880709648132,grad_norm: 0.8723786764732052, iteration: 282730
loss: 1.0318236351013184,grad_norm: 0.7225316649267546, iteration: 282731
loss: 0.9875608682632446,grad_norm: 1.0000000436254368, iteration: 282732
loss: 0.9965683221817017,grad_norm: 0.9425847379042029, iteration: 282733
loss: 1.0363423824310303,grad_norm: 0.8881657157246707, iteration: 282734
loss: 1.0470324754714966,grad_norm: 0.9999993287962551, iteration: 282735
loss: 1.0483388900756836,grad_norm: 0.999999268842063, iteration: 282736
loss: 1.050190806388855,grad_norm: 0.7897498122962153, iteration: 282737
loss: 1.007866621017456,grad_norm: 0.9999990515299907, iteration: 282738
loss: 1.0087567567825317,grad_norm: 0.7165537411335656, iteration: 282739
loss: 1.0047796964645386,grad_norm: 0.8931154952710731, iteration: 282740
loss: 1.0000325441360474,grad_norm: 0.8024538872029181, iteration: 282741
loss: 1.00681471824646,grad_norm: 0.8732422481748885, iteration: 282742
loss: 1.0504672527313232,grad_norm: 0.9999990838563704, iteration: 282743
loss: 1.00422203540802,grad_norm: 0.8997660763475503, iteration: 282744
loss: 1.0449765920639038,grad_norm: 0.9999997829726545, iteration: 282745
loss: 1.1239814758300781,grad_norm: 0.9133625563062895, iteration: 282746
loss: 0.996981680393219,grad_norm: 0.6738624009972721, iteration: 282747
loss: 1.000260353088379,grad_norm: 0.9999991229741925, iteration: 282748
loss: 1.0038834810256958,grad_norm: 0.8021479157045724, iteration: 282749
loss: 1.0449527502059937,grad_norm: 0.9999998774862826, iteration: 282750
loss: 1.0088125467300415,grad_norm: 0.8066458166737889, iteration: 282751
loss: 1.0183942317962646,grad_norm: 0.9999991335749584, iteration: 282752
loss: 0.9584452509880066,grad_norm: 0.9999991741987344, iteration: 282753
loss: 0.9749763011932373,grad_norm: 0.8480435936292577, iteration: 282754
loss: 0.9842713475227356,grad_norm: 0.8132002340542102, iteration: 282755
loss: 0.9808698892593384,grad_norm: 0.8592424721570487, iteration: 282756
loss: 1.0100635290145874,grad_norm: 0.9999994585389709, iteration: 282757
loss: 1.0092204809188843,grad_norm: 0.9956931132919573, iteration: 282758
loss: 1.0074450969696045,grad_norm: 0.9999990422422467, iteration: 282759
loss: 0.9734763503074646,grad_norm: 0.9868263104383347, iteration: 282760
loss: 1.0517445802688599,grad_norm: 0.9999990794401069, iteration: 282761
loss: 0.9948650598526001,grad_norm: 0.7683654668960466, iteration: 282762
loss: 1.0097533464431763,grad_norm: 0.8374711857245556, iteration: 282763
loss: 0.9965925812721252,grad_norm: 0.976805263631179, iteration: 282764
loss: 1.01907217502594,grad_norm: 0.9999991192663515, iteration: 282765
loss: 1.0005993843078613,grad_norm: 0.915683508917533, iteration: 282766
loss: 0.9651116728782654,grad_norm: 0.7115615745870633, iteration: 282767
loss: 1.0028634071350098,grad_norm: 0.9999990329320145, iteration: 282768
loss: 1.045034408569336,grad_norm: 0.9869277206317204, iteration: 282769
loss: 0.9774466156959534,grad_norm: 0.8125896850126918, iteration: 282770
loss: 0.9829009771347046,grad_norm: 0.7865348626347811, iteration: 282771
loss: 1.0078383684158325,grad_norm: 0.847281054194991, iteration: 282772
loss: 0.9710366725921631,grad_norm: 0.953181059424458, iteration: 282773
loss: 1.0010496377944946,grad_norm: 0.9999991845940909, iteration: 282774
loss: 1.0133390426635742,grad_norm: 0.8678751702984856, iteration: 282775
loss: 0.9965662360191345,grad_norm: 0.9675825011336128, iteration: 282776
loss: 0.9907861948013306,grad_norm: 0.9335991880160415, iteration: 282777
loss: 0.9972987174987793,grad_norm: 0.9999989840737599, iteration: 282778
loss: 1.0144155025482178,grad_norm: 0.9999998262165952, iteration: 282779
loss: 1.0020744800567627,grad_norm: 0.9999990404510868, iteration: 282780
loss: 0.9814262986183167,grad_norm: 0.75872747559466, iteration: 282781
loss: 0.9909443855285645,grad_norm: 0.8969550522021293, iteration: 282782
loss: 1.024369239807129,grad_norm: 0.8427004131888208, iteration: 282783
loss: 0.9624844193458557,grad_norm: 0.8916814359784277, iteration: 282784
loss: 0.9482980370521545,grad_norm: 0.7540733221436973, iteration: 282785
loss: 1.0477241277694702,grad_norm: 0.83455720437583, iteration: 282786
loss: 1.0040242671966553,grad_norm: 0.7994090150454638, iteration: 282787
loss: 0.9977314472198486,grad_norm: 0.8226665685236489, iteration: 282788
loss: 1.0054131746292114,grad_norm: 0.8934010010575987, iteration: 282789
loss: 0.9982943534851074,grad_norm: 0.9308439688110552, iteration: 282790
loss: 1.0072036981582642,grad_norm: 0.8354458428492846, iteration: 282791
loss: 1.0514110326766968,grad_norm: 0.9344906683770151, iteration: 282792
loss: 0.9863921999931335,grad_norm: 0.9353760655160624, iteration: 282793
loss: 1.3861198425292969,grad_norm: 1.0000000016874189, iteration: 282794
loss: 1.0365712642669678,grad_norm: 0.8274305612496745, iteration: 282795
loss: 0.9939116835594177,grad_norm: 0.7075639725114451, iteration: 282796
loss: 1.0838021039962769,grad_norm: 0.9999991879229205, iteration: 282797
loss: 1.1419175863265991,grad_norm: 0.9999996285385414, iteration: 282798
loss: 1.021112322807312,grad_norm: 0.9410672493970635, iteration: 282799
loss: 0.975452184677124,grad_norm: 0.8588932667532185, iteration: 282800
loss: 0.9881030321121216,grad_norm: 0.9999991760086389, iteration: 282801
loss: 1.033431887626648,grad_norm: 0.8123104938646102, iteration: 282802
loss: 1.0151969194412231,grad_norm: 0.9494684421710191, iteration: 282803
loss: 1.1314760446548462,grad_norm: 0.9999998179783214, iteration: 282804
loss: 1.0206685066223145,grad_norm: 0.9664483212187068, iteration: 282805
loss: 1.004773736000061,grad_norm: 0.7753307573926399, iteration: 282806
loss: 1.0155695676803589,grad_norm: 0.8957707740369099, iteration: 282807
loss: 1.0375014543533325,grad_norm: 0.7954118001197228, iteration: 282808
loss: 1.009468674659729,grad_norm: 0.9999990745576969, iteration: 282809
loss: 1.0392754077911377,grad_norm: 0.9684808082212382, iteration: 282810
loss: 1.011664628982544,grad_norm: 0.924052132723343, iteration: 282811
loss: 1.0306763648986816,grad_norm: 0.9999999420569929, iteration: 282812
loss: 1.1044321060180664,grad_norm: 0.9999998543458086, iteration: 282813
loss: 1.0253958702087402,grad_norm: 0.9337261227363505, iteration: 282814
loss: 0.994819700717926,grad_norm: 0.9999991230196844, iteration: 282815
loss: 1.0229313373565674,grad_norm: 0.9999992065666117, iteration: 282816
loss: 1.01481032371521,grad_norm: 0.999999112417148, iteration: 282817
loss: 0.9845298528671265,grad_norm: 0.9999999929861852, iteration: 282818
loss: 1.0341708660125732,grad_norm: 0.6919804852836878, iteration: 282819
loss: 1.1966979503631592,grad_norm: 0.9999998629013537, iteration: 282820
loss: 0.9897177219390869,grad_norm: 0.938356645915185, iteration: 282821
loss: 0.9927645325660706,grad_norm: 0.9999990706597636, iteration: 282822
loss: 0.9635029435157776,grad_norm: 0.8832299087395346, iteration: 282823
loss: 0.9905896186828613,grad_norm: 0.7708731933025816, iteration: 282824
loss: 0.9963703751564026,grad_norm: 0.9999991086617315, iteration: 282825
loss: 1.004312515258789,grad_norm: 0.8977014018232898, iteration: 282826
loss: 0.9961326122283936,grad_norm: 0.7236561723559864, iteration: 282827
loss: 0.9977231621742249,grad_norm: 0.8469393110423935, iteration: 282828
loss: 1.0022691488265991,grad_norm: 0.9630359522907542, iteration: 282829
loss: 1.0348923206329346,grad_norm: 0.9999993555782466, iteration: 282830
loss: 1.0033375024795532,grad_norm: 0.9658458153263897, iteration: 282831
loss: 1.0088539123535156,grad_norm: 0.9999991203750199, iteration: 282832
loss: 1.0127935409545898,grad_norm: 0.8380588419644438, iteration: 282833
loss: 1.0780038833618164,grad_norm: 0.9999990654353452, iteration: 282834
loss: 0.9917537569999695,grad_norm: 0.8302111413302837, iteration: 282835
loss: 0.9984129667282104,grad_norm: 0.7740609714803799, iteration: 282836
loss: 1.0442219972610474,grad_norm: 0.9005791188020273, iteration: 282837
loss: 0.998884916305542,grad_norm: 0.9999990829096503, iteration: 282838
loss: 1.0213537216186523,grad_norm: 0.9845348443431347, iteration: 282839
loss: 1.013475775718689,grad_norm: 0.8291945873028296, iteration: 282840
loss: 0.9770930409431458,grad_norm: 0.8138756889965643, iteration: 282841
loss: 0.998029887676239,grad_norm: 0.9007808066398095, iteration: 282842
loss: 1.0104070901870728,grad_norm: 0.999999143296393, iteration: 282843
loss: 1.053033709526062,grad_norm: 0.9999993313587282, iteration: 282844
loss: 1.008599042892456,grad_norm: 0.9999990802882222, iteration: 282845
loss: 1.026652455329895,grad_norm: 0.734134249283875, iteration: 282846
loss: 1.0526071786880493,grad_norm: 0.832469849904639, iteration: 282847
loss: 0.9950218200683594,grad_norm: 0.9598929682929568, iteration: 282848
loss: 1.0418254137039185,grad_norm: 0.9999992393348319, iteration: 282849
loss: 1.0407177209854126,grad_norm: 0.9999991014063315, iteration: 282850
loss: 0.9923722743988037,grad_norm: 0.9352116186028596, iteration: 282851
loss: 0.9816223382949829,grad_norm: 0.9562518058885308, iteration: 282852
loss: 1.0196471214294434,grad_norm: 0.9344859137100453, iteration: 282853
loss: 1.246775507926941,grad_norm: 0.999999717249155, iteration: 282854
loss: 1.0312613248825073,grad_norm: 0.8182759343345516, iteration: 282855
loss: 0.9812248945236206,grad_norm: 0.7409346453601311, iteration: 282856
loss: 0.9665637016296387,grad_norm: 0.9999991674710008, iteration: 282857
loss: 1.099105715751648,grad_norm: 0.9999991844070134, iteration: 282858
loss: 1.0345686674118042,grad_norm: 0.9999989667292941, iteration: 282859
loss: 1.0023056268692017,grad_norm: 0.8202856890625525, iteration: 282860
loss: 0.9942470788955688,grad_norm: 0.9396213252170139, iteration: 282861
loss: 1.0164986848831177,grad_norm: 0.8262291703224222, iteration: 282862
loss: 0.9562793970108032,grad_norm: 0.9765326913062993, iteration: 282863
loss: 1.0922908782958984,grad_norm: 0.9999991310513882, iteration: 282864
loss: 0.989690363407135,grad_norm: 0.8823416190597126, iteration: 282865
loss: 1.0403553247451782,grad_norm: 0.8721786501845893, iteration: 282866
loss: 1.0296449661254883,grad_norm: 0.9999992283809513, iteration: 282867
loss: 1.0199403762817383,grad_norm: 0.9020726847423329, iteration: 282868
loss: 0.9777461886405945,grad_norm: 0.8923931948670204, iteration: 282869
loss: 0.9892091751098633,grad_norm: 0.9999993446846397, iteration: 282870
loss: 1.006074070930481,grad_norm: 0.943632767315151, iteration: 282871
loss: 0.9824961423873901,grad_norm: 0.7377127396589871, iteration: 282872
loss: 0.978441059589386,grad_norm: 0.7837186926102048, iteration: 282873
loss: 0.9864259362220764,grad_norm: 0.8594995678786748, iteration: 282874
loss: 1.0136321783065796,grad_norm: 0.8258805655095246, iteration: 282875
loss: 0.9935269951820374,grad_norm: 0.7223770113398509, iteration: 282876
loss: 0.9940457940101624,grad_norm: 0.9941441168353068, iteration: 282877
loss: 1.0069465637207031,grad_norm: 0.7415892473847222, iteration: 282878
loss: 0.9667791724205017,grad_norm: 0.9233403304567318, iteration: 282879
loss: 0.9875738024711609,grad_norm: 0.9551798348075345, iteration: 282880
loss: 0.9900227785110474,grad_norm: 0.962717856409884, iteration: 282881
loss: 0.98792564868927,grad_norm: 0.7354275928489936, iteration: 282882
loss: 0.972267210483551,grad_norm: 0.7988966479836963, iteration: 282883
loss: 0.9947361946105957,grad_norm: 0.9799480666776668, iteration: 282884
loss: 1.0218098163604736,grad_norm: 0.9999992828462947, iteration: 282885
loss: 0.99000483751297,grad_norm: 0.8402736179445285, iteration: 282886
loss: 1.1038079261779785,grad_norm: 0.9999998742328836, iteration: 282887
loss: 0.9975541830062866,grad_norm: 0.9156847549639163, iteration: 282888
loss: 1.0153464078903198,grad_norm: 0.8850187304714175, iteration: 282889
loss: 1.0017305612564087,grad_norm: 0.7690223813284004, iteration: 282890
loss: 0.998788058757782,grad_norm: 0.9999990928714776, iteration: 282891
loss: 0.9894564151763916,grad_norm: 0.8381432012530138, iteration: 282892
loss: 0.9963257312774658,grad_norm: 0.9999991099224101, iteration: 282893
loss: 1.0311528444290161,grad_norm: 0.9059021707408693, iteration: 282894
loss: 1.0051547288894653,grad_norm: 0.8266170714094985, iteration: 282895
loss: 1.0268356800079346,grad_norm: 0.6946569906365693, iteration: 282896
loss: 0.9989407062530518,grad_norm: 0.9349079677992738, iteration: 282897
loss: 1.0179845094680786,grad_norm: 0.8509926128395195, iteration: 282898
loss: 0.9748897552490234,grad_norm: 0.9078470588076563, iteration: 282899
loss: 1.0410045385360718,grad_norm: 0.9999995964032783, iteration: 282900
loss: 0.9911208152770996,grad_norm: 0.8423319747754504, iteration: 282901
loss: 1.0033944845199585,grad_norm: 0.9867726710806196, iteration: 282902
loss: 1.0071457624435425,grad_norm: 0.8713256811662576, iteration: 282903
loss: 0.9623618721961975,grad_norm: 0.9999990460628483, iteration: 282904
loss: 1.1167387962341309,grad_norm: 0.9836607834612232, iteration: 282905
loss: 1.026840090751648,grad_norm: 0.8717330793791533, iteration: 282906
loss: 1.0773184299468994,grad_norm: 0.9405478588779117, iteration: 282907
loss: 0.9889942407608032,grad_norm: 0.8084971706406504, iteration: 282908
loss: 1.0088493824005127,grad_norm: 0.8685205741502214, iteration: 282909
loss: 1.0405219793319702,grad_norm: 0.999999244239051, iteration: 282910
loss: 1.0052253007888794,grad_norm: 0.9624028129486197, iteration: 282911
loss: 0.9455999135971069,grad_norm: 0.9016614585029777, iteration: 282912
loss: 1.0290969610214233,grad_norm: 0.7775684517205215, iteration: 282913
loss: 0.964850902557373,grad_norm: 0.7130972838238803, iteration: 282914
loss: 1.0275259017944336,grad_norm: 0.7228723902840198, iteration: 282915
loss: 1.0056335926055908,grad_norm: 0.9058571589328646, iteration: 282916
loss: 0.992536187171936,grad_norm: 0.8625839963550079, iteration: 282917
loss: 1.1284253597259521,grad_norm: 0.9144239344850116, iteration: 282918
loss: 1.0167856216430664,grad_norm: 0.9623958033026967, iteration: 282919
loss: 1.0606153011322021,grad_norm: 0.9999992620629774, iteration: 282920
loss: 1.029707431793213,grad_norm: 0.9999999835564463, iteration: 282921
loss: 1.0103193521499634,grad_norm: 0.7781177103523245, iteration: 282922
loss: 1.0132787227630615,grad_norm: 0.7514980159085156, iteration: 282923
loss: 0.995027482509613,grad_norm: 0.952061902870774, iteration: 282924
loss: 0.9993495345115662,grad_norm: 0.9467626198002176, iteration: 282925
loss: 1.0218031406402588,grad_norm: 0.847542361483809, iteration: 282926
loss: 1.0388212203979492,grad_norm: 0.981802369997169, iteration: 282927
loss: 0.9968306422233582,grad_norm: 0.6993622036877946, iteration: 282928
loss: 1.0107489824295044,grad_norm: 0.8830754076613656, iteration: 282929
loss: 0.9929293394088745,grad_norm: 0.9999989477462248, iteration: 282930
loss: 1.014938473701477,grad_norm: 0.9826063917301875, iteration: 282931
loss: 1.0039511919021606,grad_norm: 0.7727059629816709, iteration: 282932
loss: 1.0346441268920898,grad_norm: 0.9999996031013224, iteration: 282933
loss: 0.979951798915863,grad_norm: 0.9308451679018377, iteration: 282934
loss: 1.0298712253570557,grad_norm: 0.9490475190156648, iteration: 282935
loss: 0.9859870672225952,grad_norm: 0.7819782788736401, iteration: 282936
loss: 0.9965028166770935,grad_norm: 0.9999990786362434, iteration: 282937
loss: 0.9992045760154724,grad_norm: 0.8653727755493227, iteration: 282938
loss: 0.9972082376480103,grad_norm: 0.8211272613720129, iteration: 282939
loss: 1.0165547132492065,grad_norm: 0.9705163952612607, iteration: 282940
loss: 0.9775305390357971,grad_norm: 0.8071045573305611, iteration: 282941
loss: 0.9790399074554443,grad_norm: 0.811730939852733, iteration: 282942
loss: 0.9815055727958679,grad_norm: 0.9164076341854273, iteration: 282943
loss: 0.9800564050674438,grad_norm: 0.8471266329130092, iteration: 282944
loss: 0.9999114274978638,grad_norm: 0.8843428838616993, iteration: 282945
loss: 1.013877272605896,grad_norm: 0.8511851518297493, iteration: 282946
loss: 1.0069273710250854,grad_norm: 0.898973590705774, iteration: 282947
loss: 1.009359359741211,grad_norm: 0.790218354720268, iteration: 282948
loss: 0.9743736982345581,grad_norm: 0.8727664035665972, iteration: 282949
loss: 0.9980459213256836,grad_norm: 0.9945117495927726, iteration: 282950
loss: 1.0181174278259277,grad_norm: 0.8001902105172598, iteration: 282951
loss: 0.998966634273529,grad_norm: 0.9198116356932418, iteration: 282952
loss: 1.004568338394165,grad_norm: 0.9999991926588669, iteration: 282953
loss: 0.9716723561286926,grad_norm: 0.8739007769828483, iteration: 282954
loss: 0.9930446743965149,grad_norm: 0.8399344911973001, iteration: 282955
loss: 0.9771613478660583,grad_norm: 0.8471983983249207, iteration: 282956
loss: 1.0029484033584595,grad_norm: 0.9519846281595115, iteration: 282957
loss: 0.9938992261886597,grad_norm: 0.9723493210384274, iteration: 282958
loss: 1.075693130493164,grad_norm: 0.9999997741056611, iteration: 282959
loss: 0.9729343056678772,grad_norm: 0.9393118722350525, iteration: 282960
loss: 0.9738785028457642,grad_norm: 0.8853237838577211, iteration: 282961
loss: 0.9939484596252441,grad_norm: 0.9999992036276115, iteration: 282962
loss: 0.9895681738853455,grad_norm: 0.7454383361789089, iteration: 282963
loss: 0.9759323596954346,grad_norm: 0.9597956394344259, iteration: 282964
loss: 0.991596519947052,grad_norm: 0.9427227634908816, iteration: 282965
loss: 0.9933180212974548,grad_norm: 0.9171649497693851, iteration: 282966
loss: 1.0017063617706299,grad_norm: 0.9306069473761798, iteration: 282967
loss: 1.0052375793457031,grad_norm: 0.881612533632084, iteration: 282968
loss: 0.9915555119514465,grad_norm: 0.8399009702165667, iteration: 282969
loss: 1.03946053981781,grad_norm: 0.8931912713977032, iteration: 282970
loss: 1.02354896068573,grad_norm: 0.83781920892534, iteration: 282971
loss: 1.055967926979065,grad_norm: 0.8905415821562366, iteration: 282972
loss: 0.9988366961479187,grad_norm: 0.9181444111571314, iteration: 282973
loss: 1.0823276042938232,grad_norm: 0.9999991206434847, iteration: 282974
loss: 1.0069888830184937,grad_norm: 0.9999994691035335, iteration: 282975
loss: 1.01118803024292,grad_norm: 0.8070836712111472, iteration: 282976
loss: 0.9732586741447449,grad_norm: 0.8656638704414346, iteration: 282977
loss: 0.9841550588607788,grad_norm: 0.753175164367489, iteration: 282978
loss: 1.0115442276000977,grad_norm: 0.9800187916292327, iteration: 282979
loss: 0.9943594336509705,grad_norm: 0.7742674867623172, iteration: 282980
loss: 1.0380676984786987,grad_norm: 0.99999941852524, iteration: 282981
loss: 0.9872852563858032,grad_norm: 0.9059438424930807, iteration: 282982
loss: 1.0114558935165405,grad_norm: 0.7134663630601443, iteration: 282983
loss: 0.969254732131958,grad_norm: 0.9852067970011085, iteration: 282984
loss: 1.000514268875122,grad_norm: 0.9897683846348044, iteration: 282985
loss: 1.054142951965332,grad_norm: 0.7288365328169782, iteration: 282986
loss: 1.0096486806869507,grad_norm: 0.8583098525968307, iteration: 282987
loss: 1.028678297996521,grad_norm: 0.9570724833391544, iteration: 282988
loss: 0.9841796159744263,grad_norm: 0.8859669905084684, iteration: 282989
loss: 0.9751443266868591,grad_norm: 0.8552632720478078, iteration: 282990
loss: 0.9753034710884094,grad_norm: 0.9078037837235355, iteration: 282991
loss: 0.965172529220581,grad_norm: 0.7866744813187226, iteration: 282992
loss: 0.9780517816543579,grad_norm: 0.7711258557947334, iteration: 282993
loss: 0.9894931316375732,grad_norm: 0.7531503912430613, iteration: 282994
loss: 1.0116833448410034,grad_norm: 0.9169099715900361, iteration: 282995
loss: 0.992943286895752,grad_norm: 0.6979798677152301, iteration: 282996
loss: 1.0371543169021606,grad_norm: 0.9999991784398811, iteration: 282997
loss: 0.9763060808181763,grad_norm: 0.968380894491945, iteration: 282998
loss: 0.9940168261528015,grad_norm: 0.8165260465676935, iteration: 282999
loss: 1.0166822671890259,grad_norm: 0.779772358997934, iteration: 283000
loss: 0.9903213381767273,grad_norm: 0.8310040196651796, iteration: 283001
loss: 0.9947014451026917,grad_norm: 0.809506283516341, iteration: 283002
loss: 0.995307981967926,grad_norm: 0.8598695055832446, iteration: 283003
loss: 1.0073678493499756,grad_norm: 0.885599447608835, iteration: 283004
loss: 0.977828323841095,grad_norm: 0.6968202009065285, iteration: 283005
loss: 0.9877042770385742,grad_norm: 0.9999989826461445, iteration: 283006
loss: 1.0352420806884766,grad_norm: 0.9999992530874179, iteration: 283007
loss: 1.0180833339691162,grad_norm: 0.7760431727443832, iteration: 283008
loss: 1.0269490480422974,grad_norm: 0.9721873412507579, iteration: 283009
loss: 1.1697622537612915,grad_norm: 0.9999995640783504, iteration: 283010
loss: 0.941408097743988,grad_norm: 0.8213863689663893, iteration: 283011
loss: 1.0100971460342407,grad_norm: 0.7897363768923488, iteration: 283012
loss: 0.971294641494751,grad_norm: 0.9555717485233679, iteration: 283013
loss: 1.0140200853347778,grad_norm: 0.8176896435693958, iteration: 283014
loss: 0.9769073724746704,grad_norm: 0.7240902791474434, iteration: 283015
loss: 0.958915114402771,grad_norm: 0.7682912424092817, iteration: 283016
loss: 1.0145097970962524,grad_norm: 0.8590487168639755, iteration: 283017
loss: 1.0211467742919922,grad_norm: 0.9478603906259648, iteration: 283018
loss: 0.9730672836303711,grad_norm: 0.7335448474933091, iteration: 283019
loss: 0.9971625804901123,grad_norm: 0.815595845700326, iteration: 283020
loss: 1.0215492248535156,grad_norm: 0.7422020794118043, iteration: 283021
loss: 1.0051523447036743,grad_norm: 0.7121227653895007, iteration: 283022
loss: 1.0370886325836182,grad_norm: 0.6854262271799331, iteration: 283023
loss: 1.0683213472366333,grad_norm: 0.9999992463803928, iteration: 283024
loss: 0.9909867644309998,grad_norm: 0.9999992002167124, iteration: 283025
loss: 1.022969365119934,grad_norm: 0.8174915658886486, iteration: 283026
loss: 0.9998010993003845,grad_norm: 0.9298625637436626, iteration: 283027
loss: 1.000917911529541,grad_norm: 0.9613960447403846, iteration: 283028
loss: 1.0138107538223267,grad_norm: 0.9999991268529596, iteration: 283029
loss: 1.015234351158142,grad_norm: 0.8970647709455171, iteration: 283030
loss: 0.9919878840446472,grad_norm: 0.7779586287203752, iteration: 283031
loss: 1.0188034772872925,grad_norm: 0.8155451320540155, iteration: 283032
loss: 0.9817706346511841,grad_norm: 0.9999991145235724, iteration: 283033
loss: 1.028740644454956,grad_norm: 0.9634488150421255, iteration: 283034
loss: 0.9765909314155579,grad_norm: 0.7000649773337094, iteration: 283035
loss: 0.9591866731643677,grad_norm: 0.8529022822360997, iteration: 283036
loss: 1.0278114080429077,grad_norm: 0.766840576371733, iteration: 283037
loss: 1.002978801727295,grad_norm: 0.7793281404113834, iteration: 283038
loss: 1.003009557723999,grad_norm: 0.8965575649964709, iteration: 283039
loss: 1.018473744392395,grad_norm: 0.9999990758289599, iteration: 283040
loss: 1.0062974691390991,grad_norm: 0.9999991083646065, iteration: 283041
loss: 1.0036451816558838,grad_norm: 0.9999990455074299, iteration: 283042
loss: 1.0210468769073486,grad_norm: 0.8944168699740119, iteration: 283043
loss: 1.0142019987106323,grad_norm: 0.9999991519860653, iteration: 283044
loss: 1.0112700462341309,grad_norm: 0.8976369994131058, iteration: 283045
loss: 1.0822163820266724,grad_norm: 0.9999991471922067, iteration: 283046
loss: 1.0012412071228027,grad_norm: 0.8332067191025554, iteration: 283047
loss: 1.0123662948608398,grad_norm: 0.9999992249281382, iteration: 283048
loss: 0.9921183586120605,grad_norm: 0.7437072341709132, iteration: 283049
loss: 1.0080618858337402,grad_norm: 0.7512065513300651, iteration: 283050
loss: 1.0198763608932495,grad_norm: 0.9999998995399881, iteration: 283051
loss: 1.0321450233459473,grad_norm: 0.9999996264826084, iteration: 283052
loss: 1.0013397932052612,grad_norm: 0.9969235314249927, iteration: 283053
loss: 1.0652889013290405,grad_norm: 0.9999992839972417, iteration: 283054
loss: 1.1206071376800537,grad_norm: 0.999999110486917, iteration: 283055
loss: 1.0243624448776245,grad_norm: 0.6972770606948383, iteration: 283056
loss: 1.0011014938354492,grad_norm: 0.8791926878234717, iteration: 283057
loss: 1.0065466165542603,grad_norm: 0.8230674071028354, iteration: 283058
loss: 0.9832634925842285,grad_norm: 0.8530150736462305, iteration: 283059
loss: 0.9707052707672119,grad_norm: 0.8444462196956994, iteration: 283060
loss: 0.9992704391479492,grad_norm: 0.8384465975597366, iteration: 283061
loss: 1.01087486743927,grad_norm: 0.8257856594635354, iteration: 283062
loss: 0.9902070164680481,grad_norm: 0.8184463976253943, iteration: 283063
loss: 0.9903210997581482,grad_norm: 0.8965572478351529, iteration: 283064
loss: 0.9829236268997192,grad_norm: 0.8852628437527489, iteration: 283065
loss: 0.9931477308273315,grad_norm: 0.999999056286135, iteration: 283066
loss: 1.0021921396255493,grad_norm: 0.9016227535464613, iteration: 283067
loss: 1.0060042142868042,grad_norm: 0.8228949142418683, iteration: 283068
loss: 0.9558552503585815,grad_norm: 0.8825157745143788, iteration: 283069
loss: 1.044614315032959,grad_norm: 0.999999545675442, iteration: 283070
loss: 1.0266954898834229,grad_norm: 0.8364814066867495, iteration: 283071
loss: 0.9768233895301819,grad_norm: 0.7703295208587038, iteration: 283072
loss: 1.0097439289093018,grad_norm: 0.9250016761280904, iteration: 283073
loss: 1.0864237546920776,grad_norm: 0.9999992928993904, iteration: 283074
loss: 1.0013521909713745,grad_norm: 0.9004808254583275, iteration: 283075
loss: 0.9916173815727234,grad_norm: 0.9950303147904286, iteration: 283076
loss: 0.995639443397522,grad_norm: 0.9999992195575134, iteration: 283077
loss: 0.9957098364830017,grad_norm: 0.8067032217121111, iteration: 283078
loss: 1.013717532157898,grad_norm: 0.9999996553745261, iteration: 283079
loss: 0.9868210554122925,grad_norm: 0.7870693493288717, iteration: 283080
loss: 0.9623352885246277,grad_norm: 0.8168626174036169, iteration: 283081
loss: 1.008131980895996,grad_norm: 0.832843238634811, iteration: 283082
loss: 1.0229178667068481,grad_norm: 0.999999546051955, iteration: 283083
loss: 1.0324773788452148,grad_norm: 0.796747051308427, iteration: 283084
loss: 1.0150829553604126,grad_norm: 0.9681678210280049, iteration: 283085
loss: 1.008226990699768,grad_norm: 0.7532362143288646, iteration: 283086
loss: 0.9971780776977539,grad_norm: 0.946014980866836, iteration: 283087
loss: 0.9583274126052856,grad_norm: 0.7885645571946173, iteration: 283088
loss: 1.004248023033142,grad_norm: 0.9206484943554022, iteration: 283089
loss: 1.0151914358139038,grad_norm: 0.806833690876916, iteration: 283090
loss: 0.9960730075836182,grad_norm: 0.8035669971633174, iteration: 283091
loss: 1.0204793214797974,grad_norm: 0.8921045339365486, iteration: 283092
loss: 0.9766787886619568,grad_norm: 0.8924117957641721, iteration: 283093
loss: 1.042233943939209,grad_norm: 0.7751576134892677, iteration: 283094
loss: 1.0257610082626343,grad_norm: 0.9999995026995173, iteration: 283095
loss: 1.0256043672561646,grad_norm: 0.9999994716320768, iteration: 283096
loss: 0.9865292310714722,grad_norm: 0.8442096056368874, iteration: 283097
loss: 1.034279227256775,grad_norm: 0.8540128656887812, iteration: 283098
loss: 1.099721908569336,grad_norm: 0.9596464979776547, iteration: 283099
loss: 1.0074987411499023,grad_norm: 0.7512267951462144, iteration: 283100
loss: 0.9770627617835999,grad_norm: 0.9999990906151901, iteration: 283101
loss: 0.9896796941757202,grad_norm: 0.9999991966541819, iteration: 283102
loss: 1.0119731426239014,grad_norm: 0.8737946393867932, iteration: 283103
loss: 1.0041636228561401,grad_norm: 0.7924059211749281, iteration: 283104
loss: 1.0096570253372192,grad_norm: 0.8403480696226749, iteration: 283105
loss: 1.022262454032898,grad_norm: 0.9999992303189581, iteration: 283106
loss: 1.0154249668121338,grad_norm: 0.8503374308499696, iteration: 283107
loss: 1.0113165378570557,grad_norm: 0.8854223519886503, iteration: 283108
loss: 1.0020322799682617,grad_norm: 0.8323380280398651, iteration: 283109
loss: 0.9844443798065186,grad_norm: 0.8243719120111499, iteration: 283110
loss: 1.0163018703460693,grad_norm: 0.8762181893110411, iteration: 283111
loss: 1.2177964448928833,grad_norm: 0.9999994257743148, iteration: 283112
loss: 0.9698870182037354,grad_norm: 0.7634259037774257, iteration: 283113
loss: 1.0347437858581543,grad_norm: 0.9999999137342134, iteration: 283114
loss: 0.9702512621879578,grad_norm: 0.8866318309279203, iteration: 283115
loss: 0.9995719790458679,grad_norm: 0.8314249985466612, iteration: 283116
loss: 1.1559162139892578,grad_norm: 0.9999995729109252, iteration: 283117
loss: 1.0252197980880737,grad_norm: 0.8202060761654256, iteration: 283118
loss: 0.9801568388938904,grad_norm: 0.9701645477747749, iteration: 283119
loss: 1.0963196754455566,grad_norm: 0.999999301100879, iteration: 283120
loss: 0.9806622862815857,grad_norm: 0.8423834216266183, iteration: 283121
loss: 1.0558184385299683,grad_norm: 0.9158521936067927, iteration: 283122
loss: 0.9919151663780212,grad_norm: 0.9796921661379175, iteration: 283123
loss: 1.0208672285079956,grad_norm: 0.7929011750194283, iteration: 283124
loss: 0.9920483827590942,grad_norm: 0.8507107424710743, iteration: 283125
loss: 1.033154010772705,grad_norm: 0.9571860667484537, iteration: 283126
loss: 1.0168554782867432,grad_norm: 0.8322650937633262, iteration: 283127
loss: 0.9628952145576477,grad_norm: 0.9269864057433944, iteration: 283128
loss: 0.9775914549827576,grad_norm: 0.999999131048676, iteration: 283129
loss: 1.018169641494751,grad_norm: 0.7680148415952567, iteration: 283130
loss: 1.000179409980774,grad_norm: 0.828114428429524, iteration: 283131
loss: 1.0098081827163696,grad_norm: 0.8705406628093488, iteration: 283132
loss: 1.0042983293533325,grad_norm: 0.9999991211324837, iteration: 283133
loss: 1.012471079826355,grad_norm: 0.9422469038390602, iteration: 283134
loss: 1.0141302347183228,grad_norm: 0.7193658347496193, iteration: 283135
loss: 1.0084096193313599,grad_norm: 0.8704909753122206, iteration: 283136
loss: 1.0227192640304565,grad_norm: 0.7764746940540759, iteration: 283137
loss: 1.0334504842758179,grad_norm: 0.9999998760452189, iteration: 283138
loss: 1.034970760345459,grad_norm: 0.8866244084426108, iteration: 283139
loss: 1.0252918004989624,grad_norm: 0.886233927662937, iteration: 283140
loss: 1.0293700695037842,grad_norm: 0.7863862858840813, iteration: 283141
loss: 1.0192456245422363,grad_norm: 0.9313346600447996, iteration: 283142
loss: 0.9957111477851868,grad_norm: 0.8758475080507465, iteration: 283143
loss: 1.0324021577835083,grad_norm: 0.9212034058732053, iteration: 283144
loss: 1.0162785053253174,grad_norm: 0.8061055940773734, iteration: 283145
loss: 1.0843907594680786,grad_norm: 0.9999997033742716, iteration: 283146
loss: 0.9878843426704407,grad_norm: 0.9544679389417003, iteration: 283147
loss: 1.0297155380249023,grad_norm: 0.9305488699966148, iteration: 283148
loss: 1.0016674995422363,grad_norm: 0.744870141581469, iteration: 283149
loss: 1.067058801651001,grad_norm: 0.935602941199789, iteration: 283150
loss: 1.0049123764038086,grad_norm: 0.9367792328800779, iteration: 283151
loss: 1.0604287385940552,grad_norm: 0.9999991177565334, iteration: 283152
loss: 1.0255204439163208,grad_norm: 0.8988499761277696, iteration: 283153
loss: 0.9730322957038879,grad_norm: 0.9177939072264624, iteration: 283154
loss: 1.0291532278060913,grad_norm: 0.8347116037272886, iteration: 283155
loss: 1.0011221170425415,grad_norm: 0.9999995970506776, iteration: 283156
loss: 1.0139186382293701,grad_norm: 0.6919722323531867, iteration: 283157
loss: 0.9943652749061584,grad_norm: 0.8760015160725875, iteration: 283158
loss: 0.999430239200592,grad_norm: 0.9139704086207329, iteration: 283159
loss: 0.9681325554847717,grad_norm: 0.7687067257468858, iteration: 283160
loss: 1.0568222999572754,grad_norm: 0.9999998246990571, iteration: 283161
loss: 0.9660757184028625,grad_norm: 0.8930560669500232, iteration: 283162
loss: 0.9813564419746399,grad_norm: 0.9999990758625238, iteration: 283163
loss: 1.0301278829574585,grad_norm: 0.9999991064404077, iteration: 283164
loss: 0.9623900055885315,grad_norm: 0.9392822731209086, iteration: 283165
loss: 0.997921347618103,grad_norm: 0.9999990975123132, iteration: 283166
loss: 0.9847346544265747,grad_norm: 0.7381621148443653, iteration: 283167
loss: 0.9941304922103882,grad_norm: 0.7414716612672186, iteration: 283168
loss: 0.9832556843757629,grad_norm: 0.8814409170802157, iteration: 283169
loss: 1.0409258604049683,grad_norm: 0.9741687898044444, iteration: 283170
loss: 1.0088298320770264,grad_norm: 0.7453970743821147, iteration: 283171
loss: 0.9856324791908264,grad_norm: 0.8249516079766287, iteration: 283172
loss: 0.9858253002166748,grad_norm: 0.8708451667527752, iteration: 283173
loss: 1.0285067558288574,grad_norm: 0.9999998799154989, iteration: 283174
loss: 0.9955427050590515,grad_norm: 0.7914327576479088, iteration: 283175
loss: 1.0036582946777344,grad_norm: 0.7702366201254299, iteration: 283176
loss: 1.0290356874465942,grad_norm: 0.9257082755320657, iteration: 283177
loss: 0.9435779452323914,grad_norm: 0.760770378435756, iteration: 283178
loss: 0.9960721731185913,grad_norm: 0.9999991497370606, iteration: 283179
loss: 0.9969682097434998,grad_norm: 0.9999990875814155, iteration: 283180
loss: 1.0178906917572021,grad_norm: 0.8748152263048512, iteration: 283181
loss: 1.0015860795974731,grad_norm: 0.8723598177365481, iteration: 283182
loss: 0.9946014285087585,grad_norm: 0.9953534133445464, iteration: 283183
loss: 1.0100035667419434,grad_norm: 0.8878434127775796, iteration: 283184
loss: 1.002630591392517,grad_norm: 0.9999990796219119, iteration: 283185
loss: 0.9687718749046326,grad_norm: 0.879469182613946, iteration: 283186
loss: 1.0311392545700073,grad_norm: 0.8694631338283093, iteration: 283187
loss: 1.0334256887435913,grad_norm: 0.9999991327963322, iteration: 283188
loss: 1.0577000379562378,grad_norm: 0.9999990216408369, iteration: 283189
loss: 1.0220195055007935,grad_norm: 0.8106149051298424, iteration: 283190
loss: 1.1023178100585938,grad_norm: 0.9999999991584909, iteration: 283191
loss: 0.9985340237617493,grad_norm: 0.9564266580870155, iteration: 283192
loss: 0.9893280863761902,grad_norm: 0.9999991448381073, iteration: 283193
loss: 0.9665062427520752,grad_norm: 0.8498608392842225, iteration: 283194
loss: 1.0262231826782227,grad_norm: 0.7061406694618061, iteration: 283195
loss: 1.0247747898101807,grad_norm: 0.9999988178220706, iteration: 283196
loss: 0.9558566808700562,grad_norm: 0.8575793960341996, iteration: 283197
loss: 1.0196267366409302,grad_norm: 0.7628219405879939, iteration: 283198
loss: 1.0109531879425049,grad_norm: 0.9931447881509453, iteration: 283199
loss: 0.9932437539100647,grad_norm: 0.737474322236491, iteration: 283200
loss: 0.9556071162223816,grad_norm: 0.9351597517604187, iteration: 283201
loss: 0.9760497212409973,grad_norm: 0.7060923940363466, iteration: 283202
loss: 1.0081462860107422,grad_norm: 0.9655955602729855, iteration: 283203
loss: 1.035529613494873,grad_norm: 0.9392428731975228, iteration: 283204
loss: 0.9813437461853027,grad_norm: 0.8382870920408696, iteration: 283205
loss: 1.0337940454483032,grad_norm: 0.9999992706471534, iteration: 283206
loss: 0.9977611899375916,grad_norm: 0.9999990554314491, iteration: 283207
loss: 1.0350337028503418,grad_norm: 0.9999990467346838, iteration: 283208
loss: 1.0024906396865845,grad_norm: 0.9999991765462386, iteration: 283209
loss: 0.9899783134460449,grad_norm: 0.6961835281971746, iteration: 283210
loss: 0.9860776662826538,grad_norm: 0.8696178764987975, iteration: 283211
loss: 0.9542621970176697,grad_norm: 0.837153490494786, iteration: 283212
loss: 0.9975881576538086,grad_norm: 0.9162402448557122, iteration: 283213
loss: 1.0416210889816284,grad_norm: 0.7750656784582544, iteration: 283214
loss: 0.966649055480957,grad_norm: 0.9999990833415072, iteration: 283215
loss: 1.1427862644195557,grad_norm: 0.9999997118180358, iteration: 283216
loss: 1.0073274374008179,grad_norm: 0.7362004540286311, iteration: 283217
loss: 0.9982401728630066,grad_norm: 0.8453834822000952, iteration: 283218
loss: 1.0462646484375,grad_norm: 0.9999993443935897, iteration: 283219
loss: 1.072015404701233,grad_norm: 0.9999997255062025, iteration: 283220
loss: 1.0267281532287598,grad_norm: 0.7804825411443422, iteration: 283221
loss: 0.9869760870933533,grad_norm: 0.761321062033062, iteration: 283222
loss: 1.003490686416626,grad_norm: 0.9190256389553684, iteration: 283223
loss: 1.0709773302078247,grad_norm: 0.8670758053586944, iteration: 283224
loss: 1.00509512424469,grad_norm: 0.7761813936812432, iteration: 283225
loss: 0.9944334030151367,grad_norm: 0.9999989942768487, iteration: 283226
loss: 1.0022021532058716,grad_norm: 0.9999990171483164, iteration: 283227
loss: 0.9820681214332581,grad_norm: 0.7558540787439525, iteration: 283228
loss: 1.010858416557312,grad_norm: 0.7274426560884379, iteration: 283229
loss: 1.0416173934936523,grad_norm: 0.9999991199248051, iteration: 283230
loss: 1.0057384967803955,grad_norm: 0.7932797722924582, iteration: 283231
loss: 1.0192328691482544,grad_norm: 0.922850062486291, iteration: 283232
loss: 1.051131248474121,grad_norm: 0.999999508762294, iteration: 283233
loss: 1.0217339992523193,grad_norm: 0.7314699447257609, iteration: 283234
loss: 0.9890925884246826,grad_norm: 0.8417857686799378, iteration: 283235
loss: 0.982413649559021,grad_norm: 0.9999990025754996, iteration: 283236
loss: 0.987313985824585,grad_norm: 0.9602628232665553, iteration: 283237
loss: 1.0157630443572998,grad_norm: 0.9999997623309834, iteration: 283238
loss: 1.0148555040359497,grad_norm: 0.8186667956126765, iteration: 283239
loss: 0.9958059191703796,grad_norm: 0.8245073608867062, iteration: 283240
loss: 0.996364414691925,grad_norm: 0.848122311834233, iteration: 283241
loss: 1.0038645267486572,grad_norm: 0.8562041015847679, iteration: 283242
loss: 0.9660079479217529,grad_norm: 0.8665106717440391, iteration: 283243
loss: 1.013805866241455,grad_norm: 0.9102925573542832, iteration: 283244
loss: 0.9803394675254822,grad_norm: 0.9020099937464947, iteration: 283245
loss: 1.0145232677459717,grad_norm: 0.821078469833361, iteration: 283246
loss: 0.9767020344734192,grad_norm: 0.800356135301955, iteration: 283247
loss: 0.9416162967681885,grad_norm: 0.9394452920623253, iteration: 283248
loss: 1.0929228067398071,grad_norm: 0.9999996805028649, iteration: 283249
loss: 0.9926466345787048,grad_norm: 0.999999097706278, iteration: 283250
loss: 0.9968073964118958,grad_norm: 0.8982794997387287, iteration: 283251
loss: 1.0280983448028564,grad_norm: 0.8241981360207087, iteration: 283252
loss: 1.0128980875015259,grad_norm: 0.9163930271514803, iteration: 283253
loss: 0.9958405494689941,grad_norm: 0.6483709011739229, iteration: 283254
loss: 1.0079127550125122,grad_norm: 0.9752274895589599, iteration: 283255
loss: 0.994676411151886,grad_norm: 0.8931468151193169, iteration: 283256
loss: 0.9847819209098816,grad_norm: 0.8480269314007175, iteration: 283257
loss: 1.0148066282272339,grad_norm: 0.7680021748759198, iteration: 283258
loss: 1.0045350790023804,grad_norm: 0.8596380549788254, iteration: 283259
loss: 0.9658325910568237,grad_norm: 0.8400834017173767, iteration: 283260
loss: 1.0175660848617554,grad_norm: 0.8563712573461922, iteration: 283261
loss: 0.9654086828231812,grad_norm: 0.8519486731853174, iteration: 283262
loss: 0.9666727781295776,grad_norm: 0.8891542143005811, iteration: 283263
loss: 1.0189979076385498,grad_norm: 0.9999990154508982, iteration: 283264
loss: 0.9942778944969177,grad_norm: 0.7555286374654366, iteration: 283265
loss: 0.9871808290481567,grad_norm: 0.7626111336332221, iteration: 283266
loss: 1.096007227897644,grad_norm: 0.9999992898153869, iteration: 283267
loss: 0.98250812292099,grad_norm: 0.9191501223162931, iteration: 283268
loss: 1.0045911073684692,grad_norm: 0.9999991355393465, iteration: 283269
loss: 0.9945361614227295,grad_norm: 0.9999994432998739, iteration: 283270
loss: 1.023311734199524,grad_norm: 0.9090960190658579, iteration: 283271
loss: 0.9568997025489807,grad_norm: 0.9999990959742784, iteration: 283272
loss: 0.996712327003479,grad_norm: 0.8553422658966894, iteration: 283273
loss: 0.9872136116027832,grad_norm: 0.8508617804098211, iteration: 283274
loss: 1.008179783821106,grad_norm: 0.9117540371660935, iteration: 283275
loss: 0.9745438694953918,grad_norm: 0.824742372278959, iteration: 283276
loss: 0.9866355657577515,grad_norm: 0.807456749794556, iteration: 283277
loss: 1.0242061614990234,grad_norm: 0.7334187504630196, iteration: 283278
loss: 1.0168648958206177,grad_norm: 0.7904041886526517, iteration: 283279
loss: 1.0457589626312256,grad_norm: 0.8255050331677847, iteration: 283280
loss: 1.0679196119308472,grad_norm: 0.9999990221722723, iteration: 283281
loss: 1.05152428150177,grad_norm: 0.9999996213579406, iteration: 283282
loss: 0.9895334243774414,grad_norm: 0.9434260739369991, iteration: 283283
loss: 0.9954848289489746,grad_norm: 0.9423500402709727, iteration: 283284
loss: 1.0353041887283325,grad_norm: 0.8800023491794913, iteration: 283285
loss: 1.026565432548523,grad_norm: 0.9999991252783773, iteration: 283286
loss: 0.9868167638778687,grad_norm: 0.8038461774607871, iteration: 283287
loss: 1.0023791790008545,grad_norm: 0.9070603602587852, iteration: 283288
loss: 1.0284160375595093,grad_norm: 0.8890052557105054, iteration: 283289
loss: 0.9948694109916687,grad_norm: 0.999999181216446, iteration: 283290
loss: 1.0223580598831177,grad_norm: 0.8405961310220599, iteration: 283291
loss: 1.0011882781982422,grad_norm: 0.8936522874030958, iteration: 283292
loss: 0.9976155161857605,grad_norm: 0.999999555876266, iteration: 283293
loss: 1.0129060745239258,grad_norm: 0.7889041131312301, iteration: 283294
loss: 1.0209006071090698,grad_norm: 0.8628935885430389, iteration: 283295
loss: 1.02460777759552,grad_norm: 0.7891589805073486, iteration: 283296
loss: 1.0061854124069214,grad_norm: 0.9999993537723078, iteration: 283297
loss: 1.004509687423706,grad_norm: 0.8087924409631704, iteration: 283298
loss: 1.0046311616897583,grad_norm: 0.8099163367873957, iteration: 283299
loss: 1.0256497859954834,grad_norm: 0.7813803645701743, iteration: 283300
loss: 1.0034977197647095,grad_norm: 0.7858173603523348, iteration: 283301
loss: 0.9867097735404968,grad_norm: 0.932813465664357, iteration: 283302
loss: 0.9931852221488953,grad_norm: 0.7902459188642426, iteration: 283303
loss: 0.9927325248718262,grad_norm: 0.9261698531843372, iteration: 283304
loss: 0.9967389702796936,grad_norm: 0.7593901436050203, iteration: 283305
loss: 1.0044599771499634,grad_norm: 0.8326867672567795, iteration: 283306
loss: 0.9696049690246582,grad_norm: 0.8044593392760622, iteration: 283307
loss: 0.9566176533699036,grad_norm: 0.7375937326202839, iteration: 283308
loss: 0.9627231955528259,grad_norm: 0.7692253377363397, iteration: 283309
loss: 0.9778628349304199,grad_norm: 0.7353880176194503, iteration: 283310
loss: 0.9969421625137329,grad_norm: 0.9371772697210672, iteration: 283311
loss: 1.0088798999786377,grad_norm: 0.9999990098443158, iteration: 283312
loss: 0.9736828207969666,grad_norm: 0.9785665134229392, iteration: 283313
loss: 1.0429863929748535,grad_norm: 0.9999992764253769, iteration: 283314
loss: 0.9721300601959229,grad_norm: 0.8842412169220231, iteration: 283315
loss: 1.0233850479125977,grad_norm: 0.9999999030521818, iteration: 283316
loss: 1.0190935134887695,grad_norm: 0.9749810037212713, iteration: 283317
loss: 0.9656580090522766,grad_norm: 0.7986537558252405, iteration: 283318
loss: 0.9921760559082031,grad_norm: 0.9999991009909124, iteration: 283319
loss: 0.9856215119361877,grad_norm: 0.9999991794590091, iteration: 283320
loss: 0.9875034093856812,grad_norm: 0.6857077449577718, iteration: 283321
loss: 0.9930822253227234,grad_norm: 0.8779195849053854, iteration: 283322
loss: 0.9754816889762878,grad_norm: 0.7789043418317333, iteration: 283323
loss: 0.9889311194419861,grad_norm: 0.8056590419506942, iteration: 283324
loss: 0.9545269012451172,grad_norm: 0.8444399878493541, iteration: 283325
loss: 0.9918498992919922,grad_norm: 0.8939600695654982, iteration: 283326
loss: 1.018731951713562,grad_norm: 0.9199842702816168, iteration: 283327
loss: 0.988400936126709,grad_norm: 0.8644422668800654, iteration: 283328
loss: 0.9908804893493652,grad_norm: 0.9738575094308076, iteration: 283329
loss: 1.0381736755371094,grad_norm: 0.8485687523404226, iteration: 283330
loss: 0.9802595376968384,grad_norm: 0.9424721546779754, iteration: 283331
loss: 0.9973607659339905,grad_norm: 0.8052767500103571, iteration: 283332
loss: 0.9869093894958496,grad_norm: 0.7493324520573863, iteration: 283333
loss: 0.9956204891204834,grad_norm: 0.9979229719626379, iteration: 283334
loss: 0.9824112057685852,grad_norm: 0.8587768404462102, iteration: 283335
loss: 1.0056438446044922,grad_norm: 0.7363515508094947, iteration: 283336
loss: 1.0047156810760498,grad_norm: 0.9007838597500336, iteration: 283337
loss: 0.9859287142753601,grad_norm: 0.9999991335797596, iteration: 283338
loss: 1.0216776132583618,grad_norm: 0.9999990163260885, iteration: 283339
loss: 1.0076578855514526,grad_norm: 0.8511905025645018, iteration: 283340
loss: 0.9950904250144958,grad_norm: 0.9999993009454053, iteration: 283341
loss: 1.0911643505096436,grad_norm: 0.9999992583715708, iteration: 283342
loss: 1.0787544250488281,grad_norm: 0.9999992717796765, iteration: 283343
loss: 0.984574556350708,grad_norm: 0.7878522383764165, iteration: 283344
loss: 1.021878719329834,grad_norm: 0.7086545747546217, iteration: 283345
loss: 1.0298186540603638,grad_norm: 0.9999996418963807, iteration: 283346
loss: 1.0253552198410034,grad_norm: 0.8294508164616318, iteration: 283347
loss: 1.0206210613250732,grad_norm: 0.9701250059794334, iteration: 283348
loss: 0.9570624828338623,grad_norm: 0.7901686623450971, iteration: 283349
loss: 1.0292792320251465,grad_norm: 0.8888779883587695, iteration: 283350
loss: 1.0220292806625366,grad_norm: 0.9999992955523445, iteration: 283351
loss: 1.011443853378296,grad_norm: 0.8313037386587944, iteration: 283352
loss: 1.0018086433410645,grad_norm: 0.9468449062855592, iteration: 283353
loss: 0.9995715618133545,grad_norm: 0.710798443544815, iteration: 283354
loss: 1.0033533573150635,grad_norm: 0.97895267206013, iteration: 283355
loss: 1.0323984622955322,grad_norm: 0.9999989030811841, iteration: 283356
loss: 1.0216315984725952,grad_norm: 0.9009039589052923, iteration: 283357
loss: 0.9910376667976379,grad_norm: 0.7296568482774404, iteration: 283358
loss: 0.9663389921188354,grad_norm: 0.8348833958241001, iteration: 283359
loss: 0.9792578816413879,grad_norm: 0.843880272952597, iteration: 283360
loss: 0.993407130241394,grad_norm: 0.8660587413681192, iteration: 283361
loss: 1.0142688751220703,grad_norm: 0.7619402005017479, iteration: 283362
loss: 0.9486151933670044,grad_norm: 0.8545306263578648, iteration: 283363
loss: 1.0118392705917358,grad_norm: 0.9179970181579397, iteration: 283364
loss: 0.9941866993904114,grad_norm: 0.9599201761541987, iteration: 283365
loss: 0.9514548182487488,grad_norm: 0.9631969518028277, iteration: 283366
loss: 1.047523856163025,grad_norm: 0.9490115189804761, iteration: 283367
loss: 0.9861098527908325,grad_norm: 0.8540167429989415, iteration: 283368
loss: 1.0858218669891357,grad_norm: 1.0000000530947577, iteration: 283369
loss: 0.9920616149902344,grad_norm: 0.911397854271544, iteration: 283370
loss: 0.9985717535018921,grad_norm: 0.8009908796637347, iteration: 283371
loss: 0.9953054189682007,grad_norm: 0.9023978463788191, iteration: 283372
loss: 0.9888988137245178,grad_norm: 0.7881356631657028, iteration: 283373
loss: 1.0216246843338013,grad_norm: 0.9999995802685183, iteration: 283374
loss: 1.0096358060836792,grad_norm: 0.9174317683839948, iteration: 283375
loss: 0.9947176575660706,grad_norm: 0.7259425499848136, iteration: 283376
loss: 1.021484375,grad_norm: 0.8415252206280218, iteration: 283377
loss: 1.0603864192962646,grad_norm: 0.999999410920671, iteration: 283378
loss: 1.070685863494873,grad_norm: 0.8395460594094064, iteration: 283379
loss: 0.9486042857170105,grad_norm: 0.8267081544668131, iteration: 283380
loss: 1.0807461738586426,grad_norm: 0.8243354420438765, iteration: 283381
loss: 0.9943791031837463,grad_norm: 0.9999989743356513, iteration: 283382
loss: 0.9785911440849304,grad_norm: 0.8598677283506203, iteration: 283383
loss: 0.99853515625,grad_norm: 0.865235485702984, iteration: 283384
loss: 0.9845470786094666,grad_norm: 0.9104566184862977, iteration: 283385
loss: 0.9983707666397095,grad_norm: 0.7850852118680856, iteration: 283386
loss: 1.0047591924667358,grad_norm: 0.9485033842621624, iteration: 283387
loss: 1.1600449085235596,grad_norm: 0.9999998040760056, iteration: 283388
loss: 1.0468822717666626,grad_norm: 0.9999999387035002, iteration: 283389
loss: 0.9969540238380432,grad_norm: 0.7409180138860765, iteration: 283390
loss: 0.9931330680847168,grad_norm: 0.9136153635423151, iteration: 283391
loss: 1.0542594194412231,grad_norm: 0.9999990073023727, iteration: 283392
loss: 0.9882957935333252,grad_norm: 0.7136964657453538, iteration: 283393
loss: 1.0238099098205566,grad_norm: 0.9999992087647728, iteration: 283394
loss: 1.0110009908676147,grad_norm: 0.9447630832619177, iteration: 283395
loss: 1.1176279783248901,grad_norm: 0.9999998248339349, iteration: 283396
loss: 1.119055986404419,grad_norm: 0.9999992944190521, iteration: 283397
loss: 1.000595211982727,grad_norm: 0.8218437962966165, iteration: 283398
loss: 1.0368924140930176,grad_norm: 0.8483501173305675, iteration: 283399
loss: 0.9721576571464539,grad_norm: 0.9435284300920833, iteration: 283400
loss: 0.9810628890991211,grad_norm: 0.8320035498177795, iteration: 283401
loss: 0.9610188603401184,grad_norm: 0.8667362754959693, iteration: 283402
loss: 0.9792605042457581,grad_norm: 0.821353752441893, iteration: 283403
loss: 0.9739782214164734,grad_norm: 0.9286447960965484, iteration: 283404
loss: 0.9457236528396606,grad_norm: 0.8398314540111563, iteration: 283405
loss: 1.0101877450942993,grad_norm: 0.847428329865408, iteration: 283406
loss: 1.0363242626190186,grad_norm: 0.9999992464751256, iteration: 283407
loss: 0.9926059246063232,grad_norm: 0.9999991502364582, iteration: 283408
loss: 1.0194011926651,grad_norm: 0.7755589878577843, iteration: 283409
loss: 0.9875231981277466,grad_norm: 0.7897930246408997, iteration: 283410
loss: 1.075393795967102,grad_norm: 0.8302484209668358, iteration: 283411
loss: 0.9796267747879028,grad_norm: 0.7739742007174824, iteration: 283412
loss: 0.9736204147338867,grad_norm: 0.8239733475563341, iteration: 283413
loss: 0.997246265411377,grad_norm: 0.9952904741288912, iteration: 283414
loss: 1.0753966569900513,grad_norm: 0.8766025277101236, iteration: 283415
loss: 1.0297433137893677,grad_norm: 0.9999994584849967, iteration: 283416
loss: 1.026272177696228,grad_norm: 0.9999990625734225, iteration: 283417
loss: 1.0203590393066406,grad_norm: 0.815137749517466, iteration: 283418
loss: 1.0172784328460693,grad_norm: 0.9149077744689663, iteration: 283419
loss: 0.978932797908783,grad_norm: 0.9160033541447025, iteration: 283420
loss: 1.0142903327941895,grad_norm: 0.9999990328615639, iteration: 283421
loss: 1.02406907081604,grad_norm: 0.7716700177511773, iteration: 283422
loss: 0.998683512210846,grad_norm: 0.809913378279649, iteration: 283423
loss: 0.9905542731285095,grad_norm: 0.9999990684827862, iteration: 283424
loss: 1.0296725034713745,grad_norm: 0.9334528543474276, iteration: 283425
loss: 1.018932580947876,grad_norm: 0.8075190462242768, iteration: 283426
loss: 1.0506216287612915,grad_norm: 0.9999990531264991, iteration: 283427
loss: 1.0030781030654907,grad_norm: 0.8403607193894289, iteration: 283428
loss: 1.0826483964920044,grad_norm: 0.9999994280465271, iteration: 283429
loss: 0.9962829351425171,grad_norm: 0.8861229378528157, iteration: 283430
loss: 0.9725638031959534,grad_norm: 0.9332466171677555, iteration: 283431
loss: 1.05881667137146,grad_norm: 0.9641657617741932, iteration: 283432
loss: 1.0159287452697754,grad_norm: 0.8725484268122685, iteration: 283433
loss: 1.044704556465149,grad_norm: 0.9999999297022275, iteration: 283434
loss: 0.9850454330444336,grad_norm: 0.829127151561176, iteration: 283435
loss: 1.0376179218292236,grad_norm: 0.8935551567870357, iteration: 283436
loss: 0.9644356369972229,grad_norm: 0.7907399684798426, iteration: 283437
loss: 0.9926523566246033,grad_norm: 0.7212300488048023, iteration: 283438
loss: 0.9838479161262512,grad_norm: 0.8655414404152751, iteration: 283439
loss: 0.9648868441581726,grad_norm: 0.8066773964921566, iteration: 283440
loss: 0.9828882217407227,grad_norm: 0.8023086642374155, iteration: 283441
loss: 1.0170537233352661,grad_norm: 0.8776730700159612, iteration: 283442
loss: 1.0299350023269653,grad_norm: 0.8913124174982946, iteration: 283443
loss: 1.01085364818573,grad_norm: 0.872658163159243, iteration: 283444
loss: 1.0450466871261597,grad_norm: 0.9999997894896265, iteration: 283445
loss: 1.0825084447860718,grad_norm: 0.9264101806448435, iteration: 283446
loss: 0.9287602305412292,grad_norm: 0.7955400657330844, iteration: 283447
loss: 1.065507411956787,grad_norm: 0.9999996453468549, iteration: 283448
loss: 1.0183956623077393,grad_norm: 0.9599211016877341, iteration: 283449
loss: 1.019357681274414,grad_norm: 0.8662296836702805, iteration: 283450
loss: 1.0168178081512451,grad_norm: 0.7997151027180948, iteration: 283451
loss: 0.992163360118866,grad_norm: 0.7418872971233138, iteration: 283452
loss: 1.0154244899749756,grad_norm: 0.7929975146258399, iteration: 283453
loss: 0.9876761436462402,grad_norm: 0.7840935902143278, iteration: 283454
loss: 0.9591549038887024,grad_norm: 0.8347999145625972, iteration: 283455
loss: 0.9709258079528809,grad_norm: 0.915979248508073, iteration: 283456
loss: 1.0499335527420044,grad_norm: 0.9999993931290063, iteration: 283457
loss: 0.9973729848861694,grad_norm: 0.9999996586214474, iteration: 283458
loss: 1.0443482398986816,grad_norm: 0.9999991142467014, iteration: 283459
loss: 0.9835957288742065,grad_norm: 0.7427600232684937, iteration: 283460
loss: 0.9961471557617188,grad_norm: 0.6822459718291857, iteration: 283461
loss: 1.0228700637817383,grad_norm: 0.8172227301012123, iteration: 283462
loss: 1.0235103368759155,grad_norm: 0.9976533134974274, iteration: 283463
loss: 0.9768743515014648,grad_norm: 0.9154742088472658, iteration: 283464
loss: 0.9785379767417908,grad_norm: 0.9999990099696711, iteration: 283465
loss: 1.0218783617019653,grad_norm: 0.9999993676769248, iteration: 283466
loss: 0.9906557202339172,grad_norm: 0.7878183950238925, iteration: 283467
loss: 0.9671235680580139,grad_norm: 0.8823329958350816, iteration: 283468
loss: 0.9773211479187012,grad_norm: 0.8951552491577247, iteration: 283469
loss: 0.984969973564148,grad_norm: 0.7935683038839979, iteration: 283470
loss: 1.0703343152999878,grad_norm: 0.7750875020880731, iteration: 283471
loss: 0.9917934536933899,grad_norm: 0.7765861615233421, iteration: 283472
loss: 1.1043355464935303,grad_norm: 0.9596139937147167, iteration: 283473
loss: 1.0135366916656494,grad_norm: 0.9562958462895432, iteration: 283474
loss: 0.9821277260780334,grad_norm: 0.9711978449362549, iteration: 283475
loss: 1.0255296230316162,grad_norm: 0.8777154203876584, iteration: 283476
loss: 0.9981987476348877,grad_norm: 0.9426616979120179, iteration: 283477
loss: 1.004783034324646,grad_norm: 0.9999992655498713, iteration: 283478
loss: 0.9928210377693176,grad_norm: 0.9999991743265029, iteration: 283479
loss: 0.9929953217506409,grad_norm: 0.8995263609409643, iteration: 283480
loss: 0.9835819005966187,grad_norm: 0.8428480476799325, iteration: 283481
loss: 1.0212031602859497,grad_norm: 0.9999990859498603, iteration: 283482
loss: 1.0226657390594482,grad_norm: 0.8364387274583177, iteration: 283483
loss: 1.0745301246643066,grad_norm: 0.8051363162781077, iteration: 283484
loss: 0.9844782948493958,grad_norm: 0.9999989923825212, iteration: 283485
loss: 0.9743715524673462,grad_norm: 0.9392123354579627, iteration: 283486
loss: 0.9955078363418579,grad_norm: 0.9999989573033438, iteration: 283487
loss: 1.0124659538269043,grad_norm: 0.815751346672027, iteration: 283488
loss: 1.0284218788146973,grad_norm: 0.9999991764229126, iteration: 283489
loss: 0.9475309252738953,grad_norm: 0.8524625207156443, iteration: 283490
loss: 1.0126255750656128,grad_norm: 0.9061508266892943, iteration: 283491
loss: 0.9917815923690796,grad_norm: 0.9885648168993052, iteration: 283492
loss: 1.0357310771942139,grad_norm: 0.8775811598010775, iteration: 283493
loss: 1.0073037147521973,grad_norm: 0.9999999125055162, iteration: 283494
loss: 0.968555748462677,grad_norm: 0.7758512802923097, iteration: 283495
loss: 1.0975760221481323,grad_norm: 0.9999998631196128, iteration: 283496
loss: 1.0395845174789429,grad_norm: 0.807957103833014, iteration: 283497
loss: 1.0165120363235474,grad_norm: 0.8839213735869853, iteration: 283498
loss: 1.0136140584945679,grad_norm: 0.8503121502727256, iteration: 283499
loss: 0.9938300251960754,grad_norm: 0.7289608709510432, iteration: 283500
loss: 1.011539340019226,grad_norm: 0.9374189023503762, iteration: 283501
loss: 1.0168037414550781,grad_norm: 0.8887295845830803, iteration: 283502
loss: 1.016114354133606,grad_norm: 0.9999991825626207, iteration: 283503
loss: 1.0671173334121704,grad_norm: 0.9999994720904917, iteration: 283504
loss: 0.9700412750244141,grad_norm: 0.8163955683271338, iteration: 283505
loss: 0.9877924919128418,grad_norm: 0.7981949756335895, iteration: 283506
loss: 0.9901859760284424,grad_norm: 0.7537313556403535, iteration: 283507
loss: 1.0390363931655884,grad_norm: 0.9635547583192684, iteration: 283508
loss: 1.0172520875930786,grad_norm: 0.9027844577952281, iteration: 283509
loss: 1.0077695846557617,grad_norm: 0.9617709461976615, iteration: 283510
loss: 1.0000481605529785,grad_norm: 0.7694463208926722, iteration: 283511
loss: 1.0085374116897583,grad_norm: 0.8177736774214523, iteration: 283512
loss: 0.9691161513328552,grad_norm: 0.8521229203649403, iteration: 283513
loss: 0.9588372707366943,grad_norm: 0.9415302618438277, iteration: 283514
loss: 0.9767628908157349,grad_norm: 0.907558114491109, iteration: 283515
loss: 0.9815362691879272,grad_norm: 0.972051577291976, iteration: 283516
loss: 1.0130062103271484,grad_norm: 0.90390629417417, iteration: 283517
loss: 0.9944120049476624,grad_norm: 0.8635347359103961, iteration: 283518
loss: 1.0288804769515991,grad_norm: 0.9634272402106796, iteration: 283519
loss: 1.0030248165130615,grad_norm: 0.9948072370937252, iteration: 283520
loss: 0.9930158853530884,grad_norm: 0.9257276490191779, iteration: 283521
loss: 0.9970861077308655,grad_norm: 0.9654719676485247, iteration: 283522
loss: 1.0360723733901978,grad_norm: 0.849108848595242, iteration: 283523
loss: 0.9915177822113037,grad_norm: 0.9999989907668849, iteration: 283524
loss: 1.0445616245269775,grad_norm: 0.9999998259441617, iteration: 283525
loss: 1.0107849836349487,grad_norm: 0.9999997181867045, iteration: 283526
loss: 1.013383388519287,grad_norm: 0.9647200805123164, iteration: 283527
loss: 0.931411623954773,grad_norm: 0.9336789759032111, iteration: 283528
loss: 0.9564206600189209,grad_norm: 0.7076122713399279, iteration: 283529
loss: 1.2419885396957397,grad_norm: 1.0000001128541471, iteration: 283530
loss: 1.044238805770874,grad_norm: 0.9481147546491261, iteration: 283531
loss: 0.9875956773757935,grad_norm: 0.820963874983415, iteration: 283532
loss: 0.9972931742668152,grad_norm: 0.9457593452833256, iteration: 283533
loss: 1.0713906288146973,grad_norm: 0.9999991318313888, iteration: 283534
loss: 0.9942719340324402,grad_norm: 0.8351685870544148, iteration: 283535
loss: 0.9943175315856934,grad_norm: 0.8291591752669767, iteration: 283536
loss: 1.0059702396392822,grad_norm: 0.7153383475444188, iteration: 283537
loss: 0.9730055928230286,grad_norm: 0.999999091742743, iteration: 283538
loss: 0.9945269227027893,grad_norm: 0.8563747444517404, iteration: 283539
loss: 1.0196435451507568,grad_norm: 0.8776331233900777, iteration: 283540
loss: 1.0273807048797607,grad_norm: 0.7253017713239476, iteration: 283541
loss: 0.9857854247093201,grad_norm: 0.7772859878851383, iteration: 283542
loss: 1.064901351928711,grad_norm: 0.9999991226010171, iteration: 283543
loss: 0.9944515824317932,grad_norm: 0.7707768763198425, iteration: 283544
loss: 1.0273934602737427,grad_norm: 0.9999991116552945, iteration: 283545
loss: 1.0036048889160156,grad_norm: 0.9999995180636305, iteration: 283546
loss: 0.9928170442581177,grad_norm: 0.8900825071257672, iteration: 283547
loss: 0.9791393876075745,grad_norm: 0.9677377429831585, iteration: 283548
loss: 1.1342264413833618,grad_norm: 0.9999991857218088, iteration: 283549
loss: 1.1321265697479248,grad_norm: 0.9999994697184218, iteration: 283550
loss: 0.9620655179023743,grad_norm: 0.7644517129463507, iteration: 283551
loss: 1.0095009803771973,grad_norm: 0.8361976993235571, iteration: 283552
loss: 0.9860847592353821,grad_norm: 0.9999991863625438, iteration: 283553
loss: 1.3947596549987793,grad_norm: 0.999999982957662, iteration: 283554
loss: 1.0024724006652832,grad_norm: 0.8510105150689322, iteration: 283555
loss: 1.0840357542037964,grad_norm: 0.9999992462055314, iteration: 283556
loss: 1.0307916402816772,grad_norm: 0.7072755234336076, iteration: 283557
loss: 1.001888394355774,grad_norm: 0.9406789948580492, iteration: 283558
loss: 0.9992344975471497,grad_norm: 0.7657713802702822, iteration: 283559
loss: 0.9976666569709778,grad_norm: 0.8709915505631856, iteration: 283560
loss: 1.0173181295394897,grad_norm: 0.8621506810492707, iteration: 283561
loss: 1.0015031099319458,grad_norm: 0.9702368983806445, iteration: 283562
loss: 1.001551628112793,grad_norm: 0.8514252133400244, iteration: 283563
loss: 1.0163028240203857,grad_norm: 0.8568638032216024, iteration: 283564
loss: 1.028585433959961,grad_norm: 0.9999996717131069, iteration: 283565
loss: 1.0157039165496826,grad_norm: 0.9908144967669884, iteration: 283566
loss: 1.0243821144104004,grad_norm: 0.9101770017069638, iteration: 283567
loss: 0.9765572547912598,grad_norm: 0.848261265896775, iteration: 283568
loss: 1.026413083076477,grad_norm: 0.8365681869564332, iteration: 283569
loss: 0.988057017326355,grad_norm: 0.8687363874783122, iteration: 283570
loss: 1.110784649848938,grad_norm: 0.9597304077968115, iteration: 283571
loss: 1.0161594152450562,grad_norm: 0.6463966269660545, iteration: 283572
loss: 1.0096358060836792,grad_norm: 0.8834451983682056, iteration: 283573
loss: 1.0365469455718994,grad_norm: 0.9372748856857955, iteration: 283574
loss: 0.9859269857406616,grad_norm: 0.9672787533681267, iteration: 283575
loss: 1.1862225532531738,grad_norm: 0.9783192314728232, iteration: 283576
loss: 1.0113022327423096,grad_norm: 0.9999990967347838, iteration: 283577
loss: 0.9977578520774841,grad_norm: 0.9777755418450651, iteration: 283578
loss: 0.9905651211738586,grad_norm: 0.9999996334906377, iteration: 283579
loss: 0.9565878510475159,grad_norm: 0.9999991075782182, iteration: 283580
loss: 1.0038745403289795,grad_norm: 0.8754133287832493, iteration: 283581
loss: 1.0038708448410034,grad_norm: 0.91701106088745, iteration: 283582
loss: 1.0214717388153076,grad_norm: 0.9202650902098837, iteration: 283583
loss: 0.9796424508094788,grad_norm: 0.864819921135131, iteration: 283584
loss: 1.0106693506240845,grad_norm: 0.8050787619844312, iteration: 283585
loss: 0.99009108543396,grad_norm: 0.9999994760874518, iteration: 283586
loss: 1.0007091760635376,grad_norm: 0.793800592957157, iteration: 283587
loss: 0.9602413773536682,grad_norm: 0.8862005136461684, iteration: 283588
loss: 0.9999640583992004,grad_norm: 0.9587290269440785, iteration: 283589
loss: 1.0410256385803223,grad_norm: 0.9999996570306791, iteration: 283590
loss: 1.0251502990722656,grad_norm: 0.7594810172415151, iteration: 283591
loss: 1.015339732170105,grad_norm: 0.9999991060132627, iteration: 283592
loss: 0.9960363507270813,grad_norm: 0.8535579068005028, iteration: 283593
loss: 1.025895357131958,grad_norm: 0.8901866203483002, iteration: 283594
loss: 1.001916766166687,grad_norm: 0.9075662648387528, iteration: 283595
loss: 1.057052731513977,grad_norm: 0.9999995078778777, iteration: 283596
loss: 1.0097317695617676,grad_norm: 0.9520501758322528, iteration: 283597
loss: 0.9779295921325684,grad_norm: 0.9999991430102625, iteration: 283598
loss: 1.0077331066131592,grad_norm: 0.7582364429078887, iteration: 283599
loss: 1.010725975036621,grad_norm: 0.9395435031402459, iteration: 283600
loss: 0.9993211030960083,grad_norm: 0.9691270629546772, iteration: 283601
loss: 1.002176284790039,grad_norm: 0.8460916844589748, iteration: 283602
loss: 1.0465655326843262,grad_norm: 0.9999993726850231, iteration: 283603
loss: 1.0190571546554565,grad_norm: 0.8782942598129239, iteration: 283604
loss: 0.9721644520759583,grad_norm: 0.9999993487773962, iteration: 283605
loss: 1.0088708400726318,grad_norm: 0.7461166843052818, iteration: 283606
loss: 1.016925573348999,grad_norm: 0.8999270747535683, iteration: 283607
loss: 1.0604147911071777,grad_norm: 0.9999991180979573, iteration: 283608
loss: 0.9799591302871704,grad_norm: 0.8388533531930685, iteration: 283609
loss: 0.9536192417144775,grad_norm: 0.7903533828781809, iteration: 283610
loss: 0.94493168592453,grad_norm: 0.8143672603514079, iteration: 283611
loss: 1.097486138343811,grad_norm: 0.7764203633389488, iteration: 283612
loss: 1.0344935655593872,grad_norm: 0.9308384329312931, iteration: 283613
loss: 0.9740207195281982,grad_norm: 0.8064624965031031, iteration: 283614
loss: 1.003952145576477,grad_norm: 0.9150154166570186, iteration: 283615
loss: 1.005893349647522,grad_norm: 0.9182630148187952, iteration: 283616
loss: 1.0124424695968628,grad_norm: 0.9999990936728272, iteration: 283617
loss: 1.003056287765503,grad_norm: 0.8553400043343098, iteration: 283618
loss: 0.986128568649292,grad_norm: 0.9358307343758397, iteration: 283619
loss: 1.0898139476776123,grad_norm: 0.8805497193079176, iteration: 283620
loss: 0.9500965476036072,grad_norm: 0.851644443265076, iteration: 283621
loss: 0.9819851517677307,grad_norm: 0.849683303172817, iteration: 283622
loss: 0.9956921339035034,grad_norm: 0.8611330995499784, iteration: 283623
loss: 1.0076675415039062,grad_norm: 0.9999990980202169, iteration: 283624
loss: 1.0104899406433105,grad_norm: 0.7719159622868451, iteration: 283625
loss: 1.0191664695739746,grad_norm: 0.9271828966770204, iteration: 283626
loss: 1.0508562326431274,grad_norm: 0.9734068538690895, iteration: 283627
loss: 0.9784356355667114,grad_norm: 0.8584568497553177, iteration: 283628
loss: 1.071797251701355,grad_norm: 0.9999999811365424, iteration: 283629
loss: 0.9888020753860474,grad_norm: 0.8486596097947483, iteration: 283630
loss: 1.0136821269989014,grad_norm: 0.9999999651039496, iteration: 283631
loss: 1.0048640966415405,grad_norm: 0.9830213802791328, iteration: 283632
loss: 1.096398949623108,grad_norm: 0.8336825290889651, iteration: 283633
loss: 1.0588406324386597,grad_norm: 0.9999992972873344, iteration: 283634
loss: 1.2225418090820312,grad_norm: 1.0000000637406279, iteration: 283635
loss: 1.0092670917510986,grad_norm: 0.9978767552659512, iteration: 283636
loss: 0.9737678170204163,grad_norm: 0.7493146614036151, iteration: 283637
loss: 1.0127406120300293,grad_norm: 0.8896876382667056, iteration: 283638
loss: 1.0516223907470703,grad_norm: 0.9999995932101106, iteration: 283639
loss: 1.0474239587783813,grad_norm: 0.9132913598522661, iteration: 283640
loss: 1.006270170211792,grad_norm: 0.77301537376632, iteration: 283641
loss: 0.9597906470298767,grad_norm: 0.9403585668257199, iteration: 283642
loss: 1.0097838640213013,grad_norm: 0.742446344819641, iteration: 283643
loss: 0.9822052717208862,grad_norm: 0.8886485726293185, iteration: 283644
loss: 1.0418026447296143,grad_norm: 0.9999991825175971, iteration: 283645
loss: 0.9961026906967163,grad_norm: 0.8842214390924956, iteration: 283646
loss: 1.0145670175552368,grad_norm: 0.7789157597316486, iteration: 283647
loss: 0.9942713379859924,grad_norm: 0.7768335224647658, iteration: 283648
loss: 1.0484111309051514,grad_norm: 0.9999994971067752, iteration: 283649
loss: 1.0073297023773193,grad_norm: 0.9001913320882863, iteration: 283650
loss: 1.0125833749771118,grad_norm: 0.9860001910747953, iteration: 283651
loss: 0.9989609122276306,grad_norm: 0.7323714764491945, iteration: 283652
loss: 1.0387498140335083,grad_norm: 0.9999992459006729, iteration: 283653
loss: 1.398241639137268,grad_norm: 0.9999996907391292, iteration: 283654
loss: 1.056531548500061,grad_norm: 0.9999999284403772, iteration: 283655
loss: 1.021592378616333,grad_norm: 0.8784464277203249, iteration: 283656
loss: 0.9927255511283875,grad_norm: 0.8085755687795723, iteration: 283657
loss: 1.001915454864502,grad_norm: 0.9137940090186758, iteration: 283658
loss: 0.9787124395370483,grad_norm: 0.9065939835193144, iteration: 283659
loss: 1.077876329421997,grad_norm: 0.9999991259308935, iteration: 283660
loss: 1.0079667568206787,grad_norm: 0.7915646130676492, iteration: 283661
loss: 0.9991343021392822,grad_norm: 0.8690804030246344, iteration: 283662
loss: 0.9806396961212158,grad_norm: 0.8963985153582784, iteration: 283663
loss: 0.9903156161308289,grad_norm: 0.9060841000868797, iteration: 283664
loss: 1.020735502243042,grad_norm: 0.7974368003504958, iteration: 283665
loss: 1.0244591236114502,grad_norm: 0.7488013792336499, iteration: 283666
loss: 1.0199424028396606,grad_norm: 0.8947193367051439, iteration: 283667
loss: 0.9978777170181274,grad_norm: 0.9352750742163445, iteration: 283668
loss: 0.9907277822494507,grad_norm: 0.8151497349802692, iteration: 283669
loss: 0.9650351405143738,grad_norm: 0.9030910051963337, iteration: 283670
loss: 0.996613621711731,grad_norm: 0.9194437060940277, iteration: 283671
loss: 1.0174891948699951,grad_norm: 0.8948166395464958, iteration: 283672
loss: 0.9511778354644775,grad_norm: 0.7711402596923861, iteration: 283673
loss: 0.9756637215614319,grad_norm: 0.7645359522641618, iteration: 283674
loss: 1.0293774604797363,grad_norm: 0.8063523481759711, iteration: 283675
loss: 1.0120083093643188,grad_norm: 0.7821238508014687, iteration: 283676
loss: 0.9688447713851929,grad_norm: 0.9581555397525605, iteration: 283677
loss: 0.9853729605674744,grad_norm: 0.8975988545573992, iteration: 283678
loss: 1.0557903051376343,grad_norm: 0.9999994457292765, iteration: 283679
loss: 0.9785332679748535,grad_norm: 0.9292056989849822, iteration: 283680
loss: 1.0337764024734497,grad_norm: 0.7854359265356823, iteration: 283681
loss: 0.9709935188293457,grad_norm: 0.8952475100636, iteration: 283682
loss: 1.039730429649353,grad_norm: 0.8506772673444429, iteration: 283683
loss: 0.9873356223106384,grad_norm: 0.9399770108091203, iteration: 283684
loss: 1.1093261241912842,grad_norm: 0.9999998778765019, iteration: 283685
loss: 1.0096535682678223,grad_norm: 0.8616970704502119, iteration: 283686
loss: 1.0346790552139282,grad_norm: 0.7771760494945832, iteration: 283687
loss: 0.9475411176681519,grad_norm: 0.9999991179909591, iteration: 283688
loss: 0.9778760075569153,grad_norm: 0.9999990991366315, iteration: 283689
loss: 1.0082720518112183,grad_norm: 0.7126785952284982, iteration: 283690
loss: 0.97640460729599,grad_norm: 0.7176114395452062, iteration: 283691
loss: 1.018262505531311,grad_norm: 0.8401818223006777, iteration: 283692
loss: 1.100014328956604,grad_norm: 0.9808112134470309, iteration: 283693
loss: 1.021921992301941,grad_norm: 0.9999991417082786, iteration: 283694
loss: 0.9756947755813599,grad_norm: 0.9999999471703476, iteration: 283695
loss: 0.9893760085105896,grad_norm: 0.743574004325514, iteration: 283696
loss: 0.9931442737579346,grad_norm: 0.9275920887587528, iteration: 283697
loss: 1.0021114349365234,grad_norm: 0.9313931758633889, iteration: 283698
loss: 0.9898502826690674,grad_norm: 0.9738132076824636, iteration: 283699
loss: 0.9975795149803162,grad_norm: 0.999999835507932, iteration: 283700
loss: 0.9742121696472168,grad_norm: 0.9037394244880543, iteration: 283701
loss: 0.9899917244911194,grad_norm: 0.9999990648954504, iteration: 283702
loss: 0.9710729122161865,grad_norm: 0.8506670905095735, iteration: 283703
loss: 1.0225919485092163,grad_norm: 0.8876283399241903, iteration: 283704
loss: 0.999991774559021,grad_norm: 0.8894913611951161, iteration: 283705
loss: 1.0748748779296875,grad_norm: 0.9999998769363524, iteration: 283706
loss: 0.9787428379058838,grad_norm: 0.7944292967352253, iteration: 283707
loss: 0.9992537498474121,grad_norm: 0.8947139880590829, iteration: 283708
loss: 0.9894283413887024,grad_norm: 0.9543723170386123, iteration: 283709
loss: 1.144477367401123,grad_norm: 0.9999999245876959, iteration: 283710
loss: 0.9745588898658752,grad_norm: 0.8742892024636958, iteration: 283711
loss: 0.9992916584014893,grad_norm: 0.8747658291472931, iteration: 283712
loss: 1.0318865776062012,grad_norm: 0.7408541286110942, iteration: 283713
loss: 1.0218565464019775,grad_norm: 0.8898539542123118, iteration: 283714
loss: 1.073240876197815,grad_norm: 0.9999997073352093, iteration: 283715
loss: 1.0003306865692139,grad_norm: 0.9169659630398038, iteration: 283716
loss: 0.9910362362861633,grad_norm: 0.795868434401514, iteration: 283717
loss: 0.9724516868591309,grad_norm: 0.8249306074479674, iteration: 283718
loss: 0.9864771962165833,grad_norm: 0.7921913559069603, iteration: 283719
loss: 0.996424674987793,grad_norm: 0.9647402287083403, iteration: 283720
loss: 0.9681851863861084,grad_norm: 0.8257020123772612, iteration: 283721
loss: 0.9765986800193787,grad_norm: 0.8435780847601668, iteration: 283722
loss: 0.9969706535339355,grad_norm: 0.8593134667996549, iteration: 283723
loss: 1.0448158979415894,grad_norm: 0.8209015605391271, iteration: 283724
loss: 1.0084599256515503,grad_norm: 0.999999147171741, iteration: 283725
loss: 1.0040736198425293,grad_norm: 0.8775396632176035, iteration: 283726
loss: 0.995669424533844,grad_norm: 0.9999989895459171, iteration: 283727
loss: 0.9883289933204651,grad_norm: 0.8715881199236937, iteration: 283728
loss: 1.018819808959961,grad_norm: 0.998497198213486, iteration: 283729
loss: 0.9969465136528015,grad_norm: 0.8462947764956146, iteration: 283730
loss: 1.016448974609375,grad_norm: 0.8926643737348918, iteration: 283731
loss: 0.9929686188697815,grad_norm: 0.9130114262273853, iteration: 283732
loss: 0.9925647377967834,grad_norm: 0.9999991007366614, iteration: 283733
loss: 0.9978978037834167,grad_norm: 0.999999475242879, iteration: 283734
loss: 1.0025248527526855,grad_norm: 0.9999992047353553, iteration: 283735
loss: 1.0293289422988892,grad_norm: 0.9064137082333422, iteration: 283736
loss: 0.988943338394165,grad_norm: 0.8354097459784837, iteration: 283737
loss: 0.9928325414657593,grad_norm: 0.8521406018303203, iteration: 283738
loss: 1.0270748138427734,grad_norm: 0.9118340905386159, iteration: 283739
loss: 1.0276741981506348,grad_norm: 0.9406225328038468, iteration: 283740
loss: 1.0053285360336304,grad_norm: 0.8845603481989033, iteration: 283741
loss: 0.9643571972846985,grad_norm: 0.7184596790868834, iteration: 283742
loss: 0.975566565990448,grad_norm: 0.8674020631955336, iteration: 283743
loss: 0.9871804118156433,grad_norm: 0.9789421607893216, iteration: 283744
loss: 0.9445016384124756,grad_norm: 0.7548472129923177, iteration: 283745
loss: 0.9919682145118713,grad_norm: 0.8757870780468365, iteration: 283746
loss: 1.0071742534637451,grad_norm: 0.9633697651571579, iteration: 283747
loss: 0.9864343404769897,grad_norm: 0.9492082736232009, iteration: 283748
loss: 0.9853156805038452,grad_norm: 0.8831948765420691, iteration: 283749
loss: 1.0180368423461914,grad_norm: 0.9379737650087127, iteration: 283750
loss: 1.0416934490203857,grad_norm: 0.9305639405919025, iteration: 283751
loss: 1.02375328540802,grad_norm: 0.999999828477436, iteration: 283752
loss: 1.0623961687088013,grad_norm: 0.9700942591572003, iteration: 283753
loss: 0.9989376664161682,grad_norm: 0.7577585361602249, iteration: 283754
loss: 0.9815278053283691,grad_norm: 0.8939784927892603, iteration: 283755
loss: 0.9663185477256775,grad_norm: 0.8980199659689675, iteration: 283756
loss: 1.0246034860610962,grad_norm: 0.9999998432578779, iteration: 283757
loss: 1.0427818298339844,grad_norm: 0.8169288466130239, iteration: 283758
loss: 0.9701995849609375,grad_norm: 0.875216743472118, iteration: 283759
loss: 1.0159991979599,grad_norm: 0.777728312299511, iteration: 283760
loss: 1.0207912921905518,grad_norm: 0.8799615840743436, iteration: 283761
loss: 1.021010398864746,grad_norm: 0.8829550821062614, iteration: 283762
loss: 0.9986363649368286,grad_norm: 0.9999995124231071, iteration: 283763
loss: 1.00969398021698,grad_norm: 0.9503162218790547, iteration: 283764
loss: 1.0056079626083374,grad_norm: 0.9054243312356797, iteration: 283765
loss: 0.9982749819755554,grad_norm: 0.8252533523447507, iteration: 283766
loss: 0.9539064168930054,grad_norm: 0.982613447259477, iteration: 283767
loss: 0.9911559820175171,grad_norm: 0.8152028171279131, iteration: 283768
loss: 0.9891828298568726,grad_norm: 0.9336438564730188, iteration: 283769
loss: 0.981585681438446,grad_norm: 0.8740781332116467, iteration: 283770
loss: 1.0190995931625366,grad_norm: 0.9999992450226919, iteration: 283771
loss: 1.036917805671692,grad_norm: 0.8640410428574166, iteration: 283772
loss: 1.0056443214416504,grad_norm: 0.9003618296193988, iteration: 283773
loss: 1.0885465145111084,grad_norm: 0.9999990541275917, iteration: 283774
loss: 1.075561285018921,grad_norm: 0.9999991027467589, iteration: 283775
loss: 1.0071436166763306,grad_norm: 0.7753272246786006, iteration: 283776
loss: 0.979938805103302,grad_norm: 0.9999990801455357, iteration: 283777
loss: 1.0134128332138062,grad_norm: 0.9921291480694092, iteration: 283778
loss: 0.9422709941864014,grad_norm: 0.7960441592153346, iteration: 283779
loss: 1.0044212341308594,grad_norm: 0.8308595760610774, iteration: 283780
loss: 1.0106678009033203,grad_norm: 0.9819060435626422, iteration: 283781
loss: 0.9871472120285034,grad_norm: 0.8740362708558328, iteration: 283782
loss: 1.038924217224121,grad_norm: 0.8098578510025911, iteration: 283783
loss: 1.009619951248169,grad_norm: 0.9118695624111706, iteration: 283784
loss: 0.9461831450462341,grad_norm: 0.9999991029851607, iteration: 283785
loss: 1.0088365077972412,grad_norm: 0.8292306973303847, iteration: 283786
loss: 0.9746217131614685,grad_norm: 0.9420252589740394, iteration: 283787
loss: 0.9900392293930054,grad_norm: 0.8549184847433334, iteration: 283788
loss: 1.0151385068893433,grad_norm: 0.9115571512798376, iteration: 283789
loss: 1.012109637260437,grad_norm: 0.9102373567164647, iteration: 283790
loss: 1.0186400413513184,grad_norm: 0.7910179868803762, iteration: 283791
loss: 1.0080971717834473,grad_norm: 0.8979859511095271, iteration: 283792
loss: 0.9893512725830078,grad_norm: 0.9020056183495627, iteration: 283793
loss: 0.9920750260353088,grad_norm: 0.821561724807818, iteration: 283794
loss: 0.9930504560470581,grad_norm: 0.8746948748138564, iteration: 283795
loss: 0.9687187075614929,grad_norm: 0.7344413439221357, iteration: 283796
loss: 1.0036572217941284,grad_norm: 0.978490236226686, iteration: 283797
loss: 1.0341465473175049,grad_norm: 0.9999990121241417, iteration: 283798
loss: 0.9986546039581299,grad_norm: 0.9999990862463677, iteration: 283799
loss: 0.9949303269386292,grad_norm: 0.9999990872011413, iteration: 283800
loss: 1.0016932487487793,grad_norm: 0.9999991493225808, iteration: 283801
loss: 1.02435302734375,grad_norm: 0.9579757600448465, iteration: 283802
loss: 0.98442143201828,grad_norm: 0.84365618510134, iteration: 283803
loss: 0.9978033304214478,grad_norm: 0.8110646772376431, iteration: 283804
loss: 1.0235636234283447,grad_norm: 0.8553604036175159, iteration: 283805
loss: 0.9568738341331482,grad_norm: 0.8370919308419198, iteration: 283806
loss: 0.9776814579963684,grad_norm: 0.7891806006932989, iteration: 283807
loss: 0.9881767630577087,grad_norm: 0.8911910960448367, iteration: 283808
loss: 1.010667085647583,grad_norm: 0.9999991456664714, iteration: 283809
loss: 1.115446925163269,grad_norm: 0.9433092209537309, iteration: 283810
loss: 0.9712974429130554,grad_norm: 0.8243835133943379, iteration: 283811
loss: 0.9655891060829163,grad_norm: 0.8408670303183875, iteration: 283812
loss: 0.9926599860191345,grad_norm: 0.8255060253767579, iteration: 283813
loss: 1.0056681632995605,grad_norm: 0.7285177419107192, iteration: 283814
loss: 0.9870709180831909,grad_norm: 0.8879835442657805, iteration: 283815
loss: 0.9846886396408081,grad_norm: 0.9152244500628967, iteration: 283816
loss: 0.9898173809051514,grad_norm: 0.9654951951246983, iteration: 283817
loss: 0.9699108004570007,grad_norm: 0.7167409165028643, iteration: 283818
loss: 0.9946669936180115,grad_norm: 0.880426881920493, iteration: 283819
loss: 1.0289665460586548,grad_norm: 0.9999991447551023, iteration: 283820
loss: 1.017843246459961,grad_norm: 0.9999995988856325, iteration: 283821
loss: 1.0063732862472534,grad_norm: 0.9999992111216024, iteration: 283822
loss: 1.0013060569763184,grad_norm: 0.9999992239872811, iteration: 283823
loss: 0.9979112148284912,grad_norm: 0.9999992030334144, iteration: 283824
loss: 1.0365372896194458,grad_norm: 0.9751210161025315, iteration: 283825
loss: 1.0043805837631226,grad_norm: 0.8640409923447849, iteration: 283826
loss: 0.931731641292572,grad_norm: 0.8432050795941286, iteration: 283827
loss: 0.9670294523239136,grad_norm: 0.9894145532546865, iteration: 283828
loss: 1.0333274602890015,grad_norm: 0.8728606690370464, iteration: 283829
loss: 0.9847358465194702,grad_norm: 0.9554454958405474, iteration: 283830
loss: 0.957044780254364,grad_norm: 0.9266704415354972, iteration: 283831
loss: 1.003949522972107,grad_norm: 0.7110122336823185, iteration: 283832
loss: 0.992746889591217,grad_norm: 0.9866000829252474, iteration: 283833
loss: 1.0171557664871216,grad_norm: 0.8556012665341786, iteration: 283834
loss: 1.0046335458755493,grad_norm: 0.9198523576788321, iteration: 283835
loss: 1.039399266242981,grad_norm: 0.8946310086901712, iteration: 283836
loss: 1.006556749343872,grad_norm: 0.9999991365668096, iteration: 283837
loss: 1.063060998916626,grad_norm: 0.8612155361516265, iteration: 283838
loss: 1.0107852220535278,grad_norm: 0.7669197316126426, iteration: 283839
loss: 0.9983844757080078,grad_norm: 0.8551601643005441, iteration: 283840
loss: 1.0387659072875977,grad_norm: 0.8219257695813815, iteration: 283841
loss: 1.0036765336990356,grad_norm: 0.9430296620525209, iteration: 283842
loss: 1.0699278116226196,grad_norm: 0.8637865957657429, iteration: 283843
loss: 1.1266595125198364,grad_norm: 0.9999999688943322, iteration: 283844
loss: 0.9722495079040527,grad_norm: 0.9366396649594985, iteration: 283845
loss: 0.9848892092704773,grad_norm: 0.9416819188731013, iteration: 283846
loss: 1.008618712425232,grad_norm: 0.9999993537207342, iteration: 283847
loss: 0.9600281119346619,grad_norm: 0.9387587897462969, iteration: 283848
loss: 1.0166471004486084,grad_norm: 0.9999989873780576, iteration: 283849
loss: 1.0075346231460571,grad_norm: 0.7792124801533149, iteration: 283850
loss: 1.0086393356323242,grad_norm: 0.9999996138162947, iteration: 283851
loss: 0.9856663346290588,grad_norm: 0.9990519304748849, iteration: 283852
loss: 0.9955795407295227,grad_norm: 0.8257991783924535, iteration: 283853
loss: 1.0179697275161743,grad_norm: 0.9999991641473484, iteration: 283854
loss: 0.9897725582122803,grad_norm: 0.8258411612072858, iteration: 283855
loss: 1.1231269836425781,grad_norm: 0.9999990096517323, iteration: 283856
loss: 0.986384928226471,grad_norm: 0.9999991239592435, iteration: 283857
loss: 1.0295047760009766,grad_norm: 0.9999990473312534, iteration: 283858
loss: 0.9867615699768066,grad_norm: 0.7983943813821118, iteration: 283859
loss: 0.9741302132606506,grad_norm: 0.7584745739001675, iteration: 283860
loss: 0.9765175580978394,grad_norm: 0.8631555129508598, iteration: 283861
loss: 1.0337594747543335,grad_norm: 0.99999923446825, iteration: 283862
loss: 1.1307801008224487,grad_norm: 0.9999993009186037, iteration: 283863
loss: 1.016095757484436,grad_norm: 0.7393324813850842, iteration: 283864
loss: 1.0261386632919312,grad_norm: 0.8037578098840259, iteration: 283865
loss: 1.017748236656189,grad_norm: 0.8523213044008283, iteration: 283866
loss: 1.0162686109542847,grad_norm: 0.9718489823165966, iteration: 283867
loss: 0.9748117327690125,grad_norm: 0.849229572352075, iteration: 283868
loss: 1.0098133087158203,grad_norm: 0.7655351576058917, iteration: 283869
loss: 1.016768217086792,grad_norm: 0.8964920423759689, iteration: 283870
loss: 0.9833465814590454,grad_norm: 0.8830795021021492, iteration: 283871
loss: 1.021544337272644,grad_norm: 0.9227488634395173, iteration: 283872
loss: 1.0318057537078857,grad_norm: 0.8342357711084819, iteration: 283873
loss: 1.0275843143463135,grad_norm: 0.7623841937359586, iteration: 283874
loss: 1.0200181007385254,grad_norm: 0.8556496119951216, iteration: 283875
loss: 0.9915491342544556,grad_norm: 0.841940291371943, iteration: 283876
loss: 1.0047895908355713,grad_norm: 0.9435850184352299, iteration: 283877
loss: 1.009137749671936,grad_norm: 0.8673464834800968, iteration: 283878
loss: 0.9791892766952515,grad_norm: 0.7970662354879564, iteration: 283879
loss: 1.0419163703918457,grad_norm: 0.8941985982824789, iteration: 283880
loss: 0.9655362367630005,grad_norm: 0.9069206706188864, iteration: 283881
loss: 0.9982442259788513,grad_norm: 0.8478872389711677, iteration: 283882
loss: 1.0213019847869873,grad_norm: 0.999998970978989, iteration: 283883
loss: 1.0287079811096191,grad_norm: 0.9573636890727806, iteration: 283884
loss: 1.0101006031036377,grad_norm: 0.9999991047331096, iteration: 283885
loss: 1.0198142528533936,grad_norm: 0.7495568048838714, iteration: 283886
loss: 0.9990680813789368,grad_norm: 0.942662880324662, iteration: 283887
loss: 1.015219807624817,grad_norm: 0.87064569277405, iteration: 283888
loss: 0.9885905385017395,grad_norm: 0.9070931069400215, iteration: 283889
loss: 0.9789239168167114,grad_norm: 0.9485079946300773, iteration: 283890
loss: 1.0290658473968506,grad_norm: 0.9999990937917659, iteration: 283891
loss: 0.9727856516838074,grad_norm: 0.9066168991467111, iteration: 283892
loss: 1.0681297779083252,grad_norm: 0.9999999602995581, iteration: 283893
loss: 0.9977688193321228,grad_norm: 0.8501111222165327, iteration: 283894
loss: 0.9834502935409546,grad_norm: 0.9882488961742196, iteration: 283895
loss: 1.0030722618103027,grad_norm: 0.9077263116662764, iteration: 283896
loss: 1.0433008670806885,grad_norm: 0.9030344027040925, iteration: 283897
loss: 0.9980565309524536,grad_norm: 0.7425840364464757, iteration: 283898
loss: 1.0062658786773682,grad_norm: 0.8342614681477252, iteration: 283899
loss: 0.973979115486145,grad_norm: 0.9214156681802497, iteration: 283900
loss: 0.9575673937797546,grad_norm: 0.9013818253432742, iteration: 283901
loss: 1.0169129371643066,grad_norm: 0.9590258807097918, iteration: 283902
loss: 0.9922600388526917,grad_norm: 0.8757174225473373, iteration: 283903
loss: 1.006564736366272,grad_norm: 0.9195185426012147, iteration: 283904
loss: 0.9818260669708252,grad_norm: 0.8495246636169657, iteration: 283905
loss: 0.9709161520004272,grad_norm: 0.8824355451936451, iteration: 283906
loss: 1.0376489162445068,grad_norm: 0.8363271800573779, iteration: 283907
loss: 0.971492350101471,grad_norm: 0.7698638832111238, iteration: 283908
loss: 1.0333482027053833,grad_norm: 0.9999992843785372, iteration: 283909
loss: 0.9823964238166809,grad_norm: 0.7633011972137136, iteration: 283910
loss: 1.0241855382919312,grad_norm: 0.9360454898107934, iteration: 283911
loss: 0.9973432421684265,grad_norm: 0.9010431987008124, iteration: 283912
loss: 0.9689997434616089,grad_norm: 0.7908806169684426, iteration: 283913
loss: 0.9566183090209961,grad_norm: 0.9222776750974745, iteration: 283914
loss: 1.055921196937561,grad_norm: 0.7683776403065921, iteration: 283915
loss: 1.0306403636932373,grad_norm: 0.7765359188190584, iteration: 283916
loss: 1.040188193321228,grad_norm: 0.8181005381641766, iteration: 283917
loss: 0.9849846959114075,grad_norm: 0.9250992121392791, iteration: 283918
loss: 1.0626167058944702,grad_norm: 0.8552507600750092, iteration: 283919
loss: 0.9820074439048767,grad_norm: 0.9034261229053221, iteration: 283920
loss: 1.0130510330200195,grad_norm: 0.6869167794039522, iteration: 283921
loss: 0.9962087869644165,grad_norm: 0.732397965857834, iteration: 283922
loss: 0.9988726377487183,grad_norm: 0.9249088759102845, iteration: 283923
loss: 0.9948268532752991,grad_norm: 0.9999998032152729, iteration: 283924
loss: 1.0327248573303223,grad_norm: 0.832406424862343, iteration: 283925
loss: 1.029126524925232,grad_norm: 0.8985035783795425, iteration: 283926
loss: 1.008128046989441,grad_norm: 0.9285744495452414, iteration: 283927
loss: 1.0167834758758545,grad_norm: 0.7132097201003419, iteration: 283928
loss: 1.0406233072280884,grad_norm: 0.9999991225427385, iteration: 283929
loss: 1.0108431577682495,grad_norm: 0.809730136669011, iteration: 283930
loss: 0.9991742372512817,grad_norm: 0.9531842934968453, iteration: 283931
loss: 1.0750974416732788,grad_norm: 0.9999997203859501, iteration: 283932
loss: 1.008685827255249,grad_norm: 0.9197385803788873, iteration: 283933
loss: 1.0120463371276855,grad_norm: 0.8561245578098489, iteration: 283934
loss: 1.0094058513641357,grad_norm: 0.7374544882056123, iteration: 283935
loss: 1.0150976181030273,grad_norm: 0.9791568894395332, iteration: 283936
loss: 1.019930124282837,grad_norm: 0.9321444851859773, iteration: 283937
loss: 1.0345370769500732,grad_norm: 0.9115467338095586, iteration: 283938
loss: 1.0139274597167969,grad_norm: 0.999999633585577, iteration: 283939
loss: 0.97421795129776,grad_norm: 0.891917850739639, iteration: 283940
loss: 0.9918351769447327,grad_norm: 0.7681480682141462, iteration: 283941
loss: 1.0399119853973389,grad_norm: 0.9994900488098494, iteration: 283942
loss: 1.0126903057098389,grad_norm: 0.9359605299756467, iteration: 283943
loss: 0.9803426861763,grad_norm: 0.9332034994828464, iteration: 283944
loss: 0.9913622736930847,grad_norm: 0.7689120048189638, iteration: 283945
loss: 1.0029869079589844,grad_norm: 0.9999998621000307, iteration: 283946
loss: 1.002348780632019,grad_norm: 0.8779257460715597, iteration: 283947
loss: 0.9747628569602966,grad_norm: 0.7951412633744197, iteration: 283948
loss: 0.9844310879707336,grad_norm: 0.7816784757865674, iteration: 283949
loss: 1.0222769975662231,grad_norm: 0.9999995661133542, iteration: 283950
loss: 0.98612380027771,grad_norm: 0.9176972208595384, iteration: 283951
loss: 1.0011076927185059,grad_norm: 0.876244568122365, iteration: 283952
loss: 1.0040067434310913,grad_norm: 0.7687248393168983, iteration: 283953
loss: 1.0010623931884766,grad_norm: 0.8884430988414853, iteration: 283954
loss: 1.1028428077697754,grad_norm: 0.999999891769195, iteration: 283955
loss: 0.9943787455558777,grad_norm: 0.837326208670737, iteration: 283956
loss: 1.0061157941818237,grad_norm: 0.7309977008916008, iteration: 283957
loss: 0.9857107400894165,grad_norm: 0.7671117395370586, iteration: 283958
loss: 1.0565518140792847,grad_norm: 0.9555527776307765, iteration: 283959
loss: 0.969253659248352,grad_norm: 0.9999991761392945, iteration: 283960
loss: 1.0054025650024414,grad_norm: 0.9054278828780623, iteration: 283961
loss: 1.0091524124145508,grad_norm: 0.9999990849686228, iteration: 283962
loss: 1.032549262046814,grad_norm: 0.9999990095102456, iteration: 283963
loss: 0.9933226108551025,grad_norm: 0.89274228726532, iteration: 283964
loss: 1.0075292587280273,grad_norm: 0.8640703103405828, iteration: 283965
loss: 0.9988101124763489,grad_norm: 0.7373427164457553, iteration: 283966
loss: 0.9846184849739075,grad_norm: 0.8759174491257061, iteration: 283967
loss: 0.9685690999031067,grad_norm: 0.8483636587335557, iteration: 283968
loss: 1.0112794637680054,grad_norm: 0.9999990689219862, iteration: 283969
loss: 1.0024878978729248,grad_norm: 0.9954639590033953, iteration: 283970
loss: 1.0324549674987793,grad_norm: 0.7653886619244985, iteration: 283971
loss: 0.9904218316078186,grad_norm: 0.9999990866191556, iteration: 283972
loss: 1.0072683095932007,grad_norm: 0.8199633138380173, iteration: 283973
loss: 1.0193217992782593,grad_norm: 0.9999992669134399, iteration: 283974
loss: 0.9641671776771545,grad_norm: 0.9703656265470568, iteration: 283975
loss: 0.9991483092308044,grad_norm: 0.7854862684464914, iteration: 283976
loss: 1.0226396322250366,grad_norm: 0.8432818904810422, iteration: 283977
loss: 0.9933857917785645,grad_norm: 0.8783580172830696, iteration: 283978
loss: 1.0173331499099731,grad_norm: 0.9999992296708932, iteration: 283979
loss: 0.9720132946968079,grad_norm: 0.9066723096330124, iteration: 283980
loss: 1.0185128450393677,grad_norm: 0.8483189161723413, iteration: 283981
loss: 0.9930835366249084,grad_norm: 0.7789301887705699, iteration: 283982
loss: 0.995057225227356,grad_norm: 0.846799176547769, iteration: 283983
loss: 1.0067687034606934,grad_norm: 0.9430225690428253, iteration: 283984
loss: 1.0072824954986572,grad_norm: 0.91090048258218, iteration: 283985
loss: 0.9950074553489685,grad_norm: 0.9234260953350513, iteration: 283986
loss: 1.0224707126617432,grad_norm: 0.9269855435727484, iteration: 283987
loss: 1.008305311203003,grad_norm: 0.9999990422617657, iteration: 283988
loss: 1.0458649396896362,grad_norm: 0.8981770585105191, iteration: 283989
loss: 0.9666399359703064,grad_norm: 0.8597987296954147, iteration: 283990
loss: 1.0167847871780396,grad_norm: 0.8469405288276279, iteration: 283991
loss: 1.052701473236084,grad_norm: 0.8035674016630645, iteration: 283992
loss: 1.0314148664474487,grad_norm: 0.9999997992521763, iteration: 283993
loss: 1.0034717321395874,grad_norm: 0.789772482510754, iteration: 283994
loss: 1.0106679201126099,grad_norm: 0.8393023628743403, iteration: 283995
loss: 0.9891114830970764,grad_norm: 0.8677829966494254, iteration: 283996
loss: 0.9858677387237549,grad_norm: 0.7957828106535817, iteration: 283997
loss: 0.9741194844245911,grad_norm: 0.7350697617072782, iteration: 283998
loss: 1.002772331237793,grad_norm: 0.9999992566851891, iteration: 283999
loss: 1.0092865228652954,grad_norm: 0.9487370854808622, iteration: 284000
loss: 0.9635984301567078,grad_norm: 0.9468432628673441, iteration: 284001
loss: 1.014410138130188,grad_norm: 0.9127944999464851, iteration: 284002
loss: 0.9773028492927551,grad_norm: 0.8657217004820897, iteration: 284003
loss: 0.9825035929679871,grad_norm: 0.8780843503119996, iteration: 284004
loss: 0.9848514795303345,grad_norm: 0.9999996120558189, iteration: 284005
loss: 0.9575850367546082,grad_norm: 0.8579579135311081, iteration: 284006
loss: 1.0215582847595215,grad_norm: 0.8840990539407347, iteration: 284007
loss: 0.9934276938438416,grad_norm: 0.7461877261807461, iteration: 284008
loss: 0.9587746262550354,grad_norm: 0.9999992597067117, iteration: 284009
loss: 1.012032389640808,grad_norm: 0.8319696103347244, iteration: 284010
loss: 1.027152419090271,grad_norm: 1.0000000311936863, iteration: 284011
loss: 1.0048160552978516,grad_norm: 0.7417990969124559, iteration: 284012
loss: 1.003509521484375,grad_norm: 0.8002515411312268, iteration: 284013
loss: 0.9938284158706665,grad_norm: 0.8380778432284093, iteration: 284014
loss: 0.9907016754150391,grad_norm: 0.819774136227729, iteration: 284015
loss: 1.0097604990005493,grad_norm: 0.8785367223386458, iteration: 284016
loss: 1.00343918800354,grad_norm: 0.7222235897560653, iteration: 284017
loss: 0.9837228059768677,grad_norm: 0.9999991099289489, iteration: 284018
loss: 0.994077205657959,grad_norm: 0.7384598780812782, iteration: 284019
loss: 1.0000861883163452,grad_norm: 0.7205238263539265, iteration: 284020
loss: 0.9909155964851379,grad_norm: 0.837650232898587, iteration: 284021
loss: 0.9433010220527649,grad_norm: 0.9523296697083092, iteration: 284022
loss: 1.2424696683883667,grad_norm: 0.9999999748149992, iteration: 284023
loss: 1.0883996486663818,grad_norm: 0.9999989926210949, iteration: 284024
loss: 1.0297940969467163,grad_norm: 0.9999991220414147, iteration: 284025
loss: 1.209635615348816,grad_norm: 0.9999992239541552, iteration: 284026
loss: 1.0241261720657349,grad_norm: 0.8717905326268282, iteration: 284027
loss: 0.9787701964378357,grad_norm: 0.8849455189719757, iteration: 284028
loss: 0.9876870512962341,grad_norm: 0.7510031279531537, iteration: 284029
loss: 1.0200997591018677,grad_norm: 0.8607528093084909, iteration: 284030
loss: 1.1023898124694824,grad_norm: 0.9436340523318577, iteration: 284031
loss: 1.02065908908844,grad_norm: 0.7142806300868207, iteration: 284032
loss: 1.031310796737671,grad_norm: 0.8421229618932605, iteration: 284033
loss: 0.999435544013977,grad_norm: 0.9999991692732482, iteration: 284034
loss: 1.012178897857666,grad_norm: 0.6878912887571663, iteration: 284035
loss: 1.0119504928588867,grad_norm: 0.8968149544691028, iteration: 284036
loss: 1.0161584615707397,grad_norm: 0.8741323305220066, iteration: 284037
loss: 0.9932117462158203,grad_norm: 0.8158262026520204, iteration: 284038
loss: 1.0946574211120605,grad_norm: 0.999999787477712, iteration: 284039
loss: 0.9954640865325928,grad_norm: 0.9999991973316664, iteration: 284040
loss: 1.0163341760635376,grad_norm: 0.9609626275918196, iteration: 284041
loss: 1.0333211421966553,grad_norm: 0.8799801301490909, iteration: 284042
loss: 1.07063889503479,grad_norm: 0.9999989892732281, iteration: 284043
loss: 1.0002764463424683,grad_norm: 0.9795634120348096, iteration: 284044
loss: 1.00038480758667,grad_norm: 0.9444747631675644, iteration: 284045
loss: 0.9880200624465942,grad_norm: 0.7985271127518582, iteration: 284046
loss: 0.9892482757568359,grad_norm: 0.979813954330975, iteration: 284047
loss: 1.011309027671814,grad_norm: 0.712162208383803, iteration: 284048
loss: 1.0123652219772339,grad_norm: 0.7820193495158312, iteration: 284049
loss: 1.1870791912078857,grad_norm: 0.8360054486005924, iteration: 284050
loss: 1.073186993598938,grad_norm: 0.9999991140351071, iteration: 284051
loss: 1.156740665435791,grad_norm: 0.9665543654401447, iteration: 284052
loss: 1.0162787437438965,grad_norm: 0.7330570044691932, iteration: 284053
loss: 0.9800019860267639,grad_norm: 0.99999902341238, iteration: 284054
loss: 1.0611950159072876,grad_norm: 0.8957716157103282, iteration: 284055
loss: 1.0281208753585815,grad_norm: 0.8963279579720806, iteration: 284056
loss: 0.9993646740913391,grad_norm: 0.841936734495628, iteration: 284057
loss: 1.0216611623764038,grad_norm: 0.9999990478227642, iteration: 284058
loss: 1.1293704509735107,grad_norm: 0.9999999280471498, iteration: 284059
loss: 1.0342119932174683,grad_norm: 0.8509319335380193, iteration: 284060
loss: 1.0350568294525146,grad_norm: 0.923145622041998, iteration: 284061
loss: 1.021188497543335,grad_norm: 0.8875359215366573, iteration: 284062
loss: 1.1615381240844727,grad_norm: 0.9709833999700542, iteration: 284063
loss: 1.0572755336761475,grad_norm: 0.8089210208224393, iteration: 284064
loss: 1.003431797027588,grad_norm: 0.9228375078605987, iteration: 284065
loss: 1.1392054557800293,grad_norm: 0.9999996927424812, iteration: 284066
loss: 0.9642488956451416,grad_norm: 0.9814021343342672, iteration: 284067
loss: 1.035671353340149,grad_norm: 0.9228253872627612, iteration: 284068
loss: 1.2570888996124268,grad_norm: 0.9999994019916141, iteration: 284069
loss: 1.0491307973861694,grad_norm: 0.9917158684190325, iteration: 284070
loss: 1.025410771369934,grad_norm: 0.8536887962099401, iteration: 284071
loss: 1.0740259885787964,grad_norm: 0.9999993924070296, iteration: 284072
loss: 1.0255590677261353,grad_norm: 0.9999990938002747, iteration: 284073
loss: 1.1081403493881226,grad_norm: 0.96096333525633, iteration: 284074
loss: 1.3309446573257446,grad_norm: 0.9999998372153247, iteration: 284075
loss: 1.13527512550354,grad_norm: 0.9704499585840797, iteration: 284076
loss: 1.402658462524414,grad_norm: 0.9999994645481869, iteration: 284077
loss: 1.2260370254516602,grad_norm: 0.9999990993221414, iteration: 284078
loss: 1.2201229333877563,grad_norm: 0.9999996156329352, iteration: 284079
loss: 1.261755108833313,grad_norm: 1.0000000623644152, iteration: 284080
loss: 1.3147423267364502,grad_norm: 0.9999993734975516, iteration: 284081
loss: 1.0076183080673218,grad_norm: 0.8997194955487805, iteration: 284082
loss: 1.4835984706878662,grad_norm: 0.9999993138694807, iteration: 284083
loss: 1.2475579977035522,grad_norm: 0.9999995299912023, iteration: 284084
loss: 1.2086671590805054,grad_norm: 0.8615803128919403, iteration: 284085
loss: 1.335642695426941,grad_norm: 0.9999996138313869, iteration: 284086
loss: 1.1393415927886963,grad_norm: 0.9999997454741903, iteration: 284087
loss: 1.549472451210022,grad_norm: 0.9999999090296391, iteration: 284088
loss: 0.9539993405342102,grad_norm: 0.9712856575086545, iteration: 284089
loss: 1.0960485935211182,grad_norm: 0.9999992030514981, iteration: 284090
loss: 1.3018786907196045,grad_norm: 0.9999995156922848, iteration: 284091
loss: 1.1980797052383423,grad_norm: 0.9999991460931, iteration: 284092
loss: 1.6756188869476318,grad_norm: 0.9999995997056799, iteration: 284093
loss: 1.3310853242874146,grad_norm: 0.9999997901944555, iteration: 284094
loss: 1.3377271890640259,grad_norm: 0.9999999214104913, iteration: 284095
loss: 0.992121160030365,grad_norm: 0.8657984232518122, iteration: 284096
loss: 1.2502977848052979,grad_norm: 0.9999996516275294, iteration: 284097
loss: 1.24039626121521,grad_norm: 0.9999993437674367, iteration: 284098
loss: 1.1226774454116821,grad_norm: 0.9627126487530312, iteration: 284099
loss: 1.3370800018310547,grad_norm: 0.9999996366293435, iteration: 284100
loss: 1.3410332202911377,grad_norm: 0.9999994348565889, iteration: 284101
loss: 1.1703728437423706,grad_norm: 0.9999994515621378, iteration: 284102
loss: 1.3147393465042114,grad_norm: 0.9999995840499388, iteration: 284103
loss: 1.198511004447937,grad_norm: 0.9999991253419966, iteration: 284104
loss: 1.2270158529281616,grad_norm: 0.999999420245843, iteration: 284105
loss: 1.0172847509384155,grad_norm: 0.9999990634096978, iteration: 284106
loss: 1.063765287399292,grad_norm: 0.9999998149951568, iteration: 284107
loss: 1.2561073303222656,grad_norm: 0.9999997438514395, iteration: 284108
loss: 1.228397011756897,grad_norm: 0.9999990480790413, iteration: 284109
loss: 1.454985499382019,grad_norm: 0.9999998351408961, iteration: 284110
loss: 1.486628532409668,grad_norm: 0.9999999135410819, iteration: 284111
loss: 1.3992611169815063,grad_norm: 0.9999996598557905, iteration: 284112
loss: 1.5670053958892822,grad_norm: 0.9999999511479477, iteration: 284113
loss: 1.061212182044983,grad_norm: 0.9999992633175872, iteration: 284114
loss: 1.2860920429229736,grad_norm: 0.9999994598738566, iteration: 284115
loss: 1.3943395614624023,grad_norm: 0.9999999139317954, iteration: 284116
loss: 1.1133993864059448,grad_norm: 0.9999993882196386, iteration: 284117
loss: 1.2564489841461182,grad_norm: 0.9999996617606086, iteration: 284118
loss: 1.2110904455184937,grad_norm: 0.9999995385820504, iteration: 284119
loss: 1.3465516567230225,grad_norm: 0.9999997726763274, iteration: 284120
loss: 1.2612272500991821,grad_norm: 0.9999995776857408, iteration: 284121
loss: 1.2117677927017212,grad_norm: 0.9999998014703162, iteration: 284122
loss: 1.0118944644927979,grad_norm: 0.9999991365075059, iteration: 284123
loss: 1.0650204420089722,grad_norm: 0.7536171136995143, iteration: 284124
loss: 1.2462049722671509,grad_norm: 0.9999996667427901, iteration: 284125
loss: 1.1105068922042847,grad_norm: 0.9999991779122533, iteration: 284126
loss: 1.5042107105255127,grad_norm: 0.9999995576369399, iteration: 284127
loss: 1.2090182304382324,grad_norm: 0.9428977164930518, iteration: 284128
loss: 1.0877052545547485,grad_norm: 0.8050133896181941, iteration: 284129
loss: 1.058132290840149,grad_norm: 0.8532286249579482, iteration: 284130
loss: 1.0773009061813354,grad_norm: 0.9999997405313681, iteration: 284131
loss: 1.3274108171463013,grad_norm: 0.9999998190257431, iteration: 284132
loss: 1.0778716802597046,grad_norm: 0.9999991535567789, iteration: 284133
loss: 1.1727176904678345,grad_norm: 0.9999996544880471, iteration: 284134
loss: 1.4286975860595703,grad_norm: 0.9999995673585105, iteration: 284135
loss: 1.1629328727722168,grad_norm: 0.9999990716964864, iteration: 284136
loss: 1.3686671257019043,grad_norm: 0.9999998564306813, iteration: 284137
loss: 1.1076802015304565,grad_norm: 0.9999995459859933, iteration: 284138
loss: 1.349819302558899,grad_norm: 1.0000000693180071, iteration: 284139
loss: 1.3300318717956543,grad_norm: 1.00000010933494, iteration: 284140
loss: 1.202627182006836,grad_norm: 0.9999997924491104, iteration: 284141
loss: 1.1588248014450073,grad_norm: 0.9999998370784251, iteration: 284142
loss: 1.2418179512023926,grad_norm: 0.9999997144969464, iteration: 284143
loss: 1.2447296380996704,grad_norm: 0.9999999624173296, iteration: 284144
loss: 1.2787314653396606,grad_norm: 0.9999997174986726, iteration: 284145
loss: 1.193907618522644,grad_norm: 0.9999993398170902, iteration: 284146
loss: 1.2070367336273193,grad_norm: 0.9999997210619556, iteration: 284147
loss: 1.2307487726211548,grad_norm: 0.9999996694493619, iteration: 284148
loss: 1.3610429763793945,grad_norm: 0.9999997843953669, iteration: 284149
loss: 1.2059942483901978,grad_norm: 0.9999993001569866, iteration: 284150
loss: 1.160568118095398,grad_norm: 0.9999995764338528, iteration: 284151
loss: 1.1208512783050537,grad_norm: 0.9999992124124425, iteration: 284152
loss: 1.109663486480713,grad_norm: 0.8924156992995808, iteration: 284153
loss: 1.0879993438720703,grad_norm: 0.9999995360750049, iteration: 284154
loss: 1.5362528562545776,grad_norm: 0.9999998121114216, iteration: 284155
loss: 1.4231103658676147,grad_norm: 0.9999997037388835, iteration: 284156
loss: 1.478311538696289,grad_norm: 0.9999997063818319, iteration: 284157
loss: 1.5358902215957642,grad_norm: 0.9999999785245889, iteration: 284158
loss: 1.4098308086395264,grad_norm: 0.9999998509262246, iteration: 284159
loss: 1.3402047157287598,grad_norm: 0.9999998738868585, iteration: 284160
loss: 1.2851929664611816,grad_norm: 0.9999998253051849, iteration: 284161
loss: 1.1707159280776978,grad_norm: 0.9999998832522946, iteration: 284162
loss: 1.2073686122894287,grad_norm: 1.0000000160165594, iteration: 284163
loss: 1.211890459060669,grad_norm: 0.9999992537587832, iteration: 284164
loss: 1.346035122871399,grad_norm: 0.9999996765362998, iteration: 284165
loss: 1.0866495370864868,grad_norm: 0.9999996981327451, iteration: 284166
loss: 1.132676362991333,grad_norm: 0.9999996534910885, iteration: 284167
loss: 1.1482940912246704,grad_norm: 0.9999994436622688, iteration: 284168
loss: 1.1380336284637451,grad_norm: 0.9999996025949971, iteration: 284169
loss: 1.4338916540145874,grad_norm: 0.9999997451533978, iteration: 284170
loss: 1.0459465980529785,grad_norm: 0.9999992399487068, iteration: 284171
loss: 1.4463826417922974,grad_norm: 0.999999922185791, iteration: 284172
loss: 1.233826994895935,grad_norm: 0.9999999143344739, iteration: 284173
loss: 1.3046084642410278,grad_norm: 0.9999996274830798, iteration: 284174
loss: 1.3449194431304932,grad_norm: 0.9999997719539268, iteration: 284175
loss: 1.3414418697357178,grad_norm: 0.9999996675794636, iteration: 284176
loss: 1.221866488456726,grad_norm: 0.9999995092869148, iteration: 284177
loss: 1.174936056137085,grad_norm: 0.9999991944932879, iteration: 284178
loss: 1.0555949211120605,grad_norm: 0.9999997168031161, iteration: 284179
loss: 1.209923505783081,grad_norm: 0.9999996010814718, iteration: 284180
loss: 1.2753170728683472,grad_norm: 0.9999999728378288, iteration: 284181
loss: 1.3611599206924438,grad_norm: 0.9999998573460709, iteration: 284182
loss: 1.2324376106262207,grad_norm: 0.9999995757560705, iteration: 284183
loss: 1.1798577308654785,grad_norm: 0.9999995230343646, iteration: 284184
loss: 1.1993684768676758,grad_norm: 0.9999998698110537, iteration: 284185
loss: 1.612747073173523,grad_norm: 0.9999998001932308, iteration: 284186
loss: 1.2173900604248047,grad_norm: 0.9999999137810356, iteration: 284187
loss: 1.3449827432632446,grad_norm: 0.999999434223841, iteration: 284188
loss: 1.1031708717346191,grad_norm: 0.999999317884247, iteration: 284189
loss: 1.2256970405578613,grad_norm: 0.999999312767725, iteration: 284190
loss: 1.3178328275680542,grad_norm: 0.9999996915148093, iteration: 284191
loss: 1.2444161176681519,grad_norm: 0.9999993625996161, iteration: 284192
loss: 1.1593996286392212,grad_norm: 0.9999998396212713, iteration: 284193
loss: 1.2705764770507812,grad_norm: 0.9999997641692422, iteration: 284194
loss: 1.4809030294418335,grad_norm: 0.999999751007221, iteration: 284195
loss: 1.2694854736328125,grad_norm: 0.9999993885258437, iteration: 284196
loss: 1.5147247314453125,grad_norm: 0.9999996181634686, iteration: 284197
loss: 1.2963478565216064,grad_norm: 0.9999998263286276, iteration: 284198
loss: 1.497580885887146,grad_norm: 0.9999997425621114, iteration: 284199
loss: 1.3509737253189087,grad_norm: 0.999999784871114, iteration: 284200
loss: 1.221361517906189,grad_norm: 0.9999999313789457, iteration: 284201
loss: 1.2577333450317383,grad_norm: 0.9999998747750974, iteration: 284202
loss: 1.279166579246521,grad_norm: 0.9999994893318893, iteration: 284203
loss: 1.284522533416748,grad_norm: 0.9999993392347398, iteration: 284204
loss: 1.4366637468338013,grad_norm: 0.9999994171777767, iteration: 284205
loss: 1.5116266012191772,grad_norm: 0.9999997201102692, iteration: 284206
loss: 1.4016780853271484,grad_norm: 1.0000000176893045, iteration: 284207
loss: 1.396453857421875,grad_norm: 0.9999998132898101, iteration: 284208
loss: 1.3821889162063599,grad_norm: 0.9999998345557624, iteration: 284209
loss: 1.3192943334579468,grad_norm: 1.0000000089485288, iteration: 284210
loss: 1.4357033967971802,grad_norm: 0.999999800017337, iteration: 284211
loss: 1.3933086395263672,grad_norm: 0.9999999078523868, iteration: 284212
loss: 1.3936735391616821,grad_norm: 0.9999994379609033, iteration: 284213
loss: 1.566802978515625,grad_norm: 0.9999997802427494, iteration: 284214
loss: 1.3057341575622559,grad_norm: 0.9999997800763102, iteration: 284215
loss: 1.4700257778167725,grad_norm: 0.9999999032474765, iteration: 284216
loss: 1.4052698612213135,grad_norm: 0.9999996378310073, iteration: 284217
loss: 1.2898668050765991,grad_norm: 0.9999994521431785, iteration: 284218
loss: 1.3945506811141968,grad_norm: 0.9999995996680038, iteration: 284219
loss: 1.2736514806747437,grad_norm: 0.9999998358714178, iteration: 284220
loss: 1.3096792697906494,grad_norm: 0.9999999627949833, iteration: 284221
loss: 1.3930057287216187,grad_norm: 0.999999735787286, iteration: 284222
loss: 1.3782353401184082,grad_norm: 0.9999998531997703, iteration: 284223
loss: 1.433234691619873,grad_norm: 0.9999998420670569, iteration: 284224
loss: 1.4966230392456055,grad_norm: 0.9999997673183024, iteration: 284225
loss: 1.595502257347107,grad_norm: 0.9999999760980772, iteration: 284226
loss: 1.3810842037200928,grad_norm: 0.9999999645218838, iteration: 284227
loss: 1.6611993312835693,grad_norm: 0.9999998404792522, iteration: 284228
loss: 1.3306490182876587,grad_norm: 0.9999996366662006, iteration: 284229
loss: 1.3149133920669556,grad_norm: 0.9999999235410372, iteration: 284230
loss: 1.3093920946121216,grad_norm: 0.9999992873351904, iteration: 284231
loss: 1.4223291873931885,grad_norm: 0.9999997605635425, iteration: 284232
loss: 1.4100998640060425,grad_norm: 0.9999999457166511, iteration: 284233
loss: 1.3404865264892578,grad_norm: 0.999999920284282, iteration: 284234
loss: 1.431814432144165,grad_norm: 0.9999996879529, iteration: 284235
loss: 1.338296890258789,grad_norm: 0.999999344010131, iteration: 284236
loss: 1.7487210035324097,grad_norm: 0.9999999683355517, iteration: 284237
loss: 1.4266682863235474,grad_norm: 0.9999995467761136, iteration: 284238
loss: 1.290167212486267,grad_norm: 0.9999996999265882, iteration: 284239
loss: 1.573412299156189,grad_norm: 0.99999992333301, iteration: 284240
loss: 1.4630094766616821,grad_norm: 0.9999998387007153, iteration: 284241
loss: 1.6812443733215332,grad_norm: 0.9999997592510679, iteration: 284242
loss: 1.3099753856658936,grad_norm: 0.9999997808064555, iteration: 284243
loss: 1.4409831762313843,grad_norm: 0.9999997554911203, iteration: 284244
loss: 1.6786444187164307,grad_norm: 0.9999998247745159, iteration: 284245
loss: 1.5018495321273804,grad_norm: 0.9999998046072784, iteration: 284246
loss: 1.4984411001205444,grad_norm: 0.9999996128345379, iteration: 284247
loss: 1.4790611267089844,grad_norm: 0.9999996380155147, iteration: 284248
loss: 1.735677719116211,grad_norm: 0.9999998437624196, iteration: 284249
loss: 1.3130048513412476,grad_norm: 0.9999994914605671, iteration: 284250
loss: 1.6056885719299316,grad_norm: 0.9999996147380434, iteration: 284251
loss: 1.6143180131912231,grad_norm: 0.9999998273255826, iteration: 284252
loss: 1.4450503587722778,grad_norm: 1.0000000419821016, iteration: 284253
loss: 1.5226292610168457,grad_norm: 0.9999997588107618, iteration: 284254
loss: 1.4474291801452637,grad_norm: 0.999999896970158, iteration: 284255
loss: 1.3422493934631348,grad_norm: 0.9999994557970429, iteration: 284256
loss: 1.5462990999221802,grad_norm: 0.9999994619387333, iteration: 284257
loss: 1.483736515045166,grad_norm: 0.999999780277201, iteration: 284258
loss: 1.3552131652832031,grad_norm: 0.9999998914602879, iteration: 284259
loss: 1.3663709163665771,grad_norm: 0.9999998598824394, iteration: 284260
loss: 1.6037501096725464,grad_norm: 0.9999997096770454, iteration: 284261
loss: 1.3974628448486328,grad_norm: 0.9999996129109093, iteration: 284262
loss: 1.313319206237793,grad_norm: 0.9999992389149563, iteration: 284263
loss: 1.442651629447937,grad_norm: 0.9999996202557486, iteration: 284264
loss: 1.4873675107955933,grad_norm: 0.9999995404859848, iteration: 284265
loss: 1.2931180000305176,grad_norm: 0.9999994383084762, iteration: 284266
loss: 1.5684643983840942,grad_norm: 1.0000000349629192, iteration: 284267
loss: 1.646974802017212,grad_norm: 0.9999997142839977, iteration: 284268
loss: 1.2089548110961914,grad_norm: 0.9999996821255402, iteration: 284269
loss: 1.4919275045394897,grad_norm: 0.999999755090355, iteration: 284270
loss: 1.473039984703064,grad_norm: 0.9999998214495103, iteration: 284271
loss: 1.344922423362732,grad_norm: 0.9999996602898218, iteration: 284272
loss: 1.1735522747039795,grad_norm: 0.9999994523825069, iteration: 284273
loss: 1.1902719736099243,grad_norm: 0.9999994837167807, iteration: 284274
loss: 1.3642809391021729,grad_norm: 0.9999997716770108, iteration: 284275
loss: 1.358455777168274,grad_norm: 0.9999996770230566, iteration: 284276
loss: 1.4935184717178345,grad_norm: 0.9999999632776819, iteration: 284277
loss: 1.2081819772720337,grad_norm: 0.9999997577277189, iteration: 284278
loss: 1.430629849433899,grad_norm: 0.9999998638735148, iteration: 284279
loss: 1.0999107360839844,grad_norm: 0.9999997486087737, iteration: 284280
loss: 1.4919620752334595,grad_norm: 0.9999996870063306, iteration: 284281
loss: 1.2439497709274292,grad_norm: 0.9999998985066906, iteration: 284282
loss: 1.317143440246582,grad_norm: 0.9999997547891146, iteration: 284283
loss: 1.4156044721603394,grad_norm: 0.9999994006759912, iteration: 284284
loss: 1.5363483428955078,grad_norm: 0.9999997293752503, iteration: 284285
loss: 1.4358720779418945,grad_norm: 0.9999996047795912, iteration: 284286
loss: 1.2065584659576416,grad_norm: 0.9999998245232553, iteration: 284287
loss: 1.4966480731964111,grad_norm: 0.9999997345965339, iteration: 284288
loss: 1.4722909927368164,grad_norm: 0.9999995973524127, iteration: 284289
loss: 1.3501415252685547,grad_norm: 0.9999994869815022, iteration: 284290
loss: 1.475043773651123,grad_norm: 0.9999998415639424, iteration: 284291
loss: 1.270389199256897,grad_norm: 0.9999998321421887, iteration: 284292
loss: 1.3689645528793335,grad_norm: 0.9999997030789359, iteration: 284293
loss: 1.298426866531372,grad_norm: 0.9999996519587138, iteration: 284294
loss: 1.2188364267349243,grad_norm: 0.9999996318494677, iteration: 284295
loss: 1.2010735273361206,grad_norm: 0.9999997063454185, iteration: 284296
loss: 1.3238340616226196,grad_norm: 0.9999995337659467, iteration: 284297
loss: 1.5398489236831665,grad_norm: 0.9999998037050534, iteration: 284298
loss: 1.2914351224899292,grad_norm: 0.9999993906491751, iteration: 284299
loss: 1.3140504360198975,grad_norm: 0.9999994089997695, iteration: 284300
loss: 1.605920433998108,grad_norm: 0.9999999111709393, iteration: 284301
loss: 1.3587290048599243,grad_norm: 0.9999995840035478, iteration: 284302
loss: 1.2615824937820435,grad_norm: 0.999999865861088, iteration: 284303
loss: 1.4053961038589478,grad_norm: 0.999999696071045, iteration: 284304
loss: 1.3874216079711914,grad_norm: 1.0000000798315296, iteration: 284305
loss: 1.3328278064727783,grad_norm: 0.9999998799572773, iteration: 284306
loss: 1.4482383728027344,grad_norm: 0.9999998956845063, iteration: 284307
loss: 1.378782868385315,grad_norm: 1.0000000537506122, iteration: 284308
loss: 1.5352230072021484,grad_norm: 0.9999997600680816, iteration: 284309
loss: 1.2812377214431763,grad_norm: 0.9999998037058199, iteration: 284310
loss: 1.2794325351715088,grad_norm: 0.9999997315125241, iteration: 284311
loss: 1.2241185903549194,grad_norm: 0.9999997206327839, iteration: 284312
loss: 1.244397521018982,grad_norm: 0.9999994822545505, iteration: 284313
loss: 1.459038496017456,grad_norm: 0.9999996270492636, iteration: 284314
loss: 1.453220248222351,grad_norm: 0.9999993540988876, iteration: 284315
loss: 1.3557720184326172,grad_norm: 0.9999999625718652, iteration: 284316
loss: 1.4029908180236816,grad_norm: 0.9999997987443213, iteration: 284317
loss: 1.354416847229004,grad_norm: 0.9999998600663212, iteration: 284318
loss: 1.2933744192123413,grad_norm: 0.9999997886701585, iteration: 284319
loss: 1.1079119443893433,grad_norm: 0.9999992582020525, iteration: 284320
loss: 1.46608567237854,grad_norm: 0.9999996786662045, iteration: 284321
loss: 1.2475154399871826,grad_norm: 0.9999994269518858, iteration: 284322
loss: 1.3534425497055054,grad_norm: 0.9999996985339308, iteration: 284323
loss: 1.3650751113891602,grad_norm: 0.9999998146253632, iteration: 284324
loss: 1.3017189502716064,grad_norm: 0.9999993305483309, iteration: 284325
loss: 1.328193187713623,grad_norm: 0.9999997335981781, iteration: 284326
loss: 1.2860935926437378,grad_norm: 0.9999999192679692, iteration: 284327
loss: 1.298022747039795,grad_norm: 0.9999997724993859, iteration: 284328
loss: 1.4366923570632935,grad_norm: 0.9999997180413106, iteration: 284329
loss: 1.1369508504867554,grad_norm: 0.9999997251357124, iteration: 284330
loss: 1.211747646331787,grad_norm: 0.9999991699134584, iteration: 284331
loss: 1.381608486175537,grad_norm: 0.9999997271735673, iteration: 284332
loss: 1.2134580612182617,grad_norm: 0.9999994035201232, iteration: 284333
loss: 1.1441876888275146,grad_norm: 0.9999994545391617, iteration: 284334
loss: 1.2740089893341064,grad_norm: 0.9999997368369209, iteration: 284335
loss: 1.3643382787704468,grad_norm: 0.9999997150511842, iteration: 284336
loss: 1.223444938659668,grad_norm: 0.9999997880370995, iteration: 284337
loss: 1.2252776622772217,grad_norm: 0.9999993829869446, iteration: 284338
loss: 1.3704237937927246,grad_norm: 0.9999999922518724, iteration: 284339
loss: 1.5630296468734741,grad_norm: 0.9999997208107974, iteration: 284340
loss: 1.346448302268982,grad_norm: 0.9999996675614613, iteration: 284341
loss: 1.3092000484466553,grad_norm: 0.9999997752300054, iteration: 284342
loss: 1.2296032905578613,grad_norm: 0.9999999052795349, iteration: 284343
loss: 1.2894896268844604,grad_norm: 0.9999997464968643, iteration: 284344
loss: 1.2901325225830078,grad_norm: 0.9999997649911383, iteration: 284345
loss: 1.151715636253357,grad_norm: 0.9999998048077844, iteration: 284346
loss: 1.0988727807998657,grad_norm: 0.9999992664427528, iteration: 284347
loss: 1.1492117643356323,grad_norm: 0.9999995971369926, iteration: 284348
loss: 1.3453439474105835,grad_norm: 0.9999997805676832, iteration: 284349
loss: 1.2154184579849243,grad_norm: 0.9999999066829005, iteration: 284350
loss: 1.3426523208618164,grad_norm: 0.9999998100833707, iteration: 284351
loss: 1.3457374572753906,grad_norm: 0.9999996097639955, iteration: 284352
loss: 1.233035922050476,grad_norm: 0.9999996022751649, iteration: 284353
loss: 1.3096450567245483,grad_norm: 0.9999999101821305, iteration: 284354
loss: 1.2590489387512207,grad_norm: 0.9999993919649423, iteration: 284355
loss: 1.2075369358062744,grad_norm: 0.9999999319229234, iteration: 284356
loss: 1.301928162574768,grad_norm: 0.9999998910345239, iteration: 284357
loss: 1.3431227207183838,grad_norm: 0.9999997900999663, iteration: 284358
loss: 1.3698077201843262,grad_norm: 0.9999999489067675, iteration: 284359
loss: 1.19991934299469,grad_norm: 0.999999536162146, iteration: 284360
loss: 1.3363635540008545,grad_norm: 0.999999448829245, iteration: 284361
loss: 1.1350610256195068,grad_norm: 0.9999995151531496, iteration: 284362
loss: 1.2945940494537354,grad_norm: 0.9999993384247583, iteration: 284363
loss: 1.3838552236557007,grad_norm: 0.999999693499758, iteration: 284364
loss: 1.3206485509872437,grad_norm: 0.9999999525578365, iteration: 284365
loss: 1.3899567127227783,grad_norm: 0.9999995746372451, iteration: 284366
loss: 1.0549471378326416,grad_norm: 0.9999992081283443, iteration: 284367
loss: 1.1874016523361206,grad_norm: 0.999999404664477, iteration: 284368
loss: 1.4651985168457031,grad_norm: 0.9999996249732315, iteration: 284369
loss: 1.198527455329895,grad_norm: 1.000000040711222, iteration: 284370
loss: 1.2749735116958618,grad_norm: 0.9999996104137263, iteration: 284371
loss: 1.170103907585144,grad_norm: 0.9999992860145663, iteration: 284372
loss: 1.1291910409927368,grad_norm: 0.9999997637525148, iteration: 284373
loss: 1.3201597929000854,grad_norm: 0.9999999174713136, iteration: 284374
loss: 1.2523688077926636,grad_norm: 0.9999998585777929, iteration: 284375
loss: 1.2798796892166138,grad_norm: 0.9999993391914449, iteration: 284376
loss: 1.4681994915008545,grad_norm: 0.999999759963939, iteration: 284377
loss: 1.0608681440353394,grad_norm: 0.999999402577013, iteration: 284378
loss: 1.1504995822906494,grad_norm: 0.9999995052504039, iteration: 284379
loss: 1.2632149457931519,grad_norm: 0.9999995532310871, iteration: 284380
loss: 1.3483041524887085,grad_norm: 0.9999994856856839, iteration: 284381
loss: 1.1172854900360107,grad_norm: 0.9999997503405135, iteration: 284382
loss: 1.0906755924224854,grad_norm: 0.9999993527413727, iteration: 284383
loss: 1.3522533178329468,grad_norm: 0.9999993947275668, iteration: 284384
loss: 1.3909085988998413,grad_norm: 0.9999999139785806, iteration: 284385
loss: 1.3227150440216064,grad_norm: 0.9999997663056395, iteration: 284386
loss: 1.3473137617111206,grad_norm: 0.9999997785102257, iteration: 284387
loss: 1.2731119394302368,grad_norm: 0.9999996995788387, iteration: 284388
loss: 1.1759581565856934,grad_norm: 0.9999993131959154, iteration: 284389
loss: 1.1598302125930786,grad_norm: 0.9999997982030921, iteration: 284390
loss: 1.3479816913604736,grad_norm: 0.999999795298946, iteration: 284391
loss: 1.2557096481323242,grad_norm: 0.9999996572851174, iteration: 284392
loss: 1.130171298980713,grad_norm: 0.9999994405120348, iteration: 284393
loss: 1.2345473766326904,grad_norm: 0.9999998287040355, iteration: 284394
loss: 1.442978858947754,grad_norm: 0.9999998250221165, iteration: 284395
loss: 1.056230902671814,grad_norm: 0.9999995041200326, iteration: 284396
loss: 1.1997957229614258,grad_norm: 0.9999998823128485, iteration: 284397
loss: 1.132598876953125,grad_norm: 0.9999998094055014, iteration: 284398
loss: 1.2691025733947754,grad_norm: 0.9999997879475172, iteration: 284399
loss: 1.2388169765472412,grad_norm: 0.9999998774742592, iteration: 284400
loss: 1.1578397750854492,grad_norm: 0.9999994540055799, iteration: 284401
loss: 1.4334458112716675,grad_norm: 0.999999484744876, iteration: 284402
loss: 1.2376320362091064,grad_norm: 0.9999994378077045, iteration: 284403
loss: 1.3154611587524414,grad_norm: 0.9999998879475572, iteration: 284404
loss: 1.2702016830444336,grad_norm: 0.9999999300580916, iteration: 284405
loss: 1.497499942779541,grad_norm: 0.999999884927694, iteration: 284406
loss: 1.138101577758789,grad_norm: 0.9999995423501221, iteration: 284407
loss: 1.1658873558044434,grad_norm: 0.9999997333442924, iteration: 284408
loss: 1.2509510517120361,grad_norm: 0.9999995108540513, iteration: 284409
loss: 1.1148508787155151,grad_norm: 0.9999999947007379, iteration: 284410
loss: 1.1647660732269287,grad_norm: 0.9999996728237358, iteration: 284411
loss: 1.4786432981491089,grad_norm: 0.9999997287388834, iteration: 284412
loss: 1.3923981189727783,grad_norm: 0.9999997002322433, iteration: 284413
loss: 1.200831413269043,grad_norm: 0.999999405771654, iteration: 284414
loss: 1.2143603563308716,grad_norm: 0.9999995611462791, iteration: 284415
loss: 1.281649112701416,grad_norm: 0.9999995119830603, iteration: 284416
loss: 1.3600236177444458,grad_norm: 0.9999997209153045, iteration: 284417
loss: 1.3601548671722412,grad_norm: 0.9999993713891572, iteration: 284418
loss: 1.3346086740493774,grad_norm: 0.9999995288093187, iteration: 284419
loss: 1.2472257614135742,grad_norm: 0.9999995460163873, iteration: 284420
loss: 1.1354162693023682,grad_norm: 0.9999991181039579, iteration: 284421
loss: 1.210753321647644,grad_norm: 0.9999997922725057, iteration: 284422
loss: 1.2264446020126343,grad_norm: 1.0000000140624115, iteration: 284423
loss: 1.1581368446350098,grad_norm: 0.9999991277811938, iteration: 284424
loss: 1.1913739442825317,grad_norm: 0.9999991571349419, iteration: 284425
loss: 1.1422847509384155,grad_norm: 0.9999993135887161, iteration: 284426
loss: 1.1824500560760498,grad_norm: 0.9999992593189094, iteration: 284427
loss: 1.4450041055679321,grad_norm: 0.9999996642613092, iteration: 284428
loss: 1.2199161052703857,grad_norm: 0.999999780497853, iteration: 284429
loss: 1.3132704496383667,grad_norm: 0.999999320261324, iteration: 284430
loss: 1.5246038436889648,grad_norm: 0.9999999011068345, iteration: 284431
loss: 1.3469187021255493,grad_norm: 0.9999994911341212, iteration: 284432
loss: 1.2722840309143066,grad_norm: 0.9999994154248517, iteration: 284433
loss: 1.3557482957839966,grad_norm: 0.9999999412862688, iteration: 284434
loss: 1.2736847400665283,grad_norm: 0.9999993738836981, iteration: 284435
loss: 1.4423704147338867,grad_norm: 0.9999997764753458, iteration: 284436
loss: 1.2131935358047485,grad_norm: 0.9999996084575383, iteration: 284437
loss: 1.3391040563583374,grad_norm: 0.9999998887176316, iteration: 284438
loss: 1.1398459672927856,grad_norm: 0.9999995099032263, iteration: 284439
loss: 1.3223397731781006,grad_norm: 0.9999999085141302, iteration: 284440
loss: 1.3989887237548828,grad_norm: 0.9999995462853265, iteration: 284441
loss: 1.3179324865341187,grad_norm: 0.9999999778937706, iteration: 284442
loss: 1.2158007621765137,grad_norm: 0.9999997830948504, iteration: 284443
loss: 1.1975351572036743,grad_norm: 0.9999995998153995, iteration: 284444
loss: 1.2050559520721436,grad_norm: 0.9999995203486003, iteration: 284445
loss: 1.1201807260513306,grad_norm: 0.9999992272586731, iteration: 284446
loss: 1.2059999704360962,grad_norm: 0.9999998311764464, iteration: 284447
loss: 1.3616305589675903,grad_norm: 0.9999993637165868, iteration: 284448
loss: 1.212496042251587,grad_norm: 0.9999997083913847, iteration: 284449
loss: 1.260602593421936,grad_norm: 1.0000000463441607, iteration: 284450
loss: 1.2057877779006958,grad_norm: 0.9999996643865285, iteration: 284451
loss: 1.3694528341293335,grad_norm: 0.9999996831283215, iteration: 284452
loss: 1.2637683153152466,grad_norm: 0.9999998661416925, iteration: 284453
loss: 1.3328818082809448,grad_norm: 0.99999993564974, iteration: 284454
loss: 1.239943027496338,grad_norm: 0.9999998656889634, iteration: 284455
loss: 1.425838589668274,grad_norm: 0.999999892110137, iteration: 284456
loss: 1.232834815979004,grad_norm: 0.9999999939120274, iteration: 284457
loss: 1.3072903156280518,grad_norm: 0.9999998772264364, iteration: 284458
loss: 1.1636497974395752,grad_norm: 0.999999520157682, iteration: 284459
loss: 1.4358075857162476,grad_norm: 0.9999999365599143, iteration: 284460
loss: 1.4080334901809692,grad_norm: 0.9999999368648537, iteration: 284461
loss: 1.1911171674728394,grad_norm: 0.9999993099635602, iteration: 284462
loss: 1.1323765516281128,grad_norm: 0.9807076707244424, iteration: 284463
loss: 1.6597644090652466,grad_norm: 0.9999998507089519, iteration: 284464
loss: 1.1855385303497314,grad_norm: 0.9999994455596134, iteration: 284465
loss: 1.1218609809875488,grad_norm: 0.9999999062354905, iteration: 284466
loss: 1.189843773841858,grad_norm: 0.9999997420806953, iteration: 284467
loss: 1.1447416543960571,grad_norm: 0.9999998363023347, iteration: 284468
loss: 1.1456079483032227,grad_norm: 0.9999995074790647, iteration: 284469
loss: 1.3326492309570312,grad_norm: 0.9999998582126842, iteration: 284470
loss: 1.3240677118301392,grad_norm: 0.9999996465858059, iteration: 284471
loss: 1.3210828304290771,grad_norm: 0.9999999247456633, iteration: 284472
loss: 1.1858893632888794,grad_norm: 0.9999997089882792, iteration: 284473
loss: 1.2346864938735962,grad_norm: 0.9999994668266273, iteration: 284474
loss: 1.1404457092285156,grad_norm: 0.999999980696065, iteration: 284475
loss: 1.1974844932556152,grad_norm: 0.9999999193719762, iteration: 284476
loss: 1.5212481021881104,grad_norm: 1.0000000233597515, iteration: 284477
loss: 1.4277846813201904,grad_norm: 0.9999998011445363, iteration: 284478
loss: 1.1902875900268555,grad_norm: 0.999999289528612, iteration: 284479
loss: 1.2865447998046875,grad_norm: 1.0000000448412971, iteration: 284480
loss: 1.2515790462493896,grad_norm: 0.9999998276672353, iteration: 284481
loss: 1.2048288583755493,grad_norm: 0.9999998436466735, iteration: 284482
loss: 1.5897668600082397,grad_norm: 0.9999996618305277, iteration: 284483
loss: 1.4822421073913574,grad_norm: 0.9999996190535212, iteration: 284484
loss: 1.2539361715316772,grad_norm: 0.9999996533244647, iteration: 284485
loss: 1.227181315422058,grad_norm: 0.9999998157326052, iteration: 284486
loss: 1.2375911474227905,grad_norm: 0.9999999511667149, iteration: 284487
loss: 1.4343111515045166,grad_norm: 0.9999997006614023, iteration: 284488
loss: 1.2821228504180908,grad_norm: 0.9999993393373441, iteration: 284489
loss: 1.1808465719223022,grad_norm: 0.9999997401871031, iteration: 284490
loss: 1.1829168796539307,grad_norm: 0.9999994380665943, iteration: 284491
loss: 1.3491109609603882,grad_norm: 0.9999998023534589, iteration: 284492
loss: 1.1633915901184082,grad_norm: 0.9999997187586913, iteration: 284493
loss: 1.1511602401733398,grad_norm: 0.99999956879555, iteration: 284494
loss: 1.5072052478790283,grad_norm: 0.999999679895369, iteration: 284495
loss: 1.2636641263961792,grad_norm: 0.9999998699964173, iteration: 284496
loss: 1.289723515510559,grad_norm: 0.9999999336870135, iteration: 284497
loss: 1.2474288940429688,grad_norm: 0.9999992721695588, iteration: 284498
loss: 1.3925293684005737,grad_norm: 0.9999996604103434, iteration: 284499
loss: 1.2866621017456055,grad_norm: 0.9999997380204999, iteration: 284500
loss: 1.2150506973266602,grad_norm: 0.9999996959635896, iteration: 284501
loss: 1.2291685342788696,grad_norm: 0.9999994819578668, iteration: 284502
loss: 1.4359889030456543,grad_norm: 0.9999995164890797, iteration: 284503
loss: 1.3324741125106812,grad_norm: 0.9999999420321786, iteration: 284504
loss: 1.3600050210952759,grad_norm: 0.9999992187819625, iteration: 284505
loss: 1.1680759191513062,grad_norm: 0.9999992556565224, iteration: 284506
loss: 1.3321079015731812,grad_norm: 0.9999996345213209, iteration: 284507
loss: 1.099260687828064,grad_norm: 0.9999995622711494, iteration: 284508
loss: 1.3458919525146484,grad_norm: 0.999999716774724, iteration: 284509
loss: 1.3497850894927979,grad_norm: 0.9999993262941648, iteration: 284510
loss: 1.1895602941513062,grad_norm: 1.0000000323428788, iteration: 284511
loss: 1.2907214164733887,grad_norm: 0.9999998243395504, iteration: 284512
loss: 1.4973862171173096,grad_norm: 0.999999703629502, iteration: 284513
loss: 1.2069340944290161,grad_norm: 0.999999496562335, iteration: 284514
loss: 1.1671234369277954,grad_norm: 0.9999997067016767, iteration: 284515
loss: 1.188501000404358,grad_norm: 0.9999998435613392, iteration: 284516
loss: 1.2424081563949585,grad_norm: 0.9999997353200119, iteration: 284517
loss: 1.2184746265411377,grad_norm: 0.9999999831674692, iteration: 284518
loss: 1.1709774732589722,grad_norm: 0.9999999423818451, iteration: 284519
loss: 1.2096790075302124,grad_norm: 0.9999991460835024, iteration: 284520
loss: 1.4034696817398071,grad_norm: 0.9999996695361577, iteration: 284521
loss: 1.3191378116607666,grad_norm: 0.9999993243141032, iteration: 284522
loss: 1.257370114326477,grad_norm: 0.9999995665672655, iteration: 284523
loss: 1.1772239208221436,grad_norm: 0.9999999123819293, iteration: 284524
loss: 1.3456577062606812,grad_norm: 0.9999996563620125, iteration: 284525
loss: 1.0984532833099365,grad_norm: 0.9999994701389683, iteration: 284526
loss: 1.3056904077529907,grad_norm: 0.9999998509201049, iteration: 284527
loss: 1.1355226039886475,grad_norm: 0.999999372088658, iteration: 284528
loss: 1.1857975721359253,grad_norm: 0.9999997907688942, iteration: 284529
loss: 1.3122628927230835,grad_norm: 0.9999999182946935, iteration: 284530
loss: 1.0988301038742065,grad_norm: 0.9999995156388384, iteration: 284531
loss: 1.5611504316329956,grad_norm: 0.9999997502193642, iteration: 284532
loss: 1.2029634714126587,grad_norm: 0.9999996472722048, iteration: 284533
loss: 1.162697196006775,grad_norm: 0.9999996021097137, iteration: 284534
loss: 1.1257965564727783,grad_norm: 0.9999997684334991, iteration: 284535
loss: 1.1497743129730225,grad_norm: 0.9999991004472042, iteration: 284536
loss: 1.1496068239212036,grad_norm: 0.9999996198017956, iteration: 284537
loss: 1.2461069822311401,grad_norm: 0.9999997604487395, iteration: 284538
loss: 1.1077414751052856,grad_norm: 0.999999504533173, iteration: 284539
loss: 1.2641748189926147,grad_norm: 0.999999969696528, iteration: 284540
loss: 1.3593002557754517,grad_norm: 0.9999999510443203, iteration: 284541
loss: 1.2360410690307617,grad_norm: 0.9999998517882683, iteration: 284542
loss: 1.2917119264602661,grad_norm: 0.9999994785389137, iteration: 284543
loss: 1.2788126468658447,grad_norm: 0.9999996125734031, iteration: 284544
loss: 1.2878565788269043,grad_norm: 0.9999997069652453, iteration: 284545
loss: 1.258765697479248,grad_norm: 0.9999997993472616, iteration: 284546
loss: 1.1158998012542725,grad_norm: 0.9999996367386647, iteration: 284547
loss: 1.2433899641036987,grad_norm: 0.9999998273285161, iteration: 284548
loss: 1.3029497861862183,grad_norm: 0.9999997983252347, iteration: 284549
loss: 1.233994960784912,grad_norm: 0.9999995459235246, iteration: 284550
loss: 1.2354657649993896,grad_norm: 0.9999999006434768, iteration: 284551
loss: 1.2706668376922607,grad_norm: 0.9999997409816727, iteration: 284552
loss: 1.2323150634765625,grad_norm: 0.9999993110232749, iteration: 284553
loss: 1.1758251190185547,grad_norm: 0.9999997114283574, iteration: 284554
loss: 1.2506648302078247,grad_norm: 0.9999994516339784, iteration: 284555
loss: 1.4677869081497192,grad_norm: 0.9999999651635936, iteration: 284556
loss: 1.2992335557937622,grad_norm: 0.9999997329305655, iteration: 284557
loss: 1.3493293523788452,grad_norm: 0.999999731547325, iteration: 284558
loss: 1.295988917350769,grad_norm: 0.9999997405334403, iteration: 284559
loss: 1.2045323848724365,grad_norm: 0.9999996348950037, iteration: 284560
loss: 1.137008547782898,grad_norm: 0.9999997153584316, iteration: 284561
loss: 1.2933412790298462,grad_norm: 0.9999997948693462, iteration: 284562
loss: 1.197595238685608,grad_norm: 0.999999727632786, iteration: 284563
loss: 1.3040382862091064,grad_norm: 0.9999996158335093, iteration: 284564
loss: 1.2607145309448242,grad_norm: 0.9999997409841793, iteration: 284565
loss: 1.1764888763427734,grad_norm: 0.99999971339978, iteration: 284566
loss: 1.1542333364486694,grad_norm: 0.9999992121749826, iteration: 284567
loss: 1.1913264989852905,grad_norm: 0.9999998167470935, iteration: 284568
loss: 1.1192659139633179,grad_norm: 0.9999994443499105, iteration: 284569
loss: 1.2503973245620728,grad_norm: 0.9999993259768589, iteration: 284570
loss: 1.190406084060669,grad_norm: 0.9999994787771351, iteration: 284571
loss: 1.3095754384994507,grad_norm: 1.0000000682132302, iteration: 284572
loss: 1.1282992362976074,grad_norm: 0.9999994711673496, iteration: 284573
loss: 1.1623570919036865,grad_norm: 0.9999992757725857, iteration: 284574
loss: 1.1463665962219238,grad_norm: 0.9999999734142683, iteration: 284575
loss: 1.4033392667770386,grad_norm: 0.999999464959868, iteration: 284576
loss: 1.1829866170883179,grad_norm: 0.9999996211384056, iteration: 284577
loss: 1.3754103183746338,grad_norm: 0.9999996844956555, iteration: 284578
loss: 1.3197747468948364,grad_norm: 0.9999997208446892, iteration: 284579
loss: 1.3326793909072876,grad_norm: 0.9999996842044994, iteration: 284580
loss: 1.2598150968551636,grad_norm: 1.000000014400777, iteration: 284581
loss: 1.2164876461029053,grad_norm: 0.99999968742499, iteration: 284582
loss: 1.1757972240447998,grad_norm: 0.999999750787182, iteration: 284583
loss: 1.3679741621017456,grad_norm: 0.9999998357358337, iteration: 284584
loss: 1.2331622838974,grad_norm: 1.0000000386563481, iteration: 284585
loss: 1.2470699548721313,grad_norm: 0.9999998440407754, iteration: 284586
loss: 1.2619438171386719,grad_norm: 0.9999999615629647, iteration: 284587
loss: 1.4405447244644165,grad_norm: 0.999999793200671, iteration: 284588
loss: 1.2932227849960327,grad_norm: 0.9999994109383449, iteration: 284589
loss: 1.4153743982315063,grad_norm: 0.9999999179191049, iteration: 284590
loss: 1.4436091184616089,grad_norm: 0.9999998672481001, iteration: 284591
loss: 1.2129799127578735,grad_norm: 0.999999533814669, iteration: 284592
loss: 1.3679360151290894,grad_norm: 0.9999997962783206, iteration: 284593
loss: 1.12477445602417,grad_norm: 0.9999996634772492, iteration: 284594
loss: 1.478102445602417,grad_norm: 0.9999999426884433, iteration: 284595
loss: 1.1418828964233398,grad_norm: 0.9999998483118092, iteration: 284596
loss: 1.2387428283691406,grad_norm: 0.9999997166553335, iteration: 284597
loss: 1.2020375728607178,grad_norm: 0.9999996641283712, iteration: 284598
loss: 1.2435121536254883,grad_norm: 0.999999286205261, iteration: 284599
loss: 1.2482038736343384,grad_norm: 0.9999997810091021, iteration: 284600
loss: 1.2701247930526733,grad_norm: 0.9999993716829476, iteration: 284601
loss: 1.2347955703735352,grad_norm: 0.9999997058526351, iteration: 284602
loss: 1.302666187286377,grad_norm: 0.9999997191121668, iteration: 284603
loss: 1.2304309606552124,grad_norm: 0.9999994689308556, iteration: 284604
loss: 1.3611369132995605,grad_norm: 0.9999996309316479, iteration: 284605
loss: 1.0875470638275146,grad_norm: 0.9999994578311229, iteration: 284606
loss: 1.157170057296753,grad_norm: 0.9999997430802311, iteration: 284607
loss: 1.2650684118270874,grad_norm: 0.9999998872830962, iteration: 284608
loss: 1.2676883935928345,grad_norm: 0.9999998311904521, iteration: 284609
loss: 1.1158068180084229,grad_norm: 0.9999990570511202, iteration: 284610
loss: 1.3730816841125488,grad_norm: 0.9999999145572491, iteration: 284611
loss: 1.37324059009552,grad_norm: 0.9999996974284403, iteration: 284612
loss: 1.3211236000061035,grad_norm: 1.000000031345149, iteration: 284613
loss: 1.2586504220962524,grad_norm: 0.9999997226897679, iteration: 284614
loss: 1.25385320186615,grad_norm: 0.9999999229199386, iteration: 284615
loss: 1.1971795558929443,grad_norm: 0.9999998481016643, iteration: 284616
loss: 1.3103927373886108,grad_norm: 0.9999998944109244, iteration: 284617
loss: 1.2084903717041016,grad_norm: 0.999999803480121, iteration: 284618
loss: 1.110674500465393,grad_norm: 0.9999994233660104, iteration: 284619
loss: 1.129680871963501,grad_norm: 0.9999999452837723, iteration: 284620
loss: 1.3229461908340454,grad_norm: 0.9999999221328806, iteration: 284621
loss: 1.4418630599975586,grad_norm: 0.9999997691214727, iteration: 284622
loss: 1.2364193201065063,grad_norm: 0.9999999600364893, iteration: 284623
loss: 1.336338996887207,grad_norm: 0.9999997553705584, iteration: 284624
loss: 1.1133947372436523,grad_norm: 0.9999993292515946, iteration: 284625
loss: 1.3448270559310913,grad_norm: 0.9999998706044636, iteration: 284626
loss: 1.2421616315841675,grad_norm: 0.9999994406743375, iteration: 284627
loss: 1.2636715173721313,grad_norm: 0.9999993694922369, iteration: 284628
loss: 1.3285951614379883,grad_norm: 0.9999997200567087, iteration: 284629
loss: 1.5397682189941406,grad_norm: 0.9999998441003817, iteration: 284630
loss: 1.1484298706054688,grad_norm: 0.9999998747223517, iteration: 284631
loss: 1.1516846418380737,grad_norm: 0.9999993024637116, iteration: 284632
loss: 1.1071666479110718,grad_norm: 0.9999993450106074, iteration: 284633
loss: 1.1580054759979248,grad_norm: 0.9999997096350647, iteration: 284634
loss: 1.3178602457046509,grad_norm: 0.9999996287319669, iteration: 284635
loss: 1.286693811416626,grad_norm: 0.9999997596825088, iteration: 284636
loss: 1.1844291687011719,grad_norm: 0.9999994829524693, iteration: 284637
loss: 1.1308151483535767,grad_norm: 0.9999995654443106, iteration: 284638
loss: 1.2367558479309082,grad_norm: 0.9999993779243691, iteration: 284639
loss: 1.2519229650497437,grad_norm: 0.999999660709528, iteration: 284640
loss: 1.3878823518753052,grad_norm: 0.9999996423442142, iteration: 284641
loss: 1.32180655002594,grad_norm: 0.9999995303248411, iteration: 284642
loss: 1.1721304655075073,grad_norm: 0.9999993930322446, iteration: 284643
loss: 1.4825414419174194,grad_norm: 0.9999995866068746, iteration: 284644
loss: 1.104465126991272,grad_norm: 0.9999994112313573, iteration: 284645
loss: 1.2468836307525635,grad_norm: 0.9999999560269319, iteration: 284646
loss: 1.100794792175293,grad_norm: 0.9999992767228886, iteration: 284647
loss: 1.3176870346069336,grad_norm: 0.9999998894988578, iteration: 284648
loss: 1.1749155521392822,grad_norm: 0.9999996184065783, iteration: 284649
loss: 1.2765765190124512,grad_norm: 0.9999992057119275, iteration: 284650
loss: 1.2802058458328247,grad_norm: 0.9999998204803877, iteration: 284651
loss: 1.2240701913833618,grad_norm: 0.9999996928927531, iteration: 284652
loss: 1.1880106925964355,grad_norm: 0.9999993397873853, iteration: 284653
loss: 1.070828914642334,grad_norm: 0.9999992243323194, iteration: 284654
loss: 1.0803643465042114,grad_norm: 0.9999996457051117, iteration: 284655
loss: 1.1733862161636353,grad_norm: 0.999999928887525, iteration: 284656
loss: 1.1834564208984375,grad_norm: 0.9999992760944446, iteration: 284657
loss: 1.2390532493591309,grad_norm: 0.9999996024363094, iteration: 284658
loss: 1.2332305908203125,grad_norm: 0.9999994046279588, iteration: 284659
loss: 1.1702897548675537,grad_norm: 1.0000000493549632, iteration: 284660
loss: 1.1394847631454468,grad_norm: 0.9999999298506612, iteration: 284661
loss: 1.235164761543274,grad_norm: 0.999999424855169, iteration: 284662
loss: 1.0552152395248413,grad_norm: 0.9999992756536509, iteration: 284663
loss: 1.0699779987335205,grad_norm: 0.9999998986005668, iteration: 284664
loss: 1.3037538528442383,grad_norm: 0.9999994594410989, iteration: 284665
loss: 1.013558030128479,grad_norm: 0.9999992398484302, iteration: 284666
loss: 1.2262648344039917,grad_norm: 0.9999996687845545, iteration: 284667
loss: 1.1499403715133667,grad_norm: 0.9999997864232123, iteration: 284668
loss: 1.186744213104248,grad_norm: 0.9999993477759598, iteration: 284669
loss: 1.1686369180679321,grad_norm: 0.9999996135278607, iteration: 284670
loss: 1.2239940166473389,grad_norm: 0.9999993618774042, iteration: 284671
loss: 1.1256593465805054,grad_norm: 0.9999994796538192, iteration: 284672
loss: 1.2943923473358154,grad_norm: 0.9999994615329734, iteration: 284673
loss: 1.2311878204345703,grad_norm: 0.9999997015485617, iteration: 284674
loss: 1.3861898183822632,grad_norm: 0.9999999045084743, iteration: 284675
loss: 1.1788690090179443,grad_norm: 0.9999995283012735, iteration: 284676
loss: 1.214720368385315,grad_norm: 1.0000000548114203, iteration: 284677
loss: 1.299987554550171,grad_norm: 0.9999998793208621, iteration: 284678
loss: 1.0965030193328857,grad_norm: 0.9999992444183813, iteration: 284679
loss: 1.2296738624572754,grad_norm: 0.9999996983695698, iteration: 284680
loss: 1.2797034978866577,grad_norm: 0.9999996975274051, iteration: 284681
loss: 1.1962584257125854,grad_norm: 0.9999994659629587, iteration: 284682
loss: 1.1765363216400146,grad_norm: 0.9999998581729089, iteration: 284683
loss: 1.1061229705810547,grad_norm: 0.9999992767649639, iteration: 284684
loss: 1.1347841024398804,grad_norm: 0.9999993268900508, iteration: 284685
loss: 1.294245719909668,grad_norm: 0.9999993370188324, iteration: 284686
loss: 1.1623414754867554,grad_norm: 0.9999992595903402, iteration: 284687
loss: 1.1301857233047485,grad_norm: 0.9999999298812291, iteration: 284688
loss: 1.2337180376052856,grad_norm: 0.9999996505233849, iteration: 284689
loss: 1.4168771505355835,grad_norm: 0.9999998787153783, iteration: 284690
loss: 1.1339340209960938,grad_norm: 0.9999997640397823, iteration: 284691
loss: 1.0982340574264526,grad_norm: 0.9999993606401626, iteration: 284692
loss: 1.1634092330932617,grad_norm: 0.9999996605584942, iteration: 284693
loss: 1.2111982107162476,grad_norm: 0.9999995481712612, iteration: 284694
loss: 1.2725368738174438,grad_norm: 0.9999999096488097, iteration: 284695
loss: 1.3397669792175293,grad_norm: 0.9999997017519445, iteration: 284696
loss: 1.1774269342422485,grad_norm: 0.999999451982717, iteration: 284697
loss: 1.4144617319107056,grad_norm: 0.9999999077564156, iteration: 284698
loss: 1.4867864847183228,grad_norm: 0.999999923346946, iteration: 284699
loss: 1.504958987236023,grad_norm: 0.9999999175667562, iteration: 284700
loss: 1.4640977382659912,grad_norm: 1.00000000701127, iteration: 284701
loss: 1.3321056365966797,grad_norm: 0.999999887357268, iteration: 284702
loss: 1.1395442485809326,grad_norm: 0.9999994886033629, iteration: 284703
loss: 1.1497600078582764,grad_norm: 0.9999994335334774, iteration: 284704
loss: 1.205041766166687,grad_norm: 0.999999788276497, iteration: 284705
loss: 1.3284785747528076,grad_norm: 0.9999997875132061, iteration: 284706
loss: 1.1726607084274292,grad_norm: 0.9999999582289871, iteration: 284707
loss: 1.5409278869628906,grad_norm: 0.9999999297878062, iteration: 284708
loss: 1.4279922246932983,grad_norm: 0.999999508679167, iteration: 284709
loss: 1.2739096879959106,grad_norm: 0.999999917608696, iteration: 284710
loss: 1.2208198308944702,grad_norm: 0.9999999257910699, iteration: 284711
loss: 1.389955759048462,grad_norm: 0.9999996959356514, iteration: 284712
loss: 1.4124891757965088,grad_norm: 0.9999998431033573, iteration: 284713
loss: 1.3564661741256714,grad_norm: 0.9999995870840257, iteration: 284714
loss: 1.208842396736145,grad_norm: 0.9999996085288587, iteration: 284715
loss: 1.2201857566833496,grad_norm: 0.9999996823784075, iteration: 284716
loss: 1.2314088344573975,grad_norm: 0.9999995047815725, iteration: 284717
loss: 1.3387147188186646,grad_norm: 0.9999998447311893, iteration: 284718
loss: 1.359899878501892,grad_norm: 0.999999304241453, iteration: 284719
loss: 1.2026294469833374,grad_norm: 0.999999571098349, iteration: 284720
loss: 1.2385170459747314,grad_norm: 0.9999995931783778, iteration: 284721
loss: 1.1806490421295166,grad_norm: 0.999999568178619, iteration: 284722
loss: 1.1443434953689575,grad_norm: 0.9999996212782833, iteration: 284723
loss: 1.2715541124343872,grad_norm: 0.9999997325661397, iteration: 284724
loss: 1.3315374851226807,grad_norm: 0.9999999824676532, iteration: 284725
loss: 1.1557931900024414,grad_norm: 0.9999997274318825, iteration: 284726
loss: 1.3322196006774902,grad_norm: 0.9999998053479495, iteration: 284727
loss: 1.0820198059082031,grad_norm: 0.9999996371693265, iteration: 284728
loss: 1.3819574117660522,grad_norm: 0.9999996861606637, iteration: 284729
loss: 1.0928893089294434,grad_norm: 0.9999997477322621, iteration: 284730
loss: 1.3304959535598755,grad_norm: 0.9999999647447502, iteration: 284731
loss: 1.269384503364563,grad_norm: 0.9999997762573227, iteration: 284732
loss: 1.2158020734786987,grad_norm: 0.9999996981812582, iteration: 284733
loss: 1.2170237302780151,grad_norm: 0.9999998839136567, iteration: 284734
loss: 1.3653260469436646,grad_norm: 0.9999995184060971, iteration: 284735
loss: 1.1073307991027832,grad_norm: 0.9999998269313933, iteration: 284736
loss: 1.1892918348312378,grad_norm: 0.999999959488747, iteration: 284737
loss: 1.2245960235595703,grad_norm: 0.9999999276243869, iteration: 284738
loss: 1.0262584686279297,grad_norm: 0.999999219122393, iteration: 284739
loss: 1.3652682304382324,grad_norm: 0.9999992819475304, iteration: 284740
loss: 1.2918683290481567,grad_norm: 0.9999998182347769, iteration: 284741
loss: 1.2493045330047607,grad_norm: 0.9999998558357408, iteration: 284742
loss: 1.3197202682495117,grad_norm: 0.9999994635191104, iteration: 284743
loss: 1.2607024908065796,grad_norm: 0.9999997146328373, iteration: 284744
loss: 1.2068397998809814,grad_norm: 0.9999995232621369, iteration: 284745
loss: 1.2780524492263794,grad_norm: 0.9999996063407494, iteration: 284746
loss: 1.018056035041809,grad_norm: 0.9999996615384035, iteration: 284747
loss: 1.187021255493164,grad_norm: 0.9999997545720974, iteration: 284748
loss: 1.154203176498413,grad_norm: 0.9999990705866243, iteration: 284749
loss: 1.4347047805786133,grad_norm: 0.9999997155510258, iteration: 284750
loss: 1.1824549436569214,grad_norm: 0.9999998001430477, iteration: 284751
loss: 1.0286377668380737,grad_norm: 0.9999991372134501, iteration: 284752
loss: 1.1621652841567993,grad_norm: 0.9999999343200373, iteration: 284753
loss: 1.1144917011260986,grad_norm: 0.9999990467025691, iteration: 284754
loss: 1.1254667043685913,grad_norm: 0.9999997597879129, iteration: 284755
loss: 1.4368728399276733,grad_norm: 0.9999998475194721, iteration: 284756
loss: 1.1142464876174927,grad_norm: 0.9999991769227249, iteration: 284757
loss: 1.1406306028366089,grad_norm: 0.9999995787023424, iteration: 284758
loss: 1.3275439739227295,grad_norm: 0.9999994061386438, iteration: 284759
loss: 1.1867198944091797,grad_norm: 1.0000000056026506, iteration: 284760
loss: 1.2769430875778198,grad_norm: 0.9999997830627915, iteration: 284761
loss: 1.346766471862793,grad_norm: 1.0000000534080526, iteration: 284762
loss: 1.1166565418243408,grad_norm: 0.9999995648510961, iteration: 284763
loss: 1.1850632429122925,grad_norm: 0.9999998389918897, iteration: 284764
loss: 1.1340506076812744,grad_norm: 0.9999998937726524, iteration: 284765
loss: 1.083828330039978,grad_norm: 0.999999706811338, iteration: 284766
loss: 1.1850507259368896,grad_norm: 0.9999997216588777, iteration: 284767
loss: 1.1875392198562622,grad_norm: 0.9999999797641925, iteration: 284768
loss: 1.536755084991455,grad_norm: 0.9999999472023813, iteration: 284769
loss: 1.1512755155563354,grad_norm: 0.999999630750358, iteration: 284770
loss: 1.051578164100647,grad_norm: 0.9999993521832451, iteration: 284771
loss: 1.379132866859436,grad_norm: 0.9999998759379027, iteration: 284772
loss: 1.0779720544815063,grad_norm: 0.9999997648812524, iteration: 284773
loss: 1.1242091655731201,grad_norm: 0.999999226687641, iteration: 284774
loss: 1.2248618602752686,grad_norm: 0.9999993694148105, iteration: 284775
loss: 1.3140839338302612,grad_norm: 0.99999975375462, iteration: 284776
loss: 1.2654109001159668,grad_norm: 0.9999999568269294, iteration: 284777
loss: 1.2770657539367676,grad_norm: 0.999999674942744, iteration: 284778
loss: 1.2053319215774536,grad_norm: 0.9999999024014898, iteration: 284779
loss: 1.1828287839889526,grad_norm: 0.9999998979354672, iteration: 284780
loss: 1.2673485279083252,grad_norm: 0.9999997859940765, iteration: 284781
loss: 1.4303374290466309,grad_norm: 1.0000000006146248, iteration: 284782
loss: 1.2723833322525024,grad_norm: 0.999999870593847, iteration: 284783
loss: 1.2503516674041748,grad_norm: 0.9999999015438436, iteration: 284784
loss: 1.2198872566223145,grad_norm: 0.9999997561509152, iteration: 284785
loss: 1.2146594524383545,grad_norm: 0.9999996541734549, iteration: 284786
loss: 1.3093103170394897,grad_norm: 0.9999997344834382, iteration: 284787
loss: 1.264815092086792,grad_norm: 0.9999998185641162, iteration: 284788
loss: 1.2620271444320679,grad_norm: 0.9999998733822346, iteration: 284789
loss: 1.4473603963851929,grad_norm: 0.9999998085720586, iteration: 284790
loss: 1.1515588760375977,grad_norm: 0.9999998564271779, iteration: 284791
loss: 1.137847900390625,grad_norm: 0.9999998360696672, iteration: 284792
loss: 1.3976211547851562,grad_norm: 0.9999998266969624, iteration: 284793
loss: 1.332965612411499,grad_norm: 0.999999725739636, iteration: 284794
loss: 1.4255443811416626,grad_norm: 0.9999999501657167, iteration: 284795
loss: 1.368284821510315,grad_norm: 0.9999999299720974, iteration: 284796
loss: 1.1285439729690552,grad_norm: 0.9999993473218232, iteration: 284797
loss: 1.2944997549057007,grad_norm: 0.9999999017800912, iteration: 284798
loss: 1.1341150999069214,grad_norm: 0.9999997561521137, iteration: 284799
loss: 1.1381973028182983,grad_norm: 0.9999998631775789, iteration: 284800
loss: 1.2311378717422485,grad_norm: 0.9999999182480619, iteration: 284801
loss: 1.2174793481826782,grad_norm: 0.9999998902368471, iteration: 284802
loss: 1.1832550764083862,grad_norm: 0.9999996958835699, iteration: 284803
loss: 1.2480162382125854,grad_norm: 0.9999994718428932, iteration: 284804
loss: 1.1871165037155151,grad_norm: 0.9999993463095302, iteration: 284805
loss: 1.3869911432266235,grad_norm: 0.9999998963256032, iteration: 284806
loss: 1.3317359685897827,grad_norm: 0.9999998149225136, iteration: 284807
loss: 1.4221160411834717,grad_norm: 0.9999998734906793, iteration: 284808
loss: 1.213366985321045,grad_norm: 0.9999997685407074, iteration: 284809
loss: 1.2673231363296509,grad_norm: 0.9999997132716362, iteration: 284810
loss: 1.2823922634124756,grad_norm: 0.999999722517175, iteration: 284811
loss: 1.3708724975585938,grad_norm: 0.9999998758857274, iteration: 284812
loss: 1.2939479351043701,grad_norm: 0.9999996603949133, iteration: 284813
loss: 1.3193751573562622,grad_norm: 0.9999997243631307, iteration: 284814
loss: 1.248913288116455,grad_norm: 0.9999993020387746, iteration: 284815
loss: 1.2272329330444336,grad_norm: 0.9999997702059202, iteration: 284816
loss: 1.191192865371704,grad_norm: 0.9999996778809322, iteration: 284817
loss: 1.2597662210464478,grad_norm: 0.9999998223497739, iteration: 284818
loss: 1.3909616470336914,grad_norm: 0.9999999921071702, iteration: 284819
loss: 1.2784972190856934,grad_norm: 0.9999997202649727, iteration: 284820
loss: 1.1995590925216675,grad_norm: 1.000000044512169, iteration: 284821
loss: 1.4574395418167114,grad_norm: 0.9999999021199608, iteration: 284822
loss: 1.4167708158493042,grad_norm: 1.0000000160027112, iteration: 284823
loss: 1.536697268486023,grad_norm: 0.999999796275005, iteration: 284824
loss: 1.1413289308547974,grad_norm: 0.9999999193601934, iteration: 284825
loss: 1.331399917602539,grad_norm: 0.999999954036294, iteration: 284826
loss: 1.4056401252746582,grad_norm: 0.9999998438567538, iteration: 284827
loss: 1.2380198240280151,grad_norm: 0.999999863407373, iteration: 284828
loss: 1.264178991317749,grad_norm: 0.999999800582529, iteration: 284829
loss: 1.1495685577392578,grad_norm: 0.99999990016505, iteration: 284830
loss: 1.18647038936615,grad_norm: 0.9999995132443029, iteration: 284831
loss: 1.4143977165222168,grad_norm: 0.9999999371019981, iteration: 284832
loss: 1.1164569854736328,grad_norm: 0.9999999433973819, iteration: 284833
loss: 1.3139424324035645,grad_norm: 0.9999999841241235, iteration: 284834
loss: 1.299107551574707,grad_norm: 0.9999996951773535, iteration: 284835
loss: 1.4507924318313599,grad_norm: 0.9999999678984584, iteration: 284836
loss: 1.1312291622161865,grad_norm: 0.9999994746506248, iteration: 284837
loss: 1.1401597261428833,grad_norm: 0.9999994041664289, iteration: 284838
loss: 1.0856324434280396,grad_norm: 0.9999991456481628, iteration: 284839
loss: 1.1181005239486694,grad_norm: 0.9999993787767675, iteration: 284840
loss: 1.0069369077682495,grad_norm: 0.8374618484056601, iteration: 284841
loss: 1.0756148099899292,grad_norm: 0.9999993674771256, iteration: 284842
loss: 1.5268428325653076,grad_norm: 0.9999999586093617, iteration: 284843
loss: 1.2074946165084839,grad_norm: 0.9999998836559839, iteration: 284844
loss: 1.2343297004699707,grad_norm: 0.9999998425687296, iteration: 284845
loss: 1.210363507270813,grad_norm: 0.999999820482065, iteration: 284846
loss: 1.2936749458312988,grad_norm: 0.9999997570849241, iteration: 284847
loss: 1.2183759212493896,grad_norm: 0.9999998371606416, iteration: 284848
loss: 1.191739559173584,grad_norm: 0.9999994498364927, iteration: 284849
loss: 1.0448939800262451,grad_norm: 0.9999993509928223, iteration: 284850
loss: 1.1809802055358887,grad_norm: 0.9999997335075242, iteration: 284851
loss: 1.2506883144378662,grad_norm: 0.9999999354943678, iteration: 284852
loss: 1.3421680927276611,grad_norm: 0.9999998722368327, iteration: 284853
loss: 1.2049700021743774,grad_norm: 0.99999987797776, iteration: 284854
loss: 1.4052574634552002,grad_norm: 0.9999999670738065, iteration: 284855
loss: 1.124190092086792,grad_norm: 0.9999997758461217, iteration: 284856
loss: 1.209535837173462,grad_norm: 0.9999997059137812, iteration: 284857
loss: 1.2186874151229858,grad_norm: 0.9999998179420186, iteration: 284858
loss: 1.1922404766082764,grad_norm: 0.9999994979391875, iteration: 284859
loss: 1.2910711765289307,grad_norm: 0.9999997053251036, iteration: 284860
loss: 1.0738221406936646,grad_norm: 0.9999999585870336, iteration: 284861
loss: 1.174424171447754,grad_norm: 0.9999995567650418, iteration: 284862
loss: 1.2580430507659912,grad_norm: 0.9999998266477264, iteration: 284863
loss: 1.2452911138534546,grad_norm: 0.9999996500373908, iteration: 284864
loss: 1.0808689594268799,grad_norm: 0.999999668462898, iteration: 284865
loss: 1.0375380516052246,grad_norm: 0.783599153369656, iteration: 284866
loss: 1.233778476715088,grad_norm: 0.9999995792950552, iteration: 284867
loss: 1.2377504110336304,grad_norm: 0.9999997067343562, iteration: 284868
loss: 1.2027497291564941,grad_norm: 0.9999997411349758, iteration: 284869
loss: 1.0572197437286377,grad_norm: 0.9946962677840149, iteration: 284870
loss: 1.3724429607391357,grad_norm: 0.9999996280288173, iteration: 284871
loss: 1.1843394041061401,grad_norm: 0.9999997619109185, iteration: 284872
loss: 1.4316203594207764,grad_norm: 0.9999997239340226, iteration: 284873
loss: 1.3215192556381226,grad_norm: 0.999999414983825, iteration: 284874
loss: 1.0983508825302124,grad_norm: 0.9999992117951328, iteration: 284875
loss: 1.1913893222808838,grad_norm: 0.9999995631041649, iteration: 284876
loss: 1.2766586542129517,grad_norm: 0.9999998904693571, iteration: 284877
loss: 1.1657922267913818,grad_norm: 0.999999655626021, iteration: 284878
loss: 1.2706773281097412,grad_norm: 0.9999997456055768, iteration: 284879
loss: 1.137930989265442,grad_norm: 0.9999993795521402, iteration: 284880
loss: 1.1995246410369873,grad_norm: 0.9999992850061922, iteration: 284881
loss: 1.1319639682769775,grad_norm: 0.999999528834661, iteration: 284882
loss: 1.3226298093795776,grad_norm: 0.999999384502898, iteration: 284883
loss: 1.166092038154602,grad_norm: 0.9999989992883164, iteration: 284884
loss: 1.1530412435531616,grad_norm: 0.9999992666059317, iteration: 284885
loss: 1.1639070510864258,grad_norm: 0.9999992328625492, iteration: 284886
loss: 1.135615587234497,grad_norm: 0.999999929326106, iteration: 284887
loss: 1.2587693929672241,grad_norm: 0.9999999434792028, iteration: 284888
loss: 1.2768498659133911,grad_norm: 0.9999993398208064, iteration: 284889
loss: 1.3239303827285767,grad_norm: 0.9999998462178423, iteration: 284890
loss: 1.2884037494659424,grad_norm: 0.9999999222205295, iteration: 284891
loss: 1.1024600267410278,grad_norm: 0.9999996898103453, iteration: 284892
loss: 1.1704773902893066,grad_norm: 0.9999994014067706, iteration: 284893
loss: 1.1068974733352661,grad_norm: 0.9999991581273675, iteration: 284894
loss: 1.3912609815597534,grad_norm: 0.9999998744414571, iteration: 284895
loss: 1.0831888914108276,grad_norm: 0.9999998086699213, iteration: 284896
loss: 1.2529057264328003,grad_norm: 0.9999992501403608, iteration: 284897
loss: 1.117698311805725,grad_norm: 0.9999994471661309, iteration: 284898
loss: 1.1759757995605469,grad_norm: 0.9999999043181265, iteration: 284899
loss: 1.2207964658737183,grad_norm: 0.9999999709731255, iteration: 284900
loss: 1.110673189163208,grad_norm: 0.999999618302441, iteration: 284901
loss: 1.2486928701400757,grad_norm: 0.9999999036906491, iteration: 284902
loss: 1.2205767631530762,grad_norm: 0.9999999419929516, iteration: 284903
loss: 1.1542518138885498,grad_norm: 0.9994207039761509, iteration: 284904
loss: 1.2354729175567627,grad_norm: 0.9999997573253037, iteration: 284905
loss: 1.1931779384613037,grad_norm: 0.9999991389954509, iteration: 284906
loss: 1.0818277597427368,grad_norm: 0.9999995814212658, iteration: 284907
loss: 1.113294243812561,grad_norm: 0.9999997161888461, iteration: 284908
loss: 1.1363775730133057,grad_norm: 0.9999994675563979, iteration: 284909
loss: 1.249892234802246,grad_norm: 0.9999995273093135, iteration: 284910
loss: 1.10187828540802,grad_norm: 0.9999996876636588, iteration: 284911
loss: 1.108218789100647,grad_norm: 0.9999999731694801, iteration: 284912
loss: 1.168799877166748,grad_norm: 0.9999995980902175, iteration: 284913
loss: 1.1633718013763428,grad_norm: 0.9999993952090862, iteration: 284914
loss: 1.2755839824676514,grad_norm: 0.9999998333945758, iteration: 284915
loss: 1.2100579738616943,grad_norm: 0.9999997886121351, iteration: 284916
loss: 1.1671043634414673,grad_norm: 1.0000000337022878, iteration: 284917
loss: 1.3152861595153809,grad_norm: 0.9999992179407499, iteration: 284918
loss: 1.118909239768982,grad_norm: 0.9999994879692753, iteration: 284919
loss: 1.2474113702774048,grad_norm: 0.999999887091847, iteration: 284920
loss: 1.0759837627410889,grad_norm: 0.9999998093238995, iteration: 284921
loss: 1.3561699390411377,grad_norm: 0.9999998446759534, iteration: 284922
loss: 1.2586301565170288,grad_norm: 0.999999402138483, iteration: 284923
loss: 1.1144894361495972,grad_norm: 0.9999997190100636, iteration: 284924
loss: 1.2515795230865479,grad_norm: 0.9999998095120886, iteration: 284925
loss: 1.1718038320541382,grad_norm: 0.9999997452069707, iteration: 284926
loss: 1.2545627355575562,grad_norm: 0.9999996142593034, iteration: 284927
loss: 1.0323354005813599,grad_norm: 0.9999994975798452, iteration: 284928
loss: 1.2253512144088745,grad_norm: 0.9999996845519665, iteration: 284929
loss: 1.1511274576187134,grad_norm: 0.99999979742527, iteration: 284930
loss: 1.241370439529419,grad_norm: 0.9999998622539161, iteration: 284931
loss: 1.3035492897033691,grad_norm: 0.9999998470576218, iteration: 284932
loss: 1.1309605836868286,grad_norm: 0.9999995119736563, iteration: 284933
loss: 1.2242401838302612,grad_norm: 0.999999998177153, iteration: 284934
loss: 1.3674030303955078,grad_norm: 0.9999998156972095, iteration: 284935
loss: 1.0823564529418945,grad_norm: 0.9999997590165114, iteration: 284936
loss: 1.2756296396255493,grad_norm: 0.9999999085027387, iteration: 284937
loss: 1.330569863319397,grad_norm: 0.9999995064335205, iteration: 284938
loss: 1.1117260456085205,grad_norm: 0.9999991183247912, iteration: 284939
loss: 1.3375319242477417,grad_norm: 0.9999999477138138, iteration: 284940
loss: 1.2171742916107178,grad_norm: 0.9999994956556606, iteration: 284941
loss: 1.1647504568099976,grad_norm: 0.9999996955068796, iteration: 284942
loss: 1.2176330089569092,grad_norm: 0.9999996785551156, iteration: 284943
loss: 1.1458412408828735,grad_norm: 0.9999995472689669, iteration: 284944
loss: 1.0788705348968506,grad_norm: 0.9999996165797306, iteration: 284945
loss: 1.1663157939910889,grad_norm: 1.0000000083806706, iteration: 284946
loss: 1.0420390367507935,grad_norm: 0.9999990861190914, iteration: 284947
loss: 1.160881757736206,grad_norm: 0.9953096353449643, iteration: 284948
loss: 1.2763422727584839,grad_norm: 0.9999998827013948, iteration: 284949
loss: 1.2143453359603882,grad_norm: 0.9999999366989113, iteration: 284950
loss: 1.1473405361175537,grad_norm: 0.9999991480645686, iteration: 284951
loss: 1.194512963294983,grad_norm: 0.9999998566179217, iteration: 284952
loss: 1.0848537683486938,grad_norm: 0.9999995780530302, iteration: 284953
loss: 1.0962321758270264,grad_norm: 0.9999997355269413, iteration: 284954
loss: 1.4271435737609863,grad_norm: 0.9999998308449788, iteration: 284955
loss: 1.2927730083465576,grad_norm: 0.9999997781164395, iteration: 284956
loss: 1.0882084369659424,grad_norm: 0.9999992924138638, iteration: 284957
loss: 1.0243520736694336,grad_norm: 0.9999994709234363, iteration: 284958
loss: 1.12974214553833,grad_norm: 0.9999996526745941, iteration: 284959
loss: 1.3364977836608887,grad_norm: 0.9999997656072658, iteration: 284960
loss: 1.1933594942092896,grad_norm: 0.999999426227461, iteration: 284961
loss: 1.3494583368301392,grad_norm: 1.0000000153310669, iteration: 284962
loss: 1.0942691564559937,grad_norm: 0.9999993823493885, iteration: 284963
loss: 1.095045804977417,grad_norm: 0.9999996239128115, iteration: 284964
loss: 1.3071180582046509,grad_norm: 0.99999986397736, iteration: 284965
loss: 1.1600489616394043,grad_norm: 0.9999999801296009, iteration: 284966
loss: 1.2754844427108765,grad_norm: 0.999999200773946, iteration: 284967
loss: 1.2365565299987793,grad_norm: 0.9999999930744495, iteration: 284968
loss: 1.1790645122528076,grad_norm: 0.9999992619669968, iteration: 284969
loss: 1.2562845945358276,grad_norm: 0.9999997690871159, iteration: 284970
loss: 1.165651798248291,grad_norm: 0.9999996943890799, iteration: 284971
loss: 1.1873035430908203,grad_norm: 0.999999430199539, iteration: 284972
loss: 1.1893175840377808,grad_norm: 0.9999996774580198, iteration: 284973
loss: 1.1951824426651,grad_norm: 0.9999995235465978, iteration: 284974
loss: 1.094002366065979,grad_norm: 0.9999999136733996, iteration: 284975
loss: 1.1192905902862549,grad_norm: 0.9999994686953831, iteration: 284976
loss: 1.088878870010376,grad_norm: 0.9999994163874465, iteration: 284977
loss: 1.073685884475708,grad_norm: 0.9999996945307489, iteration: 284978
loss: 1.0728435516357422,grad_norm: 0.9999992147269249, iteration: 284979
loss: 1.2913928031921387,grad_norm: 0.9999998525536015, iteration: 284980
loss: 1.1467485427856445,grad_norm: 0.9999999389929338, iteration: 284981
loss: 1.0731041431427002,grad_norm: 0.9999999491790226, iteration: 284982
loss: 1.201410174369812,grad_norm: 0.9999993254136379, iteration: 284983
loss: 1.1902227401733398,grad_norm: 0.9999994623082071, iteration: 284984
loss: 1.170255422592163,grad_norm: 0.999999504488473, iteration: 284985
loss: 1.1316238641738892,grad_norm: 0.9999992032173283, iteration: 284986
loss: 1.136465072631836,grad_norm: 0.9999999205327292, iteration: 284987
loss: 1.200300931930542,grad_norm: 0.9999998250446493, iteration: 284988
loss: 1.0964621305465698,grad_norm: 0.9999992037212624, iteration: 284989
loss: 1.164352536201477,grad_norm: 0.9999996758279805, iteration: 284990
loss: 1.3704322576522827,grad_norm: 0.9999998460821208, iteration: 284991
loss: 1.0938647985458374,grad_norm: 0.9999998089327174, iteration: 284992
loss: 1.2879228591918945,grad_norm: 0.999999873433947, iteration: 284993
loss: 1.1059091091156006,grad_norm: 0.999999844720317, iteration: 284994
loss: 1.1770331859588623,grad_norm: 0.9999995731098494, iteration: 284995
loss: 1.1359071731567383,grad_norm: 0.9999998842522302, iteration: 284996
loss: 1.1075955629348755,grad_norm: 0.9999990838786934, iteration: 284997
loss: 1.1504400968551636,grad_norm: 0.9999997408676469, iteration: 284998
loss: 1.0915358066558838,grad_norm: 0.999999770836689, iteration: 284999
loss: 1.0788872241973877,grad_norm: 0.9999991103547895, iteration: 285000
loss: 1.2436999082565308,grad_norm: 0.9999995746732333, iteration: 285001
loss: 1.1253736019134521,grad_norm: 0.9999999461889143, iteration: 285002
loss: 1.0702296495437622,grad_norm: 0.9999998874365298, iteration: 285003
loss: 1.239784598350525,grad_norm: 0.9999997769131663, iteration: 285004
loss: 1.0691317319869995,grad_norm: 0.9999996775162858, iteration: 285005
loss: 1.1683844327926636,grad_norm: 0.9999993236152378, iteration: 285006
loss: 1.3987993001937866,grad_norm: 0.999999860801879, iteration: 285007
loss: 1.2575255632400513,grad_norm: 0.9999994736447771, iteration: 285008
loss: 1.131269931793213,grad_norm: 0.9999999371203576, iteration: 285009
loss: 1.1662840843200684,grad_norm: 0.9999998006350356, iteration: 285010
loss: 1.0947387218475342,grad_norm: 0.9999996353641534, iteration: 285011
loss: 1.083588719367981,grad_norm: 0.9999992539102972, iteration: 285012
loss: 1.1758006811141968,grad_norm: 0.9999999096374251, iteration: 285013
loss: 1.118972659111023,grad_norm: 0.9999995354041686, iteration: 285014
loss: 1.043552279472351,grad_norm: 0.9999991818864082, iteration: 285015
loss: 1.1220229864120483,grad_norm: 0.9999992964512032, iteration: 285016
loss: 1.0285675525665283,grad_norm: 0.924166194766408, iteration: 285017
loss: 1.249155044555664,grad_norm: 0.9999999820609602, iteration: 285018
loss: 1.1803975105285645,grad_norm: 0.9999995438884406, iteration: 285019
loss: 1.1682745218276978,grad_norm: 0.9999998026198335, iteration: 285020
loss: 1.0653700828552246,grad_norm: 0.9999996709714252, iteration: 285021
loss: 1.037840485572815,grad_norm: 0.9999998797479976, iteration: 285022
loss: 1.289566159248352,grad_norm: 0.9999995678562772, iteration: 285023
loss: 1.194157361984253,grad_norm: 0.9999998865825223, iteration: 285024
loss: 1.1492538452148438,grad_norm: 0.9999993360006643, iteration: 285025
loss: 1.1659666299819946,grad_norm: 0.9999992089967311, iteration: 285026
loss: 1.2170056104660034,grad_norm: 0.9999998793002127, iteration: 285027
loss: 1.0622336864471436,grad_norm: 0.9999991870642714, iteration: 285028
loss: 1.2043664455413818,grad_norm: 0.9999996045423074, iteration: 285029
loss: 1.1989156007766724,grad_norm: 0.9999991588973186, iteration: 285030
loss: 1.376104712486267,grad_norm: 0.9999998119750817, iteration: 285031
loss: 1.1036884784698486,grad_norm: 0.9999994858860501, iteration: 285032
loss: 1.1185078620910645,grad_norm: 0.9999998100722346, iteration: 285033
loss: 1.2273632287979126,grad_norm: 0.9999996854260453, iteration: 285034
loss: 1.1860045194625854,grad_norm: 0.9999995160858175, iteration: 285035
loss: 1.1168854236602783,grad_norm: 0.999999774827722, iteration: 285036
loss: 1.1068726778030396,grad_norm: 0.9999996543389864, iteration: 285037
loss: 1.2443652153015137,grad_norm: 0.9999997889289693, iteration: 285038
loss: 1.1370042562484741,grad_norm: 0.9999998339499955, iteration: 285039
loss: 1.1458544731140137,grad_norm: 0.9999996644996039, iteration: 285040
loss: 1.1966161727905273,grad_norm: 0.999999816208909, iteration: 285041
loss: 1.1121008396148682,grad_norm: 0.9999996343353617, iteration: 285042
loss: 1.2040127515792847,grad_norm: 0.9999995700397043, iteration: 285043
loss: 1.1317650079727173,grad_norm: 0.9999999080933342, iteration: 285044
loss: 1.177524209022522,grad_norm: 0.9999997086599673, iteration: 285045
loss: 1.052876353263855,grad_norm: 0.99999915280036, iteration: 285046
loss: 1.2917494773864746,grad_norm: 0.9999996586118158, iteration: 285047
loss: 1.220066785812378,grad_norm: 0.9999996865443825, iteration: 285048
loss: 1.2461947202682495,grad_norm: 0.9999998788143578, iteration: 285049
loss: 1.220577359199524,grad_norm: 0.9999994192041509, iteration: 285050
loss: 1.1028554439544678,grad_norm: 0.9999998417619916, iteration: 285051
loss: 1.1507854461669922,grad_norm: 0.999999715607813, iteration: 285052
loss: 1.0783036947250366,grad_norm: 0.9999994635410793, iteration: 285053
loss: 1.208524227142334,grad_norm: 0.9999993655416758, iteration: 285054
loss: 1.092997670173645,grad_norm: 0.9999992813743034, iteration: 285055
loss: 1.128577709197998,grad_norm: 0.9999997764216684, iteration: 285056
loss: 1.1561343669891357,grad_norm: 0.9999994737096192, iteration: 285057
loss: 1.149410605430603,grad_norm: 0.9999999187114711, iteration: 285058
loss: 1.2876986265182495,grad_norm: 0.9999996130928498, iteration: 285059
loss: 1.2503730058670044,grad_norm: 0.9999999427747138, iteration: 285060
loss: 1.2584352493286133,grad_norm: 0.999999656384305, iteration: 285061
loss: 1.084967017173767,grad_norm: 0.9999996450084416, iteration: 285062
loss: 1.2593015432357788,grad_norm: 0.9999998721174898, iteration: 285063
loss: 1.158514142036438,grad_norm: 0.9999996083654336, iteration: 285064
loss: 1.0667378902435303,grad_norm: 0.9999991801769931, iteration: 285065
loss: 1.313186764717102,grad_norm: 0.9999998483921558, iteration: 285066
loss: 1.2197308540344238,grad_norm: 0.9999998339963365, iteration: 285067
loss: 1.1300479173660278,grad_norm: 0.9999994449030893, iteration: 285068
loss: 1.093787670135498,grad_norm: 0.9999993553549297, iteration: 285069
loss: 1.0725494623184204,grad_norm: 0.9999992755326115, iteration: 285070
loss: 1.291022539138794,grad_norm: 0.9999998782793674, iteration: 285071
loss: 1.187084674835205,grad_norm: 0.9999995618992538, iteration: 285072
loss: 1.1593577861785889,grad_norm: 0.9999997891886492, iteration: 285073
loss: 1.2867263555526733,grad_norm: 0.9999997257803619, iteration: 285074
loss: 1.1086657047271729,grad_norm: 0.9999995711013637, iteration: 285075
loss: 1.094594955444336,grad_norm: 0.9999994210506231, iteration: 285076
loss: 1.0888493061065674,grad_norm: 0.9999996927101087, iteration: 285077
loss: 1.1284483671188354,grad_norm: 0.9999995078709937, iteration: 285078
loss: 1.0977473258972168,grad_norm: 0.9999994573632568, iteration: 285079
loss: 1.1465004682540894,grad_norm: 0.9999998411214363, iteration: 285080
loss: 1.1327040195465088,grad_norm: 0.9999997312622556, iteration: 285081
loss: 1.2060608863830566,grad_norm: 0.9999999123861116, iteration: 285082
loss: 1.1014890670776367,grad_norm: 0.9999993934092197, iteration: 285083
loss: 1.177878975868225,grad_norm: 0.9999993729541065, iteration: 285084
loss: 1.133711814880371,grad_norm: 0.9999996919278178, iteration: 285085
loss: 1.032711148262024,grad_norm: 0.999999406429937, iteration: 285086
loss: 1.1393389701843262,grad_norm: 0.999999712440381, iteration: 285087
loss: 1.0142353773117065,grad_norm: 0.9999992926296846, iteration: 285088
loss: 1.081009030342102,grad_norm: 0.999999192079658, iteration: 285089
loss: 1.1709929704666138,grad_norm: 0.9999998756998143, iteration: 285090
loss: 1.1095261573791504,grad_norm: 0.9999992653592282, iteration: 285091
loss: 1.2964800596237183,grad_norm: 0.9999997556128617, iteration: 285092
loss: 1.0636132955551147,grad_norm: 0.9999990766809849, iteration: 285093
loss: 1.1595910787582397,grad_norm: 0.9999997835009115, iteration: 285094
loss: 1.147567629814148,grad_norm: 0.9999996767384964, iteration: 285095
loss: 1.2205337285995483,grad_norm: 0.9999997876776142, iteration: 285096
loss: 1.1063563823699951,grad_norm: 0.9999997883102391, iteration: 285097
loss: 1.0388301610946655,grad_norm: 0.9999995489322306, iteration: 285098
loss: 1.2913936376571655,grad_norm: 0.9999995249709355, iteration: 285099
loss: 1.2967239618301392,grad_norm: 0.9999999459018415, iteration: 285100
loss: 1.4233587980270386,grad_norm: 1.0000000793304895, iteration: 285101
loss: 1.2139694690704346,grad_norm: 0.9999994894323405, iteration: 285102
loss: 1.2848879098892212,grad_norm: 1.0000000753878464, iteration: 285103
loss: 1.0168418884277344,grad_norm: 0.9999994384672525, iteration: 285104
loss: 1.09909188747406,grad_norm: 0.9999995984791895, iteration: 285105
loss: 1.005693793296814,grad_norm: 0.9999994398093325, iteration: 285106
loss: 1.1538996696472168,grad_norm: 0.9999992941774629, iteration: 285107
loss: 1.2081665992736816,grad_norm: 0.9999994885386323, iteration: 285108
loss: 1.1203316450119019,grad_norm: 0.9999998712275256, iteration: 285109
loss: 1.270499348640442,grad_norm: 0.999999915530685, iteration: 285110
loss: 1.150895595550537,grad_norm: 0.9999995170068693, iteration: 285111
loss: 1.1360613107681274,grad_norm: 0.9999992674624796, iteration: 285112
loss: 1.1090525388717651,grad_norm: 0.9999998797292716, iteration: 285113
loss: 1.0550438165664673,grad_norm: 0.9999993188559744, iteration: 285114
loss: 1.1074457168579102,grad_norm: 1.0000000756848824, iteration: 285115
loss: 1.1048444509506226,grad_norm: 0.9999993163780083, iteration: 285116
loss: 1.1672464609146118,grad_norm: 0.9999997533507619, iteration: 285117
loss: 1.1344025135040283,grad_norm: 0.9999996207241962, iteration: 285118
loss: 1.2084009647369385,grad_norm: 0.9999997597332461, iteration: 285119
loss: 1.1156675815582275,grad_norm: 0.9999995725132693, iteration: 285120
loss: 1.073366641998291,grad_norm: 0.9999994552557406, iteration: 285121
loss: 1.2199032306671143,grad_norm: 0.9999998131900719, iteration: 285122
loss: 0.9791894555091858,grad_norm: 0.9999991827828708, iteration: 285123
loss: 1.1886194944381714,grad_norm: 0.9999999388299052, iteration: 285124
loss: 1.2473925352096558,grad_norm: 0.9999996217297176, iteration: 285125
loss: 1.0632023811340332,grad_norm: 0.999999124313019, iteration: 285126
loss: 1.1567186117172241,grad_norm: 0.9999997521548734, iteration: 285127
loss: 1.1044734716415405,grad_norm: 0.9999991708947242, iteration: 285128
loss: 1.2594373226165771,grad_norm: 0.9999996245881989, iteration: 285129
loss: 1.0754221677780151,grad_norm: 0.9999992943841833, iteration: 285130
loss: 1.2575902938842773,grad_norm: 0.9999999540843116, iteration: 285131
loss: 1.1959199905395508,grad_norm: 0.9999994356143791, iteration: 285132
loss: 0.9781339764595032,grad_norm: 0.9999993513353008, iteration: 285133
loss: 1.0958127975463867,grad_norm: 0.9999998045604601, iteration: 285134
loss: 1.1326276063919067,grad_norm: 0.9999995278573728, iteration: 285135
loss: 1.0970563888549805,grad_norm: 0.9999992983209423, iteration: 285136
loss: 1.1673177480697632,grad_norm: 0.9999992700807419, iteration: 285137
loss: 1.1576459407806396,grad_norm: 0.9999998010588754, iteration: 285138
loss: 1.1666874885559082,grad_norm: 0.9999997084265594, iteration: 285139
loss: 1.0540543794631958,grad_norm: 0.9999996731066952, iteration: 285140
loss: 1.0879206657409668,grad_norm: 0.9999998646782611, iteration: 285141
loss: 1.0834704637527466,grad_norm: 0.9999991133591964, iteration: 285142
loss: 1.183983325958252,grad_norm: 0.9999993590124048, iteration: 285143
loss: 1.1535344123840332,grad_norm: 0.9999995590818325, iteration: 285144
loss: 1.0955413579940796,grad_norm: 0.9999998447749694, iteration: 285145
loss: 1.3416448831558228,grad_norm: 0.999999571238656, iteration: 285146
loss: 1.101791501045227,grad_norm: 0.9999996396169646, iteration: 285147
loss: 1.1758270263671875,grad_norm: 0.9999996738963817, iteration: 285148
loss: 1.1146881580352783,grad_norm: 0.9999998390784627, iteration: 285149
loss: 1.0812740325927734,grad_norm: 0.9999999171738779, iteration: 285150
loss: 1.064619541168213,grad_norm: 0.999999810189391, iteration: 285151
loss: 1.1745102405548096,grad_norm: 0.9999994310814179, iteration: 285152
loss: 1.114270567893982,grad_norm: 0.9999993639720511, iteration: 285153
loss: 1.141097903251648,grad_norm: 0.9999990759949267, iteration: 285154
loss: 1.10956871509552,grad_norm: 0.9999993485996198, iteration: 285155
loss: 1.2690980434417725,grad_norm: 0.9999998246969565, iteration: 285156
loss: 1.280804991722107,grad_norm: 1.000000024369732, iteration: 285157
loss: 1.0849359035491943,grad_norm: 0.9999994011519916, iteration: 285158
loss: 1.1976416110992432,grad_norm: 0.9999999631759431, iteration: 285159
loss: 1.209749460220337,grad_norm: 0.9999991627696061, iteration: 285160
loss: 1.1135939359664917,grad_norm: 0.9999993511652231, iteration: 285161
loss: 1.0500327348709106,grad_norm: 0.9999996188527289, iteration: 285162
loss: 1.087677240371704,grad_norm: 0.9999999353932334, iteration: 285163
loss: 1.1661279201507568,grad_norm: 0.9999997824675639, iteration: 285164
loss: 1.05363929271698,grad_norm: 0.9999992132162765, iteration: 285165
loss: 1.177851676940918,grad_norm: 0.9999997501422689, iteration: 285166
loss: 1.0914665460586548,grad_norm: 0.9999995147556163, iteration: 285167
loss: 1.1826645135879517,grad_norm: 0.9999996815758667, iteration: 285168
loss: 1.1108988523483276,grad_norm: 0.9999998060700217, iteration: 285169
loss: 1.101678729057312,grad_norm: 0.9999995176082986, iteration: 285170
loss: 1.134450912475586,grad_norm: 0.9999998119166559, iteration: 285171
loss: 1.149253249168396,grad_norm: 0.9999993514029321, iteration: 285172
loss: 1.2467960119247437,grad_norm: 0.9999998774993681, iteration: 285173
loss: 1.0417667627334595,grad_norm: 0.9999998798624881, iteration: 285174
loss: 1.0387060642242432,grad_norm: 0.9186701252474903, iteration: 285175
loss: 1.1211742162704468,grad_norm: 0.9999995442376993, iteration: 285176
loss: 1.085654616355896,grad_norm: 0.999999706325596, iteration: 285177
loss: 1.0376442670822144,grad_norm: 0.9999999463948479, iteration: 285178
loss: 1.1146738529205322,grad_norm: 0.9999998590689256, iteration: 285179
loss: 1.2752041816711426,grad_norm: 0.999999890832666, iteration: 285180
loss: 1.0465304851531982,grad_norm: 0.9999994413399557, iteration: 285181
loss: 1.0916668176651,grad_norm: 0.9999994943973155, iteration: 285182
loss: 1.1411240100860596,grad_norm: 0.9999998606796351, iteration: 285183
loss: 1.089863657951355,grad_norm: 0.9999991179061918, iteration: 285184
loss: 1.0646463632583618,grad_norm: 0.999999573485723, iteration: 285185
loss: 1.116525411605835,grad_norm: 0.9999995953914231, iteration: 285186
loss: 1.0725133419036865,grad_norm: 0.9999994079189871, iteration: 285187
loss: 1.1077343225479126,grad_norm: 0.9999991464307236, iteration: 285188
loss: 1.1346701383590698,grad_norm: 0.9999994825523923, iteration: 285189
loss: 1.0842170715332031,grad_norm: 0.9999991094420517, iteration: 285190
loss: 1.0636265277862549,grad_norm: 0.9999992129520835, iteration: 285191
loss: 1.1343547105789185,grad_norm: 0.9999994435831472, iteration: 285192
loss: 1.1408462524414062,grad_norm: 0.9999993309333163, iteration: 285193
loss: 1.2078102827072144,grad_norm: 0.9999996465682043, iteration: 285194
loss: 1.195486068725586,grad_norm: 0.9999998003281722, iteration: 285195
loss: 1.2788405418395996,grad_norm: 0.9999994751190454, iteration: 285196
loss: 1.1686745882034302,grad_norm: 0.9999996215594582, iteration: 285197
loss: 1.052844762802124,grad_norm: 0.9999992933724499, iteration: 285198
loss: 1.0285511016845703,grad_norm: 0.9999992130087665, iteration: 285199
loss: 1.2046178579330444,grad_norm: 0.9999998981243918, iteration: 285200
loss: 1.2050740718841553,grad_norm: 0.9999998893512937, iteration: 285201
loss: 1.0393701791763306,grad_norm: 0.9999993736778093, iteration: 285202
loss: 1.0849883556365967,grad_norm: 0.9999995940867503, iteration: 285203
loss: 1.0475966930389404,grad_norm: 0.999999185938618, iteration: 285204
loss: 1.1600877046585083,grad_norm: 0.9999995137704709, iteration: 285205
loss: 1.1934494972229004,grad_norm: 0.9999999517100366, iteration: 285206
loss: 1.1847212314605713,grad_norm: 0.9999994824432777, iteration: 285207
loss: 1.0041592121124268,grad_norm: 0.9999990195166408, iteration: 285208
loss: 1.0829495191574097,grad_norm: 0.9999991946984772, iteration: 285209
loss: 1.2385144233703613,grad_norm: 0.999999722217243, iteration: 285210
loss: 1.0853475332260132,grad_norm: 0.9999996331927774, iteration: 285211
loss: 1.247286319732666,grad_norm: 0.999999519755431, iteration: 285212
loss: 1.1348756551742554,grad_norm: 0.9999998506407163, iteration: 285213
loss: 1.2274267673492432,grad_norm: 0.9999992244198582, iteration: 285214
loss: 1.150593876838684,grad_norm: 0.9999997193925556, iteration: 285215
loss: 1.1624201536178589,grad_norm: 0.9999992886354558, iteration: 285216
loss: 1.117255687713623,grad_norm: 0.9999996984075432, iteration: 285217
loss: 1.1448469161987305,grad_norm: 0.9999996973006661, iteration: 285218
loss: 1.2413733005523682,grad_norm: 0.9999999594852312, iteration: 285219
loss: 1.075348138809204,grad_norm: 0.9999992373949743, iteration: 285220
loss: 1.1435129642486572,grad_norm: 0.9999993564350711, iteration: 285221
loss: 1.0584052801132202,grad_norm: 0.9999999930495029, iteration: 285222
loss: 1.0408940315246582,grad_norm: 0.9925064541347969, iteration: 285223
loss: 1.0619134902954102,grad_norm: 0.9999996493952905, iteration: 285224
loss: 1.1964774131774902,grad_norm: 0.9999994855776458, iteration: 285225
loss: 1.1050207614898682,grad_norm: 0.9999995091727731, iteration: 285226
loss: 1.0687708854675293,grad_norm: 0.9080534119832914, iteration: 285227
loss: 1.106525182723999,grad_norm: 0.9999996989600936, iteration: 285228
loss: 1.144369125366211,grad_norm: 0.9999993820576291, iteration: 285229
loss: 1.170311689376831,grad_norm: 0.9999993014477387, iteration: 285230
loss: 1.2878741025924683,grad_norm: 0.9999996843122856, iteration: 285231
loss: 1.2215579748153687,grad_norm: 0.9999992313804051, iteration: 285232
loss: 1.1153194904327393,grad_norm: 0.9999995530230069, iteration: 285233
loss: 1.2054997682571411,grad_norm: 0.9999998246064266, iteration: 285234
loss: 1.0485597848892212,grad_norm: 0.9999992406827027, iteration: 285235
loss: 1.100950837135315,grad_norm: 0.9999993974261613, iteration: 285236
loss: 1.2270270586013794,grad_norm: 0.9999999929372899, iteration: 285237
loss: 1.1870763301849365,grad_norm: 0.9999999424020997, iteration: 285238
loss: 1.0852224826812744,grad_norm: 0.9999994429268807, iteration: 285239
loss: 1.3844085931777954,grad_norm: 0.9999999239475031, iteration: 285240
loss: 1.0876693725585938,grad_norm: 0.9999992748734096, iteration: 285241
loss: 1.1023215055465698,grad_norm: 0.9999992850145519, iteration: 285242
loss: 1.037656545639038,grad_norm: 0.9999998513713176, iteration: 285243
loss: 1.129192590713501,grad_norm: 0.9999998405137607, iteration: 285244
loss: 1.111741304397583,grad_norm: 0.9999998397671791, iteration: 285245
loss: 1.1396247148513794,grad_norm: 0.9999999633334178, iteration: 285246
loss: 1.0782051086425781,grad_norm: 0.9999998293942938, iteration: 285247
loss: 1.1297667026519775,grad_norm: 0.9999992456209533, iteration: 285248
loss: 1.0667895078659058,grad_norm: 0.875993049446551, iteration: 285249
loss: 1.1571214199066162,grad_norm: 0.9999998614620215, iteration: 285250
loss: 1.2743791341781616,grad_norm: 0.9999998902144086, iteration: 285251
loss: 1.1049319505691528,grad_norm: 0.9999999331639774, iteration: 285252
loss: 1.0912609100341797,grad_norm: 0.9999997000853178, iteration: 285253
loss: 1.076593041419983,grad_norm: 0.9999994753078135, iteration: 285254
loss: 1.1732099056243896,grad_norm: 0.9999998539269226, iteration: 285255
loss: 1.1604628562927246,grad_norm: 0.9999999010780555, iteration: 285256
loss: 1.033150553703308,grad_norm: 0.9776874872700883, iteration: 285257
loss: 1.2091584205627441,grad_norm: 0.9999999632216053, iteration: 285258
loss: 0.9926918148994446,grad_norm: 0.9203676066026446, iteration: 285259
loss: 1.096780776977539,grad_norm: 0.9999997782184867, iteration: 285260
loss: 1.2376437187194824,grad_norm: 1.0000000276226302, iteration: 285261
loss: 1.1045136451721191,grad_norm: 0.999999066913963, iteration: 285262
loss: 1.1390024423599243,grad_norm: 0.9999997714431004, iteration: 285263
loss: 1.2449573278427124,grad_norm: 0.999999871710564, iteration: 285264
loss: 1.1617932319641113,grad_norm: 0.9999995645018357, iteration: 285265
loss: 1.073366641998291,grad_norm: 0.9999993650480296, iteration: 285266
loss: 1.11543869972229,grad_norm: 0.9999998976717156, iteration: 285267
loss: 1.0681408643722534,grad_norm: 0.9999991564819455, iteration: 285268
loss: 1.1020479202270508,grad_norm: 0.9999992542556262, iteration: 285269
loss: 1.0524306297302246,grad_norm: 0.9999991463968685, iteration: 285270
loss: 1.2397639751434326,grad_norm: 0.9999996377283915, iteration: 285271
loss: 0.9884206652641296,grad_norm: 0.9999991340165536, iteration: 285272
loss: 1.0832442045211792,grad_norm: 0.9999999517113297, iteration: 285273
loss: 1.061150074005127,grad_norm: 0.9999991273551215, iteration: 285274
loss: 1.1635607481002808,grad_norm: 0.9999997187003838, iteration: 285275
loss: 1.0823695659637451,grad_norm: 0.9999990758747928, iteration: 285276
loss: 1.1325595378875732,grad_norm: 0.9999994847373593, iteration: 285277
loss: 1.498930811882019,grad_norm: 0.9999997835250775, iteration: 285278
loss: 1.298302412033081,grad_norm: 0.9999997334242691, iteration: 285279
loss: 1.0870856046676636,grad_norm: 0.9999990758155277, iteration: 285280
loss: 1.0755785703659058,grad_norm: 0.9999998483603006, iteration: 285281
loss: 1.077189564704895,grad_norm: 0.9999994721680933, iteration: 285282
loss: 1.066801905632019,grad_norm: 0.999999325573518, iteration: 285283
loss: 1.1374661922454834,grad_norm: 0.9999998697786899, iteration: 285284
loss: 1.1764376163482666,grad_norm: 0.9999999299218398, iteration: 285285
loss: 1.0194298028945923,grad_norm: 0.99999897801851, iteration: 285286
loss: 1.0915278196334839,grad_norm: 0.9999994304299282, iteration: 285287
loss: 1.1658520698547363,grad_norm: 0.9999997381942192, iteration: 285288
loss: 1.1551669836044312,grad_norm: 0.9999997069328476, iteration: 285289
loss: 1.1221245527267456,grad_norm: 0.9999994482938666, iteration: 285290
loss: 1.0437755584716797,grad_norm: 0.9999998440755873, iteration: 285291
loss: 1.056715965270996,grad_norm: 0.9999997489814573, iteration: 285292
loss: 1.2139679193496704,grad_norm: 0.9999992615638504, iteration: 285293
loss: 1.0173168182373047,grad_norm: 0.999999185318908, iteration: 285294
loss: 1.1307833194732666,grad_norm: 0.9999995771527445, iteration: 285295
loss: 1.0561909675598145,grad_norm: 0.9999993201222549, iteration: 285296
loss: 1.0897431373596191,grad_norm: 0.999999907373073, iteration: 285297
loss: 1.062571406364441,grad_norm: 0.9999993604779278, iteration: 285298
loss: 1.1835428476333618,grad_norm: 0.9999996332641126, iteration: 285299
loss: 1.04427170753479,grad_norm: 0.9999996095481721, iteration: 285300
loss: 1.0530675649642944,grad_norm: 0.9772732331325942, iteration: 285301
loss: 1.1589735746383667,grad_norm: 0.9999995100136222, iteration: 285302
loss: 1.1257027387619019,grad_norm: 0.9999991956565981, iteration: 285303
loss: 1.0214343070983887,grad_norm: 0.9919998854548782, iteration: 285304
loss: 1.1406619548797607,grad_norm: 1.0000000226353318, iteration: 285305
loss: 1.2303663492202759,grad_norm: 1.0000000343196405, iteration: 285306
loss: 1.162015676498413,grad_norm: 0.9999999347425185, iteration: 285307
loss: 1.4388128519058228,grad_norm: 0.9999997139373096, iteration: 285308
loss: 1.2741578817367554,grad_norm: 0.9999999200634704, iteration: 285309
loss: 1.0123788118362427,grad_norm: 0.9999995794476327, iteration: 285310
loss: 1.1118950843811035,grad_norm: 0.9999999439771896, iteration: 285311
loss: 1.093544840812683,grad_norm: 0.9999993457437749, iteration: 285312
loss: 1.1797281503677368,grad_norm: 0.9999998878922465, iteration: 285313
loss: 1.0644103288650513,grad_norm: 0.9999993734854261, iteration: 285314
loss: 1.1593573093414307,grad_norm: 0.9999997141890393, iteration: 285315
loss: 1.151244044303894,grad_norm: 0.9999996473457704, iteration: 285316
loss: 1.0762749910354614,grad_norm: 0.9999992297311943, iteration: 285317
loss: 1.1839932203292847,grad_norm: 0.9999994628093278, iteration: 285318
loss: 1.1396448612213135,grad_norm: 0.9999995544890042, iteration: 285319
loss: 1.0958895683288574,grad_norm: 0.9999998009216559, iteration: 285320
loss: 1.1677813529968262,grad_norm: 0.9999994428128389, iteration: 285321
loss: 1.064188003540039,grad_norm: 0.9999991203884692, iteration: 285322
loss: 1.3295153379440308,grad_norm: 0.9999995519863242, iteration: 285323
loss: 1.1850955486297607,grad_norm: 0.9999998576285006, iteration: 285324
loss: 1.437978982925415,grad_norm: 0.9999999621644741, iteration: 285325
loss: 1.2665239572525024,grad_norm: 0.999999986258166, iteration: 285326
loss: 1.0792758464813232,grad_norm: 0.9999991872555267, iteration: 285327
loss: 1.130717396736145,grad_norm: 0.9999996372988177, iteration: 285328
loss: 1.1276679039001465,grad_norm: 0.9999993057366863, iteration: 285329
loss: 1.2942029237747192,grad_norm: 0.9999996543966811, iteration: 285330
loss: 1.0234678983688354,grad_norm: 0.9999991406695753, iteration: 285331
loss: 1.0908970832824707,grad_norm: 0.9999991500966321, iteration: 285332
loss: 1.2006758451461792,grad_norm: 0.9999997208221829, iteration: 285333
loss: 1.2269381284713745,grad_norm: 0.9999997378397888, iteration: 285334
loss: 1.053329348564148,grad_norm: 0.8530591368457144, iteration: 285335
loss: 1.1934770345687866,grad_norm: 0.999999354487615, iteration: 285336
loss: 1.0844851732254028,grad_norm: 0.9999991301954201, iteration: 285337
loss: 1.0870062112808228,grad_norm: 0.9999992766468091, iteration: 285338
loss: 1.2335606813430786,grad_norm: 0.9999996868624427, iteration: 285339
loss: 1.1584279537200928,grad_norm: 0.9999993136588414, iteration: 285340
loss: 1.0334769487380981,grad_norm: 0.998259161057487, iteration: 285341
loss: 1.1312708854675293,grad_norm: 0.999999642424592, iteration: 285342
loss: 1.1978291273117065,grad_norm: 0.9999998658345076, iteration: 285343
loss: 1.0070459842681885,grad_norm: 0.9999994589659488, iteration: 285344
loss: 1.1098649501800537,grad_norm: 0.9999994450555797, iteration: 285345
loss: 1.1703400611877441,grad_norm: 0.999999644063216, iteration: 285346
loss: 1.1003719568252563,grad_norm: 0.9999999273838103, iteration: 285347
loss: 1.0286176204681396,grad_norm: 0.9999991504087155, iteration: 285348
loss: 1.0643881559371948,grad_norm: 0.947352455019985, iteration: 285349
loss: 1.2768487930297852,grad_norm: 0.9999997155719731, iteration: 285350
loss: 1.1326725482940674,grad_norm: 0.9999995459358364, iteration: 285351
loss: 1.1434305906295776,grad_norm: 0.9999998481242764, iteration: 285352
loss: 1.1203124523162842,grad_norm: 0.9999993759041692, iteration: 285353
loss: 1.066164255142212,grad_norm: 0.9999992765828942, iteration: 285354
loss: 1.0546469688415527,grad_norm: 0.9999990410856134, iteration: 285355
loss: 1.432054877281189,grad_norm: 0.9999995695557463, iteration: 285356
loss: 1.07929265499115,grad_norm: 0.9999996913976287, iteration: 285357
loss: 1.1093003749847412,grad_norm: 0.9999993092069356, iteration: 285358
loss: 1.0516928434371948,grad_norm: 0.9874769339478863, iteration: 285359
loss: 1.1063534021377563,grad_norm: 0.9999997670471905, iteration: 285360
loss: 1.0571184158325195,grad_norm: 0.9999997122439994, iteration: 285361
loss: 1.1770384311676025,grad_norm: 0.9999995797076504, iteration: 285362
loss: 1.1265767812728882,grad_norm: 0.9999992009579818, iteration: 285363
loss: 1.1546186208724976,grad_norm: 0.9999995265479472, iteration: 285364
loss: 1.1785199642181396,grad_norm: 0.9999997339871417, iteration: 285365
loss: 1.0954395532608032,grad_norm: 0.9999996294556028, iteration: 285366
loss: 1.1398353576660156,grad_norm: 0.9999999711682094, iteration: 285367
loss: 1.1616270542144775,grad_norm: 0.9999996490227286, iteration: 285368
loss: 1.156829833984375,grad_norm: 0.9999994682619479, iteration: 285369
loss: 1.1335418224334717,grad_norm: 0.9999993408786816, iteration: 285370
loss: 1.0459109544754028,grad_norm: 0.9999992260201003, iteration: 285371
loss: 1.0864055156707764,grad_norm: 0.9999995989107531, iteration: 285372
loss: 1.0553196668624878,grad_norm: 0.9999991464411114, iteration: 285373
loss: 1.2498654127120972,grad_norm: 0.9999998899993392, iteration: 285374
loss: 1.1051967144012451,grad_norm: 0.999999633101797, iteration: 285375
loss: 1.0530015230178833,grad_norm: 0.999999972420626, iteration: 285376
loss: 1.0197118520736694,grad_norm: 0.9999992130414233, iteration: 285377
loss: 1.0984385013580322,grad_norm: 0.9999991518673951, iteration: 285378
loss: 1.1716362237930298,grad_norm: 0.9999997205025745, iteration: 285379
loss: 1.2934316396713257,grad_norm: 0.9999994014957309, iteration: 285380
loss: 1.1768125295639038,grad_norm: 0.9999997173563926, iteration: 285381
loss: 1.4546220302581787,grad_norm: 0.9999999350979156, iteration: 285382
loss: 1.1917272806167603,grad_norm: 0.9999998827951445, iteration: 285383
loss: 1.0737382173538208,grad_norm: 0.9999999898954267, iteration: 285384
loss: 1.1713248491287231,grad_norm: 0.9999998695205621, iteration: 285385
loss: 1.0543855428695679,grad_norm: 0.9999993373452113, iteration: 285386
loss: 1.196593999862671,grad_norm: 0.9999997674580728, iteration: 285387
loss: 1.1669788360595703,grad_norm: 0.999999514347539, iteration: 285388
loss: 1.0811454057693481,grad_norm: 0.9999995015795871, iteration: 285389
loss: 1.0432884693145752,grad_norm: 0.9999993291471365, iteration: 285390
loss: 1.0392765998840332,grad_norm: 0.9999995639291739, iteration: 285391
loss: 1.1452628374099731,grad_norm: 0.9999999988518322, iteration: 285392
loss: 1.1772792339324951,grad_norm: 0.9999999559014627, iteration: 285393
loss: 1.2387417554855347,grad_norm: 0.9999999151915868, iteration: 285394
loss: 1.1005209684371948,grad_norm: 0.9993828608935282, iteration: 285395
loss: 1.190189003944397,grad_norm: 0.9999998467677464, iteration: 285396
loss: 1.1156116724014282,grad_norm: 0.9999999045228186, iteration: 285397
loss: 1.0501141548156738,grad_norm: 0.9999992862555493, iteration: 285398
loss: 1.0767520666122437,grad_norm: 0.9999991409186787, iteration: 285399
loss: 1.1220494508743286,grad_norm: 0.9999998399787902, iteration: 285400
loss: 1.2504185438156128,grad_norm: 0.9999999767827144, iteration: 285401
loss: 1.041691780090332,grad_norm: 0.9999998132338956, iteration: 285402
loss: 1.4128820896148682,grad_norm: 0.9999999143183758, iteration: 285403
loss: 1.1057695150375366,grad_norm: 0.9999993120275348, iteration: 285404
loss: 1.2564752101898193,grad_norm: 0.9999996489388433, iteration: 285405
loss: 1.1058777570724487,grad_norm: 0.9999993125219693, iteration: 285406
loss: 1.1186519861221313,grad_norm: 0.999999104649531, iteration: 285407
loss: 1.221287727355957,grad_norm: 0.999999772706164, iteration: 285408
loss: 1.1550254821777344,grad_norm: 0.9999997597855259, iteration: 285409
loss: 1.1133321523666382,grad_norm: 0.9999997459862231, iteration: 285410
loss: 1.1472069025039673,grad_norm: 0.9999993462386161, iteration: 285411
loss: 1.1182525157928467,grad_norm: 0.9999998464558526, iteration: 285412
loss: 1.0110667943954468,grad_norm: 0.9062546057438539, iteration: 285413
loss: 1.2057925462722778,grad_norm: 0.9916630817398394, iteration: 285414
loss: 1.2414276599884033,grad_norm: 0.9999994730692111, iteration: 285415
loss: 1.1428675651550293,grad_norm: 0.999999666067002, iteration: 285416
loss: 1.1426734924316406,grad_norm: 0.9999998350368461, iteration: 285417
loss: 1.2280497550964355,grad_norm: 0.9999996052589374, iteration: 285418
loss: 1.0950931310653687,grad_norm: 1.0000000264612057, iteration: 285419
loss: 1.1510003805160522,grad_norm: 0.9999991910365515, iteration: 285420
loss: 1.1615866422653198,grad_norm: 0.9999998723550921, iteration: 285421
loss: 1.2192652225494385,grad_norm: 0.9999999757809549, iteration: 285422
loss: 1.1342202425003052,grad_norm: 0.9999994441220956, iteration: 285423
loss: 1.0605326890945435,grad_norm: 0.9999996987628439, iteration: 285424
loss: 1.282202124595642,grad_norm: 0.9999995110443601, iteration: 285425
loss: 1.0603795051574707,grad_norm: 1.0000000045020019, iteration: 285426
loss: 1.1439809799194336,grad_norm: 0.9999998849111377, iteration: 285427
loss: 1.1382627487182617,grad_norm: 0.9999997077133088, iteration: 285428
loss: 1.2145692110061646,grad_norm: 0.9999997823042303, iteration: 285429
loss: 1.1130125522613525,grad_norm: 0.9999998918987282, iteration: 285430
loss: 1.1660289764404297,grad_norm: 0.9999993129172432, iteration: 285431
loss: 1.1041275262832642,grad_norm: 0.9999996910273794, iteration: 285432
loss: 1.1937861442565918,grad_norm: 1.0000000281209245, iteration: 285433
loss: 1.014528512954712,grad_norm: 0.9999991352856448, iteration: 285434
loss: 1.1970092058181763,grad_norm: 0.9999994030739096, iteration: 285435
loss: 1.1319427490234375,grad_norm: 0.9999992635773413, iteration: 285436
loss: 1.1645351648330688,grad_norm: 0.999999648713804, iteration: 285437
loss: 1.144149899482727,grad_norm: 0.999999509405779, iteration: 285438
loss: 1.2142783403396606,grad_norm: 0.9999998591947801, iteration: 285439
loss: 1.159552812576294,grad_norm: 0.9999995566069652, iteration: 285440
loss: 1.0375906229019165,grad_norm: 0.9999990001581641, iteration: 285441
loss: 1.2653836011886597,grad_norm: 0.9999999224106642, iteration: 285442
loss: 1.1418863534927368,grad_norm: 0.9999997539736561, iteration: 285443
loss: 1.1171951293945312,grad_norm: 0.9999997220928456, iteration: 285444
loss: 1.1830785274505615,grad_norm: 0.9999997879070694, iteration: 285445
loss: 1.2382745742797852,grad_norm: 1.0000000361357166, iteration: 285446
loss: 1.0504311323165894,grad_norm: 0.9999991468626784, iteration: 285447
loss: 1.164624810218811,grad_norm: 0.9999994833017488, iteration: 285448
loss: 1.0504883527755737,grad_norm: 0.9999990406683933, iteration: 285449
loss: 1.1263333559036255,grad_norm: 0.999999386695574, iteration: 285450
loss: 1.3180699348449707,grad_norm: 1.000000048932528, iteration: 285451
loss: 1.365895390510559,grad_norm: 0.999999984918704, iteration: 285452
loss: 1.133800745010376,grad_norm: 0.9999997454990489, iteration: 285453
loss: 1.2456239461898804,grad_norm: 0.9999999000214475, iteration: 285454
loss: 1.1462973356246948,grad_norm: 0.9999991585816368, iteration: 285455
loss: 1.2751423120498657,grad_norm: 0.9999998819875097, iteration: 285456
loss: 1.2504762411117554,grad_norm: 0.9999997492262723, iteration: 285457
loss: 1.0758426189422607,grad_norm: 0.9999995316435132, iteration: 285458
loss: 1.0592410564422607,grad_norm: 0.9999996912563608, iteration: 285459
loss: 1.1118570566177368,grad_norm: 0.999999835021283, iteration: 285460
loss: 1.2503300905227661,grad_norm: 0.9999999206993448, iteration: 285461
loss: 1.10504949092865,grad_norm: 0.9999999376923006, iteration: 285462
loss: 1.1220664978027344,grad_norm: 0.999999342930814, iteration: 285463
loss: 1.0792851448059082,grad_norm: 0.9999999227781732, iteration: 285464
loss: 1.212600588798523,grad_norm: 0.999999557966007, iteration: 285465
loss: 1.080938696861267,grad_norm: 0.9999992341338307, iteration: 285466
loss: 1.2260310649871826,grad_norm: 0.9999999699130492, iteration: 285467
loss: 1.2352017164230347,grad_norm: 0.9999999126159528, iteration: 285468
loss: 1.2158212661743164,grad_norm: 0.9999992253094638, iteration: 285469
loss: 1.2001309394836426,grad_norm: 0.9999997570284342, iteration: 285470
loss: 1.0846282243728638,grad_norm: 0.999999739427655, iteration: 285471
loss: 1.1544982194900513,grad_norm: 0.9999996386793352, iteration: 285472
loss: 1.0528210401535034,grad_norm: 0.9999992980767166, iteration: 285473
loss: 1.354103922843933,grad_norm: 0.9999996427413218, iteration: 285474
loss: 1.1343854665756226,grad_norm: 0.9999999108128387, iteration: 285475
loss: 1.2011234760284424,grad_norm: 0.9999997472009486, iteration: 285476
loss: 1.1120595932006836,grad_norm: 0.9999994154555805, iteration: 285477
loss: 1.0531861782073975,grad_norm: 0.9925816359656444, iteration: 285478
loss: 1.1323602199554443,grad_norm: 0.9999995006263429, iteration: 285479
loss: 1.1130377054214478,grad_norm: 0.9999998972362484, iteration: 285480
loss: 1.120245099067688,grad_norm: 1.0000000269172107, iteration: 285481
loss: 1.3250763416290283,grad_norm: 0.9999998243242244, iteration: 285482
loss: 1.087667465209961,grad_norm: 0.9999993795121417, iteration: 285483
loss: 1.1309113502502441,grad_norm: 0.9999996603432486, iteration: 285484
loss: 1.1321051120758057,grad_norm: 0.9999997561122211, iteration: 285485
loss: 1.2328734397888184,grad_norm: 0.9999994614444102, iteration: 285486
loss: 1.0262287855148315,grad_norm: 0.9289961999888587, iteration: 285487
loss: 1.094133973121643,grad_norm: 0.9999993468289362, iteration: 285488
loss: 1.0342748165130615,grad_norm: 0.9999992837946519, iteration: 285489
loss: 1.0022543668746948,grad_norm: 0.9999989798840485, iteration: 285490
loss: 1.072159767150879,grad_norm: 0.9999991185818832, iteration: 285491
loss: 1.0699143409729004,grad_norm: 0.9999994614873584, iteration: 285492
loss: 1.0300112962722778,grad_norm: 0.8677053036715884, iteration: 285493
loss: 1.2562692165374756,grad_norm: 0.9999999232642214, iteration: 285494
loss: 1.2291920185089111,grad_norm: 0.9999993330904356, iteration: 285495
loss: 1.0856038331985474,grad_norm: 0.9999991207551008, iteration: 285496
loss: 1.1053459644317627,grad_norm: 0.9999998865202894, iteration: 285497
loss: 1.1065760850906372,grad_norm: 0.9999998867881426, iteration: 285498
loss: 1.0830347537994385,grad_norm: 0.999999501994194, iteration: 285499
loss: 1.1061649322509766,grad_norm: 0.9999990438708639, iteration: 285500
loss: 1.1064785718917847,grad_norm: 0.9999999976858566, iteration: 285501
loss: 1.1596776247024536,grad_norm: 0.9999991273206222, iteration: 285502
loss: 1.0015922784805298,grad_norm: 0.7074962449711637, iteration: 285503
loss: 1.048310399055481,grad_norm: 0.9999995782868611, iteration: 285504
loss: 1.069042682647705,grad_norm: 0.999999342700506, iteration: 285505
loss: 1.2202258110046387,grad_norm: 0.9999998118073704, iteration: 285506
loss: 1.0749413967132568,grad_norm: 0.9999997624925054, iteration: 285507
loss: 1.1476171016693115,grad_norm: 0.9999996207849053, iteration: 285508
loss: 1.0506532192230225,grad_norm: 0.9999997776120415, iteration: 285509
loss: 1.0264712572097778,grad_norm: 0.9999990616625534, iteration: 285510
loss: 1.0413755178451538,grad_norm: 0.999999317217097, iteration: 285511
loss: 1.0791258811950684,grad_norm: 0.9999993436506845, iteration: 285512
loss: 1.230039119720459,grad_norm: 0.9999992114044257, iteration: 285513
loss: 1.0272085666656494,grad_norm: 0.9999998446581995, iteration: 285514
loss: 1.1136444807052612,grad_norm: 0.9999995273653576, iteration: 285515
loss: 1.065062165260315,grad_norm: 0.9999998223242653, iteration: 285516
loss: 1.1269359588623047,grad_norm: 0.9999995458929207, iteration: 285517
loss: 1.000571846961975,grad_norm: 0.8620358344789758, iteration: 285518
loss: 1.041011095046997,grad_norm: 0.9999990910878561, iteration: 285519
loss: 1.0529946088790894,grad_norm: 0.9999992599688445, iteration: 285520
loss: 1.2134582996368408,grad_norm: 0.9999996346085045, iteration: 285521
loss: 1.0892125368118286,grad_norm: 0.9999997304281538, iteration: 285522
loss: 1.1781501770019531,grad_norm: 0.9999996598123092, iteration: 285523
loss: 1.0922623872756958,grad_norm: 0.9999993694792559, iteration: 285524
loss: 1.1618373394012451,grad_norm: 0.9999994419361823, iteration: 285525
loss: 1.0305479764938354,grad_norm: 0.9999996050397164, iteration: 285526
loss: 1.095320463180542,grad_norm: 0.9999997867780297, iteration: 285527
loss: 1.0882822275161743,grad_norm: 0.9999990801621947, iteration: 285528
loss: 1.1943917274475098,grad_norm: 0.9999998755860497, iteration: 285529
loss: 1.0333740711212158,grad_norm: 0.9999992526030503, iteration: 285530
loss: 1.0954110622406006,grad_norm: 0.9999997256433831, iteration: 285531
loss: 1.0986292362213135,grad_norm: 0.9999999075199247, iteration: 285532
loss: 1.0667548179626465,grad_norm: 0.9999997462708948, iteration: 285533
loss: 1.038441777229309,grad_norm: 0.9999991417922506, iteration: 285534
loss: 1.0535515546798706,grad_norm: 0.9999990671682947, iteration: 285535
loss: 1.0590482950210571,grad_norm: 0.9999992555296963, iteration: 285536
loss: 0.9773920178413391,grad_norm: 0.9999990650069731, iteration: 285537
loss: 1.1282894611358643,grad_norm: 0.9999990729444858, iteration: 285538
loss: 1.0034946203231812,grad_norm: 0.9999992744785842, iteration: 285539
loss: 1.06972336769104,grad_norm: 0.999999466830809, iteration: 285540
loss: 1.1300667524337769,grad_norm: 0.9999993271764701, iteration: 285541
loss: 1.049828052520752,grad_norm: 0.9999998029823103, iteration: 285542
loss: 1.0739312171936035,grad_norm: 0.999999485994605, iteration: 285543
loss: 1.0185922384262085,grad_norm: 0.9999992053626301, iteration: 285544
loss: 1.0386816263198853,grad_norm: 0.9286318771632833, iteration: 285545
loss: 1.018142580986023,grad_norm: 0.9999991143138302, iteration: 285546
loss: 1.0515872240066528,grad_norm: 0.9999995583252275, iteration: 285547
loss: 1.1262340545654297,grad_norm: 0.9999992356476913, iteration: 285548
loss: 1.0795009136199951,grad_norm: 0.9999995102046095, iteration: 285549
loss: 1.1724205017089844,grad_norm: 0.9999995949561463, iteration: 285550
loss: 1.0581649541854858,grad_norm: 0.999999152594448, iteration: 285551
loss: 1.207830548286438,grad_norm: 0.9999998200331397, iteration: 285552
loss: 1.0418922901153564,grad_norm: 0.9311520116468912, iteration: 285553
loss: 1.0365815162658691,grad_norm: 0.879434637987177, iteration: 285554
loss: 1.174696683883667,grad_norm: 0.9999998945123105, iteration: 285555
loss: 1.0899254083633423,grad_norm: 0.9451976847360173, iteration: 285556
loss: 1.0174938440322876,grad_norm: 0.99999985312607, iteration: 285557
loss: 1.1345694065093994,grad_norm: 0.9754698611620081, iteration: 285558
loss: 1.0925984382629395,grad_norm: 0.9999998554425524, iteration: 285559
loss: 1.0776066780090332,grad_norm: 0.9999996302130756, iteration: 285560
loss: 1.0020010471343994,grad_norm: 0.9999990949531744, iteration: 285561
loss: 1.0355688333511353,grad_norm: 0.9999997610819121, iteration: 285562
loss: 1.0263164043426514,grad_norm: 0.9124712005725564, iteration: 285563
loss: 1.0049314498901367,grad_norm: 0.9280172186880582, iteration: 285564
loss: 1.0536470413208008,grad_norm: 0.9999998154587804, iteration: 285565
loss: 1.0048915147781372,grad_norm: 0.9314093990705651, iteration: 285566
loss: 1.0705753564834595,grad_norm: 0.9999991919305541, iteration: 285567
loss: 1.0103106498718262,grad_norm: 0.9999996388158245, iteration: 285568
loss: 1.032755732536316,grad_norm: 0.9848960210961589, iteration: 285569
loss: 1.1078529357910156,grad_norm: 0.999999944685211, iteration: 285570
loss: 1.0973860025405884,grad_norm: 0.999999521088027, iteration: 285571
loss: 1.1812528371810913,grad_norm: 0.9999995409704748, iteration: 285572
loss: 1.0298064947128296,grad_norm: 0.9999990909388952, iteration: 285573
loss: 1.1097736358642578,grad_norm: 0.9999994287741326, iteration: 285574
loss: 1.0273504257202148,grad_norm: 0.9999997615161886, iteration: 285575
loss: 1.0905587673187256,grad_norm: 0.9999994873047745, iteration: 285576
loss: 1.2264269590377808,grad_norm: 0.9999992326781282, iteration: 285577
loss: 1.0992448329925537,grad_norm: 0.9999995554631959, iteration: 285578
loss: 0.9989021420478821,grad_norm: 0.8348043256959164, iteration: 285579
loss: 1.023531436920166,grad_norm: 0.9999999175859394, iteration: 285580
loss: 0.9576390385627747,grad_norm: 0.9526918573825013, iteration: 285581
loss: 1.1200497150421143,grad_norm: 0.9999996438402581, iteration: 285582
loss: 1.0315656661987305,grad_norm: 0.9999990603053561, iteration: 285583
loss: 1.0396696329116821,grad_norm: 0.9999997115985142, iteration: 285584
loss: 1.1961252689361572,grad_norm: 0.9999995708025636, iteration: 285585
loss: 1.0392402410507202,grad_norm: 0.9682503396130375, iteration: 285586
loss: 1.1315065622329712,grad_norm: 0.9999998048500909, iteration: 285587
loss: 1.072339653968811,grad_norm: 0.9999992060727524, iteration: 285588
loss: 1.0878431797027588,grad_norm: 0.9999997461366953, iteration: 285589
loss: 1.0961657762527466,grad_norm: 0.8836487705194632, iteration: 285590
loss: 1.0417206287384033,grad_norm: 0.9999993029612612, iteration: 285591
loss: 1.149832844734192,grad_norm: 0.9999998285617723, iteration: 285592
loss: 1.0775686502456665,grad_norm: 0.9305456731363814, iteration: 285593
loss: 1.0439423322677612,grad_norm: 0.9070814442999111, iteration: 285594
loss: 1.128156065940857,grad_norm: 0.9999990740961774, iteration: 285595
loss: 1.0436114072799683,grad_norm: 0.9999998173087686, iteration: 285596
loss: 1.5116045475006104,grad_norm: 0.99999985111177, iteration: 285597
loss: 1.2069629430770874,grad_norm: 0.9999997536137313, iteration: 285598
loss: 1.1507086753845215,grad_norm: 0.999999150399616, iteration: 285599
loss: 1.010851502418518,grad_norm: 0.999999197652652, iteration: 285600
loss: 1.114313006401062,grad_norm: 0.9999998237741986, iteration: 285601
loss: 1.0324045419692993,grad_norm: 0.8596716896997281, iteration: 285602
loss: 1.0458567142486572,grad_norm: 0.9999992893061594, iteration: 285603
loss: 1.2777519226074219,grad_norm: 0.9999995357340864, iteration: 285604
loss: 1.1614576578140259,grad_norm: 0.9999995481902745, iteration: 285605
loss: 1.0993236303329468,grad_norm: 0.9999992188869017, iteration: 285606
loss: 1.0681438446044922,grad_norm: 0.9999992133177519, iteration: 285607
loss: 1.0628706216812134,grad_norm: 0.9999997521041454, iteration: 285608
loss: 1.1571147441864014,grad_norm: 0.9999993326861235, iteration: 285609
loss: 1.0371073484420776,grad_norm: 0.9999991870707156, iteration: 285610
loss: 0.9925586581230164,grad_norm: 0.9999994749536159, iteration: 285611
loss: 0.9951792359352112,grad_norm: 0.9999992407425168, iteration: 285612
loss: 1.002789855003357,grad_norm: 0.8802810615162854, iteration: 285613
loss: 1.2089670896530151,grad_norm: 0.9999995221969343, iteration: 285614
loss: 1.059739112854004,grad_norm: 0.9706042035758302, iteration: 285615
loss: 1.046202301979065,grad_norm: 0.9999991726645524, iteration: 285616
loss: 1.0170444250106812,grad_norm: 0.7939214040156004, iteration: 285617
loss: 1.0140568017959595,grad_norm: 0.8817696080288397, iteration: 285618
loss: 1.0497453212738037,grad_norm: 0.9999998982925653, iteration: 285619
loss: 1.047739863395691,grad_norm: 0.9999999379458816, iteration: 285620
loss: 0.9832726716995239,grad_norm: 0.7700230471748477, iteration: 285621
loss: 1.1034613847732544,grad_norm: 0.999999183046316, iteration: 285622
loss: 1.1309967041015625,grad_norm: 0.9999998903801532, iteration: 285623
loss: 1.0041143894195557,grad_norm: 0.95882257762765, iteration: 285624
loss: 1.0730164051055908,grad_norm: 0.9999993086775407, iteration: 285625
loss: 1.135059118270874,grad_norm: 0.9999997639743883, iteration: 285626
loss: 1.0402342081069946,grad_norm: 0.9999999033990792, iteration: 285627
loss: 1.0240368843078613,grad_norm: 0.9999995164909224, iteration: 285628
loss: 1.093945026397705,grad_norm: 0.9192882084917061, iteration: 285629
loss: 1.3051117658615112,grad_norm: 0.9999993045339157, iteration: 285630
loss: 1.0325013399124146,grad_norm: 0.9999995094652104, iteration: 285631
loss: 1.0350041389465332,grad_norm: 0.999999189018158, iteration: 285632
loss: 1.0960862636566162,grad_norm: 0.9999997885775853, iteration: 285633
loss: 1.0792120695114136,grad_norm: 0.9600571744686939, iteration: 285634
loss: 1.0615278482437134,grad_norm: 0.9661957170223892, iteration: 285635
loss: 1.1309062242507935,grad_norm: 0.9999994726912099, iteration: 285636
loss: 1.0540815591812134,grad_norm: 0.8976710372118436, iteration: 285637
loss: 1.0948221683502197,grad_norm: 0.9999994180195423, iteration: 285638
loss: 1.0180668830871582,grad_norm: 0.8195862859175139, iteration: 285639
loss: 1.052985429763794,grad_norm: 0.9999990971641808, iteration: 285640
loss: 1.044981837272644,grad_norm: 0.9999997163468182, iteration: 285641
loss: 1.122764229774475,grad_norm: 0.9999992869524001, iteration: 285642
loss: 1.0288019180297852,grad_norm: 0.9999994494872203, iteration: 285643
loss: 1.0134379863739014,grad_norm: 0.8556379963995207, iteration: 285644
loss: 1.029394507408142,grad_norm: 0.9604065026314537, iteration: 285645
loss: 1.0073845386505127,grad_norm: 0.9999991991737726, iteration: 285646
loss: 1.168516993522644,grad_norm: 0.9999995979700367, iteration: 285647
loss: 1.112605094909668,grad_norm: 0.9999995995816722, iteration: 285648
loss: 0.9887441396713257,grad_norm: 0.8020724294242589, iteration: 285649
loss: 0.9978076815605164,grad_norm: 0.9999996240419179, iteration: 285650
loss: 1.0114731788635254,grad_norm: 0.9999996625844482, iteration: 285651
loss: 1.0531271696090698,grad_norm: 0.9056045645282121, iteration: 285652
loss: 1.0927218198776245,grad_norm: 0.9999996273071654, iteration: 285653
loss: 1.0730273723602295,grad_norm: 0.9903049531669379, iteration: 285654
loss: 1.0947744846343994,grad_norm: 0.9999992451486625, iteration: 285655
loss: 1.0270379781723022,grad_norm: 0.999999057814339, iteration: 285656
loss: 1.0406321287155151,grad_norm: 0.9999999152914244, iteration: 285657
loss: 1.0478334426879883,grad_norm: 0.9999996654459964, iteration: 285658
loss: 1.0899279117584229,grad_norm: 0.908630798884417, iteration: 285659
loss: 1.1187719106674194,grad_norm: 0.9999992425406821, iteration: 285660
loss: 1.0289002656936646,grad_norm: 0.9999996583946401, iteration: 285661
loss: 1.0110044479370117,grad_norm: 0.8742553892988452, iteration: 285662
loss: 1.132310390472412,grad_norm: 0.9999996381057478, iteration: 285663
loss: 1.1116950511932373,grad_norm: 0.9999992214895339, iteration: 285664
loss: 1.06223464012146,grad_norm: 0.999999145884773, iteration: 285665
loss: 1.1008940935134888,grad_norm: 0.9999996108900565, iteration: 285666
loss: 1.2164419889450073,grad_norm: 0.999999982377953, iteration: 285667
loss: 1.0475692749023438,grad_norm: 0.9999997997953164, iteration: 285668
loss: 1.016325831413269,grad_norm: 0.9999993805939028, iteration: 285669
loss: 1.0174771547317505,grad_norm: 0.7392027292929823, iteration: 285670
loss: 1.029504418373108,grad_norm: 0.9999992830491666, iteration: 285671
loss: 1.006851077079773,grad_norm: 0.9413742399244361, iteration: 285672
loss: 1.2018595933914185,grad_norm: 0.9999998336945325, iteration: 285673
loss: 1.009636402130127,grad_norm: 0.9999993060354019, iteration: 285674
loss: 1.0232815742492676,grad_norm: 0.9999992315773811, iteration: 285675
loss: 1.0125737190246582,grad_norm: 0.9999990852301635, iteration: 285676
loss: 1.0153026580810547,grad_norm: 0.9538693447773958, iteration: 285677
loss: 1.0096840858459473,grad_norm: 0.8326654496523849, iteration: 285678
loss: 1.0500518083572388,grad_norm: 0.9999999961166259, iteration: 285679
loss: 1.1034138202667236,grad_norm: 0.9999994690119085, iteration: 285680
loss: 1.0586127042770386,grad_norm: 0.9999992132196084, iteration: 285681
loss: 1.068536400794983,grad_norm: 0.9999996517358686, iteration: 285682
loss: 0.9811163544654846,grad_norm: 0.999999257576876, iteration: 285683
loss: 1.0522663593292236,grad_norm: 0.9999990842549551, iteration: 285684
loss: 1.1211961507797241,grad_norm: 0.99999984851772, iteration: 285685
loss: 1.0384013652801514,grad_norm: 0.9505365425442533, iteration: 285686
loss: 1.0776716470718384,grad_norm: 0.9178597622794291, iteration: 285687
loss: 1.0556902885437012,grad_norm: 0.9999998465028689, iteration: 285688
loss: 1.05509614944458,grad_norm: 0.807234719713958, iteration: 285689
loss: 1.0618137121200562,grad_norm: 0.8627253724391728, iteration: 285690
loss: 0.9797253012657166,grad_norm: 0.9125305049430279, iteration: 285691
loss: 1.0527706146240234,grad_norm: 0.9999990293474558, iteration: 285692
loss: 1.0116606950759888,grad_norm: 0.9355255846610919, iteration: 285693
loss: 1.0153419971466064,grad_norm: 0.8978071435037214, iteration: 285694
loss: 0.9739093780517578,grad_norm: 0.884535963565902, iteration: 285695
loss: 1.0219532251358032,grad_norm: 0.9049888293495891, iteration: 285696
loss: 1.1172218322753906,grad_norm: 0.999999847559734, iteration: 285697
loss: 1.19114089012146,grad_norm: 0.9999992126305708, iteration: 285698
loss: 1.0009567737579346,grad_norm: 0.9231430230698119, iteration: 285699
loss: 1.0329488515853882,grad_norm: 0.9999990140701193, iteration: 285700
loss: 1.0898281335830688,grad_norm: 0.9999990467779318, iteration: 285701
loss: 1.104093074798584,grad_norm: 0.9999999084097184, iteration: 285702
loss: 1.0169650316238403,grad_norm: 0.8808121471151308, iteration: 285703
loss: 1.0767533779144287,grad_norm: 0.9999997526252338, iteration: 285704
loss: 1.0027903318405151,grad_norm: 0.9999991318208292, iteration: 285705
loss: 1.0915679931640625,grad_norm: 0.9999998769282481, iteration: 285706
loss: 1.0335646867752075,grad_norm: 0.9999990968887871, iteration: 285707
loss: 0.9984154105186462,grad_norm: 0.9064922176047628, iteration: 285708
loss: 0.9533452391624451,grad_norm: 0.7336066523961474, iteration: 285709
loss: 1.0478562116622925,grad_norm: 0.9999994753410246, iteration: 285710
loss: 1.1020406484603882,grad_norm: 0.9999993954618888, iteration: 285711
loss: 1.0094707012176514,grad_norm: 0.9999990710720259, iteration: 285712
loss: 1.0553665161132812,grad_norm: 0.9999998852326918, iteration: 285713
loss: 1.0267916917800903,grad_norm: 0.8884332872547199, iteration: 285714
loss: 1.0822839736938477,grad_norm: 0.9999996055120076, iteration: 285715
loss: 0.956933856010437,grad_norm: 0.8571098335381333, iteration: 285716
loss: 1.1227302551269531,grad_norm: 0.9999990247187821, iteration: 285717
loss: 0.9816072583198547,grad_norm: 0.9868239664722492, iteration: 285718
loss: 0.9861409068107605,grad_norm: 0.869124471033116, iteration: 285719
loss: 1.021122694015503,grad_norm: 0.9206383238549839, iteration: 285720
loss: 1.0641731023788452,grad_norm: 0.9999999019227056, iteration: 285721
loss: 1.0179953575134277,grad_norm: 0.9999996173692746, iteration: 285722
loss: 1.0121160745620728,grad_norm: 0.8298808416520967, iteration: 285723
loss: 1.1065332889556885,grad_norm: 0.9999994944640791, iteration: 285724
loss: 0.9946321845054626,grad_norm: 0.9999991590149221, iteration: 285725
loss: 1.1333509683609009,grad_norm: 0.9999993506716656, iteration: 285726
loss: 1.0516860485076904,grad_norm: 0.9999992040108936, iteration: 285727
loss: 1.0674914121627808,grad_norm: 0.9999991577219693, iteration: 285728
loss: 1.0386396646499634,grad_norm: 0.999999158009528, iteration: 285729
loss: 1.0002119541168213,grad_norm: 0.9999990771971646, iteration: 285730
loss: 1.0380674600601196,grad_norm: 0.8368060769483965, iteration: 285731
loss: 1.0146127939224243,grad_norm: 0.7671058434675129, iteration: 285732
loss: 1.0993858575820923,grad_norm: 0.9193902132786278, iteration: 285733
loss: 1.000714898109436,grad_norm: 0.9860275846657367, iteration: 285734
loss: 1.0324182510375977,grad_norm: 0.8812780153084655, iteration: 285735
loss: 1.2139885425567627,grad_norm: 0.9999999829270622, iteration: 285736
loss: 0.9893869757652283,grad_norm: 0.8573426464746721, iteration: 285737
loss: 0.9944171905517578,grad_norm: 0.9107351994512535, iteration: 285738
loss: 1.0172226428985596,grad_norm: 0.8907203469666545, iteration: 285739
loss: 1.0770541429519653,grad_norm: 0.9999991467286662, iteration: 285740
loss: 1.0261043310165405,grad_norm: 0.8719649093743495, iteration: 285741
loss: 1.2199779748916626,grad_norm: 0.999999507396599, iteration: 285742
loss: 1.303597092628479,grad_norm: 0.9999999402381057, iteration: 285743
loss: 0.9715191125869751,grad_norm: 0.8343847117546219, iteration: 285744
loss: 1.0469290018081665,grad_norm: 0.9999998633793377, iteration: 285745
loss: 0.9744505882263184,grad_norm: 0.9968556496441898, iteration: 285746
loss: 1.0869969129562378,grad_norm: 0.9999991486972472, iteration: 285747
loss: 1.0650895833969116,grad_norm: 0.9999999726791273, iteration: 285748
loss: 1.2130250930786133,grad_norm: 0.9999991608293283, iteration: 285749
loss: 1.1344221830368042,grad_norm: 0.9999992382015115, iteration: 285750
loss: 0.9966302514076233,grad_norm: 0.9999990831299514, iteration: 285751
loss: 1.0032017230987549,grad_norm: 0.93679805290786, iteration: 285752
loss: 1.0259369611740112,grad_norm: 0.999999103308281, iteration: 285753
loss: 0.9929280281066895,grad_norm: 0.9999990087654619, iteration: 285754
loss: 1.016046404838562,grad_norm: 0.9552303736639566, iteration: 285755
loss: 1.0430046319961548,grad_norm: 0.9291264308942799, iteration: 285756
loss: 1.0809807777404785,grad_norm: 0.9999993827342202, iteration: 285757
loss: 1.0244829654693604,grad_norm: 0.999999049635778, iteration: 285758
loss: 1.0963866710662842,grad_norm: 0.9999991624445621, iteration: 285759
loss: 1.0204010009765625,grad_norm: 0.9999994758756999, iteration: 285760
loss: 0.9792382121086121,grad_norm: 0.811418896900149, iteration: 285761
loss: 1.0115989446640015,grad_norm: 0.9999990432467051, iteration: 285762
loss: 1.1747839450836182,grad_norm: 0.9496794674531476, iteration: 285763
loss: 1.1022939682006836,grad_norm: 0.9999998437164389, iteration: 285764
loss: 1.1248400211334229,grad_norm: 0.9816172202858675, iteration: 285765
loss: 1.062673568725586,grad_norm: 0.9999990339676905, iteration: 285766
loss: 1.0367767810821533,grad_norm: 0.7748870515468719, iteration: 285767
loss: 0.9951663017272949,grad_norm: 0.9326854765763007, iteration: 285768
loss: 1.0004332065582275,grad_norm: 0.7126842453867805, iteration: 285769
loss: 0.973210871219635,grad_norm: 0.9999993708636052, iteration: 285770
loss: 1.1210836172103882,grad_norm: 0.9999994764030316, iteration: 285771
loss: 1.033331274986267,grad_norm: 0.9999995832846759, iteration: 285772
loss: 1.0204381942749023,grad_norm: 0.8078600318829829, iteration: 285773
loss: 0.9919281601905823,grad_norm: 0.9999994967282103, iteration: 285774
loss: 1.0367287397384644,grad_norm: 0.9999997342026083, iteration: 285775
loss: 1.0286543369293213,grad_norm: 0.8342215676990283, iteration: 285776
loss: 1.1618788242340088,grad_norm: 0.999999798575687, iteration: 285777
loss: 0.9560205936431885,grad_norm: 0.8676883220729422, iteration: 285778
loss: 1.0412604808807373,grad_norm: 0.8787848170388038, iteration: 285779
loss: 1.0039392709732056,grad_norm: 0.7752084495658572, iteration: 285780
loss: 1.109445333480835,grad_norm: 0.999999415783348, iteration: 285781
loss: 0.9895606637001038,grad_norm: 0.9267018528836376, iteration: 285782
loss: 1.0019766092300415,grad_norm: 0.9781063482649869, iteration: 285783
loss: 1.0633803606033325,grad_norm: 0.9999996100659396, iteration: 285784
loss: 0.9956457018852234,grad_norm: 0.9272673368778754, iteration: 285785
loss: 1.1057231426239014,grad_norm: 0.9999996562762495, iteration: 285786
loss: 1.064067006111145,grad_norm: 0.9999995074479939, iteration: 285787
loss: 1.0846132040023804,grad_norm: 0.9999991134478764, iteration: 285788
loss: 1.0465270280838013,grad_norm: 0.9999990391441104, iteration: 285789
loss: 1.0277597904205322,grad_norm: 0.999999048522244, iteration: 285790
loss: 1.0395328998565674,grad_norm: 0.9999996881538629, iteration: 285791
loss: 1.225520372390747,grad_norm: 0.9999995831256517, iteration: 285792
loss: 0.9699515104293823,grad_norm: 0.8388542406288988, iteration: 285793
loss: 1.3144171237945557,grad_norm: 0.9999994889069492, iteration: 285794
loss: 1.0639225244522095,grad_norm: 0.9999998415567849, iteration: 285795
loss: 1.003377914428711,grad_norm: 0.7135169609055827, iteration: 285796
loss: 1.025773048400879,grad_norm: 0.999999107444745, iteration: 285797
loss: 1.2106118202209473,grad_norm: 0.9999992331842675, iteration: 285798
loss: 1.0286402702331543,grad_norm: 0.9999994738539612, iteration: 285799
loss: 1.0282772779464722,grad_norm: 0.977498170098248, iteration: 285800
loss: 1.0475302934646606,grad_norm: 0.9999992838508902, iteration: 285801
loss: 1.0186692476272583,grad_norm: 0.9373551804905691, iteration: 285802
loss: 1.0206676721572876,grad_norm: 0.9288964153527319, iteration: 285803
loss: 1.074609637260437,grad_norm: 0.9386420407039734, iteration: 285804
loss: 1.0764464139938354,grad_norm: 0.9999997102475986, iteration: 285805
loss: 1.0443816184997559,grad_norm: 0.9999992149081249, iteration: 285806
loss: 1.058186411857605,grad_norm: 0.9999995316167171, iteration: 285807
loss: 0.9921380877494812,grad_norm: 0.7710036741273609, iteration: 285808
loss: 0.9792925715446472,grad_norm: 0.6989438604052626, iteration: 285809
loss: 1.0039480924606323,grad_norm: 0.8396634633911118, iteration: 285810
loss: 0.9992074370384216,grad_norm: 0.8394048704447423, iteration: 285811
loss: 1.0461512804031372,grad_norm: 0.793675613171819, iteration: 285812
loss: 1.0090670585632324,grad_norm: 0.8717523105841215, iteration: 285813
loss: 1.0133095979690552,grad_norm: 0.9560956371185713, iteration: 285814
loss: 1.0943547487258911,grad_norm: 0.9978086741318258, iteration: 285815
loss: 1.0606231689453125,grad_norm: 0.9999998541380694, iteration: 285816
loss: 1.0246894359588623,grad_norm: 0.9433292044030934, iteration: 285817
loss: 1.0496324300765991,grad_norm: 0.9999999655744053, iteration: 285818
loss: 0.9998145699501038,grad_norm: 0.968191439794824, iteration: 285819
loss: 0.997756838798523,grad_norm: 0.9999998785797136, iteration: 285820
loss: 0.9975289106369019,grad_norm: 0.8794924856811724, iteration: 285821
loss: 1.0042519569396973,grad_norm: 0.743800112254917, iteration: 285822
loss: 1.04579758644104,grad_norm: 0.9999991909913016, iteration: 285823
loss: 1.073067307472229,grad_norm: 0.8440459474485645, iteration: 285824
loss: 0.9927200675010681,grad_norm: 0.9721908402405881, iteration: 285825
loss: 1.059561848640442,grad_norm: 0.8544579058947359, iteration: 285826
loss: 0.9754977226257324,grad_norm: 0.9801670862890601, iteration: 285827
loss: 1.0695050954818726,grad_norm: 0.999999345545181, iteration: 285828
loss: 1.129065752029419,grad_norm: 0.999999794074652, iteration: 285829
loss: 1.0476865768432617,grad_norm: 0.9999999167582946, iteration: 285830
loss: 1.0484224557876587,grad_norm: 0.9999998626833566, iteration: 285831
loss: 1.0130025148391724,grad_norm: 0.9999991325269649, iteration: 285832
loss: 1.0187143087387085,grad_norm: 0.8958885517362254, iteration: 285833
loss: 1.0401179790496826,grad_norm: 0.9017242942624241, iteration: 285834
loss: 1.015002727508545,grad_norm: 0.8501707220629348, iteration: 285835
loss: 1.0334364175796509,grad_norm: 0.9999999279612487, iteration: 285836
loss: 1.0054295063018799,grad_norm: 0.7869650037332577, iteration: 285837
loss: 0.9957603216171265,grad_norm: 0.8469458460864322, iteration: 285838
loss: 1.0431385040283203,grad_norm: 0.9190228604647526, iteration: 285839
loss: 1.0580462217330933,grad_norm: 0.8557005523995266, iteration: 285840
loss: 0.9613189101219177,grad_norm: 0.9106612876828654, iteration: 285841
loss: 1.0514308214187622,grad_norm: 1.0000000525324866, iteration: 285842
loss: 1.0469006299972534,grad_norm: 0.9368047024166564, iteration: 285843
loss: 0.9863919615745544,grad_norm: 0.8417557573404224, iteration: 285844
loss: 1.0168282985687256,grad_norm: 0.9999998050955714, iteration: 285845
loss: 0.962877631187439,grad_norm: 0.9999996776601656, iteration: 285846
loss: 1.0254074335098267,grad_norm: 0.9122355709930285, iteration: 285847
loss: 1.0433977842330933,grad_norm: 0.9482560414779455, iteration: 285848
loss: 0.9782416224479675,grad_norm: 0.9477024133092101, iteration: 285849
loss: 1.1046046018600464,grad_norm: 0.9999992155967092, iteration: 285850
loss: 1.0326215028762817,grad_norm: 0.9388820700917411, iteration: 285851
loss: 1.0831798315048218,grad_norm: 0.8665974201607697, iteration: 285852
loss: 1.147209882736206,grad_norm: 0.9999993899291596, iteration: 285853
loss: 1.0292303562164307,grad_norm: 0.8770806358661616, iteration: 285854
loss: 1.0497307777404785,grad_norm: 0.9999992053074573, iteration: 285855
loss: 1.0153088569641113,grad_norm: 0.9999991979813647, iteration: 285856
loss: 0.9863843321800232,grad_norm: 0.9450226753133516, iteration: 285857
loss: 1.0744425058364868,grad_norm: 0.9999991658190508, iteration: 285858
loss: 1.0206406116485596,grad_norm: 0.7310233244472316, iteration: 285859
loss: 1.019620656967163,grad_norm: 0.731336661399704, iteration: 285860
loss: 1.0850471258163452,grad_norm: 0.9999999142050415, iteration: 285861
loss: 1.0663965940475464,grad_norm: 0.8262563632726728, iteration: 285862
loss: 1.0238051414489746,grad_norm: 0.9999990968010998, iteration: 285863
loss: 1.048248052597046,grad_norm: 0.9999995071795924, iteration: 285864
loss: 0.9706840515136719,grad_norm: 0.8779659708809375, iteration: 285865
loss: 0.9781255722045898,grad_norm: 0.9999990098783903, iteration: 285866
loss: 1.0797431468963623,grad_norm: 0.999999615127101, iteration: 285867
loss: 1.0100696086883545,grad_norm: 0.8120776863619674, iteration: 285868
loss: 1.1221094131469727,grad_norm: 0.9999996695526456, iteration: 285869
loss: 1.1393089294433594,grad_norm: 0.9999996504623676, iteration: 285870
loss: 1.0330955982208252,grad_norm: 0.8789320598518265, iteration: 285871
loss: 1.013397216796875,grad_norm: 0.9999993948799679, iteration: 285872
loss: 1.117143154144287,grad_norm: 0.9999989934602752, iteration: 285873
loss: 0.9948278069496155,grad_norm: 0.8299417428151845, iteration: 285874
loss: 1.0182100534439087,grad_norm: 0.9999989559956418, iteration: 285875
loss: 1.0333738327026367,grad_norm: 0.8782745543741128, iteration: 285876
loss: 1.0485553741455078,grad_norm: 0.9999992861808252, iteration: 285877
loss: 0.9785110950469971,grad_norm: 0.9999990568216368, iteration: 285878
loss: 1.0105772018432617,grad_norm: 0.9999994346644039, iteration: 285879
loss: 1.1025217771530151,grad_norm: 0.9999998550379047, iteration: 285880
loss: 1.0181894302368164,grad_norm: 0.9526573094912332, iteration: 285881
loss: 1.0098826885223389,grad_norm: 0.9871083384206067, iteration: 285882
loss: 1.1356556415557861,grad_norm: 0.9999999256024786, iteration: 285883
loss: 1.0139248371124268,grad_norm: 0.9999992926479548, iteration: 285884
loss: 0.9852257966995239,grad_norm: 0.9999992629744794, iteration: 285885
loss: 1.0956153869628906,grad_norm: 0.9999999094537292, iteration: 285886
loss: 1.0438082218170166,grad_norm: 0.8651258097483203, iteration: 285887
loss: 1.025154948234558,grad_norm: 0.9491386299794303, iteration: 285888
loss: 0.961319625377655,grad_norm: 0.731367752697457, iteration: 285889
loss: 0.9633767008781433,grad_norm: 0.9310455343004489, iteration: 285890
loss: 1.024835228919983,grad_norm: 0.8058817114884701, iteration: 285891
loss: 1.1171351671218872,grad_norm: 0.9999991704433928, iteration: 285892
loss: 1.3140699863433838,grad_norm: 0.9999997748526445, iteration: 285893
loss: 1.165462613105774,grad_norm: 0.9999996935453243, iteration: 285894
loss: 0.9832528233528137,grad_norm: 0.8699916690836559, iteration: 285895
loss: 1.0620425939559937,grad_norm: 0.9999992947736482, iteration: 285896
loss: 0.9837040901184082,grad_norm: 0.9280808269548876, iteration: 285897
loss: 1.078872799873352,grad_norm: 0.999999367908176, iteration: 285898
loss: 1.0656603574752808,grad_norm: 0.9999993959584283, iteration: 285899
loss: 1.0491029024124146,grad_norm: 0.9999990704589503, iteration: 285900
loss: 1.131388783454895,grad_norm: 0.9999995862905993, iteration: 285901
loss: 1.0176522731781006,grad_norm: 0.7991433867542771, iteration: 285902
loss: 0.9757320284843445,grad_norm: 0.9999992512413246, iteration: 285903
loss: 0.9848145246505737,grad_norm: 0.7318525819784983, iteration: 285904
loss: 1.1057499647140503,grad_norm: 0.9999998318482506, iteration: 285905
loss: 1.0375767946243286,grad_norm: 0.9999998924777103, iteration: 285906
loss: 1.0355888605117798,grad_norm: 0.9057534582350877, iteration: 285907
loss: 1.0488959550857544,grad_norm: 0.9999996964004575, iteration: 285908
loss: 0.9795642495155334,grad_norm: 0.9789987556253432, iteration: 285909
loss: 0.9953742623329163,grad_norm: 0.8758270731260019, iteration: 285910
loss: 1.0655207633972168,grad_norm: 0.9445899685078954, iteration: 285911
loss: 1.0688533782958984,grad_norm: 0.8542037341392481, iteration: 285912
loss: 1.060523271560669,grad_norm: 0.9999992253042845, iteration: 285913
loss: 1.047052264213562,grad_norm: 0.9775899599735383, iteration: 285914
loss: 1.1031965017318726,grad_norm: 0.999999517321267, iteration: 285915
loss: 1.056904673576355,grad_norm: 0.9999992087433427, iteration: 285916
loss: 1.246182918548584,grad_norm: 0.9999998104375729, iteration: 285917
loss: 0.9849947690963745,grad_norm: 0.9999926950642261, iteration: 285918
loss: 1.019416093826294,grad_norm: 0.9999993377136847, iteration: 285919
loss: 1.059525489807129,grad_norm: 0.9999998125086332, iteration: 285920
loss: 1.063791036605835,grad_norm: 0.9409136217608646, iteration: 285921
loss: 1.0756593942642212,grad_norm: 1.000000059387416, iteration: 285922
loss: 0.988827645778656,grad_norm: 0.9999997107636889, iteration: 285923
loss: 0.9967364072799683,grad_norm: 0.9999992841212179, iteration: 285924
loss: 1.049515962600708,grad_norm: 0.9999990450819912, iteration: 285925
loss: 1.023465633392334,grad_norm: 0.9999995220841279, iteration: 285926
loss: 1.0276281833648682,grad_norm: 0.9999990895869036, iteration: 285927
loss: 1.0545833110809326,grad_norm: 0.9999995915466341, iteration: 285928
loss: 1.0341256856918335,grad_norm: 0.9999992610927456, iteration: 285929
loss: 1.0026428699493408,grad_norm: 0.8907536740135825, iteration: 285930
loss: 0.9869675040245056,grad_norm: 0.840631717929856, iteration: 285931
loss: 1.0110796689987183,grad_norm: 0.8549548757952682, iteration: 285932
loss: 1.0004037618637085,grad_norm: 0.9999993083570415, iteration: 285933
loss: 1.0261565446853638,grad_norm: 0.9002183467736935, iteration: 285934
loss: 0.963489294052124,grad_norm: 0.8570239346018332, iteration: 285935
loss: 1.1095181703567505,grad_norm: 0.9999992610819171, iteration: 285936
loss: 1.009171724319458,grad_norm: 0.9999993411971949, iteration: 285937
loss: 1.0083099603652954,grad_norm: 0.9783126763717369, iteration: 285938
loss: 1.0270267724990845,grad_norm: 0.9999998258607052, iteration: 285939
loss: 1.0126646757125854,grad_norm: 0.8646106342119951, iteration: 285940
loss: 1.1172821521759033,grad_norm: 0.9325638142015512, iteration: 285941
loss: 1.088997483253479,grad_norm: 0.9999998802899623, iteration: 285942
loss: 1.0846209526062012,grad_norm: 0.999998995422969, iteration: 285943
loss: 0.9723319411277771,grad_norm: 0.7291746840041674, iteration: 285944
loss: 1.0139646530151367,grad_norm: 0.8639416591002769, iteration: 285945
loss: 1.069164752960205,grad_norm: 0.9999999860495623, iteration: 285946
loss: 1.1105146408081055,grad_norm: 0.9999994807996704, iteration: 285947
loss: 0.9811562895774841,grad_norm: 0.8694918043010241, iteration: 285948
loss: 1.0189130306243896,grad_norm: 0.9999999336352887, iteration: 285949
loss: 0.976307213306427,grad_norm: 0.9304687923392765, iteration: 285950
loss: 1.0145409107208252,grad_norm: 0.9500365996703664, iteration: 285951
loss: 1.0112534761428833,grad_norm: 0.9999995498869116, iteration: 285952
loss: 0.9796978831291199,grad_norm: 0.9999992881267021, iteration: 285953
loss: 1.049183964729309,grad_norm: 0.9999994216680512, iteration: 285954
loss: 1.0167121887207031,grad_norm: 0.9999993973840678, iteration: 285955
loss: 0.9858754277229309,grad_norm: 0.7475662807106738, iteration: 285956
loss: 0.9922245740890503,grad_norm: 0.7815490932871727, iteration: 285957
loss: 0.9863974452018738,grad_norm: 0.9765361682639828, iteration: 285958
loss: 0.9884569644927979,grad_norm: 0.9999992591603568, iteration: 285959
loss: 1.0557631254196167,grad_norm: 0.9999990995292294, iteration: 285960
loss: 1.0039024353027344,grad_norm: 0.9999990810823289, iteration: 285961
loss: 0.988210141658783,grad_norm: 0.7854689153617898, iteration: 285962
loss: 1.040794849395752,grad_norm: 0.7519050186533353, iteration: 285963
loss: 0.9740075469017029,grad_norm: 0.8185538728384611, iteration: 285964
loss: 1.0035498142242432,grad_norm: 0.9999990876406571, iteration: 285965
loss: 0.9882318377494812,grad_norm: 0.8066455991335635, iteration: 285966
loss: 1.0079936981201172,grad_norm: 0.9999995318537637, iteration: 285967
loss: 1.0417828559875488,grad_norm: 0.8955198190519356, iteration: 285968
loss: 1.030455470085144,grad_norm: 0.9800703727238099, iteration: 285969
loss: 0.9905241131782532,grad_norm: 0.7478301001393682, iteration: 285970
loss: 1.0156621932983398,grad_norm: 0.9999992916370434, iteration: 285971
loss: 1.0044093132019043,grad_norm: 0.9999993741227998, iteration: 285972
loss: 1.1095216274261475,grad_norm: 0.9999994458551195, iteration: 285973
loss: 0.9588873982429504,grad_norm: 0.7829499697100439, iteration: 285974
loss: 1.0129811763763428,grad_norm: 0.792830094486885, iteration: 285975
loss: 0.9952302575111389,grad_norm: 0.9999997516849163, iteration: 285976
loss: 1.074569821357727,grad_norm: 0.9999993249343672, iteration: 285977
loss: 0.9893971085548401,grad_norm: 0.7850817146565282, iteration: 285978
loss: 1.0440319776535034,grad_norm: 0.9999996318750848, iteration: 285979
loss: 1.120306134223938,grad_norm: 0.9999993236960741, iteration: 285980
loss: 0.9984366297721863,grad_norm: 0.9268703564931478, iteration: 285981
loss: 1.0142189264297485,grad_norm: 0.999999812231466, iteration: 285982
loss: 1.0587718486785889,grad_norm: 0.9999992169111511, iteration: 285983
loss: 0.9996206164360046,grad_norm: 0.8700635504284739, iteration: 285984
loss: 1.0392485857009888,grad_norm: 0.9051301114879087, iteration: 285985
loss: 1.0759849548339844,grad_norm: 0.9999993662909294, iteration: 285986
loss: 1.0048006772994995,grad_norm: 0.9999999454439324, iteration: 285987
loss: 1.0384634733200073,grad_norm: 0.940654887027379, iteration: 285988
loss: 0.986017644405365,grad_norm: 0.6587910351653363, iteration: 285989
loss: 1.0276927947998047,grad_norm: 0.8951081530147952, iteration: 285990
loss: 1.1476515531539917,grad_norm: 0.9999996523105047, iteration: 285991
loss: 1.1024965047836304,grad_norm: 0.9888061428001177, iteration: 285992
loss: 1.0125131607055664,grad_norm: 0.7384187899665301, iteration: 285993
loss: 0.9591899514198303,grad_norm: 0.8179720968728829, iteration: 285994
loss: 1.1005250215530396,grad_norm: 0.9999990660766036, iteration: 285995
loss: 0.9993287920951843,grad_norm: 0.9032328208984369, iteration: 285996
loss: 1.0806165933609009,grad_norm: 0.9999994400053354, iteration: 285997
loss: 1.0156543254852295,grad_norm: 0.8946059935838844, iteration: 285998
loss: 1.0149641036987305,grad_norm: 0.9999989627351267, iteration: 285999
loss: 1.0169007778167725,grad_norm: 0.9999991733023343, iteration: 286000
loss: 1.0797624588012695,grad_norm: 0.9999996654715825, iteration: 286001
loss: 1.011049509048462,grad_norm: 0.9999989280136634, iteration: 286002
loss: 0.9987382292747498,grad_norm: 0.7541033354392696, iteration: 286003
loss: 1.0453789234161377,grad_norm: 0.9999996028852504, iteration: 286004
loss: 1.0279040336608887,grad_norm: 0.959749597910599, iteration: 286005
loss: 1.0630658864974976,grad_norm: 0.9999991866872385, iteration: 286006
loss: 1.093647837638855,grad_norm: 0.9999998251782891, iteration: 286007
loss: 1.089760184288025,grad_norm: 0.9999995313170452, iteration: 286008
loss: 1.0450352430343628,grad_norm: 0.6928388829054952, iteration: 286009
loss: 1.011600375175476,grad_norm: 0.8961885085532683, iteration: 286010
loss: 1.042079210281372,grad_norm: 0.8034849271501886, iteration: 286011
loss: 0.9985239505767822,grad_norm: 0.8462600647667582, iteration: 286012
loss: 1.05704927444458,grad_norm: 0.9999993885908445, iteration: 286013
loss: 1.0453462600708008,grad_norm: 1.0000000041518777, iteration: 286014
loss: 1.0623854398727417,grad_norm: 0.9999997297210687, iteration: 286015
loss: 0.9940915107727051,grad_norm: 0.9337041865796404, iteration: 286016
loss: 1.0829170942306519,grad_norm: 0.9999992708615587, iteration: 286017
loss: 1.0717452764511108,grad_norm: 0.999999238022757, iteration: 286018
loss: 1.1230614185333252,grad_norm: 0.9999995005534973, iteration: 286019
loss: 1.0383330583572388,grad_norm: 0.9999995141372305, iteration: 286020
loss: 0.9826298952102661,grad_norm: 0.676984332909947, iteration: 286021
loss: 1.015176773071289,grad_norm: 0.9565143928246261, iteration: 286022
loss: 1.0401356220245361,grad_norm: 0.9320687319992514, iteration: 286023
loss: 1.0132737159729004,grad_norm: 0.8166580131184041, iteration: 286024
loss: 1.0320748090744019,grad_norm: 0.9457497500998785, iteration: 286025
loss: 0.9862199425697327,grad_norm: 0.8964672371016672, iteration: 286026
loss: 1.0356276035308838,grad_norm: 0.9999997934359903, iteration: 286027
loss: 1.070708155632019,grad_norm: 0.8674550463720517, iteration: 286028
loss: 1.0166187286376953,grad_norm: 0.8834731930610106, iteration: 286029
loss: 1.0946210622787476,grad_norm: 0.8911806466039143, iteration: 286030
loss: 1.0951385498046875,grad_norm: 0.9875313521872131, iteration: 286031
loss: 1.0124505758285522,grad_norm: 0.7961832359838028, iteration: 286032
loss: 1.0067837238311768,grad_norm: 0.8258055065342085, iteration: 286033
loss: 1.0236774682998657,grad_norm: 0.9999998070845023, iteration: 286034
loss: 1.0076345205307007,grad_norm: 0.7954224654537048, iteration: 286035
loss: 0.951611340045929,grad_norm: 0.7604330414793176, iteration: 286036
loss: 0.9810640811920166,grad_norm: 0.9208069914310846, iteration: 286037
loss: 1.09862220287323,grad_norm: 0.9999997131546322, iteration: 286038
loss: 1.037482738494873,grad_norm: 0.9352784554621808, iteration: 286039
loss: 1.0028973817825317,grad_norm: 0.7520996774361661, iteration: 286040
loss: 1.0114389657974243,grad_norm: 0.879312207650829, iteration: 286041
loss: 1.0182112455368042,grad_norm: 0.8455551467797964, iteration: 286042
loss: 1.0556957721710205,grad_norm: 0.9999996869437827, iteration: 286043
loss: 1.040940284729004,grad_norm: 0.9999989709364657, iteration: 286044
loss: 1.032175064086914,grad_norm: 0.9999990982490384, iteration: 286045
loss: 1.0613112449645996,grad_norm: 0.9205825780437923, iteration: 286046
loss: 1.0180633068084717,grad_norm: 0.9496796479463622, iteration: 286047
loss: 0.9898433685302734,grad_norm: 0.9113609360599498, iteration: 286048
loss: 1.0599300861358643,grad_norm: 0.9093588351537474, iteration: 286049
loss: 1.0690137147903442,grad_norm: 0.9999999361192438, iteration: 286050
loss: 1.0098252296447754,grad_norm: 0.9999991635079659, iteration: 286051
loss: 1.1103510856628418,grad_norm: 0.999999136860933, iteration: 286052
loss: 1.0053942203521729,grad_norm: 0.9999993602640994, iteration: 286053
loss: 1.01341712474823,grad_norm: 0.9765933487077311, iteration: 286054
loss: 1.0410752296447754,grad_norm: 0.8667586747103075, iteration: 286055
loss: 0.994518518447876,grad_norm: 0.9999993907380234, iteration: 286056
loss: 1.0029343366622925,grad_norm: 0.8519760603639266, iteration: 286057
loss: 0.993819534778595,grad_norm: 0.901002587862438, iteration: 286058
loss: 1.0005289316177368,grad_norm: 0.9382286412376672, iteration: 286059
loss: 1.1475350856781006,grad_norm: 0.9999994143546207, iteration: 286060
loss: 1.023057460784912,grad_norm: 0.8325025879962702, iteration: 286061
loss: 1.0590174198150635,grad_norm: 0.9999991210056515, iteration: 286062
loss: 1.0809329748153687,grad_norm: 0.9636042683622943, iteration: 286063
loss: 1.005303144454956,grad_norm: 0.890546140194889, iteration: 286064
loss: 1.0300443172454834,grad_norm: 0.9999991628563718, iteration: 286065
loss: 1.0228039026260376,grad_norm: 0.9999993887498712, iteration: 286066
loss: 1.074674367904663,grad_norm: 0.9999991277093419, iteration: 286067
loss: 1.116778016090393,grad_norm: 0.9999996737199184, iteration: 286068
loss: 1.0231449604034424,grad_norm: 0.9999994315916457, iteration: 286069
loss: 1.107892632484436,grad_norm: 0.9999999771371559, iteration: 286070
loss: 1.0414998531341553,grad_norm: 0.999999042418621, iteration: 286071
loss: 1.0499567985534668,grad_norm: 0.9999997952289233, iteration: 286072
loss: 0.99932861328125,grad_norm: 0.8902397886479292, iteration: 286073
loss: 1.0144652128219604,grad_norm: 0.8535252803551948, iteration: 286074
loss: 1.0301116704940796,grad_norm: 0.8725293968100549, iteration: 286075
loss: 1.0531017780303955,grad_norm: 0.999999090975869, iteration: 286076
loss: 1.0299361944198608,grad_norm: 0.9999998053640913, iteration: 286077
loss: 0.9869889616966248,grad_norm: 0.9999990504739253, iteration: 286078
loss: 1.1327764987945557,grad_norm: 1.000000024911819, iteration: 286079
loss: 0.9997704029083252,grad_norm: 0.999999001836841, iteration: 286080
loss: 1.0129421949386597,grad_norm: 0.9999991288527567, iteration: 286081
loss: 1.005354642868042,grad_norm: 0.9999995327048219, iteration: 286082
loss: 1.07707679271698,grad_norm: 0.8464805229479178, iteration: 286083
loss: 1.0268422365188599,grad_norm: 0.9999995938792293, iteration: 286084
loss: 1.0291168689727783,grad_norm: 0.9999993663366481, iteration: 286085
loss: 1.029430627822876,grad_norm: 0.8927825375435415, iteration: 286086
loss: 0.9971999526023865,grad_norm: 0.9368719729839734, iteration: 286087
loss: 1.1592336893081665,grad_norm: 0.9999990222212333, iteration: 286088
loss: 0.9811455011367798,grad_norm: 0.9999990643456453, iteration: 286089
loss: 1.0914335250854492,grad_norm: 0.9999994751578423, iteration: 286090
loss: 1.0309689044952393,grad_norm: 0.7847343288104106, iteration: 286091
loss: 0.9978958964347839,grad_norm: 0.936893354759361, iteration: 286092
loss: 1.01413094997406,grad_norm: 0.8860039379474854, iteration: 286093
loss: 0.9960207939147949,grad_norm: 0.8774963495855664, iteration: 286094
loss: 0.9901891946792603,grad_norm: 0.8600705625138446, iteration: 286095
loss: 1.0311819314956665,grad_norm: 0.9999992565850668, iteration: 286096
loss: 0.9910673499107361,grad_norm: 0.9726415292670101, iteration: 286097
loss: 0.9727849364280701,grad_norm: 0.9120442280581753, iteration: 286098
loss: 1.0768409967422485,grad_norm: 0.9999991037469026, iteration: 286099
loss: 1.1087837219238281,grad_norm: 0.9999991922658237, iteration: 286100
loss: 1.188062071800232,grad_norm: 0.9999996339285875, iteration: 286101
loss: 0.9703985452651978,grad_norm: 0.98508635418115, iteration: 286102
loss: 0.9728429913520813,grad_norm: 0.7920923673585826, iteration: 286103
loss: 1.0865904092788696,grad_norm: 0.8992879572219915, iteration: 286104
loss: 1.042776346206665,grad_norm: 0.9999993946276969, iteration: 286105
loss: 1.057636022567749,grad_norm: 0.9999996318748726, iteration: 286106
loss: 1.0008708238601685,grad_norm: 0.9047006881899275, iteration: 286107
loss: 1.0358200073242188,grad_norm: 0.8959059667521043, iteration: 286108
loss: 1.026868462562561,grad_norm: 0.9345262454835888, iteration: 286109
loss: 1.027868628501892,grad_norm: 0.9999998882591927, iteration: 286110
loss: 1.0238817930221558,grad_norm: 0.8229224883483642, iteration: 286111
loss: 1.0055288076400757,grad_norm: 0.8613954582015867, iteration: 286112
loss: 0.9556668400764465,grad_norm: 0.9999995821249724, iteration: 286113
loss: 1.0487759113311768,grad_norm: 0.9379484443476465, iteration: 286114
loss: 1.0616058111190796,grad_norm: 0.9999994683726438, iteration: 286115
loss: 1.024518609046936,grad_norm: 0.999999523805254, iteration: 286116
loss: 1.0123707056045532,grad_norm: 0.9999994590706114, iteration: 286117
loss: 0.9938188195228577,grad_norm: 0.8657420795906277, iteration: 286118
loss: 1.020745038986206,grad_norm: 0.9999995495982115, iteration: 286119
loss: 1.0308548212051392,grad_norm: 1.000000017846493, iteration: 286120
loss: 1.1798791885375977,grad_norm: 0.9999997591465389, iteration: 286121
loss: 1.082498550415039,grad_norm: 0.9999997347075396, iteration: 286122
loss: 1.0580538511276245,grad_norm: 0.9999991542231658, iteration: 286123
loss: 1.0791555643081665,grad_norm: 0.9999994713138761, iteration: 286124
loss: 1.007765293121338,grad_norm: 0.9999995020294444, iteration: 286125
loss: 1.068211317062378,grad_norm: 0.7501419972064984, iteration: 286126
loss: 1.0237345695495605,grad_norm: 0.9054992276443101, iteration: 286127
loss: 1.0965206623077393,grad_norm: 0.8939548858693835, iteration: 286128
loss: 1.0736141204833984,grad_norm: 0.9999991759245193, iteration: 286129
loss: 1.014155626296997,grad_norm: 0.9579011300744398, iteration: 286130
loss: 1.0043442249298096,grad_norm: 0.9999992151343857, iteration: 286131
loss: 0.987042248249054,grad_norm: 0.9874632489337358, iteration: 286132
loss: 1.0002505779266357,grad_norm: 0.9031636555962813, iteration: 286133
loss: 0.9572706818580627,grad_norm: 0.9155168364791685, iteration: 286134
loss: 0.9950555562973022,grad_norm: 0.9999991234505989, iteration: 286135
loss: 1.4377367496490479,grad_norm: 0.9999996533289675, iteration: 286136
loss: 0.9979735612869263,grad_norm: 0.9963524314793906, iteration: 286137
loss: 1.0105242729187012,grad_norm: 0.9999990913418081, iteration: 286138
loss: 1.1165984869003296,grad_norm: 0.9999996820858227, iteration: 286139
loss: 1.023929238319397,grad_norm: 0.9999990698777719, iteration: 286140
loss: 1.0479921102523804,grad_norm: 0.8618387213966932, iteration: 286141
loss: 0.9982783794403076,grad_norm: 0.9999996992877073, iteration: 286142
loss: 1.0758590698242188,grad_norm: 0.999999838856808, iteration: 286143
loss: 1.0758417844772339,grad_norm: 0.9999990837557143, iteration: 286144
loss: 0.9992846846580505,grad_norm: 0.9999990705027608, iteration: 286145
loss: 0.984377920627594,grad_norm: 0.8535540973287359, iteration: 286146
loss: 1.064390778541565,grad_norm: 0.999999158909306, iteration: 286147
loss: 0.9708173274993896,grad_norm: 0.734151706563454, iteration: 286148
loss: 1.071895956993103,grad_norm: 0.9265138042579972, iteration: 286149
loss: 0.9793822765350342,grad_norm: 0.9006986536253284, iteration: 286150
loss: 1.0379359722137451,grad_norm: 0.930857471780842, iteration: 286151
loss: 1.0531820058822632,grad_norm: 0.9999995165230924, iteration: 286152
loss: 1.0469465255737305,grad_norm: 0.9999990161239807, iteration: 286153
loss: 1.0038055181503296,grad_norm: 0.8672603226257388, iteration: 286154
loss: 0.9559371471405029,grad_norm: 0.9999989625536471, iteration: 286155
loss: 0.9872336387634277,grad_norm: 0.8412016247030151, iteration: 286156
loss: 1.19343900680542,grad_norm: 0.9999998545759959, iteration: 286157
loss: 1.0303393602371216,grad_norm: 0.9999990914858919, iteration: 286158
loss: 1.2625281810760498,grad_norm: 0.9999992483003078, iteration: 286159
loss: 1.031897783279419,grad_norm: 0.9999992345647285, iteration: 286160
loss: 1.0128200054168701,grad_norm: 0.9363519405083245, iteration: 286161
loss: 1.1345428228378296,grad_norm: 0.9999991762051701, iteration: 286162
loss: 1.0126827955245972,grad_norm: 0.7174989739565258, iteration: 286163
loss: 1.0130352973937988,grad_norm: 0.9642379774159254, iteration: 286164
loss: 1.0141276121139526,grad_norm: 0.8632752330756207, iteration: 286165
loss: 1.0827879905700684,grad_norm: 0.9946374020015217, iteration: 286166
loss: 1.1103482246398926,grad_norm: 0.9999991310936316, iteration: 286167
loss: 1.1973518133163452,grad_norm: 0.9999999791991566, iteration: 286168
loss: 1.0226167440414429,grad_norm: 0.9978668426323257, iteration: 286169
loss: 1.0060222148895264,grad_norm: 0.8182329971723629, iteration: 286170
loss: 0.9941103458404541,grad_norm: 0.869189282046895, iteration: 286171
loss: 0.9891757965087891,grad_norm: 0.6931073898825793, iteration: 286172
loss: 1.145409107208252,grad_norm: 0.9999995713709744, iteration: 286173
loss: 1.00565767288208,grad_norm: 0.7243121114874443, iteration: 286174
loss: 1.028977870941162,grad_norm: 0.9459416828376297, iteration: 286175
loss: 1.0428190231323242,grad_norm: 0.9113164150934151, iteration: 286176
loss: 1.0732892751693726,grad_norm: 0.9999999135870745, iteration: 286177
loss: 1.244221568107605,grad_norm: 0.9999992821747183, iteration: 286178
loss: 1.0178864002227783,grad_norm: 0.9999990933441473, iteration: 286179
loss: 1.0445719957351685,grad_norm: 0.9399871276976817, iteration: 286180
loss: 1.0591131448745728,grad_norm: 0.9999990167632845, iteration: 286181
loss: 0.9628904461860657,grad_norm: 0.7269859212317686, iteration: 286182
loss: 1.0198286771774292,grad_norm: 0.9999993930291551, iteration: 286183
loss: 1.007483959197998,grad_norm: 0.948953283671511, iteration: 286184
loss: 1.1114094257354736,grad_norm: 0.9999995877440235, iteration: 286185
loss: 1.003127098083496,grad_norm: 0.9999996323772905, iteration: 286186
loss: 1.058066487312317,grad_norm: 0.9999993293310203, iteration: 286187
loss: 1.0974421501159668,grad_norm: 0.999999631381676, iteration: 286188
loss: 1.0684764385223389,grad_norm: 0.9999990191739777, iteration: 286189
loss: 1.0251057147979736,grad_norm: 0.9999997822073771, iteration: 286190
loss: 1.0034384727478027,grad_norm: 0.8463709027205663, iteration: 286191
loss: 1.0039606094360352,grad_norm: 0.7839791734432364, iteration: 286192
loss: 1.0712168216705322,grad_norm: 0.9999990449930624, iteration: 286193
loss: 0.9966925978660583,grad_norm: 0.7808451643166924, iteration: 286194
loss: 1.1364538669586182,grad_norm: 0.9999997898433768, iteration: 286195
loss: 1.0397818088531494,grad_norm: 0.9999989604381602, iteration: 286196
loss: 1.0026767253875732,grad_norm: 0.6854312353629728, iteration: 286197
loss: 1.0172406435012817,grad_norm: 0.8557675954886863, iteration: 286198
loss: 1.0182194709777832,grad_norm: 0.9042468872469475, iteration: 286199
loss: 1.0300838947296143,grad_norm: 0.9999993688018849, iteration: 286200
loss: 1.0288028717041016,grad_norm: 0.8073860775318652, iteration: 286201
loss: 1.013581395149231,grad_norm: 0.8393383119223538, iteration: 286202
loss: 1.0302085876464844,grad_norm: 0.9999993336663737, iteration: 286203
loss: 0.9922031164169312,grad_norm: 0.943969988083841, iteration: 286204
loss: 1.0054340362548828,grad_norm: 0.8901138094942512, iteration: 286205
loss: 1.1366643905639648,grad_norm: 0.9999998540827296, iteration: 286206
loss: 1.0066337585449219,grad_norm: 0.9999995961463699, iteration: 286207
loss: 1.0161925554275513,grad_norm: 0.9191737290941207, iteration: 286208
loss: 0.9845236539840698,grad_norm: 0.9605524071147962, iteration: 286209
loss: 1.0626473426818848,grad_norm: 0.999999260527529, iteration: 286210
loss: 1.0256961584091187,grad_norm: 0.9978151773813843, iteration: 286211
loss: 0.9922760128974915,grad_norm: 0.972581545057185, iteration: 286212
loss: 1.0585217475891113,grad_norm: 0.7664994020165108, iteration: 286213
loss: 0.9652249217033386,grad_norm: 0.9999991136036942, iteration: 286214
loss: 1.0351588726043701,grad_norm: 0.9999997762294711, iteration: 286215
loss: 1.2915425300598145,grad_norm: 1.0000000578679817, iteration: 286216
loss: 1.018330693244934,grad_norm: 0.9999989878827478, iteration: 286217
loss: 1.0685561895370483,grad_norm: 0.9999994846148672, iteration: 286218
loss: 1.0113611221313477,grad_norm: 0.8662419967229135, iteration: 286219
loss: 1.0642989873886108,grad_norm: 0.9999992439725924, iteration: 286220
loss: 1.0288134813308716,grad_norm: 0.9999991947915186, iteration: 286221
loss: 1.100889801979065,grad_norm: 0.9999993309411821, iteration: 286222
loss: 1.0712915658950806,grad_norm: 0.9762490862200798, iteration: 286223
loss: 0.9834420680999756,grad_norm: 0.7968175720046708, iteration: 286224
loss: 1.0222901105880737,grad_norm: 0.8763685267904723, iteration: 286225
loss: 1.021348476409912,grad_norm: 0.9999992634394201, iteration: 286226
loss: 1.0070202350616455,grad_norm: 0.8933384316399058, iteration: 286227
loss: 1.0048551559448242,grad_norm: 0.7372177397673526, iteration: 286228
loss: 1.0032471418380737,grad_norm: 0.9999998801349308, iteration: 286229
loss: 1.0064877271652222,grad_norm: 0.9128204909404852, iteration: 286230
loss: 1.1105453968048096,grad_norm: 0.999999909218584, iteration: 286231
loss: 1.0435045957565308,grad_norm: 0.9233450407132119, iteration: 286232
loss: 1.0483921766281128,grad_norm: 0.9999991127261801, iteration: 286233
loss: 1.0071378946304321,grad_norm: 0.9622983761674064, iteration: 286234
loss: 1.0197569131851196,grad_norm: 0.7677166027351049, iteration: 286235
loss: 1.0801103115081787,grad_norm: 0.9769156928756805, iteration: 286236
loss: 1.078712821006775,grad_norm: 0.9999991194892431, iteration: 286237
loss: 1.0353972911834717,grad_norm: 0.7495134338186917, iteration: 286238
loss: 1.0048718452453613,grad_norm: 0.8372604610426199, iteration: 286239
loss: 1.0733203887939453,grad_norm: 0.9999990230744346, iteration: 286240
loss: 1.0438884496688843,grad_norm: 0.7464727826137406, iteration: 286241
loss: 1.0021344423294067,grad_norm: 0.9448041324229002, iteration: 286242
loss: 1.0327702760696411,grad_norm: 0.8195567614352014, iteration: 286243
loss: 1.0449490547180176,grad_norm: 0.9999990020027049, iteration: 286244
loss: 1.0115044116973877,grad_norm: 0.9999991653449313, iteration: 286245
loss: 1.0007916688919067,grad_norm: 0.9999989591433255, iteration: 286246
loss: 0.9905723929405212,grad_norm: 0.8686293598970076, iteration: 286247
loss: 1.0627516508102417,grad_norm: 0.9999994420732776, iteration: 286248
loss: 0.9944813251495361,grad_norm: 0.9999991001591587, iteration: 286249
loss: 0.9913229942321777,grad_norm: 0.8578822078402978, iteration: 286250
loss: 0.9989261627197266,grad_norm: 0.8167216396466775, iteration: 286251
loss: 1.0320241451263428,grad_norm: 0.7746631582252539, iteration: 286252
loss: 1.0240421295166016,grad_norm: 0.9012123059163006, iteration: 286253
loss: 1.0117619037628174,grad_norm: 0.9371738173607349, iteration: 286254
loss: 0.9903972744941711,grad_norm: 0.8920355825040357, iteration: 286255
loss: 1.0125172138214111,grad_norm: 0.8759268012711566, iteration: 286256
loss: 1.110939860343933,grad_norm: 0.9999995965486635, iteration: 286257
loss: 1.022952675819397,grad_norm: 0.8338275591108656, iteration: 286258
loss: 1.0379700660705566,grad_norm: 0.95924134745818, iteration: 286259
loss: 1.093257188796997,grad_norm: 0.999999854942752, iteration: 286260
loss: 1.0903241634368896,grad_norm: 0.9999991524246402, iteration: 286261
loss: 1.0660148859024048,grad_norm: 0.9652371660986994, iteration: 286262
loss: 1.0101737976074219,grad_norm: 0.8930202944405639, iteration: 286263
loss: 1.0097641944885254,grad_norm: 0.9999997215681531, iteration: 286264
loss: 1.0188161134719849,grad_norm: 0.9999991455073578, iteration: 286265
loss: 1.0641919374465942,grad_norm: 0.9999992095368777, iteration: 286266
loss: 1.017541527748108,grad_norm: 0.9999997089870507, iteration: 286267
loss: 0.9936396479606628,grad_norm: 0.8130493870162796, iteration: 286268
loss: 0.993887722492218,grad_norm: 0.7552395518527357, iteration: 286269
loss: 1.045716404914856,grad_norm: 0.999999350268095, iteration: 286270
loss: 1.012827754020691,grad_norm: 0.8487397239939354, iteration: 286271
loss: 1.115425705909729,grad_norm: 0.9892369330305493, iteration: 286272
loss: 0.9632835388183594,grad_norm: 0.9969247150713542, iteration: 286273
loss: 1.015015721321106,grad_norm: 0.9778260850867763, iteration: 286274
loss: 1.007620096206665,grad_norm: 0.7993448707238405, iteration: 286275
loss: 1.0154774188995361,grad_norm: 0.7893908125017517, iteration: 286276
loss: 1.000204086303711,grad_norm: 0.9412557104083162, iteration: 286277
loss: 1.1696006059646606,grad_norm: 0.9999997242135906, iteration: 286278
loss: 1.0640360116958618,grad_norm: 0.9999994812764774, iteration: 286279
loss: 1.028037428855896,grad_norm: 0.9483807387412417, iteration: 286280
loss: 1.0558679103851318,grad_norm: 0.9999996312440742, iteration: 286281
loss: 0.9892441034317017,grad_norm: 0.7238482222870956, iteration: 286282
loss: 1.0376074314117432,grad_norm: 0.9857795508315004, iteration: 286283
loss: 1.0841341018676758,grad_norm: 0.9999991194303216, iteration: 286284
loss: 0.9904716610908508,grad_norm: 0.9553135097141063, iteration: 286285
loss: 1.0465552806854248,grad_norm: 0.9999991338350397, iteration: 286286
loss: 0.9871947765350342,grad_norm: 0.9738859002721705, iteration: 286287
loss: 0.9858736991882324,grad_norm: 0.8483479564697859, iteration: 286288
loss: 1.0085625648498535,grad_norm: 0.9999994138427016, iteration: 286289
loss: 1.0150811672210693,grad_norm: 0.9999998299122526, iteration: 286290
loss: 0.9698954224586487,grad_norm: 0.8933537580956982, iteration: 286291
loss: 1.044852614402771,grad_norm: 0.8726813385050813, iteration: 286292
loss: 0.9843380451202393,grad_norm: 0.7864178904742882, iteration: 286293
loss: 1.0091938972473145,grad_norm: 0.9999995767075661, iteration: 286294
loss: 0.9992181658744812,grad_norm: 0.8798758026584551, iteration: 286295
loss: 1.0317356586456299,grad_norm: 0.8426541561937622, iteration: 286296
loss: 0.9909113049507141,grad_norm: 0.8548110180551035, iteration: 286297
loss: 0.9873653650283813,grad_norm: 0.8482500497859835, iteration: 286298
loss: 1.0493556261062622,grad_norm: 0.9891943715721957, iteration: 286299
loss: 1.0191141366958618,grad_norm: 0.802211601881186, iteration: 286300
loss: 0.9902065396308899,grad_norm: 0.9774761827682594, iteration: 286301
loss: 0.9966564774513245,grad_norm: 0.781097158327676, iteration: 286302
loss: 1.0520849227905273,grad_norm: 0.9999998655255405, iteration: 286303
loss: 0.9738514423370361,grad_norm: 0.9999998598441416, iteration: 286304
loss: 0.9735643267631531,grad_norm: 0.8989190927230695, iteration: 286305
loss: 0.9909211993217468,grad_norm: 0.9999990721638619, iteration: 286306
loss: 1.0020543336868286,grad_norm: 0.9857941938018845, iteration: 286307
loss: 0.9428876638412476,grad_norm: 0.8506555461648392, iteration: 286308
loss: 1.1013803482055664,grad_norm: 0.9999992490612757, iteration: 286309
loss: 1.089215636253357,grad_norm: 1.0000000215167157, iteration: 286310
loss: 1.0279831886291504,grad_norm: 0.8095788731381786, iteration: 286311
loss: 1.0421619415283203,grad_norm: 0.9999996005882382, iteration: 286312
loss: 0.9820737242698669,grad_norm: 0.929376429372193, iteration: 286313
loss: 1.0228508710861206,grad_norm: 0.842022090734689, iteration: 286314
loss: 0.9863527417182922,grad_norm: 0.7825970678952593, iteration: 286315
loss: 0.9821786284446716,grad_norm: 0.999999372137185, iteration: 286316
loss: 1.049887776374817,grad_norm: 0.9999994171235754, iteration: 286317
loss: 1.0416463613510132,grad_norm: 0.9999992261275694, iteration: 286318
loss: 0.9937348961830139,grad_norm: 0.9122883575583789, iteration: 286319
loss: 0.991158664226532,grad_norm: 0.9186078147441178, iteration: 286320
loss: 1.032426357269287,grad_norm: 0.9999992466079055, iteration: 286321
loss: 1.0485862493515015,grad_norm: 0.9999994027581038, iteration: 286322
loss: 1.0174870491027832,grad_norm: 0.9260582476837103, iteration: 286323
loss: 1.059644341468811,grad_norm: 0.895101676978603, iteration: 286324
loss: 1.0180432796478271,grad_norm: 0.9802106886813658, iteration: 286325
loss: 0.9935858249664307,grad_norm: 0.9229133232790994, iteration: 286326
loss: 0.978608250617981,grad_norm: 0.7579892306586089, iteration: 286327
loss: 1.026571273803711,grad_norm: 0.9904809060663926, iteration: 286328
loss: 1.0047821998596191,grad_norm: 0.8848101137980975, iteration: 286329
loss: 0.9879252910614014,grad_norm: 0.7850266296888198, iteration: 286330
loss: 1.0344517230987549,grad_norm: 0.9999996296741421, iteration: 286331
loss: 1.2316830158233643,grad_norm: 0.9999995290799826, iteration: 286332
loss: 0.9780253171920776,grad_norm: 0.8823325823567373, iteration: 286333
loss: 1.05562162399292,grad_norm: 0.9999998516697799, iteration: 286334
loss: 1.0509616136550903,grad_norm: 0.9222591427891772, iteration: 286335
loss: 0.9909306168556213,grad_norm: 0.9999992093786298, iteration: 286336
loss: 1.0047224760055542,grad_norm: 0.8022544395753902, iteration: 286337
loss: 0.9750705361366272,grad_norm: 0.9103293142282445, iteration: 286338
loss: 1.0166178941726685,grad_norm: 0.9999991162611053, iteration: 286339
loss: 1.00285804271698,grad_norm: 0.949499637789384, iteration: 286340
loss: 0.9975491166114807,grad_norm: 0.7949913208225723, iteration: 286341
loss: 1.124565839767456,grad_norm: 0.9999999130449233, iteration: 286342
loss: 1.059497356414795,grad_norm: 0.9999992737948805, iteration: 286343
loss: 1.0012977123260498,grad_norm: 0.937759245440912, iteration: 286344
loss: 0.997022807598114,grad_norm: 0.9502275596446759, iteration: 286345
loss: 1.0274813175201416,grad_norm: 0.8832377408425423, iteration: 286346
loss: 0.9893035888671875,grad_norm: 0.8058963892443448, iteration: 286347
loss: 0.9986475706100464,grad_norm: 0.9999991013797201, iteration: 286348
loss: 1.0160956382751465,grad_norm: 0.9450175589565172, iteration: 286349
loss: 1.0214341878890991,grad_norm: 0.8636383304210438, iteration: 286350
loss: 1.0268676280975342,grad_norm: 0.9497225351122424, iteration: 286351
loss: 0.9528912305831909,grad_norm: 0.8432917048148284, iteration: 286352
loss: 0.9400135278701782,grad_norm: 0.8174976451130144, iteration: 286353
loss: 1.0394843816757202,grad_norm: 0.9999994923114387, iteration: 286354
loss: 1.0283300876617432,grad_norm: 0.9922868776525289, iteration: 286355
loss: 1.0242955684661865,grad_norm: 0.8424558122878382, iteration: 286356
loss: 1.0957090854644775,grad_norm: 0.9440424677442121, iteration: 286357
loss: 0.9957109689712524,grad_norm: 0.9228445855716789, iteration: 286358
loss: 1.0904219150543213,grad_norm: 0.9999995294645603, iteration: 286359
loss: 0.9715334177017212,grad_norm: 0.7559651654382828, iteration: 286360
loss: 1.025661587715149,grad_norm: 1.0000000340571353, iteration: 286361
loss: 1.0412102937698364,grad_norm: 0.999999552266298, iteration: 286362
loss: 1.044213056564331,grad_norm: 0.9999990502314084, iteration: 286363
loss: 1.0163028240203857,grad_norm: 0.8676880142375711, iteration: 286364
loss: 1.0008525848388672,grad_norm: 0.8607594193380542, iteration: 286365
loss: 0.9916136264801025,grad_norm: 0.8995496238241978, iteration: 286366
loss: 1.080915093421936,grad_norm: 0.9719238588717892, iteration: 286367
loss: 1.1580227613449097,grad_norm: 0.9999995398379895, iteration: 286368
loss: 0.9857617616653442,grad_norm: 0.9999990575641619, iteration: 286369
loss: 1.1253347396850586,grad_norm: 0.9999995075154744, iteration: 286370
loss: 1.114516019821167,grad_norm: 0.9999990672015903, iteration: 286371
loss: 1.0331460237503052,grad_norm: 0.9999995578527064, iteration: 286372
loss: 1.0372525453567505,grad_norm: 0.802951188402101, iteration: 286373
loss: 0.9940666556358337,grad_norm: 0.8176901255902316, iteration: 286374
loss: 1.0562833547592163,grad_norm: 0.9999998970690464, iteration: 286375
loss: 1.0085256099700928,grad_norm: 0.772633176814783, iteration: 286376
loss: 1.0207488536834717,grad_norm: 0.8029357018874538, iteration: 286377
loss: 1.107406735420227,grad_norm: 0.928611680825284, iteration: 286378
loss: 1.1011576652526855,grad_norm: 0.9844278303092354, iteration: 286379
loss: 0.968603789806366,grad_norm: 0.8060902027371409, iteration: 286380
loss: 1.0085688829421997,grad_norm: 0.9904123112347164, iteration: 286381
loss: 1.0132410526275635,grad_norm: 0.8842020331433921, iteration: 286382
loss: 1.0314013957977295,grad_norm: 0.9999994756734638, iteration: 286383
loss: 0.9879044890403748,grad_norm: 0.9999994116383815, iteration: 286384
loss: 1.0371556282043457,grad_norm: 0.9999996850194325, iteration: 286385
loss: 1.0771583318710327,grad_norm: 0.9999992136002092, iteration: 286386
loss: 1.0439246892929077,grad_norm: 0.9999992328984831, iteration: 286387
loss: 1.0092644691467285,grad_norm: 0.999999078000793, iteration: 286388
loss: 1.0287766456604004,grad_norm: 0.8052923270132536, iteration: 286389
loss: 0.9716927409172058,grad_norm: 0.9999991166375151, iteration: 286390
loss: 1.0082134008407593,grad_norm: 0.9999995716040669, iteration: 286391
loss: 0.9616433382034302,grad_norm: 0.9999991332153327, iteration: 286392
loss: 1.111762285232544,grad_norm: 0.9999993017731865, iteration: 286393
loss: 1.0024504661560059,grad_norm: 0.9256666072995927, iteration: 286394
loss: 1.0269882678985596,grad_norm: 0.9498467916314954, iteration: 286395
loss: 1.0639196634292603,grad_norm: 0.9999996441962479, iteration: 286396
loss: 1.0367522239685059,grad_norm: 0.9999991191987843, iteration: 286397
loss: 1.0541538000106812,grad_norm: 0.9999992352460886, iteration: 286398
loss: 0.9953846335411072,grad_norm: 0.9999997791151479, iteration: 286399
loss: 1.0473850965499878,grad_norm: 0.9999995734712889, iteration: 286400
loss: 1.0132756233215332,grad_norm: 0.8306563899429362, iteration: 286401
loss: 1.0722575187683105,grad_norm: 0.9732437689092317, iteration: 286402
loss: 0.974155604839325,grad_norm: 0.8134017357466646, iteration: 286403
loss: 1.1324074268341064,grad_norm: 0.9999992789979902, iteration: 286404
loss: 1.2187787294387817,grad_norm: 0.9999997621829597, iteration: 286405
loss: 1.0467195510864258,grad_norm: 0.7424932445189824, iteration: 286406
loss: 1.0395419597625732,grad_norm: 0.8970176182725645, iteration: 286407
loss: 0.9909346699714661,grad_norm: 0.9999998179026123, iteration: 286408
loss: 1.0278176069259644,grad_norm: 0.8867689369301306, iteration: 286409
loss: 0.9466187953948975,grad_norm: 0.8428435352243013, iteration: 286410
loss: 1.012306571006775,grad_norm: 0.8207871758222016, iteration: 286411
loss: 1.0773533582687378,grad_norm: 0.9999991242745923, iteration: 286412
loss: 1.0101404190063477,grad_norm: 0.9817299246035998, iteration: 286413
loss: 1.0076216459274292,grad_norm: 0.9999998206219598, iteration: 286414
loss: 1.0591423511505127,grad_norm: 0.9999998052030541, iteration: 286415
loss: 1.0080854892730713,grad_norm: 0.7857164569573228, iteration: 286416
loss: 0.9928916692733765,grad_norm: 0.7927416101460828, iteration: 286417
loss: 1.0100839138031006,grad_norm: 0.9823212036856442, iteration: 286418
loss: 0.9985545873641968,grad_norm: 1.0000000191014076, iteration: 286419
loss: 1.0239077806472778,grad_norm: 0.8681909860488503, iteration: 286420
loss: 0.9988318681716919,grad_norm: 0.9999994150065661, iteration: 286421
loss: 0.9752174019813538,grad_norm: 0.7815207821481115, iteration: 286422
loss: 0.9884586930274963,grad_norm: 0.8012651010354668, iteration: 286423
loss: 1.0691843032836914,grad_norm: 0.999999863759476, iteration: 286424
loss: 0.9746960997581482,grad_norm: 0.9166163304312462, iteration: 286425
loss: 0.9875761866569519,grad_norm: 0.8349864587655923, iteration: 286426
loss: 0.9825539588928223,grad_norm: 0.7302352326103525, iteration: 286427
loss: 1.0121541023254395,grad_norm: 0.8876100057905841, iteration: 286428
loss: 0.99493408203125,grad_norm: 0.9999990820489295, iteration: 286429
loss: 1.024794578552246,grad_norm: 0.9233433431029668, iteration: 286430
loss: 1.0077625513076782,grad_norm: 0.9008571977982687, iteration: 286431
loss: 0.9648269414901733,grad_norm: 0.860578089172678, iteration: 286432
loss: 0.9831365942955017,grad_norm: 0.7358046123143869, iteration: 286433
loss: 1.0753906965255737,grad_norm: 0.9999995124392623, iteration: 286434
loss: 1.0098267793655396,grad_norm: 0.9107528954889639, iteration: 286435
loss: 1.0679829120635986,grad_norm: 0.8792563736080707, iteration: 286436
loss: 1.0207937955856323,grad_norm: 0.8698042578430871, iteration: 286437
loss: 1.0031083822250366,grad_norm: 0.8129164267391146, iteration: 286438
loss: 1.0783919095993042,grad_norm: 0.8789284075223357, iteration: 286439
loss: 1.0252959728240967,grad_norm: 0.9999994359534922, iteration: 286440
loss: 0.9999408721923828,grad_norm: 0.8639230821163437, iteration: 286441
loss: 0.9810601472854614,grad_norm: 0.8035121877460618, iteration: 286442
loss: 1.1063779592514038,grad_norm: 0.9999991329723477, iteration: 286443
loss: 0.983917236328125,grad_norm: 0.9324088531561419, iteration: 286444
loss: 1.034778356552124,grad_norm: 0.990287016619074, iteration: 286445
loss: 1.0325983762741089,grad_norm: 0.9020544975563923, iteration: 286446
loss: 1.017171859741211,grad_norm: 0.9063815504803728, iteration: 286447
loss: 1.0145930051803589,grad_norm: 0.9373272636608564, iteration: 286448
loss: 1.0003986358642578,grad_norm: 0.7493484893077971, iteration: 286449
loss: 1.033205270767212,grad_norm: 0.8520251177370077, iteration: 286450
loss: 1.0405046939849854,grad_norm: 0.9999992651455533, iteration: 286451
loss: 1.1312122344970703,grad_norm: 0.999999791754185, iteration: 286452
loss: 1.0007249116897583,grad_norm: 0.9182935571615377, iteration: 286453
loss: 0.9995695948600769,grad_norm: 0.9993079882924727, iteration: 286454
loss: 1.0233324766159058,grad_norm: 0.9301078201566956, iteration: 286455
loss: 1.0701794624328613,grad_norm: 0.8401347431145375, iteration: 286456
loss: 0.9988415837287903,grad_norm: 0.8454779132850901, iteration: 286457
loss: 0.9825356006622314,grad_norm: 0.9584109747640555, iteration: 286458
loss: 1.014275074005127,grad_norm: 0.9999990766250375, iteration: 286459
loss: 1.0140087604522705,grad_norm: 0.778894813809166, iteration: 286460
loss: 1.0839605331420898,grad_norm: 0.9380339526804972, iteration: 286461
loss: 1.008273720741272,grad_norm: 0.800970066843002, iteration: 286462
loss: 1.0062274932861328,grad_norm: 0.9376969894945532, iteration: 286463
loss: 1.035364031791687,grad_norm: 0.8773150836915717, iteration: 286464
loss: 1.0180280208587646,grad_norm: 0.9999991079327152, iteration: 286465
loss: 0.9908055067062378,grad_norm: 0.7481214152519112, iteration: 286466
loss: 0.9909735321998596,grad_norm: 0.8844197659513022, iteration: 286467
loss: 1.0507861375808716,grad_norm: 0.9371384949567061, iteration: 286468
loss: 1.0133233070373535,grad_norm: 0.9170796451349591, iteration: 286469
loss: 1.0143547058105469,grad_norm: 0.9999991212843822, iteration: 286470
loss: 1.0406996011734009,grad_norm: 0.9001892134500915, iteration: 286471
loss: 0.9882686138153076,grad_norm: 0.6889947844948766, iteration: 286472
loss: 1.0437495708465576,grad_norm: 0.9669668152793419, iteration: 286473
loss: 0.9771491885185242,grad_norm: 0.9999989837184212, iteration: 286474
loss: 1.020934820175171,grad_norm: 0.972170805784387, iteration: 286475
loss: 1.0118390321731567,grad_norm: 0.9049909945397079, iteration: 286476
loss: 1.0405479669570923,grad_norm: 0.9433815263581257, iteration: 286477
loss: 1.0652337074279785,grad_norm: 0.900928513591751, iteration: 286478
loss: 0.967949628829956,grad_norm: 0.9085780686487038, iteration: 286479
loss: 1.006455659866333,grad_norm: 0.8844518573916482, iteration: 286480
loss: 0.9991609454154968,grad_norm: 0.9299717282805551, iteration: 286481
loss: 1.088577389717102,grad_norm: 0.8579324685238672, iteration: 286482
loss: 1.011113166809082,grad_norm: 0.927425830577962, iteration: 286483
loss: 0.9969633221626282,grad_norm: 0.9999990111663597, iteration: 286484
loss: 0.9752895832061768,grad_norm: 0.9101605206429693, iteration: 286485
loss: 0.9882642030715942,grad_norm: 0.7801613412788122, iteration: 286486
loss: 1.0505270957946777,grad_norm: 0.9172387798319896, iteration: 286487
loss: 0.9760488271713257,grad_norm: 0.8094648363951572, iteration: 286488
loss: 1.0155763626098633,grad_norm: 0.826575592750029, iteration: 286489
loss: 0.991725504398346,grad_norm: 0.8345241024752567, iteration: 286490
loss: 1.0092637538909912,grad_norm: 0.8951008825584479, iteration: 286491
loss: 1.0035945177078247,grad_norm: 0.8225619577829791, iteration: 286492
loss: 0.9426245093345642,grad_norm: 0.8644689508286352, iteration: 286493
loss: 1.0263112783432007,grad_norm: 0.7557964536294242, iteration: 286494
loss: 0.9825102686882019,grad_norm: 0.8626163678477973, iteration: 286495
loss: 1.0286669731140137,grad_norm: 0.9999994281679488, iteration: 286496
loss: 1.0027014017105103,grad_norm: 0.804300455795332, iteration: 286497
loss: 1.1333383321762085,grad_norm: 0.9999999628241637, iteration: 286498
loss: 1.0131293535232544,grad_norm: 0.6535028448799266, iteration: 286499
loss: 1.0946428775787354,grad_norm: 0.9999994462676048, iteration: 286500
loss: 1.116217017173767,grad_norm: 0.9999999092045068, iteration: 286501
loss: 0.970396101474762,grad_norm: 0.7085990537327244, iteration: 286502
loss: 1.0131621360778809,grad_norm: 0.8660617242207561, iteration: 286503
loss: 1.0059314966201782,grad_norm: 0.8603045538651838, iteration: 286504
loss: 1.0252236127853394,grad_norm: 0.6688746220247093, iteration: 286505
loss: 1.018439531326294,grad_norm: 0.9010766804536425, iteration: 286506
loss: 0.9840567708015442,grad_norm: 0.9999992024096689, iteration: 286507
loss: 0.9983543753623962,grad_norm: 0.8043939716749632, iteration: 286508
loss: 0.9952829480171204,grad_norm: 0.8891939479553819, iteration: 286509
loss: 1.0194696187973022,grad_norm: 0.9023742096334051, iteration: 286510
loss: 0.9936487674713135,grad_norm: 0.9593804768597012, iteration: 286511
loss: 1.0067235231399536,grad_norm: 0.9999991706598866, iteration: 286512
loss: 1.0129644870758057,grad_norm: 0.7467531460303408, iteration: 286513
loss: 1.0800729990005493,grad_norm: 0.9999994161177294, iteration: 286514
loss: 1.0340129137039185,grad_norm: 0.9999992561113761, iteration: 286515
loss: 1.0207427740097046,grad_norm: 0.8230132576376075, iteration: 286516
loss: 1.0546807050704956,grad_norm: 0.7994019639156944, iteration: 286517
loss: 0.9995388984680176,grad_norm: 0.7779932811741952, iteration: 286518
loss: 0.9595262408256531,grad_norm: 0.9155777108970772, iteration: 286519
loss: 0.9842520356178284,grad_norm: 0.8250734635986898, iteration: 286520
loss: 1.0100425481796265,grad_norm: 0.9435207393561057, iteration: 286521
loss: 1.0306388139724731,grad_norm: 0.9999990282722442, iteration: 286522
loss: 1.0222079753875732,grad_norm: 0.9999992219699417, iteration: 286523
loss: 1.040116310119629,grad_norm: 0.9793181910675216, iteration: 286524
loss: 0.9970348477363586,grad_norm: 0.8993757706762628, iteration: 286525
loss: 1.0804866552352905,grad_norm: 0.9999992710437583, iteration: 286526
loss: 0.9946326613426208,grad_norm: 0.8220378653117459, iteration: 286527
loss: 0.9924994111061096,grad_norm: 0.9999998874791324, iteration: 286528
loss: 1.0258194208145142,grad_norm: 0.9663699506572725, iteration: 286529
loss: 1.0270154476165771,grad_norm: 0.8430618521174308, iteration: 286530
loss: 0.9888405203819275,grad_norm: 0.8633149624589458, iteration: 286531
loss: 0.9946145415306091,grad_norm: 0.9728919352988272, iteration: 286532
loss: 1.0403739213943481,grad_norm: 0.999999539642014, iteration: 286533
loss: 0.9929965138435364,grad_norm: 0.8847466847825489, iteration: 286534
loss: 1.0229917764663696,grad_norm: 0.7483486810043604, iteration: 286535
loss: 1.0208916664123535,grad_norm: 0.7791079114686568, iteration: 286536
loss: 1.0293701887130737,grad_norm: 0.8975577176322995, iteration: 286537
loss: 0.9479451179504395,grad_norm: 0.7670224060539473, iteration: 286538
loss: 1.0562455654144287,grad_norm: 0.9999991410659504, iteration: 286539
loss: 1.0017346143722534,grad_norm: 0.8334203717889431, iteration: 286540
loss: 0.9741069674491882,grad_norm: 0.7797059492130495, iteration: 286541
loss: 0.9867590069770813,grad_norm: 0.858122617044546, iteration: 286542
loss: 1.0684831142425537,grad_norm: 0.9999995804265857, iteration: 286543
loss: 1.0228198766708374,grad_norm: 0.9468801720231156, iteration: 286544
loss: 1.0794626474380493,grad_norm: 0.9999997394952629, iteration: 286545
loss: 1.0318489074707031,grad_norm: 0.9999989737711954, iteration: 286546
loss: 1.0112067461013794,grad_norm: 0.952386632254676, iteration: 286547
loss: 0.9991282224655151,grad_norm: 0.9999990996353322, iteration: 286548
loss: 1.007749319076538,grad_norm: 0.9011134480810037, iteration: 286549
loss: 1.0522295236587524,grad_norm: 0.9999990893989168, iteration: 286550
loss: 1.0287165641784668,grad_norm: 0.9999996526009318, iteration: 286551
loss: 1.059486985206604,grad_norm: 0.9999999504578055, iteration: 286552
loss: 0.9889756441116333,grad_norm: 0.9484048180274893, iteration: 286553
loss: 1.0291153192520142,grad_norm: 0.8691794492075309, iteration: 286554
loss: 1.0021421909332275,grad_norm: 0.7958172763415946, iteration: 286555
loss: 1.0042903423309326,grad_norm: 0.7774044198540822, iteration: 286556
loss: 1.0079569816589355,grad_norm: 0.9150802946603397, iteration: 286557
loss: 0.9933095574378967,grad_norm: 0.8294664975009102, iteration: 286558
loss: 1.0221930742263794,grad_norm: 0.9700240862503559, iteration: 286559
loss: 0.9863563179969788,grad_norm: 0.8539422460453483, iteration: 286560
loss: 0.9918799996376038,grad_norm: 0.7784385094387086, iteration: 286561
loss: 0.9622370004653931,grad_norm: 0.9009289596552554, iteration: 286562
loss: 0.9991658329963684,grad_norm: 0.9446792968864869, iteration: 286563
loss: 0.9987838864326477,grad_norm: 0.9999992760302392, iteration: 286564
loss: 0.9853364825248718,grad_norm: 0.851722829743324, iteration: 286565
loss: 1.0035161972045898,grad_norm: 0.7137896333991034, iteration: 286566
loss: 0.996174693107605,grad_norm: 0.7176261746340743, iteration: 286567
loss: 1.0347294807434082,grad_norm: 0.877127275763819, iteration: 286568
loss: 0.9460723996162415,grad_norm: 0.989443602261719, iteration: 286569
loss: 1.0211942195892334,grad_norm: 0.848485296132112, iteration: 286570
loss: 1.0005921125411987,grad_norm: 0.7818930009513184, iteration: 286571
loss: 1.0297355651855469,grad_norm: 0.999999240489848, iteration: 286572
loss: 1.031683087348938,grad_norm: 0.9413342932323259, iteration: 286573
loss: 1.021425724029541,grad_norm: 0.999999078967652, iteration: 286574
loss: 1.0073606967926025,grad_norm: 0.7654854111807106, iteration: 286575
loss: 1.0071933269500732,grad_norm: 0.8173870048459078, iteration: 286576
loss: 1.0089315176010132,grad_norm: 0.6639953630509813, iteration: 286577
loss: 0.971098780632019,grad_norm: 0.8428539528118597, iteration: 286578
loss: 0.9977181553840637,grad_norm: 0.8771810355468508, iteration: 286579
loss: 0.9625110030174255,grad_norm: 0.71549391310225, iteration: 286580
loss: 1.01051926612854,grad_norm: 0.8499093775136299, iteration: 286581
loss: 1.0109500885009766,grad_norm: 0.9642041387826483, iteration: 286582
loss: 1.005513072013855,grad_norm: 0.7452954542051954, iteration: 286583
loss: 0.9620495438575745,grad_norm: 0.7624886210781748, iteration: 286584
loss: 1.0103638172149658,grad_norm: 0.9999990258509985, iteration: 286585
loss: 0.9772738218307495,grad_norm: 0.9205249912704444, iteration: 286586
loss: 1.0606166124343872,grad_norm: 0.9449425618747511, iteration: 286587
loss: 1.0100524425506592,grad_norm: 0.8569338411339033, iteration: 286588
loss: 0.9729209542274475,grad_norm: 0.9999991409246716, iteration: 286589
loss: 0.9927273392677307,grad_norm: 0.8117088460419801, iteration: 286590
loss: 1.0503945350646973,grad_norm: 0.8225607094789406, iteration: 286591
loss: 1.081308126449585,grad_norm: 0.9999994255362578, iteration: 286592
loss: 0.9773517847061157,grad_norm: 0.9756955168559734, iteration: 286593
loss: 1.0305956602096558,grad_norm: 0.9594755634286222, iteration: 286594
loss: 0.9921078085899353,grad_norm: 0.9386185439797634, iteration: 286595
loss: 1.0712246894836426,grad_norm: 0.9789454917576749, iteration: 286596
loss: 0.9630484580993652,grad_norm: 0.8278150233497493, iteration: 286597
loss: 1.0341429710388184,grad_norm: 0.8660857136931476, iteration: 286598
loss: 0.976523756980896,grad_norm: 0.7453549135635018, iteration: 286599
loss: 1.03350031375885,grad_norm: 0.9999999367920455, iteration: 286600
loss: 1.1634784936904907,grad_norm: 0.9999995711551779, iteration: 286601
loss: 0.9895864725112915,grad_norm: 0.7389258813673605, iteration: 286602
loss: 1.179162621498108,grad_norm: 0.9999997565405805, iteration: 286603
loss: 0.9729872345924377,grad_norm: 0.9999990538957543, iteration: 286604
loss: 1.0453710556030273,grad_norm: 0.8939500744488689, iteration: 286605
loss: 1.005828619003296,grad_norm: 0.7966792980721887, iteration: 286606
loss: 1.07266104221344,grad_norm: 0.827066123185489, iteration: 286607
loss: 1.06980299949646,grad_norm: 0.999999342341016, iteration: 286608
loss: 1.0015980005264282,grad_norm: 0.7910876729403281, iteration: 286609
loss: 1.0052711963653564,grad_norm: 0.9202772174156744, iteration: 286610
loss: 1.0429059267044067,grad_norm: 0.999999945856117, iteration: 286611
loss: 0.948191225528717,grad_norm: 0.7025355011659142, iteration: 286612
loss: 1.001975655555725,grad_norm: 0.9112348401927288, iteration: 286613
loss: 1.0098893642425537,grad_norm: 0.8692008102852925, iteration: 286614
loss: 0.9728760719299316,grad_norm: 0.8803401448257867, iteration: 286615
loss: 1.026708960533142,grad_norm: 0.8604250075870108, iteration: 286616
loss: 1.0425249338150024,grad_norm: 0.9999991439412359, iteration: 286617
loss: 0.968081533908844,grad_norm: 0.9444062633398909, iteration: 286618
loss: 1.0147743225097656,grad_norm: 0.9327751751159037, iteration: 286619
loss: 0.9973313808441162,grad_norm: 0.9999989605936996, iteration: 286620
loss: 0.9980927109718323,grad_norm: 0.7431008958287147, iteration: 286621
loss: 0.987352192401886,grad_norm: 0.9051133307616172, iteration: 286622
loss: 1.0087740421295166,grad_norm: 0.9819984621104603, iteration: 286623
loss: 0.994005024433136,grad_norm: 0.810330764756216, iteration: 286624
loss: 1.0218333005905151,grad_norm: 0.8078067786485708, iteration: 286625
loss: 1.028576135635376,grad_norm: 0.914504484432155, iteration: 286626
loss: 0.9723384380340576,grad_norm: 0.9557709331353638, iteration: 286627
loss: 0.9812995195388794,grad_norm: 0.8831265106731172, iteration: 286628
loss: 0.9900215864181519,grad_norm: 0.9883500329394004, iteration: 286629
loss: 1.038401484489441,grad_norm: 0.9999992250562895, iteration: 286630
loss: 1.1787580251693726,grad_norm: 0.9999998033119801, iteration: 286631
loss: 1.0132777690887451,grad_norm: 0.999999140056452, iteration: 286632
loss: 0.9742774367332458,grad_norm: 0.8344901725438619, iteration: 286633
loss: 1.0923787355422974,grad_norm: 0.9999999905091177, iteration: 286634
loss: 1.0189670324325562,grad_norm: 0.9999990499229119, iteration: 286635
loss: 1.0030763149261475,grad_norm: 0.7727863214380629, iteration: 286636
loss: 1.0741242170333862,grad_norm: 0.8219000711868611, iteration: 286637
loss: 1.0432016849517822,grad_norm: 0.9067193464383871, iteration: 286638
loss: 0.9914145469665527,grad_norm: 0.8662823422949384, iteration: 286639
loss: 0.9641959071159363,grad_norm: 0.9980886356907336, iteration: 286640
loss: 1.0188190937042236,grad_norm: 0.999999053664196, iteration: 286641
loss: 1.0225526094436646,grad_norm: 0.7438055844104309, iteration: 286642
loss: 0.970461368560791,grad_norm: 0.8059839824066851, iteration: 286643
loss: 1.0321999788284302,grad_norm: 0.9710960007891112, iteration: 286644
loss: 1.083448886871338,grad_norm: 0.9999997021386456, iteration: 286645
loss: 1.0065745115280151,grad_norm: 0.9375531547869295, iteration: 286646
loss: 1.0349093675613403,grad_norm: 0.7762456406846204, iteration: 286647
loss: 0.9800130128860474,grad_norm: 0.7314740898017359, iteration: 286648
loss: 1.0255590677261353,grad_norm: 0.9481350248946012, iteration: 286649
loss: 1.043502926826477,grad_norm: 0.8970401659824654, iteration: 286650
loss: 0.9835739731788635,grad_norm: 0.999999669731255, iteration: 286651
loss: 1.1587040424346924,grad_norm: 0.9999995465027666, iteration: 286652
loss: 0.9659130573272705,grad_norm: 0.8123306705386713, iteration: 286653
loss: 0.9808064699172974,grad_norm: 0.8537504629970412, iteration: 286654
loss: 1.006905198097229,grad_norm: 0.978898985196271, iteration: 286655
loss: 0.9804476499557495,grad_norm: 0.8135738368026942, iteration: 286656
loss: 1.017048716545105,grad_norm: 0.9999993317953745, iteration: 286657
loss: 0.9782143235206604,grad_norm: 0.7985212205735676, iteration: 286658
loss: 1.0374691486358643,grad_norm: 0.9999990304914372, iteration: 286659
loss: 0.9923341274261475,grad_norm: 0.7952397333107716, iteration: 286660
loss: 0.964478075504303,grad_norm: 0.9999990749326789, iteration: 286661
loss: 1.0010271072387695,grad_norm: 0.8360024263443702, iteration: 286662
loss: 1.022965431213379,grad_norm: 0.9950627477762873, iteration: 286663
loss: 1.0385698080062866,grad_norm: 0.7876471206214544, iteration: 286664
loss: 1.0755598545074463,grad_norm: 0.999999758635043, iteration: 286665
loss: 0.9959076642990112,grad_norm: 0.9355155984806235, iteration: 286666
loss: 0.9963306188583374,grad_norm: 0.8322172367422049, iteration: 286667
loss: 1.003578543663025,grad_norm: 0.8817803765752578, iteration: 286668
loss: 0.9641134142875671,grad_norm: 0.7947710681423773, iteration: 286669
loss: 1.0305092334747314,grad_norm: 0.9999990282146926, iteration: 286670
loss: 1.0035884380340576,grad_norm: 0.9015962263058737, iteration: 286671
loss: 0.9994848370552063,grad_norm: 0.9999994757754987, iteration: 286672
loss: 0.9823333621025085,grad_norm: 0.9440868871962523, iteration: 286673
loss: 0.9546855092048645,grad_norm: 0.9410462341334287, iteration: 286674
loss: 0.9865938425064087,grad_norm: 0.8621478116620082, iteration: 286675
loss: 0.9290215969085693,grad_norm: 0.8177013214150635, iteration: 286676
loss: 1.004410982131958,grad_norm: 0.8605804031845341, iteration: 286677
loss: 1.0136882066726685,grad_norm: 0.9801207083775245, iteration: 286678
loss: 1.0002329349517822,grad_norm: 0.999999219212496, iteration: 286679
loss: 0.9945977330207825,grad_norm: 0.9999997747349055, iteration: 286680
loss: 1.0104628801345825,grad_norm: 0.9640088672033645, iteration: 286681
loss: 1.0047104358673096,grad_norm: 0.9999995420580515, iteration: 286682
loss: 0.9627946019172668,grad_norm: 0.8115953429293608, iteration: 286683
loss: 0.9993981719017029,grad_norm: 0.9999990151323135, iteration: 286684
loss: 1.0991778373718262,grad_norm: 0.9999994283365276, iteration: 286685
loss: 1.0192161798477173,grad_norm: 0.9999990732097441, iteration: 286686
loss: 1.0501017570495605,grad_norm: 0.7847931172657203, iteration: 286687
loss: 1.139468789100647,grad_norm: 0.9999998505272393, iteration: 286688
loss: 1.0104602575302124,grad_norm: 0.99999938987188, iteration: 286689
loss: 1.0221613645553589,grad_norm: 0.9878858367368624, iteration: 286690
loss: 1.0471810102462769,grad_norm: 0.8636608487392043, iteration: 286691
loss: 1.0114538669586182,grad_norm: 0.9759026514132312, iteration: 286692
loss: 0.9974106550216675,grad_norm: 0.7706790369651627, iteration: 286693
loss: 0.9858988523483276,grad_norm: 0.9421493997579117, iteration: 286694
loss: 0.9740708470344543,grad_norm: 0.9242875366765468, iteration: 286695
loss: 1.231889009475708,grad_norm: 0.9999996520391617, iteration: 286696
loss: 0.9882962703704834,grad_norm: 0.9222898723080585, iteration: 286697
loss: 0.9824363589286804,grad_norm: 0.9167263106063188, iteration: 286698
loss: 1.0709707736968994,grad_norm: 0.9909349935708472, iteration: 286699
loss: 1.0256866216659546,grad_norm: 0.999999933812072, iteration: 286700
loss: 1.197741150856018,grad_norm: 0.9999997880892972, iteration: 286701
loss: 1.0081807374954224,grad_norm: 0.8325831566579864, iteration: 286702
loss: 0.998199462890625,grad_norm: 0.9984499020086383, iteration: 286703
loss: 1.0796188116073608,grad_norm: 0.8752805437289369, iteration: 286704
loss: 0.9768178462982178,grad_norm: 0.8499712881316414, iteration: 286705
loss: 1.0311261415481567,grad_norm: 0.963282801723786, iteration: 286706
loss: 1.0174922943115234,grad_norm: 0.7691193037181783, iteration: 286707
loss: 1.031646966934204,grad_norm: 0.8396323061512916, iteration: 286708
loss: 0.9629539251327515,grad_norm: 0.9240724768299886, iteration: 286709
loss: 1.027370810508728,grad_norm: 0.9236250689620112, iteration: 286710
loss: 0.983034610748291,grad_norm: 0.7781976616425369, iteration: 286711
loss: 1.0278401374816895,grad_norm: 0.9496115578011612, iteration: 286712
loss: 0.9887328147888184,grad_norm: 0.8713262198476741, iteration: 286713
loss: 1.0010868310928345,grad_norm: 0.8837014095177256, iteration: 286714
loss: 1.0120662450790405,grad_norm: 0.821756337645374, iteration: 286715
loss: 1.0251141786575317,grad_norm: 0.999999446033112, iteration: 286716
loss: 1.0252230167388916,grad_norm: 0.9999990365269372, iteration: 286717
loss: 1.0345357656478882,grad_norm: 0.9183446922185932, iteration: 286718
loss: 1.0746943950653076,grad_norm: 0.9999993508786167, iteration: 286719
loss: 1.00984525680542,grad_norm: 0.9992784801546786, iteration: 286720
loss: 0.9794155359268188,grad_norm: 0.9811858486790952, iteration: 286721
loss: 1.0499080419540405,grad_norm: 0.7805716728034734, iteration: 286722
loss: 1.0296990871429443,grad_norm: 0.7812330923082221, iteration: 286723
loss: 1.0129506587982178,grad_norm: 0.909036976665627, iteration: 286724
loss: 0.9334301352500916,grad_norm: 0.9999991804320366, iteration: 286725
loss: 0.9889668226242065,grad_norm: 0.8211054994197192, iteration: 286726
loss: 0.9879565238952637,grad_norm: 0.827054979788652, iteration: 286727
loss: 1.002092719078064,grad_norm: 0.9289205539459818, iteration: 286728
loss: 1.0185853242874146,grad_norm: 0.8991794584134025, iteration: 286729
loss: 0.9808321595191956,grad_norm: 0.9999990719259692, iteration: 286730
loss: 1.0082329511642456,grad_norm: 0.8563448887328902, iteration: 286731
loss: 1.021364688873291,grad_norm: 0.9999990483456287, iteration: 286732
loss: 0.9877079129219055,grad_norm: 0.7783190478480555, iteration: 286733
loss: 1.1476079225540161,grad_norm: 0.9999999594908476, iteration: 286734
loss: 1.02242910861969,grad_norm: 0.8271499779035282, iteration: 286735
loss: 0.9993975758552551,grad_norm: 0.83966552831813, iteration: 286736
loss: 1.0701045989990234,grad_norm: 0.9999997254779295, iteration: 286737
loss: 0.9939345121383667,grad_norm: 0.8116147324099996, iteration: 286738
loss: 1.022011160850525,grad_norm: 0.8836158005653825, iteration: 286739
loss: 0.9764322638511658,grad_norm: 0.7816847448691533, iteration: 286740
loss: 1.010117769241333,grad_norm: 0.8480983112649804, iteration: 286741
loss: 1.027699589729309,grad_norm: 0.9999992212736063, iteration: 286742
loss: 0.9872626066207886,grad_norm: 0.9999992990972146, iteration: 286743
loss: 1.0454655885696411,grad_norm: 0.8307779617355808, iteration: 286744
loss: 0.9870749115943909,grad_norm: 0.8886145342970044, iteration: 286745
loss: 1.0251716375350952,grad_norm: 0.9999999612409177, iteration: 286746
loss: 0.9606243968009949,grad_norm: 0.9136452179211729, iteration: 286747
loss: 0.9542109966278076,grad_norm: 0.9999992242263155, iteration: 286748
loss: 0.9657799601554871,grad_norm: 0.999998910006344, iteration: 286749
loss: 0.9960468411445618,grad_norm: 0.8103704500456097, iteration: 286750
loss: 0.9906541109085083,grad_norm: 0.8619509435068319, iteration: 286751
loss: 1.0241137742996216,grad_norm: 0.9777817192957754, iteration: 286752
loss: 1.0176974534988403,grad_norm: 0.8833358698517229, iteration: 286753
loss: 1.0103223323822021,grad_norm: 0.8101097785084659, iteration: 286754
loss: 0.9940834641456604,grad_norm: 0.9110227086418928, iteration: 286755
loss: 1.0203787088394165,grad_norm: 0.9722619786331249, iteration: 286756
loss: 1.0536832809448242,grad_norm: 0.9999999748223732, iteration: 286757
loss: 1.009905457496643,grad_norm: 0.9999993195080655, iteration: 286758
loss: 0.9880832433700562,grad_norm: 0.9999991570972974, iteration: 286759
loss: 1.0346574783325195,grad_norm: 0.8747426297311559, iteration: 286760
loss: 1.0207602977752686,grad_norm: 0.9388640859444237, iteration: 286761
loss: 1.0107578039169312,grad_norm: 0.9999998007483736, iteration: 286762
loss: 0.9707701206207275,grad_norm: 0.9999990775855812, iteration: 286763
loss: 0.9746209383010864,grad_norm: 0.9342191148986011, iteration: 286764
loss: 1.0131421089172363,grad_norm: 0.9999995577207731, iteration: 286765
loss: 0.9990015625953674,grad_norm: 0.9999997400744763, iteration: 286766
loss: 1.1019655466079712,grad_norm: 0.999999619089136, iteration: 286767
loss: 0.9850460886955261,grad_norm: 0.7747041310814833, iteration: 286768
loss: 1.0583144426345825,grad_norm: 0.9999995502048924, iteration: 286769
loss: 1.0129510164260864,grad_norm: 0.7995121869540256, iteration: 286770
loss: 0.9865421056747437,grad_norm: 0.9274908546155576, iteration: 286771
loss: 0.9850725531578064,grad_norm: 0.9760650026038902, iteration: 286772
loss: 0.9862936735153198,grad_norm: 0.7363844287785136, iteration: 286773
loss: 1.0092744827270508,grad_norm: 0.9596467671484322, iteration: 286774
loss: 1.0011178255081177,grad_norm: 0.871082066716053, iteration: 286775
loss: 0.9751648306846619,grad_norm: 0.8831797542372324, iteration: 286776
loss: 0.994586169719696,grad_norm: 0.9999992903118756, iteration: 286777
loss: 1.0469132661819458,grad_norm: 0.9411868892812072, iteration: 286778
loss: 0.9995328187942505,grad_norm: 0.8253192286373913, iteration: 286779
loss: 1.0480269193649292,grad_norm: 0.9999996455447177, iteration: 286780
loss: 0.9468210339546204,grad_norm: 0.735811392576749, iteration: 286781
loss: 0.9908015727996826,grad_norm: 0.9071242208719944, iteration: 286782
loss: 1.0250605344772339,grad_norm: 0.9999999115354208, iteration: 286783
loss: 1.0035991668701172,grad_norm: 0.9999992065003132, iteration: 286784
loss: 0.9786205291748047,grad_norm: 0.9450124345923432, iteration: 286785
loss: 1.0006898641586304,grad_norm: 0.8183805433591345, iteration: 286786
loss: 1.0427888631820679,grad_norm: 0.9999992440427314, iteration: 286787
loss: 0.979333758354187,grad_norm: 0.9123874278120213, iteration: 286788
loss: 1.1189382076263428,grad_norm: 0.9999992548992005, iteration: 286789
loss: 0.9861587882041931,grad_norm: 0.9999991341123329, iteration: 286790
loss: 0.9635711908340454,grad_norm: 0.7681632253991025, iteration: 286791
loss: 0.9994086623191833,grad_norm: 0.9826966947292349, iteration: 286792
loss: 1.0290303230285645,grad_norm: 0.999999371986268, iteration: 286793
loss: 1.007974624633789,grad_norm: 0.7757951598525025, iteration: 286794
loss: 1.0328338146209717,grad_norm: 0.8420386251223924, iteration: 286795
loss: 1.0320887565612793,grad_norm: 0.9999991945292206, iteration: 286796
loss: 0.9926838278770447,grad_norm: 0.8216114921761586, iteration: 286797
loss: 1.0725071430206299,grad_norm: 0.9999995587652689, iteration: 286798
loss: 0.9751481413841248,grad_norm: 0.9999998322359975, iteration: 286799
loss: 1.012463092803955,grad_norm: 0.8467281385563794, iteration: 286800
loss: 1.0612741708755493,grad_norm: 0.9999991018860859, iteration: 286801
loss: 1.04628324508667,grad_norm: 0.9999998617991593, iteration: 286802
loss: 0.9946762919425964,grad_norm: 0.8385936201779026, iteration: 286803
loss: 1.0043023824691772,grad_norm: 0.8635739794331702, iteration: 286804
loss: 0.9991253614425659,grad_norm: 0.7639932605204127, iteration: 286805
loss: 1.2948192358016968,grad_norm: 0.9999999171956928, iteration: 286806
loss: 1.03438401222229,grad_norm: 0.9999991126460123, iteration: 286807
loss: 0.9875239729881287,grad_norm: 0.8466094611239567, iteration: 286808
loss: 1.03346848487854,grad_norm: 0.8440570679075452, iteration: 286809
loss: 1.0114314556121826,grad_norm: 0.999999091933534, iteration: 286810
loss: 1.0155607461929321,grad_norm: 0.8343458804931817, iteration: 286811
loss: 0.9813767671585083,grad_norm: 0.9999995379048102, iteration: 286812
loss: 1.0421355962753296,grad_norm: 0.9999995390192804, iteration: 286813
loss: 1.0771241188049316,grad_norm: 0.975012643048271, iteration: 286814
loss: 1.0229690074920654,grad_norm: 0.9847939032483651, iteration: 286815
loss: 1.0118532180786133,grad_norm: 0.9000211284652642, iteration: 286816
loss: 0.9995852112770081,grad_norm: 0.782822082496496, iteration: 286817
loss: 1.0533666610717773,grad_norm: 0.9999990862896959, iteration: 286818
loss: 0.9909162521362305,grad_norm: 0.8732828625262267, iteration: 286819
loss: 1.0204581022262573,grad_norm: 0.9999992083824762, iteration: 286820
loss: 0.9729631543159485,grad_norm: 0.9496053705359393, iteration: 286821
loss: 1.0029596090316772,grad_norm: 0.9670070950740793, iteration: 286822
loss: 1.0082114934921265,grad_norm: 0.9207790618412562, iteration: 286823
loss: 1.0488239526748657,grad_norm: 0.9999991255798623, iteration: 286824
loss: 1.0776047706604004,grad_norm: 0.9999998187462202, iteration: 286825
loss: 0.9805395603179932,grad_norm: 0.7430660820947346, iteration: 286826
loss: 1.1608123779296875,grad_norm: 0.9999997590133097, iteration: 286827
loss: 1.0554040670394897,grad_norm: 0.9999990223106332, iteration: 286828
loss: 1.0034799575805664,grad_norm: 0.9999990724279606, iteration: 286829
loss: 1.0069020986557007,grad_norm: 0.8261801099289106, iteration: 286830
loss: 0.9912388920783997,grad_norm: 0.9999999929307217, iteration: 286831
loss: 0.9629591107368469,grad_norm: 0.9632132704022083, iteration: 286832
loss: 0.9504225254058838,grad_norm: 0.999999129072339, iteration: 286833
loss: 1.04506254196167,grad_norm: 0.9999998870433847, iteration: 286834
loss: 1.110884666442871,grad_norm: 0.9674633261159795, iteration: 286835
loss: 1.0841389894485474,grad_norm: 0.8353381786450181, iteration: 286836
loss: 1.0186042785644531,grad_norm: 0.8576717957310475, iteration: 286837
loss: 0.9931656122207642,grad_norm: 0.9999992415576499, iteration: 286838
loss: 1.0077117681503296,grad_norm: 0.8695123381674881, iteration: 286839
loss: 1.0388755798339844,grad_norm: 0.7919250321945527, iteration: 286840
loss: 1.0499656200408936,grad_norm: 0.9999998206283655, iteration: 286841
loss: 1.0140756368637085,grad_norm: 0.9312877140318365, iteration: 286842
loss: 0.9550917148590088,grad_norm: 0.7990041289340152, iteration: 286843
loss: 0.9830256700515747,grad_norm: 0.720102753272243, iteration: 286844
loss: 1.0265398025512695,grad_norm: 0.9350679160049928, iteration: 286845
loss: 1.076900601387024,grad_norm: 0.9999998368480841, iteration: 286846
loss: 1.0286991596221924,grad_norm: 0.9999993387080789, iteration: 286847
loss: 1.0032520294189453,grad_norm: 0.9066284131300102, iteration: 286848
loss: 1.0881956815719604,grad_norm: 0.9999997002830066, iteration: 286849
loss: 0.9925843477249146,grad_norm: 0.9055445125317924, iteration: 286850
loss: 0.994607150554657,grad_norm: 0.8478221065273, iteration: 286851
loss: 1.0386989116668701,grad_norm: 0.999999128327698, iteration: 286852
loss: 1.000604510307312,grad_norm: 0.9002952451310438, iteration: 286853
loss: 1.0651943683624268,grad_norm: 0.9999994437293078, iteration: 286854
loss: 1.0050163269042969,grad_norm: 0.8526679225956356, iteration: 286855
loss: 0.9885039329528809,grad_norm: 0.7861785172952597, iteration: 286856
loss: 1.145484447479248,grad_norm: 0.9886510679405259, iteration: 286857
loss: 1.0200508832931519,grad_norm: 0.970141108360654, iteration: 286858
loss: 1.042168140411377,grad_norm: 0.9611907456455089, iteration: 286859
loss: 0.9913468956947327,grad_norm: 0.9999997646193008, iteration: 286860
loss: 1.056944489479065,grad_norm: 0.9622836586364274, iteration: 286861
loss: 1.0746983289718628,grad_norm: 0.9999997150013262, iteration: 286862
loss: 1.0969712734222412,grad_norm: 0.9999991235532792, iteration: 286863
loss: 1.0936135053634644,grad_norm: 0.855223762740931, iteration: 286864
loss: 1.0138670206069946,grad_norm: 0.9055129713738095, iteration: 286865
loss: 1.017236351966858,grad_norm: 0.8546397834134155, iteration: 286866
loss: 0.9906979203224182,grad_norm: 0.9601925441952581, iteration: 286867
loss: 0.9991000294685364,grad_norm: 0.9999997713976602, iteration: 286868
loss: 1.0537608861923218,grad_norm: 0.9999994609682119, iteration: 286869
loss: 1.150222659111023,grad_norm: 0.9999997119222419, iteration: 286870
loss: 1.0632144212722778,grad_norm: 0.9999998172546639, iteration: 286871
loss: 1.0191876888275146,grad_norm: 0.8992697332448939, iteration: 286872
loss: 1.1564472913742065,grad_norm: 0.9999999948723747, iteration: 286873
loss: 1.0194083452224731,grad_norm: 0.9999993944654598, iteration: 286874
loss: 0.9945751428604126,grad_norm: 0.8434001212620862, iteration: 286875
loss: 1.1314888000488281,grad_norm: 0.9999999839239936, iteration: 286876
loss: 1.009968876838684,grad_norm: 0.7725626822975853, iteration: 286877
loss: 1.021186351776123,grad_norm: 0.8840279795824663, iteration: 286878
loss: 0.9746830463409424,grad_norm: 0.7629422005797494, iteration: 286879
loss: 1.0222508907318115,grad_norm: 0.8958007069574001, iteration: 286880
loss: 1.0043374300003052,grad_norm: 0.8811245449552932, iteration: 286881
loss: 0.9971470236778259,grad_norm: 0.7521081072453707, iteration: 286882
loss: 0.9959647059440613,grad_norm: 0.9999993862707759, iteration: 286883
loss: 1.024940848350525,grad_norm: 0.9999992516030997, iteration: 286884
loss: 0.9582356810569763,grad_norm: 0.8115292333168743, iteration: 286885
loss: 1.0620242357254028,grad_norm: 0.9999991821734034, iteration: 286886
loss: 1.0175693035125732,grad_norm: 0.9999996069476715, iteration: 286887
loss: 1.0417890548706055,grad_norm: 0.9999989260549358, iteration: 286888
loss: 1.0011184215545654,grad_norm: 0.9999991415719075, iteration: 286889
loss: 1.0811203718185425,grad_norm: 0.9999991298083097, iteration: 286890
loss: 1.0084177255630493,grad_norm: 0.9675161485509761, iteration: 286891
loss: 1.0775320529937744,grad_norm: 0.9999999231923598, iteration: 286892
loss: 1.0231153964996338,grad_norm: 0.7691728333345837, iteration: 286893
loss: 1.0795668363571167,grad_norm: 0.7711170519160484, iteration: 286894
loss: 1.0060209035873413,grad_norm: 0.9076933574565592, iteration: 286895
loss: 1.076488971710205,grad_norm: 0.9560386334467651, iteration: 286896
loss: 1.0806440114974976,grad_norm: 0.9293648554694721, iteration: 286897
loss: 1.0221203565597534,grad_norm: 0.9020094804151764, iteration: 286898
loss: 0.9786028861999512,grad_norm: 0.9999998354304338, iteration: 286899
loss: 1.0608617067337036,grad_norm: 0.9999998723767419, iteration: 286900
loss: 0.9802937507629395,grad_norm: 0.7267559829895655, iteration: 286901
loss: 1.0206462144851685,grad_norm: 0.7958312150740617, iteration: 286902
loss: 1.0384509563446045,grad_norm: 0.9999991823255997, iteration: 286903
loss: 1.0193369388580322,grad_norm: 0.9757431249233659, iteration: 286904
loss: 0.9868903756141663,grad_norm: 0.9999990125505004, iteration: 286905
loss: 1.079349398612976,grad_norm: 0.9607159277507176, iteration: 286906
loss: 0.9497030377388,grad_norm: 0.999999451010828, iteration: 286907
loss: 0.9892151951789856,grad_norm: 0.7233732767710189, iteration: 286908
loss: 1.0232566595077515,grad_norm: 0.998563340255845, iteration: 286909
loss: 1.0415927171707153,grad_norm: 0.9999999658832497, iteration: 286910
loss: 1.0830552577972412,grad_norm: 0.9999999245908244, iteration: 286911
loss: 1.0057573318481445,grad_norm: 0.999999656886942, iteration: 286912
loss: 1.0222405195236206,grad_norm: 0.9077010101855998, iteration: 286913
loss: 1.0084928274154663,grad_norm: 0.9999992086653084, iteration: 286914
loss: 0.9937633872032166,grad_norm: 0.8025130524322491, iteration: 286915
loss: 1.0680227279663086,grad_norm: 0.999999380437953, iteration: 286916
loss: 1.0486319065093994,grad_norm: 0.8459202930282793, iteration: 286917
loss: 0.9882655739784241,grad_norm: 0.8481205708261302, iteration: 286918
loss: 1.1477277278900146,grad_norm: 0.9999998129897731, iteration: 286919
loss: 0.9618136882781982,grad_norm: 0.749384206120073, iteration: 286920
loss: 1.0075469017028809,grad_norm: 0.9999998291427351, iteration: 286921
loss: 1.0009677410125732,grad_norm: 0.8536379469694134, iteration: 286922
loss: 0.9871793389320374,grad_norm: 0.7574889704223366, iteration: 286923
loss: 1.2246403694152832,grad_norm: 0.9999997153293944, iteration: 286924
loss: 1.0145293474197388,grad_norm: 0.9999993084195717, iteration: 286925
loss: 1.0273256301879883,grad_norm: 0.9999998983944502, iteration: 286926
loss: 0.9588151574134827,grad_norm: 0.9999991482905348, iteration: 286927
loss: 0.998716413974762,grad_norm: 0.769642610477012, iteration: 286928
loss: 1.151644229888916,grad_norm: 0.9999994838912372, iteration: 286929
loss: 0.9825780391693115,grad_norm: 0.9610647449658309, iteration: 286930
loss: 1.1169523000717163,grad_norm: 0.9999991097991937, iteration: 286931
loss: 0.9887725114822388,grad_norm: 0.7870490302857419, iteration: 286932
loss: 1.012147068977356,grad_norm: 0.7353980424718757, iteration: 286933
loss: 0.9975465536117554,grad_norm: 0.7734664476533364, iteration: 286934
loss: 0.9862620234489441,grad_norm: 0.9690689346966231, iteration: 286935
loss: 1.0221233367919922,grad_norm: 0.9736021523924073, iteration: 286936
loss: 1.1107323169708252,grad_norm: 0.9999990270939029, iteration: 286937
loss: 0.9659423828125,grad_norm: 0.9173476709706526, iteration: 286938
loss: 1.0759674310684204,grad_norm: 0.9999998191287353, iteration: 286939
loss: 1.006169319152832,grad_norm: 0.908986701875762, iteration: 286940
loss: 1.1036826372146606,grad_norm: 0.9999997953273608, iteration: 286941
loss: 1.184895634651184,grad_norm: 0.9999995038786897, iteration: 286942
loss: 1.022709608078003,grad_norm: 0.8952399641868949, iteration: 286943
loss: 1.0738801956176758,grad_norm: 0.9999990502825291, iteration: 286944
loss: 1.033774495124817,grad_norm: 0.9041907852855549, iteration: 286945
loss: 1.0276265144348145,grad_norm: 0.8469637009000206, iteration: 286946
loss: 1.0308256149291992,grad_norm: 0.9999993082240127, iteration: 286947
loss: 1.0057806968688965,grad_norm: 0.7910336486124556, iteration: 286948
loss: 1.0382626056671143,grad_norm: 0.751031136805689, iteration: 286949
loss: 1.053397536277771,grad_norm: 0.9999999188483576, iteration: 286950
loss: 1.085837483406067,grad_norm: 0.9999998348052501, iteration: 286951
loss: 1.080843448638916,grad_norm: 0.9999999817867611, iteration: 286952
loss: 1.0561814308166504,grad_norm: 0.9999998776802583, iteration: 286953
loss: 1.059491753578186,grad_norm: 0.9999990376821339, iteration: 286954
loss: 1.1072921752929688,grad_norm: 0.9999998004128422, iteration: 286955
loss: 0.9861510992050171,grad_norm: 0.9771634963472211, iteration: 286956
loss: 1.0343412160873413,grad_norm: 0.9717381902056802, iteration: 286957
loss: 1.096136212348938,grad_norm: 0.999999215131249, iteration: 286958
loss: 1.1362605094909668,grad_norm: 0.999999860994056, iteration: 286959
loss: 1.0683801174163818,grad_norm: 0.9999991901791447, iteration: 286960
loss: 1.0174189805984497,grad_norm: 0.999999364147866, iteration: 286961
loss: 0.9885574579238892,grad_norm: 0.9246548860850755, iteration: 286962
loss: 1.163600206375122,grad_norm: 0.9999996337909128, iteration: 286963
loss: 0.977422297000885,grad_norm: 0.9550240871242069, iteration: 286964
loss: 0.983801007270813,grad_norm: 0.9556140990529441, iteration: 286965
loss: 1.1011226177215576,grad_norm: 0.9999996851015958, iteration: 286966
loss: 1.1837419271469116,grad_norm: 0.9999992352245523, iteration: 286967
loss: 1.0162804126739502,grad_norm: 0.9999992108198918, iteration: 286968
loss: 0.9916844367980957,grad_norm: 0.8080490718343442, iteration: 286969
loss: 0.990035355091095,grad_norm: 1.0000000679622436, iteration: 286970
loss: 1.0524331331253052,grad_norm: 0.9999992808370806, iteration: 286971
loss: 1.0450682640075684,grad_norm: 0.9999998879844366, iteration: 286972
loss: 1.0025393962860107,grad_norm: 0.999999194806523, iteration: 286973
loss: 0.9694182872772217,grad_norm: 0.8396922880835743, iteration: 286974
loss: 1.026656985282898,grad_norm: 0.9999996891961027, iteration: 286975
loss: 0.987280011177063,grad_norm: 0.9999991613944222, iteration: 286976
loss: 1.0420079231262207,grad_norm: 0.9479439679204783, iteration: 286977
loss: 1.0021041631698608,grad_norm: 0.7999677388046942, iteration: 286978
loss: 0.9969093203544617,grad_norm: 0.8159666108742157, iteration: 286979
loss: 0.9885411262512207,grad_norm: 0.9108121127151688, iteration: 286980
loss: 1.02585768699646,grad_norm: 0.8529969448894226, iteration: 286981
loss: 1.0347756147384644,grad_norm: 0.9999994190820611, iteration: 286982
loss: 0.960468590259552,grad_norm: 0.8665109143518822, iteration: 286983
loss: 1.0545377731323242,grad_norm: 0.9999990969523749, iteration: 286984
loss: 1.0711936950683594,grad_norm: 0.9558801640287995, iteration: 286985
loss: 1.0215290784835815,grad_norm: 0.9520933203462049, iteration: 286986
loss: 1.0221569538116455,grad_norm: 0.8853411661613204, iteration: 286987
loss: 1.0272129774093628,grad_norm: 0.8386924870920319, iteration: 286988
loss: 0.9695567488670349,grad_norm: 0.999999231146298, iteration: 286989
loss: 0.9819823503494263,grad_norm: 0.897461681847818, iteration: 286990
loss: 1.0354819297790527,grad_norm: 0.9999997441151527, iteration: 286991
loss: 1.0013624429702759,grad_norm: 0.7442156644479511, iteration: 286992
loss: 1.0572445392608643,grad_norm: 0.9999997373683249, iteration: 286993
loss: 1.0407291650772095,grad_norm: 0.8950208263650894, iteration: 286994
loss: 1.0092582702636719,grad_norm: 0.7928580992829289, iteration: 286995
loss: 1.0003198385238647,grad_norm: 0.735634416892858, iteration: 286996
loss: 1.008870244026184,grad_norm: 0.930815930999995, iteration: 286997
loss: 1.0612263679504395,grad_norm: 0.9999991193896053, iteration: 286998
loss: 1.085045576095581,grad_norm: 0.9999998775304616, iteration: 286999
loss: 1.0910274982452393,grad_norm: 0.9999989454043388, iteration: 287000
loss: 1.0381829738616943,grad_norm: 0.9999998194707073, iteration: 287001
loss: 1.1628236770629883,grad_norm: 1.0000000273247402, iteration: 287002
loss: 1.0036989450454712,grad_norm: 0.9167999303066404, iteration: 287003
loss: 1.0674946308135986,grad_norm: 0.999999630413268, iteration: 287004
loss: 1.0254693031311035,grad_norm: 0.964441300164027, iteration: 287005
loss: 1.1197627782821655,grad_norm: 0.9999996626667049, iteration: 287006
loss: 1.0501633882522583,grad_norm: 0.9999996441441866, iteration: 287007
loss: 1.0796602964401245,grad_norm: 0.9999998195639306, iteration: 287008
loss: 0.9960747361183167,grad_norm: 0.8858162030126159, iteration: 287009
loss: 1.0166943073272705,grad_norm: 0.8484376598859796, iteration: 287010
loss: 0.9586286544799805,grad_norm: 0.890805428918646, iteration: 287011
loss: 0.9934470653533936,grad_norm: 0.9448728263635284, iteration: 287012
loss: 1.044622540473938,grad_norm: 0.8198724601887821, iteration: 287013
loss: 1.0448400974273682,grad_norm: 0.9483936936448643, iteration: 287014
loss: 1.1126328706741333,grad_norm: 0.999999312236942, iteration: 287015
loss: 0.9628481268882751,grad_norm: 0.999999109808872, iteration: 287016
loss: 1.0767871141433716,grad_norm: 0.9999996854279052, iteration: 287017
loss: 1.0637904405593872,grad_norm: 0.999999728933065, iteration: 287018
loss: 1.0347729921340942,grad_norm: 0.999999252177192, iteration: 287019
loss: 1.0491265058517456,grad_norm: 0.9999999529681427, iteration: 287020
loss: 1.0433701276779175,grad_norm: 0.9647334426058538, iteration: 287021
loss: 1.024786114692688,grad_norm: 0.9938745964668075, iteration: 287022
loss: 1.0113525390625,grad_norm: 0.8495543425871626, iteration: 287023
loss: 1.1275312900543213,grad_norm: 0.999999132455938, iteration: 287024
loss: 1.0380821228027344,grad_norm: 0.9999998509517105, iteration: 287025
loss: 0.9655508995056152,grad_norm: 0.9999999256413346, iteration: 287026
loss: 1.0337902307510376,grad_norm: 0.9999996054139236, iteration: 287027
loss: 1.107518196105957,grad_norm: 0.8531138075896461, iteration: 287028
loss: 0.9948078989982605,grad_norm: 0.7447724645477392, iteration: 287029
loss: 0.9604368805885315,grad_norm: 0.7425751545161597, iteration: 287030
loss: 1.0975416898727417,grad_norm: 0.9999995925532552, iteration: 287031
loss: 1.1076509952545166,grad_norm: 0.8939117391359276, iteration: 287032
loss: 1.0267953872680664,grad_norm: 0.9247150306163269, iteration: 287033
loss: 1.0456165075302124,grad_norm: 0.8475578892294938, iteration: 287034
loss: 1.1367214918136597,grad_norm: 0.9999999539261526, iteration: 287035
loss: 1.040640950202942,grad_norm: 0.9531071784544635, iteration: 287036
loss: 1.0264798402786255,grad_norm: 0.9999995667281157, iteration: 287037
loss: 1.0177761316299438,grad_norm: 0.999999636143726, iteration: 287038
loss: 0.9905038475990295,grad_norm: 0.9447408374616968, iteration: 287039
loss: 0.9940457940101624,grad_norm: 0.9628548967583701, iteration: 287040
loss: 1.005026936531067,grad_norm: 0.9999998135945004, iteration: 287041
loss: 1.0788758993148804,grad_norm: 0.9999990728720446, iteration: 287042
loss: 0.9835752844810486,grad_norm: 0.6710848404728074, iteration: 287043
loss: 0.9756430387496948,grad_norm: 0.8010626458910974, iteration: 287044
loss: 1.0413402318954468,grad_norm: 0.9327339232178247, iteration: 287045
loss: 1.051308512687683,grad_norm: 0.9581886853200816, iteration: 287046
loss: 1.0340238809585571,grad_norm: 0.9999993206943465, iteration: 287047
loss: 1.0812402963638306,grad_norm: 0.9999998297189322, iteration: 287048
loss: 1.0515750646591187,grad_norm: 0.9999997380934378, iteration: 287049
loss: 1.0145533084869385,grad_norm: 0.9999993712219413, iteration: 287050
loss: 1.0057607889175415,grad_norm: 0.8121781031569149, iteration: 287051
loss: 0.9956008791923523,grad_norm: 0.8243269365861801, iteration: 287052
loss: 0.9989638924598694,grad_norm: 0.7811181182545323, iteration: 287053
loss: 1.038839340209961,grad_norm: 0.9999990566642747, iteration: 287054
loss: 1.0121383666992188,grad_norm: 0.6766089177641847, iteration: 287055
loss: 1.0488835573196411,grad_norm: 0.9999991630970276, iteration: 287056
loss: 1.0164506435394287,grad_norm: 0.7360880610119293, iteration: 287057
loss: 1.0155056715011597,grad_norm: 0.9999999724253165, iteration: 287058
loss: 1.047615647315979,grad_norm: 0.7686193606755477, iteration: 287059
loss: 1.0099012851715088,grad_norm: 0.8481892888910144, iteration: 287060
loss: 1.0569796562194824,grad_norm: 0.9999991435595894, iteration: 287061
loss: 1.0142037868499756,grad_norm: 0.9999991503961012, iteration: 287062
loss: 1.0725511312484741,grad_norm: 0.8967388085054845, iteration: 287063
loss: 1.0236499309539795,grad_norm: 0.845889627476684, iteration: 287064
loss: 1.052521824836731,grad_norm: 0.9851966820318276, iteration: 287065
loss: 1.023375153541565,grad_norm: 0.8838832110079439, iteration: 287066
loss: 1.0011005401611328,grad_norm: 0.9999994580179961, iteration: 287067
loss: 0.9940564036369324,grad_norm: 0.7806422858307613, iteration: 287068
loss: 1.0512748956680298,grad_norm: 0.99999924501589, iteration: 287069
loss: 1.0247656106948853,grad_norm: 0.9999990678173432, iteration: 287070
loss: 1.0051701068878174,grad_norm: 0.999999470745192, iteration: 287071
loss: 1.0064570903778076,grad_norm: 0.9051084622210573, iteration: 287072
loss: 1.0608103275299072,grad_norm: 0.999999973312607, iteration: 287073
loss: 1.0579215288162231,grad_norm: 0.8466309902544956, iteration: 287074
loss: 0.9912469387054443,grad_norm: 0.9081782598834508, iteration: 287075
loss: 1.0784488916397095,grad_norm: 0.9999999507838794, iteration: 287076
loss: 0.9761355519294739,grad_norm: 0.8528549065288393, iteration: 287077
loss: 1.005292534828186,grad_norm: 0.8389912547462085, iteration: 287078
loss: 1.0397677421569824,grad_norm: 0.7669696387667968, iteration: 287079
loss: 0.991102933883667,grad_norm: 0.8319869810913972, iteration: 287080
loss: 0.9908923506736755,grad_norm: 0.9999991390270585, iteration: 287081
loss: 0.9902324676513672,grad_norm: 0.9999993552402828, iteration: 287082
loss: 0.9662598967552185,grad_norm: 0.8503865489806895, iteration: 287083
loss: 0.9890610575675964,grad_norm: 0.8690504582503898, iteration: 287084
loss: 1.014182209968567,grad_norm: 0.999999152291127, iteration: 287085
loss: 1.0008903741836548,grad_norm: 0.7891639358036194, iteration: 287086
loss: 1.0673692226409912,grad_norm: 0.9999989706610034, iteration: 287087
loss: 1.058761715888977,grad_norm: 0.9950624834733036, iteration: 287088
loss: 1.0130352973937988,grad_norm: 0.9210003020694697, iteration: 287089
loss: 1.0092402696609497,grad_norm: 0.8750633352131459, iteration: 287090
loss: 1.0190740823745728,grad_norm: 0.9999990230330128, iteration: 287091
loss: 0.9985651969909668,grad_norm: 0.9613600934688328, iteration: 287092
loss: 1.0631780624389648,grad_norm: 0.9599778766015319, iteration: 287093
loss: 0.9863378405570984,grad_norm: 0.9149475025949562, iteration: 287094
loss: 1.008790135383606,grad_norm: 0.9999991918920117, iteration: 287095
loss: 0.9644581079483032,grad_norm: 0.7750166924713013, iteration: 287096
loss: 1.0074920654296875,grad_norm: 0.822429189670416, iteration: 287097
loss: 0.9775035381317139,grad_norm: 0.7617819587880711, iteration: 287098
loss: 0.9845407605171204,grad_norm: 0.7979305233968287, iteration: 287099
loss: 1.0474413633346558,grad_norm: 0.9999993167346778, iteration: 287100
loss: 0.9946799874305725,grad_norm: 0.892922837599042, iteration: 287101
loss: 1.014746069908142,grad_norm: 0.9921872041803925, iteration: 287102
loss: 1.0677803754806519,grad_norm: 0.9999991074623418, iteration: 287103
loss: 0.9908096194267273,grad_norm: 0.84124965646333, iteration: 287104
loss: 1.0010449886322021,grad_norm: 0.9572905230891967, iteration: 287105
loss: 1.0256867408752441,grad_norm: 0.9999999027882197, iteration: 287106
loss: 0.9868495464324951,grad_norm: 0.9498301467191982, iteration: 287107
loss: 1.0574029684066772,grad_norm: 0.9453383374757698, iteration: 287108
loss: 1.15459144115448,grad_norm: 0.9999994631288823, iteration: 287109
loss: 1.0227289199829102,grad_norm: 0.9223317484971665, iteration: 287110
loss: 1.017526626586914,grad_norm: 0.9999997716245687, iteration: 287111
loss: 0.9908556938171387,grad_norm: 0.9999990043036756, iteration: 287112
loss: 1.0384581089019775,grad_norm: 0.9999995353777542, iteration: 287113
loss: 1.0298190116882324,grad_norm: 0.734958715816067, iteration: 287114
loss: 0.9838294386863708,grad_norm: 0.8266004131816306, iteration: 287115
loss: 1.0034794807434082,grad_norm: 0.7504975856137253, iteration: 287116
loss: 1.0286465883255005,grad_norm: 0.9940143520198815, iteration: 287117
loss: 0.9481039643287659,grad_norm: 0.7530826315773842, iteration: 287118
loss: 0.9866904616355896,grad_norm: 0.9999993946550317, iteration: 287119
loss: 1.0184017419815063,grad_norm: 0.9612985182103794, iteration: 287120
loss: 1.0008054971694946,grad_norm: 0.9999992507544656, iteration: 287121
loss: 1.0439411401748657,grad_norm: 0.9291335674211757, iteration: 287122
loss: 0.9912959337234497,grad_norm: 0.9999991134422924, iteration: 287123
loss: 0.9867639541625977,grad_norm: 0.9999990161818644, iteration: 287124
loss: 1.0698423385620117,grad_norm: 0.9999992037374702, iteration: 287125
loss: 1.0214507579803467,grad_norm: 0.8882154949191238, iteration: 287126
loss: 1.039408802986145,grad_norm: 0.9999992806828996, iteration: 287127
loss: 0.9892920851707458,grad_norm: 0.8662115503593902, iteration: 287128
loss: 0.9869524836540222,grad_norm: 0.979339106395523, iteration: 287129
loss: 1.0588237047195435,grad_norm: 0.9999995868016496, iteration: 287130
loss: 0.9796666502952576,grad_norm: 0.7101127589236202, iteration: 287131
loss: 1.069920539855957,grad_norm: 0.9999994869628178, iteration: 287132
loss: 1.0067335367202759,grad_norm: 0.8673517860782937, iteration: 287133
loss: 1.020803689956665,grad_norm: 0.9999991964999214, iteration: 287134
loss: 1.007729411125183,grad_norm: 0.8244890270536377, iteration: 287135
loss: 1.0514105558395386,grad_norm: 0.9508559745841285, iteration: 287136
loss: 1.028861403465271,grad_norm: 0.999999539362032, iteration: 287137
loss: 1.0650713443756104,grad_norm: 0.9999995083787013, iteration: 287138
loss: 1.0238933563232422,grad_norm: 0.8137626066289751, iteration: 287139
loss: 1.000702142715454,grad_norm: 0.8717277213774121, iteration: 287140
loss: 1.0274990797042847,grad_norm: 0.9356190612641123, iteration: 287141
loss: 1.0121514797210693,grad_norm: 0.999999336533313, iteration: 287142
loss: 1.0096713304519653,grad_norm: 0.8001418728968444, iteration: 287143
loss: 1.0193843841552734,grad_norm: 0.9578198840255119, iteration: 287144
loss: 1.2021387815475464,grad_norm: 0.9999997444277356, iteration: 287145
loss: 0.9937536120414734,grad_norm: 0.9561268207929313, iteration: 287146
loss: 0.9874979257583618,grad_norm: 0.9091562982745387, iteration: 287147
loss: 1.0030725002288818,grad_norm: 0.9881654695366269, iteration: 287148
loss: 1.033493995666504,grad_norm: 0.8184564433215014, iteration: 287149
loss: 1.0270164012908936,grad_norm: 0.9260417332833126, iteration: 287150
loss: 1.0358511209487915,grad_norm: 0.9729586107379766, iteration: 287151
loss: 1.0327397584915161,grad_norm: 0.8724468194299208, iteration: 287152
loss: 1.0264805555343628,grad_norm: 0.9999990381296305, iteration: 287153
loss: 0.9757683277130127,grad_norm: 0.9999991833636107, iteration: 287154
loss: 1.0449512004852295,grad_norm: 0.9999999980457558, iteration: 287155
loss: 1.001114845275879,grad_norm: 0.9379809187690564, iteration: 287156
loss: 1.0383954048156738,grad_norm: 0.8670641791466894, iteration: 287157
loss: 1.0664448738098145,grad_norm: 0.9999998063931615, iteration: 287158
loss: 1.045331358909607,grad_norm: 0.9999991218508058, iteration: 287159
loss: 0.9934025406837463,grad_norm: 0.9540191030094805, iteration: 287160
loss: 1.0241572856903076,grad_norm: 0.8441578980764582, iteration: 287161
loss: 0.996683657169342,grad_norm: 0.8570055512130798, iteration: 287162
loss: 0.9887316226959229,grad_norm: 0.8275172088427449, iteration: 287163
loss: 1.0296638011932373,grad_norm: 0.8280986010236563, iteration: 287164
loss: 1.055423617362976,grad_norm: 0.8061271261348486, iteration: 287165
loss: 1.0025972127914429,grad_norm: 0.7553210426003183, iteration: 287166
loss: 1.0759764909744263,grad_norm: 0.8665217283161357, iteration: 287167
loss: 0.9752194285392761,grad_norm: 0.9999999991491306, iteration: 287168
loss: 0.971164882183075,grad_norm: 0.8704268144707723, iteration: 287169
loss: 0.9961406588554382,grad_norm: 0.8984009256870936, iteration: 287170
loss: 1.0198428630828857,grad_norm: 0.8236272100748732, iteration: 287171
loss: 0.9646375775337219,grad_norm: 0.8324169064776099, iteration: 287172
loss: 1.025011658668518,grad_norm: 0.883317354859222, iteration: 287173
loss: 1.0399854183197021,grad_norm: 0.9057778166789604, iteration: 287174
loss: 1.011101245880127,grad_norm: 0.7925192174697653, iteration: 287175
loss: 0.9876211881637573,grad_norm: 0.8732366358336562, iteration: 287176
loss: 1.0515713691711426,grad_norm: 0.9999994619737042, iteration: 287177
loss: 0.9783769845962524,grad_norm: 0.9270084234257312, iteration: 287178
loss: 1.0008474588394165,grad_norm: 0.9587436292087833, iteration: 287179
loss: 1.0314747095108032,grad_norm: 0.8741417717954619, iteration: 287180
loss: 0.9775583744049072,grad_norm: 0.7808615388537676, iteration: 287181
loss: 0.982397735118866,grad_norm: 0.7906526346916637, iteration: 287182
loss: 0.9877171516418457,grad_norm: 0.7995698126693436, iteration: 287183
loss: 0.987048327922821,grad_norm: 0.7153404333569758, iteration: 287184
loss: 0.9807764887809753,grad_norm: 0.7603369580674839, iteration: 287185
loss: 1.0155044794082642,grad_norm: 0.8394714812238239, iteration: 287186
loss: 1.0626864433288574,grad_norm: 0.8435472023842064, iteration: 287187
loss: 1.0644395351409912,grad_norm: 0.8730922927607776, iteration: 287188
loss: 1.026779055595398,grad_norm: 0.8974622519478959, iteration: 287189
loss: 1.0172467231750488,grad_norm: 0.912023488671507, iteration: 287190
loss: 0.9990354776382446,grad_norm: 0.9315520889802135, iteration: 287191
loss: 1.0309984683990479,grad_norm: 0.9999999361266115, iteration: 287192
loss: 1.004689335823059,grad_norm: 0.8241848245374672, iteration: 287193
loss: 0.9921211004257202,grad_norm: 0.9999994165825703, iteration: 287194
loss: 1.0712507963180542,grad_norm: 0.9701722884621563, iteration: 287195
loss: 0.9914650917053223,grad_norm: 0.8677066405907411, iteration: 287196
loss: 1.0067729949951172,grad_norm: 0.8063183554294224, iteration: 287197
loss: 1.001058578491211,grad_norm: 0.83261445623042, iteration: 287198
loss: 0.9818283319473267,grad_norm: 0.8276431780512887, iteration: 287199
loss: 0.9906195402145386,grad_norm: 0.9508488455999661, iteration: 287200
loss: 1.1047122478485107,grad_norm: 0.9999994500003431, iteration: 287201
loss: 0.9923716187477112,grad_norm: 0.925816960036828, iteration: 287202
loss: 0.9679980874061584,grad_norm: 0.9981622868692275, iteration: 287203
loss: 1.044918179512024,grad_norm: 0.9999989713200386, iteration: 287204
loss: 1.0092136859893799,grad_norm: 0.9999991991092761, iteration: 287205
loss: 1.001885175704956,grad_norm: 0.8357388094124774, iteration: 287206
loss: 1.029678463935852,grad_norm: 0.9426369302439049, iteration: 287207
loss: 0.9643015265464783,grad_norm: 0.7239078982973084, iteration: 287208
loss: 1.0523340702056885,grad_norm: 0.9999992232236388, iteration: 287209
loss: 0.9782097935676575,grad_norm: 0.8933098480335989, iteration: 287210
loss: 0.9962256550788879,grad_norm: 0.9401527283970351, iteration: 287211
loss: 1.0542229413986206,grad_norm: 0.9617017629650955, iteration: 287212
loss: 1.0144836902618408,grad_norm: 0.9455569911772244, iteration: 287213
loss: 1.0023359060287476,grad_norm: 0.9999999769244003, iteration: 287214
loss: 0.9688615798950195,grad_norm: 0.9105484507468674, iteration: 287215
loss: 0.9977350234985352,grad_norm: 0.8214481418781056, iteration: 287216
loss: 1.0306514501571655,grad_norm: 0.9999998667830803, iteration: 287217
loss: 0.983024001121521,grad_norm: 0.9185724144485403, iteration: 287218
loss: 1.0189588069915771,grad_norm: 0.8969805680076922, iteration: 287219
loss: 1.0171229839324951,grad_norm: 0.9466330602539922, iteration: 287220
loss: 0.9694395661354065,grad_norm: 0.9394987899284788, iteration: 287221
loss: 1.0370694398880005,grad_norm: 0.9999997086996776, iteration: 287222
loss: 1.0641567707061768,grad_norm: 0.9999991326705281, iteration: 287223
loss: 1.0999070405960083,grad_norm: 0.8920917722274186, iteration: 287224
loss: 0.9900926947593689,grad_norm: 0.8897371182410618, iteration: 287225
loss: 0.9933085441589355,grad_norm: 0.768892110999867, iteration: 287226
loss: 0.9435202479362488,grad_norm: 0.7388874729376187, iteration: 287227
loss: 1.0152575969696045,grad_norm: 0.9999997605722026, iteration: 287228
loss: 1.0331202745437622,grad_norm: 0.7463801813521485, iteration: 287229
loss: 0.9701741337776184,grad_norm: 0.8442605889208424, iteration: 287230
loss: 0.9809534549713135,grad_norm: 0.818017231432209, iteration: 287231
loss: 1.0288242101669312,grad_norm: 0.9999997564724321, iteration: 287232
loss: 1.036924123764038,grad_norm: 0.9999998897814325, iteration: 287233
loss: 1.0736490488052368,grad_norm: 0.8233139904930161, iteration: 287234
loss: 1.0419244766235352,grad_norm: 0.8511841403492558, iteration: 287235
loss: 0.9628763794898987,grad_norm: 0.7120969441024262, iteration: 287236
loss: 1.0355989933013916,grad_norm: 0.9053302859062063, iteration: 287237
loss: 0.9855481386184692,grad_norm: 0.7632589169067587, iteration: 287238
loss: 0.9719122648239136,grad_norm: 0.9471035551079314, iteration: 287239
loss: 0.9975521564483643,grad_norm: 0.837722852855478, iteration: 287240
loss: 0.9706686735153198,grad_norm: 0.8381612602052375, iteration: 287241
loss: 0.9719176888465881,grad_norm: 0.8467165486883219, iteration: 287242
loss: 1.090480089187622,grad_norm: 0.991702367553291, iteration: 287243
loss: 0.9887383580207825,grad_norm: 0.8736296966018501, iteration: 287244
loss: 1.0124564170837402,grad_norm: 0.8619152423136812, iteration: 287245
loss: 1.0355135202407837,grad_norm: 0.9837140384615589, iteration: 287246
loss: 0.9425622224807739,grad_norm: 0.9000121821263614, iteration: 287247
loss: 1.0339046716690063,grad_norm: 0.8945890559567378, iteration: 287248
loss: 1.0119946002960205,grad_norm: 0.9999996074052493, iteration: 287249
loss: 1.0768098831176758,grad_norm: 0.958204246332341, iteration: 287250
loss: 0.9965395927429199,grad_norm: 0.7725730409555276, iteration: 287251
loss: 1.0193383693695068,grad_norm: 0.9999991616689268, iteration: 287252
loss: 1.0428569316864014,grad_norm: 0.8725212843023972, iteration: 287253
loss: 0.9718946814537048,grad_norm: 0.8303388092905195, iteration: 287254
loss: 0.9727412462234497,grad_norm: 0.8771778280398143, iteration: 287255
loss: 1.0053192377090454,grad_norm: 0.7342081135821539, iteration: 287256
loss: 0.9982938170433044,grad_norm: 0.8909222232344401, iteration: 287257
loss: 0.9927642941474915,grad_norm: 0.8609491985031854, iteration: 287258
loss: 0.9881740808486938,grad_norm: 0.8596080928797488, iteration: 287259
loss: 1.0249578952789307,grad_norm: 0.9389695528516399, iteration: 287260
loss: 1.1047303676605225,grad_norm: 0.9389939270103717, iteration: 287261
loss: 0.9953024983406067,grad_norm: 0.8642225941471903, iteration: 287262
loss: 0.9642481803894043,grad_norm: 0.7855261451517351, iteration: 287263
loss: 1.066410779953003,grad_norm: 0.999999068506981, iteration: 287264
loss: 1.0047780275344849,grad_norm: 0.8628927731820821, iteration: 287265
loss: 1.0027918815612793,grad_norm: 0.7453530841341993, iteration: 287266
loss: 1.013379693031311,grad_norm: 0.8573992744181583, iteration: 287267
loss: 1.007494330406189,grad_norm: 0.7710983672859976, iteration: 287268
loss: 0.9929733276367188,grad_norm: 0.9999991784503633, iteration: 287269
loss: 1.0560485124588013,grad_norm: 0.7802246612833798, iteration: 287270
loss: 0.9940669536590576,grad_norm: 0.8496771586475155, iteration: 287271
loss: 0.9750887751579285,grad_norm: 0.7221337362862299, iteration: 287272
loss: 1.0037142038345337,grad_norm: 0.7569094405030623, iteration: 287273
loss: 1.0087093114852905,grad_norm: 0.9198962430575225, iteration: 287274
loss: 0.9667006731033325,grad_norm: 0.8728632177508892, iteration: 287275
loss: 0.9946016073226929,grad_norm: 0.755812973431796, iteration: 287276
loss: 1.033250093460083,grad_norm: 0.9999997091804486, iteration: 287277
loss: 1.0007226467132568,grad_norm: 0.7367603580152275, iteration: 287278
loss: 0.952081024646759,grad_norm: 0.8753193205794839, iteration: 287279
loss: 0.9616576433181763,grad_norm: 0.8827655320254227, iteration: 287280
loss: 0.9499946236610413,grad_norm: 0.7729699162234858, iteration: 287281
loss: 1.0143258571624756,grad_norm: 0.6791094294974312, iteration: 287282
loss: 0.9960739016532898,grad_norm: 0.9065753171333321, iteration: 287283
loss: 0.9920726418495178,grad_norm: 0.9999991045330383, iteration: 287284
loss: 1.032221794128418,grad_norm: 0.8788898824182778, iteration: 287285
loss: 1.0042320489883423,grad_norm: 0.7727267562971746, iteration: 287286
loss: 0.9910277724266052,grad_norm: 0.8110611451960761, iteration: 287287
loss: 1.0064880847930908,grad_norm: 0.864348357261397, iteration: 287288
loss: 1.0354905128479004,grad_norm: 0.961076634149334, iteration: 287289
loss: 1.075530767440796,grad_norm: 0.9999995809603013, iteration: 287290
loss: 1.0341172218322754,grad_norm: 0.7983841605222138, iteration: 287291
loss: 0.9966967105865479,grad_norm: 0.9532539063767331, iteration: 287292
loss: 1.0249353647232056,grad_norm: 0.8149255364981858, iteration: 287293
loss: 1.0000978708267212,grad_norm: 0.8718844477179065, iteration: 287294
loss: 1.1577258110046387,grad_norm: 1.0000000082377172, iteration: 287295
loss: 0.9466883540153503,grad_norm: 0.9242361378995597, iteration: 287296
loss: 1.0225961208343506,grad_norm: 0.9211364833770844, iteration: 287297
loss: 0.9696775078773499,grad_norm: 0.8491945283443603, iteration: 287298
loss: 1.0063855648040771,grad_norm: 0.9706218110542378, iteration: 287299
loss: 0.9915620684623718,grad_norm: 0.8945840174923053, iteration: 287300
loss: 0.9983282685279846,grad_norm: 0.6919226963170391, iteration: 287301
loss: 1.0155916213989258,grad_norm: 0.8417480741214428, iteration: 287302
loss: 0.9639543294906616,grad_norm: 0.8820688891992121, iteration: 287303
loss: 0.9669250845909119,grad_norm: 0.7401294718392869, iteration: 287304
loss: 1.0283631086349487,grad_norm: 0.7367180114152123, iteration: 287305
loss: 0.9762400388717651,grad_norm: 0.7682845360050932, iteration: 287306
loss: 0.9890445470809937,grad_norm: 0.9999990299158185, iteration: 287307
loss: 1.0533405542373657,grad_norm: 0.9999997933737507, iteration: 287308
loss: 1.033003807067871,grad_norm: 0.8192018731231845, iteration: 287309
loss: 1.057451844215393,grad_norm: 0.985111912222213, iteration: 287310
loss: 1.001800537109375,grad_norm: 0.9999993326460868, iteration: 287311
loss: 1.0174001455307007,grad_norm: 0.9219344376727729, iteration: 287312
loss: 1.2013388872146606,grad_norm: 0.9567364861853042, iteration: 287313
loss: 1.0044012069702148,grad_norm: 0.7382583805736717, iteration: 287314
loss: 0.9729235172271729,grad_norm: 0.7745770326843201, iteration: 287315
loss: 0.9809251427650452,grad_norm: 0.9999992154253673, iteration: 287316
loss: 1.0232864618301392,grad_norm: 0.9766179035482015, iteration: 287317
loss: 0.9973487257957458,grad_norm: 0.8673332739744168, iteration: 287318
loss: 0.9851712584495544,grad_norm: 0.7682443586509121, iteration: 287319
loss: 1.0652391910552979,grad_norm: 0.7767815009260747, iteration: 287320
loss: 1.0232902765274048,grad_norm: 0.9999992721646679, iteration: 287321
loss: 1.0082203149795532,grad_norm: 0.7503723752030772, iteration: 287322
loss: 1.0776236057281494,grad_norm: 0.9837639546607065, iteration: 287323
loss: 1.086058497428894,grad_norm: 0.9999993817944782, iteration: 287324
loss: 1.1259547472000122,grad_norm: 0.9999995917169258, iteration: 287325
loss: 0.9727276563644409,grad_norm: 0.8645987657827845, iteration: 287326
loss: 1.02622389793396,grad_norm: 0.8584244696957822, iteration: 287327
loss: 0.985439121723175,grad_norm: 0.98008928384447, iteration: 287328
loss: 1.02845299243927,grad_norm: 0.7431622845825889, iteration: 287329
loss: 1.005695104598999,grad_norm: 0.8829010701460871, iteration: 287330
loss: 1.0649031400680542,grad_norm: 0.999999869981786, iteration: 287331
loss: 1.0099397897720337,grad_norm: 0.6707067236634958, iteration: 287332
loss: 1.000359296798706,grad_norm: 0.8784791852448506, iteration: 287333
loss: 1.0102678537368774,grad_norm: 0.8743985229795082, iteration: 287334
loss: 0.9619261622428894,grad_norm: 0.8684805088301641, iteration: 287335
loss: 1.0140284299850464,grad_norm: 0.9999991758552871, iteration: 287336
loss: 0.9991652965545654,grad_norm: 0.9145611777410333, iteration: 287337
loss: 1.045923113822937,grad_norm: 0.8635499045886326, iteration: 287338
loss: 1.0680115222930908,grad_norm: 0.9374711210576933, iteration: 287339
loss: 1.0517065525054932,grad_norm: 0.999999153830496, iteration: 287340
loss: 0.9747576713562012,grad_norm: 0.9999990997086625, iteration: 287341
loss: 0.998464047908783,grad_norm: 0.8840335455922697, iteration: 287342
loss: 1.0288960933685303,grad_norm: 0.9999998849069781, iteration: 287343
loss: 1.0431935787200928,grad_norm: 1.0000000726469724, iteration: 287344
loss: 0.9822134971618652,grad_norm: 0.8823429105519772, iteration: 287345
loss: 1.117897391319275,grad_norm: 0.9999994247470662, iteration: 287346
loss: 1.0073992013931274,grad_norm: 0.7298214456312625, iteration: 287347
loss: 1.0371514558792114,grad_norm: 0.900133594914227, iteration: 287348
loss: 1.0791195631027222,grad_norm: 0.9999999534736814, iteration: 287349
loss: 1.031630516052246,grad_norm: 0.9999990549284499, iteration: 287350
loss: 1.010347843170166,grad_norm: 0.7642525222441773, iteration: 287351
loss: 0.9957854747772217,grad_norm: 0.9999991278850113, iteration: 287352
loss: 0.9855547547340393,grad_norm: 0.9999992190146487, iteration: 287353
loss: 1.02967369556427,grad_norm: 0.985657019007045, iteration: 287354
loss: 0.9561964273452759,grad_norm: 0.8610397178762726, iteration: 287355
loss: 1.0406383275985718,grad_norm: 0.9506889922004015, iteration: 287356
loss: 0.9967935681343079,grad_norm: 0.8394750281570051, iteration: 287357
loss: 0.9879756569862366,grad_norm: 0.8470253900100982, iteration: 287358
loss: 1.0315272808074951,grad_norm: 0.9999991637045222, iteration: 287359
loss: 1.0064629316329956,grad_norm: 0.8710294303291446, iteration: 287360
loss: 1.0370103120803833,grad_norm: 0.9999996506633558, iteration: 287361
loss: 1.007187843322754,grad_norm: 0.9999998546993113, iteration: 287362
loss: 1.022791862487793,grad_norm: 0.9809048334111007, iteration: 287363
loss: 0.9876459836959839,grad_norm: 0.7932169658532182, iteration: 287364
loss: 1.0270816087722778,grad_norm: 0.8561787420866901, iteration: 287365
loss: 1.0154539346694946,grad_norm: 0.999999029485666, iteration: 287366
loss: 0.9768195152282715,grad_norm: 0.9656905870839851, iteration: 287367
loss: 1.0208059549331665,grad_norm: 0.9999989893001854, iteration: 287368
loss: 0.9893501400947571,grad_norm: 0.7768233833325372, iteration: 287369
loss: 0.9939326047897339,grad_norm: 0.9705837192421186, iteration: 287370
loss: 0.9898910522460938,grad_norm: 0.7997135109222302, iteration: 287371
loss: 0.9926612973213196,grad_norm: 0.8396889229706508, iteration: 287372
loss: 1.032252550125122,grad_norm: 0.846908675528846, iteration: 287373
loss: 1.0027663707733154,grad_norm: 0.9453273070760697, iteration: 287374
loss: 0.9692220091819763,grad_norm: 0.9049866855661646, iteration: 287375
loss: 1.03361976146698,grad_norm: 0.9999995626916109, iteration: 287376
loss: 1.0102568864822388,grad_norm: 0.738256269183879, iteration: 287377
loss: 0.9869132041931152,grad_norm: 0.881218852978237, iteration: 287378
loss: 1.0190025568008423,grad_norm: 0.8951753074670746, iteration: 287379
loss: 0.994831919670105,grad_norm: 0.999999196111633, iteration: 287380
loss: 1.001466155052185,grad_norm: 0.9871949172528985, iteration: 287381
loss: 1.1012704372406006,grad_norm: 0.8771378084947615, iteration: 287382
loss: 1.021186113357544,grad_norm: 0.9999990354067113, iteration: 287383
loss: 0.9872782230377197,grad_norm: 0.7430035271313129, iteration: 287384
loss: 0.9603071808815002,grad_norm: 0.827539313058675, iteration: 287385
loss: 0.9949089884757996,grad_norm: 0.8339188861901309, iteration: 287386
loss: 1.0073981285095215,grad_norm: 0.9945974166292041, iteration: 287387
loss: 0.9749560952186584,grad_norm: 0.8504991883045601, iteration: 287388
loss: 0.9858466982841492,grad_norm: 0.9743165847831188, iteration: 287389
loss: 0.9934715032577515,grad_norm: 0.9299380805910156, iteration: 287390
loss: 1.013043999671936,grad_norm: 0.9348892297221784, iteration: 287391
loss: 1.0092551708221436,grad_norm: 0.8581049920440356, iteration: 287392
loss: 0.999761164188385,grad_norm: 0.7998477243506911, iteration: 287393
loss: 1.062086582183838,grad_norm: 0.9999990527970469, iteration: 287394
loss: 0.9882089495658875,grad_norm: 0.8257887002411162, iteration: 287395
loss: 0.9923145771026611,grad_norm: 0.8262084135318944, iteration: 287396
loss: 1.0742915868759155,grad_norm: 0.9543340455787075, iteration: 287397
loss: 1.0164939165115356,grad_norm: 0.9379437886907774, iteration: 287398
loss: 1.0331345796585083,grad_norm: 0.9999992201502267, iteration: 287399
loss: 1.0120658874511719,grad_norm: 0.7776575429514054, iteration: 287400
loss: 0.9836178421974182,grad_norm: 0.7231437728289105, iteration: 287401
loss: 0.9727292656898499,grad_norm: 0.8782124966014513, iteration: 287402
loss: 1.0154941082000732,grad_norm: 0.8184859143977034, iteration: 287403
loss: 1.019843339920044,grad_norm: 0.9999991166284795, iteration: 287404
loss: 1.0016224384307861,grad_norm: 0.8161117541552546, iteration: 287405
loss: 1.0517997741699219,grad_norm: 0.863266202513784, iteration: 287406
loss: 1.0000720024108887,grad_norm: 0.9999999787918978, iteration: 287407
loss: 1.0256849527359009,grad_norm: 0.8184262852752868, iteration: 287408
loss: 1.0033048391342163,grad_norm: 0.928414284075602, iteration: 287409
loss: 1.0186009407043457,grad_norm: 0.8117262335262866, iteration: 287410
loss: 0.9864593148231506,grad_norm: 0.795987555781085, iteration: 287411
loss: 0.9925034046173096,grad_norm: 0.9999990840878156, iteration: 287412
loss: 1.001156210899353,grad_norm: 0.832186442275601, iteration: 287413
loss: 0.9851011037826538,grad_norm: 0.9999991943365886, iteration: 287414
loss: 1.021280288696289,grad_norm: 0.8448035996414465, iteration: 287415
loss: 1.0039105415344238,grad_norm: 0.8954369056326629, iteration: 287416
loss: 1.0680503845214844,grad_norm: 0.9999990515324582, iteration: 287417
loss: 1.0072312355041504,grad_norm: 0.734775168002129, iteration: 287418
loss: 1.0446312427520752,grad_norm: 0.999999049343529, iteration: 287419
loss: 0.9718331098556519,grad_norm: 0.9791928803308678, iteration: 287420
loss: 0.9862303137779236,grad_norm: 0.999999127270224, iteration: 287421
loss: 0.9827008247375488,grad_norm: 0.9072800919676167, iteration: 287422
loss: 1.0510671138763428,grad_norm: 0.9999990812118342, iteration: 287423
loss: 0.985185444355011,grad_norm: 0.9486216226501456, iteration: 287424
loss: 1.0075445175170898,grad_norm: 0.7043852354020776, iteration: 287425
loss: 0.9588449001312256,grad_norm: 0.7429377481710365, iteration: 287426
loss: 1.0358095169067383,grad_norm: 0.9276930266155933, iteration: 287427
loss: 1.0055060386657715,grad_norm: 0.8541362394139687, iteration: 287428
loss: 1.00760018825531,grad_norm: 0.8979441733024027, iteration: 287429
loss: 1.030616283416748,grad_norm: 0.7168185134316912, iteration: 287430
loss: 0.9917490482330322,grad_norm: 0.9035767956659648, iteration: 287431
loss: 0.9868188500404358,grad_norm: 0.8630845978138616, iteration: 287432
loss: 0.9964438080787659,grad_norm: 0.8540779065455703, iteration: 287433
loss: 1.035142183303833,grad_norm: 0.9999991284227069, iteration: 287434
loss: 1.0778080224990845,grad_norm: 0.9462106470556642, iteration: 287435
loss: 1.0108723640441895,grad_norm: 0.9999996135690286, iteration: 287436
loss: 1.028975009918213,grad_norm: 0.9999999227717995, iteration: 287437
loss: 1.0044312477111816,grad_norm: 0.9479273568644375, iteration: 287438
loss: 0.9775149822235107,grad_norm: 0.8446496412957016, iteration: 287439
loss: 0.9714720845222473,grad_norm: 0.7196699138010023, iteration: 287440
loss: 0.9687926173210144,grad_norm: 0.744485842205918, iteration: 287441
loss: 1.014073371887207,grad_norm: 0.8935177980029668, iteration: 287442
loss: 0.9911125898361206,grad_norm: 0.8772172025768613, iteration: 287443
loss: 0.9864491820335388,grad_norm: 0.8155415023894161, iteration: 287444
loss: 1.0240485668182373,grad_norm: 0.8005497310485526, iteration: 287445
loss: 1.0051897764205933,grad_norm: 0.8458569864376541, iteration: 287446
loss: 0.9748640060424805,grad_norm: 0.9999990625047334, iteration: 287447
loss: 0.9952077269554138,grad_norm: 0.686763971099373, iteration: 287448
loss: 0.9873470067977905,grad_norm: 0.8769787449149308, iteration: 287449
loss: 0.9850565791130066,grad_norm: 0.7797966346556799, iteration: 287450
loss: 0.982202410697937,grad_norm: 0.9064463363982389, iteration: 287451
loss: 0.990124523639679,grad_norm: 0.7964301662007703, iteration: 287452
loss: 1.021755576133728,grad_norm: 0.8598335950884889, iteration: 287453
loss: 1.1643949747085571,grad_norm: 0.9873049290509713, iteration: 287454
loss: 1.0396078824996948,grad_norm: 0.9298317437412009, iteration: 287455
loss: 0.9482267498970032,grad_norm: 0.8057629826530469, iteration: 287456
loss: 0.9955451488494873,grad_norm: 0.7805615490332081, iteration: 287457
loss: 0.983591616153717,grad_norm: 0.749143571805437, iteration: 287458
loss: 1.1024787425994873,grad_norm: 0.9999992099970522, iteration: 287459
loss: 1.020421028137207,grad_norm: 0.7718995618562717, iteration: 287460
loss: 0.9992580413818359,grad_norm: 0.8167216797984336, iteration: 287461
loss: 0.978983461856842,grad_norm: 0.7487209167387104, iteration: 287462
loss: 0.971181333065033,grad_norm: 0.8435179051732696, iteration: 287463
loss: 1.0059365034103394,grad_norm: 0.8541035124505628, iteration: 287464
loss: 1.0341198444366455,grad_norm: 0.9087223261044486, iteration: 287465
loss: 1.0424333810806274,grad_norm: 0.9068344065181239, iteration: 287466
loss: 0.9847292900085449,grad_norm: 0.9999991963412767, iteration: 287467
loss: 1.0353096723556519,grad_norm: 0.9999992167794265, iteration: 287468
loss: 0.9957414269447327,grad_norm: 0.9158998978182439, iteration: 287469
loss: 0.9869678020477295,grad_norm: 0.7902071514572473, iteration: 287470
loss: 0.985028088092804,grad_norm: 0.7219876618769828, iteration: 287471
loss: 1.028735637664795,grad_norm: 0.9040029515996125, iteration: 287472
loss: 1.052604079246521,grad_norm: 0.9999995299221331, iteration: 287473
loss: 0.9932608604431152,grad_norm: 0.9484794293077387, iteration: 287474
loss: 0.9845519661903381,grad_norm: 0.8984389482368277, iteration: 287475
loss: 0.9904153943061829,grad_norm: 0.8337491579836949, iteration: 287476
loss: 1.0262348651885986,grad_norm: 0.822039606731638, iteration: 287477
loss: 1.016500473022461,grad_norm: 0.7925232133574334, iteration: 287478
loss: 0.9985466599464417,grad_norm: 0.9667631489534697, iteration: 287479
loss: 0.9831474423408508,grad_norm: 0.8255212170597735, iteration: 287480
loss: 0.957207977771759,grad_norm: 0.9163896355455641, iteration: 287481
loss: 1.0067089796066284,grad_norm: 0.9678526876005763, iteration: 287482
loss: 1.0231616497039795,grad_norm: 0.91470271155409, iteration: 287483
loss: 0.9808870553970337,grad_norm: 0.8273077111815997, iteration: 287484
loss: 1.0252676010131836,grad_norm: 0.7962153825863028, iteration: 287485
loss: 1.011029839515686,grad_norm: 0.8406355105846015, iteration: 287486
loss: 1.0039498805999756,grad_norm: 0.8139423600352715, iteration: 287487
loss: 0.9771233201026917,grad_norm: 0.9999998207179516, iteration: 287488
loss: 1.0432497262954712,grad_norm: 0.942995151523427, iteration: 287489
loss: 1.0251176357269287,grad_norm: 0.8672173687054754, iteration: 287490
loss: 0.9684549570083618,grad_norm: 0.6875558610371203, iteration: 287491
loss: 1.0252248048782349,grad_norm: 0.9092789680106913, iteration: 287492
loss: 1.0650463104248047,grad_norm: 0.9899409524147488, iteration: 287493
loss: 1.0153998136520386,grad_norm: 0.9811413085048425, iteration: 287494
loss: 0.9791152477264404,grad_norm: 0.8086569886963192, iteration: 287495
loss: 0.9991001486778259,grad_norm: 0.7843655798116547, iteration: 287496
loss: 0.9942167401313782,grad_norm: 0.8127303414300417, iteration: 287497
loss: 1.0019854307174683,grad_norm: 0.8369383439177779, iteration: 287498
loss: 1.0153807401657104,grad_norm: 0.8465054026911367, iteration: 287499
loss: 1.0020461082458496,grad_norm: 0.9702978908011941, iteration: 287500
loss: 1.0047515630722046,grad_norm: 0.887318743280022, iteration: 287501
loss: 1.0563548803329468,grad_norm: 0.9367907008437844, iteration: 287502
loss: 1.0111687183380127,grad_norm: 0.9195258813035716, iteration: 287503
loss: 1.0292164087295532,grad_norm: 0.784353707728097, iteration: 287504
loss: 1.013499140739441,grad_norm: 0.8484815347063763, iteration: 287505
loss: 0.9894548058509827,grad_norm: 0.8938507252634906, iteration: 287506
loss: 1.0042994022369385,grad_norm: 0.7567860844047716, iteration: 287507
loss: 0.9827723503112793,grad_norm: 0.9891903784121464, iteration: 287508
loss: 0.9980420470237732,grad_norm: 0.9656991628711354, iteration: 287509
loss: 0.9809428453445435,grad_norm: 0.7857421272937737, iteration: 287510
loss: 0.9435373544692993,grad_norm: 0.9357832207984774, iteration: 287511
loss: 0.978661060333252,grad_norm: 0.838905736660283, iteration: 287512
loss: 1.003664255142212,grad_norm: 0.8159691188630794, iteration: 287513
loss: 1.0107463598251343,grad_norm: 0.7737264385877142, iteration: 287514
loss: 1.0130850076675415,grad_norm: 0.9076165968636776, iteration: 287515
loss: 1.0531264543533325,grad_norm: 0.9021109712230945, iteration: 287516
loss: 0.9925914406776428,grad_norm: 0.8704376841715534, iteration: 287517
loss: 1.0004221200942993,grad_norm: 0.8025212082976216, iteration: 287518
loss: 1.0435926914215088,grad_norm: 0.9999996973142193, iteration: 287519
loss: 1.0322368144989014,grad_norm: 0.9999991169184862, iteration: 287520
loss: 0.9975652098655701,grad_norm: 0.9999994548063881, iteration: 287521
loss: 0.9660260677337646,grad_norm: 0.9010221643946736, iteration: 287522
loss: 1.0061897039413452,grad_norm: 0.8926346288484656, iteration: 287523
loss: 1.0061924457550049,grad_norm: 0.8391608499299563, iteration: 287524
loss: 1.017514705657959,grad_norm: 0.8573403189708915, iteration: 287525
loss: 0.9692264199256897,grad_norm: 0.9999989714046013, iteration: 287526
loss: 1.0064682960510254,grad_norm: 0.7909031348474395, iteration: 287527
loss: 1.0014266967773438,grad_norm: 0.8037117405775653, iteration: 287528
loss: 1.0235843658447266,grad_norm: 0.79647291322991, iteration: 287529
loss: 1.0188060998916626,grad_norm: 0.9460680188048447, iteration: 287530
loss: 1.0052884817123413,grad_norm: 0.7464817868540523, iteration: 287531
loss: 1.0085196495056152,grad_norm: 0.9999991302000683, iteration: 287532
loss: 1.0008448362350464,grad_norm: 0.7407816420679978, iteration: 287533
loss: 1.0130771398544312,grad_norm: 0.8634719567556082, iteration: 287534
loss: 0.9749851822853088,grad_norm: 0.9999995234294418, iteration: 287535
loss: 0.9749512672424316,grad_norm: 0.7460758120985743, iteration: 287536
loss: 1.0462461709976196,grad_norm: 0.9692162836887024, iteration: 287537
loss: 1.0130319595336914,grad_norm: 0.8255557708037319, iteration: 287538
loss: 1.0283873081207275,grad_norm: 0.9225782772239886, iteration: 287539
loss: 1.003832221031189,grad_norm: 0.9464782599400211, iteration: 287540
loss: 1.0031392574310303,grad_norm: 0.8477922398034634, iteration: 287541
loss: 0.9869117140769958,grad_norm: 0.8171379532196094, iteration: 287542
loss: 0.993272066116333,grad_norm: 0.8822766515789338, iteration: 287543
loss: 0.9851193428039551,grad_norm: 0.9999990694107026, iteration: 287544
loss: 0.9922571778297424,grad_norm: 0.9999994551715573, iteration: 287545
loss: 1.001071810722351,grad_norm: 0.7780887704033626, iteration: 287546
loss: 0.9992857575416565,grad_norm: 0.9750204710547826, iteration: 287547
loss: 1.0009939670562744,grad_norm: 0.8044801066152332, iteration: 287548
loss: 1.0113084316253662,grad_norm: 0.9348896760474423, iteration: 287549
loss: 0.9899972677230835,grad_norm: 0.7375845862549986, iteration: 287550
loss: 1.0281623601913452,grad_norm: 0.8427916411870661, iteration: 287551
loss: 0.9775570631027222,grad_norm: 0.8238417900484899, iteration: 287552
loss: 1.0234166383743286,grad_norm: 0.9999997311682287, iteration: 287553
loss: 0.9747039675712585,grad_norm: 0.8879548311879535, iteration: 287554
loss: 1.002411127090454,grad_norm: 0.8808451448275332, iteration: 287555
loss: 1.0240201950073242,grad_norm: 0.9873953044641577, iteration: 287556
loss: 1.007087230682373,grad_norm: 0.9999991411814625, iteration: 287557
loss: 0.9780171513557434,grad_norm: 0.9501110699821733, iteration: 287558
loss: 1.0286856889724731,grad_norm: 0.8466769905071945, iteration: 287559
loss: 1.009079098701477,grad_norm: 0.9999992304771007, iteration: 287560
loss: 0.973588764667511,grad_norm: 0.7613918471566781, iteration: 287561
loss: 1.032845139503479,grad_norm: 0.8282230347842193, iteration: 287562
loss: 1.0233051776885986,grad_norm: 0.9469691043444807, iteration: 287563
loss: 1.0348849296569824,grad_norm: 0.8800944812231604, iteration: 287564
loss: 0.9775434732437134,grad_norm: 0.7223922805290304, iteration: 287565
loss: 1.006911277770996,grad_norm: 0.897400082815727, iteration: 287566
loss: 0.9732038974761963,grad_norm: 0.8673196074019311, iteration: 287567
loss: 1.0228395462036133,grad_norm: 0.99999901932755, iteration: 287568
loss: 0.9888059496879578,grad_norm: 0.8990881093286923, iteration: 287569
loss: 0.9908457398414612,grad_norm: 0.8674115018720702, iteration: 287570
loss: 0.9617209434509277,grad_norm: 0.9256190095484182, iteration: 287571
loss: 0.9993006587028503,grad_norm: 0.8722025824408995, iteration: 287572
loss: 1.0258225202560425,grad_norm: 0.7807548649724262, iteration: 287573
loss: 0.9751752614974976,grad_norm: 0.8083452357469645, iteration: 287574
loss: 1.1176880598068237,grad_norm: 0.9999999118178717, iteration: 287575
loss: 1.073788046836853,grad_norm: 0.9999997763677039, iteration: 287576
loss: 1.0056036710739136,grad_norm: 0.8993273448568982, iteration: 287577
loss: 1.0183494091033936,grad_norm: 0.9561966635459552, iteration: 287578
loss: 0.9939651489257812,grad_norm: 0.8646572009875302, iteration: 287579
loss: 1.00618577003479,grad_norm: 0.7532806795943767, iteration: 287580
loss: 0.9667983055114746,grad_norm: 0.8470632259056401, iteration: 287581
loss: 0.9832803606987,grad_norm: 0.9746428247659583, iteration: 287582
loss: 0.9896447658538818,grad_norm: 0.8130874888997045, iteration: 287583
loss: 1.0258234739303589,grad_norm: 0.8708011064912498, iteration: 287584
loss: 0.9898852705955505,grad_norm: 0.875034567468377, iteration: 287585
loss: 0.9981664419174194,grad_norm: 0.8165940685131498, iteration: 287586
loss: 1.0072484016418457,grad_norm: 0.985355789601203, iteration: 287587
loss: 0.992746114730835,grad_norm: 0.7635797159851044, iteration: 287588
loss: 0.9998166561126709,grad_norm: 0.9999998476342071, iteration: 287589
loss: 1.0029535293579102,grad_norm: 0.7539743013472088, iteration: 287590
loss: 1.0161751508712769,grad_norm: 0.758664617517098, iteration: 287591
loss: 1.0025415420532227,grad_norm: 0.8486629457522641, iteration: 287592
loss: 0.9784647226333618,grad_norm: 0.7023502050093184, iteration: 287593
loss: 1.0222885608673096,grad_norm: 0.9999991169524689, iteration: 287594
loss: 1.0073965787887573,grad_norm: 0.8447823252540666, iteration: 287595
loss: 0.9972847700119019,grad_norm: 0.9999992378648392, iteration: 287596
loss: 0.9962318539619446,grad_norm: 0.9999992030491561, iteration: 287597
loss: 0.9766075015068054,grad_norm: 0.9817785019440818, iteration: 287598
loss: 0.9776449799537659,grad_norm: 0.8071127515391517, iteration: 287599
loss: 0.9855666160583496,grad_norm: 0.7226839302380315, iteration: 287600
loss: 1.0111867189407349,grad_norm: 0.820458172463821, iteration: 287601
loss: 1.1062642335891724,grad_norm: 0.9999996600852816, iteration: 287602
loss: 0.9959641695022583,grad_norm: 0.9879450031444017, iteration: 287603
loss: 0.9850097894668579,grad_norm: 0.9957318802866167, iteration: 287604
loss: 0.9916436076164246,grad_norm: 0.7803443818124233, iteration: 287605
loss: 1.0199811458587646,grad_norm: 0.8611661758957431, iteration: 287606
loss: 1.0023959875106812,grad_norm: 0.7231151283241759, iteration: 287607
loss: 1.1672942638397217,grad_norm: 0.9999991947703546, iteration: 287608
loss: 1.09207022190094,grad_norm: 0.9999991630443738, iteration: 287609
loss: 1.0057145357131958,grad_norm: 0.9999992854553571, iteration: 287610
loss: 0.9790335893630981,grad_norm: 0.8514123372417479, iteration: 287611
loss: 1.0109148025512695,grad_norm: 0.9020838191448608, iteration: 287612
loss: 1.071945071220398,grad_norm: 0.9999993788404732, iteration: 287613
loss: 0.9592999815940857,grad_norm: 0.8238139920300962, iteration: 287614
loss: 0.9755819439888,grad_norm: 0.8059421507693878, iteration: 287615
loss: 0.9630588293075562,grad_norm: 0.8469801886068861, iteration: 287616
loss: 1.0028576850891113,grad_norm: 0.9410750704269066, iteration: 287617
loss: 1.0125981569290161,grad_norm: 0.9999994477145558, iteration: 287618
loss: 0.9862691760063171,grad_norm: 0.8206211025651712, iteration: 287619
loss: 0.9764670133590698,grad_norm: 0.8972807906883039, iteration: 287620
loss: 1.0317773818969727,grad_norm: 0.8677447917860576, iteration: 287621
loss: 1.0432078838348389,grad_norm: 0.8342923509600542, iteration: 287622
loss: 1.0135904550552368,grad_norm: 0.8364081627986879, iteration: 287623
loss: 1.019813060760498,grad_norm: 0.7579573398589569, iteration: 287624
loss: 1.002841830253601,grad_norm: 0.9107101812583858, iteration: 287625
loss: 0.9804909229278564,grad_norm: 0.7300576084609625, iteration: 287626
loss: 1.0106559991836548,grad_norm: 0.843458988403321, iteration: 287627
loss: 1.136000156402588,grad_norm: 0.99999922189827, iteration: 287628
loss: 0.9908655285835266,grad_norm: 0.9278207935055247, iteration: 287629
loss: 0.9621762037277222,grad_norm: 0.7102673300655777, iteration: 287630
loss: 1.0395265817642212,grad_norm: 0.8883861313396102, iteration: 287631
loss: 0.9970688223838806,grad_norm: 0.8705758413748411, iteration: 287632
loss: 1.1194783449172974,grad_norm: 0.9999999830270894, iteration: 287633
loss: 0.9897503852844238,grad_norm: 0.8341596525865986, iteration: 287634
loss: 1.0109446048736572,grad_norm: 0.8357214728061523, iteration: 287635
loss: 1.0516966581344604,grad_norm: 0.9999994069136271, iteration: 287636
loss: 0.991060733795166,grad_norm: 0.8961523116518422, iteration: 287637
loss: 0.9912744760513306,grad_norm: 0.9928734180894675, iteration: 287638
loss: 1.0158530473709106,grad_norm: 0.8932538828651239, iteration: 287639
loss: 1.0467230081558228,grad_norm: 0.7663615084613128, iteration: 287640
loss: 1.0656766891479492,grad_norm: 0.9999991439662489, iteration: 287641
loss: 0.9886417388916016,grad_norm: 0.7769427526770648, iteration: 287642
loss: 0.9921172261238098,grad_norm: 0.6903164618656383, iteration: 287643
loss: 1.0096921920776367,grad_norm: 0.8814437854169924, iteration: 287644
loss: 1.0939933061599731,grad_norm: 0.9999999544660447, iteration: 287645
loss: 0.9710230231285095,grad_norm: 0.7031331019955767, iteration: 287646
loss: 1.033809781074524,grad_norm: 0.8836636273098647, iteration: 287647
loss: 0.9744779467582703,grad_norm: 0.9999991444166417, iteration: 287648
loss: 1.0012282133102417,grad_norm: 0.8006937884072585, iteration: 287649
loss: 1.0252691507339478,grad_norm: 0.7695398477102, iteration: 287650
loss: 1.0126218795776367,grad_norm: 0.8933715860255269, iteration: 287651
loss: 0.9727768301963806,grad_norm: 0.9324629007307808, iteration: 287652
loss: 1.019829273223877,grad_norm: 0.695770682572147, iteration: 287653
loss: 0.9800423383712769,grad_norm: 0.799991661514098, iteration: 287654
loss: 0.9879907965660095,grad_norm: 0.9815011442411241, iteration: 287655
loss: 1.0094908475875854,grad_norm: 0.8574184955947743, iteration: 287656
loss: 0.9655439853668213,grad_norm: 0.900909076064443, iteration: 287657
loss: 1.0089025497436523,grad_norm: 0.999999031427651, iteration: 287658
loss: 0.9768043756484985,grad_norm: 0.9169201305408383, iteration: 287659
loss: 0.9857674241065979,grad_norm: 0.9999990875464151, iteration: 287660
loss: 0.9940558671951294,grad_norm: 0.879611770179514, iteration: 287661
loss: 1.0249427556991577,grad_norm: 0.9407498057558832, iteration: 287662
loss: 0.9943006634712219,grad_norm: 0.808411595345817, iteration: 287663
loss: 1.0062952041625977,grad_norm: 0.8208210758062615, iteration: 287664
loss: 0.9946494102478027,grad_norm: 0.7162791515455916, iteration: 287665
loss: 1.0452628135681152,grad_norm: 0.9999996343949615, iteration: 287666
loss: 1.0299460887908936,grad_norm: 0.9999991505310486, iteration: 287667
loss: 0.9942983388900757,grad_norm: 0.7651451923769298, iteration: 287668
loss: 0.9820631146430969,grad_norm: 0.753288793123193, iteration: 287669
loss: 0.9905449151992798,grad_norm: 0.7820098374531804, iteration: 287670
loss: 0.9865742921829224,grad_norm: 0.8468954544122608, iteration: 287671
loss: 1.015256643295288,grad_norm: 0.8581638410873678, iteration: 287672
loss: 0.9551341533660889,grad_norm: 0.809754755195486, iteration: 287673
loss: 0.9678460955619812,grad_norm: 0.8784504159374691, iteration: 287674
loss: 1.0063530206680298,grad_norm: 0.7888522759090449, iteration: 287675
loss: 0.9868456721305847,grad_norm: 0.8424775078543438, iteration: 287676
loss: 1.0269910097122192,grad_norm: 0.9783051427614392, iteration: 287677
loss: 0.9755904674530029,grad_norm: 0.9999990390863819, iteration: 287678
loss: 1.0039682388305664,grad_norm: 0.879710251587294, iteration: 287679
loss: 0.9638186097145081,grad_norm: 0.8051150062930191, iteration: 287680
loss: 0.9936841130256653,grad_norm: 0.9999993199509826, iteration: 287681
loss: 0.9900062084197998,grad_norm: 0.9999995521911573, iteration: 287682
loss: 1.0366346836090088,grad_norm: 0.7861922171141836, iteration: 287683
loss: 0.9824684858322144,grad_norm: 0.9999992285082671, iteration: 287684
loss: 0.9860204458236694,grad_norm: 0.9999991918530676, iteration: 287685
loss: 1.034596562385559,grad_norm: 0.8382835385850269, iteration: 287686
loss: 0.9830904006958008,grad_norm: 0.8995727665684812, iteration: 287687
loss: 1.0526589155197144,grad_norm: 0.9067581886815224, iteration: 287688
loss: 1.01743745803833,grad_norm: 0.9999988414619316, iteration: 287689
loss: 1.0257927179336548,grad_norm: 0.8193496738298522, iteration: 287690
loss: 0.9849847555160522,grad_norm: 0.899868206304725, iteration: 287691
loss: 0.9921881556510925,grad_norm: 0.8617947617314065, iteration: 287692
loss: 0.9851065278053284,grad_norm: 0.999999204816498, iteration: 287693
loss: 1.0030288696289062,grad_norm: 0.7996166579837323, iteration: 287694
loss: 1.0262328386306763,grad_norm: 0.9999998185319393, iteration: 287695
loss: 0.9979898929595947,grad_norm: 0.8486982244190209, iteration: 287696
loss: 1.012373924255371,grad_norm: 0.7716290674312863, iteration: 287697
loss: 0.9912863373756409,grad_norm: 0.8224764756282925, iteration: 287698
loss: 1.0294995307922363,grad_norm: 0.9787098386782248, iteration: 287699
loss: 0.9993614554405212,grad_norm: 0.9999991193976039, iteration: 287700
loss: 0.9946626424789429,grad_norm: 0.9999991582998639, iteration: 287701
loss: 1.0295759439468384,grad_norm: 0.9999993959381391, iteration: 287702
loss: 0.9814092516899109,grad_norm: 0.8851132756530399, iteration: 287703
loss: 0.993887722492218,grad_norm: 0.9374464415548579, iteration: 287704
loss: 1.0136549472808838,grad_norm: 0.8475184387846221, iteration: 287705
loss: 1.0143009424209595,grad_norm: 0.9999990665595975, iteration: 287706
loss: 0.9853708744049072,grad_norm: 0.9703149373327942, iteration: 287707
loss: 1.0487802028656006,grad_norm: 0.8695105788566232, iteration: 287708
loss: 0.9697071313858032,grad_norm: 0.8511178623441407, iteration: 287709
loss: 1.0122401714324951,grad_norm: 0.7934870559051838, iteration: 287710
loss: 1.0209425687789917,grad_norm: 0.9688158494127043, iteration: 287711
loss: 1.0127251148223877,grad_norm: 0.7677874182444814, iteration: 287712
loss: 1.0087538957595825,grad_norm: 0.9999998796941628, iteration: 287713
loss: 1.02088463306427,grad_norm: 0.8227622588816933, iteration: 287714
loss: 1.0249378681182861,grad_norm: 0.9080171463822095, iteration: 287715
loss: 0.996712327003479,grad_norm: 0.9999990615245246, iteration: 287716
loss: 1.1151068210601807,grad_norm: 0.9999992994383634, iteration: 287717
loss: 0.9441283941268921,grad_norm: 0.8365308330362605, iteration: 287718
loss: 0.9859486222267151,grad_norm: 0.9999994930179906, iteration: 287719
loss: 1.042201042175293,grad_norm: 0.9999997666882252, iteration: 287720
loss: 1.0545791387557983,grad_norm: 0.7487509837112145, iteration: 287721
loss: 0.9809079170227051,grad_norm: 0.9999995228803644, iteration: 287722
loss: 1.0752259492874146,grad_norm: 0.9999990632902047, iteration: 287723
loss: 0.9755185842514038,grad_norm: 0.738526567951377, iteration: 287724
loss: 0.9840953350067139,grad_norm: 0.999999918194259, iteration: 287725
loss: 0.9725778698921204,grad_norm: 0.9448335184659992, iteration: 287726
loss: 1.0279362201690674,grad_norm: 0.9387134326975101, iteration: 287727
loss: 1.0220584869384766,grad_norm: 0.9999992458262768, iteration: 287728
loss: 1.0184568166732788,grad_norm: 0.8548194438922191, iteration: 287729
loss: 1.02568781375885,grad_norm: 0.9647863659003024, iteration: 287730
loss: 1.018401861190796,grad_norm: 0.9996424381703941, iteration: 287731
loss: 0.9531881809234619,grad_norm: 0.8956104269494665, iteration: 287732
loss: 1.0253652334213257,grad_norm: 0.9608770611744473, iteration: 287733
loss: 1.0157305002212524,grad_norm: 0.9462541722803481, iteration: 287734
loss: 1.0073025226593018,grad_norm: 0.870255382109291, iteration: 287735
loss: 0.9824287295341492,grad_norm: 0.8265762304600187, iteration: 287736
loss: 0.9804866313934326,grad_norm: 0.9721970518064768, iteration: 287737
loss: 1.0087274312973022,grad_norm: 0.8986181102776288, iteration: 287738
loss: 1.0315464735031128,grad_norm: 0.9999992577075699, iteration: 287739
loss: 0.9781172871589661,grad_norm: 0.7640879520775756, iteration: 287740
loss: 0.9787930846214294,grad_norm: 0.833522646792658, iteration: 287741
loss: 0.9967309832572937,grad_norm: 0.9999990654861982, iteration: 287742
loss: 1.0238195657730103,grad_norm: 0.979104924329763, iteration: 287743
loss: 0.9710738062858582,grad_norm: 0.8821506892982804, iteration: 287744
loss: 0.9875994920730591,grad_norm: 0.7916411216300274, iteration: 287745
loss: 1.0168898105621338,grad_norm: 0.7361105055668907, iteration: 287746
loss: 0.9731885194778442,grad_norm: 0.842863804768109, iteration: 287747
loss: 0.9781034588813782,grad_norm: 0.7768995804309877, iteration: 287748
loss: 1.0389705896377563,grad_norm: 0.8301973333473844, iteration: 287749
loss: 1.073042392730713,grad_norm: 0.9002421170221754, iteration: 287750
loss: 1.0377874374389648,grad_norm: 0.8497493152761881, iteration: 287751
loss: 1.05054771900177,grad_norm: 0.9999990925831429, iteration: 287752
loss: 0.9750267863273621,grad_norm: 0.8588727975002349, iteration: 287753
loss: 0.9884310364723206,grad_norm: 0.850683278282228, iteration: 287754
loss: 1.0020557641983032,grad_norm: 0.8932041106382471, iteration: 287755
loss: 1.002142071723938,grad_norm: 0.912992983754199, iteration: 287756
loss: 0.9881945848464966,grad_norm: 0.7733245127579965, iteration: 287757
loss: 0.9806295037269592,grad_norm: 0.8911781147876942, iteration: 287758
loss: 0.9705646634101868,grad_norm: 0.9920824299852686, iteration: 287759
loss: 1.0147068500518799,grad_norm: 0.9999993619720415, iteration: 287760
loss: 1.022512435913086,grad_norm: 0.9725296329243106, iteration: 287761
loss: 1.000874638557434,grad_norm: 0.9718522221606507, iteration: 287762
loss: 1.068873405456543,grad_norm: 0.9999997118644257, iteration: 287763
loss: 1.028464674949646,grad_norm: 0.8973640849717325, iteration: 287764
loss: 0.9865871667861938,grad_norm: 0.9778484402498158, iteration: 287765
loss: 0.9879960417747498,grad_norm: 0.9999993437157483, iteration: 287766
loss: 0.9732519388198853,grad_norm: 0.7892978999003514, iteration: 287767
loss: 1.179506778717041,grad_norm: 0.9999997749638573, iteration: 287768
loss: 0.9662506580352783,grad_norm: 0.8570308751066797, iteration: 287769
loss: 1.015860676765442,grad_norm: 0.8955806410360119, iteration: 287770
loss: 0.9877945184707642,grad_norm: 0.7430721094453132, iteration: 287771
loss: 0.9585126638412476,grad_norm: 0.7707742185193506, iteration: 287772
loss: 1.0540330410003662,grad_norm: 0.9999999674038266, iteration: 287773
loss: 1.008863091468811,grad_norm: 0.8552283933884356, iteration: 287774
loss: 0.9937069416046143,grad_norm: 0.9999992738891447, iteration: 287775
loss: 0.9951031804084778,grad_norm: 0.7864096134025788, iteration: 287776
loss: 0.9776301980018616,grad_norm: 0.8679924298549778, iteration: 287777
loss: 1.0286940336227417,grad_norm: 0.8707397078709332, iteration: 287778
loss: 1.0106788873672485,grad_norm: 0.8312272388441042, iteration: 287779
loss: 1.0565593242645264,grad_norm: 0.9710834841111721, iteration: 287780
loss: 0.9810817837715149,grad_norm: 0.8136182603712253, iteration: 287781
loss: 1.0179579257965088,grad_norm: 0.8648568188562232, iteration: 287782
loss: 0.9885976910591125,grad_norm: 0.7460161300269316, iteration: 287783
loss: 1.0458847284317017,grad_norm: 0.9999992069587572, iteration: 287784
loss: 1.0809844732284546,grad_norm: 0.9999995741874056, iteration: 287785
loss: 0.9550081491470337,grad_norm: 0.9999991586194998, iteration: 287786
loss: 1.0251654386520386,grad_norm: 0.8887984775540764, iteration: 287787
loss: 1.0044527053833008,grad_norm: 0.8722275804230646, iteration: 287788
loss: 1.0492748022079468,grad_norm: 0.9228309676052587, iteration: 287789
loss: 0.985897958278656,grad_norm: 0.9794236325437465, iteration: 287790
loss: 0.9874140024185181,grad_norm: 0.6981530804509097, iteration: 287791
loss: 1.005454659461975,grad_norm: 0.8968503165490529, iteration: 287792
loss: 0.992339015007019,grad_norm: 0.7106388970828532, iteration: 287793
loss: 0.9588831067085266,grad_norm: 0.9058337964823358, iteration: 287794
loss: 0.9931102991104126,grad_norm: 0.9211078941032235, iteration: 287795
loss: 1.0200140476226807,grad_norm: 0.845543360188491, iteration: 287796
loss: 1.0048234462738037,grad_norm: 0.9999991983423832, iteration: 287797
loss: 0.9873588681221008,grad_norm: 0.8776810331744086, iteration: 287798
loss: 0.9984145164489746,grad_norm: 0.859728163219077, iteration: 287799
loss: 0.9665393233299255,grad_norm: 0.8142161540501484, iteration: 287800
loss: 1.0221058130264282,grad_norm: 0.9552804322222025, iteration: 287801
loss: 1.0237953662872314,grad_norm: 0.8744432602767274, iteration: 287802
loss: 0.9642465710639954,grad_norm: 0.8420368232662597, iteration: 287803
loss: 1.0693631172180176,grad_norm: 0.9645612535100325, iteration: 287804
loss: 0.9826560020446777,grad_norm: 0.8892664802400658, iteration: 287805
loss: 1.1231985092163086,grad_norm: 0.9999997892849287, iteration: 287806
loss: 0.9902286529541016,grad_norm: 0.9999992687785454, iteration: 287807
loss: 0.9618535041809082,grad_norm: 0.8084790591102903, iteration: 287808
loss: 0.9661848545074463,grad_norm: 0.7588478272887321, iteration: 287809
loss: 0.9609642028808594,grad_norm: 0.999999372682038, iteration: 287810
loss: 1.0776045322418213,grad_norm: 0.99999942539945, iteration: 287811
loss: 0.9774505496025085,grad_norm: 0.8469570630738559, iteration: 287812
loss: 1.0125128030776978,grad_norm: 0.8408103150264897, iteration: 287813
loss: 0.96815025806427,grad_norm: 0.9043324569152623, iteration: 287814
loss: 1.0108286142349243,grad_norm: 0.9999997030155722, iteration: 287815
loss: 1.0316919088363647,grad_norm: 0.9299185510873379, iteration: 287816
loss: 0.9837244153022766,grad_norm: 0.8899337714558178, iteration: 287817
loss: 1.0014325380325317,grad_norm: 0.9220150836676524, iteration: 287818
loss: 0.9880131483078003,grad_norm: 0.9427224585177831, iteration: 287819
loss: 1.0066654682159424,grad_norm: 0.6910577422520804, iteration: 287820
loss: 1.009591817855835,grad_norm: 0.9999990673519111, iteration: 287821
loss: 1.0173606872558594,grad_norm: 0.7560809879801018, iteration: 287822
loss: 0.982431173324585,grad_norm: 0.8406485348144482, iteration: 287823
loss: 1.0074986219406128,grad_norm: 0.8285261052281081, iteration: 287824
loss: 1.0518900156021118,grad_norm: 0.9999994448256705, iteration: 287825
loss: 0.9840625524520874,grad_norm: 0.9609114243910056, iteration: 287826
loss: 0.99800044298172,grad_norm: 0.7393009952552849, iteration: 287827
loss: 0.9932061433792114,grad_norm: 0.8174447898238197, iteration: 287828
loss: 1.0094200372695923,grad_norm: 0.7499954934893488, iteration: 287829
loss: 0.9936765432357788,grad_norm: 0.9199917375499357, iteration: 287830
loss: 1.0224692821502686,grad_norm: 0.9999991637004065, iteration: 287831
loss: 0.9777082800865173,grad_norm: 0.8918694541988674, iteration: 287832
loss: 1.2521013021469116,grad_norm: 0.9999998696239549, iteration: 287833
loss: 1.0264461040496826,grad_norm: 0.9999996556762517, iteration: 287834
loss: 1.088622808456421,grad_norm: 0.9999992515871394, iteration: 287835
loss: 0.9703361988067627,grad_norm: 0.9331090959529049, iteration: 287836
loss: 0.9542561769485474,grad_norm: 0.7574825852002023, iteration: 287837
loss: 1.0294163227081299,grad_norm: 0.810789839426643, iteration: 287838
loss: 1.3157891035079956,grad_norm: 0.9999998072140298, iteration: 287839
loss: 0.993007242679596,grad_norm: 0.9999993719570505, iteration: 287840
loss: 1.0067650079727173,grad_norm: 0.8896331686111321, iteration: 287841
loss: 0.9929904937744141,grad_norm: 0.7458424183596805, iteration: 287842
loss: 0.9673478007316589,grad_norm: 0.8867965129027039, iteration: 287843
loss: 1.0102202892303467,grad_norm: 0.8960442957290318, iteration: 287844
loss: 0.9949229955673218,grad_norm: 0.7361515079389195, iteration: 287845
loss: 1.027403712272644,grad_norm: 0.8566154095836106, iteration: 287846
loss: 1.0047211647033691,grad_norm: 0.9535874068173691, iteration: 287847
loss: 1.0034691095352173,grad_norm: 0.7986933498976866, iteration: 287848
loss: 0.9497160315513611,grad_norm: 0.8059470225018698, iteration: 287849
loss: 0.9883884787559509,grad_norm: 0.8064711291306021, iteration: 287850
loss: 0.9680710434913635,grad_norm: 0.8435289450421318, iteration: 287851
loss: 0.9756308197975159,grad_norm: 0.7700758777987721, iteration: 287852
loss: 0.9639713168144226,grad_norm: 0.7381657897247192, iteration: 287853
loss: 1.160414218902588,grad_norm: 0.999999252968948, iteration: 287854
loss: 0.9956011176109314,grad_norm: 0.8896627299465731, iteration: 287855
loss: 1.007778286933899,grad_norm: 0.9936296457685431, iteration: 287856
loss: 1.0236296653747559,grad_norm: 0.9999993888698668, iteration: 287857
loss: 0.9761775732040405,grad_norm: 0.9999996070208083, iteration: 287858
loss: 1.0099561214447021,grad_norm: 0.7783683511074343, iteration: 287859
loss: 0.9622122049331665,grad_norm: 0.7604180421893391, iteration: 287860
loss: 1.0301783084869385,grad_norm: 0.8774801805934754, iteration: 287861
loss: 1.0001214742660522,grad_norm: 0.826961577947418, iteration: 287862
loss: 0.9977002739906311,grad_norm: 0.8561794516576422, iteration: 287863
loss: 1.0235300064086914,grad_norm: 0.7707626178222446, iteration: 287864
loss: 0.9699404239654541,grad_norm: 0.8917003195002182, iteration: 287865
loss: 0.9894570112228394,grad_norm: 0.9414887234849286, iteration: 287866
loss: 1.012894630432129,grad_norm: 0.8927815419054083, iteration: 287867
loss: 1.0262736082077026,grad_norm: 0.9650411898135084, iteration: 287868
loss: 1.0494091510772705,grad_norm: 0.999999205428313, iteration: 287869
loss: 1.074178695678711,grad_norm: 0.9999998501493179, iteration: 287870
loss: 0.9505847692489624,grad_norm: 0.8037881179529553, iteration: 287871
loss: 0.9955992102622986,grad_norm: 0.8449488609573241, iteration: 287872
loss: 1.018278956413269,grad_norm: 0.9170708364099079, iteration: 287873
loss: 1.0116366147994995,grad_norm: 0.7633758158264933, iteration: 287874
loss: 1.0449411869049072,grad_norm: 0.8473932175935084, iteration: 287875
loss: 1.0123190879821777,grad_norm: 0.939063778380304, iteration: 287876
loss: 0.9794619679450989,grad_norm: 0.8298727922023618, iteration: 287877
loss: 1.023682713508606,grad_norm: 0.7689093021977007, iteration: 287878
loss: 0.9655100703239441,grad_norm: 0.9441549177858148, iteration: 287879
loss: 1.0384225845336914,grad_norm: 0.9999998717678935, iteration: 287880
loss: 1.043635368347168,grad_norm: 0.999999103972444, iteration: 287881
loss: 1.0169520378112793,grad_norm: 0.9999997467594735, iteration: 287882
loss: 0.9841193556785583,grad_norm: 0.8928685072223492, iteration: 287883
loss: 1.0368552207946777,grad_norm: 0.9999998982907082, iteration: 287884
loss: 1.0305358171463013,grad_norm: 0.7665844893330482, iteration: 287885
loss: 0.9544943571090698,grad_norm: 0.9119641972920326, iteration: 287886
loss: 1.0007299184799194,grad_norm: 0.9999993546181984, iteration: 287887
loss: 0.9720512628555298,grad_norm: 0.7731392048027731, iteration: 287888
loss: 1.0623902082443237,grad_norm: 0.9999998841241283, iteration: 287889
loss: 0.9872158765792847,grad_norm: 0.9999990452542223, iteration: 287890
loss: 0.9982103705406189,grad_norm: 0.8500184610443664, iteration: 287891
loss: 0.9830412268638611,grad_norm: 0.9009259057715585, iteration: 287892
loss: 0.950537919998169,grad_norm: 0.9999992096875757, iteration: 287893
loss: 1.0330777168273926,grad_norm: 0.8240081613850493, iteration: 287894
loss: 0.9621939659118652,grad_norm: 0.8299409668043907, iteration: 287895
loss: 0.9769493937492371,grad_norm: 0.9820177830864955, iteration: 287896
loss: 1.054412603378296,grad_norm: 0.9999994205164285, iteration: 287897
loss: 1.017385482788086,grad_norm: 0.7486292432190943, iteration: 287898
loss: 1.2138181924819946,grad_norm: 0.9999997721581632, iteration: 287899
loss: 1.0079622268676758,grad_norm: 0.7787840351499757, iteration: 287900
loss: 1.0207101106643677,grad_norm: 0.9288635549413263, iteration: 287901
loss: 1.0018779039382935,grad_norm: 0.831507110902458, iteration: 287902
loss: 0.970539391040802,grad_norm: 0.8484973183782843, iteration: 287903
loss: 1.0067121982574463,grad_norm: 0.8604463274624995, iteration: 287904
loss: 1.015134334564209,grad_norm: 0.9670896798202252, iteration: 287905
loss: 1.0439866781234741,grad_norm: 0.7885373539215185, iteration: 287906
loss: 1.0093519687652588,grad_norm: 0.9062565362853399, iteration: 287907
loss: 0.9835523366928101,grad_norm: 0.7893593827431725, iteration: 287908
loss: 0.9908556938171387,grad_norm: 0.9727925180836897, iteration: 287909
loss: 1.0692543983459473,grad_norm: 0.9999991208086715, iteration: 287910
loss: 1.0976035594940186,grad_norm: 1.0000000173506696, iteration: 287911
loss: 1.0499131679534912,grad_norm: 0.9999993255630832, iteration: 287912
loss: 1.0267621278762817,grad_norm: 0.7958159341396284, iteration: 287913
loss: 0.9838812351226807,grad_norm: 0.8976865456779544, iteration: 287914
loss: 0.9649634957313538,grad_norm: 0.8975453175507894, iteration: 287915
loss: 1.0109935998916626,grad_norm: 0.9999995255113159, iteration: 287916
loss: 0.9837554097175598,grad_norm: 0.9539367573400857, iteration: 287917
loss: 1.038216471672058,grad_norm: 0.9999995452329502, iteration: 287918
loss: 1.0083101987838745,grad_norm: 0.849358849127551, iteration: 287919
loss: 1.0076783895492554,grad_norm: 0.8880390588786353, iteration: 287920
loss: 1.0088967084884644,grad_norm: 0.9557954411069148, iteration: 287921
loss: 1.0259568691253662,grad_norm: 0.9963972144497372, iteration: 287922
loss: 1.0655813217163086,grad_norm: 0.9786126012755262, iteration: 287923
loss: 1.0073944330215454,grad_norm: 0.9197116531545488, iteration: 287924
loss: 1.0415462255477905,grad_norm: 0.9999991367866564, iteration: 287925
loss: 0.9955705404281616,grad_norm: 0.8298843709054745, iteration: 287926
loss: 0.9949203729629517,grad_norm: 0.9999993043319516, iteration: 287927
loss: 1.031380295753479,grad_norm: 0.8090468475882217, iteration: 287928
loss: 0.9831326007843018,grad_norm: 0.6947453086368145, iteration: 287929
loss: 0.9936161637306213,grad_norm: 0.7721995327199128, iteration: 287930
loss: 0.993703305721283,grad_norm: 0.7607752481553377, iteration: 287931
loss: 1.071989893913269,grad_norm: 0.9999993958076829, iteration: 287932
loss: 1.0988575220108032,grad_norm: 0.9999999264814722, iteration: 287933
loss: 1.0206475257873535,grad_norm: 0.9999991903216406, iteration: 287934
loss: 1.0032906532287598,grad_norm: 0.8119798522998328, iteration: 287935
loss: 0.9834100008010864,grad_norm: 0.9290398161408743, iteration: 287936
loss: 0.9956570863723755,grad_norm: 0.9999991572063134, iteration: 287937
loss: 1.0264286994934082,grad_norm: 0.999999389554007, iteration: 287938
loss: 1.0051944255828857,grad_norm: 0.9981486111842366, iteration: 287939
loss: 1.0077526569366455,grad_norm: 0.9107614165868827, iteration: 287940
loss: 0.9940879940986633,grad_norm: 0.8439715420890176, iteration: 287941
loss: 1.0095446109771729,grad_norm: 0.6707979605329036, iteration: 287942
loss: 1.0047526359558105,grad_norm: 0.8847530839658607, iteration: 287943
loss: 1.010690689086914,grad_norm: 0.8782299636482331, iteration: 287944
loss: 0.9897644519805908,grad_norm: 0.9793906136575021, iteration: 287945
loss: 1.0069345235824585,grad_norm: 0.9103186081729179, iteration: 287946
loss: 1.0016419887542725,grad_norm: 0.7854377270319247, iteration: 287947
loss: 0.9948648810386658,grad_norm: 0.7482184831779953, iteration: 287948
loss: 1.0976773500442505,grad_norm: 0.9999995501600201, iteration: 287949
loss: 1.010353922843933,grad_norm: 0.8702828708581591, iteration: 287950
loss: 1.0002169609069824,grad_norm: 0.7561711588769497, iteration: 287951
loss: 0.9672784805297852,grad_norm: 0.8528514006380001, iteration: 287952
loss: 0.9958617091178894,grad_norm: 0.9833958469412724, iteration: 287953
loss: 0.9632347226142883,grad_norm: 0.7687887682833671, iteration: 287954
loss: 1.0379304885864258,grad_norm: 0.9614510569255861, iteration: 287955
loss: 0.9910454154014587,grad_norm: 0.8824629426784915, iteration: 287956
loss: 1.0254045724868774,grad_norm: 0.9999998717814821, iteration: 287957
loss: 1.0416913032531738,grad_norm: 0.928123379945003, iteration: 287958
loss: 1.034729242324829,grad_norm: 0.999999312733837, iteration: 287959
loss: 1.0373190641403198,grad_norm: 0.9631790536722344, iteration: 287960
loss: 1.049428939819336,grad_norm: 0.999999606663623, iteration: 287961
loss: 1.0256778001785278,grad_norm: 0.9069975082980112, iteration: 287962
loss: 1.0065094232559204,grad_norm: 0.7978365660896993, iteration: 287963
loss: 1.0122005939483643,grad_norm: 0.8145055582582218, iteration: 287964
loss: 1.014787197113037,grad_norm: 0.858799007703456, iteration: 287965
loss: 1.0349739789962769,grad_norm: 0.9235343528035471, iteration: 287966
loss: 0.9732493162155151,grad_norm: 0.9999990420284836, iteration: 287967
loss: 1.0110543966293335,grad_norm: 0.8908400506410006, iteration: 287968
loss: 1.0495986938476562,grad_norm: 0.9999992039763953, iteration: 287969
loss: 1.012562870979309,grad_norm: 0.932730778145713, iteration: 287970
loss: 0.9995933771133423,grad_norm: 0.8896615570462176, iteration: 287971
loss: 1.0273479223251343,grad_norm: 0.8052710805196601, iteration: 287972
loss: 0.9856126308441162,grad_norm: 0.6292052381377977, iteration: 287973
loss: 1.0248029232025146,grad_norm: 0.9999995535486788, iteration: 287974
loss: 1.020375370979309,grad_norm: 0.8310123410057711, iteration: 287975
loss: 0.9739106297492981,grad_norm: 0.999999085960324, iteration: 287976
loss: 0.9692083597183228,grad_norm: 0.936222897463052, iteration: 287977
loss: 1.0720858573913574,grad_norm: 0.9999990780633712, iteration: 287978
loss: 1.009912133216858,grad_norm: 0.7862345431163413, iteration: 287979
loss: 0.9974721074104309,grad_norm: 0.8759768808245691, iteration: 287980
loss: 0.9988768696784973,grad_norm: 0.8721201009571417, iteration: 287981
loss: 0.996677041053772,grad_norm: 0.8748328230114792, iteration: 287982
loss: 0.9955213665962219,grad_norm: 0.9387780172630928, iteration: 287983
loss: 0.9858438968658447,grad_norm: 0.6277786314669128, iteration: 287984
loss: 0.9680749177932739,grad_norm: 0.8681821797374264, iteration: 287985
loss: 1.006962537765503,grad_norm: 0.7616819892723764, iteration: 287986
loss: 0.9815508127212524,grad_norm: 0.9977847891560601, iteration: 287987
loss: 0.9997407793998718,grad_norm: 0.8895673026596825, iteration: 287988
loss: 1.0022125244140625,grad_norm: 0.8507729603539647, iteration: 287989
loss: 0.9469051361083984,grad_norm: 0.8544590530494717, iteration: 287990
loss: 1.0199253559112549,grad_norm: 0.9999995936430967, iteration: 287991
loss: 1.0121194124221802,grad_norm: 0.9518530080774182, iteration: 287992
loss: 1.0505688190460205,grad_norm: 0.9350505716940435, iteration: 287993
loss: 1.0106199979782104,grad_norm: 0.8745354293673674, iteration: 287994
loss: 0.975567102432251,grad_norm: 0.9341774186048966, iteration: 287995
loss: 1.005132794380188,grad_norm: 0.765190085803387, iteration: 287996
loss: 1.0551092624664307,grad_norm: 0.9999993807491778, iteration: 287997
loss: 1.0213117599487305,grad_norm: 0.8203374865207806, iteration: 287998
loss: 0.9876427054405212,grad_norm: 0.9022644289753672, iteration: 287999
loss: 1.022761344909668,grad_norm: 0.8346037920944958, iteration: 288000
loss: 1.0613592863082886,grad_norm: 0.9356964914331547, iteration: 288001
loss: 1.027858853340149,grad_norm: 0.794478851845, iteration: 288002
loss: 0.9977115392684937,grad_norm: 0.9040042553065545, iteration: 288003
loss: 0.9715389013290405,grad_norm: 0.8506797348151286, iteration: 288004
loss: 0.9899013638496399,grad_norm: 0.8593285155907072, iteration: 288005
loss: 0.992041826248169,grad_norm: 0.9587638400530362, iteration: 288006
loss: 0.9858839511871338,grad_norm: 0.8861044420302868, iteration: 288007
loss: 0.9514700770378113,grad_norm: 0.7587237793667617, iteration: 288008
loss: 1.0031301975250244,grad_norm: 0.8389396760137987, iteration: 288009
loss: 0.9943265914916992,grad_norm: 0.916383210442848, iteration: 288010
loss: 1.0220677852630615,grad_norm: 0.9744415698710855, iteration: 288011
loss: 0.9817270636558533,grad_norm: 0.724797166880926, iteration: 288012
loss: 0.9970049858093262,grad_norm: 0.9448081609092581, iteration: 288013
loss: 1.0004096031188965,grad_norm: 0.7446225538468054, iteration: 288014
loss: 0.991938054561615,grad_norm: 0.9999990991388462, iteration: 288015
loss: 1.01616632938385,grad_norm: 0.7894059782589242, iteration: 288016
loss: 0.9965357780456543,grad_norm: 0.8987104765157692, iteration: 288017
loss: 0.9852389097213745,grad_norm: 0.8919497816179168, iteration: 288018
loss: 1.0123564004898071,grad_norm: 0.717300326885248, iteration: 288019
loss: 1.0132710933685303,grad_norm: 0.8998114267669598, iteration: 288020
loss: 0.9819565415382385,grad_norm: 0.9216789860269862, iteration: 288021
loss: 1.001848816871643,grad_norm: 0.8335934796184302, iteration: 288022
loss: 0.9760216474533081,grad_norm: 0.9509090542062595, iteration: 288023
loss: 1.0296443700790405,grad_norm: 0.8514184274649187, iteration: 288024
loss: 0.9939184188842773,grad_norm: 0.9133667288283962, iteration: 288025
loss: 0.9974954724311829,grad_norm: 0.8512851964423501, iteration: 288026
loss: 1.052268147468567,grad_norm: 0.9999994250478826, iteration: 288027
loss: 1.0289016962051392,grad_norm: 0.9063785561360734, iteration: 288028
loss: 0.9986612200737,grad_norm: 0.7241591470116318, iteration: 288029
loss: 1.0166329145431519,grad_norm: 0.9999991371108188, iteration: 288030
loss: 0.9635847210884094,grad_norm: 0.9999991510361163, iteration: 288031
loss: 1.0070796012878418,grad_norm: 0.9334073989283136, iteration: 288032
loss: 1.0323822498321533,grad_norm: 0.819937148795055, iteration: 288033
loss: 1.0122753381729126,grad_norm: 0.8356646616463103, iteration: 288034
loss: 1.0888984203338623,grad_norm: 0.9999996277830668, iteration: 288035
loss: 0.97965407371521,grad_norm: 0.9579847406874951, iteration: 288036
loss: 1.0548145771026611,grad_norm: 0.9999991157476255, iteration: 288037
loss: 1.091915249824524,grad_norm: 0.9999991694642489, iteration: 288038
loss: 0.9553750157356262,grad_norm: 0.9300959064676386, iteration: 288039
loss: 0.977072536945343,grad_norm: 0.9657732124239398, iteration: 288040
loss: 0.9875020384788513,grad_norm: 0.7193133190634073, iteration: 288041
loss: 1.014174222946167,grad_norm: 0.9480961015350742, iteration: 288042
loss: 1.0027801990509033,grad_norm: 0.9999990893921659, iteration: 288043
loss: 0.996410071849823,grad_norm: 0.875563360682494, iteration: 288044
loss: 1.04990816116333,grad_norm: 0.9999992131420435, iteration: 288045
loss: 0.974987804889679,grad_norm: 0.8344119397366753, iteration: 288046
loss: 1.071665644645691,grad_norm: 0.7921573563291743, iteration: 288047
loss: 1.002099633216858,grad_norm: 0.9610575172220975, iteration: 288048
loss: 1.0115671157836914,grad_norm: 0.7268485877809063, iteration: 288049
loss: 0.9929477572441101,grad_norm: 0.8026133651801455, iteration: 288050
loss: 0.9740267395973206,grad_norm: 0.9929904727568835, iteration: 288051
loss: 0.9966015219688416,grad_norm: 0.8916763798089679, iteration: 288052
loss: 0.9935809969902039,grad_norm: 0.9316730565200924, iteration: 288053
loss: 0.9908454418182373,grad_norm: 0.976490893725136, iteration: 288054
loss: 0.9890396595001221,grad_norm: 0.8116588537045523, iteration: 288055
loss: 0.983993649482727,grad_norm: 0.999999159532118, iteration: 288056
loss: 0.9757071137428284,grad_norm: 0.8349400628134843, iteration: 288057
loss: 0.9941186308860779,grad_norm: 0.7606602669789481, iteration: 288058
loss: 1.0058493614196777,grad_norm: 0.867715240801636, iteration: 288059
loss: 0.9558913111686707,grad_norm: 0.8967416649235017, iteration: 288060
loss: 1.070858120918274,grad_norm: 0.9999996999849061, iteration: 288061
loss: 0.9897980690002441,grad_norm: 0.9999993447835355, iteration: 288062
loss: 1.0340189933776855,grad_norm: 0.9999994008431348, iteration: 288063
loss: 1.061163067817688,grad_norm: 0.7557618075298219, iteration: 288064
loss: 0.9918125867843628,grad_norm: 0.8633749684377126, iteration: 288065
loss: 1.0046632289886475,grad_norm: 0.8323910458907728, iteration: 288066
loss: 0.9902253150939941,grad_norm: 0.8979382413397518, iteration: 288067
loss: 1.0028148889541626,grad_norm: 0.8146845212790885, iteration: 288068
loss: 1.000808596611023,grad_norm: 0.798408397294722, iteration: 288069
loss: 1.0235352516174316,grad_norm: 0.8288572540108042, iteration: 288070
loss: 0.9933900237083435,grad_norm: 0.9423881694272219, iteration: 288071
loss: 0.9742091298103333,grad_norm: 0.7927822595150225, iteration: 288072
loss: 1.0502605438232422,grad_norm: 0.9999993585550679, iteration: 288073
loss: 1.0277608633041382,grad_norm: 0.9999990995341216, iteration: 288074
loss: 1.0425963401794434,grad_norm: 0.9999988853550251, iteration: 288075
loss: 0.9955931901931763,grad_norm: 0.9442111330543188, iteration: 288076
loss: 0.9898833632469177,grad_norm: 0.8552214209085474, iteration: 288077
loss: 1.0233227014541626,grad_norm: 0.8405865522660481, iteration: 288078
loss: 0.9859271049499512,grad_norm: 0.7952942445135566, iteration: 288079
loss: 1.0103256702423096,grad_norm: 0.9999990633799454, iteration: 288080
loss: 0.9861925840377808,grad_norm: 0.796458505838745, iteration: 288081
loss: 0.9611255526542664,grad_norm: 0.8696303785671873, iteration: 288082
loss: 0.978075385093689,grad_norm: 0.9207281627081044, iteration: 288083
loss: 1.008909821510315,grad_norm: 0.9999991158939878, iteration: 288084
loss: 1.023474097251892,grad_norm: 0.8095475737235307, iteration: 288085
loss: 1.061000108718872,grad_norm: 0.8781657223288162, iteration: 288086
loss: 0.9581443071365356,grad_norm: 0.8496055327477999, iteration: 288087
loss: 1.0042052268981934,grad_norm: 0.8252502615472697, iteration: 288088
loss: 0.9702311754226685,grad_norm: 0.7896410672943989, iteration: 288089
loss: 1.07425057888031,grad_norm: 0.9999992226459778, iteration: 288090
loss: 1.0153543949127197,grad_norm: 0.8490971918300144, iteration: 288091
loss: 1.0006442070007324,grad_norm: 0.9055444025173633, iteration: 288092
loss: 1.0501697063446045,grad_norm: 0.99836921391679, iteration: 288093
loss: 0.9852836728096008,grad_norm: 0.6418026269778275, iteration: 288094
loss: 1.0345814228057861,grad_norm: 0.6974153082486265, iteration: 288095
loss: 1.048586130142212,grad_norm: 0.8434532640048181, iteration: 288096
loss: 0.9875110983848572,grad_norm: 0.7953380400900686, iteration: 288097
loss: 1.0368213653564453,grad_norm: 0.9999996417458479, iteration: 288098
loss: 0.9869179129600525,grad_norm: 0.867538097447554, iteration: 288099
loss: 1.028505563735962,grad_norm: 0.9015787005014512, iteration: 288100
loss: 1.0001932382583618,grad_norm: 0.9527929140301148, iteration: 288101
loss: 0.9776223301887512,grad_norm: 0.9195592568918564, iteration: 288102
loss: 0.9936703443527222,grad_norm: 0.8868367552428024, iteration: 288103
loss: 0.9841825366020203,grad_norm: 0.9159192734219428, iteration: 288104
loss: 0.9726532101631165,grad_norm: 0.7496527796499847, iteration: 288105
loss: 1.0849599838256836,grad_norm: 0.9999993841343321, iteration: 288106
loss: 0.9833604693412781,grad_norm: 0.9999989293927751, iteration: 288107
loss: 1.0141712427139282,grad_norm: 0.7277527133484758, iteration: 288108
loss: 1.0013105869293213,grad_norm: 0.7836981385056084, iteration: 288109
loss: 0.9691630601882935,grad_norm: 0.9986546334919971, iteration: 288110
loss: 1.0223755836486816,grad_norm: 0.9999989769838601, iteration: 288111
loss: 0.9881334900856018,grad_norm: 0.7725795240381703, iteration: 288112
loss: 1.0110876560211182,grad_norm: 0.9037340814932842, iteration: 288113
loss: 1.0302642583847046,grad_norm: 0.9230537874522879, iteration: 288114
loss: 0.9694730639457703,grad_norm: 0.798448446425284, iteration: 288115
loss: 1.0472933053970337,grad_norm: 0.9999996048395913, iteration: 288116
loss: 0.9603835940361023,grad_norm: 0.8251119038926591, iteration: 288117
loss: 1.0166654586791992,grad_norm: 0.8054641678843335, iteration: 288118
loss: 0.9916040301322937,grad_norm: 0.8452025177405531, iteration: 288119
loss: 1.0461207628250122,grad_norm: 0.7199471283743898, iteration: 288120
loss: 0.9800618886947632,grad_norm: 0.7970592038597158, iteration: 288121
loss: 1.002624750137329,grad_norm: 0.9999990013972859, iteration: 288122
loss: 0.9663577675819397,grad_norm: 0.7982205146538452, iteration: 288123
loss: 1.0143402814865112,grad_norm: 0.9403995814402845, iteration: 288124
loss: 0.9890704154968262,grad_norm: 0.8056482048922597, iteration: 288125
loss: 0.9949506521224976,grad_norm: 0.9667554231186509, iteration: 288126
loss: 1.0019314289093018,grad_norm: 0.9782967339502652, iteration: 288127
loss: 1.048353910446167,grad_norm: 0.8369470892547572, iteration: 288128
loss: 1.0128238201141357,grad_norm: 0.9661578361571569, iteration: 288129
loss: 0.9919212460517883,grad_norm: 0.8678813209873908, iteration: 288130
loss: 1.0730674266815186,grad_norm: 0.7430809797271085, iteration: 288131
loss: 0.9838926792144775,grad_norm: 0.9999994116810845, iteration: 288132
loss: 0.9970473051071167,grad_norm: 0.9999990682965246, iteration: 288133
loss: 1.0225937366485596,grad_norm: 0.9266340823795669, iteration: 288134
loss: 1.0083972215652466,grad_norm: 0.8074360603048274, iteration: 288135
loss: 1.0452903509140015,grad_norm: 0.9999998642240951, iteration: 288136
loss: 1.0024317502975464,grad_norm: 0.8735409663885462, iteration: 288137
loss: 0.9899004101753235,grad_norm: 0.7711313519765247, iteration: 288138
loss: 0.9778091907501221,grad_norm: 0.9999991585875339, iteration: 288139
loss: 0.9951688051223755,grad_norm: 0.8527966399941683, iteration: 288140
loss: 0.9687630534172058,grad_norm: 0.8975851086393432, iteration: 288141
loss: 0.9922325611114502,grad_norm: 0.9999991573515131, iteration: 288142
loss: 0.9990589022636414,grad_norm: 0.9287802078298847, iteration: 288143
loss: 1.0192179679870605,grad_norm: 0.6801794727563475, iteration: 288144
loss: 1.0116268396377563,grad_norm: 0.7173787150526612, iteration: 288145
loss: 0.99721759557724,grad_norm: 0.9999992743196393, iteration: 288146
loss: 0.975500226020813,grad_norm: 0.9945246207013803, iteration: 288147
loss: 0.9694507718086243,grad_norm: 0.94746399062524, iteration: 288148
loss: 0.9960777759552002,grad_norm: 0.9999992030990691, iteration: 288149
loss: 0.9801732897758484,grad_norm: 0.9711984058670432, iteration: 288150
loss: 1.0147395133972168,grad_norm: 0.8766680547284641, iteration: 288151
loss: 0.9620932936668396,grad_norm: 0.8307417859918367, iteration: 288152
loss: 0.9608825445175171,grad_norm: 0.7341043718279922, iteration: 288153
loss: 1.0432358980178833,grad_norm: 0.9999996987098577, iteration: 288154
loss: 1.0874230861663818,grad_norm: 0.9999995315288668, iteration: 288155
loss: 0.9809105396270752,grad_norm: 0.7061667603743703, iteration: 288156
loss: 1.0334866046905518,grad_norm: 0.7919249572106352, iteration: 288157
loss: 0.97037672996521,grad_norm: 0.8882954933894317, iteration: 288158
loss: 1.0107758045196533,grad_norm: 0.8989602540441033, iteration: 288159
loss: 1.0244419574737549,grad_norm: 0.9394966000761681, iteration: 288160
loss: 1.023938536643982,grad_norm: 0.8361164449672949, iteration: 288161
loss: 1.0382356643676758,grad_norm: 0.9999997592077758, iteration: 288162
loss: 0.9995551705360413,grad_norm: 0.7170965490904605, iteration: 288163
loss: 0.9694498777389526,grad_norm: 0.8166217976314907, iteration: 288164
loss: 1.0348116159439087,grad_norm: 0.8116753668383958, iteration: 288165
loss: 1.037983775138855,grad_norm: 0.9999991066662944, iteration: 288166
loss: 1.1008282899856567,grad_norm: 0.9999993098378265, iteration: 288167
loss: 0.9714779257774353,grad_norm: 0.9999992417335404, iteration: 288168
loss: 1.0158889293670654,grad_norm: 0.9475658042595053, iteration: 288169
loss: 1.0096161365509033,grad_norm: 0.8262922207561487, iteration: 288170
loss: 0.986061155796051,grad_norm: 0.7693051042627274, iteration: 288171
loss: 1.0112745761871338,grad_norm: 0.911634163026106, iteration: 288172
loss: 1.014025092124939,grad_norm: 0.9999991455073413, iteration: 288173
loss: 1.0072033405303955,grad_norm: 0.8465802690870964, iteration: 288174
loss: 0.9688175916671753,grad_norm: 0.9145072543769193, iteration: 288175
loss: 0.9936071634292603,grad_norm: 0.7824248471105506, iteration: 288176
loss: 0.9828973412513733,grad_norm: 0.725496705665382, iteration: 288177
loss: 0.9607340693473816,grad_norm: 0.8188131907395135, iteration: 288178
loss: 1.1247215270996094,grad_norm: 0.9999995770206369, iteration: 288179
loss: 1.0096150636672974,grad_norm: 0.9943814145775705, iteration: 288180
loss: 1.0075522661209106,grad_norm: 0.8474614311193871, iteration: 288181
loss: 1.0243594646453857,grad_norm: 0.7470901676597023, iteration: 288182
loss: 0.9915369153022766,grad_norm: 0.7153491030228218, iteration: 288183
loss: 1.0023884773254395,grad_norm: 0.7761601202612233, iteration: 288184
loss: 1.0086826086044312,grad_norm: 0.9999994918199531, iteration: 288185
loss: 1.052686095237732,grad_norm: 0.9999991344371189, iteration: 288186
loss: 0.978579580783844,grad_norm: 0.9999990162718374, iteration: 288187
loss: 0.9724791049957275,grad_norm: 0.9248369037318259, iteration: 288188
loss: 1.0108391046524048,grad_norm: 0.8876219163943524, iteration: 288189
loss: 0.9847444295883179,grad_norm: 0.7807988429154963, iteration: 288190
loss: 0.97901451587677,grad_norm: 0.9362527708544662, iteration: 288191
loss: 1.018126368522644,grad_norm: 0.99999905227808, iteration: 288192
loss: 0.9924209117889404,grad_norm: 0.7518835213451195, iteration: 288193
loss: 1.0109866857528687,grad_norm: 0.8174568341053537, iteration: 288194
loss: 0.9892790913581848,grad_norm: 0.8689858539834032, iteration: 288195
loss: 0.9997074007987976,grad_norm: 0.79316618831349, iteration: 288196
loss: 1.02576744556427,grad_norm: 0.9408355831904401, iteration: 288197
loss: 0.9985418319702148,grad_norm: 0.8808834559516298, iteration: 288198
loss: 1.0171213150024414,grad_norm: 0.8176505580761835, iteration: 288199
loss: 0.998653769493103,grad_norm: 0.8286644001415373, iteration: 288200
loss: 0.989640474319458,grad_norm: 0.8338761486224688, iteration: 288201
loss: 1.0440657138824463,grad_norm: 0.9999999288768758, iteration: 288202
loss: 0.952380359172821,grad_norm: 0.8962216969362342, iteration: 288203
loss: 1.0359135866165161,grad_norm: 0.9999992135575406, iteration: 288204
loss: 0.9583061337471008,grad_norm: 0.8240463242180996, iteration: 288205
loss: 0.9910179376602173,grad_norm: 0.7654619825068554, iteration: 288206
loss: 1.043017864227295,grad_norm: 0.8169612658753518, iteration: 288207
loss: 1.0106313228607178,grad_norm: 0.9550277051901609, iteration: 288208
loss: 0.9868130683898926,grad_norm: 0.999536322852719, iteration: 288209
loss: 0.9974876642227173,grad_norm: 0.787913580034315, iteration: 288210
loss: 1.0290627479553223,grad_norm: 0.7256645059434558, iteration: 288211
loss: 1.0097144842147827,grad_norm: 0.87438389011357, iteration: 288212
loss: 1.0047438144683838,grad_norm: 0.7920755392849904, iteration: 288213
loss: 0.9743388891220093,grad_norm: 0.8356515406141679, iteration: 288214
loss: 1.0192551612854004,grad_norm: 0.7676759706713925, iteration: 288215
loss: 1.0156710147857666,grad_norm: 0.8787133677206357, iteration: 288216
loss: 1.0067163705825806,grad_norm: 0.8285028759071703, iteration: 288217
loss: 1.0084365606307983,grad_norm: 0.8019032181221794, iteration: 288218
loss: 0.9899552464485168,grad_norm: 0.9044115166733443, iteration: 288219
loss: 0.9868939518928528,grad_norm: 0.8387978925464113, iteration: 288220
loss: 0.962510883808136,grad_norm: 0.9740168243729623, iteration: 288221
loss: 1.0248790979385376,grad_norm: 0.8014119633398051, iteration: 288222
loss: 1.0159893035888672,grad_norm: 0.8420851601338702, iteration: 288223
loss: 0.9879714250564575,grad_norm: 0.9678495126624508, iteration: 288224
loss: 0.9798324704170227,grad_norm: 0.9610869879892638, iteration: 288225
loss: 1.0148831605911255,grad_norm: 0.9999990980604726, iteration: 288226
loss: 1.0085371732711792,grad_norm: 0.8452354182054931, iteration: 288227
loss: 0.9942890405654907,grad_norm: 0.7286113272143052, iteration: 288228
loss: 1.0552695989608765,grad_norm: 0.7582124785997517, iteration: 288229
loss: 0.9929280877113342,grad_norm: 0.7762118664814532, iteration: 288230
loss: 1.0285569429397583,grad_norm: 0.9528280364890426, iteration: 288231
loss: 0.9619194865226746,grad_norm: 0.7922022146328008, iteration: 288232
loss: 1.0227395296096802,grad_norm: 0.9995723636181837, iteration: 288233
loss: 0.9991949200630188,grad_norm: 0.8079231051665081, iteration: 288234
loss: 0.9907858371734619,grad_norm: 0.7741761923757294, iteration: 288235
loss: 0.9566830992698669,grad_norm: 0.8755742671687154, iteration: 288236
loss: 1.0134894847869873,grad_norm: 0.8235875945907863, iteration: 288237
loss: 1.0207749605178833,grad_norm: 0.9999991811251577, iteration: 288238
loss: 1.0046461820602417,grad_norm: 0.9999992079137039, iteration: 288239
loss: 1.055909514427185,grad_norm: 0.8515619167002889, iteration: 288240
loss: 1.0163134336471558,grad_norm: 0.8988660963738504, iteration: 288241
loss: 1.0402989387512207,grad_norm: 0.8572895662764047, iteration: 288242
loss: 1.0292240381240845,grad_norm: 0.9999989701242764, iteration: 288243
loss: 1.038476586341858,grad_norm: 0.864648001917978, iteration: 288244
loss: 0.991843044757843,grad_norm: 0.9627727172157707, iteration: 288245
loss: 0.9825960993766785,grad_norm: 0.8422810123371048, iteration: 288246
loss: 0.9845594167709351,grad_norm: 0.9999990739747855, iteration: 288247
loss: 0.9936615824699402,grad_norm: 0.8227805196446489, iteration: 288248
loss: 0.9630029201507568,grad_norm: 0.8651533413413771, iteration: 288249
loss: 1.0286937952041626,grad_norm: 0.9999989860773563, iteration: 288250
loss: 1.0009393692016602,grad_norm: 0.845968485972074, iteration: 288251
loss: 0.966759204864502,grad_norm: 0.871340307249188, iteration: 288252
loss: 1.0104973316192627,grad_norm: 0.9401455001447451, iteration: 288253
loss: 1.0288602113723755,grad_norm: 0.9553618680797568, iteration: 288254
loss: 0.9838619232177734,grad_norm: 0.9901596508074119, iteration: 288255
loss: 1.0482778549194336,grad_norm: 0.8244862697514125, iteration: 288256
loss: 0.9636117219924927,grad_norm: 0.9150625536169192, iteration: 288257
loss: 0.9881435632705688,grad_norm: 0.9579933580141811, iteration: 288258
loss: 1.028554081916809,grad_norm: 0.8909355499733421, iteration: 288259
loss: 0.9857925176620483,grad_norm: 0.8399150566531443, iteration: 288260
loss: 1.0118961334228516,grad_norm: 0.7818356434892204, iteration: 288261
loss: 0.9796115159988403,grad_norm: 0.9353591525202671, iteration: 288262
loss: 0.9845475554466248,grad_norm: 1.0000000104406197, iteration: 288263
loss: 0.9833564162254333,grad_norm: 0.9051986158955608, iteration: 288264
loss: 0.9802649617195129,grad_norm: 0.7718482641384442, iteration: 288265
loss: 0.9819844961166382,grad_norm: 0.8841118410330674, iteration: 288266
loss: 0.9947428107261658,grad_norm: 0.8323861819359016, iteration: 288267
loss: 1.0428218841552734,grad_norm: 0.92906410143285, iteration: 288268
loss: 0.9704989790916443,grad_norm: 0.9117550087723505, iteration: 288269
loss: 1.000512719154358,grad_norm: 0.7419127276498293, iteration: 288270
loss: 1.0141767263412476,grad_norm: 0.9043769767725511, iteration: 288271
loss: 1.0104458332061768,grad_norm: 0.6994662452885989, iteration: 288272
loss: 1.0115023851394653,grad_norm: 0.968867731415027, iteration: 288273
loss: 0.9976065158843994,grad_norm: 0.8439093205737914, iteration: 288274
loss: 1.0293067693710327,grad_norm: 0.9055257626669484, iteration: 288275
loss: 0.9853891134262085,grad_norm: 0.9026260200126242, iteration: 288276
loss: 1.071946144104004,grad_norm: 0.7652396326879086, iteration: 288277
loss: 0.9816259741783142,grad_norm: 0.8272621712989537, iteration: 288278
loss: 1.038853645324707,grad_norm: 0.9999995445093752, iteration: 288279
loss: 1.0021971464157104,grad_norm: 0.9796311454586243, iteration: 288280
loss: 1.001619577407837,grad_norm: 0.7803195793658328, iteration: 288281
loss: 0.9884389042854309,grad_norm: 0.8734789912787031, iteration: 288282
loss: 0.9908518195152283,grad_norm: 0.7809181008488667, iteration: 288283
loss: 1.0088794231414795,grad_norm: 0.8342055501157349, iteration: 288284
loss: 1.0265696048736572,grad_norm: 0.8570908558636312, iteration: 288285
loss: 1.0329171419143677,grad_norm: 0.9999996726040334, iteration: 288286
loss: 0.9871684312820435,grad_norm: 0.7448726032396372, iteration: 288287
loss: 0.9907805323600769,grad_norm: 0.999998998394586, iteration: 288288
loss: 0.9905241131782532,grad_norm: 0.8219126881970984, iteration: 288289
loss: 1.0273882150650024,grad_norm: 0.8842533873912651, iteration: 288290
loss: 1.0038155317306519,grad_norm: 0.9582199070329271, iteration: 288291
loss: 1.0194181203842163,grad_norm: 0.7796823445849687, iteration: 288292
loss: 0.992898166179657,grad_norm: 0.9999990604780826, iteration: 288293
loss: 0.9736884236335754,grad_norm: 0.8313402792278882, iteration: 288294
loss: 1.1046630144119263,grad_norm: 0.881066346937323, iteration: 288295
loss: 0.9933111667633057,grad_norm: 0.7932059174296823, iteration: 288296
loss: 0.9806575775146484,grad_norm: 0.9052687399294329, iteration: 288297
loss: 1.036781668663025,grad_norm: 0.8651325305203026, iteration: 288298
loss: 1.0250262022018433,grad_norm: 0.8536563665221985, iteration: 288299
loss: 1.0367090702056885,grad_norm: 0.999999100824899, iteration: 288300
loss: 1.0625174045562744,grad_norm: 0.8797350420906005, iteration: 288301
loss: 0.9904012680053711,grad_norm: 0.8610051094344895, iteration: 288302
loss: 1.0255128145217896,grad_norm: 0.9019669423305943, iteration: 288303
loss: 1.003861665725708,grad_norm: 0.8824305497139072, iteration: 288304
loss: 0.9921246767044067,grad_norm: 0.9013226761158865, iteration: 288305
loss: 1.0307482481002808,grad_norm: 0.9001585185477109, iteration: 288306
loss: 0.984001874923706,grad_norm: 0.7329723068508796, iteration: 288307
loss: 1.0408834218978882,grad_norm: 0.8114838783278892, iteration: 288308
loss: 1.00849187374115,grad_norm: 0.7974614691409537, iteration: 288309
loss: 1.01578688621521,grad_norm: 0.9999991951806898, iteration: 288310
loss: 1.0150306224822998,grad_norm: 0.9815100862789174, iteration: 288311
loss: 0.9787890911102295,grad_norm: 0.8479157794258744, iteration: 288312
loss: 1.0416706800460815,grad_norm: 0.873977910276756, iteration: 288313
loss: 0.9863251447677612,grad_norm: 0.7362618999522332, iteration: 288314
loss: 0.9821463227272034,grad_norm: 0.8131324018779842, iteration: 288315
loss: 1.0414303541183472,grad_norm: 0.9999990501465247, iteration: 288316
loss: 1.009554386138916,grad_norm: 0.9999995933358314, iteration: 288317
loss: 1.0270017385482788,grad_norm: 0.89611028728345, iteration: 288318
loss: 0.9920053482055664,grad_norm: 0.8255273859136619, iteration: 288319
loss: 0.986950159072876,grad_norm: 0.9153490828585579, iteration: 288320
loss: 1.0067684650421143,grad_norm: 0.9714577896992542, iteration: 288321
loss: 0.994575023651123,grad_norm: 0.7330226353158686, iteration: 288322
loss: 1.009398341178894,grad_norm: 0.9767226533753915, iteration: 288323
loss: 0.9884166717529297,grad_norm: 0.9414690137096487, iteration: 288324
loss: 1.0034239292144775,grad_norm: 0.8004414917246531, iteration: 288325
loss: 0.995871365070343,grad_norm: 0.9344357748870291, iteration: 288326
loss: 1.0220671892166138,grad_norm: 0.9410117521352442, iteration: 288327
loss: 0.9961827397346497,grad_norm: 0.836812800027219, iteration: 288328
loss: 0.9900945425033569,grad_norm: 0.81001108861603, iteration: 288329
loss: 1.0339106321334839,grad_norm: 0.7377709985734507, iteration: 288330
loss: 1.0231777429580688,grad_norm: 0.9047775096436738, iteration: 288331
loss: 1.0248700380325317,grad_norm: 0.9261814741301726, iteration: 288332
loss: 1.0855027437210083,grad_norm: 0.9205260564999983, iteration: 288333
loss: 0.9721883535385132,grad_norm: 0.828416437389404, iteration: 288334
loss: 0.9769677519798279,grad_norm: 0.7580485312928087, iteration: 288335
loss: 1.0073966979980469,grad_norm: 0.7666617346539774, iteration: 288336
loss: 0.9938289523124695,grad_norm: 0.9999991036669471, iteration: 288337
loss: 1.0145398378372192,grad_norm: 0.9999992353784161, iteration: 288338
loss: 0.9849424362182617,grad_norm: 0.9999990750818171, iteration: 288339
loss: 1.0372679233551025,grad_norm: 0.8285044663842556, iteration: 288340
loss: 0.9823211431503296,grad_norm: 0.9496147967106564, iteration: 288341
loss: 1.0184926986694336,grad_norm: 0.8214739306215627, iteration: 288342
loss: 0.9514214992523193,grad_norm: 0.7595744188236628, iteration: 288343
loss: 1.0003581047058105,grad_norm: 0.7944723867702075, iteration: 288344
loss: 1.0141104459762573,grad_norm: 0.8621062987783004, iteration: 288345
loss: 1.0146417617797852,grad_norm: 0.9870676215888023, iteration: 288346
loss: 1.0186587572097778,grad_norm: 0.9999990185184331, iteration: 288347
loss: 0.9915966391563416,grad_norm: 0.7911526776868966, iteration: 288348
loss: 0.9773982763290405,grad_norm: 0.9611793430867626, iteration: 288349
loss: 0.9937905073165894,grad_norm: 0.8497771866043359, iteration: 288350
loss: 0.9941204786300659,grad_norm: 0.922326715003748, iteration: 288351
loss: 0.9905927181243896,grad_norm: 0.9049594129174249, iteration: 288352
loss: 0.9790017008781433,grad_norm: 0.9628431788993754, iteration: 288353
loss: 1.0042638778686523,grad_norm: 0.8599979551663068, iteration: 288354
loss: 1.0418438911437988,grad_norm: 0.9943694120468474, iteration: 288355
loss: 0.9750452637672424,grad_norm: 0.9203537665566648, iteration: 288356
loss: 1.0186071395874023,grad_norm: 0.8541013612595746, iteration: 288357
loss: 1.0181113481521606,grad_norm: 0.8501633653983753, iteration: 288358
loss: 0.9655378460884094,grad_norm: 0.9999992101630792, iteration: 288359
loss: 0.9791961312294006,grad_norm: 0.8017379948677629, iteration: 288360
loss: 0.9741645455360413,grad_norm: 0.7957531616202781, iteration: 288361
loss: 0.9829866290092468,grad_norm: 0.9999989902824669, iteration: 288362
loss: 1.0440305471420288,grad_norm: 0.9999991433252735, iteration: 288363
loss: 1.0102314949035645,grad_norm: 0.7043539785418013, iteration: 288364
loss: 0.9893149137496948,grad_norm: 0.9999993236873085, iteration: 288365
loss: 0.9900186061859131,grad_norm: 0.8155817145827258, iteration: 288366
loss: 0.9985793828964233,grad_norm: 0.9999990074850402, iteration: 288367
loss: 0.9820978045463562,grad_norm: 0.9650308962457946, iteration: 288368
loss: 0.9627128839492798,grad_norm: 0.8285388660954125, iteration: 288369
loss: 0.987769603729248,grad_norm: 0.8139816638232524, iteration: 288370
loss: 1.0329623222351074,grad_norm: 0.7140937537016334, iteration: 288371
loss: 0.9817390441894531,grad_norm: 0.7847060928884317, iteration: 288372
loss: 1.0907130241394043,grad_norm: 0.9999996861089621, iteration: 288373
loss: 0.9661474823951721,grad_norm: 0.9098126455084667, iteration: 288374
loss: 0.9917759299278259,grad_norm: 0.8108862061327881, iteration: 288375
loss: 1.0191630125045776,grad_norm: 0.9308628508413911, iteration: 288376
loss: 0.9880490899085999,grad_norm: 0.9717885181481507, iteration: 288377
loss: 0.979789674282074,grad_norm: 0.8288497361127438, iteration: 288378
loss: 1.002186894416809,grad_norm: 0.8221485232144835, iteration: 288379
loss: 0.9707911014556885,grad_norm: 0.9098606010557043, iteration: 288380
loss: 1.0210483074188232,grad_norm: 0.999999139746093, iteration: 288381
loss: 0.9144421815872192,grad_norm: 0.9209597078184527, iteration: 288382
loss: 0.9922829270362854,grad_norm: 0.9783244627154953, iteration: 288383
loss: 1.1784828901290894,grad_norm: 0.9999999461914298, iteration: 288384
loss: 1.0110619068145752,grad_norm: 0.9356260909626245, iteration: 288385
loss: 0.9859604835510254,grad_norm: 0.808146714044232, iteration: 288386
loss: 0.9695854783058167,grad_norm: 0.8796348774097608, iteration: 288387
loss: 1.0003966093063354,grad_norm: 0.8630868622333442, iteration: 288388
loss: 1.029287338256836,grad_norm: 0.9999990012801522, iteration: 288389
loss: 1.0055370330810547,grad_norm: 0.999999051062881, iteration: 288390
loss: 0.9975951910018921,grad_norm: 0.7314184641469392, iteration: 288391
loss: 1.019127607345581,grad_norm: 0.83948748871618, iteration: 288392
loss: 0.9799275994300842,grad_norm: 0.8619421283784938, iteration: 288393
loss: 0.9761964678764343,grad_norm: 0.9194874376266525, iteration: 288394
loss: 0.9983680844306946,grad_norm: 0.8180492488758949, iteration: 288395
loss: 1.0559251308441162,grad_norm: 0.9999998356094112, iteration: 288396
loss: 1.0242462158203125,grad_norm: 0.8612362530924041, iteration: 288397
loss: 0.9808401465415955,grad_norm: 0.9999996676595252, iteration: 288398
loss: 1.005614161491394,grad_norm: 0.8443309349186792, iteration: 288399
loss: 1.0244569778442383,grad_norm: 0.9983716321532433, iteration: 288400
loss: 0.9744442105293274,grad_norm: 0.8539831918036708, iteration: 288401
loss: 0.9976365566253662,grad_norm: 0.816529178743782, iteration: 288402
loss: 0.9974290728569031,grad_norm: 0.9999990424322014, iteration: 288403
loss: 0.9893854260444641,grad_norm: 0.8159207446129786, iteration: 288404
loss: 0.9985263347625732,grad_norm: 0.7756531553982037, iteration: 288405
loss: 1.0106472969055176,grad_norm: 0.8245844029973957, iteration: 288406
loss: 0.9753745198249817,grad_norm: 0.9999990707940197, iteration: 288407
loss: 0.9902800917625427,grad_norm: 0.6626203274704117, iteration: 288408
loss: 0.9901083707809448,grad_norm: 0.8144281238861695, iteration: 288409
loss: 0.9893618226051331,grad_norm: 0.9556837605823071, iteration: 288410
loss: 1.0667922496795654,grad_norm: 0.9999996759475112, iteration: 288411
loss: 0.9576995372772217,grad_norm: 0.9020036416579543, iteration: 288412
loss: 0.9986492395401001,grad_norm: 0.8715274817341898, iteration: 288413
loss: 0.9872753024101257,grad_norm: 0.7699154442095045, iteration: 288414
loss: 0.9909576773643494,grad_norm: 0.7578026938785988, iteration: 288415
loss: 1.0092806816101074,grad_norm: 0.8865241513639404, iteration: 288416
loss: 1.0245071649551392,grad_norm: 0.7732242319770849, iteration: 288417
loss: 1.0318890810012817,grad_norm: 0.9999990463466678, iteration: 288418
loss: 1.0237935781478882,grad_norm: 0.8884106920699594, iteration: 288419
loss: 0.9510940313339233,grad_norm: 0.8490976821442142, iteration: 288420
loss: 0.9976188540458679,grad_norm: 0.9391320978262285, iteration: 288421
loss: 0.9934936165809631,grad_norm: 0.9218814428001838, iteration: 288422
loss: 1.0494465827941895,grad_norm: 0.9999991867759513, iteration: 288423
loss: 0.9867746829986572,grad_norm: 0.9041163193612473, iteration: 288424
loss: 1.0169798135757446,grad_norm: 0.7962414542190387, iteration: 288425
loss: 0.9666861891746521,grad_norm: 0.8837098978287988, iteration: 288426
loss: 1.0111223459243774,grad_norm: 0.8283061480007805, iteration: 288427
loss: 0.933253288269043,grad_norm: 0.8937178996730482, iteration: 288428
loss: 0.9717150330543518,grad_norm: 0.8263075582901735, iteration: 288429
loss: 0.9870834946632385,grad_norm: 0.8094211669360866, iteration: 288430
loss: 1.0206401348114014,grad_norm: 0.9999993569323516, iteration: 288431
loss: 0.9854336977005005,grad_norm: 0.8983684120939246, iteration: 288432
loss: 1.0530741214752197,grad_norm: 0.9999998499755521, iteration: 288433
loss: 1.0272213220596313,grad_norm: 0.8281214402531195, iteration: 288434
loss: 0.9687159061431885,grad_norm: 0.8601198886547063, iteration: 288435
loss: 1.0015150308609009,grad_norm: 0.7892781114224969, iteration: 288436
loss: 1.0419361591339111,grad_norm: 0.9999991628637233, iteration: 288437
loss: 1.0561996698379517,grad_norm: 0.9999993633388904, iteration: 288438
loss: 1.052381157875061,grad_norm: 0.8508075728911404, iteration: 288439
loss: 0.9971789121627808,grad_norm: 0.9470532083510526, iteration: 288440
loss: 1.0435031652450562,grad_norm: 0.9999993785029299, iteration: 288441
loss: 0.9975277781486511,grad_norm: 0.7842575048217331, iteration: 288442
loss: 1.0387043952941895,grad_norm: 0.8942905147643431, iteration: 288443
loss: 1.0139695405960083,grad_norm: 0.999999883341705, iteration: 288444
loss: 0.9811664819717407,grad_norm: 0.9089537849295918, iteration: 288445
loss: 1.0138320922851562,grad_norm: 0.7771216387319649, iteration: 288446
loss: 1.0107911825180054,grad_norm: 0.9679936382424803, iteration: 288447
loss: 1.0053741931915283,grad_norm: 0.999999046046943, iteration: 288448
loss: 1.0052071809768677,grad_norm: 0.9999990818909295, iteration: 288449
loss: 0.9568769931793213,grad_norm: 0.8736721334015158, iteration: 288450
loss: 0.9927359223365784,grad_norm: 0.8382447263686005, iteration: 288451
loss: 1.0202934741973877,grad_norm: 0.8270201386009248, iteration: 288452
loss: 1.0710945129394531,grad_norm: 0.9730587056946948, iteration: 288453
loss: 0.9609136581420898,grad_norm: 0.7909562634813759, iteration: 288454
loss: 0.9547817707061768,grad_norm: 0.8403534490061251, iteration: 288455
loss: 1.1999642848968506,grad_norm: 0.9999998110304061, iteration: 288456
loss: 0.978443443775177,grad_norm: 0.8487448831968714, iteration: 288457
loss: 1.0131349563598633,grad_norm: 0.9506047238568824, iteration: 288458
loss: 1.037769079208374,grad_norm: 0.8618531663351822, iteration: 288459
loss: 1.000675916671753,grad_norm: 0.826454797745508, iteration: 288460
loss: 1.0386208295822144,grad_norm: 0.9999993176557215, iteration: 288461
loss: 1.118535041809082,grad_norm: 0.9999998558187793, iteration: 288462
loss: 1.0324987173080444,grad_norm: 0.9999991476702621, iteration: 288463
loss: 1.0333880186080933,grad_norm: 0.8418034664598137, iteration: 288464
loss: 0.9931909441947937,grad_norm: 0.8322647558493484, iteration: 288465
loss: 0.9603611826896667,grad_norm: 0.928325947230152, iteration: 288466
loss: 1.0702159404754639,grad_norm: 0.8524522318010221, iteration: 288467
loss: 0.9733283519744873,grad_norm: 0.9192039956077724, iteration: 288468
loss: 0.9966256022453308,grad_norm: 0.8905599754460908, iteration: 288469
loss: 1.0049160718917847,grad_norm: 0.8308473551102498, iteration: 288470
loss: 0.9920631647109985,grad_norm: 0.9354820432434244, iteration: 288471
loss: 0.9700000286102295,grad_norm: 0.8289790739270049, iteration: 288472
loss: 1.0306211709976196,grad_norm: 0.8641685409268587, iteration: 288473
loss: 1.0121912956237793,grad_norm: 0.8483177362385326, iteration: 288474
loss: 1.014014482498169,grad_norm: 0.8301324905411905, iteration: 288475
loss: 1.0065110921859741,grad_norm: 0.9369793091081443, iteration: 288476
loss: 0.9897421002388,grad_norm: 0.9999999756894039, iteration: 288477
loss: 1.0074950456619263,grad_norm: 0.7660115448605045, iteration: 288478
loss: 0.9721206426620483,grad_norm: 0.9541643645744088, iteration: 288479
loss: 0.9783200025558472,grad_norm: 0.8408114831317294, iteration: 288480
loss: 1.028917670249939,grad_norm: 0.7715027973580371, iteration: 288481
loss: 1.057878017425537,grad_norm: 0.8797832143905581, iteration: 288482
loss: 1.0042786598205566,grad_norm: 0.9308115287432772, iteration: 288483
loss: 0.9950357675552368,grad_norm: 0.8332197955121303, iteration: 288484
loss: 1.0272504091262817,grad_norm: 0.9487853234098783, iteration: 288485
loss: 0.9483075737953186,grad_norm: 0.8548833896141717, iteration: 288486
loss: 1.006934404373169,grad_norm: 0.8713373795294482, iteration: 288487
loss: 0.9624601602554321,grad_norm: 0.7989437476900402, iteration: 288488
loss: 0.9817401766777039,grad_norm: 0.7892059106659495, iteration: 288489
loss: 0.9971625208854675,grad_norm: 0.7361874653405188, iteration: 288490
loss: 0.9964273571968079,grad_norm: 0.9999990941435706, iteration: 288491
loss: 1.003778338432312,grad_norm: 0.9459606305063648, iteration: 288492
loss: 1.0041098594665527,grad_norm: 0.9999990150748673, iteration: 288493
loss: 0.9967869520187378,grad_norm: 0.7603084598474182, iteration: 288494
loss: 0.9903843402862549,grad_norm: 0.8755865443569677, iteration: 288495
loss: 1.0143754482269287,grad_norm: 0.8390578552370187, iteration: 288496
loss: 0.9854265451431274,grad_norm: 0.8021914864912691, iteration: 288497
loss: 1.015977144241333,grad_norm: 0.7351562388430615, iteration: 288498
loss: 1.014151692390442,grad_norm: 0.8177675746391384, iteration: 288499
loss: 0.9762958884239197,grad_norm: 0.8507774127777423, iteration: 288500
loss: 1.002322793006897,grad_norm: 0.7275323174801402, iteration: 288501
loss: 0.9827484488487244,grad_norm: 0.871691014772318, iteration: 288502
loss: 0.9619640111923218,grad_norm: 0.7910600338516215, iteration: 288503
loss: 1.0057222843170166,grad_norm: 0.9999990211591335, iteration: 288504
loss: 0.9865020513534546,grad_norm: 0.8068003915074154, iteration: 288505
loss: 0.9836403131484985,grad_norm: 0.8478137285897027, iteration: 288506
loss: 0.9839417934417725,grad_norm: 0.8429039702759571, iteration: 288507
loss: 0.9822813272476196,grad_norm: 0.8067346977970098, iteration: 288508
loss: 1.0221399068832397,grad_norm: 0.9999993714308593, iteration: 288509
loss: 1.0561281442642212,grad_norm: 0.9999991231033015, iteration: 288510
loss: 1.1049798727035522,grad_norm: 0.9653148829482646, iteration: 288511
loss: 0.9646145105361938,grad_norm: 0.841993408888151, iteration: 288512
loss: 0.9783540368080139,grad_norm: 0.7103465303177994, iteration: 288513
loss: 0.9981459379196167,grad_norm: 0.9999991304717031, iteration: 288514
loss: 1.0402265787124634,grad_norm: 0.8828268903999045, iteration: 288515
loss: 1.0217887163162231,grad_norm: 0.8627608230681151, iteration: 288516
loss: 1.0367355346679688,grad_norm: 0.8736032385162836, iteration: 288517
loss: 0.9887528419494629,grad_norm: 0.8501652527929182, iteration: 288518
loss: 0.9754814505577087,grad_norm: 0.965171861364819, iteration: 288519
loss: 1.131272315979004,grad_norm: 0.9999998851902995, iteration: 288520
loss: 0.9875853657722473,grad_norm: 0.8915467049406448, iteration: 288521
loss: 0.9914093017578125,grad_norm: 0.8136383189766153, iteration: 288522
loss: 1.0129449367523193,grad_norm: 0.8138507522752287, iteration: 288523
loss: 1.089990496635437,grad_norm: 0.9999992852717013, iteration: 288524
loss: 0.9964213967323303,grad_norm: 0.8519900374422164, iteration: 288525
loss: 0.9889078736305237,grad_norm: 0.858485876957859, iteration: 288526
loss: 1.0104504823684692,grad_norm: 0.9284580258311875, iteration: 288527
loss: 0.9868882298469543,grad_norm: 0.95582639848315, iteration: 288528
loss: 1.0262577533721924,grad_norm: 0.8861993599409459, iteration: 288529
loss: 0.9573386311531067,grad_norm: 0.8095242037237389, iteration: 288530
loss: 1.0133863687515259,grad_norm: 0.8078844952694679, iteration: 288531
loss: 1.0136630535125732,grad_norm: 0.9999991674176208, iteration: 288532
loss: 1.0601778030395508,grad_norm: 0.9999992125627635, iteration: 288533
loss: 0.9945703744888306,grad_norm: 0.9466155216982629, iteration: 288534
loss: 0.9882118701934814,grad_norm: 0.9439179458455618, iteration: 288535
loss: 0.9836404323577881,grad_norm: 0.8545210215551239, iteration: 288536
loss: 1.0819453001022339,grad_norm: 0.7086935670970672, iteration: 288537
loss: 0.968733012676239,grad_norm: 0.8792412159407799, iteration: 288538
loss: 1.0219213962554932,grad_norm: 0.9999990412409153, iteration: 288539
loss: 0.9912014007568359,grad_norm: 0.9574505146473283, iteration: 288540
loss: 0.9933527112007141,grad_norm: 0.999999857710386, iteration: 288541
loss: 1.002300500869751,grad_norm: 0.8609941602305886, iteration: 288542
loss: 0.9939936399459839,grad_norm: 0.940023485856306, iteration: 288543
loss: 1.01137375831604,grad_norm: 0.9982018501764617, iteration: 288544
loss: 0.9996768832206726,grad_norm: 0.9999990296160177, iteration: 288545
loss: 0.961536705493927,grad_norm: 0.7807523952671596, iteration: 288546
loss: 1.0264949798583984,grad_norm: 0.8842468863279254, iteration: 288547
loss: 1.0074760913848877,grad_norm: 0.7749545646631925, iteration: 288548
loss: 0.9724095463752747,grad_norm: 0.9974153721712958, iteration: 288549
loss: 1.1817705631256104,grad_norm: 0.9289230243755742, iteration: 288550
loss: 0.9934219121932983,grad_norm: 0.7956516049252298, iteration: 288551
loss: 1.0583369731903076,grad_norm: 0.9266835514295912, iteration: 288552
loss: 1.0181478261947632,grad_norm: 0.9557707850144281, iteration: 288553
loss: 1.0225011110305786,grad_norm: 0.9999991121006707, iteration: 288554
loss: 1.0278793573379517,grad_norm: 0.9999991413241814, iteration: 288555
loss: 0.9599267840385437,grad_norm: 0.9122358551472449, iteration: 288556
loss: 1.0041850805282593,grad_norm: 0.9999991132798469, iteration: 288557
loss: 0.9888331294059753,grad_norm: 0.7952268372305205, iteration: 288558
loss: 1.0511987209320068,grad_norm: 0.9999993757826976, iteration: 288559
loss: 1.0189425945281982,grad_norm: 0.8506460127206583, iteration: 288560
loss: 1.0463716983795166,grad_norm: 0.9703176748744803, iteration: 288561
loss: 0.9904155731201172,grad_norm: 0.7688752376231064, iteration: 288562
loss: 1.0121902227401733,grad_norm: 0.7748423330277747, iteration: 288563
loss: 1.0335873365402222,grad_norm: 0.999999173835147, iteration: 288564
loss: 1.0006452798843384,grad_norm: 0.7328089590711587, iteration: 288565
loss: 1.0143427848815918,grad_norm: 0.8633967946400376, iteration: 288566
loss: 1.0115777254104614,grad_norm: 0.7688515869201886, iteration: 288567
loss: 1.0135471820831299,grad_norm: 0.9999991279124538, iteration: 288568
loss: 0.9916979074478149,grad_norm: 0.7953440487273103, iteration: 288569
loss: 1.030129313468933,grad_norm: 0.8406483030142745, iteration: 288570
loss: 0.9697283506393433,grad_norm: 0.9999996115062944, iteration: 288571
loss: 1.0051007270812988,grad_norm: 0.7432161650178866, iteration: 288572
loss: 0.9903826713562012,grad_norm: 0.8333759333521433, iteration: 288573
loss: 0.9784541726112366,grad_norm: 0.9769211657051335, iteration: 288574
loss: 1.0483258962631226,grad_norm: 0.9999989420299348, iteration: 288575
loss: 1.0089313983917236,grad_norm: 0.7709512736157146, iteration: 288576
loss: 0.9998860359191895,grad_norm: 0.9116747541347558, iteration: 288577
loss: 0.9904115200042725,grad_norm: 0.7867370199447621, iteration: 288578
loss: 1.0346790552139282,grad_norm: 0.9999998885639927, iteration: 288579
loss: 1.0014562606811523,grad_norm: 0.8469433372198001, iteration: 288580
loss: 1.0218589305877686,grad_norm: 0.9999992707484213, iteration: 288581
loss: 1.0119075775146484,grad_norm: 0.792126112791048, iteration: 288582
loss: 1.045607089996338,grad_norm: 0.9999994968711957, iteration: 288583
loss: 1.053683876991272,grad_norm: 0.8956612539514405, iteration: 288584
loss: 0.9825627207756042,grad_norm: 0.7647370172700589, iteration: 288585
loss: 0.9949800372123718,grad_norm: 0.9090640521473617, iteration: 288586
loss: 0.9910925626754761,grad_norm: 0.7930289056163253, iteration: 288587
loss: 1.113580346107483,grad_norm: 0.9999994040966489, iteration: 288588
loss: 1.008481502532959,grad_norm: 0.7875896078245614, iteration: 288589
loss: 1.0159372091293335,grad_norm: 0.880057032093601, iteration: 288590
loss: 0.9940161108970642,grad_norm: 0.883470190349445, iteration: 288591
loss: 1.0724471807479858,grad_norm: 0.9999999242088481, iteration: 288592
loss: 1.003867506980896,grad_norm: 0.8413980240144832, iteration: 288593
loss: 0.9924803376197815,grad_norm: 0.8021021847016436, iteration: 288594
loss: 1.0042507648468018,grad_norm: 0.8081590724466048, iteration: 288595
loss: 1.1305146217346191,grad_norm: 1.0000000417417199, iteration: 288596
loss: 1.0108888149261475,grad_norm: 0.795491873199723, iteration: 288597
loss: 0.9992402195930481,grad_norm: 0.82393651093308, iteration: 288598
loss: 1.0257421731948853,grad_norm: 0.999999104525184, iteration: 288599
loss: 1.0342950820922852,grad_norm: 0.9999998802486878, iteration: 288600
loss: 1.0303391218185425,grad_norm: 0.8803394449281585, iteration: 288601
loss: 0.963600754737854,grad_norm: 0.8884176073180869, iteration: 288602
loss: 1.020881175994873,grad_norm: 0.8266765876238988, iteration: 288603
loss: 0.9727755188941956,grad_norm: 0.889043911301525, iteration: 288604
loss: 1.0196861028671265,grad_norm: 0.9999992181242336, iteration: 288605
loss: 0.9898483753204346,grad_norm: 0.7565973934311345, iteration: 288606
loss: 1.0037305355072021,grad_norm: 0.8340868481056042, iteration: 288607
loss: 1.0155987739562988,grad_norm: 0.9219911781511476, iteration: 288608
loss: 1.059511661529541,grad_norm: 0.8320136694158305, iteration: 288609
loss: 0.9919350147247314,grad_norm: 0.9999992812583247, iteration: 288610
loss: 1.0061899423599243,grad_norm: 0.963461530526863, iteration: 288611
loss: 1.0294625759124756,grad_norm: 0.9035526865677806, iteration: 288612
loss: 1.0156488418579102,grad_norm: 0.9999990550736518, iteration: 288613
loss: 0.9891372919082642,grad_norm: 0.8120379540165561, iteration: 288614
loss: 1.0207240581512451,grad_norm: 0.9999990871657232, iteration: 288615
loss: 1.0204912424087524,grad_norm: 0.7150514888986685, iteration: 288616
loss: 0.9878901839256287,grad_norm: 0.8017671671625002, iteration: 288617
loss: 1.0203720331192017,grad_norm: 0.7375434392590356, iteration: 288618
loss: 1.025890827178955,grad_norm: 0.7967718190238388, iteration: 288619
loss: 1.0149346590042114,grad_norm: 0.858628459488343, iteration: 288620
loss: 1.0090620517730713,grad_norm: 0.9999994357900718, iteration: 288621
loss: 1.0262573957443237,grad_norm: 0.9999995282787703, iteration: 288622
loss: 1.0075348615646362,grad_norm: 0.8531102180097053, iteration: 288623
loss: 0.963558554649353,grad_norm: 0.9999989189284223, iteration: 288624
loss: 1.0198158025741577,grad_norm: 0.7050395871500235, iteration: 288625
loss: 0.9737534523010254,grad_norm: 0.9847728024841512, iteration: 288626
loss: 1.0248134136199951,grad_norm: 0.9343860652856731, iteration: 288627
loss: 1.0691324472427368,grad_norm: 0.9999994575888544, iteration: 288628
loss: 0.9718732833862305,grad_norm: 0.9177753915319412, iteration: 288629
loss: 1.0079845190048218,grad_norm: 0.7158050679148888, iteration: 288630
loss: 1.0348544120788574,grad_norm: 0.9382383148018724, iteration: 288631
loss: 0.9856957197189331,grad_norm: 0.8080713547263974, iteration: 288632
loss: 1.0174899101257324,grad_norm: 0.8424074154020443, iteration: 288633
loss: 0.9780876040458679,grad_norm: 0.8504293945675204, iteration: 288634
loss: 1.0090514421463013,grad_norm: 0.8037927372372485, iteration: 288635
loss: 1.0122058391571045,grad_norm: 0.9710379601285833, iteration: 288636
loss: 0.9951669573783875,grad_norm: 0.9399742709674184, iteration: 288637
loss: 0.9744979739189148,grad_norm: 0.7892376378115745, iteration: 288638
loss: 1.0615516901016235,grad_norm: 0.9999990384110088, iteration: 288639
loss: 1.0215561389923096,grad_norm: 0.7657222238760972, iteration: 288640
loss: 0.9804332852363586,grad_norm: 0.949877783316584, iteration: 288641
loss: 0.9732316136360168,grad_norm: 0.7992115737605514, iteration: 288642
loss: 0.9971067905426025,grad_norm: 0.9495680541509645, iteration: 288643
loss: 1.0426199436187744,grad_norm: 0.9999998746123518, iteration: 288644
loss: 1.0389503240585327,grad_norm: 0.9999996534474875, iteration: 288645
loss: 1.0076552629470825,grad_norm: 0.798804986435756, iteration: 288646
loss: 0.9955111742019653,grad_norm: 0.9439359076073546, iteration: 288647
loss: 1.0023362636566162,grad_norm: 0.7469941228889619, iteration: 288648
loss: 0.9971487522125244,grad_norm: 0.810484180908817, iteration: 288649
loss: 1.0187067985534668,grad_norm: 0.8878682399665953, iteration: 288650
loss: 0.9929028153419495,grad_norm: 0.8013277650091787, iteration: 288651
loss: 1.0565540790557861,grad_norm: 0.9999999077168169, iteration: 288652
loss: 0.9996529221534729,grad_norm: 0.8429469431139798, iteration: 288653
loss: 0.9715820550918579,grad_norm: 0.9988445009688965, iteration: 288654
loss: 1.035949468612671,grad_norm: 0.9323073216195374, iteration: 288655
loss: 1.0341874361038208,grad_norm: 0.9416101841163418, iteration: 288656
loss: 0.9713802933692932,grad_norm: 0.9148859714317545, iteration: 288657
loss: 1.019424319267273,grad_norm: 0.9999990375303714, iteration: 288658
loss: 0.9913730621337891,grad_norm: 0.7671130359881101, iteration: 288659
loss: 0.9916963577270508,grad_norm: 0.791829517674706, iteration: 288660
loss: 1.0354269742965698,grad_norm: 0.8388385614254398, iteration: 288661
loss: 0.9830098748207092,grad_norm: 0.7819862927572533, iteration: 288662
loss: 1.0053300857543945,grad_norm: 0.9999991338734234, iteration: 288663
loss: 0.9926879405975342,grad_norm: 0.9999991318176521, iteration: 288664
loss: 0.9788292050361633,grad_norm: 0.8618434251218335, iteration: 288665
loss: 0.9923537969589233,grad_norm: 0.9255471237149597, iteration: 288666
loss: 0.9893184900283813,grad_norm: 0.8871885569094249, iteration: 288667
loss: 0.9969817996025085,grad_norm: 0.999999074217097, iteration: 288668
loss: 1.014739751815796,grad_norm: 0.7840176892504536, iteration: 288669
loss: 0.9855908751487732,grad_norm: 0.8299204009913766, iteration: 288670
loss: 1.0170319080352783,grad_norm: 0.8858586580285204, iteration: 288671
loss: 0.9955552816390991,grad_norm: 0.7915784994508465, iteration: 288672
loss: 0.9540286064147949,grad_norm: 0.9567832421702716, iteration: 288673
loss: 1.1481397151947021,grad_norm: 1.0000000558361064, iteration: 288674
loss: 0.9958351254463196,grad_norm: 0.7898256329186142, iteration: 288675
loss: 0.9396841526031494,grad_norm: 0.713063944214986, iteration: 288676
loss: 1.0197007656097412,grad_norm: 0.9999990213208153, iteration: 288677
loss: 0.9893584847450256,grad_norm: 0.8314371099325537, iteration: 288678
loss: 1.0269076824188232,grad_norm: 0.809786639357621, iteration: 288679
loss: 0.9417815804481506,grad_norm: 0.7932450402470218, iteration: 288680
loss: 0.9583666324615479,grad_norm: 0.7662556680764097, iteration: 288681
loss: 0.9832828640937805,grad_norm: 0.9443134400331266, iteration: 288682
loss: 1.0352463722229004,grad_norm: 0.9940905512461343, iteration: 288683
loss: 1.0017166137695312,grad_norm: 0.9117693685435245, iteration: 288684
loss: 1.0137032270431519,grad_norm: 0.8095707241834836, iteration: 288685
loss: 1.0321956872940063,grad_norm: 0.9999990212901387, iteration: 288686
loss: 1.0454224348068237,grad_norm: 0.9432660572748212, iteration: 288687
loss: 1.0048083066940308,grad_norm: 0.6661647148412432, iteration: 288688
loss: 0.9870326519012451,grad_norm: 0.8038769105259291, iteration: 288689
loss: 0.9828028678894043,grad_norm: 0.9860472706843191, iteration: 288690
loss: 0.9735844731330872,grad_norm: 0.8442943721488758, iteration: 288691
loss: 1.0259586572647095,grad_norm: 0.9030984816311401, iteration: 288692
loss: 0.9770621657371521,grad_norm: 0.8156453847026744, iteration: 288693
loss: 0.9989871978759766,grad_norm: 0.9302593053003857, iteration: 288694
loss: 0.9733187556266785,grad_norm: 0.8899119532577117, iteration: 288695
loss: 0.9913849830627441,grad_norm: 0.9263974311548745, iteration: 288696
loss: 1.0931496620178223,grad_norm: 0.9999995790989978, iteration: 288697
loss: 0.9886124730110168,grad_norm: 0.8115586071242531, iteration: 288698
loss: 1.002718448638916,grad_norm: 0.8022861946020423, iteration: 288699
loss: 0.974397599697113,grad_norm: 0.8369413213827135, iteration: 288700
loss: 0.994884192943573,grad_norm: 0.9999994863009924, iteration: 288701
loss: 1.0073586702346802,grad_norm: 0.7672875282095699, iteration: 288702
loss: 0.9854601621627808,grad_norm: 0.9517634310870349, iteration: 288703
loss: 0.9957613945007324,grad_norm: 0.8501172021466338, iteration: 288704
loss: 0.9697782397270203,grad_norm: 0.9198459671416916, iteration: 288705
loss: 0.998786449432373,grad_norm: 0.80317984711075, iteration: 288706
loss: 1.0202348232269287,grad_norm: 0.9999994386440023, iteration: 288707
loss: 0.9841933250427246,grad_norm: 0.999999107064595, iteration: 288708
loss: 0.9961612820625305,grad_norm: 0.6816567392899047, iteration: 288709
loss: 0.9824038743972778,grad_norm: 0.8575882908185438, iteration: 288710
loss: 0.9701458811759949,grad_norm: 0.9061107639803847, iteration: 288711
loss: 1.0125303268432617,grad_norm: 0.9999993670127106, iteration: 288712
loss: 1.0165810585021973,grad_norm: 0.9214732591909534, iteration: 288713
loss: 0.9741435050964355,grad_norm: 0.7853302202608397, iteration: 288714
loss: 0.9923615455627441,grad_norm: 0.7976127644945151, iteration: 288715
loss: 0.958465576171875,grad_norm: 0.8106816919810278, iteration: 288716
loss: 1.0061734914779663,grad_norm: 0.8617631921563966, iteration: 288717
loss: 1.0435012578964233,grad_norm: 0.9999994825043701, iteration: 288718
loss: 0.976034939289093,grad_norm: 0.9999990820484749, iteration: 288719
loss: 0.9869347214698792,grad_norm: 0.9106415157774281, iteration: 288720
loss: 1.020417332649231,grad_norm: 0.8157623438039814, iteration: 288721
loss: 0.964475154876709,grad_norm: 0.9257255936080033, iteration: 288722
loss: 0.987781822681427,grad_norm: 0.851307368321287, iteration: 288723
loss: 0.985749363899231,grad_norm: 0.8282310557152799, iteration: 288724
loss: 1.0540825128555298,grad_norm: 0.9655968617836544, iteration: 288725
loss: 0.986107587814331,grad_norm: 0.8568909616898406, iteration: 288726
loss: 0.9985514283180237,grad_norm: 0.8609521823740868, iteration: 288727
loss: 0.9981729388237,grad_norm: 0.833457012536879, iteration: 288728
loss: 0.9953517317771912,grad_norm: 0.8706346206722088, iteration: 288729
loss: 0.9570948481559753,grad_norm: 0.7696899587081545, iteration: 288730
loss: 0.9975149035453796,grad_norm: 0.7722319071249218, iteration: 288731
loss: 1.015516996383667,grad_norm: 0.8711347312854627, iteration: 288732
loss: 1.0148676633834839,grad_norm: 0.8494180892855977, iteration: 288733
loss: 0.9892249703407288,grad_norm: 0.7983435897497762, iteration: 288734
loss: 1.0110701322555542,grad_norm: 0.8631584216471866, iteration: 288735
loss: 1.00926673412323,grad_norm: 0.8726741485169804, iteration: 288736
loss: 1.0230263471603394,grad_norm: 0.8003024795248797, iteration: 288737
loss: 1.0046141147613525,grad_norm: 0.9999999630839751, iteration: 288738
loss: 0.9648147821426392,grad_norm: 0.9999998696781519, iteration: 288739
loss: 1.0248452425003052,grad_norm: 0.8850925108792455, iteration: 288740
loss: 1.0188754796981812,grad_norm: 0.8612786421544093, iteration: 288741
loss: 0.9985629916191101,grad_norm: 0.9490891314439325, iteration: 288742
loss: 1.0407332181930542,grad_norm: 0.9999994532473576, iteration: 288743
loss: 1.0367944240570068,grad_norm: 0.8981658082106448, iteration: 288744
loss: 1.0068776607513428,grad_norm: 0.6977630467634783, iteration: 288745
loss: 1.0252346992492676,grad_norm: 0.9004644673868964, iteration: 288746
loss: 0.9977197647094727,grad_norm: 0.8235694144197403, iteration: 288747
loss: 1.0115116834640503,grad_norm: 0.9995757700654548, iteration: 288748
loss: 1.0255718231201172,grad_norm: 0.9999997577141262, iteration: 288749
loss: 1.036759853363037,grad_norm: 0.9999991680753416, iteration: 288750
loss: 0.9378664493560791,grad_norm: 0.8236326382473677, iteration: 288751
loss: 1.1572787761688232,grad_norm: 0.9999993242334562, iteration: 288752
loss: 1.019484043121338,grad_norm: 0.9999994327916961, iteration: 288753
loss: 1.0165756940841675,grad_norm: 0.9999992504979445, iteration: 288754
loss: 0.997885525226593,grad_norm: 0.9674681811504644, iteration: 288755
loss: 0.9681424498558044,grad_norm: 0.9999991338464175, iteration: 288756
loss: 1.0264207124710083,grad_norm: 0.8904066080164031, iteration: 288757
loss: 0.9849280714988708,grad_norm: 0.9213862065458572, iteration: 288758
loss: 1.0250316858291626,grad_norm: 0.9999995688916008, iteration: 288759
loss: 1.0294688940048218,grad_norm: 0.9999999117002905, iteration: 288760
loss: 1.0164066553115845,grad_norm: 0.9999998331188907, iteration: 288761
loss: 0.9887396693229675,grad_norm: 0.7285819663632311, iteration: 288762
loss: 1.0088915824890137,grad_norm: 0.8517954144212663, iteration: 288763
loss: 0.994402289390564,grad_norm: 0.7790223941085876, iteration: 288764
loss: 1.0140447616577148,grad_norm: 0.8916000939844261, iteration: 288765
loss: 1.0388429164886475,grad_norm: 0.9999989863297797, iteration: 288766
loss: 0.9898491501808167,grad_norm: 0.8121449003695708, iteration: 288767
loss: 0.9957634210586548,grad_norm: 0.8170655480032958, iteration: 288768
loss: 0.9783281087875366,grad_norm: 0.7835072138053633, iteration: 288769
loss: 1.0132654905319214,grad_norm: 0.9794102535220188, iteration: 288770
loss: 1.0301817655563354,grad_norm: 0.9999990506052634, iteration: 288771
loss: 1.0243674516677856,grad_norm: 0.8363915851715157, iteration: 288772
loss: 0.9939601421356201,grad_norm: 0.8944692654365073, iteration: 288773
loss: 0.9950419664382935,grad_norm: 1.0000000328260736, iteration: 288774
loss: 1.0151607990264893,grad_norm: 0.9999990775935171, iteration: 288775
loss: 1.0140458345413208,grad_norm: 0.9260180379445259, iteration: 288776
loss: 1.0278403759002686,grad_norm: 0.9557548434784473, iteration: 288777
loss: 0.9834467768669128,grad_norm: 0.7894211498005085, iteration: 288778
loss: 0.9557088017463684,grad_norm: 0.7450261589728682, iteration: 288779
loss: 0.9783037900924683,grad_norm: 0.8754614914833949, iteration: 288780
loss: 0.9906346201896667,grad_norm: 0.8112163350698316, iteration: 288781
loss: 1.0288094282150269,grad_norm: 0.8752736792087862, iteration: 288782
loss: 1.0306092500686646,grad_norm: 0.9999999219831216, iteration: 288783
loss: 0.9837139844894409,grad_norm: 0.7937486791035109, iteration: 288784
loss: 1.0355817079544067,grad_norm: 0.8715321513725549, iteration: 288785
loss: 1.027000069618225,grad_norm: 0.9137161210055739, iteration: 288786
loss: 1.0339041948318481,grad_norm: 0.8337951097751604, iteration: 288787
loss: 0.9983818531036377,grad_norm: 0.8383503237029126, iteration: 288788
loss: 1.088140606880188,grad_norm: 0.9999993172672719, iteration: 288789
loss: 0.9797766804695129,grad_norm: 0.8040323861898988, iteration: 288790
loss: 1.021854281425476,grad_norm: 0.8371283929685085, iteration: 288791
loss: 0.9917806386947632,grad_norm: 0.8432650047750659, iteration: 288792
loss: 1.0153775215148926,grad_norm: 0.9525215860326846, iteration: 288793
loss: 0.9998005628585815,grad_norm: 0.952506806255158, iteration: 288794
loss: 1.0129655599594116,grad_norm: 0.7856579636567134, iteration: 288795
loss: 1.0583466291427612,grad_norm: 0.9999999033002475, iteration: 288796
loss: 1.0117623805999756,grad_norm: 0.9507126985638583, iteration: 288797
loss: 0.9984870553016663,grad_norm: 0.7380211708208005, iteration: 288798
loss: 1.0095672607421875,grad_norm: 0.7348385593736606, iteration: 288799
loss: 0.988913893699646,grad_norm: 0.8720540811000428, iteration: 288800
loss: 1.0068928003311157,grad_norm: 0.730944176241905, iteration: 288801
loss: 1.0521451234817505,grad_norm: 0.9999991751306709, iteration: 288802
loss: 0.9929903149604797,grad_norm: 0.9041926289472476, iteration: 288803
loss: 1.0251481533050537,grad_norm: 0.9948525472594479, iteration: 288804
loss: 1.0182596445083618,grad_norm: 0.8370517849753036, iteration: 288805
loss: 0.9687649011611938,grad_norm: 0.9999992754278876, iteration: 288806
loss: 1.0526996850967407,grad_norm: 0.9999996468543193, iteration: 288807
loss: 1.0409024953842163,grad_norm: 0.8173745505478741, iteration: 288808
loss: 0.987755537033081,grad_norm: 0.79842193189083, iteration: 288809
loss: 1.0064337253570557,grad_norm: 0.9468502441399712, iteration: 288810
loss: 0.9844955205917358,grad_norm: 0.9588705489741014, iteration: 288811
loss: 1.0435824394226074,grad_norm: 0.9098976626308075, iteration: 288812
loss: 1.0111057758331299,grad_norm: 0.8231914997455719, iteration: 288813
loss: 1.009261131286621,grad_norm: 0.9999999051036916, iteration: 288814
loss: 0.9862433075904846,grad_norm: 0.8447038020014681, iteration: 288815
loss: 0.9919835925102234,grad_norm: 0.8335993238074633, iteration: 288816
loss: 0.9923607707023621,grad_norm: 0.807794644988731, iteration: 288817
loss: 1.0031616687774658,grad_norm: 0.8999049722446806, iteration: 288818
loss: 0.9703230261802673,grad_norm: 0.9999990991886631, iteration: 288819
loss: 0.9966238141059875,grad_norm: 0.8759061964348115, iteration: 288820
loss: 1.0497889518737793,grad_norm: 0.9999998388202429, iteration: 288821
loss: 1.014630913734436,grad_norm: 0.9319714477099947, iteration: 288822
loss: 0.9290474057197571,grad_norm: 0.8479418654588291, iteration: 288823
loss: 1.119701862335205,grad_norm: 0.9999998288882498, iteration: 288824
loss: 0.9599078893661499,grad_norm: 0.7563107818923956, iteration: 288825
loss: 0.9627603888511658,grad_norm: 0.8212347474721517, iteration: 288826
loss: 0.9892648458480835,grad_norm: 0.8690078866651124, iteration: 288827
loss: 1.0179592370986938,grad_norm: 0.9999992167013819, iteration: 288828
loss: 1.0269660949707031,grad_norm: 0.9999990985775307, iteration: 288829
loss: 0.9774079918861389,grad_norm: 0.9830722402848349, iteration: 288830
loss: 0.9816122651100159,grad_norm: 0.9999990047138754, iteration: 288831
loss: 1.0016660690307617,grad_norm: 0.8925763419368464, iteration: 288832
loss: 1.001773476600647,grad_norm: 0.9999990844201523, iteration: 288833
loss: 1.0012609958648682,grad_norm: 0.9600880584411401, iteration: 288834
loss: 1.0241018533706665,grad_norm: 0.9999990762101207, iteration: 288835
loss: 0.9910602569580078,grad_norm: 0.8580777115292494, iteration: 288836
loss: 1.040345549583435,grad_norm: 0.7375419819163426, iteration: 288837
loss: 1.0116037130355835,grad_norm: 0.7891406060203462, iteration: 288838
loss: 0.9763908386230469,grad_norm: 0.7718382445526174, iteration: 288839
loss: 0.9753797650337219,grad_norm: 0.908792270965959, iteration: 288840
loss: 1.0192558765411377,grad_norm: 0.9999991644582112, iteration: 288841
loss: 1.0141915082931519,grad_norm: 0.9999997177897811, iteration: 288842
loss: 0.9870689511299133,grad_norm: 0.7723623147386425, iteration: 288843
loss: 1.0038241147994995,grad_norm: 0.7895310151838306, iteration: 288844
loss: 1.019901156425476,grad_norm: 0.8138269387544592, iteration: 288845
loss: 0.9983156323432922,grad_norm: 0.9999991271705442, iteration: 288846
loss: 0.9855036735534668,grad_norm: 0.8404435918822954, iteration: 288847
loss: 0.9774155616760254,grad_norm: 0.9443487741879618, iteration: 288848
loss: 0.9975741505622864,grad_norm: 0.7472261606551343, iteration: 288849
loss: 0.9782159328460693,grad_norm: 0.7784208403527966, iteration: 288850
loss: 0.9680970907211304,grad_norm: 0.7570300721037815, iteration: 288851
loss: 1.0054328441619873,grad_norm: 0.9004887373798481, iteration: 288852
loss: 0.9877499938011169,grad_norm: 0.9123956229182433, iteration: 288853
loss: 1.020935297012329,grad_norm: 0.9084318172551065, iteration: 288854
loss: 0.9782626628875732,grad_norm: 0.753196675251676, iteration: 288855
loss: 0.9482405781745911,grad_norm: 0.9999991918111912, iteration: 288856
loss: 0.980889081954956,grad_norm: 0.8898660155033714, iteration: 288857
loss: 1.0200952291488647,grad_norm: 0.7236120051492145, iteration: 288858
loss: 0.9743479490280151,grad_norm: 0.8410922571162878, iteration: 288859
loss: 1.0274180173873901,grad_norm: 0.8889403944947727, iteration: 288860
loss: 0.9972785115242004,grad_norm: 0.7618425903200416, iteration: 288861
loss: 1.0142041444778442,grad_norm: 0.7800600190848799, iteration: 288862
loss: 1.0005509853363037,grad_norm: 0.7832575604335679, iteration: 288863
loss: 1.0299952030181885,grad_norm: 0.9999993136528211, iteration: 288864
loss: 1.0806673765182495,grad_norm: 1.0000000288081412, iteration: 288865
loss: 1.0037264823913574,grad_norm: 0.8122216009123362, iteration: 288866
loss: 1.006509780883789,grad_norm: 0.8188315037113484, iteration: 288867
loss: 0.9997220635414124,grad_norm: 0.8992937802817268, iteration: 288868
loss: 1.0022211074829102,grad_norm: 0.8353820221255027, iteration: 288869
loss: 1.008833408355713,grad_norm: 0.8666882587799969, iteration: 288870
loss: 0.9699519276618958,grad_norm: 0.77195100026883, iteration: 288871
loss: 0.9518343210220337,grad_norm: 0.8196524120325708, iteration: 288872
loss: 1.020747184753418,grad_norm: 0.9392739961246295, iteration: 288873
loss: 0.977342963218689,grad_norm: 0.9588168276682457, iteration: 288874
loss: 1.0532386302947998,grad_norm: 0.9999991239525625, iteration: 288875
loss: 1.0217655897140503,grad_norm: 0.9999992100178768, iteration: 288876
loss: 1.0356543064117432,grad_norm: 0.8595925853170144, iteration: 288877
loss: 1.0529733896255493,grad_norm: 0.9999991084007872, iteration: 288878
loss: 1.018783450126648,grad_norm: 0.9823149922422216, iteration: 288879
loss: 1.0070959329605103,grad_norm: 0.883258094886705, iteration: 288880
loss: 0.942844569683075,grad_norm: 0.8982156337423393, iteration: 288881
loss: 0.9953338503837585,grad_norm: 0.8795481592579779, iteration: 288882
loss: 1.0239646434783936,grad_norm: 0.8569808972626537, iteration: 288883
loss: 1.0280219316482544,grad_norm: 0.9999991675442974, iteration: 288884
loss: 0.9807084202766418,grad_norm: 0.9221738906523652, iteration: 288885
loss: 1.0207817554473877,grad_norm: 0.884810119120659, iteration: 288886
loss: 1.0134917497634888,grad_norm: 0.8780770788683393, iteration: 288887
loss: 1.0406486988067627,grad_norm: 0.999999175804049, iteration: 288888
loss: 1.0216429233551025,grad_norm: 0.9667311759681307, iteration: 288889
loss: 0.9937718510627747,grad_norm: 0.9207777696394279, iteration: 288890
loss: 0.9998956322669983,grad_norm: 0.8590300213018899, iteration: 288891
loss: 1.0593509674072266,grad_norm: 0.9999993449800337, iteration: 288892
loss: 1.0182030200958252,grad_norm: 0.8546295213671911, iteration: 288893
loss: 1.0396687984466553,grad_norm: 0.9999996936239534, iteration: 288894
loss: 1.0090175867080688,grad_norm: 0.700129349713438, iteration: 288895
loss: 0.964816689491272,grad_norm: 0.7166154269759318, iteration: 288896
loss: 0.9969462752342224,grad_norm: 1.0000000586242679, iteration: 288897
loss: 0.9919919371604919,grad_norm: 0.987447037807385, iteration: 288898
loss: 0.9957689642906189,grad_norm: 0.9046504115294954, iteration: 288899
loss: 1.0124444961547852,grad_norm: 0.9999994669878748, iteration: 288900
loss: 1.0367493629455566,grad_norm: 0.9059257383258134, iteration: 288901
loss: 1.0540401935577393,grad_norm: 0.999999697760975, iteration: 288902
loss: 1.0219570398330688,grad_norm: 0.9015208784068649, iteration: 288903
loss: 1.0180224180221558,grad_norm: 0.7664076051476485, iteration: 288904
loss: 1.0375066995620728,grad_norm: 0.9999990561469351, iteration: 288905
loss: 1.0671778917312622,grad_norm: 0.9999999014143846, iteration: 288906
loss: 1.0041587352752686,grad_norm: 0.8317973699097693, iteration: 288907
loss: 1.00931978225708,grad_norm: 0.8432908347692625, iteration: 288908
loss: 1.0062092542648315,grad_norm: 0.8626882716695639, iteration: 288909
loss: 1.0021312236785889,grad_norm: 0.9590042026466701, iteration: 288910
loss: 1.002739667892456,grad_norm: 0.7717228620574079, iteration: 288911
loss: 1.0256716012954712,grad_norm: 0.9999989898151354, iteration: 288912
loss: 1.0019477605819702,grad_norm: 0.645177017703818, iteration: 288913
loss: 1.0370844602584839,grad_norm: 0.9999992270541548, iteration: 288914
loss: 0.9889731407165527,grad_norm: 0.8367124660586258, iteration: 288915
loss: 1.0570685863494873,grad_norm: 0.9173960039398429, iteration: 288916
loss: 1.011186957359314,grad_norm: 0.9999993501972495, iteration: 288917
loss: 1.0637319087982178,grad_norm: 0.8984406066624254, iteration: 288918
loss: 1.0288106203079224,grad_norm: 0.9999997023708345, iteration: 288919
loss: 1.0729544162750244,grad_norm: 0.9554780102069713, iteration: 288920
loss: 1.0155123472213745,grad_norm: 0.9271863100671647, iteration: 288921
loss: 0.9795814752578735,grad_norm: 0.8747083382101909, iteration: 288922
loss: 0.9786866307258606,grad_norm: 0.9999990760992595, iteration: 288923
loss: 1.0612547397613525,grad_norm: 0.869333017023353, iteration: 288924
loss: 1.1246790885925293,grad_norm: 0.7898389656462125, iteration: 288925
loss: 1.0035783052444458,grad_norm: 0.9535333404323607, iteration: 288926
loss: 1.0886603593826294,grad_norm: 0.9999995677626532, iteration: 288927
loss: 1.0237783193588257,grad_norm: 0.8841235822351351, iteration: 288928
loss: 1.0045316219329834,grad_norm: 0.8331058499078932, iteration: 288929
loss: 1.0093724727630615,grad_norm: 0.9999992103332062, iteration: 288930
loss: 1.063960075378418,grad_norm: 0.9999992088182836, iteration: 288931
loss: 0.9530932903289795,grad_norm: 0.9999991417794383, iteration: 288932
loss: 1.0241634845733643,grad_norm: 0.9865855871618668, iteration: 288933
loss: 0.9881966710090637,grad_norm: 0.9999992114120632, iteration: 288934
loss: 1.0135438442230225,grad_norm: 0.744894994676273, iteration: 288935
loss: 1.0301733016967773,grad_norm: 1.0000000856465676, iteration: 288936
loss: 1.1185535192489624,grad_norm: 0.9999990984296149, iteration: 288937
loss: 1.0416911840438843,grad_norm: 0.9706497561185452, iteration: 288938
loss: 0.9997245073318481,grad_norm: 0.8881070877810964, iteration: 288939
loss: 1.111395001411438,grad_norm: 0.9999993174453364, iteration: 288940
loss: 1.0636041164398193,grad_norm: 0.9999990787288667, iteration: 288941
loss: 1.0526690483093262,grad_norm: 0.9559756516566947, iteration: 288942
loss: 0.9777851700782776,grad_norm: 0.9541331665778701, iteration: 288943
loss: 1.082365870475769,grad_norm: 0.8031806631737265, iteration: 288944
loss: 1.0155740976333618,grad_norm: 0.9999991846024994, iteration: 288945
loss: 1.0217729806900024,grad_norm: 0.780141184138935, iteration: 288946
loss: 1.0780754089355469,grad_norm: 0.9999993522550014, iteration: 288947
loss: 1.023357629776001,grad_norm: 0.9999995213122755, iteration: 288948
loss: 1.0170313119888306,grad_norm: 0.8933363539960457, iteration: 288949
loss: 1.013949990272522,grad_norm: 0.9999996982430543, iteration: 288950
loss: 1.012454867362976,grad_norm: 0.7208913416267559, iteration: 288951
loss: 0.9943779706954956,grad_norm: 0.9628167321750101, iteration: 288952
loss: 1.0361402034759521,grad_norm: 0.9999991783977672, iteration: 288953
loss: 0.9994321465492249,grad_norm: 0.7892402007972844, iteration: 288954
loss: 1.1146262884140015,grad_norm: 0.9999998004957077, iteration: 288955
loss: 0.991202175617218,grad_norm: 0.9843468887500166, iteration: 288956
loss: 1.0308552980422974,grad_norm: 0.9789863386969341, iteration: 288957
loss: 1.002658724784851,grad_norm: 0.7111520880433249, iteration: 288958
loss: 1.0465563535690308,grad_norm: 0.9999990357602419, iteration: 288959
loss: 1.0544291734695435,grad_norm: 0.9999993251424945, iteration: 288960
loss: 1.013845443725586,grad_norm: 0.7983710143699915, iteration: 288961
loss: 0.9944698810577393,grad_norm: 0.9604373288005091, iteration: 288962
loss: 1.0405972003936768,grad_norm: 0.8515260942648816, iteration: 288963
loss: 0.9746106863021851,grad_norm: 0.8453518633806052, iteration: 288964
loss: 1.0109341144561768,grad_norm: 0.9125696428791866, iteration: 288965
loss: 1.0331885814666748,grad_norm: 0.9736823258735807, iteration: 288966
loss: 1.282766342163086,grad_norm: 0.9999999672957177, iteration: 288967
loss: 1.0399421453475952,grad_norm: 0.9953472395524522, iteration: 288968
loss: 1.0254133939743042,grad_norm: 0.9999998942764676, iteration: 288969
loss: 0.9752718210220337,grad_norm: 0.9999990417622815, iteration: 288970
loss: 0.9541913270950317,grad_norm: 0.9937325819510021, iteration: 288971
loss: 1.0741443634033203,grad_norm: 0.916517597879938, iteration: 288972
loss: 1.029589056968689,grad_norm: 0.9999992438468426, iteration: 288973
loss: 0.9856082201004028,grad_norm: 0.8943979568043928, iteration: 288974
loss: 0.9738437533378601,grad_norm: 0.8918611107719522, iteration: 288975
loss: 1.0352404117584229,grad_norm: 0.9255510725620717, iteration: 288976
loss: 1.0105842351913452,grad_norm: 0.8210842755580471, iteration: 288977
loss: 1.0221394300460815,grad_norm: 0.9799116134556113, iteration: 288978
loss: 1.0135940313339233,grad_norm: 0.9806574833203249, iteration: 288979
loss: 0.9698403477668762,grad_norm: 0.920526295934049, iteration: 288980
loss: 1.081668734550476,grad_norm: 1.000000018337742, iteration: 288981
loss: 1.0511339902877808,grad_norm: 0.999999610860699, iteration: 288982
loss: 0.9942159652709961,grad_norm: 0.8010194688213195, iteration: 288983
loss: 1.1079734563827515,grad_norm: 0.9999994890831566, iteration: 288984
loss: 1.0165538787841797,grad_norm: 0.9793964391000003, iteration: 288985
loss: 1.0169053077697754,grad_norm: 0.8563188682148887, iteration: 288986
loss: 0.9981120824813843,grad_norm: 0.7641620575868543, iteration: 288987
loss: 1.011603832244873,grad_norm: 0.8142711665899878, iteration: 288988
loss: 1.038931965827942,grad_norm: 0.9999991081501757, iteration: 288989
loss: 1.0546094179153442,grad_norm: 0.9171952190251749, iteration: 288990
loss: 1.029090166091919,grad_norm: 0.8726839896565151, iteration: 288991
loss: 1.1870259046554565,grad_norm: 0.9999999850087552, iteration: 288992
loss: 1.001936912536621,grad_norm: 0.8561109863742403, iteration: 288993
loss: 0.9927126169204712,grad_norm: 0.96854039090253, iteration: 288994
loss: 0.9715903401374817,grad_norm: 0.9999991447371448, iteration: 288995
loss: 0.941455066204071,grad_norm: 0.9903181778831606, iteration: 288996
loss: 1.1964443922042847,grad_norm: 0.9999998605825106, iteration: 288997
loss: 1.0456211566925049,grad_norm: 0.9029121491994616, iteration: 288998
loss: 1.0121800899505615,grad_norm: 0.937963995173873, iteration: 288999
loss: 1.004775881767273,grad_norm: 0.8451481251913086, iteration: 289000
loss: 0.9802616834640503,grad_norm: 0.9478648898710864, iteration: 289001
loss: 1.0031088590621948,grad_norm: 0.8517153051571145, iteration: 289002
loss: 1.052412986755371,grad_norm: 0.7851929872734102, iteration: 289003
loss: 1.006155252456665,grad_norm: 0.7604065928273255, iteration: 289004
loss: 1.0433133840560913,grad_norm: 0.8673195673435434, iteration: 289005
loss: 1.0360968112945557,grad_norm: 0.8482175629473244, iteration: 289006
loss: 1.0729632377624512,grad_norm: 0.9999991300517798, iteration: 289007
loss: 0.9827640056610107,grad_norm: 0.9077127553942155, iteration: 289008
loss: 1.0016067028045654,grad_norm: 0.7596209277491528, iteration: 289009
loss: 1.0125501155853271,grad_norm: 0.9152383236269723, iteration: 289010
loss: 1.024550199508667,grad_norm: 0.970320162062477, iteration: 289011
loss: 1.0914009809494019,grad_norm: 0.9999998844350058, iteration: 289012
loss: 0.9982441663742065,grad_norm: 0.7925318405775027, iteration: 289013
loss: 1.074069619178772,grad_norm: 0.999999368078235, iteration: 289014
loss: 1.09065580368042,grad_norm: 0.9567769863590634, iteration: 289015
loss: 1.058648705482483,grad_norm: 0.8251475811321449, iteration: 289016
loss: 1.0519591569900513,grad_norm: 0.9999997138753441, iteration: 289017
loss: 0.9936470985412598,grad_norm: 0.7335382462454693, iteration: 289018
loss: 1.0140448808670044,grad_norm: 0.7544990724081828, iteration: 289019
loss: 0.9931241869926453,grad_norm: 0.9624231968267419, iteration: 289020
loss: 0.9695408344268799,grad_norm: 0.9368136776530777, iteration: 289021
loss: 1.0081788301467896,grad_norm: 0.9210019592292965, iteration: 289022
loss: 1.0031136274337769,grad_norm: 0.9286128899192644, iteration: 289023
loss: 0.9426803588867188,grad_norm: 0.9441241325376694, iteration: 289024
loss: 0.9981557726860046,grad_norm: 0.9271788332054486, iteration: 289025
loss: 0.9929780960083008,grad_norm: 0.9001743878165472, iteration: 289026
loss: 1.0435774326324463,grad_norm: 0.9999991302100859, iteration: 289027
loss: 1.005679965019226,grad_norm: 0.9200873477603405, iteration: 289028
loss: 0.9809161424636841,grad_norm: 0.8028577475476981, iteration: 289029
loss: 1.1471855640411377,grad_norm: 0.9999999570037824, iteration: 289030
loss: 1.201850414276123,grad_norm: 0.9999992797361058, iteration: 289031
loss: 1.014574408531189,grad_norm: 0.8110991673426987, iteration: 289032
loss: 1.0144507884979248,grad_norm: 0.8016681395847104, iteration: 289033
loss: 1.0485965013504028,grad_norm: 0.9999996171213794, iteration: 289034
loss: 1.3985283374786377,grad_norm: 0.9999996341576105, iteration: 289035
loss: 0.9616739749908447,grad_norm: 0.8310743139455883, iteration: 289036
loss: 0.9918860197067261,grad_norm: 0.7258618342128691, iteration: 289037
loss: 1.1088792085647583,grad_norm: 0.999999927914852, iteration: 289038
loss: 0.9616948366165161,grad_norm: 0.9749249291761697, iteration: 289039
loss: 1.0144684314727783,grad_norm: 0.9175254430027384, iteration: 289040
loss: 0.996417224407196,grad_norm: 0.9999992162000254, iteration: 289041
loss: 0.990619957447052,grad_norm: 0.7200487119516207, iteration: 289042
loss: 1.006723165512085,grad_norm: 0.9632492895032189, iteration: 289043
loss: 0.9848641753196716,grad_norm: 0.6544817235986882, iteration: 289044
loss: 1.0239171981811523,grad_norm: 0.999999800528158, iteration: 289045
loss: 0.987088680267334,grad_norm: 0.9999991624362402, iteration: 289046
loss: 1.0891923904418945,grad_norm: 0.7648510826007241, iteration: 289047
loss: 1.0110543966293335,grad_norm: 0.8754239175912791, iteration: 289048
loss: 1.0642852783203125,grad_norm: 0.9745453125707004, iteration: 289049
loss: 0.9402899742126465,grad_norm: 0.8878668009220103, iteration: 289050
loss: 0.9988014698028564,grad_norm: 0.8089378892507686, iteration: 289051
loss: 0.9844645857810974,grad_norm: 0.763722652200981, iteration: 289052
loss: 1.0240211486816406,grad_norm: 0.9999990966933964, iteration: 289053
loss: 0.9726496338844299,grad_norm: 0.7618957441102121, iteration: 289054
loss: 1.0317341089248657,grad_norm: 0.9357522593082368, iteration: 289055
loss: 0.9728179574012756,grad_norm: 0.8829034736894146, iteration: 289056
loss: 0.964171826839447,grad_norm: 0.9999989526687935, iteration: 289057
loss: 1.019034743309021,grad_norm: 0.8744928553835806, iteration: 289058
loss: 1.0286585092544556,grad_norm: 0.999999299549466, iteration: 289059
loss: 0.9985710978507996,grad_norm: 0.8959750594127694, iteration: 289060
loss: 0.9811553359031677,grad_norm: 0.9999990311132387, iteration: 289061
loss: 1.067502737045288,grad_norm: 0.8390359385321452, iteration: 289062
loss: 0.985822856426239,grad_norm: 0.7845386285314653, iteration: 289063
loss: 1.0122389793395996,grad_norm: 0.9999995879160508, iteration: 289064
loss: 0.977107584476471,grad_norm: 0.8813656131181331, iteration: 289065
loss: 1.0108553171157837,grad_norm: 0.7621564388815227, iteration: 289066
loss: 1.0831875801086426,grad_norm: 0.999999737877817, iteration: 289067
loss: 0.9795948266983032,grad_norm: 0.9999993389333878, iteration: 289068
loss: 1.016119122505188,grad_norm: 0.9035721386541924, iteration: 289069
loss: 0.9884121417999268,grad_norm: 0.8018236616552556, iteration: 289070
loss: 0.9773300290107727,grad_norm: 0.807582317641933, iteration: 289071
loss: 1.0875236988067627,grad_norm: 0.99999909508359, iteration: 289072
loss: 0.9849698543548584,grad_norm: 0.8511582530682571, iteration: 289073
loss: 1.0836714506149292,grad_norm: 0.9999990544366727, iteration: 289074
loss: 1.0245181322097778,grad_norm: 0.8808213958658402, iteration: 289075
loss: 1.0043952465057373,grad_norm: 0.9674345570906973, iteration: 289076
loss: 0.9942359328269958,grad_norm: 0.9413812803261521, iteration: 289077
loss: 1.0382236242294312,grad_norm: 1.0000000074119246, iteration: 289078
loss: 1.0058225393295288,grad_norm: 0.9839034436475251, iteration: 289079
loss: 1.010753870010376,grad_norm: 0.9372044760132386, iteration: 289080
loss: 1.0642976760864258,grad_norm: 0.9999991030270315, iteration: 289081
loss: 1.0766427516937256,grad_norm: 0.7653940453886722, iteration: 289082
loss: 0.9903697371482849,grad_norm: 0.7681137932806807, iteration: 289083
loss: 0.9873969554901123,grad_norm: 0.772099853328608, iteration: 289084
loss: 0.9996707439422607,grad_norm: 0.9536819624193399, iteration: 289085
loss: 1.0441442728042603,grad_norm: 0.8601585513732001, iteration: 289086
loss: 0.9790211915969849,grad_norm: 0.7796333971466224, iteration: 289087
loss: 0.9940202236175537,grad_norm: 0.8717779786582976, iteration: 289088
loss: 0.984462559223175,grad_norm: 0.9016731880881707, iteration: 289089
loss: 0.995604395866394,grad_norm: 0.9881148189226406, iteration: 289090
loss: 1.1231225728988647,grad_norm: 0.9999991536206748, iteration: 289091
loss: 0.9936665296554565,grad_norm: 0.8121810648243756, iteration: 289092
loss: 1.0333645343780518,grad_norm: 0.9999993782394413, iteration: 289093
loss: 1.000042200088501,grad_norm: 0.7854364282709088, iteration: 289094
loss: 0.9669234156608582,grad_norm: 0.8922417439757966, iteration: 289095
loss: 0.9585906863212585,grad_norm: 0.7685260473161076, iteration: 289096
loss: 1.0107626914978027,grad_norm: 0.6851406953994367, iteration: 289097
loss: 1.0001336336135864,grad_norm: 0.8389835751702764, iteration: 289098
loss: 1.0286710262298584,grad_norm: 0.8429846066140098, iteration: 289099
loss: 1.0020629167556763,grad_norm: 0.9738321686095992, iteration: 289100
loss: 1.0021530389785767,grad_norm: 0.9999993698670322, iteration: 289101
loss: 1.0894399881362915,grad_norm: 0.9999991616922329, iteration: 289102
loss: 0.9789498448371887,grad_norm: 0.8366052240086201, iteration: 289103
loss: 1.107591152191162,grad_norm: 0.99999983322519, iteration: 289104
loss: 0.9526183605194092,grad_norm: 0.9086556091547704, iteration: 289105
loss: 1.0168815851211548,grad_norm: 0.8663672029618984, iteration: 289106
loss: 1.0435172319412231,grad_norm: 0.8169157725001105, iteration: 289107
loss: 1.059788465499878,grad_norm: 0.8818037556686158, iteration: 289108
loss: 0.9878634214401245,grad_norm: 0.9402939557017522, iteration: 289109
loss: 1.0722143650054932,grad_norm: 0.9999994864204023, iteration: 289110
loss: 1.007219910621643,grad_norm: 0.9999996183508202, iteration: 289111
loss: 1.012071967124939,grad_norm: 0.8927293147347891, iteration: 289112
loss: 1.1143070459365845,grad_norm: 0.9999993402007858, iteration: 289113
loss: 1.0192652940750122,grad_norm: 0.8106840766120977, iteration: 289114
loss: 0.9923586845397949,grad_norm: 0.8732212015789118, iteration: 289115
loss: 0.9904199242591858,grad_norm: 0.927757908398938, iteration: 289116
loss: 0.9724485278129578,grad_norm: 0.7757903125024656, iteration: 289117
loss: 1.0179216861724854,grad_norm: 0.8618283694395756, iteration: 289118
loss: 0.9622992873191833,grad_norm: 0.7008554854436673, iteration: 289119
loss: 0.9828733205795288,grad_norm: 0.9191790314387094, iteration: 289120
loss: 1.0039728879928589,grad_norm: 0.7863258132855486, iteration: 289121
loss: 1.0221588611602783,grad_norm: 0.9999995230435114, iteration: 289122
loss: 1.0123865604400635,grad_norm: 0.7063349561111755, iteration: 289123
loss: 1.0008329153060913,grad_norm: 0.8786218522042597, iteration: 289124
loss: 1.0230746269226074,grad_norm: 0.9999995420014213, iteration: 289125
loss: 1.0735487937927246,grad_norm: 0.9597754589737324, iteration: 289126
loss: 1.1003155708312988,grad_norm: 0.7275900750797819, iteration: 289127
loss: 0.9881340265274048,grad_norm: 0.9999995807403553, iteration: 289128
loss: 1.017736792564392,grad_norm: 0.999999231783944, iteration: 289129
loss: 1.0570040941238403,grad_norm: 0.9396072130942442, iteration: 289130
loss: 1.0177727937698364,grad_norm: 0.8069915824189348, iteration: 289131
loss: 0.991887629032135,grad_norm: 0.8719762797189262, iteration: 289132
loss: 0.9950390458106995,grad_norm: 0.7664356353514662, iteration: 289133
loss: 1.0070791244506836,grad_norm: 0.8351401408440723, iteration: 289134
loss: 0.9642307162284851,grad_norm: 0.693109659199876, iteration: 289135
loss: 0.9858411550521851,grad_norm: 0.9236695650843486, iteration: 289136
loss: 1.037709355354309,grad_norm: 0.9999990748149535, iteration: 289137
loss: 0.9710020422935486,grad_norm: 0.9999990655649071, iteration: 289138
loss: 1.0161396265029907,grad_norm: 0.9134094328530629, iteration: 289139
loss: 1.0050158500671387,grad_norm: 0.8126135873189833, iteration: 289140
loss: 1.0014270544052124,grad_norm: 0.875764458658523, iteration: 289141
loss: 1.0300170183181763,grad_norm: 0.7051569793716368, iteration: 289142
loss: 0.982802152633667,grad_norm: 0.8777719479864934, iteration: 289143
loss: 0.9888342022895813,grad_norm: 0.9999999303512863, iteration: 289144
loss: 1.0465786457061768,grad_norm: 0.852895145049771, iteration: 289145
loss: 0.9774370193481445,grad_norm: 0.6529018443083898, iteration: 289146
loss: 1.0018221139907837,grad_norm: 0.897415582375857, iteration: 289147
loss: 1.0229536294937134,grad_norm: 0.9413471070004675, iteration: 289148
loss: 1.0103068351745605,grad_norm: 0.9999992036204196, iteration: 289149
loss: 1.0129789113998413,grad_norm: 0.8375787293323089, iteration: 289150
loss: 1.0286113023757935,grad_norm: 0.9999992831127764, iteration: 289151
loss: 1.016239047050476,grad_norm: 0.9764757972257851, iteration: 289152
loss: 1.0045475959777832,grad_norm: 0.8416347511548675, iteration: 289153
loss: 0.9281982183456421,grad_norm: 0.9700787087585176, iteration: 289154
loss: 1.0132665634155273,grad_norm: 0.8189494247671469, iteration: 289155
loss: 0.9976317286491394,grad_norm: 0.9999990335523573, iteration: 289156
loss: 0.9847934246063232,grad_norm: 0.8908430493782371, iteration: 289157
loss: 0.9982448220252991,grad_norm: 0.8336625894273468, iteration: 289158
loss: 1.0908626317977905,grad_norm: 0.9999991688833737, iteration: 289159
loss: 0.9512486457824707,grad_norm: 0.9128462665404743, iteration: 289160
loss: 1.1128625869750977,grad_norm: 0.8573296515044583, iteration: 289161
loss: 1.1439975500106812,grad_norm: 0.9999998542435761, iteration: 289162
loss: 1.0154260396957397,grad_norm: 0.9999992253378345, iteration: 289163
loss: 0.9916541576385498,grad_norm: 0.8339342629383707, iteration: 289164
loss: 0.9657349586486816,grad_norm: 0.9999989664801263, iteration: 289165
loss: 1.101547122001648,grad_norm: 0.9999991198251988, iteration: 289166
loss: 1.0229156017303467,grad_norm: 0.9999997135345129, iteration: 289167
loss: 0.9892765283584595,grad_norm: 0.8003762013317729, iteration: 289168
loss: 0.9874488115310669,grad_norm: 0.819840777820186, iteration: 289169
loss: 0.9862034916877747,grad_norm: 0.9562412463367839, iteration: 289170
loss: 1.0007096529006958,grad_norm: 0.9103298810842608, iteration: 289171
loss: 1.039864182472229,grad_norm: 0.8845041574121931, iteration: 289172
loss: 0.9799366593360901,grad_norm: 0.9111633133298728, iteration: 289173
loss: 1.0093883275985718,grad_norm: 0.9844823775229213, iteration: 289174
loss: 1.0002321004867554,grad_norm: 0.8871259826865252, iteration: 289175
loss: 0.9810422658920288,grad_norm: 0.9999996338901765, iteration: 289176
loss: 0.9748327136039734,grad_norm: 0.9447048917378316, iteration: 289177
loss: 0.9984500408172607,grad_norm: 0.9999990526971687, iteration: 289178
loss: 1.0060919523239136,grad_norm: 0.9999991816144637, iteration: 289179
loss: 1.000930666923523,grad_norm: 0.776411177510683, iteration: 289180
loss: 1.000522494316101,grad_norm: 0.8944511354078324, iteration: 289181
loss: 1.0474448204040527,grad_norm: 0.9740540304096125, iteration: 289182
loss: 1.003041386604309,grad_norm: 0.999999090204725, iteration: 289183
loss: 0.9812010526657104,grad_norm: 0.8883050342540825, iteration: 289184
loss: 1.0048907995224,grad_norm: 0.8910021966457856, iteration: 289185
loss: 1.0217310190200806,grad_norm: 0.7613458922889684, iteration: 289186
loss: 0.9872539043426514,grad_norm: 0.8064729302292425, iteration: 289187
loss: 1.0031784772872925,grad_norm: 0.941743367314695, iteration: 289188
loss: 0.9722017645835876,grad_norm: 0.7555358207508983, iteration: 289189
loss: 1.0199670791625977,grad_norm: 0.8274178494838227, iteration: 289190
loss: 1.0084116458892822,grad_norm: 0.9999991420092704, iteration: 289191
loss: 1.0177253484725952,grad_norm: 0.9840049554200008, iteration: 289192
loss: 1.0271689891815186,grad_norm: 0.9068245909492196, iteration: 289193
loss: 0.9872456789016724,grad_norm: 0.8259108523813847, iteration: 289194
loss: 0.9636046886444092,grad_norm: 0.7062845134706739, iteration: 289195
loss: 0.9568947553634644,grad_norm: 0.8137218511334373, iteration: 289196
loss: 1.1171220541000366,grad_norm: 0.9999991859889257, iteration: 289197
loss: 1.06448495388031,grad_norm: 0.7989764892247242, iteration: 289198
loss: 1.0246268510818481,grad_norm: 0.9999991375134408, iteration: 289199
loss: 1.0176390409469604,grad_norm: 0.8722216591896005, iteration: 289200
loss: 0.9899166822433472,grad_norm: 0.9999990654380858, iteration: 289201
loss: 1.0182652473449707,grad_norm: 0.9240143111537911, iteration: 289202
loss: 0.9938294291496277,grad_norm: 0.7758253964177018, iteration: 289203
loss: 0.972046434879303,grad_norm: 0.810839771136428, iteration: 289204
loss: 1.0295436382293701,grad_norm: 0.9999996003977265, iteration: 289205
loss: 0.9971080422401428,grad_norm: 0.9999998175696863, iteration: 289206
loss: 0.9644654989242554,grad_norm: 0.9975233127981605, iteration: 289207
loss: 1.056939959526062,grad_norm: 0.980741874057201, iteration: 289208
loss: 1.076712727546692,grad_norm: 0.8898563860233898, iteration: 289209
loss: 1.073244571685791,grad_norm: 0.9999999124925764, iteration: 289210
loss: 1.0694445371627808,grad_norm: 0.999999757705387, iteration: 289211
loss: 1.0288020372390747,grad_norm: 0.8952027004056895, iteration: 289212
loss: 0.9835681319236755,grad_norm: 0.9598027529471372, iteration: 289213
loss: 1.0152485370635986,grad_norm: 0.9999992258293886, iteration: 289214
loss: 0.9901775121688843,grad_norm: 0.8535373306953192, iteration: 289215
loss: 1.0918787717819214,grad_norm: 0.831706067897142, iteration: 289216
loss: 1.0244789123535156,grad_norm: 0.9999991783727664, iteration: 289217
loss: 0.9811475872993469,grad_norm: 0.9874318731657105, iteration: 289218
loss: 0.9607411026954651,grad_norm: 0.9999990057763808, iteration: 289219
loss: 1.0212104320526123,grad_norm: 0.877708054694716, iteration: 289220
loss: 1.0213913917541504,grad_norm: 0.9956847220572638, iteration: 289221
loss: 1.0207622051239014,grad_norm: 0.6997542994973934, iteration: 289222
loss: 1.0296821594238281,grad_norm: 0.7712540249764425, iteration: 289223
loss: 1.011695384979248,grad_norm: 0.7321122807666898, iteration: 289224
loss: 0.9812289476394653,grad_norm: 0.8302807278239648, iteration: 289225
loss: 1.004384994506836,grad_norm: 0.8914283562828162, iteration: 289226
loss: 1.0018465518951416,grad_norm: 0.8335367598246479, iteration: 289227
loss: 1.028171420097351,grad_norm: 0.9999991252766497, iteration: 289228
loss: 1.0415945053100586,grad_norm: 0.9533883706199703, iteration: 289229
loss: 1.0071073770523071,grad_norm: 0.834791300037725, iteration: 289230
loss: 1.025115966796875,grad_norm: 0.999999275089166, iteration: 289231
loss: 0.9989727139472961,grad_norm: 0.9293816656315326, iteration: 289232
loss: 0.9826570153236389,grad_norm: 0.9999991606419624, iteration: 289233
loss: 1.031929850578308,grad_norm: 0.9999997541404202, iteration: 289234
loss: 0.9991424083709717,grad_norm: 0.8916126273416592, iteration: 289235
loss: 0.9998701214790344,grad_norm: 0.9611942558539442, iteration: 289236
loss: 0.9895864725112915,grad_norm: 0.9230366116020366, iteration: 289237
loss: 0.9689291715621948,grad_norm: 0.9567167916471105, iteration: 289238
loss: 1.0228745937347412,grad_norm: 0.9999990888965652, iteration: 289239
loss: 0.9967615604400635,grad_norm: 0.7515308082680687, iteration: 289240
loss: 1.0083502531051636,grad_norm: 0.9999989695510826, iteration: 289241
loss: 1.1949864625930786,grad_norm: 0.9999995746746592, iteration: 289242
loss: 0.999640166759491,grad_norm: 0.9106461091915371, iteration: 289243
loss: 1.0493390560150146,grad_norm: 0.928239314038858, iteration: 289244
loss: 0.9939473867416382,grad_norm: 0.8393927937950707, iteration: 289245
loss: 1.02012038230896,grad_norm: 0.9026349912954328, iteration: 289246
loss: 1.0289897918701172,grad_norm: 0.8848425522529318, iteration: 289247
loss: 0.983525276184082,grad_norm: 0.8666080512389897, iteration: 289248
loss: 0.9974043369293213,grad_norm: 0.9964330677992161, iteration: 289249
loss: 1.0169944763183594,grad_norm: 0.7836631072901274, iteration: 289250
loss: 0.9857359528541565,grad_norm: 0.9052044795785383, iteration: 289251
loss: 0.9772456884384155,grad_norm: 0.7639075585910922, iteration: 289252
loss: 1.02472722530365,grad_norm: 0.9999992239363312, iteration: 289253
loss: 1.02249014377594,grad_norm: 0.8232806032886264, iteration: 289254
loss: 0.9626293182373047,grad_norm: 0.8377620014329209, iteration: 289255
loss: 0.973848819732666,grad_norm: 0.9337971180443485, iteration: 289256
loss: 1.0460492372512817,grad_norm: 0.9246933502453283, iteration: 289257
loss: 0.9747447967529297,grad_norm: 0.9999991303603177, iteration: 289258
loss: 0.9557662010192871,grad_norm: 0.8256611875726427, iteration: 289259
loss: 0.9963677525520325,grad_norm: 0.769919452902669, iteration: 289260
loss: 1.0307886600494385,grad_norm: 0.999999095893566, iteration: 289261
loss: 1.0452314615249634,grad_norm: 0.999999619329566, iteration: 289262
loss: 1.0469712018966675,grad_norm: 0.8676020975499965, iteration: 289263
loss: 1.0038217306137085,grad_norm: 0.9999990980539937, iteration: 289264
loss: 1.024156928062439,grad_norm: 0.8116529716465759, iteration: 289265
loss: 1.0562528371810913,grad_norm: 0.9330474991503883, iteration: 289266
loss: 1.0108399391174316,grad_norm: 0.8628681036276303, iteration: 289267
loss: 0.9849085807800293,grad_norm: 0.9464563984404871, iteration: 289268
loss: 1.107112169265747,grad_norm: 0.9999995760173485, iteration: 289269
loss: 1.0215803384780884,grad_norm: 0.999999017265794, iteration: 289270
loss: 1.036906361579895,grad_norm: 0.9999998348762665, iteration: 289271
loss: 1.0131059885025024,grad_norm: 0.7595630590239226, iteration: 289272
loss: 1.0880926847457886,grad_norm: 0.9999991218054158, iteration: 289273
loss: 0.9965188503265381,grad_norm: 0.725169173522258, iteration: 289274
loss: 1.021775245666504,grad_norm: 0.999999166516412, iteration: 289275
loss: 1.0324780941009521,grad_norm: 0.9999993550862047, iteration: 289276
loss: 1.0107570886611938,grad_norm: 0.9999990981387991, iteration: 289277
loss: 0.9730288982391357,grad_norm: 0.9064978456799208, iteration: 289278
loss: 0.9687137007713318,grad_norm: 0.9999991629900673, iteration: 289279
loss: 0.9850824475288391,grad_norm: 0.9667382063905737, iteration: 289280
loss: 0.9869451522827148,grad_norm: 0.9999998372318004, iteration: 289281
loss: 1.0023518800735474,grad_norm: 0.8545232956735266, iteration: 289282
loss: 0.9820827841758728,grad_norm: 0.7509723521481957, iteration: 289283
loss: 1.0026661157608032,grad_norm: 0.9999998592929998, iteration: 289284
loss: 1.0066032409667969,grad_norm: 0.9150503171640786, iteration: 289285
loss: 1.0652661323547363,grad_norm: 0.7574329102628634, iteration: 289286
loss: 0.9738855361938477,grad_norm: 0.9999991606039237, iteration: 289287
loss: 0.9886425137519836,grad_norm: 0.9309921263391037, iteration: 289288
loss: 1.0013370513916016,grad_norm: 0.9999991249988179, iteration: 289289
loss: 1.041577696800232,grad_norm: 0.9999996889990579, iteration: 289290
loss: 1.0826456546783447,grad_norm: 0.8826649640372121, iteration: 289291
loss: 0.9965000152587891,grad_norm: 0.8540777354156929, iteration: 289292
loss: 1.0099475383758545,grad_norm: 0.9183896224445579, iteration: 289293
loss: 0.998268723487854,grad_norm: 0.784862765920515, iteration: 289294
loss: 0.9690145254135132,grad_norm: 0.8402046523077382, iteration: 289295
loss: 1.001349687576294,grad_norm: 0.7787960081504095, iteration: 289296
loss: 1.0187345743179321,grad_norm: 0.9999990700948244, iteration: 289297
loss: 1.03821861743927,grad_norm: 0.875891642784104, iteration: 289298
loss: 1.004747986793518,grad_norm: 0.7929987848633979, iteration: 289299
loss: 1.0334997177124023,grad_norm: 0.9999990252936264, iteration: 289300
loss: 1.0129488706588745,grad_norm: 0.8346918880586401, iteration: 289301
loss: 0.9870744347572327,grad_norm: 0.9185572407503926, iteration: 289302
loss: 1.055871844291687,grad_norm: 0.9471944762072305, iteration: 289303
loss: 1.0254929065704346,grad_norm: 0.9999997270821612, iteration: 289304
loss: 1.033452033996582,grad_norm: 0.8684999934155212, iteration: 289305
loss: 0.9699681401252747,grad_norm: 0.7802774290851213, iteration: 289306
loss: 0.9893977642059326,grad_norm: 0.8086928261125802, iteration: 289307
loss: 1.0147138833999634,grad_norm: 0.8464511184217741, iteration: 289308
loss: 0.9831796288490295,grad_norm: 0.7557090859330057, iteration: 289309
loss: 1.0760971307754517,grad_norm: 0.9999998106925887, iteration: 289310
loss: 1.0057876110076904,grad_norm: 0.9999992294487015, iteration: 289311
loss: 0.9982088208198547,grad_norm: 0.9225304113495606, iteration: 289312
loss: 0.984275758266449,grad_norm: 0.9948346704183314, iteration: 289313
loss: 1.0212064981460571,grad_norm: 0.891275611575238, iteration: 289314
loss: 1.002158761024475,grad_norm: 0.9909342932549137, iteration: 289315
loss: 1.007643461227417,grad_norm: 0.8844192288676013, iteration: 289316
loss: 1.0002328157424927,grad_norm: 0.7759110440134028, iteration: 289317
loss: 1.0620653629302979,grad_norm: 0.9999990817869953, iteration: 289318
loss: 1.0103440284729004,grad_norm: 0.8143074465172646, iteration: 289319
loss: 1.0250954627990723,grad_norm: 0.9999990705527584, iteration: 289320
loss: 0.9927792549133301,grad_norm: 0.7669736255108306, iteration: 289321
loss: 1.0019733905792236,grad_norm: 0.999999542948299, iteration: 289322
loss: 0.9885898232460022,grad_norm: 0.7624398563674749, iteration: 289323
loss: 1.1706575155258179,grad_norm: 0.9999991132839108, iteration: 289324
loss: 1.0360424518585205,grad_norm: 0.872207630455881, iteration: 289325
loss: 1.0203760862350464,grad_norm: 0.8555349863050918, iteration: 289326
loss: 0.983456015586853,grad_norm: 0.9143628083364425, iteration: 289327
loss: 1.0177502632141113,grad_norm: 0.9304469349051042, iteration: 289328
loss: 0.9959365129470825,grad_norm: 0.8246190502448537, iteration: 289329
loss: 1.005142092704773,grad_norm: 0.809587013687774, iteration: 289330
loss: 1.1105082035064697,grad_norm: 0.9999992394240437, iteration: 289331
loss: 0.9888854026794434,grad_norm: 0.9725952690450331, iteration: 289332
loss: 1.019551396369934,grad_norm: 0.8343890704647501, iteration: 289333
loss: 0.9904407858848572,grad_norm: 0.8307628373375698, iteration: 289334
loss: 0.9848707318305969,grad_norm: 0.9005397901637717, iteration: 289335
loss: 1.1545515060424805,grad_norm: 0.9999994710634741, iteration: 289336
loss: 1.071249008178711,grad_norm: 0.9193665029538833, iteration: 289337
loss: 1.0195575952529907,grad_norm: 0.8866277423767683, iteration: 289338
loss: 0.9775117635726929,grad_norm: 0.911002841400659, iteration: 289339
loss: 0.9845315217971802,grad_norm: 0.7659805656545638, iteration: 289340
loss: 1.0082130432128906,grad_norm: 0.8084040368504789, iteration: 289341
loss: 0.9739395976066589,grad_norm: 0.8047641677014432, iteration: 289342
loss: 0.9919130206108093,grad_norm: 0.9467180409539803, iteration: 289343
loss: 1.0142152309417725,grad_norm: 0.9485565950506927, iteration: 289344
loss: 1.0093379020690918,grad_norm: 0.8859798289500811, iteration: 289345
loss: 0.9981359243392944,grad_norm: 0.8593098924156739, iteration: 289346
loss: 0.9999175667762756,grad_norm: 0.9999997951447258, iteration: 289347
loss: 1.0142956972122192,grad_norm: 0.9650478741195329, iteration: 289348
loss: 1.01717209815979,grad_norm: 0.8392881006835208, iteration: 289349
loss: 0.9935893416404724,grad_norm: 0.8898916268417821, iteration: 289350
loss: 1.0125977993011475,grad_norm: 0.8919801733106495, iteration: 289351
loss: 0.9820414185523987,grad_norm: 0.896967839609291, iteration: 289352
loss: 1.010858178138733,grad_norm: 0.8676898410498288, iteration: 289353
loss: 0.9856500029563904,grad_norm: 0.7661404589951815, iteration: 289354
loss: 1.0251827239990234,grad_norm: 0.9535713817438064, iteration: 289355
loss: 1.0018291473388672,grad_norm: 0.8633263130964434, iteration: 289356
loss: 1.0067622661590576,grad_norm: 0.7341853945010368, iteration: 289357
loss: 0.9832591414451599,grad_norm: 0.9391541666124752, iteration: 289358
loss: 1.0136761665344238,grad_norm: 0.9999991809203898, iteration: 289359
loss: 1.0003435611724854,grad_norm: 0.9999991038988014, iteration: 289360
loss: 1.0452580451965332,grad_norm: 0.8453703274806245, iteration: 289361
loss: 1.0408974885940552,grad_norm: 0.909709638497227, iteration: 289362
loss: 0.9935965538024902,grad_norm: 0.7588524213427553, iteration: 289363
loss: 0.9944974184036255,grad_norm: 0.9283526533860194, iteration: 289364
loss: 0.9695903062820435,grad_norm: 0.8781236216391879, iteration: 289365
loss: 1.0031688213348389,grad_norm: 0.9999990864144314, iteration: 289366
loss: 1.006955623626709,grad_norm: 0.7819246582088281, iteration: 289367
loss: 1.0526670217514038,grad_norm: 0.9374262959045965, iteration: 289368
loss: 0.9662291407585144,grad_norm: 0.9999992717185249, iteration: 289369
loss: 1.0114996433258057,grad_norm: 0.9999999591582279, iteration: 289370
loss: 1.0372766256332397,grad_norm: 0.8059058332675815, iteration: 289371
loss: 0.9997921586036682,grad_norm: 0.7064606565748112, iteration: 289372
loss: 0.981138288974762,grad_norm: 0.7838384193092428, iteration: 289373
loss: 0.9882378578186035,grad_norm: 0.9128036513290214, iteration: 289374
loss: 1.0086243152618408,grad_norm: 0.9102471045801079, iteration: 289375
loss: 0.9604815244674683,grad_norm: 0.9999991485661051, iteration: 289376
loss: 1.011426568031311,grad_norm: 0.638882549675707, iteration: 289377
loss: 0.9635578989982605,grad_norm: 0.8767520475470069, iteration: 289378
loss: 1.0081944465637207,grad_norm: 0.807444858877339, iteration: 289379
loss: 1.0300705432891846,grad_norm: 0.7160509891719958, iteration: 289380
loss: 0.9973206520080566,grad_norm: 0.8663875621572767, iteration: 289381
loss: 1.1387977600097656,grad_norm: 1.0000001147705584, iteration: 289382
loss: 1.008278727531433,grad_norm: 0.999999006715128, iteration: 289383
loss: 0.9821615815162659,grad_norm: 0.8281923186445418, iteration: 289384
loss: 0.9837130308151245,grad_norm: 0.9999993353533865, iteration: 289385
loss: 1.0172028541564941,grad_norm: 0.9514061432929722, iteration: 289386
loss: 1.0235949754714966,grad_norm: 0.9999998272628217, iteration: 289387
loss: 1.003341794013977,grad_norm: 0.6860869573727243, iteration: 289388
loss: 0.9746849536895752,grad_norm: 0.9548980644975875, iteration: 289389
loss: 1.0204060077667236,grad_norm: 0.7162013330943909, iteration: 289390
loss: 1.0406221151351929,grad_norm: 0.9999997950959097, iteration: 289391
loss: 1.0608972311019897,grad_norm: 0.9999992361264262, iteration: 289392
loss: 0.9735448956489563,grad_norm: 0.7979572499232329, iteration: 289393
loss: 1.0524826049804688,grad_norm: 0.82897848327928, iteration: 289394
loss: 0.9788482189178467,grad_norm: 0.9603519248225234, iteration: 289395
loss: 1.010180950164795,grad_norm: 0.8599442358148525, iteration: 289396
loss: 1.0196037292480469,grad_norm: 0.8074516777881308, iteration: 289397
loss: 1.0140814781188965,grad_norm: 0.9094500062134543, iteration: 289398
loss: 0.9997686743736267,grad_norm: 0.7412162090049698, iteration: 289399
loss: 0.9674019813537598,grad_norm: 0.999999722851744, iteration: 289400
loss: 0.9832161664962769,grad_norm: 0.956188294860269, iteration: 289401
loss: 0.994779109954834,grad_norm: 0.9178911604116199, iteration: 289402
loss: 0.9823445081710815,grad_norm: 0.8991090186340551, iteration: 289403
loss: 1.0731955766677856,grad_norm: 0.9999996748964717, iteration: 289404
loss: 0.9612540602684021,grad_norm: 0.9999989736801885, iteration: 289405
loss: 1.0025290250778198,grad_norm: 0.8062375624466697, iteration: 289406
loss: 1.0096248388290405,grad_norm: 0.8472358335869855, iteration: 289407
loss: 1.0152665376663208,grad_norm: 0.8128430830200769, iteration: 289408
loss: 1.0166243314743042,grad_norm: 0.9266618241928237, iteration: 289409
loss: 1.0308709144592285,grad_norm: 0.9999991200945204, iteration: 289410
loss: 1.0106496810913086,grad_norm: 0.795115266442556, iteration: 289411
loss: 1.0079048871994019,grad_norm: 0.815332133442054, iteration: 289412
loss: 1.013445496559143,grad_norm: 0.8061871939131129, iteration: 289413
loss: 0.9675013422966003,grad_norm: 0.9455896303888462, iteration: 289414
loss: 1.031999111175537,grad_norm: 0.9999990247609835, iteration: 289415
loss: 1.0134773254394531,grad_norm: 0.808162380278748, iteration: 289416
loss: 0.9866083860397339,grad_norm: 0.9309544016057801, iteration: 289417
loss: 0.9984849095344543,grad_norm: 0.9082588033536915, iteration: 289418
loss: 1.028421401977539,grad_norm: 0.9115727469490817, iteration: 289419
loss: 0.9899075627326965,grad_norm: 0.796111731159603, iteration: 289420
loss: 1.0091837644577026,grad_norm: 0.8361244950088411, iteration: 289421
loss: 1.0280203819274902,grad_norm: 0.9182565217715136, iteration: 289422
loss: 1.0020450353622437,grad_norm: 0.806574094628004, iteration: 289423
loss: 0.9770799279212952,grad_norm: 0.8553253748298776, iteration: 289424
loss: 0.9853257536888123,grad_norm: 0.9999989784525636, iteration: 289425
loss: 1.0191354751586914,grad_norm: 0.8040926814726449, iteration: 289426
loss: 0.9826290011405945,grad_norm: 0.9999990584311002, iteration: 289427
loss: 1.046472191810608,grad_norm: 0.9999993821960252, iteration: 289428
loss: 1.0036253929138184,grad_norm: 0.8926904472905669, iteration: 289429
loss: 0.9673503041267395,grad_norm: 0.9008472000147355, iteration: 289430
loss: 1.0015283823013306,grad_norm: 0.8941390288221301, iteration: 289431
loss: 0.987917959690094,grad_norm: 0.9999990912971302, iteration: 289432
loss: 0.9794769287109375,grad_norm: 0.8071584595964841, iteration: 289433
loss: 0.9930863380432129,grad_norm: 0.8968268983143325, iteration: 289434
loss: 0.9793531894683838,grad_norm: 0.9120961946891248, iteration: 289435
loss: 1.0550999641418457,grad_norm: 0.9609648670593924, iteration: 289436
loss: 1.0109683275222778,grad_norm: 0.7972738197014173, iteration: 289437
loss: 1.0311760902404785,grad_norm: 0.7122624809909396, iteration: 289438
loss: 1.0236985683441162,grad_norm: 0.9999990439881524, iteration: 289439
loss: 1.0722442865371704,grad_norm: 0.9999991424329372, iteration: 289440
loss: 1.3395779132843018,grad_norm: 0.9999997846903639, iteration: 289441
loss: 1.0340802669525146,grad_norm: 0.8358991186273159, iteration: 289442
loss: 1.0054258108139038,grad_norm: 0.848500268623966, iteration: 289443
loss: 1.0111050605773926,grad_norm: 0.940414319207353, iteration: 289444
loss: 1.06834876537323,grad_norm: 0.9621690658351961, iteration: 289445
loss: 1.003648281097412,grad_norm: 0.9428423667663905, iteration: 289446
loss: 1.0135936737060547,grad_norm: 0.9813982998320604, iteration: 289447
loss: 1.0084326267242432,grad_norm: 0.7993970337543974, iteration: 289448
loss: 0.9975343346595764,grad_norm: 0.8155404209097805, iteration: 289449
loss: 1.0075868368148804,grad_norm: 0.8810318799499773, iteration: 289450
loss: 0.9838175177574158,grad_norm: 0.9376716858571408, iteration: 289451
loss: 0.9966799020767212,grad_norm: 0.9217969529041097, iteration: 289452
loss: 1.0099064111709595,grad_norm: 0.7778455662437416, iteration: 289453
loss: 1.0186554193496704,grad_norm: 0.9999990600322605, iteration: 289454
loss: 1.1626980304718018,grad_norm: 0.9999998254206127, iteration: 289455
loss: 0.9962387084960938,grad_norm: 0.8387102634620679, iteration: 289456
loss: 1.0620758533477783,grad_norm: 0.9999990995256522, iteration: 289457
loss: 1.186258316040039,grad_norm: 0.9999998085810428, iteration: 289458
loss: 1.0134354829788208,grad_norm: 0.8401031350299518, iteration: 289459
loss: 0.992295503616333,grad_norm: 0.9341621328197645, iteration: 289460
loss: 1.1001530885696411,grad_norm: 0.9999992259772976, iteration: 289461
loss: 1.0301182270050049,grad_norm: 0.8843312988961346, iteration: 289462
loss: 1.006434679031372,grad_norm: 0.8194205668305864, iteration: 289463
loss: 1.019822597503662,grad_norm: 0.7985362641403919, iteration: 289464
loss: 1.0233962535858154,grad_norm: 0.9241875362156978, iteration: 289465
loss: 0.97689288854599,grad_norm: 0.9664915982498853, iteration: 289466
loss: 0.9762392044067383,grad_norm: 0.8434792154672853, iteration: 289467
loss: 0.9953644871711731,grad_norm: 0.8241733491358973, iteration: 289468
loss: 0.9885675311088562,grad_norm: 0.8463497921064539, iteration: 289469
loss: 1.0109699964523315,grad_norm: 0.803808001303804, iteration: 289470
loss: 0.9766965508460999,grad_norm: 0.7494543687196613, iteration: 289471
loss: 1.0074604749679565,grad_norm: 0.999999073657972, iteration: 289472
loss: 1.0070087909698486,grad_norm: 0.8995674874736307, iteration: 289473
loss: 0.9606963396072388,grad_norm: 0.999999361132024, iteration: 289474
loss: 1.0587308406829834,grad_norm: 0.9999997575795387, iteration: 289475
loss: 1.0175672769546509,grad_norm: 0.8455171934214004, iteration: 289476
loss: 0.9923925399780273,grad_norm: 0.9663761799605638, iteration: 289477
loss: 0.9767188429832458,grad_norm: 0.9127292025339204, iteration: 289478
loss: 1.0229134559631348,grad_norm: 0.8939299679711712, iteration: 289479
loss: 0.9414776563644409,grad_norm: 0.9438275187309335, iteration: 289480
loss: 0.9790984392166138,grad_norm: 0.9999991343560752, iteration: 289481
loss: 1.0529148578643799,grad_norm: 0.9999995013852, iteration: 289482
loss: 1.0353381633758545,grad_norm: 0.779252543309131, iteration: 289483
loss: 1.0144891738891602,grad_norm: 0.9304338866099258, iteration: 289484
loss: 1.0009920597076416,grad_norm: 0.9999994980517711, iteration: 289485
loss: 1.160353183746338,grad_norm: 0.9954910121333409, iteration: 289486
loss: 1.018471598625183,grad_norm: 0.9344223787340997, iteration: 289487
loss: 1.0036588907241821,grad_norm: 0.892782468047791, iteration: 289488
loss: 0.9858217239379883,grad_norm: 0.8916176235024208, iteration: 289489
loss: 1.0344220399856567,grad_norm: 0.9154159827233143, iteration: 289490
loss: 0.9804812073707581,grad_norm: 0.8536019253072894, iteration: 289491
loss: 0.9898198843002319,grad_norm: 0.9168972067885836, iteration: 289492
loss: 0.9685379266738892,grad_norm: 0.9658199895303958, iteration: 289493
loss: 1.0237103700637817,grad_norm: 0.9999996958441792, iteration: 289494
loss: 1.0321046113967896,grad_norm: 0.999999088413964, iteration: 289495
loss: 1.005678415298462,grad_norm: 0.893038788073948, iteration: 289496
loss: 0.9625275135040283,grad_norm: 0.9753777437072797, iteration: 289497
loss: 1.0153403282165527,grad_norm: 0.9240606535577486, iteration: 289498
loss: 1.0516620874404907,grad_norm: 0.9244738476218449, iteration: 289499
loss: 0.9808231592178345,grad_norm: 0.8326161962701883, iteration: 289500
loss: 0.9928423762321472,grad_norm: 0.8999308720247692, iteration: 289501
loss: 0.9601748585700989,grad_norm: 0.947899074719684, iteration: 289502
loss: 0.9306223392486572,grad_norm: 0.9377856937697373, iteration: 289503
loss: 1.0206986665725708,grad_norm: 0.7245950222132211, iteration: 289504
loss: 1.0090619325637817,grad_norm: 0.8664769352929199, iteration: 289505
loss: 1.0755568742752075,grad_norm: 0.8674372002149063, iteration: 289506
loss: 0.9852349162101746,grad_norm: 0.9860221938339065, iteration: 289507
loss: 1.0595097541809082,grad_norm: 0.9999995655474481, iteration: 289508
loss: 0.9955869317054749,grad_norm: 0.8517189490393443, iteration: 289509
loss: 1.1903278827667236,grad_norm: 0.9999998157849036, iteration: 289510
loss: 1.1190142631530762,grad_norm: 0.9999994555380612, iteration: 289511
loss: 0.9715535640716553,grad_norm: 0.9988633018376876, iteration: 289512
loss: 1.0169789791107178,grad_norm: 0.8222555509464929, iteration: 289513
loss: 1.006273865699768,grad_norm: 0.7665359270373494, iteration: 289514
loss: 1.0255972146987915,grad_norm: 0.8358452387600884, iteration: 289515
loss: 0.9975669980049133,grad_norm: 0.7653437843871269, iteration: 289516
loss: 0.9951292872428894,grad_norm: 0.808016832653492, iteration: 289517
loss: 0.9996851682662964,grad_norm: 0.8294139832004275, iteration: 289518
loss: 1.0771108865737915,grad_norm: 0.9999998367691773, iteration: 289519
loss: 1.0299855470657349,grad_norm: 0.99236430687141, iteration: 289520
loss: 1.0168794393539429,grad_norm: 0.9589199256648517, iteration: 289521
loss: 1.0549277067184448,grad_norm: 0.999999864183697, iteration: 289522
loss: 1.0003557205200195,grad_norm: 0.7759824217980563, iteration: 289523
loss: 1.014835000038147,grad_norm: 0.9999989774510265, iteration: 289524
loss: 0.9762033820152283,grad_norm: 0.8361464464196477, iteration: 289525
loss: 1.0372576713562012,grad_norm: 0.9999992150460718, iteration: 289526
loss: 1.0013411045074463,grad_norm: 0.8112870006496612, iteration: 289527
loss: 0.9912359714508057,grad_norm: 0.6626361489879657, iteration: 289528
loss: 0.9855597019195557,grad_norm: 0.9999994987669462, iteration: 289529
loss: 1.04944908618927,grad_norm: 0.8588181880098674, iteration: 289530
loss: 1.0115158557891846,grad_norm: 0.9064538772443875, iteration: 289531
loss: 0.9973959922790527,grad_norm: 0.8184811892924592, iteration: 289532
loss: 1.0697576999664307,grad_norm: 1.0000001237904619, iteration: 289533
loss: 0.9582557082176208,grad_norm: 0.8247553893356617, iteration: 289534
loss: 1.0311201810836792,grad_norm: 0.8658339185759574, iteration: 289535
loss: 1.0083143711090088,grad_norm: 0.8453724891826638, iteration: 289536
loss: 1.0309966802597046,grad_norm: 0.9747551016673471, iteration: 289537
loss: 0.985673189163208,grad_norm: 0.8117928718673859, iteration: 289538
loss: 1.0084216594696045,grad_norm: 0.8523835453314536, iteration: 289539
loss: 0.9978414177894592,grad_norm: 0.9800442323437315, iteration: 289540
loss: 1.0098161697387695,grad_norm: 0.9928777701993333, iteration: 289541
loss: 0.9624945521354675,grad_norm: 0.7685971396228771, iteration: 289542
loss: 1.2128509283065796,grad_norm: 0.9999996673687581, iteration: 289543
loss: 1.02491295337677,grad_norm: 0.8494076939566714, iteration: 289544
loss: 1.081853985786438,grad_norm: 0.941089611301997, iteration: 289545
loss: 0.9749860763549805,grad_norm: 0.8738046941846482, iteration: 289546
loss: 1.0240806341171265,grad_norm: 0.9205076640972184, iteration: 289547
loss: 1.0154731273651123,grad_norm: 0.9999991745292127, iteration: 289548
loss: 0.9731383323669434,grad_norm: 0.8981616479097325, iteration: 289549
loss: 1.0038644075393677,grad_norm: 0.7908117734347805, iteration: 289550
loss: 0.9891456961631775,grad_norm: 0.9219014526038066, iteration: 289551
loss: 1.0306458473205566,grad_norm: 0.9181367767777174, iteration: 289552
loss: 1.0155317783355713,grad_norm: 0.8456988316554214, iteration: 289553
loss: 1.0316178798675537,grad_norm: 0.8144875990780102, iteration: 289554
loss: 1.0499379634857178,grad_norm: 0.9999992097584488, iteration: 289555
loss: 0.9968392848968506,grad_norm: 0.8485743836128462, iteration: 289556
loss: 1.0207240581512451,grad_norm: 0.7840106794099726, iteration: 289557
loss: 1.016501784324646,grad_norm: 0.9999991994557602, iteration: 289558
loss: 0.992127001285553,grad_norm: 0.9951035623039424, iteration: 289559
loss: 1.0251003503799438,grad_norm: 0.8044544268145757, iteration: 289560
loss: 0.9836000204086304,grad_norm: 0.7292181362819907, iteration: 289561
loss: 1.0018306970596313,grad_norm: 0.9999989790413032, iteration: 289562
loss: 1.0181077718734741,grad_norm: 0.9119059615151153, iteration: 289563
loss: 1.0040092468261719,grad_norm: 0.953381528473923, iteration: 289564
loss: 1.0552830696105957,grad_norm: 0.7380688491794061, iteration: 289565
loss: 1.040075659751892,grad_norm: 0.8118965921655211, iteration: 289566
loss: 1.0527372360229492,grad_norm: 0.9999991276857144, iteration: 289567
loss: 1.093510627746582,grad_norm: 0.9999998099956793, iteration: 289568
loss: 1.0172449350357056,grad_norm: 0.9259096247624264, iteration: 289569
loss: 1.0747685432434082,grad_norm: 0.9999996013463739, iteration: 289570
loss: 1.13474702835083,grad_norm: 0.9999993238934383, iteration: 289571
loss: 0.9767681360244751,grad_norm: 0.9113028068414388, iteration: 289572
loss: 1.0164915323257446,grad_norm: 0.7818913182373782, iteration: 289573
loss: 0.985292375087738,grad_norm: 0.7960216258940851, iteration: 289574
loss: 1.0175676345825195,grad_norm: 0.9762068343098717, iteration: 289575
loss: 0.9487103223800659,grad_norm: 0.9282870094858499, iteration: 289576
loss: 1.0099339485168457,grad_norm: 0.8226222929443439, iteration: 289577
loss: 1.0108031034469604,grad_norm: 0.7918295092734373, iteration: 289578
loss: 1.0218983888626099,grad_norm: 0.9669843032826767, iteration: 289579
loss: 1.019510269165039,grad_norm: 0.8206504730191085, iteration: 289580
loss: 1.0159757137298584,grad_norm: 0.9999994861756099, iteration: 289581
loss: 0.9974672198295593,grad_norm: 0.9148130355414925, iteration: 289582
loss: 0.9843892455101013,grad_norm: 0.999999558902254, iteration: 289583
loss: 1.0033605098724365,grad_norm: 0.9242252893920652, iteration: 289584
loss: 0.9819433093070984,grad_norm: 0.8617372943921807, iteration: 289585
loss: 1.1586031913757324,grad_norm: 0.999999141864264, iteration: 289586
loss: 0.9913999438285828,grad_norm: 0.8084370062031889, iteration: 289587
loss: 1.0182842016220093,grad_norm: 0.7582167892552588, iteration: 289588
loss: 0.9675247073173523,grad_norm: 0.8766878506109321, iteration: 289589
loss: 1.0108476877212524,grad_norm: 0.9999989808364659, iteration: 289590
loss: 0.999540388584137,grad_norm: 0.8392116778803865, iteration: 289591
loss: 1.0347563028335571,grad_norm: 0.8734234884819381, iteration: 289592
loss: 1.0320632457733154,grad_norm: 0.9999997999208448, iteration: 289593
loss: 1.0297962427139282,grad_norm: 0.9999992384035191, iteration: 289594
loss: 1.0314706563949585,grad_norm: 0.9999992593295539, iteration: 289595
loss: 1.018904447555542,grad_norm: 0.945235134250908, iteration: 289596
loss: 1.009771704673767,grad_norm: 0.9999990287997204, iteration: 289597
loss: 1.0630828142166138,grad_norm: 0.831800194861367, iteration: 289598
loss: 1.0406581163406372,grad_norm: 0.9704381412340979, iteration: 289599
loss: 0.9994648098945618,grad_norm: 0.7483923564954913, iteration: 289600
loss: 1.012892246246338,grad_norm: 0.781196978887757, iteration: 289601
loss: 1.0074901580810547,grad_norm: 0.8863446263907264, iteration: 289602
loss: 0.992777943611145,grad_norm: 0.9999994589174146, iteration: 289603
loss: 0.9981144666671753,grad_norm: 0.9309794010916299, iteration: 289604
loss: 1.0285006761550903,grad_norm: 1.0000000218110183, iteration: 289605
loss: 0.9965369701385498,grad_norm: 0.9999991363366462, iteration: 289606
loss: 1.0326862335205078,grad_norm: 0.903025743430009, iteration: 289607
loss: 0.9824298620223999,grad_norm: 0.8021560392023827, iteration: 289608
loss: 1.0080488920211792,grad_norm: 0.8553671473974035, iteration: 289609
loss: 0.9916902780532837,grad_norm: 0.9999991197372801, iteration: 289610
loss: 0.9945192933082581,grad_norm: 0.8334960249795674, iteration: 289611
loss: 0.992530882358551,grad_norm: 0.8331623660353026, iteration: 289612
loss: 0.9942030310630798,grad_norm: 0.7964747589648035, iteration: 289613
loss: 0.9974574446678162,grad_norm: 0.8032776014408602, iteration: 289614
loss: 1.0263639688491821,grad_norm: 0.8630458852759904, iteration: 289615
loss: 1.0176879167556763,grad_norm: 0.9999990637574442, iteration: 289616
loss: 1.0066192150115967,grad_norm: 0.9999991690682917, iteration: 289617
loss: 1.020982027053833,grad_norm: 0.9999990621675123, iteration: 289618
loss: 0.9785889387130737,grad_norm: 0.8393204213995307, iteration: 289619
loss: 1.0176193714141846,grad_norm: 0.9470157252230631, iteration: 289620
loss: 1.001004695892334,grad_norm: 0.9004424578419716, iteration: 289621
loss: 0.9850943684577942,grad_norm: 0.7652607715474522, iteration: 289622
loss: 0.9836048483848572,grad_norm: 0.7752446850335648, iteration: 289623
loss: 0.9869232177734375,grad_norm: 0.9556084400710907, iteration: 289624
loss: 0.9907314777374268,grad_norm: 0.847728331853451, iteration: 289625
loss: 1.1011427640914917,grad_norm: 0.9605783206357691, iteration: 289626
loss: 1.006204605102539,grad_norm: 0.7609517358021137, iteration: 289627
loss: 1.0221420526504517,grad_norm: 0.760935307246979, iteration: 289628
loss: 0.9969363808631897,grad_norm: 0.7724456303283576, iteration: 289629
loss: 0.9767892956733704,grad_norm: 0.8919481960997099, iteration: 289630
loss: 0.9562726616859436,grad_norm: 0.917876172758941, iteration: 289631
loss: 0.9932202100753784,grad_norm: 0.8380183406873218, iteration: 289632
loss: 1.0209734439849854,grad_norm: 0.9999991993731239, iteration: 289633
loss: 1.034148097038269,grad_norm: 0.9657049646841344, iteration: 289634
loss: 1.1236587762832642,grad_norm: 0.9999997812180347, iteration: 289635
loss: 0.9666655659675598,grad_norm: 0.8740241557042768, iteration: 289636
loss: 0.9914209842681885,grad_norm: 0.8178065601824086, iteration: 289637
loss: 0.9896814227104187,grad_norm: 0.9999994730837533, iteration: 289638
loss: 0.9995487332344055,grad_norm: 0.7327614269512619, iteration: 289639
loss: 1.0712543725967407,grad_norm: 0.9051446228228441, iteration: 289640
loss: 0.9954683780670166,grad_norm: 0.8311003938804625, iteration: 289641
loss: 1.029181957244873,grad_norm: 0.9999997625455999, iteration: 289642
loss: 0.9881547093391418,grad_norm: 0.9660781670644625, iteration: 289643
loss: 0.9860863089561462,grad_norm: 0.9999999407317446, iteration: 289644
loss: 1.1294819116592407,grad_norm: 0.9999995682155789, iteration: 289645
loss: 1.010902762413025,grad_norm: 0.8592415950856395, iteration: 289646
loss: 1.0009859800338745,grad_norm: 0.8323474684520732, iteration: 289647
loss: 1.0037107467651367,grad_norm: 0.9323197103893811, iteration: 289648
loss: 0.9968124628067017,grad_norm: 0.8281286415938349, iteration: 289649
loss: 1.0093228816986084,grad_norm: 0.9999992304545425, iteration: 289650
loss: 1.0325661897659302,grad_norm: 0.9658485583887031, iteration: 289651
loss: 1.0387134552001953,grad_norm: 0.9999996391983048, iteration: 289652
loss: 1.0663715600967407,grad_norm: 0.9999998547421838, iteration: 289653
loss: 1.063904881477356,grad_norm: 0.9999995160744681, iteration: 289654
loss: 1.0391765832901,grad_norm: 0.8936536766865947, iteration: 289655
loss: 1.0231329202651978,grad_norm: 0.9393649677026396, iteration: 289656
loss: 1.011277437210083,grad_norm: 0.8789787776931606, iteration: 289657
loss: 0.9970530867576599,grad_norm: 0.8658021525581442, iteration: 289658
loss: 1.1977534294128418,grad_norm: 0.999999905161535, iteration: 289659
loss: 0.9861388802528381,grad_norm: 0.8017685083329911, iteration: 289660
loss: 1.0255982875823975,grad_norm: 0.8432244546782919, iteration: 289661
loss: 1.0188106298446655,grad_norm: 0.9999990299222872, iteration: 289662
loss: 1.1255015134811401,grad_norm: 0.9999998922815999, iteration: 289663
loss: 1.0211626291275024,grad_norm: 0.9999991362104794, iteration: 289664
loss: 1.0673279762268066,grad_norm: 0.9999998615679543, iteration: 289665
loss: 1.0918233394622803,grad_norm: 0.9999991374819036, iteration: 289666
loss: 1.0249018669128418,grad_norm: 0.9963288971498564, iteration: 289667
loss: 1.0165414810180664,grad_norm: 0.7971053923894434, iteration: 289668
loss: 1.0745928287506104,grad_norm: 0.9999992330307431, iteration: 289669
loss: 1.0161648988723755,grad_norm: 0.8990534759884284, iteration: 289670
loss: 1.1111564636230469,grad_norm: 0.9999999273463435, iteration: 289671
loss: 1.0203145742416382,grad_norm: 0.8694807705375506, iteration: 289672
loss: 1.177003264427185,grad_norm: 0.9999992587943005, iteration: 289673
loss: 1.0446594953536987,grad_norm: 0.9999990981983247, iteration: 289674
loss: 1.0101604461669922,grad_norm: 0.9999991051486524, iteration: 289675
loss: 0.9904336929321289,grad_norm: 0.8481317726309987, iteration: 289676
loss: 1.0574367046356201,grad_norm: 0.9999999659835638, iteration: 289677
loss: 0.9625173807144165,grad_norm: 0.6944606332474912, iteration: 289678
loss: 1.0506819486618042,grad_norm: 0.8838592510843143, iteration: 289679
loss: 0.9730454683303833,grad_norm: 0.9065276499083753, iteration: 289680
loss: 0.9985623359680176,grad_norm: 0.99999902792414, iteration: 289681
loss: 1.0662572383880615,grad_norm: 0.9999992961373869, iteration: 289682
loss: 1.0287163257598877,grad_norm: 0.8686119563140914, iteration: 289683
loss: 1.040769100189209,grad_norm: 0.9999997347114395, iteration: 289684
loss: 1.0505574941635132,grad_norm: 1.0000000233543613, iteration: 289685
loss: 1.057704210281372,grad_norm: 0.9999995681484183, iteration: 289686
loss: 0.9930923581123352,grad_norm: 0.9715732804466068, iteration: 289687
loss: 1.0252119302749634,grad_norm: 0.849256785028396, iteration: 289688
loss: 1.171879529953003,grad_norm: 0.9999999826632116, iteration: 289689
loss: 1.0233814716339111,grad_norm: 0.9999992905446253, iteration: 289690
loss: 0.9912922978401184,grad_norm: 0.9999997765721745, iteration: 289691
loss: 1.0264465808868408,grad_norm: 0.7666612615224105, iteration: 289692
loss: 1.0805310010910034,grad_norm: 0.999999063901206, iteration: 289693
loss: 0.9726414680480957,grad_norm: 0.8812103071613334, iteration: 289694
loss: 1.0310781002044678,grad_norm: 0.9999993608815536, iteration: 289695
loss: 1.0259857177734375,grad_norm: 0.9396096735298839, iteration: 289696
loss: 0.9634566307067871,grad_norm: 0.999999070582811, iteration: 289697
loss: 1.1134310960769653,grad_norm: 0.9999989912552983, iteration: 289698
loss: 0.9799663424491882,grad_norm: 0.8246733193913454, iteration: 289699
loss: 1.013848900794983,grad_norm: 0.8755421499319308, iteration: 289700
loss: 0.9753953814506531,grad_norm: 0.8486538674416114, iteration: 289701
loss: 0.9841766357421875,grad_norm: 0.9999990658068446, iteration: 289702
loss: 0.9882569313049316,grad_norm: 0.7254121605938253, iteration: 289703
loss: 0.991655170917511,grad_norm: 0.8646224930786661, iteration: 289704
loss: 1.0089722871780396,grad_norm: 0.9943061772203332, iteration: 289705
loss: 1.0423407554626465,grad_norm: 0.9999994281905168, iteration: 289706
loss: 1.0105834007263184,grad_norm: 0.9708562802771813, iteration: 289707
loss: 1.0222208499908447,grad_norm: 0.8967501294156052, iteration: 289708
loss: 0.9891325235366821,grad_norm: 0.7700023632257276, iteration: 289709
loss: 1.0041184425354004,grad_norm: 0.9999990436445498, iteration: 289710
loss: 1.0185452699661255,grad_norm: 0.9303553354181741, iteration: 289711
loss: 1.0209765434265137,grad_norm: 0.9999989728376719, iteration: 289712
loss: 1.0270055532455444,grad_norm: 0.8489945004340852, iteration: 289713
loss: 1.009832739830017,grad_norm: 0.9999994399488641, iteration: 289714
loss: 1.0091841220855713,grad_norm: 0.9988543657389696, iteration: 289715
loss: 1.092904806137085,grad_norm: 0.9999996770588274, iteration: 289716
loss: 1.00177001953125,grad_norm: 0.9999991728726182, iteration: 289717
loss: 1.0128064155578613,grad_norm: 0.8973130003216304, iteration: 289718
loss: 1.0306389331817627,grad_norm: 0.9999998807294402, iteration: 289719
loss: 1.015722632408142,grad_norm: 0.9643785022711413, iteration: 289720
loss: 1.0107078552246094,grad_norm: 0.8671989580795719, iteration: 289721
loss: 1.0432891845703125,grad_norm: 0.8306184319347091, iteration: 289722
loss: 1.034254550933838,grad_norm: 0.9999991219633084, iteration: 289723
loss: 1.0087603330612183,grad_norm: 0.9999992112424787, iteration: 289724
loss: 0.9898501038551331,grad_norm: 0.8834775151932566, iteration: 289725
loss: 0.9753093719482422,grad_norm: 0.9999991498681512, iteration: 289726
loss: 0.977480947971344,grad_norm: 0.8292946559797716, iteration: 289727
loss: 1.0161727666854858,grad_norm: 0.9999990716054324, iteration: 289728
loss: 0.9878547787666321,grad_norm: 0.8398660507082093, iteration: 289729
loss: 0.9814879894256592,grad_norm: 0.8256703765480032, iteration: 289730
loss: 1.0077630281448364,grad_norm: 0.9999995144833029, iteration: 289731
loss: 1.1028952598571777,grad_norm: 0.9999999953081657, iteration: 289732
loss: 0.9989905953407288,grad_norm: 0.8217295963838535, iteration: 289733
loss: 1.0178231000900269,grad_norm: 0.8281104005012849, iteration: 289734
loss: 1.0351871252059937,grad_norm: 0.999999013088236, iteration: 289735
loss: 1.0001837015151978,grad_norm: 0.9018196009213935, iteration: 289736
loss: 0.9839543104171753,grad_norm: 0.8335128301826237, iteration: 289737
loss: 1.0154714584350586,grad_norm: 0.999999625731166, iteration: 289738
loss: 0.964753270149231,grad_norm: 0.7753003372251153, iteration: 289739
loss: 1.0487196445465088,grad_norm: 0.9999996602685683, iteration: 289740
loss: 1.0154844522476196,grad_norm: 0.9999992854720905, iteration: 289741
loss: 0.991478443145752,grad_norm: 0.9999989964379296, iteration: 289742
loss: 0.9894608855247498,grad_norm: 0.9723989905439164, iteration: 289743
loss: 0.9449235796928406,grad_norm: 0.7883723504845481, iteration: 289744
loss: 1.0121357440948486,grad_norm: 0.9368982445988558, iteration: 289745
loss: 0.9698553085327148,grad_norm: 0.8793871513492822, iteration: 289746
loss: 1.0215284824371338,grad_norm: 0.9033418561384413, iteration: 289747
loss: 0.973926305770874,grad_norm: 0.9999991176458116, iteration: 289748
loss: 0.9876090288162231,grad_norm: 0.8868427689915676, iteration: 289749
loss: 1.0535730123519897,grad_norm: 0.9157550945725668, iteration: 289750
loss: 0.9599758982658386,grad_norm: 0.8886621295748879, iteration: 289751
loss: 0.9972250461578369,grad_norm: 0.7522339898565862, iteration: 289752
loss: 1.1514678001403809,grad_norm: 0.9999991834612844, iteration: 289753
loss: 1.1218016147613525,grad_norm: 0.9999994706082534, iteration: 289754
loss: 1.057586431503296,grad_norm: 1.00000004939705, iteration: 289755
loss: 0.9858980178833008,grad_norm: 0.8250021538997961, iteration: 289756
loss: 1.1842912435531616,grad_norm: 0.9999999452156151, iteration: 289757
loss: 0.9604235887527466,grad_norm: 0.7439354159108984, iteration: 289758
loss: 1.0274964570999146,grad_norm: 0.9275415183856834, iteration: 289759
loss: 0.9906424283981323,grad_norm: 0.7704258200676544, iteration: 289760
loss: 1.009628176689148,grad_norm: 0.670215813905074, iteration: 289761
loss: 1.0370368957519531,grad_norm: 0.9999990875826111, iteration: 289762
loss: 0.9691529870033264,grad_norm: 0.9999988669410734, iteration: 289763
loss: 1.0977212190628052,grad_norm: 0.8830889793756236, iteration: 289764
loss: 1.047193169593811,grad_norm: 0.9999993514870678, iteration: 289765
loss: 0.9840649366378784,grad_norm: 0.8356479575483416, iteration: 289766
loss: 1.018237829208374,grad_norm: 0.9999996945861988, iteration: 289767
loss: 1.0201729536056519,grad_norm: 0.779224872805525, iteration: 289768
loss: 1.0628184080123901,grad_norm: 0.8210312210425678, iteration: 289769
loss: 0.9942381978034973,grad_norm: 0.8423572331314949, iteration: 289770
loss: 1.0083703994750977,grad_norm: 0.9999992323283007, iteration: 289771
loss: 0.9915041923522949,grad_norm: 0.9422784632863078, iteration: 289772
loss: 0.9852827191352844,grad_norm: 0.933035230895141, iteration: 289773
loss: 1.0121902227401733,grad_norm: 0.769478821122069, iteration: 289774
loss: 1.0818467140197754,grad_norm: 0.9999998935327261, iteration: 289775
loss: 0.9816759824752808,grad_norm: 0.9999992271382048, iteration: 289776
loss: 1.0127639770507812,grad_norm: 0.9999994689583455, iteration: 289777
loss: 1.0136816501617432,grad_norm: 0.8497676953787555, iteration: 289778
loss: 1.0600298643112183,grad_norm: 0.9999997541336422, iteration: 289779
loss: 1.0930676460266113,grad_norm: 0.999999396377115, iteration: 289780
loss: 1.0553303956985474,grad_norm: 0.9263149030043172, iteration: 289781
loss: 1.0226433277130127,grad_norm: 0.9999993188862613, iteration: 289782
loss: 0.980558454990387,grad_norm: 0.9333958751432742, iteration: 289783
loss: 1.0102113485336304,grad_norm: 0.8185471014284506, iteration: 289784
loss: 1.0095287561416626,grad_norm: 0.8140638060000356, iteration: 289785
loss: 1.0107430219650269,grad_norm: 0.9999997598302053, iteration: 289786
loss: 1.000745177268982,grad_norm: 0.8585642421812729, iteration: 289787
loss: 1.0145564079284668,grad_norm: 0.7348244548216448, iteration: 289788
loss: 0.9678843021392822,grad_norm: 0.9999991030131564, iteration: 289789
loss: 1.0277223587036133,grad_norm: 0.9999989754187981, iteration: 289790
loss: 1.108015537261963,grad_norm: 0.9999994607384395, iteration: 289791
loss: 1.029118537902832,grad_norm: 0.9999999384164309, iteration: 289792
loss: 0.9882497787475586,grad_norm: 0.9999992737294227, iteration: 289793
loss: 1.0084925889968872,grad_norm: 0.9999992342365875, iteration: 289794
loss: 1.0270397663116455,grad_norm: 0.9999998069292719, iteration: 289795
loss: 1.029727816581726,grad_norm: 0.9999994903634045, iteration: 289796
loss: 1.024153232574463,grad_norm: 0.8233472191539298, iteration: 289797
loss: 1.0140869617462158,grad_norm: 0.8615627934877624, iteration: 289798
loss: 1.0279414653778076,grad_norm: 0.9999998729758419, iteration: 289799
loss: 1.0456410646438599,grad_norm: 0.8100339758366196, iteration: 289800
loss: 0.9748293161392212,grad_norm: 0.8327552909355435, iteration: 289801
loss: 1.0818415880203247,grad_norm: 0.999999847837563, iteration: 289802
loss: 1.0537267923355103,grad_norm: 0.9999998344548869, iteration: 289803
loss: 1.0055596828460693,grad_norm: 0.9186529338189466, iteration: 289804
loss: 1.0324651002883911,grad_norm: 0.8081404223320074, iteration: 289805
loss: 0.9738226532936096,grad_norm: 0.8400988502082357, iteration: 289806
loss: 1.018082618713379,grad_norm: 0.8413746767234973, iteration: 289807
loss: 0.9376916885375977,grad_norm: 0.7327681906270046, iteration: 289808
loss: 1.0210851430892944,grad_norm: 0.7554378539493656, iteration: 289809
loss: 0.9935486912727356,grad_norm: 0.7310427679887337, iteration: 289810
loss: 1.0016868114471436,grad_norm: 0.999999131647279, iteration: 289811
loss: 0.9882028102874756,grad_norm: 0.9534363061871739, iteration: 289812
loss: 0.9887157082557678,grad_norm: 0.8512355680938175, iteration: 289813
loss: 0.959206223487854,grad_norm: 0.9999990625048402, iteration: 289814
loss: 1.056880235671997,grad_norm: 0.9999994508795795, iteration: 289815
loss: 1.0000028610229492,grad_norm: 0.9999991057892987, iteration: 289816
loss: 1.069744348526001,grad_norm: 0.8913534295009291, iteration: 289817
loss: 1.0186151266098022,grad_norm: 0.920981612655216, iteration: 289818
loss: 0.9941748380661011,grad_norm: 0.7151733486053566, iteration: 289819
loss: 1.0035792589187622,grad_norm: 0.9999992480204623, iteration: 289820
loss: 1.000566005706787,grad_norm: 0.7283695672582811, iteration: 289821
loss: 0.9916107058525085,grad_norm: 0.9999990842471624, iteration: 289822
loss: 0.9999333620071411,grad_norm: 0.9999993426726232, iteration: 289823
loss: 0.9709048271179199,grad_norm: 0.8027922463144392, iteration: 289824
loss: 1.0760961771011353,grad_norm: 0.9999995415665662, iteration: 289825
loss: 0.9959271550178528,grad_norm: 0.9999994974942804, iteration: 289826
loss: 1.0055880546569824,grad_norm: 0.80831839146588, iteration: 289827
loss: 1.0067086219787598,grad_norm: 0.9999991583080106, iteration: 289828
loss: 1.0936330556869507,grad_norm: 0.9999997178999233, iteration: 289829
loss: 1.0396937131881714,grad_norm: 0.7046116800950049, iteration: 289830
loss: 1.0341670513153076,grad_norm: 0.9916459816398725, iteration: 289831
loss: 1.0510227680206299,grad_norm: 0.9999995973470389, iteration: 289832
loss: 1.1021736860275269,grad_norm: 0.999999878897951, iteration: 289833
loss: 0.9785314202308655,grad_norm: 0.8516206573668258, iteration: 289834
loss: 1.0036406517028809,grad_norm: 0.9516646561821044, iteration: 289835
loss: 0.9646308422088623,grad_norm: 0.8803490495264382, iteration: 289836
loss: 1.0344809293746948,grad_norm: 0.7593717125439996, iteration: 289837
loss: 1.0300213098526,grad_norm: 1.0000000844769894, iteration: 289838
loss: 1.0383049249649048,grad_norm: 0.9999992988204511, iteration: 289839
loss: 1.0377777814865112,grad_norm: 0.8250680048817467, iteration: 289840
loss: 1.0216121673583984,grad_norm: 0.9116236968515269, iteration: 289841
loss: 1.0090349912643433,grad_norm: 0.7285901209731693, iteration: 289842
loss: 1.0159510374069214,grad_norm: 0.7557557352230949, iteration: 289843
loss: 1.0213371515274048,grad_norm: 0.9999993591855423, iteration: 289844
loss: 0.9793999195098877,grad_norm: 0.9999991836978058, iteration: 289845
loss: 1.0005338191986084,grad_norm: 0.9358466323011854, iteration: 289846
loss: 0.9975892901420593,grad_norm: 0.9097012695239246, iteration: 289847
loss: 1.0404449701309204,grad_norm: 0.9999990538590372, iteration: 289848
loss: 1.0049434900283813,grad_norm: 0.724069728545217, iteration: 289849
loss: 1.0067996978759766,grad_norm: 0.9485609964460827, iteration: 289850
loss: 0.980483889579773,grad_norm: 0.7925990888100823, iteration: 289851
loss: 1.0085182189941406,grad_norm: 0.9777487724433943, iteration: 289852
loss: 0.9868676066398621,grad_norm: 0.7563104056164569, iteration: 289853
loss: 0.9804088473320007,grad_norm: 0.8162653073212999, iteration: 289854
loss: 0.9873377680778503,grad_norm: 0.8557529358913984, iteration: 289855
loss: 0.9768877029418945,grad_norm: 0.7885791330766287, iteration: 289856
loss: 0.9827427864074707,grad_norm: 0.9871233753473364, iteration: 289857
loss: 1.0408341884613037,grad_norm: 0.9999992381778396, iteration: 289858
loss: 1.0378772020339966,grad_norm: 0.8628187862703889, iteration: 289859
loss: 1.011026382446289,grad_norm: 0.9558421325942338, iteration: 289860
loss: 0.987076461315155,grad_norm: 0.9926374357346075, iteration: 289861
loss: 1.0132415294647217,grad_norm: 0.9115968929012889, iteration: 289862
loss: 1.006075143814087,grad_norm: 0.8739083381246303, iteration: 289863
loss: 1.0036433935165405,grad_norm: 0.7592980277200027, iteration: 289864
loss: 0.9821081757545471,grad_norm: 0.9999991175813794, iteration: 289865
loss: 0.9853501915931702,grad_norm: 0.7901623725266632, iteration: 289866
loss: 0.9782886505126953,grad_norm: 0.8409101985092751, iteration: 289867
loss: 1.0313971042633057,grad_norm: 0.9999991323073648, iteration: 289868
loss: 1.0012831687927246,grad_norm: 0.8646554474748482, iteration: 289869
loss: 1.0132489204406738,grad_norm: 0.8733590311929997, iteration: 289870
loss: 0.9595299363136292,grad_norm: 0.9576804921242504, iteration: 289871
loss: 1.0747370719909668,grad_norm: 0.9181071119744659, iteration: 289872
loss: 0.9851272702217102,grad_norm: 0.7418175721574031, iteration: 289873
loss: 1.0170568227767944,grad_norm: 0.739647790223126, iteration: 289874
loss: 1.020219326019287,grad_norm: 0.9144852394869039, iteration: 289875
loss: 1.0164121389389038,grad_norm: 0.9999992215459705, iteration: 289876
loss: 0.9752964973449707,grad_norm: 0.9999990582205943, iteration: 289877
loss: 1.0121738910675049,grad_norm: 0.9722344382543728, iteration: 289878
loss: 1.0016547441482544,grad_norm: 0.999999189357094, iteration: 289879
loss: 1.0079559087753296,grad_norm: 0.8937639280845012, iteration: 289880
loss: 0.9672284126281738,grad_norm: 0.999999085096866, iteration: 289881
loss: 0.97557133436203,grad_norm: 0.9999993043322645, iteration: 289882
loss: 0.9943879842758179,grad_norm: 0.7842004274347296, iteration: 289883
loss: 0.9838837385177612,grad_norm: 0.9999991037026054, iteration: 289884
loss: 1.013103723526001,grad_norm: 0.9044100621918912, iteration: 289885
loss: 1.0315169095993042,grad_norm: 0.7464309101456217, iteration: 289886
loss: 0.948769211769104,grad_norm: 0.9429440470620416, iteration: 289887
loss: 1.0492724180221558,grad_norm: 0.9999991806065189, iteration: 289888
loss: 1.013548731803894,grad_norm: 0.9999991097061104, iteration: 289889
loss: 0.985013484954834,grad_norm: 0.8700274035571485, iteration: 289890
loss: 0.9602603316307068,grad_norm: 0.9323902728037816, iteration: 289891
loss: 0.9697852730751038,grad_norm: 0.7915745326252667, iteration: 289892
loss: 1.0095089673995972,grad_norm: 0.9406953258213084, iteration: 289893
loss: 1.009749174118042,grad_norm: 0.7466484177791285, iteration: 289894
loss: 1.0192465782165527,grad_norm: 0.8327479800134913, iteration: 289895
loss: 1.0167683362960815,grad_norm: 0.9999996164018632, iteration: 289896
loss: 0.9708454012870789,grad_norm: 0.8044819260110425, iteration: 289897
loss: 0.9902336001396179,grad_norm: 0.8289277189419647, iteration: 289898
loss: 0.9949871301651001,grad_norm: 0.6726779159601027, iteration: 289899
loss: 0.9892289638519287,grad_norm: 0.9999991066767673, iteration: 289900
loss: 0.9566890001296997,grad_norm: 0.8320722833119468, iteration: 289901
loss: 0.9749220609664917,grad_norm: 0.9067909647034024, iteration: 289902
loss: 1.0174145698547363,grad_norm: 0.7784183911998256, iteration: 289903
loss: 1.0290310382843018,grad_norm: 0.9549114053333673, iteration: 289904
loss: 1.035470962524414,grad_norm: 0.7520138016946546, iteration: 289905
loss: 1.0173399448394775,grad_norm: 0.9517191563806923, iteration: 289906
loss: 1.0022286176681519,grad_norm: 0.8025625635129006, iteration: 289907
loss: 0.9602013826370239,grad_norm: 0.847036589024309, iteration: 289908
loss: 1.0051263570785522,grad_norm: 0.7289899106917206, iteration: 289909
loss: 0.9519190192222595,grad_norm: 0.9999990231619533, iteration: 289910
loss: 0.9896801114082336,grad_norm: 0.7346791685759698, iteration: 289911
loss: 0.9759789109230042,grad_norm: 0.7804173071909499, iteration: 289912
loss: 0.9741944670677185,grad_norm: 0.8787398850497495, iteration: 289913
loss: 0.9947571754455566,grad_norm: 0.8115766487125095, iteration: 289914
loss: 1.0099499225616455,grad_norm: 0.8246639454291194, iteration: 289915
loss: 1.0294891595840454,grad_norm: 0.8176566337554408, iteration: 289916
loss: 1.0327754020690918,grad_norm: 0.7930752984217603, iteration: 289917
loss: 1.01057767868042,grad_norm: 0.8058627149968236, iteration: 289918
loss: 0.9792160987854004,grad_norm: 0.8809362423332757, iteration: 289919
loss: 0.9804955124855042,grad_norm: 0.9999991544130257, iteration: 289920
loss: 0.9811701774597168,grad_norm: 0.8088910367826325, iteration: 289921
loss: 1.019912600517273,grad_norm: 0.933982733556522, iteration: 289922
loss: 1.0042482614517212,grad_norm: 0.9198771625359675, iteration: 289923
loss: 1.0100047588348389,grad_norm: 0.9531927183760588, iteration: 289924
loss: 1.0278737545013428,grad_norm: 0.8355888803199307, iteration: 289925
loss: 1.0095710754394531,grad_norm: 0.9999991045384371, iteration: 289926
loss: 1.0413367748260498,grad_norm: 0.8348345199026411, iteration: 289927
loss: 1.0359437465667725,grad_norm: 0.9999992181435023, iteration: 289928
loss: 1.0121982097625732,grad_norm: 0.7810888740054899, iteration: 289929
loss: 1.1015712022781372,grad_norm: 0.9999998109394714, iteration: 289930
loss: 0.9779177904129028,grad_norm: 0.7802193404975689, iteration: 289931
loss: 1.0307949781417847,grad_norm: 0.9844215972112647, iteration: 289932
loss: 1.0319616794586182,grad_norm: 0.9318197596140642, iteration: 289933
loss: 1.0355123281478882,grad_norm: 0.9999992285635694, iteration: 289934
loss: 1.0174543857574463,grad_norm: 0.8565432771901672, iteration: 289935
loss: 1.0100009441375732,grad_norm: 0.9999990953726442, iteration: 289936
loss: 1.0332072973251343,grad_norm: 0.99999920528204, iteration: 289937
loss: 0.9907163381576538,grad_norm: 0.8086931235612485, iteration: 289938
loss: 0.9973759651184082,grad_norm: 0.9999990924053976, iteration: 289939
loss: 0.9704413414001465,grad_norm: 0.8664005729554801, iteration: 289940
loss: 0.9934358596801758,grad_norm: 0.699303362312143, iteration: 289941
loss: 0.9711775183677673,grad_norm: 0.9982823961846291, iteration: 289942
loss: 0.9923352003097534,grad_norm: 0.9298719019182781, iteration: 289943
loss: 1.009770393371582,grad_norm: 0.9999992126228411, iteration: 289944
loss: 0.9665107131004333,grad_norm: 0.8882467615808546, iteration: 289945
loss: 0.9950395226478577,grad_norm: 0.9286270657942369, iteration: 289946
loss: 1.0368200540542603,grad_norm: 0.9999992845769315, iteration: 289947
loss: 1.0252443552017212,grad_norm: 0.9243996863757562, iteration: 289948
loss: 0.9787125587463379,grad_norm: 0.8780997268653562, iteration: 289949
loss: 1.0074927806854248,grad_norm: 0.7392743856918287, iteration: 289950
loss: 1.033689260482788,grad_norm: 0.9131211552926437, iteration: 289951
loss: 0.9918700456619263,grad_norm: 0.9164214056866214, iteration: 289952
loss: 1.0156522989273071,grad_norm: 0.8480427759775848, iteration: 289953
loss: 0.9790756106376648,grad_norm: 0.825755185505318, iteration: 289954
loss: 1.02928626537323,grad_norm: 0.8307404491769281, iteration: 289955
loss: 0.9680911302566528,grad_norm: 0.7838384403602467, iteration: 289956
loss: 0.979745626449585,grad_norm: 0.8890748459033114, iteration: 289957
loss: 0.998091459274292,grad_norm: 0.7102679440902679, iteration: 289958
loss: 1.0122549533843994,grad_norm: 0.9999999369368313, iteration: 289959
loss: 0.9747534990310669,grad_norm: 0.8856149099368326, iteration: 289960
loss: 0.9968775510787964,grad_norm: 0.8430361917831394, iteration: 289961
loss: 0.9822686910629272,grad_norm: 0.8367896902764163, iteration: 289962
loss: 1.1090373992919922,grad_norm: 0.8192918604286774, iteration: 289963
loss: 0.9829146265983582,grad_norm: 0.817339454043749, iteration: 289964
loss: 1.050839900970459,grad_norm: 0.829274934522203, iteration: 289965
loss: 0.9865626096725464,grad_norm: 0.9317142773291043, iteration: 289966
loss: 0.9657753705978394,grad_norm: 0.840304004973508, iteration: 289967
loss: 1.0362434387207031,grad_norm: 0.8151973835381653, iteration: 289968
loss: 0.9934132099151611,grad_norm: 0.8618313107318256, iteration: 289969
loss: 1.0070748329162598,grad_norm: 0.9999991811203325, iteration: 289970
loss: 1.0206226110458374,grad_norm: 0.8765678560133937, iteration: 289971
loss: 0.9641929268836975,grad_norm: 0.6840050675354119, iteration: 289972
loss: 1.0039278268814087,grad_norm: 0.8200691523738651, iteration: 289973
loss: 1.0139309167861938,grad_norm: 0.8773886081077695, iteration: 289974
loss: 1.01221764087677,grad_norm: 0.7362520969955193, iteration: 289975
loss: 1.0396605730056763,grad_norm: 0.9999992805413431, iteration: 289976
loss: 1.036647081375122,grad_norm: 0.9665717897535837, iteration: 289977
loss: 1.024949073791504,grad_norm: 0.9585743107587937, iteration: 289978
loss: 1.0133863687515259,grad_norm: 0.6758091928249247, iteration: 289979
loss: 1.0541950464248657,grad_norm: 0.9999992954991168, iteration: 289980
loss: 1.0021476745605469,grad_norm: 0.9999989952512836, iteration: 289981
loss: 1.002803921699524,grad_norm: 0.8036880321728942, iteration: 289982
loss: 0.9406647086143494,grad_norm: 0.8894678409617094, iteration: 289983
loss: 0.9877458810806274,grad_norm: 0.9999990478936115, iteration: 289984
loss: 1.00131356716156,grad_norm: 0.8545047098762012, iteration: 289985
loss: 0.9925714135169983,grad_norm: 0.8181631767448189, iteration: 289986
loss: 1.1071312427520752,grad_norm: 0.9999997966285968, iteration: 289987
loss: 0.9742892980575562,grad_norm: 0.8422415197142794, iteration: 289988
loss: 1.0083214044570923,grad_norm: 0.9001052465835182, iteration: 289989
loss: 1.0202133655548096,grad_norm: 0.8157455318486468, iteration: 289990
loss: 1.0086826086044312,grad_norm: 0.9999990287482502, iteration: 289991
loss: 1.022943139076233,grad_norm: 0.791366693943393, iteration: 289992
loss: 1.0244991779327393,grad_norm: 0.7862013393475988, iteration: 289993
loss: 0.9902452826499939,grad_norm: 0.9277708578823876, iteration: 289994
loss: 0.9998822212219238,grad_norm: 0.9264953776831553, iteration: 289995
loss: 0.9991164207458496,grad_norm: 0.9470943566032478, iteration: 289996
loss: 1.0052295923233032,grad_norm: 0.7522148993760388, iteration: 289997
loss: 0.9726851582527161,grad_norm: 0.9263753547609265, iteration: 289998
loss: 1.0145516395568848,grad_norm: 0.999999241330619, iteration: 289999
loss: 0.9601017832756042,grad_norm: 0.9573417010565783, iteration: 290000
Evaluating at step 290000
{'val': 0.9969946034252644, 'test': 1.976707385751767}
loss: 1.0034942626953125,grad_norm: 0.9999992282160403, iteration: 290001
loss: 0.9897037148475647,grad_norm: 0.9149220981583313, iteration: 290002
loss: 0.9886342883110046,grad_norm: 0.8842875145238431, iteration: 290003
loss: 0.971510112285614,grad_norm: 0.806760618803678, iteration: 290004
loss: 1.0074589252471924,grad_norm: 0.9518358340526102, iteration: 290005
loss: 0.9901980757713318,grad_norm: 0.7712118973010358, iteration: 290006
loss: 1.0181571245193481,grad_norm: 0.8724275467572763, iteration: 290007
loss: 0.9707621335983276,grad_norm: 0.9999991080238904, iteration: 290008
loss: 0.9840930700302124,grad_norm: 0.9999990720480013, iteration: 290009
loss: 1.00749933719635,grad_norm: 0.9765807124294489, iteration: 290010
loss: 0.993746817111969,grad_norm: 0.8281762210364452, iteration: 290011
loss: 0.964834451675415,grad_norm: 0.81729231913632, iteration: 290012
loss: 0.9938161969184875,grad_norm: 0.9999993368635997, iteration: 290013
loss: 1.0320554971694946,grad_norm: 0.8661419815418802, iteration: 290014
loss: 1.0333328247070312,grad_norm: 0.9999999237649728, iteration: 290015
loss: 0.9912552237510681,grad_norm: 0.8223108357625545, iteration: 290016
loss: 1.0978295803070068,grad_norm: 1.00000000500356, iteration: 290017
loss: 0.9814271330833435,grad_norm: 0.9733358267034914, iteration: 290018
loss: 0.9708554744720459,grad_norm: 0.8085577355563073, iteration: 290019
loss: 1.0119524002075195,grad_norm: 0.8851429798932291, iteration: 290020
loss: 1.0178720951080322,grad_norm: 0.8290885834396997, iteration: 290021
loss: 0.9840207099914551,grad_norm: 0.7491910836651874, iteration: 290022
loss: 0.9844061732292175,grad_norm: 0.8343768880613031, iteration: 290023
loss: 0.9796420335769653,grad_norm: 0.8763865061745247, iteration: 290024
loss: 1.0314291715621948,grad_norm: 0.9999991441766004, iteration: 290025
loss: 1.0262483358383179,grad_norm: 0.7692307863255001, iteration: 290026
loss: 0.9895403385162354,grad_norm: 0.7711382651821826, iteration: 290027
loss: 0.9884417057037354,grad_norm: 0.832284020018175, iteration: 290028
loss: 0.9643032550811768,grad_norm: 0.8313433709332098, iteration: 290029
loss: 1.0009912252426147,grad_norm: 0.8637316695544042, iteration: 290030
loss: 1.0157668590545654,grad_norm: 0.9548980196117303, iteration: 290031
loss: 0.9961146116256714,grad_norm: 0.9309773542717529, iteration: 290032
loss: 0.9872495532035828,grad_norm: 0.9302262221911994, iteration: 290033
loss: 0.982999861240387,grad_norm: 0.9530984431041899, iteration: 290034
loss: 0.9915755987167358,grad_norm: 0.7084612840864055, iteration: 290035
loss: 0.9799190759658813,grad_norm: 0.9399892896138613, iteration: 290036
loss: 1.0015573501586914,grad_norm: 0.8874765702341203, iteration: 290037
loss: 0.9936136603355408,grad_norm: 0.7923022272879602, iteration: 290038
loss: 0.9770174622535706,grad_norm: 0.999999061111856, iteration: 290039
loss: 0.9836588501930237,grad_norm: 0.7925520880702452, iteration: 290040
loss: 0.9711514711380005,grad_norm: 0.8108217680893254, iteration: 290041
loss: 1.0018726587295532,grad_norm: 0.8408362630092733, iteration: 290042
loss: 0.9752746224403381,grad_norm: 0.7214187367494675, iteration: 290043
loss: 1.0980991125106812,grad_norm: 0.9999991299780527, iteration: 290044
loss: 1.016539216041565,grad_norm: 0.8324884147560274, iteration: 290045
loss: 0.974063515663147,grad_norm: 0.7860358385473178, iteration: 290046
loss: 1.0152359008789062,grad_norm: 0.9261859221525124, iteration: 290047
loss: 0.9693519473075867,grad_norm: 0.7670886678598332, iteration: 290048
loss: 0.9801151752471924,grad_norm: 0.8305424177743236, iteration: 290049
loss: 0.9898382425308228,grad_norm: 0.8468548510846017, iteration: 290050
loss: 0.9734041094779968,grad_norm: 0.831073785046154, iteration: 290051
loss: 1.0540351867675781,grad_norm: 0.9999999418650765, iteration: 290052
loss: 1.0127133131027222,grad_norm: 0.824629889983682, iteration: 290053
loss: 1.0055818557739258,grad_norm: 0.9999999419424501, iteration: 290054
loss: 1.019978642463684,grad_norm: 0.8889302169525306, iteration: 290055
loss: 1.0016858577728271,grad_norm: 0.9687250228448943, iteration: 290056
loss: 1.0203734636306763,grad_norm: 0.999999003475601, iteration: 290057
loss: 0.9942770004272461,grad_norm: 0.8630255328406721, iteration: 290058
loss: 0.9680187106132507,grad_norm: 0.8819439703838882, iteration: 290059
loss: 0.9954285025596619,grad_norm: 0.9917898761501661, iteration: 290060
loss: 0.9850871562957764,grad_norm: 0.9999991356626642, iteration: 290061
loss: 0.9685792326927185,grad_norm: 0.9999989822566938, iteration: 290062
loss: 1.031488299369812,grad_norm: 0.9110529341212616, iteration: 290063
loss: 1.0468493700027466,grad_norm: 0.9568248402213646, iteration: 290064
loss: 1.0123940706253052,grad_norm: 0.957081350711835, iteration: 290065
loss: 0.9659379720687866,grad_norm: 0.9999990321625952, iteration: 290066
loss: 1.035682201385498,grad_norm: 0.925798948440035, iteration: 290067
loss: 1.0039892196655273,grad_norm: 0.742778721132168, iteration: 290068
loss: 0.9917800426483154,grad_norm: 0.7067502908084513, iteration: 290069
loss: 0.9421775341033936,grad_norm: 0.7645089554956304, iteration: 290070
loss: 1.018802523612976,grad_norm: 0.8892256880770135, iteration: 290071
loss: 0.9904718995094299,grad_norm: 0.8928773355586701, iteration: 290072
loss: 1.0097211599349976,grad_norm: 0.8590938918212907, iteration: 290073
loss: 0.9698498845100403,grad_norm: 0.7876468261573473, iteration: 290074
loss: 0.9983074069023132,grad_norm: 0.7686471211572803, iteration: 290075
loss: 0.9896670579910278,grad_norm: 0.8697243828212651, iteration: 290076
loss: 0.979491651058197,grad_norm: 0.823301692298048, iteration: 290077
loss: 1.0205153226852417,grad_norm: 0.8410963130017307, iteration: 290078
loss: 1.1271915435791016,grad_norm: 0.9999997358020556, iteration: 290079
loss: 1.0284887552261353,grad_norm: 0.8781798361013526, iteration: 290080
loss: 0.987097978591919,grad_norm: 0.858088105870584, iteration: 290081
loss: 0.9977521896362305,grad_norm: 0.9999992042284912, iteration: 290082
loss: 0.9578549861907959,grad_norm: 0.9999992531055609, iteration: 290083
loss: 1.0093318223953247,grad_norm: 0.766529954700414, iteration: 290084
loss: 0.983052134513855,grad_norm: 0.7132918010435584, iteration: 290085
loss: 1.0154979228973389,grad_norm: 0.9961438395198018, iteration: 290086
loss: 0.9905065894126892,grad_norm: 0.8104192384686962, iteration: 290087
loss: 1.0071074962615967,grad_norm: 0.9589369250138913, iteration: 290088
loss: 1.0094249248504639,grad_norm: 0.7521091156877212, iteration: 290089
loss: 1.0074362754821777,grad_norm: 0.7715431965435734, iteration: 290090
loss: 1.0148478746414185,grad_norm: 0.901166250346321, iteration: 290091
loss: 0.99811851978302,grad_norm: 0.8269028878602429, iteration: 290092
loss: 1.0030107498168945,grad_norm: 0.9999993365996032, iteration: 290093
loss: 0.9783668518066406,grad_norm: 0.9999991297643263, iteration: 290094
loss: 0.9819333553314209,grad_norm: 0.8891002674980276, iteration: 290095
loss: 1.0310161113739014,grad_norm: 0.9163676773308939, iteration: 290096
loss: 0.9904701113700867,grad_norm: 0.7335988473694922, iteration: 290097
loss: 1.0388600826263428,grad_norm: 0.9102344665215367, iteration: 290098
loss: 1.0018885135650635,grad_norm: 0.7533311977074633, iteration: 290099
loss: 0.966017484664917,grad_norm: 0.7870569663363385, iteration: 290100
loss: 1.0050138235092163,grad_norm: 0.9999990378245949, iteration: 290101
loss: 0.964594304561615,grad_norm: 0.9999989892266598, iteration: 290102
loss: 1.000998616218567,grad_norm: 0.7742329908576808, iteration: 290103
loss: 0.9812445044517517,grad_norm: 0.927681670535013, iteration: 290104
loss: 1.0029056072235107,grad_norm: 0.9999991171025391, iteration: 290105
loss: 1.0180495977401733,grad_norm: 0.921946711335578, iteration: 290106
loss: 0.9682238698005676,grad_norm: 0.874344601423979, iteration: 290107
loss: 1.0047982931137085,grad_norm: 0.7444149711235306, iteration: 290108
loss: 1.0245782136917114,grad_norm: 0.7798854423109793, iteration: 290109
loss: 1.0170643329620361,grad_norm: 0.9419637850616626, iteration: 290110
loss: 1.0483548641204834,grad_norm: 0.8659759927014666, iteration: 290111
loss: 1.0195908546447754,grad_norm: 0.9999990948860872, iteration: 290112
loss: 0.9842774271965027,grad_norm: 0.9536064407306079, iteration: 290113
loss: 1.005849003791809,grad_norm: 0.7363099550909278, iteration: 290114
loss: 1.0137429237365723,grad_norm: 0.8179564600885556, iteration: 290115
loss: 1.0193109512329102,grad_norm: 0.8977015409577419, iteration: 290116
loss: 1.0177452564239502,grad_norm: 0.7839944566149883, iteration: 290117
loss: 1.0057892799377441,grad_norm: 0.8235286157266407, iteration: 290118
loss: 1.0059314966201782,grad_norm: 0.9178100065179231, iteration: 290119
loss: 1.0155309438705444,grad_norm: 0.9063692211282581, iteration: 290120
loss: 1.0154098272323608,grad_norm: 0.8196233060581022, iteration: 290121
loss: 1.0332248210906982,grad_norm: 0.8512467468643415, iteration: 290122
loss: 0.9960070848464966,grad_norm: 0.7589637202934276, iteration: 290123
loss: 1.01859450340271,grad_norm: 0.8284483807663171, iteration: 290124
loss: 1.0239225625991821,grad_norm: 0.9531902623179054, iteration: 290125
loss: 0.9557259678840637,grad_norm: 0.9999997965342876, iteration: 290126
loss: 1.0012956857681274,grad_norm: 0.8930429445523034, iteration: 290127
loss: 0.956585168838501,grad_norm: 0.9903985176442215, iteration: 290128
loss: 0.9900192618370056,grad_norm: 0.8027209437866167, iteration: 290129
loss: 0.977957546710968,grad_norm: 0.9493714725977325, iteration: 290130
loss: 0.9918391108512878,grad_norm: 0.9999991124243699, iteration: 290131
loss: 0.9913355708122253,grad_norm: 0.8540270504585497, iteration: 290132
loss: 1.036247730255127,grad_norm: 0.7922189815201595, iteration: 290133
loss: 0.9984951019287109,grad_norm: 0.7015813790710604, iteration: 290134
loss: 1.0452282428741455,grad_norm: 0.9999991259981325, iteration: 290135
loss: 1.0275225639343262,grad_norm: 0.8527590236623761, iteration: 290136
loss: 1.0206949710845947,grad_norm: 0.999999005437724, iteration: 290137
loss: 1.0184905529022217,grad_norm: 0.8893414391516087, iteration: 290138
loss: 0.9982848763465881,grad_norm: 0.79917847181256, iteration: 290139
loss: 0.9918391704559326,grad_norm: 0.7690319728745224, iteration: 290140
loss: 1.0209137201309204,grad_norm: 0.9201516663977264, iteration: 290141
loss: 0.9829657077789307,grad_norm: 0.9212346807695194, iteration: 290142
loss: 0.9990224838256836,grad_norm: 0.9999993555098581, iteration: 290143
loss: 1.0212479829788208,grad_norm: 0.7524968124576901, iteration: 290144
loss: 1.005776286125183,grad_norm: 0.8421136802378084, iteration: 290145
loss: 0.9640529751777649,grad_norm: 0.8429593128626213, iteration: 290146
loss: 0.977007269859314,grad_norm: 0.7276045626771005, iteration: 290147
loss: 1.0410785675048828,grad_norm: 0.893102926691083, iteration: 290148
loss: 0.9491189122200012,grad_norm: 0.9710367763672205, iteration: 290149
loss: 0.9929250478744507,grad_norm: 0.7692692964485205, iteration: 290150
loss: 1.01253342628479,grad_norm: 0.8255273477081551, iteration: 290151
loss: 0.9911977052688599,grad_norm: 0.9036663297827319, iteration: 290152
loss: 1.0057083368301392,grad_norm: 0.8422814427701053, iteration: 290153
loss: 1.0190792083740234,grad_norm: 0.8738193927106535, iteration: 290154
loss: 0.9835013747215271,grad_norm: 0.9504068042317273, iteration: 290155
loss: 1.044887661933899,grad_norm: 0.9620468618603161, iteration: 290156
loss: 1.0483717918395996,grad_norm: 0.8430352559698626, iteration: 290157
loss: 1.0163472890853882,grad_norm: 0.7833004786143553, iteration: 290158
loss: 1.0241434574127197,grad_norm: 0.852681991601657, iteration: 290159
loss: 1.0239499807357788,grad_norm: 0.9654756170946166, iteration: 290160
loss: 0.9405456781387329,grad_norm: 0.8862676190872136, iteration: 290161
loss: 0.999418318271637,grad_norm: 0.8888579889631001, iteration: 290162
loss: 1.0073316097259521,grad_norm: 0.8825360812531046, iteration: 290163
loss: 1.0056589841842651,grad_norm: 0.9999992305327844, iteration: 290164
loss: 1.0102083683013916,grad_norm: 0.9444989027319891, iteration: 290165
loss: 1.0070898532867432,grad_norm: 0.7421451800964687, iteration: 290166
loss: 1.0030640363693237,grad_norm: 0.9877125278352406, iteration: 290167
loss: 1.0130984783172607,grad_norm: 0.9523663757243911, iteration: 290168
loss: 1.0315592288970947,grad_norm: 0.9011998882689893, iteration: 290169
loss: 0.9992491006851196,grad_norm: 0.8484843014901466, iteration: 290170
loss: 0.9789817333221436,grad_norm: 0.9489432519173345, iteration: 290171
loss: 1.0211440324783325,grad_norm: 0.9999990768227637, iteration: 290172
loss: 1.0074856281280518,grad_norm: 0.9138778734072648, iteration: 290173
loss: 0.9837321043014526,grad_norm: 0.8749336350528555, iteration: 290174
loss: 1.0199755430221558,grad_norm: 0.9351430689056339, iteration: 290175
loss: 0.9850634336471558,grad_norm: 0.9390924974267845, iteration: 290176
loss: 1.0252025127410889,grad_norm: 0.9999991710207712, iteration: 290177
loss: 0.9936421513557434,grad_norm: 0.9999999349577728, iteration: 290178
loss: 0.988876223564148,grad_norm: 0.7292690144163261, iteration: 290179
loss: 1.0245851278305054,grad_norm: 0.9029215428384856, iteration: 290180
loss: 0.9688053727149963,grad_norm: 0.9571356672008592, iteration: 290181
loss: 0.9853346347808838,grad_norm: 0.8781398597744706, iteration: 290182
loss: 1.0201750993728638,grad_norm: 0.8035343752333715, iteration: 290183
loss: 1.1161446571350098,grad_norm: 0.9999991267411311, iteration: 290184
loss: 1.0029873847961426,grad_norm: 0.8617911989565331, iteration: 290185
loss: 1.0002130270004272,grad_norm: 0.8762474969210088, iteration: 290186
loss: 1.0179895162582397,grad_norm: 0.8780154493535345, iteration: 290187
loss: 0.9881933331489563,grad_norm: 0.8186174610037711, iteration: 290188
loss: 1.0094717741012573,grad_norm: 0.8450585849163346, iteration: 290189
loss: 1.0244512557983398,grad_norm: 0.9268552563611245, iteration: 290190
loss: 0.9899460077285767,grad_norm: 0.8462811563311843, iteration: 290191
loss: 1.0054129362106323,grad_norm: 0.9178224588231059, iteration: 290192
loss: 1.003777027130127,grad_norm: 0.987843023460025, iteration: 290193
loss: 0.9979171752929688,grad_norm: 0.8360107529812362, iteration: 290194
loss: 0.9957031607627869,grad_norm: 0.8695262993654975, iteration: 290195
loss: 0.9774001836776733,grad_norm: 0.824697253207384, iteration: 290196
loss: 0.9816228151321411,grad_norm: 0.999999186366409, iteration: 290197
loss: 0.9954177141189575,grad_norm: 0.9257644209580699, iteration: 290198
loss: 1.008916974067688,grad_norm: 0.8475670445213089, iteration: 290199
loss: 1.0854004621505737,grad_norm: 0.9443314699619323, iteration: 290200
loss: 0.9968898296356201,grad_norm: 0.8280724765041342, iteration: 290201
loss: 1.0076446533203125,grad_norm: 0.7587773493174377, iteration: 290202
loss: 1.0309311151504517,grad_norm: 0.8199188818607167, iteration: 290203
loss: 0.9844880104064941,grad_norm: 0.7658539183852512, iteration: 290204
loss: 1.0462727546691895,grad_norm: 0.9891212995676057, iteration: 290205
loss: 0.9800060391426086,grad_norm: 0.8535646486907204, iteration: 290206
loss: 1.0121403932571411,grad_norm: 0.9870263643899055, iteration: 290207
loss: 1.0045198202133179,grad_norm: 0.7623469348167904, iteration: 290208
loss: 1.0436627864837646,grad_norm: 0.7901263902880193, iteration: 290209
loss: 1.0338410139083862,grad_norm: 0.8984238522236476, iteration: 290210
loss: 1.026814579963684,grad_norm: 0.9999990169784563, iteration: 290211
loss: 1.023515224456787,grad_norm: 0.8090446563707548, iteration: 290212
loss: 0.9856079816818237,grad_norm: 0.9963773744271205, iteration: 290213
loss: 1.0396709442138672,grad_norm: 0.706254225957491, iteration: 290214
loss: 0.9903377294540405,grad_norm: 0.9917763002561071, iteration: 290215
loss: 0.9793246388435364,grad_norm: 0.8921722793666755, iteration: 290216
loss: 1.0930169820785522,grad_norm: 0.9999996845595494, iteration: 290217
loss: 0.983955442905426,grad_norm: 0.8806992907621016, iteration: 290218
loss: 0.9944488406181335,grad_norm: 0.8185907561790408, iteration: 290219
loss: 1.0073469877243042,grad_norm: 0.9999991266414133, iteration: 290220
loss: 0.9821733832359314,grad_norm: 0.8199263500100908, iteration: 290221
loss: 0.9808874130249023,grad_norm: 0.9999992440014471, iteration: 290222
loss: 1.0029494762420654,grad_norm: 0.7531667495615808, iteration: 290223
loss: 0.9881250858306885,grad_norm: 0.8825366779352805, iteration: 290224
loss: 1.0246986150741577,grad_norm: 0.8291274918199348, iteration: 290225
loss: 1.044844627380371,grad_norm: 0.9999998092039395, iteration: 290226
loss: 0.9939172267913818,grad_norm: 0.8654089527214892, iteration: 290227
loss: 0.9852920770645142,grad_norm: 0.7699026004124239, iteration: 290228
loss: 1.0027865171432495,grad_norm: 0.9999993891957883, iteration: 290229
loss: 1.0080602169036865,grad_norm: 0.8435950142186792, iteration: 290230
loss: 1.0280085802078247,grad_norm: 0.8380557300129052, iteration: 290231
loss: 0.9929027557373047,grad_norm: 0.8390311675485738, iteration: 290232
loss: 1.0041627883911133,grad_norm: 0.912166130008441, iteration: 290233
loss: 1.0011223554611206,grad_norm: 0.7322005197402716, iteration: 290234
loss: 1.0147645473480225,grad_norm: 0.7744139309771378, iteration: 290235
loss: 1.0082573890686035,grad_norm: 0.7861884731303667, iteration: 290236
loss: 1.0113184452056885,grad_norm: 0.9639272479866015, iteration: 290237
loss: 0.9894125461578369,grad_norm: 0.7718940399704656, iteration: 290238
loss: 0.9640147686004639,grad_norm: 0.7966808138832832, iteration: 290239
loss: 0.9906424880027771,grad_norm: 0.8445609284691312, iteration: 290240
loss: 1.0176899433135986,grad_norm: 0.8618174676559118, iteration: 290241
loss: 0.991421103477478,grad_norm: 0.8174615558652577, iteration: 290242
loss: 0.9966620206832886,grad_norm: 0.8548058198422398, iteration: 290243
loss: 0.9772778749465942,grad_norm: 0.9328411906049381, iteration: 290244
loss: 0.9645892381668091,grad_norm: 0.8257065787481875, iteration: 290245
loss: 0.9830412268638611,grad_norm: 0.8067368396379034, iteration: 290246
loss: 1.0370539426803589,grad_norm: 0.9999995103088261, iteration: 290247
loss: 1.0103213787078857,grad_norm: 0.823967757866286, iteration: 290248
loss: 0.9926208257675171,grad_norm: 0.8113710115654909, iteration: 290249
loss: 0.9915951490402222,grad_norm: 0.9874413176043135, iteration: 290250
loss: 0.9681460857391357,grad_norm: 0.8694260473054548, iteration: 290251
loss: 1.0230716466903687,grad_norm: 0.9422978214732937, iteration: 290252
loss: 0.9549558758735657,grad_norm: 0.7608727594773942, iteration: 290253
loss: 1.0197666883468628,grad_norm: 0.9673769391934417, iteration: 290254
loss: 1.0203673839569092,grad_norm: 0.9957535274092557, iteration: 290255
loss: 0.9594651460647583,grad_norm: 0.9999992401633672, iteration: 290256
loss: 0.979903519153595,grad_norm: 0.7321936307826711, iteration: 290257
loss: 0.998015820980072,grad_norm: 0.8580345040947229, iteration: 290258
loss: 1.016975998878479,grad_norm: 0.9999990537218123, iteration: 290259
loss: 1.0595405101776123,grad_norm: 0.9090236200691747, iteration: 290260
loss: 1.0028735399246216,grad_norm: 0.6995881147063403, iteration: 290261
loss: 0.9905234575271606,grad_norm: 0.891525832670995, iteration: 290262
loss: 1.0281668901443481,grad_norm: 0.9999992041350669, iteration: 290263
loss: 1.004102349281311,grad_norm: 0.7019183264525283, iteration: 290264
loss: 0.9766258001327515,grad_norm: 0.8699426305003416, iteration: 290265
loss: 1.0180132389068604,grad_norm: 0.9999991746848408, iteration: 290266
loss: 0.9815264344215393,grad_norm: 0.9234893382665607, iteration: 290267
loss: 0.9884709119796753,grad_norm: 0.8980649658617916, iteration: 290268
loss: 1.0031639337539673,grad_norm: 0.9999992352792906, iteration: 290269
loss: 1.0558280944824219,grad_norm: 0.9999999013530814, iteration: 290270
loss: 0.9822965264320374,grad_norm: 0.8490987878328535, iteration: 290271
loss: 1.0113540887832642,grad_norm: 0.7695702431725508, iteration: 290272
loss: 1.0054630041122437,grad_norm: 0.8074868900476108, iteration: 290273
loss: 0.9919635653495789,grad_norm: 0.7586569065695918, iteration: 290274
loss: 1.0337085723876953,grad_norm: 0.8674590846072016, iteration: 290275
loss: 0.9964642524719238,grad_norm: 0.9355046631315886, iteration: 290276
loss: 0.9882864356040955,grad_norm: 0.9999989879653988, iteration: 290277
loss: 1.1524938344955444,grad_norm: 0.9999998899898411, iteration: 290278
loss: 1.0469393730163574,grad_norm: 0.9999992271153175, iteration: 290279
loss: 1.0010217428207397,grad_norm: 0.857940066361934, iteration: 290280
loss: 0.9924096465110779,grad_norm: 0.8375457837473864, iteration: 290281
loss: 1.0110790729522705,grad_norm: 0.7359070993562398, iteration: 290282
loss: 1.0049703121185303,grad_norm: 0.9918458527022074, iteration: 290283
loss: 1.0497933626174927,grad_norm: 0.9999991706264612, iteration: 290284
loss: 0.9898838996887207,grad_norm: 0.9299425782880435, iteration: 290285
loss: 0.9925876259803772,grad_norm: 0.979850396156405, iteration: 290286
loss: 1.0154932737350464,grad_norm: 0.999999228396315, iteration: 290287
loss: 1.0170667171478271,grad_norm: 0.7308260447743454, iteration: 290288
loss: 1.0129892826080322,grad_norm: 0.9999991440928845, iteration: 290289
loss: 0.9556174278259277,grad_norm: 0.740148539340631, iteration: 290290
loss: 0.9904382824897766,grad_norm: 0.8957242766442858, iteration: 290291
loss: 1.0247682332992554,grad_norm: 0.7918670454836568, iteration: 290292
loss: 1.0202046632766724,grad_norm: 0.9999990203708253, iteration: 290293
loss: 1.2228530645370483,grad_norm: 0.9978348552455543, iteration: 290294
loss: 0.9966368675231934,grad_norm: 0.999999064620478, iteration: 290295
loss: 1.0120350122451782,grad_norm: 0.7067419334555523, iteration: 290296
loss: 1.0176422595977783,grad_norm: 0.8963016381456541, iteration: 290297
loss: 1.1030536890029907,grad_norm: 0.9999995331501185, iteration: 290298
loss: 0.9921555519104004,grad_norm: 0.9999998422592141, iteration: 290299
loss: 0.9789904952049255,grad_norm: 0.9818336486589898, iteration: 290300
loss: 0.989388644695282,grad_norm: 0.7218419970202957, iteration: 290301
loss: 0.9604390859603882,grad_norm: 0.9020852636583072, iteration: 290302
loss: 0.9957221746444702,grad_norm: 0.8308144555243115, iteration: 290303
loss: 0.9763327240943909,grad_norm: 0.8735022890933433, iteration: 290304
loss: 1.0916550159454346,grad_norm: 0.9999999337911161, iteration: 290305
loss: 1.023901343345642,grad_norm: 0.9567628545689593, iteration: 290306
loss: 0.979404866695404,grad_norm: 0.8619123400556219, iteration: 290307
loss: 1.0195157527923584,grad_norm: 0.7163192323841072, iteration: 290308
loss: 0.9838254451751709,grad_norm: 0.8369916789029155, iteration: 290309
loss: 1.0376590490341187,grad_norm: 0.922853041338795, iteration: 290310
loss: 1.1338151693344116,grad_norm: 0.999999279230331, iteration: 290311
loss: 0.9581841826438904,grad_norm: 0.8315845307310566, iteration: 290312
loss: 0.9884599447250366,grad_norm: 0.9300753571290267, iteration: 290313
loss: 1.1234632730484009,grad_norm: 0.9999997292675346, iteration: 290314
loss: 1.034914255142212,grad_norm: 0.9999992637024001, iteration: 290315
loss: 1.0208548307418823,grad_norm: 0.9095312800187666, iteration: 290316
loss: 1.0228557586669922,grad_norm: 0.8290295771101232, iteration: 290317
loss: 1.0152060985565186,grad_norm: 0.8689220460331243, iteration: 290318
loss: 0.9789563417434692,grad_norm: 0.9340943101955305, iteration: 290319
loss: 1.0037380456924438,grad_norm: 0.8660903111395156, iteration: 290320
loss: 0.962203860282898,grad_norm: 0.9999990758849591, iteration: 290321
loss: 0.9964913129806519,grad_norm: 0.8406837391791536, iteration: 290322
loss: 0.9757700562477112,grad_norm: 0.945302123614636, iteration: 290323
loss: 0.994721531867981,grad_norm: 0.8327720580290772, iteration: 290324
loss: 0.9907308220863342,grad_norm: 0.9999991660562266, iteration: 290325
loss: 1.0430861711502075,grad_norm: 1.000000065839905, iteration: 290326
loss: 1.0325777530670166,grad_norm: 0.9999990568340439, iteration: 290327
loss: 0.9771890044212341,grad_norm: 0.8955280036858299, iteration: 290328
loss: 0.9718771576881409,grad_norm: 0.9755805839862632, iteration: 290329
loss: 0.9892457127571106,grad_norm: 0.8921924084078902, iteration: 290330
loss: 1.0770013332366943,grad_norm: 0.9999996307922437, iteration: 290331
loss: 0.989812970161438,grad_norm: 0.8409911399648566, iteration: 290332
loss: 0.9946267008781433,grad_norm: 0.7464564154931899, iteration: 290333
loss: 1.0232881307601929,grad_norm: 0.8292066609591157, iteration: 290334
loss: 1.0209230184555054,grad_norm: 0.7614435821202616, iteration: 290335
loss: 0.9970666170120239,grad_norm: 0.9925250491094917, iteration: 290336
loss: 0.9898324012756348,grad_norm: 0.8207364644780042, iteration: 290337
loss: 0.9825606942176819,grad_norm: 0.9999990566804278, iteration: 290338
loss: 0.9985860586166382,grad_norm: 0.9415380101654504, iteration: 290339
loss: 1.0770207643508911,grad_norm: 0.9999994687474764, iteration: 290340
loss: 1.0220893621444702,grad_norm: 0.7833578315199543, iteration: 290341
loss: 1.0354217290878296,grad_norm: 0.9499000332622677, iteration: 290342
loss: 0.9838641285896301,grad_norm: 0.8490310098495546, iteration: 290343
loss: 0.9916657209396362,grad_norm: 0.7865317496115698, iteration: 290344
loss: 1.068394422531128,grad_norm: 0.9999998629946498, iteration: 290345
loss: 0.993462085723877,grad_norm: 0.9999990258906108, iteration: 290346
loss: 0.9727843999862671,grad_norm: 0.8299778936608194, iteration: 290347
loss: 1.018111228942871,grad_norm: 0.9330455308009659, iteration: 290348
loss: 1.0282524824142456,grad_norm: 0.9999992504580719, iteration: 290349
loss: 1.0260196924209595,grad_norm: 0.9999990982126057, iteration: 290350
loss: 1.0223928689956665,grad_norm: 0.9478350236533913, iteration: 290351
loss: 0.9994906187057495,grad_norm: 0.8811399754745048, iteration: 290352
loss: 1.003074049949646,grad_norm: 0.8273923202112087, iteration: 290353
loss: 0.9707226753234863,grad_norm: 0.9999991394323565, iteration: 290354
loss: 1.0265671014785767,grad_norm: 0.9425478547050747, iteration: 290355
loss: 0.9830354452133179,grad_norm: 0.9314118122162718, iteration: 290356
loss: 0.9842190146446228,grad_norm: 0.7508651789503143, iteration: 290357
loss: 0.9834644794464111,grad_norm: 0.8448481805188216, iteration: 290358
loss: 1.010676622390747,grad_norm: 0.9999991218734943, iteration: 290359
loss: 1.016332745552063,grad_norm: 0.9870049816989133, iteration: 290360
loss: 0.9759196043014526,grad_norm: 0.790574099005911, iteration: 290361
loss: 0.9771302938461304,grad_norm: 0.8738230632101831, iteration: 290362
loss: 1.0053600072860718,grad_norm: 0.9705420525944372, iteration: 290363
loss: 1.0424909591674805,grad_norm: 0.7656099373463849, iteration: 290364
loss: 1.0154989957809448,grad_norm: 0.9187763328121976, iteration: 290365
loss: 0.9975196719169617,grad_norm: 0.8728340332434642, iteration: 290366
loss: 1.005403757095337,grad_norm: 0.8048759582833331, iteration: 290367
loss: 1.0408847332000732,grad_norm: 0.936659668954716, iteration: 290368
loss: 1.0001373291015625,grad_norm: 0.9999994789563708, iteration: 290369
loss: 1.0081241130828857,grad_norm: 0.9624141984478717, iteration: 290370
loss: 1.0189157724380493,grad_norm: 0.7013153123119407, iteration: 290371
loss: 0.9814531803131104,grad_norm: 0.9191587515727055, iteration: 290372
loss: 1.0123491287231445,grad_norm: 0.9999996373832963, iteration: 290373
loss: 1.0158833265304565,grad_norm: 0.8406613804896418, iteration: 290374
loss: 1.0319277048110962,grad_norm: 0.9149587669697348, iteration: 290375
loss: 1.005385160446167,grad_norm: 0.9001728507032987, iteration: 290376
loss: 1.029667615890503,grad_norm: 0.8002583309483031, iteration: 290377
loss: 0.9896022081375122,grad_norm: 0.9999990244075979, iteration: 290378
loss: 1.0195939540863037,grad_norm: 0.711905373777844, iteration: 290379
loss: 1.042577862739563,grad_norm: 0.9999999761759956, iteration: 290380
loss: 0.9922223687171936,grad_norm: 0.7995311371960706, iteration: 290381
loss: 1.0114316940307617,grad_norm: 0.8534990536792646, iteration: 290382
loss: 1.0025235414505005,grad_norm: 0.873470370601058, iteration: 290383
loss: 0.9939221739768982,grad_norm: 0.9031719406863681, iteration: 290384
loss: 1.0109789371490479,grad_norm: 0.9795491532557433, iteration: 290385
loss: 0.9990527033805847,grad_norm: 0.8119254287548415, iteration: 290386
loss: 0.9650893211364746,grad_norm: 0.8160404670851574, iteration: 290387
loss: 0.9789057970046997,grad_norm: 0.8577578493131819, iteration: 290388
loss: 1.0068023204803467,grad_norm: 0.74700515665535, iteration: 290389
loss: 0.9884933233261108,grad_norm: 0.7843378808510579, iteration: 290390
loss: 0.9971916675567627,grad_norm: 0.8487382102995567, iteration: 290391
loss: 1.147671103477478,grad_norm: 0.9999992861805403, iteration: 290392
loss: 0.985467791557312,grad_norm: 0.7233551642936341, iteration: 290393
loss: 0.9790664315223694,grad_norm: 0.9542734111727534, iteration: 290394
loss: 1.0126804113388062,grad_norm: 0.9999991785763734, iteration: 290395
loss: 0.9887956380844116,grad_norm: 0.7610799564584582, iteration: 290396
loss: 0.9787890315055847,grad_norm: 0.9011861670255145, iteration: 290397
loss: 0.9751713871955872,grad_norm: 0.9283032151938703, iteration: 290398
loss: 1.0508534908294678,grad_norm: 0.7192031496886705, iteration: 290399
loss: 1.0706617832183838,grad_norm: 0.9999995056988903, iteration: 290400
loss: 0.9999592900276184,grad_norm: 0.8451839389384439, iteration: 290401
loss: 0.9944605827331543,grad_norm: 0.9366207603524386, iteration: 290402
loss: 1.1627503633499146,grad_norm: 0.9999992687782792, iteration: 290403
loss: 0.9625068306922913,grad_norm: 0.8149028451511089, iteration: 290404
loss: 0.9890549182891846,grad_norm: 0.9188575129097474, iteration: 290405
loss: 1.0325077772140503,grad_norm: 0.9999992252611193, iteration: 290406
loss: 0.9730035662651062,grad_norm: 0.9121129450097473, iteration: 290407
loss: 1.0165088176727295,grad_norm: 0.7329028179550784, iteration: 290408
loss: 0.9738140106201172,grad_norm: 0.935279163146562, iteration: 290409
loss: 0.9860471487045288,grad_norm: 0.9999991591498292, iteration: 290410
loss: 1.0162327289581299,grad_norm: 0.7823021004137519, iteration: 290411
loss: 1.0864923000335693,grad_norm: 0.999999919684056, iteration: 290412
loss: 0.9983839988708496,grad_norm: 0.8659407197382258, iteration: 290413
loss: 0.9758975505828857,grad_norm: 0.8407476273624996, iteration: 290414
loss: 1.008529543876648,grad_norm: 0.9283787794629432, iteration: 290415
loss: 0.998255729675293,grad_norm: 0.7203427123508036, iteration: 290416
loss: 1.053161382675171,grad_norm: 0.7990929981080978, iteration: 290417
loss: 1.024550437927246,grad_norm: 0.7561894349579923, iteration: 290418
loss: 1.0207191705703735,grad_norm: 0.7571637032661752, iteration: 290419
loss: 0.9910846948623657,grad_norm: 0.8224749037122717, iteration: 290420
loss: 1.0154569149017334,grad_norm: 0.8445587313754435, iteration: 290421
loss: 0.9946253299713135,grad_norm: 0.8963134685080458, iteration: 290422
loss: 0.9787497520446777,grad_norm: 0.870015733052341, iteration: 290423
loss: 0.9851503968238831,grad_norm: 0.8804454520542236, iteration: 290424
loss: 0.992348313331604,grad_norm: 0.8838109919610205, iteration: 290425
loss: 1.1102005243301392,grad_norm: 0.9537812906693235, iteration: 290426
loss: 0.9875249862670898,grad_norm: 0.8629191981199663, iteration: 290427
loss: 1.0648093223571777,grad_norm: 0.9197958604209493, iteration: 290428
loss: 0.9882456064224243,grad_norm: 0.9205031527719271, iteration: 290429
loss: 0.9859721064567566,grad_norm: 0.9375533978674077, iteration: 290430
loss: 1.0326112508773804,grad_norm: 0.7578383770123172, iteration: 290431
loss: 0.9882268905639648,grad_norm: 0.9079906273465868, iteration: 290432
loss: 0.9804030656814575,grad_norm: 0.7849307073376133, iteration: 290433
loss: 1.0223963260650635,grad_norm: 0.7991167801992151, iteration: 290434
loss: 0.9997913241386414,grad_norm: 0.8455888515965861, iteration: 290435
loss: 0.972379744052887,grad_norm: 0.8844141604524512, iteration: 290436
loss: 1.032755732536316,grad_norm: 0.8260382782059978, iteration: 290437
loss: 0.9919759035110474,grad_norm: 0.8777905498408418, iteration: 290438
loss: 0.9768546223640442,grad_norm: 0.8463073364810523, iteration: 290439
loss: 0.9746740460395813,grad_norm: 0.9073435109316528, iteration: 290440
loss: 0.9621126055717468,grad_norm: 0.8954051926889816, iteration: 290441
loss: 1.0037473440170288,grad_norm: 0.9509068135009241, iteration: 290442
loss: 0.9584606289863586,grad_norm: 0.9625589682037871, iteration: 290443
loss: 1.0102382898330688,grad_norm: 0.8368221236173546, iteration: 290444
loss: 0.9443044662475586,grad_norm: 0.971548686663989, iteration: 290445
loss: 1.0089950561523438,grad_norm: 0.8840794946781295, iteration: 290446
loss: 1.035970687866211,grad_norm: 0.8509076966725884, iteration: 290447
loss: 1.027532696723938,grad_norm: 0.8304716974672727, iteration: 290448
loss: 1.0281087160110474,grad_norm: 0.9174772810605641, iteration: 290449
loss: 1.0397772789001465,grad_norm: 0.8744340652699549, iteration: 290450
loss: 0.9922432899475098,grad_norm: 0.7540321527485677, iteration: 290451
loss: 1.0122977495193481,grad_norm: 0.6901353492116845, iteration: 290452
loss: 1.0032663345336914,grad_norm: 0.7874256883791002, iteration: 290453
loss: 1.0042959451675415,grad_norm: 0.8447892591270939, iteration: 290454
loss: 1.022900104522705,grad_norm: 0.800602055955739, iteration: 290455
loss: 0.9971487522125244,grad_norm: 0.8441427146492425, iteration: 290456
loss: 1.0068620443344116,grad_norm: 0.9144282723157936, iteration: 290457
loss: 0.9585795402526855,grad_norm: 0.9395865581915218, iteration: 290458
loss: 1.0203641653060913,grad_norm: 0.999999169463119, iteration: 290459
loss: 1.024861454963684,grad_norm: 0.8823385189753623, iteration: 290460
loss: 1.0499060153961182,grad_norm: 0.9999993689145645, iteration: 290461
loss: 0.9996771812438965,grad_norm: 0.9262046257660869, iteration: 290462
loss: 1.0021958351135254,grad_norm: 0.9999991140608601, iteration: 290463
loss: 0.9539937376976013,grad_norm: 0.8493968116010882, iteration: 290464
loss: 1.0010676383972168,grad_norm: 0.8851680290758761, iteration: 290465
loss: 1.0036327838897705,grad_norm: 0.7031055601934462, iteration: 290466
loss: 1.0347521305084229,grad_norm: 0.6877643413238346, iteration: 290467
loss: 0.9647563099861145,grad_norm: 0.850395428173478, iteration: 290468
loss: 0.9861805438995361,grad_norm: 0.6998176286009306, iteration: 290469
loss: 1.0126646757125854,grad_norm: 0.9999993892537126, iteration: 290470
loss: 1.0087260007858276,grad_norm: 0.9999997967067557, iteration: 290471
loss: 0.9712604880332947,grad_norm: 0.8343649815219194, iteration: 290472
loss: 0.9992764592170715,grad_norm: 0.9206436446129799, iteration: 290473
loss: 1.1693309545516968,grad_norm: 0.9999995953736872, iteration: 290474
loss: 1.0194488763809204,grad_norm: 0.8485024541515122, iteration: 290475
loss: 1.040716528892517,grad_norm: 0.9395951226039306, iteration: 290476
loss: 1.0196866989135742,grad_norm: 0.6437184576985472, iteration: 290477
loss: 1.0095226764678955,grad_norm: 0.7244728951396998, iteration: 290478
loss: 0.9909130930900574,grad_norm: 0.8510572053508791, iteration: 290479
loss: 1.012637734413147,grad_norm: 0.7252208732809637, iteration: 290480
loss: 1.0743358135223389,grad_norm: 0.9322785616883092, iteration: 290481
loss: 0.9864058494567871,grad_norm: 0.9480919586212615, iteration: 290482
loss: 1.0384633541107178,grad_norm: 0.7351046555940933, iteration: 290483
loss: 0.9997941851615906,grad_norm: 0.8746012463860167, iteration: 290484
loss: 1.0661920309066772,grad_norm: 0.9999994061473051, iteration: 290485
loss: 0.9946750998497009,grad_norm: 0.9241314410919965, iteration: 290486
loss: 0.9986530542373657,grad_norm: 0.9415168721548166, iteration: 290487
loss: 1.0667787790298462,grad_norm: 0.9999997812981789, iteration: 290488
loss: 0.9394964575767517,grad_norm: 0.8814529837819592, iteration: 290489
loss: 1.0410374402999878,grad_norm: 0.9999993288586103, iteration: 290490
loss: 1.0152188539505005,grad_norm: 0.999999120327428, iteration: 290491
loss: 0.9937015175819397,grad_norm: 0.9547098031183646, iteration: 290492
loss: 1.0458019971847534,grad_norm: 0.9999994800967416, iteration: 290493
loss: 1.01194167137146,grad_norm: 0.8664424481871942, iteration: 290494
loss: 0.9649364948272705,grad_norm: 0.898681921315507, iteration: 290495
loss: 0.9661308526992798,grad_norm: 0.99999914316021, iteration: 290496
loss: 0.9850644469261169,grad_norm: 0.9114891243568614, iteration: 290497
loss: 0.9776209592819214,grad_norm: 0.9098583346708865, iteration: 290498
loss: 1.0058122873306274,grad_norm: 0.908237893412393, iteration: 290499
loss: 0.9620360136032104,grad_norm: 0.7514745348953124, iteration: 290500
loss: 0.974985659122467,grad_norm: 0.6904079600981765, iteration: 290501
loss: 1.0086252689361572,grad_norm: 0.7896159152097786, iteration: 290502
loss: 0.9811941385269165,grad_norm: 0.8170791572480297, iteration: 290503
loss: 0.987159788608551,grad_norm: 0.7165268729724159, iteration: 290504
loss: 1.0285712480545044,grad_norm: 0.9999994048437207, iteration: 290505
loss: 1.0037243366241455,grad_norm: 0.855284581579416, iteration: 290506
loss: 0.9940469264984131,grad_norm: 0.9810173067812568, iteration: 290507
loss: 1.0051058530807495,grad_norm: 0.8722816518636336, iteration: 290508
loss: 1.0243626832962036,grad_norm: 0.7945702824745229, iteration: 290509
loss: 1.0227464437484741,grad_norm: 0.9134277049054418, iteration: 290510
loss: 1.0335814952850342,grad_norm: 0.7017660177132825, iteration: 290511
loss: 1.039196252822876,grad_norm: 0.9999990492495769, iteration: 290512
loss: 1.0097990036010742,grad_norm: 0.73291972076135, iteration: 290513
loss: 1.0095478296279907,grad_norm: 0.8692077300114289, iteration: 290514
loss: 0.9532228112220764,grad_norm: 0.8150688921659244, iteration: 290515
loss: 1.0339276790618896,grad_norm: 0.8680646116591818, iteration: 290516
loss: 1.0280601978302002,grad_norm: 0.9387433287322597, iteration: 290517
loss: 0.9855592846870422,grad_norm: 0.7091157390241454, iteration: 290518
loss: 1.0928479433059692,grad_norm: 0.9999992108303465, iteration: 290519
loss: 1.0197317600250244,grad_norm: 0.826322618634939, iteration: 290520
loss: 0.9728688597679138,grad_norm: 0.9999990617887446, iteration: 290521
loss: 1.006296992301941,grad_norm: 0.7486084389734413, iteration: 290522
loss: 1.0326751470565796,grad_norm: 0.8676165242294995, iteration: 290523
loss: 1.0230493545532227,grad_norm: 0.7736643865561518, iteration: 290524
loss: 0.9817877411842346,grad_norm: 0.788778691593481, iteration: 290525
loss: 1.0252784490585327,grad_norm: 0.7865112917110673, iteration: 290526
loss: 0.9981774091720581,grad_norm: 0.8864706859477267, iteration: 290527
loss: 0.9742926359176636,grad_norm: 0.7135015457048978, iteration: 290528
loss: 1.0057148933410645,grad_norm: 0.8915253489540785, iteration: 290529
loss: 1.0217552185058594,grad_norm: 0.834509142837067, iteration: 290530
loss: 1.0092878341674805,grad_norm: 0.9944272290627514, iteration: 290531
loss: 1.008564829826355,grad_norm: 0.8803886347874516, iteration: 290532
loss: 1.0030220746994019,grad_norm: 0.9999989112991927, iteration: 290533
loss: 1.0772227048873901,grad_norm: 0.9999994664360264, iteration: 290534
loss: 0.9760487079620361,grad_norm: 0.8032998298889867, iteration: 290535
loss: 1.0084654092788696,grad_norm: 0.7594438677824701, iteration: 290536
loss: 1.002788782119751,grad_norm: 0.8305354013836898, iteration: 290537
loss: 0.9698073863983154,grad_norm: 0.9482580466035437, iteration: 290538
loss: 1.0002827644348145,grad_norm: 0.878703373221837, iteration: 290539
loss: 0.9933143258094788,grad_norm: 0.8912287215763955, iteration: 290540
loss: 1.0407695770263672,grad_norm: 0.9999991153884475, iteration: 290541
loss: 1.0229690074920654,grad_norm: 0.8027205580386898, iteration: 290542
loss: 0.9647707343101501,grad_norm: 0.8709273133829778, iteration: 290543
loss: 0.9642778635025024,grad_norm: 0.9525233362947656, iteration: 290544
loss: 1.0129882097244263,grad_norm: 0.7920850803251722, iteration: 290545
loss: 1.024641513824463,grad_norm: 0.6684361082030987, iteration: 290546
loss: 0.9745205044746399,grad_norm: 0.8709407820645998, iteration: 290547
loss: 0.9408203363418579,grad_norm: 0.7823702072571217, iteration: 290548
loss: 1.0102403163909912,grad_norm: 0.9653509564978828, iteration: 290549
loss: 1.0041464567184448,grad_norm: 0.9522986734935635, iteration: 290550
loss: 0.9895551800727844,grad_norm: 0.9391444944415865, iteration: 290551
loss: 1.0455985069274902,grad_norm: 0.9999995304679038, iteration: 290552
loss: 1.0223075151443481,grad_norm: 0.9999997989921491, iteration: 290553
loss: 1.000316858291626,grad_norm: 0.8810156947443162, iteration: 290554
loss: 1.0287680625915527,grad_norm: 0.9999990399566028, iteration: 290555
loss: 1.00556480884552,grad_norm: 0.8197606409929524, iteration: 290556
loss: 0.9707674980163574,grad_norm: 0.876839749495012, iteration: 290557
loss: 1.0374609231948853,grad_norm: 0.7674304968213199, iteration: 290558
loss: 0.9685741662979126,grad_norm: 0.7946174991060548, iteration: 290559
loss: 0.9787699580192566,grad_norm: 0.8232859211717867, iteration: 290560
loss: 1.0064847469329834,grad_norm: 0.8905431091218767, iteration: 290561
loss: 0.9868981242179871,grad_norm: 0.9999990354221128, iteration: 290562
loss: 1.0037554502487183,grad_norm: 0.7950333719083379, iteration: 290563
loss: 0.9820876121520996,grad_norm: 0.8540500019953006, iteration: 290564
loss: 0.9959368109703064,grad_norm: 0.885155264038237, iteration: 290565
loss: 1.023830533027649,grad_norm: 0.7802418815488936, iteration: 290566
loss: 1.056896686553955,grad_norm: 0.8776714037177984, iteration: 290567
loss: 0.9743335247039795,grad_norm: 0.8416050576892234, iteration: 290568
loss: 1.013710379600525,grad_norm: 0.8877851504811769, iteration: 290569
loss: 0.9751707315444946,grad_norm: 0.810594811044975, iteration: 290570
loss: 0.9716070890426636,grad_norm: 0.8316950532253684, iteration: 290571
loss: 1.0001777410507202,grad_norm: 0.9178797142686472, iteration: 290572
loss: 1.0191118717193604,grad_norm: 0.9999997664992271, iteration: 290573
loss: 0.986483097076416,grad_norm: 0.9233880940341608, iteration: 290574
loss: 0.9796182513237,grad_norm: 0.9299866141392977, iteration: 290575
loss: 1.0384955406188965,grad_norm: 0.8937836007396406, iteration: 290576
loss: 0.9800649881362915,grad_norm: 0.778744301263448, iteration: 290577
loss: 0.978070080280304,grad_norm: 0.8643227794810742, iteration: 290578
loss: 0.9961370229721069,grad_norm: 0.8437308599627574, iteration: 290579
loss: 0.9927184581756592,grad_norm: 0.7817806281964754, iteration: 290580
loss: 1.0269709825515747,grad_norm: 0.7039492367786686, iteration: 290581
loss: 1.0796442031860352,grad_norm: 0.8997839370447168, iteration: 290582
loss: 0.9785523414611816,grad_norm: 0.6976808206820347, iteration: 290583
loss: 1.0314722061157227,grad_norm: 0.999999368743778, iteration: 290584
loss: 1.0570558309555054,grad_norm: 0.9999990680888774, iteration: 290585
loss: 1.008137822151184,grad_norm: 0.6715700111614277, iteration: 290586
loss: 1.0168012380599976,grad_norm: 0.9999993655882896, iteration: 290587
loss: 1.0100208520889282,grad_norm: 0.9988755859482721, iteration: 290588
loss: 1.0178955793380737,grad_norm: 0.9999991505346761, iteration: 290589
loss: 0.9945673942565918,grad_norm: 0.999999732218113, iteration: 290590
loss: 1.0280565023422241,grad_norm: 0.7997461062693985, iteration: 290591
loss: 1.1411254405975342,grad_norm: 0.999999087842855, iteration: 290592
loss: 0.9722750186920166,grad_norm: 0.8066330665246858, iteration: 290593
loss: 1.1346970796585083,grad_norm: 0.9999996122437105, iteration: 290594
loss: 1.0318293571472168,grad_norm: 0.8014379350938159, iteration: 290595
loss: 0.999538779258728,grad_norm: 0.7081340014692374, iteration: 290596
loss: 0.9936316609382629,grad_norm: 0.9703543601998136, iteration: 290597
loss: 0.9797384142875671,grad_norm: 0.9447128483008624, iteration: 290598
loss: 1.0474773645401,grad_norm: 0.9062412872609378, iteration: 290599
loss: 0.9854996204376221,grad_norm: 0.9042476383700601, iteration: 290600
loss: 1.0013631582260132,grad_norm: 0.9999990474930069, iteration: 290601
loss: 0.9977980256080627,grad_norm: 0.9999993756946292, iteration: 290602
loss: 0.9936830401420593,grad_norm: 0.9865990284734729, iteration: 290603
loss: 0.9671986103057861,grad_norm: 0.7659413573801974, iteration: 290604
loss: 0.9817342758178711,grad_norm: 0.7868537530121495, iteration: 290605
loss: 0.9576023817062378,grad_norm: 0.9005618856409172, iteration: 290606
loss: 0.9830050468444824,grad_norm: 0.8707767065919672, iteration: 290607
loss: 1.0830320119857788,grad_norm: 0.942576063256086, iteration: 290608
loss: 0.9843456149101257,grad_norm: 0.7237773550313411, iteration: 290609
loss: 0.987920343875885,grad_norm: 0.7703886509332663, iteration: 290610
loss: 0.9967531561851501,grad_norm: 0.8033193203192694, iteration: 290611
loss: 1.0086510181427002,grad_norm: 0.7886767752191806, iteration: 290612
loss: 0.9499669075012207,grad_norm: 0.8572343692815281, iteration: 290613
loss: 0.9667297601699829,grad_norm: 0.8282625696036445, iteration: 290614
loss: 1.0032435655593872,grad_norm: 0.8797981753848586, iteration: 290615
loss: 0.9589508175849915,grad_norm: 0.8688946619432693, iteration: 290616
loss: 1.0157185792922974,grad_norm: 0.9999988813018337, iteration: 290617
loss: 1.0005640983581543,grad_norm: 0.9308011106751928, iteration: 290618
loss: 0.9935657978057861,grad_norm: 0.8975023237628913, iteration: 290619
loss: 0.9776018857955933,grad_norm: 0.7270497471046997, iteration: 290620
loss: 1.008429765701294,grad_norm: 0.9479340439570045, iteration: 290621
loss: 1.0242373943328857,grad_norm: 0.938606807896423, iteration: 290622
loss: 1.0125725269317627,grad_norm: 0.9919719418206455, iteration: 290623
loss: 1.021674394607544,grad_norm: 0.9999992124877153, iteration: 290624
loss: 0.9612898826599121,grad_norm: 0.8099530259349222, iteration: 290625
loss: 1.0038727521896362,grad_norm: 0.9999992860968447, iteration: 290626
loss: 1.0065635442733765,grad_norm: 0.9999991766191554, iteration: 290627
loss: 1.0280604362487793,grad_norm: 0.9999991062835826, iteration: 290628
loss: 1.0490615367889404,grad_norm: 0.9277063064429409, iteration: 290629
loss: 0.9804024696350098,grad_norm: 0.7939466329131452, iteration: 290630
loss: 0.9795241355895996,grad_norm: 0.77635820009181, iteration: 290631
loss: 1.00679612159729,grad_norm: 0.7502466220545148, iteration: 290632
loss: 0.9974496364593506,grad_norm: 0.9000995672430584, iteration: 290633
loss: 0.9879259467124939,grad_norm: 0.9055482959569554, iteration: 290634
loss: 1.013625144958496,grad_norm: 0.7572203653328061, iteration: 290635
loss: 0.9941269159317017,grad_norm: 0.8592727767469397, iteration: 290636
loss: 1.035999059677124,grad_norm: 0.9679493697393629, iteration: 290637
loss: 0.9856554269790649,grad_norm: 0.8682465139158131, iteration: 290638
loss: 1.03900945186615,grad_norm: 0.9500971287221903, iteration: 290639
loss: 1.0301109552383423,grad_norm: 0.8965253006017674, iteration: 290640
loss: 1.0178325176239014,grad_norm: 0.8322162188994251, iteration: 290641
loss: 0.9913337230682373,grad_norm: 0.9999992768643322, iteration: 290642
loss: 0.995589017868042,grad_norm: 0.8150314279528323, iteration: 290643
loss: 1.0450439453125,grad_norm: 0.9999994090776012, iteration: 290644
loss: 0.9830591678619385,grad_norm: 0.8158395550663499, iteration: 290645
loss: 0.9613258242607117,grad_norm: 0.8000710253894676, iteration: 290646
loss: 0.9431716203689575,grad_norm: 0.999999188140513, iteration: 290647
loss: 0.9872628450393677,grad_norm: 0.8056902594195414, iteration: 290648
loss: 0.9991021752357483,grad_norm: 0.8928308934930053, iteration: 290649
loss: 1.0002837181091309,grad_norm: 0.9801554071720114, iteration: 290650
loss: 0.9736404418945312,grad_norm: 0.8949875047648456, iteration: 290651
loss: 0.9879826307296753,grad_norm: 0.7572917809748034, iteration: 290652
loss: 1.0149294137954712,grad_norm: 0.8455807686047581, iteration: 290653
loss: 0.961308479309082,grad_norm: 0.9430491553191628, iteration: 290654
loss: 1.0292375087738037,grad_norm: 0.9693472922629618, iteration: 290655
loss: 0.9787381291389465,grad_norm: 0.9999993391016886, iteration: 290656
loss: 0.9670254588127136,grad_norm: 0.9557308209815384, iteration: 290657
loss: 0.9803783893585205,grad_norm: 0.7995681191331274, iteration: 290658
loss: 1.0347555875778198,grad_norm: 0.9999999695149955, iteration: 290659
loss: 1.0056979656219482,grad_norm: 0.7357029239574561, iteration: 290660
loss: 1.0085471868515015,grad_norm: 0.9219323962701848, iteration: 290661
loss: 1.1297698020935059,grad_norm: 0.9999993449533588, iteration: 290662
loss: 1.0039212703704834,grad_norm: 0.9999991453452713, iteration: 290663
loss: 1.0108511447906494,grad_norm: 0.9206201820816201, iteration: 290664
loss: 1.1630480289459229,grad_norm: 0.9999996151840745, iteration: 290665
loss: 0.991921067237854,grad_norm: 0.7869868964820392, iteration: 290666
loss: 1.0014820098876953,grad_norm: 0.9999991112707539, iteration: 290667
loss: 0.9949778914451599,grad_norm: 0.8826987074363115, iteration: 290668
loss: 0.9846413731575012,grad_norm: 0.9970658320964407, iteration: 290669
loss: 1.035327672958374,grad_norm: 0.9999997829572738, iteration: 290670
loss: 1.0904675722122192,grad_norm: 0.8510048583252666, iteration: 290671
loss: 1.0174447298049927,grad_norm: 0.8910971880582853, iteration: 290672
loss: 0.9861232042312622,grad_norm: 0.7598165353162171, iteration: 290673
loss: 1.0146204233169556,grad_norm: 0.909022244720947, iteration: 290674
loss: 1.003528356552124,grad_norm: 0.9875542342460181, iteration: 290675
loss: 1.001895785331726,grad_norm: 0.8682425531482135, iteration: 290676
loss: 1.0008968114852905,grad_norm: 0.8517183390645112, iteration: 290677
loss: 0.987751841545105,grad_norm: 0.8841148735418367, iteration: 290678
loss: 1.0029042959213257,grad_norm: 0.893267944754863, iteration: 290679
loss: 1.0275145769119263,grad_norm: 0.9540301809569494, iteration: 290680
loss: 1.0536490678787231,grad_norm: 0.8642439184420255, iteration: 290681
loss: 1.0036687850952148,grad_norm: 0.7876452379381841, iteration: 290682
loss: 0.989949643611908,grad_norm: 0.8959338315972336, iteration: 290683
loss: 0.9687933325767517,grad_norm: 0.8336180726415591, iteration: 290684
loss: 0.9707987904548645,grad_norm: 0.9128660726618126, iteration: 290685
loss: 1.068415641784668,grad_norm: 0.8581931123864847, iteration: 290686
loss: 0.9930109977722168,grad_norm: 0.9078037031643451, iteration: 290687
loss: 1.0792856216430664,grad_norm: 0.9221900677960227, iteration: 290688
loss: 1.01680326461792,grad_norm: 0.8072091176048243, iteration: 290689
loss: 0.9809979796409607,grad_norm: 0.8314283244814396, iteration: 290690
loss: 0.9587505459785461,grad_norm: 0.9999990384878806, iteration: 290691
loss: 1.0056520700454712,grad_norm: 0.9338226943371857, iteration: 290692
loss: 1.0327554941177368,grad_norm: 0.8820213792951815, iteration: 290693
loss: 0.9911581873893738,grad_norm: 0.7694552631520569, iteration: 290694
loss: 0.9924362897872925,grad_norm: 0.8565895866705666, iteration: 290695
loss: 0.9887624979019165,grad_norm: 0.9999991126330905, iteration: 290696
loss: 1.022621989250183,grad_norm: 0.9358280887700062, iteration: 290697
loss: 0.9678351879119873,grad_norm: 0.8322812264982882, iteration: 290698
loss: 1.0154213905334473,grad_norm: 0.8639178624942286, iteration: 290699
loss: 1.0315744876861572,grad_norm: 0.6256868967865158, iteration: 290700
loss: 0.9637474417686462,grad_norm: 0.8825765126795893, iteration: 290701
loss: 1.0185127258300781,grad_norm: 0.9601218367505968, iteration: 290702
loss: 0.9823378324508667,grad_norm: 0.8103621752951303, iteration: 290703
loss: 1.0889391899108887,grad_norm: 0.9999993073097334, iteration: 290704
loss: 0.9779171943664551,grad_norm: 0.9999992148607952, iteration: 290705
loss: 0.965369462966919,grad_norm: 0.9999989998134349, iteration: 290706
loss: 0.975760817527771,grad_norm: 0.8657430768634706, iteration: 290707
loss: 0.9823755025863647,grad_norm: 0.8417187403108153, iteration: 290708
loss: 0.9922211766242981,grad_norm: 0.8151835406553761, iteration: 290709
loss: 0.9964396953582764,grad_norm: 0.9599317562745882, iteration: 290710
loss: 1.0673295259475708,grad_norm: 0.9999995990665663, iteration: 290711
loss: 1.0124238729476929,grad_norm: 0.7055416126963857, iteration: 290712
loss: 1.0083194971084595,grad_norm: 0.9697912599868698, iteration: 290713
loss: 1.0355415344238281,grad_norm: 0.9999991686647338, iteration: 290714
loss: 0.9809295535087585,grad_norm: 0.9999990753621297, iteration: 290715
loss: 1.017897129058838,grad_norm: 0.8223553358033596, iteration: 290716
loss: 1.10862398147583,grad_norm: 0.9999996691504978, iteration: 290717
loss: 0.97798752784729,grad_norm: 0.9366589431042669, iteration: 290718
loss: 1.0109702348709106,grad_norm: 0.999999162991587, iteration: 290719
loss: 1.011134386062622,grad_norm: 0.8262166877683103, iteration: 290720
loss: 1.013656735420227,grad_norm: 0.8628790752339002, iteration: 290721
loss: 0.9766990542411804,grad_norm: 0.7396790070753646, iteration: 290722
loss: 1.0104392766952515,grad_norm: 0.9044928102141591, iteration: 290723
loss: 0.9780345559120178,grad_norm: 0.9351021995594118, iteration: 290724
loss: 1.0312697887420654,grad_norm: 0.9344328258366456, iteration: 290725
loss: 1.0604041814804077,grad_norm: 0.999999231388281, iteration: 290726
loss: 1.030893087387085,grad_norm: 0.8079816764217638, iteration: 290727
loss: 1.0326730012893677,grad_norm: 0.9999989582825225, iteration: 290728
loss: 0.9992385506629944,grad_norm: 0.9545756992650117, iteration: 290729
loss: 1.0264430046081543,grad_norm: 0.8288883341081384, iteration: 290730
loss: 1.023022174835205,grad_norm: 0.9717655475179788, iteration: 290731
loss: 0.9978178143501282,grad_norm: 0.8068282181291343, iteration: 290732
loss: 1.052372932434082,grad_norm: 0.8147613043080155, iteration: 290733
loss: 1.0039035081863403,grad_norm: 0.8858742634801554, iteration: 290734
loss: 0.9719771146774292,grad_norm: 0.9999990707004637, iteration: 290735
loss: 0.9749816656112671,grad_norm: 0.8414343914445984, iteration: 290736
loss: 0.9879622459411621,grad_norm: 0.8954505664255417, iteration: 290737
loss: 0.9770793914794922,grad_norm: 0.9501619403315901, iteration: 290738
loss: 1.0503915548324585,grad_norm: 0.7450518206409482, iteration: 290739
loss: 0.9816749095916748,grad_norm: 0.7693922630365639, iteration: 290740
loss: 1.011113166809082,grad_norm: 0.7765821156088007, iteration: 290741
loss: 0.9892829656600952,grad_norm: 0.632363654014436, iteration: 290742
loss: 1.0108165740966797,grad_norm: 0.7994919677262843, iteration: 290743
loss: 0.9955888986587524,grad_norm: 0.8891351399124879, iteration: 290744
loss: 0.9600440263748169,grad_norm: 0.8580557309464424, iteration: 290745
loss: 1.0253137350082397,grad_norm: 0.9999990696415141, iteration: 290746
loss: 1.0253794193267822,grad_norm: 0.7903906587630788, iteration: 290747
loss: 1.0149667263031006,grad_norm: 0.8137929926257701, iteration: 290748
loss: 0.9759841561317444,grad_norm: 0.7053454600269127, iteration: 290749
loss: 0.9751142859458923,grad_norm: 0.9920161643156783, iteration: 290750
loss: 1.0930430889129639,grad_norm: 0.9999998725318641, iteration: 290751
loss: 0.9835559129714966,grad_norm: 0.735373384431707, iteration: 290752
loss: 0.9707400798797607,grad_norm: 0.9045773956812381, iteration: 290753
loss: 1.0567023754119873,grad_norm: 0.8025504680262432, iteration: 290754
loss: 0.9773212671279907,grad_norm: 0.837530445227758, iteration: 290755
loss: 1.0135140419006348,grad_norm: 0.8864557650712119, iteration: 290756
loss: 1.0340451002120972,grad_norm: 0.9999991024353134, iteration: 290757
loss: 1.0103394985198975,grad_norm: 0.7409611024943865, iteration: 290758
loss: 0.9971174001693726,grad_norm: 0.8777739444142716, iteration: 290759
loss: 0.9735269546508789,grad_norm: 0.8322405650061104, iteration: 290760
loss: 1.0218994617462158,grad_norm: 0.9999992736329952, iteration: 290761
loss: 1.008415699005127,grad_norm: 0.772872059931219, iteration: 290762
loss: 1.050325870513916,grad_norm: 0.7682241066620024, iteration: 290763
loss: 1.0209919214248657,grad_norm: 0.9999990561315365, iteration: 290764
loss: 1.0044656991958618,grad_norm: 0.9286826425177879, iteration: 290765
loss: 1.0336421728134155,grad_norm: 0.8712599138886207, iteration: 290766
loss: 1.0067529678344727,grad_norm: 0.67620473735718, iteration: 290767
loss: 1.008042812347412,grad_norm: 0.9165627803423732, iteration: 290768
loss: 1.0074260234832764,grad_norm: 0.8454860672137283, iteration: 290769
loss: 0.9843381643295288,grad_norm: 0.8572182915605602, iteration: 290770
loss: 1.034005045890808,grad_norm: 0.7751574302392736, iteration: 290771
loss: 0.9923104047775269,grad_norm: 0.8750894577717319, iteration: 290772
loss: 0.9823768138885498,grad_norm: 0.9140200903960585, iteration: 290773
loss: 1.0194666385650635,grad_norm: 0.9999993726158064, iteration: 290774
loss: 1.0345467329025269,grad_norm: 0.8244347159300658, iteration: 290775
loss: 0.9760922789573669,grad_norm: 0.9298550146957699, iteration: 290776
loss: 0.973301351070404,grad_norm: 0.9432119619601684, iteration: 290777
loss: 1.070055365562439,grad_norm: 0.9103701152752272, iteration: 290778
loss: 0.9839167594909668,grad_norm: 0.9883713845861862, iteration: 290779
loss: 1.0171856880187988,grad_norm: 0.7812543675260035, iteration: 290780
loss: 0.9454272985458374,grad_norm: 0.9073355300852467, iteration: 290781
loss: 1.010759711265564,grad_norm: 0.9421872199971507, iteration: 290782
loss: 1.0232295989990234,grad_norm: 0.7418298370530865, iteration: 290783
loss: 1.0094444751739502,grad_norm: 0.7836067944091295, iteration: 290784
loss: 0.9756974577903748,grad_norm: 0.8190544745257003, iteration: 290785
loss: 0.9800606966018677,grad_norm: 0.8096192840147284, iteration: 290786
loss: 1.0199710130691528,grad_norm: 0.8953506465400567, iteration: 290787
loss: 1.0191152095794678,grad_norm: 0.8196964541895697, iteration: 290788
loss: 1.0023705959320068,grad_norm: 0.8640886713009809, iteration: 290789
loss: 1.0120760202407837,grad_norm: 0.8547192496380538, iteration: 290790
loss: 1.0256810188293457,grad_norm: 0.999999547587006, iteration: 290791
loss: 1.0250135660171509,grad_norm: 0.7898632346035634, iteration: 290792
loss: 0.9875451326370239,grad_norm: 0.790247755881475, iteration: 290793
loss: 1.040305495262146,grad_norm: 0.8165659181522312, iteration: 290794
loss: 0.9858958125114441,grad_norm: 0.9999989960874522, iteration: 290795
loss: 0.9873752593994141,grad_norm: 0.9999989879205926, iteration: 290796
loss: 1.0045909881591797,grad_norm: 0.9999989458621582, iteration: 290797
loss: 1.0008585453033447,grad_norm: 0.7292549484932163, iteration: 290798
loss: 1.0240269899368286,grad_norm: 0.9595584116794819, iteration: 290799
loss: 0.990626871585846,grad_norm: 0.8223970343463823, iteration: 290800
loss: 1.0402731895446777,grad_norm: 0.9999991679868256, iteration: 290801
loss: 0.9583597183227539,grad_norm: 0.9937977858841868, iteration: 290802
loss: 1.0219117403030396,grad_norm: 0.9474916153849735, iteration: 290803
loss: 1.0093530416488647,grad_norm: 0.891456006186485, iteration: 290804
loss: 0.9832015037536621,grad_norm: 0.9999994881140774, iteration: 290805
loss: 0.9949702024459839,grad_norm: 0.8592265660880586, iteration: 290806
loss: 0.9855793118476868,grad_norm: 0.7437432747862551, iteration: 290807
loss: 1.0205609798431396,grad_norm: 0.8947370431934568, iteration: 290808
loss: 1.0225988626480103,grad_norm: 0.8471570788295889, iteration: 290809
loss: 1.0125720500946045,grad_norm: 0.723823794953049, iteration: 290810
loss: 0.95054692029953,grad_norm: 0.9272710889653634, iteration: 290811
loss: 0.9810498952865601,grad_norm: 0.8630584008825368, iteration: 290812
loss: 1.0098525285720825,grad_norm: 0.8863149836851456, iteration: 290813
loss: 1.01947021484375,grad_norm: 0.8545333532513198, iteration: 290814
loss: 0.9950838685035706,grad_norm: 0.7886678172513849, iteration: 290815
loss: 0.989776074886322,grad_norm: 0.9999989038103005, iteration: 290816
loss: 1.0151296854019165,grad_norm: 0.999999017420322, iteration: 290817
loss: 0.9861270189285278,grad_norm: 0.933975716871268, iteration: 290818
loss: 0.9798144698143005,grad_norm: 0.8630477666775793, iteration: 290819
loss: 0.9786487221717834,grad_norm: 0.6989507956462341, iteration: 290820
loss: 1.0545618534088135,grad_norm: 0.9999995760341328, iteration: 290821
loss: 1.0001780986785889,grad_norm: 0.8561965264942135, iteration: 290822
loss: 1.1579207181930542,grad_norm: 0.9921671861351171, iteration: 290823
loss: 1.0444622039794922,grad_norm: 0.9999992807870232, iteration: 290824
loss: 1.0091557502746582,grad_norm: 0.7833464718984539, iteration: 290825
loss: 1.0551589727401733,grad_norm: 0.9179989652613498, iteration: 290826
loss: 1.0213242769241333,grad_norm: 0.9999992631579014, iteration: 290827
loss: 1.0078905820846558,grad_norm: 0.8642213516174452, iteration: 290828
loss: 0.9841329455375671,grad_norm: 0.9999990945972098, iteration: 290829
loss: 1.0213371515274048,grad_norm: 0.9999999055273825, iteration: 290830
loss: 1.0591323375701904,grad_norm: 0.9999997047169726, iteration: 290831
loss: 1.012367844581604,grad_norm: 0.9999992653220708, iteration: 290832
loss: 1.0237884521484375,grad_norm: 0.9741641639027769, iteration: 290833
loss: 0.9992825388908386,grad_norm: 0.9999991489529556, iteration: 290834
loss: 0.9834063053131104,grad_norm: 0.7157343568358969, iteration: 290835
loss: 1.010407567024231,grad_norm: 0.9999991511074493, iteration: 290836
loss: 0.9856411218643188,grad_norm: 0.8791966036871769, iteration: 290837
loss: 1.0316146612167358,grad_norm: 0.7529563385317782, iteration: 290838
loss: 1.002709150314331,grad_norm: 0.971702719793394, iteration: 290839
loss: 0.9994104504585266,grad_norm: 0.8200454845233913, iteration: 290840
loss: 0.9945303797721863,grad_norm: 0.8165427728084729, iteration: 290841
loss: 1.0119088888168335,grad_norm: 0.8667216919957984, iteration: 290842
loss: 0.9930104613304138,grad_norm: 0.8315146802761775, iteration: 290843
loss: 0.9823114275932312,grad_norm: 0.8484381132874621, iteration: 290844
loss: 1.009960651397705,grad_norm: 0.8352427096919328, iteration: 290845
loss: 0.9842256307601929,grad_norm: 0.9999999506861444, iteration: 290846
loss: 1.153502345085144,grad_norm: 0.999999832607305, iteration: 290847
loss: 1.0208367109298706,grad_norm: 0.7611029887092077, iteration: 290848
loss: 1.0182355642318726,grad_norm: 0.8916727637608984, iteration: 290849
loss: 0.9733737707138062,grad_norm: 0.8902851459084189, iteration: 290850
loss: 0.9687983393669128,grad_norm: 0.845983896742957, iteration: 290851
loss: 0.9816893339157104,grad_norm: 0.7935069098497888, iteration: 290852
loss: 1.0346482992172241,grad_norm: 0.8767252174653044, iteration: 290853
loss: 1.014770746231079,grad_norm: 0.9999991022484066, iteration: 290854
loss: 0.9922832250595093,grad_norm: 0.9999993048937061, iteration: 290855
loss: 0.9862179756164551,grad_norm: 0.7962005922581817, iteration: 290856
loss: 0.9952046275138855,grad_norm: 0.8639779941335615, iteration: 290857
loss: 0.983375608921051,grad_norm: 0.6957107298483808, iteration: 290858
loss: 1.0410619974136353,grad_norm: 0.999999370797961, iteration: 290859
loss: 0.9646519422531128,grad_norm: 0.8097411337833066, iteration: 290860
loss: 0.9760058522224426,grad_norm: 0.7216542795709288, iteration: 290861
loss: 0.974107027053833,grad_norm: 0.8921210106754858, iteration: 290862
loss: 0.9615330100059509,grad_norm: 0.907747911676291, iteration: 290863
loss: 0.9788728356361389,grad_norm: 0.8706556184060846, iteration: 290864
loss: 1.0355814695358276,grad_norm: 0.8937216064129504, iteration: 290865
loss: 1.0151870250701904,grad_norm: 0.9416616592480691, iteration: 290866
loss: 0.9495483636856079,grad_norm: 0.8575107676000215, iteration: 290867
loss: 1.0072517395019531,grad_norm: 0.8285469644847633, iteration: 290868
loss: 0.9779424667358398,grad_norm: 0.9999992275350997, iteration: 290869
loss: 1.0098472833633423,grad_norm: 0.9459089663902223, iteration: 290870
loss: 1.0022906064987183,grad_norm: 0.9999990871359191, iteration: 290871
loss: 0.9896462559700012,grad_norm: 0.9697061146915379, iteration: 290872
loss: 1.0379399061203003,grad_norm: 0.8353032837758594, iteration: 290873
loss: 0.9981428980827332,grad_norm: 0.8541025678273084, iteration: 290874
loss: 0.9880435466766357,grad_norm: 0.7796814936222677, iteration: 290875
loss: 0.9733301997184753,grad_norm: 0.9759378340966389, iteration: 290876
loss: 0.9988190531730652,grad_norm: 0.9268636348600899, iteration: 290877
loss: 1.000299334526062,grad_norm: 0.9999990198682542, iteration: 290878
loss: 0.9820964336395264,grad_norm: 0.7741649355759345, iteration: 290879
loss: 1.0182160139083862,grad_norm: 0.853754293837315, iteration: 290880
loss: 0.993765115737915,grad_norm: 0.93831907374242, iteration: 290881
loss: 0.9889999628067017,grad_norm: 0.9940573921505061, iteration: 290882
loss: 1.0228883028030396,grad_norm: 0.7768927543730859, iteration: 290883
loss: 1.018478512763977,grad_norm: 0.8275102194234503, iteration: 290884
loss: 0.9675353169441223,grad_norm: 0.9397938405645413, iteration: 290885
loss: 1.0825793743133545,grad_norm: 0.9999994145908874, iteration: 290886
loss: 0.998948335647583,grad_norm: 0.9691940325182061, iteration: 290887
loss: 1.0135170221328735,grad_norm: 0.7375823717472418, iteration: 290888
loss: 1.0440219640731812,grad_norm: 0.9345854911956083, iteration: 290889
loss: 0.9823113679885864,grad_norm: 0.943444449400961, iteration: 290890
loss: 0.9741531014442444,grad_norm: 0.7791655636183809, iteration: 290891
loss: 1.009287714958191,grad_norm: 0.9560846497340749, iteration: 290892
loss: 1.025025725364685,grad_norm: 0.7848113345479418, iteration: 290893
loss: 1.0079736709594727,grad_norm: 0.8007451044742546, iteration: 290894
loss: 1.008967638015747,grad_norm: 0.8789309478148293, iteration: 290895
loss: 1.0286451578140259,grad_norm: 0.8814738727365314, iteration: 290896
loss: 0.9865946769714355,grad_norm: 0.8618893450012827, iteration: 290897
loss: 0.9725281596183777,grad_norm: 0.9287460509703755, iteration: 290898
loss: 0.9881511926651001,grad_norm: 0.8246666289847923, iteration: 290899
loss: 0.9977546334266663,grad_norm: 0.7964983349342069, iteration: 290900
loss: 0.9938827157020569,grad_norm: 0.855814620560042, iteration: 290901
loss: 1.0023036003112793,grad_norm: 0.8392146008761626, iteration: 290902
loss: 1.025765061378479,grad_norm: 0.9367362604072924, iteration: 290903
loss: 0.9999275207519531,grad_norm: 0.7935343044097383, iteration: 290904
loss: 1.0136035680770874,grad_norm: 0.7773273305588017, iteration: 290905
loss: 1.0184165239334106,grad_norm: 0.9999999185030829, iteration: 290906
loss: 0.9745272397994995,grad_norm: 0.9999991130284711, iteration: 290907
loss: 1.0127778053283691,grad_norm: 0.859544858256998, iteration: 290908
loss: 0.9954505562782288,grad_norm: 0.6932036791467111, iteration: 290909
loss: 0.9975287914276123,grad_norm: 0.8205467379995385, iteration: 290910
loss: 0.9851323366165161,grad_norm: 0.8823258759209697, iteration: 290911
loss: 1.009181261062622,grad_norm: 0.9999991167844857, iteration: 290912
loss: 1.0148112773895264,grad_norm: 0.9319285303025872, iteration: 290913
loss: 0.9999274611473083,grad_norm: 0.8723015963925949, iteration: 290914
loss: 1.001644492149353,grad_norm: 0.987239916085148, iteration: 290915
loss: 0.9889755249023438,grad_norm: 0.8894615423556745, iteration: 290916
loss: 1.022853970527649,grad_norm: 0.9999998249367853, iteration: 290917
loss: 0.9869852066040039,grad_norm: 0.7294078857835692, iteration: 290918
loss: 0.9897068738937378,grad_norm: 0.7507975579029779, iteration: 290919
loss: 1.0209673643112183,grad_norm: 0.8781454387759822, iteration: 290920
loss: 0.9574801921844482,grad_norm: 0.8711810859123881, iteration: 290921
loss: 0.9812815189361572,grad_norm: 0.7799249884974925, iteration: 290922
loss: 0.9933426380157471,grad_norm: 0.9999992594776107, iteration: 290923
loss: 1.0368150472640991,grad_norm: 0.9999999388919887, iteration: 290924
loss: 1.0084006786346436,grad_norm: 0.992445448981046, iteration: 290925
loss: 1.0148799419403076,grad_norm: 0.7432456897161739, iteration: 290926
loss: 0.9717545509338379,grad_norm: 0.9378770794563573, iteration: 290927
loss: 0.9972484707832336,grad_norm: 0.9999994896835175, iteration: 290928
loss: 1.0034570693969727,grad_norm: 0.9126889752675026, iteration: 290929
loss: 0.994943380355835,grad_norm: 0.8464185563232229, iteration: 290930
loss: 1.0265612602233887,grad_norm: 0.7346935167339562, iteration: 290931
loss: 1.0162616968154907,grad_norm: 0.9248875381253566, iteration: 290932
loss: 1.2504643201828003,grad_norm: 0.9999998733930713, iteration: 290933
loss: 0.9982313513755798,grad_norm: 0.8896302964722878, iteration: 290934
loss: 0.9941647052764893,grad_norm: 0.9467871557214769, iteration: 290935
loss: 1.0027222633361816,grad_norm: 0.8545043313247171, iteration: 290936
loss: 0.9983951449394226,grad_norm: 0.7586207711116567, iteration: 290937
loss: 0.9866233468055725,grad_norm: 0.7764788610791505, iteration: 290938
loss: 1.0164703130722046,grad_norm: 0.8197000717227583, iteration: 290939
loss: 0.9846855998039246,grad_norm: 0.9087987366353093, iteration: 290940
loss: 1.016796588897705,grad_norm: 0.907398593769739, iteration: 290941
loss: 0.9967269897460938,grad_norm: 0.9929583468575423, iteration: 290942
loss: 0.9894043207168579,grad_norm: 0.8559417930024882, iteration: 290943
loss: 1.0315043926239014,grad_norm: 0.9705575375865377, iteration: 290944
loss: 1.0427888631820679,grad_norm: 0.958052505829145, iteration: 290945
loss: 0.9745730757713318,grad_norm: 0.9049054734240453, iteration: 290946
loss: 1.0001435279846191,grad_norm: 0.8247955786759117, iteration: 290947
loss: 1.0158753395080566,grad_norm: 0.9999991606277044, iteration: 290948
loss: 1.0025262832641602,grad_norm: 0.9502281218140359, iteration: 290949
loss: 1.0066187381744385,grad_norm: 0.8523799354933332, iteration: 290950
loss: 1.0509339570999146,grad_norm: 0.9999990672680251, iteration: 290951
loss: 1.0513222217559814,grad_norm: 0.8262445792046001, iteration: 290952
loss: 1.0219604969024658,grad_norm: 0.9999992351628822, iteration: 290953
loss: 0.9764490127563477,grad_norm: 0.7071719201697004, iteration: 290954
loss: 1.0059572458267212,grad_norm: 0.9999990513047688, iteration: 290955
loss: 0.9883036613464355,grad_norm: 0.9999991615626745, iteration: 290956
loss: 0.9745792150497437,grad_norm: 0.8760788917063508, iteration: 290957
loss: 0.9849941730499268,grad_norm: 0.7597223332308504, iteration: 290958
loss: 1.0036656856536865,grad_norm: 0.8954416890542143, iteration: 290959
loss: 1.0610706806182861,grad_norm: 0.7882677661357204, iteration: 290960
loss: 0.9839116930961609,grad_norm: 0.8841214944398492, iteration: 290961
loss: 1.0023964643478394,grad_norm: 0.8858428159805767, iteration: 290962
loss: 0.9843646883964539,grad_norm: 0.8371908999764951, iteration: 290963
loss: 1.0131114721298218,grad_norm: 0.885899611081132, iteration: 290964
loss: 1.0318466424942017,grad_norm: 0.9295487760269998, iteration: 290965
loss: 1.0064553022384644,grad_norm: 0.8755592803054264, iteration: 290966
loss: 0.9610492587089539,grad_norm: 0.7645130046942, iteration: 290967
loss: 1.0155988931655884,grad_norm: 0.7637363687558751, iteration: 290968
loss: 1.017966628074646,grad_norm: 0.8269815504605482, iteration: 290969
loss: 1.0552113056182861,grad_norm: 0.999999172706499, iteration: 290970
loss: 0.9996895790100098,grad_norm: 0.7684794393542236, iteration: 290971
loss: 0.9783210158348083,grad_norm: 0.9457309221422935, iteration: 290972
loss: 0.9907090663909912,grad_norm: 0.7882118705030683, iteration: 290973
loss: 1.0145670175552368,grad_norm: 0.8865376941379409, iteration: 290974
loss: 0.971406877040863,grad_norm: 0.8086858820103184, iteration: 290975
loss: 1.0076662302017212,grad_norm: 0.8086853333459039, iteration: 290976
loss: 0.9862373471260071,grad_norm: 0.8323508849091151, iteration: 290977
loss: 0.9902144074440002,grad_norm: 0.817281053584764, iteration: 290978
loss: 0.9599673748016357,grad_norm: 0.7875667479239219, iteration: 290979
loss: 1.02162504196167,grad_norm: 0.8662245271845881, iteration: 290980
loss: 0.998467206954956,grad_norm: 0.7615608787131286, iteration: 290981
loss: 0.9984586238861084,grad_norm: 0.7401899717229633, iteration: 290982
loss: 1.0311484336853027,grad_norm: 0.9749687017675195, iteration: 290983
loss: 1.008371114730835,grad_norm: 0.7519614139227959, iteration: 290984
loss: 1.0216960906982422,grad_norm: 0.9244989136387437, iteration: 290985
loss: 0.9993001818656921,grad_norm: 0.8486382256919199, iteration: 290986
loss: 1.0191713571548462,grad_norm: 0.8189996906251776, iteration: 290987
loss: 1.019085168838501,grad_norm: 0.833586435857339, iteration: 290988
loss: 0.97184157371521,grad_norm: 0.9999993962404009, iteration: 290989
loss: 1.039779543876648,grad_norm: 0.9999998046243065, iteration: 290990
loss: 0.9956734776496887,grad_norm: 0.8574715516100134, iteration: 290991
loss: 1.0111640691757202,grad_norm: 0.9999996857730753, iteration: 290992
loss: 1.0147134065628052,grad_norm: 0.9412371162248736, iteration: 290993
loss: 0.9850332736968994,grad_norm: 0.9171265041046291, iteration: 290994
loss: 1.0265781879425049,grad_norm: 0.9999991221543535, iteration: 290995
loss: 1.007438063621521,grad_norm: 0.9999990013640032, iteration: 290996
loss: 0.9787129163742065,grad_norm: 0.8974810425611808, iteration: 290997
loss: 1.0202659368515015,grad_norm: 0.7731306982207292, iteration: 290998
loss: 0.9780222773551941,grad_norm: 0.7931432974520304, iteration: 290999
loss: 1.014438271522522,grad_norm: 0.8614826581470584, iteration: 291000
loss: 0.9919769167900085,grad_norm: 0.8943898467215348, iteration: 291001
loss: 0.9798091650009155,grad_norm: 0.9592842864435579, iteration: 291002
loss: 1.019779920578003,grad_norm: 0.8958664007138758, iteration: 291003
loss: 1.0064254999160767,grad_norm: 0.9999998839620831, iteration: 291004
loss: 0.9975232481956482,grad_norm: 0.8945828730211944, iteration: 291005
loss: 1.0362069606781006,grad_norm: 0.8128633669527534, iteration: 291006
loss: 1.0564111471176147,grad_norm: 0.999999951402946, iteration: 291007
loss: 0.968045175075531,grad_norm: 0.8711441663595064, iteration: 291008
loss: 1.0187522172927856,grad_norm: 0.8401197157139837, iteration: 291009
loss: 0.9792358875274658,grad_norm: 0.9938134507477662, iteration: 291010
loss: 0.9864444732666016,grad_norm: 0.8290356915909418, iteration: 291011
loss: 1.0000026226043701,grad_norm: 0.9999999579876402, iteration: 291012
loss: 1.0103445053100586,grad_norm: 0.8901294005279514, iteration: 291013
loss: 1.0258508920669556,grad_norm: 0.999999128746306, iteration: 291014
loss: 0.9721072316169739,grad_norm: 0.8668043231223458, iteration: 291015
loss: 1.030089020729065,grad_norm: 0.8590893850815084, iteration: 291016
loss: 1.0214065313339233,grad_norm: 0.9980240001925758, iteration: 291017
loss: 1.0275394916534424,grad_norm: 0.8911768196297929, iteration: 291018
loss: 0.9882808327674866,grad_norm: 0.9861663557830626, iteration: 291019
loss: 0.9886041879653931,grad_norm: 0.891850354725178, iteration: 291020
loss: 0.9844096899032593,grad_norm: 0.9725735682785773, iteration: 291021
loss: 1.1014349460601807,grad_norm: 0.9999991499153198, iteration: 291022
loss: 1.0351271629333496,grad_norm: 0.8775834972821417, iteration: 291023
loss: 0.9611707329750061,grad_norm: 0.8129941919244422, iteration: 291024
loss: 0.9918525218963623,grad_norm: 0.7282324412580071, iteration: 291025
loss: 1.068560004234314,grad_norm: 0.9249436149627718, iteration: 291026
loss: 0.9892295002937317,grad_norm: 0.7125866784162743, iteration: 291027
loss: 0.9825907945632935,grad_norm: 0.8311139485945216, iteration: 291028
loss: 1.0010625123977661,grad_norm: 0.8724301198578571, iteration: 291029
loss: 1.0126848220825195,grad_norm: 0.9232876990747573, iteration: 291030
loss: 1.0371557474136353,grad_norm: 0.9597572151737804, iteration: 291031
loss: 1.044581413269043,grad_norm: 0.9999989984751031, iteration: 291032
loss: 0.979556679725647,grad_norm: 0.7134800323005255, iteration: 291033
loss: 1.0432257652282715,grad_norm: 0.8002316241291448, iteration: 291034
loss: 1.0008734464645386,grad_norm: 0.7670758277725269, iteration: 291035
loss: 0.9880502223968506,grad_norm: 0.7995956104650287, iteration: 291036
loss: 0.9636825919151306,grad_norm: 0.8126236541545593, iteration: 291037
loss: 1.0416994094848633,grad_norm: 0.7987994967612612, iteration: 291038
loss: 0.9694162011146545,grad_norm: 0.8309812257631818, iteration: 291039
loss: 0.9687504768371582,grad_norm: 0.8291740093550614, iteration: 291040
loss: 0.9825261235237122,grad_norm: 0.731397566978677, iteration: 291041
loss: 1.0025603771209717,grad_norm: 0.9999993891737726, iteration: 291042
loss: 1.0041732788085938,grad_norm: 0.7948143450500168, iteration: 291043
loss: 1.000423550605774,grad_norm: 0.8731296514333132, iteration: 291044
loss: 0.9882291555404663,grad_norm: 0.7851272866902588, iteration: 291045
loss: 1.010891318321228,grad_norm: 0.9317944198936887, iteration: 291046
loss: 0.9909180998802185,grad_norm: 0.8700469962519662, iteration: 291047
loss: 1.0356643199920654,grad_norm: 0.7771969820775014, iteration: 291048
loss: 0.9981489777565002,grad_norm: 0.7464365097434785, iteration: 291049
loss: 0.9712023138999939,grad_norm: 0.9044289585613031, iteration: 291050
loss: 0.9877347946166992,grad_norm: 0.8872851584186504, iteration: 291051
loss: 1.0036853551864624,grad_norm: 0.8964137200187646, iteration: 291052
loss: 0.9998546242713928,grad_norm: 0.9999995787989363, iteration: 291053
loss: 1.0191398859024048,grad_norm: 0.7720109425840984, iteration: 291054
loss: 0.9796968698501587,grad_norm: 0.9071995802678325, iteration: 291055
loss: 1.010524868965149,grad_norm: 0.8961159387991218, iteration: 291056
loss: 0.9654866456985474,grad_norm: 0.9774325873230836, iteration: 291057
loss: 1.0034385919570923,grad_norm: 0.9999993599977663, iteration: 291058
loss: 1.0290088653564453,grad_norm: 0.9988513249069909, iteration: 291059
loss: 1.003130316734314,grad_norm: 0.8751360589245921, iteration: 291060
loss: 1.0093761682510376,grad_norm: 0.7837891129216914, iteration: 291061
loss: 1.0096036195755005,grad_norm: 0.8074840751651134, iteration: 291062
loss: 1.0244805812835693,grad_norm: 0.9052296334096094, iteration: 291063
loss: 1.0440877676010132,grad_norm: 0.9562462425682433, iteration: 291064
loss: 1.0055397748947144,grad_norm: 0.87305066602759, iteration: 291065
loss: 0.9937193393707275,grad_norm: 0.9302025330035588, iteration: 291066
loss: 0.9674870371818542,grad_norm: 0.7744746333137696, iteration: 291067
loss: 0.9992097616195679,grad_norm: 0.903968302339341, iteration: 291068
loss: 0.9779226183891296,grad_norm: 0.8723062235933197, iteration: 291069
loss: 0.9766637086868286,grad_norm: 0.8901649291682164, iteration: 291070
loss: 0.9888543486595154,grad_norm: 0.7670185033321126, iteration: 291071
loss: 1.0207836627960205,grad_norm: 0.9531283750146049, iteration: 291072
loss: 0.9918641448020935,grad_norm: 0.8910731244790044, iteration: 291073
loss: 1.011488676071167,grad_norm: 0.74855051292491, iteration: 291074
loss: 1.0180327892303467,grad_norm: 0.7555162903822025, iteration: 291075
loss: 1.0316320657730103,grad_norm: 0.9999992355528212, iteration: 291076
loss: 0.986321210861206,grad_norm: 0.8179947098097832, iteration: 291077
loss: 0.9980247020721436,grad_norm: 0.894004157177979, iteration: 291078
loss: 0.9915040135383606,grad_norm: 0.938206445944096, iteration: 291079
loss: 1.0250053405761719,grad_norm: 0.7775010686539326, iteration: 291080
loss: 1.0138705968856812,grad_norm: 0.9999992846338001, iteration: 291081
loss: 1.0131105184555054,grad_norm: 0.9999997348667671, iteration: 291082
loss: 1.056580662727356,grad_norm: 0.7751172118432038, iteration: 291083
loss: 1.0161590576171875,grad_norm: 0.8064147331452459, iteration: 291084
loss: 1.020458459854126,grad_norm: 0.9999996511069765, iteration: 291085
loss: 0.9884863495826721,grad_norm: 0.9999992806765512, iteration: 291086
loss: 1.0035719871520996,grad_norm: 0.7455366693229356, iteration: 291087
loss: 1.0228886604309082,grad_norm: 0.870759258450777, iteration: 291088
loss: 0.9763094186782837,grad_norm: 0.9094146703739175, iteration: 291089
loss: 1.0238487720489502,grad_norm: 0.9102200032772163, iteration: 291090
loss: 1.0145996809005737,grad_norm: 0.99999913769675, iteration: 291091
loss: 0.9807624220848083,grad_norm: 0.9747042054619995, iteration: 291092
loss: 0.9823524951934814,grad_norm: 0.8145159119774831, iteration: 291093
loss: 1.0061064958572388,grad_norm: 0.9872873119537776, iteration: 291094
loss: 1.0492894649505615,grad_norm: 0.9999999553956704, iteration: 291095
loss: 0.9820457696914673,grad_norm: 0.9231241890105817, iteration: 291096
loss: 0.9953771233558655,grad_norm: 0.893934791011491, iteration: 291097
loss: 1.0962862968444824,grad_norm: 0.9488117117112558, iteration: 291098
loss: 0.9987459182739258,grad_norm: 0.8612765986138737, iteration: 291099
loss: 0.9898238182067871,grad_norm: 0.9999991937467674, iteration: 291100
loss: 1.032882571220398,grad_norm: 0.9999990300612706, iteration: 291101
loss: 1.0160080194473267,grad_norm: 0.9444066980308649, iteration: 291102
loss: 1.007477879524231,grad_norm: 0.7979777179765369, iteration: 291103
loss: 0.9786368608474731,grad_norm: 0.7610186633091903, iteration: 291104
loss: 1.0211988687515259,grad_norm: 0.7276380317915714, iteration: 291105
loss: 0.9799146056175232,grad_norm: 0.9999990591902402, iteration: 291106
loss: 1.0157989263534546,grad_norm: 0.9999990376360135, iteration: 291107
loss: 1.023733377456665,grad_norm: 0.7739574058009734, iteration: 291108
loss: 0.9828441143035889,grad_norm: 0.8175807200446412, iteration: 291109
loss: 0.9953207969665527,grad_norm: 0.9031337831988083, iteration: 291110
loss: 0.9692884087562561,grad_norm: 0.8464749884478109, iteration: 291111
loss: 1.007798433303833,grad_norm: 0.7859840672941895, iteration: 291112
loss: 0.9855390191078186,grad_norm: 0.7231914347324409, iteration: 291113
loss: 0.9962217211723328,grad_norm: 0.9923882882391957, iteration: 291114
loss: 0.9604241251945496,grad_norm: 0.8796907269514078, iteration: 291115
loss: 1.0133973360061646,grad_norm: 0.9095247095210618, iteration: 291116
loss: 1.007710337638855,grad_norm: 0.8817993078446201, iteration: 291117
loss: 0.9879436492919922,grad_norm: 0.868493373434971, iteration: 291118
loss: 1.031667709350586,grad_norm: 0.9628154750914132, iteration: 291119
loss: 1.005427360534668,grad_norm: 0.999999118300851, iteration: 291120
loss: 1.022735595703125,grad_norm: 0.8854466933029249, iteration: 291121
loss: 1.0180596113204956,grad_norm: 0.9999989650431569, iteration: 291122
loss: 1.0062541961669922,grad_norm: 0.657986314415346, iteration: 291123
loss: 1.0166566371917725,grad_norm: 0.7935130659961498, iteration: 291124
loss: 1.0338305234909058,grad_norm: 0.9999991151634873, iteration: 291125
loss: 1.022241234779358,grad_norm: 0.8146309927494956, iteration: 291126
loss: 1.0197278261184692,grad_norm: 0.9981567927707833, iteration: 291127
loss: 0.9938761591911316,grad_norm: 0.9999994593415399, iteration: 291128
loss: 1.01468825340271,grad_norm: 0.8557296645953523, iteration: 291129
loss: 1.0333092212677002,grad_norm: 0.7516610519193037, iteration: 291130
loss: 0.9981552958488464,grad_norm: 0.8128317343247972, iteration: 291131
loss: 1.0087714195251465,grad_norm: 0.9757503916306255, iteration: 291132
loss: 0.9933539032936096,grad_norm: 0.9315231870838843, iteration: 291133
loss: 0.9785844683647156,grad_norm: 0.6436036483656211, iteration: 291134
loss: 1.0144314765930176,grad_norm: 0.83304770601287, iteration: 291135
loss: 1.143052339553833,grad_norm: 0.9999990381469651, iteration: 291136
loss: 1.0019112825393677,grad_norm: 0.9998525477462687, iteration: 291137
loss: 0.9900818467140198,grad_norm: 0.9096266603818685, iteration: 291138
loss: 1.0141228437423706,grad_norm: 0.9999992530514947, iteration: 291139
loss: 1.023139476776123,grad_norm: 0.9526031988548345, iteration: 291140
loss: 0.9897634983062744,grad_norm: 0.9999995479625031, iteration: 291141
loss: 0.975275993347168,grad_norm: 0.9544912539799796, iteration: 291142
loss: 1.0007277727127075,grad_norm: 0.7468099262655794, iteration: 291143
loss: 1.0352030992507935,grad_norm: 0.9686362349562527, iteration: 291144
loss: 1.0704847574234009,grad_norm: 0.9999993937625752, iteration: 291145
loss: 0.9984224438667297,grad_norm: 0.9999991017658393, iteration: 291146
loss: 1.0135430097579956,grad_norm: 0.7317457537261093, iteration: 291147
loss: 1.0316914319992065,grad_norm: 0.8716293566853109, iteration: 291148
loss: 1.0074151754379272,grad_norm: 0.784319503015953, iteration: 291149
loss: 0.9901769757270813,grad_norm: 0.845656600416582, iteration: 291150
loss: 1.0347431898117065,grad_norm: 0.9446212869116233, iteration: 291151
loss: 1.005985140800476,grad_norm: 0.9999991183179354, iteration: 291152
loss: 1.0144622325897217,grad_norm: 0.8794972416331491, iteration: 291153
loss: 0.9887076616287231,grad_norm: 0.999999084677004, iteration: 291154
loss: 1.022122859954834,grad_norm: 0.9513253902367448, iteration: 291155
loss: 1.0338630676269531,grad_norm: 0.8846975919678021, iteration: 291156
loss: 0.9849331974983215,grad_norm: 0.9625708841162207, iteration: 291157
loss: 1.0173786878585815,grad_norm: 0.8541625105449542, iteration: 291158
loss: 0.9881067276000977,grad_norm: 0.9999991637230718, iteration: 291159
loss: 0.9970279335975647,grad_norm: 0.9999990602799573, iteration: 291160
loss: 0.9975634813308716,grad_norm: 0.8976763170025305, iteration: 291161
loss: 1.0146528482437134,grad_norm: 0.8485994508295561, iteration: 291162
loss: 0.9831734895706177,grad_norm: 0.8412241486389451, iteration: 291163
loss: 1.0116121768951416,grad_norm: 0.962031706926069, iteration: 291164
loss: 0.9980054497718811,grad_norm: 0.8818338049524671, iteration: 291165
loss: 1.0160608291625977,grad_norm: 0.8601082665587121, iteration: 291166
loss: 1.0118658542633057,grad_norm: 0.850353838276068, iteration: 291167
loss: 1.1573717594146729,grad_norm: 1.0000000993340832, iteration: 291168
loss: 1.0152705907821655,grad_norm: 0.8684814734870313, iteration: 291169
loss: 0.9623206257820129,grad_norm: 0.7419254259037399, iteration: 291170
loss: 0.9960286617279053,grad_norm: 0.8802325006246481, iteration: 291171
loss: 0.9999932646751404,grad_norm: 0.8853757206030959, iteration: 291172
loss: 0.9853618144989014,grad_norm: 0.8429589251922569, iteration: 291173
loss: 1.0188548564910889,grad_norm: 0.9999992210495122, iteration: 291174
loss: 0.980442225933075,grad_norm: 0.932662879707083, iteration: 291175
loss: 1.0002319812774658,grad_norm: 0.9999992400328186, iteration: 291176
loss: 0.9720513820648193,grad_norm: 0.7946937949427958, iteration: 291177
loss: 0.9994948506355286,grad_norm: 0.9303904383401709, iteration: 291178
loss: 0.9720544219017029,grad_norm: 0.786823711975506, iteration: 291179
loss: 1.0007630586624146,grad_norm: 0.8777785619051283, iteration: 291180
loss: 1.001449704170227,grad_norm: 0.8129001633449898, iteration: 291181
loss: 0.9897831678390503,grad_norm: 0.9999992654031448, iteration: 291182
loss: 1.0015078783035278,grad_norm: 0.8533972004221896, iteration: 291183
loss: 1.0180448293685913,grad_norm: 0.7821979919690872, iteration: 291184
loss: 1.0076996088027954,grad_norm: 0.9025046951190538, iteration: 291185
loss: 1.0507913827896118,grad_norm: 0.8485860050577343, iteration: 291186
loss: 1.010607123374939,grad_norm: 0.7764258425282219, iteration: 291187
loss: 0.9850524663925171,grad_norm: 0.9008266942778766, iteration: 291188
loss: 1.0008368492126465,grad_norm: 0.8846180962266075, iteration: 291189
loss: 0.9714073538780212,grad_norm: 0.8113802799742906, iteration: 291190
loss: 0.9823288917541504,grad_norm: 0.9652665564291643, iteration: 291191
loss: 0.9872058629989624,grad_norm: 0.9999991440013943, iteration: 291192
loss: 1.0270241498947144,grad_norm: 0.6914124218337121, iteration: 291193
loss: 1.025559902191162,grad_norm: 0.8404585449968173, iteration: 291194
loss: 0.9762275815010071,grad_norm: 0.9136892597313275, iteration: 291195
loss: 0.9615383148193359,grad_norm: 0.8771434137674783, iteration: 291196
loss: 1.012546181678772,grad_norm: 0.9272820347342039, iteration: 291197
loss: 1.0096949338912964,grad_norm: 0.8138711452990517, iteration: 291198
loss: 1.0092295408248901,grad_norm: 0.9883764716068209, iteration: 291199
loss: 1.0041675567626953,grad_norm: 0.9999991459982582, iteration: 291200
loss: 0.9981178641319275,grad_norm: 0.7813576402205407, iteration: 291201
loss: 1.0010441541671753,grad_norm: 0.9028705187045659, iteration: 291202
loss: 0.9752966165542603,grad_norm: 0.8987273772395642, iteration: 291203
loss: 1.02836012840271,grad_norm: 0.8716484510606382, iteration: 291204
loss: 0.976779580116272,grad_norm: 0.8802041686811383, iteration: 291205
loss: 0.9701134562492371,grad_norm: 0.8330764946139821, iteration: 291206
loss: 1.0010617971420288,grad_norm: 0.6923635290930678, iteration: 291207
loss: 1.027411699295044,grad_norm: 0.9219002758836445, iteration: 291208
loss: 1.0057059526443481,grad_norm: 0.9461802380945391, iteration: 291209
loss: 1.0588864088058472,grad_norm: 0.999999691152959, iteration: 291210
loss: 1.0274146795272827,grad_norm: 0.8790271229705361, iteration: 291211
loss: 1.0286868810653687,grad_norm: 0.9999998325000388, iteration: 291212
loss: 0.9866482615470886,grad_norm: 0.8511194761407408, iteration: 291213
loss: 0.9969580769538879,grad_norm: 0.8779253737239834, iteration: 291214
loss: 1.0290086269378662,grad_norm: 0.9076284176224128, iteration: 291215
loss: 1.022616982460022,grad_norm: 0.8719100441793453, iteration: 291216
loss: 1.0057801008224487,grad_norm: 0.999999549585553, iteration: 291217
loss: 0.9880233407020569,grad_norm: 0.8605422217378407, iteration: 291218
loss: 1.0474412441253662,grad_norm: 0.9159633600665751, iteration: 291219
loss: 1.0115214586257935,grad_norm: 0.8276617945595157, iteration: 291220
loss: 0.9926095604896545,grad_norm: 0.753867387842223, iteration: 291221
loss: 0.9952003359794617,grad_norm: 0.7970251307067413, iteration: 291222
loss: 1.0077213048934937,grad_norm: 0.9999992576724092, iteration: 291223
loss: 0.978643000125885,grad_norm: 0.9999990318646677, iteration: 291224
loss: 1.0301417112350464,grad_norm: 0.7627076607899756, iteration: 291225
loss: 0.9950636029243469,grad_norm: 0.693884201534812, iteration: 291226
loss: 0.9844731688499451,grad_norm: 0.9340594619293895, iteration: 291227
loss: 0.992764949798584,grad_norm: 0.9176266051212075, iteration: 291228
loss: 1.0098435878753662,grad_norm: 0.9999991096626154, iteration: 291229
loss: 1.003542184829712,grad_norm: 0.7561359394368813, iteration: 291230
loss: 0.98725426197052,grad_norm: 0.7717296548625078, iteration: 291231
loss: 1.0063220262527466,grad_norm: 0.8005318659178783, iteration: 291232
loss: 0.9725903272628784,grad_norm: 0.8982852183409032, iteration: 291233
loss: 0.9940460324287415,grad_norm: 0.8930167927693149, iteration: 291234
loss: 1.0168159008026123,grad_norm: 0.9999993254600021, iteration: 291235
loss: 1.0027861595153809,grad_norm: 0.7963367676876842, iteration: 291236
loss: 1.0121543407440186,grad_norm: 0.9999991231426777, iteration: 291237
loss: 0.9856275916099548,grad_norm: 0.9120211603169911, iteration: 291238
loss: 1.0107128620147705,grad_norm: 0.9623561897870772, iteration: 291239
loss: 1.0082429647445679,grad_norm: 0.9999992133472638, iteration: 291240
loss: 0.993714451789856,grad_norm: 0.7765343557388837, iteration: 291241
loss: 0.978262722492218,grad_norm: 0.8199566585116458, iteration: 291242
loss: 1.0652366876602173,grad_norm: 0.9999991498465193, iteration: 291243
loss: 0.9788604378700256,grad_norm: 0.809280937668085, iteration: 291244
loss: 0.9640259146690369,grad_norm: 0.8243256952219316, iteration: 291245
loss: 0.9901857376098633,grad_norm: 0.9999990864777245, iteration: 291246
loss: 1.0002316236495972,grad_norm: 0.9999989973074465, iteration: 291247
loss: 0.9775091409683228,grad_norm: 0.9999990428039776, iteration: 291248
loss: 0.9864551424980164,grad_norm: 0.8082778674124107, iteration: 291249
loss: 0.9896795749664307,grad_norm: 0.7282721522914657, iteration: 291250
loss: 1.0024257898330688,grad_norm: 0.8282001755239388, iteration: 291251
loss: 1.0269887447357178,grad_norm: 0.9999991281402356, iteration: 291252
loss: 0.9844245314598083,grad_norm: 0.8307838052101088, iteration: 291253
loss: 1.0041507482528687,grad_norm: 0.974903299561344, iteration: 291254
loss: 0.9743438959121704,grad_norm: 0.9999994297971274, iteration: 291255
loss: 1.0119901895523071,grad_norm: 0.8273880609343297, iteration: 291256
loss: 1.037274718284607,grad_norm: 0.8539747259569161, iteration: 291257
loss: 0.9981638193130493,grad_norm: 0.7977068553317828, iteration: 291258
loss: 0.9637064933776855,grad_norm: 0.8418141854282245, iteration: 291259
loss: 1.0422732830047607,grad_norm: 0.9453098607050744, iteration: 291260
loss: 1.0122963190078735,grad_norm: 0.8054644519128122, iteration: 291261
loss: 0.9479764103889465,grad_norm: 0.9999994887856752, iteration: 291262
loss: 0.9941278696060181,grad_norm: 0.7794761412182005, iteration: 291263
loss: 1.031622290611267,grad_norm: 0.9600072720612157, iteration: 291264
loss: 0.9774137139320374,grad_norm: 0.7648506982168982, iteration: 291265
loss: 1.0970966815948486,grad_norm: 0.9999997240895133, iteration: 291266
loss: 0.9957935214042664,grad_norm: 0.8088053688520447, iteration: 291267
loss: 0.9877366423606873,grad_norm: 0.8540143850306378, iteration: 291268
loss: 1.0054469108581543,grad_norm: 0.9999991699544809, iteration: 291269
loss: 1.0038248300552368,grad_norm: 0.7939045894046756, iteration: 291270
loss: 1.0102472305297852,grad_norm: 0.7776863921475046, iteration: 291271
loss: 1.0339478254318237,grad_norm: 0.8995806166407848, iteration: 291272
loss: 0.9647557139396667,grad_norm: 0.7984400248665883, iteration: 291273
loss: 0.9733766913414001,grad_norm: 0.8839486646633238, iteration: 291274
loss: 1.0068484544754028,grad_norm: 0.8667326236102557, iteration: 291275
loss: 1.0044806003570557,grad_norm: 0.9999997381312384, iteration: 291276
loss: 1.0271718502044678,grad_norm: 0.8584458304103234, iteration: 291277
loss: 1.0147820711135864,grad_norm: 0.8228916717187609, iteration: 291278
loss: 1.0031137466430664,grad_norm: 0.9573932488917215, iteration: 291279
loss: 1.0236376523971558,grad_norm: 0.7367283485801247, iteration: 291280
loss: 1.0005141496658325,grad_norm: 0.9996441581019863, iteration: 291281
loss: 1.0194997787475586,grad_norm: 0.9246889486200921, iteration: 291282
loss: 0.9757503867149353,grad_norm: 0.774224489189914, iteration: 291283
loss: 1.0048903226852417,grad_norm: 0.7168429100568514, iteration: 291284
loss: 1.0034587383270264,grad_norm: 0.7910248164707189, iteration: 291285
loss: 1.0220057964324951,grad_norm: 0.9324755579162547, iteration: 291286
loss: 0.9941277503967285,grad_norm: 0.7054166667974908, iteration: 291287
loss: 0.9931488037109375,grad_norm: 0.8982269142006368, iteration: 291288
loss: 0.9733670353889465,grad_norm: 0.8544881025078069, iteration: 291289
loss: 0.9487091898918152,grad_norm: 0.8915245311300108, iteration: 291290
loss: 1.0289868116378784,grad_norm: 0.9466899776894367, iteration: 291291
loss: 0.9811918139457703,grad_norm: 0.9382730566775558, iteration: 291292
loss: 1.0213972330093384,grad_norm: 0.9999991482855053, iteration: 291293
loss: 0.9935171008110046,grad_norm: 0.7856868831258419, iteration: 291294
loss: 1.0044465065002441,grad_norm: 0.7981942430214622, iteration: 291295
loss: 0.9763787984848022,grad_norm: 0.8361956361798639, iteration: 291296
loss: 1.0158144235610962,grad_norm: 0.9999991957953251, iteration: 291297
loss: 0.9979084730148315,grad_norm: 0.7686974914638897, iteration: 291298
loss: 0.9853851199150085,grad_norm: 0.8701565137562727, iteration: 291299
loss: 1.0137605667114258,grad_norm: 0.8800517875386946, iteration: 291300
loss: 0.9818065762519836,grad_norm: 0.9999991049911753, iteration: 291301
loss: 0.9906171560287476,grad_norm: 0.7769491332262938, iteration: 291302
loss: 0.999919056892395,grad_norm: 0.7682130697427226, iteration: 291303
loss: 0.9748387336730957,grad_norm: 0.9004183202662673, iteration: 291304
loss: 1.0149661302566528,grad_norm: 0.9726523779266049, iteration: 291305
loss: 1.0140557289123535,grad_norm: 0.8969241410902437, iteration: 291306
loss: 0.9908284544944763,grad_norm: 0.9999997955487196, iteration: 291307
loss: 1.0012433528900146,grad_norm: 0.9475099737759157, iteration: 291308
loss: 0.9992930293083191,grad_norm: 0.9999993359432742, iteration: 291309
loss: 0.9883639812469482,grad_norm: 0.823073377955665, iteration: 291310
loss: 1.0148872137069702,grad_norm: 0.9999993312337326, iteration: 291311
loss: 1.0042197704315186,grad_norm: 0.8522705562068486, iteration: 291312
loss: 0.9910369515419006,grad_norm: 0.8679349586011844, iteration: 291313
loss: 0.9962995052337646,grad_norm: 0.8616984791045992, iteration: 291314
loss: 0.9662643671035767,grad_norm: 0.80831382994866, iteration: 291315
loss: 0.9678935408592224,grad_norm: 0.9703209536108175, iteration: 291316
loss: 1.004887342453003,grad_norm: 0.8996449832792127, iteration: 291317
loss: 1.0453022718429565,grad_norm: 0.6703627876588907, iteration: 291318
loss: 0.9809341430664062,grad_norm: 0.7952951313903738, iteration: 291319
loss: 1.1890748739242554,grad_norm: 0.9999995565935224, iteration: 291320
loss: 0.9936388731002808,grad_norm: 0.9999995926115403, iteration: 291321
loss: 1.0174362659454346,grad_norm: 0.7414367006948813, iteration: 291322
loss: 1.0119545459747314,grad_norm: 0.9291640211689933, iteration: 291323
loss: 1.005346655845642,grad_norm: 0.7282380884721091, iteration: 291324
loss: 1.038481593132019,grad_norm: 0.7444202065037111, iteration: 291325
loss: 0.9952778220176697,grad_norm: 0.9149056603209921, iteration: 291326
loss: 0.949923038482666,grad_norm: 0.790517999453397, iteration: 291327
loss: 1.044063687324524,grad_norm: 0.9301698572107057, iteration: 291328
loss: 0.9544073343276978,grad_norm: 0.7777827308428858, iteration: 291329
loss: 0.9907801151275635,grad_norm: 0.9999991464482575, iteration: 291330
loss: 1.0078707933425903,grad_norm: 0.7522590212905056, iteration: 291331
loss: 1.053252100944519,grad_norm: 0.9999991452479977, iteration: 291332
loss: 0.9851418137550354,grad_norm: 0.7202029690245803, iteration: 291333
loss: 1.0919995307922363,grad_norm: 0.8371823951095594, iteration: 291334
loss: 0.9384509325027466,grad_norm: 0.9999990958004817, iteration: 291335
loss: 1.0092545747756958,grad_norm: 0.999999906566537, iteration: 291336
loss: 1.0459990501403809,grad_norm: 0.7438922122596426, iteration: 291337
loss: 0.9959992170333862,grad_norm: 0.9360871050829483, iteration: 291338
loss: 0.9906970858573914,grad_norm: 0.9223477992293052, iteration: 291339
loss: 1.1599416732788086,grad_norm: 0.9999999534086906, iteration: 291340
loss: 1.0015285015106201,grad_norm: 0.7308866985834842, iteration: 291341
loss: 0.991549551486969,grad_norm: 0.8466777730061289, iteration: 291342
loss: 0.9938982129096985,grad_norm: 0.9632766762185333, iteration: 291343
loss: 1.0901846885681152,grad_norm: 0.9999992105672768, iteration: 291344
loss: 0.9961684346199036,grad_norm: 0.8764461279880946, iteration: 291345
loss: 1.0125694274902344,grad_norm: 0.8924462332101801, iteration: 291346
loss: 0.9923228025436401,grad_norm: 0.9987598349693595, iteration: 291347
loss: 1.0214898586273193,grad_norm: 0.7050619345410537, iteration: 291348
loss: 1.013706088066101,grad_norm: 0.9999991362231314, iteration: 291349
loss: 0.9740841388702393,grad_norm: 0.9999991142571593, iteration: 291350
loss: 1.0077136754989624,grad_norm: 0.7451133120248362, iteration: 291351
loss: 1.0276901721954346,grad_norm: 0.854952859513947, iteration: 291352
loss: 1.0497586727142334,grad_norm: 0.9999990501191209, iteration: 291353
loss: 1.0920135974884033,grad_norm: 0.9999993578862635, iteration: 291354
loss: 1.0118297338485718,grad_norm: 0.8588632441908797, iteration: 291355
loss: 0.9822140336036682,grad_norm: 0.8579307402374107, iteration: 291356
loss: 0.9911837577819824,grad_norm: 0.923263599491376, iteration: 291357
loss: 0.9807425737380981,grad_norm: 0.8028898342408973, iteration: 291358
loss: 1.0096250772476196,grad_norm: 0.7816242003701268, iteration: 291359
loss: 0.9869533181190491,grad_norm: 0.9999990589424296, iteration: 291360
loss: 0.9951719045639038,grad_norm: 0.8578704294600467, iteration: 291361
loss: 1.1142780780792236,grad_norm: 0.9999999432092473, iteration: 291362
loss: 0.972481369972229,grad_norm: 0.8830760581764315, iteration: 291363
loss: 1.0000640153884888,grad_norm: 0.879598394130042, iteration: 291364
loss: 0.9764540791511536,grad_norm: 0.858452316289282, iteration: 291365
loss: 0.9586346745491028,grad_norm: 0.8067671076053261, iteration: 291366
loss: 0.9783355593681335,grad_norm: 0.7318669808397803, iteration: 291367
loss: 1.0006511211395264,grad_norm: 0.905488091770585, iteration: 291368
loss: 0.9909184575080872,grad_norm: 0.8985358844590808, iteration: 291369
loss: 0.9862167835235596,grad_norm: 0.9999990151620214, iteration: 291370
loss: 1.0108911991119385,grad_norm: 0.7947757043405292, iteration: 291371
loss: 1.007832646369934,grad_norm: 0.9999991300871668, iteration: 291372
loss: 0.98481285572052,grad_norm: 0.8628878421224861, iteration: 291373
loss: 1.2032470703125,grad_norm: 0.9999994152521036, iteration: 291374
loss: 0.9891825318336487,grad_norm: 0.7933145220667853, iteration: 291375
loss: 1.0015466213226318,grad_norm: 0.7942954961890253, iteration: 291376
loss: 0.9890491962432861,grad_norm: 0.8042793863702065, iteration: 291377
loss: 0.9756037592887878,grad_norm: 0.8994649149484455, iteration: 291378
loss: 1.0010114908218384,grad_norm: 0.8456465212746681, iteration: 291379
loss: 1.030643105506897,grad_norm: 0.999999502667886, iteration: 291380
loss: 1.0086679458618164,grad_norm: 0.9999991358101605, iteration: 291381
loss: 1.0077180862426758,grad_norm: 0.8856278784049691, iteration: 291382
loss: 0.9489070773124695,grad_norm: 0.9667848202098972, iteration: 291383
loss: 0.9859369993209839,grad_norm: 0.929384615643524, iteration: 291384
loss: 0.9943936467170715,grad_norm: 0.7948478858067619, iteration: 291385
loss: 0.9829190969467163,grad_norm: 0.8479272057121398, iteration: 291386
loss: 0.9826768040657043,grad_norm: 0.7039450355794094, iteration: 291387
loss: 0.9691764712333679,grad_norm: 0.7129190682309969, iteration: 291388
loss: 0.9969647526741028,grad_norm: 0.8626904958248932, iteration: 291389
loss: 0.9986867308616638,grad_norm: 0.8633144700634268, iteration: 291390
loss: 1.003421664237976,grad_norm: 0.9414325402909919, iteration: 291391
loss: 0.9960894584655762,grad_norm: 0.7616943397930669, iteration: 291392
loss: 1.0031747817993164,grad_norm: 0.7966489425041565, iteration: 291393
loss: 0.9669345021247864,grad_norm: 0.8169925915147564, iteration: 291394
loss: 1.0074951648712158,grad_norm: 0.9836135900772934, iteration: 291395
loss: 0.9956021308898926,grad_norm: 0.8394352613794873, iteration: 291396
loss: 1.0037853717803955,grad_norm: 0.7644895280336169, iteration: 291397
loss: 0.9840796589851379,grad_norm: 0.7617646011638447, iteration: 291398
loss: 1.0070345401763916,grad_norm: 0.8229492928370812, iteration: 291399
loss: 1.0358927249908447,grad_norm: 0.8725761798915732, iteration: 291400
loss: 1.0113160610198975,grad_norm: 0.7857321052325886, iteration: 291401
loss: 0.987761914730072,grad_norm: 0.7225760293654607, iteration: 291402
loss: 0.9914174675941467,grad_norm: 0.9999991597237384, iteration: 291403
loss: 0.9727818965911865,grad_norm: 0.895233354359866, iteration: 291404
loss: 1.1302841901779175,grad_norm: 1.0000000128437212, iteration: 291405
loss: 1.0072637796401978,grad_norm: 0.8087253902944784, iteration: 291406
loss: 0.9931039810180664,grad_norm: 0.9149881305050374, iteration: 291407
loss: 0.9688648581504822,grad_norm: 0.8971931409445632, iteration: 291408
loss: 0.9543914794921875,grad_norm: 0.7558551588349268, iteration: 291409
loss: 1.024370551109314,grad_norm: 0.8091648132477491, iteration: 291410
loss: 1.0011482238769531,grad_norm: 0.7891434196219973, iteration: 291411
loss: 0.9991018772125244,grad_norm: 0.9878886589395781, iteration: 291412
loss: 1.1567760705947876,grad_norm: 0.9999993590957135, iteration: 291413
loss: 1.016833782196045,grad_norm: 0.8513385303603811, iteration: 291414
loss: 0.9413343667984009,grad_norm: 0.9078600933094593, iteration: 291415
loss: 1.0420753955841064,grad_norm: 0.9999998617666142, iteration: 291416
loss: 1.015113353729248,grad_norm: 0.8765018512400597, iteration: 291417
loss: 0.9869102835655212,grad_norm: 0.8964354284026633, iteration: 291418
loss: 1.0285004377365112,grad_norm: 0.8624909686304432, iteration: 291419
loss: 0.989700436592102,grad_norm: 0.9999991226424785, iteration: 291420
loss: 0.9878522157669067,grad_norm: 0.8704382761746754, iteration: 291421
loss: 0.9724161028862,grad_norm: 0.7898477564913822, iteration: 291422
loss: 0.9950889348983765,grad_norm: 0.9525096767388878, iteration: 291423
loss: 0.9763681888580322,grad_norm: 0.8935582109864459, iteration: 291424
loss: 0.9689220190048218,grad_norm: 0.9620926099417333, iteration: 291425
loss: 0.9871558547019958,grad_norm: 0.6753658801412068, iteration: 291426
loss: 1.025785207748413,grad_norm: 0.8628687456525541, iteration: 291427
loss: 1.0062326192855835,grad_norm: 0.8000618492224524, iteration: 291428
loss: 0.991415798664093,grad_norm: 0.8022200187696342, iteration: 291429
loss: 0.9947612285614014,grad_norm: 0.9999990845705402, iteration: 291430
loss: 1.040327548980713,grad_norm: 0.9999991681031922, iteration: 291431
loss: 1.00839102268219,grad_norm: 0.9999995939643948, iteration: 291432
loss: 0.9916390180587769,grad_norm: 0.8733057297854724, iteration: 291433
loss: 0.9833304286003113,grad_norm: 0.9999994742159637, iteration: 291434
loss: 1.0079418420791626,grad_norm: 0.8251658446515913, iteration: 291435
loss: 1.0007009506225586,grad_norm: 0.847884352784056, iteration: 291436
loss: 1.047344446182251,grad_norm: 0.9999990485594241, iteration: 291437
loss: 0.9598172307014465,grad_norm: 0.6936255576355488, iteration: 291438
loss: 1.007676601409912,grad_norm: 0.9169041332114684, iteration: 291439
loss: 1.0165984630584717,grad_norm: 0.999999221540071, iteration: 291440
loss: 1.0213356018066406,grad_norm: 0.9820656333535488, iteration: 291441
loss: 1.0594992637634277,grad_norm: 0.9999992992369112, iteration: 291442
loss: 1.0262563228607178,grad_norm: 0.8818988083630943, iteration: 291443
loss: 0.9711982011795044,grad_norm: 0.8951563855585001, iteration: 291444
loss: 0.9791809916496277,grad_norm: 0.9999991655893149, iteration: 291445
loss: 1.0367918014526367,grad_norm: 0.9999992920250351, iteration: 291446
loss: 1.0643339157104492,grad_norm: 0.9999997371800445, iteration: 291447
loss: 0.9652582406997681,grad_norm: 0.8132790750204046, iteration: 291448
loss: 1.0150524377822876,grad_norm: 0.9602682862019525, iteration: 291449
loss: 0.9855034947395325,grad_norm: 0.8187036791800754, iteration: 291450
loss: 1.0645917654037476,grad_norm: 0.9999992116345238, iteration: 291451
loss: 1.0226539373397827,grad_norm: 0.9999992265869297, iteration: 291452
loss: 0.9856279492378235,grad_norm: 0.9545455844838154, iteration: 291453
loss: 0.9907055497169495,grad_norm: 0.9040654291209437, iteration: 291454
loss: 1.023789882659912,grad_norm: 0.9595944851531049, iteration: 291455
loss: 1.0036062002182007,grad_norm: 0.909882894157429, iteration: 291456
loss: 1.0034443140029907,grad_norm: 0.9624492257617896, iteration: 291457
loss: 0.9722328782081604,grad_norm: 0.8590827173230886, iteration: 291458
loss: 0.9403395056724548,grad_norm: 0.933482631359719, iteration: 291459
loss: 1.0248061418533325,grad_norm: 0.9999991883791801, iteration: 291460
loss: 1.0109281539916992,grad_norm: 0.9153448899106414, iteration: 291461
loss: 1.0442076921463013,grad_norm: 0.9999992687645655, iteration: 291462
loss: 1.0182359218597412,grad_norm: 0.9238914032125606, iteration: 291463
loss: 0.9918888211250305,grad_norm: 0.7868051193995371, iteration: 291464
loss: 1.0108494758605957,grad_norm: 0.8218684281091799, iteration: 291465
loss: 1.0333598852157593,grad_norm: 0.9999991517635729, iteration: 291466
loss: 0.962056040763855,grad_norm: 0.7650803385011888, iteration: 291467
loss: 0.9779394268989563,grad_norm: 0.7846136701225408, iteration: 291468
loss: 1.0176308155059814,grad_norm: 0.9999991079051976, iteration: 291469
loss: 1.0699130296707153,grad_norm: 0.9999998075450093, iteration: 291470
loss: 0.9679440855979919,grad_norm: 0.9387023920830498, iteration: 291471
loss: 1.02485191822052,grad_norm: 0.8462501667038475, iteration: 291472
loss: 0.9579155445098877,grad_norm: 0.9002893542370171, iteration: 291473
loss: 0.980204701423645,grad_norm: 0.8150973960929212, iteration: 291474
loss: 1.0322104692459106,grad_norm: 0.9999996830834752, iteration: 291475
loss: 0.9865013957023621,grad_norm: 0.8295089863824578, iteration: 291476
loss: 1.0257502794265747,grad_norm: 0.815409329503134, iteration: 291477
loss: 1.0161985158920288,grad_norm: 0.9999991982958729, iteration: 291478
loss: 1.0128751993179321,grad_norm: 0.8343317423963867, iteration: 291479
loss: 1.0369999408721924,grad_norm: 0.9339584603151385, iteration: 291480
loss: 0.9749543070793152,grad_norm: 0.7559661604417381, iteration: 291481
loss: 1.0239797830581665,grad_norm: 0.7808720777519634, iteration: 291482
loss: 1.0261117219924927,grad_norm: 0.8274407354447111, iteration: 291483
loss: 1.015095829963684,grad_norm: 0.8803601349781828, iteration: 291484
loss: 1.0024040937423706,grad_norm: 0.7546903672123058, iteration: 291485
loss: 0.9862299561500549,grad_norm: 0.8099264315001171, iteration: 291486
loss: 0.9699850678443909,grad_norm: 0.7875239867519367, iteration: 291487
loss: 1.0797511339187622,grad_norm: 0.7176668041933932, iteration: 291488
loss: 1.048671841621399,grad_norm: 0.9999994483293398, iteration: 291489
loss: 1.0327459573745728,grad_norm: 0.9999990302663705, iteration: 291490
loss: 0.9701719284057617,grad_norm: 0.772216399536961, iteration: 291491
loss: 0.996802031993866,grad_norm: 0.9844824826802616, iteration: 291492
loss: 1.0452805757522583,grad_norm: 0.9999997506008447, iteration: 291493
loss: 0.9628071784973145,grad_norm: 0.9184965887963508, iteration: 291494
loss: 1.0248173475265503,grad_norm: 0.9999990847525507, iteration: 291495
loss: 1.0082961320877075,grad_norm: 0.8458200693736916, iteration: 291496
loss: 1.0137983560562134,grad_norm: 0.9595589304328977, iteration: 291497
loss: 0.9676228165626526,grad_norm: 0.8113587211733713, iteration: 291498
loss: 1.0012931823730469,grad_norm: 0.8681811822461355, iteration: 291499
loss: 1.0332962274551392,grad_norm: 0.8714804332907167, iteration: 291500
loss: 0.9981495141983032,grad_norm: 0.6913695719084678, iteration: 291501
loss: 1.0144973993301392,grad_norm: 0.9999996224692379, iteration: 291502
loss: 1.0106780529022217,grad_norm: 0.8700393188036383, iteration: 291503
loss: 0.9958612322807312,grad_norm: 0.8695700553050439, iteration: 291504
loss: 1.023952841758728,grad_norm: 0.7349883620648322, iteration: 291505
loss: 0.9964949488639832,grad_norm: 0.9286093539953134, iteration: 291506
loss: 0.966009795665741,grad_norm: 0.8556599860885221, iteration: 291507
loss: 0.987908661365509,grad_norm: 0.9999994301366427, iteration: 291508
loss: 1.0182965993881226,grad_norm: 0.8293491458859692, iteration: 291509
loss: 1.058197021484375,grad_norm: 0.9999998383700031, iteration: 291510
loss: 0.987083375453949,grad_norm: 0.9999996146958819, iteration: 291511
loss: 0.9966233968734741,grad_norm: 0.9999992494615992, iteration: 291512
loss: 1.014997959136963,grad_norm: 0.8304202131028101, iteration: 291513
loss: 1.0301405191421509,grad_norm: 0.9151926434612297, iteration: 291514
loss: 1.009230375289917,grad_norm: 0.9999991426773142, iteration: 291515
loss: 1.0145747661590576,grad_norm: 0.9003444465266218, iteration: 291516
loss: 0.9966012835502625,grad_norm: 0.8063523596140324, iteration: 291517
loss: 1.0066468715667725,grad_norm: 0.8138970368103933, iteration: 291518
loss: 1.0264441967010498,grad_norm: 0.9999990816064694, iteration: 291519
loss: 0.9884321093559265,grad_norm: 0.8638434022219826, iteration: 291520
loss: 0.990151584148407,grad_norm: 0.9999991321954192, iteration: 291521
loss: 0.9848308563232422,grad_norm: 0.7670997902803312, iteration: 291522
loss: 1.0062676668167114,grad_norm: 0.9999999342773523, iteration: 291523
loss: 1.0019230842590332,grad_norm: 0.999999214256687, iteration: 291524
loss: 1.0135571956634521,grad_norm: 0.7860271152981972, iteration: 291525
loss: 0.9992292523384094,grad_norm: 0.8206546288512059, iteration: 291526
loss: 0.9940906167030334,grad_norm: 0.6945837567389241, iteration: 291527
loss: 0.9782279133796692,grad_norm: 0.9394007204554073, iteration: 291528
loss: 1.0809723138809204,grad_norm: 0.9449392678339058, iteration: 291529
loss: 0.9969987869262695,grad_norm: 0.8589882615719902, iteration: 291530
loss: 1.0157197713851929,grad_norm: 0.77032591082183, iteration: 291531
loss: 1.005711317062378,grad_norm: 0.8690480354298533, iteration: 291532
loss: 1.0236583948135376,grad_norm: 0.847733400473979, iteration: 291533
loss: 0.9984445571899414,grad_norm: 0.7414482168976869, iteration: 291534
loss: 1.0410189628601074,grad_norm: 0.9316727623549127, iteration: 291535
loss: 1.0224475860595703,grad_norm: 0.7773217800557525, iteration: 291536
loss: 0.9875207543373108,grad_norm: 0.999999290457571, iteration: 291537
loss: 0.9858189821243286,grad_norm: 0.99999896066738, iteration: 291538
loss: 0.9945478439331055,grad_norm: 0.9654096873014703, iteration: 291539
loss: 0.9895829558372498,grad_norm: 0.8893142590006051, iteration: 291540
loss: 1.0113794803619385,grad_norm: 0.8359872191809228, iteration: 291541
loss: 1.0071666240692139,grad_norm: 0.7750738457383433, iteration: 291542
loss: 1.0297439098358154,grad_norm: 0.9545462823601236, iteration: 291543
loss: 1.0009098052978516,grad_norm: 0.8923041421669107, iteration: 291544
loss: 1.0274920463562012,grad_norm: 0.8148711999988011, iteration: 291545
loss: 0.997181236743927,grad_norm: 0.999999017940777, iteration: 291546
loss: 0.9797682762145996,grad_norm: 0.8781592453497744, iteration: 291547
loss: 0.9962043762207031,grad_norm: 0.9999990660361614, iteration: 291548
loss: 0.9754828810691833,grad_norm: 0.9999991872160372, iteration: 291549
loss: 1.1929489374160767,grad_norm: 0.9999992239155733, iteration: 291550
loss: 0.9990270733833313,grad_norm: 0.9634724920582359, iteration: 291551
loss: 0.9567273855209351,grad_norm: 0.9999991231341488, iteration: 291552
loss: 1.0119130611419678,grad_norm: 0.8628516117098204, iteration: 291553
loss: 1.01145601272583,grad_norm: 0.7646676296978215, iteration: 291554
loss: 0.9954978227615356,grad_norm: 0.7873220535638615, iteration: 291555
loss: 0.9608974456787109,grad_norm: 0.7146898828024467, iteration: 291556
loss: 1.00797438621521,grad_norm: 0.7959818144753118, iteration: 291557
loss: 1.026159405708313,grad_norm: 0.774535691192926, iteration: 291558
loss: 0.982707142829895,grad_norm: 0.978880504290115, iteration: 291559
loss: 1.055943489074707,grad_norm: 0.9999991176706617, iteration: 291560
loss: 0.9903839230537415,grad_norm: 0.9563225645781859, iteration: 291561
loss: 1.0276118516921997,grad_norm: 0.8945039409176029, iteration: 291562
loss: 1.010555624961853,grad_norm: 0.9999991430791468, iteration: 291563
loss: 1.0374102592468262,grad_norm: 0.9217360583553293, iteration: 291564
loss: 1.0109686851501465,grad_norm: 0.9413161905167279, iteration: 291565
loss: 1.1263360977172852,grad_norm: 0.8989552779633386, iteration: 291566
loss: 0.961273193359375,grad_norm: 0.8301058388329953, iteration: 291567
loss: 0.9699514508247375,grad_norm: 0.999999125580996, iteration: 291568
loss: 1.0103157758712769,grad_norm: 0.7721332005540851, iteration: 291569
loss: 1.0525091886520386,grad_norm: 0.9710701870207098, iteration: 291570
loss: 0.982434868812561,grad_norm: 0.8656603548476226, iteration: 291571
loss: 0.9779590964317322,grad_norm: 0.9398232505087423, iteration: 291572
loss: 1.008355975151062,grad_norm: 0.9118289418178709, iteration: 291573
loss: 1.004159927368164,grad_norm: 0.9999996451604232, iteration: 291574
loss: 1.0377064943313599,grad_norm: 0.9999992994067889, iteration: 291575
loss: 0.9747686982154846,grad_norm: 0.9604787240341774, iteration: 291576
loss: 0.9989058375358582,grad_norm: 0.9577880067048299, iteration: 291577
loss: 0.9963782429695129,grad_norm: 0.7606887420102044, iteration: 291578
loss: 1.0020695924758911,grad_norm: 0.7413063708247799, iteration: 291579
loss: 1.0324195623397827,grad_norm: 0.9417702133117937, iteration: 291580
loss: 1.0351680517196655,grad_norm: 0.8503837283793262, iteration: 291581
loss: 1.012076735496521,grad_norm: 0.9064681287717425, iteration: 291582
loss: 1.0279792547225952,grad_norm: 0.9169582262786233, iteration: 291583
loss: 1.133374571800232,grad_norm: 0.9999993959453802, iteration: 291584
loss: 0.9594703316688538,grad_norm: 0.7488663460127071, iteration: 291585
loss: 1.0116913318634033,grad_norm: 0.9999992579918683, iteration: 291586
loss: 1.0358622074127197,grad_norm: 0.7692988776009587, iteration: 291587
loss: 0.9916526675224304,grad_norm: 0.9999991211795246, iteration: 291588
loss: 1.0125480890274048,grad_norm: 0.9181713081242704, iteration: 291589
loss: 1.0131630897521973,grad_norm: 0.939710227657762, iteration: 291590
loss: 1.007016897201538,grad_norm: 0.8421042101540168, iteration: 291591
loss: 1.0273905992507935,grad_norm: 0.9999990469833167, iteration: 291592
loss: 0.980302631855011,grad_norm: 0.6814037950996049, iteration: 291593
loss: 1.017454981803894,grad_norm: 0.8758720113402635, iteration: 291594
loss: 1.0104308128356934,grad_norm: 0.9261570743408186, iteration: 291595
loss: 0.9915711283683777,grad_norm: 0.8427748038761347, iteration: 291596
loss: 0.9651060104370117,grad_norm: 0.9708812918369583, iteration: 291597
loss: 0.9905398488044739,grad_norm: 0.9500700467341087, iteration: 291598
loss: 0.9394676685333252,grad_norm: 0.897422553651981, iteration: 291599
loss: 0.9841989278793335,grad_norm: 0.7933474889937783, iteration: 291600
loss: 1.0050617456436157,grad_norm: 0.7946492103440508, iteration: 291601
loss: 1.0333776473999023,grad_norm: 0.8627986068567648, iteration: 291602
loss: 0.9808911085128784,grad_norm: 0.7910988774937918, iteration: 291603
loss: 0.9978349804878235,grad_norm: 0.7870092898822685, iteration: 291604
loss: 0.9891068935394287,grad_norm: 0.9999992397205464, iteration: 291605
loss: 0.9613970518112183,grad_norm: 0.8049824080255413, iteration: 291606
loss: 0.9811750650405884,grad_norm: 0.999999250915347, iteration: 291607
loss: 0.9865550994873047,grad_norm: 0.9999990318490167, iteration: 291608
loss: 1.0192879438400269,grad_norm: 0.9402002324478571, iteration: 291609
loss: 0.9942967891693115,grad_norm: 0.977295875562241, iteration: 291610
loss: 0.9575385451316833,grad_norm: 0.9769834901090116, iteration: 291611
loss: 0.9546105861663818,grad_norm: 0.93046657584124, iteration: 291612
loss: 0.9580954313278198,grad_norm: 0.9420107367987268, iteration: 291613
loss: 0.9955945611000061,grad_norm: 0.8538219751278232, iteration: 291614
loss: 1.0450067520141602,grad_norm: 0.9999993375316171, iteration: 291615
loss: 1.0041667222976685,grad_norm: 0.9436584810480586, iteration: 291616
loss: 0.9974310398101807,grad_norm: 0.9875470905311167, iteration: 291617
loss: 1.0124001502990723,grad_norm: 0.9999998633218795, iteration: 291618
loss: 0.9747893810272217,grad_norm: 0.9999990448164586, iteration: 291619
loss: 0.9673877954483032,grad_norm: 0.8068583153929179, iteration: 291620
loss: 1.0447500944137573,grad_norm: 0.999999360708462, iteration: 291621
loss: 1.010053277015686,grad_norm: 0.7730308009143769, iteration: 291622
loss: 1.0160530805587769,grad_norm: 0.864724853243851, iteration: 291623
loss: 0.9778065085411072,grad_norm: 0.7882772408274022, iteration: 291624
loss: 1.0255197286605835,grad_norm: 0.9999990058444271, iteration: 291625
loss: 1.0489379167556763,grad_norm: 0.9999995671924141, iteration: 291626
loss: 1.0442677736282349,grad_norm: 0.9999997430225349, iteration: 291627
loss: 0.9823455810546875,grad_norm: 0.9645026736412925, iteration: 291628
loss: 0.9794135689735413,grad_norm: 0.840127771426326, iteration: 291629
loss: 1.0470612049102783,grad_norm: 0.9999996523678667, iteration: 291630
loss: 1.0259701013565063,grad_norm: 0.7242115514122958, iteration: 291631
loss: 1.0499690771102905,grad_norm: 0.9311536271598977, iteration: 291632
loss: 1.0028257369995117,grad_norm: 0.9999990481538176, iteration: 291633
loss: 0.9870175719261169,grad_norm: 0.9894352293580181, iteration: 291634
loss: 1.1402473449707031,grad_norm: 0.9999991170577675, iteration: 291635
loss: 1.009463906288147,grad_norm: 0.9999989236423164, iteration: 291636
loss: 0.9918100237846375,grad_norm: 0.9651676031861852, iteration: 291637
loss: 1.013462781906128,grad_norm: 0.9435678804156864, iteration: 291638
loss: 1.0415111780166626,grad_norm: 0.9999997645758275, iteration: 291639
loss: 0.9938880801200867,grad_norm: 0.9999990361304565, iteration: 291640
loss: 1.0471810102462769,grad_norm: 0.6882118060734786, iteration: 291641
loss: 1.0131568908691406,grad_norm: 0.7562463887252984, iteration: 291642
loss: 0.9927997589111328,grad_norm: 0.9999991427107559, iteration: 291643
loss: 1.020583987236023,grad_norm: 0.8339873747687432, iteration: 291644
loss: 0.982746958732605,grad_norm: 0.8697716555693911, iteration: 291645
loss: 0.9978739619255066,grad_norm: 0.9128323862577967, iteration: 291646
loss: 1.0169931650161743,grad_norm: 0.8788123166276255, iteration: 291647
loss: 0.9851329922676086,grad_norm: 0.8748096559967723, iteration: 291648
loss: 0.98929363489151,grad_norm: 0.9459302832532663, iteration: 291649
loss: 0.9689566493034363,grad_norm: 0.8495552227347267, iteration: 291650
loss: 0.9703091979026794,grad_norm: 0.7972125841505923, iteration: 291651
loss: 0.9869668483734131,grad_norm: 0.8695380063816281, iteration: 291652
loss: 1.002174735069275,grad_norm: 0.8067188165820731, iteration: 291653
loss: 1.0207805633544922,grad_norm: 0.7872170053296024, iteration: 291654
loss: 0.9695056080818176,grad_norm: 0.831298007580053, iteration: 291655
loss: 1.0326590538024902,grad_norm: 0.8438472889848452, iteration: 291656
loss: 0.9782164096832275,grad_norm: 0.9999991566774182, iteration: 291657
loss: 1.0051078796386719,grad_norm: 0.7970376716707644, iteration: 291658
loss: 1.00160813331604,grad_norm: 0.740410669653214, iteration: 291659
loss: 1.0092933177947998,grad_norm: 0.7674608896968018, iteration: 291660
loss: 1.0236605405807495,grad_norm: 0.7805362286675575, iteration: 291661
loss: 1.0157830715179443,grad_norm: 0.8466415589062889, iteration: 291662
loss: 1.0102784633636475,grad_norm: 0.8865413006837951, iteration: 291663
loss: 0.95949387550354,grad_norm: 0.9006637698554063, iteration: 291664
loss: 1.0235328674316406,grad_norm: 0.9999996990663644, iteration: 291665
loss: 1.0293926000595093,grad_norm: 0.7304116134791843, iteration: 291666
loss: 0.9768037796020508,grad_norm: 0.8650058832481705, iteration: 291667
loss: 0.9782140851020813,grad_norm: 0.9999993707585425, iteration: 291668
loss: 1.0044814348220825,grad_norm: 0.7361674110731985, iteration: 291669
loss: 0.9995625019073486,grad_norm: 0.7549547809544593, iteration: 291670
loss: 1.0370830297470093,grad_norm: 0.9999994759131186, iteration: 291671
loss: 1.0489192008972168,grad_norm: 0.9006483722620754, iteration: 291672
loss: 1.0032001733779907,grad_norm: 0.8218497071811538, iteration: 291673
loss: 0.9924774169921875,grad_norm: 0.9929165807901575, iteration: 291674
loss: 1.0688047409057617,grad_norm: 0.9999999529847055, iteration: 291675
loss: 0.9873908758163452,grad_norm: 0.7658910634693177, iteration: 291676
loss: 0.9810037612915039,grad_norm: 0.8289467918295353, iteration: 291677
loss: 1.0318365097045898,grad_norm: 0.9999999479190493, iteration: 291678
loss: 0.9919918179512024,grad_norm: 0.8717890504187842, iteration: 291679
loss: 1.024649739265442,grad_norm: 0.8377316471405797, iteration: 291680
loss: 1.0063259601593018,grad_norm: 0.7477523956034444, iteration: 291681
loss: 0.9787084460258484,grad_norm: 0.9184914011535665, iteration: 291682
loss: 0.9865131378173828,grad_norm: 0.8833204120992482, iteration: 291683
loss: 0.966571033000946,grad_norm: 0.9999991522155701, iteration: 291684
loss: 1.0008409023284912,grad_norm: 0.8332514472201707, iteration: 291685
loss: 1.0005714893341064,grad_norm: 0.7375099748001509, iteration: 291686
loss: 1.010353684425354,grad_norm: 0.770950421860145, iteration: 291687
loss: 0.9965806007385254,grad_norm: 0.8559343054888913, iteration: 291688
loss: 1.1007227897644043,grad_norm: 0.7936650694203107, iteration: 291689
loss: 0.9782092571258545,grad_norm: 0.8643891056743821, iteration: 291690
loss: 1.0046169757843018,grad_norm: 0.8125217409759374, iteration: 291691
loss: 1.0052767992019653,grad_norm: 0.9842555337101767, iteration: 291692
loss: 0.9785376191139221,grad_norm: 0.9999991995439363, iteration: 291693
loss: 0.9877985715866089,grad_norm: 0.956278443634417, iteration: 291694
loss: 1.0001400709152222,grad_norm: 0.9112593541861926, iteration: 291695
loss: 0.9926953911781311,grad_norm: 0.9999988591486761, iteration: 291696
loss: 1.0065807104110718,grad_norm: 0.873589213409735, iteration: 291697
loss: 1.0307461023330688,grad_norm: 0.8378261517381601, iteration: 291698
loss: 0.9988691210746765,grad_norm: 0.9999991541991685, iteration: 291699
loss: 1.0249640941619873,grad_norm: 0.9999995604360237, iteration: 291700
loss: 1.0200754404067993,grad_norm: 0.9999990753582223, iteration: 291701
loss: 0.9890616536140442,grad_norm: 0.7577714274085744, iteration: 291702
loss: 0.9851043820381165,grad_norm: 0.8439537339357355, iteration: 291703
loss: 1.074077844619751,grad_norm: 0.9999991416245764, iteration: 291704
loss: 0.9809327721595764,grad_norm: 0.7533820875608569, iteration: 291705
loss: 1.0072693824768066,grad_norm: 0.9530793814216579, iteration: 291706
loss: 1.0534216165542603,grad_norm: 0.9999996964518807, iteration: 291707
loss: 1.0219035148620605,grad_norm: 0.7748058668138562, iteration: 291708
loss: 1.0193992853164673,grad_norm: 0.8900977829712575, iteration: 291709
loss: 1.1050111055374146,grad_norm: 0.9999992876043757, iteration: 291710
loss: 1.2419712543487549,grad_norm: 0.9999990932387968, iteration: 291711
loss: 1.00272798538208,grad_norm: 0.808525227427606, iteration: 291712
loss: 1.0410127639770508,grad_norm: 0.9999991254580873, iteration: 291713
loss: 1.034614086151123,grad_norm: 0.920545820812486, iteration: 291714
loss: 1.0000625848770142,grad_norm: 0.952355423715849, iteration: 291715
loss: 0.9890109300613403,grad_norm: 0.7891867583411619, iteration: 291716
loss: 1.0145800113677979,grad_norm: 0.999999093274333, iteration: 291717
loss: 1.013384461402893,grad_norm: 0.8852233366742102, iteration: 291718
loss: 0.9506489038467407,grad_norm: 0.9999989678468244, iteration: 291719
loss: 1.0086709260940552,grad_norm: 0.7620602928292389, iteration: 291720
loss: 1.0198179483413696,grad_norm: 0.9636612510228473, iteration: 291721
loss: 0.9868102073669434,grad_norm: 0.7323316548697314, iteration: 291722
loss: 1.0051363706588745,grad_norm: 0.9999989505378297, iteration: 291723
loss: 1.0735431909561157,grad_norm: 0.906358796763775, iteration: 291724
loss: 0.9605844616889954,grad_norm: 0.9999990557677659, iteration: 291725
loss: 1.0221880674362183,grad_norm: 0.9999991938532284, iteration: 291726
loss: 1.0073697566986084,grad_norm: 0.875426699515811, iteration: 291727
loss: 1.0087380409240723,grad_norm: 0.9150137914991632, iteration: 291728
loss: 1.0116406679153442,grad_norm: 0.9999994882859655, iteration: 291729
loss: 0.9881996512413025,grad_norm: 0.9999995733685864, iteration: 291730
loss: 0.9868219494819641,grad_norm: 0.9187590327030211, iteration: 291731
loss: 0.9714710712432861,grad_norm: 0.9545934364979678, iteration: 291732
loss: 1.0225422382354736,grad_norm: 0.9756852532440753, iteration: 291733
loss: 1.0263769626617432,grad_norm: 0.935832605707155, iteration: 291734
loss: 0.9916124939918518,grad_norm: 0.891450268191711, iteration: 291735
loss: 1.0192701816558838,grad_norm: 0.9052185365950295, iteration: 291736
loss: 1.0162546634674072,grad_norm: 0.7917006939797753, iteration: 291737
loss: 1.045210838317871,grad_norm: 0.9246379747219233, iteration: 291738
loss: 1.0991255044937134,grad_norm: 0.9999999383882293, iteration: 291739
loss: 1.0144586563110352,grad_norm: 0.8341019553144801, iteration: 291740
loss: 1.0491586923599243,grad_norm: 0.9074522771914328, iteration: 291741
loss: 1.0329232215881348,grad_norm: 0.9167738549932892, iteration: 291742
loss: 1.0496710538864136,grad_norm: 0.9852412475200302, iteration: 291743
loss: 1.0281462669372559,grad_norm: 0.7147100131667795, iteration: 291744
loss: 0.9753735065460205,grad_norm: 0.9228635479902124, iteration: 291745
loss: 1.008460283279419,grad_norm: 0.8414941852434379, iteration: 291746
loss: 0.972219705581665,grad_norm: 0.9999991331581836, iteration: 291747
loss: 0.9891544580459595,grad_norm: 0.9698697689261697, iteration: 291748
loss: 0.9788066148757935,grad_norm: 0.8548983134921604, iteration: 291749
loss: 0.9849414825439453,grad_norm: 0.9999989712258703, iteration: 291750
loss: 0.9988675117492676,grad_norm: 0.7889270373407828, iteration: 291751
loss: 0.9911675453186035,grad_norm: 0.8178709243649576, iteration: 291752
loss: 1.037555456161499,grad_norm: 0.8664883428747089, iteration: 291753
loss: 1.005913496017456,grad_norm: 0.9999990964581219, iteration: 291754
loss: 1.0123271942138672,grad_norm: 0.9999990835715512, iteration: 291755
loss: 0.959771454334259,grad_norm: 0.8145775011032923, iteration: 291756
loss: 0.9907312393188477,grad_norm: 0.7922671999123905, iteration: 291757
loss: 1.0056039094924927,grad_norm: 0.9999992203004245, iteration: 291758
loss: 1.0300955772399902,grad_norm: 0.9999999630348148, iteration: 291759
loss: 0.9839937686920166,grad_norm: 0.9999999235419088, iteration: 291760
loss: 0.9743277430534363,grad_norm: 0.7793202988709284, iteration: 291761
loss: 0.9769281148910522,grad_norm: 0.8857305129887281, iteration: 291762
loss: 1.003678560256958,grad_norm: 0.8628359440574032, iteration: 291763
loss: 1.0012351274490356,grad_norm: 0.6478471162306225, iteration: 291764
loss: 1.0130151510238647,grad_norm: 0.8766234430278017, iteration: 291765
loss: 1.0150643587112427,grad_norm: 0.8108929424285977, iteration: 291766
loss: 1.017868995666504,grad_norm: 0.9999995333527195, iteration: 291767
loss: 1.0190166234970093,grad_norm: 0.7344778843280441, iteration: 291768
loss: 1.011767864227295,grad_norm: 0.8054783950281512, iteration: 291769
loss: 1.0040955543518066,grad_norm: 0.8647547931451134, iteration: 291770
loss: 0.9940181970596313,grad_norm: 0.8316062834874224, iteration: 291771
loss: 0.9959447979927063,grad_norm: 0.973983677913128, iteration: 291772
loss: 1.0062028169631958,grad_norm: 0.9271063479268757, iteration: 291773
loss: 1.0036734342575073,grad_norm: 0.841253151509429, iteration: 291774
loss: 1.01975417137146,grad_norm: 0.7659379640286526, iteration: 291775
loss: 1.057804822921753,grad_norm: 0.9683965541403065, iteration: 291776
loss: 0.9964178800582886,grad_norm: 0.804376367843592, iteration: 291777
loss: 0.9793599843978882,grad_norm: 0.8410606213258501, iteration: 291778
loss: 1.0169843435287476,grad_norm: 0.9999998972628461, iteration: 291779
loss: 1.0035158395767212,grad_norm: 0.861220818643142, iteration: 291780
loss: 0.9674325585365295,grad_norm: 0.9125778346452837, iteration: 291781
loss: 0.9903585314750671,grad_norm: 0.9354380474007672, iteration: 291782
loss: 1.0238131284713745,grad_norm: 0.9801988195632786, iteration: 291783
loss: 1.057315707206726,grad_norm: 0.9341973904445513, iteration: 291784
loss: 1.0352615118026733,grad_norm: 0.9999998543013366, iteration: 291785
loss: 1.0295917987823486,grad_norm: 0.954548508243995, iteration: 291786
loss: 1.0669106245040894,grad_norm: 0.7579714290826055, iteration: 291787
loss: 1.02048921585083,grad_norm: 0.9748520435596312, iteration: 291788
loss: 1.0142295360565186,grad_norm: 0.7258108294956935, iteration: 291789
loss: 1.0325648784637451,grad_norm: 0.8815771005125558, iteration: 291790
loss: 1.1164346933364868,grad_norm: 1.0000000130122124, iteration: 291791
loss: 1.0219197273254395,grad_norm: 0.8522681592488747, iteration: 291792
loss: 1.0350701808929443,grad_norm: 0.9999991942261902, iteration: 291793
loss: 1.0262019634246826,grad_norm: 0.9999991998928054, iteration: 291794
loss: 0.9639115333557129,grad_norm: 0.737246524200612, iteration: 291795
loss: 0.9633870720863342,grad_norm: 0.7681578052756509, iteration: 291796
loss: 1.0017257928848267,grad_norm: 0.9999992318457244, iteration: 291797
loss: 0.9742178916931152,grad_norm: 0.9999990559224735, iteration: 291798
loss: 1.0147099494934082,grad_norm: 0.9980829336131745, iteration: 291799
loss: 1.0650949478149414,grad_norm: 0.999999341871193, iteration: 291800
loss: 1.045310378074646,grad_norm: 0.8408403101554094, iteration: 291801
loss: 1.0351096391677856,grad_norm: 0.9546402482856897, iteration: 291802
loss: 0.9828341603279114,grad_norm: 0.8574734419677512, iteration: 291803
loss: 1.0252622365951538,grad_norm: 0.8255200762039956, iteration: 291804
loss: 1.0258840322494507,grad_norm: 0.8362914237686314, iteration: 291805
loss: 0.9959230422973633,grad_norm: 0.8978634177433312, iteration: 291806
loss: 0.9692738056182861,grad_norm: 0.8530814656125184, iteration: 291807
loss: 1.0309169292449951,grad_norm: 0.8380081767594287, iteration: 291808
loss: 1.007751226425171,grad_norm: 0.7930002319720776, iteration: 291809
loss: 0.9899349808692932,grad_norm: 0.9519009244759952, iteration: 291810
loss: 1.0085519552230835,grad_norm: 0.9999998043057343, iteration: 291811
loss: 1.0173838138580322,grad_norm: 0.7555662470264652, iteration: 291812
loss: 1.018592119216919,grad_norm: 0.9999989581448989, iteration: 291813
loss: 1.0399107933044434,grad_norm: 0.8993872125094766, iteration: 291814
loss: 0.9974495768547058,grad_norm: 0.8012193893429413, iteration: 291815
loss: 1.0174715518951416,grad_norm: 0.877852150721144, iteration: 291816
loss: 0.9990648627281189,grad_norm: 0.7930099954033802, iteration: 291817
loss: 0.9794631004333496,grad_norm: 0.7577134203359469, iteration: 291818
loss: 0.9756127595901489,grad_norm: 0.8099082513728249, iteration: 291819
loss: 1.0009056329727173,grad_norm: 0.8255280253670628, iteration: 291820
loss: 0.9611184000968933,grad_norm: 0.8547865176669123, iteration: 291821
loss: 1.0234479904174805,grad_norm: 0.9329458097116093, iteration: 291822
loss: 0.9651784896850586,grad_norm: 0.8742309434706174, iteration: 291823
loss: 1.0089524984359741,grad_norm: 0.8005144011096221, iteration: 291824
loss: 1.009935975074768,grad_norm: 0.9999992890890873, iteration: 291825
loss: 0.9843058586120605,grad_norm: 0.9853146898328684, iteration: 291826
loss: 1.0007965564727783,grad_norm: 0.9725156876999993, iteration: 291827
loss: 1.0078250169754028,grad_norm: 0.9664011682994316, iteration: 291828
loss: 1.0338002443313599,grad_norm: 0.8210476197541433, iteration: 291829
loss: 0.9872035384178162,grad_norm: 0.9431221856114169, iteration: 291830
loss: 0.9910473823547363,grad_norm: 0.8461656058631017, iteration: 291831
loss: 1.0243016481399536,grad_norm: 0.7504260907862843, iteration: 291832
loss: 1.0172728300094604,grad_norm: 0.8266826413465655, iteration: 291833
loss: 1.0184470415115356,grad_norm: 0.9240964621313459, iteration: 291834
loss: 1.0060911178588867,grad_norm: 0.7749632177354181, iteration: 291835
loss: 1.0262738466262817,grad_norm: 0.9999990434848841, iteration: 291836
loss: 1.0074955224990845,grad_norm: 0.8777998067261107, iteration: 291837
loss: 0.9817913770675659,grad_norm: 0.9999991172188911, iteration: 291838
loss: 0.9846309423446655,grad_norm: 0.7801520610848003, iteration: 291839
loss: 1.0159941911697388,grad_norm: 0.9155711174718527, iteration: 291840
loss: 1.0103086233139038,grad_norm: 0.9775614387473593, iteration: 291841
loss: 0.9754525423049927,grad_norm: 0.9270038732385577, iteration: 291842
loss: 0.9837822318077087,grad_norm: 0.7306914628042085, iteration: 291843
loss: 0.9713419675827026,grad_norm: 0.9508714936436179, iteration: 291844
loss: 1.0514575242996216,grad_norm: 0.9999994432166891, iteration: 291845
loss: 1.0160633325576782,grad_norm: 0.7596928461495578, iteration: 291846
loss: 1.0052998065948486,grad_norm: 0.9571427965140256, iteration: 291847
loss: 0.9760568141937256,grad_norm: 0.9124587983061669, iteration: 291848
loss: 1.020374059677124,grad_norm: 0.6484192532272411, iteration: 291849
loss: 1.0510380268096924,grad_norm: 0.9855228476803546, iteration: 291850
loss: 1.019972324371338,grad_norm: 0.8436953515841259, iteration: 291851
loss: 1.0390207767486572,grad_norm: 0.9999994134386904, iteration: 291852
loss: 1.014209270477295,grad_norm: 0.9229457561191664, iteration: 291853
loss: 0.9946846961975098,grad_norm: 0.8390935873648563, iteration: 291854
loss: 0.9771180748939514,grad_norm: 0.9410167915076093, iteration: 291855
loss: 1.0316245555877686,grad_norm: 0.9981074891588971, iteration: 291856
loss: 1.0031903982162476,grad_norm: 0.8525412728125097, iteration: 291857
loss: 1.0072071552276611,grad_norm: 0.9999990386091812, iteration: 291858
loss: 1.0173547267913818,grad_norm: 0.9452536631844096, iteration: 291859
loss: 1.0280272960662842,grad_norm: 0.9999992657188116, iteration: 291860
loss: 0.9956866502761841,grad_norm: 0.8311543463619177, iteration: 291861
loss: 1.0407919883728027,grad_norm: 0.8072447083739395, iteration: 291862
loss: 1.0129199028015137,grad_norm: 0.8611616286479344, iteration: 291863
loss: 0.9884521961212158,grad_norm: 0.8081806838472515, iteration: 291864
loss: 1.004348874092102,grad_norm: 0.8481536577748806, iteration: 291865
loss: 0.9762662053108215,grad_norm: 0.7996785714560499, iteration: 291866
loss: 0.9993956089019775,grad_norm: 0.8878398201522872, iteration: 291867
loss: 0.9544814825057983,grad_norm: 0.9999991908822187, iteration: 291868
loss: 0.9672864675521851,grad_norm: 0.7781707217378808, iteration: 291869
loss: 0.9622605443000793,grad_norm: 0.9771801128631724, iteration: 291870
loss: 1.0485528707504272,grad_norm: 0.9413729783926746, iteration: 291871
loss: 0.9818232655525208,grad_norm: 0.9834485906170136, iteration: 291872
loss: 1.0343152284622192,grad_norm: 0.8180145316298459, iteration: 291873
loss: 1.0323398113250732,grad_norm: 0.9999994006843214, iteration: 291874
loss: 1.0046350955963135,grad_norm: 0.8447409064190684, iteration: 291875
loss: 0.9538984894752502,grad_norm: 0.7600646166824072, iteration: 291876
loss: 0.989065945148468,grad_norm: 0.9474116655463294, iteration: 291877
loss: 0.9844285845756531,grad_norm: 0.8174895077647819, iteration: 291878
loss: 1.03069269657135,grad_norm: 0.747817972163096, iteration: 291879
loss: 1.0025520324707031,grad_norm: 0.9521106528708606, iteration: 291880
loss: 0.9951186180114746,grad_norm: 0.9421319379486486, iteration: 291881
loss: 1.0032856464385986,grad_norm: 0.9999990180481512, iteration: 291882
loss: 0.992702066898346,grad_norm: 0.8743393089090862, iteration: 291883
loss: 1.0257714986801147,grad_norm: 0.7004753519526508, iteration: 291884
loss: 1.0174049139022827,grad_norm: 0.865776987523961, iteration: 291885
loss: 1.001976490020752,grad_norm: 0.892676435382721, iteration: 291886
loss: 1.002181053161621,grad_norm: 0.8356480450841294, iteration: 291887
loss: 1.0002882480621338,grad_norm: 0.9999992830517749, iteration: 291888
loss: 1.012123703956604,grad_norm: 0.999999058205298, iteration: 291889
loss: 0.9782925844192505,grad_norm: 0.9194864175096036, iteration: 291890
loss: 1.0526772737503052,grad_norm: 0.9999998200606928, iteration: 291891
loss: 0.9765864014625549,grad_norm: 0.7366596752502046, iteration: 291892
loss: 0.9625541567802429,grad_norm: 0.9114813303850393, iteration: 291893
loss: 1.0054408311843872,grad_norm: 0.999999143380175, iteration: 291894
loss: 1.008154034614563,grad_norm: 0.9999991235459857, iteration: 291895
loss: 0.991637110710144,grad_norm: 0.9558697800798397, iteration: 291896
loss: 0.9954349994659424,grad_norm: 0.846536534471632, iteration: 291897
loss: 0.9964069724082947,grad_norm: 0.7973584325924438, iteration: 291898
loss: 0.97105872631073,grad_norm: 0.8318395590592085, iteration: 291899
loss: 0.9736974835395813,grad_norm: 0.8937807315697689, iteration: 291900
loss: 0.9949821829795837,grad_norm: 0.7977469650568545, iteration: 291901
loss: 1.0001698732376099,grad_norm: 0.7472522932983124, iteration: 291902
loss: 1.0105952024459839,grad_norm: 0.7586211571453975, iteration: 291903
loss: 0.9893195629119873,grad_norm: 0.8756837217663224, iteration: 291904
loss: 0.9766202569007874,grad_norm: 0.8582108165182563, iteration: 291905
loss: 1.0088081359863281,grad_norm: 0.8899578337075075, iteration: 291906
loss: 0.9899306297302246,grad_norm: 0.8411441142738733, iteration: 291907
loss: 1.0142464637756348,grad_norm: 0.8607332215214921, iteration: 291908
loss: 0.9722076654434204,grad_norm: 0.8978409625631081, iteration: 291909
loss: 1.0349546670913696,grad_norm: 0.8808192258047362, iteration: 291910
loss: 0.9833212494850159,grad_norm: 0.9999991415807541, iteration: 291911
loss: 0.9733636379241943,grad_norm: 0.9462109239323733, iteration: 291912
loss: 1.0024610757827759,grad_norm: 0.7296527535654105, iteration: 291913
loss: 1.0294411182403564,grad_norm: 0.7215610057041233, iteration: 291914
loss: 1.0139107704162598,grad_norm: 0.999999204658886, iteration: 291915
loss: 1.013563632965088,grad_norm: 0.6781564381311332, iteration: 291916
loss: 1.0148483514785767,grad_norm: 0.9999991224722528, iteration: 291917
loss: 0.9971004724502563,grad_norm: 0.8347718276698575, iteration: 291918
loss: 0.9886640906333923,grad_norm: 0.7848765113620418, iteration: 291919
loss: 0.9995193481445312,grad_norm: 0.8189269658539519, iteration: 291920
loss: 0.9877496361732483,grad_norm: 0.9174897361398325, iteration: 291921
loss: 1.0213663578033447,grad_norm: 0.7856422379068154, iteration: 291922
loss: 1.0049376487731934,grad_norm: 0.9999995128489529, iteration: 291923
loss: 1.018439769744873,grad_norm: 0.7569428876012897, iteration: 291924
loss: 0.9969637393951416,grad_norm: 0.8794941741361252, iteration: 291925
loss: 1.0120853185653687,grad_norm: 0.9999992418482968, iteration: 291926
loss: 0.9832302331924438,grad_norm: 0.9999993416875376, iteration: 291927
loss: 0.9984043836593628,grad_norm: 0.8426114635211561, iteration: 291928
loss: 1.0031771659851074,grad_norm: 0.8598240169904976, iteration: 291929
loss: 0.9881001710891724,grad_norm: 0.8161781597710138, iteration: 291930
loss: 0.9384306073188782,grad_norm: 0.8632300872164917, iteration: 291931
loss: 1.02092707157135,grad_norm: 0.8389156573841048, iteration: 291932
loss: 0.9992234706878662,grad_norm: 0.8322215761451562, iteration: 291933
loss: 0.9798163771629333,grad_norm: 0.818349898143671, iteration: 291934
loss: 1.0024657249450684,grad_norm: 0.9999990043212296, iteration: 291935
loss: 1.0188044309616089,grad_norm: 0.9875622625783521, iteration: 291936
loss: 0.9887461066246033,grad_norm: 0.6998963279033978, iteration: 291937
loss: 0.9998292922973633,grad_norm: 0.7611084897117566, iteration: 291938
loss: 0.9555951952934265,grad_norm: 0.8791415994947636, iteration: 291939
loss: 0.9485675692558289,grad_norm: 0.8693795795589543, iteration: 291940
loss: 1.0024834871292114,grad_norm: 0.7161387062219843, iteration: 291941
loss: 0.9906768798828125,grad_norm: 0.9709189153711649, iteration: 291942
loss: 0.9925185441970825,grad_norm: 0.9250348719618229, iteration: 291943
loss: 1.0255992412567139,grad_norm: 0.9999991562166929, iteration: 291944
loss: 1.0255324840545654,grad_norm: 0.8264769870922836, iteration: 291945
loss: 0.9569427371025085,grad_norm: 0.9247396619084901, iteration: 291946
loss: 1.0006846189498901,grad_norm: 0.9999991072302641, iteration: 291947
loss: 1.0153483152389526,grad_norm: 0.8878704926572369, iteration: 291948
loss: 0.9798224568367004,grad_norm: 0.765273124793556, iteration: 291949
loss: 0.9773332476615906,grad_norm: 0.7590364009395479, iteration: 291950
loss: 0.9556706547737122,grad_norm: 0.7097610184294312, iteration: 291951
loss: 0.9947559237480164,grad_norm: 0.8173153228600092, iteration: 291952
loss: 1.010423183441162,grad_norm: 0.8975481470158857, iteration: 291953
loss: 0.9769370555877686,grad_norm: 0.8700155963508871, iteration: 291954
loss: 1.031995177268982,grad_norm: 0.7317448574441615, iteration: 291955
loss: 0.9756909012794495,grad_norm: 0.8205249916313191, iteration: 291956
loss: 1.0130261182785034,grad_norm: 0.9999997591768747, iteration: 291957
loss: 0.990111768245697,grad_norm: 0.7284564107149611, iteration: 291958
loss: 0.9833489060401917,grad_norm: 0.8411381905545394, iteration: 291959
loss: 0.9815081357955933,grad_norm: 0.7530982762060046, iteration: 291960
loss: 0.9683951139450073,grad_norm: 0.7881192239249547, iteration: 291961
loss: 1.0116949081420898,grad_norm: 0.8664073816494334, iteration: 291962
loss: 0.9643872380256653,grad_norm: 0.7986890622094319, iteration: 291963
loss: 0.9552735686302185,grad_norm: 0.7283501959150093, iteration: 291964
loss: 1.0230977535247803,grad_norm: 0.8892788933541395, iteration: 291965
loss: 1.0825122594833374,grad_norm: 0.9999992071889835, iteration: 291966
loss: 0.9889594912528992,grad_norm: 0.7608824742577283, iteration: 291967
loss: 1.0209338665008545,grad_norm: 0.9151223190457427, iteration: 291968
loss: 1.0192952156066895,grad_norm: 0.9709433890248793, iteration: 291969
loss: 1.0164092779159546,grad_norm: 0.9999990918741488, iteration: 291970
loss: 1.0205421447753906,grad_norm: 0.7591525185918021, iteration: 291971
loss: 0.97874915599823,grad_norm: 0.829757128949028, iteration: 291972
loss: 1.0262900590896606,grad_norm: 0.8078813577412088, iteration: 291973
loss: 1.014559268951416,grad_norm: 0.96116001999357, iteration: 291974
loss: 1.0809401273727417,grad_norm: 0.9999994328385897, iteration: 291975
loss: 0.9610462188720703,grad_norm: 0.8781381440163281, iteration: 291976
loss: 1.042683720588684,grad_norm: 0.9999997955329722, iteration: 291977
loss: 0.9843200445175171,grad_norm: 0.7266178423715717, iteration: 291978
loss: 0.9941139221191406,grad_norm: 0.8812746532476591, iteration: 291979
loss: 0.9658432602882385,grad_norm: 0.7143784263308957, iteration: 291980
loss: 0.9822707176208496,grad_norm: 0.9999990882703493, iteration: 291981
loss: 0.9607006311416626,grad_norm: 0.9460410514849154, iteration: 291982
loss: 0.9715486168861389,grad_norm: 0.8888624149885942, iteration: 291983
loss: 0.9338610172271729,grad_norm: 0.857382482654799, iteration: 291984
loss: 0.9935963749885559,grad_norm: 0.9999994934410479, iteration: 291985
loss: 1.0186023712158203,grad_norm: 0.9149582209694573, iteration: 291986
loss: 1.0214790105819702,grad_norm: 0.9999990356828008, iteration: 291987
loss: 0.9904460310935974,grad_norm: 0.8007165975486492, iteration: 291988
loss: 1.0004249811172485,grad_norm: 0.9346125071749626, iteration: 291989
loss: 0.9992436766624451,grad_norm: 0.9632869454874712, iteration: 291990
loss: 1.0597199201583862,grad_norm: 0.7380025839808876, iteration: 291991
loss: 0.9881054759025574,grad_norm: 0.6622982791982824, iteration: 291992
loss: 0.9882783889770508,grad_norm: 0.79051982463253, iteration: 291993
loss: 0.9961048364639282,grad_norm: 0.8522588656098706, iteration: 291994
loss: 0.989892303943634,grad_norm: 0.999999597086553, iteration: 291995
loss: 0.9983193278312683,grad_norm: 0.8550165379412239, iteration: 291996
loss: 1.0528653860092163,grad_norm: 0.9999996252539476, iteration: 291997
loss: 0.9886437654495239,grad_norm: 0.9695972882356573, iteration: 291998
loss: 0.9593039155006409,grad_norm: 0.9999990997007011, iteration: 291999
loss: 1.0381050109863281,grad_norm: 0.8366135262461692, iteration: 292000
loss: 1.0085660219192505,grad_norm: 0.7562515064321279, iteration: 292001
loss: 1.0166735649108887,grad_norm: 0.9154843024656915, iteration: 292002
loss: 1.0238853693008423,grad_norm: 0.7360281805137818, iteration: 292003
loss: 1.0244948863983154,grad_norm: 0.9999992801290859, iteration: 292004
loss: 1.0163869857788086,grad_norm: 0.9999991789404867, iteration: 292005
loss: 1.0276862382888794,grad_norm: 0.9720543989216104, iteration: 292006
loss: 0.9898974895477295,grad_norm: 0.8794977367448155, iteration: 292007
loss: 0.980847179889679,grad_norm: 0.8433611264170491, iteration: 292008
loss: 0.9856022000312805,grad_norm: 0.7812919063311722, iteration: 292009
loss: 1.0525896549224854,grad_norm: 0.9999990969055625, iteration: 292010
loss: 0.9985390305519104,grad_norm: 0.9999990401858899, iteration: 292011
loss: 0.9914751052856445,grad_norm: 0.9939416498932643, iteration: 292012
loss: 0.9853174090385437,grad_norm: 0.9999994324484369, iteration: 292013
loss: 0.967444896697998,grad_norm: 0.8473259733947974, iteration: 292014
loss: 0.9952630996704102,grad_norm: 0.8938498615696636, iteration: 292015
loss: 0.9763104319572449,grad_norm: 0.7788045862665859, iteration: 292016
loss: 1.0140116214752197,grad_norm: 0.8034089927372431, iteration: 292017
loss: 1.016136646270752,grad_norm: 0.7322901033849796, iteration: 292018
loss: 1.0306334495544434,grad_norm: 0.9999996041668705, iteration: 292019
loss: 0.9900298118591309,grad_norm: 0.8594921563778589, iteration: 292020
loss: 0.9983039498329163,grad_norm: 0.854787910802254, iteration: 292021
loss: 1.0324898958206177,grad_norm: 0.9999993714079229, iteration: 292022
loss: 1.0041359663009644,grad_norm: 0.7861705398158597, iteration: 292023
loss: 0.9867090582847595,grad_norm: 0.8671940397099585, iteration: 292024
loss: 0.9841207265853882,grad_norm: 0.7526073444566904, iteration: 292025
loss: 0.9806898236274719,grad_norm: 0.8260412605347748, iteration: 292026
loss: 0.9990936517715454,grad_norm: 0.9156242744276187, iteration: 292027
loss: 0.9968423843383789,grad_norm: 0.9017987771828232, iteration: 292028
loss: 1.00569748878479,grad_norm: 0.8733699764746987, iteration: 292029
loss: 0.9560401439666748,grad_norm: 0.8629749869927686, iteration: 292030
loss: 0.9838672876358032,grad_norm: 0.9174306687616157, iteration: 292031
loss: 0.9828490614891052,grad_norm: 0.9026230214694586, iteration: 292032
loss: 0.9905509352684021,grad_norm: 0.7994427617394596, iteration: 292033
loss: 0.9996824860572815,grad_norm: 0.9999993074238364, iteration: 292034
loss: 1.0460777282714844,grad_norm: 0.999999364773222, iteration: 292035
loss: 1.0029417276382446,grad_norm: 0.810931652292539, iteration: 292036
loss: 1.0096198320388794,grad_norm: 0.8775736486631558, iteration: 292037
loss: 1.0048160552978516,grad_norm: 0.8102388453935282, iteration: 292038
loss: 1.0913907289505005,grad_norm: 0.9200636706957301, iteration: 292039
loss: 0.9624754190444946,grad_norm: 0.6957348573097927, iteration: 292040
loss: 1.0293606519699097,grad_norm: 0.8424815987147234, iteration: 292041
loss: 1.0024253129959106,grad_norm: 0.781971722827892, iteration: 292042
loss: 1.014662742614746,grad_norm: 0.8084829493086085, iteration: 292043
loss: 0.9847671985626221,grad_norm: 0.999999198985372, iteration: 292044
loss: 1.0191541910171509,grad_norm: 0.7562875346573368, iteration: 292045
loss: 0.9926422238349915,grad_norm: 0.9999991076985022, iteration: 292046
loss: 0.9821240901947021,grad_norm: 0.9949994836564928, iteration: 292047
loss: 1.0180145502090454,grad_norm: 0.9783393085865225, iteration: 292048
loss: 1.0201712846755981,grad_norm: 0.9312188635433047, iteration: 292049
loss: 0.981123685836792,grad_norm: 0.7320411701536994, iteration: 292050
loss: 1.041061520576477,grad_norm: 0.9999990259156896, iteration: 292051
loss: 1.0116273164749146,grad_norm: 0.9086226275092182, iteration: 292052
loss: 1.008056640625,grad_norm: 0.8628464623339868, iteration: 292053
loss: 0.9967485070228577,grad_norm: 0.850590211290112, iteration: 292054
loss: 1.0092628002166748,grad_norm: 0.8376846492386649, iteration: 292055
loss: 1.008565902709961,grad_norm: 0.9824129235640503, iteration: 292056
loss: 0.9818353652954102,grad_norm: 0.8029230808575778, iteration: 292057
loss: 1.0089296102523804,grad_norm: 0.9028777051767225, iteration: 292058
loss: 0.9867069721221924,grad_norm: 0.8985729763603856, iteration: 292059
loss: 1.0184613466262817,grad_norm: 0.9999990068824084, iteration: 292060
loss: 0.9724986553192139,grad_norm: 0.8864243000939612, iteration: 292061
loss: 0.9858754277229309,grad_norm: 0.9999989521549814, iteration: 292062
loss: 0.9748144149780273,grad_norm: 0.9999991914398461, iteration: 292063
loss: 1.01405930519104,grad_norm: 0.9107662294873198, iteration: 292064
loss: 0.951492965221405,grad_norm: 0.7938741699886548, iteration: 292065
loss: 0.9893317818641663,grad_norm: 0.6912176723426, iteration: 292066
loss: 1.0197510719299316,grad_norm: 0.8044513575714235, iteration: 292067
loss: 0.983174741268158,grad_norm: 0.9144782687065106, iteration: 292068
loss: 1.0242581367492676,grad_norm: 0.9589761523101212, iteration: 292069
loss: 1.0509450435638428,grad_norm: 0.9999990505859776, iteration: 292070
loss: 0.9930557012557983,grad_norm: 0.8729773560377219, iteration: 292071
loss: 1.063689947128296,grad_norm: 0.9308329334596314, iteration: 292072
loss: 1.008457064628601,grad_norm: 0.7748198713681328, iteration: 292073
loss: 1.0062551498413086,grad_norm: 0.8756216219982301, iteration: 292074
loss: 1.0348154306411743,grad_norm: 0.9591621827172978, iteration: 292075
loss: 0.9694178700447083,grad_norm: 0.9999990416547521, iteration: 292076
loss: 1.0175597667694092,grad_norm: 0.7185209124552315, iteration: 292077
loss: 0.9577123522758484,grad_norm: 0.7692213648714346, iteration: 292078
loss: 1.0267341136932373,grad_norm: 0.9430780601286828, iteration: 292079
loss: 0.9796085357666016,grad_norm: 0.873195325950153, iteration: 292080
loss: 0.9882930517196655,grad_norm: 0.8495673044423433, iteration: 292081
loss: 1.014170527458191,grad_norm: 0.9485979984741959, iteration: 292082
loss: 1.0042319297790527,grad_norm: 0.9999991825672115, iteration: 292083
loss: 1.0010544061660767,grad_norm: 0.9296070332921157, iteration: 292084
loss: 0.9933140277862549,grad_norm: 0.7528419093630491, iteration: 292085
loss: 1.0304595232009888,grad_norm: 0.8437771566936707, iteration: 292086
loss: 1.008306860923767,grad_norm: 0.9371264945143242, iteration: 292087
loss: 0.9586836099624634,grad_norm: 0.7719655835668198, iteration: 292088
loss: 0.9866279363632202,grad_norm: 0.7235566227066471, iteration: 292089
loss: 1.038106918334961,grad_norm: 0.9934101106211283, iteration: 292090
loss: 0.9851659536361694,grad_norm: 0.9042668360938203, iteration: 292091
loss: 1.0091679096221924,grad_norm: 0.9999995158661253, iteration: 292092
loss: 0.9783658385276794,grad_norm: 0.9249156660793638, iteration: 292093
loss: 0.9954301118850708,grad_norm: 0.918974046848964, iteration: 292094
loss: 1.013907790184021,grad_norm: 0.9780376604941663, iteration: 292095
loss: 0.9744051694869995,grad_norm: 0.9999990482872859, iteration: 292096
loss: 0.979248046875,grad_norm: 0.8473007839187247, iteration: 292097
loss: 1.0053808689117432,grad_norm: 0.9864964831450136, iteration: 292098
loss: 0.9836854338645935,grad_norm: 0.8300230271457387, iteration: 292099
loss: 1.0326582193374634,grad_norm: 0.9382225176162056, iteration: 292100
loss: 1.018581509590149,grad_norm: 0.9063950727110276, iteration: 292101
loss: 0.975534200668335,grad_norm: 0.8142195906601227, iteration: 292102
loss: 1.0020298957824707,grad_norm: 0.8761701092747051, iteration: 292103
loss: 0.9961866736412048,grad_norm: 0.9693904212746235, iteration: 292104
loss: 0.9981141090393066,grad_norm: 0.7747552891806094, iteration: 292105
loss: 1.0389385223388672,grad_norm: 0.9999995465830784, iteration: 292106
loss: 0.9883923530578613,grad_norm: 0.8630441950716259, iteration: 292107
loss: 0.974406361579895,grad_norm: 0.8391288747501808, iteration: 292108
loss: 1.0066062211990356,grad_norm: 0.7518444753061929, iteration: 292109
loss: 1.0188181400299072,grad_norm: 0.8680934198587047, iteration: 292110
loss: 1.0231451988220215,grad_norm: 0.6883833825408071, iteration: 292111
loss: 0.9881404638290405,grad_norm: 0.888825456757692, iteration: 292112
loss: 0.994121253490448,grad_norm: 0.8997762765286927, iteration: 292113
loss: 1.093042254447937,grad_norm: 0.9330256418404728, iteration: 292114
loss: 0.9552015066146851,grad_norm: 0.7804649088081117, iteration: 292115
loss: 0.9934728741645813,grad_norm: 0.8591923000729214, iteration: 292116
loss: 0.965232789516449,grad_norm: 0.7838097159366921, iteration: 292117
loss: 1.0045164823532104,grad_norm: 0.8313102842836201, iteration: 292118
loss: 0.9793569445610046,grad_norm: 0.7709326379723895, iteration: 292119
loss: 0.9891956448554993,grad_norm: 0.9999992192532157, iteration: 292120
loss: 1.0324186086654663,grad_norm: 0.9111152594929239, iteration: 292121
loss: 0.9973852634429932,grad_norm: 0.9696361772761504, iteration: 292122
loss: 1.0772203207015991,grad_norm: 0.9999992834310685, iteration: 292123
loss: 0.9684353470802307,grad_norm: 0.7307617517565538, iteration: 292124
loss: 1.0330321788787842,grad_norm: 0.9486106632381507, iteration: 292125
loss: 1.0234400033950806,grad_norm: 0.986549861659999, iteration: 292126
loss: 1.0258053541183472,grad_norm: 0.9999991149009615, iteration: 292127
loss: 1.0096958875656128,grad_norm: 0.8750005965177603, iteration: 292128
loss: 1.0070319175720215,grad_norm: 0.9999990989002163, iteration: 292129
loss: 1.0244675874710083,grad_norm: 0.8684881707612124, iteration: 292130
loss: 0.9784933924674988,grad_norm: 0.8674202964314256, iteration: 292131
loss: 0.9634280204772949,grad_norm: 0.8050736003438373, iteration: 292132
loss: 0.9932859539985657,grad_norm: 0.9999989867062655, iteration: 292133
loss: 1.1443787813186646,grad_norm: 0.9999998844349922, iteration: 292134
loss: 0.9984208345413208,grad_norm: 0.9894008403767977, iteration: 292135
loss: 1.0024950504302979,grad_norm: 0.8144804316652337, iteration: 292136
loss: 1.0127997398376465,grad_norm: 0.9999990653383319, iteration: 292137
loss: 0.9828159809112549,grad_norm: 0.7391043049840174, iteration: 292138
loss: 0.9985788464546204,grad_norm: 0.8376111325694015, iteration: 292139
loss: 1.0037330389022827,grad_norm: 0.8282572240908248, iteration: 292140
loss: 1.0197919607162476,grad_norm: 0.8162161614849149, iteration: 292141
loss: 0.9884553551673889,grad_norm: 0.8818383562876265, iteration: 292142
loss: 0.9705312252044678,grad_norm: 0.999999176297572, iteration: 292143
loss: 1.1095097064971924,grad_norm: 0.9999990231675117, iteration: 292144
loss: 1.0088263750076294,grad_norm: 0.8616397810241836, iteration: 292145
loss: 0.9641340374946594,grad_norm: 0.9189144204321683, iteration: 292146
loss: 1.0150585174560547,grad_norm: 0.805039365944362, iteration: 292147
loss: 0.9917442202568054,grad_norm: 0.9675870673633391, iteration: 292148
loss: 0.9755637645721436,grad_norm: 0.9512816858974835, iteration: 292149
loss: 1.0073033571243286,grad_norm: 0.8774819402458232, iteration: 292150
loss: 1.0325758457183838,grad_norm: 0.8718362962650713, iteration: 292151
loss: 0.984388530254364,grad_norm: 0.8554010525416741, iteration: 292152
loss: 0.9772826433181763,grad_norm: 0.8109469556792579, iteration: 292153
loss: 0.9680633544921875,grad_norm: 0.8401679650293177, iteration: 292154
loss: 0.9807701706886292,grad_norm: 0.99999895712136, iteration: 292155
loss: 0.9963116645812988,grad_norm: 0.9581041035057802, iteration: 292156
loss: 1.0578689575195312,grad_norm: 1.0000000154945319, iteration: 292157
loss: 0.987209141254425,grad_norm: 0.9999991661186655, iteration: 292158
loss: 1.0094561576843262,grad_norm: 0.9999990980746546, iteration: 292159
loss: 0.9785293340682983,grad_norm: 0.999999234101565, iteration: 292160
loss: 1.0208219289779663,grad_norm: 0.9267673021129007, iteration: 292161
loss: 0.9987584948539734,grad_norm: 0.9999991000507589, iteration: 292162
loss: 1.0191564559936523,grad_norm: 0.8685384298197337, iteration: 292163
loss: 1.0128425359725952,grad_norm: 0.8059861553625425, iteration: 292164
loss: 1.0993216037750244,grad_norm: 0.9999996955264372, iteration: 292165
loss: 0.9795054793357849,grad_norm: 0.9236775414085643, iteration: 292166
loss: 1.0177879333496094,grad_norm: 0.8508405849835636, iteration: 292167
loss: 1.0558780431747437,grad_norm: 0.9999991948035954, iteration: 292168
loss: 1.0354149341583252,grad_norm: 0.9818925862573612, iteration: 292169
loss: 1.0386959314346313,grad_norm: 0.7315440934568845, iteration: 292170
loss: 1.038793683052063,grad_norm: 0.9999992306038814, iteration: 292171
loss: 1.0234012603759766,grad_norm: 0.8164397669020261, iteration: 292172
loss: 1.1371195316314697,grad_norm: 0.9602180585040124, iteration: 292173
loss: 1.000037670135498,grad_norm: 0.9999993599316048, iteration: 292174
loss: 0.976183295249939,grad_norm: 0.8320364834177874, iteration: 292175
loss: 0.9756511449813843,grad_norm: 0.7780256065169859, iteration: 292176
loss: 1.0013090372085571,grad_norm: 0.9268901950449728, iteration: 292177
loss: 0.977843165397644,grad_norm: 0.8034904729424642, iteration: 292178
loss: 0.9843151569366455,grad_norm: 0.999999154434709, iteration: 292179
loss: 1.0293022394180298,grad_norm: 0.9999991689291271, iteration: 292180
loss: 1.0039581060409546,grad_norm: 0.9999994247806271, iteration: 292181
loss: 1.0371007919311523,grad_norm: 0.8351611256458147, iteration: 292182
loss: 0.9966171979904175,grad_norm: 0.9610056224755342, iteration: 292183
loss: 1.0271967649459839,grad_norm: 0.8827120410331741, iteration: 292184
loss: 1.0334175825119019,grad_norm: 0.9999989311884628, iteration: 292185
loss: 0.986379086971283,grad_norm: 0.8905605179348838, iteration: 292186
loss: 0.9931575059890747,grad_norm: 0.8218339516596382, iteration: 292187
loss: 0.9777688980102539,grad_norm: 0.8277547064358034, iteration: 292188
loss: 0.9941748380661011,grad_norm: 0.7905163831055626, iteration: 292189
loss: 1.0213874578475952,grad_norm: 0.9999992942705104, iteration: 292190
loss: 0.9991500973701477,grad_norm: 0.9710536641629093, iteration: 292191
loss: 0.9719884991645813,grad_norm: 0.7878521807821861, iteration: 292192
loss: 1.0412181615829468,grad_norm: 0.8355577148501894, iteration: 292193
loss: 1.089497447013855,grad_norm: 0.9999994754800084, iteration: 292194
loss: 0.9844772815704346,grad_norm: 0.8905512725547678, iteration: 292195
loss: 1.0179773569107056,grad_norm: 0.841359522116977, iteration: 292196
loss: 1.001298189163208,grad_norm: 0.9999990845926781, iteration: 292197
loss: 1.103217363357544,grad_norm: 0.9999992661984669, iteration: 292198
loss: 0.9880079030990601,grad_norm: 0.835917654708779, iteration: 292199
loss: 0.9890000224113464,grad_norm: 0.9841189404540719, iteration: 292200
loss: 1.0342564582824707,grad_norm: 0.8655545979940817, iteration: 292201
loss: 0.9930481910705566,grad_norm: 0.7738731660315712, iteration: 292202
loss: 0.9927935004234314,grad_norm: 0.9999991113666068, iteration: 292203
loss: 0.9768519401550293,grad_norm: 0.8941572091117714, iteration: 292204
loss: 0.9678431153297424,grad_norm: 0.7773360321267253, iteration: 292205
loss: 0.982579231262207,grad_norm: 0.8991697409539204, iteration: 292206
loss: 0.9880974888801575,grad_norm: 0.8422394651630879, iteration: 292207
loss: 1.0231456756591797,grad_norm: 0.9999989871278475, iteration: 292208
loss: 1.0617443323135376,grad_norm: 0.9999991885289526, iteration: 292209
loss: 1.0080020427703857,grad_norm: 0.9789034484362034, iteration: 292210
loss: 1.006333351135254,grad_norm: 0.7961669316764345, iteration: 292211
loss: 0.980755627155304,grad_norm: 0.8042533346749822, iteration: 292212
loss: 0.9976004362106323,grad_norm: 0.8721212918953978, iteration: 292213
loss: 1.0305426120758057,grad_norm: 0.8915935857469331, iteration: 292214
loss: 0.9988188147544861,grad_norm: 0.9471823913157572, iteration: 292215
loss: 1.002573847770691,grad_norm: 0.8724292864751878, iteration: 292216
loss: 0.995477020740509,grad_norm: 0.9412016040511106, iteration: 292217
loss: 1.0266307592391968,grad_norm: 0.9411408155109532, iteration: 292218
loss: 0.9897815585136414,grad_norm: 0.8436604803466995, iteration: 292219
loss: 0.9779397249221802,grad_norm: 0.9451234872714365, iteration: 292220
loss: 0.9762383699417114,grad_norm: 0.9999989590704581, iteration: 292221
loss: 1.0779296159744263,grad_norm: 0.9740172677728347, iteration: 292222
loss: 0.9854997396469116,grad_norm: 0.8255659236078768, iteration: 292223
loss: 0.9989721179008484,grad_norm: 0.8628149816232795, iteration: 292224
loss: 0.9704087972640991,grad_norm: 0.842219189426249, iteration: 292225
loss: 1.0123838186264038,grad_norm: 0.8241907198285694, iteration: 292226
loss: 0.9971604347229004,grad_norm: 0.9999993464680383, iteration: 292227
loss: 1.0057319402694702,grad_norm: 0.9727010092830838, iteration: 292228
loss: 1.113571286201477,grad_norm: 0.9999996910783551, iteration: 292229
loss: 0.9977931380271912,grad_norm: 0.9468912147421148, iteration: 292230
loss: 1.0333932638168335,grad_norm: 0.9999991780464367, iteration: 292231
loss: 0.9899783730506897,grad_norm: 0.8305855499531145, iteration: 292232
loss: 0.9859230518341064,grad_norm: 0.7837314249661412, iteration: 292233
loss: 0.9948310256004333,grad_norm: 0.7757314268770621, iteration: 292234
loss: 0.9895585775375366,grad_norm: 0.9485255010545471, iteration: 292235
loss: 1.0344734191894531,grad_norm: 0.8210260465039433, iteration: 292236
loss: 1.0342603921890259,grad_norm: 0.897844538570306, iteration: 292237
loss: 1.0067588090896606,grad_norm: 0.8343597480018043, iteration: 292238
loss: 1.075919270515442,grad_norm: 0.9999992409391667, iteration: 292239
loss: 1.030084252357483,grad_norm: 0.8933644649669461, iteration: 292240
loss: 1.0040805339813232,grad_norm: 0.8812788912315914, iteration: 292241
loss: 1.0377048254013062,grad_norm: 0.9999992899426539, iteration: 292242
loss: 0.9916871190071106,grad_norm: 0.9355433583205462, iteration: 292243
loss: 0.9962820410728455,grad_norm: 0.7704694600189131, iteration: 292244
loss: 1.0085196495056152,grad_norm: 0.9999989424003779, iteration: 292245
loss: 1.0549927949905396,grad_norm: 0.9999996064015035, iteration: 292246
loss: 0.9877148270606995,grad_norm: 0.9250344941157324, iteration: 292247
loss: 0.9817063808441162,grad_norm: 0.7953121492701866, iteration: 292248
loss: 1.0045242309570312,grad_norm: 0.9999994413298332, iteration: 292249
loss: 1.0086215734481812,grad_norm: 0.8922108438870074, iteration: 292250
loss: 1.1153911352157593,grad_norm: 0.9999999924667854, iteration: 292251
loss: 0.9969826340675354,grad_norm: 0.8403934286159038, iteration: 292252
loss: 0.9652324318885803,grad_norm: 0.999999123430353, iteration: 292253
loss: 0.9988650679588318,grad_norm: 0.8858280004141532, iteration: 292254
loss: 0.9700199961662292,grad_norm: 0.9976427325633032, iteration: 292255
loss: 0.9951199293136597,grad_norm: 0.8074914307794713, iteration: 292256
loss: 1.0167019367218018,grad_norm: 0.8743663382649489, iteration: 292257
loss: 1.0092999935150146,grad_norm: 0.7648479260459213, iteration: 292258
loss: 0.97336745262146,grad_norm: 0.8045075254748113, iteration: 292259
loss: 1.004080057144165,grad_norm: 0.8782493620819245, iteration: 292260
loss: 1.0288647413253784,grad_norm: 0.9999994086144308, iteration: 292261
loss: 0.9857658743858337,grad_norm: 0.9436633983226285, iteration: 292262
loss: 1.0030879974365234,grad_norm: 0.8122146754642865, iteration: 292263
loss: 1.0209527015686035,grad_norm: 0.8201979255708481, iteration: 292264
loss: 1.0658717155456543,grad_norm: 0.8782654743234725, iteration: 292265
loss: 0.9747270345687866,grad_norm: 0.8988246778593421, iteration: 292266
loss: 0.9896437525749207,grad_norm: 0.9811043917708206, iteration: 292267
loss: 1.0338571071624756,grad_norm: 0.9999992402367056, iteration: 292268
loss: 0.9942049384117126,grad_norm: 0.9999994647373276, iteration: 292269
loss: 1.0213077068328857,grad_norm: 0.9559286089358834, iteration: 292270
loss: 0.9883618354797363,grad_norm: 0.8192879974928193, iteration: 292271
loss: 1.0318071842193604,grad_norm: 0.8182782959025146, iteration: 292272
loss: 0.9753333926200867,grad_norm: 0.8925515523605949, iteration: 292273
loss: 1.0566984415054321,grad_norm: 0.8572728080621665, iteration: 292274
loss: 1.0011345148086548,grad_norm: 0.9605193058914723, iteration: 292275
loss: 0.9855290651321411,grad_norm: 0.8781634034808161, iteration: 292276
loss: 1.0025731325149536,grad_norm: 0.9841064324369112, iteration: 292277
loss: 1.0048248767852783,grad_norm: 0.9151430122838996, iteration: 292278
loss: 1.0130995512008667,grad_norm: 0.82539998149973, iteration: 292279
loss: 1.0235458612442017,grad_norm: 0.7370019023067198, iteration: 292280
loss: 0.9972889423370361,grad_norm: 0.9861444839636883, iteration: 292281
loss: 0.9564871788024902,grad_norm: 0.8179446292535975, iteration: 292282
loss: 1.016577124595642,grad_norm: 0.8575814868170589, iteration: 292283
loss: 0.9873992800712585,grad_norm: 0.9999992097592285, iteration: 292284
loss: 0.9414627552032471,grad_norm: 0.8079806342738337, iteration: 292285
loss: 0.9974018931388855,grad_norm: 0.8835678950879957, iteration: 292286
loss: 1.0021507740020752,grad_norm: 0.7721483978336092, iteration: 292287
loss: 0.9903209805488586,grad_norm: 0.8627672846167721, iteration: 292288
loss: 1.0040593147277832,grad_norm: 0.8118343329578309, iteration: 292289
loss: 0.9958803653717041,grad_norm: 0.7694895306606776, iteration: 292290
loss: 0.9898702502250671,grad_norm: 0.9114027745144454, iteration: 292291
loss: 1.0248793363571167,grad_norm: 0.8678052193509953, iteration: 292292
loss: 1.0131793022155762,grad_norm: 0.9461876623952178, iteration: 292293
loss: 1.04097318649292,grad_norm: 0.8506924238520903, iteration: 292294
loss: 0.9888194799423218,grad_norm: 0.9804395316032078, iteration: 292295
loss: 1.012729525566101,grad_norm: 0.8454761847511354, iteration: 292296
loss: 0.9823979735374451,grad_norm: 0.9065983578021102, iteration: 292297
loss: 0.9881833791732788,grad_norm: 0.7817202019863895, iteration: 292298
loss: 1.0001357793807983,grad_norm: 0.9354698737025767, iteration: 292299
loss: 1.0748603343963623,grad_norm: 0.9183894859729482, iteration: 292300
loss: 1.0246762037277222,grad_norm: 0.8386032769705327, iteration: 292301
loss: 1.012459635734558,grad_norm: 0.8113444674341629, iteration: 292302
loss: 0.9985668063163757,grad_norm: 0.8006879882369173, iteration: 292303
loss: 0.9822278618812561,grad_norm: 0.999999456406325, iteration: 292304
loss: 0.9917982220649719,grad_norm: 0.9999996694997216, iteration: 292305
loss: 1.0163218975067139,grad_norm: 0.8115972105552351, iteration: 292306
loss: 1.004884123802185,grad_norm: 0.8272210027173303, iteration: 292307
loss: 1.0063570737838745,grad_norm: 0.7916075249739594, iteration: 292308
loss: 0.9888507127761841,grad_norm: 0.9240528552816178, iteration: 292309
loss: 1.0745537281036377,grad_norm: 0.945972682349278, iteration: 292310
loss: 0.969526469707489,grad_norm: 0.8930912254920403, iteration: 292311
loss: 1.0043329000473022,grad_norm: 0.8285025006074835, iteration: 292312
loss: 1.0078145265579224,grad_norm: 0.8467357546204675, iteration: 292313
loss: 1.0235060453414917,grad_norm: 0.8815256306290931, iteration: 292314
loss: 0.9827808737754822,grad_norm: 0.8910190928053917, iteration: 292315
loss: 0.9996907114982605,grad_norm: 0.8317241755156185, iteration: 292316
loss: 0.9980065226554871,grad_norm: 0.9274047855563566, iteration: 292317
loss: 1.0907652378082275,grad_norm: 0.7568531739610866, iteration: 292318
loss: 1.0082412958145142,grad_norm: 0.999999895612909, iteration: 292319
loss: 0.981042742729187,grad_norm: 0.9209413724627321, iteration: 292320
loss: 1.028275966644287,grad_norm: 0.7353418895815462, iteration: 292321
loss: 1.1218938827514648,grad_norm: 0.9280473075314125, iteration: 292322
loss: 0.9918921589851379,grad_norm: 0.8666767481112092, iteration: 292323
loss: 1.0228774547576904,grad_norm: 0.9280547024137268, iteration: 292324
loss: 0.9950077533721924,grad_norm: 0.9852355117195691, iteration: 292325
loss: 1.0077170133590698,grad_norm: 0.9259518193643389, iteration: 292326
loss: 1.1368954181671143,grad_norm: 0.9999996620569702, iteration: 292327
loss: 1.0140202045440674,grad_norm: 0.917870040675047, iteration: 292328
loss: 1.0532594919204712,grad_norm: 0.943710097019224, iteration: 292329
loss: 1.0781443119049072,grad_norm: 0.9746061717194608, iteration: 292330
loss: 0.9638561010360718,grad_norm: 0.8482582944922386, iteration: 292331
loss: 1.006292700767517,grad_norm: 0.9996146516134862, iteration: 292332
loss: 0.9954740405082703,grad_norm: 0.698995604899465, iteration: 292333
loss: 1.03238844871521,grad_norm: 0.9999993363836396, iteration: 292334
loss: 0.9922932982444763,grad_norm: 0.8632944811983823, iteration: 292335
loss: 0.9904407858848572,grad_norm: 0.816098347243755, iteration: 292336
loss: 0.9739630818367004,grad_norm: 0.8053222340728089, iteration: 292337
loss: 1.0110541582107544,grad_norm: 0.9806502260237799, iteration: 292338
loss: 1.00306236743927,grad_norm: 0.8490197458449, iteration: 292339
loss: 0.9780279994010925,grad_norm: 0.7146480155929917, iteration: 292340
loss: 0.9926627278327942,grad_norm: 0.9999994591520619, iteration: 292341
loss: 1.0336068868637085,grad_norm: 0.8918632704785988, iteration: 292342
loss: 1.0079593658447266,grad_norm: 0.8072741851649422, iteration: 292343
loss: 1.0346260070800781,grad_norm: 0.9999991507171266, iteration: 292344
loss: 1.0274646282196045,grad_norm: 0.9559814235642098, iteration: 292345
loss: 0.9864603877067566,grad_norm: 0.702792973034633, iteration: 292346
loss: 1.0230522155761719,grad_norm: 0.9999998068268575, iteration: 292347
loss: 0.9884467124938965,grad_norm: 0.9999991868233957, iteration: 292348
loss: 1.0054839849472046,grad_norm: 0.7294822626905931, iteration: 292349
loss: 0.984051525592804,grad_norm: 0.7417226135609863, iteration: 292350
loss: 1.0506795644760132,grad_norm: 0.8156123527416178, iteration: 292351
loss: 1.190954566001892,grad_norm: 0.9999999637174617, iteration: 292352
loss: 1.010495662689209,grad_norm: 0.9999998790311848, iteration: 292353
loss: 1.012258768081665,grad_norm: 0.7781841893612602, iteration: 292354
loss: 1.2395662069320679,grad_norm: 1.0000000528465514, iteration: 292355
loss: 1.0749468803405762,grad_norm: 0.9999992634693986, iteration: 292356
loss: 1.0065211057662964,grad_norm: 0.9999991560827717, iteration: 292357
loss: 0.9867500066757202,grad_norm: 0.715498027977401, iteration: 292358
loss: 1.0245872735977173,grad_norm: 0.9999998870275876, iteration: 292359
loss: 1.0386197566986084,grad_norm: 0.8823550194195892, iteration: 292360
loss: 1.0870158672332764,grad_norm: 0.9999999278841155, iteration: 292361
loss: 1.1039193868637085,grad_norm: 0.9999990410856627, iteration: 292362
loss: 1.12575364112854,grad_norm: 0.9999996542560743, iteration: 292363
loss: 1.027867317199707,grad_norm: 0.7365263047461761, iteration: 292364
loss: 1.0557774305343628,grad_norm: 0.999999264111871, iteration: 292365
loss: 1.0007853507995605,grad_norm: 0.8861758115207988, iteration: 292366
loss: 1.010580062866211,grad_norm: 0.8078261230483486, iteration: 292367
loss: 0.9627834558486938,grad_norm: 0.8650496027661388, iteration: 292368
loss: 1.0525104999542236,grad_norm: 0.9763527739962242, iteration: 292369
loss: 1.015323281288147,grad_norm: 0.84863251571402, iteration: 292370
loss: 1.0160534381866455,grad_norm: 0.9999996012471023, iteration: 292371
loss: 1.0260984897613525,grad_norm: 0.9999995205444498, iteration: 292372
loss: 0.9978195428848267,grad_norm: 0.8516574135914217, iteration: 292373
loss: 0.9873974323272705,grad_norm: 0.8745066079973458, iteration: 292374
loss: 0.9992415904998779,grad_norm: 0.999999595168048, iteration: 292375
loss: 1.025436520576477,grad_norm: 0.9999991955225442, iteration: 292376
loss: 1.0166791677474976,grad_norm: 0.7678429600156247, iteration: 292377
loss: 1.0146888494491577,grad_norm: 0.999999163167116, iteration: 292378
loss: 1.0313307046890259,grad_norm: 0.8297083585256466, iteration: 292379
loss: 0.9840481877326965,grad_norm: 0.881083443840276, iteration: 292380
loss: 0.985384464263916,grad_norm: 0.7573122213226877, iteration: 292381
loss: 1.0370184183120728,grad_norm: 0.9999994310009481, iteration: 292382
loss: 0.9960784316062927,grad_norm: 0.8511258756005038, iteration: 292383
loss: 1.0000900030136108,grad_norm: 0.8519491974343082, iteration: 292384
loss: 0.9834749698638916,grad_norm: 0.9053839847461013, iteration: 292385
loss: 1.0009119510650635,grad_norm: 0.9230571403788147, iteration: 292386
loss: 0.9622843861579895,grad_norm: 0.971665014502345, iteration: 292387
loss: 1.0953515768051147,grad_norm: 0.9224427405345024, iteration: 292388
loss: 1.0709513425827026,grad_norm: 0.9999992955993372, iteration: 292389
loss: 1.0596023797988892,grad_norm: 0.9476996573632847, iteration: 292390
loss: 1.0880242586135864,grad_norm: 0.9999999322888302, iteration: 292391
loss: 1.0776845216751099,grad_norm: 0.9999994438737819, iteration: 292392
loss: 1.201333999633789,grad_norm: 0.9601999825447352, iteration: 292393
loss: 1.0059503316879272,grad_norm: 0.9628705402878499, iteration: 292394
loss: 0.9863652586936951,grad_norm: 0.8132003189319396, iteration: 292395
loss: 0.9936453104019165,grad_norm: 0.9023149699691086, iteration: 292396
loss: 1.0244797468185425,grad_norm: 0.8047013757113121, iteration: 292397
loss: 1.0074092149734497,grad_norm: 0.9044075396683346, iteration: 292398
loss: 1.0048280954360962,grad_norm: 0.9999991332519328, iteration: 292399
loss: 1.042183518409729,grad_norm: 0.9999990226535636, iteration: 292400
loss: 1.0610145330429077,grad_norm: 0.8194532823446681, iteration: 292401
loss: 0.9774817228317261,grad_norm: 0.8533300023756265, iteration: 292402
loss: 1.062279224395752,grad_norm: 0.7837072731161508, iteration: 292403
loss: 0.975663959980011,grad_norm: 0.9532065168203367, iteration: 292404
loss: 1.0921883583068848,grad_norm: 0.9999997126116461, iteration: 292405
loss: 1.0291045904159546,grad_norm: 0.9526352180227902, iteration: 292406
loss: 1.1000794172286987,grad_norm: 0.9999995042024439, iteration: 292407
loss: 1.0045634508132935,grad_norm: 0.7563455128101623, iteration: 292408
loss: 1.009716272354126,grad_norm: 0.9999991685452255, iteration: 292409
loss: 0.9980382919311523,grad_norm: 0.98400244329328, iteration: 292410
loss: 1.0024745464324951,grad_norm: 0.9340368843355651, iteration: 292411
loss: 0.9993095993995667,grad_norm: 0.9960759949596814, iteration: 292412
loss: 1.072572946548462,grad_norm: 0.946472823067785, iteration: 292413
loss: 1.0942413806915283,grad_norm: 0.99999955432255, iteration: 292414
loss: 0.9851879477500916,grad_norm: 0.9999992805609107, iteration: 292415
loss: 0.9747777581214905,grad_norm: 0.6717273640243391, iteration: 292416
loss: 0.9890021681785583,grad_norm: 0.8430272222334186, iteration: 292417
loss: 0.9952502846717834,grad_norm: 0.8272842170065136, iteration: 292418
loss: 1.2254624366760254,grad_norm: 0.9999996085511231, iteration: 292419
loss: 1.0240715742111206,grad_norm: 0.7972474403664799, iteration: 292420
loss: 1.0164698362350464,grad_norm: 0.880485739011359, iteration: 292421
loss: 1.0197734832763672,grad_norm: 0.9999991378037285, iteration: 292422
loss: 0.9893752932548523,grad_norm: 0.73381755132021, iteration: 292423
loss: 1.0046167373657227,grad_norm: 0.9999989872153164, iteration: 292424
loss: 1.0142345428466797,grad_norm: 0.945888601295656, iteration: 292425
loss: 0.9622395038604736,grad_norm: 0.9035261914575439, iteration: 292426
loss: 0.983154296875,grad_norm: 0.9612620390906542, iteration: 292427
loss: 1.0059568881988525,grad_norm: 0.7715732875230537, iteration: 292428
loss: 1.0292251110076904,grad_norm: 0.9999991048624418, iteration: 292429
loss: 1.005036473274231,grad_norm: 0.9717839476096234, iteration: 292430
loss: 0.989531397819519,grad_norm: 0.6946381170137828, iteration: 292431
loss: 1.088064432144165,grad_norm: 0.999999848119686, iteration: 292432
loss: 1.0005513429641724,grad_norm: 0.9747171440611306, iteration: 292433
loss: 0.9577111601829529,grad_norm: 0.9183504294912602, iteration: 292434
loss: 1.034165382385254,grad_norm: 0.7980863909501591, iteration: 292435
loss: 1.0019947290420532,grad_norm: 0.9789878489917323, iteration: 292436
loss: 1.0325038433074951,grad_norm: 0.8140187509857278, iteration: 292437
loss: 1.0090974569320679,grad_norm: 0.9402495320568345, iteration: 292438
loss: 1.055834174156189,grad_norm: 0.9706358028152757, iteration: 292439
loss: 0.9777456521987915,grad_norm: 0.8402176258934411, iteration: 292440
loss: 0.9917999505996704,grad_norm: 0.9015330763196014, iteration: 292441
loss: 0.9897265434265137,grad_norm: 0.8341612868484816, iteration: 292442
loss: 0.9908226132392883,grad_norm: 0.8897731360729052, iteration: 292443
loss: 1.0804309844970703,grad_norm: 0.9999992631323833, iteration: 292444
loss: 1.0349570512771606,grad_norm: 0.8078018065690352, iteration: 292445
loss: 1.0194785594940186,grad_norm: 0.9167040681860661, iteration: 292446
loss: 1.0814546346664429,grad_norm: 0.9999998421742583, iteration: 292447
loss: 0.9831648468971252,grad_norm: 0.8570027158295817, iteration: 292448
loss: 1.0530731678009033,grad_norm: 0.8472011775623017, iteration: 292449
loss: 0.9859235882759094,grad_norm: 0.863910059343865, iteration: 292450
loss: 1.0132412910461426,grad_norm: 0.9059302086791221, iteration: 292451
loss: 0.9944236874580383,grad_norm: 0.8933075899360172, iteration: 292452
loss: 1.0031062364578247,grad_norm: 0.9999998854455827, iteration: 292453
loss: 0.9848231077194214,grad_norm: 0.7735461218281704, iteration: 292454
loss: 0.9968717694282532,grad_norm: 0.8440628548390356, iteration: 292455
loss: 0.9837997555732727,grad_norm: 0.8855684909052934, iteration: 292456
loss: 0.9599117040634155,grad_norm: 0.8115234378778483, iteration: 292457
loss: 1.001139521598816,grad_norm: 0.9563956314600944, iteration: 292458
loss: 0.9629302620887756,grad_norm: 0.9560320964266794, iteration: 292459
loss: 0.9913915395736694,grad_norm: 0.8348284300175639, iteration: 292460
loss: 0.9925243258476257,grad_norm: 0.9999990446054471, iteration: 292461
loss: 1.019789457321167,grad_norm: 0.9999998448823634, iteration: 292462
loss: 0.9706549644470215,grad_norm: 0.8448554138772426, iteration: 292463
loss: 1.0385161638259888,grad_norm: 0.925973822283834, iteration: 292464
loss: 1.0686992406845093,grad_norm: 0.9999994520123594, iteration: 292465
loss: 0.9683427810668945,grad_norm: 0.8709692625967371, iteration: 292466
loss: 0.9601704478263855,grad_norm: 0.8492914869806527, iteration: 292467
loss: 1.0568827390670776,grad_norm: 0.9186517853098721, iteration: 292468
loss: 1.0439091920852661,grad_norm: 0.9018596919720099, iteration: 292469
loss: 1.0360345840454102,grad_norm: 0.9057767147865177, iteration: 292470
loss: 0.9721019864082336,grad_norm: 0.910427869903985, iteration: 292471
loss: 0.9974769949913025,grad_norm: 0.9999989856296128, iteration: 292472
loss: 1.0011879205703735,grad_norm: 0.9999996922685899, iteration: 292473
loss: 1.041422963142395,grad_norm: 0.999999126891496, iteration: 292474
loss: 0.9870092272758484,grad_norm: 0.7420351840176189, iteration: 292475
loss: 1.0176750421524048,grad_norm: 0.934516890634777, iteration: 292476
loss: 0.9923979043960571,grad_norm: 0.811664968649273, iteration: 292477
loss: 0.99767005443573,grad_norm: 0.8064775947119447, iteration: 292478
loss: 1.004271388053894,grad_norm: 0.7546019840758321, iteration: 292479
loss: 1.0008735656738281,grad_norm: 0.9999991305308722, iteration: 292480
loss: 1.059598445892334,grad_norm: 0.982934528088543, iteration: 292481
loss: 1.0437976121902466,grad_norm: 0.927622748241025, iteration: 292482
loss: 0.9947669506072998,grad_norm: 0.8829672055250256, iteration: 292483
loss: 1.0204451084136963,grad_norm: 0.9999990770921078, iteration: 292484
loss: 1.0092718601226807,grad_norm: 0.8741507838783092, iteration: 292485
loss: 1.0666970014572144,grad_norm: 0.9999990850635276, iteration: 292486
loss: 1.0107795000076294,grad_norm: 0.8860447653230987, iteration: 292487
loss: 0.999210000038147,grad_norm: 0.9999999320307977, iteration: 292488
loss: 1.0187872648239136,grad_norm: 0.9999993646106995, iteration: 292489
loss: 0.965086817741394,grad_norm: 0.8112837705518595, iteration: 292490
loss: 0.9712458848953247,grad_norm: 0.864338060605637, iteration: 292491
loss: 0.9449155926704407,grad_norm: 0.7859010000553016, iteration: 292492
loss: 0.9936901926994324,grad_norm: 0.8334456063519461, iteration: 292493
loss: 1.05708646774292,grad_norm: 0.9328132260923101, iteration: 292494
loss: 0.9967788457870483,grad_norm: 0.9399995318168537, iteration: 292495
loss: 1.0058132410049438,grad_norm: 0.8531841774591311, iteration: 292496
loss: 0.994914174079895,grad_norm: 0.7089669538448695, iteration: 292497
loss: 0.9792958498001099,grad_norm: 0.8086706235397301, iteration: 292498
loss: 1.0636883974075317,grad_norm: 0.999999383937022, iteration: 292499
loss: 1.0538763999938965,grad_norm: 0.9999995428019259, iteration: 292500
loss: 1.0412994623184204,grad_norm: 0.9999999718022422, iteration: 292501
loss: 0.9435640573501587,grad_norm: 0.8984015112609054, iteration: 292502
loss: 1.0233527421951294,grad_norm: 0.9999998878035928, iteration: 292503
loss: 1.014719009399414,grad_norm: 0.7224583759955125, iteration: 292504
loss: 1.00253164768219,grad_norm: 0.9999992376670059, iteration: 292505
loss: 1.0186913013458252,grad_norm: 0.9999991869714836, iteration: 292506
loss: 1.1592013835906982,grad_norm: 0.9999996020575141, iteration: 292507
loss: 1.0281343460083008,grad_norm: 0.9472277934752505, iteration: 292508
loss: 1.0505625009536743,grad_norm: 0.9999998957845191, iteration: 292509
loss: 1.0037447214126587,grad_norm: 0.9999990390677932, iteration: 292510
loss: 1.0255780220031738,grad_norm: 0.8682141636941697, iteration: 292511
loss: 1.0796897411346436,grad_norm: 0.9999993723503849, iteration: 292512
loss: 1.0171217918395996,grad_norm: 0.7717097121836534, iteration: 292513
loss: 1.0270087718963623,grad_norm: 0.9999997651037772, iteration: 292514
loss: 0.9611881971359253,grad_norm: 0.9563973638490121, iteration: 292515
loss: 1.019250512123108,grad_norm: 0.9999989819855214, iteration: 292516
loss: 1.0176739692687988,grad_norm: 0.8893637228933209, iteration: 292517
loss: 1.0189104080200195,grad_norm: 0.9999997613793163, iteration: 292518
loss: 0.9732106924057007,grad_norm: 0.8004701445039946, iteration: 292519
loss: 1.0136343240737915,grad_norm: 0.9732595953507593, iteration: 292520
loss: 0.9753832817077637,grad_norm: 0.8690978921404331, iteration: 292521
loss: 0.9896507859230042,grad_norm: 0.9999991789424658, iteration: 292522
loss: 0.9564709067344666,grad_norm: 0.8179262007906253, iteration: 292523
loss: 0.9772269129753113,grad_norm: 0.9999994330932257, iteration: 292524
loss: 0.9971771240234375,grad_norm: 0.8709462328332013, iteration: 292525
loss: 1.0050277709960938,grad_norm: 0.9169351904843894, iteration: 292526
loss: 0.9810056686401367,grad_norm: 0.7530335708169765, iteration: 292527
loss: 1.000591516494751,grad_norm: 0.939842947121326, iteration: 292528
loss: 0.9662922024726868,grad_norm: 0.7787789944566208, iteration: 292529
loss: 1.1140244007110596,grad_norm: 0.9999996331565547, iteration: 292530
loss: 0.9757224917411804,grad_norm: 0.9999993759021428, iteration: 292531
loss: 1.0018348693847656,grad_norm: 0.8017487041432025, iteration: 292532
loss: 1.0280680656433105,grad_norm: 0.7756058762282869, iteration: 292533
loss: 1.0089248418807983,grad_norm: 0.9591706030611139, iteration: 292534
loss: 1.0826185941696167,grad_norm: 0.999999829359772, iteration: 292535
loss: 1.0162098407745361,grad_norm: 0.8507452134523011, iteration: 292536
loss: 1.0076355934143066,grad_norm: 0.9999992772030072, iteration: 292537
loss: 1.0083764791488647,grad_norm: 0.8233387544727017, iteration: 292538
loss: 1.0122722387313843,grad_norm: 0.9497630231626101, iteration: 292539
loss: 1.0014878511428833,grad_norm: 0.8110410981131119, iteration: 292540
loss: 1.0100679397583008,grad_norm: 0.7950743991807087, iteration: 292541
loss: 1.0026167631149292,grad_norm: 0.8415800982369099, iteration: 292542
loss: 0.974088728427887,grad_norm: 0.7809239655236734, iteration: 292543
loss: 1.0088468790054321,grad_norm: 0.9999991566175528, iteration: 292544
loss: 0.9993053078651428,grad_norm: 0.796217160142356, iteration: 292545
loss: 1.0535863637924194,grad_norm: 0.8958042346695085, iteration: 292546
loss: 1.0087478160858154,grad_norm: 0.7959136322505947, iteration: 292547
loss: 1.0221428871154785,grad_norm: 0.8968637551011069, iteration: 292548
loss: 0.9672775864601135,grad_norm: 0.8226062087284038, iteration: 292549
loss: 1.0063532590866089,grad_norm: 0.8138840717516077, iteration: 292550
loss: 1.0005948543548584,grad_norm: 0.9932598979642654, iteration: 292551
loss: 0.9980925917625427,grad_norm: 0.8729435864588174, iteration: 292552
loss: 1.0096172094345093,grad_norm: 0.8427844497369928, iteration: 292553
loss: 0.9449952840805054,grad_norm: 0.7209645945625401, iteration: 292554
loss: 1.023017406463623,grad_norm: 0.8277267139917651, iteration: 292555
loss: 1.0174884796142578,grad_norm: 0.99999920108794, iteration: 292556
loss: 1.0096209049224854,grad_norm: 0.9999990933018545, iteration: 292557
loss: 1.0540059804916382,grad_norm: 0.9999993735804107, iteration: 292558
loss: 1.0779707431793213,grad_norm: 0.9954424485326138, iteration: 292559
loss: 0.9975900053977966,grad_norm: 0.838933321195884, iteration: 292560
loss: 1.0113886594772339,grad_norm: 0.8465810509326608, iteration: 292561
loss: 1.0018010139465332,grad_norm: 0.9055462902548065, iteration: 292562
loss: 1.0443482398986816,grad_norm: 0.999999172438373, iteration: 292563
loss: 1.0773431062698364,grad_norm: 0.9999998271551366, iteration: 292564
loss: 0.9675236344337463,grad_norm: 0.7793475786762871, iteration: 292565
loss: 1.0027951002120972,grad_norm: 0.9999993360935182, iteration: 292566
loss: 0.9820572137832642,grad_norm: 0.6490846983575154, iteration: 292567
loss: 1.002744436264038,grad_norm: 0.9999989702037422, iteration: 292568
loss: 0.9771040081977844,grad_norm: 0.8053683970479998, iteration: 292569
loss: 0.9898754954338074,grad_norm: 0.9999993014636579, iteration: 292570
loss: 1.0095239877700806,grad_norm: 0.8588241497447922, iteration: 292571
loss: 1.0421273708343506,grad_norm: 0.985251713062757, iteration: 292572
loss: 1.0070170164108276,grad_norm: 0.7726930757684097, iteration: 292573
loss: 1.0281009674072266,grad_norm: 0.7576917521818332, iteration: 292574
loss: 1.0025285482406616,grad_norm: 0.950561066378059, iteration: 292575
loss: 0.9707615375518799,grad_norm: 0.9871031543561898, iteration: 292576
loss: 1.0069221258163452,grad_norm: 0.8287131454510914, iteration: 292577
loss: 0.9732271432876587,grad_norm: 0.9030707420294535, iteration: 292578
loss: 0.9783807992935181,grad_norm: 0.753375301443745, iteration: 292579
loss: 1.0045901536941528,grad_norm: 0.7251360785711621, iteration: 292580
loss: 1.033705234527588,grad_norm: 0.9081706361461412, iteration: 292581
loss: 1.01584792137146,grad_norm: 0.9999994121819072, iteration: 292582
loss: 0.9793581366539001,grad_norm: 0.960873383595568, iteration: 292583
loss: 0.9752141237258911,grad_norm: 0.8605555805717156, iteration: 292584
loss: 1.0330634117126465,grad_norm: 0.9999996986776526, iteration: 292585
loss: 1.1077579259872437,grad_norm: 0.9999999487194967, iteration: 292586
loss: 0.9943277835845947,grad_norm: 0.8654721978674412, iteration: 292587
loss: 1.0018928050994873,grad_norm: 0.8669448575761831, iteration: 292588
loss: 1.0373719930648804,grad_norm: 0.9999998852460354, iteration: 292589
loss: 0.9699599146842957,grad_norm: 0.9999991011757775, iteration: 292590
loss: 1.231347680091858,grad_norm: 0.9999998673969477, iteration: 292591
loss: 1.0201423168182373,grad_norm: 0.7728189276654454, iteration: 292592
loss: 1.0101101398468018,grad_norm: 0.9193801572715821, iteration: 292593
loss: 1.003949522972107,grad_norm: 0.8329350618484136, iteration: 292594
loss: 0.9861593842506409,grad_norm: 0.7440777100643514, iteration: 292595
loss: 0.99605792760849,grad_norm: 0.8384273223427456, iteration: 292596
loss: 0.9640285968780518,grad_norm: 0.6664855158226091, iteration: 292597
loss: 1.0258570909500122,grad_norm: 0.7873421686940544, iteration: 292598
loss: 1.014288067817688,grad_norm: 0.9999992185342844, iteration: 292599
loss: 1.0257173776626587,grad_norm: 0.773966222088885, iteration: 292600
loss: 0.9755343198776245,grad_norm: 0.9123660734334684, iteration: 292601
loss: 1.0100445747375488,grad_norm: 0.9411977980380316, iteration: 292602
loss: 0.9495725631713867,grad_norm: 0.753488976932619, iteration: 292603
loss: 0.991180419921875,grad_norm: 0.7867122635878204, iteration: 292604
loss: 0.9812542200088501,grad_norm: 0.8240562782030724, iteration: 292605
loss: 0.9922819137573242,grad_norm: 0.8456333088515608, iteration: 292606
loss: 1.0284582376480103,grad_norm: 0.7811716212254197, iteration: 292607
loss: 0.9919000864028931,grad_norm: 0.7701748376822588, iteration: 292608
loss: 1.0145854949951172,grad_norm: 0.8861453273353753, iteration: 292609
loss: 1.016476035118103,grad_norm: 0.7303578049502235, iteration: 292610
loss: 1.0135332345962524,grad_norm: 0.8368139299731326, iteration: 292611
loss: 1.0148295164108276,grad_norm: 0.9233770840256357, iteration: 292612
loss: 1.0013294219970703,grad_norm: 0.9871940243371001, iteration: 292613
loss: 0.9851297736167908,grad_norm: 0.9132909451081548, iteration: 292614
loss: 1.0413641929626465,grad_norm: 0.9999989911279747, iteration: 292615
loss: 0.9735578894615173,grad_norm: 0.9999991550174945, iteration: 292616
loss: 0.9916749000549316,grad_norm: 0.8860542324686252, iteration: 292617
loss: 0.9642957448959351,grad_norm: 0.8008438157958913, iteration: 292618
loss: 0.9968662261962891,grad_norm: 0.8502451837599148, iteration: 292619
loss: 1.0299285650253296,grad_norm: 0.828193274312295, iteration: 292620
loss: 0.9848096966743469,grad_norm: 0.8937622667562135, iteration: 292621
loss: 0.9966685771942139,grad_norm: 0.8264833194913759, iteration: 292622
loss: 1.0275036096572876,grad_norm: 0.999999569942652, iteration: 292623
loss: 0.9506460428237915,grad_norm: 0.963318970767873, iteration: 292624
loss: 1.0011777877807617,grad_norm: 0.8328532380571397, iteration: 292625
loss: 1.0075361728668213,grad_norm: 0.9882319710409094, iteration: 292626
loss: 1.0096139907836914,grad_norm: 0.7563102187813018, iteration: 292627
loss: 0.9835479855537415,grad_norm: 0.9286364578675067, iteration: 292628
loss: 1.0067763328552246,grad_norm: 0.8320634779890226, iteration: 292629
loss: 1.087861180305481,grad_norm: 0.9999990672321942, iteration: 292630
loss: 0.9656609296798706,grad_norm: 0.8891323500094441, iteration: 292631
loss: 0.9880118370056152,grad_norm: 0.9240672766068524, iteration: 292632
loss: 1.0499365329742432,grad_norm: 0.9999994545704318, iteration: 292633
loss: 0.9712429642677307,grad_norm: 0.8034609393152125, iteration: 292634
loss: 0.9967398643493652,grad_norm: 0.9583978408667919, iteration: 292635
loss: 1.0057426691055298,grad_norm: 0.8685525434333837, iteration: 292636
loss: 0.987828254699707,grad_norm: 0.8380874656206192, iteration: 292637
loss: 1.0139538049697876,grad_norm: 0.8109466120788831, iteration: 292638
loss: 0.9898596405982971,grad_norm: 0.8743349013244107, iteration: 292639
loss: 0.9947957992553711,grad_norm: 0.8885327542650714, iteration: 292640
loss: 1.0471101999282837,grad_norm: 0.9999996111297372, iteration: 292641
loss: 1.0097200870513916,grad_norm: 0.9999991004520231, iteration: 292642
loss: 0.984369695186615,grad_norm: 0.7796284796867606, iteration: 292643
loss: 0.9785799384117126,grad_norm: 0.8430891135894555, iteration: 292644
loss: 0.9865816831588745,grad_norm: 0.9586838138764743, iteration: 292645
loss: 1.0351823568344116,grad_norm: 0.8584828880212355, iteration: 292646
loss: 0.9995468258857727,grad_norm: 0.878696350547756, iteration: 292647
loss: 1.0053982734680176,grad_norm: 0.90598995343399, iteration: 292648
loss: 1.1372849941253662,grad_norm: 0.9999997108021542, iteration: 292649
loss: 1.0020267963409424,grad_norm: 0.8858691896110584, iteration: 292650
loss: 1.0125257968902588,grad_norm: 0.9999991711927032, iteration: 292651
loss: 0.9879730343818665,grad_norm: 0.9578735253744141, iteration: 292652
loss: 1.0015228986740112,grad_norm: 0.999999385527189, iteration: 292653
loss: 0.9835401773452759,grad_norm: 0.8864964262985751, iteration: 292654
loss: 1.0083935260772705,grad_norm: 0.9999990559625171, iteration: 292655
loss: 0.9851681590080261,grad_norm: 0.9999991211242102, iteration: 292656
loss: 0.9778029322624207,grad_norm: 0.8033764313935319, iteration: 292657
loss: 1.0043011903762817,grad_norm: 0.916374542880046, iteration: 292658
loss: 0.9951043128967285,grad_norm: 0.9267810132906509, iteration: 292659
loss: 1.0932183265686035,grad_norm: 0.9999991144822513, iteration: 292660
loss: 1.0243639945983887,grad_norm: 0.9896830188404805, iteration: 292661
loss: 1.0101556777954102,grad_norm: 0.7825324762239181, iteration: 292662
loss: 0.9651563763618469,grad_norm: 0.709983337142553, iteration: 292663
loss: 1.0080761909484863,grad_norm: 0.8237594639636608, iteration: 292664
loss: 1.0042872428894043,grad_norm: 0.7744842669893655, iteration: 292665
loss: 0.995191216468811,grad_norm: 0.9100088313859295, iteration: 292666
loss: 0.9817889332771301,grad_norm: 0.7553132934235957, iteration: 292667
loss: 0.997603178024292,grad_norm: 0.7208887605844084, iteration: 292668
loss: 1.007633090019226,grad_norm: 0.9821344868220264, iteration: 292669
loss: 0.9926298260688782,grad_norm: 0.9631243161797155, iteration: 292670
loss: 1.0261858701705933,grad_norm: 0.7965756029775289, iteration: 292671
loss: 0.9924170970916748,grad_norm: 0.9999991240353421, iteration: 292672
loss: 1.0136852264404297,grad_norm: 0.8082117128912831, iteration: 292673
loss: 1.0553961992263794,grad_norm: 0.9999997840854667, iteration: 292674
loss: 0.9735086560249329,grad_norm: 0.9999990111950863, iteration: 292675
loss: 0.9611015319824219,grad_norm: 0.8824209358267023, iteration: 292676
loss: 0.9968143701553345,grad_norm: 0.7691526607548029, iteration: 292677
loss: 0.982119619846344,grad_norm: 0.883363836262575, iteration: 292678
loss: 0.9794538021087646,grad_norm: 0.730671547235994, iteration: 292679
loss: 0.9869293570518494,grad_norm: 0.9423652091128308, iteration: 292680
loss: 1.0184311866760254,grad_norm: 0.9311912902864714, iteration: 292681
loss: 1.0246021747589111,grad_norm: 0.894509744220568, iteration: 292682
loss: 0.9538975954055786,grad_norm: 0.8868563031127077, iteration: 292683
loss: 1.0064183473587036,grad_norm: 0.8560561121124344, iteration: 292684
loss: 1.0026785135269165,grad_norm: 0.9999995220090924, iteration: 292685
loss: 1.0132375955581665,grad_norm: 0.7842530473118188, iteration: 292686
loss: 1.00698721408844,grad_norm: 0.8709585609909956, iteration: 292687
loss: 0.9865414500236511,grad_norm: 0.7763538074328443, iteration: 292688
loss: 0.9814738035202026,grad_norm: 0.8422864048794437, iteration: 292689
loss: 1.03617262840271,grad_norm: 0.9999990627211818, iteration: 292690
loss: 0.9791702032089233,grad_norm: 0.8750929974763998, iteration: 292691
loss: 1.0290124416351318,grad_norm: 0.9082858791445804, iteration: 292692
loss: 0.9647786617279053,grad_norm: 0.9137674750949247, iteration: 292693
loss: 1.010154128074646,grad_norm: 0.9280807090877229, iteration: 292694
loss: 0.9610462784767151,grad_norm: 0.9068492350779678, iteration: 292695
loss: 0.9567077159881592,grad_norm: 0.7260323663566964, iteration: 292696
loss: 0.9625656604766846,grad_norm: 0.8792194063713065, iteration: 292697
loss: 0.984178900718689,grad_norm: 0.8249082914603258, iteration: 292698
loss: 0.9648540616035461,grad_norm: 0.777298784918189, iteration: 292699
loss: 1.0342745780944824,grad_norm: 0.8339795822465993, iteration: 292700
loss: 1.0251471996307373,grad_norm: 0.9999997898362105, iteration: 292701
loss: 1.022608757019043,grad_norm: 0.8648432153420711, iteration: 292702
loss: 1.0091583728790283,grad_norm: 0.787301973038913, iteration: 292703
loss: 1.0040372610092163,grad_norm: 0.8595026524051528, iteration: 292704
loss: 1.0093470811843872,grad_norm: 0.6822645543329201, iteration: 292705
loss: 1.0407018661499023,grad_norm: 0.890860875185301, iteration: 292706
loss: 1.0818623304367065,grad_norm: 0.932616699135596, iteration: 292707
loss: 1.0183652639389038,grad_norm: 0.7639576715198537, iteration: 292708
loss: 1.048741102218628,grad_norm: 0.8814519912197412, iteration: 292709
loss: 0.994647741317749,grad_norm: 0.9338626204179286, iteration: 292710
loss: 1.0072426795959473,grad_norm: 0.9001794051351502, iteration: 292711
loss: 1.000993251800537,grad_norm: 0.9390622329675724, iteration: 292712
loss: 0.9937595725059509,grad_norm: 0.9999996763677205, iteration: 292713
loss: 0.986546516418457,grad_norm: 0.9473686407951413, iteration: 292714
loss: 0.9979187846183777,grad_norm: 0.8976963862792974, iteration: 292715
loss: 1.0285354852676392,grad_norm: 0.8168789367240336, iteration: 292716
loss: 0.9635250568389893,grad_norm: 0.9181234721625272, iteration: 292717
loss: 1.0135841369628906,grad_norm: 0.9230613201532346, iteration: 292718
loss: 0.9196237325668335,grad_norm: 0.8848134064301046, iteration: 292719
loss: 1.0221842527389526,grad_norm: 0.9281782409354714, iteration: 292720
loss: 1.0331283807754517,grad_norm: 0.8834419296194406, iteration: 292721
loss: 0.9845247864723206,grad_norm: 0.8546657044573961, iteration: 292722
loss: 0.9856148362159729,grad_norm: 0.9110296268562902, iteration: 292723
loss: 0.9913080930709839,grad_norm: 0.8443762072784071, iteration: 292724
loss: 0.9880923628807068,grad_norm: 0.9999989993212138, iteration: 292725
loss: 0.9804080128669739,grad_norm: 0.7625678672766114, iteration: 292726
loss: 1.0413494110107422,grad_norm: 0.9018661547307846, iteration: 292727
loss: 0.9904274344444275,grad_norm: 0.9999991064972025, iteration: 292728
loss: 0.9828911423683167,grad_norm: 0.7282376386626382, iteration: 292729
loss: 0.9767001271247864,grad_norm: 0.9605806365921548, iteration: 292730
loss: 1.0285648107528687,grad_norm: 0.985721045595998, iteration: 292731
loss: 1.0026947259902954,grad_norm: 0.8564434216099874, iteration: 292732
loss: 0.9808923602104187,grad_norm: 0.7737419324121048, iteration: 292733
loss: 1.0043437480926514,grad_norm: 0.9696635340391971, iteration: 292734
loss: 0.9864318370819092,grad_norm: 0.9999989811509722, iteration: 292735
loss: 0.9612652063369751,grad_norm: 0.880688616738724, iteration: 292736
loss: 0.9950238466262817,grad_norm: 0.8350171420695692, iteration: 292737
loss: 0.9955275654792786,grad_norm: 0.720078891274336, iteration: 292738
loss: 1.0110937356948853,grad_norm: 0.8517311113354737, iteration: 292739
loss: 1.014462947845459,grad_norm: 0.7592888056784328, iteration: 292740
loss: 1.0141165256500244,grad_norm: 0.9462257374337945, iteration: 292741
loss: 0.989830493927002,grad_norm: 0.7510638975890607, iteration: 292742
loss: 1.0299656391143799,grad_norm: 0.9999992602122744, iteration: 292743
loss: 0.9955904483795166,grad_norm: 0.9454358296863576, iteration: 292744
loss: 1.0354199409484863,grad_norm: 0.783146042802487, iteration: 292745
loss: 0.9990524053573608,grad_norm: 0.83685202395713, iteration: 292746
loss: 1.0090991258621216,grad_norm: 0.7948997021483888, iteration: 292747
loss: 0.9960998296737671,grad_norm: 0.7799576090788527, iteration: 292748
loss: 1.0045267343521118,grad_norm: 0.9321208917721335, iteration: 292749
loss: 0.9795039296150208,grad_norm: 0.813968642419059, iteration: 292750
loss: 1.0046740770339966,grad_norm: 0.9999991318962143, iteration: 292751
loss: 0.9781994819641113,grad_norm: 0.8056782789435835, iteration: 292752
loss: 1.0222628116607666,grad_norm: 0.8507483694006691, iteration: 292753
loss: 1.0186495780944824,grad_norm: 0.7979709378763059, iteration: 292754
loss: 0.9665712118148804,grad_norm: 0.8125941890142104, iteration: 292755
loss: 0.9999386072158813,grad_norm: 0.7207515209057791, iteration: 292756
loss: 1.0199447870254517,grad_norm: 0.7866608891021072, iteration: 292757
loss: 1.0343033075332642,grad_norm: 0.9497836555931206, iteration: 292758
loss: 0.999600350856781,grad_norm: 0.9112708156322358, iteration: 292759
loss: 1.0014605522155762,grad_norm: 0.8004097651530324, iteration: 292760
loss: 0.9717921018600464,grad_norm: 0.7095357390215565, iteration: 292761
loss: 1.0219210386276245,grad_norm: 0.8304547832218044, iteration: 292762
loss: 0.9967838525772095,grad_norm: 0.9080515309118927, iteration: 292763
loss: 1.0092846155166626,grad_norm: 0.7592263218748138, iteration: 292764
loss: 1.0956206321716309,grad_norm: 0.9999997413758015, iteration: 292765
loss: 1.0540165901184082,grad_norm: 0.8114622738547704, iteration: 292766
loss: 1.0253584384918213,grad_norm: 0.999999128027226, iteration: 292767
loss: 0.9764194488525391,grad_norm: 0.9701425403424663, iteration: 292768
loss: 0.9710489511489868,grad_norm: 0.9919220022051858, iteration: 292769
loss: 0.990623414516449,grad_norm: 0.8355904156918558, iteration: 292770
loss: 0.9931718707084656,grad_norm: 0.7559290248974999, iteration: 292771
loss: 0.972783625125885,grad_norm: 0.7810896859970502, iteration: 292772
loss: 0.9918650388717651,grad_norm: 0.826013325566494, iteration: 292773
loss: 1.003684401512146,grad_norm: 0.8631658492325484, iteration: 292774
loss: 1.033022403717041,grad_norm: 0.9393373937910507, iteration: 292775
loss: 1.0020711421966553,grad_norm: 0.9444387913457318, iteration: 292776
loss: 1.0212316513061523,grad_norm: 0.9198703467840537, iteration: 292777
loss: 0.9987933039665222,grad_norm: 0.8511697755261252, iteration: 292778
loss: 1.021822214126587,grad_norm: 0.8595583959867662, iteration: 292779
loss: 0.9971431493759155,grad_norm: 0.7976704148288776, iteration: 292780
loss: 0.9569883942604065,grad_norm: 0.7878523336038595, iteration: 292781
loss: 1.0117683410644531,grad_norm: 0.8495412583570792, iteration: 292782
loss: 1.0028252601623535,grad_norm: 0.8316522347823431, iteration: 292783
loss: 0.9944531917572021,grad_norm: 0.8811630364986593, iteration: 292784
loss: 1.0150970220565796,grad_norm: 0.8151578577398402, iteration: 292785
loss: 0.9615097045898438,grad_norm: 0.7754308083956324, iteration: 292786
loss: 0.9936055541038513,grad_norm: 0.8785054647791213, iteration: 292787
loss: 1.0309275388717651,grad_norm: 0.7596169064844644, iteration: 292788
loss: 0.9700115919113159,grad_norm: 0.7193776880083091, iteration: 292789
loss: 1.0018656253814697,grad_norm: 0.999999132683466, iteration: 292790
loss: 0.9579348564147949,grad_norm: 0.9205363333070258, iteration: 292791
loss: 1.0072370767593384,grad_norm: 0.8398554475885679, iteration: 292792
loss: 1.0067481994628906,grad_norm: 0.7457958622903551, iteration: 292793
loss: 1.0050448179244995,grad_norm: 0.8261713415078479, iteration: 292794
loss: 0.9567670226097107,grad_norm: 0.9777706476879721, iteration: 292795
loss: 0.9907416701316833,grad_norm: 0.9228827405015936, iteration: 292796
loss: 1.025310754776001,grad_norm: 0.9999992147924804, iteration: 292797
loss: 1.0185267925262451,grad_norm: 0.9289696863998507, iteration: 292798
loss: 1.027214765548706,grad_norm: 0.9301263190579508, iteration: 292799
loss: 1.0055937767028809,grad_norm: 0.9111435684259739, iteration: 292800
loss: 1.0609914064407349,grad_norm: 0.9999994388861506, iteration: 292801
loss: 0.9950743913650513,grad_norm: 0.7630708404769699, iteration: 292802
loss: 1.02969229221344,grad_norm: 0.9994558334155449, iteration: 292803
loss: 1.0104589462280273,grad_norm: 0.9014606960658363, iteration: 292804
loss: 0.9943282604217529,grad_norm: 0.8271237035018086, iteration: 292805
loss: 0.9898525476455688,grad_norm: 0.8037160294975163, iteration: 292806
loss: 1.019155502319336,grad_norm: 0.8502087205851642, iteration: 292807
loss: 1.0516294240951538,grad_norm: 0.9999998299412564, iteration: 292808
loss: 0.9693037271499634,grad_norm: 0.7176735399399815, iteration: 292809
loss: 0.989094614982605,grad_norm: 0.8073873240711532, iteration: 292810
loss: 1.0089750289916992,grad_norm: 0.8317538313239317, iteration: 292811
loss: 1.0061235427856445,grad_norm: 0.8956050144167331, iteration: 292812
loss: 0.9777722358703613,grad_norm: 0.9656533635486398, iteration: 292813
loss: 0.9684762358665466,grad_norm: 0.8606914708201046, iteration: 292814
loss: 0.9869755506515503,grad_norm: 0.8111629106969681, iteration: 292815
loss: 1.0214803218841553,grad_norm: 0.9999992174175621, iteration: 292816
loss: 1.0137619972229004,grad_norm: 0.8488231603888752, iteration: 292817
loss: 0.9803916215896606,grad_norm: 0.9238793707469166, iteration: 292818
loss: 1.017556071281433,grad_norm: 0.7171652971290653, iteration: 292819
loss: 0.9787647128105164,grad_norm: 0.8783946958328145, iteration: 292820
loss: 0.9805001616477966,grad_norm: 0.9999990536475735, iteration: 292821
loss: 0.9912965893745422,grad_norm: 0.7493943732155769, iteration: 292822
loss: 1.0355461835861206,grad_norm: 0.9999994007887452, iteration: 292823
loss: 1.0134124755859375,grad_norm: 0.9335348159610989, iteration: 292824
loss: 1.00142240524292,grad_norm: 0.9418048087741414, iteration: 292825
loss: 0.9888293743133545,grad_norm: 0.9807626617522296, iteration: 292826
loss: 1.0157344341278076,grad_norm: 0.9714924402172357, iteration: 292827
loss: 1.019314169883728,grad_norm: 0.8357868238641245, iteration: 292828
loss: 1.006335735321045,grad_norm: 0.9586800654851725, iteration: 292829
loss: 1.0136662721633911,grad_norm: 0.7374642654220869, iteration: 292830
loss: 1.00437593460083,grad_norm: 0.7852532334263798, iteration: 292831
loss: 1.022804856300354,grad_norm: 0.99999893779602, iteration: 292832
loss: 1.1059719324111938,grad_norm: 0.9999997148015741, iteration: 292833
loss: 1.0150903463363647,grad_norm: 0.9999992804238945, iteration: 292834
loss: 1.0311341285705566,grad_norm: 0.9999995663859697, iteration: 292835
loss: 0.9906859397888184,grad_norm: 0.9999995223483925, iteration: 292836
loss: 0.9864990711212158,grad_norm: 0.9999989465909602, iteration: 292837
loss: 1.0195343494415283,grad_norm: 0.9103142421079935, iteration: 292838
loss: 0.97959303855896,grad_norm: 0.7834186864509891, iteration: 292839
loss: 0.9756886959075928,grad_norm: 0.7344978475772291, iteration: 292840
loss: 1.0434612035751343,grad_norm: 0.7905154861450211, iteration: 292841
loss: 1.1395596265792847,grad_norm: 0.9999998798764801, iteration: 292842
loss: 1.0249288082122803,grad_norm: 0.827338053838896, iteration: 292843
loss: 1.0100563764572144,grad_norm: 0.936851675099665, iteration: 292844
loss: 0.97367262840271,grad_norm: 0.6612927329144261, iteration: 292845
loss: 0.9749459624290466,grad_norm: 0.7554048030635504, iteration: 292846
loss: 1.0150566101074219,grad_norm: 0.9949989440807933, iteration: 292847
loss: 1.0067976713180542,grad_norm: 0.8095410795165474, iteration: 292848
loss: 1.0405199527740479,grad_norm: 0.9316353453134818, iteration: 292849
loss: 1.0293484926223755,grad_norm: 0.7984237272125729, iteration: 292850
loss: 1.0144997835159302,grad_norm: 0.9999990662137135, iteration: 292851
loss: 0.9763343930244446,grad_norm: 0.8480267730278034, iteration: 292852
loss: 0.9710091948509216,grad_norm: 0.8398729580327873, iteration: 292853
loss: 0.9918105006217957,grad_norm: 0.7209418419693617, iteration: 292854
loss: 0.9981858134269714,grad_norm: 0.9589627020600812, iteration: 292855
loss: 0.9923032522201538,grad_norm: 0.8967015296119899, iteration: 292856
loss: 0.9992219805717468,grad_norm: 0.9999995085260144, iteration: 292857
loss: 1.0047706365585327,grad_norm: 0.8484068954443831, iteration: 292858
loss: 0.9770301580429077,grad_norm: 0.7367350770718166, iteration: 292859
loss: 1.0168354511260986,grad_norm: 0.9239799022662045, iteration: 292860
loss: 0.96162348985672,grad_norm: 0.999999094829131, iteration: 292861
loss: 1.033684492111206,grad_norm: 0.8422751326700147, iteration: 292862
loss: 1.0235594511032104,grad_norm: 0.919650611064384, iteration: 292863
loss: 1.0206221342086792,grad_norm: 0.8733214873290938, iteration: 292864
loss: 1.0080937147140503,grad_norm: 0.7851648911461376, iteration: 292865
loss: 1.0162067413330078,grad_norm: 0.6651866874307727, iteration: 292866
loss: 1.0186481475830078,grad_norm: 0.6785041037855417, iteration: 292867
loss: 1.0094716548919678,grad_norm: 0.7249072260232028, iteration: 292868
loss: 0.9915696978569031,grad_norm: 0.7780260189192405, iteration: 292869
loss: 1.0157766342163086,grad_norm: 0.9337018083289677, iteration: 292870
loss: 0.983482837677002,grad_norm: 0.9297787659495461, iteration: 292871
loss: 1.0058350563049316,grad_norm: 0.7027976874517401, iteration: 292872
loss: 0.9828243255615234,grad_norm: 0.8153912263491948, iteration: 292873
loss: 1.0031808614730835,grad_norm: 0.8819368788214003, iteration: 292874
loss: 0.9787653684616089,grad_norm: 0.9999989969260016, iteration: 292875
loss: 1.0156657695770264,grad_norm: 0.8400091166914192, iteration: 292876
loss: 1.018222451210022,grad_norm: 0.9999996637422481, iteration: 292877
loss: 1.0222092866897583,grad_norm: 0.748611295963585, iteration: 292878
loss: 0.9974789619445801,grad_norm: 0.8311425872640367, iteration: 292879
loss: 1.0113296508789062,grad_norm: 0.8387306632139981, iteration: 292880
loss: 1.0421087741851807,grad_norm: 0.9428834327199505, iteration: 292881
loss: 1.0081806182861328,grad_norm: 0.8109723589419854, iteration: 292882
loss: 1.0060875415802002,grad_norm: 0.8371305449053936, iteration: 292883
loss: 0.9790939688682556,grad_norm: 0.8038800240898292, iteration: 292884
loss: 1.0108801126480103,grad_norm: 0.9032683143256252, iteration: 292885
loss: 1.0256340503692627,grad_norm: 0.9948389651590591, iteration: 292886
loss: 0.9700974822044373,grad_norm: 0.9106109896792542, iteration: 292887
loss: 1.018540382385254,grad_norm: 0.9999997008645325, iteration: 292888
loss: 0.9948972463607788,grad_norm: 0.9654506382247073, iteration: 292889
loss: 1.0003948211669922,grad_norm: 0.8864159702636579, iteration: 292890
loss: 0.9707162976264954,grad_norm: 0.8390771702412321, iteration: 292891
loss: 0.9876898527145386,grad_norm: 0.7630122758727834, iteration: 292892
loss: 1.0154014825820923,grad_norm: 0.8518231793222515, iteration: 292893
loss: 0.9582694172859192,grad_norm: 0.8919228639571152, iteration: 292894
loss: 0.9855456352233887,grad_norm: 0.9999989894423752, iteration: 292895
loss: 0.999813973903656,grad_norm: 0.9528954565341536, iteration: 292896
loss: 0.9885043501853943,grad_norm: 0.712759223784489, iteration: 292897
loss: 0.9925222396850586,grad_norm: 0.7604454952883531, iteration: 292898
loss: 1.0113486051559448,grad_norm: 0.7877778376472808, iteration: 292899
loss: 1.0235213041305542,grad_norm: 0.9999991929223309, iteration: 292900
loss: 1.002028226852417,grad_norm: 0.8629409294943733, iteration: 292901
loss: 0.9866518974304199,grad_norm: 0.8570537930091536, iteration: 292902
loss: 0.9979202747344971,grad_norm: 0.9188683827829098, iteration: 292903
loss: 0.9849918484687805,grad_norm: 0.8549617597548028, iteration: 292904
loss: 1.0676378011703491,grad_norm: 0.7655454829820306, iteration: 292905
loss: 1.0082643032073975,grad_norm: 0.9999995764916839, iteration: 292906
loss: 0.9980356097221375,grad_norm: 0.7070189524235745, iteration: 292907
loss: 1.0071125030517578,grad_norm: 0.917027278628422, iteration: 292908
loss: 1.057697057723999,grad_norm: 0.999999544065256, iteration: 292909
loss: 1.0223180055618286,grad_norm: 0.7811933063680396, iteration: 292910
loss: 1.0024281740188599,grad_norm: 0.9440444985386891, iteration: 292911
loss: 0.9908978939056396,grad_norm: 0.8847027856832825, iteration: 292912
loss: 1.011852502822876,grad_norm: 0.9978911453659614, iteration: 292913
loss: 1.027590274810791,grad_norm: 0.8182693146349791, iteration: 292914
loss: 1.0052597522735596,grad_norm: 0.7077149276945247, iteration: 292915
loss: 0.9936696290969849,grad_norm: 0.9620182494557682, iteration: 292916
loss: 1.0016658306121826,grad_norm: 0.9073798615882879, iteration: 292917
loss: 1.0162478685379028,grad_norm: 0.8671251258585388, iteration: 292918
loss: 1.006024718284607,grad_norm: 0.7583398978042163, iteration: 292919
loss: 0.9810426235198975,grad_norm: 0.8590333842732523, iteration: 292920
loss: 1.0146057605743408,grad_norm: 0.8056103529385108, iteration: 292921
loss: 1.0207798480987549,grad_norm: 0.8012935704577223, iteration: 292922
loss: 1.0131224393844604,grad_norm: 0.999999279744227, iteration: 292923
loss: 1.027298092842102,grad_norm: 0.9060597952409969, iteration: 292924
loss: 1.0806552171707153,grad_norm: 0.9064683050571621, iteration: 292925
loss: 0.9717442393302917,grad_norm: 0.8903770641877511, iteration: 292926
loss: 0.9506960511207581,grad_norm: 0.7736834485710339, iteration: 292927
loss: 1.024971842765808,grad_norm: 0.9999993940373235, iteration: 292928
loss: 0.9977360963821411,grad_norm: 0.9196797723452459, iteration: 292929
loss: 0.9842232465744019,grad_norm: 0.722832762754616, iteration: 292930
loss: 0.9827902317047119,grad_norm: 0.8635177453419435, iteration: 292931
loss: 0.9651328921318054,grad_norm: 0.8063453396482579, iteration: 292932
loss: 1.0475376844406128,grad_norm: 0.9999990047142887, iteration: 292933
loss: 1.0142194032669067,grad_norm: 0.9536207408222087, iteration: 292934
loss: 1.0287938117980957,grad_norm: 0.9999997158696937, iteration: 292935
loss: 0.9706582427024841,grad_norm: 0.8830690760011678, iteration: 292936
loss: 0.9641750454902649,grad_norm: 0.8428864531338185, iteration: 292937
loss: 1.027274250984192,grad_norm: 0.9999989220002874, iteration: 292938
loss: 1.1225792169570923,grad_norm: 1.0000000302223109, iteration: 292939
loss: 1.0011680126190186,grad_norm: 0.8035314652610408, iteration: 292940
loss: 1.0339608192443848,grad_norm: 0.9999993165036897, iteration: 292941
loss: 0.9666287302970886,grad_norm: 0.8035024960722732, iteration: 292942
loss: 1.0284919738769531,grad_norm: 0.9999997495726385, iteration: 292943
loss: 0.9589576125144958,grad_norm: 0.8749581086483477, iteration: 292944
loss: 0.9853805303573608,grad_norm: 0.8177580997069752, iteration: 292945
loss: 0.9807568788528442,grad_norm: 0.694457206789595, iteration: 292946
loss: 1.0018682479858398,grad_norm: 0.8244662833864065, iteration: 292947
loss: 1.0032647848129272,grad_norm: 0.841209803674353, iteration: 292948
loss: 1.0350345373153687,grad_norm: 0.8999905522454765, iteration: 292949
loss: 0.9961637854576111,grad_norm: 0.8744404862131343, iteration: 292950
loss: 0.966624915599823,grad_norm: 0.8801186446119356, iteration: 292951
loss: 0.9973574280738831,grad_norm: 0.9999996701014046, iteration: 292952
loss: 1.0078727006912231,grad_norm: 0.9371643848212938, iteration: 292953
loss: 0.978980302810669,grad_norm: 0.7921539235223158, iteration: 292954
loss: 1.0296378135681152,grad_norm: 0.9729758364200968, iteration: 292955
loss: 0.9893059730529785,grad_norm: 0.8532177351479145, iteration: 292956
loss: 0.9886810779571533,grad_norm: 0.9950468897554142, iteration: 292957
loss: 1.103968620300293,grad_norm: 0.999999333834696, iteration: 292958
loss: 0.9764901995658875,grad_norm: 0.9999991006979447, iteration: 292959
loss: 0.9816660284996033,grad_norm: 0.7462509771240855, iteration: 292960
loss: 0.976265549659729,grad_norm: 0.8892826436440009, iteration: 292961
loss: 0.9693754315376282,grad_norm: 0.7699831176719123, iteration: 292962
loss: 0.9888238310813904,grad_norm: 0.8317877760107723, iteration: 292963
loss: 1.010826587677002,grad_norm: 0.8144346796028931, iteration: 292964
loss: 0.9996055960655212,grad_norm: 0.8223292905707438, iteration: 292965
loss: 0.9671172499656677,grad_norm: 0.9999999224696249, iteration: 292966
loss: 0.99188232421875,grad_norm: 0.9999989396002071, iteration: 292967
loss: 0.995134174823761,grad_norm: 0.9999991440792528, iteration: 292968
loss: 0.9650259017944336,grad_norm: 0.9149122131060109, iteration: 292969
loss: 1.0075523853302002,grad_norm: 0.7819096114879024, iteration: 292970
loss: 1.004178762435913,grad_norm: 0.9999991758572564, iteration: 292971
loss: 1.0056020021438599,grad_norm: 0.7792303731139695, iteration: 292972
loss: 1.001872181892395,grad_norm: 0.8557622496857522, iteration: 292973
loss: 0.983957827091217,grad_norm: 0.7148306404646548, iteration: 292974
loss: 1.004184365272522,grad_norm: 0.9326406722630072, iteration: 292975
loss: 0.9811311364173889,grad_norm: 0.922185164691801, iteration: 292976
loss: 0.9850603342056274,grad_norm: 0.746249933946885, iteration: 292977
loss: 1.0083495378494263,grad_norm: 0.8205966303455166, iteration: 292978
loss: 1.0063881874084473,grad_norm: 0.9999996095911864, iteration: 292979
loss: 1.006792664527893,grad_norm: 0.8344659629792892, iteration: 292980
loss: 1.012681245803833,grad_norm: 0.9999992025671076, iteration: 292981
loss: 0.9798715114593506,grad_norm: 0.9339753981438631, iteration: 292982
loss: 0.9837722778320312,grad_norm: 0.8409635406179209, iteration: 292983
loss: 1.0055732727050781,grad_norm: 0.8003480593151208, iteration: 292984
loss: 1.008414626121521,grad_norm: 0.9999990809211086, iteration: 292985
loss: 0.9999838471412659,grad_norm: 0.8074393935927501, iteration: 292986
loss: 0.9889551997184753,grad_norm: 0.7915424731732565, iteration: 292987
loss: 1.0007860660552979,grad_norm: 0.8210114236601377, iteration: 292988
loss: 0.9848671555519104,grad_norm: 0.9999997093828379, iteration: 292989
loss: 1.0003652572631836,grad_norm: 0.7105308832799554, iteration: 292990
loss: 0.9989713430404663,grad_norm: 0.750629343133198, iteration: 292991
loss: 0.9879668354988098,grad_norm: 0.8799371209957649, iteration: 292992
loss: 1.0045676231384277,grad_norm: 0.9010145159794853, iteration: 292993
loss: 0.9796689748764038,grad_norm: 0.8048460029623759, iteration: 292994
loss: 0.9795294404029846,grad_norm: 0.913412063512607, iteration: 292995
loss: 1.0044466257095337,grad_norm: 0.9473299130169225, iteration: 292996
loss: 1.0509806871414185,grad_norm: 0.9999994275218541, iteration: 292997
loss: 1.0112943649291992,grad_norm: 0.8745023036626521, iteration: 292998
loss: 0.9838533997535706,grad_norm: 0.8353901352087152, iteration: 292999
loss: 1.0346406698226929,grad_norm: 0.7433300250155275, iteration: 293000
loss: 1.0129868984222412,grad_norm: 0.9086967151321842, iteration: 293001
loss: 1.0233291387557983,grad_norm: 0.9999990683402037, iteration: 293002
loss: 0.9701948165893555,grad_norm: 0.8672529437492842, iteration: 293003
loss: 0.9822039604187012,grad_norm: 0.7295179050815649, iteration: 293004
loss: 0.9606544375419617,grad_norm: 0.8816887090413987, iteration: 293005
loss: 1.0014405250549316,grad_norm: 0.9999990337305747, iteration: 293006
loss: 0.9987648129463196,grad_norm: 0.8316343355730235, iteration: 293007
loss: 0.9908815026283264,grad_norm: 0.9999991788645577, iteration: 293008
loss: 1.0136172771453857,grad_norm: 0.8970840825667785, iteration: 293009
loss: 0.9804545044898987,grad_norm: 0.8022816930924589, iteration: 293010
loss: 0.9675095677375793,grad_norm: 0.827789526329755, iteration: 293011
loss: 0.9776229858398438,grad_norm: 0.9613956363769381, iteration: 293012
loss: 0.9890955090522766,grad_norm: 0.7802799236749058, iteration: 293013
loss: 1.0268341302871704,grad_norm: 0.9032359606880535, iteration: 293014
loss: 1.0012621879577637,grad_norm: 0.9222328770861923, iteration: 293015
loss: 1.0358880758285522,grad_norm: 0.9999997613585229, iteration: 293016
loss: 1.025526762008667,grad_norm: 0.8915553901522063, iteration: 293017
loss: 0.971729040145874,grad_norm: 0.855478874353678, iteration: 293018
loss: 0.9648032188415527,grad_norm: 0.998494076061379, iteration: 293019
loss: 1.0110222101211548,grad_norm: 0.8434313936326555, iteration: 293020
loss: 0.990000307559967,grad_norm: 0.920384386841282, iteration: 293021
loss: 0.959423303604126,grad_norm: 0.9856557671281082, iteration: 293022
loss: 1.0129032135009766,grad_norm: 0.9999989506740078, iteration: 293023
loss: 0.9904518723487854,grad_norm: 0.8788577126493157, iteration: 293024
loss: 0.9564525485038757,grad_norm: 0.8522413694199009, iteration: 293025
loss: 1.0216974020004272,grad_norm: 0.8331077531788276, iteration: 293026
loss: 0.985956609249115,grad_norm: 0.8388212272668549, iteration: 293027
loss: 0.9764893651008606,grad_norm: 0.8359993819266511, iteration: 293028
loss: 1.0005788803100586,grad_norm: 0.9095543479539723, iteration: 293029
loss: 1.0342626571655273,grad_norm: 0.9443439394206562, iteration: 293030
loss: 1.034874439239502,grad_norm: 0.8520286953256911, iteration: 293031
loss: 0.9537919163703918,grad_norm: 0.8011830492351405, iteration: 293032
loss: 0.9790129065513611,grad_norm: 0.7999777922061547, iteration: 293033
loss: 1.0070868730545044,grad_norm: 0.8757059179864838, iteration: 293034
loss: 1.0458101034164429,grad_norm: 0.8604188970175964, iteration: 293035
loss: 0.9741655588150024,grad_norm: 0.9999990136727622, iteration: 293036
loss: 0.9580265879631042,grad_norm: 0.8163824820986855, iteration: 293037
loss: 1.0035581588745117,grad_norm: 0.8264548698458825, iteration: 293038
loss: 1.0083590745925903,grad_norm: 0.9353671928577265, iteration: 293039
loss: 0.9923393726348877,grad_norm: 0.8364940457808816, iteration: 293040
loss: 0.9777578115463257,grad_norm: 0.9808269436491879, iteration: 293041
loss: 0.9655834436416626,grad_norm: 0.8524566763576057, iteration: 293042
loss: 1.0451586246490479,grad_norm: 0.9999991474992651, iteration: 293043
loss: 1.0254443883895874,grad_norm: 0.9895643465179615, iteration: 293044
loss: 0.9744977355003357,grad_norm: 0.895732595639925, iteration: 293045
loss: 1.000467300415039,grad_norm: 0.8678434061739018, iteration: 293046
loss: 1.0050723552703857,grad_norm: 0.9077886198652917, iteration: 293047
loss: 0.9999292492866516,grad_norm: 0.8311599725691969, iteration: 293048
loss: 0.9936949610710144,grad_norm: 0.729340269095732, iteration: 293049
loss: 1.0193862915039062,grad_norm: 0.869865663792421, iteration: 293050
loss: 1.0510337352752686,grad_norm: 0.9999995569453745, iteration: 293051
loss: 0.9608190059661865,grad_norm: 0.9212972648778903, iteration: 293052
loss: 1.0482947826385498,grad_norm: 0.8048942587293605, iteration: 293053
loss: 1.0282652378082275,grad_norm: 0.999999650902971, iteration: 293054
loss: 0.9835836291313171,grad_norm: 0.848525757789206, iteration: 293055
loss: 0.9979467391967773,grad_norm: 0.9363265028719169, iteration: 293056
loss: 0.9943526387214661,grad_norm: 0.8809354449680082, iteration: 293057
loss: 1.0072425603866577,grad_norm: 0.8188868671110051, iteration: 293058
loss: 0.9770864248275757,grad_norm: 0.9999990521389209, iteration: 293059
loss: 1.0125572681427002,grad_norm: 0.7961418589829757, iteration: 293060
loss: 1.0165693759918213,grad_norm: 0.8705844636203133, iteration: 293061
loss: 1.0194804668426514,grad_norm: 0.9999993124114492, iteration: 293062
loss: 0.9848906993865967,grad_norm: 0.7707881057667572, iteration: 293063
loss: 0.9987119436264038,grad_norm: 0.9876643486027853, iteration: 293064
loss: 1.1026010513305664,grad_norm: 0.9999994205427214, iteration: 293065
loss: 0.9855505228042603,grad_norm: 0.9999989470676731, iteration: 293066
loss: 0.991504967212677,grad_norm: 0.8619933535313176, iteration: 293067
loss: 0.9774407148361206,grad_norm: 0.9999991333941994, iteration: 293068
loss: 1.0211501121520996,grad_norm: 0.8722366631543192, iteration: 293069
loss: 1.0499714612960815,grad_norm: 0.898781629694341, iteration: 293070
loss: 0.960978090763092,grad_norm: 0.9183088590886626, iteration: 293071
loss: 1.0118138790130615,grad_norm: 0.9945363169935492, iteration: 293072
loss: 0.9924213886260986,grad_norm: 0.9291263803765475, iteration: 293073
loss: 1.0182708501815796,grad_norm: 0.7890941822368984, iteration: 293074
loss: 0.9903122186660767,grad_norm: 0.8911934735497462, iteration: 293075
loss: 0.9860013127326965,grad_norm: 0.748550815187064, iteration: 293076
loss: 0.9874606728553772,grad_norm: 0.8894170004575144, iteration: 293077
loss: 1.0258045196533203,grad_norm: 0.885744964352974, iteration: 293078
loss: 0.9694772362709045,grad_norm: 0.9999991536746574, iteration: 293079
loss: 0.9582367539405823,grad_norm: 0.7996524682857028, iteration: 293080
loss: 1.009667158126831,grad_norm: 0.947515529308671, iteration: 293081
loss: 1.0403541326522827,grad_norm: 0.8290878059743281, iteration: 293082
loss: 0.9708548188209534,grad_norm: 0.9699865823921743, iteration: 293083
loss: 1.006310224533081,grad_norm: 0.7124735441868032, iteration: 293084
loss: 0.9863796830177307,grad_norm: 0.9171981910398959, iteration: 293085
loss: 1.143308162689209,grad_norm: 0.9999993388587568, iteration: 293086
loss: 0.9859386086463928,grad_norm: 0.7641855264488713, iteration: 293087
loss: 0.9700998067855835,grad_norm: 0.9999991436261212, iteration: 293088
loss: 1.0137766599655151,grad_norm: 0.8627550216923184, iteration: 293089
loss: 1.01047682762146,grad_norm: 0.9600801514399414, iteration: 293090
loss: 1.0584951639175415,grad_norm: 0.9999999050399905, iteration: 293091
loss: 1.0762953758239746,grad_norm: 0.9046052029139798, iteration: 293092
loss: 1.0090185403823853,grad_norm: 0.79604819940353, iteration: 293093
loss: 0.9946126937866211,grad_norm: 0.970966208389196, iteration: 293094
loss: 1.0091673135757446,grad_norm: 0.9932059053901174, iteration: 293095
loss: 0.9931690096855164,grad_norm: 0.7351201396427236, iteration: 293096
loss: 1.0337613821029663,grad_norm: 0.9043274159443565, iteration: 293097
loss: 1.0274200439453125,grad_norm: 1.0000000318144593, iteration: 293098
loss: 0.9959758520126343,grad_norm: 0.9999990810961029, iteration: 293099
loss: 0.9950186610221863,grad_norm: 0.9915453992601724, iteration: 293100
loss: 0.994746744632721,grad_norm: 0.8881144580374221, iteration: 293101
loss: 1.0766851902008057,grad_norm: 0.9999990151445453, iteration: 293102
loss: 0.958766520023346,grad_norm: 0.944967720008, iteration: 293103
loss: 0.986341655254364,grad_norm: 0.9032152948928338, iteration: 293104
loss: 1.0180782079696655,grad_norm: 0.9999999140602602, iteration: 293105
loss: 1.015805959701538,grad_norm: 0.8712036894354546, iteration: 293106
loss: 0.9961676597595215,grad_norm: 0.7632467144509285, iteration: 293107
loss: 1.0595927238464355,grad_norm: 0.9999994903434494, iteration: 293108
loss: 1.0302038192749023,grad_norm: 0.9420007823790736, iteration: 293109
loss: 0.9963106513023376,grad_norm: 0.7192304089280561, iteration: 293110
loss: 0.9774744510650635,grad_norm: 0.7086310531849696, iteration: 293111
loss: 0.9872443675994873,grad_norm: 0.8188901887112978, iteration: 293112
loss: 0.9871287941932678,grad_norm: 0.8169872681948513, iteration: 293113
loss: 0.9997724294662476,grad_norm: 0.8386525984821561, iteration: 293114
loss: 0.9895303249359131,grad_norm: 0.9999992148164326, iteration: 293115
loss: 0.9790331721305847,grad_norm: 0.8978114681402097, iteration: 293116
loss: 1.0390437841415405,grad_norm: 0.8258065165084549, iteration: 293117
loss: 0.9719734191894531,grad_norm: 0.8436680395612718, iteration: 293118
loss: 1.0021021366119385,grad_norm: 0.9489690911092733, iteration: 293119
loss: 1.0347785949707031,grad_norm: 0.9371242895043768, iteration: 293120
loss: 1.0350385904312134,grad_norm: 0.907141363221438, iteration: 293121
loss: 0.9955354928970337,grad_norm: 0.9999990063573742, iteration: 293122
loss: 0.9765980839729309,grad_norm: 0.8509529235857668, iteration: 293123
loss: 1.0299593210220337,grad_norm: 0.9432532828922124, iteration: 293124
loss: 0.9845206141471863,grad_norm: 0.9841069412214485, iteration: 293125
loss: 0.9886215925216675,grad_norm: 0.9358201717351257, iteration: 293126
loss: 1.0282211303710938,grad_norm: 0.9999991913871262, iteration: 293127
loss: 0.9957247376441956,grad_norm: 0.7934839334420495, iteration: 293128
loss: 0.9937489032745361,grad_norm: 0.7476441449089727, iteration: 293129
loss: 0.9866076111793518,grad_norm: 0.7467139594504809, iteration: 293130
loss: 0.9831531047821045,grad_norm: 0.8894488145983647, iteration: 293131
loss: 0.977274477481842,grad_norm: 0.8320990564967792, iteration: 293132
loss: 1.0132777690887451,grad_norm: 0.9191213106190911, iteration: 293133
loss: 1.0253316164016724,grad_norm: 0.8179860646951846, iteration: 293134
loss: 0.9652422070503235,grad_norm: 0.7851388311672551, iteration: 293135
loss: 1.0091981887817383,grad_norm: 0.847463689492039, iteration: 293136
loss: 1.000716209411621,grad_norm: 0.9100791661758398, iteration: 293137
loss: 1.0044286251068115,grad_norm: 0.9729998799968113, iteration: 293138
loss: 1.0086771249771118,grad_norm: 0.8770820435283292, iteration: 293139
loss: 1.000969409942627,grad_norm: 0.8220423272077683, iteration: 293140
loss: 0.9570654034614563,grad_norm: 0.8516145929156527, iteration: 293141
loss: 0.9722279906272888,grad_norm: 0.7898829513657017, iteration: 293142
loss: 0.9866255521774292,grad_norm: 0.8290967390755996, iteration: 293143
loss: 1.0691152811050415,grad_norm: 0.9999993558684501, iteration: 293144
loss: 1.0015355348587036,grad_norm: 0.8469444883335313, iteration: 293145
loss: 0.9911292195320129,grad_norm: 0.7120845117369154, iteration: 293146
loss: 1.016287922859192,grad_norm: 0.7625771824970854, iteration: 293147
loss: 1.0099738836288452,grad_norm: 0.7392918298679897, iteration: 293148
loss: 0.9971346855163574,grad_norm: 0.8470282850029468, iteration: 293149
loss: 1.0218075513839722,grad_norm: 0.9999992147590387, iteration: 293150
loss: 0.989009439945221,grad_norm: 0.815773837128735, iteration: 293151
loss: 1.0199384689331055,grad_norm: 0.8699369772458998, iteration: 293152
loss: 0.9861881136894226,grad_norm: 0.813507380833429, iteration: 293153
loss: 0.9715113043785095,grad_norm: 0.8285302755317365, iteration: 293154
loss: 0.9821593761444092,grad_norm: 0.9473496580488722, iteration: 293155
loss: 1.0210028886795044,grad_norm: 0.7663657393957578, iteration: 293156
loss: 0.9861510992050171,grad_norm: 0.9131070542491834, iteration: 293157
loss: 0.9866541028022766,grad_norm: 0.9999989543383841, iteration: 293158
loss: 1.0190678834915161,grad_norm: 0.9999990694267942, iteration: 293159
loss: 0.9951708912849426,grad_norm: 0.8768057708476155, iteration: 293160
loss: 0.9995660185813904,grad_norm: 0.9205762995008392, iteration: 293161
loss: 1.00044584274292,grad_norm: 0.9026322616788216, iteration: 293162
loss: 1.0014559030532837,grad_norm: 0.9081348545901184, iteration: 293163
loss: 1.0015867948532104,grad_norm: 0.9999995629266761, iteration: 293164
loss: 1.0003477334976196,grad_norm: 0.9940856165002588, iteration: 293165
loss: 0.9721044301986694,grad_norm: 0.8547719293434111, iteration: 293166
loss: 0.9948771595954895,grad_norm: 0.769769975095692, iteration: 293167
loss: 1.0054529905319214,grad_norm: 0.8396111505530326, iteration: 293168
loss: 1.1694210767745972,grad_norm: 0.9999997449143306, iteration: 293169
loss: 1.0616682767868042,grad_norm: 0.9999991501817459, iteration: 293170
loss: 1.0226835012435913,grad_norm: 0.9999991996723158, iteration: 293171
loss: 1.0403879880905151,grad_norm: 0.7708240180582162, iteration: 293172
loss: 1.03558349609375,grad_norm: 0.886262125679927, iteration: 293173
loss: 0.9754762649536133,grad_norm: 0.7984811237192556, iteration: 293174
loss: 0.9952021241188049,grad_norm: 0.8340086045484625, iteration: 293175
loss: 1.2043054103851318,grad_norm: 0.9999993382838739, iteration: 293176
loss: 1.0037095546722412,grad_norm: 0.7944881123433151, iteration: 293177
loss: 1.021287202835083,grad_norm: 0.9999995129476464, iteration: 293178
loss: 0.9831960201263428,grad_norm: 0.8969132843675508, iteration: 293179
loss: 0.9738300442695618,grad_norm: 0.902978119855793, iteration: 293180
loss: 1.0329017639160156,grad_norm: 0.9477401018812831, iteration: 293181
loss: 0.9798454642295837,grad_norm: 0.8955291434848237, iteration: 293182
loss: 0.9958345293998718,grad_norm: 0.9999991969117221, iteration: 293183
loss: 1.0263898372650146,grad_norm: 0.8737271674352648, iteration: 293184
loss: 0.9781856536865234,grad_norm: 0.9286499116221125, iteration: 293185
loss: 0.994976818561554,grad_norm: 0.8681150404660527, iteration: 293186
loss: 1.019763469696045,grad_norm: 0.8544028813382433, iteration: 293187
loss: 0.9697785973548889,grad_norm: 0.7585438463765766, iteration: 293188
loss: 0.9821966886520386,grad_norm: 0.8094991313628707, iteration: 293189
loss: 0.9612721800804138,grad_norm: 0.8654156403428019, iteration: 293190
loss: 1.0345395803451538,grad_norm: 0.8482283864332083, iteration: 293191
loss: 1.0164612531661987,grad_norm: 0.7777929019884325, iteration: 293192
loss: 0.9926177263259888,grad_norm: 0.9999991725473618, iteration: 293193
loss: 0.9828299283981323,grad_norm: 0.999999094256123, iteration: 293194
loss: 0.9590901136398315,grad_norm: 0.9999990734136086, iteration: 293195
loss: 1.0369882583618164,grad_norm: 0.8889542889031395, iteration: 293196
loss: 1.0167704820632935,grad_norm: 0.9999990173401241, iteration: 293197
loss: 0.9779964089393616,grad_norm: 0.726278682098722, iteration: 293198
loss: 0.9984326362609863,grad_norm: 0.7630876504276677, iteration: 293199
loss: 1.057127594947815,grad_norm: 0.9999992550780118, iteration: 293200
loss: 1.0003048181533813,grad_norm: 0.7629569414275944, iteration: 293201
loss: 0.9882969856262207,grad_norm: 0.8665359773276732, iteration: 293202
loss: 1.0066330432891846,grad_norm: 0.7996259539968347, iteration: 293203
loss: 0.9972687363624573,grad_norm: 0.8035584442919639, iteration: 293204
loss: 0.9901423454284668,grad_norm: 0.8813831184992807, iteration: 293205
loss: 1.0198689699172974,grad_norm: 0.9488060521442218, iteration: 293206
loss: 0.9986599087715149,grad_norm: 0.7540772019233561, iteration: 293207
loss: 1.0033886432647705,grad_norm: 0.9661389789882024, iteration: 293208
loss: 0.9699630737304688,grad_norm: 0.9585151309348264, iteration: 293209
loss: 0.994213879108429,grad_norm: 0.9999997951304275, iteration: 293210
loss: 1.0438600778579712,grad_norm: 0.7481907634556743, iteration: 293211
loss: 1.009917140007019,grad_norm: 0.8558018497078643, iteration: 293212
loss: 1.0145795345306396,grad_norm: 0.9743594161815443, iteration: 293213
loss: 0.9597842693328857,grad_norm: 0.8297268493480204, iteration: 293214
loss: 0.9908784627914429,grad_norm: 0.8161088015576952, iteration: 293215
loss: 1.1271781921386719,grad_norm: 0.9999998965310282, iteration: 293216
loss: 0.9943678975105286,grad_norm: 0.8311835536792596, iteration: 293217
loss: 0.9653278589248657,grad_norm: 0.8569156989244939, iteration: 293218
loss: 0.9987117052078247,grad_norm: 0.8517134984507386, iteration: 293219
loss: 0.9844255447387695,grad_norm: 0.8135610911835299, iteration: 293220
loss: 0.9754393100738525,grad_norm: 0.8049186500247051, iteration: 293221
loss: 0.9847952723503113,grad_norm: 0.9999991961205622, iteration: 293222
loss: 1.0089391469955444,grad_norm: 0.796214829846135, iteration: 293223
loss: 0.9812093377113342,grad_norm: 0.8873407333718664, iteration: 293224
loss: 1.084920883178711,grad_norm: 0.9999993221186393, iteration: 293225
loss: 1.0176451206207275,grad_norm: 0.9294471059888345, iteration: 293226
loss: 0.9873752593994141,grad_norm: 0.9732317445911953, iteration: 293227
loss: 0.9866483807563782,grad_norm: 0.8552109268282061, iteration: 293228
loss: 1.0420992374420166,grad_norm: 1.0000000362965766, iteration: 293229
loss: 1.0180586576461792,grad_norm: 0.8358883170367416, iteration: 293230
loss: 0.9971681833267212,grad_norm: 0.8289732311979752, iteration: 293231
loss: 1.0316083431243896,grad_norm: 0.8748552313678473, iteration: 293232
loss: 0.9967262744903564,grad_norm: 0.9762423889436236, iteration: 293233
loss: 1.0190157890319824,grad_norm: 0.826174834272243, iteration: 293234
loss: 1.060163974761963,grad_norm: 0.9999993373910187, iteration: 293235
loss: 1.0234798192977905,grad_norm: 0.7042774177971789, iteration: 293236
loss: 0.9819498062133789,grad_norm: 0.9017029088909447, iteration: 293237
loss: 0.9840584397315979,grad_norm: 0.7497670676354431, iteration: 293238
loss: 1.0048197507858276,grad_norm: 0.8262127490372062, iteration: 293239
loss: 1.0414800643920898,grad_norm: 0.7788158217897242, iteration: 293240
loss: 1.0233006477355957,grad_norm: 0.7838464044927099, iteration: 293241
loss: 0.9651682376861572,grad_norm: 0.846215998475795, iteration: 293242
loss: 0.9883772134780884,grad_norm: 0.8515752038579147, iteration: 293243
loss: 1.0194001197814941,grad_norm: 0.9288006813193929, iteration: 293244
loss: 0.991794228553772,grad_norm: 0.8250136741581294, iteration: 293245
loss: 0.9881181120872498,grad_norm: 0.9108920246021535, iteration: 293246
loss: 1.0301069021224976,grad_norm: 0.7013765409481749, iteration: 293247
loss: 1.0113608837127686,grad_norm: 0.9918108213615855, iteration: 293248
loss: 0.9878503680229187,grad_norm: 0.9999995419620116, iteration: 293249
loss: 0.9911555647850037,grad_norm: 0.7782344243833184, iteration: 293250
loss: 0.986242949962616,grad_norm: 0.9599124553916047, iteration: 293251
loss: 0.970136821269989,grad_norm: 0.9350188245191581, iteration: 293252
loss: 0.9977446794509888,grad_norm: 0.8268087396034827, iteration: 293253
loss: 1.0105607509613037,grad_norm: 0.7435088514224129, iteration: 293254
loss: 1.1831424236297607,grad_norm: 0.9999996108503871, iteration: 293255
loss: 1.0206024646759033,grad_norm: 0.8494500783392334, iteration: 293256
loss: 0.9764644503593445,grad_norm: 0.8489593529138793, iteration: 293257
loss: 1.0038163661956787,grad_norm: 0.8170745672370269, iteration: 293258
loss: 1.0217878818511963,grad_norm: 0.7686852288052208, iteration: 293259
loss: 0.98163241147995,grad_norm: 0.8369822709207435, iteration: 293260
loss: 0.9945483803749084,grad_norm: 0.8423075354080634, iteration: 293261
loss: 1.0500108003616333,grad_norm: 0.999999146223537, iteration: 293262
loss: 0.9635955691337585,grad_norm: 0.8885324650990833, iteration: 293263
loss: 1.0083774328231812,grad_norm: 0.9596755521196833, iteration: 293264
loss: 1.0976611375808716,grad_norm: 0.9999991491739776, iteration: 293265
loss: 1.0189673900604248,grad_norm: 0.924146539686652, iteration: 293266
loss: 1.0387543439865112,grad_norm: 0.9999992042314553, iteration: 293267
loss: 0.9921332001686096,grad_norm: 0.8104928755294927, iteration: 293268
loss: 0.9607763886451721,grad_norm: 0.924947562937305, iteration: 293269
loss: 0.9874687194824219,grad_norm: 0.8697356443604661, iteration: 293270
loss: 0.9771562218666077,grad_norm: 0.7925276811616019, iteration: 293271
loss: 0.9979139566421509,grad_norm: 0.9807801428987609, iteration: 293272
loss: 1.00734281539917,grad_norm: 0.7656753696988389, iteration: 293273
loss: 1.0203273296356201,grad_norm: 0.999999969868228, iteration: 293274
loss: 1.0510188341140747,grad_norm: 0.9999999438485023, iteration: 293275
loss: 0.9890622496604919,grad_norm: 0.9999990685430042, iteration: 293276
loss: 1.0021220445632935,grad_norm: 0.9946671793767856, iteration: 293277
loss: 1.0322710275650024,grad_norm: 0.7239977607328532, iteration: 293278
loss: 1.0298755168914795,grad_norm: 0.8428714314787024, iteration: 293279
loss: 1.0536460876464844,grad_norm: 0.8993635349825554, iteration: 293280
loss: 0.9961240291595459,grad_norm: 0.8490582158236801, iteration: 293281
loss: 1.0373659133911133,grad_norm: 0.8870017452585929, iteration: 293282
loss: 1.0683988332748413,grad_norm: 0.9999993804790513, iteration: 293283
loss: 1.0052580833435059,grad_norm: 0.7382039515410125, iteration: 293284
loss: 1.0034805536270142,grad_norm: 0.8877986199926718, iteration: 293285
loss: 1.0075587034225464,grad_norm: 0.9999997722565024, iteration: 293286
loss: 0.9937042593955994,grad_norm: 0.8978867901879302, iteration: 293287
loss: 1.0298683643341064,grad_norm: 0.8264905375005638, iteration: 293288
loss: 0.9764039516448975,grad_norm: 0.8087327145277037, iteration: 293289
loss: 0.9805507659912109,grad_norm: 0.999999580757788, iteration: 293290
loss: 1.0165278911590576,grad_norm: 0.9028476895862196, iteration: 293291
loss: 1.002548098564148,grad_norm: 0.9999994456032851, iteration: 293292
loss: 0.9976577758789062,grad_norm: 0.8219949420760029, iteration: 293293
loss: 1.0051288604736328,grad_norm: 0.9999992364837634, iteration: 293294
loss: 0.9887731671333313,grad_norm: 0.8522040956909154, iteration: 293295
loss: 0.9723817706108093,grad_norm: 0.9100724142466505, iteration: 293296
loss: 0.9821436405181885,grad_norm: 0.6928196438508716, iteration: 293297
loss: 0.9584842920303345,grad_norm: 0.786680473440038, iteration: 293298
loss: 1.0094711780548096,grad_norm: 0.7644919342806785, iteration: 293299
loss: 0.9682998061180115,grad_norm: 0.8287635718183284, iteration: 293300
loss: 0.9844273328781128,grad_norm: 0.9743986372037295, iteration: 293301
loss: 0.9797766208648682,grad_norm: 0.9360802567989477, iteration: 293302
loss: 1.0006712675094604,grad_norm: 0.9213469858140291, iteration: 293303
loss: 0.9917441010475159,grad_norm: 0.9052855600103731, iteration: 293304
loss: 1.0585700273513794,grad_norm: 0.98012981661552, iteration: 293305
loss: 1.009573221206665,grad_norm: 0.9999992379470442, iteration: 293306
loss: 1.052323579788208,grad_norm: 0.8943810742314756, iteration: 293307
loss: 0.9777007699012756,grad_norm: 0.7658497066508435, iteration: 293308
loss: 0.9700424671173096,grad_norm: 0.7598992207684476, iteration: 293309
loss: 0.9770275354385376,grad_norm: 0.9166121748826255, iteration: 293310
loss: 1.0075756311416626,grad_norm: 0.9999993503597236, iteration: 293311
loss: 1.0058637857437134,grad_norm: 0.8263422617511781, iteration: 293312
loss: 0.9955257177352905,grad_norm: 0.7294420262173312, iteration: 293313
loss: 1.082773208618164,grad_norm: 0.8760616260385173, iteration: 293314
loss: 1.0186413526535034,grad_norm: 0.9999991146848111, iteration: 293315
loss: 1.1193079948425293,grad_norm: 0.9999994611148126, iteration: 293316
loss: 1.0243420600891113,grad_norm: 0.9223227172020589, iteration: 293317
loss: 0.9732909202575684,grad_norm: 0.7615271588379656, iteration: 293318
loss: 0.9978283643722534,grad_norm: 0.8491887141493115, iteration: 293319
loss: 1.001298427581787,grad_norm: 0.8268612235050249, iteration: 293320
loss: 1.008829951286316,grad_norm: 0.9999989705200876, iteration: 293321
loss: 1.0075069665908813,grad_norm: 0.8775819686074684, iteration: 293322
loss: 1.00641667842865,grad_norm: 0.9565688839945913, iteration: 293323
loss: 0.9665238261222839,grad_norm: 0.8368234520899064, iteration: 293324
loss: 1.0204088687896729,grad_norm: 0.6768472008046478, iteration: 293325
loss: 1.0163824558258057,grad_norm: 0.7949517741513484, iteration: 293326
loss: 0.9526770114898682,grad_norm: 0.8999977194798437, iteration: 293327
loss: 1.0150566101074219,grad_norm: 0.8051885722606235, iteration: 293328
loss: 1.0687613487243652,grad_norm: 0.9999998805631117, iteration: 293329
loss: 0.9925654530525208,grad_norm: 0.9999990083534824, iteration: 293330
loss: 1.0090245008468628,grad_norm: 0.9368357432668508, iteration: 293331
loss: 1.0043525695800781,grad_norm: 0.897325303802107, iteration: 293332
loss: 0.9873232841491699,grad_norm: 0.8481332625605881, iteration: 293333
loss: 0.9751482009887695,grad_norm: 0.8415243776470173, iteration: 293334
loss: 0.9994567632675171,grad_norm: 0.9153148636194512, iteration: 293335
loss: 0.9910721778869629,grad_norm: 0.9191791001807232, iteration: 293336
loss: 0.9847058653831482,grad_norm: 0.8032485168644712, iteration: 293337
loss: 0.9732335209846497,grad_norm: 0.9999990769554727, iteration: 293338
loss: 0.9940614104270935,grad_norm: 0.7782693104617524, iteration: 293339
loss: 1.0262901782989502,grad_norm: 0.9550162235351881, iteration: 293340
loss: 0.9727702736854553,grad_norm: 0.9999991222992777, iteration: 293341
loss: 1.01239812374115,grad_norm: 0.8117522353051313, iteration: 293342
loss: 1.174574851989746,grad_norm: 0.9999997221436828, iteration: 293343
loss: 1.0254406929016113,grad_norm: 0.7762003912403381, iteration: 293344
loss: 0.9769301414489746,grad_norm: 0.9079912908065699, iteration: 293345
loss: 1.0522823333740234,grad_norm: 0.907277185674128, iteration: 293346
loss: 1.031940221786499,grad_norm: 0.9999993399897587, iteration: 293347
loss: 0.9747593402862549,grad_norm: 0.9999996596190923, iteration: 293348
loss: 0.9845303297042847,grad_norm: 0.8941441942782005, iteration: 293349
loss: 0.9802166223526001,grad_norm: 0.8871298332837153, iteration: 293350
loss: 1.0228195190429688,grad_norm: 0.9999993090373017, iteration: 293351
loss: 1.0071083307266235,grad_norm: 0.949571606111469, iteration: 293352
loss: 0.9865243434906006,grad_norm: 0.8480584437049327, iteration: 293353
loss: 1.0149363279342651,grad_norm: 0.9691853634613057, iteration: 293354
loss: 0.9960505962371826,grad_norm: 0.9293595973020997, iteration: 293355
loss: 0.9693171977996826,grad_norm: 0.8599233800314455, iteration: 293356
loss: 1.034906029701233,grad_norm: 0.9999993962496302, iteration: 293357
loss: 0.9926992654800415,grad_norm: 0.9197832862660403, iteration: 293358
loss: 1.0090593099594116,grad_norm: 0.8268190360124493, iteration: 293359
loss: 1.0028215646743774,grad_norm: 0.9999991047867384, iteration: 293360
loss: 1.1604136228561401,grad_norm: 0.9999995823199356, iteration: 293361
loss: 1.011843204498291,grad_norm: 0.8120587195161226, iteration: 293362
loss: 0.9955062866210938,grad_norm: 0.9999992261018211, iteration: 293363
loss: 0.9962799549102783,grad_norm: 0.7960078260997585, iteration: 293364
loss: 1.0159214735031128,grad_norm: 0.9999993397486834, iteration: 293365
loss: 0.9688243269920349,grad_norm: 0.6761876992887, iteration: 293366
loss: 1.0794236660003662,grad_norm: 0.9999999829557044, iteration: 293367
loss: 0.9799804091453552,grad_norm: 0.9999990919816116, iteration: 293368
loss: 0.996474027633667,grad_norm: 0.8226105334566329, iteration: 293369
loss: 1.0041640996932983,grad_norm: 0.779947590623403, iteration: 293370
loss: 0.9816737174987793,grad_norm: 0.9999991765276013, iteration: 293371
loss: 1.0047998428344727,grad_norm: 0.9135145653135658, iteration: 293372
loss: 1.0210988521575928,grad_norm: 0.7925589659316709, iteration: 293373
loss: 1.0274678468704224,grad_norm: 0.9999998212343273, iteration: 293374
loss: 0.9558893442153931,grad_norm: 0.8187629644250584, iteration: 293375
loss: 1.0946329832077026,grad_norm: 0.999999666203787, iteration: 293376
loss: 0.9564822912216187,grad_norm: 0.7967582804413434, iteration: 293377
loss: 0.9358532428741455,grad_norm: 0.8394149352198164, iteration: 293378
loss: 0.9965227842330933,grad_norm: 0.7401197110639567, iteration: 293379
loss: 1.0098450183868408,grad_norm: 0.8491473675021098, iteration: 293380
loss: 1.0192623138427734,grad_norm: 0.8407115549164598, iteration: 293381
loss: 0.9554528594017029,grad_norm: 0.8290503241508294, iteration: 293382
loss: 1.0039228200912476,grad_norm: 0.8527603883026709, iteration: 293383
loss: 0.96970534324646,grad_norm: 0.9154114955681122, iteration: 293384
loss: 1.0067893266677856,grad_norm: 0.817722398896868, iteration: 293385
loss: 0.9991918802261353,grad_norm: 0.7778982033859263, iteration: 293386
loss: 0.9982722997665405,grad_norm: 0.8738142526855258, iteration: 293387
loss: 0.9994714856147766,grad_norm: 0.818036863959357, iteration: 293388
loss: 1.0232150554656982,grad_norm: 0.9999994327712632, iteration: 293389
loss: 1.002543330192566,grad_norm: 0.99999926789818, iteration: 293390
loss: 1.0097434520721436,grad_norm: 0.8072858967612981, iteration: 293391
loss: 0.9935036301612854,grad_norm: 0.7125568089199766, iteration: 293392
loss: 0.9896402955055237,grad_norm: 0.8200591840902376, iteration: 293393
loss: 1.0875630378723145,grad_norm: 0.9999997225336037, iteration: 293394
loss: 1.047297716140747,grad_norm: 0.9999992536779756, iteration: 293395
loss: 1.0887690782546997,grad_norm: 0.9999994289565133, iteration: 293396
loss: 1.0272438526153564,grad_norm: 0.8662671980628774, iteration: 293397
loss: 1.0182569026947021,grad_norm: 0.7473142335595806, iteration: 293398
loss: 0.99261075258255,grad_norm: 0.7577809412151916, iteration: 293399
loss: 1.0220563411712646,grad_norm: 0.9927303439429396, iteration: 293400
loss: 1.0257447957992554,grad_norm: 0.9999992753286086, iteration: 293401
loss: 0.980993926525116,grad_norm: 0.8426624258224013, iteration: 293402
loss: 1.0047125816345215,grad_norm: 0.9999995390763448, iteration: 293403
loss: 1.0203273296356201,grad_norm: 0.9459911839487715, iteration: 293404
loss: 0.9906104803085327,grad_norm: 0.8993864647760325, iteration: 293405
loss: 0.9994837641716003,grad_norm: 0.7697680097147108, iteration: 293406
loss: 0.9964318871498108,grad_norm: 0.8871221859224834, iteration: 293407
loss: 0.9796085953712463,grad_norm: 0.8781317838102841, iteration: 293408
loss: 1.0033648014068604,grad_norm: 0.8955264438188444, iteration: 293409
loss: 0.994555652141571,grad_norm: 0.8500006502393462, iteration: 293410
loss: 1.159195899963379,grad_norm: 0.9999998157824833, iteration: 293411
loss: 0.9683188796043396,grad_norm: 0.9999990857568065, iteration: 293412
loss: 0.9834915995597839,grad_norm: 0.8503838065777855, iteration: 293413
loss: 0.9852929711341858,grad_norm: 0.9792153751456867, iteration: 293414
loss: 0.9619064331054688,grad_norm: 0.8107572891804085, iteration: 293415
loss: 1.0099806785583496,grad_norm: 0.8347203691828718, iteration: 293416
loss: 1.0192005634307861,grad_norm: 0.6722233441580746, iteration: 293417
loss: 1.0239866971969604,grad_norm: 0.8856039523401639, iteration: 293418
loss: 0.9727814197540283,grad_norm: 0.9471112582082485, iteration: 293419
loss: 1.0576093196868896,grad_norm: 0.9999992228619164, iteration: 293420
loss: 0.9596505761146545,grad_norm: 0.8699724222176126, iteration: 293421
loss: 1.0878340005874634,grad_norm: 0.99999967482973, iteration: 293422
loss: 1.0169090032577515,grad_norm: 0.72697031123548, iteration: 293423
loss: 1.0244907140731812,grad_norm: 0.9999993351680047, iteration: 293424
loss: 0.9910451173782349,grad_norm: 0.7692681648138721, iteration: 293425
loss: 1.0105668306350708,grad_norm: 0.9231377101240442, iteration: 293426
loss: 0.9855492115020752,grad_norm: 0.9421494299895044, iteration: 293427
loss: 0.9888954162597656,grad_norm: 0.9541761586206137, iteration: 293428
loss: 1.0558966398239136,grad_norm: 0.9999998334217653, iteration: 293429
loss: 0.9568568468093872,grad_norm: 0.9117610034143979, iteration: 293430
loss: 0.9793931245803833,grad_norm: 0.9999991670913332, iteration: 293431
loss: 0.985261082649231,grad_norm: 0.9999991960506981, iteration: 293432
loss: 1.0126032829284668,grad_norm: 0.8280547684173707, iteration: 293433
loss: 0.9951909780502319,grad_norm: 0.8477191278699097, iteration: 293434
loss: 0.9993626475334167,grad_norm: 0.9999995210232361, iteration: 293435
loss: 0.9600850939750671,grad_norm: 0.8004638458726115, iteration: 293436
loss: 1.0426342487335205,grad_norm: 0.9098927134713837, iteration: 293437
loss: 1.0069977045059204,grad_norm: 0.8119713830592052, iteration: 293438
loss: 0.9689281582832336,grad_norm: 0.8412110537822598, iteration: 293439
loss: 0.9888724088668823,grad_norm: 0.7140685339781927, iteration: 293440
loss: 1.0160222053527832,grad_norm: 0.6784542999657864, iteration: 293441
loss: 0.9928902983665466,grad_norm: 0.9999992827999636, iteration: 293442
loss: 1.0022509098052979,grad_norm: 0.9999991800773177, iteration: 293443
loss: 1.0118911266326904,grad_norm: 0.8972507407112407, iteration: 293444
loss: 0.9793434739112854,grad_norm: 0.7933179605153604, iteration: 293445
loss: 0.9835054278373718,grad_norm: 0.8664512582368294, iteration: 293446
loss: 1.0274521112442017,grad_norm: 0.9999996400998151, iteration: 293447
loss: 1.003434181213379,grad_norm: 0.956276882884066, iteration: 293448
loss: 1.01697838306427,grad_norm: 0.7996320127540218, iteration: 293449
loss: 0.9806948900222778,grad_norm: 0.9676074618835933, iteration: 293450
loss: 1.0236698389053345,grad_norm: 0.9082631391673531, iteration: 293451
loss: 1.0334447622299194,grad_norm: 0.9041314053412961, iteration: 293452
loss: 0.9944139719009399,grad_norm: 0.9999994270100722, iteration: 293453
loss: 0.9666981101036072,grad_norm: 0.7886005935682217, iteration: 293454
loss: 1.005235195159912,grad_norm: 0.9368153019385944, iteration: 293455
loss: 1.0132697820663452,grad_norm: 0.8488856269772389, iteration: 293456
loss: 0.9975404739379883,grad_norm: 0.8677730370043507, iteration: 293457
loss: 1.019951581954956,grad_norm: 0.8012363839076576, iteration: 293458
loss: 1.00080144405365,grad_norm: 0.9999990120770559, iteration: 293459
loss: 1.016530990600586,grad_norm: 0.9999989717571999, iteration: 293460
loss: 1.0109736919403076,grad_norm: 0.8495793766378129, iteration: 293461
loss: 0.9857397079467773,grad_norm: 0.74681042638405, iteration: 293462
loss: 0.9839414358139038,grad_norm: 0.8529574008150589, iteration: 293463
loss: 1.0287708044052124,grad_norm: 0.7852849924229743, iteration: 293464
loss: 1.0325738191604614,grad_norm: 0.831620681312501, iteration: 293465
loss: 1.0438108444213867,grad_norm: 0.9999991370733881, iteration: 293466
loss: 0.9950223565101624,grad_norm: 0.9999991888022846, iteration: 293467
loss: 0.983485758304596,grad_norm: 0.8943538257143215, iteration: 293468
loss: 0.9903470873832703,grad_norm: 0.9999992451728889, iteration: 293469
loss: 1.0147818326950073,grad_norm: 0.8268111369795162, iteration: 293470
loss: 1.1665204763412476,grad_norm: 0.9999999279286179, iteration: 293471
loss: 0.9646263718605042,grad_norm: 0.8822955817107011, iteration: 293472
loss: 1.003602385520935,grad_norm: 0.9175678421600499, iteration: 293473
loss: 1.0188603401184082,grad_norm: 0.9999991259857198, iteration: 293474
loss: 1.0018320083618164,grad_norm: 0.8233329139769643, iteration: 293475
loss: 0.9830920696258545,grad_norm: 0.8577701734550619, iteration: 293476
loss: 1.0270155668258667,grad_norm: 0.8805758033262348, iteration: 293477
loss: 1.0044498443603516,grad_norm: 0.8688316744299553, iteration: 293478
loss: 0.9840788841247559,grad_norm: 0.7920846658244747, iteration: 293479
loss: 1.0084322690963745,grad_norm: 0.7429197928065197, iteration: 293480
loss: 0.9827398061752319,grad_norm: 0.8639203445854932, iteration: 293481
loss: 0.9950351119041443,grad_norm: 0.8031616853664351, iteration: 293482
loss: 1.0100890398025513,grad_norm: 0.9323387509227061, iteration: 293483
loss: 1.0119996070861816,grad_norm: 0.9999991464896327, iteration: 293484
loss: 0.9808390736579895,grad_norm: 0.9080677156883046, iteration: 293485
loss: 1.0116772651672363,grad_norm: 0.9999991244160981, iteration: 293486
loss: 0.9729185104370117,grad_norm: 0.8393233330968306, iteration: 293487
loss: 1.046649694442749,grad_norm: 0.7654488696727316, iteration: 293488
loss: 0.9994856715202332,grad_norm: 0.9179181746851722, iteration: 293489
loss: 1.0265192985534668,grad_norm: 0.9999991456785108, iteration: 293490
loss: 1.0073795318603516,grad_norm: 0.8600339096577965, iteration: 293491
loss: 0.9514655470848083,grad_norm: 0.7831420045009256, iteration: 293492
loss: 1.0406328439712524,grad_norm: 0.9348818955254066, iteration: 293493
loss: 0.9937506914138794,grad_norm: 0.7986984641863397, iteration: 293494
loss: 1.0189474821090698,grad_norm: 0.9342552092957134, iteration: 293495
loss: 0.9821274876594543,grad_norm: 0.9999990759674378, iteration: 293496
loss: 1.0019983053207397,grad_norm: 0.9290985800255336, iteration: 293497
loss: 0.9489755034446716,grad_norm: 0.9310408047796264, iteration: 293498
loss: 1.010735273361206,grad_norm: 0.9999992255407937, iteration: 293499
loss: 0.9812313318252563,grad_norm: 0.8895829333402062, iteration: 293500
loss: 1.025174856185913,grad_norm: 0.8051168562484373, iteration: 293501
loss: 1.0354350805282593,grad_norm: 0.8310140878983663, iteration: 293502
loss: 0.9610187411308289,grad_norm: 0.9660179082673445, iteration: 293503
loss: 0.9849593043327332,grad_norm: 0.9999991523139542, iteration: 293504
loss: 1.0310009717941284,grad_norm: 0.9999990576242432, iteration: 293505
loss: 1.058727502822876,grad_norm: 0.9999993495428892, iteration: 293506
loss: 0.952603816986084,grad_norm: 0.8620984251566463, iteration: 293507
loss: 1.1152100563049316,grad_norm: 0.9999993861949057, iteration: 293508
loss: 1.013967752456665,grad_norm: 0.9155852741688323, iteration: 293509
loss: 0.9520661234855652,grad_norm: 0.9985845690152783, iteration: 293510
loss: 0.9609410166740417,grad_norm: 0.9412377307176845, iteration: 293511
loss: 1.0560420751571655,grad_norm: 0.7618312817845301, iteration: 293512
loss: 1.0232667922973633,grad_norm: 0.9999998239050204, iteration: 293513
loss: 0.9970588684082031,grad_norm: 0.8628374141998234, iteration: 293514
loss: 0.9883697032928467,grad_norm: 0.7477686062404347, iteration: 293515
loss: 1.0235687494277954,grad_norm: 0.9999996795075772, iteration: 293516
loss: 0.9601689577102661,grad_norm: 0.8034254175426563, iteration: 293517
loss: 1.00916588306427,grad_norm: 0.9727194340538166, iteration: 293518
loss: 1.0084192752838135,grad_norm: 0.7992090161920672, iteration: 293519
loss: 0.930608868598938,grad_norm: 0.8084886059600975, iteration: 293520
loss: 1.0365386009216309,grad_norm: 0.8973696485434126, iteration: 293521
loss: 1.0066473484039307,grad_norm: 0.9999996926423927, iteration: 293522
loss: 1.0272048711776733,grad_norm: 0.9999990816505674, iteration: 293523
loss: 1.0137449502944946,grad_norm: 0.817235632925443, iteration: 293524
loss: 1.0219979286193848,grad_norm: 0.999999658860995, iteration: 293525
loss: 1.034533143043518,grad_norm: 0.8257491974471972, iteration: 293526
loss: 0.974099338054657,grad_norm: 0.9999990840525329, iteration: 293527
loss: 1.0450458526611328,grad_norm: 0.9999992367106544, iteration: 293528
loss: 1.1304019689559937,grad_norm: 0.9999998215021016, iteration: 293529
loss: 1.018939733505249,grad_norm: 0.8669026666361239, iteration: 293530
loss: 0.9842789769172668,grad_norm: 0.8053709046568589, iteration: 293531
loss: 1.0360828638076782,grad_norm: 0.8501305210915439, iteration: 293532
loss: 0.9892873764038086,grad_norm: 0.8938555717886941, iteration: 293533
loss: 0.9628278613090515,grad_norm: 0.8272146423764529, iteration: 293534
loss: 1.026734471321106,grad_norm: 0.8201926698589127, iteration: 293535
loss: 0.9657618999481201,grad_norm: 0.9999989890155051, iteration: 293536
loss: 0.9645890593528748,grad_norm: 0.8401753876900764, iteration: 293537
loss: 1.0071308612823486,grad_norm: 0.9999994662503844, iteration: 293538
loss: 1.007111668586731,grad_norm: 0.7697073019010126, iteration: 293539
loss: 0.9794358611106873,grad_norm: 0.833812598788476, iteration: 293540
loss: 0.981482982635498,grad_norm: 0.9999991100222142, iteration: 293541
loss: 0.9765640497207642,grad_norm: 0.7849665918757005, iteration: 293542
loss: 1.0298737287521362,grad_norm: 0.8916262872299086, iteration: 293543
loss: 0.9965682029724121,grad_norm: 0.9999992314179958, iteration: 293544
loss: 0.9589555263519287,grad_norm: 0.8067536946238203, iteration: 293545
loss: 1.0210585594177246,grad_norm: 0.6981767817936785, iteration: 293546
loss: 0.9594383835792542,grad_norm: 0.8079473218099226, iteration: 293547
loss: 0.9800503253936768,grad_norm: 0.6914730200862244, iteration: 293548
loss: 1.0455936193466187,grad_norm: 0.8300622874503143, iteration: 293549
loss: 1.016576886177063,grad_norm: 0.8967685837880625, iteration: 293550
loss: 0.98555588722229,grad_norm: 0.8876007896606519, iteration: 293551
loss: 1.0421338081359863,grad_norm: 0.792380688105335, iteration: 293552
loss: 1.0984222888946533,grad_norm: 0.9942342479277563, iteration: 293553
loss: 1.0174200534820557,grad_norm: 0.7561033027836824, iteration: 293554
loss: 1.0178455114364624,grad_norm: 0.8506160969899232, iteration: 293555
loss: 0.9980943202972412,grad_norm: 0.8458642380877982, iteration: 293556
loss: 1.017843246459961,grad_norm: 0.8758318296479817, iteration: 293557
loss: 0.9589091539382935,grad_norm: 0.8697361370027212, iteration: 293558
loss: 0.9646625518798828,grad_norm: 0.83813358591592, iteration: 293559
loss: 0.9719420671463013,grad_norm: 0.9468725460290386, iteration: 293560
loss: 0.9926312565803528,grad_norm: 0.9727332409135112, iteration: 293561
loss: 0.9964408278465271,grad_norm: 0.8056039142578799, iteration: 293562
loss: 0.9992469549179077,grad_norm: 0.8749227129611579, iteration: 293563
loss: 0.9839223027229309,grad_norm: 0.9999991953273718, iteration: 293564
loss: 0.9854829907417297,grad_norm: 0.8863979325356796, iteration: 293565
loss: 0.9846903681755066,grad_norm: 0.7571619141203092, iteration: 293566
loss: 1.010296106338501,grad_norm: 0.7731528043431707, iteration: 293567
loss: 0.9979844689369202,grad_norm: 0.727302317127018, iteration: 293568
loss: 1.0052646398544312,grad_norm: 0.942358360441431, iteration: 293569
loss: 0.9810754656791687,grad_norm: 0.8347448093170428, iteration: 293570
loss: 1.0247561931610107,grad_norm: 0.7492225859934013, iteration: 293571
loss: 1.0143433809280396,grad_norm: 0.7612892874135071, iteration: 293572
loss: 0.99701327085495,grad_norm: 0.931090388216278, iteration: 293573
loss: 1.1745299100875854,grad_norm: 0.9999997986204849, iteration: 293574
loss: 1.0345298051834106,grad_norm: 0.7572362004280014, iteration: 293575
loss: 0.9915633797645569,grad_norm: 0.9379959223010296, iteration: 293576
loss: 1.0008573532104492,grad_norm: 0.8504915348161742, iteration: 293577
loss: 0.9483108520507812,grad_norm: 0.7931086346374502, iteration: 293578
loss: 0.9933843612670898,grad_norm: 0.8645348122528745, iteration: 293579
loss: 1.0084866285324097,grad_norm: 0.7304771128715266, iteration: 293580
loss: 1.0444681644439697,grad_norm: 0.8123428129602384, iteration: 293581
loss: 0.9990827441215515,grad_norm: 0.7727726837345391, iteration: 293582
loss: 0.9664719700813293,grad_norm: 0.9724793164258295, iteration: 293583
loss: 1.039054036140442,grad_norm: 0.7991598573429183, iteration: 293584
loss: 0.9930256009101868,grad_norm: 0.7808057387026176, iteration: 293585
loss: 1.0123811960220337,grad_norm: 0.8764417667576684, iteration: 293586
loss: 0.9890896081924438,grad_norm: 0.8691298704514739, iteration: 293587
loss: 1.0699187517166138,grad_norm: 0.9053066306107507, iteration: 293588
loss: 1.000616431236267,grad_norm: 0.9999998109525048, iteration: 293589
loss: 0.9949953556060791,grad_norm: 0.8521847958001459, iteration: 293590
loss: 0.9900327324867249,grad_norm: 0.8197378632363829, iteration: 293591
loss: 1.0140085220336914,grad_norm: 0.8563291807415858, iteration: 293592
loss: 0.9999527335166931,grad_norm: 0.9999994384104244, iteration: 293593
loss: 1.0200525522232056,grad_norm: 0.836423790934576, iteration: 293594
loss: 0.9689352512359619,grad_norm: 0.9422394771076068, iteration: 293595
loss: 0.974568247795105,grad_norm: 0.7097005491454312, iteration: 293596
loss: 1.028971791267395,grad_norm: 0.9286246943947645, iteration: 293597
loss: 0.9750962257385254,grad_norm: 0.7186192450996179, iteration: 293598
loss: 0.9999845623970032,grad_norm: 0.9132719871209555, iteration: 293599
loss: 0.978889524936676,grad_norm: 0.795637054547532, iteration: 293600
loss: 1.0090080499649048,grad_norm: 0.9479337168463317, iteration: 293601
loss: 1.0208568572998047,grad_norm: 0.7859190084745795, iteration: 293602
loss: 0.9809297323226929,grad_norm: 0.8781067176020746, iteration: 293603
loss: 1.006069540977478,grad_norm: 0.8693564832372234, iteration: 293604
loss: 0.9789064526557922,grad_norm: 0.9999990998920026, iteration: 293605
loss: 0.9535263180732727,grad_norm: 0.9245828221724854, iteration: 293606
loss: 1.0058702230453491,grad_norm: 0.7874318092937584, iteration: 293607
loss: 0.9751203656196594,grad_norm: 0.8315950805654786, iteration: 293608
loss: 1.0155787467956543,grad_norm: 0.7531887260968189, iteration: 293609
loss: 0.9832260608673096,grad_norm: 0.9075741841332573, iteration: 293610
loss: 1.0057181119918823,grad_norm: 0.8559385987178524, iteration: 293611
loss: 0.9668963551521301,grad_norm: 0.8933282884262606, iteration: 293612
loss: 1.0498923063278198,grad_norm: 0.9999998465232095, iteration: 293613
loss: 1.0191650390625,grad_norm: 0.9999993679054451, iteration: 293614
loss: 1.0134638547897339,grad_norm: 0.9999989588430183, iteration: 293615
loss: 0.9999446272850037,grad_norm: 0.8771647934301363, iteration: 293616
loss: 1.037499189376831,grad_norm: 0.808108415419644, iteration: 293617
loss: 1.0243890285491943,grad_norm: 0.999999048490614, iteration: 293618
loss: 0.9978994727134705,grad_norm: 0.8079608999814767, iteration: 293619
loss: 1.0053056478500366,grad_norm: 0.7406689964510894, iteration: 293620
loss: 1.0242866277694702,grad_norm: 0.8760538412920129, iteration: 293621
loss: 0.9756741523742676,grad_norm: 0.8597843447036833, iteration: 293622
loss: 0.9962736368179321,grad_norm: 0.7886720733726892, iteration: 293623
loss: 1.0366488695144653,grad_norm: 0.9999992209333588, iteration: 293624
loss: 1.0116294622421265,grad_norm: 0.7502648237790642, iteration: 293625
loss: 0.9829263091087341,grad_norm: 0.9999991421719716, iteration: 293626
loss: 0.964958906173706,grad_norm: 0.8714814573523526, iteration: 293627
loss: 1.0055803060531616,grad_norm: 0.8213185315658299, iteration: 293628
loss: 0.9836107492446899,grad_norm: 0.8576205948476427, iteration: 293629
loss: 1.016333818435669,grad_norm: 0.8205891697324657, iteration: 293630
loss: 0.9967241287231445,grad_norm: 0.8119598861642543, iteration: 293631
loss: 1.0071290731430054,grad_norm: 0.753572000842836, iteration: 293632
loss: 0.9973649978637695,grad_norm: 0.6821395197613015, iteration: 293633
loss: 1.0095064640045166,grad_norm: 0.9999991198600757, iteration: 293634
loss: 0.9880611300468445,grad_norm: 0.7682055596802402, iteration: 293635
loss: 0.9885150790214539,grad_norm: 0.878838380215025, iteration: 293636
loss: 0.990776777267456,grad_norm: 0.7642128146851777, iteration: 293637
loss: 1.0115760564804077,grad_norm: 0.7424722295816504, iteration: 293638
loss: 0.9562061429023743,grad_norm: 0.7804396886095483, iteration: 293639
loss: 0.9802597165107727,grad_norm: 0.8678002336271158, iteration: 293640
loss: 0.9901553988456726,grad_norm: 0.920261670961911, iteration: 293641
loss: 0.9984753727912903,grad_norm: 0.8637557522485455, iteration: 293642
loss: 0.9964404702186584,grad_norm: 0.9232664728273655, iteration: 293643
loss: 0.9862267971038818,grad_norm: 0.7674046637958795, iteration: 293644
loss: 0.9850444793701172,grad_norm: 0.9999990569098999, iteration: 293645
loss: 0.9895859360694885,grad_norm: 0.8767921512190797, iteration: 293646
loss: 0.9810499548912048,grad_norm: 0.7481564260866471, iteration: 293647
loss: 1.0026015043258667,grad_norm: 0.8485823298988241, iteration: 293648
loss: 1.0090786218643188,grad_norm: 0.8282271341023778, iteration: 293649
loss: 1.1093461513519287,grad_norm: 0.9999993036101635, iteration: 293650
loss: 0.992902934551239,grad_norm: 0.8105915616446449, iteration: 293651
loss: 1.0208123922348022,grad_norm: 0.732446860768583, iteration: 293652
loss: 0.9976950883865356,grad_norm: 0.9268067335659146, iteration: 293653
loss: 1.0137022733688354,grad_norm: 0.9999992966295126, iteration: 293654
loss: 1.0182826519012451,grad_norm: 0.888584520927994, iteration: 293655
loss: 1.0105794668197632,grad_norm: 0.8869488276749256, iteration: 293656
loss: 1.0061150789260864,grad_norm: 0.9150054881407803, iteration: 293657
loss: 0.9735689163208008,grad_norm: 0.9999992070515636, iteration: 293658
loss: 1.0039637088775635,grad_norm: 0.9321592821902478, iteration: 293659
loss: 1.0062817335128784,grad_norm: 0.7743177788439071, iteration: 293660
loss: 0.9982392191886902,grad_norm: 0.9999994690696971, iteration: 293661
loss: 1.0049751996994019,grad_norm: 0.7289759108871869, iteration: 293662
loss: 1.018783450126648,grad_norm: 0.8049661796057869, iteration: 293663
loss: 0.9783160090446472,grad_norm: 0.8920278795564206, iteration: 293664
loss: 1.020559549331665,grad_norm: 0.7806764691406012, iteration: 293665
loss: 1.016257882118225,grad_norm: 0.8218745617321528, iteration: 293666
loss: 0.962784469127655,grad_norm: 0.8662475660180546, iteration: 293667
loss: 0.9766823053359985,grad_norm: 0.9229347569164154, iteration: 293668
loss: 1.0184195041656494,grad_norm: 0.8684321951670083, iteration: 293669
loss: 1.0270718336105347,grad_norm: 0.8913482711633375, iteration: 293670
loss: 1.046857476234436,grad_norm: 0.9999998448069326, iteration: 293671
loss: 0.9916700720787048,grad_norm: 0.7941373609126339, iteration: 293672
loss: 0.9748705625534058,grad_norm: 0.8415973594395961, iteration: 293673
loss: 0.9762933254241943,grad_norm: 0.8750136962871837, iteration: 293674
loss: 1.005967378616333,grad_norm: 0.8074031009562637, iteration: 293675
loss: 0.9933866262435913,grad_norm: 0.897432935690404, iteration: 293676
loss: 1.0111980438232422,grad_norm: 0.8230159774739643, iteration: 293677
loss: 0.9844523668289185,grad_norm: 0.816092644498379, iteration: 293678
loss: 0.98460853099823,grad_norm: 0.9999990895128679, iteration: 293679
loss: 0.9487186074256897,grad_norm: 0.9176725837365206, iteration: 293680
loss: 0.9823258519172668,grad_norm: 0.842733109727022, iteration: 293681
loss: 0.988349974155426,grad_norm: 0.9638522025888729, iteration: 293682
loss: 0.993931770324707,grad_norm: 0.9062157118243631, iteration: 293683
loss: 0.9988057613372803,grad_norm: 0.879896786059913, iteration: 293684
loss: 1.0355514287948608,grad_norm: 0.807138486957719, iteration: 293685
loss: 0.9909273982048035,grad_norm: 0.9999991915264258, iteration: 293686
loss: 1.0289294719696045,grad_norm: 0.8471575352689651, iteration: 293687
loss: 1.0344284772872925,grad_norm: 0.7051517655348097, iteration: 293688
loss: 1.001541256904602,grad_norm: 0.8807191128876726, iteration: 293689
loss: 1.0007036924362183,grad_norm: 0.8342491077947871, iteration: 293690
loss: 0.9916125535964966,grad_norm: 0.8185720708015163, iteration: 293691
loss: 0.9747148752212524,grad_norm: 0.8474622398906063, iteration: 293692
loss: 1.0265449285507202,grad_norm: 0.7896519046380884, iteration: 293693
loss: 1.0030673742294312,grad_norm: 0.9307486523298195, iteration: 293694
loss: 1.0027472972869873,grad_norm: 0.99332799384632, iteration: 293695
loss: 0.9704650044441223,grad_norm: 0.8341969914454457, iteration: 293696
loss: 0.9905206561088562,grad_norm: 0.9999998536085191, iteration: 293697
loss: 1.0150052309036255,grad_norm: 0.8408198120150047, iteration: 293698
loss: 0.9704493284225464,grad_norm: 0.9999991142684701, iteration: 293699
loss: 0.991919994354248,grad_norm: 0.9999990935817278, iteration: 293700
loss: 0.9968201518058777,grad_norm: 0.9999991495084951, iteration: 293701
loss: 0.9863729476928711,grad_norm: 0.773535541859147, iteration: 293702
loss: 0.9941109418869019,grad_norm: 0.7858077939403499, iteration: 293703
loss: 1.0730526447296143,grad_norm: 0.8414096140098385, iteration: 293704
loss: 0.9962469935417175,grad_norm: 0.8714951842830762, iteration: 293705
loss: 1.0145140886306763,grad_norm: 0.9004020824970291, iteration: 293706
loss: 1.1193956136703491,grad_norm: 0.9999995305507579, iteration: 293707
loss: 0.9841184616088867,grad_norm: 0.7869376949997158, iteration: 293708
loss: 0.9968322515487671,grad_norm: 0.8785152209080401, iteration: 293709
loss: 0.9916860461235046,grad_norm: 0.9374072909895821, iteration: 293710
loss: 1.0003199577331543,grad_norm: 0.917009030183028, iteration: 293711
loss: 0.9662070274353027,grad_norm: 0.8122860774822693, iteration: 293712
loss: 0.9735233187675476,grad_norm: 0.7693385483689957, iteration: 293713
loss: 1.0008474588394165,grad_norm: 0.8607621800071981, iteration: 293714
loss: 1.009936809539795,grad_norm: 0.842259522832253, iteration: 293715
loss: 1.0193854570388794,grad_norm: 0.8493700229303165, iteration: 293716
loss: 0.9959645867347717,grad_norm: 0.8032537458442521, iteration: 293717
loss: 1.026202917098999,grad_norm: 0.967650453329088, iteration: 293718
loss: 0.9905346632003784,grad_norm: 0.8665301282876285, iteration: 293719
loss: 1.0120689868927002,grad_norm: 0.8278246130826127, iteration: 293720
loss: 1.0323824882507324,grad_norm: 0.762205437625382, iteration: 293721
loss: 1.0208866596221924,grad_norm: 0.9999997162359119, iteration: 293722
loss: 0.9677894115447998,grad_norm: 0.7993765558418531, iteration: 293723
loss: 1.0645405054092407,grad_norm: 0.9999994873747753, iteration: 293724
loss: 0.9780624508857727,grad_norm: 0.7279105725246707, iteration: 293725
loss: 0.985030472278595,grad_norm: 0.9889185892335997, iteration: 293726
loss: 0.9990087747573853,grad_norm: 0.8476932434745004, iteration: 293727
loss: 0.9870608448982239,grad_norm: 0.9077712714622233, iteration: 293728
loss: 1.0412598848342896,grad_norm: 0.7661770979889786, iteration: 293729
loss: 1.013149380683899,grad_norm: 0.8834575578099187, iteration: 293730
loss: 0.9874353408813477,grad_norm: 0.9530399533994067, iteration: 293731
loss: 1.004787802696228,grad_norm: 0.7345666014089698, iteration: 293732
loss: 1.0077533721923828,grad_norm: 0.75813301248146, iteration: 293733
loss: 0.9572049975395203,grad_norm: 0.8445950557517273, iteration: 293734
loss: 1.0186479091644287,grad_norm: 0.9607466287148766, iteration: 293735
loss: 0.9924983978271484,grad_norm: 0.7883454240368984, iteration: 293736
loss: 1.0366344451904297,grad_norm: 0.8484685780406827, iteration: 293737
loss: 0.9882053732872009,grad_norm: 0.7824304673742108, iteration: 293738
loss: 0.9867545962333679,grad_norm: 0.7421013009757987, iteration: 293739
loss: 0.9800164103507996,grad_norm: 0.8342741511039784, iteration: 293740
loss: 0.9731407761573792,grad_norm: 0.9999989943089675, iteration: 293741
loss: 0.9624446034431458,grad_norm: 0.773449493606179, iteration: 293742
loss: 1.0332237482070923,grad_norm: 0.7859545654794687, iteration: 293743
loss: 0.9873241186141968,grad_norm: 0.9019654767255005, iteration: 293744
loss: 0.9999196529388428,grad_norm: 0.8650115612999506, iteration: 293745
loss: 1.0009510517120361,grad_norm: 0.7858870273099672, iteration: 293746
loss: 0.9929782748222351,grad_norm: 0.9182284068086588, iteration: 293747
loss: 1.0116862058639526,grad_norm: 0.9808434074590767, iteration: 293748
loss: 1.0241830348968506,grad_norm: 0.8532700603126203, iteration: 293749
loss: 0.996326208114624,grad_norm: 0.9774272275168783, iteration: 293750
loss: 0.9831849932670593,grad_norm: 0.7512581578273083, iteration: 293751
loss: 0.9894762635231018,grad_norm: 0.7266206611609284, iteration: 293752
loss: 0.9255225658416748,grad_norm: 0.9999988569344268, iteration: 293753
loss: 0.9963207244873047,grad_norm: 0.8046734870393664, iteration: 293754
loss: 1.0516844987869263,grad_norm: 0.9999995429949894, iteration: 293755
loss: 1.0393816232681274,grad_norm: 0.7852035009978533, iteration: 293756
loss: 1.0017874240875244,grad_norm: 0.7757902190589802, iteration: 293757
loss: 1.030139446258545,grad_norm: 0.8811608875135726, iteration: 293758
loss: 0.9907814860343933,grad_norm: 0.9682585576232625, iteration: 293759
loss: 1.0466974973678589,grad_norm: 0.9999993068316553, iteration: 293760
loss: 0.9500688314437866,grad_norm: 0.9913909031653497, iteration: 293761
loss: 0.9556311964988708,grad_norm: 0.7823078076029587, iteration: 293762
loss: 0.9585227370262146,grad_norm: 0.9016767854619364, iteration: 293763
loss: 1.0072300434112549,grad_norm: 0.9999989928298262, iteration: 293764
loss: 1.0286405086517334,grad_norm: 0.7879151023970227, iteration: 293765
loss: 0.9857758283615112,grad_norm: 0.782657823509343, iteration: 293766
loss: 0.9864974617958069,grad_norm: 0.7897479494798318, iteration: 293767
loss: 1.0306388139724731,grad_norm: 0.9252399056353532, iteration: 293768
loss: 1.006199836730957,grad_norm: 0.8620398210027903, iteration: 293769
loss: 0.9881805181503296,grad_norm: 0.8178190060376687, iteration: 293770
loss: 1.018652319908142,grad_norm: 0.8818640480949234, iteration: 293771
loss: 0.9800397157669067,grad_norm: 0.8205580540639752, iteration: 293772
loss: 1.0084991455078125,grad_norm: 0.6674538733264379, iteration: 293773
loss: 0.9913578629493713,grad_norm: 0.907389264570407, iteration: 293774
loss: 0.9898902177810669,grad_norm: 0.9133938908576463, iteration: 293775
loss: 1.00337553024292,grad_norm: 0.815631358386773, iteration: 293776
loss: 1.0040150880813599,grad_norm: 0.8339427717161743, iteration: 293777
loss: 0.9863820672035217,grad_norm: 0.9234326298609501, iteration: 293778
loss: 1.0100417137145996,grad_norm: 0.7912444121798272, iteration: 293779
loss: 0.9789323210716248,grad_norm: 0.7968556930229281, iteration: 293780
loss: 1.154738426208496,grad_norm: 0.9999999961062815, iteration: 293781
loss: 0.9916494488716125,grad_norm: 0.7583734454367699, iteration: 293782
loss: 1.0031858682632446,grad_norm: 0.9568457592664574, iteration: 293783
loss: 0.9940745830535889,grad_norm: 0.8405265428084356, iteration: 293784
loss: 1.0225756168365479,grad_norm: 0.8273806947159704, iteration: 293785
loss: 0.9963361024856567,grad_norm: 0.7761535955084985, iteration: 293786
loss: 0.9654378890991211,grad_norm: 0.8109155821011257, iteration: 293787
loss: 1.0158109664916992,grad_norm: 0.9999991560457449, iteration: 293788
loss: 0.973838746547699,grad_norm: 0.7960581778328295, iteration: 293789
loss: 1.0542879104614258,grad_norm: 0.9999998160922912, iteration: 293790
loss: 1.0132800340652466,grad_norm: 0.6898788411957615, iteration: 293791
loss: 1.0428504943847656,grad_norm: 0.829106417355861, iteration: 293792
loss: 0.955009400844574,grad_norm: 0.8064321560531924, iteration: 293793
loss: 0.9975227117538452,grad_norm: 0.8538805681452796, iteration: 293794
loss: 0.9843356609344482,grad_norm: 0.8508636737518585, iteration: 293795
loss: 0.9783035516738892,grad_norm: 0.777379288835927, iteration: 293796
loss: 1.0257834196090698,grad_norm: 0.9999991205821196, iteration: 293797
loss: 1.0132170915603638,grad_norm: 0.8156543177269935, iteration: 293798
loss: 1.026918649673462,grad_norm: 0.9464616603514588, iteration: 293799
loss: 0.9567855596542358,grad_norm: 0.9166043865750925, iteration: 293800
loss: 1.0071582794189453,grad_norm: 0.762667393520013, iteration: 293801
loss: 1.0057655572891235,grad_norm: 0.9999991410824199, iteration: 293802
loss: 0.985927402973175,grad_norm: 0.7176322692675081, iteration: 293803
loss: 1.0135235786437988,grad_norm: 0.773885132127385, iteration: 293804
loss: 0.9658774137496948,grad_norm: 0.8515826961204954, iteration: 293805
loss: 0.9751538038253784,grad_norm: 0.8750251411226512, iteration: 293806
loss: 0.9684301018714905,grad_norm: 0.9184115318893029, iteration: 293807
loss: 1.0030303001403809,grad_norm: 0.9916355801973187, iteration: 293808
loss: 1.0053807497024536,grad_norm: 0.8995212560340076, iteration: 293809
loss: 1.0150566101074219,grad_norm: 0.8708409929230888, iteration: 293810
loss: 1.0183442831039429,grad_norm: 0.862540660990841, iteration: 293811
loss: 0.9856168031692505,grad_norm: 0.8155591091409434, iteration: 293812
loss: 1.0263621807098389,grad_norm: 0.9306223290143001, iteration: 293813
loss: 0.9993147850036621,grad_norm: 0.8323253451943935, iteration: 293814
loss: 0.9861441254615784,grad_norm: 0.9893909515596726, iteration: 293815
loss: 1.038457989692688,grad_norm: 0.9999997315473572, iteration: 293816
loss: 0.9767614006996155,grad_norm: 0.9598263557604925, iteration: 293817
loss: 1.0212682485580444,grad_norm: 0.7939636663955105, iteration: 293818
loss: 0.9847680330276489,grad_norm: 0.9609771832194327, iteration: 293819
loss: 0.9384052753448486,grad_norm: 0.9737873865559592, iteration: 293820
loss: 1.0075212717056274,grad_norm: 0.7425529152733169, iteration: 293821
loss: 1.0095252990722656,grad_norm: 0.8794725675594708, iteration: 293822
loss: 1.0068458318710327,grad_norm: 0.9952653576223821, iteration: 293823
loss: 0.9741349816322327,grad_norm: 0.9999998755861361, iteration: 293824
loss: 1.006838321685791,grad_norm: 0.9005697034772715, iteration: 293825
loss: 1.0399686098098755,grad_norm: 0.999999101721901, iteration: 293826
loss: 0.9823399186134338,grad_norm: 0.699427125710279, iteration: 293827
loss: 1.0027284622192383,grad_norm: 0.9999996980837706, iteration: 293828
loss: 0.9869659543037415,grad_norm: 0.8484976258430315, iteration: 293829
loss: 1.024993658065796,grad_norm: 0.9999992760875149, iteration: 293830
loss: 0.9898449182510376,grad_norm: 0.9691978578890361, iteration: 293831
loss: 1.0140039920806885,grad_norm: 0.7887028970986816, iteration: 293832
loss: 0.9995867013931274,grad_norm: 0.8202740948388713, iteration: 293833
loss: 0.9919195771217346,grad_norm: 0.7770297306189207, iteration: 293834
loss: 0.9901585578918457,grad_norm: 0.859586000716894, iteration: 293835
loss: 1.0071927309036255,grad_norm: 0.8082090912414797, iteration: 293836
loss: 1.0827702283859253,grad_norm: 0.8486307071103382, iteration: 293837
loss: 0.984161376953125,grad_norm: 0.8015210778747395, iteration: 293838
loss: 0.9928230047225952,grad_norm: 0.6898831203921459, iteration: 293839
loss: 0.9438294172286987,grad_norm: 0.9999989296230561, iteration: 293840
loss: 1.011890172958374,grad_norm: 0.7599010976979806, iteration: 293841
loss: 0.9876039624214172,grad_norm: 0.8454173662388171, iteration: 293842
loss: 1.0315625667572021,grad_norm: 0.9999990285438787, iteration: 293843
loss: 0.9708501100540161,grad_norm: 0.8169712006700046, iteration: 293844
loss: 0.9803690910339355,grad_norm: 0.8492979180508329, iteration: 293845
loss: 1.0085399150848389,grad_norm: 0.8033782920055884, iteration: 293846
loss: 0.9889283776283264,grad_norm: 0.932802792925431, iteration: 293847
loss: 1.115512728691101,grad_norm: 0.9999997566948877, iteration: 293848
loss: 1.0182857513427734,grad_norm: 0.999999168684349, iteration: 293849
loss: 1.0375556945800781,grad_norm: 0.8447087714413732, iteration: 293850
loss: 1.0041917562484741,grad_norm: 0.7444914722158988, iteration: 293851
loss: 0.9657416343688965,grad_norm: 0.9999991808322647, iteration: 293852
loss: 0.9646496772766113,grad_norm: 0.9152629537724905, iteration: 293853
loss: 0.9682320952415466,grad_norm: 0.8417928055497057, iteration: 293854
loss: 0.9872578382492065,grad_norm: 0.8848144435068941, iteration: 293855
loss: 0.9657016396522522,grad_norm: 0.8878667133445968, iteration: 293856
loss: 1.0052708387374878,grad_norm: 0.7395349679614931, iteration: 293857
loss: 1.0164128541946411,grad_norm: 0.8498164689646968, iteration: 293858
loss: 1.072846531867981,grad_norm: 0.9086715324198804, iteration: 293859
loss: 0.9448761343955994,grad_norm: 0.8986286144290893, iteration: 293860
loss: 1.0475590229034424,grad_norm: 0.9999992575953877, iteration: 293861
loss: 0.9786700010299683,grad_norm: 0.7400103057760145, iteration: 293862
loss: 1.0316169261932373,grad_norm: 0.8084068349499987, iteration: 293863
loss: 0.9768849611282349,grad_norm: 0.8220490197696683, iteration: 293864
loss: 1.005553126335144,grad_norm: 0.9570748603753569, iteration: 293865
loss: 0.9941288828849792,grad_norm: 0.8417940820024025, iteration: 293866
loss: 1.0077474117279053,grad_norm: 0.6967889093597256, iteration: 293867
loss: 1.0258069038391113,grad_norm: 0.7687187994605083, iteration: 293868
loss: 0.9958369731903076,grad_norm: 0.893182633026485, iteration: 293869
loss: 0.9738925099372864,grad_norm: 0.8290035192597217, iteration: 293870
loss: 0.9986788630485535,grad_norm: 0.7923417485702068, iteration: 293871
loss: 1.0148438215255737,grad_norm: 0.9999992615157093, iteration: 293872
loss: 0.963908314704895,grad_norm: 0.7901321284503802, iteration: 293873
loss: 0.9622275829315186,grad_norm: 0.8974362152165198, iteration: 293874
loss: 0.9754347801208496,grad_norm: 0.8348199946311473, iteration: 293875
loss: 1.00093674659729,grad_norm: 0.846680634846858, iteration: 293876
loss: 1.0179097652435303,grad_norm: 0.8450619649881901, iteration: 293877
loss: 1.0124319791793823,grad_norm: 0.9999995175089537, iteration: 293878
loss: 1.0162590742111206,grad_norm: 0.9212968591994135, iteration: 293879
loss: 0.9823315143585205,grad_norm: 0.751539767514622, iteration: 293880
loss: 0.9628386497497559,grad_norm: 0.9999990387366775, iteration: 293881
loss: 0.9869627952575684,grad_norm: 0.9999991226096397, iteration: 293882
loss: 1.0876398086547852,grad_norm: 0.9783877991386432, iteration: 293883
loss: 0.9577592611312866,grad_norm: 0.7706755380871823, iteration: 293884
loss: 1.0002906322479248,grad_norm: 0.8789127291137033, iteration: 293885
loss: 1.0063867568969727,grad_norm: 0.8399252605201027, iteration: 293886
loss: 0.970034658908844,grad_norm: 0.7708529996280064, iteration: 293887
loss: 1.0163815021514893,grad_norm: 0.8501052974531298, iteration: 293888
loss: 0.9713392853736877,grad_norm: 0.8316776295175087, iteration: 293889
loss: 1.001895785331726,grad_norm: 0.8419999534710053, iteration: 293890
loss: 0.9711679816246033,grad_norm: 0.7999976139035896, iteration: 293891
loss: 1.000971794128418,grad_norm: 0.8608102188149758, iteration: 293892
loss: 0.997958242893219,grad_norm: 0.9999991074252407, iteration: 293893
loss: 0.9831230640411377,grad_norm: 0.831123862514083, iteration: 293894
loss: 1.0252671241760254,grad_norm: 0.99999922198729, iteration: 293895
loss: 0.9608742594718933,grad_norm: 0.8243777456141901, iteration: 293896
loss: 1.0065789222717285,grad_norm: 0.9979418314635047, iteration: 293897
loss: 0.9817856550216675,grad_norm: 0.990433294978545, iteration: 293898
loss: 0.9936797618865967,grad_norm: 0.7661317944703634, iteration: 293899
loss: 0.9706180691719055,grad_norm: 0.8208293733206439, iteration: 293900
loss: 0.9996775984764099,grad_norm: 0.9790787270975044, iteration: 293901
loss: 0.9648987054824829,grad_norm: 1.0000000080138773, iteration: 293902
loss: 0.9737576842308044,grad_norm: 0.9750157080904851, iteration: 293903
loss: 0.9829910397529602,grad_norm: 0.8860413821752722, iteration: 293904
loss: 1.013564109802246,grad_norm: 0.8313819452800466, iteration: 293905
loss: 0.9646395444869995,grad_norm: 0.8137541755756876, iteration: 293906
loss: 0.9895186424255371,grad_norm: 0.9347725653815867, iteration: 293907
loss: 1.0140411853790283,grad_norm: 0.7421123530198729, iteration: 293908
loss: 1.0378904342651367,grad_norm: 0.8953244370161656, iteration: 293909
loss: 0.9954231381416321,grad_norm: 0.9999990010645754, iteration: 293910
loss: 0.9868704676628113,grad_norm: 0.771010298962729, iteration: 293911
loss: 1.0018420219421387,grad_norm: 0.9999990303434734, iteration: 293912
loss: 1.0269640684127808,grad_norm: 0.9116755119024462, iteration: 293913
loss: 0.9948310256004333,grad_norm: 0.7772360793709747, iteration: 293914
loss: 0.9765673279762268,grad_norm: 0.8192095028166364, iteration: 293915
loss: 0.9918866753578186,grad_norm: 0.9163398757650827, iteration: 293916
loss: 0.9456720352172852,grad_norm: 0.9498966831957643, iteration: 293917
loss: 0.986407458782196,grad_norm: 0.8480439606731922, iteration: 293918
loss: 1.0493305921554565,grad_norm: 0.8093180957245735, iteration: 293919
loss: 0.9923811554908752,grad_norm: 0.6985991863535611, iteration: 293920
loss: 0.948312520980835,grad_norm: 0.8641799435370394, iteration: 293921
loss: 0.9779000282287598,grad_norm: 0.7758112982686467, iteration: 293922
loss: 0.974654495716095,grad_norm: 0.940217090740575, iteration: 293923
loss: 1.0188496112823486,grad_norm: 0.9297810606243256, iteration: 293924
loss: 1.0209182500839233,grad_norm: 0.9999988061289341, iteration: 293925
loss: 0.9907889366149902,grad_norm: 0.7747165537055556, iteration: 293926
loss: 0.9963485598564148,grad_norm: 0.824498706447876, iteration: 293927
loss: 1.0112665891647339,grad_norm: 0.8939725193643886, iteration: 293928
loss: 1.0125212669372559,grad_norm: 0.775238927266892, iteration: 293929
loss: 0.9805067777633667,grad_norm: 0.9533456712581466, iteration: 293930
loss: 0.9959307909011841,grad_norm: 0.8223096525362719, iteration: 293931
loss: 0.983431875705719,grad_norm: 0.8218117417377451, iteration: 293932
loss: 1.0273756980895996,grad_norm: 0.9999990111883781, iteration: 293933
loss: 1.1218050718307495,grad_norm: 0.9999990213730624, iteration: 293934
loss: 1.0234588384628296,grad_norm: 0.9439710376396159, iteration: 293935
loss: 0.9755013585090637,grad_norm: 0.7242762538153371, iteration: 293936
loss: 1.0228279829025269,grad_norm: 0.9863693694057436, iteration: 293937
loss: 1.0176451206207275,grad_norm: 0.9999989715999585, iteration: 293938
loss: 0.955971360206604,grad_norm: 0.9126528682739874, iteration: 293939
loss: 0.9825413823127747,grad_norm: 0.8203394177642291, iteration: 293940
loss: 0.9577528238296509,grad_norm: 0.7696392444635237, iteration: 293941
loss: 1.0074464082717896,grad_norm: 0.84372523339665, iteration: 293942
loss: 1.0249676704406738,grad_norm: 0.999999150312814, iteration: 293943
loss: 1.019370198249817,grad_norm: 0.9068799205347507, iteration: 293944
loss: 1.015113115310669,grad_norm: 0.9999992174086779, iteration: 293945
loss: 0.9937384724617004,grad_norm: 0.6938895606062413, iteration: 293946
loss: 1.0024980306625366,grad_norm: 0.6808875579949036, iteration: 293947
loss: 0.9912769198417664,grad_norm: 0.8270708863292107, iteration: 293948
loss: 1.007948875427246,grad_norm: 0.8506299437954113, iteration: 293949
loss: 1.0220987796783447,grad_norm: 0.9381521113884999, iteration: 293950
loss: 0.9867786169052124,grad_norm: 0.812620874781949, iteration: 293951
loss: 1.0206257104873657,grad_norm: 0.7831331310408562, iteration: 293952
loss: 0.9627295732498169,grad_norm: 0.9999989832716558, iteration: 293953
loss: 0.9728086590766907,grad_norm: 0.8716282740701229, iteration: 293954
loss: 1.0179551839828491,grad_norm: 0.818540571109752, iteration: 293955
loss: 1.012597918510437,grad_norm: 0.9445190260448126, iteration: 293956
loss: 0.9767882823944092,grad_norm: 0.915208280518041, iteration: 293957
loss: 0.9649066925048828,grad_norm: 0.9999991193914358, iteration: 293958
loss: 1.0206537246704102,grad_norm: 0.9999990280261709, iteration: 293959
loss: 0.9533801078796387,grad_norm: 0.8273570951142919, iteration: 293960
loss: 0.9963414669036865,grad_norm: 0.9999990957088772, iteration: 293961
loss: 0.9910403490066528,grad_norm: 0.9645893875487522, iteration: 293962
loss: 0.9981220960617065,grad_norm: 0.8848252234210735, iteration: 293963
loss: 1.0107736587524414,grad_norm: 0.8959227187913744, iteration: 293964
loss: 1.0114330053329468,grad_norm: 0.9999991998616908, iteration: 293965
loss: 1.022863507270813,grad_norm: 0.9963934676845317, iteration: 293966
loss: 0.9919906258583069,grad_norm: 0.7613222449616737, iteration: 293967
loss: 1.0245999097824097,grad_norm: 0.7772311300804824, iteration: 293968
loss: 1.0182000398635864,grad_norm: 0.9143999137818597, iteration: 293969
loss: 0.9783112406730652,grad_norm: 0.9031117680187278, iteration: 293970
loss: 0.9931511878967285,grad_norm: 0.8090012300997584, iteration: 293971
loss: 1.0009346008300781,grad_norm: 0.884459864281659, iteration: 293972
loss: 1.0065395832061768,grad_norm: 0.929139949451227, iteration: 293973
loss: 1.0074033737182617,grad_norm: 0.9351021982568382, iteration: 293974
loss: 0.9973914623260498,grad_norm: 0.9999990846223367, iteration: 293975
loss: 1.0135372877120972,grad_norm: 0.869722994512743, iteration: 293976
loss: 0.9886016845703125,grad_norm: 0.9126471619088706, iteration: 293977
loss: 1.018173098564148,grad_norm: 0.9763598571733382, iteration: 293978
loss: 1.0050021409988403,grad_norm: 0.9999991314188124, iteration: 293979
loss: 1.0176968574523926,grad_norm: 0.8166291700739906, iteration: 293980
loss: 0.9760370254516602,grad_norm: 0.8406788239990063, iteration: 293981
loss: 1.012627124786377,grad_norm: 0.8582105884311486, iteration: 293982
loss: 1.0238269567489624,grad_norm: 0.8855445297709832, iteration: 293983
loss: 1.0222716331481934,grad_norm: 0.9027746270145329, iteration: 293984
loss: 0.9709801077842712,grad_norm: 0.7674656629713846, iteration: 293985
loss: 0.9654518961906433,grad_norm: 0.8280714815900632, iteration: 293986
loss: 1.0158747434616089,grad_norm: 0.9572819310749295, iteration: 293987
loss: 0.9991025924682617,grad_norm: 0.7232473037812464, iteration: 293988
loss: 0.9860047101974487,grad_norm: 0.7836205387881024, iteration: 293989
loss: 1.002791166305542,grad_norm: 0.7747394249754163, iteration: 293990
loss: 0.9981192946434021,grad_norm: 0.9999989034316223, iteration: 293991
loss: 0.9858712553977966,grad_norm: 0.7463325734538898, iteration: 293992
loss: 1.0096023082733154,grad_norm: 0.8498226592052996, iteration: 293993
loss: 1.007940649986267,grad_norm: 0.9999990188793494, iteration: 293994
loss: 1.0213721990585327,grad_norm: 0.9408686301188226, iteration: 293995
loss: 1.0395267009735107,grad_norm: 0.8639094938105554, iteration: 293996
loss: 1.0536882877349854,grad_norm: 0.8269735430266373, iteration: 293997
loss: 1.020796775817871,grad_norm: 0.9159761831450245, iteration: 293998
loss: 0.9818302989006042,grad_norm: 0.878592799007133, iteration: 293999
loss: 0.9543445706367493,grad_norm: 0.8345478249313552, iteration: 294000
loss: 0.9991885423660278,grad_norm: 0.8356055221065359, iteration: 294001
loss: 1.0126534700393677,grad_norm: 0.9540568929620316, iteration: 294002
loss: 0.9854902029037476,grad_norm: 0.9999992913983924, iteration: 294003
loss: 1.0395113229751587,grad_norm: 0.8810178165581326, iteration: 294004
loss: 0.9998846054077148,grad_norm: 0.8436585151858277, iteration: 294005
loss: 1.009077548980713,grad_norm: 0.8762960622387512, iteration: 294006
loss: 0.9863449931144714,grad_norm: 0.7799990885681607, iteration: 294007
loss: 0.9979672431945801,grad_norm: 0.9675049256677453, iteration: 294008
loss: 0.9677684307098389,grad_norm: 0.7675319179446335, iteration: 294009
loss: 1.0119123458862305,grad_norm: 0.9999991652944211, iteration: 294010
loss: 1.0232527256011963,grad_norm: 0.8069970254193349, iteration: 294011
loss: 0.9911689162254333,grad_norm: 0.9292458232366988, iteration: 294012
loss: 1.0020085573196411,grad_norm: 0.9587588689297717, iteration: 294013
loss: 0.991997480392456,grad_norm: 0.8306406476476614, iteration: 294014
loss: 1.1691256761550903,grad_norm: 0.9999996490827223, iteration: 294015
loss: 0.9769752621650696,grad_norm: 0.8601183695225894, iteration: 294016
loss: 0.9845094084739685,grad_norm: 0.8037074921306099, iteration: 294017
loss: 0.9817654490470886,grad_norm: 0.8256702251683388, iteration: 294018
loss: 0.9772356748580933,grad_norm: 0.8037968834695595, iteration: 294019
loss: 0.9606464505195618,grad_norm: 0.7729090627245899, iteration: 294020
loss: 0.9971478581428528,grad_norm: 0.8791837203467157, iteration: 294021
loss: 0.9973546862602234,grad_norm: 0.999999109770722, iteration: 294022
loss: 1.0184186697006226,grad_norm: 0.9999992047362082, iteration: 294023
loss: 1.0158876180648804,grad_norm: 0.7641977526002736, iteration: 294024
loss: 0.9971456527709961,grad_norm: 0.8219988510107208, iteration: 294025
loss: 0.9929801821708679,grad_norm: 0.8578122237200019, iteration: 294026
loss: 0.9744981527328491,grad_norm: 0.7291403909300509, iteration: 294027
loss: 0.9845620393753052,grad_norm: 0.8967016360587091, iteration: 294028
loss: 0.9824138879776001,grad_norm: 0.9999996437777512, iteration: 294029
loss: 0.9938857555389404,grad_norm: 0.9999994403957018, iteration: 294030
loss: 1.0388704538345337,grad_norm: 0.9692152612711977, iteration: 294031
loss: 1.0256463289260864,grad_norm: 0.9999990672004111, iteration: 294032
loss: 0.9990890026092529,grad_norm: 0.9999991098236856, iteration: 294033
loss: 1.051887035369873,grad_norm: 0.9999994677259935, iteration: 294034
loss: 0.9831787943840027,grad_norm: 0.8570126426473813, iteration: 294035
loss: 1.0151383876800537,grad_norm: 0.7373908991649819, iteration: 294036
loss: 0.9846366047859192,grad_norm: 0.8102928998678619, iteration: 294037
loss: 0.995061993598938,grad_norm: 0.9999991305673852, iteration: 294038
loss: 1.0026062726974487,grad_norm: 0.9858641107448364, iteration: 294039
loss: 1.0074341297149658,grad_norm: 0.8115443966563762, iteration: 294040
loss: 1.0419819355010986,grad_norm: 0.9999991495000278, iteration: 294041
loss: 1.0241808891296387,grad_norm: 0.9149193165147022, iteration: 294042
loss: 1.0928287506103516,grad_norm: 0.9999994910978416, iteration: 294043
loss: 1.064310073852539,grad_norm: 0.999999146615619, iteration: 294044
loss: 1.0606346130371094,grad_norm: 0.9145661327411734, iteration: 294045
loss: 0.9482420682907104,grad_norm: 0.9999991103292407, iteration: 294046
loss: 1.0272929668426514,grad_norm: 0.9516081185978574, iteration: 294047
loss: 0.9623801708221436,grad_norm: 0.9188519757603483, iteration: 294048
loss: 0.9841856360435486,grad_norm: 0.8828198211104173, iteration: 294049
loss: 1.041793942451477,grad_norm: 0.9999993989826598, iteration: 294050
loss: 1.020251989364624,grad_norm: 0.7833822341706074, iteration: 294051
loss: 0.9952793121337891,grad_norm: 0.9999990256389581, iteration: 294052
loss: 0.9876871705055237,grad_norm: 0.8504631051504715, iteration: 294053
loss: 1.0367892980575562,grad_norm: 0.999999375238124, iteration: 294054
loss: 1.0146960020065308,grad_norm: 0.9999990625605665, iteration: 294055
loss: 1.0246754884719849,grad_norm: 0.9999991028336483, iteration: 294056
loss: 0.9812079668045044,grad_norm: 0.8139201669356343, iteration: 294057
loss: 1.0078142881393433,grad_norm: 0.8639989575340993, iteration: 294058
loss: 1.011506199836731,grad_norm: 0.7840781548754134, iteration: 294059
loss: 0.9680670499801636,grad_norm: 0.8572491022045021, iteration: 294060
loss: 1.0054666996002197,grad_norm: 0.9999994480359791, iteration: 294061
loss: 0.9789314866065979,grad_norm: 0.8562736007407205, iteration: 294062
loss: 1.0568897724151611,grad_norm: 0.9999991287770311, iteration: 294063
loss: 0.9786778688430786,grad_norm: 0.8034117437685735, iteration: 294064
loss: 0.9498793482780457,grad_norm: 0.9841047057875337, iteration: 294065
loss: 0.999445915222168,grad_norm: 0.9999990414786252, iteration: 294066
loss: 1.0238176584243774,grad_norm: 0.98643238534956, iteration: 294067
loss: 0.9776413440704346,grad_norm: 0.9790895521661878, iteration: 294068
loss: 1.0056211948394775,grad_norm: 0.9999996687203138, iteration: 294069
loss: 0.9740898013114929,grad_norm: 0.8275114473503437, iteration: 294070
loss: 0.9781562685966492,grad_norm: 0.8259827103883455, iteration: 294071
loss: 1.0276389122009277,grad_norm: 0.7688815596340567, iteration: 294072
loss: 1.022603154182434,grad_norm: 0.9179666111319743, iteration: 294073
loss: 0.9933499693870544,grad_norm: 0.9030717943978009, iteration: 294074
loss: 1.005133032798767,grad_norm: 0.9008822389089319, iteration: 294075
loss: 0.977224588394165,grad_norm: 0.7903603658390277, iteration: 294076
loss: 1.0140259265899658,grad_norm: 0.7424995367502298, iteration: 294077
loss: 1.0147051811218262,grad_norm: 0.9391860629551104, iteration: 294078
loss: 1.020208716392517,grad_norm: 0.888072820740355, iteration: 294079
loss: 0.9629144072532654,grad_norm: 0.8709088376510776, iteration: 294080
loss: 0.95991450548172,grad_norm: 0.8431014412375171, iteration: 294081
loss: 1.040786623954773,grad_norm: 0.9999990949669552, iteration: 294082
loss: 1.0054510831832886,grad_norm: 0.9999992574167161, iteration: 294083
loss: 1.0008151531219482,grad_norm: 0.9932852809412706, iteration: 294084
loss: 1.0301616191864014,grad_norm: 0.7782218852449113, iteration: 294085
loss: 0.9849192500114441,grad_norm: 0.7755260519813163, iteration: 294086
loss: 0.9838563799858093,grad_norm: 0.7176622940971117, iteration: 294087
loss: 0.9920157790184021,grad_norm: 0.6719255086575975, iteration: 294088
loss: 0.9796596169471741,grad_norm: 0.7992885820805331, iteration: 294089
loss: 1.0069503784179688,grad_norm: 0.9230715491475685, iteration: 294090
loss: 1.027853012084961,grad_norm: 0.9999989491973003, iteration: 294091
loss: 0.9932941198348999,grad_norm: 0.6815127584191004, iteration: 294092
loss: 0.988463819026947,grad_norm: 0.8377542173031259, iteration: 294093
loss: 1.0103586912155151,grad_norm: 0.7785893996210859, iteration: 294094
loss: 1.0051910877227783,grad_norm: 0.863070250462409, iteration: 294095
loss: 0.9754824638366699,grad_norm: 0.753657712272047, iteration: 294096
loss: 0.9629359841346741,grad_norm: 0.8296997857824235, iteration: 294097
loss: 1.0158090591430664,grad_norm: 0.8925095148087975, iteration: 294098
loss: 0.9893890619277954,grad_norm: 0.8433039284280707, iteration: 294099
loss: 1.007307767868042,grad_norm: 0.9999992325341461, iteration: 294100
loss: 0.9766978025436401,grad_norm: 0.9848551923768938, iteration: 294101
loss: 0.988879919052124,grad_norm: 0.8393990545591666, iteration: 294102
loss: 0.9945712089538574,grad_norm: 0.8118455474129223, iteration: 294103
loss: 1.074491024017334,grad_norm: 0.9999989890742511, iteration: 294104
loss: 1.0081149339675903,grad_norm: 0.8606694995578147, iteration: 294105
loss: 1.0086612701416016,grad_norm: 0.8159153878088053, iteration: 294106
loss: 0.9892288446426392,grad_norm: 0.9099205601761661, iteration: 294107
loss: 1.0433295965194702,grad_norm: 0.9116055980095731, iteration: 294108
loss: 1.0069375038146973,grad_norm: 0.8982811112870251, iteration: 294109
loss: 0.9934605956077576,grad_norm: 0.9650344729773207, iteration: 294110
loss: 0.9829240441322327,grad_norm: 0.7855964063312436, iteration: 294111
loss: 1.0079749822616577,grad_norm: 0.9404240890824781, iteration: 294112
loss: 0.9775468707084656,grad_norm: 0.9806571451038346, iteration: 294113
loss: 1.0119614601135254,grad_norm: 0.7097102789795476, iteration: 294114
loss: 1.033007264137268,grad_norm: 0.9999990621401531, iteration: 294115
loss: 1.0085452795028687,grad_norm: 0.8922277612248153, iteration: 294116
loss: 0.9918897747993469,grad_norm: 0.8382053306886226, iteration: 294117
loss: 0.9722207188606262,grad_norm: 0.8336379185768532, iteration: 294118
loss: 1.0278750658035278,grad_norm: 0.8560669304666271, iteration: 294119
loss: 1.0167301893234253,grad_norm: 0.8675002239032601, iteration: 294120
loss: 1.0042868852615356,grad_norm: 0.7920728018595286, iteration: 294121
loss: 0.991759717464447,grad_norm: 0.7824334348913563, iteration: 294122
loss: 0.9727053046226501,grad_norm: 0.7906162294717312, iteration: 294123
loss: 0.9954021573066711,grad_norm: 0.6561560180223402, iteration: 294124
loss: 1.024718165397644,grad_norm: 0.8447906544713858, iteration: 294125
loss: 1.0506250858306885,grad_norm: 0.9999995900329354, iteration: 294126
loss: 1.017243504524231,grad_norm: 0.9999990822624822, iteration: 294127
loss: 0.9894593358039856,grad_norm: 0.7504587676339464, iteration: 294128
loss: 0.9850145578384399,grad_norm: 0.896387573222102, iteration: 294129
loss: 0.9938859939575195,grad_norm: 0.8874953550099788, iteration: 294130
loss: 0.982844352722168,grad_norm: 0.8832248838091098, iteration: 294131
loss: 1.0410025119781494,grad_norm: 0.7688833547850712, iteration: 294132
loss: 1.041137456893921,grad_norm: 0.9999993276582861, iteration: 294133
loss: 1.0047619342803955,grad_norm: 0.800018320516361, iteration: 294134
loss: 1.0438339710235596,grad_norm: 0.9536122687293298, iteration: 294135
loss: 0.9926096200942993,grad_norm: 0.7331309189576648, iteration: 294136
loss: 1.0083385705947876,grad_norm: 0.7213734786565049, iteration: 294137
loss: 0.9816026091575623,grad_norm: 0.8651437986172913, iteration: 294138
loss: 0.9933263063430786,grad_norm: 0.8659323076301864, iteration: 294139
loss: 0.9914403557777405,grad_norm: 0.904554296531429, iteration: 294140
loss: 1.007763385772705,grad_norm: 0.8368340410006291, iteration: 294141
loss: 0.9849418997764587,grad_norm: 0.9991371233620199, iteration: 294142
loss: 0.9978888630867004,grad_norm: 0.8701766775984181, iteration: 294143
loss: 1.0045766830444336,grad_norm: 0.8340915852345756, iteration: 294144
loss: 0.9877645969390869,grad_norm: 0.9407696729713595, iteration: 294145
loss: 1.0233983993530273,grad_norm: 0.8594076507508541, iteration: 294146
loss: 1.0389665365219116,grad_norm: 0.93893837179821, iteration: 294147
loss: 1.0153244733810425,grad_norm: 0.9386268123708462, iteration: 294148
loss: 1.0012286901474,grad_norm: 0.8814140774728056, iteration: 294149
loss: 0.9419518709182739,grad_norm: 0.6913016975565617, iteration: 294150
loss: 1.012826681137085,grad_norm: 0.9999988824517076, iteration: 294151
loss: 1.0169726610183716,grad_norm: 0.753253615650473, iteration: 294152
loss: 1.0059692859649658,grad_norm: 0.999999111766408, iteration: 294153
loss: 0.9950558543205261,grad_norm: 0.9109378340952959, iteration: 294154
loss: 1.0173989534378052,grad_norm: 0.9999995782412372, iteration: 294155
loss: 0.9921819567680359,grad_norm: 0.8096116066596196, iteration: 294156
loss: 0.9793074131011963,grad_norm: 0.7850244262473056, iteration: 294157
loss: 0.9784703850746155,grad_norm: 0.8668974381849702, iteration: 294158
loss: 0.9980438351631165,grad_norm: 0.7867660715821456, iteration: 294159
loss: 0.9725169539451599,grad_norm: 0.753471204360362, iteration: 294160
loss: 0.9902916550636292,grad_norm: 0.7060241091359706, iteration: 294161
loss: 0.9772433042526245,grad_norm: 0.8727522121363126, iteration: 294162
loss: 1.0264880657196045,grad_norm: 0.8563819986980908, iteration: 294163
loss: 0.9898415207862854,grad_norm: 0.9999994070850933, iteration: 294164
loss: 0.986518383026123,grad_norm: 0.9999992501168286, iteration: 294165
loss: 1.0932446718215942,grad_norm: 0.9062493771454903, iteration: 294166
loss: 0.9886096715927124,grad_norm: 0.687358640815729, iteration: 294167
loss: 1.0001322031021118,grad_norm: 0.9211875514140669, iteration: 294168
loss: 1.022783875465393,grad_norm: 0.9167100520231288, iteration: 294169
loss: 1.0033801794052124,grad_norm: 0.7495790370432202, iteration: 294170
loss: 0.9961934089660645,grad_norm: 0.779949466543515, iteration: 294171
loss: 1.0210236310958862,grad_norm: 1.000000062272296, iteration: 294172
loss: 1.0139542818069458,grad_norm: 0.7566300310888097, iteration: 294173
loss: 0.9775602221488953,grad_norm: 0.82835568719725, iteration: 294174
loss: 1.0221678018569946,grad_norm: 0.9999995724240326, iteration: 294175
loss: 0.9964662790298462,grad_norm: 0.7272580312647998, iteration: 294176
loss: 0.9803067445755005,grad_norm: 0.9999989346310376, iteration: 294177
loss: 1.0048160552978516,grad_norm: 0.780403323898487, iteration: 294178
loss: 1.010603666305542,grad_norm: 0.9999991276444616, iteration: 294179
loss: 1.0142079591751099,grad_norm: 0.8168295735162258, iteration: 294180
loss: 1.0070655345916748,grad_norm: 0.8251912735599104, iteration: 294181
loss: 0.9995816946029663,grad_norm: 0.8366803651749531, iteration: 294182
loss: 0.9707688093185425,grad_norm: 0.9477695566703828, iteration: 294183
loss: 1.042222499847412,grad_norm: 0.9999999279695458, iteration: 294184
loss: 1.023777961730957,grad_norm: 0.9848508709949759, iteration: 294185
loss: 0.9996010661125183,grad_norm: 0.9999990029309156, iteration: 294186
loss: 0.9952474236488342,grad_norm: 0.8453490639906099, iteration: 294187
loss: 1.002556324005127,grad_norm: 0.7721467082374192, iteration: 294188
loss: 1.0107084512710571,grad_norm: 0.8200420135291671, iteration: 294189
loss: 0.9925127625465393,grad_norm: 0.9683923397037448, iteration: 294190
loss: 0.9879636764526367,grad_norm: 0.7411676547393038, iteration: 294191
loss: 0.9826396703720093,grad_norm: 0.9999990734755576, iteration: 294192
loss: 0.9830884337425232,grad_norm: 0.7337081724889202, iteration: 294193
loss: 1.107187271118164,grad_norm: 1.0000000097159671, iteration: 294194
loss: 0.9765806794166565,grad_norm: 0.7004992719701577, iteration: 294195
loss: 0.976921021938324,grad_norm: 0.8750202227265717, iteration: 294196
loss: 0.9966453909873962,grad_norm: 0.9999989942829774, iteration: 294197
loss: 1.0183258056640625,grad_norm: 0.7213534924698201, iteration: 294198
loss: 0.9636756777763367,grad_norm: 0.7677618911269556, iteration: 294199
loss: 1.0234326124191284,grad_norm: 0.9420095278691984, iteration: 294200
loss: 0.969642698764801,grad_norm: 0.7975659960107645, iteration: 294201
loss: 0.9868049621582031,grad_norm: 0.9197780756987353, iteration: 294202
loss: 1.0324255228042603,grad_norm: 0.9581044821758276, iteration: 294203
loss: 0.9756567478179932,grad_norm: 0.7203391290725375, iteration: 294204
loss: 1.0098530054092407,grad_norm: 0.9847415769889678, iteration: 294205
loss: 1.034743309020996,grad_norm: 0.9999990602467316, iteration: 294206
loss: 0.9822936654090881,grad_norm: 0.9414809459032315, iteration: 294207
loss: 1.0109338760375977,grad_norm: 0.9352211755314049, iteration: 294208
loss: 0.9957411289215088,grad_norm: 0.8410866354627314, iteration: 294209
loss: 0.9851682782173157,grad_norm: 0.9137218411338431, iteration: 294210
loss: 1.0284416675567627,grad_norm: 0.7614361477950997, iteration: 294211
loss: 1.0015045404434204,grad_norm: 0.7266620629763286, iteration: 294212
loss: 0.9845744967460632,grad_norm: 0.8689825605495428, iteration: 294213
loss: 0.9817359447479248,grad_norm: 0.8913008401478943, iteration: 294214
loss: 1.0141890048980713,grad_norm: 0.8027133762431564, iteration: 294215
loss: 1.0161077976226807,grad_norm: 0.999999908356132, iteration: 294216
loss: 1.099199891090393,grad_norm: 1.0000000050918976, iteration: 294217
loss: 0.9995135068893433,grad_norm: 0.7227396628637516, iteration: 294218
loss: 1.0129215717315674,grad_norm: 0.761356211068037, iteration: 294219
loss: 0.9845524430274963,grad_norm: 0.7851950049891582, iteration: 294220
loss: 1.0349241495132446,grad_norm: 0.7845514617855953, iteration: 294221
loss: 1.0378892421722412,grad_norm: 0.8866606099576906, iteration: 294222
loss: 1.0691719055175781,grad_norm: 0.9999995757648554, iteration: 294223
loss: 1.0462044477462769,grad_norm: 0.9999993117858502, iteration: 294224
loss: 0.9734372496604919,grad_norm: 0.9999991212016581, iteration: 294225
loss: 1.0252434015274048,grad_norm: 0.9999998825258642, iteration: 294226
loss: 1.0116580724716187,grad_norm: 0.9999990331439024, iteration: 294227
loss: 1.0156776905059814,grad_norm: 0.9999995920941598, iteration: 294228
loss: 0.9716479778289795,grad_norm: 0.8512206414401511, iteration: 294229
loss: 0.974281907081604,grad_norm: 0.999999176375088, iteration: 294230
loss: 1.0077342987060547,grad_norm: 0.7840586695423168, iteration: 294231
loss: 1.0070610046386719,grad_norm: 0.8945949271324132, iteration: 294232
loss: 1.0001733303070068,grad_norm: 0.8314823342252617, iteration: 294233
loss: 1.0083279609680176,grad_norm: 0.7956737412420053, iteration: 294234
loss: 0.975238025188446,grad_norm: 0.9258531294141122, iteration: 294235
loss: 0.9662905931472778,grad_norm: 0.8276997813867019, iteration: 294236
loss: 1.0066105127334595,grad_norm: 0.7994390679399345, iteration: 294237
loss: 1.0206776857376099,grad_norm: 0.793127037897735, iteration: 294238
loss: 1.0109626054763794,grad_norm: 0.7003307949065672, iteration: 294239
loss: 1.0634242296218872,grad_norm: 0.9999999908762078, iteration: 294240
loss: 1.0368787050247192,grad_norm: 0.8819426095668297, iteration: 294241
loss: 1.0931850671768188,grad_norm: 0.9999990371375764, iteration: 294242
loss: 1.009385347366333,grad_norm: 0.9539168655637474, iteration: 294243
loss: 1.026064157485962,grad_norm: 0.8915427469120322, iteration: 294244
loss: 0.9806156158447266,grad_norm: 0.9999992744428474, iteration: 294245
loss: 1.022239089012146,grad_norm: 0.7178849196547494, iteration: 294246
loss: 0.9525885581970215,grad_norm: 0.7162916808939155, iteration: 294247
loss: 0.9904454946517944,grad_norm: 0.8423548281593135, iteration: 294248
loss: 0.9925004839897156,grad_norm: 0.9357251933958262, iteration: 294249
loss: 0.9568862915039062,grad_norm: 0.9441688211452786, iteration: 294250
loss: 0.9885535836219788,grad_norm: 0.7513370357903276, iteration: 294251
loss: 0.9633880853652954,grad_norm: 0.9855206029751692, iteration: 294252
loss: 0.9435487389564514,grad_norm: 0.9954065614064063, iteration: 294253
loss: 1.001097321510315,grad_norm: 0.999999071328233, iteration: 294254
loss: 1.0449814796447754,grad_norm: 0.9999990346854413, iteration: 294255
loss: 1.015787124633789,grad_norm: 0.9373981194976925, iteration: 294256
loss: 0.9908398389816284,grad_norm: 0.8310407912881779, iteration: 294257
loss: 0.9880039095878601,grad_norm: 0.8508160369598996, iteration: 294258
loss: 1.0901254415512085,grad_norm: 0.9999991666146205, iteration: 294259
loss: 0.9881051182746887,grad_norm: 0.9081494455344682, iteration: 294260
loss: 1.0384016036987305,grad_norm: 0.9999999744016757, iteration: 294261
loss: 1.0906144380569458,grad_norm: 0.9999998865922545, iteration: 294262
loss: 1.020255208015442,grad_norm: 0.9999998316559454, iteration: 294263
loss: 1.0515406131744385,grad_norm: 0.9999992903893957, iteration: 294264
loss: 1.0397212505340576,grad_norm: 0.9912460385453057, iteration: 294265
loss: 1.018835425376892,grad_norm: 0.9146176026541714, iteration: 294266
loss: 0.9910820722579956,grad_norm: 0.8224433589456106, iteration: 294267
loss: 1.0007529258728027,grad_norm: 0.7932428091972191, iteration: 294268
loss: 1.0237964391708374,grad_norm: 0.9999995929928409, iteration: 294269
loss: 0.9984136819839478,grad_norm: 0.8201915604114247, iteration: 294270
loss: 0.9704902768135071,grad_norm: 0.9999995150222583, iteration: 294271
loss: 0.9897461533546448,grad_norm: 0.7266710252659473, iteration: 294272
loss: 1.012468695640564,grad_norm: 0.9999994021460107, iteration: 294273
loss: 1.0379340648651123,grad_norm: 0.9999998271031045, iteration: 294274
loss: 0.9902353882789612,grad_norm: 0.836681109011313, iteration: 294275
loss: 0.9676126837730408,grad_norm: 0.8719261366905727, iteration: 294276
loss: 0.96267169713974,grad_norm: 0.7900815758174993, iteration: 294277
loss: 1.011041283607483,grad_norm: 0.8763077507867393, iteration: 294278
loss: 1.0188592672348022,grad_norm: 0.9848761191711501, iteration: 294279
loss: 0.9573931694030762,grad_norm: 0.8046872751211682, iteration: 294280
loss: 1.0091418027877808,grad_norm: 0.9999994951561361, iteration: 294281
loss: 1.0002857446670532,grad_norm: 0.829717942357078, iteration: 294282
loss: 1.09443998336792,grad_norm: 0.8585640352200474, iteration: 294283
loss: 1.067980170249939,grad_norm: 0.9999997099614482, iteration: 294284
loss: 1.009701132774353,grad_norm: 0.9930430612457143, iteration: 294285
loss: 1.0723901987075806,grad_norm: 0.9999993488423256, iteration: 294286
loss: 0.992755115032196,grad_norm: 0.9999989623490444, iteration: 294287
loss: 0.9849715232849121,grad_norm: 0.7946252811370564, iteration: 294288
loss: 0.967556893825531,grad_norm: 0.9204464182191148, iteration: 294289
loss: 0.979156494140625,grad_norm: 0.9374697909027719, iteration: 294290
loss: 1.0008875131607056,grad_norm: 0.75025949515215, iteration: 294291
loss: 1.017140507698059,grad_norm: 0.7764787727313652, iteration: 294292
loss: 0.9835432171821594,grad_norm: 0.8147385369603017, iteration: 294293
loss: 1.0285065174102783,grad_norm: 0.7682758721032646, iteration: 294294
loss: 0.9447205066680908,grad_norm: 0.9716861897406631, iteration: 294295
loss: 1.012862205505371,grad_norm: 0.9054506088897439, iteration: 294296
loss: 0.9799461364746094,grad_norm: 0.9523057822838618, iteration: 294297
loss: 0.9922917485237122,grad_norm: 0.9568452951164574, iteration: 294298
loss: 0.9872620105743408,grad_norm: 0.8230744438412935, iteration: 294299
loss: 0.9804803729057312,grad_norm: 0.7917728315759395, iteration: 294300
loss: 1.0085577964782715,grad_norm: 0.9112301571106449, iteration: 294301
loss: 1.0570223331451416,grad_norm: 0.9738976145283765, iteration: 294302
loss: 0.9754746556282043,grad_norm: 0.9999990907428148, iteration: 294303
loss: 1.0122343301773071,grad_norm: 0.7679024153852845, iteration: 294304
loss: 1.0215271711349487,grad_norm: 0.9520569800843869, iteration: 294305
loss: 1.0221118927001953,grad_norm: 0.7361164508193364, iteration: 294306
loss: 0.9843471646308899,grad_norm: 0.8959954099054269, iteration: 294307
loss: 0.9755499958992004,grad_norm: 0.7866064762037556, iteration: 294308
loss: 0.9982264637947083,grad_norm: 0.8943295672230217, iteration: 294309
loss: 1.030418038368225,grad_norm: 0.999999365224889, iteration: 294310
loss: 1.0618621110916138,grad_norm: 0.999999057261204, iteration: 294311
loss: 1.0543643236160278,grad_norm: 0.9999993296410399, iteration: 294312
loss: 1.0035569667816162,grad_norm: 0.9704105087253942, iteration: 294313
loss: 0.9953896403312683,grad_norm: 0.7266417606124429, iteration: 294314
loss: 0.9565590620040894,grad_norm: 0.7454195181630751, iteration: 294315
loss: 0.9827900528907776,grad_norm: 0.8913363152544013, iteration: 294316
loss: 1.011115550994873,grad_norm: 0.8697724900383276, iteration: 294317
loss: 0.9838537573814392,grad_norm: 0.8615877251774098, iteration: 294318
loss: 1.0141723155975342,grad_norm: 0.9663026846746602, iteration: 294319
loss: 1.0301045179367065,grad_norm: 0.9999991669271828, iteration: 294320
loss: 0.979570746421814,grad_norm: 0.9442570579217441, iteration: 294321
loss: 1.0161269903182983,grad_norm: 0.9133211020312842, iteration: 294322
loss: 0.9800113439559937,grad_norm: 0.9496636976322126, iteration: 294323
loss: 0.9758643507957458,grad_norm: 0.9264542751104035, iteration: 294324
loss: 1.0144343376159668,grad_norm: 0.9999993327964788, iteration: 294325
loss: 1.022075891494751,grad_norm: 0.9999991414624435, iteration: 294326
loss: 1.0362690687179565,grad_norm: 0.9999998870771001, iteration: 294327
loss: 1.0060747861862183,grad_norm: 0.9456080612759972, iteration: 294328
loss: 1.0597642660140991,grad_norm: 0.9999997533325947, iteration: 294329
loss: 0.9748712182044983,grad_norm: 0.958441883581612, iteration: 294330
loss: 0.9787812829017639,grad_norm: 0.7868786592064484, iteration: 294331
loss: 0.997194230556488,grad_norm: 0.8168982587061153, iteration: 294332
loss: 1.0259056091308594,grad_norm: 0.9999993057241953, iteration: 294333
loss: 1.0176318883895874,grad_norm: 0.7884771087746415, iteration: 294334
loss: 1.032643437385559,grad_norm: 0.7706196216374583, iteration: 294335
loss: 1.004753828048706,grad_norm: 0.999999288535175, iteration: 294336
loss: 1.0250357389450073,grad_norm: 0.9299807011418386, iteration: 294337
loss: 1.0541660785675049,grad_norm: 0.6887333296181188, iteration: 294338
loss: 1.0277231931686401,grad_norm: 0.9537230539736061, iteration: 294339
loss: 0.9868454337120056,grad_norm: 0.9999998488996014, iteration: 294340
loss: 0.9880118370056152,grad_norm: 0.9161085368358381, iteration: 294341
loss: 1.0447702407836914,grad_norm: 0.9999992283017377, iteration: 294342
loss: 0.9968163967132568,grad_norm: 0.6929980112405444, iteration: 294343
loss: 0.9650583863258362,grad_norm: 0.9999995935075232, iteration: 294344
loss: 0.9888237118721008,grad_norm: 0.8815019569496274, iteration: 294345
loss: 1.1161181926727295,grad_norm: 0.9999999305802848, iteration: 294346
loss: 0.9865370392799377,grad_norm: 0.9999999172877175, iteration: 294347
loss: 1.054775357246399,grad_norm: 0.9999998953579222, iteration: 294348
loss: 0.9806036353111267,grad_norm: 0.9999992671578057, iteration: 294349
loss: 1.009099006652832,grad_norm: 0.9044187348933732, iteration: 294350
loss: 1.0189213752746582,grad_norm: 0.7658730921611333, iteration: 294351
loss: 0.984121561050415,grad_norm: 0.9999998398433295, iteration: 294352
loss: 1.0331577062606812,grad_norm: 0.9999994541397864, iteration: 294353
loss: 1.0358848571777344,grad_norm: 0.9941286656898016, iteration: 294354
loss: 0.957638144493103,grad_norm: 0.9839112944885776, iteration: 294355
loss: 1.046875238418579,grad_norm: 0.999999163857808, iteration: 294356
loss: 1.0144046545028687,grad_norm: 0.8660312981991032, iteration: 294357
loss: 1.027647852897644,grad_norm: 0.7672209260076208, iteration: 294358
loss: 1.0098923444747925,grad_norm: 0.7926407428509246, iteration: 294359
loss: 1.0172423124313354,grad_norm: 0.9037882346823273, iteration: 294360
loss: 1.0053130388259888,grad_norm: 0.9356803121909648, iteration: 294361
loss: 0.980437159538269,grad_norm: 0.8619082273528554, iteration: 294362
loss: 0.9841893911361694,grad_norm: 0.8742489728731554, iteration: 294363
loss: 1.0250391960144043,grad_norm: 0.9999993726981736, iteration: 294364
loss: 1.0442383289337158,grad_norm: 0.9389669205466011, iteration: 294365
loss: 0.9910346269607544,grad_norm: 0.8613831885040532, iteration: 294366
loss: 0.9935638904571533,grad_norm: 0.793754997530591, iteration: 294367
loss: 1.0133243799209595,grad_norm: 0.6474904568530506, iteration: 294368
loss: 0.9642685055732727,grad_norm: 0.8170784803898428, iteration: 294369
loss: 1.0053144693374634,grad_norm: 0.8744680727395102, iteration: 294370
loss: 0.9984580874443054,grad_norm: 0.8806785403652024, iteration: 294371
loss: 1.035133719444275,grad_norm: 0.9999991842876836, iteration: 294372
loss: 1.0316364765167236,grad_norm: 0.9999996520567529, iteration: 294373
loss: 0.9957630634307861,grad_norm: 0.6723145878901876, iteration: 294374
loss: 1.0450031757354736,grad_norm: 0.9999990388342094, iteration: 294375
loss: 1.0168919563293457,grad_norm: 0.9999990293390918, iteration: 294376
loss: 0.9796819090843201,grad_norm: 0.9999990711102331, iteration: 294377
loss: 0.9651525020599365,grad_norm: 0.8441152346950533, iteration: 294378
loss: 0.9970839619636536,grad_norm: 0.9999999087359747, iteration: 294379
loss: 1.0072227716445923,grad_norm: 0.8983170559386092, iteration: 294380
loss: 1.0418294668197632,grad_norm: 0.7904799734449857, iteration: 294381
loss: 1.0780237913131714,grad_norm: 0.9999998393060271, iteration: 294382
loss: 1.0362709760665894,grad_norm: 0.9999990541117246, iteration: 294383
loss: 0.9917320609092712,grad_norm: 0.8248574070899012, iteration: 294384
loss: 1.0200724601745605,grad_norm: 0.7512298858267763, iteration: 294385
loss: 0.9564045667648315,grad_norm: 0.9597996572776126, iteration: 294386
loss: 1.0116232633590698,grad_norm: 0.8440191296088432, iteration: 294387
loss: 0.9967557191848755,grad_norm: 0.9999996338571352, iteration: 294388
loss: 0.9970262050628662,grad_norm: 0.9999991095201847, iteration: 294389
loss: 1.0075594186782837,grad_norm: 0.855948954536841, iteration: 294390
loss: 0.9740036725997925,grad_norm: 0.9669240119331489, iteration: 294391
loss: 1.2268387079238892,grad_norm: 0.999999206791518, iteration: 294392
loss: 1.0448956489562988,grad_norm: 0.9999994869342478, iteration: 294393
loss: 1.0632530450820923,grad_norm: 0.9843178115804709, iteration: 294394
loss: 0.9852058291435242,grad_norm: 0.932175292901212, iteration: 294395
loss: 1.0067147016525269,grad_norm: 0.7714970352000527, iteration: 294396
loss: 1.0131797790527344,grad_norm: 0.8887483566314045, iteration: 294397
loss: 0.9992507696151733,grad_norm: 0.9999995986755876, iteration: 294398
loss: 1.0850085020065308,grad_norm: 0.9999997019491081, iteration: 294399
loss: 1.0890461206436157,grad_norm: 0.9999993463701156, iteration: 294400
loss: 0.9958324432373047,grad_norm: 0.8250098340500684, iteration: 294401
loss: 1.0175249576568604,grad_norm: 0.9999996229566323, iteration: 294402
loss: 1.0625884532928467,grad_norm: 0.9999994753327035, iteration: 294403
loss: 1.1114898920059204,grad_norm: 0.9999997563347949, iteration: 294404
loss: 1.0368982553482056,grad_norm: 0.7518476364039848, iteration: 294405
loss: 0.982128381729126,grad_norm: 0.8806964969125962, iteration: 294406
loss: 1.002245545387268,grad_norm: 0.8505587155139834, iteration: 294407
loss: 0.9996293187141418,grad_norm: 0.7206933902039394, iteration: 294408
loss: 1.0196338891983032,grad_norm: 0.9999997221792958, iteration: 294409
loss: 0.9873932003974915,grad_norm: 0.9760427993512876, iteration: 294410
loss: 1.0357519388198853,grad_norm: 0.9999992259749196, iteration: 294411
loss: 1.010185956954956,grad_norm: 0.8681640906714261, iteration: 294412
loss: 1.0160796642303467,grad_norm: 0.6690526589819922, iteration: 294413
loss: 1.0427302122116089,grad_norm: 0.7874488761258714, iteration: 294414
loss: 1.0086547136306763,grad_norm: 0.907453270583104, iteration: 294415
loss: 0.9787312150001526,grad_norm: 0.9050615296650958, iteration: 294416
loss: 0.9883166551589966,grad_norm: 0.9993569557166073, iteration: 294417
loss: 0.9916643500328064,grad_norm: 0.9777127164948717, iteration: 294418
loss: 0.9980219006538391,grad_norm: 0.8999178062751165, iteration: 294419
loss: 1.0361469984054565,grad_norm: 0.9999990432211808, iteration: 294420
loss: 1.032466173171997,grad_norm: 0.8430864268707953, iteration: 294421
loss: 0.9914369583129883,grad_norm: 0.9999994911276598, iteration: 294422
loss: 0.973466694355011,grad_norm: 0.9999992133504084, iteration: 294423
loss: 1.0207260847091675,grad_norm: 0.999999869553728, iteration: 294424
loss: 0.9987565875053406,grad_norm: 0.9552811749744852, iteration: 294425
loss: 1.064537525177002,grad_norm: 0.9999994035386772, iteration: 294426
loss: 0.9778099060058594,grad_norm: 0.9132378132803279, iteration: 294427
loss: 1.0102403163909912,grad_norm: 0.8992003295436586, iteration: 294428
loss: 1.0896658897399902,grad_norm: 0.9999993113238971, iteration: 294429
loss: 1.0141631364822388,grad_norm: 0.9461789746824129, iteration: 294430
loss: 1.019547700881958,grad_norm: 0.8172734247217711, iteration: 294431
loss: 1.0532492399215698,grad_norm: 0.9999989855312041, iteration: 294432
loss: 0.973876953125,grad_norm: 0.7506162450697766, iteration: 294433
loss: 1.0257238149642944,grad_norm: 0.9376747552505966, iteration: 294434
loss: 1.0084377527236938,grad_norm: 0.8309266629779217, iteration: 294435
loss: 1.0028234720230103,grad_norm: 0.7471508533585717, iteration: 294436
loss: 1.0160503387451172,grad_norm: 0.9999991434003365, iteration: 294437
loss: 0.9715287089347839,grad_norm: 0.9336388261402314, iteration: 294438
loss: 0.9989840984344482,grad_norm: 0.9935253323854598, iteration: 294439
loss: 1.0059930086135864,grad_norm: 0.8751266987515968, iteration: 294440
loss: 0.9895892143249512,grad_norm: 0.8027986457203361, iteration: 294441
loss: 1.104331612586975,grad_norm: 0.99999975585488, iteration: 294442
loss: 1.0942541360855103,grad_norm: 0.9999998501581722, iteration: 294443
loss: 1.0411494970321655,grad_norm: 0.9999995621509811, iteration: 294444
loss: 0.9744694828987122,grad_norm: 0.9740950961621718, iteration: 294445
loss: 0.9944823384284973,grad_norm: 0.934194017463372, iteration: 294446
loss: 1.060971736907959,grad_norm: 0.9999998549356169, iteration: 294447
loss: 1.0481846332550049,grad_norm: 1.0000000003811378, iteration: 294448
loss: 0.9851729273796082,grad_norm: 0.9999991404459134, iteration: 294449
loss: 1.0799487829208374,grad_norm: 0.999999864787707, iteration: 294450
loss: 0.9952104687690735,grad_norm: 0.9999995126510868, iteration: 294451
loss: 1.0249578952789307,grad_norm: 0.8809943344385486, iteration: 294452
loss: 0.9911538362503052,grad_norm: 0.9999993864185549, iteration: 294453
loss: 1.0480742454528809,grad_norm: 0.9999995090994738, iteration: 294454
loss: 1.008574366569519,grad_norm: 0.9556607965990941, iteration: 294455
loss: 1.0284944772720337,grad_norm: 0.999999352954665, iteration: 294456
loss: 1.0084197521209717,grad_norm: 0.7892479240733067, iteration: 294457
loss: 1.102885365486145,grad_norm: 0.9619503171242788, iteration: 294458
loss: 0.9945421814918518,grad_norm: 0.7643349310752505, iteration: 294459
loss: 0.9856504201889038,grad_norm: 0.8911881303942114, iteration: 294460
loss: 1.2158303260803223,grad_norm: 0.999999251915206, iteration: 294461
loss: 1.0353819131851196,grad_norm: 0.9999998946608043, iteration: 294462
loss: 1.0121443271636963,grad_norm: 0.9999998333715302, iteration: 294463
loss: 1.0506995916366577,grad_norm: 0.9999996820124611, iteration: 294464
loss: 0.9712910652160645,grad_norm: 0.9007690520467608, iteration: 294465
loss: 1.074061393737793,grad_norm: 0.999999724466053, iteration: 294466
loss: 1.0448106527328491,grad_norm: 0.9999991997659574, iteration: 294467
loss: 1.0579499006271362,grad_norm: 0.9999999910183204, iteration: 294468
loss: 1.0294325351715088,grad_norm: 0.9999991125937933, iteration: 294469
loss: 1.0303032398223877,grad_norm: 0.8838843508934486, iteration: 294470
loss: 1.012250304222107,grad_norm: 0.8972098553011265, iteration: 294471
loss: 1.0400593280792236,grad_norm: 0.9999997107858581, iteration: 294472
loss: 1.0599044561386108,grad_norm: 0.9999990969212368, iteration: 294473
loss: 1.0180214643478394,grad_norm: 0.9999995558725955, iteration: 294474
loss: 1.0502718687057495,grad_norm: 0.990078062694846, iteration: 294475
loss: 0.9831703305244446,grad_norm: 0.7859266767940537, iteration: 294476
loss: 1.0696605443954468,grad_norm: 0.9619512901074679, iteration: 294477
loss: 1.0466850996017456,grad_norm: 0.9999998578843945, iteration: 294478
loss: 0.9908546209335327,grad_norm: 0.8278404322324534, iteration: 294479
loss: 1.0615311861038208,grad_norm: 0.7974520238846287, iteration: 294480
loss: 0.9948993921279907,grad_norm: 0.9443435742323409, iteration: 294481
loss: 0.9928149580955505,grad_norm: 0.8123454580674074, iteration: 294482
loss: 0.9713224172592163,grad_norm: 0.9999999237355467, iteration: 294483
loss: 1.0175981521606445,grad_norm: 0.9999992429386456, iteration: 294484
loss: 1.1077766418457031,grad_norm: 0.999999667414873, iteration: 294485
loss: 0.9928737878799438,grad_norm: 0.9151971347220553, iteration: 294486
loss: 0.9973907470703125,grad_norm: 0.9696362814243125, iteration: 294487
loss: 1.0405930280685425,grad_norm: 0.9999991213896988, iteration: 294488
loss: 1.0200997591018677,grad_norm: 0.8262455200957342, iteration: 294489
loss: 1.059633493423462,grad_norm: 0.999999196778849, iteration: 294490
loss: 0.9969291090965271,grad_norm: 0.9999998950071144, iteration: 294491
loss: 0.9802992343902588,grad_norm: 0.9999999982572976, iteration: 294492
loss: 0.9903096556663513,grad_norm: 0.7554586745117887, iteration: 294493
loss: 1.0577462911605835,grad_norm: 0.999999754202372, iteration: 294494
loss: 1.0069823265075684,grad_norm: 0.8592072573993075, iteration: 294495
loss: 1.0030620098114014,grad_norm: 0.9999991084610087, iteration: 294496
loss: 1.0609194040298462,grad_norm: 0.9666566192800728, iteration: 294497
loss: 0.9945381879806519,grad_norm: 0.9287205391387596, iteration: 294498
loss: 1.076987624168396,grad_norm: 0.9999990134433634, iteration: 294499
loss: 1.0264372825622559,grad_norm: 0.9999996685130571, iteration: 294500
loss: 1.1492228507995605,grad_norm: 0.9999995742094062, iteration: 294501
loss: 1.0182195901870728,grad_norm: 0.8913262646024004, iteration: 294502
loss: 0.9741900563240051,grad_norm: 0.9999994448017395, iteration: 294503
loss: 1.1113454103469849,grad_norm: 0.9999993291862774, iteration: 294504
loss: 1.043344497680664,grad_norm: 0.8675157042947634, iteration: 294505
loss: 1.019822120666504,grad_norm: 0.9999991508952017, iteration: 294506
loss: 1.0905866622924805,grad_norm: 0.8820350069645277, iteration: 294507
loss: 1.0543557405471802,grad_norm: 0.9999994169406079, iteration: 294508
loss: 1.0080974102020264,grad_norm: 0.9999994061448828, iteration: 294509
loss: 1.1275287866592407,grad_norm: 0.999999881842662, iteration: 294510
loss: 1.0279232263565063,grad_norm: 0.9999995412560683, iteration: 294511
loss: 1.0924320220947266,grad_norm: 0.9999993234547963, iteration: 294512
loss: 1.0322520732879639,grad_norm: 0.9971156759541685, iteration: 294513
loss: 1.0714625120162964,grad_norm: 0.9999996958352183, iteration: 294514
loss: 0.9875624775886536,grad_norm: 0.9999991732065658, iteration: 294515
loss: 1.0032471418380737,grad_norm: 0.9999992227753739, iteration: 294516
loss: 1.0669838190078735,grad_norm: 0.9999994794383306, iteration: 294517
loss: 1.1252323389053345,grad_norm: 1.000000007178662, iteration: 294518
loss: 1.0426274538040161,grad_norm: 0.9999990962194643, iteration: 294519
loss: 1.025380253791809,grad_norm: 0.999999464836203, iteration: 294520
loss: 1.1527456045150757,grad_norm: 1.000000018573659, iteration: 294521
loss: 1.0546760559082031,grad_norm: 0.9999995271171792, iteration: 294522
loss: 1.1319384574890137,grad_norm: 0.9088461121226065, iteration: 294523
loss: 0.9650507569313049,grad_norm: 0.8379073636372922, iteration: 294524
loss: 1.3438165187835693,grad_norm: 0.9999995745245043, iteration: 294525
loss: 1.054420828819275,grad_norm: 0.9999994769452144, iteration: 294526
loss: 1.1016982793807983,grad_norm: 0.9999992159994711, iteration: 294527
loss: 1.288793921470642,grad_norm: 0.9999994965529383, iteration: 294528
loss: 1.3641546964645386,grad_norm: 0.9999997959347551, iteration: 294529
loss: 1.2387492656707764,grad_norm: 0.9999998634305585, iteration: 294530
loss: 1.1374083757400513,grad_norm: 0.9999999402888384, iteration: 294531
loss: 1.1889152526855469,grad_norm: 0.9999999446093014, iteration: 294532
loss: 1.089986801147461,grad_norm: 0.9527275392906277, iteration: 294533
loss: 1.1312534809112549,grad_norm: 0.9999992407106583, iteration: 294534
loss: 1.3972105979919434,grad_norm: 0.9999994862388437, iteration: 294535
loss: 1.1066514253616333,grad_norm: 0.9999997199484352, iteration: 294536
loss: 1.1684671640396118,grad_norm: 0.9999996170773736, iteration: 294537
loss: 1.164090633392334,grad_norm: 0.999999273141953, iteration: 294538
loss: 1.1640620231628418,grad_norm: 0.9999996184729146, iteration: 294539
loss: 1.0303267240524292,grad_norm: 0.9999995482491411, iteration: 294540
loss: 1.0642398595809937,grad_norm: 0.9999992150367428, iteration: 294541
loss: 1.0316380262374878,grad_norm: 0.9999993714183836, iteration: 294542
loss: 1.0286030769348145,grad_norm: 0.9999996165992865, iteration: 294543
loss: 1.0479532480239868,grad_norm: 0.9999994783041335, iteration: 294544
loss: 1.0498096942901611,grad_norm: 0.9999994185748781, iteration: 294545
loss: 1.033295750617981,grad_norm: 0.9999998030547255, iteration: 294546
loss: 1.23448646068573,grad_norm: 0.9999998176079861, iteration: 294547
loss: 1.0532686710357666,grad_norm: 0.999999435634512, iteration: 294548
loss: 0.9625067710876465,grad_norm: 0.9668948648680827, iteration: 294549
loss: 1.2150651216506958,grad_norm: 0.999999975194703, iteration: 294550
loss: 1.0715351104736328,grad_norm: 0.9999992436890554, iteration: 294551
loss: 1.1219030618667603,grad_norm: 0.9999998460923497, iteration: 294552
loss: 1.028061866760254,grad_norm: 0.9114122519641399, iteration: 294553
loss: 1.0089573860168457,grad_norm: 0.8715130432897931, iteration: 294554
loss: 1.0088160037994385,grad_norm: 0.9999995081119917, iteration: 294555
loss: 1.0139907598495483,grad_norm: 0.9999993812307525, iteration: 294556
loss: 1.0731217861175537,grad_norm: 0.8808418756703387, iteration: 294557
loss: 1.02375328540802,grad_norm: 0.9999995499932383, iteration: 294558
loss: 1.093911051750183,grad_norm: 0.9999991486093514, iteration: 294559
loss: 1.0150587558746338,grad_norm: 0.9999991006680338, iteration: 294560
loss: 1.06169855594635,grad_norm: 0.9999996447051673, iteration: 294561
loss: 1.0866936445236206,grad_norm: 0.9999996459804062, iteration: 294562
loss: 1.3029956817626953,grad_norm: 0.9999999639661058, iteration: 294563
loss: 1.1632816791534424,grad_norm: 0.9999997970497951, iteration: 294564
loss: 1.009500503540039,grad_norm: 0.9277915857055457, iteration: 294565
loss: 1.1002546548843384,grad_norm: 0.9999999948010208, iteration: 294566
loss: 1.0034949779510498,grad_norm: 0.9999992678579039, iteration: 294567
loss: 0.9821913242340088,grad_norm: 0.9999998051848178, iteration: 294568
loss: 1.0183048248291016,grad_norm: 0.9315926736429182, iteration: 294569
loss: 1.1540062427520752,grad_norm: 0.9999998412711207, iteration: 294570
loss: 1.0695858001708984,grad_norm: 0.9999999172824554, iteration: 294571
loss: 1.380990743637085,grad_norm: 0.9999999270418669, iteration: 294572
loss: 1.1078964471817017,grad_norm: 0.9999991831629097, iteration: 294573
loss: 0.9771960377693176,grad_norm: 0.9999992470313519, iteration: 294574
loss: 1.0389952659606934,grad_norm: 0.866897763759001, iteration: 294575
loss: 1.045969009399414,grad_norm: 0.9999991717249028, iteration: 294576
loss: 1.0773698091506958,grad_norm: 0.7889630721416615, iteration: 294577
loss: 1.1313353776931763,grad_norm: 0.9999996803238914, iteration: 294578
loss: 1.1232224702835083,grad_norm: 0.9999992218156996, iteration: 294579
loss: 1.184838056564331,grad_norm: 0.9999997023088353, iteration: 294580
loss: 1.0861473083496094,grad_norm: 0.9999993729374018, iteration: 294581
loss: 1.129833459854126,grad_norm: 0.9999999559391749, iteration: 294582
loss: 1.0282056331634521,grad_norm: 0.9999991572695441, iteration: 294583
loss: 1.157845377922058,grad_norm: 0.999999482663935, iteration: 294584
loss: 1.1879589557647705,grad_norm: 0.9999992763253562, iteration: 294585
loss: 1.1330852508544922,grad_norm: 0.9999999776551745, iteration: 294586
loss: 1.1682653427124023,grad_norm: 0.9999993043410718, iteration: 294587
loss: 1.6780441999435425,grad_norm: 0.9999998389758744, iteration: 294588
loss: 1.0322550535202026,grad_norm: 0.9999996373013006, iteration: 294589
loss: 1.1239410638809204,grad_norm: 0.9999998794100823, iteration: 294590
loss: 1.0600271224975586,grad_norm: 0.9778563987335349, iteration: 294591
loss: 1.142765760421753,grad_norm: 0.999999383199674, iteration: 294592
loss: 1.0177580118179321,grad_norm: 0.9999995719013415, iteration: 294593
loss: 0.9788907766342163,grad_norm: 0.9999990564799501, iteration: 294594
loss: 1.0080304145812988,grad_norm: 0.8809395266309948, iteration: 294595
loss: 1.0318692922592163,grad_norm: 0.9448719428052627, iteration: 294596
loss: 1.0261763334274292,grad_norm: 0.9999989854090694, iteration: 294597
loss: 1.0209455490112305,grad_norm: 0.9999990492954551, iteration: 294598
loss: 1.0810261964797974,grad_norm: 0.9999995856570048, iteration: 294599
loss: 1.1616955995559692,grad_norm: 0.9999992611513259, iteration: 294600
loss: 1.0493359565734863,grad_norm: 0.9999998914814384, iteration: 294601
loss: 1.0338151454925537,grad_norm: 0.9999994686590392, iteration: 294602
loss: 1.0992904901504517,grad_norm: 0.9999995835375326, iteration: 294603
loss: 1.0818610191345215,grad_norm: 0.9999994081441275, iteration: 294604
loss: 1.0215638875961304,grad_norm: 0.885511710266721, iteration: 294605
loss: 1.087801456451416,grad_norm: 0.9999997472038348, iteration: 294606
loss: 1.0317128896713257,grad_norm: 0.9999998359291574, iteration: 294607
loss: 1.0365970134735107,grad_norm: 0.9999992088527553, iteration: 294608
loss: 1.1044399738311768,grad_norm: 0.9999997648627391, iteration: 294609
loss: 1.0007426738739014,grad_norm: 0.9231019084743378, iteration: 294610
loss: 1.1167570352554321,grad_norm: 0.9999998629346385, iteration: 294611
loss: 1.0682759284973145,grad_norm: 0.8620859520233756, iteration: 294612
loss: 1.0735909938812256,grad_norm: 0.9999996167112617, iteration: 294613
loss: 1.0056504011154175,grad_norm: 0.9999999836217369, iteration: 294614
loss: 1.223633885383606,grad_norm: 0.9999998942025982, iteration: 294615
loss: 1.0423692464828491,grad_norm: 0.9999990693790086, iteration: 294616
loss: 1.2138335704803467,grad_norm: 0.999999868813746, iteration: 294617
loss: 1.1413389444351196,grad_norm: 0.9999996472604961, iteration: 294618
loss: 1.007093906402588,grad_norm: 0.868499872333396, iteration: 294619
loss: 1.0131864547729492,grad_norm: 0.9999993856425806, iteration: 294620
loss: 1.0865486860275269,grad_norm: 0.8491861632743306, iteration: 294621
loss: 1.1439366340637207,grad_norm: 0.9481285799726933, iteration: 294622
loss: 1.0065240859985352,grad_norm: 0.9999991847326501, iteration: 294623
loss: 1.0369306802749634,grad_norm: 0.9999993558010477, iteration: 294624
loss: 1.017350673675537,grad_norm: 0.999999219680612, iteration: 294625
loss: 1.2963954210281372,grad_norm: 0.9999993029322939, iteration: 294626
loss: 1.0492068529129028,grad_norm: 1.0000000450145052, iteration: 294627
loss: 1.103165864944458,grad_norm: 0.9999993929691905, iteration: 294628
loss: 1.073871374130249,grad_norm: 1.0000000478480888, iteration: 294629
loss: 1.0012425184249878,grad_norm: 0.7978944094434292, iteration: 294630
loss: 1.0384490489959717,grad_norm: 0.9999992302530446, iteration: 294631
loss: 1.0532779693603516,grad_norm: 0.9999995261770567, iteration: 294632
loss: 1.0570621490478516,grad_norm: 1.0000000019383548, iteration: 294633
loss: 1.0474798679351807,grad_norm: 0.9999995518612834, iteration: 294634
loss: 1.1510748863220215,grad_norm: 0.9999996290082762, iteration: 294635
loss: 1.114058017730713,grad_norm: 0.9999993988629171, iteration: 294636
loss: 1.0340394973754883,grad_norm: 0.9999994546827745, iteration: 294637
loss: 1.1436189413070679,grad_norm: 0.9999992535581054, iteration: 294638
loss: 1.059865117073059,grad_norm: 0.9999999188415898, iteration: 294639
loss: 1.101291298866272,grad_norm: 0.999999226876056, iteration: 294640
loss: 0.9928263425827026,grad_norm: 0.9999996994817751, iteration: 294641
loss: 1.3710663318634033,grad_norm: 0.9999997872313257, iteration: 294642
loss: 1.1186985969543457,grad_norm: 0.9999999050972568, iteration: 294643
loss: 1.0063285827636719,grad_norm: 0.9999992525637271, iteration: 294644
loss: 1.024011254310608,grad_norm: 0.9999995599667012, iteration: 294645
loss: 1.1684683561325073,grad_norm: 0.9999997533488183, iteration: 294646
loss: 1.1254642009735107,grad_norm: 0.9999996800259644, iteration: 294647
loss: 1.1026698350906372,grad_norm: 0.9999998286998716, iteration: 294648
loss: 1.0391490459442139,grad_norm: 0.9999991382137234, iteration: 294649
loss: 1.0347704887390137,grad_norm: 0.9999992727529682, iteration: 294650
loss: 1.192469596862793,grad_norm: 0.9999997150584655, iteration: 294651
loss: 1.0503166913986206,grad_norm: 0.9999992961941744, iteration: 294652
loss: 1.047629714012146,grad_norm: 0.8780383286109167, iteration: 294653
loss: 1.1458085775375366,grad_norm: 0.9999999472954233, iteration: 294654
loss: 0.9841687679290771,grad_norm: 0.9999991113439125, iteration: 294655
loss: 1.0326813459396362,grad_norm: 0.9718353218596036, iteration: 294656
loss: 1.1870379447937012,grad_norm: 0.9999997238193286, iteration: 294657
loss: 1.1419641971588135,grad_norm: 0.9999996109327838, iteration: 294658
loss: 1.1120623350143433,grad_norm: 0.9999998756549969, iteration: 294659
loss: 1.036085844039917,grad_norm: 0.9999998231474623, iteration: 294660
loss: 1.3033769130706787,grad_norm: 0.9999997666534167, iteration: 294661
loss: 1.1022266149520874,grad_norm: 0.999999624395071, iteration: 294662
loss: 1.0828492641448975,grad_norm: 0.9999993825746084, iteration: 294663
loss: 1.089501142501831,grad_norm: 0.9999999176563125, iteration: 294664
loss: 1.010613203048706,grad_norm: 0.9215845821980954, iteration: 294665
loss: 1.0555591583251953,grad_norm: 0.9999994446275917, iteration: 294666
loss: 1.070572018623352,grad_norm: 0.9999996708426969, iteration: 294667
loss: 1.0164549350738525,grad_norm: 0.999999472743593, iteration: 294668
loss: 1.0642915964126587,grad_norm: 0.999999987429099, iteration: 294669
loss: 1.1032708883285522,grad_norm: 0.9999998396492386, iteration: 294670
loss: 1.016140103340149,grad_norm: 0.9999994463508328, iteration: 294671
loss: 1.0377801656723022,grad_norm: 0.9999992967870603, iteration: 294672
loss: 1.070133924484253,grad_norm: 0.9999991295033113, iteration: 294673
loss: 1.2050726413726807,grad_norm: 0.9999999210702332, iteration: 294674
loss: 0.9947731494903564,grad_norm: 0.9222941645984355, iteration: 294675
loss: 1.049939513206482,grad_norm: 0.9999991004617543, iteration: 294676
loss: 0.9832936525344849,grad_norm: 0.9999998583830482, iteration: 294677
loss: 1.1502063274383545,grad_norm: 0.9999995328281335, iteration: 294678
loss: 1.1573156118392944,grad_norm: 0.999999844496797, iteration: 294679
loss: 1.0305218696594238,grad_norm: 0.9999995128044746, iteration: 294680
loss: 0.9757440090179443,grad_norm: 0.9999990344887083, iteration: 294681
loss: 1.0883253812789917,grad_norm: 0.9999992030921208, iteration: 294682
loss: 1.0990934371948242,grad_norm: 1.0000000254564205, iteration: 294683
loss: 1.1581896543502808,grad_norm: 0.9999994397875017, iteration: 294684
loss: 1.041512131690979,grad_norm: 0.9999998205824397, iteration: 294685
loss: 1.13841712474823,grad_norm: 0.976969574084084, iteration: 294686
loss: 1.0806387662887573,grad_norm: 0.9999993413054091, iteration: 294687
loss: 1.056746244430542,grad_norm: 0.9999993366714325, iteration: 294688
loss: 1.0559455156326294,grad_norm: 0.9999991185063419, iteration: 294689
loss: 1.0714285373687744,grad_norm: 0.9999992487382606, iteration: 294690
loss: 1.4166841506958008,grad_norm: 0.999999358611749, iteration: 294691
loss: 1.0447901487350464,grad_norm: 0.9999989768216132, iteration: 294692
loss: 0.9688231348991394,grad_norm: 0.9414577824106138, iteration: 294693
loss: 1.1109377145767212,grad_norm: 0.9999992690194774, iteration: 294694
loss: 1.037154197692871,grad_norm: 0.9999990892612574, iteration: 294695
loss: 0.971118152141571,grad_norm: 0.9999998957833439, iteration: 294696
loss: 1.0586944818496704,grad_norm: 0.999999600672103, iteration: 294697
loss: 1.138530969619751,grad_norm: 0.9999996194234442, iteration: 294698
loss: 1.0462779998779297,grad_norm: 0.9999992279196543, iteration: 294699
loss: 1.1688127517700195,grad_norm: 0.9999998134976126, iteration: 294700
loss: 0.9702481031417847,grad_norm: 0.9999992394252668, iteration: 294701
loss: 1.082656741142273,grad_norm: 0.9999993818357541, iteration: 294702
loss: 1.0204955339431763,grad_norm: 0.999999244077015, iteration: 294703
loss: 0.9445405006408691,grad_norm: 0.8186187428501988, iteration: 294704
loss: 1.04743492603302,grad_norm: 0.9999992056815553, iteration: 294705
loss: 0.9483532905578613,grad_norm: 0.9999991150511339, iteration: 294706
loss: 1.0385477542877197,grad_norm: 0.8621475732440997, iteration: 294707
loss: 1.1421589851379395,grad_norm: 0.9999991582015264, iteration: 294708
loss: 1.037137746810913,grad_norm: 0.9999991898148263, iteration: 294709
loss: 1.0290018320083618,grad_norm: 0.9414278217284343, iteration: 294710
loss: 1.020297646522522,grad_norm: 0.9999997647266238, iteration: 294711
loss: 0.9596540927886963,grad_norm: 0.8341583490348446, iteration: 294712
loss: 1.044224739074707,grad_norm: 0.9999997039449287, iteration: 294713
loss: 1.0286260843276978,grad_norm: 0.9999995116557439, iteration: 294714
loss: 0.9962195754051208,grad_norm: 0.8889462152202314, iteration: 294715
loss: 1.0920209884643555,grad_norm: 0.9999994687661561, iteration: 294716
loss: 0.9872031807899475,grad_norm: 0.8760678882666453, iteration: 294717
loss: 1.0305644273757935,grad_norm: 0.9978735505947307, iteration: 294718
loss: 1.1400792598724365,grad_norm: 1.0000000321562308, iteration: 294719
loss: 0.9926950931549072,grad_norm: 0.9999998161639582, iteration: 294720
loss: 1.0342522859573364,grad_norm: 0.9999990385566847, iteration: 294721
loss: 0.9813317656517029,grad_norm: 0.9999998822016197, iteration: 294722
loss: 1.1401724815368652,grad_norm: 0.9999993721925537, iteration: 294723
loss: 1.056748628616333,grad_norm: 0.9999990759887997, iteration: 294724
loss: 1.0486236810684204,grad_norm: 0.9999999496368732, iteration: 294725
loss: 1.0992071628570557,grad_norm: 0.9999998903591163, iteration: 294726
loss: 1.0256425142288208,grad_norm: 0.9999993860786648, iteration: 294727
loss: 1.039845585823059,grad_norm: 0.9999993786718129, iteration: 294728
loss: 1.0584608316421509,grad_norm: 0.9999995973229912, iteration: 294729
loss: 0.9539318680763245,grad_norm: 0.907308332508416, iteration: 294730
loss: 0.9798317551612854,grad_norm: 0.8899906485847402, iteration: 294731
loss: 1.0406968593597412,grad_norm: 0.9999993601971988, iteration: 294732
loss: 1.0272337198257446,grad_norm: 0.8868772944127141, iteration: 294733
loss: 0.9956845641136169,grad_norm: 0.8230578349206059, iteration: 294734
loss: 1.0004005432128906,grad_norm: 0.8550753995962536, iteration: 294735
loss: 0.9736003875732422,grad_norm: 0.6893869463758593, iteration: 294736
loss: 1.0923378467559814,grad_norm: 0.9999992840867422, iteration: 294737
loss: 1.0623782873153687,grad_norm: 0.9999999812506245, iteration: 294738
loss: 1.0680450201034546,grad_norm: 0.9999995871101827, iteration: 294739
loss: 1.1274518966674805,grad_norm: 0.9999992556646667, iteration: 294740
loss: 1.0894616842269897,grad_norm: 0.9999999068944906, iteration: 294741
loss: 0.9949144721031189,grad_norm: 0.9625883335257327, iteration: 294742
loss: 1.0351338386535645,grad_norm: 0.9999993229526077, iteration: 294743
loss: 1.1216191053390503,grad_norm: 0.9999994420580128, iteration: 294744
loss: 1.095042109489441,grad_norm: 0.999999720116577, iteration: 294745
loss: 1.0354863405227661,grad_norm: 0.9999998206695385, iteration: 294746
loss: 1.0485320091247559,grad_norm: 0.9046086135419, iteration: 294747
loss: 1.0254844427108765,grad_norm: 0.9999998250955554, iteration: 294748
loss: 0.9735234975814819,grad_norm: 0.99999955233025, iteration: 294749
loss: 1.117451548576355,grad_norm: 1.0000000174239478, iteration: 294750
loss: 1.085411548614502,grad_norm: 0.9999998014184004, iteration: 294751
loss: 1.0478731393814087,grad_norm: 0.9344417463597748, iteration: 294752
loss: 1.022843837738037,grad_norm: 0.999999259227304, iteration: 294753
loss: 1.0773069858551025,grad_norm: 0.9999992341547046, iteration: 294754
loss: 1.085650086402893,grad_norm: 0.9999997420619171, iteration: 294755
loss: 1.2178071737289429,grad_norm: 0.999999329515249, iteration: 294756
loss: 0.9933830499649048,grad_norm: 0.999999041247061, iteration: 294757
loss: 1.064214825630188,grad_norm: 0.9999999373770737, iteration: 294758
loss: 0.9838969111442566,grad_norm: 0.9474912076036672, iteration: 294759
loss: 1.0198493003845215,grad_norm: 0.8590499499368727, iteration: 294760
loss: 1.0300703048706055,grad_norm: 0.9999990635659982, iteration: 294761
loss: 0.9965159296989441,grad_norm: 0.7611936669223685, iteration: 294762
loss: 1.0630018711090088,grad_norm: 0.9270186953950609, iteration: 294763
loss: 0.9956942796707153,grad_norm: 0.8361557886590723, iteration: 294764
loss: 1.017404556274414,grad_norm: 0.9999997319826696, iteration: 294765
loss: 1.0388476848602295,grad_norm: 0.9999999205488675, iteration: 294766
loss: 1.08998703956604,grad_norm: 0.9392077197594543, iteration: 294767
loss: 1.1075928211212158,grad_norm: 0.9999999788692147, iteration: 294768
loss: 1.1079944372177124,grad_norm: 0.9999990604476344, iteration: 294769
loss: 1.1813266277313232,grad_norm: 0.999999728494066, iteration: 294770
loss: 1.163828730583191,grad_norm: 0.9999995728149533, iteration: 294771
loss: 1.0419114828109741,grad_norm: 0.9999991340659786, iteration: 294772
loss: 1.0749423503875732,grad_norm: 0.9622707892804516, iteration: 294773
loss: 1.0453661680221558,grad_norm: 0.9999997507407047, iteration: 294774
loss: 1.1098606586456299,grad_norm: 0.8996369290355699, iteration: 294775
loss: 1.1236919164657593,grad_norm: 0.999999758332033, iteration: 294776
loss: 1.0312913656234741,grad_norm: 0.9999990409402686, iteration: 294777
loss: 1.0930787324905396,grad_norm: 0.9969404470873052, iteration: 294778
loss: 1.057112455368042,grad_norm: 0.999999789026092, iteration: 294779
loss: 1.0418535470962524,grad_norm: 1.0000000077191675, iteration: 294780
loss: 0.9959783554077148,grad_norm: 0.9999991212679297, iteration: 294781
loss: 1.2248140573501587,grad_norm: 0.9999995044088175, iteration: 294782
loss: 1.0270785093307495,grad_norm: 0.9999990560416552, iteration: 294783
loss: 1.0291746854782104,grad_norm: 0.9803990678061645, iteration: 294784
loss: 1.0140517950057983,grad_norm: 0.9999994821087068, iteration: 294785
loss: 0.9954478144645691,grad_norm: 0.9999993511162085, iteration: 294786
loss: 1.0135033130645752,grad_norm: 0.9999992731955021, iteration: 294787
loss: 1.0635496377944946,grad_norm: 0.9273823683299471, iteration: 294788
loss: 0.9881327152252197,grad_norm: 0.7887056226352778, iteration: 294789
loss: 1.239402413368225,grad_norm: 0.9999995865223887, iteration: 294790
loss: 0.9939180016517639,grad_norm: 0.9999989299166587, iteration: 294791
loss: 1.1213951110839844,grad_norm: 0.9999994134782716, iteration: 294792
loss: 1.0150349140167236,grad_norm: 0.9999997984177289, iteration: 294793
loss: 1.0524506568908691,grad_norm: 0.9999997434746498, iteration: 294794
loss: 1.040207028388977,grad_norm: 1.000000029802578, iteration: 294795
loss: 1.0905494689941406,grad_norm: 0.9658767307123723, iteration: 294796
loss: 1.010398030281067,grad_norm: 0.9880030689495771, iteration: 294797
loss: 1.3163472414016724,grad_norm: 0.9999995350898094, iteration: 294798
loss: 1.0202288627624512,grad_norm: 0.9270989631597586, iteration: 294799
loss: 1.1482841968536377,grad_norm: 0.9999999192575592, iteration: 294800
loss: 1.041894555091858,grad_norm: 0.9999992350125897, iteration: 294801
loss: 0.9910136461257935,grad_norm: 0.9999994842951708, iteration: 294802
loss: 1.0654469728469849,grad_norm: 0.9999998967449316, iteration: 294803
loss: 0.958052396774292,grad_norm: 0.7847426656483961, iteration: 294804
loss: 1.1522040367126465,grad_norm: 0.9999999852189084, iteration: 294805
loss: 1.0586332082748413,grad_norm: 0.9916863337131934, iteration: 294806
loss: 1.2733256816864014,grad_norm: 0.9999996841809214, iteration: 294807
loss: 1.1442735195159912,grad_norm: 0.9999999376933058, iteration: 294808
loss: 1.0601564645767212,grad_norm: 0.9999989976521361, iteration: 294809
loss: 1.1532444953918457,grad_norm: 0.9999994729057741, iteration: 294810
loss: 1.1560312509536743,grad_norm: 0.9999991844356866, iteration: 294811
loss: 1.0299307107925415,grad_norm: 1.0000000615287612, iteration: 294812
loss: 1.3155155181884766,grad_norm: 0.999999873207653, iteration: 294813
loss: 1.001746416091919,grad_norm: 0.9999995804169677, iteration: 294814
loss: 1.0179758071899414,grad_norm: 0.9999996120456165, iteration: 294815
loss: 1.051368236541748,grad_norm: 0.9999999215710039, iteration: 294816
loss: 1.055721640586853,grad_norm: 0.9999992475044113, iteration: 294817
loss: 1.0543993711471558,grad_norm: 0.9999999255566575, iteration: 294818
loss: 1.1257528066635132,grad_norm: 0.9999993670010463, iteration: 294819
loss: 1.0794869661331177,grad_norm: 0.9999991584876443, iteration: 294820
loss: 1.0934300422668457,grad_norm: 0.9999997653243734, iteration: 294821
loss: 1.2304447889328003,grad_norm: 0.9999998630344357, iteration: 294822
loss: 1.0703455209732056,grad_norm: 0.9138466039097591, iteration: 294823
loss: 1.1146135330200195,grad_norm: 0.999999447670037, iteration: 294824
loss: 0.9719497561454773,grad_norm: 0.9999991775082734, iteration: 294825
loss: 1.113269329071045,grad_norm: 0.9999992333607904, iteration: 294826
loss: 1.214964509010315,grad_norm: 0.999999209669558, iteration: 294827
loss: 1.1573625802993774,grad_norm: 0.9999999449936712, iteration: 294828
loss: 1.078269362449646,grad_norm: 0.9999998688343692, iteration: 294829
loss: 1.4062532186508179,grad_norm: 0.9999998828211988, iteration: 294830
loss: 1.1185659170150757,grad_norm: 0.9999998434451317, iteration: 294831
loss: 0.9999343156814575,grad_norm: 0.8770483792316279, iteration: 294832
loss: 1.0204604864120483,grad_norm: 0.7833577310689308, iteration: 294833
loss: 1.0752356052398682,grad_norm: 0.9999992467621548, iteration: 294834
loss: 1.0222357511520386,grad_norm: 0.9999993574333563, iteration: 294835
loss: 1.1379057168960571,grad_norm: 0.9999995449784401, iteration: 294836
loss: 1.2873233556747437,grad_norm: 0.9999999147828721, iteration: 294837
loss: 1.0464245080947876,grad_norm: 0.9999998191562778, iteration: 294838
loss: 0.9965303540229797,grad_norm: 0.7539134488250527, iteration: 294839
loss: 1.1585513353347778,grad_norm: 0.9999997076710833, iteration: 294840
loss: 0.9964919686317444,grad_norm: 0.8046646032886823, iteration: 294841
loss: 0.9913239479064941,grad_norm: 0.9999990680499652, iteration: 294842
loss: 1.1185439825057983,grad_norm: 0.9999999166426616, iteration: 294843
loss: 1.0529959201812744,grad_norm: 0.999999135067992, iteration: 294844
loss: 1.0329818725585938,grad_norm: 0.9999999837663751, iteration: 294845
loss: 1.031868577003479,grad_norm: 0.7773534332461827, iteration: 294846
loss: 0.9967738389968872,grad_norm: 0.9999993795844618, iteration: 294847
loss: 1.0164827108383179,grad_norm: 0.9999990049512697, iteration: 294848
loss: 1.0608166456222534,grad_norm: 0.9999996902220071, iteration: 294849
loss: 1.0870838165283203,grad_norm: 0.9999991726001233, iteration: 294850
loss: 1.1564877033233643,grad_norm: 0.9999995495735179, iteration: 294851
loss: 0.9898656606674194,grad_norm: 0.836621568396248, iteration: 294852
loss: 1.0782122611999512,grad_norm: 0.9999994103428321, iteration: 294853
loss: 0.9835805296897888,grad_norm: 0.9999996002533214, iteration: 294854
loss: 1.0511353015899658,grad_norm: 0.9999991536772176, iteration: 294855
loss: 1.0740416049957275,grad_norm: 0.9871262383772729, iteration: 294856
loss: 1.020685076713562,grad_norm: 0.9999999707249082, iteration: 294857
loss: 0.9997395873069763,grad_norm: 0.9999996188940908, iteration: 294858
loss: 1.0154571533203125,grad_norm: 0.899780023539697, iteration: 294859
loss: 0.9939311742782593,grad_norm: 0.9999996766452668, iteration: 294860
loss: 1.0948184728622437,grad_norm: 0.999999521624797, iteration: 294861
loss: 0.981139600276947,grad_norm: 0.9999989895795289, iteration: 294862
loss: 1.0452797412872314,grad_norm: 0.9999996683499819, iteration: 294863
loss: 1.0505235195159912,grad_norm: 0.9999990072244836, iteration: 294864
loss: 1.0493645668029785,grad_norm: 0.8217529620199352, iteration: 294865
loss: 1.00004243850708,grad_norm: 0.9705418740851626, iteration: 294866
loss: 1.1579127311706543,grad_norm: 0.9999994166548323, iteration: 294867
loss: 1.0956716537475586,grad_norm: 0.9999996444252773, iteration: 294868
loss: 1.0819016695022583,grad_norm: 0.9999990295849779, iteration: 294869
loss: 1.0050328969955444,grad_norm: 0.9404404561934607, iteration: 294870
loss: 1.0471376180648804,grad_norm: 0.999999702748198, iteration: 294871
loss: 1.0401537418365479,grad_norm: 0.9999998818464321, iteration: 294872
loss: 1.0823893547058105,grad_norm: 0.9999992764858745, iteration: 294873
loss: 1.0800645351409912,grad_norm: 0.9999993423425243, iteration: 294874
loss: 1.0331518650054932,grad_norm: 0.9475801967719668, iteration: 294875
loss: 1.0346790552139282,grad_norm: 0.9999993177897512, iteration: 294876
loss: 0.9863038063049316,grad_norm: 0.9999994876660742, iteration: 294877
loss: 0.9885794520378113,grad_norm: 0.7767772079693317, iteration: 294878
loss: 0.9984033703804016,grad_norm: 0.9907017379382348, iteration: 294879
loss: 1.023896336555481,grad_norm: 0.9999991412904629, iteration: 294880
loss: 1.0675514936447144,grad_norm: 0.9999997965604289, iteration: 294881
loss: 1.1629799604415894,grad_norm: 0.9999990182980073, iteration: 294882
loss: 1.0186909437179565,grad_norm: 0.9334145712735873, iteration: 294883
loss: 1.0211467742919922,grad_norm: 0.9999992419601859, iteration: 294884
loss: 1.0921471118927002,grad_norm: 0.9999996037442226, iteration: 294885
loss: 1.154702067375183,grad_norm: 1.0000000095373562, iteration: 294886
loss: 1.3137303590774536,grad_norm: 0.9999995691695016, iteration: 294887
loss: 0.9707090258598328,grad_norm: 0.9383119319422212, iteration: 294888
loss: 1.0240564346313477,grad_norm: 0.9999994242915913, iteration: 294889
loss: 1.0303595066070557,grad_norm: 0.9999999714006315, iteration: 294890
loss: 1.1071035861968994,grad_norm: 0.9999991267777386, iteration: 294891
loss: 1.210290789604187,grad_norm: 0.9999998750992148, iteration: 294892
loss: 1.0699037313461304,grad_norm: 0.999999127932915, iteration: 294893
loss: 1.037712574005127,grad_norm: 0.9999993435651928, iteration: 294894
loss: 1.0470551252365112,grad_norm: 0.9999996470792433, iteration: 294895
loss: 1.0400856733322144,grad_norm: 0.9999997874144817, iteration: 294896
loss: 1.0812170505523682,grad_norm: 0.9999992995165451, iteration: 294897
loss: 1.042080283164978,grad_norm: 0.9999996035510524, iteration: 294898
loss: 1.0397430658340454,grad_norm: 0.9970598801808123, iteration: 294899
loss: 0.9918830990791321,grad_norm: 0.9999995786325989, iteration: 294900
loss: 0.9782006740570068,grad_norm: 0.87796073689267, iteration: 294901
loss: 1.0355232954025269,grad_norm: 0.9999994131350258, iteration: 294902
loss: 1.005266785621643,grad_norm: 0.8014660202917315, iteration: 294903
loss: 1.0253369808197021,grad_norm: 0.9999999743289946, iteration: 294904
loss: 0.9926396012306213,grad_norm: 0.6789056284719027, iteration: 294905
loss: 1.0717337131500244,grad_norm: 0.9999991085927499, iteration: 294906
loss: 1.0571680068969727,grad_norm: 0.9999994394618621, iteration: 294907
loss: 1.1454392671585083,grad_norm: 0.9999993510651556, iteration: 294908
loss: 1.0423157215118408,grad_norm: 0.988605150288204, iteration: 294909
loss: 1.032677173614502,grad_norm: 0.9999999212289606, iteration: 294910
loss: 1.0914617776870728,grad_norm: 0.9999997870531103, iteration: 294911
loss: 1.1044572591781616,grad_norm: 0.9999995303555184, iteration: 294912
loss: 0.9655234813690186,grad_norm: 0.859314716458003, iteration: 294913
loss: 1.021033525466919,grad_norm: 0.8515664870494889, iteration: 294914
loss: 1.0186306238174438,grad_norm: 0.9999992007877436, iteration: 294915
loss: 1.0428274869918823,grad_norm: 0.9999994551608662, iteration: 294916
loss: 1.1019086837768555,grad_norm: 0.9999999550150711, iteration: 294917
loss: 1.1115617752075195,grad_norm: 0.9999999265085048, iteration: 294918
loss: 1.0725895166397095,grad_norm: 0.9999999684686708, iteration: 294919
loss: 1.0319746732711792,grad_norm: 0.9999996995800084, iteration: 294920
loss: 1.0687583684921265,grad_norm: 0.9999994499235031, iteration: 294921
loss: 0.9960483312606812,grad_norm: 0.9999994682489329, iteration: 294922
loss: 1.048577904701233,grad_norm: 0.9999993312962903, iteration: 294923
loss: 1.085570216178894,grad_norm: 0.9999995445710592, iteration: 294924
loss: 1.001121163368225,grad_norm: 0.8981291366601863, iteration: 294925
loss: 1.0054240226745605,grad_norm: 0.8978805720994311, iteration: 294926
loss: 1.066365122795105,grad_norm: 0.9999999778758654, iteration: 294927
loss: 0.9940272569656372,grad_norm: 0.9856462792502898, iteration: 294928
loss: 1.0888031721115112,grad_norm: 0.9999994844798008, iteration: 294929
loss: 1.0063811540603638,grad_norm: 0.9919809496928131, iteration: 294930
loss: 1.0764172077178955,grad_norm: 0.8680934181913864, iteration: 294931
loss: 0.9871915578842163,grad_norm: 0.9999995639425187, iteration: 294932
loss: 1.0293052196502686,grad_norm: 0.8975154710885564, iteration: 294933
loss: 1.0883406400680542,grad_norm: 0.999999784900578, iteration: 294934
loss: 1.0760231018066406,grad_norm: 0.9999994131412823, iteration: 294935
loss: 1.0991414785385132,grad_norm: 0.9999990658928861, iteration: 294936
loss: 1.097432017326355,grad_norm: 0.999999297431331, iteration: 294937
loss: 1.023500680923462,grad_norm: 0.8903056258195392, iteration: 294938
loss: 1.0502375364303589,grad_norm: 0.9999991291921317, iteration: 294939
loss: 1.0169470310211182,grad_norm: 0.8875677231708485, iteration: 294940
loss: 1.0628048181533813,grad_norm: 0.9999991791481934, iteration: 294941
loss: 1.0151907205581665,grad_norm: 0.8694179644183637, iteration: 294942
loss: 1.0835366249084473,grad_norm: 0.9999995792232917, iteration: 294943
loss: 1.0419752597808838,grad_norm: 0.9983463260989368, iteration: 294944
loss: 1.0435181856155396,grad_norm: 0.9999994920683397, iteration: 294945
loss: 1.0486351251602173,grad_norm: 0.9999995080836185, iteration: 294946
loss: 1.018172264099121,grad_norm: 0.9264877843894122, iteration: 294947
loss: 0.9795683026313782,grad_norm: 0.7286403180057135, iteration: 294948
loss: 1.0188162326812744,grad_norm: 0.9013749548789709, iteration: 294949
loss: 1.01146399974823,grad_norm: 0.9999992042009678, iteration: 294950
loss: 1.1235781908035278,grad_norm: 0.9999997049510435, iteration: 294951
loss: 0.9933885931968689,grad_norm: 0.7604138067052443, iteration: 294952
loss: 0.9780395030975342,grad_norm: 0.9999991598318698, iteration: 294953
loss: 1.0176931619644165,grad_norm: 0.8904515627690934, iteration: 294954
loss: 1.0231157541275024,grad_norm: 0.9861777704872654, iteration: 294955
loss: 1.076454758644104,grad_norm: 0.999999302809901, iteration: 294956
loss: 1.0603947639465332,grad_norm: 0.9999991343241307, iteration: 294957
loss: 0.9948154091835022,grad_norm: 0.7952510520048918, iteration: 294958
loss: 1.0137858390808105,grad_norm: 0.8954163575557882, iteration: 294959
loss: 1.0479438304901123,grad_norm: 0.9999998969088444, iteration: 294960
loss: 1.0930448770523071,grad_norm: 0.9999999382329688, iteration: 294961
loss: 1.0344669818878174,grad_norm: 0.9999990931854823, iteration: 294962
loss: 0.9879757761955261,grad_norm: 0.8984198447199453, iteration: 294963
loss: 1.145041584968567,grad_norm: 0.999999911706925, iteration: 294964
loss: 1.0328916311264038,grad_norm: 0.9923013693363167, iteration: 294965
loss: 1.027937412261963,grad_norm: 0.8638823606601289, iteration: 294966
loss: 0.9961214661598206,grad_norm: 0.9999991022643483, iteration: 294967
loss: 0.9920757412910461,grad_norm: 0.9999990342349677, iteration: 294968
loss: 1.210187315940857,grad_norm: 0.9999999844420833, iteration: 294969
loss: 1.0047929286956787,grad_norm: 0.9999997083628859, iteration: 294970
loss: 1.0415445566177368,grad_norm: 0.8979742103827144, iteration: 294971
loss: 1.0666006803512573,grad_norm: 0.9999990626050329, iteration: 294972
loss: 1.0900483131408691,grad_norm: 0.9999998646689029, iteration: 294973
loss: 1.0241097211837769,grad_norm: 0.8872044359943432, iteration: 294974
loss: 1.0581969022750854,grad_norm: 0.8557791091658113, iteration: 294975
loss: 1.0165427923202515,grad_norm: 0.9911608013640849, iteration: 294976
loss: 0.9690945148468018,grad_norm: 0.9703403407215958, iteration: 294977
loss: 0.9949703812599182,grad_norm: 0.9999991089887837, iteration: 294978
loss: 1.0677721500396729,grad_norm: 0.9999991585316865, iteration: 294979
loss: 1.0029726028442383,grad_norm: 0.7932724226427432, iteration: 294980
loss: 1.0141704082489014,grad_norm: 0.9999991782883819, iteration: 294981
loss: 1.0066189765930176,grad_norm: 0.9999991821090886, iteration: 294982
loss: 1.0736478567123413,grad_norm: 0.8593358188625226, iteration: 294983
loss: 1.1226577758789062,grad_norm: 0.9999999328093134, iteration: 294984
loss: 1.121065616607666,grad_norm: 0.9999990683530641, iteration: 294985
loss: 1.0563737154006958,grad_norm: 0.9999992412203652, iteration: 294986
loss: 1.0119414329528809,grad_norm: 0.9999993987672388, iteration: 294987
loss: 1.076812505722046,grad_norm: 0.9999996219992923, iteration: 294988
loss: 1.0780003070831299,grad_norm: 0.9999999372855942, iteration: 294989
loss: 1.0695511102676392,grad_norm: 0.9999991206155192, iteration: 294990
loss: 1.0531500577926636,grad_norm: 0.9969336094207312, iteration: 294991
loss: 1.0128189325332642,grad_norm: 0.9999993356463621, iteration: 294992
loss: 1.0168087482452393,grad_norm: 0.999999188765646, iteration: 294993
loss: 1.0618816614151,grad_norm: 0.9999992748667521, iteration: 294994
loss: 1.0597326755523682,grad_norm: 0.9999991524510392, iteration: 294995
loss: 1.0800299644470215,grad_norm: 0.9603246684985938, iteration: 294996
loss: 1.0493803024291992,grad_norm: 0.9999995903056239, iteration: 294997
loss: 1.020692229270935,grad_norm: 0.9528999657426026, iteration: 294998
loss: 1.0672131776809692,grad_norm: 0.8366348542840165, iteration: 294999
loss: 1.0393097400665283,grad_norm: 0.9999992975704941, iteration: 295000
loss: 1.1037203073501587,grad_norm: 0.9999996133438748, iteration: 295001
loss: 1.0535788536071777,grad_norm: 0.9999990408753094, iteration: 295002
loss: 0.9904820919036865,grad_norm: 0.81742505273656, iteration: 295003
loss: 1.0590236186981201,grad_norm: 0.9999990095299007, iteration: 295004
loss: 1.020658254623413,grad_norm: 0.9085992626281886, iteration: 295005
loss: 1.0268586874008179,grad_norm: 0.9999989882253505, iteration: 295006
loss: 1.0481367111206055,grad_norm: 0.999999470806833, iteration: 295007
loss: 0.9986150860786438,grad_norm: 0.9999991774379555, iteration: 295008
loss: 1.060880184173584,grad_norm: 0.9999991353273431, iteration: 295009
loss: 1.0440610647201538,grad_norm: 0.9999998487004949, iteration: 295010
loss: 1.0471291542053223,grad_norm: 0.9999993191309855, iteration: 295011
loss: 1.0057685375213623,grad_norm: 0.9999990978033165, iteration: 295012
loss: 1.0127249956130981,grad_norm: 0.9999989688289102, iteration: 295013
loss: 0.9951218366622925,grad_norm: 0.9999994940043705, iteration: 295014
loss: 1.0340681076049805,grad_norm: 0.9999992487627802, iteration: 295015
loss: 1.0822616815567017,grad_norm: 0.9707959283147743, iteration: 295016
loss: 1.0131025314331055,grad_norm: 0.9999990609931481, iteration: 295017
loss: 1.1155530214309692,grad_norm: 0.9999995162431399, iteration: 295018
loss: 1.0659857988357544,grad_norm: 0.9999992256943953, iteration: 295019
loss: 1.0715789794921875,grad_norm: 0.9999994948557595, iteration: 295020
loss: 1.0030453205108643,grad_norm: 0.9999995984675085, iteration: 295021
loss: 1.009036898612976,grad_norm: 0.8162414313961968, iteration: 295022
loss: 1.0578968524932861,grad_norm: 0.9999998618980402, iteration: 295023
loss: 1.053336262702942,grad_norm: 0.9999991210362961, iteration: 295024
loss: 1.0593972206115723,grad_norm: 0.931059079106011, iteration: 295025
loss: 1.0098321437835693,grad_norm: 0.999999141586904, iteration: 295026
loss: 1.1799122095108032,grad_norm: 0.9999996763696566, iteration: 295027
loss: 0.9951538443565369,grad_norm: 0.9999991293522971, iteration: 295028
loss: 1.0072829723358154,grad_norm: 0.9325448676933413, iteration: 295029
loss: 1.0670870542526245,grad_norm: 0.9999989871805526, iteration: 295030
loss: 1.1128264665603638,grad_norm: 0.9999998455847365, iteration: 295031
loss: 1.0875390768051147,grad_norm: 0.9999991455260961, iteration: 295032
loss: 1.021735429763794,grad_norm: 0.9999996773312317, iteration: 295033
loss: 1.1466295719146729,grad_norm: 0.9999997845903057, iteration: 295034
loss: 1.0671905279159546,grad_norm: 0.999999354072837, iteration: 295035
loss: 1.0259126424789429,grad_norm: 0.9999991506812931, iteration: 295036
loss: 1.0072921514511108,grad_norm: 0.9999994974987486, iteration: 295037
loss: 1.0330160856246948,grad_norm: 0.9999993853067287, iteration: 295038
loss: 1.027319312095642,grad_norm: 0.8227662487120136, iteration: 295039
loss: 1.017111897468567,grad_norm: 0.9999997477659015, iteration: 295040
loss: 1.1155668497085571,grad_norm: 0.9999993157201428, iteration: 295041
loss: 0.9734196066856384,grad_norm: 0.8788199081906766, iteration: 295042
loss: 1.1102631092071533,grad_norm: 0.999999291008, iteration: 295043
loss: 1.0491634607315063,grad_norm: 0.8488609236177443, iteration: 295044
loss: 1.0755594968795776,grad_norm: 0.9038893506775011, iteration: 295045
loss: 1.0181365013122559,grad_norm: 0.8982158036926047, iteration: 295046
loss: 1.0483014583587646,grad_norm: 0.8984077847737466, iteration: 295047
loss: 1.0327461957931519,grad_norm: 0.7644367063978419, iteration: 295048
loss: 1.0829483270645142,grad_norm: 0.9988856540703047, iteration: 295049
loss: 1.3484165668487549,grad_norm: 0.9999994654507333, iteration: 295050
loss: 1.139994502067566,grad_norm: 0.9999995473019697, iteration: 295051
loss: 1.0665463209152222,grad_norm: 0.9999996805443201, iteration: 295052
loss: 1.086129903793335,grad_norm: 0.9999998790828253, iteration: 295053
loss: 1.1465758085250854,grad_norm: 0.999999662083825, iteration: 295054
loss: 1.2942930459976196,grad_norm: 0.9999997548642402, iteration: 295055
loss: 1.004780888557434,grad_norm: 0.9671326623187472, iteration: 295056
loss: 0.9724577069282532,grad_norm: 0.8685861766764237, iteration: 295057
loss: 1.0329203605651855,grad_norm: 0.9999991017793244, iteration: 295058
loss: 1.0299161672592163,grad_norm: 0.9999995720520629, iteration: 295059
loss: 1.0526573657989502,grad_norm: 0.8887007840852899, iteration: 295060
loss: 1.027206540107727,grad_norm: 0.8606145828179271, iteration: 295061
loss: 1.0166469812393188,grad_norm: 0.9999992438293885, iteration: 295062
loss: 1.0566304922103882,grad_norm: 0.9999996186830311, iteration: 295063
loss: 1.005339503288269,grad_norm: 0.9999994679287505, iteration: 295064
loss: 1.1138168573379517,grad_norm: 0.9999991637165146, iteration: 295065
loss: 0.9925726056098938,grad_norm: 0.8900533070437165, iteration: 295066
loss: 1.0182896852493286,grad_norm: 0.9387129571330243, iteration: 295067
loss: 1.0200965404510498,grad_norm: 0.8310067283437857, iteration: 295068
loss: 1.0820860862731934,grad_norm: 0.9999990716001704, iteration: 295069
loss: 1.021548867225647,grad_norm: 0.914246711245789, iteration: 295070
loss: 0.9622219204902649,grad_norm: 0.8552340846078726, iteration: 295071
loss: 1.0188933610916138,grad_norm: 0.9999994486752298, iteration: 295072
loss: 1.0302337408065796,grad_norm: 0.9999998285538552, iteration: 295073
loss: 1.0454822778701782,grad_norm: 0.999999181647726, iteration: 295074
loss: 1.0177712440490723,grad_norm: 0.9999997851986449, iteration: 295075
loss: 1.0972968339920044,grad_norm: 0.999999251921754, iteration: 295076
loss: 1.06736421585083,grad_norm: 0.99999955622204, iteration: 295077
loss: 1.0481375455856323,grad_norm: 0.9929060541242414, iteration: 295078
loss: 1.138950228691101,grad_norm: 0.9999995034030749, iteration: 295079
loss: 1.0001798868179321,grad_norm: 0.7969131078505455, iteration: 295080
loss: 1.0243127346038818,grad_norm: 0.9822101242077375, iteration: 295081
loss: 0.9788142442703247,grad_norm: 0.9992375433144695, iteration: 295082
loss: 0.9789503812789917,grad_norm: 0.9195146217578662, iteration: 295083
loss: 1.087572455406189,grad_norm: 0.9999997075875926, iteration: 295084
loss: 0.9928212761878967,grad_norm: 0.9999990486880992, iteration: 295085
loss: 1.1384929418563843,grad_norm: 0.9999997441103998, iteration: 295086
loss: 1.0228254795074463,grad_norm: 0.9999992605602616, iteration: 295087
loss: 1.0269988775253296,grad_norm: 0.8706748256240595, iteration: 295088
loss: 1.0472925901412964,grad_norm: 0.9999995558830178, iteration: 295089
loss: 1.0118879079818726,grad_norm: 0.9400736374739557, iteration: 295090
loss: 1.1073657274246216,grad_norm: 0.9999994389771877, iteration: 295091
loss: 1.0443263053894043,grad_norm: 0.9999991624265188, iteration: 295092
loss: 1.0153934955596924,grad_norm: 0.9999992135329203, iteration: 295093
loss: 1.0293208360671997,grad_norm: 0.7657843052332493, iteration: 295094
loss: 1.0103740692138672,grad_norm: 0.9999991163993193, iteration: 295095
loss: 1.0171780586242676,grad_norm: 0.872423504166568, iteration: 295096
loss: 1.0270029306411743,grad_norm: 0.9999997581852237, iteration: 295097
loss: 1.0448240041732788,grad_norm: 0.9999999968900868, iteration: 295098
loss: 1.025070071220398,grad_norm: 0.9999992632283455, iteration: 295099
loss: 1.0309019088745117,grad_norm: 0.9834680299769644, iteration: 295100
loss: 1.0265915393829346,grad_norm: 0.9999991396626604, iteration: 295101
loss: 1.0093270540237427,grad_norm: 0.8299210010306503, iteration: 295102
loss: 1.0526565313339233,grad_norm: 0.9999992585054233, iteration: 295103
loss: 0.9978225827217102,grad_norm: 0.999999725373115, iteration: 295104
loss: 1.0030442476272583,grad_norm: 0.9517512355487033, iteration: 295105
loss: 1.0187175273895264,grad_norm: 0.9999991061366278, iteration: 295106
loss: 0.9971517324447632,grad_norm: 0.807402225725907, iteration: 295107
loss: 1.0473612546920776,grad_norm: 0.9999990812483022, iteration: 295108
loss: 1.028862714767456,grad_norm: 0.9999991126343618, iteration: 295109
loss: 1.0237867832183838,grad_norm: 0.9999993973560426, iteration: 295110
loss: 1.0091891288757324,grad_norm: 0.9338713677393825, iteration: 295111
loss: 1.0501673221588135,grad_norm: 0.9573980580438214, iteration: 295112
loss: 1.0461078882217407,grad_norm: 0.9426937700644547, iteration: 295113
loss: 1.0659476518630981,grad_norm: 0.9999995486941123, iteration: 295114
loss: 0.9797172546386719,grad_norm: 0.9631729399664195, iteration: 295115
loss: 1.0684431791305542,grad_norm: 0.9999996054164981, iteration: 295116
loss: 0.9951124787330627,grad_norm: 0.9612487474471787, iteration: 295117
loss: 1.0763156414031982,grad_norm: 0.9999992184202275, iteration: 295118
loss: 0.9829404950141907,grad_norm: 0.9048732491043412, iteration: 295119
loss: 0.9993584752082825,grad_norm: 0.9999989598225475, iteration: 295120
loss: 1.0542519092559814,grad_norm: 0.9999994422286135, iteration: 295121
loss: 0.9866476655006409,grad_norm: 0.8799335133878375, iteration: 295122
loss: 1.0291093587875366,grad_norm: 0.9999991053026578, iteration: 295123
loss: 0.9907959699630737,grad_norm: 0.883643241485434, iteration: 295124
loss: 1.0343676805496216,grad_norm: 0.9999999227261919, iteration: 295125
loss: 1.0565130710601807,grad_norm: 0.9999995205419795, iteration: 295126
loss: 1.0652316808700562,grad_norm: 1.00000003045443, iteration: 295127
loss: 1.0452110767364502,grad_norm: 0.9491309768311269, iteration: 295128
loss: 1.0413494110107422,grad_norm: 0.9999997043382525, iteration: 295129
loss: 1.0867583751678467,grad_norm: 0.999999248138211, iteration: 295130
loss: 1.0147876739501953,grad_norm: 0.8965757030950323, iteration: 295131
loss: 1.1177089214324951,grad_norm: 0.9999991406921499, iteration: 295132
loss: 1.0332087278366089,grad_norm: 0.8923799264534851, iteration: 295133
loss: 1.1113947629928589,grad_norm: 0.9999995810374351, iteration: 295134
loss: 0.9928396344184875,grad_norm: 0.7654076928227785, iteration: 295135
loss: 0.9779370427131653,grad_norm: 0.9999992313217506, iteration: 295136
loss: 1.0236965417861938,grad_norm: 0.9042180910420818, iteration: 295137
loss: 0.9835141897201538,grad_norm: 0.9999990609029823, iteration: 295138
loss: 1.025356411933899,grad_norm: 0.9850933035966124, iteration: 295139
loss: 0.9825578331947327,grad_norm: 0.999999202637662, iteration: 295140
loss: 1.0561860799789429,grad_norm: 0.9999998382838298, iteration: 295141
loss: 1.1284301280975342,grad_norm: 0.9999992797398111, iteration: 295142
loss: 1.0056029558181763,grad_norm: 0.9205477128959104, iteration: 295143
loss: 1.0067214965820312,grad_norm: 0.9999995863112159, iteration: 295144
loss: 0.9680414795875549,grad_norm: 0.9999990884022029, iteration: 295145
loss: 1.059548258781433,grad_norm: 0.9999992004974605, iteration: 295146
loss: 1.0223487615585327,grad_norm: 0.9999992217354411, iteration: 295147
loss: 1.0377379655838013,grad_norm: 0.8263109641731189, iteration: 295148
loss: 1.1029469966888428,grad_norm: 0.9999995074356874, iteration: 295149
loss: 1.0107752084732056,grad_norm: 0.7822419682651538, iteration: 295150
loss: 1.018459677696228,grad_norm: 0.9999992170854259, iteration: 295151
loss: 0.9954637885093689,grad_norm: 0.9999998301332611, iteration: 295152
loss: 1.0093750953674316,grad_norm: 0.831093483724993, iteration: 295153
loss: 1.0273233652114868,grad_norm: 0.9987472625154771, iteration: 295154
loss: 1.0184240341186523,grad_norm: 0.9999997957263582, iteration: 295155
loss: 1.0650919675827026,grad_norm: 0.9999993470396648, iteration: 295156
loss: 1.036679744720459,grad_norm: 0.9999999047513349, iteration: 295157
loss: 0.9786860942840576,grad_norm: 0.8703931565395745, iteration: 295158
loss: 0.9914690256118774,grad_norm: 0.9456496522081366, iteration: 295159
loss: 1.0506149530410767,grad_norm: 0.9999998544012773, iteration: 295160
loss: 1.0736846923828125,grad_norm: 0.9999990525200201, iteration: 295161
loss: 1.0622503757476807,grad_norm: 0.9196382589855208, iteration: 295162
loss: 1.0150963068008423,grad_norm: 0.9999993143150417, iteration: 295163
loss: 1.0049155950546265,grad_norm: 0.9315394470220599, iteration: 295164
loss: 0.9906648993492126,grad_norm: 0.7908870168976343, iteration: 295165
loss: 1.016650915145874,grad_norm: 0.9618636490643867, iteration: 295166
loss: 1.020244836807251,grad_norm: 0.9999995197911483, iteration: 295167
loss: 1.0826618671417236,grad_norm: 0.9999997565300819, iteration: 295168
loss: 1.0506223440170288,grad_norm: 0.9999991411139838, iteration: 295169
loss: 0.9910372495651245,grad_norm: 0.9999993150785129, iteration: 295170
loss: 0.9955193996429443,grad_norm: 0.8465887772666084, iteration: 295171
loss: 1.0134392976760864,grad_norm: 0.9999992103104206, iteration: 295172
loss: 1.0178320407867432,grad_norm: 0.8877380601817858, iteration: 295173
loss: 1.08979070186615,grad_norm: 0.9999991943764739, iteration: 295174
loss: 1.0534400939941406,grad_norm: 0.9999994905831144, iteration: 295175
loss: 1.065232276916504,grad_norm: 0.999999184725969, iteration: 295176
loss: 1.00296950340271,grad_norm: 0.8689250041289827, iteration: 295177
loss: 1.0460704565048218,grad_norm: 0.9999994121415559, iteration: 295178
loss: 0.9847097992897034,grad_norm: 0.7746158078215123, iteration: 295179
loss: 1.0147604942321777,grad_norm: 0.9999998547527194, iteration: 295180
loss: 0.9646768569946289,grad_norm: 0.876601548537957, iteration: 295181
loss: 1.0017534494400024,grad_norm: 0.9999989534651053, iteration: 295182
loss: 1.0095434188842773,grad_norm: 0.8940416464071251, iteration: 295183
loss: 0.9992926120758057,grad_norm: 0.8181112856656311, iteration: 295184
loss: 1.0438904762268066,grad_norm: 0.9999994606415161, iteration: 295185
loss: 1.1060434579849243,grad_norm: 0.9999992407578785, iteration: 295186
loss: 1.003196120262146,grad_norm: 0.8154262662480315, iteration: 295187
loss: 0.9664173126220703,grad_norm: 0.9363241839124922, iteration: 295188
loss: 1.0014941692352295,grad_norm: 0.9999993771244159, iteration: 295189
loss: 1.1889477968215942,grad_norm: 0.9999991046951302, iteration: 295190
loss: 1.0377891063690186,grad_norm: 0.9999989078303996, iteration: 295191
loss: 0.9794167876243591,grad_norm: 0.9417485811146611, iteration: 295192
loss: 0.9757346510887146,grad_norm: 0.9999992977730818, iteration: 295193
loss: 1.08039391040802,grad_norm: 0.9999991752109088, iteration: 295194
loss: 0.9915317296981812,grad_norm: 0.9089056510043797, iteration: 295195
loss: 1.311506986618042,grad_norm: 0.9999995972821657, iteration: 295196
loss: 0.9727619886398315,grad_norm: 0.8557989653121538, iteration: 295197
loss: 0.9974545240402222,grad_norm: 0.9999990914765768, iteration: 295198
loss: 1.0296591520309448,grad_norm: 0.9488020805862937, iteration: 295199
loss: 1.0363545417785645,grad_norm: 0.9725629278872724, iteration: 295200
loss: 1.0350412130355835,grad_norm: 0.9999996805080815, iteration: 295201
loss: 1.129211664199829,grad_norm: 0.9999995119947299, iteration: 295202
loss: 1.0484977960586548,grad_norm: 0.9999998058276417, iteration: 295203
loss: 0.9549907445907593,grad_norm: 0.9999993002924246, iteration: 295204
loss: 1.0211495161056519,grad_norm: 0.8772290090738465, iteration: 295205
loss: 0.9728730916976929,grad_norm: 0.9999993908250532, iteration: 295206
loss: 1.0774604082107544,grad_norm: 0.8751814759140807, iteration: 295207
loss: 1.0255216360092163,grad_norm: 0.9488261428922707, iteration: 295208
loss: 0.9671436548233032,grad_norm: 0.7730483020184608, iteration: 295209
loss: 1.0259276628494263,grad_norm: 0.9667279181056903, iteration: 295210
loss: 1.0891205072402954,grad_norm: 0.9999997616447976, iteration: 295211
loss: 1.0405330657958984,grad_norm: 0.9999998798485848, iteration: 295212
loss: 0.9880867600440979,grad_norm: 0.8998081167323688, iteration: 295213
loss: 1.0250201225280762,grad_norm: 0.9999991294277737, iteration: 295214
loss: 0.9878559112548828,grad_norm: 0.8469930760466609, iteration: 295215
loss: 1.1222245693206787,grad_norm: 1.0000000588200855, iteration: 295216
loss: 1.0081548690795898,grad_norm: 0.999999739983926, iteration: 295217
loss: 1.1414871215820312,grad_norm: 0.9999996126503594, iteration: 295218
loss: 1.033587098121643,grad_norm: 0.8571635339356041, iteration: 295219
loss: 1.0552529096603394,grad_norm: 0.9999991673095576, iteration: 295220
loss: 0.9875634908676147,grad_norm: 0.7706493621874986, iteration: 295221
loss: 0.9894281029701233,grad_norm: 0.8152946667459335, iteration: 295222
loss: 1.010161280632019,grad_norm: 0.9999997294120329, iteration: 295223
loss: 1.065455675125122,grad_norm: 0.9999991645430805, iteration: 295224
loss: 0.9729843139648438,grad_norm: 0.9999994351736003, iteration: 295225
loss: 1.0596224069595337,grad_norm: 0.8102631383258099, iteration: 295226
loss: 1.0095362663269043,grad_norm: 0.8868722597704732, iteration: 295227
loss: 1.024501919746399,grad_norm: 0.8636063992205805, iteration: 295228
loss: 1.0341507196426392,grad_norm: 0.9763531037817567, iteration: 295229
loss: 0.967552125453949,grad_norm: 0.9706846628983897, iteration: 295230
loss: 0.9969268441200256,grad_norm: 0.7753114911691307, iteration: 295231
loss: 1.10568368434906,grad_norm: 0.9999998486304028, iteration: 295232
loss: 1.0342928171157837,grad_norm: 0.9999993346336723, iteration: 295233
loss: 1.074067234992981,grad_norm: 0.8849742754410679, iteration: 295234
loss: 1.0785667896270752,grad_norm: 0.9999992993251818, iteration: 295235
loss: 1.1184102296829224,grad_norm: 0.9999991409819289, iteration: 295236
loss: 1.0066359043121338,grad_norm: 0.9999995276828643, iteration: 295237
loss: 1.0466662645339966,grad_norm: 0.9999999319929933, iteration: 295238
loss: 0.9909194707870483,grad_norm: 0.9999990462163514, iteration: 295239
loss: 1.076803207397461,grad_norm: 0.9999993026005726, iteration: 295240
loss: 1.0670545101165771,grad_norm: 0.9999990979918698, iteration: 295241
loss: 0.9818799495697021,grad_norm: 0.9526318273610357, iteration: 295242
loss: 1.0430996417999268,grad_norm: 0.8719698003046845, iteration: 295243
loss: 1.0215650796890259,grad_norm: 0.9960965525306448, iteration: 295244
loss: 1.0112966299057007,grad_norm: 0.9999994471871594, iteration: 295245
loss: 0.9824363589286804,grad_norm: 0.7107547059512064, iteration: 295246
loss: 0.9866785407066345,grad_norm: 0.8354388276199002, iteration: 295247
loss: 1.004184365272522,grad_norm: 0.8917515199698121, iteration: 295248
loss: 1.0456053018569946,grad_norm: 0.7920008546871414, iteration: 295249
loss: 1.0128141641616821,grad_norm: 0.9999991790282847, iteration: 295250
loss: 1.022031545639038,grad_norm: 0.910649825866803, iteration: 295251
loss: 1.066667914390564,grad_norm: 0.9999996065358108, iteration: 295252
loss: 0.9874765872955322,grad_norm: 0.7758421859083166, iteration: 295253
loss: 1.0168309211730957,grad_norm: 0.9999992626336571, iteration: 295254
loss: 1.0687999725341797,grad_norm: 0.9999999483771614, iteration: 295255
loss: 1.0927976369857788,grad_norm: 0.999999911300748, iteration: 295256
loss: 1.0197579860687256,grad_norm: 0.999999092716468, iteration: 295257
loss: 0.9944315552711487,grad_norm: 0.7894245542238764, iteration: 295258
loss: 1.1209349632263184,grad_norm: 0.9999991595410843, iteration: 295259
loss: 1.085657000541687,grad_norm: 0.9999998495501878, iteration: 295260
loss: 1.031354546546936,grad_norm: 0.9669347998149406, iteration: 295261
loss: 1.020153284072876,grad_norm: 0.8151176414831511, iteration: 295262
loss: 0.9793574213981628,grad_norm: 0.8925117047333403, iteration: 295263
loss: 1.029515027999878,grad_norm: 0.8476656241192329, iteration: 295264
loss: 1.0502759218215942,grad_norm: 0.9999991344449034, iteration: 295265
loss: 1.0073429346084595,grad_norm: 0.9167406002169836, iteration: 295266
loss: 1.0789214372634888,grad_norm: 0.9999990645909386, iteration: 295267
loss: 1.0576430559158325,grad_norm: 0.999999132939832, iteration: 295268
loss: 1.0134291648864746,grad_norm: 0.999999956801268, iteration: 295269
loss: 0.9908512234687805,grad_norm: 0.99999976400567, iteration: 295270
loss: 0.9576981067657471,grad_norm: 0.8784860042168118, iteration: 295271
loss: 0.9698585271835327,grad_norm: 0.8758623953880014, iteration: 295272
loss: 1.0005584955215454,grad_norm: 0.9999997162086407, iteration: 295273
loss: 0.9768166542053223,grad_norm: 0.8913381510208207, iteration: 295274
loss: 0.9667615294456482,grad_norm: 0.9999992521095689, iteration: 295275
loss: 0.9991054534912109,grad_norm: 0.9999990682778144, iteration: 295276
loss: 0.978230357170105,grad_norm: 0.7724945470002729, iteration: 295277
loss: 1.0205577611923218,grad_norm: 0.9999990465254162, iteration: 295278
loss: 1.0159119367599487,grad_norm: 0.9999990406775274, iteration: 295279
loss: 1.016446828842163,grad_norm: 0.9999990747302001, iteration: 295280
loss: 1.0172905921936035,grad_norm: 0.9999992087897681, iteration: 295281
loss: 1.030832052230835,grad_norm: 0.9999991305567721, iteration: 295282
loss: 0.9835920333862305,grad_norm: 0.7655130024252866, iteration: 295283
loss: 0.9485280513763428,grad_norm: 0.7516880377023399, iteration: 295284
loss: 0.9864627718925476,grad_norm: 0.9999995818594366, iteration: 295285
loss: 0.997435450553894,grad_norm: 0.783267887056239, iteration: 295286
loss: 1.0351327657699585,grad_norm: 0.9063111759637171, iteration: 295287
loss: 0.974976122379303,grad_norm: 0.999999033043124, iteration: 295288
loss: 1.0930304527282715,grad_norm: 0.9999995964655988, iteration: 295289
loss: 0.9823514223098755,grad_norm: 0.9999997705987644, iteration: 295290
loss: 1.0255987644195557,grad_norm: 0.999999268267646, iteration: 295291
loss: 1.0348454713821411,grad_norm: 0.8634733932596813, iteration: 295292
loss: 1.009264349937439,grad_norm: 0.9810537452027812, iteration: 295293
loss: 0.9700466394424438,grad_norm: 0.7807728457835484, iteration: 295294
loss: 1.0090523958206177,grad_norm: 0.9999999909149531, iteration: 295295
loss: 0.9812085628509521,grad_norm: 0.7659436257155121, iteration: 295296
loss: 1.0067638158798218,grad_norm: 0.9419676548366933, iteration: 295297
loss: 0.9870384931564331,grad_norm: 0.7973853180507212, iteration: 295298
loss: 0.9662164449691772,grad_norm: 0.8857077430582887, iteration: 295299
loss: 0.9977548122406006,grad_norm: 0.9320419755691622, iteration: 295300
loss: 1.0342870950698853,grad_norm: 0.9999990849126352, iteration: 295301
loss: 1.0386369228363037,grad_norm: 0.99999912353464, iteration: 295302
loss: 1.0236570835113525,grad_norm: 0.9999995861682273, iteration: 295303
loss: 0.9797695875167847,grad_norm: 0.7508489136992985, iteration: 295304
loss: 1.0743597745895386,grad_norm: 0.9999997132267437, iteration: 295305
loss: 1.0583689212799072,grad_norm: 0.8281030944325699, iteration: 295306
loss: 1.0059834718704224,grad_norm: 0.9039388673289802, iteration: 295307
loss: 1.0613365173339844,grad_norm: 0.9836844087290437, iteration: 295308
loss: 1.01017165184021,grad_norm: 0.9999998569320913, iteration: 295309
loss: 0.9853372573852539,grad_norm: 0.7277217763705471, iteration: 295310
loss: 0.9946463704109192,grad_norm: 0.7779332371926664, iteration: 295311
loss: 1.1683531999588013,grad_norm: 0.9999996005630208, iteration: 295312
loss: 0.9937565326690674,grad_norm: 0.8395892109750913, iteration: 295313
loss: 0.9940522909164429,grad_norm: 0.8953269045285596, iteration: 295314
loss: 1.0524719953536987,grad_norm: 0.8672630796993295, iteration: 295315
loss: 0.9570342302322388,grad_norm: 0.8456376021660639, iteration: 295316
loss: 1.1618977785110474,grad_norm: 1.0000000092640458, iteration: 295317
loss: 1.1567282676696777,grad_norm: 0.9999992963310118, iteration: 295318
loss: 0.9878125190734863,grad_norm: 0.6739447424122856, iteration: 295319
loss: 1.0217427015304565,grad_norm: 0.9999992496362899, iteration: 295320
loss: 0.9629200100898743,grad_norm: 0.726125944509179, iteration: 295321
loss: 1.0301607847213745,grad_norm: 0.9999992931216778, iteration: 295322
loss: 0.9858672618865967,grad_norm: 0.8735299640266936, iteration: 295323
loss: 1.007562518119812,grad_norm: 0.9180152291056558, iteration: 295324
loss: 1.1029417514801025,grad_norm: 0.9999996805056478, iteration: 295325
loss: 1.0450985431671143,grad_norm: 0.9999998808076778, iteration: 295326
loss: 1.0220658779144287,grad_norm: 0.9763905948010553, iteration: 295327
loss: 1.0007017850875854,grad_norm: 0.9999995527616681, iteration: 295328
loss: 0.9900557398796082,grad_norm: 0.8497299679724153, iteration: 295329
loss: 1.052295446395874,grad_norm: 0.829235763982064, iteration: 295330
loss: 1.0646251440048218,grad_norm: 0.8441980283937438, iteration: 295331
loss: 1.0166268348693848,grad_norm: 0.9999990643975493, iteration: 295332
loss: 1.0021597146987915,grad_norm: 0.8756549494877807, iteration: 295333
loss: 1.0298010110855103,grad_norm: 0.9179868439400743, iteration: 295334
loss: 0.9937699437141418,grad_norm: 0.8029368319369768, iteration: 295335
loss: 0.9916443824768066,grad_norm: 0.9999990828188964, iteration: 295336
loss: 1.3068263530731201,grad_norm: 0.9999997134734369, iteration: 295337
loss: 1.0267223119735718,grad_norm: 0.8784940688214236, iteration: 295338
loss: 1.0135828256607056,grad_norm: 0.9886846971047331, iteration: 295339
loss: 1.077405333518982,grad_norm: 0.8245529441513255, iteration: 295340
loss: 0.9707984924316406,grad_norm: 0.852000886430499, iteration: 295341
loss: 1.0117378234863281,grad_norm: 0.813683617409938, iteration: 295342
loss: 1.064220905303955,grad_norm: 0.9999992083965039, iteration: 295343
loss: 1.064005732536316,grad_norm: 1.0000000185803724, iteration: 295344
loss: 1.0365347862243652,grad_norm: 0.9566635224003911, iteration: 295345
loss: 0.9922330975532532,grad_norm: 0.8550409661561292, iteration: 295346
loss: 1.0633511543273926,grad_norm: 0.9999989465775141, iteration: 295347
loss: 1.0173941850662231,grad_norm: 0.8333807753233932, iteration: 295348
loss: 0.994721531867981,grad_norm: 0.9932118340059382, iteration: 295349
loss: 1.1013373136520386,grad_norm: 1.0000000013693042, iteration: 295350
loss: 0.9900973439216614,grad_norm: 0.9000777729769025, iteration: 295351
loss: 1.0051343441009521,grad_norm: 0.6795019751705553, iteration: 295352
loss: 1.0034397840499878,grad_norm: 0.8270558016562201, iteration: 295353
loss: 1.0386064052581787,grad_norm: 0.9048913461872155, iteration: 295354
loss: 0.9875723123550415,grad_norm: 0.8440617279275614, iteration: 295355
loss: 0.9718008637428284,grad_norm: 0.8193062401386598, iteration: 295356
loss: 1.0568056106567383,grad_norm: 0.9925734298858089, iteration: 295357
loss: 1.0404407978057861,grad_norm: 0.999999769939505, iteration: 295358
loss: 1.0129363536834717,grad_norm: 0.8443957839078221, iteration: 295359
loss: 1.0259066820144653,grad_norm: 0.9399259273075942, iteration: 295360
loss: 1.0048812627792358,grad_norm: 0.9999991982658455, iteration: 295361
loss: 1.0440562963485718,grad_norm: 0.999999306681027, iteration: 295362
loss: 1.0173527002334595,grad_norm: 0.9999991192116454, iteration: 295363
loss: 0.9965033531188965,grad_norm: 0.7097461843993453, iteration: 295364
loss: 0.9590325355529785,grad_norm: 0.9999989841606569, iteration: 295365
loss: 0.979739785194397,grad_norm: 0.9482686613386753, iteration: 295366
loss: 0.9942686557769775,grad_norm: 0.8484201460815514, iteration: 295367
loss: 1.0202720165252686,grad_norm: 0.9999990059465043, iteration: 295368
loss: 1.0198382139205933,grad_norm: 0.9999990841818395, iteration: 295369
loss: 1.0799415111541748,grad_norm: 0.9999995271875829, iteration: 295370
loss: 0.9794569611549377,grad_norm: 0.9831061903408498, iteration: 295371
loss: 0.9929139018058777,grad_norm: 0.7873673013294723, iteration: 295372
loss: 1.0092225074768066,grad_norm: 0.7556672717491921, iteration: 295373
loss: 1.0026293992996216,grad_norm: 0.9499435554073997, iteration: 295374
loss: 1.0812739133834839,grad_norm: 0.9999997998398982, iteration: 295375
loss: 1.0291402339935303,grad_norm: 0.8283746978800918, iteration: 295376
loss: 0.9861084818840027,grad_norm: 0.8660511334640427, iteration: 295377
loss: 0.9673536419868469,grad_norm: 0.8649490320285561, iteration: 295378
loss: 1.0013461112976074,grad_norm: 0.9999991006842754, iteration: 295379
loss: 0.9983472228050232,grad_norm: 0.8570679856156335, iteration: 295380
loss: 1.003233790397644,grad_norm: 0.8843361497125644, iteration: 295381
loss: 0.982944667339325,grad_norm: 0.8513457355988111, iteration: 295382
loss: 1.069311499595642,grad_norm: 0.9999998393772385, iteration: 295383
loss: 1.0279427766799927,grad_norm: 0.8037939259363097, iteration: 295384
loss: 1.0143640041351318,grad_norm: 0.9999992147704677, iteration: 295385
loss: 0.9710690975189209,grad_norm: 0.8693489264707152, iteration: 295386
loss: 1.0634077787399292,grad_norm: 0.9999991106563769, iteration: 295387
loss: 1.0401976108551025,grad_norm: 0.9999993133734252, iteration: 295388
loss: 0.990218997001648,grad_norm: 0.8853570393474542, iteration: 295389
loss: 0.9972122311592102,grad_norm: 0.8472770153191957, iteration: 295390
loss: 1.0341498851776123,grad_norm: 0.9999990931034287, iteration: 295391
loss: 0.9911189079284668,grad_norm: 0.9199332704076407, iteration: 295392
loss: 0.9866349101066589,grad_norm: 0.9999991532328528, iteration: 295393
loss: 0.990990400314331,grad_norm: 0.9812594469097058, iteration: 295394
loss: 1.0306487083435059,grad_norm: 0.999999391812697, iteration: 295395
loss: 0.9795488119125366,grad_norm: 0.8300832935103066, iteration: 295396
loss: 1.0205568075180054,grad_norm: 0.9999992125313171, iteration: 295397
loss: 0.9907518029212952,grad_norm: 0.8005094334679197, iteration: 295398
loss: 0.9787133932113647,grad_norm: 0.7563054168165885, iteration: 295399
loss: 0.9933025240898132,grad_norm: 0.9377240840410643, iteration: 295400
loss: 0.9817692637443542,grad_norm: 0.9213684725389438, iteration: 295401
loss: 0.9988346695899963,grad_norm: 0.8400855284055706, iteration: 295402
loss: 1.0576632022857666,grad_norm: 0.9999994049054635, iteration: 295403
loss: 1.186594009399414,grad_norm: 0.9999995944162438, iteration: 295404
loss: 1.0103546380996704,grad_norm: 0.8062517021596487, iteration: 295405
loss: 1.0208745002746582,grad_norm: 0.8140228305635532, iteration: 295406
loss: 1.0071041584014893,grad_norm: 0.9999993239135453, iteration: 295407
loss: 0.974821150302887,grad_norm: 0.8751377109050743, iteration: 295408
loss: 0.9788050055503845,grad_norm: 0.8972308691883047, iteration: 295409
loss: 0.9750106334686279,grad_norm: 0.9999997946271737, iteration: 295410
loss: 1.0219975709915161,grad_norm: 0.9999991188816884, iteration: 295411
loss: 1.006950855255127,grad_norm: 0.7783008587639572, iteration: 295412
loss: 0.9976972341537476,grad_norm: 0.8475933871279292, iteration: 295413
loss: 1.0022255182266235,grad_norm: 0.9999996353487697, iteration: 295414
loss: 0.9496042132377625,grad_norm: 0.9613437867527708, iteration: 295415
loss: 0.9693809151649475,grad_norm: 0.9999998659911665, iteration: 295416
loss: 0.9999119639396667,grad_norm: 0.9150752846197306, iteration: 295417
loss: 0.996971845626831,grad_norm: 0.723319458025658, iteration: 295418
loss: 1.0703682899475098,grad_norm: 0.9999995846408379, iteration: 295419
loss: 1.0420092344284058,grad_norm: 0.8259170142209926, iteration: 295420
loss: 1.0143557786941528,grad_norm: 0.9999994348648725, iteration: 295421
loss: 0.9924830198287964,grad_norm: 0.9133049756533265, iteration: 295422
loss: 0.9561443328857422,grad_norm: 0.8270347955612036, iteration: 295423
loss: 1.0118906497955322,grad_norm: 0.8443454036159708, iteration: 295424
loss: 0.9999085068702698,grad_norm: 0.9769177214321991, iteration: 295425
loss: 1.0016671419143677,grad_norm: 0.9443858053903363, iteration: 295426
loss: 1.0010261535644531,grad_norm: 0.9064682471362407, iteration: 295427
loss: 1.0976967811584473,grad_norm: 0.9999993080035876, iteration: 295428
loss: 1.0752142667770386,grad_norm: 0.9999998969523057, iteration: 295429
loss: 1.0045373439788818,grad_norm: 0.9159268584334009, iteration: 295430
loss: 1.0095046758651733,grad_norm: 0.8414037411957248, iteration: 295431
loss: 1.01308274269104,grad_norm: 0.8938067674087509, iteration: 295432
loss: 0.9661655426025391,grad_norm: 0.7749887184521846, iteration: 295433
loss: 1.0411933660507202,grad_norm: 0.9757568987566021, iteration: 295434
loss: 0.9885202646255493,grad_norm: 0.8427229113485908, iteration: 295435
loss: 0.9646288752555847,grad_norm: 0.9999990606210236, iteration: 295436
loss: 1.0204824209213257,grad_norm: 0.782586094094432, iteration: 295437
loss: 1.0368194580078125,grad_norm: 0.8359170026162027, iteration: 295438
loss: 1.032753348350525,grad_norm: 0.8976760448674114, iteration: 295439
loss: 1.012481451034546,grad_norm: 0.871530034638343, iteration: 295440
loss: 1.0307867527008057,grad_norm: 0.9999994678862678, iteration: 295441
loss: 0.9796165227890015,grad_norm: 0.9602034118039051, iteration: 295442
loss: 1.0190110206604004,grad_norm: 0.9999999190293892, iteration: 295443
loss: 0.9819012880325317,grad_norm: 0.8799348451545611, iteration: 295444
loss: 1.0785741806030273,grad_norm: 0.9999991185067786, iteration: 295445
loss: 0.991665780544281,grad_norm: 0.9999990668944337, iteration: 295446
loss: 1.0239311456680298,grad_norm: 0.89447143275815, iteration: 295447
loss: 1.0196619033813477,grad_norm: 0.8598317814657332, iteration: 295448
loss: 1.000140905380249,grad_norm: 0.9999993026080498, iteration: 295449
loss: 0.9524105191230774,grad_norm: 0.9001789011387539, iteration: 295450
loss: 0.9754796028137207,grad_norm: 0.9999990961297686, iteration: 295451
loss: 1.0029345750808716,grad_norm: 0.9999991381361828, iteration: 295452
loss: 0.9847412109375,grad_norm: 0.8976063053009615, iteration: 295453
loss: 0.9890664219856262,grad_norm: 0.9999992521695069, iteration: 295454
loss: 1.0025765895843506,grad_norm: 0.8668853401299057, iteration: 295455
loss: 1.011252999305725,grad_norm: 0.7458545994890456, iteration: 295456
loss: 1.0272945165634155,grad_norm: 0.8039461033648096, iteration: 295457
loss: 1.006370186805725,grad_norm: 0.8479588595657643, iteration: 295458
loss: 1.0078727006912231,grad_norm: 0.8160527983471623, iteration: 295459
loss: 1.0022631883621216,grad_norm: 0.912345919747827, iteration: 295460
loss: 1.0521501302719116,grad_norm: 0.9999991035557363, iteration: 295461
loss: 1.0224899053573608,grad_norm: 0.9999991606543991, iteration: 295462
loss: 0.9795717597007751,grad_norm: 0.8883578061137962, iteration: 295463
loss: 0.997840404510498,grad_norm: 0.9999999257225743, iteration: 295464
loss: 1.0780247449874878,grad_norm: 0.9999998476804101, iteration: 295465
loss: 1.0416464805603027,grad_norm: 0.9999991362988317, iteration: 295466
loss: 0.9932420253753662,grad_norm: 0.9090686417255013, iteration: 295467
loss: 0.9941934943199158,grad_norm: 0.9939673777283627, iteration: 295468
loss: 1.0106655359268188,grad_norm: 0.999999357389689, iteration: 295469
loss: 0.9935053586959839,grad_norm: 0.8577645966639271, iteration: 295470
loss: 1.1298414468765259,grad_norm: 0.8583170884657682, iteration: 295471
loss: 0.9710665345191956,grad_norm: 0.8530097368536167, iteration: 295472
loss: 1.0011143684387207,grad_norm: 0.9842153681937001, iteration: 295473
loss: 1.0096887350082397,grad_norm: 0.9999991052090043, iteration: 295474
loss: 1.0013974905014038,grad_norm: 0.8152710712371147, iteration: 295475
loss: 1.0131678581237793,grad_norm: 0.9999991486375797, iteration: 295476
loss: 1.068345546722412,grad_norm: 0.9999993620863749, iteration: 295477
loss: 0.9974344372749329,grad_norm: 0.9999989868617318, iteration: 295478
loss: 1.0499171018600464,grad_norm: 0.9999991822055252, iteration: 295479
loss: 0.9722800850868225,grad_norm: 0.8441599901185863, iteration: 295480
loss: 1.0259194374084473,grad_norm: 0.9999998322591848, iteration: 295481
loss: 0.9590680599212646,grad_norm: 0.8344868076530692, iteration: 295482
loss: 0.9716566801071167,grad_norm: 0.904197162544154, iteration: 295483
loss: 1.0275827646255493,grad_norm: 0.9999991491399014, iteration: 295484
loss: 1.0293400287628174,grad_norm: 0.9999991319986614, iteration: 295485
loss: 0.9725872278213501,grad_norm: 0.7950113547008598, iteration: 295486
loss: 1.0002470016479492,grad_norm: 0.9613959112824383, iteration: 295487
loss: 0.9789429903030396,grad_norm: 0.9518026992533437, iteration: 295488
loss: 1.0548174381256104,grad_norm: 0.9407221603075352, iteration: 295489
loss: 0.9762750864028931,grad_norm: 0.8383156164063142, iteration: 295490
loss: 1.001379370689392,grad_norm: 0.999999550258774, iteration: 295491
loss: 1.0371097326278687,grad_norm: 0.9290647849337945, iteration: 295492
loss: 0.9669480323791504,grad_norm: 0.7530534286855015, iteration: 295493
loss: 1.011146068572998,grad_norm: 0.921813010565621, iteration: 295494
loss: 0.9830688834190369,grad_norm: 0.9999992171732176, iteration: 295495
loss: 1.0142124891281128,grad_norm: 0.7355016935047783, iteration: 295496
loss: 0.9963414669036865,grad_norm: 0.9756567702536914, iteration: 295497
loss: 1.019503116607666,grad_norm: 0.9757438235479345, iteration: 295498
loss: 1.0125255584716797,grad_norm: 0.9999992907368906, iteration: 295499
loss: 0.957558810710907,grad_norm: 0.928202745006611, iteration: 295500
loss: 0.991328775882721,grad_norm: 0.9999990891585248, iteration: 295501
loss: 0.9749270677566528,grad_norm: 0.8540387970245544, iteration: 295502
loss: 1.0634273290634155,grad_norm: 0.9999995857783937, iteration: 295503
loss: 0.9971643686294556,grad_norm: 0.9967328257855514, iteration: 295504
loss: 1.1013940572738647,grad_norm: 0.999999985410831, iteration: 295505
loss: 0.9560509324073792,grad_norm: 0.864908581695081, iteration: 295506
loss: 1.0252659320831299,grad_norm: 0.9999990658696609, iteration: 295507
loss: 0.9536541104316711,grad_norm: 0.8083270750526412, iteration: 295508
loss: 1.0429961681365967,grad_norm: 0.9180775221202133, iteration: 295509
loss: 1.045552372932434,grad_norm: 0.8501162629836597, iteration: 295510
loss: 1.0362643003463745,grad_norm: 0.9999991842545073, iteration: 295511
loss: 0.9838731288909912,grad_norm: 0.9092700176661755, iteration: 295512
loss: 1.0065003633499146,grad_norm: 0.9558975384938742, iteration: 295513
loss: 0.9829409718513489,grad_norm: 0.9999993009945203, iteration: 295514
loss: 0.9599054455757141,grad_norm: 0.8101530147663238, iteration: 295515
loss: 1.0596449375152588,grad_norm: 0.9999992809081073, iteration: 295516
loss: 1.0050147771835327,grad_norm: 0.9347153036180393, iteration: 295517
loss: 1.0286357402801514,grad_norm: 0.9495165144891804, iteration: 295518
loss: 0.9698395729064941,grad_norm: 0.9974000196020333, iteration: 295519
loss: 1.095995545387268,grad_norm: 0.9999993242153429, iteration: 295520
loss: 0.9774985313415527,grad_norm: 0.8408581228651533, iteration: 295521
loss: 1.0089906454086304,grad_norm: 0.9999995276814131, iteration: 295522
loss: 1.0239530801773071,grad_norm: 0.9999997144578641, iteration: 295523
loss: 1.0033533573150635,grad_norm: 0.9999995129931154, iteration: 295524
loss: 0.9943504929542542,grad_norm: 0.9999990191561485, iteration: 295525
loss: 1.0093059539794922,grad_norm: 0.9999999350083543, iteration: 295526
loss: 0.973535418510437,grad_norm: 0.9590942931584397, iteration: 295527
loss: 1.0356521606445312,grad_norm: 0.9999990760562607, iteration: 295528
loss: 1.0127043724060059,grad_norm: 0.9999992156485772, iteration: 295529
loss: 1.001175045967102,grad_norm: 0.7947047037401298, iteration: 295530
loss: 1.0059518814086914,grad_norm: 0.8540175695648271, iteration: 295531
loss: 0.9869299530982971,grad_norm: 0.9594772100376668, iteration: 295532
loss: 1.049484133720398,grad_norm: 0.9041553692796162, iteration: 295533
loss: 0.983845055103302,grad_norm: 0.8815574874108351, iteration: 295534
loss: 0.9959728121757507,grad_norm: 0.8309447262028004, iteration: 295535
loss: 1.0229668617248535,grad_norm: 0.9999992410261292, iteration: 295536
loss: 1.023107647895813,grad_norm: 0.9086133995900572, iteration: 295537
loss: 1.011347770690918,grad_norm: 0.9999995272692503, iteration: 295538
loss: 1.0182232856750488,grad_norm: 0.9999990455402373, iteration: 295539
loss: 0.999053955078125,grad_norm: 0.8462103576358944, iteration: 295540
loss: 0.9706299304962158,grad_norm: 0.7792058943487831, iteration: 295541
loss: 0.9653762578964233,grad_norm: 0.9426302443587476, iteration: 295542
loss: 1.0471857786178589,grad_norm: 0.9999995913826961, iteration: 295543
loss: 1.003669261932373,grad_norm: 0.8910561665271775, iteration: 295544
loss: 1.0040082931518555,grad_norm: 0.8741061412843408, iteration: 295545
loss: 1.0966397523880005,grad_norm: 0.9999997698296526, iteration: 295546
loss: 1.0303771495819092,grad_norm: 0.7525221330780282, iteration: 295547
loss: 0.9998257160186768,grad_norm: 0.9354786512787531, iteration: 295548
loss: 1.024949073791504,grad_norm: 0.7738813189733229, iteration: 295549
loss: 1.0100479125976562,grad_norm: 0.7115046357465342, iteration: 295550
loss: 0.9797701835632324,grad_norm: 0.9602019552161116, iteration: 295551
loss: 0.9748727083206177,grad_norm: 0.9999991922943284, iteration: 295552
loss: 0.9939298629760742,grad_norm: 0.6811154928372297, iteration: 295553
loss: 1.0506397485733032,grad_norm: 0.8811924057743592, iteration: 295554
loss: 1.0217803716659546,grad_norm: 0.8833504601885873, iteration: 295555
loss: 1.0071607828140259,grad_norm: 0.7338574999660472, iteration: 295556
loss: 1.0205261707305908,grad_norm: 0.8064875883275493, iteration: 295557
loss: 0.9969296455383301,grad_norm: 0.802355968829501, iteration: 295558
loss: 1.0144643783569336,grad_norm: 0.9638700928170124, iteration: 295559
loss: 0.990660548210144,grad_norm: 0.742037767768112, iteration: 295560
loss: 0.9660594463348389,grad_norm: 0.8837885260805325, iteration: 295561
loss: 1.015672206878662,grad_norm: 0.9999991136301818, iteration: 295562
loss: 1.0036535263061523,grad_norm: 0.7767600023653567, iteration: 295563
loss: 1.0736961364746094,grad_norm: 0.999999778184087, iteration: 295564
loss: 1.0296454429626465,grad_norm: 0.8182156461748, iteration: 295565
loss: 1.021119475364685,grad_norm: 0.9999994812695415, iteration: 295566
loss: 1.011278510093689,grad_norm: 0.8462455424354899, iteration: 295567
loss: 0.9830124974250793,grad_norm: 0.8171487774218184, iteration: 295568
loss: 1.0021722316741943,grad_norm: 0.7126107887156836, iteration: 295569
loss: 1.0130188465118408,grad_norm: 0.7724624218254015, iteration: 295570
loss: 0.9821686744689941,grad_norm: 0.8577847016163282, iteration: 295571
loss: 1.0051522254943848,grad_norm: 0.9999991815367713, iteration: 295572
loss: 0.977010190486908,grad_norm: 0.986417582736686, iteration: 295573
loss: 0.9796937108039856,grad_norm: 0.9630366064568571, iteration: 295574
loss: 1.0186123847961426,grad_norm: 0.9999997643209634, iteration: 295575
loss: 1.0336745977401733,grad_norm: 0.9999991234519273, iteration: 295576
loss: 1.1426135301589966,grad_norm: 0.9999999161664166, iteration: 295577
loss: 0.9813219308853149,grad_norm: 0.7360803673336594, iteration: 295578
loss: 1.0122594833374023,grad_norm: 0.9999990039718383, iteration: 295579
loss: 0.9762328267097473,grad_norm: 0.8617132870079007, iteration: 295580
loss: 1.0972927808761597,grad_norm: 0.9919947634575812, iteration: 295581
loss: 0.9883149862289429,grad_norm: 0.8485214859431639, iteration: 295582
loss: 0.9860219955444336,grad_norm: 0.810777756823467, iteration: 295583
loss: 0.9680750966072083,grad_norm: 0.8792936674177271, iteration: 295584
loss: 1.0315871238708496,grad_norm: 0.9159667838946058, iteration: 295585
loss: 1.04006028175354,grad_norm: 0.8602466094008009, iteration: 295586
loss: 0.9972442388534546,grad_norm: 0.86703784694211, iteration: 295587
loss: 1.0540494918823242,grad_norm: 0.9999992768776067, iteration: 295588
loss: 1.03579580783844,grad_norm: 0.883576618707854, iteration: 295589
loss: 0.986099898815155,grad_norm: 0.8969071410213111, iteration: 295590
loss: 0.990450918674469,grad_norm: 0.8427147093844569, iteration: 295591
loss: 1.0550652742385864,grad_norm: 0.9999992867685015, iteration: 295592
loss: 1.0204631090164185,grad_norm: 0.9999993763892064, iteration: 295593
loss: 1.0579698085784912,grad_norm: 0.9999994131127407, iteration: 295594
loss: 1.0004485845565796,grad_norm: 0.9704967821476047, iteration: 295595
loss: 0.9699518084526062,grad_norm: 0.7857215165321377, iteration: 295596
loss: 0.9709643125534058,grad_norm: 0.9999995676456903, iteration: 295597
loss: 1.0121651887893677,grad_norm: 0.8433627225740471, iteration: 295598
loss: 1.0938693284988403,grad_norm: 0.9999993150352758, iteration: 295599
loss: 1.0319249629974365,grad_norm: 0.9999990192475879, iteration: 295600
loss: 0.9753634333610535,grad_norm: 0.9999990930986673, iteration: 295601
loss: 0.9891446232795715,grad_norm: 0.9999994862048085, iteration: 295602
loss: 1.04060697555542,grad_norm: 0.9999991127842605, iteration: 295603
loss: 1.0113518238067627,grad_norm: 0.9999994126709538, iteration: 295604
loss: 1.029955506324768,grad_norm: 0.99999983392372, iteration: 295605
loss: 0.9780293107032776,grad_norm: 0.9296883886446429, iteration: 295606
loss: 1.0167292356491089,grad_norm: 0.9999993206513554, iteration: 295607
loss: 0.9736000895500183,grad_norm: 0.9067758716911575, iteration: 295608
loss: 1.0056333541870117,grad_norm: 0.855619158370756, iteration: 295609
loss: 1.0127918720245361,grad_norm: 0.8790652344294134, iteration: 295610
loss: 1.0260306596755981,grad_norm: 0.9799854708588106, iteration: 295611
loss: 0.9885533452033997,grad_norm: 0.9999991538003437, iteration: 295612
loss: 1.012050747871399,grad_norm: 0.9999992101756493, iteration: 295613
loss: 0.9772284030914307,grad_norm: 0.8912237268227821, iteration: 295614
loss: 0.9917052388191223,grad_norm: 0.8453255444809193, iteration: 295615
loss: 1.0528432130813599,grad_norm: 0.9787065487489959, iteration: 295616
loss: 1.0097041130065918,grad_norm: 0.8989736585520932, iteration: 295617
loss: 1.023169755935669,grad_norm: 0.9999994010355733, iteration: 295618
loss: 0.9645585417747498,grad_norm: 0.918629586187062, iteration: 295619
loss: 1.0178170204162598,grad_norm: 0.8203522647330104, iteration: 295620
loss: 1.0193971395492554,grad_norm: 0.9946251222748607, iteration: 295621
loss: 1.0011619329452515,grad_norm: 0.8862957478811089, iteration: 295622
loss: 0.9915540218353271,grad_norm: 0.999999231274698, iteration: 295623
loss: 0.9723837375640869,grad_norm: 0.9346548643327266, iteration: 295624
loss: 0.9755809307098389,grad_norm: 0.794260895479713, iteration: 295625
loss: 1.0014790296554565,grad_norm: 0.7692913648179016, iteration: 295626
loss: 0.995935320854187,grad_norm: 0.9999990878270895, iteration: 295627
loss: 0.964272677898407,grad_norm: 0.9263768964513779, iteration: 295628
loss: 0.9952388405799866,grad_norm: 0.9314366808649698, iteration: 295629
loss: 1.0108286142349243,grad_norm: 0.8586714319591283, iteration: 295630
loss: 0.977064847946167,grad_norm: 0.9633818982400212, iteration: 295631
loss: 1.0294837951660156,grad_norm: 0.7970750556056774, iteration: 295632
loss: 1.050470232963562,grad_norm: 0.9999993155083888, iteration: 295633
loss: 0.9875328540802002,grad_norm: 0.7241033373672358, iteration: 295634
loss: 1.0230501890182495,grad_norm: 0.9999997434578011, iteration: 295635
loss: 1.0375014543533325,grad_norm: 0.9518467541952368, iteration: 295636
loss: 1.063353180885315,grad_norm: 0.9999994740347186, iteration: 295637
loss: 0.9893598556518555,grad_norm: 0.8077139848060058, iteration: 295638
loss: 1.0323621034622192,grad_norm: 0.8353271040973651, iteration: 295639
loss: 0.998716413974762,grad_norm: 0.7551460766700351, iteration: 295640
loss: 1.0109543800354004,grad_norm: 0.8717613921614551, iteration: 295641
loss: 0.9860015511512756,grad_norm: 0.9999991384498285, iteration: 295642
loss: 0.9236312508583069,grad_norm: 0.9999996368342516, iteration: 295643
loss: 1.023284912109375,grad_norm: 0.9999993599116951, iteration: 295644
loss: 1.0180578231811523,grad_norm: 0.8343194827829797, iteration: 295645
loss: 1.0115718841552734,grad_norm: 0.8454619644372606, iteration: 295646
loss: 0.9866379499435425,grad_norm: 0.9999993616328992, iteration: 295647
loss: 0.9724853038787842,grad_norm: 0.9228267516762898, iteration: 295648
loss: 1.0363984107971191,grad_norm: 0.9553825997173786, iteration: 295649
loss: 1.053942322731018,grad_norm: 0.821767913280474, iteration: 295650
loss: 1.0205305814743042,grad_norm: 0.9225489912970847, iteration: 295651
loss: 1.0799914598464966,grad_norm: 0.9999991530426406, iteration: 295652
loss: 1.0320297479629517,grad_norm: 0.9999998206836691, iteration: 295653
loss: 1.0243233442306519,grad_norm: 0.8400435076168538, iteration: 295654
loss: 0.9852246046066284,grad_norm: 0.8196102741457377, iteration: 295655
loss: 1.0201178789138794,grad_norm: 0.910328817975503, iteration: 295656
loss: 1.0411458015441895,grad_norm: 0.8947962660972744, iteration: 295657
loss: 1.0241520404815674,grad_norm: 0.9479532866798056, iteration: 295658
loss: 1.0150808095932007,grad_norm: 0.877926824894902, iteration: 295659
loss: 1.0711220502853394,grad_norm: 0.9999995235496295, iteration: 295660
loss: 1.0286141633987427,grad_norm: 0.9999997735020963, iteration: 295661
loss: 1.0098236799240112,grad_norm: 0.9999992515301275, iteration: 295662
loss: 1.0898444652557373,grad_norm: 0.9667783104646313, iteration: 295663
loss: 0.991802453994751,grad_norm: 0.7527208205632371, iteration: 295664
loss: 1.0093079805374146,grad_norm: 0.776584441865782, iteration: 295665
loss: 1.0411392450332642,grad_norm: 0.9999990741058753, iteration: 295666
loss: 0.9585951566696167,grad_norm: 0.9705527931818195, iteration: 295667
loss: 1.006880283355713,grad_norm: 0.9401393093379701, iteration: 295668
loss: 1.0159521102905273,grad_norm: 0.9999993555265138, iteration: 295669
loss: 1.009481430053711,grad_norm: 0.9185078858418831, iteration: 295670
loss: 0.9977496862411499,grad_norm: 0.9237832828095617, iteration: 295671
loss: 1.0093551874160767,grad_norm: 0.8413570123826977, iteration: 295672
loss: 1.00723397731781,grad_norm: 0.802794798670519, iteration: 295673
loss: 0.9918805956840515,grad_norm: 0.9446188685808776, iteration: 295674
loss: 1.026594877243042,grad_norm: 0.9023870403369236, iteration: 295675
loss: 1.0149405002593994,grad_norm: 0.9969778264236817, iteration: 295676
loss: 0.9612258672714233,grad_norm: 0.7472915660722766, iteration: 295677
loss: 1.0085337162017822,grad_norm: 0.8359444960542078, iteration: 295678
loss: 1.0114455223083496,grad_norm: 0.8857207568547092, iteration: 295679
loss: 0.9868378043174744,grad_norm: 0.9999989855947372, iteration: 295680
loss: 1.0012097358703613,grad_norm: 0.8013005960233811, iteration: 295681
loss: 1.0250744819641113,grad_norm: 0.999999077506513, iteration: 295682
loss: 0.992871105670929,grad_norm: 0.9999991103246015, iteration: 295683
loss: 1.058924913406372,grad_norm: 0.9508980847945668, iteration: 295684
loss: 1.0141456127166748,grad_norm: 0.7653628951198057, iteration: 295685
loss: 0.9677143096923828,grad_norm: 0.877388130814032, iteration: 295686
loss: 1.025895357131958,grad_norm: 0.9999991754306727, iteration: 295687
loss: 1.014365315437317,grad_norm: 0.8604378407784485, iteration: 295688
loss: 1.001470685005188,grad_norm: 0.9999991693593366, iteration: 295689
loss: 0.9862463474273682,grad_norm: 0.7834436893493925, iteration: 295690
loss: 1.0411834716796875,grad_norm: 0.9999999585318008, iteration: 295691
loss: 0.9658454060554504,grad_norm: 0.946076359929849, iteration: 295692
loss: 1.0284539461135864,grad_norm: 0.8423439983781472, iteration: 295693
loss: 0.9671738147735596,grad_norm: 0.8462849613478142, iteration: 295694
loss: 0.9623932242393494,grad_norm: 0.8387604575883203, iteration: 295695
loss: 1.0159993171691895,grad_norm: 0.9533480897386897, iteration: 295696
loss: 0.9828503727912903,grad_norm: 0.9179934047107765, iteration: 295697
loss: 1.0207933187484741,grad_norm: 0.7205305489414904, iteration: 295698
loss: 1.0135211944580078,grad_norm: 0.9999990706865967, iteration: 295699
loss: 1.0219347476959229,grad_norm: 0.7880439013374713, iteration: 295700
loss: 1.052886724472046,grad_norm: 0.9999992317956657, iteration: 295701
loss: 1.0156389474868774,grad_norm: 0.9769725954516433, iteration: 295702
loss: 1.082322120666504,grad_norm: 0.9999998377968353, iteration: 295703
loss: 1.0008618831634521,grad_norm: 0.8214586358800597, iteration: 295704
loss: 1.0085632801055908,grad_norm: 0.999999077117465, iteration: 295705
loss: 0.984325110912323,grad_norm: 0.8554393341347934, iteration: 295706
loss: 1.0572855472564697,grad_norm: 0.9671581821198961, iteration: 295707
loss: 1.0549724102020264,grad_norm: 0.9999996327761315, iteration: 295708
loss: 0.9891191720962524,grad_norm: 0.913524338664775, iteration: 295709
loss: 1.0104200839996338,grad_norm: 0.9999989889952203, iteration: 295710
loss: 1.0194811820983887,grad_norm: 1.000000055611408, iteration: 295711
loss: 1.1265350580215454,grad_norm: 0.999999791679528, iteration: 295712
loss: 1.0404963493347168,grad_norm: 0.9072287325981534, iteration: 295713
loss: 1.0488556623458862,grad_norm: 0.8897306121344546, iteration: 295714
loss: 1.089439034461975,grad_norm: 0.9214070762654671, iteration: 295715
loss: 1.0300679206848145,grad_norm: 0.9999990036980264, iteration: 295716
loss: 1.0168017148971558,grad_norm: 0.8573657670799367, iteration: 295717
loss: 1.0488563776016235,grad_norm: 0.9999992027789795, iteration: 295718
loss: 0.9788287878036499,grad_norm: 0.8140439584639899, iteration: 295719
loss: 0.9468521475791931,grad_norm: 0.8512393685287094, iteration: 295720
loss: 1.0042799711227417,grad_norm: 0.9634113630711878, iteration: 295721
loss: 0.9745705723762512,grad_norm: 0.6868033804688402, iteration: 295722
loss: 1.0369524955749512,grad_norm: 0.9999995074925417, iteration: 295723
loss: 1.0042120218276978,grad_norm: 0.9294667169755108, iteration: 295724
loss: 1.0283571481704712,grad_norm: 0.952191021205292, iteration: 295725
loss: 1.0447453260421753,grad_norm: 0.999999808380764, iteration: 295726
loss: 1.0075129270553589,grad_norm: 0.7429015115716744, iteration: 295727
loss: 0.9838705062866211,grad_norm: 0.9676511871556702, iteration: 295728
loss: 1.0156021118164062,grad_norm: 0.9999991252545977, iteration: 295729
loss: 0.954266369342804,grad_norm: 0.9999995253507902, iteration: 295730
loss: 0.9922971725463867,grad_norm: 0.9125949922228893, iteration: 295731
loss: 0.9879175424575806,grad_norm: 0.8757475339942964, iteration: 295732
loss: 1.001899242401123,grad_norm: 0.7837855601820958, iteration: 295733
loss: 1.068247675895691,grad_norm: 0.9999994438013199, iteration: 295734
loss: 0.9769240617752075,grad_norm: 0.8480180889568869, iteration: 295735
loss: 1.007278323173523,grad_norm: 0.8420808548553363, iteration: 295736
loss: 0.9669385552406311,grad_norm: 0.8489049911460771, iteration: 295737
loss: 0.9913516044616699,grad_norm: 0.8634091601214529, iteration: 295738
loss: 1.008772373199463,grad_norm: 0.8178347435552619, iteration: 295739
loss: 0.9867240786552429,grad_norm: 0.8809426101650992, iteration: 295740
loss: 1.0350841283798218,grad_norm: 0.8338480097699971, iteration: 295741
loss: 0.9826048016548157,grad_norm: 0.8038048223701107, iteration: 295742
loss: 1.0019081830978394,grad_norm: 0.9999992470567654, iteration: 295743
loss: 1.0022380352020264,grad_norm: 0.9999992266515374, iteration: 295744
loss: 0.9512925744056702,grad_norm: 0.9999991463389001, iteration: 295745
loss: 0.9858043193817139,grad_norm: 0.8767440966700317, iteration: 295746
loss: 0.9992190003395081,grad_norm: 0.7799556782944423, iteration: 295747
loss: 1.1330779790878296,grad_norm: 0.9999995740533618, iteration: 295748
loss: 1.0094091892242432,grad_norm: 0.8935614599336843, iteration: 295749
loss: 0.9746890068054199,grad_norm: 0.8346436810276701, iteration: 295750
loss: 1.0326403379440308,grad_norm: 0.9999997708279454, iteration: 295751
loss: 1.0071014165878296,grad_norm: 0.8108346400794182, iteration: 295752
loss: 0.9689154624938965,grad_norm: 0.7451480826941563, iteration: 295753
loss: 0.9962788224220276,grad_norm: 0.9976754833878135, iteration: 295754
loss: 0.9741219282150269,grad_norm: 0.8525857239213861, iteration: 295755
loss: 1.017576813697815,grad_norm: 0.8926786700492897, iteration: 295756
loss: 0.9878964424133301,grad_norm: 0.9999994110688751, iteration: 295757
loss: 0.9819798469543457,grad_norm: 0.880478009497001, iteration: 295758
loss: 0.9843090176582336,grad_norm: 0.9999999403768383, iteration: 295759
loss: 1.0507984161376953,grad_norm: 0.9999990340451471, iteration: 295760
loss: 1.0141692161560059,grad_norm: 0.9999998824962678, iteration: 295761
loss: 0.9852931499481201,grad_norm: 0.9999992910902188, iteration: 295762
loss: 0.9916077852249146,grad_norm: 0.999999085377598, iteration: 295763
loss: 0.9853261709213257,grad_norm: 0.8660224468914601, iteration: 295764
loss: 0.9702222943305969,grad_norm: 0.8018773452502223, iteration: 295765
loss: 1.0596741437911987,grad_norm: 0.9999995311692652, iteration: 295766
loss: 0.9799468517303467,grad_norm: 0.9054189663757144, iteration: 295767
loss: 0.9857982993125916,grad_norm: 0.8291155810649414, iteration: 295768
loss: 1.0172446966171265,grad_norm: 0.9999992723313887, iteration: 295769
loss: 0.9883760809898376,grad_norm: 0.868442830854307, iteration: 295770
loss: 1.0526447296142578,grad_norm: 0.9999996491913661, iteration: 295771
loss: 1.0177072286605835,grad_norm: 0.9543632983552021, iteration: 295772
loss: 0.9973758459091187,grad_norm: 0.924624993102487, iteration: 295773
loss: 1.0084410905838013,grad_norm: 0.82763573830072, iteration: 295774
loss: 1.027126431465149,grad_norm: 0.9999989852893294, iteration: 295775
loss: 1.091296672821045,grad_norm: 0.9999993451314142, iteration: 295776
loss: 1.0252866744995117,grad_norm: 0.9999990565055201, iteration: 295777
loss: 1.0007336139678955,grad_norm: 0.8518428628139747, iteration: 295778
loss: 1.0039511919021606,grad_norm: 0.9999993002432249, iteration: 295779
loss: 0.9663320779800415,grad_norm: 0.9999990876794008, iteration: 295780
loss: 1.0181819200515747,grad_norm: 0.9454877187211885, iteration: 295781
loss: 1.0069981813430786,grad_norm: 0.9067158904140425, iteration: 295782
loss: 0.9646437168121338,grad_norm: 0.8232036541381141, iteration: 295783
loss: 1.003919243812561,grad_norm: 0.8301745055603622, iteration: 295784
loss: 0.9710667729377747,grad_norm: 0.7522765329627568, iteration: 295785
loss: 1.0598853826522827,grad_norm: 0.9999991008937101, iteration: 295786
loss: 1.0591537952423096,grad_norm: 0.8358878593484582, iteration: 295787
loss: 1.0025020837783813,grad_norm: 0.858781102347235, iteration: 295788
loss: 1.0034618377685547,grad_norm: 0.812427865973425, iteration: 295789
loss: 0.9703947901725769,grad_norm: 0.8629344628042481, iteration: 295790
loss: 1.02961003780365,grad_norm: 0.9111379144672112, iteration: 295791
loss: 1.0077282190322876,grad_norm: 0.9999991707379221, iteration: 295792
loss: 1.0198540687561035,grad_norm: 0.8503396389718739, iteration: 295793
loss: 1.084580898284912,grad_norm: 0.9999991053031118, iteration: 295794
loss: 0.9919300079345703,grad_norm: 0.9669426024737489, iteration: 295795
loss: 0.9497023224830627,grad_norm: 0.7661552586249277, iteration: 295796
loss: 1.0041589736938477,grad_norm: 0.7475551978015152, iteration: 295797
loss: 1.046528935432434,grad_norm: 0.9999990069556774, iteration: 295798
loss: 1.0425889492034912,grad_norm: 0.9999994028106247, iteration: 295799
loss: 1.0197969675064087,grad_norm: 0.939351720055168, iteration: 295800
loss: 1.1379249095916748,grad_norm: 0.9231569509072847, iteration: 295801
loss: 1.07650625705719,grad_norm: 0.9999995253150268, iteration: 295802
loss: 0.979921817779541,grad_norm: 0.8370756963710599, iteration: 295803
loss: 1.0078370571136475,grad_norm: 0.9999991288455906, iteration: 295804
loss: 0.9647512435913086,grad_norm: 0.7215248422555031, iteration: 295805
loss: 1.0262686014175415,grad_norm: 0.999999791500821, iteration: 295806
loss: 1.0161815881729126,grad_norm: 0.8055279309558787, iteration: 295807
loss: 1.0415689945220947,grad_norm: 0.921712113909938, iteration: 295808
loss: 1.0016403198242188,grad_norm: 0.9598015087807096, iteration: 295809
loss: 1.0093719959259033,grad_norm: 0.8583784478520542, iteration: 295810
loss: 1.0948750972747803,grad_norm: 0.9279597033291667, iteration: 295811
loss: 1.0097110271453857,grad_norm: 0.999999599040488, iteration: 295812
loss: 1.0145100355148315,grad_norm: 0.7994807316111082, iteration: 295813
loss: 1.0021681785583496,grad_norm: 0.934647217546292, iteration: 295814
loss: 1.0000354051589966,grad_norm: 0.8858319649880113, iteration: 295815
loss: 1.0008257627487183,grad_norm: 0.871059690997647, iteration: 295816
loss: 1.178327202796936,grad_norm: 0.999999651562322, iteration: 295817
loss: 0.9835275411605835,grad_norm: 0.9999995221822398, iteration: 295818
loss: 1.0020751953125,grad_norm: 0.8923508246146172, iteration: 295819
loss: 0.9956855177879333,grad_norm: 0.886249883489351, iteration: 295820
loss: 1.0498448610305786,grad_norm: 0.9452679514204545, iteration: 295821
loss: 1.0208629369735718,grad_norm: 0.8826233185205414, iteration: 295822
loss: 1.0146194696426392,grad_norm: 0.9999992285075723, iteration: 295823
loss: 0.99378901720047,grad_norm: 0.8855297995404547, iteration: 295824
loss: 1.014761209487915,grad_norm: 0.9999990094740534, iteration: 295825
loss: 1.0536174774169922,grad_norm: 0.8277471223928764, iteration: 295826
loss: 1.0009156465530396,grad_norm: 0.9999990450078747, iteration: 295827
loss: 0.9998939633369446,grad_norm: 0.9999995712052767, iteration: 295828
loss: 0.9818162322044373,grad_norm: 0.8538390164905031, iteration: 295829
loss: 0.991263210773468,grad_norm: 0.8854225373672929, iteration: 295830
loss: 1.0081955194473267,grad_norm: 0.9135899128844015, iteration: 295831
loss: 1.024267554283142,grad_norm: 0.9468731739055497, iteration: 295832
loss: 0.9478400945663452,grad_norm: 0.9179761602478355, iteration: 295833
loss: 1.035478115081787,grad_norm: 0.9270958927565222, iteration: 295834
loss: 0.9546911120414734,grad_norm: 0.7479032498204791, iteration: 295835
loss: 0.9747751951217651,grad_norm: 0.8044763310160792, iteration: 295836
loss: 1.0090038776397705,grad_norm: 0.9424583139086679, iteration: 295837
loss: 1.0064727067947388,grad_norm: 0.9582519766965997, iteration: 295838
loss: 1.000714659690857,grad_norm: 0.767276215035014, iteration: 295839
loss: 1.000828742980957,grad_norm: 0.7819867285713864, iteration: 295840
loss: 0.9968458414077759,grad_norm: 0.8921659441559837, iteration: 295841
loss: 1.0044680833816528,grad_norm: 0.8332825987872068, iteration: 295842
loss: 1.0188804864883423,grad_norm: 0.90479274426746, iteration: 295843
loss: 1.0320523977279663,grad_norm: 0.9247270577757405, iteration: 295844
loss: 0.9658064842224121,grad_norm: 0.9800859939962887, iteration: 295845
loss: 0.9727975726127625,grad_norm: 0.8411462131968754, iteration: 295846
loss: 0.9947332739830017,grad_norm: 0.9999989747950893, iteration: 295847
loss: 0.9997334480285645,grad_norm: 0.9999989882047571, iteration: 295848
loss: 1.0037542581558228,grad_norm: 0.8141392442488398, iteration: 295849
loss: 1.0420399904251099,grad_norm: 0.8264066815055356, iteration: 295850
loss: 0.9614588022232056,grad_norm: 0.9999991753416455, iteration: 295851
loss: 0.9752147197723389,grad_norm: 0.8170004509791808, iteration: 295852
loss: 1.0486247539520264,grad_norm: 0.8305116333529733, iteration: 295853
loss: 0.9885126352310181,grad_norm: 0.9999990189287097, iteration: 295854
loss: 0.9944940805435181,grad_norm: 0.9742613161625581, iteration: 295855
loss: 1.0327337980270386,grad_norm: 0.9924244951077916, iteration: 295856
loss: 0.9647861123085022,grad_norm: 0.7478116700412674, iteration: 295857
loss: 1.0242631435394287,grad_norm: 0.9999991504870533, iteration: 295858
loss: 1.048930048942566,grad_norm: 0.9999991134598282, iteration: 295859
loss: 0.978135347366333,grad_norm: 0.8747872552120632, iteration: 295860
loss: 0.9841461777687073,grad_norm: 0.842312273374213, iteration: 295861
loss: 1.097576379776001,grad_norm: 0.9059840320703255, iteration: 295862
loss: 0.9838420748710632,grad_norm: 0.9969795421067591, iteration: 295863
loss: 0.9993948936462402,grad_norm: 0.7341491963092978, iteration: 295864
loss: 0.9747673869132996,grad_norm: 0.8412981379173895, iteration: 295865
loss: 1.0004909038543701,grad_norm: 0.9999992197711162, iteration: 295866
loss: 1.0136582851409912,grad_norm: 0.8908235217114104, iteration: 295867
loss: 1.0391443967819214,grad_norm: 0.9999997317924653, iteration: 295868
loss: 1.0181009769439697,grad_norm: 0.8312752316400472, iteration: 295869
loss: 0.9809582829475403,grad_norm: 0.8487684647368559, iteration: 295870
loss: 1.0004388093948364,grad_norm: 0.9891454533375337, iteration: 295871
loss: 0.9852254390716553,grad_norm: 0.8136382743574808, iteration: 295872
loss: 1.0154461860656738,grad_norm: 0.7639466373274515, iteration: 295873
loss: 0.9913447499275208,grad_norm: 0.8324341934815455, iteration: 295874
loss: 0.9988816380500793,grad_norm: 0.8775000629543986, iteration: 295875
loss: 1.011096477508545,grad_norm: 0.7294965515406028, iteration: 295876
loss: 0.9969962239265442,grad_norm: 0.8166452410988239, iteration: 295877
loss: 1.0343738794326782,grad_norm: 0.9611278072571192, iteration: 295878
loss: 1.0371044874191284,grad_norm: 0.9999992169075, iteration: 295879
loss: 1.0010604858398438,grad_norm: 0.7857239006545201, iteration: 295880
loss: 1.0070652961730957,grad_norm: 0.8332181471195168, iteration: 295881
loss: 1.0375853776931763,grad_norm: 0.8262900888218204, iteration: 295882
loss: 1.006386399269104,grad_norm: 0.9999995503633191, iteration: 295883
loss: 1.0369939804077148,grad_norm: 0.8135789602789221, iteration: 295884
loss: 1.0082041025161743,grad_norm: 0.7963630853384188, iteration: 295885
loss: 0.9865257143974304,grad_norm: 0.7719047132284929, iteration: 295886
loss: 1.0628589391708374,grad_norm: 0.9028699809873596, iteration: 295887
loss: 0.9551142454147339,grad_norm: 0.7918598244172022, iteration: 295888
loss: 1.008428931236267,grad_norm: 0.9999990753672888, iteration: 295889
loss: 1.0521522760391235,grad_norm: 0.8958761263891297, iteration: 295890
loss: 0.9594829678535461,grad_norm: 0.8770919271965764, iteration: 295891
loss: 0.961753785610199,grad_norm: 0.965863258813822, iteration: 295892
loss: 0.9940462708473206,grad_norm: 0.9594196134259912, iteration: 295893
loss: 0.9833270311355591,grad_norm: 0.9634716643032506, iteration: 295894
loss: 0.9667825102806091,grad_norm: 0.9999998691726653, iteration: 295895
loss: 0.9952431321144104,grad_norm: 0.7724841046572873, iteration: 295896
loss: 1.0352919101715088,grad_norm: 0.9999997379318741, iteration: 295897
loss: 1.0217255353927612,grad_norm: 0.8515438102508721, iteration: 295898
loss: 0.9614118337631226,grad_norm: 0.8456450746493045, iteration: 295899
loss: 0.9662250876426697,grad_norm: 0.8262274364709755, iteration: 295900
loss: 1.0241963863372803,grad_norm: 0.9685467375913129, iteration: 295901
loss: 1.057621955871582,grad_norm: 1.000000008927506, iteration: 295902
loss: 1.000972032546997,grad_norm: 0.7562418264505567, iteration: 295903
loss: 0.9984934329986572,grad_norm: 0.999999162510342, iteration: 295904
loss: 1.0619088411331177,grad_norm: 0.9999996313953495, iteration: 295905
loss: 1.011473536491394,grad_norm: 0.9833739098197072, iteration: 295906
loss: 1.0237247943878174,grad_norm: 0.9841125257181305, iteration: 295907
loss: 1.037192940711975,grad_norm: 0.9206033768742103, iteration: 295908
loss: 1.0001506805419922,grad_norm: 0.8085398692417106, iteration: 295909
loss: 1.0164532661437988,grad_norm: 0.9418517977595489, iteration: 295910
loss: 1.0959569215774536,grad_norm: 0.99999908737603, iteration: 295911
loss: 0.9889918565750122,grad_norm: 0.8914933787675856, iteration: 295912
loss: 0.9981521368026733,grad_norm: 0.9999990071337858, iteration: 295913
loss: 1.1132458448410034,grad_norm: 0.9999993492959158, iteration: 295914
loss: 0.9693794250488281,grad_norm: 0.7874384136115541, iteration: 295915
loss: 1.0055179595947266,grad_norm: 0.9833735867009813, iteration: 295916
loss: 1.0149314403533936,grad_norm: 0.9999994172812201, iteration: 295917
loss: 0.977775514125824,grad_norm: 0.7761064707226637, iteration: 295918
loss: 1.0169501304626465,grad_norm: 0.7538644981345897, iteration: 295919
loss: 0.9647967219352722,grad_norm: 0.8762783637552087, iteration: 295920
loss: 1.0020054578781128,grad_norm: 0.892564837285751, iteration: 295921
loss: 1.0644192695617676,grad_norm: 0.999999688004915, iteration: 295922
loss: 1.2264593839645386,grad_norm: 0.9999991582369371, iteration: 295923
loss: 0.9783182740211487,grad_norm: 0.8741143438136254, iteration: 295924
loss: 0.9857960939407349,grad_norm: 0.8324526764635836, iteration: 295925
loss: 1.0185575485229492,grad_norm: 0.9999990901963091, iteration: 295926
loss: 1.034630298614502,grad_norm: 0.9348832665510816, iteration: 295927
loss: 1.019992470741272,grad_norm: 0.7690756067745691, iteration: 295928
loss: 1.0005619525909424,grad_norm: 0.8245474401851647, iteration: 295929
loss: 0.9874000549316406,grad_norm: 0.7847587334182362, iteration: 295930
loss: 1.0236347913742065,grad_norm: 0.8643103302797003, iteration: 295931
loss: 1.028016448020935,grad_norm: 0.8868635619072998, iteration: 295932
loss: 1.0397135019302368,grad_norm: 0.9999998901396188, iteration: 295933
loss: 0.994448184967041,grad_norm: 0.9291486534696682, iteration: 295934
loss: 1.0068366527557373,grad_norm: 0.8213139274277074, iteration: 295935
loss: 0.9941691756248474,grad_norm: 0.9999990213617522, iteration: 295936
loss: 0.9755174517631531,grad_norm: 0.9999994976455594, iteration: 295937
loss: 0.9998425841331482,grad_norm: 0.9336390081811737, iteration: 295938
loss: 0.9808748364448547,grad_norm: 0.8769605266404688, iteration: 295939
loss: 1.12650465965271,grad_norm: 0.8282726331251092, iteration: 295940
loss: 0.9850733876228333,grad_norm: 0.8615260167023645, iteration: 295941
loss: 0.99767005443573,grad_norm: 0.7781925725616955, iteration: 295942
loss: 1.020193099975586,grad_norm: 0.9721579629920437, iteration: 295943
loss: 0.980189323425293,grad_norm: 0.7536214281995168, iteration: 295944
loss: 1.0016454458236694,grad_norm: 0.8937395562309353, iteration: 295945
loss: 0.9868135452270508,grad_norm: 0.8263994151111062, iteration: 295946
loss: 0.9894186854362488,grad_norm: 0.9999990772433619, iteration: 295947
loss: 1.0198086500167847,grad_norm: 0.8706745189642445, iteration: 295948
loss: 0.9978480935096741,grad_norm: 0.901987511585048, iteration: 295949
loss: 1.038041353225708,grad_norm: 0.7468899535945983, iteration: 295950
loss: 0.9630424380302429,grad_norm: 0.8513969722078016, iteration: 295951
loss: 0.9976751208305359,grad_norm: 0.9312110425333742, iteration: 295952
loss: 0.996338427066803,grad_norm: 0.9522393601776363, iteration: 295953
loss: 0.9841172695159912,grad_norm: 0.9999991113237744, iteration: 295954
loss: 1.0022956132888794,grad_norm: 0.7581408215501324, iteration: 295955
loss: 0.9908095002174377,grad_norm: 0.999999058958719, iteration: 295956
loss: 1.049834966659546,grad_norm: 0.9999993451066703, iteration: 295957
loss: 0.9776722192764282,grad_norm: 0.9999998498971155, iteration: 295958
loss: 0.9811663031578064,grad_norm: 0.7652459609221478, iteration: 295959
loss: 0.9796623587608337,grad_norm: 0.8629527662203863, iteration: 295960
loss: 1.038090705871582,grad_norm: 0.9999999382911978, iteration: 295961
loss: 0.9961452484130859,grad_norm: 0.9198248220190417, iteration: 295962
loss: 1.009300947189331,grad_norm: 0.9999998976370031, iteration: 295963
loss: 1.0589081048965454,grad_norm: 0.9999997326888505, iteration: 295964
loss: 0.9906129837036133,grad_norm: 0.9999990649242462, iteration: 295965
loss: 1.0666857957839966,grad_norm: 0.999999080350147, iteration: 295966
loss: 0.992290735244751,grad_norm: 0.9999989995105768, iteration: 295967
loss: 0.9988623857498169,grad_norm: 0.7386094563414555, iteration: 295968
loss: 1.011871099472046,grad_norm: 0.999999027191271, iteration: 295969
loss: 1.0128110647201538,grad_norm: 0.8606514876279281, iteration: 295970
loss: 1.0088578462600708,grad_norm: 0.9261603232092288, iteration: 295971
loss: 0.9965065121650696,grad_norm: 0.9999989758053147, iteration: 295972
loss: 1.0119993686676025,grad_norm: 0.8918553437469732, iteration: 295973
loss: 0.999786913394928,grad_norm: 0.8090119744419424, iteration: 295974
loss: 1.0079344511032104,grad_norm: 0.9999995487769243, iteration: 295975
loss: 0.991658627986908,grad_norm: 0.9999995544143924, iteration: 295976
loss: 0.9889497756958008,grad_norm: 0.7427984305114692, iteration: 295977
loss: 0.9912092089653015,grad_norm: 0.9999990313453093, iteration: 295978
loss: 1.009861707687378,grad_norm: 0.7651852462099381, iteration: 295979
loss: 1.0040254592895508,grad_norm: 0.7227180713477136, iteration: 295980
loss: 1.0424928665161133,grad_norm: 0.9048493018654299, iteration: 295981
loss: 1.014256238937378,grad_norm: 0.8118281420257094, iteration: 295982
loss: 1.0426007509231567,grad_norm: 0.9421550345788131, iteration: 295983
loss: 1.0550076961517334,grad_norm: 0.9999996526153356, iteration: 295984
loss: 1.007570743560791,grad_norm: 0.8100986825287976, iteration: 295985
loss: 0.9927968382835388,grad_norm: 0.7909985310728463, iteration: 295986
loss: 0.9744442701339722,grad_norm: 0.8691432856688317, iteration: 295987
loss: 1.0078063011169434,grad_norm: 0.9724692832839286, iteration: 295988
loss: 1.0535765886306763,grad_norm: 0.9999993378632575, iteration: 295989
loss: 1.0212007761001587,grad_norm: 0.7658987048897258, iteration: 295990
loss: 1.0214849710464478,grad_norm: 0.9625886390428783, iteration: 295991
loss: 1.0389872789382935,grad_norm: 0.7865184854188445, iteration: 295992
loss: 1.0127882957458496,grad_norm: 0.8184547923252984, iteration: 295993
loss: 0.9799202084541321,grad_norm: 0.9999990434415503, iteration: 295994
loss: 1.0403504371643066,grad_norm: 0.9708427153960895, iteration: 295995
loss: 1.0399285554885864,grad_norm: 0.9039807354521361, iteration: 295996
loss: 1.0238111019134521,grad_norm: 0.9999994293320491, iteration: 295997
loss: 0.9870136976242065,grad_norm: 0.7473534568742216, iteration: 295998
loss: 1.025842308998108,grad_norm: 0.9065976689987056, iteration: 295999
loss: 1.0178285837173462,grad_norm: 0.8243535417142713, iteration: 296000
loss: 1.016768217086792,grad_norm: 0.7421965259023765, iteration: 296001
loss: 0.9988374710083008,grad_norm: 0.8587857483605408, iteration: 296002
loss: 0.9717023372650146,grad_norm: 0.9999991912047576, iteration: 296003
loss: 1.0118225812911987,grad_norm: 0.8275891746837535, iteration: 296004
loss: 1.018172025680542,grad_norm: 0.898385409993136, iteration: 296005
loss: 1.002996802330017,grad_norm: 0.832369710229985, iteration: 296006
loss: 0.9961525797843933,grad_norm: 0.7920277778424719, iteration: 296007
loss: 1.0158103704452515,grad_norm: 0.8710411617597409, iteration: 296008
loss: 1.0477678775787354,grad_norm: 0.837506496072641, iteration: 296009
loss: 1.0148175954818726,grad_norm: 0.7303059147188123, iteration: 296010
loss: 1.0316849946975708,grad_norm: 0.9999992171736368, iteration: 296011
loss: 1.0165544748306274,grad_norm: 0.9999990320287965, iteration: 296012
loss: 1.0199559926986694,grad_norm: 0.8967483421220607, iteration: 296013
loss: 0.9889852404594421,grad_norm: 0.8896069133270439, iteration: 296014
loss: 1.0317116975784302,grad_norm: 0.9928868471131927, iteration: 296015
loss: 1.022992730140686,grad_norm: 0.8195849666140566, iteration: 296016
loss: 0.9999969005584717,grad_norm: 0.8235676756672922, iteration: 296017
loss: 0.9809643626213074,grad_norm: 0.7374278514375675, iteration: 296018
loss: 0.9946075677871704,grad_norm: 0.9728712068650044, iteration: 296019
loss: 1.000824213027954,grad_norm: 0.8087520071906166, iteration: 296020
loss: 0.9893718957901001,grad_norm: 0.9292800624454177, iteration: 296021
loss: 1.0309228897094727,grad_norm: 0.7602457363167275, iteration: 296022
loss: 1.0199779272079468,grad_norm: 0.7808344410964629, iteration: 296023
loss: 0.9923098683357239,grad_norm: 0.7712799920362117, iteration: 296024
loss: 0.9944434762001038,grad_norm: 0.8686328382559892, iteration: 296025
loss: 1.0288739204406738,grad_norm: 0.8650814589600426, iteration: 296026
loss: 1.0014499425888062,grad_norm: 0.7017507373587306, iteration: 296027
loss: 1.0342373847961426,grad_norm: 0.7869595618203384, iteration: 296028
loss: 1.0131334066390991,grad_norm: 0.8213132844957433, iteration: 296029
loss: 0.9687460064888,grad_norm: 0.9311668813993867, iteration: 296030
loss: 1.0050667524337769,grad_norm: 0.8824135862872446, iteration: 296031
loss: 1.0321484804153442,grad_norm: 0.8438443543775745, iteration: 296032
loss: 1.0332669019699097,grad_norm: 0.8534505287393233, iteration: 296033
loss: 1.0282280445098877,grad_norm: 0.8634776397249917, iteration: 296034
loss: 1.0119796991348267,grad_norm: 0.8585340565738608, iteration: 296035
loss: 1.0212799310684204,grad_norm: 0.7538871224858579, iteration: 296036
loss: 0.9794291853904724,grad_norm: 0.735762698417868, iteration: 296037
loss: 0.9839828014373779,grad_norm: 0.9910991035832545, iteration: 296038
loss: 0.9968291521072388,grad_norm: 0.7491966974327842, iteration: 296039
loss: 0.991985023021698,grad_norm: 0.8864724394977794, iteration: 296040
loss: 1.0511343479156494,grad_norm: 0.8318194980986768, iteration: 296041
loss: 0.998604953289032,grad_norm: 0.8080008992580728, iteration: 296042
loss: 0.9937086701393127,grad_norm: 0.9622099858306822, iteration: 296043
loss: 1.0062520503997803,grad_norm: 0.7933543573197654, iteration: 296044
loss: 0.9833235144615173,grad_norm: 0.8920940306222092, iteration: 296045
loss: 0.984998345375061,grad_norm: 0.8408588355958546, iteration: 296046
loss: 0.9811010360717773,grad_norm: 0.6648547506652498, iteration: 296047
loss: 1.0309783220291138,grad_norm: 0.9720380963788615, iteration: 296048
loss: 0.9880565404891968,grad_norm: 0.7727960781778167, iteration: 296049
loss: 1.0210715532302856,grad_norm: 0.8688328287239458, iteration: 296050
loss: 0.9676384925842285,grad_norm: 0.779461583834188, iteration: 296051
loss: 1.0074493885040283,grad_norm: 0.7275139329406718, iteration: 296052
loss: 0.9683398008346558,grad_norm: 0.6385838786479241, iteration: 296053
loss: 0.9754164218902588,grad_norm: 0.8008509594120325, iteration: 296054
loss: 0.9946485757827759,grad_norm: 0.9999990441652806, iteration: 296055
loss: 0.994392991065979,grad_norm: 0.9059951685481441, iteration: 296056
loss: 0.982864499092102,grad_norm: 0.9999991950024829, iteration: 296057
loss: 0.9899251461029053,grad_norm: 0.9128738669936645, iteration: 296058
loss: 1.0650649070739746,grad_norm: 0.9999995126417305, iteration: 296059
loss: 1.0039312839508057,grad_norm: 0.9999991327420794, iteration: 296060
loss: 1.001275897026062,grad_norm: 0.7538405423601409, iteration: 296061
loss: 0.9822297692298889,grad_norm: 0.9999992479758542, iteration: 296062
loss: 1.0169899463653564,grad_norm: 0.9043084376615661, iteration: 296063
loss: 1.0144838094711304,grad_norm: 0.8652445896341425, iteration: 296064
loss: 0.9547407031059265,grad_norm: 0.9551318904558345, iteration: 296065
loss: 0.9834814071655273,grad_norm: 0.8832281455310542, iteration: 296066
loss: 0.9983529448509216,grad_norm: 0.7723289554928657, iteration: 296067
loss: 1.016054630279541,grad_norm: 0.9309758267198561, iteration: 296068
loss: 0.9600017666816711,grad_norm: 0.9287280966838223, iteration: 296069
loss: 0.9951010942459106,grad_norm: 0.8168734262986854, iteration: 296070
loss: 1.0325871706008911,grad_norm: 0.9999991975697974, iteration: 296071
loss: 0.9592164158821106,grad_norm: 0.9999994693346032, iteration: 296072
loss: 0.9873366355895996,grad_norm: 0.6861750106314527, iteration: 296073
loss: 1.0157241821289062,grad_norm: 0.9969526376184449, iteration: 296074
loss: 1.0345559120178223,grad_norm: 0.9999998016890118, iteration: 296075
loss: 1.0143853425979614,grad_norm: 0.999999106178224, iteration: 296076
loss: 0.9993346333503723,grad_norm: 0.890240685397912, iteration: 296077
loss: 1.0117831230163574,grad_norm: 0.8575203902154189, iteration: 296078
loss: 0.9773004651069641,grad_norm: 0.786243662970564, iteration: 296079
loss: 0.9816296100616455,grad_norm: 0.9999990218335253, iteration: 296080
loss: 0.9859558939933777,grad_norm: 0.9999998058868256, iteration: 296081
loss: 0.9859002828598022,grad_norm: 0.783760775911168, iteration: 296082
loss: 0.993769645690918,grad_norm: 0.9999991864299137, iteration: 296083
loss: 1.04946768283844,grad_norm: 0.9442739500531264, iteration: 296084
loss: 1.0101821422576904,grad_norm: 0.9999990151895377, iteration: 296085
loss: 0.9991767406463623,grad_norm: 0.9610932830145117, iteration: 296086
loss: 0.978556752204895,grad_norm: 0.8468430133949065, iteration: 296087
loss: 1.0145403146743774,grad_norm: 0.7558508725769401, iteration: 296088
loss: 1.0185037851333618,grad_norm: 0.7786480568904882, iteration: 296089
loss: 0.9890890717506409,grad_norm: 0.8691663619958455, iteration: 296090
loss: 1.055633306503296,grad_norm: 0.9999996214558553, iteration: 296091
loss: 1.0267106294631958,grad_norm: 0.8593190337568418, iteration: 296092
loss: 0.9720790982246399,grad_norm: 0.9792812056679958, iteration: 296093
loss: 0.9948544502258301,grad_norm: 0.8798366326094296, iteration: 296094
loss: 0.9789389967918396,grad_norm: 0.8610887992284681, iteration: 296095
loss: 1.0257625579833984,grad_norm: 0.8236674705530104, iteration: 296096
loss: 1.0156117677688599,grad_norm: 0.9956463956316594, iteration: 296097
loss: 1.0220062732696533,grad_norm: 0.7623014695625872, iteration: 296098
loss: 1.0688093900680542,grad_norm: 0.9999994061241432, iteration: 296099
loss: 1.0099858045578003,grad_norm: 0.7961355903447552, iteration: 296100
loss: 1.1005196571350098,grad_norm: 0.999999754742617, iteration: 296101
loss: 0.9930511116981506,grad_norm: 0.9999989527558203, iteration: 296102
loss: 0.9806520342826843,grad_norm: 0.8892017105865281, iteration: 296103
loss: 1.003151535987854,grad_norm: 0.9610777338900153, iteration: 296104
loss: 1.014527440071106,grad_norm: 0.7960805622461847, iteration: 296105
loss: 1.022089958190918,grad_norm: 0.9124873312744141, iteration: 296106
loss: 1.0296845436096191,grad_norm: 0.8715658638156121, iteration: 296107
loss: 0.9775456786155701,grad_norm: 0.8677527093610669, iteration: 296108
loss: 0.9880800843238831,grad_norm: 0.8429820835234006, iteration: 296109
loss: 1.0010513067245483,grad_norm: 0.9846067224010379, iteration: 296110
loss: 0.9906531572341919,grad_norm: 0.7874436031494391, iteration: 296111
loss: 1.0000009536743164,grad_norm: 0.8777743988521494, iteration: 296112
loss: 0.9852036833763123,grad_norm: 0.8107539526329475, iteration: 296113
loss: 0.9726282954216003,grad_norm: 0.9999997536172758, iteration: 296114
loss: 0.9976967573165894,grad_norm: 0.7934753132215363, iteration: 296115
loss: 1.0107173919677734,grad_norm: 0.8933281720868363, iteration: 296116
loss: 1.0023208856582642,grad_norm: 0.8821982597273967, iteration: 296117
loss: 1.0203380584716797,grad_norm: 0.9315583559793971, iteration: 296118
loss: 0.9645448923110962,grad_norm: 0.9261364396749725, iteration: 296119
loss: 0.9808788299560547,grad_norm: 0.7090483820581999, iteration: 296120
loss: 1.0049415826797485,grad_norm: 0.8172551368211314, iteration: 296121
loss: 0.9461647272109985,grad_norm: 0.9283581789066111, iteration: 296122
loss: 1.015439748764038,grad_norm: 0.7435432734505473, iteration: 296123
loss: 1.0209285020828247,grad_norm: 0.9999991487792598, iteration: 296124
loss: 1.0028421878814697,grad_norm: 0.9533537140578683, iteration: 296125
loss: 0.9782187342643738,grad_norm: 0.9003826404959531, iteration: 296126
loss: 0.9852393865585327,grad_norm: 0.9999996404240806, iteration: 296127
loss: 0.9739788174629211,grad_norm: 0.9295195844571682, iteration: 296128
loss: 1.0012325048446655,grad_norm: 0.8290691362346452, iteration: 296129
loss: 0.9836854934692383,grad_norm: 0.9999990266525909, iteration: 296130
loss: 1.011799693107605,grad_norm: 0.9646297453848797, iteration: 296131
loss: 0.9966349601745605,grad_norm: 0.8108370036186666, iteration: 296132
loss: 0.9946602582931519,grad_norm: 0.7220520551662847, iteration: 296133
loss: 0.9946109652519226,grad_norm: 0.8469053735704095, iteration: 296134
loss: 1.0184204578399658,grad_norm: 0.8622917478056694, iteration: 296135
loss: 1.0751646757125854,grad_norm: 0.9999995152691694, iteration: 296136
loss: 0.9846261739730835,grad_norm: 0.8971963374103812, iteration: 296137
loss: 1.0256078243255615,grad_norm: 0.7861979744630012, iteration: 296138
loss: 1.0732659101486206,grad_norm: 0.9999992133630428, iteration: 296139
loss: 0.972808837890625,grad_norm: 0.9999990805942697, iteration: 296140
loss: 1.0952774286270142,grad_norm: 0.9999997286378757, iteration: 296141
loss: 1.0135070085525513,grad_norm: 0.8841276471758394, iteration: 296142
loss: 0.989348828792572,grad_norm: 0.9673979560774819, iteration: 296143
loss: 1.019626498222351,grad_norm: 0.7097731324947945, iteration: 296144
loss: 1.0259543657302856,grad_norm: 0.9169423129007691, iteration: 296145
loss: 0.9968236684799194,grad_norm: 0.9089786144619942, iteration: 296146
loss: 1.0272842645645142,grad_norm: 0.9999991103082613, iteration: 296147
loss: 1.0622243881225586,grad_norm: 0.9999992428104574, iteration: 296148
loss: 1.0151381492614746,grad_norm: 0.9999995459734825, iteration: 296149
loss: 1.0001360177993774,grad_norm: 0.9999989198004944, iteration: 296150
loss: 1.0184940099716187,grad_norm: 0.8073636514283218, iteration: 296151
loss: 0.9776216149330139,grad_norm: 0.8017211055548503, iteration: 296152
loss: 0.9995259046554565,grad_norm: 0.9999990331012404, iteration: 296153
loss: 0.9360066652297974,grad_norm: 0.8966696070688839, iteration: 296154
loss: 0.9697477221488953,grad_norm: 0.7174022574901789, iteration: 296155
loss: 0.9981822967529297,grad_norm: 0.7773641384053932, iteration: 296156
loss: 1.0071762800216675,grad_norm: 0.9999991392953032, iteration: 296157
loss: 0.9739547371864319,grad_norm: 0.8197011894221703, iteration: 296158
loss: 1.004213809967041,grad_norm: 0.9999990435983136, iteration: 296159
loss: 1.0254048109054565,grad_norm: 0.8299684170106129, iteration: 296160
loss: 1.0246573686599731,grad_norm: 0.994184467445113, iteration: 296161
loss: 1.0180989503860474,grad_norm: 0.9999991754555991, iteration: 296162
loss: 0.981440007686615,grad_norm: 0.87646008537987, iteration: 296163
loss: 1.0004971027374268,grad_norm: 0.7946696885021268, iteration: 296164
loss: 1.0238511562347412,grad_norm: 0.8057158621191367, iteration: 296165
loss: 1.0143450498580933,grad_norm: 0.9229192321886734, iteration: 296166
loss: 1.0412522554397583,grad_norm: 0.9999997807145652, iteration: 296167
loss: 0.9823174476623535,grad_norm: 0.8109573773704623, iteration: 296168
loss: 1.022424340248108,grad_norm: 0.8692546056417666, iteration: 296169
loss: 1.0019761323928833,grad_norm: 0.6839735011989634, iteration: 296170
loss: 1.0132962465286255,grad_norm: 0.8523673606840518, iteration: 296171
loss: 1.0040686130523682,grad_norm: 0.7568340206587959, iteration: 296172
loss: 1.0101584196090698,grad_norm: 0.8628964642071965, iteration: 296173
loss: 1.0684025287628174,grad_norm: 0.9120817855146633, iteration: 296174
loss: 0.9988667368888855,grad_norm: 0.9999989519251142, iteration: 296175
loss: 1.0279669761657715,grad_norm: 0.9944361450290822, iteration: 296176
loss: 0.984935462474823,grad_norm: 0.9999989700421473, iteration: 296177
loss: 1.0562318563461304,grad_norm: 0.999999380529786, iteration: 296178
loss: 1.052229404449463,grad_norm: 0.999999693603158, iteration: 296179
loss: 0.9813475012779236,grad_norm: 0.8213476613076895, iteration: 296180
loss: 1.0132743120193481,grad_norm: 0.816246641311667, iteration: 296181
loss: 0.9833145141601562,grad_norm: 0.8592144805693804, iteration: 296182
loss: 0.990198016166687,grad_norm: 0.8198736061183242, iteration: 296183
loss: 0.9510735273361206,grad_norm: 0.9480095642182195, iteration: 296184
loss: 1.0958343744277954,grad_norm: 0.9000933820166954, iteration: 296185
loss: 0.997968316078186,grad_norm: 0.9906352651539045, iteration: 296186
loss: 1.0248844623565674,grad_norm: 0.8078127172454485, iteration: 296187
loss: 0.9466850161552429,grad_norm: 0.8933678670895315, iteration: 296188
loss: 0.97201007604599,grad_norm: 0.9176967103246486, iteration: 296189
loss: 0.9691447615623474,grad_norm: 0.8770042883273579, iteration: 296190
loss: 0.9669125080108643,grad_norm: 0.9999991565217288, iteration: 296191
loss: 1.0154051780700684,grad_norm: 0.9999990209597167, iteration: 296192
loss: 1.0355435609817505,grad_norm: 0.9999999172381648, iteration: 296193
loss: 0.9954232573509216,grad_norm: 0.8757763676526896, iteration: 296194
loss: 1.0093141794204712,grad_norm: 0.910565822555186, iteration: 296195
loss: 1.0165462493896484,grad_norm: 0.9999990397015052, iteration: 296196
loss: 0.97845858335495,grad_norm: 0.7947027331861531, iteration: 296197
loss: 0.9836738705635071,grad_norm: 0.7902829425147225, iteration: 296198
loss: 0.9904649257659912,grad_norm: 0.950558295390795, iteration: 296199
loss: 0.9689931869506836,grad_norm: 0.9999990853674221, iteration: 296200
loss: 0.967569887638092,grad_norm: 0.7362454723516031, iteration: 296201
loss: 0.9850870370864868,grad_norm: 0.9674942349617826, iteration: 296202
loss: 1.0019102096557617,grad_norm: 0.7795066811839655, iteration: 296203
loss: 1.0130505561828613,grad_norm: 0.7276903263534201, iteration: 296204
loss: 1.0022493600845337,grad_norm: 0.8510916980404296, iteration: 296205
loss: 0.9714056849479675,grad_norm: 0.8678552087123479, iteration: 296206
loss: 1.0087555646896362,grad_norm: 0.8276113217053618, iteration: 296207
loss: 1.0037155151367188,grad_norm: 0.9342997191188475, iteration: 296208
loss: 1.0019159317016602,grad_norm: 0.8849089685954936, iteration: 296209
loss: 1.0428667068481445,grad_norm: 0.8944866450821748, iteration: 296210
loss: 1.0132811069488525,grad_norm: 0.8534311401249245, iteration: 296211
loss: 1.007285714149475,grad_norm: 0.9091602310078565, iteration: 296212
loss: 0.9976362586021423,grad_norm: 0.8045235410703082, iteration: 296213
loss: 0.9882466793060303,grad_norm: 0.9206346671047667, iteration: 296214
loss: 0.9758203625679016,grad_norm: 0.9668508994801752, iteration: 296215
loss: 0.9665781259536743,grad_norm: 0.8889363786359121, iteration: 296216
loss: 0.9986241459846497,grad_norm: 0.8697970091875848, iteration: 296217
loss: 0.9872570633888245,grad_norm: 0.7869858244196875, iteration: 296218
loss: 0.9699172973632812,grad_norm: 0.8228582634128901, iteration: 296219
loss: 1.0349351167678833,grad_norm: 0.996944439030011, iteration: 296220
loss: 0.9926301836967468,grad_norm: 0.826866606530046, iteration: 296221
loss: 1.0241990089416504,grad_norm: 0.8661594540925036, iteration: 296222
loss: 1.0010151863098145,grad_norm: 0.8987954268153251, iteration: 296223
loss: 0.9516863226890564,grad_norm: 0.8179284230877732, iteration: 296224
loss: 1.0447111129760742,grad_norm: 0.8059142537877035, iteration: 296225
loss: 1.0050278902053833,grad_norm: 0.8494447340458698, iteration: 296226
loss: 1.047107458114624,grad_norm: 0.9999990074645283, iteration: 296227
loss: 1.0021575689315796,grad_norm: 0.9999992285570493, iteration: 296228
loss: 1.006363034248352,grad_norm: 0.9710336740304457, iteration: 296229
loss: 0.9631116986274719,grad_norm: 0.9999990688141546, iteration: 296230
loss: 1.0318368673324585,grad_norm: 0.999999121391901, iteration: 296231
loss: 0.9749186038970947,grad_norm: 0.9999990929189332, iteration: 296232
loss: 1.0122365951538086,grad_norm: 0.8282127355112239, iteration: 296233
loss: 1.0078864097595215,grad_norm: 0.8363903282320945, iteration: 296234
loss: 1.0264701843261719,grad_norm: 0.9999992672409641, iteration: 296235
loss: 1.1810075044631958,grad_norm: 0.9999995283104451, iteration: 296236
loss: 0.9890458583831787,grad_norm: 0.9999989735464537, iteration: 296237
loss: 0.9823550581932068,grad_norm: 0.7652682603195361, iteration: 296238
loss: 1.122115969657898,grad_norm: 0.999999448134532, iteration: 296239
loss: 1.005397081375122,grad_norm: 0.66946658252911, iteration: 296240
loss: 0.9798054099082947,grad_norm: 0.8456105528045311, iteration: 296241
loss: 1.0111384391784668,grad_norm: 0.769305749283311, iteration: 296242
loss: 1.0130168199539185,grad_norm: 0.8571186129080997, iteration: 296243
loss: 1.0033748149871826,grad_norm: 0.7928374337351402, iteration: 296244
loss: 0.9921136498451233,grad_norm: 0.9999992814111794, iteration: 296245
loss: 1.0172135829925537,grad_norm: 0.9999995396323433, iteration: 296246
loss: 0.9834393858909607,grad_norm: 0.9166466173192717, iteration: 296247
loss: 1.0126203298568726,grad_norm: 0.915258450508804, iteration: 296248
loss: 1.001423716545105,grad_norm: 0.677092412315037, iteration: 296249
loss: 1.0618915557861328,grad_norm: 0.9649332528858653, iteration: 296250
loss: 1.0283169746398926,grad_norm: 0.7856655809091226, iteration: 296251
loss: 0.989707350730896,grad_norm: 0.9999996851710707, iteration: 296252
loss: 0.9742203950881958,grad_norm: 0.9999990125660774, iteration: 296253
loss: 0.9715455174446106,grad_norm: 0.7393061960660392, iteration: 296254
loss: 0.9648602604866028,grad_norm: 0.9999991407720907, iteration: 296255
loss: 1.2011862993240356,grad_norm: 0.9999995569807916, iteration: 296256
loss: 1.0464221239089966,grad_norm: 0.7716697251835529, iteration: 296257
loss: 0.9616423845291138,grad_norm: 0.9634189279645876, iteration: 296258
loss: 1.0668036937713623,grad_norm: 0.9999994317304561, iteration: 296259
loss: 1.0143176317214966,grad_norm: 0.9202457821954378, iteration: 296260
loss: 0.9902169108390808,grad_norm: 0.9999990946155488, iteration: 296261
loss: 0.9894565939903259,grad_norm: 0.7660138808781815, iteration: 296262
loss: 1.0236982107162476,grad_norm: 0.7301830792428258, iteration: 296263
loss: 1.0084424018859863,grad_norm: 0.827973803895194, iteration: 296264
loss: 0.9942705631256104,grad_norm: 0.999999287959753, iteration: 296265
loss: 1.0506188869476318,grad_norm: 0.9379747696145104, iteration: 296266
loss: 1.0329313278198242,grad_norm: 0.8924886700389216, iteration: 296267
loss: 1.0746746063232422,grad_norm: 0.9046539407140443, iteration: 296268
loss: 1.0154460668563843,grad_norm: 0.8370309608391694, iteration: 296269
loss: 1.0255508422851562,grad_norm: 0.9942941534469967, iteration: 296270
loss: 0.9995140433311462,grad_norm: 0.9047550075698181, iteration: 296271
loss: 1.0208802223205566,grad_norm: 0.8807873142784323, iteration: 296272
loss: 1.014924168586731,grad_norm: 0.9085350552929843, iteration: 296273
loss: 0.9905251264572144,grad_norm: 0.8101766343845916, iteration: 296274
loss: 1.028310775756836,grad_norm: 0.9837674520558733, iteration: 296275
loss: 1.0075147151947021,grad_norm: 0.7972856615991938, iteration: 296276
loss: 1.0150814056396484,grad_norm: 0.9999990810519821, iteration: 296277
loss: 0.9767183065414429,grad_norm: 0.7795745975465173, iteration: 296278
loss: 1.150343418121338,grad_norm: 0.999999966753476, iteration: 296279
loss: 0.9882657527923584,grad_norm: 0.8478619221493087, iteration: 296280
loss: 1.0725198984146118,grad_norm: 0.9999998136787694, iteration: 296281
loss: 1.024924397468567,grad_norm: 0.9139148518594803, iteration: 296282
loss: 0.9991781115531921,grad_norm: 0.8173250498141436, iteration: 296283
loss: 1.0080814361572266,grad_norm: 0.6667805012739262, iteration: 296284
loss: 0.9799509048461914,grad_norm: 0.7612780249683024, iteration: 296285
loss: 1.027281403541565,grad_norm: 0.7781731928928788, iteration: 296286
loss: 1.0078856945037842,grad_norm: 0.856750229054207, iteration: 296287
loss: 1.0577540397644043,grad_norm: 0.9999991099514604, iteration: 296288
loss: 0.9794286489486694,grad_norm: 0.8667124945698607, iteration: 296289
loss: 1.0018559694290161,grad_norm: 0.9613742969281442, iteration: 296290
loss: 0.983790397644043,grad_norm: 0.7910051747176831, iteration: 296291
loss: 0.9952548742294312,grad_norm: 0.7382653314361275, iteration: 296292
loss: 1.034590244293213,grad_norm: 0.9999990816141702, iteration: 296293
loss: 1.0243470668792725,grad_norm: 0.999999044997311, iteration: 296294
loss: 0.9698076844215393,grad_norm: 0.9191337374255055, iteration: 296295
loss: 1.015893816947937,grad_norm: 0.9939948054979426, iteration: 296296
loss: 0.9935852289199829,grad_norm: 0.755026575225527, iteration: 296297
loss: 0.9921855926513672,grad_norm: 0.958617648198432, iteration: 296298
loss: 1.0227680206298828,grad_norm: 0.9388908333962414, iteration: 296299
loss: 1.0066657066345215,grad_norm: 0.79509159027994, iteration: 296300
loss: 0.9504684805870056,grad_norm: 0.7695177285416375, iteration: 296301
loss: 1.0009123086929321,grad_norm: 0.8734737897417016, iteration: 296302
loss: 0.9716431498527527,grad_norm: 0.7266030216314356, iteration: 296303
loss: 0.9845027327537537,grad_norm: 0.9916509555417717, iteration: 296304
loss: 0.9951031804084778,grad_norm: 0.885485822916825, iteration: 296305
loss: 0.998379647731781,grad_norm: 0.9169518675586491, iteration: 296306
loss: 1.051640272140503,grad_norm: 0.7835465082999092, iteration: 296307
loss: 0.9735668301582336,grad_norm: 0.9999991619025896, iteration: 296308
loss: 1.266416072845459,grad_norm: 0.999999750986537, iteration: 296309
loss: 0.9954314827919006,grad_norm: 0.9378791337052895, iteration: 296310
loss: 1.0264002084732056,grad_norm: 0.9999995027044435, iteration: 296311
loss: 1.0391634702682495,grad_norm: 0.8680586504730499, iteration: 296312
loss: 1.0481969118118286,grad_norm: 1.000000017646601, iteration: 296313
loss: 1.0126068592071533,grad_norm: 0.905339346516303, iteration: 296314
loss: 0.9668986797332764,grad_norm: 0.7413906051773026, iteration: 296315
loss: 1.0094515085220337,grad_norm: 0.8439935188190144, iteration: 296316
loss: 1.0418401956558228,grad_norm: 0.9570731226860885, iteration: 296317
loss: 1.0267093181610107,grad_norm: 0.8444264897376835, iteration: 296318
loss: 1.0044924020767212,grad_norm: 0.9999990966615654, iteration: 296319
loss: 1.01181161403656,grad_norm: 0.8872882422127469, iteration: 296320
loss: 1.0616949796676636,grad_norm: 0.7875141029472323, iteration: 296321
loss: 0.9980614185333252,grad_norm: 0.7859351636737785, iteration: 296322
loss: 0.9958090782165527,grad_norm: 0.8561580385207089, iteration: 296323
loss: 0.998776376247406,grad_norm: 0.9253402965957651, iteration: 296324
loss: 1.0271470546722412,grad_norm: 0.9009817606098982, iteration: 296325
loss: 0.9564636945724487,grad_norm: 0.8001503641849904, iteration: 296326
loss: 1.0107005834579468,grad_norm: 0.7707850722325851, iteration: 296327
loss: 0.9872020483016968,grad_norm: 0.8842942217625871, iteration: 296328
loss: 1.0153710842132568,grad_norm: 0.7771220375087693, iteration: 296329
loss: 0.9859863519668579,grad_norm: 0.7647035892776685, iteration: 296330
loss: 1.018672227859497,grad_norm: 0.9082057238961215, iteration: 296331
loss: 1.0521663427352905,grad_norm: 0.9999997332480919, iteration: 296332
loss: 0.9864698052406311,grad_norm: 0.999999936225431, iteration: 296333
loss: 1.0157442092895508,grad_norm: 0.8914053250676773, iteration: 296334
loss: 1.0024956464767456,grad_norm: 0.9999990357284723, iteration: 296335
loss: 1.0008488893508911,grad_norm: 0.8478764238725529, iteration: 296336
loss: 1.0484226942062378,grad_norm: 0.9892843878431351, iteration: 296337
loss: 1.0011979341506958,grad_norm: 0.9604635587453204, iteration: 296338
loss: 1.0025299787521362,grad_norm: 0.9088165991816393, iteration: 296339
loss: 1.0353589057922363,grad_norm: 0.6602626058651055, iteration: 296340
loss: 1.1126402616500854,grad_norm: 0.999999218249222, iteration: 296341
loss: 0.9937006235122681,grad_norm: 0.8768501151204111, iteration: 296342
loss: 1.0013282299041748,grad_norm: 0.9999992490329497, iteration: 296343
loss: 0.9892062544822693,grad_norm: 0.8332423083014011, iteration: 296344
loss: 0.9588674306869507,grad_norm: 0.9999992950398895, iteration: 296345
loss: 0.9769337177276611,grad_norm: 0.778493946581252, iteration: 296346
loss: 1.0339974164962769,grad_norm: 0.8397599687491643, iteration: 296347
loss: 0.9571114778518677,grad_norm: 0.7960025343636298, iteration: 296348
loss: 0.9840418100357056,grad_norm: 0.9999996640685554, iteration: 296349
loss: 1.0177254676818848,grad_norm: 0.9999989971070393, iteration: 296350
loss: 0.9568296074867249,grad_norm: 0.7867524693692007, iteration: 296351
loss: 1.0129106044769287,grad_norm: 0.9148769620558455, iteration: 296352
loss: 1.0090148448944092,grad_norm: 0.8395733648564432, iteration: 296353
loss: 0.9749636650085449,grad_norm: 0.9999990906802123, iteration: 296354
loss: 1.0558220148086548,grad_norm: 0.9468044652970669, iteration: 296355
loss: 1.0232974290847778,grad_norm: 0.7630637769022808, iteration: 296356
loss: 1.0230201482772827,grad_norm: 0.9999991655693322, iteration: 296357
loss: 1.0027822256088257,grad_norm: 0.7818500741235668, iteration: 296358
loss: 1.01571786403656,grad_norm: 0.9453861218213391, iteration: 296359
loss: 0.988893985748291,grad_norm: 0.9470565070071545, iteration: 296360
loss: 1.002870798110962,grad_norm: 0.9999992797489219, iteration: 296361
loss: 0.9870350360870361,grad_norm: 0.8322632748957798, iteration: 296362
loss: 0.974536657333374,grad_norm: 0.7083111501180979, iteration: 296363
loss: 0.9840821027755737,grad_norm: 0.845369550456176, iteration: 296364
loss: 1.0029267072677612,grad_norm: 0.7264985207267121, iteration: 296365
loss: 1.0194628238677979,grad_norm: 0.941546647579209, iteration: 296366
loss: 0.9712476134300232,grad_norm: 0.8670139951812669, iteration: 296367
loss: 1.025811791419983,grad_norm: 0.7969005363324193, iteration: 296368
loss: 1.0127495527267456,grad_norm: 0.9999990788118092, iteration: 296369
loss: 0.9549962282180786,grad_norm: 0.9621476442935074, iteration: 296370
loss: 0.9917165637016296,grad_norm: 0.730776751448538, iteration: 296371
loss: 0.9991900324821472,grad_norm: 0.9999990449504641, iteration: 296372
loss: 1.01601243019104,grad_norm: 0.8370884004449171, iteration: 296373
loss: 0.9967562556266785,grad_norm: 0.798564173326997, iteration: 296374
loss: 1.0100356340408325,grad_norm: 0.9999991871461527, iteration: 296375
loss: 1.0235247611999512,grad_norm: 0.9999989970042705, iteration: 296376
loss: 1.0117268562316895,grad_norm: 0.8621104326022199, iteration: 296377
loss: 0.9971959590911865,grad_norm: 0.999999940612497, iteration: 296378
loss: 0.9994401931762695,grad_norm: 0.8100999203415724, iteration: 296379
loss: 1.0005799531936646,grad_norm: 0.9999990731749874, iteration: 296380
loss: 1.0138641595840454,grad_norm: 0.9999991018893624, iteration: 296381
loss: 1.132533073425293,grad_norm: 0.8938074031974355, iteration: 296382
loss: 0.9937874674797058,grad_norm: 0.8516991708339129, iteration: 296383
loss: 0.9791135191917419,grad_norm: 0.9264394729436258, iteration: 296384
loss: 0.9820837378501892,grad_norm: 0.763943042238916, iteration: 296385
loss: 1.0591837167739868,grad_norm: 0.9918437648339189, iteration: 296386
loss: 1.0079232454299927,grad_norm: 0.762392146681574, iteration: 296387
loss: 0.9757505655288696,grad_norm: 0.7669871146891117, iteration: 296388
loss: 0.9692872166633606,grad_norm: 0.9195251707620634, iteration: 296389
loss: 1.033560872077942,grad_norm: 0.9999991397137393, iteration: 296390
loss: 1.0442503690719604,grad_norm: 0.8050601391951026, iteration: 296391
loss: 0.9922715425491333,grad_norm: 0.910003750437999, iteration: 296392
loss: 1.006463885307312,grad_norm: 0.8967841264560616, iteration: 296393
loss: 1.0258219242095947,grad_norm: 0.8090211017104512, iteration: 296394
loss: 0.9875330924987793,grad_norm: 0.8832823209913916, iteration: 296395
loss: 0.9925234913825989,grad_norm: 0.8899641627472787, iteration: 296396
loss: 1.0177870988845825,grad_norm: 0.9999996641468585, iteration: 296397
loss: 0.9811605215072632,grad_norm: 0.8682841043346071, iteration: 296398
loss: 1.166573405265808,grad_norm: 0.9999993193745715, iteration: 296399
loss: 1.0035951137542725,grad_norm: 0.9795293806555292, iteration: 296400
loss: 1.000906229019165,grad_norm: 0.7794600447452167, iteration: 296401
loss: 1.0224062204360962,grad_norm: 0.933034651719084, iteration: 296402
loss: 0.9958043694496155,grad_norm: 0.989958022726329, iteration: 296403
loss: 1.0071786642074585,grad_norm: 0.9654634383883484, iteration: 296404
loss: 0.9736130237579346,grad_norm: 0.8526429046472137, iteration: 296405
loss: 1.0217574834823608,grad_norm: 0.9196259755388414, iteration: 296406
loss: 0.9905142784118652,grad_norm: 0.8303300267462034, iteration: 296407
loss: 1.0209773778915405,grad_norm: 0.7871798427066204, iteration: 296408
loss: 0.9994708895683289,grad_norm: 0.7702772344718628, iteration: 296409
loss: 0.9837175607681274,grad_norm: 0.8344725519001702, iteration: 296410
loss: 0.9696412086486816,grad_norm: 0.7472497458619611, iteration: 296411
loss: 1.0314236879348755,grad_norm: 0.8900227931303218, iteration: 296412
loss: 0.9835434556007385,grad_norm: 0.9169011073282782, iteration: 296413
loss: 1.0175763368606567,grad_norm: 0.9999996663382126, iteration: 296414
loss: 0.9789250493049622,grad_norm: 0.9177960375650629, iteration: 296415
loss: 0.9806841015815735,grad_norm: 0.793613737763301, iteration: 296416
loss: 0.9937758445739746,grad_norm: 0.8511376779476771, iteration: 296417
loss: 1.024368166923523,grad_norm: 0.9220288195094758, iteration: 296418
loss: 0.9882424473762512,grad_norm: 0.8611425283577161, iteration: 296419
loss: 0.9776421785354614,grad_norm: 0.9317277218306897, iteration: 296420
loss: 1.0011245012283325,grad_norm: 0.8472364753706415, iteration: 296421
loss: 0.9751670956611633,grad_norm: 0.7452168734625111, iteration: 296422
loss: 1.057591199874878,grad_norm: 0.8462499103305385, iteration: 296423
loss: 1.0880987644195557,grad_norm: 0.9999990366673077, iteration: 296424
loss: 0.9946044683456421,grad_norm: 0.8041148266137812, iteration: 296425
loss: 1.002351999282837,grad_norm: 0.7783721793232626, iteration: 296426
loss: 1.0071241855621338,grad_norm: 0.911446276324214, iteration: 296427
loss: 0.9822052717208862,grad_norm: 0.8508028824558141, iteration: 296428
loss: 1.027306318283081,grad_norm: 0.9999993945789087, iteration: 296429
loss: 1.0305176973342896,grad_norm: 0.9936439892626803, iteration: 296430
loss: 0.9883570671081543,grad_norm: 0.9412331920567366, iteration: 296431
loss: 1.0180895328521729,grad_norm: 0.9999999894765733, iteration: 296432
loss: 1.0100131034851074,grad_norm: 0.796065703983011, iteration: 296433
loss: 0.9764341711997986,grad_norm: 0.7904583396455608, iteration: 296434
loss: 0.9859210252761841,grad_norm: 0.9284508775737205, iteration: 296435
loss: 0.969730019569397,grad_norm: 0.8305980370105243, iteration: 296436
loss: 1.079910159111023,grad_norm: 0.8701256884359075, iteration: 296437
loss: 1.000396966934204,grad_norm: 0.9999998986761499, iteration: 296438
loss: 0.981992244720459,grad_norm: 0.7696685433907638, iteration: 296439
loss: 0.9938559532165527,grad_norm: 0.9801574043514529, iteration: 296440
loss: 0.9956498146057129,grad_norm: 0.7934710432000275, iteration: 296441
loss: 0.991218626499176,grad_norm: 0.6707997596042196, iteration: 296442
loss: 1.0184154510498047,grad_norm: 0.9950334049029886, iteration: 296443
loss: 1.0137909650802612,grad_norm: 0.8970438071522916, iteration: 296444
loss: 0.9719370603561401,grad_norm: 0.9250174680218773, iteration: 296445
loss: 1.0168603658676147,grad_norm: 0.9999989061205717, iteration: 296446
loss: 0.9697074294090271,grad_norm: 0.8441647338513352, iteration: 296447
loss: 1.0283170938491821,grad_norm: 0.999999542000509, iteration: 296448
loss: 0.9686155915260315,grad_norm: 0.8330529034234911, iteration: 296449
loss: 1.008608102798462,grad_norm: 0.8459113091205231, iteration: 296450
loss: 1.01425039768219,grad_norm: 0.9999991144999999, iteration: 296451
loss: 0.9751279950141907,grad_norm: 0.8429421409676324, iteration: 296452
loss: 1.0231759548187256,grad_norm: 0.8397006437909416, iteration: 296453
loss: 1.060394048690796,grad_norm: 0.9999992979963447, iteration: 296454
loss: 0.99142986536026,grad_norm: 0.8287365493462038, iteration: 296455
loss: 1.0290621519088745,grad_norm: 0.8151195765481896, iteration: 296456
loss: 0.9892483353614807,grad_norm: 0.9450535073910623, iteration: 296457
loss: 1.0316517353057861,grad_norm: 0.871135427195797, iteration: 296458
loss: 1.0277388095855713,grad_norm: 0.9999991718110068, iteration: 296459
loss: 1.0162590742111206,grad_norm: 0.8318963239400028, iteration: 296460
loss: 0.9838235378265381,grad_norm: 0.9854864158916958, iteration: 296461
loss: 0.9974522590637207,grad_norm: 0.9999991690231036, iteration: 296462
loss: 1.015680193901062,grad_norm: 0.9932889451380669, iteration: 296463
loss: 0.9932584166526794,grad_norm: 0.8385846632352497, iteration: 296464
loss: 1.0127818584442139,grad_norm: 0.9344581258819948, iteration: 296465
loss: 1.0106101036071777,grad_norm: 0.852999144123312, iteration: 296466
loss: 0.9609283208847046,grad_norm: 0.8704040002531165, iteration: 296467
loss: 1.0002501010894775,grad_norm: 0.8686313553244971, iteration: 296468
loss: 1.165381669998169,grad_norm: 0.9999994018064428, iteration: 296469
loss: 0.9772701263427734,grad_norm: 0.741933893709646, iteration: 296470
loss: 1.0237772464752197,grad_norm: 0.9999992927006164, iteration: 296471
loss: 0.966803789138794,grad_norm: 0.800306322574859, iteration: 296472
loss: 0.9684191942214966,grad_norm: 0.8269742082746904, iteration: 296473
loss: 1.0647289752960205,grad_norm: 0.9999991823687759, iteration: 296474
loss: 0.9907976388931274,grad_norm: 0.9714927837376133, iteration: 296475
loss: 0.9766106605529785,grad_norm: 0.8478219923443601, iteration: 296476
loss: 0.9941798448562622,grad_norm: 0.743169740425853, iteration: 296477
loss: 1.011073350906372,grad_norm: 0.8866500843146787, iteration: 296478
loss: 1.0135552883148193,grad_norm: 0.7796094780127609, iteration: 296479
loss: 0.9848676919937134,grad_norm: 0.8627479756131446, iteration: 296480
loss: 0.9797077178955078,grad_norm: 0.7944694739706353, iteration: 296481
loss: 1.0340592861175537,grad_norm: 0.905147685958031, iteration: 296482
loss: 1.0085420608520508,grad_norm: 0.9664471518774341, iteration: 296483
loss: 1.0260576009750366,grad_norm: 0.8327731325975166, iteration: 296484
loss: 1.0293813943862915,grad_norm: 0.9999990565803463, iteration: 296485
loss: 1.006184458732605,grad_norm: 0.9210096384381432, iteration: 296486
loss: 1.0116580724716187,grad_norm: 0.85960400372563, iteration: 296487
loss: 1.0175892114639282,grad_norm: 0.8957458538607925, iteration: 296488
loss: 1.040024995803833,grad_norm: 0.828545063883884, iteration: 296489
loss: 1.0299696922302246,grad_norm: 0.9999992536209051, iteration: 296490
loss: 1.016420841217041,grad_norm: 0.9999990996232093, iteration: 296491
loss: 1.0113868713378906,grad_norm: 0.7997366427864474, iteration: 296492
loss: 1.0423909425735474,grad_norm: 0.999999248321347, iteration: 296493
loss: 1.0312644243240356,grad_norm: 0.7449274380751518, iteration: 296494
loss: 1.0257166624069214,grad_norm: 0.9999991168570359, iteration: 296495
loss: 1.0127410888671875,grad_norm: 0.956635133848301, iteration: 296496
loss: 1.001497745513916,grad_norm: 0.999999349441041, iteration: 296497
loss: 0.9901131987571716,grad_norm: 0.8304101257289738, iteration: 296498
loss: 1.0109885931015015,grad_norm: 0.8488300550732151, iteration: 296499
loss: 0.9564161896705627,grad_norm: 0.7783261660726178, iteration: 296500
loss: 0.9953064918518066,grad_norm: 0.9419971458817029, iteration: 296501
loss: 0.9816811084747314,grad_norm: 0.677173094714183, iteration: 296502
loss: 0.9972031712532043,grad_norm: 0.8583314358349912, iteration: 296503
loss: 0.960634708404541,grad_norm: 0.9999991870114107, iteration: 296504
loss: 0.976707935333252,grad_norm: 0.9562248165803567, iteration: 296505
loss: 1.0079482793807983,grad_norm: 0.9887409871892465, iteration: 296506
loss: 1.0021185874938965,grad_norm: 0.9219213155033306, iteration: 296507
loss: 0.988926351070404,grad_norm: 0.8015087088193661, iteration: 296508
loss: 0.9948213696479797,grad_norm: 0.729114440632976, iteration: 296509
loss: 1.018997311592102,grad_norm: 0.8989314887472893, iteration: 296510
loss: 0.9515424966812134,grad_norm: 0.8580118491590513, iteration: 296511
loss: 0.9772442579269409,grad_norm: 0.9076586215500134, iteration: 296512
loss: 1.0155222415924072,grad_norm: 0.9999997892646658, iteration: 296513
loss: 0.9693973064422607,grad_norm: 0.9999992657163642, iteration: 296514
loss: 1.0350313186645508,grad_norm: 0.774985944135906, iteration: 296515
loss: 1.0691125392913818,grad_norm: 0.9999999249603139, iteration: 296516
loss: 1.0127345323562622,grad_norm: 0.914236537124459, iteration: 296517
loss: 1.03044593334198,grad_norm: 0.9999996308246144, iteration: 296518
loss: 1.0481808185577393,grad_norm: 0.9999995243141998, iteration: 296519
loss: 1.0198559761047363,grad_norm: 0.8591064485576003, iteration: 296520
loss: 0.9820592999458313,grad_norm: 0.8663473290345445, iteration: 296521
loss: 0.9806757569313049,grad_norm: 0.9267040199939509, iteration: 296522
loss: 1.014797329902649,grad_norm: 0.713318658677827, iteration: 296523
loss: 1.0099778175354004,grad_norm: 0.8132694200229993, iteration: 296524
loss: 1.0273422002792358,grad_norm: 0.9010033769092584, iteration: 296525
loss: 0.9665861129760742,grad_norm: 0.8238094683334407, iteration: 296526
loss: 1.0307947397232056,grad_norm: 0.8585139513874948, iteration: 296527
loss: 0.9842246174812317,grad_norm: 0.7215216947061587, iteration: 296528
loss: 1.0073856115341187,grad_norm: 0.873196321552329, iteration: 296529
loss: 1.063668131828308,grad_norm: 0.9999998109446447, iteration: 296530
loss: 0.9850074648857117,grad_norm: 0.9999992811878967, iteration: 296531
loss: 0.946365475654602,grad_norm: 0.9999990628972824, iteration: 296532
loss: 0.9856393933296204,grad_norm: 0.9489668294171051, iteration: 296533
loss: 1.0028223991394043,grad_norm: 0.8553146898651479, iteration: 296534
loss: 0.9859761595726013,grad_norm: 0.9999990638307198, iteration: 296535
loss: 1.0013080835342407,grad_norm: 0.8082390066240307, iteration: 296536
loss: 1.065618872642517,grad_norm: 1.000000017263344, iteration: 296537
loss: 1.0677858591079712,grad_norm: 0.9999993673766145, iteration: 296538
loss: 0.9921346306800842,grad_norm: 0.7259812234696997, iteration: 296539
loss: 1.0255507230758667,grad_norm: 0.9999998450415459, iteration: 296540
loss: 0.9969796538352966,grad_norm: 0.9999990949510159, iteration: 296541
loss: 1.0645579099655151,grad_norm: 0.9999991467134233, iteration: 296542
loss: 1.036681056022644,grad_norm: 0.982619009786698, iteration: 296543
loss: 1.0008033514022827,grad_norm: 0.8301568257166886, iteration: 296544
loss: 1.0142441987991333,grad_norm: 0.8978707062713638, iteration: 296545
loss: 1.0279884338378906,grad_norm: 0.8862640928982856, iteration: 296546
loss: 0.9913113117218018,grad_norm: 0.7410297540903509, iteration: 296547
loss: 1.0082306861877441,grad_norm: 0.9653606529072327, iteration: 296548
loss: 1.0009108781814575,grad_norm: 0.7982808959107213, iteration: 296549
loss: 1.0144462585449219,grad_norm: 0.7827472255183282, iteration: 296550
loss: 1.0259897708892822,grad_norm: 0.7952035342340171, iteration: 296551
loss: 0.957952082157135,grad_norm: 0.9272158258598321, iteration: 296552
loss: 1.163918375968933,grad_norm: 0.999999186615233, iteration: 296553
loss: 0.9842898845672607,grad_norm: 0.8012253138985016, iteration: 296554
loss: 0.9951056241989136,grad_norm: 0.7193925630195696, iteration: 296555
loss: 1.0165657997131348,grad_norm: 0.8611027148593686, iteration: 296556
loss: 0.9784075021743774,grad_norm: 0.7979935845139152, iteration: 296557
loss: 1.0457379817962646,grad_norm: 0.9999991977916108, iteration: 296558
loss: 1.0252101421356201,grad_norm: 0.8985424042413279, iteration: 296559
loss: 0.997745931148529,grad_norm: 0.7003579622563625, iteration: 296560
loss: 0.9937856197357178,grad_norm: 0.8573257742335162, iteration: 296561
loss: 1.056485652923584,grad_norm: 0.9999993020023403, iteration: 296562
loss: 0.9985202550888062,grad_norm: 0.9967371316493291, iteration: 296563
loss: 1.0246292352676392,grad_norm: 0.999999270726693, iteration: 296564
loss: 1.0348988771438599,grad_norm: 0.9305567788363568, iteration: 296565
loss: 1.0369603633880615,grad_norm: 0.9999991768255957, iteration: 296566
loss: 1.0276869535446167,grad_norm: 0.9999991151974355, iteration: 296567
loss: 1.0020335912704468,grad_norm: 0.7928610641451413, iteration: 296568
loss: 1.0138216018676758,grad_norm: 0.9189733037834128, iteration: 296569
loss: 1.0107656717300415,grad_norm: 0.8762305300854553, iteration: 296570
loss: 1.0709894895553589,grad_norm: 0.9653164962556846, iteration: 296571
loss: 0.983449399471283,grad_norm: 0.9999991832927954, iteration: 296572
loss: 1.0250669717788696,grad_norm: 0.8362714789373693, iteration: 296573
loss: 0.9523677825927734,grad_norm: 0.9248609764670189, iteration: 296574
loss: 1.0190691947937012,grad_norm: 0.9999990200527588, iteration: 296575
loss: 1.0040931701660156,grad_norm: 0.9590105675740083, iteration: 296576
loss: 0.9966393113136292,grad_norm: 0.7684110638356769, iteration: 296577
loss: 1.0783421993255615,grad_norm: 0.9093338771589172, iteration: 296578
loss: 0.987021267414093,grad_norm: 0.815805734695841, iteration: 296579
loss: 1.038426399230957,grad_norm: 0.9999990076337092, iteration: 296580
loss: 1.0599654912948608,grad_norm: 0.9999991873259395, iteration: 296581
loss: 1.002722978591919,grad_norm: 0.7878516509169102, iteration: 296582
loss: 1.023033857345581,grad_norm: 0.8309725709444673, iteration: 296583
loss: 0.9818142056465149,grad_norm: 0.9999992744524282, iteration: 296584
loss: 0.9871677756309509,grad_norm: 0.8342998710120711, iteration: 296585
loss: 0.9891864657402039,grad_norm: 0.999999162964378, iteration: 296586
loss: 0.9830123782157898,grad_norm: 0.8278678364972064, iteration: 296587
loss: 0.9938567876815796,grad_norm: 0.9999990310266308, iteration: 296588
loss: 0.9896801114082336,grad_norm: 0.7632748839972221, iteration: 296589
loss: 1.0016006231307983,grad_norm: 0.9999991109883056, iteration: 296590
loss: 0.9752022624015808,grad_norm: 0.8365440960611554, iteration: 296591
loss: 1.0129449367523193,grad_norm: 0.9999992299980441, iteration: 296592
loss: 1.028777003288269,grad_norm: 0.7978061053971212, iteration: 296593
loss: 1.045822024345398,grad_norm: 0.946643466592954, iteration: 296594
loss: 1.0207263231277466,grad_norm: 0.7885253058400251, iteration: 296595
loss: 1.293190836906433,grad_norm: 0.9999995641048476, iteration: 296596
loss: 0.9792895317077637,grad_norm: 0.7869009632390741, iteration: 296597
loss: 1.046032190322876,grad_norm: 0.999999199194231, iteration: 296598
loss: 1.0125550031661987,grad_norm: 0.956489841696106, iteration: 296599
loss: 0.9939413666725159,grad_norm: 0.9999993859856234, iteration: 296600
loss: 1.0057109594345093,grad_norm: 0.8256356155201648, iteration: 296601
loss: 1.0168887376785278,grad_norm: 0.810722988257504, iteration: 296602
loss: 0.9774541258811951,grad_norm: 0.9696299791698959, iteration: 296603
loss: 0.9962242245674133,grad_norm: 0.828754095744444, iteration: 296604
loss: 1.03974449634552,grad_norm: 0.8044026102281716, iteration: 296605
loss: 1.0132265090942383,grad_norm: 0.7779715763978402, iteration: 296606
loss: 1.0052555799484253,grad_norm: 0.9999990609106439, iteration: 296607
loss: 0.9773089289665222,grad_norm: 0.8659452850043757, iteration: 296608
loss: 1.063133955001831,grad_norm: 0.9553176198503109, iteration: 296609
loss: 0.9535431265830994,grad_norm: 0.9999996630432465, iteration: 296610
loss: 1.0516911745071411,grad_norm: 0.9999991602730657, iteration: 296611
loss: 1.0140806436538696,grad_norm: 0.9464132673453901, iteration: 296612
loss: 1.0102721452713013,grad_norm: 0.9999991177415933, iteration: 296613
loss: 1.038162350654602,grad_norm: 0.8230003796367068, iteration: 296614
loss: 1.0046424865722656,grad_norm: 0.9606456335999709, iteration: 296615
loss: 1.0157169103622437,grad_norm: 0.9999990206084386, iteration: 296616
loss: 0.9896152019500732,grad_norm: 0.8901560053404671, iteration: 296617
loss: 1.0469080209732056,grad_norm: 0.9999997600315769, iteration: 296618
loss: 1.0039335489273071,grad_norm: 0.9999992168570502, iteration: 296619
loss: 1.1029613018035889,grad_norm: 0.9999994337513508, iteration: 296620
loss: 0.9890446662902832,grad_norm: 0.7930635086222871, iteration: 296621
loss: 1.0130656957626343,grad_norm: 0.8076650933129732, iteration: 296622
loss: 1.012590765953064,grad_norm: 0.9433550374833586, iteration: 296623
loss: 0.9684662222862244,grad_norm: 0.8202848243140453, iteration: 296624
loss: 1.0106338262557983,grad_norm: 0.7655578153963258, iteration: 296625
loss: 1.0055323839187622,grad_norm: 0.7720923164487627, iteration: 296626
loss: 1.0185364484786987,grad_norm: 0.9843442062708129, iteration: 296627
loss: 1.0508553981781006,grad_norm: 0.8773187492057526, iteration: 296628
loss: 1.0398812294006348,grad_norm: 0.9714994402830518, iteration: 296629
loss: 1.0237776041030884,grad_norm: 0.8964203458657972, iteration: 296630
loss: 1.022858738899231,grad_norm: 1.0000000505504785, iteration: 296631
loss: 1.0208748579025269,grad_norm: 0.9999991041958954, iteration: 296632
loss: 1.00770902633667,grad_norm: 0.8659343354250975, iteration: 296633
loss: 0.9969016313552856,grad_norm: 0.6760285411034278, iteration: 296634
loss: 1.0060182809829712,grad_norm: 0.7619477773672756, iteration: 296635
loss: 1.0181915760040283,grad_norm: 0.7991554459563808, iteration: 296636
loss: 1.026460886001587,grad_norm: 0.9947547298527306, iteration: 296637
loss: 0.9784993529319763,grad_norm: 0.7826999124829879, iteration: 296638
loss: 1.0249733924865723,grad_norm: 0.7273959908810803, iteration: 296639
loss: 1.0093053579330444,grad_norm: 0.8164546561522606, iteration: 296640
loss: 0.9726274013519287,grad_norm: 0.8912710652999754, iteration: 296641
loss: 0.9867563843727112,grad_norm: 0.9635696843803655, iteration: 296642
loss: 1.0156444311141968,grad_norm: 0.871842991764827, iteration: 296643
loss: 0.9713999629020691,grad_norm: 0.9999988984077833, iteration: 296644
loss: 0.9842561483383179,grad_norm: 0.7976372266574511, iteration: 296645
loss: 1.0151658058166504,grad_norm: 0.9999992067219001, iteration: 296646
loss: 0.9894310235977173,grad_norm: 0.9999991071072271, iteration: 296647
loss: 0.9690005779266357,grad_norm: 0.8255754544449929, iteration: 296648
loss: 1.0139496326446533,grad_norm: 0.8596385168824214, iteration: 296649
loss: 0.995543897151947,grad_norm: 0.7950978068642917, iteration: 296650
loss: 0.9792075753211975,grad_norm: 0.854759088903636, iteration: 296651
loss: 1.0350968837738037,grad_norm: 0.8379494651329437, iteration: 296652
loss: 0.9920632839202881,grad_norm: 0.9033333934655865, iteration: 296653
loss: 1.02959144115448,grad_norm: 0.799695987743881, iteration: 296654
loss: 0.9728872179985046,grad_norm: 0.9691932100520221, iteration: 296655
loss: 1.015919804573059,grad_norm: 0.9089792461881481, iteration: 296656
loss: 1.0377026796340942,grad_norm: 0.8096323221530936, iteration: 296657
loss: 1.0044796466827393,grad_norm: 0.7470800182856153, iteration: 296658
loss: 0.9939498901367188,grad_norm: 0.7936146337157101, iteration: 296659
loss: 1.005226731300354,grad_norm: 0.8256504521574869, iteration: 296660
loss: 1.0069575309753418,grad_norm: 0.8491556819919969, iteration: 296661
loss: 1.035717487335205,grad_norm: 0.7965763798516334, iteration: 296662
loss: 0.9849874973297119,grad_norm: 0.994108504978815, iteration: 296663
loss: 0.9982377886772156,grad_norm: 0.999999083642558, iteration: 296664
loss: 1.0188829898834229,grad_norm: 0.8431621684878232, iteration: 296665
loss: 1.003011703491211,grad_norm: 0.681248212300747, iteration: 296666
loss: 0.9810507893562317,grad_norm: 0.9119614492893632, iteration: 296667
loss: 1.0323973894119263,grad_norm: 0.7685904881268829, iteration: 296668
loss: 0.9857068061828613,grad_norm: 0.7806597336406567, iteration: 296669
loss: 0.9987517595291138,grad_norm: 0.9132342526058237, iteration: 296670
loss: 1.0028188228607178,grad_norm: 0.9999991728717135, iteration: 296671
loss: 1.002964973449707,grad_norm: 0.7858156011165051, iteration: 296672
loss: 1.0366010665893555,grad_norm: 0.9999992041910643, iteration: 296673
loss: 1.0415351390838623,grad_norm: 0.8995983932343787, iteration: 296674
loss: 1.003373622894287,grad_norm: 0.9540838493471375, iteration: 296675
loss: 1.016694188117981,grad_norm: 0.9277176909499047, iteration: 296676
loss: 0.985040545463562,grad_norm: 0.9644875799264397, iteration: 296677
loss: 0.9659909605979919,grad_norm: 0.9563810490999558, iteration: 296678
loss: 1.0142831802368164,grad_norm: 0.8773377693769927, iteration: 296679
loss: 1.0178563594818115,grad_norm: 0.9999989781507646, iteration: 296680
loss: 0.9889262318611145,grad_norm: 0.8424492526402768, iteration: 296681
loss: 0.9899908900260925,grad_norm: 0.9347454582332658, iteration: 296682
loss: 0.9940013289451599,grad_norm: 0.846916341882441, iteration: 296683
loss: 1.0305454730987549,grad_norm: 0.9402221172971427, iteration: 296684
loss: 0.9527125358581543,grad_norm: 0.9135144748449908, iteration: 296685
loss: 0.9708391427993774,grad_norm: 0.8636613294674268, iteration: 296686
loss: 0.9765807390213013,grad_norm: 0.863853877813282, iteration: 296687
loss: 1.1002084016799927,grad_norm: 0.862010781367507, iteration: 296688
loss: 0.9955496191978455,grad_norm: 0.9999993310727027, iteration: 296689
loss: 1.004323124885559,grad_norm: 0.7014691176906696, iteration: 296690
loss: 0.9954773783683777,grad_norm: 0.9999993010356745, iteration: 296691
loss: 1.012619972229004,grad_norm: 0.869660358852994, iteration: 296692
loss: 1.0038129091262817,grad_norm: 0.8962812416900274, iteration: 296693
loss: 1.0103658437728882,grad_norm: 0.9918832155750856, iteration: 296694
loss: 0.9937969446182251,grad_norm: 0.833912661141629, iteration: 296695
loss: 0.9950782656669617,grad_norm: 0.7763716852684935, iteration: 296696
loss: 1.005380392074585,grad_norm: 0.8709718025743196, iteration: 296697
loss: 0.9854950904846191,grad_norm: 0.7844159489273218, iteration: 296698
loss: 1.0033719539642334,grad_norm: 0.9526362879685177, iteration: 296699
loss: 0.9727290868759155,grad_norm: 0.8327787186401892, iteration: 296700
loss: 1.0200469493865967,grad_norm: 0.7770111873074962, iteration: 296701
loss: 0.9924648404121399,grad_norm: 0.8969546945275985, iteration: 296702
loss: 0.9693121314048767,grad_norm: 0.8466341410873983, iteration: 296703
loss: 0.9970523715019226,grad_norm: 0.7831515997853634, iteration: 296704
loss: 1.0175845623016357,grad_norm: 0.8966000366897614, iteration: 296705
loss: 1.0039929151535034,grad_norm: 0.9909375004373694, iteration: 296706
loss: 0.9968094229698181,grad_norm: 0.9575079496720218, iteration: 296707
loss: 0.9785067439079285,grad_norm: 0.7765529788314689, iteration: 296708
loss: 1.0049302577972412,grad_norm: 0.9999991881230516, iteration: 296709
loss: 1.0306861400604248,grad_norm: 0.849240295460831, iteration: 296710
loss: 1.0369848012924194,grad_norm: 0.803440929288642, iteration: 296711
loss: 0.9832320213317871,grad_norm: 0.9999991069070274, iteration: 296712
loss: 0.9793620109558105,grad_norm: 0.9101729888568145, iteration: 296713
loss: 1.0081064701080322,grad_norm: 0.7708407296638566, iteration: 296714
loss: 1.0211519002914429,grad_norm: 0.8404256515787907, iteration: 296715
loss: 0.9821271300315857,grad_norm: 0.8220076282378389, iteration: 296716
loss: 0.9619929790496826,grad_norm: 0.7001933605333517, iteration: 296717
loss: 0.959825873374939,grad_norm: 0.8831779289526969, iteration: 296718
loss: 1.0154494047164917,grad_norm: 0.8123528434494467, iteration: 296719
loss: 0.9904634356498718,grad_norm: 0.9022678288429231, iteration: 296720
loss: 1.0140161514282227,grad_norm: 0.8016531812810379, iteration: 296721
loss: 1.008999228477478,grad_norm: 0.9859583084875408, iteration: 296722
loss: 0.9876312613487244,grad_norm: 0.8790037265168006, iteration: 296723
loss: 0.99462890625,grad_norm: 0.9999999364613267, iteration: 296724
loss: 0.9971604347229004,grad_norm: 0.7024677737669365, iteration: 296725
loss: 1.0138707160949707,grad_norm: 0.8603225142566586, iteration: 296726
loss: 1.012082576751709,grad_norm: 0.7949319486002402, iteration: 296727
loss: 1.0215857028961182,grad_norm: 0.8212027823774823, iteration: 296728
loss: 0.9944332838058472,grad_norm: 0.8510855844766397, iteration: 296729
loss: 1.0202131271362305,grad_norm: 0.9156941354370953, iteration: 296730
loss: 0.972632110118866,grad_norm: 0.7529098731269831, iteration: 296731
loss: 0.9784780740737915,grad_norm: 0.817048721778833, iteration: 296732
loss: 0.9925870895385742,grad_norm: 0.8900198321882805, iteration: 296733
loss: 0.9704165458679199,grad_norm: 0.9999990109477197, iteration: 296734
loss: 1.0068156719207764,grad_norm: 0.7765024402990933, iteration: 296735
loss: 0.9999936819076538,grad_norm: 0.9096210203048156, iteration: 296736
loss: 0.9985885620117188,grad_norm: 0.9164926886370178, iteration: 296737
loss: 1.0113887786865234,grad_norm: 0.9173532608675314, iteration: 296738
loss: 0.9576839208602905,grad_norm: 0.824174824069198, iteration: 296739
loss: 1.0002044439315796,grad_norm: 0.9999995010830842, iteration: 296740
loss: 1.0022010803222656,grad_norm: 0.8299567176458932, iteration: 296741
loss: 1.0373327732086182,grad_norm: 0.9999990511386392, iteration: 296742
loss: 0.9914076328277588,grad_norm: 0.7635037852239052, iteration: 296743
loss: 0.975080668926239,grad_norm: 0.8057416578027425, iteration: 296744
loss: 0.9825154542922974,grad_norm: 0.8169254954507517, iteration: 296745
loss: 0.9843971133232117,grad_norm: 0.8243364608144657, iteration: 296746
loss: 0.9874131679534912,grad_norm: 0.7396252528563688, iteration: 296747
loss: 0.9943352341651917,grad_norm: 0.8875606097900723, iteration: 296748
loss: 1.0083905458450317,grad_norm: 0.9164884445328416, iteration: 296749
loss: 1.0370898246765137,grad_norm: 1.00000007092319, iteration: 296750
loss: 1.0361113548278809,grad_norm: 0.9795631335059343, iteration: 296751
loss: 1.033503532409668,grad_norm: 0.7581957913139993, iteration: 296752
loss: 0.9967193603515625,grad_norm: 0.8076327330694141, iteration: 296753
loss: 0.9839738011360168,grad_norm: 0.726553947363791, iteration: 296754
loss: 1.013395071029663,grad_norm: 0.9999996574439783, iteration: 296755
loss: 1.0095213651657104,grad_norm: 0.8140498052693523, iteration: 296756
loss: 0.9853205680847168,grad_norm: 0.9999990724804387, iteration: 296757
loss: 0.9963600635528564,grad_norm: 0.7186877366258617, iteration: 296758
loss: 0.9871457815170288,grad_norm: 0.7017437341494626, iteration: 296759
loss: 1.012336254119873,grad_norm: 0.8616893055834359, iteration: 296760
loss: 0.9778900146484375,grad_norm: 0.838614889809649, iteration: 296761
loss: 1.015762209892273,grad_norm: 0.8568519248108938, iteration: 296762
loss: 0.9921517372131348,grad_norm: 0.8776782407167144, iteration: 296763
loss: 1.0506787300109863,grad_norm: 0.9999992623157212, iteration: 296764
loss: 0.989855170249939,grad_norm: 0.8115621935877565, iteration: 296765
loss: 0.9974202513694763,grad_norm: 0.6998418633741381, iteration: 296766
loss: 0.9968734979629517,grad_norm: 0.8250396659169508, iteration: 296767
loss: 1.0279029607772827,grad_norm: 0.9999992813238592, iteration: 296768
loss: 0.97857666015625,grad_norm: 0.9268144308051748, iteration: 296769
loss: 0.9973545670509338,grad_norm: 0.7833122811500722, iteration: 296770
loss: 1.022047996520996,grad_norm: 0.8630133783710701, iteration: 296771
loss: 1.0143733024597168,grad_norm: 0.9049452259602194, iteration: 296772
loss: 1.0041203498840332,grad_norm: 0.9999994328790441, iteration: 296773
loss: 1.0126899480819702,grad_norm: 0.9070401665946769, iteration: 296774
loss: 0.9819185137748718,grad_norm: 0.8301078130118948, iteration: 296775
loss: 1.0220136642456055,grad_norm: 0.85556966441333, iteration: 296776
loss: 1.0241918563842773,grad_norm: 0.7287149163788468, iteration: 296777
loss: 0.9897416830062866,grad_norm: 0.731487036220271, iteration: 296778
loss: 1.0003191232681274,grad_norm: 0.7856956465480452, iteration: 296779
loss: 1.0391933917999268,grad_norm: 0.9999991858430751, iteration: 296780
loss: 1.0129823684692383,grad_norm: 0.9572001331589963, iteration: 296781
loss: 1.0123214721679688,grad_norm: 0.9999993687201009, iteration: 296782
loss: 0.9774467349052429,grad_norm: 0.9588857727856959, iteration: 296783
loss: 0.9782054424285889,grad_norm: 0.8501829266540715, iteration: 296784
loss: 1.0128785371780396,grad_norm: 0.9999998769090164, iteration: 296785
loss: 1.0088093280792236,grad_norm: 0.9337329528288376, iteration: 296786
loss: 1.0078186988830566,grad_norm: 0.7490079320882943, iteration: 296787
loss: 0.9895761013031006,grad_norm: 0.9338100459035694, iteration: 296788
loss: 1.0063556432724,grad_norm: 0.8807707105024293, iteration: 296789
loss: 1.031827688217163,grad_norm: 0.8028501156777623, iteration: 296790
loss: 1.0039938688278198,grad_norm: 0.7405315041196008, iteration: 296791
loss: 0.987907350063324,grad_norm: 0.9205025725168045, iteration: 296792
loss: 0.9856052994728088,grad_norm: 0.7873424468650263, iteration: 296793
loss: 0.9689319133758545,grad_norm: 0.933540587833265, iteration: 296794
loss: 1.0451500415802002,grad_norm: 0.9594536414062469, iteration: 296795
loss: 0.9752158522605896,grad_norm: 0.8274868830579767, iteration: 296796
loss: 1.0792981386184692,grad_norm: 0.8941654758705614, iteration: 296797
loss: 0.9711873531341553,grad_norm: 0.8168703381603659, iteration: 296798
loss: 1.0430437326431274,grad_norm: 0.8408959357813256, iteration: 296799
loss: 1.0330479145050049,grad_norm: 0.8660003817971691, iteration: 296800
loss: 0.9890338778495789,grad_norm: 0.8797916081577544, iteration: 296801
loss: 1.2596266269683838,grad_norm: 1.000000078228154, iteration: 296802
loss: 0.99783855676651,grad_norm: 0.8454693822534135, iteration: 296803
loss: 0.9908403158187866,grad_norm: 0.7345479219039648, iteration: 296804
loss: 0.9711065888404846,grad_norm: 0.8391947329976814, iteration: 296805
loss: 1.0166168212890625,grad_norm: 0.8020194665058826, iteration: 296806
loss: 1.004108190536499,grad_norm: 0.7292915096920514, iteration: 296807
loss: 1.0023671388626099,grad_norm: 0.8896729072905326, iteration: 296808
loss: 0.9725081324577332,grad_norm: 0.6548926047332403, iteration: 296809
loss: 1.0108308792114258,grad_norm: 0.7395372775865903, iteration: 296810
loss: 1.028407335281372,grad_norm: 0.8064288104279517, iteration: 296811
loss: 0.9978540539741516,grad_norm: 0.8714914472036154, iteration: 296812
loss: 1.0070255994796753,grad_norm: 0.8085671785361963, iteration: 296813
loss: 1.142598271369934,grad_norm: 0.8738475292804173, iteration: 296814
loss: 1.0024805068969727,grad_norm: 0.9426976056652866, iteration: 296815
loss: 0.9662616848945618,grad_norm: 0.876128635648101, iteration: 296816
loss: 1.130124568939209,grad_norm: 0.8685028293410164, iteration: 296817
loss: 0.9834859371185303,grad_norm: 0.9531722985178563, iteration: 296818
loss: 1.0052975416183472,grad_norm: 0.9504461109983093, iteration: 296819
loss: 0.9682329893112183,grad_norm: 0.9403183118851434, iteration: 296820
loss: 0.9841167330741882,grad_norm: 0.7959822390048192, iteration: 296821
loss: 1.0068899393081665,grad_norm: 0.837938500999861, iteration: 296822
loss: 1.0586193799972534,grad_norm: 0.9265032375124868, iteration: 296823
loss: 1.0240914821624756,grad_norm: 0.9999991615042161, iteration: 296824
loss: 1.0130081176757812,grad_norm: 0.9178225472947568, iteration: 296825
loss: 0.9704115986824036,grad_norm: 0.8459315402877904, iteration: 296826
loss: 0.9803161025047302,grad_norm: 0.9133247053540354, iteration: 296827
loss: 1.014510154724121,grad_norm: 0.8373678822893857, iteration: 296828
loss: 1.0051873922348022,grad_norm: 0.9999991421717355, iteration: 296829
loss: 1.0181223154067993,grad_norm: 0.9501798373291602, iteration: 296830
loss: 0.9954681396484375,grad_norm: 0.9839211595711038, iteration: 296831
loss: 1.023971438407898,grad_norm: 0.9545863413173187, iteration: 296832
loss: 1.001154899597168,grad_norm: 0.9999993067913642, iteration: 296833
loss: 0.9921754002571106,grad_norm: 0.9999990682454726, iteration: 296834
loss: 1.0046725273132324,grad_norm: 0.8955472456791977, iteration: 296835
loss: 1.0197315216064453,grad_norm: 0.9999991851020161, iteration: 296836
loss: 1.0184270143508911,grad_norm: 0.8679896446159687, iteration: 296837
loss: 1.2962977886199951,grad_norm: 0.9999994630938372, iteration: 296838
loss: 1.028178095817566,grad_norm: 0.7722110884939375, iteration: 296839
loss: 1.056952953338623,grad_norm: 0.9999995962887512, iteration: 296840
loss: 1.0218851566314697,grad_norm: 0.9498896923504595, iteration: 296841
loss: 0.9855089783668518,grad_norm: 0.8694407159667275, iteration: 296842
loss: 1.0175305604934692,grad_norm: 0.8020982429764701, iteration: 296843
loss: 1.0049165487289429,grad_norm: 0.9999993307678879, iteration: 296844
loss: 0.9695816040039062,grad_norm: 0.991798906135065, iteration: 296845
loss: 0.9881806969642639,grad_norm: 0.8284433550295952, iteration: 296846
loss: 1.0336188077926636,grad_norm: 0.9999991819796055, iteration: 296847
loss: 1.0035102367401123,grad_norm: 0.9999992547051237, iteration: 296848
loss: 0.9574816226959229,grad_norm: 0.9266168170710407, iteration: 296849
loss: 1.0187638998031616,grad_norm: 0.8077923502700374, iteration: 296850
loss: 1.006584644317627,grad_norm: 0.7295756565900745, iteration: 296851
loss: 0.9873561263084412,grad_norm: 0.8793326876331464, iteration: 296852
loss: 0.9834840297698975,grad_norm: 0.8643528730604729, iteration: 296853
loss: 0.9386333227157593,grad_norm: 0.9999993936753562, iteration: 296854
loss: 0.9955296516418457,grad_norm: 0.7598784228691114, iteration: 296855
loss: 0.9988997578620911,grad_norm: 0.9999990137740808, iteration: 296856
loss: 0.9870581030845642,grad_norm: 0.999999010466796, iteration: 296857
loss: 0.977159321308136,grad_norm: 0.7717460537640749, iteration: 296858
loss: 0.9992368817329407,grad_norm: 0.9457208303167898, iteration: 296859
loss: 0.9934003353118896,grad_norm: 0.8734000767790105, iteration: 296860
loss: 1.001363754272461,grad_norm: 0.7817555146889109, iteration: 296861
loss: 1.0003308057785034,grad_norm: 0.7283597636227617, iteration: 296862
loss: 0.969030499458313,grad_norm: 0.9759021791721186, iteration: 296863
loss: 1.0048105716705322,grad_norm: 0.9672570522664556, iteration: 296864
loss: 1.0153546333312988,grad_norm: 0.925614568616612, iteration: 296865
loss: 1.0256750583648682,grad_norm: 0.7754763342662226, iteration: 296866
loss: 1.0134526491165161,grad_norm: 0.8973390717252925, iteration: 296867
loss: 1.0112454891204834,grad_norm: 0.9087436542998442, iteration: 296868
loss: 0.9928691983222961,grad_norm: 0.8152893614155855, iteration: 296869
loss: 1.0120562314987183,grad_norm: 0.9999991339862602, iteration: 296870
loss: 1.0376386642456055,grad_norm: 0.9999991590129174, iteration: 296871
loss: 1.0247164964675903,grad_norm: 0.9999991652694616, iteration: 296872
loss: 1.002030849456787,grad_norm: 0.846721458859395, iteration: 296873
loss: 0.9878531098365784,grad_norm: 0.8183490223195006, iteration: 296874
loss: 0.9807910919189453,grad_norm: 0.9207257783594941, iteration: 296875
loss: 1.0192748308181763,grad_norm: 0.8638861625583321, iteration: 296876
loss: 0.9416866898536682,grad_norm: 0.9467070691536577, iteration: 296877
loss: 1.0026520490646362,grad_norm: 0.9550752342814222, iteration: 296878
loss: 1.0451867580413818,grad_norm: 0.9999990620155118, iteration: 296879
loss: 1.00684654712677,grad_norm: 0.9999990437305737, iteration: 296880
loss: 0.9882786870002747,grad_norm: 0.6829528616201763, iteration: 296881
loss: 1.0094859600067139,grad_norm: 0.8362392138492525, iteration: 296882
loss: 1.0011916160583496,grad_norm: 0.8334290807440033, iteration: 296883
loss: 0.9991326928138733,grad_norm: 0.7226947992099186, iteration: 296884
loss: 0.9822120070457458,grad_norm: 0.9999991052258004, iteration: 296885
loss: 1.0037026405334473,grad_norm: 0.9312813688839352, iteration: 296886
loss: 0.9902956485748291,grad_norm: 0.8718032451743364, iteration: 296887
loss: 1.0387011766433716,grad_norm: 0.7181247422327116, iteration: 296888
loss: 1.0864143371582031,grad_norm: 0.9999999737656858, iteration: 296889
loss: 1.0094386339187622,grad_norm: 0.767007982921888, iteration: 296890
loss: 0.9950118064880371,grad_norm: 0.8332504700644167, iteration: 296891
loss: 0.9638127684593201,grad_norm: 0.7846338692374972, iteration: 296892
loss: 0.9837620258331299,grad_norm: 0.8632153808566462, iteration: 296893
loss: 1.0084624290466309,grad_norm: 0.8738344822433638, iteration: 296894
loss: 1.0016038417816162,grad_norm: 0.8010176531917994, iteration: 296895
loss: 1.0908762216567993,grad_norm: 0.9999996649082067, iteration: 296896
loss: 0.9903900623321533,grad_norm: 0.8827507676317838, iteration: 296897
loss: 1.0086768865585327,grad_norm: 0.7409571256541491, iteration: 296898
loss: 1.0006740093231201,grad_norm: 0.7224349541983687, iteration: 296899
loss: 0.9948006868362427,grad_norm: 0.9999989834235286, iteration: 296900
loss: 0.9762387275695801,grad_norm: 0.8688491045358854, iteration: 296901
loss: 0.9897106885910034,grad_norm: 0.8985448269057807, iteration: 296902
loss: 0.9992914795875549,grad_norm: 0.843648484660506, iteration: 296903
loss: 1.0128512382507324,grad_norm: 0.7668807040593769, iteration: 296904
loss: 1.0090490579605103,grad_norm: 0.7926966986367906, iteration: 296905
loss: 1.0156534910202026,grad_norm: 0.9999990712837621, iteration: 296906
loss: 0.9871683716773987,grad_norm: 0.758736297762581, iteration: 296907
loss: 1.0161712169647217,grad_norm: 0.9054967258209494, iteration: 296908
loss: 0.939924955368042,grad_norm: 0.7523110338400599, iteration: 296909
loss: 1.0157352685928345,grad_norm: 0.8413217341562115, iteration: 296910
loss: 1.0092830657958984,grad_norm: 0.8592289152599913, iteration: 296911
loss: 1.0212385654449463,grad_norm: 0.989782608163087, iteration: 296912
loss: 0.9927852153778076,grad_norm: 0.8272217335059421, iteration: 296913
loss: 0.9551525115966797,grad_norm: 0.9999993153509863, iteration: 296914
loss: 1.011968731880188,grad_norm: 0.8827236296024916, iteration: 296915
loss: 0.9533494710922241,grad_norm: 0.993438172500171, iteration: 296916
loss: 0.9954156875610352,grad_norm: 0.9999990665305242, iteration: 296917
loss: 1.0324782133102417,grad_norm: 0.8687843797290352, iteration: 296918
loss: 1.0082499980926514,grad_norm: 0.9791137212551234, iteration: 296919
loss: 1.0045554637908936,grad_norm: 0.9002608632019421, iteration: 296920
loss: 0.9961788058280945,grad_norm: 0.843712439859747, iteration: 296921
loss: 1.0003494024276733,grad_norm: 0.9999993097386239, iteration: 296922
loss: 1.0204821825027466,grad_norm: 0.8284613949513291, iteration: 296923
loss: 0.9930495023727417,grad_norm: 0.999999085449829, iteration: 296924
loss: 0.9976192116737366,grad_norm: 0.9254241173451134, iteration: 296925
loss: 1.016730546951294,grad_norm: 0.9538344367256454, iteration: 296926
loss: 0.9780199527740479,grad_norm: 0.8381063365772894, iteration: 296927
loss: 1.022873878479004,grad_norm: 0.8712353402095337, iteration: 296928
loss: 1.0022937059402466,grad_norm: 0.8401128796697821, iteration: 296929
loss: 1.040148377418518,grad_norm: 0.9033401268956699, iteration: 296930
loss: 0.9762570858001709,grad_norm: 0.9115668867901986, iteration: 296931
loss: 0.9830866456031799,grad_norm: 0.9735781103124095, iteration: 296932
loss: 0.9761614799499512,grad_norm: 0.7909948535871725, iteration: 296933
loss: 1.0501737594604492,grad_norm: 0.9999992468281862, iteration: 296934
loss: 1.0038820505142212,grad_norm: 0.8379804126055513, iteration: 296935
loss: 1.001288652420044,grad_norm: 0.9237809255428681, iteration: 296936
loss: 0.988996684551239,grad_norm: 0.9999995138993281, iteration: 296937
loss: 1.0280362367630005,grad_norm: 0.842641987042754, iteration: 296938
loss: 0.9535717368125916,grad_norm: 0.9315578794071108, iteration: 296939
loss: 1.0574986934661865,grad_norm: 0.8790652398101704, iteration: 296940
loss: 0.9973955750465393,grad_norm: 0.8185515808222077, iteration: 296941
loss: 1.0358909368515015,grad_norm: 0.7906188936656892, iteration: 296942
loss: 1.0219863653182983,grad_norm: 0.7404255150780776, iteration: 296943
loss: 1.005734920501709,grad_norm: 0.9820032092323351, iteration: 296944
loss: 0.9636855125427246,grad_norm: 0.9281406112382017, iteration: 296945
loss: 1.0031245946884155,grad_norm: 0.8382475137597246, iteration: 296946
loss: 0.9730105400085449,grad_norm: 0.7904146369652998, iteration: 296947
loss: 0.9922482371330261,grad_norm: 0.9223465098240563, iteration: 296948
loss: 0.9644368290901184,grad_norm: 0.9938982352707222, iteration: 296949
loss: 0.9837552905082703,grad_norm: 0.8654227122840711, iteration: 296950
loss: 0.9742510318756104,grad_norm: 0.780068502545675, iteration: 296951
loss: 0.9920787215232849,grad_norm: 0.9519815236264587, iteration: 296952
loss: 0.990449070930481,grad_norm: 0.7345403929802279, iteration: 296953
loss: 1.0001769065856934,grad_norm: 0.8615251246259209, iteration: 296954
loss: 0.9953894019126892,grad_norm: 0.9492583508949697, iteration: 296955
loss: 0.9959450960159302,grad_norm: 0.7605854484902613, iteration: 296956
loss: 0.9783747792243958,grad_norm: 0.7381180955510241, iteration: 296957
loss: 1.0251291990280151,grad_norm: 0.9147918293969945, iteration: 296958
loss: 1.016182541847229,grad_norm: 0.906507039287724, iteration: 296959
loss: 1.0149308443069458,grad_norm: 0.7847359493190823, iteration: 296960
loss: 1.0097836256027222,grad_norm: 0.8029887541606673, iteration: 296961
loss: 0.991945207118988,grad_norm: 0.7305015828593309, iteration: 296962
loss: 1.035610318183899,grad_norm: 0.8986831214629327, iteration: 296963
loss: 0.9984210729598999,grad_norm: 0.82585118048683, iteration: 296964
loss: 1.0519535541534424,grad_norm: 0.793750201055538, iteration: 296965
loss: 1.0106946229934692,grad_norm: 0.9999993763092199, iteration: 296966
loss: 1.0055831670761108,grad_norm: 0.7832881304055015, iteration: 296967
loss: 1.0080534219741821,grad_norm: 0.9999996486302217, iteration: 296968
loss: 1.0043540000915527,grad_norm: 0.9390944088412787, iteration: 296969
loss: 1.016574740409851,grad_norm: 0.9999992915663637, iteration: 296970
loss: 0.9945196509361267,grad_norm: 0.9751054406359803, iteration: 296971
loss: 1.015600562095642,grad_norm: 0.8591878187501483, iteration: 296972
loss: 0.9954283833503723,grad_norm: 0.9816410400446033, iteration: 296973
loss: 1.0338975191116333,grad_norm: 0.9018483757459006, iteration: 296974
loss: 1.0195997953414917,grad_norm: 0.999999056832287, iteration: 296975
loss: 0.9911266565322876,grad_norm: 0.8701978348125783, iteration: 296976
loss: 0.9979303479194641,grad_norm: 0.8702441306765472, iteration: 296977
loss: 0.9839213490486145,grad_norm: 0.8032699704523792, iteration: 296978
loss: 1.0568643808364868,grad_norm: 0.9664235175693615, iteration: 296979
loss: 1.010162591934204,grad_norm: 0.9616059406335725, iteration: 296980
loss: 1.0035910606384277,grad_norm: 0.9356358123529306, iteration: 296981
loss: 1.0203146934509277,grad_norm: 0.8791998557556421, iteration: 296982
loss: 1.0125421285629272,grad_norm: 0.9999991637050988, iteration: 296983
loss: 0.990729033946991,grad_norm: 0.9406769880187857, iteration: 296984
loss: 0.9852617979049683,grad_norm: 0.8625133300993515, iteration: 296985
loss: 1.0004225969314575,grad_norm: 0.9555993984792805, iteration: 296986
loss: 1.029036521911621,grad_norm: 0.848285382321862, iteration: 296987
loss: 1.008089303970337,grad_norm: 0.9292943095412749, iteration: 296988
loss: 0.9857680201530457,grad_norm: 0.906240135964762, iteration: 296989
loss: 1.0279675722122192,grad_norm: 0.8645548946714496, iteration: 296990
loss: 0.9869827628135681,grad_norm: 0.7527690053784434, iteration: 296991
loss: 1.0073647499084473,grad_norm: 0.6689785800350768, iteration: 296992
loss: 1.0083155632019043,grad_norm: 0.8928761816710894, iteration: 296993
loss: 0.9566360116004944,grad_norm: 0.8075571785813765, iteration: 296994
loss: 0.9731382131576538,grad_norm: 0.7572825921319398, iteration: 296995
loss: 1.0201821327209473,grad_norm: 0.8217131032604764, iteration: 296996
loss: 0.9980247616767883,grad_norm: 0.9538553676756113, iteration: 296997
loss: 1.0141558647155762,grad_norm: 0.7684467302529384, iteration: 296998
loss: 1.012841820716858,grad_norm: 0.9999991342895969, iteration: 296999
loss: 1.0034241676330566,grad_norm: 0.8992151468259387, iteration: 297000
loss: 1.0499498844146729,grad_norm: 0.9999991998571707, iteration: 297001
loss: 1.0176631212234497,grad_norm: 0.9071062767764086, iteration: 297002
loss: 1.0219941139221191,grad_norm: 0.7788864878189862, iteration: 297003
loss: 0.994264543056488,grad_norm: 0.9164351344960938, iteration: 297004
loss: 0.9969305992126465,grad_norm: 0.8637903837315877, iteration: 297005
loss: 1.0108511447906494,grad_norm: 0.9143974842809349, iteration: 297006
loss: 1.0035382509231567,grad_norm: 0.8236685847900395, iteration: 297007
loss: 1.0056955814361572,grad_norm: 0.9999992149816734, iteration: 297008
loss: 1.0169156789779663,grad_norm: 0.8030735239269361, iteration: 297009
loss: 0.9920856356620789,grad_norm: 0.822757237356724, iteration: 297010
loss: 0.9903375506401062,grad_norm: 0.9733278672929336, iteration: 297011
loss: 1.014330506324768,grad_norm: 0.8091897628600075, iteration: 297012
loss: 0.9922549724578857,grad_norm: 0.893980064764224, iteration: 297013
loss: 0.9859645366668701,grad_norm: 0.9174597245178555, iteration: 297014
loss: 0.9998448491096497,grad_norm: 0.9189355209295429, iteration: 297015
loss: 1.0189299583435059,grad_norm: 0.7107891258493096, iteration: 297016
loss: 1.0136758089065552,grad_norm: 0.8547479862103914, iteration: 297017
loss: 1.0423706769943237,grad_norm: 0.8207629024154603, iteration: 297018
loss: 1.015504240989685,grad_norm: 0.833568423380478, iteration: 297019
loss: 0.9725222587585449,grad_norm: 0.7792903527119254, iteration: 297020
loss: 0.9748157858848572,grad_norm: 0.7465892254530602, iteration: 297021
loss: 0.9883971214294434,grad_norm: 0.9999989978221021, iteration: 297022
loss: 0.9437307119369507,grad_norm: 0.8825175664289495, iteration: 297023
loss: 0.9926411509513855,grad_norm: 0.8009144761452683, iteration: 297024
loss: 0.9479320645332336,grad_norm: 0.8956878559379462, iteration: 297025
loss: 0.9978144764900208,grad_norm: 0.8252264953610521, iteration: 297026
loss: 0.9927796125411987,grad_norm: 0.9824270437248982, iteration: 297027
loss: 0.9854522347450256,grad_norm: 0.7922148813619213, iteration: 297028
loss: 0.9703707695007324,grad_norm: 0.8225567077889446, iteration: 297029
loss: 1.002570629119873,grad_norm: 0.8676995273179389, iteration: 297030
loss: 0.9865986704826355,grad_norm: 0.8003029735969157, iteration: 297031
loss: 0.9959149956703186,grad_norm: 0.7643863577038689, iteration: 297032
loss: 1.0099515914916992,grad_norm: 0.8772749307816542, iteration: 297033
loss: 0.9474446773529053,grad_norm: 0.9024448785766689, iteration: 297034
loss: 0.9928538203239441,grad_norm: 0.7905281270262293, iteration: 297035
loss: 0.9602557420730591,grad_norm: 0.8284795152391643, iteration: 297036
loss: 1.0010721683502197,grad_norm: 0.7216003383432692, iteration: 297037
loss: 1.0349934101104736,grad_norm: 0.9999989984869767, iteration: 297038
loss: 1.0077928304672241,grad_norm: 0.9803898798958401, iteration: 297039
loss: 0.9810128808021545,grad_norm: 0.8910630456520805, iteration: 297040
loss: 0.996910572052002,grad_norm: 0.8010411531345135, iteration: 297041
loss: 1.0163227319717407,grad_norm: 0.9999991875692683, iteration: 297042
loss: 0.9853581786155701,grad_norm: 0.9012308069195601, iteration: 297043
loss: 0.9727147817611694,grad_norm: 0.9400637801712272, iteration: 297044
loss: 1.0066449642181396,grad_norm: 0.9999992608161064, iteration: 297045
loss: 0.9956896901130676,grad_norm: 0.9962872111449635, iteration: 297046
loss: 0.9970659017562866,grad_norm: 0.9649144667335106, iteration: 297047
loss: 1.0250746011734009,grad_norm: 0.9999990943125825, iteration: 297048
loss: 0.9923285841941833,grad_norm: 0.8376873369730219, iteration: 297049
loss: 0.9704307913780212,grad_norm: 0.7834136854296722, iteration: 297050
loss: 1.0225069522857666,grad_norm: 0.9999996890673449, iteration: 297051
loss: 0.9782726168632507,grad_norm: 0.9017654179467658, iteration: 297052
loss: 0.9875220656394958,grad_norm: 0.9485498639853089, iteration: 297053
loss: 0.9962314963340759,grad_norm: 0.8180112279331098, iteration: 297054
loss: 1.0204346179962158,grad_norm: 0.9999991458714591, iteration: 297055
loss: 1.014728307723999,grad_norm: 0.8813688385617244, iteration: 297056
loss: 1.014176368713379,grad_norm: 0.9496159829037042, iteration: 297057
loss: 1.0045398473739624,grad_norm: 0.8800269307223574, iteration: 297058
loss: 0.9660740494728088,grad_norm: 0.7945673470688132, iteration: 297059
loss: 1.0076872110366821,grad_norm: 0.9999991435006728, iteration: 297060
loss: 0.9703770875930786,grad_norm: 0.9999995350054927, iteration: 297061
loss: 0.9713549017906189,grad_norm: 0.9729228056310224, iteration: 297062
loss: 1.0021336078643799,grad_norm: 0.9836242652724874, iteration: 297063
loss: 0.9862498641014099,grad_norm: 0.8146172541356782, iteration: 297064
loss: 0.983846127986908,grad_norm: 0.9692360240449004, iteration: 297065
loss: 1.0412832498550415,grad_norm: 0.999999094386018, iteration: 297066
loss: 0.9954741597175598,grad_norm: 0.7668442250818851, iteration: 297067
loss: 0.9897769093513489,grad_norm: 0.9999991961190395, iteration: 297068
loss: 1.0182291269302368,grad_norm: 0.905630552441752, iteration: 297069
loss: 1.0212241411209106,grad_norm: 0.9888221074259889, iteration: 297070
loss: 0.9932085275650024,grad_norm: 0.7809526915323218, iteration: 297071
loss: 0.974016010761261,grad_norm: 0.9999992078137966, iteration: 297072
loss: 1.0297372341156006,grad_norm: 0.9886061244838407, iteration: 297073
loss: 0.979804277420044,grad_norm: 0.8727018429864308, iteration: 297074
loss: 0.9961318373680115,grad_norm: 0.9172409681264425, iteration: 297075
loss: 0.9762969017028809,grad_norm: 0.7330376412272769, iteration: 297076
loss: 1.0369813442230225,grad_norm: 0.9999991313139331, iteration: 297077
loss: 0.9740267992019653,grad_norm: 0.7765850435042005, iteration: 297078
loss: 1.0042834281921387,grad_norm: 0.7693644619153831, iteration: 297079
loss: 0.975094199180603,grad_norm: 0.6927900919283448, iteration: 297080
loss: 1.0361297130584717,grad_norm: 0.840864740174203, iteration: 297081
loss: 0.9997685551643372,grad_norm: 0.962656806397735, iteration: 297082
loss: 1.0103116035461426,grad_norm: 0.7782965802144995, iteration: 297083
loss: 0.9778427481651306,grad_norm: 0.9667299649298078, iteration: 297084
loss: 1.0520459413528442,grad_norm: 0.9999991739715297, iteration: 297085
loss: 0.9712361097335815,grad_norm: 0.9999995237762455, iteration: 297086
loss: 0.967711329460144,grad_norm: 0.8576895067163387, iteration: 297087
loss: 0.9945005774497986,grad_norm: 0.7662496113720565, iteration: 297088
loss: 1.0081162452697754,grad_norm: 0.8233726995408105, iteration: 297089
loss: 0.9890718460083008,grad_norm: 0.8422195709343444, iteration: 297090
loss: 0.9939600825309753,grad_norm: 0.9295667640035842, iteration: 297091
loss: 1.0207854509353638,grad_norm: 0.9452346430553312, iteration: 297092
loss: 0.9942628741264343,grad_norm: 0.9146769239556122, iteration: 297093
loss: 1.0159847736358643,grad_norm: 0.9999991565222914, iteration: 297094
loss: 0.9965028166770935,grad_norm: 0.9999990615129293, iteration: 297095
loss: 0.9635506868362427,grad_norm: 0.9108402032382406, iteration: 297096
loss: 0.9423748850822449,grad_norm: 0.8524278421349305, iteration: 297097
loss: 0.9669147729873657,grad_norm: 0.8426921480726012, iteration: 297098
loss: 0.9684497117996216,grad_norm: 0.8799979274697711, iteration: 297099
loss: 1.002428650856018,grad_norm: 0.8486303108905142, iteration: 297100
loss: 0.9749908447265625,grad_norm: 0.9336756593495292, iteration: 297101
loss: 0.9798892736434937,grad_norm: 0.8645008395656003, iteration: 297102
loss: 1.0339820384979248,grad_norm: 0.9395695337213168, iteration: 297103
loss: 0.9517386555671692,grad_norm: 0.794530210045169, iteration: 297104
loss: 1.0047681331634521,grad_norm: 0.8023630475990936, iteration: 297105
loss: 0.993051290512085,grad_norm: 0.7100175682110049, iteration: 297106
loss: 0.9972646832466125,grad_norm: 0.7911367712522612, iteration: 297107
loss: 1.0485820770263672,grad_norm: 0.9999998106768272, iteration: 297108
loss: 1.0085995197296143,grad_norm: 0.9999993424472529, iteration: 297109
loss: 0.9863554835319519,grad_norm: 0.9951341536156704, iteration: 297110
loss: 1.001782774925232,grad_norm: 0.9381611875157755, iteration: 297111
loss: 1.022821307182312,grad_norm: 0.9999992064540202, iteration: 297112
loss: 0.984307587146759,grad_norm: 0.6931813536410718, iteration: 297113
loss: 0.9976491332054138,grad_norm: 0.9217742353381847, iteration: 297114
loss: 0.98465895652771,grad_norm: 0.8986990754349031, iteration: 297115
loss: 1.0190249681472778,grad_norm: 0.8754201019556577, iteration: 297116
loss: 1.0907187461853027,grad_norm: 0.8715944532126453, iteration: 297117
loss: 0.9986045360565186,grad_norm: 0.9280434767846719, iteration: 297118
loss: 1.0076221227645874,grad_norm: 0.9029752300777172, iteration: 297119
loss: 1.0183604955673218,grad_norm: 0.8672576796718791, iteration: 297120
loss: 0.9903935194015503,grad_norm: 0.768937285573098, iteration: 297121
loss: 0.9992867708206177,grad_norm: 0.9699645634762053, iteration: 297122
loss: 1.0596649646759033,grad_norm: 0.9225137671086816, iteration: 297123
loss: 1.0055564641952515,grad_norm: 0.7317705430771269, iteration: 297124
loss: 1.0315768718719482,grad_norm: 0.9999999103783818, iteration: 297125
loss: 0.9732396006584167,grad_norm: 0.8608546410360204, iteration: 297126
loss: 0.9821759462356567,grad_norm: 0.9774294765658444, iteration: 297127
loss: 0.9917230010032654,grad_norm: 0.921713868865283, iteration: 297128
loss: 0.9982516765594482,grad_norm: 0.7659738194968688, iteration: 297129
loss: 1.0016593933105469,grad_norm: 0.7917590536025421, iteration: 297130
loss: 0.9993820190429688,grad_norm: 0.7834521166529546, iteration: 297131
loss: 0.9680891633033752,grad_norm: 0.7490180201921252, iteration: 297132
loss: 1.0153136253356934,grad_norm: 0.9279008970759316, iteration: 297133
loss: 1.0216963291168213,grad_norm: 0.8906438894584283, iteration: 297134
loss: 0.968704104423523,grad_norm: 0.8282192860928771, iteration: 297135
loss: 1.0172195434570312,grad_norm: 0.999999130914908, iteration: 297136
loss: 1.0198462009429932,grad_norm: 0.9999992763102674, iteration: 297137
loss: 1.0101009607315063,grad_norm: 0.7531263874577999, iteration: 297138
loss: 0.9694558382034302,grad_norm: 0.8815142886085162, iteration: 297139
loss: 0.989579975605011,grad_norm: 0.8681373633186137, iteration: 297140
loss: 0.9829457402229309,grad_norm: 0.8499963914890935, iteration: 297141
loss: 1.0061051845550537,grad_norm: 0.9711670725455889, iteration: 297142
loss: 0.9603034257888794,grad_norm: 0.8901351696157035, iteration: 297143
loss: 0.9956565499305725,grad_norm: 0.9298600454249172, iteration: 297144
loss: 1.0150361061096191,grad_norm: 0.9718469810091809, iteration: 297145
loss: 0.9668899178504944,grad_norm: 0.7839421829985742, iteration: 297146
loss: 1.0242420434951782,grad_norm: 0.7500932467935258, iteration: 297147
loss: 1.069488525390625,grad_norm: 0.850838236912875, iteration: 297148
loss: 0.9721064567565918,grad_norm: 0.9652334888384122, iteration: 297149
loss: 1.0001440048217773,grad_norm: 0.7605307477840126, iteration: 297150
loss: 1.0133264064788818,grad_norm: 0.7234618205546203, iteration: 297151
loss: 0.9791463613510132,grad_norm: 0.8268758269974533, iteration: 297152
loss: 1.0184457302093506,grad_norm: 0.999999934375851, iteration: 297153
loss: 1.0146030187606812,grad_norm: 0.9137905684336134, iteration: 297154
loss: 0.9419772624969482,grad_norm: 0.9145321271421781, iteration: 297155
loss: 1.0212862491607666,grad_norm: 0.984034030644669, iteration: 297156
loss: 1.0345278978347778,grad_norm: 0.7846039779892765, iteration: 297157
loss: 1.0011060237884521,grad_norm: 0.9999991699148414, iteration: 297158
loss: 1.0015531778335571,grad_norm: 0.8305318766667694, iteration: 297159
loss: 1.0241129398345947,grad_norm: 0.9481545709426612, iteration: 297160
loss: 1.0300010442733765,grad_norm: 0.7914089533337695, iteration: 297161
loss: 0.9671077132225037,grad_norm: 0.9999990294464455, iteration: 297162
loss: 1.2452846765518188,grad_norm: 0.9999996210996449, iteration: 297163
loss: 0.9752375483512878,grad_norm: 0.8296181643823435, iteration: 297164
loss: 1.039392113685608,grad_norm: 0.9999995581560177, iteration: 297165
loss: 1.014394998550415,grad_norm: 0.9097318386808978, iteration: 297166
loss: 0.9748981595039368,grad_norm: 0.8418513826323799, iteration: 297167
loss: 1.0295518636703491,grad_norm: 0.7402497811328248, iteration: 297168
loss: 0.9831591844558716,grad_norm: 0.9306063335581913, iteration: 297169
loss: 0.9915463328361511,grad_norm: 0.7813467810182477, iteration: 297170
loss: 1.000087857246399,grad_norm: 0.8855726194376385, iteration: 297171
loss: 0.9924971461296082,grad_norm: 0.8915536318216458, iteration: 297172
loss: 1.0692088603973389,grad_norm: 0.9070133886904772, iteration: 297173
loss: 0.9681174755096436,grad_norm: 0.7488210208962349, iteration: 297174
loss: 1.0021100044250488,grad_norm: 0.8460970286363432, iteration: 297175
loss: 0.9921755790710449,grad_norm: 0.8941481914769965, iteration: 297176
loss: 0.9846283793449402,grad_norm: 0.8848356184846317, iteration: 297177
loss: 1.015618920326233,grad_norm: 0.8431958272599196, iteration: 297178
loss: 1.0150786638259888,grad_norm: 0.9984259288131526, iteration: 297179
loss: 1.0106689929962158,grad_norm: 0.9063028930347243, iteration: 297180
loss: 1.0224125385284424,grad_norm: 0.9999991753274053, iteration: 297181
loss: 0.9725301861763,grad_norm: 0.7923939241755631, iteration: 297182
loss: 1.0398027896881104,grad_norm: 0.9999995673665002, iteration: 297183
loss: 1.0138086080551147,grad_norm: 0.9999990993613809, iteration: 297184
loss: 0.9942561388015747,grad_norm: 0.7988740953370305, iteration: 297185
loss: 1.009819746017456,grad_norm: 0.8366703995985454, iteration: 297186
loss: 0.9912187457084656,grad_norm: 0.8211047742059487, iteration: 297187
loss: 1.0008174180984497,grad_norm: 0.7889525109261419, iteration: 297188
loss: 0.9708980321884155,grad_norm: 0.8525045295274595, iteration: 297189
loss: 1.0126219987869263,grad_norm: 0.7607978847164945, iteration: 297190
loss: 1.0136759281158447,grad_norm: 0.9147602484606996, iteration: 297191
loss: 1.023292899131775,grad_norm: 0.9569685872818849, iteration: 297192
loss: 1.0148521661758423,grad_norm: 0.8915056440670793, iteration: 297193
loss: 0.9665862321853638,grad_norm: 0.6974850545830835, iteration: 297194
loss: 1.0094923973083496,grad_norm: 0.9255420431215533, iteration: 297195
loss: 0.9741358757019043,grad_norm: 0.8499941106434825, iteration: 297196
loss: 1.0006400346755981,grad_norm: 0.7081797311168339, iteration: 297197
loss: 0.954614520072937,grad_norm: 0.8361411943436553, iteration: 297198
loss: 1.0054131746292114,grad_norm: 0.8955903468836915, iteration: 297199
loss: 1.0309516191482544,grad_norm: 0.921845927821141, iteration: 297200
loss: 1.011378526687622,grad_norm: 0.7615831790557097, iteration: 297201
loss: 1.000099778175354,grad_norm: 0.8618194131691183, iteration: 297202
loss: 1.0335479974746704,grad_norm: 0.9999990387637108, iteration: 297203
loss: 0.9542953968048096,grad_norm: 0.8941274053073843, iteration: 297204
loss: 0.9798012375831604,grad_norm: 0.8684225687117576, iteration: 297205
loss: 1.0020270347595215,grad_norm: 0.8174067847967027, iteration: 297206
loss: 1.0562925338745117,grad_norm: 0.9113703143796166, iteration: 297207
loss: 1.056745171546936,grad_norm: 0.9999997371966322, iteration: 297208
loss: 0.9966739416122437,grad_norm: 0.9024373618690806, iteration: 297209
loss: 1.0071218013763428,grad_norm: 0.7720004989400221, iteration: 297210
loss: 0.9961495995521545,grad_norm: 0.9254904781172252, iteration: 297211
loss: 0.9805974364280701,grad_norm: 0.7897249037704984, iteration: 297212
loss: 1.0164984464645386,grad_norm: 0.6603368336916547, iteration: 297213
loss: 0.99015873670578,grad_norm: 0.7979949691964918, iteration: 297214
loss: 0.9730821251869202,grad_norm: 0.8559820122134246, iteration: 297215
loss: 1.0219552516937256,grad_norm: 0.7931021773818103, iteration: 297216
loss: 0.9728840589523315,grad_norm: 0.8207423933076159, iteration: 297217
loss: 0.9646785259246826,grad_norm: 0.9469889433932246, iteration: 297218
loss: 1.0086697340011597,grad_norm: 0.9999990783354392, iteration: 297219
loss: 1.043577790260315,grad_norm: 0.8842672605055509, iteration: 297220
loss: 1.0659759044647217,grad_norm: 0.9999997402083259, iteration: 297221
loss: 1.0203801393508911,grad_norm: 0.9999991389817983, iteration: 297222
loss: 0.9916606545448303,grad_norm: 0.8819510505560668, iteration: 297223
loss: 1.096620798110962,grad_norm: 0.8117562794414167, iteration: 297224
loss: 1.037023663520813,grad_norm: 0.867107364619464, iteration: 297225
loss: 0.9918982982635498,grad_norm: 0.9486503081187647, iteration: 297226
loss: 1.0467314720153809,grad_norm: 0.9999992528368994, iteration: 297227
loss: 1.0256857872009277,grad_norm: 0.8034957297838414, iteration: 297228
loss: 1.0260947942733765,grad_norm: 0.9843999898470999, iteration: 297229
loss: 0.9661664962768555,grad_norm: 0.7155305867504802, iteration: 297230
loss: 0.9685046672821045,grad_norm: 0.9233472055399459, iteration: 297231
loss: 1.0969008207321167,grad_norm: 0.9999993336078852, iteration: 297232
loss: 0.9747709631919861,grad_norm: 0.7831930958917435, iteration: 297233
loss: 1.005642056465149,grad_norm: 0.8398396995176014, iteration: 297234
loss: 0.9968916773796082,grad_norm: 0.793200761987154, iteration: 297235
loss: 1.0257246494293213,grad_norm: 0.8092356094759926, iteration: 297236
loss: 0.9767879247665405,grad_norm: 0.9497632835229931, iteration: 297237
loss: 0.9796541333198547,grad_norm: 0.9528759202627498, iteration: 297238
loss: 0.9456990957260132,grad_norm: 0.987915487382927, iteration: 297239
loss: 0.9930216670036316,grad_norm: 0.7063644150306114, iteration: 297240
loss: 0.9833479523658752,grad_norm: 0.7381818987479513, iteration: 297241
loss: 1.0212233066558838,grad_norm: 0.8765449757658905, iteration: 297242
loss: 1.04714834690094,grad_norm: 0.9731898688326731, iteration: 297243
loss: 0.9969274401664734,grad_norm: 0.8394068973884735, iteration: 297244
loss: 1.1301599740982056,grad_norm: 0.9999995769565334, iteration: 297245
loss: 0.9623689651489258,grad_norm: 0.8805937871354598, iteration: 297246
loss: 1.0121893882751465,grad_norm: 0.9999991951349974, iteration: 297247
loss: 0.9823716878890991,grad_norm: 0.780255813195478, iteration: 297248
loss: 1.0019557476043701,grad_norm: 0.781287847921109, iteration: 297249
loss: 1.01100492477417,grad_norm: 0.8628276502182879, iteration: 297250
loss: 1.0088568925857544,grad_norm: 0.8061570472261208, iteration: 297251
loss: 0.9708015322685242,grad_norm: 0.999999697614994, iteration: 297252
loss: 0.9592326879501343,grad_norm: 0.8017414766701554, iteration: 297253
loss: 0.9437938928604126,grad_norm: 0.8379838329875413, iteration: 297254
loss: 1.012911319732666,grad_norm: 0.8830424586386801, iteration: 297255
loss: 0.9919525384902954,grad_norm: 0.9999992434372549, iteration: 297256
loss: 1.0475068092346191,grad_norm: 0.8677469084540227, iteration: 297257
loss: 1.0111110210418701,grad_norm: 0.9999996077976022, iteration: 297258
loss: 0.974841296672821,grad_norm: 0.8076193829371601, iteration: 297259
loss: 0.972040593624115,grad_norm: 0.790329435750018, iteration: 297260
loss: 1.0090540647506714,grad_norm: 0.7928900364566697, iteration: 297261
loss: 1.020199179649353,grad_norm: 0.8480601486698097, iteration: 297262
loss: 1.0018665790557861,grad_norm: 0.8402572426894653, iteration: 297263
loss: 0.9960957169532776,grad_norm: 0.9482689843609873, iteration: 297264
loss: 1.0385864973068237,grad_norm: 0.835293816821883, iteration: 297265
loss: 0.9795727729797363,grad_norm: 0.9128151462729159, iteration: 297266
loss: 0.9933776259422302,grad_norm: 0.7527836619116302, iteration: 297267
loss: 1.041117548942566,grad_norm: 0.9999990486296243, iteration: 297268
loss: 1.0148260593414307,grad_norm: 0.9999991944999143, iteration: 297269
loss: 0.996988832950592,grad_norm: 0.7894847535033768, iteration: 297270
loss: 1.0014749765396118,grad_norm: 0.793217564347802, iteration: 297271
loss: 0.9767652750015259,grad_norm: 0.8518029131668658, iteration: 297272
loss: 0.998015820980072,grad_norm: 0.9753232041899171, iteration: 297273
loss: 1.046525001525879,grad_norm: 0.8048924571234773, iteration: 297274
loss: 0.9601157307624817,grad_norm: 0.8669143226438287, iteration: 297275
loss: 0.9748542308807373,grad_norm: 0.9999992366892011, iteration: 297276
loss: 1.0182684659957886,grad_norm: 0.7546903951315678, iteration: 297277
loss: 0.9848670959472656,grad_norm: 0.7679865078670273, iteration: 297278
loss: 1.0427905321121216,grad_norm: 0.9999991083951175, iteration: 297279
loss: 1.018033742904663,grad_norm: 0.8602934762785397, iteration: 297280
loss: 1.064234972000122,grad_norm: 0.9999994270578055, iteration: 297281
loss: 0.9773809909820557,grad_norm: 0.9466044325569302, iteration: 297282
loss: 1.0375943183898926,grad_norm: 0.7111016554212638, iteration: 297283
loss: 0.9742236137390137,grad_norm: 0.765917589298577, iteration: 297284
loss: 0.9941771030426025,grad_norm: 0.9999994210064451, iteration: 297285
loss: 0.9964417815208435,grad_norm: 0.7477010227965849, iteration: 297286
loss: 0.9570140242576599,grad_norm: 0.9005948667464331, iteration: 297287
loss: 0.9863700270652771,grad_norm: 0.9212894210437867, iteration: 297288
loss: 0.9769614934921265,grad_norm: 0.8678138718120506, iteration: 297289
loss: 0.9651725888252258,grad_norm: 0.8360635686296894, iteration: 297290
loss: 1.0176554918289185,grad_norm: 0.8553128777244298, iteration: 297291
loss: 0.9646754264831543,grad_norm: 0.9665341893349558, iteration: 297292
loss: 0.9720566868782043,grad_norm: 0.9641519568621477, iteration: 297293
loss: 1.0087535381317139,grad_norm: 0.805986986501244, iteration: 297294
loss: 0.964412271976471,grad_norm: 0.8212851686137423, iteration: 297295
loss: 1.0379582643508911,grad_norm: 0.7489136976383022, iteration: 297296
loss: 0.9761536121368408,grad_norm: 0.7957304365003854, iteration: 297297
loss: 1.027786135673523,grad_norm: 0.9948304980710271, iteration: 297298
loss: 0.9877466559410095,grad_norm: 0.9999997100900246, iteration: 297299
loss: 1.0178064107894897,grad_norm: 0.8264662941922628, iteration: 297300
loss: 1.0397999286651611,grad_norm: 0.8598466692105711, iteration: 297301
loss: 0.9758964776992798,grad_norm: 0.6977510070507401, iteration: 297302
loss: 1.0280872583389282,grad_norm: 0.9445434361424491, iteration: 297303
loss: 0.9887790083885193,grad_norm: 0.8479648289605689, iteration: 297304
loss: 0.9919068217277527,grad_norm: 0.999999073931768, iteration: 297305
loss: 0.9679670333862305,grad_norm: 0.9999991664964084, iteration: 297306
loss: 0.9919758439064026,grad_norm: 0.8964016957665162, iteration: 297307
loss: 1.108691692352295,grad_norm: 0.9273570084961935, iteration: 297308
loss: 0.9687398672103882,grad_norm: 0.9597357402320826, iteration: 297309
loss: 0.9862577319145203,grad_norm: 0.7929118901759867, iteration: 297310
loss: 0.9948256015777588,grad_norm: 0.8850296331763531, iteration: 297311
loss: 1.0012496709823608,grad_norm: 0.784394286112962, iteration: 297312
loss: 0.9920042157173157,grad_norm: 0.9268168026895822, iteration: 297313
loss: 1.0089478492736816,grad_norm: 0.8162481899199966, iteration: 297314
loss: 0.9752951264381409,grad_norm: 0.9900573131075188, iteration: 297315
loss: 0.9939866662025452,grad_norm: 0.8518127639689265, iteration: 297316
loss: 0.9910807609558105,grad_norm: 0.7856924694044362, iteration: 297317
loss: 1.0494252443313599,grad_norm: 0.9663913052458746, iteration: 297318
loss: 0.9931966066360474,grad_norm: 0.8759483813812933, iteration: 297319
loss: 0.983396589756012,grad_norm: 0.8554854980762363, iteration: 297320
loss: 0.9641187191009521,grad_norm: 0.8754689944534444, iteration: 297321
loss: 0.9961709976196289,grad_norm: 0.9119648936464855, iteration: 297322
loss: 0.9787740111351013,grad_norm: 0.8769777972055659, iteration: 297323
loss: 0.9476176500320435,grad_norm: 0.8005601942050362, iteration: 297324
loss: 0.9696613550186157,grad_norm: 0.962283916215954, iteration: 297325
loss: 0.9674275517463684,grad_norm: 0.7776301900304672, iteration: 297326
loss: 1.03891921043396,grad_norm: 0.9999997118556021, iteration: 297327
loss: 0.9874473810195923,grad_norm: 0.7399614712337604, iteration: 297328
loss: 1.0624130964279175,grad_norm: 0.999999002753415, iteration: 297329
loss: 1.0819129943847656,grad_norm: 0.9999990937372024, iteration: 297330
loss: 0.9978479743003845,grad_norm: 0.7881307111627835, iteration: 297331
loss: 0.9881234765052795,grad_norm: 0.8011886541063294, iteration: 297332
loss: 1.0155766010284424,grad_norm: 0.8413900335225903, iteration: 297333
loss: 0.9975212216377258,grad_norm: 0.9999991459647509, iteration: 297334
loss: 1.0562231540679932,grad_norm: 0.9999994238210689, iteration: 297335
loss: 0.9837886095046997,grad_norm: 0.8280092144465473, iteration: 297336
loss: 1.0053786039352417,grad_norm: 0.9090943506476635, iteration: 297337
loss: 1.0251338481903076,grad_norm: 0.7574349083080033, iteration: 297338
loss: 1.006791353225708,grad_norm: 0.9051237302966918, iteration: 297339
loss: 1.1289880275726318,grad_norm: 0.9999995892015362, iteration: 297340
loss: 0.9593905210494995,grad_norm: 0.7920681213755851, iteration: 297341
loss: 0.9857738018035889,grad_norm: 0.9999995390261263, iteration: 297342
loss: 1.0204960107803345,grad_norm: 0.9999993634084543, iteration: 297343
loss: 0.9928576350212097,grad_norm: 0.8679274756691562, iteration: 297344
loss: 1.0316002368927002,grad_norm: 0.8622293356553616, iteration: 297345
loss: 0.9920165538787842,grad_norm: 0.8412751033892149, iteration: 297346
loss: 1.0040191411972046,grad_norm: 0.9214858063413304, iteration: 297347
loss: 0.9964041709899902,grad_norm: 0.8579938686254703, iteration: 297348
loss: 1.017124056816101,grad_norm: 0.8339609586690447, iteration: 297349
loss: 1.0047129392623901,grad_norm: 0.7554465222250814, iteration: 297350
loss: 1.1526883840560913,grad_norm: 0.9999991433409564, iteration: 297351
loss: 1.0337934494018555,grad_norm: 0.9999990873606174, iteration: 297352
loss: 0.9849237203598022,grad_norm: 0.8962369562432089, iteration: 297353
loss: 1.015784740447998,grad_norm: 0.8181742374854752, iteration: 297354
loss: 0.9883691668510437,grad_norm: 0.7474576410228264, iteration: 297355
loss: 0.9905948638916016,grad_norm: 0.7867062295262656, iteration: 297356
loss: 1.0014373064041138,grad_norm: 0.9210862674765837, iteration: 297357
loss: 1.0148215293884277,grad_norm: 0.8228656153552023, iteration: 297358
loss: 0.993626058101654,grad_norm: 0.9999991079652636, iteration: 297359
loss: 0.9931536316871643,grad_norm: 0.9435662776187064, iteration: 297360
loss: 1.018561840057373,grad_norm: 0.8940293482941931, iteration: 297361
loss: 0.9910711646080017,grad_norm: 0.9572403151437959, iteration: 297362
loss: 1.010051965713501,grad_norm: 0.6984357232694886, iteration: 297363
loss: 0.9650377035140991,grad_norm: 0.8662731623783532, iteration: 297364
loss: 1.0224008560180664,grad_norm: 0.8670705461044624, iteration: 297365
loss: 1.0031033754348755,grad_norm: 0.9932119301145869, iteration: 297366
loss: 0.95566326379776,grad_norm: 0.9114505305381996, iteration: 297367
loss: 0.9428511261940002,grad_norm: 0.904485893570867, iteration: 297368
loss: 1.0095763206481934,grad_norm: 0.8140531156352145, iteration: 297369
loss: 1.0198509693145752,grad_norm: 0.8261409761664735, iteration: 297370
loss: 0.992584764957428,grad_norm: 0.7366350928508789, iteration: 297371
loss: 0.9975731372833252,grad_norm: 0.8367186487080608, iteration: 297372
loss: 1.0362664461135864,grad_norm: 0.9999991876675873, iteration: 297373
loss: 0.9930752515792847,grad_norm: 0.9284288663701828, iteration: 297374
loss: 1.0093719959259033,grad_norm: 0.9153181716534353, iteration: 297375
loss: 1.0092583894729614,grad_norm: 0.9353682913803169, iteration: 297376
loss: 1.014088749885559,grad_norm: 0.7171767113238735, iteration: 297377
loss: 0.984967052936554,grad_norm: 0.867440030823728, iteration: 297378
loss: 0.9764057993888855,grad_norm: 0.9999991099009207, iteration: 297379
loss: 0.9843834042549133,grad_norm: 0.6609143781426973, iteration: 297380
loss: 0.9827102422714233,grad_norm: 0.7872070596379785, iteration: 297381
loss: 0.984117865562439,grad_norm: 0.7877804513936483, iteration: 297382
loss: 1.0089844465255737,grad_norm: 0.7869926275143128, iteration: 297383
loss: 1.0088382959365845,grad_norm: 0.8354357315127285, iteration: 297384
loss: 1.0088653564453125,grad_norm: 0.9591249086661855, iteration: 297385
loss: 0.9995980262756348,grad_norm: 0.8668900967659572, iteration: 297386
loss: 1.0563596487045288,grad_norm: 0.7064921816039105, iteration: 297387
loss: 0.9827900528907776,grad_norm: 0.872481954011817, iteration: 297388
loss: 0.9937511682510376,grad_norm: 0.9608806585402785, iteration: 297389
loss: 0.9940783977508545,grad_norm: 0.8629539214415114, iteration: 297390
loss: 1.008819580078125,grad_norm: 0.9166716431466099, iteration: 297391
loss: 0.9749747514724731,grad_norm: 0.7616384098045983, iteration: 297392
loss: 1.009849190711975,grad_norm: 0.8231398085324154, iteration: 297393
loss: 1.0027493238449097,grad_norm: 0.8851097486528944, iteration: 297394
loss: 1.044119119644165,grad_norm: 0.8122983689059657, iteration: 297395
loss: 1.0145765542984009,grad_norm: 0.9429882522300985, iteration: 297396
loss: 1.02003812789917,grad_norm: 0.7947540069581666, iteration: 297397
loss: 0.9835672378540039,grad_norm: 0.9115107642872711, iteration: 297398
loss: 1.0262782573699951,grad_norm: 0.8882771875913901, iteration: 297399
loss: 0.9816638231277466,grad_norm: 0.796252831818204, iteration: 297400
loss: 0.9927013516426086,grad_norm: 0.9999992024243534, iteration: 297401
loss: 0.995954692363739,grad_norm: 0.8689562265426313, iteration: 297402
loss: 0.9656563401222229,grad_norm: 0.9230884682934072, iteration: 297403
loss: 0.9669830203056335,grad_norm: 0.9252218661144378, iteration: 297404
loss: 1.000637412071228,grad_norm: 0.9684924088840746, iteration: 297405
loss: 1.0285965204238892,grad_norm: 0.9961777308681331, iteration: 297406
loss: 0.9704651236534119,grad_norm: 0.8482586221549182, iteration: 297407
loss: 0.9943366646766663,grad_norm: 0.7340219919626854, iteration: 297408
loss: 1.0177500247955322,grad_norm: 0.8731299587973135, iteration: 297409
loss: 0.9913008213043213,grad_norm: 0.7863465099966023, iteration: 297410
loss: 0.9860290288925171,grad_norm: 0.8903273904790835, iteration: 297411
loss: 0.9994991421699524,grad_norm: 0.7066076813959891, iteration: 297412
loss: 1.0054279565811157,grad_norm: 0.864797464338546, iteration: 297413
loss: 0.9638416767120361,grad_norm: 0.6952320496489188, iteration: 297414
loss: 1.0689904689788818,grad_norm: 0.7897088647437761, iteration: 297415
loss: 0.9802190661430359,grad_norm: 0.7707264197187385, iteration: 297416
loss: 0.9972069263458252,grad_norm: 0.9880803361323675, iteration: 297417
loss: 0.9899466037750244,grad_norm: 0.9040659855763518, iteration: 297418
loss: 1.0016939640045166,grad_norm: 0.9758385557098426, iteration: 297419
loss: 1.0087615251541138,grad_norm: 0.9136209099320078, iteration: 297420
loss: 0.9992823600769043,grad_norm: 0.8285532742057247, iteration: 297421
loss: 1.0297105312347412,grad_norm: 0.8670919459307589, iteration: 297422
loss: 1.0167609453201294,grad_norm: 0.8338044071034828, iteration: 297423
loss: 0.9889014363288879,grad_norm: 0.8366070393801073, iteration: 297424
loss: 0.9852961897850037,grad_norm: 0.9306745924028244, iteration: 297425
loss: 0.9973512291908264,grad_norm: 0.7656655241646234, iteration: 297426
loss: 0.9796056151390076,grad_norm: 0.9999989919868306, iteration: 297427
loss: 1.0267887115478516,grad_norm: 0.9429241854035504, iteration: 297428
loss: 1.0104200839996338,grad_norm: 0.7792028607738201, iteration: 297429
loss: 1.003004550933838,grad_norm: 0.8601906571912841, iteration: 297430
loss: 0.9509429335594177,grad_norm: 0.8432105193362736, iteration: 297431
loss: 0.9955923557281494,grad_norm: 0.849357492689686, iteration: 297432
loss: 0.9750884771347046,grad_norm: 0.838555149691898, iteration: 297433
loss: 0.9768307209014893,grad_norm: 0.7138640247982776, iteration: 297434
loss: 1.0044209957122803,grad_norm: 0.907473634046468, iteration: 297435
loss: 0.987246036529541,grad_norm: 0.9000712273377048, iteration: 297436
loss: 1.0016916990280151,grad_norm: 0.7183816554996816, iteration: 297437
loss: 1.0260708332061768,grad_norm: 0.8309083733210444, iteration: 297438
loss: 0.9919275641441345,grad_norm: 0.9999990359936934, iteration: 297439
loss: 0.9833484888076782,grad_norm: 0.858465287213311, iteration: 297440
loss: 0.9846931099891663,grad_norm: 0.7088874973976826, iteration: 297441
loss: 0.9865013957023621,grad_norm: 0.7869221550513029, iteration: 297442
loss: 1.0294002294540405,grad_norm: 0.8185489456709356, iteration: 297443
loss: 1.0078134536743164,grad_norm: 0.8952317444678538, iteration: 297444
loss: 1.0144965648651123,grad_norm: 0.9256293119486562, iteration: 297445
loss: 1.0567177534103394,grad_norm: 0.9999997027571255, iteration: 297446
loss: 0.9607170820236206,grad_norm: 0.8285079030343971, iteration: 297447
loss: 0.9626200795173645,grad_norm: 0.7423917439794511, iteration: 297448
loss: 1.0540975332260132,grad_norm: 0.8360017363981433, iteration: 297449
loss: 0.9701257944107056,grad_norm: 0.9286400054931337, iteration: 297450
loss: 0.9865782260894775,grad_norm: 0.6832468382361232, iteration: 297451
loss: 0.9765738844871521,grad_norm: 0.840929380729241, iteration: 297452
loss: 0.9875890612602234,grad_norm: 0.8715734702077256, iteration: 297453
loss: 0.9909972548484802,grad_norm: 0.8016146737097338, iteration: 297454
loss: 1.0354646444320679,grad_norm: 0.8848898927225305, iteration: 297455
loss: 0.9863318800926208,grad_norm: 0.8532953782491747, iteration: 297456
loss: 1.0226306915283203,grad_norm: 0.7905667332217068, iteration: 297457
loss: 0.9999239444732666,grad_norm: 0.8171065593838549, iteration: 297458
loss: 1.0221303701400757,grad_norm: 0.8509496232594247, iteration: 297459
loss: 0.9912989139556885,grad_norm: 0.8798576975281052, iteration: 297460
loss: 1.0080358982086182,grad_norm: 0.8923725615637839, iteration: 297461
loss: 0.9866092801094055,grad_norm: 0.7886783427926174, iteration: 297462
loss: 1.0144412517547607,grad_norm: 0.9999991041192106, iteration: 297463
loss: 1.0082523822784424,grad_norm: 0.796475671131137, iteration: 297464
loss: 1.0099859237670898,grad_norm: 0.8320949014471997, iteration: 297465
loss: 1.0492162704467773,grad_norm: 0.9999997770746331, iteration: 297466
loss: 0.9785067439079285,grad_norm: 0.6989282263684441, iteration: 297467
loss: 0.9736078381538391,grad_norm: 0.91314038564321, iteration: 297468
loss: 0.9698166847229004,grad_norm: 0.7941957011843713, iteration: 297469
loss: 0.9983530044555664,grad_norm: 0.9999997387100459, iteration: 297470
loss: 0.9993659257888794,grad_norm: 0.7653640449820731, iteration: 297471
loss: 0.98732590675354,grad_norm: 0.6953203481143877, iteration: 297472
loss: 1.010270595550537,grad_norm: 0.7908748920227906, iteration: 297473
loss: 0.9893935322761536,grad_norm: 0.8035998429068152, iteration: 297474
loss: 1.0204415321350098,grad_norm: 0.8478080085256414, iteration: 297475
loss: 1.0102431774139404,grad_norm: 0.8419256695126753, iteration: 297476
loss: 1.0179531574249268,grad_norm: 0.8325179586531116, iteration: 297477
loss: 1.030984878540039,grad_norm: 0.8077700613656337, iteration: 297478
loss: 0.9866452813148499,grad_norm: 0.8714584999682617, iteration: 297479
loss: 1.0448660850524902,grad_norm: 0.9416447986767726, iteration: 297480
loss: 0.9951805472373962,grad_norm: 0.9999991475204176, iteration: 297481
loss: 0.9681487679481506,grad_norm: 0.7750667114176533, iteration: 297482
loss: 0.955414891242981,grad_norm: 0.8112295054535112, iteration: 297483
loss: 1.0129599571228027,grad_norm: 0.8442573413876963, iteration: 297484
loss: 0.9987677335739136,grad_norm: 0.8106241531935658, iteration: 297485
loss: 0.9628791809082031,grad_norm: 0.8639486530717464, iteration: 297486
loss: 1.001449704170227,grad_norm: 0.999999344656457, iteration: 297487
loss: 0.9750558733940125,grad_norm: 0.8766142866724114, iteration: 297488
loss: 0.994295060634613,grad_norm: 0.7929126260277567, iteration: 297489
loss: 1.0209908485412598,grad_norm: 0.8124554501778636, iteration: 297490
loss: 0.9863506555557251,grad_norm: 0.7137420337134976, iteration: 297491
loss: 0.9945263266563416,grad_norm: 0.8359905702475744, iteration: 297492
loss: 0.9845480918884277,grad_norm: 0.7696077332430846, iteration: 297493
loss: 0.9875584244728088,grad_norm: 0.7330879181624564, iteration: 297494
loss: 0.9988418221473694,grad_norm: 0.8738078254738552, iteration: 297495
loss: 1.007185459136963,grad_norm: 0.9999990932485295, iteration: 297496
loss: 0.9963135123252869,grad_norm: 0.8175742513895007, iteration: 297497
loss: 0.9960833787918091,grad_norm: 0.9999991962156328, iteration: 297498
loss: 1.046323299407959,grad_norm: 0.6898807900858733, iteration: 297499
loss: 1.0566225051879883,grad_norm: 0.8308148485803152, iteration: 297500
loss: 0.9960998892784119,grad_norm: 0.882404778073029, iteration: 297501
loss: 0.9749282002449036,grad_norm: 0.7454283778790926, iteration: 297502
loss: 1.0048577785491943,grad_norm: 0.908336836898511, iteration: 297503
loss: 1.0117568969726562,grad_norm: 0.9999992376721613, iteration: 297504
loss: 1.0083926916122437,grad_norm: 0.779482102689606, iteration: 297505
loss: 1.0398707389831543,grad_norm: 0.8142497979046823, iteration: 297506
loss: 1.0504834651947021,grad_norm: 0.9999990282126539, iteration: 297507
loss: 0.966664731502533,grad_norm: 0.8692828467563816, iteration: 297508
loss: 1.0079432725906372,grad_norm: 0.8771309223904847, iteration: 297509
loss: 1.0101569890975952,grad_norm: 0.7559212660838304, iteration: 297510
loss: 0.9830005764961243,grad_norm: 0.8778321519619284, iteration: 297511
loss: 1.0156326293945312,grad_norm: 0.9999991040706966, iteration: 297512
loss: 0.9632098078727722,grad_norm: 0.9177454895580932, iteration: 297513
loss: 1.0313202142715454,grad_norm: 0.9644907865101259, iteration: 297514
loss: 1.033013105392456,grad_norm: 0.8435156161388924, iteration: 297515
loss: 0.9722012281417847,grad_norm: 0.7179841726117913, iteration: 297516
loss: 1.015669822692871,grad_norm: 0.8409157675488973, iteration: 297517
loss: 0.9880939722061157,grad_norm: 0.8743278839079919, iteration: 297518
loss: 0.9860455989837646,grad_norm: 0.9386000322217806, iteration: 297519
loss: 1.0744514465332031,grad_norm: 0.9999992391296141, iteration: 297520
loss: 1.0326836109161377,grad_norm: 0.999999114216451, iteration: 297521
loss: 0.9997053742408752,grad_norm: 0.9062835166545113, iteration: 297522
loss: 1.0195717811584473,grad_norm: 0.7441012737704618, iteration: 297523
loss: 0.9915093779563904,grad_norm: 0.6975071803859585, iteration: 297524
loss: 0.9995986819267273,grad_norm: 0.762920692387979, iteration: 297525
loss: 1.0134309530258179,grad_norm: 0.999999116518397, iteration: 297526
loss: 1.018475890159607,grad_norm: 0.889048116704156, iteration: 297527
loss: 0.9739094972610474,grad_norm: 0.9800578022460938, iteration: 297528
loss: 1.0260915756225586,grad_norm: 0.829754244247299, iteration: 297529
loss: 1.0276652574539185,grad_norm: 0.9999998195570718, iteration: 297530
loss: 0.9602816700935364,grad_norm: 0.8569404567032304, iteration: 297531
loss: 1.0286213159561157,grad_norm: 0.8063694490601206, iteration: 297532
loss: 0.9557718634605408,grad_norm: 0.8657797367265611, iteration: 297533
loss: 1.0209465026855469,grad_norm: 0.8467539477294105, iteration: 297534
loss: 1.026725172996521,grad_norm: 0.9999995289360217, iteration: 297535
loss: 0.9699946641921997,grad_norm: 0.9999991613689468, iteration: 297536
loss: 1.0100364685058594,grad_norm: 0.8704993477964122, iteration: 297537
loss: 1.0201832056045532,grad_norm: 0.8467904682043409, iteration: 297538
loss: 0.9694176912307739,grad_norm: 0.9999992529267923, iteration: 297539
loss: 1.0084041357040405,grad_norm: 0.822650204913728, iteration: 297540
loss: 1.0209262371063232,grad_norm: 0.9602935745349582, iteration: 297541
loss: 0.9812147617340088,grad_norm: 0.849629466591048, iteration: 297542
loss: 1.0120543241500854,grad_norm: 0.8684215935355215, iteration: 297543
loss: 1.0228595733642578,grad_norm: 0.9357015234644285, iteration: 297544
loss: 1.001585841178894,grad_norm: 0.95781833017324, iteration: 297545
loss: 0.9473162293434143,grad_norm: 0.9803201859096321, iteration: 297546
loss: 1.0233515501022339,grad_norm: 0.9822237311076317, iteration: 297547
loss: 1.0286399126052856,grad_norm: 0.9021526121775738, iteration: 297548
loss: 1.0425926446914673,grad_norm: 0.8434432561510171, iteration: 297549
loss: 0.9427588582038879,grad_norm: 0.993093198336909, iteration: 297550
loss: 1.0271536111831665,grad_norm: 0.747611570101292, iteration: 297551
loss: 1.0291324853897095,grad_norm: 0.6981097809545934, iteration: 297552
loss: 1.029408574104309,grad_norm: 0.7871902886452089, iteration: 297553
loss: 1.0155096054077148,grad_norm: 0.7700648949333698, iteration: 297554
loss: 1.0313141345977783,grad_norm: 0.9962453301034426, iteration: 297555
loss: 0.9916175007820129,grad_norm: 0.9999996327293231, iteration: 297556
loss: 0.9817103743553162,grad_norm: 0.826155553327013, iteration: 297557
loss: 1.0037648677825928,grad_norm: 0.708707641196006, iteration: 297558
loss: 1.1079498529434204,grad_norm: 0.9999991312136917, iteration: 297559
loss: 0.9505396485328674,grad_norm: 0.8148492339096193, iteration: 297560
loss: 1.0212843418121338,grad_norm: 0.8550709099345498, iteration: 297561
loss: 0.9894740581512451,grad_norm: 0.7641746124106461, iteration: 297562
loss: 1.0075188875198364,grad_norm: 0.9999991839888455, iteration: 297563
loss: 1.0252329111099243,grad_norm: 0.999999821817784, iteration: 297564
loss: 0.9847800135612488,grad_norm: 0.8227803336441277, iteration: 297565
loss: 0.962865948677063,grad_norm: 0.7415024771548901, iteration: 297566
loss: 0.977367103099823,grad_norm: 0.7731649391429485, iteration: 297567
loss: 0.9836756587028503,grad_norm: 0.9084474706990843, iteration: 297568
loss: 0.9867455959320068,grad_norm: 0.8489346244815963, iteration: 297569
loss: 1.0200070142745972,grad_norm: 0.8807014584686516, iteration: 297570
loss: 0.9990301728248596,grad_norm: 0.8099676915798593, iteration: 297571
loss: 1.029548168182373,grad_norm: 0.9125201514254746, iteration: 297572
loss: 0.9975310564041138,grad_norm: 0.793502492782897, iteration: 297573
loss: 1.0128480195999146,grad_norm: 0.9999990692152152, iteration: 297574
loss: 1.025048017501831,grad_norm: 0.738707194881357, iteration: 297575
loss: 0.9965158700942993,grad_norm: 0.8743505536023094, iteration: 297576
loss: 1.025807499885559,grad_norm: 0.7260518204214684, iteration: 297577
loss: 1.0388551950454712,grad_norm: 0.9999992492201463, iteration: 297578
loss: 1.0008894205093384,grad_norm: 0.7982270601131246, iteration: 297579
loss: 0.9923538565635681,grad_norm: 0.8524587878089039, iteration: 297580
loss: 0.981945812702179,grad_norm: 0.9999989112075998, iteration: 297581
loss: 0.984596312046051,grad_norm: 0.9260702536946909, iteration: 297582
loss: 0.9696761965751648,grad_norm: 0.9876835441388978, iteration: 297583
loss: 0.9665293097496033,grad_norm: 0.9999989550658892, iteration: 297584
loss: 1.0179184675216675,grad_norm: 0.9431974995816089, iteration: 297585
loss: 0.9990277290344238,grad_norm: 0.7847101469462304, iteration: 297586
loss: 1.0110260248184204,grad_norm: 0.9999992756046512, iteration: 297587
loss: 0.9791997671127319,grad_norm: 0.8286490563040957, iteration: 297588
loss: 1.016262173652649,grad_norm: 0.723090192334618, iteration: 297589
loss: 1.0099290609359741,grad_norm: 0.8003709311678929, iteration: 297590
loss: 1.0202890634536743,grad_norm: 0.9512463930268701, iteration: 297591
loss: 1.0037389993667603,grad_norm: 0.9450555031020073, iteration: 297592
loss: 0.987248957157135,grad_norm: 0.8967007837405863, iteration: 297593
loss: 1.0512007474899292,grad_norm: 0.7029909788043084, iteration: 297594
loss: 1.0187513828277588,grad_norm: 0.9820017491547998, iteration: 297595
loss: 1.0569018125534058,grad_norm: 0.9999990308753436, iteration: 297596
loss: 0.9995678067207336,grad_norm: 0.8954917011802196, iteration: 297597
loss: 1.04335618019104,grad_norm: 0.8683651330443799, iteration: 297598
loss: 1.0282387733459473,grad_norm: 0.8124861865591022, iteration: 297599
loss: 1.0231728553771973,grad_norm: 0.9784901072619725, iteration: 297600
loss: 1.0503473281860352,grad_norm: 0.96173893357648, iteration: 297601
loss: 0.9875684976577759,grad_norm: 0.8099991947384305, iteration: 297602
loss: 1.0038235187530518,grad_norm: 0.9147983497872643, iteration: 297603
loss: 1.0135987997055054,grad_norm: 0.7263564606670364, iteration: 297604
loss: 0.9686249494552612,grad_norm: 0.8485041523271325, iteration: 297605
loss: 0.9842588901519775,grad_norm: 0.9999990858467261, iteration: 297606
loss: 0.9675204157829285,grad_norm: 0.9723379716880293, iteration: 297607
loss: 1.018041968345642,grad_norm: 0.8990010397228821, iteration: 297608
loss: 1.033388614654541,grad_norm: 0.9999993355654574, iteration: 297609
loss: 1.0077253580093384,grad_norm: 0.8190551426474169, iteration: 297610
loss: 0.9686222672462463,grad_norm: 0.7555472495624471, iteration: 297611
loss: 0.9934825301170349,grad_norm: 0.6898121684613764, iteration: 297612
loss: 1.0324983596801758,grad_norm: 0.9268223907571598, iteration: 297613
loss: 1.01643705368042,grad_norm: 0.891533437897385, iteration: 297614
loss: 0.984527587890625,grad_norm: 0.8559145753644013, iteration: 297615
loss: 1.0566930770874023,grad_norm: 0.8661209166719183, iteration: 297616
loss: 0.9742343425750732,grad_norm: 0.8646254522026818, iteration: 297617
loss: 1.002195119857788,grad_norm: 0.954650256866464, iteration: 297618
loss: 1.0163828134536743,grad_norm: 0.8288745435548288, iteration: 297619
loss: 1.0081268548965454,grad_norm: 0.8652437573325689, iteration: 297620
loss: 1.0157393217086792,grad_norm: 0.846805913536146, iteration: 297621
loss: 1.0345828533172607,grad_norm: 0.9017660601211713, iteration: 297622
loss: 1.0132310390472412,grad_norm: 0.8930751752308365, iteration: 297623
loss: 0.9894853830337524,grad_norm: 0.9225212546565473, iteration: 297624
loss: 0.9864150881767273,grad_norm: 0.8159258476853778, iteration: 297625
loss: 1.0129362344741821,grad_norm: 0.999999169102061, iteration: 297626
loss: 0.984230101108551,grad_norm: 0.8480774543978056, iteration: 297627
loss: 1.0324115753173828,grad_norm: 0.926486084950531, iteration: 297628
loss: 0.9786850214004517,grad_norm: 0.7378927473162821, iteration: 297629
loss: 0.9603241086006165,grad_norm: 0.7568454355555193, iteration: 297630
loss: 0.9996252059936523,grad_norm: 0.8147284998425837, iteration: 297631
loss: 0.9923326373100281,grad_norm: 0.7672721873387411, iteration: 297632
loss: 0.9865553975105286,grad_norm: 0.9793587317327966, iteration: 297633
loss: 0.9888367652893066,grad_norm: 0.875697958562758, iteration: 297634
loss: 1.0211114883422852,grad_norm: 0.9999990092077993, iteration: 297635
loss: 0.9873992204666138,grad_norm: 0.7731116700246271, iteration: 297636
loss: 0.9713304042816162,grad_norm: 0.7189656358779919, iteration: 297637
loss: 1.0014338493347168,grad_norm: 0.9886085903626793, iteration: 297638
loss: 1.0356543064117432,grad_norm: 0.8940731829156696, iteration: 297639
loss: 0.9943091869354248,grad_norm: 0.8835762345335626, iteration: 297640
loss: 1.0195046663284302,grad_norm: 0.8386034312603571, iteration: 297641
loss: 1.0062146186828613,grad_norm: 0.9999991391004898, iteration: 297642
loss: 0.9731536507606506,grad_norm: 0.7397926656468958, iteration: 297643
loss: 1.0124069452285767,grad_norm: 0.9931945286362465, iteration: 297644
loss: 1.0049985647201538,grad_norm: 0.9999990584590672, iteration: 297645
loss: 1.0170722007751465,grad_norm: 0.9155769931751233, iteration: 297646
loss: 0.9919210076332092,grad_norm: 0.8472985757385285, iteration: 297647
loss: 0.9895423650741577,grad_norm: 0.7047371275140675, iteration: 297648
loss: 1.010759949684143,grad_norm: 0.7222639968638955, iteration: 297649
loss: 1.012009859085083,grad_norm: 0.8239918458185728, iteration: 297650
loss: 1.0105973482131958,grad_norm: 0.9081075467074522, iteration: 297651
loss: 1.0503536462783813,grad_norm: 0.8281942678495573, iteration: 297652
loss: 1.0026636123657227,grad_norm: 0.8401287002738371, iteration: 297653
loss: 1.0091153383255005,grad_norm: 0.9999991516747428, iteration: 297654
loss: 0.9782900810241699,grad_norm: 0.8172753795225375, iteration: 297655
loss: 0.9888575077056885,grad_norm: 0.8241856121508844, iteration: 297656
loss: 1.165062665939331,grad_norm: 0.9365102165131552, iteration: 297657
loss: 0.9902728199958801,grad_norm: 0.7848733250060335, iteration: 297658
loss: 1.0004202127456665,grad_norm: 0.9738860510710233, iteration: 297659
loss: 1.003827452659607,grad_norm: 0.910251380026534, iteration: 297660
loss: 1.015148639678955,grad_norm: 0.8488338032852188, iteration: 297661
loss: 0.9757634997367859,grad_norm: 0.8510162391009699, iteration: 297662
loss: 1.0223702192306519,grad_norm: 0.7902691591513807, iteration: 297663
loss: 1.021734595298767,grad_norm: 0.8987717048722188, iteration: 297664
loss: 0.9669296145439148,grad_norm: 0.912017810362491, iteration: 297665
loss: 1.0114449262619019,grad_norm: 0.8498921403484117, iteration: 297666
loss: 1.012174367904663,grad_norm: 0.9230203838970094, iteration: 297667
loss: 1.0157510042190552,grad_norm: 0.9999990665681054, iteration: 297668
loss: 0.9582598805427551,grad_norm: 0.8852347973109226, iteration: 297669
loss: 0.9697757363319397,grad_norm: 0.7720086427024931, iteration: 297670
loss: 0.9965653419494629,grad_norm: 0.8274457536931896, iteration: 297671
loss: 1.018774151802063,grad_norm: 0.7848830634645136, iteration: 297672
loss: 0.9928102493286133,grad_norm: 0.8769464053568887, iteration: 297673
loss: 0.9849560260772705,grad_norm: 0.7875574595135584, iteration: 297674
loss: 0.9719887971878052,grad_norm: 0.9999990319440124, iteration: 297675
loss: 1.0090957880020142,grad_norm: 0.8218853484263658, iteration: 297676
loss: 1.0278600454330444,grad_norm: 0.8107661720998907, iteration: 297677
loss: 1.0126153230667114,grad_norm: 0.8714889974248913, iteration: 297678
loss: 1.0330214500427246,grad_norm: 0.7791506021433243, iteration: 297679
loss: 1.0098227262496948,grad_norm: 0.7456511217480585, iteration: 297680
loss: 1.0068328380584717,grad_norm: 0.7405223933167459, iteration: 297681
loss: 0.9871349334716797,grad_norm: 0.8209528318156846, iteration: 297682
loss: 0.9530901312828064,grad_norm: 0.7877800296075709, iteration: 297683
loss: 0.9985848665237427,grad_norm: 0.9999991295782158, iteration: 297684
loss: 1.0250422954559326,grad_norm: 0.8606447962367463, iteration: 297685
loss: 1.002756118774414,grad_norm: 0.7787006031891344, iteration: 297686
loss: 1.022108554840088,grad_norm: 0.8313610263269987, iteration: 297687
loss: 1.022248387336731,grad_norm: 0.9223809176550483, iteration: 297688
loss: 1.0296844244003296,grad_norm: 0.9999990549601738, iteration: 297689
loss: 0.9867013096809387,grad_norm: 0.8857817044668171, iteration: 297690
loss: 0.9711664319038391,grad_norm: 0.86399422687038, iteration: 297691
loss: 0.9897419214248657,grad_norm: 0.9680015467303116, iteration: 297692
loss: 1.0095373392105103,grad_norm: 0.9622165799588385, iteration: 297693
loss: 1.0032345056533813,grad_norm: 0.8717456570031777, iteration: 297694
loss: 0.9918683767318726,grad_norm: 0.9999990100642201, iteration: 297695
loss: 0.9982147216796875,grad_norm: 0.7810822896406826, iteration: 297696
loss: 1.0201892852783203,grad_norm: 0.8798924924733221, iteration: 297697
loss: 1.0095372200012207,grad_norm: 0.8283688188634561, iteration: 297698
loss: 1.0112214088439941,grad_norm: 0.9464596095305278, iteration: 297699
loss: 1.0003879070281982,grad_norm: 0.8442979435317642, iteration: 297700
loss: 1.0279902219772339,grad_norm: 0.8864324726487943, iteration: 297701
loss: 0.9951686859130859,grad_norm: 0.7712896449201694, iteration: 297702
loss: 0.9922897219657898,grad_norm: 0.7904155064285188, iteration: 297703
loss: 1.0039162635803223,grad_norm: 0.7342895650033472, iteration: 297704
loss: 0.9884869456291199,grad_norm: 0.7846942659015054, iteration: 297705
loss: 1.017099142074585,grad_norm: 0.7249144778027674, iteration: 297706
loss: 1.0214561223983765,grad_norm: 0.9999995670970786, iteration: 297707
loss: 0.9937071204185486,grad_norm: 0.9352473921350734, iteration: 297708
loss: 1.0538839101791382,grad_norm: 0.9495288110784056, iteration: 297709
loss: 1.0010839700698853,grad_norm: 0.8503415106088409, iteration: 297710
loss: 1.0047736167907715,grad_norm: 0.759843499041498, iteration: 297711
loss: 0.9921428561210632,grad_norm: 0.8331850245518669, iteration: 297712
loss: 0.9942959547042847,grad_norm: 0.6958630155514492, iteration: 297713
loss: 0.9797812104225159,grad_norm: 0.8773380727530489, iteration: 297714
loss: 0.9762927293777466,grad_norm: 0.967046348400252, iteration: 297715
loss: 0.9963659644126892,grad_norm: 0.8285352558325365, iteration: 297716
loss: 1.0365660190582275,grad_norm: 0.9999990002353316, iteration: 297717
loss: 0.9824215769767761,grad_norm: 0.9874391168201347, iteration: 297718
loss: 1.0100899934768677,grad_norm: 0.7528769119847799, iteration: 297719
loss: 1.0073463916778564,grad_norm: 0.9999996921375389, iteration: 297720
loss: 1.0002009868621826,grad_norm: 0.7567601152524108, iteration: 297721
loss: 0.9708912968635559,grad_norm: 0.8746247388724234, iteration: 297722
loss: 1.0088963508605957,grad_norm: 0.8143133859596219, iteration: 297723
loss: 0.9690768122673035,grad_norm: 0.99999907072836, iteration: 297724
loss: 0.9720302820205688,grad_norm: 0.8592359021401031, iteration: 297725
loss: 1.0380882024765015,grad_norm: 0.8188769785888974, iteration: 297726
loss: 0.9523184895515442,grad_norm: 0.9463150207541413, iteration: 297727
loss: 1.0033936500549316,grad_norm: 0.8552653812489021, iteration: 297728
loss: 0.9831778407096863,grad_norm: 0.8612985161488829, iteration: 297729
loss: 0.9816591739654541,grad_norm: 0.8817338578634086, iteration: 297730
loss: 0.9711555242538452,grad_norm: 0.8971337266988867, iteration: 297731
loss: 1.005545735359192,grad_norm: 0.9961394575171884, iteration: 297732
loss: 1.0550639629364014,grad_norm: 0.7472650823895877, iteration: 297733
loss: 1.0072401762008667,grad_norm: 0.6797825269571364, iteration: 297734
loss: 1.0126851797103882,grad_norm: 0.9999990401323384, iteration: 297735
loss: 0.979800820350647,grad_norm: 0.766418171801508, iteration: 297736
loss: 1.0063706636428833,grad_norm: 0.732270501568431, iteration: 297737
loss: 1.0330283641815186,grad_norm: 0.938163100019955, iteration: 297738
loss: 0.9864479899406433,grad_norm: 0.8138859276293314, iteration: 297739
loss: 0.9906874895095825,grad_norm: 0.9362223037180727, iteration: 297740
loss: 1.0071871280670166,grad_norm: 0.9999990517745574, iteration: 297741
loss: 0.9871289730072021,grad_norm: 0.7680789137316969, iteration: 297742
loss: 0.9586525559425354,grad_norm: 0.742034923453005, iteration: 297743
loss: 0.9994062185287476,grad_norm: 0.9560227384469147, iteration: 297744
loss: 1.0139172077178955,grad_norm: 0.926428290926714, iteration: 297745
loss: 1.0100533962249756,grad_norm: 0.9773906294788192, iteration: 297746
loss: 1.005082130432129,grad_norm: 0.9438796114092722, iteration: 297747
loss: 1.022623896598816,grad_norm: 0.9046574848633164, iteration: 297748
loss: 1.0037111043930054,grad_norm: 0.8468253461216197, iteration: 297749
loss: 0.9572966694831848,grad_norm: 0.8503763355184287, iteration: 297750
loss: 1.0169490575790405,grad_norm: 0.8729938293041721, iteration: 297751
loss: 1.0469813346862793,grad_norm: 0.9999997660833698, iteration: 297752
loss: 0.9909726977348328,grad_norm: 0.8531899878031146, iteration: 297753
loss: 1.0128257274627686,grad_norm: 0.812473736554033, iteration: 297754
loss: 0.9973633289337158,grad_norm: 0.999999501227958, iteration: 297755
loss: 0.9807471632957458,grad_norm: 0.808962483560784, iteration: 297756
loss: 1.0011752843856812,grad_norm: 0.9091910604517455, iteration: 297757
loss: 1.034286618232727,grad_norm: 0.8752205198140763, iteration: 297758
loss: 0.9950623512268066,grad_norm: 0.9317624215322506, iteration: 297759
loss: 1.075609803199768,grad_norm: 0.9999990726917704, iteration: 297760
loss: 0.9957970976829529,grad_norm: 0.9175062888640986, iteration: 297761
loss: 1.0018715858459473,grad_norm: 0.9406119587317481, iteration: 297762
loss: 1.031817078590393,grad_norm: 0.9084584870783181, iteration: 297763
loss: 0.9938169717788696,grad_norm: 0.7931250703039358, iteration: 297764
loss: 0.9671362042427063,grad_norm: 0.9194433973098711, iteration: 297765
loss: 0.9764654636383057,grad_norm: 0.8352628739734709, iteration: 297766
loss: 1.0122286081314087,grad_norm: 0.8146580879552159, iteration: 297767
loss: 0.9600023031234741,grad_norm: 0.9999991516999021, iteration: 297768
loss: 0.9732141494750977,grad_norm: 0.9526251435025392, iteration: 297769
loss: 1.0037238597869873,grad_norm: 0.8575697377775683, iteration: 297770
loss: 0.9937509894371033,grad_norm: 0.9539941489665663, iteration: 297771
loss: 0.9666788578033447,grad_norm: 0.847238093696807, iteration: 297772
loss: 0.9751700162887573,grad_norm: 0.9999991107138569, iteration: 297773
loss: 1.0178349018096924,grad_norm: 0.8847910753053984, iteration: 297774
loss: 1.0017179250717163,grad_norm: 0.9999991949078525, iteration: 297775
loss: 0.9652191996574402,grad_norm: 0.8128177532030071, iteration: 297776
loss: 1.030198335647583,grad_norm: 0.9152926741002373, iteration: 297777
loss: 0.994704008102417,grad_norm: 0.8671807218183037, iteration: 297778
loss: 1.0300997495651245,grad_norm: 0.9677126614208755, iteration: 297779
loss: 0.9570478200912476,grad_norm: 0.8033479588166982, iteration: 297780
loss: 0.9766173362731934,grad_norm: 0.8431521308937382, iteration: 297781
loss: 1.0025511980056763,grad_norm: 0.98802522014933, iteration: 297782
loss: 0.9841699600219727,grad_norm: 0.9142822987896768, iteration: 297783
loss: 0.9886282682418823,grad_norm: 0.7874419998643724, iteration: 297784
loss: 1.0388263463974,grad_norm: 0.783120223005726, iteration: 297785
loss: 0.993181049823761,grad_norm: 0.8669816373293868, iteration: 297786
loss: 0.9928292036056519,grad_norm: 0.816091421170854, iteration: 297787
loss: 0.9964714646339417,grad_norm: 0.8554430458233709, iteration: 297788
loss: 1.024255394935608,grad_norm: 0.9199522927006017, iteration: 297789
loss: 1.0043818950653076,grad_norm: 0.9251062290167917, iteration: 297790
loss: 1.0060025453567505,grad_norm: 0.8145921125011468, iteration: 297791
loss: 0.988853931427002,grad_norm: 0.8247373169403484, iteration: 297792
loss: 0.9986158609390259,grad_norm: 0.825495805426425, iteration: 297793
loss: 0.9889383316040039,grad_norm: 0.8404798387957327, iteration: 297794
loss: 0.9664238691329956,grad_norm: 0.794769089909765, iteration: 297795
loss: 1.0143804550170898,grad_norm: 0.9014860513554442, iteration: 297796
loss: 1.0276072025299072,grad_norm: 0.8317718394698455, iteration: 297797
loss: 1.033616542816162,grad_norm: 0.8345388661244929, iteration: 297798
loss: 1.0172584056854248,grad_norm: 0.8805917292103027, iteration: 297799
loss: 0.9647819399833679,grad_norm: 0.9517314389333582, iteration: 297800
loss: 0.9891582727432251,grad_norm: 0.774369521990799, iteration: 297801
loss: 0.9781239628791809,grad_norm: 0.9999991657941233, iteration: 297802
loss: 1.0315603017807007,grad_norm: 0.9660311418632115, iteration: 297803
loss: 1.003800392150879,grad_norm: 0.9314347212946757, iteration: 297804
loss: 1.00685453414917,grad_norm: 0.7310971558946888, iteration: 297805
loss: 1.0165172815322876,grad_norm: 0.9624136191143315, iteration: 297806
loss: 1.0192152261734009,grad_norm: 0.9431903054274535, iteration: 297807
loss: 1.0197713375091553,grad_norm: 0.8882381986400862, iteration: 297808
loss: 0.9760894775390625,grad_norm: 0.9999991455848681, iteration: 297809
loss: 0.9512640237808228,grad_norm: 0.8602068212914774, iteration: 297810
loss: 0.9967384934425354,grad_norm: 0.8420514976374663, iteration: 297811
loss: 0.998465895652771,grad_norm: 0.7597303697933078, iteration: 297812
loss: 1.0060697793960571,grad_norm: 0.9746593277596758, iteration: 297813
loss: 0.991731584072113,grad_norm: 0.9530333268794908, iteration: 297814
loss: 1.0207196474075317,grad_norm: 0.9999991632837484, iteration: 297815
loss: 1.029137372970581,grad_norm: 0.8814063418259788, iteration: 297816
loss: 1.0201294422149658,grad_norm: 0.9116984585764845, iteration: 297817
loss: 1.004108190536499,grad_norm: 0.7018498915430986, iteration: 297818
loss: 1.0201908349990845,grad_norm: 0.9999992141239538, iteration: 297819
loss: 0.9774962663650513,grad_norm: 0.809129874179935, iteration: 297820
loss: 1.0027743577957153,grad_norm: 0.8648427435937782, iteration: 297821
loss: 1.0076886415481567,grad_norm: 0.8104156382357935, iteration: 297822
loss: 0.9817154407501221,grad_norm: 0.800053641398472, iteration: 297823
loss: 0.9878203868865967,grad_norm: 0.8491923932985521, iteration: 297824
loss: 1.0274677276611328,grad_norm: 0.7906798641984931, iteration: 297825
loss: 0.9927268624305725,grad_norm: 0.7045815126062025, iteration: 297826
loss: 0.9954472780227661,grad_norm: 0.9231920083494299, iteration: 297827
loss: 0.9902653694152832,grad_norm: 0.7944164607696866, iteration: 297828
loss: 0.9964605569839478,grad_norm: 0.8181923424102769, iteration: 297829
loss: 0.9866476058959961,grad_norm: 0.7244552979704645, iteration: 297830
loss: 0.9845032095909119,grad_norm: 0.8989774955084899, iteration: 297831
loss: 1.038803219795227,grad_norm: 0.7531752608994218, iteration: 297832
loss: 0.9866041541099548,grad_norm: 0.8200624143741577, iteration: 297833
loss: 0.9654373526573181,grad_norm: 0.9277053268027331, iteration: 297834
loss: 1.0269232988357544,grad_norm: 0.8978354171576293, iteration: 297835
loss: 0.9488382339477539,grad_norm: 0.8920155120467823, iteration: 297836
loss: 0.9650354385375977,grad_norm: 0.9999991275747724, iteration: 297837
loss: 1.0165282487869263,grad_norm: 0.917167327038061, iteration: 297838
loss: 0.9952786564826965,grad_norm: 0.986475535882947, iteration: 297839
loss: 0.9715892672538757,grad_norm: 0.8383206954181108, iteration: 297840
loss: 0.934336245059967,grad_norm: 0.8434600275044788, iteration: 297841
loss: 1.0035951137542725,grad_norm: 0.8192884775307704, iteration: 297842
loss: 0.9555720090866089,grad_norm: 0.84749250039069, iteration: 297843
loss: 1.0371419191360474,grad_norm: 0.9070089214537994, iteration: 297844
loss: 1.0118244886398315,grad_norm: 0.895092154566255, iteration: 297845
loss: 1.020650029182434,grad_norm: 0.8503141544631817, iteration: 297846
loss: 1.0027729272842407,grad_norm: 0.9804627815170461, iteration: 297847
loss: 1.0051071643829346,grad_norm: 0.7666701063516401, iteration: 297848
loss: 1.0050876140594482,grad_norm: 0.9497708838475821, iteration: 297849
loss: 1.0048993825912476,grad_norm: 0.8072831331831719, iteration: 297850
loss: 1.0515486001968384,grad_norm: 0.787669525510401, iteration: 297851
loss: 0.9828439950942993,grad_norm: 0.7024238541631805, iteration: 297852
loss: 1.0211982727050781,grad_norm: 0.8182018354321682, iteration: 297853
loss: 1.021951675415039,grad_norm: 0.8471351077182122, iteration: 297854
loss: 1.0059268474578857,grad_norm: 0.8047323785070123, iteration: 297855
loss: 0.9926230311393738,grad_norm: 0.8155056085550023, iteration: 297856
loss: 1.025124430656433,grad_norm: 0.8347732973150992, iteration: 297857
loss: 1.0038115978240967,grad_norm: 0.6816813205369117, iteration: 297858
loss: 0.9854418039321899,grad_norm: 0.8584612209282513, iteration: 297859
loss: 0.9977536797523499,grad_norm: 0.7635828296420627, iteration: 297860
loss: 0.9938995838165283,grad_norm: 0.9189844298720493, iteration: 297861
loss: 1.000627040863037,grad_norm: 0.9999990726495152, iteration: 297862
loss: 0.9885414242744446,grad_norm: 0.8638172564486699, iteration: 297863
loss: 0.9697269201278687,grad_norm: 0.9432753306439543, iteration: 297864
loss: 0.9956104159355164,grad_norm: 0.9999990239469372, iteration: 297865
loss: 0.9939666986465454,grad_norm: 0.8109613006339823, iteration: 297866
loss: 1.0371754169464111,grad_norm: 0.9587723283612587, iteration: 297867
loss: 1.0206615924835205,grad_norm: 0.8976890021009306, iteration: 297868
loss: 0.955682635307312,grad_norm: 0.8995061026198251, iteration: 297869
loss: 0.967350959777832,grad_norm: 0.9999991934729591, iteration: 297870
loss: 0.9992533326148987,grad_norm: 0.8810243611998403, iteration: 297871
loss: 0.9498833417892456,grad_norm: 0.7408869472001097, iteration: 297872
loss: 1.0079095363616943,grad_norm: 0.9889611466478038, iteration: 297873
loss: 0.9990835189819336,grad_norm: 0.7682525695926458, iteration: 297874
loss: 0.9988150000572205,grad_norm: 0.9909648542641132, iteration: 297875
loss: 1.0132557153701782,grad_norm: 0.9999990547211315, iteration: 297876
loss: 0.9920015335083008,grad_norm: 0.8300756021751894, iteration: 297877
loss: 0.969168484210968,grad_norm: 0.8697053496683737, iteration: 297878
loss: 1.0087507963180542,grad_norm: 0.9999995850628187, iteration: 297879
loss: 0.9940108060836792,grad_norm: 0.6951940111715926, iteration: 297880
loss: 1.0071628093719482,grad_norm: 0.8548341253583887, iteration: 297881
loss: 1.0236786603927612,grad_norm: 0.9372562274164883, iteration: 297882
loss: 0.9664615392684937,grad_norm: 0.8788037250020886, iteration: 297883
loss: 0.9770152568817139,grad_norm: 0.8356119387341074, iteration: 297884
loss: 0.9770116806030273,grad_norm: 0.8646915795226413, iteration: 297885
loss: 1.075714111328125,grad_norm: 0.9349774520672901, iteration: 297886
loss: 0.9641760587692261,grad_norm: 0.9999991030795322, iteration: 297887
loss: 0.9911637902259827,grad_norm: 0.8280916538029179, iteration: 297888
loss: 0.9544129967689514,grad_norm: 0.9356283958887445, iteration: 297889
loss: 1.0084584951400757,grad_norm: 0.870867650438228, iteration: 297890
loss: 1.0168622732162476,grad_norm: 0.7906074076185796, iteration: 297891
loss: 1.022156834602356,grad_norm: 0.9557639321030247, iteration: 297892
loss: 1.0496652126312256,grad_norm: 0.93441451936493, iteration: 297893
loss: 0.9851375818252563,grad_norm: 0.722063827172592, iteration: 297894
loss: 0.9882904887199402,grad_norm: 0.9999991622051252, iteration: 297895
loss: 0.9673176407814026,grad_norm: 0.8762850569013505, iteration: 297896
loss: 0.9739217162132263,grad_norm: 0.8339874218645247, iteration: 297897
loss: 1.0039308071136475,grad_norm: 0.8501992331669989, iteration: 297898
loss: 0.9836904406547546,grad_norm: 0.9999991134973144, iteration: 297899
loss: 1.0042724609375,grad_norm: 0.8060027958639999, iteration: 297900
loss: 1.0082964897155762,grad_norm: 0.850775565159386, iteration: 297901
loss: 0.9488344788551331,grad_norm: 0.8246236826339449, iteration: 297902
loss: 0.9967408776283264,grad_norm: 0.9376307201814219, iteration: 297903
loss: 0.9989227652549744,grad_norm: 0.9999994818430141, iteration: 297904
loss: 1.0174816846847534,grad_norm: 0.8891914884410799, iteration: 297905
loss: 0.990902841091156,grad_norm: 0.7741356292200023, iteration: 297906
loss: 0.9686380624771118,grad_norm: 0.9999990758402197, iteration: 297907
loss: 0.9643272161483765,grad_norm: 0.9999990546575722, iteration: 297908
loss: 0.974218487739563,grad_norm: 0.7903483842814412, iteration: 297909
loss: 1.0435402393341064,grad_norm: 0.7790650789863766, iteration: 297910
loss: 0.995339572429657,grad_norm: 0.8389604332361028, iteration: 297911
loss: 0.9638655781745911,grad_norm: 0.8489717901372168, iteration: 297912
loss: 0.9865649342536926,grad_norm: 0.7931850160522139, iteration: 297913
loss: 1.0272233486175537,grad_norm: 0.9999990427148703, iteration: 297914
loss: 1.0196423530578613,grad_norm: 0.9209134147605259, iteration: 297915
loss: 0.9774365425109863,grad_norm: 0.7313595011464172, iteration: 297916
loss: 0.9949172139167786,grad_norm: 0.8034250756373235, iteration: 297917
loss: 1.0105912685394287,grad_norm: 0.9999990789098661, iteration: 297918
loss: 1.0250165462493896,grad_norm: 0.999999190161591, iteration: 297919
loss: 0.9903369545936584,grad_norm: 0.7682826128388127, iteration: 297920
loss: 1.0143309831619263,grad_norm: 0.8789605705389942, iteration: 297921
loss: 1.0291599035263062,grad_norm: 0.8430861324083734, iteration: 297922
loss: 1.0027769804000854,grad_norm: 0.9745825181864403, iteration: 297923
loss: 1.0305652618408203,grad_norm: 0.9999994173380948, iteration: 297924
loss: 1.0113741159439087,grad_norm: 0.9130750069578892, iteration: 297925
loss: 0.984413743019104,grad_norm: 0.8768933816440841, iteration: 297926
loss: 0.9371429085731506,grad_norm: 0.8620816963069391, iteration: 297927
loss: 0.9828339219093323,grad_norm: 0.8836430042300046, iteration: 297928
loss: 1.0011242628097534,grad_norm: 0.9577096704309619, iteration: 297929
loss: 0.9606694579124451,grad_norm: 0.9999993994757784, iteration: 297930
loss: 0.9852688908576965,grad_norm: 0.9999992319017335, iteration: 297931
loss: 0.9820936322212219,grad_norm: 0.8733239354970112, iteration: 297932
loss: 1.0013673305511475,grad_norm: 0.9275567900313197, iteration: 297933
loss: 1.0020599365234375,grad_norm: 0.8963935989650659, iteration: 297934
loss: 0.9776279330253601,grad_norm: 0.9022024448264603, iteration: 297935
loss: 1.0059235095977783,grad_norm: 0.9999993208533337, iteration: 297936
loss: 0.9702209234237671,grad_norm: 0.9439612760938907, iteration: 297937
loss: 0.9928527474403381,grad_norm: 0.7381186412366139, iteration: 297938
loss: 1.032086968421936,grad_norm: 0.8267948526266343, iteration: 297939
loss: 1.026002049446106,grad_norm: 0.835267669385626, iteration: 297940
loss: 1.0072569847106934,grad_norm: 0.9999991158797608, iteration: 297941
loss: 0.9574735760688782,grad_norm: 0.9999989412526316, iteration: 297942
loss: 1.0122405290603638,grad_norm: 0.7577019241979615, iteration: 297943
loss: 0.9947803020477295,grad_norm: 0.8516290463719562, iteration: 297944
loss: 0.9927341938018799,grad_norm: 0.870628959849415, iteration: 297945
loss: 0.9782665967941284,grad_norm: 0.6826561497850517, iteration: 297946
loss: 1.0390084981918335,grad_norm: 0.9999995273607633, iteration: 297947
loss: 1.0053168535232544,grad_norm: 0.9434235047506181, iteration: 297948
loss: 1.0181516408920288,grad_norm: 0.9369400417821314, iteration: 297949
loss: 1.0236258506774902,grad_norm: 0.873655767071177, iteration: 297950
loss: 0.9746667146682739,grad_norm: 0.8762837867236477, iteration: 297951
loss: 0.9790971875190735,grad_norm: 0.8020531747796915, iteration: 297952
loss: 1.0277687311172485,grad_norm: 0.9999991390597276, iteration: 297953
loss: 1.0143768787384033,grad_norm: 0.9909484427168821, iteration: 297954
loss: 1.0259345769882202,grad_norm: 0.8158204411220114, iteration: 297955
loss: 1.000101089477539,grad_norm: 0.8663164687122326, iteration: 297956
loss: 0.9875734448432922,grad_norm: 0.8662977679894964, iteration: 297957
loss: 0.9975953698158264,grad_norm: 0.8497146365634434, iteration: 297958
loss: 1.0395498275756836,grad_norm: 0.7978367603479681, iteration: 297959
loss: 1.002133846282959,grad_norm: 0.7874561320826421, iteration: 297960
loss: 1.0195255279541016,grad_norm: 0.98632364997011, iteration: 297961
loss: 1.0167263746261597,grad_norm: 0.9519124209908519, iteration: 297962
loss: 1.0221891403198242,grad_norm: 0.9338966563653259, iteration: 297963
loss: 1.0123176574707031,grad_norm: 0.9999991825074318, iteration: 297964
loss: 0.9883118271827698,grad_norm: 0.7620670191107144, iteration: 297965
loss: 1.0305278301239014,grad_norm: 0.9493959116550158, iteration: 297966
loss: 1.0240813493728638,grad_norm: 0.798077681152721, iteration: 297967
loss: 1.0272423028945923,grad_norm: 0.876399198723904, iteration: 297968
loss: 0.9975964426994324,grad_norm: 0.762254016544146, iteration: 297969
loss: 0.991535484790802,grad_norm: 0.8986179571400109, iteration: 297970
loss: 1.0101189613342285,grad_norm: 0.9999990245008448, iteration: 297971
loss: 0.9705052971839905,grad_norm: 0.9619698972640183, iteration: 297972
loss: 1.040926218032837,grad_norm: 0.7573065489207097, iteration: 297973
loss: 1.017520546913147,grad_norm: 0.68892831744965, iteration: 297974
loss: 0.9930716156959534,grad_norm: 0.8825054717082224, iteration: 297975
loss: 0.973705530166626,grad_norm: 0.9485507049518282, iteration: 297976
loss: 0.9752184748649597,grad_norm: 0.7872407433775884, iteration: 297977
loss: 0.9973372220993042,grad_norm: 0.9411725840729386, iteration: 297978
loss: 1.0049850940704346,grad_norm: 0.866710515126346, iteration: 297979
loss: 0.9709033966064453,grad_norm: 0.8197896328957284, iteration: 297980
loss: 1.0551862716674805,grad_norm: 0.886883364948061, iteration: 297981
loss: 1.0313315391540527,grad_norm: 0.9525501229806848, iteration: 297982
loss: 1.0172394514083862,grad_norm: 0.8346937995894396, iteration: 297983
loss: 1.0049231052398682,grad_norm: 0.9999992027118255, iteration: 297984
loss: 0.9991888403892517,grad_norm: 0.74178591063138, iteration: 297985
loss: 1.0158023834228516,grad_norm: 0.9999990184877605, iteration: 297986
loss: 0.9888556599617004,grad_norm: 0.9641670088225961, iteration: 297987
loss: 1.0092968940734863,grad_norm: 0.9743266007229777, iteration: 297988
loss: 1.0093684196472168,grad_norm: 0.7690762750356964, iteration: 297989
loss: 1.0399595499038696,grad_norm: 0.8525991969294907, iteration: 297990
loss: 1.0003947019577026,grad_norm: 0.8595847117279675, iteration: 297991
loss: 1.0033278465270996,grad_norm: 0.6942006728947856, iteration: 297992
loss: 0.9826785922050476,grad_norm: 0.7766235849558781, iteration: 297993
loss: 0.9941965937614441,grad_norm: 0.9999990220066113, iteration: 297994
loss: 1.000024676322937,grad_norm: 0.7777116085358188, iteration: 297995
loss: 0.9918718934059143,grad_norm: 0.8746333085779002, iteration: 297996
loss: 0.9984302520751953,grad_norm: 0.9999991253714319, iteration: 297997
loss: 1.017281174659729,grad_norm: 0.8167530607936918, iteration: 297998
loss: 0.9927070140838623,grad_norm: 0.8525459733255686, iteration: 297999
loss: 1.0035899877548218,grad_norm: 0.7888093049737213, iteration: 298000
loss: 1.00528883934021,grad_norm: 0.8962122377469045, iteration: 298001
loss: 0.9746922850608826,grad_norm: 0.8095031289882203, iteration: 298002
loss: 0.980314314365387,grad_norm: 0.8059501159657311, iteration: 298003
loss: 1.0032936334609985,grad_norm: 0.8233284983324897, iteration: 298004
loss: 1.0182539224624634,grad_norm: 0.8658971700360812, iteration: 298005
loss: 1.0068591833114624,grad_norm: 0.9999990925701482, iteration: 298006
loss: 0.9815409779548645,grad_norm: 0.8839363613246147, iteration: 298007
loss: 1.026124358177185,grad_norm: 0.8337667754111975, iteration: 298008
loss: 0.9923753142356873,grad_norm: 0.8051507780144297, iteration: 298009
loss: 0.990239143371582,grad_norm: 0.9752857919098044, iteration: 298010
loss: 1.067827820777893,grad_norm: 0.9548810855391794, iteration: 298011
loss: 1.0040379762649536,grad_norm: 0.8616640526266377, iteration: 298012
loss: 0.9678487181663513,grad_norm: 0.8843197811105972, iteration: 298013
loss: 1.0000314712524414,grad_norm: 0.8090553847457055, iteration: 298014
loss: 1.0084130764007568,grad_norm: 0.7428236558546562, iteration: 298015
loss: 1.0002944469451904,grad_norm: 0.930943306184479, iteration: 298016
loss: 0.962813675403595,grad_norm: 0.8382747719305552, iteration: 298017
loss: 1.0035005807876587,grad_norm: 0.9999990168589492, iteration: 298018
loss: 1.008016586303711,grad_norm: 0.9999989876438687, iteration: 298019
loss: 1.0294795036315918,grad_norm: 0.7337522259619911, iteration: 298020
loss: 0.9480274319648743,grad_norm: 0.9009029214635927, iteration: 298021
loss: 1.0139511823654175,grad_norm: 0.8521037250579989, iteration: 298022
loss: 1.0188287496566772,grad_norm: 0.8303041732818829, iteration: 298023
loss: 0.9884509444236755,grad_norm: 0.9602040338003421, iteration: 298024
loss: 0.9413846731185913,grad_norm: 0.8620212029443617, iteration: 298025
loss: 1.0111125707626343,grad_norm: 0.7771848684278418, iteration: 298026
loss: 1.0078717470169067,grad_norm: 0.9167739196135776, iteration: 298027
loss: 0.998747706413269,grad_norm: 0.9900557129451973, iteration: 298028
loss: 1.041360855102539,grad_norm: 0.878375662931247, iteration: 298029
loss: 0.9966675043106079,grad_norm: 0.7588238080307268, iteration: 298030
loss: 1.013511300086975,grad_norm: 0.9395254291412853, iteration: 298031
loss: 1.010289192199707,grad_norm: 0.921313905668967, iteration: 298032
loss: 1.018086314201355,grad_norm: 0.8027483426780143, iteration: 298033
loss: 0.9808472394943237,grad_norm: 0.9999990592882583, iteration: 298034
loss: 0.9918277263641357,grad_norm: 0.6917415994528652, iteration: 298035
loss: 0.9643177390098572,grad_norm: 0.7617251590982449, iteration: 298036
loss: 1.01389479637146,grad_norm: 0.999999561618967, iteration: 298037
loss: 0.9971461892127991,grad_norm: 0.8300879890069277, iteration: 298038
loss: 0.9738602042198181,grad_norm: 0.922916576100385, iteration: 298039
loss: 1.0067260265350342,grad_norm: 0.7823638863419078, iteration: 298040
loss: 1.006925344467163,grad_norm: 0.8725292656132848, iteration: 298041
loss: 1.0062882900238037,grad_norm: 0.7585741067140204, iteration: 298042
loss: 1.0054574012756348,grad_norm: 0.9050662042463479, iteration: 298043
loss: 1.0042158365249634,grad_norm: 0.8916470984776376, iteration: 298044
loss: 0.9730265140533447,grad_norm: 0.8740675317914773, iteration: 298045
loss: 1.0347588062286377,grad_norm: 0.750976372950589, iteration: 298046
loss: 0.9699458479881287,grad_norm: 0.9699298443615958, iteration: 298047
loss: 1.0142260789871216,grad_norm: 0.9350414498925254, iteration: 298048
loss: 1.031039834022522,grad_norm: 0.8010730904947273, iteration: 298049
loss: 0.9735147953033447,grad_norm: 0.8280082453089131, iteration: 298050
loss: 1.0268664360046387,grad_norm: 0.9999995966241592, iteration: 298051
loss: 1.0258556604385376,grad_norm: 0.8476025553189654, iteration: 298052
loss: 1.0401625633239746,grad_norm: 0.8036411915784157, iteration: 298053
loss: 1.0039091110229492,grad_norm: 0.8612391511395507, iteration: 298054
loss: 0.9805855751037598,grad_norm: 0.8475519264171696, iteration: 298055
loss: 0.9853065609931946,grad_norm: 0.9999991180293194, iteration: 298056
loss: 1.0052863359451294,grad_norm: 0.8273888518873996, iteration: 298057
loss: 0.9979772567749023,grad_norm: 0.9514789340706002, iteration: 298058
loss: 1.0491724014282227,grad_norm: 0.7890436798312156, iteration: 298059
loss: 1.0023846626281738,grad_norm: 0.9506990827039434, iteration: 298060
loss: 0.9718174934387207,grad_norm: 0.7867310109079624, iteration: 298061
loss: 0.9976686835289001,grad_norm: 0.8868147991776065, iteration: 298062
loss: 0.95823073387146,grad_norm: 0.9465974247178623, iteration: 298063
loss: 0.9758564233779907,grad_norm: 0.9114069990639414, iteration: 298064
loss: 1.019747018814087,grad_norm: 0.9999999045773353, iteration: 298065
loss: 1.0053168535232544,grad_norm: 0.8431656927630146, iteration: 298066
loss: 0.988226056098938,grad_norm: 0.7956332491864787, iteration: 298067
loss: 0.977419912815094,grad_norm: 0.8531032319154842, iteration: 298068
loss: 0.9765908718109131,grad_norm: 0.8294022469349173, iteration: 298069
loss: 0.9521470069885254,grad_norm: 0.8681481016778838, iteration: 298070
loss: 1.0180424451828003,grad_norm: 0.9030643678013832, iteration: 298071
loss: 1.0327848196029663,grad_norm: 0.8069124080166501, iteration: 298072
loss: 0.9768689870834351,grad_norm: 0.8784720425351349, iteration: 298073
loss: 0.9792230725288391,grad_norm: 0.9280594084535443, iteration: 298074
loss: 0.9879344701766968,grad_norm: 0.790017976129276, iteration: 298075
loss: 0.987267792224884,grad_norm: 0.9459013331314222, iteration: 298076
loss: 1.0069653987884521,grad_norm: 0.7650549586553841, iteration: 298077
loss: 0.9579840302467346,grad_norm: 0.8011689768048113, iteration: 298078
loss: 1.0023959875106812,grad_norm: 0.9271393338936943, iteration: 298079
loss: 0.9900892972946167,grad_norm: 0.9397051010081063, iteration: 298080
loss: 1.0125229358673096,grad_norm: 0.8013656201016192, iteration: 298081
loss: 1.0273066759109497,grad_norm: 0.8905172291903056, iteration: 298082
loss: 0.9961624145507812,grad_norm: 0.8421641901149276, iteration: 298083
loss: 0.9659268260002136,grad_norm: 0.9761080249531546, iteration: 298084
loss: 1.0275310277938843,grad_norm: 0.8904161833375834, iteration: 298085
loss: 0.9967883229255676,grad_norm: 0.9162058966094231, iteration: 298086
loss: 0.9865502715110779,grad_norm: 0.9741963657158512, iteration: 298087
loss: 0.9881305694580078,grad_norm: 0.8306675929561113, iteration: 298088
loss: 1.0118029117584229,grad_norm: 0.9689999322788496, iteration: 298089
loss: 0.9912250638008118,grad_norm: 0.7629628866285769, iteration: 298090
loss: 1.0229064226150513,grad_norm: 0.8724120047256054, iteration: 298091
loss: 1.0013847351074219,grad_norm: 0.9195144415633818, iteration: 298092
loss: 0.996580183506012,grad_norm: 0.8028357679739412, iteration: 298093
loss: 0.9809786081314087,grad_norm: 0.7855491485262015, iteration: 298094
loss: 0.9652957320213318,grad_norm: 0.7364268419427459, iteration: 298095
loss: 0.9910624027252197,grad_norm: 0.8434821308656569, iteration: 298096
loss: 0.9901224374771118,grad_norm: 0.9999998130989807, iteration: 298097
loss: 0.9809123277664185,grad_norm: 0.8059652987272046, iteration: 298098
loss: 1.050809621810913,grad_norm: 0.8254006278789304, iteration: 298099
loss: 1.0095239877700806,grad_norm: 0.9780686294992544, iteration: 298100
loss: 1.0057986974716187,grad_norm: 0.8470248005921414, iteration: 298101
loss: 0.9917423725128174,grad_norm: 0.761826977585759, iteration: 298102
loss: 0.9870538711547852,grad_norm: 0.7355408410163651, iteration: 298103
loss: 1.0063714981079102,grad_norm: 0.862293109001664, iteration: 298104
loss: 1.0061237812042236,grad_norm: 0.9552465824008377, iteration: 298105
loss: 0.9953386187553406,grad_norm: 0.7412024964372582, iteration: 298106
loss: 0.9889591336250305,grad_norm: 0.8484839600097986, iteration: 298107
loss: 0.9644597768783569,grad_norm: 0.8392252406589245, iteration: 298108
loss: 1.0231375694274902,grad_norm: 0.8899034358383666, iteration: 298109
loss: 0.9736498594284058,grad_norm: 0.8430533315881737, iteration: 298110
loss: 1.037845492362976,grad_norm: 0.8319084236333907, iteration: 298111
loss: 0.9836889505386353,grad_norm: 0.931978755716233, iteration: 298112
loss: 0.9854878783226013,grad_norm: 0.8207849118980884, iteration: 298113
loss: 0.9785491228103638,grad_norm: 0.8210948387380337, iteration: 298114
loss: 0.9852550029754639,grad_norm: 0.8410427913900383, iteration: 298115
loss: 0.9531815648078918,grad_norm: 0.8003236171357541, iteration: 298116
loss: 0.97825688123703,grad_norm: 0.7902930484326317, iteration: 298117
loss: 0.9973406195640564,grad_norm: 0.8804404495212047, iteration: 298118
loss: 0.9713834524154663,grad_norm: 0.8388777375129068, iteration: 298119
loss: 1.0476921796798706,grad_norm: 0.9999998121760556, iteration: 298120
loss: 1.0198718309402466,grad_norm: 0.9999991380191865, iteration: 298121
loss: 0.9819003343582153,grad_norm: 0.8890032506700232, iteration: 298122
loss: 1.0050956010818481,grad_norm: 0.8027554874911194, iteration: 298123
loss: 1.0124499797821045,grad_norm: 0.8517297528182026, iteration: 298124
loss: 1.0173428058624268,grad_norm: 0.999999088207324, iteration: 298125
loss: 0.9966317415237427,grad_norm: 0.905033126079475, iteration: 298126
loss: 1.0227091312408447,grad_norm: 0.8477458922122938, iteration: 298127
loss: 0.9955464005470276,grad_norm: 0.9236979386958408, iteration: 298128
loss: 0.9875783920288086,grad_norm: 0.9031424019189076, iteration: 298129
loss: 1.0244265794754028,grad_norm: 0.9999991179926184, iteration: 298130
loss: 0.9987260699272156,grad_norm: 0.8304577232077347, iteration: 298131
loss: 0.9833746552467346,grad_norm: 0.9999992148880529, iteration: 298132
loss: 1.0253798961639404,grad_norm: 0.9999991773861621, iteration: 298133
loss: 0.9725760817527771,grad_norm: 0.9051941932601089, iteration: 298134
loss: 0.9942888021469116,grad_norm: 0.8498479994122744, iteration: 298135
loss: 1.0332155227661133,grad_norm: 0.6883609255654419, iteration: 298136
loss: 1.008373498916626,grad_norm: 0.9372696518046707, iteration: 298137
loss: 1.003145694732666,grad_norm: 0.8129525341050244, iteration: 298138
loss: 0.9983459115028381,grad_norm: 0.9367257271914459, iteration: 298139
loss: 1.0008777379989624,grad_norm: 0.8091838446934202, iteration: 298140
loss: 0.9924551844596863,grad_norm: 0.8864880566242657, iteration: 298141
loss: 1.0048353672027588,grad_norm: 0.9599767903000594, iteration: 298142
loss: 1.0060198307037354,grad_norm: 0.9999990561050504, iteration: 298143
loss: 0.9893155694007874,grad_norm: 0.7623941988973515, iteration: 298144
loss: 1.0066231489181519,grad_norm: 0.8599706569713452, iteration: 298145
loss: 0.9807224869728088,grad_norm: 0.8265157344514189, iteration: 298146
loss: 0.9487793445587158,grad_norm: 0.9299155078147197, iteration: 298147
loss: 1.0235602855682373,grad_norm: 0.7074132058203857, iteration: 298148
loss: 0.9845077395439148,grad_norm: 0.9772994038975048, iteration: 298149
loss: 0.9864116907119751,grad_norm: 0.9999990238393607, iteration: 298150
loss: 1.0156466960906982,grad_norm: 0.7938020311722109, iteration: 298151
loss: 1.0087614059448242,grad_norm: 0.7914522567234151, iteration: 298152
loss: 0.9898861646652222,grad_norm: 0.7879653970691616, iteration: 298153
loss: 0.9996585845947266,grad_norm: 0.8178206353361305, iteration: 298154
loss: 0.9986797571182251,grad_norm: 0.9387184861665386, iteration: 298155
loss: 0.9964696168899536,grad_norm: 0.6784629930332394, iteration: 298156
loss: 0.9880779981613159,grad_norm: 0.763490206502563, iteration: 298157
loss: 1.0553951263427734,grad_norm: 0.9999990504590117, iteration: 298158
loss: 1.0073707103729248,grad_norm: 0.8435454856102094, iteration: 298159
loss: 0.982434868812561,grad_norm: 0.8836106009228467, iteration: 298160
loss: 0.9786813855171204,grad_norm: 0.9999989280373127, iteration: 298161
loss: 0.9957921504974365,grad_norm: 0.8168838068160708, iteration: 298162
loss: 1.0117650032043457,grad_norm: 0.7688034941567128, iteration: 298163
loss: 1.0051686763763428,grad_norm: 0.9999989751460516, iteration: 298164
loss: 0.994491457939148,grad_norm: 0.8832483968517163, iteration: 298165
loss: 1.117862343788147,grad_norm: 0.9999992360458158, iteration: 298166
loss: 1.018813133239746,grad_norm: 0.9999991939219148, iteration: 298167
loss: 1.0129666328430176,grad_norm: 0.8920739891781444, iteration: 298168
loss: 1.0313708782196045,grad_norm: 0.8796841530991709, iteration: 298169
loss: 1.0603466033935547,grad_norm: 0.98276089795398, iteration: 298170
loss: 0.9913825988769531,grad_norm: 0.7340720829012606, iteration: 298171
loss: 1.0074375867843628,grad_norm: 0.8267381896212851, iteration: 298172
loss: 1.005904197692871,grad_norm: 0.926909167865412, iteration: 298173
loss: 1.0021952390670776,grad_norm: 0.7864414528661401, iteration: 298174
loss: 1.018823266029358,grad_norm: 0.9164717880788722, iteration: 298175
loss: 1.001084566116333,grad_norm: 0.7284353970783127, iteration: 298176
loss: 0.9710391759872437,grad_norm: 0.8773104100955594, iteration: 298177
loss: 0.9643266797065735,grad_norm: 0.7613555633718652, iteration: 298178
loss: 1.0039604902267456,grad_norm: 0.7739941903949697, iteration: 298179
loss: 1.0122370719909668,grad_norm: 0.8826142845972305, iteration: 298180
loss: 0.996809184551239,grad_norm: 0.9411386155601814, iteration: 298181
loss: 1.0023926496505737,grad_norm: 0.7510370969597276, iteration: 298182
loss: 0.9986598491668701,grad_norm: 0.8016907130501795, iteration: 298183
loss: 0.9700732231140137,grad_norm: 0.8835083178568797, iteration: 298184
loss: 1.0103402137756348,grad_norm: 0.9999990412258383, iteration: 298185
loss: 1.0296659469604492,grad_norm: 0.8564268646772197, iteration: 298186
loss: 0.9971560835838318,grad_norm: 0.8791903037917664, iteration: 298187
loss: 0.9659995436668396,grad_norm: 0.8008788689291799, iteration: 298188
loss: 1.0292079448699951,grad_norm: 0.7999254289995443, iteration: 298189
loss: 1.0077271461486816,grad_norm: 0.9004452473751171, iteration: 298190
loss: 1.0213921070098877,grad_norm: 0.7355642582999607, iteration: 298191
loss: 1.0143861770629883,grad_norm: 0.9999991881109137, iteration: 298192
loss: 0.9763131737709045,grad_norm: 0.7881902632912422, iteration: 298193
loss: 1.0016268491744995,grad_norm: 0.8098940869646156, iteration: 298194
loss: 1.0318052768707275,grad_norm: 0.9999991847212099, iteration: 298195
loss: 1.0035357475280762,grad_norm: 0.824586148422767, iteration: 298196
loss: 0.9929546117782593,grad_norm: 0.7977265743456579, iteration: 298197
loss: 0.997156023979187,grad_norm: 0.7880277097881304, iteration: 298198
loss: 1.0065311193466187,grad_norm: 0.8434885145314058, iteration: 298199
loss: 0.9987457990646362,grad_norm: 0.9778884080560998, iteration: 298200
loss: 1.0145682096481323,grad_norm: 0.9999989883775786, iteration: 298201
loss: 1.0465396642684937,grad_norm: 0.7155334780285885, iteration: 298202
loss: 1.0071642398834229,grad_norm: 0.8787022893329045, iteration: 298203
loss: 1.0292872190475464,grad_norm: 0.8182706193896853, iteration: 298204
loss: 0.9983105063438416,grad_norm: 0.7989532224763292, iteration: 298205
loss: 0.9760922789573669,grad_norm: 0.8904852337226729, iteration: 298206
loss: 1.0429942607879639,grad_norm: 0.8995886206953976, iteration: 298207
loss: 0.9941494464874268,grad_norm: 0.7702156683983056, iteration: 298208
loss: 1.0065066814422607,grad_norm: 0.9259875280523242, iteration: 298209
loss: 1.025557279586792,grad_norm: 0.8955508909532903, iteration: 298210
loss: 1.0299595594406128,grad_norm: 0.79252863736243, iteration: 298211
loss: 1.017212152481079,grad_norm: 0.7878752410302376, iteration: 298212
loss: 0.9908154010772705,grad_norm: 0.847019877957548, iteration: 298213
loss: 1.0092686414718628,grad_norm: 0.9999989935433158, iteration: 298214
loss: 1.0099844932556152,grad_norm: 0.8110915768860577, iteration: 298215
loss: 0.978606641292572,grad_norm: 0.9532238180892103, iteration: 298216
loss: 1.0253825187683105,grad_norm: 0.9999992869580863, iteration: 298217
loss: 0.9785221219062805,grad_norm: 0.8846728323624181, iteration: 298218
loss: 0.9964006543159485,grad_norm: 0.9999991014508063, iteration: 298219
loss: 0.9609236717224121,grad_norm: 0.875033023894778, iteration: 298220
loss: 0.9791617393493652,grad_norm: 0.7955543800072868, iteration: 298221
loss: 0.9969785213470459,grad_norm: 0.7307824244983522, iteration: 298222
loss: 1.0036180019378662,grad_norm: 0.7973934560385516, iteration: 298223
loss: 1.0412654876708984,grad_norm: 0.7682660123113649, iteration: 298224
loss: 0.9747597575187683,grad_norm: 0.8708814776328415, iteration: 298225
loss: 0.9523409605026245,grad_norm: 0.930351940053035, iteration: 298226
loss: 1.0243949890136719,grad_norm: 0.8564239815906759, iteration: 298227
loss: 1.0170880556106567,grad_norm: 0.9448918268340787, iteration: 298228
loss: 0.974160373210907,grad_norm: 0.864709631223068, iteration: 298229
loss: 1.0050145387649536,grad_norm: 0.9999997963007229, iteration: 298230
loss: 0.9424619674682617,grad_norm: 0.9758455047873318, iteration: 298231
loss: 1.0277270078659058,grad_norm: 0.6708447669224314, iteration: 298232
loss: 0.9856123328208923,grad_norm: 0.9999991728834139, iteration: 298233
loss: 1.014095664024353,grad_norm: 0.8688523885767657, iteration: 298234
loss: 0.9862427115440369,grad_norm: 0.8361380690556287, iteration: 298235
loss: 1.0242798328399658,grad_norm: 0.8329253435953375, iteration: 298236
loss: 0.9983865022659302,grad_norm: 0.8634455734230793, iteration: 298237
loss: 1.0036076307296753,grad_norm: 0.7342740650877589, iteration: 298238
loss: 1.0171562433242798,grad_norm: 0.8865074488397765, iteration: 298239
loss: 0.9916256666183472,grad_norm: 0.7713851463605981, iteration: 298240
loss: 0.9845582842826843,grad_norm: 0.8667577296689537, iteration: 298241
loss: 1.0043978691101074,grad_norm: 0.7633376675717237, iteration: 298242
loss: 1.0320841073989868,grad_norm: 0.7301351734355562, iteration: 298243
loss: 1.0380946397781372,grad_norm: 0.970872851898578, iteration: 298244
loss: 1.0271532535552979,grad_norm: 0.87287919109076, iteration: 298245
loss: 1.022276520729065,grad_norm: 0.9999998666356384, iteration: 298246
loss: 1.1095077991485596,grad_norm: 0.9999991156477537, iteration: 298247
loss: 0.9537302255630493,grad_norm: 0.9999994154131004, iteration: 298248
loss: 1.0064085721969604,grad_norm: 0.7450716998709053, iteration: 298249
loss: 0.9936440587043762,grad_norm: 0.7231506156680468, iteration: 298250
loss: 1.028880000114441,grad_norm: 0.6574363319577213, iteration: 298251
loss: 1.0291821956634521,grad_norm: 0.9361154626248814, iteration: 298252
loss: 0.9955043792724609,grad_norm: 0.9468579184349677, iteration: 298253
loss: 1.0059984922409058,grad_norm: 0.833566384237959, iteration: 298254
loss: 1.015515685081482,grad_norm: 0.8228776855318655, iteration: 298255
loss: 0.9716816544532776,grad_norm: 0.854886709906302, iteration: 298256
loss: 1.0044504404067993,grad_norm: 0.9189807629653814, iteration: 298257
loss: 1.0083884000778198,grad_norm: 0.7050311885139, iteration: 298258
loss: 1.0191129446029663,grad_norm: 0.9999992732512045, iteration: 298259
loss: 0.9706861972808838,grad_norm: 0.9172041721164659, iteration: 298260
loss: 0.9810745120048523,grad_norm: 0.9999990685729013, iteration: 298261
loss: 0.9872543215751648,grad_norm: 0.7884522644758948, iteration: 298262
loss: 1.0056183338165283,grad_norm: 0.8595231645054991, iteration: 298263
loss: 1.0138980150222778,grad_norm: 0.765404352928856, iteration: 298264
loss: 1.087938904762268,grad_norm: 0.9999990442245901, iteration: 298265
loss: 1.033667802810669,grad_norm: 0.7274466625534913, iteration: 298266
loss: 1.0023218393325806,grad_norm: 0.9359993744013001, iteration: 298267
loss: 0.9632928371429443,grad_norm: 0.8608288080032123, iteration: 298268
loss: 1.0381096601486206,grad_norm: 0.8442431193141102, iteration: 298269
loss: 0.9690935015678406,grad_norm: 0.9219846949380993, iteration: 298270
loss: 1.0072011947631836,grad_norm: 0.8800101495978537, iteration: 298271
loss: 0.9993103742599487,grad_norm: 0.9999990409111341, iteration: 298272
loss: 0.9923028349876404,grad_norm: 0.9999991063737557, iteration: 298273
loss: 1.0436525344848633,grad_norm: 0.9576218288247023, iteration: 298274
loss: 1.0011441707611084,grad_norm: 0.6734454473463402, iteration: 298275
loss: 1.0432164669036865,grad_norm: 0.9782058160333038, iteration: 298276
loss: 0.9855217933654785,grad_norm: 0.8464316812196584, iteration: 298277
loss: 1.0179941654205322,grad_norm: 0.9006388532885338, iteration: 298278
loss: 1.000882625579834,grad_norm: 0.7416385116160817, iteration: 298279
loss: 1.02198326587677,grad_norm: 0.8561091826420515, iteration: 298280
loss: 1.0010629892349243,grad_norm: 0.7557161040446372, iteration: 298281
loss: 0.9615125060081482,grad_norm: 0.8834260756016231, iteration: 298282
loss: 1.038224697113037,grad_norm: 0.7094278354491699, iteration: 298283
loss: 1.0099060535430908,grad_norm: 0.8209159488420428, iteration: 298284
loss: 0.9976249933242798,grad_norm: 0.8923369307240762, iteration: 298285
loss: 1.0345388650894165,grad_norm: 0.9999997055975822, iteration: 298286
loss: 0.9961589574813843,grad_norm: 0.8416185299738344, iteration: 298287
loss: 0.9917564988136292,grad_norm: 0.8960611232479037, iteration: 298288
loss: 0.9905627369880676,grad_norm: 0.947249576293962, iteration: 298289
loss: 0.9949307441711426,grad_norm: 0.7735559989018865, iteration: 298290
loss: 0.9789153933525085,grad_norm: 0.798007158122047, iteration: 298291
loss: 1.0140597820281982,grad_norm: 0.7290070721997134, iteration: 298292
loss: 0.9709093570709229,grad_norm: 0.9272362560515102, iteration: 298293
loss: 1.006312608718872,grad_norm: 0.9079617020295785, iteration: 298294
loss: 0.9909272193908691,grad_norm: 0.7653748327284651, iteration: 298295
loss: 0.9917302131652832,grad_norm: 0.8509161272097514, iteration: 298296
loss: 0.9489185810089111,grad_norm: 0.8748393147055677, iteration: 298297
loss: 1.0366114377975464,grad_norm: 0.743774990331665, iteration: 298298
loss: 1.0012608766555786,grad_norm: 0.9005454768678084, iteration: 298299
loss: 1.016275405883789,grad_norm: 0.9999995420035892, iteration: 298300
loss: 0.9997621774673462,grad_norm: 0.7893381142174938, iteration: 298301
loss: 1.005255103111267,grad_norm: 0.9881723436123594, iteration: 298302
loss: 0.9663153886795044,grad_norm: 0.8785657615333775, iteration: 298303
loss: 0.9666260480880737,grad_norm: 0.8713976432006169, iteration: 298304
loss: 1.0304924249649048,grad_norm: 0.7733528157037785, iteration: 298305
loss: 0.9903247952461243,grad_norm: 0.9168394485002831, iteration: 298306
loss: 1.0390605926513672,grad_norm: 0.8259220773484415, iteration: 298307
loss: 0.9927142858505249,grad_norm: 0.9186372973148192, iteration: 298308
loss: 0.9813864827156067,grad_norm: 0.7847694406823178, iteration: 298309
loss: 1.004058599472046,grad_norm: 0.8933384619443969, iteration: 298310
loss: 1.0086703300476074,grad_norm: 0.9333947224664731, iteration: 298311
loss: 0.9775645732879639,grad_norm: 0.8756809814413907, iteration: 298312
loss: 1.0009020566940308,grad_norm: 0.9594645503041184, iteration: 298313
loss: 0.9986361265182495,grad_norm: 0.9999991120076333, iteration: 298314
loss: 0.9931678175926208,grad_norm: 0.8517992305293286, iteration: 298315
loss: 1.0006346702575684,grad_norm: 0.8531726760027505, iteration: 298316
loss: 0.9986732006072998,grad_norm: 0.9883553338448182, iteration: 298317
loss: 1.0078383684158325,grad_norm: 0.8142490577216648, iteration: 298318
loss: 0.9962558746337891,grad_norm: 0.9797999041513137, iteration: 298319
loss: 1.0006794929504395,grad_norm: 0.7495119511689707, iteration: 298320
loss: 0.9841524958610535,grad_norm: 0.9100113341334664, iteration: 298321
loss: 0.9825465083122253,grad_norm: 0.7832517684927949, iteration: 298322
loss: 1.0340617895126343,grad_norm: 0.9158922426825093, iteration: 298323
loss: 0.9854663610458374,grad_norm: 0.8773957614108342, iteration: 298324
loss: 1.022148847579956,grad_norm: 0.8183103848370464, iteration: 298325
loss: 0.9972102046012878,grad_norm: 0.9999996007494026, iteration: 298326
loss: 1.0535318851470947,grad_norm: 0.9999992355010315, iteration: 298327
loss: 0.9956365823745728,grad_norm: 0.9002057792850799, iteration: 298328
loss: 1.0059638023376465,grad_norm: 0.8890076024797454, iteration: 298329
loss: 1.0019389390945435,grad_norm: 0.8356197509257532, iteration: 298330
loss: 1.0026801824569702,grad_norm: 0.9999992238944247, iteration: 298331
loss: 1.0056085586547852,grad_norm: 0.9074256377316451, iteration: 298332
loss: 0.9994094371795654,grad_norm: 0.9919560675533944, iteration: 298333
loss: 1.02976655960083,grad_norm: 0.8898048865196784, iteration: 298334
loss: 1.0087772607803345,grad_norm: 0.8333033633573658, iteration: 298335
loss: 1.01872718334198,grad_norm: 0.9486260693730412, iteration: 298336
loss: 0.9705013036727905,grad_norm: 0.9999991803464224, iteration: 298337
loss: 0.9729369878768921,grad_norm: 0.8437426746471693, iteration: 298338
loss: 0.9881746768951416,grad_norm: 0.8587778737039169, iteration: 298339
loss: 0.9632596969604492,grad_norm: 0.9133183737202495, iteration: 298340
loss: 0.9661882519721985,grad_norm: 0.7000416699576837, iteration: 298341
loss: 0.9918060302734375,grad_norm: 0.6738867543230596, iteration: 298342
loss: 0.9995614290237427,grad_norm: 0.8652137296531902, iteration: 298343
loss: 0.9749545454978943,grad_norm: 0.7456597942715517, iteration: 298344
loss: 1.0232330560684204,grad_norm: 0.9999991290286194, iteration: 298345
loss: 1.0266108512878418,grad_norm: 0.9999993592398068, iteration: 298346
loss: 1.01260244846344,grad_norm: 0.9999991971173067, iteration: 298347
loss: 1.000303030014038,grad_norm: 0.9999990568635427, iteration: 298348
loss: 1.0136629343032837,grad_norm: 0.8112196933497912, iteration: 298349
loss: 0.9980943202972412,grad_norm: 0.8531631151557377, iteration: 298350
loss: 1.0566658973693848,grad_norm: 0.9999991707327225, iteration: 298351
loss: 0.9356944561004639,grad_norm: 0.8755168022514147, iteration: 298352
loss: 0.9808517694473267,grad_norm: 0.9474857454693221, iteration: 298353
loss: 1.0615484714508057,grad_norm: 0.9011596079818635, iteration: 298354
loss: 1.0034348964691162,grad_norm: 0.8626643516945358, iteration: 298355
loss: 1.0143986940383911,grad_norm: 0.8277290966607624, iteration: 298356
loss: 0.9884982109069824,grad_norm: 0.89086212926896, iteration: 298357
loss: 1.0013151168823242,grad_norm: 0.7851195859689412, iteration: 298358
loss: 0.9514625072479248,grad_norm: 0.8751663906144507, iteration: 298359
loss: 0.9822339415550232,grad_norm: 0.7739626950598488, iteration: 298360
loss: 1.006988525390625,grad_norm: 0.9174971457046786, iteration: 298361
loss: 1.0663310289382935,grad_norm: 0.977681241297204, iteration: 298362
loss: 0.9704427123069763,grad_norm: 0.9078774204868397, iteration: 298363
loss: 1.0182863473892212,grad_norm: 0.8923549233448304, iteration: 298364
loss: 0.9954484701156616,grad_norm: 0.7209125747109227, iteration: 298365
loss: 0.9642259478569031,grad_norm: 0.9999989717268164, iteration: 298366
loss: 1.0003998279571533,grad_norm: 0.8819386537953094, iteration: 298367
loss: 1.0105884075164795,grad_norm: 0.6508310999758052, iteration: 298368
loss: 0.9327592253684998,grad_norm: 0.9999990774646226, iteration: 298369
loss: 1.0158175230026245,grad_norm: 0.8184128463585868, iteration: 298370
loss: 0.9954559206962585,grad_norm: 0.8355449641249298, iteration: 298371
loss: 1.0168731212615967,grad_norm: 0.9632561195547793, iteration: 298372
loss: 0.995114266872406,grad_norm: 0.7231514440271193, iteration: 298373
loss: 0.9709043502807617,grad_norm: 0.9466803683411988, iteration: 298374
loss: 0.9996579885482788,grad_norm: 0.7818199307565913, iteration: 298375
loss: 1.013728380203247,grad_norm: 0.9999991057769451, iteration: 298376
loss: 1.0181950330734253,grad_norm: 0.7616637008323353, iteration: 298377
loss: 0.9472584128379822,grad_norm: 0.7800836204633237, iteration: 298378
loss: 1.0189568996429443,grad_norm: 0.9763219061750921, iteration: 298379
loss: 1.0117236375808716,grad_norm: 0.7820788565584973, iteration: 298380
loss: 1.017091155052185,grad_norm: 0.99999906147895, iteration: 298381
loss: 0.9868178963661194,grad_norm: 0.9764837161235762, iteration: 298382
loss: 0.9845174551010132,grad_norm: 0.8377598248363918, iteration: 298383
loss: 0.9966610074043274,grad_norm: 0.9189949806847111, iteration: 298384
loss: 1.0197910070419312,grad_norm: 0.835545568558709, iteration: 298385
loss: 0.9834510684013367,grad_norm: 0.999999094533706, iteration: 298386
loss: 1.0204627513885498,grad_norm: 0.8791285731171176, iteration: 298387
loss: 1.011741280555725,grad_norm: 0.7852542056053118, iteration: 298388
loss: 1.019913673400879,grad_norm: 0.8178191152485581, iteration: 298389
loss: 1.0021618604660034,grad_norm: 0.9515902812170731, iteration: 298390
loss: 1.01393723487854,grad_norm: 0.7906822773837255, iteration: 298391
loss: 0.9973106980323792,grad_norm: 0.8987644320529531, iteration: 298392
loss: 0.9711398482322693,grad_norm: 0.8009810530998897, iteration: 298393
loss: 0.9722659587860107,grad_norm: 0.7189422618470127, iteration: 298394
loss: 1.1063276529312134,grad_norm: 0.9258545906816351, iteration: 298395
loss: 1.0035330057144165,grad_norm: 0.9031401849854803, iteration: 298396
loss: 0.9877899289131165,grad_norm: 0.7303455866290035, iteration: 298397
loss: 0.9717078804969788,grad_norm: 0.6903863171002482, iteration: 298398
loss: 0.9969408512115479,grad_norm: 0.899769329812706, iteration: 298399
loss: 1.0086208581924438,grad_norm: 0.7610388284682624, iteration: 298400
loss: 1.0316787958145142,grad_norm: 0.944705822445973, iteration: 298401
loss: 0.99912428855896,grad_norm: 0.8116337424691975, iteration: 298402
loss: 1.0033395290374756,grad_norm: 0.820665927430217, iteration: 298403
loss: 1.0068950653076172,grad_norm: 0.7580428914211739, iteration: 298404
loss: 1.0113977193832397,grad_norm: 0.6811343034378684, iteration: 298405
loss: 0.9925908446311951,grad_norm: 0.96747037716785, iteration: 298406
loss: 0.9819085001945496,grad_norm: 0.8753663720956285, iteration: 298407
loss: 1.0793251991271973,grad_norm: 0.9999990962959681, iteration: 298408
loss: 1.015333890914917,grad_norm: 0.9559057456762257, iteration: 298409
loss: 0.9952801465988159,grad_norm: 0.8722593465689625, iteration: 298410
loss: 1.0122216939926147,grad_norm: 0.7096085159934805, iteration: 298411
loss: 1.0039784908294678,grad_norm: 0.8960824290220188, iteration: 298412
loss: 0.9529394507408142,grad_norm: 0.8829733018108351, iteration: 298413
loss: 0.9994654059410095,grad_norm: 0.7583789882835534, iteration: 298414
loss: 1.0378252267837524,grad_norm: 0.8045741479183158, iteration: 298415
loss: 0.9861884713172913,grad_norm: 0.8652614425018745, iteration: 298416
loss: 0.9962199926376343,grad_norm: 0.9249076122256988, iteration: 298417
loss: 0.9797400832176208,grad_norm: 0.9821097185253481, iteration: 298418
loss: 0.9701721668243408,grad_norm: 0.7486037881083165, iteration: 298419
loss: 0.998664140701294,grad_norm: 0.8158092177725618, iteration: 298420
loss: 0.9744555354118347,grad_norm: 0.9999991685176053, iteration: 298421
loss: 1.0156797170639038,grad_norm: 0.7536209756365874, iteration: 298422
loss: 0.9843566417694092,grad_norm: 0.6468487340931229, iteration: 298423
loss: 0.966813325881958,grad_norm: 0.7188648131412868, iteration: 298424
loss: 0.9925671815872192,grad_norm: 0.9413832069024308, iteration: 298425
loss: 0.992193341255188,grad_norm: 0.8959857135464189, iteration: 298426
loss: 0.9907658696174622,grad_norm: 0.8940709895701132, iteration: 298427
loss: 1.0185956954956055,grad_norm: 0.9999992957039122, iteration: 298428
loss: 0.9751702547073364,grad_norm: 0.7456710345460347, iteration: 298429
loss: 1.0039173364639282,grad_norm: 0.9999992302725685, iteration: 298430
loss: 1.1013673543930054,grad_norm: 0.9999995350640142, iteration: 298431
loss: 0.984265148639679,grad_norm: 0.8588225933156398, iteration: 298432
loss: 0.9672585725784302,grad_norm: 0.9999992081876753, iteration: 298433
loss: 0.988236665725708,grad_norm: 0.7197540711234638, iteration: 298434
loss: 0.9613195061683655,grad_norm: 0.851642445048837, iteration: 298435
loss: 1.0089236497879028,grad_norm: 0.9599573003513868, iteration: 298436
loss: 1.000797986984253,grad_norm: 0.8702223159818272, iteration: 298437
loss: 1.007442593574524,grad_norm: 0.9575455828971804, iteration: 298438
loss: 0.9951033592224121,grad_norm: 0.9999990421137007, iteration: 298439
loss: 0.9920229911804199,grad_norm: 0.9999998949565236, iteration: 298440
loss: 0.9865700602531433,grad_norm: 0.8671117643092584, iteration: 298441
loss: 0.9897010326385498,grad_norm: 0.8204660721813708, iteration: 298442
loss: 0.9871633648872375,grad_norm: 0.7108228183968284, iteration: 298443
loss: 1.0145052671432495,grad_norm: 0.8358892546285788, iteration: 298444
loss: 1.0219453573226929,grad_norm: 0.7835800213491156, iteration: 298445
loss: 1.0943890810012817,grad_norm: 0.999539057145112, iteration: 298446
loss: 1.0219383239746094,grad_norm: 0.723606183287634, iteration: 298447
loss: 1.0147258043289185,grad_norm: 0.9119968916159283, iteration: 298448
loss: 1.0100711584091187,grad_norm: 0.7478402559935824, iteration: 298449
loss: 1.1440504789352417,grad_norm: 0.9999995327799167, iteration: 298450
loss: 1.020890235900879,grad_norm: 0.8518081200600244, iteration: 298451
loss: 0.9718960523605347,grad_norm: 0.6913427807277025, iteration: 298452
loss: 0.999321699142456,grad_norm: 0.9999997495895313, iteration: 298453
loss: 0.9840900301933289,grad_norm: 0.7907215747019612, iteration: 298454
loss: 1.0167816877365112,grad_norm: 0.9999991612523516, iteration: 298455
loss: 0.9955999255180359,grad_norm: 0.8357405056328919, iteration: 298456
loss: 1.0189183950424194,grad_norm: 0.8206969159228257, iteration: 298457
loss: 1.010949969291687,grad_norm: 0.7735159762696375, iteration: 298458
loss: 0.9975823760032654,grad_norm: 0.8781384958909927, iteration: 298459
loss: 0.985962450504303,grad_norm: 0.9853846246683253, iteration: 298460
loss: 1.0083061456680298,grad_norm: 0.9999995099102504, iteration: 298461
loss: 0.9909629225730896,grad_norm: 0.8390050059024685, iteration: 298462
loss: 1.003060221672058,grad_norm: 0.9501595910344313, iteration: 298463
loss: 1.0441150665283203,grad_norm: 0.8004444458806134, iteration: 298464
loss: 1.0090672969818115,grad_norm: 0.9167578247188838, iteration: 298465
loss: 1.0332552194595337,grad_norm: 0.8553164688336461, iteration: 298466
loss: 1.0094308853149414,grad_norm: 0.9226298518100071, iteration: 298467
loss: 1.012543797492981,grad_norm: 0.9999990875223495, iteration: 298468
loss: 1.0290212631225586,grad_norm: 0.999999096116472, iteration: 298469
loss: 0.9731865525245667,grad_norm: 0.8263719422336152, iteration: 298470
loss: 1.020656943321228,grad_norm: 0.9999990148318704, iteration: 298471
loss: 1.0202460289001465,grad_norm: 0.8929911795680381, iteration: 298472
loss: 1.0086562633514404,grad_norm: 0.999999170525473, iteration: 298473
loss: 0.9253884553909302,grad_norm: 0.9461795233957531, iteration: 298474
loss: 1.0027902126312256,grad_norm: 0.8680518225391739, iteration: 298475
loss: 1.028174877166748,grad_norm: 0.8665105974329589, iteration: 298476
loss: 0.9786432981491089,grad_norm: 0.756742698543634, iteration: 298477
loss: 1.0571829080581665,grad_norm: 0.9144673617869719, iteration: 298478
loss: 0.9854168891906738,grad_norm: 0.8554335328393889, iteration: 298479
loss: 1.0066187381744385,grad_norm: 0.902002306875263, iteration: 298480
loss: 0.9915351271629333,grad_norm: 0.8015885730018157, iteration: 298481
loss: 1.0034452676773071,grad_norm: 0.7724777930429613, iteration: 298482
loss: 1.0543385744094849,grad_norm: 0.9999990108317923, iteration: 298483
loss: 0.9909903407096863,grad_norm: 0.770909360115175, iteration: 298484
loss: 0.9909377694129944,grad_norm: 0.7226595081228682, iteration: 298485
loss: 0.9689812064170837,grad_norm: 0.8910257027674239, iteration: 298486
loss: 1.0128066539764404,grad_norm: 0.8167812703881719, iteration: 298487
loss: 0.9911036491394043,grad_norm: 0.8696287656145875, iteration: 298488
loss: 0.9912453293800354,grad_norm: 0.8138132351559534, iteration: 298489
loss: 1.007501482963562,grad_norm: 0.999999487656615, iteration: 298490
loss: 1.001739740371704,grad_norm: 0.9283745365978523, iteration: 298491
loss: 0.9979062080383301,grad_norm: 0.8295434516245436, iteration: 298492
loss: 0.943743884563446,grad_norm: 0.9999990108863902, iteration: 298493
loss: 1.0107481479644775,grad_norm: 0.9253039230943058, iteration: 298494
loss: 1.0021284818649292,grad_norm: 0.8146037152003331, iteration: 298495
loss: 1.000216007232666,grad_norm: 0.9710626876321269, iteration: 298496
loss: 0.9824453592300415,grad_norm: 0.8711094449079185, iteration: 298497
loss: 0.9910709857940674,grad_norm: 0.8421044496967083, iteration: 298498
loss: 1.0279232263565063,grad_norm: 0.9999994417055207, iteration: 298499
loss: 0.9953354597091675,grad_norm: 0.9149761607507237, iteration: 298500
loss: 1.0148344039916992,grad_norm: 0.9483881037688203, iteration: 298501
loss: 0.9832653999328613,grad_norm: 0.8512997347976136, iteration: 298502
loss: 1.0366584062576294,grad_norm: 0.8647617151620403, iteration: 298503
loss: 0.9365996718406677,grad_norm: 0.9461252966947522, iteration: 298504
loss: 1.001362919807434,grad_norm: 0.8207387075674616, iteration: 298505
loss: 1.0117686986923218,grad_norm: 0.8703432444995821, iteration: 298506
loss: 0.9993541836738586,grad_norm: 0.84194394649908, iteration: 298507
loss: 1.0097349882125854,grad_norm: 0.89125086475221, iteration: 298508
loss: 0.9693788886070251,grad_norm: 0.8794563219007095, iteration: 298509
loss: 0.9866230487823486,grad_norm: 0.7905826013521428, iteration: 298510
loss: 0.9924987554550171,grad_norm: 0.9999991571939193, iteration: 298511
loss: 1.0481702089309692,grad_norm: 0.8718504095461068, iteration: 298512
loss: 1.004571557044983,grad_norm: 0.9272447102971836, iteration: 298513
loss: 1.0107643604278564,grad_norm: 0.9301365132289394, iteration: 298514
loss: 1.0099711418151855,grad_norm: 0.8671030143558254, iteration: 298515
loss: 0.9933301210403442,grad_norm: 0.9110312167853466, iteration: 298516
loss: 1.0241429805755615,grad_norm: 0.9372428469946169, iteration: 298517
loss: 0.9863409996032715,grad_norm: 0.9415917807312498, iteration: 298518
loss: 1.0258077383041382,grad_norm: 0.9673461583806952, iteration: 298519
loss: 1.0008577108383179,grad_norm: 0.9449803043002548, iteration: 298520
loss: 0.9919633269309998,grad_norm: 0.9160300999487169, iteration: 298521
loss: 1.0120048522949219,grad_norm: 0.8058059149058752, iteration: 298522
loss: 0.9792278409004211,grad_norm: 0.9050521369571506, iteration: 298523
loss: 1.0571355819702148,grad_norm: 0.7486688645235184, iteration: 298524
loss: 0.9925851225852966,grad_norm: 0.854468406654068, iteration: 298525
loss: 0.9475759267807007,grad_norm: 0.9790430673181951, iteration: 298526
loss: 0.9696263074874878,grad_norm: 0.8426794202516564, iteration: 298527
loss: 0.9880584478378296,grad_norm: 0.999999364646425, iteration: 298528
loss: 1.0058798789978027,grad_norm: 0.9336682438200377, iteration: 298529
loss: 0.9801552295684814,grad_norm: 0.8210286826654313, iteration: 298530
loss: 0.9687602519989014,grad_norm: 0.827525111489682, iteration: 298531
loss: 0.990055501461029,grad_norm: 0.9195079611212998, iteration: 298532
loss: 0.9451445937156677,grad_norm: 0.9999991830799699, iteration: 298533
loss: 0.9871352910995483,grad_norm: 0.7985596326491216, iteration: 298534
loss: 1.0038331747055054,grad_norm: 0.9999991088345296, iteration: 298535
loss: 1.031731367111206,grad_norm: 0.9192823326731469, iteration: 298536
loss: 0.9716973900794983,grad_norm: 0.8665540340403464, iteration: 298537
loss: 1.0816378593444824,grad_norm: 0.9999997384935354, iteration: 298538
loss: 1.0781725645065308,grad_norm: 0.9318804580820664, iteration: 298539
loss: 1.0249170064926147,grad_norm: 0.9720141475674122, iteration: 298540
loss: 1.0182809829711914,grad_norm: 0.7838812437289373, iteration: 298541
loss: 1.0400755405426025,grad_norm: 0.9764895093360699, iteration: 298542
loss: 0.9967712163925171,grad_norm: 0.8043695297761698, iteration: 298543
loss: 1.0030410289764404,grad_norm: 0.9999990485087131, iteration: 298544
loss: 0.9746849536895752,grad_norm: 0.7961018489125651, iteration: 298545
loss: 1.0309181213378906,grad_norm: 0.7618707815886319, iteration: 298546
loss: 1.007398247718811,grad_norm: 0.8395048760881312, iteration: 298547
loss: 1.0011247396469116,grad_norm: 0.751394931616316, iteration: 298548
loss: 0.9528629183769226,grad_norm: 0.8861090285320038, iteration: 298549
loss: 0.9988318085670471,grad_norm: 0.7929541952264707, iteration: 298550
loss: 1.0089973211288452,grad_norm: 0.7619638925925959, iteration: 298551
loss: 1.0489012002944946,grad_norm: 0.9999989584505966, iteration: 298552
loss: 1.0525660514831543,grad_norm: 0.82308650576729, iteration: 298553
loss: 0.97598797082901,grad_norm: 0.84093703304729, iteration: 298554
loss: 1.0111478567123413,grad_norm: 0.9096524946797104, iteration: 298555
loss: 0.9699156880378723,grad_norm: 0.921844005693452, iteration: 298556
loss: 0.9916179776191711,grad_norm: 0.8150552255214499, iteration: 298557
loss: 0.9769648313522339,grad_norm: 0.9276597470500639, iteration: 298558
loss: 0.9912434220314026,grad_norm: 0.6632296882277604, iteration: 298559
loss: 1.038333773612976,grad_norm: 0.9789969417547729, iteration: 298560
loss: 0.9997434616088867,grad_norm: 0.9999990212118985, iteration: 298561
loss: 0.9821296334266663,grad_norm: 0.999999028510933, iteration: 298562
loss: 0.9835387468338013,grad_norm: 0.7315413392383422, iteration: 298563
loss: 0.9717726707458496,grad_norm: 0.7060314726782858, iteration: 298564
loss: 1.0132158994674683,grad_norm: 0.891394430727368, iteration: 298565
loss: 1.0106028318405151,grad_norm: 0.7277746457439701, iteration: 298566
loss: 0.9860055446624756,grad_norm: 0.7965521105269121, iteration: 298567
loss: 1.0296552181243896,grad_norm: 0.8778067166095621, iteration: 298568
loss: 0.9626070857048035,grad_norm: 0.7119659740610855, iteration: 298569
loss: 0.981715977191925,grad_norm: 0.7304086949899032, iteration: 298570
loss: 1.009968876838684,grad_norm: 0.8360036126799893, iteration: 298571
loss: 1.0040758848190308,grad_norm: 0.9540553567433638, iteration: 298572
loss: 1.0253188610076904,grad_norm: 0.8884638304787832, iteration: 298573
loss: 0.9692331552505493,grad_norm: 0.8852348115019939, iteration: 298574
loss: 1.002372145652771,grad_norm: 0.9999991264520178, iteration: 298575
loss: 0.9670010805130005,grad_norm: 0.9826597520862368, iteration: 298576
loss: 1.0258688926696777,grad_norm: 0.9999994730548003, iteration: 298577
loss: 1.0059021711349487,grad_norm: 0.8595322513899827, iteration: 298578
loss: 0.9849781394004822,grad_norm: 0.8422846251312138, iteration: 298579
loss: 0.9948210716247559,grad_norm: 0.7995869720744325, iteration: 298580
loss: 1.0132290124893188,grad_norm: 0.7936724668685704, iteration: 298581
loss: 1.030447244644165,grad_norm: 0.9999991035101299, iteration: 298582
loss: 0.9759065508842468,grad_norm: 0.8627411016719005, iteration: 298583
loss: 1.025273084640503,grad_norm: 0.9902786816826296, iteration: 298584
loss: 0.9949634671211243,grad_norm: 0.918264869461041, iteration: 298585
loss: 0.9813712239265442,grad_norm: 0.7219914093845691, iteration: 298586
loss: 1.0000382661819458,grad_norm: 0.8190866660922073, iteration: 298587
loss: 1.02088463306427,grad_norm: 0.9272794490041706, iteration: 298588
loss: 1.0016005039215088,grad_norm: 0.7613251673839615, iteration: 298589
loss: 0.9809828400611877,grad_norm: 0.7838444482129818, iteration: 298590
loss: 0.9691533446311951,grad_norm: 0.9270184959736854, iteration: 298591
loss: 1.0030766725540161,grad_norm: 0.6777873509416271, iteration: 298592
loss: 1.0519198179244995,grad_norm: 0.9999997731343847, iteration: 298593
loss: 1.0332342386245728,grad_norm: 0.7705079561921799, iteration: 298594
loss: 1.0183395147323608,grad_norm: 0.9967026813829556, iteration: 298595
loss: 1.0005170106887817,grad_norm: 0.8264266051418769, iteration: 298596
loss: 0.9919480085372925,grad_norm: 0.9999991467009258, iteration: 298597
loss: 1.013021469116211,grad_norm: 0.8280690496351458, iteration: 298598
loss: 1.0321961641311646,grad_norm: 0.8066812694616521, iteration: 298599
loss: 0.994470477104187,grad_norm: 0.8791804963933887, iteration: 298600
loss: 1.0384798049926758,grad_norm: 0.8901963269711787, iteration: 298601
loss: 1.0059034824371338,grad_norm: 0.820017644310616, iteration: 298602
loss: 0.9830853343009949,grad_norm: 0.9999990131068818, iteration: 298603
loss: 0.9763719439506531,grad_norm: 0.8572402075330756, iteration: 298604
loss: 0.9714239239692688,grad_norm: 0.8332765957119435, iteration: 298605
loss: 1.0532392263412476,grad_norm: 0.7915532665450637, iteration: 298606
loss: 1.013535737991333,grad_norm: 0.7756522547643916, iteration: 298607
loss: 0.9809986352920532,grad_norm: 0.9406013068778527, iteration: 298608
loss: 0.9684857130050659,grad_norm: 0.9733014589406556, iteration: 298609
loss: 0.9910985231399536,grad_norm: 0.853412746439369, iteration: 298610
loss: 1.0112007856369019,grad_norm: 0.7174289512909587, iteration: 298611
loss: 1.0227560997009277,grad_norm: 0.8605354671341602, iteration: 298612
loss: 0.9975587725639343,grad_norm: 0.7343233334351418, iteration: 298613
loss: 1.0312249660491943,grad_norm: 0.7296012617519395, iteration: 298614
loss: 0.959532618522644,grad_norm: 0.7040533410976916, iteration: 298615
loss: 0.9917686581611633,grad_norm: 0.9382413667511071, iteration: 298616
loss: 0.9823039770126343,grad_norm: 0.9846623407738563, iteration: 298617
loss: 1.0377514362335205,grad_norm: 0.9999990722427841, iteration: 298618
loss: 1.0091127157211304,grad_norm: 0.9370613533579689, iteration: 298619
loss: 1.00640070438385,grad_norm: 0.999998941631145, iteration: 298620
loss: 1.0000022649765015,grad_norm: 0.8552057428255276, iteration: 298621
loss: 1.0030840635299683,grad_norm: 0.8549180799891931, iteration: 298622
loss: 0.9993807673454285,grad_norm: 0.9999989498934078, iteration: 298623
loss: 0.9901296496391296,grad_norm: 0.8629972974500565, iteration: 298624
loss: 1.014936923980713,grad_norm: 0.8038406398360686, iteration: 298625
loss: 0.9612840414047241,grad_norm: 0.8471674410064415, iteration: 298626
loss: 0.995423436164856,grad_norm: 0.7735947894439738, iteration: 298627
loss: 0.9922229051589966,grad_norm: 0.76214898566695, iteration: 298628
loss: 1.0183309316635132,grad_norm: 0.9638210995025036, iteration: 298629
loss: 0.9865704774856567,grad_norm: 0.8026944326924271, iteration: 298630
loss: 0.9989551901817322,grad_norm: 0.7470282164314516, iteration: 298631
loss: 1.0031846761703491,grad_norm: 0.9999990105605129, iteration: 298632
loss: 0.9856111407279968,grad_norm: 0.7534497063555274, iteration: 298633
loss: 0.9791310429573059,grad_norm: 0.8975143704074064, iteration: 298634
loss: 1.0088273286819458,grad_norm: 0.9182272370328425, iteration: 298635
loss: 1.0054904222488403,grad_norm: 0.8874572853999861, iteration: 298636
loss: 0.9803383350372314,grad_norm: 0.7989791540825673, iteration: 298637
loss: 1.0043271780014038,grad_norm: 0.7560438570887901, iteration: 298638
loss: 1.0278490781784058,grad_norm: 0.9102583183480046, iteration: 298639
loss: 1.0053890943527222,grad_norm: 0.8273916421062938, iteration: 298640
loss: 1.0277163982391357,grad_norm: 0.9312033918755872, iteration: 298641
loss: 1.0338637828826904,grad_norm: 0.9290629581685083, iteration: 298642
loss: 0.9801443219184875,grad_norm: 0.7569074982651964, iteration: 298643
loss: 1.0122870206832886,grad_norm: 0.99999917561057, iteration: 298644
loss: 0.9874308705329895,grad_norm: 0.7385105614337112, iteration: 298645
loss: 0.9722275733947754,grad_norm: 0.7833163431946829, iteration: 298646
loss: 0.9962007403373718,grad_norm: 0.8837242840284624, iteration: 298647
loss: 0.9795872569084167,grad_norm: 0.9501256231514588, iteration: 298648
loss: 0.972332775592804,grad_norm: 0.9999993603459506, iteration: 298649
loss: 1.00314462184906,grad_norm: 0.8487231623705055, iteration: 298650
loss: 1.0158663988113403,grad_norm: 0.8254446562743806, iteration: 298651
loss: 1.0200196504592896,grad_norm: 0.9091138067288749, iteration: 298652
loss: 1.0333458185195923,grad_norm: 0.939558547491194, iteration: 298653
loss: 1.0409725904464722,grad_norm: 0.9999999966506605, iteration: 298654
loss: 0.9687548875808716,grad_norm: 0.8295386505178616, iteration: 298655
loss: 1.0087013244628906,grad_norm: 0.8468472904218174, iteration: 298656
loss: 1.01283860206604,grad_norm: 0.7939458172017051, iteration: 298657
loss: 1.0039924383163452,grad_norm: 0.9038004460974547, iteration: 298658
loss: 1.0038862228393555,grad_norm: 0.9636002996254169, iteration: 298659
loss: 0.9969953298568726,grad_norm: 0.8098248150958338, iteration: 298660
loss: 0.9900185465812683,grad_norm: 0.9892153460870203, iteration: 298661
loss: 1.0065799951553345,grad_norm: 0.8991473851957092, iteration: 298662
loss: 1.0208508968353271,grad_norm: 0.9999990226509062, iteration: 298663
loss: 0.9822292923927307,grad_norm: 0.8685345909374272, iteration: 298664
loss: 0.9517815709114075,grad_norm: 0.9329839350278463, iteration: 298665
loss: 1.0156692266464233,grad_norm: 0.9144770424351014, iteration: 298666
loss: 0.9662020802497864,grad_norm: 0.8978990398152498, iteration: 298667
loss: 0.9584208726882935,grad_norm: 0.7876055093614222, iteration: 298668
loss: 0.9755066633224487,grad_norm: 0.9999991288990729, iteration: 298669
loss: 0.9816248416900635,grad_norm: 0.8622109096993243, iteration: 298670
loss: 0.9626991152763367,grad_norm: 0.9854037745582933, iteration: 298671
loss: 0.9998077154159546,grad_norm: 0.6771736147266783, iteration: 298672
loss: 0.9841127395629883,grad_norm: 0.9999990304219856, iteration: 298673
loss: 1.03286612033844,grad_norm: 0.9734755959684923, iteration: 298674
loss: 1.0060813426971436,grad_norm: 0.9256251894751596, iteration: 298675
loss: 0.9683570861816406,grad_norm: 0.8937104284164304, iteration: 298676
loss: 0.9862961769104004,grad_norm: 0.9338600941784376, iteration: 298677
loss: 1.047555923461914,grad_norm: 0.9141242271049934, iteration: 298678
loss: 1.0194367170333862,grad_norm: 0.7917931755094137, iteration: 298679
loss: 1.0123366117477417,grad_norm: 0.850132675769091, iteration: 298680
loss: 1.044355034828186,grad_norm: 0.7990931138934216, iteration: 298681
loss: 0.9926138520240784,grad_norm: 0.8613085546174443, iteration: 298682
loss: 1.010294795036316,grad_norm: 0.9999990796850172, iteration: 298683
loss: 1.0012151002883911,grad_norm: 0.9999991972514198, iteration: 298684
loss: 1.0155223608016968,grad_norm: 0.870444383678654, iteration: 298685
loss: 0.9883984327316284,grad_norm: 0.8139072189145391, iteration: 298686
loss: 0.9893662333488464,grad_norm: 0.8735845041026529, iteration: 298687
loss: 1.0054851770401,grad_norm: 0.9418010086085432, iteration: 298688
loss: 0.9793410897254944,grad_norm: 0.9059504416469171, iteration: 298689
loss: 1.0265495777130127,grad_norm: 0.987908696144069, iteration: 298690
loss: 1.0339304208755493,grad_norm: 0.9870005276243256, iteration: 298691
loss: 1.0031465291976929,grad_norm: 0.9693007987458818, iteration: 298692
loss: 0.9672343730926514,grad_norm: 0.8672140115162196, iteration: 298693
loss: 1.0101537704467773,grad_norm: 0.7781941127714553, iteration: 298694
loss: 0.9979822039604187,grad_norm: 0.8888844751549927, iteration: 298695
loss: 1.0121288299560547,grad_norm: 0.9455588894974035, iteration: 298696
loss: 0.9500826597213745,grad_norm: 0.7348419932951067, iteration: 298697
loss: 0.9776908159255981,grad_norm: 0.7613998437533235, iteration: 298698
loss: 1.0207878351211548,grad_norm: 0.8585845270168437, iteration: 298699
loss: 0.9959421753883362,grad_norm: 0.8958577966872872, iteration: 298700
loss: 1.0102717876434326,grad_norm: 0.829741282690882, iteration: 298701
loss: 1.0022765398025513,grad_norm: 0.999999123761311, iteration: 298702
loss: 0.9749177694320679,grad_norm: 0.8150983079719673, iteration: 298703
loss: 1.1150439977645874,grad_norm: 0.9267226620393997, iteration: 298704
loss: 1.004787802696228,grad_norm: 0.9065891346280744, iteration: 298705
loss: 1.0368252992630005,grad_norm: 0.9999991587102064, iteration: 298706
loss: 1.0110677480697632,grad_norm: 0.9176441439622893, iteration: 298707
loss: 1.0217344760894775,grad_norm: 0.99999917226603, iteration: 298708
loss: 0.9740873575210571,grad_norm: 0.7687584347123398, iteration: 298709
loss: 1.0069881677627563,grad_norm: 0.877096085581074, iteration: 298710
loss: 1.0307554006576538,grad_norm: 0.7810719109922701, iteration: 298711
loss: 1.0078387260437012,grad_norm: 0.8618510132873016, iteration: 298712
loss: 1.031420350074768,grad_norm: 0.7927987070896378, iteration: 298713
loss: 1.0145379304885864,grad_norm: 0.8144269155744968, iteration: 298714
loss: 1.0111854076385498,grad_norm: 0.7532455361422369, iteration: 298715
loss: 1.0254806280136108,grad_norm: 0.9589914744680746, iteration: 298716
loss: 0.9979698061943054,grad_norm: 0.8532062560705614, iteration: 298717
loss: 0.9649984240531921,grad_norm: 0.9068481502561451, iteration: 298718
loss: 0.9942613244056702,grad_norm: 0.9016260567992432, iteration: 298719
loss: 0.9446263313293457,grad_norm: 0.9999997799102017, iteration: 298720
loss: 0.9665886163711548,grad_norm: 0.8734362488278882, iteration: 298721
loss: 0.9640476703643799,grad_norm: 0.7735080060613809, iteration: 298722
loss: 0.9937798976898193,grad_norm: 0.8322558875532717, iteration: 298723
loss: 0.9882491230964661,grad_norm: 0.8733858134824838, iteration: 298724
loss: 1.0173687934875488,grad_norm: 0.8513866133127597, iteration: 298725
loss: 0.9655832052230835,grad_norm: 0.906300961685259, iteration: 298726
loss: 1.0231647491455078,grad_norm: 0.7494195846108906, iteration: 298727
loss: 1.0266858339309692,grad_norm: 0.9284572523813599, iteration: 298728
loss: 1.0275152921676636,grad_norm: 0.8041978181534292, iteration: 298729
loss: 0.999078094959259,grad_norm: 0.7178735604827376, iteration: 298730
loss: 0.9986204504966736,grad_norm: 0.937665181220984, iteration: 298731
loss: 0.9823627471923828,grad_norm: 0.6702926730548401, iteration: 298732
loss: 0.9621282815933228,grad_norm: 0.8374315365621932, iteration: 298733
loss: 1.0101439952850342,grad_norm: 0.9999993965005809, iteration: 298734
loss: 0.9950705766677856,grad_norm: 0.8570155456027008, iteration: 298735
loss: 0.9965066313743591,grad_norm: 0.6954383848011075, iteration: 298736
loss: 0.9795932769775391,grad_norm: 0.7955373334008856, iteration: 298737
loss: 1.031511902809143,grad_norm: 0.9230593202359478, iteration: 298738
loss: 1.017959475517273,grad_norm: 0.8292633474899636, iteration: 298739
loss: 0.99444180727005,grad_norm: 0.8789755346209892, iteration: 298740
loss: 1.028917670249939,grad_norm: 0.9999990583102152, iteration: 298741
loss: 0.994713544845581,grad_norm: 0.8652937433282526, iteration: 298742
loss: 0.979630708694458,grad_norm: 0.8407068694898145, iteration: 298743
loss: 1.0091391801834106,grad_norm: 0.9939573176741568, iteration: 298744
loss: 1.0299900770187378,grad_norm: 0.7567532954525726, iteration: 298745
loss: 0.9656139016151428,grad_norm: 0.8354122399911655, iteration: 298746
loss: 1.0009396076202393,grad_norm: 0.8650900037155591, iteration: 298747
loss: 0.983355700969696,grad_norm: 0.8480354955651662, iteration: 298748
loss: 0.9914627075195312,grad_norm: 0.9010021129379275, iteration: 298749
loss: 1.0009136199951172,grad_norm: 0.9645517139932098, iteration: 298750
loss: 0.9655885100364685,grad_norm: 0.899737267281873, iteration: 298751
loss: 0.9908482432365417,grad_norm: 0.8397924072180402, iteration: 298752
loss: 1.018693208694458,grad_norm: 0.9208579001035584, iteration: 298753
loss: 1.0042403936386108,grad_norm: 0.9633452691682172, iteration: 298754
loss: 1.0119627714157104,grad_norm: 0.8833990472726242, iteration: 298755
loss: 1.022024154663086,grad_norm: 0.9578795418720177, iteration: 298756
loss: 0.9928276538848877,grad_norm: 0.8137470888490288, iteration: 298757
loss: 1.0029836893081665,grad_norm: 0.891616292470929, iteration: 298758
loss: 1.0154768228530884,grad_norm: 0.9816453101814457, iteration: 298759
loss: 0.9864996075630188,grad_norm: 0.9557712467796798, iteration: 298760
loss: 0.9588721394538879,grad_norm: 0.8714026435735798, iteration: 298761
loss: 0.9876269698143005,grad_norm: 0.7690204349883534, iteration: 298762
loss: 0.9950545430183411,grad_norm: 0.9372116341788732, iteration: 298763
loss: 0.9910520315170288,grad_norm: 0.8250953157366047, iteration: 298764
loss: 1.0013861656188965,grad_norm: 0.9808512857467317, iteration: 298765
loss: 1.012211799621582,grad_norm: 0.9263303766272761, iteration: 298766
loss: 0.948025107383728,grad_norm: 0.9279342911232192, iteration: 298767
loss: 1.016514778137207,grad_norm: 0.7769859515224705, iteration: 298768
loss: 0.9811195135116577,grad_norm: 0.8267782645026164, iteration: 298769
loss: 0.9744862914085388,grad_norm: 0.7785060110290746, iteration: 298770
loss: 1.0031708478927612,grad_norm: 0.8750857840761227, iteration: 298771
loss: 0.9733637571334839,grad_norm: 0.8154982845514371, iteration: 298772
loss: 1.0099343061447144,grad_norm: 0.7564573173726971, iteration: 298773
loss: 1.0099397897720337,grad_norm: 0.7205479678567568, iteration: 298774
loss: 1.0001264810562134,grad_norm: 0.7179042939321593, iteration: 298775
loss: 1.0084460973739624,grad_norm: 0.8677720941488711, iteration: 298776
loss: 0.9844565987586975,grad_norm: 0.8060247131727892, iteration: 298777
loss: 1.0085012912750244,grad_norm: 0.8327961955289499, iteration: 298778
loss: 0.967505931854248,grad_norm: 0.8911735735689077, iteration: 298779
loss: 1.0026075839996338,grad_norm: 0.8229348895707799, iteration: 298780
loss: 1.0136325359344482,grad_norm: 0.9300223195343047, iteration: 298781
loss: 1.0256023406982422,grad_norm: 0.9185361586547197, iteration: 298782
loss: 0.9814408421516418,grad_norm: 0.7481537610190035, iteration: 298783
loss: 0.9576635360717773,grad_norm: 0.8172646253286158, iteration: 298784
loss: 1.005138635635376,grad_norm: 0.8786264475700594, iteration: 298785
loss: 0.9805724620819092,grad_norm: 0.8231607315545506, iteration: 298786
loss: 1.0155092477798462,grad_norm: 0.9999990083008575, iteration: 298787
loss: 1.0068304538726807,grad_norm: 0.882709377294029, iteration: 298788
loss: 0.991523265838623,grad_norm: 0.7377872828407329, iteration: 298789
loss: 1.0076041221618652,grad_norm: 0.9484927493877948, iteration: 298790
loss: 0.9813087582588196,grad_norm: 0.9999991208263378, iteration: 298791
loss: 0.9909695386886597,grad_norm: 0.8080521262819369, iteration: 298792
loss: 0.9637428522109985,grad_norm: 0.9999990601373573, iteration: 298793
loss: 0.9687638878822327,grad_norm: 0.9999991556000368, iteration: 298794
loss: 0.9935832023620605,grad_norm: 0.9952832464818524, iteration: 298795
loss: 0.9838356971740723,grad_norm: 0.9999990835582157, iteration: 298796
loss: 1.0154374837875366,grad_norm: 0.9220319137555474, iteration: 298797
loss: 1.0360347032546997,grad_norm: 0.9999990503115829, iteration: 298798
loss: 0.9995787143707275,grad_norm: 0.9445337284731957, iteration: 298799
loss: 0.9873184561729431,grad_norm: 0.9999993089547099, iteration: 298800
loss: 1.023389458656311,grad_norm: 0.7579440917968713, iteration: 298801
loss: 0.9953852295875549,grad_norm: 0.7542607231462056, iteration: 298802
loss: 1.0717310905456543,grad_norm: 0.911320151454065, iteration: 298803
loss: 1.0675379037857056,grad_norm: 0.9999991985521731, iteration: 298804
loss: 1.0194824934005737,grad_norm: 0.8350805754267305, iteration: 298805
loss: 1.0282845497131348,grad_norm: 0.8573876336399022, iteration: 298806
loss: 0.9937112927436829,grad_norm: 0.7941101768855356, iteration: 298807
loss: 0.9768977165222168,grad_norm: 0.9999994518119547, iteration: 298808
loss: 0.979612410068512,grad_norm: 0.8404160878383519, iteration: 298809
loss: 0.9990721344947815,grad_norm: 0.9236454905887083, iteration: 298810
loss: 0.9911981225013733,grad_norm: 0.9333481124508812, iteration: 298811
loss: 0.9568804502487183,grad_norm: 0.8193571540697744, iteration: 298812
loss: 1.0097464323043823,grad_norm: 0.8652634269495, iteration: 298813
loss: 1.0257091522216797,grad_norm: 0.8280197163309129, iteration: 298814
loss: 1.0105713605880737,grad_norm: 0.7607890477886486, iteration: 298815
loss: 0.9899726510047913,grad_norm: 0.8555480437013281, iteration: 298816
loss: 0.9831522703170776,grad_norm: 0.8577732636950196, iteration: 298817
loss: 0.9719440937042236,grad_norm: 0.8208133903271333, iteration: 298818
loss: 0.990294873714447,grad_norm: 0.8076003801719432, iteration: 298819
loss: 0.9878348708152771,grad_norm: 0.9912113753019385, iteration: 298820
loss: 1.0261039733886719,grad_norm: 0.7647173200535182, iteration: 298821
loss: 0.9604036808013916,grad_norm: 0.8888420411001476, iteration: 298822
loss: 1.0246012210845947,grad_norm: 0.8017902890121239, iteration: 298823
loss: 0.9636917114257812,grad_norm: 0.714504369080573, iteration: 298824
loss: 1.0608325004577637,grad_norm: 0.8282192419793682, iteration: 298825
loss: 1.0092867612838745,grad_norm: 0.7688837536295563, iteration: 298826
loss: 1.0288654565811157,grad_norm: 0.8710796013760824, iteration: 298827
loss: 0.9605079293251038,grad_norm: 0.8071901773842262, iteration: 298828
loss: 0.9928972721099854,grad_norm: 0.8129016041578433, iteration: 298829
loss: 0.981070876121521,grad_norm: 0.9857758193164181, iteration: 298830
loss: 1.0066680908203125,grad_norm: 0.9030752859076673, iteration: 298831
loss: 0.9818925261497498,grad_norm: 0.9038443713393421, iteration: 298832
loss: 1.0160480737686157,grad_norm: 0.7741152269180608, iteration: 298833
loss: 1.001245141029358,grad_norm: 0.8391599906300584, iteration: 298834
loss: 1.0033905506134033,grad_norm: 0.856857608914657, iteration: 298835
loss: 0.9805050492286682,grad_norm: 0.7067383995129709, iteration: 298836
loss: 0.9967179298400879,grad_norm: 0.9694607984354475, iteration: 298837
loss: 1.0535272359848022,grad_norm: 0.9999994574948333, iteration: 298838
loss: 0.9984689950942993,grad_norm: 0.9257987361174173, iteration: 298839
loss: 0.995881974697113,grad_norm: 0.8736230710588929, iteration: 298840
loss: 1.0155388116836548,grad_norm: 0.7371204440491526, iteration: 298841
loss: 0.9931207895278931,grad_norm: 0.8660358850297706, iteration: 298842
loss: 0.9823102951049805,grad_norm: 0.7283707839584361, iteration: 298843
loss: 1.0200858116149902,grad_norm: 0.773347748766816, iteration: 298844
loss: 0.9510419368743896,grad_norm: 0.8250926702601584, iteration: 298845
loss: 1.0006539821624756,grad_norm: 0.8520153724173721, iteration: 298846
loss: 0.9922541379928589,grad_norm: 0.804823353778801, iteration: 298847
loss: 0.9669853448867798,grad_norm: 0.8370621787098572, iteration: 298848
loss: 1.0006937980651855,grad_norm: 0.9029815987584655, iteration: 298849
loss: 0.969459593296051,grad_norm: 0.7308566144092744, iteration: 298850
loss: 1.031603455543518,grad_norm: 0.8318027803105003, iteration: 298851
loss: 0.9952362179756165,grad_norm: 0.999999109326975, iteration: 298852
loss: 1.0843751430511475,grad_norm: 0.9999990047050754, iteration: 298853
loss: 1.0127264261245728,grad_norm: 0.7709235383857425, iteration: 298854
loss: 1.0231668949127197,grad_norm: 0.9390673953950304, iteration: 298855
loss: 1.0718300342559814,grad_norm: 0.8210463495522858, iteration: 298856
loss: 0.9560137391090393,grad_norm: 0.8005557040531297, iteration: 298857
loss: 0.9990953207015991,grad_norm: 0.7452262058056991, iteration: 298858
loss: 0.9772778749465942,grad_norm: 0.8917501058232221, iteration: 298859
loss: 0.9839060306549072,grad_norm: 0.7776435049342971, iteration: 298860
loss: 0.9722068309783936,grad_norm: 0.9648723188024115, iteration: 298861
loss: 0.977982223033905,grad_norm: 0.7987197742538108, iteration: 298862
loss: 0.9695491194725037,grad_norm: 0.8542641849357331, iteration: 298863
loss: 1.0190589427947998,grad_norm: 0.8635632339591042, iteration: 298864
loss: 1.0234522819519043,grad_norm: 0.8300439035571087, iteration: 298865
loss: 0.9996088743209839,grad_norm: 0.7667800369522305, iteration: 298866
loss: 1.0262566804885864,grad_norm: 0.993562440900348, iteration: 298867
loss: 1.0000914335250854,grad_norm: 0.8143913182735302, iteration: 298868
loss: 0.9812427759170532,grad_norm: 0.9597864514593868, iteration: 298869
loss: 1.0030430555343628,grad_norm: 0.9999990057708406, iteration: 298870
loss: 0.9591774344444275,grad_norm: 0.922162116594917, iteration: 298871
loss: 0.9574682712554932,grad_norm: 0.9999991647522719, iteration: 298872
loss: 0.9983730912208557,grad_norm: 0.892792381554885, iteration: 298873
loss: 0.9399821758270264,grad_norm: 0.9999991558631737, iteration: 298874
loss: 1.027930736541748,grad_norm: 0.853843450770019, iteration: 298875
loss: 0.9712375402450562,grad_norm: 0.961373132570886, iteration: 298876
loss: 0.991431713104248,grad_norm: 0.7599449187163809, iteration: 298877
loss: 0.9974095225334167,grad_norm: 0.8597126468736052, iteration: 298878
loss: 1.0124568939208984,grad_norm: 0.8108003186254837, iteration: 298879
loss: 1.0005161762237549,grad_norm: 0.9047330644411024, iteration: 298880
loss: 0.9813317656517029,grad_norm: 0.8600401297511281, iteration: 298881
loss: 1.030608057975769,grad_norm: 0.9069713008660194, iteration: 298882
loss: 0.9704994559288025,grad_norm: 0.9999990853280515, iteration: 298883
loss: 1.0144530534744263,grad_norm: 0.9452862106062186, iteration: 298884
loss: 0.9842594265937805,grad_norm: 0.7363547687000732, iteration: 298885
loss: 0.9921515583992004,grad_norm: 0.8423070555402025, iteration: 298886
loss: 1.0159506797790527,grad_norm: 0.9999996842630166, iteration: 298887
loss: 0.9752543568611145,grad_norm: 0.9092551890506243, iteration: 298888
loss: 1.0119585990905762,grad_norm: 0.8984692863191641, iteration: 298889
loss: 0.9941524863243103,grad_norm: 0.8601822597272679, iteration: 298890
loss: 1.003872275352478,grad_norm: 0.8611322285259511, iteration: 298891
loss: 0.9911434054374695,grad_norm: 0.8822133686925927, iteration: 298892
loss: 1.0003770589828491,grad_norm: 0.881283363604628, iteration: 298893
loss: 1.0078611373901367,grad_norm: 0.9999991546619181, iteration: 298894
loss: 1.0230846405029297,grad_norm: 0.9120150516113023, iteration: 298895
loss: 0.9904874563217163,grad_norm: 0.9012334734918398, iteration: 298896
loss: 1.0920144319534302,grad_norm: 0.9999993147926003, iteration: 298897
loss: 1.0836848020553589,grad_norm: 0.9444580433552374, iteration: 298898
loss: 1.013879418373108,grad_norm: 0.859417033152506, iteration: 298899
loss: 0.9961637258529663,grad_norm: 0.8683235589433392, iteration: 298900
loss: 0.9856826066970825,grad_norm: 0.9604960178037546, iteration: 298901
loss: 0.980014443397522,grad_norm: 0.9208479072602558, iteration: 298902
loss: 0.9858927726745605,grad_norm: 0.9252585965867625, iteration: 298903
loss: 0.9641850590705872,grad_norm: 0.9470210082119046, iteration: 298904
loss: 1.0210237503051758,grad_norm: 0.7348100577349291, iteration: 298905
loss: 0.9910988807678223,grad_norm: 0.8083745629810818, iteration: 298906
loss: 1.0018773078918457,grad_norm: 0.9971743062053458, iteration: 298907
loss: 1.0021734237670898,grad_norm: 0.9237056933661203, iteration: 298908
loss: 1.051823616027832,grad_norm: 0.9999990510348367, iteration: 298909
loss: 1.0042190551757812,grad_norm: 0.7928301284052367, iteration: 298910
loss: 0.983194887638092,grad_norm: 0.675992446103885, iteration: 298911
loss: 0.9853368997573853,grad_norm: 0.692265551862671, iteration: 298912
loss: 1.027441382408142,grad_norm: 0.742651003546587, iteration: 298913
loss: 1.004318118095398,grad_norm: 0.7789325803035406, iteration: 298914
loss: 0.9915508031845093,grad_norm: 0.8105786809218721, iteration: 298915
loss: 1.000783920288086,grad_norm: 0.872381056012164, iteration: 298916
loss: 1.0387920141220093,grad_norm: 0.9999991288061851, iteration: 298917
loss: 1.0143004655838013,grad_norm: 0.9618027027285309, iteration: 298918
loss: 1.0430580377578735,grad_norm: 0.999999622852757, iteration: 298919
loss: 1.0176297426223755,grad_norm: 0.9999999755833348, iteration: 298920
loss: 0.9928330183029175,grad_norm: 0.7380516184598923, iteration: 298921
loss: 0.9943225383758545,grad_norm: 0.9675007083286274, iteration: 298922
loss: 0.993223249912262,grad_norm: 0.8354393163052701, iteration: 298923
loss: 0.9955707788467407,grad_norm: 0.994574287046195, iteration: 298924
loss: 0.9824678301811218,grad_norm: 0.8205767735842893, iteration: 298925
loss: 0.9923933148384094,grad_norm: 0.9537322422294217, iteration: 298926
loss: 0.9803143739700317,grad_norm: 0.8572406820191565, iteration: 298927
loss: 1.003921389579773,grad_norm: 0.8794577079411732, iteration: 298928
loss: 0.9932241439819336,grad_norm: 0.8282229360345573, iteration: 298929
loss: 1.0277295112609863,grad_norm: 0.8219011425597212, iteration: 298930
loss: 0.9978883862495422,grad_norm: 0.7819596200629723, iteration: 298931
loss: 0.999411404132843,grad_norm: 0.9999992402321082, iteration: 298932
loss: 0.976363480091095,grad_norm: 0.8931127702568838, iteration: 298933
loss: 1.0025917291641235,grad_norm: 0.9520270634330507, iteration: 298934
loss: 0.9957733154296875,grad_norm: 0.8643483835949907, iteration: 298935
loss: 0.9943068027496338,grad_norm: 0.6917444300918295, iteration: 298936
loss: 1.0097665786743164,grad_norm: 0.6658530965529452, iteration: 298937
loss: 0.9915792942047119,grad_norm: 0.8964217232356441, iteration: 298938
loss: 1.0448105335235596,grad_norm: 0.9999990944105569, iteration: 298939
loss: 0.9773783087730408,grad_norm: 0.9892802155750001, iteration: 298940
loss: 0.948582649230957,grad_norm: 0.9999989884041397, iteration: 298941
loss: 1.0492180585861206,grad_norm: 0.999999643541051, iteration: 298942
loss: 1.0210914611816406,grad_norm: 0.9999992376316924, iteration: 298943
loss: 0.977401077747345,grad_norm: 0.8609174449174081, iteration: 298944
loss: 1.0166324377059937,grad_norm: 0.8277413150280005, iteration: 298945
loss: 0.9955320358276367,grad_norm: 0.9999991441812766, iteration: 298946
loss: 0.9921340346336365,grad_norm: 0.8069894571647728, iteration: 298947
loss: 0.9895835518836975,grad_norm: 0.7599092616435088, iteration: 298948
loss: 1.0011249780654907,grad_norm: 0.8917749583127023, iteration: 298949
loss: 0.9816158413887024,grad_norm: 0.8469778852744506, iteration: 298950
loss: 1.051772117614746,grad_norm: 0.8151736975267627, iteration: 298951
loss: 0.9857089519500732,grad_norm: 0.8139181413654923, iteration: 298952
loss: 0.974214494228363,grad_norm: 0.8262607967384527, iteration: 298953
loss: 0.9904419183731079,grad_norm: 0.9287038014972555, iteration: 298954
loss: 0.9917585253715515,grad_norm: 0.8964472653862584, iteration: 298955
loss: 1.0028884410858154,grad_norm: 0.8009478811786731, iteration: 298956
loss: 1.0298471450805664,grad_norm: 0.9999999072822654, iteration: 298957
loss: 0.9936751127243042,grad_norm: 0.907931821513718, iteration: 298958
loss: 0.9921347498893738,grad_norm: 0.9073997809014341, iteration: 298959
loss: 0.9891713857650757,grad_norm: 0.7744195956719975, iteration: 298960
loss: 1.0088679790496826,grad_norm: 0.9196929174590746, iteration: 298961
loss: 0.9461450576782227,grad_norm: 0.6680858416875566, iteration: 298962
loss: 0.9700571298599243,grad_norm: 0.81707008067528, iteration: 298963
loss: 1.0422693490982056,grad_norm: 0.9244799293043515, iteration: 298964
loss: 1.0157898664474487,grad_norm: 0.8524065835385503, iteration: 298965
loss: 0.9789649248123169,grad_norm: 0.7306051018356082, iteration: 298966
loss: 0.9765011072158813,grad_norm: 0.9056215168067502, iteration: 298967
loss: 0.9907350540161133,grad_norm: 0.7887161037527226, iteration: 298968
loss: 0.9582288861274719,grad_norm: 0.9214426644694613, iteration: 298969
loss: 1.0268505811691284,grad_norm: 0.8511201033119101, iteration: 298970
loss: 0.9940598011016846,grad_norm: 0.9546348259541481, iteration: 298971
loss: 1.0412803888320923,grad_norm: 0.9432437734702529, iteration: 298972
loss: 0.9751168489456177,grad_norm: 0.9728453776977048, iteration: 298973
loss: 1.012811541557312,grad_norm: 0.9999993104619218, iteration: 298974
loss: 0.9982588887214661,grad_norm: 0.8371175956656448, iteration: 298975
loss: 0.9935869574546814,grad_norm: 0.9833527058666813, iteration: 298976
loss: 0.9986782670021057,grad_norm: 0.7526068989684906, iteration: 298977
loss: 0.9662310481071472,grad_norm: 0.7795591918916986, iteration: 298978
loss: 1.00857675075531,grad_norm: 0.7625163652205756, iteration: 298979
loss: 1.0276517868041992,grad_norm: 0.7861451031253873, iteration: 298980
loss: 0.9829095602035522,grad_norm: 0.8233390174196805, iteration: 298981
loss: 0.997788667678833,grad_norm: 0.7042237439612625, iteration: 298982
loss: 1.014321208000183,grad_norm: 0.7360365402273067, iteration: 298983
loss: 1.111107587814331,grad_norm: 0.9999993318790588, iteration: 298984
loss: 1.0192545652389526,grad_norm: 0.6700772469738513, iteration: 298985
loss: 0.9787074327468872,grad_norm: 0.8645167961633461, iteration: 298986
loss: 0.9863334894180298,grad_norm: 0.9042543602304893, iteration: 298987
loss: 0.9807206988334656,grad_norm: 0.8351828078409365, iteration: 298988
loss: 0.9789028167724609,grad_norm: 0.9496090783707921, iteration: 298989
loss: 1.089499592781067,grad_norm: 0.9999996698805177, iteration: 298990
loss: 0.9744991064071655,grad_norm: 0.821608018543084, iteration: 298991
loss: 1.0018951892852783,grad_norm: 0.742145067050808, iteration: 298992
loss: 0.9985768795013428,grad_norm: 0.8837395199805763, iteration: 298993
loss: 0.9842866659164429,grad_norm: 0.833462092208303, iteration: 298994
loss: 0.9803966283798218,grad_norm: 0.901962009525858, iteration: 298995
loss: 1.0395593643188477,grad_norm: 0.839174640526254, iteration: 298996
loss: 0.9894524216651917,grad_norm: 0.894648364583554, iteration: 298997
loss: 1.004231333732605,grad_norm: 0.9999991308794479, iteration: 298998
loss: 1.0097922086715698,grad_norm: 0.8789685883819002, iteration: 298999
loss: 0.992038369178772,grad_norm: 0.7975094725692483, iteration: 299000
loss: 1.0023709535598755,grad_norm: 0.7039146986178876, iteration: 299001
loss: 0.990608811378479,grad_norm: 0.788448676709953, iteration: 299002
loss: 0.9685196876525879,grad_norm: 0.8278128136822169, iteration: 299003
loss: 1.0212839841842651,grad_norm: 0.7977582931620157, iteration: 299004
loss: 1.0264480113983154,grad_norm: 0.8293382996892941, iteration: 299005
loss: 0.9742087125778198,grad_norm: 0.9584757448779516, iteration: 299006
loss: 0.9819181561470032,grad_norm: 0.708733264040684, iteration: 299007
loss: 1.0279499292373657,grad_norm: 0.9775422738472027, iteration: 299008
loss: 1.0003063678741455,grad_norm: 0.8056119866767678, iteration: 299009
loss: 0.9881714582443237,grad_norm: 0.7665443575583587, iteration: 299010
loss: 1.0269534587860107,grad_norm: 0.8626563968150056, iteration: 299011
loss: 1.0421503782272339,grad_norm: 0.8341047187653204, iteration: 299012
loss: 1.0038621425628662,grad_norm: 0.8732235928653083, iteration: 299013
loss: 1.0131680965423584,grad_norm: 0.8757260602333998, iteration: 299014
loss: 1.0204119682312012,grad_norm: 0.9999991163969734, iteration: 299015
loss: 0.9849743247032166,grad_norm: 0.7052307201089526, iteration: 299016
loss: 0.9973823428153992,grad_norm: 0.7637393660889379, iteration: 299017
loss: 0.996788740158081,grad_norm: 0.8928412375090617, iteration: 299018
loss: 0.9727386236190796,grad_norm: 0.8240501542451804, iteration: 299019
loss: 1.0304250717163086,grad_norm: 0.9999990188990288, iteration: 299020
loss: 1.0023006200790405,grad_norm: 0.7458032650425155, iteration: 299021
loss: 0.9915755987167358,grad_norm: 0.9001942102367242, iteration: 299022
loss: 1.0309078693389893,grad_norm: 0.9991093185604318, iteration: 299023
loss: 0.9803342819213867,grad_norm: 0.8782554186782595, iteration: 299024
loss: 1.0643943548202515,grad_norm: 0.9999999217052936, iteration: 299025
loss: 0.9911729693412781,grad_norm: 0.9161366804906145, iteration: 299026
loss: 1.0426326990127563,grad_norm: 0.999999785065815, iteration: 299027
loss: 0.9886794090270996,grad_norm: 0.8600605337957704, iteration: 299028
loss: 1.018009901046753,grad_norm: 0.8986788066660035, iteration: 299029
loss: 0.9877591729164124,grad_norm: 0.788399763572101, iteration: 299030
loss: 0.9746127128601074,grad_norm: 0.7731837180456861, iteration: 299031
loss: 1.0234284400939941,grad_norm: 0.9999989541796439, iteration: 299032
loss: 0.9599713087081909,grad_norm: 0.9997839762379515, iteration: 299033
loss: 0.9989913702011108,grad_norm: 0.9999992710630409, iteration: 299034
loss: 0.9914179444313049,grad_norm: 0.9685888221021627, iteration: 299035
loss: 0.978766918182373,grad_norm: 0.9189286327134802, iteration: 299036
loss: 1.02483332157135,grad_norm: 0.868854483853321, iteration: 299037
loss: 1.045894980430603,grad_norm: 0.9999998252010064, iteration: 299038
loss: 1.0078681707382202,grad_norm: 0.9081893409426983, iteration: 299039
loss: 1.0264215469360352,grad_norm: 0.9786892215888453, iteration: 299040
loss: 0.9759363532066345,grad_norm: 0.8863182709383343, iteration: 299041
loss: 1.0433599948883057,grad_norm: 0.820147066006508, iteration: 299042
loss: 1.0284830331802368,grad_norm: 0.9303294010575116, iteration: 299043
loss: 1.025001049041748,grad_norm: 0.9676837907922154, iteration: 299044
loss: 1.0003726482391357,grad_norm: 0.8807017036913636, iteration: 299045
loss: 1.0198026895523071,grad_norm: 0.8100824351503666, iteration: 299046
loss: 1.0220162868499756,grad_norm: 1.0000000024442257, iteration: 299047
loss: 1.0244265794754028,grad_norm: 0.8521204552831192, iteration: 299048
loss: 1.0092889070510864,grad_norm: 0.7667837648172263, iteration: 299049
loss: 1.0156995058059692,grad_norm: 0.9004284546498744, iteration: 299050
loss: 1.0102680921554565,grad_norm: 0.8862187681876978, iteration: 299051
loss: 0.9891111850738525,grad_norm: 0.9334443675217432, iteration: 299052
loss: 1.014774203300476,grad_norm: 0.9999994854872315, iteration: 299053
loss: 1.030523657798767,grad_norm: 0.891003028327918, iteration: 299054
loss: 1.0113130807876587,grad_norm: 0.9510167258141633, iteration: 299055
loss: 1.038037657737732,grad_norm: 0.9999993271991184, iteration: 299056
loss: 0.9993100762367249,grad_norm: 0.9999993784612692, iteration: 299057
loss: 0.992439329624176,grad_norm: 0.8866639333639125, iteration: 299058
loss: 0.9619705080986023,grad_norm: 0.8732801672653897, iteration: 299059
loss: 1.0740772485733032,grad_norm: 0.9999992082579243, iteration: 299060
loss: 1.3311762809753418,grad_norm: 0.9999998249506734, iteration: 299061
loss: 1.026916265487671,grad_norm: 0.9135834846425585, iteration: 299062
loss: 1.0260694026947021,grad_norm: 0.9999991931928836, iteration: 299063
loss: 1.0205901861190796,grad_norm: 0.9999993789146733, iteration: 299064
loss: 1.033579707145691,grad_norm: 0.8170734980480993, iteration: 299065
loss: 1.0168299674987793,grad_norm: 0.9507858081788564, iteration: 299066
loss: 1.0011247396469116,grad_norm: 0.9757495263636935, iteration: 299067
loss: 1.0210883617401123,grad_norm: 0.92217619260561, iteration: 299068
loss: 1.0120718479156494,grad_norm: 0.8884857079943761, iteration: 299069
loss: 0.991913378238678,grad_norm: 0.8226474483424753, iteration: 299070
loss: 1.019985556602478,grad_norm: 0.7772055770193544, iteration: 299071
loss: 1.035030722618103,grad_norm: 0.8968334932729237, iteration: 299072
loss: 0.9603483080863953,grad_norm: 0.9999991375068938, iteration: 299073
loss: 1.0340025424957275,grad_norm: 0.8883451734611179, iteration: 299074
loss: 0.9934341311454773,grad_norm: 0.7150517170154549, iteration: 299075
loss: 0.979102373123169,grad_norm: 0.9999990171948219, iteration: 299076
loss: 0.9939054250717163,grad_norm: 0.8385708863793434, iteration: 299077
loss: 1.012890100479126,grad_norm: 0.8144873965426002, iteration: 299078
loss: 1.0006133317947388,grad_norm: 0.9999993212048355, iteration: 299079
loss: 0.9767946600914001,grad_norm: 0.9380354963693123, iteration: 299080
loss: 0.9797573685646057,grad_norm: 0.8243935237327493, iteration: 299081
loss: 1.0564887523651123,grad_norm: 0.9197735976758025, iteration: 299082
loss: 0.9765163660049438,grad_norm: 0.999999029520242, iteration: 299083
loss: 0.976071834564209,grad_norm: 0.6218743949681569, iteration: 299084
loss: 0.9622606039047241,grad_norm: 0.9552071199988943, iteration: 299085
loss: 0.9917040467262268,grad_norm: 0.9999991229814154, iteration: 299086
loss: 0.9892359972000122,grad_norm: 0.8218208569796788, iteration: 299087
loss: 1.0154876708984375,grad_norm: 0.7555833265597484, iteration: 299088
loss: 0.9737298488616943,grad_norm: 0.8576196719487718, iteration: 299089
loss: 0.9755002856254578,grad_norm: 0.7412563893592259, iteration: 299090
loss: 0.9928910732269287,grad_norm: 0.8614468403608351, iteration: 299091
loss: 0.9911487102508545,grad_norm: 0.8489521128165527, iteration: 299092
loss: 0.9758484363555908,grad_norm: 0.853445944403276, iteration: 299093
loss: 0.9792418479919434,grad_norm: 0.9388021433282882, iteration: 299094
loss: 1.01043701171875,grad_norm: 0.8456922526243384, iteration: 299095
loss: 0.9869653582572937,grad_norm: 0.9999989852722947, iteration: 299096
loss: 1.0071808099746704,grad_norm: 0.780214184055782, iteration: 299097
loss: 1.0258933305740356,grad_norm: 0.996074765271767, iteration: 299098
loss: 1.0068291425704956,grad_norm: 0.8475135867743179, iteration: 299099
loss: 0.9955629110336304,grad_norm: 0.9048258196502463, iteration: 299100
loss: 1.0137487649917603,grad_norm: 0.9802324689259919, iteration: 299101
loss: 1.0107344388961792,grad_norm: 0.9938855711503709, iteration: 299102
loss: 1.0100764036178589,grad_norm: 0.9569559023493545, iteration: 299103
loss: 0.9949381351470947,grad_norm: 0.9999999950843911, iteration: 299104
loss: 0.9974960088729858,grad_norm: 0.8317689686464358, iteration: 299105
loss: 1.0300673246383667,grad_norm: 0.8048082455344322, iteration: 299106
loss: 0.998478889465332,grad_norm: 0.8930666619281029, iteration: 299107
loss: 1.0595293045043945,grad_norm: 0.9999996679729618, iteration: 299108
loss: 1.0066614151000977,grad_norm: 0.8971016369667755, iteration: 299109
loss: 1.0265766382217407,grad_norm: 0.7890412422780464, iteration: 299110
loss: 0.9707561135292053,grad_norm: 0.9866340584448319, iteration: 299111
loss: 1.0058567523956299,grad_norm: 0.7731652350842616, iteration: 299112
loss: 0.9663591384887695,grad_norm: 0.7719283986699924, iteration: 299113
loss: 0.9649868011474609,grad_norm: 0.8634598060968939, iteration: 299114
loss: 0.9889015555381775,grad_norm: 0.7859048753563217, iteration: 299115
loss: 1.0322518348693848,grad_norm: 0.7188394592019195, iteration: 299116
loss: 1.0093779563903809,grad_norm: 0.9392018083275663, iteration: 299117
loss: 1.0071214437484741,grad_norm: 0.8355892913467543, iteration: 299118
loss: 0.9949454665184021,grad_norm: 0.8267718042056686, iteration: 299119
loss: 1.0195046663284302,grad_norm: 0.8012061360586411, iteration: 299120
loss: 1.0396769046783447,grad_norm: 0.9999998693278664, iteration: 299121
loss: 0.989959180355072,grad_norm: 0.7905913671194296, iteration: 299122
loss: 1.005224585533142,grad_norm: 0.9672674541789915, iteration: 299123
loss: 1.066522240638733,grad_norm: 0.8685027558669096, iteration: 299124
loss: 0.9897183179855347,grad_norm: 0.7515495665267244, iteration: 299125
loss: 1.0208097696304321,grad_norm: 0.9999991555795711, iteration: 299126
loss: 0.9675247669219971,grad_norm: 0.8533696590252784, iteration: 299127
loss: 1.0352169275283813,grad_norm: 0.9176678418833752, iteration: 299128
loss: 1.0331318378448486,grad_norm: 0.8242527614092918, iteration: 299129
loss: 0.9605879187583923,grad_norm: 0.9999991024577656, iteration: 299130
loss: 0.9993773102760315,grad_norm: 0.8289049756548076, iteration: 299131
loss: 0.9959518909454346,grad_norm: 0.7107885693932938, iteration: 299132
loss: 1.0536143779754639,grad_norm: 0.9023438792129912, iteration: 299133
loss: 1.0259454250335693,grad_norm: 0.8715295003545506, iteration: 299134
loss: 1.0047203302383423,grad_norm: 0.7758967976935207, iteration: 299135
loss: 0.9974408149719238,grad_norm: 0.7711914065323986, iteration: 299136
loss: 0.9887136220932007,grad_norm: 0.8150858115348247, iteration: 299137
loss: 0.9782900810241699,grad_norm: 0.7967409150525745, iteration: 299138
loss: 1.0265387296676636,grad_norm: 0.9999991654599515, iteration: 299139
loss: 1.0319520235061646,grad_norm: 0.8916876994096294, iteration: 299140
loss: 0.9903683066368103,grad_norm: 0.7939700615477434, iteration: 299141
loss: 1.0241774320602417,grad_norm: 0.7849218179697842, iteration: 299142
loss: 1.0056092739105225,grad_norm: 0.9999990212753482, iteration: 299143
loss: 0.9964287281036377,grad_norm: 0.7772979924724824, iteration: 299144
loss: 0.995503842830658,grad_norm: 0.8994849587310964, iteration: 299145
loss: 1.000858187675476,grad_norm: 0.9037693511019984, iteration: 299146
loss: 1.0259190797805786,grad_norm: 0.9999998026272182, iteration: 299147
loss: 1.0519169569015503,grad_norm: 0.9999995880227203, iteration: 299148
loss: 0.9975478649139404,grad_norm: 0.8934471697916143, iteration: 299149
loss: 0.9903088808059692,grad_norm: 0.7800340479363551, iteration: 299150
loss: 1.0041428804397583,grad_norm: 0.8082023631761232, iteration: 299151
loss: 0.9869869351387024,grad_norm: 0.9999991621961426, iteration: 299152
loss: 0.9706387519836426,grad_norm: 0.8472476465554479, iteration: 299153
loss: 1.0167714357376099,grad_norm: 0.8939890729259642, iteration: 299154
loss: 1.0650125741958618,grad_norm: 0.9999999424503487, iteration: 299155
loss: 0.9769696593284607,grad_norm: 0.9999990060924525, iteration: 299156
loss: 1.0368484258651733,grad_norm: 0.7751709482287009, iteration: 299157
loss: 1.0001585483551025,grad_norm: 0.7530456486779877, iteration: 299158
loss: 1.0227750539779663,grad_norm: 0.886629598715987, iteration: 299159
loss: 0.9938666224479675,grad_norm: 0.8832780169059692, iteration: 299160
loss: 1.0211145877838135,grad_norm: 0.8255403337700971, iteration: 299161
loss: 1.0101238489151,grad_norm: 0.9373316072979998, iteration: 299162
loss: 0.9631703495979309,grad_norm: 0.8580027057151342, iteration: 299163
loss: 1.0058048963546753,grad_norm: 0.8619894268765315, iteration: 299164
loss: 0.9876405596733093,grad_norm: 0.7603837662182109, iteration: 299165
loss: 0.9873726963996887,grad_norm: 0.8875293321987018, iteration: 299166
loss: 1.0582327842712402,grad_norm: 0.9999993412687064, iteration: 299167
loss: 0.9924123883247375,grad_norm: 0.8479426579603895, iteration: 299168
loss: 1.0040004253387451,grad_norm: 0.9737487953959616, iteration: 299169
loss: 1.0251061916351318,grad_norm: 0.9395847728380018, iteration: 299170
loss: 1.003037452697754,grad_norm: 0.7864509276481253, iteration: 299171
loss: 0.982086718082428,grad_norm: 0.8967416470279749, iteration: 299172
loss: 0.9920523166656494,grad_norm: 0.9999995463336638, iteration: 299173
loss: 1.0399138927459717,grad_norm: 0.9999994013793593, iteration: 299174
loss: 1.0014405250549316,grad_norm: 0.8104930755159713, iteration: 299175
loss: 0.9843885898590088,grad_norm: 0.9837987276138844, iteration: 299176
loss: 0.9845225214958191,grad_norm: 0.9374250201293441, iteration: 299177
loss: 1.0294736623764038,grad_norm: 0.9999991485134696, iteration: 299178
loss: 1.0078414678573608,grad_norm: 0.9558042391730142, iteration: 299179
loss: 0.980981171131134,grad_norm: 0.8281749903316528, iteration: 299180
loss: 1.0087392330169678,grad_norm: 0.9643390280765862, iteration: 299181
loss: 0.9997870326042175,grad_norm: 0.9999991779806859, iteration: 299182
loss: 1.096084713935852,grad_norm: 0.9999998289775563, iteration: 299183
loss: 1.0565253496170044,grad_norm: 0.9999993938514569, iteration: 299184
loss: 0.9946280121803284,grad_norm: 0.8576977654755735, iteration: 299185
loss: 0.991249680519104,grad_norm: 0.95544811008065, iteration: 299186
loss: 1.0027070045471191,grad_norm: 0.7854470018581295, iteration: 299187
loss: 1.0039637088775635,grad_norm: 0.8865731835066597, iteration: 299188
loss: 0.9716737270355225,grad_norm: 0.9033696457673462, iteration: 299189
loss: 0.9826765060424805,grad_norm: 0.8756125025385706, iteration: 299190
loss: 1.0279580354690552,grad_norm: 0.872811410757739, iteration: 299191
loss: 0.9832079410552979,grad_norm: 0.7045405887581094, iteration: 299192
loss: 0.9861894249916077,grad_norm: 0.8716800909567166, iteration: 299193
loss: 1.0239338874816895,grad_norm: 0.7880755156855846, iteration: 299194
loss: 0.951662003993988,grad_norm: 0.7168393853995086, iteration: 299195
loss: 1.0738863945007324,grad_norm: 0.9999995599934522, iteration: 299196
loss: 1.0055770874023438,grad_norm: 0.7943515492126112, iteration: 299197
loss: 1.0049402713775635,grad_norm: 0.7963583777966786, iteration: 299198
loss: 1.0058335065841675,grad_norm: 0.8528505119059319, iteration: 299199
loss: 0.9791216850280762,grad_norm: 0.9245671831843542, iteration: 299200
loss: 0.9832338690757751,grad_norm: 0.7826576767702615, iteration: 299201
loss: 0.9987714290618896,grad_norm: 0.7492133169597123, iteration: 299202
loss: 1.0143568515777588,grad_norm: 0.9577199650247263, iteration: 299203
loss: 0.978229284286499,grad_norm: 0.852490809409853, iteration: 299204
loss: 1.01095712184906,grad_norm: 0.9999989005307828, iteration: 299205
loss: 1.0360071659088135,grad_norm: 0.9313678331748851, iteration: 299206
loss: 1.0096722841262817,grad_norm: 0.9700753962055918, iteration: 299207
loss: 0.9924252033233643,grad_norm: 0.7915219333271886, iteration: 299208
loss: 1.0219205617904663,grad_norm: 0.7405675158475606, iteration: 299209
loss: 1.0383968353271484,grad_norm: 0.7709768524948369, iteration: 299210
loss: 1.0122767686843872,grad_norm: 0.7547800888070179, iteration: 299211
loss: 0.9924892783164978,grad_norm: 0.7801738249464822, iteration: 299212
loss: 1.001212239265442,grad_norm: 0.8432626145832525, iteration: 299213
loss: 0.9454546570777893,grad_norm: 0.842191858213251, iteration: 299214
loss: 0.9982760548591614,grad_norm: 0.7341896982829059, iteration: 299215
loss: 0.9846048355102539,grad_norm: 0.8038353067526924, iteration: 299216
loss: 0.9845170974731445,grad_norm: 0.7911460925523185, iteration: 299217
loss: 0.9912256598472595,grad_norm: 0.9657822072398167, iteration: 299218
loss: 0.9689757823944092,grad_norm: 0.9999990396857373, iteration: 299219
loss: 0.9794661998748779,grad_norm: 0.7756002132834577, iteration: 299220
loss: 0.9989778399467468,grad_norm: 0.8367467256992257, iteration: 299221
loss: 1.001035451889038,grad_norm: 0.7907013399097241, iteration: 299222
loss: 0.9719434380531311,grad_norm: 0.7888510748337274, iteration: 299223
loss: 0.99066162109375,grad_norm: 0.8551732077713718, iteration: 299224
loss: 0.9958022236824036,grad_norm: 0.8060345291650157, iteration: 299225
loss: 0.9835380911827087,grad_norm: 0.7963877580293653, iteration: 299226
loss: 0.973289966583252,grad_norm: 0.82733029061155, iteration: 299227
loss: 1.007198452949524,grad_norm: 0.9999998802740019, iteration: 299228
loss: 0.9939664602279663,grad_norm: 0.9471381050410003, iteration: 299229
loss: 1.0126404762268066,grad_norm: 0.9999991006472491, iteration: 299230
loss: 0.9496976137161255,grad_norm: 0.83709537121436, iteration: 299231
loss: 1.0010913610458374,grad_norm: 0.8746587360598438, iteration: 299232
loss: 1.0110455751419067,grad_norm: 0.8429454276490832, iteration: 299233
loss: 1.023443579673767,grad_norm: 0.8511820106759141, iteration: 299234
loss: 1.0279598236083984,grad_norm: 0.8004345622201207, iteration: 299235
loss: 0.9879287481307983,grad_norm: 0.9320049224440471, iteration: 299236
loss: 0.9660549759864807,grad_norm: 0.9526542820869185, iteration: 299237
loss: 0.9966043829917908,grad_norm: 0.8857301786967049, iteration: 299238
loss: 0.9858532547950745,grad_norm: 0.774349262927793, iteration: 299239
loss: 0.9712070822715759,grad_norm: 0.7624587767767772, iteration: 299240
loss: 1.0808181762695312,grad_norm: 0.9999995014072317, iteration: 299241
loss: 0.9677922129631042,grad_norm: 0.854116205231951, iteration: 299242
loss: 0.9880660772323608,grad_norm: 0.752054477004284, iteration: 299243
loss: 0.9936351776123047,grad_norm: 0.8718125998387273, iteration: 299244
loss: 1.0081651210784912,grad_norm: 0.7872259003377251, iteration: 299245
loss: 1.0050346851348877,grad_norm: 0.9763672119744401, iteration: 299246
loss: 0.9998676180839539,grad_norm: 0.7108884586703232, iteration: 299247
loss: 0.953074038028717,grad_norm: 0.9346252632987871, iteration: 299248
loss: 1.0145764350891113,grad_norm: 0.9271084984015607, iteration: 299249
loss: 1.0297991037368774,grad_norm: 0.994230676127239, iteration: 299250
loss: 0.9748316407203674,grad_norm: 0.939798130486574, iteration: 299251
loss: 0.9870908856391907,grad_norm: 0.6943945161625587, iteration: 299252
loss: 1.0057364702224731,grad_norm: 0.9999991894607378, iteration: 299253
loss: 0.9818322062492371,grad_norm: 0.8114657848940594, iteration: 299254
loss: 0.9827206134796143,grad_norm: 0.8204868794469106, iteration: 299255
loss: 1.02700674533844,grad_norm: 0.9338414333266082, iteration: 299256
loss: 1.0108143091201782,grad_norm: 0.9842444260728969, iteration: 299257
loss: 1.0141984224319458,grad_norm: 0.847825971027169, iteration: 299258
loss: 0.9569181203842163,grad_norm: 0.7762113589635746, iteration: 299259
loss: 0.9526255130767822,grad_norm: 0.797604776777613, iteration: 299260
loss: 0.9798508286476135,grad_norm: 0.9273350706245508, iteration: 299261
loss: 0.9912246465682983,grad_norm: 0.9475360558402437, iteration: 299262
loss: 0.9925060272216797,grad_norm: 0.8328375220906951, iteration: 299263
loss: 1.0165908336639404,grad_norm: 0.9999989945588494, iteration: 299264
loss: 1.032374382019043,grad_norm: 0.88233189412384, iteration: 299265
loss: 0.9585166573524475,grad_norm: 0.8955468053808517, iteration: 299266
loss: 1.0707898139953613,grad_norm: 0.999999685150033, iteration: 299267
loss: 1.0216375589370728,grad_norm: 0.7424520577210886, iteration: 299268
loss: 0.9697177410125732,grad_norm: 0.9498461923112942, iteration: 299269
loss: 0.9764706492424011,grad_norm: 0.9999991378727593, iteration: 299270
loss: 0.9828714728355408,grad_norm: 0.7177866925552842, iteration: 299271
loss: 0.9743481874465942,grad_norm: 0.7921579097062628, iteration: 299272
loss: 1.0020877122879028,grad_norm: 0.7180460514508589, iteration: 299273
loss: 0.9749306440353394,grad_norm: 0.8392381899870223, iteration: 299274
loss: 0.9843637347221375,grad_norm: 0.8409606748809071, iteration: 299275
loss: 0.9755527377128601,grad_norm: 0.9197886251492264, iteration: 299276
loss: 0.9875444173812866,grad_norm: 0.7387422753801254, iteration: 299277
loss: 1.0173518657684326,grad_norm: 0.9999995538973244, iteration: 299278
loss: 1.0085945129394531,grad_norm: 0.7617902241375197, iteration: 299279
loss: 1.0059731006622314,grad_norm: 0.9052076198682504, iteration: 299280
loss: 0.9742448329925537,grad_norm: 0.8225702492002162, iteration: 299281
loss: 0.9878547787666321,grad_norm: 0.9999990352145616, iteration: 299282
loss: 0.9809533357620239,grad_norm: 0.9271941947254578, iteration: 299283
loss: 1.0374501943588257,grad_norm: 0.9999991853706509, iteration: 299284
loss: 1.0633703470230103,grad_norm: 0.8545982022657628, iteration: 299285
loss: 0.9519666433334351,grad_norm: 0.751273545151115, iteration: 299286
loss: 1.0100593566894531,grad_norm: 0.8640334044363011, iteration: 299287
loss: 0.9924465417861938,grad_norm: 1.0000000112608065, iteration: 299288
loss: 0.9675424695014954,grad_norm: 0.9999992140227114, iteration: 299289
loss: 0.9821707606315613,grad_norm: 0.888627423599786, iteration: 299290
loss: 1.004397988319397,grad_norm: 0.8075846895530895, iteration: 299291
loss: 0.9874212145805359,grad_norm: 0.9999994955066546, iteration: 299292
loss: 0.9866238832473755,grad_norm: 0.9681630827985861, iteration: 299293
loss: 0.9682010412216187,grad_norm: 0.999999183196424, iteration: 299294
loss: 1.0072498321533203,grad_norm: 0.8731658600874259, iteration: 299295
loss: 1.0318596363067627,grad_norm: 0.9999999270594614, iteration: 299296
loss: 0.963161051273346,grad_norm: 0.8778904736327041, iteration: 299297
loss: 1.0575165748596191,grad_norm: 0.9999992898490776, iteration: 299298
loss: 1.0296217203140259,grad_norm: 0.999999104096504, iteration: 299299
loss: 0.9905225038528442,grad_norm: 0.7567046452984414, iteration: 299300
loss: 0.9929866790771484,grad_norm: 0.9052970705882694, iteration: 299301
loss: 1.0064588785171509,grad_norm: 0.9934730281296805, iteration: 299302
loss: 1.0984758138656616,grad_norm: 0.9249502981456541, iteration: 299303
loss: 1.1104964017868042,grad_norm: 0.9999997823942288, iteration: 299304
loss: 1.090831995010376,grad_norm: 0.9999998924934831, iteration: 299305
loss: 1.0085197687149048,grad_norm: 0.8674444149838657, iteration: 299306
loss: 0.9988849759101868,grad_norm: 0.698461576877065, iteration: 299307
loss: 0.9865162372589111,grad_norm: 0.9356227753925548, iteration: 299308
loss: 0.9799091815948486,grad_norm: 0.8141413641685227, iteration: 299309
loss: 0.9800258874893188,grad_norm: 0.7783486206246775, iteration: 299310
loss: 0.9954311847686768,grad_norm: 0.7771888340441983, iteration: 299311
loss: 1.0153151750564575,grad_norm: 0.8099905406989089, iteration: 299312
loss: 1.029366374015808,grad_norm: 0.8533200520299369, iteration: 299313
loss: 0.9875218868255615,grad_norm: 0.8257634307882579, iteration: 299314
loss: 0.9768756031990051,grad_norm: 0.9439634883819007, iteration: 299315
loss: 0.9646815061569214,grad_norm: 0.9999991538270383, iteration: 299316
loss: 0.9549422860145569,grad_norm: 0.8004034771369554, iteration: 299317
loss: 1.0189663171768188,grad_norm: 0.8839348093500731, iteration: 299318
loss: 1.0004690885543823,grad_norm: 0.9999992919266352, iteration: 299319
loss: 0.9931082725524902,grad_norm: 0.829061934517557, iteration: 299320
loss: 1.0164819955825806,grad_norm: 0.999999166152028, iteration: 299321
loss: 0.9773868918418884,grad_norm: 0.9225695609044333, iteration: 299322
loss: 1.0158271789550781,grad_norm: 0.9607028132847357, iteration: 299323
loss: 1.0355455875396729,grad_norm: 0.9999994661608058, iteration: 299324
loss: 0.9977164268493652,grad_norm: 0.7734772436326204, iteration: 299325
loss: 0.991005539894104,grad_norm: 0.7767475515087681, iteration: 299326
loss: 0.9733501672744751,grad_norm: 0.903907745896152, iteration: 299327
loss: 1.0034630298614502,grad_norm: 0.8808365333363517, iteration: 299328
loss: 1.0953713655471802,grad_norm: 0.9999991393669392, iteration: 299329
loss: 0.9695585370063782,grad_norm: 0.9522101980237532, iteration: 299330
loss: 0.9819448590278625,grad_norm: 0.7712359901064632, iteration: 299331
loss: 1.0032436847686768,grad_norm: 0.8781791240016728, iteration: 299332
loss: 1.0384786128997803,grad_norm: 0.916616088627533, iteration: 299333
loss: 1.0089645385742188,grad_norm: 0.9999991234435097, iteration: 299334
loss: 0.9745690226554871,grad_norm: 0.9525045915004351, iteration: 299335
loss: 1.0448126792907715,grad_norm: 0.7999302763112243, iteration: 299336
loss: 0.9702760577201843,grad_norm: 0.8481869448912682, iteration: 299337
loss: 1.0321255922317505,grad_norm: 0.9478306249558988, iteration: 299338
loss: 1.000192403793335,grad_norm: 0.884434950809404, iteration: 299339
loss: 0.9924821257591248,grad_norm: 0.8258537861152213, iteration: 299340
loss: 0.961030125617981,grad_norm: 0.999999613512086, iteration: 299341
loss: 0.9951865673065186,grad_norm: 0.8718472228558312, iteration: 299342
loss: 1.0146702527999878,grad_norm: 0.8956993949306761, iteration: 299343
loss: 1.0118286609649658,grad_norm: 0.8024285969063079, iteration: 299344
loss: 1.0409692525863647,grad_norm: 0.8040882153424357, iteration: 299345
loss: 0.9727384448051453,grad_norm: 0.9999991047292099, iteration: 299346
loss: 1.0213966369628906,grad_norm: 0.9584561231197657, iteration: 299347
loss: 1.0427937507629395,grad_norm: 0.9999989777937871, iteration: 299348
loss: 1.0205243825912476,grad_norm: 0.7751498067097151, iteration: 299349
loss: 0.9727821350097656,grad_norm: 0.725457646245566, iteration: 299350
loss: 1.011069655418396,grad_norm: 0.9999992901464158, iteration: 299351
loss: 0.9928135871887207,grad_norm: 0.8327383076454346, iteration: 299352
loss: 1.0204720497131348,grad_norm: 0.7584370990405945, iteration: 299353
loss: 0.9823386669158936,grad_norm: 0.7563192942489807, iteration: 299354
loss: 0.9939655661582947,grad_norm: 0.8435766178602301, iteration: 299355
loss: 0.9848816990852356,grad_norm: 0.7536637756388366, iteration: 299356
loss: 1.0235141515731812,grad_norm: 0.8257064078822202, iteration: 299357
loss: 1.0080440044403076,grad_norm: 0.9999991914723018, iteration: 299358
loss: 1.0014466047286987,grad_norm: 0.8527737540765673, iteration: 299359
loss: 1.0121564865112305,grad_norm: 0.9999993106723019, iteration: 299360
loss: 1.0007989406585693,grad_norm: 0.8512950450888996, iteration: 299361
loss: 1.0080331563949585,grad_norm: 0.9437549916925032, iteration: 299362
loss: 1.0365296602249146,grad_norm: 0.8867391801085069, iteration: 299363
loss: 1.0376468896865845,grad_norm: 0.9999990870785787, iteration: 299364
loss: 1.0169910192489624,grad_norm: 0.8461354552577756, iteration: 299365
loss: 1.0260956287384033,grad_norm: 0.999999082251719, iteration: 299366
loss: 0.9942092895507812,grad_norm: 0.9762932342838216, iteration: 299367
loss: 0.9817431569099426,grad_norm: 0.7172747529458224, iteration: 299368
loss: 0.9761897325515747,grad_norm: 0.943730619107448, iteration: 299369
loss: 0.9981474280357361,grad_norm: 0.8130655326428098, iteration: 299370
loss: 0.9973478317260742,grad_norm: 0.8034079642548835, iteration: 299371
loss: 1.0170788764953613,grad_norm: 0.9030240071225119, iteration: 299372
loss: 0.9978023171424866,grad_norm: 0.8571952543394648, iteration: 299373
loss: 1.0263031721115112,grad_norm: 0.9999992186344366, iteration: 299374
loss: 0.9875618815422058,grad_norm: 0.950365918146489, iteration: 299375
loss: 0.9661052227020264,grad_norm: 0.9999993022380033, iteration: 299376
loss: 0.9948152303695679,grad_norm: 0.9867676424593871, iteration: 299377
loss: 0.9809166193008423,grad_norm: 0.7844724666934162, iteration: 299378
loss: 0.9820805788040161,grad_norm: 0.8693144092275383, iteration: 299379
loss: 0.96474689245224,grad_norm: 0.7687151636483549, iteration: 299380
loss: 1.0472744703292847,grad_norm: 1.000000048417567, iteration: 299381
loss: 0.9628704190254211,grad_norm: 0.7382996250723447, iteration: 299382
loss: 1.0006511211395264,grad_norm: 0.9999998433821249, iteration: 299383
loss: 1.0149449110031128,grad_norm: 0.8865470326981881, iteration: 299384
loss: 0.964278519153595,grad_norm: 0.9817673741973124, iteration: 299385
loss: 0.9970476627349854,grad_norm: 0.727578136604394, iteration: 299386
loss: 0.9927093386650085,grad_norm: 0.9121619223536885, iteration: 299387
loss: 0.970947265625,grad_norm: 0.9999998845497193, iteration: 299388
loss: 0.9995448589324951,grad_norm: 0.9057108631709038, iteration: 299389
loss: 0.9956130981445312,grad_norm: 0.7979030683340282, iteration: 299390
loss: 0.969577431678772,grad_norm: 0.9045825016103834, iteration: 299391
loss: 1.0244768857955933,grad_norm: 0.8466898678170357, iteration: 299392
loss: 1.0063308477401733,grad_norm: 0.948974330777373, iteration: 299393
loss: 0.9942694902420044,grad_norm: 0.7749874362074219, iteration: 299394
loss: 0.985400915145874,grad_norm: 0.821297826688252, iteration: 299395
loss: 0.9881697297096252,grad_norm: 0.918498003956774, iteration: 299396
loss: 1.1021766662597656,grad_norm: 0.7942770498820105, iteration: 299397
loss: 0.9953548312187195,grad_norm: 0.7658026454577656, iteration: 299398
loss: 0.9963343739509583,grad_norm: 0.9664991674031612, iteration: 299399
loss: 1.0082812309265137,grad_norm: 0.8485053396289001, iteration: 299400
loss: 1.015932321548462,grad_norm: 0.9999992267975114, iteration: 299401
loss: 0.9640486240386963,grad_norm: 0.928885671480869, iteration: 299402
loss: 0.9775622487068176,grad_norm: 0.7965881209816467, iteration: 299403
loss: 0.9423055052757263,grad_norm: 0.7991650620101959, iteration: 299404
loss: 1.037047028541565,grad_norm: 0.7779689095273566, iteration: 299405
loss: 0.9772416949272156,grad_norm: 0.900984473574058, iteration: 299406
loss: 0.9837222695350647,grad_norm: 0.7859816953167084, iteration: 299407
loss: 1.0335719585418701,grad_norm: 0.8609283208067813, iteration: 299408
loss: 0.9584941267967224,grad_norm: 0.8002706756893175, iteration: 299409
loss: 1.0075815916061401,grad_norm: 0.9999997734488342, iteration: 299410
loss: 0.9481090903282166,grad_norm: 0.7293500857164842, iteration: 299411
loss: 1.0342050790786743,grad_norm: 0.803031292014765, iteration: 299412
loss: 1.0163538455963135,grad_norm: 0.8387494523337043, iteration: 299413
loss: 0.9859458804130554,grad_norm: 0.9999990673267999, iteration: 299414
loss: 0.981766402721405,grad_norm: 0.8891022735147633, iteration: 299415
loss: 0.9789252281188965,grad_norm: 0.7846448681471353, iteration: 299416
loss: 1.0051226615905762,grad_norm: 0.9999991345095507, iteration: 299417
loss: 0.9956504106521606,grad_norm: 0.8633968801820642, iteration: 299418
loss: 0.9944043159484863,grad_norm: 0.8358886239999652, iteration: 299419
loss: 0.9773681163787842,grad_norm: 0.7011035108431297, iteration: 299420
loss: 0.9708347320556641,grad_norm: 0.8316607147280848, iteration: 299421
loss: 0.9925889372825623,grad_norm: 0.8694650655026364, iteration: 299422
loss: 0.9860355257987976,grad_norm: 0.9999992042173023, iteration: 299423
loss: 1.0792245864868164,grad_norm: 0.9999990482639739, iteration: 299424
loss: 1.0473889112472534,grad_norm: 0.8830372199520684, iteration: 299425
loss: 0.998671293258667,grad_norm: 0.8243110662158845, iteration: 299426
loss: 1.0084717273712158,grad_norm: 0.8029526075231307, iteration: 299427
loss: 0.9950699806213379,grad_norm: 0.9291644519282332, iteration: 299428
loss: 1.0390714406967163,grad_norm: 0.8775807620194834, iteration: 299429
loss: 0.991804301738739,grad_norm: 0.9999997842347359, iteration: 299430
loss: 0.9903725981712341,grad_norm: 0.9999994100518259, iteration: 299431
loss: 1.0057048797607422,grad_norm: 0.8972164827975917, iteration: 299432
loss: 0.9986425638198853,grad_norm: 0.7058421419190971, iteration: 299433
loss: 0.9933701157569885,grad_norm: 0.717087936407629, iteration: 299434
loss: 0.9651796817779541,grad_norm: 0.8903345732378093, iteration: 299435
loss: 1.0050926208496094,grad_norm: 0.8766727599988905, iteration: 299436
loss: 0.9866662621498108,grad_norm: 0.779337317404525, iteration: 299437
loss: 1.007185935974121,grad_norm: 0.8211210133185688, iteration: 299438
loss: 0.9859102964401245,grad_norm: 0.9212041770665558, iteration: 299439
loss: 0.9950914978981018,grad_norm: 0.8741782955972761, iteration: 299440
loss: 0.9969359040260315,grad_norm: 0.8384708086330637, iteration: 299441
loss: 1.0114319324493408,grad_norm: 0.8534410326400963, iteration: 299442
loss: 1.0246690511703491,grad_norm: 0.8839597167000143, iteration: 299443
loss: 0.9958649277687073,grad_norm: 0.7882564396219748, iteration: 299444
loss: 0.9767932295799255,grad_norm: 0.8922255444941313, iteration: 299445
loss: 1.0401525497436523,grad_norm: 0.8225911915392015, iteration: 299446
loss: 1.0127241611480713,grad_norm: 0.9010438530685996, iteration: 299447
loss: 1.0141510963439941,grad_norm: 0.9825754812367615, iteration: 299448
loss: 0.9702286720275879,grad_norm: 0.8931069440207944, iteration: 299449
loss: 1.0630351305007935,grad_norm: 0.8082798244196242, iteration: 299450
loss: 0.9932678937911987,grad_norm: 0.8416080541972276, iteration: 299451
loss: 1.0148754119873047,grad_norm: 0.8007987405591378, iteration: 299452
loss: 0.988344132900238,grad_norm: 0.9999998729712325, iteration: 299453
loss: 0.9855673909187317,grad_norm: 0.8128902094615247, iteration: 299454
loss: 0.9767897725105286,grad_norm: 0.8737438466724254, iteration: 299455
loss: 1.008801817893982,grad_norm: 0.8535209319112705, iteration: 299456
loss: 1.0020947456359863,grad_norm: 0.8675227708576273, iteration: 299457
loss: 0.9968087077140808,grad_norm: 0.9536700474224055, iteration: 299458
loss: 0.993864119052887,grad_norm: 0.8023311043532113, iteration: 299459
loss: 0.9995022416114807,grad_norm: 0.8974405501348126, iteration: 299460
loss: 1.0632022619247437,grad_norm: 0.9999995571391652, iteration: 299461
loss: 1.0450892448425293,grad_norm: 0.8811108242744344, iteration: 299462
loss: 0.9926056265830994,grad_norm: 0.9085338299090426, iteration: 299463
loss: 1.0006535053253174,grad_norm: 0.8854445094947754, iteration: 299464
loss: 1.0175986289978027,grad_norm: 0.974003592588727, iteration: 299465
loss: 1.0363519191741943,grad_norm: 0.7583345871605445, iteration: 299466
loss: 1.0075644254684448,grad_norm: 0.8315421676943137, iteration: 299467
loss: 0.9966551065444946,grad_norm: 0.9999994191752538, iteration: 299468
loss: 1.0003067255020142,grad_norm: 0.9999998967101689, iteration: 299469
loss: 1.0447629690170288,grad_norm: 0.7421838054709338, iteration: 299470
loss: 0.9792276620864868,grad_norm: 0.8542774823123137, iteration: 299471
loss: 0.9816069602966309,grad_norm: 0.8485354420837081, iteration: 299472
loss: 0.9688535928726196,grad_norm: 0.9999991282749876, iteration: 299473
loss: 0.980303168296814,grad_norm: 0.8772612319965694, iteration: 299474
loss: 0.99666827917099,grad_norm: 0.8294405681994379, iteration: 299475
loss: 0.9967078566551208,grad_norm: 0.959363973674545, iteration: 299476
loss: 0.9838129281997681,grad_norm: 0.8739681501816754, iteration: 299477
loss: 1.0208889245986938,grad_norm: 0.8996697019792611, iteration: 299478
loss: 0.9751204252243042,grad_norm: 0.9999990972975966, iteration: 299479
loss: 1.026342511177063,grad_norm: 0.8592864534109033, iteration: 299480
loss: 0.9952024817466736,grad_norm: 0.8157648244516273, iteration: 299481
loss: 0.9949324131011963,grad_norm: 0.8165104724423604, iteration: 299482
loss: 0.9413837790489197,grad_norm: 0.9361441320808791, iteration: 299483
loss: 0.9821914434432983,grad_norm: 0.8623230084481289, iteration: 299484
loss: 1.0090975761413574,grad_norm: 0.9999998803947653, iteration: 299485
loss: 0.9745457768440247,grad_norm: 0.867403945300893, iteration: 299486
loss: 0.9715535640716553,grad_norm: 0.6913892831319721, iteration: 299487
loss: 1.0277410745620728,grad_norm: 0.8884819950032785, iteration: 299488
loss: 0.9876728057861328,grad_norm: 0.9999991545792932, iteration: 299489
loss: 1.0158299207687378,grad_norm: 0.6931604142717194, iteration: 299490
loss: 0.9649194478988647,grad_norm: 0.8737048225446399, iteration: 299491
loss: 0.9801003336906433,grad_norm: 0.9119967790730299, iteration: 299492
loss: 1.00059175491333,grad_norm: 0.9705309205415397, iteration: 299493
loss: 1.0190925598144531,grad_norm: 0.7488166278712336, iteration: 299494
loss: 1.0138319730758667,grad_norm: 0.704816342085334, iteration: 299495
loss: 1.008921504020691,grad_norm: 0.9999990345128816, iteration: 299496
loss: 1.002655267715454,grad_norm: 0.9655137393758417, iteration: 299497
loss: 0.9914866089820862,grad_norm: 0.7464875862402032, iteration: 299498
loss: 1.043127417564392,grad_norm: 0.9999993272916273, iteration: 299499
loss: 0.9941550493240356,grad_norm: 0.999999006748711, iteration: 299500
loss: 0.9945486187934875,grad_norm: 0.9097132669391952, iteration: 299501
loss: 0.997300386428833,grad_norm: 0.7555949358299668, iteration: 299502
loss: 1.0093203783035278,grad_norm: 0.8614593573331415, iteration: 299503
loss: 0.9812813997268677,grad_norm: 0.8638351735135883, iteration: 299504
loss: 0.9921641945838928,grad_norm: 0.7607571872511935, iteration: 299505
loss: 1.0061063766479492,grad_norm: 0.99999976177132, iteration: 299506
loss: 0.9985840320587158,grad_norm: 0.9256988869009419, iteration: 299507
loss: 0.9713349342346191,grad_norm: 0.8353710674536011, iteration: 299508
loss: 0.9995826482772827,grad_norm: 0.8290491880681726, iteration: 299509
loss: 0.9703220725059509,grad_norm: 0.6977772822769824, iteration: 299510
loss: 0.9869022965431213,grad_norm: 0.8235648128939215, iteration: 299511
loss: 0.977794349193573,grad_norm: 0.9669293169149168, iteration: 299512
loss: 0.9997056126594543,grad_norm: 0.7627200270493522, iteration: 299513
loss: 0.9989147186279297,grad_norm: 0.9503877948282828, iteration: 299514
loss: 0.9865741729736328,grad_norm: 0.7656727043823063, iteration: 299515
loss: 0.9879175424575806,grad_norm: 0.9413622282397566, iteration: 299516
loss: 0.992811918258667,grad_norm: 0.8856657162000934, iteration: 299517
loss: 0.9785211086273193,grad_norm: 0.7529632274932551, iteration: 299518
loss: 1.0135365724563599,grad_norm: 0.7562050013514018, iteration: 299519
loss: 1.017453670501709,grad_norm: 0.7103270902027973, iteration: 299520
loss: 1.0062857866287231,grad_norm: 0.9999991596765927, iteration: 299521
loss: 1.0127707719802856,grad_norm: 0.9999994919478544, iteration: 299522
loss: 0.9626851081848145,grad_norm: 0.8421359986428314, iteration: 299523
loss: 0.9897063970565796,grad_norm: 0.9219829956362454, iteration: 299524
loss: 1.0449126958847046,grad_norm: 0.8795118527893785, iteration: 299525
loss: 0.9838831424713135,grad_norm: 0.7906680569278739, iteration: 299526
loss: 0.9720416069030762,grad_norm: 0.7615399445588793, iteration: 299527
loss: 1.0198410749435425,grad_norm: 0.9851914742411112, iteration: 299528
loss: 1.0460543632507324,grad_norm: 0.9999990770303294, iteration: 299529
loss: 0.9950976967811584,grad_norm: 0.8209320577258321, iteration: 299530
loss: 0.9778118133544922,grad_norm: 0.9033292582335954, iteration: 299531
loss: 0.9950774908065796,grad_norm: 0.9999993082268661, iteration: 299532
loss: 1.006954312324524,grad_norm: 0.9829392382506497, iteration: 299533
loss: 1.0287777185440063,grad_norm: 0.8455877411998659, iteration: 299534
loss: 0.981515645980835,grad_norm: 0.9999990776816619, iteration: 299535
loss: 1.0084266662597656,grad_norm: 0.8428753268933287, iteration: 299536
loss: 0.9834853410720825,grad_norm: 0.9508245509682509, iteration: 299537
loss: 1.064949631690979,grad_norm: 0.9999991235829269, iteration: 299538
loss: 1.005920171737671,grad_norm: 0.9999990160542043, iteration: 299539
loss: 0.9743407368659973,grad_norm: 0.8527422698029906, iteration: 299540
loss: 0.9938485026359558,grad_norm: 0.9917990654835799, iteration: 299541
loss: 1.0024352073669434,grad_norm: 0.7705139102904309, iteration: 299542
loss: 1.027070164680481,grad_norm: 0.8375615048197746, iteration: 299543
loss: 1.0458050966262817,grad_norm: 0.940028513799581, iteration: 299544
loss: 1.0298283100128174,grad_norm: 0.9999999003554385, iteration: 299545
loss: 0.9855901002883911,grad_norm: 0.7797569092091685, iteration: 299546
loss: 1.031682014465332,grad_norm: 0.7343225371437687, iteration: 299547
loss: 0.9885327816009521,grad_norm: 0.7613350079881862, iteration: 299548
loss: 1.0015218257904053,grad_norm: 0.9527736309409368, iteration: 299549
loss: 1.0285406112670898,grad_norm: 0.860811863889205, iteration: 299550
loss: 0.9718772172927856,grad_norm: 0.8110049489449235, iteration: 299551
loss: 0.9948011636734009,grad_norm: 0.975829885306612, iteration: 299552
loss: 0.9978715181350708,grad_norm: 0.9255258867871253, iteration: 299553
loss: 0.9717350602149963,grad_norm: 0.9430329065705824, iteration: 299554
loss: 0.9932403564453125,grad_norm: 0.7678379862902871, iteration: 299555
loss: 1.002305507659912,grad_norm: 0.84379671847825, iteration: 299556
loss: 1.0151351690292358,grad_norm: 0.7371945965166019, iteration: 299557
loss: 0.9967582821846008,grad_norm: 0.9999991385675576, iteration: 299558
loss: 1.0280816555023193,grad_norm: 0.9382955580900563, iteration: 299559
loss: 1.0273033380508423,grad_norm: 0.7553314830296469, iteration: 299560
loss: 0.9751014709472656,grad_norm: 0.8116732642785337, iteration: 299561
loss: 1.0170162916183472,grad_norm: 0.7439434987632565, iteration: 299562
loss: 1.0010966062545776,grad_norm: 0.9999993650072342, iteration: 299563
loss: 1.0029008388519287,grad_norm: 0.9999990904740109, iteration: 299564
loss: 0.9711093902587891,grad_norm: 0.8371197098431603, iteration: 299565
loss: 1.0213441848754883,grad_norm: 0.9999991066035849, iteration: 299566
loss: 1.0387619733810425,grad_norm: 0.874517922655096, iteration: 299567
loss: 0.9654379487037659,grad_norm: 0.7348066719686965, iteration: 299568
loss: 1.0286343097686768,grad_norm: 0.8609501103050787, iteration: 299569
loss: 1.09408438205719,grad_norm: 0.9999996101015475, iteration: 299570
loss: 0.988970160484314,grad_norm: 0.8275025135504271, iteration: 299571
loss: 1.0055088996887207,grad_norm: 0.9999993672706111, iteration: 299572
loss: 0.9666547179222107,grad_norm: 0.9999990829927206, iteration: 299573
loss: 1.0306254625320435,grad_norm: 0.9350245744682991, iteration: 299574
loss: 0.9724426865577698,grad_norm: 0.9446272414341974, iteration: 299575
loss: 0.976570725440979,grad_norm: 0.8959314278036268, iteration: 299576
loss: 1.0027391910552979,grad_norm: 0.9146240292202403, iteration: 299577
loss: 0.966974675655365,grad_norm: 0.6836966344219776, iteration: 299578
loss: 1.0223727226257324,grad_norm: 0.9194646315786754, iteration: 299579
loss: 1.0361130237579346,grad_norm: 0.8684153890812356, iteration: 299580
loss: 1.0168477296829224,grad_norm: 0.79772953702737, iteration: 299581
loss: 1.0324618816375732,grad_norm: 0.7681126529871026, iteration: 299582
loss: 0.9899858832359314,grad_norm: 0.8381213975798759, iteration: 299583
loss: 0.993622899055481,grad_norm: 0.9320827613231824, iteration: 299584
loss: 0.9668470621109009,grad_norm: 0.7600279046600427, iteration: 299585
loss: 1.026259183883667,grad_norm: 0.7057305290684639, iteration: 299586
loss: 1.0036691427230835,grad_norm: 0.8721590579241895, iteration: 299587
loss: 0.9559110403060913,grad_norm: 0.8722843719997274, iteration: 299588
loss: 0.9890873432159424,grad_norm: 0.9161025559353749, iteration: 299589
loss: 1.0005179643630981,grad_norm: 0.8571252948166932, iteration: 299590
loss: 0.9441450834274292,grad_norm: 0.900776929538383, iteration: 299591
loss: 0.9979140758514404,grad_norm: 0.9999992247712008, iteration: 299592
loss: 0.9940161108970642,grad_norm: 0.9857432883346365, iteration: 299593
loss: 1.0547124147415161,grad_norm: 0.999999332318051, iteration: 299594
loss: 1.0055636167526245,grad_norm: 0.9115528418323705, iteration: 299595
loss: 0.9890445470809937,grad_norm: 0.9356487017406259, iteration: 299596
loss: 0.9922229647636414,grad_norm: 0.9999994056892084, iteration: 299597
loss: 1.024503469467163,grad_norm: 0.8734728559148732, iteration: 299598
loss: 1.0380181074142456,grad_norm: 0.9999991694941024, iteration: 299599
loss: 0.9744938611984253,grad_norm: 0.7209998468509147, iteration: 299600
loss: 1.0303961038589478,grad_norm: 0.8178785125569835, iteration: 299601
loss: 0.9986051321029663,grad_norm: 0.81066222640751, iteration: 299602
loss: 0.9805465936660767,grad_norm: 0.8681015351926121, iteration: 299603
loss: 0.9705605506896973,grad_norm: 0.9999991514661423, iteration: 299604
loss: 0.9975431561470032,grad_norm: 0.8513811978045013, iteration: 299605
loss: 1.0075273513793945,grad_norm: 0.9999996734195418, iteration: 299606
loss: 1.017513394355774,grad_norm: 0.8748430170425417, iteration: 299607
loss: 1.0003836154937744,grad_norm: 0.8234727157622641, iteration: 299608
loss: 0.9744119644165039,grad_norm: 0.8176604524968479, iteration: 299609
loss: 1.0182077884674072,grad_norm: 0.7962958835005218, iteration: 299610
loss: 0.9804220199584961,grad_norm: 0.8522429478915013, iteration: 299611
loss: 1.0091367959976196,grad_norm: 0.9999991315400271, iteration: 299612
loss: 1.0312387943267822,grad_norm: 0.8012931938799129, iteration: 299613
loss: 1.0061324834823608,grad_norm: 0.8056083546793357, iteration: 299614
loss: 0.9785212874412537,grad_norm: 0.8696203498079764, iteration: 299615
loss: 1.002137541770935,grad_norm: 0.9757297875975224, iteration: 299616
loss: 0.9557772278785706,grad_norm: 0.8591213933456008, iteration: 299617
loss: 1.011701226234436,grad_norm: 0.8095576715282704, iteration: 299618
loss: 0.9703162908554077,grad_norm: 0.7677161289057671, iteration: 299619
loss: 1.0062085390090942,grad_norm: 0.851464144123885, iteration: 299620
loss: 0.9794734716415405,grad_norm: 0.7052959981557763, iteration: 299621
loss: 1.0189443826675415,grad_norm: 0.9787142717175499, iteration: 299622
loss: 0.997891902923584,grad_norm: 0.8628334033158659, iteration: 299623
loss: 0.9644040465354919,grad_norm: 0.9703302309787323, iteration: 299624
loss: 1.014607310295105,grad_norm: 0.9249432131129348, iteration: 299625
loss: 0.9873257279396057,grad_norm: 0.946553017428504, iteration: 299626
loss: 0.964813232421875,grad_norm: 0.7601090983453707, iteration: 299627
loss: 0.9632014632225037,grad_norm: 0.9284380298639109, iteration: 299628
loss: 1.011795997619629,grad_norm: 0.8334034913903889, iteration: 299629
loss: 1.0214637517929077,grad_norm: 0.9807615868217596, iteration: 299630
loss: 1.0354059934616089,grad_norm: 0.8738904422095874, iteration: 299631
loss: 0.9920132756233215,grad_norm: 0.7925875516231364, iteration: 299632
loss: 1.0027623176574707,grad_norm: 0.9296270770042806, iteration: 299633
loss: 1.0608934164047241,grad_norm: 0.9999996123679652, iteration: 299634
loss: 1.0115206241607666,grad_norm: 0.8425291699238739, iteration: 299635
loss: 1.016992449760437,grad_norm: 0.9212327009295511, iteration: 299636
loss: 1.0179412364959717,grad_norm: 0.9999994512347654, iteration: 299637
loss: 1.0507463216781616,grad_norm: 0.9999999393738429, iteration: 299638
loss: 1.0079071521759033,grad_norm: 0.933846292009488, iteration: 299639
loss: 0.9685854315757751,grad_norm: 0.6543268230617273, iteration: 299640
loss: 1.0362300872802734,grad_norm: 0.9654487482701513, iteration: 299641
loss: 1.060192584991455,grad_norm: 0.85230684270569, iteration: 299642
loss: 1.041877031326294,grad_norm: 0.9624971214009229, iteration: 299643
loss: 1.0374197959899902,grad_norm: 0.9999989702613041, iteration: 299644
loss: 0.9803382754325867,grad_norm: 0.902045969874478, iteration: 299645
loss: 0.9968620538711548,grad_norm: 0.8388113691435104, iteration: 299646
loss: 0.9900125861167908,grad_norm: 0.7651123733634092, iteration: 299647
loss: 1.013750433921814,grad_norm: 0.7674787484341709, iteration: 299648
loss: 1.004993200302124,grad_norm: 0.8403369524347518, iteration: 299649
loss: 1.0000065565109253,grad_norm: 0.7577028285506393, iteration: 299650
loss: 1.052592158317566,grad_norm: 0.7915370638429503, iteration: 299651
loss: 1.0129568576812744,grad_norm: 0.9124543973749434, iteration: 299652
loss: 1.0118272304534912,grad_norm: 0.8575588833301551, iteration: 299653
loss: 1.0294803380966187,grad_norm: 0.9999990542533056, iteration: 299654
loss: 1.0113632678985596,grad_norm: 0.7667287886373875, iteration: 299655
loss: 1.0575999021530151,grad_norm: 0.8488637010144932, iteration: 299656
loss: 0.9765189290046692,grad_norm: 0.7996875331480414, iteration: 299657
loss: 1.0336904525756836,grad_norm: 0.99999975225917, iteration: 299658
loss: 1.008341908454895,grad_norm: 0.820704424179863, iteration: 299659
loss: 1.0091930627822876,grad_norm: 0.8457781391742077, iteration: 299660
loss: 1.008580207824707,grad_norm: 0.8518787974569249, iteration: 299661
loss: 1.031543493270874,grad_norm: 0.8148695170898083, iteration: 299662
loss: 0.9833674430847168,grad_norm: 0.6974593482337095, iteration: 299663
loss: 0.9933254718780518,grad_norm: 0.792315873055272, iteration: 299664
loss: 1.0111918449401855,grad_norm: 0.8326846487586648, iteration: 299665
loss: 1.0225095748901367,grad_norm: 0.7309510710065428, iteration: 299666
loss: 0.9974435567855835,grad_norm: 0.739358034145511, iteration: 299667
loss: 0.9838207364082336,grad_norm: 0.9999991437070574, iteration: 299668
loss: 1.0287662744522095,grad_norm: 0.88031566389842, iteration: 299669
loss: 0.9983124732971191,grad_norm: 0.9999990958008387, iteration: 299670
loss: 1.0247918367385864,grad_norm: 0.8047224559071655, iteration: 299671
loss: 0.9909263849258423,grad_norm: 0.7552590223805363, iteration: 299672
loss: 0.9740380644798279,grad_norm: 0.999999174130888, iteration: 299673
loss: 1.0094527006149292,grad_norm: 0.7063838965411932, iteration: 299674
loss: 1.0228720903396606,grad_norm: 0.8218916987055018, iteration: 299675
loss: 1.0041773319244385,grad_norm: 0.8198218065399053, iteration: 299676
loss: 1.0264791250228882,grad_norm: 0.8205288140862057, iteration: 299677
loss: 0.9619415402412415,grad_norm: 0.8954603249429328, iteration: 299678
loss: 0.9879376888275146,grad_norm: 0.9465317466074108, iteration: 299679
loss: 0.9811457991600037,grad_norm: 0.8853761742357792, iteration: 299680
loss: 0.9977667331695557,grad_norm: 0.8824602496496025, iteration: 299681
loss: 0.984790563583374,grad_norm: 0.9460977311751197, iteration: 299682
loss: 0.9917572736740112,grad_norm: 0.7733689259077584, iteration: 299683
loss: 0.9814746975898743,grad_norm: 0.8697085280843191, iteration: 299684
loss: 1.0015692710876465,grad_norm: 0.9524270337563603, iteration: 299685
loss: 0.9996704459190369,grad_norm: 0.7821069525442267, iteration: 299686
loss: 1.0038031339645386,grad_norm: 0.6468798827157725, iteration: 299687
loss: 1.0120192766189575,grad_norm: 0.9277969910427107, iteration: 299688
loss: 1.0047886371612549,grad_norm: 0.589151229961402, iteration: 299689
loss: 0.9974068999290466,grad_norm: 0.9999992932804309, iteration: 299690
loss: 1.0058149099349976,grad_norm: 0.9745551827929905, iteration: 299691
loss: 1.0046558380126953,grad_norm: 0.9999992093496004, iteration: 299692
loss: 1.0128469467163086,grad_norm: 0.9146959176833366, iteration: 299693
loss: 1.0086047649383545,grad_norm: 0.924492814157584, iteration: 299694
loss: 0.9757179021835327,grad_norm: 0.9771652438155886, iteration: 299695
loss: 1.0148743391036987,grad_norm: 0.822918480391297, iteration: 299696
loss: 1.0153063535690308,grad_norm: 0.9999991251713592, iteration: 299697
loss: 1.006445288658142,grad_norm: 0.9999990527242969, iteration: 299698
loss: 1.0064517259597778,grad_norm: 0.7469087475714067, iteration: 299699
loss: 0.9931119084358215,grad_norm: 0.8111462188839192, iteration: 299700
loss: 0.9794217944145203,grad_norm: 0.8751764326884166, iteration: 299701
loss: 0.9808166027069092,grad_norm: 0.9999991122542865, iteration: 299702
loss: 0.9816253185272217,grad_norm: 0.8242320034336686, iteration: 299703
loss: 0.9709022641181946,grad_norm: 0.8734745825630035, iteration: 299704
loss: 0.9529419541358948,grad_norm: 0.8970930189756078, iteration: 299705
loss: 0.9941363334655762,grad_norm: 0.8484082495381357, iteration: 299706
loss: 0.9987753033638,grad_norm: 0.8331907674565657, iteration: 299707
loss: 1.0303683280944824,grad_norm: 0.9119912934985915, iteration: 299708
loss: 0.9879863858222961,grad_norm: 0.8197213833833419, iteration: 299709
loss: 1.0097488164901733,grad_norm: 0.9999991484420363, iteration: 299710
loss: 1.026258945465088,grad_norm: 0.9999995823600616, iteration: 299711
loss: 1.0615493059158325,grad_norm: 0.9353888698080692, iteration: 299712
loss: 1.0096094608306885,grad_norm: 0.9999989796768503, iteration: 299713
loss: 1.0114688873291016,grad_norm: 0.87444868112934, iteration: 299714
loss: 1.0274235010147095,grad_norm: 0.8102315085572291, iteration: 299715
loss: 1.0226645469665527,grad_norm: 0.8018654539308483, iteration: 299716
loss: 0.9994490146636963,grad_norm: 0.9212755854021242, iteration: 299717
loss: 1.0322962999343872,grad_norm: 0.9999994337176568, iteration: 299718
loss: 1.013358473777771,grad_norm: 0.7278388595646543, iteration: 299719
loss: 0.9980116486549377,grad_norm: 0.8751988034135957, iteration: 299720
loss: 1.0020830631256104,grad_norm: 0.8125264816639934, iteration: 299721
loss: 1.0842671394348145,grad_norm: 0.9999992311648354, iteration: 299722
loss: 1.0045886039733887,grad_norm: 0.881260352066565, iteration: 299723
loss: 1.0084701776504517,grad_norm: 0.9720009975861895, iteration: 299724
loss: 0.9860336780548096,grad_norm: 0.7879018628068211, iteration: 299725
loss: 1.0060222148895264,grad_norm: 0.9510377180238451, iteration: 299726
loss: 1.0053972005844116,grad_norm: 0.8191556279981862, iteration: 299727
loss: 0.9827830791473389,grad_norm: 0.8117930044662373, iteration: 299728
loss: 0.9594224095344543,grad_norm: 0.9004555032382385, iteration: 299729
loss: 1.0139297246932983,grad_norm: 0.999999297734417, iteration: 299730
loss: 1.0032637119293213,grad_norm: 0.9169883385266593, iteration: 299731
loss: 1.0087722539901733,grad_norm: 0.6934681454778647, iteration: 299732
loss: 1.0337088108062744,grad_norm: 0.7293684693038289, iteration: 299733
loss: 0.9714579582214355,grad_norm: 0.7865531968505485, iteration: 299734
loss: 0.9909376502037048,grad_norm: 0.7187623426266296, iteration: 299735
loss: 1.0073215961456299,grad_norm: 0.9999992197430477, iteration: 299736
loss: 1.001238226890564,grad_norm: 0.9860636354476886, iteration: 299737
loss: 1.047289490699768,grad_norm: 0.804998628570278, iteration: 299738
loss: 0.9976066946983337,grad_norm: 0.7347473546692217, iteration: 299739
loss: 0.9392666816711426,grad_norm: 0.863031734267537, iteration: 299740
loss: 1.0906442403793335,grad_norm: 1.0000001098887443, iteration: 299741
loss: 0.9897880554199219,grad_norm: 0.9250921961972033, iteration: 299742
loss: 0.9945483207702637,grad_norm: 0.8144406525229637, iteration: 299743
loss: 0.9909128546714783,grad_norm: 0.8760556998515256, iteration: 299744
loss: 1.1253002882003784,grad_norm: 0.9999996176925271, iteration: 299745
loss: 1.0498050451278687,grad_norm: 0.9999996452312964, iteration: 299746
loss: 1.029767632484436,grad_norm: 0.9999991040709354, iteration: 299747
loss: 0.9947303533554077,grad_norm: 0.8142564829812832, iteration: 299748
loss: 0.9906361103057861,grad_norm: 0.8148336582459773, iteration: 299749
loss: 1.003975749015808,grad_norm: 0.8807676714433523, iteration: 299750
loss: 1.0029994249343872,grad_norm: 0.9999991489276092, iteration: 299751
loss: 0.9876328110694885,grad_norm: 0.9999991828764178, iteration: 299752
loss: 1.0229953527450562,grad_norm: 0.8273073606631695, iteration: 299753
loss: 1.0107120275497437,grad_norm: 0.724935781749314, iteration: 299754
loss: 0.9708240628242493,grad_norm: 0.789926265801084, iteration: 299755
loss: 0.9440026879310608,grad_norm: 0.8111550080415856, iteration: 299756
loss: 1.0570249557495117,grad_norm: 0.9327089830935212, iteration: 299757
loss: 0.9808717966079712,grad_norm: 0.8918934565480827, iteration: 299758
loss: 0.997432291507721,grad_norm: 0.9389402561843369, iteration: 299759
loss: 1.0413697957992554,grad_norm: 0.8829373223152664, iteration: 299760
loss: 1.000750184059143,grad_norm: 0.7356906065852936, iteration: 299761
loss: 1.0227570533752441,grad_norm: 0.9999991946655368, iteration: 299762
loss: 1.0094455480575562,grad_norm: 0.8094287086317186, iteration: 299763
loss: 1.0114070177078247,grad_norm: 0.9999990716331955, iteration: 299764
loss: 0.9977927803993225,grad_norm: 0.745159210538757, iteration: 299765
loss: 1.075514316558838,grad_norm: 0.9999994371832223, iteration: 299766
loss: 0.988483726978302,grad_norm: 0.9999990393853289, iteration: 299767
loss: 0.9822604060173035,grad_norm: 0.7206802992975008, iteration: 299768
loss: 1.0033007860183716,grad_norm: 0.9999989910602108, iteration: 299769
loss: 0.980617880821228,grad_norm: 0.9999991871300283, iteration: 299770
loss: 0.9937064051628113,grad_norm: 0.8102102757039481, iteration: 299771
loss: 1.004891276359558,grad_norm: 0.9530656094761315, iteration: 299772
loss: 0.966163158416748,grad_norm: 0.6889184060263609, iteration: 299773
loss: 1.0273061990737915,grad_norm: 0.7943935281941004, iteration: 299774
loss: 1.0056242942810059,grad_norm: 0.7322138237058168, iteration: 299775
loss: 0.9836021661758423,grad_norm: 0.9400715723125492, iteration: 299776
loss: 1.0189975500106812,grad_norm: 0.817928233441293, iteration: 299777
loss: 0.9828707575798035,grad_norm: 0.7920404380267367, iteration: 299778
loss: 1.0097113847732544,grad_norm: 0.7870392113217168, iteration: 299779
loss: 0.9838423728942871,grad_norm: 0.9999990536721257, iteration: 299780
loss: 0.9771533608436584,grad_norm: 0.8265918941174535, iteration: 299781
loss: 0.9980565309524536,grad_norm: 0.9245524307595284, iteration: 299782
loss: 0.9786373972892761,grad_norm: 0.8404597166073812, iteration: 299783
loss: 0.9879899024963379,grad_norm: 0.8857008448619348, iteration: 299784
loss: 0.9864584803581238,grad_norm: 0.6805374581968838, iteration: 299785
loss: 1.023893117904663,grad_norm: 0.9024510987798902, iteration: 299786
loss: 0.9662796258926392,grad_norm: 0.7456468396277217, iteration: 299787
loss: 1.0255241394042969,grad_norm: 0.9999992816247516, iteration: 299788
loss: 1.0193791389465332,grad_norm: 0.9999990157222762, iteration: 299789
loss: 1.0095089673995972,grad_norm: 0.8112023063259182, iteration: 299790
loss: 0.9845896363258362,grad_norm: 0.9999990299657171, iteration: 299791
loss: 1.022455096244812,grad_norm: 0.9307054320547992, iteration: 299792
loss: 1.0229748487472534,grad_norm: 0.9565740492119237, iteration: 299793
loss: 1.0629197359085083,grad_norm: 0.9691022260727664, iteration: 299794
loss: 0.9712187647819519,grad_norm: 0.9999990831922673, iteration: 299795
loss: 1.0101524591445923,grad_norm: 0.8414296391720474, iteration: 299796
loss: 1.001600980758667,grad_norm: 0.7044065932414229, iteration: 299797
loss: 0.9916761517524719,grad_norm: 0.7415993332668197, iteration: 299798
loss: 0.9734230637550354,grad_norm: 0.894875513174137, iteration: 299799
loss: 1.024084210395813,grad_norm: 0.9396094556798145, iteration: 299800
loss: 0.9768748879432678,grad_norm: 0.8879664888185645, iteration: 299801
loss: 0.9935470223426819,grad_norm: 0.9999989705922793, iteration: 299802
loss: 0.993594765663147,grad_norm: 0.7962862748421589, iteration: 299803
loss: 1.0000348091125488,grad_norm: 0.7654402296743282, iteration: 299804
loss: 1.0017088651657104,grad_norm: 0.8188666109975467, iteration: 299805
loss: 0.9810699820518494,grad_norm: 0.891553363380372, iteration: 299806
loss: 1.013770580291748,grad_norm: 0.7889381961566841, iteration: 299807
loss: 1.0199401378631592,grad_norm: 0.7164406462395528, iteration: 299808
loss: 1.0449994802474976,grad_norm: 0.9177417156925904, iteration: 299809
loss: 0.9931153059005737,grad_norm: 0.7862911638942558, iteration: 299810
loss: 1.0428372621536255,grad_norm: 0.9999991408880511, iteration: 299811
loss: 1.0196714401245117,grad_norm: 0.9574806323833239, iteration: 299812
loss: 0.9720554947853088,grad_norm: 0.9905341254745239, iteration: 299813
loss: 1.0185065269470215,grad_norm: 0.7732736019356586, iteration: 299814
loss: 0.9713993072509766,grad_norm: 0.7954075686472787, iteration: 299815
loss: 1.0228294134140015,grad_norm: 0.8025814939942174, iteration: 299816
loss: 0.9713668823242188,grad_norm: 0.9666425804891834, iteration: 299817
loss: 0.9926310181617737,grad_norm: 0.8155168187855051, iteration: 299818
loss: 1.0221054553985596,grad_norm: 0.9999989963671186, iteration: 299819
loss: 1.0525590181350708,grad_norm: 0.9999994675375511, iteration: 299820
loss: 0.9716286063194275,grad_norm: 0.9150037751216709, iteration: 299821
loss: 0.9866745471954346,grad_norm: 0.7873424220438715, iteration: 299822
loss: 0.9933344721794128,grad_norm: 0.8545797589216608, iteration: 299823
loss: 1.0078898668289185,grad_norm: 0.757381703319188, iteration: 299824
loss: 0.9755373001098633,grad_norm: 0.8478915192753503, iteration: 299825
loss: 0.9874250292778015,grad_norm: 0.8458632738960303, iteration: 299826
loss: 0.9994514584541321,grad_norm: 0.8655459166979776, iteration: 299827
loss: 1.126211166381836,grad_norm: 0.9999999315727399, iteration: 299828
loss: 1.0101807117462158,grad_norm: 0.9899309111892169, iteration: 299829
loss: 0.9674909710884094,grad_norm: 0.8420008148772871, iteration: 299830
loss: 1.0196536779403687,grad_norm: 0.8458072224304043, iteration: 299831
loss: 0.991865336894989,grad_norm: 0.8725679825820004, iteration: 299832
loss: 0.9645123481750488,grad_norm: 0.8609653622094603, iteration: 299833
loss: 1.0345021486282349,grad_norm: 0.9999990868724761, iteration: 299834
loss: 0.9966516494750977,grad_norm: 0.9999994274088596, iteration: 299835
loss: 1.0011249780654907,grad_norm: 0.9999990737267624, iteration: 299836
loss: 1.0273807048797607,grad_norm: 0.7445222951512791, iteration: 299837
loss: 1.0267276763916016,grad_norm: 0.9088651321992158, iteration: 299838
loss: 0.9869973063468933,grad_norm: 0.9784691941346297, iteration: 299839
loss: 1.0008589029312134,grad_norm: 0.7440159204518676, iteration: 299840
loss: 0.9964380860328674,grad_norm: 0.8377207158917528, iteration: 299841
loss: 0.9898229241371155,grad_norm: 0.8107427149688824, iteration: 299842
loss: 1.0223430395126343,grad_norm: 0.8962293293312708, iteration: 299843
loss: 0.9971579909324646,grad_norm: 0.8294034193653956, iteration: 299844
loss: 0.9870394468307495,grad_norm: 0.8183384788005074, iteration: 299845
loss: 1.1371688842773438,grad_norm: 0.9999993740454415, iteration: 299846
loss: 1.0082814693450928,grad_norm: 0.8254941793431311, iteration: 299847
loss: 0.9662595391273499,grad_norm: 0.9999990880993875, iteration: 299848
loss: 1.0458897352218628,grad_norm: 0.999999971485733, iteration: 299849
loss: 0.9809848666191101,grad_norm: 0.832836153251617, iteration: 299850
loss: 0.9577277302742004,grad_norm: 0.9210762275293244, iteration: 299851
loss: 0.9728733897209167,grad_norm: 0.9589640169257737, iteration: 299852
loss: 0.989471435546875,grad_norm: 0.8650086999534382, iteration: 299853
loss: 1.0050902366638184,grad_norm: 0.8162216737794792, iteration: 299854
loss: 1.0928947925567627,grad_norm: 0.9999990616802628, iteration: 299855
loss: 1.2687581777572632,grad_norm: 0.9999999102838031, iteration: 299856
loss: 1.1857616901397705,grad_norm: 0.9999996713360579, iteration: 299857
loss: 1.1039401292800903,grad_norm: 0.9999996387432785, iteration: 299858
loss: 1.0056790113449097,grad_norm: 0.9999996028747113, iteration: 299859
loss: 0.9737818837165833,grad_norm: 0.8593221927917906, iteration: 299860
loss: 1.0184662342071533,grad_norm: 0.8867223056368689, iteration: 299861
loss: 0.9877064824104309,grad_norm: 0.9530957584971954, iteration: 299862
loss: 1.0145317316055298,grad_norm: 0.8230871564905357, iteration: 299863
loss: 0.9969589710235596,grad_norm: 0.7591044456135257, iteration: 299864
loss: 0.990900456905365,grad_norm: 0.7933026568844193, iteration: 299865
loss: 1.1131565570831299,grad_norm: 0.9671295261294396, iteration: 299866
loss: 0.9864559769630432,grad_norm: 0.8538969340882361, iteration: 299867
loss: 1.021105408668518,grad_norm: 0.999999637782643, iteration: 299868
loss: 0.9825023412704468,grad_norm: 0.9999989497570374, iteration: 299869
loss: 0.9738629460334778,grad_norm: 0.8265388351695584, iteration: 299870
loss: 0.9807870984077454,grad_norm: 0.9383099022250865, iteration: 299871
loss: 0.9600847959518433,grad_norm: 0.7797966755693984, iteration: 299872
loss: 1.038403868675232,grad_norm: 0.959071357702982, iteration: 299873
loss: 1.0293428897857666,grad_norm: 0.9370332250718089, iteration: 299874
loss: 0.983385443687439,grad_norm: 0.791834629630092, iteration: 299875
loss: 1.0166258811950684,grad_norm: 0.999999396868956, iteration: 299876
loss: 0.956464409828186,grad_norm: 0.7862437302517522, iteration: 299877
loss: 1.032503604888916,grad_norm: 0.9999995338580011, iteration: 299878
loss: 0.9875149130821228,grad_norm: 0.7819412580984276, iteration: 299879
loss: 1.0107495784759521,grad_norm: 0.8430370674447188, iteration: 299880
loss: 1.0116703510284424,grad_norm: 0.8054485649148312, iteration: 299881
loss: 1.0194286108016968,grad_norm: 0.7891303494039456, iteration: 299882
loss: 0.9702006578445435,grad_norm: 0.8983551873381407, iteration: 299883
loss: 1.0054850578308105,grad_norm: 0.695610870336235, iteration: 299884
loss: 0.9939908981323242,grad_norm: 0.9999991501715502, iteration: 299885
loss: 1.0194437503814697,grad_norm: 0.8373661195081907, iteration: 299886
loss: 1.0288187265396118,grad_norm: 0.9289168208911491, iteration: 299887
loss: 0.9798570275306702,grad_norm: 0.8079379399501486, iteration: 299888
loss: 1.0226839780807495,grad_norm: 0.8697736841764819, iteration: 299889
loss: 0.9962331056594849,grad_norm: 0.8632826454454023, iteration: 299890
loss: 1.0230735540390015,grad_norm: 0.99999958735089, iteration: 299891
loss: 0.9887409210205078,grad_norm: 0.7754192103441047, iteration: 299892
loss: 1.0118279457092285,grad_norm: 0.8208214897165139, iteration: 299893
loss: 0.9918822646141052,grad_norm: 0.8837136948087576, iteration: 299894
loss: 1.0278639793395996,grad_norm: 0.8824680974101614, iteration: 299895
loss: 1.0191948413848877,grad_norm: 0.8001704292335602, iteration: 299896
loss: 0.972492516040802,grad_norm: 0.7844057349079696, iteration: 299897
loss: 1.0894107818603516,grad_norm: 0.8291149183677854, iteration: 299898
loss: 1.0216466188430786,grad_norm: 0.7577473961590383, iteration: 299899
loss: 0.9920441508293152,grad_norm: 0.8100857885027722, iteration: 299900
loss: 0.981653094291687,grad_norm: 0.7524351829053479, iteration: 299901
loss: 1.0027539730072021,grad_norm: 0.8854814505336847, iteration: 299902
loss: 0.9689332842826843,grad_norm: 0.8636388761473394, iteration: 299903
loss: 1.0048879384994507,grad_norm: 0.9999990127927783, iteration: 299904
loss: 1.0016568899154663,grad_norm: 0.8730526092715999, iteration: 299905
loss: 1.0204273462295532,grad_norm: 0.8233640911529527, iteration: 299906
loss: 0.9940094351768494,grad_norm: 0.9414159346599772, iteration: 299907
loss: 1.007804274559021,grad_norm: 0.9999995481528647, iteration: 299908
loss: 1.005556583404541,grad_norm: 0.6961626147516501, iteration: 299909
loss: 1.0064988136291504,grad_norm: 0.8293667068208971, iteration: 299910
loss: 0.9889890551567078,grad_norm: 0.8869767048763412, iteration: 299911
loss: 1.0382463932037354,grad_norm: 0.7969521874659841, iteration: 299912
loss: 0.9885727167129517,grad_norm: 0.8721558152153064, iteration: 299913
loss: 1.0441784858703613,grad_norm: 0.9639356256081472, iteration: 299914
loss: 0.9644043445587158,grad_norm: 0.9101641767681582, iteration: 299915
loss: 1.0064470767974854,grad_norm: 0.792756423586181, iteration: 299916
loss: 1.012895941734314,grad_norm: 0.8725391577968041, iteration: 299917
loss: 0.9923890829086304,grad_norm: 0.8375704784951585, iteration: 299918
loss: 1.0566277503967285,grad_norm: 0.9999998588931044, iteration: 299919
loss: 0.988567054271698,grad_norm: 0.9618471607886415, iteration: 299920
loss: 1.0102118253707886,grad_norm: 0.9467209503423002, iteration: 299921
loss: 1.0244536399841309,grad_norm: 0.9308589100188379, iteration: 299922
loss: 1.0081568956375122,grad_norm: 0.9285122511853764, iteration: 299923
loss: 1.0228279829025269,grad_norm: 0.9999992663362087, iteration: 299924
loss: 0.9793667793273926,grad_norm: 0.8490278381043941, iteration: 299925
loss: 1.0052827596664429,grad_norm: 0.7899168112442798, iteration: 299926
loss: 0.9739962220191956,grad_norm: 0.8122090621628919, iteration: 299927
loss: 1.0121629238128662,grad_norm: 0.9999997193197233, iteration: 299928
loss: 0.9913989901542664,grad_norm: 0.7510472249166813, iteration: 299929
loss: 1.0121692419052124,grad_norm: 0.9236729939484304, iteration: 299930
loss: 1.0091283321380615,grad_norm: 0.8360261275429068, iteration: 299931
loss: 1.0525373220443726,grad_norm: 0.9128597306857775, iteration: 299932
loss: 0.9981693625450134,grad_norm: 0.9935719532209143, iteration: 299933
loss: 0.993748664855957,grad_norm: 0.7817870854010738, iteration: 299934
loss: 1.0847411155700684,grad_norm: 0.9999999928148817, iteration: 299935
loss: 1.0672633647918701,grad_norm: 0.9999990806607882, iteration: 299936
loss: 1.030842661857605,grad_norm: 0.9999992477580947, iteration: 299937
loss: 0.9946728348731995,grad_norm: 0.9818700248594674, iteration: 299938
loss: 1.0270397663116455,grad_norm: 0.9186582388905807, iteration: 299939
loss: 0.9483535289764404,grad_norm: 0.8865729164966675, iteration: 299940
loss: 1.0149725675582886,grad_norm: 0.8929302300742447, iteration: 299941
loss: 0.9812214374542236,grad_norm: 0.8324494865623356, iteration: 299942
loss: 0.9995336532592773,grad_norm: 0.9758745832737596, iteration: 299943
loss: 0.9805556535720825,grad_norm: 0.9999991880551506, iteration: 299944
loss: 0.9803223013877869,grad_norm: 0.9736881340021138, iteration: 299945
loss: 0.9897842407226562,grad_norm: 0.8198622514828251, iteration: 299946
loss: 1.0595686435699463,grad_norm: 0.7513895577412393, iteration: 299947
loss: 1.008730173110962,grad_norm: 0.9319784755454583, iteration: 299948
loss: 0.9930641651153564,grad_norm: 0.9296568414820733, iteration: 299949
loss: 1.0081127882003784,grad_norm: 0.8377971302252372, iteration: 299950
loss: 0.9974116086959839,grad_norm: 0.9999994874384771, iteration: 299951
loss: 0.9845457673072815,grad_norm: 0.7386403483607527, iteration: 299952
loss: 0.9806488752365112,grad_norm: 0.7926122949894984, iteration: 299953
loss: 1.0104891061782837,grad_norm: 0.9802951147947617, iteration: 299954
loss: 1.2645199298858643,grad_norm: 0.9999998419805651, iteration: 299955
loss: 1.0262260437011719,grad_norm: 0.9726488570306674, iteration: 299956
loss: 1.01171875,grad_norm: 0.8912584158306938, iteration: 299957
loss: 0.9667767882347107,grad_norm: 0.7489484548037199, iteration: 299958
loss: 0.9776368141174316,grad_norm: 0.7979160448335122, iteration: 299959
loss: 1.0135242938995361,grad_norm: 0.8508746177828169, iteration: 299960
loss: 1.0092283487319946,grad_norm: 0.8699115202932033, iteration: 299961
loss: 0.983130156993866,grad_norm: 0.8424872251740102, iteration: 299962
loss: 1.013258695602417,grad_norm: 0.915331810782435, iteration: 299963
loss: 1.009176254272461,grad_norm: 0.8548855154339348, iteration: 299964
loss: 1.0145289897918701,grad_norm: 0.9999991645859518, iteration: 299965
loss: 1.047940731048584,grad_norm: 0.9893609176831221, iteration: 299966
loss: 1.0180813074111938,grad_norm: 0.7455310473697074, iteration: 299967
loss: 0.9965219497680664,grad_norm: 0.9999991441361767, iteration: 299968
loss: 1.1418451070785522,grad_norm: 0.9999993390515683, iteration: 299969
loss: 1.0110222101211548,grad_norm: 0.8507300894210658, iteration: 299970
loss: 1.0128613710403442,grad_norm: 0.9999989183851894, iteration: 299971
loss: 1.0103846788406372,grad_norm: 0.9999990557978367, iteration: 299972
loss: 1.012271761894226,grad_norm: 0.8373847010448665, iteration: 299973
loss: 0.9622631072998047,grad_norm: 0.8380504573558966, iteration: 299974
loss: 1.0326811075210571,grad_norm: 0.999999414622575, iteration: 299975
loss: 1.0132267475128174,grad_norm: 0.9999989281128518, iteration: 299976
loss: 1.0990512371063232,grad_norm: 0.8895703546527411, iteration: 299977
loss: 1.0162575244903564,grad_norm: 0.7471142764654514, iteration: 299978
loss: 1.000576376914978,grad_norm: 0.9124001297773607, iteration: 299979
loss: 0.9874167442321777,grad_norm: 0.6965712597253998, iteration: 299980
loss: 0.98565673828125,grad_norm: 0.8948918268099151, iteration: 299981
loss: 1.027091383934021,grad_norm: 0.9763131656661356, iteration: 299982
loss: 0.9765035510063171,grad_norm: 0.7388204715723904, iteration: 299983
loss: 0.9731144905090332,grad_norm: 0.9478586873335298, iteration: 299984
loss: 1.0183604955673218,grad_norm: 0.9296806057659437, iteration: 299985
loss: 1.014711618423462,grad_norm: 0.9999991291630896, iteration: 299986
loss: 1.022143840789795,grad_norm: 0.8666937889201735, iteration: 299987
loss: 1.0045305490493774,grad_norm: 0.9410004434120152, iteration: 299988
loss: 1.0138874053955078,grad_norm: 0.7855721472167061, iteration: 299989
loss: 1.0375473499298096,grad_norm: 0.9999995721127718, iteration: 299990
loss: 1.1879218816757202,grad_norm: 0.9999991932460695, iteration: 299991
loss: 1.0035922527313232,grad_norm: 0.7509722337517291, iteration: 299992
loss: 0.9945577383041382,grad_norm: 0.9999991199927079, iteration: 299993
loss: 0.9825771450996399,grad_norm: 0.8886781225755884, iteration: 299994
loss: 1.0076191425323486,grad_norm: 0.9999993182278221, iteration: 299995
loss: 0.9790933132171631,grad_norm: 0.7021571440880364, iteration: 299996
loss: 1.0352740287780762,grad_norm: 0.7779188191119089, iteration: 299997
loss: 0.9776667952537537,grad_norm: 0.8464089624355976, iteration: 299998
loss: 1.0078750848770142,grad_norm: 0.9999991461259863, iteration: 299999
loss: 1.0054813623428345,grad_norm: 0.7472919234937194, iteration: 300000
Evaluating at step 300000
{'val': 0.9958710763603449, 'test': 2.389850243973656}
loss: 0.9593926668167114,grad_norm: 0.9254387304306425, iteration: 300001
loss: 1.0030860900878906,grad_norm: 0.9073408446235044, iteration: 300002
loss: 1.0108026266098022,grad_norm: 0.9180996132384529, iteration: 300003
loss: 0.9832956790924072,grad_norm: 0.9947332702537349, iteration: 300004
loss: 0.9685330390930176,grad_norm: 0.9999991302457198, iteration: 300005
loss: 0.9894799590110779,grad_norm: 0.7991569224852055, iteration: 300006
loss: 1.0485506057739258,grad_norm: 0.9999996422878912, iteration: 300007
loss: 0.995991051197052,grad_norm: 0.8733512844709973, iteration: 300008
loss: 0.9681956171989441,grad_norm: 0.846714449255011, iteration: 300009
loss: 0.9981827139854431,grad_norm: 0.7995239088520695, iteration: 300010
loss: 0.9450696110725403,grad_norm: 0.8038982169520165, iteration: 300011
loss: 1.0298646688461304,grad_norm: 0.9999990625332332, iteration: 300012
loss: 0.9793072938919067,grad_norm: 0.8440555310134376, iteration: 300013
loss: 1.0214239358901978,grad_norm: 0.8605115843279523, iteration: 300014
loss: 1.0937683582305908,grad_norm: 0.9999991304043684, iteration: 300015
loss: 0.9918842911720276,grad_norm: 0.8092752376009567, iteration: 300016
loss: 0.9987314939498901,grad_norm: 0.9999995765642368, iteration: 300017
loss: 1.2103911638259888,grad_norm: 0.999999267075393, iteration: 300018
loss: 1.0203156471252441,grad_norm: 0.9535690625155492, iteration: 300019
loss: 1.0002714395523071,grad_norm: 0.9721875994316732, iteration: 300020
loss: 1.059637427330017,grad_norm: 0.9999999503568062, iteration: 300021
loss: 1.0125901699066162,grad_norm: 0.9547333356244846, iteration: 300022
loss: 1.0065217018127441,grad_norm: 0.7767384766792914, iteration: 300023
loss: 0.9928982257843018,grad_norm: 0.7517165409138254, iteration: 300024
loss: 0.9882417321205139,grad_norm: 0.9999995831672749, iteration: 300025
loss: 0.9649159908294678,grad_norm: 0.9999990944599372, iteration: 300026
loss: 1.0336685180664062,grad_norm: 0.8998633942257477, iteration: 300027
loss: 1.1775121688842773,grad_norm: 0.9999990268125379, iteration: 300028
loss: 0.997611939907074,grad_norm: 0.9452754038813865, iteration: 300029
loss: 1.1883848905563354,grad_norm: 0.9999991681783409, iteration: 300030
loss: 0.9868049025535583,grad_norm: 0.8228161547832344, iteration: 300031
loss: 0.9965578317642212,grad_norm: 0.8427581886047247, iteration: 300032
loss: 0.9859716296195984,grad_norm: 0.7755475513715723, iteration: 300033
loss: 1.202269196510315,grad_norm: 0.999999345925763, iteration: 300034
loss: 1.0044900178909302,grad_norm: 0.99999925655198, iteration: 300035
loss: 1.0034292936325073,grad_norm: 0.7781846431768924, iteration: 300036
loss: 0.9802444577217102,grad_norm: 0.9175019691455875, iteration: 300037
loss: 1.015142560005188,grad_norm: 0.9999992472789004, iteration: 300038
loss: 1.0052471160888672,grad_norm: 0.8689354247175439, iteration: 300039
loss: 1.0037012100219727,grad_norm: 0.9999994489859878, iteration: 300040
loss: 0.9887879490852356,grad_norm: 0.8105209495959093, iteration: 300041
loss: 1.0974304676055908,grad_norm: 0.9999994317321159, iteration: 300042
loss: 0.9971749186515808,grad_norm: 0.7381685417715865, iteration: 300043
loss: 0.9932217001914978,grad_norm: 0.8200047433105884, iteration: 300044
loss: 1.007298231124878,grad_norm: 0.9999995726684835, iteration: 300045
loss: 0.9852318167686462,grad_norm: 0.9999992446784628, iteration: 300046
loss: 1.042410969734192,grad_norm: 0.999999042634801, iteration: 300047
loss: 0.9989703297615051,grad_norm: 0.9999989422855327, iteration: 300048
loss: 0.9972145557403564,grad_norm: 0.8748419672967818, iteration: 300049
loss: 0.9757075905799866,grad_norm: 0.8826776530708763, iteration: 300050
loss: 1.0217498540878296,grad_norm: 0.7905567996973961, iteration: 300051
loss: 0.9855011105537415,grad_norm: 0.856352983508192, iteration: 300052
loss: 1.038346529006958,grad_norm: 0.9729820837217523, iteration: 300053
loss: 1.0294369459152222,grad_norm: 0.9586738295871416, iteration: 300054
loss: 1.0597230195999146,grad_norm: 0.9999992461793114, iteration: 300055
loss: 0.9985518455505371,grad_norm: 0.8084701176895607, iteration: 300056
loss: 1.0224632024765015,grad_norm: 0.983547796043302, iteration: 300057
loss: 1.00528085231781,grad_norm: 0.9059498447160383, iteration: 300058
loss: 1.012908697128296,grad_norm: 0.9999990814974382, iteration: 300059
loss: 0.9952266216278076,grad_norm: 0.7939350766396658, iteration: 300060
loss: 0.9952898621559143,grad_norm: 0.89670759238077, iteration: 300061
loss: 1.002498745918274,grad_norm: 0.9040925460171999, iteration: 300062
loss: 0.9821683168411255,grad_norm: 0.6940893690198592, iteration: 300063
loss: 0.9932746291160583,grad_norm: 0.9999991474645334, iteration: 300064
loss: 1.0112019777297974,grad_norm: 0.7222970186589716, iteration: 300065
loss: 0.9598291516304016,grad_norm: 0.7210740646863687, iteration: 300066
loss: 0.9870836138725281,grad_norm: 0.8718175918110289, iteration: 300067
loss: 1.0577071905136108,grad_norm: 0.8375562457654057, iteration: 300068
loss: 0.9647939801216125,grad_norm: 0.7265139331749904, iteration: 300069
loss: 1.0088120698928833,grad_norm: 0.8953373105547814, iteration: 300070
loss: 1.0094857215881348,grad_norm: 0.7213421842018066, iteration: 300071
loss: 1.0534218549728394,grad_norm: 0.910545471473482, iteration: 300072
loss: 1.001539707183838,grad_norm: 0.9062174356586493, iteration: 300073
loss: 0.9831616878509521,grad_norm: 0.8070644862542304, iteration: 300074
loss: 1.0317840576171875,grad_norm: 0.9426556605884844, iteration: 300075
loss: 0.9733129739761353,grad_norm: 0.9148897995398146, iteration: 300076
loss: 1.061165690422058,grad_norm: 0.9999990908757848, iteration: 300077
loss: 0.9841790795326233,grad_norm: 0.8486358858491397, iteration: 300078
loss: 0.9940284490585327,grad_norm: 0.7363514217840985, iteration: 300079
loss: 0.9574947357177734,grad_norm: 0.9906442443156419, iteration: 300080
loss: 1.0246179103851318,grad_norm: 0.9999992996906429, iteration: 300081
loss: 0.9863284826278687,grad_norm: 0.902572306550819, iteration: 300082
loss: 1.1168652772903442,grad_norm: 0.9999998663074364, iteration: 300083
loss: 1.0210752487182617,grad_norm: 0.6500129762092518, iteration: 300084
loss: 0.9556522369384766,grad_norm: 0.9101122286018399, iteration: 300085
loss: 0.9880615472793579,grad_norm: 0.9728056452314141, iteration: 300086
loss: 1.0084574222564697,grad_norm: 0.8741130158469476, iteration: 300087
loss: 0.9673820734024048,grad_norm: 0.8429490348443469, iteration: 300088
loss: 0.9984753727912903,grad_norm: 0.7030044414313564, iteration: 300089
loss: 1.0010795593261719,grad_norm: 0.8688385138828079, iteration: 300090
loss: 1.0263680219650269,grad_norm: 0.8592450161324107, iteration: 300091
loss: 1.0065771341323853,grad_norm: 0.8388676625584437, iteration: 300092
loss: 1.0287039279937744,grad_norm: 0.9518928781385279, iteration: 300093
loss: 0.9789828062057495,grad_norm: 0.8683814691520827, iteration: 300094
loss: 1.0023452043533325,grad_norm: 0.7900073923698325, iteration: 300095
loss: 0.9677383899688721,grad_norm: 0.7872807211091721, iteration: 300096
loss: 0.9745141267776489,grad_norm: 0.8912642292842344, iteration: 300097
loss: 1.0855871438980103,grad_norm: 0.9999992290481381, iteration: 300098
loss: 1.0032974481582642,grad_norm: 0.9482852462079061, iteration: 300099
loss: 1.0008509159088135,grad_norm: 0.8839539101328681, iteration: 300100
loss: 1.0043468475341797,grad_norm: 0.7191769756999089, iteration: 300101
loss: 1.0046614408493042,grad_norm: 0.8228197126615537, iteration: 300102
loss: 1.0009839534759521,grad_norm: 0.9999991180529171, iteration: 300103
loss: 0.9716097116470337,grad_norm: 0.908164956331542, iteration: 300104
loss: 1.0288374423980713,grad_norm: 0.9999990462952906, iteration: 300105
loss: 0.9945278763771057,grad_norm: 0.9999999239945069, iteration: 300106
loss: 0.9962524771690369,grad_norm: 0.99999901825285, iteration: 300107
loss: 1.0166488885879517,grad_norm: 0.9300380054560236, iteration: 300108
loss: 1.0225918292999268,grad_norm: 0.9999997526600355, iteration: 300109
loss: 1.0242137908935547,grad_norm: 0.7650343162920794, iteration: 300110
loss: 1.0089493989944458,grad_norm: 0.9409887176739921, iteration: 300111
loss: 1.0082099437713623,grad_norm: 0.7398413631490159, iteration: 300112
loss: 0.9907301068305969,grad_norm: 0.7546688480284107, iteration: 300113
loss: 1.0081472396850586,grad_norm: 0.7357603303390181, iteration: 300114
loss: 1.0102078914642334,grad_norm: 0.7809911932121798, iteration: 300115
loss: 0.9919068813323975,grad_norm: 0.7560263222186931, iteration: 300116
loss: 0.9782391786575317,grad_norm: 0.7999614106309118, iteration: 300117
loss: 0.9857400059700012,grad_norm: 0.8851970647823989, iteration: 300118
loss: 0.9981984496116638,grad_norm: 0.9000130708459362, iteration: 300119
loss: 1.00142502784729,grad_norm: 0.8829048528516832, iteration: 300120
loss: 0.9861447811126709,grad_norm: 0.8090611951676856, iteration: 300121
loss: 1.0247164964675903,grad_norm: 0.9264128422869145, iteration: 300122
loss: 1.0032026767730713,grad_norm: 0.9498262981804148, iteration: 300123
loss: 0.9993866682052612,grad_norm: 0.8936564425097099, iteration: 300124
loss: 1.0370100736618042,grad_norm: 0.8754678252715667, iteration: 300125
loss: 1.028572678565979,grad_norm: 0.9405941606456902, iteration: 300126
loss: 1.0182712078094482,grad_norm: 0.7553019098539493, iteration: 300127
loss: 1.0184437036514282,grad_norm: 0.8073551589234297, iteration: 300128
loss: 0.9472250938415527,grad_norm: 0.9999990826170359, iteration: 300129
loss: 1.0058618783950806,grad_norm: 0.9258139495810302, iteration: 300130
loss: 1.0134588479995728,grad_norm: 0.9101733724422689, iteration: 300131
loss: 0.9874581098556519,grad_norm: 0.9999990145375278, iteration: 300132
loss: 0.9936680793762207,grad_norm: 0.8053069120876862, iteration: 300133
loss: 0.9852047562599182,grad_norm: 0.8935664459520021, iteration: 300134
loss: 0.9909121990203857,grad_norm: 0.8011532884442599, iteration: 300135
loss: 1.0145938396453857,grad_norm: 0.9020789336545465, iteration: 300136
loss: 1.023638129234314,grad_norm: 0.92912418581658, iteration: 300137
loss: 1.2095015048980713,grad_norm: 0.9999997044078487, iteration: 300138
loss: 1.003291368484497,grad_norm: 0.9565769107621102, iteration: 300139
loss: 1.0075342655181885,grad_norm: 0.8952312456113075, iteration: 300140
loss: 1.0097821950912476,grad_norm: 0.9999992508861114, iteration: 300141
loss: 0.982049286365509,grad_norm: 0.8214830972833769, iteration: 300142
loss: 0.9665219783782959,grad_norm: 0.8194825446000205, iteration: 300143
loss: 1.0271767377853394,grad_norm: 0.7868522712839133, iteration: 300144
loss: 0.9960936307907104,grad_norm: 0.8548162604924627, iteration: 300145
loss: 0.987945020198822,grad_norm: 0.779305209969815, iteration: 300146
loss: 0.9835382699966431,grad_norm: 0.8841558121513206, iteration: 300147
loss: 0.981313943862915,grad_norm: 0.824785377612347, iteration: 300148
loss: 0.9985988140106201,grad_norm: 0.785996988527706, iteration: 300149
loss: 0.9638850092887878,grad_norm: 0.9565757096763027, iteration: 300150
loss: 1.0173619985580444,grad_norm: 0.8673601104014355, iteration: 300151
loss: 0.9601215720176697,grad_norm: 0.7844172691251395, iteration: 300152
loss: 1.0166511535644531,grad_norm: 0.8891471796320188, iteration: 300153
loss: 1.0232336521148682,grad_norm: 0.8548399244711472, iteration: 300154
loss: 1.0238412618637085,grad_norm: 0.9999991502004774, iteration: 300155
loss: 0.9748977422714233,grad_norm: 0.7751119806441874, iteration: 300156
loss: 0.9916191697120667,grad_norm: 0.857266720496789, iteration: 300157
loss: 0.9875786900520325,grad_norm: 0.9999989829839095, iteration: 300158
loss: 0.9856322407722473,grad_norm: 0.8743269394919744, iteration: 300159
loss: 1.0070383548736572,grad_norm: 0.8289615374364764, iteration: 300160
loss: 1.0121166706085205,grad_norm: 0.999999534237978, iteration: 300161
loss: 0.9945210814476013,grad_norm: 0.8764555012649078, iteration: 300162
loss: 0.9972033500671387,grad_norm: 0.9999990826501253, iteration: 300163
loss: 1.0267246961593628,grad_norm: 0.9999995134943053, iteration: 300164
loss: 0.9931713938713074,grad_norm: 0.9870707484584134, iteration: 300165
loss: 1.029260277748108,grad_norm: 0.9024568735322357, iteration: 300166
loss: 1.0313562154769897,grad_norm: 0.884994914436866, iteration: 300167
loss: 0.9992179870605469,grad_norm: 0.8319474228290412, iteration: 300168
loss: 0.9936544299125671,grad_norm: 0.7651435991199511, iteration: 300169
loss: 0.993351936340332,grad_norm: 0.7133770839594985, iteration: 300170
loss: 1.0261335372924805,grad_norm: 0.7704419365175742, iteration: 300171
loss: 1.004892110824585,grad_norm: 0.9560579012693586, iteration: 300172
loss: 0.9966027736663818,grad_norm: 0.9838132676927365, iteration: 300173
loss: 0.9912833571434021,grad_norm: 0.9425057699962692, iteration: 300174
loss: 1.009528636932373,grad_norm: 0.9588158700794308, iteration: 300175
loss: 0.9684341549873352,grad_norm: 0.9711295826527733, iteration: 300176
loss: 1.0200077295303345,grad_norm: 0.8352772361289049, iteration: 300177
loss: 1.01076078414917,grad_norm: 0.7625384345867956, iteration: 300178
loss: 1.0141817331314087,grad_norm: 0.7672985422576601, iteration: 300179
loss: 1.0004889965057373,grad_norm: 0.7495130913371715, iteration: 300180
loss: 1.0097373723983765,grad_norm: 0.8831726023761617, iteration: 300181
loss: 1.0056822299957275,grad_norm: 0.8588340300909245, iteration: 300182
loss: 1.0131561756134033,grad_norm: 0.816347172739687, iteration: 300183
loss: 1.0475795269012451,grad_norm: 0.9999993473365039, iteration: 300184
loss: 1.007056474685669,grad_norm: 0.784280691706627, iteration: 300185
loss: 0.9986574053764343,grad_norm: 0.7639409092546406, iteration: 300186
loss: 0.9951849579811096,grad_norm: 0.9049766196223391, iteration: 300187
loss: 0.9777593612670898,grad_norm: 0.9329640482246176, iteration: 300188
loss: 1.0330604314804077,grad_norm: 0.9999992288534488, iteration: 300189
loss: 1.0210769176483154,grad_norm: 0.8833256477013921, iteration: 300190
loss: 1.008184790611267,grad_norm: 0.7564896334476923, iteration: 300191
loss: 0.9778867363929749,grad_norm: 0.7808269370483685, iteration: 300192
loss: 1.013942837715149,grad_norm: 0.9999991137334526, iteration: 300193
loss: 1.0069512128829956,grad_norm: 0.7125514956439494, iteration: 300194
loss: 0.9830058813095093,grad_norm: 0.8499317731272596, iteration: 300195
loss: 1.0029296875,grad_norm: 0.8166626732088949, iteration: 300196
loss: 0.9806815981864929,grad_norm: 0.8711720053252395, iteration: 300197
loss: 0.9968356490135193,grad_norm: 0.8227766007727574, iteration: 300198
loss: 0.9503771662712097,grad_norm: 0.7905984835702642, iteration: 300199
loss: 1.0378786325454712,grad_norm: 0.7865208493895539, iteration: 300200
loss: 1.0356024503707886,grad_norm: 0.8476663280906263, iteration: 300201
loss: 1.013114333152771,grad_norm: 0.9119129185186542, iteration: 300202
loss: 0.983280599117279,grad_norm: 0.7523646883394032, iteration: 300203
loss: 1.0628135204315186,grad_norm: 0.9999998244445751, iteration: 300204
loss: 1.0111547708511353,grad_norm: 0.7440664776146603, iteration: 300205
loss: 1.0256768465042114,grad_norm: 0.9999999011841558, iteration: 300206
loss: 0.9994981288909912,grad_norm: 0.8103521627183109, iteration: 300207
loss: 1.036726951599121,grad_norm: 0.8694318051560626, iteration: 300208
loss: 0.9750387072563171,grad_norm: 0.7185624957222958, iteration: 300209
loss: 1.017053246498108,grad_norm: 0.7132864764859572, iteration: 300210
loss: 1.0211548805236816,grad_norm: 0.9349810788331153, iteration: 300211
loss: 1.0029505491256714,grad_norm: 0.8419155800007196, iteration: 300212
loss: 0.995366096496582,grad_norm: 0.9165609622325547, iteration: 300213
loss: 0.9854703545570374,grad_norm: 0.7569842920598423, iteration: 300214
loss: 1.0053737163543701,grad_norm: 0.9884691899460446, iteration: 300215
loss: 0.9826796650886536,grad_norm: 0.9999991508878445, iteration: 300216
loss: 1.021573543548584,grad_norm: 0.9489259547015517, iteration: 300217
loss: 1.0077784061431885,grad_norm: 0.9999990603590463, iteration: 300218
loss: 1.1104893684387207,grad_norm: 0.9999998459257706, iteration: 300219
loss: 1.0407711267471313,grad_norm: 0.9999991773696915, iteration: 300220
loss: 0.9929590821266174,grad_norm: 0.9052931886219783, iteration: 300221
loss: 1.0172384977340698,grad_norm: 0.9999991790864867, iteration: 300222
loss: 0.9808509349822998,grad_norm: 0.8367679136644814, iteration: 300223
loss: 1.0066460371017456,grad_norm: 0.9530148852214522, iteration: 300224
loss: 1.020668864250183,grad_norm: 0.9612652871450253, iteration: 300225
loss: 1.0074293613433838,grad_norm: 0.733310077409694, iteration: 300226
loss: 1.1099570989608765,grad_norm: 0.9999991554875308, iteration: 300227
loss: 1.0016494989395142,grad_norm: 0.827297921025369, iteration: 300228
loss: 1.0090097188949585,grad_norm: 0.9435171949632697, iteration: 300229
loss: 1.010815143585205,grad_norm: 0.9050057150965085, iteration: 300230
loss: 1.0604859590530396,grad_norm: 0.9999990575136546, iteration: 300231
loss: 0.9996258616447449,grad_norm: 0.8816887398365474, iteration: 300232
loss: 0.9652391076087952,grad_norm: 0.7658563683687091, iteration: 300233
loss: 1.015952229499817,grad_norm: 0.91294694160552, iteration: 300234
loss: 0.9901738166809082,grad_norm: 0.9999992096726642, iteration: 300235
loss: 0.9868667125701904,grad_norm: 0.7346587751192163, iteration: 300236
loss: 0.993154764175415,grad_norm: 0.8418082005386963, iteration: 300237
loss: 1.03635835647583,grad_norm: 0.7863395749902109, iteration: 300238
loss: 1.0206682682037354,grad_norm: 0.7597790864288729, iteration: 300239
loss: 0.9914950728416443,grad_norm: 0.912960142629524, iteration: 300240
loss: 1.0090734958648682,grad_norm: 0.999999026789722, iteration: 300241
loss: 1.0606839656829834,grad_norm: 0.9580642537112417, iteration: 300242
loss: 0.9930989146232605,grad_norm: 0.7969565132029856, iteration: 300243
loss: 0.9892740249633789,grad_norm: 0.7253398184054201, iteration: 300244
loss: 1.1834708452224731,grad_norm: 0.999999277108081, iteration: 300245
loss: 1.0000742673873901,grad_norm: 0.8663504364238185, iteration: 300246
loss: 1.0149989128112793,grad_norm: 0.8428616661123114, iteration: 300247
loss: 1.0071786642074585,grad_norm: 0.7573519324139142, iteration: 300248
loss: 1.0199209451675415,grad_norm: 0.9999990938903879, iteration: 300249
loss: 0.9978528022766113,grad_norm: 0.8666851149575732, iteration: 300250
loss: 0.989314615726471,grad_norm: 0.9999991055124288, iteration: 300251
loss: 0.9710533022880554,grad_norm: 0.960784646352051, iteration: 300252
loss: 0.9915838241577148,grad_norm: 0.9065558881986372, iteration: 300253
loss: 1.0439523458480835,grad_norm: 0.9796595769953242, iteration: 300254
loss: 1.0040639638900757,grad_norm: 0.692298440781123, iteration: 300255
loss: 0.9945853352546692,grad_norm: 0.8009441543487821, iteration: 300256
loss: 1.0032806396484375,grad_norm: 0.8355042252736243, iteration: 300257
loss: 1.0095369815826416,grad_norm: 0.928592884605908, iteration: 300258
loss: 0.971677839756012,grad_norm: 0.7525438480784551, iteration: 300259
loss: 0.9856913089752197,grad_norm: 0.7453934421059346, iteration: 300260
loss: 0.9693644046783447,grad_norm: 0.7559549859867186, iteration: 300261
loss: 1.0132383108139038,grad_norm: 0.8832944577813825, iteration: 300262
loss: 0.9806481003761292,grad_norm: 0.9199256394753801, iteration: 300263
loss: 1.0674052238464355,grad_norm: 0.9999991567079966, iteration: 300264
loss: 0.9846168160438538,grad_norm: 0.9740611225794099, iteration: 300265
loss: 0.9797543883323669,grad_norm: 0.8255321762103096, iteration: 300266
loss: 1.0535613298416138,grad_norm: 0.9022426604893355, iteration: 300267
loss: 1.0001931190490723,grad_norm: 0.999999156948292, iteration: 300268
loss: 1.0329926013946533,grad_norm: 0.8201401095332186, iteration: 300269
loss: 1.0128378868103027,grad_norm: 0.8893824071205236, iteration: 300270
loss: 0.9746145009994507,grad_norm: 0.8680105143262627, iteration: 300271
loss: 0.9892153739929199,grad_norm: 0.9999991599740873, iteration: 300272
loss: 1.0105620622634888,grad_norm: 0.8569336980003286, iteration: 300273
loss: 0.9917468428611755,grad_norm: 0.7716183615505131, iteration: 300274
loss: 0.9833106398582458,grad_norm: 0.8895612639188216, iteration: 300275
loss: 1.0067163705825806,grad_norm: 0.8779132858026711, iteration: 300276
loss: 1.0070679187774658,grad_norm: 0.9999990449099616, iteration: 300277
loss: 1.0194810628890991,grad_norm: 0.6908296213863592, iteration: 300278
loss: 0.9795182943344116,grad_norm: 0.9574569124762184, iteration: 300279
loss: 0.9621846675872803,grad_norm: 0.772079730691682, iteration: 300280
loss: 0.9969707131385803,grad_norm: 0.9782860162257548, iteration: 300281
loss: 0.985791802406311,grad_norm: 0.8411546979111965, iteration: 300282
loss: 1.0052080154418945,grad_norm: 0.8520718129764742, iteration: 300283
loss: 1.0329500436782837,grad_norm: 0.8072465482546345, iteration: 300284
loss: 0.9872771501541138,grad_norm: 0.7739233655511648, iteration: 300285
loss: 0.9895696043968201,grad_norm: 0.7827178846505399, iteration: 300286
loss: 1.0067907571792603,grad_norm: 0.6669480570370796, iteration: 300287
loss: 1.0213674306869507,grad_norm: 0.8457108138505237, iteration: 300288
loss: 1.0377479791641235,grad_norm: 0.9999998346511042, iteration: 300289
loss: 0.9887354969978333,grad_norm: 0.8349350933285676, iteration: 300290
loss: 0.9836885333061218,grad_norm: 0.8394547654859345, iteration: 300291
loss: 1.0261958837509155,grad_norm: 0.9999990960239017, iteration: 300292
loss: 1.0085361003875732,grad_norm: 0.9999991125852935, iteration: 300293
loss: 1.0064653158187866,grad_norm: 0.8175070640665039, iteration: 300294
loss: 1.0142607688903809,grad_norm: 0.999999734767621, iteration: 300295
loss: 0.959091305732727,grad_norm: 0.8115595007645485, iteration: 300296
loss: 1.005041480064392,grad_norm: 0.9999990822528863, iteration: 300297
loss: 1.0722987651824951,grad_norm: 0.9999992751706829, iteration: 300298
loss: 0.9884233474731445,grad_norm: 0.7390911452940807, iteration: 300299
loss: 1.0037809610366821,grad_norm: 0.791914035503427, iteration: 300300
loss: 0.9778444766998291,grad_norm: 0.9999989900722712, iteration: 300301
loss: 1.0286481380462646,grad_norm: 0.7543277401629293, iteration: 300302
loss: 1.0086534023284912,grad_norm: 0.9686025096293022, iteration: 300303
loss: 0.9566449522972107,grad_norm: 0.7257798735600035, iteration: 300304
loss: 0.9655863642692566,grad_norm: 0.764119599447516, iteration: 300305
loss: 1.0215805768966675,grad_norm: 0.7623054413424111, iteration: 300306
loss: 1.0144836902618408,grad_norm: 0.8820013408971977, iteration: 300307
loss: 1.0217432975769043,grad_norm: 0.8934516318460723, iteration: 300308
loss: 0.9890925288200378,grad_norm: 0.850169143522298, iteration: 300309
loss: 1.0157051086425781,grad_norm: 0.7144779326966506, iteration: 300310
loss: 0.9712021946907043,grad_norm: 0.8656465657601763, iteration: 300311
loss: 0.9892938137054443,grad_norm: 0.889619998150586, iteration: 300312
loss: 1.004923939704895,grad_norm: 0.9321505886924913, iteration: 300313
loss: 0.9907152652740479,grad_norm: 0.7560839298404943, iteration: 300314
loss: 0.9786714911460876,grad_norm: 0.744067592209006, iteration: 300315
loss: 0.9843611121177673,grad_norm: 0.8529915069496954, iteration: 300316
loss: 1.0159555673599243,grad_norm: 0.7582772756922201, iteration: 300317
loss: 1.0569758415222168,grad_norm: 0.9999990972691788, iteration: 300318
loss: 0.9728088974952698,grad_norm: 0.8204630580684145, iteration: 300319
loss: 0.994629979133606,grad_norm: 0.8237070629363666, iteration: 300320
loss: 1.0141812562942505,grad_norm: 0.8951091485353265, iteration: 300321
loss: 0.9890456795692444,grad_norm: 0.8393236078442766, iteration: 300322
loss: 1.0103397369384766,grad_norm: 0.7436234530843336, iteration: 300323
loss: 1.0014691352844238,grad_norm: 0.8623824972023351, iteration: 300324
loss: 0.9868395328521729,grad_norm: 0.7539046141362864, iteration: 300325
loss: 1.0227123498916626,grad_norm: 0.999999599614615, iteration: 300326
loss: 0.9618258476257324,grad_norm: 0.6929546469863633, iteration: 300327
loss: 1.0120407342910767,grad_norm: 0.7324587098484125, iteration: 300328
loss: 0.9970266222953796,grad_norm: 0.8514706803389965, iteration: 300329
loss: 1.0014374256134033,grad_norm: 0.8437619270777731, iteration: 300330
loss: 1.0005091428756714,grad_norm: 0.956060583697669, iteration: 300331
loss: 0.9917107224464417,grad_norm: 0.9249768608618701, iteration: 300332
loss: 1.0058636665344238,grad_norm: 0.8252383095804221, iteration: 300333
loss: 0.9869607090950012,grad_norm: 0.9999991316303022, iteration: 300334
loss: 0.9786743521690369,grad_norm: 0.8630334731762387, iteration: 300335
loss: 1.0630054473876953,grad_norm: 0.8576268905324924, iteration: 300336
loss: 0.9978886246681213,grad_norm: 0.885794352371386, iteration: 300337
loss: 1.0613102912902832,grad_norm: 0.9999998286982501, iteration: 300338
loss: 1.0150114297866821,grad_norm: 0.868986392915836, iteration: 300339
loss: 1.0122876167297363,grad_norm: 0.7743825526982385, iteration: 300340
loss: 0.9829459190368652,grad_norm: 0.8669784490671872, iteration: 300341
loss: 0.9803289771080017,grad_norm: 0.8910833053467658, iteration: 300342
loss: 0.9584512710571289,grad_norm: 0.7993009589683054, iteration: 300343
loss: 0.9762877821922302,grad_norm: 0.9999992240998414, iteration: 300344
loss: 0.985396683216095,grad_norm: 0.9032998619564198, iteration: 300345
loss: 0.9755900502204895,grad_norm: 0.9999990746364478, iteration: 300346
loss: 1.0112665891647339,grad_norm: 0.7298327470498771, iteration: 300347
loss: 0.9984812140464783,grad_norm: 0.7890346993222903, iteration: 300348
loss: 1.0031044483184814,grad_norm: 0.7568988710423159, iteration: 300349
loss: 1.0397803783416748,grad_norm: 0.9999990967851401, iteration: 300350
loss: 1.0318759679794312,grad_norm: 0.9454202248317343, iteration: 300351
loss: 1.0172971487045288,grad_norm: 0.759050413409728, iteration: 300352
loss: 1.0104808807373047,grad_norm: 0.9831837732564614, iteration: 300353
loss: 0.9933929443359375,grad_norm: 0.9999990485731166, iteration: 300354
loss: 0.9631220102310181,grad_norm: 0.8155426407489961, iteration: 300355
loss: 0.9824920296669006,grad_norm: 0.7923229858755246, iteration: 300356
loss: 1.021612286567688,grad_norm: 0.7342934674223902, iteration: 300357
loss: 0.996034562587738,grad_norm: 0.8733238631153997, iteration: 300358
loss: 0.9928961992263794,grad_norm: 0.9467839104076938, iteration: 300359
loss: 0.9656680822372437,grad_norm: 0.9396516339280518, iteration: 300360
loss: 0.9878498911857605,grad_norm: 0.9006707886726538, iteration: 300361
loss: 1.0029962062835693,grad_norm: 0.8088799604430088, iteration: 300362
loss: 0.9847681522369385,grad_norm: 0.7531784470205972, iteration: 300363
loss: 1.006242275238037,grad_norm: 0.9266332888513462, iteration: 300364
loss: 1.0073343515396118,grad_norm: 0.8850957785809491, iteration: 300365
loss: 1.001819133758545,grad_norm: 0.9037024138698494, iteration: 300366
loss: 1.0071524381637573,grad_norm: 0.9531635479584517, iteration: 300367
loss: 1.0387022495269775,grad_norm: 0.9999994040850796, iteration: 300368
loss: 1.0027906894683838,grad_norm: 0.9684445656012564, iteration: 300369
loss: 0.9913323521614075,grad_norm: 0.977776838394933, iteration: 300370
loss: 1.030357837677002,grad_norm: 0.9999998824830958, iteration: 300371
loss: 1.0371218919754028,grad_norm: 0.8630453382592532, iteration: 300372
loss: 1.0172550678253174,grad_norm: 0.9257151573947268, iteration: 300373
loss: 1.0068684816360474,grad_norm: 0.9202234369242244, iteration: 300374
loss: 0.9929829835891724,grad_norm: 0.91741467060668, iteration: 300375
loss: 0.9850909113883972,grad_norm: 0.855744229338994, iteration: 300376
loss: 0.981307327747345,grad_norm: 0.7902314185262294, iteration: 300377
loss: 0.9831241369247437,grad_norm: 0.9999991138453558, iteration: 300378
loss: 1.068774700164795,grad_norm: 0.8552269277239056, iteration: 300379
loss: 1.0258928537368774,grad_norm: 0.8329797296480893, iteration: 300380
loss: 1.0597716569900513,grad_norm: 0.9007302479628618, iteration: 300381
loss: 0.9867508411407471,grad_norm: 0.8297419805221246, iteration: 300382
loss: 1.0362681150436401,grad_norm: 0.940735958637969, iteration: 300383
loss: 0.9919077754020691,grad_norm: 0.9240167986161392, iteration: 300384
loss: 1.006374478340149,grad_norm: 0.8925188081217474, iteration: 300385
loss: 1.009285807609558,grad_norm: 0.8613175830563341, iteration: 300386
loss: 1.004007339477539,grad_norm: 0.7759660657376339, iteration: 300387
loss: 0.9963635206222534,grad_norm: 0.7022766829284018, iteration: 300388
loss: 0.9919515252113342,grad_norm: 0.8876141714348154, iteration: 300389
loss: 0.9362688660621643,grad_norm: 0.825149449552022, iteration: 300390
loss: 1.0019160509109497,grad_norm: 0.7339818516924973, iteration: 300391
loss: 0.9975941777229309,grad_norm: 0.6996928891467589, iteration: 300392
loss: 0.9831647872924805,grad_norm: 0.7892383463507167, iteration: 300393
loss: 0.9820663928985596,grad_norm: 0.9271367460734402, iteration: 300394
loss: 1.0209015607833862,grad_norm: 0.97990350554773, iteration: 300395
loss: 0.9891746044158936,grad_norm: 0.910026136324993, iteration: 300396
loss: 0.9848214387893677,grad_norm: 0.9891068804943696, iteration: 300397
loss: 0.9950282573699951,grad_norm: 0.9999999012168688, iteration: 300398
loss: 1.0031861066818237,grad_norm: 0.9999991299045458, iteration: 300399
loss: 0.9830344915390015,grad_norm: 0.7916942568354802, iteration: 300400
loss: 0.9775422811508179,grad_norm: 0.9596439745640395, iteration: 300401
loss: 1.0196157693862915,grad_norm: 0.8022270405591355, iteration: 300402
loss: 1.0050936937332153,grad_norm: 0.7838149135198546, iteration: 300403
loss: 0.9853689074516296,grad_norm: 0.8504670291518056, iteration: 300404
loss: 1.047879934310913,grad_norm: 0.8753808420959314, iteration: 300405
loss: 0.9713692665100098,grad_norm: 0.815858430943839, iteration: 300406
loss: 1.0298380851745605,grad_norm: 0.9168138598005678, iteration: 300407
loss: 1.0053836107254028,grad_norm: 0.9457891653998401, iteration: 300408
loss: 0.9950938820838928,grad_norm: 0.8235306497056505, iteration: 300409
loss: 1.0028672218322754,grad_norm: 0.9244658575658625, iteration: 300410
loss: 0.9926316142082214,grad_norm: 0.8095112712793444, iteration: 300411
loss: 0.9932803511619568,grad_norm: 0.9638908405370944, iteration: 300412
loss: 0.976571261882782,grad_norm: 0.823686278100334, iteration: 300413
loss: 0.9904283285140991,grad_norm: 0.7508277739050319, iteration: 300414
loss: 0.9960012435913086,grad_norm: 0.8788601987161102, iteration: 300415
loss: 0.9754501581192017,grad_norm: 0.8274962935218297, iteration: 300416
loss: 1.0068788528442383,grad_norm: 0.8801650549280093, iteration: 300417
loss: 0.9984537363052368,grad_norm: 0.6590996025366248, iteration: 300418
loss: 1.0106806755065918,grad_norm: 0.8896384058770138, iteration: 300419
loss: 1.0363264083862305,grad_norm: 0.7590349436369671, iteration: 300420
loss: 1.0198431015014648,grad_norm: 0.7101513259651707, iteration: 300421
loss: 1.0841186046600342,grad_norm: 0.9999993075094674, iteration: 300422
loss: 0.9911895394325256,grad_norm: 0.75230600785433, iteration: 300423
loss: 1.0198817253112793,grad_norm: 0.9248071186625425, iteration: 300424
loss: 0.9881277084350586,grad_norm: 0.8618549691130534, iteration: 300425
loss: 1.0045074224472046,grad_norm: 0.9999991832327091, iteration: 300426
loss: 1.0041801929473877,grad_norm: 0.8394232518162895, iteration: 300427
loss: 0.9838380813598633,grad_norm: 0.7390452267628118, iteration: 300428
loss: 1.0414133071899414,grad_norm: 0.8917373063941254, iteration: 300429
loss: 0.9869791269302368,grad_norm: 0.806289355969424, iteration: 300430
loss: 0.9826854467391968,grad_norm: 0.8000852128930862, iteration: 300431
loss: 1.0059722661972046,grad_norm: 0.8480612400850904, iteration: 300432
loss: 1.013452410697937,grad_norm: 0.9999991855155064, iteration: 300433
loss: 0.9557974934577942,grad_norm: 0.9118548268708101, iteration: 300434
loss: 1.224622368812561,grad_norm: 1.000000013026492, iteration: 300435
loss: 0.9976028203964233,grad_norm: 0.9568829110442675, iteration: 300436
loss: 0.9527192115783691,grad_norm: 0.8398032164201841, iteration: 300437
loss: 0.9840078949928284,grad_norm: 0.7803562370330983, iteration: 300438
loss: 0.9716880917549133,grad_norm: 0.7722004079305917, iteration: 300439
loss: 0.9966780543327332,grad_norm: 0.6695200291107407, iteration: 300440
loss: 0.9745770692825317,grad_norm: 0.8947369395641667, iteration: 300441
loss: 1.0033915042877197,grad_norm: 0.9314501325779876, iteration: 300442
loss: 0.994488000869751,grad_norm: 0.9061535090449749, iteration: 300443
loss: 1.0710681676864624,grad_norm: 0.9361854874755625, iteration: 300444
loss: 0.9787652492523193,grad_norm: 0.999998962951436, iteration: 300445
loss: 1.0108546018600464,grad_norm: 0.7828845345260658, iteration: 300446
loss: 1.0300822257995605,grad_norm: 0.9783763421048322, iteration: 300447
loss: 0.9839856028556824,grad_norm: 0.903378997958119, iteration: 300448
loss: 1.0047703981399536,grad_norm: 0.9092731102532811, iteration: 300449
loss: 1.119755506515503,grad_norm: 0.999999342161735, iteration: 300450
loss: 1.0234875679016113,grad_norm: 0.7352967829589522, iteration: 300451
loss: 0.9962266087532043,grad_norm: 0.824983616323486, iteration: 300452
loss: 0.9907436370849609,grad_norm: 0.9637999565315108, iteration: 300453
loss: 1.0804141759872437,grad_norm: 0.9999991979225853, iteration: 300454
loss: 1.0119805335998535,grad_norm: 0.9999991309265707, iteration: 300455
loss: 0.9611061811447144,grad_norm: 0.8623795317796398, iteration: 300456
loss: 1.072403073310852,grad_norm: 0.9373963692696741, iteration: 300457
loss: 1.0109997987747192,grad_norm: 0.7665656939515685, iteration: 300458
loss: 1.0122148990631104,grad_norm: 0.9853085267103918, iteration: 300459
loss: 0.9896835088729858,grad_norm: 0.8692477684160005, iteration: 300460
loss: 0.9692888855934143,grad_norm: 0.7113662348558599, iteration: 300461
loss: 1.0200787782669067,grad_norm: 0.8358580229279906, iteration: 300462
loss: 0.9895551204681396,grad_norm: 0.8078317559685255, iteration: 300463
loss: 0.9907804727554321,grad_norm: 0.9999989908003538, iteration: 300464
loss: 0.9766513109207153,grad_norm: 0.8936199875821657, iteration: 300465
loss: 0.976880669593811,grad_norm: 0.875471181446282, iteration: 300466
loss: 1.0434626340866089,grad_norm: 0.9999996805557249, iteration: 300467
loss: 1.014036774635315,grad_norm: 0.9999998734040615, iteration: 300468
loss: 0.9614276885986328,grad_norm: 0.7607860365903978, iteration: 300469
loss: 0.9854201674461365,grad_norm: 0.8410030651689603, iteration: 300470
loss: 1.0078004598617554,grad_norm: 0.9999991232511765, iteration: 300471
loss: 1.0153379440307617,grad_norm: 0.7713792631533731, iteration: 300472
loss: 1.0020087957382202,grad_norm: 0.8926624068298931, iteration: 300473
loss: 0.9705721139907837,grad_norm: 0.8973065657417177, iteration: 300474
loss: 1.0680725574493408,grad_norm: 0.8537188847223268, iteration: 300475
loss: 1.1114798784255981,grad_norm: 0.8533924343843007, iteration: 300476
loss: 1.0065512657165527,grad_norm: 0.9955915241827332, iteration: 300477
loss: 0.9832034111022949,grad_norm: 0.9999992253709564, iteration: 300478
loss: 0.9951213002204895,grad_norm: 0.7781055519801356, iteration: 300479
loss: 0.9854074716567993,grad_norm: 0.818727992837198, iteration: 300480
loss: 0.9948791861534119,grad_norm: 0.835770355952769, iteration: 300481
loss: 1.0262449979782104,grad_norm: 0.8587046881129233, iteration: 300482
loss: 0.9843966960906982,grad_norm: 0.6638327971377821, iteration: 300483
loss: 0.9984532594680786,grad_norm: 0.9146488839496476, iteration: 300484
loss: 0.9816867113113403,grad_norm: 0.7854142541970874, iteration: 300485
loss: 0.9900888204574585,grad_norm: 0.8461207496940084, iteration: 300486
loss: 0.9965329766273499,grad_norm: 0.8857380755150273, iteration: 300487
loss: 0.9978033900260925,grad_norm: 0.894475723758379, iteration: 300488
loss: 0.990287721157074,grad_norm: 0.6477917137245782, iteration: 300489
loss: 1.0031676292419434,grad_norm: 0.823963354655269, iteration: 300490
loss: 0.9573051333427429,grad_norm: 0.7462884311373721, iteration: 300491
loss: 1.0010454654693604,grad_norm: 0.7321238766128885, iteration: 300492
loss: 1.0368489027023315,grad_norm: 0.8894197880179987, iteration: 300493
loss: 1.0279523134231567,grad_norm: 0.9999993654075665, iteration: 300494
loss: 1.028789758682251,grad_norm: 0.8275923237260873, iteration: 300495
loss: 1.0428358316421509,grad_norm: 0.9387123489859426, iteration: 300496
loss: 0.9745506644248962,grad_norm: 0.999999142140719, iteration: 300497
loss: 1.032617211341858,grad_norm: 0.7973307809034247, iteration: 300498
loss: 0.9939542412757874,grad_norm: 0.7299313140341184, iteration: 300499
loss: 1.0362510681152344,grad_norm: 0.7679969302704296, iteration: 300500
loss: 1.0124379396438599,grad_norm: 0.9369866239571125, iteration: 300501
loss: 1.0170108079910278,grad_norm: 0.7887101259239997, iteration: 300502
loss: 0.9796607494354248,grad_norm: 0.8103594116821546, iteration: 300503
loss: 1.0215632915496826,grad_norm: 0.9999996231956215, iteration: 300504
loss: 0.9880126714706421,grad_norm: 0.8436344615952612, iteration: 300505
loss: 0.9916458129882812,grad_norm: 0.849239810430962, iteration: 300506
loss: 0.9844075441360474,grad_norm: 0.7447429648717895, iteration: 300507
loss: 0.9899047613143921,grad_norm: 0.7941408367980669, iteration: 300508
loss: 1.0354509353637695,grad_norm: 0.7741203806167666, iteration: 300509
loss: 1.0207494497299194,grad_norm: 0.8274100492572667, iteration: 300510
loss: 1.010777473449707,grad_norm: 0.8608504158760767, iteration: 300511
loss: 1.038028597831726,grad_norm: 0.999999085500619, iteration: 300512
loss: 0.990585207939148,grad_norm: 0.7565743691047141, iteration: 300513
loss: 0.9862647652626038,grad_norm: 0.9021579403902088, iteration: 300514
loss: 0.9727904796600342,grad_norm: 0.9757911443553918, iteration: 300515
loss: 0.9570449590682983,grad_norm: 0.9270639532036575, iteration: 300516
loss: 1.0142008066177368,grad_norm: 0.7514433388253671, iteration: 300517
loss: 0.9730218052864075,grad_norm: 0.8186648279351134, iteration: 300518
loss: 0.9977919459342957,grad_norm: 0.8269563116127303, iteration: 300519
loss: 0.9819255471229553,grad_norm: 0.9898907595290223, iteration: 300520
loss: 0.9763177037239075,grad_norm: 0.9374943598067788, iteration: 300521
loss: 1.0851958990097046,grad_norm: 0.9951891977563553, iteration: 300522
loss: 0.9723041653633118,grad_norm: 0.722419691388175, iteration: 300523
loss: 1.0210367441177368,grad_norm: 0.8593301999622417, iteration: 300524
loss: 1.0029823780059814,grad_norm: 0.9452406646939145, iteration: 300525
loss: 1.033493161201477,grad_norm: 0.8135543215505525, iteration: 300526
loss: 1.0084595680236816,grad_norm: 0.8220322039206459, iteration: 300527
loss: 0.994270920753479,grad_norm: 0.7223351977358002, iteration: 300528
loss: 0.9959481954574585,grad_norm: 0.7694883136473627, iteration: 300529
loss: 0.99676513671875,grad_norm: 0.8050487024989302, iteration: 300530
loss: 1.001716136932373,grad_norm: 0.9999990923466981, iteration: 300531
loss: 0.961725652217865,grad_norm: 0.9276998660283632, iteration: 300532
loss: 1.0209153890609741,grad_norm: 0.8727160959518738, iteration: 300533
loss: 1.004909634590149,grad_norm: 0.8005903924670558, iteration: 300534
loss: 0.973035454750061,grad_norm: 0.8631905263682765, iteration: 300535
loss: 0.998827338218689,grad_norm: 0.7410728141994968, iteration: 300536
loss: 1.0233385562896729,grad_norm: 0.9999990644199788, iteration: 300537
loss: 0.973980188369751,grad_norm: 0.7501782657138691, iteration: 300538
loss: 0.9995733499526978,grad_norm: 0.9240556327378681, iteration: 300539
loss: 0.9605304598808289,grad_norm: 0.8098001410359019, iteration: 300540
loss: 1.0006155967712402,grad_norm: 0.895987534204762, iteration: 300541
loss: 1.039649248123169,grad_norm: 0.8696403041115874, iteration: 300542
loss: 1.009853482246399,grad_norm: 0.8735913594795607, iteration: 300543
loss: 0.9790716767311096,grad_norm: 0.7920450453648145, iteration: 300544
loss: 1.0047974586486816,grad_norm: 0.9241497431776509, iteration: 300545
loss: 1.0146428346633911,grad_norm: 0.9999991761885494, iteration: 300546
loss: 0.9786748290061951,grad_norm: 0.7252400423008656, iteration: 300547
loss: 1.0272431373596191,grad_norm: 0.9574568405906447, iteration: 300548
loss: 1.0094785690307617,grad_norm: 0.7756308631028034, iteration: 300549
loss: 0.9845142364501953,grad_norm: 0.9218495219417212, iteration: 300550
loss: 1.000282883644104,grad_norm: 0.7467928380458747, iteration: 300551
loss: 0.9627673625946045,grad_norm: 0.8968700523045797, iteration: 300552
loss: 0.998908281326294,grad_norm: 0.8657008854539993, iteration: 300553
loss: 0.9885316491127014,grad_norm: 0.8647393219427155, iteration: 300554
loss: 1.002561330795288,grad_norm: 0.9162107837695757, iteration: 300555
loss: 1.0310719013214111,grad_norm: 0.8060550634587623, iteration: 300556
loss: 0.9963023662567139,grad_norm: 0.8457240537938188, iteration: 300557
loss: 1.0050541162490845,grad_norm: 0.9999990318171162, iteration: 300558
loss: 1.0100505352020264,grad_norm: 0.8474602933660612, iteration: 300559
loss: 0.9725701212882996,grad_norm: 0.9074777869593142, iteration: 300560
loss: 0.9815027713775635,grad_norm: 0.8666687447366328, iteration: 300561
loss: 0.9812819957733154,grad_norm: 0.8962529201889545, iteration: 300562
loss: 1.006961464881897,grad_norm: 0.7909202675738256, iteration: 300563
loss: 1.0207722187042236,grad_norm: 0.8308501932346849, iteration: 300564
loss: 1.000544548034668,grad_norm: 0.8348467375732489, iteration: 300565
loss: 1.0726109743118286,grad_norm: 0.9999995620320898, iteration: 300566
loss: 1.0032289028167725,grad_norm: 0.9871958885909036, iteration: 300567
loss: 0.9915602803230286,grad_norm: 0.9181422028795446, iteration: 300568
loss: 0.9937605857849121,grad_norm: 0.8892599359165201, iteration: 300569
loss: 0.9771265387535095,grad_norm: 0.8826599165662098, iteration: 300570
loss: 1.020154356956482,grad_norm: 0.8763012950832855, iteration: 300571
loss: 1.0296714305877686,grad_norm: 0.8951639036582719, iteration: 300572
loss: 1.0073127746582031,grad_norm: 0.7354959592167288, iteration: 300573
loss: 0.9810518026351929,grad_norm: 0.8166531638146872, iteration: 300574
loss: 0.9976910948753357,grad_norm: 0.9580110459742346, iteration: 300575
loss: 1.0093936920166016,grad_norm: 0.8352135260484522, iteration: 300576
loss: 1.015407681465149,grad_norm: 0.7615370664184238, iteration: 300577
loss: 1.029348611831665,grad_norm: 0.8613779799997653, iteration: 300578
loss: 0.9951659440994263,grad_norm: 0.8758147109440623, iteration: 300579
loss: 1.03219735622406,grad_norm: 0.9267229051602665, iteration: 300580
loss: 1.0265707969665527,grad_norm: 0.8195318799433081, iteration: 300581
loss: 1.0248017311096191,grad_norm: 0.8512836740650168, iteration: 300582
loss: 1.0112624168395996,grad_norm: 0.851851709675791, iteration: 300583
loss: 0.9528363943099976,grad_norm: 0.8281525954317912, iteration: 300584
loss: 0.9715084433555603,grad_norm: 0.815920813029693, iteration: 300585
loss: 1.0341901779174805,grad_norm: 0.9613015713346592, iteration: 300586
loss: 0.9964436292648315,grad_norm: 0.9725640544716793, iteration: 300587
loss: 1.0103877782821655,grad_norm: 0.774570918614503, iteration: 300588
loss: 1.0923848152160645,grad_norm: 0.9999991860798861, iteration: 300589
loss: 0.9937768578529358,grad_norm: 0.7341540641366415, iteration: 300590
loss: 1.036015510559082,grad_norm: 0.9381471654098129, iteration: 300591
loss: 0.9936908483505249,grad_norm: 0.9367341174372817, iteration: 300592
loss: 1.0026311874389648,grad_norm: 0.8593074874211224, iteration: 300593
loss: 1.0204020738601685,grad_norm: 0.8446261911902551, iteration: 300594
loss: 0.9613274335861206,grad_norm: 0.8766303168582911, iteration: 300595
loss: 0.9821416139602661,grad_norm: 0.6834533362822584, iteration: 300596
loss: 0.9632967114448547,grad_norm: 0.8914386724047089, iteration: 300597
loss: 1.0178523063659668,grad_norm: 0.8115682917698183, iteration: 300598
loss: 0.9876888990402222,grad_norm: 0.7875459619229495, iteration: 300599
loss: 0.9975630640983582,grad_norm: 0.9461146404440862, iteration: 300600
loss: 1.029599905014038,grad_norm: 0.922127011773908, iteration: 300601
loss: 0.9893037676811218,grad_norm: 0.9308090400199428, iteration: 300602
loss: 1.0086103677749634,grad_norm: 0.999999428090347, iteration: 300603
loss: 0.9667345285415649,grad_norm: 0.8511289795714466, iteration: 300604
loss: 1.041380763053894,grad_norm: 0.7078634977123168, iteration: 300605
loss: 1.0007647275924683,grad_norm: 0.9478864457621774, iteration: 300606
loss: 0.9959430694580078,grad_norm: 0.8859501245637351, iteration: 300607
loss: 0.9656450152397156,grad_norm: 0.9392050758003654, iteration: 300608
loss: 0.9927434921264648,grad_norm: 0.7885960326423609, iteration: 300609
loss: 0.9889692068099976,grad_norm: 0.9149792875167244, iteration: 300610
loss: 0.9923207759857178,grad_norm: 0.8620442837395508, iteration: 300611
loss: 1.0995292663574219,grad_norm: 0.9999993424701181, iteration: 300612
loss: 1.0007617473602295,grad_norm: 0.9171073259683827, iteration: 300613
loss: 0.9990485906600952,grad_norm: 0.8479878099874693, iteration: 300614
loss: 0.9925583600997925,grad_norm: 0.9999991405006702, iteration: 300615
loss: 1.0110392570495605,grad_norm: 0.7517048222123881, iteration: 300616
loss: 1.1531920433044434,grad_norm: 0.9999995836293237, iteration: 300617
loss: 0.9643309116363525,grad_norm: 0.9160852853855835, iteration: 300618
loss: 0.9608489274978638,grad_norm: 0.7522536825252657, iteration: 300619
loss: 1.0114057064056396,grad_norm: 0.9241535798262892, iteration: 300620
loss: 1.0269978046417236,grad_norm: 0.8321462536160771, iteration: 300621
loss: 1.0058460235595703,grad_norm: 0.762476467080894, iteration: 300622
loss: 1.0101783275604248,grad_norm: 0.9502160440369037, iteration: 300623
loss: 0.9973496198654175,grad_norm: 0.7588347684469257, iteration: 300624
loss: 0.9954851865768433,grad_norm: 0.9047624814415337, iteration: 300625
loss: 0.9609346985816956,grad_norm: 0.9088243639033039, iteration: 300626
loss: 0.9714460968971252,grad_norm: 0.9878157239575421, iteration: 300627
loss: 1.0326465368270874,grad_norm: 0.9999998046714547, iteration: 300628
loss: 1.039268970489502,grad_norm: 0.9999998463377677, iteration: 300629
loss: 1.037192702293396,grad_norm: 0.9999993149079702, iteration: 300630
loss: 0.9957625865936279,grad_norm: 0.7334986480898081, iteration: 300631
loss: 0.9937317371368408,grad_norm: 0.7119950935071768, iteration: 300632
loss: 1.0010358095169067,grad_norm: 0.9999990288583173, iteration: 300633
loss: 0.9916937947273254,grad_norm: 0.8229074221332454, iteration: 300634
loss: 1.000006079673767,grad_norm: 0.9999990761028421, iteration: 300635
loss: 1.0032284259796143,grad_norm: 0.9414314930003319, iteration: 300636
loss: 0.996910035610199,grad_norm: 0.9770258989302817, iteration: 300637
loss: 0.9987326264381409,grad_norm: 0.9999991330020073, iteration: 300638
loss: 1.0394177436828613,grad_norm: 0.9174397105004419, iteration: 300639
loss: 1.0338069200515747,grad_norm: 0.9999991533790565, iteration: 300640
loss: 0.9719586372375488,grad_norm: 0.777990917971072, iteration: 300641
loss: 1.016709804534912,grad_norm: 0.8800603974984221, iteration: 300642
loss: 0.9923509359359741,grad_norm: 0.9831476183059129, iteration: 300643
loss: 1.0233728885650635,grad_norm: 0.9999993823822815, iteration: 300644
loss: 1.0054917335510254,grad_norm: 0.7706988086608663, iteration: 300645
loss: 0.9991838932037354,grad_norm: 0.7638169175942079, iteration: 300646
loss: 1.0260965824127197,grad_norm: 0.8623334347615468, iteration: 300647
loss: 0.9587603807449341,grad_norm: 0.8894765292468255, iteration: 300648
loss: 1.0161019563674927,grad_norm: 0.8281996483801819, iteration: 300649
loss: 1.0235334634780884,grad_norm: 0.8725500559383276, iteration: 300650
loss: 0.9967775940895081,grad_norm: 0.8562367928862453, iteration: 300651
loss: 0.9839671850204468,grad_norm: 0.9250373279832882, iteration: 300652
loss: 0.9960797429084778,grad_norm: 0.8815171157799878, iteration: 300653
loss: 0.9747606515884399,grad_norm: 0.9999991757188627, iteration: 300654
loss: 0.9960980415344238,grad_norm: 0.756437424156528, iteration: 300655
loss: 0.9905216097831726,grad_norm: 0.7927044347022963, iteration: 300656
loss: 0.9906430244445801,grad_norm: 0.8533042363183747, iteration: 300657
loss: 0.978668212890625,grad_norm: 0.7810210266645826, iteration: 300658
loss: 1.0820789337158203,grad_norm: 0.8307755535999038, iteration: 300659
loss: 1.0083413124084473,grad_norm: 0.898543247402694, iteration: 300660
loss: 0.9405515789985657,grad_norm: 0.8380793290981498, iteration: 300661
loss: 1.0752865076065063,grad_norm: 0.9999991349858633, iteration: 300662
loss: 0.9982888102531433,grad_norm: 0.7695829018084359, iteration: 300663
loss: 1.0491007566452026,grad_norm: 0.9999996987385305, iteration: 300664
loss: 1.0464483499526978,grad_norm: 0.9348317905785786, iteration: 300665
loss: 0.9923576712608337,grad_norm: 0.8976924766431369, iteration: 300666
loss: 0.9940794110298157,grad_norm: 0.9999990741620282, iteration: 300667
loss: 1.0276356935501099,grad_norm: 0.9394194070713009, iteration: 300668
loss: 0.95474773645401,grad_norm: 0.846570654603376, iteration: 300669
loss: 0.9971552491188049,grad_norm: 0.8970375073496655, iteration: 300670
loss: 1.0383340120315552,grad_norm: 0.9999991159276359, iteration: 300671
loss: 1.0062562227249146,grad_norm: 0.9999997814044311, iteration: 300672
loss: 0.9845450520515442,grad_norm: 0.9791995486210497, iteration: 300673
loss: 1.067642331123352,grad_norm: 0.7422362635714702, iteration: 300674
loss: 0.9987619519233704,grad_norm: 0.8412054446729403, iteration: 300675
loss: 0.9868987202644348,grad_norm: 0.8011127168625004, iteration: 300676
loss: 1.0549867153167725,grad_norm: 0.9112921906553185, iteration: 300677
loss: 0.9756540656089783,grad_norm: 0.8170397270062222, iteration: 300678
loss: 0.9865593314170837,grad_norm: 0.8667193018271411, iteration: 300679
loss: 1.0093404054641724,grad_norm: 0.9999991234873987, iteration: 300680
loss: 0.9828970432281494,grad_norm: 0.7290906070693988, iteration: 300681
loss: 1.0193443298339844,grad_norm: 0.8370656830720502, iteration: 300682
loss: 1.0252598524093628,grad_norm: 0.8997408315028653, iteration: 300683
loss: 0.9988994598388672,grad_norm: 0.7887465126078278, iteration: 300684
loss: 0.9654554724693298,grad_norm: 0.7014274064180828, iteration: 300685
loss: 0.9942004680633545,grad_norm: 0.8415445519610264, iteration: 300686
loss: 0.989132285118103,grad_norm: 0.9455459901138511, iteration: 300687
loss: 0.977954626083374,grad_norm: 0.8186254718564359, iteration: 300688
loss: 0.9740855693817139,grad_norm: 0.8797960547199529, iteration: 300689
loss: 1.0020174980163574,grad_norm: 0.7579202687334922, iteration: 300690
loss: 0.9916168451309204,grad_norm: 0.981094016334938, iteration: 300691
loss: 1.1709336042404175,grad_norm: 0.9999991756327772, iteration: 300692
loss: 0.9675795435905457,grad_norm: 0.9999991323047154, iteration: 300693
loss: 1.0355854034423828,grad_norm: 0.8161764659398707, iteration: 300694
loss: 0.9846813678741455,grad_norm: 0.9429488752969399, iteration: 300695
loss: 0.9872713685035706,grad_norm: 0.9999991561459162, iteration: 300696
loss: 0.9963176846504211,grad_norm: 0.8027595930468253, iteration: 300697
loss: 1.0113836526870728,grad_norm: 0.8846790228844181, iteration: 300698
loss: 1.0336018800735474,grad_norm: 0.8879513820625508, iteration: 300699
loss: 0.9731222987174988,grad_norm: 0.8741012084822826, iteration: 300700
loss: 1.0142625570297241,grad_norm: 0.8122574086102456, iteration: 300701
loss: 1.0144566297531128,grad_norm: 0.8412422869356768, iteration: 300702
loss: 0.9992328882217407,grad_norm: 0.8264939119677536, iteration: 300703
loss: 1.0027837753295898,grad_norm: 0.9999992218247574, iteration: 300704
loss: 0.9551767706871033,grad_norm: 0.831470492079189, iteration: 300705
loss: 0.9937875270843506,grad_norm: 0.9999994223976258, iteration: 300706
loss: 1.0404303073883057,grad_norm: 0.8052732543358037, iteration: 300707
loss: 1.0053778886795044,grad_norm: 0.8164853292261364, iteration: 300708
loss: 1.041638970375061,grad_norm: 0.8479516338286287, iteration: 300709
loss: 1.0142451524734497,grad_norm: 0.874147841440837, iteration: 300710
loss: 0.99722820520401,grad_norm: 0.9066623323572556, iteration: 300711
loss: 1.002636432647705,grad_norm: 0.8065583337294455, iteration: 300712
loss: 1.0404534339904785,grad_norm: 0.8646232293008603, iteration: 300713
loss: 0.9746031165122986,grad_norm: 0.8902833946559502, iteration: 300714
loss: 1.002403974533081,grad_norm: 0.9868334791195144, iteration: 300715
loss: 1.012195110321045,grad_norm: 0.9999991103518088, iteration: 300716
loss: 1.0037918090820312,grad_norm: 0.8489990911081778, iteration: 300717
loss: 0.9846817851066589,grad_norm: 0.9016969027396128, iteration: 300718
loss: 1.0262093544006348,grad_norm: 0.7672751714225297, iteration: 300719
loss: 0.9977279901504517,grad_norm: 0.9649715020712961, iteration: 300720
loss: 0.9871642589569092,grad_norm: 0.9999991526374902, iteration: 300721
loss: 1.0104455947875977,grad_norm: 0.8679470826683473, iteration: 300722
loss: 1.0125374794006348,grad_norm: 0.8276171975086859, iteration: 300723
loss: 1.0099377632141113,grad_norm: 0.8547180540058998, iteration: 300724
loss: 0.9872308969497681,grad_norm: 0.8071945795463443, iteration: 300725
loss: 1.0236051082611084,grad_norm: 0.9999991692997597, iteration: 300726
loss: 0.9876632690429688,grad_norm: 0.9092108170903562, iteration: 300727
loss: 0.9820852875709534,grad_norm: 0.8240738601722414, iteration: 300728
loss: 0.9909045696258545,grad_norm: 0.790980277864111, iteration: 300729
loss: 0.9801242351531982,grad_norm: 0.9850827656492191, iteration: 300730
loss: 1.0309314727783203,grad_norm: 0.9766261143874512, iteration: 300731
loss: 0.9935307502746582,grad_norm: 0.7131983467887271, iteration: 300732
loss: 0.9905930757522583,grad_norm: 0.9198460783354486, iteration: 300733
loss: 0.992401123046875,grad_norm: 0.8289028832771335, iteration: 300734
loss: 1.1253217458724976,grad_norm: 0.9999998664713688, iteration: 300735
loss: 0.9614824056625366,grad_norm: 0.8921140265601226, iteration: 300736
loss: 1.0099984407424927,grad_norm: 0.8373576267468972, iteration: 300737
loss: 0.9865183234214783,grad_norm: 0.7929471781414953, iteration: 300738
loss: 1.0564075708389282,grad_norm: 0.9999997020622685, iteration: 300739
loss: 1.0437233448028564,grad_norm: 0.9999990327231355, iteration: 300740
loss: 0.9730991125106812,grad_norm: 0.8783822941278264, iteration: 300741
loss: 1.0182584524154663,grad_norm: 0.8707463002586003, iteration: 300742
loss: 0.9590579867362976,grad_norm: 0.8274503468310587, iteration: 300743
loss: 1.0802775621414185,grad_norm: 0.8845289969775145, iteration: 300744
loss: 1.0019581317901611,grad_norm: 0.8073248182296832, iteration: 300745
loss: 0.9945705533027649,grad_norm: 0.9999994331224469, iteration: 300746
loss: 1.0096852779388428,grad_norm: 0.8413237811538943, iteration: 300747
loss: 1.005730390548706,grad_norm: 0.9239757454955247, iteration: 300748
loss: 0.9987940192222595,grad_norm: 0.828164268131611, iteration: 300749
loss: 1.017762303352356,grad_norm: 0.9999997514699005, iteration: 300750
loss: 0.9808381199836731,grad_norm: 0.915905501638816, iteration: 300751
loss: 0.9998924136161804,grad_norm: 0.6545488867291248, iteration: 300752
loss: 0.9990134835243225,grad_norm: 0.9999996049028916, iteration: 300753
loss: 1.036567211151123,grad_norm: 0.8078152736670098, iteration: 300754
loss: 1.0169463157653809,grad_norm: 0.9999989587129102, iteration: 300755
loss: 1.0468180179595947,grad_norm: 0.9840886091950332, iteration: 300756
loss: 1.0138815641403198,grad_norm: 0.8330149105310887, iteration: 300757
loss: 0.9830538630485535,grad_norm: 0.8451607086486245, iteration: 300758
loss: 1.0105700492858887,grad_norm: 0.7130527694455745, iteration: 300759
loss: 0.9508979916572571,grad_norm: 0.6807502696040671, iteration: 300760
loss: 1.034294605255127,grad_norm: 0.9999996414290352, iteration: 300761
loss: 1.0425645112991333,grad_norm: 0.7856684778968434, iteration: 300762
loss: 1.041148066520691,grad_norm: 0.8286299788416038, iteration: 300763
loss: 1.0326544046401978,grad_norm: 0.9999992220797468, iteration: 300764
loss: 1.0410860776901245,grad_norm: 0.9056479984721015, iteration: 300765
loss: 0.9861659407615662,grad_norm: 0.7763119391176515, iteration: 300766
loss: 0.9879952669143677,grad_norm: 0.9799939062182377, iteration: 300767
loss: 1.0140575170516968,grad_norm: 0.9999991511453065, iteration: 300768
loss: 1.00664484500885,grad_norm: 0.9051991869096668, iteration: 300769
loss: 0.9514820575714111,grad_norm: 0.8482599397530333, iteration: 300770
loss: 1.033071756362915,grad_norm: 0.929174501846393, iteration: 300771
loss: 1.0160201787948608,grad_norm: 0.8024130202481046, iteration: 300772
loss: 1.0037555694580078,grad_norm: 0.8692340121995193, iteration: 300773
loss: 1.025436520576477,grad_norm: 0.9103118086517239, iteration: 300774
loss: 1.0370186567306519,grad_norm: 0.8584106791232655, iteration: 300775
loss: 1.0251960754394531,grad_norm: 0.8111588558484194, iteration: 300776
loss: 1.0167893171310425,grad_norm: 0.7508588593068983, iteration: 300777
loss: 1.0064259767532349,grad_norm: 0.7918580894811269, iteration: 300778
loss: 0.9875492453575134,grad_norm: 0.7673513439572326, iteration: 300779
loss: 0.987359881401062,grad_norm: 0.8688969103731435, iteration: 300780
loss: 0.9953413605690002,grad_norm: 0.8234477847448237, iteration: 300781
loss: 1.013575553894043,grad_norm: 0.9999991984092037, iteration: 300782
loss: 0.9937474131584167,grad_norm: 0.6616463403345281, iteration: 300783
loss: 0.9853513836860657,grad_norm: 0.8227088485421394, iteration: 300784
loss: 0.9638819098472595,grad_norm: 0.7662626368446714, iteration: 300785
loss: 1.0048264265060425,grad_norm: 0.8405653545762195, iteration: 300786
loss: 1.0075137615203857,grad_norm: 0.7905403079718754, iteration: 300787
loss: 0.992990255355835,grad_norm: 0.8712948767988327, iteration: 300788
loss: 0.9914590120315552,grad_norm: 0.7544982176069635, iteration: 300789
loss: 0.9788179993629456,grad_norm: 0.9429143826981844, iteration: 300790
loss: 1.0276285409927368,grad_norm: 0.7962031978802583, iteration: 300791
loss: 0.984592080116272,grad_norm: 0.7157077497814368, iteration: 300792
loss: 0.9748239517211914,grad_norm: 0.7771403925062899, iteration: 300793
loss: 1.0192865133285522,grad_norm: 0.6982496221771013, iteration: 300794
loss: 0.9849856495857239,grad_norm: 0.7068305668746571, iteration: 300795
loss: 0.9891279339790344,grad_norm: 0.9155564093124879, iteration: 300796
loss: 1.0208797454833984,grad_norm: 0.8831895540428489, iteration: 300797
loss: 1.1063611507415771,grad_norm: 0.9999996933305918, iteration: 300798
loss: 0.9995098114013672,grad_norm: 0.9312479991600848, iteration: 300799
loss: 0.9913845658302307,grad_norm: 0.8303656227512206, iteration: 300800
loss: 0.9815235137939453,grad_norm: 0.9324010297780873, iteration: 300801
loss: 0.9902918934822083,grad_norm: 0.8270667775242753, iteration: 300802
loss: 1.00580632686615,grad_norm: 0.8426507042358541, iteration: 300803
loss: 1.0049289464950562,grad_norm: 0.8523715292831843, iteration: 300804
loss: 0.9333918690681458,grad_norm: 0.7946285831873723, iteration: 300805
loss: 1.0227054357528687,grad_norm: 0.7493905170001208, iteration: 300806
loss: 1.004897117614746,grad_norm: 0.8644738442121507, iteration: 300807
loss: 1.0350900888442993,grad_norm: 1.0000000443879, iteration: 300808
loss: 0.989581286907196,grad_norm: 0.886035379610665, iteration: 300809
loss: 1.0148098468780518,grad_norm: 0.9999996965280767, iteration: 300810
loss: 0.9810342192649841,grad_norm: 0.8287613646302947, iteration: 300811
loss: 1.0016053915023804,grad_norm: 0.9635541409440654, iteration: 300812
loss: 0.9830722212791443,grad_norm: 0.8249751495430094, iteration: 300813
loss: 0.9894345998764038,grad_norm: 0.9226717699681054, iteration: 300814
loss: 1.0228092670440674,grad_norm: 0.9318864585037563, iteration: 300815
loss: 1.0047117471694946,grad_norm: 0.8834105378650853, iteration: 300816
loss: 1.0078155994415283,grad_norm: 0.8818274320063859, iteration: 300817
loss: 0.9942225813865662,grad_norm: 0.7450690579046207, iteration: 300818
loss: 0.9956346750259399,grad_norm: 0.8166761122847465, iteration: 300819
loss: 1.0156623125076294,grad_norm: 0.9160858573386433, iteration: 300820
loss: 0.9648493528366089,grad_norm: 0.9043977603316501, iteration: 300821
loss: 0.9958739280700684,grad_norm: 0.7401194091621959, iteration: 300822
loss: 1.0397868156433105,grad_norm: 0.9421637596814807, iteration: 300823
loss: 0.956508994102478,grad_norm: 0.897467279260963, iteration: 300824
loss: 1.0203721523284912,grad_norm: 0.7866505087429325, iteration: 300825
loss: 0.9701135754585266,grad_norm: 0.8811221765620276, iteration: 300826
loss: 0.9792895317077637,grad_norm: 0.804340193612357, iteration: 300827
loss: 0.9741134643554688,grad_norm: 0.9999992137582345, iteration: 300828
loss: 1.0016226768493652,grad_norm: 0.7239706313230635, iteration: 300829
loss: 0.9893884658813477,grad_norm: 0.8971198318527926, iteration: 300830
loss: 1.0235073566436768,grad_norm: 0.7599121903508133, iteration: 300831
loss: 1.0502146482467651,grad_norm: 0.8609973106610713, iteration: 300832
loss: 1.0069234371185303,grad_norm: 0.9363700385172369, iteration: 300833
loss: 1.008659839630127,grad_norm: 0.9999993870067443, iteration: 300834
loss: 0.9878498911857605,grad_norm: 0.7940109868177503, iteration: 300835
loss: 1.0009169578552246,grad_norm: 0.9999989668292835, iteration: 300836
loss: 0.9702069759368896,grad_norm: 0.876219889204394, iteration: 300837
loss: 1.0115219354629517,grad_norm: 0.9500531815638688, iteration: 300838
loss: 1.0531357526779175,grad_norm: 0.9396327169168447, iteration: 300839
loss: 1.0165412425994873,grad_norm: 0.9931272163364543, iteration: 300840
loss: 1.0342402458190918,grad_norm: 0.8984220605730828, iteration: 300841
loss: 0.9721714854240417,grad_norm: 0.9546039434496009, iteration: 300842
loss: 1.0012227296829224,grad_norm: 0.6938836059753706, iteration: 300843
loss: 1.0114870071411133,grad_norm: 0.8032157396042532, iteration: 300844
loss: 0.992012619972229,grad_norm: 0.8544916966498958, iteration: 300845
loss: 1.0153168439865112,grad_norm: 0.9999990355875856, iteration: 300846
loss: 1.0071609020233154,grad_norm: 0.7718971541506021, iteration: 300847
loss: 1.0116727352142334,grad_norm: 0.9264553170800659, iteration: 300848
loss: 1.0392839908599854,grad_norm: 0.9999998797115855, iteration: 300849
loss: 1.0040028095245361,grad_norm: 0.8713784694249537, iteration: 300850
loss: 0.9991428852081299,grad_norm: 0.8408018776603073, iteration: 300851
loss: 0.98517245054245,grad_norm: 0.7954942839069395, iteration: 300852
loss: 1.0263010263442993,grad_norm: 0.7977291005890261, iteration: 300853
loss: 1.000022530555725,grad_norm: 0.8075978902791888, iteration: 300854
loss: 0.9412108063697815,grad_norm: 0.7756617926629416, iteration: 300855
loss: 0.9801528453826904,grad_norm: 0.7903676647029246, iteration: 300856
loss: 0.9897710084915161,grad_norm: 0.7879144011979806, iteration: 300857
loss: 0.9997472167015076,grad_norm: 0.8141608207389849, iteration: 300858
loss: 1.0047651529312134,grad_norm: 0.9999999257919114, iteration: 300859
loss: 1.0401228666305542,grad_norm: 0.7898987071969518, iteration: 300860
loss: 1.0104503631591797,grad_norm: 0.8946967537756286, iteration: 300861
loss: 0.9913291335105896,grad_norm: 0.7677272710080879, iteration: 300862
loss: 1.0078710317611694,grad_norm: 0.8048869470032275, iteration: 300863
loss: 1.0230920314788818,grad_norm: 0.8433959870954387, iteration: 300864
loss: 0.97491055727005,grad_norm: 0.7737123492852831, iteration: 300865
loss: 0.9808768630027771,grad_norm: 0.7060269905142624, iteration: 300866
loss: 1.0167466402053833,grad_norm: 0.9808866831132377, iteration: 300867
loss: 1.010081171989441,grad_norm: 0.9999990924331588, iteration: 300868
loss: 0.995701253414154,grad_norm: 0.8381480636234703, iteration: 300869
loss: 1.0068544149398804,grad_norm: 0.9999991663176179, iteration: 300870
loss: 0.9833388328552246,grad_norm: 0.9799774865893897, iteration: 300871
loss: 0.9883842468261719,grad_norm: 0.7245792894274354, iteration: 300872
loss: 0.9705533385276794,grad_norm: 0.8808766333897632, iteration: 300873
loss: 1.030851125717163,grad_norm: 0.8808510805665141, iteration: 300874
loss: 1.0116418600082397,grad_norm: 0.9999997732030458, iteration: 300875
loss: 0.9855226874351501,grad_norm: 0.9053500146378192, iteration: 300876
loss: 1.005133867263794,grad_norm: 0.8927071175516743, iteration: 300877
loss: 0.9953967332839966,grad_norm: 0.726710614129923, iteration: 300878
loss: 1.0118346214294434,grad_norm: 0.8469932201330065, iteration: 300879
loss: 0.98848557472229,grad_norm: 0.8811493453350504, iteration: 300880
loss: 0.9767038226127625,grad_norm: 0.7257847716292739, iteration: 300881
loss: 1.0161532163619995,grad_norm: 0.8133383999305096, iteration: 300882
loss: 0.9529426097869873,grad_norm: 0.8999011910749852, iteration: 300883
loss: 0.9912002682685852,grad_norm: 0.8576224041686457, iteration: 300884
loss: 1.027213454246521,grad_norm: 0.8408020958973159, iteration: 300885
loss: 1.0385680198669434,grad_norm: 0.8969541041477795, iteration: 300886
loss: 0.9795922636985779,grad_norm: 0.9201563629954531, iteration: 300887
loss: 1.0407150983810425,grad_norm: 0.9079299762672886, iteration: 300888
loss: 0.9856553673744202,grad_norm: 0.8382473450559149, iteration: 300889
loss: 0.981200098991394,grad_norm: 0.695183900797068, iteration: 300890
loss: 0.9940696358680725,grad_norm: 0.9152958671564415, iteration: 300891
loss: 1.0333185195922852,grad_norm: 0.9999991249621261, iteration: 300892
loss: 0.9828622937202454,grad_norm: 0.7490084600274315, iteration: 300893
loss: 1.0001312494277954,grad_norm: 0.9999989570381914, iteration: 300894
loss: 0.957928478717804,grad_norm: 0.8559042713029251, iteration: 300895
loss: 1.0243173837661743,grad_norm: 0.8464288847093893, iteration: 300896
loss: 1.0140844583511353,grad_norm: 0.9194875252588286, iteration: 300897
loss: 0.9983941912651062,grad_norm: 0.773968761524069, iteration: 300898
loss: 1.0196175575256348,grad_norm: 0.9057940704002287, iteration: 300899
loss: 0.9876665472984314,grad_norm: 0.8877529548828486, iteration: 300900
loss: 0.9696314930915833,grad_norm: 0.9501954673321547, iteration: 300901
loss: 0.9964447021484375,grad_norm: 0.6648772809255615, iteration: 300902
loss: 0.9949923157691956,grad_norm: 0.9105354508093199, iteration: 300903
loss: 0.9767582416534424,grad_norm: 0.7790670744213003, iteration: 300904
loss: 1.0068529844284058,grad_norm: 0.8025823496785648, iteration: 300905
loss: 0.9833553433418274,grad_norm: 0.8700534457771135, iteration: 300906
loss: 0.9812026023864746,grad_norm: 0.8177151737702992, iteration: 300907
loss: 0.9973183274269104,grad_norm: 0.9734651927612479, iteration: 300908
loss: 1.0115464925765991,grad_norm: 0.8869191981658512, iteration: 300909
loss: 1.0016943216323853,grad_norm: 0.7593529049312651, iteration: 300910
loss: 1.002734661102295,grad_norm: 0.7724713571821619, iteration: 300911
loss: 1.0139484405517578,grad_norm: 0.9999991050862488, iteration: 300912
loss: 0.9889670014381409,grad_norm: 0.8947655781366338, iteration: 300913
loss: 1.0183314085006714,grad_norm: 0.8767554467231893, iteration: 300914
loss: 1.046097993850708,grad_norm: 0.999999218876025, iteration: 300915
loss: 1.0189082622528076,grad_norm: 0.9546193813403533, iteration: 300916
loss: 1.045176386833191,grad_norm: 0.8206884042665857, iteration: 300917
loss: 0.9762319922447205,grad_norm: 0.7947078797371542, iteration: 300918
loss: 1.0170648097991943,grad_norm: 0.8179034130220336, iteration: 300919
loss: 0.9944425225257874,grad_norm: 0.9999992024975518, iteration: 300920
loss: 1.022316813468933,grad_norm: 0.8706171235604071, iteration: 300921
loss: 0.9882637858390808,grad_norm: 0.8682214688046466, iteration: 300922
loss: 1.0075410604476929,grad_norm: 0.9352706236653392, iteration: 300923
loss: 1.0097720623016357,grad_norm: 0.9428247893003282, iteration: 300924
loss: 1.0054571628570557,grad_norm: 0.854744651200177, iteration: 300925
loss: 1.0573111772537231,grad_norm: 0.9999990116689674, iteration: 300926
loss: 1.0258712768554688,grad_norm: 0.8436905688183355, iteration: 300927
loss: 1.0188170671463013,grad_norm: 0.9999992063395836, iteration: 300928
loss: 1.0124410390853882,grad_norm: 0.9965162839031876, iteration: 300929
loss: 0.9909501671791077,grad_norm: 0.7672521531283689, iteration: 300930
loss: 0.998659610748291,grad_norm: 0.9999992306986173, iteration: 300931
loss: 1.0302180051803589,grad_norm: 0.8759504905761438, iteration: 300932
loss: 1.0001546144485474,grad_norm: 0.790520847615574, iteration: 300933
loss: 1.0277092456817627,grad_norm: 0.8429922410177991, iteration: 300934
loss: 0.9591109752655029,grad_norm: 0.9999990499402474, iteration: 300935
loss: 1.0091708898544312,grad_norm: 0.683268470353093, iteration: 300936
loss: 0.9971526265144348,grad_norm: 0.9999990228381872, iteration: 300937
loss: 1.082218885421753,grad_norm: 0.99999911943326, iteration: 300938
loss: 1.020228624343872,grad_norm: 0.9999993627295396, iteration: 300939
loss: 0.9971891045570374,grad_norm: 0.8523279032937547, iteration: 300940
loss: 0.9577366709709167,grad_norm: 0.9381775891100959, iteration: 300941
loss: 1.014409065246582,grad_norm: 0.8148587459557701, iteration: 300942
loss: 0.9900410771369934,grad_norm: 0.8669598483872211, iteration: 300943
loss: 0.9541724920272827,grad_norm: 0.9014408231476947, iteration: 300944
loss: 1.0150913000106812,grad_norm: 0.7794206923519609, iteration: 300945
loss: 0.9858551025390625,grad_norm: 0.9999989497452347, iteration: 300946
loss: 0.9809178709983826,grad_norm: 0.7804597489751044, iteration: 300947
loss: 1.0050420761108398,grad_norm: 0.8495878786759944, iteration: 300948
loss: 0.9656521081924438,grad_norm: 0.9107644520395884, iteration: 300949
loss: 1.0044424533843994,grad_norm: 0.9946235177139263, iteration: 300950
loss: 0.9908138513565063,grad_norm: 0.9119687787186778, iteration: 300951
loss: 1.0082814693450928,grad_norm: 0.9746636200034887, iteration: 300952
loss: 0.9720233678817749,grad_norm: 0.7390618672033443, iteration: 300953
loss: 0.9652165770530701,grad_norm: 0.8568487597648408, iteration: 300954
loss: 0.9890353083610535,grad_norm: 0.782835573239725, iteration: 300955
loss: 0.9958208203315735,grad_norm: 0.9999990989059473, iteration: 300956
loss: 0.9977734684944153,grad_norm: 0.9307885872619743, iteration: 300957
loss: 0.9835150837898254,grad_norm: 0.9187662196720504, iteration: 300958
loss: 0.9877448678016663,grad_norm: 0.9407769825123506, iteration: 300959
loss: 1.0338430404663086,grad_norm: 0.9067599861877998, iteration: 300960
loss: 1.0018236637115479,grad_norm: 0.8445811373995528, iteration: 300961
loss: 1.0162214040756226,grad_norm: 0.9431111994729697, iteration: 300962
loss: 0.9779840111732483,grad_norm: 0.8635166697251945, iteration: 300963
loss: 0.9992184042930603,grad_norm: 0.733358733754366, iteration: 300964
loss: 1.0511574745178223,grad_norm: 0.8579511480971623, iteration: 300965
loss: 1.0371283292770386,grad_norm: 0.9477942584788633, iteration: 300966
loss: 0.9718222618103027,grad_norm: 0.8680290446660166, iteration: 300967
loss: 0.991832435131073,grad_norm: 0.7893546232293422, iteration: 300968
loss: 1.0784310102462769,grad_norm: 0.999999710474966, iteration: 300969
loss: 1.008623719215393,grad_norm: 0.9754942922252516, iteration: 300970
loss: 1.0094473361968994,grad_norm: 0.9185212769868781, iteration: 300971
loss: 1.0201433897018433,grad_norm: 0.8498066268614155, iteration: 300972
loss: 0.9923489093780518,grad_norm: 0.7246233812136293, iteration: 300973
loss: 1.0188877582550049,grad_norm: 0.7687371009085634, iteration: 300974
loss: 1.0003228187561035,grad_norm: 0.9174084309747284, iteration: 300975
loss: 0.9690185785293579,grad_norm: 0.7218152178720219, iteration: 300976
loss: 0.9798800349235535,grad_norm: 0.7228297288799651, iteration: 300977
loss: 0.9959830641746521,grad_norm: 0.9999990220755552, iteration: 300978
loss: 0.9802172780036926,grad_norm: 0.9512957171707306, iteration: 300979
loss: 1.016100287437439,grad_norm: 0.9358326461820256, iteration: 300980
loss: 1.0133006572723389,grad_norm: 0.875704520553304, iteration: 300981
loss: 1.0139600038528442,grad_norm: 0.7849704720817656, iteration: 300982
loss: 1.007419228553772,grad_norm: 0.90049885712201, iteration: 300983
loss: 0.9821204543113708,grad_norm: 0.8251770033691247, iteration: 300984
loss: 0.9891679883003235,grad_norm: 0.8410144377856968, iteration: 300985
loss: 0.9945454001426697,grad_norm: 0.7245482016206969, iteration: 300986
loss: 0.9640684723854065,grad_norm: 0.9942220849847322, iteration: 300987
loss: 1.0102754831314087,grad_norm: 0.9644737391738152, iteration: 300988
loss: 1.007797122001648,grad_norm: 0.7629757214380951, iteration: 300989
loss: 1.0111397504806519,grad_norm: 0.8002250056463344, iteration: 300990
loss: 1.0117239952087402,grad_norm: 0.9999994515886559, iteration: 300991
loss: 0.9916083812713623,grad_norm: 0.8828031547201411, iteration: 300992
loss: 0.976617157459259,grad_norm: 0.917879929413412, iteration: 300993
loss: 0.9943820834159851,grad_norm: 0.9029283723862778, iteration: 300994
loss: 0.9925063848495483,grad_norm: 0.7833086163933726, iteration: 300995
loss: 0.9672539234161377,grad_norm: 0.9000173405062725, iteration: 300996
loss: 0.9961092472076416,grad_norm: 0.7888447909143648, iteration: 300997
loss: 1.0059128999710083,grad_norm: 0.8940482092848521, iteration: 300998
loss: 0.9975258708000183,grad_norm: 0.894989514657317, iteration: 300999
loss: 1.018514633178711,grad_norm: 0.8550228438416074, iteration: 301000
loss: 1.0191149711608887,grad_norm: 0.9999991857973246, iteration: 301001
loss: 0.9928196668624878,grad_norm: 0.8711816409640897, iteration: 301002
loss: 1.0520228147506714,grad_norm: 0.8944736102078324, iteration: 301003
loss: 1.02118980884552,grad_norm: 0.8261213119245909, iteration: 301004
loss: 1.0170587301254272,grad_norm: 0.8359586450974242, iteration: 301005
loss: 0.9561505317687988,grad_norm: 0.8560839921559905, iteration: 301006
loss: 0.9796322584152222,grad_norm: 0.7385715569003362, iteration: 301007
loss: 1.003389596939087,grad_norm: 0.9265136323797142, iteration: 301008
loss: 0.9783746004104614,grad_norm: 0.9465143447653442, iteration: 301009
loss: 1.02619206905365,grad_norm: 0.9248028193715235, iteration: 301010
loss: 0.9767410159111023,grad_norm: 0.8201060994827234, iteration: 301011
loss: 0.9751519560813904,grad_norm: 0.8040412329633281, iteration: 301012
loss: 1.0082440376281738,grad_norm: 0.8151888275733566, iteration: 301013
loss: 0.982672393321991,grad_norm: 0.8242186886455001, iteration: 301014
loss: 0.9962356090545654,grad_norm: 0.8588621812867236, iteration: 301015
loss: 0.9737167358398438,grad_norm: 0.6886762915485983, iteration: 301016
loss: 0.9928054809570312,grad_norm: 0.9659753959310917, iteration: 301017
loss: 1.0196917057037354,grad_norm: 0.9999990049967512, iteration: 301018
loss: 1.0196690559387207,grad_norm: 0.8142512115844052, iteration: 301019
loss: 0.9638233184814453,grad_norm: 0.8018597244790876, iteration: 301020
loss: 1.0119621753692627,grad_norm: 0.9999990730641184, iteration: 301021
loss: 0.9845365881919861,grad_norm: 0.917304407666895, iteration: 301022
loss: 0.9898322224617004,grad_norm: 0.7956781437538222, iteration: 301023
loss: 0.9630858302116394,grad_norm: 0.9435104586003594, iteration: 301024
loss: 0.9829105138778687,grad_norm: 0.8092821231931149, iteration: 301025
loss: 1.0005052089691162,grad_norm: 0.9758868241456058, iteration: 301026
loss: 1.0009983777999878,grad_norm: 0.713141855498335, iteration: 301027
loss: 1.0150251388549805,grad_norm: 0.8173171375127308, iteration: 301028
loss: 1.042119026184082,grad_norm: 0.888772603337291, iteration: 301029
loss: 0.9917557835578918,grad_norm: 0.9140682664494836, iteration: 301030
loss: 1.0027129650115967,grad_norm: 0.9180011753414539, iteration: 301031
loss: 0.982482373714447,grad_norm: 0.9742616776905852, iteration: 301032
loss: 1.0308654308319092,grad_norm: 0.7701166124257931, iteration: 301033
loss: 1.0119508504867554,grad_norm: 0.7444585880283648, iteration: 301034
loss: 0.989329993724823,grad_norm: 0.943816170047786, iteration: 301035
loss: 1.0259983539581299,grad_norm: 0.7766115868463909, iteration: 301036
loss: 0.9958027601242065,grad_norm: 0.9498198034472386, iteration: 301037
loss: 0.981757640838623,grad_norm: 0.8518327821165861, iteration: 301038
loss: 1.0030733346939087,grad_norm: 0.8849525079139574, iteration: 301039
loss: 1.0038089752197266,grad_norm: 0.8909004347746045, iteration: 301040
loss: 1.018693208694458,grad_norm: 0.7780258542978047, iteration: 301041
loss: 1.015978455543518,grad_norm: 0.8367956592853103, iteration: 301042
loss: 1.0571603775024414,grad_norm: 0.927259738841653, iteration: 301043
loss: 1.0144222974777222,grad_norm: 0.8022588107249627, iteration: 301044
loss: 1.0173611640930176,grad_norm: 0.7932318212515512, iteration: 301045
loss: 0.9610074758529663,grad_norm: 0.8012165114027283, iteration: 301046
loss: 0.9888168573379517,grad_norm: 0.8540377410535179, iteration: 301047
loss: 1.0173548460006714,grad_norm: 0.9873697815565408, iteration: 301048
loss: 1.0057761669158936,grad_norm: 0.921051249049605, iteration: 301049
loss: 0.9910135865211487,grad_norm: 0.7871160329680269, iteration: 301050
loss: 1.019943118095398,grad_norm: 0.880787134176227, iteration: 301051
loss: 1.0195341110229492,grad_norm: 0.6902245339913847, iteration: 301052
loss: 0.9877618551254272,grad_norm: 0.9997356961819001, iteration: 301053
loss: 1.0279468297958374,grad_norm: 0.8631213408575924, iteration: 301054
loss: 0.9647289514541626,grad_norm: 0.8888328558258279, iteration: 301055
loss: 1.0065487623214722,grad_norm: 0.9466121393718417, iteration: 301056
loss: 0.9860660433769226,grad_norm: 0.7375117880083962, iteration: 301057
loss: 0.9945891499519348,grad_norm: 0.6665512326722154, iteration: 301058
loss: 1.0191890001296997,grad_norm: 0.7562469880466656, iteration: 301059
loss: 0.990777313709259,grad_norm: 0.999999038788789, iteration: 301060
loss: 0.9692422747612,grad_norm: 0.9258618301209816, iteration: 301061
loss: 0.9912887811660767,grad_norm: 0.9888330072003048, iteration: 301062
loss: 0.9961744546890259,grad_norm: 0.8817995302912067, iteration: 301063
loss: 0.9891939759254456,grad_norm: 0.8012097936988894, iteration: 301064
loss: 0.9805757403373718,grad_norm: 0.7876361666021332, iteration: 301065
loss: 1.0137332677841187,grad_norm: 0.8812158793645078, iteration: 301066
loss: 1.0155041217803955,grad_norm: 0.8150350253849589, iteration: 301067
loss: 0.9684379696846008,grad_norm: 0.7676687336190482, iteration: 301068
loss: 1.0048469305038452,grad_norm: 0.7586061878501853, iteration: 301069
loss: 1.068511724472046,grad_norm: 0.99999923354225, iteration: 301070
loss: 1.003332257270813,grad_norm: 0.9668620622076258, iteration: 301071
loss: 1.0191417932510376,grad_norm: 0.849443367713682, iteration: 301072
loss: 1.027217984199524,grad_norm: 0.8242632241853621, iteration: 301073
loss: 1.0238096714019775,grad_norm: 0.9181351303660356, iteration: 301074
loss: 0.9670546054840088,grad_norm: 0.9155325664605354, iteration: 301075
loss: 0.997894823551178,grad_norm: 0.7525060096567173, iteration: 301076
loss: 0.9979791641235352,grad_norm: 0.9611792053280198, iteration: 301077
loss: 1.0123875141143799,grad_norm: 0.8766667820503756, iteration: 301078
loss: 0.9310647249221802,grad_norm: 0.9753530047176188, iteration: 301079
loss: 0.9997527003288269,grad_norm: 0.8291513530897392, iteration: 301080
loss: 0.9753934741020203,grad_norm: 0.9310348832296396, iteration: 301081
loss: 0.9493482708930969,grad_norm: 0.8891376018556739, iteration: 301082
loss: 1.0056626796722412,grad_norm: 0.8496072467280101, iteration: 301083
loss: 1.0024378299713135,grad_norm: 0.7443375767449922, iteration: 301084
loss: 1.0372933149337769,grad_norm: 0.9422078942911193, iteration: 301085
loss: 0.9761624932289124,grad_norm: 0.802669515433593, iteration: 301086
loss: 0.9889062643051147,grad_norm: 0.8347077949171432, iteration: 301087
loss: 1.0218987464904785,grad_norm: 0.9564056915549394, iteration: 301088
loss: 0.9961605072021484,grad_norm: 0.9999991672220131, iteration: 301089
loss: 1.0058578252792358,grad_norm: 0.9237783047538426, iteration: 301090
loss: 0.9884403944015503,grad_norm: 0.770683601142897, iteration: 301091
loss: 1.0281492471694946,grad_norm: 0.8455554197085535, iteration: 301092
loss: 0.9892850518226624,grad_norm: 0.960698038947566, iteration: 301093
loss: 0.9957674741744995,grad_norm: 0.746425311751932, iteration: 301094
loss: 1.0010467767715454,grad_norm: 0.8637932495207223, iteration: 301095
loss: 0.9854375720024109,grad_norm: 0.8889863713233613, iteration: 301096
loss: 0.9950390458106995,grad_norm: 0.8862652084219569, iteration: 301097
loss: 0.9952548146247864,grad_norm: 0.8986943490967161, iteration: 301098
loss: 0.9803922176361084,grad_norm: 0.8143775034103427, iteration: 301099
loss: 0.9940482974052429,grad_norm: 0.7782198769575521, iteration: 301100
loss: 0.9870818257331848,grad_norm: 0.8713507477901101, iteration: 301101
loss: 1.0183396339416504,grad_norm: 0.9569022938352144, iteration: 301102
loss: 0.9869813323020935,grad_norm: 0.7921701681331139, iteration: 301103
loss: 0.9913550019264221,grad_norm: 0.9930787434724927, iteration: 301104
loss: 1.079724669456482,grad_norm: 0.9999996553202468, iteration: 301105
loss: 1.0129514932632446,grad_norm: 0.7724807590752482, iteration: 301106
loss: 0.9813007712364197,grad_norm: 0.7995145764675567, iteration: 301107
loss: 0.9929970502853394,grad_norm: 0.8636377588665973, iteration: 301108
loss: 1.017209768295288,grad_norm: 0.923368095783164, iteration: 301109
loss: 1.0120599269866943,grad_norm: 0.7569769969149186, iteration: 301110
loss: 1.0138325691223145,grad_norm: 0.9308178496818527, iteration: 301111
loss: 1.010693907737732,grad_norm: 0.8941213559927452, iteration: 301112
loss: 1.0027002096176147,grad_norm: 0.9035164519476336, iteration: 301113
loss: 1.002815842628479,grad_norm: 0.8958875325722326, iteration: 301114
loss: 0.9917877316474915,grad_norm: 0.9118670416897291, iteration: 301115
loss: 0.990695059299469,grad_norm: 0.8580900248261402, iteration: 301116
loss: 0.9990846514701843,grad_norm: 0.9149851008950993, iteration: 301117
loss: 1.0216642618179321,grad_norm: 0.7552728133801277, iteration: 301118
loss: 1.002389669418335,grad_norm: 0.9220110901574176, iteration: 301119
loss: 0.9958456158638,grad_norm: 0.8325176360347527, iteration: 301120
loss: 1.004309058189392,grad_norm: 0.9428067716376662, iteration: 301121
loss: 1.0687451362609863,grad_norm: 0.9999993340229469, iteration: 301122
loss: 1.0105191469192505,grad_norm: 0.8636569590094715, iteration: 301123
loss: 0.9882553815841675,grad_norm: 0.9531870143429115, iteration: 301124
loss: 0.9890779852867126,grad_norm: 0.9999991970914731, iteration: 301125
loss: 0.9621323347091675,grad_norm: 0.8922451926854277, iteration: 301126
loss: 0.98853999376297,grad_norm: 0.7857203633856258, iteration: 301127
loss: 0.9807475209236145,grad_norm: 0.7756417418127143, iteration: 301128
loss: 1.0493522882461548,grad_norm: 0.9954255986345009, iteration: 301129
loss: 0.9849616289138794,grad_norm: 0.8795135659642095, iteration: 301130
loss: 0.986354649066925,grad_norm: 0.8379569803236876, iteration: 301131
loss: 0.9891359806060791,grad_norm: 0.948585142295585, iteration: 301132
loss: 0.9826793074607849,grad_norm: 0.7204044040750115, iteration: 301133
loss: 1.001184344291687,grad_norm: 0.7276787370019842, iteration: 301134
loss: 0.998697817325592,grad_norm: 0.9306248001615582, iteration: 301135
loss: 1.0020643472671509,grad_norm: 0.8082780551410048, iteration: 301136
loss: 1.0081790685653687,grad_norm: 0.9086580147738514, iteration: 301137
loss: 1.0002576112747192,grad_norm: 0.9999991537418673, iteration: 301138
loss: 0.9611071944236755,grad_norm: 0.8419747626927652, iteration: 301139
loss: 0.9693589806556702,grad_norm: 0.9699105361739684, iteration: 301140
loss: 1.0044746398925781,grad_norm: 0.9999991844126594, iteration: 301141
loss: 0.9780648946762085,grad_norm: 0.7936462390689943, iteration: 301142
loss: 1.017251968383789,grad_norm: 0.7723316198829249, iteration: 301143
loss: 1.005927324295044,grad_norm: 0.9415089748919421, iteration: 301144
loss: 0.9772974252700806,grad_norm: 0.894531599186212, iteration: 301145
loss: 0.9929817914962769,grad_norm: 0.8101322949358917, iteration: 301146
loss: 1.0093913078308105,grad_norm: 0.9337640556689665, iteration: 301147
loss: 1.0018882751464844,grad_norm: 0.8180035317786128, iteration: 301148
loss: 1.0253342390060425,grad_norm: 0.925843269826972, iteration: 301149
loss: 1.029820203781128,grad_norm: 0.8087492423438343, iteration: 301150
loss: 0.9858459234237671,grad_norm: 0.8613409970409261, iteration: 301151
loss: 1.0249426364898682,grad_norm: 0.8110645582550634, iteration: 301152
loss: 0.9770779013633728,grad_norm: 0.8454752320927102, iteration: 301153
loss: 1.0403542518615723,grad_norm: 0.9897129894224632, iteration: 301154
loss: 1.0118178129196167,grad_norm: 0.9999991488548738, iteration: 301155
loss: 0.9685112237930298,grad_norm: 0.8846725026793693, iteration: 301156
loss: 1.0478583574295044,grad_norm: 0.9999998431083548, iteration: 301157
loss: 0.98957759141922,grad_norm: 0.8090944206685112, iteration: 301158
loss: 0.9609744548797607,grad_norm: 0.8317510179888497, iteration: 301159
loss: 0.991021990776062,grad_norm: 0.9688768056428415, iteration: 301160
loss: 1.0146327018737793,grad_norm: 0.8500937879602841, iteration: 301161
loss: 1.0006555318832397,grad_norm: 0.9999991422343022, iteration: 301162
loss: 0.9799611568450928,grad_norm: 0.825875016451231, iteration: 301163
loss: 0.9820448160171509,grad_norm: 0.8533411768358438, iteration: 301164
loss: 0.9930405020713806,grad_norm: 0.7219418058342933, iteration: 301165
loss: 1.0000759363174438,grad_norm: 0.7726528705036664, iteration: 301166
loss: 0.9931504726409912,grad_norm: 0.7404744621161881, iteration: 301167
loss: 0.9258971214294434,grad_norm: 0.8917504938849532, iteration: 301168
loss: 1.018083930015564,grad_norm: 0.8850417724768852, iteration: 301169
loss: 1.0055047273635864,grad_norm: 0.837380674097549, iteration: 301170
loss: 1.0795972347259521,grad_norm: 0.9621638249589598, iteration: 301171
loss: 1.00272536277771,grad_norm: 0.9999991781383998, iteration: 301172
loss: 0.9782941341400146,grad_norm: 0.9999992624245884, iteration: 301173
loss: 0.99623703956604,grad_norm: 0.9716916448701067, iteration: 301174
loss: 1.0083677768707275,grad_norm: 0.8732781522264976, iteration: 301175
loss: 1.038547396659851,grad_norm: 0.9287956095904147, iteration: 301176
loss: 0.9943240880966187,grad_norm: 0.8781800618959336, iteration: 301177
loss: 1.0271967649459839,grad_norm: 0.8956219097450601, iteration: 301178
loss: 1.0014642477035522,grad_norm: 0.8753413272478957, iteration: 301179
loss: 0.9954043030738831,grad_norm: 0.7573685022989873, iteration: 301180
loss: 1.0264257192611694,grad_norm: 0.8607019713165571, iteration: 301181
loss: 1.1477645635604858,grad_norm: 0.9999999946316849, iteration: 301182
loss: 0.9947551488876343,grad_norm: 0.8296184297365979, iteration: 301183
loss: 1.0249475240707397,grad_norm: 0.9999992068141742, iteration: 301184
loss: 1.0584733486175537,grad_norm: 0.9151574952541942, iteration: 301185
loss: 1.0316375494003296,grad_norm: 0.7739567054051724, iteration: 301186
loss: 1.0066125392913818,grad_norm: 0.999998945155059, iteration: 301187
loss: 0.952528178691864,grad_norm: 0.8537270421846805, iteration: 301188
loss: 1.0939533710479736,grad_norm: 0.9999994497583278, iteration: 301189
loss: 0.9959337711334229,grad_norm: 0.8921790349010925, iteration: 301190
loss: 0.999504804611206,grad_norm: 0.8673736706424207, iteration: 301191
loss: 0.977600634098053,grad_norm: 0.938746430533168, iteration: 301192
loss: 1.0231311321258545,grad_norm: 0.9748288258135325, iteration: 301193
loss: 0.9798711538314819,grad_norm: 0.7716232386166963, iteration: 301194
loss: 1.0053794384002686,grad_norm: 0.8043145914244447, iteration: 301195
loss: 1.01711905002594,grad_norm: 0.9999994648181662, iteration: 301196
loss: 1.0059231519699097,grad_norm: 0.8953953663836569, iteration: 301197
loss: 1.0142818689346313,grad_norm: 0.937337324888872, iteration: 301198
loss: 1.0526045560836792,grad_norm: 0.7944543593893645, iteration: 301199
loss: 1.0051203966140747,grad_norm: 0.8379094066658844, iteration: 301200
loss: 1.2000912427902222,grad_norm: 0.9999994998518336, iteration: 301201
loss: 1.004210114479065,grad_norm: 0.7817334884050006, iteration: 301202
loss: 1.0483269691467285,grad_norm: 0.9999992272906949, iteration: 301203
loss: 1.0031009912490845,grad_norm: 0.9999990559318784, iteration: 301204
loss: 0.9916033148765564,grad_norm: 0.8619848243273669, iteration: 301205
loss: 1.0793898105621338,grad_norm: 0.9999991026708726, iteration: 301206
loss: 0.9659280180931091,grad_norm: 0.8317942262403855, iteration: 301207
loss: 1.0005757808685303,grad_norm: 0.8546467673127025, iteration: 301208
loss: 1.091757893562317,grad_norm: 0.9816011224310532, iteration: 301209
loss: 1.0563212633132935,grad_norm: 0.9999994830004588, iteration: 301210
loss: 0.9951890110969543,grad_norm: 0.8763588447458498, iteration: 301211
loss: 0.9869101047515869,grad_norm: 0.769966679750733, iteration: 301212
loss: 0.9817307591438293,grad_norm: 0.8004902589298231, iteration: 301213
loss: 1.0866881608963013,grad_norm: 0.9999998512270811, iteration: 301214
loss: 1.131743311882019,grad_norm: 0.9999991655588172, iteration: 301215
loss: 1.0367454290390015,grad_norm: 0.9999998948630434, iteration: 301216
loss: 1.0410958528518677,grad_norm: 0.999999104479225, iteration: 301217
loss: 1.0231760740280151,grad_norm: 0.9999991300080295, iteration: 301218
loss: 1.0272786617279053,grad_norm: 0.9189823833799532, iteration: 301219
loss: 1.053205966949463,grad_norm: 0.9999993696634434, iteration: 301220
loss: 1.0974109172821045,grad_norm: 0.9999992659786748, iteration: 301221
loss: 1.0357462167739868,grad_norm: 0.9999997833423899, iteration: 301222
loss: 1.007559895515442,grad_norm: 0.9999990743029581, iteration: 301223
loss: 1.0302473306655884,grad_norm: 0.9999990813886357, iteration: 301224
loss: 0.9990025758743286,grad_norm: 0.8735159260337049, iteration: 301225
loss: 1.0187314748764038,grad_norm: 0.7181166998407702, iteration: 301226
loss: 1.0043407678604126,grad_norm: 0.8462934605815368, iteration: 301227
loss: 1.0120152235031128,grad_norm: 0.9999991439200903, iteration: 301228
loss: 1.0092087984085083,grad_norm: 0.9760178633245923, iteration: 301229
loss: 1.0124998092651367,grad_norm: 0.7967276797467522, iteration: 301230
loss: 0.9867244958877563,grad_norm: 0.9999990943897187, iteration: 301231
loss: 1.009886384010315,grad_norm: 1.0000000329015148, iteration: 301232
loss: 1.033379077911377,grad_norm: 0.8143043473833906, iteration: 301233
loss: 0.9884302020072937,grad_norm: 0.9833637280793478, iteration: 301234
loss: 1.0192105770111084,grad_norm: 0.9999997720022439, iteration: 301235
loss: 1.1363667249679565,grad_norm: 0.9999991301815097, iteration: 301236
loss: 0.9846517443656921,grad_norm: 0.8186883260055192, iteration: 301237
loss: 1.0182597637176514,grad_norm: 0.883515236959988, iteration: 301238
loss: 1.0172258615493774,grad_norm: 0.8210377908867488, iteration: 301239
loss: 0.9760269522666931,grad_norm: 0.9999990364492549, iteration: 301240
loss: 1.0314561128616333,grad_norm: 0.999999385789025, iteration: 301241
loss: 1.0018028020858765,grad_norm: 0.8226157877965746, iteration: 301242
loss: 1.0108226537704468,grad_norm: 0.7403973510944216, iteration: 301243
loss: 0.9753853678703308,grad_norm: 0.8699464977357615, iteration: 301244
loss: 0.9778497815132141,grad_norm: 0.9999991261365315, iteration: 301245
loss: 1.026043176651001,grad_norm: 0.7942335111172623, iteration: 301246
loss: 1.0280194282531738,grad_norm: 0.8454079409954576, iteration: 301247
loss: 0.9841827750205994,grad_norm: 0.8625569002079826, iteration: 301248
loss: 1.025971531867981,grad_norm: 0.71577637050346, iteration: 301249
loss: 0.993900716304779,grad_norm: 0.9605594446457382, iteration: 301250
loss: 1.0175045728683472,grad_norm: 0.9203009750773924, iteration: 301251
loss: 0.9745619893074036,grad_norm: 0.9250734106254046, iteration: 301252
loss: 1.035132646560669,grad_norm: 0.8705652839642842, iteration: 301253
loss: 0.9753108024597168,grad_norm: 0.983330545707795, iteration: 301254
loss: 0.9583110809326172,grad_norm: 0.9939181019664458, iteration: 301255
loss: 1.0255029201507568,grad_norm: 0.8071219856460126, iteration: 301256
loss: 0.9715096950531006,grad_norm: 0.9677746253245267, iteration: 301257
loss: 0.9799437522888184,grad_norm: 0.8453300965925428, iteration: 301258
loss: 1.0344583988189697,grad_norm: 0.7581857145204859, iteration: 301259
loss: 1.0118831396102905,grad_norm: 0.961615659841575, iteration: 301260
loss: 1.0016804933547974,grad_norm: 0.8098865540335568, iteration: 301261
loss: 0.9430940747261047,grad_norm: 0.810375379743447, iteration: 301262
loss: 1.028911828994751,grad_norm: 0.9140143053258396, iteration: 301263
loss: 1.0572282075881958,grad_norm: 0.8963834643865234, iteration: 301264
loss: 0.9926698803901672,grad_norm: 0.9115996157697067, iteration: 301265
loss: 0.9979295134544373,grad_norm: 0.999999024991198, iteration: 301266
loss: 0.9515472054481506,grad_norm: 0.8862841497683029, iteration: 301267
loss: 0.9834823608398438,grad_norm: 0.8139548568588182, iteration: 301268
loss: 1.0243427753448486,grad_norm: 0.8970096987588287, iteration: 301269
loss: 0.9687709808349609,grad_norm: 0.8689336879074806, iteration: 301270
loss: 0.9875310063362122,grad_norm: 0.7773137069674693, iteration: 301271
loss: 0.9877216815948486,grad_norm: 0.7237057977464629, iteration: 301272
loss: 0.9517948031425476,grad_norm: 0.8455712156218197, iteration: 301273
loss: 1.0251281261444092,grad_norm: 0.7885569460635313, iteration: 301274
loss: 0.976784884929657,grad_norm: 0.8821650412031647, iteration: 301275
loss: 0.992256224155426,grad_norm: 0.8340647679575905, iteration: 301276
loss: 1.0220296382904053,grad_norm: 0.8794678479984025, iteration: 301277
loss: 0.9987677335739136,grad_norm: 0.8365882485490959, iteration: 301278
loss: 0.9786574244499207,grad_norm: 0.7929516842888595, iteration: 301279
loss: 1.066085934638977,grad_norm: 0.999999046492547, iteration: 301280
loss: 0.9964326620101929,grad_norm: 0.8872250168263893, iteration: 301281
loss: 0.9949254393577576,grad_norm: 0.6753510232286638, iteration: 301282
loss: 1.0121886730194092,grad_norm: 0.8785595238695988, iteration: 301283
loss: 0.9742845296859741,grad_norm: 0.8575324861580929, iteration: 301284
loss: 0.9722659587860107,grad_norm: 0.7651519107615358, iteration: 301285
loss: 0.9953117966651917,grad_norm: 0.8423545151848816, iteration: 301286
loss: 1.014398455619812,grad_norm: 0.7947056592645293, iteration: 301287
loss: 0.9646434783935547,grad_norm: 0.9999992730683034, iteration: 301288
loss: 1.0188615322113037,grad_norm: 0.7959537948711245, iteration: 301289
loss: 0.9836541414260864,grad_norm: 0.8502285435292279, iteration: 301290
loss: 1.0193278789520264,grad_norm: 0.9276853649525136, iteration: 301291
loss: 1.0225306749343872,grad_norm: 0.864126682584719, iteration: 301292
loss: 0.991905927658081,grad_norm: 0.7495916198164064, iteration: 301293
loss: 1.0155123472213745,grad_norm: 0.9892184847470242, iteration: 301294
loss: 1.0031688213348389,grad_norm: 0.8020324764526982, iteration: 301295
loss: 0.9647481441497803,grad_norm: 0.8293690541725427, iteration: 301296
loss: 1.0115019083023071,grad_norm: 0.7615385230326723, iteration: 301297
loss: 0.9928267002105713,grad_norm: 0.8794792228907821, iteration: 301298
loss: 1.0213552713394165,grad_norm: 0.8481250341882124, iteration: 301299
loss: 0.9734885692596436,grad_norm: 0.8009092105045471, iteration: 301300
loss: 1.0028210878372192,grad_norm: 0.7288100835042467, iteration: 301301
loss: 1.090864896774292,grad_norm: 0.9999998013451578, iteration: 301302
loss: 1.007264256477356,grad_norm: 0.8480649829241029, iteration: 301303
loss: 0.9756571054458618,grad_norm: 0.9999994314951698, iteration: 301304
loss: 0.9708741903305054,grad_norm: 0.9064527918023192, iteration: 301305
loss: 0.9937185049057007,grad_norm: 0.9537458952489929, iteration: 301306
loss: 1.0212446451187134,grad_norm: 0.8216928050573441, iteration: 301307
loss: 1.003219723701477,grad_norm: 0.8428516528506987, iteration: 301308
loss: 1.0031967163085938,grad_norm: 0.9999990611586673, iteration: 301309
loss: 1.0226337909698486,grad_norm: 0.936135088224912, iteration: 301310
loss: 0.9963698983192444,grad_norm: 0.921766626290449, iteration: 301311
loss: 1.0098543167114258,grad_norm: 0.7205490773543828, iteration: 301312
loss: 1.069862723350525,grad_norm: 0.9769357012765414, iteration: 301313
loss: 1.0474720001220703,grad_norm: 0.9999998899644437, iteration: 301314
loss: 0.991815447807312,grad_norm: 0.9999989818844385, iteration: 301315
loss: 1.0316303968429565,grad_norm: 0.8661543887962423, iteration: 301316
loss: 1.0178133249282837,grad_norm: 0.9999992129363493, iteration: 301317
loss: 1.0077762603759766,grad_norm: 0.7597477009107756, iteration: 301318
loss: 1.0507211685180664,grad_norm: 0.8563024072787335, iteration: 301319
loss: 1.0214314460754395,grad_norm: 0.9656266109925247, iteration: 301320
loss: 1.0231419801712036,grad_norm: 0.7311094803963318, iteration: 301321
loss: 1.0331491231918335,grad_norm: 0.8703612895796471, iteration: 301322
loss: 1.000215768814087,grad_norm: 0.8645257072582021, iteration: 301323
loss: 0.9978495240211487,grad_norm: 0.7935574687902375, iteration: 301324
loss: 0.9758737683296204,grad_norm: 0.8303134170077319, iteration: 301325
loss: 1.0090372562408447,grad_norm: 0.7349339481601062, iteration: 301326
loss: 1.0647494792938232,grad_norm: 0.9999998953958691, iteration: 301327
loss: 1.0236880779266357,grad_norm: 0.9215610876324697, iteration: 301328
loss: 1.038734793663025,grad_norm: 0.9999991795507487, iteration: 301329
loss: 1.0090101957321167,grad_norm: 0.8309315315999244, iteration: 301330
loss: 1.0073310136795044,grad_norm: 0.7654252404899919, iteration: 301331
loss: 0.9737573862075806,grad_norm: 0.8448823840845124, iteration: 301332
loss: 1.1129180192947388,grad_norm: 0.8990839795533295, iteration: 301333
loss: 1.0315780639648438,grad_norm: 0.9930966527801602, iteration: 301334
loss: 1.0150638818740845,grad_norm: 0.8904863446734391, iteration: 301335
loss: 0.9877941012382507,grad_norm: 0.7995145701335216, iteration: 301336
loss: 0.9922098517417908,grad_norm: 0.727677900418394, iteration: 301337
loss: 1.0831570625305176,grad_norm: 0.99999939047665, iteration: 301338
loss: 1.0089356899261475,grad_norm: 0.7684101734788044, iteration: 301339
loss: 0.9942909479141235,grad_norm: 0.8593588273599231, iteration: 301340
loss: 1.2093158960342407,grad_norm: 0.9999995067605917, iteration: 301341
loss: 0.9775877594947815,grad_norm: 0.999999136053773, iteration: 301342
loss: 1.016771674156189,grad_norm: 0.8227840976791959, iteration: 301343
loss: 1.0049699544906616,grad_norm: 0.9999998366971368, iteration: 301344
loss: 1.0063010454177856,grad_norm: 0.9999991561355046, iteration: 301345
loss: 0.999323844909668,grad_norm: 0.8661245216072564, iteration: 301346
loss: 0.9840331673622131,grad_norm: 0.9999993572457593, iteration: 301347
loss: 1.0060346126556396,grad_norm: 0.9999991306151523, iteration: 301348
loss: 0.9946805238723755,grad_norm: 0.8041442632315127, iteration: 301349
loss: 1.031566858291626,grad_norm: 0.9999990707238064, iteration: 301350
loss: 0.9829057455062866,grad_norm: 0.7635244826103866, iteration: 301351
loss: 0.9975574016571045,grad_norm: 0.697597409459659, iteration: 301352
loss: 0.9825024008750916,grad_norm: 0.9999991784202993, iteration: 301353
loss: 1.0221174955368042,grad_norm: 0.8425983868584238, iteration: 301354
loss: 0.9733359813690186,grad_norm: 0.8598267562659785, iteration: 301355
loss: 0.9665030241012573,grad_norm: 0.75880276793323, iteration: 301356
loss: 0.9958680868148804,grad_norm: 0.8404048688308966, iteration: 301357
loss: 1.00171959400177,grad_norm: 0.9999991982145678, iteration: 301358
loss: 0.9989784955978394,grad_norm: 0.7957658158352372, iteration: 301359
loss: 0.991081178188324,grad_norm: 0.6891748947936671, iteration: 301360
loss: 1.0299222469329834,grad_norm: 0.9196463304149745, iteration: 301361
loss: 1.0002459287643433,grad_norm: 0.7669447343442345, iteration: 301362
loss: 1.0150704383850098,grad_norm: 0.6880334617923846, iteration: 301363
loss: 0.9871176481246948,grad_norm: 0.8186542680175177, iteration: 301364
loss: 1.0481302738189697,grad_norm: 0.7392940345937026, iteration: 301365
loss: 0.9758419990539551,grad_norm: 0.819557439769548, iteration: 301366
loss: 1.028055191040039,grad_norm: 0.999999095723145, iteration: 301367
loss: 1.0287144184112549,grad_norm: 0.97017704906927, iteration: 301368
loss: 0.9689393639564514,grad_norm: 0.999999415225132, iteration: 301369
loss: 1.0069996118545532,grad_norm: 0.9999990828260494, iteration: 301370
loss: 1.004117727279663,grad_norm: 0.7698593838705479, iteration: 301371
loss: 1.0281095504760742,grad_norm: 0.9814248436967274, iteration: 301372
loss: 1.0020052194595337,grad_norm: 0.923837981898935, iteration: 301373
loss: 0.993006706237793,grad_norm: 0.8594413162939168, iteration: 301374
loss: 0.9879626631736755,grad_norm: 0.9952649204326923, iteration: 301375
loss: 0.9826774001121521,grad_norm: 0.8493065263579043, iteration: 301376
loss: 1.0551598072052002,grad_norm: 0.9983713510866844, iteration: 301377
loss: 1.0195226669311523,grad_norm: 0.9999993063129348, iteration: 301378
loss: 1.0035709142684937,grad_norm: 0.8388542703317448, iteration: 301379
loss: 0.9747284650802612,grad_norm: 0.8624326864132824, iteration: 301380
loss: 0.9674711227416992,grad_norm: 0.9193896495731434, iteration: 301381
loss: 1.03116774559021,grad_norm: 0.7608551671502057, iteration: 301382
loss: 0.9655664563179016,grad_norm: 0.7499817187214839, iteration: 301383
loss: 0.9846633076667786,grad_norm: 0.7882220271640955, iteration: 301384
loss: 0.9878430366516113,grad_norm: 0.9999992063059504, iteration: 301385
loss: 1.016772985458374,grad_norm: 0.9054196815902145, iteration: 301386
loss: 1.0269615650177002,grad_norm: 0.8250756206580181, iteration: 301387
loss: 1.0163352489471436,grad_norm: 0.7686745594020625, iteration: 301388
loss: 0.9936724901199341,grad_norm: 0.9754161132359088, iteration: 301389
loss: 1.0070043802261353,grad_norm: 0.9999992664629496, iteration: 301390
loss: 1.0307940244674683,grad_norm: 0.7678553756800287, iteration: 301391
loss: 0.9822946190834045,grad_norm: 0.8405317764626061, iteration: 301392
loss: 0.997658908367157,grad_norm: 0.8373432317365984, iteration: 301393
loss: 1.012975811958313,grad_norm: 0.975748020993728, iteration: 301394
loss: 1.0703190565109253,grad_norm: 0.9999992105800425, iteration: 301395
loss: 0.9858483076095581,grad_norm: 0.8640318168241307, iteration: 301396
loss: 1.0215049982070923,grad_norm: 0.872121615505502, iteration: 301397
loss: 0.9870343208312988,grad_norm: 0.8421622005112696, iteration: 301398
loss: 0.9753438234329224,grad_norm: 0.8697327395503807, iteration: 301399
loss: 0.9799308180809021,grad_norm: 0.8309726224355788, iteration: 301400
loss: 0.9784473180770874,grad_norm: 0.8900724721040755, iteration: 301401
loss: 1.023911714553833,grad_norm: 0.9030921767276906, iteration: 301402
loss: 1.0062294006347656,grad_norm: 0.9999991572837253, iteration: 301403
loss: 0.9587979912757874,grad_norm: 0.6821668391823373, iteration: 301404
loss: 1.0110504627227783,grad_norm: 0.9355552766339567, iteration: 301405
loss: 0.9991968870162964,grad_norm: 0.8681161306800347, iteration: 301406
loss: 0.9816010594367981,grad_norm: 0.9322268627336755, iteration: 301407
loss: 0.9999138712882996,grad_norm: 0.7575220644027401, iteration: 301408
loss: 1.0495572090148926,grad_norm: 0.9999994240973145, iteration: 301409
loss: 0.976771891117096,grad_norm: 0.9024447922555798, iteration: 301410
loss: 1.0377247333526611,grad_norm: 0.8928017128249294, iteration: 301411
loss: 0.9889161586761475,grad_norm: 0.8352512271307008, iteration: 301412
loss: 1.0176217555999756,grad_norm: 0.9999991993074479, iteration: 301413
loss: 1.03371000289917,grad_norm: 0.9727302309797342, iteration: 301414
loss: 0.996341347694397,grad_norm: 0.9999989923862559, iteration: 301415
loss: 0.9842041730880737,grad_norm: 0.7288103702390832, iteration: 301416
loss: 0.9521684646606445,grad_norm: 0.8455551852941285, iteration: 301417
loss: 0.9929336309432983,grad_norm: 0.9658336521759451, iteration: 301418
loss: 1.0111817121505737,grad_norm: 0.7289704425193075, iteration: 301419
loss: 1.0098974704742432,grad_norm: 0.9999996112445604, iteration: 301420
loss: 1.0112602710723877,grad_norm: 0.7400723382723416, iteration: 301421
loss: 1.027313232421875,grad_norm: 0.8926824009322423, iteration: 301422
loss: 0.9905762076377869,grad_norm: 0.8134514460071541, iteration: 301423
loss: 0.9877839088439941,grad_norm: 0.900817180071524, iteration: 301424
loss: 0.994915783405304,grad_norm: 0.888328080199322, iteration: 301425
loss: 1.0068436861038208,grad_norm: 0.920156328156472, iteration: 301426
loss: 0.9991777539253235,grad_norm: 0.7602396443333435, iteration: 301427
loss: 1.0067731142044067,grad_norm: 0.99999902338354, iteration: 301428
loss: 0.9797566533088684,grad_norm: 0.9498611138507421, iteration: 301429
loss: 1.0318596363067627,grad_norm: 0.946048009700839, iteration: 301430
loss: 1.0283050537109375,grad_norm: 0.9999990538294127, iteration: 301431
loss: 0.9946240186691284,grad_norm: 0.9840642223951167, iteration: 301432
loss: 1.0090587139129639,grad_norm: 0.8835403504031842, iteration: 301433
loss: 1.0131537914276123,grad_norm: 0.7349058607484753, iteration: 301434
loss: 1.007170557975769,grad_norm: 0.8405283105377469, iteration: 301435
loss: 1.0027202367782593,grad_norm: 0.8507874677593222, iteration: 301436
loss: 1.0048141479492188,grad_norm: 0.9999990772002019, iteration: 301437
loss: 1.000719666481018,grad_norm: 0.9999991491439347, iteration: 301438
loss: 0.9568203091621399,grad_norm: 0.8744676196962196, iteration: 301439
loss: 1.009738564491272,grad_norm: 0.9817862835821605, iteration: 301440
loss: 1.0288487672805786,grad_norm: 0.8351883366658072, iteration: 301441
loss: 0.9708374738693237,grad_norm: 0.7635363150016998, iteration: 301442
loss: 0.9895473718643188,grad_norm: 0.8109833923223344, iteration: 301443
loss: 1.000383734703064,grad_norm: 0.7687044680797146, iteration: 301444
loss: 1.005919337272644,grad_norm: 0.9267643700288853, iteration: 301445
loss: 1.0059092044830322,grad_norm: 0.8412060360189522, iteration: 301446
loss: 1.03080415725708,grad_norm: 0.9999993700645632, iteration: 301447
loss: 0.9966302514076233,grad_norm: 0.9999990591219378, iteration: 301448
loss: 0.9797500371932983,grad_norm: 0.8527483259057982, iteration: 301449
loss: 0.9716767072677612,grad_norm: 0.8596487377873754, iteration: 301450
loss: 1.0307153463363647,grad_norm: 0.7942189801863162, iteration: 301451
loss: 0.9922216534614563,grad_norm: 0.8619980425317856, iteration: 301452
loss: 1.027045726776123,grad_norm: 0.8687257144087418, iteration: 301453
loss: 0.9888176321983337,grad_norm: 0.922448559810466, iteration: 301454
loss: 0.9662466645240784,grad_norm: 0.72914347947446, iteration: 301455
loss: 0.9599295854568481,grad_norm: 0.834676965324363, iteration: 301456
loss: 0.9792563319206238,grad_norm: 0.9527335618326512, iteration: 301457
loss: 0.9803324341773987,grad_norm: 0.8751474699916132, iteration: 301458
loss: 0.9899541139602661,grad_norm: 0.7875385441755917, iteration: 301459
loss: 0.9859539270401001,grad_norm: 0.895663398990927, iteration: 301460
loss: 0.9899858832359314,grad_norm: 0.7491343612998426, iteration: 301461
loss: 0.9908666014671326,grad_norm: 0.9148101307489441, iteration: 301462
loss: 1.2111504077911377,grad_norm: 0.9999994851102673, iteration: 301463
loss: 0.9843466281890869,grad_norm: 0.9729347274816779, iteration: 301464
loss: 0.9674935340881348,grad_norm: 0.7748799607703427, iteration: 301465
loss: 1.0078513622283936,grad_norm: 0.8447491842200184, iteration: 301466
loss: 0.9746451377868652,grad_norm: 0.9128843325274253, iteration: 301467
loss: 1.0017093420028687,grad_norm: 0.8793521641066449, iteration: 301468
loss: 1.0174548625946045,grad_norm: 0.9012307194860033, iteration: 301469
loss: 1.0434306859970093,grad_norm: 0.8982204921860624, iteration: 301470
loss: 0.9912123680114746,grad_norm: 0.8592602742600021, iteration: 301471
loss: 1.0130341053009033,grad_norm: 0.8223365815462692, iteration: 301472
loss: 1.0106678009033203,grad_norm: 0.8599288956081642, iteration: 301473
loss: 1.0131916999816895,grad_norm: 0.862848154727954, iteration: 301474
loss: 0.9576637744903564,grad_norm: 0.991122996285902, iteration: 301475
loss: 0.9759946465492249,grad_norm: 0.8805247447237619, iteration: 301476
loss: 1.0441027879714966,grad_norm: 0.8144820698907717, iteration: 301477
loss: 1.0387766361236572,grad_norm: 0.9999992935113625, iteration: 301478
loss: 1.0309885740280151,grad_norm: 0.8468279581614078, iteration: 301479
loss: 0.9939870834350586,grad_norm: 0.8640754026649591, iteration: 301480
loss: 1.0906659364700317,grad_norm: 0.9999992186429674, iteration: 301481
loss: 1.0206151008605957,grad_norm: 0.9304267514349773, iteration: 301482
loss: 0.989084780216217,grad_norm: 0.8586733773737031, iteration: 301483
loss: 0.9847153425216675,grad_norm: 0.9529663692563911, iteration: 301484
loss: 0.9861117601394653,grad_norm: 0.8178349522414421, iteration: 301485
loss: 0.9935947060585022,grad_norm: 0.8062942617627923, iteration: 301486
loss: 0.9894042611122131,grad_norm: 0.8840222413765615, iteration: 301487
loss: 1.0066840648651123,grad_norm: 0.9999990142760754, iteration: 301488
loss: 1.0276004076004028,grad_norm: 0.9008262693504265, iteration: 301489
loss: 0.978869616985321,grad_norm: 0.8379955316655573, iteration: 301490
loss: 0.955207884311676,grad_norm: 0.8902672570416628, iteration: 301491
loss: 0.9745982885360718,grad_norm: 0.9999990600524101, iteration: 301492
loss: 0.9987945556640625,grad_norm: 0.775202638324119, iteration: 301493
loss: 1.0202019214630127,grad_norm: 0.7832640153632607, iteration: 301494
loss: 0.9801790118217468,grad_norm: 0.9999997463971241, iteration: 301495
loss: 0.9721055626869202,grad_norm: 0.8050855075120684, iteration: 301496
loss: 0.9817575812339783,grad_norm: 0.7773596680963892, iteration: 301497
loss: 1.065293312072754,grad_norm: 0.8011651539615697, iteration: 301498
loss: 0.9855512976646423,grad_norm: 0.9715585875483688, iteration: 301499
loss: 1.0249255895614624,grad_norm: 0.999999217847553, iteration: 301500
loss: 0.9872364401817322,grad_norm: 0.8555279468484199, iteration: 301501
loss: 1.0012129545211792,grad_norm: 0.7240049920623168, iteration: 301502
loss: 0.9996616244316101,grad_norm: 0.8457674591432228, iteration: 301503
loss: 1.008876919746399,grad_norm: 0.9999990517528964, iteration: 301504
loss: 1.0153422355651855,grad_norm: 0.928960512412398, iteration: 301505
loss: 0.9911919236183167,grad_norm: 0.9660133652169904, iteration: 301506
loss: 1.04390287399292,grad_norm: 0.9758052881910362, iteration: 301507
loss: 1.0005052089691162,grad_norm: 0.9372433916955035, iteration: 301508
loss: 1.0122549533843994,grad_norm: 0.8515144542311831, iteration: 301509
loss: 1.039163589477539,grad_norm: 0.8528308293370618, iteration: 301510
loss: 0.9945138692855835,grad_norm: 0.955160123682556, iteration: 301511
loss: 0.9815002083778381,grad_norm: 0.8194714796515694, iteration: 301512
loss: 1.000373363494873,grad_norm: 0.8710321485409239, iteration: 301513
loss: 1.0168628692626953,grad_norm: 0.9999990987196836, iteration: 301514
loss: 0.992854118347168,grad_norm: 0.8148003189668137, iteration: 301515
loss: 0.9985740780830383,grad_norm: 0.7992202569685255, iteration: 301516
loss: 0.9935411810874939,grad_norm: 0.8127894357254632, iteration: 301517
loss: 0.9895056486129761,grad_norm: 0.9808922568101509, iteration: 301518
loss: 1.0699703693389893,grad_norm: 0.9499505831021199, iteration: 301519
loss: 1.0004186630249023,grad_norm: 0.9999992431969831, iteration: 301520
loss: 0.9868699908256531,grad_norm: 0.8453971203909416, iteration: 301521
loss: 0.9911649823188782,grad_norm: 0.8697712914507172, iteration: 301522
loss: 0.9975915551185608,grad_norm: 0.8329249983001807, iteration: 301523
loss: 0.9739956855773926,grad_norm: 0.8442481150439999, iteration: 301524
loss: 1.051565170288086,grad_norm: 0.9999990939968779, iteration: 301525
loss: 1.0166317224502563,grad_norm: 0.8076397436106939, iteration: 301526
loss: 1.004727840423584,grad_norm: 0.8407357788610579, iteration: 301527
loss: 0.9869953393936157,grad_norm: 0.8580572733102606, iteration: 301528
loss: 1.02128267288208,grad_norm: 0.7911141450670697, iteration: 301529
loss: 1.0204212665557861,grad_norm: 0.9100720860758597, iteration: 301530
loss: 0.9808698296546936,grad_norm: 0.9505552561808149, iteration: 301531
loss: 0.990473210811615,grad_norm: 0.8979311929138406, iteration: 301532
loss: 0.9678308367729187,grad_norm: 0.9999990696564601, iteration: 301533
loss: 0.9867095351219177,grad_norm: 0.8410799502282331, iteration: 301534
loss: 1.0309752225875854,grad_norm: 0.8167156364028156, iteration: 301535
loss: 0.9917760491371155,grad_norm: 0.8988283415011724, iteration: 301536
loss: 1.0023448467254639,grad_norm: 0.9276743870976205, iteration: 301537
loss: 1.034164309501648,grad_norm: 0.9999990660578643, iteration: 301538
loss: 0.9967483282089233,grad_norm: 0.9999991830986439, iteration: 301539
loss: 0.9710879921913147,grad_norm: 0.7797999951426071, iteration: 301540
loss: 1.0213193893432617,grad_norm: 0.8193312971435811, iteration: 301541
loss: 0.9999206066131592,grad_norm: 0.9780735129959427, iteration: 301542
loss: 1.0278810262680054,grad_norm: 0.9810060806324735, iteration: 301543
loss: 0.9758970737457275,grad_norm: 0.7380955680601672, iteration: 301544
loss: 1.0216562747955322,grad_norm: 0.774312551756103, iteration: 301545
loss: 1.0069928169250488,grad_norm: 0.8665784438200058, iteration: 301546
loss: 1.007239818572998,grad_norm: 0.9999989802843215, iteration: 301547
loss: 1.0056695938110352,grad_norm: 0.8103779032315822, iteration: 301548
loss: 1.1254597902297974,grad_norm: 0.9999991232915111, iteration: 301549
loss: 0.9842351078987122,grad_norm: 0.7948615477365868, iteration: 301550
loss: 1.011451005935669,grad_norm: 0.9999991165888539, iteration: 301551
loss: 0.9995573163032532,grad_norm: 0.9184482014825478, iteration: 301552
loss: 1.0160902738571167,grad_norm: 0.8276651189482257, iteration: 301553
loss: 1.009270191192627,grad_norm: 0.9992454097516762, iteration: 301554
loss: 1.0042695999145508,grad_norm: 0.9999991115995631, iteration: 301555
loss: 0.9737730026245117,grad_norm: 0.7697850373199927, iteration: 301556
loss: 0.9740686416625977,grad_norm: 0.65668215933289, iteration: 301557
loss: 0.9794577360153198,grad_norm: 0.8892118741786588, iteration: 301558
loss: 0.9889681339263916,grad_norm: 0.7617858336063198, iteration: 301559
loss: 0.9966080784797668,grad_norm: 0.999998978654124, iteration: 301560
loss: 1.0188462734222412,grad_norm: 0.8089477224155214, iteration: 301561
loss: 0.9839997887611389,grad_norm: 0.8029216867579697, iteration: 301562
loss: 0.9895251989364624,grad_norm: 0.8030526049806219, iteration: 301563
loss: 0.9888105988502502,grad_norm: 0.9789083838362213, iteration: 301564
loss: 1.019499659538269,grad_norm: 0.9588264399159927, iteration: 301565
loss: 0.9851135015487671,grad_norm: 0.8045271498362598, iteration: 301566
loss: 1.0137966871261597,grad_norm: 0.9999998579120569, iteration: 301567
loss: 0.9794449806213379,grad_norm: 0.8653278770478724, iteration: 301568
loss: 0.9979389905929565,grad_norm: 0.8723668762028439, iteration: 301569
loss: 0.9925873875617981,grad_norm: 0.8193948730168337, iteration: 301570
loss: 0.9744206666946411,grad_norm: 0.8710553995420223, iteration: 301571
loss: 1.022594928741455,grad_norm: 0.8572631204435034, iteration: 301572
loss: 1.017680048942566,grad_norm: 0.9999990797463135, iteration: 301573
loss: 1.0308637619018555,grad_norm: 0.9616668316633775, iteration: 301574
loss: 1.0119075775146484,grad_norm: 0.9263011408524628, iteration: 301575
loss: 1.0117355585098267,grad_norm: 0.8879427658061791, iteration: 301576
loss: 0.9964439868927002,grad_norm: 0.7500384609569398, iteration: 301577
loss: 1.0138697624206543,grad_norm: 0.8798188609206098, iteration: 301578
loss: 0.9414322972297668,grad_norm: 0.7649422685687932, iteration: 301579
loss: 0.954158365726471,grad_norm: 0.813490911423249, iteration: 301580
loss: 0.963578999042511,grad_norm: 0.8450135991015567, iteration: 301581
loss: 1.0010231733322144,grad_norm: 0.8862873644151098, iteration: 301582
loss: 1.036286473274231,grad_norm: 0.7992051912005896, iteration: 301583
loss: 0.9818176031112671,grad_norm: 0.9999991160612002, iteration: 301584
loss: 1.037078619003296,grad_norm: 0.9028312651232134, iteration: 301585
loss: 1.027016043663025,grad_norm: 0.7708864143869242, iteration: 301586
loss: 1.0036197900772095,grad_norm: 0.7893892176843067, iteration: 301587
loss: 1.0277187824249268,grad_norm: 0.9236397953260839, iteration: 301588
loss: 0.9537674784660339,grad_norm: 0.8648881649194092, iteration: 301589
loss: 1.021155595779419,grad_norm: 0.8503131587215185, iteration: 301590
loss: 0.9934418797492981,grad_norm: 0.8373386748631011, iteration: 301591
loss: 0.9925739765167236,grad_norm: 0.8503500815852614, iteration: 301592
loss: 1.0491008758544922,grad_norm: 0.97248527048085, iteration: 301593
loss: 0.968991756439209,grad_norm: 0.9296512594097704, iteration: 301594
loss: 0.9752820134162903,grad_norm: 0.7332535925729418, iteration: 301595
loss: 1.0022177696228027,grad_norm: 0.9691238996210979, iteration: 301596
loss: 1.0399205684661865,grad_norm: 0.7242590706241828, iteration: 301597
loss: 1.0048866271972656,grad_norm: 0.9158817245485642, iteration: 301598
loss: 1.0041345357894897,grad_norm: 0.8278505096111115, iteration: 301599
loss: 1.0356247425079346,grad_norm: 0.7905264555672258, iteration: 301600
loss: 0.9936807155609131,grad_norm: 0.733222254107786, iteration: 301601
loss: 1.0049388408660889,grad_norm: 0.7947534421473478, iteration: 301602
loss: 0.9834198951721191,grad_norm: 0.7947982985513461, iteration: 301603
loss: 1.0283105373382568,grad_norm: 0.9999993214001522, iteration: 301604
loss: 1.0378947257995605,grad_norm: 0.9074274931503188, iteration: 301605
loss: 0.9933498501777649,grad_norm: 0.856189898632264, iteration: 301606
loss: 1.0515209436416626,grad_norm: 0.9364840159373501, iteration: 301607
loss: 0.9596632122993469,grad_norm: 0.9740463417676071, iteration: 301608
loss: 0.9737687706947327,grad_norm: 0.999999122367519, iteration: 301609
loss: 0.9786502718925476,grad_norm: 0.7916959074671369, iteration: 301610
loss: 0.967893123626709,grad_norm: 0.8205878460761202, iteration: 301611
loss: 0.9867150783538818,grad_norm: 0.8538204450248246, iteration: 301612
loss: 1.1622084379196167,grad_norm: 0.9999995746035157, iteration: 301613
loss: 1.0303610563278198,grad_norm: 0.7393738230761548, iteration: 301614
loss: 1.0015629529953003,grad_norm: 0.8336810870608488, iteration: 301615
loss: 0.9788578152656555,grad_norm: 0.8047307098977087, iteration: 301616
loss: 0.9960479140281677,grad_norm: 0.9999992039910198, iteration: 301617
loss: 0.9629382491111755,grad_norm: 0.7554513011881224, iteration: 301618
loss: 1.0262051820755005,grad_norm: 0.8514083664550344, iteration: 301619
loss: 1.0190176963806152,grad_norm: 0.881537314984094, iteration: 301620
loss: 0.9890038371086121,grad_norm: 0.7987109886780971, iteration: 301621
loss: 0.9804189205169678,grad_norm: 0.7675135584405427, iteration: 301622
loss: 0.9977292418479919,grad_norm: 0.9999991424065907, iteration: 301623
loss: 1.011441707611084,grad_norm: 0.9762722504746668, iteration: 301624
loss: 1.0114705562591553,grad_norm: 0.7548163794501692, iteration: 301625
loss: 0.9614721536636353,grad_norm: 0.826726297377808, iteration: 301626
loss: 0.9979990124702454,grad_norm: 0.7642231609241003, iteration: 301627
loss: 0.9689443707466125,grad_norm: 0.8680215598679281, iteration: 301628
loss: 0.9909696578979492,grad_norm: 0.9999990428652433, iteration: 301629
loss: 0.9948806762695312,grad_norm: 0.9610434714934862, iteration: 301630
loss: 1.0308040380477905,grad_norm: 0.9999990730939757, iteration: 301631
loss: 0.9623119831085205,grad_norm: 0.9253352501579716, iteration: 301632
loss: 1.0267093181610107,grad_norm: 0.9005656618421529, iteration: 301633
loss: 1.0383752584457397,grad_norm: 0.8585869928408665, iteration: 301634
loss: 1.02676260471344,grad_norm: 0.8020273889988186, iteration: 301635
loss: 0.9951679706573486,grad_norm: 0.7215572003444384, iteration: 301636
loss: 0.9845572113990784,grad_norm: 0.7839718018662358, iteration: 301637
loss: 0.9718917608261108,grad_norm: 0.7599564392187863, iteration: 301638
loss: 0.9707559943199158,grad_norm: 0.9690695523861133, iteration: 301639
loss: 1.0226614475250244,grad_norm: 0.8897347575589621, iteration: 301640
loss: 1.0013649463653564,grad_norm: 0.8720046088958474, iteration: 301641
loss: 0.9889038801193237,grad_norm: 0.8870217759491804, iteration: 301642
loss: 0.9721606969833374,grad_norm: 0.7103123503159271, iteration: 301643
loss: 1.014503836631775,grad_norm: 0.8547083249711135, iteration: 301644
loss: 1.0389162302017212,grad_norm: 0.8451018623102535, iteration: 301645
loss: 0.9720861315727234,grad_norm: 0.8994303902044278, iteration: 301646
loss: 1.0259343385696411,grad_norm: 0.8686237536450799, iteration: 301647
loss: 1.0059973001480103,grad_norm: 0.8166418688182686, iteration: 301648
loss: 1.0157901048660278,grad_norm: 0.9999992628941522, iteration: 301649
loss: 1.0180238485336304,grad_norm: 0.7854395968561074, iteration: 301650
loss: 1.0005922317504883,grad_norm: 0.8222730509801023, iteration: 301651
loss: 0.9906027317047119,grad_norm: 0.9776246745897486, iteration: 301652
loss: 0.979255735874176,grad_norm: 0.7410341748401719, iteration: 301653
loss: 0.9863564968109131,grad_norm: 0.9999991777513585, iteration: 301654
loss: 0.9949505925178528,grad_norm: 0.9575387946963627, iteration: 301655
loss: 0.9851983785629272,grad_norm: 0.9244961464205215, iteration: 301656
loss: 1.0398656129837036,grad_norm: 0.9776362087562667, iteration: 301657
loss: 0.9780596494674683,grad_norm: 0.9999989926361303, iteration: 301658
loss: 0.9911932945251465,grad_norm: 0.8735699608970429, iteration: 301659
loss: 0.9798588156700134,grad_norm: 0.8411661940850932, iteration: 301660
loss: 1.0032563209533691,grad_norm: 0.8897221041470861, iteration: 301661
loss: 0.9793614745140076,grad_norm: 0.899685738477185, iteration: 301662
loss: 0.9599542617797852,grad_norm: 0.8409348331537388, iteration: 301663
loss: 0.993863046169281,grad_norm: 0.9299393199020832, iteration: 301664
loss: 0.9877012968063354,grad_norm: 0.9541584594858894, iteration: 301665
loss: 1.0116969347000122,grad_norm: 0.7218037863042781, iteration: 301666
loss: 1.0292905569076538,grad_norm: 0.9999992867587922, iteration: 301667
loss: 0.980824887752533,grad_norm: 0.8422487104920605, iteration: 301668
loss: 1.0142556428909302,grad_norm: 0.914666424295721, iteration: 301669
loss: 1.0194621086120605,grad_norm: 0.8446450750689525, iteration: 301670
loss: 1.0679863691329956,grad_norm: 0.9999992223193986, iteration: 301671
loss: 0.9675533771514893,grad_norm: 0.9165551319169848, iteration: 301672
loss: 1.0355397462844849,grad_norm: 0.7497561663246954, iteration: 301673
loss: 1.0133870840072632,grad_norm: 0.6494809917669961, iteration: 301674
loss: 0.9931992292404175,grad_norm: 0.9944182342198349, iteration: 301675
loss: 0.9961497187614441,grad_norm: 0.9771841729223495, iteration: 301676
loss: 0.9586377143859863,grad_norm: 0.8099498989570566, iteration: 301677
loss: 0.9680472016334534,grad_norm: 0.8984413606824283, iteration: 301678
loss: 1.0167354345321655,grad_norm: 0.9008575795797892, iteration: 301679
loss: 1.052157998085022,grad_norm: 0.935113423742667, iteration: 301680
loss: 0.9851633310317993,grad_norm: 0.7943978922479882, iteration: 301681
loss: 1.0012271404266357,grad_norm: 0.8242733998304493, iteration: 301682
loss: 1.0133532285690308,grad_norm: 0.8966497155717607, iteration: 301683
loss: 0.9694973826408386,grad_norm: 0.9999997698057695, iteration: 301684
loss: 0.950770914554596,grad_norm: 0.7323929414955738, iteration: 301685
loss: 0.9579746127128601,grad_norm: 0.9025699728429382, iteration: 301686
loss: 1.0138517618179321,grad_norm: 0.8352640044433681, iteration: 301687
loss: 0.9458613991737366,grad_norm: 0.885476236326638, iteration: 301688
loss: 1.0084120035171509,grad_norm: 0.831421581720658, iteration: 301689
loss: 0.9966201782226562,grad_norm: 0.8844122000695233, iteration: 301690
loss: 0.996906042098999,grad_norm: 0.7832734478284111, iteration: 301691
loss: 1.0381813049316406,grad_norm: 0.7389767793159426, iteration: 301692
loss: 1.0146125555038452,grad_norm: 0.9064066095115906, iteration: 301693
loss: 1.0146050453186035,grad_norm: 0.9330184824733535, iteration: 301694
loss: 1.0239977836608887,grad_norm: 0.9058582765007599, iteration: 301695
loss: 1.024655818939209,grad_norm: 0.9600440007491162, iteration: 301696
loss: 0.9768908023834229,grad_norm: 0.9999990431127285, iteration: 301697
loss: 0.9818755388259888,grad_norm: 0.7776187859469749, iteration: 301698
loss: 1.039313793182373,grad_norm: 0.8713695059261672, iteration: 301699
loss: 1.0100970268249512,grad_norm: 0.9571597046173279, iteration: 301700
loss: 1.0005321502685547,grad_norm: 0.7623399275831311, iteration: 301701
loss: 0.9937838315963745,grad_norm: 0.8891107456520905, iteration: 301702
loss: 1.0343945026397705,grad_norm: 0.8272255419616134, iteration: 301703
loss: 1.0068899393081665,grad_norm: 0.8402299023501264, iteration: 301704
loss: 1.0260889530181885,grad_norm: 0.9558949559801574, iteration: 301705
loss: 1.005096673965454,grad_norm: 0.9999993555113033, iteration: 301706
loss: 1.032186508178711,grad_norm: 0.8029281356919592, iteration: 301707
loss: 1.004327654838562,grad_norm: 0.9706508769071481, iteration: 301708
loss: 1.0174977779388428,grad_norm: 0.9846734096980284, iteration: 301709
loss: 1.001768708229065,grad_norm: 0.9602104290729073, iteration: 301710
loss: 0.9672380089759827,grad_norm: 0.8128058490976738, iteration: 301711
loss: 0.9865682721138,grad_norm: 0.9999991182291785, iteration: 301712
loss: 0.9795700907707214,grad_norm: 0.9633959971388597, iteration: 301713
loss: 1.0487794876098633,grad_norm: 0.9999993695195727, iteration: 301714
loss: 0.9843541383743286,grad_norm: 0.9517081719041746, iteration: 301715
loss: 0.9963886141777039,grad_norm: 0.8664952713454293, iteration: 301716
loss: 0.9880540370941162,grad_norm: 0.7863297567803772, iteration: 301717
loss: 1.0255793333053589,grad_norm: 0.9999990684572103, iteration: 301718
loss: 1.0376793146133423,grad_norm: 0.9265901329183589, iteration: 301719
loss: 1.0190349817276,grad_norm: 0.7063943067775722, iteration: 301720
loss: 0.9885059595108032,grad_norm: 0.9999990118943738, iteration: 301721
loss: 1.0062074661254883,grad_norm: 0.9505657712233587, iteration: 301722
loss: 0.9855439066886902,grad_norm: 0.8644161584521023, iteration: 301723
loss: 1.0124098062515259,grad_norm: 0.9999991055243612, iteration: 301724
loss: 1.0180723667144775,grad_norm: 0.8954901934340541, iteration: 301725
loss: 0.9939867854118347,grad_norm: 0.7484641276728501, iteration: 301726
loss: 0.9474858641624451,grad_norm: 0.902337197319766, iteration: 301727
loss: 0.9995150566101074,grad_norm: 0.8827667978907687, iteration: 301728
loss: 1.0304006338119507,grad_norm: 0.9999991965431002, iteration: 301729
loss: 1.0103418827056885,grad_norm: 0.9057730676638032, iteration: 301730
loss: 1.0973308086395264,grad_norm: 0.9999992487307682, iteration: 301731
loss: 1.0389765501022339,grad_norm: 0.8201342095602979, iteration: 301732
loss: 0.9902501106262207,grad_norm: 0.9343106262949081, iteration: 301733
loss: 1.004804253578186,grad_norm: 0.9999991307140639, iteration: 301734
loss: 0.978824257850647,grad_norm: 0.9999991235306835, iteration: 301735
loss: 1.0106892585754395,grad_norm: 0.9434572837268997, iteration: 301736
loss: 0.9889857769012451,grad_norm: 0.9533193757591893, iteration: 301737
loss: 1.0044069290161133,grad_norm: 0.9572198158120654, iteration: 301738
loss: 0.9893460869789124,grad_norm: 0.9751424234890611, iteration: 301739
loss: 0.9967765808105469,grad_norm: 0.7939083944988974, iteration: 301740
loss: 1.0040735006332397,grad_norm: 0.9669568500271082, iteration: 301741
loss: 1.0138012170791626,grad_norm: 0.8418974923637446, iteration: 301742
loss: 0.9965599775314331,grad_norm: 0.966187506745661, iteration: 301743
loss: 0.9707605838775635,grad_norm: 0.8712437332604636, iteration: 301744
loss: 1.0135079622268677,grad_norm: 0.9834233114191849, iteration: 301745
loss: 0.9798856973648071,grad_norm: 0.999998967651181, iteration: 301746
loss: 1.0241633653640747,grad_norm: 0.80757136908104, iteration: 301747
loss: 1.0407934188842773,grad_norm: 0.9999991200280683, iteration: 301748
loss: 1.0373845100402832,grad_norm: 0.7569390257829768, iteration: 301749
loss: 1.0003187656402588,grad_norm: 0.8224045026013336, iteration: 301750
loss: 0.953376829624176,grad_norm: 0.8215002992384138, iteration: 301751
loss: 0.998704195022583,grad_norm: 0.9999988960920568, iteration: 301752
loss: 1.0220239162445068,grad_norm: 0.8493501823358758, iteration: 301753
loss: 1.0013998746871948,grad_norm: 0.8242631280163919, iteration: 301754
loss: 0.9821472764015198,grad_norm: 0.785707496061045, iteration: 301755
loss: 0.9998143315315247,grad_norm: 0.6953652482242862, iteration: 301756
loss: 0.9880651831626892,grad_norm: 0.7636410036144494, iteration: 301757
loss: 1.0287902355194092,grad_norm: 0.8871678035668081, iteration: 301758
loss: 1.0280264616012573,grad_norm: 0.8059613433514787, iteration: 301759
loss: 0.9803813099861145,grad_norm: 0.9445818900429793, iteration: 301760
loss: 1.0008643865585327,grad_norm: 0.9230244455869607, iteration: 301761
loss: 1.0158549547195435,grad_norm: 0.8633320171581879, iteration: 301762
loss: 1.0006279945373535,grad_norm: 0.8642096235162592, iteration: 301763
loss: 0.9546749591827393,grad_norm: 0.8104062359570604, iteration: 301764
loss: 0.9877015948295593,grad_norm: 0.9189274334576202, iteration: 301765
loss: 1.0743306875228882,grad_norm: 0.9999992804678632, iteration: 301766
loss: 0.9748636484146118,grad_norm: 0.7721071438887154, iteration: 301767
loss: 0.9992373585700989,grad_norm: 0.7616003069041927, iteration: 301768
loss: 0.9947255849838257,grad_norm: 0.8329488528198873, iteration: 301769
loss: 0.9740612506866455,grad_norm: 0.877374409897133, iteration: 301770
loss: 1.0129402875900269,grad_norm: 0.9219499120229591, iteration: 301771
loss: 1.1083587408065796,grad_norm: 0.9999993341736702, iteration: 301772
loss: 0.9720405340194702,grad_norm: 0.8097011537570554, iteration: 301773
loss: 1.0034935474395752,grad_norm: 0.8669741725652275, iteration: 301774
loss: 1.01535165309906,grad_norm: 0.8853810280914144, iteration: 301775
loss: 1.0272256135940552,grad_norm: 0.9331787380877044, iteration: 301776
loss: 0.9925639629364014,grad_norm: 0.9933160151572831, iteration: 301777
loss: 0.9901837110519409,grad_norm: 0.7741867067325177, iteration: 301778
loss: 0.9970861077308655,grad_norm: 0.8617956813709057, iteration: 301779
loss: 0.9908324480056763,grad_norm: 0.8848998414058157, iteration: 301780
loss: 1.0045928955078125,grad_norm: 0.9999992020037142, iteration: 301781
loss: 0.9326565265655518,grad_norm: 0.8760407167973263, iteration: 301782
loss: 0.9702516794204712,grad_norm: 0.905968940221547, iteration: 301783
loss: 0.9826130867004395,grad_norm: 0.8635195264521334, iteration: 301784
loss: 1.0424201488494873,grad_norm: 0.8723494262592119, iteration: 301785
loss: 1.0125038623809814,grad_norm: 0.8381790176788595, iteration: 301786
loss: 1.0093399286270142,grad_norm: 0.8128882967439895, iteration: 301787
loss: 1.065956711769104,grad_norm: 0.8954655695770276, iteration: 301788
loss: 1.0033071041107178,grad_norm: 0.9468718489928576, iteration: 301789
loss: 0.9759859442710876,grad_norm: 0.8621927810532783, iteration: 301790
loss: 0.9695948362350464,grad_norm: 0.8385345861369652, iteration: 301791
loss: 1.0138273239135742,grad_norm: 0.9290404949676333, iteration: 301792
loss: 1.0065641403198242,grad_norm: 0.8102862932542091, iteration: 301793
loss: 0.9731126427650452,grad_norm: 0.7632308414447939, iteration: 301794
loss: 1.0293203592300415,grad_norm: 0.9551920623571253, iteration: 301795
loss: 1.0074865818023682,grad_norm: 0.8711719827109884, iteration: 301796
loss: 0.999001145362854,grad_norm: 0.9999990432081051, iteration: 301797
loss: 1.0240591764450073,grad_norm: 0.6999305741222458, iteration: 301798
loss: 0.9969238042831421,grad_norm: 0.9999990143938982, iteration: 301799
loss: 0.9996724724769592,grad_norm: 0.9999989591879325, iteration: 301800
loss: 1.0775797367095947,grad_norm: 0.9999999057594146, iteration: 301801
loss: 1.008095145225525,grad_norm: 0.9294233461110833, iteration: 301802
loss: 1.028540015220642,grad_norm: 0.9322277287869424, iteration: 301803
loss: 1.0034269094467163,grad_norm: 0.8539698107743399, iteration: 301804
loss: 1.0388981103897095,grad_norm: 0.8386985741184653, iteration: 301805
loss: 0.9830011129379272,grad_norm: 0.8966629204903119, iteration: 301806
loss: 0.9858377575874329,grad_norm: 0.7521585488350027, iteration: 301807
loss: 1.0437226295471191,grad_norm: 0.9999990754966559, iteration: 301808
loss: 0.9728638529777527,grad_norm: 0.8129114644282683, iteration: 301809
loss: 0.9921272397041321,grad_norm: 0.8570274848604323, iteration: 301810
loss: 1.0260367393493652,grad_norm: 0.8300391121562652, iteration: 301811
loss: 1.0046018362045288,grad_norm: 0.740895950124418, iteration: 301812
loss: 0.9814420938491821,grad_norm: 0.7477124049290582, iteration: 301813
loss: 0.987683892250061,grad_norm: 0.9999992270844058, iteration: 301814
loss: 0.9796569347381592,grad_norm: 0.8972100669962544, iteration: 301815
loss: 1.0089753866195679,grad_norm: 0.8334694020648381, iteration: 301816
loss: 1.0011968612670898,grad_norm: 0.826633816552276, iteration: 301817
loss: 1.0339187383651733,grad_norm: 0.9880638421535051, iteration: 301818
loss: 1.0124881267547607,grad_norm: 0.7794980138178484, iteration: 301819
loss: 1.01888906955719,grad_norm: 0.9206755229986296, iteration: 301820
loss: 0.9891564249992371,grad_norm: 0.8687030625782621, iteration: 301821
loss: 0.9857659339904785,grad_norm: 0.9999992325594697, iteration: 301822
loss: 0.9881342649459839,grad_norm: 0.9999991817573798, iteration: 301823
loss: 1.0002813339233398,grad_norm: 0.8251255191314455, iteration: 301824
loss: 0.9907521605491638,grad_norm: 0.8724918559416451, iteration: 301825
loss: 0.9992858171463013,grad_norm: 0.7047482729033673, iteration: 301826
loss: 1.0154738426208496,grad_norm: 0.9268514348648521, iteration: 301827
loss: 0.9666743874549866,grad_norm: 0.9561050636463665, iteration: 301828
loss: 0.9783241748809814,grad_norm: 0.9401157381271723, iteration: 301829
loss: 0.9743857979774475,grad_norm: 0.8996645742160693, iteration: 301830
loss: 1.0044382810592651,grad_norm: 0.8751354508849659, iteration: 301831
loss: 0.9990659952163696,grad_norm: 0.8308348211682778, iteration: 301832
loss: 1.0082978010177612,grad_norm: 0.8855753492039053, iteration: 301833
loss: 1.0133628845214844,grad_norm: 0.7287836668279942, iteration: 301834
loss: 1.0126818418502808,grad_norm: 0.9999992536697841, iteration: 301835
loss: 1.072971224784851,grad_norm: 0.9999997844054188, iteration: 301836
loss: 0.9902302622795105,grad_norm: 0.8625439312970642, iteration: 301837
loss: 0.9671264886856079,grad_norm: 0.9780289669160008, iteration: 301838
loss: 1.0181330442428589,grad_norm: 0.8015677702985404, iteration: 301839
loss: 1.0093327760696411,grad_norm: 0.8580207186359776, iteration: 301840
loss: 0.9958738088607788,grad_norm: 0.8697948773364312, iteration: 301841
loss: 1.048285722732544,grad_norm: 0.8577078556993414, iteration: 301842
loss: 1.0168298482894897,grad_norm: 0.86258709983821, iteration: 301843
loss: 1.0055122375488281,grad_norm: 0.9999992307988805, iteration: 301844
loss: 0.9953954219818115,grad_norm: 0.9999991596479372, iteration: 301845
loss: 1.0076268911361694,grad_norm: 0.8478389406931162, iteration: 301846
loss: 1.0152524709701538,grad_norm: 0.7970396676924227, iteration: 301847
loss: 1.002108097076416,grad_norm: 0.9999991495698942, iteration: 301848
loss: 0.9883294105529785,grad_norm: 0.8665620294093023, iteration: 301849
loss: 0.9995766282081604,grad_norm: 0.7703351141207396, iteration: 301850
loss: 1.0133179426193237,grad_norm: 0.7818950386287966, iteration: 301851
loss: 0.9671535491943359,grad_norm: 0.9375519203287136, iteration: 301852
loss: 1.0258747339248657,grad_norm: 0.896391543970754, iteration: 301853
loss: 1.0255026817321777,grad_norm: 0.8662562300995204, iteration: 301854
loss: 1.0096871852874756,grad_norm: 0.7398511985339716, iteration: 301855
loss: 0.9819958209991455,grad_norm: 0.9999990724875418, iteration: 301856
loss: 1.005897045135498,grad_norm: 0.8023139335984845, iteration: 301857
loss: 0.9954044818878174,grad_norm: 0.712336575521357, iteration: 301858
loss: 0.9834027886390686,grad_norm: 0.9469304601614486, iteration: 301859
loss: 1.0511372089385986,grad_norm: 0.7684559845906487, iteration: 301860
loss: 1.0100148916244507,grad_norm: 0.8714070152383192, iteration: 301861
loss: 1.006379246711731,grad_norm: 0.8981107749031048, iteration: 301862
loss: 1.0267443656921387,grad_norm: 0.9857937727225341, iteration: 301863
loss: 1.0586862564086914,grad_norm: 0.9999997004455652, iteration: 301864
loss: 1.014351487159729,grad_norm: 0.8317990647029185, iteration: 301865
loss: 0.9977300763130188,grad_norm: 0.9999992927145163, iteration: 301866
loss: 0.9798479080200195,grad_norm: 0.822972114426297, iteration: 301867
loss: 0.996721625328064,grad_norm: 0.9999989785684794, iteration: 301868
loss: 1.0032188892364502,grad_norm: 0.9999992066393003, iteration: 301869
loss: 0.9533539414405823,grad_norm: 0.787832101820579, iteration: 301870
loss: 1.0050981044769287,grad_norm: 0.8327828602444309, iteration: 301871
loss: 0.9870892763137817,grad_norm: 0.8920251559394936, iteration: 301872
loss: 1.0322386026382446,grad_norm: 0.9999998670399923, iteration: 301873
loss: 1.0448096990585327,grad_norm: 0.9643932552586243, iteration: 301874
loss: 0.9807918071746826,grad_norm: 0.9264212333194307, iteration: 301875
loss: 1.02283775806427,grad_norm: 0.9999993328280461, iteration: 301876
loss: 0.9719170331954956,grad_norm: 0.7866495639242004, iteration: 301877
loss: 0.9451577663421631,grad_norm: 0.7880496162332161, iteration: 301878
loss: 1.0013657808303833,grad_norm: 0.7869228558086759, iteration: 301879
loss: 0.9972695708274841,grad_norm: 0.893262222999969, iteration: 301880
loss: 1.0459318161010742,grad_norm: 0.9854058782044958, iteration: 301881
loss: 0.9781387448310852,grad_norm: 0.9140877128038194, iteration: 301882
loss: 1.002631425857544,grad_norm: 0.7442769848026175, iteration: 301883
loss: 1.0449097156524658,grad_norm: 0.8935035817051, iteration: 301884
loss: 0.9724412560462952,grad_norm: 0.7871621171812194, iteration: 301885
loss: 1.1002601385116577,grad_norm: 0.9999997717700727, iteration: 301886
loss: 1.0361449718475342,grad_norm: 0.848413809918773, iteration: 301887
loss: 0.9960792660713196,grad_norm: 0.9999990901406686, iteration: 301888
loss: 0.9854102730751038,grad_norm: 0.9999992961248211, iteration: 301889
loss: 1.1974135637283325,grad_norm: 0.9999997186039776, iteration: 301890
loss: 1.032798409461975,grad_norm: 0.9999995137377244, iteration: 301891
loss: 1.3984026908874512,grad_norm: 0.999999914679559, iteration: 301892
loss: 1.331037998199463,grad_norm: 0.9999994088321237, iteration: 301893
loss: 1.2259199619293213,grad_norm: 0.9999992611391268, iteration: 301894
loss: 1.111202597618103,grad_norm: 0.8620630586918858, iteration: 301895
loss: 1.077618956565857,grad_norm: 0.9917128536854236, iteration: 301896
loss: 1.0067147016525269,grad_norm: 0.8932143881894115, iteration: 301897
loss: 1.0831369161605835,grad_norm: 0.999999108770973, iteration: 301898
loss: 1.0322026014328003,grad_norm: 0.9168360957767545, iteration: 301899
loss: 1.1080831289291382,grad_norm: 0.9999990856384455, iteration: 301900
loss: 1.0062143802642822,grad_norm: 0.8489336801420357, iteration: 301901
loss: 1.1484675407409668,grad_norm: 0.916488472439348, iteration: 301902
loss: 1.0416203737258911,grad_norm: 0.9999992491900683, iteration: 301903
loss: 1.043225884437561,grad_norm: 0.9999993748758027, iteration: 301904
loss: 0.9907406568527222,grad_norm: 0.9656526579664717, iteration: 301905
loss: 1.0021928548812866,grad_norm: 0.8867916179370654, iteration: 301906
loss: 0.9865014553070068,grad_norm: 0.9999992496528244, iteration: 301907
loss: 0.985176146030426,grad_norm: 0.9263560199921029, iteration: 301908
loss: 1.197347640991211,grad_norm: 0.9999999516794273, iteration: 301909
loss: 1.0993947982788086,grad_norm: 0.9999992240929004, iteration: 301910
loss: 1.03139066696167,grad_norm: 0.829069113209768, iteration: 301911
loss: 1.1616764068603516,grad_norm: 0.9999996072016732, iteration: 301912
loss: 1.0489271879196167,grad_norm: 0.9999995333283785, iteration: 301913
loss: 1.0278599262237549,grad_norm: 0.9999990256190574, iteration: 301914
loss: 1.0015296936035156,grad_norm: 0.9999997569765984, iteration: 301915
loss: 1.1752022504806519,grad_norm: 0.9999995738094722, iteration: 301916
loss: 1.0421483516693115,grad_norm: 0.9999991627016847, iteration: 301917
loss: 1.0025482177734375,grad_norm: 0.9488138785046036, iteration: 301918
loss: 0.9773094058036804,grad_norm: 0.7071439926742304, iteration: 301919
loss: 1.0051653385162354,grad_norm: 0.8694532026311187, iteration: 301920
loss: 1.0226356983184814,grad_norm: 0.9999990957463445, iteration: 301921
loss: 1.005982518196106,grad_norm: 0.7800758407959393, iteration: 301922
loss: 1.0026825666427612,grad_norm: 0.9999996173281958, iteration: 301923
loss: 1.0339998006820679,grad_norm: 0.6736717671080789, iteration: 301924
loss: 1.003344178199768,grad_norm: 0.8423046680911578, iteration: 301925
loss: 1.0062072277069092,grad_norm: 0.7285232961286868, iteration: 301926
loss: 0.9713773131370544,grad_norm: 0.9979019107634428, iteration: 301927
loss: 1.019498586654663,grad_norm: 0.9999990213741239, iteration: 301928
loss: 1.0017657279968262,grad_norm: 0.7140288034932835, iteration: 301929
loss: 1.007310152053833,grad_norm: 0.9385246519224586, iteration: 301930
loss: 0.9967741370201111,grad_norm: 0.9544920023558952, iteration: 301931
loss: 1.0061057806015015,grad_norm: 0.9999995568779791, iteration: 301932
loss: 1.023025631904602,grad_norm: 0.9999991884926923, iteration: 301933
loss: 1.0946687459945679,grad_norm: 0.853484541658483, iteration: 301934
loss: 1.0209059715270996,grad_norm: 0.8913082934885145, iteration: 301935
loss: 1.0373425483703613,grad_norm: 0.9999992061099526, iteration: 301936
loss: 1.0205169916152954,grad_norm: 0.999999036836622, iteration: 301937
loss: 1.0333606004714966,grad_norm: 0.9999990698143267, iteration: 301938
loss: 1.0346145629882812,grad_norm: 0.8830956148151538, iteration: 301939
loss: 1.0138394832611084,grad_norm: 0.8406116987596466, iteration: 301940
loss: 1.0428245067596436,grad_norm: 0.9811236162718822, iteration: 301941
loss: 0.9847970604896545,grad_norm: 0.9046946815572674, iteration: 301942
loss: 1.0268278121948242,grad_norm: 0.8701653329883415, iteration: 301943
loss: 1.016830563545227,grad_norm: 0.8386252288405257, iteration: 301944
loss: 0.9972620010375977,grad_norm: 0.7522365270628596, iteration: 301945
loss: 1.0278602838516235,grad_norm: 0.9999991043874952, iteration: 301946
loss: 1.0544657707214355,grad_norm: 0.9946839660046992, iteration: 301947
loss: 1.0363885164260864,grad_norm: 0.9999994035366045, iteration: 301948
loss: 1.0262877941131592,grad_norm: 0.9938634047696967, iteration: 301949
loss: 1.0797148942947388,grad_norm: 0.8610421087363762, iteration: 301950
loss: 0.9980788230895996,grad_norm: 0.8637259495845109, iteration: 301951
loss: 1.0065765380859375,grad_norm: 0.8013572215960015, iteration: 301952
loss: 1.041208028793335,grad_norm: 0.6823275228772425, iteration: 301953
loss: 0.9920302033424377,grad_norm: 0.8504427785407016, iteration: 301954
loss: 1.0246986150741577,grad_norm: 0.9234341445669919, iteration: 301955
loss: 1.0284080505371094,grad_norm: 1.000000016818403, iteration: 301956
loss: 1.000531554222107,grad_norm: 0.8850089346291848, iteration: 301957
loss: 0.997711718082428,grad_norm: 0.8141721507813298, iteration: 301958
loss: 0.9699743986129761,grad_norm: 0.8730778060020117, iteration: 301959
loss: 1.175423502922058,grad_norm: 0.9999991345480359, iteration: 301960
loss: 1.0222669839859009,grad_norm: 0.9271717209520803, iteration: 301961
loss: 1.056210994720459,grad_norm: 0.9999992087852737, iteration: 301962
loss: 0.9996770620346069,grad_norm: 0.8263090684617833, iteration: 301963
loss: 1.018814206123352,grad_norm: 0.9081792387830951, iteration: 301964
loss: 1.0123308897018433,grad_norm: 0.7588228611993775, iteration: 301965
loss: 1.010421872138977,grad_norm: 0.7009959699204463, iteration: 301966
loss: 1.0012580156326294,grad_norm: 0.9999991562099889, iteration: 301967
loss: 0.9701629877090454,grad_norm: 0.7348251412098085, iteration: 301968
loss: 0.955693781375885,grad_norm: 0.8844164705055383, iteration: 301969
loss: 1.0003997087478638,grad_norm: 0.7933223605755841, iteration: 301970
loss: 0.987860381603241,grad_norm: 0.7484695191158387, iteration: 301971
loss: 0.9728879332542419,grad_norm: 0.9999991107590684, iteration: 301972
loss: 0.9687275886535645,grad_norm: 0.8388513456860638, iteration: 301973
loss: 0.9721562266349792,grad_norm: 0.8599464862399211, iteration: 301974
loss: 0.9972381591796875,grad_norm: 0.8256884408229153, iteration: 301975
loss: 1.002834439277649,grad_norm: 0.7898534749027756, iteration: 301976
loss: 1.0040922164916992,grad_norm: 0.9999997972122694, iteration: 301977
loss: 1.0005927085876465,grad_norm: 0.714032414481995, iteration: 301978
loss: 1.0050503015518188,grad_norm: 0.9303732590264764, iteration: 301979
loss: 1.000186562538147,grad_norm: 0.7908767039495063, iteration: 301980
loss: 1.094330906867981,grad_norm: 0.9999992795978024, iteration: 301981
loss: 0.9584155082702637,grad_norm: 0.7256473063776574, iteration: 301982
loss: 1.0374705791473389,grad_norm: 0.9141028883668174, iteration: 301983
loss: 0.9946140050888062,grad_norm: 0.9084597142683078, iteration: 301984
loss: 0.9953362941741943,grad_norm: 0.7562615117207121, iteration: 301985
loss: 1.0181641578674316,grad_norm: 0.8423301856296012, iteration: 301986
loss: 0.9782184362411499,grad_norm: 0.8253243835582316, iteration: 301987
loss: 1.0117123126983643,grad_norm: 0.9999991281637627, iteration: 301988
loss: 0.9764036536216736,grad_norm: 0.9117259783423229, iteration: 301989
loss: 0.9940142035484314,grad_norm: 0.9999990594658087, iteration: 301990
loss: 1.0666671991348267,grad_norm: 0.7574294790699927, iteration: 301991
loss: 1.0542815923690796,grad_norm: 0.9999992397949836, iteration: 301992
loss: 0.9754094481468201,grad_norm: 0.999998939859881, iteration: 301993
loss: 1.0156275033950806,grad_norm: 0.9999990391313637, iteration: 301994
loss: 1.0341911315917969,grad_norm: 0.880706854062012, iteration: 301995
loss: 0.9955587387084961,grad_norm: 0.8019888059806435, iteration: 301996
loss: 1.0131757259368896,grad_norm: 0.774054945787513, iteration: 301997
loss: 0.980658233165741,grad_norm: 0.9999992797505654, iteration: 301998
loss: 1.0053725242614746,grad_norm: 0.8042935028987893, iteration: 301999
loss: 1.0126579999923706,grad_norm: 0.857750435443443, iteration: 302000
loss: 0.9892681837081909,grad_norm: 0.9628019893844766, iteration: 302001
loss: 0.9941883683204651,grad_norm: 0.9816808819894394, iteration: 302002
loss: 0.9958627820014954,grad_norm: 0.9771689225279108, iteration: 302003
loss: 0.9864794611930847,grad_norm: 0.7297421704050215, iteration: 302004
loss: 1.0485972166061401,grad_norm: 0.9294451486894265, iteration: 302005
loss: 0.985353946685791,grad_norm: 0.7493753954687744, iteration: 302006
loss: 0.9754067659378052,grad_norm: 0.9462496571927352, iteration: 302007
loss: 1.012748122215271,grad_norm: 0.9999994358623688, iteration: 302008
loss: 0.9987104535102844,grad_norm: 0.7240549480173372, iteration: 302009
loss: 0.9679263830184937,grad_norm: 0.8010638876215445, iteration: 302010
loss: 1.0007145404815674,grad_norm: 0.7919733533537762, iteration: 302011
loss: 1.0432257652282715,grad_norm: 0.8846993014065497, iteration: 302012
loss: 0.9683386087417603,grad_norm: 0.8831534157641489, iteration: 302013
loss: 0.9736990332603455,grad_norm: 0.7289247128321994, iteration: 302014
loss: 1.0061646699905396,grad_norm: 0.8265668374523503, iteration: 302015
loss: 1.0404510498046875,grad_norm: 0.9999991391325512, iteration: 302016
loss: 0.9852883219718933,grad_norm: 0.8540443240032068, iteration: 302017
loss: 1.0850895643234253,grad_norm: 0.8887401151804811, iteration: 302018
loss: 0.9911271929740906,grad_norm: 0.7065171887617644, iteration: 302019
loss: 0.9669796824455261,grad_norm: 0.9282963175479007, iteration: 302020
loss: 0.9919501543045044,grad_norm: 0.8655453574908115, iteration: 302021
loss: 0.9829220771789551,grad_norm: 0.9965900040286956, iteration: 302022
loss: 1.019040584564209,grad_norm: 0.8526196760046267, iteration: 302023
loss: 1.0102574825286865,grad_norm: 0.8726899192724797, iteration: 302024
loss: 0.9622194766998291,grad_norm: 0.9999990690095772, iteration: 302025
loss: 1.060445785522461,grad_norm: 0.9999993403018029, iteration: 302026
loss: 1.0204566717147827,grad_norm: 0.999999134665136, iteration: 302027
loss: 0.9712613224983215,grad_norm: 0.9315539126541514, iteration: 302028
loss: 0.9939588904380798,grad_norm: 0.8940623429455785, iteration: 302029
loss: 0.9681274890899658,grad_norm: 0.8877478986087307, iteration: 302030
loss: 0.9970179796218872,grad_norm: 0.8415671505704502, iteration: 302031
loss: 1.0120137929916382,grad_norm: 0.8766730768872381, iteration: 302032
loss: 0.9575474858283997,grad_norm: 0.9999991455418408, iteration: 302033
loss: 0.9572537541389465,grad_norm: 0.8768771816540842, iteration: 302034
loss: 1.0148210525512695,grad_norm: 0.9999997246425661, iteration: 302035
loss: 1.0042413473129272,grad_norm: 0.8482535965714978, iteration: 302036
loss: 0.9755725860595703,grad_norm: 0.5826118840292681, iteration: 302037
loss: 1.0010267496109009,grad_norm: 0.8148238434845838, iteration: 302038
loss: 1.12642240524292,grad_norm: 0.9826527388477466, iteration: 302039
loss: 0.9910546541213989,grad_norm: 0.7363769175536719, iteration: 302040
loss: 1.0677036046981812,grad_norm: 0.9999991400331102, iteration: 302041
loss: 0.9981195330619812,grad_norm: 0.864617217442678, iteration: 302042
loss: 0.9655004143714905,grad_norm: 0.8661319750761824, iteration: 302043
loss: 0.9592405557632446,grad_norm: 0.7403161566427695, iteration: 302044
loss: 1.0187981128692627,grad_norm: 0.7653021051082981, iteration: 302045
loss: 1.0014779567718506,grad_norm: 0.8830994202211058, iteration: 302046
loss: 1.0163370370864868,grad_norm: 0.9999992317228454, iteration: 302047
loss: 0.9881793260574341,grad_norm: 0.869205090311249, iteration: 302048
loss: 1.0523093938827515,grad_norm: 0.9999998908716554, iteration: 302049
loss: 1.0104148387908936,grad_norm: 0.7947155707995213, iteration: 302050
loss: 0.9761223196983337,grad_norm: 0.8571927237910147, iteration: 302051
loss: 0.993902862071991,grad_norm: 0.7461856729431662, iteration: 302052
loss: 1.011732816696167,grad_norm: 0.945359220794573, iteration: 302053
loss: 1.003765344619751,grad_norm: 0.9584720970374174, iteration: 302054
loss: 1.0113329887390137,grad_norm: 0.722896735633273, iteration: 302055
loss: 1.0067905187606812,grad_norm: 0.9115191476928407, iteration: 302056
loss: 1.0113506317138672,grad_norm: 0.7281918913988811, iteration: 302057
loss: 0.9841848611831665,grad_norm: 0.8198414371430622, iteration: 302058
loss: 1.0311534404754639,grad_norm: 0.9061817761691133, iteration: 302059
loss: 0.9967601895332336,grad_norm: 0.9088426251937946, iteration: 302060
loss: 0.9865813255310059,grad_norm: 0.8571229144680166, iteration: 302061
loss: 0.9724928736686707,grad_norm: 0.8897116436455667, iteration: 302062
loss: 0.9946369528770447,grad_norm: 0.9999991833045973, iteration: 302063
loss: 1.0154775381088257,grad_norm: 0.9999991514943883, iteration: 302064
loss: 1.0231138467788696,grad_norm: 0.9999991951862818, iteration: 302065
loss: 0.999840497970581,grad_norm: 0.9999992677013181, iteration: 302066
loss: 1.0088870525360107,grad_norm: 0.915893458528399, iteration: 302067
loss: 1.0441217422485352,grad_norm: 0.999999698865543, iteration: 302068
loss: 1.0341490507125854,grad_norm: 0.9972436378752018, iteration: 302069
loss: 1.009186863899231,grad_norm: 0.845847990437164, iteration: 302070
loss: 1.0172992944717407,grad_norm: 0.9999994812244939, iteration: 302071
loss: 0.9873606562614441,grad_norm: 0.7647582099893449, iteration: 302072
loss: 1.0579484701156616,grad_norm: 0.9999991897339683, iteration: 302073
loss: 1.0110021829605103,grad_norm: 0.8371570923675139, iteration: 302074
loss: 0.9983898997306824,grad_norm: 0.9831372956961251, iteration: 302075
loss: 0.9980994462966919,grad_norm: 0.8774693972901069, iteration: 302076
loss: 1.009108304977417,grad_norm: 0.999999292145605, iteration: 302077
loss: 1.0038717985153198,grad_norm: 0.9999991490519499, iteration: 302078
loss: 0.9718760848045349,grad_norm: 0.8105837865948161, iteration: 302079
loss: 0.9919639825820923,grad_norm: 0.9341979155389857, iteration: 302080
loss: 1.0035728216171265,grad_norm: 0.8587261925750667, iteration: 302081
loss: 1.0301389694213867,grad_norm: 0.7895038354695775, iteration: 302082
loss: 1.039908766746521,grad_norm: 0.9999995715631559, iteration: 302083
loss: 0.9797744750976562,grad_norm: 0.7096256099210506, iteration: 302084
loss: 0.9893837571144104,grad_norm: 0.7953545488363238, iteration: 302085
loss: 1.0101009607315063,grad_norm: 0.7803446154059016, iteration: 302086
loss: 1.0659347772598267,grad_norm: 0.999999927772309, iteration: 302087
loss: 1.0001956224441528,grad_norm: 0.8460318879755522, iteration: 302088
loss: 0.9943737387657166,grad_norm: 0.9999993664099064, iteration: 302089
loss: 1.018031120300293,grad_norm: 0.9999992384201701, iteration: 302090
loss: 1.0802799463272095,grad_norm: 0.9999993521653164, iteration: 302091
loss: 1.0184558629989624,grad_norm: 0.7048942586406302, iteration: 302092
loss: 1.003831386566162,grad_norm: 0.9999997536297951, iteration: 302093
loss: 1.0144003629684448,grad_norm: 0.7750774709103939, iteration: 302094
loss: 0.961122453212738,grad_norm: 0.9999992366404377, iteration: 302095
loss: 1.0021121501922607,grad_norm: 0.9872967484096672, iteration: 302096
loss: 1.138515591621399,grad_norm: 0.9999994303830175, iteration: 302097
loss: 0.9838458895683289,grad_norm: 0.7674984186866677, iteration: 302098
loss: 0.9818358421325684,grad_norm: 0.9999990361907631, iteration: 302099
loss: 0.9995428919792175,grad_norm: 0.910086534499093, iteration: 302100
loss: 0.9933595657348633,grad_norm: 0.9999991613995357, iteration: 302101
loss: 0.9628973603248596,grad_norm: 0.7783645296119652, iteration: 302102
loss: 0.9639768004417419,grad_norm: 0.8330771305923791, iteration: 302103
loss: 0.9928885698318481,grad_norm: 0.8424323821019304, iteration: 302104
loss: 1.0254908800125122,grad_norm: 0.9999995911916302, iteration: 302105
loss: 0.9984509348869324,grad_norm: 0.866143430873944, iteration: 302106
loss: 1.0168349742889404,grad_norm: 0.8782112340261159, iteration: 302107
loss: 0.9937615990638733,grad_norm: 0.9907954355212568, iteration: 302108
loss: 1.0186141729354858,grad_norm: 0.8467970771021857, iteration: 302109
loss: 1.0446666479110718,grad_norm: 0.9999993449433185, iteration: 302110
loss: 1.0435566902160645,grad_norm: 0.9999992194615293, iteration: 302111
loss: 0.9965781569480896,grad_norm: 0.8406586436384809, iteration: 302112
loss: 1.0213054418563843,grad_norm: 0.9663074471089669, iteration: 302113
loss: 0.9537633657455444,grad_norm: 0.7833002531730281, iteration: 302114
loss: 1.0182783603668213,grad_norm: 0.8824970581645775, iteration: 302115
loss: 0.9763742685317993,grad_norm: 0.8632664168122745, iteration: 302116
loss: 1.0694741010665894,grad_norm: 0.9999994690005939, iteration: 302117
loss: 1.0102458000183105,grad_norm: 0.9999992210246615, iteration: 302118
loss: 1.024759292602539,grad_norm: 0.7037420732853183, iteration: 302119
loss: 0.9760453104972839,grad_norm: 0.8364014816734247, iteration: 302120
loss: 0.963625431060791,grad_norm: 0.7560233727725497, iteration: 302121
loss: 0.9663158655166626,grad_norm: 0.8245194758420674, iteration: 302122
loss: 1.0495858192443848,grad_norm: 0.7629575564333555, iteration: 302123
loss: 1.0834934711456299,grad_norm: 0.9999999019012349, iteration: 302124
loss: 1.0031441450119019,grad_norm: 0.7298451884379482, iteration: 302125
loss: 0.9960724115371704,grad_norm: 0.7959327568431634, iteration: 302126
loss: 1.081434726715088,grad_norm: 0.9999994645240247, iteration: 302127
loss: 0.9881148338317871,grad_norm: 0.8451782558398253, iteration: 302128
loss: 1.0031535625457764,grad_norm: 0.8760607795093551, iteration: 302129
loss: 0.9561837315559387,grad_norm: 0.9162644151045575, iteration: 302130
loss: 0.9889499545097351,grad_norm: 0.8702446789659436, iteration: 302131
loss: 1.0264041423797607,grad_norm: 0.9999997035503853, iteration: 302132
loss: 1.0089609622955322,grad_norm: 0.8226469514171405, iteration: 302133
loss: 0.9820940494537354,grad_norm: 0.9999999863432514, iteration: 302134
loss: 1.019553303718567,grad_norm: 0.9999989943540658, iteration: 302135
loss: 1.041307806968689,grad_norm: 0.9120174402265693, iteration: 302136
loss: 1.0330743789672852,grad_norm: 0.9999991898360293, iteration: 302137
loss: 0.9968540668487549,grad_norm: 0.9637683587254002, iteration: 302138
loss: 0.9848995208740234,grad_norm: 0.9999995160866048, iteration: 302139
loss: 0.9934379458427429,grad_norm: 0.9999991677269996, iteration: 302140
loss: 1.007614016532898,grad_norm: 0.8378870609213336, iteration: 302141
loss: 0.960852861404419,grad_norm: 0.7556789452704589, iteration: 302142
loss: 0.9719361662864685,grad_norm: 0.8507832470387504, iteration: 302143
loss: 1.027784824371338,grad_norm: 0.9999992067763218, iteration: 302144
loss: 0.9968023896217346,grad_norm: 0.999999644617453, iteration: 302145
loss: 1.0326632261276245,grad_norm: 0.9678181487620168, iteration: 302146
loss: 1.0006687641143799,grad_norm: 0.76235106999148, iteration: 302147
loss: 0.976951539516449,grad_norm: 0.9247114517739053, iteration: 302148
loss: 1.0931639671325684,grad_norm: 0.9999991543131492, iteration: 302149
loss: 1.0621442794799805,grad_norm: 0.8313698245189155, iteration: 302150
loss: 1.0364116430282593,grad_norm: 0.9434080280516489, iteration: 302151
loss: 1.066313624382019,grad_norm: 0.8098608005508349, iteration: 302152
loss: 1.0527414083480835,grad_norm: 0.9999997298743126, iteration: 302153
loss: 1.0175782442092896,grad_norm: 0.9403708541622081, iteration: 302154
loss: 0.985948920249939,grad_norm: 0.8361965697251786, iteration: 302155
loss: 0.9994907975196838,grad_norm: 0.9999996914971291, iteration: 302156
loss: 0.9786721467971802,grad_norm: 0.9706828150358936, iteration: 302157
loss: 1.0185678005218506,grad_norm: 0.8213218429730447, iteration: 302158
loss: 0.9816150665283203,grad_norm: 0.6958561553348727, iteration: 302159
loss: 1.1313844919204712,grad_norm: 0.9999997447992911, iteration: 302160
loss: 1.1054273843765259,grad_norm: 0.9156589676662221, iteration: 302161
loss: 1.0335476398468018,grad_norm: 0.9632275500610353, iteration: 302162
loss: 1.0683746337890625,grad_norm: 0.9999998539546675, iteration: 302163
loss: 1.0332834720611572,grad_norm: 0.9999996004085919, iteration: 302164
loss: 0.9754968285560608,grad_norm: 0.7329989106284637, iteration: 302165
loss: 0.987004280090332,grad_norm: 0.8306294699535328, iteration: 302166
loss: 0.9929419755935669,grad_norm: 0.7968696664033906, iteration: 302167
loss: 1.044474482536316,grad_norm: 0.9999997774439447, iteration: 302168
loss: 1.037786602973938,grad_norm: 0.7167649581961225, iteration: 302169
loss: 1.0596897602081299,grad_norm: 0.9999993667376611, iteration: 302170
loss: 1.0369293689727783,grad_norm: 0.8432757093347216, iteration: 302171
loss: 0.9407848119735718,grad_norm: 0.8460945898283305, iteration: 302172
loss: 1.026077151298523,grad_norm: 0.9999995424868358, iteration: 302173
loss: 1.0090886354446411,grad_norm: 0.9999997998787685, iteration: 302174
loss: 1.0001579523086548,grad_norm: 0.8622248515179616, iteration: 302175
loss: 0.9863854646682739,grad_norm: 0.9999995545868481, iteration: 302176
loss: 1.0436052083969116,grad_norm: 0.9999994655415664, iteration: 302177
loss: 0.9954347014427185,grad_norm: 0.7271434239750345, iteration: 302178
loss: 1.001918911933899,grad_norm: 0.8450812316974814, iteration: 302179
loss: 1.0369521379470825,grad_norm: 0.7771464353553809, iteration: 302180
loss: 1.0118173360824585,grad_norm: 0.9078392712180915, iteration: 302181
loss: 0.9980751872062683,grad_norm: 0.6783542391969021, iteration: 302182
loss: 0.9728385806083679,grad_norm: 0.9999991612385019, iteration: 302183
loss: 1.080681562423706,grad_norm: 0.9999998384444501, iteration: 302184
loss: 0.9631308317184448,grad_norm: 0.9248364887066558, iteration: 302185
loss: 0.957116425037384,grad_norm: 0.7004823985183768, iteration: 302186
loss: 1.0142312049865723,grad_norm: 0.9999994974764639, iteration: 302187
loss: 1.018409252166748,grad_norm: 0.9999991603918973, iteration: 302188
loss: 0.9743441343307495,grad_norm: 0.8978931230334208, iteration: 302189
loss: 1.0140366554260254,grad_norm: 0.9541787199230016, iteration: 302190
loss: 0.9670820832252502,grad_norm: 0.8000022428543122, iteration: 302191
loss: 0.9697169065475464,grad_norm: 0.9741701292574415, iteration: 302192
loss: 1.0002634525299072,grad_norm: 0.8878389594702703, iteration: 302193
loss: 0.995550274848938,grad_norm: 0.9871561316506067, iteration: 302194
loss: 0.990393340587616,grad_norm: 0.8183050755100119, iteration: 302195
loss: 1.0189820528030396,grad_norm: 0.7894568912842623, iteration: 302196
loss: 1.0357683897018433,grad_norm: 0.999999170159792, iteration: 302197
loss: 1.0600581169128418,grad_norm: 0.9999990625938693, iteration: 302198
loss: 1.0262762308120728,grad_norm: 0.9999990085533693, iteration: 302199
loss: 1.040328025817871,grad_norm: 0.8900991844574933, iteration: 302200
loss: 0.9921374320983887,grad_norm: 0.9017127979381114, iteration: 302201
loss: 1.0185065269470215,grad_norm: 0.9826348502053305, iteration: 302202
loss: 1.0279089212417603,grad_norm: 0.9678786886060471, iteration: 302203
loss: 1.0032978057861328,grad_norm: 0.6937141457418372, iteration: 302204
loss: 1.0570400953292847,grad_norm: 0.9999998628098007, iteration: 302205
loss: 0.9637944102287292,grad_norm: 0.8490626054378325, iteration: 302206
loss: 1.0169440507888794,grad_norm: 0.8052237539545611, iteration: 302207
loss: 1.004930853843689,grad_norm: 0.9762132207906868, iteration: 302208
loss: 1.0754975080490112,grad_norm: 0.9999992994878225, iteration: 302209
loss: 1.0626140832901,grad_norm: 0.9857344609086557, iteration: 302210
loss: 1.0066239833831787,grad_norm: 0.8442719490636099, iteration: 302211
loss: 1.0017002820968628,grad_norm: 0.8274255806283665, iteration: 302212
loss: 0.993026614189148,grad_norm: 0.9722456662682475, iteration: 302213
loss: 1.0774884223937988,grad_norm: 0.9999990618774247, iteration: 302214
loss: 1.0219974517822266,grad_norm: 0.7580684081406852, iteration: 302215
loss: 1.0628150701522827,grad_norm: 0.9999994725476958, iteration: 302216
loss: 1.030798316001892,grad_norm: 0.8899337947627538, iteration: 302217
loss: 1.1087790727615356,grad_norm: 0.9208984647073994, iteration: 302218
loss: 0.9817072749137878,grad_norm: 0.9999991187992967, iteration: 302219
loss: 1.0217080116271973,grad_norm: 0.9947185127088876, iteration: 302220
loss: 1.0184377431869507,grad_norm: 0.847613929724914, iteration: 302221
loss: 1.067689061164856,grad_norm: 0.9999995563967006, iteration: 302222
loss: 1.0071263313293457,grad_norm: 0.8884808376244326, iteration: 302223
loss: 1.0124553442001343,grad_norm: 0.9999992120931025, iteration: 302224
loss: 1.1051491498947144,grad_norm: 0.9999998094127263, iteration: 302225
loss: 1.043988823890686,grad_norm: 0.9931467018771961, iteration: 302226
loss: 0.9698928594589233,grad_norm: 0.7708037008845745, iteration: 302227
loss: 0.999106764793396,grad_norm: 0.8733608186431158, iteration: 302228
loss: 1.0049023628234863,grad_norm: 0.8386695830451005, iteration: 302229
loss: 1.004873514175415,grad_norm: 0.8835145646637879, iteration: 302230
loss: 0.9912876486778259,grad_norm: 0.964146580576014, iteration: 302231
loss: 0.9927824139595032,grad_norm: 0.8472679703106833, iteration: 302232
loss: 1.0014437437057495,grad_norm: 0.8814867699216699, iteration: 302233
loss: 1.0124540328979492,grad_norm: 0.9999990247158081, iteration: 302234
loss: 1.0169932842254639,grad_norm: 0.9999989405128009, iteration: 302235
loss: 1.015008568763733,grad_norm: 0.9999991718605868, iteration: 302236
loss: 1.002373218536377,grad_norm: 0.929259572453149, iteration: 302237
loss: 0.9751397371292114,grad_norm: 0.8429625514465341, iteration: 302238
loss: 1.013724684715271,grad_norm: 0.852884883880931, iteration: 302239
loss: 1.0962464809417725,grad_norm: 0.9084634403495931, iteration: 302240
loss: 1.0299367904663086,grad_norm: 0.8989200479827734, iteration: 302241
loss: 1.0374035835266113,grad_norm: 0.9999990806189215, iteration: 302242
loss: 1.0242359638214111,grad_norm: 0.9582987622455584, iteration: 302243
loss: 0.9507283568382263,grad_norm: 0.9999991177456437, iteration: 302244
loss: 1.044458031654358,grad_norm: 0.9500025159836787, iteration: 302245
loss: 0.9763661623001099,grad_norm: 0.7323195315753729, iteration: 302246
loss: 1.0344535112380981,grad_norm: 0.9917456698562072, iteration: 302247
loss: 0.9895949363708496,grad_norm: 0.7862476527965406, iteration: 302248
loss: 1.0300036668777466,grad_norm: 0.808238962217401, iteration: 302249
loss: 0.9595744013786316,grad_norm: 0.9423900974236805, iteration: 302250
loss: 1.0067540407180786,grad_norm: 0.843304516960096, iteration: 302251
loss: 0.9880911111831665,grad_norm: 0.9203466080638868, iteration: 302252
loss: 1.0294268131256104,grad_norm: 0.7834416537480122, iteration: 302253
loss: 1.0049142837524414,grad_norm: 0.9372523212111148, iteration: 302254
loss: 0.9891276955604553,grad_norm: 0.9710332175931977, iteration: 302255
loss: 0.9897677302360535,grad_norm: 0.7427717048454887, iteration: 302256
loss: 0.982542872428894,grad_norm: 0.8874375991005525, iteration: 302257
loss: 0.9536570310592651,grad_norm: 0.9117903609230278, iteration: 302258
loss: 1.000105619430542,grad_norm: 0.8363136573086227, iteration: 302259
loss: 0.9815183281898499,grad_norm: 0.9999995260393743, iteration: 302260
loss: 0.988444983959198,grad_norm: 0.928253492268011, iteration: 302261
loss: 0.9530302286148071,grad_norm: 0.7549782413021486, iteration: 302262
loss: 1.09047269821167,grad_norm: 0.8676530738276974, iteration: 302263
loss: 1.0582773685455322,grad_norm: 0.9999998569178462, iteration: 302264
loss: 0.9609006643295288,grad_norm: 0.9999991614883383, iteration: 302265
loss: 1.019102931022644,grad_norm: 0.8308934095075785, iteration: 302266
loss: 0.9898120760917664,grad_norm: 0.977055916360955, iteration: 302267
loss: 0.990761399269104,grad_norm: 0.7198768859160919, iteration: 302268
loss: 1.0720252990722656,grad_norm: 0.9999997375573545, iteration: 302269
loss: 0.9747684597969055,grad_norm: 0.8148208812896116, iteration: 302270
loss: 0.9628811478614807,grad_norm: 0.7894803472250819, iteration: 302271
loss: 0.9665893316268921,grad_norm: 0.7058231430248747, iteration: 302272
loss: 0.9988940954208374,grad_norm: 0.922241621373338, iteration: 302273
loss: 0.9974415898323059,grad_norm: 0.9999991750638395, iteration: 302274
loss: 1.0186601877212524,grad_norm: 0.8823696801093246, iteration: 302275
loss: 1.0121288299560547,grad_norm: 0.9999991957283039, iteration: 302276
loss: 1.030003547668457,grad_norm: 0.9505491439877348, iteration: 302277
loss: 0.9876851439476013,grad_norm: 0.7697383722090748, iteration: 302278
loss: 1.0220005512237549,grad_norm: 0.9011846675130861, iteration: 302279
loss: 0.9657033085823059,grad_norm: 0.885902824875781, iteration: 302280
loss: 0.9859340786933899,grad_norm: 0.9286376722566415, iteration: 302281
loss: 1.038289189338684,grad_norm: 0.6995731431175801, iteration: 302282
loss: 1.033607006072998,grad_norm: 0.989648912222528, iteration: 302283
loss: 1.002683401107788,grad_norm: 0.8802837024619874, iteration: 302284
loss: 1.0099092721939087,grad_norm: 0.9999991674262589, iteration: 302285
loss: 1.0348633527755737,grad_norm: 0.8194871575489532, iteration: 302286
loss: 1.001375675201416,grad_norm: 0.9933657570562545, iteration: 302287
loss: 0.9993219971656799,grad_norm: 0.9999994300649443, iteration: 302288
loss: 0.9743196368217468,grad_norm: 0.9134812888384504, iteration: 302289
loss: 1.0350192785263062,grad_norm: 0.9999990738939843, iteration: 302290
loss: 0.9465168118476868,grad_norm: 0.8217877499505909, iteration: 302291
loss: 1.0036479234695435,grad_norm: 0.9999992170402539, iteration: 302292
loss: 1.028122901916504,grad_norm: 0.9063714290855721, iteration: 302293
loss: 1.2147072553634644,grad_norm: 0.9999997702405926, iteration: 302294
loss: 1.0230717658996582,grad_norm: 0.8927534314397282, iteration: 302295
loss: 1.0437864065170288,grad_norm: 0.9999995213244189, iteration: 302296
loss: 0.9896855354309082,grad_norm: 0.794004550435424, iteration: 302297
loss: 1.0095113515853882,grad_norm: 0.8903615516567294, iteration: 302298
loss: 1.0035698413848877,grad_norm: 0.8049718560153083, iteration: 302299
loss: 1.0210931301116943,grad_norm: 0.9999991674948496, iteration: 302300
loss: 1.0034602880477905,grad_norm: 0.9156461203821131, iteration: 302301
loss: 0.9857615232467651,grad_norm: 0.8622638984468292, iteration: 302302
loss: 1.010021448135376,grad_norm: 0.9999993155796474, iteration: 302303
loss: 0.9625346064567566,grad_norm: 0.8657404562976747, iteration: 302304
loss: 1.0055179595947266,grad_norm: 0.6758554310751513, iteration: 302305
loss: 1.0194395780563354,grad_norm: 0.8645192532303153, iteration: 302306
loss: 1.0149385929107666,grad_norm: 0.9528782849034869, iteration: 302307
loss: 0.9920434355735779,grad_norm: 0.9044846225808256, iteration: 302308
loss: 0.9883787631988525,grad_norm: 0.798977363884717, iteration: 302309
loss: 0.994361400604248,grad_norm: 0.8887071909717521, iteration: 302310
loss: 0.9931202530860901,grad_norm: 0.7453581075814587, iteration: 302311
loss: 1.0003033876419067,grad_norm: 0.8620250790441139, iteration: 302312
loss: 0.9865769147872925,grad_norm: 0.7886091395407182, iteration: 302313
loss: 1.00734281539917,grad_norm: 0.8105901310646122, iteration: 302314
loss: 1.009497046470642,grad_norm: 0.7985951530003913, iteration: 302315
loss: 0.971616804599762,grad_norm: 0.9727978846532976, iteration: 302316
loss: 0.9617948532104492,grad_norm: 0.7236533028056253, iteration: 302317
loss: 0.9997994899749756,grad_norm: 0.718341571040204, iteration: 302318
loss: 0.9793599843978882,grad_norm: 0.9999991567733358, iteration: 302319
loss: 1.1084617376327515,grad_norm: 0.9999990518177192, iteration: 302320
loss: 1.0158050060272217,grad_norm: 0.8587948437872768, iteration: 302321
loss: 1.0082032680511475,grad_norm: 0.8806250445716467, iteration: 302322
loss: 0.9938014149665833,grad_norm: 0.800296300012524, iteration: 302323
loss: 0.9909384250640869,grad_norm: 0.7983767826089965, iteration: 302324
loss: 0.9631611704826355,grad_norm: 0.791245267330518, iteration: 302325
loss: 1.0065897703170776,grad_norm: 0.7463467014194458, iteration: 302326
loss: 1.0304293632507324,grad_norm: 0.8120857902615644, iteration: 302327
loss: 1.0258996486663818,grad_norm: 0.9999997034785186, iteration: 302328
loss: 1.0146008729934692,grad_norm: 0.9988213041115382, iteration: 302329
loss: 1.0067013502120972,grad_norm: 0.9127797102931821, iteration: 302330
loss: 1.0067797899246216,grad_norm: 0.8433236646943716, iteration: 302331
loss: 0.9773010611534119,grad_norm: 0.9999997581475931, iteration: 302332
loss: 1.3665854930877686,grad_norm: 0.9999992594453209, iteration: 302333
loss: 0.999011754989624,grad_norm: 0.7081519167412762, iteration: 302334
loss: 1.0232820510864258,grad_norm: 0.9533884758871106, iteration: 302335
loss: 0.9725127816200256,grad_norm: 0.8921782815682521, iteration: 302336
loss: 0.9794631600379944,grad_norm: 0.9906076450834758, iteration: 302337
loss: 0.9800133109092712,grad_norm: 0.7686828333657407, iteration: 302338
loss: 1.0074896812438965,grad_norm: 0.878004854186582, iteration: 302339
loss: 0.9648902416229248,grad_norm: 0.7975748040558052, iteration: 302340
loss: 0.9565393328666687,grad_norm: 0.8747453192138189, iteration: 302341
loss: 1.0293924808502197,grad_norm: 0.9323653259889049, iteration: 302342
loss: 1.014597773551941,grad_norm: 0.9999991333263403, iteration: 302343
loss: 0.9932741522789001,grad_norm: 0.8464974044039322, iteration: 302344
loss: 0.9785414338111877,grad_norm: 0.7783531730802024, iteration: 302345
loss: 0.9593861103057861,grad_norm: 0.9308421360788518, iteration: 302346
loss: 1.0036640167236328,grad_norm: 0.8279602667374929, iteration: 302347
loss: 1.1773438453674316,grad_norm: 0.999999846581848, iteration: 302348
loss: 0.9638300538063049,grad_norm: 0.9999992468643896, iteration: 302349
loss: 1.0286524295806885,grad_norm: 0.9999991907752936, iteration: 302350
loss: 1.047127366065979,grad_norm: 0.999999069981819, iteration: 302351
loss: 1.039175271987915,grad_norm: 0.9999992030839546, iteration: 302352
loss: 1.0035160779953003,grad_norm: 0.7139313535298664, iteration: 302353
loss: 1.042561650276184,grad_norm: 0.8032970097885236, iteration: 302354
loss: 1.0135265588760376,grad_norm: 0.9999998421799091, iteration: 302355
loss: 0.9906715154647827,grad_norm: 0.9108360202689718, iteration: 302356
loss: 1.007980227470398,grad_norm: 0.8282615693928231, iteration: 302357
loss: 1.0006128549575806,grad_norm: 0.9999991401080484, iteration: 302358
loss: 0.9910190105438232,grad_norm: 0.7210078040607644, iteration: 302359
loss: 1.0267802476882935,grad_norm: 0.8825862565616848, iteration: 302360
loss: 1.026635766029358,grad_norm: 0.9999999246644405, iteration: 302361
loss: 0.9991963505744934,grad_norm: 0.6754791226987186, iteration: 302362
loss: 0.9881542921066284,grad_norm: 0.7064584005655451, iteration: 302363
loss: 0.9986458420753479,grad_norm: 0.9422904100393437, iteration: 302364
loss: 1.1593403816223145,grad_norm: 0.9999993957915101, iteration: 302365
loss: 1.0188721418380737,grad_norm: 0.9545541986081126, iteration: 302366
loss: 0.9756395816802979,grad_norm: 0.7359272553337484, iteration: 302367
loss: 1.0082335472106934,grad_norm: 0.8903445173793116, iteration: 302368
loss: 0.9752697348594666,grad_norm: 0.8318013655074505, iteration: 302369
loss: 0.9793311357498169,grad_norm: 0.9343561692183766, iteration: 302370
loss: 1.0571964979171753,grad_norm: 0.8901703458856285, iteration: 302371
loss: 1.050768494606018,grad_norm: 0.9999997296518665, iteration: 302372
loss: 1.1553847789764404,grad_norm: 0.9999998620547564, iteration: 302373
loss: 1.0234483480453491,grad_norm: 0.7586511502746616, iteration: 302374
loss: 0.9953386783599854,grad_norm: 0.9999996209574772, iteration: 302375
loss: 0.9735074639320374,grad_norm: 0.7907717035203674, iteration: 302376
loss: 0.9847815036773682,grad_norm: 0.6708581619203408, iteration: 302377
loss: 1.01365327835083,grad_norm: 0.8371485609777858, iteration: 302378
loss: 1.0251245498657227,grad_norm: 0.8461558398194342, iteration: 302379
loss: 0.9645121097564697,grad_norm: 0.9999997269043748, iteration: 302380
loss: 1.0224769115447998,grad_norm: 0.9999990622359207, iteration: 302381
loss: 1.0606849193572998,grad_norm: 0.9999995228547526, iteration: 302382
loss: 1.0113517045974731,grad_norm: 0.8036916681878259, iteration: 302383
loss: 1.0620349645614624,grad_norm: 1.0000000496576686, iteration: 302384
loss: 1.0502549409866333,grad_norm: 0.9999997261110671, iteration: 302385
loss: 1.0233749151229858,grad_norm: 0.9457554940120148, iteration: 302386
loss: 0.9877296686172485,grad_norm: 0.9824336948303751, iteration: 302387
loss: 1.076655387878418,grad_norm: 0.7888215984803889, iteration: 302388
loss: 0.9651163816452026,grad_norm: 0.8710035667540543, iteration: 302389
loss: 1.0098577737808228,grad_norm: 0.8726739791538756, iteration: 302390
loss: 1.0208631753921509,grad_norm: 0.8848435929069305, iteration: 302391
loss: 1.032608985900879,grad_norm: 0.9061412399448017, iteration: 302392
loss: 1.0859354734420776,grad_norm: 0.8388504974686922, iteration: 302393
loss: 1.0672576427459717,grad_norm: 0.9999996071669744, iteration: 302394
loss: 1.006556749343872,grad_norm: 0.9029051608109239, iteration: 302395
loss: 0.9937537908554077,grad_norm: 0.9999997115635446, iteration: 302396
loss: 0.9969849586486816,grad_norm: 0.9999990032493056, iteration: 302397
loss: 0.9938192367553711,grad_norm: 0.8227279845096189, iteration: 302398
loss: 0.9493604898452759,grad_norm: 0.8516111152390119, iteration: 302399
loss: 1.035959005355835,grad_norm: 0.9999990706362643, iteration: 302400
loss: 1.0118407011032104,grad_norm: 0.9051176470024289, iteration: 302401
loss: 1.0346451997756958,grad_norm: 0.8776126986708273, iteration: 302402
loss: 1.012579321861267,grad_norm: 0.9999993547707223, iteration: 302403
loss: 0.9696560502052307,grad_norm: 0.7781327007253532, iteration: 302404
loss: 1.0205856561660767,grad_norm: 0.999999214939575, iteration: 302405
loss: 1.0141795873641968,grad_norm: 0.8922290309485885, iteration: 302406
loss: 1.0024583339691162,grad_norm: 0.7494814581151962, iteration: 302407
loss: 1.0007057189941406,grad_norm: 0.9999990566223348, iteration: 302408
loss: 1.0176960229873657,grad_norm: 0.9606752720080448, iteration: 302409
loss: 1.0025516748428345,grad_norm: 0.9317743926183083, iteration: 302410
loss: 1.0089311599731445,grad_norm: 0.8136612324222472, iteration: 302411
loss: 0.9913533926010132,grad_norm: 0.9999991549329773, iteration: 302412
loss: 0.9686566591262817,grad_norm: 0.8217815137451823, iteration: 302413
loss: 0.9749197363853455,grad_norm: 0.9999989833776414, iteration: 302414
loss: 0.9917135238647461,grad_norm: 0.9999997931811415, iteration: 302415
loss: 1.0124772787094116,grad_norm: 0.7551438971120098, iteration: 302416
loss: 1.0121643543243408,grad_norm: 0.9999998761711588, iteration: 302417
loss: 1.1018061637878418,grad_norm: 0.9999997562702899, iteration: 302418
loss: 1.0689553022384644,grad_norm: 0.9279381089984144, iteration: 302419
loss: 0.9979357719421387,grad_norm: 0.7861685234789524, iteration: 302420
loss: 0.9953198432922363,grad_norm: 0.8789469852687403, iteration: 302421
loss: 1.0041015148162842,grad_norm: 0.9999993757760759, iteration: 302422
loss: 1.0184645652770996,grad_norm: 0.921262291911467, iteration: 302423
loss: 1.0001206398010254,grad_norm: 0.9816743094837216, iteration: 302424
loss: 0.9697202444076538,grad_norm: 0.9205316858082787, iteration: 302425
loss: 0.9957336187362671,grad_norm: 0.9216079198873357, iteration: 302426
loss: 1.0031744241714478,grad_norm: 0.8001829215153005, iteration: 302427
loss: 0.9652450084686279,grad_norm: 0.9999989857903331, iteration: 302428
loss: 1.0505236387252808,grad_norm: 0.9481562339164206, iteration: 302429
loss: 0.9740837216377258,grad_norm: 0.7261188083334672, iteration: 302430
loss: 1.0085991621017456,grad_norm: 0.9999990279206664, iteration: 302431
loss: 1.0247637033462524,grad_norm: 0.9767749608911205, iteration: 302432
loss: 1.0849311351776123,grad_norm: 0.8238422153856211, iteration: 302433
loss: 0.9859748482704163,grad_norm: 0.8948949324469408, iteration: 302434
loss: 1.0043179988861084,grad_norm: 0.9999989404868279, iteration: 302435
loss: 1.0110328197479248,grad_norm: 0.9999997712178016, iteration: 302436
loss: 0.9959139823913574,grad_norm: 0.9415401547202458, iteration: 302437
loss: 0.9740746021270752,grad_norm: 0.7721176936719989, iteration: 302438
loss: 1.0279693603515625,grad_norm: 0.999999294083287, iteration: 302439
loss: 0.9939309358596802,grad_norm: 0.9999998275821025, iteration: 302440
loss: 1.0148388147354126,grad_norm: 0.9999990389133222, iteration: 302441
loss: 1.008650541305542,grad_norm: 0.8979101559420944, iteration: 302442
loss: 1.0725854635238647,grad_norm: 0.9595555640270791, iteration: 302443
loss: 1.0160082578659058,grad_norm: 0.9755785490283787, iteration: 302444
loss: 1.066728115081787,grad_norm: 0.9999998687295484, iteration: 302445
loss: 0.98060542345047,grad_norm: 0.7795483391235875, iteration: 302446
loss: 1.0155799388885498,grad_norm: 0.835053191342147, iteration: 302447
loss: 0.9891834855079651,grad_norm: 0.9419809387435852, iteration: 302448
loss: 1.0261894464492798,grad_norm: 0.9999999179174609, iteration: 302449
loss: 1.0237071514129639,grad_norm: 0.9999992253537681, iteration: 302450
loss: 0.9991266131401062,grad_norm: 0.8817317813344562, iteration: 302451
loss: 0.9865444898605347,grad_norm: 0.9436856836358226, iteration: 302452
loss: 1.0003578662872314,grad_norm: 0.9838584240306827, iteration: 302453
loss: 0.9740697145462036,grad_norm: 0.8651753037565666, iteration: 302454
loss: 1.0335522890090942,grad_norm: 0.8364444703425953, iteration: 302455
loss: 1.0518807172775269,grad_norm: 0.9999996328313758, iteration: 302456
loss: 1.0728882551193237,grad_norm: 0.9999995937155304, iteration: 302457
loss: 1.0916743278503418,grad_norm: 0.9999991256659608, iteration: 302458
loss: 0.9566596746444702,grad_norm: 0.8372193824221856, iteration: 302459
loss: 1.0174636840820312,grad_norm: 0.9999997872429082, iteration: 302460
loss: 0.992866575717926,grad_norm: 0.9322562844521052, iteration: 302461
loss: 1.108519196510315,grad_norm: 0.9999992324284754, iteration: 302462
loss: 0.986979603767395,grad_norm: 0.6859079750305611, iteration: 302463
loss: 1.0478558540344238,grad_norm: 0.8253702428039338, iteration: 302464
loss: 0.9824581146240234,grad_norm: 0.7983346652405033, iteration: 302465
loss: 1.2587717771530151,grad_norm: 0.9999999140141835, iteration: 302466
loss: 1.0167394876480103,grad_norm: 0.9491587545030845, iteration: 302467
loss: 1.1476863622665405,grad_norm: 0.9999994940965227, iteration: 302468
loss: 1.0043625831604004,grad_norm: 0.9656368021353836, iteration: 302469
loss: 1.1097575426101685,grad_norm: 0.9620178831267515, iteration: 302470
loss: 1.0337724685668945,grad_norm: 0.9999993201249212, iteration: 302471
loss: 1.014702558517456,grad_norm: 0.7645477643523345, iteration: 302472
loss: 1.0200278759002686,grad_norm: 0.8940038654740634, iteration: 302473
loss: 1.0025050640106201,grad_norm: 0.9433035502481004, iteration: 302474
loss: 1.0851794481277466,grad_norm: 0.9999996631635205, iteration: 302475
loss: 0.9787703156471252,grad_norm: 0.7418601804332023, iteration: 302476
loss: 0.9875370860099792,grad_norm: 0.7829473338826869, iteration: 302477
loss: 1.020678162574768,grad_norm: 0.9046883509570541, iteration: 302478
loss: 1.1936709880828857,grad_norm: 0.9999999035253927, iteration: 302479
loss: 1.0299490690231323,grad_norm: 0.9999994852788766, iteration: 302480
loss: 1.2716435194015503,grad_norm: 0.9999999085452549, iteration: 302481
loss: 1.0692726373672485,grad_norm: 0.9999999192530515, iteration: 302482
loss: 1.0591763257980347,grad_norm: 0.9999995975477864, iteration: 302483
loss: 1.1210993528366089,grad_norm: 0.9999998369276364, iteration: 302484
loss: 1.23569655418396,grad_norm: 0.999999725263674, iteration: 302485
loss: 1.1922407150268555,grad_norm: 0.9999999632827403, iteration: 302486
loss: 1.1246089935302734,grad_norm: 0.999999372296033, iteration: 302487
loss: 1.3255772590637207,grad_norm: 0.9999998792334235, iteration: 302488
loss: 1.497881531715393,grad_norm: 0.9999999027946387, iteration: 302489
loss: 1.2184555530548096,grad_norm: 0.9999998486636457, iteration: 302490
loss: 1.2504290342330933,grad_norm: 0.9999999078450565, iteration: 302491
loss: 1.3479828834533691,grad_norm: 0.9999998937763123, iteration: 302492
loss: 1.4410721063613892,grad_norm: 0.9999998014677146, iteration: 302493
loss: 1.0488150119781494,grad_norm: 0.9999992395395537, iteration: 302494
loss: 1.2761167287826538,grad_norm: 0.9999998929955853, iteration: 302495
loss: 1.070343255996704,grad_norm: 0.9999995040043586, iteration: 302496
loss: 1.283917784690857,grad_norm: 0.9999998299085125, iteration: 302497
loss: 1.3215495347976685,grad_norm: 0.9999999178423554, iteration: 302498
loss: 1.0750291347503662,grad_norm: 0.9999997437779715, iteration: 302499
loss: 1.0873197317123413,grad_norm: 0.9999992867268627, iteration: 302500
loss: 1.0665287971496582,grad_norm: 0.9999992927669099, iteration: 302501
loss: 1.1105432510375977,grad_norm: 0.9999990748741916, iteration: 302502
loss: 1.0300689935684204,grad_norm: 0.9999997302534243, iteration: 302503
loss: 1.2580422163009644,grad_norm: 0.99999963704714, iteration: 302504
loss: 1.102988600730896,grad_norm: 0.9999996763852084, iteration: 302505
loss: 1.0715051889419556,grad_norm: 0.9337772260437847, iteration: 302506
loss: 1.1492822170257568,grad_norm: 0.9999995721872136, iteration: 302507
loss: 1.0540999174118042,grad_norm: 0.9999991995109763, iteration: 302508
loss: 1.1713892221450806,grad_norm: 0.9999999222808899, iteration: 302509
loss: 1.0631800889968872,grad_norm: 0.9999996589802003, iteration: 302510
loss: 1.1857362985610962,grad_norm: 0.9999996898452954, iteration: 302511
loss: 1.1105637550354004,grad_norm: 0.999998981085737, iteration: 302512
loss: 1.2029740810394287,grad_norm: 0.9999999908071113, iteration: 302513
loss: 1.074741244316101,grad_norm: 0.9999995851158611, iteration: 302514
loss: 1.121454119682312,grad_norm: 0.9999993687066018, iteration: 302515
loss: 1.1167196035385132,grad_norm: 0.9999995859730638, iteration: 302516
loss: 1.2040060758590698,grad_norm: 0.9999999673166007, iteration: 302517
loss: 1.0185350179672241,grad_norm: 0.9999991403186359, iteration: 302518
loss: 1.0554745197296143,grad_norm: 0.9999991147018927, iteration: 302519
loss: 1.0352072715759277,grad_norm: 0.9999991006421555, iteration: 302520
loss: 1.1364942789077759,grad_norm: 0.9999997847221369, iteration: 302521
loss: 1.3505157232284546,grad_norm: 0.9999999611982906, iteration: 302522
loss: 1.0869909524917603,grad_norm: 0.9999999614872155, iteration: 302523
loss: 0.996798574924469,grad_norm: 0.999999719413417, iteration: 302524
loss: 1.0271998643875122,grad_norm: 0.9999994279321643, iteration: 302525
loss: 1.2006516456604004,grad_norm: 0.9999999812652122, iteration: 302526
loss: 1.0749220848083496,grad_norm: 1.000000050371933, iteration: 302527
loss: 1.141569972038269,grad_norm: 0.9999996756494054, iteration: 302528
loss: 1.0110304355621338,grad_norm: 0.9801191572804073, iteration: 302529
loss: 1.0456078052520752,grad_norm: 0.9999998833987012, iteration: 302530
loss: 1.2479273080825806,grad_norm: 0.9999998582380917, iteration: 302531
loss: 1.1349703073501587,grad_norm: 0.9999998905280132, iteration: 302532
loss: 1.1521284580230713,grad_norm: 0.9999999054495444, iteration: 302533
loss: 1.066585659980774,grad_norm: 0.9999994689919216, iteration: 302534
loss: 1.017386555671692,grad_norm: 0.9999990880123129, iteration: 302535
loss: 1.1606237888336182,grad_norm: 0.9999995806254425, iteration: 302536
loss: 1.1346372365951538,grad_norm: 0.9999996849004352, iteration: 302537
loss: 1.0548059940338135,grad_norm: 0.9999998624104768, iteration: 302538
loss: 1.1156399250030518,grad_norm: 0.9999998994257564, iteration: 302539
loss: 1.1271008253097534,grad_norm: 0.9999996309759884, iteration: 302540
loss: 1.2795301675796509,grad_norm: 0.9999997853947549, iteration: 302541
loss: 1.0615192651748657,grad_norm: 0.9999991212803119, iteration: 302542
loss: 1.0349870920181274,grad_norm: 0.9999995767618332, iteration: 302543
loss: 1.04323410987854,grad_norm: 0.9999992744843998, iteration: 302544
loss: 1.2748897075653076,grad_norm: 0.9999998848883058, iteration: 302545
loss: 1.084661841392517,grad_norm: 0.9999996364849376, iteration: 302546
loss: 1.1361466646194458,grad_norm: 0.9999996298103853, iteration: 302547
loss: 1.06807279586792,grad_norm: 0.999999018988487, iteration: 302548
loss: 1.224223017692566,grad_norm: 0.9999998227783803, iteration: 302549
loss: 0.9942136406898499,grad_norm: 0.9999999624336267, iteration: 302550
loss: 1.14320969581604,grad_norm: 0.9999999531913657, iteration: 302551
loss: 1.0045537948608398,grad_norm: 0.9999997548857363, iteration: 302552
loss: 1.0421953201293945,grad_norm: 0.9999993912818554, iteration: 302553
loss: 1.0816669464111328,grad_norm: 1.0000000008354946, iteration: 302554
loss: 1.0356847047805786,grad_norm: 0.9999992684905059, iteration: 302555
loss: 0.9725865125656128,grad_norm: 0.9999990553558442, iteration: 302556
loss: 1.247795820236206,grad_norm: 0.9999997831304386, iteration: 302557
loss: 1.2162748575210571,grad_norm: 0.9999996203731703, iteration: 302558
loss: 1.2075262069702148,grad_norm: 0.9999996133152159, iteration: 302559
loss: 1.1958690881729126,grad_norm: 0.999999710525491, iteration: 302560
loss: 1.0091795921325684,grad_norm: 0.709745306449758, iteration: 302561
loss: 0.9880826473236084,grad_norm: 0.999999516038958, iteration: 302562
loss: 1.0321407318115234,grad_norm: 0.9350873579562906, iteration: 302563
loss: 1.0253875255584717,grad_norm: 0.9999992149154119, iteration: 302564
loss: 1.0130046606063843,grad_norm: 0.837867248969225, iteration: 302565
loss: 1.0736685991287231,grad_norm: 0.9999999788082073, iteration: 302566
loss: 0.9914121627807617,grad_norm: 0.9999997948587316, iteration: 302567
loss: 1.0308210849761963,grad_norm: 0.9999995807521544, iteration: 302568
loss: 1.0098737478256226,grad_norm: 0.9999998746429549, iteration: 302569
loss: 1.0026801824569702,grad_norm: 0.8354929445529096, iteration: 302570
loss: 1.1038061380386353,grad_norm: 0.9999997753865305, iteration: 302571
loss: 1.1394387483596802,grad_norm: 0.9999994782960077, iteration: 302572
loss: 1.0534913539886475,grad_norm: 0.9999997570254426, iteration: 302573
loss: 1.0434046983718872,grad_norm: 0.9999991287417799, iteration: 302574
loss: 1.08650803565979,grad_norm: 0.9999996032613904, iteration: 302575
loss: 1.1041765213012695,grad_norm: 0.9999992326565857, iteration: 302576
loss: 1.027112603187561,grad_norm: 0.7150228304773046, iteration: 302577
loss: 1.1420646905899048,grad_norm: 1.000000000805034, iteration: 302578
loss: 0.9756157994270325,grad_norm: 0.8950952709051825, iteration: 302579
loss: 1.0315208435058594,grad_norm: 0.9999999074064981, iteration: 302580
loss: 1.0075571537017822,grad_norm: 0.9791412470564341, iteration: 302581
loss: 0.984686553478241,grad_norm: 0.9999992489456676, iteration: 302582
loss: 0.9981369376182556,grad_norm: 0.8821443932320855, iteration: 302583
loss: 0.9851408004760742,grad_norm: 0.853940905154946, iteration: 302584
loss: 1.010416030883789,grad_norm: 0.7886963122340908, iteration: 302585
loss: 1.126888632774353,grad_norm: 0.9999996012663145, iteration: 302586
loss: 1.0101039409637451,grad_norm: 0.9999994997998213, iteration: 302587
loss: 1.0332074165344238,grad_norm: 0.9285985377505241, iteration: 302588
loss: 1.0452680587768555,grad_norm: 0.9999999628729347, iteration: 302589
loss: 1.017995834350586,grad_norm: 0.9999993496981088, iteration: 302590
loss: 1.03394615650177,grad_norm: 0.999999657561725, iteration: 302591
loss: 1.032209873199463,grad_norm: 0.999999428678037, iteration: 302592
loss: 1.1189930438995361,grad_norm: 0.999999660745441, iteration: 302593
loss: 1.0125316381454468,grad_norm: 0.9999998483761922, iteration: 302594
loss: 1.0152181386947632,grad_norm: 0.9999997403489559, iteration: 302595
loss: 1.0386412143707275,grad_norm: 0.9999993983284979, iteration: 302596
loss: 0.9994004964828491,grad_norm: 0.88742199247162, iteration: 302597
loss: 1.0586504936218262,grad_norm: 0.999999149822509, iteration: 302598
loss: 1.0138171911239624,grad_norm: 0.9999990688046968, iteration: 302599
loss: 1.0583330392837524,grad_norm: 0.999999681795322, iteration: 302600
loss: 0.9730767011642456,grad_norm: 0.8266825434229809, iteration: 302601
loss: 1.0351828336715698,grad_norm: 0.8385949145045756, iteration: 302602
loss: 0.9883819222450256,grad_norm: 0.8111970610607435, iteration: 302603
loss: 1.0006513595581055,grad_norm: 0.9304769883363877, iteration: 302604
loss: 1.0617748498916626,grad_norm: 0.9999994648315031, iteration: 302605
loss: 1.03708016872406,grad_norm: 0.8622621940316604, iteration: 302606
loss: 1.062558889389038,grad_norm: 0.9273971291123021, iteration: 302607
loss: 1.025936484336853,grad_norm: 0.9200908814026506, iteration: 302608
loss: 0.9901474118232727,grad_norm: 0.99999917051731, iteration: 302609
loss: 1.0008411407470703,grad_norm: 0.9999995490889032, iteration: 302610
loss: 1.0270706415176392,grad_norm: 0.9999996677119615, iteration: 302611
loss: 1.1154156923294067,grad_norm: 0.9999994163314326, iteration: 302612
loss: 1.227477788925171,grad_norm: 0.9999990090162137, iteration: 302613
loss: 0.9857866764068604,grad_norm: 0.9583661859210019, iteration: 302614
loss: 0.96895831823349,grad_norm: 0.9999999205306498, iteration: 302615
loss: 1.0353718996047974,grad_norm: 0.9999997805008084, iteration: 302616
loss: 0.9835209250450134,grad_norm: 0.7995823124209563, iteration: 302617
loss: 0.9936770796775818,grad_norm: 0.9999991891600798, iteration: 302618
loss: 1.0550806522369385,grad_norm: 0.9472668835236371, iteration: 302619
loss: 1.0311083793640137,grad_norm: 0.9682769677042741, iteration: 302620
loss: 1.0250755548477173,grad_norm: 0.9182520088972225, iteration: 302621
loss: 1.0656232833862305,grad_norm: 0.9999998684208653, iteration: 302622
loss: 1.1709036827087402,grad_norm: 1.0000000515668843, iteration: 302623
loss: 0.9847399592399597,grad_norm: 0.8685502618938624, iteration: 302624
loss: 1.084372639656067,grad_norm: 0.9999993237512002, iteration: 302625
loss: 1.1038404703140259,grad_norm: 0.9999995207281925, iteration: 302626
loss: 1.0150803327560425,grad_norm: 0.9999993215537619, iteration: 302627
loss: 1.0101417303085327,grad_norm: 0.9999990430686477, iteration: 302628
loss: 1.0076321363449097,grad_norm: 0.8552361622038683, iteration: 302629
loss: 1.0905473232269287,grad_norm: 0.9999996547009254, iteration: 302630
loss: 0.9918603897094727,grad_norm: 0.8541122150048708, iteration: 302631
loss: 0.9876524209976196,grad_norm: 0.9705911175552907, iteration: 302632
loss: 0.9950686097145081,grad_norm: 0.9580360018189377, iteration: 302633
loss: 0.980560839176178,grad_norm: 0.9999990958816793, iteration: 302634
loss: 1.0337773561477661,grad_norm: 0.9999992299218373, iteration: 302635
loss: 1.0164293050765991,grad_norm: 0.7931864412006392, iteration: 302636
loss: 1.0017883777618408,grad_norm: 0.9999993921075426, iteration: 302637
loss: 0.9922484755516052,grad_norm: 0.99999957360549, iteration: 302638
loss: 1.005158543586731,grad_norm: 0.7728741546207948, iteration: 302639
loss: 1.0029264688491821,grad_norm: 0.9999999061649184, iteration: 302640
loss: 1.100953459739685,grad_norm: 0.9999997296759839, iteration: 302641
loss: 1.0378073453903198,grad_norm: 0.9999991128410227, iteration: 302642
loss: 1.074702262878418,grad_norm: 0.8615997407577642, iteration: 302643
loss: 1.009272813796997,grad_norm: 0.8692105472579493, iteration: 302644
loss: 1.0181071758270264,grad_norm: 0.7645273445306423, iteration: 302645
loss: 0.9980802536010742,grad_norm: 0.9999990150720419, iteration: 302646
loss: 1.1256409883499146,grad_norm: 0.999999219151215, iteration: 302647
loss: 1.0167136192321777,grad_norm: 0.9887680316257605, iteration: 302648
loss: 0.9908775687217712,grad_norm: 0.7952458182063679, iteration: 302649
loss: 1.0245375633239746,grad_norm: 0.9999992087624139, iteration: 302650
loss: 0.9973462224006653,grad_norm: 0.9999999569364945, iteration: 302651
loss: 1.0619875192642212,grad_norm: 0.9999992238151231, iteration: 302652
loss: 1.094308614730835,grad_norm: 0.9716707995949404, iteration: 302653
loss: 1.016257643699646,grad_norm: 0.8800701912001968, iteration: 302654
loss: 1.0110344886779785,grad_norm: 0.8545130497982998, iteration: 302655
loss: 1.1048626899719238,grad_norm: 0.9999991853464794, iteration: 302656
loss: 1.0475126504898071,grad_norm: 0.8392616985397126, iteration: 302657
loss: 0.9957747459411621,grad_norm: 0.9999997618861992, iteration: 302658
loss: 1.0241951942443848,grad_norm: 0.9999990298997283, iteration: 302659
loss: 0.999789834022522,grad_norm: 0.9999991850659695, iteration: 302660
loss: 1.0509439706802368,grad_norm: 1.0000000177780968, iteration: 302661
loss: 0.991777777671814,grad_norm: 0.8814642256417589, iteration: 302662
loss: 1.0178735256195068,grad_norm: 0.9999997955305424, iteration: 302663
loss: 1.0292458534240723,grad_norm: 0.999999372366565, iteration: 302664
loss: 1.0013829469680786,grad_norm: 0.92115041232446, iteration: 302665
loss: 1.0147572755813599,grad_norm: 0.999999122578276, iteration: 302666
loss: 1.0044162273406982,grad_norm: 0.8373227961623063, iteration: 302667
loss: 0.9860729575157166,grad_norm: 0.7483750587862402, iteration: 302668
loss: 1.0010915994644165,grad_norm: 0.8022101564742192, iteration: 302669
loss: 0.9964634776115417,grad_norm: 0.9999998664209432, iteration: 302670
loss: 0.9858930110931396,grad_norm: 0.8290501670424776, iteration: 302671
loss: 1.0322166681289673,grad_norm: 0.7942827317374325, iteration: 302672
loss: 0.9993491768836975,grad_norm: 0.9999993952740568, iteration: 302673
loss: 1.0118145942687988,grad_norm: 0.8331650260193477, iteration: 302674
loss: 0.9677374958992004,grad_norm: 0.9999991391013281, iteration: 302675
loss: 1.0418816804885864,grad_norm: 0.8755594250147295, iteration: 302676
loss: 0.9795989990234375,grad_norm: 0.9999990823835607, iteration: 302677
loss: 1.010607361793518,grad_norm: 0.77591300975151, iteration: 302678
loss: 1.023127794265747,grad_norm: 0.9147476263773529, iteration: 302679
loss: 1.038325309753418,grad_norm: 0.9999992328923206, iteration: 302680
loss: 1.0356106758117676,grad_norm: 0.8411057190707472, iteration: 302681
loss: 1.0297691822052002,grad_norm: 0.9999991908864152, iteration: 302682
loss: 0.9873964190483093,grad_norm: 0.876266078965215, iteration: 302683
loss: 1.0278160572052002,grad_norm: 0.9999993348491036, iteration: 302684
loss: 1.126317024230957,grad_norm: 0.9999997334731933, iteration: 302685
loss: 1.0297324657440186,grad_norm: 0.9999991374396392, iteration: 302686
loss: 1.0193438529968262,grad_norm: 0.9999990760892026, iteration: 302687
loss: 0.9962829947471619,grad_norm: 0.9304012839643891, iteration: 302688
loss: 1.0177028179168701,grad_norm: 0.9999992112199237, iteration: 302689
loss: 1.0068811178207397,grad_norm: 0.9999993493083261, iteration: 302690
loss: 1.0781604051589966,grad_norm: 0.9670965614306182, iteration: 302691
loss: 1.0231505632400513,grad_norm: 0.9709114026694257, iteration: 302692
loss: 0.9874132871627808,grad_norm: 0.85344654586602, iteration: 302693
loss: 1.0975770950317383,grad_norm: 0.999999923665648, iteration: 302694
loss: 0.9799554347991943,grad_norm: 0.8865361291447362, iteration: 302695
loss: 0.9983105063438416,grad_norm: 0.7729598721774112, iteration: 302696
loss: 1.0100445747375488,grad_norm: 0.8729880854747558, iteration: 302697
loss: 1.0384936332702637,grad_norm: 0.999999264280009, iteration: 302698
loss: 1.0053170919418335,grad_norm: 0.9999990581617865, iteration: 302699
loss: 1.0616190433502197,grad_norm: 0.9999990816383137, iteration: 302700
loss: 0.990118682384491,grad_norm: 0.9544457018701661, iteration: 302701
loss: 1.0323501825332642,grad_norm: 0.9373438872952294, iteration: 302702
loss: 0.9996030330657959,grad_norm: 0.8563782969766298, iteration: 302703
loss: 1.0044597387313843,grad_norm: 0.8653526560814688, iteration: 302704
loss: 0.995281994342804,grad_norm: 0.9999995271132438, iteration: 302705
loss: 1.1045399904251099,grad_norm: 0.9999995204901443, iteration: 302706
loss: 1.0594068765640259,grad_norm: 0.999999371466384, iteration: 302707
loss: 0.9874438047409058,grad_norm: 0.7545809265418765, iteration: 302708
loss: 1.0131258964538574,grad_norm: 0.7327383501536586, iteration: 302709
loss: 1.0711942911148071,grad_norm: 0.9999991852232423, iteration: 302710
loss: 1.0459777116775513,grad_norm: 0.8868519929935378, iteration: 302711
loss: 0.9931266903877258,grad_norm: 0.9612222001286405, iteration: 302712
loss: 1.0099881887435913,grad_norm: 0.9999993860742552, iteration: 302713
loss: 1.092714786529541,grad_norm: 0.9999992939592292, iteration: 302714
loss: 1.056968331336975,grad_norm: 0.9999996894756197, iteration: 302715
loss: 0.9894382953643799,grad_norm: 0.9692571764408195, iteration: 302716
loss: 1.0515468120574951,grad_norm: 0.9999992992759047, iteration: 302717
loss: 0.9779863357543945,grad_norm: 0.8109488781240797, iteration: 302718
loss: 0.9869518280029297,grad_norm: 1.000000003138394, iteration: 302719
loss: 1.1150412559509277,grad_norm: 0.999999099806273, iteration: 302720
loss: 1.0449116230010986,grad_norm: 0.8999255726868868, iteration: 302721
loss: 1.0529953241348267,grad_norm: 0.9999991136349979, iteration: 302722
loss: 1.0026969909667969,grad_norm: 0.9999996273634926, iteration: 302723
loss: 1.030083417892456,grad_norm: 0.862354580137102, iteration: 302724
loss: 1.0151556730270386,grad_norm: 0.9999992074538405, iteration: 302725
loss: 1.2086073160171509,grad_norm: 0.9999997728858737, iteration: 302726
loss: 1.0198845863342285,grad_norm: 0.9087154143666866, iteration: 302727
loss: 1.0276371240615845,grad_norm: 0.8976691090010058, iteration: 302728
loss: 0.995236873626709,grad_norm: 0.7498295270432285, iteration: 302729
loss: 1.018798828125,grad_norm: 0.7644813224906518, iteration: 302730
loss: 1.0997638702392578,grad_norm: 0.9999995352422619, iteration: 302731
loss: 0.9714781641960144,grad_norm: 0.836383549052481, iteration: 302732
loss: 0.9863296151161194,grad_norm: 0.9999990777273192, iteration: 302733
loss: 1.0730916261672974,grad_norm: 0.9999991575974483, iteration: 302734
loss: 1.020326018333435,grad_norm: 0.9999990756766914, iteration: 302735
loss: 1.1169825792312622,grad_norm: 0.9999998567081766, iteration: 302736
loss: 1.0610466003417969,grad_norm: 0.9999999389080204, iteration: 302737
loss: 0.9985693097114563,grad_norm: 0.999999125145656, iteration: 302738
loss: 1.0093798637390137,grad_norm: 0.9999999734205788, iteration: 302739
loss: 1.0389906167984009,grad_norm: 0.8056647652757549, iteration: 302740
loss: 0.9468927383422852,grad_norm: 0.8752877560938593, iteration: 302741
loss: 1.028272032737732,grad_norm: 0.8417435445796805, iteration: 302742
loss: 1.0910972356796265,grad_norm: 0.9999993855688103, iteration: 302743
loss: 1.124434232711792,grad_norm: 0.9999996086245525, iteration: 302744
loss: 1.1083486080169678,grad_norm: 0.9999991225996113, iteration: 302745
loss: 1.1044996976852417,grad_norm: 0.9138440316954045, iteration: 302746
loss: 1.013214349746704,grad_norm: 0.99999915653365, iteration: 302747
loss: 1.1119048595428467,grad_norm: 0.9999992724404271, iteration: 302748
loss: 1.0286595821380615,grad_norm: 0.8515146117926102, iteration: 302749
loss: 1.0664973258972168,grad_norm: 0.9999992487271139, iteration: 302750
loss: 1.037714958190918,grad_norm: 0.9999995566949164, iteration: 302751
loss: 1.1258443593978882,grad_norm: 0.9999994362214433, iteration: 302752
loss: 1.0662764310836792,grad_norm: 0.9999993723243575, iteration: 302753
loss: 1.0492366552352905,grad_norm: 0.9234826729778263, iteration: 302754
loss: 0.9853230714797974,grad_norm: 0.9999992672114788, iteration: 302755
loss: 1.0681540966033936,grad_norm: 0.9999999428442634, iteration: 302756
loss: 0.9606260657310486,grad_norm: 0.7484616191725391, iteration: 302757
loss: 1.0396090745925903,grad_norm: 0.9999993644938431, iteration: 302758
loss: 1.1240135431289673,grad_norm: 0.9999991530600916, iteration: 302759
loss: 1.0114164352416992,grad_norm: 0.9099875485810395, iteration: 302760
loss: 0.9773270487785339,grad_norm: 0.9024648009745223, iteration: 302761
loss: 1.014024019241333,grad_norm: 0.8329438760760205, iteration: 302762
loss: 1.0956388711929321,grad_norm: 0.9999993466161862, iteration: 302763
loss: 1.0132144689559937,grad_norm: 0.9999999396830545, iteration: 302764
loss: 1.0646562576293945,grad_norm: 0.9999992326106177, iteration: 302765
loss: 1.0184258222579956,grad_norm: 0.9999991227440586, iteration: 302766
loss: 1.037794828414917,grad_norm: 0.9999993245169517, iteration: 302767
loss: 1.1138765811920166,grad_norm: 0.9999990999358499, iteration: 302768
loss: 1.1546928882598877,grad_norm: 0.9999999975823333, iteration: 302769
loss: 1.058890700340271,grad_norm: 0.9999994795179047, iteration: 302770
loss: 0.9908472895622253,grad_norm: 0.7913467002244996, iteration: 302771
loss: 1.037140130996704,grad_norm: 1.0000000071497348, iteration: 302772
loss: 1.0018867254257202,grad_norm: 0.9999994479071396, iteration: 302773
loss: 1.0975408554077148,grad_norm: 0.8562312354229916, iteration: 302774
loss: 1.0316475629806519,grad_norm: 0.9999991662732964, iteration: 302775
loss: 1.0024199485778809,grad_norm: 0.7404204330907652, iteration: 302776
loss: 1.0861670970916748,grad_norm: 0.9999996607646771, iteration: 302777
loss: 1.0017907619476318,grad_norm: 0.7393052319434553, iteration: 302778
loss: 1.0747994184494019,grad_norm: 0.9969498021912682, iteration: 302779
loss: 0.9977448582649231,grad_norm: 0.9999989664007274, iteration: 302780
loss: 1.099908709526062,grad_norm: 0.9999998590260875, iteration: 302781
loss: 1.1019073724746704,grad_norm: 0.9999992679128451, iteration: 302782
loss: 0.9800994992256165,grad_norm: 0.9999992144906635, iteration: 302783
loss: 1.0097657442092896,grad_norm: 0.9999990993443414, iteration: 302784
loss: 1.057084321975708,grad_norm: 0.999999283038157, iteration: 302785
loss: 0.9898207783699036,grad_norm: 0.9999990627563233, iteration: 302786
loss: 0.9878465533256531,grad_norm: 0.9631125731212178, iteration: 302787
loss: 1.0097448825836182,grad_norm: 0.7805505371124734, iteration: 302788
loss: 1.112765908241272,grad_norm: 0.9999993189585897, iteration: 302789
loss: 1.0016652345657349,grad_norm: 0.999999201236811, iteration: 302790
loss: 1.0713342428207397,grad_norm: 0.9999993693567248, iteration: 302791
loss: 1.0166910886764526,grad_norm: 0.9999997217694899, iteration: 302792
loss: 1.0475174188613892,grad_norm: 0.9999991134897235, iteration: 302793
loss: 1.0161383152008057,grad_norm: 0.9999992387070747, iteration: 302794
loss: 1.1072361469268799,grad_norm: 0.9999992102282174, iteration: 302795
loss: 1.1729130744934082,grad_norm: 0.9999996548552661, iteration: 302796
loss: 1.0635756254196167,grad_norm: 0.9999997184128916, iteration: 302797
loss: 1.0839184522628784,grad_norm: 0.9999994658154816, iteration: 302798
loss: 1.0195198059082031,grad_norm: 1.0000000079306612, iteration: 302799
loss: 1.0292060375213623,grad_norm: 0.9113114787106641, iteration: 302800
loss: 1.2289555072784424,grad_norm: 0.9999998964192561, iteration: 302801
loss: 1.0110536813735962,grad_norm: 0.9012157074172279, iteration: 302802
loss: 1.1760647296905518,grad_norm: 0.9999995062195506, iteration: 302803
loss: 1.0384502410888672,grad_norm: 0.9999992924205363, iteration: 302804
loss: 1.0370550155639648,grad_norm: 0.9999990243278217, iteration: 302805
loss: 1.0011298656463623,grad_norm: 0.7906875259899693, iteration: 302806
loss: 1.0493286848068237,grad_norm: 0.8963949793084033, iteration: 302807
loss: 1.0635238885879517,grad_norm: 0.9999994629896549, iteration: 302808
loss: 1.0325132608413696,grad_norm: 0.9999993344657387, iteration: 302809
loss: 1.071966290473938,grad_norm: 0.8780886024501099, iteration: 302810
loss: 1.1144331693649292,grad_norm: 0.9999997631286454, iteration: 302811
loss: 0.9832746982574463,grad_norm: 0.8169119822010013, iteration: 302812
loss: 1.061435580253601,grad_norm: 0.9999991833474356, iteration: 302813
loss: 1.0242902040481567,grad_norm: 0.9999993694958281, iteration: 302814
loss: 1.214145541191101,grad_norm: 0.9999997837088512, iteration: 302815
loss: 1.0974355936050415,grad_norm: 0.9999997691345307, iteration: 302816
loss: 1.112330436706543,grad_norm: 0.999999431492092, iteration: 302817
loss: 1.0840380191802979,grad_norm: 0.9999992596509373, iteration: 302818
loss: 1.0325958728790283,grad_norm: 0.821660502215437, iteration: 302819
loss: 1.0373798608779907,grad_norm: 0.9999991759687801, iteration: 302820
loss: 1.0951460599899292,grad_norm: 0.9999991470057348, iteration: 302821
loss: 1.0870541334152222,grad_norm: 0.9999999459939384, iteration: 302822
loss: 1.0143585205078125,grad_norm: 0.828306246933338, iteration: 302823
loss: 1.026459813117981,grad_norm: 0.945735619087761, iteration: 302824
loss: 1.033382534980774,grad_norm: 0.8462289596167358, iteration: 302825
loss: 1.0061724185943604,grad_norm: 0.8607462065448049, iteration: 302826
loss: 1.004411220550537,grad_norm: 0.9999992415741834, iteration: 302827
loss: 0.9971862435340881,grad_norm: 0.9999992926967876, iteration: 302828
loss: 1.2224639654159546,grad_norm: 0.9999994649241276, iteration: 302829
loss: 1.0372459888458252,grad_norm: 1.0000000637950217, iteration: 302830
loss: 1.0386604070663452,grad_norm: 0.9102786143783005, iteration: 302831
loss: 1.0620290040969849,grad_norm: 0.9187181027846151, iteration: 302832
loss: 1.1065099239349365,grad_norm: 0.9999997776917995, iteration: 302833
loss: 1.0062904357910156,grad_norm: 0.8803206801919461, iteration: 302834
loss: 1.0607659816741943,grad_norm: 0.9999991390207401, iteration: 302835
loss: 1.060148000717163,grad_norm: 0.9999990547950206, iteration: 302836
loss: 1.039075493812561,grad_norm: 0.9505876081041549, iteration: 302837
loss: 1.0065739154815674,grad_norm: 0.8852945811281482, iteration: 302838
loss: 1.0455440282821655,grad_norm: 0.8895400044673908, iteration: 302839
loss: 0.9711908102035522,grad_norm: 0.7640186806323, iteration: 302840
loss: 1.046532154083252,grad_norm: 0.7283290902501806, iteration: 302841
loss: 1.0501947402954102,grad_norm: 0.999999550787477, iteration: 302842
loss: 1.041791558265686,grad_norm: 0.8903986249948256, iteration: 302843
loss: 1.130439043045044,grad_norm: 0.9999991167245483, iteration: 302844
loss: 1.017486810684204,grad_norm: 0.9734363148952955, iteration: 302845
loss: 1.1161463260650635,grad_norm: 0.9999995974586446, iteration: 302846
loss: 1.0027967691421509,grad_norm: 0.9614247070150551, iteration: 302847
loss: 1.0870288610458374,grad_norm: 0.999999747433234, iteration: 302848
loss: 1.089733600616455,grad_norm: 0.9999995288482534, iteration: 302849
loss: 1.022279977798462,grad_norm: 0.8557806276170471, iteration: 302850
loss: 1.012067437171936,grad_norm: 0.9999999047856741, iteration: 302851
loss: 1.0411573648452759,grad_norm: 0.9999993400541279, iteration: 302852
loss: 1.0047820806503296,grad_norm: 0.9136034372619377, iteration: 302853
loss: 1.03020441532135,grad_norm: 0.9229512194593259, iteration: 302854
loss: 0.9875114560127258,grad_norm: 0.8364357473076232, iteration: 302855
loss: 1.0481256246566772,grad_norm: 0.9999993254433484, iteration: 302856
loss: 0.9688777327537537,grad_norm: 0.9935086629506097, iteration: 302857
loss: 0.9984604120254517,grad_norm: 0.9999990260472141, iteration: 302858
loss: 1.0841635465621948,grad_norm: 0.9999992748996053, iteration: 302859
loss: 1.0097851753234863,grad_norm: 0.9999991798465205, iteration: 302860
loss: 1.0242931842803955,grad_norm: 0.7933134957003631, iteration: 302861
loss: 1.034164547920227,grad_norm: 0.9513763033860744, iteration: 302862
loss: 1.006303071975708,grad_norm: 0.8643449223109195, iteration: 302863
loss: 1.0212942361831665,grad_norm: 0.8289115727553389, iteration: 302864
loss: 1.0652803182601929,grad_norm: 0.8733048823783378, iteration: 302865
loss: 1.0329005718231201,grad_norm: 0.9999995427316289, iteration: 302866
loss: 0.9804895520210266,grad_norm: 0.9863092314134404, iteration: 302867
loss: 1.0209048986434937,grad_norm: 0.7111401615096297, iteration: 302868
loss: 1.0128296613693237,grad_norm: 0.9999992087231884, iteration: 302869
loss: 1.1772375106811523,grad_norm: 0.9999999934320143, iteration: 302870
loss: 1.0070102214813232,grad_norm: 0.6786469411683822, iteration: 302871
loss: 0.9874807000160217,grad_norm: 0.7680469086882636, iteration: 302872
loss: 1.2333989143371582,grad_norm: 0.9999999237211059, iteration: 302873
loss: 1.0640777349472046,grad_norm: 0.9999994898937348, iteration: 302874
loss: 1.0055890083312988,grad_norm: 0.8353490099528833, iteration: 302875
loss: 1.055303931236267,grad_norm: 0.9998799432978539, iteration: 302876
loss: 1.0431166887283325,grad_norm: 0.9999998177552835, iteration: 302877
loss: 1.1239246129989624,grad_norm: 0.9999994361521296, iteration: 302878
loss: 1.0451781749725342,grad_norm: 0.9612671287956515, iteration: 302879
loss: 1.0974005460739136,grad_norm: 0.942834042777475, iteration: 302880
loss: 1.0165455341339111,grad_norm: 0.9874976150124866, iteration: 302881
loss: 1.1125260591506958,grad_norm: 0.9999995225186578, iteration: 302882
loss: 1.114094614982605,grad_norm: 0.9999990835427485, iteration: 302883
loss: 1.0250136852264404,grad_norm: 0.9999997984125315, iteration: 302884
loss: 1.0132893323898315,grad_norm: 0.9992173082981616, iteration: 302885
loss: 1.1093419790267944,grad_norm: 0.9999996782668346, iteration: 302886
loss: 1.036277174949646,grad_norm: 0.9999994136242177, iteration: 302887
loss: 1.0350055694580078,grad_norm: 0.99999999121286, iteration: 302888
loss: 1.0085862874984741,grad_norm: 0.8801848104304157, iteration: 302889
loss: 1.087913990020752,grad_norm: 0.999999417670954, iteration: 302890
loss: 0.9948137998580933,grad_norm: 0.7634465569225272, iteration: 302891
loss: 1.0385968685150146,grad_norm: 0.9999992717048877, iteration: 302892
loss: 1.1154661178588867,grad_norm: 0.9999996950579352, iteration: 302893
loss: 1.0247260332107544,grad_norm: 0.9999995666224832, iteration: 302894
loss: 1.0070892572402954,grad_norm: 0.8719041671854045, iteration: 302895
loss: 0.9960504174232483,grad_norm: 0.8995794740014033, iteration: 302896
loss: 1.0652492046356201,grad_norm: 0.9999995119504193, iteration: 302897
loss: 1.0666249990463257,grad_norm: 0.9999993478738769, iteration: 302898
loss: 1.0959224700927734,grad_norm: 0.9999992101934975, iteration: 302899
loss: 1.0359079837799072,grad_norm: 0.9999995368382328, iteration: 302900
loss: 1.0167953968048096,grad_norm: 0.9999992288451707, iteration: 302901
loss: 1.0358093976974487,grad_norm: 0.8730180828531651, iteration: 302902
loss: 1.0277059078216553,grad_norm: 0.9999993277497908, iteration: 302903
loss: 1.084383249282837,grad_norm: 0.9999994270113485, iteration: 302904
loss: 0.970567524433136,grad_norm: 0.999999531428872, iteration: 302905
loss: 1.0329416990280151,grad_norm: 0.7487299028175706, iteration: 302906
loss: 1.0952192544937134,grad_norm: 0.9999993274950703, iteration: 302907
loss: 1.0400670766830444,grad_norm: 0.9999994165557451, iteration: 302908
loss: 1.063558578491211,grad_norm: 0.9999999517373287, iteration: 302909
loss: 1.156703233718872,grad_norm: 0.9999995495390874, iteration: 302910
loss: 0.9927008152008057,grad_norm: 0.9999992512384968, iteration: 302911
loss: 1.0092686414718628,grad_norm: 0.8637657443744873, iteration: 302912
loss: 1.0502668619155884,grad_norm: 0.999999465499452, iteration: 302913
loss: 0.9841494560241699,grad_norm: 0.9999991561444723, iteration: 302914
loss: 0.9816040992736816,grad_norm: 0.8772750424636837, iteration: 302915
loss: 0.9991555213928223,grad_norm: 0.9999999833165865, iteration: 302916
loss: 1.0543947219848633,grad_norm: 0.9999993726033031, iteration: 302917
loss: 1.0350978374481201,grad_norm: 0.9999999206796286, iteration: 302918
loss: 1.0008530616760254,grad_norm: 0.8330526548923931, iteration: 302919
loss: 0.9826934933662415,grad_norm: 0.9999999680779112, iteration: 302920
loss: 1.0355027914047241,grad_norm: 0.8057715259470439, iteration: 302921
loss: 1.0538933277130127,grad_norm: 0.9999992126072629, iteration: 302922
loss: 1.0960060358047485,grad_norm: 0.999999830391675, iteration: 302923
loss: 1.0019532442092896,grad_norm: 0.8559057676484851, iteration: 302924
loss: 0.9868065118789673,grad_norm: 0.999999771949258, iteration: 302925
loss: 1.0697520971298218,grad_norm: 0.9999999900093024, iteration: 302926
loss: 1.017931580543518,grad_norm: 0.9999999314014184, iteration: 302927
loss: 1.0016934871673584,grad_norm: 0.7685494959130585, iteration: 302928
loss: 1.004245400428772,grad_norm: 0.9364888527970195, iteration: 302929
loss: 1.0099855661392212,grad_norm: 0.8592821218300343, iteration: 302930
loss: 1.0712519884109497,grad_norm: 0.9999992098835382, iteration: 302931
loss: 1.057478666305542,grad_norm: 0.9471126362399611, iteration: 302932
loss: 1.0251184701919556,grad_norm: 0.9844967646331105, iteration: 302933
loss: 1.0479844808578491,grad_norm: 0.883235815541652, iteration: 302934
loss: 1.0268735885620117,grad_norm: 0.9999995085203849, iteration: 302935
loss: 1.1450839042663574,grad_norm: 0.9999997497082168, iteration: 302936
loss: 1.0202559232711792,grad_norm: 0.999998886394646, iteration: 302937
loss: 1.0271598100662231,grad_norm: 0.9296351505971543, iteration: 302938
loss: 1.013523817062378,grad_norm: 0.9999993365348094, iteration: 302939
loss: 1.0102190971374512,grad_norm: 0.7719668313614549, iteration: 302940
loss: 0.9927601218223572,grad_norm: 0.8713810888938202, iteration: 302941
loss: 0.9882826209068298,grad_norm: 0.9999989860643846, iteration: 302942
loss: 0.9797155261039734,grad_norm: 0.9999994058301872, iteration: 302943
loss: 0.9988127946853638,grad_norm: 0.8304795885323448, iteration: 302944
loss: 0.9951316118240356,grad_norm: 0.999999710591173, iteration: 302945
loss: 1.0007734298706055,grad_norm: 0.9999990714585111, iteration: 302946
loss: 1.0104907751083374,grad_norm: 0.951883002611106, iteration: 302947
loss: 1.0322171449661255,grad_norm: 0.8821887898298467, iteration: 302948
loss: 1.0311436653137207,grad_norm: 0.8963920797994438, iteration: 302949
loss: 0.98088139295578,grad_norm: 0.8127058224253084, iteration: 302950
loss: 1.0196748971939087,grad_norm: 0.999999427246855, iteration: 302951
loss: 1.0262149572372437,grad_norm: 0.8474691756053685, iteration: 302952
loss: 1.01546049118042,grad_norm: 0.719880413647699, iteration: 302953
loss: 1.0135395526885986,grad_norm: 0.9999990410781744, iteration: 302954
loss: 1.0117141008377075,grad_norm: 0.8151763335411086, iteration: 302955
loss: 1.0636004209518433,grad_norm: 0.9999999525228134, iteration: 302956
loss: 1.013925313949585,grad_norm: 0.9999991331918551, iteration: 302957
loss: 1.0223909616470337,grad_norm: 0.9999999544895768, iteration: 302958
loss: 1.0607385635375977,grad_norm: 0.9999996409469933, iteration: 302959
loss: 1.0323787927627563,grad_norm: 0.8908204908302578, iteration: 302960
loss: 1.0414505004882812,grad_norm: 0.9817503125933789, iteration: 302961
loss: 1.002291202545166,grad_norm: 0.9789152760945549, iteration: 302962
loss: 1.0063908100128174,grad_norm: 0.7578221353293625, iteration: 302963
loss: 0.9522197842597961,grad_norm: 0.9704912364858993, iteration: 302964
loss: 1.00034499168396,grad_norm: 0.9664813547319192, iteration: 302965
loss: 1.0180758237838745,grad_norm: 0.9999990140432827, iteration: 302966
loss: 1.038618803024292,grad_norm: 0.999999436066491, iteration: 302967
loss: 0.9747323989868164,grad_norm: 0.9999989642957032, iteration: 302968
loss: 1.0932880640029907,grad_norm: 0.9573618231168439, iteration: 302969
loss: 1.1391621828079224,grad_norm: 0.9999998867983841, iteration: 302970
loss: 1.0126042366027832,grad_norm: 0.8478221234616129, iteration: 302971
loss: 1.033606767654419,grad_norm: 0.8634466677021657, iteration: 302972
loss: 0.9993425011634827,grad_norm: 0.8632551837929101, iteration: 302973
loss: 1.0350277423858643,grad_norm: 0.9999993302094188, iteration: 302974
loss: 1.0267157554626465,grad_norm: 0.9895221354078734, iteration: 302975
loss: 1.104804277420044,grad_norm: 0.9719214684382912, iteration: 302976
loss: 0.9972004890441895,grad_norm: 0.8133545081369568, iteration: 302977
loss: 1.0489751100540161,grad_norm: 0.9999997188469427, iteration: 302978
loss: 1.0354180335998535,grad_norm: 0.9999990759779135, iteration: 302979
loss: 1.0211427211761475,grad_norm: 0.9999991545775877, iteration: 302980
loss: 1.0085536241531372,grad_norm: 0.7879691561452284, iteration: 302981
loss: 1.0541560649871826,grad_norm: 0.7445534850670983, iteration: 302982
loss: 0.9856855869293213,grad_norm: 0.9057590842956486, iteration: 302983
loss: 1.0593504905700684,grad_norm: 0.9999992966507364, iteration: 302984
loss: 1.020326018333435,grad_norm: 0.8895765916924665, iteration: 302985
loss: 0.9740224480628967,grad_norm: 0.7983111773053811, iteration: 302986
loss: 1.0001717805862427,grad_norm: 0.7180745281202471, iteration: 302987
loss: 1.0208327770233154,grad_norm: 0.8699731368479869, iteration: 302988
loss: 0.9670875072479248,grad_norm: 0.7795927230346376, iteration: 302989
loss: 1.0401813983917236,grad_norm: 0.8550737434030827, iteration: 302990
loss: 0.9834318161010742,grad_norm: 0.8286318023998355, iteration: 302991
loss: 1.0298993587493896,grad_norm: 0.9547167233662366, iteration: 302992
loss: 1.0403674840927124,grad_norm: 0.9768161086205875, iteration: 302993
loss: 0.994149386882782,grad_norm: 0.7993211020231045, iteration: 302994
loss: 1.0268782377243042,grad_norm: 0.8050522723515001, iteration: 302995
loss: 1.0724449157714844,grad_norm: 0.999999832984533, iteration: 302996
loss: 1.0121880769729614,grad_norm: 0.9999991420036712, iteration: 302997
loss: 0.9516999125480652,grad_norm: 0.7945598891661562, iteration: 302998
loss: 1.0095679759979248,grad_norm: 0.8871581677349145, iteration: 302999
loss: 1.0182558298110962,grad_norm: 0.7187666722461674, iteration: 303000
loss: 1.0328266620635986,grad_norm: 0.9999999735356772, iteration: 303001
loss: 0.997954785823822,grad_norm: 0.950209829637422, iteration: 303002
loss: 1.0105185508728027,grad_norm: 0.7116182017870515, iteration: 303003
loss: 1.121254324913025,grad_norm: 0.9999995150818403, iteration: 303004
loss: 0.9941253662109375,grad_norm: 0.9002244163038274, iteration: 303005
loss: 1.0107532739639282,grad_norm: 0.9767139123004889, iteration: 303006
loss: 1.1080586910247803,grad_norm: 0.9999998060356889, iteration: 303007
loss: 0.9890950322151184,grad_norm: 0.9340394763190871, iteration: 303008
loss: 0.9609168171882629,grad_norm: 0.9350316213254806, iteration: 303009
loss: 1.0420280694961548,grad_norm: 0.9999992412755357, iteration: 303010
loss: 1.015051007270813,grad_norm: 0.822755592613962, iteration: 303011
loss: 1.022010087966919,grad_norm: 0.8269289654100682, iteration: 303012
loss: 1.0784037113189697,grad_norm: 0.9999990737808976, iteration: 303013
loss: 1.0072436332702637,grad_norm: 1.0000000232528237, iteration: 303014
loss: 1.073689341545105,grad_norm: 0.8566927961957921, iteration: 303015
loss: 0.9967681169509888,grad_norm: 0.9999990652563925, iteration: 303016
loss: 1.0393582582473755,grad_norm: 0.848522894772624, iteration: 303017
loss: 0.9811778664588928,grad_norm: 0.8412329077617491, iteration: 303018
loss: 1.0334089994430542,grad_norm: 0.8338728344785358, iteration: 303019
loss: 1.1103354692459106,grad_norm: 0.9999992134640936, iteration: 303020
loss: 0.9759562015533447,grad_norm: 0.9435251539845044, iteration: 303021
loss: 0.9839282631874084,grad_norm: 0.8929903425809088, iteration: 303022
loss: 1.0223331451416016,grad_norm: 0.9070586920703239, iteration: 303023
loss: 1.0490084886550903,grad_norm: 0.9999998180817137, iteration: 303024
loss: 1.0181711912155151,grad_norm: 0.8531390873461154, iteration: 303025
loss: 1.0395216941833496,grad_norm: 0.7291537236546762, iteration: 303026
loss: 1.0362495183944702,grad_norm: 0.9999993118028786, iteration: 303027
loss: 1.0098824501037598,grad_norm: 0.7739207605839112, iteration: 303028
loss: 1.143239974975586,grad_norm: 0.999999480181925, iteration: 303029
loss: 1.0705827474594116,grad_norm: 0.999999174488023, iteration: 303030
loss: 1.0185436010360718,grad_norm: 0.9999993647979629, iteration: 303031
loss: 1.1497083902359009,grad_norm: 0.9999996603482824, iteration: 303032
loss: 0.9937551617622375,grad_norm: 0.7194432572623786, iteration: 303033
loss: 0.9715594053268433,grad_norm: 0.81209442077216, iteration: 303034
loss: 1.0184822082519531,grad_norm: 0.9999999145016251, iteration: 303035
loss: 1.0383543968200684,grad_norm: 0.9999995192348337, iteration: 303036
loss: 0.9916127324104309,grad_norm: 0.6251949495469681, iteration: 303037
loss: 1.0374119281768799,grad_norm: 0.8663526316595667, iteration: 303038
loss: 0.9904444813728333,grad_norm: 0.8678321044276138, iteration: 303039
loss: 0.9805915355682373,grad_norm: 0.9999997267521706, iteration: 303040
loss: 1.1008622646331787,grad_norm: 0.9999998968334963, iteration: 303041
loss: 1.0056843757629395,grad_norm: 0.9999997796854297, iteration: 303042
loss: 0.9980195760726929,grad_norm: 0.775062359444831, iteration: 303043
loss: 0.9809016585350037,grad_norm: 0.9999990609373783, iteration: 303044
loss: 1.007561445236206,grad_norm: 0.9999990308670109, iteration: 303045
loss: 1.069234848022461,grad_norm: 0.9999998584381179, iteration: 303046
loss: 1.0308078527450562,grad_norm: 0.8236728177032876, iteration: 303047
loss: 1.026478886604309,grad_norm: 0.8160202690879061, iteration: 303048
loss: 1.033653974533081,grad_norm: 0.9999993465264039, iteration: 303049
loss: 1.1551631689071655,grad_norm: 0.9999995808972005, iteration: 303050
loss: 0.9833226799964905,grad_norm: 0.8075479264726741, iteration: 303051
loss: 1.0027028322219849,grad_norm: 0.9976199935543939, iteration: 303052
loss: 1.0291163921356201,grad_norm: 0.9999991974338184, iteration: 303053
loss: 0.9651545286178589,grad_norm: 0.874339970339651, iteration: 303054
loss: 1.024096131324768,grad_norm: 0.949219220684669, iteration: 303055
loss: 0.9674433469772339,grad_norm: 0.9999990853090951, iteration: 303056
loss: 0.9894922971725464,grad_norm: 0.8315845270142266, iteration: 303057
loss: 0.999954342842102,grad_norm: 0.9999993532226894, iteration: 303058
loss: 1.0209163427352905,grad_norm: 0.7810352813349343, iteration: 303059
loss: 1.0048131942749023,grad_norm: 0.9999990699849851, iteration: 303060
loss: 1.0286861658096313,grad_norm: 0.8685413639193922, iteration: 303061
loss: 0.9599961042404175,grad_norm: 0.780042733483681, iteration: 303062
loss: 0.9724406599998474,grad_norm: 0.8717091175979013, iteration: 303063
loss: 1.0180381536483765,grad_norm: 0.7021330758868266, iteration: 303064
loss: 0.9980985522270203,grad_norm: 0.9931706078623054, iteration: 303065
loss: 1.1212407350540161,grad_norm: 0.9999997629484224, iteration: 303066
loss: 0.9700103998184204,grad_norm: 0.9999996464507246, iteration: 303067
loss: 0.9817220568656921,grad_norm: 0.9999991683324924, iteration: 303068
loss: 1.0351630449295044,grad_norm: 0.814600340325761, iteration: 303069
loss: 1.0704526901245117,grad_norm: 0.999999273439016, iteration: 303070
loss: 1.0987083911895752,grad_norm: 0.9999991192894693, iteration: 303071
loss: 1.0566411018371582,grad_norm: 0.9999996867701024, iteration: 303072
loss: 1.022046685218811,grad_norm: 0.9296773093584935, iteration: 303073
loss: 1.0169130563735962,grad_norm: 0.8619010222417647, iteration: 303074
loss: 1.0470377206802368,grad_norm: 0.8957965531574708, iteration: 303075
loss: 1.0063165426254272,grad_norm: 0.7735369813869672, iteration: 303076
loss: 1.010076880455017,grad_norm: 0.8373496343100147, iteration: 303077
loss: 0.9907099008560181,grad_norm: 0.855880967936788, iteration: 303078
loss: 1.1253892183303833,grad_norm: 0.9999992904908757, iteration: 303079
loss: 0.9310743808746338,grad_norm: 0.8585997012023153, iteration: 303080
loss: 1.0026557445526123,grad_norm: 0.9999992968387854, iteration: 303081
loss: 1.0191352367401123,grad_norm: 0.8280515330956142, iteration: 303082
loss: 0.9881181120872498,grad_norm: 0.8213129958936551, iteration: 303083
loss: 0.9928208589553833,grad_norm: 0.7958628499068378, iteration: 303084
loss: 0.9658628106117249,grad_norm: 0.6685460591447722, iteration: 303085
loss: 1.0067466497421265,grad_norm: 0.9550077362197495, iteration: 303086
loss: 1.0189430713653564,grad_norm: 0.7904385634549809, iteration: 303087
loss: 0.9976273775100708,grad_norm: 0.9999991025619804, iteration: 303088
loss: 0.9889336228370667,grad_norm: 0.8730440423800946, iteration: 303089
loss: 1.0246326923370361,grad_norm: 0.848616710331261, iteration: 303090
loss: 1.0321804285049438,grad_norm: 0.9797848645212557, iteration: 303091
loss: 0.9932847023010254,grad_norm: 0.8185838613863059, iteration: 303092
loss: 1.0321801900863647,grad_norm: 0.9999990990705496, iteration: 303093
loss: 0.9773957133293152,grad_norm: 0.8350509141120974, iteration: 303094
loss: 0.9933538436889648,grad_norm: 0.8423832448991255, iteration: 303095
loss: 0.9661689400672913,grad_norm: 0.8730609883290863, iteration: 303096
loss: 1.0186971426010132,grad_norm: 0.8587103993827733, iteration: 303097
loss: 1.0023865699768066,grad_norm: 0.991004304798953, iteration: 303098
loss: 1.0010343790054321,grad_norm: 0.8094852779960571, iteration: 303099
loss: 1.0166575908660889,grad_norm: 0.9999990095242444, iteration: 303100
loss: 0.9957801103591919,grad_norm: 0.9058769117649323, iteration: 303101
loss: 1.038277268409729,grad_norm: 0.9999998460489782, iteration: 303102
loss: 1.0607936382293701,grad_norm: 0.8729382335138918, iteration: 303103
loss: 1.0326969623565674,grad_norm: 0.9999990809129702, iteration: 303104
loss: 1.0474820137023926,grad_norm: 0.9999994264349389, iteration: 303105
loss: 1.0053379535675049,grad_norm: 0.8115867398344464, iteration: 303106
loss: 0.9804681539535522,grad_norm: 0.8750529583525435, iteration: 303107
loss: 1.0414583683013916,grad_norm: 1.000000037529309, iteration: 303108
loss: 0.9626002311706543,grad_norm: 0.9999993171618561, iteration: 303109
loss: 1.015244960784912,grad_norm: 0.8324976107880026, iteration: 303110
loss: 1.0178146362304688,grad_norm: 0.999999421309614, iteration: 303111
loss: 1.0210423469543457,grad_norm: 0.9999996906209885, iteration: 303112
loss: 1.0151467323303223,grad_norm: 0.9999996198224878, iteration: 303113
loss: 1.030387043952942,grad_norm: 0.791513219052426, iteration: 303114
loss: 1.078748106956482,grad_norm: 0.9999992317939222, iteration: 303115
loss: 1.0377166271209717,grad_norm: 0.8280855716864636, iteration: 303116
loss: 0.9760806560516357,grad_norm: 0.8727961528378774, iteration: 303117
loss: 0.9885867238044739,grad_norm: 0.802918143274979, iteration: 303118
loss: 0.9878849387168884,grad_norm: 0.9903584552328129, iteration: 303119
loss: 1.013533115386963,grad_norm: 0.9999993518174574, iteration: 303120
loss: 0.9574639201164246,grad_norm: 0.7892799876340694, iteration: 303121
loss: 0.9606440663337708,grad_norm: 0.99999965287337, iteration: 303122
loss: 0.9921338558197021,grad_norm: 0.9105471650927823, iteration: 303123
loss: 1.0307649374008179,grad_norm: 0.9999993478856373, iteration: 303124
loss: 1.017035722732544,grad_norm: 0.7531885872729455, iteration: 303125
loss: 1.0073992013931274,grad_norm: 0.9999990472338558, iteration: 303126
loss: 1.0279239416122437,grad_norm: 0.9583380092948695, iteration: 303127
loss: 1.0567982196807861,grad_norm: 0.9999992662054727, iteration: 303128
loss: 1.0126585960388184,grad_norm: 0.9226586966175132, iteration: 303129
loss: 1.0506192445755005,grad_norm: 0.8570733771475358, iteration: 303130
loss: 1.0187726020812988,grad_norm: 0.9999991211153787, iteration: 303131
loss: 0.9917321801185608,grad_norm: 0.8633801624428643, iteration: 303132
loss: 1.0182313919067383,grad_norm: 0.9011330975797409, iteration: 303133
loss: 1.0561479330062866,grad_norm: 0.9999996775588573, iteration: 303134
loss: 1.0012929439544678,grad_norm: 0.9999997759269357, iteration: 303135
loss: 1.0993000268936157,grad_norm: 0.9172935325006116, iteration: 303136
loss: 1.0069069862365723,grad_norm: 0.8639332628431496, iteration: 303137
loss: 1.0338850021362305,grad_norm: 0.9719084344507786, iteration: 303138
loss: 1.014907717704773,grad_norm: 0.9999994606437759, iteration: 303139
loss: 0.976616382598877,grad_norm: 0.811861651780408, iteration: 303140
loss: 1.0221092700958252,grad_norm: 0.8211210916979876, iteration: 303141
loss: 1.037066102027893,grad_norm: 0.9999991605218964, iteration: 303142
loss: 0.9767635464668274,grad_norm: 0.9163563994994269, iteration: 303143
loss: 1.0292984247207642,grad_norm: 0.8623071431655717, iteration: 303144
loss: 0.9938041567802429,grad_norm: 0.9091691058857214, iteration: 303145
loss: 0.9746724963188171,grad_norm: 0.8041709677753862, iteration: 303146
loss: 1.1282166242599487,grad_norm: 0.9999996023723333, iteration: 303147
loss: 1.0159413814544678,grad_norm: 0.7181394588420844, iteration: 303148
loss: 0.9992814660072327,grad_norm: 0.7482851527673888, iteration: 303149
loss: 0.9942276477813721,grad_norm: 0.9999990810854803, iteration: 303150
loss: 0.9918398857116699,grad_norm: 0.8299773355031601, iteration: 303151
loss: 1.0155493021011353,grad_norm: 0.7548204279034589, iteration: 303152
loss: 1.0232102870941162,grad_norm: 0.9083503687583322, iteration: 303153
loss: 1.021063208580017,grad_norm: 0.7442534100644088, iteration: 303154
loss: 0.9934678673744202,grad_norm: 0.802269236496078, iteration: 303155
loss: 1.0128240585327148,grad_norm: 0.9999991134628591, iteration: 303156
loss: 0.9730169773101807,grad_norm: 0.9230657576957689, iteration: 303157
loss: 1.005842924118042,grad_norm: 0.9142334853443198, iteration: 303158
loss: 1.0276764631271362,grad_norm: 0.9999989929712644, iteration: 303159
loss: 1.0161654949188232,grad_norm: 0.8393691224810287, iteration: 303160
loss: 1.0335923433303833,grad_norm: 0.7113910137762287, iteration: 303161
loss: 0.973516047000885,grad_norm: 0.7806480255660424, iteration: 303162
loss: 1.0365612506866455,grad_norm: 0.7577267819729274, iteration: 303163
loss: 0.9539998769760132,grad_norm: 0.8169585196849906, iteration: 303164
loss: 1.0028607845306396,grad_norm: 0.9395616818278223, iteration: 303165
loss: 0.9890583157539368,grad_norm: 0.7612806388366821, iteration: 303166
loss: 1.0257763862609863,grad_norm: 0.8002601484907472, iteration: 303167
loss: 1.0755128860473633,grad_norm: 1.0000000481749132, iteration: 303168
loss: 0.9862991571426392,grad_norm: 0.7223793448671963, iteration: 303169
loss: 1.063071370124817,grad_norm: 0.903620316782083, iteration: 303170
loss: 0.999690055847168,grad_norm: 0.9538700227212498, iteration: 303171
loss: 0.9905046820640564,grad_norm: 0.7151608063095651, iteration: 303172
loss: 0.9537814855575562,grad_norm: 0.8401939603321789, iteration: 303173
loss: 1.0990694761276245,grad_norm: 0.9999994123002665, iteration: 303174
loss: 0.9920375347137451,grad_norm: 0.848570938475999, iteration: 303175
loss: 1.0647927522659302,grad_norm: 0.8694649150669359, iteration: 303176
loss: 0.9928742051124573,grad_norm: 0.7744789388143973, iteration: 303177
loss: 1.1089677810668945,grad_norm: 0.999999641611113, iteration: 303178
loss: 0.9712247848510742,grad_norm: 0.7979074648061483, iteration: 303179
loss: 1.0029634237289429,grad_norm: 0.8272720883535707, iteration: 303180
loss: 1.0304538011550903,grad_norm: 0.9999993370727829, iteration: 303181
loss: 1.036075472831726,grad_norm: 0.8096612108422161, iteration: 303182
loss: 1.0187240839004517,grad_norm: 0.8008751017110182, iteration: 303183
loss: 1.0344865322113037,grad_norm: 0.9999995584485739, iteration: 303184
loss: 0.9834560751914978,grad_norm: 0.9431549583500837, iteration: 303185
loss: 0.9579501152038574,grad_norm: 0.8508132691134311, iteration: 303186
loss: 1.0131617784500122,grad_norm: 0.9999993835380134, iteration: 303187
loss: 0.9766451120376587,grad_norm: 0.9999991186753512, iteration: 303188
loss: 1.0054445266723633,grad_norm: 0.832164603518241, iteration: 303189
loss: 1.0048466920852661,grad_norm: 0.8826196669873758, iteration: 303190
loss: 1.004676342010498,grad_norm: 0.9505009792379342, iteration: 303191
loss: 1.0175700187683105,grad_norm: 0.7801772417979096, iteration: 303192
loss: 0.9456242322921753,grad_norm: 0.7659715492118471, iteration: 303193
loss: 0.9945504069328308,grad_norm: 0.9999990719974092, iteration: 303194
loss: 1.0033003091812134,grad_norm: 0.8652201392236496, iteration: 303195
loss: 1.0255579948425293,grad_norm: 0.9538962586103107, iteration: 303196
loss: 1.0059022903442383,grad_norm: 0.8259649032201355, iteration: 303197
loss: 1.0142778158187866,grad_norm: 0.7167225723930953, iteration: 303198
loss: 0.9912813305854797,grad_norm: 0.6941865538043224, iteration: 303199
loss: 1.1077096462249756,grad_norm: 0.9999989564977017, iteration: 303200
loss: 0.9651805758476257,grad_norm: 0.7280943662572664, iteration: 303201
loss: 1.008595585823059,grad_norm: 0.8298997889658701, iteration: 303202
loss: 1.0260885953903198,grad_norm: 0.9085291319593634, iteration: 303203
loss: 0.9990636110305786,grad_norm: 0.9999992747405467, iteration: 303204
loss: 1.017198085784912,grad_norm: 0.9999991396446858, iteration: 303205
loss: 1.019349217414856,grad_norm: 0.9435557692417994, iteration: 303206
loss: 0.985414981842041,grad_norm: 0.7829744676767196, iteration: 303207
loss: 1.0031671524047852,grad_norm: 0.9156387737891242, iteration: 303208
loss: 1.0107569694519043,grad_norm: 0.9999991504788456, iteration: 303209
loss: 1.037821650505066,grad_norm: 0.855162760205486, iteration: 303210
loss: 0.9882733821868896,grad_norm: 0.8332263683255375, iteration: 303211
loss: 1.0152034759521484,grad_norm: 0.9498991123221547, iteration: 303212
loss: 1.044184684753418,grad_norm: 0.760772387915026, iteration: 303213
loss: 1.0215624570846558,grad_norm: 0.9999992949216391, iteration: 303214
loss: 1.03244948387146,grad_norm: 0.9999992919941904, iteration: 303215
loss: 1.0067468881607056,grad_norm: 0.9578348083890581, iteration: 303216
loss: 0.9643645882606506,grad_norm: 0.8143757093462509, iteration: 303217
loss: 1.0213792324066162,grad_norm: 0.7802505158640266, iteration: 303218
loss: 1.0460119247436523,grad_norm: 0.9545752681282911, iteration: 303219
loss: 1.0446436405181885,grad_norm: 0.8631912427320798, iteration: 303220
loss: 1.033446192741394,grad_norm: 0.8392467583830445, iteration: 303221
loss: 1.065028190612793,grad_norm: 0.9999990829100233, iteration: 303222
loss: 1.0255738496780396,grad_norm: 0.9312594568944936, iteration: 303223
loss: 1.0123575925827026,grad_norm: 0.9999994327912255, iteration: 303224
loss: 1.0854367017745972,grad_norm: 0.9245592454378042, iteration: 303225
loss: 0.9774865508079529,grad_norm: 0.9999990521147507, iteration: 303226
loss: 1.0297789573669434,grad_norm: 0.9999991303398392, iteration: 303227
loss: 1.001442313194275,grad_norm: 0.9999990148479725, iteration: 303228
loss: 1.0541654825210571,grad_norm: 0.999999228180278, iteration: 303229
loss: 1.0226171016693115,grad_norm: 0.7663959393493174, iteration: 303230
loss: 1.0649007558822632,grad_norm: 0.9999996199165264, iteration: 303231
loss: 0.9829817414283752,grad_norm: 0.7635798365991118, iteration: 303232
loss: 0.979762077331543,grad_norm: 0.8820759235217233, iteration: 303233
loss: 1.018412709236145,grad_norm: 0.9999997902422716, iteration: 303234
loss: 1.0279353857040405,grad_norm: 0.9999990589827803, iteration: 303235
loss: 1.0039284229278564,grad_norm: 0.8052621928705463, iteration: 303236
loss: 0.9642623662948608,grad_norm: 0.7249357894341323, iteration: 303237
loss: 0.9703790545463562,grad_norm: 0.8218581481877879, iteration: 303238
loss: 1.0433547496795654,grad_norm: 0.9999998879452696, iteration: 303239
loss: 1.0050779581069946,grad_norm: 0.8747267601813503, iteration: 303240
loss: 1.0086631774902344,grad_norm: 0.9940853539098125, iteration: 303241
loss: 0.9956797361373901,grad_norm: 0.8762507399531823, iteration: 303242
loss: 1.0294232368469238,grad_norm: 0.932055682262038, iteration: 303243
loss: 0.9854789972305298,grad_norm: 0.7964049770569167, iteration: 303244
loss: 1.0078167915344238,grad_norm: 0.9999991325439992, iteration: 303245
loss: 0.996965229511261,grad_norm: 0.8164849118199516, iteration: 303246
loss: 0.995238184928894,grad_norm: 0.7863868162166383, iteration: 303247
loss: 0.9832444190979004,grad_norm: 0.9005107928348917, iteration: 303248
loss: 1.0099016427993774,grad_norm: 0.8691724573999584, iteration: 303249
loss: 0.9579823613166809,grad_norm: 0.7634092926459594, iteration: 303250
loss: 0.9609729647636414,grad_norm: 0.8532644897583406, iteration: 303251
loss: 0.9866041541099548,grad_norm: 0.9215386644441336, iteration: 303252
loss: 1.016600251197815,grad_norm: 0.9088409059332412, iteration: 303253
loss: 1.0285691022872925,grad_norm: 0.9818162224841784, iteration: 303254
loss: 1.0028995275497437,grad_norm: 0.8253239969969223, iteration: 303255
loss: 1.022193431854248,grad_norm: 0.9999997365408649, iteration: 303256
loss: 1.029259204864502,grad_norm: 0.6889072991144818, iteration: 303257
loss: 1.0025837421417236,grad_norm: 0.9999992398365717, iteration: 303258
loss: 0.9856411218643188,grad_norm: 0.9282437042214937, iteration: 303259
loss: 0.9955980777740479,grad_norm: 0.9108360837061473, iteration: 303260
loss: 0.9630773067474365,grad_norm: 0.850279620677419, iteration: 303261
loss: 1.0320013761520386,grad_norm: 0.8426020893541343, iteration: 303262
loss: 1.0090649127960205,grad_norm: 0.9904029067439238, iteration: 303263
loss: 1.0002353191375732,grad_norm: 0.8537612368987842, iteration: 303264
loss: 0.9994512796401978,grad_norm: 0.7694790488003983, iteration: 303265
loss: 1.0478051900863647,grad_norm: 0.9077311112527303, iteration: 303266
loss: 1.0130219459533691,grad_norm: 0.787702365583307, iteration: 303267
loss: 1.0261988639831543,grad_norm: 0.8813489825613035, iteration: 303268
loss: 1.0055429935455322,grad_norm: 0.7229804037586013, iteration: 303269
loss: 1.0401134490966797,grad_norm: 0.9999996269801671, iteration: 303270
loss: 1.0047658681869507,grad_norm: 0.9999991111233028, iteration: 303271
loss: 1.0009757280349731,grad_norm: 0.8254302563403413, iteration: 303272
loss: 0.9808493256568909,grad_norm: 0.8542862895405563, iteration: 303273
loss: 1.0231961011886597,grad_norm: 0.999999679536203, iteration: 303274
loss: 1.0170705318450928,grad_norm: 0.9999997668921771, iteration: 303275
loss: 0.9850891828536987,grad_norm: 0.8398746540487365, iteration: 303276
loss: 1.005388617515564,grad_norm: 0.7654778692010796, iteration: 303277
loss: 1.0194247961044312,grad_norm: 0.8139024128211602, iteration: 303278
loss: 1.003813624382019,grad_norm: 0.77619768816021, iteration: 303279
loss: 1.0524991750717163,grad_norm: 0.9127450088927389, iteration: 303280
loss: 1.0037517547607422,grad_norm: 0.8396427571411722, iteration: 303281
loss: 0.9954910278320312,grad_norm: 0.8701541286211153, iteration: 303282
loss: 1.025001883506775,grad_norm: 0.9999993197053487, iteration: 303283
loss: 1.026854157447815,grad_norm: 0.999999089798727, iteration: 303284
loss: 1.0202792882919312,grad_norm: 0.8446352349266929, iteration: 303285
loss: 1.0314267873764038,grad_norm: 0.9999990859121717, iteration: 303286
loss: 0.997886598110199,grad_norm: 0.997787949765934, iteration: 303287
loss: 1.045780062675476,grad_norm: 0.9489376186817143, iteration: 303288
loss: 0.9894992709159851,grad_norm: 0.7270255212528112, iteration: 303289
loss: 0.9917859435081482,grad_norm: 0.9262926919795028, iteration: 303290
loss: 1.0295637845993042,grad_norm: 0.9999991678883785, iteration: 303291
loss: 0.9804965853691101,grad_norm: 0.9417467623163936, iteration: 303292
loss: 0.9882798790931702,grad_norm: 0.7677011392250312, iteration: 303293
loss: 0.9459854364395142,grad_norm: 0.8900517556674402, iteration: 303294
loss: 0.9643453359603882,grad_norm: 0.7742743384363618, iteration: 303295
loss: 0.9949140548706055,grad_norm: 0.9151461770251915, iteration: 303296
loss: 0.9848167896270752,grad_norm: 0.8272986396090102, iteration: 303297
loss: 0.9771652817726135,grad_norm: 0.7837472880670459, iteration: 303298
loss: 0.9933516383171082,grad_norm: 0.8581817587430954, iteration: 303299
loss: 0.9954659342765808,grad_norm: 0.8833712759964847, iteration: 303300
loss: 0.9760139584541321,grad_norm: 0.8468076667918625, iteration: 303301
loss: 1.040582299232483,grad_norm: 0.9999992642584271, iteration: 303302
loss: 0.9770437479019165,grad_norm: 0.802776060969344, iteration: 303303
loss: 1.020195722579956,grad_norm: 0.9999990873071376, iteration: 303304
loss: 1.011224627494812,grad_norm: 0.8331536578817677, iteration: 303305
loss: 1.0711065530776978,grad_norm: 0.9999994456745759, iteration: 303306
loss: 1.0346088409423828,grad_norm: 0.9999992194380438, iteration: 303307
loss: 1.0004101991653442,grad_norm: 0.8103889274847961, iteration: 303308
loss: 1.023913025856018,grad_norm: 0.8753683945004969, iteration: 303309
loss: 1.0212068557739258,grad_norm: 0.8381304024521589, iteration: 303310
loss: 1.1219069957733154,grad_norm: 0.9999997443696192, iteration: 303311
loss: 1.0239834785461426,grad_norm: 0.7976148535306317, iteration: 303312
loss: 1.0073862075805664,grad_norm: 0.7725550756973623, iteration: 303313
loss: 0.970790445804596,grad_norm: 0.9999998058742939, iteration: 303314
loss: 1.027471661567688,grad_norm: 0.9999997480946057, iteration: 303315
loss: 0.9786916375160217,grad_norm: 0.8652106835137795, iteration: 303316
loss: 0.9865624904632568,grad_norm: 0.860842310179593, iteration: 303317
loss: 0.9987302422523499,grad_norm: 0.830428040441728, iteration: 303318
loss: 0.9718621969223022,grad_norm: 0.721069782598581, iteration: 303319
loss: 1.012749433517456,grad_norm: 0.980807450316268, iteration: 303320
loss: 0.993337869644165,grad_norm: 0.8727851389759972, iteration: 303321
loss: 0.9871424436569214,grad_norm: 0.7442176574054051, iteration: 303322
loss: 1.013665795326233,grad_norm: 0.9999998156080557, iteration: 303323
loss: 1.0212897062301636,grad_norm: 0.9159505227509871, iteration: 303324
loss: 0.992129921913147,grad_norm: 0.8753743463966467, iteration: 303325
loss: 0.9864071011543274,grad_norm: 0.664901032171305, iteration: 303326
loss: 1.0043777227401733,grad_norm: 0.6967113411924524, iteration: 303327
loss: 0.9992108345031738,grad_norm: 0.7042321627595978, iteration: 303328
loss: 0.9733535647392273,grad_norm: 0.773820613236814, iteration: 303329
loss: 1.0210052728652954,grad_norm: 0.999999147256302, iteration: 303330
loss: 1.1425467729568481,grad_norm: 0.9999990551451484, iteration: 303331
loss: 0.9899044632911682,grad_norm: 0.7205920044645108, iteration: 303332
loss: 0.960597038269043,grad_norm: 0.949911630297563, iteration: 303333
loss: 1.00283682346344,grad_norm: 0.8508403125821413, iteration: 303334
loss: 1.0026838779449463,grad_norm: 0.8153201228769996, iteration: 303335
loss: 1.0320500135421753,grad_norm: 0.8055662936725363, iteration: 303336
loss: 1.0660603046417236,grad_norm: 0.9999992282204733, iteration: 303337
loss: 0.9784321784973145,grad_norm: 0.9085056006982475, iteration: 303338
loss: 1.0239295959472656,grad_norm: 0.9999992658152435, iteration: 303339
loss: 1.1244016885757446,grad_norm: 0.9999995457727805, iteration: 303340
loss: 0.9755495190620422,grad_norm: 0.999999362038709, iteration: 303341
loss: 0.9842491149902344,grad_norm: 0.9861028331585869, iteration: 303342
loss: 1.0214221477508545,grad_norm: 0.9999995419541896, iteration: 303343
loss: 0.9852097034454346,grad_norm: 0.8650410786020221, iteration: 303344
loss: 1.060605525970459,grad_norm: 0.9531920114585035, iteration: 303345
loss: 1.0834022760391235,grad_norm: 0.8882704560713995, iteration: 303346
loss: 1.0100154876708984,grad_norm: 0.7836027278490625, iteration: 303347
loss: 0.9915626645088196,grad_norm: 0.999999178463123, iteration: 303348
loss: 0.9692931771278381,grad_norm: 0.8482161915617262, iteration: 303349
loss: 1.0127288103103638,grad_norm: 0.8406588974087866, iteration: 303350
loss: 1.0570298433303833,grad_norm: 0.999999449144595, iteration: 303351
loss: 1.0011470317840576,grad_norm: 0.8790172995643613, iteration: 303352
loss: 1.0273339748382568,grad_norm: 0.9999991729207229, iteration: 303353
loss: 0.9892320036888123,grad_norm: 0.8151276196151903, iteration: 303354
loss: 0.9813413619995117,grad_norm: 0.9389523829187472, iteration: 303355
loss: 1.011878252029419,grad_norm: 0.9999992914918422, iteration: 303356
loss: 0.9988475441932678,grad_norm: 0.999999063771757, iteration: 303357
loss: 0.9913890957832336,grad_norm: 0.8036556520017031, iteration: 303358
loss: 1.0631808042526245,grad_norm: 0.9999990762197747, iteration: 303359
loss: 1.008324384689331,grad_norm: 0.7872020095592281, iteration: 303360
loss: 1.0237315893173218,grad_norm: 0.9420582384429492, iteration: 303361
loss: 1.0356106758117676,grad_norm: 0.7052015279031338, iteration: 303362
loss: 1.020228624343872,grad_norm: 0.8979459216738662, iteration: 303363
loss: 0.9990719556808472,grad_norm: 0.8658096003206731, iteration: 303364
loss: 1.0035507678985596,grad_norm: 0.855078724052547, iteration: 303365
loss: 1.0903791189193726,grad_norm: 0.9999995529040125, iteration: 303366
loss: 1.006104588508606,grad_norm: 0.9999991924008349, iteration: 303367
loss: 0.9746856689453125,grad_norm: 0.8005164340713364, iteration: 303368
loss: 1.05968177318573,grad_norm: 0.9999991919647008, iteration: 303369
loss: 1.0261164903640747,grad_norm: 0.9999992737425891, iteration: 303370
loss: 0.985160768032074,grad_norm: 0.9902877950806318, iteration: 303371
loss: 1.1426019668579102,grad_norm: 0.9563284332462672, iteration: 303372
loss: 0.9996127486228943,grad_norm: 0.7129279270412479, iteration: 303373
loss: 1.0059748888015747,grad_norm: 0.8561126914764035, iteration: 303374
loss: 1.0138976573944092,grad_norm: 0.9083639130268406, iteration: 303375
loss: 1.0532588958740234,grad_norm: 0.8519409336340946, iteration: 303376
loss: 1.0228463411331177,grad_norm: 0.9999999278243649, iteration: 303377
loss: 1.1053745746612549,grad_norm: 0.9999998071694026, iteration: 303378
loss: 1.00130033493042,grad_norm: 0.8119328989320967, iteration: 303379
loss: 1.0147637128829956,grad_norm: 0.9999999189033427, iteration: 303380
loss: 1.010622501373291,grad_norm: 0.9575092873683797, iteration: 303381
loss: 0.9825576543807983,grad_norm: 0.9653222208299413, iteration: 303382
loss: 1.1685452461242676,grad_norm: 0.9999991499341592, iteration: 303383
loss: 1.0352596044540405,grad_norm: 0.8186702472814541, iteration: 303384
loss: 1.01219642162323,grad_norm: 0.9999996906673858, iteration: 303385
loss: 0.9948281645774841,grad_norm: 0.8897005050425533, iteration: 303386
loss: 1.0097368955612183,grad_norm: 0.8441120444130467, iteration: 303387
loss: 0.9966748356819153,grad_norm: 0.7584797175516369, iteration: 303388
loss: 1.0010679960250854,grad_norm: 0.8036594552333961, iteration: 303389
loss: 0.9971467852592468,grad_norm: 0.9828076310962608, iteration: 303390
loss: 1.0282385349273682,grad_norm: 0.8160322700472227, iteration: 303391
loss: 0.9899441003799438,grad_norm: 0.9999997483472928, iteration: 303392
loss: 1.0012649297714233,grad_norm: 0.7746922743519298, iteration: 303393
loss: 1.0179855823516846,grad_norm: 0.9999990254602947, iteration: 303394
loss: 0.9928035140037537,grad_norm: 0.9637247971785199, iteration: 303395
loss: 1.0389353036880493,grad_norm: 0.9999998505921678, iteration: 303396
loss: 0.9865871667861938,grad_norm: 0.9940094460832525, iteration: 303397
loss: 1.0110816955566406,grad_norm: 0.9999992918383025, iteration: 303398
loss: 0.996526300907135,grad_norm: 0.7650293844534674, iteration: 303399
loss: 1.0055160522460938,grad_norm: 0.7736543194006568, iteration: 303400
loss: 1.020365595817566,grad_norm: 0.8396281519085758, iteration: 303401
loss: 1.0425738096237183,grad_norm: 0.9984308820871957, iteration: 303402
loss: 1.060978651046753,grad_norm: 0.8060201616666263, iteration: 303403
loss: 0.9801903963088989,grad_norm: 0.8127508025139406, iteration: 303404
loss: 0.963330090045929,grad_norm: 0.9533552854409445, iteration: 303405
loss: 1.0383728742599487,grad_norm: 0.8950509926836635, iteration: 303406
loss: 1.0194536447525024,grad_norm: 0.9999991479577193, iteration: 303407
loss: 0.9839347004890442,grad_norm: 0.9083663535718235, iteration: 303408
loss: 1.0066602230072021,grad_norm: 0.7498493869406656, iteration: 303409
loss: 1.0112050771713257,grad_norm: 0.8023275321287242, iteration: 303410
loss: 0.9852443933486938,grad_norm: 0.8764539369928169, iteration: 303411
loss: 1.0154896974563599,grad_norm: 0.8741657519442149, iteration: 303412
loss: 0.9790452122688293,grad_norm: 0.9632555994947688, iteration: 303413
loss: 0.995585024356842,grad_norm: 0.8051638767176541, iteration: 303414
loss: 1.0373355150222778,grad_norm: 0.9999994527484467, iteration: 303415
loss: 1.0092339515686035,grad_norm: 0.8514293451190217, iteration: 303416
loss: 1.0244159698486328,grad_norm: 0.8381573448791585, iteration: 303417
loss: 1.0157679319381714,grad_norm: 0.8472854916792566, iteration: 303418
loss: 1.0062284469604492,grad_norm: 0.7304384500317113, iteration: 303419
loss: 1.1351341009140015,grad_norm: 0.9999993077899776, iteration: 303420
loss: 1.0045126676559448,grad_norm: 0.8769120010865497, iteration: 303421
loss: 0.9798222184181213,grad_norm: 0.8884715475092345, iteration: 303422
loss: 1.003483533859253,grad_norm: 0.9999998733224407, iteration: 303423
loss: 0.9967454671859741,grad_norm: 0.9999990154688798, iteration: 303424
loss: 0.9976930618286133,grad_norm: 0.8571782935740954, iteration: 303425
loss: 1.0364880561828613,grad_norm: 0.9505960668992837, iteration: 303426
loss: 1.0295815467834473,grad_norm: 0.8468595453327749, iteration: 303427
loss: 1.002279281616211,grad_norm: 0.8468722949770974, iteration: 303428
loss: 1.0119805335998535,grad_norm: 0.9205102805808263, iteration: 303429
loss: 0.9840015172958374,grad_norm: 0.8492884930041351, iteration: 303430
loss: 1.0123943090438843,grad_norm: 0.8696412571377338, iteration: 303431
loss: 0.9779123663902283,grad_norm: 0.961166246273001, iteration: 303432
loss: 0.9732930064201355,grad_norm: 0.9374837583543878, iteration: 303433
loss: 0.9725831747055054,grad_norm: 0.7858189191124213, iteration: 303434
loss: 0.9873215556144714,grad_norm: 0.7275863168716415, iteration: 303435
loss: 1.0253351926803589,grad_norm: 0.7775073402050481, iteration: 303436
loss: 1.0002837181091309,grad_norm: 0.6663060145938259, iteration: 303437
loss: 1.0097039937973022,grad_norm: 0.8819772321183321, iteration: 303438
loss: 0.9991154074668884,grad_norm: 0.7399658134859292, iteration: 303439
loss: 1.028556227684021,grad_norm: 0.9999995058572371, iteration: 303440
loss: 1.005210280418396,grad_norm: 0.8903916713509578, iteration: 303441
loss: 1.055376410484314,grad_norm: 0.9999999527049211, iteration: 303442
loss: 1.0408450365066528,grad_norm: 0.9031748013853392, iteration: 303443
loss: 1.0061726570129395,grad_norm: 0.7375829387154444, iteration: 303444
loss: 1.015315294265747,grad_norm: 0.9854724876974142, iteration: 303445
loss: 0.9802512526512146,grad_norm: 0.9768266451992127, iteration: 303446
loss: 0.9985357522964478,grad_norm: 0.8794772124197072, iteration: 303447
loss: 0.9978163242340088,grad_norm: 0.8370947273648021, iteration: 303448
loss: 0.999542236328125,grad_norm: 0.8484826947006667, iteration: 303449
loss: 1.0376935005187988,grad_norm: 0.9267924170642766, iteration: 303450
loss: 0.9914270639419556,grad_norm: 0.8381654705345677, iteration: 303451
loss: 1.0080511569976807,grad_norm: 0.829557056874599, iteration: 303452
loss: 0.9972952008247375,grad_norm: 0.7397207797483837, iteration: 303453
loss: 0.9979740977287292,grad_norm: 0.930515998647404, iteration: 303454
loss: 1.0394868850708008,grad_norm: 0.9999994625800184, iteration: 303455
loss: 0.9859098792076111,grad_norm: 0.7428364017709416, iteration: 303456
loss: 1.0305168628692627,grad_norm: 0.9999998798794317, iteration: 303457
loss: 1.0450189113616943,grad_norm: 0.8042848728846483, iteration: 303458
loss: 1.0325043201446533,grad_norm: 0.9999998365686422, iteration: 303459
loss: 1.043170690536499,grad_norm: 0.7565740066660642, iteration: 303460
loss: 0.993190348148346,grad_norm: 0.8616733856839519, iteration: 303461
loss: 1.0088037252426147,grad_norm: 0.9088711793845918, iteration: 303462
loss: 1.0893845558166504,grad_norm: 0.9999996511178246, iteration: 303463
loss: 1.0246567726135254,grad_norm: 0.9703567156174059, iteration: 303464
loss: 0.9998433589935303,grad_norm: 0.9222546050567275, iteration: 303465
loss: 1.0186586380004883,grad_norm: 0.9999999041581454, iteration: 303466
loss: 1.0103135108947754,grad_norm: 0.7628355169176335, iteration: 303467
loss: 1.1813085079193115,grad_norm: 0.9999994682872244, iteration: 303468
loss: 1.030774712562561,grad_norm: 0.9999990648250282, iteration: 303469
loss: 1.0110347270965576,grad_norm: 0.8542390260345398, iteration: 303470
loss: 0.9976253509521484,grad_norm: 0.9538877185265772, iteration: 303471
loss: 1.0158895254135132,grad_norm: 0.8739595166208955, iteration: 303472
loss: 0.9911960959434509,grad_norm: 0.9999991271166367, iteration: 303473
loss: 0.9993663430213928,grad_norm: 0.8681080139201863, iteration: 303474
loss: 1.002977728843689,grad_norm: 0.8679706370286935, iteration: 303475
loss: 1.0210493803024292,grad_norm: 0.9233894983097246, iteration: 303476
loss: 1.013495922088623,grad_norm: 0.8393080829959975, iteration: 303477
loss: 1.012892246246338,grad_norm: 0.9999989890513837, iteration: 303478
loss: 0.9857392311096191,grad_norm: 0.9775896317076956, iteration: 303479
loss: 1.0389348268508911,grad_norm: 0.7570772019021147, iteration: 303480
loss: 0.9801878929138184,grad_norm: 0.8994050003788857, iteration: 303481
loss: 1.015580177307129,grad_norm: 0.9591196059052931, iteration: 303482
loss: 1.0025367736816406,grad_norm: 0.7839142456609391, iteration: 303483
loss: 1.0071674585342407,grad_norm: 0.9138984179805545, iteration: 303484
loss: 0.9981160759925842,grad_norm: 0.731247209927193, iteration: 303485
loss: 1.0196316242218018,grad_norm: 0.9999991178691223, iteration: 303486
loss: 1.033361554145813,grad_norm: 0.8169492688043515, iteration: 303487
loss: 0.9845655560493469,grad_norm: 0.8532654679341843, iteration: 303488
loss: 0.993302047252655,grad_norm: 0.7815375650716534, iteration: 303489
loss: 0.9988977313041687,grad_norm: 0.8658484611982691, iteration: 303490
loss: 0.9932283163070679,grad_norm: 0.8037699520249459, iteration: 303491
loss: 0.9661983847618103,grad_norm: 0.9159408460428787, iteration: 303492
loss: 1.0814381837844849,grad_norm: 0.9999991166335555, iteration: 303493
loss: 1.0721830129623413,grad_norm: 0.9999994837931492, iteration: 303494
loss: 0.9717069268226624,grad_norm: 0.9206518315156352, iteration: 303495
loss: 1.0185720920562744,grad_norm: 0.8313839577241423, iteration: 303496
loss: 1.2080823183059692,grad_norm: 0.9999998210793529, iteration: 303497
loss: 0.998775064945221,grad_norm: 0.8651885276680492, iteration: 303498
loss: 1.043182373046875,grad_norm: 0.9999998943712242, iteration: 303499
loss: 1.0047868490219116,grad_norm: 0.943857960048238, iteration: 303500
loss: 1.0204403400421143,grad_norm: 0.9462321131827948, iteration: 303501
loss: 1.0075808763504028,grad_norm: 0.798582334886242, iteration: 303502
loss: 1.0124002695083618,grad_norm: 0.9999989801717829, iteration: 303503
loss: 1.0137592554092407,grad_norm: 0.9015856298412966, iteration: 303504
loss: 1.0186948776245117,grad_norm: 0.9813451066770543, iteration: 303505
loss: 1.038943886756897,grad_norm: 0.7231339290228297, iteration: 303506
loss: 1.0168850421905518,grad_norm: 0.9774823288376753, iteration: 303507
loss: 1.0297095775604248,grad_norm: 0.9201169771770684, iteration: 303508
loss: 1.0054856538772583,grad_norm: 0.839824384742268, iteration: 303509
loss: 0.970510721206665,grad_norm: 0.8906169829967295, iteration: 303510
loss: 0.9660758376121521,grad_norm: 0.9203619605971014, iteration: 303511
loss: 1.0052467584609985,grad_norm: 0.9635269326742215, iteration: 303512
loss: 1.0252854824066162,grad_norm: 0.9999991125399782, iteration: 303513
loss: 1.0424082279205322,grad_norm: 0.9999997343093968, iteration: 303514
loss: 0.992186963558197,grad_norm: 0.9043167983151513, iteration: 303515
loss: 0.9990522265434265,grad_norm: 0.9334060537675861, iteration: 303516
loss: 0.9973738193511963,grad_norm: 0.8637190625233391, iteration: 303517
loss: 1.0095614194869995,grad_norm: 0.8398065023710569, iteration: 303518
loss: 0.9812235236167908,grad_norm: 0.8950814859801773, iteration: 303519
loss: 0.9807450175285339,grad_norm: 0.999999034463806, iteration: 303520
loss: 1.028013825416565,grad_norm: 0.9999994946137979, iteration: 303521
loss: 1.0049784183502197,grad_norm: 0.7874161875629799, iteration: 303522
loss: 0.9898834228515625,grad_norm: 0.9216803869252853, iteration: 303523
loss: 0.9714745879173279,grad_norm: 0.9103723456068914, iteration: 303524
loss: 1.024247646331787,grad_norm: 0.999999301949338, iteration: 303525
loss: 1.0172830820083618,grad_norm: 0.9999989534343133, iteration: 303526
loss: 1.0395113229751587,grad_norm: 0.9999994977764508, iteration: 303527
loss: 0.9872309565544128,grad_norm: 0.9139908252674237, iteration: 303528
loss: 1.0294075012207031,grad_norm: 0.9999996873472515, iteration: 303529
loss: 0.9894827604293823,grad_norm: 0.8354810046172976, iteration: 303530
loss: 1.0240474939346313,grad_norm: 0.999999565754228, iteration: 303531
loss: 0.9739910364151001,grad_norm: 0.7686437971745324, iteration: 303532
loss: 0.9819095730781555,grad_norm: 0.9084336361895167, iteration: 303533
loss: 1.0024480819702148,grad_norm: 0.6331783377131526, iteration: 303534
loss: 1.006282091140747,grad_norm: 0.9835880359597282, iteration: 303535
loss: 1.011549711227417,grad_norm: 0.9999992794168499, iteration: 303536
loss: 0.9803195595741272,grad_norm: 0.8446221316456645, iteration: 303537
loss: 1.0332146883010864,grad_norm: 0.9999991652133604, iteration: 303538
loss: 1.0489096641540527,grad_norm: 0.8728645433936199, iteration: 303539
loss: 0.9974029660224915,grad_norm: 0.7991932577689164, iteration: 303540
loss: 0.9997087121009827,grad_norm: 0.7440232160953298, iteration: 303541
loss: 0.9965483546257019,grad_norm: 0.8375769069448308, iteration: 303542
loss: 1.0515220165252686,grad_norm: 0.9999994451225044, iteration: 303543
loss: 0.9954440593719482,grad_norm: 0.8640951591721409, iteration: 303544
loss: 1.0036442279815674,grad_norm: 0.8139365643182545, iteration: 303545
loss: 1.0461865663528442,grad_norm: 0.9999995017209615, iteration: 303546
loss: 1.0014508962631226,grad_norm: 0.8083688743612609, iteration: 303547
loss: 0.980463445186615,grad_norm: 0.9999995532261636, iteration: 303548
loss: 1.0691990852355957,grad_norm: 0.9999999261643044, iteration: 303549
loss: 1.0145937204360962,grad_norm: 0.9999998214049994, iteration: 303550
loss: 0.9724014401435852,grad_norm: 0.7505506066768102, iteration: 303551
loss: 1.0302486419677734,grad_norm: 0.8123120193004322, iteration: 303552
loss: 0.9914329051971436,grad_norm: 0.8470388444253404, iteration: 303553
loss: 0.9697892069816589,grad_norm: 0.924378807642634, iteration: 303554
loss: 0.9845395684242249,grad_norm: 0.7921606225251004, iteration: 303555
loss: 1.0435452461242676,grad_norm: 0.8836759823229743, iteration: 303556
loss: 1.0514631271362305,grad_norm: 0.9999991589494337, iteration: 303557
loss: 1.0157214403152466,grad_norm: 0.8225257997017444, iteration: 303558
loss: 1.038304328918457,grad_norm: 0.9999992097289129, iteration: 303559
loss: 0.979633092880249,grad_norm: 0.7706944047402106, iteration: 303560
loss: 0.9961593151092529,grad_norm: 0.9999992095953172, iteration: 303561
loss: 0.9743351340293884,grad_norm: 0.9446928100039702, iteration: 303562
loss: 0.9896964430809021,grad_norm: 0.8120870355709743, iteration: 303563
loss: 0.9781532883644104,grad_norm: 0.7701012885858565, iteration: 303564
loss: 0.9624610543251038,grad_norm: 0.6897298383331122, iteration: 303565
loss: 0.9875555634498596,grad_norm: 0.7516511941537818, iteration: 303566
loss: 1.0042258501052856,grad_norm: 0.9233693781585667, iteration: 303567
loss: 1.0113106966018677,grad_norm: 0.878494276246095, iteration: 303568
loss: 0.9766294360160828,grad_norm: 0.8321434109434174, iteration: 303569
loss: 1.008615493774414,grad_norm: 0.7410006784592117, iteration: 303570
loss: 0.9703627228736877,grad_norm: 0.9132893145210903, iteration: 303571
loss: 1.0041342973709106,grad_norm: 0.8238228223383407, iteration: 303572
loss: 1.0246014595031738,grad_norm: 0.9999992314089066, iteration: 303573
loss: 1.0520098209381104,grad_norm: 0.9999990535507127, iteration: 303574
loss: 0.9889227747917175,grad_norm: 0.8978174592804229, iteration: 303575
loss: 1.0227395296096802,grad_norm: 0.9999995118346529, iteration: 303576
loss: 0.9752991199493408,grad_norm: 0.9069688032729663, iteration: 303577
loss: 0.9652708172798157,grad_norm: 0.999999011481629, iteration: 303578
loss: 0.9685444831848145,grad_norm: 0.9999990771619368, iteration: 303579
loss: 1.0133237838745117,grad_norm: 0.9999994745629878, iteration: 303580
loss: 1.0150322914123535,grad_norm: 0.9999989962820066, iteration: 303581
loss: 0.9801110029220581,grad_norm: 0.9367256092537319, iteration: 303582
loss: 0.9927421808242798,grad_norm: 0.7795253016779262, iteration: 303583
loss: 1.0231826305389404,grad_norm: 0.7171540183687384, iteration: 303584
loss: 1.0272570848464966,grad_norm: 0.670299218225531, iteration: 303585
loss: 1.0158333778381348,grad_norm: 0.9849639039537283, iteration: 303586
loss: 1.009926676750183,grad_norm: 0.7931766648243894, iteration: 303587
loss: 1.0075806379318237,grad_norm: 0.9999996235545153, iteration: 303588
loss: 0.9757207632064819,grad_norm: 0.8896327697138018, iteration: 303589
loss: 1.0338687896728516,grad_norm: 0.8354760821709561, iteration: 303590
loss: 1.0189752578735352,grad_norm: 0.9999990005792152, iteration: 303591
loss: 0.9966745972633362,grad_norm: 0.8718384707610678, iteration: 303592
loss: 0.9942052960395813,grad_norm: 0.8279694237612062, iteration: 303593
loss: 1.1106815338134766,grad_norm: 0.9999993755803195, iteration: 303594
loss: 1.0370677709579468,grad_norm: 0.9999999750036601, iteration: 303595
loss: 0.9806205630302429,grad_norm: 0.7170999099653157, iteration: 303596
loss: 1.0559558868408203,grad_norm: 0.999999817228207, iteration: 303597
loss: 1.0379706621170044,grad_norm: 0.9999990485747395, iteration: 303598
loss: 1.0028793811798096,grad_norm: 0.722228622336892, iteration: 303599
loss: 1.0092921257019043,grad_norm: 0.751465074767891, iteration: 303600
loss: 0.9801955819129944,grad_norm: 0.8282777091224887, iteration: 303601
loss: 1.0021686553955078,grad_norm: 0.9999993184668218, iteration: 303602
loss: 1.0026758909225464,grad_norm: 0.9986194841662853, iteration: 303603
loss: 0.9637237787246704,grad_norm: 0.7633615297266103, iteration: 303604
loss: 0.9878086447715759,grad_norm: 0.9998694861385506, iteration: 303605
loss: 0.9998464584350586,grad_norm: 0.7780167359388824, iteration: 303606
loss: 0.9987995624542236,grad_norm: 0.8418512247319792, iteration: 303607
loss: 1.0180574655532837,grad_norm: 0.9999995054794295, iteration: 303608
loss: 0.967448353767395,grad_norm: 0.7806949492041789, iteration: 303609
loss: 1.044762134552002,grad_norm: 0.8800473762275596, iteration: 303610
loss: 1.0771982669830322,grad_norm: 0.9999990657093801, iteration: 303611
loss: 1.028937578201294,grad_norm: 0.7609975736264429, iteration: 303612
loss: 1.0524234771728516,grad_norm: 0.9999998086926579, iteration: 303613
loss: 1.0120227336883545,grad_norm: 0.8322389501642283, iteration: 303614
loss: 0.980265200138092,grad_norm: 0.9764368122949104, iteration: 303615
loss: 0.9909504652023315,grad_norm: 0.9876592604625541, iteration: 303616
loss: 1.0172532796859741,grad_norm: 0.9388276674457953, iteration: 303617
loss: 1.0253773927688599,grad_norm: 0.9999991467206979, iteration: 303618
loss: 1.07011878490448,grad_norm: 0.9999998903528023, iteration: 303619
loss: 0.9934541583061218,grad_norm: 0.9398390157368697, iteration: 303620
loss: 1.066200852394104,grad_norm: 0.9999990809506373, iteration: 303621
loss: 0.9683858752250671,grad_norm: 0.7905138202308554, iteration: 303622
loss: 1.0776152610778809,grad_norm: 0.9999991406229449, iteration: 303623
loss: 1.1282966136932373,grad_norm: 0.9999990581917302, iteration: 303624
loss: 1.0043294429779053,grad_norm: 0.8906177101306874, iteration: 303625
loss: 1.0387530326843262,grad_norm: 0.9999991560640443, iteration: 303626
loss: 0.9689790606498718,grad_norm: 0.8585938145258027, iteration: 303627
loss: 1.0280462503433228,grad_norm: 0.7939719878204612, iteration: 303628
loss: 1.0199111700057983,grad_norm: 0.9133080255323744, iteration: 303629
loss: 1.0017328262329102,grad_norm: 0.9999995102006725, iteration: 303630
loss: 1.0112876892089844,grad_norm: 0.9999995836208508, iteration: 303631
loss: 0.9839462041854858,grad_norm: 0.7610138674744262, iteration: 303632
loss: 1.0129550695419312,grad_norm: 0.9239896201819501, iteration: 303633
loss: 0.9997788071632385,grad_norm: 0.8164616849437868, iteration: 303634
loss: 0.9897580742835999,grad_norm: 0.9999991923616806, iteration: 303635
loss: 0.9861334562301636,grad_norm: 0.8511731588902199, iteration: 303636
loss: 1.023032546043396,grad_norm: 0.7114739588249104, iteration: 303637
loss: 1.000723958015442,grad_norm: 0.9620476879901642, iteration: 303638
loss: 0.9903771281242371,grad_norm: 0.7588205041462697, iteration: 303639
loss: 0.9795655012130737,grad_norm: 0.9225035461282937, iteration: 303640
loss: 1.0609352588653564,grad_norm: 0.8256835856951654, iteration: 303641
loss: 0.9827365875244141,grad_norm: 0.8276133237772767, iteration: 303642
loss: 0.9916020631790161,grad_norm: 0.7457285937615574, iteration: 303643
loss: 0.9569827318191528,grad_norm: 0.8973080222639316, iteration: 303644
loss: 0.9892834424972534,grad_norm: 0.8154686751571897, iteration: 303645
loss: 0.9907974004745483,grad_norm: 0.9670746987531502, iteration: 303646
loss: 0.975008487701416,grad_norm: 0.8581781395442521, iteration: 303647
loss: 0.9771494269371033,grad_norm: 0.8332992834538895, iteration: 303648
loss: 0.9832972884178162,grad_norm: 0.7996948562439359, iteration: 303649
loss: 0.9864739775657654,grad_norm: 0.752499920372219, iteration: 303650
loss: 1.0060032606124878,grad_norm: 0.7807864431596878, iteration: 303651
loss: 0.9712364077568054,grad_norm: 0.9999992702977523, iteration: 303652
loss: 1.0084148645401,grad_norm: 0.8260246817611455, iteration: 303653
loss: 1.0031485557556152,grad_norm: 0.9566594312834122, iteration: 303654
loss: 1.012658715248108,grad_norm: 0.8702376741395255, iteration: 303655
loss: 0.9894382953643799,grad_norm: 0.9589756268058942, iteration: 303656
loss: 0.9954859018325806,grad_norm: 0.7031678283779084, iteration: 303657
loss: 0.9742085337638855,grad_norm: 0.9544270186068988, iteration: 303658
loss: 1.0466978549957275,grad_norm: 0.9999992132824687, iteration: 303659
loss: 1.0493139028549194,grad_norm: 0.7324943343264988, iteration: 303660
loss: 1.0569740533828735,grad_norm: 0.924834335763811, iteration: 303661
loss: 0.9874424338340759,grad_norm: 0.8140051501612448, iteration: 303662
loss: 1.047606110572815,grad_norm: 0.9236174256866074, iteration: 303663
loss: 0.993558943271637,grad_norm: 0.7824226477354892, iteration: 303664
loss: 0.9639716744422913,grad_norm: 0.7595444918078942, iteration: 303665
loss: 0.9810464382171631,grad_norm: 0.8356637262424358, iteration: 303666
loss: 1.0121424198150635,grad_norm: 0.9373166458414159, iteration: 303667
loss: 0.9551472067832947,grad_norm: 0.9999991146083391, iteration: 303668
loss: 0.9639899730682373,grad_norm: 0.9999991056958731, iteration: 303669
loss: 1.0004106760025024,grad_norm: 0.798110204084914, iteration: 303670
loss: 1.026158094406128,grad_norm: 0.9999989712692586, iteration: 303671
loss: 1.0078381299972534,grad_norm: 0.7284300861584279, iteration: 303672
loss: 0.9975308775901794,grad_norm: 0.9999993408771651, iteration: 303673
loss: 0.9964885115623474,grad_norm: 0.9503433321913741, iteration: 303674
loss: 1.0462342500686646,grad_norm: 0.7638772105207469, iteration: 303675
loss: 1.0041186809539795,grad_norm: 0.9999992574739894, iteration: 303676
loss: 1.0049067735671997,grad_norm: 0.9999998141405237, iteration: 303677
loss: 1.1115567684173584,grad_norm: 0.8779636339949854, iteration: 303678
loss: 0.9962230920791626,grad_norm: 0.778883796741813, iteration: 303679
loss: 1.0160675048828125,grad_norm: 0.8482722421853953, iteration: 303680
loss: 1.0192644596099854,grad_norm: 0.9999991244450589, iteration: 303681
loss: 1.0002907514572144,grad_norm: 0.8308637054229988, iteration: 303682
loss: 1.0122405290603638,grad_norm: 0.9999991552787045, iteration: 303683
loss: 0.9752070307731628,grad_norm: 0.8321106541305004, iteration: 303684
loss: 1.0383214950561523,grad_norm: 0.7399015278073426, iteration: 303685
loss: 1.0311516523361206,grad_norm: 0.9999995108482999, iteration: 303686
loss: 1.0286996364593506,grad_norm: 0.9468141064991615, iteration: 303687
loss: 0.9813951849937439,grad_norm: 0.999998948738896, iteration: 303688
loss: 1.0178388357162476,grad_norm: 0.744556667217938, iteration: 303689
loss: 1.0893275737762451,grad_norm: 0.9999993338909486, iteration: 303690
loss: 1.014588713645935,grad_norm: 0.9495674110621432, iteration: 303691
loss: 1.0300543308258057,grad_norm: 0.8753162415658688, iteration: 303692
loss: 1.0040383338928223,grad_norm: 0.9072811218852741, iteration: 303693
loss: 0.9664351940155029,grad_norm: 0.9145800341658705, iteration: 303694
loss: 0.9928265810012817,grad_norm: 0.8103130028697801, iteration: 303695
loss: 0.9959368109703064,grad_norm: 0.9943659304921194, iteration: 303696
loss: 0.9571369886398315,grad_norm: 0.8149926521860308, iteration: 303697
loss: 0.9716768264770508,grad_norm: 0.9952731764005117, iteration: 303698
loss: 0.997801661491394,grad_norm: 0.8849482535517879, iteration: 303699
loss: 0.9966830611228943,grad_norm: 0.8480546862174854, iteration: 303700
loss: 0.9894015192985535,grad_norm: 0.9509007150581336, iteration: 303701
loss: 1.046978235244751,grad_norm: 0.9999991705557105, iteration: 303702
loss: 0.9345889091491699,grad_norm: 0.7454639805495591, iteration: 303703
loss: 1.0401421785354614,grad_norm: 0.9999991290057435, iteration: 303704
loss: 0.9729294180870056,grad_norm: 0.9670416764546718, iteration: 303705
loss: 0.9867674112319946,grad_norm: 0.8806653474388341, iteration: 303706
loss: 0.9797151684761047,grad_norm: 0.7655839755384347, iteration: 303707
loss: 1.0104548931121826,grad_norm: 0.9202853328808066, iteration: 303708
loss: 1.057416319847107,grad_norm: 1.000000059453156, iteration: 303709
loss: 1.0251432657241821,grad_norm: 0.8765422998914231, iteration: 303710
loss: 1.018571376800537,grad_norm: 0.8370583205494667, iteration: 303711
loss: 1.1667362451553345,grad_norm: 0.9999989006999309, iteration: 303712
loss: 0.989722490310669,grad_norm: 0.9999992807084206, iteration: 303713
loss: 1.0021710395812988,grad_norm: 0.7777601900762181, iteration: 303714
loss: 1.0011212825775146,grad_norm: 0.9489848882059899, iteration: 303715
loss: 1.1136904954910278,grad_norm: 0.9999991371129363, iteration: 303716
loss: 0.9666937589645386,grad_norm: 0.751648249862397, iteration: 303717
loss: 1.0421617031097412,grad_norm: 0.9999998333674159, iteration: 303718
loss: 1.0256813764572144,grad_norm: 0.9999993693623417, iteration: 303719
loss: 0.9952588677406311,grad_norm: 0.9153465923483771, iteration: 303720
loss: 0.9836044907569885,grad_norm: 0.7872438419641498, iteration: 303721
loss: 0.9969033002853394,grad_norm: 0.74021704734528, iteration: 303722
loss: 1.015242338180542,grad_norm: 0.9999994419631382, iteration: 303723
loss: 1.0292342901229858,grad_norm: 0.9999995403759676, iteration: 303724
loss: 0.965409517288208,grad_norm: 0.8248573134487931, iteration: 303725
loss: 0.9977059364318848,grad_norm: 0.8935481905780757, iteration: 303726
loss: 1.0923268795013428,grad_norm: 0.9999998039625709, iteration: 303727
loss: 1.1807661056518555,grad_norm: 0.9999993399542573, iteration: 303728
loss: 1.0224767923355103,grad_norm: 0.7562090068689149, iteration: 303729
loss: 1.0803732872009277,grad_norm: 0.7130881841332186, iteration: 303730
loss: 1.009871244430542,grad_norm: 0.9999990762736966, iteration: 303731
loss: 0.9527917504310608,grad_norm: 0.8699160151662293, iteration: 303732
loss: 0.9698886871337891,grad_norm: 0.9999991424127044, iteration: 303733
loss: 0.9958512187004089,grad_norm: 0.8687843475056432, iteration: 303734
loss: 1.0083404779434204,grad_norm: 0.8295199866556714, iteration: 303735
loss: 1.0064553022384644,grad_norm: 0.8617363600939221, iteration: 303736
loss: 1.1795082092285156,grad_norm: 0.9999991709908466, iteration: 303737
loss: 1.0222963094711304,grad_norm: 0.9876357647083717, iteration: 303738
loss: 1.1096432209014893,grad_norm: 0.9999991224673999, iteration: 303739
loss: 1.0218652486801147,grad_norm: 0.999999738847803, iteration: 303740
loss: 1.0498172044754028,grad_norm: 0.9305660231442128, iteration: 303741
loss: 0.9601408839225769,grad_norm: 0.8309821856003854, iteration: 303742
loss: 0.9441766142845154,grad_norm: 0.7074832178175676, iteration: 303743
loss: 0.990954577922821,grad_norm: 0.9410883630372601, iteration: 303744
loss: 0.9894659519195557,grad_norm: 0.9999998335449788, iteration: 303745
loss: 0.9873218536376953,grad_norm: 0.7841588003431906, iteration: 303746
loss: 1.016563892364502,grad_norm: 0.7915555727804734, iteration: 303747
loss: 1.065453290939331,grad_norm: 0.9095476814730805, iteration: 303748
loss: 1.072493076324463,grad_norm: 0.7984490581538067, iteration: 303749
loss: 1.0080815553665161,grad_norm: 0.859711549462976, iteration: 303750
loss: 1.1275392770767212,grad_norm: 0.9999999160643875, iteration: 303751
loss: 1.0066516399383545,grad_norm: 0.8684737737167448, iteration: 303752
loss: 1.0828404426574707,grad_norm: 0.9050216160071126, iteration: 303753
loss: 0.9556949138641357,grad_norm: 0.8161987719119593, iteration: 303754
loss: 0.9677305817604065,grad_norm: 0.8120641397593212, iteration: 303755
loss: 1.017006754875183,grad_norm: 0.9442390012418799, iteration: 303756
loss: 0.990062952041626,grad_norm: 0.7566042646696453, iteration: 303757
loss: 1.0009809732437134,grad_norm: 0.9999991407067935, iteration: 303758
loss: 0.9899404048919678,grad_norm: 0.7642556803288758, iteration: 303759
loss: 1.0747276544570923,grad_norm: 0.9999995390381656, iteration: 303760
loss: 0.9950414896011353,grad_norm: 0.7594456800124652, iteration: 303761
loss: 1.0590485334396362,grad_norm: 0.7242702407494563, iteration: 303762
loss: 0.9859376549720764,grad_norm: 0.8171550707128946, iteration: 303763
loss: 1.0793578624725342,grad_norm: 0.9999999725891006, iteration: 303764
loss: 0.9951115846633911,grad_norm: 0.7843324311020046, iteration: 303765
loss: 0.9744783639907837,grad_norm: 0.8749193824609209, iteration: 303766
loss: 1.053491234779358,grad_norm: 0.9999997560507262, iteration: 303767
loss: 1.0228798389434814,grad_norm: 0.7824768457380742, iteration: 303768
loss: 0.9824468493461609,grad_norm: 0.7643574102566948, iteration: 303769
loss: 1.033516764640808,grad_norm: 0.8096846491800255, iteration: 303770
loss: 1.1948572397232056,grad_norm: 0.9999990852669965, iteration: 303771
loss: 1.0431122779846191,grad_norm: 0.9999992618755972, iteration: 303772
loss: 1.0517146587371826,grad_norm: 0.8777449240132897, iteration: 303773
loss: 0.9940431714057922,grad_norm: 0.7880121538463559, iteration: 303774
loss: 1.017176866531372,grad_norm: 0.9999998086904475, iteration: 303775
loss: 1.0211963653564453,grad_norm: 0.9383122710044236, iteration: 303776
loss: 0.9975409507751465,grad_norm: 0.8356452322261462, iteration: 303777
loss: 1.1046842336654663,grad_norm: 0.9999993539438694, iteration: 303778
loss: 1.0050978660583496,grad_norm: 0.7818331813630425, iteration: 303779
loss: 0.9767622351646423,grad_norm: 0.7861992113578855, iteration: 303780
loss: 1.1190755367279053,grad_norm: 0.9448777859900019, iteration: 303781
loss: 1.028951644897461,grad_norm: 0.9999991624544338, iteration: 303782
loss: 1.0709967613220215,grad_norm: 0.9999995469525219, iteration: 303783
loss: 0.9792373776435852,grad_norm: 0.8319644382007431, iteration: 303784
loss: 1.1477668285369873,grad_norm: 0.9999990823248953, iteration: 303785
loss: 1.0240318775177002,grad_norm: 0.9999999612997156, iteration: 303786
loss: 1.0664862394332886,grad_norm: 0.9999990950007918, iteration: 303787
loss: 1.170859694480896,grad_norm: 0.9573442162409215, iteration: 303788
loss: 1.0360175371170044,grad_norm: 0.9999991122430336, iteration: 303789
loss: 0.9773438572883606,grad_norm: 0.9715703304753296, iteration: 303790
loss: 1.0173193216323853,grad_norm: 0.9999991508112136, iteration: 303791
loss: 1.1195125579833984,grad_norm: 0.9999994846706143, iteration: 303792
loss: 0.9956899285316467,grad_norm: 0.9999990121621113, iteration: 303793
loss: 1.0959724187850952,grad_norm: 0.9999992806638779, iteration: 303794
loss: 0.9821510910987854,grad_norm: 0.9639473865076195, iteration: 303795
loss: 1.068552851676941,grad_norm: 0.9999990722212413, iteration: 303796
loss: 1.0553585290908813,grad_norm: 0.8276724759029255, iteration: 303797
loss: 0.9778421521186829,grad_norm: 0.9237608935410514, iteration: 303798
loss: 1.0080540180206299,grad_norm: 0.9999991973866089, iteration: 303799
loss: 1.0256997346878052,grad_norm: 0.8524078553830683, iteration: 303800
loss: 1.1355512142181396,grad_norm: 0.9999995958583817, iteration: 303801
loss: 1.2809957265853882,grad_norm: 0.999999980379136, iteration: 303802
loss: 0.998734712600708,grad_norm: 0.9253108395022592, iteration: 303803
loss: 1.0580155849456787,grad_norm: 0.7908695657164517, iteration: 303804
loss: 1.1050190925598145,grad_norm: 0.9999999584648347, iteration: 303805
loss: 1.1572315692901611,grad_norm: 0.9999996720354565, iteration: 303806
loss: 1.062306523323059,grad_norm: 0.9907829605336792, iteration: 303807
loss: 1.0290582180023193,grad_norm: 0.9999991092265266, iteration: 303808
loss: 1.090082049369812,grad_norm: 0.999999320811778, iteration: 303809
loss: 1.021409511566162,grad_norm: 0.8028620381041103, iteration: 303810
loss: 1.0311546325683594,grad_norm: 0.9218835411919564, iteration: 303811
loss: 0.9733570218086243,grad_norm: 0.9379030584561563, iteration: 303812
loss: 1.0545094013214111,grad_norm: 0.9999997628250695, iteration: 303813
loss: 1.041275143623352,grad_norm: 0.7449476048045651, iteration: 303814
loss: 1.1028274297714233,grad_norm: 0.999999468569628, iteration: 303815
loss: 1.0519903898239136,grad_norm: 0.9557277693887257, iteration: 303816
loss: 1.0073305368423462,grad_norm: 0.8827920632800462, iteration: 303817
loss: 1.0241138935089111,grad_norm: 0.7445240351274793, iteration: 303818
loss: 0.9882984161376953,grad_norm: 0.80857081745866, iteration: 303819
loss: 0.9784610867500305,grad_norm: 0.8485377220594398, iteration: 303820
loss: 0.9953873157501221,grad_norm: 0.733542238648828, iteration: 303821
loss: 0.9661971926689148,grad_norm: 0.8750402673372464, iteration: 303822
loss: 1.0496994256973267,grad_norm: 0.9001618696791122, iteration: 303823
loss: 1.040803074836731,grad_norm: 0.9454088628395703, iteration: 303824
loss: 1.028202772140503,grad_norm: 0.7283728199409305, iteration: 303825
loss: 1.0105719566345215,grad_norm: 0.8945539397583334, iteration: 303826
loss: 1.024110198020935,grad_norm: 0.9999995209133473, iteration: 303827
loss: 0.9840006232261658,grad_norm: 0.9571489954115658, iteration: 303828
loss: 1.0032479763031006,grad_norm: 0.7984643375910256, iteration: 303829
loss: 1.0217612981796265,grad_norm: 0.9999992769751831, iteration: 303830
loss: 1.0341625213623047,grad_norm: 0.8537638485980787, iteration: 303831
loss: 1.0111557245254517,grad_norm: 0.808479189747735, iteration: 303832
loss: 1.0406978130340576,grad_norm: 0.897777374017201, iteration: 303833
loss: 1.1124597787857056,grad_norm: 0.9999990860690341, iteration: 303834
loss: 1.0562865734100342,grad_norm: 0.9999991915176291, iteration: 303835
loss: 1.0257654190063477,grad_norm: 0.9999993237774298, iteration: 303836
loss: 0.9934535026550293,grad_norm: 0.8816360165517109, iteration: 303837
loss: 1.149276852607727,grad_norm: 0.9999994026027783, iteration: 303838
loss: 1.034137487411499,grad_norm: 0.9999990750534109, iteration: 303839
loss: 1.0142261981964111,grad_norm: 0.8988872233186842, iteration: 303840
loss: 1.0771763324737549,grad_norm: 0.9884114982664897, iteration: 303841
loss: 1.016295313835144,grad_norm: 0.9999998715535535, iteration: 303842
loss: 0.9769379496574402,grad_norm: 0.9196136291036262, iteration: 303843
loss: 0.9642069339752197,grad_norm: 0.8854674211522203, iteration: 303844
loss: 0.9952124357223511,grad_norm: 0.7970377672345643, iteration: 303845
loss: 0.9950159788131714,grad_norm: 0.8246286330665885, iteration: 303846
loss: 1.049062728881836,grad_norm: 0.916994002208371, iteration: 303847
loss: 0.9911813735961914,grad_norm: 0.9104837995671986, iteration: 303848
loss: 1.016455054283142,grad_norm: 0.9408753265189488, iteration: 303849
loss: 0.9801375269889832,grad_norm: 0.7165776395406139, iteration: 303850
loss: 1.0184402465820312,grad_norm: 0.7761163060074707, iteration: 303851
loss: 1.0250011682510376,grad_norm: 0.9402723061997823, iteration: 303852
loss: 1.0730311870574951,grad_norm: 0.8165799337182271, iteration: 303853
loss: 0.9865895509719849,grad_norm: 0.8312680122643533, iteration: 303854
loss: 1.0147172212600708,grad_norm: 0.7587460300418262, iteration: 303855
loss: 0.9893175363540649,grad_norm: 0.8042754050629142, iteration: 303856
loss: 1.0448431968688965,grad_norm: 0.9999993843906732, iteration: 303857
loss: 1.0133699178695679,grad_norm: 0.9135973528088598, iteration: 303858
loss: 0.9834024906158447,grad_norm: 0.9999990604153012, iteration: 303859
loss: 1.0199878215789795,grad_norm: 0.9247865446982326, iteration: 303860
loss: 1.0615293979644775,grad_norm: 0.9999992123886756, iteration: 303861
loss: 1.011143684387207,grad_norm: 0.8410447827007891, iteration: 303862
loss: 0.9711363315582275,grad_norm: 0.8314586285644856, iteration: 303863
loss: 0.9898414015769958,grad_norm: 0.8584307567479106, iteration: 303864
loss: 0.9975332617759705,grad_norm: 0.8837138426611314, iteration: 303865
loss: 1.1019104719161987,grad_norm: 0.9999991684006317, iteration: 303866
loss: 1.082689881324768,grad_norm: 0.999999498245783, iteration: 303867
loss: 1.0280133485794067,grad_norm: 0.8107105501784749, iteration: 303868
loss: 1.022986650466919,grad_norm: 0.8938734371428524, iteration: 303869
loss: 0.9776576161384583,grad_norm: 0.6883416621300276, iteration: 303870
loss: 1.0130281448364258,grad_norm: 0.8896179725617472, iteration: 303871
loss: 1.0057507753372192,grad_norm: 0.6795971732716055, iteration: 303872
loss: 0.9985011219978333,grad_norm: 0.9362825744195452, iteration: 303873
loss: 1.0083953142166138,grad_norm: 0.8185679486263079, iteration: 303874
loss: 0.9754364490509033,grad_norm: 0.8484062229703156, iteration: 303875
loss: 0.9775822162628174,grad_norm: 0.7756155129345012, iteration: 303876
loss: 0.982494592666626,grad_norm: 0.994612447214553, iteration: 303877
loss: 0.988078773021698,grad_norm: 0.9999994374626054, iteration: 303878
loss: 1.0280799865722656,grad_norm: 0.8050713381120168, iteration: 303879
loss: 1.0564658641815186,grad_norm: 0.9999995912827183, iteration: 303880
loss: 1.0070406198501587,grad_norm: 0.7174167589595636, iteration: 303881
loss: 1.085658073425293,grad_norm: 0.9999997062209236, iteration: 303882
loss: 1.0345995426177979,grad_norm: 0.9999989882938205, iteration: 303883
loss: 1.0154780149459839,grad_norm: 0.9999994735032733, iteration: 303884
loss: 1.022073745727539,grad_norm: 0.9999994548522146, iteration: 303885
loss: 0.9808446764945984,grad_norm: 0.9519777178981794, iteration: 303886
loss: 0.9826093316078186,grad_norm: 1.0000000064258552, iteration: 303887
loss: 0.9937254190444946,grad_norm: 0.7563779203963491, iteration: 303888
loss: 1.0284587144851685,grad_norm: 0.9999993143446374, iteration: 303889
loss: 1.0357179641723633,grad_norm: 0.9999997541238621, iteration: 303890
loss: 0.9996442794799805,grad_norm: 0.7922519295262197, iteration: 303891
loss: 1.0038237571716309,grad_norm: 0.9042931371986388, iteration: 303892
loss: 1.1737549304962158,grad_norm: 0.9999998418336596, iteration: 303893
loss: 1.0914990901947021,grad_norm: 1.0000000407591414, iteration: 303894
loss: 1.000768780708313,grad_norm: 0.7778052849748676, iteration: 303895
loss: 1.1038753986358643,grad_norm: 0.9999996457914662, iteration: 303896
loss: 0.9878836870193481,grad_norm: 0.9179524206558304, iteration: 303897
loss: 1.0794403553009033,grad_norm: 1.0000000127795572, iteration: 303898
loss: 0.9919080138206482,grad_norm: 0.9999990227228163, iteration: 303899
loss: 0.9958747625350952,grad_norm: 0.8829807307207861, iteration: 303900
loss: 1.00404691696167,grad_norm: 0.8674938521849463, iteration: 303901
loss: 0.969622790813446,grad_norm: 0.9882712916788073, iteration: 303902
loss: 1.063004493713379,grad_norm: 0.9999993837756908, iteration: 303903
loss: 1.1414464712142944,grad_norm: 0.9999990117525489, iteration: 303904
loss: 1.0149344205856323,grad_norm: 0.8105730140444241, iteration: 303905
loss: 1.2288920879364014,grad_norm: 0.9999996674800233, iteration: 303906
loss: 1.1309545040130615,grad_norm: 0.9999998663003559, iteration: 303907
loss: 1.089141845703125,grad_norm: 0.9999999830716044, iteration: 303908
loss: 0.9998113512992859,grad_norm: 0.9999991897073764, iteration: 303909
loss: 1.010195255279541,grad_norm: 0.9999995388822608, iteration: 303910
loss: 1.0402759313583374,grad_norm: 0.7426414249572595, iteration: 303911
loss: 1.2370896339416504,grad_norm: 0.9999993902316867, iteration: 303912
loss: 1.0900148153305054,grad_norm: 0.9584485932793, iteration: 303913
loss: 1.0137226581573486,grad_norm: 0.7107900536087897, iteration: 303914
loss: 1.0331171751022339,grad_norm: 0.9999997196147191, iteration: 303915
loss: 1.004905343055725,grad_norm: 0.8270723823876766, iteration: 303916
loss: 1.0135796070098877,grad_norm: 0.9297202901913513, iteration: 303917
loss: 1.0206774473190308,grad_norm: 0.99999917386458, iteration: 303918
loss: 1.0705448389053345,grad_norm: 1.0000001001420709, iteration: 303919
loss: 0.9993541836738586,grad_norm: 0.8075456134689027, iteration: 303920
loss: 1.0068485736846924,grad_norm: 0.8512229842014316, iteration: 303921
loss: 0.9543716311454773,grad_norm: 0.8424863840523273, iteration: 303922
loss: 1.077593207359314,grad_norm: 0.9999997821509636, iteration: 303923
loss: 1.1303420066833496,grad_norm: 0.9999998469078419, iteration: 303924
loss: 1.0321094989776611,grad_norm: 0.9835668865695256, iteration: 303925
loss: 1.0042039155960083,grad_norm: 0.9999992328497296, iteration: 303926
loss: 1.0023244619369507,grad_norm: 0.889254322317892, iteration: 303927
loss: 1.0032265186309814,grad_norm: 0.6866168917242513, iteration: 303928
loss: 0.9473041892051697,grad_norm: 0.9561013115068229, iteration: 303929
loss: 1.0497990846633911,grad_norm: 0.9999992648564826, iteration: 303930
loss: 1.0145419836044312,grad_norm: 0.97686571484279, iteration: 303931
loss: 1.0089890956878662,grad_norm: 0.8075366934045209, iteration: 303932
loss: 1.1181974411010742,grad_norm: 0.9999998756002797, iteration: 303933
loss: 1.0455420017242432,grad_norm: 0.9071531496087896, iteration: 303934
loss: 0.9902461767196655,grad_norm: 0.8509017943270551, iteration: 303935
loss: 1.015081763267517,grad_norm: 0.691081215044939, iteration: 303936
loss: 0.9932462573051453,grad_norm: 0.9041846347601934, iteration: 303937
loss: 0.9891742467880249,grad_norm: 0.9999989772439316, iteration: 303938
loss: 1.041669249534607,grad_norm: 0.9999994666504561, iteration: 303939
loss: 1.0188263654708862,grad_norm: 0.8729461259673644, iteration: 303940
loss: 0.9994350671768188,grad_norm: 0.9999993834206018, iteration: 303941
loss: 1.1653549671173096,grad_norm: 0.9999997097566775, iteration: 303942
loss: 1.037140965461731,grad_norm: 0.834859102575625, iteration: 303943
loss: 0.9553883671760559,grad_norm: 0.8020673138361908, iteration: 303944
loss: 1.0372071266174316,grad_norm: 0.9999991322015661, iteration: 303945
loss: 1.0318397283554077,grad_norm: 0.8047883931723192, iteration: 303946
loss: 1.0182257890701294,grad_norm: 0.8722774661180985, iteration: 303947
loss: 1.023553490638733,grad_norm: 0.9999993619731572, iteration: 303948
loss: 0.9268303513526917,grad_norm: 0.9356609732150246, iteration: 303949
loss: 0.9852674603462219,grad_norm: 0.9608855246540047, iteration: 303950
loss: 1.0029284954071045,grad_norm: 0.8528607158242255, iteration: 303951
loss: 1.0176210403442383,grad_norm: 0.8558560007094609, iteration: 303952
loss: 0.9696900248527527,grad_norm: 0.8352320095984398, iteration: 303953
loss: 1.0249236822128296,grad_norm: 0.999999204786204, iteration: 303954
loss: 0.9963140487670898,grad_norm: 0.8683425052801288, iteration: 303955
loss: 0.9882552623748779,grad_norm: 0.8737732219001513, iteration: 303956
loss: 0.9720461964607239,grad_norm: 0.9478692849108753, iteration: 303957
loss: 0.9651474952697754,grad_norm: 0.9525020003073302, iteration: 303958
loss: 0.9627190232276917,grad_norm: 0.9188386397160829, iteration: 303959
loss: 0.998244047164917,grad_norm: 0.8063141686272719, iteration: 303960
loss: 1.025696873664856,grad_norm: 0.8427758233638419, iteration: 303961
loss: 1.0016313791275024,grad_norm: 0.874962129983144, iteration: 303962
loss: 0.9893816709518433,grad_norm: 0.9227458658166245, iteration: 303963
loss: 1.0518136024475098,grad_norm: 0.8949594854221503, iteration: 303964
loss: 1.1186695098876953,grad_norm: 0.9999997328175539, iteration: 303965
loss: 1.1012773513793945,grad_norm: 0.9999997423785775, iteration: 303966
loss: 1.0225932598114014,grad_norm: 0.999999134506452, iteration: 303967
loss: 1.0060763359069824,grad_norm: 0.9999991792362475, iteration: 303968
loss: 1.024106502532959,grad_norm: 0.9999995453125708, iteration: 303969
loss: 0.9803130030632019,grad_norm: 0.8816950084100184, iteration: 303970
loss: 0.9828441739082336,grad_norm: 0.9539323263108724, iteration: 303971
loss: 1.108568549156189,grad_norm: 0.9999992406323354, iteration: 303972
loss: 0.9986169338226318,grad_norm: 0.8922087395483429, iteration: 303973
loss: 0.9772129654884338,grad_norm: 0.9425001547308809, iteration: 303974
loss: 1.0396605730056763,grad_norm: 0.85118154412128, iteration: 303975
loss: 1.0515071153640747,grad_norm: 0.8170589100227349, iteration: 303976
loss: 1.0114561319351196,grad_norm: 0.9999991259981543, iteration: 303977
loss: 1.0634628534317017,grad_norm: 0.999999723952833, iteration: 303978
loss: 1.0136677026748657,grad_norm: 0.8852540233610522, iteration: 303979
loss: 0.9523176550865173,grad_norm: 0.9999989329173951, iteration: 303980
loss: 1.0371983051300049,grad_norm: 0.8220760383491593, iteration: 303981
loss: 1.0302311182022095,grad_norm: 0.9999996728000864, iteration: 303982
loss: 1.0591521263122559,grad_norm: 0.9455972940448814, iteration: 303983
loss: 1.0572919845581055,grad_norm: 0.9636169149665283, iteration: 303984
loss: 1.046021819114685,grad_norm: 0.9930364203492007, iteration: 303985
loss: 1.086909294128418,grad_norm: 0.9999998342485052, iteration: 303986
loss: 1.2154314517974854,grad_norm: 0.9999997702505315, iteration: 303987
loss: 1.0079352855682373,grad_norm: 0.8755991699003617, iteration: 303988
loss: 1.0598114728927612,grad_norm: 0.9459292899858749, iteration: 303989
loss: 0.9783627986907959,grad_norm: 0.9731871211202114, iteration: 303990
loss: 1.012283205986023,grad_norm: 0.7608501482755893, iteration: 303991
loss: 1.0279501676559448,grad_norm: 0.8775489452795847, iteration: 303992
loss: 0.9399023056030273,grad_norm: 0.9768463470671983, iteration: 303993
loss: 1.029569149017334,grad_norm: 0.8386246687896992, iteration: 303994
loss: 1.0277187824249268,grad_norm: 0.8979207473015234, iteration: 303995
loss: 0.9599955081939697,grad_norm: 0.8522473628379691, iteration: 303996
loss: 0.995126485824585,grad_norm: 0.6644389652603742, iteration: 303997
loss: 0.994183361530304,grad_norm: 0.7420518699384931, iteration: 303998
loss: 1.0152052640914917,grad_norm: 0.9999992038782742, iteration: 303999
loss: 1.0385714769363403,grad_norm: 0.9999999119356839, iteration: 304000
loss: 0.9906970858573914,grad_norm: 0.8934118364401631, iteration: 304001
loss: 1.0686182975769043,grad_norm: 0.8706912382490167, iteration: 304002
loss: 1.0410043001174927,grad_norm: 0.9772590244625219, iteration: 304003
loss: 1.0019139051437378,grad_norm: 0.7349556404817839, iteration: 304004
loss: 1.0211929082870483,grad_norm: 0.9153428782275009, iteration: 304005
loss: 0.9722625613212585,grad_norm: 0.8998507875066724, iteration: 304006
loss: 1.036347508430481,grad_norm: 0.9063204026826168, iteration: 304007
loss: 1.0383737087249756,grad_norm: 0.9999991097514189, iteration: 304008
loss: 0.9968975782394409,grad_norm: 0.999999491072731, iteration: 304009
loss: 0.9778164625167847,grad_norm: 0.7697575678745466, iteration: 304010
loss: 0.9893094301223755,grad_norm: 0.8279863735008037, iteration: 304011
loss: 1.025739312171936,grad_norm: 0.999999485029616, iteration: 304012
loss: 0.9797570705413818,grad_norm: 0.9999995799241054, iteration: 304013
loss: 1.084854006767273,grad_norm: 0.9999990070808604, iteration: 304014
loss: 1.0138322114944458,grad_norm: 0.9952727614361812, iteration: 304015
loss: 1.0100111961364746,grad_norm: 0.9658095528962649, iteration: 304016
loss: 0.9837203025817871,grad_norm: 0.8504517760169565, iteration: 304017
loss: 1.0038753747940063,grad_norm: 0.8129573906700401, iteration: 304018
loss: 1.0068306922912598,grad_norm: 0.6756386238533635, iteration: 304019
loss: 1.0224977731704712,grad_norm: 0.9386038495010519, iteration: 304020
loss: 1.2076811790466309,grad_norm: 0.9999999329455308, iteration: 304021
loss: 1.0363155603408813,grad_norm: 0.9401759742270016, iteration: 304022
loss: 0.9813107848167419,grad_norm: 0.8740311179504803, iteration: 304023
loss: 0.9941850900650024,grad_norm: 0.8957927149067957, iteration: 304024
loss: 1.024211049079895,grad_norm: 0.9999997648165865, iteration: 304025
loss: 1.0533888339996338,grad_norm: 0.9988581447346506, iteration: 304026
loss: 1.0222630500793457,grad_norm: 0.9999993805526699, iteration: 304027
loss: 1.0118755102157593,grad_norm: 0.8721728299878536, iteration: 304028
loss: 1.0052626132965088,grad_norm: 0.9999990959939605, iteration: 304029
loss: 1.0072685480117798,grad_norm: 0.9999993867371517, iteration: 304030
loss: 1.09257173538208,grad_norm: 0.9999996636742565, iteration: 304031
loss: 1.0074058771133423,grad_norm: 0.6787491090542619, iteration: 304032
loss: 0.9771707653999329,grad_norm: 0.9999991694284514, iteration: 304033
loss: 0.9991893172264099,grad_norm: 0.9999991425137842, iteration: 304034
loss: 0.9767569899559021,grad_norm: 0.8049329485044925, iteration: 304035
loss: 1.0359742641448975,grad_norm: 0.9999992744995535, iteration: 304036
loss: 1.038367509841919,grad_norm: 0.9999992115440793, iteration: 304037
loss: 1.0210236310958862,grad_norm: 0.7690434424548647, iteration: 304038
loss: 1.0419261455535889,grad_norm: 0.9110293046217349, iteration: 304039
loss: 1.0187803506851196,grad_norm: 0.9999998091082788, iteration: 304040
loss: 1.001929759979248,grad_norm: 0.9999990871211577, iteration: 304041
loss: 1.021899700164795,grad_norm: 0.9678150430701037, iteration: 304042
loss: 0.9912117719650269,grad_norm: 0.7986603143171198, iteration: 304043
loss: 0.9896683096885681,grad_norm: 0.5875214943587669, iteration: 304044
loss: 0.9893038868904114,grad_norm: 0.8495033093386908, iteration: 304045
loss: 0.9662521481513977,grad_norm: 0.8126697028587044, iteration: 304046
loss: 1.011610746383667,grad_norm: 0.9445626148699754, iteration: 304047
loss: 1.0443023443222046,grad_norm: 0.9999993028480093, iteration: 304048
loss: 0.9913550615310669,grad_norm: 0.8044653867852037, iteration: 304049
loss: 1.019432544708252,grad_norm: 0.8818855859984753, iteration: 304050
loss: 1.025588035583496,grad_norm: 0.9476338981777801, iteration: 304051
loss: 0.9832528829574585,grad_norm: 0.9999996737802209, iteration: 304052
loss: 0.9716142416000366,grad_norm: 0.9463300028942367, iteration: 304053
loss: 0.9804490804672241,grad_norm: 0.909000603741203, iteration: 304054
loss: 1.0429903268814087,grad_norm: 0.8407975276416062, iteration: 304055
loss: 1.0336592197418213,grad_norm: 0.7997773184094611, iteration: 304056
loss: 1.035609245300293,grad_norm: 0.9713941831614846, iteration: 304057
loss: 1.0184619426727295,grad_norm: 0.7562563709629736, iteration: 304058
loss: 1.047082543373108,grad_norm: 0.9373449201401293, iteration: 304059
loss: 0.9981679916381836,grad_norm: 0.999999723948348, iteration: 304060
loss: 1.009430170059204,grad_norm: 0.9272010007438969, iteration: 304061
loss: 1.0211225748062134,grad_norm: 0.9147339008796838, iteration: 304062
loss: 1.0235424041748047,grad_norm: 0.7590234442272228, iteration: 304063
loss: 1.0736180543899536,grad_norm: 0.9999997892226349, iteration: 304064
loss: 1.0382529497146606,grad_norm: 0.999999190006412, iteration: 304065
loss: 1.0396403074264526,grad_norm: 0.9384577124448499, iteration: 304066
loss: 0.9766871929168701,grad_norm: 0.7817879931222098, iteration: 304067
loss: 1.019568681716919,grad_norm: 0.7940177823386435, iteration: 304068
loss: 1.1956080198287964,grad_norm: 0.9999993077026796, iteration: 304069
loss: 0.9463010430335999,grad_norm: 0.999999429949086, iteration: 304070
loss: 1.0553033351898193,grad_norm: 0.9999994627398053, iteration: 304071
loss: 1.0670852661132812,grad_norm: 0.9999996729765682, iteration: 304072
loss: 1.001001000404358,grad_norm: 0.9999996631856629, iteration: 304073
loss: 0.9963403344154358,grad_norm: 0.7664483889069512, iteration: 304074
loss: 0.9909700751304626,grad_norm: 0.9999999136057657, iteration: 304075
loss: 1.0379501581192017,grad_norm: 0.7648558289443486, iteration: 304076
loss: 1.2127841711044312,grad_norm: 0.9999995931025145, iteration: 304077
loss: 1.1093701124191284,grad_norm: 0.9999993066316327, iteration: 304078
loss: 1.0255366563796997,grad_norm: 0.7902853988030047, iteration: 304079
loss: 0.988805890083313,grad_norm: 0.9625021312899363, iteration: 304080
loss: 1.0278935432434082,grad_norm: 0.8083785288903992, iteration: 304081
loss: 1.0008076429367065,grad_norm: 0.9490770024562539, iteration: 304082
loss: 0.9929060935974121,grad_norm: 0.8142380328070363, iteration: 304083
loss: 1.0076698064804077,grad_norm: 0.9999990543471052, iteration: 304084
loss: 0.993003785610199,grad_norm: 0.9144160757072909, iteration: 304085
loss: 0.9887145757675171,grad_norm: 0.981078972352941, iteration: 304086
loss: 0.997282862663269,grad_norm: 0.8300420164109079, iteration: 304087
loss: 0.9902381896972656,grad_norm: 0.8389250168761816, iteration: 304088
loss: 1.0308204889297485,grad_norm: 0.825932564192199, iteration: 304089
loss: 1.1688505411148071,grad_norm: 0.9999996894914194, iteration: 304090
loss: 1.011285662651062,grad_norm: 0.9601015302273541, iteration: 304091
loss: 0.9733211398124695,grad_norm: 0.7366514024361609, iteration: 304092
loss: 0.9625913500785828,grad_norm: 0.9999991295030213, iteration: 304093
loss: 1.1720993518829346,grad_norm: 0.9999993748225342, iteration: 304094
loss: 0.9853503704071045,grad_norm: 0.7710273159904835, iteration: 304095
loss: 0.9979910254478455,grad_norm: 0.9999991735867975, iteration: 304096
loss: 1.016190767288208,grad_norm: 0.8746271606881765, iteration: 304097
loss: 1.0119975805282593,grad_norm: 0.7241208726701821, iteration: 304098
loss: 0.9585486650466919,grad_norm: 0.9999991866994485, iteration: 304099
loss: 0.9957129955291748,grad_norm: 0.9999996043814294, iteration: 304100
loss: 1.0225441455841064,grad_norm: 0.99999972078159, iteration: 304101
loss: 1.032155156135559,grad_norm: 0.9487839197202664, iteration: 304102
loss: 1.1733200550079346,grad_norm: 0.9999999594017505, iteration: 304103
loss: 1.0215516090393066,grad_norm: 0.7754959449945282, iteration: 304104
loss: 1.074006199836731,grad_norm: 0.9999993556551017, iteration: 304105
loss: 0.979647159576416,grad_norm: 0.86750710135533, iteration: 304106
loss: 0.9900060296058655,grad_norm: 0.7771505053828168, iteration: 304107
loss: 1.0109308958053589,grad_norm: 0.6727206548827022, iteration: 304108
loss: 0.979387104511261,grad_norm: 0.8320485514488578, iteration: 304109
loss: 0.9551461338996887,grad_norm: 0.8081783093311976, iteration: 304110
loss: 1.0214561223983765,grad_norm: 0.8255917248590652, iteration: 304111
loss: 0.9935203790664673,grad_norm: 0.8837067671051748, iteration: 304112
loss: 0.9906281232833862,grad_norm: 0.8353513209246904, iteration: 304113
loss: 0.9855203628540039,grad_norm: 0.8367591111004951, iteration: 304114
loss: 1.0145567655563354,grad_norm: 0.9999999684341377, iteration: 304115
loss: 1.0235590934753418,grad_norm: 0.8833439177350106, iteration: 304116
loss: 1.001173496246338,grad_norm: 0.8641138010383155, iteration: 304117
loss: 1.0130337476730347,grad_norm: 0.7987717902289181, iteration: 304118
loss: 1.010345220565796,grad_norm: 0.8524824478792932, iteration: 304119
loss: 0.9678863883018494,grad_norm: 0.9999994553718172, iteration: 304120
loss: 1.1068071126937866,grad_norm: 0.9999998565031917, iteration: 304121
loss: 1.0301553010940552,grad_norm: 0.9999992344399492, iteration: 304122
loss: 0.9922457933425903,grad_norm: 0.9793973902697513, iteration: 304123
loss: 1.030261516571045,grad_norm: 0.9999994917529391, iteration: 304124
loss: 0.9649473428726196,grad_norm: 0.7479501621534658, iteration: 304125
loss: 1.0007014274597168,grad_norm: 0.8058547727281563, iteration: 304126
loss: 1.0139013528823853,grad_norm: 0.9999993718980973, iteration: 304127
loss: 0.9892660975456238,grad_norm: 0.9999991079345989, iteration: 304128
loss: 0.9681079387664795,grad_norm: 0.9999995848384038, iteration: 304129
loss: 1.0017207860946655,grad_norm: 0.8636839485177702, iteration: 304130
loss: 0.983083963394165,grad_norm: 0.9130528885763152, iteration: 304131
loss: 0.9984400272369385,grad_norm: 0.8757615390638672, iteration: 304132
loss: 0.9447826147079468,grad_norm: 0.787747703314947, iteration: 304133
loss: 1.0313020944595337,grad_norm: 0.842677628417948, iteration: 304134
loss: 1.033769130706787,grad_norm: 0.8522665629204507, iteration: 304135
loss: 0.9955426454544067,grad_norm: 0.9999992309840763, iteration: 304136
loss: 0.98003089427948,grad_norm: 0.8378411024501354, iteration: 304137
loss: 1.0057815313339233,grad_norm: 0.8140543162237742, iteration: 304138
loss: 0.9893752336502075,grad_norm: 0.8776202736416422, iteration: 304139
loss: 1.0643818378448486,grad_norm: 0.9999997902465065, iteration: 304140
loss: 1.0123660564422607,grad_norm: 0.9999990317522942, iteration: 304141
loss: 1.0218123197555542,grad_norm: 0.888864505598774, iteration: 304142
loss: 0.9835218787193298,grad_norm: 0.8308807861237985, iteration: 304143
loss: 0.9949313998222351,grad_norm: 0.999999604718976, iteration: 304144
loss: 1.0057778358459473,grad_norm: 0.8117961516549822, iteration: 304145
loss: 1.0108948945999146,grad_norm: 0.9866034668777331, iteration: 304146
loss: 1.0230063199996948,grad_norm: 0.8266860515008332, iteration: 304147
loss: 1.0232512950897217,grad_norm: 0.8568505033833212, iteration: 304148
loss: 0.9787269830703735,grad_norm: 0.79724052002417, iteration: 304149
loss: 1.0715965032577515,grad_norm: 0.9472817837409658, iteration: 304150
loss: 0.9847227931022644,grad_norm: 0.8053690720901511, iteration: 304151
loss: 1.0953025817871094,grad_norm: 0.9999990289327194, iteration: 304152
loss: 0.9787026047706604,grad_norm: 0.7514109855134171, iteration: 304153
loss: 1.0155434608459473,grad_norm: 0.8738957475694668, iteration: 304154
loss: 1.0179400444030762,grad_norm: 0.8091728218879963, iteration: 304155
loss: 0.9953371286392212,grad_norm: 0.9999994695997931, iteration: 304156
loss: 1.0284051895141602,grad_norm: 0.8127604639780404, iteration: 304157
loss: 1.0236117839813232,grad_norm: 0.8250576988315315, iteration: 304158
loss: 1.0164436101913452,grad_norm: 0.9999990852732962, iteration: 304159
loss: 1.0081595182418823,grad_norm: 0.8312744811701964, iteration: 304160
loss: 1.0291476249694824,grad_norm: 0.7846043550433488, iteration: 304161
loss: 0.9963862895965576,grad_norm: 0.7795308097765071, iteration: 304162
loss: 1.0178484916687012,grad_norm: 0.8427187804705444, iteration: 304163
loss: 1.0137126445770264,grad_norm: 0.9999996171973741, iteration: 304164
loss: 0.966153621673584,grad_norm: 0.9963410237225743, iteration: 304165
loss: 0.9664998054504395,grad_norm: 0.8922423437662861, iteration: 304166
loss: 1.0615040063858032,grad_norm: 0.9999996935610995, iteration: 304167
loss: 0.9922197461128235,grad_norm: 0.815320463401401, iteration: 304168
loss: 1.0928517580032349,grad_norm: 0.9999990994784681, iteration: 304169
loss: 1.003366231918335,grad_norm: 0.9999990650809653, iteration: 304170
loss: 1.0006791353225708,grad_norm: 0.9049588955952124, iteration: 304171
loss: 0.9979081153869629,grad_norm: 0.9105013239069913, iteration: 304172
loss: 0.9749306440353394,grad_norm: 0.7679413590732822, iteration: 304173
loss: 1.002963662147522,grad_norm: 0.8694837447085794, iteration: 304174
loss: 1.061445951461792,grad_norm: 0.9999992452372658, iteration: 304175
loss: 1.0460290908813477,grad_norm: 0.999999970277702, iteration: 304176
loss: 0.9952700734138489,grad_norm: 0.8619015503634214, iteration: 304177
loss: 0.9828031063079834,grad_norm: 0.7634184077297523, iteration: 304178
loss: 0.9864001274108887,grad_norm: 0.7952087407894511, iteration: 304179
loss: 1.0090852975845337,grad_norm: 0.8901274731980707, iteration: 304180
loss: 1.0145264863967896,grad_norm: 0.9999995242782613, iteration: 304181
loss: 1.0107923746109009,grad_norm: 0.8634476482044281, iteration: 304182
loss: 0.9609596133232117,grad_norm: 0.8007867071738265, iteration: 304183
loss: 1.005410075187683,grad_norm: 0.8951859809709494, iteration: 304184
loss: 1.0898268222808838,grad_norm: 0.9551437831147457, iteration: 304185
loss: 1.071762204170227,grad_norm: 0.9999996302520664, iteration: 304186
loss: 1.0378611087799072,grad_norm: 0.7426628617556948, iteration: 304187
loss: 0.9970869421958923,grad_norm: 0.8263516315239328, iteration: 304188
loss: 1.0482467412948608,grad_norm: 0.9999990333720565, iteration: 304189
loss: 0.9862095713615417,grad_norm: 0.8990535015814928, iteration: 304190
loss: 1.0040684938430786,grad_norm: 0.9999990598688604, iteration: 304191
loss: 1.0159088373184204,grad_norm: 0.9906321705072236, iteration: 304192
loss: 1.0017048120498657,grad_norm: 0.7775465260136681, iteration: 304193
loss: 1.0142606496810913,grad_norm: 0.8931677534487228, iteration: 304194
loss: 1.007986068725586,grad_norm: 0.9999991577314576, iteration: 304195
loss: 1.080966830253601,grad_norm: 0.9481808293556768, iteration: 304196
loss: 0.9777259826660156,grad_norm: 0.9999991836719331, iteration: 304197
loss: 0.9919058680534363,grad_norm: 0.9999990972934112, iteration: 304198
loss: 0.9856558442115784,grad_norm: 0.8413410912036258, iteration: 304199
loss: 0.9705134630203247,grad_norm: 0.8622145127508781, iteration: 304200
loss: 0.982924222946167,grad_norm: 0.7972141619828297, iteration: 304201
loss: 1.0149157047271729,grad_norm: 0.9148046300083863, iteration: 304202
loss: 0.9866538047790527,grad_norm: 0.8494020072956898, iteration: 304203
loss: 1.0213408470153809,grad_norm: 0.999999156256444, iteration: 304204
loss: 1.0837810039520264,grad_norm: 0.9999991486348552, iteration: 304205
loss: 1.003372311592102,grad_norm: 0.7933703974172716, iteration: 304206
loss: 0.9964907169342041,grad_norm: 0.7372167146890363, iteration: 304207
loss: 1.0941849946975708,grad_norm: 0.9999990656106981, iteration: 304208
loss: 1.0287692546844482,grad_norm: 0.7621571248347918, iteration: 304209
loss: 0.9720810651779175,grad_norm: 0.9362605669126337, iteration: 304210
loss: 1.0296339988708496,grad_norm: 0.9624716272582218, iteration: 304211
loss: 1.0129075050354004,grad_norm: 0.9197976107200414, iteration: 304212
loss: 1.0209500789642334,grad_norm: 0.9865571971783446, iteration: 304213
loss: 1.0422934293746948,grad_norm: 0.8356834298691141, iteration: 304214
loss: 1.0094019174575806,grad_norm: 0.746321237312423, iteration: 304215
loss: 1.001692771911621,grad_norm: 0.891228907642406, iteration: 304216
loss: 0.9989717602729797,grad_norm: 0.9244095666567401, iteration: 304217
loss: 1.0092720985412598,grad_norm: 0.9999992467416463, iteration: 304218
loss: 1.0385282039642334,grad_norm: 0.9999991274201467, iteration: 304219
loss: 1.0316897630691528,grad_norm: 0.8885780571458638, iteration: 304220
loss: 1.0141478776931763,grad_norm: 0.7922887425823041, iteration: 304221
loss: 1.0185866355895996,grad_norm: 0.9999994154236238, iteration: 304222
loss: 1.1566053628921509,grad_norm: 0.9999998637805995, iteration: 304223
loss: 1.0063656568527222,grad_norm: 0.7020152018035516, iteration: 304224
loss: 0.985319972038269,grad_norm: 0.9799006210073545, iteration: 304225
loss: 1.0071849822998047,grad_norm: 0.8162612569066107, iteration: 304226
loss: 1.0160408020019531,grad_norm: 0.8555190393267464, iteration: 304227
loss: 1.0176444053649902,grad_norm: 0.8576807759322164, iteration: 304228
loss: 0.9954814910888672,grad_norm: 0.8485023448693533, iteration: 304229
loss: 1.0558161735534668,grad_norm: 0.9999996005677572, iteration: 304230
loss: 1.0011752843856812,grad_norm: 0.999999210900737, iteration: 304231
loss: 1.1206783056259155,grad_norm: 0.9999991559684541, iteration: 304232
loss: 1.003980040550232,grad_norm: 0.8850598724816385, iteration: 304233
loss: 0.9911678433418274,grad_norm: 0.9999991413983562, iteration: 304234
loss: 1.0039422512054443,grad_norm: 0.8706275977662102, iteration: 304235
loss: 0.9788379669189453,grad_norm: 0.8962358848699294, iteration: 304236
loss: 1.0121679306030273,grad_norm: 0.7022087920218419, iteration: 304237
loss: 0.9667069911956787,grad_norm: 0.7880854989078127, iteration: 304238
loss: 1.0355876684188843,grad_norm: 0.9215540006540072, iteration: 304239
loss: 1.046156644821167,grad_norm: 0.9999992536303773, iteration: 304240
loss: 1.0137938261032104,grad_norm: 0.8291888145308542, iteration: 304241
loss: 1.0353775024414062,grad_norm: 0.9999992922338429, iteration: 304242
loss: 1.0179996490478516,grad_norm: 0.8474829384218812, iteration: 304243
loss: 0.9967187643051147,grad_norm: 0.7800220476498309, iteration: 304244
loss: 1.0255111455917358,grad_norm: 0.8590320976280862, iteration: 304245
loss: 1.088111162185669,grad_norm: 0.9999996316395481, iteration: 304246
loss: 0.9799469113349915,grad_norm: 0.8376473144904877, iteration: 304247
loss: 1.057004690170288,grad_norm: 0.8995852816762677, iteration: 304248
loss: 0.9991970658302307,grad_norm: 0.8829868004271122, iteration: 304249
loss: 0.9723461866378784,grad_norm: 0.8065837667063523, iteration: 304250
loss: 0.9743841290473938,grad_norm: 0.7408973008745985, iteration: 304251
loss: 0.9884936809539795,grad_norm: 0.8208000780974978, iteration: 304252
loss: 1.0082390308380127,grad_norm: 0.8872355244529598, iteration: 304253
loss: 0.9895150065422058,grad_norm: 1.0000000362361081, iteration: 304254
loss: 0.9638064503669739,grad_norm: 0.7861646328252531, iteration: 304255
loss: 1.0432977676391602,grad_norm: 0.9999998817937901, iteration: 304256
loss: 0.9772781133651733,grad_norm: 0.8136084691288392, iteration: 304257
loss: 0.9887455105781555,grad_norm: 0.8859886171385855, iteration: 304258
loss: 1.0453051328659058,grad_norm: 0.9999990525432707, iteration: 304259
loss: 1.0084725618362427,grad_norm: 0.7888366416259437, iteration: 304260
loss: 1.0060086250305176,grad_norm: 0.7386606091068021, iteration: 304261
loss: 1.0529677867889404,grad_norm: 0.953001640423757, iteration: 304262
loss: 1.0185353755950928,grad_norm: 0.825700614898446, iteration: 304263
loss: 1.0183664560317993,grad_norm: 0.8395801768981885, iteration: 304264
loss: 0.9778150916099548,grad_norm: 0.7310626791634462, iteration: 304265
loss: 0.9871753454208374,grad_norm: 0.8509013582435438, iteration: 304266
loss: 1.0442991256713867,grad_norm: 0.8750119075074754, iteration: 304267
loss: 0.9658761620521545,grad_norm: 0.83010136787193, iteration: 304268
loss: 1.0140708684921265,grad_norm: 0.9999993053370934, iteration: 304269
loss: 0.9859100580215454,grad_norm: 0.9999990824954267, iteration: 304270
loss: 1.0373408794403076,grad_norm: 0.7790168051256129, iteration: 304271
loss: 1.0056368112564087,grad_norm: 0.8618366448016744, iteration: 304272
loss: 1.0761547088623047,grad_norm: 0.999999838877775, iteration: 304273
loss: 0.9773028492927551,grad_norm: 0.8886216309888162, iteration: 304274
loss: 1.0202913284301758,grad_norm: 0.9374334683848491, iteration: 304275
loss: 1.0186911821365356,grad_norm: 0.9999996631497444, iteration: 304276
loss: 1.0337504148483276,grad_norm: 0.9999998011956874, iteration: 304277
loss: 1.1109784841537476,grad_norm: 0.9999992207222462, iteration: 304278
loss: 1.0457712411880493,grad_norm: 0.840330415278667, iteration: 304279
loss: 1.072171926498413,grad_norm: 0.999999791062263, iteration: 304280
loss: 1.0420650243759155,grad_norm: 0.732480174668884, iteration: 304281
loss: 1.1294208765029907,grad_norm: 0.999999026132766, iteration: 304282
loss: 1.0285171270370483,grad_norm: 0.814954355949222, iteration: 304283
loss: 0.998151421546936,grad_norm: 0.7318261792604932, iteration: 304284
loss: 1.0035057067871094,grad_norm: 0.9999992623127663, iteration: 304285
loss: 1.0132865905761719,grad_norm: 0.8938895391536077, iteration: 304286
loss: 0.997005045413971,grad_norm: 0.8969580556527191, iteration: 304287
loss: 1.0320724248886108,grad_norm: 0.9285477654041635, iteration: 304288
loss: 0.9659149646759033,grad_norm: 0.8683307118569152, iteration: 304289
loss: 1.0158605575561523,grad_norm: 0.999999055230253, iteration: 304290
loss: 1.0167467594146729,grad_norm: 0.9999992655025804, iteration: 304291
loss: 1.0495227575302124,grad_norm: 0.9999998971647998, iteration: 304292
loss: 1.003341555595398,grad_norm: 0.6481308704306753, iteration: 304293
loss: 1.0365476608276367,grad_norm: 0.9999994926465198, iteration: 304294
loss: 0.979224681854248,grad_norm: 0.6569451498726271, iteration: 304295
loss: 0.9773656725883484,grad_norm: 0.9778342415904998, iteration: 304296
loss: 0.9792423844337463,grad_norm: 0.726545145184659, iteration: 304297
loss: 1.013369083404541,grad_norm: 0.8097776876392905, iteration: 304298
loss: 1.057541012763977,grad_norm: 0.9999996032604798, iteration: 304299
loss: 0.9934789538383484,grad_norm: 0.851277601364955, iteration: 304300
loss: 0.9854543209075928,grad_norm: 0.895207161618125, iteration: 304301
loss: 0.9911110997200012,grad_norm: 0.9184760704730052, iteration: 304302
loss: 1.0266969203948975,grad_norm: 0.9999998623549864, iteration: 304303
loss: 0.9776519536972046,grad_norm: 0.8628400725594387, iteration: 304304
loss: 1.0092427730560303,grad_norm: 0.7914217051067328, iteration: 304305
loss: 0.9999493360519409,grad_norm: 0.7905784899620596, iteration: 304306
loss: 1.001534104347229,grad_norm: 0.9999990301899463, iteration: 304307
loss: 1.0605217218399048,grad_norm: 0.9999993935408612, iteration: 304308
loss: 0.9911825656890869,grad_norm: 0.9313445256243537, iteration: 304309
loss: 1.0169347524642944,grad_norm: 0.7696750862221868, iteration: 304310
loss: 1.0488560199737549,grad_norm: 0.9999994643994112, iteration: 304311
loss: 0.9559206366539001,grad_norm: 0.9208216878735599, iteration: 304312
loss: 1.051787257194519,grad_norm: 0.999999616290047, iteration: 304313
loss: 1.0048593282699585,grad_norm: 0.9084033370481107, iteration: 304314
loss: 1.000769019126892,grad_norm: 0.8130484822383409, iteration: 304315
loss: 1.032780408859253,grad_norm: 0.9999999011206044, iteration: 304316
loss: 1.11806058883667,grad_norm: 0.9999993566367179, iteration: 304317
loss: 1.0704878568649292,grad_norm: 0.9999997902832032, iteration: 304318
loss: 1.0379770994186401,grad_norm: 0.905546743478966, iteration: 304319
loss: 1.005198359489441,grad_norm: 0.9067477726352633, iteration: 304320
loss: 1.051645278930664,grad_norm: 0.8668261667745999, iteration: 304321
loss: 0.9900926947593689,grad_norm: 0.7392647182931606, iteration: 304322
loss: 0.9969176650047302,grad_norm: 0.9999995523748392, iteration: 304323
loss: 1.005681037902832,grad_norm: 0.7783114598811715, iteration: 304324
loss: 0.9966568946838379,grad_norm: 0.7674636834142115, iteration: 304325
loss: 1.006279468536377,grad_norm: 0.8780733011539819, iteration: 304326
loss: 0.9870378971099854,grad_norm: 0.9056915975330698, iteration: 304327
loss: 0.9913935661315918,grad_norm: 0.963588450328004, iteration: 304328
loss: 1.0121361017227173,grad_norm: 0.9999991701542547, iteration: 304329
loss: 1.0575103759765625,grad_norm: 0.9999992347535905, iteration: 304330
loss: 0.9670107960700989,grad_norm: 0.994997584702141, iteration: 304331
loss: 0.9656340479850769,grad_norm: 0.8176261619097441, iteration: 304332
loss: 1.0000666379928589,grad_norm: 0.953507062421053, iteration: 304333
loss: 0.987433910369873,grad_norm: 0.9999989424741459, iteration: 304334
loss: 1.0252453088760376,grad_norm: 0.9999994795714635, iteration: 304335
loss: 0.9946848750114441,grad_norm: 0.9999993724540636, iteration: 304336
loss: 0.984386146068573,grad_norm: 0.8823131448200264, iteration: 304337
loss: 1.006651520729065,grad_norm: 0.9821258910635989, iteration: 304338
loss: 1.0337861776351929,grad_norm: 0.9999999129290753, iteration: 304339
loss: 0.9954435229301453,grad_norm: 0.9999991286531782, iteration: 304340
loss: 1.0224359035491943,grad_norm: 0.8610045914754775, iteration: 304341
loss: 0.9931690692901611,grad_norm: 0.822446380187832, iteration: 304342
loss: 1.0203410387039185,grad_norm: 0.9901469177267839, iteration: 304343
loss: 1.0261790752410889,grad_norm: 0.9999996347108101, iteration: 304344
loss: 1.022248387336731,grad_norm: 0.9979119323277533, iteration: 304345
loss: 1.01483952999115,grad_norm: 0.8972596870551611, iteration: 304346
loss: 1.0347707271575928,grad_norm: 0.9999994785361057, iteration: 304347
loss: 0.9542714357376099,grad_norm: 0.9999993404643308, iteration: 304348
loss: 0.9767490029335022,grad_norm: 0.9999991805890697, iteration: 304349
loss: 0.9760698080062866,grad_norm: 0.7616043390631726, iteration: 304350
loss: 1.0272107124328613,grad_norm: 0.9999998462476314, iteration: 304351
loss: 0.9887716770172119,grad_norm: 0.7526756972573467, iteration: 304352
loss: 1.0080112218856812,grad_norm: 0.8772120514920918, iteration: 304353
loss: 1.0155985355377197,grad_norm: 0.8883189649949412, iteration: 304354
loss: 1.0689448118209839,grad_norm: 0.9999996921288646, iteration: 304355
loss: 0.97909015417099,grad_norm: 0.9999995643144558, iteration: 304356
loss: 0.9649776220321655,grad_norm: 0.8962302718292906, iteration: 304357
loss: 1.017629861831665,grad_norm: 0.9999991146958339, iteration: 304358
loss: 1.0051403045654297,grad_norm: 0.6541229919820192, iteration: 304359
loss: 1.0478355884552002,grad_norm: 0.9999991015250143, iteration: 304360
loss: 0.9738258719444275,grad_norm: 0.9999992686964009, iteration: 304361
loss: 0.9986498951911926,grad_norm: 0.8760963855840418, iteration: 304362
loss: 1.00973379611969,grad_norm: 0.8621189175894985, iteration: 304363
loss: 0.951299786567688,grad_norm: 0.8262814553852794, iteration: 304364
loss: 0.9911878705024719,grad_norm: 0.9999992463493039, iteration: 304365
loss: 1.0742112398147583,grad_norm: 0.9999990814172344, iteration: 304366
loss: 1.0052030086517334,grad_norm: 0.9999991020390832, iteration: 304367
loss: 1.0706958770751953,grad_norm: 0.9999999015942674, iteration: 304368
loss: 0.9974208474159241,grad_norm: 0.8763623514714441, iteration: 304369
loss: 0.9904047846794128,grad_norm: 0.8453602915971956, iteration: 304370
loss: 0.9856948852539062,grad_norm: 0.7067655257188746, iteration: 304371
loss: 0.9807002544403076,grad_norm: 0.8681655186767799, iteration: 304372
loss: 1.0155866146087646,grad_norm: 0.9999991503876179, iteration: 304373
loss: 0.9813871383666992,grad_norm: 0.8884379769132431, iteration: 304374
loss: 0.9778594374656677,grad_norm: 0.9999991379868407, iteration: 304375
loss: 0.9955151081085205,grad_norm: 0.9067674194842211, iteration: 304376
loss: 1.0301291942596436,grad_norm: 0.7941525949428085, iteration: 304377
loss: 1.0334875583648682,grad_norm: 0.8122622488261367, iteration: 304378
loss: 1.0356345176696777,grad_norm: 0.9999990277885942, iteration: 304379
loss: 1.0104738473892212,grad_norm: 0.827830730031537, iteration: 304380
loss: 1.0522459745407104,grad_norm: 0.887179416066851, iteration: 304381
loss: 0.9995355010032654,grad_norm: 0.9052027575449996, iteration: 304382
loss: 1.0302307605743408,grad_norm: 0.9999995312117045, iteration: 304383
loss: 0.9995307922363281,grad_norm: 0.9999990150632644, iteration: 304384
loss: 1.0011351108551025,grad_norm: 0.8454033671922155, iteration: 304385
loss: 1.0174996852874756,grad_norm: 0.8038639442334313, iteration: 304386
loss: 1.0032483339309692,grad_norm: 0.9999991135510294, iteration: 304387
loss: 0.9693351984024048,grad_norm: 0.8068473037184977, iteration: 304388
loss: 0.9804653525352478,grad_norm: 0.8294309059626821, iteration: 304389
loss: 1.008496642112732,grad_norm: 0.9074905445759007, iteration: 304390
loss: 1.005194067955017,grad_norm: 0.7192949219160092, iteration: 304391
loss: 1.0019810199737549,grad_norm: 0.8013776919086152, iteration: 304392
loss: 1.036456823348999,grad_norm: 0.8891375086809069, iteration: 304393
loss: 1.007249355316162,grad_norm: 0.9999996608912639, iteration: 304394
loss: 0.992854654788971,grad_norm: 0.8603682319697168, iteration: 304395
loss: 0.9846763014793396,grad_norm: 0.9494051852283892, iteration: 304396
loss: 1.0009715557098389,grad_norm: 0.9999990238715589, iteration: 304397
loss: 0.99029141664505,grad_norm: 0.9481951688786618, iteration: 304398
loss: 1.0219217538833618,grad_norm: 0.900020863817844, iteration: 304399
loss: 0.9752083420753479,grad_norm: 0.8112443385477576, iteration: 304400
loss: 0.984935462474823,grad_norm: 0.9999992061006208, iteration: 304401
loss: 0.9948269128799438,grad_norm: 0.9999990792583117, iteration: 304402
loss: 0.9792242050170898,grad_norm: 0.8061694333254678, iteration: 304403
loss: 0.967384934425354,grad_norm: 0.7259697363222719, iteration: 304404
loss: 1.0222222805023193,grad_norm: 0.9475332830771944, iteration: 304405
loss: 1.1188817024230957,grad_norm: 0.9999997510081334, iteration: 304406
loss: 1.0379385948181152,grad_norm: 0.7819229485068956, iteration: 304407
loss: 0.9998005032539368,grad_norm: 0.9999992143037755, iteration: 304408
loss: 0.9875137209892273,grad_norm: 0.8675351085353931, iteration: 304409
loss: 1.0152688026428223,grad_norm: 0.822587453401341, iteration: 304410
loss: 1.015651822090149,grad_norm: 0.9103733343729948, iteration: 304411
loss: 1.0134903192520142,grad_norm: 0.7261493195598046, iteration: 304412
loss: 1.0496081113815308,grad_norm: 0.9999997685523211, iteration: 304413
loss: 0.9927978515625,grad_norm: 0.9617978323998546, iteration: 304414
loss: 1.0030511617660522,grad_norm: 0.8404237520259057, iteration: 304415
loss: 1.0404962301254272,grad_norm: 0.7811890359369269, iteration: 304416
loss: 1.0126270055770874,grad_norm: 0.9264189450715857, iteration: 304417
loss: 0.9857838153839111,grad_norm: 0.7579071258289082, iteration: 304418
loss: 1.0359351634979248,grad_norm: 0.9999997621824392, iteration: 304419
loss: 1.0090105533599854,grad_norm: 0.9999990780397258, iteration: 304420
loss: 0.9642033576965332,grad_norm: 0.9999990359008211, iteration: 304421
loss: 1.0228427648544312,grad_norm: 0.9999992704596811, iteration: 304422
loss: 0.9892033934593201,grad_norm: 0.8981091835863886, iteration: 304423
loss: 0.9456212520599365,grad_norm: 0.8357888792889541, iteration: 304424
loss: 0.9991085529327393,grad_norm: 0.8070089096184716, iteration: 304425
loss: 1.0196276903152466,grad_norm: 0.9382900730763141, iteration: 304426
loss: 1.0276503562927246,grad_norm: 0.9311941448166658, iteration: 304427
loss: 0.9913352131843567,grad_norm: 0.8477663394058085, iteration: 304428
loss: 1.0556503534317017,grad_norm: 0.9999990535346676, iteration: 304429
loss: 0.98760986328125,grad_norm: 0.7383078068891032, iteration: 304430
loss: 1.037111520767212,grad_norm: 0.7525508203888553, iteration: 304431
loss: 0.9526468515396118,grad_norm: 0.9314271141520057, iteration: 304432
loss: 1.020340085029602,grad_norm: 0.8221017249532205, iteration: 304433
loss: 1.00418221950531,grad_norm: 0.8363769652715501, iteration: 304434
loss: 1.006489634513855,grad_norm: 0.75437738165438, iteration: 304435
loss: 0.9999198317527771,grad_norm: 0.8040705070905707, iteration: 304436
loss: 0.9362325668334961,grad_norm: 0.8738123847480016, iteration: 304437
loss: 1.0140907764434814,grad_norm: 0.858259720006595, iteration: 304438
loss: 0.9749805331230164,grad_norm: 0.8447431467731356, iteration: 304439
loss: 0.9650217294692993,grad_norm: 0.773549295546646, iteration: 304440
loss: 1.0352959632873535,grad_norm: 0.8473606326828936, iteration: 304441
loss: 1.0061419010162354,grad_norm: 0.8849650967183406, iteration: 304442
loss: 1.0288662910461426,grad_norm: 0.7352594817353922, iteration: 304443
loss: 1.0600661039352417,grad_norm: 0.9999991859553268, iteration: 304444
loss: 1.00827956199646,grad_norm: 0.9999989855484602, iteration: 304445
loss: 0.9765837788581848,grad_norm: 0.7842836127062012, iteration: 304446
loss: 0.9840546250343323,grad_norm: 0.999999796391087, iteration: 304447
loss: 1.0548317432403564,grad_norm: 0.8198785343294951, iteration: 304448
loss: 0.9921846389770508,grad_norm: 0.916479892835396, iteration: 304449
loss: 0.9612614512443542,grad_norm: 0.8561645960286753, iteration: 304450
loss: 1.0024570226669312,grad_norm: 0.8903349745880679, iteration: 304451
loss: 0.9880655407905579,grad_norm: 0.8472560618941949, iteration: 304452
loss: 1.1342740058898926,grad_norm: 0.9999990825966368, iteration: 304453
loss: 0.9922199845314026,grad_norm: 0.999999150238271, iteration: 304454
loss: 1.0146422386169434,grad_norm: 0.748825802650967, iteration: 304455
loss: 1.0256574153900146,grad_norm: 0.8535893654375541, iteration: 304456
loss: 1.020419955253601,grad_norm: 0.9349737012055148, iteration: 304457
loss: 0.9853666424751282,grad_norm: 0.8186558941800091, iteration: 304458
loss: 1.0190948247909546,grad_norm: 0.939604784277038, iteration: 304459
loss: 1.1160897016525269,grad_norm: 0.9086389515312878, iteration: 304460
loss: 0.9829908609390259,grad_norm: 0.9000950515201458, iteration: 304461
loss: 0.9894235134124756,grad_norm: 0.7021469883398612, iteration: 304462
loss: 1.0326972007751465,grad_norm: 0.8463536733586762, iteration: 304463
loss: 0.9747228622436523,grad_norm: 0.9309551779605065, iteration: 304464
loss: 1.0037055015563965,grad_norm: 0.8655462003671726, iteration: 304465
loss: 0.9921371340751648,grad_norm: 0.8012413638224324, iteration: 304466
loss: 0.9788079857826233,grad_norm: 0.852301044473196, iteration: 304467
loss: 1.0206992626190186,grad_norm: 0.973198513551144, iteration: 304468
loss: 1.019105076789856,grad_norm: 0.8155816715453006, iteration: 304469
loss: 1.005651831626892,grad_norm: 0.7199000979755964, iteration: 304470
loss: 0.9662402868270874,grad_norm: 0.8025300832839326, iteration: 304471
loss: 0.9557344913482666,grad_norm: 0.8332890772466488, iteration: 304472
loss: 0.9958434700965881,grad_norm: 0.9269143084614337, iteration: 304473
loss: 0.9980066418647766,grad_norm: 0.9032196560529072, iteration: 304474
loss: 0.996004045009613,grad_norm: 0.7959568995323059, iteration: 304475
loss: 1.01215398311615,grad_norm: 0.7610728068523844, iteration: 304476
loss: 0.9767252206802368,grad_norm: 0.9293163360219507, iteration: 304477
loss: 1.0002092123031616,grad_norm: 0.8147310216783792, iteration: 304478
loss: 1.005200743675232,grad_norm: 0.8106926551200232, iteration: 304479
loss: 1.0298542976379395,grad_norm: 0.9999992348704255, iteration: 304480
loss: 0.982466995716095,grad_norm: 0.9017136673019253, iteration: 304481
loss: 1.0311698913574219,grad_norm: 1.0000000072093969, iteration: 304482
loss: 1.0082836151123047,grad_norm: 0.999999650084282, iteration: 304483
loss: 1.0044505596160889,grad_norm: 0.8332336368058768, iteration: 304484
loss: 0.973017692565918,grad_norm: 0.7960329537923546, iteration: 304485
loss: 1.0115817785263062,grad_norm: 0.93022238330238, iteration: 304486
loss: 1.0154539346694946,grad_norm: 0.9999997503837295, iteration: 304487
loss: 1.013459324836731,grad_norm: 0.8883499305405357, iteration: 304488
loss: 0.9723538160324097,grad_norm: 0.9296319197101465, iteration: 304489
loss: 1.0009695291519165,grad_norm: 0.953538018405336, iteration: 304490
loss: 0.994499146938324,grad_norm: 0.8258324744534881, iteration: 304491
loss: 0.9926771521568298,grad_norm: 0.7433698145449509, iteration: 304492
loss: 0.9679360389709473,grad_norm: 0.953582479988532, iteration: 304493
loss: 1.0019290447235107,grad_norm: 0.9029048603877323, iteration: 304494
loss: 0.9912858009338379,grad_norm: 0.8069675832372127, iteration: 304495
loss: 1.0311912298202515,grad_norm: 0.9999991640034304, iteration: 304496
loss: 1.025248408317566,grad_norm: 0.825622711940302, iteration: 304497
loss: 1.004035472869873,grad_norm: 0.8658057576056131, iteration: 304498
loss: 0.972378671169281,grad_norm: 0.8178867384832603, iteration: 304499
loss: 1.098422646522522,grad_norm: 0.9999998502199325, iteration: 304500
loss: 0.9639055132865906,grad_norm: 0.8065486529986662, iteration: 304501
loss: 1.0137463808059692,grad_norm: 0.8415812240832597, iteration: 304502
loss: 0.9972066879272461,grad_norm: 0.9129360651042963, iteration: 304503
loss: 1.03593909740448,grad_norm: 0.9999995997944462, iteration: 304504
loss: 0.9882820844650269,grad_norm: 0.8205085837138429, iteration: 304505
loss: 1.020707607269287,grad_norm: 0.8199921036951884, iteration: 304506
loss: 0.9725899696350098,grad_norm: 0.8126140433858856, iteration: 304507
loss: 0.9762896299362183,grad_norm: 0.8081378781286629, iteration: 304508
loss: 0.9993261098861694,grad_norm: 0.8175882696353866, iteration: 304509
loss: 0.9609209895133972,grad_norm: 0.9999992771394333, iteration: 304510
loss: 1.0089542865753174,grad_norm: 0.8966495743480243, iteration: 304511
loss: 0.9907439351081848,grad_norm: 0.9999990802198157, iteration: 304512
loss: 1.0028427839279175,grad_norm: 0.9064844358184072, iteration: 304513
loss: 0.9853155016899109,grad_norm: 0.8386672137542872, iteration: 304514
loss: 1.008488416671753,grad_norm: 0.8189234057876381, iteration: 304515
loss: 1.0353957414627075,grad_norm: 0.9999995761888966, iteration: 304516
loss: 0.9814230799674988,grad_norm: 0.8374186957685955, iteration: 304517
loss: 1.018088936805725,grad_norm: 0.7963122481473873, iteration: 304518
loss: 0.9480453729629517,grad_norm: 0.8614069695096732, iteration: 304519
loss: 0.9828243851661682,grad_norm: 0.7222440461260622, iteration: 304520
loss: 0.9876543879508972,grad_norm: 0.941048466108899, iteration: 304521
loss: 1.0346895456314087,grad_norm: 0.969192511491422, iteration: 304522
loss: 1.088354468345642,grad_norm: 0.9999998616450811, iteration: 304523
loss: 1.002590298652649,grad_norm: 0.746721306586888, iteration: 304524
loss: 1.0208923816680908,grad_norm: 0.9999996179857552, iteration: 304525
loss: 1.0590405464172363,grad_norm: 0.7752225159765008, iteration: 304526
loss: 1.0604056119918823,grad_norm: 0.840680183877589, iteration: 304527
loss: 0.9989492297172546,grad_norm: 0.9999990920673476, iteration: 304528
loss: 1.0271152257919312,grad_norm: 0.9269160403565561, iteration: 304529
loss: 0.989159345626831,grad_norm: 0.8715517709874331, iteration: 304530
loss: 0.9868098497390747,grad_norm: 0.8617852512317807, iteration: 304531
loss: 1.029288411140442,grad_norm: 0.9999993842023718, iteration: 304532
loss: 0.9939163327217102,grad_norm: 0.8274735444612799, iteration: 304533
loss: 1.078874945640564,grad_norm: 0.9999990139469026, iteration: 304534
loss: 0.9555156826972961,grad_norm: 0.7846014453063959, iteration: 304535
loss: 1.005571961402893,grad_norm: 0.8092570409155634, iteration: 304536
loss: 0.988240122795105,grad_norm: 0.7276694212534751, iteration: 304537
loss: 1.1585478782653809,grad_norm: 0.9999993795121224, iteration: 304538
loss: 1.0148320198059082,grad_norm: 0.8399357152368817, iteration: 304539
loss: 1.0056043863296509,grad_norm: 0.9460416857213346, iteration: 304540
loss: 1.0354859828948975,grad_norm: 0.7781847963462883, iteration: 304541
loss: 0.9902679324150085,grad_norm: 0.9581667886752266, iteration: 304542
loss: 1.026944875717163,grad_norm: 0.9883107464834062, iteration: 304543
loss: 1.0046463012695312,grad_norm: 0.8494608652161679, iteration: 304544
loss: 1.0008246898651123,grad_norm: 0.9117296342736632, iteration: 304545
loss: 0.9957519173622131,grad_norm: 0.8890863378410009, iteration: 304546
loss: 1.0329945087432861,grad_norm: 0.9999993299697993, iteration: 304547
loss: 0.9883843660354614,grad_norm: 0.839904966361643, iteration: 304548
loss: 0.967742919921875,grad_norm: 0.8871535282473361, iteration: 304549
loss: 0.9891077876091003,grad_norm: 0.7181018966885234, iteration: 304550
loss: 1.006117582321167,grad_norm: 0.7629023276102078, iteration: 304551
loss: 0.9992714524269104,grad_norm: 0.9999991321949626, iteration: 304552
loss: 0.980711817741394,grad_norm: 0.9999989885785264, iteration: 304553
loss: 0.9988622665405273,grad_norm: 0.7731828455300364, iteration: 304554
loss: 0.991639256477356,grad_norm: 0.764150222126048, iteration: 304555
loss: 0.9838904738426208,grad_norm: 0.9279716903596376, iteration: 304556
loss: 1.0167908668518066,grad_norm: 0.8499251027583399, iteration: 304557
loss: 0.9850137829780579,grad_norm: 0.79691250928108, iteration: 304558
loss: 1.010668158531189,grad_norm: 0.9521359502345397, iteration: 304559
loss: 1.0020334720611572,grad_norm: 0.9999992255471292, iteration: 304560
loss: 1.019635796546936,grad_norm: 0.953437476781862, iteration: 304561
loss: 0.977584183216095,grad_norm: 0.8432564651218099, iteration: 304562
loss: 0.9944536685943604,grad_norm: 0.9474297587865556, iteration: 304563
loss: 1.013453483581543,grad_norm: 0.7984215877630453, iteration: 304564
loss: 1.0215513706207275,grad_norm: 0.8966295338598191, iteration: 304565
loss: 1.0033520460128784,grad_norm: 0.9135897361042464, iteration: 304566
loss: 1.0409897565841675,grad_norm: 0.9999990724206487, iteration: 304567
loss: 1.1257684230804443,grad_norm: 0.9999992735395384, iteration: 304568
loss: 1.0344347953796387,grad_norm: 0.8937578168813124, iteration: 304569
loss: 0.9663883447647095,grad_norm: 0.907275084424303, iteration: 304570
loss: 1.0326570272445679,grad_norm: 0.8886306737407904, iteration: 304571
loss: 0.9841995239257812,grad_norm: 0.9999988818874896, iteration: 304572
loss: 0.9766044616699219,grad_norm: 0.7907852335133104, iteration: 304573
loss: 1.0621459484100342,grad_norm: 0.8668125564099816, iteration: 304574
loss: 0.994216799736023,grad_norm: 0.8750681887326612, iteration: 304575
loss: 1.0328277349472046,grad_norm: 0.8362381529794993, iteration: 304576
loss: 1.0688380002975464,grad_norm: 0.9999996643076597, iteration: 304577
loss: 1.0985502004623413,grad_norm: 0.9999991514154093, iteration: 304578
loss: 1.0126579999923706,grad_norm: 0.8317044226783553, iteration: 304579
loss: 1.0111441612243652,grad_norm: 0.9483337502756954, iteration: 304580
loss: 1.0124423503875732,grad_norm: 0.709570471775761, iteration: 304581
loss: 1.0846467018127441,grad_norm: 0.8422606023108613, iteration: 304582
loss: 1.0275508165359497,grad_norm: 0.9999999892257299, iteration: 304583
loss: 1.0761687755584717,grad_norm: 0.8944204706530317, iteration: 304584
loss: 0.9992756247520447,grad_norm: 0.7665483959193153, iteration: 304585
loss: 0.9985056519508362,grad_norm: 0.8600394094892537, iteration: 304586
loss: 0.9768834710121155,grad_norm: 0.812662449706492, iteration: 304587
loss: 0.9910251498222351,grad_norm: 0.853279159764088, iteration: 304588
loss: 0.9708726406097412,grad_norm: 0.9049899922924658, iteration: 304589
loss: 1.016757607460022,grad_norm: 0.7454608514442802, iteration: 304590
loss: 0.9919819235801697,grad_norm: 0.753977349325582, iteration: 304591
loss: 0.973853588104248,grad_norm: 0.8510609297677537, iteration: 304592
loss: 1.0042445659637451,grad_norm: 0.8728701029322389, iteration: 304593
loss: 0.9936521649360657,grad_norm: 0.8628802878703303, iteration: 304594
loss: 1.0057190656661987,grad_norm: 0.9327738686986655, iteration: 304595
loss: 0.9744157791137695,grad_norm: 0.8017129859894494, iteration: 304596
loss: 0.998207688331604,grad_norm: 0.8416141767347018, iteration: 304597
loss: 0.9924246072769165,grad_norm: 0.7453532736955903, iteration: 304598
loss: 0.9846106767654419,grad_norm: 0.761957969666455, iteration: 304599
loss: 1.0125677585601807,grad_norm: 0.8610969432659056, iteration: 304600
loss: 0.9845046997070312,grad_norm: 0.9485212836910418, iteration: 304601
loss: 1.0232454538345337,grad_norm: 0.789428524881409, iteration: 304602
loss: 1.040702223777771,grad_norm: 0.9999995343268048, iteration: 304603
loss: 0.9900487661361694,grad_norm: 0.9892879238462072, iteration: 304604
loss: 1.0166059732437134,grad_norm: 0.9999991213005645, iteration: 304605
loss: 1.0421525239944458,grad_norm: 0.81673846569592, iteration: 304606
loss: 1.0002540349960327,grad_norm: 0.9999990147164097, iteration: 304607
loss: 0.9842587113380432,grad_norm: 0.8272817990563984, iteration: 304608
loss: 0.9959614276885986,grad_norm: 0.8361926054511658, iteration: 304609
loss: 1.0161826610565186,grad_norm: 0.9485887525997624, iteration: 304610
loss: 1.014593243598938,grad_norm: 0.8010583018690226, iteration: 304611
loss: 1.011757254600525,grad_norm: 0.7603751184297189, iteration: 304612
loss: 0.9970546364784241,grad_norm: 0.9999990671593416, iteration: 304613
loss: 1.0103000402450562,grad_norm: 0.7534690509612433, iteration: 304614
loss: 1.0785807371139526,grad_norm: 0.9999999394147615, iteration: 304615
loss: 1.0137419700622559,grad_norm: 0.8458043964444264, iteration: 304616
loss: 0.9862129092216492,grad_norm: 0.8839560150267461, iteration: 304617
loss: 0.9967696070671082,grad_norm: 0.9999994173901942, iteration: 304618
loss: 1.003152847290039,grad_norm: 0.7797117019602092, iteration: 304619
loss: 0.9995092153549194,grad_norm: 0.9999993556710738, iteration: 304620
loss: 0.9917390942573547,grad_norm: 0.9085275990907895, iteration: 304621
loss: 1.0345510244369507,grad_norm: 0.8614246592091129, iteration: 304622
loss: 1.0182886123657227,grad_norm: 0.8186750354659896, iteration: 304623
loss: 1.0687915086746216,grad_norm: 0.999999533118085, iteration: 304624
loss: 0.9713531732559204,grad_norm: 0.830066425274693, iteration: 304625
loss: 0.9892164468765259,grad_norm: 0.7851409673880486, iteration: 304626
loss: 1.0492746829986572,grad_norm: 0.9151689383749354, iteration: 304627
loss: 1.0054879188537598,grad_norm: 0.9422462418498676, iteration: 304628
loss: 1.045293927192688,grad_norm: 0.8087435571763338, iteration: 304629
loss: 1.0400769710540771,grad_norm: 0.999999273537865, iteration: 304630
loss: 1.0159554481506348,grad_norm: 0.8205634590479037, iteration: 304631
loss: 1.024648666381836,grad_norm: 0.9292651248334605, iteration: 304632
loss: 1.1081160306930542,grad_norm: 0.9999998378552183, iteration: 304633
loss: 1.0161644220352173,grad_norm: 0.8698626874853393, iteration: 304634
loss: 1.0472105741500854,grad_norm: 0.9999990937847164, iteration: 304635
loss: 1.008276104927063,grad_norm: 0.999999872780471, iteration: 304636
loss: 1.0488954782485962,grad_norm: 0.9266105369458907, iteration: 304637
loss: 0.9776989817619324,grad_norm: 0.8172775513798971, iteration: 304638
loss: 1.0093053579330444,grad_norm: 0.8049136139832724, iteration: 304639
loss: 1.041131615638733,grad_norm: 0.8087328513784335, iteration: 304640
loss: 0.9975229501724243,grad_norm: 0.5939010997982588, iteration: 304641
loss: 1.0213913917541504,grad_norm: 0.7659661996807783, iteration: 304642
loss: 0.9744343161582947,grad_norm: 0.9999990827776023, iteration: 304643
loss: 1.0048246383666992,grad_norm: 0.836611137106424, iteration: 304644
loss: 0.9901090860366821,grad_norm: 0.8356048482501613, iteration: 304645
loss: 1.0071218013763428,grad_norm: 0.9014882181386751, iteration: 304646
loss: 0.9795150756835938,grad_norm: 0.6915106139710708, iteration: 304647
loss: 1.0236668586730957,grad_norm: 0.7803668567290123, iteration: 304648
loss: 1.0135080814361572,grad_norm: 0.8683636601959502, iteration: 304649
loss: 1.0252586603164673,grad_norm: 0.9999989557061073, iteration: 304650
loss: 1.1067641973495483,grad_norm: 0.9999998972783936, iteration: 304651
loss: 0.9650918841362,grad_norm: 0.9798112585739033, iteration: 304652
loss: 0.9904800653457642,grad_norm: 0.9031777353548177, iteration: 304653
loss: 0.9977803230285645,grad_norm: 0.8845375245303175, iteration: 304654
loss: 0.9406818151473999,grad_norm: 0.9134135727564809, iteration: 304655
loss: 0.9820683598518372,grad_norm: 0.824371591100392, iteration: 304656
loss: 1.0087785720825195,grad_norm: 0.9306208110190827, iteration: 304657
loss: 1.037283182144165,grad_norm: 0.9554451529254941, iteration: 304658
loss: 1.031700611114502,grad_norm: 0.9036597509202379, iteration: 304659
loss: 1.0219265222549438,grad_norm: 0.9999992089478573, iteration: 304660
loss: 1.0135301351547241,grad_norm: 0.9999998316089913, iteration: 304661
loss: 1.001303791999817,grad_norm: 0.8567979881679144, iteration: 304662
loss: 0.9990118145942688,grad_norm: 0.7661156393113427, iteration: 304663
loss: 0.9977679252624512,grad_norm: 0.8571836126879508, iteration: 304664
loss: 1.0272444486618042,grad_norm: 0.9999989798719078, iteration: 304665
loss: 1.0046969652175903,grad_norm: 0.8651423415935885, iteration: 304666
loss: 1.0646034479141235,grad_norm: 0.9999997947811432, iteration: 304667
loss: 1.1245002746582031,grad_norm: 0.9999995802743121, iteration: 304668
loss: 1.00808846950531,grad_norm: 0.831747415649824, iteration: 304669
loss: 1.0109624862670898,grad_norm: 0.7232999050029335, iteration: 304670
loss: 1.0289276838302612,grad_norm: 0.707387083993054, iteration: 304671
loss: 0.9855217933654785,grad_norm: 0.9397842741338065, iteration: 304672
loss: 0.9881475567817688,grad_norm: 0.939201849516788, iteration: 304673
loss: 0.9764975905418396,grad_norm: 0.933084291278032, iteration: 304674
loss: 0.9790100455284119,grad_norm: 0.9119982785249935, iteration: 304675
loss: 1.0050609111785889,grad_norm: 0.8910278927853286, iteration: 304676
loss: 0.9634000062942505,grad_norm: 0.8717398657987728, iteration: 304677
loss: 0.9879776835441589,grad_norm: 0.8825645673804945, iteration: 304678
loss: 1.0227158069610596,grad_norm: 0.9784931477352526, iteration: 304679
loss: 0.9825078845024109,grad_norm: 0.8110904603567244, iteration: 304680
loss: 1.0027977228164673,grad_norm: 0.7490872189099654, iteration: 304681
loss: 1.0069764852523804,grad_norm: 0.7474712762994911, iteration: 304682
loss: 1.0044914484024048,grad_norm: 0.833232607280629, iteration: 304683
loss: 1.0209256410598755,grad_norm: 0.9999992842275507, iteration: 304684
loss: 1.0887668132781982,grad_norm: 0.9968639463541309, iteration: 304685
loss: 1.0174040794372559,grad_norm: 0.9999990627305512, iteration: 304686
loss: 0.966048002243042,grad_norm: 0.8265072984450988, iteration: 304687
loss: 0.9767965078353882,grad_norm: 0.6716002768098723, iteration: 304688
loss: 0.9885788559913635,grad_norm: 0.8925049653084272, iteration: 304689
loss: 1.014899492263794,grad_norm: 0.7954861954032161, iteration: 304690
loss: 1.2160154581069946,grad_norm: 0.9999993334830978, iteration: 304691
loss: 0.9873080849647522,grad_norm: 0.8540709649323224, iteration: 304692
loss: 0.9981845617294312,grad_norm: 0.9423587506685832, iteration: 304693
loss: 1.0046088695526123,grad_norm: 0.6893241667551047, iteration: 304694
loss: 0.9850351214408875,grad_norm: 0.8436023441434943, iteration: 304695
loss: 0.9814446568489075,grad_norm: 0.7181556755240623, iteration: 304696
loss: 1.0360716581344604,grad_norm: 0.9999992587632798, iteration: 304697
loss: 1.0086469650268555,grad_norm: 0.9807631458638277, iteration: 304698
loss: 0.9905432462692261,grad_norm: 0.9999990504227204, iteration: 304699
loss: 1.0969740152359009,grad_norm: 0.99999907866376, iteration: 304700
loss: 0.9727696776390076,grad_norm: 0.8175040295114666, iteration: 304701
loss: 1.0097419023513794,grad_norm: 0.7932026341280534, iteration: 304702
loss: 1.0750153064727783,grad_norm: 0.9381420409227544, iteration: 304703
loss: 1.027858853340149,grad_norm: 0.9580557950565091, iteration: 304704
loss: 1.030435562133789,grad_norm: 0.9926718913750088, iteration: 304705
loss: 1.0017732381820679,grad_norm: 0.9999992054101492, iteration: 304706
loss: 1.029783844947815,grad_norm: 0.8832106138286764, iteration: 304707
loss: 1.0201983451843262,grad_norm: 0.7749512930911351, iteration: 304708
loss: 0.9947870969772339,grad_norm: 0.9208778281882984, iteration: 304709
loss: 1.0015690326690674,grad_norm: 0.9999995489282754, iteration: 304710
loss: 0.990668535232544,grad_norm: 0.7555691498710473, iteration: 304711
loss: 1.0198646783828735,grad_norm: 0.9400628333194738, iteration: 304712
loss: 1.02349853515625,grad_norm: 0.8782682912884915, iteration: 304713
loss: 0.976530909538269,grad_norm: 0.7981660065616968, iteration: 304714
loss: 1.0091736316680908,grad_norm: 0.8636318342720419, iteration: 304715
loss: 1.0177370309829712,grad_norm: 0.9321606954266386, iteration: 304716
loss: 1.0124731063842773,grad_norm: 0.9246782948532333, iteration: 304717
loss: 1.0053625106811523,grad_norm: 0.9999991264583571, iteration: 304718
loss: 0.9949840903282166,grad_norm: 0.6926372735155973, iteration: 304719
loss: 0.9899177551269531,grad_norm: 0.906895350061127, iteration: 304720
loss: 0.9909113049507141,grad_norm: 0.7747908303773838, iteration: 304721
loss: 0.9867424368858337,grad_norm: 0.8941311067938673, iteration: 304722
loss: 0.9976900219917297,grad_norm: 0.8432224722306282, iteration: 304723
loss: 0.9943259358406067,grad_norm: 0.9020734901889007, iteration: 304724
loss: 1.0526429414749146,grad_norm: 0.9999992219301755, iteration: 304725
loss: 0.9873443841934204,grad_norm: 0.7940915003433332, iteration: 304726
loss: 0.9828549027442932,grad_norm: 0.7954892924036602, iteration: 304727
loss: 1.0047369003295898,grad_norm: 0.7073320716464525, iteration: 304728
loss: 1.0162396430969238,grad_norm: 0.9999997604578865, iteration: 304729
loss: 1.0074697732925415,grad_norm: 0.8893064551604846, iteration: 304730
loss: 1.005610466003418,grad_norm: 0.9999993765499018, iteration: 304731
loss: 1.037744402885437,grad_norm: 0.9999999677657248, iteration: 304732
loss: 0.9951300024986267,grad_norm: 0.9081023177079863, iteration: 304733
loss: 1.0148495435714722,grad_norm: 0.9326930426571048, iteration: 304734
loss: 0.9877263307571411,grad_norm: 0.7945751174883892, iteration: 304735
loss: 0.9932400584220886,grad_norm: 0.9999996449531887, iteration: 304736
loss: 1.037081241607666,grad_norm: 0.8015589163671275, iteration: 304737
loss: 0.9852108955383301,grad_norm: 0.7794202568275754, iteration: 304738
loss: 0.9905853271484375,grad_norm: 0.7337985508408214, iteration: 304739
loss: 1.0263210535049438,grad_norm: 0.8248599209200078, iteration: 304740
loss: 1.0018157958984375,grad_norm: 0.9400653922189018, iteration: 304741
loss: 1.0021936893463135,grad_norm: 0.9999995238557585, iteration: 304742
loss: 1.0115481615066528,grad_norm: 0.9999996703414163, iteration: 304743
loss: 0.9798362255096436,grad_norm: 0.8133011547147987, iteration: 304744
loss: 0.9753503203392029,grad_norm: 0.8652472433616283, iteration: 304745
loss: 0.9995138645172119,grad_norm: 0.798724951830149, iteration: 304746
loss: 1.000259280204773,grad_norm: 0.8442637282044204, iteration: 304747
loss: 0.9824657440185547,grad_norm: 0.9866901001169825, iteration: 304748
loss: 0.9935874938964844,grad_norm: 0.829405789015351, iteration: 304749
loss: 1.0529276132583618,grad_norm: 0.9999993468231934, iteration: 304750
loss: 0.9975817203521729,grad_norm: 0.8221385525587931, iteration: 304751
loss: 0.9876610040664673,grad_norm: 0.8703542396404242, iteration: 304752
loss: 1.0092085599899292,grad_norm: 0.9999997514298498, iteration: 304753
loss: 0.9592253565788269,grad_norm: 0.8324250526509301, iteration: 304754
loss: 0.992299497127533,grad_norm: 0.7549190393989874, iteration: 304755
loss: 1.0104272365570068,grad_norm: 0.8948835363614971, iteration: 304756
loss: 0.979749321937561,grad_norm: 0.8439064287551935, iteration: 304757
loss: 1.055099368095398,grad_norm: 0.7447423195180191, iteration: 304758
loss: 1.0128998756408691,grad_norm: 0.9999990228833906, iteration: 304759
loss: 0.9697366952896118,grad_norm: 0.6863229664772587, iteration: 304760
loss: 0.9802958369255066,grad_norm: 0.8723290262518714, iteration: 304761
loss: 0.9690312743186951,grad_norm: 0.8282273558688928, iteration: 304762
loss: 0.9861901998519897,grad_norm: 0.9247393781361836, iteration: 304763
loss: 1.012035608291626,grad_norm: 0.7832641659745512, iteration: 304764
loss: 1.0341380834579468,grad_norm: 0.9999999053347556, iteration: 304765
loss: 0.9770088791847229,grad_norm: 0.6803291171678637, iteration: 304766
loss: 0.9839892983436584,grad_norm: 0.8116032233700343, iteration: 304767
loss: 0.9719182848930359,grad_norm: 0.8260792608413208, iteration: 304768
loss: 0.993243396282196,grad_norm: 0.8532834089368453, iteration: 304769
loss: 1.0134575366973877,grad_norm: 0.7698336549217633, iteration: 304770
loss: 0.9990100860595703,grad_norm: 0.9999990796494999, iteration: 304771
loss: 1.0149706602096558,grad_norm: 0.9999991554522255, iteration: 304772
loss: 1.077671766281128,grad_norm: 0.8711649040363835, iteration: 304773
loss: 1.00619637966156,grad_norm: 0.7929527176912944, iteration: 304774
loss: 1.0134083032608032,grad_norm: 0.9193425190913297, iteration: 304775
loss: 0.999406099319458,grad_norm: 0.7743435910433641, iteration: 304776
loss: 1.0139358043670654,grad_norm: 0.6849675313936195, iteration: 304777
loss: 1.0336005687713623,grad_norm: 0.9999990347479988, iteration: 304778
loss: 0.974453866481781,grad_norm: 0.9189908614401034, iteration: 304779
loss: 1.016584038734436,grad_norm: 0.9791187171846811, iteration: 304780
loss: 1.0381371974945068,grad_norm: 0.9999993732111933, iteration: 304781
loss: 0.9795371890068054,grad_norm: 0.8106440863407136, iteration: 304782
loss: 1.0123224258422852,grad_norm: 0.8843230225723478, iteration: 304783
loss: 0.9851575493812561,grad_norm: 0.6747877340519655, iteration: 304784
loss: 0.996249794960022,grad_norm: 0.75195186575881, iteration: 304785
loss: 1.0019431114196777,grad_norm: 0.9999999157182696, iteration: 304786
loss: 0.9943308234214783,grad_norm: 0.9999992490375614, iteration: 304787
loss: 0.9714995622634888,grad_norm: 0.999999697499306, iteration: 304788
loss: 1.0477945804595947,grad_norm: 0.999999031298667, iteration: 304789
loss: 1.0563892126083374,grad_norm: 0.9999992357486817, iteration: 304790
loss: 1.0035910606384277,grad_norm: 0.8895762209724509, iteration: 304791
loss: 1.0058248043060303,grad_norm: 0.9999992603753496, iteration: 304792
loss: 1.167453408241272,grad_norm: 0.9999998845515478, iteration: 304793
loss: 0.9847688674926758,grad_norm: 0.8426226498837376, iteration: 304794
loss: 0.9772859811782837,grad_norm: 0.8566605388082724, iteration: 304795
loss: 1.0628718137741089,grad_norm: 0.9999996832068354, iteration: 304796
loss: 0.9681267738342285,grad_norm: 0.7937067916141204, iteration: 304797
loss: 0.9933674335479736,grad_norm: 0.8780242637949904, iteration: 304798
loss: 1.0276554822921753,grad_norm: 0.8307786981084857, iteration: 304799
loss: 1.020113468170166,grad_norm: 0.8664006599936815, iteration: 304800
loss: 1.057908535003662,grad_norm: 0.9999999800825272, iteration: 304801
loss: 0.9753534197807312,grad_norm: 0.8298548355487559, iteration: 304802
loss: 1.0305228233337402,grad_norm: 0.7607223831666867, iteration: 304803
loss: 1.1261190176010132,grad_norm: 0.9999991954392301, iteration: 304804
loss: 1.0311179161071777,grad_norm: 0.798053490928117, iteration: 304805
loss: 1.0969337224960327,grad_norm: 0.9999999235518027, iteration: 304806
loss: 0.9909158945083618,grad_norm: 0.8296958349704836, iteration: 304807
loss: 1.0321234464645386,grad_norm: 0.7487515257430399, iteration: 304808
loss: 0.9979093074798584,grad_norm: 0.9999994620450189, iteration: 304809
loss: 0.9892640113830566,grad_norm: 0.9999996958085625, iteration: 304810
loss: 1.0340827703475952,grad_norm: 0.9885843555056949, iteration: 304811
loss: 0.9892439842224121,grad_norm: 0.8390873024437434, iteration: 304812
loss: 0.9999279379844666,grad_norm: 0.832104980529439, iteration: 304813
loss: 1.0124670267105103,grad_norm: 0.9999994216096775, iteration: 304814
loss: 1.024827480316162,grad_norm: 0.9162315002818943, iteration: 304815
loss: 0.9914426207542419,grad_norm: 0.8907569000485127, iteration: 304816
loss: 1.0025627613067627,grad_norm: 0.8601748453308093, iteration: 304817
loss: 1.0508277416229248,grad_norm: 0.8595604659309767, iteration: 304818
loss: 0.9779916405677795,grad_norm: 0.8611339645453717, iteration: 304819
loss: 0.974023163318634,grad_norm: 0.8824967197246115, iteration: 304820
loss: 0.9969980716705322,grad_norm: 0.990497450321354, iteration: 304821
loss: 1.0294344425201416,grad_norm: 0.8312721435506614, iteration: 304822
loss: 0.9834089279174805,grad_norm: 0.8040281663173221, iteration: 304823
loss: 1.0166287422180176,grad_norm: 0.9999989995254123, iteration: 304824
loss: 0.9857414960861206,grad_norm: 0.9441840373267544, iteration: 304825
loss: 0.9738641977310181,grad_norm: 0.9999996004561263, iteration: 304826
loss: 0.9738509058952332,grad_norm: 0.7926023788506826, iteration: 304827
loss: 1.1127935647964478,grad_norm: 0.999999223358869, iteration: 304828
loss: 0.9798145890235901,grad_norm: 0.71851361445868, iteration: 304829
loss: 0.9936739802360535,grad_norm: 0.8282479877079887, iteration: 304830
loss: 1.0210022926330566,grad_norm: 0.9999992887474967, iteration: 304831
loss: 0.9612748026847839,grad_norm: 0.8893704840594392, iteration: 304832
loss: 0.9729856848716736,grad_norm: 0.9999991233675278, iteration: 304833
loss: 0.9802571535110474,grad_norm: 0.8048006710166459, iteration: 304834
loss: 0.981847882270813,grad_norm: 0.7311362602113126, iteration: 304835
loss: 1.0388281345367432,grad_norm: 0.93455215289034, iteration: 304836
loss: 1.0045762062072754,grad_norm: 0.9999994382878696, iteration: 304837
loss: 1.1021521091461182,grad_norm: 0.9999999607082007, iteration: 304838
loss: 1.0189530849456787,grad_norm: 0.9999990563764461, iteration: 304839
loss: 0.9936451315879822,grad_norm: 0.7523796347275097, iteration: 304840
loss: 0.9973752498626709,grad_norm: 0.7899236266959863, iteration: 304841
loss: 1.0192979574203491,grad_norm: 0.6713132140222605, iteration: 304842
loss: 0.9913221001625061,grad_norm: 0.9192555304287212, iteration: 304843
loss: 0.9872217178344727,grad_norm: 0.8322241461674567, iteration: 304844
loss: 1.0354663133621216,grad_norm: 0.868677403260857, iteration: 304845
loss: 0.9901017546653748,grad_norm: 0.8063952146820643, iteration: 304846
loss: 0.9606764316558838,grad_norm: 0.9094575105385594, iteration: 304847
loss: 0.9918579459190369,grad_norm: 0.8821117911314023, iteration: 304848
loss: 0.966971755027771,grad_norm: 0.9999989854478296, iteration: 304849
loss: 0.9583122134208679,grad_norm: 0.9102525303461046, iteration: 304850
loss: 1.0089126825332642,grad_norm: 0.9999995780860101, iteration: 304851
loss: 1.004321575164795,grad_norm: 0.7478102135331746, iteration: 304852
loss: 1.0411630868911743,grad_norm: 0.9999998415092964, iteration: 304853
loss: 1.0072754621505737,grad_norm: 0.8400250232652744, iteration: 304854
loss: 1.0237092971801758,grad_norm: 0.9999992234937267, iteration: 304855
loss: 0.9814813733100891,grad_norm: 0.8931657881732716, iteration: 304856
loss: 1.0173499584197998,grad_norm: 0.8178782151953613, iteration: 304857
loss: 1.0464277267456055,grad_norm: 0.9993415666886803, iteration: 304858
loss: 1.0927584171295166,grad_norm: 0.9999993337370645, iteration: 304859
loss: 1.0106433629989624,grad_norm: 0.7834764049802163, iteration: 304860
loss: 0.9991461038589478,grad_norm: 0.8317788434044131, iteration: 304861
loss: 1.0271079540252686,grad_norm: 0.999999148800268, iteration: 304862
loss: 1.0023952722549438,grad_norm: 0.9426132962166226, iteration: 304863
loss: 0.9827627539634705,grad_norm: 0.9299283556046084, iteration: 304864
loss: 0.9670397639274597,grad_norm: 0.7917580351296862, iteration: 304865
loss: 1.0294196605682373,grad_norm: 0.972277962218234, iteration: 304866
loss: 1.008105993270874,grad_norm: 0.96466310460029, iteration: 304867
loss: 1.0265973806381226,grad_norm: 0.9999992776515988, iteration: 304868
loss: 1.0127522945404053,grad_norm: 0.9999990753051806, iteration: 304869
loss: 0.9912999272346497,grad_norm: 0.999999192933221, iteration: 304870
loss: 1.0102869272232056,grad_norm: 0.9999991419122926, iteration: 304871
loss: 0.9744531512260437,grad_norm: 0.9575212722351628, iteration: 304872
loss: 0.9799620509147644,grad_norm: 0.8106316253010496, iteration: 304873
loss: 1.048653483390808,grad_norm: 0.9999990963571084, iteration: 304874
loss: 1.0558918714523315,grad_norm: 0.9604055454203724, iteration: 304875
loss: 1.008778691291809,grad_norm: 0.9909497139701685, iteration: 304876
loss: 1.0097715854644775,grad_norm: 0.8987268519535865, iteration: 304877
loss: 0.9698182344436646,grad_norm: 0.9532689534584251, iteration: 304878
loss: 1.0392909049987793,grad_norm: 0.8483248939129049, iteration: 304879
loss: 1.0363417863845825,grad_norm: 0.6882587343272922, iteration: 304880
loss: 1.026627779006958,grad_norm: 0.7984024724283544, iteration: 304881
loss: 1.042777419090271,grad_norm: 0.9999999486973217, iteration: 304882
loss: 0.9805209040641785,grad_norm: 0.8803803679406489, iteration: 304883
loss: 1.0119160413742065,grad_norm: 0.7758225702588772, iteration: 304884
loss: 0.9820106625556946,grad_norm: 0.7688252561530253, iteration: 304885
loss: 0.9938269257545471,grad_norm: 0.8627743348628998, iteration: 304886
loss: 1.0308290719985962,grad_norm: 0.8414958089746284, iteration: 304887
loss: 1.037162184715271,grad_norm: 0.814820842955266, iteration: 304888
loss: 0.9755829572677612,grad_norm: 0.9999990754636331, iteration: 304889
loss: 1.0382274389266968,grad_norm: 0.8034000967672915, iteration: 304890
loss: 0.9944502115249634,grad_norm: 0.999999083958631, iteration: 304891
loss: 1.0372319221496582,grad_norm: 0.9999994269323251, iteration: 304892
loss: 1.0057904720306396,grad_norm: 0.8666729842050781, iteration: 304893
loss: 0.9730224609375,grad_norm: 0.8313998127320514, iteration: 304894
loss: 0.9639163613319397,grad_norm: 0.9694387610333385, iteration: 304895
loss: 1.0146454572677612,grad_norm: 0.825004968488311, iteration: 304896
loss: 1.1092135906219482,grad_norm: 0.9868515725030133, iteration: 304897
loss: 0.991385281085968,grad_norm: 0.9136928549897523, iteration: 304898
loss: 1.0146167278289795,grad_norm: 0.8411438060766636, iteration: 304899
loss: 1.0019726753234863,grad_norm: 0.8821595502504455, iteration: 304900
loss: 0.999857485294342,grad_norm: 0.7819720505994021, iteration: 304901
loss: 1.0095863342285156,grad_norm: 0.8534735353003311, iteration: 304902
loss: 0.9783048629760742,grad_norm: 0.9999991009722664, iteration: 304903
loss: 1.024736762046814,grad_norm: 0.88407325638063, iteration: 304904
loss: 1.0109213590621948,grad_norm: 0.999999553961309, iteration: 304905
loss: 0.9982779026031494,grad_norm: 0.8116198673941597, iteration: 304906
loss: 1.0264499187469482,grad_norm: 0.8095222593598357, iteration: 304907
loss: 1.0029449462890625,grad_norm: 0.9151384644819387, iteration: 304908
loss: 0.9966123700141907,grad_norm: 0.8311072160783463, iteration: 304909
loss: 1.0189175605773926,grad_norm: 0.9425063464991386, iteration: 304910
loss: 0.9734808206558228,grad_norm: 0.9999991561554511, iteration: 304911
loss: 1.0117391347885132,grad_norm: 0.9999989522233718, iteration: 304912
loss: 1.0186753273010254,grad_norm: 0.8962269707157342, iteration: 304913
loss: 1.016011357307434,grad_norm: 0.7546427069491991, iteration: 304914
loss: 0.9728688597679138,grad_norm: 0.9709166265291715, iteration: 304915
loss: 0.9992572069168091,grad_norm: 0.7774469598043467, iteration: 304916
loss: 1.0428974628448486,grad_norm: 0.8970106590453819, iteration: 304917
loss: 0.9680336117744446,grad_norm: 0.7973664813301318, iteration: 304918
loss: 0.9977641701698303,grad_norm: 0.7176851474021219, iteration: 304919
loss: 1.0030608177185059,grad_norm: 0.9999993835905696, iteration: 304920
loss: 0.980581521987915,grad_norm: 0.787761815466019, iteration: 304921
loss: 0.9881043434143066,grad_norm: 0.9281295175197282, iteration: 304922
loss: 0.9748083353042603,grad_norm: 0.8166539574446937, iteration: 304923
loss: 1.0022249221801758,grad_norm: 0.7474173732937744, iteration: 304924
loss: 0.9823206067085266,grad_norm: 0.9149224841164675, iteration: 304925
loss: 1.0028260946273804,grad_norm: 0.7548898574867323, iteration: 304926
loss: 1.0804572105407715,grad_norm: 0.9999995944687168, iteration: 304927
loss: 0.9971299767494202,grad_norm: 0.6807555001573885, iteration: 304928
loss: 1.0418599843978882,grad_norm: 0.9999993935721536, iteration: 304929
loss: 1.0543543100357056,grad_norm: 0.9999993634260639, iteration: 304930
loss: 1.0059337615966797,grad_norm: 0.8673305944711749, iteration: 304931
loss: 0.9873241782188416,grad_norm: 0.7850234171393539, iteration: 304932
loss: 0.9986787438392639,grad_norm: 0.9999990015361934, iteration: 304933
loss: 0.9728501439094543,grad_norm: 0.89409773459153, iteration: 304934
loss: 1.0297132730484009,grad_norm: 0.9999990425811481, iteration: 304935
loss: 1.1329649686813354,grad_norm: 0.9999994301127346, iteration: 304936
loss: 1.115039587020874,grad_norm: 0.8842596153917031, iteration: 304937
loss: 1.019838809967041,grad_norm: 0.7804640513588799, iteration: 304938
loss: 1.0508579015731812,grad_norm: 0.9999991762317382, iteration: 304939
loss: 1.0905400514602661,grad_norm: 0.9999992056255548, iteration: 304940
loss: 0.9839856624603271,grad_norm: 0.9313952002188306, iteration: 304941
loss: 1.0276374816894531,grad_norm: 0.9999993176835156, iteration: 304942
loss: 0.9747235178947449,grad_norm: 0.9999992614294009, iteration: 304943
loss: 1.011531114578247,grad_norm: 0.9999999653964874, iteration: 304944
loss: 1.0091713666915894,grad_norm: 0.8580806007110815, iteration: 304945
loss: 1.0247327089309692,grad_norm: 0.999998997464598, iteration: 304946
loss: 1.0850180387496948,grad_norm: 0.9999991061223653, iteration: 304947
loss: 1.0105422735214233,grad_norm: 0.8098284812720534, iteration: 304948
loss: 1.0628217458724976,grad_norm: 0.999999058887629, iteration: 304949
loss: 1.0187305212020874,grad_norm: 0.9999994318201245, iteration: 304950
loss: 1.0272668600082397,grad_norm: 0.7959409814021311, iteration: 304951
loss: 0.9862764477729797,grad_norm: 0.9677178488669784, iteration: 304952
loss: 1.0280139446258545,grad_norm: 0.9999995125007332, iteration: 304953
loss: 1.1097303628921509,grad_norm: 1.0000000907937965, iteration: 304954
loss: 1.0949270725250244,grad_norm: 0.9999992727078256, iteration: 304955
loss: 1.0047212839126587,grad_norm: 0.9999991911210823, iteration: 304956
loss: 1.0079479217529297,grad_norm: 0.7569188061094199, iteration: 304957
loss: 1.0300488471984863,grad_norm: 0.9999999036789364, iteration: 304958
loss: 1.0051854848861694,grad_norm: 0.9149753290425109, iteration: 304959
loss: 1.0522725582122803,grad_norm: 0.9999996685121159, iteration: 304960
loss: 1.033845067024231,grad_norm: 0.7714881115678057, iteration: 304961
loss: 1.0096514225006104,grad_norm: 0.9999998570123482, iteration: 304962
loss: 1.0080697536468506,grad_norm: 0.7882090144916937, iteration: 304963
loss: 1.081631064414978,grad_norm: 0.9999997543390223, iteration: 304964
loss: 1.223565936088562,grad_norm: 0.9999993430758567, iteration: 304965
loss: 1.0252732038497925,grad_norm: 0.987782039514986, iteration: 304966
loss: 1.0485914945602417,grad_norm: 0.9999995366149885, iteration: 304967
loss: 1.1085606813430786,grad_norm: 0.9999992041828971, iteration: 304968
loss: 0.9664666652679443,grad_norm: 0.8371737342881473, iteration: 304969
loss: 1.0665318965911865,grad_norm: 0.9999990077812904, iteration: 304970
loss: 0.9995480179786682,grad_norm: 0.7669756197726999, iteration: 304971
loss: 1.0621649026870728,grad_norm: 0.9999995080835477, iteration: 304972
loss: 0.99922776222229,grad_norm: 0.9999999704288978, iteration: 304973
loss: 1.0289839506149292,grad_norm: 0.9999991702434271, iteration: 304974
loss: 0.9997599124908447,grad_norm: 0.9477618070121681, iteration: 304975
loss: 0.9926276803016663,grad_norm: 0.6328013369836485, iteration: 304976
loss: 0.9626525640487671,grad_norm: 0.7980461900861553, iteration: 304977
loss: 0.9860244989395142,grad_norm: 0.7246009484808795, iteration: 304978
loss: 1.0690891742706299,grad_norm: 0.9999990205122018, iteration: 304979
loss: 1.119955062866211,grad_norm: 0.9999997814717939, iteration: 304980
loss: 0.9901329278945923,grad_norm: 0.9233567055165562, iteration: 304981
loss: 0.9737995862960815,grad_norm: 0.8510907903519079, iteration: 304982
loss: 1.0115810632705688,grad_norm: 0.7899858395819973, iteration: 304983
loss: 1.0650124549865723,grad_norm: 0.8475156304738335, iteration: 304984
loss: 1.0051904916763306,grad_norm: 0.9117724640648492, iteration: 304985
loss: 0.9886457324028015,grad_norm: 0.8228141948833432, iteration: 304986
loss: 1.036570429801941,grad_norm: 0.9999991167241938, iteration: 304987
loss: 0.9793393015861511,grad_norm: 0.7552140762556696, iteration: 304988
loss: 0.9596827626228333,grad_norm: 0.9274187779087328, iteration: 304989
loss: 1.0073108673095703,grad_norm: 0.9986585809448032, iteration: 304990
loss: 1.0813658237457275,grad_norm: 0.9999992177395814, iteration: 304991
loss: 0.996789276599884,grad_norm: 0.8581658128546515, iteration: 304992
loss: 1.0119518041610718,grad_norm: 0.9759902405586645, iteration: 304993
loss: 1.0190695524215698,grad_norm: 0.9999993279765772, iteration: 304994
loss: 1.016679286956787,grad_norm: 0.7629945163710432, iteration: 304995
loss: 1.0041793584823608,grad_norm: 0.8886560352641185, iteration: 304996
loss: 1.0868332386016846,grad_norm: 0.9334442654178606, iteration: 304997
loss: 0.990677535533905,grad_norm: 0.8134748530513981, iteration: 304998
loss: 1.014228105545044,grad_norm: 0.999999554943481, iteration: 304999
loss: 1.0076764822006226,grad_norm: 0.8206815014448305, iteration: 305000
loss: 0.9953832030296326,grad_norm: 0.926844254814124, iteration: 305001
loss: 0.9971495270729065,grad_norm: 0.7838369734814099, iteration: 305002
loss: 1.0083082914352417,grad_norm: 0.9605665917357832, iteration: 305003
loss: 1.1921452283859253,grad_norm: 0.9999993892312444, iteration: 305004
loss: 1.036424994468689,grad_norm: 0.9999998198856709, iteration: 305005
loss: 1.0049870014190674,grad_norm: 0.9999990322653584, iteration: 305006
loss: 0.992554783821106,grad_norm: 0.7749141708992594, iteration: 305007
loss: 1.0684480667114258,grad_norm: 0.9913022399362927, iteration: 305008
loss: 0.9630622863769531,grad_norm: 0.7504651142021306, iteration: 305009
loss: 1.0535359382629395,grad_norm: 0.7073010232065635, iteration: 305010
loss: 1.0011274814605713,grad_norm: 0.7883284248814894, iteration: 305011
loss: 1.0195971727371216,grad_norm: 0.9999992928859326, iteration: 305012
loss: 1.016249418258667,grad_norm: 0.999999219471125, iteration: 305013
loss: 1.1206587553024292,grad_norm: 0.999999252522571, iteration: 305014
loss: 0.9795604348182678,grad_norm: 0.8056730172364223, iteration: 305015
loss: 1.0277241468429565,grad_norm: 0.8654784686257292, iteration: 305016
loss: 0.9952718019485474,grad_norm: 0.999999323411281, iteration: 305017
loss: 1.0329827070236206,grad_norm: 0.9662399015247688, iteration: 305018
loss: 1.0307869911193848,grad_norm: 0.881623241912651, iteration: 305019
loss: 1.0187150239944458,grad_norm: 0.8712625247850476, iteration: 305020
loss: 0.9761327505111694,grad_norm: 0.8062652155081788, iteration: 305021
loss: 1.0632835626602173,grad_norm: 0.9999989996749447, iteration: 305022
loss: 0.9884158372879028,grad_norm: 0.7875683841594697, iteration: 305023
loss: 0.9654379487037659,grad_norm: 0.9617783196910972, iteration: 305024
loss: 1.0029948949813843,grad_norm: 0.9252285575251964, iteration: 305025
loss: 1.0035618543624878,grad_norm: 0.8451900934237085, iteration: 305026
loss: 0.9841004610061646,grad_norm: 0.7958579081358317, iteration: 305027
loss: 1.0239155292510986,grad_norm: 0.704119058108818, iteration: 305028
loss: 0.9781890511512756,grad_norm: 0.9011762484510121, iteration: 305029
loss: 1.0963563919067383,grad_norm: 0.999999515054441, iteration: 305030
loss: 1.003809928894043,grad_norm: 0.8906604360787889, iteration: 305031
loss: 0.9889625310897827,grad_norm: 0.8586806710135924, iteration: 305032
loss: 0.9958887100219727,grad_norm: 0.8646858571687395, iteration: 305033
loss: 1.045102596282959,grad_norm: 0.9999993376753695, iteration: 305034
loss: 1.0226141214370728,grad_norm: 0.8337082774397911, iteration: 305035
loss: 1.0809791088104248,grad_norm: 1.0000000091167218, iteration: 305036
loss: 0.9888251423835754,grad_norm: 0.768807769803023, iteration: 305037
loss: 1.0306707620620728,grad_norm: 0.890664065438552, iteration: 305038
loss: 1.014694094657898,grad_norm: 0.8611539862759344, iteration: 305039
loss: 1.0206005573272705,grad_norm: 0.9999989135799261, iteration: 305040
loss: 0.9957128763198853,grad_norm: 0.7914637840598976, iteration: 305041
loss: 1.0281593799591064,grad_norm: 0.9999998082664898, iteration: 305042
loss: 0.9787262678146362,grad_norm: 0.813100031681949, iteration: 305043
loss: 1.021240234375,grad_norm: 0.8564327397277789, iteration: 305044
loss: 0.9776123762130737,grad_norm: 0.7535285766940709, iteration: 305045
loss: 0.9988370537757874,grad_norm: 0.7999012824652516, iteration: 305046
loss: 1.1232515573501587,grad_norm: 0.9999999587018684, iteration: 305047
loss: 0.9770656824111938,grad_norm: 0.999999017446851, iteration: 305048
loss: 0.9913192987442017,grad_norm: 0.8215330026479881, iteration: 305049
loss: 1.0296132564544678,grad_norm: 0.7922406288695718, iteration: 305050
loss: 0.9839305877685547,grad_norm: 0.8789328289959514, iteration: 305051
loss: 1.0071438550949097,grad_norm: 0.999999923408228, iteration: 305052
loss: 1.0118000507354736,grad_norm: 0.9999990584959824, iteration: 305053
loss: 0.9978379011154175,grad_norm: 0.8885671117765317, iteration: 305054
loss: 1.0551934242248535,grad_norm: 0.9999991460497765, iteration: 305055
loss: 1.0688250064849854,grad_norm: 0.8582746026302199, iteration: 305056
loss: 1.0279033184051514,grad_norm: 0.8365031944542574, iteration: 305057
loss: 0.9827605485916138,grad_norm: 0.7983091832818613, iteration: 305058
loss: 0.9536678194999695,grad_norm: 0.873909193670342, iteration: 305059
loss: 0.9908445477485657,grad_norm: 0.8943237764425404, iteration: 305060
loss: 0.9935831427574158,grad_norm: 0.9405946810121288, iteration: 305061
loss: 1.0301837921142578,grad_norm: 0.9999991467607658, iteration: 305062
loss: 1.0158308744430542,grad_norm: 0.8515296645586088, iteration: 305063
loss: 1.0085872411727905,grad_norm: 0.9442884900938446, iteration: 305064
loss: 1.0235393047332764,grad_norm: 0.9999990516765392, iteration: 305065
loss: 0.9742868542671204,grad_norm: 0.9999991203680613, iteration: 305066
loss: 1.0464915037155151,grad_norm: 0.9999991339707336, iteration: 305067
loss: 0.9914118647575378,grad_norm: 0.745141919957159, iteration: 305068
loss: 1.0093953609466553,grad_norm: 0.9074265884425222, iteration: 305069
loss: 0.9771389961242676,grad_norm: 0.8751203036772609, iteration: 305070
loss: 1.0045459270477295,grad_norm: 0.661599317775232, iteration: 305071
loss: 0.9863025546073914,grad_norm: 0.8324681710185249, iteration: 305072
loss: 1.0131001472473145,grad_norm: 0.830762479383119, iteration: 305073
loss: 0.9942153692245483,grad_norm: 0.9822732084108953, iteration: 305074
loss: 1.0252450704574585,grad_norm: 0.8733644300943151, iteration: 305075
loss: 1.0297173261642456,grad_norm: 0.8031334222029504, iteration: 305076
loss: 1.0010321140289307,grad_norm: 0.930504906995067, iteration: 305077
loss: 0.9980453252792358,grad_norm: 0.9868198571008983, iteration: 305078
loss: 1.0460875034332275,grad_norm: 0.999999197007749, iteration: 305079
loss: 0.9803717732429504,grad_norm: 0.7575436550239739, iteration: 305080
loss: 1.0132113695144653,grad_norm: 0.890573752797746, iteration: 305081
loss: 1.0058541297912598,grad_norm: 0.8002044600429438, iteration: 305082
loss: 1.014430046081543,grad_norm: 0.8210153042092504, iteration: 305083
loss: 0.9907777905464172,grad_norm: 0.9099187268399499, iteration: 305084
loss: 1.053901195526123,grad_norm: 0.8657243769207226, iteration: 305085
loss: 1.0749156475067139,grad_norm: 0.8510216690323583, iteration: 305086
loss: 0.9990297555923462,grad_norm: 0.8419626321543352, iteration: 305087
loss: 1.093338131904602,grad_norm: 0.9999991473169058, iteration: 305088
loss: 1.0547196865081787,grad_norm: 0.8986545132007288, iteration: 305089
loss: 0.9864696860313416,grad_norm: 0.813896988838888, iteration: 305090
loss: 0.9935672283172607,grad_norm: 0.9999993395128358, iteration: 305091
loss: 1.028024673461914,grad_norm: 0.7872585917489168, iteration: 305092
loss: 0.99669349193573,grad_norm: 0.9999990714413938, iteration: 305093
loss: 0.9621878862380981,grad_norm: 0.7970099113788731, iteration: 305094
loss: 0.9453055262565613,grad_norm: 0.9343576102375339, iteration: 305095
loss: 0.9869446158409119,grad_norm: 0.8696068891164924, iteration: 305096
loss: 0.9949142932891846,grad_norm: 0.8232742302005488, iteration: 305097
loss: 0.9902448654174805,grad_norm: 0.7596700686111526, iteration: 305098
loss: 1.0019875764846802,grad_norm: 0.8067772038870238, iteration: 305099
loss: 1.0138901472091675,grad_norm: 0.8193001254628194, iteration: 305100
loss: 0.9724957346916199,grad_norm: 0.6836635335056397, iteration: 305101
loss: 0.9591093063354492,grad_norm: 0.8571868780261276, iteration: 305102
loss: 0.9896923303604126,grad_norm: 0.8637211486167664, iteration: 305103
loss: 0.9872438907623291,grad_norm: 0.9409258987983392, iteration: 305104
loss: 0.9905409812927246,grad_norm: 0.9999991282582638, iteration: 305105
loss: 1.0196031332015991,grad_norm: 0.9291922101416711, iteration: 305106
loss: 1.0394772291183472,grad_norm: 0.9535137714866145, iteration: 305107
loss: 0.9882062673568726,grad_norm: 0.7117268722098838, iteration: 305108
loss: 0.9874587655067444,grad_norm: 0.7429412644554197, iteration: 305109
loss: 0.9760432839393616,grad_norm: 0.8886620533898882, iteration: 305110
loss: 1.0150847434997559,grad_norm: 0.9999995700519515, iteration: 305111
loss: 1.0319644212722778,grad_norm: 0.8465540032697368, iteration: 305112
loss: 0.9953603148460388,grad_norm: 0.7334411796134762, iteration: 305113
loss: 1.0186207294464111,grad_norm: 0.9999998333427281, iteration: 305114
loss: 0.9787307977676392,grad_norm: 0.8109419690894217, iteration: 305115
loss: 1.0481129884719849,grad_norm: 0.9999992464002739, iteration: 305116
loss: 1.0264378786087036,grad_norm: 0.9999991578578568, iteration: 305117
loss: 1.0049070119857788,grad_norm: 0.7703508327135898, iteration: 305118
loss: 1.010686993598938,grad_norm: 0.8531655311393115, iteration: 305119
loss: 1.011971116065979,grad_norm: 0.9043838727502925, iteration: 305120
loss: 1.001879096031189,grad_norm: 0.8987681201336599, iteration: 305121
loss: 1.0090656280517578,grad_norm: 0.9423896058789324, iteration: 305122
loss: 1.0149427652359009,grad_norm: 0.9128880752537366, iteration: 305123
loss: 0.9989131093025208,grad_norm: 0.9999990904023087, iteration: 305124
loss: 0.9992192387580872,grad_norm: 0.8921713874812476, iteration: 305125
loss: 1.0018439292907715,grad_norm: 0.7228487946907078, iteration: 305126
loss: 0.9646744132041931,grad_norm: 0.8680632397799489, iteration: 305127
loss: 0.9807400703430176,grad_norm: 0.9999991790155114, iteration: 305128
loss: 1.0018868446350098,grad_norm: 0.9999991612790211, iteration: 305129
loss: 1.0153602361679077,grad_norm: 0.9999995000282375, iteration: 305130
loss: 1.0053235292434692,grad_norm: 0.7721568193588574, iteration: 305131
loss: 1.0001996755599976,grad_norm: 0.8568048771722031, iteration: 305132
loss: 0.9977550506591797,grad_norm: 0.767670456674456, iteration: 305133
loss: 0.9778065085411072,grad_norm: 0.9637727455159937, iteration: 305134
loss: 0.9890276789665222,grad_norm: 0.7388499807931906, iteration: 305135
loss: 0.9905704259872437,grad_norm: 0.7771692134856695, iteration: 305136
loss: 1.0237032175064087,grad_norm: 0.880620877512516, iteration: 305137
loss: 1.0243794918060303,grad_norm: 0.9311607473561924, iteration: 305138
loss: 1.0155768394470215,grad_norm: 0.7760527026819927, iteration: 305139
loss: 1.0548962354660034,grad_norm: 0.999999173696094, iteration: 305140
loss: 1.021464228630066,grad_norm: 0.938161476005301, iteration: 305141
loss: 0.9876991510391235,grad_norm: 0.748971904276078, iteration: 305142
loss: 0.9829533696174622,grad_norm: 0.8775968796686271, iteration: 305143
loss: 0.9804345965385437,grad_norm: 0.9999992161788748, iteration: 305144
loss: 0.9920312166213989,grad_norm: 0.9999991977532009, iteration: 305145
loss: 1.027287483215332,grad_norm: 0.9496795713110856, iteration: 305146
loss: 0.9903431534767151,grad_norm: 0.8161665586283552, iteration: 305147
loss: 0.981309175491333,grad_norm: 0.691998778333284, iteration: 305148
loss: 1.0537378787994385,grad_norm: 0.8573840153051381, iteration: 305149
loss: 1.0235168933868408,grad_norm: 0.9999995927577602, iteration: 305150
loss: 0.9974798560142517,grad_norm: 0.999998976780374, iteration: 305151
loss: 0.9851425290107727,grad_norm: 0.9999990932856199, iteration: 305152
loss: 1.0130689144134521,grad_norm: 0.8693776815154401, iteration: 305153
loss: 0.9549828171730042,grad_norm: 0.8476200140610431, iteration: 305154
loss: 1.0164765119552612,grad_norm: 0.968244448015921, iteration: 305155
loss: 1.0774613618850708,grad_norm: 0.9999993619048588, iteration: 305156
loss: 0.9760016202926636,grad_norm: 0.8518409821272671, iteration: 305157
loss: 1.0403491258621216,grad_norm: 0.8420116303443695, iteration: 305158
loss: 0.9727601408958435,grad_norm: 0.754510548498794, iteration: 305159
loss: 1.0232360363006592,grad_norm: 0.8173061651656743, iteration: 305160
loss: 0.9950067400932312,grad_norm: 0.8152712700540737, iteration: 305161
loss: 1.0365370512008667,grad_norm: 0.8876131276406884, iteration: 305162
loss: 0.9980603456497192,grad_norm: 0.7885207294605923, iteration: 305163
loss: 1.0163377523422241,grad_norm: 0.9065968707241491, iteration: 305164
loss: 0.9808179140090942,grad_norm: 0.7835698049483185, iteration: 305165
loss: 0.9892956018447876,grad_norm: 0.7844875774627167, iteration: 305166
loss: 1.0521419048309326,grad_norm: 0.9999998747759418, iteration: 305167
loss: 1.0416029691696167,grad_norm: 0.724753406251859, iteration: 305168
loss: 1.1019513607025146,grad_norm: 0.9999992217446185, iteration: 305169
loss: 0.9880155324935913,grad_norm: 0.8258639892513716, iteration: 305170
loss: 1.0193655490875244,grad_norm: 0.9658350649374898, iteration: 305171
loss: 1.0011149644851685,grad_norm: 0.8694529192990172, iteration: 305172
loss: 0.9956536293029785,grad_norm: 0.7871010064813122, iteration: 305173
loss: 0.978578507900238,grad_norm: 0.7212121162686945, iteration: 305174
loss: 1.0467770099639893,grad_norm: 0.7573416287077109, iteration: 305175
loss: 0.9899834394454956,grad_norm: 0.8238406347713958, iteration: 305176
loss: 1.0338449478149414,grad_norm: 0.9159508694593033, iteration: 305177
loss: 1.0092908143997192,grad_norm: 0.9237046347467964, iteration: 305178
loss: 0.9675021171569824,grad_norm: 0.9508822886165588, iteration: 305179
loss: 1.0079495906829834,grad_norm: 0.6930998673860064, iteration: 305180
loss: 1.0901461839675903,grad_norm: 0.9318288021100903, iteration: 305181
loss: 0.9602669477462769,grad_norm: 0.721252867562775, iteration: 305182
loss: 1.0133306980133057,grad_norm: 0.9999991467889104, iteration: 305183
loss: 1.0231411457061768,grad_norm: 0.8140169164484863, iteration: 305184
loss: 1.0197601318359375,grad_norm: 0.9999999032795269, iteration: 305185
loss: 1.0503294467926025,grad_norm: 0.9856360056524284, iteration: 305186
loss: 0.9936245679855347,grad_norm: 0.9357540833715892, iteration: 305187
loss: 1.03120756149292,grad_norm: 0.910114409259021, iteration: 305188
loss: 0.9794831871986389,grad_norm: 0.7525249049036264, iteration: 305189
loss: 1.0516058206558228,grad_norm: 0.9450371526093658, iteration: 305190
loss: 0.9904712438583374,grad_norm: 0.9608794047601715, iteration: 305191
loss: 1.058785080909729,grad_norm: 0.911562184013969, iteration: 305192
loss: 1.000848412513733,grad_norm: 0.999999125670515, iteration: 305193
loss: 1.0409514904022217,grad_norm: 0.999999079090676, iteration: 305194
loss: 1.0317621231079102,grad_norm: 0.9999999120562418, iteration: 305195
loss: 0.979569673538208,grad_norm: 0.9999989823658701, iteration: 305196
loss: 0.9858381748199463,grad_norm: 0.8259648583462665, iteration: 305197
loss: 1.052097201347351,grad_norm: 1.000000044837186, iteration: 305198
loss: 1.0267343521118164,grad_norm: 0.9999990033626837, iteration: 305199
loss: 1.0223462581634521,grad_norm: 0.9999991610524108, iteration: 305200
loss: 1.0292807817459106,grad_norm: 0.9701226893994842, iteration: 305201
loss: 1.0159343481063843,grad_norm: 0.8038435855429702, iteration: 305202
loss: 1.0006195306777954,grad_norm: 0.9999990898123449, iteration: 305203
loss: 1.068802833557129,grad_norm: 0.999999162670309, iteration: 305204
loss: 0.9882450699806213,grad_norm: 0.8221035512152268, iteration: 305205
loss: 0.997812032699585,grad_norm: 0.9999990626624506, iteration: 305206
loss: 0.9846881628036499,grad_norm: 0.9999994332932386, iteration: 305207
loss: 0.9936961531639099,grad_norm: 0.9096648698632159, iteration: 305208
loss: 1.0177083015441895,grad_norm: 0.7938371950301674, iteration: 305209
loss: 1.074782371520996,grad_norm: 0.9999991530302749, iteration: 305210
loss: 1.0162564516067505,grad_norm: 0.9024789611223732, iteration: 305211
loss: 1.0916752815246582,grad_norm: 0.9999995637995103, iteration: 305212
loss: 1.013034701347351,grad_norm: 0.9999994717619614, iteration: 305213
loss: 1.0095995664596558,grad_norm: 0.7610070022818507, iteration: 305214
loss: 0.9645726680755615,grad_norm: 0.6816670855838343, iteration: 305215
loss: 1.045302152633667,grad_norm: 0.8546202144471668, iteration: 305216
loss: 0.999453604221344,grad_norm: 0.9999990795158015, iteration: 305217
loss: 0.9889043569564819,grad_norm: 0.8323048837494839, iteration: 305218
loss: 1.0312740802764893,grad_norm: 0.8976387394125775, iteration: 305219
loss: 1.0886240005493164,grad_norm: 0.9999991989813394, iteration: 305220
loss: 0.9936807751655579,grad_norm: 0.8354880060245248, iteration: 305221
loss: 0.958777129650116,grad_norm: 0.7293245295090901, iteration: 305222
loss: 1.0231876373291016,grad_norm: 0.8600363921411979, iteration: 305223
loss: 0.9829137921333313,grad_norm: 0.8423520200937374, iteration: 305224
loss: 0.9885944128036499,grad_norm: 0.8581071085510192, iteration: 305225
loss: 1.0248761177062988,grad_norm: 0.8215753481226954, iteration: 305226
loss: 1.0220412015914917,grad_norm: 0.8938601831892435, iteration: 305227
loss: 1.0455775260925293,grad_norm: 0.9999996358452686, iteration: 305228
loss: 1.029502272605896,grad_norm: 0.9758001361225956, iteration: 305229
loss: 0.982042133808136,grad_norm: 0.7183452170611722, iteration: 305230
loss: 0.981797993183136,grad_norm: 0.9999991992668128, iteration: 305231
loss: 0.9762638211250305,grad_norm: 0.8243572259301719, iteration: 305232
loss: 1.0224024057388306,grad_norm: 0.7733514309296291, iteration: 305233
loss: 0.9979051351547241,grad_norm: 0.8379108011649751, iteration: 305234
loss: 1.0356948375701904,grad_norm: 0.9793465362346518, iteration: 305235
loss: 1.0268418788909912,grad_norm: 0.9999990719977404, iteration: 305236
loss: 1.159929871559143,grad_norm: 0.9999998116157044, iteration: 305237
loss: 1.0483022928237915,grad_norm: 0.9999990434343257, iteration: 305238
loss: 1.0283455848693848,grad_norm: 0.9999991476577533, iteration: 305239
loss: 0.9637945890426636,grad_norm: 0.8078972277586098, iteration: 305240
loss: 0.998279333114624,grad_norm: 0.7256834672246485, iteration: 305241
loss: 1.0446999073028564,grad_norm: 0.9999999743298483, iteration: 305242
loss: 0.9814226031303406,grad_norm: 0.6773074174337592, iteration: 305243
loss: 0.9633824229240417,grad_norm: 0.7401786844301815, iteration: 305244
loss: 0.9856655597686768,grad_norm: 0.8159176968844225, iteration: 305245
loss: 1.037263035774231,grad_norm: 0.9771023148772386, iteration: 305246
loss: 1.058964729309082,grad_norm: 0.9999991896325865, iteration: 305247
loss: 1.006850004196167,grad_norm: 0.9566909499317051, iteration: 305248
loss: 0.9592571258544922,grad_norm: 0.8060210654799713, iteration: 305249
loss: 0.9469106197357178,grad_norm: 0.8293341360460126, iteration: 305250
loss: 0.9645735025405884,grad_norm: 0.999999007953351, iteration: 305251
loss: 0.9584447741508484,grad_norm: 0.9999991675792255, iteration: 305252
loss: 1.0147607326507568,grad_norm: 0.9057144311988422, iteration: 305253
loss: 0.9658665060997009,grad_norm: 0.8711431237353351, iteration: 305254
loss: 0.9919249415397644,grad_norm: 0.9999994399991928, iteration: 305255
loss: 1.0586655139923096,grad_norm: 0.9999991413419005, iteration: 305256
loss: 1.0977540016174316,grad_norm: 0.9999992127616808, iteration: 305257
loss: 0.9932478070259094,grad_norm: 0.8735575385105285, iteration: 305258
loss: 0.9982276558876038,grad_norm: 0.9999997168800187, iteration: 305259
loss: 0.9305874109268188,grad_norm: 0.9683277675950219, iteration: 305260
loss: 1.0069009065628052,grad_norm: 0.8953128327805516, iteration: 305261
loss: 1.0330240726470947,grad_norm: 0.7874270133720273, iteration: 305262
loss: 0.9810786843299866,grad_norm: 0.9999999619232791, iteration: 305263
loss: 1.1104315519332886,grad_norm: 0.999999225478448, iteration: 305264
loss: 0.9853674173355103,grad_norm: 0.8012513539387635, iteration: 305265
loss: 1.0004924535751343,grad_norm: 0.7792498604007126, iteration: 305266
loss: 0.9948146343231201,grad_norm: 0.7561551464075487, iteration: 305267
loss: 1.1113462448120117,grad_norm: 0.9999998340970625, iteration: 305268
loss: 1.0286192893981934,grad_norm: 0.9999991410111573, iteration: 305269
loss: 0.974290668964386,grad_norm: 0.8160707044460247, iteration: 305270
loss: 1.0686049461364746,grad_norm: 0.9999990113715842, iteration: 305271
loss: 1.0329399108886719,grad_norm: 0.9999991505484922, iteration: 305272
loss: 1.0259771347045898,grad_norm: 0.8563833611381256, iteration: 305273
loss: 1.0656546354293823,grad_norm: 0.9999991232572475, iteration: 305274
loss: 0.9678129553794861,grad_norm: 0.794392638197071, iteration: 305275
loss: 1.0009950399398804,grad_norm: 0.8684215722904784, iteration: 305276
loss: 1.1186869144439697,grad_norm: 0.9999995237166011, iteration: 305277
loss: 1.0770927667617798,grad_norm: 0.999999231319288, iteration: 305278
loss: 0.9959393739700317,grad_norm: 0.9553086593468486, iteration: 305279
loss: 0.9652751088142395,grad_norm: 0.9048860846315833, iteration: 305280
loss: 1.0081250667572021,grad_norm: 0.7530715337061603, iteration: 305281
loss: 1.0180613994598389,grad_norm: 0.8578996267206894, iteration: 305282
loss: 1.0720818042755127,grad_norm: 0.920164750693985, iteration: 305283
loss: 0.9837959408760071,grad_norm: 0.7327593200719918, iteration: 305284
loss: 1.0964804887771606,grad_norm: 0.9999989617470045, iteration: 305285
loss: 0.9554376006126404,grad_norm: 0.987637606365432, iteration: 305286
loss: 1.0280838012695312,grad_norm: 0.9999999352027872, iteration: 305287
loss: 1.0202571153640747,grad_norm: 0.9999999447620657, iteration: 305288
loss: 1.3377835750579834,grad_norm: 0.9999998413728937, iteration: 305289
loss: 1.008309245109558,grad_norm: 0.9324976850836769, iteration: 305290
loss: 1.0179626941680908,grad_norm: 0.8523537974078002, iteration: 305291
loss: 0.9677448868751526,grad_norm: 0.9999998548392057, iteration: 305292
loss: 0.9937685132026672,grad_norm: 0.7403543925885547, iteration: 305293
loss: 1.0261636972427368,grad_norm: 0.7748325061747139, iteration: 305294
loss: 0.9904425740242004,grad_norm: 0.9605365306857738, iteration: 305295
loss: 1.0666266679763794,grad_norm: 0.999999036067974, iteration: 305296
loss: 1.0238900184631348,grad_norm: 0.807175629869061, iteration: 305297
loss: 1.0199905633926392,grad_norm: 0.9766744843422897, iteration: 305298
loss: 0.945023775100708,grad_norm: 0.9132594800889783, iteration: 305299
loss: 0.9901030659675598,grad_norm: 0.9999991070255588, iteration: 305300
loss: 1.0142961740493774,grad_norm: 0.7645229523410664, iteration: 305301
loss: 1.101798176765442,grad_norm: 0.999999642301055, iteration: 305302
loss: 1.0038973093032837,grad_norm: 0.7486999428028662, iteration: 305303
loss: 0.9880104660987854,grad_norm: 0.9999990496749613, iteration: 305304
loss: 0.9993018507957458,grad_norm: 0.762813463842271, iteration: 305305
loss: 1.0340038537979126,grad_norm: 0.9999991638839848, iteration: 305306
loss: 1.1235250234603882,grad_norm: 0.999999948926403, iteration: 305307
loss: 1.026341438293457,grad_norm: 0.7913879137318032, iteration: 305308
loss: 0.9869723320007324,grad_norm: 0.8256765000023244, iteration: 305309
loss: 0.9839323163032532,grad_norm: 0.9035524754741199, iteration: 305310
loss: 0.9912642240524292,grad_norm: 0.9999990469641566, iteration: 305311
loss: 1.0270308256149292,grad_norm: 0.980635122377873, iteration: 305312
loss: 1.0114437341690063,grad_norm: 0.8923592851759657, iteration: 305313
loss: 1.0002521276474,grad_norm: 0.9999995254425728, iteration: 305314
loss: 1.0658985376358032,grad_norm: 0.9999991414094442, iteration: 305315
loss: 1.0301889181137085,grad_norm: 0.9146925209337158, iteration: 305316
loss: 0.9549515843391418,grad_norm: 0.9999991608481216, iteration: 305317
loss: 1.0260493755340576,grad_norm: 0.9999990928352851, iteration: 305318
loss: 1.0091365575790405,grad_norm: 0.7873232211363521, iteration: 305319
loss: 1.0403343439102173,grad_norm: 0.8299700404897942, iteration: 305320
loss: 1.0305743217468262,grad_norm: 0.9999990988603464, iteration: 305321
loss: 0.9964966773986816,grad_norm: 0.8116005901099373, iteration: 305322
loss: 0.9817963242530823,grad_norm: 0.6619262180093385, iteration: 305323
loss: 1.034332275390625,grad_norm: 0.9234710555886722, iteration: 305324
loss: 1.117270827293396,grad_norm: 0.9999989783931313, iteration: 305325
loss: 1.0246639251708984,grad_norm: 0.8779347706867597, iteration: 305326
loss: 1.0351439714431763,grad_norm: 0.9644505578659539, iteration: 305327
loss: 1.023792028427124,grad_norm: 0.8918913885805015, iteration: 305328
loss: 1.0314788818359375,grad_norm: 0.9999994704066273, iteration: 305329
loss: 1.189806580543518,grad_norm: 0.9999993359522936, iteration: 305330
loss: 1.03925621509552,grad_norm: 0.9999997509186598, iteration: 305331
loss: 1.0938807725906372,grad_norm: 0.9999990465576979, iteration: 305332
loss: 0.9601224660873413,grad_norm: 0.8229820019007533, iteration: 305333
loss: 0.9667652249336243,grad_norm: 0.6580215486167725, iteration: 305334
loss: 0.9888235926628113,grad_norm: 0.9745950524170647, iteration: 305335
loss: 1.0034489631652832,grad_norm: 0.817720753665761, iteration: 305336
loss: 1.008906602859497,grad_norm: 0.807737271043752, iteration: 305337
loss: 0.9842579960823059,grad_norm: 0.8235567763732914, iteration: 305338
loss: 0.9972293376922607,grad_norm: 0.9999991624488103, iteration: 305339
loss: 1.009113073348999,grad_norm: 0.8234356550225107, iteration: 305340
loss: 1.1316337585449219,grad_norm: 0.9999993617469198, iteration: 305341
loss: 0.997894287109375,grad_norm: 0.8809835206204968, iteration: 305342
loss: 1.0145924091339111,grad_norm: 0.9471272591857515, iteration: 305343
loss: 1.0046242475509644,grad_norm: 0.9157840732178401, iteration: 305344
loss: 0.9938607215881348,grad_norm: 0.9999992553615382, iteration: 305345
loss: 1.0047922134399414,grad_norm: 0.9999990758491947, iteration: 305346
loss: 1.0493544340133667,grad_norm: 0.9999998807039637, iteration: 305347
loss: 1.1152169704437256,grad_norm: 0.9999991013843521, iteration: 305348
loss: 1.0421630144119263,grad_norm: 0.9999990757544986, iteration: 305349
loss: 1.010695219039917,grad_norm: 0.9999997920807314, iteration: 305350
loss: 1.0630905628204346,grad_norm: 1.0000000660127513, iteration: 305351
loss: 0.9878697991371155,grad_norm: 0.8613969352310391, iteration: 305352
loss: 1.0169888734817505,grad_norm: 0.7528779877248337, iteration: 305353
loss: 1.030684471130371,grad_norm: 0.9999995322999282, iteration: 305354
loss: 1.0122296810150146,grad_norm: 0.8562521537388307, iteration: 305355
loss: 1.0105432271957397,grad_norm: 0.9999991088492852, iteration: 305356
loss: 1.0091677904129028,grad_norm: 0.8232060468085471, iteration: 305357
loss: 1.0005736351013184,grad_norm: 0.977039912621965, iteration: 305358
loss: 1.0040528774261475,grad_norm: 0.7726871488942925, iteration: 305359
loss: 1.044203758239746,grad_norm: 0.9999991280721986, iteration: 305360
loss: 0.9928154349327087,grad_norm: 0.9077290658407072, iteration: 305361
loss: 0.9510324597358704,grad_norm: 0.9555664801053181, iteration: 305362
loss: 0.9809163212776184,grad_norm: 0.8835265911549265, iteration: 305363
loss: 1.0063130855560303,grad_norm: 0.956538239588129, iteration: 305364
loss: 1.1093453168869019,grad_norm: 0.9999996928285649, iteration: 305365
loss: 1.0058951377868652,grad_norm: 0.999999631240144, iteration: 305366
loss: 1.0227348804473877,grad_norm: 0.9104805843402253, iteration: 305367
loss: 1.1533583402633667,grad_norm: 0.9999998487961501, iteration: 305368
loss: 1.0180299282073975,grad_norm: 0.9999991301826435, iteration: 305369
loss: 1.0036321878433228,grad_norm: 0.8984364521386812, iteration: 305370
loss: 1.039554238319397,grad_norm: 0.9999989642476665, iteration: 305371
loss: 0.9845597147941589,grad_norm: 0.9999991750620822, iteration: 305372
loss: 1.0110450983047485,grad_norm: 0.8802010648023956, iteration: 305373
loss: 1.0897842645645142,grad_norm: 0.9557199145665188, iteration: 305374
loss: 1.0622434616088867,grad_norm: 0.999999196658928, iteration: 305375
loss: 0.9907817244529724,grad_norm: 0.6907148430160199, iteration: 305376
loss: 0.963466227054596,grad_norm: 0.9057918054558834, iteration: 305377
loss: 0.9990544319152832,grad_norm: 0.7457083285982685, iteration: 305378
loss: 1.0177497863769531,grad_norm: 0.8500793111749482, iteration: 305379
loss: 0.9978827238082886,grad_norm: 0.9059572291033196, iteration: 305380
loss: 0.9958414435386658,grad_norm: 0.9999993031368435, iteration: 305381
loss: 0.9892753958702087,grad_norm: 0.9999996075604577, iteration: 305382
loss: 1.0211623907089233,grad_norm: 0.9608403656223574, iteration: 305383
loss: 1.0127516984939575,grad_norm: 0.8734474946926785, iteration: 305384
loss: 1.0018924474716187,grad_norm: 0.9999991201153184, iteration: 305385
loss: 0.9840996265411377,grad_norm: 0.8080104502254791, iteration: 305386
loss: 1.0478618144989014,grad_norm: 0.9999997899194213, iteration: 305387
loss: 1.0060442686080933,grad_norm: 0.999999069932905, iteration: 305388
loss: 0.9899936318397522,grad_norm: 0.982794587272982, iteration: 305389
loss: 1.000985026359558,grad_norm: 0.9741572439597845, iteration: 305390
loss: 1.0257843732833862,grad_norm: 0.8630696584958831, iteration: 305391
loss: 0.9839335680007935,grad_norm: 0.959512075685001, iteration: 305392
loss: 1.005630373954773,grad_norm: 0.8185218108687846, iteration: 305393
loss: 1.087172269821167,grad_norm: 0.9999994545751587, iteration: 305394
loss: 1.0185503959655762,grad_norm: 0.7344569439915153, iteration: 305395
loss: 1.0018000602722168,grad_norm: 0.7826704247308793, iteration: 305396
loss: 1.124153971672058,grad_norm: 0.9999998438078305, iteration: 305397
loss: 1.0096638202667236,grad_norm: 0.8675639323006948, iteration: 305398
loss: 0.9927262663841248,grad_norm: 0.9973694870944046, iteration: 305399
loss: 0.9618950486183167,grad_norm: 0.7673425978991262, iteration: 305400
loss: 1.1562741994857788,grad_norm: 0.9999998383669876, iteration: 305401
loss: 0.9949334263801575,grad_norm: 0.8117776137599948, iteration: 305402
loss: 1.0803356170654297,grad_norm: 0.9999994880045368, iteration: 305403
loss: 1.0084511041641235,grad_norm: 0.8326374583848978, iteration: 305404
loss: 0.9725609421730042,grad_norm: 0.8096889396054086, iteration: 305405
loss: 1.0553572177886963,grad_norm: 0.7357040427249374, iteration: 305406
loss: 1.044224739074707,grad_norm: 0.999999797948891, iteration: 305407
loss: 0.9906927347183228,grad_norm: 0.8630099115613354, iteration: 305408
loss: 1.0531712770462036,grad_norm: 0.9695934799364069, iteration: 305409
loss: 1.0135886669158936,grad_norm: 0.7671298259767597, iteration: 305410
loss: 0.9713886976242065,grad_norm: 0.8727621123430693, iteration: 305411
loss: 1.0169861316680908,grad_norm: 0.9999991210591082, iteration: 305412
loss: 1.0153555870056152,grad_norm: 0.9999989551900377, iteration: 305413
loss: 1.0114277601242065,grad_norm: 0.898818119005641, iteration: 305414
loss: 1.0219906568527222,grad_norm: 0.8981070765032189, iteration: 305415
loss: 1.0092008113861084,grad_norm: 0.9047916748628284, iteration: 305416
loss: 1.0102158784866333,grad_norm: 0.9999993914397268, iteration: 305417
loss: 0.9836984276771545,grad_norm: 0.7361433735670218, iteration: 305418
loss: 0.9949477314949036,grad_norm: 0.9999994345790437, iteration: 305419
loss: 1.0172284841537476,grad_norm: 0.8589262253406151, iteration: 305420
loss: 1.010798692703247,grad_norm: 0.9292582739814461, iteration: 305421
loss: 0.963788628578186,grad_norm: 0.9141810808298808, iteration: 305422
loss: 1.0056074857711792,grad_norm: 0.7482367049374951, iteration: 305423
loss: 0.9923437237739563,grad_norm: 0.763192563718131, iteration: 305424
loss: 1.2379447221755981,grad_norm: 0.9999998562465139, iteration: 305425
loss: 1.000144600868225,grad_norm: 0.9999993181299355, iteration: 305426
loss: 1.0552423000335693,grad_norm: 0.9999997741809851, iteration: 305427
loss: 1.0024911165237427,grad_norm: 0.7982037487466289, iteration: 305428
loss: 0.9959988594055176,grad_norm: 0.9536665021578232, iteration: 305429
loss: 0.9836459159851074,grad_norm: 0.835378688685157, iteration: 305430
loss: 1.0180518627166748,grad_norm: 0.7828295694443095, iteration: 305431
loss: 0.9827845096588135,grad_norm: 0.9999990876661822, iteration: 305432
loss: 1.0357002019882202,grad_norm: 0.999999127627169, iteration: 305433
loss: 0.9814906716346741,grad_norm: 0.8737349113286654, iteration: 305434
loss: 1.0072920322418213,grad_norm: 0.9999990664561774, iteration: 305435
loss: 1.0219875574111938,grad_norm: 0.8236374697514901, iteration: 305436
loss: 1.047973871231079,grad_norm: 0.9999998577444058, iteration: 305437
loss: 1.0074772834777832,grad_norm: 0.9034688571275926, iteration: 305438
loss: 1.263279914855957,grad_norm: 0.9999999631303033, iteration: 305439
loss: 1.0597190856933594,grad_norm: 0.9999991957465219, iteration: 305440
loss: 0.9947471022605896,grad_norm: 0.8355668706331522, iteration: 305441
loss: 1.075379729270935,grad_norm: 0.9999995441256876, iteration: 305442
loss: 0.9843302369117737,grad_norm: 0.9999990590932769, iteration: 305443
loss: 0.9978322982788086,grad_norm: 0.948297300824764, iteration: 305444
loss: 0.9903837442398071,grad_norm: 0.9999992814583646, iteration: 305445
loss: 1.0072673559188843,grad_norm: 0.8683938349120441, iteration: 305446
loss: 0.9957621693611145,grad_norm: 0.8415772233993449, iteration: 305447
loss: 1.016191005706787,grad_norm: 0.9028276783176415, iteration: 305448
loss: 1.0001593828201294,grad_norm: 0.8651152636643764, iteration: 305449
loss: 1.0661838054656982,grad_norm: 0.999999174353099, iteration: 305450
loss: 1.0081180334091187,grad_norm: 0.7483197849353832, iteration: 305451
loss: 1.022315263748169,grad_norm: 0.8374332831439794, iteration: 305452
loss: 1.0111385583877563,grad_norm: 0.9302184000279025, iteration: 305453
loss: 1.0319039821624756,grad_norm: 0.8835067539281027, iteration: 305454
loss: 0.9947373867034912,grad_norm: 0.8672967534979971, iteration: 305455
loss: 0.9827585816383362,grad_norm: 0.8164808048161687, iteration: 305456
loss: 1.0294171571731567,grad_norm: 0.9999990794968397, iteration: 305457
loss: 1.1142561435699463,grad_norm: 0.9276418139640402, iteration: 305458
loss: 1.031195044517517,grad_norm: 0.9147833422766992, iteration: 305459
loss: 1.0806615352630615,grad_norm: 0.9999995216849851, iteration: 305460
loss: 1.079136610031128,grad_norm: 0.9999998423675702, iteration: 305461
loss: 1.0717250108718872,grad_norm: 0.8453874959441106, iteration: 305462
loss: 0.9654479026794434,grad_norm: 0.9999996327400471, iteration: 305463
loss: 1.0509815216064453,grad_norm: 0.9999998521304887, iteration: 305464
loss: 1.1782313585281372,grad_norm: 0.9999998003984001, iteration: 305465
loss: 0.9685855507850647,grad_norm: 0.8134425073996546, iteration: 305466
loss: 0.9917442798614502,grad_norm: 0.9300686925884607, iteration: 305467
loss: 1.012500524520874,grad_norm: 0.8089078069140562, iteration: 305468
loss: 1.0112273693084717,grad_norm: 0.8209312226420279, iteration: 305469
loss: 1.0229398012161255,grad_norm: 0.7063985188293928, iteration: 305470
loss: 1.008172869682312,grad_norm: 0.9303570083591366, iteration: 305471
loss: 0.996467113494873,grad_norm: 0.9999991229716937, iteration: 305472
loss: 1.1273356676101685,grad_norm: 0.9999994810298233, iteration: 305473
loss: 1.0207428932189941,grad_norm: 0.8743532648665705, iteration: 305474
loss: 1.029101014137268,grad_norm: 0.7634999488227189, iteration: 305475
loss: 1.0130425691604614,grad_norm: 0.9999991042503247, iteration: 305476
loss: 0.9882139563560486,grad_norm: 0.9761402590285613, iteration: 305477
loss: 1.1247658729553223,grad_norm: 0.999999270865476, iteration: 305478
loss: 1.019555687904358,grad_norm: 0.9766155638267908, iteration: 305479
loss: 0.9896990656852722,grad_norm: 0.8919968736476884, iteration: 305480
loss: 1.0400484800338745,grad_norm: 0.9999993180133571, iteration: 305481
loss: 1.0127568244934082,grad_norm: 0.6953608950368706, iteration: 305482
loss: 0.9871968030929565,grad_norm: 0.8567015712595246, iteration: 305483
loss: 1.1219984292984009,grad_norm: 0.9999991733339338, iteration: 305484
loss: 1.0787990093231201,grad_norm: 0.9999994185930128, iteration: 305485
loss: 1.0572993755340576,grad_norm: 0.9999991392093777, iteration: 305486
loss: 1.0259013175964355,grad_norm: 0.9542371114650525, iteration: 305487
loss: 1.0305386781692505,grad_norm: 0.9999993395634553, iteration: 305488
loss: 1.0432064533233643,grad_norm: 0.9999996911083104, iteration: 305489
loss: 1.0203230381011963,grad_norm: 0.93293385608778, iteration: 305490
loss: 1.0166624784469604,grad_norm: 0.899352560291297, iteration: 305491
loss: 0.9760883450508118,grad_norm: 0.9756903596516866, iteration: 305492
loss: 0.9787512421607971,grad_norm: 0.9264314843433865, iteration: 305493
loss: 1.0026538372039795,grad_norm: 0.9999992953158635, iteration: 305494
loss: 1.1902841329574585,grad_norm: 0.9999998275180862, iteration: 305495
loss: 1.0088008642196655,grad_norm: 0.9999991386969219, iteration: 305496
loss: 1.0111806392669678,grad_norm: 0.8628112620019065, iteration: 305497
loss: 1.010000228881836,grad_norm: 0.9999990824445342, iteration: 305498
loss: 1.1157306432724,grad_norm: 0.9999996541298419, iteration: 305499
loss: 1.014716625213623,grad_norm: 0.9999998530148615, iteration: 305500
loss: 1.0353665351867676,grad_norm: 0.9943493558389208, iteration: 305501
loss: 1.0204566717147827,grad_norm: 0.9999992878105076, iteration: 305502
loss: 1.0318479537963867,grad_norm: 0.9999990137068446, iteration: 305503
loss: 1.0882794857025146,grad_norm: 0.9999990728121493, iteration: 305504
loss: 1.0646803379058838,grad_norm: 0.9017089830324813, iteration: 305505
loss: 1.0071141719818115,grad_norm: 0.9339418607268248, iteration: 305506
loss: 1.0609965324401855,grad_norm: 0.99999931569044, iteration: 305507
loss: 1.0166456699371338,grad_norm: 0.7067689490060298, iteration: 305508
loss: 1.0678750276565552,grad_norm: 0.9684689346135286, iteration: 305509
loss: 1.1555514335632324,grad_norm: 0.9999990997371094, iteration: 305510
loss: 1.0447975397109985,grad_norm: 0.9999992975532758, iteration: 305511
loss: 0.9690923094749451,grad_norm: 0.9576618047618151, iteration: 305512
loss: 1.1170710325241089,grad_norm: 0.9999998355189871, iteration: 305513
loss: 1.0119585990905762,grad_norm: 0.8994034982291532, iteration: 305514
loss: 1.0030081272125244,grad_norm: 0.8755420282266065, iteration: 305515
loss: 0.9853248596191406,grad_norm: 0.9999991877739454, iteration: 305516
loss: 0.9866483807563782,grad_norm: 0.9122180076460292, iteration: 305517
loss: 1.083810567855835,grad_norm: 0.999999175093179, iteration: 305518
loss: 1.2038155794143677,grad_norm: 0.9999997252310883, iteration: 305519
loss: 1.0205957889556885,grad_norm: 0.9056171628732386, iteration: 305520
loss: 1.2214066982269287,grad_norm: 0.8499916376902088, iteration: 305521
loss: 1.0168538093566895,grad_norm: 0.9710568977309042, iteration: 305522
loss: 0.9806563854217529,grad_norm: 0.821473205336439, iteration: 305523
loss: 0.9811097979545593,grad_norm: 0.8227107728914816, iteration: 305524
loss: 1.0804744958877563,grad_norm: 0.9999996858054138, iteration: 305525
loss: 0.9618711471557617,grad_norm: 0.8382611258801828, iteration: 305526
loss: 1.0392005443572998,grad_norm: 0.9999994008074169, iteration: 305527
loss: 1.005977988243103,grad_norm: 0.7873661800351692, iteration: 305528
loss: 1.0130195617675781,grad_norm: 0.7669682681599044, iteration: 305529
loss: 0.9782131314277649,grad_norm: 0.8329157119072658, iteration: 305530
loss: 0.9432673454284668,grad_norm: 0.9178092349698235, iteration: 305531
loss: 1.0732862949371338,grad_norm: 0.9999995992717894, iteration: 305532
loss: 1.0565422773361206,grad_norm: 0.8403942375346223, iteration: 305533
loss: 0.9392246007919312,grad_norm: 0.7665412845700416, iteration: 305534
loss: 1.004593849182129,grad_norm: 0.9856788174006086, iteration: 305535
loss: 1.0098810195922852,grad_norm: 0.8340006616046037, iteration: 305536
loss: 1.0192971229553223,grad_norm: 0.9999992661158652, iteration: 305537
loss: 1.07677161693573,grad_norm: 0.9999999176881139, iteration: 305538
loss: 1.000197410583496,grad_norm: 0.735736977516916, iteration: 305539
loss: 0.9733233451843262,grad_norm: 0.8003813869567293, iteration: 305540
loss: 1.0464600324630737,grad_norm: 0.9999993797389689, iteration: 305541
loss: 0.9692953824996948,grad_norm: 0.8933823735130211, iteration: 305542
loss: 1.0136080980300903,grad_norm: 0.9999993895803082, iteration: 305543
loss: 1.039829969406128,grad_norm: 0.9257432524381806, iteration: 305544
loss: 1.009285807609558,grad_norm: 0.7933450224085086, iteration: 305545
loss: 1.0281875133514404,grad_norm: 0.9197332313275922, iteration: 305546
loss: 1.0469881296157837,grad_norm: 0.999999873447629, iteration: 305547
loss: 1.0045708417892456,grad_norm: 0.9077294860095187, iteration: 305548
loss: 1.0326257944107056,grad_norm: 0.8313347646294539, iteration: 305549
loss: 1.018166422843933,grad_norm: 0.873653048174951, iteration: 305550
loss: 1.0370811223983765,grad_norm: 0.9484682089503379, iteration: 305551
loss: 0.9834383726119995,grad_norm: 0.813941988394939, iteration: 305552
loss: 1.0156314373016357,grad_norm: 0.6807025654365363, iteration: 305553
loss: 1.0098613500595093,grad_norm: 0.7740209597076749, iteration: 305554
loss: 1.057497262954712,grad_norm: 0.9999999462493764, iteration: 305555
loss: 0.9783113598823547,grad_norm: 0.8327571703880645, iteration: 305556
loss: 1.0083885192871094,grad_norm: 0.8135588787610073, iteration: 305557
loss: 1.0021806955337524,grad_norm: 0.8167779287244692, iteration: 305558
loss: 1.0091419219970703,grad_norm: 0.9999997633812935, iteration: 305559
loss: 1.0233442783355713,grad_norm: 0.8845694520147956, iteration: 305560
loss: 0.9747326374053955,grad_norm: 0.8929515805517205, iteration: 305561
loss: 1.0870312452316284,grad_norm: 0.999999165621175, iteration: 305562
loss: 0.9851009249687195,grad_norm: 0.8128832482455085, iteration: 305563
loss: 1.0207393169403076,grad_norm: 0.9447430135873116, iteration: 305564
loss: 1.0758867263793945,grad_norm: 0.9999999157010352, iteration: 305565
loss: 1.0040290355682373,grad_norm: 0.853242421671845, iteration: 305566
loss: 1.0747462511062622,grad_norm: 0.9999992063248982, iteration: 305567
loss: 1.0234419107437134,grad_norm: 0.9552709362524325, iteration: 305568
loss: 1.0565078258514404,grad_norm: 0.8463841223489276, iteration: 305569
loss: 0.9797919988632202,grad_norm: 0.7816133673500512, iteration: 305570
loss: 1.016970157623291,grad_norm: 0.9999994374045813, iteration: 305571
loss: 1.0189833641052246,grad_norm: 0.9999996230979993, iteration: 305572
loss: 1.0343220233917236,grad_norm: 0.7859870304822892, iteration: 305573
loss: 1.1337145566940308,grad_norm: 0.9999991477129674, iteration: 305574
loss: 1.0311543941497803,grad_norm: 0.7767351413509005, iteration: 305575
loss: 1.0664570331573486,grad_norm: 0.999999141028161, iteration: 305576
loss: 1.0463392734527588,grad_norm: 0.9999992797744144, iteration: 305577
loss: 0.98595130443573,grad_norm: 0.7505881094563781, iteration: 305578
loss: 1.0426957607269287,grad_norm: 0.9999995036448825, iteration: 305579
loss: 1.027769923210144,grad_norm: 0.9999997842924592, iteration: 305580
loss: 0.9933148622512817,grad_norm: 0.9082782618776547, iteration: 305581
loss: 1.0503162145614624,grad_norm: 0.9596576812987565, iteration: 305582
loss: 1.0138784646987915,grad_norm: 0.9999995551145997, iteration: 305583
loss: 1.054549217224121,grad_norm: 0.9999990755275264, iteration: 305584
loss: 1.0467864274978638,grad_norm: 0.9999992466992889, iteration: 305585
loss: 1.011376976966858,grad_norm: 0.99999980268445, iteration: 305586
loss: 1.0046236515045166,grad_norm: 0.9886615478653747, iteration: 305587
loss: 0.9579377174377441,grad_norm: 0.9999992303535716, iteration: 305588
loss: 0.984803318977356,grad_norm: 0.8594302021707506, iteration: 305589
loss: 0.9846818447113037,grad_norm: 0.8998117677974543, iteration: 305590
loss: 1.0069345235824585,grad_norm: 0.7663063477796312, iteration: 305591
loss: 1.0535022020339966,grad_norm: 0.8172013395292945, iteration: 305592
loss: 1.0136324167251587,grad_norm: 0.9487175089932857, iteration: 305593
loss: 0.9580847024917603,grad_norm: 0.8172642485512431, iteration: 305594
loss: 1.0364857912063599,grad_norm: 0.9999997987701219, iteration: 305595
loss: 0.992318868637085,grad_norm: 0.9339664718783367, iteration: 305596
loss: 1.0176092386245728,grad_norm: 0.9999994167413315, iteration: 305597
loss: 0.9803224205970764,grad_norm: 0.8205507038946122, iteration: 305598
loss: 1.0049033164978027,grad_norm: 0.9999991527894029, iteration: 305599
loss: 1.0099092721939087,grad_norm: 0.7950735139596493, iteration: 305600
loss: 1.0500448942184448,grad_norm: 0.9743156470945323, iteration: 305601
loss: 1.0240856409072876,grad_norm: 0.8904384433267878, iteration: 305602
loss: 1.029167890548706,grad_norm: 0.9279534926908877, iteration: 305603
loss: 1.041061282157898,grad_norm: 0.9999990796777177, iteration: 305604
loss: 1.1251603364944458,grad_norm: 0.9814853349057759, iteration: 305605
loss: 1.0074692964553833,grad_norm: 0.709466328397681, iteration: 305606
loss: 1.0237669944763184,grad_norm: 0.9418740502445021, iteration: 305607
loss: 1.0006448030471802,grad_norm: 0.8113890004456815, iteration: 305608
loss: 0.9842612743377686,grad_norm: 0.8999761024481268, iteration: 305609
loss: 0.9880512356758118,grad_norm: 0.7852711800109341, iteration: 305610
loss: 1.0371900796890259,grad_norm: 0.8888378913000398, iteration: 305611
loss: 1.011394739151001,grad_norm: 0.8622379004695261, iteration: 305612
loss: 0.9919425249099731,grad_norm: 0.870266340178113, iteration: 305613
loss: 1.1309325695037842,grad_norm: 0.9999999496560078, iteration: 305614
loss: 0.9857383370399475,grad_norm: 0.8281805352850693, iteration: 305615
loss: 1.017213225364685,grad_norm: 0.8950514141636537, iteration: 305616
loss: 0.9913091659545898,grad_norm: 0.8181498780039185, iteration: 305617
loss: 1.0121182203292847,grad_norm: 0.9474019027212423, iteration: 305618
loss: 1.0322240591049194,grad_norm: 0.8699882914475757, iteration: 305619
loss: 1.0278733968734741,grad_norm: 0.9999996615705135, iteration: 305620
loss: 1.0285437107086182,grad_norm: 0.8910773637498541, iteration: 305621
loss: 0.9932509064674377,grad_norm: 0.8040211298554354, iteration: 305622
loss: 1.0135247707366943,grad_norm: 0.7674462397738285, iteration: 305623
loss: 1.0120234489440918,grad_norm: 0.9706699220115736, iteration: 305624
loss: 0.9992901086807251,grad_norm: 0.8061550884790991, iteration: 305625
loss: 1.0092756748199463,grad_norm: 0.7468587439271286, iteration: 305626
loss: 1.006407618522644,grad_norm: 0.9999992153620594, iteration: 305627
loss: 1.2116456031799316,grad_norm: 0.9999995966647581, iteration: 305628
loss: 1.0714982748031616,grad_norm: 0.9999998239129081, iteration: 305629
loss: 0.9634139537811279,grad_norm: 0.8850765592366941, iteration: 305630
loss: 1.0284141302108765,grad_norm: 0.8800214463711079, iteration: 305631
loss: 0.9619882106781006,grad_norm: 0.9185347621662843, iteration: 305632
loss: 1.0618245601654053,grad_norm: 0.9999998616222673, iteration: 305633
loss: 0.9906731843948364,grad_norm: 0.8826730349568938, iteration: 305634
loss: 0.9926816821098328,grad_norm: 0.9318688241283968, iteration: 305635
loss: 0.9637075066566467,grad_norm: 0.9999990012770857, iteration: 305636
loss: 1.032549500465393,grad_norm: 0.9999990673228841, iteration: 305637
loss: 1.0477261543273926,grad_norm: 0.8783685552491963, iteration: 305638
loss: 1.0264415740966797,grad_norm: 0.9656096374460159, iteration: 305639
loss: 1.0196205377578735,grad_norm: 0.8490715046670497, iteration: 305640
loss: 1.001724362373352,grad_norm: 0.9999993628485656, iteration: 305641
loss: 1.0058140754699707,grad_norm: 0.9137163024646918, iteration: 305642
loss: 0.9475459456443787,grad_norm: 0.6829654439515451, iteration: 305643
loss: 1.006019949913025,grad_norm: 0.7450701645737221, iteration: 305644
loss: 1.0270415544509888,grad_norm: 0.8444310132782954, iteration: 305645
loss: 0.9957574605941772,grad_norm: 0.9286978244704714, iteration: 305646
loss: 0.9973864555358887,grad_norm: 0.860368937294614, iteration: 305647
loss: 1.0346739292144775,grad_norm: 0.9999997682397004, iteration: 305648
loss: 1.04832923412323,grad_norm: 0.7948943368603385, iteration: 305649
loss: 1.003158450126648,grad_norm: 0.8449366993386599, iteration: 305650
loss: 0.9989531636238098,grad_norm: 0.8879670130325976, iteration: 305651
loss: 1.0889801979064941,grad_norm: 0.9999990763687223, iteration: 305652
loss: 1.0185633897781372,grad_norm: 0.9999994736295412, iteration: 305653
loss: 0.9507597088813782,grad_norm: 0.9177376104690742, iteration: 305654
loss: 1.0262404680252075,grad_norm: 0.7901503347594844, iteration: 305655
loss: 1.0863581895828247,grad_norm: 0.9999991831660714, iteration: 305656
loss: 1.039285659790039,grad_norm: 0.9999998935319754, iteration: 305657
loss: 0.9904146194458008,grad_norm: 0.9999991040990904, iteration: 305658
loss: 1.0296443700790405,grad_norm: 0.7436443720923545, iteration: 305659
loss: 1.0049935579299927,grad_norm: 0.7616270768628153, iteration: 305660
loss: 1.012992262840271,grad_norm: 0.8251665847357365, iteration: 305661
loss: 1.0219801664352417,grad_norm: 0.8919341281640739, iteration: 305662
loss: 1.0444936752319336,grad_norm: 0.9999995391210038, iteration: 305663
loss: 1.0014350414276123,grad_norm: 0.9684589703233157, iteration: 305664
loss: 1.008548378944397,grad_norm: 0.7772238891340526, iteration: 305665
loss: 0.9928914904594421,grad_norm: 0.8511450714070099, iteration: 305666
loss: 1.1930928230285645,grad_norm: 0.9999993282342802, iteration: 305667
loss: 1.0273267030715942,grad_norm: 0.9140435818626179, iteration: 305668
loss: 1.0234293937683105,grad_norm: 0.8118199926979527, iteration: 305669
loss: 1.0291343927383423,grad_norm: 0.8253934307813429, iteration: 305670
loss: 0.9672092199325562,grad_norm: 0.7702991870797168, iteration: 305671
loss: 0.9794292449951172,grad_norm: 0.876109604070404, iteration: 305672
loss: 1.0111031532287598,grad_norm: 0.917727755765623, iteration: 305673
loss: 1.008626103401184,grad_norm: 0.9999999982412401, iteration: 305674
loss: 1.038253903388977,grad_norm: 0.7035588858190698, iteration: 305675
loss: 1.0888460874557495,grad_norm: 0.968574668207653, iteration: 305676
loss: 1.1145687103271484,grad_norm: 0.9999992072207646, iteration: 305677
loss: 1.0004593133926392,grad_norm: 0.9999995742774157, iteration: 305678
loss: 0.978207528591156,grad_norm: 0.7801913475200783, iteration: 305679
loss: 1.0011688470840454,grad_norm: 0.931079338226912, iteration: 305680
loss: 1.0728076696395874,grad_norm: 0.9999996646666969, iteration: 305681
loss: 1.1299262046813965,grad_norm: 0.9999990634589625, iteration: 305682
loss: 1.0072287321090698,grad_norm: 0.9999991478357826, iteration: 305683
loss: 1.0222415924072266,grad_norm: 0.7867647388540464, iteration: 305684
loss: 0.997529149055481,grad_norm: 0.9127081366545303, iteration: 305685
loss: 1.0015298128128052,grad_norm: 0.7048959807601698, iteration: 305686
loss: 1.002062439918518,grad_norm: 0.8108930359684148, iteration: 305687
loss: 0.9777387380599976,grad_norm: 0.9624541062499352, iteration: 305688
loss: 0.9873554706573486,grad_norm: 0.7915623447316124, iteration: 305689
loss: 0.9858875274658203,grad_norm: 0.8763973817150994, iteration: 305690
loss: 1.0324691534042358,grad_norm: 0.9999993830388787, iteration: 305691
loss: 1.0305626392364502,grad_norm: 0.9999996015905076, iteration: 305692
loss: 1.0138959884643555,grad_norm: 0.9999992532485767, iteration: 305693
loss: 1.012712001800537,grad_norm: 0.8117990448382812, iteration: 305694
loss: 1.1503418684005737,grad_norm: 0.99999982590889, iteration: 305695
loss: 1.133908748626709,grad_norm: 0.9999994967428276, iteration: 305696
loss: 1.039588451385498,grad_norm: 0.9999992743991228, iteration: 305697
loss: 0.9921139478683472,grad_norm: 0.8900802676139838, iteration: 305698
loss: 1.0005830526351929,grad_norm: 0.83235591181498, iteration: 305699
loss: 1.09315025806427,grad_norm: 0.7535959906547204, iteration: 305700
loss: 1.094069480895996,grad_norm: 0.9999993560623471, iteration: 305701
loss: 0.9442877769470215,grad_norm: 0.700823126043973, iteration: 305702
loss: 0.9797576665878296,grad_norm: 0.9999993895814919, iteration: 305703
loss: 0.9550584554672241,grad_norm: 0.8613559842871181, iteration: 305704
loss: 1.0050829648971558,grad_norm: 0.8121034572898213, iteration: 305705
loss: 1.0107262134552002,grad_norm: 0.8711912234599899, iteration: 305706
loss: 1.0334949493408203,grad_norm: 0.9674795037455858, iteration: 305707
loss: 1.0978800058364868,grad_norm: 0.9999999218997936, iteration: 305708
loss: 0.9940314888954163,grad_norm: 0.822803943658631, iteration: 305709
loss: 1.0088040828704834,grad_norm: 0.9999992802767759, iteration: 305710
loss: 0.984875500202179,grad_norm: 0.7730468293553518, iteration: 305711
loss: 0.9872227907180786,grad_norm: 0.8746258128500244, iteration: 305712
loss: 1.0409623384475708,grad_norm: 0.7944123537269601, iteration: 305713
loss: 1.1170785427093506,grad_norm: 1.0000000718005506, iteration: 305714
loss: 1.0092424154281616,grad_norm: 0.8795305615666656, iteration: 305715
loss: 0.998225212097168,grad_norm: 0.8826820533636188, iteration: 305716
loss: 1.005907654762268,grad_norm: 0.999999039162128, iteration: 305717
loss: 0.972288191318512,grad_norm: 0.8582585650895244, iteration: 305718
loss: 1.187647819519043,grad_norm: 0.999999653391383, iteration: 305719
loss: 1.0282443761825562,grad_norm: 0.9999997878670616, iteration: 305720
loss: 0.9922779202461243,grad_norm: 0.9076022576729491, iteration: 305721
loss: 1.018601655960083,grad_norm: 0.9149356300168537, iteration: 305722
loss: 0.9979897141456604,grad_norm: 0.8573224400228125, iteration: 305723
loss: 1.0094109773635864,grad_norm: 0.9904425679999279, iteration: 305724
loss: 1.0076758861541748,grad_norm: 0.9999990423840881, iteration: 305725
loss: 0.9857551455497742,grad_norm: 0.6878310966720114, iteration: 305726
loss: 0.9980657696723938,grad_norm: 0.8044593441580618, iteration: 305727
loss: 1.1198194026947021,grad_norm: 0.9999995605397564, iteration: 305728
loss: 0.9884058833122253,grad_norm: 0.9532073907999993, iteration: 305729
loss: 1.0395697355270386,grad_norm: 0.6699178806673904, iteration: 305730
loss: 0.9975118041038513,grad_norm: 0.9753137466660403, iteration: 305731
loss: 1.0561649799346924,grad_norm: 0.8292761880542483, iteration: 305732
loss: 1.0670143365859985,grad_norm: 0.9999997696062153, iteration: 305733
loss: 0.9675182104110718,grad_norm: 0.8672768489835059, iteration: 305734
loss: 1.006424069404602,grad_norm: 0.9999990805717567, iteration: 305735
loss: 1.0275201797485352,grad_norm: 0.9845535233612823, iteration: 305736
loss: 1.053611159324646,grad_norm: 0.9999999540436859, iteration: 305737
loss: 1.0196925401687622,grad_norm: 0.8232846804410481, iteration: 305738
loss: 1.0542759895324707,grad_norm: 0.9999996187740293, iteration: 305739
loss: 1.0464051961898804,grad_norm: 0.9999995122859483, iteration: 305740
loss: 1.0135828256607056,grad_norm: 0.9637155310458652, iteration: 305741
loss: 1.0699827671051025,grad_norm: 0.8609674579124553, iteration: 305742
loss: 1.0003798007965088,grad_norm: 0.9999990626198453, iteration: 305743
loss: 1.0156594514846802,grad_norm: 0.8298186166885552, iteration: 305744
loss: 1.0034525394439697,grad_norm: 0.7462513674333457, iteration: 305745
loss: 0.9890186190605164,grad_norm: 0.7781356038360738, iteration: 305746
loss: 0.9904146194458008,grad_norm: 0.6920561607857206, iteration: 305747
loss: 1.076238751411438,grad_norm: 0.8880324072766717, iteration: 305748
loss: 1.0541372299194336,grad_norm: 0.9999992758662012, iteration: 305749
loss: 1.0524215698242188,grad_norm: 0.7648363521571411, iteration: 305750
loss: 1.0806125402450562,grad_norm: 0.9434222711684249, iteration: 305751
loss: 1.0285820960998535,grad_norm: 0.9999990204067067, iteration: 305752
loss: 1.0280998945236206,grad_norm: 0.8765803242445562, iteration: 305753
loss: 1.0585945844650269,grad_norm: 0.9101988253921559, iteration: 305754
loss: 1.0086426734924316,grad_norm: 0.761474860667202, iteration: 305755
loss: 0.9808772206306458,grad_norm: 0.9303260365861258, iteration: 305756
loss: 0.9868382215499878,grad_norm: 0.9965960839299873, iteration: 305757
loss: 1.052398920059204,grad_norm: 0.9021877143765824, iteration: 305758
loss: 0.9863479137420654,grad_norm: 0.9308256648565392, iteration: 305759
loss: 1.0010985136032104,grad_norm: 0.964781397839653, iteration: 305760
loss: 1.0362836122512817,grad_norm: 0.9835070755501095, iteration: 305761
loss: 0.9848653078079224,grad_norm: 0.8440864692380642, iteration: 305762
loss: 0.9676731824874878,grad_norm: 0.7568705349322746, iteration: 305763
loss: 1.0167150497436523,grad_norm: 0.9999996299521268, iteration: 305764
loss: 0.9705260992050171,grad_norm: 0.9322492976671924, iteration: 305765
loss: 0.9599128365516663,grad_norm: 0.7757997783443418, iteration: 305766
loss: 1.0198116302490234,grad_norm: 0.6982463612526826, iteration: 305767
loss: 1.0193711519241333,grad_norm: 0.7987097613187275, iteration: 305768
loss: 1.0114134550094604,grad_norm: 0.9999991899545847, iteration: 305769
loss: 1.0077531337738037,grad_norm: 0.8823280191799581, iteration: 305770
loss: 0.9773610234260559,grad_norm: 0.648372549022414, iteration: 305771
loss: 1.0242379903793335,grad_norm: 0.9999991435109603, iteration: 305772
loss: 1.1935093402862549,grad_norm: 0.9999991009268577, iteration: 305773
loss: 1.0302428007125854,grad_norm: 0.8979863892529987, iteration: 305774
loss: 1.076250433921814,grad_norm: 0.9425518761466368, iteration: 305775
loss: 1.0291961431503296,grad_norm: 0.8016609682581516, iteration: 305776
loss: 0.9849892258644104,grad_norm: 0.8481991406914638, iteration: 305777
loss: 1.292618989944458,grad_norm: 0.9999994233654055, iteration: 305778
loss: 0.9908968806266785,grad_norm: 0.8381771012569049, iteration: 305779
loss: 1.0039349794387817,grad_norm: 0.8193882261813195, iteration: 305780
loss: 0.9780400991439819,grad_norm: 0.9881565623772407, iteration: 305781
loss: 1.031435489654541,grad_norm: 0.9757845917518262, iteration: 305782
loss: 1.0083820819854736,grad_norm: 0.9193665918077774, iteration: 305783
loss: 1.0108011960983276,grad_norm: 0.9990547160539867, iteration: 305784
loss: 1.054218053817749,grad_norm: 0.999999523161138, iteration: 305785
loss: 0.9861576557159424,grad_norm: 0.9999991426634762, iteration: 305786
loss: 1.0566270351409912,grad_norm: 0.9999991756394181, iteration: 305787
loss: 1.046962857246399,grad_norm: 0.9999990956994931, iteration: 305788
loss: 1.005986213684082,grad_norm: 0.9999990552240228, iteration: 305789
loss: 1.0168638229370117,grad_norm: 0.9999991927854724, iteration: 305790
loss: 1.001847743988037,grad_norm: 0.9999995119409621, iteration: 305791
loss: 0.9806057214736938,grad_norm: 0.9828846353379658, iteration: 305792
loss: 1.0057857036590576,grad_norm: 0.7938677995944198, iteration: 305793
loss: 1.0292880535125732,grad_norm: 0.9999991040805324, iteration: 305794
loss: 0.9735220670700073,grad_norm: 0.9999998690227323, iteration: 305795
loss: 1.009347915649414,grad_norm: 0.9999998676620128, iteration: 305796
loss: 1.0012801885604858,grad_norm: 0.9999991486823865, iteration: 305797
loss: 0.9943692088127136,grad_norm: 0.8796730382192331, iteration: 305798
loss: 1.0065662860870361,grad_norm: 0.9999993551988331, iteration: 305799
loss: 0.9998462796211243,grad_norm: 0.7870638013115151, iteration: 305800
loss: 0.981021523475647,grad_norm: 0.8039794277909791, iteration: 305801
loss: 1.0648000240325928,grad_norm: 0.9999996722621248, iteration: 305802
loss: 0.9862578511238098,grad_norm: 0.777353075875857, iteration: 305803
loss: 1.017492413520813,grad_norm: 0.8956208691683514, iteration: 305804
loss: 1.0533981323242188,grad_norm: 0.9999991989727208, iteration: 305805
loss: 1.0111000537872314,grad_norm: 0.9999990739366279, iteration: 305806
loss: 1.0349793434143066,grad_norm: 0.7609911814177078, iteration: 305807
loss: 1.0193411111831665,grad_norm: 0.9296415264045395, iteration: 305808
loss: 1.0076521635055542,grad_norm: 0.9999992996157818, iteration: 305809
loss: 1.1178385019302368,grad_norm: 0.9999998992819092, iteration: 305810
loss: 1.0048327445983887,grad_norm: 0.9584976435774771, iteration: 305811
loss: 1.0025640726089478,grad_norm: 0.9999998008324933, iteration: 305812
loss: 1.0057424306869507,grad_norm: 0.8910352168100295, iteration: 305813
loss: 0.9657846093177795,grad_norm: 0.999999210165072, iteration: 305814
loss: 1.1331110000610352,grad_norm: 0.9565019460575067, iteration: 305815
loss: 1.074671983718872,grad_norm: 0.8476572105418267, iteration: 305816
loss: 1.0152652263641357,grad_norm: 0.9999997465278816, iteration: 305817
loss: 1.0190606117248535,grad_norm: 0.9999992251320963, iteration: 305818
loss: 1.122556447982788,grad_norm: 0.9999993427060238, iteration: 305819
loss: 1.0123385190963745,grad_norm: 0.9015107973729998, iteration: 305820
loss: 1.0344470739364624,grad_norm: 0.7245081359510941, iteration: 305821
loss: 1.1001918315887451,grad_norm: 0.9999991650494742, iteration: 305822
loss: 0.9726079106330872,grad_norm: 0.9999990267660712, iteration: 305823
loss: 1.0079643726348877,grad_norm: 0.9999995752523089, iteration: 305824
loss: 1.02657151222229,grad_norm: 0.8461683043229252, iteration: 305825
loss: 0.9637928009033203,grad_norm: 0.8853606444217884, iteration: 305826
loss: 1.0175234079360962,grad_norm: 0.8797477952920058, iteration: 305827
loss: 1.0990327596664429,grad_norm: 0.9999999297732799, iteration: 305828
loss: 1.0557811260223389,grad_norm: 0.9999994942090824, iteration: 305829
loss: 1.0919687747955322,grad_norm: 0.9999990426863021, iteration: 305830
loss: 1.020447850227356,grad_norm: 0.9999996601357225, iteration: 305831
loss: 1.0463457107543945,grad_norm: 0.9999990488650532, iteration: 305832
loss: 1.0357134342193604,grad_norm: 0.7577595751066937, iteration: 305833
loss: 0.982028067111969,grad_norm: 0.754486134322668, iteration: 305834
loss: 0.9919214844703674,grad_norm: 0.7882980300756678, iteration: 305835
loss: 1.070509910583496,grad_norm: 0.9999992805767296, iteration: 305836
loss: 0.9671451449394226,grad_norm: 0.849157014074056, iteration: 305837
loss: 0.9909549355506897,grad_norm: 0.7221118481145936, iteration: 305838
loss: 0.9899423718452454,grad_norm: 0.7616636442696546, iteration: 305839
loss: 1.173997163772583,grad_norm: 0.9999999140831812, iteration: 305840
loss: 1.0673474073410034,grad_norm: 0.9230375112044908, iteration: 305841
loss: 1.0576249361038208,grad_norm: 0.9999996875352879, iteration: 305842
loss: 1.0066324472427368,grad_norm: 0.9999990951741436, iteration: 305843
loss: 0.9857850074768066,grad_norm: 0.8474227100325668, iteration: 305844
loss: 1.1225817203521729,grad_norm: 0.9999997438591672, iteration: 305845
loss: 0.9927743673324585,grad_norm: 0.9999998401121744, iteration: 305846
loss: 0.9660170078277588,grad_norm: 0.9999992108861511, iteration: 305847
loss: 1.1529507637023926,grad_norm: 0.9999991912065987, iteration: 305848
loss: 1.0160424709320068,grad_norm: 0.919682363729609, iteration: 305849
loss: 0.9713608026504517,grad_norm: 0.8697854889336742, iteration: 305850
loss: 1.0329554080963135,grad_norm: 0.9425085654845697, iteration: 305851
loss: 1.012082576751709,grad_norm: 0.7557854421196467, iteration: 305852
loss: 0.9933000802993774,grad_norm: 0.8977292381240107, iteration: 305853
loss: 1.0050292015075684,grad_norm: 0.7589939566193403, iteration: 305854
loss: 1.0838998556137085,grad_norm: 0.9999994831353937, iteration: 305855
loss: 1.021193504333496,grad_norm: 0.9999996703077545, iteration: 305856
loss: 1.0999417304992676,grad_norm: 0.9989466175350413, iteration: 305857
loss: 1.0012658834457397,grad_norm: 0.8728691631468237, iteration: 305858
loss: 1.0121684074401855,grad_norm: 0.9872386077868499, iteration: 305859
loss: 0.999182939529419,grad_norm: 0.9211510508582395, iteration: 305860
loss: 0.9927628636360168,grad_norm: 0.9999991521993247, iteration: 305861
loss: 0.9825088381767273,grad_norm: 0.796834853894332, iteration: 305862
loss: 0.9646027088165283,grad_norm: 0.9160222958854951, iteration: 305863
loss: 1.0014493465423584,grad_norm: 0.6978228988623869, iteration: 305864
loss: 0.992865264415741,grad_norm: 0.9999991762747545, iteration: 305865
loss: 1.0149339437484741,grad_norm: 0.9999991022560965, iteration: 305866
loss: 1.060250997543335,grad_norm: 0.862370424680086, iteration: 305867
loss: 1.0166505575180054,grad_norm: 0.7883986915811216, iteration: 305868
loss: 0.9849281311035156,grad_norm: 0.9956718803898756, iteration: 305869
loss: 1.0106271505355835,grad_norm: 0.9808712830196776, iteration: 305870
loss: 0.997567892074585,grad_norm: 0.786628908968339, iteration: 305871
loss: 1.01080322265625,grad_norm: 0.8014796806546141, iteration: 305872
loss: 0.9940306544303894,grad_norm: 0.8674620915556088, iteration: 305873
loss: 1.0300045013427734,grad_norm: 0.9999990538997819, iteration: 305874
loss: 1.0035961866378784,grad_norm: 0.8159941269198987, iteration: 305875
loss: 1.010425090789795,grad_norm: 0.9999991903807379, iteration: 305876
loss: 1.0138025283813477,grad_norm: 0.9550625099964927, iteration: 305877
loss: 0.9878943562507629,grad_norm: 0.812376108432595, iteration: 305878
loss: 1.0808850526809692,grad_norm: 0.999999060291156, iteration: 305879
loss: 1.0063854455947876,grad_norm: 0.7636659292560333, iteration: 305880
loss: 1.0358340740203857,grad_norm: 0.9999998066003922, iteration: 305881
loss: 0.9607795476913452,grad_norm: 0.9999998814378278, iteration: 305882
loss: 0.9985467791557312,grad_norm: 0.8498873848265261, iteration: 305883
loss: 1.0566277503967285,grad_norm: 0.999999942397486, iteration: 305884
loss: 0.9804799556732178,grad_norm: 0.8835451558068065, iteration: 305885
loss: 1.089554786682129,grad_norm: 0.9999999900348159, iteration: 305886
loss: 1.0565067529678345,grad_norm: 0.8954369941417766, iteration: 305887
loss: 1.082360863685608,grad_norm: 0.8665389411106754, iteration: 305888
loss: 1.0054584741592407,grad_norm: 0.8941199502305434, iteration: 305889
loss: 0.990225613117218,grad_norm: 0.7346100271608074, iteration: 305890
loss: 1.0350996255874634,grad_norm: 0.9999992293147227, iteration: 305891
loss: 1.0311516523361206,grad_norm: 0.8259135414666113, iteration: 305892
loss: 0.9900028109550476,grad_norm: 0.822138327002573, iteration: 305893
loss: 1.069025993347168,grad_norm: 0.9582238141559536, iteration: 305894
loss: 1.017452597618103,grad_norm: 0.9999993236082875, iteration: 305895
loss: 1.1241644620895386,grad_norm: 0.9999991737684835, iteration: 305896
loss: 1.0513098239898682,grad_norm: 0.9999996441375049, iteration: 305897
loss: 1.0601520538330078,grad_norm: 0.9921377271529683, iteration: 305898
loss: 0.968668520450592,grad_norm: 0.7443494022098718, iteration: 305899
loss: 1.0359750986099243,grad_norm: 0.8289925450355419, iteration: 305900
loss: 1.0089961290359497,grad_norm: 0.9999996775403707, iteration: 305901
loss: 0.9950714111328125,grad_norm: 0.9999992255173574, iteration: 305902
loss: 1.0065187215805054,grad_norm: 0.7596487762860576, iteration: 305903
loss: 0.9866194128990173,grad_norm: 0.8757288396261828, iteration: 305904
loss: 0.9797653555870056,grad_norm: 0.803760995343965, iteration: 305905
loss: 0.9726361036300659,grad_norm: 0.9298100723263948, iteration: 305906
loss: 1.0732543468475342,grad_norm: 0.8870882427030075, iteration: 305907
loss: 0.9999046325683594,grad_norm: 0.819396293255054, iteration: 305908
loss: 0.9711806774139404,grad_norm: 0.7368280743209122, iteration: 305909
loss: 0.9834508895874023,grad_norm: 0.9078558520392201, iteration: 305910
loss: 1.0186024904251099,grad_norm: 0.8254368487364991, iteration: 305911
loss: 1.050162672996521,grad_norm: 0.8049890008013949, iteration: 305912
loss: 0.9926719069480896,grad_norm: 0.7839841418017238, iteration: 305913
loss: 1.0296618938446045,grad_norm: 0.8413428773868288, iteration: 305914
loss: 1.0561046600341797,grad_norm: 0.8115986377998011, iteration: 305915
loss: 1.067063570022583,grad_norm: 0.8886760854063904, iteration: 305916
loss: 0.9970808029174805,grad_norm: 0.837171986731238, iteration: 305917
loss: 1.0204641819000244,grad_norm: 0.734234125516551, iteration: 305918
loss: 0.9705514311790466,grad_norm: 0.8905151969964867, iteration: 305919
loss: 1.0275688171386719,grad_norm: 0.7968776397317568, iteration: 305920
loss: 0.9912655353546143,grad_norm: 0.8335041398446049, iteration: 305921
loss: 1.0067925453186035,grad_norm: 0.7809580135002007, iteration: 305922
loss: 0.983148455619812,grad_norm: 0.901139908994765, iteration: 305923
loss: 0.997380256652832,grad_norm: 0.6329598849915227, iteration: 305924
loss: 0.9821208715438843,grad_norm: 0.8550303218955558, iteration: 305925
loss: 1.0025532245635986,grad_norm: 0.8835942293046862, iteration: 305926
loss: 1.0302554368972778,grad_norm: 0.999999308633598, iteration: 305927
loss: 1.0189307928085327,grad_norm: 0.8877223835876918, iteration: 305928
loss: 0.9925406575202942,grad_norm: 0.8822979830283132, iteration: 305929
loss: 1.1075501441955566,grad_norm: 0.9999992824817695, iteration: 305930
loss: 1.023454189300537,grad_norm: 0.7594294704890322, iteration: 305931
loss: 1.004207730293274,grad_norm: 0.8783004794693212, iteration: 305932
loss: 1.3312604427337646,grad_norm: 0.9999994275571952, iteration: 305933
loss: 1.0853968858718872,grad_norm: 0.9999996790838437, iteration: 305934
loss: 1.0062217712402344,grad_norm: 0.777762305669753, iteration: 305935
loss: 0.9639583826065063,grad_norm: 0.9999991437025627, iteration: 305936
loss: 1.0164501667022705,grad_norm: 0.8364332400331496, iteration: 305937
loss: 1.035696268081665,grad_norm: 0.9814053850242087, iteration: 305938
loss: 1.0315966606140137,grad_norm: 0.9999991649247977, iteration: 305939
loss: 0.9990573525428772,grad_norm: 0.9999990091029515, iteration: 305940
loss: 1.0043326616287231,grad_norm: 0.8110123148344863, iteration: 305941
loss: 1.3306918144226074,grad_norm: 0.9999996708578675, iteration: 305942
loss: 1.0084528923034668,grad_norm: 0.776291256414388, iteration: 305943
loss: 1.0750869512557983,grad_norm: 0.9519683190343081, iteration: 305944
loss: 0.9766939282417297,grad_norm: 0.9050014519001797, iteration: 305945
loss: 1.034745693206787,grad_norm: 0.9999994539070273, iteration: 305946
loss: 1.0010647773742676,grad_norm: 0.999999917822525, iteration: 305947
loss: 0.9753877520561218,grad_norm: 0.9999990718934627, iteration: 305948
loss: 0.9945076107978821,grad_norm: 0.8961845195822227, iteration: 305949
loss: 1.0278218984603882,grad_norm: 0.8807018638851108, iteration: 305950
loss: 1.0494345426559448,grad_norm: 0.9999990362922536, iteration: 305951
loss: 0.9628016352653503,grad_norm: 0.9999991271591042, iteration: 305952
loss: 1.1069588661193848,grad_norm: 0.9999992438911195, iteration: 305953
loss: 0.9842173457145691,grad_norm: 0.8435013365098403, iteration: 305954
loss: 1.1346544027328491,grad_norm: 0.9999999515901745, iteration: 305955
loss: 1.0198915004730225,grad_norm: 0.8576717500061999, iteration: 305956
loss: 0.9263134002685547,grad_norm: 0.9999989954027133, iteration: 305957
loss: 1.04159414768219,grad_norm: 0.913094940088198, iteration: 305958
loss: 1.0148377418518066,grad_norm: 0.7487565869984897, iteration: 305959
loss: 0.999557614326477,grad_norm: 0.7802712312803943, iteration: 305960
loss: 0.9871864914894104,grad_norm: 0.9999991796567533, iteration: 305961
loss: 0.9913665056228638,grad_norm: 0.8434324912693562, iteration: 305962
loss: 1.0857149362564087,grad_norm: 0.999999240347116, iteration: 305963
loss: 0.9869546294212341,grad_norm: 0.8932080358959266, iteration: 305964
loss: 0.998622477054596,grad_norm: 0.8359390588963854, iteration: 305965
loss: 1.0317209959030151,grad_norm: 0.9144281911470825, iteration: 305966
loss: 1.0054056644439697,grad_norm: 0.8487826516020592, iteration: 305967
loss: 1.0013105869293213,grad_norm: 0.8249088657737123, iteration: 305968
loss: 0.9838422536849976,grad_norm: 0.7872669692810053, iteration: 305969
loss: 0.9947680830955505,grad_norm: 0.7351282056706069, iteration: 305970
loss: 1.036933183670044,grad_norm: 0.9999992090563419, iteration: 305971
loss: 1.0054370164871216,grad_norm: 0.9999991218304788, iteration: 305972
loss: 0.9821153879165649,grad_norm: 0.8630833922049809, iteration: 305973
loss: 1.0158963203430176,grad_norm: 0.9999991769518356, iteration: 305974
loss: 0.9630008339881897,grad_norm: 0.8801058856928986, iteration: 305975
loss: 1.0289078950881958,grad_norm: 0.804423889735055, iteration: 305976
loss: 1.052729606628418,grad_norm: 0.825406288713561, iteration: 305977
loss: 1.0365405082702637,grad_norm: 0.7939770990116836, iteration: 305978
loss: 1.0373300313949585,grad_norm: 0.9999990679519193, iteration: 305979
loss: 1.0332744121551514,grad_norm: 0.8787586208375614, iteration: 305980
loss: 1.0203487873077393,grad_norm: 0.8539003720472286, iteration: 305981
loss: 0.9806960225105286,grad_norm: 0.9625646963136278, iteration: 305982
loss: 1.0540592670440674,grad_norm: 0.8346730241101903, iteration: 305983
loss: 1.0017800331115723,grad_norm: 0.6903123662917682, iteration: 305984
loss: 0.982043981552124,grad_norm: 0.8217961340568993, iteration: 305985
loss: 0.9952688813209534,grad_norm: 0.8605058057103413, iteration: 305986
loss: 1.0636892318725586,grad_norm: 0.9999994428574078, iteration: 305987
loss: 1.010942816734314,grad_norm: 0.8805566905618055, iteration: 305988
loss: 1.032708764076233,grad_norm: 0.9193345355319932, iteration: 305989
loss: 1.004250407218933,grad_norm: 0.9999997126688464, iteration: 305990
loss: 0.9983601570129395,grad_norm: 0.7350619944378047, iteration: 305991
loss: 0.969942569732666,grad_norm: 0.8759454228183746, iteration: 305992
loss: 0.9823119640350342,grad_norm: 0.7286247408342091, iteration: 305993
loss: 1.003584623336792,grad_norm: 0.8888500948198362, iteration: 305994
loss: 1.0051474571228027,grad_norm: 0.7331417985638514, iteration: 305995
loss: 1.0048596858978271,grad_norm: 0.9568281263721843, iteration: 305996
loss: 1.028443694114685,grad_norm: 0.8436782339384397, iteration: 305997
loss: 0.9866028428077698,grad_norm: 0.8127193781705683, iteration: 305998
loss: 1.0545779466629028,grad_norm: 0.7779122141749881, iteration: 305999
loss: 1.0619933605194092,grad_norm: 0.97818389955863, iteration: 306000
loss: 0.9673784971237183,grad_norm: 0.8909577158686167, iteration: 306001
loss: 0.9992660880088806,grad_norm: 0.7456217474556287, iteration: 306002
loss: 0.9982138276100159,grad_norm: 0.7738723200636382, iteration: 306003
loss: 0.988998532295227,grad_norm: 0.7657338773767601, iteration: 306004
loss: 0.9903526306152344,grad_norm: 0.9126032335676292, iteration: 306005
loss: 0.9888219833374023,grad_norm: 0.9999991685248969, iteration: 306006
loss: 1.0226659774780273,grad_norm: 0.8131609836626952, iteration: 306007
loss: 0.9951187372207642,grad_norm: 0.981881863667062, iteration: 306008
loss: 1.0067445039749146,grad_norm: 0.7955085359895945, iteration: 306009
loss: 0.9993441700935364,grad_norm: 0.8438736806994823, iteration: 306010
loss: 0.9955313205718994,grad_norm: 0.8607463616066722, iteration: 306011
loss: 1.0047646760940552,grad_norm: 0.8684004301717654, iteration: 306012
loss: 0.994139552116394,grad_norm: 0.8860106884478304, iteration: 306013
loss: 0.9880139231681824,grad_norm: 0.8859352802683168, iteration: 306014
loss: 1.002554178237915,grad_norm: 0.9999997097306502, iteration: 306015
loss: 1.078015685081482,grad_norm: 0.942450400015014, iteration: 306016
loss: 0.9957736134529114,grad_norm: 0.7223002846246543, iteration: 306017
loss: 0.9857885241508484,grad_norm: 0.8467877542492649, iteration: 306018
loss: 1.0056641101837158,grad_norm: 0.7860482271451149, iteration: 306019
loss: 1.0109297037124634,grad_norm: 0.8772333309240964, iteration: 306020
loss: 0.9696633219718933,grad_norm: 0.8222378911554442, iteration: 306021
loss: 1.006134271621704,grad_norm: 0.8451373381624189, iteration: 306022
loss: 0.9871478080749512,grad_norm: 0.9207558737590753, iteration: 306023
loss: 1.013543725013733,grad_norm: 0.7064718691759841, iteration: 306024
loss: 1.0329265594482422,grad_norm: 0.9999991046390444, iteration: 306025
loss: 0.9864192008972168,grad_norm: 0.8587204894603422, iteration: 306026
loss: 0.9937524795532227,grad_norm: 0.9999997197855354, iteration: 306027
loss: 0.9785808324813843,grad_norm: 0.9999995391990099, iteration: 306028
loss: 1.1370221376419067,grad_norm: 0.9999996220986314, iteration: 306029
loss: 1.0293439626693726,grad_norm: 0.9999995074028298, iteration: 306030
loss: 0.9539836049079895,grad_norm: 0.9151509408164779, iteration: 306031
loss: 1.0414925813674927,grad_norm: 0.8324492354877194, iteration: 306032
loss: 1.001537799835205,grad_norm: 0.6825348848413862, iteration: 306033
loss: 1.006925344467163,grad_norm: 0.8797079368179012, iteration: 306034
loss: 1.057592749595642,grad_norm: 0.961975145715928, iteration: 306035
loss: 1.0076144933700562,grad_norm: 0.7950205866103036, iteration: 306036
loss: 1.0393880605697632,grad_norm: 0.8498622425229124, iteration: 306037
loss: 1.0235097408294678,grad_norm: 0.9361096038658379, iteration: 306038
loss: 1.0125046968460083,grad_norm: 0.7781871189883621, iteration: 306039
loss: 1.0249112844467163,grad_norm: 0.8042248275131427, iteration: 306040
loss: 1.0005741119384766,grad_norm: 0.9890202763205868, iteration: 306041
loss: 0.9858742952346802,grad_norm: 0.9999990391562149, iteration: 306042
loss: 1.0122779607772827,grad_norm: 0.8730429823698371, iteration: 306043
loss: 0.9926257133483887,grad_norm: 0.9106878595739375, iteration: 306044
loss: 0.9976205825805664,grad_norm: 0.8537417008751224, iteration: 306045
loss: 0.9870063066482544,grad_norm: 0.7480290786917713, iteration: 306046
loss: 1.004437804222107,grad_norm: 0.9999996960246775, iteration: 306047
loss: 1.0503065586090088,grad_norm: 0.9999999245554096, iteration: 306048
loss: 0.9575557112693787,grad_norm: 0.7219196204052573, iteration: 306049
loss: 1.0515162944793701,grad_norm: 0.9355479803871538, iteration: 306050
loss: 1.0394946336746216,grad_norm: 0.9999995775696684, iteration: 306051
loss: 1.0081775188446045,grad_norm: 0.7754499337571924, iteration: 306052
loss: 1.0078189373016357,grad_norm: 0.7803139877833538, iteration: 306053
loss: 0.9911680817604065,grad_norm: 0.9763309769926798, iteration: 306054
loss: 1.0145386457443237,grad_norm: 0.7933534255757336, iteration: 306055
loss: 0.9556630253791809,grad_norm: 0.6816725836514053, iteration: 306056
loss: 1.0052766799926758,grad_norm: 0.8569040193175859, iteration: 306057
loss: 1.0257543325424194,grad_norm: 0.9289433090714815, iteration: 306058
loss: 1.0006272792816162,grad_norm: 0.8047279395722611, iteration: 306059
loss: 0.9888430237770081,grad_norm: 0.9309265382939907, iteration: 306060
loss: 1.0274388790130615,grad_norm: 0.8635710533769971, iteration: 306061
loss: 0.9730854630470276,grad_norm: 0.8023738926105871, iteration: 306062
loss: 1.0404850244522095,grad_norm: 0.9822778186791938, iteration: 306063
loss: 1.052876353263855,grad_norm: 0.8180118997055977, iteration: 306064
loss: 0.9843711256980896,grad_norm: 0.791115456105897, iteration: 306065
loss: 1.0624558925628662,grad_norm: 0.999999419812931, iteration: 306066
loss: 0.9935526847839355,grad_norm: 0.8560228923057601, iteration: 306067
loss: 1.0178231000900269,grad_norm: 0.8900665274961411, iteration: 306068
loss: 0.973816990852356,grad_norm: 0.9924164963992382, iteration: 306069
loss: 0.9726279973983765,grad_norm: 0.7766776954583859, iteration: 306070
loss: 1.1550043821334839,grad_norm: 0.9999995503573907, iteration: 306071
loss: 1.0463916063308716,grad_norm: 0.8363738903457445, iteration: 306072
loss: 0.9864291548728943,grad_norm: 0.9999991774631846, iteration: 306073
loss: 0.9645779728889465,grad_norm: 0.9926045610714758, iteration: 306074
loss: 0.9727653861045837,grad_norm: 0.7394991439023283, iteration: 306075
loss: 0.9888150095939636,grad_norm: 0.7805146312765159, iteration: 306076
loss: 0.9681320786476135,grad_norm: 0.7991606654930591, iteration: 306077
loss: 0.9987891912460327,grad_norm: 0.9999997507923113, iteration: 306078
loss: 1.0254936218261719,grad_norm: 0.9999990691602068, iteration: 306079
loss: 1.0309003591537476,grad_norm: 0.9999989606110147, iteration: 306080
loss: 0.9874633550643921,grad_norm: 0.9999991271869776, iteration: 306081
loss: 1.0469990968704224,grad_norm: 0.9999999619539309, iteration: 306082
loss: 0.9939953088760376,grad_norm: 0.7340458422127975, iteration: 306083
loss: 0.9948470592498779,grad_norm: 0.9100303677849404, iteration: 306084
loss: 1.0506967306137085,grad_norm: 0.9999989520962601, iteration: 306085
loss: 1.0252971649169922,grad_norm: 0.9823059237531113, iteration: 306086
loss: 1.0529675483703613,grad_norm: 0.7900843997319063, iteration: 306087
loss: 0.9885343313217163,grad_norm: 0.848157925467666, iteration: 306088
loss: 0.9960917830467224,grad_norm: 0.923910216126563, iteration: 306089
loss: 0.985757052898407,grad_norm: 0.8132339466927576, iteration: 306090
loss: 0.9856126308441162,grad_norm: 0.9115030098014223, iteration: 306091
loss: 1.0017423629760742,grad_norm: 0.9999989859455024, iteration: 306092
loss: 0.9882733225822449,grad_norm: 0.9096598565977978, iteration: 306093
loss: 0.9937237501144409,grad_norm: 0.8330966697513018, iteration: 306094
loss: 1.0055392980575562,grad_norm: 0.9699681710774871, iteration: 306095
loss: 0.9686655402183533,grad_norm: 0.8801348771064484, iteration: 306096
loss: 0.9890657663345337,grad_norm: 0.7424538235514255, iteration: 306097
loss: 1.0069968700408936,grad_norm: 0.9999999714749095, iteration: 306098
loss: 0.9733191728591919,grad_norm: 0.8313989365080556, iteration: 306099
loss: 1.0483052730560303,grad_norm: 0.9999996709193657, iteration: 306100
loss: 1.0149097442626953,grad_norm: 0.8913430429509218, iteration: 306101
loss: 1.0067620277404785,grad_norm: 0.7036265907757334, iteration: 306102
loss: 0.9860403537750244,grad_norm: 0.7225883090967657, iteration: 306103
loss: 0.990910530090332,grad_norm: 0.8654779991649685, iteration: 306104
loss: 1.0843231678009033,grad_norm: 0.9999991882017942, iteration: 306105
loss: 1.0242820978164673,grad_norm: 0.7158660984144873, iteration: 306106
loss: 1.0096046924591064,grad_norm: 0.9999990164110875, iteration: 306107
loss: 0.9676381349563599,grad_norm: 0.9298452190743803, iteration: 306108
loss: 0.9977448582649231,grad_norm: 0.8911535400280489, iteration: 306109
loss: 0.9762084484100342,grad_norm: 0.8709636342988121, iteration: 306110
loss: 1.0176427364349365,grad_norm: 0.8233513440890975, iteration: 306111
loss: 1.0230534076690674,grad_norm: 0.8088727420657293, iteration: 306112
loss: 0.9920380115509033,grad_norm: 0.9305572435479573, iteration: 306113
loss: 1.0015887022018433,grad_norm: 0.8892063182213715, iteration: 306114
loss: 0.999086320400238,grad_norm: 0.7178989617388813, iteration: 306115
loss: 0.9933697581291199,grad_norm: 0.8653843202847172, iteration: 306116
loss: 0.9994583129882812,grad_norm: 0.7526221241839041, iteration: 306117
loss: 0.990157425403595,grad_norm: 0.9999993162792655, iteration: 306118
loss: 0.9893691539764404,grad_norm: 0.9999993075216179, iteration: 306119
loss: 1.0143969058990479,grad_norm: 0.8092475531942999, iteration: 306120
loss: 1.1344046592712402,grad_norm: 0.9999994828522344, iteration: 306121
loss: 0.9921236038208008,grad_norm: 0.8790160902424596, iteration: 306122
loss: 1.0942860841751099,grad_norm: 0.9999998376919959, iteration: 306123
loss: 1.0047016143798828,grad_norm: 0.8023912738994344, iteration: 306124
loss: 1.023371934890747,grad_norm: 0.9999994365498002, iteration: 306125
loss: 1.0379928350448608,grad_norm: 0.771299836413432, iteration: 306126
loss: 1.002151608467102,grad_norm: 0.761279949478016, iteration: 306127
loss: 1.0032658576965332,grad_norm: 0.7995009104201108, iteration: 306128
loss: 1.052093505859375,grad_norm: 0.7644771997374804, iteration: 306129
loss: 1.0123728513717651,grad_norm: 0.848838666993809, iteration: 306130
loss: 1.009231686592102,grad_norm: 0.8341077625278936, iteration: 306131
loss: 1.0061832666397095,grad_norm: 0.7749331916606985, iteration: 306132
loss: 0.9768710136413574,grad_norm: 0.7864249632877711, iteration: 306133
loss: 0.9965044260025024,grad_norm: 0.8173643718942216, iteration: 306134
loss: 0.9839912056922913,grad_norm: 0.8341939703111809, iteration: 306135
loss: 1.03452467918396,grad_norm: 0.7669786005938766, iteration: 306136
loss: 1.0254789590835571,grad_norm: 0.8710183857274231, iteration: 306137
loss: 1.240976095199585,grad_norm: 0.9999996555794276, iteration: 306138
loss: 1.0674870014190674,grad_norm: 0.9999990847448886, iteration: 306139
loss: 0.9906479120254517,grad_norm: 0.8038371238601175, iteration: 306140
loss: 0.9683139324188232,grad_norm: 0.7350186584382196, iteration: 306141
loss: 0.9826928973197937,grad_norm: 0.795892571679066, iteration: 306142
loss: 0.9924197793006897,grad_norm: 0.6920514086907533, iteration: 306143
loss: 1.1633431911468506,grad_norm: 0.9999997669460337, iteration: 306144
loss: 1.0867754220962524,grad_norm: 0.9049351649957317, iteration: 306145
loss: 1.0195924043655396,grad_norm: 0.9999996079973411, iteration: 306146
loss: 0.971030592918396,grad_norm: 0.8991640534294254, iteration: 306147
loss: 1.0120315551757812,grad_norm: 0.8221524567976837, iteration: 306148
loss: 1.020572304725647,grad_norm: 0.9999994609791519, iteration: 306149
loss: 1.0076812505722046,grad_norm: 0.8527646193961758, iteration: 306150
loss: 1.0205512046813965,grad_norm: 0.9999991252467454, iteration: 306151
loss: 0.9946759939193726,grad_norm: 0.7699454227004763, iteration: 306152
loss: 0.987579882144928,grad_norm: 0.8723911480702128, iteration: 306153
loss: 0.9978686571121216,grad_norm: 0.8873364339924721, iteration: 306154
loss: 1.014864206314087,grad_norm: 0.8596380633727869, iteration: 306155
loss: 1.01221764087677,grad_norm: 0.9557894485239051, iteration: 306156
loss: 1.1410068273544312,grad_norm: 0.9999993593463542, iteration: 306157
loss: 1.0342679023742676,grad_norm: 0.9999992557240671, iteration: 306158
loss: 0.9851657748222351,grad_norm: 0.9096162582742561, iteration: 306159
loss: 1.0061156749725342,grad_norm: 0.9999990561272253, iteration: 306160
loss: 1.0227293968200684,grad_norm: 0.7260242701669521, iteration: 306161
loss: 0.9949826598167419,grad_norm: 0.7591638489067629, iteration: 306162
loss: 0.97366863489151,grad_norm: 0.8129191897409661, iteration: 306163
loss: 1.1111887693405151,grad_norm: 1.0000000801058293, iteration: 306164
loss: 1.0454658269882202,grad_norm: 0.7704860370697881, iteration: 306165
loss: 1.0339652299880981,grad_norm: 0.9999999581020109, iteration: 306166
loss: 0.9654813408851624,grad_norm: 0.8483282077340591, iteration: 306167
loss: 1.0074330568313599,grad_norm: 0.852826949297227, iteration: 306168
loss: 0.990641176700592,grad_norm: 0.8205558075093196, iteration: 306169
loss: 1.0592989921569824,grad_norm: 0.8305136198275993, iteration: 306170
loss: 1.04598867893219,grad_norm: 0.8597021781421281, iteration: 306171
loss: 1.0184738636016846,grad_norm: 0.7180478797976468, iteration: 306172
loss: 0.9686142206192017,grad_norm: 0.8869037322975221, iteration: 306173
loss: 0.9857031106948853,grad_norm: 0.777830870845451, iteration: 306174
loss: 1.119991660118103,grad_norm: 0.9999999424270429, iteration: 306175
loss: 0.9971597790718079,grad_norm: 0.8476931500755729, iteration: 306176
loss: 0.9826959371566772,grad_norm: 0.8596969784992241, iteration: 306177
loss: 0.9988235831260681,grad_norm: 0.9999994593022229, iteration: 306178
loss: 1.0112197399139404,grad_norm: 0.9122499987684262, iteration: 306179
loss: 1.014929175376892,grad_norm: 0.8503182255344611, iteration: 306180
loss: 0.9937712550163269,grad_norm: 0.8524227726373385, iteration: 306181
loss: 0.9874535202980042,grad_norm: 0.9435242805438252, iteration: 306182
loss: 0.9878400564193726,grad_norm: 0.7278868877823921, iteration: 306183
loss: 1.0583620071411133,grad_norm: 0.9764572671764208, iteration: 306184
loss: 1.0079902410507202,grad_norm: 0.7588755532870186, iteration: 306185
loss: 1.0849586725234985,grad_norm: 0.9999994429665284, iteration: 306186
loss: 1.0141685009002686,grad_norm: 0.7629904489412122, iteration: 306187
loss: 0.9781959056854248,grad_norm: 0.7881219341954381, iteration: 306188
loss: 0.9285836219787598,grad_norm: 0.7316248407787719, iteration: 306189
loss: 1.0128779411315918,grad_norm: 0.954199738557837, iteration: 306190
loss: 0.9776541590690613,grad_norm: 0.9999996374324416, iteration: 306191
loss: 1.163520097732544,grad_norm: 0.9999998505903382, iteration: 306192
loss: 0.9200894832611084,grad_norm: 0.9659399025636972, iteration: 306193
loss: 1.004433035850525,grad_norm: 0.9999992196279613, iteration: 306194
loss: 1.110660195350647,grad_norm: 0.9999990881108574, iteration: 306195
loss: 0.9898107647895813,grad_norm: 0.9999993383027916, iteration: 306196
loss: 0.9789066910743713,grad_norm: 0.8958295186404385, iteration: 306197
loss: 0.9807673096656799,grad_norm: 0.8142768437822652, iteration: 306198
loss: 0.9929919838905334,grad_norm: 0.6880855467125147, iteration: 306199
loss: 0.9822248220443726,grad_norm: 0.9008150022222827, iteration: 306200
loss: 1.0167127847671509,grad_norm: 0.819782615683757, iteration: 306201
loss: 1.0375034809112549,grad_norm: 0.9544822906634212, iteration: 306202
loss: 1.0240036249160767,grad_norm: 0.8745971796664826, iteration: 306203
loss: 0.991773247718811,grad_norm: 0.7793658901483645, iteration: 306204
loss: 1.0132313966751099,grad_norm: 0.9999995208460989, iteration: 306205
loss: 1.0196640491485596,grad_norm: 0.8987484663631079, iteration: 306206
loss: 0.9574432373046875,grad_norm: 0.8598417301670407, iteration: 306207
loss: 0.9830171465873718,grad_norm: 0.7132448558758181, iteration: 306208
loss: 1.0252312421798706,grad_norm: 0.7980520107439669, iteration: 306209
loss: 1.009771466255188,grad_norm: 0.8940314857172645, iteration: 306210
loss: 1.0026752948760986,grad_norm: 0.999999274225764, iteration: 306211
loss: 0.9793184399604797,grad_norm: 0.7866740631359445, iteration: 306212
loss: 0.998682975769043,grad_norm: 0.8340888204722837, iteration: 306213
loss: 1.0130621194839478,grad_norm: 0.884450983218143, iteration: 306214
loss: 0.9438331127166748,grad_norm: 0.9409741220303843, iteration: 306215
loss: 0.9754969477653503,grad_norm: 0.9484764342800992, iteration: 306216
loss: 1.0527660846710205,grad_norm: 0.9916670420013731, iteration: 306217
loss: 0.9976176023483276,grad_norm: 0.7853630953658158, iteration: 306218
loss: 1.034705638885498,grad_norm: 0.9999990391525432, iteration: 306219
loss: 0.9905497431755066,grad_norm: 0.8831012414442836, iteration: 306220
loss: 1.0288782119750977,grad_norm: 0.7613278250662379, iteration: 306221
loss: 1.034601092338562,grad_norm: 0.9746423748416089, iteration: 306222
loss: 1.0243337154388428,grad_norm: 0.774591000319327, iteration: 306223
loss: 0.9712026715278625,grad_norm: 0.7321647804203072, iteration: 306224
loss: 0.9811139702796936,grad_norm: 0.7712039540887272, iteration: 306225
loss: 1.002694845199585,grad_norm: 0.719676734215037, iteration: 306226
loss: 1.0022947788238525,grad_norm: 0.9066713065616775, iteration: 306227
loss: 1.0091912746429443,grad_norm: 0.8711899689951041, iteration: 306228
loss: 1.0522574186325073,grad_norm: 0.8131321738758294, iteration: 306229
loss: 0.9701538681983948,grad_norm: 0.7546866172056711, iteration: 306230
loss: 1.0034290552139282,grad_norm: 0.8217932505454603, iteration: 306231
loss: 0.9831412434577942,grad_norm: 0.8692417091930051, iteration: 306232
loss: 0.9781369566917419,grad_norm: 0.8130073807155301, iteration: 306233
loss: 1.017506718635559,grad_norm: 0.9551675940858725, iteration: 306234
loss: 1.1022206544876099,grad_norm: 1.0000000071354183, iteration: 306235
loss: 1.0343059301376343,grad_norm: 0.9999999780628234, iteration: 306236
loss: 1.1212114095687866,grad_norm: 0.9999993767627513, iteration: 306237
loss: 1.0146485567092896,grad_norm: 0.9999991321659181, iteration: 306238
loss: 1.0267150402069092,grad_norm: 0.9999998513471682, iteration: 306239
loss: 1.0227880477905273,grad_norm: 0.8955429047168028, iteration: 306240
loss: 1.0125845670700073,grad_norm: 0.6974034749380309, iteration: 306241
loss: 1.0044022798538208,grad_norm: 0.8593376089306601, iteration: 306242
loss: 0.9756991267204285,grad_norm: 0.8437257652836065, iteration: 306243
loss: 0.9879987239837646,grad_norm: 0.7564310293956529, iteration: 306244
loss: 0.9908650517463684,grad_norm: 0.8274482209442952, iteration: 306245
loss: 1.0149205923080444,grad_norm: 0.8700796832194211, iteration: 306246
loss: 0.9893695712089539,grad_norm: 0.9259181631539086, iteration: 306247
loss: 1.0261476039886475,grad_norm: 0.8583357645164651, iteration: 306248
loss: 0.9854708313941956,grad_norm: 0.895981376476163, iteration: 306249
loss: 0.9799544811248779,grad_norm: 0.8611266537719163, iteration: 306250
loss: 0.9997060298919678,grad_norm: 0.8240336443272661, iteration: 306251
loss: 0.9868173599243164,grad_norm: 0.7761149687059549, iteration: 306252
loss: 0.9872337579727173,grad_norm: 0.9469282296564883, iteration: 306253
loss: 0.9997482895851135,grad_norm: 0.779889286258382, iteration: 306254
loss: 0.9878098368644714,grad_norm: 0.9273549863986452, iteration: 306255
loss: 1.0080348253250122,grad_norm: 0.8268418018735968, iteration: 306256
loss: 0.9865427613258362,grad_norm: 0.8749681550950691, iteration: 306257
loss: 1.055625319480896,grad_norm: 0.9999991524632955, iteration: 306258
loss: 1.0319030284881592,grad_norm: 0.8765395751589753, iteration: 306259
loss: 1.0345515012741089,grad_norm: 0.8443690661667913, iteration: 306260
loss: 1.018936038017273,grad_norm: 0.7808662414542772, iteration: 306261
loss: 1.0550689697265625,grad_norm: 0.9999990273369672, iteration: 306262
loss: 1.0011323690414429,grad_norm: 0.8452797589015911, iteration: 306263
loss: 1.0278239250183105,grad_norm: 0.9159253425057341, iteration: 306264
loss: 0.9967758059501648,grad_norm: 0.7845197680393234, iteration: 306265
loss: 1.0226386785507202,grad_norm: 0.9478337317626032, iteration: 306266
loss: 1.0015616416931152,grad_norm: 0.9372161866027975, iteration: 306267
loss: 1.0199918746948242,grad_norm: 0.8039449552709013, iteration: 306268
loss: 1.0142298936843872,grad_norm: 0.8265339398877013, iteration: 306269
loss: 1.0014879703521729,grad_norm: 0.8457482466959463, iteration: 306270
loss: 0.9537986516952515,grad_norm: 0.8639413352497638, iteration: 306271
loss: 1.015404462814331,grad_norm: 0.9999997998352699, iteration: 306272
loss: 1.1033222675323486,grad_norm: 0.9999991019758072, iteration: 306273
loss: 1.002345323562622,grad_norm: 0.9999990754088947, iteration: 306274
loss: 1.0855121612548828,grad_norm: 0.9999994363198235, iteration: 306275
loss: 1.0055385828018188,grad_norm: 0.8148789938933725, iteration: 306276
loss: 1.0691957473754883,grad_norm: 0.9999998294493904, iteration: 306277
loss: 1.006237268447876,grad_norm: 0.8312175027934329, iteration: 306278
loss: 0.99375981092453,grad_norm: 0.999999997298135, iteration: 306279
loss: 0.9790107607841492,grad_norm: 0.7522912403150847, iteration: 306280
loss: 0.9876571297645569,grad_norm: 0.7309565637510208, iteration: 306281
loss: 1.010977029800415,grad_norm: 0.8046875016975132, iteration: 306282
loss: 0.9941623210906982,grad_norm: 0.8511616930479842, iteration: 306283
loss: 0.953621506690979,grad_norm: 0.901137912548945, iteration: 306284
loss: 0.9779669642448425,grad_norm: 0.7592362122144086, iteration: 306285
loss: 1.0016529560089111,grad_norm: 0.9999990918075838, iteration: 306286
loss: 1.0179160833358765,grad_norm: 0.869982717173214, iteration: 306287
loss: 0.9869612455368042,grad_norm: 0.9249909749261693, iteration: 306288
loss: 1.0103343725204468,grad_norm: 0.9354169493243922, iteration: 306289
loss: 0.9871395230293274,grad_norm: 0.6771584919529546, iteration: 306290
loss: 1.0488046407699585,grad_norm: 0.7820532976208514, iteration: 306291
loss: 0.9642682671546936,grad_norm: 0.8489246298432033, iteration: 306292
loss: 0.9920822978019714,grad_norm: 0.9218937687694299, iteration: 306293
loss: 1.138731598854065,grad_norm: 0.9999993944830662, iteration: 306294
loss: 1.0144481658935547,grad_norm: 0.9052388051196958, iteration: 306295
loss: 1.029265284538269,grad_norm: 0.9559289727318449, iteration: 306296
loss: 0.9976533651351929,grad_norm: 0.7692087135761869, iteration: 306297
loss: 0.9875673055648804,grad_norm: 0.9999991689680399, iteration: 306298
loss: 1.0087456703186035,grad_norm: 0.9999991056522436, iteration: 306299
loss: 1.0368125438690186,grad_norm: 0.9175916486727577, iteration: 306300
loss: 1.0389618873596191,grad_norm: 0.8717712978350358, iteration: 306301
loss: 1.0246130228042603,grad_norm: 0.9999998799889929, iteration: 306302
loss: 1.0277155637741089,grad_norm: 0.9999990056771775, iteration: 306303
loss: 1.0185675621032715,grad_norm: 1.0000000470109323, iteration: 306304
loss: 1.0056833028793335,grad_norm: 0.8101560626366352, iteration: 306305
loss: 0.9629703760147095,grad_norm: 0.9243934992273884, iteration: 306306
loss: 1.001484751701355,grad_norm: 0.7951002751587154, iteration: 306307
loss: 0.9728725552558899,grad_norm: 0.7849443050249431, iteration: 306308
loss: 0.9956925511360168,grad_norm: 0.9764291638044784, iteration: 306309
loss: 1.0018560886383057,grad_norm: 0.9016796671366646, iteration: 306310
loss: 0.9973729252815247,grad_norm: 0.864995597913988, iteration: 306311
loss: 1.0164687633514404,grad_norm: 0.8882889814902397, iteration: 306312
loss: 1.0318272113800049,grad_norm: 0.807374614299609, iteration: 306313
loss: 1.0098979473114014,grad_norm: 0.9983717735808791, iteration: 306314
loss: 1.0468569993972778,grad_norm: 0.839369996402751, iteration: 306315
loss: 1.0234044790267944,grad_norm: 0.909481911238975, iteration: 306316
loss: 0.977447509765625,grad_norm: 0.8413582662949264, iteration: 306317
loss: 1.0027879476547241,grad_norm: 0.7356780181208358, iteration: 306318
loss: 0.9764895439147949,grad_norm: 0.8460037236927574, iteration: 306319
loss: 1.0437085628509521,grad_norm: 0.7741734800462318, iteration: 306320
loss: 1.023390293121338,grad_norm: 0.8874027539836351, iteration: 306321
loss: 1.0140641927719116,grad_norm: 0.9999999277952781, iteration: 306322
loss: 1.0200945138931274,grad_norm: 0.8901855334450769, iteration: 306323
loss: 1.0390483140945435,grad_norm: 0.8710984602431667, iteration: 306324
loss: 0.995232105255127,grad_norm: 0.7497017733854595, iteration: 306325
loss: 0.9987654685974121,grad_norm: 0.7868856910420289, iteration: 306326
loss: 0.9877827167510986,grad_norm: 0.9999991108689447, iteration: 306327
loss: 1.0146690607070923,grad_norm: 0.7681154076074701, iteration: 306328
loss: 0.9770131707191467,grad_norm: 0.836719665585822, iteration: 306329
loss: 1.074119210243225,grad_norm: 0.9999995282665991, iteration: 306330
loss: 1.0993574857711792,grad_norm: 0.9294347716107882, iteration: 306331
loss: 0.9866207838058472,grad_norm: 0.8993103783953261, iteration: 306332
loss: 1.0168145895004272,grad_norm: 0.9218824788965101, iteration: 306333
loss: 1.0131118297576904,grad_norm: 0.8661894591559814, iteration: 306334
loss: 0.9847502708435059,grad_norm: 0.9748299813535016, iteration: 306335
loss: 0.9966698884963989,grad_norm: 0.7732678348576649, iteration: 306336
loss: 1.0327808856964111,grad_norm: 0.9158459780559739, iteration: 306337
loss: 1.015199065208435,grad_norm: 0.7540207072177769, iteration: 306338
loss: 0.9945890307426453,grad_norm: 0.8130748072588133, iteration: 306339
loss: 0.9755542278289795,grad_norm: 0.8915469796016837, iteration: 306340
loss: 1.0863380432128906,grad_norm: 0.9999998676222864, iteration: 306341
loss: 1.0183582305908203,grad_norm: 0.6892140965388195, iteration: 306342
loss: 0.9775958061218262,grad_norm: 0.9435322911155093, iteration: 306343
loss: 0.9970500469207764,grad_norm: 0.8534501087178822, iteration: 306344
loss: 1.0041197538375854,grad_norm: 0.8105579249251078, iteration: 306345
loss: 0.9811664819717407,grad_norm: 0.8221676974067332, iteration: 306346
loss: 0.994514524936676,grad_norm: 0.9154805447952107, iteration: 306347
loss: 1.0293338298797607,grad_norm: 0.7378465388449497, iteration: 306348
loss: 1.0162819623947144,grad_norm: 0.999999053848595, iteration: 306349
loss: 0.9915875792503357,grad_norm: 0.9256717905676732, iteration: 306350
loss: 1.0422117710113525,grad_norm: 0.804321747758796, iteration: 306351
loss: 0.9914194941520691,grad_norm: 0.7311658080718567, iteration: 306352
loss: 1.0348950624465942,grad_norm: 0.8368091375628806, iteration: 306353
loss: 0.9961440563201904,grad_norm: 0.8103030541931302, iteration: 306354
loss: 0.9840738773345947,grad_norm: 0.8401054822243886, iteration: 306355
loss: 0.99144446849823,grad_norm: 0.7843083028557573, iteration: 306356
loss: 1.0181829929351807,grad_norm: 0.9999991890182123, iteration: 306357
loss: 1.0705829858779907,grad_norm: 0.9999993490132536, iteration: 306358
loss: 0.9918403625488281,grad_norm: 0.926968603598743, iteration: 306359
loss: 0.9537994861602783,grad_norm: 0.8622474933548792, iteration: 306360
loss: 1.0746709108352661,grad_norm: 0.9999999734990144, iteration: 306361
loss: 1.005967617034912,grad_norm: 0.8202438513674797, iteration: 306362
loss: 1.0065582990646362,grad_norm: 0.8078262392134821, iteration: 306363
loss: 1.0021004676818848,grad_norm: 0.9749894916804215, iteration: 306364
loss: 1.0099400281906128,grad_norm: 0.857193250641828, iteration: 306365
loss: 1.0039901733398438,grad_norm: 0.9999989544095614, iteration: 306366
loss: 0.9764477610588074,grad_norm: 0.9509810566782393, iteration: 306367
loss: 0.9626403450965881,grad_norm: 0.9623576918290537, iteration: 306368
loss: 1.0146275758743286,grad_norm: 0.9136254513948588, iteration: 306369
loss: 1.0384331941604614,grad_norm: 0.8913294771397492, iteration: 306370
loss: 0.9704710841178894,grad_norm: 0.7882633503038778, iteration: 306371
loss: 0.9672901034355164,grad_norm: 0.9658414733314122, iteration: 306372
loss: 0.9755417108535767,grad_norm: 0.8295300910278881, iteration: 306373
loss: 1.1052812337875366,grad_norm: 0.9999990421358725, iteration: 306374
loss: 1.033439040184021,grad_norm: 0.867811016081716, iteration: 306375
loss: 0.9742770791053772,grad_norm: 0.9999991852009479, iteration: 306376
loss: 1.0191397666931152,grad_norm: 0.8921217210507691, iteration: 306377
loss: 1.1532487869262695,grad_norm: 1.0000000693573738, iteration: 306378
loss: 1.1366122961044312,grad_norm: 0.9999996682153639, iteration: 306379
loss: 1.0264757871627808,grad_norm: 0.7277444533633844, iteration: 306380
loss: 1.018249750137329,grad_norm: 0.9577825699243335, iteration: 306381
loss: 0.9837594628334045,grad_norm: 0.7643021772403185, iteration: 306382
loss: 1.0064982175827026,grad_norm: 0.7886079473487573, iteration: 306383
loss: 0.9796145558357239,grad_norm: 0.8612841240998157, iteration: 306384
loss: 1.0167909860610962,grad_norm: 0.8888786517244364, iteration: 306385
loss: 1.0576444864273071,grad_norm: 0.9999999837270365, iteration: 306386
loss: 1.0198462009429932,grad_norm: 0.8470707844109661, iteration: 306387
loss: 1.0597407817840576,grad_norm: 0.9999998601041445, iteration: 306388
loss: 1.0949567556381226,grad_norm: 0.9999991808658404, iteration: 306389
loss: 1.0101169347763062,grad_norm: 0.7160042018674131, iteration: 306390
loss: 1.006076693534851,grad_norm: 0.9539191678376723, iteration: 306391
loss: 1.0079647302627563,grad_norm: 0.9999997619807794, iteration: 306392
loss: 1.049620509147644,grad_norm: 0.9999996732129388, iteration: 306393
loss: 1.0103378295898438,grad_norm: 0.9102598946740876, iteration: 306394
loss: 0.9672732949256897,grad_norm: 0.9037081399252358, iteration: 306395
loss: 1.0159509181976318,grad_norm: 0.8183488239352341, iteration: 306396
loss: 1.0164903402328491,grad_norm: 0.9999990148048076, iteration: 306397
loss: 0.9944731593132019,grad_norm: 0.81096918516349, iteration: 306398
loss: 1.0619603395462036,grad_norm: 0.8127968093819423, iteration: 306399
loss: 0.98048335313797,grad_norm: 0.9598271155753315, iteration: 306400
loss: 1.0242080688476562,grad_norm: 0.8765922998816879, iteration: 306401
loss: 1.0470600128173828,grad_norm: 0.9999990294458546, iteration: 306402
loss: 0.9890149831771851,grad_norm: 0.9733043678992381, iteration: 306403
loss: 1.0209225416183472,grad_norm: 0.9999991671460904, iteration: 306404
loss: 0.997828483581543,grad_norm: 0.8604080905162772, iteration: 306405
loss: 1.0280874967575073,grad_norm: 0.893263153374982, iteration: 306406
loss: 1.017015814781189,grad_norm: 0.7582009920056404, iteration: 306407
loss: 1.009605884552002,grad_norm: 0.6887794939226823, iteration: 306408
loss: 1.0214773416519165,grad_norm: 0.8173436601987079, iteration: 306409
loss: 1.0163064002990723,grad_norm: 0.9999999902802631, iteration: 306410
loss: 0.977976381778717,grad_norm: 0.9999995327275873, iteration: 306411
loss: 0.9688988327980042,grad_norm: 0.7159695674504842, iteration: 306412
loss: 1.038183569908142,grad_norm: 0.9999992799748412, iteration: 306413
loss: 0.9672134518623352,grad_norm: 0.9999991657078197, iteration: 306414
loss: 0.9935964941978455,grad_norm: 0.8561929446582708, iteration: 306415
loss: 1.0137523412704468,grad_norm: 0.7630209461234414, iteration: 306416
loss: 1.0195385217666626,grad_norm: 0.8344422743626054, iteration: 306417
loss: 1.1786283254623413,grad_norm: 0.9999996497214045, iteration: 306418
loss: 0.9961909651756287,grad_norm: 0.9999991706153929, iteration: 306419
loss: 0.9948389530181885,grad_norm: 0.895011437256995, iteration: 306420
loss: 1.1292556524276733,grad_norm: 0.9999998334436493, iteration: 306421
loss: 1.0209405422210693,grad_norm: 0.8711172929778551, iteration: 306422
loss: 1.0424822568893433,grad_norm: 0.9999996888005668, iteration: 306423
loss: 1.0184531211853027,grad_norm: 0.9999990826716485, iteration: 306424
loss: 1.163534164428711,grad_norm: 0.9999994780465711, iteration: 306425
loss: 1.0247734785079956,grad_norm: 0.9999996497586172, iteration: 306426
loss: 1.023032546043396,grad_norm: 0.9653129588547096, iteration: 306427
loss: 0.9716917276382446,grad_norm: 0.8668864478675313, iteration: 306428
loss: 0.9701009392738342,grad_norm: 0.8203668493116786, iteration: 306429
loss: 1.046644687652588,grad_norm: 0.9999989714608113, iteration: 306430
loss: 1.0177080631256104,grad_norm: 0.8349065140712727, iteration: 306431
loss: 1.0396208763122559,grad_norm: 0.9238528576132723, iteration: 306432
loss: 0.9882081747055054,grad_norm: 0.8800224615529826, iteration: 306433
loss: 0.9806932806968689,grad_norm: 0.7304362815835603, iteration: 306434
loss: 0.9900192618370056,grad_norm: 0.9999990329459025, iteration: 306435
loss: 1.0068554878234863,grad_norm: 0.8746488076853497, iteration: 306436
loss: 1.0129774808883667,grad_norm: 0.7695674925689365, iteration: 306437
loss: 1.0464560985565186,grad_norm: 0.9714383811077247, iteration: 306438
loss: 0.988219141960144,grad_norm: 0.6997944052356755, iteration: 306439
loss: 0.9431970119476318,grad_norm: 0.8664999479930063, iteration: 306440
loss: 0.982408881187439,grad_norm: 0.7924417380146314, iteration: 306441
loss: 1.0362226963043213,grad_norm: 0.9999991003206546, iteration: 306442
loss: 0.9660540223121643,grad_norm: 0.934413865069127, iteration: 306443
loss: 1.0509225130081177,grad_norm: 0.9999991042978978, iteration: 306444
loss: 0.9833230376243591,grad_norm: 0.8555830990648738, iteration: 306445
loss: 1.0411638021469116,grad_norm: 0.7559188632320007, iteration: 306446
loss: 0.9904829859733582,grad_norm: 0.7495941923677648, iteration: 306447
loss: 1.0883128643035889,grad_norm: 0.898914805823531, iteration: 306448
loss: 1.014662504196167,grad_norm: 0.7783509426657506, iteration: 306449
loss: 0.999596357345581,grad_norm: 0.7841074357283169, iteration: 306450
loss: 1.0422433614730835,grad_norm: 1.0000000576913988, iteration: 306451
loss: 1.0048764944076538,grad_norm: 0.7709292228502932, iteration: 306452
loss: 0.9930018186569214,grad_norm: 0.7433924852601922, iteration: 306453
loss: 0.987623929977417,grad_norm: 0.6812774394201224, iteration: 306454
loss: 1.0081040859222412,grad_norm: 0.9999995573530897, iteration: 306455
loss: 0.9800238609313965,grad_norm: 0.8455181266516582, iteration: 306456
loss: 1.0440195798873901,grad_norm: 0.8808392358155154, iteration: 306457
loss: 0.9893796443939209,grad_norm: 0.9999991159267763, iteration: 306458
loss: 0.9921197295188904,grad_norm: 0.869630061159632, iteration: 306459
loss: 1.0316184759140015,grad_norm: 0.7053485715581612, iteration: 306460
loss: 0.9603298306465149,grad_norm: 0.7680019189112823, iteration: 306461
loss: 1.0077500343322754,grad_norm: 0.9999992071590701, iteration: 306462
loss: 0.9686262011528015,grad_norm: 0.9677603764381513, iteration: 306463
loss: 1.037235140800476,grad_norm: 0.9069425145587485, iteration: 306464
loss: 1.0849450826644897,grad_norm: 0.9999992327730487, iteration: 306465
loss: 0.9961239099502563,grad_norm: 0.9610306480438708, iteration: 306466
loss: 1.0672502517700195,grad_norm: 0.9999998479068971, iteration: 306467
loss: 1.0247337818145752,grad_norm: 0.9999993005336941, iteration: 306468
loss: 0.9876389503479004,grad_norm: 0.9050671556701028, iteration: 306469
loss: 1.0285651683807373,grad_norm: 0.9999997081378806, iteration: 306470
loss: 0.9716023802757263,grad_norm: 0.8177014578037345, iteration: 306471
loss: 0.9848163723945618,grad_norm: 0.7558778922724634, iteration: 306472
loss: 1.0179442167282104,grad_norm: 0.8474533085019971, iteration: 306473
loss: 1.044327974319458,grad_norm: 0.8855630405819318, iteration: 306474
loss: 1.1771965026855469,grad_norm: 1.0000000874213169, iteration: 306475
loss: 1.0398614406585693,grad_norm: 0.9999992274532596, iteration: 306476
loss: 0.9780975580215454,grad_norm: 0.8714150238490063, iteration: 306477
loss: 1.0849637985229492,grad_norm: 0.9999995703361565, iteration: 306478
loss: 1.0840626955032349,grad_norm: 0.8685917659876126, iteration: 306479
loss: 1.011720895767212,grad_norm: 0.9763264873685029, iteration: 306480
loss: 1.0083599090576172,grad_norm: 0.8367974319838292, iteration: 306481
loss: 1.0008567571640015,grad_norm: 0.7703966587861419, iteration: 306482
loss: 1.0607179403305054,grad_norm: 0.8374372250460858, iteration: 306483
loss: 1.145119547843933,grad_norm: 0.9999991539080333, iteration: 306484
loss: 0.9774523377418518,grad_norm: 0.7861092099832551, iteration: 306485
loss: 1.038684606552124,grad_norm: 0.9999991662829856, iteration: 306486
loss: 1.0320494174957275,grad_norm: 0.7717834846956219, iteration: 306487
loss: 1.0423243045806885,grad_norm: 0.9999996069565881, iteration: 306488
loss: 1.0770643949508667,grad_norm: 0.9999991986188871, iteration: 306489
loss: 1.0721359252929688,grad_norm: 0.8434032283671437, iteration: 306490
loss: 1.0037999153137207,grad_norm: 0.9934163442913925, iteration: 306491
loss: 1.0341519117355347,grad_norm: 0.9999990683239893, iteration: 306492
loss: 1.0052775144577026,grad_norm: 0.999999375603315, iteration: 306493
loss: 1.0034191608428955,grad_norm: 0.8036206173499838, iteration: 306494
loss: 0.9653588533401489,grad_norm: 0.999999127853102, iteration: 306495
loss: 1.0638856887817383,grad_norm: 0.9999992827973612, iteration: 306496
loss: 1.0654659271240234,grad_norm: 0.9598399900735828, iteration: 306497
loss: 0.9818239808082581,grad_norm: 0.9999991554693826, iteration: 306498
loss: 1.0465883016586304,grad_norm: 0.8225523816758524, iteration: 306499
loss: 1.128088116645813,grad_norm: 0.9999998779221624, iteration: 306500
loss: 0.9714303612709045,grad_norm: 0.7702662651106732, iteration: 306501
loss: 0.954552948474884,grad_norm: 0.8255175142039816, iteration: 306502
loss: 0.9845530986785889,grad_norm: 0.9999996547690251, iteration: 306503
loss: 1.0189529657363892,grad_norm: 0.88524497940627, iteration: 306504
loss: 1.0034894943237305,grad_norm: 0.8064532712897989, iteration: 306505
loss: 1.032473087310791,grad_norm: 0.8137960146595477, iteration: 306506
loss: 1.0053058862686157,grad_norm: 0.9369690947940446, iteration: 306507
loss: 0.9814589023590088,grad_norm: 0.9187893629980224, iteration: 306508
loss: 1.0468478202819824,grad_norm: 0.866413112179827, iteration: 306509
loss: 0.9896178841590881,grad_norm: 0.8861795797246993, iteration: 306510
loss: 1.0432167053222656,grad_norm: 0.8755766155926755, iteration: 306511
loss: 0.9775235652923584,grad_norm: 0.8711602728761962, iteration: 306512
loss: 1.0192559957504272,grad_norm: 0.9600087133853717, iteration: 306513
loss: 0.9909297823905945,grad_norm: 0.8248190155263135, iteration: 306514
loss: 1.0166374444961548,grad_norm: 0.7337568807601694, iteration: 306515
loss: 1.0853865146636963,grad_norm: 0.9999992529403172, iteration: 306516
loss: 1.0149387121200562,grad_norm: 0.9213298986674207, iteration: 306517
loss: 1.0546237230300903,grad_norm: 0.8559677270506265, iteration: 306518
loss: 1.0347093343734741,grad_norm: 0.9866580911943685, iteration: 306519
loss: 1.0331828594207764,grad_norm: 0.9999990777468125, iteration: 306520
loss: 0.9723144173622131,grad_norm: 0.9180313860893317, iteration: 306521
loss: 1.0906680822372437,grad_norm: 0.9241933365868528, iteration: 306522
loss: 1.0370697975158691,grad_norm: 0.9999990192166204, iteration: 306523
loss: 1.0260121822357178,grad_norm: 0.9689669036397655, iteration: 306524
loss: 0.9871857762336731,grad_norm: 0.999999129256673, iteration: 306525
loss: 1.0054858922958374,grad_norm: 0.8517836641787095, iteration: 306526
loss: 0.9903512001037598,grad_norm: 0.7315956849268948, iteration: 306527
loss: 0.9689618349075317,grad_norm: 0.870023401761728, iteration: 306528
loss: 1.0140037536621094,grad_norm: 0.9999990974335451, iteration: 306529
loss: 1.0396002531051636,grad_norm: 0.8494406779546869, iteration: 306530
loss: 0.9667723178863525,grad_norm: 0.8407613302478841, iteration: 306531
loss: 1.0176892280578613,grad_norm: 0.8923273242356861, iteration: 306532
loss: 1.2036011219024658,grad_norm: 0.9999995865366967, iteration: 306533
loss: 0.9607403874397278,grad_norm: 0.789577192584897, iteration: 306534
loss: 1.023765206336975,grad_norm: 0.9888716570821873, iteration: 306535
loss: 1.0430203676223755,grad_norm: 0.999999586476369, iteration: 306536
loss: 0.9998194575309753,grad_norm: 0.840388462117956, iteration: 306537
loss: 1.0565251111984253,grad_norm: 0.9984354783334293, iteration: 306538
loss: 0.9771044254302979,grad_norm: 0.9504705726862858, iteration: 306539
loss: 0.9994924664497375,grad_norm: 0.7726251828902722, iteration: 306540
loss: 1.0556907653808594,grad_norm: 0.8164746405172219, iteration: 306541
loss: 1.0359102487564087,grad_norm: 0.8633681698907936, iteration: 306542
loss: 1.0272231101989746,grad_norm: 0.9733633707485254, iteration: 306543
loss: 1.0920064449310303,grad_norm: 0.9999997549240042, iteration: 306544
loss: 1.0561631917953491,grad_norm: 0.9999991362834129, iteration: 306545
loss: 1.0163344144821167,grad_norm: 0.9999999550982148, iteration: 306546
loss: 1.0276553630828857,grad_norm: 0.8661262688323303, iteration: 306547
loss: 1.000807285308838,grad_norm: 0.8235551001333552, iteration: 306548
loss: 0.9762468338012695,grad_norm: 0.8097607178389723, iteration: 306549
loss: 1.0350818634033203,grad_norm: 0.954126495863959, iteration: 306550
loss: 1.0093193054199219,grad_norm: 0.999999537534142, iteration: 306551
loss: 1.023611307144165,grad_norm: 0.8967302075575084, iteration: 306552
loss: 1.000697374343872,grad_norm: 0.7648092288139156, iteration: 306553
loss: 0.9729712009429932,grad_norm: 0.8122579268681048, iteration: 306554
loss: 1.02873957157135,grad_norm: 0.9509054954770839, iteration: 306555
loss: 0.9849559664726257,grad_norm: 0.8629463888737618, iteration: 306556
loss: 0.968239426612854,grad_norm: 0.8348458403625528, iteration: 306557
loss: 0.9973480701446533,grad_norm: 0.9999989695640209, iteration: 306558
loss: 1.000299334526062,grad_norm: 0.8346710976128925, iteration: 306559
loss: 1.0200499296188354,grad_norm: 0.779303916269253, iteration: 306560
loss: 1.001395344734192,grad_norm: 0.8589517463580881, iteration: 306561
loss: 1.0048032999038696,grad_norm: 0.8794772673216902, iteration: 306562
loss: 0.9948063492774963,grad_norm: 0.9276714755756521, iteration: 306563
loss: 1.0548484325408936,grad_norm: 0.6726768683174857, iteration: 306564
loss: 1.0241491794586182,grad_norm: 0.9999995208044036, iteration: 306565
loss: 0.9871408939361572,grad_norm: 0.8082253690077807, iteration: 306566
loss: 0.9993401765823364,grad_norm: 0.9066132938442979, iteration: 306567
loss: 1.0032273530960083,grad_norm: 0.7857816304403104, iteration: 306568
loss: 0.9636258482933044,grad_norm: 0.8442947622808337, iteration: 306569
loss: 1.0221359729766846,grad_norm: 0.9301387000362389, iteration: 306570
loss: 1.0369573831558228,grad_norm: 0.7953134611491361, iteration: 306571
loss: 1.0150829553604126,grad_norm: 0.9567071078761473, iteration: 306572
loss: 0.9900592565536499,grad_norm: 0.9882348618303664, iteration: 306573
loss: 1.0136704444885254,grad_norm: 0.9415599356712662, iteration: 306574
loss: 1.022849202156067,grad_norm: 0.7844896531145205, iteration: 306575
loss: 1.0065510272979736,grad_norm: 0.9999998795813941, iteration: 306576
loss: 0.9861035943031311,grad_norm: 0.9511697799198782, iteration: 306577
loss: 1.0148781538009644,grad_norm: 0.9999995697386731, iteration: 306578
loss: 1.098090410232544,grad_norm: 0.9999991624677744, iteration: 306579
loss: 0.9850084185600281,grad_norm: 0.9103694006818956, iteration: 306580
loss: 1.01936674118042,grad_norm: 0.725225296094117, iteration: 306581
loss: 1.0726678371429443,grad_norm: 0.8857820975222166, iteration: 306582
loss: 1.0462836027145386,grad_norm: 0.9855016881036238, iteration: 306583
loss: 1.0434906482696533,grad_norm: 0.9999990945700128, iteration: 306584
loss: 1.0120229721069336,grad_norm: 0.9999992457268594, iteration: 306585
loss: 1.0171318054199219,grad_norm: 0.9999991815721148, iteration: 306586
loss: 1.0372793674468994,grad_norm: 0.999999943357252, iteration: 306587
loss: 1.031782865524292,grad_norm: 0.7949943693535201, iteration: 306588
loss: 0.9985817670822144,grad_norm: 0.9999992516464141, iteration: 306589
loss: 0.9794645309448242,grad_norm: 0.7788161191069735, iteration: 306590
loss: 1.0180226564407349,grad_norm: 0.8248524653892392, iteration: 306591
loss: 0.9759580492973328,grad_norm: 0.9999991241485259, iteration: 306592
loss: 1.031939148902893,grad_norm: 0.9102965921139403, iteration: 306593
loss: 1.0572738647460938,grad_norm: 0.9999990325679117, iteration: 306594
loss: 1.0584776401519775,grad_norm: 0.9999998991697429, iteration: 306595
loss: 0.9962745904922485,grad_norm: 0.9999999387083403, iteration: 306596
loss: 1.0069650411605835,grad_norm: 0.999999804405978, iteration: 306597
loss: 1.0342395305633545,grad_norm: 0.9999994477356862, iteration: 306598
loss: 0.9697543978691101,grad_norm: 0.8570464947674372, iteration: 306599
loss: 1.1170170307159424,grad_norm: 0.8506991527382377, iteration: 306600
loss: 0.9939237833023071,grad_norm: 0.7939660476181724, iteration: 306601
loss: 0.9620010852813721,grad_norm: 0.8939535519101183, iteration: 306602
loss: 0.9926024675369263,grad_norm: 0.836501121341896, iteration: 306603
loss: 1.0137850046157837,grad_norm: 0.899878498129223, iteration: 306604
loss: 1.011863350868225,grad_norm: 0.8969411314374875, iteration: 306605
loss: 1.021370768547058,grad_norm: 0.9999991265927749, iteration: 306606
loss: 1.0162855386734009,grad_norm: 0.7777207189031043, iteration: 306607
loss: 0.9725388884544373,grad_norm: 0.9999991483196099, iteration: 306608
loss: 0.9763551950454712,grad_norm: 0.8399458424901081, iteration: 306609
loss: 1.0302329063415527,grad_norm: 0.9999996527406394, iteration: 306610
loss: 1.0175296068191528,grad_norm: 0.9448889966375601, iteration: 306611
loss: 0.9903150200843811,grad_norm: 0.986527986578722, iteration: 306612
loss: 1.0476939678192139,grad_norm: 0.9999991961804303, iteration: 306613
loss: 1.0139825344085693,grad_norm: 0.851288995283125, iteration: 306614
loss: 0.9893267154693604,grad_norm: 0.7908483939118063, iteration: 306615
loss: 1.0041100978851318,grad_norm: 0.9104330671744705, iteration: 306616
loss: 1.0266735553741455,grad_norm: 0.958803705799194, iteration: 306617
loss: 0.9659073352813721,grad_norm: 0.8199150763482695, iteration: 306618
loss: 1.021918535232544,grad_norm: 0.8510340756421276, iteration: 306619
loss: 1.0380773544311523,grad_norm: 0.8833318615126938, iteration: 306620
loss: 1.0142982006072998,grad_norm: 0.7290908740662848, iteration: 306621
loss: 1.0048867464065552,grad_norm: 0.9999990261431277, iteration: 306622
loss: 0.9874712228775024,grad_norm: 0.7488377154291493, iteration: 306623
loss: 1.0313429832458496,grad_norm: 0.9999990385470475, iteration: 306624
loss: 1.0515575408935547,grad_norm: 0.9999989144338239, iteration: 306625
loss: 0.9911932349205017,grad_norm: 0.9997531638134919, iteration: 306626
loss: 1.0395863056182861,grad_norm: 0.999999293744844, iteration: 306627
loss: 0.9888805150985718,grad_norm: 0.7827717673604488, iteration: 306628
loss: 0.9989750981330872,grad_norm: 0.9622964384472344, iteration: 306629
loss: 1.0660009384155273,grad_norm: 0.9999991848715187, iteration: 306630
loss: 1.0106524229049683,grad_norm: 0.8707912720912574, iteration: 306631
loss: 0.9690766930580139,grad_norm: 0.9999990785065342, iteration: 306632
loss: 0.9887458086013794,grad_norm: 0.9901312161451911, iteration: 306633
loss: 1.0202419757843018,grad_norm: 0.7155464352998007, iteration: 306634
loss: 1.0730113983154297,grad_norm: 0.9999996939638949, iteration: 306635
loss: 1.005778431892395,grad_norm: 0.9124243564950439, iteration: 306636
loss: 0.9849339723587036,grad_norm: 0.9999990019240931, iteration: 306637
loss: 1.0334196090698242,grad_norm: 0.9999990956750476, iteration: 306638
loss: 0.9818812608718872,grad_norm: 0.7883850761005793, iteration: 306639
loss: 0.9906085133552551,grad_norm: 0.981269453353248, iteration: 306640
loss: 0.9651235938072205,grad_norm: 0.9897861986262472, iteration: 306641
loss: 1.034753680229187,grad_norm: 0.9194228114688805, iteration: 306642
loss: 0.9853838086128235,grad_norm: 0.9999995017903457, iteration: 306643
loss: 0.995507538318634,grad_norm: 0.8581667899288025, iteration: 306644
loss: 0.9710495471954346,grad_norm: 0.7596145899569009, iteration: 306645
loss: 0.9695178270339966,grad_norm: 0.826723894771503, iteration: 306646
loss: 0.9946423768997192,grad_norm: 0.9999991796421275, iteration: 306647
loss: 0.9670593738555908,grad_norm: 0.9919458399261004, iteration: 306648
loss: 1.0067951679229736,grad_norm: 0.9999995460164576, iteration: 306649
loss: 0.9832450747489929,grad_norm: 0.8654317815782796, iteration: 306650
loss: 1.012762188911438,grad_norm: 0.7195338731436025, iteration: 306651
loss: 1.1368865966796875,grad_norm: 0.9999994753175508, iteration: 306652
loss: 1.018731951713562,grad_norm: 0.9896593027829071, iteration: 306653
loss: 0.9893457889556885,grad_norm: 0.8907255667359613, iteration: 306654
loss: 1.2128653526306152,grad_norm: 0.9999997672852186, iteration: 306655
loss: 1.0171020030975342,grad_norm: 0.9999997190818617, iteration: 306656
loss: 0.9666311144828796,grad_norm: 0.8307059187304623, iteration: 306657
loss: 0.9965865015983582,grad_norm: 0.7154520330841144, iteration: 306658
loss: 0.9910959601402283,grad_norm: 0.8414385671164154, iteration: 306659
loss: 1.0253843069076538,grad_norm: 0.8437327444334283, iteration: 306660
loss: 0.9715354442596436,grad_norm: 0.8031445432891775, iteration: 306661
loss: 1.038497805595398,grad_norm: 0.8107903964074905, iteration: 306662
loss: 1.0503813028335571,grad_norm: 0.9000310261557996, iteration: 306663
loss: 0.99849933385849,grad_norm: 0.9200896917972787, iteration: 306664
loss: 1.0955618619918823,grad_norm: 0.999999052585379, iteration: 306665
loss: 1.0230162143707275,grad_norm: 0.9302613140885206, iteration: 306666
loss: 1.020880103111267,grad_norm: 0.8400879122445294, iteration: 306667
loss: 1.0208942890167236,grad_norm: 0.9923358527435928, iteration: 306668
loss: 0.9534624218940735,grad_norm: 0.9433785078111925, iteration: 306669
loss: 0.9635893702507019,grad_norm: 0.8686289614451462, iteration: 306670
loss: 0.989951491355896,grad_norm: 0.8974339457257219, iteration: 306671
loss: 1.00486421585083,grad_norm: 0.791012706745731, iteration: 306672
loss: 0.9831296801567078,grad_norm: 0.7771276650253472, iteration: 306673
loss: 1.0251388549804688,grad_norm: 0.9274879158442928, iteration: 306674
loss: 1.0760715007781982,grad_norm: 0.8231531842122981, iteration: 306675
loss: 0.9923324584960938,grad_norm: 0.8667311744308012, iteration: 306676
loss: 0.9978724122047424,grad_norm: 0.9999997940575264, iteration: 306677
loss: 1.0108892917633057,grad_norm: 0.8475906982182516, iteration: 306678
loss: 0.9922981858253479,grad_norm: 0.9999995784978825, iteration: 306679
loss: 0.9904341101646423,grad_norm: 0.7832527493991556, iteration: 306680
loss: 0.9993253350257874,grad_norm: 0.9999990620751931, iteration: 306681
loss: 0.9955176115036011,grad_norm: 0.9954052902158566, iteration: 306682
loss: 0.9868789911270142,grad_norm: 0.7859112336143682, iteration: 306683
loss: 1.1008630990982056,grad_norm: 0.9999995947768368, iteration: 306684
loss: 0.9949823617935181,grad_norm: 0.7627809186720371, iteration: 306685
loss: 1.0463249683380127,grad_norm: 0.9999995787078308, iteration: 306686
loss: 1.0073390007019043,grad_norm: 0.7436768654753191, iteration: 306687
loss: 1.0237400531768799,grad_norm: 0.9999997105592761, iteration: 306688
loss: 0.9892150163650513,grad_norm: 0.8784136996461597, iteration: 306689
loss: 1.0731441974639893,grad_norm: 0.9999997929230889, iteration: 306690
loss: 1.022955060005188,grad_norm: 0.9999995033713024, iteration: 306691
loss: 1.1663283109664917,grad_norm: 0.999999355918156, iteration: 306692
loss: 0.9742291569709778,grad_norm: 0.7781149832544464, iteration: 306693
loss: 1.043640375137329,grad_norm: 0.999999367619425, iteration: 306694
loss: 1.01372492313385,grad_norm: 0.8500359785858889, iteration: 306695
loss: 0.998033881187439,grad_norm: 0.8877323318415312, iteration: 306696
loss: 0.9931299090385437,grad_norm: 0.869215620628802, iteration: 306697
loss: 1.0105416774749756,grad_norm: 0.9999992571846483, iteration: 306698
loss: 0.9888822436332703,grad_norm: 0.9327769824210403, iteration: 306699
loss: 0.9741339087486267,grad_norm: 0.9233505016126113, iteration: 306700
loss: 1.017148733139038,grad_norm: 0.8421868645122886, iteration: 306701
loss: 1.0566322803497314,grad_norm: 0.7997578772391152, iteration: 306702
loss: 0.9953577518463135,grad_norm: 0.8919445575453061, iteration: 306703
loss: 1.0261274576187134,grad_norm: 0.7947197604932169, iteration: 306704
loss: 0.9681233167648315,grad_norm: 0.999999937648099, iteration: 306705
loss: 1.1700499057769775,grad_norm: 0.999999744070259, iteration: 306706
loss: 0.9888229370117188,grad_norm: 0.902908108416889, iteration: 306707
loss: 1.0126715898513794,grad_norm: 0.9999999363812362, iteration: 306708
loss: 1.0222933292388916,grad_norm: 0.8130592735687857, iteration: 306709
loss: 0.9678992629051208,grad_norm: 0.9158588007125091, iteration: 306710
loss: 1.0254325866699219,grad_norm: 0.9999995583059899, iteration: 306711
loss: 0.9462310075759888,grad_norm: 0.922758211294036, iteration: 306712
loss: 0.9857139587402344,grad_norm: 0.7428660132987702, iteration: 306713
loss: 1.1049023866653442,grad_norm: 0.9999991326097252, iteration: 306714
loss: 0.9965667724609375,grad_norm: 0.9326124607405968, iteration: 306715
loss: 1.0447267293930054,grad_norm: 0.9999991129650276, iteration: 306716
loss: 1.0117719173431396,grad_norm: 0.6816483417570297, iteration: 306717
loss: 0.9912797808647156,grad_norm: 0.8525581043078513, iteration: 306718
loss: 1.0233359336853027,grad_norm: 0.8431011692399909, iteration: 306719
loss: 1.0150867700576782,grad_norm: 0.8623389034003048, iteration: 306720
loss: 0.9934911131858826,grad_norm: 0.8101338132197654, iteration: 306721
loss: 1.021237850189209,grad_norm: 0.7200206868905783, iteration: 306722
loss: 0.9621670842170715,grad_norm: 0.9192625175808704, iteration: 306723
loss: 1.0040744543075562,grad_norm: 0.7293856350066842, iteration: 306724
loss: 1.000967264175415,grad_norm: 0.8748896287986466, iteration: 306725
loss: 0.999190092086792,grad_norm: 0.7912115294285524, iteration: 306726
loss: 1.0038832426071167,grad_norm: 0.8367049157267746, iteration: 306727
loss: 0.9843171238899231,grad_norm: 0.9174115324821169, iteration: 306728
loss: 1.0415674448013306,grad_norm: 0.9999991945907335, iteration: 306729
loss: 1.0320607423782349,grad_norm: 0.9999989913345582, iteration: 306730
loss: 1.0266647338867188,grad_norm: 0.9999999005927122, iteration: 306731
loss: 1.0304185152053833,grad_norm: 0.7109888236694344, iteration: 306732
loss: 0.9883090853691101,grad_norm: 0.8288261881075737, iteration: 306733
loss: 1.0320364236831665,grad_norm: 0.9999992664860531, iteration: 306734
loss: 1.029596209526062,grad_norm: 0.781442044757941, iteration: 306735
loss: 1.022538185119629,grad_norm: 0.9999991391597496, iteration: 306736
loss: 0.9896949529647827,grad_norm: 0.9121776845852685, iteration: 306737
loss: 1.0082290172576904,grad_norm: 0.8227587359220788, iteration: 306738
loss: 0.9878467321395874,grad_norm: 0.9155553306722731, iteration: 306739
loss: 1.0185037851333618,grad_norm: 0.8668539761137274, iteration: 306740
loss: 1.0093482732772827,grad_norm: 0.6372040507721992, iteration: 306741
loss: 1.0159696340560913,grad_norm: 0.9220385236807245, iteration: 306742
loss: 0.9949254393577576,grad_norm: 0.999999613417039, iteration: 306743
loss: 0.9847666025161743,grad_norm: 0.8533755341436933, iteration: 306744
loss: 1.060447096824646,grad_norm: 0.8475024967054436, iteration: 306745
loss: 1.0017919540405273,grad_norm: 0.9999994806743092, iteration: 306746
loss: 1.0727696418762207,grad_norm: 0.8705659202521857, iteration: 306747
loss: 1.038344383239746,grad_norm: 0.9999996940483532, iteration: 306748
loss: 0.9927016496658325,grad_norm: 0.8703120662134772, iteration: 306749
loss: 1.0008710622787476,grad_norm: 0.9999995600093752, iteration: 306750
loss: 1.1025477647781372,grad_norm: 0.9377430385960466, iteration: 306751
loss: 1.0561933517456055,grad_norm: 0.9746910004320344, iteration: 306752
loss: 0.9758633375167847,grad_norm: 0.9199024284279631, iteration: 306753
loss: 1.0271329879760742,grad_norm: 0.8037630090493654, iteration: 306754
loss: 0.9627718329429626,grad_norm: 0.846650749560651, iteration: 306755
loss: 1.0524848699569702,grad_norm: 0.961363956073743, iteration: 306756
loss: 1.0178771018981934,grad_norm: 0.8100094058070832, iteration: 306757
loss: 1.0130691528320312,grad_norm: 0.9999997367654216, iteration: 306758
loss: 0.978803277015686,grad_norm: 0.9999998473687433, iteration: 306759
loss: 1.0183838605880737,grad_norm: 0.8034025195390079, iteration: 306760
loss: 1.0442944765090942,grad_norm: 0.9999998731818495, iteration: 306761
loss: 0.9821919202804565,grad_norm: 0.8455192204430243, iteration: 306762
loss: 0.9904195666313171,grad_norm: 0.9999993906540534, iteration: 306763
loss: 1.0080082416534424,grad_norm: 0.7993952171667461, iteration: 306764
loss: 0.9804765582084656,grad_norm: 0.8357676473783902, iteration: 306765
loss: 0.9695061445236206,grad_norm: 0.7490698226251021, iteration: 306766
loss: 0.9930025935173035,grad_norm: 0.9999992110724597, iteration: 306767
loss: 0.9942083954811096,grad_norm: 0.7946746913000593, iteration: 306768
loss: 0.9901017546653748,grad_norm: 0.8816340200213373, iteration: 306769
loss: 0.9762760996818542,grad_norm: 0.9027867563028183, iteration: 306770
loss: 1.0513365268707275,grad_norm: 0.9999996670749263, iteration: 306771
loss: 1.0451723337173462,grad_norm: 0.8934792810522542, iteration: 306772
loss: 1.0738223791122437,grad_norm: 0.9999991128095899, iteration: 306773
loss: 1.0266567468643188,grad_norm: 0.9882595258410739, iteration: 306774
loss: 1.000534176826477,grad_norm: 0.9999994956668407, iteration: 306775
loss: 0.9865660667419434,grad_norm: 0.7755468131232278, iteration: 306776
loss: 0.9944890737533569,grad_norm: 0.9398612863227763, iteration: 306777
loss: 1.0078387260437012,grad_norm: 0.9999991193051324, iteration: 306778
loss: 1.0267088413238525,grad_norm: 0.980069787858536, iteration: 306779
loss: 1.034205675125122,grad_norm: 0.8030363975260798, iteration: 306780
loss: 1.004557490348816,grad_norm: 0.9534265678099105, iteration: 306781
loss: 0.9841052293777466,grad_norm: 0.7210470805396915, iteration: 306782
loss: 1.0245682001113892,grad_norm: 0.9862148305341223, iteration: 306783
loss: 1.006287693977356,grad_norm: 0.9999993538919992, iteration: 306784
loss: 0.9857521653175354,grad_norm: 0.799787447114894, iteration: 306785
loss: 0.9820424318313599,grad_norm: 0.7778685543982309, iteration: 306786
loss: 1.0131806135177612,grad_norm: 0.8483832071873649, iteration: 306787
loss: 1.0311949253082275,grad_norm: 0.999999821599807, iteration: 306788
loss: 1.0270074605941772,grad_norm: 0.9158724440164352, iteration: 306789
loss: 0.9920781850814819,grad_norm: 0.8930851855012992, iteration: 306790
loss: 0.9838669300079346,grad_norm: 0.9584747331703265, iteration: 306791
loss: 0.9876258373260498,grad_norm: 0.9518896499418767, iteration: 306792
loss: 1.097306251525879,grad_norm: 0.7727784989530242, iteration: 306793
loss: 1.0789694786071777,grad_norm: 0.9999994808492821, iteration: 306794
loss: 1.0171197652816772,grad_norm: 0.9999992937044482, iteration: 306795
loss: 1.021338701248169,grad_norm: 0.8250851187575298, iteration: 306796
loss: 1.0176821947097778,grad_norm: 0.9999990434595364, iteration: 306797
loss: 1.0370250940322876,grad_norm: 0.9970450895727098, iteration: 306798
loss: 0.9755915999412537,grad_norm: 0.7387400398061986, iteration: 306799
loss: 1.1295117139816284,grad_norm: 0.9999993702986677, iteration: 306800
loss: 1.0642869472503662,grad_norm: 0.999999447701699, iteration: 306801
loss: 1.009416937828064,grad_norm: 0.9013704869593538, iteration: 306802
loss: 1.015207290649414,grad_norm: 0.8397468857540888, iteration: 306803
loss: 0.952928364276886,grad_norm: 0.8607787093363324, iteration: 306804
loss: 1.0282663106918335,grad_norm: 0.9999991244664186, iteration: 306805
loss: 1.0189634561538696,grad_norm: 0.8688358020594673, iteration: 306806
loss: 0.9735478758811951,grad_norm: 0.9779968107449237, iteration: 306807
loss: 0.9775267839431763,grad_norm: 0.9999991899003969, iteration: 306808
loss: 0.9955554604530334,grad_norm: 0.8927172271927714, iteration: 306809
loss: 1.0203300714492798,grad_norm: 0.9257674889180951, iteration: 306810
loss: 1.003008484840393,grad_norm: 0.999999400229169, iteration: 306811
loss: 1.0112981796264648,grad_norm: 0.9999990277782972, iteration: 306812
loss: 0.9608417749404907,grad_norm: 0.8081189198351978, iteration: 306813
loss: 0.9782959818840027,grad_norm: 0.9806956903056595, iteration: 306814
loss: 0.9616418480873108,grad_norm: 0.8796304507771863, iteration: 306815
loss: 1.023687481880188,grad_norm: 0.9131089023894364, iteration: 306816
loss: 0.9544150233268738,grad_norm: 0.7738516988689776, iteration: 306817
loss: 0.9868097901344299,grad_norm: 0.8669014796380928, iteration: 306818
loss: 0.999988853931427,grad_norm: 0.758243722720776, iteration: 306819
loss: 1.0196657180786133,grad_norm: 0.8824231109784156, iteration: 306820
loss: 1.0180622339248657,grad_norm: 0.9259489093041624, iteration: 306821
loss: 0.9769616723060608,grad_norm: 0.9999996421270851, iteration: 306822
loss: 1.0272502899169922,grad_norm: 0.8559279710103808, iteration: 306823
loss: 1.003753423690796,grad_norm: 0.8825212229224682, iteration: 306824
loss: 0.9676871299743652,grad_norm: 0.889920876753608, iteration: 306825
loss: 1.0281579494476318,grad_norm: 0.9999991645739356, iteration: 306826
loss: 1.0795143842697144,grad_norm: 0.9999996271293965, iteration: 306827
loss: 1.0797038078308105,grad_norm: 0.9999993424315408, iteration: 306828
loss: 1.1357167959213257,grad_norm: 0.9999993746359932, iteration: 306829
loss: 0.9832854270935059,grad_norm: 0.8552424703289803, iteration: 306830
loss: 0.9873267412185669,grad_norm: 0.9620869525012563, iteration: 306831
loss: 0.9934370517730713,grad_norm: 0.7928332859341894, iteration: 306832
loss: 0.9867838025093079,grad_norm: 0.9901129989577216, iteration: 306833
loss: 1.0880768299102783,grad_norm: 0.9999994391692759, iteration: 306834
loss: 1.0370019674301147,grad_norm: 0.9999990594196774, iteration: 306835
loss: 0.9903692007064819,grad_norm: 0.7708131565029133, iteration: 306836
loss: 1.043816328048706,grad_norm: 0.8442414871834824, iteration: 306837
loss: 1.016433835029602,grad_norm: 0.7559137428610982, iteration: 306838
loss: 1.0218424797058105,grad_norm: 0.896834166459506, iteration: 306839
loss: 0.9785698652267456,grad_norm: 0.7650411543305348, iteration: 306840
loss: 0.9832781553268433,grad_norm: 0.9665220179975039, iteration: 306841
loss: 1.023776650428772,grad_norm: 0.9563365551783815, iteration: 306842
loss: 0.9688925743103027,grad_norm: 0.8309508308773895, iteration: 306843
loss: 1.0138620138168335,grad_norm: 0.9031206597737761, iteration: 306844
loss: 1.0207562446594238,grad_norm: 0.849414212766814, iteration: 306845
loss: 1.0107501745224,grad_norm: 0.9250793191916542, iteration: 306846
loss: 1.008738398551941,grad_norm: 0.7836136782076759, iteration: 306847
loss: 0.972744882106781,grad_norm: 0.7972956132806703, iteration: 306848
loss: 0.9879310131072998,grad_norm: 0.8485528285765862, iteration: 306849
loss: 1.0408672094345093,grad_norm: 0.9543584941943081, iteration: 306850
loss: 0.9518163204193115,grad_norm: 0.810869475701334, iteration: 306851
loss: 1.0065373182296753,grad_norm: 0.9059011496451135, iteration: 306852
loss: 1.0130681991577148,grad_norm: 0.9999998862322962, iteration: 306853
loss: 0.9726911783218384,grad_norm: 0.8605672537561345, iteration: 306854
loss: 0.9891932606697083,grad_norm: 0.9078490284055997, iteration: 306855
loss: 1.0702123641967773,grad_norm: 0.9999999580000818, iteration: 306856
loss: 1.0015201568603516,grad_norm: 0.9999999565690225, iteration: 306857
loss: 0.9910149574279785,grad_norm: 0.9279632152729621, iteration: 306858
loss: 0.9435831904411316,grad_norm: 0.8432613527138357, iteration: 306859
loss: 1.0227009057998657,grad_norm: 0.7631055538973439, iteration: 306860
loss: 1.0473670959472656,grad_norm: 0.9999992403246764, iteration: 306861
loss: 1.2704930305480957,grad_norm: 0.9999996090361108, iteration: 306862
loss: 1.0012421607971191,grad_norm: 0.7061094150033965, iteration: 306863
loss: 1.0061393976211548,grad_norm: 0.9660426911181701, iteration: 306864
loss: 1.0616388320922852,grad_norm: 0.9999993671578385, iteration: 306865
loss: 1.01570725440979,grad_norm: 0.8405652345314419, iteration: 306866
loss: 1.012088656425476,grad_norm: 0.9999990906742596, iteration: 306867
loss: 0.9929320812225342,grad_norm: 0.8621025985671462, iteration: 306868
loss: 0.9802976250648499,grad_norm: 0.8351699532864102, iteration: 306869
loss: 1.0639944076538086,grad_norm: 0.9999999262512765, iteration: 306870
loss: 1.0361145734786987,grad_norm: 0.999999102762509, iteration: 306871
loss: 0.9931628704071045,grad_norm: 0.999999254880196, iteration: 306872
loss: 0.9928725361824036,grad_norm: 0.727035657203572, iteration: 306873
loss: 0.9539427757263184,grad_norm: 0.9660265526385668, iteration: 306874
loss: 1.0091122388839722,grad_norm: 0.8962004285644848, iteration: 306875
loss: 0.9577141404151917,grad_norm: 0.9999994740370548, iteration: 306876
loss: 0.9601031541824341,grad_norm: 0.9262620188960073, iteration: 306877
loss: 1.0198899507522583,grad_norm: 0.9618908198629694, iteration: 306878
loss: 1.0145695209503174,grad_norm: 0.7589566802567541, iteration: 306879
loss: 0.9811367392539978,grad_norm: 0.7352049711491879, iteration: 306880
loss: 0.9889719486236572,grad_norm: 0.8095145152552486, iteration: 306881
loss: 0.9938304424285889,grad_norm: 0.9999994713728027, iteration: 306882
loss: 1.0160696506500244,grad_norm: 0.7159659168273451, iteration: 306883
loss: 0.9844116568565369,grad_norm: 0.832335904735627, iteration: 306884
loss: 0.9897395968437195,grad_norm: 0.8421642170478646, iteration: 306885
loss: 0.981062650680542,grad_norm: 0.9999990380652607, iteration: 306886
loss: 1.0875340700149536,grad_norm: 0.909519670879886, iteration: 306887
loss: 1.0216927528381348,grad_norm: 0.8461301194113443, iteration: 306888
loss: 1.0231562852859497,grad_norm: 0.9215330705235627, iteration: 306889
loss: 1.0086398124694824,grad_norm: 0.9999994775588763, iteration: 306890
loss: 0.9797202944755554,grad_norm: 0.7975299757235349, iteration: 306891
loss: 0.9778817296028137,grad_norm: 0.9391113121402284, iteration: 306892
loss: 0.9783979058265686,grad_norm: 0.9999991648843173, iteration: 306893
loss: 1.0425560474395752,grad_norm: 0.8206252493011958, iteration: 306894
loss: 0.9807408452033997,grad_norm: 0.8799168838818662, iteration: 306895
loss: 1.0006247758865356,grad_norm: 0.8713395547615506, iteration: 306896
loss: 1.010525107383728,grad_norm: 0.9999993086268779, iteration: 306897
loss: 1.0076980590820312,grad_norm: 0.7792609189041797, iteration: 306898
loss: 1.013493299484253,grad_norm: 0.9296839813070382, iteration: 306899
loss: 1.0450109243392944,grad_norm: 0.9069007610077102, iteration: 306900
loss: 1.03760826587677,grad_norm: 1.0000000021085094, iteration: 306901
loss: 0.9512905478477478,grad_norm: 0.8227468536896491, iteration: 306902
loss: 1.0506242513656616,grad_norm: 0.9999993070699317, iteration: 306903
loss: 0.9956875443458557,grad_norm: 0.8192534602500144, iteration: 306904
loss: 1.0429638624191284,grad_norm: 0.7241410157367874, iteration: 306905
loss: 1.0245414972305298,grad_norm: 0.9999993020602619, iteration: 306906
loss: 1.0016216039657593,grad_norm: 0.8748363455983724, iteration: 306907
loss: 1.020302176475525,grad_norm: 0.8264144449551645, iteration: 306908
loss: 1.022142767906189,grad_norm: 0.7743221106182976, iteration: 306909
loss: 1.0004104375839233,grad_norm: 0.7005729573263063, iteration: 306910
loss: 1.0409092903137207,grad_norm: 0.986273123308914, iteration: 306911
loss: 0.9731286764144897,grad_norm: 0.8643791833361677, iteration: 306912
loss: 0.9464686512947083,grad_norm: 0.8871963981908364, iteration: 306913
loss: 0.9915682077407837,grad_norm: 0.7714603024456231, iteration: 306914
loss: 1.0097347497940063,grad_norm: 0.8942903255253911, iteration: 306915
loss: 1.0004502534866333,grad_norm: 0.7910766794690172, iteration: 306916
loss: 0.995086669921875,grad_norm: 0.92704753537454, iteration: 306917
loss: 1.2337696552276611,grad_norm: 0.9999998077987743, iteration: 306918
loss: 1.0478075742721558,grad_norm: 0.9873437256013243, iteration: 306919
loss: 0.9483115673065186,grad_norm: 0.674139384507473, iteration: 306920
loss: 0.9788744449615479,grad_norm: 0.6681851406681835, iteration: 306921
loss: 1.0632424354553223,grad_norm: 0.9999994586709873, iteration: 306922
loss: 0.9903912544250488,grad_norm: 0.9999990555088119, iteration: 306923
loss: 1.0086239576339722,grad_norm: 0.883969493586656, iteration: 306924
loss: 1.1635583639144897,grad_norm: 0.9999990418987225, iteration: 306925
loss: 1.0165144205093384,grad_norm: 0.7051338641190824, iteration: 306926
loss: 0.9956618547439575,grad_norm: 0.8655269400399075, iteration: 306927
loss: 1.0998870134353638,grad_norm: 0.9999992038637292, iteration: 306928
loss: 1.020664930343628,grad_norm: 0.9999998191261316, iteration: 306929
loss: 0.9774273037910461,grad_norm: 0.7773913320884431, iteration: 306930
loss: 1.0182636976242065,grad_norm: 0.9999991517257384, iteration: 306931
loss: 1.012349009513855,grad_norm: 0.9299258699178334, iteration: 306932
loss: 1.0241795778274536,grad_norm: 0.7691471252006428, iteration: 306933
loss: 0.9889084100723267,grad_norm: 0.9479044711667576, iteration: 306934
loss: 0.9508039355278015,grad_norm: 0.9275558436814455, iteration: 306935
loss: 0.9779933094978333,grad_norm: 0.8226053594034896, iteration: 306936
loss: 0.9895854592323303,grad_norm: 0.8427982693597701, iteration: 306937
loss: 1.076484203338623,grad_norm: 0.999999993183682, iteration: 306938
loss: 1.0027942657470703,grad_norm: 0.9999992878713456, iteration: 306939
loss: 0.9883801937103271,grad_norm: 0.7699568702440103, iteration: 306940
loss: 1.01411771774292,grad_norm: 0.9959597021143961, iteration: 306941
loss: 0.9691888689994812,grad_norm: 0.9999990744300091, iteration: 306942
loss: 0.9996280670166016,grad_norm: 0.765016573408377, iteration: 306943
loss: 1.0031638145446777,grad_norm: 0.829961358631503, iteration: 306944
loss: 0.9882363080978394,grad_norm: 0.8295986549792785, iteration: 306945
loss: 0.9962219595909119,grad_norm: 0.9999990831134286, iteration: 306946
loss: 1.053634762763977,grad_norm: 0.9821400102450369, iteration: 306947
loss: 0.9785005450248718,grad_norm: 0.8073913372606845, iteration: 306948
loss: 1.0159587860107422,grad_norm: 0.7915364617009962, iteration: 306949
loss: 1.0067166090011597,grad_norm: 0.7478688577945559, iteration: 306950
loss: 1.028988003730774,grad_norm: 0.999999990819117, iteration: 306951
loss: 0.995783805847168,grad_norm: 0.8344790749457144, iteration: 306952
loss: 0.9729720950126648,grad_norm: 0.9016269588724111, iteration: 306953
loss: 1.0066733360290527,grad_norm: 0.9999994524448673, iteration: 306954
loss: 1.002958059310913,grad_norm: 0.7668409461063388, iteration: 306955
loss: 0.9901463985443115,grad_norm: 0.8225049487448758, iteration: 306956
loss: 0.9996011853218079,grad_norm: 0.8953480728045303, iteration: 306957
loss: 0.9858551621437073,grad_norm: 0.8466498425772107, iteration: 306958
loss: 1.0038983821868896,grad_norm: 0.8602605621135062, iteration: 306959
loss: 0.9784700274467468,grad_norm: 0.8003614400379396, iteration: 306960
loss: 0.9802484512329102,grad_norm: 0.8967931166223428, iteration: 306961
loss: 1.0238397121429443,grad_norm: 0.9999999928890907, iteration: 306962
loss: 0.9930115342140198,grad_norm: 0.8077280012019304, iteration: 306963
loss: 0.9958634376525879,grad_norm: 0.9999990497346108, iteration: 306964
loss: 1.0092716217041016,grad_norm: 0.9999991392574998, iteration: 306965
loss: 0.9768980145454407,grad_norm: 0.6923061424582414, iteration: 306966
loss: 0.9835401177406311,grad_norm: 0.7987173491597819, iteration: 306967
loss: 0.962973952293396,grad_norm: 0.8860986204065896, iteration: 306968
loss: 1.0411875247955322,grad_norm: 0.9582806805348763, iteration: 306969
loss: 0.9962382316589355,grad_norm: 0.9999991338337114, iteration: 306970
loss: 0.9703677892684937,grad_norm: 0.820169992741141, iteration: 306971
loss: 1.070601224899292,grad_norm: 0.9309628208902115, iteration: 306972
loss: 0.9561824798583984,grad_norm: 0.7808781532174256, iteration: 306973
loss: 0.971568763256073,grad_norm: 0.8235251436897919, iteration: 306974
loss: 0.9840416312217712,grad_norm: 0.8551072600732157, iteration: 306975
loss: 1.101996898651123,grad_norm: 0.9999991567260773, iteration: 306976
loss: 0.9716957807540894,grad_norm: 0.8154748097135198, iteration: 306977
loss: 1.1162865161895752,grad_norm: 0.9999998664700477, iteration: 306978
loss: 0.9611005187034607,grad_norm: 0.8612213900716164, iteration: 306979
loss: 1.022222638130188,grad_norm: 0.9999991546500961, iteration: 306980
loss: 1.0003950595855713,grad_norm: 0.8092357812108705, iteration: 306981
loss: 1.017982006072998,grad_norm: 0.8238947162226282, iteration: 306982
loss: 1.017481803894043,grad_norm: 0.7507894563231252, iteration: 306983
loss: 0.9689027667045593,grad_norm: 0.7718945573659649, iteration: 306984
loss: 1.0890703201293945,grad_norm: 0.9132997823387464, iteration: 306985
loss: 0.961194634437561,grad_norm: 0.9193462527076238, iteration: 306986
loss: 1.0026686191558838,grad_norm: 0.7892584091414844, iteration: 306987
loss: 1.033097743988037,grad_norm: 0.9999993043179697, iteration: 306988
loss: 1.0169014930725098,grad_norm: 0.7136997224268349, iteration: 306989
loss: 1.1155321598052979,grad_norm: 0.9999993280688219, iteration: 306990
loss: 1.0065441131591797,grad_norm: 0.9056369168232526, iteration: 306991
loss: 0.9744387865066528,grad_norm: 0.8230229513049786, iteration: 306992
loss: 1.0081785917282104,grad_norm: 0.6818354884725457, iteration: 306993
loss: 1.0104683637619019,grad_norm: 0.8705173447118517, iteration: 306994
loss: 0.9776549935340881,grad_norm: 0.733509724432557, iteration: 306995
loss: 1.0288326740264893,grad_norm: 0.9999996706510783, iteration: 306996
loss: 1.0992885828018188,grad_norm: 0.999999222527072, iteration: 306997
loss: 0.9836859703063965,grad_norm: 0.7959511815409996, iteration: 306998
loss: 0.9520962238311768,grad_norm: 0.8435655030103039, iteration: 306999
loss: 0.9976993203163147,grad_norm: 0.782903941762792, iteration: 307000
loss: 1.060905933380127,grad_norm: 0.9999998654240685, iteration: 307001
loss: 0.9580545425415039,grad_norm: 0.8668189942394492, iteration: 307002
loss: 1.0217158794403076,grad_norm: 0.9999991704054328, iteration: 307003
loss: 1.0316771268844604,grad_norm: 0.9036902499950836, iteration: 307004
loss: 1.0210987329483032,grad_norm: 0.7784219223940123, iteration: 307005
loss: 1.006339430809021,grad_norm: 0.8496658806340733, iteration: 307006
loss: 0.9976330399513245,grad_norm: 0.7262590158532163, iteration: 307007
loss: 1.0125960111618042,grad_norm: 0.8925103837794606, iteration: 307008
loss: 1.057208776473999,grad_norm: 0.8915928848148204, iteration: 307009
loss: 1.0223561525344849,grad_norm: 0.7725090526051147, iteration: 307010
loss: 0.9946569204330444,grad_norm: 0.9999991185330953, iteration: 307011
loss: 1.0061302185058594,grad_norm: 0.776841090604457, iteration: 307012
loss: 1.0527324676513672,grad_norm: 0.867643064852955, iteration: 307013
loss: 0.9614599943161011,grad_norm: 0.9415252147623056, iteration: 307014
loss: 0.9831333160400391,grad_norm: 0.923727799198802, iteration: 307015
loss: 0.9703633785247803,grad_norm: 0.9654506411630297, iteration: 307016
loss: 0.9812678098678589,grad_norm: 0.9890518660776466, iteration: 307017
loss: 1.01679527759552,grad_norm: 0.9999999447814616, iteration: 307018
loss: 0.9471134543418884,grad_norm: 0.8780888734505125, iteration: 307019
loss: 1.002278208732605,grad_norm: 0.7723703720109844, iteration: 307020
loss: 1.0065172910690308,grad_norm: 0.8004789628774928, iteration: 307021
loss: 0.9969371557235718,grad_norm: 0.8854298777559103, iteration: 307022
loss: 1.0110266208648682,grad_norm: 0.8319496565660729, iteration: 307023
loss: 1.04790461063385,grad_norm: 0.999999786936727, iteration: 307024
loss: 1.0218509435653687,grad_norm: 0.840867414541697, iteration: 307025
loss: 0.9744724631309509,grad_norm: 0.7073895516129245, iteration: 307026
loss: 1.0137858390808105,grad_norm: 0.9999999919654379, iteration: 307027
loss: 1.0976030826568604,grad_norm: 0.9999998675223891, iteration: 307028
loss: 1.0275706052780151,grad_norm: 0.9999991478167128, iteration: 307029
loss: 1.027083158493042,grad_norm: 0.9564811525055176, iteration: 307030
loss: 1.0096532106399536,grad_norm: 0.8817959868044741, iteration: 307031
loss: 0.9820934534072876,grad_norm: 0.8718142575229354, iteration: 307032
loss: 1.025700330734253,grad_norm: 0.9999991261679503, iteration: 307033
loss: 1.0323841571807861,grad_norm: 0.9999990392163822, iteration: 307034
loss: 1.023511528968811,grad_norm: 0.9999992347298453, iteration: 307035
loss: 0.9925647974014282,grad_norm: 0.7921576879635963, iteration: 307036
loss: 1.0046412944793701,grad_norm: 0.8998009515323931, iteration: 307037
loss: 1.0057251453399658,grad_norm: 0.9794399037565064, iteration: 307038
loss: 1.0162800550460815,grad_norm: 0.6950869050400338, iteration: 307039
loss: 1.070970892906189,grad_norm: 0.9112951491748635, iteration: 307040
loss: 1.0252344608306885,grad_norm: 0.8219123973290268, iteration: 307041
loss: 1.0316680669784546,grad_norm: 0.9541158073466189, iteration: 307042
loss: 1.0082889795303345,grad_norm: 0.8422466484112869, iteration: 307043
loss: 1.12619149684906,grad_norm: 0.999999882241747, iteration: 307044
loss: 1.019769549369812,grad_norm: 0.9732252888265688, iteration: 307045
loss: 1.0466078519821167,grad_norm: 0.999999271313922, iteration: 307046
loss: 1.0404287576675415,grad_norm: 0.7999409679945373, iteration: 307047
loss: 1.0256338119506836,grad_norm: 0.9999996864878391, iteration: 307048
loss: 1.0023369789123535,grad_norm: 1.0000000086574972, iteration: 307049
loss: 1.0201302766799927,grad_norm: 0.830185188684612, iteration: 307050
loss: 0.9762380123138428,grad_norm: 0.8043494398411057, iteration: 307051
loss: 0.9807513356208801,grad_norm: 0.9999994264958922, iteration: 307052
loss: 1.0009669065475464,grad_norm: 0.7581217840625236, iteration: 307053
loss: 0.9730669260025024,grad_norm: 0.9999990737939091, iteration: 307054
loss: 1.0138322114944458,grad_norm: 0.9999992034698376, iteration: 307055
loss: 0.9961416125297546,grad_norm: 0.6894357947261822, iteration: 307056
loss: 0.9934303164482117,grad_norm: 0.9788580224087838, iteration: 307057
loss: 0.9836004972457886,grad_norm: 0.750143964227826, iteration: 307058
loss: 1.056886076927185,grad_norm: 0.915445896250991, iteration: 307059
loss: 1.0004475116729736,grad_norm: 0.7373983972471492, iteration: 307060
loss: 1.039162516593933,grad_norm: 0.9142756274868057, iteration: 307061
loss: 1.0414679050445557,grad_norm: 0.7795374249588307, iteration: 307062
loss: 0.9874622821807861,grad_norm: 0.9051595018889623, iteration: 307063
loss: 1.0454726219177246,grad_norm: 0.9999990559212887, iteration: 307064
loss: 0.9969260096549988,grad_norm: 0.9058218270853625, iteration: 307065
loss: 0.9685239195823669,grad_norm: 0.8989226174398575, iteration: 307066
loss: 1.0405837297439575,grad_norm: 0.9999996640885187, iteration: 307067
loss: 1.0037412643432617,grad_norm: 0.6820479651303417, iteration: 307068
loss: 0.9859486818313599,grad_norm: 0.8778396736057725, iteration: 307069
loss: 0.9956875443458557,grad_norm: 0.8958117147788079, iteration: 307070
loss: 1.1266202926635742,grad_norm: 0.9999996794316273, iteration: 307071
loss: 0.9902299642562866,grad_norm: 0.8628334347804022, iteration: 307072
loss: 1.0636906623840332,grad_norm: 0.99999982124229, iteration: 307073
loss: 0.9570837616920471,grad_norm: 0.9999991223858009, iteration: 307074
loss: 1.0150020122528076,grad_norm: 0.7079159143269439, iteration: 307075
loss: 1.061511754989624,grad_norm: 0.8300889706362096, iteration: 307076
loss: 0.9911455512046814,grad_norm: 0.9999989845991173, iteration: 307077
loss: 1.0138068199157715,grad_norm: 0.9999992128161711, iteration: 307078
loss: 0.9802361130714417,grad_norm: 0.8675546442945002, iteration: 307079
loss: 1.0313701629638672,grad_norm: 0.8758389270008367, iteration: 307080
loss: 1.0642451047897339,grad_norm: 0.966081519917605, iteration: 307081
loss: 1.1979695558547974,grad_norm: 0.999999405102801, iteration: 307082
loss: 1.1508746147155762,grad_norm: 0.9999991191792391, iteration: 307083
loss: 1.1292080879211426,grad_norm: 0.9999991251252716, iteration: 307084
loss: 0.9850543141365051,grad_norm: 0.8362157515098351, iteration: 307085
loss: 1.015281081199646,grad_norm: 0.9999990596292986, iteration: 307086
loss: 1.1057207584381104,grad_norm: 0.8529251049357564, iteration: 307087
loss: 0.9987831711769104,grad_norm: 0.8423771853710552, iteration: 307088
loss: 0.9851614832878113,grad_norm: 0.8444412794949284, iteration: 307089
loss: 0.9733437895774841,grad_norm: 0.931231747504335, iteration: 307090
loss: 1.0261058807373047,grad_norm: 0.9999996484808129, iteration: 307091
loss: 1.055037260055542,grad_norm: 0.999999212778885, iteration: 307092
loss: 1.0390733480453491,grad_norm: 0.9406299509774928, iteration: 307093
loss: 1.098217248916626,grad_norm: 0.9999993807297008, iteration: 307094
loss: 1.0053157806396484,grad_norm: 0.8437154928859418, iteration: 307095
loss: 0.943956732749939,grad_norm: 0.8705151531382743, iteration: 307096
loss: 0.9776966571807861,grad_norm: 0.7402931831732043, iteration: 307097
loss: 0.9618614912033081,grad_norm: 0.8532410628182518, iteration: 307098
loss: 1.0960814952850342,grad_norm: 0.9999990569516738, iteration: 307099
loss: 1.0480982065200806,grad_norm: 0.8789526260883997, iteration: 307100
loss: 0.9824761748313904,grad_norm: 0.8373471444546677, iteration: 307101
loss: 1.0272870063781738,grad_norm: 0.9367101707855814, iteration: 307102
loss: 1.0230060815811157,grad_norm: 0.7987125783523669, iteration: 307103
loss: 1.025291919708252,grad_norm: 0.9165442240457063, iteration: 307104
loss: 1.0154895782470703,grad_norm: 0.9328738183485186, iteration: 307105
loss: 1.0139402151107788,grad_norm: 0.9999996803930395, iteration: 307106
loss: 1.0885655879974365,grad_norm: 0.9999999238272507, iteration: 307107
loss: 1.032012939453125,grad_norm: 0.8283411702458086, iteration: 307108
loss: 0.9996795654296875,grad_norm: 0.7514246002644341, iteration: 307109
loss: 0.9907646775245667,grad_norm: 0.8333038016746125, iteration: 307110
loss: 0.9796298742294312,grad_norm: 0.9999993323350544, iteration: 307111
loss: 1.0139739513397217,grad_norm: 0.9999994899779762, iteration: 307112
loss: 1.002132773399353,grad_norm: 0.6798876313712994, iteration: 307113
loss: 0.9718017578125,grad_norm: 0.8105834372897119, iteration: 307114
loss: 0.970394492149353,grad_norm: 0.9380980288810596, iteration: 307115
loss: 0.9943552017211914,grad_norm: 0.9568676256919285, iteration: 307116
loss: 1.0361038446426392,grad_norm: 1.0000000194285361, iteration: 307117
loss: 0.9933645725250244,grad_norm: 0.9999990369234494, iteration: 307118
loss: 1.0459774732589722,grad_norm: 0.9999995782678298, iteration: 307119
loss: 0.9842235445976257,grad_norm: 0.7844939818499668, iteration: 307120
loss: 1.059970498085022,grad_norm: 0.869893863075658, iteration: 307121
loss: 1.1866439580917358,grad_norm: 0.9999999549765907, iteration: 307122
loss: 1.0286437273025513,grad_norm: 0.8065943770524449, iteration: 307123
loss: 1.0232141017913818,grad_norm: 0.9999990524520005, iteration: 307124
loss: 1.1683690547943115,grad_norm: 0.9999997128296786, iteration: 307125
loss: 0.9432782530784607,grad_norm: 0.9999989759746098, iteration: 307126
loss: 0.9835997223854065,grad_norm: 0.9406526216891969, iteration: 307127
loss: 1.0200319290161133,grad_norm: 0.9304846332682849, iteration: 307128
loss: 1.0487396717071533,grad_norm: 0.9743700696285361, iteration: 307129
loss: 1.082230806350708,grad_norm: 0.9999999330334655, iteration: 307130
loss: 0.9943814873695374,grad_norm: 0.6890714820762136, iteration: 307131
loss: 0.9744768142700195,grad_norm: 0.7640513709765092, iteration: 307132
loss: 1.0042461156845093,grad_norm: 0.8631041936897067, iteration: 307133
loss: 1.102908730506897,grad_norm: 0.9999993240589383, iteration: 307134
loss: 0.9975422024726868,grad_norm: 0.9999991384785792, iteration: 307135
loss: 1.0343601703643799,grad_norm: 0.9999990754454858, iteration: 307136
loss: 0.9572115540504456,grad_norm: 0.9523301238087248, iteration: 307137
loss: 1.0234471559524536,grad_norm: 0.7347672050160456, iteration: 307138
loss: 0.9542371034622192,grad_norm: 0.8850241650568853, iteration: 307139
loss: 0.9953398108482361,grad_norm: 0.8640339614668097, iteration: 307140
loss: 0.9687548279762268,grad_norm: 0.974654478613305, iteration: 307141
loss: 0.9882168173789978,grad_norm: 0.8564256148624354, iteration: 307142
loss: 0.9733988046646118,grad_norm: 0.8410698185632512, iteration: 307143
loss: 0.9927244782447815,grad_norm: 0.9272088634314938, iteration: 307144
loss: 0.9807195067405701,grad_norm: 0.7694170525074241, iteration: 307145
loss: 0.9873040914535522,grad_norm: 0.7956522087027988, iteration: 307146
loss: 0.9877842664718628,grad_norm: 0.7938147018339317, iteration: 307147
loss: 1.0159658193588257,grad_norm: 0.8674402610190929, iteration: 307148
loss: 0.9795838594436646,grad_norm: 0.9885953839044613, iteration: 307149
loss: 1.0047125816345215,grad_norm: 0.9999992558801932, iteration: 307150
loss: 1.0941095352172852,grad_norm: 0.9999992264525039, iteration: 307151
loss: 0.9949822425842285,grad_norm: 0.8360547269620066, iteration: 307152
loss: 1.0545703172683716,grad_norm: 0.7385849170858093, iteration: 307153
loss: 0.9816792011260986,grad_norm: 0.9999992318687555, iteration: 307154
loss: 0.9604806303977966,grad_norm: 0.7886643050495026, iteration: 307155
loss: 1.0245827436447144,grad_norm: 0.9999991920704826, iteration: 307156
loss: 0.9756758213043213,grad_norm: 0.8392158929675407, iteration: 307157
loss: 1.0365140438079834,grad_norm: 0.9084373975487718, iteration: 307158
loss: 0.9945598244667053,grad_norm: 0.7860040017168748, iteration: 307159
loss: 0.9960422515869141,grad_norm: 0.9999991874362508, iteration: 307160
loss: 1.027559757232666,grad_norm: 0.7779409470147732, iteration: 307161
loss: 1.0403276681900024,grad_norm: 0.923140276052094, iteration: 307162
loss: 1.0127619504928589,grad_norm: 0.972865978247876, iteration: 307163
loss: 1.0150564908981323,grad_norm: 0.9999993354943267, iteration: 307164
loss: 0.9980924725532532,grad_norm: 0.809165959463638, iteration: 307165
loss: 1.0061959028244019,grad_norm: 0.7711182209726622, iteration: 307166
loss: 0.9840415120124817,grad_norm: 0.9999992120749552, iteration: 307167
loss: 1.0177943706512451,grad_norm: 0.9960173654378421, iteration: 307168
loss: 0.9757170677185059,grad_norm: 0.8382142160466161, iteration: 307169
loss: 1.003577709197998,grad_norm: 0.9565070683076025, iteration: 307170
loss: 1.0342998504638672,grad_norm: 0.9999991381462616, iteration: 307171
loss: 1.0038384199142456,grad_norm: 0.999999903386749, iteration: 307172
loss: 0.9788445830345154,grad_norm: 0.7725485826248157, iteration: 307173
loss: 1.0004432201385498,grad_norm: 0.8734877026025866, iteration: 307174
loss: 1.2075822353363037,grad_norm: 0.9999990989043628, iteration: 307175
loss: 0.9854382872581482,grad_norm: 0.8459461037196964, iteration: 307176
loss: 1.0552256107330322,grad_norm: 0.9896035444679695, iteration: 307177
loss: 1.053087830543518,grad_norm: 0.9356214983142587, iteration: 307178
loss: 1.064958095550537,grad_norm: 0.9999993606781831, iteration: 307179
loss: 1.0016812086105347,grad_norm: 0.9999990405063544, iteration: 307180
loss: 1.0128670930862427,grad_norm: 0.7303271382035302, iteration: 307181
loss: 1.032659888267517,grad_norm: 0.9546827975387889, iteration: 307182
loss: 1.0629090070724487,grad_norm: 0.999998937326433, iteration: 307183
loss: 1.0504772663116455,grad_norm: 0.9386293475109995, iteration: 307184
loss: 1.0274204015731812,grad_norm: 0.9999994945337359, iteration: 307185
loss: 1.0070991516113281,grad_norm: 0.9999990985113407, iteration: 307186
loss: 1.0307011604309082,grad_norm: 0.9999990476910594, iteration: 307187
loss: 0.9931958913803101,grad_norm: 0.9319426468668983, iteration: 307188
loss: 1.025359034538269,grad_norm: 0.7579308466484341, iteration: 307189
loss: 1.0356810092926025,grad_norm: 0.8982377212437812, iteration: 307190
loss: 1.0159962177276611,grad_norm: 0.8075586318356455, iteration: 307191
loss: 0.9980706572532654,grad_norm: 0.999999239487759, iteration: 307192
loss: 1.0347942113876343,grad_norm: 0.7920978576932884, iteration: 307193
loss: 0.9546303749084473,grad_norm: 0.8651096384939366, iteration: 307194
loss: 1.0113556385040283,grad_norm: 0.8192978544955412, iteration: 307195
loss: 1.0251917839050293,grad_norm: 0.9112085626616028, iteration: 307196
loss: 1.0400201082229614,grad_norm: 0.999999211611382, iteration: 307197
loss: 1.0778290033340454,grad_norm: 0.9370247190430425, iteration: 307198
loss: 1.042099118232727,grad_norm: 0.9522948530597212, iteration: 307199
loss: 1.0256390571594238,grad_norm: 0.9999991582828861, iteration: 307200
loss: 1.0565571784973145,grad_norm: 0.8785700081246779, iteration: 307201
loss: 1.0014145374298096,grad_norm: 0.9999990950533962, iteration: 307202
loss: 0.9624735116958618,grad_norm: 0.9200369058082497, iteration: 307203
loss: 1.0000343322753906,grad_norm: 0.9271858869429451, iteration: 307204
loss: 1.018471360206604,grad_norm: 0.988722892639916, iteration: 307205
loss: 0.9872609376907349,grad_norm: 0.9999991280898717, iteration: 307206
loss: 1.015987753868103,grad_norm: 0.9168811112848887, iteration: 307207
loss: 0.9876472353935242,grad_norm: 0.9999990090860463, iteration: 307208
loss: 1.0202696323394775,grad_norm: 0.8976569233910284, iteration: 307209
loss: 1.0360006093978882,grad_norm: 0.9999997741979777, iteration: 307210
loss: 0.9977092742919922,grad_norm: 0.9876257836144433, iteration: 307211
loss: 0.962777316570282,grad_norm: 0.9999990333397003, iteration: 307212
loss: 0.9522567987442017,grad_norm: 0.8263575347345712, iteration: 307213
loss: 1.0172953605651855,grad_norm: 0.971347530945798, iteration: 307214
loss: 1.006889820098877,grad_norm: 0.8845529359710551, iteration: 307215
loss: 0.9826284646987915,grad_norm: 0.8640214504361776, iteration: 307216
loss: 1.0575937032699585,grad_norm: 0.9999991628034164, iteration: 307217
loss: 0.9407781362533569,grad_norm: 0.8148396811427033, iteration: 307218
loss: 1.1032750606536865,grad_norm: 0.9999996378321752, iteration: 307219
loss: 1.0061018466949463,grad_norm: 0.730500969194349, iteration: 307220
loss: 0.9773817658424377,grad_norm: 0.7783730084007225, iteration: 307221
loss: 0.9999133944511414,grad_norm: 0.8187619646327635, iteration: 307222
loss: 1.0121532678604126,grad_norm: 0.9999996685161802, iteration: 307223
loss: 1.0304780006408691,grad_norm: 0.9999993263643782, iteration: 307224
loss: 1.088620662689209,grad_norm: 0.9922720972970988, iteration: 307225
loss: 1.0624758005142212,grad_norm: 0.8942825082948844, iteration: 307226
loss: 0.9632636308670044,grad_norm: 0.8359214315172462, iteration: 307227
loss: 1.0126798152923584,grad_norm: 0.9999992632460003, iteration: 307228
loss: 1.2299730777740479,grad_norm: 0.9999991879739214, iteration: 307229
loss: 1.0065789222717285,grad_norm: 0.7239053152791702, iteration: 307230
loss: 0.9628569483757019,grad_norm: 0.7534937689870473, iteration: 307231
loss: 0.9877443909645081,grad_norm: 0.7700507049397748, iteration: 307232
loss: 1.0210816860198975,grad_norm: 0.8821798613074084, iteration: 307233
loss: 1.0379599332809448,grad_norm: 0.9999999777965584, iteration: 307234
loss: 1.0113295316696167,grad_norm: 0.6441110694423405, iteration: 307235
loss: 1.0107765197753906,grad_norm: 0.8316702664192148, iteration: 307236
loss: 1.0224735736846924,grad_norm: 0.877040164049469, iteration: 307237
loss: 1.085938572883606,grad_norm: 0.9999995607908231, iteration: 307238
loss: 0.9569602608680725,grad_norm: 0.7640341518455956, iteration: 307239
loss: 1.016937255859375,grad_norm: 0.9091667464263861, iteration: 307240
loss: 0.9969910979270935,grad_norm: 0.9999990178052391, iteration: 307241
loss: 1.0768603086471558,grad_norm: 0.8799508902622083, iteration: 307242
loss: 0.9792091846466064,grad_norm: 0.8455768794102235, iteration: 307243
loss: 1.0455623865127563,grad_norm: 0.7682259928050781, iteration: 307244
loss: 0.9709071516990662,grad_norm: 0.866292148827439, iteration: 307245
loss: 0.9778412580490112,grad_norm: 0.8558554407905472, iteration: 307246
loss: 0.9711490273475647,grad_norm: 0.713706626475164, iteration: 307247
loss: 1.019051194190979,grad_norm: 0.7764270114788073, iteration: 307248
loss: 1.0181366205215454,grad_norm: 0.9140843839025096, iteration: 307249
loss: 0.9782922267913818,grad_norm: 0.6678382024710284, iteration: 307250
loss: 1.0089153051376343,grad_norm: 0.925851813791885, iteration: 307251
loss: 1.0268443822860718,grad_norm: 0.9999990233312128, iteration: 307252
loss: 0.9808810949325562,grad_norm: 0.7237501673475341, iteration: 307253
loss: 0.9923165440559387,grad_norm: 0.9397024307136413, iteration: 307254
loss: 0.9896401166915894,grad_norm: 0.8153867659620619, iteration: 307255
loss: 1.0176198482513428,grad_norm: 0.8299576311419055, iteration: 307256
loss: 1.0320534706115723,grad_norm: 0.9999994711902037, iteration: 307257
loss: 0.9786372184753418,grad_norm: 0.9083913112020324, iteration: 307258
loss: 1.0343637466430664,grad_norm: 0.9747037762778075, iteration: 307259
loss: 0.9853367805480957,grad_norm: 0.9486595904802826, iteration: 307260
loss: 0.9633733034133911,grad_norm: 0.8566883789429113, iteration: 307261
loss: 0.969008207321167,grad_norm: 0.8206169633965714, iteration: 307262
loss: 0.9978898763656616,grad_norm: 0.9434396253627461, iteration: 307263
loss: 0.9949765205383301,grad_norm: 0.9432544204397773, iteration: 307264
loss: 1.005971908569336,grad_norm: 0.8506693733407732, iteration: 307265
loss: 0.9850518107414246,grad_norm: 0.7471432966497578, iteration: 307266
loss: 1.0209506750106812,grad_norm: 0.9999990720235598, iteration: 307267
loss: 1.0893070697784424,grad_norm: 0.799031908921208, iteration: 307268
loss: 1.0743155479431152,grad_norm: 0.9999995993347838, iteration: 307269
loss: 1.0297040939331055,grad_norm: 0.8289766582039603, iteration: 307270
loss: 1.0144904851913452,grad_norm: 0.9250956728060143, iteration: 307271
loss: 0.9663578867912292,grad_norm: 0.9219660586923448, iteration: 307272
loss: 0.9871033430099487,grad_norm: 0.999999131604718, iteration: 307273
loss: 1.0150041580200195,grad_norm: 0.9780962851502351, iteration: 307274
loss: 0.9745627045631409,grad_norm: 0.8690095960784058, iteration: 307275
loss: 1.023667573928833,grad_norm: 0.822785849730842, iteration: 307276
loss: 1.006225347518921,grad_norm: 0.8890161845414123, iteration: 307277
loss: 0.9999300837516785,grad_norm: 0.7941660178451497, iteration: 307278
loss: 0.9981450438499451,grad_norm: 0.8339323506653322, iteration: 307279
loss: 0.9851934909820557,grad_norm: 0.8483031383130067, iteration: 307280
loss: 0.9527873992919922,grad_norm: 0.9490004455387971, iteration: 307281
loss: 1.072152853012085,grad_norm: 0.9999990870146004, iteration: 307282
loss: 1.0157051086425781,grad_norm: 0.9147193704941796, iteration: 307283
loss: 1.0353742837905884,grad_norm: 0.9318758415355749, iteration: 307284
loss: 0.9944309592247009,grad_norm: 0.9143755801159912, iteration: 307285
loss: 1.0315123796463013,grad_norm: 0.8021279866399881, iteration: 307286
loss: 1.0153841972351074,grad_norm: 0.8334776251446786, iteration: 307287
loss: 0.9999958276748657,grad_norm: 0.8212670313555807, iteration: 307288
loss: 1.0087919235229492,grad_norm: 0.8025184491969279, iteration: 307289
loss: 0.9828971028327942,grad_norm: 0.8798936764176777, iteration: 307290
loss: 1.016836166381836,grad_norm: 0.723355102142579, iteration: 307291
loss: 0.9808862805366516,grad_norm: 0.7734060181702344, iteration: 307292
loss: 1.0190492868423462,grad_norm: 0.9193302461119007, iteration: 307293
loss: 1.0689767599105835,grad_norm: 0.9999994364391164, iteration: 307294
loss: 1.0113186836242676,grad_norm: 0.9999994278331601, iteration: 307295
loss: 1.0073105096817017,grad_norm: 0.9999990833518998, iteration: 307296
loss: 0.9942140579223633,grad_norm: 0.9656669365119689, iteration: 307297
loss: 1.035768747329712,grad_norm: 0.9224668199987535, iteration: 307298
loss: 1.0181077718734741,grad_norm: 0.9083716311599801, iteration: 307299
loss: 0.990917444229126,grad_norm: 0.9999990147092118, iteration: 307300
loss: 0.9974865317344666,grad_norm: 0.698276064008338, iteration: 307301
loss: 0.9965193271636963,grad_norm: 0.999999663417466, iteration: 307302
loss: 0.97737717628479,grad_norm: 0.9999995245096892, iteration: 307303
loss: 0.9927471876144409,grad_norm: 0.7144961237502228, iteration: 307304
loss: 0.9972058534622192,grad_norm: 0.8621377161550327, iteration: 307305
loss: 0.9676140546798706,grad_norm: 0.997654378056962, iteration: 307306
loss: 1.0121687650680542,grad_norm: 0.9999990410844303, iteration: 307307
loss: 1.012672781944275,grad_norm: 0.7673999362335743, iteration: 307308
loss: 0.994059145450592,grad_norm: 0.837875375411542, iteration: 307309
loss: 0.9920494556427002,grad_norm: 0.824223154564704, iteration: 307310
loss: 1.0925843715667725,grad_norm: 0.8602049214092703, iteration: 307311
loss: 0.9890042543411255,grad_norm: 0.8500023071513513, iteration: 307312
loss: 1.0001457929611206,grad_norm: 0.9999998094439664, iteration: 307313
loss: 1.0636982917785645,grad_norm: 0.9999989822850992, iteration: 307314
loss: 0.9755781292915344,grad_norm: 0.9370629413381518, iteration: 307315
loss: 1.0784518718719482,grad_norm: 0.9999992456255539, iteration: 307316
loss: 1.0291982889175415,grad_norm: 0.7415956529005595, iteration: 307317
loss: 1.0046497583389282,grad_norm: 0.832632456343605, iteration: 307318
loss: 1.0018260478973389,grad_norm: 0.7945754105797195, iteration: 307319
loss: 1.02536940574646,grad_norm: 0.9393482144532389, iteration: 307320
loss: 1.0111185312271118,grad_norm: 0.9999988557264453, iteration: 307321
loss: 0.9871383905410767,grad_norm: 0.9999993780729689, iteration: 307322
loss: 0.9941331148147583,grad_norm: 0.99999916726121, iteration: 307323
loss: 1.007155418395996,grad_norm: 0.9850790129039086, iteration: 307324
loss: 1.1893566846847534,grad_norm: 0.9999996858057197, iteration: 307325
loss: 0.9720600843429565,grad_norm: 0.8917017605590641, iteration: 307326
loss: 1.0298017263412476,grad_norm: 0.8670011500000241, iteration: 307327
loss: 1.0092177391052246,grad_norm: 0.8882094362022496, iteration: 307328
loss: 1.011918544769287,grad_norm: 0.9999991296434234, iteration: 307329
loss: 0.9887495636940002,grad_norm: 0.8765585015333649, iteration: 307330
loss: 0.9943826198577881,grad_norm: 0.837352820960551, iteration: 307331
loss: 1.0099390745162964,grad_norm: 0.7942803629063842, iteration: 307332
loss: 1.0850573778152466,grad_norm: 0.9999995187125573, iteration: 307333
loss: 1.025813341140747,grad_norm: 0.9999999407721036, iteration: 307334
loss: 1.0089993476867676,grad_norm: 0.9759623504388971, iteration: 307335
loss: 1.0013209581375122,grad_norm: 0.9733248549745876, iteration: 307336
loss: 0.9972354769706726,grad_norm: 0.7585228657452677, iteration: 307337
loss: 0.9743935465812683,grad_norm: 0.9278829821274336, iteration: 307338
loss: 1.0291844606399536,grad_norm: 0.7682312212909446, iteration: 307339
loss: 1.0049043893814087,grad_norm: 0.8900128348568883, iteration: 307340
loss: 1.030770182609558,grad_norm: 0.9999991926725302, iteration: 307341
loss: 0.9921871423721313,grad_norm: 0.7740811922768357, iteration: 307342
loss: 0.9792881608009338,grad_norm: 0.8850350717996379, iteration: 307343
loss: 1.005474328994751,grad_norm: 0.7367525380224154, iteration: 307344
loss: 0.9865498542785645,grad_norm: 0.9558708692169339, iteration: 307345
loss: 0.9871851205825806,grad_norm: 0.8270028620807751, iteration: 307346
loss: 0.9865723848342896,grad_norm: 0.9999990914881265, iteration: 307347
loss: 1.0276603698730469,grad_norm: 0.8811640488489972, iteration: 307348
loss: 0.9750342965126038,grad_norm: 0.9063879760163263, iteration: 307349
loss: 1.0180593729019165,grad_norm: 0.9511827201177121, iteration: 307350
loss: 0.9944022297859192,grad_norm: 0.7759364368866324, iteration: 307351
loss: 0.9997780919075012,grad_norm: 0.9919057211057607, iteration: 307352
loss: 0.9901279211044312,grad_norm: 0.8756747725237339, iteration: 307353
loss: 1.0381702184677124,grad_norm: 0.8601813827821594, iteration: 307354
loss: 0.9726161360740662,grad_norm: 0.9044556330093273, iteration: 307355
loss: 1.0261141061782837,grad_norm: 0.9999991527653764, iteration: 307356
loss: 1.0294617414474487,grad_norm: 0.9999990303506708, iteration: 307357
loss: 1.0425843000411987,grad_norm: 0.9902639515587767, iteration: 307358
loss: 1.0283693075180054,grad_norm: 0.7477071867598019, iteration: 307359
loss: 1.0130599737167358,grad_norm: 0.9026270010415155, iteration: 307360
loss: 0.9629769921302795,grad_norm: 0.9999998529951545, iteration: 307361
loss: 0.9844608902931213,grad_norm: 0.999999105502649, iteration: 307362
loss: 0.997258722782135,grad_norm: 0.9999992093794352, iteration: 307363
loss: 0.9986992478370667,grad_norm: 0.8108198805672198, iteration: 307364
loss: 1.0143139362335205,grad_norm: 0.9999991350432659, iteration: 307365
loss: 0.996828019618988,grad_norm: 0.8818947788671052, iteration: 307366
loss: 0.9894379377365112,grad_norm: 0.8504146579129032, iteration: 307367
loss: 0.9915834069252014,grad_norm: 0.7630547792103544, iteration: 307368
loss: 0.993511438369751,grad_norm: 0.9342541921333135, iteration: 307369
loss: 0.9814167022705078,grad_norm: 0.9999992802488435, iteration: 307370
loss: 0.980440616607666,grad_norm: 0.8151662116934426, iteration: 307371
loss: 1.017771601676941,grad_norm: 0.8512812525678709, iteration: 307372
loss: 1.0142948627471924,grad_norm: 0.900162991801146, iteration: 307373
loss: 1.0399309396743774,grad_norm: 0.96753512242683, iteration: 307374
loss: 0.9635943174362183,grad_norm: 0.9551501614327341, iteration: 307375
loss: 0.9603984951972961,grad_norm: 0.9281415415575093, iteration: 307376
loss: 0.9793905019760132,grad_norm: 0.999999107137766, iteration: 307377
loss: 1.0096018314361572,grad_norm: 0.999998857054585, iteration: 307378
loss: 1.0164518356323242,grad_norm: 0.9999999240212243, iteration: 307379
loss: 0.9993227124214172,grad_norm: 0.9180380674004981, iteration: 307380
loss: 0.9882805347442627,grad_norm: 0.700616069222098, iteration: 307381
loss: 1.0122475624084473,grad_norm: 0.999999371763422, iteration: 307382
loss: 1.0379035472869873,grad_norm: 0.9999998485479987, iteration: 307383
loss: 1.0125477313995361,grad_norm: 0.9999990507997838, iteration: 307384
loss: 1.0011591911315918,grad_norm: 0.9999992166957035, iteration: 307385
loss: 0.9774209856987,grad_norm: 0.7623839031026072, iteration: 307386
loss: 1.0192577838897705,grad_norm: 0.9235240157909905, iteration: 307387
loss: 1.0206115245819092,grad_norm: 0.9999997368733223, iteration: 307388
loss: 0.9815725088119507,grad_norm: 0.8452833573338179, iteration: 307389
loss: 1.022213101387024,grad_norm: 0.8777632003579381, iteration: 307390
loss: 1.0059502124786377,grad_norm: 0.7891747212973849, iteration: 307391
loss: 0.9943667054176331,grad_norm: 0.9210224051530144, iteration: 307392
loss: 1.0233412981033325,grad_norm: 0.8648965694365961, iteration: 307393
loss: 1.0263140201568604,grad_norm: 0.7501517761165154, iteration: 307394
loss: 1.010371446609497,grad_norm: 0.9999994060090415, iteration: 307395
loss: 1.000742793083191,grad_norm: 0.7519759227032043, iteration: 307396
loss: 1.0044740438461304,grad_norm: 0.9024670556205325, iteration: 307397
loss: 1.073919415473938,grad_norm: 0.9999999324483633, iteration: 307398
loss: 0.9919204115867615,grad_norm: 0.7637420468182852, iteration: 307399
loss: 1.014285922050476,grad_norm: 0.9255006448543391, iteration: 307400
loss: 0.9657919406890869,grad_norm: 0.7845181139437516, iteration: 307401
loss: 1.0028736591339111,grad_norm: 0.9297274538294923, iteration: 307402
loss: 0.9891688227653503,grad_norm: 0.9248298680755397, iteration: 307403
loss: 1.019845724105835,grad_norm: 0.8617509069363563, iteration: 307404
loss: 0.9685595631599426,grad_norm: 0.8018936063251719, iteration: 307405
loss: 0.9852859377861023,grad_norm: 0.8530988534302334, iteration: 307406
loss: 1.0162194967269897,grad_norm: 0.7655628372965618, iteration: 307407
loss: 1.0159196853637695,grad_norm: 0.6583554564861983, iteration: 307408
loss: 1.0251797437667847,grad_norm: 0.6941784565252573, iteration: 307409
loss: 1.253916621208191,grad_norm: 0.9999990957467586, iteration: 307410
loss: 1.0210872888565063,grad_norm: 0.9999990465495641, iteration: 307411
loss: 1.0296536684036255,grad_norm: 0.9713316973917163, iteration: 307412
loss: 1.0163300037384033,grad_norm: 0.7512086737048557, iteration: 307413
loss: 0.9904629588127136,grad_norm: 0.8406964266741462, iteration: 307414
loss: 0.9741735458374023,grad_norm: 0.7127542328807743, iteration: 307415
loss: 0.9905763864517212,grad_norm: 0.8287491648824248, iteration: 307416
loss: 1.0028691291809082,grad_norm: 0.9999990747766163, iteration: 307417
loss: 0.9426647424697876,grad_norm: 0.8489593516895517, iteration: 307418
loss: 1.114058017730713,grad_norm: 0.9999996003698971, iteration: 307419
loss: 1.0305111408233643,grad_norm: 0.9325086073485324, iteration: 307420
loss: 1.0179537534713745,grad_norm: 0.8262239858883892, iteration: 307421
loss: 1.0227833986282349,grad_norm: 0.778245969684749, iteration: 307422
loss: 0.9909928441047668,grad_norm: 0.7334411231048715, iteration: 307423
loss: 0.9924267530441284,grad_norm: 0.9127062779814308, iteration: 307424
loss: 0.9704028964042664,grad_norm: 0.9999991135437072, iteration: 307425
loss: 1.0360575914382935,grad_norm: 0.7693755750989978, iteration: 307426
loss: 1.0047917366027832,grad_norm: 0.7384103204909479, iteration: 307427
loss: 0.958957850933075,grad_norm: 0.7583909022626867, iteration: 307428
loss: 1.042488694190979,grad_norm: 0.9999991422054677, iteration: 307429
loss: 1.0068398714065552,grad_norm: 0.7570734547516695, iteration: 307430
loss: 1.0004897117614746,grad_norm: 0.7573640399973435, iteration: 307431
loss: 0.9858884215354919,grad_norm: 0.9219559986995172, iteration: 307432
loss: 1.0221350193023682,grad_norm: 0.8039540455355341, iteration: 307433
loss: 0.9950851798057556,grad_norm: 0.754826450977661, iteration: 307434
loss: 0.9641871452331543,grad_norm: 0.7622928421032004, iteration: 307435
loss: 1.0360866785049438,grad_norm: 0.9999995025636336, iteration: 307436
loss: 0.9968478083610535,grad_norm: 0.9276474536860216, iteration: 307437
loss: 0.9861714839935303,grad_norm: 0.8977742963767249, iteration: 307438
loss: 0.9852620959281921,grad_norm: 0.7305513488692346, iteration: 307439
loss: 0.9814230799674988,grad_norm: 0.9999989383008037, iteration: 307440
loss: 1.0254807472229004,grad_norm: 0.9995280858092765, iteration: 307441
loss: 0.9729158282279968,grad_norm: 0.9748649150206213, iteration: 307442
loss: 1.0159761905670166,grad_norm: 0.9721020935031778, iteration: 307443
loss: 0.9709007143974304,grad_norm: 0.8881547613960524, iteration: 307444
loss: 1.0020970106124878,grad_norm: 0.7793306250094952, iteration: 307445
loss: 0.9857852458953857,grad_norm: 0.7603585722528344, iteration: 307446
loss: 1.0274648666381836,grad_norm: 0.8854801049628812, iteration: 307447
loss: 0.998820424079895,grad_norm: 0.7724567398504464, iteration: 307448
loss: 0.984642744064331,grad_norm: 0.7012329514091432, iteration: 307449
loss: 0.9882572293281555,grad_norm: 0.7525651118557911, iteration: 307450
loss: 0.9823593497276306,grad_norm: 0.8848736561890032, iteration: 307451
loss: 0.985710620880127,grad_norm: 0.9101408440608998, iteration: 307452
loss: 1.031552791595459,grad_norm: 0.9999991514722149, iteration: 307453
loss: 0.9960832595825195,grad_norm: 0.6652759936572086, iteration: 307454
loss: 1.024824857711792,grad_norm: 0.9310610999812668, iteration: 307455
loss: 1.014944314956665,grad_norm: 0.7960621547417958, iteration: 307456
loss: 0.9658669829368591,grad_norm: 0.9363938207731973, iteration: 307457
loss: 0.9792885780334473,grad_norm: 0.8805587969586404, iteration: 307458
loss: 1.034920573234558,grad_norm: 0.8823443876946807, iteration: 307459
loss: 1.0378419160842896,grad_norm: 0.9999998278094145, iteration: 307460
loss: 1.1064914464950562,grad_norm: 0.9999992913977943, iteration: 307461
loss: 0.972867488861084,grad_norm: 0.8302623037598887, iteration: 307462
loss: 0.9911329746246338,grad_norm: 0.8699626723596383, iteration: 307463
loss: 0.9860121607780457,grad_norm: 0.833168238538241, iteration: 307464
loss: 0.9824486374855042,grad_norm: 0.999998984993385, iteration: 307465
loss: 0.9892516732215881,grad_norm: 0.988613715972363, iteration: 307466
loss: 1.0008553266525269,grad_norm: 0.8771168432018851, iteration: 307467
loss: 1.067760705947876,grad_norm: 0.9447168021418343, iteration: 307468
loss: 0.9517002701759338,grad_norm: 0.999999207869126, iteration: 307469
loss: 0.9617227911949158,grad_norm: 0.8113615707451077, iteration: 307470
loss: 0.9778205752372742,grad_norm: 0.9552425375044379, iteration: 307471
loss: 0.9939080476760864,grad_norm: 0.9493335540824803, iteration: 307472
loss: 1.1010860204696655,grad_norm: 1.0000000037926242, iteration: 307473
loss: 0.9995993971824646,grad_norm: 0.9865966092427658, iteration: 307474
loss: 0.9941973090171814,grad_norm: 0.843155341805919, iteration: 307475
loss: 0.981868326663971,grad_norm: 0.8718582172061652, iteration: 307476
loss: 1.02048659324646,grad_norm: 0.6992375254068293, iteration: 307477
loss: 0.9858657121658325,grad_norm: 0.9671443684909308, iteration: 307478
loss: 0.9985461831092834,grad_norm: 0.9999989250152488, iteration: 307479
loss: 1.0279072523117065,grad_norm: 0.9999993179183186, iteration: 307480
loss: 0.9774787425994873,grad_norm: 0.9999991296742122, iteration: 307481
loss: 1.032505750656128,grad_norm: 0.7406570292521472, iteration: 307482
loss: 1.0270401239395142,grad_norm: 0.9999990491922902, iteration: 307483
loss: 0.9958645105361938,grad_norm: 0.9999991897271818, iteration: 307484
loss: 1.0123025178909302,grad_norm: 0.8112063561439778, iteration: 307485
loss: 1.026779055595398,grad_norm: 0.9999992512496264, iteration: 307486
loss: 0.9968975782394409,grad_norm: 0.9999991189824277, iteration: 307487
loss: 1.1815719604492188,grad_norm: 0.9999999864505287, iteration: 307488
loss: 1.0095800161361694,grad_norm: 0.9999997308445591, iteration: 307489
loss: 1.235826849937439,grad_norm: 0.9999995791125057, iteration: 307490
loss: 0.9621691703796387,grad_norm: 0.7528417069723042, iteration: 307491
loss: 1.0050653219223022,grad_norm: 0.9962557023257285, iteration: 307492
loss: 1.0036779642105103,grad_norm: 0.9645161593090847, iteration: 307493
loss: 0.9634418487548828,grad_norm: 0.8190951780376483, iteration: 307494
loss: 0.9860236644744873,grad_norm: 0.8756391498524532, iteration: 307495
loss: 0.9992940425872803,grad_norm: 0.8026999729022612, iteration: 307496
loss: 0.9929921627044678,grad_norm: 0.9784217874450187, iteration: 307497
loss: 1.0051875114440918,grad_norm: 0.9296575259222615, iteration: 307498
loss: 1.0507928133010864,grad_norm: 0.9793671012409406, iteration: 307499
loss: 0.9849608540534973,grad_norm: 0.9618627159216782, iteration: 307500
loss: 1.0024218559265137,grad_norm: 0.9999998776827959, iteration: 307501
loss: 0.9858506917953491,grad_norm: 0.9999993484888335, iteration: 307502
loss: 1.0286891460418701,grad_norm: 0.8540782770625416, iteration: 307503
loss: 1.037641167640686,grad_norm: 0.7882755452385766, iteration: 307504
loss: 0.9846537113189697,grad_norm: 0.8641846637434096, iteration: 307505
loss: 1.0228526592254639,grad_norm: 0.9696693510053168, iteration: 307506
loss: 1.0060429573059082,grad_norm: 0.9024854602275106, iteration: 307507
loss: 1.0227969884872437,grad_norm: 0.9447079745646568, iteration: 307508
loss: 0.9612970948219299,grad_norm: 0.9457180941925396, iteration: 307509
loss: 1.0009998083114624,grad_norm: 0.9302957059542081, iteration: 307510
loss: 1.00602126121521,grad_norm: 0.679439802281134, iteration: 307511
loss: 0.9883517622947693,grad_norm: 0.9589811711571778, iteration: 307512
loss: 0.9458322525024414,grad_norm: 0.9908335632771115, iteration: 307513
loss: 0.969257116317749,grad_norm: 0.9999998378207149, iteration: 307514
loss: 1.0081393718719482,grad_norm: 0.8540899222717435, iteration: 307515
loss: 1.083250641822815,grad_norm: 0.8685132424147515, iteration: 307516
loss: 0.9607381820678711,grad_norm: 0.7550869972472158, iteration: 307517
loss: 1.0009429454803467,grad_norm: 0.7681940700366325, iteration: 307518
loss: 1.01119863986969,grad_norm: 0.753640695267967, iteration: 307519
loss: 1.0083411931991577,grad_norm: 0.9999999226373237, iteration: 307520
loss: 1.0107308626174927,grad_norm: 0.9556757930360839, iteration: 307521
loss: 0.9849971532821655,grad_norm: 0.7948271441850064, iteration: 307522
loss: 0.9750841856002808,grad_norm: 0.9999990193475905, iteration: 307523
loss: 1.001305341720581,grad_norm: 0.8871748805947061, iteration: 307524
loss: 1.0375175476074219,grad_norm: 0.9999992550318755, iteration: 307525
loss: 0.9954659938812256,grad_norm: 0.8703993223503915, iteration: 307526
loss: 1.0431408882141113,grad_norm: 0.99999909013885, iteration: 307527
loss: 0.9622074365615845,grad_norm: 0.943856357635519, iteration: 307528
loss: 0.9654352068901062,grad_norm: 0.8235082719281163, iteration: 307529
loss: 1.0195422172546387,grad_norm: 0.7931351673002228, iteration: 307530
loss: 1.0442928075790405,grad_norm: 0.8084323578970124, iteration: 307531
loss: 1.0764243602752686,grad_norm: 0.9999989444384061, iteration: 307532
loss: 0.9952881932258606,grad_norm: 0.750784413647876, iteration: 307533
loss: 1.021043062210083,grad_norm: 0.8789595403178563, iteration: 307534
loss: 1.0052086114883423,grad_norm: 0.8742915383134001, iteration: 307535
loss: 0.9524909853935242,grad_norm: 0.9999992826685684, iteration: 307536
loss: 0.9885241389274597,grad_norm: 0.8593581670524899, iteration: 307537
loss: 0.9769105315208435,grad_norm: 0.8619275740146985, iteration: 307538
loss: 0.9337037205696106,grad_norm: 0.8156664952796739, iteration: 307539
loss: 0.9608988165855408,grad_norm: 0.8974698517325155, iteration: 307540
loss: 0.979834258556366,grad_norm: 0.9713711254307396, iteration: 307541
loss: 0.9875043630599976,grad_norm: 0.8210508101095432, iteration: 307542
loss: 0.9759324193000793,grad_norm: 0.8613932265249715, iteration: 307543
loss: 1.0773268938064575,grad_norm: 0.9999998119761957, iteration: 307544
loss: 0.9607139825820923,grad_norm: 0.8402526628571294, iteration: 307545
loss: 0.9885530471801758,grad_norm: 0.7927611305330678, iteration: 307546
loss: 0.9735201001167297,grad_norm: 0.7443098856655535, iteration: 307547
loss: 0.9880510568618774,grad_norm: 0.8362147580748757, iteration: 307548
loss: 0.9323803186416626,grad_norm: 0.8240337278517852, iteration: 307549
loss: 1.0283288955688477,grad_norm: 0.8137118237014769, iteration: 307550
loss: 1.0136257410049438,grad_norm: 0.999999009517324, iteration: 307551
loss: 1.0001814365386963,grad_norm: 0.7206758066448045, iteration: 307552
loss: 0.9769096970558167,grad_norm: 0.7584034997108361, iteration: 307553
loss: 1.0512689352035522,grad_norm: 0.7658383922071631, iteration: 307554
loss: 0.9933913946151733,grad_norm: 0.8403593998696121, iteration: 307555
loss: 1.044922947883606,grad_norm: 0.868878317945788, iteration: 307556
loss: 0.9693945646286011,grad_norm: 0.9324817223891766, iteration: 307557
loss: 1.0153599977493286,grad_norm: 0.6654237424530709, iteration: 307558
loss: 1.0369278192520142,grad_norm: 0.9287771589114143, iteration: 307559
loss: 1.027005910873413,grad_norm: 0.7844203554193857, iteration: 307560
loss: 0.9889600872993469,grad_norm: 0.8354943869398731, iteration: 307561
loss: 0.9903286695480347,grad_norm: 0.9396397573295482, iteration: 307562
loss: 0.9860587120056152,grad_norm: 0.886965486238675, iteration: 307563
loss: 1.0064585208892822,grad_norm: 0.7716297912988269, iteration: 307564
loss: 1.0057553052902222,grad_norm: 0.7790169286922052, iteration: 307565
loss: 0.984083890914917,grad_norm: 0.999999111104717, iteration: 307566
loss: 0.9863341450691223,grad_norm: 0.9999997313478843, iteration: 307567
loss: 0.9857792258262634,grad_norm: 0.8770481457280944, iteration: 307568
loss: 1.0050246715545654,grad_norm: 0.981433763350797, iteration: 307569
loss: 1.0320459604263306,grad_norm: 0.8458551651794558, iteration: 307570
loss: 1.0179134607315063,grad_norm: 0.7724257162878911, iteration: 307571
loss: 0.9886317253112793,grad_norm: 0.8098207532161004, iteration: 307572
loss: 0.9979608058929443,grad_norm: 0.7683922857685949, iteration: 307573
loss: 0.9887245893478394,grad_norm: 0.9999994868324883, iteration: 307574
loss: 0.9995494484901428,grad_norm: 0.7970438409585602, iteration: 307575
loss: 0.9669046401977539,grad_norm: 0.8586741351045352, iteration: 307576
loss: 1.0045926570892334,grad_norm: 0.8507819359493844, iteration: 307577
loss: 1.0208077430725098,grad_norm: 0.925291450360021, iteration: 307578
loss: 1.007135033607483,grad_norm: 0.8957467068217281, iteration: 307579
loss: 1.0113744735717773,grad_norm: 0.8121066289674553, iteration: 307580
loss: 1.0091313123703003,grad_norm: 0.8196821102098337, iteration: 307581
loss: 0.9959618449211121,grad_norm: 0.773663782158748, iteration: 307582
loss: 1.0597217082977295,grad_norm: 0.9999999721065205, iteration: 307583
loss: 1.0186517238616943,grad_norm: 0.9999992508836336, iteration: 307584
loss: 1.0028395652770996,grad_norm: 0.8010370240218241, iteration: 307585
loss: 1.016340732574463,grad_norm: 0.9047543147530372, iteration: 307586
loss: 0.9551056623458862,grad_norm: 0.8523311266472027, iteration: 307587
loss: 1.0114246606826782,grad_norm: 0.8091483877309065, iteration: 307588
loss: 1.1467480659484863,grad_norm: 0.9999993194348539, iteration: 307589
loss: 0.9841134548187256,grad_norm: 0.8621518392609528, iteration: 307590
loss: 0.9671890139579773,grad_norm: 0.9999990369675109, iteration: 307591
loss: 1.0035254955291748,grad_norm: 0.9763190773947111, iteration: 307592
loss: 0.9778504967689514,grad_norm: 0.9999993197799373, iteration: 307593
loss: 0.9885559678077698,grad_norm: 0.7478417761388914, iteration: 307594
loss: 1.0200507640838623,grad_norm: 0.9999992778456366, iteration: 307595
loss: 1.0295153856277466,grad_norm: 0.999999097527078, iteration: 307596
loss: 1.0537372827529907,grad_norm: 0.9999999709391949, iteration: 307597
loss: 0.9881677627563477,grad_norm: 0.865499210435517, iteration: 307598
loss: 1.0310308933258057,grad_norm: 0.999999461718497, iteration: 307599
loss: 0.9941197037696838,grad_norm: 0.9999991832130397, iteration: 307600
loss: 0.9860685467720032,grad_norm: 0.9999991491774486, iteration: 307601
loss: 1.028163194656372,grad_norm: 0.9999993099342234, iteration: 307602
loss: 0.9803122878074646,grad_norm: 0.8016828239124505, iteration: 307603
loss: 1.0116585493087769,grad_norm: 0.8158499393167813, iteration: 307604
loss: 0.9766697883605957,grad_norm: 0.7594942624150425, iteration: 307605
loss: 1.0258620977401733,grad_norm: 0.9358362127486333, iteration: 307606
loss: 0.9979667067527771,grad_norm: 0.7670086881770702, iteration: 307607
loss: 1.0102343559265137,grad_norm: 0.8184530077561208, iteration: 307608
loss: 1.1146303415298462,grad_norm: 0.8404928147455165, iteration: 307609
loss: 1.018428921699524,grad_norm: 0.9452933080063858, iteration: 307610
loss: 0.9922617077827454,grad_norm: 0.8719740347499075, iteration: 307611
loss: 1.0317418575286865,grad_norm: 0.8550654448310036, iteration: 307612
loss: 1.024605631828308,grad_norm: 0.8609725073379506, iteration: 307613
loss: 0.9900766611099243,grad_norm: 0.9062785545084601, iteration: 307614
loss: 0.9828406572341919,grad_norm: 0.9830750818061705, iteration: 307615
loss: 1.0164459943771362,grad_norm: 0.9928340900991359, iteration: 307616
loss: 1.1681876182556152,grad_norm: 0.9999998218291324, iteration: 307617
loss: 1.0307244062423706,grad_norm: 0.7033103965818281, iteration: 307618
loss: 0.9899379014968872,grad_norm: 0.9999992801451538, iteration: 307619
loss: 1.0252234935760498,grad_norm: 0.9999995143137955, iteration: 307620
loss: 1.0140613317489624,grad_norm: 0.9999992729966559, iteration: 307621
loss: 0.9981207847595215,grad_norm: 0.8254187208388248, iteration: 307622
loss: 0.9847065806388855,grad_norm: 0.7628477822757835, iteration: 307623
loss: 1.0382002592086792,grad_norm: 0.9999992526548855, iteration: 307624
loss: 1.0801537036895752,grad_norm: 0.9999991711877897, iteration: 307625
loss: 1.0833914279937744,grad_norm: 0.9999991157692694, iteration: 307626
loss: 1.0608384609222412,grad_norm: 0.9350963509820852, iteration: 307627
loss: 0.9903687834739685,grad_norm: 0.8083532178562276, iteration: 307628
loss: 0.9947333335876465,grad_norm: 0.7321573597687586, iteration: 307629
loss: 1.0033451318740845,grad_norm: 0.8578716528018702, iteration: 307630
loss: 1.0503438711166382,grad_norm: 0.9392051344066804, iteration: 307631
loss: 0.9823498725891113,grad_norm: 0.9999990409135894, iteration: 307632
loss: 0.9774124026298523,grad_norm: 0.8614673808976184, iteration: 307633
loss: 0.9900839924812317,grad_norm: 0.8288721141678794, iteration: 307634
loss: 1.0093141794204712,grad_norm: 0.7810242652079322, iteration: 307635
loss: 0.9986533522605896,grad_norm: 0.7709344647794408, iteration: 307636
loss: 0.9613661170005798,grad_norm: 0.7470780179310271, iteration: 307637
loss: 0.9906126856803894,grad_norm: 0.8057496221210242, iteration: 307638
loss: 0.9646877646446228,grad_norm: 0.716330453195556, iteration: 307639
loss: 0.9989566802978516,grad_norm: 0.8096797337814496, iteration: 307640
loss: 0.9626208543777466,grad_norm: 0.9597238495053112, iteration: 307641
loss: 0.9920112490653992,grad_norm: 0.9999991089533976, iteration: 307642
loss: 1.0147619247436523,grad_norm: 0.812364980827131, iteration: 307643
loss: 0.9685037136077881,grad_norm: 0.8769266386135273, iteration: 307644
loss: 0.9841432571411133,grad_norm: 0.9999997318319584, iteration: 307645
loss: 1.0389246940612793,grad_norm: 0.9961957917137639, iteration: 307646
loss: 1.1071301698684692,grad_norm: 0.9999999135362616, iteration: 307647
loss: 1.0276992321014404,grad_norm: 0.7946641655180492, iteration: 307648
loss: 1.0130443572998047,grad_norm: 0.7985984792625266, iteration: 307649
loss: 1.0207583904266357,grad_norm: 0.7350895390180584, iteration: 307650
loss: 0.982265293598175,grad_norm: 0.7683044030745321, iteration: 307651
loss: 0.9874414801597595,grad_norm: 0.9973098260122846, iteration: 307652
loss: 1.033403992652893,grad_norm: 0.8240832006155918, iteration: 307653
loss: 1.0453476905822754,grad_norm: 0.8061172427054604, iteration: 307654
loss: 0.9891923666000366,grad_norm: 0.9999990819549742, iteration: 307655
loss: 0.9812449812889099,grad_norm: 0.9999999554698223, iteration: 307656
loss: 0.9826650619506836,grad_norm: 0.9102943563601816, iteration: 307657
loss: 1.0010895729064941,grad_norm: 0.9999990034483193, iteration: 307658
loss: 1.0080455541610718,grad_norm: 0.8562811483089662, iteration: 307659
loss: 1.0037511587142944,grad_norm: 0.9370756392158919, iteration: 307660
loss: 0.9922095537185669,grad_norm: 0.8141518142242307, iteration: 307661
loss: 0.9891872406005859,grad_norm: 0.906571240644997, iteration: 307662
loss: 1.0702406167984009,grad_norm: 0.9999994137661743, iteration: 307663
loss: 0.9768710732460022,grad_norm: 0.9879038093260553, iteration: 307664
loss: 1.0697044134140015,grad_norm: 0.9296695105621294, iteration: 307665
loss: 1.0198613405227661,grad_norm: 0.9039036189813602, iteration: 307666
loss: 1.0079621076583862,grad_norm: 0.8524868894847022, iteration: 307667
loss: 1.034297227859497,grad_norm: 0.839350167910657, iteration: 307668
loss: 1.0298123359680176,grad_norm: 0.9999995914582188, iteration: 307669
loss: 0.9790634512901306,grad_norm: 0.8801701184819872, iteration: 307670
loss: 0.9874757528305054,grad_norm: 0.8856304502288818, iteration: 307671
loss: 1.0063889026641846,grad_norm: 0.9431516261941705, iteration: 307672
loss: 0.9968814849853516,grad_norm: 0.87098333333292, iteration: 307673
loss: 1.0371699333190918,grad_norm: 0.9999994491456683, iteration: 307674
loss: 1.0026969909667969,grad_norm: 0.7499956253690869, iteration: 307675
loss: 1.0377883911132812,grad_norm: 0.8148097675110514, iteration: 307676
loss: 1.0029795169830322,grad_norm: 0.894678803396001, iteration: 307677
loss: 0.9574928879737854,grad_norm: 0.8969552905009618, iteration: 307678
loss: 1.0452836751937866,grad_norm: 0.9060052706485817, iteration: 307679
loss: 0.9525091052055359,grad_norm: 0.8765568110651205, iteration: 307680
loss: 0.9814028143882751,grad_norm: 0.9165732514015376, iteration: 307681
loss: 1.0199614763259888,grad_norm: 0.8947011830253383, iteration: 307682
loss: 0.9969621300697327,grad_norm: 0.9076801943039576, iteration: 307683
loss: 1.0780227184295654,grad_norm: 0.9999997315330377, iteration: 307684
loss: 0.9956384897232056,grad_norm: 0.8411539391948243, iteration: 307685
loss: 1.0091174840927124,grad_norm: 0.8977798681697662, iteration: 307686
loss: 0.9999342560768127,grad_norm: 0.8715841656934236, iteration: 307687
loss: 1.0171294212341309,grad_norm: 0.8525299107068339, iteration: 307688
loss: 0.9588472247123718,grad_norm: 0.989961416272256, iteration: 307689
loss: 1.0535070896148682,grad_norm: 0.9999991084680285, iteration: 307690
loss: 0.9738580584526062,grad_norm: 0.9999991302241198, iteration: 307691
loss: 0.980059027671814,grad_norm: 0.8032980473925382, iteration: 307692
loss: 0.9964265823364258,grad_norm: 0.7015824795104272, iteration: 307693
loss: 1.003902554512024,grad_norm: 0.7736965402608426, iteration: 307694
loss: 0.9937679767608643,grad_norm: 0.9343784419899952, iteration: 307695
loss: 0.9816661477088928,grad_norm: 0.9727099557358003, iteration: 307696
loss: 1.0077587366104126,grad_norm: 0.8160725745138122, iteration: 307697
loss: 0.9848851561546326,grad_norm: 0.7566047430175763, iteration: 307698
loss: 0.988818347454071,grad_norm: 0.8319232988244273, iteration: 307699
loss: 1.0007011890411377,grad_norm: 0.7074257673358648, iteration: 307700
loss: 0.9912880063056946,grad_norm: 0.8861302039319894, iteration: 307701
loss: 1.0164105892181396,grad_norm: 0.9199217116400362, iteration: 307702
loss: 1.0138698816299438,grad_norm: 0.9323223147231848, iteration: 307703
loss: 1.0106197595596313,grad_norm: 0.9220505441091014, iteration: 307704
loss: 0.9942774772644043,grad_norm: 0.6702947629117095, iteration: 307705
loss: 0.9858615398406982,grad_norm: 0.7890996897373956, iteration: 307706
loss: 0.991343080997467,grad_norm: 0.7778464156610888, iteration: 307707
loss: 1.0688982009887695,grad_norm: 0.9999995762580244, iteration: 307708
loss: 1.0277937650680542,grad_norm: 0.9603808089201904, iteration: 307709
loss: 0.9922173023223877,grad_norm: 0.999999290509216, iteration: 307710
loss: 0.9753813743591309,grad_norm: 0.9320034276121026, iteration: 307711
loss: 1.004695177078247,grad_norm: 0.8397210311249659, iteration: 307712
loss: 1.0507270097732544,grad_norm: 0.9999989408667714, iteration: 307713
loss: 1.013816475868225,grad_norm: 0.9999989480300224, iteration: 307714
loss: 0.9991093277931213,grad_norm: 0.7971869968274296, iteration: 307715
loss: 0.9698582887649536,grad_norm: 0.8153635600940877, iteration: 307716
loss: 0.9953945875167847,grad_norm: 0.7404546126980159, iteration: 307717
loss: 0.9997918009757996,grad_norm: 0.7035830988032346, iteration: 307718
loss: 0.9850658178329468,grad_norm: 0.7544408893108996, iteration: 307719
loss: 0.9773686528205872,grad_norm: 0.6639989176506675, iteration: 307720
loss: 1.0001343488693237,grad_norm: 0.717867312991561, iteration: 307721
loss: 1.0311739444732666,grad_norm: 0.7507285696901971, iteration: 307722
loss: 0.991191565990448,grad_norm: 0.7628156126892457, iteration: 307723
loss: 1.0047000646591187,grad_norm: 0.848652613959947, iteration: 307724
loss: 0.9717755913734436,grad_norm: 0.9115230761593657, iteration: 307725
loss: 1.052663803100586,grad_norm: 0.8845441859635653, iteration: 307726
loss: 0.9953928589820862,grad_norm: 0.894878849035301, iteration: 307727
loss: 1.4369457960128784,grad_norm: 0.9999999334126913, iteration: 307728
loss: 0.9901768565177917,grad_norm: 0.8392917038780702, iteration: 307729
loss: 1.0022376775741577,grad_norm: 0.8222661948045512, iteration: 307730
loss: 0.9760656952857971,grad_norm: 0.9999990546865661, iteration: 307731
loss: 0.9912716746330261,grad_norm: 0.8113347695879681, iteration: 307732
loss: 0.996089518070221,grad_norm: 0.9473289253751193, iteration: 307733
loss: 0.9960917830467224,grad_norm: 0.8096663158232122, iteration: 307734
loss: 1.0032631158828735,grad_norm: 0.9999993058497334, iteration: 307735
loss: 0.9971649646759033,grad_norm: 0.9999992040265675, iteration: 307736
loss: 1.014628529548645,grad_norm: 0.9372976994205766, iteration: 307737
loss: 1.0031614303588867,grad_norm: 0.9000573581807018, iteration: 307738
loss: 1.0445489883422852,grad_norm: 0.8806218061037604, iteration: 307739
loss: 0.956398606300354,grad_norm: 0.8253004055073884, iteration: 307740
loss: 1.0257855653762817,grad_norm: 0.9669470458820915, iteration: 307741
loss: 1.0041861534118652,grad_norm: 0.8367831543010845, iteration: 307742
loss: 0.9826554656028748,grad_norm: 0.9351594859998679, iteration: 307743
loss: 0.9840521216392517,grad_norm: 0.7092289970632418, iteration: 307744
loss: 0.9918217658996582,grad_norm: 0.8179697791880205, iteration: 307745
loss: 0.962837815284729,grad_norm: 0.9999990506994523, iteration: 307746
loss: 0.99748694896698,grad_norm: 0.8455230437665296, iteration: 307747
loss: 0.9910890460014343,grad_norm: 0.775033171447293, iteration: 307748
loss: 1.0021812915802002,grad_norm: 0.9999998953721826, iteration: 307749
loss: 1.0090551376342773,grad_norm: 0.7729693962446367, iteration: 307750
loss: 1.005899429321289,grad_norm: 0.7035181524794427, iteration: 307751
loss: 0.9811415672302246,grad_norm: 0.7719193540695772, iteration: 307752
loss: 0.9765987992286682,grad_norm: 0.8750004071802941, iteration: 307753
loss: 0.9835976958274841,grad_norm: 0.9999992077811152, iteration: 307754
loss: 1.0117971897125244,grad_norm: 0.8412112936717877, iteration: 307755
loss: 0.9910615086555481,grad_norm: 0.7884079389240649, iteration: 307756
loss: 0.9924688935279846,grad_norm: 0.7066643518997048, iteration: 307757
loss: 0.9991515278816223,grad_norm: 0.8641814757115738, iteration: 307758
loss: 1.01266348361969,grad_norm: 0.9999990522200533, iteration: 307759
loss: 1.0180131196975708,grad_norm: 0.8765368761483266, iteration: 307760
loss: 0.9799605011940002,grad_norm: 0.9033497201998586, iteration: 307761
loss: 0.9669428467750549,grad_norm: 0.9999990055558006, iteration: 307762
loss: 1.0178356170654297,grad_norm: 0.7761823739101299, iteration: 307763
loss: 0.9829607605934143,grad_norm: 0.9840502896925204, iteration: 307764
loss: 0.9662723541259766,grad_norm: 0.7384680517672029, iteration: 307765
loss: 1.0093178749084473,grad_norm: 0.7580802954153952, iteration: 307766
loss: 0.9824962615966797,grad_norm: 0.8306287379316838, iteration: 307767
loss: 0.9908578395843506,grad_norm: 0.8426477494250236, iteration: 307768
loss: 1.0141053199768066,grad_norm: 0.7539632013166156, iteration: 307769
loss: 1.0022764205932617,grad_norm: 0.9999992250216571, iteration: 307770
loss: 1.1672779321670532,grad_norm: 0.9999997391985985, iteration: 307771
loss: 1.0712027549743652,grad_norm: 0.8904886168370076, iteration: 307772
loss: 1.0064060688018799,grad_norm: 0.9149224067255473, iteration: 307773
loss: 0.9490988850593567,grad_norm: 0.9999999162093209, iteration: 307774
loss: 0.9799346923828125,grad_norm: 0.9119527177945986, iteration: 307775
loss: 1.0081363916397095,grad_norm: 0.9609236702623686, iteration: 307776
loss: 0.9889065623283386,grad_norm: 0.8728739611465561, iteration: 307777
loss: 0.9847491383552551,grad_norm: 0.8345443689321769, iteration: 307778
loss: 0.9657652378082275,grad_norm: 0.6781697823265529, iteration: 307779
loss: 1.0026946067810059,grad_norm: 0.745004877154192, iteration: 307780
loss: 0.9957596659660339,grad_norm: 0.860738662394345, iteration: 307781
loss: 1.0138704776763916,grad_norm: 0.795777169175744, iteration: 307782
loss: 1.039957880973816,grad_norm: 0.9999990984322766, iteration: 307783
loss: 0.990270733833313,grad_norm: 0.9491197633347718, iteration: 307784
loss: 1.0061153173446655,grad_norm: 0.846004995960288, iteration: 307785
loss: 1.0430864095687866,grad_norm: 0.9999992061638001, iteration: 307786
loss: 0.9854435920715332,grad_norm: 0.9926782446555683, iteration: 307787
loss: 1.0014629364013672,grad_norm: 0.8441403126277934, iteration: 307788
loss: 0.9736216068267822,grad_norm: 0.9285109103733795, iteration: 307789
loss: 1.0084539651870728,grad_norm: 0.8123165553793802, iteration: 307790
loss: 1.0151314735412598,grad_norm: 0.7829261091069134, iteration: 307791
loss: 1.0709190368652344,grad_norm: 0.9999995271957702, iteration: 307792
loss: 1.0122591257095337,grad_norm: 0.9999991944091027, iteration: 307793
loss: 1.0223525762557983,grad_norm: 0.7473168434756913, iteration: 307794
loss: 0.9793635606765747,grad_norm: 0.796910121045637, iteration: 307795
loss: 1.1397546529769897,grad_norm: 0.99999950671643, iteration: 307796
loss: 1.0687137842178345,grad_norm: 0.9999991194602511, iteration: 307797
loss: 0.9938094019889832,grad_norm: 0.831558302900384, iteration: 307798
loss: 0.9931706786155701,grad_norm: 0.999999101268145, iteration: 307799
loss: 1.0024038553237915,grad_norm: 0.6349464440968393, iteration: 307800
loss: 0.9860392808914185,grad_norm: 0.797337240177887, iteration: 307801
loss: 0.9598864316940308,grad_norm: 0.893866714956238, iteration: 307802
loss: 0.9378337264060974,grad_norm: 0.9999990320568493, iteration: 307803
loss: 1.0150561332702637,grad_norm: 0.751251552335325, iteration: 307804
loss: 1.0081363916397095,grad_norm: 0.778335645301804, iteration: 307805
loss: 0.9812943935394287,grad_norm: 0.7208717052013701, iteration: 307806
loss: 0.9987683296203613,grad_norm: 0.7703505685724856, iteration: 307807
loss: 1.0082788467407227,grad_norm: 0.8043523756605993, iteration: 307808
loss: 0.9814492464065552,grad_norm: 0.8490802122760707, iteration: 307809
loss: 0.9968043565750122,grad_norm: 0.999999054243505, iteration: 307810
loss: 1.0196338891983032,grad_norm: 0.8117498350196387, iteration: 307811
loss: 0.986379861831665,grad_norm: 0.9999990939435615, iteration: 307812
loss: 1.0090504884719849,grad_norm: 0.999999184888221, iteration: 307813
loss: 0.9903406500816345,grad_norm: 0.8792899241580159, iteration: 307814
loss: 1.0228794813156128,grad_norm: 0.9999992562514154, iteration: 307815
loss: 0.9755622744560242,grad_norm: 0.9111860805967743, iteration: 307816
loss: 0.9850090742111206,grad_norm: 0.7331953601908217, iteration: 307817
loss: 1.0451909303665161,grad_norm: 0.9999991710644897, iteration: 307818
loss: 0.985451877117157,grad_norm: 0.999999320738818, iteration: 307819
loss: 0.986460268497467,grad_norm: 0.9999992032295507, iteration: 307820
loss: 1.0225906372070312,grad_norm: 0.8271676738981679, iteration: 307821
loss: 1.000550627708435,grad_norm: 0.879770901945043, iteration: 307822
loss: 1.0169553756713867,grad_norm: 0.8675637235106781, iteration: 307823
loss: 0.9701056480407715,grad_norm: 0.995376442724044, iteration: 307824
loss: 0.9739375114440918,grad_norm: 0.8853546277430234, iteration: 307825
loss: 0.9757405519485474,grad_norm: 0.8750406505721904, iteration: 307826
loss: 1.0367958545684814,grad_norm: 0.9889899931898478, iteration: 307827
loss: 1.0214701890945435,grad_norm: 0.6399188801093469, iteration: 307828
loss: 0.9774034023284912,grad_norm: 0.7449836777858079, iteration: 307829
loss: 1.060988426208496,grad_norm: 0.999999374803192, iteration: 307830
loss: 1.0331953763961792,grad_norm: 0.99999907158137, iteration: 307831
loss: 0.9565256834030151,grad_norm: 0.780149656069075, iteration: 307832
loss: 0.989138662815094,grad_norm: 0.9675067642353473, iteration: 307833
loss: 0.9755116701126099,grad_norm: 0.8598188599818778, iteration: 307834
loss: 1.0606772899627686,grad_norm: 0.867958561693122, iteration: 307835
loss: 0.9759669899940491,grad_norm: 0.8161634621659244, iteration: 307836
loss: 1.0087674856185913,grad_norm: 0.5716119179395465, iteration: 307837
loss: 1.0809918642044067,grad_norm: 0.7431304511577702, iteration: 307838
loss: 0.9695762395858765,grad_norm: 0.8547705420804186, iteration: 307839
loss: 0.9656931161880493,grad_norm: 0.9876265757033882, iteration: 307840
loss: 0.9609558582305908,grad_norm: 0.82091149542596, iteration: 307841
loss: 1.03229558467865,grad_norm: 0.8344482089911662, iteration: 307842
loss: 1.0539995431900024,grad_norm: 0.7811244843891151, iteration: 307843
loss: 0.9905780553817749,grad_norm: 0.783692277885814, iteration: 307844
loss: 0.9629855751991272,grad_norm: 0.815473630019639, iteration: 307845
loss: 1.0058746337890625,grad_norm: 0.9017718525261542, iteration: 307846
loss: 1.0048658847808838,grad_norm: 0.9999990493225408, iteration: 307847
loss: 0.999414324760437,grad_norm: 0.8724594366175339, iteration: 307848
loss: 0.983521580696106,grad_norm: 1.0000000031691725, iteration: 307849
loss: 1.059964656829834,grad_norm: 0.9999994027450786, iteration: 307850
loss: 1.0006369352340698,grad_norm: 0.7982397854891721, iteration: 307851
loss: 0.9822594523429871,grad_norm: 0.779589418108962, iteration: 307852
loss: 1.0226774215698242,grad_norm: 0.8052384157433419, iteration: 307853
loss: 1.0007069110870361,grad_norm: 0.9999991393403906, iteration: 307854
loss: 0.9704618453979492,grad_norm: 0.9536930162322153, iteration: 307855
loss: 1.0410597324371338,grad_norm: 0.9999998787950038, iteration: 307856
loss: 0.9917725920677185,grad_norm: 0.8308214559058187, iteration: 307857
loss: 0.9623821377754211,grad_norm: 0.8957663917573834, iteration: 307858
loss: 0.991001546382904,grad_norm: 0.999999040558939, iteration: 307859
loss: 0.9845731854438782,grad_norm: 0.8403514289455236, iteration: 307860
loss: 1.0221742391586304,grad_norm: 0.8220888351305954, iteration: 307861
loss: 0.9905218482017517,grad_norm: 0.6935170792211457, iteration: 307862
loss: 0.9838517904281616,grad_norm: 0.7329987664622315, iteration: 307863
loss: 1.0022461414337158,grad_norm: 0.800198028058323, iteration: 307864
loss: 0.987387478351593,grad_norm: 0.9999993390336536, iteration: 307865
loss: 1.0003290176391602,grad_norm: 0.8082187577209466, iteration: 307866
loss: 1.0544973611831665,grad_norm: 0.9103580461424847, iteration: 307867
loss: 1.008757472038269,grad_norm: 0.6832434794755202, iteration: 307868
loss: 0.9555648565292358,grad_norm: 0.7498372750031116, iteration: 307869
loss: 1.0372307300567627,grad_norm: 0.8278760066069634, iteration: 307870
loss: 1.1062135696411133,grad_norm: 0.9648598432337967, iteration: 307871
loss: 1.0222530364990234,grad_norm: 0.8674813932369784, iteration: 307872
loss: 0.9972968697547913,grad_norm: 0.6848790027106326, iteration: 307873
loss: 1.007225751876831,grad_norm: 0.8630868445211168, iteration: 307874
loss: 0.9788677096366882,grad_norm: 0.9082395268366112, iteration: 307875
loss: 0.9933598637580872,grad_norm: 0.8337581799714185, iteration: 307876
loss: 1.0078781843185425,grad_norm: 0.7879772782537519, iteration: 307877
loss: 0.9938759207725525,grad_norm: 0.7267743725655629, iteration: 307878
loss: 0.9933742880821228,grad_norm: 0.8984936680817801, iteration: 307879
loss: 1.0373362302780151,grad_norm: 0.9999994539538438, iteration: 307880
loss: 1.1056536436080933,grad_norm: 0.9999990722488348, iteration: 307881
loss: 0.9673526883125305,grad_norm: 0.6305992247445552, iteration: 307882
loss: 0.9659364223480225,grad_norm: 0.6957634885120527, iteration: 307883
loss: 1.0009721517562866,grad_norm: 0.8943209014312915, iteration: 307884
loss: 0.9580510854721069,grad_norm: 0.9999992218979621, iteration: 307885
loss: 1.013499140739441,grad_norm: 0.8273403605328378, iteration: 307886
loss: 0.9681859612464905,grad_norm: 0.8107463914696544, iteration: 307887
loss: 1.0323318243026733,grad_norm: 0.821253683946585, iteration: 307888
loss: 1.0113203525543213,grad_norm: 0.806827313291615, iteration: 307889
loss: 0.9771684408187866,grad_norm: 0.7997199769340971, iteration: 307890
loss: 0.9484351873397827,grad_norm: 0.8596874239813531, iteration: 307891
loss: 0.9953258037567139,grad_norm: 0.8504047795490637, iteration: 307892
loss: 0.9956619739532471,grad_norm: 0.8968758170766731, iteration: 307893
loss: 1.0368924140930176,grad_norm: 0.9992982795836788, iteration: 307894
loss: 1.0118860006332397,grad_norm: 0.9044774456639723, iteration: 307895
loss: 1.0211514234542847,grad_norm: 0.8463806247253457, iteration: 307896
loss: 1.0241774320602417,grad_norm: 0.9175559917931376, iteration: 307897
loss: 1.0438786745071411,grad_norm: 0.999999056721386, iteration: 307898
loss: 1.0387572050094604,grad_norm: 0.9999991888263499, iteration: 307899
loss: 1.0174251794815063,grad_norm: 0.9439539892297982, iteration: 307900
loss: 1.0190380811691284,grad_norm: 0.9010986156990837, iteration: 307901
loss: 1.0031155347824097,grad_norm: 0.8303489048243359, iteration: 307902
loss: 1.0111035108566284,grad_norm: 0.7485892404039902, iteration: 307903
loss: 1.1114376783370972,grad_norm: 0.9999997584882804, iteration: 307904
loss: 0.9467689394950867,grad_norm: 0.9999998184331065, iteration: 307905
loss: 1.0296447277069092,grad_norm: 0.8560165360462818, iteration: 307906
loss: 0.975078821182251,grad_norm: 0.9060105105031231, iteration: 307907
loss: 0.9819481372833252,grad_norm: 0.8943725024462894, iteration: 307908
loss: 1.0016217231750488,grad_norm: 0.9301534250865127, iteration: 307909
loss: 0.9972780346870422,grad_norm: 0.9237711677429293, iteration: 307910
loss: 0.971011221408844,grad_norm: 0.855202483213912, iteration: 307911
loss: 1.0202538967132568,grad_norm: 0.92351330994978, iteration: 307912
loss: 1.0049272775650024,grad_norm: 0.8665430900669453, iteration: 307913
loss: 0.9860490560531616,grad_norm: 0.9409203819932875, iteration: 307914
loss: 1.0634740591049194,grad_norm: 0.8542168412717537, iteration: 307915
loss: 0.9824169874191284,grad_norm: 0.9485289631003817, iteration: 307916
loss: 0.9897580742835999,grad_norm: 0.9861078785126497, iteration: 307917
loss: 1.0134221315383911,grad_norm: 0.9450130724690895, iteration: 307918
loss: 0.9874481558799744,grad_norm: 0.8305062407712334, iteration: 307919
loss: 1.1186672449111938,grad_norm: 0.8317351304456052, iteration: 307920
loss: 1.071283221244812,grad_norm: 0.9525252830025133, iteration: 307921
loss: 0.9252483248710632,grad_norm: 0.7838963048909654, iteration: 307922
loss: 1.0270203351974487,grad_norm: 0.9999990740259841, iteration: 307923
loss: 1.001112937927246,grad_norm: 0.8623337655873461, iteration: 307924
loss: 0.9505239725112915,grad_norm: 0.8383076604303992, iteration: 307925
loss: 1.0206997394561768,grad_norm: 0.9229205185033406, iteration: 307926
loss: 1.0381598472595215,grad_norm: 0.9999997790558331, iteration: 307927
loss: 1.082557201385498,grad_norm: 0.9999998425007828, iteration: 307928
loss: 0.9588203430175781,grad_norm: 0.904020848079624, iteration: 307929
loss: 1.0066150426864624,grad_norm: 0.7703714574021839, iteration: 307930
loss: 1.0109293460845947,grad_norm: 0.7648656581006361, iteration: 307931
loss: 1.0003811120986938,grad_norm: 0.9389603944825451, iteration: 307932
loss: 1.0182298421859741,grad_norm: 0.8359029815870919, iteration: 307933
loss: 1.037259817123413,grad_norm: 0.999999068718826, iteration: 307934
loss: 0.9818528294563293,grad_norm: 0.6764143431108893, iteration: 307935
loss: 1.0043457746505737,grad_norm: 0.9050941982474967, iteration: 307936
loss: 1.036839485168457,grad_norm: 0.8648807309373304, iteration: 307937
loss: 0.9744722247123718,grad_norm: 0.7865594742359268, iteration: 307938
loss: 1.067023754119873,grad_norm: 0.83090955367677, iteration: 307939
loss: 0.9962565302848816,grad_norm: 0.8576518711529879, iteration: 307940
loss: 1.0534976720809937,grad_norm: 0.9999993641606164, iteration: 307941
loss: 0.986435055732727,grad_norm: 0.9999990711647608, iteration: 307942
loss: 0.9850566387176514,grad_norm: 0.9999989974021122, iteration: 307943
loss: 1.0094456672668457,grad_norm: 0.984789177533301, iteration: 307944
loss: 1.017090916633606,grad_norm: 0.9999990186819452, iteration: 307945
loss: 1.0208216905593872,grad_norm: 0.8167896965349642, iteration: 307946
loss: 0.9988905787467957,grad_norm: 0.9999993023123567, iteration: 307947
loss: 0.9589258432388306,grad_norm: 0.7886182378043853, iteration: 307948
loss: 1.0261693000793457,grad_norm: 0.9993045023991093, iteration: 307949
loss: 0.9966620802879333,grad_norm: 0.7178974923908095, iteration: 307950
loss: 0.9771285057067871,grad_norm: 0.977649314277978, iteration: 307951
loss: 1.0305956602096558,grad_norm: 0.9999991624061756, iteration: 307952
loss: 1.016075849533081,grad_norm: 0.8097338766883488, iteration: 307953
loss: 0.9787561297416687,grad_norm: 0.9215373771502887, iteration: 307954
loss: 0.9904725551605225,grad_norm: 0.8991231729533353, iteration: 307955
loss: 1.064095377922058,grad_norm: 0.9999995011686873, iteration: 307956
loss: 1.0181158781051636,grad_norm: 0.9001577758717425, iteration: 307957
loss: 1.0446441173553467,grad_norm: 0.8393348273745741, iteration: 307958
loss: 0.9862245917320251,grad_norm: 0.6802479841768585, iteration: 307959
loss: 0.9821562767028809,grad_norm: 0.8821040171151184, iteration: 307960
loss: 1.0267937183380127,grad_norm: 0.9999999455127591, iteration: 307961
loss: 0.9990682601928711,grad_norm: 0.8241934760952182, iteration: 307962
loss: 1.0028669834136963,grad_norm: 0.8278268642069487, iteration: 307963
loss: 0.986366868019104,grad_norm: 0.7653554363961681, iteration: 307964
loss: 1.016383409500122,grad_norm: 0.7777166971083164, iteration: 307965
loss: 1.0014441013336182,grad_norm: 0.8097324723262291, iteration: 307966
loss: 1.01267671585083,grad_norm: 0.9811713110738514, iteration: 307967
loss: 0.9945388436317444,grad_norm: 0.850555430735223, iteration: 307968
loss: 1.044843316078186,grad_norm: 1.0000000518279943, iteration: 307969
loss: 1.0042304992675781,grad_norm: 0.7645482155757793, iteration: 307970
loss: 0.9321040511131287,grad_norm: 0.663862463329766, iteration: 307971
loss: 0.9895055890083313,grad_norm: 0.882345356122458, iteration: 307972
loss: 0.9754167199134827,grad_norm: 0.8065621849822483, iteration: 307973
loss: 1.001374363899231,grad_norm: 0.9056892732930077, iteration: 307974
loss: 1.066055417060852,grad_norm: 0.9441360098891208, iteration: 307975
loss: 1.0164762735366821,grad_norm: 0.9660887236719455, iteration: 307976
loss: 1.0072308778762817,grad_norm: 0.7453454438021136, iteration: 307977
loss: 1.1027042865753174,grad_norm: 0.8801959500070207, iteration: 307978
loss: 1.1297515630722046,grad_norm: 0.9999996344901895, iteration: 307979
loss: 0.9698123931884766,grad_norm: 0.8054110973370191, iteration: 307980
loss: 0.9790351986885071,grad_norm: 0.7594489704416254, iteration: 307981
loss: 0.9944012761116028,grad_norm: 0.8909130268736469, iteration: 307982
loss: 0.9924079775810242,grad_norm: 0.7845828104366598, iteration: 307983
loss: 1.0205466747283936,grad_norm: 0.7436929380524411, iteration: 307984
loss: 1.0448027849197388,grad_norm: 0.9192684360430124, iteration: 307985
loss: 0.9675002694129944,grad_norm: 0.999999046424214, iteration: 307986
loss: 0.9882981181144714,grad_norm: 0.8991280819555372, iteration: 307987
loss: 0.9718175530433655,grad_norm: 0.8229769697482922, iteration: 307988
loss: 0.9984130859375,grad_norm: 0.8916088499542841, iteration: 307989
loss: 0.9852287769317627,grad_norm: 0.7839183237375955, iteration: 307990
loss: 0.9752611517906189,grad_norm: 0.999999183031826, iteration: 307991
loss: 0.9977738261222839,grad_norm: 0.7931807585225281, iteration: 307992
loss: 0.9980769157409668,grad_norm: 0.8079698018998805, iteration: 307993
loss: 1.0060635805130005,grad_norm: 0.853559516861704, iteration: 307994
loss: 1.0312304496765137,grad_norm: 0.9999994825558476, iteration: 307995
loss: 0.9850896000862122,grad_norm: 0.7889399592510351, iteration: 307996
loss: 1.0347646474838257,grad_norm: 0.8822503635112311, iteration: 307997
loss: 1.0108028650283813,grad_norm: 0.8931151725297811, iteration: 307998
loss: 0.9769799113273621,grad_norm: 0.7358849631284246, iteration: 307999
loss: 0.9877251386642456,grad_norm: 0.9999990870708677, iteration: 308000
loss: 0.9222647547721863,grad_norm: 0.8979400768041869, iteration: 308001
loss: 0.9941626787185669,grad_norm: 0.9999991386861753, iteration: 308002
loss: 0.987433671951294,grad_norm: 0.834493079860513, iteration: 308003
loss: 1.0270826816558838,grad_norm: 0.849254179216899, iteration: 308004
loss: 1.0483534336090088,grad_norm: 0.9375791992824255, iteration: 308005
loss: 0.9645916819572449,grad_norm: 0.9181226582979697, iteration: 308006
loss: 1.0570098161697388,grad_norm: 0.7125394011022725, iteration: 308007
loss: 1.0102840662002563,grad_norm: 0.8659090766251999, iteration: 308008
loss: 1.057612419128418,grad_norm: 0.9999993012919464, iteration: 308009
loss: 1.016538143157959,grad_norm: 0.6178359023355112, iteration: 308010
loss: 1.0118128061294556,grad_norm: 0.7920190345445031, iteration: 308011
loss: 0.9861482977867126,grad_norm: 0.8165672151568527, iteration: 308012
loss: 1.003706693649292,grad_norm: 0.8464046652847262, iteration: 308013
loss: 0.9716511368751526,grad_norm: 0.8562971606569765, iteration: 308014
loss: 0.9910753965377808,grad_norm: 0.9999989170367579, iteration: 308015
loss: 1.0082732439041138,grad_norm: 0.9058381308286578, iteration: 308016
loss: 1.0210199356079102,grad_norm: 0.6838286102523606, iteration: 308017
loss: 1.0030810832977295,grad_norm: 0.9999989408928832, iteration: 308018
loss: 0.9671611785888672,grad_norm: 0.9999991468018155, iteration: 308019
loss: 1.058638334274292,grad_norm: 0.9999999646862631, iteration: 308020
loss: 0.9919780492782593,grad_norm: 0.7356821363487885, iteration: 308021
loss: 0.9926367402076721,grad_norm: 0.7775449521787382, iteration: 308022
loss: 0.9778898358345032,grad_norm: 0.7973426616615398, iteration: 308023
loss: 1.01425302028656,grad_norm: 0.9999991623491692, iteration: 308024
loss: 0.9684876799583435,grad_norm: 0.9999995994085196, iteration: 308025
loss: 0.9827315807342529,grad_norm: 0.905141555862416, iteration: 308026
loss: 0.9695994257926941,grad_norm: 0.9999991977095407, iteration: 308027
loss: 1.0017926692962646,grad_norm: 0.9999996665163636, iteration: 308028
loss: 1.0519628524780273,grad_norm: 0.9238087695080387, iteration: 308029
loss: 1.02023184299469,grad_norm: 0.9474435156257757, iteration: 308030
loss: 0.9763917326927185,grad_norm: 0.8877014583261844, iteration: 308031
loss: 1.0071617364883423,grad_norm: 0.8592509737677411, iteration: 308032
loss: 1.0314065217971802,grad_norm: 0.8394446949583775, iteration: 308033
loss: 0.9997432231903076,grad_norm: 0.8547593303578154, iteration: 308034
loss: 0.9864991307258606,grad_norm: 0.9000102482596384, iteration: 308035
loss: 0.9793208241462708,grad_norm: 0.9999993099158215, iteration: 308036
loss: 1.0456222295761108,grad_norm: 0.9999991970909587, iteration: 308037
loss: 1.0091955661773682,grad_norm: 0.9506560212458764, iteration: 308038
loss: 0.9817972779273987,grad_norm: 0.6815954035860243, iteration: 308039
loss: 1.0595757961273193,grad_norm: 0.8831182354928163, iteration: 308040
loss: 1.1160471439361572,grad_norm: 0.9224636464151886, iteration: 308041
loss: 1.033792495727539,grad_norm: 0.8808150503041284, iteration: 308042
loss: 1.0098940134048462,grad_norm: 0.8092342695575228, iteration: 308043
loss: 1.0249499082565308,grad_norm: 0.9010448838106252, iteration: 308044
loss: 1.0210468769073486,grad_norm: 0.9185347500603239, iteration: 308045
loss: 0.9603235721588135,grad_norm: 0.8388033362005365, iteration: 308046
loss: 1.0725964307785034,grad_norm: 0.9999996459674457, iteration: 308047
loss: 0.9949264526367188,grad_norm: 0.7307029052032269, iteration: 308048
loss: 1.069520354270935,grad_norm: 0.9999993224840891, iteration: 308049
loss: 0.9967807531356812,grad_norm: 0.8958318701556743, iteration: 308050
loss: 1.0308700799942017,grad_norm: 0.7477848169333803, iteration: 308051
loss: 1.0859794616699219,grad_norm: 0.9999992981636909, iteration: 308052
loss: 1.00856614112854,grad_norm: 0.7308715950863527, iteration: 308053
loss: 1.0995914936065674,grad_norm: 0.999999342071549, iteration: 308054
loss: 1.003997564315796,grad_norm: 0.8935357058119215, iteration: 308055
loss: 1.0023819208145142,grad_norm: 0.9138765114430646, iteration: 308056
loss: 1.0067161321640015,grad_norm: 0.9999990781559968, iteration: 308057
loss: 0.9895680546760559,grad_norm: 0.8376504765203564, iteration: 308058
loss: 1.0195410251617432,grad_norm: 0.7683029072113996, iteration: 308059
loss: 0.9710119962692261,grad_norm: 0.8702768223900126, iteration: 308060
loss: 0.9859290719032288,grad_norm: 0.8626434796885403, iteration: 308061
loss: 1.0041457414627075,grad_norm: 0.9999993344842744, iteration: 308062
loss: 1.0050214529037476,grad_norm: 0.8726908500285177, iteration: 308063
loss: 0.9678069353103638,grad_norm: 0.8553420487796107, iteration: 308064
loss: 0.9922375679016113,grad_norm: 0.9999999565522973, iteration: 308065
loss: 0.9973738193511963,grad_norm: 0.8410098609133776, iteration: 308066
loss: 0.9766013622283936,grad_norm: 0.7585862032557636, iteration: 308067
loss: 0.9876425266265869,grad_norm: 0.9999996922468042, iteration: 308068
loss: 1.0056442022323608,grad_norm: 0.7365818612232035, iteration: 308069
loss: 0.9843229055404663,grad_norm: 0.8577425138912256, iteration: 308070
loss: 1.0942840576171875,grad_norm: 0.9999995988094275, iteration: 308071
loss: 0.9914847016334534,grad_norm: 0.9175941983590751, iteration: 308072
loss: 1.0057979822158813,grad_norm: 0.9999990428404538, iteration: 308073
loss: 0.9839602708816528,grad_norm: 0.7786024957341009, iteration: 308074
loss: 0.9799242615699768,grad_norm: 0.9999999784534531, iteration: 308075
loss: 1.0014913082122803,grad_norm: 0.6682792913877319, iteration: 308076
loss: 1.0275146961212158,grad_norm: 0.7739844235077825, iteration: 308077
loss: 0.960109293460846,grad_norm: 0.7511471078638708, iteration: 308078
loss: 1.000896692276001,grad_norm: 0.797103487425253, iteration: 308079
loss: 0.9676388502120972,grad_norm: 0.7889172698896032, iteration: 308080
loss: 1.0408241748809814,grad_norm: 0.8531749261050549, iteration: 308081
loss: 1.0124729871749878,grad_norm: 0.8286505899048026, iteration: 308082
loss: 0.9695387482643127,grad_norm: 0.9172797115581146, iteration: 308083
loss: 1.0014837980270386,grad_norm: 0.8114752063411377, iteration: 308084
loss: 1.0310418605804443,grad_norm: 0.7427714315626308, iteration: 308085
loss: 1.0055198669433594,grad_norm: 0.999998967475876, iteration: 308086
loss: 1.028895616531372,grad_norm: 0.9752464578683804, iteration: 308087
loss: 1.0447789430618286,grad_norm: 0.7853445379044197, iteration: 308088
loss: 1.0178111791610718,grad_norm: 0.8148326650758222, iteration: 308089
loss: 1.0165989398956299,grad_norm: 0.8047380660940483, iteration: 308090
loss: 0.9977816343307495,grad_norm: 0.8945551301238011, iteration: 308091
loss: 1.0712820291519165,grad_norm: 0.7684483440012271, iteration: 308092
loss: 1.0116649866104126,grad_norm: 0.9315788180740501, iteration: 308093
loss: 1.0540978908538818,grad_norm: 0.9627083487935093, iteration: 308094
loss: 1.019607663154602,grad_norm: 0.9370324439568277, iteration: 308095
loss: 1.0013290643692017,grad_norm: 0.806559249169407, iteration: 308096
loss: 1.022509217262268,grad_norm: 0.7162472588984254, iteration: 308097
loss: 0.9875419735908508,grad_norm: 0.9999993953748435, iteration: 308098
loss: 1.0002576112747192,grad_norm: 0.890782071655164, iteration: 308099
loss: 0.9688238501548767,grad_norm: 0.9466100305295799, iteration: 308100
loss: 1.0886800289154053,grad_norm: 0.8674122213514948, iteration: 308101
loss: 0.9876247644424438,grad_norm: 0.9616541522730641, iteration: 308102
loss: 0.9886504411697388,grad_norm: 0.7840847362327097, iteration: 308103
loss: 1.0233734846115112,grad_norm: 0.9999991756728934, iteration: 308104
loss: 0.9886001944541931,grad_norm: 0.923788187692512, iteration: 308105
loss: 0.9971513152122498,grad_norm: 0.9114878115192859, iteration: 308106
loss: 0.9887269735336304,grad_norm: 0.8482741234777291, iteration: 308107
loss: 1.02069890499115,grad_norm: 0.9999997280279476, iteration: 308108
loss: 1.0105739831924438,grad_norm: 0.7832775858758504, iteration: 308109
loss: 1.0181511640548706,grad_norm: 0.7112325613269427, iteration: 308110
loss: 1.040163516998291,grad_norm: 0.8971459598351516, iteration: 308111
loss: 1.0135102272033691,grad_norm: 0.8822836843240935, iteration: 308112
loss: 1.0384514331817627,grad_norm: 0.8926071918064655, iteration: 308113
loss: 0.9843912124633789,grad_norm: 0.999999081798876, iteration: 308114
loss: 0.9891815185546875,grad_norm: 0.9999990582788744, iteration: 308115
loss: 0.9969022870063782,grad_norm: 0.9239257931294427, iteration: 308116
loss: 0.9909144639968872,grad_norm: 0.9150405524492322, iteration: 308117
loss: 0.9854960441589355,grad_norm: 0.8778640830047821, iteration: 308118
loss: 1.0544370412826538,grad_norm: 0.9162478612535692, iteration: 308119
loss: 0.9827882051467896,grad_norm: 0.7563975052902338, iteration: 308120
loss: 1.004719614982605,grad_norm: 0.9999990022936661, iteration: 308121
loss: 1.0217305421829224,grad_norm: 0.7331045064905816, iteration: 308122
loss: 1.0392132997512817,grad_norm: 0.9427617850729866, iteration: 308123
loss: 1.0140576362609863,grad_norm: 0.8962344387553469, iteration: 308124
loss: 1.029060959815979,grad_norm: 1.000000009423227, iteration: 308125
loss: 0.9915571808815002,grad_norm: 0.907345882412131, iteration: 308126
loss: 1.0093145370483398,grad_norm: 0.7607028612172759, iteration: 308127
loss: 1.0210845470428467,grad_norm: 0.8939517350161135, iteration: 308128
loss: 1.043984055519104,grad_norm: 0.9481781956419301, iteration: 308129
loss: 0.9753409624099731,grad_norm: 0.9999992126210514, iteration: 308130
loss: 0.9771732091903687,grad_norm: 0.8929396211869327, iteration: 308131
loss: 1.0207600593566895,grad_norm: 0.8839114313920663, iteration: 308132
loss: 0.9991016983985901,grad_norm: 0.775261711500262, iteration: 308133
loss: 1.1638182401657104,grad_norm: 0.9999994269416328, iteration: 308134
loss: 1.04977285861969,grad_norm: 0.9999997528884285, iteration: 308135
loss: 1.0160552263259888,grad_norm: 0.9999992606792912, iteration: 308136
loss: 1.0414395332336426,grad_norm: 0.7919199557678337, iteration: 308137
loss: 1.011042594909668,grad_norm: 0.9999991669203359, iteration: 308138
loss: 1.0264085531234741,grad_norm: 0.8546164405330309, iteration: 308139
loss: 0.990548312664032,grad_norm: 0.8858590118064426, iteration: 308140
loss: 0.9733923673629761,grad_norm: 0.9631534728069462, iteration: 308141
loss: 0.9497408270835876,grad_norm: 0.8225414540410166, iteration: 308142
loss: 1.0008800029754639,grad_norm: 0.9856447442775262, iteration: 308143
loss: 0.981680154800415,grad_norm: 0.9999997584248728, iteration: 308144
loss: 1.0404409170150757,grad_norm: 0.9756555846744143, iteration: 308145
loss: 0.9928966760635376,grad_norm: 0.9999998669470936, iteration: 308146
loss: 1.0122123956680298,grad_norm: 0.9999990913711246, iteration: 308147
loss: 0.9955195784568787,grad_norm: 0.909468522840765, iteration: 308148
loss: 1.0187640190124512,grad_norm: 0.8170313792784604, iteration: 308149
loss: 1.034995198249817,grad_norm: 0.9290284089424709, iteration: 308150
loss: 0.9696311354637146,grad_norm: 0.7539266300460138, iteration: 308151
loss: 1.0275698900222778,grad_norm: 0.8915251510532946, iteration: 308152
loss: 1.0090327262878418,grad_norm: 0.9999993840186683, iteration: 308153
loss: 0.9907822012901306,grad_norm: 0.8838083029708605, iteration: 308154
loss: 0.9857792854309082,grad_norm: 0.7480048610478347, iteration: 308155
loss: 1.0139278173446655,grad_norm: 0.7905902947826119, iteration: 308156
loss: 1.0105150938034058,grad_norm: 0.9187060871850576, iteration: 308157
loss: 0.9775687456130981,grad_norm: 0.8222223539398195, iteration: 308158
loss: 0.9926289916038513,grad_norm: 0.7322076914192177, iteration: 308159
loss: 1.0340968370437622,grad_norm: 0.9396972022844271, iteration: 308160
loss: 1.0164284706115723,grad_norm: 0.9999999283222025, iteration: 308161
loss: 1.0378857851028442,grad_norm: 0.7985947373403796, iteration: 308162
loss: 0.9977121949195862,grad_norm: 0.887969743577954, iteration: 308163
loss: 0.9935526847839355,grad_norm: 0.8834090859109412, iteration: 308164
loss: 1.0539168119430542,grad_norm: 0.9576282362428263, iteration: 308165
loss: 1.0143190622329712,grad_norm: 0.9999993862143572, iteration: 308166
loss: 1.011136531829834,grad_norm: 0.9999993243366179, iteration: 308167
loss: 0.9689712524414062,grad_norm: 0.9999991025327821, iteration: 308168
loss: 0.9935157895088196,grad_norm: 0.7769900064680609, iteration: 308169
loss: 1.2971111536026,grad_norm: 0.9999991436694555, iteration: 308170
loss: 1.0635145902633667,grad_norm: 0.9999994290616413, iteration: 308171
loss: 1.1130832433700562,grad_norm: 0.9999999051255698, iteration: 308172
loss: 0.9921574592590332,grad_norm: 0.999999151974078, iteration: 308173
loss: 1.1185685396194458,grad_norm: 0.9999993703511411, iteration: 308174
loss: 0.97720867395401,grad_norm: 0.8778176061051787, iteration: 308175
loss: 0.9984272122383118,grad_norm: 0.8860952143011454, iteration: 308176
loss: 1.0725072622299194,grad_norm: 0.857292180780536, iteration: 308177
loss: 1.0117292404174805,grad_norm: 0.7214028705326316, iteration: 308178
loss: 1.0138506889343262,grad_norm: 0.9999999099936323, iteration: 308179
loss: 0.9832188487052917,grad_norm: 0.7466535373390729, iteration: 308180
loss: 1.0150868892669678,grad_norm: 0.7260360144535629, iteration: 308181
loss: 1.0360338687896729,grad_norm: 0.9999991274375002, iteration: 308182
loss: 1.026816964149475,grad_norm: 0.9999995237838836, iteration: 308183
loss: 0.9783088564872742,grad_norm: 0.780721309855505, iteration: 308184
loss: 1.0519295930862427,grad_norm: 0.8703267985376277, iteration: 308185
loss: 1.2347368001937866,grad_norm: 0.9999991088426308, iteration: 308186
loss: 1.0223032236099243,grad_norm: 0.9727365453250728, iteration: 308187
loss: 1.0247939825057983,grad_norm: 0.99999992431721, iteration: 308188
loss: 0.9858543276786804,grad_norm: 0.9999998379308341, iteration: 308189
loss: 1.011838436126709,grad_norm: 0.9999995806164254, iteration: 308190
loss: 1.0096770524978638,grad_norm: 0.9999999762288435, iteration: 308191
loss: 1.008337378501892,grad_norm: 1.0000000781762222, iteration: 308192
loss: 1.015031337738037,grad_norm: 0.7478962284914895, iteration: 308193
loss: 1.0203639268875122,grad_norm: 0.9999991522436246, iteration: 308194
loss: 1.10868239402771,grad_norm: 0.9999995322504908, iteration: 308195
loss: 1.0415711402893066,grad_norm: 0.8515216491451341, iteration: 308196
loss: 1.0542861223220825,grad_norm: 0.9985193457666648, iteration: 308197
loss: 1.0461511611938477,grad_norm: 0.9771118509286384, iteration: 308198
loss: 0.9819573163986206,grad_norm: 0.8158277062801718, iteration: 308199
loss: 1.021767020225525,grad_norm: 0.9999994461704348, iteration: 308200
loss: 1.024483323097229,grad_norm: 0.9999993775390037, iteration: 308201
loss: 1.0063445568084717,grad_norm: 0.9325261298636567, iteration: 308202
loss: 1.02177095413208,grad_norm: 0.7820300088668963, iteration: 308203
loss: 1.0776180028915405,grad_norm: 1.0000000034030054, iteration: 308204
loss: 1.0245730876922607,grad_norm: 0.9999992377527231, iteration: 308205
loss: 1.092993974685669,grad_norm: 0.9999992677315129, iteration: 308206
loss: 1.0327471494674683,grad_norm: 0.9159902811810904, iteration: 308207
loss: 1.064494252204895,grad_norm: 0.9999996563621479, iteration: 308208
loss: 1.0167956352233887,grad_norm: 0.8081383198800186, iteration: 308209
loss: 1.026258945465088,grad_norm: 0.878873184358613, iteration: 308210
loss: 1.030529499053955,grad_norm: 0.8855799884597919, iteration: 308211
loss: 0.9985457062721252,grad_norm: 0.8664589280371563, iteration: 308212
loss: 0.9629092812538147,grad_norm: 0.9255882248353188, iteration: 308213
loss: 1.0576590299606323,grad_norm: 0.8537878467263827, iteration: 308214
loss: 0.9907687306404114,grad_norm: 0.9718217644871504, iteration: 308215
loss: 1.0322707891464233,grad_norm: 0.9282929593847853, iteration: 308216
loss: 1.056966781616211,grad_norm: 0.999999664263154, iteration: 308217
loss: 1.0257318019866943,grad_norm: 0.790216074343787, iteration: 308218
loss: 0.9840056300163269,grad_norm: 0.8898713657495076, iteration: 308219
loss: 1.0051279067993164,grad_norm: 0.9739841773520441, iteration: 308220
loss: 1.0124374628067017,grad_norm: 0.9347811434955025, iteration: 308221
loss: 1.0841622352600098,grad_norm: 0.8416692569481553, iteration: 308222
loss: 1.0549250841140747,grad_norm: 0.9642197287990205, iteration: 308223
loss: 1.0147230625152588,grad_norm: 0.8231568162495907, iteration: 308224
loss: 1.0119742155075073,grad_norm: 0.9218902861951274, iteration: 308225
loss: 1.0347142219543457,grad_norm: 0.7753568878354552, iteration: 308226
loss: 0.9827823638916016,grad_norm: 0.8333769910422207, iteration: 308227
loss: 1.025563359260559,grad_norm: 0.9999995996060904, iteration: 308228
loss: 0.9963804483413696,grad_norm: 0.9999991711364024, iteration: 308229
loss: 1.016657829284668,grad_norm: 0.899358160942956, iteration: 308230
loss: 0.9887637495994568,grad_norm: 0.9999997735964383, iteration: 308231
loss: 1.023273229598999,grad_norm: 0.9999990401558174, iteration: 308232
loss: 1.003515601158142,grad_norm: 0.8187607956575591, iteration: 308233
loss: 1.008073091506958,grad_norm: 0.8221509643861814, iteration: 308234
loss: 1.0052454471588135,grad_norm: 0.7000349191478082, iteration: 308235
loss: 1.0112935304641724,grad_norm: 0.8990194001609237, iteration: 308236
loss: 1.1214468479156494,grad_norm: 0.9999998956806546, iteration: 308237
loss: 1.0547120571136475,grad_norm: 0.9999989990127763, iteration: 308238
loss: 1.131775975227356,grad_norm: 0.9999998441540449, iteration: 308239
loss: 0.9756616353988647,grad_norm: 0.9606446842350874, iteration: 308240
loss: 1.014957070350647,grad_norm: 0.8789171100668955, iteration: 308241
loss: 1.0791089534759521,grad_norm: 0.9999999940784242, iteration: 308242
loss: 1.0011757612228394,grad_norm: 0.9701808656790433, iteration: 308243
loss: 1.0134129524230957,grad_norm: 0.7263593452150658, iteration: 308244
loss: 1.0112543106079102,grad_norm: 0.7003930187505247, iteration: 308245
loss: 0.9872174263000488,grad_norm: 0.8470431356412592, iteration: 308246
loss: 0.9983291625976562,grad_norm: 0.7090609608279319, iteration: 308247
loss: 1.0160927772521973,grad_norm: 0.9363846509245395, iteration: 308248
loss: 0.9762648344039917,grad_norm: 0.7281382486212665, iteration: 308249
loss: 1.1313217878341675,grad_norm: 0.9999994973530434, iteration: 308250
loss: 0.9825587868690491,grad_norm: 0.9164459108551166, iteration: 308251
loss: 1.1964703798294067,grad_norm: 0.9999996321720803, iteration: 308252
loss: 1.0125555992126465,grad_norm: 0.7528333525963421, iteration: 308253
loss: 0.9876389503479004,grad_norm: 0.9999991702231457, iteration: 308254
loss: 0.9961947202682495,grad_norm: 0.8646795871270401, iteration: 308255
loss: 1.2337846755981445,grad_norm: 0.99999974831882, iteration: 308256
loss: 0.9664139151573181,grad_norm: 0.9414024106047676, iteration: 308257
loss: 0.9722853302955627,grad_norm: 0.6939529391942602, iteration: 308258
loss: 1.0607937574386597,grad_norm: 0.9999990314436751, iteration: 308259
loss: 0.9615212678909302,grad_norm: 0.8352538237329123, iteration: 308260
loss: 0.9980722069740295,grad_norm: 0.7726756096287136, iteration: 308261
loss: 1.0604636669158936,grad_norm: 0.9999992687543001, iteration: 308262
loss: 1.033400058746338,grad_norm: 0.7789647802446239, iteration: 308263
loss: 0.9802219867706299,grad_norm: 0.9662941929816069, iteration: 308264
loss: 0.9852196574211121,grad_norm: 0.8383775743978301, iteration: 308265
loss: 1.043721079826355,grad_norm: 0.9999996265280541, iteration: 308266
loss: 0.9411633014678955,grad_norm: 0.971238367187832, iteration: 308267
loss: 0.9915215373039246,grad_norm: 0.8783254509252282, iteration: 308268
loss: 0.9941293001174927,grad_norm: 0.8339826447248231, iteration: 308269
loss: 1.021108627319336,grad_norm: 0.9999994829790356, iteration: 308270
loss: 1.0347360372543335,grad_norm: 0.7169754263025538, iteration: 308271
loss: 1.0238256454467773,grad_norm: 0.9999996738021447, iteration: 308272
loss: 1.0017194747924805,grad_norm: 0.8421216406542243, iteration: 308273
loss: 1.0696898698806763,grad_norm: 0.9457135772390083, iteration: 308274
loss: 0.9702566862106323,grad_norm: 0.9999990385903235, iteration: 308275
loss: 0.9984447956085205,grad_norm: 0.9999992483835736, iteration: 308276
loss: 1.0932426452636719,grad_norm: 0.9999991864418035, iteration: 308277
loss: 1.0505046844482422,grad_norm: 0.8117439250625785, iteration: 308278
loss: 1.0430965423583984,grad_norm: 0.857300596942477, iteration: 308279
loss: 1.014890193939209,grad_norm: 0.9715270624691829, iteration: 308280
loss: 0.9983330965042114,grad_norm: 0.7876667455931041, iteration: 308281
loss: 1.0496965646743774,grad_norm: 0.950370176816961, iteration: 308282
loss: 1.0065901279449463,grad_norm: 0.7729581935543728, iteration: 308283
loss: 1.0126781463623047,grad_norm: 0.7831579564973801, iteration: 308284
loss: 1.023478388786316,grad_norm: 0.779183540824909, iteration: 308285
loss: 1.0209767818450928,grad_norm: 0.9310770729651434, iteration: 308286
loss: 1.3624187707901,grad_norm: 0.9999998697254864, iteration: 308287
loss: 1.0170718431472778,grad_norm: 0.763930235238966, iteration: 308288
loss: 0.9767532348632812,grad_norm: 0.8490631801720873, iteration: 308289
loss: 1.0056296586990356,grad_norm: 0.7438701222308278, iteration: 308290
loss: 1.0650887489318848,grad_norm: 0.9930064284908086, iteration: 308291
loss: 1.004150152206421,grad_norm: 0.7463111620455983, iteration: 308292
loss: 0.988268256187439,grad_norm: 1.0000000707839287, iteration: 308293
loss: 0.9960757493972778,grad_norm: 0.9999989752796299, iteration: 308294
loss: 0.9699766635894775,grad_norm: 0.7304377816330522, iteration: 308295
loss: 0.999245822429657,grad_norm: 0.891777805900827, iteration: 308296
loss: 1.3115326166152954,grad_norm: 0.9999996821720294, iteration: 308297
loss: 1.0214319229125977,grad_norm: 0.929269129533367, iteration: 308298
loss: 1.0030263662338257,grad_norm: 0.7235687909595095, iteration: 308299
loss: 1.013632893562317,grad_norm: 0.999999311871326, iteration: 308300
loss: 0.9971477389335632,grad_norm: 0.8723571975998554, iteration: 308301
loss: 0.9748408198356628,grad_norm: 0.8872966518399352, iteration: 308302
loss: 1.012775182723999,grad_norm: 0.8703552911099062, iteration: 308303
loss: 1.02254319190979,grad_norm: 0.9999994014192085, iteration: 308304
loss: 1.0610276460647583,grad_norm: 0.999999870694116, iteration: 308305
loss: 1.0316112041473389,grad_norm: 1.0000000002284168, iteration: 308306
loss: 1.0071204900741577,grad_norm: 0.9999990622649333, iteration: 308307
loss: 1.0017505884170532,grad_norm: 0.6378411459792346, iteration: 308308
loss: 1.0166746377944946,grad_norm: 0.7790973002276657, iteration: 308309
loss: 1.0018023252487183,grad_norm: 0.9204791375545603, iteration: 308310
loss: 0.990327775478363,grad_norm: 0.9999990686266008, iteration: 308311
loss: 0.9972162246704102,grad_norm: 0.7428531857626502, iteration: 308312
loss: 1.0332934856414795,grad_norm: 0.8781056962196802, iteration: 308313
loss: 1.0316299200057983,grad_norm: 0.9999992582504881, iteration: 308314
loss: 1.0151950120925903,grad_norm: 0.8288824327837091, iteration: 308315
loss: 1.1113051176071167,grad_norm: 0.9999994264777823, iteration: 308316
loss: 0.9975807666778564,grad_norm: 0.8628608019512288, iteration: 308317
loss: 1.0038319826126099,grad_norm: 0.9999991763654345, iteration: 308318
loss: 1.0362340211868286,grad_norm: 0.999999096221497, iteration: 308319
loss: 1.0385112762451172,grad_norm: 0.8402777647424223, iteration: 308320
loss: 1.0206820964813232,grad_norm: 0.8262499164876482, iteration: 308321
loss: 0.9982833862304688,grad_norm: 0.7660540548494118, iteration: 308322
loss: 1.132003664970398,grad_norm: 0.9999997915079109, iteration: 308323
loss: 1.001428484916687,grad_norm: 0.8515249731517357, iteration: 308324
loss: 0.9821416735649109,grad_norm: 0.730602182070977, iteration: 308325
loss: 1.0280547142028809,grad_norm: 0.8034765770790223, iteration: 308326
loss: 1.0107049942016602,grad_norm: 0.8491072022909882, iteration: 308327
loss: 1.0376554727554321,grad_norm: 0.9999993789031759, iteration: 308328
loss: 0.9585437774658203,grad_norm: 0.7921382501519062, iteration: 308329
loss: 1.1030429601669312,grad_norm: 0.821719134741058, iteration: 308330
loss: 0.9574809670448303,grad_norm: 0.9999989240151724, iteration: 308331
loss: 0.9942978024482727,grad_norm: 0.8398269855944862, iteration: 308332
loss: 1.0094904899597168,grad_norm: 0.8935180432769455, iteration: 308333
loss: 1.0033928155899048,grad_norm: 0.9096324513550894, iteration: 308334
loss: 0.9803571701049805,grad_norm: 0.6816745988063073, iteration: 308335
loss: 1.180490255355835,grad_norm: 0.9999999629227508, iteration: 308336
loss: 1.0199251174926758,grad_norm: 0.8778410433517545, iteration: 308337
loss: 0.9825958013534546,grad_norm: 0.8053455817612385, iteration: 308338
loss: 1.0324679613113403,grad_norm: 0.8843760566336236, iteration: 308339
loss: 1.0233855247497559,grad_norm: 0.9926770697598676, iteration: 308340
loss: 1.0004934072494507,grad_norm: 0.9153835474241093, iteration: 308341
loss: 1.0086395740509033,grad_norm: 0.9355139838953328, iteration: 308342
loss: 0.9901443123817444,grad_norm: 0.7634956093939205, iteration: 308343
loss: 1.0321381092071533,grad_norm: 0.9999998809238935, iteration: 308344
loss: 1.1221611499786377,grad_norm: 0.9017504891959827, iteration: 308345
loss: 0.9760259389877319,grad_norm: 0.8937406047201613, iteration: 308346
loss: 1.104259967803955,grad_norm: 0.9999992905936306, iteration: 308347
loss: 1.0134838819503784,grad_norm: 0.7894001471713789, iteration: 308348
loss: 1.0435631275177002,grad_norm: 0.8975661359280489, iteration: 308349
loss: 0.9492857456207275,grad_norm: 0.8655128345031879, iteration: 308350
loss: 1.002053141593933,grad_norm: 0.7583440343677698, iteration: 308351
loss: 0.9833836555480957,grad_norm: 0.85562753191756, iteration: 308352
loss: 0.9956239461898804,grad_norm: 0.8029987877756541, iteration: 308353
loss: 1.010328769683838,grad_norm: 0.9215379784010063, iteration: 308354
loss: 1.0217316150665283,grad_norm: 0.7891291217623173, iteration: 308355
loss: 1.011110544204712,grad_norm: 0.7790665083065077, iteration: 308356
loss: 0.9818974137306213,grad_norm: 0.8168762160513529, iteration: 308357
loss: 1.0881564617156982,grad_norm: 0.8225058073797713, iteration: 308358
loss: 1.0133589506149292,grad_norm: 0.8147261339869739, iteration: 308359
loss: 1.0270514488220215,grad_norm: 0.9487890410164037, iteration: 308360
loss: 0.9760539531707764,grad_norm: 0.8373606116965217, iteration: 308361
loss: 1.0070183277130127,grad_norm: 0.8263555770957964, iteration: 308362
loss: 1.0018929243087769,grad_norm: 0.7853393423826087, iteration: 308363
loss: 1.0398061275482178,grad_norm: 0.8822863670909914, iteration: 308364
loss: 0.9925028681755066,grad_norm: 0.8031400007801062, iteration: 308365
loss: 0.993355929851532,grad_norm: 0.725080898672977, iteration: 308366
loss: 1.0396054983139038,grad_norm: 0.922314731747448, iteration: 308367
loss: 1.0113368034362793,grad_norm: 0.886630471754957, iteration: 308368
loss: 1.070907473564148,grad_norm: 0.9024747868635575, iteration: 308369
loss: 1.0086860656738281,grad_norm: 0.7796502116482656, iteration: 308370
loss: 1.0257797241210938,grad_norm: 0.8484986204506468, iteration: 308371
loss: 1.0086249113082886,grad_norm: 0.8955242102334438, iteration: 308372
loss: 0.9728109836578369,grad_norm: 0.9583410366827133, iteration: 308373
loss: 0.9928038120269775,grad_norm: 0.7257955404722543, iteration: 308374
loss: 0.981514573097229,grad_norm: 0.7457522745818107, iteration: 308375
loss: 0.9599478244781494,grad_norm: 0.8264251073579421, iteration: 308376
loss: 1.0193947553634644,grad_norm: 0.8502256565972934, iteration: 308377
loss: 1.0010031461715698,grad_norm: 0.835903502552495, iteration: 308378
loss: 1.0279561281204224,grad_norm: 0.7894273883253238, iteration: 308379
loss: 1.017876386642456,grad_norm: 0.9877380594470194, iteration: 308380
loss: 1.0393623113632202,grad_norm: 0.9999998686273601, iteration: 308381
loss: 0.9330955147743225,grad_norm: 0.7754701164601588, iteration: 308382
loss: 0.9806663990020752,grad_norm: 0.8712386649346582, iteration: 308383
loss: 1.0052446126937866,grad_norm: 0.840026496412317, iteration: 308384
loss: 0.9991994500160217,grad_norm: 0.7546525462204097, iteration: 308385
loss: 0.9748275876045227,grad_norm: 0.999999722539602, iteration: 308386
loss: 0.9864417910575867,grad_norm: 0.9170975210564909, iteration: 308387
loss: 1.007714867591858,grad_norm: 0.9348677691006411, iteration: 308388
loss: 0.9698486328125,grad_norm: 0.7163772180334901, iteration: 308389
loss: 0.9828489422798157,grad_norm: 0.9999990997400356, iteration: 308390
loss: 1.0016010999679565,grad_norm: 0.7130691302791234, iteration: 308391
loss: 0.9668174386024475,grad_norm: 0.7665519217164928, iteration: 308392
loss: 0.9993308186531067,grad_norm: 0.7877776127461537, iteration: 308393
loss: 0.9984647631645203,grad_norm: 0.7976288707407398, iteration: 308394
loss: 1.1016066074371338,grad_norm: 0.9999998277709647, iteration: 308395
loss: 0.9773816466331482,grad_norm: 0.9100339441143577, iteration: 308396
loss: 0.9776061773300171,grad_norm: 0.9475531676324551, iteration: 308397
loss: 0.9559749960899353,grad_norm: 0.8257703331233097, iteration: 308398
loss: 1.0199151039123535,grad_norm: 0.999998978806023, iteration: 308399
loss: 1.0287938117980957,grad_norm: 0.8029011706491137, iteration: 308400
loss: 1.0130321979522705,grad_norm: 0.8550123711548167, iteration: 308401
loss: 1.0026463270187378,grad_norm: 0.7192493996650268, iteration: 308402
loss: 1.015553593635559,grad_norm: 0.9999991558889708, iteration: 308403
loss: 1.0071091651916504,grad_norm: 0.9999990242971318, iteration: 308404
loss: 1.0536437034606934,grad_norm: 0.8330981693710626, iteration: 308405
loss: 1.0092469453811646,grad_norm: 0.7983855344986769, iteration: 308406
loss: 0.9995152354240417,grad_norm: 0.7827942918350671, iteration: 308407
loss: 1.058667540550232,grad_norm: 0.8990876132874086, iteration: 308408
loss: 0.9776211380958557,grad_norm: 0.7956841396258189, iteration: 308409
loss: 0.9953650832176208,grad_norm: 0.810440100601103, iteration: 308410
loss: 1.0073834657669067,grad_norm: 0.9598069982745714, iteration: 308411
loss: 1.005052089691162,grad_norm: 0.9798207482375639, iteration: 308412
loss: 0.991341233253479,grad_norm: 0.8376896430399805, iteration: 308413
loss: 1.0271780490875244,grad_norm: 0.8129586314834694, iteration: 308414
loss: 0.9854485988616943,grad_norm: 0.8608909267762592, iteration: 308415
loss: 0.9851654767990112,grad_norm: 0.7154993695976901, iteration: 308416
loss: 1.006925344467163,grad_norm: 0.7385877717995414, iteration: 308417
loss: 0.9839548468589783,grad_norm: 0.8309633019742938, iteration: 308418
loss: 1.0332119464874268,grad_norm: 0.7720929289634028, iteration: 308419
loss: 1.0024112462997437,grad_norm: 0.7449134473100707, iteration: 308420
loss: 1.0286144018173218,grad_norm: 0.9999994854131573, iteration: 308421
loss: 0.9753274321556091,grad_norm: 0.8228276257442144, iteration: 308422
loss: 1.0172646045684814,grad_norm: 0.9697102324897388, iteration: 308423
loss: 0.98199462890625,grad_norm: 0.9999990411654829, iteration: 308424
loss: 1.0158755779266357,grad_norm: 0.7999391195222484, iteration: 308425
loss: 0.9834275841712952,grad_norm: 0.844374445013184, iteration: 308426
loss: 1.032728910446167,grad_norm: 0.9028853173638914, iteration: 308427
loss: 1.0398658514022827,grad_norm: 0.931176298056557, iteration: 308428
loss: 0.9963864684104919,grad_norm: 0.7866934830021122, iteration: 308429
loss: 1.0145610570907593,grad_norm: 0.9999991658803374, iteration: 308430
loss: 1.1937320232391357,grad_norm: 0.9999999633251598, iteration: 308431
loss: 1.0134079456329346,grad_norm: 0.8835675003520563, iteration: 308432
loss: 0.9911475777626038,grad_norm: 0.9925645022589202, iteration: 308433
loss: 0.9837329983711243,grad_norm: 0.8347591239417621, iteration: 308434
loss: 1.0248892307281494,grad_norm: 0.8002895446456971, iteration: 308435
loss: 0.999723494052887,grad_norm: 0.7672698409018437, iteration: 308436
loss: 0.9828481078147888,grad_norm: 0.8378816554555216, iteration: 308437
loss: 1.0098241567611694,grad_norm: 0.8681083777692329, iteration: 308438
loss: 1.0003923177719116,grad_norm: 0.9136802369836691, iteration: 308439
loss: 1.1846940517425537,grad_norm: 0.9999996815705142, iteration: 308440
loss: 1.0083203315734863,grad_norm: 0.8658274823352489, iteration: 308441
loss: 0.9899331331253052,grad_norm: 0.9431286280479669, iteration: 308442
loss: 0.9507424235343933,grad_norm: 0.7362173058217757, iteration: 308443
loss: 1.0385816097259521,grad_norm: 0.9293523873113088, iteration: 308444
loss: 1.0539356470108032,grad_norm: 0.7755886230543977, iteration: 308445
loss: 1.0079182386398315,grad_norm: 0.7148262889062498, iteration: 308446
loss: 0.9374547600746155,grad_norm: 0.7158206816494876, iteration: 308447
loss: 1.0095887184143066,grad_norm: 0.7667824244472254, iteration: 308448
loss: 1.00825834274292,grad_norm: 0.897140403006667, iteration: 308449
loss: 0.9927209615707397,grad_norm: 0.9540096226435506, iteration: 308450
loss: 1.0083457231521606,grad_norm: 0.8660178122952081, iteration: 308451
loss: 1.014788269996643,grad_norm: 0.913768065369253, iteration: 308452
loss: 1.043960690498352,grad_norm: 0.9999998224847214, iteration: 308453
loss: 1.0020402669906616,grad_norm: 0.8209918473771578, iteration: 308454
loss: 0.984856367111206,grad_norm: 0.9999990384172955, iteration: 308455
loss: 1.003670334815979,grad_norm: 0.8716893688454099, iteration: 308456
loss: 0.9647987484931946,grad_norm: 0.8108064173075303, iteration: 308457
loss: 1.0116745233535767,grad_norm: 0.9521706610741388, iteration: 308458
loss: 0.9983510375022888,grad_norm: 0.8095936524746146, iteration: 308459
loss: 0.9611796140670776,grad_norm: 0.7831969204685679, iteration: 308460
loss: 1.0182758569717407,grad_norm: 0.9999990970517505, iteration: 308461
loss: 1.0185096263885498,grad_norm: 0.999999027285532, iteration: 308462
loss: 0.9903830885887146,grad_norm: 0.7159703022625463, iteration: 308463
loss: 1.0090018510818481,grad_norm: 0.8000923544246649, iteration: 308464
loss: 0.9655286073684692,grad_norm: 0.646713518798492, iteration: 308465
loss: 0.9786200523376465,grad_norm: 0.7217215036433635, iteration: 308466
loss: 0.9958155751228333,grad_norm: 0.9322685662325095, iteration: 308467
loss: 1.0377362966537476,grad_norm: 0.9999996446923874, iteration: 308468
loss: 1.100560188293457,grad_norm: 0.76888016922947, iteration: 308469
loss: 0.9896228909492493,grad_norm: 0.8092869025532482, iteration: 308470
loss: 1.0288008451461792,grad_norm: 0.9123181658795707, iteration: 308471
loss: 0.991825520992279,grad_norm: 0.7519404243358209, iteration: 308472
loss: 1.0213794708251953,grad_norm: 0.9628669872329632, iteration: 308473
loss: 1.0226562023162842,grad_norm: 0.8158376906714941, iteration: 308474
loss: 1.0107494592666626,grad_norm: 0.7463904633640692, iteration: 308475
loss: 0.9976950287818909,grad_norm: 0.92892325923084, iteration: 308476
loss: 0.9761321544647217,grad_norm: 0.9999989954383889, iteration: 308477
loss: 1.008872151374817,grad_norm: 0.6465319687036726, iteration: 308478
loss: 1.0813990831375122,grad_norm: 0.9999993016601223, iteration: 308479
loss: 1.0330259799957275,grad_norm: 0.9999989862310452, iteration: 308480
loss: 0.9933140277862549,grad_norm: 0.8672270105744345, iteration: 308481
loss: 1.0104701519012451,grad_norm: 0.8409271704848291, iteration: 308482
loss: 0.9923368096351624,grad_norm: 0.8483283747733963, iteration: 308483
loss: 1.0283899307250977,grad_norm: 0.7989238785510531, iteration: 308484
loss: 1.0231945514678955,grad_norm: 0.7118292565513548, iteration: 308485
loss: 0.9986515641212463,grad_norm: 0.9820332138070209, iteration: 308486
loss: 0.9962579011917114,grad_norm: 0.7535919392631566, iteration: 308487
loss: 1.062913179397583,grad_norm: 0.8148299515196431, iteration: 308488
loss: 1.0248019695281982,grad_norm: 0.8667680675990417, iteration: 308489
loss: 1.0055427551269531,grad_norm: 0.9999995168519814, iteration: 308490
loss: 0.9764983654022217,grad_norm: 0.7753030954256256, iteration: 308491
loss: 0.9806070327758789,grad_norm: 0.9058777538639129, iteration: 308492
loss: 1.0708452463150024,grad_norm: 0.9999994591511828, iteration: 308493
loss: 1.0226167440414429,grad_norm: 0.7303258791329416, iteration: 308494
loss: 0.9888903498649597,grad_norm: 0.7824101275934564, iteration: 308495
loss: 1.0222188234329224,grad_norm: 0.7439832915753662, iteration: 308496
loss: 1.0093532800674438,grad_norm: 0.9909995050226347, iteration: 308497
loss: 0.9855227470397949,grad_norm: 0.8956461607615137, iteration: 308498
loss: 0.9745215177536011,grad_norm: 0.8593104119359561, iteration: 308499
loss: 1.0289925336837769,grad_norm: 0.9999996320085133, iteration: 308500
loss: 0.9864267706871033,grad_norm: 0.8387231676614755, iteration: 308501
loss: 1.003462314605713,grad_norm: 0.9999993863317593, iteration: 308502
loss: 1.0492942333221436,grad_norm: 0.9732634400487619, iteration: 308503
loss: 1.0611913204193115,grad_norm: 0.9194312460241221, iteration: 308504
loss: 1.0259950160980225,grad_norm: 0.9289786906163773, iteration: 308505
loss: 1.0039403438568115,grad_norm: 0.9999997957351159, iteration: 308506
loss: 0.9970000982284546,grad_norm: 0.8786050277489377, iteration: 308507
loss: 1.0043563842773438,grad_norm: 0.9231818118198128, iteration: 308508
loss: 1.0167510509490967,grad_norm: 0.7557193622538426, iteration: 308509
loss: 1.037885069847107,grad_norm: 0.9438446008628502, iteration: 308510
loss: 1.0253734588623047,grad_norm: 0.9999994007668913, iteration: 308511
loss: 0.9948592185974121,grad_norm: 0.952455096550376, iteration: 308512
loss: 1.0296268463134766,grad_norm: 0.8267522437359058, iteration: 308513
loss: 0.9963806867599487,grad_norm: 0.8675188727046655, iteration: 308514
loss: 0.9994440078735352,grad_norm: 0.7634509091124488, iteration: 308515
loss: 1.0135647058486938,grad_norm: 0.7938813087297564, iteration: 308516
loss: 0.9776619672775269,grad_norm: 0.7780742687445918, iteration: 308517
loss: 1.0048173666000366,grad_norm: 0.7325140743253994, iteration: 308518
loss: 1.0012779235839844,grad_norm: 0.879061225827834, iteration: 308519
loss: 0.9688989520072937,grad_norm: 0.9999991531767344, iteration: 308520
loss: 1.0166857242584229,grad_norm: 0.7857657589950864, iteration: 308521
loss: 0.9892146587371826,grad_norm: 0.8248180281373024, iteration: 308522
loss: 1.0315015316009521,grad_norm: 0.9999990933411438, iteration: 308523
loss: 1.017720103263855,grad_norm: 0.9201783719064275, iteration: 308524
loss: 1.209005355834961,grad_norm: 0.9999995092583589, iteration: 308525
loss: 0.9950132369995117,grad_norm: 0.8092358622306307, iteration: 308526
loss: 1.0331004858016968,grad_norm: 0.8155899846221816, iteration: 308527
loss: 1.013047456741333,grad_norm: 0.8193564810690319, iteration: 308528
loss: 0.9960905313491821,grad_norm: 0.7713803307640925, iteration: 308529
loss: 1.0050575733184814,grad_norm: 0.8332716988469832, iteration: 308530
loss: 0.9870839715003967,grad_norm: 0.791487446832327, iteration: 308531
loss: 0.9879485368728638,grad_norm: 0.8311952682411414, iteration: 308532
loss: 0.9987803101539612,grad_norm: 0.896671528890065, iteration: 308533
loss: 1.0124255418777466,grad_norm: 0.7963507423640007, iteration: 308534
loss: 1.0254446268081665,grad_norm: 0.9999990448849418, iteration: 308535
loss: 0.9838997721672058,grad_norm: 0.8375735268429135, iteration: 308536
loss: 1.0215204954147339,grad_norm: 0.8201835247672379, iteration: 308537
loss: 0.9883020520210266,grad_norm: 0.999999120867184, iteration: 308538
loss: 0.9862717390060425,grad_norm: 0.8321464385140607, iteration: 308539
loss: 0.9848638772964478,grad_norm: 0.9067513367291491, iteration: 308540
loss: 1.0164986848831177,grad_norm: 0.8448840862405211, iteration: 308541
loss: 1.000999927520752,grad_norm: 0.7774363301364338, iteration: 308542
loss: 0.990021288394928,grad_norm: 0.9070521937346544, iteration: 308543
loss: 1.036269187927246,grad_norm: 0.9999991992338927, iteration: 308544
loss: 0.9488625526428223,grad_norm: 0.7420862242810488, iteration: 308545
loss: 0.992642879486084,grad_norm: 0.820950605068774, iteration: 308546
loss: 1.0100252628326416,grad_norm: 0.8746873532717703, iteration: 308547
loss: 1.0137085914611816,grad_norm: 0.9999991735121356, iteration: 308548
loss: 1.0116264820098877,grad_norm: 0.8525560590134758, iteration: 308549
loss: 0.9796072840690613,grad_norm: 0.9559224527552167, iteration: 308550
loss: 1.0090914964675903,grad_norm: 0.8542879914667929, iteration: 308551
loss: 1.01431405544281,grad_norm: 0.778417482445408, iteration: 308552
loss: 1.117318868637085,grad_norm: 0.9999997899741611, iteration: 308553
loss: 1.028643250465393,grad_norm: 0.8337864994454677, iteration: 308554
loss: 1.0088403224945068,grad_norm: 0.9810449287455727, iteration: 308555
loss: 1.0167409181594849,grad_norm: 0.6969460965601215, iteration: 308556
loss: 1.0251985788345337,grad_norm: 0.8517695387983844, iteration: 308557
loss: 1.003464698791504,grad_norm: 0.999999176322788, iteration: 308558
loss: 1.0016542673110962,grad_norm: 0.9693391953723247, iteration: 308559
loss: 0.9675809741020203,grad_norm: 0.8107734492975182, iteration: 308560
loss: 1.0195590257644653,grad_norm: 0.9463860933907812, iteration: 308561
loss: 0.9799795746803284,grad_norm: 0.937399874672052, iteration: 308562
loss: 0.9771796464920044,grad_norm: 0.7939525288439429, iteration: 308563
loss: 0.9956455826759338,grad_norm: 0.8557511373787843, iteration: 308564
loss: 0.9588629007339478,grad_norm: 0.999999120538122, iteration: 308565
loss: 0.9892823100090027,grad_norm: 0.9999994678754593, iteration: 308566
loss: 0.9967929720878601,grad_norm: 0.9560642229966322, iteration: 308567
loss: 0.9539151787757874,grad_norm: 0.993704707851081, iteration: 308568
loss: 0.979078471660614,grad_norm: 0.8595618940321341, iteration: 308569
loss: 1.015494704246521,grad_norm: 0.929162177353452, iteration: 308570
loss: 1.013426423072815,grad_norm: 0.792478691561417, iteration: 308571
loss: 0.9561435580253601,grad_norm: 0.9365723120036592, iteration: 308572
loss: 1.0069013833999634,grad_norm: 0.8222002663457061, iteration: 308573
loss: 1.023607850074768,grad_norm: 0.8135474021568996, iteration: 308574
loss: 1.02012038230896,grad_norm: 0.9957196000526685, iteration: 308575
loss: 0.9642321467399597,grad_norm: 0.8855223547288523, iteration: 308576
loss: 0.9889613389968872,grad_norm: 0.7669873149722416, iteration: 308577
loss: 1.0166891813278198,grad_norm: 0.7426431155064398, iteration: 308578
loss: 0.9813445210456848,grad_norm: 0.9076165201993601, iteration: 308579
loss: 0.9917716383934021,grad_norm: 0.8989470615091143, iteration: 308580
loss: 0.9781702756881714,grad_norm: 0.8379148647243325, iteration: 308581
loss: 0.9827642440795898,grad_norm: 0.9569157850864046, iteration: 308582
loss: 1.0122771263122559,grad_norm: 0.9293388521713354, iteration: 308583
loss: 0.9888356328010559,grad_norm: 0.782020262441506, iteration: 308584
loss: 1.0290613174438477,grad_norm: 0.999999040630373, iteration: 308585
loss: 0.9791660904884338,grad_norm: 0.822919997227951, iteration: 308586
loss: 1.0107113122940063,grad_norm: 0.999999696152167, iteration: 308587
loss: 1.0314445495605469,grad_norm: 0.7718968015545754, iteration: 308588
loss: 0.9937953352928162,grad_norm: 0.8803144314437501, iteration: 308589
loss: 0.9959431290626526,grad_norm: 0.8652545614690701, iteration: 308590
loss: 1.0701298713684082,grad_norm: 0.9085121451086181, iteration: 308591
loss: 0.9585096836090088,grad_norm: 0.9163480831453843, iteration: 308592
loss: 1.061758279800415,grad_norm: 0.9999996249229497, iteration: 308593
loss: 0.9810709953308105,grad_norm: 0.8465211255368211, iteration: 308594
loss: 0.9800872802734375,grad_norm: 0.8197223105471131, iteration: 308595
loss: 0.9943177700042725,grad_norm: 0.7824197836594053, iteration: 308596
loss: 1.0354679822921753,grad_norm: 0.7322988426219909, iteration: 308597
loss: 1.0010160207748413,grad_norm: 0.7362528474470169, iteration: 308598
loss: 1.012656807899475,grad_norm: 0.9999991034273326, iteration: 308599
loss: 0.956774115562439,grad_norm: 0.8635531019010939, iteration: 308600
loss: 0.9960454106330872,grad_norm: 0.9670087538318769, iteration: 308601
loss: 1.0609755516052246,grad_norm: 0.8588726927503837, iteration: 308602
loss: 0.9913368225097656,grad_norm: 0.8129318202047358, iteration: 308603
loss: 0.9874693751335144,grad_norm: 0.7501386254993638, iteration: 308604
loss: 0.9925054907798767,grad_norm: 0.8559637201704434, iteration: 308605
loss: 0.9984124302864075,grad_norm: 0.8837584861882434, iteration: 308606
loss: 1.0247840881347656,grad_norm: 0.8707305543950999, iteration: 308607
loss: 0.9811468124389648,grad_norm: 0.999999485977371, iteration: 308608
loss: 1.0339080095291138,grad_norm: 0.9474909671001095, iteration: 308609
loss: 1.0421487092971802,grad_norm: 0.871733373437773, iteration: 308610
loss: 1.0237981081008911,grad_norm: 0.8318470028531898, iteration: 308611
loss: 1.0623371601104736,grad_norm: 0.9999991910907058, iteration: 308612
loss: 1.0220062732696533,grad_norm: 0.7124034380457395, iteration: 308613
loss: 0.9961459040641785,grad_norm: 0.9624685863129637, iteration: 308614
loss: 0.9975031614303589,grad_norm: 0.9153758053336775, iteration: 308615
loss: 0.9680335521697998,grad_norm: 0.7940702318204635, iteration: 308616
loss: 0.9929826259613037,grad_norm: 0.9978354236998332, iteration: 308617
loss: 0.9902210235595703,grad_norm: 0.781500311311548, iteration: 308618
loss: 0.9905129671096802,grad_norm: 0.9563762999863756, iteration: 308619
loss: 0.9700474143028259,grad_norm: 0.7408505498646493, iteration: 308620
loss: 0.9738311171531677,grad_norm: 0.9138115202659693, iteration: 308621
loss: 1.0737453699111938,grad_norm: 0.9315318792508637, iteration: 308622
loss: 1.046777367591858,grad_norm: 0.6999818985004808, iteration: 308623
loss: 1.0096713304519653,grad_norm: 0.8960494032905407, iteration: 308624
loss: 0.9970607161521912,grad_norm: 0.7429683722248466, iteration: 308625
loss: 0.9800326824188232,grad_norm: 0.7508571865934122, iteration: 308626
loss: 1.0432640314102173,grad_norm: 0.9496967489769786, iteration: 308627
loss: 0.9682425260543823,grad_norm: 0.9999991295853807, iteration: 308628
loss: 0.9839978814125061,grad_norm: 0.7524353685901475, iteration: 308629
loss: 1.0137006044387817,grad_norm: 0.9129514530252189, iteration: 308630
loss: 0.9935377240180969,grad_norm: 0.8248312453968645, iteration: 308631
loss: 1.0468553304672241,grad_norm: 0.7137575893367286, iteration: 308632
loss: 1.0097657442092896,grad_norm: 0.869161024002732, iteration: 308633
loss: 1.0123447179794312,grad_norm: 0.8208778313189197, iteration: 308634
loss: 0.9836598038673401,grad_norm: 0.7852212559588534, iteration: 308635
loss: 0.9807420372962952,grad_norm: 0.9999990183907979, iteration: 308636
loss: 1.0013787746429443,grad_norm: 0.9495283023112022, iteration: 308637
loss: 1.0524321794509888,grad_norm: 0.9999992524999913, iteration: 308638
loss: 0.987278163433075,grad_norm: 0.9137059602474817, iteration: 308639
loss: 0.9776615500450134,grad_norm: 0.9999993112582151, iteration: 308640
loss: 1.0146241188049316,grad_norm: 0.7364778417931849, iteration: 308641
loss: 0.9864615201950073,grad_norm: 0.752794670817948, iteration: 308642
loss: 0.9904276132583618,grad_norm: 0.8964178526475532, iteration: 308643
loss: 1.0196387767791748,grad_norm: 0.9501485310719354, iteration: 308644
loss: 0.9874082803726196,grad_norm: 0.9999991362996975, iteration: 308645
loss: 1.1693730354309082,grad_norm: 0.9999990935115649, iteration: 308646
loss: 0.9663888216018677,grad_norm: 0.9999991111383909, iteration: 308647
loss: 1.0045340061187744,grad_norm: 0.7481078869124627, iteration: 308648
loss: 1.022228717803955,grad_norm: 0.919155318308577, iteration: 308649
loss: 0.9868888854980469,grad_norm: 0.8774736300606916, iteration: 308650
loss: 1.0401310920715332,grad_norm: 0.9718629854846629, iteration: 308651
loss: 1.0988802909851074,grad_norm: 0.9632556274612408, iteration: 308652
loss: 0.996390163898468,grad_norm: 0.8154989002913645, iteration: 308653
loss: 0.9876267313957214,grad_norm: 0.8745267561500348, iteration: 308654
loss: 0.9774289727210999,grad_norm: 0.8343223934699319, iteration: 308655
loss: 1.0036473274230957,grad_norm: 0.7276426642759163, iteration: 308656
loss: 0.9983837604522705,grad_norm: 0.7570561824212717, iteration: 308657
loss: 0.9740956425666809,grad_norm: 0.999999111735388, iteration: 308658
loss: 1.0030958652496338,grad_norm: 0.8659181245695002, iteration: 308659
loss: 1.0130369663238525,grad_norm: 0.9657892303821203, iteration: 308660
loss: 0.9852474331855774,grad_norm: 0.7374831319158174, iteration: 308661
loss: 0.9637741446495056,grad_norm: 0.9578284349409929, iteration: 308662
loss: 1.0452840328216553,grad_norm: 0.8482673609489939, iteration: 308663
loss: 0.9993821382522583,grad_norm: 0.9999992300378011, iteration: 308664
loss: 0.9977977275848389,grad_norm: 0.9376172907856771, iteration: 308665
loss: 0.9873875975608826,grad_norm: 0.7881403250635268, iteration: 308666
loss: 1.0035595893859863,grad_norm: 0.874450327931391, iteration: 308667
loss: 0.9679056406021118,grad_norm: 0.7382891172334806, iteration: 308668
loss: 1.000051498413086,grad_norm: 0.8268061756298393, iteration: 308669
loss: 1.0087895393371582,grad_norm: 0.9999990568369057, iteration: 308670
loss: 1.0170248746871948,grad_norm: 0.8489666168924882, iteration: 308671
loss: 1.0008306503295898,grad_norm: 0.7759966385836882, iteration: 308672
loss: 0.9675090909004211,grad_norm: 0.9270912544068923, iteration: 308673
loss: 1.0015417337417603,grad_norm: 0.8922402998729939, iteration: 308674
loss: 1.0492585897445679,grad_norm: 0.9999991165450596, iteration: 308675
loss: 0.957522988319397,grad_norm: 0.8148355948708222, iteration: 308676
loss: 1.0246400833129883,grad_norm: 0.7241809740355222, iteration: 308677
loss: 0.9870967864990234,grad_norm: 0.8429751172403787, iteration: 308678
loss: 1.1034486293792725,grad_norm: 0.935391676087987, iteration: 308679
loss: 1.0006248950958252,grad_norm: 0.9359639013548341, iteration: 308680
loss: 0.989510715007782,grad_norm: 0.9999993266870516, iteration: 308681
loss: 0.9571178555488586,grad_norm: 0.7527177412718711, iteration: 308682
loss: 0.9994365572929382,grad_norm: 0.9218376174636197, iteration: 308683
loss: 0.9987273812294006,grad_norm: 0.8708367570680444, iteration: 308684
loss: 0.9877021312713623,grad_norm: 0.8744763518334385, iteration: 308685
loss: 0.9915777444839478,grad_norm: 0.772801222159902, iteration: 308686
loss: 0.9887199997901917,grad_norm: 0.8704320118591641, iteration: 308687
loss: 0.9701455235481262,grad_norm: 0.8120545747134652, iteration: 308688
loss: 0.9953458905220032,grad_norm: 0.7893934671024301, iteration: 308689
loss: 0.9727003574371338,grad_norm: 0.8835538899271737, iteration: 308690
loss: 1.0012447834014893,grad_norm: 0.877563851006754, iteration: 308691
loss: 1.025210976600647,grad_norm: 0.8072089714416296, iteration: 308692
loss: 1.06080162525177,grad_norm: 0.9639545615155763, iteration: 308693
loss: 0.9815770983695984,grad_norm: 0.9999990022609722, iteration: 308694
loss: 1.0194677114486694,grad_norm: 0.9163049887510412, iteration: 308695
loss: 1.0073298215866089,grad_norm: 0.7621596163480471, iteration: 308696
loss: 1.0087116956710815,grad_norm: 0.8906775956942921, iteration: 308697
loss: 1.0123257637023926,grad_norm: 0.8606981322172006, iteration: 308698
loss: 1.0388574600219727,grad_norm: 0.9655132215346972, iteration: 308699
loss: 0.9835789203643799,grad_norm: 0.8062213734430976, iteration: 308700
loss: 0.9484865665435791,grad_norm: 0.9018886249466623, iteration: 308701
loss: 0.9973504543304443,grad_norm: 0.8811038046731919, iteration: 308702
loss: 1.0088820457458496,grad_norm: 0.9999996940413002, iteration: 308703
loss: 1.057608962059021,grad_norm: 0.7844357575935654, iteration: 308704
loss: 0.965660572052002,grad_norm: 0.779140382357571, iteration: 308705
loss: 0.9711368680000305,grad_norm: 0.8621095523352803, iteration: 308706
loss: 1.0058355331420898,grad_norm: 0.6729767237302304, iteration: 308707
loss: 1.0167673826217651,grad_norm: 0.9758152743952035, iteration: 308708
loss: 0.9807642102241516,grad_norm: 0.8631156560896698, iteration: 308709
loss: 1.0236996412277222,grad_norm: 0.9999991732194644, iteration: 308710
loss: 0.9736822843551636,grad_norm: 0.9118934796536972, iteration: 308711
loss: 1.0305424928665161,grad_norm: 0.9999996946049028, iteration: 308712
loss: 0.9929917454719543,grad_norm: 0.8655873254154806, iteration: 308713
loss: 1.1550861597061157,grad_norm: 0.9999996914453918, iteration: 308714
loss: 1.0109347105026245,grad_norm: 0.8336780651912064, iteration: 308715
loss: 1.086138367652893,grad_norm: 0.9999996316143746, iteration: 308716
loss: 0.9764622449874878,grad_norm: 0.9080256405413268, iteration: 308717
loss: 0.998431921005249,grad_norm: 0.9999989201122294, iteration: 308718
loss: 0.9911666512489319,grad_norm: 0.9999991040499374, iteration: 308719
loss: 0.9935603141784668,grad_norm: 0.8975825520886814, iteration: 308720
loss: 1.0153224468231201,grad_norm: 0.9999991452933485, iteration: 308721
loss: 0.9769478440284729,grad_norm: 0.7842488490227493, iteration: 308722
loss: 1.0378458499908447,grad_norm: 0.8206238389348595, iteration: 308723
loss: 1.0761380195617676,grad_norm: 1.0000000093862895, iteration: 308724
loss: 1.0256624221801758,grad_norm: 0.7697390525714032, iteration: 308725
loss: 1.0072200298309326,grad_norm: 0.8658887276746141, iteration: 308726
loss: 0.9928684830665588,grad_norm: 0.9999990879907459, iteration: 308727
loss: 1.0022320747375488,grad_norm: 0.9189779008390374, iteration: 308728
loss: 0.9765106439590454,grad_norm: 0.9999993202477306, iteration: 308729
loss: 0.998545229434967,grad_norm: 0.8989268891660569, iteration: 308730
loss: 0.9878805875778198,grad_norm: 0.9999989834428826, iteration: 308731
loss: 1.012439489364624,grad_norm: 0.8526101685721116, iteration: 308732
loss: 1.01265287399292,grad_norm: 0.8402631627875004, iteration: 308733
loss: 1.0822234153747559,grad_norm: 0.9999997818821784, iteration: 308734
loss: 0.9887583255767822,grad_norm: 0.9173031672140335, iteration: 308735
loss: 0.979710042476654,grad_norm: 0.9532887295442634, iteration: 308736
loss: 1.0253515243530273,grad_norm: 0.9999989996824226, iteration: 308737
loss: 1.01954984664917,grad_norm: 0.7650313696649236, iteration: 308738
loss: 1.059043288230896,grad_norm: 0.9563190669264424, iteration: 308739
loss: 1.0561152696609497,grad_norm: 0.9999995770657601, iteration: 308740
loss: 1.0617622137069702,grad_norm: 0.867148317339478, iteration: 308741
loss: 0.9736219048500061,grad_norm: 0.9581830188841305, iteration: 308742
loss: 1.0458579063415527,grad_norm: 0.9999992252975697, iteration: 308743
loss: 0.9843612313270569,grad_norm: 0.9102377117297525, iteration: 308744
loss: 0.9829536080360413,grad_norm: 0.9416971361180574, iteration: 308745
loss: 1.1241592168807983,grad_norm: 0.9999990225762986, iteration: 308746
loss: 0.9983402490615845,grad_norm: 0.8292420039567174, iteration: 308747
loss: 1.010934829711914,grad_norm: 0.7606751495221372, iteration: 308748
loss: 1.0128657817840576,grad_norm: 0.8493372910427008, iteration: 308749
loss: 1.011027455329895,grad_norm: 0.8939150164729528, iteration: 308750
loss: 1.0103365182876587,grad_norm: 0.8751698330132665, iteration: 308751
loss: 1.0016365051269531,grad_norm: 0.8507271205112247, iteration: 308752
loss: 1.0188814401626587,grad_norm: 0.9999991157967405, iteration: 308753
loss: 1.0056699514389038,grad_norm: 0.9601471007290837, iteration: 308754
loss: 1.0212558507919312,grad_norm: 0.956060990699536, iteration: 308755
loss: 1.0308223962783813,grad_norm: 0.9999990918009817, iteration: 308756
loss: 1.0162849426269531,grad_norm: 0.9088180400095752, iteration: 308757
loss: 1.025350570678711,grad_norm: 0.8876765007755292, iteration: 308758
loss: 1.0102083683013916,grad_norm: 0.7790604230141379, iteration: 308759
loss: 1.0211267471313477,grad_norm: 0.9879615460196359, iteration: 308760
loss: 0.9959290623664856,grad_norm: 0.8738276066423727, iteration: 308761
loss: 0.9838797450065613,grad_norm: 0.8209880237000098, iteration: 308762
loss: 1.1303791999816895,grad_norm: 0.9999991688665453, iteration: 308763
loss: 0.9833595156669617,grad_norm: 0.7479972973601005, iteration: 308764
loss: 0.9754000306129456,grad_norm: 0.7958507889467682, iteration: 308765
loss: 0.9880906939506531,grad_norm: 0.9999992880906411, iteration: 308766
loss: 0.9923921823501587,grad_norm: 0.9999994996000793, iteration: 308767
loss: 0.9886523485183716,grad_norm: 0.837783740497093, iteration: 308768
loss: 0.995273232460022,grad_norm: 0.7841451850887184, iteration: 308769
loss: 0.9779747724533081,grad_norm: 0.8830693232529409, iteration: 308770
loss: 1.0075795650482178,grad_norm: 0.936642312794844, iteration: 308771
loss: 1.04670250415802,grad_norm: 0.8197402364333443, iteration: 308772
loss: 0.9725322127342224,grad_norm: 0.8992441005202647, iteration: 308773
loss: 1.0173325538635254,grad_norm: 0.6851323407043213, iteration: 308774
loss: 1.0039933919906616,grad_norm: 0.8655513503854732, iteration: 308775
loss: 1.0052694082260132,grad_norm: 0.8995934103572553, iteration: 308776
loss: 1.0055816173553467,grad_norm: 0.9999995858656909, iteration: 308777
loss: 1.020841360092163,grad_norm: 0.9999995394865332, iteration: 308778
loss: 1.0012861490249634,grad_norm: 0.8065202441238654, iteration: 308779
loss: 0.9633198380470276,grad_norm: 0.7830530421227724, iteration: 308780
loss: 1.040246844291687,grad_norm: 0.9580635133460962, iteration: 308781
loss: 0.9494556188583374,grad_norm: 0.8621384613157609, iteration: 308782
loss: 1.025931715965271,grad_norm: 0.7115597054598318, iteration: 308783
loss: 0.9810702204704285,grad_norm: 0.9999991969464508, iteration: 308784
loss: 0.9926753044128418,grad_norm: 0.9999994705218078, iteration: 308785
loss: 0.9780629873275757,grad_norm: 0.8738410892403613, iteration: 308786
loss: 0.9978955984115601,grad_norm: 0.9552096138531846, iteration: 308787
loss: 0.9649476408958435,grad_norm: 0.849330248740761, iteration: 308788
loss: 0.9939762949943542,grad_norm: 0.8155122859486322, iteration: 308789
loss: 0.9895434975624084,grad_norm: 0.8978201854405079, iteration: 308790
loss: 1.0263041257858276,grad_norm: 0.99999901925322, iteration: 308791
loss: 1.020396113395691,grad_norm: 0.9278279889987476, iteration: 308792
loss: 0.9694169163703918,grad_norm: 0.7677056211551107, iteration: 308793
loss: 0.9691385626792908,grad_norm: 0.7673101643856, iteration: 308794
loss: 1.038493275642395,grad_norm: 0.8490273440191286, iteration: 308795
loss: 1.0230262279510498,grad_norm: 0.7543301864476133, iteration: 308796
loss: 0.9577324986457825,grad_norm: 0.8887442986326319, iteration: 308797
loss: 1.0177314281463623,grad_norm: 0.9999991002968139, iteration: 308798
loss: 1.0302826166152954,grad_norm: 0.9999991670011714, iteration: 308799
loss: 1.0218548774719238,grad_norm: 0.9435522156222119, iteration: 308800
loss: 1.0064085721969604,grad_norm: 0.8524504857036838, iteration: 308801
loss: 0.9611428380012512,grad_norm: 0.7594284816754011, iteration: 308802
loss: 0.9982384443283081,grad_norm: 0.8012755940767833, iteration: 308803
loss: 1.005434513092041,grad_norm: 0.9779488793668901, iteration: 308804
loss: 1.0406831502914429,grad_norm: 0.9698702253898116, iteration: 308805
loss: 1.0169297456741333,grad_norm: 0.6680292126927527, iteration: 308806
loss: 0.9980033040046692,grad_norm: 0.9999991667778627, iteration: 308807
loss: 1.0048339366912842,grad_norm: 0.8384703987390728, iteration: 308808
loss: 1.0119434595108032,grad_norm: 0.7360777038095383, iteration: 308809
loss: 0.9756981730461121,grad_norm: 0.9999990987226063, iteration: 308810
loss: 1.1056574583053589,grad_norm: 0.9999999407454164, iteration: 308811
loss: 0.9890679717063904,grad_norm: 0.8066800564293333, iteration: 308812
loss: 1.021568775177002,grad_norm: 0.7349030226512152, iteration: 308813
loss: 1.0374878644943237,grad_norm: 0.9999989514938784, iteration: 308814
loss: 0.9543903470039368,grad_norm: 0.7699100667426425, iteration: 308815
loss: 1.0406317710876465,grad_norm: 0.860839734990884, iteration: 308816
loss: 1.035093903541565,grad_norm: 0.9999996090460636, iteration: 308817
loss: 1.0109413862228394,grad_norm: 0.8006531876352252, iteration: 308818
loss: 0.9964144825935364,grad_norm: 0.7865455371217163, iteration: 308819
loss: 1.0023550987243652,grad_norm: 0.8652609670768527, iteration: 308820
loss: 1.0288193225860596,grad_norm: 0.8251742958144216, iteration: 308821
loss: 0.9862613081932068,grad_norm: 0.8748864449527852, iteration: 308822
loss: 0.981317400932312,grad_norm: 0.9197987321398362, iteration: 308823
loss: 1.013113260269165,grad_norm: 0.899972308480163, iteration: 308824
loss: 1.0792499780654907,grad_norm: 0.9999998929823828, iteration: 308825
loss: 1.0022246837615967,grad_norm: 0.850926446622749, iteration: 308826
loss: 1.0108641386032104,grad_norm: 0.8562187918934226, iteration: 308827
loss: 0.9921190142631531,grad_norm: 0.8338241182730296, iteration: 308828
loss: 0.99403977394104,grad_norm: 0.7120445716581744, iteration: 308829
loss: 0.9890187978744507,grad_norm: 0.8371605136315315, iteration: 308830
loss: 1.0132445096969604,grad_norm: 0.7711349712408339, iteration: 308831
loss: 1.085994839668274,grad_norm: 0.9999998191885338, iteration: 308832
loss: 1.0519179105758667,grad_norm: 0.8282651597489494, iteration: 308833
loss: 1.1741005182266235,grad_norm: 0.9999993740699111, iteration: 308834
loss: 0.9825456142425537,grad_norm: 0.8320347190903281, iteration: 308835
loss: 1.014768362045288,grad_norm: 0.8377028150268153, iteration: 308836
loss: 0.9791287183761597,grad_norm: 0.7820112303328995, iteration: 308837
loss: 0.9966772198677063,grad_norm: 0.7496247162139217, iteration: 308838
loss: 0.9999630451202393,grad_norm: 0.9571792136629275, iteration: 308839
loss: 1.014094591140747,grad_norm: 0.8392556282088122, iteration: 308840
loss: 1.0617170333862305,grad_norm: 0.9999991037958063, iteration: 308841
loss: 0.9988813996315002,grad_norm: 0.7676625340789492, iteration: 308842
loss: 1.010079026222229,grad_norm: 0.9820960183512749, iteration: 308843
loss: 0.9824773669242859,grad_norm: 0.9999991832918114, iteration: 308844
loss: 0.9977005124092102,grad_norm: 0.6860319693265341, iteration: 308845
loss: 0.9964956045150757,grad_norm: 0.7398103287359193, iteration: 308846
loss: 0.9787766933441162,grad_norm: 0.847140763034066, iteration: 308847
loss: 1.1029337644577026,grad_norm: 0.9999991749269045, iteration: 308848
loss: 0.9998964667320251,grad_norm: 0.7460978159465932, iteration: 308849
loss: 0.9848225712776184,grad_norm: 0.8085498278244647, iteration: 308850
loss: 1.0169545412063599,grad_norm: 0.859309674436369, iteration: 308851
loss: 1.0002291202545166,grad_norm: 0.8387445637264209, iteration: 308852
loss: 0.9955375790596008,grad_norm: 0.9590049798455521, iteration: 308853
loss: 0.9850865602493286,grad_norm: 0.9559726566237495, iteration: 308854
loss: 0.9480050206184387,grad_norm: 0.8686225171541693, iteration: 308855
loss: 0.9794425368309021,grad_norm: 0.9695476150844726, iteration: 308856
loss: 1.0354918241500854,grad_norm: 0.8421183401471446, iteration: 308857
loss: 1.006773829460144,grad_norm: 0.9999991798653163, iteration: 308858
loss: 0.988450825214386,grad_norm: 0.7482811463909895, iteration: 308859
loss: 1.0234739780426025,grad_norm: 0.6754705743614772, iteration: 308860
loss: 1.0213267803192139,grad_norm: 0.832251372306805, iteration: 308861
loss: 0.9982085227966309,grad_norm: 0.8453580027690791, iteration: 308862
loss: 1.0096735954284668,grad_norm: 0.811307008070699, iteration: 308863
loss: 0.9686042666435242,grad_norm: 0.9999989893439225, iteration: 308864
loss: 0.998518168926239,grad_norm: 0.7514989127076795, iteration: 308865
loss: 1.1314620971679688,grad_norm: 0.9999990210146048, iteration: 308866
loss: 0.9984838962554932,grad_norm: 0.757796046923567, iteration: 308867
loss: 1.0159437656402588,grad_norm: 0.7147869297439733, iteration: 308868
loss: 1.0050568580627441,grad_norm: 0.7907679484350825, iteration: 308869
loss: 1.0826555490493774,grad_norm: 0.9999989897771489, iteration: 308870
loss: 1.0198054313659668,grad_norm: 0.8174139221381275, iteration: 308871
loss: 1.0405941009521484,grad_norm: 0.99999937678561, iteration: 308872
loss: 1.0282366275787354,grad_norm: 0.8501231277548935, iteration: 308873
loss: 0.9986288547515869,grad_norm: 0.7535423767173027, iteration: 308874
loss: 0.9799759984016418,grad_norm: 0.8861980844136085, iteration: 308875
loss: 1.0321673154830933,grad_norm: 0.7340417040002629, iteration: 308876
loss: 0.9854162335395813,grad_norm: 0.7802120473597856, iteration: 308877
loss: 1.0155447721481323,grad_norm: 0.9999990463846693, iteration: 308878
loss: 0.9765651822090149,grad_norm: 0.8811162967485263, iteration: 308879
loss: 1.030104637145996,grad_norm: 0.7452524273197123, iteration: 308880
loss: 1.0132123231887817,grad_norm: 0.8251930157069367, iteration: 308881
loss: 0.9696430563926697,grad_norm: 0.8581917557923388, iteration: 308882
loss: 1.0007966756820679,grad_norm: 0.9009700822517749, iteration: 308883
loss: 1.01382577419281,grad_norm: 0.855547849966849, iteration: 308884
loss: 1.0368549823760986,grad_norm: 0.779686032140966, iteration: 308885
loss: 0.9854915738105774,grad_norm: 0.826868952227066, iteration: 308886
loss: 0.9910476803779602,grad_norm: 0.9999990168720123, iteration: 308887
loss: 0.9977741241455078,grad_norm: 0.8738139246888791, iteration: 308888
loss: 0.9904282689094543,grad_norm: 0.7999297942807365, iteration: 308889
loss: 1.0000574588775635,grad_norm: 0.637426236298869, iteration: 308890
loss: 0.9847959876060486,grad_norm: 0.8568196389712829, iteration: 308891
loss: 1.0935029983520508,grad_norm: 0.9999991111075126, iteration: 308892
loss: 1.0267354249954224,grad_norm: 0.7917141795250712, iteration: 308893
loss: 0.9807018041610718,grad_norm: 0.9391811251491586, iteration: 308894
loss: 1.036633014678955,grad_norm: 0.8834089522092892, iteration: 308895
loss: 1.0102839469909668,grad_norm: 0.8220786204102905, iteration: 308896
loss: 0.9654970169067383,grad_norm: 0.9454937303362261, iteration: 308897
loss: 1.0183318853378296,grad_norm: 0.8569869593902936, iteration: 308898
loss: 0.9835442900657654,grad_norm: 0.9820223449613481, iteration: 308899
loss: 1.021501898765564,grad_norm: 0.8785273552720951, iteration: 308900
loss: 0.9998288750648499,grad_norm: 0.8954700711149249, iteration: 308901
loss: 1.0018576383590698,grad_norm: 0.7794816178276096, iteration: 308902
loss: 0.9915787577629089,grad_norm: 0.7682965850120695, iteration: 308903
loss: 1.0283994674682617,grad_norm: 0.8700889292917949, iteration: 308904
loss: 1.0169730186462402,grad_norm: 0.8713270029944539, iteration: 308905
loss: 1.004433035850525,grad_norm: 0.7561807990037928, iteration: 308906
loss: 1.0151855945587158,grad_norm: 0.890666036391923, iteration: 308907
loss: 0.9506334066390991,grad_norm: 0.735236646276712, iteration: 308908
loss: 1.0063204765319824,grad_norm: 0.7974012617330856, iteration: 308909
loss: 0.9460228085517883,grad_norm: 0.6965437432234012, iteration: 308910
loss: 1.0140422582626343,grad_norm: 0.9297740101928375, iteration: 308911
loss: 1.0025094747543335,grad_norm: 0.8584287122774519, iteration: 308912
loss: 1.0437867641448975,grad_norm: 0.9999997451704933, iteration: 308913
loss: 0.9985669851303101,grad_norm: 0.8447600069466343, iteration: 308914
loss: 0.9864670038223267,grad_norm: 0.7020005542596642, iteration: 308915
loss: 1.104605793952942,grad_norm: 0.9999991602820714, iteration: 308916
loss: 1.0141987800598145,grad_norm: 0.9999991245310466, iteration: 308917
loss: 1.0112743377685547,grad_norm: 0.9999998265276968, iteration: 308918
loss: 0.9744153022766113,grad_norm: 0.7381887517000362, iteration: 308919
loss: 1.0045474767684937,grad_norm: 0.8342683309640446, iteration: 308920
loss: 1.0051801204681396,grad_norm: 0.7390106939815724, iteration: 308921
loss: 1.0161054134368896,grad_norm: 0.7279933356585634, iteration: 308922
loss: 0.9839787483215332,grad_norm: 0.7849485362424831, iteration: 308923
loss: 1.014647364616394,grad_norm: 0.9999996763128773, iteration: 308924
loss: 1.0118045806884766,grad_norm: 0.7942081527719652, iteration: 308925
loss: 1.0773885250091553,grad_norm: 0.9805669404489902, iteration: 308926
loss: 0.9924577474594116,grad_norm: 0.9231953966587696, iteration: 308927
loss: 1.027571678161621,grad_norm: 0.6721511717308313, iteration: 308928
loss: 1.0168157815933228,grad_norm: 0.8446143076737319, iteration: 308929
loss: 0.980829119682312,grad_norm: 0.812441679403076, iteration: 308930
loss: 1.0066779851913452,grad_norm: 0.9072762193533388, iteration: 308931
loss: 0.9793796539306641,grad_norm: 0.8650902762707458, iteration: 308932
loss: 1.004796028137207,grad_norm: 0.8393831097612238, iteration: 308933
loss: 1.0326484441757202,grad_norm: 0.9252818281969426, iteration: 308934
loss: 0.9942941069602966,grad_norm: 0.9221921743857177, iteration: 308935
loss: 0.969265341758728,grad_norm: 0.720185777461853, iteration: 308936
loss: 1.0269168615341187,grad_norm: 0.9316343915639393, iteration: 308937
loss: 1.0582698583602905,grad_norm: 0.9999997760990597, iteration: 308938
loss: 0.9862527251243591,grad_norm: 0.7956127492309956, iteration: 308939
loss: 0.9728670120239258,grad_norm: 0.8241220481851188, iteration: 308940
loss: 0.9940111637115479,grad_norm: 0.7103124596460583, iteration: 308941
loss: 1.063156008720398,grad_norm: 0.999999999862114, iteration: 308942
loss: 1.02279531955719,grad_norm: 0.9999993347797437, iteration: 308943
loss: 1.0318189859390259,grad_norm: 0.975467341911171, iteration: 308944
loss: 1.0061146020889282,grad_norm: 0.9389399577710547, iteration: 308945
loss: 0.9982841610908508,grad_norm: 0.8553868030263458, iteration: 308946
loss: 0.9997570514678955,grad_norm: 0.8393337847300876, iteration: 308947
loss: 1.0185178518295288,grad_norm: 0.8203396565469394, iteration: 308948
loss: 1.0045921802520752,grad_norm: 0.9730415511241479, iteration: 308949
loss: 1.0266547203063965,grad_norm: 0.8459770921471684, iteration: 308950
loss: 1.0130302906036377,grad_norm: 0.8094959823661142, iteration: 308951
loss: 1.0459213256835938,grad_norm: 0.9999992370857219, iteration: 308952
loss: 1.002040147781372,grad_norm: 0.6596036219281217, iteration: 308953
loss: 1.0179208517074585,grad_norm: 0.9506382352178635, iteration: 308954
loss: 0.9821688532829285,grad_norm: 0.8971964996827639, iteration: 308955
loss: 0.9816297292709351,grad_norm: 0.8483779336954537, iteration: 308956
loss: 1.0026416778564453,grad_norm: 0.7603436453022604, iteration: 308957
loss: 1.03773033618927,grad_norm: 0.999999090644395, iteration: 308958
loss: 1.008262038230896,grad_norm: 0.7845566441961717, iteration: 308959
loss: 0.999386191368103,grad_norm: 0.9999991275149955, iteration: 308960
loss: 1.0194849967956543,grad_norm: 0.9999990388406603, iteration: 308961
loss: 0.9746142625808716,grad_norm: 0.8120504731878755, iteration: 308962
loss: 1.0010126829147339,grad_norm: 0.9226281881334807, iteration: 308963
loss: 0.993743896484375,grad_norm: 0.9906993984641458, iteration: 308964
loss: 0.9713608026504517,grad_norm: 0.9560309552274715, iteration: 308965
loss: 0.9689608216285706,grad_norm: 0.8600529753996928, iteration: 308966
loss: 1.0764098167419434,grad_norm: 0.9999990987485318, iteration: 308967
loss: 0.9714111089706421,grad_norm: 0.9999990383250833, iteration: 308968
loss: 0.9893360733985901,grad_norm: 0.9999990261062549, iteration: 308969
loss: 1.0114364624023438,grad_norm: 0.8930090074128618, iteration: 308970
loss: 0.967356264591217,grad_norm: 0.9097359461773665, iteration: 308971
loss: 1.0514233112335205,grad_norm: 0.8155725173614685, iteration: 308972
loss: 1.0296326875686646,grad_norm: 0.7747146964997257, iteration: 308973
loss: 1.019753336906433,grad_norm: 0.8895278327188323, iteration: 308974
loss: 1.0246684551239014,grad_norm: 0.9999991354184268, iteration: 308975
loss: 0.9829636812210083,grad_norm: 0.8407430464617931, iteration: 308976
loss: 0.9854402542114258,grad_norm: 0.9999989672865998, iteration: 308977
loss: 0.9903690814971924,grad_norm: 0.6883252067400097, iteration: 308978
loss: 0.978345513343811,grad_norm: 0.8660817441103829, iteration: 308979
loss: 0.9972730278968811,grad_norm: 0.8326675823917248, iteration: 308980
loss: 1.0161256790161133,grad_norm: 0.7625924023765357, iteration: 308981
loss: 1.0077087879180908,grad_norm: 0.9929842921880849, iteration: 308982
loss: 0.9551259875297546,grad_norm: 0.8251901487829998, iteration: 308983
loss: 0.994196891784668,grad_norm: 0.7427483129245237, iteration: 308984
loss: 1.0075269937515259,grad_norm: 0.9581748856406639, iteration: 308985
loss: 1.0454559326171875,grad_norm: 0.7866088984732874, iteration: 308986
loss: 0.9740370512008667,grad_norm: 0.9012010718511245, iteration: 308987
loss: 1.066160798072815,grad_norm: 0.9312976778523924, iteration: 308988
loss: 0.9635412096977234,grad_norm: 0.8178524775247984, iteration: 308989
loss: 1.0219478607177734,grad_norm: 0.8292223076143111, iteration: 308990
loss: 1.0059894323349,grad_norm: 0.8610486400725793, iteration: 308991
loss: 0.9536385536193848,grad_norm: 0.798577329793373, iteration: 308992
loss: 1.008976697921753,grad_norm: 0.9554379885815398, iteration: 308993
loss: 1.0630285739898682,grad_norm: 0.8446770777567779, iteration: 308994
loss: 0.9891100525856018,grad_norm: 0.7531932104927175, iteration: 308995
loss: 0.9833312034606934,grad_norm: 0.8736293915845963, iteration: 308996
loss: 0.9902042746543884,grad_norm: 0.9994003566877714, iteration: 308997
loss: 1.006471872329712,grad_norm: 0.7925366479326439, iteration: 308998
loss: 1.035535454750061,grad_norm: 0.9999990905475717, iteration: 308999
loss: 1.0043517351150513,grad_norm: 0.9108547477236627, iteration: 309000
loss: 1.0212712287902832,grad_norm: 0.8758752804717933, iteration: 309001
loss: 1.040671944618225,grad_norm: 0.8930338589711417, iteration: 309002
loss: 0.9826595783233643,grad_norm: 0.8812940046086898, iteration: 309003
loss: 1.007931113243103,grad_norm: 0.9999997254060156, iteration: 309004
loss: 0.9869271516799927,grad_norm: 0.999999078819552, iteration: 309005
loss: 0.9690560698509216,grad_norm: 0.7321129959562045, iteration: 309006
loss: 1.006868839263916,grad_norm: 0.9009737251013258, iteration: 309007
loss: 1.001133918762207,grad_norm: 0.7709318577548956, iteration: 309008
loss: 0.991037905216217,grad_norm: 0.9999993344065241, iteration: 309009
loss: 1.1395667791366577,grad_norm: 0.9999996421548363, iteration: 309010
loss: 0.9849467873573303,grad_norm: 0.9350405097595017, iteration: 309011
loss: 0.9867863059043884,grad_norm: 0.8164982035951881, iteration: 309012
loss: 1.0184510946273804,grad_norm: 0.9999995060704039, iteration: 309013
loss: 0.9937999844551086,grad_norm: 0.9597304603031432, iteration: 309014
loss: 0.9825606346130371,grad_norm: 0.9743392388173313, iteration: 309015
loss: 0.9857381582260132,grad_norm: 0.9235136685620132, iteration: 309016
loss: 1.0001472234725952,grad_norm: 0.8094686574078411, iteration: 309017
loss: 1.0105571746826172,grad_norm: 0.8005887098035848, iteration: 309018
loss: 0.9578064680099487,grad_norm: 0.9999989841837733, iteration: 309019
loss: 1.0122591257095337,grad_norm: 0.8863109742128231, iteration: 309020
loss: 1.2221672534942627,grad_norm: 0.9999993694529375, iteration: 309021
loss: 1.0211035013198853,grad_norm: 0.8343958864421271, iteration: 309022
loss: 0.9752354621887207,grad_norm: 0.8735745426778839, iteration: 309023
loss: 0.9985010623931885,grad_norm: 0.8580744296999674, iteration: 309024
loss: 1.0052412748336792,grad_norm: 0.9427810095679992, iteration: 309025
loss: 1.020383358001709,grad_norm: 0.9643582861627703, iteration: 309026
loss: 1.0181381702423096,grad_norm: 0.8366962000412492, iteration: 309027
loss: 0.9731268286705017,grad_norm: 0.8891590198998757, iteration: 309028
loss: 0.9949994087219238,grad_norm: 0.9999994231054798, iteration: 309029
loss: 1.0373138189315796,grad_norm: 0.848772626804402, iteration: 309030
loss: 0.9713029265403748,grad_norm: 0.8234692775798829, iteration: 309031
loss: 0.9835585951805115,grad_norm: 0.819700120607996, iteration: 309032
loss: 0.9996210932731628,grad_norm: 0.863353796319108, iteration: 309033
loss: 0.9599907994270325,grad_norm: 0.8787132014519881, iteration: 309034
loss: 0.9922735691070557,grad_norm: 0.8700967117075399, iteration: 309035
loss: 1.114532232284546,grad_norm: 0.9671017640756764, iteration: 309036
loss: 1.0892750024795532,grad_norm: 0.8399502726851834, iteration: 309037
loss: 0.9962933659553528,grad_norm: 0.7504529250613077, iteration: 309038
loss: 1.069039225578308,grad_norm: 0.9230082834106064, iteration: 309039
loss: 1.0006226301193237,grad_norm: 0.7606707520320228, iteration: 309040
loss: 0.9980692267417908,grad_norm: 0.8007026305223117, iteration: 309041
loss: 1.005948781967163,grad_norm: 0.7889769483030615, iteration: 309042
loss: 0.9896358251571655,grad_norm: 0.9999992256105857, iteration: 309043
loss: 1.006393551826477,grad_norm: 0.9999990259829957, iteration: 309044
loss: 1.0325900316238403,grad_norm: 0.8106272587093252, iteration: 309045
loss: 1.0014418363571167,grad_norm: 0.8055586878448576, iteration: 309046
loss: 0.9881373047828674,grad_norm: 0.7932837204507128, iteration: 309047
loss: 0.9930892586708069,grad_norm: 0.697494873557955, iteration: 309048
loss: 1.0112206935882568,grad_norm: 0.8489981007964996, iteration: 309049
loss: 1.0263597965240479,grad_norm: 0.7862397845699131, iteration: 309050
loss: 0.9782366156578064,grad_norm: 0.9113567030072761, iteration: 309051
loss: 0.9781920909881592,grad_norm: 0.8963365037220873, iteration: 309052
loss: 1.066524624824524,grad_norm: 0.9999999986343459, iteration: 309053
loss: 1.0173757076263428,grad_norm: 0.8322977865625001, iteration: 309054
loss: 1.105246901512146,grad_norm: 0.9999998319097729, iteration: 309055
loss: 0.9783430695533752,grad_norm: 0.8693409403070204, iteration: 309056
loss: 1.0167986154556274,grad_norm: 0.7237669525711835, iteration: 309057
loss: 1.0374470949172974,grad_norm: 0.9999990254183753, iteration: 309058
loss: 0.9852550029754639,grad_norm: 0.9876157944602104, iteration: 309059
loss: 1.0579626560211182,grad_norm: 0.9999990073008231, iteration: 309060
loss: 0.9953107237815857,grad_norm: 0.7645431536283103, iteration: 309061
loss: 0.9892421960830688,grad_norm: 0.8433881103537596, iteration: 309062
loss: 1.0246089696884155,grad_norm: 0.8363412421490021, iteration: 309063
loss: 1.015676736831665,grad_norm: 0.8432321881235108, iteration: 309064
loss: 0.9576727151870728,grad_norm: 0.8207399885015344, iteration: 309065
loss: 0.9878705739974976,grad_norm: 0.8243188878091986, iteration: 309066
loss: 0.9969720840454102,grad_norm: 0.7419427045118696, iteration: 309067
loss: 1.033224105834961,grad_norm: 0.873124380671409, iteration: 309068
loss: 1.0121511220932007,grad_norm: 0.9999990917205781, iteration: 309069
loss: 0.9864835739135742,grad_norm: 0.7608228847123352, iteration: 309070
loss: 0.9971527457237244,grad_norm: 0.8452041577836015, iteration: 309071
loss: 1.0036619901657104,grad_norm: 0.7813694351949962, iteration: 309072
loss: 0.9751906991004944,grad_norm: 0.8676881507201855, iteration: 309073
loss: 0.9766780138015747,grad_norm: 0.8137623997526093, iteration: 309074
loss: 1.0390076637268066,grad_norm: 0.83989529187171, iteration: 309075
loss: 0.9684883952140808,grad_norm: 0.7803349542203462, iteration: 309076
loss: 1.042371153831482,grad_norm: 0.8690324909903049, iteration: 309077
loss: 0.9829903244972229,grad_norm: 0.8392915159979905, iteration: 309078
loss: 1.019087314605713,grad_norm: 0.9133429791056188, iteration: 309079
loss: 0.9969688653945923,grad_norm: 0.7483989709701353, iteration: 309080
loss: 0.9566552639007568,grad_norm: 0.7579788377442902, iteration: 309081
loss: 1.0025233030319214,grad_norm: 0.7582429964570754, iteration: 309082
loss: 0.9938052296638489,grad_norm: 0.7522285174723451, iteration: 309083
loss: 0.996415913105011,grad_norm: 0.720700654624147, iteration: 309084
loss: 1.0381673574447632,grad_norm: 0.844809259088425, iteration: 309085
loss: 1.176317811012268,grad_norm: 0.9999996582879116, iteration: 309086
loss: 1.0282715559005737,grad_norm: 0.8720233426383621, iteration: 309087
loss: 1.013593316078186,grad_norm: 0.757422889849821, iteration: 309088
loss: 1.0072706937789917,grad_norm: 0.9215917428291424, iteration: 309089
loss: 1.0738496780395508,grad_norm: 0.9049010581007959, iteration: 309090
loss: 0.9815517067909241,grad_norm: 0.7609689296499592, iteration: 309091
loss: 0.9914907217025757,grad_norm: 0.8036132985842079, iteration: 309092
loss: 0.9889688491821289,grad_norm: 0.8322527907446863, iteration: 309093
loss: 0.9975252747535706,grad_norm: 0.7518675839242102, iteration: 309094
loss: 0.9881677627563477,grad_norm: 0.7607893819635743, iteration: 309095
loss: 1.013445258140564,grad_norm: 0.9999996887038349, iteration: 309096
loss: 1.0047755241394043,grad_norm: 0.885986778148005, iteration: 309097
loss: 1.0228333473205566,grad_norm: 0.9625613910867838, iteration: 309098
loss: 0.9895534515380859,grad_norm: 0.8272936777912485, iteration: 309099
loss: 1.0707072019577026,grad_norm: 0.7508705511460674, iteration: 309100
loss: 1.08272123336792,grad_norm: 0.9999994013203987, iteration: 309101
loss: 1.0556604862213135,grad_norm: 0.756370073847972, iteration: 309102
loss: 1.007033109664917,grad_norm: 0.9999992833468668, iteration: 309103
loss: 1.02721107006073,grad_norm: 0.6622335883862658, iteration: 309104
loss: 1.0392132997512817,grad_norm: 0.9957641823239314, iteration: 309105
loss: 0.9892078638076782,grad_norm: 0.8924906025729658, iteration: 309106
loss: 1.1040704250335693,grad_norm: 0.9999994142079942, iteration: 309107
loss: 0.9920487999916077,grad_norm: 0.99999908464978, iteration: 309108
loss: 0.9946839213371277,grad_norm: 0.7877520139898803, iteration: 309109
loss: 0.9958456158638,grad_norm: 0.97169022560754, iteration: 309110
loss: 0.9834060072898865,grad_norm: 0.7703924916265308, iteration: 309111
loss: 0.9782647490501404,grad_norm: 0.9999991813046838, iteration: 309112
loss: 1.0022234916687012,grad_norm: 0.7765742966612147, iteration: 309113
loss: 1.0965344905853271,grad_norm: 0.9999994078604346, iteration: 309114
loss: 1.0549261569976807,grad_norm: 0.9999995372911088, iteration: 309115
loss: 1.0301976203918457,grad_norm: 0.7836647608875277, iteration: 309116
loss: 1.0268183946609497,grad_norm: 0.9617600728836534, iteration: 309117
loss: 0.9739804267883301,grad_norm: 0.6904622631185999, iteration: 309118
loss: 0.9927389025688171,grad_norm: 0.8052585663565405, iteration: 309119
loss: 1.009154200553894,grad_norm: 0.9999993074111561, iteration: 309120
loss: 1.0413568019866943,grad_norm: 0.999999329243535, iteration: 309121
loss: 1.0027419328689575,grad_norm: 0.730885052021286, iteration: 309122
loss: 0.9936274290084839,grad_norm: 0.7704709949013994, iteration: 309123
loss: 1.0289530754089355,grad_norm: 0.8466160626637784, iteration: 309124
loss: 0.9927820563316345,grad_norm: 0.7557375073784756, iteration: 309125
loss: 1.0200915336608887,grad_norm: 0.8494415294867994, iteration: 309126
loss: 0.9790969491004944,grad_norm: 0.8411220128914574, iteration: 309127
loss: 0.9826774597167969,grad_norm: 0.8874539392535364, iteration: 309128
loss: 0.9870445132255554,grad_norm: 0.8506515648190729, iteration: 309129
loss: 0.9858446717262268,grad_norm: 0.9308319012525741, iteration: 309130
loss: 0.9788506627082825,grad_norm: 0.9179791383277781, iteration: 309131
loss: 0.9817694425582886,grad_norm: 0.8289765362692153, iteration: 309132
loss: 1.0325181484222412,grad_norm: 0.797932463921278, iteration: 309133
loss: 0.9851699471473694,grad_norm: 0.9071682844595896, iteration: 309134
loss: 0.9542670845985413,grad_norm: 0.8076069431475841, iteration: 309135
loss: 1.065308928489685,grad_norm: 0.8103103581322205, iteration: 309136
loss: 1.0838136672973633,grad_norm: 0.9999991889515708, iteration: 309137
loss: 1.055801510810852,grad_norm: 0.7851057837037304, iteration: 309138
loss: 0.9888665676116943,grad_norm: 0.9090937462743172, iteration: 309139
loss: 1.028648853302002,grad_norm: 0.8979954263499933, iteration: 309140
loss: 0.9740495085716248,grad_norm: 0.9333081982888792, iteration: 309141
loss: 0.9927172660827637,grad_norm: 0.7016110876535043, iteration: 309142
loss: 0.9966127872467041,grad_norm: 0.7309183506091437, iteration: 309143
loss: 0.9937503337860107,grad_norm: 0.8772990583676051, iteration: 309144
loss: 1.0001821517944336,grad_norm: 0.7626007834082942, iteration: 309145
loss: 0.9713007211685181,grad_norm: 0.8365163529899978, iteration: 309146
loss: 1.0106830596923828,grad_norm: 0.9999993984866734, iteration: 309147
loss: 0.9893271923065186,grad_norm: 0.7557493836872451, iteration: 309148
loss: 0.9990315437316895,grad_norm: 0.7482090602427117, iteration: 309149
loss: 0.9601949453353882,grad_norm: 0.9345732534235149, iteration: 309150
loss: 1.0305246114730835,grad_norm: 0.8613712501610091, iteration: 309151
loss: 1.0098686218261719,grad_norm: 0.7980010416565998, iteration: 309152
loss: 1.0035734176635742,grad_norm: 0.8869464553943133, iteration: 309153
loss: 1.014668583869934,grad_norm: 0.9999992782318655, iteration: 309154
loss: 0.9805396199226379,grad_norm: 0.999999171157148, iteration: 309155
loss: 0.9933100938796997,grad_norm: 0.9310206871962025, iteration: 309156
loss: 1.0170918703079224,grad_norm: 0.8537794731643172, iteration: 309157
loss: 0.9857499599456787,grad_norm: 0.9061675344728548, iteration: 309158
loss: 1.0664323568344116,grad_norm: 0.9999999820611319, iteration: 309159
loss: 1.0032780170440674,grad_norm: 0.9658700844594952, iteration: 309160
loss: 1.0349469184875488,grad_norm: 0.9999997597652496, iteration: 309161
loss: 1.0394866466522217,grad_norm: 0.8616740936077133, iteration: 309162
loss: 0.9757153987884521,grad_norm: 0.9023052183165481, iteration: 309163
loss: 0.9876415133476257,grad_norm: 0.9182382988988147, iteration: 309164
loss: 1.0882368087768555,grad_norm: 0.9999991703483871, iteration: 309165
loss: 1.0068928003311157,grad_norm: 0.8023004309552394, iteration: 309166
loss: 1.0082308053970337,grad_norm: 0.6571862793215174, iteration: 309167
loss: 1.0273100137710571,grad_norm: 0.8811596054806853, iteration: 309168
loss: 1.0041605234146118,grad_norm: 0.844817145953928, iteration: 309169
loss: 0.9953265190124512,grad_norm: 0.9271400970175808, iteration: 309170
loss: 0.9599061012268066,grad_norm: 0.8260858511536827, iteration: 309171
loss: 1.0559375286102295,grad_norm: 0.9999991545984431, iteration: 309172
loss: 1.0511057376861572,grad_norm: 0.8530809422579105, iteration: 309173
loss: 1.0193099975585938,grad_norm: 0.8598471223191628, iteration: 309174
loss: 0.9885417222976685,grad_norm: 0.8162151255980944, iteration: 309175
loss: 0.9952813982963562,grad_norm: 0.9999990630076632, iteration: 309176
loss: 0.9891673922538757,grad_norm: 0.7512927211289377, iteration: 309177
loss: 1.0071297883987427,grad_norm: 0.8652746764563612, iteration: 309178
loss: 1.0163257122039795,grad_norm: 0.9999993677893432, iteration: 309179
loss: 1.0301142930984497,grad_norm: 0.957935076182467, iteration: 309180
loss: 0.9844647645950317,grad_norm: 0.9074339966594644, iteration: 309181
loss: 1.023569107055664,grad_norm: 0.907954918766742, iteration: 309182
loss: 0.9785312414169312,grad_norm: 0.8879201477769374, iteration: 309183
loss: 0.996695339679718,grad_norm: 0.8073923659517036, iteration: 309184
loss: 1.0957674980163574,grad_norm: 0.9999995901910088, iteration: 309185
loss: 1.0623891353607178,grad_norm: 0.9999991050935192, iteration: 309186
loss: 0.9705207347869873,grad_norm: 0.8102999837888553, iteration: 309187
loss: 1.0192738771438599,grad_norm: 0.9999995885015813, iteration: 309188
loss: 1.0583243370056152,grad_norm: 0.9999993479220959, iteration: 309189
loss: 1.0201390981674194,grad_norm: 1.0000000213155613, iteration: 309190
loss: 0.9757336378097534,grad_norm: 0.8911265586280338, iteration: 309191
loss: 0.9944049715995789,grad_norm: 0.9999999365687808, iteration: 309192
loss: 1.0375144481658936,grad_norm: 0.8228394048527375, iteration: 309193
loss: 1.0184231996536255,grad_norm: 0.8373675440239816, iteration: 309194
loss: 1.0159319639205933,grad_norm: 0.9170714578830224, iteration: 309195
loss: 0.9904853701591492,grad_norm: 0.8861023010187796, iteration: 309196
loss: 0.9768285751342773,grad_norm: 0.7480113940174836, iteration: 309197
loss: 1.0327540636062622,grad_norm: 0.8810864974285932, iteration: 309198
loss: 1.0126447677612305,grad_norm: 0.8637674675067151, iteration: 309199
loss: 1.0136464834213257,grad_norm: 0.9999997707690399, iteration: 309200
loss: 1.0261741876602173,grad_norm: 0.9959275673200682, iteration: 309201
loss: 0.9938293099403381,grad_norm: 0.9999993970086517, iteration: 309202
loss: 1.0432401895523071,grad_norm: 0.9999991259488719, iteration: 309203
loss: 0.9613707065582275,grad_norm: 0.6978685017563172, iteration: 309204
loss: 1.0147629976272583,grad_norm: 0.834937190569946, iteration: 309205
loss: 1.0286805629730225,grad_norm: 0.7240820595172961, iteration: 309206
loss: 0.971096932888031,grad_norm: 0.8770562109644647, iteration: 309207
loss: 0.9920600056648254,grad_norm: 0.9999991357621395, iteration: 309208
loss: 1.046376347541809,grad_norm: 0.7750599283344678, iteration: 309209
loss: 0.9828149080276489,grad_norm: 0.9343759290903146, iteration: 309210
loss: 1.0228759050369263,grad_norm: 0.9999999487488521, iteration: 309211
loss: 1.076918601989746,grad_norm: 0.9999991365516051, iteration: 309212
loss: 1.051864743232727,grad_norm: 0.9232586310883584, iteration: 309213
loss: 1.0021296739578247,grad_norm: 0.8868748069058725, iteration: 309214
loss: 0.9912261366844177,grad_norm: 0.8755680426651343, iteration: 309215
loss: 1.0197932720184326,grad_norm: 0.9037823847849401, iteration: 309216
loss: 0.9769222736358643,grad_norm: 0.8743898525435202, iteration: 309217
loss: 1.0196377038955688,grad_norm: 0.9999992630278567, iteration: 309218
loss: 0.9932834506034851,grad_norm: 0.9084810671259949, iteration: 309219
loss: 0.9927465915679932,grad_norm: 0.7964423471606907, iteration: 309220
loss: 1.0472570657730103,grad_norm: 0.9999996933435599, iteration: 309221
loss: 1.1611378192901611,grad_norm: 0.999999192450926, iteration: 309222
loss: 1.026735544204712,grad_norm: 0.7939978720799377, iteration: 309223
loss: 0.9796373248100281,grad_norm: 0.999999615557628, iteration: 309224
loss: 0.973970353603363,grad_norm: 0.8078875930521349, iteration: 309225
loss: 1.0801362991333008,grad_norm: 0.9999998640322719, iteration: 309226
loss: 1.0117250680923462,grad_norm: 0.8597356190675355, iteration: 309227
loss: 0.9898508191108704,grad_norm: 0.8937288234508437, iteration: 309228
loss: 1.0135918855667114,grad_norm: 0.9791027415988729, iteration: 309229
loss: 1.0031044483184814,grad_norm: 0.833204841392913, iteration: 309230
loss: 1.1056207418441772,grad_norm: 0.999999448337477, iteration: 309231
loss: 1.038630723953247,grad_norm: 0.8160839310940962, iteration: 309232
loss: 1.0109319686889648,grad_norm: 0.9394329764351781, iteration: 309233
loss: 1.0766115188598633,grad_norm: 0.9999996638716544, iteration: 309234
loss: 1.035766363143921,grad_norm: 0.8735556673731238, iteration: 309235
loss: 0.999151349067688,grad_norm: 0.999999898131691, iteration: 309236
loss: 1.024368405342102,grad_norm: 0.801637182172138, iteration: 309237
loss: 1.0125579833984375,grad_norm: 0.8180627279113329, iteration: 309238
loss: 0.960213840007782,grad_norm: 0.8807661169820006, iteration: 309239
loss: 0.9735551476478577,grad_norm: 0.7942889912204154, iteration: 309240
loss: 1.0109705924987793,grad_norm: 0.9999991685084358, iteration: 309241
loss: 0.9529630541801453,grad_norm: 0.741182379172683, iteration: 309242
loss: 1.0273059606552124,grad_norm: 0.9280171438313688, iteration: 309243
loss: 1.153135895729065,grad_norm: 0.9999999974154673, iteration: 309244
loss: 0.9769496917724609,grad_norm: 0.7751073609177219, iteration: 309245
loss: 0.9906274080276489,grad_norm: 0.8911075176464728, iteration: 309246
loss: 1.0031222105026245,grad_norm: 0.8248573828938317, iteration: 309247
loss: 0.9906459450721741,grad_norm: 0.9368478645196606, iteration: 309248
loss: 1.0161924362182617,grad_norm: 0.7523302373548001, iteration: 309249
loss: 0.988980770111084,grad_norm: 0.9999996444210607, iteration: 309250
loss: 0.9740732908248901,grad_norm: 0.7576142215863104, iteration: 309251
loss: 1.0223015546798706,grad_norm: 0.9171558394261926, iteration: 309252
loss: 1.1367632150650024,grad_norm: 0.9999991172369928, iteration: 309253
loss: 1.0140455961227417,grad_norm: 0.9999992031866598, iteration: 309254
loss: 1.1145410537719727,grad_norm: 0.9999999129877304, iteration: 309255
loss: 1.0998328924179077,grad_norm: 0.9999992578558426, iteration: 309256
loss: 1.014373779296875,grad_norm: 0.9104989513415884, iteration: 309257
loss: 0.9904384016990662,grad_norm: 0.8225785357061473, iteration: 309258
loss: 1.1054105758666992,grad_norm: 0.9999991200583793, iteration: 309259
loss: 1.0055900812149048,grad_norm: 0.735503763321127, iteration: 309260
loss: 0.9827384948730469,grad_norm: 0.9891198441521292, iteration: 309261
loss: 1.0282701253890991,grad_norm: 0.9127698985540182, iteration: 309262
loss: 1.0504144430160522,grad_norm: 0.9999996101345132, iteration: 309263
loss: 1.1658494472503662,grad_norm: 0.9999992818706839, iteration: 309264
loss: 1.0178797245025635,grad_norm: 0.819843908131888, iteration: 309265
loss: 1.0879497528076172,grad_norm: 0.9999997588275887, iteration: 309266
loss: 1.074990153312683,grad_norm: 0.9580484260244423, iteration: 309267
loss: 1.1612026691436768,grad_norm: 0.9999994929619227, iteration: 309268
loss: 1.1048436164855957,grad_norm: 0.999999974106261, iteration: 309269
loss: 1.0861774682998657,grad_norm: 0.9379475040099349, iteration: 309270
loss: 0.997599720954895,grad_norm: 0.9999994218297028, iteration: 309271
loss: 0.9956498146057129,grad_norm: 0.876452749199107, iteration: 309272
loss: 1.0749309062957764,grad_norm: 0.8979212882318478, iteration: 309273
loss: 1.0031578540802002,grad_norm: 0.8770085793414689, iteration: 309274
loss: 1.000303030014038,grad_norm: 0.8662225797656335, iteration: 309275
loss: 1.0332419872283936,grad_norm: 0.9999995672133116, iteration: 309276
loss: 0.9870491623878479,grad_norm: 0.8397467010854276, iteration: 309277
loss: 0.9949429631233215,grad_norm: 0.8225162567678624, iteration: 309278
loss: 1.030494213104248,grad_norm: 0.8943986402945298, iteration: 309279
loss: 0.9940971732139587,grad_norm: 0.9999990586657821, iteration: 309280
loss: 1.0448627471923828,grad_norm: 0.8367593767339896, iteration: 309281
loss: 1.291771411895752,grad_norm: 0.999999697812359, iteration: 309282
loss: 1.0780514478683472,grad_norm: 0.9215512358318814, iteration: 309283
loss: 0.9857330322265625,grad_norm: 0.8793242932299485, iteration: 309284
loss: 1.019550085067749,grad_norm: 0.9999998423074686, iteration: 309285
loss: 1.1322526931762695,grad_norm: 0.9999998038346709, iteration: 309286
loss: 1.0493048429489136,grad_norm: 0.7399120013569294, iteration: 309287
loss: 1.0260967016220093,grad_norm: 0.739960861636793, iteration: 309288
loss: 1.0054047107696533,grad_norm: 0.9999991317823468, iteration: 309289
loss: 1.0320343971252441,grad_norm: 0.772294171335905, iteration: 309290
loss: 1.0103850364685059,grad_norm: 0.9999992090091367, iteration: 309291
loss: 1.0647579431533813,grad_norm: 0.9999996503808832, iteration: 309292
loss: 1.0273993015289307,grad_norm: 0.9999999607696479, iteration: 309293
loss: 1.0852090120315552,grad_norm: 1.0000000049558058, iteration: 309294
loss: 1.2306671142578125,grad_norm: 0.9999997702115494, iteration: 309295
loss: 1.004847764968872,grad_norm: 0.9999999343636277, iteration: 309296
loss: 0.9792353510856628,grad_norm: 0.7785356119676633, iteration: 309297
loss: 0.9919319748878479,grad_norm: 0.7450816081783829, iteration: 309298
loss: 1.0495573282241821,grad_norm: 1.0000000130507105, iteration: 309299
loss: 1.0644463300704956,grad_norm: 0.9999990799086119, iteration: 309300
loss: 1.0105725526809692,grad_norm: 0.8091987605542927, iteration: 309301
loss: 0.9747441411018372,grad_norm: 0.7015182739572375, iteration: 309302
loss: 1.0950508117675781,grad_norm: 0.9999997803686486, iteration: 309303
loss: 0.9791573286056519,grad_norm: 0.799548873662119, iteration: 309304
loss: 0.999756395816803,grad_norm: 0.879696584298664, iteration: 309305
loss: 1.0070204734802246,grad_norm: 0.6933670156202866, iteration: 309306
loss: 0.9808911681175232,grad_norm: 0.8826231832237713, iteration: 309307
loss: 0.9829890727996826,grad_norm: 0.7784422181212856, iteration: 309308
loss: 0.9815047979354858,grad_norm: 0.9999991046013079, iteration: 309309
loss: 1.0472567081451416,grad_norm: 0.9999993963391128, iteration: 309310
loss: 1.009272575378418,grad_norm: 0.7572247314421607, iteration: 309311
loss: 0.9646599888801575,grad_norm: 0.8465832221391647, iteration: 309312
loss: 0.978197455406189,grad_norm: 0.7924921601297701, iteration: 309313
loss: 0.9889982342720032,grad_norm: 0.9726277189755078, iteration: 309314
loss: 1.0448356866836548,grad_norm: 0.9999999250788261, iteration: 309315
loss: 1.025587558746338,grad_norm: 0.8500242465385086, iteration: 309316
loss: 0.9939026236534119,grad_norm: 0.8685399003160317, iteration: 309317
loss: 0.9886270761489868,grad_norm: 0.8551171725814517, iteration: 309318
loss: 1.0165729522705078,grad_norm: 0.8235291399189194, iteration: 309319
loss: 1.004178524017334,grad_norm: 0.7584720862580743, iteration: 309320
loss: 1.090846061706543,grad_norm: 0.9999991230227202, iteration: 309321
loss: 1.0914698839187622,grad_norm: 0.7511060944837548, iteration: 309322
loss: 1.0235986709594727,grad_norm: 0.9999991620180081, iteration: 309323
loss: 0.9754544496536255,grad_norm: 0.9999991314274682, iteration: 309324
loss: 0.9799361824989319,grad_norm: 0.8670797518386932, iteration: 309325
loss: 0.9977966547012329,grad_norm: 0.9151090169978986, iteration: 309326
loss: 1.0106308460235596,grad_norm: 0.9450119884006173, iteration: 309327
loss: 0.9896489381790161,grad_norm: 0.9999995169711385, iteration: 309328
loss: 1.038955569267273,grad_norm: 0.8374473214114954, iteration: 309329
loss: 1.0043236017227173,grad_norm: 0.9237315953216872, iteration: 309330
loss: 0.9708381295204163,grad_norm: 0.8597812628842034, iteration: 309331
loss: 1.0225303173065186,grad_norm: 0.7161320265303401, iteration: 309332
loss: 0.9960303902626038,grad_norm: 0.999999298862725, iteration: 309333
loss: 0.9985578656196594,grad_norm: 0.8192310976704231, iteration: 309334
loss: 0.954302191734314,grad_norm: 0.8381481048525885, iteration: 309335
loss: 1.0159844160079956,grad_norm: 0.9999993500129667, iteration: 309336
loss: 1.065785527229309,grad_norm: 0.9999995675599591, iteration: 309337
loss: 1.004727840423584,grad_norm: 0.9999991185718647, iteration: 309338
loss: 0.985612154006958,grad_norm: 0.8049071730442122, iteration: 309339
loss: 1.0091913938522339,grad_norm: 0.8198501647406827, iteration: 309340
loss: 1.0588148832321167,grad_norm: 0.999999515195485, iteration: 309341
loss: 0.9968918561935425,grad_norm: 0.8020889817315605, iteration: 309342
loss: 1.029093861579895,grad_norm: 0.857666143639116, iteration: 309343
loss: 1.020432949066162,grad_norm: 0.8842467897783651, iteration: 309344
loss: 1.0244914293289185,grad_norm: 0.9999992564348171, iteration: 309345
loss: 1.0324915647506714,grad_norm: 0.9999996608737199, iteration: 309346
loss: 1.169115662574768,grad_norm: 0.9999992043993731, iteration: 309347
loss: 1.067846655845642,grad_norm: 0.9999996740944435, iteration: 309348
loss: 0.9928582906723022,grad_norm: 0.8647955065333076, iteration: 309349
loss: 0.9772643446922302,grad_norm: 0.8102357655845848, iteration: 309350
loss: 0.99668949842453,grad_norm: 0.9999996541936081, iteration: 309351
loss: 1.112040400505066,grad_norm: 0.8717532200427933, iteration: 309352
loss: 1.0212278366088867,grad_norm: 0.9999999614703176, iteration: 309353
loss: 1.006552815437317,grad_norm: 0.8471079588022652, iteration: 309354
loss: 0.9509335160255432,grad_norm: 0.9026887284455324, iteration: 309355
loss: 1.0235059261322021,grad_norm: 0.8585424714482269, iteration: 309356
loss: 1.0262715816497803,grad_norm: 0.7692183898421099, iteration: 309357
loss: 0.9694112539291382,grad_norm: 0.9257959814524788, iteration: 309358
loss: 1.0312546491622925,grad_norm: 0.9999992495064939, iteration: 309359
loss: 1.0131850242614746,grad_norm: 0.7768449988130364, iteration: 309360
loss: 1.0582791566848755,grad_norm: 0.9633793961479873, iteration: 309361
loss: 0.9837426543235779,grad_norm: 0.9349354161566136, iteration: 309362
loss: 0.9985477924346924,grad_norm: 0.8476174987060477, iteration: 309363
loss: 1.0613861083984375,grad_norm: 0.9999989590073123, iteration: 309364
loss: 1.086432933807373,grad_norm: 0.999999038635434, iteration: 309365
loss: 0.9960029721260071,grad_norm: 0.8789823030718205, iteration: 309366
loss: 1.030470848083496,grad_norm: 0.9896200051918834, iteration: 309367
loss: 1.0290515422821045,grad_norm: 0.8688928779337965, iteration: 309368
loss: 0.9808424115180969,grad_norm: 0.7747658619790507, iteration: 309369
loss: 0.9996518492698669,grad_norm: 0.8656931383156513, iteration: 309370
loss: 1.0233274698257446,grad_norm: 0.856081920240636, iteration: 309371
loss: 1.0870633125305176,grad_norm: 0.9812075499822723, iteration: 309372
loss: 1.0780659914016724,grad_norm: 0.9999999396937543, iteration: 309373
loss: 0.961837649345398,grad_norm: 0.8665455736303885, iteration: 309374
loss: 0.984240710735321,grad_norm: 0.7696376733579269, iteration: 309375
loss: 1.0787951946258545,grad_norm: 0.9999998981574181, iteration: 309376
loss: 0.9931522011756897,grad_norm: 0.6888344207417575, iteration: 309377
loss: 1.1240497827529907,grad_norm: 0.9999996067034843, iteration: 309378
loss: 0.9765452742576599,grad_norm: 0.8099364525615734, iteration: 309379
loss: 0.9658582806587219,grad_norm: 0.8289018309606563, iteration: 309380
loss: 1.0101122856140137,grad_norm: 0.999999028565474, iteration: 309381
loss: 1.0217704772949219,grad_norm: 0.8403269088265741, iteration: 309382
loss: 1.013286828994751,grad_norm: 0.8997182127763353, iteration: 309383
loss: 1.2282506227493286,grad_norm: 0.9999996797484535, iteration: 309384
loss: 1.011053442955017,grad_norm: 0.8618766945220038, iteration: 309385
loss: 1.0183380842208862,grad_norm: 0.9809648123592629, iteration: 309386
loss: 0.9601110816001892,grad_norm: 0.7611088436461817, iteration: 309387
loss: 1.0419683456420898,grad_norm: 0.9879193645346287, iteration: 309388
loss: 0.97928386926651,grad_norm: 0.9654763704052707, iteration: 309389
loss: 1.0049957036972046,grad_norm: 0.9491043451137005, iteration: 309390
loss: 0.984800398349762,grad_norm: 0.9772154734483685, iteration: 309391
loss: 0.9774453639984131,grad_norm: 0.8774498637389817, iteration: 309392
loss: 1.0341992378234863,grad_norm: 0.9999996318551467, iteration: 309393
loss: 0.9945750832557678,grad_norm: 0.8052331184891321, iteration: 309394
loss: 1.0342684984207153,grad_norm: 0.8396769528152769, iteration: 309395
loss: 0.9811487793922424,grad_norm: 0.7656742034301143, iteration: 309396
loss: 1.0134179592132568,grad_norm: 0.7322750996741697, iteration: 309397
loss: 0.9658526182174683,grad_norm: 0.7647082258452451, iteration: 309398
loss: 0.9595101475715637,grad_norm: 0.9999992456527687, iteration: 309399
loss: 1.0294370651245117,grad_norm: 0.7810936408986991, iteration: 309400
loss: 0.9858018159866333,grad_norm: 0.7876903714546715, iteration: 309401
loss: 0.9840524792671204,grad_norm: 0.9259944616911948, iteration: 309402
loss: 0.9815227389335632,grad_norm: 0.7577990301478807, iteration: 309403
loss: 0.978780210018158,grad_norm: 0.7636792409838163, iteration: 309404
loss: 0.9648001194000244,grad_norm: 0.9999991465279469, iteration: 309405
loss: 0.9913450479507446,grad_norm: 0.8041493632571786, iteration: 309406
loss: 1.020552396774292,grad_norm: 0.8320656163315719, iteration: 309407
loss: 0.9912672638893127,grad_norm: 0.8569143967060475, iteration: 309408
loss: 1.0382859706878662,grad_norm: 0.9999992031430582, iteration: 309409
loss: 1.0850943326950073,grad_norm: 0.8966742385991178, iteration: 309410
loss: 0.9850635528564453,grad_norm: 0.7752029695442328, iteration: 309411
loss: 1.019782304763794,grad_norm: 0.7728821906936667, iteration: 309412
loss: 1.007813572883606,grad_norm: 0.8828700668469461, iteration: 309413
loss: 1.0016274452209473,grad_norm: 0.7452992667383029, iteration: 309414
loss: 1.023269534111023,grad_norm: 0.8424129882626796, iteration: 309415
loss: 0.9591448903083801,grad_norm: 0.9067185361112843, iteration: 309416
loss: 0.9753991961479187,grad_norm: 0.9007084853293369, iteration: 309417
loss: 0.9956968426704407,grad_norm: 0.6644322112397286, iteration: 309418
loss: 1.0098154544830322,grad_norm: 0.9999993324699874, iteration: 309419
loss: 0.9868239164352417,grad_norm: 0.8640724593769036, iteration: 309420
loss: 0.987201452255249,grad_norm: 0.9949500657931625, iteration: 309421
loss: 1.043960452079773,grad_norm: 0.9077212294878598, iteration: 309422
loss: 1.0395336151123047,grad_norm: 0.9999998543857408, iteration: 309423
loss: 0.9934362173080444,grad_norm: 0.7460547726813823, iteration: 309424
loss: 1.0308853387832642,grad_norm: 0.8760770082643033, iteration: 309425
loss: 0.9989815354347229,grad_norm: 0.9999990663707774, iteration: 309426
loss: 1.0660136938095093,grad_norm: 0.9999996730140779, iteration: 309427
loss: 1.0080639123916626,grad_norm: 0.9414943923068881, iteration: 309428
loss: 1.0933696031570435,grad_norm: 0.999999161128817, iteration: 309429
loss: 0.9539338946342468,grad_norm: 0.7471049072303223, iteration: 309430
loss: 1.0082659721374512,grad_norm: 0.7784053678229654, iteration: 309431
loss: 0.9794623255729675,grad_norm: 0.9927922121182169, iteration: 309432
loss: 1.0148694515228271,grad_norm: 0.9915267531244273, iteration: 309433
loss: 0.9580296874046326,grad_norm: 0.7915807812238522, iteration: 309434
loss: 1.069567322731018,grad_norm: 0.999999882378747, iteration: 309435
loss: 0.9899879693984985,grad_norm: 0.7960037447480525, iteration: 309436
loss: 1.0813398361206055,grad_norm: 0.9046783709931719, iteration: 309437
loss: 0.9615115523338318,grad_norm: 0.8273447627259447, iteration: 309438
loss: 0.9870659112930298,grad_norm: 0.999999151348498, iteration: 309439
loss: 1.0905870199203491,grad_norm: 0.9999996528811482, iteration: 309440
loss: 1.026046633720398,grad_norm: 0.7586734655509401, iteration: 309441
loss: 1.0174130201339722,grad_norm: 0.9999990757495024, iteration: 309442
loss: 1.0072894096374512,grad_norm: 0.9940987403716721, iteration: 309443
loss: 0.9948795437812805,grad_norm: 0.8120590323284681, iteration: 309444
loss: 0.9947693943977356,grad_norm: 0.9891519047486259, iteration: 309445
loss: 0.9445629715919495,grad_norm: 0.999999012751863, iteration: 309446
loss: 0.9896457195281982,grad_norm: 0.8830509588146638, iteration: 309447
loss: 1.0165506601333618,grad_norm: 0.9182057869472185, iteration: 309448
loss: 1.0042763948440552,grad_norm: 0.9999999800189135, iteration: 309449
loss: 0.9989433288574219,grad_norm: 0.9414787436096462, iteration: 309450
loss: 0.9884719252586365,grad_norm: 0.9531424391093508, iteration: 309451
loss: 0.9533734321594238,grad_norm: 0.787339930686113, iteration: 309452
loss: 0.9869365692138672,grad_norm: 0.8656357659662578, iteration: 309453
loss: 1.0928412675857544,grad_norm: 0.8131330750566718, iteration: 309454
loss: 1.0091906785964966,grad_norm: 0.9301187275969524, iteration: 309455
loss: 1.026633620262146,grad_norm: 0.7832915769583783, iteration: 309456
loss: 1.0604826211929321,grad_norm: 0.9999997387912162, iteration: 309457
loss: 1.0413126945495605,grad_norm: 0.9378686369890574, iteration: 309458
loss: 1.0052168369293213,grad_norm: 0.9642589872536399, iteration: 309459
loss: 0.9886615872383118,grad_norm: 0.9570122060858863, iteration: 309460
loss: 1.0238004922866821,grad_norm: 0.999999155660401, iteration: 309461
loss: 0.9834301471710205,grad_norm: 0.7973794808038073, iteration: 309462
loss: 0.9608915448188782,grad_norm: 0.9040649413196921, iteration: 309463
loss: 0.999407947063446,grad_norm: 0.8321684251077992, iteration: 309464
loss: 1.0479334592819214,grad_norm: 0.9999991263441207, iteration: 309465
loss: 0.999294102191925,grad_norm: 0.9519865122724358, iteration: 309466
loss: 0.9997822642326355,grad_norm: 0.9146737981913875, iteration: 309467
loss: 1.0160678625106812,grad_norm: 0.7940548351694079, iteration: 309468
loss: 0.993896484375,grad_norm: 0.8385229572068357, iteration: 309469
loss: 0.983127236366272,grad_norm: 0.7829353798840734, iteration: 309470
loss: 1.0588531494140625,grad_norm: 0.9999992162107274, iteration: 309471
loss: 1.0052692890167236,grad_norm: 0.8810571456218587, iteration: 309472
loss: 0.9918895363807678,grad_norm: 0.8623224313304967, iteration: 309473
loss: 1.0089730024337769,grad_norm: 0.948957372289892, iteration: 309474
loss: 1.0209935903549194,grad_norm: 0.9999991872511057, iteration: 309475
loss: 0.9372937679290771,grad_norm: 0.7501795910050227, iteration: 309476
loss: 1.0077162981033325,grad_norm: 0.9999993145140291, iteration: 309477
loss: 1.045665979385376,grad_norm: 0.9999997874293812, iteration: 309478
loss: 1.0400508642196655,grad_norm: 0.9999995032902458, iteration: 309479
loss: 1.0684763193130493,grad_norm: 0.9999991829072272, iteration: 309480
loss: 0.9914152026176453,grad_norm: 0.818931043596755, iteration: 309481
loss: 0.996019721031189,grad_norm: 0.9118505987236674, iteration: 309482
loss: 1.0846225023269653,grad_norm: 0.923744397030051, iteration: 309483
loss: 1.0374057292938232,grad_norm: 0.9060023189451166, iteration: 309484
loss: 1.0087441205978394,grad_norm: 0.719561189227843, iteration: 309485
loss: 1.005228042602539,grad_norm: 0.9766561252803697, iteration: 309486
loss: 1.0053050518035889,grad_norm: 0.9999995455402081, iteration: 309487
loss: 1.001844882965088,grad_norm: 0.8799541819121686, iteration: 309488
loss: 1.0158265829086304,grad_norm: 0.9999992087454899, iteration: 309489
loss: 0.9936574697494507,grad_norm: 0.8152612618963643, iteration: 309490
loss: 0.925416111946106,grad_norm: 0.9344099512163374, iteration: 309491
loss: 1.0314507484436035,grad_norm: 0.9651866429989787, iteration: 309492
loss: 1.0471974611282349,grad_norm: 0.999999267070091, iteration: 309493
loss: 1.0236469507217407,grad_norm: 0.9143972899538918, iteration: 309494
loss: 1.042049765586853,grad_norm: 0.8658803792553501, iteration: 309495
loss: 1.064759373664856,grad_norm: 0.9310101482846286, iteration: 309496
loss: 0.9988067746162415,grad_norm: 0.7227185373955397, iteration: 309497
loss: 1.0599050521850586,grad_norm: 0.8847990432366893, iteration: 309498
loss: 1.024827480316162,grad_norm: 0.9999994617690073, iteration: 309499
loss: 1.0365333557128906,grad_norm: 0.8654565358745899, iteration: 309500
loss: 1.0019214153289795,grad_norm: 0.7625987107342738, iteration: 309501
loss: 1.0411560535430908,grad_norm: 0.9430338292292306, iteration: 309502
loss: 1.0832494497299194,grad_norm: 0.9999996749271216, iteration: 309503
loss: 0.9806563258171082,grad_norm: 0.8831739612225542, iteration: 309504
loss: 1.0139209032058716,grad_norm: 0.999999228483444, iteration: 309505
loss: 1.0139765739440918,grad_norm: 0.9999996893323205, iteration: 309506
loss: 0.9819554090499878,grad_norm: 0.8303249364527171, iteration: 309507
loss: 1.0420390367507935,grad_norm: 0.812536826512624, iteration: 309508
loss: 1.0069427490234375,grad_norm: 0.8205827932647889, iteration: 309509
loss: 1.0023980140686035,grad_norm: 0.8188489397446663, iteration: 309510
loss: 0.9749603867530823,grad_norm: 0.99999961897901, iteration: 309511
loss: 1.0107349157333374,grad_norm: 0.9414398775881168, iteration: 309512
loss: 1.0347822904586792,grad_norm: 0.8879062082314598, iteration: 309513
loss: 1.0180492401123047,grad_norm: 0.9999991794090318, iteration: 309514
loss: 1.0024378299713135,grad_norm: 0.9471168433378773, iteration: 309515
loss: 0.9922457933425903,grad_norm: 0.9999992872183998, iteration: 309516
loss: 0.9815425276756287,grad_norm: 0.9999996982076593, iteration: 309517
loss: 0.984204113483429,grad_norm: 0.7941535155941184, iteration: 309518
loss: 0.988388180732727,grad_norm: 0.8745268703969739, iteration: 309519
loss: 1.0212429761886597,grad_norm: 0.7795998966283546, iteration: 309520
loss: 1.0283151865005493,grad_norm: 0.9517635915280227, iteration: 309521
loss: 1.0060960054397583,grad_norm: 0.8164331032018496, iteration: 309522
loss: 0.9552560448646545,grad_norm: 0.8779848861344344, iteration: 309523
loss: 0.9479427337646484,grad_norm: 0.9107895399993644, iteration: 309524
loss: 1.0115375518798828,grad_norm: 0.9647210114492356, iteration: 309525
loss: 1.009131908416748,grad_norm: 0.7312078577932769, iteration: 309526
loss: 0.9962646961212158,grad_norm: 0.9999997654414848, iteration: 309527
loss: 0.9558666348457336,grad_norm: 0.9685658630594534, iteration: 309528
loss: 1.0064924955368042,grad_norm: 0.7430071133343034, iteration: 309529
loss: 1.0283656120300293,grad_norm: 0.9999999407036971, iteration: 309530
loss: 1.0075063705444336,grad_norm: 0.9228544886896659, iteration: 309531
loss: 0.990479588508606,grad_norm: 0.886538701332862, iteration: 309532
loss: 1.0075063705444336,grad_norm: 0.8308883977452807, iteration: 309533
loss: 1.0227657556533813,grad_norm: 0.9059583888363235, iteration: 309534
loss: 0.984862208366394,grad_norm: 0.7640184896073137, iteration: 309535
loss: 1.0247353315353394,grad_norm: 0.8970061237447733, iteration: 309536
loss: 1.0000510215759277,grad_norm: 0.9657887981498888, iteration: 309537
loss: 1.0800890922546387,grad_norm: 0.9999993083885581, iteration: 309538
loss: 0.9857944250106812,grad_norm: 0.9999991724197692, iteration: 309539
loss: 0.9809454679489136,grad_norm: 0.811168540347841, iteration: 309540
loss: 0.978929877281189,grad_norm: 0.9995570413102526, iteration: 309541
loss: 1.096726655960083,grad_norm: 0.9999996176572192, iteration: 309542
loss: 1.016234040260315,grad_norm: 0.7622723079857043, iteration: 309543
loss: 0.9930339455604553,grad_norm: 0.9999989651151951, iteration: 309544
loss: 1.049067735671997,grad_norm: 0.8260462799938992, iteration: 309545
loss: 0.9309945702552795,grad_norm: 0.8614311937776639, iteration: 309546
loss: 1.0016322135925293,grad_norm: 0.8077626555817586, iteration: 309547
loss: 0.9915332794189453,grad_norm: 0.9999990793211947, iteration: 309548
loss: 1.015942096710205,grad_norm: 0.9999992568579198, iteration: 309549
loss: 0.9588010907173157,grad_norm: 0.8602513902411308, iteration: 309550
loss: 0.993966817855835,grad_norm: 0.8786194105088163, iteration: 309551
loss: 1.0352593660354614,grad_norm: 0.9999991057794692, iteration: 309552
loss: 0.9687113761901855,grad_norm: 0.767868687872639, iteration: 309553
loss: 1.0045546293258667,grad_norm: 0.9046224018152396, iteration: 309554
loss: 0.9619341492652893,grad_norm: 0.8238635679517905, iteration: 309555
loss: 1.0047630071640015,grad_norm: 0.9161148103767328, iteration: 309556
loss: 0.9952998757362366,grad_norm: 0.8854777847666664, iteration: 309557
loss: 0.9908668994903564,grad_norm: 0.8313177838986551, iteration: 309558
loss: 1.069137692451477,grad_norm: 0.9999995755838104, iteration: 309559
loss: 1.0054062604904175,grad_norm: 0.9999999377862842, iteration: 309560
loss: 0.9599612355232239,grad_norm: 0.7604131696614503, iteration: 309561
loss: 0.9738142490386963,grad_norm: 0.9999994697945499, iteration: 309562
loss: 1.0182690620422363,grad_norm: 0.8110813525254746, iteration: 309563
loss: 1.0294415950775146,grad_norm: 0.7843006206506348, iteration: 309564
loss: 0.9695879220962524,grad_norm: 0.9999991191101947, iteration: 309565
loss: 1.0332850217819214,grad_norm: 0.8447013630855934, iteration: 309566
loss: 1.0320398807525635,grad_norm: 0.776423861359427, iteration: 309567
loss: 1.0473066568374634,grad_norm: 0.9999990779108527, iteration: 309568
loss: 1.007204532623291,grad_norm: 0.7749339914866672, iteration: 309569
loss: 0.9877930879592896,grad_norm: 0.9552221503614913, iteration: 309570
loss: 1.0440605878829956,grad_norm: 0.8452349892690826, iteration: 309571
loss: 0.9997234344482422,grad_norm: 0.7472379089797566, iteration: 309572
loss: 0.9681127667427063,grad_norm: 0.696824218378538, iteration: 309573
loss: 1.0597987174987793,grad_norm: 0.9999994429124396, iteration: 309574
loss: 1.0094457864761353,grad_norm: 0.9208171094654709, iteration: 309575
loss: 1.0297561883926392,grad_norm: 0.9530812114470998, iteration: 309576
loss: 0.9951474070549011,grad_norm: 0.8272240919628854, iteration: 309577
loss: 1.001774787902832,grad_norm: 0.759951114167923, iteration: 309578
loss: 0.964939534664154,grad_norm: 0.7321263057168588, iteration: 309579
loss: 0.9820259809494019,grad_norm: 0.9999991443434919, iteration: 309580
loss: 0.998992919921875,grad_norm: 0.9165789855179856, iteration: 309581
loss: 0.9719580411911011,grad_norm: 0.8051539922926514, iteration: 309582
loss: 0.9994959831237793,grad_norm: 0.999999574987342, iteration: 309583
loss: 1.0619345903396606,grad_norm: 0.999999243171276, iteration: 309584
loss: 0.9785121083259583,grad_norm: 0.8476019203930669, iteration: 309585
loss: 1.0206725597381592,grad_norm: 0.8650435625197953, iteration: 309586
loss: 1.0083781480789185,grad_norm: 0.9042628209700159, iteration: 309587
loss: 1.0458916425704956,grad_norm: 0.998341393320439, iteration: 309588
loss: 1.1823197603225708,grad_norm: 0.999999319399296, iteration: 309589
loss: 0.9948122501373291,grad_norm: 0.78547317133313, iteration: 309590
loss: 0.9577938914299011,grad_norm: 0.9652901608988289, iteration: 309591
loss: 1.0057228803634644,grad_norm: 0.9999991595693961, iteration: 309592
loss: 0.9898563623428345,grad_norm: 0.7994465853940086, iteration: 309593
loss: 0.9687840938568115,grad_norm: 0.9153357815132397, iteration: 309594
loss: 0.997880220413208,grad_norm: 0.7439621398456202, iteration: 309595
loss: 1.0157432556152344,grad_norm: 0.999999435332302, iteration: 309596
loss: 0.9858624935150146,grad_norm: 0.9066935072731137, iteration: 309597
loss: 1.0241342782974243,grad_norm: 0.9999996952560952, iteration: 309598
loss: 1.0036197900772095,grad_norm: 0.850496577065992, iteration: 309599
loss: 1.0019917488098145,grad_norm: 0.9659648676687722, iteration: 309600
loss: 1.0096383094787598,grad_norm: 0.8347593679002785, iteration: 309601
loss: 0.9940392971038818,grad_norm: 0.8521976103807557, iteration: 309602
loss: 1.0022870302200317,grad_norm: 0.9999991500219255, iteration: 309603
loss: 0.9897388219833374,grad_norm: 1.0000000640403603, iteration: 309604
loss: 1.0213419198989868,grad_norm: 0.9999989896487337, iteration: 309605
loss: 1.0268752574920654,grad_norm: 0.9134739433046257, iteration: 309606
loss: 1.0438050031661987,grad_norm: 0.8960525011412144, iteration: 309607
loss: 1.0606881380081177,grad_norm: 0.8753164094137398, iteration: 309608
loss: 0.9827744364738464,grad_norm: 0.9999998995690128, iteration: 309609
loss: 1.0820444822311401,grad_norm: 0.9999998334527335, iteration: 309610
loss: 1.0187554359436035,grad_norm: 0.7980000383590076, iteration: 309611
loss: 0.9866834282875061,grad_norm: 0.8503414796996805, iteration: 309612
loss: 1.0411396026611328,grad_norm: 0.9999998075840243, iteration: 309613
loss: 1.0616590976715088,grad_norm: 0.8077221653329641, iteration: 309614
loss: 0.9900578260421753,grad_norm: 0.8316225581690793, iteration: 309615
loss: 0.9932963848114014,grad_norm: 0.7113743360821165, iteration: 309616
loss: 0.9869322180747986,grad_norm: 0.8995561195799097, iteration: 309617
loss: 1.0380828380584717,grad_norm: 0.999999147607652, iteration: 309618
loss: 1.0332856178283691,grad_norm: 0.9999992828778207, iteration: 309619
loss: 0.9783879518508911,grad_norm: 0.8935529183660201, iteration: 309620
loss: 1.0600266456604004,grad_norm: 0.9999997966871474, iteration: 309621
loss: 0.9969812631607056,grad_norm: 0.999998989780427, iteration: 309622
loss: 0.9914954900741577,grad_norm: 0.7597414266300448, iteration: 309623
loss: 1.0035600662231445,grad_norm: 0.7553973449878468, iteration: 309624
loss: 0.9935685992240906,grad_norm: 0.7514012838502542, iteration: 309625
loss: 0.9894054532051086,grad_norm: 0.7587881910295692, iteration: 309626
loss: 1.0133639574050903,grad_norm: 0.7891260269053149, iteration: 309627
loss: 1.0236077308654785,grad_norm: 0.9999989804804444, iteration: 309628
loss: 1.0877004861831665,grad_norm: 0.999232215521512, iteration: 309629
loss: 1.032772183418274,grad_norm: 0.9999991838359262, iteration: 309630
loss: 0.9983548521995544,grad_norm: 0.8387882798559583, iteration: 309631
loss: 1.0993187427520752,grad_norm: 0.9999991098357377, iteration: 309632
loss: 1.1265562772750854,grad_norm: 0.9999990731679286, iteration: 309633
loss: 1.0429863929748535,grad_norm: 0.999999258100526, iteration: 309634
loss: 0.9939238429069519,grad_norm: 0.8933214579618971, iteration: 309635
loss: 1.0062750577926636,grad_norm: 0.6321033210182035, iteration: 309636
loss: 1.0143462419509888,grad_norm: 0.899396790930421, iteration: 309637
loss: 1.0252212285995483,grad_norm: 0.999999656408088, iteration: 309638
loss: 0.9905667304992676,grad_norm: 0.9134635616987813, iteration: 309639
loss: 1.0412598848342896,grad_norm: 1.0000000340236614, iteration: 309640
loss: 1.0209589004516602,grad_norm: 0.7828325451734446, iteration: 309641
loss: 0.9755451679229736,grad_norm: 0.8374767310179736, iteration: 309642
loss: 1.056814432144165,grad_norm: 0.9999990599162241, iteration: 309643
loss: 1.0651153326034546,grad_norm: 0.964540834844689, iteration: 309644
loss: 1.0482087135314941,grad_norm: 0.9999991257957122, iteration: 309645
loss: 0.9901201725006104,grad_norm: 0.9235793625768529, iteration: 309646
loss: 1.0285292863845825,grad_norm: 0.999999145232097, iteration: 309647
loss: 0.9703052043914795,grad_norm: 0.9999990742492691, iteration: 309648
loss: 1.0989925861358643,grad_norm: 0.9435323200393605, iteration: 309649
loss: 1.009594440460205,grad_norm: 0.9308481637274104, iteration: 309650
loss: 1.0493170022964478,grad_norm: 0.9999991342648569, iteration: 309651
loss: 1.070707082748413,grad_norm: 0.8829362337473615, iteration: 309652
loss: 0.9880369305610657,grad_norm: 0.9609942616522585, iteration: 309653
loss: 0.9973135590553284,grad_norm: 0.9480782946694996, iteration: 309654
loss: 1.1034613847732544,grad_norm: 0.9999991408410444, iteration: 309655
loss: 0.9945181012153625,grad_norm: 0.9120783169706506, iteration: 309656
loss: 1.0854178667068481,grad_norm: 0.9999999703105493, iteration: 309657
loss: 1.0125044584274292,grad_norm: 0.9999991766451429, iteration: 309658
loss: 1.0495655536651611,grad_norm: 0.9999996402703325, iteration: 309659
loss: 1.0555533170700073,grad_norm: 0.9999995708383499, iteration: 309660
loss: 1.0346647500991821,grad_norm: 0.8702145994729708, iteration: 309661
loss: 1.0169050693511963,grad_norm: 0.8171261126461666, iteration: 309662
loss: 1.0136204957962036,grad_norm: 0.7864288454416969, iteration: 309663
loss: 1.0014333724975586,grad_norm: 0.9758546362378179, iteration: 309664
loss: 1.0180662870407104,grad_norm: 0.9207747545030265, iteration: 309665
loss: 1.0242352485656738,grad_norm: 0.9999998374366241, iteration: 309666
loss: 1.0534257888793945,grad_norm: 0.999999572411013, iteration: 309667
loss: 1.021816372871399,grad_norm: 0.9299296751747536, iteration: 309668
loss: 0.9869338870048523,grad_norm: 0.7394554827360059, iteration: 309669
loss: 1.025591254234314,grad_norm: 0.8105590850770469, iteration: 309670
loss: 1.0168105363845825,grad_norm: 0.8591815756253288, iteration: 309671
loss: 1.005276083946228,grad_norm: 0.8677783438990391, iteration: 309672
loss: 1.0260322093963623,grad_norm: 0.9999992915609486, iteration: 309673
loss: 0.995685338973999,grad_norm: 0.8978936389138644, iteration: 309674
loss: 1.1470292806625366,grad_norm: 0.9431077867615468, iteration: 309675
loss: 1.0100711584091187,grad_norm: 0.8620990592904186, iteration: 309676
loss: 1.080776333808899,grad_norm: 0.9822807788493261, iteration: 309677
loss: 0.9837568402290344,grad_norm: 0.9999990694122505, iteration: 309678
loss: 1.0326638221740723,grad_norm: 0.9999991617622126, iteration: 309679
loss: 0.9879148602485657,grad_norm: 0.8512805411894317, iteration: 309680
loss: 1.0524286031723022,grad_norm: 0.9999994087920503, iteration: 309681
loss: 1.0146653652191162,grad_norm: 0.7011461129212109, iteration: 309682
loss: 0.9854989647865295,grad_norm: 0.991081672667885, iteration: 309683
loss: 1.0001450777053833,grad_norm: 0.9999995089238102, iteration: 309684
loss: 1.0383936166763306,grad_norm: 0.7717745273716681, iteration: 309685
loss: 0.9867013096809387,grad_norm: 0.7882034775770804, iteration: 309686
loss: 1.1394215822219849,grad_norm: 0.9638332007529239, iteration: 309687
loss: 0.9725615978240967,grad_norm: 0.9999991541095246, iteration: 309688
loss: 0.9993917942047119,grad_norm: 0.8591941702040063, iteration: 309689
loss: 0.9972766041755676,grad_norm: 0.9399783839980721, iteration: 309690
loss: 0.9908742904663086,grad_norm: 0.97084382398969, iteration: 309691
loss: 0.9614743590354919,grad_norm: 0.844916636836409, iteration: 309692
loss: 0.9969117641448975,grad_norm: 0.7368540330379117, iteration: 309693
loss: 1.0206092596054077,grad_norm: 0.8175883805500231, iteration: 309694
loss: 1.035560131072998,grad_norm: 0.8273662178191564, iteration: 309695
loss: 1.0177627801895142,grad_norm: 0.8430602035537121, iteration: 309696
loss: 0.9898891448974609,grad_norm: 0.7710480335959496, iteration: 309697
loss: 1.025200605392456,grad_norm: 0.9128177793133929, iteration: 309698
loss: 1.003746747970581,grad_norm: 0.7764590116557302, iteration: 309699
loss: 1.0131444931030273,grad_norm: 0.8331143808087544, iteration: 309700
loss: 0.9687341451644897,grad_norm: 0.9682788274129348, iteration: 309701
loss: 0.9769452810287476,grad_norm: 0.9999990896661335, iteration: 309702
loss: 0.9979281425476074,grad_norm: 0.7682691299065658, iteration: 309703
loss: 0.9974933862686157,grad_norm: 0.9508102825599211, iteration: 309704
loss: 1.0022633075714111,grad_norm: 0.8343126608280355, iteration: 309705
loss: 1.032766342163086,grad_norm: 0.999999646144076, iteration: 309706
loss: 0.9669012427330017,grad_norm: 0.7495132715147322, iteration: 309707
loss: 0.9729515910148621,grad_norm: 0.8836597136346085, iteration: 309708
loss: 1.0503156185150146,grad_norm: 0.9999996497627266, iteration: 309709
loss: 1.0051560401916504,grad_norm: 0.9999996060022254, iteration: 309710
loss: 1.031875491142273,grad_norm: 0.9999993380788142, iteration: 309711
loss: 0.9992296099662781,grad_norm: 0.8242413201756529, iteration: 309712
loss: 1.009629249572754,grad_norm: 0.9999992058855137, iteration: 309713
loss: 0.9708376526832581,grad_norm: 0.7008580697117279, iteration: 309714
loss: 1.069288730621338,grad_norm: 0.9999990446369144, iteration: 309715
loss: 1.028027057647705,grad_norm: 0.938327040854521, iteration: 309716
loss: 1.034126877784729,grad_norm: 0.9999993850654475, iteration: 309717
loss: 0.9871131777763367,grad_norm: 0.9959094587879589, iteration: 309718
loss: 1.0165632963180542,grad_norm: 0.8706118580790058, iteration: 309719
loss: 1.0239737033843994,grad_norm: 0.8216993442558179, iteration: 309720
loss: 0.9893909692764282,grad_norm: 0.9293093391749359, iteration: 309721
loss: 1.0276626348495483,grad_norm: 0.9999996366778815, iteration: 309722
loss: 1.0988391637802124,grad_norm: 0.8059294223585534, iteration: 309723
loss: 0.9966764450073242,grad_norm: 0.831405339518618, iteration: 309724
loss: 1.0672447681427002,grad_norm: 0.7824175565629385, iteration: 309725
loss: 1.015700101852417,grad_norm: 0.7878795139270284, iteration: 309726
loss: 0.996266782283783,grad_norm: 0.7444917966994276, iteration: 309727
loss: 0.9792520403862,grad_norm: 0.9201546665101806, iteration: 309728
loss: 1.0112907886505127,grad_norm: 0.8693704901361958, iteration: 309729
loss: 0.9798752665519714,grad_norm: 0.7354644317616045, iteration: 309730
loss: 1.0022507905960083,grad_norm: 0.7128716292026732, iteration: 309731
loss: 0.9889755249023438,grad_norm: 0.9099469906270911, iteration: 309732
loss: 1.035249948501587,grad_norm: 0.9999997311208435, iteration: 309733
loss: 1.0366425514221191,grad_norm: 0.8135933319781828, iteration: 309734
loss: 0.9820162057876587,grad_norm: 0.8870629752079134, iteration: 309735
loss: 0.9770402312278748,grad_norm: 0.8256208868143768, iteration: 309736
loss: 0.996235728263855,grad_norm: 0.9250211572880788, iteration: 309737
loss: 0.9948639273643494,grad_norm: 0.932468905204679, iteration: 309738
loss: 0.9539440870285034,grad_norm: 0.9999990282805691, iteration: 309739
loss: 1.0198255777359009,grad_norm: 0.8369088784119492, iteration: 309740
loss: 0.9984357953071594,grad_norm: 0.8189535706297848, iteration: 309741
loss: 1.010564923286438,grad_norm: 0.8030061814738053, iteration: 309742
loss: 0.9773978590965271,grad_norm: 0.9300020440346904, iteration: 309743
loss: 1.0169724225997925,grad_norm: 0.9673439553328913, iteration: 309744
loss: 1.1437958478927612,grad_norm: 0.9999993254190884, iteration: 309745
loss: 1.0112141370773315,grad_norm: 0.7481882856064679, iteration: 309746
loss: 0.9404197335243225,grad_norm: 0.8906175325676108, iteration: 309747
loss: 1.0294727087020874,grad_norm: 0.9631885912982362, iteration: 309748
loss: 0.9940631985664368,grad_norm: 0.6608304699522124, iteration: 309749
loss: 0.9771401286125183,grad_norm: 0.7018707530115788, iteration: 309750
loss: 1.026520013809204,grad_norm: 0.9062581966484926, iteration: 309751
loss: 1.0327998399734497,grad_norm: 0.9999995329327781, iteration: 309752
loss: 1.0066299438476562,grad_norm: 0.9442765828286112, iteration: 309753
loss: 0.9886218905448914,grad_norm: 0.8786738773431622, iteration: 309754
loss: 1.0023809671401978,grad_norm: 0.8303790509091562, iteration: 309755
loss: 0.9999277591705322,grad_norm: 0.9107872363290598, iteration: 309756
loss: 1.0790760517120361,grad_norm: 0.9999998825053253, iteration: 309757
loss: 0.9991868734359741,grad_norm: 0.8866067210776962, iteration: 309758
loss: 0.9708451628684998,grad_norm: 0.8032900793846222, iteration: 309759
loss: 0.9842246770858765,grad_norm: 0.8584083875278403, iteration: 309760
loss: 1.0296739339828491,grad_norm: 0.8574316159911667, iteration: 309761
loss: 1.028802752494812,grad_norm: 0.9999995350070574, iteration: 309762
loss: 1.0155051946640015,grad_norm: 0.8909827644389154, iteration: 309763
loss: 1.0783555507659912,grad_norm: 0.9999995765518177, iteration: 309764
loss: 1.0451197624206543,grad_norm: 0.9999998118779643, iteration: 309765
loss: 0.9879595637321472,grad_norm: 0.8599728696637453, iteration: 309766
loss: 1.0228996276855469,grad_norm: 0.9075816636274945, iteration: 309767
loss: 1.0130053758621216,grad_norm: 0.999999117947424, iteration: 309768
loss: 0.990166187286377,grad_norm: 0.8644809577710064, iteration: 309769
loss: 0.9997301697731018,grad_norm: 0.8733919707651261, iteration: 309770
loss: 0.9639727473258972,grad_norm: 0.7721873372215995, iteration: 309771
loss: 0.9886445999145508,grad_norm: 0.851846655142056, iteration: 309772
loss: 1.0188798904418945,grad_norm: 0.908215252282701, iteration: 309773
loss: 0.9931892156600952,grad_norm: 0.7356589198677106, iteration: 309774
loss: 1.0308047533035278,grad_norm: 0.8231795304422698, iteration: 309775
loss: 0.9861006140708923,grad_norm: 0.940454169840655, iteration: 309776
loss: 0.9910227060317993,grad_norm: 0.8837804809064185, iteration: 309777
loss: 0.9879192113876343,grad_norm: 0.8757047803158546, iteration: 309778
loss: 1.0192533731460571,grad_norm: 0.9090968964780782, iteration: 309779
loss: 1.0002294778823853,grad_norm: 0.7699867106959963, iteration: 309780
loss: 0.9696293473243713,grad_norm: 0.8611988622309442, iteration: 309781
loss: 1.0065075159072876,grad_norm: 0.9194721467740947, iteration: 309782
loss: 1.0028127431869507,grad_norm: 0.999998987146384, iteration: 309783
loss: 0.990913987159729,grad_norm: 0.7495248916235081, iteration: 309784
loss: 1.090491533279419,grad_norm: 0.9248992108192721, iteration: 309785
loss: 1.004865050315857,grad_norm: 0.8503657679381584, iteration: 309786
loss: 1.001226782798767,grad_norm: 0.633436903780962, iteration: 309787
loss: 0.9794928431510925,grad_norm: 0.7887143139452145, iteration: 309788
loss: 1.0341633558273315,grad_norm: 0.9999997292620547, iteration: 309789
loss: 0.9586012959480286,grad_norm: 0.7887801103872182, iteration: 309790
loss: 0.9910598993301392,grad_norm: 0.83193322690785, iteration: 309791
loss: 0.9979450106620789,grad_norm: 0.7718835514177155, iteration: 309792
loss: 0.9697309732437134,grad_norm: 0.7829272103927206, iteration: 309793
loss: 1.0127458572387695,grad_norm: 0.9197350653593924, iteration: 309794
loss: 1.0056747198104858,grad_norm: 0.8494140323762717, iteration: 309795
loss: 1.0608323812484741,grad_norm: 0.9999996324674718, iteration: 309796
loss: 1.0096383094787598,grad_norm: 0.8138967853406037, iteration: 309797
loss: 0.9712702631950378,grad_norm: 0.9999991667762956, iteration: 309798
loss: 1.0056270360946655,grad_norm: 0.753875003235812, iteration: 309799
loss: 1.0231554508209229,grad_norm: 0.8052139629876048, iteration: 309800
loss: 1.033087134361267,grad_norm: 0.9496588573245476, iteration: 309801
loss: 1.0441948175430298,grad_norm: 0.999999106932909, iteration: 309802
loss: 0.9738855361938477,grad_norm: 0.8010779172216567, iteration: 309803
loss: 1.0430810451507568,grad_norm: 0.7965313419913482, iteration: 309804
loss: 1.0080900192260742,grad_norm: 0.9999995355023259, iteration: 309805
loss: 1.028922438621521,grad_norm: 0.9999992687557394, iteration: 309806
loss: 1.0033875703811646,grad_norm: 0.9038774104955998, iteration: 309807
loss: 1.0432864427566528,grad_norm: 0.9516128920227026, iteration: 309808
loss: 0.9577755928039551,grad_norm: 0.8129046664753937, iteration: 309809
loss: 1.1123608350753784,grad_norm: 0.8896576012649716, iteration: 309810
loss: 1.0594381093978882,grad_norm: 0.8639029690322756, iteration: 309811
loss: 1.0149450302124023,grad_norm: 0.8756381171864049, iteration: 309812
loss: 0.988497257232666,grad_norm: 0.806682962591008, iteration: 309813
loss: 1.031411051750183,grad_norm: 0.9792666685374276, iteration: 309814
loss: 1.0375394821166992,grad_norm: 0.6839912183731691, iteration: 309815
loss: 1.0146372318267822,grad_norm: 0.7283489464051176, iteration: 309816
loss: 1.0308464765548706,grad_norm: 0.9026678608866061, iteration: 309817
loss: 1.0530827045440674,grad_norm: 0.9226251676938455, iteration: 309818
loss: 1.0260305404663086,grad_norm: 0.7460078446771756, iteration: 309819
loss: 0.9730063676834106,grad_norm: 0.825608419589966, iteration: 309820
loss: 1.0137929916381836,grad_norm: 0.8933432551932846, iteration: 309821
loss: 1.020819067955017,grad_norm: 0.9239827907097399, iteration: 309822
loss: 0.9883268475532532,grad_norm: 0.7881120122200501, iteration: 309823
loss: 1.0100443363189697,grad_norm: 0.8285145654441154, iteration: 309824
loss: 0.9914543032646179,grad_norm: 0.9999991157529271, iteration: 309825
loss: 1.0607930421829224,grad_norm: 0.9999997143766385, iteration: 309826
loss: 1.0179715156555176,grad_norm: 0.754835176881045, iteration: 309827
loss: 0.9992550015449524,grad_norm: 0.8533742932279583, iteration: 309828
loss: 1.0276508331298828,grad_norm: 0.7450635405924784, iteration: 309829
loss: 0.9981844425201416,grad_norm: 0.735842366072925, iteration: 309830
loss: 0.9722096920013428,grad_norm: 0.7785821264025398, iteration: 309831
loss: 0.9863452911376953,grad_norm: 0.7670284099493747, iteration: 309832
loss: 0.9973397850990295,grad_norm: 0.9999991013918696, iteration: 309833
loss: 0.9710555076599121,grad_norm: 0.8677774459301006, iteration: 309834
loss: 0.9760788679122925,grad_norm: 0.912052381867554, iteration: 309835
loss: 0.9819693565368652,grad_norm: 0.9362555383703249, iteration: 309836
loss: 1.0573350191116333,grad_norm: 0.9999995987673388, iteration: 309837
loss: 0.9791461229324341,grad_norm: 0.9999990447593993, iteration: 309838
loss: 0.9792671799659729,grad_norm: 0.9359547374134489, iteration: 309839
loss: 0.9912763237953186,grad_norm: 0.8989778934978513, iteration: 309840
loss: 0.9811863899230957,grad_norm: 0.9466115549543033, iteration: 309841
loss: 1.0035561323165894,grad_norm: 0.8415984439261887, iteration: 309842
loss: 0.9961037635803223,grad_norm: 0.8762479904126511, iteration: 309843
loss: 1.0241217613220215,grad_norm: 0.8275382679021978, iteration: 309844
loss: 1.0196101665496826,grad_norm: 0.6417907645364295, iteration: 309845
loss: 0.9885281324386597,grad_norm: 0.8164586744366724, iteration: 309846
loss: 1.018324613571167,grad_norm: 0.7318050799153771, iteration: 309847
loss: 1.0091814994812012,grad_norm: 0.6756341234023058, iteration: 309848
loss: 0.9649648070335388,grad_norm: 0.768581588003128, iteration: 309849
loss: 0.9678094387054443,grad_norm: 0.7583145444312801, iteration: 309850
loss: 1.0181260108947754,grad_norm: 0.9999992402799744, iteration: 309851
loss: 1.0145094394683838,grad_norm: 0.7382790665545249, iteration: 309852
loss: 0.9849216938018799,grad_norm: 0.9999995836807148, iteration: 309853
loss: 1.055954933166504,grad_norm: 0.9999996003039052, iteration: 309854
loss: 0.9998242259025574,grad_norm: 0.999999178203858, iteration: 309855
loss: 1.018117070198059,grad_norm: 0.9999993705592723, iteration: 309856
loss: 1.007048487663269,grad_norm: 0.8710189879833623, iteration: 309857
loss: 1.063521385192871,grad_norm: 0.7661547501613898, iteration: 309858
loss: 1.0068145990371704,grad_norm: 0.737386372874007, iteration: 309859
loss: 1.0276010036468506,grad_norm: 0.9999993274068756, iteration: 309860
loss: 1.02492094039917,grad_norm: 0.9981012761457912, iteration: 309861
loss: 0.9972337484359741,grad_norm: 0.8088872749199694, iteration: 309862
loss: 0.9922909140586853,grad_norm: 0.8171705351009062, iteration: 309863
loss: 1.0978260040283203,grad_norm: 0.9999994605650331, iteration: 309864
loss: 0.9991768002510071,grad_norm: 0.7485557582691889, iteration: 309865
loss: 0.9582492709159851,grad_norm: 0.7431422140266677, iteration: 309866
loss: 0.977868914604187,grad_norm: 0.999999107211769, iteration: 309867
loss: 0.9721235036849976,grad_norm: 0.8542647635670283, iteration: 309868
loss: 1.027978539466858,grad_norm: 0.9332502953864604, iteration: 309869
loss: 1.0021862983703613,grad_norm: 0.8114511857950877, iteration: 309870
loss: 1.0313459634780884,grad_norm: 0.9999990798040664, iteration: 309871
loss: 0.9912201762199402,grad_norm: 0.9999991608719752, iteration: 309872
loss: 1.011085867881775,grad_norm: 0.7941043247020234, iteration: 309873
loss: 1.0432941913604736,grad_norm: 0.9999990277699529, iteration: 309874
loss: 0.9736623167991638,grad_norm: 0.8476214800524213, iteration: 309875
loss: 1.028876543045044,grad_norm: 0.999999082541881, iteration: 309876
loss: 1.0486441850662231,grad_norm: 0.8341560097501691, iteration: 309877
loss: 1.0285018682479858,grad_norm: 0.7637411760440729, iteration: 309878
loss: 1.1436465978622437,grad_norm: 0.9999993620720418, iteration: 309879
loss: 1.0117626190185547,grad_norm: 0.9999993438485115, iteration: 309880
loss: 1.072568655014038,grad_norm: 0.9999993014314226, iteration: 309881
loss: 0.9885736703872681,grad_norm: 0.9999991507614248, iteration: 309882
loss: 1.060665488243103,grad_norm: 0.999999614027188, iteration: 309883
loss: 0.9731655716896057,grad_norm: 0.8706000579130546, iteration: 309884
loss: 0.9661495089530945,grad_norm: 0.925385350554581, iteration: 309885
loss: 1.0219850540161133,grad_norm: 0.9999994597190692, iteration: 309886
loss: 0.9731820821762085,grad_norm: 0.8424046161235628, iteration: 309887
loss: 0.9458969235420227,grad_norm: 0.8267105571713484, iteration: 309888
loss: 1.0230712890625,grad_norm: 0.7840550085089976, iteration: 309889
loss: 1.0066614151000977,grad_norm: 0.9099231676934291, iteration: 309890
loss: 1.0065248012542725,grad_norm: 0.9643202307055759, iteration: 309891
loss: 1.0639936923980713,grad_norm: 1.0000000562697453, iteration: 309892
loss: 1.0269453525543213,grad_norm: 0.9999992708522693, iteration: 309893
loss: 1.0130232572555542,grad_norm: 0.7686880481827386, iteration: 309894
loss: 0.9958899021148682,grad_norm: 0.8151765607549278, iteration: 309895
loss: 0.9996721148490906,grad_norm: 0.9273926162135131, iteration: 309896
loss: 0.9861916899681091,grad_norm: 0.8511327049617049, iteration: 309897
loss: 1.0106600522994995,grad_norm: 0.8929289970071085, iteration: 309898
loss: 0.9649109244346619,grad_norm: 0.8559803877319202, iteration: 309899
loss: 0.9889257550239563,grad_norm: 0.9999992580593363, iteration: 309900
loss: 1.020887017250061,grad_norm: 0.8611127299440083, iteration: 309901
loss: 0.9950772523880005,grad_norm: 0.7790066981610694, iteration: 309902
loss: 1.02855384349823,grad_norm: 0.7442873692543542, iteration: 309903
loss: 0.9845843315124512,grad_norm: 0.7664339441101043, iteration: 309904
loss: 0.9769190549850464,grad_norm: 0.9999990306782455, iteration: 309905
loss: 1.0350557565689087,grad_norm: 0.8041607387312407, iteration: 309906
loss: 1.0678964853286743,grad_norm: 0.9747479436117943, iteration: 309907
loss: 0.9782860279083252,grad_norm: 0.9999991220470033, iteration: 309908
loss: 1.004101276397705,grad_norm: 0.7958036118622981, iteration: 309909
loss: 1.0133647918701172,grad_norm: 0.9999992566610554, iteration: 309910
loss: 1.0253890752792358,grad_norm: 0.7465006665342977, iteration: 309911
loss: 0.9850559234619141,grad_norm: 0.9999990031649217, iteration: 309912
loss: 1.0009205341339111,grad_norm: 0.999999205185158, iteration: 309913
loss: 0.9954304695129395,grad_norm: 0.9056004154717965, iteration: 309914
loss: 1.0209167003631592,grad_norm: 0.7138380309990315, iteration: 309915
loss: 0.9811027646064758,grad_norm: 0.7976853639324485, iteration: 309916
loss: 1.0794113874435425,grad_norm: 0.9999998585912979, iteration: 309917
loss: 1.0067310333251953,grad_norm: 0.7656477595649981, iteration: 309918
loss: 0.9738647937774658,grad_norm: 0.7879216871244177, iteration: 309919
loss: 0.999744176864624,grad_norm: 0.933871364012165, iteration: 309920
loss: 0.9724891185760498,grad_norm: 0.8390484848516471, iteration: 309921
loss: 0.9903337955474854,grad_norm: 0.955826734677345, iteration: 309922
loss: 0.9944807887077332,grad_norm: 0.7707275683435563, iteration: 309923
loss: 1.0169792175292969,grad_norm: 0.9999994537167205, iteration: 309924
loss: 1.032306432723999,grad_norm: 0.8305076310897855, iteration: 309925
loss: 1.0510447025299072,grad_norm: 0.9077088766010175, iteration: 309926
loss: 1.0704485177993774,grad_norm: 0.9303342720779142, iteration: 309927
loss: 1.0227108001708984,grad_norm: 0.8829709957889023, iteration: 309928
loss: 0.9924944043159485,grad_norm: 0.8243116782812977, iteration: 309929
loss: 1.0287967920303345,grad_norm: 0.9647323293887071, iteration: 309930
loss: 1.055221676826477,grad_norm: 0.8735870649760817, iteration: 309931
loss: 0.988105833530426,grad_norm: 0.7780248317198638, iteration: 309932
loss: 1.0266343355178833,grad_norm: 0.9628830588720472, iteration: 309933
loss: 1.070354700088501,grad_norm: 0.9999991067650605, iteration: 309934
loss: 0.9866045713424683,grad_norm: 0.8838448429053952, iteration: 309935
loss: 0.9649885892868042,grad_norm: 0.7839769326469603, iteration: 309936
loss: 0.9934831857681274,grad_norm: 0.930090883870279, iteration: 309937
loss: 1.0085002183914185,grad_norm: 0.9932719122752872, iteration: 309938
loss: 0.9934433102607727,grad_norm: 0.6428009118783525, iteration: 309939
loss: 1.02437162399292,grad_norm: 0.7599626515991124, iteration: 309940
loss: 1.0457804203033447,grad_norm: 0.9999990461702347, iteration: 309941
loss: 1.057523250579834,grad_norm: 0.8909495774846043, iteration: 309942
loss: 0.993104875087738,grad_norm: 0.9249739260314562, iteration: 309943
loss: 1.009716510772705,grad_norm: 0.9999993839218753, iteration: 309944
loss: 0.9947296977043152,grad_norm: 0.7872978239755307, iteration: 309945
loss: 0.9911701083183289,grad_norm: 0.7194840133623585, iteration: 309946
loss: 1.0353422164916992,grad_norm: 0.8990552624478555, iteration: 309947
loss: 0.9951289296150208,grad_norm: 0.7311599122775856, iteration: 309948
loss: 0.9940019845962524,grad_norm: 0.7884569629093551, iteration: 309949
loss: 1.0668948888778687,grad_norm: 0.9999996324183172, iteration: 309950
loss: 0.9937795996665955,grad_norm: 0.9726292302874399, iteration: 309951
loss: 1.0032395124435425,grad_norm: 0.8722201333048794, iteration: 309952
loss: 0.9913490414619446,grad_norm: 0.6747686297810493, iteration: 309953
loss: 1.015464425086975,grad_norm: 0.9999996688875, iteration: 309954
loss: 1.011704683303833,grad_norm: 0.9649741674116069, iteration: 309955
loss: 1.0095570087432861,grad_norm: 0.7644973642730374, iteration: 309956
loss: 1.0091959238052368,grad_norm: 0.8642366048966199, iteration: 309957
loss: 0.9890525937080383,grad_norm: 0.8881100653557027, iteration: 309958
loss: 1.007736325263977,grad_norm: 0.9978834407704399, iteration: 309959
loss: 0.9631212949752808,grad_norm: 0.904156369450893, iteration: 309960
loss: 1.01496160030365,grad_norm: 0.7991599667647058, iteration: 309961
loss: 1.0273523330688477,grad_norm: 0.7900493173377965, iteration: 309962
loss: 0.9865690469741821,grad_norm: 0.9999999618466141, iteration: 309963
loss: 0.9989030957221985,grad_norm: 0.8098423705806164, iteration: 309964
loss: 1.1073057651519775,grad_norm: 0.8134818048277677, iteration: 309965
loss: 1.0048543214797974,grad_norm: 0.873834328925817, iteration: 309966
loss: 0.9727770686149597,grad_norm: 0.871730577941844, iteration: 309967
loss: 0.9894152879714966,grad_norm: 0.7208040299161209, iteration: 309968
loss: 1.031516671180725,grad_norm: 0.726439601205904, iteration: 309969
loss: 1.0258203744888306,grad_norm: 0.999999392467411, iteration: 309970
loss: 1.0068877935409546,grad_norm: 0.878615811904974, iteration: 309971
loss: 1.021937370300293,grad_norm: 0.9999990304623845, iteration: 309972
loss: 1.0063180923461914,grad_norm: 0.8163239663519993, iteration: 309973
loss: 0.9896117448806763,grad_norm: 0.7419266963367445, iteration: 309974
loss: 0.964751124382019,grad_norm: 0.7509202055070126, iteration: 309975
loss: 1.0102964639663696,grad_norm: 0.8416605438510898, iteration: 309976
loss: 0.9821024537086487,grad_norm: 0.8285200904663504, iteration: 309977
loss: 1.0155918598175049,grad_norm: 0.7856579684936623, iteration: 309978
loss: 1.0102168321609497,grad_norm: 0.8035494818370272, iteration: 309979
loss: 0.9807766675949097,grad_norm: 0.7368213260508296, iteration: 309980
loss: 0.9965293407440186,grad_norm: 0.8822311028161515, iteration: 309981
loss: 1.0027364492416382,grad_norm: 0.8708107543032333, iteration: 309982
loss: 0.9604517221450806,grad_norm: 0.8745183816944644, iteration: 309983
loss: 0.9890720844268799,grad_norm: 0.9540186419420413, iteration: 309984
loss: 1.0447169542312622,grad_norm: 0.9629893237006302, iteration: 309985
loss: 1.0002373456954956,grad_norm: 0.9999990107392834, iteration: 309986
loss: 1.0187580585479736,grad_norm: 0.8017736838860227, iteration: 309987
loss: 1.0273977518081665,grad_norm: 0.9999993793036819, iteration: 309988
loss: 0.9928672313690186,grad_norm: 0.7175476575624862, iteration: 309989
loss: 1.0015541315078735,grad_norm: 0.89066961206186, iteration: 309990
loss: 0.9771748781204224,grad_norm: 0.65063774846997, iteration: 309991
loss: 0.9999109506607056,grad_norm: 0.9466362952534656, iteration: 309992
loss: 1.0224518775939941,grad_norm: 0.8660273058862711, iteration: 309993
loss: 1.05878484249115,grad_norm: 0.9999999514518046, iteration: 309994
loss: 0.9997778534889221,grad_norm: 0.9701707403563504, iteration: 309995
loss: 0.9559326171875,grad_norm: 0.8242217230415616, iteration: 309996
loss: 1.0195391178131104,grad_norm: 0.7265000218419276, iteration: 309997
loss: 1.0001773834228516,grad_norm: 0.9047991402903767, iteration: 309998
loss: 0.9946439862251282,grad_norm: 0.8653134528792779, iteration: 309999
loss: 1.0845879316329956,grad_norm: 0.9999991967881963, iteration: 310000
Evaluating at step 310000
{'val': 0.9939536619931459, 'test': 2.066127075173984}
loss: 0.9739218354225159,grad_norm: 0.8146058046667327, iteration: 310001
loss: 1.0211162567138672,grad_norm: 0.7720061838529282, iteration: 310002
loss: 0.9928773045539856,grad_norm: 0.8486235327905608, iteration: 310003
loss: 1.0465021133422852,grad_norm: 0.8954487149358449, iteration: 310004
loss: 0.9828062057495117,grad_norm: 0.9608016591493742, iteration: 310005
loss: 0.9994572997093201,grad_norm: 0.9999997974751763, iteration: 310006
loss: 1.0029478073120117,grad_norm: 0.9999991000113714, iteration: 310007
loss: 0.9876937866210938,grad_norm: 0.7590143342308933, iteration: 310008
loss: 1.006284475326538,grad_norm: 0.9830065019956847, iteration: 310009
loss: 1.0104020833969116,grad_norm: 0.8291641966764175, iteration: 310010
loss: 1.0207266807556152,grad_norm: 0.9051480813743309, iteration: 310011
loss: 0.9879469275474548,grad_norm: 0.8933201581297562, iteration: 310012
loss: 1.0605272054672241,grad_norm: 0.999999697057187, iteration: 310013
loss: 0.9768297076225281,grad_norm: 0.8507756519053494, iteration: 310014
loss: 0.9964228868484497,grad_norm: 0.8881249360172352, iteration: 310015
loss: 0.9763342142105103,grad_norm: 0.9974000014275934, iteration: 310016
loss: 1.0310548543930054,grad_norm: 0.7990383299607413, iteration: 310017
loss: 1.0371198654174805,grad_norm: 0.9999992251629722, iteration: 310018
loss: 0.9918667078018188,grad_norm: 0.8410296174083761, iteration: 310019
loss: 1.0495383739471436,grad_norm: 0.9254865522897815, iteration: 310020
loss: 1.1398979425430298,grad_norm: 0.9999991956158426, iteration: 310021
loss: 0.9889051914215088,grad_norm: 0.9999998135777032, iteration: 310022
loss: 1.0023696422576904,grad_norm: 0.7666544016656266, iteration: 310023
loss: 1.0580687522888184,grad_norm: 0.9999995521566408, iteration: 310024
loss: 1.034013032913208,grad_norm: 0.9999991341662795, iteration: 310025
loss: 0.9917594790458679,grad_norm: 0.8608023548099214, iteration: 310026
loss: 1.0896395444869995,grad_norm: 0.9999994585557878, iteration: 310027
loss: 1.070737361907959,grad_norm: 0.8617393218320797, iteration: 310028
loss: 1.011474370956421,grad_norm: 0.8155324134987505, iteration: 310029
loss: 1.0000882148742676,grad_norm: 0.9999990983852318, iteration: 310030
loss: 0.987521231174469,grad_norm: 0.9166974788892255, iteration: 310031
loss: 0.987019956111908,grad_norm: 0.7803617792741495, iteration: 310032
loss: 1.019601583480835,grad_norm: 0.7850118245318088, iteration: 310033
loss: 0.9919440746307373,grad_norm: 0.9999995650445865, iteration: 310034
loss: 0.9560394287109375,grad_norm: 0.8527725479947923, iteration: 310035
loss: 1.0766263008117676,grad_norm: 0.9999991599426209, iteration: 310036
loss: 1.000840187072754,grad_norm: 0.8148717323923027, iteration: 310037
loss: 0.9690570831298828,grad_norm: 0.8221560057827155, iteration: 310038
loss: 1.023812174797058,grad_norm: 0.9711509651347092, iteration: 310039
loss: 0.9963586926460266,grad_norm: 0.8276468796164154, iteration: 310040
loss: 0.9971146583557129,grad_norm: 0.741928970986547, iteration: 310041
loss: 1.0005079507827759,grad_norm: 0.8603941036922244, iteration: 310042
loss: 1.161551833152771,grad_norm: 0.9430130984291912, iteration: 310043
loss: 0.9627259969711304,grad_norm: 0.7108921693900796, iteration: 310044
loss: 1.0140268802642822,grad_norm: 0.999999071268775, iteration: 310045
loss: 1.0024306774139404,grad_norm: 0.6890301323351827, iteration: 310046
loss: 1.0007752180099487,grad_norm: 0.8149993586781421, iteration: 310047
loss: 1.0426641702651978,grad_norm: 0.9999991511985952, iteration: 310048
loss: 1.010988473892212,grad_norm: 0.8683863922924783, iteration: 310049
loss: 0.9931203722953796,grad_norm: 0.7805640107062635, iteration: 310050
loss: 1.0053282976150513,grad_norm: 0.8821328741110203, iteration: 310051
loss: 0.974867045879364,grad_norm: 0.845837783712158, iteration: 310052
loss: 1.0043796300888062,grad_norm: 0.99999912696883, iteration: 310053
loss: 0.9870065450668335,grad_norm: 0.9999991340012201, iteration: 310054
loss: 1.0095402002334595,grad_norm: 0.7977175327179179, iteration: 310055
loss: 1.0435978174209595,grad_norm: 0.9091160489697538, iteration: 310056
loss: 1.3179272413253784,grad_norm: 0.9999998618021769, iteration: 310057
loss: 0.9552534222602844,grad_norm: 0.9762522073270742, iteration: 310058
loss: 1.0263488292694092,grad_norm: 0.9999997129239292, iteration: 310059
loss: 1.0603023767471313,grad_norm: 0.9999997161013476, iteration: 310060
loss: 0.9917600750923157,grad_norm: 0.7448348722304624, iteration: 310061
loss: 1.0757923126220703,grad_norm: 0.999999899409084, iteration: 310062
loss: 1.106663465499878,grad_norm: 0.9999997638593193, iteration: 310063
loss: 1.059179663658142,grad_norm: 0.9999992417488027, iteration: 310064
loss: 1.0574449300765991,grad_norm: 0.9999996805111507, iteration: 310065
loss: 1.0162914991378784,grad_norm: 0.9999997838277012, iteration: 310066
loss: 0.9744287133216858,grad_norm: 0.7927318603827717, iteration: 310067
loss: 1.0169134140014648,grad_norm: 0.8566220025994216, iteration: 310068
loss: 1.0115419626235962,grad_norm: 0.9639198920936065, iteration: 310069
loss: 0.9918505549430847,grad_norm: 0.9293005856577057, iteration: 310070
loss: 1.0079237222671509,grad_norm: 0.8098385589465459, iteration: 310071
loss: 0.9645607471466064,grad_norm: 0.8356833475733686, iteration: 310072
loss: 1.0743813514709473,grad_norm: 0.9125712257072376, iteration: 310073
loss: 1.04082453250885,grad_norm: 0.8057566799224758, iteration: 310074
loss: 1.0078833103179932,grad_norm: 0.8628697473967698, iteration: 310075
loss: 1.0049495697021484,grad_norm: 0.9999992860449228, iteration: 310076
loss: 1.2883070707321167,grad_norm: 1.000000028826339, iteration: 310077
loss: 0.9936585426330566,grad_norm: 0.8003662860745991, iteration: 310078
loss: 1.0208631753921509,grad_norm: 0.773884157607543, iteration: 310079
loss: 1.0482069253921509,grad_norm: 0.9999998855748353, iteration: 310080
loss: 1.0179239511489868,grad_norm: 0.7697148670644478, iteration: 310081
loss: 0.9914172291755676,grad_norm: 0.9592952773321788, iteration: 310082
loss: 1.0025451183319092,grad_norm: 0.757000340507366, iteration: 310083
loss: 1.0046601295471191,grad_norm: 0.7294856425241899, iteration: 310084
loss: 1.0246456861495972,grad_norm: 0.9999990723498413, iteration: 310085
loss: 1.0083355903625488,grad_norm: 0.9999997413346922, iteration: 310086
loss: 0.9857439398765564,grad_norm: 0.9999994672968112, iteration: 310087
loss: 0.9987361431121826,grad_norm: 0.8095839090054182, iteration: 310088
loss: 1.00405752658844,grad_norm: 0.927963246535365, iteration: 310089
loss: 1.009483814239502,grad_norm: 0.9341293121271613, iteration: 310090
loss: 1.041825294494629,grad_norm: 0.999999037366729, iteration: 310091
loss: 0.9789814949035645,grad_norm: 0.8661914601139992, iteration: 310092
loss: 1.0240789651870728,grad_norm: 0.9010572257457468, iteration: 310093
loss: 1.0154476165771484,grad_norm: 0.8814777056858825, iteration: 310094
loss: 0.9654838442802429,grad_norm: 0.9999990962181159, iteration: 310095
loss: 1.0039522647857666,grad_norm: 0.8681776991169482, iteration: 310096
loss: 1.0137641429901123,grad_norm: 0.9999992467574614, iteration: 310097
loss: 0.9698984622955322,grad_norm: 0.9999991368114637, iteration: 310098
loss: 1.0685781240463257,grad_norm: 0.7909065829543284, iteration: 310099
loss: 0.9935858249664307,grad_norm: 0.975796811506438, iteration: 310100
loss: 1.016459345817566,grad_norm: 0.804128454740627, iteration: 310101
loss: 1.0993808507919312,grad_norm: 0.999999825086704, iteration: 310102
loss: 1.0385007858276367,grad_norm: 0.999998996262909, iteration: 310103
loss: 1.0698908567428589,grad_norm: 0.8719004909865485, iteration: 310104
loss: 0.9786267280578613,grad_norm: 0.9104781273499183, iteration: 310105
loss: 1.0465939044952393,grad_norm: 0.9360700351962633, iteration: 310106
loss: 1.1255607604980469,grad_norm: 0.912377391049372, iteration: 310107
loss: 1.008164405822754,grad_norm: 0.7977537881714472, iteration: 310108
loss: 0.9827027320861816,grad_norm: 0.9999995133051295, iteration: 310109
loss: 1.0208048820495605,grad_norm: 0.9144154436597896, iteration: 310110
loss: 0.9861965775489807,grad_norm: 0.8036017518211748, iteration: 310111
loss: 1.0550661087036133,grad_norm: 0.8661842922739913, iteration: 310112
loss: 1.0130019187927246,grad_norm: 0.9999999252998292, iteration: 310113
loss: 1.036458969116211,grad_norm: 0.8516888451609468, iteration: 310114
loss: 0.9934862852096558,grad_norm: 0.9999990518744634, iteration: 310115
loss: 1.0612095594406128,grad_norm: 0.999999296153148, iteration: 310116
loss: 1.0246648788452148,grad_norm: 0.9249768765159373, iteration: 310117
loss: 1.1379899978637695,grad_norm: 0.9999996802169738, iteration: 310118
loss: 1.0724021196365356,grad_norm: 0.9999991120720838, iteration: 310119
loss: 1.0874491930007935,grad_norm: 0.9999991276187392, iteration: 310120
loss: 1.027733564376831,grad_norm: 0.9999990863450254, iteration: 310121
loss: 0.999798059463501,grad_norm: 0.7865613981204466, iteration: 310122
loss: 1.0125044584274292,grad_norm: 0.773774054901338, iteration: 310123
loss: 1.0118989944458008,grad_norm: 0.9697303630912404, iteration: 310124
loss: 1.0063855648040771,grad_norm: 0.8070961202833307, iteration: 310125
loss: 1.0171958208084106,grad_norm: 0.9999993950416326, iteration: 310126
loss: 1.00360906124115,grad_norm: 0.7876095129448688, iteration: 310127
loss: 1.1767982244491577,grad_norm: 0.9999996090003572, iteration: 310128
loss: 1.0422018766403198,grad_norm: 0.9643667266204521, iteration: 310129
loss: 1.0417112112045288,grad_norm: 0.900339553070164, iteration: 310130
loss: 0.9689611792564392,grad_norm: 0.9539373968118277, iteration: 310131
loss: 0.9992730021476746,grad_norm: 0.9233333444830276, iteration: 310132
loss: 1.1535619497299194,grad_norm: 0.8421614449542899, iteration: 310133
loss: 1.0855990648269653,grad_norm: 0.9999995726663071, iteration: 310134
loss: 1.0279167890548706,grad_norm: 0.9999999807769407, iteration: 310135
loss: 1.1090260744094849,grad_norm: 0.9999996876789362, iteration: 310136
loss: 0.9836295247077942,grad_norm: 0.8988409188006399, iteration: 310137
loss: 1.0559734106063843,grad_norm: 0.8805461229917784, iteration: 310138
loss: 0.993211567401886,grad_norm: 0.7979842389349993, iteration: 310139
loss: 1.0531407594680786,grad_norm: 0.9999996339804702, iteration: 310140
loss: 1.012250542640686,grad_norm: 0.9999990882764738, iteration: 310141
loss: 1.0439457893371582,grad_norm: 0.9287293236961283, iteration: 310142
loss: 1.042457103729248,grad_norm: 0.897232661857346, iteration: 310143
loss: 1.0175127983093262,grad_norm: 0.7966362704610671, iteration: 310144
loss: 1.0390609502792358,grad_norm: 0.7539454238229415, iteration: 310145
loss: 1.0175741910934448,grad_norm: 0.8882051874341703, iteration: 310146
loss: 1.0641649961471558,grad_norm: 1.0000000708692454, iteration: 310147
loss: 1.0557825565338135,grad_norm: 0.999999495462123, iteration: 310148
loss: 1.057105302810669,grad_norm: 0.9999999209530291, iteration: 310149
loss: 1.0134482383728027,grad_norm: 0.9999992682893765, iteration: 310150
loss: 1.016870141029358,grad_norm: 0.9999991561598323, iteration: 310151
loss: 1.0050082206726074,grad_norm: 0.6971993797578894, iteration: 310152
loss: 1.0170196294784546,grad_norm: 0.7961071374392404, iteration: 310153
loss: 1.041213035583496,grad_norm: 0.999999528468362, iteration: 310154
loss: 0.9647238850593567,grad_norm: 0.8741034320739779, iteration: 310155
loss: 0.9906252026557922,grad_norm: 0.9999993089983957, iteration: 310156
loss: 1.018548607826233,grad_norm: 0.9285228486290853, iteration: 310157
loss: 1.0061699151992798,grad_norm: 0.7791139672300876, iteration: 310158
loss: 1.1552536487579346,grad_norm: 0.9999998728449622, iteration: 310159
loss: 1.013433575630188,grad_norm: 0.9999999160397006, iteration: 310160
loss: 1.0227826833724976,grad_norm: 0.9999996058690896, iteration: 310161
loss: 1.0592526197433472,grad_norm: 0.9999992377842108, iteration: 310162
loss: 1.0197324752807617,grad_norm: 0.8054592586155548, iteration: 310163
loss: 1.0468426942825317,grad_norm: 0.9999992009614789, iteration: 310164
loss: 1.0124315023422241,grad_norm: 0.6675769691476143, iteration: 310165
loss: 1.0602914094924927,grad_norm: 0.9092769745455922, iteration: 310166
loss: 1.1112796068191528,grad_norm: 0.9999998523427501, iteration: 310167
loss: 1.056013584136963,grad_norm: 0.9466247812280255, iteration: 310168
loss: 1.0065054893493652,grad_norm: 0.8489212387584824, iteration: 310169
loss: 1.0276707410812378,grad_norm: 0.9999990908480954, iteration: 310170
loss: 1.0265324115753174,grad_norm: 0.8399605216863021, iteration: 310171
loss: 1.0311863422393799,grad_norm: 0.8732530586972039, iteration: 310172
loss: 0.9884248971939087,grad_norm: 0.7630621040557366, iteration: 310173
loss: 1.024938941001892,grad_norm: 0.999999367348886, iteration: 310174
loss: 0.9883865118026733,grad_norm: 0.9854615951649198, iteration: 310175
loss: 1.0095072984695435,grad_norm: 0.7946992669800755, iteration: 310176
loss: 1.0024851560592651,grad_norm: 0.9999996522165546, iteration: 310177
loss: 0.9959999918937683,grad_norm: 0.8156971189476464, iteration: 310178
loss: 1.00190269947052,grad_norm: 0.8235139117704517, iteration: 310179
loss: 1.0644863843917847,grad_norm: 0.9999996856774821, iteration: 310180
loss: 0.9853249192237854,grad_norm: 0.7342700652093941, iteration: 310181
loss: 1.104356288909912,grad_norm: 0.999999936464209, iteration: 310182
loss: 1.0142886638641357,grad_norm: 0.8616262476144158, iteration: 310183
loss: 1.1671161651611328,grad_norm: 0.9999994223891371, iteration: 310184
loss: 1.1360111236572266,grad_norm: 0.999999197692232, iteration: 310185
loss: 1.0001697540283203,grad_norm: 0.8160327611465884, iteration: 310186
loss: 1.0440162420272827,grad_norm: 0.8674111946623666, iteration: 310187
loss: 1.063960075378418,grad_norm: 0.9999999879960854, iteration: 310188
loss: 1.0208128690719604,grad_norm: 0.816962571294518, iteration: 310189
loss: 1.0128833055496216,grad_norm: 0.9999999377105852, iteration: 310190
loss: 0.9981659650802612,grad_norm: 0.7364545499702203, iteration: 310191
loss: 1.0256959199905396,grad_norm: 0.9885290418076395, iteration: 310192
loss: 0.9942488074302673,grad_norm: 0.815854849941362, iteration: 310193
loss: 1.104532241821289,grad_norm: 0.9999996446371381, iteration: 310194
loss: 1.0513147115707397,grad_norm: 0.8718068889386662, iteration: 310195
loss: 1.0022695064544678,grad_norm: 0.8130708959804891, iteration: 310196
loss: 1.0855177640914917,grad_norm: 0.8385836883639387, iteration: 310197
loss: 1.051555871963501,grad_norm: 0.8575590180775486, iteration: 310198
loss: 1.0002700090408325,grad_norm: 0.9768070972667313, iteration: 310199
loss: 1.0578620433807373,grad_norm: 0.9999991073608768, iteration: 310200
loss: 1.030301809310913,grad_norm: 0.9259816162002756, iteration: 310201
loss: 1.1084171533584595,grad_norm: 0.9999992755773702, iteration: 310202
loss: 1.019763469696045,grad_norm: 0.9599246688982037, iteration: 310203
loss: 1.0806044340133667,grad_norm: 0.999999143457085, iteration: 310204
loss: 1.011673092842102,grad_norm: 0.8652154075008547, iteration: 310205
loss: 1.0243792533874512,grad_norm: 0.9999991155471404, iteration: 310206
loss: 0.9995905756950378,grad_norm: 0.9891795264134557, iteration: 310207
loss: 1.0029343366622925,grad_norm: 0.8374045368219041, iteration: 310208
loss: 1.0303796529769897,grad_norm: 0.9999991885488148, iteration: 310209
loss: 1.0447107553482056,grad_norm: 0.9999991208028765, iteration: 310210
loss: 1.0236657857894897,grad_norm: 0.9999993181385738, iteration: 310211
loss: 1.0487329959869385,grad_norm: 0.999999008257215, iteration: 310212
loss: 1.055831789970398,grad_norm: 0.9999999675628214, iteration: 310213
loss: 1.0121006965637207,grad_norm: 0.9366657778819335, iteration: 310214
loss: 1.0794463157653809,grad_norm: 0.9405783361899491, iteration: 310215
loss: 1.0171599388122559,grad_norm: 0.9999992205905659, iteration: 310216
loss: 1.310911774635315,grad_norm: 0.9999999357827061, iteration: 310217
loss: 1.0559449195861816,grad_norm: 0.9999991526055294, iteration: 310218
loss: 1.1770342588424683,grad_norm: 0.999999371681131, iteration: 310219
loss: 1.142255425453186,grad_norm: 0.9999997272856878, iteration: 310220
loss: 1.004228949546814,grad_norm: 0.8340446990587923, iteration: 310221
loss: 0.9889428615570068,grad_norm: 0.8061745616842962, iteration: 310222
loss: 0.9846227765083313,grad_norm: 0.8259138897443163, iteration: 310223
loss: 1.0723576545715332,grad_norm: 0.9778390343439831, iteration: 310224
loss: 1.0108972787857056,grad_norm: 0.9999994121367486, iteration: 310225
loss: 1.0693799257278442,grad_norm: 0.9014607384900741, iteration: 310226
loss: 1.0192383527755737,grad_norm: 0.882690794758994, iteration: 310227
loss: 1.038419246673584,grad_norm: 0.8455209452512257, iteration: 310228
loss: 0.9932926297187805,grad_norm: 0.7779359809155085, iteration: 310229
loss: 0.9954697489738464,grad_norm: 0.8592653642312259, iteration: 310230
loss: 0.9958739876747131,grad_norm: 0.9220651626351188, iteration: 310231
loss: 1.1162149906158447,grad_norm: 0.999999846187246, iteration: 310232
loss: 1.1267718076705933,grad_norm: 0.9999992840246081, iteration: 310233
loss: 1.0300670862197876,grad_norm: 0.8875724222051495, iteration: 310234
loss: 1.0892682075500488,grad_norm: 0.9999997127132098, iteration: 310235
loss: 1.0168830156326294,grad_norm: 0.8362119816582398, iteration: 310236
loss: 1.4544991254806519,grad_norm: 1.0000001000828866, iteration: 310237
loss: 0.9617080092430115,grad_norm: 0.8562080451220849, iteration: 310238
loss: 1.0133541822433472,grad_norm: 0.9999995384481106, iteration: 310239
loss: 0.9772033095359802,grad_norm: 0.7734090985138247, iteration: 310240
loss: 1.0003390312194824,grad_norm: 0.9999993307219553, iteration: 310241
loss: 0.9774437546730042,grad_norm: 0.8650410887447639, iteration: 310242
loss: 1.0163671970367432,grad_norm: 0.7646972905425709, iteration: 310243
loss: 1.0707401037216187,grad_norm: 0.9999991871515637, iteration: 310244
loss: 0.9905796647071838,grad_norm: 0.9999999005749242, iteration: 310245
loss: 0.9961743354797363,grad_norm: 0.999999020560276, iteration: 310246
loss: 1.0798052549362183,grad_norm: 0.9999993755696448, iteration: 310247
loss: 1.0099232196807861,grad_norm: 0.9010124694401026, iteration: 310248
loss: 1.0742034912109375,grad_norm: 0.9999993087658972, iteration: 310249
loss: 1.0858466625213623,grad_norm: 0.9999992557161076, iteration: 310250
loss: 1.0289262533187866,grad_norm: 0.7782240422331235, iteration: 310251
loss: 1.1804252862930298,grad_norm: 0.9999998886102669, iteration: 310252
loss: 0.9905939102172852,grad_norm: 0.9999991869350638, iteration: 310253
loss: 1.1141639947891235,grad_norm: 0.9999994779462738, iteration: 310254
loss: 1.047038197517395,grad_norm: 0.9999994567170231, iteration: 310255
loss: 0.9899832606315613,grad_norm: 0.9581666470654238, iteration: 310256
loss: 1.0441842079162598,grad_norm: 0.886412811420993, iteration: 310257
loss: 1.0320053100585938,grad_norm: 0.8318903337320023, iteration: 310258
loss: 0.9853295683860779,grad_norm: 0.8999382483090312, iteration: 310259
loss: 1.077444314956665,grad_norm: 0.8017367540752915, iteration: 310260
loss: 0.996911883354187,grad_norm: 0.8400706868161555, iteration: 310261
loss: 1.1925740242004395,grad_norm: 0.9999998089789616, iteration: 310262
loss: 0.9783857464790344,grad_norm: 0.9999990658293757, iteration: 310263
loss: 0.9799937009811401,grad_norm: 0.8319653051230873, iteration: 310264
loss: 0.9727367162704468,grad_norm: 0.9651548472944627, iteration: 310265
loss: 1.0187522172927856,grad_norm: 0.9691742897807305, iteration: 310266
loss: 1.0459378957748413,grad_norm: 0.9999996651935907, iteration: 310267
loss: 0.9776896834373474,grad_norm: 0.9999990264897342, iteration: 310268
loss: 1.0852738618850708,grad_norm: 0.9999998899389114, iteration: 310269
loss: 0.9645029306411743,grad_norm: 0.8234668465652765, iteration: 310270
loss: 1.2204407453536987,grad_norm: 0.9999998194856633, iteration: 310271
loss: 0.9867355227470398,grad_norm: 0.7583950868767021, iteration: 310272
loss: 1.0496960878372192,grad_norm: 0.9999992182157806, iteration: 310273
loss: 0.9801546335220337,grad_norm: 0.8301658100584861, iteration: 310274
loss: 1.0036776065826416,grad_norm: 0.99999918219566, iteration: 310275
loss: 1.0902940034866333,grad_norm: 0.9999997673092045, iteration: 310276
loss: 1.016154170036316,grad_norm: 0.9999990557766543, iteration: 310277
loss: 1.0707837343215942,grad_norm: 0.9999991831363965, iteration: 310278
loss: 1.014114499092102,grad_norm: 0.9999992198812651, iteration: 310279
loss: 0.9969822764396667,grad_norm: 0.7459491945512429, iteration: 310280
loss: 0.9956284165382385,grad_norm: 0.8616389880940241, iteration: 310281
loss: 1.0434521436691284,grad_norm: 0.9999996218867528, iteration: 310282
loss: 1.0513006448745728,grad_norm: 0.9999998838230789, iteration: 310283
loss: 1.0357191562652588,grad_norm: 0.9999998702809783, iteration: 310284
loss: 1.0071544647216797,grad_norm: 0.9221537352510359, iteration: 310285
loss: 1.027361273765564,grad_norm: 0.9999994679988026, iteration: 310286
loss: 1.0176678895950317,grad_norm: 0.9999997340272649, iteration: 310287
loss: 1.0135747194290161,grad_norm: 0.769310243543152, iteration: 310288
loss: 1.0407648086547852,grad_norm: 0.9931765771745663, iteration: 310289
loss: 1.2441167831420898,grad_norm: 0.9999993297557757, iteration: 310290
loss: 1.0011972188949585,grad_norm: 0.7146728687902426, iteration: 310291
loss: 1.0989341735839844,grad_norm: 0.9999992630608746, iteration: 310292
loss: 1.0620039701461792,grad_norm: 0.9550783158550835, iteration: 310293
loss: 1.1182284355163574,grad_norm: 0.9999990675995696, iteration: 310294
loss: 0.9862275123596191,grad_norm: 0.9099147135514342, iteration: 310295
loss: 1.0821954011917114,grad_norm: 0.9599590825205722, iteration: 310296
loss: 1.1626209020614624,grad_norm: 0.9999995336589015, iteration: 310297
loss: 1.0949792861938477,grad_norm: 0.9999997750519813, iteration: 310298
loss: 1.0407897233963013,grad_norm: 0.8196172664641846, iteration: 310299
loss: 1.1125669479370117,grad_norm: 0.9999997912042394, iteration: 310300
loss: 0.9998600482940674,grad_norm: 0.8908615846026586, iteration: 310301
loss: 1.0832339525222778,grad_norm: 0.8759027449035982, iteration: 310302
loss: 1.1582300662994385,grad_norm: 0.9999992434552584, iteration: 310303
loss: 1.0782749652862549,grad_norm: 0.9999998669165786, iteration: 310304
loss: 1.1213171482086182,grad_norm: 0.8933159840706507, iteration: 310305
loss: 1.1941405534744263,grad_norm: 0.958775021816293, iteration: 310306
loss: 1.088809609413147,grad_norm: 0.8110379634355339, iteration: 310307
loss: 1.021011471748352,grad_norm: 0.8963793794674767, iteration: 310308
loss: 0.9973921179771423,grad_norm: 0.8420142082751818, iteration: 310309
loss: 1.0012195110321045,grad_norm: 0.7790895515995621, iteration: 310310
loss: 1.0694565773010254,grad_norm: 0.9999997384990282, iteration: 310311
loss: 1.0300029516220093,grad_norm: 0.7480490029939705, iteration: 310312
loss: 1.1618592739105225,grad_norm: 0.9999991832890448, iteration: 310313
loss: 1.0564861297607422,grad_norm: 0.9999993254841525, iteration: 310314
loss: 1.034337043762207,grad_norm: 0.9999995229318521, iteration: 310315
loss: 1.0234302282333374,grad_norm: 0.956805384117508, iteration: 310316
loss: 1.0247784852981567,grad_norm: 0.8817844198317586, iteration: 310317
loss: 1.0193144083023071,grad_norm: 0.8100712902010125, iteration: 310318
loss: 1.0203889608383179,grad_norm: 0.9023691323600206, iteration: 310319
loss: 1.0067058801651,grad_norm: 0.8931916551761846, iteration: 310320
loss: 1.0318926572799683,grad_norm: 0.943246513804747, iteration: 310321
loss: 1.0105433464050293,grad_norm: 0.8858336976594577, iteration: 310322
loss: 1.0122270584106445,grad_norm: 0.9999993208785001, iteration: 310323
loss: 1.0220966339111328,grad_norm: 1.0000000997800906, iteration: 310324
loss: 0.9994175434112549,grad_norm: 0.8661149818520654, iteration: 310325
loss: 1.0170401334762573,grad_norm: 0.8832024412017577, iteration: 310326
loss: 1.0322208404541016,grad_norm: 0.7451564228182386, iteration: 310327
loss: 1.0550963878631592,grad_norm: 0.9999991483805593, iteration: 310328
loss: 1.0105634927749634,grad_norm: 0.9999997035040202, iteration: 310329
loss: 1.053410291671753,grad_norm: 0.9221383595803021, iteration: 310330
loss: 1.0366694927215576,grad_norm: 0.93756747857404, iteration: 310331
loss: 1.0591630935668945,grad_norm: 0.9999992464810802, iteration: 310332
loss: 1.0395444631576538,grad_norm: 0.8174396691022976, iteration: 310333
loss: 1.010718584060669,grad_norm: 0.6603722816901691, iteration: 310334
loss: 1.0559889078140259,grad_norm: 0.9999999739251366, iteration: 310335
loss: 0.9959295392036438,grad_norm: 0.9999992856373413, iteration: 310336
loss: 1.0206645727157593,grad_norm: 0.9714889324821666, iteration: 310337
loss: 0.9889667630195618,grad_norm: 0.9999990714906319, iteration: 310338
loss: 1.1070120334625244,grad_norm: 0.7572603443573414, iteration: 310339
loss: 1.0269075632095337,grad_norm: 0.9296629336083396, iteration: 310340
loss: 1.0306050777435303,grad_norm: 0.9999989914663258, iteration: 310341
loss: 1.0247479677200317,grad_norm: 0.9492453642786997, iteration: 310342
loss: 1.0280238389968872,grad_norm: 0.9132169059022268, iteration: 310343
loss: 1.0827486515045166,grad_norm: 0.8661455337425394, iteration: 310344
loss: 1.0604212284088135,grad_norm: 0.9999998255646162, iteration: 310345
loss: 0.9959876537322998,grad_norm: 0.9999990810865814, iteration: 310346
loss: 1.0843919515609741,grad_norm: 0.9999996030323899, iteration: 310347
loss: 1.0096007585525513,grad_norm: 0.8791698819550493, iteration: 310348
loss: 1.145142912864685,grad_norm: 0.9999996508161454, iteration: 310349
loss: 0.9957923889160156,grad_norm: 0.8604101811520375, iteration: 310350
loss: 1.039131999015808,grad_norm: 0.7263581720384872, iteration: 310351
loss: 0.9686073660850525,grad_norm: 0.9703141173838329, iteration: 310352
loss: 1.012316346168518,grad_norm: 0.9399219568042472, iteration: 310353
loss: 1.0166188478469849,grad_norm: 0.9999990130566033, iteration: 310354
loss: 1.17561674118042,grad_norm: 0.9999997778589897, iteration: 310355
loss: 1.0005477666854858,grad_norm: 0.918082008944293, iteration: 310356
loss: 1.0097856521606445,grad_norm: 0.823140883687284, iteration: 310357
loss: 1.019665241241455,grad_norm: 0.9999991666654666, iteration: 310358
loss: 1.0614569187164307,grad_norm: 0.9208750977671779, iteration: 310359
loss: 1.053187608718872,grad_norm: 0.9999995159234791, iteration: 310360
loss: 1.2018080949783325,grad_norm: 0.9999994644424849, iteration: 310361
loss: 1.0663738250732422,grad_norm: 0.9794180256539236, iteration: 310362
loss: 1.0162405967712402,grad_norm: 0.873344297725902, iteration: 310363
loss: 1.0031033754348755,grad_norm: 0.8712913743795941, iteration: 310364
loss: 1.0058521032333374,grad_norm: 0.9999992880826832, iteration: 310365
loss: 1.2783031463623047,grad_norm: 0.9999994777703866, iteration: 310366
loss: 1.021044135093689,grad_norm: 0.7100638624909149, iteration: 310367
loss: 0.9871715307235718,grad_norm: 0.8837737026963535, iteration: 310368
loss: 0.9815756678581238,grad_norm: 0.986120558444898, iteration: 310369
loss: 1.0912657976150513,grad_norm: 0.9999990978245235, iteration: 310370
loss: 0.9993017315864563,grad_norm: 0.99999957872357, iteration: 310371
loss: 0.9758618474006653,grad_norm: 0.982618006773229, iteration: 310372
loss: 1.0556546449661255,grad_norm: 0.9999995502341991, iteration: 310373
loss: 1.0316990613937378,grad_norm: 0.9174138557790025, iteration: 310374
loss: 1.0941658020019531,grad_norm: 0.9999995686231117, iteration: 310375
loss: 0.9942033886909485,grad_norm: 0.9999991627070863, iteration: 310376
loss: 1.0407440662384033,grad_norm: 0.9999992283468472, iteration: 310377
loss: 1.1090478897094727,grad_norm: 0.9999995423664944, iteration: 310378
loss: 1.0918819904327393,grad_norm: 0.999999455375564, iteration: 310379
loss: 1.0199074745178223,grad_norm: 0.9999996055252901, iteration: 310380
loss: 0.9859468340873718,grad_norm: 0.9540505810414629, iteration: 310381
loss: 1.0481911897659302,grad_norm: 0.9999995974470492, iteration: 310382
loss: 0.9842053651809692,grad_norm: 0.8463350407351379, iteration: 310383
loss: 1.1811633110046387,grad_norm: 0.9999999987744002, iteration: 310384
loss: 1.0162105560302734,grad_norm: 0.8799398398432181, iteration: 310385
loss: 0.9866849184036255,grad_norm: 0.8633621380737603, iteration: 310386
loss: 0.9833736419677734,grad_norm: 0.8176171003176054, iteration: 310387
loss: 0.9950962662696838,grad_norm: 0.7932962734057021, iteration: 310388
loss: 1.000885009765625,grad_norm: 0.8205633622334065, iteration: 310389
loss: 1.084043025970459,grad_norm: 0.9999999501602947, iteration: 310390
loss: 1.110146164894104,grad_norm: 0.9999999125924077, iteration: 310391
loss: 1.009432077407837,grad_norm: 0.9123738980995244, iteration: 310392
loss: 1.0066232681274414,grad_norm: 0.9999991457111729, iteration: 310393
loss: 1.0023409128189087,grad_norm: 0.9274903361938865, iteration: 310394
loss: 0.9594883918762207,grad_norm: 0.7711373107927798, iteration: 310395
loss: 1.0129179954528809,grad_norm: 0.9999995763196265, iteration: 310396
loss: 0.993942379951477,grad_norm: 0.9999996689916507, iteration: 310397
loss: 0.9741656184196472,grad_norm: 0.940071725695759, iteration: 310398
loss: 0.9847797751426697,grad_norm: 0.8578737598037219, iteration: 310399
loss: 1.046292781829834,grad_norm: 0.9999991214744958, iteration: 310400
loss: 1.0352643728256226,grad_norm: 0.9999992036707631, iteration: 310401
loss: 1.0193498134613037,grad_norm: 0.9999994173739127, iteration: 310402
loss: 1.085292100906372,grad_norm: 0.9999994377834548, iteration: 310403
loss: 1.011783242225647,grad_norm: 0.8904150296783279, iteration: 310404
loss: 1.0436302423477173,grad_norm: 0.9999996439364903, iteration: 310405
loss: 1.0203187465667725,grad_norm: 0.810778929076818, iteration: 310406
loss: 1.1351929903030396,grad_norm: 0.9999993406479238, iteration: 310407
loss: 1.0721560716629028,grad_norm: 0.9321482826597808, iteration: 310408
loss: 1.0290144681930542,grad_norm: 0.7952197527443192, iteration: 310409
loss: 1.1249831914901733,grad_norm: 0.9999993676289041, iteration: 310410
loss: 0.9820671677589417,grad_norm: 0.876949625505012, iteration: 310411
loss: 0.9763665795326233,grad_norm: 0.8414827821225556, iteration: 310412
loss: 1.0634492635726929,grad_norm: 0.8524723422955663, iteration: 310413
loss: 1.0164674520492554,grad_norm: 0.7317680646746574, iteration: 310414
loss: 1.0580083131790161,grad_norm: 0.9999997872809228, iteration: 310415
loss: 0.99588942527771,grad_norm: 0.8936934504613925, iteration: 310416
loss: 1.0137768983840942,grad_norm: 0.999999197314761, iteration: 310417
loss: 0.9962627291679382,grad_norm: 0.8789929817008829, iteration: 310418
loss: 1.0108509063720703,grad_norm: 0.809039658614442, iteration: 310419
loss: 1.1471366882324219,grad_norm: 0.9999996780735878, iteration: 310420
loss: 1.019862174987793,grad_norm: 0.9999990190080745, iteration: 310421
loss: 0.9169301986694336,grad_norm: 0.9999990461402871, iteration: 310422
loss: 1.1097692251205444,grad_norm: 0.9999996781389966, iteration: 310423
loss: 0.9986969232559204,grad_norm: 0.7646974311185543, iteration: 310424
loss: 1.0830799341201782,grad_norm: 0.8350920579276702, iteration: 310425
loss: 1.0092395544052124,grad_norm: 0.999999480450242, iteration: 310426
loss: 1.0159531831741333,grad_norm: 0.8597767201913722, iteration: 310427
loss: 1.0266371965408325,grad_norm: 0.8275374030421571, iteration: 310428
loss: 1.0819281339645386,grad_norm: 0.9999999303764868, iteration: 310429
loss: 0.983400285243988,grad_norm: 0.9999991195243259, iteration: 310430
loss: 1.027768850326538,grad_norm: 0.9999991374295453, iteration: 310431
loss: 1.0207802057266235,grad_norm: 0.9999992167742402, iteration: 310432
loss: 1.020350456237793,grad_norm: 0.9674591747993944, iteration: 310433
loss: 1.02057945728302,grad_norm: 0.8991025953695085, iteration: 310434
loss: 1.1616132259368896,grad_norm: 0.9999992906613738, iteration: 310435
loss: 1.0219789743423462,grad_norm: 0.9999991492287332, iteration: 310436
loss: 1.077301263809204,grad_norm: 0.9380055516601398, iteration: 310437
loss: 1.0544403791427612,grad_norm: 0.99999902404874, iteration: 310438
loss: 0.9918060302734375,grad_norm: 0.728533266628297, iteration: 310439
loss: 0.9965826869010925,grad_norm: 0.9352937284913131, iteration: 310440
loss: 0.9846290946006775,grad_norm: 0.7615810094759593, iteration: 310441
loss: 1.0418962240219116,grad_norm: 0.999999075309877, iteration: 310442
loss: 1.0050214529037476,grad_norm: 0.8515454116127957, iteration: 310443
loss: 0.9762023091316223,grad_norm: 0.9079225141456928, iteration: 310444
loss: 1.0587021112442017,grad_norm: 0.9999999857406862, iteration: 310445
loss: 0.954174816608429,grad_norm: 0.9536842219328079, iteration: 310446
loss: 0.9752503037452698,grad_norm: 0.83927901265116, iteration: 310447
loss: 0.9565626978874207,grad_norm: 0.8264286028452669, iteration: 310448
loss: 1.0158424377441406,grad_norm: 0.9999994844304987, iteration: 310449
loss: 0.9890687465667725,grad_norm: 0.9744303309253126, iteration: 310450
loss: 1.038067102432251,grad_norm: 0.8253543401124683, iteration: 310451
loss: 1.0540028810501099,grad_norm: 0.9567253993955824, iteration: 310452
loss: 1.0454663038253784,grad_norm: 0.9999997169215511, iteration: 310453
loss: 1.0680263042449951,grad_norm: 0.8683298313015366, iteration: 310454
loss: 1.106805443763733,grad_norm: 0.9999991838021963, iteration: 310455
loss: 1.0972646474838257,grad_norm: 0.9999995698734923, iteration: 310456
loss: 0.9976469874382019,grad_norm: 0.9999998782448478, iteration: 310457
loss: 1.0359138250350952,grad_norm: 0.8164676318259315, iteration: 310458
loss: 1.0120007991790771,grad_norm: 0.8509279300930039, iteration: 310459
loss: 0.9817838072776794,grad_norm: 0.9587572459501454, iteration: 310460
loss: 1.0020960569381714,grad_norm: 0.9999992673014916, iteration: 310461
loss: 1.0136604309082031,grad_norm: 0.9999995323749148, iteration: 310462
loss: 0.9595446586608887,grad_norm: 0.7953231507649947, iteration: 310463
loss: 1.024227261543274,grad_norm: 0.9999992295181858, iteration: 310464
loss: 1.007468581199646,grad_norm: 0.99999972666728, iteration: 310465
loss: 1.08927321434021,grad_norm: 0.9999999419857871, iteration: 310466
loss: 0.9764522910118103,grad_norm: 0.8983206878484739, iteration: 310467
loss: 1.1064344644546509,grad_norm: 0.9999990389202923, iteration: 310468
loss: 1.0379763841629028,grad_norm: 0.9999992620324913, iteration: 310469
loss: 1.0634855031967163,grad_norm: 0.9999999460153115, iteration: 310470
loss: 0.9823698997497559,grad_norm: 0.8277585977020594, iteration: 310471
loss: 1.031004548072815,grad_norm: 0.999999537790456, iteration: 310472
loss: 0.996082067489624,grad_norm: 0.9131782812710385, iteration: 310473
loss: 1.0636885166168213,grad_norm: 0.9669695497983687, iteration: 310474
loss: 0.9485882520675659,grad_norm: 0.9999990111508988, iteration: 310475
loss: 0.9954321384429932,grad_norm: 0.8339109987639715, iteration: 310476
loss: 1.0077693462371826,grad_norm: 0.9999994226865289, iteration: 310477
loss: 0.9988776445388794,grad_norm: 0.8611485821746312, iteration: 310478
loss: 1.0143471956253052,grad_norm: 0.8297218573416907, iteration: 310479
loss: 1.0190359354019165,grad_norm: 0.8794225066013711, iteration: 310480
loss: 1.019271731376648,grad_norm: 0.9999990340346431, iteration: 310481
loss: 0.9963119626045227,grad_norm: 0.999999112872922, iteration: 310482
loss: 1.0167523622512817,grad_norm: 0.8252222838965787, iteration: 310483
loss: 1.013300895690918,grad_norm: 0.9999998667964792, iteration: 310484
loss: 0.9924246668815613,grad_norm: 0.9070559429831873, iteration: 310485
loss: 1.0589261054992676,grad_norm: 0.7895504977993735, iteration: 310486
loss: 0.9712535738945007,grad_norm: 0.8608981027801488, iteration: 310487
loss: 1.0013196468353271,grad_norm: 0.9523451674545979, iteration: 310488
loss: 1.0178343057632446,grad_norm: 0.7644657407363807, iteration: 310489
loss: 1.0621840953826904,grad_norm: 0.9999991630140789, iteration: 310490
loss: 1.0013738870620728,grad_norm: 0.8854186913619992, iteration: 310491
loss: 0.9704602360725403,grad_norm: 0.8724168639681662, iteration: 310492
loss: 0.960494339466095,grad_norm: 0.8004739970248723, iteration: 310493
loss: 1.0115420818328857,grad_norm: 0.9604720677371363, iteration: 310494
loss: 1.0561320781707764,grad_norm: 0.9999990987235375, iteration: 310495
loss: 1.1061478853225708,grad_norm: 0.9999997488002985, iteration: 310496
loss: 1.0141083002090454,grad_norm: 0.9934324686405013, iteration: 310497
loss: 1.031040906906128,grad_norm: 0.8778453859170251, iteration: 310498
loss: 0.9496581554412842,grad_norm: 0.9571043754179736, iteration: 310499
loss: 0.9833407998085022,grad_norm: 0.8817123685495073, iteration: 310500
loss: 0.9859734177589417,grad_norm: 0.7383512727293617, iteration: 310501
loss: 1.0792874097824097,grad_norm: 0.9999998562389761, iteration: 310502
loss: 1.0404189825057983,grad_norm: 0.7413886930479476, iteration: 310503
loss: 1.0082067251205444,grad_norm: 0.9005614277657373, iteration: 310504
loss: 0.9739937782287598,grad_norm: 0.9479710600294156, iteration: 310505
loss: 1.0535980463027954,grad_norm: 0.8859774126723328, iteration: 310506
loss: 1.0132789611816406,grad_norm: 0.9812864085012662, iteration: 310507
loss: 1.0232747793197632,grad_norm: 0.9999996939966417, iteration: 310508
loss: 1.0112625360488892,grad_norm: 0.8341613489456655, iteration: 310509
loss: 1.0078171491622925,grad_norm: 0.9330775999421215, iteration: 310510
loss: 1.0267131328582764,grad_norm: 0.724672340589521, iteration: 310511
loss: 1.0140024423599243,grad_norm: 0.7674144729880488, iteration: 310512
loss: 1.0231021642684937,grad_norm: 0.8513034655879536, iteration: 310513
loss: 0.9653216004371643,grad_norm: 0.9999992983202813, iteration: 310514
loss: 1.1384786367416382,grad_norm: 0.9999998291618971, iteration: 310515
loss: 1.0231391191482544,grad_norm: 0.7189777372537053, iteration: 310516
loss: 0.9775274395942688,grad_norm: 0.841710039718948, iteration: 310517
loss: 1.0304826498031616,grad_norm: 0.8375389670394822, iteration: 310518
loss: 1.0219875574111938,grad_norm: 0.7815537280375386, iteration: 310519
loss: 1.0340280532836914,grad_norm: 0.7920116331132141, iteration: 310520
loss: 0.949602484703064,grad_norm: 0.8229756436581908, iteration: 310521
loss: 1.0590956211090088,grad_norm: 0.9999991866595771, iteration: 310522
loss: 1.035460114479065,grad_norm: 0.859294077134207, iteration: 310523
loss: 0.9745558500289917,grad_norm: 0.9999993723324706, iteration: 310524
loss: 1.0571860074996948,grad_norm: 0.878452448673784, iteration: 310525
loss: 0.9999591112136841,grad_norm: 0.753184959861455, iteration: 310526
loss: 1.0700023174285889,grad_norm: 0.7913154054836039, iteration: 310527
loss: 1.0112640857696533,grad_norm: 0.9622183524013023, iteration: 310528
loss: 1.0338709354400635,grad_norm: 0.9296692718712981, iteration: 310529
loss: 0.9894936680793762,grad_norm: 0.8992215062725364, iteration: 310530
loss: 1.0826363563537598,grad_norm: 0.9571318138237835, iteration: 310531
loss: 1.012710452079773,grad_norm: 0.8386834401998318, iteration: 310532
loss: 0.9600914120674133,grad_norm: 0.733280645415774, iteration: 310533
loss: 1.0465110540390015,grad_norm: 0.9806487537122205, iteration: 310534
loss: 1.0268298387527466,grad_norm: 0.6771022391505933, iteration: 310535
loss: 1.0096042156219482,grad_norm: 0.9649220138930577, iteration: 310536
loss: 1.0156702995300293,grad_norm: 0.9834081587710409, iteration: 310537
loss: 1.0090001821517944,grad_norm: 0.7563648732226157, iteration: 310538
loss: 0.9564048051834106,grad_norm: 0.8737646203775586, iteration: 310539
loss: 0.999258279800415,grad_norm: 0.9999991563210685, iteration: 310540
loss: 1.0562986135482788,grad_norm: 0.7893931955460732, iteration: 310541
loss: 1.0026648044586182,grad_norm: 0.7795954608251108, iteration: 310542
loss: 1.079183578491211,grad_norm: 0.9999997614191772, iteration: 310543
loss: 0.9933810234069824,grad_norm: 0.8191794823113846, iteration: 310544
loss: 1.0006771087646484,grad_norm: 0.9647191837613646, iteration: 310545
loss: 1.0019567012786865,grad_norm: 0.8258247365565996, iteration: 310546
loss: 0.9745590090751648,grad_norm: 0.9999993780196433, iteration: 310547
loss: 1.0643298625946045,grad_norm: 0.9999998138052989, iteration: 310548
loss: 0.9920642375946045,grad_norm: 0.9999991407733371, iteration: 310549
loss: 1.0122655630111694,grad_norm: 0.9999992307074204, iteration: 310550
loss: 1.0717880725860596,grad_norm: 0.9999998461885419, iteration: 310551
loss: 0.9911718368530273,grad_norm: 0.9321680903576806, iteration: 310552
loss: 1.0233675241470337,grad_norm: 0.9999996203617959, iteration: 310553
loss: 1.0136053562164307,grad_norm: 0.8204658222889534, iteration: 310554
loss: 1.0981718301773071,grad_norm: 0.9999992577780844, iteration: 310555
loss: 0.9908072352409363,grad_norm: 0.705829285191221, iteration: 310556
loss: 0.9569845795631409,grad_norm: 0.9384839850210127, iteration: 310557
loss: 0.9915993809700012,grad_norm: 0.9344447449672463, iteration: 310558
loss: 1.0454323291778564,grad_norm: 0.9999991260225422, iteration: 310559
loss: 0.9810712933540344,grad_norm: 0.8788547912617791, iteration: 310560
loss: 1.0027544498443604,grad_norm: 0.8864342360015687, iteration: 310561
loss: 1.0334810018539429,grad_norm: 0.7195903869589926, iteration: 310562
loss: 0.9737937450408936,grad_norm: 0.7630871332200629, iteration: 310563
loss: 1.0137611627578735,grad_norm: 0.8995479846767441, iteration: 310564
loss: 1.0234043598175049,grad_norm: 0.869379804216845, iteration: 310565
loss: 1.0037890672683716,grad_norm: 0.7040280900272836, iteration: 310566
loss: 1.0117343664169312,grad_norm: 0.7958776601745583, iteration: 310567
loss: 1.0055198669433594,grad_norm: 0.8687218137198071, iteration: 310568
loss: 1.0759501457214355,grad_norm: 0.8073546379885174, iteration: 310569
loss: 1.0241451263427734,grad_norm: 0.8093824986942454, iteration: 310570
loss: 0.9914341568946838,grad_norm: 0.8886674773968375, iteration: 310571
loss: 1.045615315437317,grad_norm: 0.9999992476522132, iteration: 310572
loss: 1.091908574104309,grad_norm: 0.9999993354318008, iteration: 310573
loss: 1.027894139289856,grad_norm: 0.9999990926314004, iteration: 310574
loss: 1.0215189456939697,grad_norm: 0.8748670619294484, iteration: 310575
loss: 1.0129503011703491,grad_norm: 0.9999990041747835, iteration: 310576
loss: 0.9788909554481506,grad_norm: 0.8814286360791947, iteration: 310577
loss: 1.0083521604537964,grad_norm: 0.9999992659749014, iteration: 310578
loss: 1.0238945484161377,grad_norm: 0.8389217819236722, iteration: 310579
loss: 0.9907196760177612,grad_norm: 0.7772560156098941, iteration: 310580
loss: 1.0539127588272095,grad_norm: 0.9301513454548208, iteration: 310581
loss: 0.9894254803657532,grad_norm: 0.7458470568101336, iteration: 310582
loss: 1.033557415008545,grad_norm: 0.763812250508148, iteration: 310583
loss: 1.0583668947219849,grad_norm: 0.99999973240985, iteration: 310584
loss: 1.05667245388031,grad_norm: 0.9188392542363187, iteration: 310585
loss: 1.0506519079208374,grad_norm: 0.9663545768302803, iteration: 310586
loss: 1.0079238414764404,grad_norm: 0.7333140304246553, iteration: 310587
loss: 0.994958221912384,grad_norm: 0.8849966760606235, iteration: 310588
loss: 1.0818992853164673,grad_norm: 0.9999994818298322, iteration: 310589
loss: 1.0079859495162964,grad_norm: 0.8027098868515712, iteration: 310590
loss: 1.0301967859268188,grad_norm: 0.9999997565896952, iteration: 310591
loss: 1.0404891967773438,grad_norm: 0.8813633040232444, iteration: 310592
loss: 0.9911900162696838,grad_norm: 0.8153220254893179, iteration: 310593
loss: 0.9859350919723511,grad_norm: 0.8865447891786605, iteration: 310594
loss: 0.9644116759300232,grad_norm: 0.9391588398719644, iteration: 310595
loss: 1.0834194421768188,grad_norm: 0.885927182026631, iteration: 310596
loss: 1.0284650325775146,grad_norm: 0.999999081585691, iteration: 310597
loss: 0.965039849281311,grad_norm: 0.8345958845462146, iteration: 310598
loss: 1.0196317434310913,grad_norm: 0.9999990568993702, iteration: 310599
loss: 0.9839024543762207,grad_norm: 0.8103075856891003, iteration: 310600
loss: 0.9822947382926941,grad_norm: 0.7354169454417474, iteration: 310601
loss: 0.9645522236824036,grad_norm: 0.9883759931563864, iteration: 310602
loss: 0.9986012578010559,grad_norm: 0.9999989764881662, iteration: 310603
loss: 0.9506394267082214,grad_norm: 0.7432633253299472, iteration: 310604
loss: 1.0238550901412964,grad_norm: 0.9999990107666669, iteration: 310605
loss: 1.0029798746109009,grad_norm: 0.9999999442092303, iteration: 310606
loss: 0.9866254925727844,grad_norm: 0.8389683652833958, iteration: 310607
loss: 1.0284546613693237,grad_norm: 0.9999997233901011, iteration: 310608
loss: 1.0155278444290161,grad_norm: 0.9999990658106007, iteration: 310609
loss: 0.9891854524612427,grad_norm: 0.8234553165590224, iteration: 310610
loss: 1.0472763776779175,grad_norm: 0.9656295565439332, iteration: 310611
loss: 0.9834241271018982,grad_norm: 0.9999992737765588, iteration: 310612
loss: 1.055229902267456,grad_norm: 0.9999997295742993, iteration: 310613
loss: 0.9960468411445618,grad_norm: 0.9999991021740735, iteration: 310614
loss: 1.0174449682235718,grad_norm: 0.8213673724289108, iteration: 310615
loss: 1.0231164693832397,grad_norm: 0.9620170404666952, iteration: 310616
loss: 0.9758894443511963,grad_norm: 0.8831203363593041, iteration: 310617
loss: 1.037009596824646,grad_norm: 0.7247669853009541, iteration: 310618
loss: 1.0070172548294067,grad_norm: 0.8645195056685431, iteration: 310619
loss: 1.039406418800354,grad_norm: 0.8204111559083055, iteration: 310620
loss: 1.0151033401489258,grad_norm: 0.8839907896114945, iteration: 310621
loss: 1.0011974573135376,grad_norm: 0.9999994053001695, iteration: 310622
loss: 1.1390918493270874,grad_norm: 0.9999999222170273, iteration: 310623
loss: 0.9752779006958008,grad_norm: 0.8039540370001949, iteration: 310624
loss: 1.0951869487762451,grad_norm: 0.9999995487881224, iteration: 310625
loss: 1.0182791948318481,grad_norm: 0.9135380945906001, iteration: 310626
loss: 0.994061291217804,grad_norm: 0.8155358056301444, iteration: 310627
loss: 0.9723912477493286,grad_norm: 0.7377045609927033, iteration: 310628
loss: 1.03147554397583,grad_norm: 0.9999993789419006, iteration: 310629
loss: 1.0047053098678589,grad_norm: 0.9748509194434614, iteration: 310630
loss: 1.0921016931533813,grad_norm: 0.999999121605797, iteration: 310631
loss: 1.0276950597763062,grad_norm: 0.9531858474648888, iteration: 310632
loss: 1.0033372640609741,grad_norm: 0.7182022077782133, iteration: 310633
loss: 1.0062072277069092,grad_norm: 0.9353728201268949, iteration: 310634
loss: 0.9938438534736633,grad_norm: 0.9999991940917718, iteration: 310635
loss: 1.0199817419052124,grad_norm: 0.9999992163954391, iteration: 310636
loss: 0.9784907102584839,grad_norm: 0.955105902601738, iteration: 310637
loss: 1.0225964784622192,grad_norm: 0.8529657833312115, iteration: 310638
loss: 1.0441699028015137,grad_norm: 0.8856048906572368, iteration: 310639
loss: 1.008907675743103,grad_norm: 0.8820447123779072, iteration: 310640
loss: 0.9899010062217712,grad_norm: 0.9999997637590975, iteration: 310641
loss: 0.9775681495666504,grad_norm: 0.7339029397973097, iteration: 310642
loss: 1.0302289724349976,grad_norm: 0.8557220788564376, iteration: 310643
loss: 1.052770972251892,grad_norm: 0.9999992490062763, iteration: 310644
loss: 0.9785187244415283,grad_norm: 0.901597289038257, iteration: 310645
loss: 1.0159101486206055,grad_norm: 0.9999996619657753, iteration: 310646
loss: 1.2807796001434326,grad_norm: 0.9999996228403278, iteration: 310647
loss: 0.998498260974884,grad_norm: 0.7205001224211859, iteration: 310648
loss: 1.059428334236145,grad_norm: 0.9999991888543155, iteration: 310649
loss: 1.0084872245788574,grad_norm: 0.7901587862267242, iteration: 310650
loss: 0.9925009608268738,grad_norm: 0.9999999159717451, iteration: 310651
loss: 0.9901179671287537,grad_norm: 0.8546255019478464, iteration: 310652
loss: 1.0653672218322754,grad_norm: 0.9999997545844627, iteration: 310653
loss: 1.0224844217300415,grad_norm: 0.7882931944096004, iteration: 310654
loss: 0.9783567190170288,grad_norm: 0.9562299879984145, iteration: 310655
loss: 0.9978917241096497,grad_norm: 0.9730861550599265, iteration: 310656
loss: 0.9699611663818359,grad_norm: 0.7692600544310672, iteration: 310657
loss: 1.0430350303649902,grad_norm: 0.8812032491984648, iteration: 310658
loss: 1.0465526580810547,grad_norm: 0.9999991327024417, iteration: 310659
loss: 0.9870539307594299,grad_norm: 0.9999998309654161, iteration: 310660
loss: 0.9827538132667542,grad_norm: 0.7814263812891362, iteration: 310661
loss: 0.9946902990341187,grad_norm: 0.7324296741792689, iteration: 310662
loss: 1.085105061531067,grad_norm: 0.9999994395886136, iteration: 310663
loss: 1.0325406789779663,grad_norm: 0.9999990206871046, iteration: 310664
loss: 0.9988974332809448,grad_norm: 0.9263535896734041, iteration: 310665
loss: 0.9741058349609375,grad_norm: 0.9253456692990191, iteration: 310666
loss: 0.9654492735862732,grad_norm: 0.8337737307288503, iteration: 310667
loss: 0.9930071830749512,grad_norm: 0.9999993469152927, iteration: 310668
loss: 1.0079290866851807,grad_norm: 0.7351735723283819, iteration: 310669
loss: 0.9841865301132202,grad_norm: 0.9884553348320414, iteration: 310670
loss: 1.0703985691070557,grad_norm: 0.9999992431788584, iteration: 310671
loss: 1.0177576541900635,grad_norm: 0.7716397162129901, iteration: 310672
loss: 1.0155564546585083,grad_norm: 0.7236716555711409, iteration: 310673
loss: 1.0033189058303833,grad_norm: 0.8610840296460809, iteration: 310674
loss: 1.008091688156128,grad_norm: 0.737026154039898, iteration: 310675
loss: 1.0425667762756348,grad_norm: 0.9999992045467101, iteration: 310676
loss: 1.0809439420700073,grad_norm: 0.9999991670881025, iteration: 310677
loss: 1.0753895044326782,grad_norm: 0.999999781489183, iteration: 310678
loss: 0.9871683716773987,grad_norm: 0.9999989823024358, iteration: 310679
loss: 0.9752923250198364,grad_norm: 0.7989099868150894, iteration: 310680
loss: 0.9763485789299011,grad_norm: 0.9999999833753259, iteration: 310681
loss: 1.0007106065750122,grad_norm: 0.9999998762179988, iteration: 310682
loss: 0.9928146600723267,grad_norm: 0.9999991151369447, iteration: 310683
loss: 1.0419012308120728,grad_norm: 0.9999992209684884, iteration: 310684
loss: 0.9615111351013184,grad_norm: 0.8920896460907421, iteration: 310685
loss: 1.0427906513214111,grad_norm: 0.8952285214477783, iteration: 310686
loss: 1.121314525604248,grad_norm: 0.9999997595516946, iteration: 310687
loss: 0.9700706005096436,grad_norm: 0.7874671302978425, iteration: 310688
loss: 1.0364612340927124,grad_norm: 0.7904929059471012, iteration: 310689
loss: 1.0373802185058594,grad_norm: 0.8640017955528329, iteration: 310690
loss: 0.9891808032989502,grad_norm: 0.999999596485653, iteration: 310691
loss: 1.0033221244812012,grad_norm: 0.9999995477810328, iteration: 310692
loss: 1.0289888381958008,grad_norm: 0.8034868527143804, iteration: 310693
loss: 1.0124566555023193,grad_norm: 0.7541435556495294, iteration: 310694
loss: 0.9859450459480286,grad_norm: 0.8817109646848079, iteration: 310695
loss: 1.0218602418899536,grad_norm: 0.7583944390728162, iteration: 310696
loss: 0.9923185110092163,grad_norm: 0.9198238596447926, iteration: 310697
loss: 1.0291050672531128,grad_norm: 0.9999990062444206, iteration: 310698
loss: 1.0108578205108643,grad_norm: 0.8403296061850039, iteration: 310699
loss: 1.0310463905334473,grad_norm: 0.847637068663484, iteration: 310700
loss: 1.024265170097351,grad_norm: 0.9999990215922028, iteration: 310701
loss: 1.0276319980621338,grad_norm: 0.8096075494809832, iteration: 310702
loss: 1.0515902042388916,grad_norm: 0.9999990290762382, iteration: 310703
loss: 1.013960838317871,grad_norm: 0.8752227746344567, iteration: 310704
loss: 1.0270379781723022,grad_norm: 0.8683352538661175, iteration: 310705
loss: 0.996631383895874,grad_norm: 0.999999072557304, iteration: 310706
loss: 1.0027134418487549,grad_norm: 0.9999992144804724, iteration: 310707
loss: 1.068026065826416,grad_norm: 0.9245272427747224, iteration: 310708
loss: 1.002600908279419,grad_norm: 0.821488470457443, iteration: 310709
loss: 0.9959549903869629,grad_norm: 0.8054336994622514, iteration: 310710
loss: 1.0011245012283325,grad_norm: 0.7361332771198676, iteration: 310711
loss: 1.0283504724502563,grad_norm: 0.9336840958199947, iteration: 310712
loss: 1.0394574403762817,grad_norm: 0.9999991205893408, iteration: 310713
loss: 0.9594717025756836,grad_norm: 0.8943884155991897, iteration: 310714
loss: 0.9774777293205261,grad_norm: 0.9791230159145103, iteration: 310715
loss: 0.9869223833084106,grad_norm: 0.8407652378329272, iteration: 310716
loss: 1.0406782627105713,grad_norm: 0.9999999362754017, iteration: 310717
loss: 1.027162790298462,grad_norm: 0.9434590420738737, iteration: 310718
loss: 0.9979520440101624,grad_norm: 0.7081965794425809, iteration: 310719
loss: 1.0331025123596191,grad_norm: 0.99999918069458, iteration: 310720
loss: 0.9723845720291138,grad_norm: 0.6378163079258452, iteration: 310721
loss: 0.9902389645576477,grad_norm: 0.9292802311346339, iteration: 310722
loss: 0.9782206416130066,grad_norm: 0.8756435487423935, iteration: 310723
loss: 0.9966870546340942,grad_norm: 0.8629398690509261, iteration: 310724
loss: 1.006847858428955,grad_norm: 0.8902308407543094, iteration: 310725
loss: 1.0293523073196411,grad_norm: 0.7339410634317586, iteration: 310726
loss: 0.9847528338432312,grad_norm: 0.9999991470779345, iteration: 310727
loss: 1.0213614702224731,grad_norm: 0.7897787299662996, iteration: 310728
loss: 1.007641077041626,grad_norm: 0.8900723316620863, iteration: 310729
loss: 0.9986105561256409,grad_norm: 0.862805907322412, iteration: 310730
loss: 1.0530625581741333,grad_norm: 0.9476180867644995, iteration: 310731
loss: 0.9997287392616272,grad_norm: 0.8675995286215689, iteration: 310732
loss: 1.1592823266983032,grad_norm: 0.999999729743672, iteration: 310733
loss: 0.9782084822654724,grad_norm: 0.9385140905200426, iteration: 310734
loss: 1.026036262512207,grad_norm: 0.9999994588603429, iteration: 310735
loss: 1.0217185020446777,grad_norm: 0.711203413745537, iteration: 310736
loss: 1.011372447013855,grad_norm: 0.7926657100628492, iteration: 310737
loss: 0.9973267912864685,grad_norm: 0.911846007770629, iteration: 310738
loss: 1.150210976600647,grad_norm: 0.9999992565489781, iteration: 310739
loss: 1.0199432373046875,grad_norm: 0.7579033836609986, iteration: 310740
loss: 0.9980040192604065,grad_norm: 0.7551106054184404, iteration: 310741
loss: 0.9916759133338928,grad_norm: 0.936077708796046, iteration: 310742
loss: 1.0101279020309448,grad_norm: 0.8410147002563425, iteration: 310743
loss: 1.0453890562057495,grad_norm: 0.9999621190006772, iteration: 310744
loss: 1.0859843492507935,grad_norm: 0.9999994379365409, iteration: 310745
loss: 1.0092430114746094,grad_norm: 0.9592711289697852, iteration: 310746
loss: 0.9978090524673462,grad_norm: 0.9665404206570355, iteration: 310747
loss: 1.0200591087341309,grad_norm: 0.9999997994873019, iteration: 310748
loss: 1.0282292366027832,grad_norm: 0.8439903197715832, iteration: 310749
loss: 1.0259387493133545,grad_norm: 0.8807728169120158, iteration: 310750
loss: 1.1945098638534546,grad_norm: 0.9999996453164374, iteration: 310751
loss: 1.099249005317688,grad_norm: 0.9999992517142268, iteration: 310752
loss: 1.0290895700454712,grad_norm: 0.9999995052354891, iteration: 310753
loss: 1.0085768699645996,grad_norm: 0.8184464525252766, iteration: 310754
loss: 1.0100359916687012,grad_norm: 0.8834518585539968, iteration: 310755
loss: 0.9810279607772827,grad_norm: 0.7882346147899845, iteration: 310756
loss: 0.9883723855018616,grad_norm: 0.863631148804584, iteration: 310757
loss: 1.022080421447754,grad_norm: 0.859049567488445, iteration: 310758
loss: 0.9989545941352844,grad_norm: 0.865809314269995, iteration: 310759
loss: 0.9972705245018005,grad_norm: 0.9202282410194571, iteration: 310760
loss: 1.0230292081832886,grad_norm: 0.828784099077059, iteration: 310761
loss: 0.9954795241355896,grad_norm: 0.8683059910438986, iteration: 310762
loss: 0.9648288488388062,grad_norm: 0.7859808225412909, iteration: 310763
loss: 1.0542925596237183,grad_norm: 0.9999989695191948, iteration: 310764
loss: 0.9980977177619934,grad_norm: 0.8574459592099846, iteration: 310765
loss: 0.9789989590644836,grad_norm: 0.8610110327889601, iteration: 310766
loss: 0.9860894680023193,grad_norm: 0.99999932377389, iteration: 310767
loss: 1.0072065591812134,grad_norm: 0.8114621978550586, iteration: 310768
loss: 0.9874611496925354,grad_norm: 0.9999991116257811, iteration: 310769
loss: 1.0293065309524536,grad_norm: 0.9654504576995262, iteration: 310770
loss: 1.0029494762420654,grad_norm: 0.9999992706988853, iteration: 310771
loss: 1.0628360509872437,grad_norm: 0.8259875196160333, iteration: 310772
loss: 1.0662541389465332,grad_norm: 0.999999327950603, iteration: 310773
loss: 0.9836671352386475,grad_norm: 0.8123961479138901, iteration: 310774
loss: 0.9792484641075134,grad_norm: 0.851256087493075, iteration: 310775
loss: 1.0165083408355713,grad_norm: 0.932940272118853, iteration: 310776
loss: 0.9975675940513611,grad_norm: 0.7617254561156436, iteration: 310777
loss: 0.9998108744621277,grad_norm: 0.7057257881022936, iteration: 310778
loss: 1.0058244466781616,grad_norm: 0.9109102636443054, iteration: 310779
loss: 0.9865047931671143,grad_norm: 0.9139733108980785, iteration: 310780
loss: 0.9675407409667969,grad_norm: 0.9067106032003873, iteration: 310781
loss: 0.9954262375831604,grad_norm: 0.9438013950274837, iteration: 310782
loss: 0.9826273322105408,grad_norm: 0.883281905674206, iteration: 310783
loss: 1.0070405006408691,grad_norm: 0.9632962836879484, iteration: 310784
loss: 0.993359386920929,grad_norm: 0.8367369531497706, iteration: 310785
loss: 0.9609793424606323,grad_norm: 0.9096680741538783, iteration: 310786
loss: 1.0030438899993896,grad_norm: 0.9999991058791295, iteration: 310787
loss: 0.9638072848320007,grad_norm: 0.7422878008746265, iteration: 310788
loss: 1.0873894691467285,grad_norm: 0.9999990318003146, iteration: 310789
loss: 1.0501325130462646,grad_norm: 0.8450354248361651, iteration: 310790
loss: 1.0876270532608032,grad_norm: 0.9999995724580583, iteration: 310791
loss: 1.0163609981536865,grad_norm: 0.9999995219984177, iteration: 310792
loss: 0.9751903414726257,grad_norm: 0.7273100221070405, iteration: 310793
loss: 1.0368974208831787,grad_norm: 0.8767407504020969, iteration: 310794
loss: 1.020021915435791,grad_norm: 0.9999992385312076, iteration: 310795
loss: 1.127299427986145,grad_norm: 0.9999994022928924, iteration: 310796
loss: 1.0057686567306519,grad_norm: 0.8698876205448183, iteration: 310797
loss: 0.9928030967712402,grad_norm: 0.9450302226446197, iteration: 310798
loss: 1.005104660987854,grad_norm: 0.7850785843460163, iteration: 310799
loss: 1.0712538957595825,grad_norm: 0.8762694474187588, iteration: 310800
loss: 0.9579596519470215,grad_norm: 0.7646538273629141, iteration: 310801
loss: 0.9432228803634644,grad_norm: 0.9191918002810406, iteration: 310802
loss: 0.9615839719772339,grad_norm: 0.8382145741730942, iteration: 310803
loss: 1.0039877891540527,grad_norm: 0.8223423615887144, iteration: 310804
loss: 0.9322831630706787,grad_norm: 0.9037992154455246, iteration: 310805
loss: 0.9980539083480835,grad_norm: 0.771850174418332, iteration: 310806
loss: 0.9885382056236267,grad_norm: 0.8592585178978065, iteration: 310807
loss: 1.0588312149047852,grad_norm: 0.9999991011858929, iteration: 310808
loss: 1.0852949619293213,grad_norm: 0.9999992940073169, iteration: 310809
loss: 0.9655718803405762,grad_norm: 0.9319084671622919, iteration: 310810
loss: 1.0021308660507202,grad_norm: 0.7166159514696061, iteration: 310811
loss: 1.0279536247253418,grad_norm: 0.9459651959131377, iteration: 310812
loss: 1.0256493091583252,grad_norm: 0.9999996794581814, iteration: 310813
loss: 0.9908746480941772,grad_norm: 0.7721775061847052, iteration: 310814
loss: 1.0216270685195923,grad_norm: 0.999999520432752, iteration: 310815
loss: 0.9958694577217102,grad_norm: 0.6632531197673117, iteration: 310816
loss: 0.9412413239479065,grad_norm: 0.874067546859205, iteration: 310817
loss: 1.0077247619628906,grad_norm: 0.7697391823946814, iteration: 310818
loss: 1.0716122388839722,grad_norm: 0.9999990510235393, iteration: 310819
loss: 1.050087809562683,grad_norm: 0.9999990533978689, iteration: 310820
loss: 0.9824106693267822,grad_norm: 0.7974244890768782, iteration: 310821
loss: 1.1032296419143677,grad_norm: 0.9999995700289631, iteration: 310822
loss: 1.019957184791565,grad_norm: 0.8879064744308063, iteration: 310823
loss: 1.047312617301941,grad_norm: 0.9999998702561758, iteration: 310824
loss: 0.9956650733947754,grad_norm: 0.727724662538883, iteration: 310825
loss: 0.9843466877937317,grad_norm: 0.955863823312912, iteration: 310826
loss: 0.9653362035751343,grad_norm: 0.9999991223733133, iteration: 310827
loss: 1.0473392009735107,grad_norm: 0.9999992715027956, iteration: 310828
loss: 1.0588648319244385,grad_norm: 0.9999991469651571, iteration: 310829
loss: 1.0163146257400513,grad_norm: 0.786427240022455, iteration: 310830
loss: 1.0152497291564941,grad_norm: 0.8307672073920351, iteration: 310831
loss: 0.9845768809318542,grad_norm: 0.9039885224723517, iteration: 310832
loss: 1.0002453327178955,grad_norm: 0.8216112417842036, iteration: 310833
loss: 1.0166047811508179,grad_norm: 0.7866380627204644, iteration: 310834
loss: 1.0056155920028687,grad_norm: 0.8340908273416712, iteration: 310835
loss: 1.0399552583694458,grad_norm: 0.818940374360638, iteration: 310836
loss: 0.9889536499977112,grad_norm: 0.8215357812648355, iteration: 310837
loss: 1.020689845085144,grad_norm: 0.9999991978544388, iteration: 310838
loss: 1.0103330612182617,grad_norm: 0.8069962098742456, iteration: 310839
loss: 0.9765005707740784,grad_norm: 0.9611637673955378, iteration: 310840
loss: 1.0015959739685059,grad_norm: 0.9999993975102134, iteration: 310841
loss: 1.0215930938720703,grad_norm: 0.9999994314668651, iteration: 310842
loss: 1.0121455192565918,grad_norm: 0.8057035128441496, iteration: 310843
loss: 0.9848582148551941,grad_norm: 0.7524190026341452, iteration: 310844
loss: 1.0168946981430054,grad_norm: 0.845423817203187, iteration: 310845
loss: 0.9936442375183105,grad_norm: 0.7724588635408269, iteration: 310846
loss: 0.97859787940979,grad_norm: 0.8381008058034678, iteration: 310847
loss: 1.0205878019332886,grad_norm: 0.8164130531454452, iteration: 310848
loss: 0.9903228878974915,grad_norm: 0.7296652366132254, iteration: 310849
loss: 1.0003950595855713,grad_norm: 0.8220109461427644, iteration: 310850
loss: 1.0238873958587646,grad_norm: 0.8784112107412696, iteration: 310851
loss: 1.0740817785263062,grad_norm: 0.9999998656375404, iteration: 310852
loss: 1.0753395557403564,grad_norm: 0.928135259254664, iteration: 310853
loss: 1.0265147686004639,grad_norm: 0.8512380411436128, iteration: 310854
loss: 1.0431749820709229,grad_norm: 0.9485236701128094, iteration: 310855
loss: 1.0055526494979858,grad_norm: 0.7966064784753359, iteration: 310856
loss: 1.0389950275421143,grad_norm: 0.9763721909902671, iteration: 310857
loss: 1.0016542673110962,grad_norm: 0.8776724365465124, iteration: 310858
loss: 1.0332144498825073,grad_norm: 0.7845113207093549, iteration: 310859
loss: 1.0098206996917725,grad_norm: 0.7607774264299397, iteration: 310860
loss: 1.0118707418441772,grad_norm: 0.9769391978210129, iteration: 310861
loss: 1.011481523513794,grad_norm: 0.8545255211948506, iteration: 310862
loss: 0.9992342591285706,grad_norm: 0.8360141982695551, iteration: 310863
loss: 0.9646221399307251,grad_norm: 0.9999990270293995, iteration: 310864
loss: 1.0179392099380493,grad_norm: 0.974926363969156, iteration: 310865
loss: 1.0286799669265747,grad_norm: 0.7469247770321036, iteration: 310866
loss: 0.9926400780677795,grad_norm: 0.8238991971390669, iteration: 310867
loss: 1.0440949201583862,grad_norm: 0.9999991262032473, iteration: 310868
loss: 1.0620838403701782,grad_norm: 0.7685303320558518, iteration: 310869
loss: 1.0359561443328857,grad_norm: 0.9999991108586759, iteration: 310870
loss: 0.9845504760742188,grad_norm: 0.9608141553402053, iteration: 310871
loss: 1.0207699537277222,grad_norm: 0.8324808499364423, iteration: 310872
loss: 1.0661015510559082,grad_norm: 0.9480729668633456, iteration: 310873
loss: 1.0437694787979126,grad_norm: 0.8596474471110143, iteration: 310874
loss: 0.9944204092025757,grad_norm: 0.8600889747620392, iteration: 310875
loss: 0.9941102862358093,grad_norm: 0.9999992126653731, iteration: 310876
loss: 1.048058271408081,grad_norm: 0.80903471709351, iteration: 310877
loss: 1.0028389692306519,grad_norm: 0.9999992129793043, iteration: 310878
loss: 0.9760783314704895,grad_norm: 0.7193380149540641, iteration: 310879
loss: 0.9997495412826538,grad_norm: 0.712297281543541, iteration: 310880
loss: 1.0143324136734009,grad_norm: 0.8471466189819399, iteration: 310881
loss: 1.0078016519546509,grad_norm: 0.8355266168977636, iteration: 310882
loss: 1.031952977180481,grad_norm: 0.9344541378926698, iteration: 310883
loss: 0.962352991104126,grad_norm: 0.7405480237660889, iteration: 310884
loss: 1.0197545289993286,grad_norm: 0.942828541195209, iteration: 310885
loss: 0.9859508275985718,grad_norm: 0.7616152463538586, iteration: 310886
loss: 1.0734351873397827,grad_norm: 0.9999998516970415, iteration: 310887
loss: 0.9712702035903931,grad_norm: 0.809579696341915, iteration: 310888
loss: 0.9905218482017517,grad_norm: 0.8142643679081292, iteration: 310889
loss: 1.0290837287902832,grad_norm: 0.7918657015490389, iteration: 310890
loss: 1.0093194246292114,grad_norm: 0.702071399368642, iteration: 310891
loss: 1.0112347602844238,grad_norm: 0.9999990248525016, iteration: 310892
loss: 0.9971609115600586,grad_norm: 0.9347162280971698, iteration: 310893
loss: 1.0164875984191895,grad_norm: 0.9767495306794067, iteration: 310894
loss: 0.9914725422859192,grad_norm: 0.7976232752366499, iteration: 310895
loss: 1.013843059539795,grad_norm: 0.7948305298581206, iteration: 310896
loss: 1.0219154357910156,grad_norm: 0.8503421282294719, iteration: 310897
loss: 1.016102910041809,grad_norm: 0.7487888646273667, iteration: 310898
loss: 0.9865490198135376,grad_norm: 0.8522492858092349, iteration: 310899
loss: 0.988579511642456,grad_norm: 0.8661509199257268, iteration: 310900
loss: 1.0185062885284424,grad_norm: 0.9199975458487316, iteration: 310901
loss: 0.9752122163772583,grad_norm: 0.9999991507361407, iteration: 310902
loss: 1.0349414348602295,grad_norm: 0.9215077754342208, iteration: 310903
loss: 1.0353235006332397,grad_norm: 0.9999996026355109, iteration: 310904
loss: 1.0413899421691895,grad_norm: 0.9999995363791122, iteration: 310905
loss: 0.9733828902244568,grad_norm: 0.8677650052317463, iteration: 310906
loss: 1.0116220712661743,grad_norm: 0.9999991101823241, iteration: 310907
loss: 1.0227266550064087,grad_norm: 0.8809793860971573, iteration: 310908
loss: 1.0201493501663208,grad_norm: 0.821714578107237, iteration: 310909
loss: 0.9880933165550232,grad_norm: 0.8347764884212735, iteration: 310910
loss: 0.9929073452949524,grad_norm: 0.9158432725933803, iteration: 310911
loss: 1.016079068183899,grad_norm: 0.7934400313739203, iteration: 310912
loss: 1.0076205730438232,grad_norm: 0.9999991999712775, iteration: 310913
loss: 1.1954737901687622,grad_norm: 0.9999998876600895, iteration: 310914
loss: 1.0419976711273193,grad_norm: 0.992049181637365, iteration: 310915
loss: 0.9917348623275757,grad_norm: 0.999999018168989, iteration: 310916
loss: 1.0163573026657104,grad_norm: 0.8770674459852366, iteration: 310917
loss: 1.0117723941802979,grad_norm: 0.9199286136491078, iteration: 310918
loss: 1.0650994777679443,grad_norm: 0.9999993547019411, iteration: 310919
loss: 0.9887719750404358,grad_norm: 0.9816303175980381, iteration: 310920
loss: 1.0387500524520874,grad_norm: 0.8118496683929032, iteration: 310921
loss: 1.0323925018310547,grad_norm: 0.9421526816595822, iteration: 310922
loss: 0.9810971617698669,grad_norm: 0.8728488008519092, iteration: 310923
loss: 1.0050058364868164,grad_norm: 0.8198418705656462, iteration: 310924
loss: 0.9666413068771362,grad_norm: 0.9826008042839671, iteration: 310925
loss: 0.9956414699554443,grad_norm: 0.782027207805194, iteration: 310926
loss: 1.0283927917480469,grad_norm: 0.950325846488642, iteration: 310927
loss: 1.0221666097640991,grad_norm: 0.8791227308774898, iteration: 310928
loss: 1.0800906419754028,grad_norm: 0.9999994214139231, iteration: 310929
loss: 0.9430063366889954,grad_norm: 0.8429038361403152, iteration: 310930
loss: 1.0163884162902832,grad_norm: 0.9302582066114441, iteration: 310931
loss: 1.037361741065979,grad_norm: 0.9999991032313245, iteration: 310932
loss: 0.965654194355011,grad_norm: 0.7963348824713715, iteration: 310933
loss: 0.9902011156082153,grad_norm: 0.7274284180160387, iteration: 310934
loss: 0.9980672597885132,grad_norm: 0.9498883743392377, iteration: 310935
loss: 1.0246431827545166,grad_norm: 0.9999992926661652, iteration: 310936
loss: 1.0123263597488403,grad_norm: 0.8181866628272673, iteration: 310937
loss: 0.9743818044662476,grad_norm: 0.9008859087560084, iteration: 310938
loss: 1.0241562128067017,grad_norm: 0.7476391990721053, iteration: 310939
loss: 0.9890681505203247,grad_norm: 0.891173451029887, iteration: 310940
loss: 0.9881696105003357,grad_norm: 0.9999997363292731, iteration: 310941
loss: 0.9622727036476135,grad_norm: 0.7954406704959173, iteration: 310942
loss: 1.0072845220565796,grad_norm: 0.816871739457731, iteration: 310943
loss: 0.9971115589141846,grad_norm: 0.86898352190863, iteration: 310944
loss: 1.007423758506775,grad_norm: 0.8361057258123001, iteration: 310945
loss: 1.046380639076233,grad_norm: 0.9999997205377233, iteration: 310946
loss: 1.0046865940093994,grad_norm: 0.7200155256391094, iteration: 310947
loss: 1.0061641931533813,grad_norm: 0.8015681962914251, iteration: 310948
loss: 1.0068711042404175,grad_norm: 0.878076612641056, iteration: 310949
loss: 1.0594298839569092,grad_norm: 0.9999991823761276, iteration: 310950
loss: 1.0191895961761475,grad_norm: 0.9999997646347644, iteration: 310951
loss: 1.021576166152954,grad_norm: 0.7613097845314103, iteration: 310952
loss: 1.0179839134216309,grad_norm: 0.9999996113134694, iteration: 310953
loss: 1.001075029373169,grad_norm: 0.7641820975823238, iteration: 310954
loss: 1.0347641706466675,grad_norm: 0.9209796029990333, iteration: 310955
loss: 1.0108309984207153,grad_norm: 0.874717458658243, iteration: 310956
loss: 0.9800089001655579,grad_norm: 0.7759743066066785, iteration: 310957
loss: 1.0030746459960938,grad_norm: 0.8045205491805174, iteration: 310958
loss: 0.9987524747848511,grad_norm: 0.8255207579509922, iteration: 310959
loss: 0.9910998940467834,grad_norm: 0.694941202803017, iteration: 310960
loss: 1.0197689533233643,grad_norm: 0.8586036589509957, iteration: 310961
loss: 1.000930905342102,grad_norm: 0.7455648667645365, iteration: 310962
loss: 1.0240641832351685,grad_norm: 0.996343193856813, iteration: 310963
loss: 1.0355234146118164,grad_norm: 0.9367103260473224, iteration: 310964
loss: 0.9940292835235596,grad_norm: 0.9999991173716171, iteration: 310965
loss: 0.9796744585037231,grad_norm: 0.9124613242447337, iteration: 310966
loss: 1.0110996961593628,grad_norm: 0.9999990699617, iteration: 310967
loss: 1.037833571434021,grad_norm: 0.8423511697199035, iteration: 310968
loss: 1.0187946557998657,grad_norm: 0.9419148371483831, iteration: 310969
loss: 0.979954183101654,grad_norm: 0.7650960269884266, iteration: 310970
loss: 0.9824718832969666,grad_norm: 0.845692048136056, iteration: 310971
loss: 0.986839234828949,grad_norm: 0.999999725736799, iteration: 310972
loss: 1.023876667022705,grad_norm: 0.9999990702332063, iteration: 310973
loss: 1.0218068361282349,grad_norm: 0.7779250899274716, iteration: 310974
loss: 1.1901006698608398,grad_norm: 0.9999997398286198, iteration: 310975
loss: 1.003173828125,grad_norm: 0.7903272220887488, iteration: 310976
loss: 1.0114290714263916,grad_norm: 0.856613892931632, iteration: 310977
loss: 1.0255308151245117,grad_norm: 0.8333154206764422, iteration: 310978
loss: 1.0447795391082764,grad_norm: 0.9999998373122168, iteration: 310979
loss: 0.9963251352310181,grad_norm: 0.8768020204714251, iteration: 310980
loss: 0.9905179142951965,grad_norm: 0.939690754955403, iteration: 310981
loss: 1.0455445051193237,grad_norm: 0.7640327065867133, iteration: 310982
loss: 0.9942513108253479,grad_norm: 0.9025265851925591, iteration: 310983
loss: 1.0270161628723145,grad_norm: 0.9999990780831326, iteration: 310984
loss: 0.9918836355209351,grad_norm: 0.9474279526654893, iteration: 310985
loss: 0.9773791432380676,grad_norm: 0.7783783409501883, iteration: 310986
loss: 0.9813183546066284,grad_norm: 0.8956255080525344, iteration: 310987
loss: 0.97091144323349,grad_norm: 0.9480283208327598, iteration: 310988
loss: 0.9944702982902527,grad_norm: 0.7576973577753308, iteration: 310989
loss: 1.0639355182647705,grad_norm: 0.8020990541647013, iteration: 310990
loss: 1.031631588935852,grad_norm: 0.7557528684180207, iteration: 310991
loss: 0.9900906682014465,grad_norm: 0.8509342324517795, iteration: 310992
loss: 1.0391498804092407,grad_norm: 0.8982895307257655, iteration: 310993
loss: 0.9924928545951843,grad_norm: 0.9252043816503629, iteration: 310994
loss: 1.0775699615478516,grad_norm: 0.9999993901314811, iteration: 310995
loss: 0.9953527450561523,grad_norm: 0.9999992170961383, iteration: 310996
loss: 0.9843447804450989,grad_norm: 0.7922838804249382, iteration: 310997
loss: 0.9799121618270874,grad_norm: 0.9287792735852483, iteration: 310998
loss: 1.00916588306427,grad_norm: 0.712476952293054, iteration: 310999
loss: 1.039458990097046,grad_norm: 0.935726282111973, iteration: 311000
loss: 1.016830563545227,grad_norm: 0.817084875288615, iteration: 311001
loss: 1.0190328359603882,grad_norm: 0.9108763132243669, iteration: 311002
loss: 1.0158120393753052,grad_norm: 0.8574507032882642, iteration: 311003
loss: 1.0379812717437744,grad_norm: 0.7595602783416491, iteration: 311004
loss: 1.0023621320724487,grad_norm: 0.7426273464049403, iteration: 311005
loss: 0.9931491017341614,grad_norm: 0.8537067992930646, iteration: 311006
loss: 0.9631200432777405,grad_norm: 0.7569935274808178, iteration: 311007
loss: 0.9728261828422546,grad_norm: 0.8973200299648519, iteration: 311008
loss: 0.9783985614776611,grad_norm: 0.999999109430073, iteration: 311009
loss: 1.0650330781936646,grad_norm: 0.9999992208083717, iteration: 311010
loss: 0.9887240529060364,grad_norm: 0.9999990445936932, iteration: 311011
loss: 0.9668919444084167,grad_norm: 0.8811642791751589, iteration: 311012
loss: 0.9897952675819397,grad_norm: 0.7552337751788553, iteration: 311013
loss: 1.01700758934021,grad_norm: 0.9264751496718642, iteration: 311014
loss: 0.9848906397819519,grad_norm: 0.8417152412134618, iteration: 311015
loss: 1.0488085746765137,grad_norm: 0.8311831230016467, iteration: 311016
loss: 1.047385811805725,grad_norm: 0.999999849487911, iteration: 311017
loss: 1.0095726251602173,grad_norm: 0.9048614327473865, iteration: 311018
loss: 1.0271450281143188,grad_norm: 0.9309434155103067, iteration: 311019
loss: 1.0563321113586426,grad_norm: 0.999999137916047, iteration: 311020
loss: 0.9887951016426086,grad_norm: 0.8423687436070796, iteration: 311021
loss: 1.011562705039978,grad_norm: 0.7630991612171726, iteration: 311022
loss: 1.0173676013946533,grad_norm: 0.8297829022954158, iteration: 311023
loss: 1.0110774040222168,grad_norm: 0.6668710828590881, iteration: 311024
loss: 0.999300479888916,grad_norm: 0.8477714145383575, iteration: 311025
loss: 1.0006048679351807,grad_norm: 0.8666691922898232, iteration: 311026
loss: 0.9775579571723938,grad_norm: 0.825681321207599, iteration: 311027
loss: 1.1017400026321411,grad_norm: 0.8873887152599138, iteration: 311028
loss: 0.9743806719779968,grad_norm: 0.7465001247278438, iteration: 311029
loss: 1.0662447214126587,grad_norm: 0.7798353413781137, iteration: 311030
loss: 1.0441279411315918,grad_norm: 0.99999914915383, iteration: 311031
loss: 1.0075408220291138,grad_norm: 0.7868593031491142, iteration: 311032
loss: 0.9926117658615112,grad_norm: 0.9482284145704106, iteration: 311033
loss: 1.0056687593460083,grad_norm: 0.6692093732147109, iteration: 311034
loss: 1.0393331050872803,grad_norm: 0.9308493064064466, iteration: 311035
loss: 1.107109785079956,grad_norm: 0.999999237021239, iteration: 311036
loss: 1.0318851470947266,grad_norm: 0.8676416742417026, iteration: 311037
loss: 0.9854509830474854,grad_norm: 0.7555728024173536, iteration: 311038
loss: 1.0189507007598877,grad_norm: 0.9999990259357286, iteration: 311039
loss: 0.9747732877731323,grad_norm: 0.7003029086399752, iteration: 311040
loss: 0.987804114818573,grad_norm: 0.9999990292788121, iteration: 311041
loss: 0.9797611832618713,grad_norm: 0.690654499770386, iteration: 311042
loss: 0.9920374751091003,grad_norm: 0.8954098947713199, iteration: 311043
loss: 1.0043812990188599,grad_norm: 0.7884167052128505, iteration: 311044
loss: 0.9833491444587708,grad_norm: 0.7730997267475603, iteration: 311045
loss: 0.9986769556999207,grad_norm: 0.884352263207254, iteration: 311046
loss: 1.0433590412139893,grad_norm: 0.9092691921628155, iteration: 311047
loss: 0.9994258284568787,grad_norm: 0.6740426184286737, iteration: 311048
loss: 1.042330026626587,grad_norm: 0.9999993092730904, iteration: 311049
loss: 1.0104068517684937,grad_norm: 0.8653477488831527, iteration: 311050
loss: 1.021087884902954,grad_norm: 0.9999998887335172, iteration: 311051
loss: 1.0073167085647583,grad_norm: 0.8145486790374751, iteration: 311052
loss: 0.9570785760879517,grad_norm: 0.9254795310807762, iteration: 311053
loss: 1.0104924440383911,grad_norm: 0.8309645375101355, iteration: 311054
loss: 0.9966028332710266,grad_norm: 0.8860058744566235, iteration: 311055
loss: 1.0014333724975586,grad_norm: 0.9497735240934361, iteration: 311056
loss: 1.0618269443511963,grad_norm: 0.9999990323662933, iteration: 311057
loss: 0.9789252877235413,grad_norm: 0.9513648364789764, iteration: 311058
loss: 0.9709042906761169,grad_norm: 0.7585406108756119, iteration: 311059
loss: 0.9837328791618347,grad_norm: 0.7402886596563554, iteration: 311060
loss: 1.0313667058944702,grad_norm: 0.7665069813632652, iteration: 311061
loss: 1.0255846977233887,grad_norm: 0.9110475914214721, iteration: 311062
loss: 0.9951550364494324,grad_norm: 0.9685191521575977, iteration: 311063
loss: 1.0065577030181885,grad_norm: 0.7582049584676074, iteration: 311064
loss: 0.9937506914138794,grad_norm: 0.8595244950086206, iteration: 311065
loss: 0.9790496230125427,grad_norm: 0.6619625776764415, iteration: 311066
loss: 0.9751628637313843,grad_norm: 0.8419905537926795, iteration: 311067
loss: 1.0452375411987305,grad_norm: 0.9133534330024529, iteration: 311068
loss: 0.9892711043357849,grad_norm: 0.8347996981529763, iteration: 311069
loss: 1.019384503364563,grad_norm: 0.9999996122748265, iteration: 311070
loss: 0.9592639207839966,grad_norm: 0.7855215300345514, iteration: 311071
loss: 0.9819183945655823,grad_norm: 0.9527542406679047, iteration: 311072
loss: 1.0153337717056274,grad_norm: 0.8859917082772965, iteration: 311073
loss: 1.079098105430603,grad_norm: 0.9999993916988479, iteration: 311074
loss: 1.0593211650848389,grad_norm: 0.999999312312801, iteration: 311075
loss: 1.1679232120513916,grad_norm: 1.00000001192174, iteration: 311076
loss: 1.0235456228256226,grad_norm: 0.9999996168527946, iteration: 311077
loss: 1.0254827737808228,grad_norm: 0.8741190276092902, iteration: 311078
loss: 1.019948959350586,grad_norm: 0.7480240152064795, iteration: 311079
loss: 1.0419715642929077,grad_norm: 0.9999991870876684, iteration: 311080
loss: 0.9722864031791687,grad_norm: 0.7996225965389651, iteration: 311081
loss: 1.035597324371338,grad_norm: 0.8842421783947234, iteration: 311082
loss: 1.0309971570968628,grad_norm: 0.9999993132797561, iteration: 311083
loss: 0.9866555333137512,grad_norm: 0.9999996830730571, iteration: 311084
loss: 0.9930122494697571,grad_norm: 0.9302461266824832, iteration: 311085
loss: 1.0072544813156128,grad_norm: 0.8529614387390675, iteration: 311086
loss: 0.9753510355949402,grad_norm: 0.9999991363774107, iteration: 311087
loss: 0.9795053601264954,grad_norm: 0.8619027873120693, iteration: 311088
loss: 0.9943500757217407,grad_norm: 0.8480446194096399, iteration: 311089
loss: 1.1270523071289062,grad_norm: 0.9999993842380106, iteration: 311090
loss: 0.9990317821502686,grad_norm: 0.7161238628422297, iteration: 311091
loss: 1.0112435817718506,grad_norm: 0.861518983119385, iteration: 311092
loss: 1.0173652172088623,grad_norm: 0.7301209297146171, iteration: 311093
loss: 1.0477906465530396,grad_norm: 0.9498527911642702, iteration: 311094
loss: 0.9939600229263306,grad_norm: 0.935441019801876, iteration: 311095
loss: 0.9933450222015381,grad_norm: 0.924999948621864, iteration: 311096
loss: 1.0209946632385254,grad_norm: 0.9999995200836082, iteration: 311097
loss: 1.002551794052124,grad_norm: 0.9011887021129866, iteration: 311098
loss: 1.0004479885101318,grad_norm: 0.9787637867270524, iteration: 311099
loss: 0.9795770645141602,grad_norm: 0.7871045728868526, iteration: 311100
loss: 1.0225638151168823,grad_norm: 0.9999994646410553, iteration: 311101
loss: 1.0017173290252686,grad_norm: 0.8107747478112245, iteration: 311102
loss: 0.9471278190612793,grad_norm: 0.8493963252968667, iteration: 311103
loss: 1.0245989561080933,grad_norm: 0.9156244655575787, iteration: 311104
loss: 1.1779710054397583,grad_norm: 0.9999991500150976, iteration: 311105
loss: 0.983505368232727,grad_norm: 0.9999995160818976, iteration: 311106
loss: 0.9981484413146973,grad_norm: 0.9999990565667766, iteration: 311107
loss: 1.0173628330230713,grad_norm: 0.8230717489631515, iteration: 311108
loss: 0.979114294052124,grad_norm: 0.9166564161401621, iteration: 311109
loss: 1.0958259105682373,grad_norm: 0.9999992712796549, iteration: 311110
loss: 0.9940675497055054,grad_norm: 0.8309843223575362, iteration: 311111
loss: 1.010200023651123,grad_norm: 0.7946523857556801, iteration: 311112
loss: 1.084341049194336,grad_norm: 0.999999801044278, iteration: 311113
loss: 0.9896277189254761,grad_norm: 0.9999990424122549, iteration: 311114
loss: 1.0038964748382568,grad_norm: 0.8469100729601, iteration: 311115
loss: 1.0214767456054688,grad_norm: 0.8126267277550603, iteration: 311116
loss: 0.9929367303848267,grad_norm: 0.747345878387, iteration: 311117
loss: 0.9904322624206543,grad_norm: 0.7682618446170976, iteration: 311118
loss: 0.9877784252166748,grad_norm: 0.9999993935495423, iteration: 311119
loss: 1.032153844833374,grad_norm: 0.7525256881338449, iteration: 311120
loss: 1.0721122026443481,grad_norm: 0.9589617121433632, iteration: 311121
loss: 1.0283499956130981,grad_norm: 0.9952387323328651, iteration: 311122
loss: 1.0239282846450806,grad_norm: 0.999999194976493, iteration: 311123
loss: 0.993269681930542,grad_norm: 0.9999991348635837, iteration: 311124
loss: 1.0338836908340454,grad_norm: 0.9999990340191591, iteration: 311125
loss: 1.0337904691696167,grad_norm: 0.9318549471111581, iteration: 311126
loss: 1.0002280473709106,grad_norm: 0.8696756601137118, iteration: 311127
loss: 1.0467485189437866,grad_norm: 0.938766945489912, iteration: 311128
loss: 1.0058778524398804,grad_norm: 0.9999997783151335, iteration: 311129
loss: 1.036627173423767,grad_norm: 0.7156026621347918, iteration: 311130
loss: 1.0099941492080688,grad_norm: 0.8381515000233178, iteration: 311131
loss: 1.05743408203125,grad_norm: 0.9219528803096771, iteration: 311132
loss: 1.0220675468444824,grad_norm: 0.99999925182422, iteration: 311133
loss: 1.0348091125488281,grad_norm: 0.7940447474766049, iteration: 311134
loss: 1.0097126960754395,grad_norm: 0.8902409232015783, iteration: 311135
loss: 1.0036383867263794,grad_norm: 0.9999998129817596, iteration: 311136
loss: 1.0840692520141602,grad_norm: 0.9999991334222339, iteration: 311137
loss: 1.0216728448867798,grad_norm: 0.9936239016774533, iteration: 311138
loss: 1.000661849975586,grad_norm: 0.9369844945587156, iteration: 311139
loss: 1.0072435140609741,grad_norm: 0.8466693454478247, iteration: 311140
loss: 1.028018593788147,grad_norm: 0.8679874077847037, iteration: 311141
loss: 1.0625543594360352,grad_norm: 0.9999993286199014, iteration: 311142
loss: 0.9979350566864014,grad_norm: 0.7052183398071113, iteration: 311143
loss: 1.061676263809204,grad_norm: 0.999999864864515, iteration: 311144
loss: 0.9878302812576294,grad_norm: 0.8357086720728227, iteration: 311145
loss: 1.0018424987792969,grad_norm: 0.7937996932896317, iteration: 311146
loss: 1.0272587537765503,grad_norm: 0.904638240534988, iteration: 311147
loss: 0.9832417964935303,grad_norm: 0.955325351426961, iteration: 311148
loss: 1.0231034755706787,grad_norm: 0.7665533599698738, iteration: 311149
loss: 1.0314903259277344,grad_norm: 0.9613013528805613, iteration: 311150
loss: 1.0434856414794922,grad_norm: 0.76625603953603, iteration: 311151
loss: 0.9971565008163452,grad_norm: 0.7965163181161408, iteration: 311152
loss: 1.0169211626052856,grad_norm: 0.9532837596223677, iteration: 311153
loss: 0.9940616488456726,grad_norm: 0.8710784115725895, iteration: 311154
loss: 0.9931025505065918,grad_norm: 0.9533420549215055, iteration: 311155
loss: 0.9893096089363098,grad_norm: 0.7378718168949318, iteration: 311156
loss: 0.9972994327545166,grad_norm: 0.7886288772017979, iteration: 311157
loss: 1.0203886032104492,grad_norm: 0.9999991253620815, iteration: 311158
loss: 0.992131233215332,grad_norm: 0.9145960270869073, iteration: 311159
loss: 1.0371698141098022,grad_norm: 0.8668296334692773, iteration: 311160
loss: 0.9838871955871582,grad_norm: 0.8396375788400893, iteration: 311161
loss: 0.991672933101654,grad_norm: 0.9989036591767678, iteration: 311162
loss: 0.9911416172981262,grad_norm: 0.9999997173216647, iteration: 311163
loss: 1.0272530317306519,grad_norm: 0.8076904521470359, iteration: 311164
loss: 1.015550136566162,grad_norm: 0.8443616610838666, iteration: 311165
loss: 1.0095961093902588,grad_norm: 0.9999990397882131, iteration: 311166
loss: 0.9961378574371338,grad_norm: 0.9999999419792575, iteration: 311167
loss: 1.2144379615783691,grad_norm: 0.999999586374547, iteration: 311168
loss: 1.0113389492034912,grad_norm: 0.8896510959253479, iteration: 311169
loss: 1.041481614112854,grad_norm: 0.9999990308456602, iteration: 311170
loss: 1.0015639066696167,grad_norm: 0.7936012030172573, iteration: 311171
loss: 1.0294688940048218,grad_norm: 0.7316901874579131, iteration: 311172
loss: 0.983068585395813,grad_norm: 0.847167347873532, iteration: 311173
loss: 0.9902794361114502,grad_norm: 0.8829486901897157, iteration: 311174
loss: 1.0277049541473389,grad_norm: 0.999999844532015, iteration: 311175
loss: 1.0212599039077759,grad_norm: 0.9570668207568187, iteration: 311176
loss: 1.0084609985351562,grad_norm: 0.7925344775016798, iteration: 311177
loss: 0.9846096038818359,grad_norm: 0.9280920376654299, iteration: 311178
loss: 1.0401502847671509,grad_norm: 0.7221665675203527, iteration: 311179
loss: 0.9965789318084717,grad_norm: 0.9112528282364609, iteration: 311180
loss: 0.984324038028717,grad_norm: 0.8137003650571725, iteration: 311181
loss: 1.1041513681411743,grad_norm: 0.9816837292694994, iteration: 311182
loss: 0.9909386038780212,grad_norm: 0.9254715965275174, iteration: 311183
loss: 1.0085384845733643,grad_norm: 0.9362802056017796, iteration: 311184
loss: 0.9837763905525208,grad_norm: 0.9330436074190335, iteration: 311185
loss: 1.0110890865325928,grad_norm: 0.9022124089722715, iteration: 311186
loss: 1.016932487487793,grad_norm: 0.7313489627021145, iteration: 311187
loss: 1.0166891813278198,grad_norm: 0.868006239524709, iteration: 311188
loss: 1.0080819129943848,grad_norm: 0.9999990560476766, iteration: 311189
loss: 1.020638346672058,grad_norm: 0.7849197587632397, iteration: 311190
loss: 0.9793708920478821,grad_norm: 0.800062173664119, iteration: 311191
loss: 0.9908879399299622,grad_norm: 0.9850699508801243, iteration: 311192
loss: 0.9972169399261475,grad_norm: 0.7589833012756464, iteration: 311193
loss: 0.9648128747940063,grad_norm: 0.9201391293150636, iteration: 311194
loss: 0.9917077422142029,grad_norm: 0.7372900126945456, iteration: 311195
loss: 0.9637200832366943,grad_norm: 0.9594276887894966, iteration: 311196
loss: 1.0251951217651367,grad_norm: 0.7456403276261446, iteration: 311197
loss: 0.9830955862998962,grad_norm: 0.7433281685074118, iteration: 311198
loss: 0.9857969880104065,grad_norm: 0.7028515472055017, iteration: 311199
loss: 1.023988127708435,grad_norm: 0.8032808307429602, iteration: 311200
loss: 1.1098250150680542,grad_norm: 0.9999993949890587, iteration: 311201
loss: 1.0661673545837402,grad_norm: 0.9999997314935732, iteration: 311202
loss: 1.0186506509780884,grad_norm: 0.8305893431820994, iteration: 311203
loss: 1.026738166809082,grad_norm: 0.87777026916183, iteration: 311204
loss: 0.961418628692627,grad_norm: 0.8419887802973097, iteration: 311205
loss: 1.0234562158584595,grad_norm: 0.9717764560499023, iteration: 311206
loss: 1.0363355875015259,grad_norm: 0.999999682145953, iteration: 311207
loss: 0.9973196983337402,grad_norm: 0.8460840488305146, iteration: 311208
loss: 1.0018540620803833,grad_norm: 0.8641699016017043, iteration: 311209
loss: 1.006505012512207,grad_norm: 0.9999989948307533, iteration: 311210
loss: 0.991748571395874,grad_norm: 0.8295733395508297, iteration: 311211
loss: 0.9902826547622681,grad_norm: 0.8655506216210023, iteration: 311212
loss: 1.0326530933380127,grad_norm: 0.740389581070446, iteration: 311213
loss: 0.996105968952179,grad_norm: 0.917295588097576, iteration: 311214
loss: 1.010297417640686,grad_norm: 0.9724731038760087, iteration: 311215
loss: 1.006751537322998,grad_norm: 0.8155325314405379, iteration: 311216
loss: 1.0185109376907349,grad_norm: 0.9999989767628461, iteration: 311217
loss: 0.9627671241760254,grad_norm: 0.9194677474730005, iteration: 311218
loss: 0.9924232363700867,grad_norm: 0.8000254485877919, iteration: 311219
loss: 1.0055567026138306,grad_norm: 0.9017017813496593, iteration: 311220
loss: 1.0818601846694946,grad_norm: 0.9038541445009269, iteration: 311221
loss: 0.9512822031974792,grad_norm: 0.8758023001983924, iteration: 311222
loss: 0.9861941337585449,grad_norm: 0.7659217866455842, iteration: 311223
loss: 0.988382875919342,grad_norm: 0.824328551951414, iteration: 311224
loss: 1.0002655982971191,grad_norm: 0.7704388835250434, iteration: 311225
loss: 1.026528000831604,grad_norm: 0.9165921811440678, iteration: 311226
loss: 0.9858139157295227,grad_norm: 0.8253401651103445, iteration: 311227
loss: 1.0031943321228027,grad_norm: 0.9999995219777463, iteration: 311228
loss: 0.9898056387901306,grad_norm: 0.9999990412862673, iteration: 311229
loss: 0.9747462272644043,grad_norm: 0.8520316184400806, iteration: 311230
loss: 0.9612946510314941,grad_norm: 0.7651270055206701, iteration: 311231
loss: 0.9946932196617126,grad_norm: 0.825960065874893, iteration: 311232
loss: 1.0225073099136353,grad_norm: 0.9999994799987987, iteration: 311233
loss: 1.0212618112564087,grad_norm: 0.9634965561842654, iteration: 311234
loss: 0.9413481950759888,grad_norm: 0.8158851484070798, iteration: 311235
loss: 1.0210317373275757,grad_norm: 0.8642481226592732, iteration: 311236
loss: 0.9679940342903137,grad_norm: 0.830474300884127, iteration: 311237
loss: 1.0391604900360107,grad_norm: 0.7396365967866056, iteration: 311238
loss: 1.0780125856399536,grad_norm: 0.9886391389828609, iteration: 311239
loss: 1.1370095014572144,grad_norm: 0.9999997021705249, iteration: 311240
loss: 1.0121991634368896,grad_norm: 0.7051678430068561, iteration: 311241
loss: 1.1207739114761353,grad_norm: 0.9999994832387843, iteration: 311242
loss: 1.0612167119979858,grad_norm: 0.9999996139924165, iteration: 311243
loss: 1.0025455951690674,grad_norm: 0.9349422899980904, iteration: 311244
loss: 0.9996047616004944,grad_norm: 0.9547662359477662, iteration: 311245
loss: 1.0443693399429321,grad_norm: 0.9988860846305759, iteration: 311246
loss: 1.0379817485809326,grad_norm: 0.8522392707348966, iteration: 311247
loss: 1.0063292980194092,grad_norm: 0.7608740692958799, iteration: 311248
loss: 0.9958574175834656,grad_norm: 0.9706980138503226, iteration: 311249
loss: 1.0035115480422974,grad_norm: 0.9290374034661697, iteration: 311250
loss: 1.011242389678955,grad_norm: 0.8174920456731382, iteration: 311251
loss: 1.0441203117370605,grad_norm: 0.9999992372754152, iteration: 311252
loss: 1.0034229755401611,grad_norm: 0.9686589770275918, iteration: 311253
loss: 1.0709537267684937,grad_norm: 0.9999992394068803, iteration: 311254
loss: 1.0124571323394775,grad_norm: 0.7831107919918614, iteration: 311255
loss: 1.0178112983703613,grad_norm: 0.8822237781271243, iteration: 311256
loss: 0.9835112690925598,grad_norm: 0.7727672217039769, iteration: 311257
loss: 1.0252121686935425,grad_norm: 0.7407413574836165, iteration: 311258
loss: 1.0039012432098389,grad_norm: 0.7622264355443825, iteration: 311259
loss: 0.9993917346000671,grad_norm: 0.7540714544203392, iteration: 311260
loss: 1.0133026838302612,grad_norm: 0.794372852537506, iteration: 311261
loss: 1.0001875162124634,grad_norm: 0.8675485458503914, iteration: 311262
loss: 1.034219741821289,grad_norm: 0.814254283346418, iteration: 311263
loss: 0.9833287596702576,grad_norm: 0.8498343402582533, iteration: 311264
loss: 1.0094709396362305,grad_norm: 0.9999993314253344, iteration: 311265
loss: 0.9901717901229858,grad_norm: 0.9169998698145544, iteration: 311266
loss: 0.9671604633331299,grad_norm: 0.8556254070278396, iteration: 311267
loss: 1.0806525945663452,grad_norm: 0.9999994468832099, iteration: 311268
loss: 0.9746426939964294,grad_norm: 0.8151708985159541, iteration: 311269
loss: 1.0264803171157837,grad_norm: 0.9999993714002562, iteration: 311270
loss: 1.0135921239852905,grad_norm: 0.9999990682084894, iteration: 311271
loss: 0.9939097762107849,grad_norm: 0.7101620207451891, iteration: 311272
loss: 1.0110876560211182,grad_norm: 0.9601019976366042, iteration: 311273
loss: 1.042420744895935,grad_norm: 0.9184154517759292, iteration: 311274
loss: 1.0006715059280396,grad_norm: 0.9999997349233807, iteration: 311275
loss: 0.9892480373382568,grad_norm: 0.8867146825894131, iteration: 311276
loss: 0.9585041999816895,grad_norm: 0.9999992103809475, iteration: 311277
loss: 1.0217441320419312,grad_norm: 0.7389679676858677, iteration: 311278
loss: 1.007058024406433,grad_norm: 0.8668839605081008, iteration: 311279
loss: 0.9802884459495544,grad_norm: 0.7625185908052163, iteration: 311280
loss: 1.0243960618972778,grad_norm: 0.8588181185715594, iteration: 311281
loss: 1.0188868045806885,grad_norm: 0.9999992694142479, iteration: 311282
loss: 1.0955506563186646,grad_norm: 0.999999713769029, iteration: 311283
loss: 1.0109421014785767,grad_norm: 0.8751153038403943, iteration: 311284
loss: 0.9962345361709595,grad_norm: 0.7491254100097604, iteration: 311285
loss: 1.0576106309890747,grad_norm: 0.9206377456757133, iteration: 311286
loss: 1.0751903057098389,grad_norm: 0.9999995132906037, iteration: 311287
loss: 1.0254533290863037,grad_norm: 0.9999994168783272, iteration: 311288
loss: 1.0212504863739014,grad_norm: 0.9689066027349365, iteration: 311289
loss: 1.0633375644683838,grad_norm: 0.9999997465832823, iteration: 311290
loss: 0.98427814245224,grad_norm: 0.7482974419892653, iteration: 311291
loss: 1.0153708457946777,grad_norm: 0.822963283975024, iteration: 311292
loss: 1.0169847011566162,grad_norm: 0.8008270384626653, iteration: 311293
loss: 1.0271356105804443,grad_norm: 0.9999998700247863, iteration: 311294
loss: 0.9853888154029846,grad_norm: 0.9161058363838863, iteration: 311295
loss: 0.9714464545249939,grad_norm: 0.9999990206259487, iteration: 311296
loss: 0.9783892035484314,grad_norm: 0.9489473341761044, iteration: 311297
loss: 0.9823945164680481,grad_norm: 0.7590822056941087, iteration: 311298
loss: 1.0448848009109497,grad_norm: 0.9246489740993575, iteration: 311299
loss: 1.069405198097229,grad_norm: 1.0000000031206653, iteration: 311300
loss: 1.0624561309814453,grad_norm: 0.9999990914161628, iteration: 311301
loss: 1.0253623723983765,grad_norm: 0.9567603221753601, iteration: 311302
loss: 0.9796786904335022,grad_norm: 0.9476148820666253, iteration: 311303
loss: 1.1044118404388428,grad_norm: 0.9999994825551484, iteration: 311304
loss: 1.2343069314956665,grad_norm: 0.9999995008952266, iteration: 311305
loss: 1.1184755563735962,grad_norm: 0.9999998715247048, iteration: 311306
loss: 1.0951663255691528,grad_norm: 0.8412197327843978, iteration: 311307
loss: 1.002478003501892,grad_norm: 0.8688939870230841, iteration: 311308
loss: 1.0132192373275757,grad_norm: 0.9999996847344265, iteration: 311309
loss: 1.1076655387878418,grad_norm: 0.9999993336246803, iteration: 311310
loss: 1.0893975496292114,grad_norm: 0.9999992866865116, iteration: 311311
loss: 1.1186482906341553,grad_norm: 0.9999991109398967, iteration: 311312
loss: 1.010521650314331,grad_norm: 0.7451300714672862, iteration: 311313
loss: 1.0309202671051025,grad_norm: 0.9999993697466519, iteration: 311314
loss: 1.050640344619751,grad_norm: 0.9999998362123644, iteration: 311315
loss: 1.1081578731536865,grad_norm: 0.9580431139453477, iteration: 311316
loss: 1.0516793727874756,grad_norm: 0.972800333175364, iteration: 311317
loss: 1.0248973369598389,grad_norm: 0.9217056334983653, iteration: 311318
loss: 1.0681039094924927,grad_norm: 0.9999999628548191, iteration: 311319
loss: 1.1744835376739502,grad_norm: 0.9999996382755055, iteration: 311320
loss: 1.1113450527191162,grad_norm: 0.9999996968550053, iteration: 311321
loss: 1.1003299951553345,grad_norm: 0.9999999563585469, iteration: 311322
loss: 1.2340402603149414,grad_norm: 0.9999999513416861, iteration: 311323
loss: 1.0714489221572876,grad_norm: 0.9597933276647507, iteration: 311324
loss: 1.0100668668746948,grad_norm: 0.8866775885032023, iteration: 311325
loss: 1.188579797744751,grad_norm: 0.9999998412648744, iteration: 311326
loss: 1.0096032619476318,grad_norm: 0.894198296754379, iteration: 311327
loss: 1.166908860206604,grad_norm: 0.9999993635615271, iteration: 311328
loss: 0.9912413358688354,grad_norm: 0.880482845223015, iteration: 311329
loss: 1.0824640989303589,grad_norm: 0.99999922787347, iteration: 311330
loss: 1.111714482307434,grad_norm: 0.9999998758397474, iteration: 311331
loss: 1.001015067100525,grad_norm: 0.8838860233048816, iteration: 311332
loss: 1.0029985904693604,grad_norm: 0.9965374357362329, iteration: 311333
loss: 1.0356453657150269,grad_norm: 0.9504069347651856, iteration: 311334
loss: 1.0406862497329712,grad_norm: 1.0000000257142931, iteration: 311335
loss: 0.9951101541519165,grad_norm: 0.8984820633565721, iteration: 311336
loss: 1.0549458265304565,grad_norm: 0.8006751185883849, iteration: 311337
loss: 0.997651219367981,grad_norm: 0.7880796532295062, iteration: 311338
loss: 0.9704430103302002,grad_norm: 0.9045008380316559, iteration: 311339
loss: 1.10111665725708,grad_norm: 0.999999866398229, iteration: 311340
loss: 1.1274645328521729,grad_norm: 0.9999997977910016, iteration: 311341
loss: 1.0471159219741821,grad_norm: 0.7209663583612834, iteration: 311342
loss: 1.0451035499572754,grad_norm: 0.7911384129047241, iteration: 311343
loss: 1.0391173362731934,grad_norm: 0.9264359831594556, iteration: 311344
loss: 1.0097322463989258,grad_norm: 0.9999997924632595, iteration: 311345
loss: 1.0357283353805542,grad_norm: 0.8784272377150105, iteration: 311346
loss: 0.9878960251808167,grad_norm: 0.9262425690987712, iteration: 311347
loss: 1.005444049835205,grad_norm: 0.9632612686750938, iteration: 311348
loss: 1.0178430080413818,grad_norm: 0.8053286049382654, iteration: 311349
loss: 1.001829981803894,grad_norm: 0.8027653952728975, iteration: 311350
loss: 0.9874061346054077,grad_norm: 0.9765429425613307, iteration: 311351
loss: 1.0242270231246948,grad_norm: 0.9999991406329547, iteration: 311352
loss: 1.0085828304290771,grad_norm: 0.8183934822566851, iteration: 311353
loss: 0.9951534867286682,grad_norm: 0.8326743909922033, iteration: 311354
loss: 0.9905598759651184,grad_norm: 0.7745433498059906, iteration: 311355
loss: 1.0143513679504395,grad_norm: 0.897715536238925, iteration: 311356
loss: 1.037656307220459,grad_norm: 0.8262458275355591, iteration: 311357
loss: 1.051281213760376,grad_norm: 0.7548640905601556, iteration: 311358
loss: 0.989281415939331,grad_norm: 0.871446351965359, iteration: 311359
loss: 0.9655293822288513,grad_norm: 0.8074842634527438, iteration: 311360
loss: 0.9699569344520569,grad_norm: 0.9973014040376311, iteration: 311361
loss: 1.014691710472107,grad_norm: 0.8232540015346218, iteration: 311362
loss: 1.0247830152511597,grad_norm: 0.9999999282153452, iteration: 311363
loss: 0.9931640625,grad_norm: 0.9999990930260421, iteration: 311364
loss: 1.0167008638381958,grad_norm: 0.9425907018241393, iteration: 311365
loss: 0.9767147302627563,grad_norm: 0.8951017456657231, iteration: 311366
loss: 1.0377553701400757,grad_norm: 0.9999997365136263, iteration: 311367
loss: 1.024606466293335,grad_norm: 0.7509646185940961, iteration: 311368
loss: 0.9707514047622681,grad_norm: 0.8032526966411836, iteration: 311369
loss: 1.0044128894805908,grad_norm: 0.8016759711462424, iteration: 311370
loss: 0.9746251106262207,grad_norm: 0.690908428356201, iteration: 311371
loss: 0.9961704015731812,grad_norm: 0.7103736598501637, iteration: 311372
loss: 0.9874995946884155,grad_norm: 0.8908776268919165, iteration: 311373
loss: 1.0629754066467285,grad_norm: 0.9999998361300643, iteration: 311374
loss: 1.0133099555969238,grad_norm: 0.926221492186626, iteration: 311375
loss: 1.0047260522842407,grad_norm: 0.9999991889566204, iteration: 311376
loss: 1.0073425769805908,grad_norm: 0.9039844785634595, iteration: 311377
loss: 1.0105535984039307,grad_norm: 0.9073426336786431, iteration: 311378
loss: 1.0073466300964355,grad_norm: 0.9999998631118948, iteration: 311379
loss: 0.9673104882240295,grad_norm: 0.9999995102192712, iteration: 311380
loss: 0.9990760087966919,grad_norm: 0.7097582839331659, iteration: 311381
loss: 1.0703356266021729,grad_norm: 0.8628155328860474, iteration: 311382
loss: 1.0424387454986572,grad_norm: 0.8542685724741189, iteration: 311383
loss: 1.0491760969161987,grad_norm: 0.9999990665229018, iteration: 311384
loss: 0.9754212498664856,grad_norm: 0.758316903928562, iteration: 311385
loss: 1.0457326173782349,grad_norm: 0.7803400156012263, iteration: 311386
loss: 1.1169873476028442,grad_norm: 0.9999991863099573, iteration: 311387
loss: 0.9701532125473022,grad_norm: 0.7845380958825023, iteration: 311388
loss: 1.0015738010406494,grad_norm: 0.736891499478367, iteration: 311389
loss: 1.0188980102539062,grad_norm: 0.863825037799106, iteration: 311390
loss: 1.0065206289291382,grad_norm: 0.8732811916547448, iteration: 311391
loss: 0.9966312646865845,grad_norm: 0.9999997651906974, iteration: 311392
loss: 0.9780676960945129,grad_norm: 0.9930198173320545, iteration: 311393
loss: 1.0185893774032593,grad_norm: 0.9875227549143395, iteration: 311394
loss: 1.0195953845977783,grad_norm: 0.6954640759059201, iteration: 311395
loss: 0.962120532989502,grad_norm: 0.8492951352871261, iteration: 311396
loss: 1.0235341787338257,grad_norm: 0.9143248032832915, iteration: 311397
loss: 1.0892106294631958,grad_norm: 0.9999990530004281, iteration: 311398
loss: 0.9835899472236633,grad_norm: 0.8005801736342271, iteration: 311399
loss: 1.0315133333206177,grad_norm: 0.8839017446896322, iteration: 311400
loss: 1.0282797813415527,grad_norm: 0.9999996240459242, iteration: 311401
loss: 1.0103092193603516,grad_norm: 0.9999990347228724, iteration: 311402
loss: 1.019759178161621,grad_norm: 0.8848929972522032, iteration: 311403
loss: 0.9775000810623169,grad_norm: 0.9378309727276241, iteration: 311404
loss: 0.9613697528839111,grad_norm: 0.8896393358617982, iteration: 311405
loss: 1.0046958923339844,grad_norm: 0.9999991437563431, iteration: 311406
loss: 1.0054012537002563,grad_norm: 0.9999994419589406, iteration: 311407
loss: 0.9816950559616089,grad_norm: 0.992183392046671, iteration: 311408
loss: 1.0088293552398682,grad_norm: 0.9999997658106261, iteration: 311409
loss: 0.993614673614502,grad_norm: 0.8776255587958811, iteration: 311410
loss: 1.0042877197265625,grad_norm: 0.7097053222806702, iteration: 311411
loss: 0.9672245979309082,grad_norm: 0.9726511219062487, iteration: 311412
loss: 1.041391134262085,grad_norm: 0.7994455910475231, iteration: 311413
loss: 0.9917553663253784,grad_norm: 0.7738928653949384, iteration: 311414
loss: 1.0094704627990723,grad_norm: 0.9003622906293935, iteration: 311415
loss: 1.0555148124694824,grad_norm: 0.8730818117816606, iteration: 311416
loss: 1.0148983001708984,grad_norm: 0.9999998831263399, iteration: 311417
loss: 1.020491361618042,grad_norm: 0.8692119429298422, iteration: 311418
loss: 0.9796797037124634,grad_norm: 0.8785707742383848, iteration: 311419
loss: 0.9763349294662476,grad_norm: 0.8353849843454308, iteration: 311420
loss: 0.948211133480072,grad_norm: 0.8425891207199944, iteration: 311421
loss: 0.9755083322525024,grad_norm: 0.9315957116814423, iteration: 311422
loss: 1.0023480653762817,grad_norm: 0.7445583706525812, iteration: 311423
loss: 1.0148968696594238,grad_norm: 0.8912767124965054, iteration: 311424
loss: 0.9947859644889832,grad_norm: 0.9681275235005264, iteration: 311425
loss: 1.0467091798782349,grad_norm: 0.8651048640601485, iteration: 311426
loss: 1.050592303276062,grad_norm: 0.9999998910692527, iteration: 311427
loss: 0.9899888038635254,grad_norm: 0.9027294988172216, iteration: 311428
loss: 1.10983407497406,grad_norm: 0.9999998317799069, iteration: 311429
loss: 1.023696780204773,grad_norm: 0.8296891108556922, iteration: 311430
loss: 1.0022335052490234,grad_norm: 0.9999995416494428, iteration: 311431
loss: 0.9647583961486816,grad_norm: 0.813245359881703, iteration: 311432
loss: 0.997853696346283,grad_norm: 0.8311045292923471, iteration: 311433
loss: 0.9975416660308838,grad_norm: 0.797647782683772, iteration: 311434
loss: 1.0153380632400513,grad_norm: 0.999999275633197, iteration: 311435
loss: 0.986792802810669,grad_norm: 0.8826766240437107, iteration: 311436
loss: 1.026637077331543,grad_norm: 0.9999997173793626, iteration: 311437
loss: 1.0217490196228027,grad_norm: 0.8929090085442783, iteration: 311438
loss: 1.0039126873016357,grad_norm: 0.8400298270028184, iteration: 311439
loss: 0.986786961555481,grad_norm: 0.8862641682828103, iteration: 311440
loss: 0.9775992631912231,grad_norm: 0.8294559765457826, iteration: 311441
loss: 1.0880711078643799,grad_norm: 0.999999417806498, iteration: 311442
loss: 1.0126125812530518,grad_norm: 0.7217204153359003, iteration: 311443
loss: 0.9903545379638672,grad_norm: 0.763318350531504, iteration: 311444
loss: 0.9893487095832825,grad_norm: 0.8585364409823738, iteration: 311445
loss: 0.95546954870224,grad_norm: 0.8237864903247383, iteration: 311446
loss: 0.9497637152671814,grad_norm: 0.9566280970777702, iteration: 311447
loss: 1.0011951923370361,grad_norm: 0.9999991113616103, iteration: 311448
loss: 1.008833408355713,grad_norm: 0.9999994726892341, iteration: 311449
loss: 1.0090163946151733,grad_norm: 0.8279830861912463, iteration: 311450
loss: 0.9937562346458435,grad_norm: 0.9040121319996361, iteration: 311451
loss: 0.98274165391922,grad_norm: 0.9999992228411363, iteration: 311452
loss: 1.0218288898468018,grad_norm: 0.9999992250307592, iteration: 311453
loss: 0.9892449975013733,grad_norm: 0.9037580952673345, iteration: 311454
loss: 1.0022070407867432,grad_norm: 0.9999998447834568, iteration: 311455
loss: 0.9743094444274902,grad_norm: 0.8979291143624687, iteration: 311456
loss: 1.0030320882797241,grad_norm: 0.9291840473831648, iteration: 311457
loss: 0.9992169737815857,grad_norm: 0.9605458851045098, iteration: 311458
loss: 1.0008333921432495,grad_norm: 0.9357694850517613, iteration: 311459
loss: 1.0050601959228516,grad_norm: 0.9999991688975204, iteration: 311460
loss: 1.01658296585083,grad_norm: 0.837106975533324, iteration: 311461
loss: 0.9745827317237854,grad_norm: 0.9815066493041287, iteration: 311462
loss: 1.0434653759002686,grad_norm: 0.9999999620943879, iteration: 311463
loss: 1.0117840766906738,grad_norm: 0.8981700747380362, iteration: 311464
loss: 0.9780733585357666,grad_norm: 0.825195737271616, iteration: 311465
loss: 1.0149420499801636,grad_norm: 0.9999991314015039, iteration: 311466
loss: 0.9579660296440125,grad_norm: 0.9999999772138796, iteration: 311467
loss: 1.0154027938842773,grad_norm: 0.9341616320620264, iteration: 311468
loss: 0.9775639176368713,grad_norm: 0.999999496023055, iteration: 311469
loss: 0.9800986647605896,grad_norm: 0.9337822703627601, iteration: 311470
loss: 0.978759765625,grad_norm: 0.7139025026490095, iteration: 311471
loss: 1.0317314863204956,grad_norm: 0.9124864198279454, iteration: 311472
loss: 1.0010472536087036,grad_norm: 0.7735379626269402, iteration: 311473
loss: 1.0153348445892334,grad_norm: 0.9380832052780058, iteration: 311474
loss: 1.0087954998016357,grad_norm: 0.7849092869526726, iteration: 311475
loss: 1.0261728763580322,grad_norm: 0.9056078654572276, iteration: 311476
loss: 1.0010926723480225,grad_norm: 0.8405868166360924, iteration: 311477
loss: 1.0007131099700928,grad_norm: 0.69690559559012, iteration: 311478
loss: 1.2925257682800293,grad_norm: 0.9999994410369573, iteration: 311479
loss: 1.0227751731872559,grad_norm: 0.9999991835631252, iteration: 311480
loss: 0.9756605625152588,grad_norm: 0.8651754280053896, iteration: 311481
loss: 0.9985427856445312,grad_norm: 0.7972083230655627, iteration: 311482
loss: 1.0900734663009644,grad_norm: 0.9249014111839158, iteration: 311483
loss: 1.0060328245162964,grad_norm: 0.7523710574309814, iteration: 311484
loss: 0.9911911487579346,grad_norm: 0.8037575244658658, iteration: 311485
loss: 1.0591737031936646,grad_norm: 0.8069030924310163, iteration: 311486
loss: 1.0239629745483398,grad_norm: 0.8916477769833642, iteration: 311487
loss: 0.9926843047142029,grad_norm: 0.8335984332847969, iteration: 311488
loss: 1.0259736776351929,grad_norm: 0.9999993039432181, iteration: 311489
loss: 1.003566861152649,grad_norm: 0.7667985355760254, iteration: 311490
loss: 1.0173931121826172,grad_norm: 0.8674816720868728, iteration: 311491
loss: 0.9750000834465027,grad_norm: 0.7262397910350911, iteration: 311492
loss: 0.9934307932853699,grad_norm: 0.9999990583444742, iteration: 311493
loss: 0.9933050870895386,grad_norm: 0.8181389909581134, iteration: 311494
loss: 0.9924801588058472,grad_norm: 0.9306503108112231, iteration: 311495
loss: 1.085384726524353,grad_norm: 0.99999990894028, iteration: 311496
loss: 0.9728630185127258,grad_norm: 0.9999990988094636, iteration: 311497
loss: 0.9912437796592712,grad_norm: 0.8089556711220843, iteration: 311498
loss: 1.0405381917953491,grad_norm: 0.812857138113087, iteration: 311499
loss: 0.9614393711090088,grad_norm: 0.999999909216889, iteration: 311500
loss: 1.0942375659942627,grad_norm: 0.999999938395198, iteration: 311501
loss: 1.004736065864563,grad_norm: 0.7972907375242024, iteration: 311502
loss: 1.0043843984603882,grad_norm: 0.8754304687906079, iteration: 311503
loss: 0.9740473628044128,grad_norm: 0.9023427756743245, iteration: 311504
loss: 1.1168768405914307,grad_norm: 0.9999991424044107, iteration: 311505
loss: 1.0063984394073486,grad_norm: 0.9612606925182694, iteration: 311506
loss: 1.0412625074386597,grad_norm: 0.9999998762677186, iteration: 311507
loss: 1.0120124816894531,grad_norm: 0.999999159978415, iteration: 311508
loss: 1.0234185457229614,grad_norm: 0.8550923664915963, iteration: 311509
loss: 1.0381722450256348,grad_norm: 0.9999992191830199, iteration: 311510
loss: 1.029853343963623,grad_norm: 0.9999997052269686, iteration: 311511
loss: 1.0566784143447876,grad_norm: 0.9999998520634191, iteration: 311512
loss: 1.0109524726867676,grad_norm: 0.6958763526043236, iteration: 311513
loss: 1.0169565677642822,grad_norm: 0.9336138404046616, iteration: 311514
loss: 1.0105947256088257,grad_norm: 0.9999990743675561, iteration: 311515
loss: 0.9895332455635071,grad_norm: 0.8492287199401014, iteration: 311516
loss: 0.9903017282485962,grad_norm: 0.8713098680046962, iteration: 311517
loss: 0.9846484661102295,grad_norm: 0.7640305868422603, iteration: 311518
loss: 0.9858711361885071,grad_norm: 0.758099195009158, iteration: 311519
loss: 1.006170392036438,grad_norm: 0.8667091083184899, iteration: 311520
loss: 0.9693548679351807,grad_norm: 0.9376545333381944, iteration: 311521
loss: 1.00692880153656,grad_norm: 0.7657332567447027, iteration: 311522
loss: 0.9776051640510559,grad_norm: 0.9800960672414318, iteration: 311523
loss: 1.0197674036026,grad_norm: 0.7657758949144258, iteration: 311524
loss: 1.0205806493759155,grad_norm: 0.924365778086588, iteration: 311525
loss: 1.0183839797973633,grad_norm: 0.6789463265881022, iteration: 311526
loss: 0.9568747282028198,grad_norm: 0.8526032402405924, iteration: 311527
loss: 0.9986657500267029,grad_norm: 0.8120667498950044, iteration: 311528
loss: 0.9859679341316223,grad_norm: 0.7996686875329658, iteration: 311529
loss: 1.1287187337875366,grad_norm: 0.9999999543272972, iteration: 311530
loss: 1.0203704833984375,grad_norm: 0.8807927569285469, iteration: 311531
loss: 1.0568112134933472,grad_norm: 0.9999997136315343, iteration: 311532
loss: 1.0173989534378052,grad_norm: 0.7777971546707043, iteration: 311533
loss: 1.027199149131775,grad_norm: 0.9999990443353762, iteration: 311534
loss: 1.083584189414978,grad_norm: 0.840558999370364, iteration: 311535
loss: 1.0451205968856812,grad_norm: 0.9067820893151769, iteration: 311536
loss: 1.0487767457962036,grad_norm: 0.9999991297165514, iteration: 311537
loss: 1.0264885425567627,grad_norm: 0.8628150157912714, iteration: 311538
loss: 0.966877281665802,grad_norm: 0.9320413279637639, iteration: 311539
loss: 1.0024826526641846,grad_norm: 0.7574197781608341, iteration: 311540
loss: 1.0068494081497192,grad_norm: 0.8594586752279181, iteration: 311541
loss: 0.9932660460472107,grad_norm: 0.7092316445946986, iteration: 311542
loss: 0.98086017370224,grad_norm: 0.9465696314233079, iteration: 311543
loss: 0.9694464206695557,grad_norm: 0.8360929282102657, iteration: 311544
loss: 1.0000501871109009,grad_norm: 0.943262433513171, iteration: 311545
loss: 0.9728853106498718,grad_norm: 0.8325347688763889, iteration: 311546
loss: 1.0374927520751953,grad_norm: 0.7907113900825792, iteration: 311547
loss: 0.9892503023147583,grad_norm: 0.7881140712595222, iteration: 311548
loss: 1.0027351379394531,grad_norm: 0.8397240399908276, iteration: 311549
loss: 1.0082346200942993,grad_norm: 0.8341821753780214, iteration: 311550
loss: 1.0384671688079834,grad_norm: 0.7132873005522639, iteration: 311551
loss: 1.004284143447876,grad_norm: 0.9017005487324775, iteration: 311552
loss: 1.0271841287612915,grad_norm: 0.939035066589315, iteration: 311553
loss: 0.9824183583259583,grad_norm: 0.8056191886896887, iteration: 311554
loss: 0.9679615497589111,grad_norm: 0.9128768191203244, iteration: 311555
loss: 0.9780061841011047,grad_norm: 0.8649665486879975, iteration: 311556
loss: 0.962527334690094,grad_norm: 0.9524182477484764, iteration: 311557
loss: 0.9981672763824463,grad_norm: 0.8132929290758721, iteration: 311558
loss: 0.9927231669425964,grad_norm: 0.787158146969059, iteration: 311559
loss: 0.9414474368095398,grad_norm: 0.7580711742095391, iteration: 311560
loss: 0.9524462223052979,grad_norm: 0.8503833329614334, iteration: 311561
loss: 0.9884940385818481,grad_norm: 0.9999990678869857, iteration: 311562
loss: 1.207162857055664,grad_norm: 0.9999991427088015, iteration: 311563
loss: 1.0164819955825806,grad_norm: 0.9999994178709497, iteration: 311564
loss: 1.0524699687957764,grad_norm: 0.8148472667749876, iteration: 311565
loss: 1.0103645324707031,grad_norm: 0.7925605599819651, iteration: 311566
loss: 0.9717682600021362,grad_norm: 0.9032789832297979, iteration: 311567
loss: 0.9881026148796082,grad_norm: 0.70298912755119, iteration: 311568
loss: 1.0330055952072144,grad_norm: 0.7600748267199852, iteration: 311569
loss: 0.981197714805603,grad_norm: 0.6924548764603035, iteration: 311570
loss: 0.9674972891807556,grad_norm: 0.7789398271215354, iteration: 311571
loss: 0.9650410413742065,grad_norm: 0.7456594246375583, iteration: 311572
loss: 1.0156333446502686,grad_norm: 0.7906183961826591, iteration: 311573
loss: 0.989876389503479,grad_norm: 0.7561264886580784, iteration: 311574
loss: 0.9981856346130371,grad_norm: 0.999999096262906, iteration: 311575
loss: 0.9924225807189941,grad_norm: 0.9999996823756001, iteration: 311576
loss: 0.9839133620262146,grad_norm: 0.9081492133088314, iteration: 311577
loss: 1.0027729272842407,grad_norm: 0.8609513461653153, iteration: 311578
loss: 1.018160343170166,grad_norm: 0.9602487225370795, iteration: 311579
loss: 0.9600428342819214,grad_norm: 0.8242089531236706, iteration: 311580
loss: 1.0088958740234375,grad_norm: 0.9999991403181777, iteration: 311581
loss: 1.0162111520767212,grad_norm: 0.7717948838410601, iteration: 311582
loss: 0.98919677734375,grad_norm: 0.8808406968429359, iteration: 311583
loss: 1.030454397201538,grad_norm: 0.8526143765636758, iteration: 311584
loss: 1.0521621704101562,grad_norm: 0.8895428270078256, iteration: 311585
loss: 1.0281918048858643,grad_norm: 0.6791078187281505, iteration: 311586
loss: 0.9898195862770081,grad_norm: 0.7469855074892382, iteration: 311587
loss: 1.0275349617004395,grad_norm: 0.9999992114139824, iteration: 311588
loss: 0.9990496039390564,grad_norm: 0.9999994818939684, iteration: 311589
loss: 1.0199936628341675,grad_norm: 0.9387966059878432, iteration: 311590
loss: 1.091265082359314,grad_norm: 0.999999853561733, iteration: 311591
loss: 0.9572610855102539,grad_norm: 0.9452697602736165, iteration: 311592
loss: 1.0203704833984375,grad_norm: 0.8479553711239243, iteration: 311593
loss: 1.19943106174469,grad_norm: 0.9999995945530086, iteration: 311594
loss: 0.9624937176704407,grad_norm: 0.9198543591992887, iteration: 311595
loss: 1.0390714406967163,grad_norm: 0.9999995662053357, iteration: 311596
loss: 1.004334568977356,grad_norm: 0.94022611278121, iteration: 311597
loss: 0.9907423853874207,grad_norm: 0.7449968776109822, iteration: 311598
loss: 1.007091999053955,grad_norm: 0.7822107478896227, iteration: 311599
loss: 1.027234673500061,grad_norm: 0.9998011306208171, iteration: 311600
loss: 0.9681450128555298,grad_norm: 0.746918801711346, iteration: 311601
loss: 1.0360431671142578,grad_norm: 0.9999991775145033, iteration: 311602
loss: 1.0337926149368286,grad_norm: 0.999999412074879, iteration: 311603
loss: 0.9986119270324707,grad_norm: 0.8841785614315801, iteration: 311604
loss: 1.0091580152511597,grad_norm: 0.8812465947513086, iteration: 311605
loss: 1.042762279510498,grad_norm: 0.9999998544957193, iteration: 311606
loss: 0.9817703366279602,grad_norm: 0.9999992115984068, iteration: 311607
loss: 1.004837155342102,grad_norm: 0.8120160589992551, iteration: 311608
loss: 0.9875897765159607,grad_norm: 0.9176978053842797, iteration: 311609
loss: 1.0229345560073853,grad_norm: 0.860850212095587, iteration: 311610
loss: 1.014708161354065,grad_norm: 0.9999995345575635, iteration: 311611
loss: 1.0212174654006958,grad_norm: 0.9118836892132759, iteration: 311612
loss: 1.000099778175354,grad_norm: 0.9999996835250701, iteration: 311613
loss: 0.9943992495536804,grad_norm: 0.9299892944781482, iteration: 311614
loss: 0.943621039390564,grad_norm: 0.877923068777453, iteration: 311615
loss: 1.0115755796432495,grad_norm: 0.6829823194799453, iteration: 311616
loss: 1.0661280155181885,grad_norm: 0.9999995371719803, iteration: 311617
loss: 0.9881317019462585,grad_norm: 0.7610191321717884, iteration: 311618
loss: 0.9984923601150513,grad_norm: 0.9999997370120112, iteration: 311619
loss: 0.9867753982543945,grad_norm: 0.7891040115616811, iteration: 311620
loss: 1.1211508512496948,grad_norm: 0.9999994051232889, iteration: 311621
loss: 0.9984670877456665,grad_norm: 0.8766704342770103, iteration: 311622
loss: 1.0322667360305786,grad_norm: 0.8587983753527173, iteration: 311623
loss: 0.9621784687042236,grad_norm: 0.9090845838749914, iteration: 311624
loss: 1.0092507600784302,grad_norm: 0.9846947802340142, iteration: 311625
loss: 1.032659888267517,grad_norm: 0.9999998940429871, iteration: 311626
loss: 1.1244927644729614,grad_norm: 0.9999994911321347, iteration: 311627
loss: 1.0891516208648682,grad_norm: 0.9999999336213115, iteration: 311628
loss: 1.1811906099319458,grad_norm: 0.9999998574426692, iteration: 311629
loss: 1.0916012525558472,grad_norm: 0.9999992618706811, iteration: 311630
loss: 1.0776582956314087,grad_norm: 0.9313648633931867, iteration: 311631
loss: 1.0374079942703247,grad_norm: 0.9999998914572511, iteration: 311632
loss: 1.0490630865097046,grad_norm: 0.9009972413069686, iteration: 311633
loss: 1.008294939994812,grad_norm: 0.9999992436727899, iteration: 311634
loss: 1.092803955078125,grad_norm: 0.9999999078396669, iteration: 311635
loss: 1.20768141746521,grad_norm: 1.0000000004374427, iteration: 311636
loss: 1.1020853519439697,grad_norm: 0.999999976521364, iteration: 311637
loss: 1.0220947265625,grad_norm: 0.7514249474617081, iteration: 311638
loss: 1.0829358100891113,grad_norm: 0.7807818868386717, iteration: 311639
loss: 0.9838249683380127,grad_norm: 0.8996386625600091, iteration: 311640
loss: 1.079892635345459,grad_norm: 0.959897820029436, iteration: 311641
loss: 1.131240725517273,grad_norm: 0.9999998398019623, iteration: 311642
loss: 1.1121541261672974,grad_norm: 0.999999073364338, iteration: 311643
loss: 1.2282356023788452,grad_norm: 0.9999995971346298, iteration: 311644
loss: 1.0501151084899902,grad_norm: 0.9999991855268721, iteration: 311645
loss: 0.9761763215065002,grad_norm: 0.7765256371825896, iteration: 311646
loss: 0.9970600605010986,grad_norm: 0.7562936660613754, iteration: 311647
loss: 0.9917019009590149,grad_norm: 0.8682715528561983, iteration: 311648
loss: 1.011661171913147,grad_norm: 0.986695063056758, iteration: 311649
loss: 1.018014907836914,grad_norm: 0.904214005615434, iteration: 311650
loss: 0.9527937173843384,grad_norm: 0.853352383064759, iteration: 311651
loss: 1.0057644844055176,grad_norm: 0.7736773992493411, iteration: 311652
loss: 1.0230542421340942,grad_norm: 0.8136525580907328, iteration: 311653
loss: 1.0106576681137085,grad_norm: 0.9797184063609403, iteration: 311654
loss: 0.9824978709220886,grad_norm: 0.8116811421444072, iteration: 311655
loss: 1.0161137580871582,grad_norm: 0.722105221100906, iteration: 311656
loss: 0.9813119769096375,grad_norm: 0.9339233767697716, iteration: 311657
loss: 1.0477591753005981,grad_norm: 0.8153303659330096, iteration: 311658
loss: 1.0085606575012207,grad_norm: 0.803628047754189, iteration: 311659
loss: 1.0371458530426025,grad_norm: 0.9383368859383959, iteration: 311660
loss: 0.9527881741523743,grad_norm: 0.9260158220854406, iteration: 311661
loss: 0.9850632548332214,grad_norm: 0.9219154411326209, iteration: 311662
loss: 0.9774384498596191,grad_norm: 0.8243255640811694, iteration: 311663
loss: 1.0211541652679443,grad_norm: 0.8442999961382883, iteration: 311664
loss: 1.0150595903396606,grad_norm: 0.9114498464277107, iteration: 311665
loss: 0.9752042889595032,grad_norm: 0.9717483968387137, iteration: 311666
loss: 1.0121279954910278,grad_norm: 0.8122750710473382, iteration: 311667
loss: 0.9989525079727173,grad_norm: 0.6625892309883039, iteration: 311668
loss: 1.0286741256713867,grad_norm: 0.7954530289269364, iteration: 311669
loss: 1.0017729997634888,grad_norm: 0.7514013454034121, iteration: 311670
loss: 0.9861006140708923,grad_norm: 0.8489795096646774, iteration: 311671
loss: 0.9825683236122131,grad_norm: 0.9999994814415573, iteration: 311672
loss: 1.0057867765426636,grad_norm: 0.7675984946324927, iteration: 311673
loss: 1.1089372634887695,grad_norm: 0.765333498864325, iteration: 311674
loss: 1.0283252000808716,grad_norm: 0.8718868795115289, iteration: 311675
loss: 1.0144925117492676,grad_norm: 0.871809601613657, iteration: 311676
loss: 0.9837379455566406,grad_norm: 0.82941128716672, iteration: 311677
loss: 1.0178375244140625,grad_norm: 0.7681177870729441, iteration: 311678
loss: 1.0036139488220215,grad_norm: 0.6905051844246031, iteration: 311679
loss: 1.0045990943908691,grad_norm: 0.7589519396873307, iteration: 311680
loss: 1.028412938117981,grad_norm: 0.9999992645595124, iteration: 311681
loss: 1.0398192405700684,grad_norm: 0.8071383478173889, iteration: 311682
loss: 1.0271838903427124,grad_norm: 0.87576704342767, iteration: 311683
loss: 0.9779914021492004,grad_norm: 0.7860616758482369, iteration: 311684
loss: 1.0417925119400024,grad_norm: 0.9999990044336383, iteration: 311685
loss: 0.9977417588233948,grad_norm: 0.8988675718831557, iteration: 311686
loss: 0.9989728927612305,grad_norm: 0.8149550999661453, iteration: 311687
loss: 1.0022469758987427,grad_norm: 0.9402080143610088, iteration: 311688
loss: 0.9976621270179749,grad_norm: 0.6983853411562704, iteration: 311689
loss: 1.0198103189468384,grad_norm: 0.8199960608873161, iteration: 311690
loss: 0.9702509641647339,grad_norm: 0.920632351971276, iteration: 311691
loss: 1.0061115026474,grad_norm: 0.8710853484163846, iteration: 311692
loss: 0.9563955664634705,grad_norm: 0.8072305357629249, iteration: 311693
loss: 1.0208442211151123,grad_norm: 0.9999989650224989, iteration: 311694
loss: 1.1048065423965454,grad_norm: 0.9306309196508704, iteration: 311695
loss: 1.0170248746871948,grad_norm: 0.9340847109793736, iteration: 311696
loss: 0.9574517607688904,grad_norm: 0.8204942819112896, iteration: 311697
loss: 1.0207524299621582,grad_norm: 0.8809459784113944, iteration: 311698
loss: 0.9992445111274719,grad_norm: 0.9523330793338474, iteration: 311699
loss: 1.0196644067764282,grad_norm: 0.8789675601987704, iteration: 311700
loss: 0.9995492100715637,grad_norm: 0.8850922487952527, iteration: 311701
loss: 1.0536941289901733,grad_norm: 0.9999993322007178, iteration: 311702
loss: 1.0086431503295898,grad_norm: 0.8814855455008254, iteration: 311703
loss: 0.9982883334159851,grad_norm: 0.9187408566402926, iteration: 311704
loss: 0.9923216104507446,grad_norm: 0.7573872633324454, iteration: 311705
loss: 1.0628176927566528,grad_norm: 0.9999991665318283, iteration: 311706
loss: 1.093194603919983,grad_norm: 0.9701947341714726, iteration: 311707
loss: 0.9926159977912903,grad_norm: 0.9811678684910112, iteration: 311708
loss: 0.9966928958892822,grad_norm: 0.8180817491700819, iteration: 311709
loss: 1.0235252380371094,grad_norm: 0.9999996183107566, iteration: 311710
loss: 0.9907903671264648,grad_norm: 0.8573586013025849, iteration: 311711
loss: 0.9998764395713806,grad_norm: 0.9099674273894583, iteration: 311712
loss: 0.9743772745132446,grad_norm: 0.8532400407274814, iteration: 311713
loss: 1.0360560417175293,grad_norm: 0.999999490565152, iteration: 311714
loss: 0.9690666794776917,grad_norm: 0.9903401835384439, iteration: 311715
loss: 0.9969558119773865,grad_norm: 0.8493228306115815, iteration: 311716
loss: 1.0351576805114746,grad_norm: 0.8635848971352833, iteration: 311717
loss: 0.9515443444252014,grad_norm: 0.743099765998835, iteration: 311718
loss: 0.9922676682472229,grad_norm: 0.7646925480756213, iteration: 311719
loss: 0.994215726852417,grad_norm: 0.9916555909581499, iteration: 311720
loss: 1.0694431066513062,grad_norm: 0.9999993858512541, iteration: 311721
loss: 1.0285415649414062,grad_norm: 0.9974845114544112, iteration: 311722
loss: 1.0246869325637817,grad_norm: 0.9999992436519122, iteration: 311723
loss: 1.0127002000808716,grad_norm: 0.7313742511309534, iteration: 311724
loss: 0.9947106838226318,grad_norm: 0.8372214447091946, iteration: 311725
loss: 1.0371464490890503,grad_norm: 0.9999996935482266, iteration: 311726
loss: 0.9828537106513977,grad_norm: 0.7730306048111123, iteration: 311727
loss: 0.9904105067253113,grad_norm: 0.9216453773593585, iteration: 311728
loss: 1.025154948234558,grad_norm: 0.9999993407767707, iteration: 311729
loss: 1.0766481161117554,grad_norm: 0.9143230084341234, iteration: 311730
loss: 0.9842850565910339,grad_norm: 0.8709489703392447, iteration: 311731
loss: 1.0269769430160522,grad_norm: 0.7418482840525232, iteration: 311732
loss: 1.093842625617981,grad_norm: 0.9999993632124685, iteration: 311733
loss: 0.9763033986091614,grad_norm: 0.9047935648501824, iteration: 311734
loss: 1.2273738384246826,grad_norm: 0.9999991888484204, iteration: 311735
loss: 0.9751221537590027,grad_norm: 0.8773123680581361, iteration: 311736
loss: 0.9979903101921082,grad_norm: 0.8093240003676712, iteration: 311737
loss: 0.9943114519119263,grad_norm: 0.8349507445530159, iteration: 311738
loss: 1.0149422883987427,grad_norm: 0.7940542760218058, iteration: 311739
loss: 1.0145949125289917,grad_norm: 0.86669328030876, iteration: 311740
loss: 0.9942023158073425,grad_norm: 0.855396662766146, iteration: 311741
loss: 1.0071942806243896,grad_norm: 0.9999991303076274, iteration: 311742
loss: 1.0316810607910156,grad_norm: 0.9999991327815563, iteration: 311743
loss: 1.0320212841033936,grad_norm: 0.7644089244422058, iteration: 311744
loss: 1.0014933347702026,grad_norm: 0.77368067104705, iteration: 311745
loss: 0.9832815527915955,grad_norm: 0.8052732704151123, iteration: 311746
loss: 1.0042661428451538,grad_norm: 0.8145755745572548, iteration: 311747
loss: 1.0279757976531982,grad_norm: 0.8233426162379787, iteration: 311748
loss: 1.0489208698272705,grad_norm: 0.8715018239549857, iteration: 311749
loss: 0.9920945167541504,grad_norm: 0.8343927958428834, iteration: 311750
loss: 1.0038760900497437,grad_norm: 0.9102306259987053, iteration: 311751
loss: 1.0346370935440063,grad_norm: 0.8420253413144688, iteration: 311752
loss: 1.0002050399780273,grad_norm: 0.931212065020178, iteration: 311753
loss: 1.02166748046875,grad_norm: 0.7645695782935222, iteration: 311754
loss: 0.9996753334999084,grad_norm: 0.8068655585204508, iteration: 311755
loss: 1.0181440114974976,grad_norm: 0.9999991893974755, iteration: 311756
loss: 1.007120966911316,grad_norm: 0.9097583669010777, iteration: 311757
loss: 1.023813009262085,grad_norm: 0.9999989649832041, iteration: 311758
loss: 0.9928592443466187,grad_norm: 0.8817070236538972, iteration: 311759
loss: 1.008124589920044,grad_norm: 0.9200163193929081, iteration: 311760
loss: 1.0117814540863037,grad_norm: 0.7239050773396352, iteration: 311761
loss: 1.0033570528030396,grad_norm: 0.9999992365988725, iteration: 311762
loss: 1.0237507820129395,grad_norm: 0.9999990844445467, iteration: 311763
loss: 0.982199490070343,grad_norm: 0.8932223937967411, iteration: 311764
loss: 0.9709432125091553,grad_norm: 0.9999991266156725, iteration: 311765
loss: 0.9782871603965759,grad_norm: 0.9844825157797351, iteration: 311766
loss: 1.0067424774169922,grad_norm: 0.9955421161922956, iteration: 311767
loss: 1.0110325813293457,grad_norm: 0.9910971776529379, iteration: 311768
loss: 0.9995023608207703,grad_norm: 0.8081151176322623, iteration: 311769
loss: 0.9746282696723938,grad_norm: 0.9999995205723017, iteration: 311770
loss: 1.0850050449371338,grad_norm: 0.782262586471529, iteration: 311771
loss: 0.9895281195640564,grad_norm: 0.8565466703825958, iteration: 311772
loss: 1.1371052265167236,grad_norm: 0.9999999957915668, iteration: 311773
loss: 0.973903238773346,grad_norm: 0.8747961301013679, iteration: 311774
loss: 1.1741983890533447,grad_norm: 0.9999994317475761, iteration: 311775
loss: 0.9741247892379761,grad_norm: 0.8738419275745125, iteration: 311776
loss: 1.048305869102478,grad_norm: 0.8355783043580818, iteration: 311777
loss: 0.9708017110824585,grad_norm: 0.8176780388601282, iteration: 311778
loss: 0.9629490375518799,grad_norm: 0.9631784913838646, iteration: 311779
loss: 1.0022703409194946,grad_norm: 0.9290867392744047, iteration: 311780
loss: 1.0749198198318481,grad_norm: 0.9999999446521993, iteration: 311781
loss: 1.0010840892791748,grad_norm: 0.8606785676356604, iteration: 311782
loss: 0.995384156703949,grad_norm: 0.8113926890971627, iteration: 311783
loss: 0.9765353798866272,grad_norm: 0.8202079048302294, iteration: 311784
loss: 1.0654644966125488,grad_norm: 0.9999993867764859, iteration: 311785
loss: 0.9849674701690674,grad_norm: 0.7211052864075752, iteration: 311786
loss: 1.0191035270690918,grad_norm: 0.8682629510224397, iteration: 311787
loss: 1.0091060400009155,grad_norm: 0.8476277302323094, iteration: 311788
loss: 1.018075704574585,grad_norm: 0.8509048590488713, iteration: 311789
loss: 1.0519568920135498,grad_norm: 0.999999156510017, iteration: 311790
loss: 1.0156952142715454,grad_norm: 0.877357842441059, iteration: 311791
loss: 1.0342353582382202,grad_norm: 0.9999996175573515, iteration: 311792
loss: 0.9975373148918152,grad_norm: 0.8367007445952895, iteration: 311793
loss: 1.0534647703170776,grad_norm: 0.999999498249011, iteration: 311794
loss: 1.1147900819778442,grad_norm: 0.9999990547739143, iteration: 311795
loss: 1.1925581693649292,grad_norm: 0.9999994075519688, iteration: 311796
loss: 1.2200140953063965,grad_norm: 0.9999993226160548, iteration: 311797
loss: 1.042871117591858,grad_norm: 0.999999031718298, iteration: 311798
loss: 1.0433918237686157,grad_norm: 0.937930626602347, iteration: 311799
loss: 1.0689802169799805,grad_norm: 0.9616401259863385, iteration: 311800
loss: 1.1414003372192383,grad_norm: 0.9999996823165406, iteration: 311801
loss: 1.1684783697128296,grad_norm: 0.9999997547092423, iteration: 311802
loss: 1.021942138671875,grad_norm: 0.7493328369174738, iteration: 311803
loss: 1.1147305965423584,grad_norm: 0.9999999901487304, iteration: 311804
loss: 1.0178836584091187,grad_norm: 0.9011152511178104, iteration: 311805
loss: 0.9746716022491455,grad_norm: 0.8654903816522116, iteration: 311806
loss: 1.009770393371582,grad_norm: 0.95498196806468, iteration: 311807
loss: 1.082063913345337,grad_norm: 0.9999991246998746, iteration: 311808
loss: 0.9896164536476135,grad_norm: 0.989853619899426, iteration: 311809
loss: 1.104967474937439,grad_norm: 0.9999998496104289, iteration: 311810
loss: 1.0351210832595825,grad_norm: 0.8445847479480503, iteration: 311811
loss: 1.0061657428741455,grad_norm: 0.8150435105462975, iteration: 311812
loss: 0.9871669411659241,grad_norm: 0.7575520243369803, iteration: 311813
loss: 0.9867923259735107,grad_norm: 0.9991137377685492, iteration: 311814
loss: 1.0998939275741577,grad_norm: 0.9999991986890591, iteration: 311815
loss: 1.0157274007797241,grad_norm: 0.9839582257074677, iteration: 311816
loss: 1.0664830207824707,grad_norm: 0.9999994253836461, iteration: 311817
loss: 1.0356214046478271,grad_norm: 0.8524588820787491, iteration: 311818
loss: 1.0181541442871094,grad_norm: 0.8173196497353722, iteration: 311819
loss: 0.9944999814033508,grad_norm: 0.7141184647286266, iteration: 311820
loss: 1.006858468055725,grad_norm: 0.9213031386172812, iteration: 311821
loss: 0.9365197420120239,grad_norm: 0.8795730902594739, iteration: 311822
loss: 1.10914146900177,grad_norm: 0.999999067683281, iteration: 311823
loss: 0.9628891944885254,grad_norm: 0.8616764885763372, iteration: 311824
loss: 0.968557596206665,grad_norm: 0.9196878946287754, iteration: 311825
loss: 1.0149513483047485,grad_norm: 0.8764816011516742, iteration: 311826
loss: 1.2001887559890747,grad_norm: 0.9999991260493271, iteration: 311827
loss: 1.0096189975738525,grad_norm: 0.8026711147790241, iteration: 311828
loss: 0.9932572245597839,grad_norm: 0.9999989863498602, iteration: 311829
loss: 1.011542797088623,grad_norm: 0.7999094927455022, iteration: 311830
loss: 0.9696430563926697,grad_norm: 0.848122154291826, iteration: 311831
loss: 1.0281349420547485,grad_norm: 0.8242900012004664, iteration: 311832
loss: 1.0058033466339111,grad_norm: 0.8239054688629667, iteration: 311833
loss: 0.9869575500488281,grad_norm: 0.8936669059067912, iteration: 311834
loss: 1.0105317831039429,grad_norm: 0.7896002025240338, iteration: 311835
loss: 1.0195698738098145,grad_norm: 0.832214141392592, iteration: 311836
loss: 1.025579571723938,grad_norm: 0.853169826387036, iteration: 311837
loss: 1.0363259315490723,grad_norm: 0.999999721110411, iteration: 311838
loss: 1.006482720375061,grad_norm: 0.8150326909301975, iteration: 311839
loss: 1.0129395723342896,grad_norm: 0.8840640091393548, iteration: 311840
loss: 1.0094482898712158,grad_norm: 0.822545661686421, iteration: 311841
loss: 1.0018525123596191,grad_norm: 0.9356661266938019, iteration: 311842
loss: 1.022735595703125,grad_norm: 0.8933539619605866, iteration: 311843
loss: 1.0238478183746338,grad_norm: 0.9999993679407788, iteration: 311844
loss: 0.9885033965110779,grad_norm: 0.9999996503560337, iteration: 311845
loss: 0.9946866035461426,grad_norm: 0.8554216607044397, iteration: 311846
loss: 1.1008031368255615,grad_norm: 0.9999992250137443, iteration: 311847
loss: 1.1424825191497803,grad_norm: 0.9999991811518776, iteration: 311848
loss: 0.9812420010566711,grad_norm: 0.784135405282863, iteration: 311849
loss: 1.0220292806625366,grad_norm: 0.7895811380714083, iteration: 311850
loss: 0.941378653049469,grad_norm: 0.7897903862585891, iteration: 311851
loss: 0.9977062940597534,grad_norm: 0.8126000056391888, iteration: 311852
loss: 1.03427255153656,grad_norm: 0.9999995134781776, iteration: 311853
loss: 0.98185133934021,grad_norm: 0.8696310893637198, iteration: 311854
loss: 0.9957365393638611,grad_norm: 0.8867072653470586, iteration: 311855
loss: 0.9814710021018982,grad_norm: 0.8107678549208426, iteration: 311856
loss: 1.0116603374481201,grad_norm: 0.8276027448334189, iteration: 311857
loss: 0.9924168586730957,grad_norm: 0.9989243200644675, iteration: 311858
loss: 1.0286232233047485,grad_norm: 0.9478773405932649, iteration: 311859
loss: 0.9772190451622009,grad_norm: 0.8305887942244677, iteration: 311860
loss: 0.9868595004081726,grad_norm: 0.8312738662883565, iteration: 311861
loss: 1.0153393745422363,grad_norm: 0.9999995376525886, iteration: 311862
loss: 1.1486059427261353,grad_norm: 0.9999992591825443, iteration: 311863
loss: 0.9737427830696106,grad_norm: 0.9999991989314841, iteration: 311864
loss: 0.9885546565055847,grad_norm: 0.9686721223370713, iteration: 311865
loss: 0.9772832989692688,grad_norm: 0.99999920018615, iteration: 311866
loss: 0.9987821578979492,grad_norm: 0.7307244455047415, iteration: 311867
loss: 1.0014028549194336,grad_norm: 0.8602221324291965, iteration: 311868
loss: 1.0081299543380737,grad_norm: 0.9198805500048592, iteration: 311869
loss: 0.9642188549041748,grad_norm: 0.922263277576605, iteration: 311870
loss: 1.0078256130218506,grad_norm: 0.9999993720658786, iteration: 311871
loss: 1.0217093229293823,grad_norm: 0.8860656708422141, iteration: 311872
loss: 1.0099916458129883,grad_norm: 0.9999990523293355, iteration: 311873
loss: 1.1196891069412231,grad_norm: 0.9999996098677729, iteration: 311874
loss: 1.0211395025253296,grad_norm: 0.8496697235876257, iteration: 311875
loss: 1.0489751100540161,grad_norm: 0.999999147098961, iteration: 311876
loss: 1.01668119430542,grad_norm: 0.8526900230826905, iteration: 311877
loss: 1.0349549055099487,grad_norm: 0.7507440234044311, iteration: 311878
loss: 1.0486671924591064,grad_norm: 0.77486371858641, iteration: 311879
loss: 1.0488916635513306,grad_norm: 0.8845254696310657, iteration: 311880
loss: 1.0107003450393677,grad_norm: 0.77974783179554, iteration: 311881
loss: 1.017039179801941,grad_norm: 0.9640509219957651, iteration: 311882
loss: 1.0138261318206787,grad_norm: 0.9999990029364796, iteration: 311883
loss: 1.0149608850479126,grad_norm: 0.7923370454937444, iteration: 311884
loss: 1.042201042175293,grad_norm: 0.8402837378871194, iteration: 311885
loss: 1.0491937398910522,grad_norm: 0.8201131912777603, iteration: 311886
loss: 1.1467021703720093,grad_norm: 0.9999996987840658, iteration: 311887
loss: 0.9749121069908142,grad_norm: 0.6987036859872078, iteration: 311888
loss: 1.097808837890625,grad_norm: 0.9999996932917917, iteration: 311889
loss: 0.9725516438484192,grad_norm: 0.7761375899135426, iteration: 311890
loss: 1.0338876247406006,grad_norm: 0.8396396748591126, iteration: 311891
loss: 0.9760541319847107,grad_norm: 0.8907275999810665, iteration: 311892
loss: 1.0982905626296997,grad_norm: 0.9999997296707275, iteration: 311893
loss: 0.9623579382896423,grad_norm: 0.8673212796469852, iteration: 311894
loss: 1.1640046834945679,grad_norm: 0.9999992500352132, iteration: 311895
loss: 1.0213717222213745,grad_norm: 0.9546270836681309, iteration: 311896
loss: 1.0346388816833496,grad_norm: 0.8194020043710877, iteration: 311897
loss: 0.9861447811126709,grad_norm: 0.907702078906413, iteration: 311898
loss: 0.9868360161781311,grad_norm: 0.9999999387393984, iteration: 311899
loss: 0.9916336536407471,grad_norm: 0.9828335440003824, iteration: 311900
loss: 1.0750219821929932,grad_norm: 0.9999995143116361, iteration: 311901
loss: 1.129051923751831,grad_norm: 0.9999992892263576, iteration: 311902
loss: 1.0150493383407593,grad_norm: 0.8916505520541218, iteration: 311903
loss: 1.0566843748092651,grad_norm: 0.9999990790292672, iteration: 311904
loss: 1.0843079090118408,grad_norm: 0.9999996911183228, iteration: 311905
loss: 0.9980263710021973,grad_norm: 0.6855209042132044, iteration: 311906
loss: 0.9965898990631104,grad_norm: 0.9192452822053363, iteration: 311907
loss: 1.000274419784546,grad_norm: 0.8762667206554268, iteration: 311908
loss: 1.001191258430481,grad_norm: 0.8899513333808975, iteration: 311909
loss: 1.0215991735458374,grad_norm: 0.8941562129101481, iteration: 311910
loss: 0.9964715242385864,grad_norm: 0.8113765411597342, iteration: 311911
loss: 0.9857288002967834,grad_norm: 0.8742434964967924, iteration: 311912
loss: 0.9886096715927124,grad_norm: 0.8233912509381129, iteration: 311913
loss: 1.1778279542922974,grad_norm: 0.999999121486708, iteration: 311914
loss: 0.9824182987213135,grad_norm: 0.8453331570975827, iteration: 311915
loss: 1.0273046493530273,grad_norm: 0.8163621891664541, iteration: 311916
loss: 1.0601218938827515,grad_norm: 0.9743961021638011, iteration: 311917
loss: 1.0672802925109863,grad_norm: 0.999999691712011, iteration: 311918
loss: 0.9956899881362915,grad_norm: 0.867795418991032, iteration: 311919
loss: 1.0095133781433105,grad_norm: 0.8849679383213639, iteration: 311920
loss: 0.9900922179222107,grad_norm: 0.8870464646267702, iteration: 311921
loss: 1.0710039138793945,grad_norm: 0.9999991929860633, iteration: 311922
loss: 0.9584624767303467,grad_norm: 0.8754492505788458, iteration: 311923
loss: 0.9632745981216431,grad_norm: 0.7269839866379965, iteration: 311924
loss: 0.9776349663734436,grad_norm: 0.8233115342401383, iteration: 311925
loss: 0.9909660220146179,grad_norm: 0.9609126896332425, iteration: 311926
loss: 0.9934206604957581,grad_norm: 0.9581924330835839, iteration: 311927
loss: 1.0609796047210693,grad_norm: 0.8712115792193642, iteration: 311928
loss: 1.1002973318099976,grad_norm: 0.9834803557545946, iteration: 311929
loss: 1.1132011413574219,grad_norm: 0.9999995527223516, iteration: 311930
loss: 1.0109258890151978,grad_norm: 0.9999991153800075, iteration: 311931
loss: 0.9692004323005676,grad_norm: 0.9999989381314294, iteration: 311932
loss: 0.9947885274887085,grad_norm: 0.9713221927917461, iteration: 311933
loss: 1.0347838401794434,grad_norm: 0.9999999636039109, iteration: 311934
loss: 1.0308220386505127,grad_norm: 0.8087096857068722, iteration: 311935
loss: 1.0147186517715454,grad_norm: 0.8555350901624865, iteration: 311936
loss: 1.0519431829452515,grad_norm: 0.9999993891231671, iteration: 311937
loss: 0.9871248006820679,grad_norm: 0.8374617209357959, iteration: 311938
loss: 0.9729216694831848,grad_norm: 0.8853289578301083, iteration: 311939
loss: 1.0177748203277588,grad_norm: 0.9999991193513146, iteration: 311940
loss: 1.0657809972763062,grad_norm: 0.9150139804023745, iteration: 311941
loss: 1.0375962257385254,grad_norm: 0.9999992996024785, iteration: 311942
loss: 1.0427048206329346,grad_norm: 0.809223127585772, iteration: 311943
loss: 0.9894392490386963,grad_norm: 0.7536198685238601, iteration: 311944
loss: 0.9750605225563049,grad_norm: 0.9018152026442017, iteration: 311945
loss: 1.2189078330993652,grad_norm: 0.999999475245, iteration: 311946
loss: 1.1277201175689697,grad_norm: 0.9999998470180498, iteration: 311947
loss: 1.0164620876312256,grad_norm: 0.8280403852187378, iteration: 311948
loss: 0.9923580884933472,grad_norm: 0.8146931965289339, iteration: 311949
loss: 0.9949157238006592,grad_norm: 0.9999995191982627, iteration: 311950
loss: 0.993729293346405,grad_norm: 0.9522307380865572, iteration: 311951
loss: 0.9644144773483276,grad_norm: 0.9999990623212747, iteration: 311952
loss: 1.1446053981781006,grad_norm: 0.999999518829828, iteration: 311953
loss: 0.9474189877510071,grad_norm: 0.8631716700631685, iteration: 311954
loss: 1.0644750595092773,grad_norm: 0.9999990588813179, iteration: 311955
loss: 1.015715479850769,grad_norm: 0.7757582987324015, iteration: 311956
loss: 0.9683801531791687,grad_norm: 0.8183805932860885, iteration: 311957
loss: 1.0584412813186646,grad_norm: 0.7285669270131451, iteration: 311958
loss: 1.0486923456192017,grad_norm: 0.8107873765964321, iteration: 311959
loss: 0.9953568577766418,grad_norm: 0.8304621182843943, iteration: 311960
loss: 1.0116913318634033,grad_norm: 0.9999991826668594, iteration: 311961
loss: 0.9775199890136719,grad_norm: 0.9999991836177534, iteration: 311962
loss: 1.0224497318267822,grad_norm: 0.7784939064140768, iteration: 311963
loss: 0.9835875630378723,grad_norm: 0.8108416500068215, iteration: 311964
loss: 0.997033417224884,grad_norm: 0.9999990996925201, iteration: 311965
loss: 1.0030639171600342,grad_norm: 0.885303034395547, iteration: 311966
loss: 1.0708625316619873,grad_norm: 0.999999219954949, iteration: 311967
loss: 1.0488128662109375,grad_norm: 0.7925335181047459, iteration: 311968
loss: 1.049352765083313,grad_norm: 0.833304201182919, iteration: 311969
loss: 1.0398074388504028,grad_norm: 1.0000000594871559, iteration: 311970
loss: 1.063123106956482,grad_norm: 0.9999992667236483, iteration: 311971
loss: 0.9722259044647217,grad_norm: 0.8717988388595084, iteration: 311972
loss: 0.968253493309021,grad_norm: 0.829924303604531, iteration: 311973
loss: 1.1235270500183105,grad_norm: 0.9999991368566565, iteration: 311974
loss: 1.0058486461639404,grad_norm: 0.8612042841622981, iteration: 311975
loss: 0.9867917895317078,grad_norm: 0.8507692253880511, iteration: 311976
loss: 0.9785967469215393,grad_norm: 0.8795521713174191, iteration: 311977
loss: 1.0118002891540527,grad_norm: 0.9491070721319604, iteration: 311978
loss: 0.9864783883094788,grad_norm: 0.8196466002721582, iteration: 311979
loss: 0.9713780283927917,grad_norm: 0.8575219296194699, iteration: 311980
loss: 1.0160483121871948,grad_norm: 0.9726465748936721, iteration: 311981
loss: 1.0154281854629517,grad_norm: 0.9999991797899042, iteration: 311982
loss: 0.9961051344871521,grad_norm: 0.8293173126350065, iteration: 311983
loss: 1.0433059930801392,grad_norm: 0.9999996635162698, iteration: 311984
loss: 1.0251784324645996,grad_norm: 0.9168939358419049, iteration: 311985
loss: 1.0060349702835083,grad_norm: 0.9999999605846975, iteration: 311986
loss: 0.9885014891624451,grad_norm: 0.7040648565742368, iteration: 311987
loss: 0.9875606894493103,grad_norm: 0.8916080312425115, iteration: 311988
loss: 1.0312918424606323,grad_norm: 0.7522906981402315, iteration: 311989
loss: 1.0900291204452515,grad_norm: 0.9999999685747248, iteration: 311990
loss: 0.9732768535614014,grad_norm: 0.7630405034917329, iteration: 311991
loss: 0.9908789992332458,grad_norm: 0.8441142774485539, iteration: 311992
loss: 1.0400277376174927,grad_norm: 0.9846659846327364, iteration: 311993
loss: 1.0125274658203125,grad_norm: 0.8758887402915241, iteration: 311994
loss: 1.0104507207870483,grad_norm: 0.9999991002802043, iteration: 311995
loss: 1.0305700302124023,grad_norm: 0.9999997387210867, iteration: 311996
loss: 1.0647928714752197,grad_norm: 0.9999991161768108, iteration: 311997
loss: 1.0138376951217651,grad_norm: 0.8641926044841187, iteration: 311998
loss: 1.0115402936935425,grad_norm: 0.7004763641693286, iteration: 311999
loss: 0.9934921264648438,grad_norm: 0.8510704297901683, iteration: 312000
loss: 1.0135211944580078,grad_norm: 0.8200420126997319, iteration: 312001
loss: 0.999588668346405,grad_norm: 0.9507789857902148, iteration: 312002
loss: 0.980160653591156,grad_norm: 0.7721714594773446, iteration: 312003
loss: 1.0302550792694092,grad_norm: 0.7952790679812537, iteration: 312004
loss: 0.988743245601654,grad_norm: 0.8416880266588886, iteration: 312005
loss: 1.0758846998214722,grad_norm: 0.9999992628887611, iteration: 312006
loss: 1.0193113088607788,grad_norm: 0.8209193822469508, iteration: 312007
loss: 0.9825583696365356,grad_norm: 0.8021407554495879, iteration: 312008
loss: 0.9804641604423523,grad_norm: 0.7940165956996134, iteration: 312009
loss: 1.069961428642273,grad_norm: 0.9980024809320074, iteration: 312010
loss: 1.0086801052093506,grad_norm: 0.9999990931291034, iteration: 312011
loss: 0.984767496585846,grad_norm: 0.7742401754402766, iteration: 312012
loss: 0.9939039349555969,grad_norm: 0.7487934248731917, iteration: 312013
loss: 1.0068411827087402,grad_norm: 0.9071050332304522, iteration: 312014
loss: 1.1159554719924927,grad_norm: 0.9999999234590196, iteration: 312015
loss: 0.9738465547561646,grad_norm: 0.9512171781524403, iteration: 312016
loss: 0.9667121171951294,grad_norm: 0.8497323156996732, iteration: 312017
loss: 0.9737766981124878,grad_norm: 0.818876215435729, iteration: 312018
loss: 0.9904578924179077,grad_norm: 0.9072360977755161, iteration: 312019
loss: 1.0049136877059937,grad_norm: 0.8778769357758932, iteration: 312020
loss: 1.0038121938705444,grad_norm: 0.8314797078830781, iteration: 312021
loss: 1.0606763362884521,grad_norm: 0.9838977324058292, iteration: 312022
loss: 0.9999194741249084,grad_norm: 0.9656691194729826, iteration: 312023
loss: 1.0123286247253418,grad_norm: 0.7672394039787213, iteration: 312024
loss: 1.021079182624817,grad_norm: 0.9023853634682389, iteration: 312025
loss: 0.949394941329956,grad_norm: 0.7037144420164458, iteration: 312026
loss: 1.0035922527313232,grad_norm: 0.8469529290883487, iteration: 312027
loss: 1.1505228281021118,grad_norm: 0.9999996542079027, iteration: 312028
loss: 1.0683808326721191,grad_norm: 0.939802824369135, iteration: 312029
loss: 1.012251615524292,grad_norm: 0.9999999277409314, iteration: 312030
loss: 1.0080034732818604,grad_norm: 0.8827582316090414, iteration: 312031
loss: 1.004313349723816,grad_norm: 0.8612262878980176, iteration: 312032
loss: 0.9896894693374634,grad_norm: 0.9999992880063607, iteration: 312033
loss: 0.9886524677276611,grad_norm: 0.8712744182174301, iteration: 312034
loss: 0.9816567301750183,grad_norm: 0.9781126975753269, iteration: 312035
loss: 1.0031728744506836,grad_norm: 0.992454827717603, iteration: 312036
loss: 0.9860495924949646,grad_norm: 0.7976076814417694, iteration: 312037
loss: 1.0323185920715332,grad_norm: 0.8034869823014599, iteration: 312038
loss: 1.0236384868621826,grad_norm: 0.9999991627607232, iteration: 312039
loss: 1.0062353610992432,grad_norm: 0.8469982865204453, iteration: 312040
loss: 1.1460933685302734,grad_norm: 0.9999994149683311, iteration: 312041
loss: 1.0717118978500366,grad_norm: 0.819743406547685, iteration: 312042
loss: 1.0427342653274536,grad_norm: 0.9999998932301315, iteration: 312043
loss: 0.985981285572052,grad_norm: 0.7261678270096882, iteration: 312044
loss: 1.001118779182434,grad_norm: 0.7409639550061424, iteration: 312045
loss: 1.0181498527526855,grad_norm: 0.7580002159366818, iteration: 312046
loss: 1.0311319828033447,grad_norm: 0.778487912344223, iteration: 312047
loss: 1.1303993463516235,grad_norm: 0.999999877368552, iteration: 312048
loss: 1.0519192218780518,grad_norm: 0.9999997764994207, iteration: 312049
loss: 0.9837415814399719,grad_norm: 0.8204835523030835, iteration: 312050
loss: 1.022900104522705,grad_norm: 0.7502649430735595, iteration: 312051
loss: 1.0179040431976318,grad_norm: 0.8269982918458455, iteration: 312052
loss: 1.023072600364685,grad_norm: 0.6670586802542929, iteration: 312053
loss: 0.9610315561294556,grad_norm: 0.7095089555663683, iteration: 312054
loss: 1.0086073875427246,grad_norm: 0.8047998171698948, iteration: 312055
loss: 1.017085075378418,grad_norm: 0.76540696120282, iteration: 312056
loss: 0.9923062920570374,grad_norm: 0.9999993672259805, iteration: 312057
loss: 0.967268168926239,grad_norm: 0.7228438482959659, iteration: 312058
loss: 1.0119365453720093,grad_norm: 0.8619652859691691, iteration: 312059
loss: 0.9763522148132324,grad_norm: 0.7907813830683279, iteration: 312060
loss: 1.0359437465667725,grad_norm: 0.8197711666835577, iteration: 312061
loss: 0.9979474544525146,grad_norm: 0.8365574796843432, iteration: 312062
loss: 1.01057767868042,grad_norm: 0.7163710321691213, iteration: 312063
loss: 1.0331326723098755,grad_norm: 0.8536119452048327, iteration: 312064
loss: 0.986750066280365,grad_norm: 0.8968658814410082, iteration: 312065
loss: 0.9950233101844788,grad_norm: 0.8317293267563972, iteration: 312066
loss: 0.9957290291786194,grad_norm: 0.8000781260882421, iteration: 312067
loss: 1.029657006263733,grad_norm: 0.9828486549391122, iteration: 312068
loss: 0.9887285232543945,grad_norm: 0.9019143193186834, iteration: 312069
loss: 1.0814156532287598,grad_norm: 0.9740345872441698, iteration: 312070
loss: 1.00574791431427,grad_norm: 0.9586118377031034, iteration: 312071
loss: 1.0046098232269287,grad_norm: 0.7965107515142865, iteration: 312072
loss: 0.9883430600166321,grad_norm: 0.9309950442872778, iteration: 312073
loss: 1.0039756298065186,grad_norm: 0.753346413795223, iteration: 312074
loss: 1.0269237756729126,grad_norm: 0.948868140354808, iteration: 312075
loss: 1.01955246925354,grad_norm: 0.9261677843772831, iteration: 312076
loss: 1.0242295265197754,grad_norm: 0.8765395508516405, iteration: 312077
loss: 0.9923987984657288,grad_norm: 0.844577791438231, iteration: 312078
loss: 1.0078370571136475,grad_norm: 0.9364913056600598, iteration: 312079
loss: 0.994450569152832,grad_norm: 0.7978579549093211, iteration: 312080
loss: 1.002781867980957,grad_norm: 0.811877409185964, iteration: 312081
loss: 1.0412349700927734,grad_norm: 0.999999862406566, iteration: 312082
loss: 0.9751946330070496,grad_norm: 0.8233319927445074, iteration: 312083
loss: 1.0123554468154907,grad_norm: 0.776423025463754, iteration: 312084
loss: 0.9781445264816284,grad_norm: 0.8437854164951156, iteration: 312085
loss: 0.9714629650115967,grad_norm: 0.8357034285707384, iteration: 312086
loss: 1.00046706199646,grad_norm: 0.7693841619878922, iteration: 312087
loss: 1.086563229560852,grad_norm: 0.8983807036432104, iteration: 312088
loss: 1.0146851539611816,grad_norm: 0.9307332115201816, iteration: 312089
loss: 0.9990893602371216,grad_norm: 0.9606076093664133, iteration: 312090
loss: 1.1933103799819946,grad_norm: 0.9999996686573966, iteration: 312091
loss: 0.991207480430603,grad_norm: 0.8854107144609014, iteration: 312092
loss: 1.0269156694412231,grad_norm: 0.9999999519662094, iteration: 312093
loss: 0.969642698764801,grad_norm: 0.8374539219293641, iteration: 312094
loss: 0.9831008315086365,grad_norm: 0.8291938995691448, iteration: 312095
loss: 1.0041449069976807,grad_norm: 0.8133163025364158, iteration: 312096
loss: 0.9519401788711548,grad_norm: 0.7833022407878124, iteration: 312097
loss: 1.000402808189392,grad_norm: 0.9999995731549307, iteration: 312098
loss: 0.9488735198974609,grad_norm: 0.8228285720234128, iteration: 312099
loss: 0.9796563982963562,grad_norm: 0.837840246122988, iteration: 312100
loss: 0.9914675354957581,grad_norm: 0.9164952091952487, iteration: 312101
loss: 1.0067640542984009,grad_norm: 0.7021522755615335, iteration: 312102
loss: 1.0045521259307861,grad_norm: 0.9279444300494798, iteration: 312103
loss: 0.9911308288574219,grad_norm: 0.8544595087084224, iteration: 312104
loss: 1.0777006149291992,grad_norm: 0.9999993043336625, iteration: 312105
loss: 1.0086132287979126,grad_norm: 0.8844131579317582, iteration: 312106
loss: 1.0282528400421143,grad_norm: 0.9999994260998749, iteration: 312107
loss: 0.9902411699295044,grad_norm: 0.7581841920683239, iteration: 312108
loss: 1.027396321296692,grad_norm: 0.8995341305944828, iteration: 312109
loss: 1.1568182706832886,grad_norm: 0.9999993820292465, iteration: 312110
loss: 0.9796964526176453,grad_norm: 0.7630198340683023, iteration: 312111
loss: 0.9854238629341125,grad_norm: 0.9999991550650131, iteration: 312112
loss: 1.0187183618545532,grad_norm: 0.9554958052490641, iteration: 312113
loss: 1.0153577327728271,grad_norm: 0.9999990872109171, iteration: 312114
loss: 0.9935920238494873,grad_norm: 0.9999990066754273, iteration: 312115
loss: 0.9990423917770386,grad_norm: 0.8054196877138176, iteration: 312116
loss: 0.9944876432418823,grad_norm: 0.8570445919587493, iteration: 312117
loss: 0.9828125238418579,grad_norm: 0.8449659099025308, iteration: 312118
loss: 0.9912829399108887,grad_norm: 0.6920506714381682, iteration: 312119
loss: 0.9625739455223083,grad_norm: 0.8074493819451856, iteration: 312120
loss: 1.0184992551803589,grad_norm: 0.9999990574638269, iteration: 312121
loss: 1.0452121496200562,grad_norm: 0.9322008708859836, iteration: 312122
loss: 0.9712998867034912,grad_norm: 0.9999991851534457, iteration: 312123
loss: 1.0704299211502075,grad_norm: 0.9999991075501096, iteration: 312124
loss: 0.9959298372268677,grad_norm: 0.8919542628829588, iteration: 312125
loss: 1.149584174156189,grad_norm: 0.999998999168196, iteration: 312126
loss: 1.0091509819030762,grad_norm: 0.8303854597077454, iteration: 312127
loss: 1.024129033088684,grad_norm: 0.9999999748823412, iteration: 312128
loss: 1.03382408618927,grad_norm: 0.8567320837083015, iteration: 312129
loss: 0.9971004128456116,grad_norm: 0.8156449751014122, iteration: 312130
loss: 0.9815806150436401,grad_norm: 0.8810428326556877, iteration: 312131
loss: 1.0823009014129639,grad_norm: 0.999999297467548, iteration: 312132
loss: 0.9779626727104187,grad_norm: 0.8921734666702463, iteration: 312133
loss: 0.9834033250808716,grad_norm: 0.9243877781411142, iteration: 312134
loss: 1.0139743089675903,grad_norm: 0.8611710718844926, iteration: 312135
loss: 0.9625245332717896,grad_norm: 0.6918358126438778, iteration: 312136
loss: 1.0171188116073608,grad_norm: 0.9387007651365424, iteration: 312137
loss: 1.0906574726104736,grad_norm: 0.9999991535614905, iteration: 312138
loss: 1.0067901611328125,grad_norm: 0.9999998123908168, iteration: 312139
loss: 1.0272904634475708,grad_norm: 0.9110739697844387, iteration: 312140
loss: 0.9655805826187134,grad_norm: 0.7588103142257158, iteration: 312141
loss: 0.9962711930274963,grad_norm: 0.9708221151492905, iteration: 312142
loss: 1.0170718431472778,grad_norm: 0.7884748133428727, iteration: 312143
loss: 0.9828351140022278,grad_norm: 0.8425102731914622, iteration: 312144
loss: 1.0265625715255737,grad_norm: 0.999999088877542, iteration: 312145
loss: 1.0245697498321533,grad_norm: 0.8013879900891937, iteration: 312146
loss: 0.9942793846130371,grad_norm: 0.8255495864133942, iteration: 312147
loss: 1.0023397207260132,grad_norm: 0.7517587687135737, iteration: 312148
loss: 1.0102663040161133,grad_norm: 0.9188884528384581, iteration: 312149
loss: 0.9909685254096985,grad_norm: 0.8507482318634506, iteration: 312150
loss: 1.0278375148773193,grad_norm: 0.937474625464309, iteration: 312151
loss: 0.965021014213562,grad_norm: 0.7456110673403876, iteration: 312152
loss: 0.9782420992851257,grad_norm: 0.8003577784305951, iteration: 312153
loss: 1.0092874765396118,grad_norm: 0.9898175234379033, iteration: 312154
loss: 1.0178275108337402,grad_norm: 0.999999488895426, iteration: 312155
loss: 1.0271062850952148,grad_norm: 0.8688117932234277, iteration: 312156
loss: 1.0922088623046875,grad_norm: 0.8765964152566762, iteration: 312157
loss: 0.9818927645683289,grad_norm: 0.8982380706832291, iteration: 312158
loss: 0.9797828197479248,grad_norm: 0.9273080304603851, iteration: 312159
loss: 0.9893575310707092,grad_norm: 0.9475291803452903, iteration: 312160
loss: 1.0038093328475952,grad_norm: 0.7574626841953275, iteration: 312161
loss: 1.0163567066192627,grad_norm: 0.684375089413536, iteration: 312162
loss: 1.0664293766021729,grad_norm: 0.9999994026803843, iteration: 312163
loss: 0.9958173036575317,grad_norm: 0.8329951710928715, iteration: 312164
loss: 1.0122714042663574,grad_norm: 0.9999990702193245, iteration: 312165
loss: 1.0011576414108276,grad_norm: 0.9999991387819822, iteration: 312166
loss: 1.0125517845153809,grad_norm: 0.889884948449647, iteration: 312167
loss: 0.9798193573951721,grad_norm: 0.7962120700169366, iteration: 312168
loss: 1.0242512226104736,grad_norm: 0.7416982762372146, iteration: 312169
loss: 0.9928039312362671,grad_norm: 0.8775654868753581, iteration: 312170
loss: 0.9612390995025635,grad_norm: 0.9835481892212729, iteration: 312171
loss: 1.0082417726516724,grad_norm: 0.9699735732462208, iteration: 312172
loss: 1.0090411901474,grad_norm: 0.9999991881563328, iteration: 312173
loss: 1.0360255241394043,grad_norm: 0.8804164991437873, iteration: 312174
loss: 1.0127724409103394,grad_norm: 0.8737972158834787, iteration: 312175
loss: 1.0202621221542358,grad_norm: 0.9999998580802368, iteration: 312176
loss: 1.020673155784607,grad_norm: 0.9999992217116034, iteration: 312177
loss: 0.972350001335144,grad_norm: 0.7787325722529321, iteration: 312178
loss: 0.981849193572998,grad_norm: 0.9172912823095741, iteration: 312179
loss: 0.9904131293296814,grad_norm: 0.7718131394123754, iteration: 312180
loss: 1.0497639179229736,grad_norm: 0.9999991959207253, iteration: 312181
loss: 1.0362849235534668,grad_norm: 0.9999991527328159, iteration: 312182
loss: 0.9994446039199829,grad_norm: 0.8446887458029599, iteration: 312183
loss: 1.027664303779602,grad_norm: 0.9999998778655248, iteration: 312184
loss: 1.0234214067459106,grad_norm: 0.9421047886795876, iteration: 312185
loss: 1.002905011177063,grad_norm: 0.7691441945853121, iteration: 312186
loss: 1.0145694017410278,grad_norm: 0.8643190209945917, iteration: 312187
loss: 1.0075411796569824,grad_norm: 0.9071223005773389, iteration: 312188
loss: 1.1331796646118164,grad_norm: 0.9999999893842163, iteration: 312189
loss: 1.0043565034866333,grad_norm: 0.9999991759259003, iteration: 312190
loss: 0.9762172698974609,grad_norm: 0.703885036915868, iteration: 312191
loss: 1.0149952173233032,grad_norm: 0.9999993433328254, iteration: 312192
loss: 1.013996958732605,grad_norm: 0.7682299637565055, iteration: 312193
loss: 1.0391490459442139,grad_norm: 0.9565435513242888, iteration: 312194
loss: 1.0154889822006226,grad_norm: 0.9999991520656069, iteration: 312195
loss: 1.0827577114105225,grad_norm: 0.9999998488679348, iteration: 312196
loss: 0.9979029893875122,grad_norm: 0.8257094112442909, iteration: 312197
loss: 0.9963043928146362,grad_norm: 0.9070301077134449, iteration: 312198
loss: 0.9961457848548889,grad_norm: 0.7028477952693023, iteration: 312199
loss: 0.9634897112846375,grad_norm: 0.920174575717869, iteration: 312200
loss: 1.0163264274597168,grad_norm: 0.9999993608800782, iteration: 312201
loss: 0.9665449261665344,grad_norm: 0.8740018745149193, iteration: 312202
loss: 1.0756454467773438,grad_norm: 0.9446723375700313, iteration: 312203
loss: 1.0138076543807983,grad_norm: 0.8188638884540089, iteration: 312204
loss: 1.0232698917388916,grad_norm: 0.9999993715801693, iteration: 312205
loss: 1.0171043872833252,grad_norm: 0.9570403217585658, iteration: 312206
loss: 0.9980287551879883,grad_norm: 0.9601402520228863, iteration: 312207
loss: 1.0106748342514038,grad_norm: 0.8962940066384151, iteration: 312208
loss: 1.0211753845214844,grad_norm: 0.810197510626962, iteration: 312209
loss: 0.9886714220046997,grad_norm: 0.9999990651240124, iteration: 312210
loss: 1.0170930624008179,grad_norm: 0.9856354487327224, iteration: 312211
loss: 1.0075992345809937,grad_norm: 0.8661555721188923, iteration: 312212
loss: 0.9667093753814697,grad_norm: 0.8554197199761141, iteration: 312213
loss: 0.9631112813949585,grad_norm: 0.86154477985258, iteration: 312214
loss: 1.0040191411972046,grad_norm: 0.7282812785623679, iteration: 312215
loss: 1.008337140083313,grad_norm: 0.999999209902596, iteration: 312216
loss: 1.0048530101776123,grad_norm: 0.7158997163694707, iteration: 312217
loss: 1.0868902206420898,grad_norm: 0.9000064059588347, iteration: 312218
loss: 0.9689176082611084,grad_norm: 0.801522558462957, iteration: 312219
loss: 1.0175999402999878,grad_norm: 0.9999999104968834, iteration: 312220
loss: 1.008656620979309,grad_norm: 0.9999996593997941, iteration: 312221
loss: 0.994697093963623,grad_norm: 0.8694915864062069, iteration: 312222
loss: 1.036623239517212,grad_norm: 0.8435596700875939, iteration: 312223
loss: 0.9681528806686401,grad_norm: 0.9086270319237644, iteration: 312224
loss: 1.0129280090332031,grad_norm: 0.9117675222178195, iteration: 312225
loss: 0.9757251739501953,grad_norm: 0.9588804943536298, iteration: 312226
loss: 1.0004864931106567,grad_norm: 0.9293715392220776, iteration: 312227
loss: 1.013261318206787,grad_norm: 0.8099355906473081, iteration: 312228
loss: 0.9765937924385071,grad_norm: 0.9867182048338758, iteration: 312229
loss: 1.0302761793136597,grad_norm: 0.9909704512694821, iteration: 312230
loss: 1.0123635530471802,grad_norm: 0.8938765574173728, iteration: 312231
loss: 0.9974139332771301,grad_norm: 0.8266034661767138, iteration: 312232
loss: 0.9995743036270142,grad_norm: 0.8706272043646505, iteration: 312233
loss: 1.0160186290740967,grad_norm: 0.8468576134765397, iteration: 312234
loss: 1.0725862979888916,grad_norm: 0.8679933677933453, iteration: 312235
loss: 1.0119619369506836,grad_norm: 0.8751836382032767, iteration: 312236
loss: 0.9820274114608765,grad_norm: 0.9512254836786633, iteration: 312237
loss: 0.9998919367790222,grad_norm: 0.7456575586581888, iteration: 312238
loss: 0.9891026020050049,grad_norm: 0.8579103072864152, iteration: 312239
loss: 1.0057605504989624,grad_norm: 0.9915116959133691, iteration: 312240
loss: 0.9866856336593628,grad_norm: 0.9999991911218042, iteration: 312241
loss: 0.9938967823982239,grad_norm: 0.8460718302997762, iteration: 312242
loss: 0.9972282648086548,grad_norm: 0.6971975797193519, iteration: 312243
loss: 1.0074900388717651,grad_norm: 0.9999991264166034, iteration: 312244
loss: 1.0171092748641968,grad_norm: 0.8361960218841539, iteration: 312245
loss: 1.0042766332626343,grad_norm: 0.999999084662164, iteration: 312246
loss: 0.9821540713310242,grad_norm: 0.8783197014058668, iteration: 312247
loss: 0.9961053729057312,grad_norm: 0.9999998085393506, iteration: 312248
loss: 1.020723819732666,grad_norm: 0.6897368073353376, iteration: 312249
loss: 1.0175464153289795,grad_norm: 0.7552802241133394, iteration: 312250
loss: 1.0292774438858032,grad_norm: 0.851195230074176, iteration: 312251
loss: 0.9719365239143372,grad_norm: 0.8530233805577233, iteration: 312252
loss: 0.9987098574638367,grad_norm: 0.8014272957432045, iteration: 312253
loss: 0.9895530939102173,grad_norm: 0.8258403829417656, iteration: 312254
loss: 1.0843485593795776,grad_norm: 0.8623488052341045, iteration: 312255
loss: 1.0098884105682373,grad_norm: 0.7942788484220507, iteration: 312256
loss: 0.9720457196235657,grad_norm: 0.6642158938952751, iteration: 312257
loss: 1.013004183769226,grad_norm: 0.7562235222297451, iteration: 312258
loss: 1.0082823038101196,grad_norm: 0.8415153152073046, iteration: 312259
loss: 1.0140836238861084,grad_norm: 0.8711615959177014, iteration: 312260
loss: 1.019299030303955,grad_norm: 0.9081530397423992, iteration: 312261
loss: 0.9868670105934143,grad_norm: 0.8645306687230053, iteration: 312262
loss: 0.9981023073196411,grad_norm: 0.9518560903710227, iteration: 312263
loss: 1.0058786869049072,grad_norm: 0.8709230600681858, iteration: 312264
loss: 1.0096875429153442,grad_norm: 0.8990174275773609, iteration: 312265
loss: 0.9852240085601807,grad_norm: 0.9468872549767253, iteration: 312266
loss: 0.9770727157592773,grad_norm: 0.8562500554959527, iteration: 312267
loss: 0.9757500886917114,grad_norm: 0.8846849305640317, iteration: 312268
loss: 1.0146586894989014,grad_norm: 0.9469847478741442, iteration: 312269
loss: 1.0046916007995605,grad_norm: 0.8008575592429218, iteration: 312270
loss: 1.0030577182769775,grad_norm: 0.7947073587937359, iteration: 312271
loss: 0.9770703315734863,grad_norm: 0.9999999525645654, iteration: 312272
loss: 1.006908893585205,grad_norm: 0.8732763185082252, iteration: 312273
loss: 0.9960707426071167,grad_norm: 0.9128690758910595, iteration: 312274
loss: 0.9857541918754578,grad_norm: 0.9783967618791763, iteration: 312275
loss: 1.019716501235962,grad_norm: 0.8307302500513609, iteration: 312276
loss: 1.0240139961242676,grad_norm: 0.7317234236697768, iteration: 312277
loss: 0.9849765300750732,grad_norm: 0.9999991523393575, iteration: 312278
loss: 1.0068461894989014,grad_norm: 0.8799320934253064, iteration: 312279
loss: 1.0291730165481567,grad_norm: 0.7091108058518425, iteration: 312280
loss: 1.0432579517364502,grad_norm: 0.8000660477753345, iteration: 312281
loss: 0.9739436507225037,grad_norm: 0.76817714916306, iteration: 312282
loss: 1.0505179166793823,grad_norm: 0.999999590755711, iteration: 312283
loss: 0.9722204804420471,grad_norm: 0.8672468622153809, iteration: 312284
loss: 1.0045665502548218,grad_norm: 0.8470682415850567, iteration: 312285
loss: 1.0010422468185425,grad_norm: 0.7573046472914965, iteration: 312286
loss: 1.0245522260665894,grad_norm: 0.9999996066000987, iteration: 312287
loss: 1.0101877450942993,grad_norm: 0.8906489206741499, iteration: 312288
loss: 1.0062828063964844,grad_norm: 0.9999992327098313, iteration: 312289
loss: 0.9956575632095337,grad_norm: 0.8575936094149028, iteration: 312290
loss: 0.9945064783096313,grad_norm: 0.9190671160865466, iteration: 312291
loss: 1.0367119312286377,grad_norm: 0.9999997144902539, iteration: 312292
loss: 1.0029690265655518,grad_norm: 0.9266244569551139, iteration: 312293
loss: 0.9967549443244934,grad_norm: 0.9065100284879932, iteration: 312294
loss: 1.0082565546035767,grad_norm: 0.8896051742547332, iteration: 312295
loss: 0.9827436804771423,grad_norm: 0.9580331339978541, iteration: 312296
loss: 1.0495452880859375,grad_norm: 0.999999589925936, iteration: 312297
loss: 0.9994459748268127,grad_norm: 0.9999991344415164, iteration: 312298
loss: 0.9983213543891907,grad_norm: 0.8246018967006131, iteration: 312299
loss: 0.9832393527030945,grad_norm: 0.7039901212862976, iteration: 312300
loss: 0.9763365983963013,grad_norm: 0.8735420850102484, iteration: 312301
loss: 1.014731764793396,grad_norm: 0.9999993863302837, iteration: 312302
loss: 0.9796218276023865,grad_norm: 0.919828171089078, iteration: 312303
loss: 1.0120570659637451,grad_norm: 0.9778846307048804, iteration: 312304
loss: 0.9973219037055969,grad_norm: 0.8262744834538491, iteration: 312305
loss: 1.0149279832839966,grad_norm: 0.8313743890873835, iteration: 312306
loss: 1.007703185081482,grad_norm: 0.9999991921505657, iteration: 312307
loss: 0.9731503129005432,grad_norm: 0.8980818667692019, iteration: 312308
loss: 1.0916296243667603,grad_norm: 0.9999996877814139, iteration: 312309
loss: 1.006036400794983,grad_norm: 0.7479874093010316, iteration: 312310
loss: 1.001549243927002,grad_norm: 0.8024199902703547, iteration: 312311
loss: 0.99725741147995,grad_norm: 0.8222462823976721, iteration: 312312
loss: 0.9883692264556885,grad_norm: 0.7759167949858531, iteration: 312313
loss: 1.0235588550567627,grad_norm: 0.9029284762023351, iteration: 312314
loss: 0.9711620211601257,grad_norm: 0.8558851532280257, iteration: 312315
loss: 1.0010154247283936,grad_norm: 0.7055074274070796, iteration: 312316
loss: 1.0038446187973022,grad_norm: 0.7957289437871433, iteration: 312317
loss: 0.9994862675666809,grad_norm: 0.9890696526223576, iteration: 312318
loss: 1.0066457986831665,grad_norm: 0.9999990250242856, iteration: 312319
loss: 1.0133678913116455,grad_norm: 0.76947203213665, iteration: 312320
loss: 0.9932544827461243,grad_norm: 0.8234278177842802, iteration: 312321
loss: 0.9898136258125305,grad_norm: 0.8328461569193092, iteration: 312322
loss: 1.0040050745010376,grad_norm: 0.9999992178208033, iteration: 312323
loss: 1.003488540649414,grad_norm: 0.7612969756529522, iteration: 312324
loss: 0.9597365856170654,grad_norm: 0.7354490434275945, iteration: 312325
loss: 0.9889315962791443,grad_norm: 0.7204504525495636, iteration: 312326
loss: 1.0040388107299805,grad_norm: 0.9051803204942394, iteration: 312327
loss: 0.9871450662612915,grad_norm: 0.8187455589560608, iteration: 312328
loss: 0.9938728213310242,grad_norm: 0.7801489460263373, iteration: 312329
loss: 0.9959638118743896,grad_norm: 0.819539687009811, iteration: 312330
loss: 0.987205445766449,grad_norm: 0.9484456687186317, iteration: 312331
loss: 1.0314133167266846,grad_norm: 0.9999997031535659, iteration: 312332
loss: 0.9870240688323975,grad_norm: 0.9999990090038258, iteration: 312333
loss: 1.0279045104980469,grad_norm: 0.687712954286851, iteration: 312334
loss: 0.9974544048309326,grad_norm: 0.827681407198037, iteration: 312335
loss: 1.027458906173706,grad_norm: 0.9489645397933741, iteration: 312336
loss: 0.9848031997680664,grad_norm: 0.9778363324522117, iteration: 312337
loss: 0.9930447936058044,grad_norm: 0.8914966052117148, iteration: 312338
loss: 0.9941482543945312,grad_norm: 0.846974148622382, iteration: 312339
loss: 0.9799104928970337,grad_norm: 0.9999992306082149, iteration: 312340
loss: 0.9680874943733215,grad_norm: 0.7441538520851645, iteration: 312341
loss: 1.0029442310333252,grad_norm: 0.9999990588449048, iteration: 312342
loss: 1.0534882545471191,grad_norm: 0.9999993225856026, iteration: 312343
loss: 0.9405373930931091,grad_norm: 0.958332958593431, iteration: 312344
loss: 0.9889631867408752,grad_norm: 0.8474687617375429, iteration: 312345
loss: 1.019458532333374,grad_norm: 0.9512294262119417, iteration: 312346
loss: 1.0602614879608154,grad_norm: 0.9999998938161412, iteration: 312347
loss: 1.0091835260391235,grad_norm: 0.8516787606995183, iteration: 312348
loss: 0.9969479441642761,grad_norm: 0.7891248337127241, iteration: 312349
loss: 0.9843131899833679,grad_norm: 0.7577545918293552, iteration: 312350
loss: 0.9919096231460571,grad_norm: 0.8828113262465292, iteration: 312351
loss: 0.9762402772903442,grad_norm: 0.78424122668524, iteration: 312352
loss: 0.985077440738678,grad_norm: 0.7309198976269516, iteration: 312353
loss: 1.031257152557373,grad_norm: 0.858623570919406, iteration: 312354
loss: 0.9963237643241882,grad_norm: 0.8672980301069046, iteration: 312355
loss: 1.0149109363555908,grad_norm: 0.9136600429846176, iteration: 312356
loss: 0.9658576846122742,grad_norm: 0.7415048275067108, iteration: 312357
loss: 1.0153950452804565,grad_norm: 0.8777442949221814, iteration: 312358
loss: 1.1092174053192139,grad_norm: 0.9691755426591528, iteration: 312359
loss: 0.9932414889335632,grad_norm: 0.9999994106365503, iteration: 312360
loss: 1.018136978149414,grad_norm: 0.9966171682118569, iteration: 312361
loss: 1.0030583143234253,grad_norm: 0.8744542595357495, iteration: 312362
loss: 1.0266218185424805,grad_norm: 0.8318900924920872, iteration: 312363
loss: 1.016592025756836,grad_norm: 0.7481754802422486, iteration: 312364
loss: 1.0071443319320679,grad_norm: 0.9999993422618254, iteration: 312365
loss: 1.0427590608596802,grad_norm: 0.8324891994176475, iteration: 312366
loss: 1.0031334161758423,grad_norm: 0.9149063028014031, iteration: 312367
loss: 0.9988542795181274,grad_norm: 0.9686282679516989, iteration: 312368
loss: 1.0061968564987183,grad_norm: 0.7741785509349365, iteration: 312369
loss: 1.0123761892318726,grad_norm: 0.9381762143218452, iteration: 312370
loss: 1.0049831867218018,grad_norm: 0.8998342177922806, iteration: 312371
loss: 1.0203135013580322,grad_norm: 0.8644754704092302, iteration: 312372
loss: 0.9865679740905762,grad_norm: 0.885088429987416, iteration: 312373
loss: 1.0043505430221558,grad_norm: 0.8707082156393784, iteration: 312374
loss: 1.016298770904541,grad_norm: 0.999999876545536, iteration: 312375
loss: 0.97654128074646,grad_norm: 0.904372851057578, iteration: 312376
loss: 1.0174436569213867,grad_norm: 0.9417987323541529, iteration: 312377
loss: 1.002620816230774,grad_norm: 0.8179192253398299, iteration: 312378
loss: 0.9692595601081848,grad_norm: 0.8871320037835834, iteration: 312379
loss: 1.049796462059021,grad_norm: 0.959271405442997, iteration: 312380
loss: 0.9628763198852539,grad_norm: 0.7094895511958224, iteration: 312381
loss: 0.9783700108528137,grad_norm: 0.9522993313622344, iteration: 312382
loss: 1.0296167135238647,grad_norm: 0.8679001599946279, iteration: 312383
loss: 0.9620378017425537,grad_norm: 0.6991175788813813, iteration: 312384
loss: 0.9832269549369812,grad_norm: 0.7446293528995998, iteration: 312385
loss: 0.9881155490875244,grad_norm: 0.7425767769885125, iteration: 312386
loss: 1.0368938446044922,grad_norm: 0.8680956791756922, iteration: 312387
loss: 0.9892063140869141,grad_norm: 0.9173680997701764, iteration: 312388
loss: 1.1594748497009277,grad_norm: 0.9999992786197849, iteration: 312389
loss: 1.0056654214859009,grad_norm: 0.7248091358177177, iteration: 312390
loss: 1.0024113655090332,grad_norm: 0.9723337359166039, iteration: 312391
loss: 1.0659977197647095,grad_norm: 0.9999993726216262, iteration: 312392
loss: 0.9938883185386658,grad_norm: 0.9803104392489407, iteration: 312393
loss: 0.9820818305015564,grad_norm: 0.9351026513502788, iteration: 312394
loss: 1.050826907157898,grad_norm: 0.9999990396394484, iteration: 312395
loss: 1.0328174829483032,grad_norm: 0.9089964531212024, iteration: 312396
loss: 0.9717166423797607,grad_norm: 0.9960167102723211, iteration: 312397
loss: 0.9986158013343811,grad_norm: 0.7685399747357468, iteration: 312398
loss: 1.0338225364685059,grad_norm: 0.9065624540231257, iteration: 312399
loss: 1.0557477474212646,grad_norm: 0.999999064238224, iteration: 312400
loss: 1.0225073099136353,grad_norm: 0.9999990181516694, iteration: 312401
loss: 1.0369657278060913,grad_norm: 0.8803563916991721, iteration: 312402
loss: 1.0077903270721436,grad_norm: 0.7596599748478909, iteration: 312403
loss: 1.0324444770812988,grad_norm: 0.9219382960252465, iteration: 312404
loss: 1.0053108930587769,grad_norm: 0.8927150715601014, iteration: 312405
loss: 0.9826077818870544,grad_norm: 0.8581068737035169, iteration: 312406
loss: 1.0195392370224,grad_norm: 0.8818089072412659, iteration: 312407
loss: 0.9776796698570251,grad_norm: 0.8716125119212456, iteration: 312408
loss: 1.019645094871521,grad_norm: 0.9999991988290624, iteration: 312409
loss: 1.0059709548950195,grad_norm: 0.8463279756955759, iteration: 312410
loss: 0.9982219934463501,grad_norm: 0.9674390463040293, iteration: 312411
loss: 1.0236610174179077,grad_norm: 0.9999992684554528, iteration: 312412
loss: 1.0126250982284546,grad_norm: 0.9999996818004244, iteration: 312413
loss: 1.0141421556472778,grad_norm: 0.8459042269618376, iteration: 312414
loss: 0.9849944710731506,grad_norm: 0.832611234377367, iteration: 312415
loss: 0.9866527915000916,grad_norm: 0.8976250561090598, iteration: 312416
loss: 0.9849341511726379,grad_norm: 0.9429937591109788, iteration: 312417
loss: 0.9906854033470154,grad_norm: 0.8234831302620587, iteration: 312418
loss: 1.0411540269851685,grad_norm: 0.986625466475018, iteration: 312419
loss: 1.0037912130355835,grad_norm: 0.7473562895262315, iteration: 312420
loss: 0.9898825287818909,grad_norm: 0.90943600463641, iteration: 312421
loss: 1.0509859323501587,grad_norm: 0.9999997765916206, iteration: 312422
loss: 1.0191566944122314,grad_norm: 0.9874433729165072, iteration: 312423
loss: 0.9861437082290649,grad_norm: 0.9490573194085956, iteration: 312424
loss: 0.971581220626831,grad_norm: 0.8769541681883548, iteration: 312425
loss: 0.9792992472648621,grad_norm: 0.8790427609434366, iteration: 312426
loss: 0.9665724039077759,grad_norm: 0.7940989630392137, iteration: 312427
loss: 1.005336880683899,grad_norm: 0.9786710686790765, iteration: 312428
loss: 0.9994372129440308,grad_norm: 0.8154243783761027, iteration: 312429
loss: 1.0087707042694092,grad_norm: 0.9999992900859779, iteration: 312430
loss: 0.9791678190231323,grad_norm: 0.8888917183054947, iteration: 312431
loss: 1.0188812017440796,grad_norm: 0.918845835934442, iteration: 312432
loss: 1.0166312456130981,grad_norm: 0.8404645606222856, iteration: 312433
loss: 1.0702850818634033,grad_norm: 0.909600148645824, iteration: 312434
loss: 0.9856259226799011,grad_norm: 0.9999991118180976, iteration: 312435
loss: 1.0218253135681152,grad_norm: 0.8293850944157275, iteration: 312436
loss: 1.0260655879974365,grad_norm: 0.9323305072226834, iteration: 312437
loss: 1.1138103008270264,grad_norm: 0.9999995797031845, iteration: 312438
loss: 0.9922156929969788,grad_norm: 0.9999998578772774, iteration: 312439
loss: 1.0140728950500488,grad_norm: 0.8089961696988351, iteration: 312440
loss: 0.9843043088912964,grad_norm: 0.9765337133660743, iteration: 312441
loss: 1.0137968063354492,grad_norm: 0.9372561810534437, iteration: 312442
loss: 0.992174506187439,grad_norm: 0.8502964127253916, iteration: 312443
loss: 1.0306577682495117,grad_norm: 0.8839019085094371, iteration: 312444
loss: 1.032394289970398,grad_norm: 0.9999992689286117, iteration: 312445
loss: 1.0254937410354614,grad_norm: 0.8507322037654755, iteration: 312446
loss: 0.9973798394203186,grad_norm: 0.9227261603336181, iteration: 312447
loss: 1.0299237966537476,grad_norm: 0.7992429046046465, iteration: 312448
loss: 0.9915619492530823,grad_norm: 0.7991751562433687, iteration: 312449
loss: 1.0398377180099487,grad_norm: 0.9035910948406417, iteration: 312450
loss: 1.0294615030288696,grad_norm: 0.9899822227654078, iteration: 312451
loss: 1.0476527214050293,grad_norm: 0.9184707461383612, iteration: 312452
loss: 1.0038561820983887,grad_norm: 0.8158769646641265, iteration: 312453
loss: 1.003328800201416,grad_norm: 0.7704637530003755, iteration: 312454
loss: 1.0036736726760864,grad_norm: 0.9260640778624738, iteration: 312455
loss: 0.9891424179077148,grad_norm: 0.9120749966713139, iteration: 312456
loss: 1.2353134155273438,grad_norm: 1.000000046867437, iteration: 312457
loss: 0.9906209707260132,grad_norm: 0.7182669368836607, iteration: 312458
loss: 0.9759955406188965,grad_norm: 0.9999989889125293, iteration: 312459
loss: 1.112016201019287,grad_norm: 0.9999995840236717, iteration: 312460
loss: 1.0226565599441528,grad_norm: 0.8098148112224418, iteration: 312461
loss: 0.9954651594161987,grad_norm: 0.8662983571143625, iteration: 312462
loss: 0.9567811489105225,grad_norm: 0.7184914797012285, iteration: 312463
loss: 0.9776021838188171,grad_norm: 0.7884601834523427, iteration: 312464
loss: 1.0298821926116943,grad_norm: 0.769714996650482, iteration: 312465
loss: 1.0822117328643799,grad_norm: 0.8967492294347057, iteration: 312466
loss: 1.0622633695602417,grad_norm: 0.999999166525907, iteration: 312467
loss: 1.0079824924468994,grad_norm: 0.9920404227715409, iteration: 312468
loss: 0.9889897108078003,grad_norm: 0.8227761119025631, iteration: 312469
loss: 1.0079315900802612,grad_norm: 0.9999995040338318, iteration: 312470
loss: 1.019946813583374,grad_norm: 0.8578156017919759, iteration: 312471
loss: 1.0297274589538574,grad_norm: 0.7779547064138805, iteration: 312472
loss: 0.9810515642166138,grad_norm: 0.8299316052763154, iteration: 312473
loss: 1.006739854812622,grad_norm: 0.9999990758237483, iteration: 312474
loss: 1.0402363538742065,grad_norm: 0.9236879638005306, iteration: 312475
loss: 0.9998709559440613,grad_norm: 0.8905509320969207, iteration: 312476
loss: 1.016695261001587,grad_norm: 0.8168473373400763, iteration: 312477
loss: 0.9592929482460022,grad_norm: 0.8238121866614968, iteration: 312478
loss: 1.008212924003601,grad_norm: 0.7105872249992153, iteration: 312479
loss: 0.9874549508094788,grad_norm: 0.8421453853027309, iteration: 312480
loss: 1.010427713394165,grad_norm: 0.9999993223017615, iteration: 312481
loss: 1.0241079330444336,grad_norm: 0.8265544976069423, iteration: 312482
loss: 0.9961219429969788,grad_norm: 0.9999990924630453, iteration: 312483
loss: 1.0162166357040405,grad_norm: 0.9961030236328675, iteration: 312484
loss: 0.9777283668518066,grad_norm: 0.9073041091768386, iteration: 312485
loss: 0.9958068132400513,grad_norm: 0.8891235660040286, iteration: 312486
loss: 1.0100208520889282,grad_norm: 0.8119538951221082, iteration: 312487
loss: 1.0363585948944092,grad_norm: 0.9999995814833975, iteration: 312488
loss: 1.0050197839736938,grad_norm: 0.891936476819187, iteration: 312489
loss: 0.981458842754364,grad_norm: 0.8834770383695482, iteration: 312490
loss: 1.0034254789352417,grad_norm: 0.9999990559339923, iteration: 312491
loss: 0.9500454068183899,grad_norm: 0.9796801834910251, iteration: 312492
loss: 0.9702125191688538,grad_norm: 0.7232490285213004, iteration: 312493
loss: 0.9892669916152954,grad_norm: 0.9539784725657392, iteration: 312494
loss: 1.0432031154632568,grad_norm: 0.7546661793391528, iteration: 312495
loss: 1.0079706907272339,grad_norm: 0.8535125498379311, iteration: 312496
loss: 1.0446120500564575,grad_norm: 0.785561404241857, iteration: 312497
loss: 1.0079132318496704,grad_norm: 0.8201940817223763, iteration: 312498
loss: 0.9988729357719421,grad_norm: 0.8156255260094205, iteration: 312499
loss: 0.9876794815063477,grad_norm: 0.8524718956463313, iteration: 312500
loss: 1.0043103694915771,grad_norm: 0.8613089937558909, iteration: 312501
loss: 1.0014448165893555,grad_norm: 0.8305998474226827, iteration: 312502
loss: 0.9757351279258728,grad_norm: 0.7475791882697366, iteration: 312503
loss: 1.0430644750595093,grad_norm: 0.8313982840799318, iteration: 312504
loss: 0.9826062321662903,grad_norm: 0.6830535509259853, iteration: 312505
loss: 0.9751257300376892,grad_norm: 0.9868011978626461, iteration: 312506
loss: 1.0776675939559937,grad_norm: 0.9999990884105672, iteration: 312507
loss: 1.0058726072311401,grad_norm: 0.961915937678004, iteration: 312508
loss: 0.9773500561714172,grad_norm: 0.6816184586387042, iteration: 312509
loss: 0.9788098931312561,grad_norm: 0.878025620134187, iteration: 312510
loss: 1.0112231969833374,grad_norm: 0.6915035262937683, iteration: 312511
loss: 0.9972756505012512,grad_norm: 0.8402916571363594, iteration: 312512
loss: 1.0076918601989746,grad_norm: 0.7027024393533526, iteration: 312513
loss: 0.9782302975654602,grad_norm: 0.7367925410290765, iteration: 312514
loss: 1.0250250101089478,grad_norm: 0.8115833712102521, iteration: 312515
loss: 1.034798502922058,grad_norm: 0.9999991877491922, iteration: 312516
loss: 0.9838892817497253,grad_norm: 0.7605764943889606, iteration: 312517
loss: 1.0003901720046997,grad_norm: 0.8173058162144665, iteration: 312518
loss: 0.983034074306488,grad_norm: 0.7858438779593961, iteration: 312519
loss: 1.0131393671035767,grad_norm: 0.722214492785693, iteration: 312520
loss: 1.0136674642562866,grad_norm: 0.9486222317515695, iteration: 312521
loss: 1.021262288093567,grad_norm: 0.7761786318215524, iteration: 312522
loss: 1.0320918560028076,grad_norm: 0.9999998111445286, iteration: 312523
loss: 1.0886107683181763,grad_norm: 0.9999997415895441, iteration: 312524
loss: 1.212032675743103,grad_norm: 0.9999998397910554, iteration: 312525
loss: 0.9926618933677673,grad_norm: 0.8527637468743852, iteration: 312526
loss: 1.0065500736236572,grad_norm: 0.9999989535194046, iteration: 312527
loss: 1.0020101070404053,grad_norm: 0.7592974269901777, iteration: 312528
loss: 0.9991722702980042,grad_norm: 0.8723527636268253, iteration: 312529
loss: 1.0117346048355103,grad_norm: 0.838262320604974, iteration: 312530
loss: 1.0373762845993042,grad_norm: 0.999999312566319, iteration: 312531
loss: 0.9905754327774048,grad_norm: 0.8341350423151619, iteration: 312532
loss: 0.9753613471984863,grad_norm: 0.8572175684511801, iteration: 312533
loss: 0.9809650778770447,grad_norm: 0.7853436072903792, iteration: 312534
loss: 1.027358055114746,grad_norm: 0.7468029588986884, iteration: 312535
loss: 1.0207287073135376,grad_norm: 0.9999998626050933, iteration: 312536
loss: 1.045475959777832,grad_norm: 0.8914202022321248, iteration: 312537
loss: 0.9716305732727051,grad_norm: 0.7902170047759041, iteration: 312538
loss: 0.9884546995162964,grad_norm: 0.795597458476127, iteration: 312539
loss: 0.9770928025245667,grad_norm: 0.8753616824879825, iteration: 312540
loss: 1.164230465888977,grad_norm: 0.9999993647446095, iteration: 312541
loss: 0.989717960357666,grad_norm: 0.8561763449884465, iteration: 312542
loss: 0.9883034825325012,grad_norm: 0.784846143703577, iteration: 312543
loss: 1.0377039909362793,grad_norm: 0.9349161738230688, iteration: 312544
loss: 1.0293660163879395,grad_norm: 0.9999992393504105, iteration: 312545
loss: 1.0040769577026367,grad_norm: 0.8929574382790711, iteration: 312546
loss: 1.029876708984375,grad_norm: 0.8483044744325011, iteration: 312547
loss: 1.0341906547546387,grad_norm: 0.9999994821202486, iteration: 312548
loss: 0.9972640872001648,grad_norm: 0.9894619869823592, iteration: 312549
loss: 0.9770759344100952,grad_norm: 0.7756723200366505, iteration: 312550
loss: 1.0329474210739136,grad_norm: 0.9999991567413524, iteration: 312551
loss: 1.0035279989242554,grad_norm: 0.8501526935454163, iteration: 312552
loss: 0.9955078959465027,grad_norm: 0.7762390530795662, iteration: 312553
loss: 1.0231374502182007,grad_norm: 0.999999199778244, iteration: 312554
loss: 0.9846314191818237,grad_norm: 0.8399867298273099, iteration: 312555
loss: 1.071760892868042,grad_norm: 0.8712551650837476, iteration: 312556
loss: 0.9798028469085693,grad_norm: 0.9999993807852657, iteration: 312557
loss: 0.9995556473731995,grad_norm: 0.9999991060755705, iteration: 312558
loss: 1.019759178161621,grad_norm: 0.8883343662451644, iteration: 312559
loss: 0.9629954695701599,grad_norm: 0.8592534510199058, iteration: 312560
loss: 0.9991636872291565,grad_norm: 0.8529523272423453, iteration: 312561
loss: 1.1255384683609009,grad_norm: 0.9999996586346178, iteration: 312562
loss: 0.967289924621582,grad_norm: 0.8779307530992876, iteration: 312563
loss: 1.014418601989746,grad_norm: 0.7535487003086642, iteration: 312564
loss: 0.9433277249336243,grad_norm: 0.8950551181348143, iteration: 312565
loss: 1.0333445072174072,grad_norm: 0.8521340888682186, iteration: 312566
loss: 1.0096632242202759,grad_norm: 0.9999990781883328, iteration: 312567
loss: 0.9951151013374329,grad_norm: 0.9176439846757988, iteration: 312568
loss: 1.0201889276504517,grad_norm: 0.9999992088085535, iteration: 312569
loss: 1.019029140472412,grad_norm: 0.7721548643303103, iteration: 312570
loss: 0.9969770312309265,grad_norm: 0.7297157972649712, iteration: 312571
loss: 0.9837177395820618,grad_norm: 0.840361352893452, iteration: 312572
loss: 0.9946706295013428,grad_norm: 0.9999989753194247, iteration: 312573
loss: 1.0055984258651733,grad_norm: 0.8791888881144019, iteration: 312574
loss: 1.0356954336166382,grad_norm: 0.9999997922314454, iteration: 312575
loss: 1.016337513923645,grad_norm: 0.8541840091700071, iteration: 312576
loss: 1.013685703277588,grad_norm: 0.782592165821324, iteration: 312577
loss: 1.0279128551483154,grad_norm: 0.8009469346511883, iteration: 312578
loss: 0.9954956769943237,grad_norm: 0.7862885344449481, iteration: 312579
loss: 0.9821684956550598,grad_norm: 0.8893349337992932, iteration: 312580
loss: 0.9662855863571167,grad_norm: 0.8691698289701013, iteration: 312581
loss: 0.9974734783172607,grad_norm: 0.8175702383378094, iteration: 312582
loss: 1.009872317314148,grad_norm: 0.7870010943502818, iteration: 312583
loss: 0.9733404517173767,grad_norm: 0.8278123824449906, iteration: 312584
loss: 0.9621515870094299,grad_norm: 0.8031846958489646, iteration: 312585
loss: 0.9894229769706726,grad_norm: 0.8782166686122177, iteration: 312586
loss: 1.001755714416504,grad_norm: 0.9999994669104019, iteration: 312587
loss: 0.9994111061096191,grad_norm: 0.8215063803736864, iteration: 312588
loss: 1.0353007316589355,grad_norm: 0.9999998440006362, iteration: 312589
loss: 1.0194185972213745,grad_norm: 0.8518212763057588, iteration: 312590
loss: 1.0214011669158936,grad_norm: 0.8878742986952186, iteration: 312591
loss: 1.0123918056488037,grad_norm: 0.909161708263538, iteration: 312592
loss: 1.026517629623413,grad_norm: 0.8602152029131368, iteration: 312593
loss: 0.9748331904411316,grad_norm: 0.9506051793218976, iteration: 312594
loss: 1.0053620338439941,grad_norm: 0.8645637659284382, iteration: 312595
loss: 1.0183477401733398,grad_norm: 0.9999991519400114, iteration: 312596
loss: 1.0141348838806152,grad_norm: 0.7388288125390757, iteration: 312597
loss: 1.0614417791366577,grad_norm: 0.9277442786804634, iteration: 312598
loss: 0.9889445900917053,grad_norm: 0.9999990894402395, iteration: 312599
loss: 1.0083562135696411,grad_norm: 0.9999993400395775, iteration: 312600
loss: 1.000754952430725,grad_norm: 0.9999992327797466, iteration: 312601
loss: 0.985386073589325,grad_norm: 0.8096290552405447, iteration: 312602
loss: 1.0086395740509033,grad_norm: 0.9295067914461413, iteration: 312603
loss: 0.9691720604896545,grad_norm: 0.8882185670080245, iteration: 312604
loss: 1.0120404958724976,grad_norm: 0.8490526287095126, iteration: 312605
loss: 1.0188841819763184,grad_norm: 0.8855029880208333, iteration: 312606
loss: 0.9835882186889648,grad_norm: 0.8600277989804697, iteration: 312607
loss: 1.0188829898834229,grad_norm: 0.7435359695899114, iteration: 312608
loss: 0.9677903056144714,grad_norm: 0.9646161235806138, iteration: 312609
loss: 1.0375689268112183,grad_norm: 0.8272248109256821, iteration: 312610
loss: 0.9849252104759216,grad_norm: 0.9999989744435617, iteration: 312611
loss: 0.9831462502479553,grad_norm: 0.8483792388737784, iteration: 312612
loss: 0.988646388053894,grad_norm: 0.8627892199171479, iteration: 312613
loss: 0.9617438912391663,grad_norm: 0.869758302209642, iteration: 312614
loss: 1.010659098625183,grad_norm: 0.9604273758940752, iteration: 312615
loss: 0.9930955767631531,grad_norm: 0.814665252562205, iteration: 312616
loss: 0.9876571297645569,grad_norm: 0.9222202695241142, iteration: 312617
loss: 0.9853596091270447,grad_norm: 0.8168371312061548, iteration: 312618
loss: 0.9863032102584839,grad_norm: 0.8080684073208277, iteration: 312619
loss: 1.0378541946411133,grad_norm: 0.7683679721869305, iteration: 312620
loss: 0.9914999604225159,grad_norm: 0.7457408362784733, iteration: 312621
loss: 1.0051734447479248,grad_norm: 0.9054474912820072, iteration: 312622
loss: 0.9920028448104858,grad_norm: 0.8273505617640985, iteration: 312623
loss: 0.965225875377655,grad_norm: 0.9999990429741954, iteration: 312624
loss: 1.0026997327804565,grad_norm: 0.8396156805982999, iteration: 312625
loss: 0.9727439284324646,grad_norm: 0.9937529794136728, iteration: 312626
loss: 1.0110118389129639,grad_norm: 0.8419584248197285, iteration: 312627
loss: 1.011476755142212,grad_norm: 0.9734701546143363, iteration: 312628
loss: 1.123547911643982,grad_norm: 0.9999993862331221, iteration: 312629
loss: 1.0271458625793457,grad_norm: 0.8316903628873479, iteration: 312630
loss: 0.9690104722976685,grad_norm: 0.9911229263767971, iteration: 312631
loss: 1.0021064281463623,grad_norm: 0.8870287088246411, iteration: 312632
loss: 1.0092073678970337,grad_norm: 0.9999992166230659, iteration: 312633
loss: 1.0327014923095703,grad_norm: 0.8433136976077681, iteration: 312634
loss: 0.9721806049346924,grad_norm: 0.8132602335897329, iteration: 312635
loss: 0.9853960275650024,grad_norm: 0.7221769841128142, iteration: 312636
loss: 0.9768784046173096,grad_norm: 0.9166201970628136, iteration: 312637
loss: 0.9839006066322327,grad_norm: 0.7571867712310546, iteration: 312638
loss: 1.0087281465530396,grad_norm: 0.7064210059995312, iteration: 312639
loss: 0.9914097189903259,grad_norm: 0.9543302656601393, iteration: 312640
loss: 0.996984601020813,grad_norm: 0.8831811704694249, iteration: 312641
loss: 1.0137397050857544,grad_norm: 0.9571914683592067, iteration: 312642
loss: 0.9807013869285583,grad_norm: 0.9652154291352514, iteration: 312643
loss: 0.9939038753509521,grad_norm: 0.8788271002643122, iteration: 312644
loss: 0.9886050820350647,grad_norm: 0.7690333043195893, iteration: 312645
loss: 0.9801865220069885,grad_norm: 0.9999996383818381, iteration: 312646
loss: 0.9922242164611816,grad_norm: 0.9397417069493692, iteration: 312647
loss: 1.011182188987732,grad_norm: 0.8144306487130865, iteration: 312648
loss: 1.0002895593643188,grad_norm: 0.9999991764337925, iteration: 312649
loss: 1.0116829872131348,grad_norm: 0.8141214084039995, iteration: 312650
loss: 0.9961296319961548,grad_norm: 0.9102523239039267, iteration: 312651
loss: 0.9637651443481445,grad_norm: 0.840435952543651, iteration: 312652
loss: 0.9903454184532166,grad_norm: 0.8515691843721005, iteration: 312653
loss: 1.048150897026062,grad_norm: 0.8488606781277909, iteration: 312654
loss: 1.0204778909683228,grad_norm: 0.999999273720427, iteration: 312655
loss: 0.9832929372787476,grad_norm: 0.9102837430250171, iteration: 312656
loss: 0.9958957433700562,grad_norm: 0.804420320392874, iteration: 312657
loss: 0.993102490901947,grad_norm: 0.7730209478731629, iteration: 312658
loss: 0.9882542490959167,grad_norm: 0.9999999173358223, iteration: 312659
loss: 1.0010405778884888,grad_norm: 0.9649047166418434, iteration: 312660
loss: 1.0345085859298706,grad_norm: 0.8898656905082711, iteration: 312661
loss: 0.9855701327323914,grad_norm: 0.8891605978620616, iteration: 312662
loss: 0.9397792816162109,grad_norm: 0.9999991991704044, iteration: 312663
loss: 1.0134676694869995,grad_norm: 0.8790209879113252, iteration: 312664
loss: 1.0287379026412964,grad_norm: 0.8046785757650615, iteration: 312665
loss: 0.9967853426933289,grad_norm: 0.7501501576991326, iteration: 312666
loss: 1.04196298122406,grad_norm: 0.782673426565097, iteration: 312667
loss: 0.9814134836196899,grad_norm: 0.9951749343778831, iteration: 312668
loss: 1.0075865983963013,grad_norm: 0.9306733917664332, iteration: 312669
loss: 0.9818242192268372,grad_norm: 0.7842206352988089, iteration: 312670
loss: 1.0133516788482666,grad_norm: 0.9999997684577492, iteration: 312671
loss: 0.9779831171035767,grad_norm: 0.958542791447999, iteration: 312672
loss: 1.0046287775039673,grad_norm: 0.8266147681177994, iteration: 312673
loss: 1.0007874965667725,grad_norm: 0.8329874905049275, iteration: 312674
loss: 0.9815725088119507,grad_norm: 0.7981678581694942, iteration: 312675
loss: 1.054806113243103,grad_norm: 0.8947182204996696, iteration: 312676
loss: 0.9728798270225525,grad_norm: 0.9999992125289385, iteration: 312677
loss: 1.0203748941421509,grad_norm: 0.9447367762905967, iteration: 312678
loss: 1.0529406070709229,grad_norm: 0.9999991662855895, iteration: 312679
loss: 0.9803536534309387,grad_norm: 0.7654861656347638, iteration: 312680
loss: 0.9907676577568054,grad_norm: 0.9338725988294642, iteration: 312681
loss: 0.9474432468414307,grad_norm: 0.9726709158082031, iteration: 312682
loss: 0.9735173583030701,grad_norm: 0.8771387824617349, iteration: 312683
loss: 0.9742233157157898,grad_norm: 0.7510784207700761, iteration: 312684
loss: 0.9855862259864807,grad_norm: 0.9999991396474864, iteration: 312685
loss: 0.9716995358467102,grad_norm: 0.8333903650259791, iteration: 312686
loss: 0.9586256742477417,grad_norm: 0.8288155256745602, iteration: 312687
loss: 1.017269492149353,grad_norm: 0.6281472914672743, iteration: 312688
loss: 1.0218366384506226,grad_norm: 0.9995484524053371, iteration: 312689
loss: 1.0214731693267822,grad_norm: 0.8686033323217506, iteration: 312690
loss: 0.9547565579414368,grad_norm: 0.6999665356172481, iteration: 312691
loss: 1.0046147108078003,grad_norm: 0.9457799728194818, iteration: 312692
loss: 0.9662709832191467,grad_norm: 0.8212449355267654, iteration: 312693
loss: 1.0015360116958618,grad_norm: 0.8840720584469981, iteration: 312694
loss: 1.054819941520691,grad_norm: 0.8452517938164764, iteration: 312695
loss: 1.1139599084854126,grad_norm: 0.9999992936853503, iteration: 312696
loss: 1.0544371604919434,grad_norm: 0.9999991707667443, iteration: 312697
loss: 0.9891093969345093,grad_norm: 0.6844506022517146, iteration: 312698
loss: 1.0562865734100342,grad_norm: 0.9999991061486949, iteration: 312699
loss: 0.9821334481239319,grad_norm: 0.8996908220775995, iteration: 312700
loss: 1.0293713808059692,grad_norm: 0.9999990699962951, iteration: 312701
loss: 1.0299981832504272,grad_norm: 0.9999992360765149, iteration: 312702
loss: 0.9679245352745056,grad_norm: 0.8985931317500334, iteration: 312703
loss: 1.0510488748550415,grad_norm: 0.822454955613312, iteration: 312704
loss: 1.0055179595947266,grad_norm: 0.7399782907930434, iteration: 312705
loss: 1.1061723232269287,grad_norm: 0.9999996391724703, iteration: 312706
loss: 1.2813142538070679,grad_norm: 0.999999328429687, iteration: 312707
loss: 1.0494881868362427,grad_norm: 0.9203312951230056, iteration: 312708
loss: 1.005014419555664,grad_norm: 0.8928196368651731, iteration: 312709
loss: 0.9819250702857971,grad_norm: 0.9099589720764186, iteration: 312710
loss: 0.9976971745491028,grad_norm: 0.9529413620785293, iteration: 312711
loss: 0.9687633514404297,grad_norm: 0.8233221982180641, iteration: 312712
loss: 0.9889694452285767,grad_norm: 0.7589339845143307, iteration: 312713
loss: 1.0077837705612183,grad_norm: 0.724025785122057, iteration: 312714
loss: 1.0293551683425903,grad_norm: 0.8651062860489106, iteration: 312715
loss: 0.9903528094291687,grad_norm: 0.7646700137711031, iteration: 312716
loss: 1.0449159145355225,grad_norm: 0.8226658136003625, iteration: 312717
loss: 0.96091628074646,grad_norm: 0.8919598606391649, iteration: 312718
loss: 1.003414511680603,grad_norm: 0.8328552436551916, iteration: 312719
loss: 0.9807297587394714,grad_norm: 0.9153447204334594, iteration: 312720
loss: 0.9986752867698669,grad_norm: 0.8071708205351034, iteration: 312721
loss: 0.9704050421714783,grad_norm: 0.7712036120681254, iteration: 312722
loss: 0.9914580583572388,grad_norm: 0.7431473948320396, iteration: 312723
loss: 1.0234313011169434,grad_norm: 0.9348300379962875, iteration: 312724
loss: 1.015358805656433,grad_norm: 0.999999061336298, iteration: 312725
loss: 1.0749460458755493,grad_norm: 0.9610994517644916, iteration: 312726
loss: 0.9997312426567078,grad_norm: 0.8274850315981868, iteration: 312727
loss: 0.9873425364494324,grad_norm: 0.999998930805268, iteration: 312728
loss: 0.9891689419746399,grad_norm: 0.7843026477084492, iteration: 312729
loss: 1.023995280265808,grad_norm: 0.9244856058323956, iteration: 312730
loss: 0.998256504535675,grad_norm: 0.7757863569051682, iteration: 312731
loss: 1.0034412145614624,grad_norm: 0.8333288734776552, iteration: 312732
loss: 1.0157134532928467,grad_norm: 0.9999991155677402, iteration: 312733
loss: 1.0084619522094727,grad_norm: 0.7974635637954821, iteration: 312734
loss: 0.9860945343971252,grad_norm: 0.9402805463720497, iteration: 312735
loss: 1.0023778676986694,grad_norm: 0.8347855606264348, iteration: 312736
loss: 1.02278470993042,grad_norm: 0.9605454657383382, iteration: 312737
loss: 0.9939350485801697,grad_norm: 0.6925596438759284, iteration: 312738
loss: 1.0079978704452515,grad_norm: 0.9467080969254275, iteration: 312739
loss: 0.9915469288825989,grad_norm: 0.8661370042382956, iteration: 312740
loss: 1.0288664102554321,grad_norm: 0.7555044105397422, iteration: 312741
loss: 1.041776418685913,grad_norm: 0.727478214257159, iteration: 312742
loss: 0.9834302663803101,grad_norm: 0.9999989765271986, iteration: 312743
loss: 0.9962519407272339,grad_norm: 0.9999991057486372, iteration: 312744
loss: 1.0166493654251099,grad_norm: 0.8413926958843184, iteration: 312745
loss: 0.9823806285858154,grad_norm: 0.7362374202843198, iteration: 312746
loss: 1.0071858167648315,grad_norm: 0.889368505216108, iteration: 312747
loss: 1.0073007345199585,grad_norm: 0.8877263250295149, iteration: 312748
loss: 1.0139107704162598,grad_norm: 0.8037887847292366, iteration: 312749
loss: 0.9909125566482544,grad_norm: 0.9126158067083048, iteration: 312750
loss: 1.020036220550537,grad_norm: 0.8901540660883072, iteration: 312751
loss: 1.0132372379302979,grad_norm: 0.736896594553077, iteration: 312752
loss: 1.1050398349761963,grad_norm: 0.9999999910676679, iteration: 312753
loss: 0.993339478969574,grad_norm: 0.7948665350479022, iteration: 312754
loss: 1.0563961267471313,grad_norm: 0.9999998147305706, iteration: 312755
loss: 1.0030866861343384,grad_norm: 0.8948991542704119, iteration: 312756
loss: 1.013038992881775,grad_norm: 0.9999990354592415, iteration: 312757
loss: 0.9631437659263611,grad_norm: 0.8015780831770666, iteration: 312758
loss: 0.9776224493980408,grad_norm: 0.9999990880970137, iteration: 312759
loss: 0.984110414981842,grad_norm: 0.8870144072693125, iteration: 312760
loss: 1.1200361251831055,grad_norm: 0.9999997445430099, iteration: 312761
loss: 0.97771155834198,grad_norm: 0.8544616451657686, iteration: 312762
loss: 0.977414071559906,grad_norm: 0.9044416640861093, iteration: 312763
loss: 1.015526533126831,grad_norm: 0.8384863945888553, iteration: 312764
loss: 1.0485999584197998,grad_norm: 0.9878980481358314, iteration: 312765
loss: 1.015402913093567,grad_norm: 0.7788782864487828, iteration: 312766
loss: 1.0176132917404175,grad_norm: 0.8176830779308416, iteration: 312767
loss: 0.9747537970542908,grad_norm: 0.9999991224526305, iteration: 312768
loss: 0.9613616466522217,grad_norm: 0.9999999092684889, iteration: 312769
loss: 0.9749811291694641,grad_norm: 0.7592234545050347, iteration: 312770
loss: 0.9935098886489868,grad_norm: 0.999999004582469, iteration: 312771
loss: 1.0180473327636719,grad_norm: 0.8598274801626531, iteration: 312772
loss: 1.0060691833496094,grad_norm: 0.8078689059457209, iteration: 312773
loss: 0.9555073380470276,grad_norm: 0.8180365272090139, iteration: 312774
loss: 0.9883620738983154,grad_norm: 0.7320619420258179, iteration: 312775
loss: 1.0023536682128906,grad_norm: 0.9641913484020836, iteration: 312776
loss: 0.9901520609855652,grad_norm: 0.7938496217264362, iteration: 312777
loss: 0.9882717132568359,grad_norm: 0.7261983734954598, iteration: 312778
loss: 1.0313560962677002,grad_norm: 0.8922236275910559, iteration: 312779
loss: 1.0060521364212036,grad_norm: 0.9596531622128198, iteration: 312780
loss: 1.017045021057129,grad_norm: 0.8109633209249573, iteration: 312781
loss: 0.9806923866271973,grad_norm: 0.8197578985681393, iteration: 312782
loss: 1.132596492767334,grad_norm: 0.999999308842724, iteration: 312783
loss: 1.0160019397735596,grad_norm: 0.8247096753940061, iteration: 312784
loss: 1.0032988786697388,grad_norm: 0.8755388314250047, iteration: 312785
loss: 1.009893774986267,grad_norm: 0.833885684479468, iteration: 312786
loss: 0.9866780638694763,grad_norm: 0.7492119228237271, iteration: 312787
loss: 0.9671192169189453,grad_norm: 0.8127129701185315, iteration: 312788
loss: 1.033242106437683,grad_norm: 0.9138535042861636, iteration: 312789
loss: 0.98671555519104,grad_norm: 0.9834106648467712, iteration: 312790
loss: 0.9972800016403198,grad_norm: 0.9999991110436207, iteration: 312791
loss: 0.9632656574249268,grad_norm: 0.8118096581387677, iteration: 312792
loss: 1.0166702270507812,grad_norm: 0.9059675130714645, iteration: 312793
loss: 1.0653104782104492,grad_norm: 0.9999998907453046, iteration: 312794
loss: 1.0081586837768555,grad_norm: 0.7360986517764826, iteration: 312795
loss: 0.9657968878746033,grad_norm: 0.8263614523248789, iteration: 312796
loss: 0.983061671257019,grad_norm: 0.9617648578633384, iteration: 312797
loss: 1.0102581977844238,grad_norm: 0.999999334259307, iteration: 312798
loss: 0.9794697165489197,grad_norm: 0.76160835198912, iteration: 312799
loss: 1.015671968460083,grad_norm: 0.670294054817885, iteration: 312800
loss: 0.984545886516571,grad_norm: 0.6923792426748816, iteration: 312801
loss: 1.000540018081665,grad_norm: 0.980270746051922, iteration: 312802
loss: 1.0381722450256348,grad_norm: 0.6714381960465842, iteration: 312803
loss: 1.0332839488983154,grad_norm: 0.8834660742514552, iteration: 312804
loss: 1.0040035247802734,grad_norm: 0.8143589123640954, iteration: 312805
loss: 1.0048000812530518,grad_norm: 0.8397195674433152, iteration: 312806
loss: 0.9956395030021667,grad_norm: 0.7909176495493964, iteration: 312807
loss: 1.02243173122406,grad_norm: 0.9999997087441909, iteration: 312808
loss: 1.0096473693847656,grad_norm: 0.9458761999348877, iteration: 312809
loss: 0.9835737347602844,grad_norm: 0.9233942281384594, iteration: 312810
loss: 1.0018484592437744,grad_norm: 0.7522234453371118, iteration: 312811
loss: 1.0566234588623047,grad_norm: 0.9999995470943966, iteration: 312812
loss: 1.0128462314605713,grad_norm: 0.8624202081381934, iteration: 312813
loss: 1.0181069374084473,grad_norm: 0.8431143184409247, iteration: 312814
loss: 1.0155820846557617,grad_norm: 0.8689938910107456, iteration: 312815
loss: 0.9991363286972046,grad_norm: 0.7158134651950484, iteration: 312816
loss: 0.9974750280380249,grad_norm: 0.9506492148863274, iteration: 312817
loss: 1.026284098625183,grad_norm: 0.9661274550731364, iteration: 312818
loss: 1.0099889039993286,grad_norm: 0.9999999555295016, iteration: 312819
loss: 0.9980887174606323,grad_norm: 0.788741800563945, iteration: 312820
loss: 0.9953247308731079,grad_norm: 0.8519617788333569, iteration: 312821
loss: 1.0277822017669678,grad_norm: 0.8808000276315029, iteration: 312822
loss: 1.0181655883789062,grad_norm: 0.9249198355729263, iteration: 312823
loss: 0.965390145778656,grad_norm: 0.7687158641913101, iteration: 312824
loss: 0.9857481718063354,grad_norm: 0.9225155681218992, iteration: 312825
loss: 1.0381156206130981,grad_norm: 0.9424505952533488, iteration: 312826
loss: 1.0214602947235107,grad_norm: 0.6375548475111397, iteration: 312827
loss: 1.0117756128311157,grad_norm: 0.79848291605259, iteration: 312828
loss: 0.9534040689468384,grad_norm: 0.80265187177196, iteration: 312829
loss: 1.0109617710113525,grad_norm: 0.9933094022556718, iteration: 312830
loss: 1.0025349855422974,grad_norm: 0.7879603580736914, iteration: 312831
loss: 0.994714081287384,grad_norm: 0.885818693638931, iteration: 312832
loss: 0.9885129928588867,grad_norm: 0.9999991756285617, iteration: 312833
loss: 0.9864441156387329,grad_norm: 0.9999993306284627, iteration: 312834
loss: 1.0039703845977783,grad_norm: 0.9999996968357583, iteration: 312835
loss: 1.0164769887924194,grad_norm: 0.9999990420998169, iteration: 312836
loss: 0.9664495587348938,grad_norm: 0.9144982018433868, iteration: 312837
loss: 1.0129138231277466,grad_norm: 0.9999992158984493, iteration: 312838
loss: 0.9530853033065796,grad_norm: 0.8393800549617703, iteration: 312839
loss: 1.0083339214324951,grad_norm: 0.8882178921884843, iteration: 312840
loss: 0.9900092482566833,grad_norm: 0.9017323468527123, iteration: 312841
loss: 0.9746382832527161,grad_norm: 0.9088331950205291, iteration: 312842
loss: 1.0344938039779663,grad_norm: 0.9135857317134356, iteration: 312843
loss: 1.0254255533218384,grad_norm: 0.7689656913969262, iteration: 312844
loss: 1.0861258506774902,grad_norm: 0.7388168214398282, iteration: 312845
loss: 0.9729854464530945,grad_norm: 0.6371946652523464, iteration: 312846
loss: 1.0079280138015747,grad_norm: 0.8605828279843499, iteration: 312847
loss: 1.0142792463302612,grad_norm: 0.875749553455214, iteration: 312848
loss: 0.9846138954162598,grad_norm: 0.8214150706755915, iteration: 312849
loss: 1.0215024948120117,grad_norm: 0.8788082281579737, iteration: 312850
loss: 0.9779216647148132,grad_norm: 0.9999990200099891, iteration: 312851
loss: 1.0771729946136475,grad_norm: 0.8012489006793844, iteration: 312852
loss: 0.9707890748977661,grad_norm: 0.9415668398871821, iteration: 312853
loss: 0.992567241191864,grad_norm: 0.800075394475303, iteration: 312854
loss: 1.0284926891326904,grad_norm: 0.8595858968161876, iteration: 312855
loss: 0.9785124659538269,grad_norm: 0.8302555957512773, iteration: 312856
loss: 1.0496610403060913,grad_norm: 0.8718464909011748, iteration: 312857
loss: 0.9757111072540283,grad_norm: 0.9020028557772516, iteration: 312858
loss: 0.9679098129272461,grad_norm: 0.7586347275141498, iteration: 312859
loss: 1.017840027809143,grad_norm: 0.9999990776846152, iteration: 312860
loss: 1.0205193758010864,grad_norm: 0.6966715762517119, iteration: 312861
loss: 1.0090899467468262,grad_norm: 0.7838340704166056, iteration: 312862
loss: 1.0127170085906982,grad_norm: 0.8158917746096522, iteration: 312863
loss: 0.9803763031959534,grad_norm: 0.9555631217391105, iteration: 312864
loss: 1.0103623867034912,grad_norm: 0.7808412246145449, iteration: 312865
loss: 1.0131725072860718,grad_norm: 0.7776919365419455, iteration: 312866
loss: 1.0174967050552368,grad_norm: 0.8342183732252448, iteration: 312867
loss: 1.016992449760437,grad_norm: 0.7863569278166869, iteration: 312868
loss: 0.9854727387428284,grad_norm: 0.9999992413703334, iteration: 312869
loss: 1.0375704765319824,grad_norm: 0.832387541842085, iteration: 312870
loss: 1.0007728338241577,grad_norm: 0.7763027218835815, iteration: 312871
loss: 1.0124417543411255,grad_norm: 0.9610099545265594, iteration: 312872
loss: 1.1259340047836304,grad_norm: 1.0000000350857106, iteration: 312873
loss: 1.035619854927063,grad_norm: 0.8639707588846304, iteration: 312874
loss: 1.0803290605545044,grad_norm: 0.8563820892532289, iteration: 312875
loss: 1.0388357639312744,grad_norm: 0.7975070649369725, iteration: 312876
loss: 0.9941626787185669,grad_norm: 0.9972021210078927, iteration: 312877
loss: 1.013211727142334,grad_norm: 0.795556885118244, iteration: 312878
loss: 0.9911330938339233,grad_norm: 0.7490834122818192, iteration: 312879
loss: 1.2742294073104858,grad_norm: 0.9999998764455523, iteration: 312880
loss: 1.0048742294311523,grad_norm: 0.7536536097099241, iteration: 312881
loss: 0.991585910320282,grad_norm: 0.8874400303232712, iteration: 312882
loss: 0.9912623763084412,grad_norm: 0.8317063630745658, iteration: 312883
loss: 0.9832215905189514,grad_norm: 0.7522131797335493, iteration: 312884
loss: 1.04612398147583,grad_norm: 0.798215613360566, iteration: 312885
loss: 1.2141762971878052,grad_norm: 0.9999998846494009, iteration: 312886
loss: 1.0198370218276978,grad_norm: 0.8040905983720701, iteration: 312887
loss: 1.0320383310317993,grad_norm: 0.8485778697339428, iteration: 312888
loss: 0.9660729169845581,grad_norm: 0.8278844105343707, iteration: 312889
loss: 0.9973093271255493,grad_norm: 0.8513666497399798, iteration: 312890
loss: 1.0879098176956177,grad_norm: 0.9999996251831339, iteration: 312891
loss: 1.0569849014282227,grad_norm: 0.9999990438054269, iteration: 312892
loss: 0.9851738214492798,grad_norm: 0.9498544552816438, iteration: 312893
loss: 1.1820659637451172,grad_norm: 0.9999993025811309, iteration: 312894
loss: 1.0408700704574585,grad_norm: 0.9999994138869408, iteration: 312895
loss: 1.0341955423355103,grad_norm: 0.9112976484618369, iteration: 312896
loss: 1.0839895009994507,grad_norm: 0.8246013682931158, iteration: 312897
loss: 0.9861579537391663,grad_norm: 0.8314851265173018, iteration: 312898
loss: 1.0785348415374756,grad_norm: 0.8724697434743504, iteration: 312899
loss: 0.9524573683738708,grad_norm: 0.9403984772434345, iteration: 312900
loss: 1.1902036666870117,grad_norm: 0.9999994642268623, iteration: 312901
loss: 1.027085304260254,grad_norm: 0.9665139268390178, iteration: 312902
loss: 1.0074503421783447,grad_norm: 0.9334553864007034, iteration: 312903
loss: 0.9902943968772888,grad_norm: 0.7181696086492583, iteration: 312904
loss: 0.9843518733978271,grad_norm: 0.801026192274171, iteration: 312905
loss: 1.0250003337860107,grad_norm: 0.9317599057429603, iteration: 312906
loss: 0.980987548828125,grad_norm: 0.9780543792641396, iteration: 312907
loss: 1.128287672996521,grad_norm: 0.9999999545771858, iteration: 312908
loss: 1.0730020999908447,grad_norm: 0.9999995478364215, iteration: 312909
loss: 1.04347825050354,grad_norm: 0.9999997839588293, iteration: 312910
loss: 1.1688013076782227,grad_norm: 0.9999994649097945, iteration: 312911
loss: 1.047709345817566,grad_norm: 0.9094140686325592, iteration: 312912
loss: 1.0502983331680298,grad_norm: 0.9044533163241175, iteration: 312913
loss: 1.0236510038375854,grad_norm: 0.8918492746095037, iteration: 312914
loss: 1.0610586404800415,grad_norm: 0.9999993253545242, iteration: 312915
loss: 1.0801655054092407,grad_norm: 0.9999996231246788, iteration: 312916
loss: 1.0340498685836792,grad_norm: 0.9999992429195352, iteration: 312917
loss: 1.0987753868103027,grad_norm: 0.9999992359447915, iteration: 312918
loss: 1.0663650035858154,grad_norm: 0.9999995155860297, iteration: 312919
loss: 1.0023624897003174,grad_norm: 0.8746604559252701, iteration: 312920
loss: 1.0037198066711426,grad_norm: 0.9748497265317505, iteration: 312921
loss: 1.0564430952072144,grad_norm: 0.8538206695095012, iteration: 312922
loss: 1.0469101667404175,grad_norm: 0.9999995120865676, iteration: 312923
loss: 1.0743803977966309,grad_norm: 0.9999991271295496, iteration: 312924
loss: 1.0577583312988281,grad_norm: 0.9999997923042817, iteration: 312925
loss: 0.9968545436859131,grad_norm: 0.8142575488276016, iteration: 312926
loss: 1.0415585041046143,grad_norm: 0.9999993644825682, iteration: 312927
loss: 0.9929442405700684,grad_norm: 0.8642661672667052, iteration: 312928
loss: 1.0164313316345215,grad_norm: 0.9999996576104224, iteration: 312929
loss: 1.0231000185012817,grad_norm: 0.9352109926698597, iteration: 312930
loss: 1.0073481798171997,grad_norm: 0.9486648728048196, iteration: 312931
loss: 1.0043820142745972,grad_norm: 0.7625084626473542, iteration: 312932
loss: 1.0096757411956787,grad_norm: 0.8410969729803096, iteration: 312933
loss: 1.0292775630950928,grad_norm: 0.9999991408394481, iteration: 312934
loss: 0.9840359687805176,grad_norm: 0.9999991171773456, iteration: 312935
loss: 1.078384518623352,grad_norm: 0.9999993207446145, iteration: 312936
loss: 0.9796158075332642,grad_norm: 0.8621674869861383, iteration: 312937
loss: 1.1401360034942627,grad_norm: 0.9999993356354364, iteration: 312938
loss: 0.9687590003013611,grad_norm: 0.8931780535892025, iteration: 312939
loss: 1.0152047872543335,grad_norm: 0.8977730675132161, iteration: 312940
loss: 1.089385747909546,grad_norm: 0.9999999560213884, iteration: 312941
loss: 1.029126763343811,grad_norm: 0.9999997974954862, iteration: 312942
loss: 1.0614670515060425,grad_norm: 0.9999990001514313, iteration: 312943
loss: 1.0060253143310547,grad_norm: 0.8712558130333535, iteration: 312944
loss: 1.0326546430587769,grad_norm: 0.999999195208538, iteration: 312945
loss: 1.0155125856399536,grad_norm: 0.9999992522562287, iteration: 312946
loss: 1.0951884984970093,grad_norm: 1.0000000171045753, iteration: 312947
loss: 0.9874092936515808,grad_norm: 0.8157211980947652, iteration: 312948
loss: 1.0017669200897217,grad_norm: 0.6920579325815199, iteration: 312949
loss: 0.9795452952384949,grad_norm: 0.8580280103813545, iteration: 312950
loss: 1.0114058256149292,grad_norm: 0.8913656289075323, iteration: 312951
loss: 0.978247344493866,grad_norm: 0.8034799651605554, iteration: 312952
loss: 1.076769471168518,grad_norm: 0.9999995078400594, iteration: 312953
loss: 1.131316065788269,grad_norm: 0.9621533447943622, iteration: 312954
loss: 1.0239038467407227,grad_norm: 0.89118055276144, iteration: 312955
loss: 1.0056867599487305,grad_norm: 0.7308663582092827, iteration: 312956
loss: 1.0603305101394653,grad_norm: 0.999999290481337, iteration: 312957
loss: 1.0049065351486206,grad_norm: 0.8986619361056319, iteration: 312958
loss: 0.9874913096427917,grad_norm: 0.7755435463623657, iteration: 312959
loss: 1.0240446329116821,grad_norm: 0.870756999208842, iteration: 312960
loss: 1.2104573249816895,grad_norm: 0.999999784605504, iteration: 312961
loss: 0.9929656982421875,grad_norm: 0.7978892170998704, iteration: 312962
loss: 0.9973907470703125,grad_norm: 0.7728396244834309, iteration: 312963
loss: 1.0010212659835815,grad_norm: 0.7269327850399211, iteration: 312964
loss: 0.9553751349449158,grad_norm: 0.9125527186591333, iteration: 312965
loss: 1.0032310485839844,grad_norm: 0.8344269367605843, iteration: 312966
loss: 1.0298817157745361,grad_norm: 0.9234750013766996, iteration: 312967
loss: 0.9881996512413025,grad_norm: 0.9020143512709785, iteration: 312968
loss: 1.1289284229278564,grad_norm: 0.7077833851976465, iteration: 312969
loss: 1.0172995328903198,grad_norm: 0.8120740056467479, iteration: 312970
loss: 1.0146445035934448,grad_norm: 0.8164178753500169, iteration: 312971
loss: 0.9823440909385681,grad_norm: 0.6924325353621528, iteration: 312972
loss: 0.9898023009300232,grad_norm: 0.7501337301928078, iteration: 312973
loss: 1.0190609693527222,grad_norm: 0.8194752853399827, iteration: 312974
loss: 1.045597791671753,grad_norm: 0.9999997859624845, iteration: 312975
loss: 0.982364296913147,grad_norm: 0.8932295951164283, iteration: 312976
loss: 1.0607630014419556,grad_norm: 0.9999996103043141, iteration: 312977
loss: 1.0384836196899414,grad_norm: 0.84233101114546, iteration: 312978
loss: 1.036949634552002,grad_norm: 0.8561369640992637, iteration: 312979
loss: 1.0567829608917236,grad_norm: 0.8415524318371504, iteration: 312980
loss: 0.9926508665084839,grad_norm: 0.9907593900845848, iteration: 312981
loss: 0.9927616119384766,grad_norm: 0.7714591439993883, iteration: 312982
loss: 0.9545285105705261,grad_norm: 0.9999992149169314, iteration: 312983
loss: 0.9872090220451355,grad_norm: 0.8588122516292106, iteration: 312984
loss: 0.9575329422950745,grad_norm: 0.9048802736240232, iteration: 312985
loss: 1.0309414863586426,grad_norm: 0.7855190526506488, iteration: 312986
loss: 1.0014530420303345,grad_norm: 0.9999991186259498, iteration: 312987
loss: 1.0151035785675049,grad_norm: 0.7977274053642742, iteration: 312988
loss: 0.9813677668571472,grad_norm: 0.9708174560387545, iteration: 312989
loss: 1.0049033164978027,grad_norm: 0.8472102633761098, iteration: 312990
loss: 1.017484426498413,grad_norm: 0.8051963429237372, iteration: 312991
loss: 0.9987486004829407,grad_norm: 0.8217548033078429, iteration: 312992
loss: 1.009308934211731,grad_norm: 0.867505590355557, iteration: 312993
loss: 1.006433367729187,grad_norm: 0.8159136472845496, iteration: 312994
loss: 1.0064377784729004,grad_norm: 0.9228201686170947, iteration: 312995
loss: 1.1257928609848022,grad_norm: 0.9999994433433325, iteration: 312996
loss: 0.9967132806777954,grad_norm: 0.6980590671144848, iteration: 312997
loss: 1.0690488815307617,grad_norm: 0.8116154421264741, iteration: 312998
loss: 1.0477566719055176,grad_norm: 0.8506629798555001, iteration: 312999
loss: 0.9974802732467651,grad_norm: 0.850335775229418, iteration: 313000
loss: 1.001729965209961,grad_norm: 0.7915760080086855, iteration: 313001
loss: 1.0295742750167847,grad_norm: 0.8576911937031244, iteration: 313002
loss: 1.0304169654846191,grad_norm: 0.9999998990367241, iteration: 313003
loss: 1.0204898118972778,grad_norm: 0.878559533061893, iteration: 313004
loss: 1.0296541452407837,grad_norm: 0.9111835519547192, iteration: 313005
loss: 1.0132867097854614,grad_norm: 0.793943742153145, iteration: 313006
loss: 1.002669334411621,grad_norm: 0.9999998554757189, iteration: 313007
loss: 0.9983412623405457,grad_norm: 0.8204911341105938, iteration: 313008
loss: 0.9958268404006958,grad_norm: 0.9999992071977478, iteration: 313009
loss: 1.0071089267730713,grad_norm: 0.9240742962897762, iteration: 313010
loss: 1.051041841506958,grad_norm: 0.9999994446127128, iteration: 313011
loss: 1.0578124523162842,grad_norm: 0.9999995533835625, iteration: 313012
loss: 0.9724562168121338,grad_norm: 0.9999997111732482, iteration: 313013
loss: 1.0101839303970337,grad_norm: 0.944900398682604, iteration: 313014
loss: 0.9898995757102966,grad_norm: 0.7533916393993172, iteration: 313015
loss: 1.0200291872024536,grad_norm: 0.8483759374238458, iteration: 313016
loss: 0.9859563708305359,grad_norm: 0.9866207531161568, iteration: 313017
loss: 1.0444751977920532,grad_norm: 0.8861821734583227, iteration: 313018
loss: 0.9953367710113525,grad_norm: 0.9686093615155056, iteration: 313019
loss: 0.9846406579017639,grad_norm: 0.7753873595700045, iteration: 313020
loss: 1.0005528926849365,grad_norm: 0.7629146777484518, iteration: 313021
loss: 1.0801231861114502,grad_norm: 0.924506951700589, iteration: 313022
loss: 0.9766790866851807,grad_norm: 0.8764692133482312, iteration: 313023
loss: 1.0920395851135254,grad_norm: 0.8174520478086029, iteration: 313024
loss: 1.1113026142120361,grad_norm: 0.9999993854728206, iteration: 313025
loss: 1.0035319328308105,grad_norm: 0.8207348366037998, iteration: 313026
loss: 1.047884464263916,grad_norm: 0.6276694775655465, iteration: 313027
loss: 0.9884661436080933,grad_norm: 0.999999046455541, iteration: 313028
loss: 1.0234107971191406,grad_norm: 0.930234897183587, iteration: 313029
loss: 1.026415228843689,grad_norm: 0.999999033578376, iteration: 313030
loss: 0.9708994030952454,grad_norm: 0.7863906338515416, iteration: 313031
loss: 0.9770404100418091,grad_norm: 0.8929156535089458, iteration: 313032
loss: 0.9849599003791809,grad_norm: 0.8915872033211242, iteration: 313033
loss: 1.0613596439361572,grad_norm: 0.7165571586559484, iteration: 313034
loss: 1.0175526142120361,grad_norm: 0.710874108638918, iteration: 313035
loss: 1.0528185367584229,grad_norm: 0.9999996055693364, iteration: 313036
loss: 1.0243542194366455,grad_norm: 0.8753500439653871, iteration: 313037
loss: 0.9927648901939392,grad_norm: 0.8335289218389753, iteration: 313038
loss: 1.0139753818511963,grad_norm: 0.8128999336479715, iteration: 313039
loss: 1.0512832403182983,grad_norm: 0.9856250116379626, iteration: 313040
loss: 0.9827693700790405,grad_norm: 0.7553734748171006, iteration: 313041
loss: 0.95296710729599,grad_norm: 0.7474757670900631, iteration: 313042
loss: 1.01323401927948,grad_norm: 0.8734471329629663, iteration: 313043
loss: 1.0003235340118408,grad_norm: 0.8856934217825998, iteration: 313044
loss: 1.0231341123580933,grad_norm: 0.8706615206490781, iteration: 313045
loss: 1.0162181854248047,grad_norm: 0.8648205518613035, iteration: 313046
loss: 0.9927098155021667,grad_norm: 0.9059272446317411, iteration: 313047
loss: 0.9625654220581055,grad_norm: 0.999215768304637, iteration: 313048
loss: 0.9750206470489502,grad_norm: 0.9290641682597749, iteration: 313049
loss: 1.0174869298934937,grad_norm: 0.8011042887065716, iteration: 313050
loss: 0.9709396958351135,grad_norm: 0.8825516157724341, iteration: 313051
loss: 1.116270661354065,grad_norm: 0.8971529399311428, iteration: 313052
loss: 1.0494054555892944,grad_norm: 0.9999995779843925, iteration: 313053
loss: 1.100012183189392,grad_norm: 0.803064452660782, iteration: 313054
loss: 0.9772678017616272,grad_norm: 0.9322916936245138, iteration: 313055
loss: 1.1452572345733643,grad_norm: 0.9999997560062607, iteration: 313056
loss: 0.997368574142456,grad_norm: 0.9048065196807388, iteration: 313057
loss: 1.0510876178741455,grad_norm: 0.9755091951914647, iteration: 313058
loss: 0.9882470965385437,grad_norm: 0.7950242822935798, iteration: 313059
loss: 1.0064971446990967,grad_norm: 0.8891449997967958, iteration: 313060
loss: 1.0132509469985962,grad_norm: 0.7796284668533092, iteration: 313061
loss: 1.077427864074707,grad_norm: 0.9999990628622484, iteration: 313062
loss: 0.9861360192298889,grad_norm: 0.9709961240381683, iteration: 313063
loss: 1.0047427415847778,grad_norm: 0.9999999331725065, iteration: 313064
loss: 1.012218952178955,grad_norm: 0.9087461729164438, iteration: 313065
loss: 1.0081596374511719,grad_norm: 0.7297800579446966, iteration: 313066
loss: 1.0003608465194702,grad_norm: 0.7508390638115888, iteration: 313067
loss: 1.0379910469055176,grad_norm: 0.9763083553961734, iteration: 313068
loss: 0.9724302291870117,grad_norm: 0.9999990780193159, iteration: 313069
loss: 1.0164982080459595,grad_norm: 0.9345996459403476, iteration: 313070
loss: 1.0358771085739136,grad_norm: 0.9999991596925909, iteration: 313071
loss: 0.9909073710441589,grad_norm: 0.7683472724652557, iteration: 313072
loss: 1.021399736404419,grad_norm: 0.7716930780890511, iteration: 313073
loss: 0.9904753565788269,grad_norm: 0.9973751896987011, iteration: 313074
loss: 0.9835911393165588,grad_norm: 0.9999989425858239, iteration: 313075
loss: 1.002182960510254,grad_norm: 0.7039817370064733, iteration: 313076
loss: 0.9861158132553101,grad_norm: 0.7989098808053822, iteration: 313077
loss: 1.0088634490966797,grad_norm: 0.8281458582885775, iteration: 313078
loss: 0.9866414070129395,grad_norm: 0.7572070237503774, iteration: 313079
loss: 1.0160962343215942,grad_norm: 0.8421989998263706, iteration: 313080
loss: 1.0021510124206543,grad_norm: 0.8656529994148808, iteration: 313081
loss: 1.0240074396133423,grad_norm: 0.8940532281484919, iteration: 313082
loss: 0.9867458939552307,grad_norm: 0.9641522287714116, iteration: 313083
loss: 1.0137875080108643,grad_norm: 0.94868669636372, iteration: 313084
loss: 0.99250727891922,grad_norm: 0.9137613580648535, iteration: 313085
loss: 1.0330088138580322,grad_norm: 0.7636313344979446, iteration: 313086
loss: 1.0130648612976074,grad_norm: 0.9999990204817294, iteration: 313087
loss: 0.9736326932907104,grad_norm: 0.8172051243283865, iteration: 313088
loss: 0.9566894173622131,grad_norm: 0.7391313663735813, iteration: 313089
loss: 0.9464228749275208,grad_norm: 0.8421016191814465, iteration: 313090
loss: 1.01992666721344,grad_norm: 0.8893202020123149, iteration: 313091
loss: 0.9801267981529236,grad_norm: 0.9532644550746417, iteration: 313092
loss: 1.0258159637451172,grad_norm: 0.999414397806275, iteration: 313093
loss: 1.0253902673721313,grad_norm: 0.8702255816496349, iteration: 313094
loss: 1.0088728666305542,grad_norm: 0.9430518430796371, iteration: 313095
loss: 1.0196384191513062,grad_norm: 0.7540071003438323, iteration: 313096
loss: 0.9742011427879333,grad_norm: 0.9602512270212994, iteration: 313097
loss: 0.9962841868400574,grad_norm: 0.9486047147343307, iteration: 313098
loss: 1.0237092971801758,grad_norm: 0.9720794770102215, iteration: 313099
loss: 0.9900615215301514,grad_norm: 0.7969291096402646, iteration: 313100
loss: 1.0063503980636597,grad_norm: 0.9999990925487781, iteration: 313101
loss: 0.9977492690086365,grad_norm: 0.8342705844630366, iteration: 313102
loss: 1.0074671506881714,grad_norm: 0.7634325438726826, iteration: 313103
loss: 1.0368788242340088,grad_norm: 0.9361334169697942, iteration: 313104
loss: 0.959252655506134,grad_norm: 0.9999990901958943, iteration: 313105
loss: 0.9804410338401794,grad_norm: 0.9451176861015641, iteration: 313106
loss: 0.9659375548362732,grad_norm: 0.930935499703601, iteration: 313107
loss: 0.9436343312263489,grad_norm: 0.893405248189668, iteration: 313108
loss: 1.0038328170776367,grad_norm: 0.9030999419621216, iteration: 313109
loss: 0.9808309674263,grad_norm: 0.8989094943358026, iteration: 313110
loss: 0.9992488622665405,grad_norm: 0.8252446250568048, iteration: 313111
loss: 0.9804394841194153,grad_norm: 0.8466313902295987, iteration: 313112
loss: 1.0561110973358154,grad_norm: 0.9008321023094464, iteration: 313113
loss: 1.1437405347824097,grad_norm: 0.9999990600335803, iteration: 313114
loss: 0.9805282354354858,grad_norm: 0.7550177160482066, iteration: 313115
loss: 0.9926972389221191,grad_norm: 0.8625184915701432, iteration: 313116
loss: 0.9914780259132385,grad_norm: 0.7717300639802042, iteration: 313117
loss: 0.9961572289466858,grad_norm: 0.9813387279730733, iteration: 313118
loss: 0.985022246837616,grad_norm: 0.8034755610252801, iteration: 313119
loss: 1.0380858182907104,grad_norm: 0.9999999020539878, iteration: 313120
loss: 1.0058709383010864,grad_norm: 0.7597658338410807, iteration: 313121
loss: 1.1391236782073975,grad_norm: 0.9999991939401531, iteration: 313122
loss: 0.9688569903373718,grad_norm: 0.9997290298128259, iteration: 313123
loss: 0.9925234913825989,grad_norm: 0.7828527029993423, iteration: 313124
loss: 1.004638910293579,grad_norm: 0.7826892433753679, iteration: 313125
loss: 1.000136137008667,grad_norm: 0.7092816612369452, iteration: 313126
loss: 0.9950130581855774,grad_norm: 0.8657154101387011, iteration: 313127
loss: 0.9753748774528503,grad_norm: 0.8022823138429893, iteration: 313128
loss: 0.9802976250648499,grad_norm: 0.8594965307985731, iteration: 313129
loss: 1.0168315172195435,grad_norm: 0.7741777773282706, iteration: 313130
loss: 1.0280590057373047,grad_norm: 0.968128641104527, iteration: 313131
loss: 1.1213754415512085,grad_norm: 0.9999997445469273, iteration: 313132
loss: 0.9820900559425354,grad_norm: 0.9212842320583424, iteration: 313133
loss: 1.002570629119873,grad_norm: 0.8183524798866973, iteration: 313134
loss: 1.0677168369293213,grad_norm: 0.9999998269577604, iteration: 313135
loss: 0.9898129105567932,grad_norm: 0.7659783206675356, iteration: 313136
loss: 0.9949163794517517,grad_norm: 0.8699082743121181, iteration: 313137
loss: 0.9761106371879578,grad_norm: 0.8052951671118634, iteration: 313138
loss: 1.018707513809204,grad_norm: 0.9999993769474382, iteration: 313139
loss: 0.9963133335113525,grad_norm: 0.9306207666456237, iteration: 313140
loss: 0.9793328046798706,grad_norm: 0.6997201342734783, iteration: 313141
loss: 1.0277817249298096,grad_norm: 0.9999990576505233, iteration: 313142
loss: 1.0776313543319702,grad_norm: 0.9999991085019194, iteration: 313143
loss: 1.0391974449157715,grad_norm: 0.9891769428760957, iteration: 313144
loss: 1.010715126991272,grad_norm: 0.9443350583866413, iteration: 313145
loss: 0.9866670966148376,grad_norm: 0.815774577960874, iteration: 313146
loss: 0.9995101094245911,grad_norm: 0.9895551635932108, iteration: 313147
loss: 0.9855883121490479,grad_norm: 0.8448671814965891, iteration: 313148
loss: 1.0027152299880981,grad_norm: 0.9769469506026274, iteration: 313149
loss: 1.0164363384246826,grad_norm: 0.8937116544484681, iteration: 313150
loss: 1.0059819221496582,grad_norm: 0.7581832441878515, iteration: 313151
loss: 1.0081586837768555,grad_norm: 0.8961408025006736, iteration: 313152
loss: 1.0978642702102661,grad_norm: 0.9999991037958308, iteration: 313153
loss: 0.9463809728622437,grad_norm: 0.7108768929328089, iteration: 313154
loss: 1.024725317955017,grad_norm: 0.8955237757201714, iteration: 313155
loss: 1.0412439107894897,grad_norm: 0.8559397056711576, iteration: 313156
loss: 1.0106650590896606,grad_norm: 0.913724739154392, iteration: 313157
loss: 0.9861761927604675,grad_norm: 0.8431084966269329, iteration: 313158
loss: 0.9975989460945129,grad_norm: 0.7624902927376358, iteration: 313159
loss: 1.0159673690795898,grad_norm: 0.8047022495192997, iteration: 313160
loss: 0.9895942807197571,grad_norm: 0.809401665087143, iteration: 313161
loss: 1.0219098329544067,grad_norm: 0.9999990193737516, iteration: 313162
loss: 0.974438488483429,grad_norm: 0.7510475330715131, iteration: 313163
loss: 1.0427255630493164,grad_norm: 0.999999145065623, iteration: 313164
loss: 1.0517139434814453,grad_norm: 0.8543909650507044, iteration: 313165
loss: 0.9880462884902954,grad_norm: 0.9235796888649142, iteration: 313166
loss: 0.9836770296096802,grad_norm: 0.8278940351877667, iteration: 313167
loss: 0.9729772210121155,grad_norm: 0.8858169573317345, iteration: 313168
loss: 0.9919243454933167,grad_norm: 0.7160549017412648, iteration: 313169
loss: 1.038309097290039,grad_norm: 0.9504147938809752, iteration: 313170
loss: 1.0042812824249268,grad_norm: 0.8491036120891524, iteration: 313171
loss: 1.0250707864761353,grad_norm: 0.9776950426897414, iteration: 313172
loss: 0.9856200218200684,grad_norm: 0.9161360412542343, iteration: 313173
loss: 1.2329673767089844,grad_norm: 0.999999740243454, iteration: 313174
loss: 0.9673265814781189,grad_norm: 0.7313685522746497, iteration: 313175
loss: 0.9843354225158691,grad_norm: 0.8586284801914198, iteration: 313176
loss: 0.9905083775520325,grad_norm: 0.8168008331376782, iteration: 313177
loss: 0.991476833820343,grad_norm: 0.9999993560286414, iteration: 313178
loss: 1.003603219985962,grad_norm: 0.835122685479595, iteration: 313179
loss: 1.015068531036377,grad_norm: 0.9999990343202363, iteration: 313180
loss: 0.9648887515068054,grad_norm: 0.8061662707958678, iteration: 313181
loss: 0.9583667516708374,grad_norm: 0.8030469566331728, iteration: 313182
loss: 0.9729416966438293,grad_norm: 0.7194099593824748, iteration: 313183
loss: 0.9908502101898193,grad_norm: 0.8300756042399425, iteration: 313184
loss: 0.9603387713432312,grad_norm: 0.7568722231905137, iteration: 313185
loss: 1.0447691679000854,grad_norm: 0.9999990520205289, iteration: 313186
loss: 1.0237833261489868,grad_norm: 0.7995831510726396, iteration: 313187
loss: 1.0146596431732178,grad_norm: 0.8586071879360949, iteration: 313188
loss: 1.0335016250610352,grad_norm: 0.8001254112838451, iteration: 313189
loss: 0.9938995838165283,grad_norm: 0.8816363735744588, iteration: 313190
loss: 1.176132082939148,grad_norm: 0.9999996473065472, iteration: 313191
loss: 0.995064914226532,grad_norm: 0.9999994506979455, iteration: 313192
loss: 1.0273038148880005,grad_norm: 0.888129320779676, iteration: 313193
loss: 0.9728912115097046,grad_norm: 0.8447597309878884, iteration: 313194
loss: 0.9516961574554443,grad_norm: 0.9680117208042467, iteration: 313195
loss: 0.9969292283058167,grad_norm: 0.8672518388944074, iteration: 313196
loss: 0.9855988621711731,grad_norm: 0.8148383719308169, iteration: 313197
loss: 1.068850040435791,grad_norm: 0.8213430998676579, iteration: 313198
loss: 1.0077180862426758,grad_norm: 0.9999990124772564, iteration: 313199
loss: 1.0025256872177124,grad_norm: 0.6925077085383224, iteration: 313200
loss: 1.1140131950378418,grad_norm: 0.9999999218914973, iteration: 313201
loss: 0.975618302822113,grad_norm: 0.7239176002121422, iteration: 313202
loss: 1.0028187036514282,grad_norm: 0.999998947857662, iteration: 313203
loss: 0.9982427358627319,grad_norm: 0.8294387896578778, iteration: 313204
loss: 1.0151338577270508,grad_norm: 0.9728732757290518, iteration: 313205
loss: 0.9654313921928406,grad_norm: 0.9999990426221467, iteration: 313206
loss: 0.9808081984519958,grad_norm: 0.777751451711093, iteration: 313207
loss: 1.0524789094924927,grad_norm: 0.9999991414299643, iteration: 313208
loss: 0.9790747761726379,grad_norm: 0.9946592969080058, iteration: 313209
loss: 1.0365793704986572,grad_norm: 0.9999989766940446, iteration: 313210
loss: 1.013006329536438,grad_norm: 0.9999996814949091, iteration: 313211
loss: 0.9763308167457581,grad_norm: 0.7990454505521296, iteration: 313212
loss: 0.9911608695983887,grad_norm: 0.8830719791179971, iteration: 313213
loss: 1.0303157567977905,grad_norm: 0.9078447203492407, iteration: 313214
loss: 0.9842150807380676,grad_norm: 0.8223986762238519, iteration: 313215
loss: 1.0112606287002563,grad_norm: 0.8835136025318658, iteration: 313216
loss: 1.0153971910476685,grad_norm: 0.9999991091706215, iteration: 313217
loss: 1.0101732015609741,grad_norm: 0.7850050227988755, iteration: 313218
loss: 1.0136852264404297,grad_norm: 0.8923485597103735, iteration: 313219
loss: 0.9944502711296082,grad_norm: 0.8423267069780426, iteration: 313220
loss: 0.988024115562439,grad_norm: 0.8080830712305308, iteration: 313221
loss: 0.9879080653190613,grad_norm: 0.948116003881601, iteration: 313222
loss: 0.986586332321167,grad_norm: 0.7672367858053104, iteration: 313223
loss: 0.9804615378379822,grad_norm: 0.8999129597876614, iteration: 313224
loss: 1.0296093225479126,grad_norm: 0.9999993394795065, iteration: 313225
loss: 0.9937673211097717,grad_norm: 0.9361906839773378, iteration: 313226
loss: 0.997633695602417,grad_norm: 0.8363464808455271, iteration: 313227
loss: 1.0122666358947754,grad_norm: 0.8394671231566394, iteration: 313228
loss: 0.9795708060264587,grad_norm: 0.8746046073567554, iteration: 313229
loss: 1.0318260192871094,grad_norm: 0.8489179175611934, iteration: 313230
loss: 0.9963276386260986,grad_norm: 0.9537711109460694, iteration: 313231
loss: 1.0073661804199219,grad_norm: 0.9999991473208948, iteration: 313232
loss: 1.0174567699432373,grad_norm: 0.8358242866402834, iteration: 313233
loss: 0.9819278120994568,grad_norm: 0.7986673909257779, iteration: 313234
loss: 1.0087615251541138,grad_norm: 0.8203346730099214, iteration: 313235
loss: 0.955920398235321,grad_norm: 0.8111445836462262, iteration: 313236
loss: 1.0022705793380737,grad_norm: 0.9999992085860111, iteration: 313237
loss: 1.0162256956100464,grad_norm: 1.0000000068772714, iteration: 313238
loss: 1.0024807453155518,grad_norm: 0.661942315816696, iteration: 313239
loss: 1.0474634170532227,grad_norm: 0.8519115877074739, iteration: 313240
loss: 1.0041722059249878,grad_norm: 0.757258422677773, iteration: 313241
loss: 1.0249325037002563,grad_norm: 0.9999994505417318, iteration: 313242
loss: 1.015103816986084,grad_norm: 0.7573033722237427, iteration: 313243
loss: 0.9653668403625488,grad_norm: 0.9337638050232925, iteration: 313244
loss: 1.0355831384658813,grad_norm: 0.9266590275578953, iteration: 313245
loss: 1.0048322677612305,grad_norm: 0.9226679040058918, iteration: 313246
loss: 0.9936919808387756,grad_norm: 0.7608179414532985, iteration: 313247
loss: 0.9850791096687317,grad_norm: 0.8207774374272098, iteration: 313248
loss: 1.0029693841934204,grad_norm: 0.8653947972055657, iteration: 313249
loss: 0.9800859093666077,grad_norm: 0.9999990256391785, iteration: 313250
loss: 1.0184433460235596,grad_norm: 0.791503530699052, iteration: 313251
loss: 1.0157634019851685,grad_norm: 0.9999991118771516, iteration: 313252
loss: 0.9723250269889832,grad_norm: 0.8625319517115498, iteration: 313253
loss: 0.9890356659889221,grad_norm: 0.8739614456150406, iteration: 313254
loss: 0.9459106922149658,grad_norm: 0.8657689677062267, iteration: 313255
loss: 1.0091325044631958,grad_norm: 0.8101576955081832, iteration: 313256
loss: 0.9876288175582886,grad_norm: 0.9744592067041535, iteration: 313257
loss: 1.0115412473678589,grad_norm: 0.9661181550422787, iteration: 313258
loss: 0.9788690805435181,grad_norm: 0.884205875426433, iteration: 313259
loss: 0.9612681269645691,grad_norm: 0.8286773186662569, iteration: 313260
loss: 0.999962568283081,grad_norm: 0.9333951962173713, iteration: 313261
loss: 0.996131956577301,grad_norm: 0.8922662831765692, iteration: 313262
loss: 0.9950947165489197,grad_norm: 0.8651628335210431, iteration: 313263
loss: 0.9956145882606506,grad_norm: 0.8002150728270847, iteration: 313264
loss: 1.0148568153381348,grad_norm: 0.9999991030506536, iteration: 313265
loss: 0.9950835108757019,grad_norm: 0.9084502865424503, iteration: 313266
loss: 1.0029244422912598,grad_norm: 0.753975560432288, iteration: 313267
loss: 0.9935104846954346,grad_norm: 0.7134447398453835, iteration: 313268
loss: 1.0248128175735474,grad_norm: 0.8614223955016622, iteration: 313269
loss: 0.9900718331336975,grad_norm: 0.8206258590249617, iteration: 313270
loss: 1.0130168199539185,grad_norm: 0.9061242481531614, iteration: 313271
loss: 1.039265513420105,grad_norm: 0.9999989719987681, iteration: 313272
loss: 0.9558392763137817,grad_norm: 0.7515352697240631, iteration: 313273
loss: 0.9803133606910706,grad_norm: 0.9999990906610322, iteration: 313274
loss: 0.9812425971031189,grad_norm: 0.941903621605089, iteration: 313275
loss: 1.0113587379455566,grad_norm: 0.6744567961792549, iteration: 313276
loss: 1.0108652114868164,grad_norm: 0.8968133970866354, iteration: 313277
loss: 0.9988595247268677,grad_norm: 0.7417248583812385, iteration: 313278
loss: 1.0036622285842896,grad_norm: 0.9664139088171751, iteration: 313279
loss: 1.0136135816574097,grad_norm: 0.9071883326525418, iteration: 313280
loss: 1.0194863080978394,grad_norm: 0.9999993855375632, iteration: 313281
loss: 1.0131293535232544,grad_norm: 0.6699111378206274, iteration: 313282
loss: 0.9726660251617432,grad_norm: 0.852530940704907, iteration: 313283
loss: 1.0207977294921875,grad_norm: 0.9444587128647671, iteration: 313284
loss: 1.0002093315124512,grad_norm: 0.9984319851479506, iteration: 313285
loss: 1.0183899402618408,grad_norm: 0.7801052064176199, iteration: 313286
loss: 1.0071001052856445,grad_norm: 0.8105303774991561, iteration: 313287
loss: 0.9968075752258301,grad_norm: 0.8115618448984113, iteration: 313288
loss: 0.9899643659591675,grad_norm: 0.9999991231593431, iteration: 313289
loss: 1.0117168426513672,grad_norm: 0.8047711318520083, iteration: 313290
loss: 1.0248863697052002,grad_norm: 0.8209241092270052, iteration: 313291
loss: 0.9652224183082581,grad_norm: 0.99999925448996, iteration: 313292
loss: 1.0134247541427612,grad_norm: 0.9340575526497276, iteration: 313293
loss: 0.9952265620231628,grad_norm: 0.8269024112797319, iteration: 313294
loss: 1.0112241506576538,grad_norm: 0.8339344473953322, iteration: 313295
loss: 1.0643553733825684,grad_norm: 0.7882966093863608, iteration: 313296
loss: 0.9896877408027649,grad_norm: 0.9999991700080565, iteration: 313297
loss: 0.9932306408882141,grad_norm: 0.8564647067968799, iteration: 313298
loss: 1.0175062417984009,grad_norm: 0.8804380038799976, iteration: 313299
loss: 1.09535813331604,grad_norm: 0.9999996983824528, iteration: 313300
loss: 0.9713983535766602,grad_norm: 0.8329316027983702, iteration: 313301
loss: 0.9912216067314148,grad_norm: 0.8179473537586858, iteration: 313302
loss: 1.1601231098175049,grad_norm: 0.9999999944916166, iteration: 313303
loss: 0.9967309832572937,grad_norm: 0.9999992295466529, iteration: 313304
loss: 1.0063921213150024,grad_norm: 0.8319287912712408, iteration: 313305
loss: 0.9977607131004333,grad_norm: 0.7743172266207226, iteration: 313306
loss: 1.0039061307907104,grad_norm: 0.9999990526383714, iteration: 313307
loss: 1.0248093605041504,grad_norm: 0.8603419531953641, iteration: 313308
loss: 0.9718061089515686,grad_norm: 0.9999989958848436, iteration: 313309
loss: 1.0185295343399048,grad_norm: 0.7010625459604299, iteration: 313310
loss: 0.9833106398582458,grad_norm: 0.999999282863547, iteration: 313311
loss: 1.005476951599121,grad_norm: 0.7204479576643179, iteration: 313312
loss: 1.0422478914260864,grad_norm: 0.9999992425855244, iteration: 313313
loss: 0.9927613139152527,grad_norm: 0.8699258731059865, iteration: 313314
loss: 1.0239485502243042,grad_norm: 0.8869581044311862, iteration: 313315
loss: 0.9920396208763123,grad_norm: 0.8223890196784222, iteration: 313316
loss: 0.9879333972930908,grad_norm: 0.8447308035510394, iteration: 313317
loss: 0.976384162902832,grad_norm: 0.8084031713361399, iteration: 313318
loss: 1.0279953479766846,grad_norm: 0.9999994504683302, iteration: 313319
loss: 0.9897308349609375,grad_norm: 0.9999991401451066, iteration: 313320
loss: 1.0222508907318115,grad_norm: 0.9999998803807538, iteration: 313321
loss: 1.1594443321228027,grad_norm: 0.9999996583009652, iteration: 313322
loss: 0.9701291918754578,grad_norm: 0.762853339388425, iteration: 313323
loss: 0.9945511817932129,grad_norm: 0.8958456309916036, iteration: 313324
loss: 0.9934197664260864,grad_norm: 0.8001868919018923, iteration: 313325
loss: 0.9760920405387878,grad_norm: 0.8790788471838824, iteration: 313326
loss: 0.9813001751899719,grad_norm: 0.9838932833078773, iteration: 313327
loss: 1.0252842903137207,grad_norm: 0.8760584968239591, iteration: 313328
loss: 1.0010470151901245,grad_norm: 0.9815840382294942, iteration: 313329
loss: 1.0096455812454224,grad_norm: 0.9999992370250083, iteration: 313330
loss: 1.002500295639038,grad_norm: 0.9305428950180317, iteration: 313331
loss: 1.0343302488327026,grad_norm: 0.999999065514489, iteration: 313332
loss: 1.0226247310638428,grad_norm: 0.8233142813959623, iteration: 313333
loss: 1.0208035707473755,grad_norm: 0.7920069207355076, iteration: 313334
loss: 1.0010026693344116,grad_norm: 0.9135302260110939, iteration: 313335
loss: 1.0227129459381104,grad_norm: 0.7735654791943964, iteration: 313336
loss: 1.0219948291778564,grad_norm: 0.9071663665599636, iteration: 313337
loss: 1.0178221464157104,grad_norm: 0.8154480585093712, iteration: 313338
loss: 1.0038594007492065,grad_norm: 0.8899391321470567, iteration: 313339
loss: 1.0802406072616577,grad_norm: 0.8019385771796287, iteration: 313340
loss: 1.031463861465454,grad_norm: 0.9032623425607259, iteration: 313341
loss: 0.996532678604126,grad_norm: 0.9679379189622468, iteration: 313342
loss: 0.9592081308364868,grad_norm: 0.97054302444422, iteration: 313343
loss: 1.0146204233169556,grad_norm: 0.8033141943946597, iteration: 313344
loss: 1.0132886171340942,grad_norm: 0.9205042259915598, iteration: 313345
loss: 0.9918407797813416,grad_norm: 0.5922037689525509, iteration: 313346
loss: 1.0302395820617676,grad_norm: 0.9530149367999913, iteration: 313347
loss: 0.9950652718544006,grad_norm: 0.8144346154172091, iteration: 313348
loss: 1.0140342712402344,grad_norm: 0.9876276231978327, iteration: 313349
loss: 0.974409818649292,grad_norm: 0.8066042910671264, iteration: 313350
loss: 1.007814645767212,grad_norm: 0.9404012037268448, iteration: 313351
loss: 1.1599386930465698,grad_norm: 0.941046530552016, iteration: 313352
loss: 1.0467835664749146,grad_norm: 0.7446922792169686, iteration: 313353
loss: 1.0183264017105103,grad_norm: 0.8737693133286888, iteration: 313354
loss: 1.0398807525634766,grad_norm: 0.8166625765925102, iteration: 313355
loss: 0.9748979210853577,grad_norm: 0.9257084429385508, iteration: 313356
loss: 0.9730546474456787,grad_norm: 0.8507883036702432, iteration: 313357
loss: 1.0373214483261108,grad_norm: 0.7489231340700433, iteration: 313358
loss: 0.9992350339889526,grad_norm: 0.7541291490300002, iteration: 313359
loss: 1.096429705619812,grad_norm: 0.9999996614609066, iteration: 313360
loss: 0.9850195646286011,grad_norm: 0.9553838668458476, iteration: 313361
loss: 0.9875684380531311,grad_norm: 0.8900843892023804, iteration: 313362
loss: 0.9636924266815186,grad_norm: 0.7408800059279744, iteration: 313363
loss: 1.0123183727264404,grad_norm: 0.9893014776995832, iteration: 313364
loss: 1.063032865524292,grad_norm: 0.9008065920851321, iteration: 313365
loss: 1.0289907455444336,grad_norm: 0.8026318916802965, iteration: 313366
loss: 1.0444163084030151,grad_norm: 0.9999995489528379, iteration: 313367
loss: 0.9834800362586975,grad_norm: 0.769248383135067, iteration: 313368
loss: 0.98427414894104,grad_norm: 0.9754524974660989, iteration: 313369
loss: 0.9920763373374939,grad_norm: 0.7430784991768327, iteration: 313370
loss: 1.00425386428833,grad_norm: 0.7231406595148334, iteration: 313371
loss: 1.0042593479156494,grad_norm: 0.9376547825930839, iteration: 313372
loss: 1.038780927658081,grad_norm: 0.8851466621532432, iteration: 313373
loss: 0.9859597086906433,grad_norm: 0.8468601586841591, iteration: 313374
loss: 0.9861046075820923,grad_norm: 0.707824578002126, iteration: 313375
loss: 0.9947705864906311,grad_norm: 0.9121135739926627, iteration: 313376
loss: 0.9886797070503235,grad_norm: 0.7989821231924201, iteration: 313377
loss: 0.977188766002655,grad_norm: 0.999999142736966, iteration: 313378
loss: 0.986935019493103,grad_norm: 0.8163814097309892, iteration: 313379
loss: 0.9654368758201599,grad_norm: 0.9999993937551755, iteration: 313380
loss: 1.0242878198623657,grad_norm: 0.8215464295331427, iteration: 313381
loss: 0.9887976050376892,grad_norm: 0.9999994086627424, iteration: 313382
loss: 1.0051023960113525,grad_norm: 0.9719286431784941, iteration: 313383
loss: 1.0925393104553223,grad_norm: 0.8724795512218207, iteration: 313384
loss: 0.9998456835746765,grad_norm: 0.999999632637129, iteration: 313385
loss: 1.0173349380493164,grad_norm: 0.8744004880504496, iteration: 313386
loss: 1.01305091381073,grad_norm: 0.8084150666851483, iteration: 313387
loss: 1.0297200679779053,grad_norm: 0.9111677131873227, iteration: 313388
loss: 0.9959934949874878,grad_norm: 0.7460278966480798, iteration: 313389
loss: 0.9660061597824097,grad_norm: 0.9205374340195204, iteration: 313390
loss: 1.0103425979614258,grad_norm: 0.8792114970197784, iteration: 313391
loss: 1.001242756843567,grad_norm: 0.9999992975649252, iteration: 313392
loss: 1.0250706672668457,grad_norm: 0.8811559905070532, iteration: 313393
loss: 1.004637360572815,grad_norm: 0.8066147515669637, iteration: 313394
loss: 1.121710181236267,grad_norm: 0.9205050961323028, iteration: 313395
loss: 1.070829153060913,grad_norm: 0.9999994081678825, iteration: 313396
loss: 1.0007131099700928,grad_norm: 0.8401672683752086, iteration: 313397
loss: 0.9936560988426208,grad_norm: 0.8189403318604929, iteration: 313398
loss: 0.9842925071716309,grad_norm: 0.9286037063425217, iteration: 313399
loss: 0.9840479493141174,grad_norm: 0.6865042644137258, iteration: 313400
loss: 0.9993968605995178,grad_norm: 0.7929569525518648, iteration: 313401
loss: 1.0034549236297607,grad_norm: 0.7790585678335805, iteration: 313402
loss: 1.0789240598678589,grad_norm: 0.999999265611279, iteration: 313403
loss: 1.0524747371673584,grad_norm: 0.9999992389878128, iteration: 313404
loss: 1.045230507850647,grad_norm: 0.9569565766793773, iteration: 313405
loss: 1.0594950914382935,grad_norm: 0.9999990371808477, iteration: 313406
loss: 0.9698513746261597,grad_norm: 0.999999851695622, iteration: 313407
loss: 0.9632648825645447,grad_norm: 0.8219406275093736, iteration: 313408
loss: 1.0096007585525513,grad_norm: 0.981182875883593, iteration: 313409
loss: 0.9973849654197693,grad_norm: 0.8475101918394505, iteration: 313410
loss: 1.0479016304016113,grad_norm: 0.9999993203303862, iteration: 313411
loss: 1.0937851667404175,grad_norm: 0.999999658159652, iteration: 313412
loss: 1.1295814514160156,grad_norm: 0.9999999701341198, iteration: 313413
loss: 1.275030255317688,grad_norm: 0.9999997634515084, iteration: 313414
loss: 1.0300989151000977,grad_norm: 0.8964917894267443, iteration: 313415
loss: 0.9704102873802185,grad_norm: 0.859883335097878, iteration: 313416
loss: 1.4483914375305176,grad_norm: 0.9999999148083888, iteration: 313417
loss: 1.2146342992782593,grad_norm: 0.9999991682989281, iteration: 313418
loss: 1.059110164642334,grad_norm: 0.9999990341628848, iteration: 313419
loss: 1.0865989923477173,grad_norm: 0.9999991052590042, iteration: 313420
loss: 1.0168486833572388,grad_norm: 0.758086008535406, iteration: 313421
loss: 1.002562403678894,grad_norm: 0.8102492648962486, iteration: 313422
loss: 1.0378565788269043,grad_norm: 0.999999666208479, iteration: 313423
loss: 1.0118401050567627,grad_norm: 0.792872413908644, iteration: 313424
loss: 1.0907397270202637,grad_norm: 0.9999994822794688, iteration: 313425
loss: 1.0011452436447144,grad_norm: 0.8476442443296933, iteration: 313426
loss: 0.9976987838745117,grad_norm: 0.9215291503643834, iteration: 313427
loss: 1.1074554920196533,grad_norm: 0.9999998522234986, iteration: 313428
loss: 0.9773691892623901,grad_norm: 0.8479456163103434, iteration: 313429
loss: 0.9817010164260864,grad_norm: 0.8808983233537108, iteration: 313430
loss: 1.3358807563781738,grad_norm: 0.9999999208272513, iteration: 313431
loss: 1.0636966228485107,grad_norm: 0.7727405829126905, iteration: 313432
loss: 0.9882462620735168,grad_norm: 0.9300713367805888, iteration: 313433
loss: 1.0189752578735352,grad_norm: 0.9122886444718208, iteration: 313434
loss: 1.0394909381866455,grad_norm: 0.8198007726995323, iteration: 313435
loss: 1.1703499555587769,grad_norm: 0.9999992830812964, iteration: 313436
loss: 1.0491338968276978,grad_norm: 0.8529111376119342, iteration: 313437
loss: 1.006308913230896,grad_norm: 0.9999997792966983, iteration: 313438
loss: 1.0893651247024536,grad_norm: 0.9999993096855512, iteration: 313439
loss: 1.341779112815857,grad_norm: 0.9999999831923714, iteration: 313440
loss: 1.0474846363067627,grad_norm: 0.9999998949477372, iteration: 313441
loss: 1.1519355773925781,grad_norm: 0.999999794665104, iteration: 313442
loss: 1.0457741022109985,grad_norm: 0.9999999317208857, iteration: 313443
loss: 0.9928823113441467,grad_norm: 0.9999991256957416, iteration: 313444
loss: 1.0999784469604492,grad_norm: 0.9999999742558793, iteration: 313445
loss: 1.0633267164230347,grad_norm: 0.999999526113315, iteration: 313446
loss: 1.03084397315979,grad_norm: 0.9999999442306605, iteration: 313447
loss: 1.1517573595046997,grad_norm: 0.9999992960164166, iteration: 313448
loss: 1.0812331438064575,grad_norm: 0.9999999038714644, iteration: 313449
loss: 1.0689563751220703,grad_norm: 0.999999892806081, iteration: 313450
loss: 1.0132204294204712,grad_norm: 0.9433818252495119, iteration: 313451
loss: 0.9989824891090393,grad_norm: 0.7811775981119883, iteration: 313452
loss: 1.0291781425476074,grad_norm: 0.7763673785137166, iteration: 313453
loss: 1.035810947418213,grad_norm: 0.7173932809881829, iteration: 313454
loss: 1.066078543663025,grad_norm: 0.8821034321847874, iteration: 313455
loss: 1.0007436275482178,grad_norm: 0.999999021220921, iteration: 313456
loss: 0.9984430074691772,grad_norm: 0.8612885780061573, iteration: 313457
loss: 1.0345470905303955,grad_norm: 0.7547257957697164, iteration: 313458
loss: 1.0011727809906006,grad_norm: 0.8430369572757438, iteration: 313459
loss: 1.002487301826477,grad_norm: 0.999999031821432, iteration: 313460
loss: 1.0445191860198975,grad_norm: 0.9570835557150502, iteration: 313461
loss: 1.0095880031585693,grad_norm: 0.9999991220082598, iteration: 313462
loss: 1.0705173015594482,grad_norm: 0.9425914230277964, iteration: 313463
loss: 1.046073317527771,grad_norm: 0.9873792974059824, iteration: 313464
loss: 1.0242350101470947,grad_norm: 0.9583695267655985, iteration: 313465
loss: 1.044248104095459,grad_norm: 0.9172131133157087, iteration: 313466
loss: 0.9845162034034729,grad_norm: 0.8054655355491389, iteration: 313467
loss: 1.0294853448867798,grad_norm: 0.9999991032652282, iteration: 313468
loss: 0.9754554629325867,grad_norm: 0.8086203633424134, iteration: 313469
loss: 0.9920878410339355,grad_norm: 0.8329024678069368, iteration: 313470
loss: 0.9792941212654114,grad_norm: 0.9822498700163544, iteration: 313471
loss: 1.0217957496643066,grad_norm: 0.7801814198294998, iteration: 313472
loss: 0.9805654287338257,grad_norm: 0.755208131207594, iteration: 313473
loss: 0.992676317691803,grad_norm: 0.7227092966558336, iteration: 313474
loss: 1.010300636291504,grad_norm: 0.8858572457242405, iteration: 313475
loss: 1.016485571861267,grad_norm: 0.9999990485837951, iteration: 313476
loss: 1.0155260562896729,grad_norm: 0.9308154060210081, iteration: 313477
loss: 1.0147055387496948,grad_norm: 0.784906642140243, iteration: 313478
loss: 1.0269790887832642,grad_norm: 0.9084185397988601, iteration: 313479
loss: 0.9966478943824768,grad_norm: 0.9914541027695889, iteration: 313480
loss: 1.0400840044021606,grad_norm: 0.9900770575416376, iteration: 313481
loss: 1.0309916734695435,grad_norm: 0.9999996132849546, iteration: 313482
loss: 1.0142182111740112,grad_norm: 0.8057126357674931, iteration: 313483
loss: 1.0101758241653442,grad_norm: 0.6628478290006248, iteration: 313484
loss: 0.990186870098114,grad_norm: 0.9536010067042189, iteration: 313485
loss: 0.963877260684967,grad_norm: 0.9133528420046853, iteration: 313486
loss: 0.9813027381896973,grad_norm: 0.9999992105042323, iteration: 313487
loss: 1.1184266805648804,grad_norm: 0.9999996726697795, iteration: 313488
loss: 0.9730349779129028,grad_norm: 0.8760298534056883, iteration: 313489
loss: 1.0294325351715088,grad_norm: 0.999999171919449, iteration: 313490
loss: 0.9860232472419739,grad_norm: 0.8517115276100058, iteration: 313491
loss: 1.0091580152511597,grad_norm: 0.9827552790434899, iteration: 313492
loss: 0.9958910942077637,grad_norm: 0.7086585269827094, iteration: 313493
loss: 1.0239794254302979,grad_norm: 0.727563431652045, iteration: 313494
loss: 1.034925937652588,grad_norm: 0.7973138882782544, iteration: 313495
loss: 0.9760441184043884,grad_norm: 0.9223508897666048, iteration: 313496
loss: 0.99216228723526,grad_norm: 0.9999998176971123, iteration: 313497
loss: 1.0650895833969116,grad_norm: 0.9999993971201656, iteration: 313498
loss: 0.9698899984359741,grad_norm: 0.9590518801948262, iteration: 313499
loss: 0.9878422617912292,grad_norm: 0.7766714808885727, iteration: 313500
loss: 1.1112332344055176,grad_norm: 0.99999957407486, iteration: 313501
loss: 0.9900939464569092,grad_norm: 0.8294456907332609, iteration: 313502
loss: 0.9894677400588989,grad_norm: 0.9340154261232286, iteration: 313503
loss: 1.0134170055389404,grad_norm: 0.9668567362871745, iteration: 313504
loss: 1.0762137174606323,grad_norm: 0.9999992250051005, iteration: 313505
loss: 1.0092010498046875,grad_norm: 0.8259611839649385, iteration: 313506
loss: 1.0009599924087524,grad_norm: 0.8947093888800542, iteration: 313507
loss: 1.1049250364303589,grad_norm: 0.999999603568257, iteration: 313508
loss: 1.0121854543685913,grad_norm: 0.857201944659724, iteration: 313509
loss: 1.0065131187438965,grad_norm: 0.9091927590807255, iteration: 313510
loss: 1.077530026435852,grad_norm: 0.9999991203649935, iteration: 313511
loss: 1.0971319675445557,grad_norm: 0.7401136938726418, iteration: 313512
loss: 1.016444206237793,grad_norm: 0.6981258104532715, iteration: 313513
loss: 0.9964532256126404,grad_norm: 0.9087343034883993, iteration: 313514
loss: 0.9718507528305054,grad_norm: 0.9999995527842691, iteration: 313515
loss: 0.9892758727073669,grad_norm: 0.8905749544333178, iteration: 313516
loss: 0.9809973835945129,grad_norm: 0.8694451136918531, iteration: 313517
loss: 1.0128360986709595,grad_norm: 0.7976528842619226, iteration: 313518
loss: 1.0016101598739624,grad_norm: 0.9999990265676911, iteration: 313519
loss: 1.015494704246521,grad_norm: 0.8191214143854597, iteration: 313520
loss: 0.9944420456886292,grad_norm: 0.7370558536350805, iteration: 313521
loss: 0.9832436442375183,grad_norm: 0.7740449024113957, iteration: 313522
loss: 0.9860734939575195,grad_norm: 0.8903534372942183, iteration: 313523
loss: 0.9830828309059143,grad_norm: 0.9999998171438943, iteration: 313524
loss: 1.0703743696212769,grad_norm: 0.888104201822287, iteration: 313525
loss: 1.012412428855896,grad_norm: 0.826186734078105, iteration: 313526
loss: 1.0221662521362305,grad_norm: 0.9999993527156636, iteration: 313527
loss: 0.9857942461967468,grad_norm: 0.9999989920574893, iteration: 313528
loss: 0.9881784319877625,grad_norm: 0.888178177157204, iteration: 313529
loss: 0.989201545715332,grad_norm: 0.9710020754299138, iteration: 313530
loss: 1.0707647800445557,grad_norm: 0.9999995086746934, iteration: 313531
loss: 0.9918038249015808,grad_norm: 0.8550003785695622, iteration: 313532
loss: 1.0019131898880005,grad_norm: 0.8811713642166421, iteration: 313533
loss: 1.0014190673828125,grad_norm: 0.9298349998984733, iteration: 313534
loss: 1.0313454866409302,grad_norm: 0.8893984825456086, iteration: 313535
loss: 1.0320359468460083,grad_norm: 0.9006438055365197, iteration: 313536
loss: 1.0021134614944458,grad_norm: 0.8770088717011115, iteration: 313537
loss: 0.9760542511940002,grad_norm: 0.9999995148168725, iteration: 313538
loss: 0.9722726345062256,grad_norm: 0.7995503990393342, iteration: 313539
loss: 1.0033138990402222,grad_norm: 0.8249492202575764, iteration: 313540
loss: 0.9719229936599731,grad_norm: 0.9742677553664442, iteration: 313541
loss: 0.9649420380592346,grad_norm: 0.7897561007185785, iteration: 313542
loss: 0.9972304105758667,grad_norm: 0.8210684321835763, iteration: 313543
loss: 1.0120285749435425,grad_norm: 0.8584055757108598, iteration: 313544
loss: 1.0073974132537842,grad_norm: 0.9208323873445476, iteration: 313545
loss: 1.0173577070236206,grad_norm: 0.9337694627517051, iteration: 313546
loss: 1.0189553499221802,grad_norm: 0.8922413075928862, iteration: 313547
loss: 0.9741125106811523,grad_norm: 0.8022549687058304, iteration: 313548
loss: 1.0161471366882324,grad_norm: 0.9308783784338307, iteration: 313549
loss: 0.983178436756134,grad_norm: 0.8658113907889378, iteration: 313550
loss: 0.9738386273384094,grad_norm: 0.8009866581852482, iteration: 313551
loss: 1.0059622526168823,grad_norm: 0.7854511749389319, iteration: 313552
loss: 1.0289591550827026,grad_norm: 0.9180890067567736, iteration: 313553
loss: 0.9988147020339966,grad_norm: 0.9841858782754653, iteration: 313554
loss: 1.0204108953475952,grad_norm: 0.8624949567226213, iteration: 313555
loss: 1.0033833980560303,grad_norm: 0.9941335985553476, iteration: 313556
loss: 1.0274866819381714,grad_norm: 0.7502570945131408, iteration: 313557
loss: 1.0094388723373413,grad_norm: 0.739588608116359, iteration: 313558
loss: 1.015550971031189,grad_norm: 0.947062998642179, iteration: 313559
loss: 0.9822124242782593,grad_norm: 0.8899817509983124, iteration: 313560
loss: 0.9601557850837708,grad_norm: 0.9165239660061963, iteration: 313561
loss: 0.9879235029220581,grad_norm: 0.9344374769951893, iteration: 313562
loss: 1.0345538854599,grad_norm: 0.9999990940316631, iteration: 313563
loss: 1.013024926185608,grad_norm: 0.9999992461515821, iteration: 313564
loss: 1.014367938041687,grad_norm: 0.8603832299963572, iteration: 313565
loss: 0.9894015789031982,grad_norm: 0.9999991564592307, iteration: 313566
loss: 0.9881805777549744,grad_norm: 0.8733832679506148, iteration: 313567
loss: 0.9711501598358154,grad_norm: 0.7972727022840643, iteration: 313568
loss: 1.0032414197921753,grad_norm: 0.8968185911493111, iteration: 313569
loss: 0.9405975937843323,grad_norm: 0.85184840242558, iteration: 313570
loss: 1.0144646167755127,grad_norm: 0.9849895738510978, iteration: 313571
loss: 1.0312626361846924,grad_norm: 0.797111529233478, iteration: 313572
loss: 0.9819372296333313,grad_norm: 0.8581704162737276, iteration: 313573
loss: 0.9908721446990967,grad_norm: 0.999999118761327, iteration: 313574
loss: 1.0050420761108398,grad_norm: 0.8362670270546875, iteration: 313575
loss: 0.9665853381156921,grad_norm: 0.8748972482394665, iteration: 313576
loss: 1.022566556930542,grad_norm: 0.999999564028786, iteration: 313577
loss: 1.003415584564209,grad_norm: 0.8941090067449761, iteration: 313578
loss: 1.0265984535217285,grad_norm: 0.9040508454650941, iteration: 313579
loss: 0.9928414821624756,grad_norm: 0.8105963127871904, iteration: 313580
loss: 1.0279046297073364,grad_norm: 0.8051595391181078, iteration: 313581
loss: 1.0226508378982544,grad_norm: 0.9301019254792698, iteration: 313582
loss: 1.007924199104309,grad_norm: 0.8761851488483839, iteration: 313583
loss: 0.9951607584953308,grad_norm: 0.8093866459405988, iteration: 313584
loss: 0.9960574507713318,grad_norm: 0.7969156441505668, iteration: 313585
loss: 1.0334255695343018,grad_norm: 0.7906138741301715, iteration: 313586
loss: 1.0109715461730957,grad_norm: 0.7960893240902672, iteration: 313587
loss: 1.0107773542404175,grad_norm: 0.9004058473631191, iteration: 313588
loss: 1.0060962438583374,grad_norm: 0.7173267125166544, iteration: 313589
loss: 0.9938674569129944,grad_norm: 0.9121021684127909, iteration: 313590
loss: 1.0413949489593506,grad_norm: 0.7880094958620344, iteration: 313591
loss: 1.0164965391159058,grad_norm: 0.7465181002637595, iteration: 313592
loss: 0.9947783946990967,grad_norm: 0.7365192549573321, iteration: 313593
loss: 1.0176466703414917,grad_norm: 0.7816910748605924, iteration: 313594
loss: 1.0260212421417236,grad_norm: 0.9999990649954364, iteration: 313595
loss: 1.0849565267562866,grad_norm: 0.9999998656242626, iteration: 313596
loss: 0.9689421057701111,grad_norm: 0.7443347033492682, iteration: 313597
loss: 0.9794440865516663,grad_norm: 0.9501090580753131, iteration: 313598
loss: 1.0317543745040894,grad_norm: 0.971157186759476, iteration: 313599
loss: 1.0108484029769897,grad_norm: 0.9170286716804251, iteration: 313600
loss: 1.0145554542541504,grad_norm: 0.9338458210653934, iteration: 313601
loss: 0.9951140284538269,grad_norm: 0.87924694690672, iteration: 313602
loss: 1.0055615901947021,grad_norm: 0.7797064594014712, iteration: 313603
loss: 0.9326394200325012,grad_norm: 0.9999990950693712, iteration: 313604
loss: 0.9950771927833557,grad_norm: 0.8586689921027916, iteration: 313605
loss: 0.9918789863586426,grad_norm: 0.8421638070358712, iteration: 313606
loss: 0.9834080338478088,grad_norm: 0.992843986339622, iteration: 313607
loss: 0.998917818069458,grad_norm: 0.9887370446617106, iteration: 313608
loss: 1.0082910060882568,grad_norm: 0.7965122686619216, iteration: 313609
loss: 1.0658247470855713,grad_norm: 0.9999995905739618, iteration: 313610
loss: 0.9762356877326965,grad_norm: 0.7986653175156877, iteration: 313611
loss: 0.9886400103569031,grad_norm: 0.9065855012348412, iteration: 313612
loss: 1.0124198198318481,grad_norm: 0.9153003518894315, iteration: 313613
loss: 1.012446641921997,grad_norm: 0.9105910140007427, iteration: 313614
loss: 0.9721418619155884,grad_norm: 0.7216491383505643, iteration: 313615
loss: 0.9898133873939514,grad_norm: 0.7908051633501944, iteration: 313616
loss: 0.9778901934623718,grad_norm: 0.8239751422170131, iteration: 313617
loss: 1.0092196464538574,grad_norm: 0.9999992198197263, iteration: 313618
loss: 0.9889313578605652,grad_norm: 0.9999988695965023, iteration: 313619
loss: 1.0846002101898193,grad_norm: 0.8544066822215208, iteration: 313620
loss: 1.0015162229537964,grad_norm: 0.999999113181628, iteration: 313621
loss: 1.0349962711334229,grad_norm: 0.9999998705070813, iteration: 313622
loss: 1.0236495733261108,grad_norm: 0.8078825269066724, iteration: 313623
loss: 1.0133644342422485,grad_norm: 0.8714246286388089, iteration: 313624
loss: 1.0169062614440918,grad_norm: 0.7972794701708855, iteration: 313625
loss: 0.95817631483078,grad_norm: 0.8529816969339785, iteration: 313626
loss: 1.0427131652832031,grad_norm: 0.8635435373236254, iteration: 313627
loss: 1.0888923406600952,grad_norm: 0.8142674513377764, iteration: 313628
loss: 0.9718027710914612,grad_norm: 0.877287242818287, iteration: 313629
loss: 1.067360520362854,grad_norm: 0.8250277974907562, iteration: 313630
loss: 1.0393673181533813,grad_norm: 0.9015906401577399, iteration: 313631
loss: 0.9530966281890869,grad_norm: 0.8992741376564234, iteration: 313632
loss: 1.0225446224212646,grad_norm: 0.842154917595743, iteration: 313633
loss: 1.0091427564620972,grad_norm: 0.8596759647607957, iteration: 313634
loss: 0.9918126463890076,grad_norm: 0.9285381793956368, iteration: 313635
loss: 0.9536087512969971,grad_norm: 0.7757127719125019, iteration: 313636
loss: 1.014737606048584,grad_norm: 0.7350002119866401, iteration: 313637
loss: 1.009384036064148,grad_norm: 0.999999113280217, iteration: 313638
loss: 1.0332417488098145,grad_norm: 0.8129677484674885, iteration: 313639
loss: 0.9622036814689636,grad_norm: 0.7494891474785743, iteration: 313640
loss: 1.0008771419525146,grad_norm: 0.7971405646076547, iteration: 313641
loss: 0.9833312630653381,grad_norm: 0.8463751114637192, iteration: 313642
loss: 1.014387845993042,grad_norm: 0.8002045577635427, iteration: 313643
loss: 1.0344722270965576,grad_norm: 0.9999998166230726, iteration: 313644
loss: 0.9891172647476196,grad_norm: 0.8787337756215078, iteration: 313645
loss: 1.0292166471481323,grad_norm: 0.6773911388677385, iteration: 313646
loss: 1.0414106845855713,grad_norm: 0.9074583582077687, iteration: 313647
loss: 0.957073450088501,grad_norm: 0.7386663846427717, iteration: 313648
loss: 0.9603009223937988,grad_norm: 0.9382305638001525, iteration: 313649
loss: 0.980553150177002,grad_norm: 0.7612801719981769, iteration: 313650
loss: 0.9786409735679626,grad_norm: 0.8664984140698369, iteration: 313651
loss: 0.9888027906417847,grad_norm: 0.782217228032967, iteration: 313652
loss: 1.0275936126708984,grad_norm: 0.7667262055052291, iteration: 313653
loss: 0.9939826130867004,grad_norm: 0.9379724753423235, iteration: 313654
loss: 1.0157579183578491,grad_norm: 0.9211003449747475, iteration: 313655
loss: 1.0027751922607422,grad_norm: 0.8582587890461205, iteration: 313656
loss: 1.1379376649856567,grad_norm: 0.9953390240445158, iteration: 313657
loss: 0.9729262590408325,grad_norm: 0.7678573118785288, iteration: 313658
loss: 0.9938507080078125,grad_norm: 0.8608744149740366, iteration: 313659
loss: 0.9549409747123718,grad_norm: 0.717140621334867, iteration: 313660
loss: 1.1132653951644897,grad_norm: 0.9525413401427096, iteration: 313661
loss: 0.9889603853225708,grad_norm: 0.8460591674471243, iteration: 313662
loss: 0.957389235496521,grad_norm: 0.9266486036165922, iteration: 313663
loss: 0.9759182929992676,grad_norm: 0.9439816154650551, iteration: 313664
loss: 0.9985707998275757,grad_norm: 0.8304952524224007, iteration: 313665
loss: 0.9738432765007019,grad_norm: 0.791567134775128, iteration: 313666
loss: 1.0881832838058472,grad_norm: 0.9999996134913406, iteration: 313667
loss: 1.0298857688903809,grad_norm: 0.9999993376057678, iteration: 313668
loss: 1.0174124240875244,grad_norm: 0.8078569532578965, iteration: 313669
loss: 1.0410534143447876,grad_norm: 0.9999992284137847, iteration: 313670
loss: 1.009537696838379,grad_norm: 0.810176155975389, iteration: 313671
loss: 1.1419175863265991,grad_norm: 0.9999994173221373, iteration: 313672
loss: 0.9946749210357666,grad_norm: 0.8846344660673577, iteration: 313673
loss: 1.027741551399231,grad_norm: 0.8849409929365176, iteration: 313674
loss: 0.9830905795097351,grad_norm: 0.8673509117599002, iteration: 313675
loss: 1.0266965627670288,grad_norm: 0.9207016849425621, iteration: 313676
loss: 1.0036852359771729,grad_norm: 0.8515443395157265, iteration: 313677
loss: 0.9893348813056946,grad_norm: 0.8757260231140409, iteration: 313678
loss: 1.0113255977630615,grad_norm: 0.7427240270083224, iteration: 313679
loss: 1.0036784410476685,grad_norm: 0.7892772671749554, iteration: 313680
loss: 0.968065083026886,grad_norm: 0.8972867919018586, iteration: 313681
loss: 1.0016034841537476,grad_norm: 0.8842339041544627, iteration: 313682
loss: 0.9850918054580688,grad_norm: 0.812167481224662, iteration: 313683
loss: 1.004660964012146,grad_norm: 0.9197697790587978, iteration: 313684
loss: 1.0541770458221436,grad_norm: 0.9999999590442806, iteration: 313685
loss: 0.9954968094825745,grad_norm: 0.9004168952692437, iteration: 313686
loss: 0.9959126710891724,grad_norm: 0.948138443408131, iteration: 313687
loss: 1.2227349281311035,grad_norm: 0.9999996219932171, iteration: 313688
loss: 1.00636625289917,grad_norm: 0.9426633059381732, iteration: 313689
loss: 1.0185816287994385,grad_norm: 0.9102390459617806, iteration: 313690
loss: 0.9719952344894409,grad_norm: 0.909417004047462, iteration: 313691
loss: 0.9784169793128967,grad_norm: 0.7911940180776155, iteration: 313692
loss: 1.1318501234054565,grad_norm: 0.9999997645893915, iteration: 313693
loss: 1.0176295042037964,grad_norm: 0.8448195768703707, iteration: 313694
loss: 1.0000516176223755,grad_norm: 0.7020248154778398, iteration: 313695
loss: 1.0248959064483643,grad_norm: 0.7935663631227179, iteration: 313696
loss: 1.0362491607666016,grad_norm: 0.9999993823217173, iteration: 313697
loss: 0.970947265625,grad_norm: 0.7881965452973412, iteration: 313698
loss: 1.0208756923675537,grad_norm: 0.918787585728701, iteration: 313699
loss: 1.0171842575073242,grad_norm: 0.8209438951523207, iteration: 313700
loss: 0.9886943697929382,grad_norm: 0.7601617293443804, iteration: 313701
loss: 0.9954763054847717,grad_norm: 0.801825444080894, iteration: 313702
loss: 1.0382033586502075,grad_norm: 0.9999990928576843, iteration: 313703
loss: 1.037979006767273,grad_norm: 0.7695375654172621, iteration: 313704
loss: 1.0058283805847168,grad_norm: 0.8330520106010884, iteration: 313705
loss: 1.0233259201049805,grad_norm: 0.8961528321341737, iteration: 313706
loss: 1.04483163356781,grad_norm: 0.9999990629724657, iteration: 313707
loss: 1.0182666778564453,grad_norm: 0.8612782061071483, iteration: 313708
loss: 1.0269304513931274,grad_norm: 0.866791677352626, iteration: 313709
loss: 1.0459434986114502,grad_norm: 0.9713341757632773, iteration: 313710
loss: 1.0142772197723389,grad_norm: 0.6802588761709047, iteration: 313711
loss: 1.0425000190734863,grad_norm: 0.9999993497436237, iteration: 313712
loss: 0.9809242486953735,grad_norm: 0.8556473551063538, iteration: 313713
loss: 0.9981949925422668,grad_norm: 0.8751080972246512, iteration: 313714
loss: 1.0148322582244873,grad_norm: 0.7143500918201091, iteration: 313715
loss: 1.0058413743972778,grad_norm: 0.8229522676175943, iteration: 313716
loss: 0.9623112678527832,grad_norm: 0.9764562311926135, iteration: 313717
loss: 0.9879909753799438,grad_norm: 0.8406587878222241, iteration: 313718
loss: 1.0189489126205444,grad_norm: 0.6929849428898902, iteration: 313719
loss: 1.0213485956192017,grad_norm: 0.9999992697779948, iteration: 313720
loss: 1.032414197921753,grad_norm: 0.8540129811435329, iteration: 313721
loss: 1.0135161876678467,grad_norm: 0.999999331370753, iteration: 313722
loss: 1.0307583808898926,grad_norm: 0.9823374549963608, iteration: 313723
loss: 1.019823670387268,grad_norm: 0.9292175432328856, iteration: 313724
loss: 1.0050355195999146,grad_norm: 0.8225023092886801, iteration: 313725
loss: 0.9935302734375,grad_norm: 0.7981443446025279, iteration: 313726
loss: 1.0111138820648193,grad_norm: 0.7431857736250423, iteration: 313727
loss: 0.994723916053772,grad_norm: 0.7759211275599351, iteration: 313728
loss: 1.0388610363006592,grad_norm: 0.965754868738389, iteration: 313729
loss: 0.9751826524734497,grad_norm: 0.8868232840199913, iteration: 313730
loss: 0.9922829866409302,grad_norm: 0.9999989372908987, iteration: 313731
loss: 1.039198637008667,grad_norm: 0.9312610063346525, iteration: 313732
loss: 0.9937570691108704,grad_norm: 0.9247964296372039, iteration: 313733
loss: 1.0006836652755737,grad_norm: 0.9999991405764227, iteration: 313734
loss: 1.0385980606079102,grad_norm: 0.9206761213919424, iteration: 313735
loss: 0.9979574680328369,grad_norm: 0.9218617689908357, iteration: 313736
loss: 1.0167051553726196,grad_norm: 0.8101601760009028, iteration: 313737
loss: 1.0636277198791504,grad_norm: 0.740439172094665, iteration: 313738
loss: 1.0037897825241089,grad_norm: 0.6938591794325716, iteration: 313739
loss: 0.9947154521942139,grad_norm: 0.89533564517804, iteration: 313740
loss: 0.9806343913078308,grad_norm: 0.8180604028192112, iteration: 313741
loss: 1.0098090171813965,grad_norm: 0.6363241237503835, iteration: 313742
loss: 1.020567536354065,grad_norm: 0.8303453947079431, iteration: 313743
loss: 0.9678188562393188,grad_norm: 0.9248952770668731, iteration: 313744
loss: 1.0079525709152222,grad_norm: 0.9222343062368357, iteration: 313745
loss: 1.0082197189331055,grad_norm: 0.8648711812479555, iteration: 313746
loss: 1.0126044750213623,grad_norm: 0.87887077456011, iteration: 313747
loss: 0.996963381767273,grad_norm: 0.9097760228645649, iteration: 313748
loss: 1.0361196994781494,grad_norm: 0.8880260288008708, iteration: 313749
loss: 1.0849567651748657,grad_norm: 0.9999990533920378, iteration: 313750
loss: 1.0340327024459839,grad_norm: 0.8979566304894527, iteration: 313751
loss: 1.0627615451812744,grad_norm: 0.9825167861346313, iteration: 313752
loss: 1.0130583047866821,grad_norm: 0.9285616553344149, iteration: 313753
loss: 1.0028411149978638,grad_norm: 0.9999996090375741, iteration: 313754
loss: 0.9945592284202576,grad_norm: 0.7815159898340397, iteration: 313755
loss: 0.9999069571495056,grad_norm: 0.8491170415409722, iteration: 313756
loss: 0.9952643513679504,grad_norm: 0.7186294590804437, iteration: 313757
loss: 0.9805809259414673,grad_norm: 0.847732090516142, iteration: 313758
loss: 1.0254515409469604,grad_norm: 0.8771378578515855, iteration: 313759
loss: 1.0092803239822388,grad_norm: 0.9999990270295174, iteration: 313760
loss: 1.0222266912460327,grad_norm: 0.8860607259004216, iteration: 313761
loss: 1.0101232528686523,grad_norm: 0.8597232767003502, iteration: 313762
loss: 0.9797238707542419,grad_norm: 0.7832088666526195, iteration: 313763
loss: 1.0102314949035645,grad_norm: 0.9502809821107824, iteration: 313764
loss: 0.96448814868927,grad_norm: 0.8878270211558047, iteration: 313765
loss: 0.9835271835327148,grad_norm: 0.8223538741221924, iteration: 313766
loss: 0.9780566692352295,grad_norm: 0.9999992222779114, iteration: 313767
loss: 1.0056004524230957,grad_norm: 0.9742823149263559, iteration: 313768
loss: 1.0102744102478027,grad_norm: 0.9165064480038249, iteration: 313769
loss: 1.0102598667144775,grad_norm: 0.8626449534391996, iteration: 313770
loss: 1.0246556997299194,grad_norm: 0.999999453470923, iteration: 313771
loss: 0.9955570697784424,grad_norm: 0.7501448758155133, iteration: 313772
loss: 1.013685703277588,grad_norm: 0.7294606638129965, iteration: 313773
loss: 0.9872789978981018,grad_norm: 0.7860235808490591, iteration: 313774
loss: 0.9851261377334595,grad_norm: 0.9999992192543993, iteration: 313775
loss: 1.0198476314544678,grad_norm: 0.8958663418163251, iteration: 313776
loss: 1.0329298973083496,grad_norm: 0.8089084048534457, iteration: 313777
loss: 0.982146143913269,grad_norm: 0.9999994150900651, iteration: 313778
loss: 0.9986581802368164,grad_norm: 0.906530772036534, iteration: 313779
loss: 0.9916989803314209,grad_norm: 0.7536960946547124, iteration: 313780
loss: 0.9698434472084045,grad_norm: 0.8998645072225011, iteration: 313781
loss: 0.980536162853241,grad_norm: 0.8271946477791767, iteration: 313782
loss: 1.0304466485977173,grad_norm: 0.9961112287614099, iteration: 313783
loss: 0.9985505938529968,grad_norm: 0.8119549541294523, iteration: 313784
loss: 1.0107879638671875,grad_norm: 0.8477402215034381, iteration: 313785
loss: 1.0482995510101318,grad_norm: 0.9740226290155471, iteration: 313786
loss: 1.014959692955017,grad_norm: 0.9007025783671164, iteration: 313787
loss: 1.0023044347763062,grad_norm: 0.6898420539069284, iteration: 313788
loss: 0.9771101474761963,grad_norm: 0.8436305788515572, iteration: 313789
loss: 0.9969468712806702,grad_norm: 0.9503787006768857, iteration: 313790
loss: 1.0092039108276367,grad_norm: 0.753313299860097, iteration: 313791
loss: 1.006088376045227,grad_norm: 0.9023921031223867, iteration: 313792
loss: 1.0565694570541382,grad_norm: 0.9999998093042253, iteration: 313793
loss: 1.0043355226516724,grad_norm: 0.9999998912060706, iteration: 313794
loss: 1.0109846591949463,grad_norm: 0.9361217371999601, iteration: 313795
loss: 0.9871442914009094,grad_norm: 0.9790423122758877, iteration: 313796
loss: 0.9932554960250854,grad_norm: 0.9308495910446006, iteration: 313797
loss: 0.9457017779350281,grad_norm: 0.8007165877795553, iteration: 313798
loss: 0.9757221937179565,grad_norm: 0.8108568728636407, iteration: 313799
loss: 0.9996011853218079,grad_norm: 0.8302977323758541, iteration: 313800
loss: 0.9815709590911865,grad_norm: 0.7567936078279626, iteration: 313801
loss: 0.9591092467308044,grad_norm: 0.9611475329690339, iteration: 313802
loss: 1.0284388065338135,grad_norm: 0.9317156196944957, iteration: 313803
loss: 0.9718881249427795,grad_norm: 0.7777806912757887, iteration: 313804
loss: 1.0099117755889893,grad_norm: 0.7663900985092665, iteration: 313805
loss: 0.9960710406303406,grad_norm: 0.9748793091216944, iteration: 313806
loss: 1.0118314027786255,grad_norm: 0.8435400149823954, iteration: 313807
loss: 1.0206750631332397,grad_norm: 0.9578134553602689, iteration: 313808
loss: 1.0067776441574097,grad_norm: 0.9999997084536631, iteration: 313809
loss: 0.9733448028564453,grad_norm: 0.9116325511581298, iteration: 313810
loss: 1.06531822681427,grad_norm: 0.9999998576110468, iteration: 313811
loss: 0.9726514220237732,grad_norm: 0.8273338279073763, iteration: 313812
loss: 0.9976112246513367,grad_norm: 0.9849903681930261, iteration: 313813
loss: 0.9807514548301697,grad_norm: 0.7782572528094182, iteration: 313814
loss: 0.980879008769989,grad_norm: 0.9361493263653717, iteration: 313815
loss: 0.9521021842956543,grad_norm: 0.8236421198945761, iteration: 313816
loss: 1.0006399154663086,grad_norm: 0.7568501328461587, iteration: 313817
loss: 1.0261486768722534,grad_norm: 0.9999990628532043, iteration: 313818
loss: 1.0086218118667603,grad_norm: 0.8365117883104615, iteration: 313819
loss: 1.0960088968276978,grad_norm: 0.8830837151654072, iteration: 313820
loss: 0.9777940511703491,grad_norm: 0.7472148406520316, iteration: 313821
loss: 0.9983615279197693,grad_norm: 0.9052871415253043, iteration: 313822
loss: 0.9586927890777588,grad_norm: 0.9829137404222161, iteration: 313823
loss: 1.0195550918579102,grad_norm: 1.0000000046707134, iteration: 313824
loss: 1.0151621103286743,grad_norm: 0.817295890414651, iteration: 313825
loss: 1.0143442153930664,grad_norm: 0.9245762240759525, iteration: 313826
loss: 0.9723644256591797,grad_norm: 0.7155771267110708, iteration: 313827
loss: 1.039502501487732,grad_norm: 0.754769195756505, iteration: 313828
loss: 1.0135098695755005,grad_norm: 0.9069429547952454, iteration: 313829
loss: 0.9968138933181763,grad_norm: 0.7660013517987712, iteration: 313830
loss: 1.0117062330245972,grad_norm: 0.838689603555652, iteration: 313831
loss: 0.9947714805603027,grad_norm: 0.9737919135164278, iteration: 313832
loss: 0.9882530570030212,grad_norm: 0.9999991954426256, iteration: 313833
loss: 1.017799735069275,grad_norm: 0.8440949657905586, iteration: 313834
loss: 1.0101685523986816,grad_norm: 0.851942688577475, iteration: 313835
loss: 0.9989218711853027,grad_norm: 0.7510830958306131, iteration: 313836
loss: 1.0199803113937378,grad_norm: 0.9999995944458299, iteration: 313837
loss: 1.0712008476257324,grad_norm: 0.798852597150565, iteration: 313838
loss: 0.9913529753684998,grad_norm: 0.9604023934439492, iteration: 313839
loss: 1.0130348205566406,grad_norm: 0.7810828578742889, iteration: 313840
loss: 1.0207749605178833,grad_norm: 0.9999993420458129, iteration: 313841
loss: 0.9719738364219666,grad_norm: 0.8163220623938282, iteration: 313842
loss: 1.040170431137085,grad_norm: 0.999999315178881, iteration: 313843
loss: 1.0407801866531372,grad_norm: 0.9999991885675372, iteration: 313844
loss: 0.9975305199623108,grad_norm: 0.7688335672229842, iteration: 313845
loss: 1.0110608339309692,grad_norm: 0.8840574522365849, iteration: 313846
loss: 1.016828179359436,grad_norm: 0.9999991412188604, iteration: 313847
loss: 0.9612249135971069,grad_norm: 0.9327873048382664, iteration: 313848
loss: 0.9692051410675049,grad_norm: 0.8645634516932343, iteration: 313849
loss: 1.0149098634719849,grad_norm: 0.7924002853901052, iteration: 313850
loss: 1.0422778129577637,grad_norm: 0.9999990892005116, iteration: 313851
loss: 1.0205433368682861,grad_norm: 0.8820265933460749, iteration: 313852
loss: 0.9878689050674438,grad_norm: 0.9999993111268892, iteration: 313853
loss: 1.0447735786437988,grad_norm: 0.717454799752459, iteration: 313854
loss: 0.9661812782287598,grad_norm: 0.9878086285890324, iteration: 313855
loss: 0.9937118887901306,grad_norm: 0.9491272702736583, iteration: 313856
loss: 0.9920933246612549,grad_norm: 0.9978075465052026, iteration: 313857
loss: 1.0100504159927368,grad_norm: 0.7369402609229031, iteration: 313858
loss: 1.050264596939087,grad_norm: 0.9999997439134577, iteration: 313859
loss: 0.991648256778717,grad_norm: 0.8211567606412548, iteration: 313860
loss: 1.0111711025238037,grad_norm: 0.9999999723797205, iteration: 313861
loss: 1.0482865571975708,grad_norm: 0.9608428324039705, iteration: 313862
loss: 1.0183420181274414,grad_norm: 0.9999991157293117, iteration: 313863
loss: 0.968267560005188,grad_norm: 0.768356744933054, iteration: 313864
loss: 1.0249735116958618,grad_norm: 0.8688544796513837, iteration: 313865
loss: 1.0019546747207642,grad_norm: 0.9718128509759868, iteration: 313866
loss: 1.0183742046356201,grad_norm: 0.778448294833598, iteration: 313867
loss: 1.0506001710891724,grad_norm: 0.9999991035233262, iteration: 313868
loss: 0.9864665865898132,grad_norm: 0.761949366719123, iteration: 313869
loss: 1.002134919166565,grad_norm: 0.8026188971330948, iteration: 313870
loss: 1.0197925567626953,grad_norm: 0.9999997642164228, iteration: 313871
loss: 0.9709932804107666,grad_norm: 0.8360298015379427, iteration: 313872
loss: 0.9667473435401917,grad_norm: 0.8128858384610123, iteration: 313873
loss: 1.0088711977005005,grad_norm: 0.9036130746548006, iteration: 313874
loss: 0.9737308025360107,grad_norm: 0.8982685720143406, iteration: 313875
loss: 0.9764060974121094,grad_norm: 0.900554857707248, iteration: 313876
loss: 1.0743980407714844,grad_norm: 0.9999999327118808, iteration: 313877
loss: 1.1127675771713257,grad_norm: 0.9999997688901967, iteration: 313878
loss: 1.035093069076538,grad_norm: 0.6999390586703634, iteration: 313879
loss: 1.1437264680862427,grad_norm: 0.9999994212566659, iteration: 313880
loss: 1.191989779472351,grad_norm: 0.999999858925877, iteration: 313881
loss: 0.9650948643684387,grad_norm: 0.9171691189571206, iteration: 313882
loss: 1.0782064199447632,grad_norm: 0.981417910534906, iteration: 313883
loss: 1.020277500152588,grad_norm: 0.9999996880666369, iteration: 313884
loss: 1.0322058200836182,grad_norm: 0.9999990941631292, iteration: 313885
loss: 1.0767887830734253,grad_norm: 1.0000000585634243, iteration: 313886
loss: 1.5839958190917969,grad_norm: 0.9999995893867804, iteration: 313887
loss: 1.21247398853302,grad_norm: 0.9999992846821285, iteration: 313888
loss: 1.1078509092330933,grad_norm: 0.9999993181040747, iteration: 313889
loss: 1.2051061391830444,grad_norm: 0.9999997783917144, iteration: 313890
loss: 1.050688624382019,grad_norm: 0.8623112404622202, iteration: 313891
loss: 1.1664437055587769,grad_norm: 0.9999997688688881, iteration: 313892
loss: 1.1518945693969727,grad_norm: 0.9406657709364278, iteration: 313893
loss: 1.21147620677948,grad_norm: 0.999999667668963, iteration: 313894
loss: 1.3492748737335205,grad_norm: 0.9999998892636419, iteration: 313895
loss: 1.223881483078003,grad_norm: 0.9999993512522579, iteration: 313896
loss: 1.4389615058898926,grad_norm: 0.9999998856893866, iteration: 313897
loss: 0.9682559370994568,grad_norm: 0.9999991593248317, iteration: 313898
loss: 1.1242094039916992,grad_norm: 0.9999997074278534, iteration: 313899
loss: 1.0435497760772705,grad_norm: 0.9816532736810414, iteration: 313900
loss: 1.137276291847229,grad_norm: 0.9999991127776664, iteration: 313901
loss: 1.0890545845031738,grad_norm: 0.7182626689630038, iteration: 313902
loss: 1.1488847732543945,grad_norm: 0.9999993154259502, iteration: 313903
loss: 1.188725233078003,grad_norm: 0.9999997668027207, iteration: 313904
loss: 1.090195655822754,grad_norm: 0.9999992041605622, iteration: 313905
loss: 1.0756494998931885,grad_norm: 0.9999992312472066, iteration: 313906
loss: 1.0519609451293945,grad_norm: 0.9179373053623732, iteration: 313907
loss: 1.1831142902374268,grad_norm: 0.9999996293638204, iteration: 313908
loss: 1.058670163154602,grad_norm: 0.9999991709167839, iteration: 313909
loss: 1.2761619091033936,grad_norm: 0.9999992163300345, iteration: 313910
loss: 1.1247224807739258,grad_norm: 0.9999993085569976, iteration: 313911
loss: 1.1037557125091553,grad_norm: 0.9999994274361516, iteration: 313912
loss: 1.215898871421814,grad_norm: 0.9999997428224774, iteration: 313913
loss: 1.0385491847991943,grad_norm: 0.9832838630874325, iteration: 313914
loss: 1.4132636785507202,grad_norm: 0.9999996650198351, iteration: 313915
loss: 1.0643117427825928,grad_norm: 0.9999993769301069, iteration: 313916
loss: 1.1052935123443604,grad_norm: 0.999999169669026, iteration: 313917
loss: 1.0722219944000244,grad_norm: 0.9882855724423568, iteration: 313918
loss: 1.0257399082183838,grad_norm: 0.999999120240484, iteration: 313919
loss: 1.270525336265564,grad_norm: 0.9999998261294727, iteration: 313920
loss: 1.1235841512680054,grad_norm: 0.9999998880276959, iteration: 313921
loss: 1.0981591939926147,grad_norm: 0.7571597579794834, iteration: 313922
loss: 1.4007809162139893,grad_norm: 0.999999527062734, iteration: 313923
loss: 1.2963048219680786,grad_norm: 0.9999999736148478, iteration: 313924
loss: 1.446635365486145,grad_norm: 0.9999999819658678, iteration: 313925
loss: 1.219286561012268,grad_norm: 0.9999995966703897, iteration: 313926
loss: 1.0947757959365845,grad_norm: 0.9999990482763214, iteration: 313927
loss: 1.0960115194320679,grad_norm: 0.9999992822565903, iteration: 313928
loss: 1.2456450462341309,grad_norm: 0.9999998707489282, iteration: 313929
loss: 1.0521632432937622,grad_norm: 0.9999992590144328, iteration: 313930
loss: 1.278554916381836,grad_norm: 0.9999997841763273, iteration: 313931
loss: 1.214279294013977,grad_norm: 0.9999999032522273, iteration: 313932
loss: 1.1322556734085083,grad_norm: 0.9999996142925207, iteration: 313933
loss: 1.1886825561523438,grad_norm: 0.9999998139080991, iteration: 313934
loss: 1.1081931591033936,grad_norm: 0.9855660020517509, iteration: 313935
loss: 1.1155316829681396,grad_norm: 0.999999141391981, iteration: 313936
loss: 1.1943504810333252,grad_norm: 1.0000000376167522, iteration: 313937
loss: 1.031237006187439,grad_norm: 0.9999991759329367, iteration: 313938
loss: 1.2554315328598022,grad_norm: 0.9999999075486846, iteration: 313939
loss: 1.1770384311676025,grad_norm: 0.9999996527866659, iteration: 313940
loss: 1.3004406690597534,grad_norm: 0.9999995329462297, iteration: 313941
loss: 1.1497924327850342,grad_norm: 0.9999996968801954, iteration: 313942
loss: 1.1006375551223755,grad_norm: 0.9999998746811529, iteration: 313943
loss: 1.2926990985870361,grad_norm: 0.9999995830091122, iteration: 313944
loss: 1.114046573638916,grad_norm: 0.9999995140428439, iteration: 313945
loss: 1.0837112665176392,grad_norm: 0.8469887999871392, iteration: 313946
loss: 1.1338365077972412,grad_norm: 1.0000000058042313, iteration: 313947
loss: 1.066476583480835,grad_norm: 0.9999998487513888, iteration: 313948
loss: 1.2105087041854858,grad_norm: 0.9999998130338118, iteration: 313949
loss: 0.9906236529350281,grad_norm: 0.9306614818030552, iteration: 313950
loss: 1.2119364738464355,grad_norm: 0.9999994759295885, iteration: 313951
loss: 1.0867797136306763,grad_norm: 0.9999993230983045, iteration: 313952
loss: 1.0937981605529785,grad_norm: 0.9999999351254678, iteration: 313953
loss: 1.1138982772827148,grad_norm: 0.9150230744118409, iteration: 313954
loss: 1.0176968574523926,grad_norm: 0.977290439749782, iteration: 313955
loss: 1.0709583759307861,grad_norm: 0.9999993316716125, iteration: 313956
loss: 1.0663697719573975,grad_norm: 0.8201128622331478, iteration: 313957
loss: 1.164391279220581,grad_norm: 0.9999993360347502, iteration: 313958
loss: 1.0523115396499634,grad_norm: 0.9999997133332912, iteration: 313959
loss: 1.0766072273254395,grad_norm: 0.9999993310409848, iteration: 313960
loss: 1.0602045059204102,grad_norm: 0.832273694224065, iteration: 313961
loss: 1.1157368421554565,grad_norm: 1.0000000196328198, iteration: 313962
loss: 1.062240719795227,grad_norm: 0.9999991210501172, iteration: 313963
loss: 1.180351734161377,grad_norm: 0.9999995987182408, iteration: 313964
loss: 0.9785591959953308,grad_norm: 0.7839195173464609, iteration: 313965
loss: 1.1032520532608032,grad_norm: 0.9999994139681624, iteration: 313966
loss: 1.2712963819503784,grad_norm: 0.999999535446293, iteration: 313967
loss: 0.9716542363166809,grad_norm: 0.9999989544663537, iteration: 313968
loss: 1.13571298122406,grad_norm: 0.9999997851981032, iteration: 313969
loss: 1.0188031196594238,grad_norm: 0.9269515068746429, iteration: 313970
loss: 1.119964361190796,grad_norm: 0.9999993275826693, iteration: 313971
loss: 1.1088000535964966,grad_norm: 0.9999992998104409, iteration: 313972
loss: 1.1084359884262085,grad_norm: 0.999999611463156, iteration: 313973
loss: 1.031343936920166,grad_norm: 0.9999994095804491, iteration: 313974
loss: 1.0134438276290894,grad_norm: 0.9740550214385904, iteration: 313975
loss: 1.100301742553711,grad_norm: 0.9999994614687945, iteration: 313976
loss: 1.0884134769439697,grad_norm: 0.9999991270219583, iteration: 313977
loss: 1.0823469161987305,grad_norm: 0.9999999260443927, iteration: 313978
loss: 1.036953091621399,grad_norm: 0.9999995567668062, iteration: 313979
loss: 1.142135739326477,grad_norm: 0.9999996048896561, iteration: 313980
loss: 1.0848037004470825,grad_norm: 0.9999992820104858, iteration: 313981
loss: 1.2156823873519897,grad_norm: 0.9999990952021717, iteration: 313982
loss: 1.012255311012268,grad_norm: 0.9999994228920261, iteration: 313983
loss: 1.0201168060302734,grad_norm: 0.9999992511742116, iteration: 313984
loss: 1.0958831310272217,grad_norm: 0.9999991682110566, iteration: 313985
loss: 1.0725276470184326,grad_norm: 0.999999525848446, iteration: 313986
loss: 1.1685649156570435,grad_norm: 1.0000000688403983, iteration: 313987
loss: 1.0379754304885864,grad_norm: 0.9999998661634354, iteration: 313988
loss: 1.143425464630127,grad_norm: 0.9999996346812569, iteration: 313989
loss: 1.0891389846801758,grad_norm: 0.9999996414178266, iteration: 313990
loss: 1.1247059106826782,grad_norm: 0.9999994638460578, iteration: 313991
loss: 0.9888270497322083,grad_norm: 0.8928674975578055, iteration: 313992
loss: 1.0750900506973267,grad_norm: 0.9999995124993595, iteration: 313993
loss: 0.9971664547920227,grad_norm: 0.8917034847295066, iteration: 313994
loss: 1.2493011951446533,grad_norm: 0.9999991930673223, iteration: 313995
loss: 1.2052438259124756,grad_norm: 0.9999998955693329, iteration: 313996
loss: 1.0720618963241577,grad_norm: 0.9999996954705844, iteration: 313997
loss: 1.06399667263031,grad_norm: 0.9999991148679025, iteration: 313998
loss: 1.1433005332946777,grad_norm: 0.99999915061063, iteration: 313999
loss: 1.0215412378311157,grad_norm: 0.9570437646161122, iteration: 314000
loss: 1.0237916707992554,grad_norm: 0.9999991721645871, iteration: 314001
loss: 1.0828754901885986,grad_norm: 0.9999992048667218, iteration: 314002
loss: 1.249932885169983,grad_norm: 0.9999995081100104, iteration: 314003
loss: 1.0470237731933594,grad_norm: 0.9999991080878711, iteration: 314004
loss: 1.0371376276016235,grad_norm: 0.9999998284636124, iteration: 314005
loss: 0.9717490077018738,grad_norm: 0.9366833080779592, iteration: 314006
loss: 1.050665259361267,grad_norm: 0.7979969595181026, iteration: 314007
loss: 1.076027750968933,grad_norm: 0.9999992678271483, iteration: 314008
loss: 1.1902165412902832,grad_norm: 0.9999997635073041, iteration: 314009
loss: 1.0039143562316895,grad_norm: 0.7919028609923705, iteration: 314010
loss: 1.0903033018112183,grad_norm: 0.9999996832801019, iteration: 314011
loss: 1.033144474029541,grad_norm: 0.999999918537269, iteration: 314012
loss: 1.0008296966552734,grad_norm: 0.9999993464861819, iteration: 314013
loss: 1.0120675563812256,grad_norm: 0.9999997864050221, iteration: 314014
loss: 1.0173112154006958,grad_norm: 0.9999989567091402, iteration: 314015
loss: 1.103081464767456,grad_norm: 0.9843870116077184, iteration: 314016
loss: 0.9662942290306091,grad_norm: 0.9999991558702026, iteration: 314017
loss: 1.0692378282546997,grad_norm: 0.999999041775438, iteration: 314018
loss: 1.0043174028396606,grad_norm: 0.8608291462285053, iteration: 314019
loss: 1.0797834396362305,grad_norm: 0.9999995034384924, iteration: 314020
loss: 1.0185143947601318,grad_norm: 0.8471990361925913, iteration: 314021
loss: 1.0032415390014648,grad_norm: 0.8520795082810936, iteration: 314022
loss: 1.248766541481018,grad_norm: 0.9999998655728819, iteration: 314023
loss: 1.1245917081832886,grad_norm: 0.9999993589204706, iteration: 314024
loss: 1.3303760290145874,grad_norm: 0.9999998676632236, iteration: 314025
loss: 1.0398623943328857,grad_norm: 0.9999997776216191, iteration: 314026
loss: 1.0638285875320435,grad_norm: 0.9999995841857617, iteration: 314027
loss: 1.014270544052124,grad_norm: 0.8491777613039535, iteration: 314028
loss: 1.1183624267578125,grad_norm: 0.9999999696989027, iteration: 314029
loss: 1.0199166536331177,grad_norm: 0.9999993882882127, iteration: 314030
loss: 1.0440360307693481,grad_norm: 0.9999998157947985, iteration: 314031
loss: 1.126346230506897,grad_norm: 0.9999994737095931, iteration: 314032
loss: 0.9983124136924744,grad_norm: 0.9999991597519349, iteration: 314033
loss: 1.052446961402893,grad_norm: 0.841971213711304, iteration: 314034
loss: 0.9813376069068909,grad_norm: 0.8789241297469775, iteration: 314035
loss: 1.0665448904037476,grad_norm: 0.9999997918714183, iteration: 314036
loss: 0.9400261640548706,grad_norm: 0.7314387397713729, iteration: 314037
loss: 1.0152177810668945,grad_norm: 0.8978365120805875, iteration: 314038
loss: 1.0449613332748413,grad_norm: 0.999999118705327, iteration: 314039
loss: 1.0199378728866577,grad_norm: 0.9999991384728286, iteration: 314040
loss: 1.0780117511749268,grad_norm: 0.7817532289806431, iteration: 314041
loss: 0.9731912612915039,grad_norm: 0.8799887630566549, iteration: 314042
loss: 1.0503060817718506,grad_norm: 0.9999997662297271, iteration: 314043
loss: 1.0092897415161133,grad_norm: 0.9999991835546762, iteration: 314044
loss: 1.0385626554489136,grad_norm: 0.9999996288983037, iteration: 314045
loss: 1.0095043182373047,grad_norm: 0.9999990655850243, iteration: 314046
loss: 1.2195210456848145,grad_norm: 0.9999999594871228, iteration: 314047
loss: 0.9783521294593811,grad_norm: 0.8025292365756885, iteration: 314048
loss: 1.0237948894500732,grad_norm: 0.9999994616184122, iteration: 314049
loss: 1.0274100303649902,grad_norm: 0.8424274636471808, iteration: 314050
loss: 1.08530592918396,grad_norm: 0.9999998589228242, iteration: 314051
loss: 1.0170928239822388,grad_norm: 0.8844165216287867, iteration: 314052
loss: 1.0305413007736206,grad_norm: 0.9999997307548206, iteration: 314053
loss: 0.9836117625236511,grad_norm: 0.9999990900486957, iteration: 314054
loss: 0.9479591250419617,grad_norm: 0.9999995759686919, iteration: 314055
loss: 1.0076669454574585,grad_norm: 0.8108112784894331, iteration: 314056
loss: 0.9626505970954895,grad_norm: 0.8629113959912823, iteration: 314057
loss: 1.080668330192566,grad_norm: 0.9999989646605316, iteration: 314058
loss: 1.0046683549880981,grad_norm: 0.8072441368762122, iteration: 314059
loss: 0.963911771774292,grad_norm: 0.8638736264089412, iteration: 314060
loss: 0.998822808265686,grad_norm: 0.8217149102128947, iteration: 314061
loss: 1.0548841953277588,grad_norm: 0.7957061461096062, iteration: 314062
loss: 1.0813721418380737,grad_norm: 0.9999991432432548, iteration: 314063
loss: 1.0426223278045654,grad_norm: 0.999998977643591, iteration: 314064
loss: 0.9887122511863708,grad_norm: 0.8934370164447933, iteration: 314065
loss: 0.9697540402412415,grad_norm: 0.717083857621796, iteration: 314066
loss: 1.0419554710388184,grad_norm: 0.95146535330955, iteration: 314067
loss: 1.068044662475586,grad_norm: 0.9999990914841796, iteration: 314068
loss: 1.0599849224090576,grad_norm: 0.9956437505437099, iteration: 314069
loss: 1.0035781860351562,grad_norm: 0.9999990903682335, iteration: 314070
loss: 1.114177942276001,grad_norm: 0.9999998628428536, iteration: 314071
loss: 0.9932525157928467,grad_norm: 0.9134258487594086, iteration: 314072
loss: 1.0236178636550903,grad_norm: 0.8741659618765253, iteration: 314073
loss: 1.0702120065689087,grad_norm: 0.9999998553024132, iteration: 314074
loss: 0.9909765124320984,grad_norm: 0.9999990405530177, iteration: 314075
loss: 1.031290054321289,grad_norm: 0.7577708028422442, iteration: 314076
loss: 1.0332319736480713,grad_norm: 1.0000000469901038, iteration: 314077
loss: 1.0080169439315796,grad_norm: 0.8599286080647475, iteration: 314078
loss: 1.028084397315979,grad_norm: 0.9999999630008787, iteration: 314079
loss: 1.0076642036437988,grad_norm: 0.8062439751329422, iteration: 314080
loss: 1.0447345972061157,grad_norm: 0.9999990676968876, iteration: 314081
loss: 1.0529319047927856,grad_norm: 0.9999999790048599, iteration: 314082
loss: 0.9939782023429871,grad_norm: 0.9999991176973052, iteration: 314083
loss: 0.9943678975105286,grad_norm: 0.8512977783903847, iteration: 314084
loss: 1.1081480979919434,grad_norm: 0.9489621863022477, iteration: 314085
loss: 1.1061484813690186,grad_norm: 0.999999295022573, iteration: 314086
loss: 1.0283902883529663,grad_norm: 0.9999997116434641, iteration: 314087
loss: 1.0879555940628052,grad_norm: 0.999999361635049, iteration: 314088
loss: 1.0452383756637573,grad_norm: 0.9999999097174076, iteration: 314089
loss: 1.0082517862319946,grad_norm: 0.7859066932343316, iteration: 314090
loss: 0.9760201573371887,grad_norm: 0.9476193927921943, iteration: 314091
loss: 0.989282488822937,grad_norm: 0.9740222342752518, iteration: 314092
loss: 1.0078668594360352,grad_norm: 0.8105148243784476, iteration: 314093
loss: 1.0377888679504395,grad_norm: 0.9999990739131619, iteration: 314094
loss: 1.05387544631958,grad_norm: 0.9999991762329449, iteration: 314095
loss: 0.9946767687797546,grad_norm: 0.9847125194542222, iteration: 314096
loss: 1.1349892616271973,grad_norm: 0.999999822061314, iteration: 314097
loss: 1.1369439363479614,grad_norm: 0.7892838002414228, iteration: 314098
loss: 1.0323907136917114,grad_norm: 0.8767654557263592, iteration: 314099
loss: 1.055747389793396,grad_norm: 0.7923777064190093, iteration: 314100
loss: 1.130682110786438,grad_norm: 0.9999997718525807, iteration: 314101
loss: 1.0463212728500366,grad_norm: 0.9999996837209424, iteration: 314102
loss: 0.9628658294677734,grad_norm: 0.9999995482200688, iteration: 314103
loss: 1.0249733924865723,grad_norm: 0.9999993867558747, iteration: 314104
loss: 0.9691762328147888,grad_norm: 0.801247608682599, iteration: 314105
loss: 1.0092895030975342,grad_norm: 0.9379213063380976, iteration: 314106
loss: 1.02883780002594,grad_norm: 0.9999993312182369, iteration: 314107
loss: 0.9842212796211243,grad_norm: 0.9871393914764889, iteration: 314108
loss: 1.0148930549621582,grad_norm: 0.745984920306848, iteration: 314109
loss: 1.0166988372802734,grad_norm: 0.9999996648187441, iteration: 314110
loss: 1.0966554880142212,grad_norm: 0.9999998490982944, iteration: 314111
loss: 1.0345921516418457,grad_norm: 0.8831555783095525, iteration: 314112
loss: 1.02911376953125,grad_norm: 0.9999999340003516, iteration: 314113
loss: 0.9908629059791565,grad_norm: 0.9999990536580571, iteration: 314114
loss: 0.9991680979728699,grad_norm: 0.9999990608941748, iteration: 314115
loss: 1.0373283624649048,grad_norm: 0.9999990216617868, iteration: 314116
loss: 0.9959995150566101,grad_norm: 0.879305195852568, iteration: 314117
loss: 0.9593077898025513,grad_norm: 0.8586462982592378, iteration: 314118
loss: 1.0389976501464844,grad_norm: 0.9999997536948123, iteration: 314119
loss: 1.067794680595398,grad_norm: 0.9999997470373834, iteration: 314120
loss: 1.1549092531204224,grad_norm: 0.9999994035411679, iteration: 314121
loss: 1.0407447814941406,grad_norm: 0.9921659434458503, iteration: 314122
loss: 1.0206257104873657,grad_norm: 0.8542329610047842, iteration: 314123
loss: 1.0171400308609009,grad_norm: 0.9355076913319074, iteration: 314124
loss: 1.0705772638320923,grad_norm: 0.9440708411514082, iteration: 314125
loss: 1.0399490594863892,grad_norm: 0.9999997028471199, iteration: 314126
loss: 1.0310993194580078,grad_norm: 0.9999994944947509, iteration: 314127
loss: 1.0155223608016968,grad_norm: 0.999999433476463, iteration: 314128
loss: 1.0125271081924438,grad_norm: 0.9999998833494208, iteration: 314129
loss: 0.9973328709602356,grad_norm: 0.9999995840965181, iteration: 314130
loss: 1.0257700681686401,grad_norm: 0.9383839801666999, iteration: 314131
loss: 1.0459686517715454,grad_norm: 0.9999995045267513, iteration: 314132
loss: 1.022980809211731,grad_norm: 0.9999992639931219, iteration: 314133
loss: 1.0312153100967407,grad_norm: 0.9220099092868468, iteration: 314134
loss: 1.0162664651870728,grad_norm: 0.9999992865304314, iteration: 314135
loss: 1.0218746662139893,grad_norm: 0.8159672326456742, iteration: 314136
loss: 1.1242380142211914,grad_norm: 0.9999999978839355, iteration: 314137
loss: 1.0244888067245483,grad_norm: 0.9999997150080181, iteration: 314138
loss: 1.0117424726486206,grad_norm: 0.9999990430828348, iteration: 314139
loss: 1.0040886402130127,grad_norm: 0.9999995939736486, iteration: 314140
loss: 1.0011026859283447,grad_norm: 0.9999991669125615, iteration: 314141
loss: 0.978247344493866,grad_norm: 0.9717542408923432, iteration: 314142
loss: 0.9716976881027222,grad_norm: 0.9345220510732262, iteration: 314143
loss: 1.0400848388671875,grad_norm: 0.9999994434637897, iteration: 314144
loss: 1.0271137952804565,grad_norm: 0.9999994177649919, iteration: 314145
loss: 0.982036292552948,grad_norm: 0.9017947285309713, iteration: 314146
loss: 1.0732578039169312,grad_norm: 0.9999996112841841, iteration: 314147
loss: 1.0066486597061157,grad_norm: 0.6472895622310945, iteration: 314148
loss: 1.0273113250732422,grad_norm: 0.9999995500910843, iteration: 314149
loss: 1.0093584060668945,grad_norm: 0.9697001968657101, iteration: 314150
loss: 1.000807762145996,grad_norm: 0.9999991040501088, iteration: 314151
loss: 1.0800305604934692,grad_norm: 0.9401009961932149, iteration: 314152
loss: 1.0635141134262085,grad_norm: 0.9999993960117153, iteration: 314153
loss: 1.1054048538208008,grad_norm: 0.9999996276903429, iteration: 314154
loss: 1.018191933631897,grad_norm: 0.9999991126346476, iteration: 314155
loss: 1.0063151121139526,grad_norm: 0.8839103964589289, iteration: 314156
loss: 0.999579131603241,grad_norm: 0.7838149486159803, iteration: 314157
loss: 1.090725064277649,grad_norm: 0.9999996665059373, iteration: 314158
loss: 1.0856908559799194,grad_norm: 0.9999991481282152, iteration: 314159
loss: 1.028363823890686,grad_norm: 0.9999993284911823, iteration: 314160
loss: 1.0680994987487793,grad_norm: 0.8639243339175298, iteration: 314161
loss: 1.0428342819213867,grad_norm: 0.9999989695694448, iteration: 314162
loss: 1.0146645307540894,grad_norm: 0.8773738432987188, iteration: 314163
loss: 1.043854832649231,grad_norm: 0.9999990755761325, iteration: 314164
loss: 1.0132322311401367,grad_norm: 0.8338185780931706, iteration: 314165
loss: 0.9867545962333679,grad_norm: 0.9999992658405366, iteration: 314166
loss: 1.057543158531189,grad_norm: 0.9999998652949507, iteration: 314167
loss: 1.0091784000396729,grad_norm: 0.9522054178635402, iteration: 314168
loss: 1.0224095582962036,grad_norm: 0.9067260315450352, iteration: 314169
loss: 1.123529314994812,grad_norm: 0.9281305943104111, iteration: 314170
loss: 1.0128883123397827,grad_norm: 0.9999998642201299, iteration: 314171
loss: 0.9904060959815979,grad_norm: 0.9999989819795715, iteration: 314172
loss: 1.0032638311386108,grad_norm: 0.9999990438178542, iteration: 314173
loss: 1.1462275981903076,grad_norm: 0.9999997378131464, iteration: 314174
loss: 0.983466625213623,grad_norm: 0.7989526290104932, iteration: 314175
loss: 0.9852359294891357,grad_norm: 0.8148052157705348, iteration: 314176
loss: 0.9961045980453491,grad_norm: 0.8514876743400653, iteration: 314177
loss: 1.0378828048706055,grad_norm: 0.9961957887934706, iteration: 314178
loss: 0.9993724226951599,grad_norm: 0.9999998862793517, iteration: 314179
loss: 1.1017147302627563,grad_norm: 0.9999990099310553, iteration: 314180
loss: 1.0185946226119995,grad_norm: 0.8787151433840186, iteration: 314181
loss: 0.9891291856765747,grad_norm: 0.8072395177078989, iteration: 314182
loss: 1.0529060363769531,grad_norm: 0.9999998792879891, iteration: 314183
loss: 1.0014442205429077,grad_norm: 0.8294135672818298, iteration: 314184
loss: 0.9698248505592346,grad_norm: 0.894615935178079, iteration: 314185
loss: 1.1127564907073975,grad_norm: 0.9999999887885097, iteration: 314186
loss: 0.9818262457847595,grad_norm: 0.9446958002795514, iteration: 314187
loss: 1.0399640798568726,grad_norm: 0.926492687307167, iteration: 314188
loss: 1.043215036392212,grad_norm: 0.8656608177726931, iteration: 314189
loss: 1.062828540802002,grad_norm: 0.999998995987336, iteration: 314190
loss: 1.0445672273635864,grad_norm: 0.9999991310208164, iteration: 314191
loss: 1.088423252105713,grad_norm: 0.9999990183884307, iteration: 314192
loss: 1.0108413696289062,grad_norm: 0.999999627921108, iteration: 314193
loss: 1.0393331050872803,grad_norm: 0.9999995358495657, iteration: 314194
loss: 1.1372677087783813,grad_norm: 0.9999992124475519, iteration: 314195
loss: 1.0333833694458008,grad_norm: 0.8257934773854366, iteration: 314196
loss: 0.996113657951355,grad_norm: 0.8969243750687744, iteration: 314197
loss: 1.011980652809143,grad_norm: 0.8470518022851173, iteration: 314198
loss: 1.0149956941604614,grad_norm: 0.8389751998241758, iteration: 314199
loss: 1.1307358741760254,grad_norm: 0.9999999146179939, iteration: 314200
loss: 1.030887484550476,grad_norm: 0.9999993062221305, iteration: 314201
loss: 1.0647433996200562,grad_norm: 0.9052626165480271, iteration: 314202
loss: 1.059863805770874,grad_norm: 0.7621261541438253, iteration: 314203
loss: 1.0085002183914185,grad_norm: 0.8989848231782158, iteration: 314204
loss: 1.0110963582992554,grad_norm: 0.9352109472668372, iteration: 314205
loss: 1.0402827262878418,grad_norm: 0.9468058488049532, iteration: 314206
loss: 0.975484311580658,grad_norm: 0.9209764505711975, iteration: 314207
loss: 1.0385891199111938,grad_norm: 0.999999600748702, iteration: 314208
loss: 0.9308789968490601,grad_norm: 0.9585996632999877, iteration: 314209
loss: 1.0557342767715454,grad_norm: 0.99999938674456, iteration: 314210
loss: 1.0445411205291748,grad_norm: 0.99999901896121, iteration: 314211
loss: 1.010523796081543,grad_norm: 0.8159512799718908, iteration: 314212
loss: 0.9950668811798096,grad_norm: 0.7176229186257918, iteration: 314213
loss: 1.0311391353607178,grad_norm: 0.9662322707893606, iteration: 314214
loss: 1.0382364988327026,grad_norm: 0.99999931677331, iteration: 314215
loss: 1.0012985467910767,grad_norm: 0.8957231944594155, iteration: 314216
loss: 0.9668654799461365,grad_norm: 0.7989469145412693, iteration: 314217
loss: 1.0095244646072388,grad_norm: 0.9999997011295927, iteration: 314218
loss: 1.0319440364837646,grad_norm: 0.9999993383906514, iteration: 314219
loss: 1.0305891036987305,grad_norm: 0.999999819191553, iteration: 314220
loss: 0.9885321855545044,grad_norm: 0.9999994703850915, iteration: 314221
loss: 1.0640528202056885,grad_norm: 0.9999993820800686, iteration: 314222
loss: 1.0255802869796753,grad_norm: 0.9999991362072508, iteration: 314223
loss: 1.012436032295227,grad_norm: 0.7764172583600116, iteration: 314224
loss: 1.0214650630950928,grad_norm: 0.9060752961127778, iteration: 314225
loss: 1.0654414892196655,grad_norm: 0.9049431977162745, iteration: 314226
loss: 1.0126128196716309,grad_norm: 0.9999992126178311, iteration: 314227
loss: 1.0353507995605469,grad_norm: 0.9521834646082328, iteration: 314228
loss: 0.9852604866027832,grad_norm: 0.8263999702136331, iteration: 314229
loss: 1.0185900926589966,grad_norm: 0.9683250400365588, iteration: 314230
loss: 0.9930627346038818,grad_norm: 0.9999991014958793, iteration: 314231
loss: 0.9567035436630249,grad_norm: 0.9999992630829124, iteration: 314232
loss: 1.030859351158142,grad_norm: 0.999999752302971, iteration: 314233
loss: 0.9884060621261597,grad_norm: 0.957418123053608, iteration: 314234
loss: 1.0767461061477661,grad_norm: 0.9999991753226156, iteration: 314235
loss: 1.0418940782546997,grad_norm: 0.8064459913708352, iteration: 314236
loss: 0.99686199426651,grad_norm: 0.7616799225913163, iteration: 314237
loss: 1.0227841138839722,grad_norm: 0.9999994692576399, iteration: 314238
loss: 0.9945424199104309,grad_norm: 0.8304453305178064, iteration: 314239
loss: 0.9960676431655884,grad_norm: 0.7755590984375397, iteration: 314240
loss: 0.9893096089363098,grad_norm: 0.9901870579518648, iteration: 314241
loss: 0.9958909153938293,grad_norm: 0.8082388460385, iteration: 314242
loss: 1.010251522064209,grad_norm: 0.9999993207909789, iteration: 314243
loss: 1.0390167236328125,grad_norm: 0.9662428205046341, iteration: 314244
loss: 0.986060380935669,grad_norm: 0.9999990912607378, iteration: 314245
loss: 1.0651626586914062,grad_norm: 0.931858898229305, iteration: 314246
loss: 0.9970070719718933,grad_norm: 0.9999990890145617, iteration: 314247
loss: 0.9938839673995972,grad_norm: 0.8991459323653219, iteration: 314248
loss: 1.0008379220962524,grad_norm: 0.8477965072560862, iteration: 314249
loss: 1.0151426792144775,grad_norm: 0.8762187733959774, iteration: 314250
loss: 1.0147114992141724,grad_norm: 0.999999234155057, iteration: 314251
loss: 1.022857427597046,grad_norm: 0.7657138030325189, iteration: 314252
loss: 0.9959230422973633,grad_norm: 0.7607066411310682, iteration: 314253
loss: 0.9522469639778137,grad_norm: 0.8428379097327553, iteration: 314254
loss: 0.9716437458992004,grad_norm: 0.8876513376257328, iteration: 314255
loss: 0.9671350717544556,grad_norm: 0.7850019864374026, iteration: 314256
loss: 1.0140615701675415,grad_norm: 0.8789715565072422, iteration: 314257
loss: 1.0635312795639038,grad_norm: 0.8586114378541717, iteration: 314258
loss: 1.10603928565979,grad_norm: 0.9805803208253743, iteration: 314259
loss: 0.9541274905204773,grad_norm: 0.8599902758733987, iteration: 314260
loss: 0.9957740902900696,grad_norm: 0.9999999448446358, iteration: 314261
loss: 1.0550568103790283,grad_norm: 0.9762143134053426, iteration: 314262
loss: 1.0255106687545776,grad_norm: 0.8141887801167054, iteration: 314263
loss: 1.0335233211517334,grad_norm: 0.9999996820931917, iteration: 314264
loss: 1.021899700164795,grad_norm: 0.9999991119437789, iteration: 314265
loss: 0.9530079364776611,grad_norm: 0.7052058131521306, iteration: 314266
loss: 0.9680696725845337,grad_norm: 0.8696971326065066, iteration: 314267
loss: 1.0026500225067139,grad_norm: 0.9060713446616679, iteration: 314268
loss: 1.0032955408096313,grad_norm: 0.9999997318715081, iteration: 314269
loss: 1.0318852663040161,grad_norm: 0.999999104144522, iteration: 314270
loss: 1.0161681175231934,grad_norm: 0.833342966627098, iteration: 314271
loss: 1.0176153182983398,grad_norm: 0.9999994407436666, iteration: 314272
loss: 0.9662620425224304,grad_norm: 0.7534880050267558, iteration: 314273
loss: 0.9867308735847473,grad_norm: 0.9999990346087094, iteration: 314274
loss: 0.974284291267395,grad_norm: 0.852769218795412, iteration: 314275
loss: 1.0677498579025269,grad_norm: 0.9706905919533705, iteration: 314276
loss: 0.9641368985176086,grad_norm: 0.7982734663557034, iteration: 314277
loss: 1.0168981552124023,grad_norm: 0.7028307109707259, iteration: 314278
loss: 0.9743270874023438,grad_norm: 0.9562702008060329, iteration: 314279
loss: 1.0333799123764038,grad_norm: 0.9878888558199389, iteration: 314280
loss: 1.0484637022018433,grad_norm: 0.8598025538908494, iteration: 314281
loss: 1.002661108970642,grad_norm: 0.9196088401863021, iteration: 314282
loss: 0.9580057263374329,grad_norm: 0.8360916047236185, iteration: 314283
loss: 1.0119283199310303,grad_norm: 0.7924551959560037, iteration: 314284
loss: 1.0268605947494507,grad_norm: 0.7782882057995978, iteration: 314285
loss: 0.9855749011039734,grad_norm: 0.8931157850012135, iteration: 314286
loss: 1.013243556022644,grad_norm: 0.8586263836251153, iteration: 314287
loss: 1.1929768323898315,grad_norm: 0.9999993619307582, iteration: 314288
loss: 1.0169475078582764,grad_norm: 0.9078132824197518, iteration: 314289
loss: 1.0100719928741455,grad_norm: 0.7934769910406482, iteration: 314290
loss: 1.0283573865890503,grad_norm: 0.9999995105990059, iteration: 314291
loss: 0.9893260598182678,grad_norm: 0.8486511775825939, iteration: 314292
loss: 1.025146484375,grad_norm: 0.9999993227350953, iteration: 314293
loss: 1.0448274612426758,grad_norm: 0.7870462599701413, iteration: 314294
loss: 1.0811625719070435,grad_norm: 0.9999991106132499, iteration: 314295
loss: 0.9932233095169067,grad_norm: 0.999999748079109, iteration: 314296
loss: 0.9713588356971741,grad_norm: 0.9999993745472613, iteration: 314297
loss: 0.9829236268997192,grad_norm: 0.7693963190791306, iteration: 314298
loss: 1.0297482013702393,grad_norm: 0.977013227572452, iteration: 314299
loss: 0.9826738238334656,grad_norm: 0.9653663907069613, iteration: 314300
loss: 1.0448917150497437,grad_norm: 0.8700986321204435, iteration: 314301
loss: 1.0568578243255615,grad_norm: 0.9481976470940738, iteration: 314302
loss: 0.9946118593215942,grad_norm: 0.8443534734947508, iteration: 314303
loss: 1.0877463817596436,grad_norm: 0.9999998751962754, iteration: 314304
loss: 1.01105797290802,grad_norm: 0.8879488452386967, iteration: 314305
loss: 0.9652056097984314,grad_norm: 0.9999994950316334, iteration: 314306
loss: 1.0580549240112305,grad_norm: 0.9999994639488103, iteration: 314307
loss: 0.9853704571723938,grad_norm: 1.0000000357981453, iteration: 314308
loss: 1.0095221996307373,grad_norm: 0.9999993652608654, iteration: 314309
loss: 0.9751486778259277,grad_norm: 0.8989956453449627, iteration: 314310
loss: 1.0117498636245728,grad_norm: 0.7684201586148177, iteration: 314311
loss: 0.9748206734657288,grad_norm: 0.9930956885390616, iteration: 314312
loss: 1.01704740524292,grad_norm: 0.9999990968857851, iteration: 314313
loss: 1.0019965171813965,grad_norm: 0.8647944660816915, iteration: 314314
loss: 1.0816600322723389,grad_norm: 0.9342029421107625, iteration: 314315
loss: 1.0131525993347168,grad_norm: 0.9999990652544356, iteration: 314316
loss: 0.9934345483779907,grad_norm: 0.8033450996765357, iteration: 314317
loss: 1.066975474357605,grad_norm: 0.9292789579942986, iteration: 314318
loss: 1.0267269611358643,grad_norm: 0.9999991434247184, iteration: 314319
loss: 1.0605930089950562,grad_norm: 0.9999991006387091, iteration: 314320
loss: 0.979002058506012,grad_norm: 0.8438128388944184, iteration: 314321
loss: 1.0702660083770752,grad_norm: 0.999999995326263, iteration: 314322
loss: 1.0223608016967773,grad_norm: 0.9009512104996386, iteration: 314323
loss: 1.041260004043579,grad_norm: 0.9889909375913736, iteration: 314324
loss: 0.954308271408081,grad_norm: 0.8500810295817995, iteration: 314325
loss: 0.9593859314918518,grad_norm: 0.7935928734228246, iteration: 314326
loss: 1.0134986639022827,grad_norm: 0.8753711831413763, iteration: 314327
loss: 0.9895059466362,grad_norm: 0.9420585167489518, iteration: 314328
loss: 0.9397500157356262,grad_norm: 0.8070582094572191, iteration: 314329
loss: 0.9912603497505188,grad_norm: 0.9010872504163396, iteration: 314330
loss: 1.0004483461380005,grad_norm: 0.716900016365806, iteration: 314331
loss: 1.0031219720840454,grad_norm: 0.8647270274221918, iteration: 314332
loss: 1.0059939622879028,grad_norm: 0.8831027538468583, iteration: 314333
loss: 1.0162793397903442,grad_norm: 0.9195551633159236, iteration: 314334
loss: 0.9634459614753723,grad_norm: 0.8529169277336418, iteration: 314335
loss: 1.0144261121749878,grad_norm: 0.8215412876885999, iteration: 314336
loss: 1.0106475353240967,grad_norm: 0.8792639734269782, iteration: 314337
loss: 1.0254420042037964,grad_norm: 0.9999991802802094, iteration: 314338
loss: 1.0284711122512817,grad_norm: 0.999999934272222, iteration: 314339
loss: 1.0162358283996582,grad_norm: 0.855572395944727, iteration: 314340
loss: 1.0680869817733765,grad_norm: 0.9180689626429898, iteration: 314341
loss: 1.059672474861145,grad_norm: 0.8281639947899438, iteration: 314342
loss: 1.0205678939819336,grad_norm: 0.7699726164228888, iteration: 314343
loss: 1.024030089378357,grad_norm: 0.9028503041593111, iteration: 314344
loss: 0.9786276817321777,grad_norm: 0.7477198808532024, iteration: 314345
loss: 1.083335518836975,grad_norm: 0.9999996050478507, iteration: 314346
loss: 0.9728502035140991,grad_norm: 0.7826607115069301, iteration: 314347
loss: 0.9895797967910767,grad_norm: 0.7990608810432748, iteration: 314348
loss: 1.0226490497589111,grad_norm: 0.9314494635193592, iteration: 314349
loss: 1.0192177295684814,grad_norm: 0.9274416845941315, iteration: 314350
loss: 1.007584810256958,grad_norm: 0.69593822241455, iteration: 314351
loss: 1.005311369895935,grad_norm: 0.8811955614651037, iteration: 314352
loss: 1.0379148721694946,grad_norm: 0.8598731428811917, iteration: 314353
loss: 0.9489087462425232,grad_norm: 0.800716537024125, iteration: 314354
loss: 1.0310945510864258,grad_norm: 0.7227656184818085, iteration: 314355
loss: 0.9761738181114197,grad_norm: 0.6907544789134209, iteration: 314356
loss: 0.9893580079078674,grad_norm: 0.9999996040152361, iteration: 314357
loss: 1.0155774354934692,grad_norm: 0.9999995391579646, iteration: 314358
loss: 1.0047696828842163,grad_norm: 0.6834189009860261, iteration: 314359
loss: 0.9993802905082703,grad_norm: 0.9999991097432646, iteration: 314360
loss: 1.0276042222976685,grad_norm: 0.7136200090260716, iteration: 314361
loss: 0.9766743779182434,grad_norm: 0.883266890327237, iteration: 314362
loss: 0.9718892574310303,grad_norm: 0.9089240889258536, iteration: 314363
loss: 1.0039256811141968,grad_norm: 0.9388424732528365, iteration: 314364
loss: 0.978787899017334,grad_norm: 0.9108407150461849, iteration: 314365
loss: 1.024169921875,grad_norm: 0.9445424496847008, iteration: 314366
loss: 1.0463011264801025,grad_norm: 0.9999998490575377, iteration: 314367
loss: 1.004910945892334,grad_norm: 0.8881758220194852, iteration: 314368
loss: 0.9966190457344055,grad_norm: 0.7894959288853453, iteration: 314369
loss: 1.000394344329834,grad_norm: 0.8597010335289098, iteration: 314370
loss: 1.0076112747192383,grad_norm: 0.7809739778535685, iteration: 314371
loss: 1.0237115621566772,grad_norm: 0.9999993918866072, iteration: 314372
loss: 1.0084375143051147,grad_norm: 0.9999994294208362, iteration: 314373
loss: 1.0663334131240845,grad_norm: 0.99982504714323, iteration: 314374
loss: 0.9795457720756531,grad_norm: 0.9617681845452626, iteration: 314375
loss: 0.9811036586761475,grad_norm: 0.8390800562700644, iteration: 314376
loss: 1.070302128791809,grad_norm: 0.9999999218504588, iteration: 314377
loss: 1.038774013519287,grad_norm: 0.8161958593393697, iteration: 314378
loss: 1.0161843299865723,grad_norm: 0.9999993522042347, iteration: 314379
loss: 0.9975399971008301,grad_norm: 0.9830939182776754, iteration: 314380
loss: 1.025137186050415,grad_norm: 0.814855167591972, iteration: 314381
loss: 0.9956945776939392,grad_norm: 0.9999996269670866, iteration: 314382
loss: 0.9953397512435913,grad_norm: 0.8065838802511955, iteration: 314383
loss: 0.9705418348312378,grad_norm: 0.7347275097655998, iteration: 314384
loss: 1.009529709815979,grad_norm: 0.9999993185887283, iteration: 314385
loss: 0.9887412190437317,grad_norm: 0.9288892022059675, iteration: 314386
loss: 1.0029535293579102,grad_norm: 0.9999991936813377, iteration: 314387
loss: 1.0835744142532349,grad_norm: 0.8417552088585223, iteration: 314388
loss: 1.0240800380706787,grad_norm: 0.9999992361037202, iteration: 314389
loss: 1.0614581108093262,grad_norm: 0.9999992092255803, iteration: 314390
loss: 0.99030601978302,grad_norm: 0.7231195504742096, iteration: 314391
loss: 1.0038260221481323,grad_norm: 0.9434366492505799, iteration: 314392
loss: 0.9951654076576233,grad_norm: 0.9999990206607872, iteration: 314393
loss: 1.0004668235778809,grad_norm: 0.8348518218239972, iteration: 314394
loss: 1.046190619468689,grad_norm: 0.900917261279047, iteration: 314395
loss: 1.137836217880249,grad_norm: 0.9999998717256225, iteration: 314396
loss: 1.0640541315078735,grad_norm: 0.9999990905143701, iteration: 314397
loss: 0.9919037222862244,grad_norm: 0.800936662603802, iteration: 314398
loss: 1.0971368551254272,grad_norm: 0.7854376157361077, iteration: 314399
loss: 1.0232447385787964,grad_norm: 0.7467244656184117, iteration: 314400
loss: 0.9822250008583069,grad_norm: 0.9999992396655949, iteration: 314401
loss: 1.0702992677688599,grad_norm: 0.9999991932129396, iteration: 314402
loss: 0.9875598549842834,grad_norm: 0.9639493241043807, iteration: 314403
loss: 1.0782586336135864,grad_norm: 0.9682874785042217, iteration: 314404
loss: 0.9791436791419983,grad_norm: 0.8869080136574468, iteration: 314405
loss: 1.092685580253601,grad_norm: 0.999999847040121, iteration: 314406
loss: 1.0324976444244385,grad_norm: 0.9319812747413277, iteration: 314407
loss: 1.0116181373596191,grad_norm: 1.000000102076159, iteration: 314408
loss: 1.0223524570465088,grad_norm: 0.9364613688731988, iteration: 314409
loss: 1.178328037261963,grad_norm: 0.9999999003439763, iteration: 314410
loss: 1.0236707925796509,grad_norm: 0.9902981357984643, iteration: 314411
loss: 1.0405430793762207,grad_norm: 1.0000000093965025, iteration: 314412
loss: 0.9694246649742126,grad_norm: 0.9279619638076955, iteration: 314413
loss: 1.0257158279418945,grad_norm: 0.8317812365299162, iteration: 314414
loss: 1.0151853561401367,grad_norm: 0.9999993632446481, iteration: 314415
loss: 0.9798367023468018,grad_norm: 0.8176915886283315, iteration: 314416
loss: 1.0812963247299194,grad_norm: 0.9999995416732833, iteration: 314417
loss: 1.0623865127563477,grad_norm: 0.9999993647279534, iteration: 314418
loss: 1.001099944114685,grad_norm: 0.9376151077620627, iteration: 314419
loss: 1.0640379190444946,grad_norm: 0.999999525384752, iteration: 314420
loss: 1.0146815776824951,grad_norm: 0.9462529151687583, iteration: 314421
loss: 1.0279603004455566,grad_norm: 0.9999991539320017, iteration: 314422
loss: 1.0253899097442627,grad_norm: 0.9999995642150574, iteration: 314423
loss: 0.9918710589408875,grad_norm: 0.8719923104008195, iteration: 314424
loss: 1.0666537284851074,grad_norm: 0.9999994392281434, iteration: 314425
loss: 1.0132780075073242,grad_norm: 0.7499327942746821, iteration: 314426
loss: 1.0179444551467896,grad_norm: 0.9778209778068528, iteration: 314427
loss: 1.0152031183242798,grad_norm: 0.9270708857904574, iteration: 314428
loss: 0.9853988289833069,grad_norm: 0.9914254885323557, iteration: 314429
loss: 1.0400751829147339,grad_norm: 0.999999823103553, iteration: 314430
loss: 1.0614445209503174,grad_norm: 0.9235906425953808, iteration: 314431
loss: 1.0427321195602417,grad_norm: 0.8265432361179138, iteration: 314432
loss: 1.116773009300232,grad_norm: 1.0000000457487899, iteration: 314433
loss: 1.0814906358718872,grad_norm: 0.9999998045632037, iteration: 314434
loss: 1.090909481048584,grad_norm: 0.9999994724948277, iteration: 314435
loss: 1.0511095523834229,grad_norm: 0.8576197470885302, iteration: 314436
loss: 1.0060616731643677,grad_norm: 0.9999992530656218, iteration: 314437
loss: 1.0659977197647095,grad_norm: 0.9999995199557534, iteration: 314438
loss: 1.0186084508895874,grad_norm: 0.9999995715162616, iteration: 314439
loss: 0.9909892082214355,grad_norm: 0.8891017132256105, iteration: 314440
loss: 0.9714213609695435,grad_norm: 0.9207460343244428, iteration: 314441
loss: 1.046750783920288,grad_norm: 0.999999892269655, iteration: 314442
loss: 1.0819159746170044,grad_norm: 0.8872173300290879, iteration: 314443
loss: 1.1399040222167969,grad_norm: 0.9999991336042241, iteration: 314444
loss: 1.0279018878936768,grad_norm: 0.9415626677953967, iteration: 314445
loss: 1.0231472253799438,grad_norm: 0.7051374775175584, iteration: 314446
loss: 1.0360462665557861,grad_norm: 0.9999992201968193, iteration: 314447
loss: 1.0049666166305542,grad_norm: 0.9999991659091401, iteration: 314448
loss: 1.0498000383377075,grad_norm: 0.7420001748840936, iteration: 314449
loss: 1.0375151634216309,grad_norm: 0.818295649732804, iteration: 314450
loss: 0.9919829964637756,grad_norm: 0.9999996396186955, iteration: 314451
loss: 1.0476338863372803,grad_norm: 0.9999990784237415, iteration: 314452
loss: 1.011187195777893,grad_norm: 0.7873908955238592, iteration: 314453
loss: 0.9865319728851318,grad_norm: 0.821882726481505, iteration: 314454
loss: 1.0078736543655396,grad_norm: 0.8411520029863901, iteration: 314455
loss: 1.0076171159744263,grad_norm: 0.7795440991462058, iteration: 314456
loss: 1.0321400165557861,grad_norm: 0.7773304557538616, iteration: 314457
loss: 1.0053300857543945,grad_norm: 0.9999992899111732, iteration: 314458
loss: 1.0166791677474976,grad_norm: 0.8588801511652475, iteration: 314459
loss: 1.02620530128479,grad_norm: 0.9999997713339898, iteration: 314460
loss: 1.0162475109100342,grad_norm: 0.986099476363003, iteration: 314461
loss: 1.0415529012680054,grad_norm: 0.9999997174840006, iteration: 314462
loss: 0.9807718396186829,grad_norm: 0.8529064111273853, iteration: 314463
loss: 1.0007774829864502,grad_norm: 0.8440396550647182, iteration: 314464
loss: 1.0170094966888428,grad_norm: 0.9999991385440835, iteration: 314465
loss: 1.0628838539123535,grad_norm: 0.851010051503153, iteration: 314466
loss: 1.0023554563522339,grad_norm: 0.9999990559395605, iteration: 314467
loss: 1.0454121828079224,grad_norm: 0.999999891826877, iteration: 314468
loss: 1.0283855199813843,grad_norm: 0.8630195912169891, iteration: 314469
loss: 1.0356374979019165,grad_norm: 0.7096765488245482, iteration: 314470
loss: 1.0227817296981812,grad_norm: 0.999999163583856, iteration: 314471
loss: 1.0413405895233154,grad_norm: 0.8329441196699162, iteration: 314472
loss: 1.0202068090438843,grad_norm: 0.9999996123865118, iteration: 314473
loss: 1.0104223489761353,grad_norm: 0.9999994008381342, iteration: 314474
loss: 1.0005738735198975,grad_norm: 0.7699787954940025, iteration: 314475
loss: 1.044602394104004,grad_norm: 0.9999994524913176, iteration: 314476
loss: 1.0136398077011108,grad_norm: 0.9999992352128099, iteration: 314477
loss: 0.9783294200897217,grad_norm: 0.8664755195837833, iteration: 314478
loss: 1.040823221206665,grad_norm: 0.9999994314334014, iteration: 314479
loss: 1.0073343515396118,grad_norm: 0.9999991887105429, iteration: 314480
loss: 1.0369415283203125,grad_norm: 0.9999997455622083, iteration: 314481
loss: 1.049917221069336,grad_norm: 0.9999992530542978, iteration: 314482
loss: 1.0814284086227417,grad_norm: 0.9999995589910803, iteration: 314483
loss: 1.0075067281723022,grad_norm: 0.9999995335502082, iteration: 314484
loss: 1.028010606765747,grad_norm: 0.9999993317384018, iteration: 314485
loss: 1.0379884243011475,grad_norm: 0.8777390048188616, iteration: 314486
loss: 1.1049197912216187,grad_norm: 0.9999998182162484, iteration: 314487
loss: 0.9976581931114197,grad_norm: 0.8093862338697975, iteration: 314488
loss: 0.9794799089431763,grad_norm: 0.9999993261838932, iteration: 314489
loss: 0.9877760410308838,grad_norm: 0.8798527868227448, iteration: 314490
loss: 1.003648042678833,grad_norm: 0.7495955379187433, iteration: 314491
loss: 1.0055772066116333,grad_norm: 0.9999990359046934, iteration: 314492
loss: 0.9892228841781616,grad_norm: 0.8871430661714326, iteration: 314493
loss: 1.0452362298965454,grad_norm: 0.9999998562914969, iteration: 314494
loss: 1.0381932258605957,grad_norm: 0.9408267569725279, iteration: 314495
loss: 0.9809619784355164,grad_norm: 0.8126702081341354, iteration: 314496
loss: 1.0235352516174316,grad_norm: 0.7520697569380067, iteration: 314497
loss: 1.0405131578445435,grad_norm: 0.9999991539241264, iteration: 314498
loss: 1.1080143451690674,grad_norm: 0.999999151032482, iteration: 314499
loss: 0.9829916954040527,grad_norm: 0.8194285041547957, iteration: 314500
loss: 1.0610625743865967,grad_norm: 0.9999990908893962, iteration: 314501
loss: 1.0330489873886108,grad_norm: 0.9999996997774575, iteration: 314502
loss: 0.9550412893295288,grad_norm: 0.9999990348734412, iteration: 314503
loss: 1.1953452825546265,grad_norm: 0.9999999777330779, iteration: 314504
loss: 1.0919911861419678,grad_norm: 0.9999994692823212, iteration: 314505
loss: 0.9817401170730591,grad_norm: 0.998449421069322, iteration: 314506
loss: 1.0013200044631958,grad_norm: 0.923274824357953, iteration: 314507
loss: 0.9550076723098755,grad_norm: 0.9999997188079951, iteration: 314508
loss: 0.9691037535667419,grad_norm: 0.9999991558904253, iteration: 314509
loss: 0.9846216440200806,grad_norm: 0.8534538846419294, iteration: 314510
loss: 1.0472725629806519,grad_norm: 0.999999237870317, iteration: 314511
loss: 0.9970477819442749,grad_norm: 0.873786335445257, iteration: 314512
loss: 1.015625,grad_norm: 0.9436320707295525, iteration: 314513
loss: 1.007152795791626,grad_norm: 0.9151814632683326, iteration: 314514
loss: 1.081432819366455,grad_norm: 0.9999996963748168, iteration: 314515
loss: 1.0059840679168701,grad_norm: 0.8626023548425198, iteration: 314516
loss: 1.0566126108169556,grad_norm: 0.9999996529672803, iteration: 314517
loss: 1.0233839750289917,grad_norm: 0.9587062049055963, iteration: 314518
loss: 1.0740623474121094,grad_norm: 0.9999992678081926, iteration: 314519
loss: 0.9712980389595032,grad_norm: 0.8803883418692279, iteration: 314520
loss: 1.0276843309402466,grad_norm: 0.9999996487639541, iteration: 314521
loss: 1.0339608192443848,grad_norm: 0.7658753424102321, iteration: 314522
loss: 1.062949538230896,grad_norm: 0.999999983549174, iteration: 314523
loss: 0.9657922387123108,grad_norm: 0.8673752782873463, iteration: 314524
loss: 1.043034553527832,grad_norm: 0.9999994148972562, iteration: 314525
loss: 1.0733400583267212,grad_norm: 0.9999995785573607, iteration: 314526
loss: 1.0401947498321533,grad_norm: 0.9999994856068558, iteration: 314527
loss: 1.0726170539855957,grad_norm: 0.9999999738505317, iteration: 314528
loss: 0.9724742770195007,grad_norm: 0.9999990445883771, iteration: 314529
loss: 1.0999397039413452,grad_norm: 0.9999995813427798, iteration: 314530
loss: 1.1389774084091187,grad_norm: 0.9999998235769272, iteration: 314531
loss: 1.056269884109497,grad_norm: 0.9596939248079412, iteration: 314532
loss: 1.045762538909912,grad_norm: 0.82777297685618, iteration: 314533
loss: 0.9949951767921448,grad_norm: 0.9999997324539495, iteration: 314534
loss: 1.0343470573425293,grad_norm: 0.7648293513707383, iteration: 314535
loss: 1.0834980010986328,grad_norm: 0.9999997536581439, iteration: 314536
loss: 0.9812318086624146,grad_norm: 0.7716238338983549, iteration: 314537
loss: 1.0812382698059082,grad_norm: 0.6800864850320377, iteration: 314538
loss: 0.9859474301338196,grad_norm: 0.753553017067019, iteration: 314539
loss: 0.9564022421836853,grad_norm: 0.9262077690822981, iteration: 314540
loss: 1.005057692527771,grad_norm: 0.9999994184536838, iteration: 314541
loss: 1.0562299489974976,grad_norm: 0.9999998400740284, iteration: 314542
loss: 0.9897621273994446,grad_norm: 0.8191350539732506, iteration: 314543
loss: 1.032367467880249,grad_norm: 0.942478760746531, iteration: 314544
loss: 0.9795583486557007,grad_norm: 0.763330917077452, iteration: 314545
loss: 0.9616405963897705,grad_norm: 0.7828565479076856, iteration: 314546
loss: 0.9897408485412598,grad_norm: 0.8681263425020029, iteration: 314547
loss: 0.9593871831893921,grad_norm: 0.8158968793817544, iteration: 314548
loss: 1.1211379766464233,grad_norm: 0.9999991258083792, iteration: 314549
loss: 1.0047358274459839,grad_norm: 0.7744517576606575, iteration: 314550
loss: 0.9753246307373047,grad_norm: 0.9387291756266783, iteration: 314551
loss: 1.139832854270935,grad_norm: 0.9999995239514424, iteration: 314552
loss: 1.0283199548721313,grad_norm: 0.9999997835412942, iteration: 314553
loss: 0.9740927815437317,grad_norm: 0.8402957656791004, iteration: 314554
loss: 1.1766399145126343,grad_norm: 0.9999993732141415, iteration: 314555
loss: 1.081856608390808,grad_norm: 0.9999991146661849, iteration: 314556
loss: 1.043564796447754,grad_norm: 0.8392838197672186, iteration: 314557
loss: 1.0112000703811646,grad_norm: 0.7339773352254049, iteration: 314558
loss: 1.0113705396652222,grad_norm: 0.9999994495012003, iteration: 314559
loss: 1.0320478677749634,grad_norm: 0.9999993489848591, iteration: 314560
loss: 1.050047516822815,grad_norm: 0.9999991088114114, iteration: 314561
loss: 1.1224905252456665,grad_norm: 0.999999589474444, iteration: 314562
loss: 1.000471591949463,grad_norm: 0.9999991625886507, iteration: 314563
loss: 1.0306594371795654,grad_norm: 0.9999995379715158, iteration: 314564
loss: 1.0158835649490356,grad_norm: 0.9999997084839152, iteration: 314565
loss: 1.1976197957992554,grad_norm: 0.9999992606494035, iteration: 314566
loss: 1.0483981370925903,grad_norm: 0.9999990696141037, iteration: 314567
loss: 0.9866648316383362,grad_norm: 0.7445693544896347, iteration: 314568
loss: 0.9587189555168152,grad_norm: 0.7758020038312545, iteration: 314569
loss: 0.9770312905311584,grad_norm: 0.9495702121127824, iteration: 314570
loss: 1.003499984741211,grad_norm: 0.999999488083012, iteration: 314571
loss: 1.1211233139038086,grad_norm: 0.9999998818351058, iteration: 314572
loss: 1.0218547582626343,grad_norm: 0.9281461556601518, iteration: 314573
loss: 0.9980267882347107,grad_norm: 0.9999991055792145, iteration: 314574
loss: 0.9955621957778931,grad_norm: 0.8496310514884714, iteration: 314575
loss: 0.9799874424934387,grad_norm: 0.8087357124609669, iteration: 314576
loss: 1.0171126127243042,grad_norm: 0.9699749139447972, iteration: 314577
loss: 1.0505290031433105,grad_norm: 0.8475059260485669, iteration: 314578
loss: 1.0774343013763428,grad_norm: 0.9999999075257602, iteration: 314579
loss: 1.005261778831482,grad_norm: 0.9365218645616495, iteration: 314580
loss: 1.1131361722946167,grad_norm: 0.9999999213046589, iteration: 314581
loss: 1.03086519241333,grad_norm: 0.9292559170662695, iteration: 314582
loss: 1.0002129077911377,grad_norm: 0.8409007986741283, iteration: 314583
loss: 1.025612711906433,grad_norm: 0.9999992209889934, iteration: 314584
loss: 1.0229649543762207,grad_norm: 0.9273592522749224, iteration: 314585
loss: 0.9951075315475464,grad_norm: 0.7471886030424925, iteration: 314586
loss: 1.0862209796905518,grad_norm: 0.9999993071002123, iteration: 314587
loss: 0.966670572757721,grad_norm: 0.7239144702464395, iteration: 314588
loss: 1.0125848054885864,grad_norm: 1.0000000218577567, iteration: 314589
loss: 1.0583839416503906,grad_norm: 0.999999540581405, iteration: 314590
loss: 1.0582783222198486,grad_norm: 0.9518655809387576, iteration: 314591
loss: 0.9674432873725891,grad_norm: 0.6669667319372109, iteration: 314592
loss: 1.0753302574157715,grad_norm: 0.9999992557680539, iteration: 314593
loss: 1.0503157377243042,grad_norm: 0.9819464501241728, iteration: 314594
loss: 1.0164803266525269,grad_norm: 0.999999196028828, iteration: 314595
loss: 1.1009678840637207,grad_norm: 0.907579064338622, iteration: 314596
loss: 0.9847506880760193,grad_norm: 0.836151567560119, iteration: 314597
loss: 1.0119203329086304,grad_norm: 0.9271101602285002, iteration: 314598
loss: 0.9793307185173035,grad_norm: 0.9999991416551169, iteration: 314599
loss: 1.1778267621994019,grad_norm: 0.9999996788964011, iteration: 314600
loss: 1.0702049732208252,grad_norm: 0.8031621930027468, iteration: 314601
loss: 1.0155742168426514,grad_norm: 0.9999992768946399, iteration: 314602
loss: 1.0452948808670044,grad_norm: 0.9999995798990126, iteration: 314603
loss: 1.0067174434661865,grad_norm: 0.9488173014154978, iteration: 314604
loss: 1.0114620923995972,grad_norm: 0.9999993235125842, iteration: 314605
loss: 1.0269737243652344,grad_norm: 0.8032392446749089, iteration: 314606
loss: 0.9987727999687195,grad_norm: 0.7018014684968553, iteration: 314607
loss: 1.0412652492523193,grad_norm: 0.9999989832924516, iteration: 314608
loss: 0.9777016043663025,grad_norm: 0.9220587460641081, iteration: 314609
loss: 1.0313040018081665,grad_norm: 0.9386800961479366, iteration: 314610
loss: 0.9761359095573425,grad_norm: 0.8350862906267742, iteration: 314611
loss: 0.9897600412368774,grad_norm: 0.9999991073239505, iteration: 314612
loss: 1.0128884315490723,grad_norm: 0.9999993475749572, iteration: 314613
loss: 0.9960151314735413,grad_norm: 0.7683195403706332, iteration: 314614
loss: 0.9665145874023438,grad_norm: 0.8745523709262784, iteration: 314615
loss: 1.044094443321228,grad_norm: 0.9676488083186597, iteration: 314616
loss: 1.014924168586731,grad_norm: 0.9072731671992885, iteration: 314617
loss: 1.004361629486084,grad_norm: 0.9089062915287707, iteration: 314618
loss: 1.0259714126586914,grad_norm: 0.999998999480305, iteration: 314619
loss: 1.0657776594161987,grad_norm: 0.9999997770151428, iteration: 314620
loss: 1.0083929300308228,grad_norm: 0.8818577688950444, iteration: 314621
loss: 1.0307950973510742,grad_norm: 0.9324437563608108, iteration: 314622
loss: 0.9953844547271729,grad_norm: 0.848004788450599, iteration: 314623
loss: 1.0302857160568237,grad_norm: 0.8502113471122772, iteration: 314624
loss: 0.9855280518531799,grad_norm: 0.8231293246233995, iteration: 314625
loss: 0.9905697703361511,grad_norm: 0.8986227683688915, iteration: 314626
loss: 0.9857774972915649,grad_norm: 0.8376828010698754, iteration: 314627
loss: 0.9996474385261536,grad_norm: 0.750965742843732, iteration: 314628
loss: 1.058203101158142,grad_norm: 0.9999996206494622, iteration: 314629
loss: 0.9856280088424683,grad_norm: 0.9999992262150799, iteration: 314630
loss: 1.0269560813903809,grad_norm: 0.8005901816489397, iteration: 314631
loss: 1.0055632591247559,grad_norm: 0.8430626017955352, iteration: 314632
loss: 1.054564118385315,grad_norm: 0.902738666435752, iteration: 314633
loss: 1.000308871269226,grad_norm: 0.7617301380177436, iteration: 314634
loss: 1.009545087814331,grad_norm: 0.8827231282480982, iteration: 314635
loss: 1.0137710571289062,grad_norm: 0.7512833327616639, iteration: 314636
loss: 0.9489550590515137,grad_norm: 0.76584675157372, iteration: 314637
loss: 1.0264086723327637,grad_norm: 0.9999992775021999, iteration: 314638
loss: 1.0854532718658447,grad_norm: 0.9999999596500415, iteration: 314639
loss: 1.0144747495651245,grad_norm: 0.9999992615516242, iteration: 314640
loss: 0.9940521121025085,grad_norm: 0.7983962337036734, iteration: 314641
loss: 0.9946513175964355,grad_norm: 0.9788729071777954, iteration: 314642
loss: 0.9874038100242615,grad_norm: 0.8813831804497168, iteration: 314643
loss: 0.9947596192359924,grad_norm: 0.7235062948357558, iteration: 314644
loss: 1.0122275352478027,grad_norm: 0.7051015422482791, iteration: 314645
loss: 1.037842869758606,grad_norm: 0.9999990940703152, iteration: 314646
loss: 1.0155165195465088,grad_norm: 0.9999996979350196, iteration: 314647
loss: 0.9833023548126221,grad_norm: 0.8441982142643838, iteration: 314648
loss: 1.0149023532867432,grad_norm: 0.7259312057250007, iteration: 314649
loss: 1.0002310276031494,grad_norm: 0.966532361550227, iteration: 314650
loss: 0.9910798668861389,grad_norm: 0.750831457708476, iteration: 314651
loss: 0.973302960395813,grad_norm: 0.9004391846614221, iteration: 314652
loss: 1.0102298259735107,grad_norm: 0.9999991017824003, iteration: 314653
loss: 0.9949246048927307,grad_norm: 0.8301435451637622, iteration: 314654
loss: 0.9512008428573608,grad_norm: 0.933013549251032, iteration: 314655
loss: 1.0016117095947266,grad_norm: 0.9134671969777082, iteration: 314656
loss: 0.9806930422782898,grad_norm: 0.9511813463997412, iteration: 314657
loss: 1.012752890586853,grad_norm: 0.8442117793504309, iteration: 314658
loss: 1.0370644330978394,grad_norm: 0.9999989690512772, iteration: 314659
loss: 0.9984780550003052,grad_norm: 0.992123333207722, iteration: 314660
loss: 1.0918128490447998,grad_norm: 0.9999997889593757, iteration: 314661
loss: 1.0779674053192139,grad_norm: 0.9999992439994733, iteration: 314662
loss: 1.0607331991195679,grad_norm: 0.9999998125047354, iteration: 314663
loss: 0.9979677796363831,grad_norm: 0.82527554402259, iteration: 314664
loss: 1.0199875831604004,grad_norm: 0.9999993219715698, iteration: 314665
loss: 1.0003530979156494,grad_norm: 0.9999997633992938, iteration: 314666
loss: 0.9934114217758179,grad_norm: 0.7934738905262088, iteration: 314667
loss: 0.9843490123748779,grad_norm: 0.923876636620219, iteration: 314668
loss: 1.0281907320022583,grad_norm: 0.7847975874255873, iteration: 314669
loss: 0.993470311164856,grad_norm: 0.9999999810442407, iteration: 314670
loss: 1.0026919841766357,grad_norm: 0.8248274802905846, iteration: 314671
loss: 1.0318759679794312,grad_norm: 0.8881141715638462, iteration: 314672
loss: 1.0236515998840332,grad_norm: 0.99999939097714, iteration: 314673
loss: 1.0022014379501343,grad_norm: 0.8013830674394258, iteration: 314674
loss: 1.0591094493865967,grad_norm: 0.9999992392105576, iteration: 314675
loss: 0.989558756351471,grad_norm: 0.6481723461134915, iteration: 314676
loss: 0.9913225769996643,grad_norm: 0.84102952996188, iteration: 314677
loss: 0.9848958849906921,grad_norm: 0.8298021232971915, iteration: 314678
loss: 1.002001166343689,grad_norm: 0.8501520421790929, iteration: 314679
loss: 1.0306299924850464,grad_norm: 0.7774373705850475, iteration: 314680
loss: 1.0108283758163452,grad_norm: 0.98030622188795, iteration: 314681
loss: 0.9866951107978821,grad_norm: 0.9122239566424873, iteration: 314682
loss: 0.9918597340583801,grad_norm: 0.7563308567246199, iteration: 314683
loss: 0.9868392944335938,grad_norm: 0.8201053205806453, iteration: 314684
loss: 1.0308090448379517,grad_norm: 0.9999990992783825, iteration: 314685
loss: 1.0470337867736816,grad_norm: 0.9999995542596178, iteration: 314686
loss: 0.9923960566520691,grad_norm: 0.7097232383308794, iteration: 314687
loss: 0.9924482107162476,grad_norm: 0.7439681834859824, iteration: 314688
loss: 0.970058262348175,grad_norm: 0.7861055090711213, iteration: 314689
loss: 1.0058882236480713,grad_norm: 0.9999995129111401, iteration: 314690
loss: 0.9638581871986389,grad_norm: 0.8780437170463429, iteration: 314691
loss: 0.9883111119270325,grad_norm: 0.8481614178110045, iteration: 314692
loss: 1.0272623300552368,grad_norm: 0.7517290738976415, iteration: 314693
loss: 1.0905072689056396,grad_norm: 0.9999991381892092, iteration: 314694
loss: 0.9941741824150085,grad_norm: 0.9337898562472853, iteration: 314695
loss: 0.9901093244552612,grad_norm: 0.8088543097115384, iteration: 314696
loss: 1.033911108970642,grad_norm: 0.7532724432385297, iteration: 314697
loss: 1.014995813369751,grad_norm: 0.8040280374467286, iteration: 314698
loss: 1.021590232849121,grad_norm: 0.7993832959254774, iteration: 314699
loss: 1.013771891593933,grad_norm: 0.717809350162893, iteration: 314700
loss: 1.216926097869873,grad_norm: 0.9999998217525565, iteration: 314701
loss: 0.9861854910850525,grad_norm: 0.8170761550004523, iteration: 314702
loss: 1.0141096115112305,grad_norm: 0.8455066424680777, iteration: 314703
loss: 0.9792122840881348,grad_norm: 0.9212131790918872, iteration: 314704
loss: 0.9995901584625244,grad_norm: 0.9294148706687326, iteration: 314705
loss: 1.03010094165802,grad_norm: 0.8696823439636509, iteration: 314706
loss: 1.0000026226043701,grad_norm: 0.8676023221549426, iteration: 314707
loss: 1.019665002822876,grad_norm: 0.876683156874662, iteration: 314708
loss: 0.973003625869751,grad_norm: 0.8391180529379566, iteration: 314709
loss: 1.029687762260437,grad_norm: 0.8344792465166966, iteration: 314710
loss: 0.996873140335083,grad_norm: 0.8380953107624877, iteration: 314711
loss: 1.1425195932388306,grad_norm: 0.9999997208795881, iteration: 314712
loss: 0.9792468547821045,grad_norm: 0.7990349331644568, iteration: 314713
loss: 0.9755479693412781,grad_norm: 0.8423904898165778, iteration: 314714
loss: 0.9996392726898193,grad_norm: 0.9964621687657532, iteration: 314715
loss: 1.0126358270645142,grad_norm: 0.8239169985893421, iteration: 314716
loss: 1.1293293237686157,grad_norm: 0.9999998342725047, iteration: 314717
loss: 0.9919916987419128,grad_norm: 0.920003331312803, iteration: 314718
loss: 1.1028550863265991,grad_norm: 0.9999995951463808, iteration: 314719
loss: 1.0027556419372559,grad_norm: 0.8927393602131596, iteration: 314720
loss: 0.9737857580184937,grad_norm: 0.9999993770700113, iteration: 314721
loss: 1.032790184020996,grad_norm: 0.9216692516452704, iteration: 314722
loss: 1.022026777267456,grad_norm: 0.7733387609039514, iteration: 314723
loss: 1.0039969682693481,grad_norm: 0.8123787008491793, iteration: 314724
loss: 1.0216937065124512,grad_norm: 0.8633348056856658, iteration: 314725
loss: 1.0183236598968506,grad_norm: 0.8353605377723778, iteration: 314726
loss: 1.0395786762237549,grad_norm: 0.9999993784487198, iteration: 314727
loss: 1.023949384689331,grad_norm: 0.9025119568154113, iteration: 314728
loss: 0.9983046054840088,grad_norm: 0.9999991828028758, iteration: 314729
loss: 0.9780986309051514,grad_norm: 0.9087177202579675, iteration: 314730
loss: 0.983900249004364,grad_norm: 0.8251154142463882, iteration: 314731
loss: 1.0044246912002563,grad_norm: 0.7538184971995259, iteration: 314732
loss: 1.0047003030776978,grad_norm: 0.911316499849214, iteration: 314733
loss: 1.0072892904281616,grad_norm: 0.9999992925020166, iteration: 314734
loss: 0.9899258017539978,grad_norm: 0.8646998371411801, iteration: 314735
loss: 1.013136625289917,grad_norm: 0.8132037122507391, iteration: 314736
loss: 1.0197055339813232,grad_norm: 0.8592014216718362, iteration: 314737
loss: 0.9872900247573853,grad_norm: 0.7032769446390903, iteration: 314738
loss: 1.0122272968292236,grad_norm: 0.8548330218927966, iteration: 314739
loss: 0.9994716644287109,grad_norm: 0.9111507749298171, iteration: 314740
loss: 0.9883785843849182,grad_norm: 0.8314525287103471, iteration: 314741
loss: 0.999711811542511,grad_norm: 0.9999989983544362, iteration: 314742
loss: 1.0115152597427368,grad_norm: 0.9219270781692602, iteration: 314743
loss: 0.9939369559288025,grad_norm: 0.9999990353695771, iteration: 314744
loss: 0.9979901909828186,grad_norm: 0.7874938365004839, iteration: 314745
loss: 1.0175191164016724,grad_norm: 0.8345268323638619, iteration: 314746
loss: 0.9877331852912903,grad_norm: 0.7232581580558717, iteration: 314747
loss: 0.9932547807693481,grad_norm: 0.9399770974440005, iteration: 314748
loss: 0.9719506502151489,grad_norm: 0.8972247763095925, iteration: 314749
loss: 1.0004736185073853,grad_norm: 0.8388997369213417, iteration: 314750
loss: 1.0014008283615112,grad_norm: 0.951839884550424, iteration: 314751
loss: 1.0112682580947876,grad_norm: 0.7866346710661961, iteration: 314752
loss: 1.0452980995178223,grad_norm: 0.885773900718988, iteration: 314753
loss: 1.0017281770706177,grad_norm: 0.8191998091187379, iteration: 314754
loss: 1.015493631362915,grad_norm: 0.9995317552199112, iteration: 314755
loss: 0.9688621759414673,grad_norm: 0.6689509988585601, iteration: 314756
loss: 0.9970393180847168,grad_norm: 0.8791937985024592, iteration: 314757
loss: 1.0049304962158203,grad_norm: 0.8491284765053693, iteration: 314758
loss: 0.9708718657493591,grad_norm: 0.795301635180349, iteration: 314759
loss: 0.9442474246025085,grad_norm: 0.8641934921186163, iteration: 314760
loss: 1.0199590921401978,grad_norm: 0.8511535927846624, iteration: 314761
loss: 0.9845547676086426,grad_norm: 0.8073100708888759, iteration: 314762
loss: 1.0142818689346313,grad_norm: 0.8705544211546281, iteration: 314763
loss: 0.9630646109580994,grad_norm: 0.9332649480793286, iteration: 314764
loss: 0.991913378238678,grad_norm: 0.8655888567001407, iteration: 314765
loss: 1.0256402492523193,grad_norm: 0.8222908316642129, iteration: 314766
loss: 1.0009033679962158,grad_norm: 0.781523264388861, iteration: 314767
loss: 0.9808377027511597,grad_norm: 0.8316669147612835, iteration: 314768
loss: 0.9557269215583801,grad_norm: 0.8528514629439602, iteration: 314769
loss: 1.0075275897979736,grad_norm: 0.7456036893534361, iteration: 314770
loss: 1.001351237297058,grad_norm: 0.9142386467928041, iteration: 314771
loss: 0.9975436925888062,grad_norm: 0.9909294427937825, iteration: 314772
loss: 1.0212916135787964,grad_norm: 0.9787275399501149, iteration: 314773
loss: 0.9845112562179565,grad_norm: 0.9999989585948406, iteration: 314774
loss: 1.0321320295333862,grad_norm: 0.7995428024462656, iteration: 314775
loss: 1.0255926847457886,grad_norm: 0.8687143760744022, iteration: 314776
loss: 0.9830957055091858,grad_norm: 0.8412523756214735, iteration: 314777
loss: 0.9814683794975281,grad_norm: 0.8603900106375957, iteration: 314778
loss: 0.9898172616958618,grad_norm: 0.9731240685455806, iteration: 314779
loss: 1.0066032409667969,grad_norm: 0.7827749287055846, iteration: 314780
loss: 0.986430287361145,grad_norm: 0.8413282678297888, iteration: 314781
loss: 1.0622239112854004,grad_norm: 0.9293074889416123, iteration: 314782
loss: 0.9855585694313049,grad_norm: 0.8801655522020566, iteration: 314783
loss: 0.9705471992492676,grad_norm: 0.9999997322030477, iteration: 314784
loss: 0.9709486365318298,grad_norm: 0.7745310845722894, iteration: 314785
loss: 0.9756159782409668,grad_norm: 0.7561003328462481, iteration: 314786
loss: 0.9902948141098022,grad_norm: 0.8544615570354958, iteration: 314787
loss: 0.984552264213562,grad_norm: 0.8686641053217765, iteration: 314788
loss: 1.0311189889907837,grad_norm: 0.9999998322392037, iteration: 314789
loss: 1.0481910705566406,grad_norm: 1.0000000412952876, iteration: 314790
loss: 1.0325465202331543,grad_norm: 0.7947731353736707, iteration: 314791
loss: 0.9884324073791504,grad_norm: 0.8266451470267708, iteration: 314792
loss: 1.029124140739441,grad_norm: 0.8689314922223906, iteration: 314793
loss: 1.0469552278518677,grad_norm: 0.9999999269201697, iteration: 314794
loss: 1.0097565650939941,grad_norm: 0.7683742191294544, iteration: 314795
loss: 1.0350067615509033,grad_norm: 0.999999655287998, iteration: 314796
loss: 1.0727834701538086,grad_norm: 0.9999998064287956, iteration: 314797
loss: 1.0241804122924805,grad_norm: 0.9618830320391379, iteration: 314798
loss: 1.0092371702194214,grad_norm: 0.9478422788653138, iteration: 314799
loss: 0.9933949708938599,grad_norm: 0.7870062919276096, iteration: 314800
loss: 0.9998894929885864,grad_norm: 0.9999999327227722, iteration: 314801
loss: 1.0026416778564453,grad_norm: 0.8215309805124682, iteration: 314802
loss: 0.9920812845230103,grad_norm: 0.8004728148921303, iteration: 314803
loss: 1.0068162679672241,grad_norm: 0.817313126473591, iteration: 314804
loss: 0.9919602870941162,grad_norm: 0.903885428040798, iteration: 314805
loss: 0.9921676516532898,grad_norm: 0.8029446219967028, iteration: 314806
loss: 0.9958714246749878,grad_norm: 0.9999997028857518, iteration: 314807
loss: 0.9731916785240173,grad_norm: 0.7142264012112245, iteration: 314808
loss: 1.0359206199645996,grad_norm: 0.8372692274061834, iteration: 314809
loss: 1.0140396356582642,grad_norm: 0.8226782394270299, iteration: 314810
loss: 0.9657968282699585,grad_norm: 0.7292935468220585, iteration: 314811
loss: 1.0196831226348877,grad_norm: 0.8469125479162535, iteration: 314812
loss: 1.0341917276382446,grad_norm: 0.9999992341062364, iteration: 314813
loss: 1.0626593828201294,grad_norm: 0.9999995772971498, iteration: 314814
loss: 0.9790297150611877,grad_norm: 0.7705474867058572, iteration: 314815
loss: 0.9850635528564453,grad_norm: 0.8200371051090282, iteration: 314816
loss: 0.9891246557235718,grad_norm: 0.9948727052607236, iteration: 314817
loss: 1.012591004371643,grad_norm: 0.8133624422285094, iteration: 314818
loss: 1.0420128107070923,grad_norm: 0.9999990854988001, iteration: 314819
loss: 1.0189785957336426,grad_norm: 0.9999997367267456, iteration: 314820
loss: 1.0197762250900269,grad_norm: 0.8023710908485174, iteration: 314821
loss: 1.0287195444107056,grad_norm: 0.9999990795790706, iteration: 314822
loss: 1.0549986362457275,grad_norm: 0.999999547808301, iteration: 314823
loss: 1.0126639604568481,grad_norm: 0.7532066083994339, iteration: 314824
loss: 1.04570734500885,grad_norm: 0.8430627898919693, iteration: 314825
loss: 1.0101804733276367,grad_norm: 0.7684512162534188, iteration: 314826
loss: 1.0512944459915161,grad_norm: 0.9999998670404263, iteration: 314827
loss: 0.9766587615013123,grad_norm: 0.960111969484509, iteration: 314828
loss: 1.0036717653274536,grad_norm: 0.820844084838392, iteration: 314829
loss: 0.9973441362380981,grad_norm: 0.7841174455688917, iteration: 314830
loss: 1.0059815645217896,grad_norm: 0.8269970175871126, iteration: 314831
loss: 0.9775481224060059,grad_norm: 0.9163886503831157, iteration: 314832
loss: 1.0170907974243164,grad_norm: 0.7890145953752787, iteration: 314833
loss: 1.009526252746582,grad_norm: 0.8412338045852488, iteration: 314834
loss: 1.0676367282867432,grad_norm: 0.9999998740609968, iteration: 314835
loss: 1.0049318075180054,grad_norm: 0.9506749107694877, iteration: 314836
loss: 1.0165166854858398,grad_norm: 0.9999994244594818, iteration: 314837
loss: 0.9626144170761108,grad_norm: 0.8872286975242929, iteration: 314838
loss: 1.1065765619277954,grad_norm: 0.813281772392348, iteration: 314839
loss: 1.0303994417190552,grad_norm: 0.9999992339995162, iteration: 314840
loss: 1.0066617727279663,grad_norm: 0.8504548988514451, iteration: 314841
loss: 0.9861831068992615,grad_norm: 0.9999992222561265, iteration: 314842
loss: 1.0108033418655396,grad_norm: 0.9004145727405097, iteration: 314843
loss: 1.0029473304748535,grad_norm: 0.9253099342645116, iteration: 314844
loss: 0.996642529964447,grad_norm: 0.7911487877703457, iteration: 314845
loss: 1.0257303714752197,grad_norm: 0.8766488140454561, iteration: 314846
loss: 0.9886263012886047,grad_norm: 0.9999990832278715, iteration: 314847
loss: 1.028635025024414,grad_norm: 0.852426652856674, iteration: 314848
loss: 1.034201741218567,grad_norm: 0.8374917314819189, iteration: 314849
loss: 0.9580236673355103,grad_norm: 0.7517866014747409, iteration: 314850
loss: 0.952490508556366,grad_norm: 0.8763165420501765, iteration: 314851
loss: 0.971217930316925,grad_norm: 0.84477600091146, iteration: 314852
loss: 1.0958787202835083,grad_norm: 0.9999997567910279, iteration: 314853
loss: 0.9754711389541626,grad_norm: 0.9261690305006514, iteration: 314854
loss: 0.9891389608383179,grad_norm: 0.79831912772675, iteration: 314855
loss: 1.0323972702026367,grad_norm: 0.8664572345738136, iteration: 314856
loss: 0.9871973395347595,grad_norm: 0.8764500202161166, iteration: 314857
loss: 0.9695318937301636,grad_norm: 0.9999990818632672, iteration: 314858
loss: 0.9583543539047241,grad_norm: 0.815717537254504, iteration: 314859
loss: 1.0157488584518433,grad_norm: 0.8292717461538515, iteration: 314860
loss: 1.0072559118270874,grad_norm: 0.7970965321463419, iteration: 314861
loss: 1.0060017108917236,grad_norm: 0.84595827107774, iteration: 314862
loss: 1.0370372533798218,grad_norm: 0.9816836436107328, iteration: 314863
loss: 1.1043826341629028,grad_norm: 0.9999997285369072, iteration: 314864
loss: 1.0705968141555786,grad_norm: 0.9267699783725563, iteration: 314865
loss: 1.0616657733917236,grad_norm: 0.7662333836545363, iteration: 314866
loss: 0.9724242091178894,grad_norm: 0.8045701872335247, iteration: 314867
loss: 0.9926279187202454,grad_norm: 0.8894463143829671, iteration: 314868
loss: 1.0255619287490845,grad_norm: 0.8587034842126376, iteration: 314869
loss: 1.0104366540908813,grad_norm: 0.9999992102224082, iteration: 314870
loss: 0.9932197332382202,grad_norm: 0.9809440830370362, iteration: 314871
loss: 1.021519422531128,grad_norm: 0.9858550510997592, iteration: 314872
loss: 1.0123300552368164,grad_norm: 0.9273880008117685, iteration: 314873
loss: 1.0028495788574219,grad_norm: 0.8664977905249172, iteration: 314874
loss: 0.9948428869247437,grad_norm: 0.9077308648703651, iteration: 314875
loss: 0.9830116033554077,grad_norm: 0.8024576269599456, iteration: 314876
loss: 0.980277955532074,grad_norm: 0.999999130294575, iteration: 314877
loss: 0.9904013276100159,grad_norm: 0.9533908054560413, iteration: 314878
loss: 1.0113928318023682,grad_norm: 0.7998182260202795, iteration: 314879
loss: 1.0049233436584473,grad_norm: 0.9078818980378495, iteration: 314880
loss: 0.9644001126289368,grad_norm: 0.9999990090538019, iteration: 314881
loss: 0.9833452105522156,grad_norm: 0.7368854557310359, iteration: 314882
loss: 0.963937520980835,grad_norm: 0.8570380349661164, iteration: 314883
loss: 1.0681101083755493,grad_norm: 0.9999998237566416, iteration: 314884
loss: 0.9979469180107117,grad_norm: 0.921097343964636, iteration: 314885
loss: 1.043527603149414,grad_norm: 0.9279757925861841, iteration: 314886
loss: 1.0133686065673828,grad_norm: 0.9999991931586573, iteration: 314887
loss: 0.9895056486129761,grad_norm: 0.9999990940659532, iteration: 314888
loss: 1.006266474723816,grad_norm: 0.7269147257663882, iteration: 314889
loss: 1.042999029159546,grad_norm: 0.7214778740350041, iteration: 314890
loss: 0.9981553554534912,grad_norm: 0.7841836393190955, iteration: 314891
loss: 1.05902099609375,grad_norm: 0.9999999266512356, iteration: 314892
loss: 0.9839776158332825,grad_norm: 0.9999991357015218, iteration: 314893
loss: 1.0056507587432861,grad_norm: 0.9999992530174685, iteration: 314894
loss: 0.9839399456977844,grad_norm: 0.7122858162227745, iteration: 314895
loss: 1.0456804037094116,grad_norm: 0.8474004218519645, iteration: 314896
loss: 1.0006439685821533,grad_norm: 0.78871430814639, iteration: 314897
loss: 1.0004695653915405,grad_norm: 0.7795034013641201, iteration: 314898
loss: 1.0024065971374512,grad_norm: 0.8073642373070054, iteration: 314899
loss: 0.9751987457275391,grad_norm: 0.9126896346173085, iteration: 314900
loss: 1.0222408771514893,grad_norm: 0.852063912019707, iteration: 314901
loss: 0.9478347301483154,grad_norm: 0.9931798414022991, iteration: 314902
loss: 1.0089224576950073,grad_norm: 0.999999362022052, iteration: 314903
loss: 1.0974388122558594,grad_norm: 0.9999991167213854, iteration: 314904
loss: 1.0199462175369263,grad_norm: 0.9999991218154314, iteration: 314905
loss: 0.9901744723320007,grad_norm: 0.83808565153132, iteration: 314906
loss: 1.0616976022720337,grad_norm: 0.9999991395849483, iteration: 314907
loss: 0.9827178120613098,grad_norm: 0.9999991842695101, iteration: 314908
loss: 1.0418903827667236,grad_norm: 0.9999995561709395, iteration: 314909
loss: 1.002949833869934,grad_norm: 0.7223051592658204, iteration: 314910
loss: 1.0550457239151,grad_norm: 0.999998996375397, iteration: 314911
loss: 1.1302490234375,grad_norm: 0.9999993296808958, iteration: 314912
loss: 0.9947313666343689,grad_norm: 0.9999992131247195, iteration: 314913
loss: 0.9804192185401917,grad_norm: 0.9329947442711253, iteration: 314914
loss: 0.9989500641822815,grad_norm: 0.7661742091193988, iteration: 314915
loss: 1.0124880075454712,grad_norm: 0.8368550848465657, iteration: 314916
loss: 1.0303913354873657,grad_norm: 0.9999996052246939, iteration: 314917
loss: 1.0708003044128418,grad_norm: 0.9999990551770334, iteration: 314918
loss: 1.020985722541809,grad_norm: 0.708738197887042, iteration: 314919
loss: 0.9874669313430786,grad_norm: 0.7556124902744958, iteration: 314920
loss: 1.04745614528656,grad_norm: 0.9999998375628264, iteration: 314921
loss: 1.029905915260315,grad_norm: 0.9999992684983701, iteration: 314922
loss: 0.9836752414703369,grad_norm: 0.8520341655996198, iteration: 314923
loss: 0.9924105405807495,grad_norm: 0.8764454989015464, iteration: 314924
loss: 0.9877333045005798,grad_norm: 0.6922833847442369, iteration: 314925
loss: 1.0080543756484985,grad_norm: 0.7585784153248414, iteration: 314926
loss: 1.007605791091919,grad_norm: 0.9748646908449098, iteration: 314927
loss: 1.006823182106018,grad_norm: 0.794015976767631, iteration: 314928
loss: 0.9923247694969177,grad_norm: 0.7619361726664396, iteration: 314929
loss: 1.0075820684432983,grad_norm: 0.8642556925839132, iteration: 314930
loss: 0.9744983315467834,grad_norm: 0.8683893457400479, iteration: 314931
loss: 1.0109200477600098,grad_norm: 0.7366189852805484, iteration: 314932
loss: 1.052485704421997,grad_norm: 0.8980597918055294, iteration: 314933
loss: 1.0076818466186523,grad_norm: 0.9999990588153413, iteration: 314934
loss: 0.9575850367546082,grad_norm: 0.7985384711190873, iteration: 314935
loss: 0.972542941570282,grad_norm: 0.9794944459435238, iteration: 314936
loss: 0.9991775155067444,grad_norm: 0.918985289741697, iteration: 314937
loss: 0.993061363697052,grad_norm: 0.9527353482727701, iteration: 314938
loss: 1.0131553411483765,grad_norm: 0.8661029068263913, iteration: 314939
loss: 1.0054600238800049,grad_norm: 0.8104583218922814, iteration: 314940
loss: 0.9982225298881531,grad_norm: 0.9999991241633295, iteration: 314941
loss: 0.9953989386558533,grad_norm: 0.9999991408613, iteration: 314942
loss: 1.0478689670562744,grad_norm: 0.9999990142274385, iteration: 314943
loss: 1.0054335594177246,grad_norm: 0.9999991239075946, iteration: 314944
loss: 1.061077356338501,grad_norm: 0.9999993180352349, iteration: 314945
loss: 0.9911693930625916,grad_norm: 0.8375023991680073, iteration: 314946
loss: 1.0257935523986816,grad_norm: 0.8775711300294847, iteration: 314947
loss: 1.0092923641204834,grad_norm: 0.9225497736193029, iteration: 314948
loss: 0.9633409976959229,grad_norm: 0.8630833661111381, iteration: 314949
loss: 1.0089560747146606,grad_norm: 0.9999991737545504, iteration: 314950
loss: 0.9845103025436401,grad_norm: 0.8190693844616433, iteration: 314951
loss: 1.005314588546753,grad_norm: 0.8655961105409619, iteration: 314952
loss: 0.9806384444236755,grad_norm: 0.700828791555447, iteration: 314953
loss: 1.0139729976654053,grad_norm: 0.9117723593473275, iteration: 314954
loss: 0.982097327709198,grad_norm: 0.7518180765895766, iteration: 314955
loss: 1.0308547019958496,grad_norm: 0.7312740269786792, iteration: 314956
loss: 1.020493745803833,grad_norm: 0.942820280130475, iteration: 314957
loss: 1.0287535190582275,grad_norm: 0.9999990657120099, iteration: 314958
loss: 0.9647806882858276,grad_norm: 0.9500171685083081, iteration: 314959
loss: 1.0313050746917725,grad_norm: 0.7208104031492119, iteration: 314960
loss: 1.0044955015182495,grad_norm: 0.8789336412737747, iteration: 314961
loss: 0.9952283501625061,grad_norm: 0.8360020686333806, iteration: 314962
loss: 0.9777047634124756,grad_norm: 0.7986033521989819, iteration: 314963
loss: 1.0041886568069458,grad_norm: 0.9999989196251806, iteration: 314964
loss: 1.0091701745986938,grad_norm: 0.999999741261778, iteration: 314965
loss: 0.9861783385276794,grad_norm: 0.7622050291112252, iteration: 314966
loss: 1.0029925107955933,grad_norm: 0.9999990062786448, iteration: 314967
loss: 1.022578239440918,grad_norm: 0.8518287287948971, iteration: 314968
loss: 0.9835805892944336,grad_norm: 0.8674782001551355, iteration: 314969
loss: 0.9792077541351318,grad_norm: 0.8315426418176637, iteration: 314970
loss: 0.9880523681640625,grad_norm: 0.8059063861125664, iteration: 314971
loss: 1.0609426498413086,grad_norm: 0.9999995495935174, iteration: 314972
loss: 0.9952149391174316,grad_norm: 0.8625542893987876, iteration: 314973
loss: 0.9979919195175171,grad_norm: 0.8335252017155586, iteration: 314974
loss: 1.0052262544631958,grad_norm: 0.8318899317982508, iteration: 314975
loss: 1.0044562816619873,grad_norm: 0.7272376455526756, iteration: 314976
loss: 1.0325111150741577,grad_norm: 0.8263389243433648, iteration: 314977
loss: 1.0134109258651733,grad_norm: 0.9039947514227629, iteration: 314978
loss: 0.9865832924842834,grad_norm: 0.999999199659429, iteration: 314979
loss: 0.9767604470252991,grad_norm: 0.9385925453425527, iteration: 314980
loss: 1.0364989042282104,grad_norm: 0.9999991022740916, iteration: 314981
loss: 0.943549394607544,grad_norm: 0.9516899720519468, iteration: 314982
loss: 1.0340341329574585,grad_norm: 0.6903947544209048, iteration: 314983
loss: 1.0183085203170776,grad_norm: 0.9999990983221136, iteration: 314984
loss: 1.0205498933792114,grad_norm: 0.8834400583600223, iteration: 314985
loss: 0.99188232421875,grad_norm: 0.8156668229945684, iteration: 314986
loss: 1.0016558170318604,grad_norm: 0.9999991781305239, iteration: 314987
loss: 0.9631644487380981,grad_norm: 0.9513946684494357, iteration: 314988
loss: 0.9847402572631836,grad_norm: 0.8054415174059506, iteration: 314989
loss: 1.0027976036071777,grad_norm: 0.9389252559685219, iteration: 314990
loss: 0.9866790771484375,grad_norm: 0.9230806315953995, iteration: 314991
loss: 1.0227397680282593,grad_norm: 0.9999990044118924, iteration: 314992
loss: 0.953224241733551,grad_norm: 0.8615990205624663, iteration: 314993
loss: 1.03779935836792,grad_norm: 0.8670973994502679, iteration: 314994
loss: 1.0181176662445068,grad_norm: 0.8519431446955886, iteration: 314995
loss: 1.002713918685913,grad_norm: 0.9999992260632046, iteration: 314996
loss: 0.9804311394691467,grad_norm: 0.862451754339969, iteration: 314997
loss: 0.9864909052848816,grad_norm: 0.8776158353498604, iteration: 314998
loss: 0.9771888256072998,grad_norm: 0.8092250729966248, iteration: 314999
loss: 0.988970935344696,grad_norm: 0.8627086807741203, iteration: 315000
loss: 1.0043503046035767,grad_norm: 0.9133710618967185, iteration: 315001
loss: 0.9853419065475464,grad_norm: 0.7508186268745555, iteration: 315002
loss: 1.0050283670425415,grad_norm: 0.9999998293615663, iteration: 315003
loss: 0.9960779547691345,grad_norm: 0.9999992877282923, iteration: 315004
loss: 1.0206831693649292,grad_norm: 0.8038213389011422, iteration: 315005
loss: 1.000333309173584,grad_norm: 0.8440620010686302, iteration: 315006
loss: 1.0336586236953735,grad_norm: 0.9307860315513635, iteration: 315007
loss: 0.9479639530181885,grad_norm: 0.9999991288015689, iteration: 315008
loss: 1.0539051294326782,grad_norm: 0.9999996181941767, iteration: 315009
loss: 1.0170258283615112,grad_norm: 0.7758990874709399, iteration: 315010
loss: 0.9650770425796509,grad_norm: 0.9801785426535147, iteration: 315011
loss: 0.9868667721748352,grad_norm: 0.7854787760302152, iteration: 315012
loss: 1.0125751495361328,grad_norm: 0.8288273825772755, iteration: 315013
loss: 0.9912413358688354,grad_norm: 0.8838368568150308, iteration: 315014
loss: 1.0103999376296997,grad_norm: 0.999999768075035, iteration: 315015
loss: 0.950505256652832,grad_norm: 0.7726493837676711, iteration: 315016
loss: 1.012745976448059,grad_norm: 0.9361105535587144, iteration: 315017
loss: 0.9687853455543518,grad_norm: 0.9999990769968254, iteration: 315018
loss: 0.9781543612480164,grad_norm: 0.75383047352383, iteration: 315019
loss: 1.0080288648605347,grad_norm: 0.99999906132841, iteration: 315020
loss: 0.9894818067550659,grad_norm: 0.89109381934076, iteration: 315021
loss: 0.9864676594734192,grad_norm: 0.7874435924233787, iteration: 315022
loss: 0.990708589553833,grad_norm: 0.8332826038518268, iteration: 315023
loss: 0.9670860767364502,grad_norm: 0.8767434186255989, iteration: 315024
loss: 0.9884125590324402,grad_norm: 0.8350141209909963, iteration: 315025
loss: 1.0072362422943115,grad_norm: 0.7716473749836555, iteration: 315026
loss: 0.978298008441925,grad_norm: 0.7832969346498405, iteration: 315027
loss: 0.994653046131134,grad_norm: 0.9999991780389592, iteration: 315028
loss: 1.0060508251190186,grad_norm: 0.7823557295465832, iteration: 315029
loss: 1.050585389137268,grad_norm: 0.8647836661353598, iteration: 315030
loss: 1.0110013484954834,grad_norm: 0.9999996995387542, iteration: 315031
loss: 1.0101900100708008,grad_norm: 0.7974735286637066, iteration: 315032
loss: 0.9464852809906006,grad_norm: 0.781686344427862, iteration: 315033
loss: 1.0117552280426025,grad_norm: 0.8184392113442414, iteration: 315034
loss: 0.9923705458641052,grad_norm: 0.7492552944485361, iteration: 315035
loss: 1.0530956983566284,grad_norm: 0.9256819408354109, iteration: 315036
loss: 1.0505622625350952,grad_norm: 0.8831179579370062, iteration: 315037
loss: 1.0316967964172363,grad_norm: 0.8257136976505225, iteration: 315038
loss: 1.0092668533325195,grad_norm: 0.9999993313763881, iteration: 315039
loss: 0.9619103074073792,grad_norm: 0.9519159368991937, iteration: 315040
loss: 1.016132116317749,grad_norm: 0.909797010189008, iteration: 315041
loss: 0.9969682693481445,grad_norm: 0.8648848111925764, iteration: 315042
loss: 0.9914611577987671,grad_norm: 0.8637672464675274, iteration: 315043
loss: 1.02358078956604,grad_norm: 0.7614434258755776, iteration: 315044
loss: 1.0257946252822876,grad_norm: 0.851320975882191, iteration: 315045
loss: 1.0161288976669312,grad_norm: 0.9381326688967169, iteration: 315046
loss: 1.0046837329864502,grad_norm: 0.8823725536741013, iteration: 315047
loss: 0.980783224105835,grad_norm: 0.8077564653599616, iteration: 315048
loss: 1.0119450092315674,grad_norm: 0.8445662038556684, iteration: 315049
loss: 0.9474241137504578,grad_norm: 0.9716877411429522, iteration: 315050
loss: 1.0100717544555664,grad_norm: 0.991128742809541, iteration: 315051
loss: 1.0132217407226562,grad_norm: 0.8554457810752427, iteration: 315052
loss: 0.9665984511375427,grad_norm: 0.788769786544221, iteration: 315053
loss: 1.0189266204833984,grad_norm: 0.9152792906029965, iteration: 315054
loss: 1.00981867313385,grad_norm: 0.7687262965073703, iteration: 315055
loss: 1.0066324472427368,grad_norm: 0.8679412567418916, iteration: 315056
loss: 0.9520403146743774,grad_norm: 0.9999995358967989, iteration: 315057
loss: 0.9957777261734009,grad_norm: 0.9876852619529582, iteration: 315058
loss: 0.9948886036872864,grad_norm: 0.6577557743384006, iteration: 315059
loss: 0.9869006276130676,grad_norm: 0.8909943573285689, iteration: 315060
loss: 1.053235650062561,grad_norm: 0.8187147611064268, iteration: 315061
loss: 1.0043586492538452,grad_norm: 0.7967638314755097, iteration: 315062
loss: 0.9733228087425232,grad_norm: 0.7546149342265576, iteration: 315063
loss: 1.0121800899505615,grad_norm: 0.817957209696214, iteration: 315064
loss: 0.9986076951026917,grad_norm: 0.877390169981725, iteration: 315065
loss: 1.018951654434204,grad_norm: 0.7684997085761802, iteration: 315066
loss: 1.0105048418045044,grad_norm: 0.9126264368771886, iteration: 315067
loss: 1.0277096033096313,grad_norm: 1.0000000242531581, iteration: 315068
loss: 1.0304348468780518,grad_norm: 0.9147107701332551, iteration: 315069
loss: 0.9945734739303589,grad_norm: 0.8877616606788765, iteration: 315070
loss: 1.0041662454605103,grad_norm: 0.823456730050309, iteration: 315071
loss: 1.0284796953201294,grad_norm: 0.9651126042753301, iteration: 315072
loss: 0.9755350351333618,grad_norm: 0.815733449255968, iteration: 315073
loss: 1.0029562711715698,grad_norm: 0.7521063905657427, iteration: 315074
loss: 0.985983669757843,grad_norm: 0.8687095702574101, iteration: 315075
loss: 1.0152537822723389,grad_norm: 0.9999991311357503, iteration: 315076
loss: 0.9771378040313721,grad_norm: 0.9295996915630015, iteration: 315077
loss: 1.0118076801300049,grad_norm: 0.7304927156496213, iteration: 315078
loss: 0.9983038306236267,grad_norm: 0.8796499846458631, iteration: 315079
loss: 1.0275863409042358,grad_norm: 0.8538361670746928, iteration: 315080
loss: 1.0355709791183472,grad_norm: 0.7362808530667101, iteration: 315081
loss: 0.9752100706100464,grad_norm: 0.8536715438980327, iteration: 315082
loss: 0.9981172680854797,grad_norm: 0.9271809852325519, iteration: 315083
loss: 1.0168368816375732,grad_norm: 0.7349097041123283, iteration: 315084
loss: 0.9776008129119873,grad_norm: 0.6701852535610356, iteration: 315085
loss: 1.0008450746536255,grad_norm: 0.7459918774518806, iteration: 315086
loss: 0.9879975318908691,grad_norm: 0.9999990987738417, iteration: 315087
loss: 1.0320371389389038,grad_norm: 0.9514855834541238, iteration: 315088
loss: 1.0666214227676392,grad_norm: 0.9999991221927607, iteration: 315089
loss: 1.0175832509994507,grad_norm: 0.8750116710470891, iteration: 315090
loss: 1.0212993621826172,grad_norm: 0.9999990793247202, iteration: 315091
loss: 1.0105911493301392,grad_norm: 0.8050254986588656, iteration: 315092
loss: 1.0434002876281738,grad_norm: 0.8272201714402108, iteration: 315093
loss: 0.9965965151786804,grad_norm: 0.761832680831489, iteration: 315094
loss: 1.0108189582824707,grad_norm: 0.999999568262261, iteration: 315095
loss: 1.0128262042999268,grad_norm: 0.9999996970911543, iteration: 315096
loss: 0.9896946549415588,grad_norm: 0.7442130569902694, iteration: 315097
loss: 0.9745185375213623,grad_norm: 0.8719342674606043, iteration: 315098
loss: 1.0752993822097778,grad_norm: 0.9999998766353774, iteration: 315099
loss: 0.9968885183334351,grad_norm: 0.6968461753654775, iteration: 315100
loss: 1.1324700117111206,grad_norm: 0.8580854032625842, iteration: 315101
loss: 0.9536766409873962,grad_norm: 0.9258940140659044, iteration: 315102
loss: 1.0561267137527466,grad_norm: 0.9999991119965684, iteration: 315103
loss: 0.9853318333625793,grad_norm: 0.8308936306632875, iteration: 315104
loss: 0.9750692844390869,grad_norm: 0.9473814888823249, iteration: 315105
loss: 0.9907132983207703,grad_norm: 0.8574906854541336, iteration: 315106
loss: 1.0207610130310059,grad_norm: 0.9211295574551349, iteration: 315107
loss: 0.9698413014411926,grad_norm: 0.7780536856349461, iteration: 315108
loss: 0.959378182888031,grad_norm: 0.9600951796995087, iteration: 315109
loss: 1.0026330947875977,grad_norm: 0.7756876806338094, iteration: 315110
loss: 1.036228895187378,grad_norm: 0.9999999432654998, iteration: 315111
loss: 0.959344744682312,grad_norm: 0.6714062783750093, iteration: 315112
loss: 0.9887202978134155,grad_norm: 0.7648822821516282, iteration: 315113
loss: 1.0157800912857056,grad_norm: 0.9999996036332466, iteration: 315114
loss: 1.0183180570602417,grad_norm: 0.8679061261074904, iteration: 315115
loss: 1.0426141023635864,grad_norm: 0.8521780606906226, iteration: 315116
loss: 1.009297251701355,grad_norm: 0.7798663788378928, iteration: 315117
loss: 0.9992843270301819,grad_norm: 0.8260405145803597, iteration: 315118
loss: 1.0145331621170044,grad_norm: 0.9277019209727974, iteration: 315119
loss: 0.9960662722587585,grad_norm: 0.8687741833641975, iteration: 315120
loss: 0.9870717525482178,grad_norm: 0.7719046374848355, iteration: 315121
loss: 1.0159673690795898,grad_norm: 0.8377670589927995, iteration: 315122
loss: 1.0316336154937744,grad_norm: 0.9775762534869361, iteration: 315123
loss: 1.045793056488037,grad_norm: 0.8374521254426543, iteration: 315124
loss: 1.0067720413208008,grad_norm: 0.851222873836792, iteration: 315125
loss: 1.0135313272476196,grad_norm: 0.9514986757482023, iteration: 315126
loss: 0.9693925976753235,grad_norm: 0.9137713850179282, iteration: 315127
loss: 0.9962900876998901,grad_norm: 0.8314583647840456, iteration: 315128
loss: 0.9826353788375854,grad_norm: 0.7561269424028139, iteration: 315129
loss: 0.9990013241767883,grad_norm: 0.959907101327519, iteration: 315130
loss: 1.0275917053222656,grad_norm: 0.9882241292080819, iteration: 315131
loss: 0.9976757168769836,grad_norm: 0.8358556819049674, iteration: 315132
loss: 1.012700080871582,grad_norm: 0.7456390984224043, iteration: 315133
loss: 1.098854422569275,grad_norm: 1.0000000402418785, iteration: 315134
loss: 0.9884535670280457,grad_norm: 0.9441831327610702, iteration: 315135
loss: 0.991602897644043,grad_norm: 0.8747034097497918, iteration: 315136
loss: 1.032721757888794,grad_norm: 0.8998919171572506, iteration: 315137
loss: 1.0031280517578125,grad_norm: 0.8219119678159107, iteration: 315138
loss: 1.0471009016036987,grad_norm: 0.7197000144717719, iteration: 315139
loss: 1.018040657043457,grad_norm: 0.7861053065026037, iteration: 315140
loss: 0.9717521071434021,grad_norm: 0.7574140466790964, iteration: 315141
loss: 1.026413083076477,grad_norm: 0.9999993123041233, iteration: 315142
loss: 1.0072534084320068,grad_norm: 0.9999991373243043, iteration: 315143
loss: 0.9687522053718567,grad_norm: 0.771694781319518, iteration: 315144
loss: 1.051918625831604,grad_norm: 0.8012135022031242, iteration: 315145
loss: 1.0019729137420654,grad_norm: 0.8747510014420596, iteration: 315146
loss: 1.0382033586502075,grad_norm: 0.6747953739538392, iteration: 315147
loss: 0.9981217384338379,grad_norm: 0.7890538234495119, iteration: 315148
loss: 1.003089189529419,grad_norm: 0.7998676249347003, iteration: 315149
loss: 0.9917960166931152,grad_norm: 0.9999994902302471, iteration: 315150
loss: 0.9979115128517151,grad_norm: 0.7105073158264097, iteration: 315151
loss: 0.9845591187477112,grad_norm: 0.8302135264407844, iteration: 315152
loss: 1.0038093328475952,grad_norm: 0.9444390392538042, iteration: 315153
loss: 0.9757981896400452,grad_norm: 0.7136016855323964, iteration: 315154
loss: 0.968200147151947,grad_norm: 0.8888569772132245, iteration: 315155
loss: 0.984398365020752,grad_norm: 0.8520501547843858, iteration: 315156
loss: 1.0284422636032104,grad_norm: 0.8775201470346052, iteration: 315157
loss: 0.9872967004776001,grad_norm: 0.9999992199315288, iteration: 315158
loss: 0.9601954817771912,grad_norm: 0.6846441864814011, iteration: 315159
loss: 0.9772675633430481,grad_norm: 0.8782642958412166, iteration: 315160
loss: 0.9961247444152832,grad_norm: 0.9999999146817837, iteration: 315161
loss: 1.0409789085388184,grad_norm: 0.8214617707338229, iteration: 315162
loss: 0.9968972206115723,grad_norm: 0.7383864424359193, iteration: 315163
loss: 0.9675975441932678,grad_norm: 0.8046408030770869, iteration: 315164
loss: 1.0297276973724365,grad_norm: 0.9999999392511555, iteration: 315165
loss: 1.0686841011047363,grad_norm: 0.9999992675626604, iteration: 315166
loss: 1.0476816892623901,grad_norm: 0.6925556774380685, iteration: 315167
loss: 0.9996784925460815,grad_norm: 0.9999991576799216, iteration: 315168
loss: 0.9901830554008484,grad_norm: 0.842497667140845, iteration: 315169
loss: 1.0163891315460205,grad_norm: 0.813693705818808, iteration: 315170
loss: 1.007165551185608,grad_norm: 0.9999996449529039, iteration: 315171
loss: 1.0000355243682861,grad_norm: 0.6339844030756709, iteration: 315172
loss: 1.048076868057251,grad_norm: 0.9999996314176174, iteration: 315173
loss: 1.0539312362670898,grad_norm: 0.9463563108339801, iteration: 315174
loss: 1.1046688556671143,grad_norm: 0.9999992386685352, iteration: 315175
loss: 0.9984953999519348,grad_norm: 0.9999994245991881, iteration: 315176
loss: 0.9965332746505737,grad_norm: 0.8975993558420552, iteration: 315177
loss: 1.0048960447311401,grad_norm: 0.8148166781602754, iteration: 315178
loss: 1.0094631910324097,grad_norm: 0.9675075069438004, iteration: 315179
loss: 1.003213882446289,grad_norm: 0.7946769267184265, iteration: 315180
loss: 1.0146671533584595,grad_norm: 0.9858573035109484, iteration: 315181
loss: 0.9997499585151672,grad_norm: 0.9882080081337496, iteration: 315182
loss: 1.0303272008895874,grad_norm: 0.9999995842293338, iteration: 315183
loss: 1.015913486480713,grad_norm: 0.7570360839586443, iteration: 315184
loss: 1.0717227458953857,grad_norm: 0.8725362288134623, iteration: 315185
loss: 1.0582112073898315,grad_norm: 0.999999195680628, iteration: 315186
loss: 1.0270724296569824,grad_norm: 0.9067633497114862, iteration: 315187
loss: 1.0045841932296753,grad_norm: 0.7970488491892948, iteration: 315188
loss: 1.0662747621536255,grad_norm: 0.9999998318447604, iteration: 315189
loss: 0.9924389719963074,grad_norm: 0.8497688724654893, iteration: 315190
loss: 1.030407190322876,grad_norm: 0.999999196996825, iteration: 315191
loss: 1.0077576637268066,grad_norm: 0.8329122633484921, iteration: 315192
loss: 1.005658507347107,grad_norm: 0.9129266519701033, iteration: 315193
loss: 0.9637021422386169,grad_norm: 0.9999990928358066, iteration: 315194
loss: 0.97735595703125,grad_norm: 0.8196386667365173, iteration: 315195
loss: 0.9655620455741882,grad_norm: 0.9148097390324875, iteration: 315196
loss: 1.0096690654754639,grad_norm: 0.7774267053375306, iteration: 315197
loss: 1.0021239519119263,grad_norm: 0.8464761814601235, iteration: 315198
loss: 0.9977917075157166,grad_norm: 0.9214331710456224, iteration: 315199
loss: 1.0367323160171509,grad_norm: 0.8114238651112889, iteration: 315200
loss: 1.0086960792541504,grad_norm: 0.868252060573497, iteration: 315201
loss: 0.9530220627784729,grad_norm: 0.9046110531417925, iteration: 315202
loss: 1.0408146381378174,grad_norm: 0.9904247483118522, iteration: 315203
loss: 0.9762988090515137,grad_norm: 0.8476063783050274, iteration: 315204
loss: 0.9634990692138672,grad_norm: 0.7810654490789349, iteration: 315205
loss: 1.0128300189971924,grad_norm: 0.7928759190336601, iteration: 315206
loss: 0.9866702556610107,grad_norm: 0.9085171412391171, iteration: 315207
loss: 1.0356289148330688,grad_norm: 0.9999996354677362, iteration: 315208
loss: 1.0097477436065674,grad_norm: 0.9072156574892614, iteration: 315209
loss: 1.0410863161087036,grad_norm: 0.9999992389789372, iteration: 315210
loss: 0.9554495811462402,grad_norm: 0.8594177188249578, iteration: 315211
loss: 0.9927664995193481,grad_norm: 0.7561870230993286, iteration: 315212
loss: 1.0141711235046387,grad_norm: 0.8802051993470167, iteration: 315213
loss: 1.0059783458709717,grad_norm: 0.8936276637777603, iteration: 315214
loss: 0.991191029548645,grad_norm: 0.8690665341508214, iteration: 315215
loss: 0.9976464509963989,grad_norm: 0.8727534126800446, iteration: 315216
loss: 1.0580167770385742,grad_norm: 0.9362076887751781, iteration: 315217
loss: 1.007859706878662,grad_norm: 0.935209985874871, iteration: 315218
loss: 0.9886208176612854,grad_norm: 0.9345473785187335, iteration: 315219
loss: 1.0090872049331665,grad_norm: 0.9231377493331525, iteration: 315220
loss: 1.000954508781433,grad_norm: 0.8969295000663506, iteration: 315221
loss: 0.9627037644386292,grad_norm: 0.958859807373159, iteration: 315222
loss: 1.0435333251953125,grad_norm: 0.9999993976034536, iteration: 315223
loss: 1.0016570091247559,grad_norm: 0.9999991482152707, iteration: 315224
loss: 1.013849139213562,grad_norm: 0.9999993083529842, iteration: 315225
loss: 1.0315392017364502,grad_norm: 0.9999991172426315, iteration: 315226
loss: 0.9654700756072998,grad_norm: 0.8967581969979905, iteration: 315227
loss: 1.004157304763794,grad_norm: 0.8675077189078798, iteration: 315228
loss: 1.0910449028015137,grad_norm: 0.9545103085050015, iteration: 315229
loss: 0.996360182762146,grad_norm: 0.7015064744985576, iteration: 315230
loss: 0.9645535349845886,grad_norm: 0.864127263367299, iteration: 315231
loss: 1.1855685710906982,grad_norm: 0.9999998766489151, iteration: 315232
loss: 0.944995105266571,grad_norm: 0.8920801274181362, iteration: 315233
loss: 1.0016326904296875,grad_norm: 0.9959009853099979, iteration: 315234
loss: 1.0086941719055176,grad_norm: 0.9999994466053693, iteration: 315235
loss: 0.9875501394271851,grad_norm: 0.9999993365430796, iteration: 315236
loss: 0.9698386788368225,grad_norm: 0.9157849313841654, iteration: 315237
loss: 1.004513144493103,grad_norm: 0.9187077097337887, iteration: 315238
loss: 1.0079301595687866,grad_norm: 0.7557847814270101, iteration: 315239
loss: 0.9921445846557617,grad_norm: 0.8639116086283465, iteration: 315240
loss: 1.0069481134414673,grad_norm: 0.9175422401987693, iteration: 315241
loss: 1.0070439577102661,grad_norm: 0.9184029727845697, iteration: 315242
loss: 0.9627907872200012,grad_norm: 0.9999989461490163, iteration: 315243
loss: 1.0219892263412476,grad_norm: 0.9999995135912216, iteration: 315244
loss: 0.997165322303772,grad_norm: 0.8406029005470866, iteration: 315245
loss: 1.0106542110443115,grad_norm: 0.9999997524326898, iteration: 315246
loss: 0.995826005935669,grad_norm: 0.999999699884362, iteration: 315247
loss: 1.025795340538025,grad_norm: 0.8651803309510978, iteration: 315248
loss: 0.9496637582778931,grad_norm: 0.999999332824887, iteration: 315249
loss: 1.0071824789047241,grad_norm: 0.8076991695534522, iteration: 315250
loss: 0.981120765209198,grad_norm: 0.7404554282050597, iteration: 315251
loss: 1.0464651584625244,grad_norm: 0.8121820420138292, iteration: 315252
loss: 1.068204641342163,grad_norm: 0.8885495247575856, iteration: 315253
loss: 1.0030882358551025,grad_norm: 0.9219444222653155, iteration: 315254
loss: 1.0352320671081543,grad_norm: 0.7322637048644106, iteration: 315255
loss: 1.0367618799209595,grad_norm: 0.9942838181772561, iteration: 315256
loss: 1.0221083164215088,grad_norm: 0.7982885016909227, iteration: 315257
loss: 0.9629034399986267,grad_norm: 0.8088418583356726, iteration: 315258
loss: 0.9925002455711365,grad_norm: 0.7717774684665399, iteration: 315259
loss: 0.959250807762146,grad_norm: 0.8701660887545598, iteration: 315260
loss: 0.9959406852722168,grad_norm: 0.7707496763545557, iteration: 315261
loss: 1.0011708736419678,grad_norm: 0.9999996086599767, iteration: 315262
loss: 0.9950078129768372,grad_norm: 0.9999997595714607, iteration: 315263
loss: 0.9841303825378418,grad_norm: 0.7474972434701286, iteration: 315264
loss: 0.9981203079223633,grad_norm: 0.8605257592338552, iteration: 315265
loss: 0.9604499936103821,grad_norm: 0.7820901399509728, iteration: 315266
loss: 1.1199017763137817,grad_norm: 0.9866459126869611, iteration: 315267
loss: 0.9895113110542297,grad_norm: 0.9017904281183535, iteration: 315268
loss: 1.0262939929962158,grad_norm: 0.9999994515017098, iteration: 315269
loss: 1.0082712173461914,grad_norm: 0.9999989807659323, iteration: 315270
loss: 0.9933322668075562,grad_norm: 0.7482909167381925, iteration: 315271
loss: 1.054775595664978,grad_norm: 0.7883329770688619, iteration: 315272
loss: 1.011967658996582,grad_norm: 0.7720702535789825, iteration: 315273
loss: 0.9647845029830933,grad_norm: 0.7849208612162478, iteration: 315274
loss: 1.0390654802322388,grad_norm: 0.9999992377641892, iteration: 315275
loss: 0.9715921878814697,grad_norm: 0.8059159327414969, iteration: 315276
loss: 0.9652521014213562,grad_norm: 0.7798358989616936, iteration: 315277
loss: 0.991109311580658,grad_norm: 0.8762318870160508, iteration: 315278
loss: 1.0082049369812012,grad_norm: 0.8600911671371968, iteration: 315279
loss: 0.9758022427558899,grad_norm: 0.9919916759687311, iteration: 315280
loss: 1.0023338794708252,grad_norm: 0.8088156043210909, iteration: 315281
loss: 1.0146790742874146,grad_norm: 0.9688273478917124, iteration: 315282
loss: 0.9773004651069641,grad_norm: 0.8084520427705804, iteration: 315283
loss: 1.0016118288040161,grad_norm: 0.9037563178382689, iteration: 315284
loss: 0.999470055103302,grad_norm: 0.7369622370566782, iteration: 315285
loss: 1.0260611772537231,grad_norm: 0.811953299004945, iteration: 315286
loss: 1.0131994485855103,grad_norm: 0.903594894628913, iteration: 315287
loss: 0.9810227751731873,grad_norm: 0.880137761867909, iteration: 315288
loss: 1.038170576095581,grad_norm: 0.9999999840348682, iteration: 315289
loss: 1.0071132183074951,grad_norm: 0.8431792771217979, iteration: 315290
loss: 1.0320440530776978,grad_norm: 0.8889361665129927, iteration: 315291
loss: 0.9601632952690125,grad_norm: 0.9969059443835647, iteration: 315292
loss: 0.9902685284614563,grad_norm: 0.7717814687234026, iteration: 315293
loss: 1.0243529081344604,grad_norm: 0.9999990267217561, iteration: 315294
loss: 0.9703229069709778,grad_norm: 0.9561746344347253, iteration: 315295
loss: 0.9862686395645142,grad_norm: 0.8193927708086263, iteration: 315296
loss: 0.9660947918891907,grad_norm: 0.7668878616099629, iteration: 315297
loss: 1.0258963108062744,grad_norm: 0.840353779012723, iteration: 315298
loss: 1.0589860677719116,grad_norm: 0.9932085514829683, iteration: 315299
loss: 0.9865719676017761,grad_norm: 0.9067536662832791, iteration: 315300
loss: 1.0400304794311523,grad_norm: 0.9999991919574391, iteration: 315301
loss: 1.0275564193725586,grad_norm: 0.8361571672785151, iteration: 315302
loss: 1.0020114183425903,grad_norm: 0.8655478228301557, iteration: 315303
loss: 1.019510269165039,grad_norm: 0.8974508547271464, iteration: 315304
loss: 1.0190943479537964,grad_norm: 0.9999994782904348, iteration: 315305
loss: 1.0155956745147705,grad_norm: 0.8066478450056678, iteration: 315306
loss: 0.9912125468254089,grad_norm: 0.7664087645847957, iteration: 315307
loss: 0.9753125905990601,grad_norm: 0.858007847357399, iteration: 315308
loss: 0.9815975427627563,grad_norm: 0.8181923200911999, iteration: 315309
loss: 0.991358757019043,grad_norm: 0.7428269981061744, iteration: 315310
loss: 0.9785134196281433,grad_norm: 0.9753967421881349, iteration: 315311
loss: 1.0070600509643555,grad_norm: 0.830656539148204, iteration: 315312
loss: 0.986794650554657,grad_norm: 0.858696509432977, iteration: 315313
loss: 1.072468638420105,grad_norm: 0.9999991025795546, iteration: 315314
loss: 0.987002968788147,grad_norm: 0.8713258579816959, iteration: 315315
loss: 1.0184674263000488,grad_norm: 0.9999992050874803, iteration: 315316
loss: 1.0039587020874023,grad_norm: 0.859269861741568, iteration: 315317
loss: 0.9762986302375793,grad_norm: 0.8123949036182351, iteration: 315318
loss: 1.03428053855896,grad_norm: 0.999999116564998, iteration: 315319
loss: 0.9971235394477844,grad_norm: 0.8505523480529797, iteration: 315320
loss: 0.9946718215942383,grad_norm: 0.7322589147092418, iteration: 315321
loss: 1.0118991136550903,grad_norm: 0.7880201059024096, iteration: 315322
loss: 0.9951580166816711,grad_norm: 0.8679600793520637, iteration: 315323
loss: 1.0299876928329468,grad_norm: 0.6608975431350935, iteration: 315324
loss: 0.9842261672019958,grad_norm: 0.87945382061161, iteration: 315325
loss: 1.015203595161438,grad_norm: 0.9415018707578552, iteration: 315326
loss: 1.033206582069397,grad_norm: 0.989056135828466, iteration: 315327
loss: 1.0563403367996216,grad_norm: 0.7827877736425296, iteration: 315328
loss: 1.0464962720870972,grad_norm: 0.9999997306344706, iteration: 315329
loss: 0.9789897799491882,grad_norm: 0.875336639820607, iteration: 315330
loss: 1.0097119808197021,grad_norm: 0.8090906767901642, iteration: 315331
loss: 1.0142592191696167,grad_norm: 0.999999049235184, iteration: 315332
loss: 0.998432993888855,grad_norm: 0.9084702242490751, iteration: 315333
loss: 1.0037230253219604,grad_norm: 0.7434956171180463, iteration: 315334
loss: 1.0128109455108643,grad_norm: 0.8340320332579366, iteration: 315335
loss: 1.0430307388305664,grad_norm: 0.999999377824335, iteration: 315336
loss: 0.9649109244346619,grad_norm: 0.9999994825404542, iteration: 315337
loss: 0.9829989075660706,grad_norm: 0.6974028917954456, iteration: 315338
loss: 0.9813899993896484,grad_norm: 0.9335177218779248, iteration: 315339
loss: 1.0092501640319824,grad_norm: 0.7950927128984492, iteration: 315340
loss: 1.0251665115356445,grad_norm: 0.8923217551090811, iteration: 315341
loss: 1.0210208892822266,grad_norm: 0.9715059202643557, iteration: 315342
loss: 0.9821265339851379,grad_norm: 0.7994123269125076, iteration: 315343
loss: 0.9971280694007874,grad_norm: 0.9999993242237347, iteration: 315344
loss: 1.0537290573120117,grad_norm: 0.999999057233941, iteration: 315345
loss: 0.9937413930892944,grad_norm: 0.999999217285849, iteration: 315346
loss: 0.9632986187934875,grad_norm: 0.7461522870795975, iteration: 315347
loss: 1.017675757408142,grad_norm: 0.8577592602455398, iteration: 315348
loss: 1.0010024309158325,grad_norm: 0.8847395250778783, iteration: 315349
loss: 0.9831769466400146,grad_norm: 0.7567853858507272, iteration: 315350
loss: 0.9767888784408569,grad_norm: 0.9205414700464046, iteration: 315351
loss: 1.007989525794983,grad_norm: 0.9156312668339238, iteration: 315352
loss: 1.0094202756881714,grad_norm: 0.8711322489418438, iteration: 315353
loss: 1.0061458349227905,grad_norm: 0.7583775418862472, iteration: 315354
loss: 0.9971976280212402,grad_norm: 0.6635584782828632, iteration: 315355
loss: 0.9981182813644409,grad_norm: 0.7703031933540597, iteration: 315356
loss: 0.9716789126396179,grad_norm: 0.682171678225269, iteration: 315357
loss: 0.9943614602088928,grad_norm: 0.7828957458422466, iteration: 315358
loss: 1.0058244466781616,grad_norm: 0.8250590028328523, iteration: 315359
loss: 1.013577938079834,grad_norm: 0.7965379250291637, iteration: 315360
loss: 0.9897506833076477,grad_norm: 0.8728739863454846, iteration: 315361
loss: 1.0334968566894531,grad_norm: 0.999999553037198, iteration: 315362
loss: 0.9729130864143372,grad_norm: 0.7829590934863612, iteration: 315363
loss: 1.0229774713516235,grad_norm: 0.7730755399725466, iteration: 315364
loss: 1.00886070728302,grad_norm: 0.8580981719437786, iteration: 315365
loss: 0.9674808382987976,grad_norm: 0.781882981382819, iteration: 315366
loss: 1.013891339302063,grad_norm: 0.8515574285473901, iteration: 315367
loss: 1.0239484310150146,grad_norm: 0.9999996323489069, iteration: 315368
loss: 0.9770870804786682,grad_norm: 0.8170904969382184, iteration: 315369
loss: 1.0243680477142334,grad_norm: 0.9999991506731268, iteration: 315370
loss: 1.011701226234436,grad_norm: 0.7298112283069339, iteration: 315371
loss: 1.0058929920196533,grad_norm: 0.7454873442870026, iteration: 315372
loss: 1.0244331359863281,grad_norm: 0.9999989732304915, iteration: 315373
loss: 0.9721991419792175,grad_norm: 0.9873574646373766, iteration: 315374
loss: 1.0042107105255127,grad_norm: 0.7466749328675807, iteration: 315375
loss: 0.9904510378837585,grad_norm: 0.6699314266485481, iteration: 315376
loss: 1.2334399223327637,grad_norm: 0.9999995948042342, iteration: 315377
loss: 0.9709885120391846,grad_norm: 0.8250362306944884, iteration: 315378
loss: 1.0068726539611816,grad_norm: 0.8238635528590732, iteration: 315379
loss: 1.001760721206665,grad_norm: 0.8962205456099891, iteration: 315380
loss: 1.0165518522262573,grad_norm: 0.8052071209100319, iteration: 315381
loss: 0.9989807605743408,grad_norm: 0.9313178073008334, iteration: 315382
loss: 1.006512999534607,grad_norm: 0.8166628908572714, iteration: 315383
loss: 0.9954886436462402,grad_norm: 0.8570167132137584, iteration: 315384
loss: 0.9735901355743408,grad_norm: 0.9999990423750548, iteration: 315385
loss: 1.0290443897247314,grad_norm: 0.9086899500131327, iteration: 315386
loss: 1.0250084400177002,grad_norm: 0.8656196812889634, iteration: 315387
loss: 1.0295144319534302,grad_norm: 0.9999990222850033, iteration: 315388
loss: 0.9866927266120911,grad_norm: 0.7068375304756226, iteration: 315389
loss: 1.0125701427459717,grad_norm: 0.8906970601271507, iteration: 315390
loss: 1.003677248954773,grad_norm: 0.9035113347030567, iteration: 315391
loss: 0.9633148312568665,grad_norm: 0.7787147278609654, iteration: 315392
loss: 0.9687460064888,grad_norm: 0.7457986197219973, iteration: 315393
loss: 0.9835622310638428,grad_norm: 0.9999990819007581, iteration: 315394
loss: 1.0456140041351318,grad_norm: 0.9999997589807319, iteration: 315395
loss: 0.981239914894104,grad_norm: 0.8585831653408437, iteration: 315396
loss: 0.9458391070365906,grad_norm: 0.8440599456908162, iteration: 315397
loss: 0.9475947022438049,grad_norm: 0.9999991635762568, iteration: 315398
loss: 1.0040525197982788,grad_norm: 0.9135904209004937, iteration: 315399
loss: 0.9655501246452332,grad_norm: 0.9999993007630164, iteration: 315400
loss: 0.9712081551551819,grad_norm: 0.9465829110583276, iteration: 315401
loss: 1.0294824838638306,grad_norm: 0.8439830267643424, iteration: 315402
loss: 1.0179988145828247,grad_norm: 0.8830821184771409, iteration: 315403
loss: 1.0087854862213135,grad_norm: 0.989468391094804, iteration: 315404
loss: 1.0220366716384888,grad_norm: 0.9999993973423057, iteration: 315405
loss: 1.0066889524459839,grad_norm: 0.9999990878845098, iteration: 315406
loss: 1.015317440032959,grad_norm: 0.7493788494564573, iteration: 315407
loss: 1.037306785583496,grad_norm: 0.9483667089213517, iteration: 315408
loss: 0.9716203808784485,grad_norm: 0.8656796573918788, iteration: 315409
loss: 1.0072824954986572,grad_norm: 0.9063087163183398, iteration: 315410
loss: 1.0407840013504028,grad_norm: 0.8465035340031198, iteration: 315411
loss: 1.0114840269088745,grad_norm: 0.7792195488766183, iteration: 315412
loss: 1.0175189971923828,grad_norm: 0.846325207813042, iteration: 315413
loss: 1.0082272291183472,grad_norm: 0.7625967434962886, iteration: 315414
loss: 1.0107321739196777,grad_norm: 0.7312930325079633, iteration: 315415
loss: 0.9595779776573181,grad_norm: 0.8349356972383147, iteration: 315416
loss: 1.1246048212051392,grad_norm: 0.9999999921338335, iteration: 315417
loss: 0.9765416979789734,grad_norm: 0.9400430806020064, iteration: 315418
loss: 0.9872156977653503,grad_norm: 0.999999154433718, iteration: 315419
loss: 1.025994062423706,grad_norm: 0.9440418925487276, iteration: 315420
loss: 0.9938812255859375,grad_norm: 0.9279882824165601, iteration: 315421
loss: 1.0126926898956299,grad_norm: 0.9999990833006642, iteration: 315422
loss: 0.9995509386062622,grad_norm: 0.8329735514533599, iteration: 315423
loss: 1.0084242820739746,grad_norm: 0.8847975564822437, iteration: 315424
loss: 0.9959338307380676,grad_norm: 0.8623372700203393, iteration: 315425
loss: 0.9571772217750549,grad_norm: 0.7574273419280189, iteration: 315426
loss: 1.0272066593170166,grad_norm: 0.9644559567479761, iteration: 315427
loss: 1.0100963115692139,grad_norm: 0.9009500930889165, iteration: 315428
loss: 0.9847293496131897,grad_norm: 0.8054770912011001, iteration: 315429
loss: 1.001678228378296,grad_norm: 0.8805433681563852, iteration: 315430
loss: 1.069627285003662,grad_norm: 0.9999994450538076, iteration: 315431
loss: 1.0130445957183838,grad_norm: 0.7461602364526866, iteration: 315432
loss: 1.0536906719207764,grad_norm: 0.99999936134257, iteration: 315433
loss: 1.0320295095443726,grad_norm: 0.9999995134663199, iteration: 315434
loss: 1.2042239904403687,grad_norm: 0.9999996490509254, iteration: 315435
loss: 1.0266573429107666,grad_norm: 0.8136152186057233, iteration: 315436
loss: 1.0136009454727173,grad_norm: 0.9595352060568446, iteration: 315437
loss: 1.0059814453125,grad_norm: 0.9457977979495074, iteration: 315438
loss: 0.9923602938652039,grad_norm: 0.9999991169439535, iteration: 315439
loss: 1.034832239151001,grad_norm: 0.848878865116304, iteration: 315440
loss: 1.0117260217666626,grad_norm: 0.7611731511381935, iteration: 315441
loss: 1.016715168952942,grad_norm: 0.8693124800076379, iteration: 315442
loss: 0.9804231524467468,grad_norm: 0.7087342407992614, iteration: 315443
loss: 1.0435802936553955,grad_norm: 0.8121238521821522, iteration: 315444
loss: 1.0356929302215576,grad_norm: 0.9878141586728572, iteration: 315445
loss: 0.973207414150238,grad_norm: 0.8143776853599307, iteration: 315446
loss: 1.0086263418197632,grad_norm: 0.80146729044091, iteration: 315447
loss: 1.0206518173217773,grad_norm: 0.8863549857128676, iteration: 315448
loss: 0.9900651574134827,grad_norm: 0.9895532736751464, iteration: 315449
loss: 0.9526386260986328,grad_norm: 0.9248258940178002, iteration: 315450
loss: 0.9363338947296143,grad_norm: 0.9558572887999887, iteration: 315451
loss: 1.0040638446807861,grad_norm: 0.8925545206127039, iteration: 315452
loss: 1.0032070875167847,grad_norm: 0.8737126776816406, iteration: 315453
loss: 1.0147149562835693,grad_norm: 0.9157626923543887, iteration: 315454
loss: 1.010575771331787,grad_norm: 0.7986322803429781, iteration: 315455
loss: 0.9740269184112549,grad_norm: 0.9313736048161461, iteration: 315456
loss: 1.019317388534546,grad_norm: 0.7517303093598862, iteration: 315457
loss: 0.9973515272140503,grad_norm: 0.7643283705332148, iteration: 315458
loss: 0.9703928232192993,grad_norm: 0.8858055381405904, iteration: 315459
loss: 0.9847425818443298,grad_norm: 0.91879656509453, iteration: 315460
loss: 1.0288888216018677,grad_norm: 0.9062035535329014, iteration: 315461
loss: 0.9992411732673645,grad_norm: 0.7928524286817198, iteration: 315462
loss: 1.0968570709228516,grad_norm: 0.9999995355592056, iteration: 315463
loss: 0.9934019446372986,grad_norm: 0.7663310557066529, iteration: 315464
loss: 1.0304423570632935,grad_norm: 0.7384333611795499, iteration: 315465
loss: 1.0057963132858276,grad_norm: 0.8777921947593856, iteration: 315466
loss: 0.9854637980461121,grad_norm: 0.9076621033210962, iteration: 315467
loss: 0.9970823526382446,grad_norm: 0.8183601283274569, iteration: 315468
loss: 1.0037716627120972,grad_norm: 0.8149457210091686, iteration: 315469
loss: 1.052287220954895,grad_norm: 0.9999998115923949, iteration: 315470
loss: 1.005293369293213,grad_norm: 0.8465936931272783, iteration: 315471
loss: 0.9665805101394653,grad_norm: 0.7734346408146907, iteration: 315472
loss: 0.9882798790931702,grad_norm: 0.7968745122575605, iteration: 315473
loss: 0.9957936406135559,grad_norm: 0.9960018414461236, iteration: 315474
loss: 1.052030324935913,grad_norm: 0.9999993156888437, iteration: 315475
loss: 1.013007402420044,grad_norm: 0.694114780618149, iteration: 315476
loss: 0.9987132549285889,grad_norm: 0.8213665409059796, iteration: 315477
loss: 0.9767541885375977,grad_norm: 0.8406198764006781, iteration: 315478
loss: 1.0559591054916382,grad_norm: 0.9999996054240429, iteration: 315479
loss: 1.0254026651382446,grad_norm: 0.9999991292866712, iteration: 315480
loss: 1.0232094526290894,grad_norm: 0.861565712974846, iteration: 315481
loss: 1.1432949304580688,grad_norm: 0.9623328439929397, iteration: 315482
loss: 0.992626428604126,grad_norm: 0.8468441280785771, iteration: 315483
loss: 0.9615983366966248,grad_norm: 0.8417942669709575, iteration: 315484
loss: 0.9648176431655884,grad_norm: 0.8183939638624679, iteration: 315485
loss: 0.9912748336791992,grad_norm: 0.8837893109347299, iteration: 315486
loss: 1.0439668893814087,grad_norm: 0.9999990469166924, iteration: 315487
loss: 0.9912417531013489,grad_norm: 0.8118060319139991, iteration: 315488
loss: 1.0031614303588867,grad_norm: 0.8441987272516119, iteration: 315489
loss: 1.0128270387649536,grad_norm: 0.7534729211057674, iteration: 315490
loss: 1.0270476341247559,grad_norm: 0.8264183028827886, iteration: 315491
loss: 1.0276778936386108,grad_norm: 0.99999986787261, iteration: 315492
loss: 1.1672388315200806,grad_norm: 0.9999992761476428, iteration: 315493
loss: 1.049625277519226,grad_norm: 1.0000000516201963, iteration: 315494
loss: 1.0097835063934326,grad_norm: 0.7059538970294547, iteration: 315495
loss: 1.0349711179733276,grad_norm: 0.7910842667194752, iteration: 315496
loss: 0.998046338558197,grad_norm: 0.8771914425805434, iteration: 315497
loss: 1.0172005891799927,grad_norm: 0.9332411696300643, iteration: 315498
loss: 1.0533627271652222,grad_norm: 0.9999991583679951, iteration: 315499
loss: 1.0180706977844238,grad_norm: 0.9472325785306206, iteration: 315500
loss: 0.9705691337585449,grad_norm: 0.7754607043487474, iteration: 315501
loss: 1.0193475484848022,grad_norm: 0.8482975520486917, iteration: 315502
loss: 0.9968653321266174,grad_norm: 0.7884181616922107, iteration: 315503
loss: 0.9867170453071594,grad_norm: 0.8434437336199689, iteration: 315504
loss: 0.9957027435302734,grad_norm: 0.7017291290845157, iteration: 315505
loss: 0.9943817853927612,grad_norm: 0.8593981522278844, iteration: 315506
loss: 1.0375169515609741,grad_norm: 0.7135897882799698, iteration: 315507
loss: 0.9908004999160767,grad_norm: 0.8334844272582813, iteration: 315508
loss: 1.012927532196045,grad_norm: 0.8354428400735175, iteration: 315509
loss: 1.0194511413574219,grad_norm: 0.9814369439747638, iteration: 315510
loss: 1.004268765449524,grad_norm: 0.9211920183670281, iteration: 315511
loss: 0.967767059803009,grad_norm: 0.8021829488814853, iteration: 315512
loss: 0.975715696811676,grad_norm: 0.7460396255442869, iteration: 315513
loss: 1.0237234830856323,grad_norm: 0.8756222319043725, iteration: 315514
loss: 1.0057610273361206,grad_norm: 0.7514189228856509, iteration: 315515
loss: 1.0339826345443726,grad_norm: 0.9435345408125934, iteration: 315516
loss: 0.9412887096405029,grad_norm: 0.8081632538962071, iteration: 315517
loss: 1.0098100900650024,grad_norm: 0.7366995098582859, iteration: 315518
loss: 1.1283622980117798,grad_norm: 0.8509718021450074, iteration: 315519
loss: 1.033880591392517,grad_norm: 0.8232755164803548, iteration: 315520
loss: 0.982612669467926,grad_norm: 0.8430725026086774, iteration: 315521
loss: 0.99893718957901,grad_norm: 0.7333020522102347, iteration: 315522
loss: 0.9896284341812134,grad_norm: 0.868372678599306, iteration: 315523
loss: 0.9993277788162231,grad_norm: 0.8057009012702084, iteration: 315524
loss: 1.0313903093338013,grad_norm: 0.9999992025915139, iteration: 315525
loss: 1.0288584232330322,grad_norm: 0.937603202909759, iteration: 315526
loss: 1.0108203887939453,grad_norm: 0.7499660296992818, iteration: 315527
loss: 0.9635579586029053,grad_norm: 0.8486050600792235, iteration: 315528
loss: 0.9905646443367004,grad_norm: 0.8451898007794553, iteration: 315529
loss: 1.021735429763794,grad_norm: 0.7911414115826966, iteration: 315530
loss: 0.975874125957489,grad_norm: 0.8587830566740161, iteration: 315531
loss: 1.0074536800384521,grad_norm: 0.8105981352860709, iteration: 315532
loss: 1.0186794996261597,grad_norm: 0.805251005677049, iteration: 315533
loss: 0.9819223284721375,grad_norm: 0.9496279072206912, iteration: 315534
loss: 1.002106785774231,grad_norm: 0.8158032207406132, iteration: 315535
loss: 0.9840095043182373,grad_norm: 0.7565481220084299, iteration: 315536
loss: 0.97760009765625,grad_norm: 0.8740259575036573, iteration: 315537
loss: 1.035151481628418,grad_norm: 0.8695659249535049, iteration: 315538
loss: 1.0189564228057861,grad_norm: 0.8054837961169117, iteration: 315539
loss: 0.9848428964614868,grad_norm: 0.8648219056824058, iteration: 315540
loss: 1.0191199779510498,grad_norm: 0.999999113691185, iteration: 315541
loss: 1.087708592414856,grad_norm: 0.9999999047985616, iteration: 315542
loss: 1.004629373550415,grad_norm: 0.8309152377482671, iteration: 315543
loss: 1.0904701948165894,grad_norm: 0.9999994892695965, iteration: 315544
loss: 1.0187407732009888,grad_norm: 0.791468614478267, iteration: 315545
loss: 0.9874614477157593,grad_norm: 0.9999999146344937, iteration: 315546
loss: 1.030342936515808,grad_norm: 0.8408150721349813, iteration: 315547
loss: 0.961591362953186,grad_norm: 0.7161522966705373, iteration: 315548
loss: 0.9653860330581665,grad_norm: 0.8338652542068251, iteration: 315549
loss: 0.9950276017189026,grad_norm: 0.8914010809986291, iteration: 315550
loss: 1.0083775520324707,grad_norm: 0.9999994491761397, iteration: 315551
loss: 0.9757968187332153,grad_norm: 0.8530835833725781, iteration: 315552
loss: 1.00278902053833,grad_norm: 0.7301025077683033, iteration: 315553
loss: 1.0366216897964478,grad_norm: 0.9999993553521171, iteration: 315554
loss: 0.9789407253265381,grad_norm: 0.8926040939449222, iteration: 315555
loss: 0.9970239400863647,grad_norm: 0.8941155891705644, iteration: 315556
loss: 0.9719693064689636,grad_norm: 0.9999991427973974, iteration: 315557
loss: 1.0008141994476318,grad_norm: 0.9642142610477125, iteration: 315558
loss: 1.0462130308151245,grad_norm: 0.9999993284794008, iteration: 315559
loss: 0.9852166771888733,grad_norm: 0.792278205169031, iteration: 315560
loss: 0.9503642916679382,grad_norm: 0.8224674447462557, iteration: 315561
loss: 0.9932976961135864,grad_norm: 0.9523925758882714, iteration: 315562
loss: 1.3647956848144531,grad_norm: 0.9999996457725161, iteration: 315563
loss: 0.9906681180000305,grad_norm: 0.9429756273336212, iteration: 315564
loss: 0.976060152053833,grad_norm: 0.7032215529290939, iteration: 315565
loss: 1.011236310005188,grad_norm: 0.9999991176738773, iteration: 315566
loss: 0.980935275554657,grad_norm: 0.9576350927732261, iteration: 315567
loss: 1.0066553354263306,grad_norm: 0.8202564436999462, iteration: 315568
loss: 0.9952275156974792,grad_norm: 0.9745291047771333, iteration: 315569
loss: 1.023116111755371,grad_norm: 0.7544589318700567, iteration: 315570
loss: 1.0810850858688354,grad_norm: 0.9696278706892498, iteration: 315571
loss: 1.0040090084075928,grad_norm: 0.8729816418036002, iteration: 315572
loss: 1.0218970775604248,grad_norm: 0.7976141566721049, iteration: 315573
loss: 1.0609495639801025,grad_norm: 0.9999995735559012, iteration: 315574
loss: 0.9711415767669678,grad_norm: 0.9999990983707973, iteration: 315575
loss: 0.9788202047348022,grad_norm: 0.9306275363289194, iteration: 315576
loss: 1.0228666067123413,grad_norm: 0.9999994534226185, iteration: 315577
loss: 0.9838210344314575,grad_norm: 0.8583285057215552, iteration: 315578
loss: 1.0523220300674438,grad_norm: 0.9999994624300379, iteration: 315579
loss: 1.0399737358093262,grad_norm: 0.9534287981931531, iteration: 315580
loss: 1.0186444520950317,grad_norm: 0.9999991199147205, iteration: 315581
loss: 1.0732059478759766,grad_norm: 0.8797663542158832, iteration: 315582
loss: 0.9832544326782227,grad_norm: 0.8594816960368545, iteration: 315583
loss: 0.9977387189865112,grad_norm: 0.9654063635941321, iteration: 315584
loss: 0.9697197675704956,grad_norm: 0.8346509242498934, iteration: 315585
loss: 0.9992033839225769,grad_norm: 0.8084623609655296, iteration: 315586
loss: 0.9983334541320801,grad_norm: 0.7980637889131983, iteration: 315587
loss: 1.0230498313903809,grad_norm: 0.8408422914112276, iteration: 315588
loss: 0.9966506958007812,grad_norm: 0.9999991291355216, iteration: 315589
loss: 1.0640910863876343,grad_norm: 0.9999993332960759, iteration: 315590
loss: 1.0184158086776733,grad_norm: 0.8348457784024744, iteration: 315591
loss: 0.9830807447433472,grad_norm: 0.8927436616544668, iteration: 315592
loss: 0.9877867102622986,grad_norm: 0.7973441678816472, iteration: 315593
loss: 1.0479934215545654,grad_norm: 0.8726944325978389, iteration: 315594
loss: 0.9848141670227051,grad_norm: 0.9999991695829634, iteration: 315595
loss: 0.9572696089744568,grad_norm: 0.9642164061227365, iteration: 315596
loss: 1.0766993761062622,grad_norm: 0.9999992614616174, iteration: 315597
loss: 0.9876906275749207,grad_norm: 0.9335813185384677, iteration: 315598
loss: 0.9998770356178284,grad_norm: 0.7766285949364244, iteration: 315599
loss: 1.0017157793045044,grad_norm: 0.9999990213607102, iteration: 315600
loss: 1.0264884233474731,grad_norm: 0.9999990206504249, iteration: 315601
loss: 1.0650501251220703,grad_norm: 0.9999995742523886, iteration: 315602
loss: 0.9739285111427307,grad_norm: 0.8109634212409366, iteration: 315603
loss: 1.0633221864700317,grad_norm: 0.9299530418967817, iteration: 315604
loss: 1.0029857158660889,grad_norm: 0.9076577245435086, iteration: 315605
loss: 1.0204910039901733,grad_norm: 0.9244471511924246, iteration: 315606
loss: 0.991199254989624,grad_norm: 0.9049448858529526, iteration: 315607
loss: 0.9813746809959412,grad_norm: 0.7903425196917135, iteration: 315608
loss: 1.0183953046798706,grad_norm: 0.7638612528702977, iteration: 315609
loss: 1.0054411888122559,grad_norm: 0.8299210599712525, iteration: 315610
loss: 0.9852253794670105,grad_norm: 0.8325857605025054, iteration: 315611
loss: 0.9991461634635925,grad_norm: 0.8975394417374511, iteration: 315612
loss: 1.051323413848877,grad_norm: 0.7764264282635701, iteration: 315613
loss: 0.9915274381637573,grad_norm: 0.7557381091246888, iteration: 315614
loss: 1.0411248207092285,grad_norm: 0.7424537274534301, iteration: 315615
loss: 0.9975286722183228,grad_norm: 0.9999995934041184, iteration: 315616
loss: 0.9935007095336914,grad_norm: 0.8997702899836896, iteration: 315617
loss: 1.1663408279418945,grad_norm: 0.9999999971441448, iteration: 315618
loss: 1.0360822677612305,grad_norm: 0.9999995686995123, iteration: 315619
loss: 0.9822960495948792,grad_norm: 0.9999991624329829, iteration: 315620
loss: 1.0090402364730835,grad_norm: 0.7497562472149948, iteration: 315621
loss: 1.028062343597412,grad_norm: 0.8416196420501355, iteration: 315622
loss: 1.0318220853805542,grad_norm: 0.9369383395944786, iteration: 315623
loss: 0.9893564581871033,grad_norm: 0.9127321998831144, iteration: 315624
loss: 1.061347246170044,grad_norm: 0.8926206949468724, iteration: 315625
loss: 0.9661198258399963,grad_norm: 0.8082946932594673, iteration: 315626
loss: 1.0069972276687622,grad_norm: 0.9999998422050372, iteration: 315627
loss: 1.009508490562439,grad_norm: 0.846638700369596, iteration: 315628
loss: 1.0092432498931885,grad_norm: 0.9999993155850989, iteration: 315629
loss: 0.971333384513855,grad_norm: 0.7698538298892285, iteration: 315630
loss: 1.0785715579986572,grad_norm: 0.9515129606861652, iteration: 315631
loss: 1.0299016237258911,grad_norm: 0.8646733155090605, iteration: 315632
loss: 1.0187036991119385,grad_norm: 0.728291297577182, iteration: 315633
loss: 1.014641284942627,grad_norm: 0.8090195155019353, iteration: 315634
loss: 0.9971239566802979,grad_norm: 0.8624107839850916, iteration: 315635
loss: 1.0306764841079712,grad_norm: 0.9999992615562711, iteration: 315636
loss: 0.9982131123542786,grad_norm: 0.8410151729181058, iteration: 315637
loss: 1.0009211301803589,grad_norm: 0.8244157540081942, iteration: 315638
loss: 1.118683099746704,grad_norm: 0.9121567333799541, iteration: 315639
loss: 0.9999232292175293,grad_norm: 0.7592990039191148, iteration: 315640
loss: 0.9842260479927063,grad_norm: 0.8336702647323282, iteration: 315641
loss: 0.9658910036087036,grad_norm: 0.8779515391753192, iteration: 315642
loss: 0.9940642714500427,grad_norm: 0.7813894603931295, iteration: 315643
loss: 0.982671856880188,grad_norm: 0.8147820948555311, iteration: 315644
loss: 0.9712459444999695,grad_norm: 0.9862352927701011, iteration: 315645
loss: 1.0317307710647583,grad_norm: 0.833245466150864, iteration: 315646
loss: 1.0574700832366943,grad_norm: 0.9999995578950568, iteration: 315647
loss: 1.0189194679260254,grad_norm: 0.999999241392794, iteration: 315648
loss: 0.9985138177871704,grad_norm: 0.9973596350142664, iteration: 315649
loss: 1.009466290473938,grad_norm: 0.6906695388559873, iteration: 315650
loss: 1.0034608840942383,grad_norm: 0.999999138870474, iteration: 315651
loss: 1.0340005159378052,grad_norm: 0.9795320985512836, iteration: 315652
loss: 1.0539658069610596,grad_norm: 0.9999993304582317, iteration: 315653
loss: 0.9811995625495911,grad_norm: 0.8453698269022307, iteration: 315654
loss: 1.01067054271698,grad_norm: 0.8292612942711741, iteration: 315655
loss: 1.0281977653503418,grad_norm: 0.9999997567029326, iteration: 315656
loss: 0.9671803712844849,grad_norm: 0.8602787486995435, iteration: 315657
loss: 0.9777161478996277,grad_norm: 0.7860984798532517, iteration: 315658
loss: 0.9871670007705688,grad_norm: 0.7874664752148266, iteration: 315659
loss: 0.9820605516433716,grad_norm: 0.9087066553867224, iteration: 315660
loss: 0.9970746636390686,grad_norm: 0.7506018528733641, iteration: 315661
loss: 1.0372505187988281,grad_norm: 0.9717418120994543, iteration: 315662
loss: 1.0338444709777832,grad_norm: 0.9999990925894734, iteration: 315663
loss: 0.9743373394012451,grad_norm: 0.7737403204648976, iteration: 315664
loss: 1.0302842855453491,grad_norm: 0.8188710823846855, iteration: 315665
loss: 1.0285043716430664,grad_norm: 0.7577315737436912, iteration: 315666
loss: 0.9991415739059448,grad_norm: 0.7609433094317763, iteration: 315667
loss: 1.021174430847168,grad_norm: 0.9999992468974148, iteration: 315668
loss: 1.0209431648254395,grad_norm: 0.958822410508086, iteration: 315669
loss: 0.9941070079803467,grad_norm: 0.7855374002896834, iteration: 315670
loss: 0.9911565780639648,grad_norm: 0.9999990810323098, iteration: 315671
loss: 1.0080560445785522,grad_norm: 0.794037566379924, iteration: 315672
loss: 1.0042530298233032,grad_norm: 0.9065059554990986, iteration: 315673
loss: 0.9800366163253784,grad_norm: 0.9440596509628931, iteration: 315674
loss: 1.0356471538543701,grad_norm: 0.999999722289713, iteration: 315675
loss: 1.0049724578857422,grad_norm: 0.8769927115901913, iteration: 315676
loss: 1.0093986988067627,grad_norm: 0.826433014893241, iteration: 315677
loss: 1.0166406631469727,grad_norm: 0.715978027223437, iteration: 315678
loss: 0.9938788414001465,grad_norm: 0.7906125931679476, iteration: 315679
loss: 1.0084689855575562,grad_norm: 0.8391939583656164, iteration: 315680
loss: 0.9830394387245178,grad_norm: 0.8509760422876472, iteration: 315681
loss: 1.01315438747406,grad_norm: 0.7750189688553517, iteration: 315682
loss: 1.0121859312057495,grad_norm: 0.9857257915937628, iteration: 315683
loss: 0.983023464679718,grad_norm: 0.8696042212200861, iteration: 315684
loss: 0.9980946779251099,grad_norm: 0.7636656669490672, iteration: 315685
loss: 1.0456465482711792,grad_norm: 0.9999998707057032, iteration: 315686
loss: 1.0737333297729492,grad_norm: 0.9999991715272284, iteration: 315687
loss: 0.971591591835022,grad_norm: 0.8976964817995269, iteration: 315688
loss: 0.9822503924369812,grad_norm: 0.9999993478626419, iteration: 315689
loss: 0.9747204184532166,grad_norm: 0.80918364331963, iteration: 315690
loss: 0.9917829632759094,grad_norm: 0.7868664767897044, iteration: 315691
loss: 1.0190846920013428,grad_norm: 0.7950880851378608, iteration: 315692
loss: 1.03274405002594,grad_norm: 0.9572391752408262, iteration: 315693
loss: 0.9812126159667969,grad_norm: 0.7933014076254712, iteration: 315694
loss: 0.9968714118003845,grad_norm: 0.9999992584961664, iteration: 315695
loss: 0.9663522839546204,grad_norm: 0.8346566138769593, iteration: 315696
loss: 0.9850065112113953,grad_norm: 0.7807059403084741, iteration: 315697
loss: 0.9996287226676941,grad_norm: 0.8226329894257799, iteration: 315698
loss: 0.9649025201797485,grad_norm: 0.9495588217144196, iteration: 315699
loss: 1.0195902585983276,grad_norm: 0.8696308477702734, iteration: 315700
loss: 1.0198633670806885,grad_norm: 0.999999040648522, iteration: 315701
loss: 1.0160118341445923,grad_norm: 0.9736243379197601, iteration: 315702
loss: 1.0089895725250244,grad_norm: 0.9999990880165238, iteration: 315703
loss: 1.0065093040466309,grad_norm: 0.7978693642445515, iteration: 315704
loss: 0.9983989000320435,grad_norm: 0.7173423460925092, iteration: 315705
loss: 1.0154213905334473,grad_norm: 0.9999993080893868, iteration: 315706
loss: 0.9955737590789795,grad_norm: 0.8523922769408429, iteration: 315707
loss: 1.0128940343856812,grad_norm: 0.7877861698437334, iteration: 315708
loss: 0.9956863522529602,grad_norm: 0.8141933974015966, iteration: 315709
loss: 1.0266700983047485,grad_norm: 0.9212562897720905, iteration: 315710
loss: 1.0149943828582764,grad_norm: 0.9999992060865833, iteration: 315711
loss: 0.9890036582946777,grad_norm: 0.7404212225316872, iteration: 315712
loss: 0.964484691619873,grad_norm: 0.8067346273587006, iteration: 315713
loss: 1.0629403591156006,grad_norm: 0.9999998274618666, iteration: 315714
loss: 0.9731664657592773,grad_norm: 0.8216519746207481, iteration: 315715
loss: 0.9957937002182007,grad_norm: 0.881055396508336, iteration: 315716
loss: 0.9882558584213257,grad_norm: 0.7833468237839533, iteration: 315717
loss: 0.9897871017456055,grad_norm: 0.8155038251161755, iteration: 315718
loss: 0.9584844708442688,grad_norm: 0.9311591475538386, iteration: 315719
loss: 0.9757139682769775,grad_norm: 0.8978981157624416, iteration: 315720
loss: 0.9810125827789307,grad_norm: 0.8354816412564106, iteration: 315721
loss: 0.9660754203796387,grad_norm: 0.9150799155291223, iteration: 315722
loss: 1.0244802236557007,grad_norm: 0.8388315764071083, iteration: 315723
loss: 1.0012825727462769,grad_norm: 0.7629569022071886, iteration: 315724
loss: 0.9811393618583679,grad_norm: 0.7180530004301655, iteration: 315725
loss: 1.0416189432144165,grad_norm: 0.7007528653152774, iteration: 315726
loss: 0.9995347857475281,grad_norm: 0.8696083849471365, iteration: 315727
loss: 1.0040959119796753,grad_norm: 0.9119569904911762, iteration: 315728
loss: 0.9914092421531677,grad_norm: 0.8195671803827325, iteration: 315729
loss: 0.9699856638908386,grad_norm: 0.8384598593160685, iteration: 315730
loss: 0.9681537747383118,grad_norm: 0.9714542334472377, iteration: 315731
loss: 0.9954639077186584,grad_norm: 0.8759262844612299, iteration: 315732
loss: 0.9653175473213196,grad_norm: 0.844772406302781, iteration: 315733
loss: 1.0390726327896118,grad_norm: 0.9521373229281281, iteration: 315734
loss: 1.0650238990783691,grad_norm: 0.9999998763416499, iteration: 315735
loss: 0.9910680055618286,grad_norm: 0.9362753158749298, iteration: 315736
loss: 0.9883846640586853,grad_norm: 0.9724688024800797, iteration: 315737
loss: 1.0194480419158936,grad_norm: 0.7713794216925757, iteration: 315738
loss: 1.0145516395568848,grad_norm: 0.9088968895259762, iteration: 315739
loss: 0.9992738366127014,grad_norm: 0.9999994408203753, iteration: 315740
loss: 1.0233604907989502,grad_norm: 0.8390651826186638, iteration: 315741
loss: 1.0080143213272095,grad_norm: 0.9602698598493276, iteration: 315742
loss: 1.0698307752609253,grad_norm: 0.9999997828979771, iteration: 315743
loss: 1.0181021690368652,grad_norm: 0.8182868051113628, iteration: 315744
loss: 0.9949845671653748,grad_norm: 0.7591621131407597, iteration: 315745
loss: 0.9592061638832092,grad_norm: 0.8722897692833004, iteration: 315746
loss: 0.9996747374534607,grad_norm: 0.8203853559284544, iteration: 315747
loss: 1.0097826719284058,grad_norm: 0.9213111967377379, iteration: 315748
loss: 1.0865602493286133,grad_norm: 0.871272417252065, iteration: 315749
loss: 1.0100691318511963,grad_norm: 0.9030167193461985, iteration: 315750
loss: 1.0869855880737305,grad_norm: 0.8733215095179468, iteration: 315751
loss: 0.9779152870178223,grad_norm: 0.8291550049256936, iteration: 315752
loss: 1.0250295400619507,grad_norm: 0.7956013199208206, iteration: 315753
loss: 1.0637022256851196,grad_norm: 0.970592435211901, iteration: 315754
loss: 1.0049408674240112,grad_norm: 0.7192588349748711, iteration: 315755
loss: 1.0937153100967407,grad_norm: 0.8837360105590432, iteration: 315756
loss: 0.9790055751800537,grad_norm: 0.8007063158313366, iteration: 315757
loss: 1.020155906677246,grad_norm: 0.8493107033006188, iteration: 315758
loss: 1.0169481039047241,grad_norm: 0.7805675605492073, iteration: 315759
loss: 0.9856411218643188,grad_norm: 0.9620770548270687, iteration: 315760
loss: 1.037914752960205,grad_norm: 0.9999992283389691, iteration: 315761
loss: 0.9970450401306152,grad_norm: 0.8000621324923227, iteration: 315762
loss: 1.0044368505477905,grad_norm: 0.9999996031929235, iteration: 315763
loss: 0.9962794184684753,grad_norm: 0.7677401048471183, iteration: 315764
loss: 0.9786111116409302,grad_norm: 0.8394636879995135, iteration: 315765
loss: 1.025466799736023,grad_norm: 0.7241575156838312, iteration: 315766
loss: 1.0291030406951904,grad_norm: 0.7961562516871395, iteration: 315767
loss: 1.0006067752838135,grad_norm: 0.839581111811397, iteration: 315768
loss: 1.0126090049743652,grad_norm: 0.8074273950551385, iteration: 315769
loss: 1.0516786575317383,grad_norm: 0.9999995903123274, iteration: 315770
loss: 0.9923989176750183,grad_norm: 0.9196092755285817, iteration: 315771
loss: 1.0176756381988525,grad_norm: 0.8038764203839034, iteration: 315772
loss: 1.0276367664337158,grad_norm: 0.7379052698969819, iteration: 315773
loss: 1.015714168548584,grad_norm: 0.8051091607650921, iteration: 315774
loss: 0.979091227054596,grad_norm: 0.7404174377600299, iteration: 315775
loss: 0.9858327507972717,grad_norm: 0.8632447935678434, iteration: 315776
loss: 1.028761386871338,grad_norm: 0.9596919101935467, iteration: 315777
loss: 0.9784691333770752,grad_norm: 0.7966568420310496, iteration: 315778
loss: 0.9689911007881165,grad_norm: 0.7990335879856703, iteration: 315779
loss: 1.0040353536605835,grad_norm: 0.7435168102068405, iteration: 315780
loss: 0.978384792804718,grad_norm: 0.7988669811640177, iteration: 315781
loss: 1.0007039308547974,grad_norm: 0.9289346102930892, iteration: 315782
loss: 1.0328432321548462,grad_norm: 0.8625587367437775, iteration: 315783
loss: 1.0471296310424805,grad_norm: 0.9999991931644145, iteration: 315784
loss: 0.9854103326797485,grad_norm: 0.8777834509303161, iteration: 315785
loss: 1.0184581279754639,grad_norm: 0.8194351804012585, iteration: 315786
loss: 0.9987421035766602,grad_norm: 0.7732050705305562, iteration: 315787
loss: 1.0390141010284424,grad_norm: 0.9492065648581902, iteration: 315788
loss: 0.9980946779251099,grad_norm: 0.922769182136208, iteration: 315789
loss: 0.9890211224555969,grad_norm: 0.999999997594334, iteration: 315790
loss: 0.9851852059364319,grad_norm: 0.7994620153358456, iteration: 315791
loss: 1.0005635023117065,grad_norm: 0.637341350715696, iteration: 315792
loss: 1.019266128540039,grad_norm: 0.7215392852413298, iteration: 315793
loss: 0.9953579306602478,grad_norm: 0.99999905761723, iteration: 315794
loss: 0.9632495045661926,grad_norm: 0.8003644109555695, iteration: 315795
loss: 0.9999243021011353,grad_norm: 0.9999990945141232, iteration: 315796
loss: 0.9946572184562683,grad_norm: 0.7450929877022721, iteration: 315797
loss: 1.029057264328003,grad_norm: 0.7548077349641588, iteration: 315798
loss: 0.9822688102722168,grad_norm: 0.877527640506384, iteration: 315799
loss: 1.0362783670425415,grad_norm: 0.9999991315582688, iteration: 315800
loss: 1.0200097560882568,grad_norm: 0.8302545745517106, iteration: 315801
loss: 0.987282395362854,grad_norm: 0.6745565206151024, iteration: 315802
loss: 1.0287178754806519,grad_norm: 0.9448891612039807, iteration: 315803
loss: 0.9977386593818665,grad_norm: 0.9866377581700883, iteration: 315804
loss: 1.0058966875076294,grad_norm: 0.8775618573553752, iteration: 315805
loss: 0.9726969003677368,grad_norm: 0.6659013558132738, iteration: 315806
loss: 0.9778050780296326,grad_norm: 0.955133791312314, iteration: 315807
loss: 1.0102540254592896,grad_norm: 0.7596591892847248, iteration: 315808
loss: 1.0074101686477661,grad_norm: 0.9024306244053291, iteration: 315809
loss: 0.9889242649078369,grad_norm: 0.7088048000434152, iteration: 315810
loss: 1.006835699081421,grad_norm: 0.8625213684072109, iteration: 315811
loss: 0.9887542724609375,grad_norm: 0.944512731045423, iteration: 315812
loss: 1.009770393371582,grad_norm: 0.7659787295876922, iteration: 315813
loss: 1.0259950160980225,grad_norm: 0.8497203535714289, iteration: 315814
loss: 0.9845675826072693,grad_norm: 0.8091488959662019, iteration: 315815
loss: 0.9906036257743835,grad_norm: 0.7314210315526974, iteration: 315816
loss: 0.9862310290336609,grad_norm: 0.999999050256079, iteration: 315817
loss: 0.9667251110076904,grad_norm: 0.9999990897881743, iteration: 315818
loss: 0.9992507696151733,grad_norm: 0.9444552858059098, iteration: 315819
loss: 1.011586308479309,grad_norm: 0.998418696682336, iteration: 315820
loss: 0.9943782687187195,grad_norm: 0.7030542032842291, iteration: 315821
loss: 1.0017613172531128,grad_norm: 0.8964794817363663, iteration: 315822
loss: 0.9680183529853821,grad_norm: 0.7696576436707779, iteration: 315823
loss: 1.0200856924057007,grad_norm: 0.749968090766984, iteration: 315824
loss: 1.0234559774398804,grad_norm: 0.8023424490518616, iteration: 315825
loss: 1.0162204504013062,grad_norm: 0.8108221609116593, iteration: 315826
loss: 1.0260852575302124,grad_norm: 0.999999032120998, iteration: 315827
loss: 0.9736047983169556,grad_norm: 0.8291894298167636, iteration: 315828
loss: 0.9953063726425171,grad_norm: 0.7908862512568233, iteration: 315829
loss: 0.9747954607009888,grad_norm: 0.9444390785664032, iteration: 315830
loss: 1.016083836555481,grad_norm: 0.8897586436210048, iteration: 315831
loss: 0.9920923709869385,grad_norm: 0.775256040157648, iteration: 315832
loss: 1.0287679433822632,grad_norm: 0.6522995404732569, iteration: 315833
loss: 1.023598074913025,grad_norm: 0.9999991370706303, iteration: 315834
loss: 1.0829741954803467,grad_norm: 0.8331967452099179, iteration: 315835
loss: 1.014480471611023,grad_norm: 0.9999994528880192, iteration: 315836
loss: 1.0190908908843994,grad_norm: 0.7372909741686974, iteration: 315837
loss: 0.9830004572868347,grad_norm: 0.8520697081747572, iteration: 315838
loss: 0.9382333755493164,grad_norm: 0.8983839941472739, iteration: 315839
loss: 0.9674941301345825,grad_norm: 0.9999990083456614, iteration: 315840
loss: 1.0211313962936401,grad_norm: 0.8518546499510583, iteration: 315841
loss: 0.9675774574279785,grad_norm: 0.7672921889803423, iteration: 315842
loss: 0.9790191054344177,grad_norm: 0.8558585659768079, iteration: 315843
loss: 0.9718007445335388,grad_norm: 0.9983264188887144, iteration: 315844
loss: 1.0099472999572754,grad_norm: 0.8004988476238375, iteration: 315845
loss: 0.9903427362442017,grad_norm: 0.9226212858339662, iteration: 315846
loss: 0.990982174873352,grad_norm: 0.8624984953628532, iteration: 315847
loss: 0.9655426740646362,grad_norm: 0.8965969065797487, iteration: 315848
loss: 0.9799202680587769,grad_norm: 0.8471531780463692, iteration: 315849
loss: 1.0037704706192017,grad_norm: 0.8101795496002903, iteration: 315850
loss: 1.0293723344802856,grad_norm: 0.7749879984237646, iteration: 315851
loss: 1.0046314001083374,grad_norm: 0.7967640171041599, iteration: 315852
loss: 0.9825637340545654,grad_norm: 0.899625817730269, iteration: 315853
loss: 1.02507746219635,grad_norm: 0.7590133712017684, iteration: 315854
loss: 1.0085452795028687,grad_norm: 0.8739239543235823, iteration: 315855
loss: 0.9959661364555359,grad_norm: 0.8642752908905605, iteration: 315856
loss: 1.0089023113250732,grad_norm: 0.9999990880061326, iteration: 315857
loss: 1.0191867351531982,grad_norm: 0.9999991810383306, iteration: 315858
loss: 1.0304783582687378,grad_norm: 0.692704975972584, iteration: 315859
loss: 1.01962411403656,grad_norm: 0.8752232852198075, iteration: 315860
loss: 1.0195914506912231,grad_norm: 0.8476855151552153, iteration: 315861
loss: 1.0114701986312866,grad_norm: 0.9451583656904866, iteration: 315862
loss: 1.0650410652160645,grad_norm: 0.9999994662220455, iteration: 315863
loss: 1.0199649333953857,grad_norm: 0.9024703037015575, iteration: 315864
loss: 1.0279089212417603,grad_norm: 0.9999990348507328, iteration: 315865
loss: 0.9801999926567078,grad_norm: 0.8922758052101606, iteration: 315866
loss: 0.9996636509895325,grad_norm: 0.9087374208826527, iteration: 315867
loss: 0.9974551200866699,grad_norm: 0.9573347451497849, iteration: 315868
loss: 0.9678934812545776,grad_norm: 0.9999999551843514, iteration: 315869
loss: 0.970570981502533,grad_norm: 0.850810958918546, iteration: 315870
loss: 1.0314619541168213,grad_norm: 0.9671408444655352, iteration: 315871
loss: 1.0028096437454224,grad_norm: 0.7084070412891706, iteration: 315872
loss: 1.005367398262024,grad_norm: 0.9176653431651643, iteration: 315873
loss: 1.0379377603530884,grad_norm: 0.7939568974919315, iteration: 315874
loss: 1.0255146026611328,grad_norm: 0.8817376816097929, iteration: 315875
loss: 0.9909117221832275,grad_norm: 0.7507353061363826, iteration: 315876
loss: 1.0241868495941162,grad_norm: 0.9999992252813452, iteration: 315877
loss: 0.9698272347450256,grad_norm: 0.891620655308744, iteration: 315878
loss: 0.992348849773407,grad_norm: 0.9227872196692352, iteration: 315879
loss: 1.0160351991653442,grad_norm: 0.6386197533608535, iteration: 315880
loss: 1.008420467376709,grad_norm: 0.9999990950546239, iteration: 315881
loss: 1.0367376804351807,grad_norm: 0.9999991087194984, iteration: 315882
loss: 1.0331828594207764,grad_norm: 0.7294814639314474, iteration: 315883
loss: 1.0329008102416992,grad_norm: 0.7680664484975005, iteration: 315884
loss: 1.0064747333526611,grad_norm: 0.906261186988202, iteration: 315885
loss: 1.0235307216644287,grad_norm: 0.6713258271156509, iteration: 315886
loss: 1.130614995956421,grad_norm: 0.9258998677582488, iteration: 315887
loss: 0.9882294535636902,grad_norm: 0.9532602383052359, iteration: 315888
loss: 1.002692461013794,grad_norm: 0.7849948224752438, iteration: 315889
loss: 0.9961783289909363,grad_norm: 0.8224506747676273, iteration: 315890
loss: 0.9918586015701294,grad_norm: 0.8885514829770633, iteration: 315891
loss: 0.9931313395500183,grad_norm: 0.777592590552527, iteration: 315892
loss: 1.009446144104004,grad_norm: 0.915635571135848, iteration: 315893
loss: 1.0703685283660889,grad_norm: 0.7968608221291792, iteration: 315894
loss: 0.9895856976509094,grad_norm: 0.7929455160056279, iteration: 315895
loss: 0.999605655670166,grad_norm: 0.9662206318370179, iteration: 315896
loss: 1.002117395401001,grad_norm: 0.8311211803994019, iteration: 315897
loss: 0.9979599714279175,grad_norm: 0.7505088386439472, iteration: 315898
loss: 0.9589477181434631,grad_norm: 0.8305918580449788, iteration: 315899
loss: 0.9676419496536255,grad_norm: 0.7328292256264098, iteration: 315900
loss: 0.9911845922470093,grad_norm: 0.7719234631447429, iteration: 315901
loss: 0.9521916508674622,grad_norm: 0.7939767515159498, iteration: 315902
loss: 0.9610532522201538,grad_norm: 0.9808706136265483, iteration: 315903
loss: 0.98835688829422,grad_norm: 0.8165505045034568, iteration: 315904
loss: 1.1720942258834839,grad_norm: 0.9999994189136527, iteration: 315905
loss: 0.9786903858184814,grad_norm: 0.8953194819252199, iteration: 315906
loss: 0.9625387787818909,grad_norm: 0.8703817161847365, iteration: 315907
loss: 1.0093084573745728,grad_norm: 0.8303580342640057, iteration: 315908
loss: 0.9647042155265808,grad_norm: 0.726867931652839, iteration: 315909
loss: 0.9503206610679626,grad_norm: 0.8035945427384246, iteration: 315910
loss: 1.000807523727417,grad_norm: 0.9849483683954072, iteration: 315911
loss: 1.0059725046157837,grad_norm: 0.9999991853138042, iteration: 315912
loss: 1.014122486114502,grad_norm: 0.9713175985237789, iteration: 315913
loss: 1.0145330429077148,grad_norm: 0.9296924252624803, iteration: 315914
loss: 0.9975672960281372,grad_norm: 0.8736261243744177, iteration: 315915
loss: 1.0331096649169922,grad_norm: 0.9999990821227255, iteration: 315916
loss: 1.0014574527740479,grad_norm: 0.951821959399314, iteration: 315917
loss: 1.000948190689087,grad_norm: 0.7547555179653098, iteration: 315918
loss: 1.0042760372161865,grad_norm: 0.741398878474681, iteration: 315919
loss: 0.9683020710945129,grad_norm: 0.7801676436470281, iteration: 315920
loss: 1.025847315788269,grad_norm: 0.8414601269835608, iteration: 315921
loss: 1.0785179138183594,grad_norm: 0.9034980158226319, iteration: 315922
loss: 1.1428289413452148,grad_norm: 0.9999990997976749, iteration: 315923
loss: 1.0077208280563354,grad_norm: 0.8109455524752172, iteration: 315924
loss: 1.0136349201202393,grad_norm: 0.9999993240674971, iteration: 315925
loss: 0.968410849571228,grad_norm: 0.8259163265099894, iteration: 315926
loss: 0.9821661114692688,grad_norm: 0.8225097369171475, iteration: 315927
loss: 1.1318707466125488,grad_norm: 0.999999916441069, iteration: 315928
loss: 1.0360573530197144,grad_norm: 0.8016411134849918, iteration: 315929
loss: 1.0562214851379395,grad_norm: 0.9999992369261066, iteration: 315930
loss: 1.0092804431915283,grad_norm: 0.8497345415753953, iteration: 315931
loss: 0.9788100123405457,grad_norm: 0.9074152067387488, iteration: 315932
loss: 1.0001696348190308,grad_norm: 0.8170321934435592, iteration: 315933
loss: 1.035865306854248,grad_norm: 0.9316621718967225, iteration: 315934
loss: 1.003797173500061,grad_norm: 0.924036590267786, iteration: 315935
loss: 0.9632822871208191,grad_norm: 0.9999992280115013, iteration: 315936
loss: 1.0409822463989258,grad_norm: 0.8172871832435846, iteration: 315937
loss: 0.9895361661911011,grad_norm: 0.9146403024984183, iteration: 315938
loss: 0.9545331597328186,grad_norm: 0.9041101095745606, iteration: 315939
loss: 1.0467262268066406,grad_norm: 0.8391832961593494, iteration: 315940
loss: 0.9858161211013794,grad_norm: 0.8201494529730144, iteration: 315941
loss: 0.9761251211166382,grad_norm: 0.7364936486611064, iteration: 315942
loss: 1.0274370908737183,grad_norm: 0.8323684564468231, iteration: 315943
loss: 0.9550292491912842,grad_norm: 0.9999989941655275, iteration: 315944
loss: 0.993770956993103,grad_norm: 0.7423366870273805, iteration: 315945
loss: 0.9771592617034912,grad_norm: 0.6786594173561774, iteration: 315946
loss: 0.9974799752235413,grad_norm: 0.7893748074615878, iteration: 315947
loss: 1.0187526941299438,grad_norm: 0.8737983937152151, iteration: 315948
loss: 0.9878277778625488,grad_norm: 0.9999998052676873, iteration: 315949
loss: 1.0127915143966675,grad_norm: 0.8796674293154791, iteration: 315950
loss: 1.1054189205169678,grad_norm: 0.9999990832338995, iteration: 315951
loss: 1.0055125951766968,grad_norm: 0.9999991963732325, iteration: 315952
loss: 1.0255943536758423,grad_norm: 0.8904272833146787, iteration: 315953
loss: 1.0125900506973267,grad_norm: 0.832132605551354, iteration: 315954
loss: 0.9866135716438293,grad_norm: 0.8042943776827239, iteration: 315955
loss: 0.9890953302383423,grad_norm: 0.8180657228177217, iteration: 315956
loss: 0.9742949604988098,grad_norm: 0.8615650304342126, iteration: 315957
loss: 1.0135457515716553,grad_norm: 0.7636726733662266, iteration: 315958
loss: 1.0266895294189453,grad_norm: 0.8390407835608424, iteration: 315959
loss: 1.010542869567871,grad_norm: 0.7811007854182759, iteration: 315960
loss: 0.9855597615242004,grad_norm: 0.9053130419095182, iteration: 315961
loss: 1.0127209424972534,grad_norm: 0.9999990007163035, iteration: 315962
loss: 0.9989888072013855,grad_norm: 0.7755779219837167, iteration: 315963
loss: 0.9895838499069214,grad_norm: 0.8195429053942711, iteration: 315964
loss: 1.0204554796218872,grad_norm: 0.9382737313839635, iteration: 315965
loss: 1.013346791267395,grad_norm: 0.8026528811236306, iteration: 315966
loss: 0.983292818069458,grad_norm: 0.9999993538939863, iteration: 315967
loss: 0.9795369505882263,grad_norm: 0.8414709441791615, iteration: 315968
loss: 1.0355299711227417,grad_norm: 0.910555072999819, iteration: 315969
loss: 0.9841017127037048,grad_norm: 0.8026358107140573, iteration: 315970
loss: 1.005370020866394,grad_norm: 0.8449896791668012, iteration: 315971
loss: 0.988013744354248,grad_norm: 0.9999991417685506, iteration: 315972
loss: 1.0088225603103638,grad_norm: 0.9667496109575425, iteration: 315973
loss: 0.9682285785675049,grad_norm: 0.7924533990980129, iteration: 315974
loss: 0.996237576007843,grad_norm: 0.7977254514371536, iteration: 315975
loss: 1.0765475034713745,grad_norm: 0.9477570419010207, iteration: 315976
loss: 0.9840690493583679,grad_norm: 0.9999993490639669, iteration: 315977
loss: 0.9710595011711121,grad_norm: 0.8644736689386777, iteration: 315978
loss: 1.0069109201431274,grad_norm: 0.7920072582469192, iteration: 315979
loss: 0.9875179529190063,grad_norm: 0.9999995918363608, iteration: 315980
loss: 1.052221655845642,grad_norm: 0.9999994269534183, iteration: 315981
loss: 0.9902874231338501,grad_norm: 0.8483538320265128, iteration: 315982
loss: 0.9638338088989258,grad_norm: 0.7908036838715292, iteration: 315983
loss: 0.9968463182449341,grad_norm: 0.8294096726928237, iteration: 315984
loss: 1.0239611864089966,grad_norm: 0.7450178438397889, iteration: 315985
loss: 1.0107684135437012,grad_norm: 0.9999991693711974, iteration: 315986
loss: 1.0158876180648804,grad_norm: 0.8010195564007203, iteration: 315987
loss: 0.9345822334289551,grad_norm: 0.7917646497733125, iteration: 315988
loss: 0.9915528297424316,grad_norm: 0.8614511168734237, iteration: 315989
loss: 1.04728102684021,grad_norm: 0.8357233418259636, iteration: 315990
loss: 0.9647747278213501,grad_norm: 0.7277558553538556, iteration: 315991
loss: 1.023382544517517,grad_norm: 0.9999991089382307, iteration: 315992
loss: 0.9844949245452881,grad_norm: 0.990646799549655, iteration: 315993
loss: 1.0102901458740234,grad_norm: 0.7522696589771652, iteration: 315994
loss: 1.0325682163238525,grad_norm: 0.9999994704322049, iteration: 315995
loss: 1.033979058265686,grad_norm: 0.827678931007024, iteration: 315996
loss: 1.006880521774292,grad_norm: 0.8515262681166743, iteration: 315997
loss: 0.9566516876220703,grad_norm: 0.9798848621393301, iteration: 315998
loss: 0.9624497294425964,grad_norm: 0.826515790569203, iteration: 315999
loss: 1.4253860712051392,grad_norm: 0.9999998300555114, iteration: 316000
loss: 1.0592130422592163,grad_norm: 0.9151661336347285, iteration: 316001
loss: 0.9950121641159058,grad_norm: 0.7450368525779508, iteration: 316002
loss: 0.9961473941802979,grad_norm: 0.7664082247573115, iteration: 316003
loss: 1.0005406141281128,grad_norm: 0.9537124375984658, iteration: 316004
loss: 1.0248140096664429,grad_norm: 0.9699471006318385, iteration: 316005
loss: 0.9952947497367859,grad_norm: 0.7965284793105242, iteration: 316006
loss: 1.0648319721221924,grad_norm: 0.7861587632185224, iteration: 316007
loss: 1.0101933479309082,grad_norm: 0.9031762895565169, iteration: 316008
loss: 1.0003199577331543,grad_norm: 0.7107273296937924, iteration: 316009
loss: 1.0417418479919434,grad_norm: 0.9999991993587691, iteration: 316010
loss: 1.0254521369934082,grad_norm: 0.8313636013409926, iteration: 316011
loss: 0.990696370601654,grad_norm: 0.8987745923030416, iteration: 316012
loss: 0.9806161522865295,grad_norm: 0.991924273041836, iteration: 316013
loss: 1.0028114318847656,grad_norm: 0.9999990734517996, iteration: 316014
loss: 1.0082672834396362,grad_norm: 0.7790696170561666, iteration: 316015
loss: 1.0027785301208496,grad_norm: 0.999999251322523, iteration: 316016
loss: 0.9596459865570068,grad_norm: 0.790163020449564, iteration: 316017
loss: 0.9979771375656128,grad_norm: 0.9574739290328171, iteration: 316018
loss: 0.9574403762817383,grad_norm: 0.8634055501791129, iteration: 316019
loss: 1.0127112865447998,grad_norm: 0.8612669289383756, iteration: 316020
loss: 1.0083749294281006,grad_norm: 0.9999991269759586, iteration: 316021
loss: 1.0508508682250977,grad_norm: 0.9999990721148511, iteration: 316022
loss: 1.0155712366104126,grad_norm: 0.7667061485068628, iteration: 316023
loss: 0.9906849265098572,grad_norm: 0.6810191876626676, iteration: 316024
loss: 0.997570276260376,grad_norm: 0.7914961168949537, iteration: 316025
loss: 1.040563941001892,grad_norm: 0.7948129304970506, iteration: 316026
loss: 0.9962339401245117,grad_norm: 0.9667348961733606, iteration: 316027
loss: 0.9956230521202087,grad_norm: 0.9999992045169986, iteration: 316028
loss: 1.009573221206665,grad_norm: 0.7109003901413916, iteration: 316029
loss: 0.9781666994094849,grad_norm: 0.9999990202110387, iteration: 316030
loss: 0.965105414390564,grad_norm: 0.8204625886595958, iteration: 316031
loss: 0.9962069392204285,grad_norm: 0.8218830389199683, iteration: 316032
loss: 0.9838568568229675,grad_norm: 0.8137088835759325, iteration: 316033
loss: 1.0198442935943604,grad_norm: 0.7454117022180428, iteration: 316034
loss: 1.0356172323226929,grad_norm: 0.8460104056081729, iteration: 316035
loss: 0.9849913716316223,grad_norm: 0.7585469050153858, iteration: 316036
loss: 1.0363965034484863,grad_norm: 0.9999996442657924, iteration: 316037
loss: 1.0028610229492188,grad_norm: 0.8046464406074796, iteration: 316038
loss: 0.9846913814544678,grad_norm: 0.7129973407913992, iteration: 316039
loss: 0.9695247411727905,grad_norm: 0.7820980118196119, iteration: 316040
loss: 0.9701215028762817,grad_norm: 0.9868200912360232, iteration: 316041
loss: 0.9983804225921631,grad_norm: 0.8078540244493484, iteration: 316042
loss: 0.9587697386741638,grad_norm: 0.9999989788706101, iteration: 316043
loss: 1.0804100036621094,grad_norm: 0.9999990851752456, iteration: 316044
loss: 1.0146480798721313,grad_norm: 0.9026377379120654, iteration: 316045
loss: 1.021526575088501,grad_norm: 0.8056466718425075, iteration: 316046
loss: 1.017808198928833,grad_norm: 0.9999994361400358, iteration: 316047
loss: 1.0258673429489136,grad_norm: 0.9534881459051586, iteration: 316048
loss: 1.0278671979904175,grad_norm: 0.9999999152606153, iteration: 316049
loss: 0.9942298531532288,grad_norm: 0.7796659309156722, iteration: 316050
loss: 1.0102733373641968,grad_norm: 0.9733128614275304, iteration: 316051
loss: 0.9669557809829712,grad_norm: 0.9216446324025054, iteration: 316052
loss: 0.986681342124939,grad_norm: 0.8933148461447242, iteration: 316053
loss: 0.9530326128005981,grad_norm: 0.8178091617829389, iteration: 316054
loss: 1.0109516382217407,grad_norm: 0.9097339415594793, iteration: 316055
loss: 0.9816508889198303,grad_norm: 0.8435332757083512, iteration: 316056
loss: 1.0546588897705078,grad_norm: 0.8779218604356229, iteration: 316057
loss: 1.0099042654037476,grad_norm: 0.9672249083219511, iteration: 316058
loss: 0.9797407984733582,grad_norm: 0.8629312887927885, iteration: 316059
loss: 0.9931983947753906,grad_norm: 0.9687821753986514, iteration: 316060
loss: 0.9977331161499023,grad_norm: 0.7371986664986747, iteration: 316061
loss: 1.004448652267456,grad_norm: 0.780495069610991, iteration: 316062
loss: 1.0515881776809692,grad_norm: 0.8865615143956334, iteration: 316063
loss: 0.9763203859329224,grad_norm: 0.7899200527950487, iteration: 316064
loss: 1.0039738416671753,grad_norm: 0.7137425972244863, iteration: 316065
loss: 0.9819016456604004,grad_norm: 0.6819883905244246, iteration: 316066
loss: 0.9943258762359619,grad_norm: 0.9291999811570232, iteration: 316067
loss: 1.0029922723770142,grad_norm: 0.7576343371211791, iteration: 316068
loss: 0.9827173352241516,grad_norm: 0.9931997037077281, iteration: 316069
loss: 0.9921532869338989,grad_norm: 0.8552882572270307, iteration: 316070
loss: 1.0511893033981323,grad_norm: 0.9999995967135991, iteration: 316071
loss: 0.9995225667953491,grad_norm: 0.8084094764264982, iteration: 316072
loss: 0.9837920665740967,grad_norm: 0.8161099819765293, iteration: 316073
loss: 1.0156633853912354,grad_norm: 0.999999239073398, iteration: 316074
loss: 0.9857287406921387,grad_norm: 0.7281159631281804, iteration: 316075
loss: 1.0064435005187988,grad_norm: 0.8650606856056116, iteration: 316076
loss: 0.9805944561958313,grad_norm: 0.7056634350119753, iteration: 316077
loss: 0.9974424242973328,grad_norm: 0.9999990019500061, iteration: 316078
loss: 1.0223690271377563,grad_norm: 0.9999991290207249, iteration: 316079
loss: 0.9779223799705505,grad_norm: 0.9236801491879828, iteration: 316080
loss: 0.9745370149612427,grad_norm: 0.8502749004212862, iteration: 316081
loss: 1.0475636720657349,grad_norm: 0.9774591207189982, iteration: 316082
loss: 0.9787470102310181,grad_norm: 0.9506798031082061, iteration: 316083
loss: 1.3037123680114746,grad_norm: 0.9999998231978484, iteration: 316084
loss: 1.0381536483764648,grad_norm: 0.9724215900834278, iteration: 316085
loss: 0.9559047818183899,grad_norm: 0.9449048507446495, iteration: 316086
loss: 1.000148892402649,grad_norm: 0.7772292463467884, iteration: 316087
loss: 0.9970341324806213,grad_norm: 0.8820823859372939, iteration: 316088
loss: 0.9946116805076599,grad_norm: 0.9528904336016539, iteration: 316089
loss: 0.9796425700187683,grad_norm: 0.8110014480161886, iteration: 316090
loss: 0.9948739409446716,grad_norm: 0.7956408461051471, iteration: 316091
loss: 0.9865286946296692,grad_norm: 0.9068200734677785, iteration: 316092
loss: 1.0183480978012085,grad_norm: 0.9835932246296373, iteration: 316093
loss: 0.9998512864112854,grad_norm: 0.8276245782858594, iteration: 316094
loss: 1.0426716804504395,grad_norm: 0.762917504019131, iteration: 316095
loss: 1.041307806968689,grad_norm: 0.910230382786105, iteration: 316096
loss: 1.0298001766204834,grad_norm: 0.7875910622396172, iteration: 316097
loss: 1.0247734785079956,grad_norm: 0.9999991115876309, iteration: 316098
loss: 1.0088828802108765,grad_norm: 0.7908509205798079, iteration: 316099
loss: 0.9755141139030457,grad_norm: 0.8894132644202749, iteration: 316100
loss: 1.022614598274231,grad_norm: 0.999999233563629, iteration: 316101
loss: 1.023603081703186,grad_norm: 0.8159594477436002, iteration: 316102
loss: 1.0123358964920044,grad_norm: 0.7945723286730291, iteration: 316103
loss: 1.0182324647903442,grad_norm: 0.9484722989087709, iteration: 316104
loss: 0.9750730395317078,grad_norm: 0.9999990313737969, iteration: 316105
loss: 1.0064717531204224,grad_norm: 0.9999990366023195, iteration: 316106
loss: 1.035484790802002,grad_norm: 0.7386015502919359, iteration: 316107
loss: 0.989311158657074,grad_norm: 0.8011188438823371, iteration: 316108
loss: 0.975256621837616,grad_norm: 0.845504677607427, iteration: 316109
loss: 1.0029635429382324,grad_norm: 0.6566240697781248, iteration: 316110
loss: 1.0253058671951294,grad_norm: 0.9999991222847354, iteration: 316111
loss: 0.9973472952842712,grad_norm: 0.7834844060797113, iteration: 316112
loss: 1.0006468296051025,grad_norm: 0.9354648040952589, iteration: 316113
loss: 0.9805673956871033,grad_norm: 0.9847741313849573, iteration: 316114
loss: 1.0192182064056396,grad_norm: 0.7932383637545107, iteration: 316115
loss: 0.9775581955909729,grad_norm: 0.9591696424432742, iteration: 316116
loss: 1.024315595626831,grad_norm: 0.9234371057777064, iteration: 316117
loss: 1.0235565900802612,grad_norm: 0.86363022703563, iteration: 316118
loss: 0.9685938358306885,grad_norm: 0.8675024578454035, iteration: 316119
loss: 0.9941332936286926,grad_norm: 0.8546268532745638, iteration: 316120
loss: 1.0223331451416016,grad_norm: 0.7496965643989637, iteration: 316121
loss: 1.0147175788879395,grad_norm: 0.7483224442081433, iteration: 316122
loss: 0.9891192317008972,grad_norm: 0.7808974943937745, iteration: 316123
loss: 0.9950143098831177,grad_norm: 0.8780479642122803, iteration: 316124
loss: 0.9905356764793396,grad_norm: 0.8036454156336493, iteration: 316125
loss: 1.0230560302734375,grad_norm: 0.7535851969556324, iteration: 316126
loss: 1.027290940284729,grad_norm: 0.9999992543283528, iteration: 316127
loss: 0.9905855655670166,grad_norm: 0.8378337860207871, iteration: 316128
loss: 0.9870597124099731,grad_norm: 0.8749567734384851, iteration: 316129
loss: 0.9573932886123657,grad_norm: 0.9999991634336747, iteration: 316130
loss: 0.9893420338630676,grad_norm: 0.995006565338571, iteration: 316131
loss: 0.9858982563018799,grad_norm: 0.7385959369239944, iteration: 316132
loss: 1.0018080472946167,grad_norm: 0.9754287004894836, iteration: 316133
loss: 1.0035390853881836,grad_norm: 0.8562513254352082, iteration: 316134
loss: 0.9778342247009277,grad_norm: 0.8866498895376957, iteration: 316135
loss: 0.9825405478477478,grad_norm: 0.8245652133599674, iteration: 316136
loss: 0.9970581531524658,grad_norm: 0.8027636712361611, iteration: 316137
loss: 1.026857852935791,grad_norm: 0.7463084392146567, iteration: 316138
loss: 1.0101513862609863,grad_norm: 0.8405796810607982, iteration: 316139
loss: 1.0095511674880981,grad_norm: 0.8803508238635515, iteration: 316140
loss: 0.9943503737449646,grad_norm: 0.8665985187177747, iteration: 316141
loss: 0.9776611328125,grad_norm: 0.9999999040041945, iteration: 316142
loss: 1.013116478919983,grad_norm: 0.8490001422681808, iteration: 316143
loss: 0.9872975945472717,grad_norm: 0.7715737234222184, iteration: 316144
loss: 1.0130723714828491,grad_norm: 0.7542887191357146, iteration: 316145
loss: 0.991536557674408,grad_norm: 0.8449570575969336, iteration: 316146
loss: 0.9943263530731201,grad_norm: 0.9999992543165813, iteration: 316147
loss: 1.0163698196411133,grad_norm: 0.8788439972079473, iteration: 316148
loss: 1.0352776050567627,grad_norm: 0.8772519464258491, iteration: 316149
loss: 0.9988718628883362,grad_norm: 0.9200002469222247, iteration: 316150
loss: 0.9842708110809326,grad_norm: 0.8669556095399322, iteration: 316151
loss: 0.9893994331359863,grad_norm: 0.9447879072413601, iteration: 316152
loss: 0.9899020195007324,grad_norm: 0.7141441516061359, iteration: 316153
loss: 1.0503681898117065,grad_norm: 0.9999998594882249, iteration: 316154
loss: 0.994775652885437,grad_norm: 0.949942078330312, iteration: 316155
loss: 1.0167065858840942,grad_norm: 0.8261779801046127, iteration: 316156
loss: 0.9976800084114075,grad_norm: 0.8455982515138187, iteration: 316157
loss: 1.0093191862106323,grad_norm: 0.6904305834849167, iteration: 316158
loss: 1.0225520133972168,grad_norm: 0.8997562929339528, iteration: 316159
loss: 1.0159475803375244,grad_norm: 0.9999997740356866, iteration: 316160
loss: 0.9807441234588623,grad_norm: 0.9013824812323843, iteration: 316161
loss: 1.0400218963623047,grad_norm: 0.8908348408384552, iteration: 316162
loss: 0.990186870098114,grad_norm: 0.8353853365338497, iteration: 316163
loss: 1.0127496719360352,grad_norm: 0.7639362996115576, iteration: 316164
loss: 1.0005263090133667,grad_norm: 0.9177437114000514, iteration: 316165
loss: 1.0062291622161865,grad_norm: 0.9999990040007734, iteration: 316166
loss: 0.9649322628974915,grad_norm: 0.9540321279353783, iteration: 316167
loss: 0.9981048107147217,grad_norm: 0.8206769594692185, iteration: 316168
loss: 0.9772225618362427,grad_norm: 0.9999990395596515, iteration: 316169
loss: 0.9875423908233643,grad_norm: 0.8036564613894934, iteration: 316170
loss: 1.0365104675292969,grad_norm: 0.9503987350515164, iteration: 316171
loss: 0.9758780598640442,grad_norm: 0.7475857664810075, iteration: 316172
loss: 0.9756373763084412,grad_norm: 0.7170758197569229, iteration: 316173
loss: 1.0019276142120361,grad_norm: 0.999999201603808, iteration: 316174
loss: 1.2039625644683838,grad_norm: 0.9999999487303806, iteration: 316175
loss: 0.9944082498550415,grad_norm: 0.816993824075901, iteration: 316176
loss: 1.0092324018478394,grad_norm: 0.9999991855578924, iteration: 316177
loss: 1.0163698196411133,grad_norm: 0.7535166274711851, iteration: 316178
loss: 1.000931739807129,grad_norm: 0.859780987491916, iteration: 316179
loss: 1.0065217018127441,grad_norm: 0.8261819293589892, iteration: 316180
loss: 1.007359266281128,grad_norm: 0.9999991569180661, iteration: 316181
loss: 0.9824627637863159,grad_norm: 0.9131523194988531, iteration: 316182
loss: 1.0439081192016602,grad_norm: 0.8334241791201615, iteration: 316183
loss: 1.0035194158554077,grad_norm: 0.8713053897968988, iteration: 316184
loss: 0.967174768447876,grad_norm: 0.8757837051091879, iteration: 316185
loss: 1.0080162286758423,grad_norm: 0.9999992707302245, iteration: 316186
loss: 1.0093421936035156,grad_norm: 0.7741137077300286, iteration: 316187
loss: 0.9937042593955994,grad_norm: 0.7451555451197626, iteration: 316188
loss: 1.0210036039352417,grad_norm: 0.8648883262086927, iteration: 316189
loss: 1.0043758153915405,grad_norm: 0.8474123757142549, iteration: 316190
loss: 1.0238844156265259,grad_norm: 0.835755575054861, iteration: 316191
loss: 1.0349615812301636,grad_norm: 0.8660394309364189, iteration: 316192
loss: 1.0212322473526,grad_norm: 0.8612717045509912, iteration: 316193
loss: 1.0229418277740479,grad_norm: 0.9999991637183894, iteration: 316194
loss: 0.9651249647140503,grad_norm: 0.8800232570324674, iteration: 316195
loss: 1.0061768293380737,grad_norm: 0.8535113384013612, iteration: 316196
loss: 1.023361086845398,grad_norm: 0.9006323848854882, iteration: 316197
loss: 1.0092802047729492,grad_norm: 0.8789464714702449, iteration: 316198
loss: 1.0224518775939941,grad_norm: 0.9711052989851147, iteration: 316199
loss: 1.0154707431793213,grad_norm: 0.9712413721819173, iteration: 316200
loss: 1.0182116031646729,grad_norm: 0.8349058416882933, iteration: 316201
loss: 0.9809590578079224,grad_norm: 0.8538347662697531, iteration: 316202
loss: 1.0138684511184692,grad_norm: 0.7605558164521915, iteration: 316203
loss: 1.005988597869873,grad_norm: 0.9035948067363714, iteration: 316204
loss: 0.9784695506095886,grad_norm: 0.9999997475333327, iteration: 316205
loss: 1.0006893873214722,grad_norm: 0.720929964155762, iteration: 316206
loss: 0.9919863343238831,grad_norm: 0.7971103844277969, iteration: 316207
loss: 1.022329568862915,grad_norm: 0.906325287892876, iteration: 316208
loss: 1.0231305360794067,grad_norm: 0.7519879860606561, iteration: 316209
loss: 1.0205843448638916,grad_norm: 0.7992268026079844, iteration: 316210
loss: 1.007145643234253,grad_norm: 0.8473897142308748, iteration: 316211
loss: 1.0116820335388184,grad_norm: 0.9999998572110622, iteration: 316212
loss: 1.0000807046890259,grad_norm: 0.8259953081706327, iteration: 316213
loss: 1.0152859687805176,grad_norm: 0.6946868696348796, iteration: 316214
loss: 1.0133315324783325,grad_norm: 0.9999995724187178, iteration: 316215
loss: 1.0050902366638184,grad_norm: 0.6805066508293998, iteration: 316216
loss: 0.9768441319465637,grad_norm: 0.8842840177519481, iteration: 316217
loss: 0.9957327246665955,grad_norm: 0.9278419086162094, iteration: 316218
loss: 1.001876711845398,grad_norm: 0.8198447703516069, iteration: 316219
loss: 1.045127511024475,grad_norm: 0.7847443313482226, iteration: 316220
loss: 0.984006404876709,grad_norm: 0.8633810516571354, iteration: 316221
loss: 0.9860869646072388,grad_norm: 0.8100115745303045, iteration: 316222
loss: 1.006162405014038,grad_norm: 0.9312863378923891, iteration: 316223
loss: 1.0773727893829346,grad_norm: 0.9028307929621816, iteration: 316224
loss: 0.9921405911445618,grad_norm: 0.8749335180807817, iteration: 316225
loss: 0.9744862914085388,grad_norm: 0.8273750625311532, iteration: 316226
loss: 1.0128933191299438,grad_norm: 0.9225237892517169, iteration: 316227
loss: 0.9605300426483154,grad_norm: 0.8191447801138084, iteration: 316228
loss: 1.0517759323120117,grad_norm: 0.999999574295724, iteration: 316229
loss: 0.9494949579238892,grad_norm: 0.9466549187198178, iteration: 316230
loss: 0.9967295527458191,grad_norm: 0.999999135072544, iteration: 316231
loss: 0.985832691192627,grad_norm: 0.8540455156839112, iteration: 316232
loss: 1.0201441049575806,grad_norm: 0.9645151946104664, iteration: 316233
loss: 0.9945359230041504,grad_norm: 0.9999989411520052, iteration: 316234
loss: 0.9716159701347351,grad_norm: 0.7184747499582601, iteration: 316235
loss: 1.0140494108200073,grad_norm: 0.7352145349910801, iteration: 316236
loss: 1.027612328529358,grad_norm: 0.8373498821657839, iteration: 316237
loss: 0.9612026214599609,grad_norm: 0.8307766088403417, iteration: 316238
loss: 1.0016191005706787,grad_norm: 0.7754792598389582, iteration: 316239
loss: 1.0155442953109741,grad_norm: 0.8953016111082787, iteration: 316240
loss: 0.9706035852432251,grad_norm: 0.9028665681775773, iteration: 316241
loss: 0.957876980304718,grad_norm: 0.7891805543793667, iteration: 316242
loss: 0.9905810356140137,grad_norm: 0.7791194091199942, iteration: 316243
loss: 1.0079447031021118,grad_norm: 0.833221088505599, iteration: 316244
loss: 1.0127698183059692,grad_norm: 0.9112777772793702, iteration: 316245
loss: 0.9998458623886108,grad_norm: 0.8943044547755266, iteration: 316246
loss: 0.966729998588562,grad_norm: 0.8712963364528455, iteration: 316247
loss: 1.0255708694458008,grad_norm: 0.8799731920073991, iteration: 316248
loss: 1.0023664236068726,grad_norm: 0.999999011517044, iteration: 316249
loss: 1.002955436706543,grad_norm: 0.8948488766163554, iteration: 316250
loss: 1.0238609313964844,grad_norm: 0.8588745061746534, iteration: 316251
loss: 1.0042415857315063,grad_norm: 0.6928707105857232, iteration: 316252
loss: 1.0508877038955688,grad_norm: 0.9668237840812297, iteration: 316253
loss: 0.9903734922409058,grad_norm: 0.7241492738801695, iteration: 316254
loss: 0.9578506350517273,grad_norm: 0.8691473499524593, iteration: 316255
loss: 1.0202252864837646,grad_norm: 0.8375448046290429, iteration: 316256
loss: 1.004682183265686,grad_norm: 0.9928199668990909, iteration: 316257
loss: 1.0220973491668701,grad_norm: 0.9041930426418988, iteration: 316258
loss: 1.0300177335739136,grad_norm: 0.9999991308720614, iteration: 316259
loss: 1.1092963218688965,grad_norm: 0.9999991513974908, iteration: 316260
loss: 1.0228393077850342,grad_norm: 0.9434560297518856, iteration: 316261
loss: 1.0541499853134155,grad_norm: 0.9999992035731973, iteration: 316262
loss: 0.9571413397789001,grad_norm: 0.8548183601839473, iteration: 316263
loss: 1.0145264863967896,grad_norm: 0.798432378186673, iteration: 316264
loss: 1.029228687286377,grad_norm: 0.8598990174992625, iteration: 316265
loss: 1.0097323656082153,grad_norm: 0.9106441891617632, iteration: 316266
loss: 1.0081063508987427,grad_norm: 0.7095125059640648, iteration: 316267
loss: 1.024721622467041,grad_norm: 0.8126620128393498, iteration: 316268
loss: 1.03730046749115,grad_norm: 0.923968633159793, iteration: 316269
loss: 0.9818946719169617,grad_norm: 0.7386782144252753, iteration: 316270
loss: 0.9956011176109314,grad_norm: 0.7501425536245316, iteration: 316271
loss: 0.9708374738693237,grad_norm: 0.9996659415031839, iteration: 316272
loss: 0.9522339105606079,grad_norm: 0.8620972171722724, iteration: 316273
loss: 1.0078270435333252,grad_norm: 0.8074593855893688, iteration: 316274
loss: 0.994595468044281,grad_norm: 0.9162523723995718, iteration: 316275
loss: 1.0265607833862305,grad_norm: 0.8685353403630144, iteration: 316276
loss: 1.0168226957321167,grad_norm: 0.9999993738663759, iteration: 316277
loss: 0.9712845087051392,grad_norm: 0.7104055852689375, iteration: 316278
loss: 0.9883015155792236,grad_norm: 0.917846787173517, iteration: 316279
loss: 1.035476803779602,grad_norm: 0.9579623856494828, iteration: 316280
loss: 0.9794924259185791,grad_norm: 0.6895814176751893, iteration: 316281
loss: 0.9819939136505127,grad_norm: 0.7487427070529568, iteration: 316282
loss: 0.953565239906311,grad_norm: 0.7663322542865293, iteration: 316283
loss: 0.9862305521965027,grad_norm: 0.9556199005279313, iteration: 316284
loss: 1.0098811388015747,grad_norm: 0.9773460831062095, iteration: 316285
loss: 1.0625842809677124,grad_norm: 0.9999998455709862, iteration: 316286
loss: 1.005520224571228,grad_norm: 0.9999990942092644, iteration: 316287
loss: 1.005159854888916,grad_norm: 0.9999990678213062, iteration: 316288
loss: 1.0027621984481812,grad_norm: 0.8346561645045195, iteration: 316289
loss: 0.9987674951553345,grad_norm: 0.8987804070903093, iteration: 316290
loss: 0.9814103841781616,grad_norm: 0.672861214259793, iteration: 316291
loss: 0.9811176657676697,grad_norm: 0.8563802878018635, iteration: 316292
loss: 0.9955071806907654,grad_norm: 0.7800065167682112, iteration: 316293
loss: 0.9374725818634033,grad_norm: 0.7436256945616657, iteration: 316294
loss: 0.9601722359657288,grad_norm: 0.7996574141003855, iteration: 316295
loss: 0.9964537620544434,grad_norm: 0.9349888756905219, iteration: 316296
loss: 0.9955081343650818,grad_norm: 0.8444325584473569, iteration: 316297
loss: 0.9647020697593689,grad_norm: 0.8917921022687911, iteration: 316298
loss: 1.0150642395019531,grad_norm: 0.9622930560265676, iteration: 316299
loss: 1.018194556236267,grad_norm: 0.9187856101497699, iteration: 316300
loss: 1.0314847230911255,grad_norm: 0.9185091695140357, iteration: 316301
loss: 1.0546084642410278,grad_norm: 0.9999991578219749, iteration: 316302
loss: 0.9755067825317383,grad_norm: 0.999999239361679, iteration: 316303
loss: 1.0188394784927368,grad_norm: 0.8639544040040332, iteration: 316304
loss: 1.042178750038147,grad_norm: 0.9553216674631582, iteration: 316305
loss: 1.004793405532837,grad_norm: 0.8774873823689829, iteration: 316306
loss: 0.9847282767295837,grad_norm: 0.8634723191883168, iteration: 316307
loss: 0.9641141891479492,grad_norm: 0.8210735100157466, iteration: 316308
loss: 1.0171679258346558,grad_norm: 0.817448865850836, iteration: 316309
loss: 1.037474274635315,grad_norm: 0.9118702044546638, iteration: 316310
loss: 0.973756730556488,grad_norm: 0.9459522623691163, iteration: 316311
loss: 1.0104308128356934,grad_norm: 0.784874590130375, iteration: 316312
loss: 1.0016212463378906,grad_norm: 0.8127650083150665, iteration: 316313
loss: 0.9818145632743835,grad_norm: 0.9601434659731601, iteration: 316314
loss: 0.994647204875946,grad_norm: 0.9999991046584455, iteration: 316315
loss: 0.9997498393058777,grad_norm: 0.9999991161935705, iteration: 316316
loss: 0.9935640096664429,grad_norm: 0.8455313917038142, iteration: 316317
loss: 1.0316513776779175,grad_norm: 0.8377626229293242, iteration: 316318
loss: 1.129979133605957,grad_norm: 0.9999991358125438, iteration: 316319
loss: 1.0066577196121216,grad_norm: 0.7776221101317546, iteration: 316320
loss: 1.0183643102645874,grad_norm: 0.9451082030096398, iteration: 316321
loss: 0.9744759798049927,grad_norm: 0.9423998908711356, iteration: 316322
loss: 1.035366415977478,grad_norm: 0.9251323172512771, iteration: 316323
loss: 1.072008728981018,grad_norm: 0.9999995683327573, iteration: 316324
loss: 1.0130510330200195,grad_norm: 0.7369694439463025, iteration: 316325
loss: 0.9917408227920532,grad_norm: 0.9999992889856684, iteration: 316326
loss: 0.9884941577911377,grad_norm: 0.9999991332146301, iteration: 316327
loss: 0.9689682126045227,grad_norm: 0.9143033709885372, iteration: 316328
loss: 0.9984255433082581,grad_norm: 0.8452511126042435, iteration: 316329
loss: 0.9749756455421448,grad_norm: 0.714380470543602, iteration: 316330
loss: 0.9729648232460022,grad_norm: 0.8331401536207105, iteration: 316331
loss: 1.008223533630371,grad_norm: 0.8149576074854522, iteration: 316332
loss: 1.0099128484725952,grad_norm: 0.9030888471505507, iteration: 316333
loss: 0.9543216824531555,grad_norm: 0.9999991749223871, iteration: 316334
loss: 1.022646427154541,grad_norm: 0.820634827005211, iteration: 316335
loss: 1.0445339679718018,grad_norm: 0.9158250192833544, iteration: 316336
loss: 1.0166748762130737,grad_norm: 0.8352195650191483, iteration: 316337
loss: 1.0134400129318237,grad_norm: 0.8531505130294175, iteration: 316338
loss: 0.9758453369140625,grad_norm: 0.8215150441534457, iteration: 316339
loss: 0.9736286997795105,grad_norm: 0.8155925259656943, iteration: 316340
loss: 0.9974392056465149,grad_norm: 0.9999991340141425, iteration: 316341
loss: 0.9946095943450928,grad_norm: 0.7435733371283701, iteration: 316342
loss: 1.005233645439148,grad_norm: 0.9999993380380728, iteration: 316343
loss: 0.9900902509689331,grad_norm: 0.8140928743205307, iteration: 316344
loss: 1.0241732597351074,grad_norm: 0.7220723806246632, iteration: 316345
loss: 0.9893074631690979,grad_norm: 0.8095267227552083, iteration: 316346
loss: 0.9751548171043396,grad_norm: 0.999999120439282, iteration: 316347
loss: 0.9489815831184387,grad_norm: 0.902273874206089, iteration: 316348
loss: 0.9872313141822815,grad_norm: 0.7391809880910626, iteration: 316349
loss: 0.9909766316413879,grad_norm: 0.772301863488689, iteration: 316350
loss: 0.9934822916984558,grad_norm: 0.7860839193475311, iteration: 316351
loss: 0.9784849882125854,grad_norm: 0.9150448058875799, iteration: 316352
loss: 1.008170485496521,grad_norm: 0.8512690398744819, iteration: 316353
loss: 0.9883319139480591,grad_norm: 0.8961570531782297, iteration: 316354
loss: 0.9846550226211548,grad_norm: 0.9679284675835875, iteration: 316355
loss: 1.0053484439849854,grad_norm: 0.8094710589957844, iteration: 316356
loss: 1.0000483989715576,grad_norm: 0.7883817830138735, iteration: 316357
loss: 1.0198650360107422,grad_norm: 0.8143022560942763, iteration: 316358
loss: 1.0199215412139893,grad_norm: 0.9572915607285001, iteration: 316359
loss: 0.9645673632621765,grad_norm: 0.8557512081480975, iteration: 316360
loss: 1.0128319263458252,grad_norm: 0.9999993046007959, iteration: 316361
loss: 1.0060592889785767,grad_norm: 0.8435411575666155, iteration: 316362
loss: 0.9970051646232605,grad_norm: 0.883767573942055, iteration: 316363
loss: 1.0223947763442993,grad_norm: 0.9999991299335134, iteration: 316364
loss: 1.0133450031280518,grad_norm: 0.9171947102033592, iteration: 316365
loss: 0.9730116128921509,grad_norm: 0.7976144320598386, iteration: 316366
loss: 0.982052206993103,grad_norm: 0.8516603738673951, iteration: 316367
loss: 0.9774478673934937,grad_norm: 0.8283500547532374, iteration: 316368
loss: 1.0171822309494019,grad_norm: 0.8177948711207169, iteration: 316369
loss: 1.0167179107666016,grad_norm: 0.7666712814200473, iteration: 316370
loss: 0.9881357550621033,grad_norm: 0.8025680367336613, iteration: 316371
loss: 1.0189958810806274,grad_norm: 0.953759213911741, iteration: 316372
loss: 0.9908977746963501,grad_norm: 0.7082339337780132, iteration: 316373
loss: 1.014147162437439,grad_norm: 0.7866109989752718, iteration: 316374
loss: 0.9731261134147644,grad_norm: 0.8061758731422466, iteration: 316375
loss: 1.0039095878601074,grad_norm: 0.9999992682416042, iteration: 316376
loss: 1.0213063955307007,grad_norm: 0.9520058645959482, iteration: 316377
loss: 1.0377159118652344,grad_norm: 0.9999996111129267, iteration: 316378
loss: 1.0450917482376099,grad_norm: 0.9999998758157269, iteration: 316379
loss: 0.989567220211029,grad_norm: 0.7948129754065312, iteration: 316380
loss: 1.0223305225372314,grad_norm: 0.9999998144995051, iteration: 316381
loss: 1.0237032175064087,grad_norm: 0.873182244495173, iteration: 316382
loss: 0.9921370148658752,grad_norm: 0.9172964927274299, iteration: 316383
loss: 1.0046321153640747,grad_norm: 0.9999999560993921, iteration: 316384
loss: 0.9774501323699951,grad_norm: 0.870714377171519, iteration: 316385
loss: 1.0202858448028564,grad_norm: 0.8448578419933622, iteration: 316386
loss: 1.0375405550003052,grad_norm: 0.9724103139069542, iteration: 316387
loss: 1.0457429885864258,grad_norm: 0.9668069185448869, iteration: 316388
loss: 0.9979420304298401,grad_norm: 0.8514568641870875, iteration: 316389
loss: 1.0314334630966187,grad_norm: 0.875186172134491, iteration: 316390
loss: 0.993683934211731,grad_norm: 0.8612557911513624, iteration: 316391
loss: 0.9483774304389954,grad_norm: 0.9999993748648834, iteration: 316392
loss: 1.0793721675872803,grad_norm: 0.7993857949907496, iteration: 316393
loss: 1.001042127609253,grad_norm: 0.958820190270961, iteration: 316394
loss: 1.0039116144180298,grad_norm: 0.806075619853402, iteration: 316395
loss: 1.0419330596923828,grad_norm: 0.9999991753033275, iteration: 316396
loss: 0.9846247434616089,grad_norm: 0.9194363216843269, iteration: 316397
loss: 1.0564417839050293,grad_norm: 0.8328469016667372, iteration: 316398
loss: 1.0006588697433472,grad_norm: 0.9999990057653427, iteration: 316399
loss: 1.0308098793029785,grad_norm: 0.7792209598852865, iteration: 316400
loss: 1.0205262899398804,grad_norm: 0.8745099490583849, iteration: 316401
loss: 1.014048457145691,grad_norm: 0.7746765096466799, iteration: 316402
loss: 0.9831781387329102,grad_norm: 0.8540114271373676, iteration: 316403
loss: 1.0084373950958252,grad_norm: 0.999998902074965, iteration: 316404
loss: 1.005858063697815,grad_norm: 0.8516884688459172, iteration: 316405
loss: 1.0292248725891113,grad_norm: 0.9999992570690122, iteration: 316406
loss: 0.9895318150520325,grad_norm: 0.9693422166746262, iteration: 316407
loss: 0.9800105690956116,grad_norm: 0.9999997134013053, iteration: 316408
loss: 1.0593886375427246,grad_norm: 0.9999991042423874, iteration: 316409
loss: 1.0519814491271973,grad_norm: 0.8438942742505257, iteration: 316410
loss: 1.001588225364685,grad_norm: 0.9999990600727483, iteration: 316411
loss: 1.0031858682632446,grad_norm: 0.82801660962669, iteration: 316412
loss: 1.0173205137252808,grad_norm: 0.800671324538514, iteration: 316413
loss: 0.9989062547683716,grad_norm: 0.7701522815888028, iteration: 316414
loss: 0.9678443670272827,grad_norm: 0.7584754329185195, iteration: 316415
loss: 1.0025296211242676,grad_norm: 0.9999992029443859, iteration: 316416
loss: 1.019322156906128,grad_norm: 0.9070994776961114, iteration: 316417
loss: 1.0162200927734375,grad_norm: 0.7444704536726563, iteration: 316418
loss: 1.0369198322296143,grad_norm: 0.9999990054464841, iteration: 316419
loss: 0.9852703809738159,grad_norm: 0.8402165100287913, iteration: 316420
loss: 1.0185303688049316,grad_norm: 0.7940900689481469, iteration: 316421
loss: 1.008949637413025,grad_norm: 0.7034711580105403, iteration: 316422
loss: 1.0211294889450073,grad_norm: 0.8910099286117424, iteration: 316423
loss: 1.0096815824508667,grad_norm: 0.7531229069354525, iteration: 316424
loss: 0.9666215181350708,grad_norm: 0.948885363498781, iteration: 316425
loss: 0.993374228477478,grad_norm: 0.7585980730950426, iteration: 316426
loss: 0.9917365908622742,grad_norm: 0.7294299439518227, iteration: 316427
loss: 1.0728648900985718,grad_norm: 0.9999994513332898, iteration: 316428
loss: 0.9815261960029602,grad_norm: 0.7987818733044209, iteration: 316429
loss: 1.012184977531433,grad_norm: 0.9692028301279918, iteration: 316430
loss: 0.957283079624176,grad_norm: 0.7894941549968626, iteration: 316431
loss: 0.996738851070404,grad_norm: 0.8984756414449276, iteration: 316432
loss: 0.9477589130401611,grad_norm: 0.9582096413116273, iteration: 316433
loss: 1.0271222591400146,grad_norm: 0.9999992560455069, iteration: 316434
loss: 0.9629950523376465,grad_norm: 0.7112366720459634, iteration: 316435
loss: 1.0133129358291626,grad_norm: 0.9116336924785932, iteration: 316436
loss: 0.9912735223770142,grad_norm: 0.8064667698342022, iteration: 316437
loss: 1.0054453611373901,grad_norm: 0.9999990688940218, iteration: 316438
loss: 1.006905198097229,grad_norm: 0.8115504544773112, iteration: 316439
loss: 0.996584415435791,grad_norm: 0.94308765726497, iteration: 316440
loss: 1.0053508281707764,grad_norm: 0.9999991028178011, iteration: 316441
loss: 0.9861464500427246,grad_norm: 0.9999990708340555, iteration: 316442
loss: 1.0189472436904907,grad_norm: 0.7524855269571481, iteration: 316443
loss: 0.9829190373420715,grad_norm: 0.7819624011019026, iteration: 316444
loss: 1.0571743249893188,grad_norm: 0.9951144162600712, iteration: 316445
loss: 1.0138369798660278,grad_norm: 0.8348841372306243, iteration: 316446
loss: 0.9795771837234497,grad_norm: 0.799153012105523, iteration: 316447
loss: 1.0077190399169922,grad_norm: 0.9461782969292503, iteration: 316448
loss: 1.0173211097717285,grad_norm: 0.8056867598607685, iteration: 316449
loss: 1.0557676553726196,grad_norm: 0.9999999242195493, iteration: 316450
loss: 1.0677154064178467,grad_norm: 0.9999991360094584, iteration: 316451
loss: 1.0107309818267822,grad_norm: 0.9194495167666563, iteration: 316452
loss: 1.0089846849441528,grad_norm: 0.7703555974131139, iteration: 316453
loss: 0.9923765659332275,grad_norm: 0.8413152744479763, iteration: 316454
loss: 1.003273844718933,grad_norm: 0.9436336393690972, iteration: 316455
loss: 1.0118681192398071,grad_norm: 0.8623606278722321, iteration: 316456
loss: 1.0510042905807495,grad_norm: 0.7756208711509591, iteration: 316457
loss: 1.0137461423873901,grad_norm: 0.99999967013973, iteration: 316458
loss: 0.9674584269523621,grad_norm: 0.9356939192543288, iteration: 316459
loss: 1.0147709846496582,grad_norm: 0.8293734438987871, iteration: 316460
loss: 0.9959936738014221,grad_norm: 0.7560915249178429, iteration: 316461
loss: 1.03461754322052,grad_norm: 0.8346763050003512, iteration: 316462
loss: 0.9853335618972778,grad_norm: 0.8453585475163705, iteration: 316463
loss: 1.0222691297531128,grad_norm: 0.9010275293344094, iteration: 316464
loss: 0.9938229918479919,grad_norm: 0.7588011840133575, iteration: 316465
loss: 0.9473062753677368,grad_norm: 0.7757718320200281, iteration: 316466
loss: 0.995983898639679,grad_norm: 0.8264310516495273, iteration: 316467
loss: 1.0022796392440796,grad_norm: 0.8798388721155617, iteration: 316468
loss: 1.0714186429977417,grad_norm: 0.9213994996662481, iteration: 316469
loss: 0.9984896779060364,grad_norm: 0.957759477447665, iteration: 316470
loss: 1.0218321084976196,grad_norm: 0.8905429031495257, iteration: 316471
loss: 1.0050417184829712,grad_norm: 0.7486096171967154, iteration: 316472
loss: 0.9737541079521179,grad_norm: 0.8272209340465986, iteration: 316473
loss: 1.0273882150650024,grad_norm: 0.8123341626311819, iteration: 316474
loss: 1.0040067434310913,grad_norm: 0.7906501694256882, iteration: 316475
loss: 0.9871401786804199,grad_norm: 0.9344654542239554, iteration: 316476
loss: 0.9736131429672241,grad_norm: 0.8055612993769042, iteration: 316477
loss: 1.0042470693588257,grad_norm: 0.7879656370630328, iteration: 316478
loss: 0.9928986430168152,grad_norm: 0.9006400390896604, iteration: 316479
loss: 1.0029668807983398,grad_norm: 0.8729398648289698, iteration: 316480
loss: 0.9833781719207764,grad_norm: 0.9011019134445254, iteration: 316481
loss: 0.9776486158370972,grad_norm: 0.8531102471326361, iteration: 316482
loss: 1.0076665878295898,grad_norm: 0.9513459880651437, iteration: 316483
loss: 1.0016882419586182,grad_norm: 0.6845822745742958, iteration: 316484
loss: 1.0151485204696655,grad_norm: 0.9733565651555054, iteration: 316485
loss: 1.0399867296218872,grad_norm: 0.9999990535707968, iteration: 316486
loss: 1.0290634632110596,grad_norm: 0.7982752920349327, iteration: 316487
loss: 1.0105044841766357,grad_norm: 0.9862960390084496, iteration: 316488
loss: 1.0221203565597534,grad_norm: 0.9208134518464003, iteration: 316489
loss: 0.9936750531196594,grad_norm: 0.7588710970354586, iteration: 316490
loss: 1.0070916414260864,grad_norm: 0.9999992653900682, iteration: 316491
loss: 0.9573693871498108,grad_norm: 0.7572733068598279, iteration: 316492
loss: 1.0025559663772583,grad_norm: 0.83054107814242, iteration: 316493
loss: 1.0249148607254028,grad_norm: 0.9999991046490793, iteration: 316494
loss: 1.0518461465835571,grad_norm: 0.9999997009607366, iteration: 316495
loss: 1.0167025327682495,grad_norm: 0.9298985277939977, iteration: 316496
loss: 1.015315294265747,grad_norm: 0.7835072496142877, iteration: 316497
loss: 1.0410085916519165,grad_norm: 0.8212682066740615, iteration: 316498
loss: 0.9936982989311218,grad_norm: 0.7907578697800395, iteration: 316499
loss: 0.9892175793647766,grad_norm: 0.7738292372124995, iteration: 316500
loss: 0.9770712852478027,grad_norm: 0.9200238679496835, iteration: 316501
loss: 0.9899289608001709,grad_norm: 0.7743788609604042, iteration: 316502
loss: 0.9742971062660217,grad_norm: 0.8342144041744654, iteration: 316503
loss: 1.0011433362960815,grad_norm: 0.9396810217850363, iteration: 316504
loss: 0.9834442138671875,grad_norm: 0.963204906179429, iteration: 316505
loss: 0.9887745380401611,grad_norm: 0.7566274656189942, iteration: 316506
loss: 1.003035545349121,grad_norm: 0.8787746293638805, iteration: 316507
loss: 0.9983019828796387,grad_norm: 0.7945963180369939, iteration: 316508
loss: 1.0483673810958862,grad_norm: 0.8639688689347087, iteration: 316509
loss: 1.0108011960983276,grad_norm: 0.9999998004765762, iteration: 316510
loss: 0.9956223964691162,grad_norm: 0.6884157100173954, iteration: 316511
loss: 0.9933512806892395,grad_norm: 0.7296366920534411, iteration: 316512
loss: 1.0368448495864868,grad_norm: 0.9999995169715189, iteration: 316513
loss: 1.011357069015503,grad_norm: 0.965473322531506, iteration: 316514
loss: 0.9569989442825317,grad_norm: 0.8115533535729471, iteration: 316515
loss: 0.9970254898071289,grad_norm: 0.8247237720831027, iteration: 316516
loss: 1.0003248453140259,grad_norm: 0.848213371682788, iteration: 316517
loss: 0.984026312828064,grad_norm: 0.7858784268397307, iteration: 316518
loss: 1.002347707748413,grad_norm: 0.9804660779563281, iteration: 316519
loss: 0.9556489586830139,grad_norm: 0.8754297839594568, iteration: 316520
loss: 0.9865419268608093,grad_norm: 0.9080620461619616, iteration: 316521
loss: 0.9930776953697205,grad_norm: 0.7245471276980145, iteration: 316522
loss: 0.9626975655555725,grad_norm: 0.8316581514054514, iteration: 316523
loss: 0.9689186811447144,grad_norm: 0.7336747858601037, iteration: 316524
loss: 0.999654233455658,grad_norm: 0.8883383105925261, iteration: 316525
loss: 1.012586236000061,grad_norm: 0.8438290486934505, iteration: 316526
loss: 0.9675685167312622,grad_norm: 0.8387194638159376, iteration: 316527
loss: 0.9732732772827148,grad_norm: 0.7111698375057124, iteration: 316528
loss: 1.0221775770187378,grad_norm: 0.9570381044388235, iteration: 316529
loss: 1.0227726697921753,grad_norm: 0.9622448769105993, iteration: 316530
loss: 1.0336580276489258,grad_norm: 0.8448741174140034, iteration: 316531
loss: 1.010208249092102,grad_norm: 0.7462228001569935, iteration: 316532
loss: 1.1324716806411743,grad_norm: 0.9999997440176996, iteration: 316533
loss: 0.9649362564086914,grad_norm: 0.9999991906337362, iteration: 316534
loss: 1.0601389408111572,grad_norm: 0.9971263458954573, iteration: 316535
loss: 0.9963101148605347,grad_norm: 0.8333685050365468, iteration: 316536
loss: 0.9717273712158203,grad_norm: 0.9999990830249312, iteration: 316537
loss: 1.0248197317123413,grad_norm: 0.8786520206882998, iteration: 316538
loss: 0.9979854226112366,grad_norm: 0.9999992823030581, iteration: 316539
loss: 1.0179412364959717,grad_norm: 0.7885804978282935, iteration: 316540
loss: 0.9900439977645874,grad_norm: 0.9999996891089477, iteration: 316541
loss: 0.9834100008010864,grad_norm: 0.8796062306619713, iteration: 316542
loss: 0.9739601612091064,grad_norm: 0.9312693550528982, iteration: 316543
loss: 0.9536661505699158,grad_norm: 0.8162194435137995, iteration: 316544
loss: 0.9958328604698181,grad_norm: 0.8046874886121326, iteration: 316545
loss: 1.018857479095459,grad_norm: 0.7928334986303971, iteration: 316546
loss: 0.9781368970870972,grad_norm: 0.6994583972037886, iteration: 316547
loss: 1.0041160583496094,grad_norm: 0.8300435488112059, iteration: 316548
loss: 0.9785174131393433,grad_norm: 0.8024419052321228, iteration: 316549
loss: 1.022979736328125,grad_norm: 0.7495469661391674, iteration: 316550
loss: 1.0141966342926025,grad_norm: 0.9999999230419606, iteration: 316551
loss: 0.9898384809494019,grad_norm: 0.9311366971132525, iteration: 316552
loss: 0.9700990915298462,grad_norm: 0.7511344238560435, iteration: 316553
loss: 0.9885220527648926,grad_norm: 0.8393073912357636, iteration: 316554
loss: 0.9910528063774109,grad_norm: 0.9999990578334256, iteration: 316555
loss: 1.0013446807861328,grad_norm: 0.7063891942656237, iteration: 316556
loss: 1.016982078552246,grad_norm: 0.6975375936444456, iteration: 316557
loss: 0.9969339966773987,grad_norm: 0.8108712145391747, iteration: 316558
loss: 1.007617473602295,grad_norm: 0.9391175211425066, iteration: 316559
loss: 1.0233086347579956,grad_norm: 0.9085029263914877, iteration: 316560
loss: 1.004845380783081,grad_norm: 0.7742255170492112, iteration: 316561
loss: 1.0428861379623413,grad_norm: 0.99999914792659, iteration: 316562
loss: 1.0224090814590454,grad_norm: 0.741814627059826, iteration: 316563
loss: 0.9814262390136719,grad_norm: 0.8434740582391906, iteration: 316564
loss: 0.9961051344871521,grad_norm: 0.9639948520323004, iteration: 316565
loss: 0.9987826347351074,grad_norm: 0.8112475599040774, iteration: 316566
loss: 1.0763764381408691,grad_norm: 0.9999999281599757, iteration: 316567
loss: 1.0060770511627197,grad_norm: 0.7674158319634857, iteration: 316568
loss: 1.044970154762268,grad_norm: 0.8659220848118676, iteration: 316569
loss: 1.0206888914108276,grad_norm: 0.7572362840247399, iteration: 316570
loss: 1.0055190324783325,grad_norm: 0.9999992385601435, iteration: 316571
loss: 0.9699930548667908,grad_norm: 0.8564765145958454, iteration: 316572
loss: 1.0141613483428955,grad_norm: 0.7928143783221353, iteration: 316573
loss: 1.0966026782989502,grad_norm: 0.8309973417115343, iteration: 316574
loss: 0.9976245164871216,grad_norm: 0.8617022102159912, iteration: 316575
loss: 1.011470079421997,grad_norm: 0.9999990814264205, iteration: 316576
loss: 1.026460886001587,grad_norm: 0.8092688262108383, iteration: 316577
loss: 0.97868412733078,grad_norm: 0.7467477984091715, iteration: 316578
loss: 0.9868646860122681,grad_norm: 0.9719885594312707, iteration: 316579
loss: 1.0338526964187622,grad_norm: 0.94005720278324, iteration: 316580
loss: 1.028110146522522,grad_norm: 0.927935641210101, iteration: 316581
loss: 0.969562292098999,grad_norm: 0.9365893497299503, iteration: 316582
loss: 0.9928981065750122,grad_norm: 0.9739523724428748, iteration: 316583
loss: 1.0000232458114624,grad_norm: 0.8544738566149968, iteration: 316584
loss: 0.9952031373977661,grad_norm: 0.9183268674394819, iteration: 316585
loss: 1.0077019929885864,grad_norm: 0.89825880736488, iteration: 316586
loss: 1.0224947929382324,grad_norm: 0.9947337507204475, iteration: 316587
loss: 0.9346300363540649,grad_norm: 0.8432462884113773, iteration: 316588
loss: 1.0255913734436035,grad_norm: 0.9119659715139405, iteration: 316589
loss: 1.0043622255325317,grad_norm: 0.8655032544094852, iteration: 316590
loss: 0.9970481395721436,grad_norm: 0.999999176280674, iteration: 316591
loss: 0.9869160652160645,grad_norm: 0.846271300033903, iteration: 316592
loss: 0.9945182204246521,grad_norm: 0.9999992183352672, iteration: 316593
loss: 0.9541597366333008,grad_norm: 0.877820327844126, iteration: 316594
loss: 0.9787769913673401,grad_norm: 0.7674887813542349, iteration: 316595
loss: 1.0144741535186768,grad_norm: 0.9999990702615889, iteration: 316596
loss: 1.0205758810043335,grad_norm: 0.9648473440324655, iteration: 316597
loss: 0.9859001636505127,grad_norm: 0.7671386620765049, iteration: 316598
loss: 0.9714564085006714,grad_norm: 0.7851989048194242, iteration: 316599
loss: 1.0042818784713745,grad_norm: 0.7533994172337924, iteration: 316600
loss: 1.001636266708374,grad_norm: 0.7186610676708584, iteration: 316601
loss: 1.0473967790603638,grad_norm: 0.8482215404843225, iteration: 316602
loss: 0.979904294013977,grad_norm: 0.8033441662293943, iteration: 316603
loss: 1.0322514772415161,grad_norm: 0.8902643919240303, iteration: 316604
loss: 1.0236258506774902,grad_norm: 0.9999994953029068, iteration: 316605
loss: 1.0061075687408447,grad_norm: 0.9922371981007985, iteration: 316606
loss: 1.029847264289856,grad_norm: 0.8152419048432556, iteration: 316607
loss: 1.0104920864105225,grad_norm: 0.7870029601269852, iteration: 316608
loss: 0.9681456685066223,grad_norm: 0.7626481671030062, iteration: 316609
loss: 0.9599032402038574,grad_norm: 0.7423900240120322, iteration: 316610
loss: 0.9935225248336792,grad_norm: 0.9425263402742642, iteration: 316611
loss: 0.9366176128387451,grad_norm: 0.7716949407435759, iteration: 316612
loss: 0.9885585904121399,grad_norm: 0.7111568341215825, iteration: 316613
loss: 1.0060830116271973,grad_norm: 0.9999992904992583, iteration: 316614
loss: 1.0347892045974731,grad_norm: 0.8009544487189063, iteration: 316615
loss: 0.9949539303779602,grad_norm: 0.8549360246270732, iteration: 316616
loss: 1.0218744277954102,grad_norm: 0.9999992799207706, iteration: 316617
loss: 1.0029906034469604,grad_norm: 0.7291491603096583, iteration: 316618
loss: 0.9915325045585632,grad_norm: 0.9999991055589136, iteration: 316619
loss: 1.0200092792510986,grad_norm: 0.8300623659600209, iteration: 316620
loss: 1.012223482131958,grad_norm: 0.9831044815049762, iteration: 316621
loss: 1.003152847290039,grad_norm: 0.8004996383150077, iteration: 316622
loss: 0.9800261855125427,grad_norm: 0.9999990284286036, iteration: 316623
loss: 0.9875885844230652,grad_norm: 0.7852484149243292, iteration: 316624
loss: 1.0099031925201416,grad_norm: 0.9999996571735547, iteration: 316625
loss: 1.034213900566101,grad_norm: 0.9999993633885758, iteration: 316626
loss: 1.0213022232055664,grad_norm: 0.9809354239508569, iteration: 316627
loss: 1.0186216831207275,grad_norm: 0.8942228305891624, iteration: 316628
loss: 1.0244280099868774,grad_norm: 0.9999997510353625, iteration: 316629
loss: 0.9777783155441284,grad_norm: 0.8158164856839254, iteration: 316630
loss: 0.9858465194702148,grad_norm: 0.725464776939815, iteration: 316631
loss: 0.9734251499176025,grad_norm: 0.7183517956276768, iteration: 316632
loss: 0.9787026047706604,grad_norm: 0.8417269288914605, iteration: 316633
loss: 1.0637919902801514,grad_norm: 0.9999995425195307, iteration: 316634
loss: 1.020677089691162,grad_norm: 0.8472441479487491, iteration: 316635
loss: 0.992736279964447,grad_norm: 0.9999991427350895, iteration: 316636
loss: 0.9803816080093384,grad_norm: 0.6196375358386985, iteration: 316637
loss: 1.0057158470153809,grad_norm: 0.739165294330484, iteration: 316638
loss: 1.0172066688537598,grad_norm: 0.9999997433579657, iteration: 316639
loss: 1.0202852487564087,grad_norm: 0.9809501432344236, iteration: 316640
loss: 1.0837457180023193,grad_norm: 0.9999999298078582, iteration: 316641
loss: 0.9934488534927368,grad_norm: 0.9124770432116095, iteration: 316642
loss: 0.9919178485870361,grad_norm: 0.7202043286347096, iteration: 316643
loss: 1.0179394483566284,grad_norm: 0.9224864428085205, iteration: 316644
loss: 0.9947138428688049,grad_norm: 0.9735991998055735, iteration: 316645
loss: 0.9888203740119934,grad_norm: 0.9999990190821355, iteration: 316646
loss: 1.0024851560592651,grad_norm: 0.811667533498234, iteration: 316647
loss: 0.9957889914512634,grad_norm: 0.7906654234286026, iteration: 316648
loss: 0.9597533941268921,grad_norm: 0.9665170462957499, iteration: 316649
loss: 1.011238694190979,grad_norm: 0.7542893715788832, iteration: 316650
loss: 0.9452399611473083,grad_norm: 0.8140721706867973, iteration: 316651
loss: 1.077299952507019,grad_norm: 0.9999999267029428, iteration: 316652
loss: 1.003748893737793,grad_norm: 0.7411466425025995, iteration: 316653
loss: 1.0950311422348022,grad_norm: 0.945564205361425, iteration: 316654
loss: 1.005365014076233,grad_norm: 0.8158984665068042, iteration: 316655
loss: 1.0654540061950684,grad_norm: 0.9999991931575296, iteration: 316656
loss: 1.2137960195541382,grad_norm: 0.9999993790772587, iteration: 316657
loss: 1.035314679145813,grad_norm: 0.6874575667159217, iteration: 316658
loss: 0.9935822486877441,grad_norm: 0.9999996152801695, iteration: 316659
loss: 0.9919922351837158,grad_norm: 0.797700025634931, iteration: 316660
loss: 1.0265878438949585,grad_norm: 0.8074845561769071, iteration: 316661
loss: 0.9872660636901855,grad_norm: 0.7717159920458838, iteration: 316662
loss: 1.0208176374435425,grad_norm: 0.9004619419084987, iteration: 316663
loss: 0.9705579876899719,grad_norm: 0.8819799374586078, iteration: 316664
loss: 0.9940415620803833,grad_norm: 0.8439528760800098, iteration: 316665
loss: 0.9979338049888611,grad_norm: 0.8087301730619515, iteration: 316666
loss: 0.9921842813491821,grad_norm: 0.9020798626555536, iteration: 316667
loss: 1.0161532163619995,grad_norm: 0.9919341148380267, iteration: 316668
loss: 1.0352944135665894,grad_norm: 0.9999996935352394, iteration: 316669
loss: 1.005155086517334,grad_norm: 0.923671909764089, iteration: 316670
loss: 1.0186684131622314,grad_norm: 0.8900517548231891, iteration: 316671
loss: 1.0000704526901245,grad_norm: 0.7267560023971087, iteration: 316672
loss: 1.0302274227142334,grad_norm: 0.8925810931215308, iteration: 316673
loss: 0.9708570241928101,grad_norm: 0.7499444742420522, iteration: 316674
loss: 0.9754371643066406,grad_norm: 0.9716175420113531, iteration: 316675
loss: 0.9631583094596863,grad_norm: 0.9027259350704403, iteration: 316676
loss: 1.0075528621673584,grad_norm: 0.9999990585959737, iteration: 316677
loss: 0.988884449005127,grad_norm: 0.8090791077578391, iteration: 316678
loss: 0.9719565510749817,grad_norm: 0.8514825011361172, iteration: 316679
loss: 1.0227936506271362,grad_norm: 0.9460630798604199, iteration: 316680
loss: 0.9643719792366028,grad_norm: 0.8017511354447179, iteration: 316681
loss: 0.982030987739563,grad_norm: 0.8775352251979267, iteration: 316682
loss: 1.0243864059448242,grad_norm: 0.7437136529241052, iteration: 316683
loss: 0.9974609613418579,grad_norm: 0.9999989937021397, iteration: 316684
loss: 1.0048716068267822,grad_norm: 0.8607625556559729, iteration: 316685
loss: 1.044203281402588,grad_norm: 0.9764755683207781, iteration: 316686
loss: 1.0049526691436768,grad_norm: 0.999999807987906, iteration: 316687
loss: 0.9995004534721375,grad_norm: 0.953444263652818, iteration: 316688
loss: 0.9550370573997498,grad_norm: 0.8215034809606322, iteration: 316689
loss: 0.9819732308387756,grad_norm: 0.8604192409313081, iteration: 316690
loss: 1.006290078163147,grad_norm: 0.8098572481547318, iteration: 316691
loss: 1.0053256750106812,grad_norm: 0.8855841056525776, iteration: 316692
loss: 1.0130205154418945,grad_norm: 0.7292388116663778, iteration: 316693
loss: 0.9864497184753418,grad_norm: 0.8825963053426834, iteration: 316694
loss: 1.035036325454712,grad_norm: 0.8294232944175264, iteration: 316695
loss: 1.0103905200958252,grad_norm: 0.9823590352037578, iteration: 316696
loss: 0.9848878383636475,grad_norm: 0.763109382799343, iteration: 316697
loss: 0.9654433727264404,grad_norm: 0.9483947467667969, iteration: 316698
loss: 1.0112570524215698,grad_norm: 0.8845273206015405, iteration: 316699
loss: 1.0445243120193481,grad_norm: 0.9999994037774151, iteration: 316700
loss: 1.0014777183532715,grad_norm: 0.8622015739377674, iteration: 316701
loss: 1.0062021017074585,grad_norm: 0.7679221286785978, iteration: 316702
loss: 0.973048746585846,grad_norm: 0.851275967617516, iteration: 316703
loss: 0.9910789728164673,grad_norm: 0.9999996686650209, iteration: 316704
loss: 0.9991049766540527,grad_norm: 0.7019354408240652, iteration: 316705
loss: 0.9975324273109436,grad_norm: 0.754567841107664, iteration: 316706
loss: 1.0149822235107422,grad_norm: 0.9999992699878361, iteration: 316707
loss: 1.0418518781661987,grad_norm: 0.7626535891073855, iteration: 316708
loss: 0.9847966432571411,grad_norm: 0.7991832251310639, iteration: 316709
loss: 0.9932651519775391,grad_norm: 0.7097978424082672, iteration: 316710
loss: 1.0075210332870483,grad_norm: 0.9041667010173728, iteration: 316711
loss: 0.9563897848129272,grad_norm: 0.7981072650977383, iteration: 316712
loss: 1.0118045806884766,grad_norm: 0.8580926729788101, iteration: 316713
loss: 0.9347203373908997,grad_norm: 0.9246886949960964, iteration: 316714
loss: 0.9863833785057068,grad_norm: 0.8415460548568546, iteration: 316715
loss: 1.0058681964874268,grad_norm: 0.8568627797246658, iteration: 316716
loss: 1.0084673166275024,grad_norm: 0.8895983274824173, iteration: 316717
loss: 1.0342798233032227,grad_norm: 0.9701793024089359, iteration: 316718
loss: 1.0448033809661865,grad_norm: 0.9999992601972928, iteration: 316719
loss: 0.9957600831985474,grad_norm: 0.7205470656453365, iteration: 316720
loss: 1.01171875,grad_norm: 0.9371415916480345, iteration: 316721
loss: 0.9873383641242981,grad_norm: 0.9999991545198389, iteration: 316722
loss: 1.002416729927063,grad_norm: 0.8212021724597088, iteration: 316723
loss: 1.0297698974609375,grad_norm: 0.9999994881945182, iteration: 316724
loss: 0.9573416113853455,grad_norm: 0.7054899984050518, iteration: 316725
loss: 0.9713334441184998,grad_norm: 0.999998972382053, iteration: 316726
loss: 0.9811135530471802,grad_norm: 0.8233318027544854, iteration: 316727
loss: 1.0414425134658813,grad_norm: 0.999999170420334, iteration: 316728
loss: 0.9575414061546326,grad_norm: 0.8852543177053909, iteration: 316729
loss: 0.9790729880332947,grad_norm: 0.9999993170379611, iteration: 316730
loss: 0.9844292402267456,grad_norm: 0.8989966974921886, iteration: 316731
loss: 1.1283040046691895,grad_norm: 0.9614228661094899, iteration: 316732
loss: 1.0247241258621216,grad_norm: 0.9582035503516565, iteration: 316733
loss: 0.9545823335647583,grad_norm: 0.7798826235331078, iteration: 316734
loss: 1.0148742198944092,grad_norm: 0.708639841680878, iteration: 316735
loss: 1.009652853012085,grad_norm: 0.7866225917439282, iteration: 316736
loss: 0.9981615543365479,grad_norm: 0.780105067008344, iteration: 316737
loss: 1.0268888473510742,grad_norm: 0.9562966743963565, iteration: 316738
loss: 1.0565879344940186,grad_norm: 0.9999994309550599, iteration: 316739
loss: 1.0677564144134521,grad_norm: 0.9999997524594447, iteration: 316740
loss: 1.0493448972702026,grad_norm: 0.8845357833347957, iteration: 316741
loss: 0.971230149269104,grad_norm: 0.7974989341605572, iteration: 316742
loss: 0.9968529343605042,grad_norm: 0.7198013891272087, iteration: 316743
loss: 0.9830484986305237,grad_norm: 0.8110183189024855, iteration: 316744
loss: 1.004814863204956,grad_norm: 0.7022957475219572, iteration: 316745
loss: 1.000190019607544,grad_norm: 0.9389217485221963, iteration: 316746
loss: 1.017669677734375,grad_norm: 0.9505815444511039, iteration: 316747
loss: 0.9929706454277039,grad_norm: 0.7340222719541729, iteration: 316748
loss: 1.049023151397705,grad_norm: 0.9999994433955816, iteration: 316749
loss: 1.0072211027145386,grad_norm: 0.81775027980179, iteration: 316750
loss: 1.000852346420288,grad_norm: 0.8943934770693982, iteration: 316751
loss: 1.0001252889633179,grad_norm: 0.8629520540950684, iteration: 316752
loss: 1.0137395858764648,grad_norm: 0.9999990301116083, iteration: 316753
loss: 0.9817973971366882,grad_norm: 0.8002799872393752, iteration: 316754
loss: 0.9957882165908813,grad_norm: 0.720441487930468, iteration: 316755
loss: 0.9908260107040405,grad_norm: 0.8114211799421118, iteration: 316756
loss: 1.0133769512176514,grad_norm: 0.9073425205484192, iteration: 316757
loss: 0.9945089221000671,grad_norm: 0.8479632297019102, iteration: 316758
loss: 1.0109610557556152,grad_norm: 0.8094747386556113, iteration: 316759
loss: 0.9859998226165771,grad_norm: 0.8276673463589095, iteration: 316760
loss: 1.0179129838943481,grad_norm: 0.8727932799977667, iteration: 316761
loss: 1.0017253160476685,grad_norm: 0.8834403514237753, iteration: 316762
loss: 1.0019748210906982,grad_norm: 0.9999994402729795, iteration: 316763
loss: 1.0231170654296875,grad_norm: 0.7748645921067708, iteration: 316764
loss: 1.0012961626052856,grad_norm: 0.9999990795255536, iteration: 316765
loss: 0.9580076336860657,grad_norm: 0.7518958959924729, iteration: 316766
loss: 0.987295389175415,grad_norm: 0.9999999403389638, iteration: 316767
loss: 0.9919811487197876,grad_norm: 0.9188369635836694, iteration: 316768
loss: 1.0069584846496582,grad_norm: 0.9540264878969537, iteration: 316769
loss: 1.0140681266784668,grad_norm: 0.8010270455681254, iteration: 316770
loss: 1.0300952196121216,grad_norm: 0.8606392446100332, iteration: 316771
loss: 1.0114176273345947,grad_norm: 0.7453477607147684, iteration: 316772
loss: 1.035382628440857,grad_norm: 0.9856434040383842, iteration: 316773
loss: 1.0319441556930542,grad_norm: 0.9999993260971615, iteration: 316774
loss: 0.9799445271492004,grad_norm: 0.848840951690088, iteration: 316775
loss: 1.0208550691604614,grad_norm: 0.7504961433194735, iteration: 316776
loss: 1.0019320249557495,grad_norm: 0.9064343936757457, iteration: 316777
loss: 1.0335098505020142,grad_norm: 0.9999998489969356, iteration: 316778
loss: 1.032795786857605,grad_norm: 0.9588777885416556, iteration: 316779
loss: 1.0104459524154663,grad_norm: 0.9489726116878103, iteration: 316780
loss: 1.0341925621032715,grad_norm: 0.9999998755539468, iteration: 316781
loss: 0.989987850189209,grad_norm: 0.9196908095618945, iteration: 316782
loss: 1.0103974342346191,grad_norm: 0.8701428090540931, iteration: 316783
loss: 1.0292816162109375,grad_norm: 0.9324944931595158, iteration: 316784
loss: 0.9857385754585266,grad_norm: 0.8044620507815684, iteration: 316785
loss: 1.0088093280792236,grad_norm: 0.7981057457562256, iteration: 316786
loss: 0.9854323863983154,grad_norm: 0.8877753582666348, iteration: 316787
loss: 1.0047725439071655,grad_norm: 0.9402477655337406, iteration: 316788
loss: 1.0150643587112427,grad_norm: 0.69515552305949, iteration: 316789
loss: 1.0054012537002563,grad_norm: 0.8205419409533073, iteration: 316790
loss: 1.031010627746582,grad_norm: 0.9999989371127123, iteration: 316791
loss: 1.0074286460876465,grad_norm: 0.8286861342151932, iteration: 316792
loss: 0.9928945899009705,grad_norm: 0.7609061847181833, iteration: 316793
loss: 1.0254392623901367,grad_norm: 0.7125270828385489, iteration: 316794
loss: 1.0127631425857544,grad_norm: 0.7252854570062909, iteration: 316795
loss: 0.9827165007591248,grad_norm: 0.9655328062652773, iteration: 316796
loss: 1.033815860748291,grad_norm: 0.9999992983556686, iteration: 316797
loss: 1.0092014074325562,grad_norm: 0.8288822637781418, iteration: 316798
loss: 0.9896514415740967,grad_norm: 0.855844632565674, iteration: 316799
loss: 1.058645248413086,grad_norm: 0.9999996319264297, iteration: 316800
loss: 1.0164240598678589,grad_norm: 0.7656509693886212, iteration: 316801
loss: 0.990502119064331,grad_norm: 0.8329893680049254, iteration: 316802
loss: 0.9979836344718933,grad_norm: 0.8040628539493407, iteration: 316803
loss: 0.9882491827011108,grad_norm: 0.9999996402946502, iteration: 316804
loss: 1.0067838430404663,grad_norm: 0.7967448418028255, iteration: 316805
loss: 0.9706069231033325,grad_norm: 0.8525831478140515, iteration: 316806
loss: 1.0319738388061523,grad_norm: 0.8797667444412162, iteration: 316807
loss: 0.9802148342132568,grad_norm: 0.8337759456013504, iteration: 316808
loss: 1.0167008638381958,grad_norm: 0.7913112832436484, iteration: 316809
loss: 0.9873344898223877,grad_norm: 0.9348485555386071, iteration: 316810
loss: 1.075939416885376,grad_norm: 0.9999992167994008, iteration: 316811
loss: 0.9932725429534912,grad_norm: 0.9999990410913137, iteration: 316812
loss: 0.9958846569061279,grad_norm: 0.9999991882136412, iteration: 316813
loss: 0.9929442405700684,grad_norm: 0.895031393725488, iteration: 316814
loss: 0.9922032356262207,grad_norm: 0.9999990552483814, iteration: 316815
loss: 0.9889228940010071,grad_norm: 0.999999432121956, iteration: 316816
loss: 0.9851536750793457,grad_norm: 0.8144350010809113, iteration: 316817
loss: 0.972872793674469,grad_norm: 0.7273366521611464, iteration: 316818
loss: 1.043299913406372,grad_norm: 0.9999991461666606, iteration: 316819
loss: 0.999755322933197,grad_norm: 0.8011917087677994, iteration: 316820
loss: 0.9557607173919678,grad_norm: 0.9942672849411623, iteration: 316821
loss: 0.9778770208358765,grad_norm: 0.7048068048337839, iteration: 316822
loss: 1.010850191116333,grad_norm: 0.8633661664938587, iteration: 316823
loss: 1.0174765586853027,grad_norm: 0.9999990247814544, iteration: 316824
loss: 1.1109167337417603,grad_norm: 0.9711780756537678, iteration: 316825
loss: 0.9991825819015503,grad_norm: 0.8438694348526585, iteration: 316826
loss: 0.9901142716407776,grad_norm: 0.9323556339852401, iteration: 316827
loss: 1.0467315912246704,grad_norm: 0.7951851555363473, iteration: 316828
loss: 0.9446755051612854,grad_norm: 0.902218520623638, iteration: 316829
loss: 0.954886794090271,grad_norm: 0.9092059399661928, iteration: 316830
loss: 1.0326937437057495,grad_norm: 0.8524500326413819, iteration: 316831
loss: 1.0295770168304443,grad_norm: 0.999999730861344, iteration: 316832
loss: 1.0423946380615234,grad_norm: 0.9999994655843585, iteration: 316833
loss: 0.981154203414917,grad_norm: 0.8172529330310168, iteration: 316834
loss: 0.9576213955879211,grad_norm: 0.8121209324103343, iteration: 316835
loss: 0.9903896450996399,grad_norm: 0.8309442886451264, iteration: 316836
loss: 0.976231038570404,grad_norm: 0.8397348877826982, iteration: 316837
loss: 0.9756374359130859,grad_norm: 0.9999990798597882, iteration: 316838
loss: 1.0167964696884155,grad_norm: 0.9999992940808038, iteration: 316839
loss: 0.963216245174408,grad_norm: 0.9999990635076771, iteration: 316840
loss: 0.9922749996185303,grad_norm: 0.7268858612515303, iteration: 316841
loss: 0.9607691168785095,grad_norm: 0.8385480572117665, iteration: 316842
loss: 1.0198930501937866,grad_norm: 0.8710856671853777, iteration: 316843
loss: 1.0239291191101074,grad_norm: 0.8243490616140224, iteration: 316844
loss: 1.0641136169433594,grad_norm: 0.7763344859309899, iteration: 316845
loss: 0.9644524455070496,grad_norm: 0.8194553254618427, iteration: 316846
loss: 1.0076212882995605,grad_norm: 0.8661588299691423, iteration: 316847
loss: 1.0093151330947876,grad_norm: 0.9704683231582704, iteration: 316848
loss: 0.9823269844055176,grad_norm: 0.933580286374708, iteration: 316849
loss: 0.9878408312797546,grad_norm: 0.8294651954770056, iteration: 316850
loss: 0.9703439474105835,grad_norm: 0.8071541654259969, iteration: 316851
loss: 0.9814923405647278,grad_norm: 0.8419659371842715, iteration: 316852
loss: 0.9825742840766907,grad_norm: 0.9621012829550233, iteration: 316853
loss: 0.9520171284675598,grad_norm: 0.8088739407601037, iteration: 316854
loss: 0.9875465035438538,grad_norm: 0.7394004520704873, iteration: 316855
loss: 0.9931156039237976,grad_norm: 0.8945082008812523, iteration: 316856
loss: 1.0391381978988647,grad_norm: 0.9905815970133927, iteration: 316857
loss: 1.0015785694122314,grad_norm: 0.8283036643143454, iteration: 316858
loss: 1.0207821130752563,grad_norm: 0.9254233926937688, iteration: 316859
loss: 1.0194820165634155,grad_norm: 0.8938041306854666, iteration: 316860
loss: 0.9869533777236938,grad_norm: 0.8125671375720339, iteration: 316861
loss: 1.0161120891571045,grad_norm: 0.9999990453841802, iteration: 316862
loss: 0.969169020652771,grad_norm: 0.9999993876948293, iteration: 316863
loss: 1.0649092197418213,grad_norm: 0.8217415422303604, iteration: 316864
loss: 1.0192558765411377,grad_norm: 0.9040158563116039, iteration: 316865
loss: 0.9899388551712036,grad_norm: 0.9999995251732294, iteration: 316866
loss: 1.0077846050262451,grad_norm: 0.9999998466032584, iteration: 316867
loss: 0.9871814846992493,grad_norm: 0.9999991481591826, iteration: 316868
loss: 1.0325188636779785,grad_norm: 0.9205020456507513, iteration: 316869
loss: 1.0715534687042236,grad_norm: 0.9999999679216041, iteration: 316870
loss: 0.971556544303894,grad_norm: 0.74603233534097, iteration: 316871
loss: 0.9766704440116882,grad_norm: 0.9999992012883735, iteration: 316872
loss: 0.9943575859069824,grad_norm: 0.7259145023758122, iteration: 316873
loss: 1.0165661573410034,grad_norm: 0.9785649177132197, iteration: 316874
loss: 1.0151294469833374,grad_norm: 0.8582438857652112, iteration: 316875
loss: 0.994408905506134,grad_norm: 0.806619017019658, iteration: 316876
loss: 1.0520607233047485,grad_norm: 0.9999995644436577, iteration: 316877
loss: 1.0205600261688232,grad_norm: 0.8361579159368024, iteration: 316878
loss: 1.0559438467025757,grad_norm: 0.9999990988680115, iteration: 316879
loss: 0.9932776093482971,grad_norm: 0.8083823565038865, iteration: 316880
loss: 0.9563238620758057,grad_norm: 0.9144521974469103, iteration: 316881
loss: 0.982296347618103,grad_norm: 0.7756934768879238, iteration: 316882
loss: 1.014113426208496,grad_norm: 0.8688100064109884, iteration: 316883
loss: 1.0014498233795166,grad_norm: 0.8870154167346005, iteration: 316884
loss: 1.0446624755859375,grad_norm: 0.7349698747646082, iteration: 316885
loss: 1.0069833993911743,grad_norm: 0.9957464838391511, iteration: 316886
loss: 0.9706813097000122,grad_norm: 0.8286692613381544, iteration: 316887
loss: 0.9943599104881287,grad_norm: 0.8905930457073958, iteration: 316888
loss: 0.9824067950248718,grad_norm: 0.9382426205982172, iteration: 316889
loss: 0.9789733290672302,grad_norm: 0.8454075978747427, iteration: 316890
loss: 1.0069092512130737,grad_norm: 0.7943035275889584, iteration: 316891
loss: 1.0145041942596436,grad_norm: 0.8922075674361339, iteration: 316892
loss: 1.1043872833251953,grad_norm: 0.8836198243177108, iteration: 316893
loss: 1.0089757442474365,grad_norm: 0.7884621453343346, iteration: 316894
loss: 0.9777644872665405,grad_norm: 0.8701158564008865, iteration: 316895
loss: 0.9741280674934387,grad_norm: 0.8000835173618496, iteration: 316896
loss: 1.0189237594604492,grad_norm: 0.9253018663483086, iteration: 316897
loss: 0.9987195730209351,grad_norm: 0.7854432345653083, iteration: 316898
loss: 1.008090853691101,grad_norm: 0.7749536903075441, iteration: 316899
loss: 0.9973622560501099,grad_norm: 0.7969654566961641, iteration: 316900
loss: 1.0268841981887817,grad_norm: 0.9999998391711952, iteration: 316901
loss: 1.0631860494613647,grad_norm: 0.9999996297030322, iteration: 316902
loss: 1.0274392366409302,grad_norm: 0.8511561538738383, iteration: 316903
loss: 1.034218430519104,grad_norm: 0.8054609733847427, iteration: 316904
loss: 1.0555918216705322,grad_norm: 0.9999991554636197, iteration: 316905
loss: 1.031265377998352,grad_norm: 0.9960130247437562, iteration: 316906
loss: 0.9695375561714172,grad_norm: 0.9844863749480084, iteration: 316907
loss: 1.0250803232192993,grad_norm: 0.999999382780894, iteration: 316908
loss: 1.011797308921814,grad_norm: 0.801730298237244, iteration: 316909
loss: 1.0113847255706787,grad_norm: 0.8098637884250963, iteration: 316910
loss: 0.9617395997047424,grad_norm: 0.8125541438285375, iteration: 316911
loss: 1.0563502311706543,grad_norm: 0.8926310500856894, iteration: 316912
loss: 1.0296975374221802,grad_norm: 0.9999989726201329, iteration: 316913
loss: 0.9533635377883911,grad_norm: 0.791404663654818, iteration: 316914
loss: 1.0002520084381104,grad_norm: 0.8951128917530978, iteration: 316915
loss: 1.06084144115448,grad_norm: 0.9999991397851254, iteration: 316916
loss: 1.020417332649231,grad_norm: 0.9999997119385332, iteration: 316917
loss: 1.007455825805664,grad_norm: 0.9281847214056116, iteration: 316918
loss: 1.0079866647720337,grad_norm: 0.8197850771402275, iteration: 316919
loss: 0.9930163621902466,grad_norm: 0.854104572377046, iteration: 316920
loss: 0.9566147923469543,grad_norm: 0.9999990176265825, iteration: 316921
loss: 1.0306357145309448,grad_norm: 0.9999990447494848, iteration: 316922
loss: 1.016878366470337,grad_norm: 0.8904580313271934, iteration: 316923
loss: 1.0425199270248413,grad_norm: 0.8190540253952115, iteration: 316924
loss: 1.0608351230621338,grad_norm: 0.9937769078232457, iteration: 316925
loss: 0.9818069338798523,grad_norm: 0.8228473846876024, iteration: 316926
loss: 0.9658070802688599,grad_norm: 0.8448897952393654, iteration: 316927
loss: 1.0468143224716187,grad_norm: 0.7689520412952319, iteration: 316928
loss: 1.0044128894805908,grad_norm: 0.8663004365941714, iteration: 316929
loss: 1.0147590637207031,grad_norm: 0.9060624219194197, iteration: 316930
loss: 0.9813072681427002,grad_norm: 0.9041139186846414, iteration: 316931
loss: 1.0282295942306519,grad_norm: 0.8504183109777784, iteration: 316932
loss: 0.9937041401863098,grad_norm: 0.8479155696369772, iteration: 316933
loss: 1.0264607667922974,grad_norm: 0.7941620959693179, iteration: 316934
loss: 1.058941125869751,grad_norm: 0.9999990740154452, iteration: 316935
loss: 0.9944617748260498,grad_norm: 0.7407288156242119, iteration: 316936
loss: 1.0153989791870117,grad_norm: 0.7227612384712888, iteration: 316937
loss: 1.0081219673156738,grad_norm: 0.7825630142323902, iteration: 316938
loss: 0.9980455040931702,grad_norm: 0.9999999128535344, iteration: 316939
loss: 1.022505521774292,grad_norm: 0.8904461195941301, iteration: 316940
loss: 0.9821147918701172,grad_norm: 0.7974868672536024, iteration: 316941
loss: 1.011319875717163,grad_norm: 0.7400790341579572, iteration: 316942
loss: 1.0305819511413574,grad_norm: 0.9999991214814582, iteration: 316943
loss: 1.0346049070358276,grad_norm: 0.9879684513838535, iteration: 316944
loss: 1.0348436832427979,grad_norm: 0.9999992579220504, iteration: 316945
loss: 0.971345841884613,grad_norm: 0.9999989986572563, iteration: 316946
loss: 0.9555255174636841,grad_norm: 0.8460136298432952, iteration: 316947
loss: 0.9757505059242249,grad_norm: 0.99999934624744, iteration: 316948
loss: 1.3031456470489502,grad_norm: 0.9999991318216933, iteration: 316949
loss: 1.0053012371063232,grad_norm: 0.7489455808724058, iteration: 316950
loss: 0.9893085956573486,grad_norm: 0.7579491776461122, iteration: 316951
loss: 0.9910036325454712,grad_norm: 0.8324066504571896, iteration: 316952
loss: 1.1594717502593994,grad_norm: 0.9999993510350496, iteration: 316953
loss: 0.9939157366752625,grad_norm: 0.9904364450353883, iteration: 316954
loss: 1.0080467462539673,grad_norm: 0.8694628106414141, iteration: 316955
loss: 0.995855987071991,grad_norm: 0.818042277908606, iteration: 316956
loss: 1.0173654556274414,grad_norm: 0.719873442705921, iteration: 316957
loss: 1.0293656587600708,grad_norm: 0.9018255979698966, iteration: 316958
loss: 0.989773154258728,grad_norm: 0.9322923849319978, iteration: 316959
loss: 1.008393406867981,grad_norm: 0.9139160784194946, iteration: 316960
loss: 0.9901477098464966,grad_norm: 0.8646758602240134, iteration: 316961
loss: 1.0426629781723022,grad_norm: 0.999999087235808, iteration: 316962
loss: 0.9819785356521606,grad_norm: 0.8603745574383431, iteration: 316963
loss: 0.9809443354606628,grad_norm: 0.8828495902645811, iteration: 316964
loss: 1.0178756713867188,grad_norm: 0.8942363727793877, iteration: 316965
loss: 1.0149755477905273,grad_norm: 0.9402854955902454, iteration: 316966
loss: 1.0049562454223633,grad_norm: 0.9999990297996942, iteration: 316967
loss: 0.9935632944107056,grad_norm: 0.8346882589322746, iteration: 316968
loss: 0.9613781571388245,grad_norm: 0.8062844886523879, iteration: 316969
loss: 0.9906097650527954,grad_norm: 0.9489239996186076, iteration: 316970
loss: 1.0727428197860718,grad_norm: 0.9999990913872671, iteration: 316971
loss: 1.0609354972839355,grad_norm: 0.9999996739646713, iteration: 316972
loss: 1.0115820169448853,grad_norm: 0.8747716762154681, iteration: 316973
loss: 0.946426272392273,grad_norm: 0.9121265897487475, iteration: 316974
loss: 0.9574456810951233,grad_norm: 0.7990687981574558, iteration: 316975
loss: 0.9919778108596802,grad_norm: 0.7535075844974667, iteration: 316976
loss: 1.0140341520309448,grad_norm: 0.9999994637768311, iteration: 316977
loss: 1.0641568899154663,grad_norm: 0.8894148983845653, iteration: 316978
loss: 1.0153212547302246,grad_norm: 0.824364063240711, iteration: 316979
loss: 1.0334134101867676,grad_norm: 0.9999991317221372, iteration: 316980
loss: 1.0074628591537476,grad_norm: 0.8325476010914323, iteration: 316981
loss: 1.0513538122177124,grad_norm: 0.9999991208359642, iteration: 316982
loss: 1.004123330116272,grad_norm: 0.8092359672284338, iteration: 316983
loss: 1.0182090997695923,grad_norm: 0.9273233788049854, iteration: 316984
loss: 0.9897152185440063,grad_norm: 0.7388550202661672, iteration: 316985
loss: 1.1288355588912964,grad_norm: 0.999999188362736, iteration: 316986
loss: 0.9649766683578491,grad_norm: 0.7927926604915718, iteration: 316987
loss: 1.0344198942184448,grad_norm: 0.7506196352706649, iteration: 316988
loss: 1.0312154293060303,grad_norm: 0.9999991514122573, iteration: 316989
loss: 1.0469133853912354,grad_norm: 0.8072984335511357, iteration: 316990
loss: 1.0284053087234497,grad_norm: 0.7063217976579974, iteration: 316991
loss: 1.008958101272583,grad_norm: 0.8799468661830647, iteration: 316992
loss: 1.0557010173797607,grad_norm: 1.0000000263398767, iteration: 316993
loss: 0.9636196494102478,grad_norm: 0.8091174349141468, iteration: 316994
loss: 0.9920280575752258,grad_norm: 0.8464123972926778, iteration: 316995
loss: 0.9614087343215942,grad_norm: 0.7417149896036347, iteration: 316996
loss: 1.0321907997131348,grad_norm: 0.9511889828974407, iteration: 316997
loss: 1.0020684003829956,grad_norm: 0.7797524571964197, iteration: 316998
loss: 0.9880695939064026,grad_norm: 0.9384795576267997, iteration: 316999
loss: 1.0316590070724487,grad_norm: 0.9999999733875721, iteration: 317000
loss: 1.0292013883590698,grad_norm: 0.9745758881591152, iteration: 317001
loss: 1.0123558044433594,grad_norm: 0.8085720561511478, iteration: 317002
loss: 1.0493072271347046,grad_norm: 0.9999996079314979, iteration: 317003
loss: 1.0254343748092651,grad_norm: 0.825038541862164, iteration: 317004
loss: 1.0158766508102417,grad_norm: 0.9078362000061008, iteration: 317005
loss: 1.0903369188308716,grad_norm: 0.999999420763157, iteration: 317006
loss: 1.0722086429595947,grad_norm: 0.9437301974693666, iteration: 317007
loss: 1.0366671085357666,grad_norm: 0.9999990745618401, iteration: 317008
loss: 0.9837170839309692,grad_norm: 0.9999996845274245, iteration: 317009
loss: 0.9924878478050232,grad_norm: 0.7684179180335075, iteration: 317010
loss: 1.0142170190811157,grad_norm: 0.9999990918454682, iteration: 317011
loss: 0.9659196734428406,grad_norm: 0.8285858192829455, iteration: 317012
loss: 1.0156012773513794,grad_norm: 0.9999992347528059, iteration: 317013
loss: 1.1583175659179688,grad_norm: 0.9999998011550078, iteration: 317014
loss: 1.0069955587387085,grad_norm: 0.6906193168721602, iteration: 317015
loss: 1.0013669729232788,grad_norm: 0.8577669602271975, iteration: 317016
loss: 1.0430876016616821,grad_norm: 0.9999996020741632, iteration: 317017
loss: 1.1115044355392456,grad_norm: 0.9999998786763533, iteration: 317018
loss: 0.9641473293304443,grad_norm: 0.6695349481756179, iteration: 317019
loss: 1.0383573770523071,grad_norm: 0.9848175222109671, iteration: 317020
loss: 0.9972111582756042,grad_norm: 0.9999998482704406, iteration: 317021
loss: 1.026307463645935,grad_norm: 0.7541075135963795, iteration: 317022
loss: 0.9577755331993103,grad_norm: 0.9999990484891846, iteration: 317023
loss: 1.0078998804092407,grad_norm: 0.8248875541650997, iteration: 317024
loss: 0.9848549962043762,grad_norm: 0.9999991167041522, iteration: 317025
loss: 0.9570893049240112,grad_norm: 0.8406926075011136, iteration: 317026
loss: 0.9720498323440552,grad_norm: 0.7923521867229222, iteration: 317027
loss: 1.0070754289627075,grad_norm: 0.9154437848783914, iteration: 317028
loss: 1.0428954362869263,grad_norm: 0.9999994582816524, iteration: 317029
loss: 0.9874472618103027,grad_norm: 0.8055022340649779, iteration: 317030
loss: 0.9915492534637451,grad_norm: 0.8839346167677495, iteration: 317031
loss: 0.9966366291046143,grad_norm: 0.9572589652198331, iteration: 317032
loss: 0.9971923828125,grad_norm: 0.7394362964367867, iteration: 317033
loss: 0.9991644620895386,grad_norm: 0.8448650713024515, iteration: 317034
loss: 1.0168991088867188,grad_norm: 0.9082302339848197, iteration: 317035
loss: 0.9939863681793213,grad_norm: 0.6617130477591765, iteration: 317036
loss: 0.9620332717895508,grad_norm: 0.7972190097183561, iteration: 317037
loss: 0.9923592209815979,grad_norm: 0.9382224349653159, iteration: 317038
loss: 0.9960792064666748,grad_norm: 0.9263257089066077, iteration: 317039
loss: 0.9981578588485718,grad_norm: 0.7294371203516217, iteration: 317040
loss: 0.9848484396934509,grad_norm: 0.822017195056312, iteration: 317041
loss: 0.9693657159805298,grad_norm: 0.9999996915215051, iteration: 317042
loss: 1.0191978216171265,grad_norm: 0.8038078433148127, iteration: 317043
loss: 1.0005015134811401,grad_norm: 0.9964344667507252, iteration: 317044
loss: 0.9995561838150024,grad_norm: 0.8280374162630021, iteration: 317045
loss: 0.990540087223053,grad_norm: 0.8163292840725315, iteration: 317046
loss: 0.9845862984657288,grad_norm: 0.7632546181377915, iteration: 317047
loss: 0.9720693230628967,grad_norm: 0.7314016521297715, iteration: 317048
loss: 1.002970814704895,grad_norm: 0.7153901945219857, iteration: 317049
loss: 0.9538798332214355,grad_norm: 0.8003675148905628, iteration: 317050
loss: 0.9536767601966858,grad_norm: 0.7903887621239294, iteration: 317051
loss: 1.0363303422927856,grad_norm: 0.8817132970587545, iteration: 317052
loss: 0.9839112162590027,grad_norm: 0.8638884817196782, iteration: 317053
loss: 1.0083048343658447,grad_norm: 0.8010698448714805, iteration: 317054
loss: 1.0913325548171997,grad_norm: 0.9999995057894427, iteration: 317055
loss: 1.0351965427398682,grad_norm: 0.9481281500131409, iteration: 317056
loss: 0.9566860198974609,grad_norm: 0.7596001336786606, iteration: 317057
loss: 1.0502880811691284,grad_norm: 0.7289980501396042, iteration: 317058
loss: 0.9895579814910889,grad_norm: 0.9083511951029213, iteration: 317059
loss: 0.9942288994789124,grad_norm: 0.7416992420833902, iteration: 317060
loss: 1.0480672121047974,grad_norm: 0.9655545593322323, iteration: 317061
loss: 1.0312566757202148,grad_norm: 0.8341806753769162, iteration: 317062
loss: 0.9694164395332336,grad_norm: 0.9170128149405583, iteration: 317063
loss: 1.0002765655517578,grad_norm: 0.7552847641349305, iteration: 317064
loss: 1.0795069932937622,grad_norm: 0.999999832749874, iteration: 317065
loss: 0.9965366125106812,grad_norm: 0.9999997748765354, iteration: 317066
loss: 0.9771804213523865,grad_norm: 0.8068713530643791, iteration: 317067
loss: 1.0071299076080322,grad_norm: 0.6832898035333514, iteration: 317068
loss: 1.0003098249435425,grad_norm: 0.9999990898140955, iteration: 317069
loss: 1.0080829858779907,grad_norm: 0.9999994682407836, iteration: 317070
loss: 1.016128659248352,grad_norm: 0.9999992393204175, iteration: 317071
loss: 1.0401376485824585,grad_norm: 0.9562106825791253, iteration: 317072
loss: 0.9779870510101318,grad_norm: 0.8510469913221499, iteration: 317073
loss: 1.2021015882492065,grad_norm: 0.9999994378375568, iteration: 317074
loss: 1.0012125968933105,grad_norm: 0.90057810208768, iteration: 317075
loss: 0.9472345113754272,grad_norm: 0.9073684017480663, iteration: 317076
loss: 0.9996901154518127,grad_norm: 0.9999990688505236, iteration: 317077
loss: 1.027789831161499,grad_norm: 0.9999991117903417, iteration: 317078
loss: 1.0042933225631714,grad_norm: 0.9999992479414089, iteration: 317079
loss: 0.9858430027961731,grad_norm: 0.7751702180506522, iteration: 317080
loss: 0.9930426478385925,grad_norm: 0.9999993119922626, iteration: 317081
loss: 1.0301603078842163,grad_norm: 0.9999990093188411, iteration: 317082
loss: 0.9727619290351868,grad_norm: 0.9157527523763946, iteration: 317083
loss: 1.0097436904907227,grad_norm: 0.7598504746728539, iteration: 317084
loss: 1.0126193761825562,grad_norm: 0.7696521151410985, iteration: 317085
loss: 1.0051342248916626,grad_norm: 0.9999995783618163, iteration: 317086
loss: 0.987342119216919,grad_norm: 0.9999992596045107, iteration: 317087
loss: 1.0005475282669067,grad_norm: 0.9999990727327013, iteration: 317088
loss: 1.0126692056655884,grad_norm: 0.9594052073536757, iteration: 317089
loss: 1.0173436403274536,grad_norm: 0.9999990812559518, iteration: 317090
loss: 0.9839727878570557,grad_norm: 0.6896394833372413, iteration: 317091
loss: 1.0074540376663208,grad_norm: 0.9686564643865078, iteration: 317092
loss: 0.9796028733253479,grad_norm: 0.8390373638708366, iteration: 317093
loss: 1.0359400510787964,grad_norm: 0.8717708295591792, iteration: 317094
loss: 1.0021584033966064,grad_norm: 0.8795893492223238, iteration: 317095
loss: 1.0257312059402466,grad_norm: 0.9117210736040307, iteration: 317096
loss: 0.9859059453010559,grad_norm: 0.8418735924322481, iteration: 317097
loss: 1.0637918710708618,grad_norm: 0.9370074214564678, iteration: 317098
loss: 1.0117486715316772,grad_norm: 0.9018537997581056, iteration: 317099
loss: 1.0066499710083008,grad_norm: 0.7420612693352495, iteration: 317100
loss: 0.9647518396377563,grad_norm: 0.7526788037062022, iteration: 317101
loss: 0.9816731810569763,grad_norm: 0.7416701255013453, iteration: 317102
loss: 1.005927324295044,grad_norm: 0.895953518778131, iteration: 317103
loss: 1.0567498207092285,grad_norm: 0.9531158363316848, iteration: 317104
loss: 1.0149755477905273,grad_norm: 0.9381383163850997, iteration: 317105
loss: 1.0181316137313843,grad_norm: 0.9999995308664622, iteration: 317106
loss: 0.9725834727287292,grad_norm: 0.8056015968725303, iteration: 317107
loss: 1.0095428228378296,grad_norm: 0.8820644827407386, iteration: 317108
loss: 1.013813853263855,grad_norm: 0.9999990876191887, iteration: 317109
loss: 0.9968403577804565,grad_norm: 0.8393226410190627, iteration: 317110
loss: 0.9850638508796692,grad_norm: 0.7292840734602625, iteration: 317111
loss: 1.0008283853530884,grad_norm: 0.9063731360198299, iteration: 317112
loss: 1.0420547723770142,grad_norm: 0.9002671444403563, iteration: 317113
loss: 0.9974888563156128,grad_norm: 0.9999994982601506, iteration: 317114
loss: 1.1840441226959229,grad_norm: 0.9999992473165266, iteration: 317115
loss: 1.0345704555511475,grad_norm: 0.9449834276363669, iteration: 317116
loss: 0.9997057914733887,grad_norm: 0.8148164447741975, iteration: 317117
loss: 1.0083043575286865,grad_norm: 0.9999990596596674, iteration: 317118
loss: 1.0512821674346924,grad_norm: 0.8012767343602895, iteration: 317119
loss: 0.9921692609786987,grad_norm: 0.78400598859796, iteration: 317120
loss: 1.0551902055740356,grad_norm: 0.9198422043115835, iteration: 317121
loss: 1.0356086492538452,grad_norm: 0.9999995817948582, iteration: 317122
loss: 1.015130639076233,grad_norm: 0.7573275586908715, iteration: 317123
loss: 0.9971927404403687,grad_norm: 0.8457735235925475, iteration: 317124
loss: 1.1522151231765747,grad_norm: 0.999998957224113, iteration: 317125
loss: 0.978771984577179,grad_norm: 0.7960856326649601, iteration: 317126
loss: 0.9994518160820007,grad_norm: 0.9160566828208317, iteration: 317127
loss: 0.9845319390296936,grad_norm: 0.6851032476504076, iteration: 317128
loss: 1.037391185760498,grad_norm: 0.9583895960665074, iteration: 317129
loss: 0.9744520783424377,grad_norm: 0.8342227363240299, iteration: 317130
loss: 0.9903013110160828,grad_norm: 0.9252737220577233, iteration: 317131
loss: 1.0429104566574097,grad_norm: 1.0000000123360986, iteration: 317132
loss: 0.9675554633140564,grad_norm: 0.7994071973387383, iteration: 317133
loss: 0.9918370246887207,grad_norm: 0.8578491551434922, iteration: 317134
loss: 1.0049842596054077,grad_norm: 0.8439708674300102, iteration: 317135
loss: 1.025657296180725,grad_norm: 0.9999993121564544, iteration: 317136
loss: 0.9864113926887512,grad_norm: 0.8396399433241216, iteration: 317137
loss: 0.997089147567749,grad_norm: 0.7493017337511021, iteration: 317138
loss: 0.9864043593406677,grad_norm: 0.95589230415389, iteration: 317139
loss: 0.9622411131858826,grad_norm: 0.9289319181423926, iteration: 317140
loss: 1.012836217880249,grad_norm: 0.722435911384343, iteration: 317141
loss: 0.9649756550788879,grad_norm: 0.6294921209174084, iteration: 317142
loss: 0.9734245538711548,grad_norm: 0.8255227257795541, iteration: 317143
loss: 0.9876554608345032,grad_norm: 0.7721371085651202, iteration: 317144
loss: 1.0171918869018555,grad_norm: 0.880145296140268, iteration: 317145
loss: 1.0118547677993774,grad_norm: 0.8764356763320564, iteration: 317146
loss: 1.0141818523406982,grad_norm: 0.9999994559282397, iteration: 317147
loss: 1.058778166770935,grad_norm: 0.9999997046236221, iteration: 317148
loss: 1.0179404020309448,grad_norm: 0.9002555087649472, iteration: 317149
loss: 1.018720269203186,grad_norm: 0.657290788095784, iteration: 317150
loss: 0.9736877679824829,grad_norm: 0.792261942801692, iteration: 317151
loss: 1.0387507677078247,grad_norm: 0.9067648265632221, iteration: 317152
loss: 0.9895365834236145,grad_norm: 0.85514323350198, iteration: 317153
loss: 0.9855962991714478,grad_norm: 0.8013976812253212, iteration: 317154
loss: 0.9993260502815247,grad_norm: 0.8085636401551838, iteration: 317155
loss: 1.009838581085205,grad_norm: 0.9395897203604265, iteration: 317156
loss: 0.9810927510261536,grad_norm: 0.7234921515402915, iteration: 317157
loss: 1.0101922750473022,grad_norm: 0.7871927112033782, iteration: 317158
loss: 0.9957572817802429,grad_norm: 0.7431438092582837, iteration: 317159
loss: 0.9837238788604736,grad_norm: 0.9999995610612147, iteration: 317160
loss: 1.018688440322876,grad_norm: 1.000000018355357, iteration: 317161
loss: 0.9683209657669067,grad_norm: 0.9489511868822291, iteration: 317162
loss: 1.012519121170044,grad_norm: 0.9999992913236498, iteration: 317163
loss: 1.0162816047668457,grad_norm: 0.9999991677726833, iteration: 317164
loss: 0.9740977883338928,grad_norm: 0.86269160975772, iteration: 317165
loss: 1.008710503578186,grad_norm: 0.7743638889363295, iteration: 317166
loss: 1.0856006145477295,grad_norm: 0.9999997189680824, iteration: 317167
loss: 1.0066893100738525,grad_norm: 0.8772266389456537, iteration: 317168
loss: 1.0126063823699951,grad_norm: 0.9999992028914958, iteration: 317169
loss: 0.9869962930679321,grad_norm: 0.7042504647560336, iteration: 317170
loss: 1.0097118616104126,grad_norm: 0.7332497046345052, iteration: 317171
loss: 0.9877620935440063,grad_norm: 0.8751636072918221, iteration: 317172
loss: 0.9982710480690002,grad_norm: 0.8028993299009056, iteration: 317173
loss: 1.0790525674819946,grad_norm: 0.7850057178578606, iteration: 317174
loss: 0.998150646686554,grad_norm: 0.7466794286601205, iteration: 317175
loss: 1.0178914070129395,grad_norm: 0.7887220066927396, iteration: 317176
loss: 0.979576826095581,grad_norm: 0.9999998606129156, iteration: 317177
loss: 0.982699453830719,grad_norm: 0.9999991785284285, iteration: 317178
loss: 1.0247008800506592,grad_norm: 0.9999991225448331, iteration: 317179
loss: 0.9953237175941467,grad_norm: 0.6987761391802448, iteration: 317180
loss: 1.0304793119430542,grad_norm: 0.7107789832558202, iteration: 317181
loss: 0.9819963574409485,grad_norm: 0.865698927115602, iteration: 317182
loss: 0.9818310141563416,grad_norm: 0.86135258893048, iteration: 317183
loss: 1.0084648132324219,grad_norm: 0.8955371332129105, iteration: 317184
loss: 0.9836605191230774,grad_norm: 0.8251706227422464, iteration: 317185
loss: 1.0349928140640259,grad_norm: 0.9266723044583353, iteration: 317186
loss: 0.984531581401825,grad_norm: 0.8103961436596527, iteration: 317187
loss: 0.9836385250091553,grad_norm: 0.7242998889000826, iteration: 317188
loss: 1.010433316230774,grad_norm: 0.9999992362360881, iteration: 317189
loss: 0.9838501214981079,grad_norm: 0.9536401063044638, iteration: 317190
loss: 0.9762945771217346,grad_norm: 0.7348693947929439, iteration: 317191
loss: 1.0072441101074219,grad_norm: 0.9999990430714499, iteration: 317192
loss: 1.0363675355911255,grad_norm: 0.9999990232886465, iteration: 317193
loss: 1.003539800643921,grad_norm: 0.9999990180599114, iteration: 317194
loss: 1.0054285526275635,grad_norm: 0.9999996104073002, iteration: 317195
loss: 1.0224164724349976,grad_norm: 0.9999991564217824, iteration: 317196
loss: 1.0303373336791992,grad_norm: 0.7638966466182018, iteration: 317197
loss: 1.0057132244110107,grad_norm: 0.7610636457901795, iteration: 317198
loss: 1.032378911972046,grad_norm: 0.9999999677788483, iteration: 317199
loss: 1.000004529953003,grad_norm: 0.8205667246893958, iteration: 317200
loss: 1.0448061227798462,grad_norm: 0.9999991495045677, iteration: 317201
loss: 1.0220154523849487,grad_norm: 0.758018872282101, iteration: 317202
loss: 0.9965113997459412,grad_norm: 0.8221250926158651, iteration: 317203
loss: 1.0169167518615723,grad_norm: 0.7758541130400481, iteration: 317204
loss: 1.0415581464767456,grad_norm: 0.8862654961421297, iteration: 317205
loss: 1.017007827758789,grad_norm: 0.720742598362344, iteration: 317206
loss: 0.9832252860069275,grad_norm: 0.8393556480956532, iteration: 317207
loss: 1.0207582712173462,grad_norm: 0.8880522804659656, iteration: 317208
loss: 1.0093191862106323,grad_norm: 0.9999992652783213, iteration: 317209
loss: 0.9818574786186218,grad_norm: 0.7943018803782261, iteration: 317210
loss: 1.031693458557129,grad_norm: 0.8175389974340419, iteration: 317211
loss: 1.0020205974578857,grad_norm: 0.9601666115537911, iteration: 317212
loss: 0.9947625398635864,grad_norm: 0.7985817164737504, iteration: 317213
loss: 1.015458583831787,grad_norm: 0.9999989729189793, iteration: 317214
loss: 0.9700796604156494,grad_norm: 0.7707472217954254, iteration: 317215
loss: 0.9525756239891052,grad_norm: 0.8585540619898513, iteration: 317216
loss: 0.9907569289207458,grad_norm: 0.8987073283827995, iteration: 317217
loss: 1.0222601890563965,grad_norm: 0.8482603594769557, iteration: 317218
loss: 0.9916355609893799,grad_norm: 0.8285196694988868, iteration: 317219
loss: 0.9787945747375488,grad_norm: 0.9999990923553037, iteration: 317220
loss: 0.977522611618042,grad_norm: 0.6979018324346162, iteration: 317221
loss: 0.9914178848266602,grad_norm: 0.99999907465565, iteration: 317222
loss: 1.030903935432434,grad_norm: 0.7806299278523099, iteration: 317223
loss: 0.993217408657074,grad_norm: 0.8068034829931818, iteration: 317224
loss: 1.0224612951278687,grad_norm: 0.8635588390740117, iteration: 317225
loss: 1.0436869859695435,grad_norm: 0.9999990164250777, iteration: 317226
loss: 1.009360432624817,grad_norm: 0.9999992605031703, iteration: 317227
loss: 1.0826056003570557,grad_norm: 0.9999998301420857, iteration: 317228
loss: 0.9918957352638245,grad_norm: 0.9854081231880644, iteration: 317229
loss: 0.9684256911277771,grad_norm: 0.9999996010109115, iteration: 317230
loss: 0.9808375239372253,grad_norm: 0.7175727170757559, iteration: 317231
loss: 0.9729307889938354,grad_norm: 0.7646864436812147, iteration: 317232
loss: 1.0258899927139282,grad_norm: 0.7656036719904565, iteration: 317233
loss: 0.9941468834877014,grad_norm: 0.9999993551809779, iteration: 317234
loss: 0.9975253939628601,grad_norm: 0.8852590090264557, iteration: 317235
loss: 1.035183072090149,grad_norm: 0.8687869283575685, iteration: 317236
loss: 1.0454318523406982,grad_norm: 0.9999999266860071, iteration: 317237
loss: 1.0113006830215454,grad_norm: 0.6797970428302207, iteration: 317238
loss: 1.0535820722579956,grad_norm: 0.9999991012933117, iteration: 317239
loss: 1.0158437490463257,grad_norm: 0.8808511634380856, iteration: 317240
loss: 1.0132569074630737,grad_norm: 0.847182344671552, iteration: 317241
loss: 0.9801092743873596,grad_norm: 0.8391554853524336, iteration: 317242
loss: 1.011274814605713,grad_norm: 0.9393625693929869, iteration: 317243
loss: 0.9803083539009094,grad_norm: 0.819972376842374, iteration: 317244
loss: 1.0363045930862427,grad_norm: 0.8056016804533619, iteration: 317245
loss: 1.0258368253707886,grad_norm: 0.9999997217451219, iteration: 317246
loss: 0.9896142482757568,grad_norm: 0.884133637946785, iteration: 317247
loss: 1.0455068349838257,grad_norm: 0.9999998882795378, iteration: 317248
loss: 0.9882132411003113,grad_norm: 0.9999991092550811, iteration: 317249
loss: 1.0075207948684692,grad_norm: 0.8980553763757626, iteration: 317250
loss: 0.9910869598388672,grad_norm: 0.7135046175008202, iteration: 317251
loss: 0.9715622663497925,grad_norm: 0.9148976466755246, iteration: 317252
loss: 0.9904186129570007,grad_norm: 0.8410598281154701, iteration: 317253
loss: 1.0049974918365479,grad_norm: 0.7763706271346086, iteration: 317254
loss: 1.038084864616394,grad_norm: 0.7380026833959351, iteration: 317255
loss: 1.0157768726348877,grad_norm: 0.8597262228042143, iteration: 317256
loss: 1.0214622020721436,grad_norm: 0.9102336589069028, iteration: 317257
loss: 0.9828020334243774,grad_norm: 0.7578600806989793, iteration: 317258
loss: 1.0072224140167236,grad_norm: 0.874511578947039, iteration: 317259
loss: 1.0140620470046997,grad_norm: 0.9999991367279915, iteration: 317260
loss: 1.005737543106079,grad_norm: 0.9249739746314408, iteration: 317261
loss: 0.9966828227043152,grad_norm: 0.8948335853107169, iteration: 317262
loss: 1.034612774848938,grad_norm: 0.9999991027803954, iteration: 317263
loss: 1.0292689800262451,grad_norm: 0.9378487384494878, iteration: 317264
loss: 1.0038115978240967,grad_norm: 0.8122540252005084, iteration: 317265
loss: 0.9969313144683838,grad_norm: 0.8839551755975364, iteration: 317266
loss: 1.019858956336975,grad_norm: 0.8860151924433023, iteration: 317267
loss: 0.9628303647041321,grad_norm: 0.8808820104311427, iteration: 317268
loss: 1.0056450366973877,grad_norm: 0.8967979131183379, iteration: 317269
loss: 0.9914566278457642,grad_norm: 0.7760033696515566, iteration: 317270
loss: 0.9844527244567871,grad_norm: 0.8554219847305444, iteration: 317271
loss: 0.9976867437362671,grad_norm: 0.9285507703118245, iteration: 317272
loss: 0.9703068137168884,grad_norm: 0.999999253336185, iteration: 317273
loss: 0.9709330797195435,grad_norm: 0.8468412245042326, iteration: 317274
loss: 1.0608538389205933,grad_norm: 0.8347524284502615, iteration: 317275
loss: 1.0137919187545776,grad_norm: 0.7350864505180629, iteration: 317276
loss: 0.9908319711685181,grad_norm: 0.8297796324014749, iteration: 317277
loss: 0.9941653609275818,grad_norm: 0.7913115594856225, iteration: 317278
loss: 1.035097360610962,grad_norm: 0.9999993474828862, iteration: 317279
loss: 0.9786932468414307,grad_norm: 0.7195175329610992, iteration: 317280
loss: 1.0310925245285034,grad_norm: 0.7922741976574117, iteration: 317281
loss: 0.9986338019371033,grad_norm: 0.8844602503710727, iteration: 317282
loss: 0.9942054152488708,grad_norm: 0.942875843815007, iteration: 317283
loss: 0.9954876899719238,grad_norm: 0.8255124138921971, iteration: 317284
loss: 0.9873113632202148,grad_norm: 0.7740406672671102, iteration: 317285
loss: 1.0101271867752075,grad_norm: 0.9999999391881257, iteration: 317286
loss: 0.995868444442749,grad_norm: 0.6609734843882439, iteration: 317287
loss: 0.9963474273681641,grad_norm: 0.9999990316803392, iteration: 317288
loss: 1.0128538608551025,grad_norm: 0.7875024429881304, iteration: 317289
loss: 1.014138102531433,grad_norm: 0.7135401296459959, iteration: 317290
loss: 1.0193408727645874,grad_norm: 0.7834321159379609, iteration: 317291
loss: 0.993678629398346,grad_norm: 0.7327984952211961, iteration: 317292
loss: 1.002989649772644,grad_norm: 0.6604467048514399, iteration: 317293
loss: 0.9808219075202942,grad_norm: 0.8942699428857145, iteration: 317294
loss: 0.9897430539131165,grad_norm: 0.8113982530161147, iteration: 317295
loss: 0.9651294350624084,grad_norm: 0.754305183501097, iteration: 317296
loss: 0.9545376300811768,grad_norm: 0.8023913236632085, iteration: 317297
loss: 0.9775394201278687,grad_norm: 0.7149110510986008, iteration: 317298
loss: 0.995846688747406,grad_norm: 0.8993894888338726, iteration: 317299
loss: 0.9945147037506104,grad_norm: 0.8475269667538138, iteration: 317300
loss: 1.0283876657485962,grad_norm: 0.9138118725033637, iteration: 317301
loss: 1.0014368295669556,grad_norm: 0.8249303373215587, iteration: 317302
loss: 0.9969605207443237,grad_norm: 0.9999998925702687, iteration: 317303
loss: 1.0789270401000977,grad_norm: 0.9885008181978113, iteration: 317304
loss: 1.0827256441116333,grad_norm: 0.9999997769964617, iteration: 317305
loss: 1.0099756717681885,grad_norm: 0.8013091016727395, iteration: 317306
loss: 1.0173842906951904,grad_norm: 0.6801426530352108, iteration: 317307
loss: 0.9920077323913574,grad_norm: 0.7583053038689044, iteration: 317308
loss: 1.0256218910217285,grad_norm: 0.8890739117268969, iteration: 317309
loss: 0.9979563355445862,grad_norm: 0.7675282588388457, iteration: 317310
loss: 1.0054727792739868,grad_norm: 0.8493914802310167, iteration: 317311
loss: 0.9568116664886475,grad_norm: 0.8516097468725972, iteration: 317312
loss: 1.039406180381775,grad_norm: 0.9999990393208542, iteration: 317313
loss: 1.0326132774353027,grad_norm: 0.82934399020169, iteration: 317314
loss: 1.0298655033111572,grad_norm: 0.9999995488921524, iteration: 317315
loss: 0.9790247082710266,grad_norm: 0.8847100006918929, iteration: 317316
loss: 0.9761452674865723,grad_norm: 0.8130594321851325, iteration: 317317
loss: 1.0093345642089844,grad_norm: 0.9999991128491229, iteration: 317318
loss: 1.0560967922210693,grad_norm: 0.9999999005763943, iteration: 317319
loss: 1.0260019302368164,grad_norm: 0.9519486603709942, iteration: 317320
loss: 0.9641451239585876,grad_norm: 0.8902532758561086, iteration: 317321
loss: 1.0229542255401611,grad_norm: 0.7052600175013368, iteration: 317322
loss: 1.0286312103271484,grad_norm: 0.9999991417066146, iteration: 317323
loss: 1.0015053749084473,grad_norm: 0.9999991512217233, iteration: 317324
loss: 0.9678314924240112,grad_norm: 0.9400320752262742, iteration: 317325
loss: 0.9936308860778809,grad_norm: 0.7623893805659713, iteration: 317326
loss: 1.006439447402954,grad_norm: 0.9886170530110074, iteration: 317327
loss: 0.9924890398979187,grad_norm: 0.79343322664106, iteration: 317328
loss: 1.0029476881027222,grad_norm: 0.9410087446464703, iteration: 317329
loss: 1.0078723430633545,grad_norm: 0.8792535800643532, iteration: 317330
loss: 0.9799065589904785,grad_norm: 0.8592221427949992, iteration: 317331
loss: 1.0203741788864136,grad_norm: 0.7721622464651119, iteration: 317332
loss: 0.9998508095741272,grad_norm: 0.7991505594178206, iteration: 317333
loss: 0.9916974306106567,grad_norm: 0.9014350417232688, iteration: 317334
loss: 0.9509958624839783,grad_norm: 0.8351643324648682, iteration: 317335
loss: 1.073237419128418,grad_norm: 0.9999993280630806, iteration: 317336
loss: 1.07558274269104,grad_norm: 0.9999999366767545, iteration: 317337
loss: 1.008382797241211,grad_norm: 0.7562491107878361, iteration: 317338
loss: 0.9974109530448914,grad_norm: 0.8990772808655695, iteration: 317339
loss: 0.9980633854866028,grad_norm: 0.7952999149839726, iteration: 317340
loss: 0.9934231042861938,grad_norm: 0.888560229301861, iteration: 317341
loss: 0.9772666096687317,grad_norm: 0.8295700565999569, iteration: 317342
loss: 1.016998291015625,grad_norm: 0.7506529535663881, iteration: 317343
loss: 0.9860946536064148,grad_norm: 0.8295594072040716, iteration: 317344
loss: 0.9925932884216309,grad_norm: 0.7932964550987299, iteration: 317345
loss: 1.0225127935409546,grad_norm: 0.8101188518635773, iteration: 317346
loss: 0.997769832611084,grad_norm: 0.6589132876677058, iteration: 317347
loss: 1.0015431642532349,grad_norm: 0.7752627949940334, iteration: 317348
loss: 0.9980863928794861,grad_norm: 0.8164055111084393, iteration: 317349
loss: 0.9835140705108643,grad_norm: 0.8333393149606205, iteration: 317350
loss: 1.0557899475097656,grad_norm: 0.9758712139150684, iteration: 317351
loss: 1.0061568021774292,grad_norm: 0.8134412224005253, iteration: 317352
loss: 1.0002678632736206,grad_norm: 0.8963158619085757, iteration: 317353
loss: 1.0236124992370605,grad_norm: 0.7987607790793486, iteration: 317354
loss: 1.021619200706482,grad_norm: 0.7672141636596226, iteration: 317355
loss: 1.0234496593475342,grad_norm: 0.8585933228804927, iteration: 317356
loss: 0.9893618226051331,grad_norm: 0.9126938953686494, iteration: 317357
loss: 1.000868320465088,grad_norm: 0.8435168051136873, iteration: 317358
loss: 1.0170954465866089,grad_norm: 0.8318823622336378, iteration: 317359
loss: 0.9698966145515442,grad_norm: 0.9121858990468961, iteration: 317360
loss: 1.0169209241867065,grad_norm: 0.7554375300463478, iteration: 317361
loss: 1.0287346839904785,grad_norm: 0.8553384894536297, iteration: 317362
loss: 0.9798100590705872,grad_norm: 0.7954462871587438, iteration: 317363
loss: 1.0088623762130737,grad_norm: 0.9999999868230667, iteration: 317364
loss: 0.9948956966400146,grad_norm: 0.9228806086490234, iteration: 317365
loss: 0.9459906220436096,grad_norm: 0.7450424526042985, iteration: 317366
loss: 1.0970014333724976,grad_norm: 0.9999997641058943, iteration: 317367
loss: 1.0581196546554565,grad_norm: 0.8678363209439327, iteration: 317368
loss: 0.9936652183532715,grad_norm: 0.7660786006751691, iteration: 317369
loss: 1.0136497020721436,grad_norm: 0.8004097089255915, iteration: 317370
loss: 1.0099985599517822,grad_norm: 0.9860022107353902, iteration: 317371
loss: 0.9994248747825623,grad_norm: 0.8930282791726261, iteration: 317372
loss: 1.005139946937561,grad_norm: 0.8255508804852434, iteration: 317373
loss: 1.013968586921692,grad_norm: 0.9999991654182024, iteration: 317374
loss: 1.0002005100250244,grad_norm: 0.864242874456805, iteration: 317375
loss: 0.9699582457542419,grad_norm: 0.6792807503629518, iteration: 317376
loss: 1.012961745262146,grad_norm: 0.8888524051844963, iteration: 317377
loss: 1.0326042175292969,grad_norm: 0.9999998584690766, iteration: 317378
loss: 1.0036134719848633,grad_norm: 0.7852501292411164, iteration: 317379
loss: 1.0275846719741821,grad_norm: 0.9999991441588116, iteration: 317380
loss: 0.999001145362854,grad_norm: 0.7903200116570652, iteration: 317381
loss: 0.9671710133552551,grad_norm: 0.8452242728309738, iteration: 317382
loss: 1.0017181634902954,grad_norm: 0.886046065249767, iteration: 317383
loss: 0.9910224676132202,grad_norm: 0.7524127586817986, iteration: 317384
loss: 1.046561360359192,grad_norm: 0.8588355533377766, iteration: 317385
loss: 1.025619387626648,grad_norm: 0.8029060927249583, iteration: 317386
loss: 1.0382022857666016,grad_norm: 0.8785481176174719, iteration: 317387
loss: 0.9792247414588928,grad_norm: 0.8569005394362369, iteration: 317388
loss: 1.0002646446228027,grad_norm: 0.9837448132513963, iteration: 317389
loss: 1.0104120969772339,grad_norm: 0.7849147496856155, iteration: 317390
loss: 1.082504153251648,grad_norm: 0.9999991859481467, iteration: 317391
loss: 0.9845167398452759,grad_norm: 0.9036131640217789, iteration: 317392
loss: 1.0057874917984009,grad_norm: 0.8337836213451775, iteration: 317393
loss: 1.0112642049789429,grad_norm: 0.9507034261275888, iteration: 317394
loss: 0.9990462064743042,grad_norm: 0.9999989559616679, iteration: 317395
loss: 0.9870179295539856,grad_norm: 0.8963514028902282, iteration: 317396
loss: 0.9932113289833069,grad_norm: 0.9255713895363867, iteration: 317397
loss: 0.9862185716629028,grad_norm: 0.6805474995146431, iteration: 317398
loss: 0.9808726906776428,grad_norm: 0.8495321339333316, iteration: 317399
loss: 1.0010161399841309,grad_norm: 0.9743493224235658, iteration: 317400
loss: 1.0306060314178467,grad_norm: 0.9999991235440248, iteration: 317401
loss: 0.9876452684402466,grad_norm: 0.8531115082748777, iteration: 317402
loss: 1.0140281915664673,grad_norm: 0.8917029448863468, iteration: 317403
loss: 1.028908133506775,grad_norm: 0.9999998905083215, iteration: 317404
loss: 0.9859715104103088,grad_norm: 0.9999990028828758, iteration: 317405
loss: 1.0248252153396606,grad_norm: 0.9614323039866506, iteration: 317406
loss: 0.9906727075576782,grad_norm: 0.699455905647985, iteration: 317407
loss: 1.0257784128189087,grad_norm: 0.8641172259194675, iteration: 317408
loss: 0.9911215305328369,grad_norm: 0.7918566731969527, iteration: 317409
loss: 1.0120574235916138,grad_norm: 0.8323273415175603, iteration: 317410
loss: 0.9607782959938049,grad_norm: 0.8710664134522309, iteration: 317411
loss: 0.9748629331588745,grad_norm: 0.7637008931329972, iteration: 317412
loss: 0.9946936964988708,grad_norm: 0.9204479837109656, iteration: 317413
loss: 1.0173510313034058,grad_norm: 0.7652360147859685, iteration: 317414
loss: 1.0218175649642944,grad_norm: 0.8552225985363918, iteration: 317415
loss: 1.1475236415863037,grad_norm: 0.999999787673841, iteration: 317416
loss: 1.0330114364624023,grad_norm: 0.9999989967526728, iteration: 317417
loss: 0.9748722314834595,grad_norm: 0.9429732889609428, iteration: 317418
loss: 0.9629324078559875,grad_norm: 0.8978964876163231, iteration: 317419
loss: 1.019051432609558,grad_norm: 0.936181687993701, iteration: 317420
loss: 1.0270835161209106,grad_norm: 0.8161491094296578, iteration: 317421
loss: 0.9944012761116028,grad_norm: 0.8232392069207242, iteration: 317422
loss: 1.0779132843017578,grad_norm: 0.8699325750146247, iteration: 317423
loss: 1.1020982265472412,grad_norm: 0.9999993115586837, iteration: 317424
loss: 1.1001389026641846,grad_norm: 0.9999998717143914, iteration: 317425
loss: 1.0233501195907593,grad_norm: 0.9786644860526238, iteration: 317426
loss: 1.0009469985961914,grad_norm: 0.9246866420154768, iteration: 317427
loss: 1.0387561321258545,grad_norm: 0.9999989778884566, iteration: 317428
loss: 0.9915060997009277,grad_norm: 0.9999999443085655, iteration: 317429
loss: 1.0329279899597168,grad_norm: 0.9999994246415691, iteration: 317430
loss: 0.9922158122062683,grad_norm: 0.9999995166027792, iteration: 317431
loss: 1.0334949493408203,grad_norm: 0.799047533590489, iteration: 317432
loss: 1.1099187135696411,grad_norm: 0.9999998400607948, iteration: 317433
loss: 1.0320414304733276,grad_norm: 0.8792118867959818, iteration: 317434
loss: 1.0053200721740723,grad_norm: 0.9999991256446386, iteration: 317435
loss: 1.0225129127502441,grad_norm: 0.8097508930981305, iteration: 317436
loss: 1.1079994440078735,grad_norm: 0.9999995571915804, iteration: 317437
loss: 1.041192650794983,grad_norm: 0.9999989784306366, iteration: 317438
loss: 1.0045433044433594,grad_norm: 0.9999994114935488, iteration: 317439
loss: 0.9891100525856018,grad_norm: 0.7915772371900309, iteration: 317440
loss: 1.0286986827850342,grad_norm: 0.8613178889141846, iteration: 317441
loss: 1.0883135795593262,grad_norm: 0.9999994451559736, iteration: 317442
loss: 1.1567986011505127,grad_norm: 0.999999589455693, iteration: 317443
loss: 1.102137804031372,grad_norm: 0.9440806317992532, iteration: 317444
loss: 1.15176522731781,grad_norm: 0.9999999244319433, iteration: 317445
loss: 1.046563982963562,grad_norm: 0.9999996067289385, iteration: 317446
loss: 1.0930907726287842,grad_norm: 0.9999990851256431, iteration: 317447
loss: 1.0287206172943115,grad_norm: 0.9999991033909578, iteration: 317448
loss: 1.005782961845398,grad_norm: 0.9335690355920703, iteration: 317449
loss: 1.0969476699829102,grad_norm: 0.9999995974318684, iteration: 317450
loss: 1.016333818435669,grad_norm: 0.8517508636373662, iteration: 317451
loss: 1.0407345294952393,grad_norm: 0.8102981407833778, iteration: 317452
loss: 1.0324046611785889,grad_norm: 0.9999995839925903, iteration: 317453
loss: 1.0160456895828247,grad_norm: 0.9999993030999974, iteration: 317454
loss: 1.294811725616455,grad_norm: 0.9999992738462384, iteration: 317455
loss: 1.0311789512634277,grad_norm: 0.8744883722164669, iteration: 317456
loss: 0.997370719909668,grad_norm: 0.999999897656999, iteration: 317457
loss: 1.0129393339157104,grad_norm: 0.9999992831456646, iteration: 317458
loss: 1.0823227167129517,grad_norm: 0.7981266510587836, iteration: 317459
loss: 0.9868367314338684,grad_norm: 0.8707431981935587, iteration: 317460
loss: 1.00651216506958,grad_norm: 0.9930175018096595, iteration: 317461
loss: 0.9913790225982666,grad_norm: 0.7766269367065476, iteration: 317462
loss: 1.058601975440979,grad_norm: 0.999998990063816, iteration: 317463
loss: 1.0369725227355957,grad_norm: 0.9057753479772163, iteration: 317464
loss: 1.014116644859314,grad_norm: 0.999999978808679, iteration: 317465
loss: 1.1067547798156738,grad_norm: 0.9999996639275733, iteration: 317466
loss: 1.022267460823059,grad_norm: 0.8559131049336443, iteration: 317467
loss: 1.0124797821044922,grad_norm: 0.9999991978742288, iteration: 317468
loss: 0.9499292969703674,grad_norm: 0.8085214120244708, iteration: 317469
loss: 1.0088244676589966,grad_norm: 0.7985679996643215, iteration: 317470
loss: 1.0193196535110474,grad_norm: 0.7676037297801374, iteration: 317471
loss: 1.1093024015426636,grad_norm: 0.9999995499047968, iteration: 317472
loss: 1.0000027418136597,grad_norm: 0.7460480840450805, iteration: 317473
loss: 1.022295594215393,grad_norm: 0.9999991711453835, iteration: 317474
loss: 1.0729328393936157,grad_norm: 0.9999993358114654, iteration: 317475
loss: 1.151598334312439,grad_norm: 0.9999999592199204, iteration: 317476
loss: 1.0429472923278809,grad_norm: 0.9999992325869804, iteration: 317477
loss: 1.212754726409912,grad_norm: 0.9999996783500399, iteration: 317478
loss: 1.0096625089645386,grad_norm: 0.8401040339323377, iteration: 317479
loss: 1.0473164319992065,grad_norm: 0.9999993393641996, iteration: 317480
loss: 1.0335520505905151,grad_norm: 0.9256644339215782, iteration: 317481
loss: 1.0554008483886719,grad_norm: 0.9999997660741792, iteration: 317482
loss: 1.0159006118774414,grad_norm: 0.999999377619454, iteration: 317483
loss: 1.0064622163772583,grad_norm: 0.9999990480092479, iteration: 317484
loss: 1.1369284391403198,grad_norm: 0.9999989087184569, iteration: 317485
loss: 0.9933966398239136,grad_norm: 0.7276990439995611, iteration: 317486
loss: 0.9814674854278564,grad_norm: 0.9999992732216032, iteration: 317487
loss: 1.0190261602401733,grad_norm: 0.9999990769490913, iteration: 317488
loss: 1.0129187107086182,grad_norm: 0.8846016699927027, iteration: 317489
loss: 0.9665988683700562,grad_norm: 0.9999992132981677, iteration: 317490
loss: 1.0053844451904297,grad_norm: 0.819692387621788, iteration: 317491
loss: 0.9930627346038818,grad_norm: 0.7634027655811064, iteration: 317492
loss: 0.9777918457984924,grad_norm: 0.8421619170837071, iteration: 317493
loss: 0.9655506610870361,grad_norm: 0.754656585169311, iteration: 317494
loss: 1.0313987731933594,grad_norm: 0.757317403563633, iteration: 317495
loss: 1.0037137269973755,grad_norm: 0.9035723065419771, iteration: 317496
loss: 0.9901273846626282,grad_norm: 0.8988546225130666, iteration: 317497
loss: 1.0095490217208862,grad_norm: 0.7597280846132601, iteration: 317498
loss: 1.0188732147216797,grad_norm: 0.9999995419543598, iteration: 317499
loss: 1.0167198181152344,grad_norm: 0.9999992204153184, iteration: 317500
loss: 1.0078259706497192,grad_norm: 0.8639041989643237, iteration: 317501
loss: 1.060779094696045,grad_norm: 0.9999997770531227, iteration: 317502
loss: 0.9951756596565247,grad_norm: 0.9635591514232106, iteration: 317503
loss: 1.0501686334609985,grad_norm: 0.9999992936225545, iteration: 317504
loss: 1.0359560251235962,grad_norm: 0.9675516028644073, iteration: 317505
loss: 1.045546054840088,grad_norm: 0.8776339293861719, iteration: 317506
loss: 1.0297739505767822,grad_norm: 0.8794682399720088, iteration: 317507
loss: 0.9913526177406311,grad_norm: 0.7980271322427109, iteration: 317508
loss: 0.9978151917457581,grad_norm: 0.6867308803368417, iteration: 317509
loss: 1.0009151697158813,grad_norm: 0.709165428639764, iteration: 317510
loss: 1.006110668182373,grad_norm: 0.9999990964673899, iteration: 317511
loss: 0.9553577899932861,grad_norm: 0.9318795879701073, iteration: 317512
loss: 1.0146836042404175,grad_norm: 0.9377261535574368, iteration: 317513
loss: 1.0017762184143066,grad_norm: 0.8504438475541255, iteration: 317514
loss: 1.0085175037384033,grad_norm: 0.8582731194590427, iteration: 317515
loss: 0.9888594746589661,grad_norm: 0.988949066557902, iteration: 317516
loss: 1.0033611059188843,grad_norm: 0.689022787375122, iteration: 317517
loss: 1.0093332529067993,grad_norm: 0.8641587827584911, iteration: 317518
loss: 1.0200437307357788,grad_norm: 0.8199198858389105, iteration: 317519
loss: 0.9914965033531189,grad_norm: 0.9818418100865695, iteration: 317520
loss: 0.9920370578765869,grad_norm: 0.7616885872834414, iteration: 317521
loss: 0.984455406665802,grad_norm: 0.9999990328064734, iteration: 317522
loss: 1.0429083108901978,grad_norm: 0.9999995215339741, iteration: 317523
loss: 1.0019711256027222,grad_norm: 0.8297795554598214, iteration: 317524
loss: 1.0300452709197998,grad_norm: 0.999998996191844, iteration: 317525
loss: 0.9854239225387573,grad_norm: 0.904911801452604, iteration: 317526
loss: 1.009867787361145,grad_norm: 0.7438059396237828, iteration: 317527
loss: 1.0518996715545654,grad_norm: 0.9999990644251192, iteration: 317528
loss: 0.9731861352920532,grad_norm: 0.9442047479066032, iteration: 317529
loss: 1.0471857786178589,grad_norm: 0.9999997675148714, iteration: 317530
loss: 0.9829562902450562,grad_norm: 0.9353887041635498, iteration: 317531
loss: 1.084108591079712,grad_norm: 0.9999995415330595, iteration: 317532
loss: 1.0298686027526855,grad_norm: 0.9369132602963155, iteration: 317533
loss: 1.036380648612976,grad_norm: 0.7603458021535868, iteration: 317534
loss: 1.0152413845062256,grad_norm: 0.8378190194824557, iteration: 317535
loss: 0.987265408039093,grad_norm: 0.7932175536161831, iteration: 317536
loss: 0.9942993521690369,grad_norm: 0.7497937441660852, iteration: 317537
loss: 1.0118147134780884,grad_norm: 0.8314108830592802, iteration: 317538
loss: 1.0157382488250732,grad_norm: 0.9679840990177431, iteration: 317539
loss: 0.9701915383338928,grad_norm: 0.8204385192217208, iteration: 317540
loss: 1.0184364318847656,grad_norm: 0.8487269826025197, iteration: 317541
loss: 0.9660001993179321,grad_norm: 0.7847327600731417, iteration: 317542
loss: 1.0161112546920776,grad_norm: 0.8300965551497215, iteration: 317543
loss: 1.0331414937973022,grad_norm: 0.9999996012450737, iteration: 317544
loss: 1.0329145193099976,grad_norm: 0.7816609586070846, iteration: 317545
loss: 0.9996877908706665,grad_norm: 0.8929808923634123, iteration: 317546
loss: 1.0289498567581177,grad_norm: 0.9999997262453821, iteration: 317547
loss: 0.9856632947921753,grad_norm: 0.730974883661209, iteration: 317548
loss: 1.0081901550292969,grad_norm: 0.9999990757954892, iteration: 317549
loss: 1.0094467401504517,grad_norm: 0.9873846344834326, iteration: 317550
loss: 0.9982970952987671,grad_norm: 0.8030146373556646, iteration: 317551
loss: 0.9982069134712219,grad_norm: 0.830122459377723, iteration: 317552
loss: 1.1717711687088013,grad_norm: 0.9999998949635652, iteration: 317553
loss: 1.0183582305908203,grad_norm: 0.9999996130084836, iteration: 317554
loss: 0.9859733581542969,grad_norm: 0.7907503819759257, iteration: 317555
loss: 0.9949138164520264,grad_norm: 0.8081527754787152, iteration: 317556
loss: 0.9781152009963989,grad_norm: 0.805258959578564, iteration: 317557
loss: 0.9957497715950012,grad_norm: 0.7721868506523158, iteration: 317558
loss: 0.9896519184112549,grad_norm: 0.8009758120783971, iteration: 317559
loss: 1.0062929391860962,grad_norm: 0.897213676003188, iteration: 317560
loss: 1.0179516077041626,grad_norm: 0.8618286934011744, iteration: 317561
loss: 1.105989694595337,grad_norm: 0.9999998668788872, iteration: 317562
loss: 1.0028504133224487,grad_norm: 0.9221668743509193, iteration: 317563
loss: 1.103436827659607,grad_norm: 0.999999758746796, iteration: 317564
loss: 0.996936559677124,grad_norm: 0.9528652022458596, iteration: 317565
loss: 1.036595106124878,grad_norm: 0.8435437025823628, iteration: 317566
loss: 1.0265880823135376,grad_norm: 0.8675180078124837, iteration: 317567
loss: 1.1176656484603882,grad_norm: 0.9999997703179779, iteration: 317568
loss: 1.0115422010421753,grad_norm: 0.9999990821582393, iteration: 317569
loss: 1.0107421875,grad_norm: 0.7915503658828795, iteration: 317570
loss: 1.0585023164749146,grad_norm: 0.9079124168141631, iteration: 317571
loss: 1.1417649984359741,grad_norm: 0.9999995976634364, iteration: 317572
loss: 0.9945416450500488,grad_norm: 0.830605329412467, iteration: 317573
loss: 1.1306606531143188,grad_norm: 0.9999994269606456, iteration: 317574
loss: 1.0504175424575806,grad_norm: 0.8740598137067965, iteration: 317575
loss: 1.003138542175293,grad_norm: 0.8637775575203087, iteration: 317576
loss: 1.1232154369354248,grad_norm: 0.9999994896563636, iteration: 317577
loss: 1.014182686805725,grad_norm: 0.8754797627493088, iteration: 317578
loss: 1.0797652006149292,grad_norm: 0.9999990148974574, iteration: 317579
loss: 1.030834436416626,grad_norm: 0.999999392200869, iteration: 317580
loss: 1.1014916896820068,grad_norm: 0.9999998455919833, iteration: 317581
loss: 1.0091248750686646,grad_norm: 0.9999993589861889, iteration: 317582
loss: 1.0282342433929443,grad_norm: 0.7817373214005976, iteration: 317583
loss: 1.0238415002822876,grad_norm: 0.7372783491145823, iteration: 317584
loss: 0.9572337865829468,grad_norm: 0.9129877636206133, iteration: 317585
loss: 1.0709028244018555,grad_norm: 0.999999103662656, iteration: 317586
loss: 1.2364453077316284,grad_norm: 0.9999994962203111, iteration: 317587
loss: 0.9751971960067749,grad_norm: 0.8150768840263835, iteration: 317588
loss: 0.9896005392074585,grad_norm: 0.9999991851969993, iteration: 317589
loss: 1.0227813720703125,grad_norm: 0.9999999690903851, iteration: 317590
loss: 0.9946445822715759,grad_norm: 0.8518688226861194, iteration: 317591
loss: 1.016414761543274,grad_norm: 0.9327453555731529, iteration: 317592
loss: 1.0343621969223022,grad_norm: 1.000000012756671, iteration: 317593
loss: 1.1595221757888794,grad_norm: 0.9999995163091517, iteration: 317594
loss: 0.9906524419784546,grad_norm: 0.8944805198893453, iteration: 317595
loss: 1.0089302062988281,grad_norm: 0.854395347872599, iteration: 317596
loss: 1.0321712493896484,grad_norm: 0.9999996304453871, iteration: 317597
loss: 0.997173547744751,grad_norm: 0.9999994321032805, iteration: 317598
loss: 1.009010910987854,grad_norm: 0.9999990271003544, iteration: 317599
loss: 1.0112155675888062,grad_norm: 0.8922215507538435, iteration: 317600
loss: 0.9910053014755249,grad_norm: 0.999999119472584, iteration: 317601
loss: 1.002514123916626,grad_norm: 0.9999992666430492, iteration: 317602
loss: 1.1376398801803589,grad_norm: 1.00000001791748, iteration: 317603
loss: 0.9782202839851379,grad_norm: 0.933934018444283, iteration: 317604
loss: 1.0019688606262207,grad_norm: 0.8245010602189645, iteration: 317605
loss: 0.9809719324111938,grad_norm: 0.9999999203401724, iteration: 317606
loss: 0.9730498194694519,grad_norm: 0.9999991782558436, iteration: 317607
loss: 1.0750863552093506,grad_norm: 0.9116120209592762, iteration: 317608
loss: 1.034570574760437,grad_norm: 0.9999992906635988, iteration: 317609
loss: 0.9768874645233154,grad_norm: 0.9249408120742308, iteration: 317610
loss: 0.9800543189048767,grad_norm: 0.7173379630832372, iteration: 317611
loss: 1.0169984102249146,grad_norm: 0.9068308364976606, iteration: 317612
loss: 1.01127290725708,grad_norm: 0.9999991593343963, iteration: 317613
loss: 1.123599648475647,grad_norm: 0.9999993204073448, iteration: 317614
loss: 1.0540533065795898,grad_norm: 0.9999999347110906, iteration: 317615
loss: 1.0223957300186157,grad_norm: 0.8005155274979802, iteration: 317616
loss: 1.0031987428665161,grad_norm: 0.9206651100026957, iteration: 317617
loss: 1.0187562704086304,grad_norm: 0.9999993093970406, iteration: 317618
loss: 1.0322147607803345,grad_norm: 0.7412590278864292, iteration: 317619
loss: 1.0063735246658325,grad_norm: 0.9999991039127004, iteration: 317620
loss: 1.0639644861221313,grad_norm: 0.8874113552501529, iteration: 317621
loss: 1.0028393268585205,grad_norm: 0.9999995975155281, iteration: 317622
loss: 0.9715062379837036,grad_norm: 0.750937913381996, iteration: 317623
loss: 1.0205912590026855,grad_norm: 0.6919728328244071, iteration: 317624
loss: 1.0539376735687256,grad_norm: 0.9444169914168081, iteration: 317625
loss: 0.9946597814559937,grad_norm: 0.6952364544393378, iteration: 317626
loss: 0.9874424934387207,grad_norm: 0.8397684448238131, iteration: 317627
loss: 0.9821217656135559,grad_norm: 0.9290626751162829, iteration: 317628
loss: 1.023526906967163,grad_norm: 0.7600052097773913, iteration: 317629
loss: 0.9688295125961304,grad_norm: 0.9502068115188576, iteration: 317630
loss: 1.05267333984375,grad_norm: 0.8411317435951069, iteration: 317631
loss: 1.0134941339492798,grad_norm: 0.82280932026496, iteration: 317632
loss: 1.0215141773223877,grad_norm: 0.9999991603126795, iteration: 317633
loss: 1.089040756225586,grad_norm: 0.9999995583028949, iteration: 317634
loss: 1.0197134017944336,grad_norm: 0.999999126302698, iteration: 317635
loss: 1.0266387462615967,grad_norm: 0.9772386494495738, iteration: 317636
loss: 1.0557081699371338,grad_norm: 0.999999331080008, iteration: 317637
loss: 1.0041934251785278,grad_norm: 0.7859168979358802, iteration: 317638
loss: 1.0193101167678833,grad_norm: 0.8357952817290762, iteration: 317639
loss: 1.021794080734253,grad_norm: 0.9255438034606694, iteration: 317640
loss: 1.0269685983657837,grad_norm: 0.9999993963743817, iteration: 317641
loss: 0.9879917502403259,grad_norm: 0.7115468550299375, iteration: 317642
loss: 0.9874371886253357,grad_norm: 0.8899800655806329, iteration: 317643
loss: 1.0115025043487549,grad_norm: 0.8909266889851479, iteration: 317644
loss: 1.002435564994812,grad_norm: 0.9999990754485217, iteration: 317645
loss: 1.0104223489761353,grad_norm: 0.9999990178515598, iteration: 317646
loss: 1.005350112915039,grad_norm: 0.9999993172142628, iteration: 317647
loss: 0.9868289828300476,grad_norm: 0.9999993659432127, iteration: 317648
loss: 0.9984666705131531,grad_norm: 0.8066014303978996, iteration: 317649
loss: 1.0645912885665894,grad_norm: 0.8358210273569294, iteration: 317650
loss: 0.9850876331329346,grad_norm: 0.8871533253005837, iteration: 317651
loss: 1.0211883783340454,grad_norm: 0.9999996871691023, iteration: 317652
loss: 1.0431411266326904,grad_norm: 0.9999990650399151, iteration: 317653
loss: 0.9948760867118835,grad_norm: 0.9999990696242163, iteration: 317654
loss: 0.9816149473190308,grad_norm: 0.8889325599589776, iteration: 317655
loss: 1.0047434568405151,grad_norm: 0.9999996023787462, iteration: 317656
loss: 1.1794459819793701,grad_norm: 0.9999993113390651, iteration: 317657
loss: 1.010763168334961,grad_norm: 0.6730615171157677, iteration: 317658
loss: 1.0556015968322754,grad_norm: 0.9972795860964557, iteration: 317659
loss: 1.032142996788025,grad_norm: 0.9672720942228864, iteration: 317660
loss: 1.002917766571045,grad_norm: 0.9142617477892883, iteration: 317661
loss: 1.0104817152023315,grad_norm: 0.9387171982318443, iteration: 317662
loss: 1.147093415260315,grad_norm: 0.9999994749418393, iteration: 317663
loss: 1.0063377618789673,grad_norm: 0.8614638333833285, iteration: 317664
loss: 0.9911839962005615,grad_norm: 0.9999994740985392, iteration: 317665
loss: 1.0132205486297607,grad_norm: 0.7845982062257129, iteration: 317666
loss: 1.002872109413147,grad_norm: 0.8214773971777699, iteration: 317667
loss: 0.9898443818092346,grad_norm: 0.7644217942745366, iteration: 317668
loss: 0.9949934482574463,grad_norm: 0.8505333339683152, iteration: 317669
loss: 1.0403761863708496,grad_norm: 0.8057455896305875, iteration: 317670
loss: 1.0116969347000122,grad_norm: 0.8700912379741245, iteration: 317671
loss: 1.0570571422576904,grad_norm: 0.9999995886544196, iteration: 317672
loss: 1.0388870239257812,grad_norm: 0.9999992182442163, iteration: 317673
loss: 1.0276298522949219,grad_norm: 0.8452769754685316, iteration: 317674
loss: 0.9928505420684814,grad_norm: 0.7225845901492739, iteration: 317675
loss: 1.032872200012207,grad_norm: 0.9999993730754339, iteration: 317676
loss: 0.9744448065757751,grad_norm: 0.9999990538714659, iteration: 317677
loss: 1.0046671628952026,grad_norm: 0.9999994718565209, iteration: 317678
loss: 1.0381120443344116,grad_norm: 0.9248991237067827, iteration: 317679
loss: 0.9996160268783569,grad_norm: 0.9661109528526844, iteration: 317680
loss: 1.0147768259048462,grad_norm: 0.7542667135394945, iteration: 317681
loss: 1.015977382659912,grad_norm: 0.7020517685028317, iteration: 317682
loss: 0.9915356636047363,grad_norm: 0.9259514727546132, iteration: 317683
loss: 1.0083872079849243,grad_norm: 0.999999644013774, iteration: 317684
loss: 1.0463380813598633,grad_norm: 0.9999989682833892, iteration: 317685
loss: 0.9771533012390137,grad_norm: 0.8424181117153537, iteration: 317686
loss: 1.0631086826324463,grad_norm: 0.8479716116551682, iteration: 317687
loss: 0.9943301677703857,grad_norm: 1.0000000171134642, iteration: 317688
loss: 0.9879378080368042,grad_norm: 0.8302454387786347, iteration: 317689
loss: 1.0424975156784058,grad_norm: 0.9714177071809142, iteration: 317690
loss: 0.9888001680374146,grad_norm: 0.947926896831599, iteration: 317691
loss: 1.0125855207443237,grad_norm: 0.7612917795571535, iteration: 317692
loss: 0.9864412546157837,grad_norm: 0.9999990948829522, iteration: 317693
loss: 1.0015747547149658,grad_norm: 0.9278758082972197, iteration: 317694
loss: 1.0152839422225952,grad_norm: 0.9999995260606979, iteration: 317695
loss: 0.999614417552948,grad_norm: 0.7981469038341515, iteration: 317696
loss: 1.0529142618179321,grad_norm: 0.9999993322790702, iteration: 317697
loss: 1.0141292810440063,grad_norm: 0.7584868105707133, iteration: 317698
loss: 0.9608662724494934,grad_norm: 0.7066430168300972, iteration: 317699
loss: 0.982175350189209,grad_norm: 0.9999992784508381, iteration: 317700
loss: 0.986963152885437,grad_norm: 0.9263622299094987, iteration: 317701
loss: 0.9756448864936829,grad_norm: 0.999999673844546, iteration: 317702
loss: 1.031543254852295,grad_norm: 0.8293463937737148, iteration: 317703
loss: 0.9770545959472656,grad_norm: 0.7887652984199138, iteration: 317704
loss: 0.9870703220367432,grad_norm: 0.8221897434660125, iteration: 317705
loss: 1.0191177129745483,grad_norm: 0.7134269858204322, iteration: 317706
loss: 1.055648922920227,grad_norm: 0.8650382049305343, iteration: 317707
loss: 0.9943061470985413,grad_norm: 0.9151702571807274, iteration: 317708
loss: 0.9682444334030151,grad_norm: 0.882371967332398, iteration: 317709
loss: 1.0189749002456665,grad_norm: 0.9999998428238399, iteration: 317710
loss: 1.0306648015975952,grad_norm: 0.9999990306872496, iteration: 317711
loss: 1.0093408823013306,grad_norm: 0.841746390995708, iteration: 317712
loss: 1.0182548761367798,grad_norm: 0.9134956826330403, iteration: 317713
loss: 1.00083327293396,grad_norm: 0.7650453551207171, iteration: 317714
loss: 1.0504432916641235,grad_norm: 0.7661110013320516, iteration: 317715
loss: 1.0095343589782715,grad_norm: 0.9999997217409524, iteration: 317716
loss: 1.0313549041748047,grad_norm: 0.7917984002194474, iteration: 317717
loss: 1.0215253829956055,grad_norm: 0.7711228390387762, iteration: 317718
loss: 0.991695761680603,grad_norm: 0.9445029970270491, iteration: 317719
loss: 0.9854657053947449,grad_norm: 0.8834360217474903, iteration: 317720
loss: 1.026580810546875,grad_norm: 0.8167863578126249, iteration: 317721
loss: 1.011775016784668,grad_norm: 0.7803181575053617, iteration: 317722
loss: 1.0132032632827759,grad_norm: 0.9608296850760893, iteration: 317723
loss: 1.064376950263977,grad_norm: 0.9999992578304221, iteration: 317724
loss: 0.9664997458457947,grad_norm: 0.8729136271889607, iteration: 317725
loss: 0.9960681796073914,grad_norm: 0.8355005179301392, iteration: 317726
loss: 1.0274248123168945,grad_norm: 0.9928108940440779, iteration: 317727
loss: 1.0054562091827393,grad_norm: 0.7626074302738354, iteration: 317728
loss: 1.0253256559371948,grad_norm: 0.8493248001138332, iteration: 317729
loss: 0.9714672565460205,grad_norm: 0.8886332983256192, iteration: 317730
loss: 0.9759199619293213,grad_norm: 0.8979011806080691, iteration: 317731
loss: 1.003160834312439,grad_norm: 0.88613753942933, iteration: 317732
loss: 1.0204371213912964,grad_norm: 0.9384689426176526, iteration: 317733
loss: 1.037313461303711,grad_norm: 0.9359192036768563, iteration: 317734
loss: 1.0251624584197998,grad_norm: 0.9181215768630409, iteration: 317735
loss: 1.019297480583191,grad_norm: 0.8068262474861153, iteration: 317736
loss: 0.9780467748641968,grad_norm: 0.7857057664312754, iteration: 317737
loss: 1.0023512840270996,grad_norm: 0.9037171297432457, iteration: 317738
loss: 0.9584333896636963,grad_norm: 0.7290507203305644, iteration: 317739
loss: 0.991279125213623,grad_norm: 0.9999992954992936, iteration: 317740
loss: 1.0029915571212769,grad_norm: 0.9628516438937673, iteration: 317741
loss: 1.0191903114318848,grad_norm: 0.8141421082847314, iteration: 317742
loss: 1.0307648181915283,grad_norm: 0.9711960566434485, iteration: 317743
loss: 1.0369830131530762,grad_norm: 0.9219003051164468, iteration: 317744
loss: 0.9784700274467468,grad_norm: 0.9141345519931321, iteration: 317745
loss: 0.9971091747283936,grad_norm: 0.8103206232962876, iteration: 317746
loss: 1.0232491493225098,grad_norm: 0.9999991981743872, iteration: 317747
loss: 1.0169917345046997,grad_norm: 0.9396510445883828, iteration: 317748
loss: 1.0675325393676758,grad_norm: 0.9999998251430784, iteration: 317749
loss: 1.013644814491272,grad_norm: 0.8873648486074983, iteration: 317750
loss: 1.025997519493103,grad_norm: 0.9999992868974176, iteration: 317751
loss: 0.9666174054145813,grad_norm: 0.8883747506784292, iteration: 317752
loss: 0.9745405912399292,grad_norm: 0.9999989538800552, iteration: 317753
loss: 0.9785248637199402,grad_norm: 0.9928879786342368, iteration: 317754
loss: 1.032142996788025,grad_norm: 0.8306038324922475, iteration: 317755
loss: 0.9840141534805298,grad_norm: 0.8129590222543052, iteration: 317756
loss: 1.0124491453170776,grad_norm: 0.8133698149060844, iteration: 317757
loss: 0.9862396121025085,grad_norm: 0.7699338541579738, iteration: 317758
loss: 1.0347920656204224,grad_norm: 0.9999991016364559, iteration: 317759
loss: 1.0235430002212524,grad_norm: 0.6822443768883583, iteration: 317760
loss: 0.9602635502815247,grad_norm: 0.7508390891627288, iteration: 317761
loss: 0.9652696847915649,grad_norm: 0.8355107419144369, iteration: 317762
loss: 1.0251315832138062,grad_norm: 0.8103625397351611, iteration: 317763
loss: 0.9940473437309265,grad_norm: 0.9024176818466805, iteration: 317764
loss: 0.9642496705055237,grad_norm: 0.9474630124999623, iteration: 317765
loss: 0.9960502982139587,grad_norm: 0.6841222787866249, iteration: 317766
loss: 0.9919044971466064,grad_norm: 0.7146150419353708, iteration: 317767
loss: 0.9940433502197266,grad_norm: 0.795045467846631, iteration: 317768
loss: 1.0004335641860962,grad_norm: 0.9647705923934131, iteration: 317769
loss: 1.015499234199524,grad_norm: 0.9999990827172368, iteration: 317770
loss: 0.9798699617385864,grad_norm: 0.7444870628208285, iteration: 317771
loss: 1.019321322441101,grad_norm: 0.8996033335832336, iteration: 317772
loss: 0.9857704639434814,grad_norm: 0.6910075930431576, iteration: 317773
loss: 0.9749007821083069,grad_norm: 0.7611108573746114, iteration: 317774
loss: 1.0247832536697388,grad_norm: 0.9853219210321095, iteration: 317775
loss: 0.9830527901649475,grad_norm: 0.7841794627988344, iteration: 317776
loss: 1.0282942056655884,grad_norm: 0.999999194283947, iteration: 317777
loss: 1.024989366531372,grad_norm: 0.7194077141121986, iteration: 317778
loss: 1.0062228441238403,grad_norm: 0.7792161832528562, iteration: 317779
loss: 0.9977325201034546,grad_norm: 0.9999996018126929, iteration: 317780
loss: 1.014519214630127,grad_norm: 0.8103443769417766, iteration: 317781
loss: 0.9777873754501343,grad_norm: 0.8938603847467886, iteration: 317782
loss: 1.0388165712356567,grad_norm: 0.8917962567651087, iteration: 317783
loss: 0.9935340285301208,grad_norm: 0.8309926251003991, iteration: 317784
loss: 0.9544026255607605,grad_norm: 0.8738262682989154, iteration: 317785
loss: 0.9826778173446655,grad_norm: 0.7980012857952706, iteration: 317786
loss: 1.0280208587646484,grad_norm: 0.8274486021871845, iteration: 317787
loss: 1.0274088382720947,grad_norm: 0.9999994878231753, iteration: 317788
loss: 1.0119194984436035,grad_norm: 0.7438276577530684, iteration: 317789
loss: 0.989484429359436,grad_norm: 0.9945378350150658, iteration: 317790
loss: 1.02360200881958,grad_norm: 0.9099756672582957, iteration: 317791
loss: 1.0127547979354858,grad_norm: 0.9290341929674685, iteration: 317792
loss: 0.9597555994987488,grad_norm: 0.7841216027424531, iteration: 317793
loss: 0.98895663022995,grad_norm: 0.9999991590082692, iteration: 317794
loss: 0.9754461646080017,grad_norm: 0.8925596359835017, iteration: 317795
loss: 1.083052158355713,grad_norm: 0.986333569958261, iteration: 317796
loss: 1.009676218032837,grad_norm: 0.8300720962365135, iteration: 317797
loss: 0.9482763409614563,grad_norm: 0.7901000291825413, iteration: 317798
loss: 1.0521057844161987,grad_norm: 0.9999993539635147, iteration: 317799
loss: 0.9524496793746948,grad_norm: 0.9363565960343315, iteration: 317800
loss: 1.003651738166809,grad_norm: 0.9068673386515693, iteration: 317801
loss: 0.9839271903038025,grad_norm: 0.7261566803361081, iteration: 317802
loss: 1.0098885297775269,grad_norm: 0.7713256662349468, iteration: 317803
loss: 0.9857694506645203,grad_norm: 0.7808746003367631, iteration: 317804
loss: 0.9762073755264282,grad_norm: 0.7706988756960694, iteration: 317805
loss: 0.9947105050086975,grad_norm: 0.7849928363771461, iteration: 317806
loss: 0.9911761283874512,grad_norm: 0.7084150779256332, iteration: 317807
loss: 1.0355260372161865,grad_norm: 0.9999997037662949, iteration: 317808
loss: 0.9912863373756409,grad_norm: 0.9633175951065929, iteration: 317809
loss: 0.9977913498878479,grad_norm: 0.9378892655410613, iteration: 317810
loss: 0.99467533826828,grad_norm: 0.941912794034267, iteration: 317811
loss: 0.972197949886322,grad_norm: 0.8315521200428866, iteration: 317812
loss: 1.0235172510147095,grad_norm: 0.8602455384199746, iteration: 317813
loss: 1.0234116315841675,grad_norm: 0.810242267980526, iteration: 317814
loss: 1.0040472745895386,grad_norm: 0.8015351240765805, iteration: 317815
loss: 1.0324991941452026,grad_norm: 0.8159416830352253, iteration: 317816
loss: 1.030177116394043,grad_norm: 0.8285657110066622, iteration: 317817
loss: 0.9898066520690918,grad_norm: 0.7344845688446353, iteration: 317818
loss: 1.0164786577224731,grad_norm: 0.8801143016366239, iteration: 317819
loss: 0.9840240478515625,grad_norm: 0.8426735150077805, iteration: 317820
loss: 0.982889711856842,grad_norm: 0.943886796090129, iteration: 317821
loss: 0.9910454750061035,grad_norm: 0.8974836210504534, iteration: 317822
loss: 1.0356800556182861,grad_norm: 0.9767221440472359, iteration: 317823
loss: 1.034926414489746,grad_norm: 0.9999989895210157, iteration: 317824
loss: 0.9864741563796997,grad_norm: 0.999999677672044, iteration: 317825
loss: 1.0106920003890991,grad_norm: 0.9999991433545385, iteration: 317826
loss: 0.9761565923690796,grad_norm: 0.722205775994523, iteration: 317827
loss: 0.9917863011360168,grad_norm: 0.7774644686929255, iteration: 317828
loss: 0.9928399920463562,grad_norm: 0.8688985695872731, iteration: 317829
loss: 1.0284123420715332,grad_norm: 0.7461860141522247, iteration: 317830
loss: 0.9881525039672852,grad_norm: 0.6769974518491546, iteration: 317831
loss: 0.9880263805389404,grad_norm: 0.8215631801405155, iteration: 317832
loss: 1.0392767190933228,grad_norm: 0.9999990326578323, iteration: 317833
loss: 1.0321900844573975,grad_norm: 0.9999999930899907, iteration: 317834
loss: 0.9987595677375793,grad_norm: 0.7003737189429267, iteration: 317835
loss: 1.0468711853027344,grad_norm: 0.826746824762238, iteration: 317836
loss: 1.0022882223129272,grad_norm: 0.9999990566642494, iteration: 317837
loss: 0.990824818611145,grad_norm: 0.7568354583592172, iteration: 317838
loss: 1.0506293773651123,grad_norm: 0.9999991513362266, iteration: 317839
loss: 0.9947800636291504,grad_norm: 0.8266456858788538, iteration: 317840
loss: 1.0231328010559082,grad_norm: 0.9628082450666677, iteration: 317841
loss: 1.0247538089752197,grad_norm: 0.9675537247180901, iteration: 317842
loss: 0.9804790616035461,grad_norm: 0.765597429271193, iteration: 317843
loss: 0.9926014542579651,grad_norm: 0.9494935042722185, iteration: 317844
loss: 1.0039514303207397,grad_norm: 0.8199853851678491, iteration: 317845
loss: 0.9787432551383972,grad_norm: 0.8581548093491388, iteration: 317846
loss: 1.0628207921981812,grad_norm: 0.9999991871325993, iteration: 317847
loss: 1.0038039684295654,grad_norm: 0.7405960151452272, iteration: 317848
loss: 1.0002483129501343,grad_norm: 0.8133007061626512, iteration: 317849
loss: 0.9750783443450928,grad_norm: 0.8145515349278375, iteration: 317850
loss: 1.0241236686706543,grad_norm: 0.9707242237697739, iteration: 317851
loss: 1.0039794445037842,grad_norm: 0.9804077532725204, iteration: 317852
loss: 1.0300178527832031,grad_norm: 0.8019335310337663, iteration: 317853
loss: 1.029097080230713,grad_norm: 0.7529950265216587, iteration: 317854
loss: 1.0091007947921753,grad_norm: 0.9999993318353543, iteration: 317855
loss: 1.0401620864868164,grad_norm: 0.9999997805060304, iteration: 317856
loss: 1.0807139873504639,grad_norm: 0.9500488435088195, iteration: 317857
loss: 0.9829288125038147,grad_norm: 0.7904743203689321, iteration: 317858
loss: 0.997342586517334,grad_norm: 0.9999996963267325, iteration: 317859
loss: 0.9841909408569336,grad_norm: 0.8669173640019057, iteration: 317860
loss: 0.9755001664161682,grad_norm: 0.9175196963042602, iteration: 317861
loss: 0.9618591070175171,grad_norm: 0.8099814594279392, iteration: 317862
loss: 0.9782416224479675,grad_norm: 0.8906721549724734, iteration: 317863
loss: 0.944782018661499,grad_norm: 0.9593840449534617, iteration: 317864
loss: 1.0615695714950562,grad_norm: 0.8520745386773884, iteration: 317865
loss: 0.9741443395614624,grad_norm: 0.9184532360689609, iteration: 317866
loss: 0.9710408449172974,grad_norm: 0.9033094805084444, iteration: 317867
loss: 1.0038069486618042,grad_norm: 0.8803474322056494, iteration: 317868
loss: 0.9828623533248901,grad_norm: 0.8228318038961533, iteration: 317869
loss: 0.9939117431640625,grad_norm: 0.7308861525490443, iteration: 317870
loss: 0.9958404302597046,grad_norm: 0.8584672085716835, iteration: 317871
loss: 1.029712438583374,grad_norm: 0.9999997143434949, iteration: 317872
loss: 0.9891009330749512,grad_norm: 0.747909167967037, iteration: 317873
loss: 1.0138437747955322,grad_norm: 0.8982433228669425, iteration: 317874
loss: 0.9587938785552979,grad_norm: 0.7019512332447769, iteration: 317875
loss: 1.0521422624588013,grad_norm: 0.8789783578131106, iteration: 317876
loss: 1.0192326307296753,grad_norm: 0.7734209754874897, iteration: 317877
loss: 1.0334497690200806,grad_norm: 0.9999991624059194, iteration: 317878
loss: 1.0124229192733765,grad_norm: 0.9999991291268716, iteration: 317879
loss: 0.9815763235092163,grad_norm: 0.9999991585099984, iteration: 317880
loss: 1.0205674171447754,grad_norm: 0.816019412653525, iteration: 317881
loss: 0.9398918747901917,grad_norm: 0.8337826454151867, iteration: 317882
loss: 0.9978635907173157,grad_norm: 0.804858918435046, iteration: 317883
loss: 1.043683648109436,grad_norm: 0.8680942024205848, iteration: 317884
loss: 0.9981676936149597,grad_norm: 0.8429428332448394, iteration: 317885
loss: 0.9959551692008972,grad_norm: 0.8412290216110583, iteration: 317886
loss: 0.9879487156867981,grad_norm: 0.9071139955403169, iteration: 317887
loss: 0.9779915809631348,grad_norm: 0.8586896650181671, iteration: 317888
loss: 0.9984123110771179,grad_norm: 0.891317518262743, iteration: 317889
loss: 1.1145741939544678,grad_norm: 0.9999997898450419, iteration: 317890
loss: 0.9823376536369324,grad_norm: 0.9999990394793121, iteration: 317891
loss: 1.0252805948257446,grad_norm: 0.9999997414754777, iteration: 317892
loss: 1.0031440258026123,grad_norm: 0.8827356070290825, iteration: 317893
loss: 1.0380315780639648,grad_norm: 0.9624870568889033, iteration: 317894
loss: 1.0217067003250122,grad_norm: 0.8840179913855899, iteration: 317895
loss: 1.0432785749435425,grad_norm: 0.9544810494646082, iteration: 317896
loss: 1.0001894235610962,grad_norm: 0.819926610209753, iteration: 317897
loss: 1.031644344329834,grad_norm: 0.9288192605363318, iteration: 317898
loss: 1.0204253196716309,grad_norm: 0.7717185056167261, iteration: 317899
loss: 0.969222903251648,grad_norm: 0.8123814408202489, iteration: 317900
loss: 0.9622164368629456,grad_norm: 0.9999992974114765, iteration: 317901
loss: 0.9892242550849915,grad_norm: 0.7395624705882683, iteration: 317902
loss: 1.0176140069961548,grad_norm: 0.9999994039216139, iteration: 317903
loss: 1.0551902055740356,grad_norm: 0.9999993999754034, iteration: 317904
loss: 1.201812505722046,grad_norm: 0.9999992817098672, iteration: 317905
loss: 0.9796717166900635,grad_norm: 0.9025453790731465, iteration: 317906
loss: 1.076507329940796,grad_norm: 0.9016501412651806, iteration: 317907
loss: 1.0635253190994263,grad_norm: 0.9999996929237484, iteration: 317908
loss: 0.9942758083343506,grad_norm: 0.7862717398972803, iteration: 317909
loss: 0.9900926947593689,grad_norm: 0.9148593131373124, iteration: 317910
loss: 0.992739200592041,grad_norm: 0.9999992672312176, iteration: 317911
loss: 1.030281662940979,grad_norm: 0.999999027785296, iteration: 317912
loss: 1.0063139200210571,grad_norm: 0.9538729251099882, iteration: 317913
loss: 0.9447364211082458,grad_norm: 0.7825192401203802, iteration: 317914
loss: 1.0219390392303467,grad_norm: 0.7577340834817327, iteration: 317915
loss: 0.9873234033584595,grad_norm: 0.9999998466849909, iteration: 317916
loss: 0.9806339740753174,grad_norm: 0.7321786966222117, iteration: 317917
loss: 1.0051982402801514,grad_norm: 0.9155781336917294, iteration: 317918
loss: 1.0317577123641968,grad_norm: 0.9999997616239732, iteration: 317919
loss: 0.9925373196601868,grad_norm: 0.9999993419887637, iteration: 317920
loss: 0.9878238439559937,grad_norm: 0.8037454137606417, iteration: 317921
loss: 0.9961337447166443,grad_norm: 0.8339607528849114, iteration: 317922
loss: 0.9950708150863647,grad_norm: 0.7737138694949119, iteration: 317923
loss: 1.0053309202194214,grad_norm: 0.895042266121747, iteration: 317924
loss: 1.0054517984390259,grad_norm: 0.821260903042637, iteration: 317925
loss: 0.9947304129600525,grad_norm: 0.9142159908039265, iteration: 317926
loss: 0.9676483869552612,grad_norm: 0.7818227460799083, iteration: 317927
loss: 1.156104326248169,grad_norm: 0.9999999653611867, iteration: 317928
loss: 1.0261085033416748,grad_norm: 0.8326222611222635, iteration: 317929
loss: 0.9862844347953796,grad_norm: 0.9999995310636434, iteration: 317930
loss: 0.9597021341323853,grad_norm: 0.9999995064286651, iteration: 317931
loss: 1.0074702501296997,grad_norm: 0.9999999977311292, iteration: 317932
loss: 0.9720425009727478,grad_norm: 0.7805321095763291, iteration: 317933
loss: 1.013150930404663,grad_norm: 0.8467075613090027, iteration: 317934
loss: 1.0304268598556519,grad_norm: 0.9999991810801264, iteration: 317935
loss: 1.032714605331421,grad_norm: 0.9178104154046707, iteration: 317936
loss: 1.0678083896636963,grad_norm: 0.8786003287069547, iteration: 317937
loss: 0.9845611453056335,grad_norm: 0.748359424707012, iteration: 317938
loss: 0.9781898260116577,grad_norm: 0.8385915257100908, iteration: 317939
loss: 1.0224899053573608,grad_norm: 0.8470913574734051, iteration: 317940
loss: 1.0811539888381958,grad_norm: 0.9999993065492404, iteration: 317941
loss: 1.0219318866729736,grad_norm: 0.9346763077004636, iteration: 317942
loss: 0.988821268081665,grad_norm: 0.9222495740285439, iteration: 317943
loss: 1.0337949991226196,grad_norm: 0.9401869528770723, iteration: 317944
loss: 1.1087591648101807,grad_norm: 0.9999998176130184, iteration: 317945
loss: 0.9533452987670898,grad_norm: 0.9357193579661536, iteration: 317946
loss: 0.9918469190597534,grad_norm: 0.7018268981085328, iteration: 317947
loss: 0.9769417643547058,grad_norm: 0.9380384855558274, iteration: 317948
loss: 0.985995888710022,grad_norm: 0.7963844802825072, iteration: 317949
loss: 0.9857324957847595,grad_norm: 0.999999068683322, iteration: 317950
loss: 0.9978786706924438,grad_norm: 0.9999990960743694, iteration: 317951
loss: 0.9968434572219849,grad_norm: 0.7620347393138939, iteration: 317952
loss: 1.021979808807373,grad_norm: 0.9999992997902809, iteration: 317953
loss: 0.9698818922042847,grad_norm: 0.8479405396525004, iteration: 317954
loss: 1.0461500883102417,grad_norm: 0.9999991261262814, iteration: 317955
loss: 1.019841194152832,grad_norm: 0.8102012769403307, iteration: 317956
loss: 1.0380396842956543,grad_norm: 0.8040077085128192, iteration: 317957
loss: 0.9991565346717834,grad_norm: 0.9078826313684917, iteration: 317958
loss: 0.9761587381362915,grad_norm: 0.9985865056664721, iteration: 317959
loss: 0.9874999523162842,grad_norm: 0.813603898710722, iteration: 317960
loss: 1.0037492513656616,grad_norm: 0.9999993154977443, iteration: 317961
loss: 1.0792113542556763,grad_norm: 0.9999994922871338, iteration: 317962
loss: 1.0335369110107422,grad_norm: 0.8832228015993923, iteration: 317963
loss: 0.9503030776977539,grad_norm: 0.9155727180596781, iteration: 317964
loss: 0.9825146198272705,grad_norm: 0.8899656894051119, iteration: 317965
loss: 0.9887887239456177,grad_norm: 0.8764915342617757, iteration: 317966
loss: 0.9782549142837524,grad_norm: 0.9561686155030455, iteration: 317967
loss: 1.0148448944091797,grad_norm: 0.9999991570380027, iteration: 317968
loss: 0.9893339276313782,grad_norm: 0.999999235612129, iteration: 317969
loss: 0.9988316297531128,grad_norm: 0.8071684457408257, iteration: 317970
loss: 1.0181550979614258,grad_norm: 0.8895330236014971, iteration: 317971
loss: 0.9524562358856201,grad_norm: 0.874182727693678, iteration: 317972
loss: 0.9815458059310913,grad_norm: 0.99999930749572, iteration: 317973
loss: 0.9848671555519104,grad_norm: 0.7519442944908248, iteration: 317974
loss: 1.0026280879974365,grad_norm: 0.9333680248631941, iteration: 317975
loss: 0.9878378510475159,grad_norm: 0.8759732937581006, iteration: 317976
loss: 1.031627893447876,grad_norm: 0.7328371170736881, iteration: 317977
loss: 0.9878901243209839,grad_norm: 1.0000000023161308, iteration: 317978
loss: 0.950340747833252,grad_norm: 0.9669255449083232, iteration: 317979
loss: 0.9992074370384216,grad_norm: 0.9377608362082221, iteration: 317980
loss: 0.9755033254623413,grad_norm: 0.7810978180623583, iteration: 317981
loss: 0.976262092590332,grad_norm: 0.9315573426885455, iteration: 317982
loss: 1.0114539861679077,grad_norm: 0.8214782869999162, iteration: 317983
loss: 0.9902939200401306,grad_norm: 0.8462969429649692, iteration: 317984
loss: 1.0019248723983765,grad_norm: 0.9999993254266304, iteration: 317985
loss: 1.0143169164657593,grad_norm: 0.7984054965920926, iteration: 317986
loss: 0.9826332926750183,grad_norm: 0.882141165251528, iteration: 317987
loss: 0.9861593842506409,grad_norm: 0.9540855598794936, iteration: 317988
loss: 0.9802854061126709,grad_norm: 0.7469116381115211, iteration: 317989
loss: 0.9991190433502197,grad_norm: 0.6710688704187416, iteration: 317990
loss: 1.0127015113830566,grad_norm: 0.6878679636100247, iteration: 317991
loss: 0.9456298351287842,grad_norm: 0.7955747048309958, iteration: 317992
loss: 1.0149874687194824,grad_norm: 0.999999386887872, iteration: 317993
loss: 0.9725880026817322,grad_norm: 0.8893906373468337, iteration: 317994
loss: 1.0072460174560547,grad_norm: 0.9999993907754714, iteration: 317995
loss: 0.9997349381446838,grad_norm: 0.7681125137173992, iteration: 317996
loss: 0.9926717877388,grad_norm: 0.7718534871217044, iteration: 317997
loss: 1.0429508686065674,grad_norm: 0.9999997264200137, iteration: 317998
loss: 0.990353524684906,grad_norm: 0.9481464826938449, iteration: 317999
loss: 0.9828507304191589,grad_norm: 0.8839940788655455, iteration: 318000
loss: 0.9904004335403442,grad_norm: 0.8965837058289338, iteration: 318001
loss: 1.0086463689804077,grad_norm: 0.9247662021639927, iteration: 318002
loss: 0.9893470406532288,grad_norm: 0.9999991032129788, iteration: 318003
loss: 0.9890380501747131,grad_norm: 0.7417023039009277, iteration: 318004
loss: 1.006568193435669,grad_norm: 0.795283831266599, iteration: 318005
loss: 0.9960603713989258,grad_norm: 0.7818798530078634, iteration: 318006
loss: 1.0013349056243896,grad_norm: 0.952615869062288, iteration: 318007
loss: 1.0004298686981201,grad_norm: 0.7114024627821792, iteration: 318008
loss: 1.0186271667480469,grad_norm: 0.9518638471793645, iteration: 318009
loss: 1.026617169380188,grad_norm: 0.8132151507596851, iteration: 318010
loss: 1.0142236948013306,grad_norm: 0.7765848102499096, iteration: 318011
loss: 1.0392876863479614,grad_norm: 0.8753328392485863, iteration: 318012
loss: 0.9952142834663391,grad_norm: 0.8137411774023152, iteration: 318013
loss: 0.9906359910964966,grad_norm: 0.9238749716732872, iteration: 318014
loss: 0.9829943776130676,grad_norm: 0.7513664188278991, iteration: 318015
loss: 0.9828718304634094,grad_norm: 0.7721971492902736, iteration: 318016
loss: 0.9618814587593079,grad_norm: 0.7980593508233442, iteration: 318017
loss: 1.0002827644348145,grad_norm: 0.7525972147471474, iteration: 318018
loss: 1.0306241512298584,grad_norm: 0.9722085049710708, iteration: 318019
loss: 1.0503205060958862,grad_norm: 0.9999997472549099, iteration: 318020
loss: 1.0117039680480957,grad_norm: 0.8612532353439583, iteration: 318021
loss: 0.9891775846481323,grad_norm: 0.8884875965463507, iteration: 318022
loss: 1.0150610208511353,grad_norm: 0.9624398686626197, iteration: 318023
loss: 1.041346549987793,grad_norm: 0.999999695756559, iteration: 318024
loss: 0.9769099950790405,grad_norm: 0.9408189784060723, iteration: 318025
loss: 1.0014206171035767,grad_norm: 0.9362928075304819, iteration: 318026
loss: 1.0448390245437622,grad_norm: 0.9999992390872137, iteration: 318027
loss: 1.0122038125991821,grad_norm: 0.9885479451117555, iteration: 318028
loss: 0.9721570014953613,grad_norm: 0.89014841508936, iteration: 318029
loss: 1.1283748149871826,grad_norm: 0.9999997567327933, iteration: 318030
loss: 0.979779064655304,grad_norm: 0.9246004055329127, iteration: 318031
loss: 1.0181033611297607,grad_norm: 0.8498597995020462, iteration: 318032
loss: 0.9645771980285645,grad_norm: 0.9600298583375267, iteration: 318033
loss: 0.9961705803871155,grad_norm: 0.7642076791762524, iteration: 318034
loss: 0.9996199011802673,grad_norm: 0.8658476360604752, iteration: 318035
loss: 0.9699020385742188,grad_norm: 0.9773975917072066, iteration: 318036
loss: 1.030903697013855,grad_norm: 0.9445972006554871, iteration: 318037
loss: 0.9846420884132385,grad_norm: 0.8662834798936493, iteration: 318038
loss: 0.9832289218902588,grad_norm: 0.7897522214415721, iteration: 318039
loss: 0.979939877986908,grad_norm: 0.80295112692728, iteration: 318040
loss: 1.0319573879241943,grad_norm: 0.7825776454605927, iteration: 318041
loss: 0.9882990121841431,grad_norm: 0.927758385713542, iteration: 318042
loss: 1.0191857814788818,grad_norm: 0.6785096756821284, iteration: 318043
loss: 1.0100983381271362,grad_norm: 0.9091143954370439, iteration: 318044
loss: 1.0042325258255005,grad_norm: 0.7146955369891733, iteration: 318045
loss: 1.0641027688980103,grad_norm: 0.9796865704171211, iteration: 318046
loss: 1.0953749418258667,grad_norm: 0.9999990723747436, iteration: 318047
loss: 0.9748794436454773,grad_norm: 0.7033768516678572, iteration: 318048
loss: 0.993855893611908,grad_norm: 0.7815016707949086, iteration: 318049
loss: 1.0246710777282715,grad_norm: 0.7356480635551806, iteration: 318050
loss: 0.9957066774368286,grad_norm: 0.9032066838698882, iteration: 318051
loss: 0.9686138033866882,grad_norm: 0.7717866386135876, iteration: 318052
loss: 0.983478307723999,grad_norm: 0.9859030471621077, iteration: 318053
loss: 1.008222222328186,grad_norm: 0.7613558849576358, iteration: 318054
loss: 1.0018607378005981,grad_norm: 0.7302687615786229, iteration: 318055
loss: 1.0552998781204224,grad_norm: 0.9999992008509663, iteration: 318056
loss: 1.006667137145996,grad_norm: 0.9999991156422945, iteration: 318057
loss: 0.9568265080451965,grad_norm: 0.7848750692477546, iteration: 318058
loss: 1.0354551076889038,grad_norm: 0.93563506816001, iteration: 318059
loss: 1.012650728225708,grad_norm: 0.758425030132274, iteration: 318060
loss: 1.0508832931518555,grad_norm: 0.999999303358666, iteration: 318061
loss: 0.9455450177192688,grad_norm: 0.7798435231556602, iteration: 318062
loss: 1.0193973779678345,grad_norm: 0.940361078909156, iteration: 318063
loss: 1.012070655822754,grad_norm: 0.7083247712351962, iteration: 318064
loss: 0.9997593760490417,grad_norm: 0.9001643285914291, iteration: 318065
loss: 1.01820969581604,grad_norm: 0.860684045754921, iteration: 318066
loss: 0.9605212807655334,grad_norm: 0.9999990865415207, iteration: 318067
loss: 1.0270075798034668,grad_norm: 0.7882885160924179, iteration: 318068
loss: 0.9986156821250916,grad_norm: 0.9055795119801221, iteration: 318069
loss: 0.9669762849807739,grad_norm: 0.927768291675289, iteration: 318070
loss: 1.004110336303711,grad_norm: 0.8736519207359761, iteration: 318071
loss: 0.9638081192970276,grad_norm: 0.9339324134315007, iteration: 318072
loss: 1.020578384399414,grad_norm: 0.9999993454612673, iteration: 318073
loss: 1.021782636642456,grad_norm: 0.9999991194424824, iteration: 318074
loss: 0.9782058596611023,grad_norm: 0.9999989397869635, iteration: 318075
loss: 1.0155843496322632,grad_norm: 0.8148517628024112, iteration: 318076
loss: 0.9565088152885437,grad_norm: 0.942807065734849, iteration: 318077
loss: 1.0604249238967896,grad_norm: 0.9999993470457955, iteration: 318078
loss: 1.0264201164245605,grad_norm: 0.8413438254530562, iteration: 318079
loss: 1.0588982105255127,grad_norm: 0.9999999892868091, iteration: 318080
loss: 0.984857976436615,grad_norm: 0.8964523111730073, iteration: 318081
loss: 1.0365076065063477,grad_norm: 0.7623024834182873, iteration: 318082
loss: 1.0078190565109253,grad_norm: 0.9999998051813964, iteration: 318083
loss: 1.0563408136367798,grad_norm: 0.8100444392761479, iteration: 318084
loss: 1.0370408296585083,grad_norm: 0.9523113271154047, iteration: 318085
loss: 1.0325701236724854,grad_norm: 0.8125930568616292, iteration: 318086
loss: 0.9924044013023376,grad_norm: 0.9999994560918868, iteration: 318087
loss: 0.984100878238678,grad_norm: 0.880097067751618, iteration: 318088
loss: 1.0072795152664185,grad_norm: 0.732836797797031, iteration: 318089
loss: 1.0177608728408813,grad_norm: 0.8008073724571741, iteration: 318090
loss: 1.0142383575439453,grad_norm: 0.8929792696279195, iteration: 318091
loss: 0.954148530960083,grad_norm: 0.8160087139301264, iteration: 318092
loss: 1.0384269952774048,grad_norm: 0.8778565032707153, iteration: 318093
loss: 0.9920492768287659,grad_norm: 0.75748326123338, iteration: 318094
loss: 0.9712273478507996,grad_norm: 0.8545196915747787, iteration: 318095
loss: 1.0099124908447266,grad_norm: 0.7359450952076696, iteration: 318096
loss: 1.002655267715454,grad_norm: 0.737084560755439, iteration: 318097
loss: 0.9740648865699768,grad_norm: 0.716175100058129, iteration: 318098
loss: 0.9936127066612244,grad_norm: 0.8858188405563054, iteration: 318099
loss: 1.003843903541565,grad_norm: 0.8663265177526663, iteration: 318100
loss: 1.0313177108764648,grad_norm: 0.8002115781118063, iteration: 318101
loss: 0.999186098575592,grad_norm: 0.8630021081737318, iteration: 318102
loss: 1.0104972124099731,grad_norm: 0.8261570929034623, iteration: 318103
loss: 1.0022988319396973,grad_norm: 0.7962658479278281, iteration: 318104
loss: 0.9912862777709961,grad_norm: 0.7243787262611017, iteration: 318105
loss: 1.0171645879745483,grad_norm: 0.7723370281553572, iteration: 318106
loss: 0.9976374506950378,grad_norm: 0.6618018554926215, iteration: 318107
loss: 1.007739543914795,grad_norm: 0.9999990325454926, iteration: 318108
loss: 1.0597147941589355,grad_norm: 0.999999067091612, iteration: 318109
loss: 1.0299805402755737,grad_norm: 0.9999998197091138, iteration: 318110
loss: 0.9970045685768127,grad_norm: 0.8662961109333821, iteration: 318111
loss: 0.9790367484092712,grad_norm: 0.8669413702698561, iteration: 318112
loss: 1.036972999572754,grad_norm: 0.7698228933001476, iteration: 318113
loss: 0.9977408647537231,grad_norm: 0.9386299891295079, iteration: 318114
loss: 1.0213534832000732,grad_norm: 0.9999993939508675, iteration: 318115
loss: 1.0007990598678589,grad_norm: 0.7106364018885523, iteration: 318116
loss: 1.0258581638336182,grad_norm: 0.7553744419332463, iteration: 318117
loss: 1.022897481918335,grad_norm: 0.872377133527132, iteration: 318118
loss: 1.010347604751587,grad_norm: 0.999999038688765, iteration: 318119
loss: 0.9994984865188599,grad_norm: 0.89200067943813, iteration: 318120
loss: 1.0301659107208252,grad_norm: 0.8764980757103902, iteration: 318121
loss: 1.0017507076263428,grad_norm: 0.6898887726769634, iteration: 318122
loss: 0.9873006343841553,grad_norm: 0.9001192647038825, iteration: 318123
loss: 1.0064266920089722,grad_norm: 0.9407986472672105, iteration: 318124
loss: 1.0194987058639526,grad_norm: 0.8915812974433581, iteration: 318125
loss: 1.003891110420227,grad_norm: 0.9693489549619414, iteration: 318126
loss: 1.0056110620498657,grad_norm: 0.9266237463316895, iteration: 318127
loss: 0.9869358539581299,grad_norm: 0.7914904630345017, iteration: 318128
loss: 1.00785493850708,grad_norm: 0.8017483214690702, iteration: 318129
loss: 0.9731813073158264,grad_norm: 0.8025629747232339, iteration: 318130
loss: 1.031546950340271,grad_norm: 0.7510393846609591, iteration: 318131
loss: 1.026158094406128,grad_norm: 0.8415197126365925, iteration: 318132
loss: 1.0717651844024658,grad_norm: 0.9999992251567665, iteration: 318133
loss: 0.9766327142715454,grad_norm: 0.9701351810172606, iteration: 318134
loss: 0.9950076341629028,grad_norm: 0.8947189092026083, iteration: 318135
loss: 1.1312137842178345,grad_norm: 0.9999991488722386, iteration: 318136
loss: 1.0144398212432861,grad_norm: 0.7194599752848092, iteration: 318137
loss: 1.0045490264892578,grad_norm: 0.7902369432628378, iteration: 318138
loss: 0.95293128490448,grad_norm: 0.9043841120750254, iteration: 318139
loss: 0.9699028134346008,grad_norm: 0.8304382829089177, iteration: 318140
loss: 0.9513047933578491,grad_norm: 0.7479477598656703, iteration: 318141
loss: 0.9661115407943726,grad_norm: 0.9692306127468298, iteration: 318142
loss: 1.0457926988601685,grad_norm: 0.9999991923146518, iteration: 318143
loss: 0.9938675165176392,grad_norm: 0.9342609382208872, iteration: 318144
loss: 0.9805557727813721,grad_norm: 0.9999990531421998, iteration: 318145
loss: 0.98077791929245,grad_norm: 0.9577962294590004, iteration: 318146
loss: 1.077609658241272,grad_norm: 0.9999991194151965, iteration: 318147
loss: 0.9983452558517456,grad_norm: 0.9999991654480248, iteration: 318148
loss: 0.9818779826164246,grad_norm: 0.9999990777684822, iteration: 318149
loss: 0.9798951745033264,grad_norm: 0.8364481697646912, iteration: 318150
loss: 0.9500128030776978,grad_norm: 0.8853844252092069, iteration: 318151
loss: 0.9946374297142029,grad_norm: 0.8559743044723399, iteration: 318152
loss: 0.9706994891166687,grad_norm: 0.8102631205592271, iteration: 318153
loss: 1.0033304691314697,grad_norm: 0.8209311778659975, iteration: 318154
loss: 0.9755173325538635,grad_norm: 0.8830016095856659, iteration: 318155
loss: 0.9999593496322632,grad_norm: 0.7857091249588255, iteration: 318156
loss: 0.9864010214805603,grad_norm: 0.8021230937684846, iteration: 318157
loss: 1.0106216669082642,grad_norm: 0.8602880841963629, iteration: 318158
loss: 0.980207622051239,grad_norm: 0.7536849924092138, iteration: 318159
loss: 0.9598830342292786,grad_norm: 0.8848185884229133, iteration: 318160
loss: 0.9958423972129822,grad_norm: 0.8964226832663597, iteration: 318161
loss: 1.0390938520431519,grad_norm: 0.9999989221118312, iteration: 318162
loss: 1.0427192449569702,grad_norm: 0.9999995475985232, iteration: 318163
loss: 1.2287214994430542,grad_norm: 0.9999995536406777, iteration: 318164
loss: 1.0117722749710083,grad_norm: 0.8128797512642638, iteration: 318165
loss: 0.9846054315567017,grad_norm: 0.7254767701335049, iteration: 318166
loss: 1.0168617963790894,grad_norm: 0.9114611159874627, iteration: 318167
loss: 0.9673555493354797,grad_norm: 0.7966945363459286, iteration: 318168
loss: 1.0061641931533813,grad_norm: 0.7545421694674738, iteration: 318169
loss: 0.9823512434959412,grad_norm: 0.999999052245394, iteration: 318170
loss: 1.013314962387085,grad_norm: 0.7822162381633246, iteration: 318171
loss: 1.0356576442718506,grad_norm: 0.9999993589791246, iteration: 318172
loss: 0.9922770261764526,grad_norm: 0.7799107910556171, iteration: 318173
loss: 0.9848415851593018,grad_norm: 0.9106872273446385, iteration: 318174
loss: 0.9538019299507141,grad_norm: 0.9080404486411026, iteration: 318175
loss: 1.0272008180618286,grad_norm: 0.9999999421328142, iteration: 318176
loss: 0.98778235912323,grad_norm: 0.8370706944906944, iteration: 318177
loss: 0.9658220410346985,grad_norm: 0.814373730802928, iteration: 318178
loss: 0.9738847613334656,grad_norm: 0.8346504467663077, iteration: 318179
loss: 0.9867139458656311,grad_norm: 0.999999015464091, iteration: 318180
loss: 0.99601149559021,grad_norm: 0.8966799267327004, iteration: 318181
loss: 0.9912518262863159,grad_norm: 0.8478200213279277, iteration: 318182
loss: 1.0317693948745728,grad_norm: 0.8328360991198446, iteration: 318183
loss: 0.9606804847717285,grad_norm: 0.9999990562025258, iteration: 318184
loss: 1.0000805854797363,grad_norm: 0.8578436162288152, iteration: 318185
loss: 0.9749771356582642,grad_norm: 0.7538452787199764, iteration: 318186
loss: 1.0113704204559326,grad_norm: 0.8144425115320938, iteration: 318187
loss: 1.029604196548462,grad_norm: 0.7961469653314949, iteration: 318188
loss: 0.9967327117919922,grad_norm: 0.9999990522330451, iteration: 318189
loss: 0.9935745000839233,grad_norm: 0.9861947754391105, iteration: 318190
loss: 0.98699551820755,grad_norm: 0.841892961148381, iteration: 318191
loss: 1.0012892484664917,grad_norm: 0.8995372068940765, iteration: 318192
loss: 0.9577661156654358,grad_norm: 0.9185663941989229, iteration: 318193
loss: 1.0080550909042358,grad_norm: 0.9271334792401441, iteration: 318194
loss: 1.0399917364120483,grad_norm: 0.7944042017825593, iteration: 318195
loss: 1.0366381406784058,grad_norm: 0.7956706885979002, iteration: 318196
loss: 1.009678602218628,grad_norm: 0.9460215782793528, iteration: 318197
loss: 0.9867093563079834,grad_norm: 0.8197282022995941, iteration: 318198
loss: 0.988196611404419,grad_norm: 0.9999995653322853, iteration: 318199
loss: 0.9310200810432434,grad_norm: 0.7636154243623776, iteration: 318200
loss: 0.9927312135696411,grad_norm: 0.8539969339063882, iteration: 318201
loss: 0.9814682006835938,grad_norm: 0.8518541025649582, iteration: 318202
loss: 0.9942272901535034,grad_norm: 0.9687124535026369, iteration: 318203
loss: 0.9967893362045288,grad_norm: 0.6894679373173898, iteration: 318204
loss: 1.0887961387634277,grad_norm: 0.9863738694702693, iteration: 318205
loss: 0.9608003497123718,grad_norm: 0.8032654515606168, iteration: 318206
loss: 0.9979538917541504,grad_norm: 0.999999160941864, iteration: 318207
loss: 0.9704057574272156,grad_norm: 0.9999990091079698, iteration: 318208
loss: 0.9819077849388123,grad_norm: 0.9999991182825495, iteration: 318209
loss: 0.9860965013504028,grad_norm: 0.9999994064371296, iteration: 318210
loss: 0.9972705841064453,grad_norm: 0.9999990468464153, iteration: 318211
loss: 0.9970386028289795,grad_norm: 0.8300903287218001, iteration: 318212
loss: 0.964655339717865,grad_norm: 0.8867433088198141, iteration: 318213
loss: 0.9952797293663025,grad_norm: 0.8687954572350437, iteration: 318214
loss: 0.9763110876083374,grad_norm: 0.844545901819074, iteration: 318215
loss: 1.0286871194839478,grad_norm: 0.9999992164646034, iteration: 318216
loss: 0.9765421152114868,grad_norm: 0.9999998430383569, iteration: 318217
loss: 1.007365107536316,grad_norm: 0.7735422721375649, iteration: 318218
loss: 0.9645701050758362,grad_norm: 0.8263309799984049, iteration: 318219
loss: 1.0090773105621338,grad_norm: 0.8336549077124906, iteration: 318220
loss: 0.9671306610107422,grad_norm: 0.8010337207058116, iteration: 318221
loss: 1.1137337684631348,grad_norm: 0.9999991305551335, iteration: 318222
loss: 0.973893404006958,grad_norm: 0.9618029193274068, iteration: 318223
loss: 1.0128060579299927,grad_norm: 0.7927515042343265, iteration: 318224
loss: 1.0660744905471802,grad_norm: 0.9208116025595028, iteration: 318225
loss: 1.008804440498352,grad_norm: 0.898130748426453, iteration: 318226
loss: 1.027859091758728,grad_norm: 0.9517900045016603, iteration: 318227
loss: 0.9720101356506348,grad_norm: 0.8647357616461481, iteration: 318228
loss: 0.9833762049674988,grad_norm: 0.9999991082662482, iteration: 318229
loss: 1.0227521657943726,grad_norm: 0.8464504753066123, iteration: 318230
loss: 1.007675290107727,grad_norm: 0.7790738879525635, iteration: 318231
loss: 0.9937769770622253,grad_norm: 0.8172719372010351, iteration: 318232
loss: 1.0058034658432007,grad_norm: 0.7232343703970953, iteration: 318233
loss: 1.0552798509597778,grad_norm: 0.9999993055930538, iteration: 318234
loss: 1.002334713935852,grad_norm: 0.918911900026476, iteration: 318235
loss: 1.0665589570999146,grad_norm: 0.9999994081455884, iteration: 318236
loss: 0.9874098896980286,grad_norm: 0.6908242525683851, iteration: 318237
loss: 0.998113751411438,grad_norm: 0.9999997571628494, iteration: 318238
loss: 0.9789678454399109,grad_norm: 0.8766773095931902, iteration: 318239
loss: 1.041346788406372,grad_norm: 0.9999992443892383, iteration: 318240
loss: 1.0539674758911133,grad_norm: 0.7726699934546796, iteration: 318241
loss: 0.9736189246177673,grad_norm: 0.8555694453788616, iteration: 318242
loss: 1.00387442111969,grad_norm: 0.7005726152356606, iteration: 318243
loss: 1.021620750427246,grad_norm: 0.8732390217124377, iteration: 318244
loss: 0.9966906309127808,grad_norm: 0.8574286378849127, iteration: 318245
loss: 0.9677858948707581,grad_norm: 0.7317132592813551, iteration: 318246
loss: 1.038493275642395,grad_norm: 0.9999991284991708, iteration: 318247
loss: 0.9844177961349487,grad_norm: 0.6641728511192686, iteration: 318248
loss: 1.0221304893493652,grad_norm: 0.7557427193800299, iteration: 318249
loss: 1.0196337699890137,grad_norm: 0.8429639965266456, iteration: 318250
loss: 1.122084140777588,grad_norm: 0.9193986793961485, iteration: 318251
loss: 1.1019313335418701,grad_norm: 0.9999995580321441, iteration: 318252
loss: 0.9829134941101074,grad_norm: 0.8043757483871853, iteration: 318253
loss: 1.0268268585205078,grad_norm: 0.7625798921190188, iteration: 318254
loss: 1.0387558937072754,grad_norm: 0.8322856973918142, iteration: 318255
loss: 1.030755639076233,grad_norm: 0.9346668391141283, iteration: 318256
loss: 0.9673628211021423,grad_norm: 0.8791835163294113, iteration: 318257
loss: 1.0888890027999878,grad_norm: 0.9999994871125313, iteration: 318258
loss: 1.0489039421081543,grad_norm: 0.8391169304910115, iteration: 318259
loss: 0.9977741837501526,grad_norm: 0.865777837665564, iteration: 318260
loss: 0.975855827331543,grad_norm: 0.8856499689631776, iteration: 318261
loss: 1.0158147811889648,grad_norm: 0.8387908234646045, iteration: 318262
loss: 1.021502137184143,grad_norm: 0.7889747041018423, iteration: 318263
loss: 1.0623226165771484,grad_norm: 0.999999152372093, iteration: 318264
loss: 0.9933570027351379,grad_norm: 0.8591628055375765, iteration: 318265
loss: 0.951391875743866,grad_norm: 0.774176364404263, iteration: 318266
loss: 0.9819349646568298,grad_norm: 0.9404271405486183, iteration: 318267
loss: 1.0025506019592285,grad_norm: 0.7235578562138697, iteration: 318268
loss: 1.0061761140823364,grad_norm: 0.9455677665782526, iteration: 318269
loss: 1.0001919269561768,grad_norm: 0.6545239286086401, iteration: 318270
loss: 1.0290249586105347,grad_norm: 0.9999998602785537, iteration: 318271
loss: 1.0255498886108398,grad_norm: 0.8940555380032917, iteration: 318272
loss: 1.019318699836731,grad_norm: 0.9266162817777888, iteration: 318273
loss: 0.9778527021408081,grad_norm: 0.9730812252884651, iteration: 318274
loss: 1.00438392162323,grad_norm: 0.8906526141352165, iteration: 318275
loss: 0.982169508934021,grad_norm: 0.9999990156856352, iteration: 318276
loss: 1.0464075803756714,grad_norm: 0.7528931248557137, iteration: 318277
loss: 0.9675509333610535,grad_norm: 0.9327856147141173, iteration: 318278
loss: 1.1336697340011597,grad_norm: 0.9999992240036985, iteration: 318279
loss: 0.9993681907653809,grad_norm: 0.9788189189304107, iteration: 318280
loss: 1.042946219444275,grad_norm: 0.741650497881112, iteration: 318281
loss: 0.9779239892959595,grad_norm: 0.7941504190658171, iteration: 318282
loss: 0.9889428615570068,grad_norm: 0.7163303728331455, iteration: 318283
loss: 0.9884594678878784,grad_norm: 0.8713118162050222, iteration: 318284
loss: 0.9782108068466187,grad_norm: 0.9274819810765004, iteration: 318285
loss: 0.987628698348999,grad_norm: 0.8109518035902471, iteration: 318286
loss: 0.9861652255058289,grad_norm: 0.8442960597406968, iteration: 318287
loss: 0.9801855087280273,grad_norm: 0.8688931186716815, iteration: 318288
loss: 1.0166795253753662,grad_norm: 0.9999991627032153, iteration: 318289
loss: 1.055762767791748,grad_norm: 0.7443749858181746, iteration: 318290
loss: 0.9677788615226746,grad_norm: 0.7595789770033691, iteration: 318291
loss: 0.9595385193824768,grad_norm: 0.9607198211327242, iteration: 318292
loss: 1.0033270120620728,grad_norm: 0.7418905340223786, iteration: 318293
loss: 0.9870993494987488,grad_norm: 0.7382679696841498, iteration: 318294
loss: 0.9797281622886658,grad_norm: 0.7217863599891355, iteration: 318295
loss: 0.9955765008926392,grad_norm: 0.7778410865512262, iteration: 318296
loss: 0.9666853547096252,grad_norm: 0.9999990199875954, iteration: 318297
loss: 0.9881026148796082,grad_norm: 0.8297560947241697, iteration: 318298
loss: 1.0229665040969849,grad_norm: 0.9999990486826883, iteration: 318299
loss: 1.0076721906661987,grad_norm: 0.6770988664401607, iteration: 318300
loss: 1.014425277709961,grad_norm: 0.9999999725526404, iteration: 318301
loss: 0.9438920617103577,grad_norm: 0.7881489990222442, iteration: 318302
loss: 0.9751787185668945,grad_norm: 0.9403619331413596, iteration: 318303
loss: 0.9628893733024597,grad_norm: 0.7266621307470473, iteration: 318304
loss: 1.036664605140686,grad_norm: 0.9175202322641439, iteration: 318305
loss: 1.0491516590118408,grad_norm: 0.8748425225059338, iteration: 318306
loss: 0.9940600991249084,grad_norm: 0.7578052975001691, iteration: 318307
loss: 1.0077537298202515,grad_norm: 0.8850416789336815, iteration: 318308
loss: 0.9889968633651733,grad_norm: 0.7237852243965922, iteration: 318309
loss: 0.9756227731704712,grad_norm: 0.7476959861752646, iteration: 318310
loss: 1.0004101991653442,grad_norm: 0.8498220461759247, iteration: 318311
loss: 1.0093885660171509,grad_norm: 0.8025720276335279, iteration: 318312
loss: 0.9764256477355957,grad_norm: 0.9653093996018633, iteration: 318313
loss: 0.9685248732566833,grad_norm: 0.907048252208698, iteration: 318314
loss: 1.042595386505127,grad_norm: 0.9999993380355713, iteration: 318315
loss: 1.1081947088241577,grad_norm: 0.9999997186779437, iteration: 318316
loss: 1.2351149320602417,grad_norm: 0.9999997998642634, iteration: 318317
loss: 1.029221534729004,grad_norm: 0.8990354767815999, iteration: 318318
loss: 1.1098484992980957,grad_norm: 0.9999997311789575, iteration: 318319
loss: 0.9857762455940247,grad_norm: 0.7565172003969732, iteration: 318320
loss: 0.9760838747024536,grad_norm: 0.9582802178963001, iteration: 318321
loss: 0.9383484125137329,grad_norm: 0.6533906032762841, iteration: 318322
loss: 1.0256242752075195,grad_norm: 0.7907709667006562, iteration: 318323
loss: 1.003199577331543,grad_norm: 0.9999990610459544, iteration: 318324
loss: 1.1192995309829712,grad_norm: 0.9999996638890537, iteration: 318325
loss: 1.0248008966445923,grad_norm: 0.8760146818719919, iteration: 318326
loss: 1.2756179571151733,grad_norm: 0.9999998850697263, iteration: 318327
loss: 1.0151209831237793,grad_norm: 0.8971717189660761, iteration: 318328
loss: 1.2469391822814941,grad_norm: 0.9999995171252976, iteration: 318329
loss: 1.0065537691116333,grad_norm: 0.8367835678973974, iteration: 318330
loss: 1.242179036140442,grad_norm: 0.9999999050165379, iteration: 318331
loss: 1.0306308269500732,grad_norm: 0.8149394701945013, iteration: 318332
loss: 1.0219815969467163,grad_norm: 0.9999993566878593, iteration: 318333
loss: 1.0486897230148315,grad_norm: 0.8593141054583626, iteration: 318334
loss: 1.270250678062439,grad_norm: 0.9999995944181191, iteration: 318335
loss: 1.0592478513717651,grad_norm: 0.894442130467096, iteration: 318336
loss: 1.2236982583999634,grad_norm: 0.9999995755133833, iteration: 318337
loss: 1.0744836330413818,grad_norm: 0.9999996025125568, iteration: 318338
loss: 1.090971827507019,grad_norm: 0.9999998892397047, iteration: 318339
loss: 1.0698063373565674,grad_norm: 0.9999995164573184, iteration: 318340
loss: 0.9980103373527527,grad_norm: 0.8499460731696189, iteration: 318341
loss: 1.0094099044799805,grad_norm: 0.9055562634451547, iteration: 318342
loss: 1.0220352411270142,grad_norm: 0.9999992222957473, iteration: 318343
loss: 1.0907567739486694,grad_norm: 0.9999998977950386, iteration: 318344
loss: 1.0952988862991333,grad_norm: 0.9999997857322162, iteration: 318345
loss: 1.1431461572647095,grad_norm: 0.9999997052962059, iteration: 318346
loss: 1.0771392583847046,grad_norm: 0.9999999318660995, iteration: 318347
loss: 1.0120518207550049,grad_norm: 0.887760026986548, iteration: 318348
loss: 1.0113468170166016,grad_norm: 0.8367230018707619, iteration: 318349
loss: 1.1309685707092285,grad_norm: 0.9999998496219388, iteration: 318350
loss: 1.0411832332611084,grad_norm: 0.9999995811931892, iteration: 318351
loss: 1.0289236307144165,grad_norm: 0.7300757447266697, iteration: 318352
loss: 1.190118432044983,grad_norm: 0.9999993925746617, iteration: 318353
loss: 1.004713535308838,grad_norm: 0.9999990826251088, iteration: 318354
loss: 0.9905956387519836,grad_norm: 0.7270910448353124, iteration: 318355
loss: 1.0055999755859375,grad_norm: 0.9447993603067347, iteration: 318356
loss: 0.9978806972503662,grad_norm: 0.7813393653768574, iteration: 318357
loss: 1.0278376340866089,grad_norm: 0.9535054394218007, iteration: 318358
loss: 0.955725908279419,grad_norm: 0.9999993179763418, iteration: 318359
loss: 1.026577115058899,grad_norm: 0.9243943737347198, iteration: 318360
loss: 1.2145800590515137,grad_norm: 0.9999995483732687, iteration: 318361
loss: 1.0282084941864014,grad_norm: 0.7389155605433354, iteration: 318362
loss: 1.0210661888122559,grad_norm: 0.9999991186703502, iteration: 318363
loss: 1.0291956663131714,grad_norm: 0.9999992088995122, iteration: 318364
loss: 0.9949097037315369,grad_norm: 0.9354598595901203, iteration: 318365
loss: 1.0804286003112793,grad_norm: 0.9999996335467706, iteration: 318366
loss: 1.0082811117172241,grad_norm: 0.9999992005517819, iteration: 318367
loss: 0.9717474579811096,grad_norm: 0.8079727692235544, iteration: 318368
loss: 1.0294429063796997,grad_norm: 0.999999280470254, iteration: 318369
loss: 1.0942761898040771,grad_norm: 0.9999999484196496, iteration: 318370
loss: 1.08027982711792,grad_norm: 0.9999992173653667, iteration: 318371
loss: 1.017190933227539,grad_norm: 0.7865381874743779, iteration: 318372
loss: 1.012291669845581,grad_norm: 0.7144257286244182, iteration: 318373
loss: 0.9849861860275269,grad_norm: 0.8922505728341894, iteration: 318374
loss: 1.1505401134490967,grad_norm: 0.9217965672070644, iteration: 318375
loss: 1.0352652072906494,grad_norm: 0.9999996963391216, iteration: 318376
loss: 1.0048301219940186,grad_norm: 0.9212526462573659, iteration: 318377
loss: 1.0296224355697632,grad_norm: 0.9071690967829182, iteration: 318378
loss: 0.9914408326148987,grad_norm: 0.7223543627631043, iteration: 318379
loss: 1.0032976865768433,grad_norm: 0.9999992271387036, iteration: 318380
loss: 1.0246775150299072,grad_norm: 0.8220646119935483, iteration: 318381
loss: 0.9390805959701538,grad_norm: 0.9344476336574685, iteration: 318382
loss: 1.2917817831039429,grad_norm: 0.9999992688986338, iteration: 318383
loss: 1.0212253332138062,grad_norm: 0.999999188902707, iteration: 318384
loss: 0.9989834427833557,grad_norm: 0.9457836491151836, iteration: 318385
loss: 0.9740728735923767,grad_norm: 0.8278797281015512, iteration: 318386
loss: 1.0248351097106934,grad_norm: 0.9311251692500383, iteration: 318387
loss: 1.0031052827835083,grad_norm: 0.8948034364920388, iteration: 318388
loss: 0.9921508431434631,grad_norm: 0.7991027511274683, iteration: 318389
loss: 1.0486397743225098,grad_norm: 0.999999915121685, iteration: 318390
loss: 1.0011670589447021,grad_norm: 0.8251342986153761, iteration: 318391
loss: 1.179255485534668,grad_norm: 0.9999993202990416, iteration: 318392
loss: 0.9846693277359009,grad_norm: 0.8283924893018445, iteration: 318393
loss: 1.107602596282959,grad_norm: 0.999999047516834, iteration: 318394
loss: 0.9415826797485352,grad_norm: 0.7057213254623782, iteration: 318395
loss: 1.2180051803588867,grad_norm: 0.9999991349144621, iteration: 318396
loss: 1.0181769132614136,grad_norm: 0.9301452715816088, iteration: 318397
loss: 1.0216153860092163,grad_norm: 0.8564346373883089, iteration: 318398
loss: 1.0096824169158936,grad_norm: 0.9999990876358547, iteration: 318399
loss: 1.03957998752594,grad_norm: 0.9999991203748387, iteration: 318400
loss: 0.9875457882881165,grad_norm: 0.8335272504898852, iteration: 318401
loss: 1.1261800527572632,grad_norm: 0.9999990803566898, iteration: 318402
loss: 1.0009703636169434,grad_norm: 0.8403296659062744, iteration: 318403
loss: 1.208377480506897,grad_norm: 0.9999995598827405, iteration: 318404
loss: 0.9939579367637634,grad_norm: 0.8381844646781065, iteration: 318405
loss: 1.022405743598938,grad_norm: 0.9999990892732282, iteration: 318406
loss: 1.0033067464828491,grad_norm: 0.7458186661751994, iteration: 318407
loss: 0.9729463458061218,grad_norm: 0.8072522368495737, iteration: 318408
loss: 1.013360619544983,grad_norm: 0.8910326688418417, iteration: 318409
loss: 1.1003117561340332,grad_norm: 0.9999998983754241, iteration: 318410
loss: 1.0568161010742188,grad_norm: 0.999999597916523, iteration: 318411
loss: 0.9954020977020264,grad_norm: 0.9716180919068688, iteration: 318412
loss: 0.9895205497741699,grad_norm: 0.8531022904611505, iteration: 318413
loss: 0.9719881415367126,grad_norm: 0.9999992616479436, iteration: 318414
loss: 1.0607916116714478,grad_norm: 0.9999994211943255, iteration: 318415
loss: 0.9592291712760925,grad_norm: 0.999998968636803, iteration: 318416
loss: 1.0661534070968628,grad_norm: 0.9999998523791047, iteration: 318417
loss: 1.0222485065460205,grad_norm: 0.999999155709667, iteration: 318418
loss: 0.9731989502906799,grad_norm: 0.7442777250736665, iteration: 318419
loss: 1.0129296779632568,grad_norm: 0.8502732788821077, iteration: 318420
loss: 1.0655101537704468,grad_norm: 0.9999994534134472, iteration: 318421
loss: 1.1271463632583618,grad_norm: 0.9999992474527168, iteration: 318422
loss: 1.1007328033447266,grad_norm: 0.9999999695255283, iteration: 318423
loss: 1.003941535949707,grad_norm: 0.9999990084549978, iteration: 318424
loss: 1.0209838151931763,grad_norm: 0.9999991086122819, iteration: 318425
loss: 1.0222599506378174,grad_norm: 0.9999996406570983, iteration: 318426
loss: 0.9728546738624573,grad_norm: 0.9999992531736706, iteration: 318427
loss: 1.0737416744232178,grad_norm: 0.9999998858932249, iteration: 318428
loss: 1.0107238292694092,grad_norm: 0.9999992636317167, iteration: 318429
loss: 0.9786070585250854,grad_norm: 0.8645761969324025, iteration: 318430
loss: 1.2377185821533203,grad_norm: 0.9999993648461708, iteration: 318431
loss: 0.9871101975440979,grad_norm: 0.7165763928384605, iteration: 318432
loss: 1.0189480781555176,grad_norm: 0.9969960400912808, iteration: 318433
loss: 1.040907382965088,grad_norm: 0.8442505658491839, iteration: 318434
loss: 0.9983119368553162,grad_norm: 0.8646838914159121, iteration: 318435
loss: 1.0507025718688965,grad_norm: 0.9999992400549027, iteration: 318436
loss: 1.0744937658309937,grad_norm: 0.9999999219608924, iteration: 318437
loss: 1.021702527999878,grad_norm: 0.8755220865511016, iteration: 318438
loss: 1.078591227531433,grad_norm: 0.9999997884665317, iteration: 318439
loss: 1.0306353569030762,grad_norm: 0.9998839365570288, iteration: 318440
loss: 0.9822024703025818,grad_norm: 0.8047123181474781, iteration: 318441
loss: 1.048784852027893,grad_norm: 0.8297949403510606, iteration: 318442
loss: 0.9597083926200867,grad_norm: 0.7071543802257483, iteration: 318443
loss: 1.0094066858291626,grad_norm: 0.9999995756458272, iteration: 318444
loss: 1.0421929359436035,grad_norm: 0.9999995537367166, iteration: 318445
loss: 1.09768807888031,grad_norm: 0.9999993774645339, iteration: 318446
loss: 1.1019848585128784,grad_norm: 0.9999998017705343, iteration: 318447
loss: 1.0302454233169556,grad_norm: 0.8481903662897435, iteration: 318448
loss: 1.1466090679168701,grad_norm: 0.9999993741068453, iteration: 318449
loss: 1.0409836769104004,grad_norm: 0.999999237716756, iteration: 318450
loss: 0.9998987317085266,grad_norm: 0.8566502743620196, iteration: 318451
loss: 1.0475832223892212,grad_norm: 0.9183980189491111, iteration: 318452
loss: 1.0336281061172485,grad_norm: 0.9999993028380423, iteration: 318453
loss: 1.0119307041168213,grad_norm: 0.9209884980639993, iteration: 318454
loss: 1.0220673084259033,grad_norm: 0.9999998786712443, iteration: 318455
loss: 0.9856570363044739,grad_norm: 0.7040065021717286, iteration: 318456
loss: 1.0319788455963135,grad_norm: 0.9999998894187341, iteration: 318457
loss: 0.9931759238243103,grad_norm: 0.8089060651762734, iteration: 318458
loss: 0.9854269623756409,grad_norm: 0.8256382451465063, iteration: 318459
loss: 1.0362662076950073,grad_norm: 0.7862316250356198, iteration: 318460
loss: 0.9631577134132385,grad_norm: 0.8013147000991049, iteration: 318461
loss: 1.2603576183319092,grad_norm: 0.999999671771482, iteration: 318462
loss: 1.0270674228668213,grad_norm: 0.9196211896250897, iteration: 318463
loss: 1.011197566986084,grad_norm: 0.8812950318413528, iteration: 318464
loss: 1.110111951828003,grad_norm: 0.9216169079913489, iteration: 318465
loss: 0.9727699756622314,grad_norm: 0.7786907704991266, iteration: 318466
loss: 1.0346070528030396,grad_norm: 0.999999454559578, iteration: 318467
loss: 1.031023383140564,grad_norm: 0.9999990682172754, iteration: 318468
loss: 0.9857053160667419,grad_norm: 0.7542032397222589, iteration: 318469
loss: 1.0408459901809692,grad_norm: 0.9999992051042725, iteration: 318470
loss: 1.0319676399230957,grad_norm: 0.999999451117025, iteration: 318471
loss: 1.0491127967834473,grad_norm: 0.9591976382312345, iteration: 318472
loss: 1.0125112533569336,grad_norm: 0.9999997503678861, iteration: 318473
loss: 0.9830597043037415,grad_norm: 0.7948341392004137, iteration: 318474
loss: 1.0349949598312378,grad_norm: 0.9999991913053418, iteration: 318475
loss: 1.0204063653945923,grad_norm: 0.9154901662319057, iteration: 318476
loss: 1.013018012046814,grad_norm: 0.9016066240490191, iteration: 318477
loss: 1.2072691917419434,grad_norm: 0.9999999670589433, iteration: 318478
loss: 1.0524455308914185,grad_norm: 0.9999993620063595, iteration: 318479
loss: 1.0193512439727783,grad_norm: 0.75023218501304, iteration: 318480
loss: 1.0028036832809448,grad_norm: 0.8032966221881023, iteration: 318481
loss: 0.9648833274841309,grad_norm: 0.8409861957256375, iteration: 318482
loss: 0.9719861745834351,grad_norm: 0.8786058488188383, iteration: 318483
loss: 1.1073057651519775,grad_norm: 0.9999998627462603, iteration: 318484
loss: 0.9513871669769287,grad_norm: 0.9999993077756353, iteration: 318485
loss: 1.0026280879974365,grad_norm: 0.8446700438877212, iteration: 318486
loss: 1.003787875175476,grad_norm: 0.8275056882513235, iteration: 318487
loss: 1.0097066164016724,grad_norm: 0.8959508065721309, iteration: 318488
loss: 1.0122524499893188,grad_norm: 0.9241534488944253, iteration: 318489
loss: 1.0118895769119263,grad_norm: 0.74981743590431, iteration: 318490
loss: 0.9986150860786438,grad_norm: 0.8350780827752823, iteration: 318491
loss: 1.0502736568450928,grad_norm: 0.9999990217831323, iteration: 318492
loss: 1.001849889755249,grad_norm: 0.7224490314563368, iteration: 318493
loss: 1.0115922689437866,grad_norm: 0.7863040711594261, iteration: 318494
loss: 1.0285277366638184,grad_norm: 0.999999176201394, iteration: 318495
loss: 0.9772425889968872,grad_norm: 0.9418441644834211, iteration: 318496
loss: 1.0052266120910645,grad_norm: 0.8193931295197496, iteration: 318497
loss: 1.0148918628692627,grad_norm: 0.8294833597365797, iteration: 318498
loss: 0.9944791197776794,grad_norm: 0.971397916597711, iteration: 318499
loss: 1.1823440790176392,grad_norm: 0.9999993366706387, iteration: 318500
loss: 1.0450096130371094,grad_norm: 0.9999997516044955, iteration: 318501
loss: 0.9783397912979126,grad_norm: 0.9587621711934935, iteration: 318502
loss: 0.999123215675354,grad_norm: 0.9167575181305175, iteration: 318503
loss: 1.0215506553649902,grad_norm: 0.887319804130352, iteration: 318504
loss: 0.980491042137146,grad_norm: 0.8655207885395358, iteration: 318505
loss: 1.0096651315689087,grad_norm: 0.794501692320322, iteration: 318506
loss: 0.9859481453895569,grad_norm: 0.7642809535172711, iteration: 318507
loss: 1.0599294900894165,grad_norm: 0.9999997935509379, iteration: 318508
loss: 0.9817785024642944,grad_norm: 0.9035637873944675, iteration: 318509
loss: 1.001328706741333,grad_norm: 0.8776623117850751, iteration: 318510
loss: 0.9767349362373352,grad_norm: 0.7505372320240639, iteration: 318511
loss: 0.9994720816612244,grad_norm: 0.9031973467129046, iteration: 318512
loss: 0.9708915948867798,grad_norm: 0.8259557457560165, iteration: 318513
loss: 1.1226555109024048,grad_norm: 0.999999769669974, iteration: 318514
loss: 1.0772113800048828,grad_norm: 0.9999996106849255, iteration: 318515
loss: 1.0606051683425903,grad_norm: 0.9999993857318772, iteration: 318516
loss: 0.9718651175498962,grad_norm: 0.9999993845300961, iteration: 318517
loss: 1.0134990215301514,grad_norm: 0.9999995791323462, iteration: 318518
loss: 0.9798819422721863,grad_norm: 0.9999994135621588, iteration: 318519
loss: 0.9668835997581482,grad_norm: 0.9999991350171046, iteration: 318520
loss: 1.0121294260025024,grad_norm: 0.9999998730709242, iteration: 318521
loss: 0.9947401285171509,grad_norm: 0.783246281181117, iteration: 318522
loss: 0.9644954800605774,grad_norm: 0.8461918550105199, iteration: 318523
loss: 0.9870677590370178,grad_norm: 0.8069539399041405, iteration: 318524
loss: 1.0249409675598145,grad_norm: 0.9291044652465198, iteration: 318525
loss: 0.9935086369514465,grad_norm: 0.9999991928798231, iteration: 318526
loss: 1.1118242740631104,grad_norm: 0.9999993369917247, iteration: 318527
loss: 0.9979037642478943,grad_norm: 0.9999995649192202, iteration: 318528
loss: 1.011118769645691,grad_norm: 0.7242712887712914, iteration: 318529
loss: 1.0119014978408813,grad_norm: 0.7412405363693786, iteration: 318530
loss: 0.9884609580039978,grad_norm: 0.7638986008007923, iteration: 318531
loss: 0.9761538505554199,grad_norm: 0.9060478787007502, iteration: 318532
loss: 1.0025110244750977,grad_norm: 0.8792321399313205, iteration: 318533
loss: 1.0116881132125854,grad_norm: 0.843527764975279, iteration: 318534
loss: 0.9962741732597351,grad_norm: 0.7046335608108313, iteration: 318535
loss: 1.0023983716964722,grad_norm: 0.8419848189205067, iteration: 318536
loss: 1.0265589952468872,grad_norm: 0.9999990236661017, iteration: 318537
loss: 1.0023884773254395,grad_norm: 0.912638095687512, iteration: 318538
loss: 1.0046480894088745,grad_norm: 0.8299619777432072, iteration: 318539
loss: 1.0022777318954468,grad_norm: 0.9999997539307731, iteration: 318540
loss: 0.9841431379318237,grad_norm: 0.7541075895359111, iteration: 318541
loss: 0.9815413951873779,grad_norm: 0.7429750881306165, iteration: 318542
loss: 0.9745839238166809,grad_norm: 0.972464872943805, iteration: 318543
loss: 1.1071739196777344,grad_norm: 0.8379475018021132, iteration: 318544
loss: 1.0126622915267944,grad_norm: 0.9272991143788062, iteration: 318545
loss: 1.0400043725967407,grad_norm: 0.8771764283109171, iteration: 318546
loss: 1.0541348457336426,grad_norm: 0.9999998375513809, iteration: 318547
loss: 0.9979119300842285,grad_norm: 0.9385698955351731, iteration: 318548
loss: 1.0040168762207031,grad_norm: 0.7731789380614511, iteration: 318549
loss: 0.9574671387672424,grad_norm: 0.7240132804058862, iteration: 318550
loss: 0.9688498377799988,grad_norm: 0.9638148168847128, iteration: 318551
loss: 1.0065038204193115,grad_norm: 0.935026567147785, iteration: 318552
loss: 0.9930658340454102,grad_norm: 0.9909067920101656, iteration: 318553
loss: 1.1888197660446167,grad_norm: 1.0000000318145499, iteration: 318554
loss: 0.9806739687919617,grad_norm: 0.9999991297239933, iteration: 318555
loss: 1.111143708229065,grad_norm: 0.9999991737059655, iteration: 318556
loss: 1.009368658065796,grad_norm: 0.9647454690973757, iteration: 318557
loss: 1.0121300220489502,grad_norm: 0.8123473329404899, iteration: 318558
loss: 0.9772425293922424,grad_norm: 0.9485845552146712, iteration: 318559
loss: 0.9957693219184875,grad_norm: 0.9682845551007772, iteration: 318560
loss: 1.031543493270874,grad_norm: 0.9999992333828397, iteration: 318561
loss: 1.0652282238006592,grad_norm: 0.9999996969079096, iteration: 318562
loss: 1.006488561630249,grad_norm: 0.841383021350155, iteration: 318563
loss: 1.027536392211914,grad_norm: 0.8331210676413341, iteration: 318564
loss: 1.0061899423599243,grad_norm: 0.8872480031967807, iteration: 318565
loss: 0.9597934484481812,grad_norm: 0.7936996718004994, iteration: 318566
loss: 0.9593340158462524,grad_norm: 0.8087818026182175, iteration: 318567
loss: 0.9818515181541443,grad_norm: 0.8182769576146985, iteration: 318568
loss: 0.9946930408477783,grad_norm: 0.830554705855824, iteration: 318569
loss: 0.9830251932144165,grad_norm: 0.8699617079143398, iteration: 318570
loss: 1.014112114906311,grad_norm: 0.8070475999554563, iteration: 318571
loss: 1.1033352613449097,grad_norm: 0.9999996029964584, iteration: 318572
loss: 0.9991520643234253,grad_norm: 0.7884876066761186, iteration: 318573
loss: 1.0272305011749268,grad_norm: 0.8323125976483017, iteration: 318574
loss: 1.0012800693511963,grad_norm: 0.8469895917422733, iteration: 318575
loss: 0.9784227609634399,grad_norm: 0.817825054518509, iteration: 318576
loss: 1.0028555393218994,grad_norm: 0.9343915253746469, iteration: 318577
loss: 0.9693296551704407,grad_norm: 0.8874123059139973, iteration: 318578
loss: 0.9878409504890442,grad_norm: 0.9999990643806721, iteration: 318579
loss: 1.0114054679870605,grad_norm: 0.9977218000717025, iteration: 318580
loss: 0.9893675446510315,grad_norm: 0.7592405460198524, iteration: 318581
loss: 0.9745524525642395,grad_norm: 0.9999991191196055, iteration: 318582
loss: 1.005226969718933,grad_norm: 0.8318205178480678, iteration: 318583
loss: 1.0167289972305298,grad_norm: 0.9202267186355524, iteration: 318584
loss: 1.012017011642456,grad_norm: 0.9739101894611414, iteration: 318585
loss: 1.011128306388855,grad_norm: 0.8653252974425265, iteration: 318586
loss: 1.0043277740478516,grad_norm: 0.7325200370040521, iteration: 318587
loss: 0.985321044921875,grad_norm: 0.797629829237923, iteration: 318588
loss: 1.0417104959487915,grad_norm: 0.999999843330067, iteration: 318589
loss: 1.035495638847351,grad_norm: 0.9999990971946867, iteration: 318590
loss: 0.9971482753753662,grad_norm: 0.896763071164327, iteration: 318591
loss: 1.1294937133789062,grad_norm: 0.9999996313988154, iteration: 318592
loss: 1.0665159225463867,grad_norm: 0.9145162879068802, iteration: 318593
loss: 1.0833442211151123,grad_norm: 0.9999990978325608, iteration: 318594
loss: 1.0162688493728638,grad_norm: 0.9755637106010434, iteration: 318595
loss: 1.0123482942581177,grad_norm: 0.6885586019618366, iteration: 318596
loss: 0.9995861649513245,grad_norm: 0.8710313625741428, iteration: 318597
loss: 0.9839158058166504,grad_norm: 0.8501096850082202, iteration: 318598
loss: 1.1194074153900146,grad_norm: 0.9999993552462484, iteration: 318599
loss: 1.0186113119125366,grad_norm: 0.7056033460544773, iteration: 318600
loss: 0.9821129441261292,grad_norm: 0.8757959802764306, iteration: 318601
loss: 1.0571935176849365,grad_norm: 0.7766514044557313, iteration: 318602
loss: 0.9800927639007568,grad_norm: 0.9507405463317322, iteration: 318603
loss: 0.9870668649673462,grad_norm: 0.8175079415947039, iteration: 318604
loss: 0.9927645325660706,grad_norm: 0.7829525765380746, iteration: 318605
loss: 0.9823277592658997,grad_norm: 0.8604318152311432, iteration: 318606
loss: 1.0116662979125977,grad_norm: 0.9999991225796466, iteration: 318607
loss: 1.0181758403778076,grad_norm: 0.881003993160478, iteration: 318608
loss: 1.0111039876937866,grad_norm: 0.9999994711517298, iteration: 318609
loss: 0.9883630275726318,grad_norm: 0.8033793713431383, iteration: 318610
loss: 1.0193730592727661,grad_norm: 0.7395309102743465, iteration: 318611
loss: 1.001153588294983,grad_norm: 0.9999991060580761, iteration: 318612
loss: 1.0867600440979004,grad_norm: 0.9999991911337843, iteration: 318613
loss: 0.9959936738014221,grad_norm: 0.9999992266556932, iteration: 318614
loss: 1.0143513679504395,grad_norm: 0.9836351626161743, iteration: 318615
loss: 0.9885491132736206,grad_norm: 0.9050230043986186, iteration: 318616
loss: 0.9760544300079346,grad_norm: 0.7993519951825465, iteration: 318617
loss: 1.0142215490341187,grad_norm: 0.8919337883769022, iteration: 318618
loss: 0.9704002141952515,grad_norm: 0.9363981206354426, iteration: 318619
loss: 1.035090684890747,grad_norm: 0.9405299650703839, iteration: 318620
loss: 1.0191733837127686,grad_norm: 0.8923275526240252, iteration: 318621
loss: 0.9926791191101074,grad_norm: 0.8424779768892489, iteration: 318622
loss: 1.0011450052261353,grad_norm: 0.8706153222464837, iteration: 318623
loss: 1.0097094774246216,grad_norm: 0.9571655672515063, iteration: 318624
loss: 1.0247751474380493,grad_norm: 0.9744903917068121, iteration: 318625
loss: 0.9801231622695923,grad_norm: 0.7652360771268353, iteration: 318626
loss: 0.9956910014152527,grad_norm: 0.8805851307289447, iteration: 318627
loss: 0.9930221438407898,grad_norm: 0.8456579396646684, iteration: 318628
loss: 0.9857454895973206,grad_norm: 0.6387072992778962, iteration: 318629
loss: 0.9922239780426025,grad_norm: 0.8899830260657691, iteration: 318630
loss: 1.013006329536438,grad_norm: 0.8878301388211356, iteration: 318631
loss: 1.0576977729797363,grad_norm: 0.6760742831103324, iteration: 318632
loss: 0.9733154773712158,grad_norm: 0.8952933822866702, iteration: 318633
loss: 1.0041072368621826,grad_norm: 0.7112850568128083, iteration: 318634
loss: 1.015487551689148,grad_norm: 0.8137439211227495, iteration: 318635
loss: 0.9710281491279602,grad_norm: 0.848749136232337, iteration: 318636
loss: 1.0177595615386963,grad_norm: 0.8403899847333623, iteration: 318637
loss: 0.9770041108131409,grad_norm: 0.9166038330892327, iteration: 318638
loss: 1.0240113735198975,grad_norm: 0.8378376241096717, iteration: 318639
loss: 1.0536192655563354,grad_norm: 0.999999081782744, iteration: 318640
loss: 0.9807137846946716,grad_norm: 0.9063275972046342, iteration: 318641
loss: 1.0070722103118896,grad_norm: 0.8315739867821094, iteration: 318642
loss: 0.996259331703186,grad_norm: 0.6684258515736022, iteration: 318643
loss: 0.990731418132782,grad_norm: 0.7129818640423602, iteration: 318644
loss: 0.9724044799804688,grad_norm: 0.7702089032034227, iteration: 318645
loss: 0.9991529583930969,grad_norm: 0.9867119953850672, iteration: 318646
loss: 0.9773181676864624,grad_norm: 0.8685756555679842, iteration: 318647
loss: 1.0201191902160645,grad_norm: 0.8988111886061985, iteration: 318648
loss: 1.0020453929901123,grad_norm: 0.9141586148675456, iteration: 318649
loss: 1.018803596496582,grad_norm: 0.8208585744730447, iteration: 318650
loss: 1.0056449174880981,grad_norm: 0.9999991976594195, iteration: 318651
loss: 0.9579068422317505,grad_norm: 0.7992043557684386, iteration: 318652
loss: 0.9688103199005127,grad_norm: 0.9999994254734946, iteration: 318653
loss: 1.0091910362243652,grad_norm: 0.9999999448455831, iteration: 318654
loss: 1.0130335092544556,grad_norm: 0.9687595480679357, iteration: 318655
loss: 0.9970724582672119,grad_norm: 0.9999992529671569, iteration: 318656
loss: 1.0229302644729614,grad_norm: 0.7301155459046967, iteration: 318657
loss: 0.9421287178993225,grad_norm: 0.8479574484045621, iteration: 318658
loss: 0.9835643768310547,grad_norm: 0.9772187903058277, iteration: 318659
loss: 1.0112463235855103,grad_norm: 0.9999991999287752, iteration: 318660
loss: 0.997586727142334,grad_norm: 0.9999991510549672, iteration: 318661
loss: 0.9991639852523804,grad_norm: 0.7400009900539829, iteration: 318662
loss: 1.0303910970687866,grad_norm: 0.817742152611732, iteration: 318663
loss: 0.9296320676803589,grad_norm: 0.744554950142067, iteration: 318664
loss: 0.9726235270500183,grad_norm: 0.8221065189507086, iteration: 318665
loss: 1.0249897241592407,grad_norm: 0.8744685493887155, iteration: 318666
loss: 1.0193318128585815,grad_norm: 0.7631012508087545, iteration: 318667
loss: 1.0379990339279175,grad_norm: 0.9122378548886626, iteration: 318668
loss: 0.9964300394058228,grad_norm: 0.9527779409397202, iteration: 318669
loss: 0.9755036234855652,grad_norm: 0.85631387309669, iteration: 318670
loss: 1.0615226030349731,grad_norm: 0.8035546877153557, iteration: 318671
loss: 1.041610598564148,grad_norm: 0.962970473081799, iteration: 318672
loss: 1.0007072687149048,grad_norm: 0.8013866922065386, iteration: 318673
loss: 0.9659407734870911,grad_norm: 0.7862815028622635, iteration: 318674
loss: 0.9920622706413269,grad_norm: 0.8573849123665713, iteration: 318675
loss: 1.0181132555007935,grad_norm: 0.8458863604436438, iteration: 318676
loss: 0.9891260266304016,grad_norm: 0.72842085011143, iteration: 318677
loss: 0.9798826575279236,grad_norm: 0.9999996624962408, iteration: 318678
loss: 1.0042349100112915,grad_norm: 0.9999992337475287, iteration: 318679
loss: 0.9843298196792603,grad_norm: 0.854236705184417, iteration: 318680
loss: 1.0193294286727905,grad_norm: 0.6896644170436319, iteration: 318681
loss: 1.1177997589111328,grad_norm: 0.9999993600856426, iteration: 318682
loss: 0.9625158309936523,grad_norm: 0.9999989293485515, iteration: 318683
loss: 1.0718574523925781,grad_norm: 0.9999991832913308, iteration: 318684
loss: 0.9767178893089294,grad_norm: 0.797671875831146, iteration: 318685
loss: 1.0337241888046265,grad_norm: 0.9406897490372301, iteration: 318686
loss: 1.0010496377944946,grad_norm: 0.8194439725849249, iteration: 318687
loss: 0.9786627888679504,grad_norm: 0.7197614647849193, iteration: 318688
loss: 0.9744260907173157,grad_norm: 0.8291385920745084, iteration: 318689
loss: 0.9939121007919312,grad_norm: 0.86434960303586, iteration: 318690
loss: 1.0094209909439087,grad_norm: 0.9142170966092297, iteration: 318691
loss: 1.0552350282669067,grad_norm: 0.9999992628316821, iteration: 318692
loss: 0.9931501150131226,grad_norm: 0.8669257189623724, iteration: 318693
loss: 1.015303134918213,grad_norm: 0.7626022048106996, iteration: 318694
loss: 0.9881595373153687,grad_norm: 0.9182131939041811, iteration: 318695
loss: 1.0123405456542969,grad_norm: 0.8074582673188795, iteration: 318696
loss: 1.050574779510498,grad_norm: 0.9999990936911622, iteration: 318697
loss: 1.016904592514038,grad_norm: 0.8748575651325757, iteration: 318698
loss: 0.9972121119499207,grad_norm: 0.925298109054587, iteration: 318699
loss: 1.009648323059082,grad_norm: 0.9200371911184618, iteration: 318700
loss: 1.0092387199401855,grad_norm: 0.8229701334290834, iteration: 318701
loss: 1.0104007720947266,grad_norm: 0.9999991207138107, iteration: 318702
loss: 1.0469341278076172,grad_norm: 0.9999990744898667, iteration: 318703
loss: 1.01479172706604,grad_norm: 0.9999994798477565, iteration: 318704
loss: 1.0938732624053955,grad_norm: 0.8780476752401489, iteration: 318705
loss: 1.0025914907455444,grad_norm: 0.9840695870449321, iteration: 318706
loss: 1.0124289989471436,grad_norm: 0.99999905891211, iteration: 318707
loss: 0.9617639183998108,grad_norm: 0.9790652300110191, iteration: 318708
loss: 1.0513386726379395,grad_norm: 0.8268049659921988, iteration: 318709
loss: 0.9610726833343506,grad_norm: 0.8609859297066018, iteration: 318710
loss: 0.9854288101196289,grad_norm: 0.8673380395352119, iteration: 318711
loss: 1.0022039413452148,grad_norm: 0.7615445499575274, iteration: 318712
loss: 0.982170581817627,grad_norm: 0.863124301545649, iteration: 318713
loss: 0.9931668639183044,grad_norm: 0.791147850950301, iteration: 318714
loss: 1.0760349035263062,grad_norm: 0.9999998532209019, iteration: 318715
loss: 0.9944275617599487,grad_norm: 0.913237121377134, iteration: 318716
loss: 1.000696063041687,grad_norm: 0.8378330631839523, iteration: 318717
loss: 1.0688996315002441,grad_norm: 0.999999392468892, iteration: 318718
loss: 0.9776902198791504,grad_norm: 0.8284376058885432, iteration: 318719
loss: 0.9796773195266724,grad_norm: 0.8578131343429694, iteration: 318720
loss: 0.987546980381012,grad_norm: 0.8543035016871098, iteration: 318721
loss: 1.029382348060608,grad_norm: 0.8635396978526046, iteration: 318722
loss: 1.0070289373397827,grad_norm: 0.927795798784989, iteration: 318723
loss: 0.9978852272033691,grad_norm: 0.9824025338281843, iteration: 318724
loss: 0.9875770807266235,grad_norm: 0.9999991445209252, iteration: 318725
loss: 0.9795510172843933,grad_norm: 0.7547421693535129, iteration: 318726
loss: 1.000207781791687,grad_norm: 0.7363109750090413, iteration: 318727
loss: 1.0266845226287842,grad_norm: 0.8842082543590525, iteration: 318728
loss: 1.0030220746994019,grad_norm: 0.8206091172867196, iteration: 318729
loss: 1.0003706216812134,grad_norm: 0.7674263663578798, iteration: 318730
loss: 0.9738447666168213,grad_norm: 0.8240803490091342, iteration: 318731
loss: 0.9788961410522461,grad_norm: 0.904265577910441, iteration: 318732
loss: 1.0015974044799805,grad_norm: 0.7296174147263799, iteration: 318733
loss: 0.9957351684570312,grad_norm: 0.7785158467390108, iteration: 318734
loss: 0.989173948764801,grad_norm: 0.8653152798721355, iteration: 318735
loss: 1.0167205333709717,grad_norm: 0.7068950975956378, iteration: 318736
loss: 0.9837700724601746,grad_norm: 0.8101674117040014, iteration: 318737
loss: 0.9715608358383179,grad_norm: 0.8663590042841401, iteration: 318738
loss: 0.9922113418579102,grad_norm: 0.9263652394390715, iteration: 318739
loss: 1.051023006439209,grad_norm: 0.9064830833947948, iteration: 318740
loss: 1.0020136833190918,grad_norm: 0.8877475222493818, iteration: 318741
loss: 0.9972594380378723,grad_norm: 0.896929940923015, iteration: 318742
loss: 1.019397497177124,grad_norm: 0.8942316184693941, iteration: 318743
loss: 0.9993999600410461,grad_norm: 0.8297157718023237, iteration: 318744
loss: 0.9930552840232849,grad_norm: 0.8708336606082214, iteration: 318745
loss: 1.0053759813308716,grad_norm: 0.6996768946089613, iteration: 318746
loss: 1.002031683921814,grad_norm: 0.8631590787151582, iteration: 318747
loss: 1.083502173423767,grad_norm: 0.9126904063648233, iteration: 318748
loss: 1.0416911840438843,grad_norm: 0.9999996065103476, iteration: 318749
loss: 1.0129717588424683,grad_norm: 0.7384552356191921, iteration: 318750
loss: 0.9888541102409363,grad_norm: 0.9977648312710817, iteration: 318751
loss: 1.0120937824249268,grad_norm: 0.8660383897566143, iteration: 318752
loss: 0.9639273285865784,grad_norm: 0.9071262820306188, iteration: 318753
loss: 1.020965814590454,grad_norm: 0.9999999342345115, iteration: 318754
loss: 0.9769549369812012,grad_norm: 0.8090143235980408, iteration: 318755
loss: 0.9717074632644653,grad_norm: 0.9289510993485754, iteration: 318756
loss: 1.0274603366851807,grad_norm: 0.9129221873520669, iteration: 318757
loss: 0.9806913733482361,grad_norm: 0.8490762926689557, iteration: 318758
loss: 0.9875829815864563,grad_norm: 0.7258186032911804, iteration: 318759
loss: 1.004164218902588,grad_norm: 0.898408509034244, iteration: 318760
loss: 1.0074362754821777,grad_norm: 0.7640280339748716, iteration: 318761
loss: 0.9751095771789551,grad_norm: 0.7294592348159998, iteration: 318762
loss: 1.017777442932129,grad_norm: 0.7470613340812544, iteration: 318763
loss: 1.0111533403396606,grad_norm: 0.8712535605180151, iteration: 318764
loss: 0.9492294788360596,grad_norm: 0.7301628485894911, iteration: 318765
loss: 0.9958025217056274,grad_norm: 0.8599741692910562, iteration: 318766
loss: 1.0066405534744263,grad_norm: 0.9004710030464859, iteration: 318767
loss: 1.0073198080062866,grad_norm: 0.7206159262809315, iteration: 318768
loss: 1.0026426315307617,grad_norm: 0.7874590844863542, iteration: 318769
loss: 1.0161939859390259,grad_norm: 0.8154683851345285, iteration: 318770
loss: 0.9847592711448669,grad_norm: 0.8205327803704757, iteration: 318771
loss: 0.9817047119140625,grad_norm: 0.8020181161090878, iteration: 318772
loss: 1.0112996101379395,grad_norm: 0.7791570317388312, iteration: 318773
loss: 0.9888434410095215,grad_norm: 0.8143803669647318, iteration: 318774
loss: 1.0103780031204224,grad_norm: 0.6770604455838868, iteration: 318775
loss: 0.9975437521934509,grad_norm: 0.8103391270324118, iteration: 318776
loss: 1.0224894285202026,grad_norm: 0.8003035375626271, iteration: 318777
loss: 0.9610225558280945,grad_norm: 0.8478596809887188, iteration: 318778
loss: 0.9769119024276733,grad_norm: 0.793479080129503, iteration: 318779
loss: 1.0041075944900513,grad_norm: 0.80213649556246, iteration: 318780
loss: 0.9842873215675354,grad_norm: 0.9779689420222332, iteration: 318781
loss: 0.9893367886543274,grad_norm: 0.7487115629879052, iteration: 318782
loss: 0.9959089159965515,grad_norm: 0.8068676802424651, iteration: 318783
loss: 0.9876334071159363,grad_norm: 0.9422593367303037, iteration: 318784
loss: 0.9951896071434021,grad_norm: 0.7520006760407465, iteration: 318785
loss: 1.0386756658554077,grad_norm: 0.919252979918244, iteration: 318786
loss: 1.0158002376556396,grad_norm: 0.8032734677085566, iteration: 318787
loss: 1.0531610250473022,grad_norm: 0.8592430243831064, iteration: 318788
loss: 0.9785041213035583,grad_norm: 0.7576083947831567, iteration: 318789
loss: 0.9909629821777344,grad_norm: 0.7887502893131483, iteration: 318790
loss: 1.0155590772628784,grad_norm: 0.9999991308173575, iteration: 318791
loss: 1.002443790435791,grad_norm: 0.8058620953560076, iteration: 318792
loss: 0.9744601845741272,grad_norm: 0.8608094804948057, iteration: 318793
loss: 1.0104583501815796,grad_norm: 0.785042492243802, iteration: 318794
loss: 1.01200532913208,grad_norm: 0.8132596335293575, iteration: 318795
loss: 0.9908398389816284,grad_norm: 0.9999989752414493, iteration: 318796
loss: 0.9979364275932312,grad_norm: 0.729141718105858, iteration: 318797
loss: 0.9625973701477051,grad_norm: 0.9078299946767483, iteration: 318798
loss: 1.0036684274673462,grad_norm: 0.9999990904918116, iteration: 318799
loss: 0.9521473050117493,grad_norm: 0.852485188628755, iteration: 318800
loss: 1.0012143850326538,grad_norm: 0.8619080735307793, iteration: 318801
loss: 0.994108259677887,grad_norm: 0.7205151818664018, iteration: 318802
loss: 0.9648380875587463,grad_norm: 0.9999992541881614, iteration: 318803
loss: 1.008548378944397,grad_norm: 0.88631725355499, iteration: 318804
loss: 0.9987986087799072,grad_norm: 0.8529059595382852, iteration: 318805
loss: 1.0207034349441528,grad_norm: 0.7512493262188669, iteration: 318806
loss: 1.0034043788909912,grad_norm: 0.8215537132395114, iteration: 318807
loss: 1.0572750568389893,grad_norm: 0.9999991013138737, iteration: 318808
loss: 1.038366675376892,grad_norm: 0.8681337050802829, iteration: 318809
loss: 1.046708345413208,grad_norm: 0.9999992240574052, iteration: 318810
loss: 1.0140432119369507,grad_norm: 0.8085220454203087, iteration: 318811
loss: 0.9753944277763367,grad_norm: 0.7836698939218626, iteration: 318812
loss: 0.9772494435310364,grad_norm: 0.7584845370224639, iteration: 318813
loss: 1.005647897720337,grad_norm: 0.9999994165624277, iteration: 318814
loss: 1.0481265783309937,grad_norm: 0.9999991300487768, iteration: 318815
loss: 0.9825097918510437,grad_norm: 0.7074524430670286, iteration: 318816
loss: 1.035041332244873,grad_norm: 0.8589099699476455, iteration: 318817
loss: 1.0057976245880127,grad_norm: 0.8848045216887757, iteration: 318818
loss: 0.9649555683135986,grad_norm: 0.7337882883044178, iteration: 318819
loss: 0.980766773223877,grad_norm: 0.7708537590994244, iteration: 318820
loss: 0.9811829328536987,grad_norm: 0.9451367281377897, iteration: 318821
loss: 0.9755682349205017,grad_norm: 0.822548319412521, iteration: 318822
loss: 0.9868845343589783,grad_norm: 0.7455610777502965, iteration: 318823
loss: 0.9816277027130127,grad_norm: 0.9130152502412648, iteration: 318824
loss: 0.9567509293556213,grad_norm: 0.7494314965071476, iteration: 318825
loss: 0.9930471777915955,grad_norm: 0.7126137966249831, iteration: 318826
loss: 1.0524049997329712,grad_norm: 0.9999990963164623, iteration: 318827
loss: 1.001798391342163,grad_norm: 0.7634254344806756, iteration: 318828
loss: 0.9771052002906799,grad_norm: 0.8567666352292688, iteration: 318829
loss: 0.9968511462211609,grad_norm: 0.9345966006778257, iteration: 318830
loss: 0.9987048506736755,grad_norm: 0.7552421911248226, iteration: 318831
loss: 1.0273377895355225,grad_norm: 0.8554082786582886, iteration: 318832
loss: 1.002364158630371,grad_norm: 0.7095481810213444, iteration: 318833
loss: 1.0317506790161133,grad_norm: 0.8598170991440452, iteration: 318834
loss: 0.9758760929107666,grad_norm: 0.8744666614828001, iteration: 318835
loss: 0.9927658438682556,grad_norm: 0.9301245969244734, iteration: 318836
loss: 1.0112696886062622,grad_norm: 0.7671483330978868, iteration: 318837
loss: 0.972262442111969,grad_norm: 0.7481125839506704, iteration: 318838
loss: 0.9963809251785278,grad_norm: 0.8627173540601459, iteration: 318839
loss: 1.0290905237197876,grad_norm: 0.8086621961466741, iteration: 318840
loss: 0.9840996265411377,grad_norm: 0.8390643703006707, iteration: 318841
loss: 1.0003583431243896,grad_norm: 0.8339702300700083, iteration: 318842
loss: 0.965662956237793,grad_norm: 0.9999991671365763, iteration: 318843
loss: 0.9954564571380615,grad_norm: 0.8086854710704544, iteration: 318844
loss: 1.0131423473358154,grad_norm: 0.8580000612826556, iteration: 318845
loss: 0.9984961748123169,grad_norm: 0.7827738065110778, iteration: 318846
loss: 0.9936695098876953,grad_norm: 0.8854248500248357, iteration: 318847
loss: 0.9791775345802307,grad_norm: 0.800543670969076, iteration: 318848
loss: 1.0250756740570068,grad_norm: 0.7296195685298072, iteration: 318849
loss: 1.0326696634292603,grad_norm: 0.7273060304166205, iteration: 318850
loss: 1.0321362018585205,grad_norm: 0.9999990510105924, iteration: 318851
loss: 0.9400038123130798,grad_norm: 0.8142025819962866, iteration: 318852
loss: 1.0547910928726196,grad_norm: 0.8423341828156458, iteration: 318853
loss: 0.9830885529518127,grad_norm: 0.803237660339615, iteration: 318854
loss: 0.9667598605155945,grad_norm: 0.7390834752276715, iteration: 318855
loss: 1.0513290166854858,grad_norm: 0.857216990310633, iteration: 318856
loss: 0.9975036978721619,grad_norm: 0.9999995972304353, iteration: 318857
loss: 1.014891505241394,grad_norm: 0.9726521171798455, iteration: 318858
loss: 1.0254993438720703,grad_norm: 0.9999991733429198, iteration: 318859
loss: 1.0197292566299438,grad_norm: 0.7216439449209152, iteration: 318860
loss: 1.0061200857162476,grad_norm: 0.9739406515405805, iteration: 318861
loss: 0.9897345900535583,grad_norm: 0.8814902431015124, iteration: 318862
loss: 1.0207444429397583,grad_norm: 0.9668986523846292, iteration: 318863
loss: 1.0096834897994995,grad_norm: 0.8375546483676543, iteration: 318864
loss: 0.9765833020210266,grad_norm: 0.8889417695434153, iteration: 318865
loss: 0.985727071762085,grad_norm: 0.7168259806224141, iteration: 318866
loss: 1.0152822732925415,grad_norm: 0.9999990510014264, iteration: 318867
loss: 0.9962860941886902,grad_norm: 0.8411385076822009, iteration: 318868
loss: 1.01768958568573,grad_norm: 0.8780196570167123, iteration: 318869
loss: 0.9815571308135986,grad_norm: 0.707605031908388, iteration: 318870
loss: 1.0454143285751343,grad_norm: 0.8271900084224393, iteration: 318871
loss: 0.9669308066368103,grad_norm: 0.9237537739028872, iteration: 318872
loss: 1.0116565227508545,grad_norm: 0.6724714106811508, iteration: 318873
loss: 0.989302933216095,grad_norm: 0.7950440770466253, iteration: 318874
loss: 1.0062625408172607,grad_norm: 0.966143790289995, iteration: 318875
loss: 1.0045223236083984,grad_norm: 0.9999997708928297, iteration: 318876
loss: 0.9874499440193176,grad_norm: 0.9939748106041343, iteration: 318877
loss: 0.9914216995239258,grad_norm: 0.7981856493973559, iteration: 318878
loss: 1.1065305471420288,grad_norm: 0.9999992096190178, iteration: 318879
loss: 0.951298177242279,grad_norm: 0.7736578801236375, iteration: 318880
loss: 1.0042545795440674,grad_norm: 0.7784318416626477, iteration: 318881
loss: 1.0077918767929077,grad_norm: 0.7768724452393986, iteration: 318882
loss: 0.9822434186935425,grad_norm: 0.7944660829649804, iteration: 318883
loss: 1.0212308168411255,grad_norm: 0.8467774148483571, iteration: 318884
loss: 0.9824127554893494,grad_norm: 0.837460551464658, iteration: 318885
loss: 1.0887428522109985,grad_norm: 0.999999283104256, iteration: 318886
loss: 1.0243104696273804,grad_norm: 0.830988300371156, iteration: 318887
loss: 0.9823151230812073,grad_norm: 0.7408185219929185, iteration: 318888
loss: 1.0044111013412476,grad_norm: 0.9618843968829177, iteration: 318889
loss: 0.9852355122566223,grad_norm: 0.8375313675935949, iteration: 318890
loss: 1.039225697517395,grad_norm: 0.9999990506404528, iteration: 318891
loss: 0.9830693602561951,grad_norm: 0.7367123092070065, iteration: 318892
loss: 0.9877833724021912,grad_norm: 0.7385476866824195, iteration: 318893
loss: 0.9989543557167053,grad_norm: 0.9999992068449883, iteration: 318894
loss: 0.9699235558509827,grad_norm: 0.9337842241046356, iteration: 318895
loss: 1.0107314586639404,grad_norm: 0.8624786240793738, iteration: 318896
loss: 1.0027765035629272,grad_norm: 0.7265104492428363, iteration: 318897
loss: 1.0710971355438232,grad_norm: 0.999999872688115, iteration: 318898
loss: 1.0042260885238647,grad_norm: 0.8198157672618505, iteration: 318899
loss: 1.015997052192688,grad_norm: 0.7844576604501988, iteration: 318900
loss: 0.9973297119140625,grad_norm: 0.9731752953291191, iteration: 318901
loss: 0.9829539656639099,grad_norm: 0.999999140972355, iteration: 318902
loss: 0.990730345249176,grad_norm: 0.8516848259977697, iteration: 318903
loss: 0.9988282322883606,grad_norm: 0.8683072076965173, iteration: 318904
loss: 1.0235170125961304,grad_norm: 0.744650175495548, iteration: 318905
loss: 1.0053291320800781,grad_norm: 0.9743922567847018, iteration: 318906
loss: 1.0177944898605347,grad_norm: 0.8129592235604554, iteration: 318907
loss: 0.9981228709220886,grad_norm: 0.919781070568282, iteration: 318908
loss: 1.062588095664978,grad_norm: 0.914503168928744, iteration: 318909
loss: 0.9687818884849548,grad_norm: 0.9884620298272323, iteration: 318910
loss: 0.996483564376831,grad_norm: 0.7637921909099896, iteration: 318911
loss: 0.9836665987968445,grad_norm: 0.7738378251191903, iteration: 318912
loss: 1.0133308172225952,grad_norm: 0.904930706566645, iteration: 318913
loss: 0.996878445148468,grad_norm: 0.7132677503582261, iteration: 318914
loss: 0.9935269951820374,grad_norm: 0.9999142079158542, iteration: 318915
loss: 1.0039271116256714,grad_norm: 0.9999992122835655, iteration: 318916
loss: 1.01055109500885,grad_norm: 0.9714796227073201, iteration: 318917
loss: 1.037437915802002,grad_norm: 0.8599083856152414, iteration: 318918
loss: 1.0455394983291626,grad_norm: 0.9999993833624636, iteration: 318919
loss: 1.0193074941635132,grad_norm: 0.8873354295937643, iteration: 318920
loss: 1.0544416904449463,grad_norm: 0.9999991527196538, iteration: 318921
loss: 1.1364303827285767,grad_norm: 0.9549208093238385, iteration: 318922
loss: 1.0242654085159302,grad_norm: 0.8856255807989184, iteration: 318923
loss: 1.0069730281829834,grad_norm: 0.908709385929217, iteration: 318924
loss: 0.9973277449607849,grad_norm: 0.8138723394507875, iteration: 318925
loss: 0.9790882468223572,grad_norm: 0.8775170472920839, iteration: 318926
loss: 1.0619994401931763,grad_norm: 0.8519218770964868, iteration: 318927
loss: 1.0482110977172852,grad_norm: 0.9999993209779833, iteration: 318928
loss: 1.0767170190811157,grad_norm: 0.8225664647714551, iteration: 318929
loss: 0.9919276833534241,grad_norm: 0.7703235055689133, iteration: 318930
loss: 1.006037950515747,grad_norm: 0.9451547114264108, iteration: 318931
loss: 0.9736179709434509,grad_norm: 0.8203171263136564, iteration: 318932
loss: 0.9849944710731506,grad_norm: 0.8000573976255703, iteration: 318933
loss: 0.9698498249053955,grad_norm: 0.8651864757789337, iteration: 318934
loss: 0.9604818224906921,grad_norm: 0.7703271156731478, iteration: 318935
loss: 1.0156261920928955,grad_norm: 0.9026648152519896, iteration: 318936
loss: 1.006241798400879,grad_norm: 0.7256617293248679, iteration: 318937
loss: 1.090718150138855,grad_norm: 0.9999995495131725, iteration: 318938
loss: 0.9820144772529602,grad_norm: 0.8225356049839465, iteration: 318939
loss: 1.0176806449890137,grad_norm: 0.9999990426764322, iteration: 318940
loss: 1.2733529806137085,grad_norm: 0.9999999073928092, iteration: 318941
loss: 1.0189138650894165,grad_norm: 0.9387561843746394, iteration: 318942
loss: 0.9968758225440979,grad_norm: 0.9074023748495873, iteration: 318943
loss: 1.0580581426620483,grad_norm: 0.9999999682127826, iteration: 318944
loss: 0.9720059633255005,grad_norm: 0.7270215425222701, iteration: 318945
loss: 1.0228360891342163,grad_norm: 0.9999994237141258, iteration: 318946
loss: 1.0002309083938599,grad_norm: 0.999999641649393, iteration: 318947
loss: 1.013253092765808,grad_norm: 0.9380706568562646, iteration: 318948
loss: 0.9771731495857239,grad_norm: 0.7820221614712773, iteration: 318949
loss: 0.9617342352867126,grad_norm: 0.962425223343792, iteration: 318950
loss: 1.0217045545578003,grad_norm: 0.9999993835011569, iteration: 318951
loss: 1.0071769952774048,grad_norm: 0.9200177693890826, iteration: 318952
loss: 1.0151258707046509,grad_norm: 0.8466474343889843, iteration: 318953
loss: 0.9809922575950623,grad_norm: 0.7779077006773681, iteration: 318954
loss: 1.029078483581543,grad_norm: 0.9656276026322271, iteration: 318955
loss: 1.0071032047271729,grad_norm: 0.8312852324116984, iteration: 318956
loss: 0.9843079447746277,grad_norm: 0.6868619173869229, iteration: 318957
loss: 1.0702720880508423,grad_norm: 0.6660533035643551, iteration: 318958
loss: 0.9703085422515869,grad_norm: 0.7798971269111824, iteration: 318959
loss: 1.0139880180358887,grad_norm: 0.8482947343756347, iteration: 318960
loss: 0.9717113375663757,grad_norm: 0.808953200177435, iteration: 318961
loss: 1.0722925662994385,grad_norm: 0.999999176730648, iteration: 318962
loss: 1.006574273109436,grad_norm: 0.9999990555176803, iteration: 318963
loss: 1.001880407333374,grad_norm: 0.9428871363946136, iteration: 318964
loss: 1.0039910078048706,grad_norm: 0.8165711271781081, iteration: 318965
loss: 1.0057058334350586,grad_norm: 0.7654034001466584, iteration: 318966
loss: 1.0494309663772583,grad_norm: 0.9224878521873281, iteration: 318967
loss: 1.0094033479690552,grad_norm: 0.9829322907302558, iteration: 318968
loss: 1.2190783023834229,grad_norm: 0.9999996735795111, iteration: 318969
loss: 1.0168012380599976,grad_norm: 0.8807424126078384, iteration: 318970
loss: 1.0056428909301758,grad_norm: 0.7099841223025002, iteration: 318971
loss: 1.0494587421417236,grad_norm: 0.8006023342518183, iteration: 318972
loss: 1.0146348476409912,grad_norm: 0.9999989611897562, iteration: 318973
loss: 1.0155378580093384,grad_norm: 0.898645835647175, iteration: 318974
loss: 0.9706586599349976,grad_norm: 0.8330206348638955, iteration: 318975
loss: 0.9866179823875427,grad_norm: 0.9999990780129022, iteration: 318976
loss: 0.9881842732429504,grad_norm: 0.9999995765051265, iteration: 318977
loss: 1.0525853633880615,grad_norm: 0.9999997121729687, iteration: 318978
loss: 0.9899097681045532,grad_norm: 0.8911200575635987, iteration: 318979
loss: 0.9965329170227051,grad_norm: 0.9612097123523816, iteration: 318980
loss: 1.033866047859192,grad_norm: 0.9448128789376395, iteration: 318981
loss: 1.0113005638122559,grad_norm: 0.9999990106799815, iteration: 318982
loss: 1.062853217124939,grad_norm: 0.7409693146942483, iteration: 318983
loss: 1.0101878643035889,grad_norm: 0.7574470421264717, iteration: 318984
loss: 1.0090796947479248,grad_norm: 0.711014947282053, iteration: 318985
loss: 1.0050913095474243,grad_norm: 0.9047682738779719, iteration: 318986
loss: 1.0151060819625854,grad_norm: 0.7364547143728838, iteration: 318987
loss: 1.0094935894012451,grad_norm: 0.957917340438042, iteration: 318988
loss: 1.0271984338760376,grad_norm: 0.9999992289464809, iteration: 318989
loss: 1.0009181499481201,grad_norm: 0.8391470839954458, iteration: 318990
loss: 1.0597764253616333,grad_norm: 0.9999993243040721, iteration: 318991
loss: 1.0187159776687622,grad_norm: 0.9165718515723393, iteration: 318992
loss: 1.033490538597107,grad_norm: 0.9999994096939737, iteration: 318993
loss: 0.9993427395820618,grad_norm: 0.8660202082713871, iteration: 318994
loss: 1.0057594776153564,grad_norm: 0.8945913897305896, iteration: 318995
loss: 0.9761874675750732,grad_norm: 0.8937168788162247, iteration: 318996
loss: 1.0639605522155762,grad_norm: 0.999999400088679, iteration: 318997
loss: 1.0039236545562744,grad_norm: 0.7550325767877766, iteration: 318998
loss: 1.0101474523544312,grad_norm: 0.7459449974910729, iteration: 318999
loss: 1.0554379224777222,grad_norm: 0.8613842021945345, iteration: 319000
loss: 1.0509339570999146,grad_norm: 0.8691007314526148, iteration: 319001
loss: 1.0469481945037842,grad_norm: 0.8686717736959692, iteration: 319002
loss: 1.0524121522903442,grad_norm: 0.8945550702408313, iteration: 319003
loss: 1.0520306825637817,grad_norm: 0.8183897918083272, iteration: 319004
loss: 1.0844295024871826,grad_norm: 0.9999994249049755, iteration: 319005
loss: 1.1431533098220825,grad_norm: 0.9999992490164444, iteration: 319006
loss: 1.0208603143692017,grad_norm: 0.7829840582621285, iteration: 319007
loss: 0.9688662886619568,grad_norm: 0.772799511582667, iteration: 319008
loss: 0.995185136795044,grad_norm: 0.9999992826887243, iteration: 319009
loss: 0.975664496421814,grad_norm: 0.7165222777823087, iteration: 319010
loss: 1.0063780546188354,grad_norm: 0.8376782440456916, iteration: 319011
loss: 1.0224418640136719,grad_norm: 0.8429392778787446, iteration: 319012
loss: 0.988684892654419,grad_norm: 0.8327302200889158, iteration: 319013
loss: 0.9801906943321228,grad_norm: 0.8008915529323282, iteration: 319014
loss: 1.0763248205184937,grad_norm: 0.9691767292691608, iteration: 319015
loss: 0.9983578324317932,grad_norm: 0.7751878580751183, iteration: 319016
loss: 1.0043586492538452,grad_norm: 0.9274585099612924, iteration: 319017
loss: 1.0111079216003418,grad_norm: 0.8553403789331419, iteration: 319018
loss: 1.0552070140838623,grad_norm: 0.9999992115427178, iteration: 319019
loss: 1.0132228136062622,grad_norm: 0.8394864934966256, iteration: 319020
loss: 1.0844954252243042,grad_norm: 1.0000000654021797, iteration: 319021
loss: 1.0208905935287476,grad_norm: 0.9999990487484411, iteration: 319022
loss: 1.1043683290481567,grad_norm: 0.9999999455461908, iteration: 319023
loss: 0.9737389087677002,grad_norm: 0.9999990527403655, iteration: 319024
loss: 0.9967806935310364,grad_norm: 0.8729232044388141, iteration: 319025
loss: 1.007875919342041,grad_norm: 0.8208477813045415, iteration: 319026
loss: 1.0139154195785522,grad_norm: 0.7914000098147735, iteration: 319027
loss: 0.9745159149169922,grad_norm: 0.8918707707964036, iteration: 319028
loss: 1.0550223588943481,grad_norm: 0.9999998481240858, iteration: 319029
loss: 0.9875781536102295,grad_norm: 0.9999998339929912, iteration: 319030
loss: 1.0296194553375244,grad_norm: 0.9999992886746332, iteration: 319031
loss: 1.0011518001556396,grad_norm: 0.8956242764906174, iteration: 319032
loss: 0.9827173948287964,grad_norm: 0.9999990741840835, iteration: 319033
loss: 0.9914206862449646,grad_norm: 0.9999992359941803, iteration: 319034
loss: 1.018070936203003,grad_norm: 0.9579621057032716, iteration: 319035
loss: 0.9836824536323547,grad_norm: 0.7254829132350694, iteration: 319036
loss: 0.9642890095710754,grad_norm: 0.836664437335782, iteration: 319037
loss: 1.0308666229248047,grad_norm: 0.8966265726332223, iteration: 319038
loss: 1.0143728256225586,grad_norm: 0.7983103975208008, iteration: 319039
loss: 1.0274468660354614,grad_norm: 0.773664419466922, iteration: 319040
loss: 1.0228649377822876,grad_norm: 0.8451339962424549, iteration: 319041
loss: 1.021927833557129,grad_norm: 0.8115484586850109, iteration: 319042
loss: 0.9987014532089233,grad_norm: 0.9999990400556853, iteration: 319043
loss: 1.0291608572006226,grad_norm: 0.7954538691560735, iteration: 319044
loss: 1.007398009300232,grad_norm: 0.7010011995071205, iteration: 319045
loss: 1.0091830492019653,grad_norm: 0.7473654352999684, iteration: 319046
loss: 0.9942896962165833,grad_norm: 0.7766331660962089, iteration: 319047
loss: 1.0226483345031738,grad_norm: 0.9516596942089788, iteration: 319048
loss: 1.0167123079299927,grad_norm: 0.9611561850427887, iteration: 319049
loss: 1.0834237337112427,grad_norm: 0.9999994118715098, iteration: 319050
loss: 1.021985411643982,grad_norm: 0.8878807431463328, iteration: 319051
loss: 0.9938250780105591,grad_norm: 0.9980481405830828, iteration: 319052
loss: 0.9825019836425781,grad_norm: 0.8738293964131125, iteration: 319053
loss: 1.0218156576156616,grad_norm: 0.8028682301995541, iteration: 319054
loss: 0.9878275394439697,grad_norm: 0.9225963460527575, iteration: 319055
loss: 0.9690051674842834,grad_norm: 0.8366904121859458, iteration: 319056
loss: 0.9490455389022827,grad_norm: 0.9999990590337872, iteration: 319057
loss: 1.01028311252594,grad_norm: 0.7521726154871908, iteration: 319058
loss: 0.9929513335227966,grad_norm: 0.8229684161907531, iteration: 319059
loss: 1.1294301748275757,grad_norm: 0.9999993164875723, iteration: 319060
loss: 1.0380210876464844,grad_norm: 0.9448682111782256, iteration: 319061
loss: 0.9679194688796997,grad_norm: 0.9875685841471946, iteration: 319062
loss: 1.0188257694244385,grad_norm: 0.6865704341269548, iteration: 319063
loss: 1.0070688724517822,grad_norm: 0.9098927758012954, iteration: 319064
loss: 1.03811776638031,grad_norm: 0.9999993631081615, iteration: 319065
loss: 0.9910153150558472,grad_norm: 0.8422519897378257, iteration: 319066
loss: 1.0636361837387085,grad_norm: 0.9999992292470046, iteration: 319067
loss: 1.0117590427398682,grad_norm: 0.7859810573663922, iteration: 319068
loss: 0.9879084825515747,grad_norm: 0.999999896761403, iteration: 319069
loss: 0.9916067123413086,grad_norm: 0.8306757641805509, iteration: 319070
loss: 1.0038762092590332,grad_norm: 0.7545740544077257, iteration: 319071
loss: 1.0056594610214233,grad_norm: 0.6986861193781684, iteration: 319072
loss: 1.015457034111023,grad_norm: 0.9712886621828316, iteration: 319073
loss: 1.0349751710891724,grad_norm: 0.7469808714292586, iteration: 319074
loss: 0.9986204504966736,grad_norm: 0.89307138072437, iteration: 319075
loss: 1.0222172737121582,grad_norm: 0.848110138047255, iteration: 319076
loss: 1.0480570793151855,grad_norm: 0.8491090591052348, iteration: 319077
loss: 1.0054373741149902,grad_norm: 0.7031160187559499, iteration: 319078
loss: 1.0202438831329346,grad_norm: 0.8674897661250813, iteration: 319079
loss: 0.9895674586296082,grad_norm: 0.9999999295371188, iteration: 319080
loss: 1.0419448614120483,grad_norm: 0.8669555046224778, iteration: 319081
loss: 1.0247082710266113,grad_norm: 0.7233442472429251, iteration: 319082
loss: 0.976213812828064,grad_norm: 0.999999153484721, iteration: 319083
loss: 1.0140695571899414,grad_norm: 0.9999997451759594, iteration: 319084
loss: 1.003774881362915,grad_norm: 0.748664561773685, iteration: 319085
loss: 1.0022282600402832,grad_norm: 0.99999902928453, iteration: 319086
loss: 0.9932221174240112,grad_norm: 0.8571102408931623, iteration: 319087
loss: 0.9727112054824829,grad_norm: 0.8799613204560358, iteration: 319088
loss: 0.9939260482788086,grad_norm: 0.999999589178485, iteration: 319089
loss: 0.9879061579704285,grad_norm: 0.9029897656398255, iteration: 319090
loss: 0.9986154437065125,grad_norm: 0.8062432799824054, iteration: 319091
loss: 0.9704661965370178,grad_norm: 0.9314432052539098, iteration: 319092
loss: 1.0108457803726196,grad_norm: 0.9318371235454121, iteration: 319093
loss: 1.0119746923446655,grad_norm: 0.6846442889715539, iteration: 319094
loss: 1.0301721096038818,grad_norm: 0.7789821898698436, iteration: 319095
loss: 1.0003740787506104,grad_norm: 0.6667137541640177, iteration: 319096
loss: 1.010145902633667,grad_norm: 0.9770313590442946, iteration: 319097
loss: 0.9960623979568481,grad_norm: 0.8082151584516104, iteration: 319098
loss: 0.9989595413208008,grad_norm: 0.7067017109291599, iteration: 319099
loss: 1.0417016744613647,grad_norm: 0.9999995224099193, iteration: 319100
loss: 0.9894546866416931,grad_norm: 0.9770757104149127, iteration: 319101
loss: 1.0210473537445068,grad_norm: 0.7675497437375554, iteration: 319102
loss: 1.018811583518982,grad_norm: 0.7337748742446296, iteration: 319103
loss: 1.0041269063949585,grad_norm: 0.9092060619148611, iteration: 319104
loss: 0.99956876039505,grad_norm: 0.7982052948060157, iteration: 319105
loss: 1.0706255435943604,grad_norm: 0.9999990390353258, iteration: 319106
loss: 1.0630937814712524,grad_norm: 0.9542099808474386, iteration: 319107
loss: 1.1667920351028442,grad_norm: 0.9999996858317276, iteration: 319108
loss: 0.9658603072166443,grad_norm: 0.8682022892354727, iteration: 319109
loss: 1.0173348188400269,grad_norm: 0.8397070711757028, iteration: 319110
loss: 0.9729976654052734,grad_norm: 0.8226532926144484, iteration: 319111
loss: 1.0072325468063354,grad_norm: 0.9017720866200669, iteration: 319112
loss: 0.9841328263282776,grad_norm: 0.8085782837472251, iteration: 319113
loss: 0.9974419474601746,grad_norm: 1.0000000278970707, iteration: 319114
loss: 1.003667950630188,grad_norm: 0.6135331586714777, iteration: 319115
loss: 1.0354655981063843,grad_norm: 0.9999997243511227, iteration: 319116
loss: 0.997782289981842,grad_norm: 0.7524178461212943, iteration: 319117
loss: 0.9930647015571594,grad_norm: 0.9999991372505217, iteration: 319118
loss: 0.9840283393859863,grad_norm: 0.7317985119313519, iteration: 319119
loss: 1.0163120031356812,grad_norm: 0.9999997023479141, iteration: 319120
loss: 0.9877090454101562,grad_norm: 0.882971699478076, iteration: 319121
loss: 0.963103711605072,grad_norm: 0.887601268542976, iteration: 319122
loss: 0.9924618601799011,grad_norm: 0.7885194972781688, iteration: 319123
loss: 0.9782283902168274,grad_norm: 0.8522828961282225, iteration: 319124
loss: 1.0404720306396484,grad_norm: 0.9999990681285506, iteration: 319125
loss: 1.0297300815582275,grad_norm: 0.6378686569398498, iteration: 319126
loss: 1.0710495710372925,grad_norm: 0.999999407901762, iteration: 319127
loss: 0.9857038855552673,grad_norm: 0.8009282278940141, iteration: 319128
loss: 1.0069003105163574,grad_norm: 0.7639794038183879, iteration: 319129
loss: 1.0104106664657593,grad_norm: 0.8061567507024052, iteration: 319130
loss: 1.0320014953613281,grad_norm: 0.9200592433634073, iteration: 319131
loss: 0.9794244170188904,grad_norm: 0.7461004924094343, iteration: 319132
loss: 1.018735408782959,grad_norm: 0.8466267195676883, iteration: 319133
loss: 0.9961417317390442,grad_norm: 0.8575672825940373, iteration: 319134
loss: 1.0365465879440308,grad_norm: 0.7310525114959238, iteration: 319135
loss: 1.0089471340179443,grad_norm: 0.9999990762424407, iteration: 319136
loss: 0.986282229423523,grad_norm: 0.8135123935809465, iteration: 319137
loss: 0.9877196550369263,grad_norm: 0.9603641297724753, iteration: 319138
loss: 0.9765385389328003,grad_norm: 0.9225372284427721, iteration: 319139
loss: 0.9949244856834412,grad_norm: 0.8582640767838342, iteration: 319140
loss: 1.004036545753479,grad_norm: 0.8151844405794411, iteration: 319141
loss: 0.9999930262565613,grad_norm: 0.7518770546058671, iteration: 319142
loss: 0.9668727517127991,grad_norm: 0.8829245485669248, iteration: 319143
loss: 1.1421400308609009,grad_norm: 0.9996118566067576, iteration: 319144
loss: 1.0176231861114502,grad_norm: 0.9999989795372147, iteration: 319145
loss: 1.035979151725769,grad_norm: 0.999999423951986, iteration: 319146
loss: 0.9805141091346741,grad_norm: 0.781408988106413, iteration: 319147
loss: 1.057347059249878,grad_norm: 0.7818505093026199, iteration: 319148
loss: 1.0132927894592285,grad_norm: 0.8569207037626054, iteration: 319149
loss: 1.001557469367981,grad_norm: 0.9056984899572764, iteration: 319150
loss: 1.0272963047027588,grad_norm: 0.9467608844461926, iteration: 319151
loss: 0.9985423684120178,grad_norm: 0.9060840893330286, iteration: 319152
loss: 0.9852328300476074,grad_norm: 0.7724993297414829, iteration: 319153
loss: 0.9626103043556213,grad_norm: 0.8244791795496637, iteration: 319154
loss: 1.0675561428070068,grad_norm: 0.9999991081309858, iteration: 319155
loss: 1.0156632661819458,grad_norm: 0.8776971581808127, iteration: 319156
loss: 1.0256414413452148,grad_norm: 0.9999993327406197, iteration: 319157
loss: 1.0368036031723022,grad_norm: 0.9999990658858491, iteration: 319158
loss: 0.9919173717498779,grad_norm: 0.874306856710432, iteration: 319159
loss: 1.0248172283172607,grad_norm: 0.9999995881349696, iteration: 319160
loss: 0.9823383688926697,grad_norm: 0.9232499496706307, iteration: 319161
loss: 1.0217208862304688,grad_norm: 0.9999998152982417, iteration: 319162
loss: 0.9996364116668701,grad_norm: 0.7989603946743086, iteration: 319163
loss: 1.0259462594985962,grad_norm: 0.8603497853816879, iteration: 319164
loss: 1.0168226957321167,grad_norm: 0.7675427669575824, iteration: 319165
loss: 0.9865219593048096,grad_norm: 0.9291438583173262, iteration: 319166
loss: 0.9784683585166931,grad_norm: 0.7699582590134997, iteration: 319167
loss: 0.9527587890625,grad_norm: 0.8385219058405146, iteration: 319168
loss: 0.9755039215087891,grad_norm: 0.855953215098856, iteration: 319169
loss: 1.0113338232040405,grad_norm: 0.9999994990422658, iteration: 319170
loss: 1.0875725746154785,grad_norm: 1.000000073319506, iteration: 319171
loss: 0.9998504519462585,grad_norm: 0.7103032837843573, iteration: 319172
loss: 1.012257695198059,grad_norm: 0.8722259918768461, iteration: 319173
loss: 0.9825931191444397,grad_norm: 0.8298826118492116, iteration: 319174
loss: 0.9830049872398376,grad_norm: 0.7702013904676114, iteration: 319175
loss: 1.0192921161651611,grad_norm: 0.9999990815076798, iteration: 319176
loss: 0.9635740518569946,grad_norm: 0.8770271719702404, iteration: 319177
loss: 1.0186858177185059,grad_norm: 0.8576130700931368, iteration: 319178
loss: 0.9880053997039795,grad_norm: 0.7157483346513945, iteration: 319179
loss: 1.0446313619613647,grad_norm: 0.9554998996645, iteration: 319180
loss: 1.0087172985076904,grad_norm: 0.9999995371370044, iteration: 319181
loss: 0.9991621375083923,grad_norm: 0.7126681176170022, iteration: 319182
loss: 1.0354920625686646,grad_norm: 0.9999995470826712, iteration: 319183
loss: 0.9829057455062866,grad_norm: 0.8122404101914972, iteration: 319184
loss: 1.0065046548843384,grad_norm: 0.9830813536523905, iteration: 319185
loss: 0.966159999370575,grad_norm: 0.8594416999172998, iteration: 319186
loss: 0.9884464740753174,grad_norm: 0.9999991937639876, iteration: 319187
loss: 1.0372772216796875,grad_norm: 0.9999996316290098, iteration: 319188
loss: 1.0207804441452026,grad_norm: 0.8270517839901168, iteration: 319189
loss: 1.0082656145095825,grad_norm: 0.7204286814199412, iteration: 319190
loss: 0.9811434149742126,grad_norm: 0.8965583095638303, iteration: 319191
loss: 1.0019763708114624,grad_norm: 0.6719586277174764, iteration: 319192
loss: 1.0086002349853516,grad_norm: 0.9551947029595164, iteration: 319193
loss: 0.9896891713142395,grad_norm: 0.847956236504401, iteration: 319194
loss: 0.9929742813110352,grad_norm: 0.99999912357766, iteration: 319195
loss: 1.0038514137268066,grad_norm: 0.9656979891184427, iteration: 319196
loss: 1.0192480087280273,grad_norm: 0.744383455658552, iteration: 319197
loss: 1.005827784538269,grad_norm: 0.7604833295649122, iteration: 319198
loss: 0.9658679962158203,grad_norm: 0.9323288062242434, iteration: 319199
loss: 1.0019831657409668,grad_norm: 0.8118124559578211, iteration: 319200
loss: 1.0928120613098145,grad_norm: 0.9999998990792249, iteration: 319201
loss: 1.0190460681915283,grad_norm: 0.9090820358689464, iteration: 319202
loss: 1.0366973876953125,grad_norm: 0.8421328650682263, iteration: 319203
loss: 0.9328562021255493,grad_norm: 0.9921463773570136, iteration: 319204
loss: 0.9657328128814697,grad_norm: 0.7894811272560137, iteration: 319205
loss: 1.0437877178192139,grad_norm: 0.999999609211659, iteration: 319206
loss: 1.0103620290756226,grad_norm: 0.9374803382726383, iteration: 319207
loss: 0.9486709833145142,grad_norm: 0.9999991060544031, iteration: 319208
loss: 1.0094801187515259,grad_norm: 0.8166017614555119, iteration: 319209
loss: 1.0073598623275757,grad_norm: 0.8579389802137248, iteration: 319210
loss: 1.0061752796173096,grad_norm: 0.9004787175469813, iteration: 319211
loss: 1.0016210079193115,grad_norm: 0.9999998584962286, iteration: 319212
loss: 0.9904283881187439,grad_norm: 0.7365511751038083, iteration: 319213
loss: 1.020538330078125,grad_norm: 0.9142909021188594, iteration: 319214
loss: 1.0003544092178345,grad_norm: 0.9999991283545182, iteration: 319215
loss: 1.11130690574646,grad_norm: 0.9999991913604965, iteration: 319216
loss: 0.9900067448616028,grad_norm: 0.9999992282180976, iteration: 319217
loss: 1.0110429525375366,grad_norm: 0.9234454608550651, iteration: 319218
loss: 0.9886602759361267,grad_norm: 0.770685751496297, iteration: 319219
loss: 0.9774616360664368,grad_norm: 0.9999996165122932, iteration: 319220
loss: 0.9885075092315674,grad_norm: 0.924807094265391, iteration: 319221
loss: 1.0198853015899658,grad_norm: 0.9906981312799571, iteration: 319222
loss: 0.9739702939987183,grad_norm: 0.9534465686522953, iteration: 319223
loss: 0.9952362179756165,grad_norm: 0.9999994326848469, iteration: 319224
loss: 0.9928104877471924,grad_norm: 0.8653071844073257, iteration: 319225
loss: 1.0278446674346924,grad_norm: 0.7513145132223251, iteration: 319226
loss: 0.9693994522094727,grad_norm: 0.9310318726616271, iteration: 319227
loss: 1.0286016464233398,grad_norm: 0.8324322115619022, iteration: 319228
loss: 1.0051050186157227,grad_norm: 0.8429248810639582, iteration: 319229
loss: 0.9599169492721558,grad_norm: 0.9567810289706946, iteration: 319230
loss: 1.0031378269195557,grad_norm: 0.8038692490535747, iteration: 319231
loss: 0.9711151123046875,grad_norm: 0.849637301029555, iteration: 319232
loss: 1.0559148788452148,grad_norm: 0.9999997828950554, iteration: 319233
loss: 1.0647869110107422,grad_norm: 0.9999993371684734, iteration: 319234
loss: 1.0201911926269531,grad_norm: 0.87976431151639, iteration: 319235
loss: 1.0251142978668213,grad_norm: 0.8895651372226696, iteration: 319236
loss: 0.9888955950737,grad_norm: 0.8046011676427939, iteration: 319237
loss: 0.9996022582054138,grad_norm: 0.6974532849489169, iteration: 319238
loss: 1.0341401100158691,grad_norm: 0.7831614610114078, iteration: 319239
loss: 0.9903059601783752,grad_norm: 0.763973333454646, iteration: 319240
loss: 1.0047167539596558,grad_norm: 0.8071882313116805, iteration: 319241
loss: 0.997562825679779,grad_norm: 0.872994044271026, iteration: 319242
loss: 0.9628110527992249,grad_norm: 0.8297589076347771, iteration: 319243
loss: 0.9689247012138367,grad_norm: 0.9258753272169109, iteration: 319244
loss: 1.0334442853927612,grad_norm: 0.9644338325392244, iteration: 319245
loss: 1.0645177364349365,grad_norm: 0.9999995158208173, iteration: 319246
loss: 0.966178297996521,grad_norm: 0.7822233602291284, iteration: 319247
loss: 1.0652134418487549,grad_norm: 0.8054824311547082, iteration: 319248
loss: 1.016690969467163,grad_norm: 0.999999149309356, iteration: 319249
loss: 0.9857179522514343,grad_norm: 0.7568692958805417, iteration: 319250
loss: 0.9936483502388,grad_norm: 0.8012187501207902, iteration: 319251
loss: 0.9787458777427673,grad_norm: 0.8521572371882409, iteration: 319252
loss: 1.0011236667633057,grad_norm: 0.7578656071436949, iteration: 319253
loss: 1.012404441833496,grad_norm: 0.9871973192414307, iteration: 319254
loss: 1.0028150081634521,grad_norm: 0.9360702780744905, iteration: 319255
loss: 0.9798790216445923,grad_norm: 0.900244693321035, iteration: 319256
loss: 1.0355924367904663,grad_norm: 0.9999989955365424, iteration: 319257
loss: 1.0336381196975708,grad_norm: 0.8332165391217936, iteration: 319258
loss: 1.049890398979187,grad_norm: 0.8559531111324721, iteration: 319259
loss: 0.9769663214683533,grad_norm: 0.7899077289917302, iteration: 319260
loss: 1.025719404220581,grad_norm: 0.8486532569066132, iteration: 319261
loss: 1.0363951921463013,grad_norm: 0.985123494898035, iteration: 319262
loss: 1.027308702468872,grad_norm: 0.9999991288868014, iteration: 319263
loss: 1.1384737491607666,grad_norm: 0.9162307526290413, iteration: 319264
loss: 1.0266082286834717,grad_norm: 0.7398298417224262, iteration: 319265
loss: 1.00297212600708,grad_norm: 0.8632749677767511, iteration: 319266
loss: 1.0242936611175537,grad_norm: 0.9569329512845262, iteration: 319267
loss: 1.029119610786438,grad_norm: 0.8975349954782758, iteration: 319268
loss: 0.9848875403404236,grad_norm: 0.9999998640975901, iteration: 319269
loss: 1.013056993484497,grad_norm: 0.8691028912059493, iteration: 319270
loss: 0.9644668698310852,grad_norm: 0.7435368570930017, iteration: 319271
loss: 1.018830418586731,grad_norm: 0.974636554387498, iteration: 319272
loss: 1.002844214439392,grad_norm: 0.8931993581032092, iteration: 319273
loss: 1.030073642730713,grad_norm: 0.7698064251741387, iteration: 319274
loss: 1.001038908958435,grad_norm: 0.8759334616417045, iteration: 319275
loss: 0.9850316047668457,grad_norm: 0.9474085336421076, iteration: 319276
loss: 0.9804196357727051,grad_norm: 0.8313388442902827, iteration: 319277
loss: 1.017174243927002,grad_norm: 0.9198572120018697, iteration: 319278
loss: 1.0223332643508911,grad_norm: 0.7845342945776794, iteration: 319279
loss: 0.9959713816642761,grad_norm: 0.856862684955908, iteration: 319280
loss: 0.9863317608833313,grad_norm: 0.9999990188046498, iteration: 319281
loss: 1.0212031602859497,grad_norm: 0.7483007937962101, iteration: 319282
loss: 1.051745891571045,grad_norm: 0.9999997773629675, iteration: 319283
loss: 1.0224597454071045,grad_norm: 0.9275788485505803, iteration: 319284
loss: 1.0264906883239746,grad_norm: 0.7049454736985549, iteration: 319285
loss: 1.0215867757797241,grad_norm: 0.9841218325788109, iteration: 319286
loss: 0.9805623888969421,grad_norm: 0.7444073719114341, iteration: 319287
loss: 1.0198206901550293,grad_norm: 0.7645531719157893, iteration: 319288
loss: 1.002455234527588,grad_norm: 0.8467393530067358, iteration: 319289
loss: 0.9755642414093018,grad_norm: 0.8143907687576623, iteration: 319290
loss: 1.0157506465911865,grad_norm: 0.958741368288381, iteration: 319291
loss: 1.0052778720855713,grad_norm: 0.948795989929483, iteration: 319292
loss: 1.044597864151001,grad_norm: 0.7378870550230635, iteration: 319293
loss: 1.0023974180221558,grad_norm: 0.8011397073940519, iteration: 319294
loss: 0.9928336143493652,grad_norm: 0.8667427273740692, iteration: 319295
loss: 1.005946159362793,grad_norm: 0.7755777759500111, iteration: 319296
loss: 1.1118046045303345,grad_norm: 0.8728500979042073, iteration: 319297
loss: 1.0911866426467896,grad_norm: 0.9999997463601665, iteration: 319298
loss: 0.99297034740448,grad_norm: 0.8352544434052586, iteration: 319299
loss: 1.0236841440200806,grad_norm: 0.999999135722205, iteration: 319300
loss: 1.0071049928665161,grad_norm: 0.8302670778026036, iteration: 319301
loss: 1.0088074207305908,grad_norm: 0.8643568582198966, iteration: 319302
loss: 1.0008171796798706,grad_norm: 0.8210007424190711, iteration: 319303
loss: 1.0404491424560547,grad_norm: 0.9999990418238981, iteration: 319304
loss: 0.9979242086410522,grad_norm: 0.8761133651508229, iteration: 319305
loss: 0.9802125096321106,grad_norm: 0.8115911886410192, iteration: 319306
loss: 0.9743045568466187,grad_norm: 0.8348424536856089, iteration: 319307
loss: 1.078600287437439,grad_norm: 0.855744032358472, iteration: 319308
loss: 0.9696336388587952,grad_norm: 0.7112080103221279, iteration: 319309
loss: 0.9806448817253113,grad_norm: 0.8411181093645763, iteration: 319310
loss: 0.9795839786529541,grad_norm: 0.7719953272078267, iteration: 319311
loss: 0.9887771606445312,grad_norm: 0.8861035372477285, iteration: 319312
loss: 1.0017012357711792,grad_norm: 0.9342025767106779, iteration: 319313
loss: 0.9727712273597717,grad_norm: 0.8517769037981726, iteration: 319314
loss: 1.0378490686416626,grad_norm: 0.9840845480535537, iteration: 319315
loss: 1.010542869567871,grad_norm: 0.8314242341424796, iteration: 319316
loss: 1.030784010887146,grad_norm: 0.8693666775127973, iteration: 319317
loss: 0.9764068126678467,grad_norm: 0.732076443547686, iteration: 319318
loss: 0.9508182406425476,grad_norm: 0.8784809600274281, iteration: 319319
loss: 0.9693747758865356,grad_norm: 0.7906117444939728, iteration: 319320
loss: 0.99627286195755,grad_norm: 0.7453717354104734, iteration: 319321
loss: 1.0050638914108276,grad_norm: 0.8979806679963633, iteration: 319322
loss: 1.0333220958709717,grad_norm: 0.7935279549783825, iteration: 319323
loss: 1.0070710182189941,grad_norm: 0.8799008936986841, iteration: 319324
loss: 1.0392439365386963,grad_norm: 0.8433623807771677, iteration: 319325
loss: 0.9646685719490051,grad_norm: 0.8821711983020936, iteration: 319326
loss: 1.004223346710205,grad_norm: 0.9030216544767729, iteration: 319327
loss: 1.0003197193145752,grad_norm: 0.687512196781298, iteration: 319328
loss: 0.9632980227470398,grad_norm: 0.9615762285193488, iteration: 319329
loss: 1.0082579851150513,grad_norm: 0.8411080584943376, iteration: 319330
loss: 0.9799934029579163,grad_norm: 0.8522789923361824, iteration: 319331
loss: 1.0297651290893555,grad_norm: 0.7355250554376452, iteration: 319332
loss: 0.9821773171424866,grad_norm: 0.8289449289060252, iteration: 319333
loss: 0.9781044125556946,grad_norm: 0.8050019326340848, iteration: 319334
loss: 1.0701236724853516,grad_norm: 0.9172763422552083, iteration: 319335
loss: 1.0207282304763794,grad_norm: 0.933698726943891, iteration: 319336
loss: 1.0193781852722168,grad_norm: 0.9999997372346301, iteration: 319337
loss: 0.9794995784759521,grad_norm: 0.624325260564923, iteration: 319338
loss: 0.998660683631897,grad_norm: 0.9081836029157376, iteration: 319339
loss: 0.9856269359588623,grad_norm: 0.9308757798463371, iteration: 319340
loss: 1.0212525129318237,grad_norm: 0.7009764802727237, iteration: 319341
loss: 1.0160706043243408,grad_norm: 0.9136583072488608, iteration: 319342
loss: 1.0430506467819214,grad_norm: 0.8882302711859259, iteration: 319343
loss: 0.96913081407547,grad_norm: 0.8777160823459834, iteration: 319344
loss: 1.0045020580291748,grad_norm: 0.7455891372386427, iteration: 319345
loss: 1.003067970275879,grad_norm: 0.9999992945789951, iteration: 319346
loss: 1.0200414657592773,grad_norm: 0.775374397137856, iteration: 319347
loss: 0.9677720069885254,grad_norm: 0.786864331755046, iteration: 319348
loss: 1.0272327661514282,grad_norm: 0.8288598970269963, iteration: 319349
loss: 0.9606826901435852,grad_norm: 0.8886017416099461, iteration: 319350
loss: 1.0572888851165771,grad_norm: 0.9999998647436349, iteration: 319351
loss: 0.9564791321754456,grad_norm: 0.8762495674451013, iteration: 319352
loss: 1.016330599784851,grad_norm: 0.6729421246685021, iteration: 319353
loss: 0.9971020817756653,grad_norm: 0.6902862360309718, iteration: 319354
loss: 1.0852683782577515,grad_norm: 0.9999999005359271, iteration: 319355
loss: 0.9983009099960327,grad_norm: 0.6999455815088658, iteration: 319356
loss: 1.0309866666793823,grad_norm: 0.9999996400408625, iteration: 319357
loss: 1.009057879447937,grad_norm: 0.9999992217332296, iteration: 319358
loss: 1.0897225141525269,grad_norm: 0.8370060230555518, iteration: 319359
loss: 1.0370351076126099,grad_norm: 0.9999992237736998, iteration: 319360
loss: 0.9979824423789978,grad_norm: 0.97750226552636, iteration: 319361
loss: 0.986194372177124,grad_norm: 0.8370026117633962, iteration: 319362
loss: 0.985967218875885,grad_norm: 0.8171084127435236, iteration: 319363
loss: 1.0340403318405151,grad_norm: 0.9999998357942441, iteration: 319364
loss: 0.994320809841156,grad_norm: 0.7550790772943945, iteration: 319365
loss: 1.0340242385864258,grad_norm: 0.8681405926928139, iteration: 319366
loss: 1.0141104459762573,grad_norm: 0.93535547608328, iteration: 319367
loss: 0.9962305426597595,grad_norm: 0.8161171394688487, iteration: 319368
loss: 1.0033400058746338,grad_norm: 0.7080478197515025, iteration: 319369
loss: 1.012877106666565,grad_norm: 0.9147425430067978, iteration: 319370
loss: 0.985306441783905,grad_norm: 0.6943224835956634, iteration: 319371
loss: 0.9666368365287781,grad_norm: 0.754944266177693, iteration: 319372
loss: 1.0043563842773438,grad_norm: 0.8430256966416846, iteration: 319373
loss: 1.002570629119873,grad_norm: 0.9479036871148896, iteration: 319374
loss: 0.9916326403617859,grad_norm: 0.9521480860072021, iteration: 319375
loss: 1.007595419883728,grad_norm: 0.9999998886990479, iteration: 319376
loss: 0.9777790307998657,grad_norm: 0.7420870221432352, iteration: 319377
loss: 1.0953835248947144,grad_norm: 0.9999991440821415, iteration: 319378
loss: 0.986376941204071,grad_norm: 0.9945480072214348, iteration: 319379
loss: 0.9448234438896179,grad_norm: 0.8243049014349053, iteration: 319380
loss: 1.022188425064087,grad_norm: 0.8835742488437075, iteration: 319381
loss: 0.9972395300865173,grad_norm: 0.7715321448864622, iteration: 319382
loss: 0.9710865616798401,grad_norm: 0.8824805648045597, iteration: 319383
loss: 0.9978604316711426,grad_norm: 0.7215291089710172, iteration: 319384
loss: 1.0227758884429932,grad_norm: 0.9999995448390584, iteration: 319385
loss: 1.0099143981933594,grad_norm: 0.8802072955384176, iteration: 319386
loss: 1.0418686866760254,grad_norm: 0.7959107441013265, iteration: 319387
loss: 0.9475437998771667,grad_norm: 0.7271260762798718, iteration: 319388
loss: 1.0034797191619873,grad_norm: 0.9999994687590342, iteration: 319389
loss: 0.9811744689941406,grad_norm: 0.8055256246243172, iteration: 319390
loss: 0.9970360994338989,grad_norm: 0.8478286500971369, iteration: 319391
loss: 1.015170931816101,grad_norm: 0.7875490873512901, iteration: 319392
loss: 0.9783415794372559,grad_norm: 0.8243518428373349, iteration: 319393
loss: 0.9874070882797241,grad_norm: 0.8408653647973513, iteration: 319394
loss: 1.0336140394210815,grad_norm: 0.999999329760518, iteration: 319395
loss: 1.0407873392105103,grad_norm: 0.8746305954386246, iteration: 319396
loss: 1.0129246711730957,grad_norm: 0.8674322079183984, iteration: 319397
loss: 1.063689947128296,grad_norm: 0.9999992644815447, iteration: 319398
loss: 1.0418356657028198,grad_norm: 0.7773283369408558, iteration: 319399
loss: 1.0210318565368652,grad_norm: 0.9252982466801215, iteration: 319400
loss: 0.9804785251617432,grad_norm: 0.9591530017731923, iteration: 319401
loss: 1.0134644508361816,grad_norm: 0.9999998914876574, iteration: 319402
loss: 0.975762665271759,grad_norm: 0.9747505005657471, iteration: 319403
loss: 0.936337947845459,grad_norm: 0.9448141877563916, iteration: 319404
loss: 1.0025815963745117,grad_norm: 0.8176846209859572, iteration: 319405
loss: 1.0569182634353638,grad_norm: 0.9177246331337845, iteration: 319406
loss: 1.1074222326278687,grad_norm: 0.999999997851227, iteration: 319407
loss: 0.9947092533111572,grad_norm: 0.7763198069631079, iteration: 319408
loss: 0.9988551139831543,grad_norm: 0.7159668887862409, iteration: 319409
loss: 1.0141481161117554,grad_norm: 0.75161393511975, iteration: 319410
loss: 0.9955414533615112,grad_norm: 0.6671476440073185, iteration: 319411
loss: 1.017399787902832,grad_norm: 0.9999991093351216, iteration: 319412
loss: 0.9863345623016357,grad_norm: 0.9242843281707656, iteration: 319413
loss: 0.9991738200187683,grad_norm: 0.731340046161316, iteration: 319414
loss: 0.9780049920082092,grad_norm: 0.7515860224951575, iteration: 319415
loss: 1.0105315446853638,grad_norm: 0.9999992380031394, iteration: 319416
loss: 1.0062661170959473,grad_norm: 0.9908372014747504, iteration: 319417
loss: 1.0042773485183716,grad_norm: 0.8643747920687023, iteration: 319418
loss: 1.0352356433868408,grad_norm: 0.7194040420688216, iteration: 319419
loss: 1.0401229858398438,grad_norm: 0.978521406814353, iteration: 319420
loss: 0.9857429265975952,grad_norm: 0.911132866467754, iteration: 319421
loss: 1.0289525985717773,grad_norm: 0.7193786616640566, iteration: 319422
loss: 0.9870589971542358,grad_norm: 0.794229633431909, iteration: 319423
loss: 0.9757897853851318,grad_norm: 0.8483055583975563, iteration: 319424
loss: 1.0012991428375244,grad_norm: 0.8117508970514142, iteration: 319425
loss: 1.012439250946045,grad_norm: 0.9992519930403381, iteration: 319426
loss: 0.9752392768859863,grad_norm: 0.8307347198817381, iteration: 319427
loss: 0.9836015105247498,grad_norm: 0.7377195621705166, iteration: 319428
loss: 0.9787842631340027,grad_norm: 0.8948921425987565, iteration: 319429
loss: 0.9890958070755005,grad_norm: 0.8853487864999616, iteration: 319430
loss: 1.005536675453186,grad_norm: 0.9999993618736329, iteration: 319431
loss: 1.068611741065979,grad_norm: 0.950987681983128, iteration: 319432
loss: 0.9878212809562683,grad_norm: 0.999999239027766, iteration: 319433
loss: 0.9908291697502136,grad_norm: 0.7583165400415927, iteration: 319434
loss: 1.0100961923599243,grad_norm: 0.8814414279385437, iteration: 319435
loss: 1.0673832893371582,grad_norm: 0.7030207004482424, iteration: 319436
loss: 0.9682685136795044,grad_norm: 0.9103754313725014, iteration: 319437
loss: 1.002905249595642,grad_norm: 0.8988673877364026, iteration: 319438
loss: 1.0155680179595947,grad_norm: 0.848555350154747, iteration: 319439
loss: 1.017108678817749,grad_norm: 0.7368587976234912, iteration: 319440
loss: 1.0328400135040283,grad_norm: 0.9999996608273771, iteration: 319441
loss: 1.0415219068527222,grad_norm: 0.8305067568122814, iteration: 319442
loss: 0.9767677187919617,grad_norm: 0.9999991394216649, iteration: 319443
loss: 1.0769672393798828,grad_norm: 0.9999993134854862, iteration: 319444
loss: 0.9600749015808105,grad_norm: 0.8760771756615153, iteration: 319445
loss: 1.0310884714126587,grad_norm: 0.8550271073741634, iteration: 319446
loss: 1.086682915687561,grad_norm: 0.9999999198140347, iteration: 319447
loss: 1.0149763822555542,grad_norm: 0.9010974003399417, iteration: 319448
loss: 1.0014420747756958,grad_norm: 0.7424097798087123, iteration: 319449
loss: 0.988947331905365,grad_norm: 0.7625485326379295, iteration: 319450
loss: 1.0353556871414185,grad_norm: 0.9999993776896401, iteration: 319451
loss: 1.0052329301834106,grad_norm: 0.8740179534233206, iteration: 319452
loss: 0.9969869256019592,grad_norm: 0.9999994249314933, iteration: 319453
loss: 0.9933090209960938,grad_norm: 0.751182374774162, iteration: 319454
loss: 0.9739876389503479,grad_norm: 0.8075965841499285, iteration: 319455
loss: 0.9822829961776733,grad_norm: 0.9999998644454229, iteration: 319456
loss: 1.0256094932556152,grad_norm: 0.9999994991698158, iteration: 319457
loss: 0.9998361468315125,grad_norm: 0.8599396166946555, iteration: 319458
loss: 0.987544059753418,grad_norm: 0.8728258526700906, iteration: 319459
loss: 1.0684698820114136,grad_norm: 0.99999908381756, iteration: 319460
loss: 0.9976997375488281,grad_norm: 0.8058049730439778, iteration: 319461
loss: 1.0200365781784058,grad_norm: 0.9134764244219584, iteration: 319462
loss: 0.9725002646446228,grad_norm: 0.9204474501786577, iteration: 319463
loss: 1.0079344511032104,grad_norm: 0.8395570032042348, iteration: 319464
loss: 1.0362191200256348,grad_norm: 0.8114439623749161, iteration: 319465
loss: 1.0126935243606567,grad_norm: 0.7242873261315717, iteration: 319466
loss: 1.0043623447418213,grad_norm: 0.7156675673913536, iteration: 319467
loss: 1.016219973564148,grad_norm: 0.855904890520556, iteration: 319468
loss: 1.0250517129898071,grad_norm: 0.9073844119485036, iteration: 319469
loss: 0.9953998327255249,grad_norm: 0.9999990747109, iteration: 319470
loss: 1.0030673742294312,grad_norm: 0.8288545600695404, iteration: 319471
loss: 1.0228444337844849,grad_norm: 0.7260728110951575, iteration: 319472
loss: 0.9708053469657898,grad_norm: 0.8301213194084136, iteration: 319473
loss: 1.0212626457214355,grad_norm: 0.9999991697767107, iteration: 319474
loss: 1.0241752862930298,grad_norm: 0.7712371384986174, iteration: 319475
loss: 1.030234456062317,grad_norm: 0.8768965421555266, iteration: 319476
loss: 1.0043635368347168,grad_norm: 0.9999992274320426, iteration: 319477
loss: 0.9894844889640808,grad_norm: 0.9999991292022301, iteration: 319478
loss: 0.9533116221427917,grad_norm: 0.7794008886088828, iteration: 319479
loss: 1.058048129081726,grad_norm: 0.999999451495779, iteration: 319480
loss: 0.9463359117507935,grad_norm: 0.9174778225489739, iteration: 319481
loss: 1.0046067237854004,grad_norm: 0.9191148509771203, iteration: 319482
loss: 1.012200117111206,grad_norm: 0.8510338824116377, iteration: 319483
loss: 1.0084325075149536,grad_norm: 0.7392939322146768, iteration: 319484
loss: 1.0194560289382935,grad_norm: 0.9999990789993429, iteration: 319485
loss: 1.0117237567901611,grad_norm: 0.9999992570999101, iteration: 319486
loss: 1.0377953052520752,grad_norm: 0.7564230325742156, iteration: 319487
loss: 1.0220415592193604,grad_norm: 0.9999990595862824, iteration: 319488
loss: 1.0309847593307495,grad_norm: 0.7457461006145927, iteration: 319489
loss: 0.9438509941101074,grad_norm: 0.7394504153993152, iteration: 319490
loss: 1.0354926586151123,grad_norm: 0.930950630531116, iteration: 319491
loss: 1.0260145664215088,grad_norm: 0.8865139963828185, iteration: 319492
loss: 1.047288179397583,grad_norm: 0.9999994187710881, iteration: 319493
loss: 1.0007652044296265,grad_norm: 0.9999996460286275, iteration: 319494
loss: 1.0889768600463867,grad_norm: 0.828299190259391, iteration: 319495
loss: 0.9868204593658447,grad_norm: 0.9999991157387794, iteration: 319496
loss: 1.0051484107971191,grad_norm: 0.8944208322510253, iteration: 319497
loss: 0.9963576197624207,grad_norm: 0.9406943563364221, iteration: 319498
loss: 0.9800549149513245,grad_norm: 0.8063478204684262, iteration: 319499
loss: 0.9688190817832947,grad_norm: 0.824917331720839, iteration: 319500
loss: 0.9646007418632507,grad_norm: 0.814245758361748, iteration: 319501
loss: 1.0188634395599365,grad_norm: 0.9999993719949835, iteration: 319502
loss: 1.023769736289978,grad_norm: 0.9999997749093262, iteration: 319503
loss: 0.9785476326942444,grad_norm: 0.8031415048847182, iteration: 319504
loss: 0.9915510416030884,grad_norm: 0.7451227017210016, iteration: 319505
loss: 1.0061149597167969,grad_norm: 0.8056073866752436, iteration: 319506
loss: 1.0033518075942993,grad_norm: 0.7715822360884936, iteration: 319507
loss: 1.0162261724472046,grad_norm: 0.9523019200869741, iteration: 319508
loss: 0.9665637016296387,grad_norm: 0.9999991811727431, iteration: 319509
loss: 0.9875772595405579,grad_norm: 0.8003676266776643, iteration: 319510
loss: 0.9205651879310608,grad_norm: 0.8761772579808237, iteration: 319511
loss: 0.9907196760177612,grad_norm: 0.7722173161941336, iteration: 319512
loss: 1.0294874906539917,grad_norm: 0.999999899374897, iteration: 319513
loss: 1.0366809368133545,grad_norm: 0.8336813269122921, iteration: 319514
loss: 0.9613242745399475,grad_norm: 0.8088058443148332, iteration: 319515
loss: 1.010319471359253,grad_norm: 0.9999997270542283, iteration: 319516
loss: 1.0002281665802002,grad_norm: 0.789595033454438, iteration: 319517
loss: 0.9849458932876587,grad_norm: 0.9455653012383816, iteration: 319518
loss: 0.9845326542854309,grad_norm: 0.7335173681363938, iteration: 319519
loss: 1.0068426132202148,grad_norm: 0.9874591397965056, iteration: 319520
loss: 1.0348436832427979,grad_norm: 0.6896646650061504, iteration: 319521
loss: 0.9855011105537415,grad_norm: 0.8910783141026822, iteration: 319522
loss: 1.0125691890716553,grad_norm: 0.9656008868353156, iteration: 319523
loss: 0.9815436005592346,grad_norm: 0.7879232366107201, iteration: 319524
loss: 1.027980923652649,grad_norm: 0.9412544009366695, iteration: 319525
loss: 0.9647602438926697,grad_norm: 0.9999991468699895, iteration: 319526
loss: 0.9977685809135437,grad_norm: 0.836874475870212, iteration: 319527
loss: 0.9474282264709473,grad_norm: 0.8256764885375465, iteration: 319528
loss: 0.9300866723060608,grad_norm: 0.9691808997485593, iteration: 319529
loss: 0.972594678401947,grad_norm: 0.9999990866825006, iteration: 319530
loss: 0.9950256943702698,grad_norm: 0.8174324623293252, iteration: 319531
loss: 1.040192723274231,grad_norm: 0.999999268976881, iteration: 319532
loss: 1.0154248476028442,grad_norm: 0.7554217372464972, iteration: 319533
loss: 1.0155658721923828,grad_norm: 0.975767072353762, iteration: 319534
loss: 1.0288914442062378,grad_norm: 0.7188930573394662, iteration: 319535
loss: 1.0079811811447144,grad_norm: 0.9036279087717606, iteration: 319536
loss: 0.999555766582489,grad_norm: 0.7373513525484207, iteration: 319537
loss: 0.9970781803131104,grad_norm: 0.8159566553219016, iteration: 319538
loss: 0.9917063117027283,grad_norm: 0.9999991459080787, iteration: 319539
loss: 1.0116862058639526,grad_norm: 0.853747912726015, iteration: 319540
loss: 1.1195276975631714,grad_norm: 0.9999999210020707, iteration: 319541
loss: 1.018791675567627,grad_norm: 0.7662943151215448, iteration: 319542
loss: 0.9646309018135071,grad_norm: 0.9408562282815066, iteration: 319543
loss: 1.0329893827438354,grad_norm: 0.8891348279984846, iteration: 319544
loss: 0.9786134958267212,grad_norm: 0.99999929039575, iteration: 319545
loss: 1.027645230293274,grad_norm: 0.8562839849628393, iteration: 319546
loss: 1.0130857229232788,grad_norm: 0.7251049818738519, iteration: 319547
loss: 1.0085078477859497,grad_norm: 0.7528541253328519, iteration: 319548
loss: 0.9753263592720032,grad_norm: 0.8997269451213031, iteration: 319549
loss: 1.0253154039382935,grad_norm: 0.7964104169353275, iteration: 319550
loss: 0.9996667504310608,grad_norm: 0.9999994915905506, iteration: 319551
loss: 1.100825309753418,grad_norm: 0.9999993038570375, iteration: 319552
loss: 0.9753552675247192,grad_norm: 0.8674073730903051, iteration: 319553
loss: 0.992468535900116,grad_norm: 0.9117174281589577, iteration: 319554
loss: 0.9863379001617432,grad_norm: 0.7653884671926898, iteration: 319555
loss: 1.0931921005249023,grad_norm: 0.9999999255886715, iteration: 319556
loss: 0.9902269840240479,grad_norm: 0.8757249364095934, iteration: 319557
loss: 1.010502815246582,grad_norm: 0.9999990803573425, iteration: 319558
loss: 1.00355064868927,grad_norm: 0.8312176813449845, iteration: 319559
loss: 0.9758962392807007,grad_norm: 0.988887559920584, iteration: 319560
loss: 1.0385841131210327,grad_norm: 0.9999998416273919, iteration: 319561
loss: 0.9832080602645874,grad_norm: 0.7398645303678436, iteration: 319562
loss: 1.0169183015823364,grad_norm: 0.8596863013397071, iteration: 319563
loss: 1.0280416011810303,grad_norm: 0.9999991061016689, iteration: 319564
loss: 0.98204106092453,grad_norm: 0.8347119892551789, iteration: 319565
loss: 0.9926101565361023,grad_norm: 0.8344553380158117, iteration: 319566
loss: 0.9772897958755493,grad_norm: 0.9858817072739241, iteration: 319567
loss: 1.0338366031646729,grad_norm: 0.9999999770923695, iteration: 319568
loss: 1.0317233800888062,grad_norm: 0.8479964696689517, iteration: 319569
loss: 0.9994708895683289,grad_norm: 0.8660795446735868, iteration: 319570
loss: 1.1468026638031006,grad_norm: 0.999999458751571, iteration: 319571
loss: 1.0357104539871216,grad_norm: 0.8300841899428023, iteration: 319572
loss: 1.0024700164794922,grad_norm: 0.7493252289012171, iteration: 319573
loss: 1.0308698415756226,grad_norm: 0.8955489051354889, iteration: 319574
loss: 1.0253468751907349,grad_norm: 0.952057219654269, iteration: 319575
loss: 1.0222737789154053,grad_norm: 0.8774243228558755, iteration: 319576
loss: 1.0525842905044556,grad_norm: 0.9999999799511812, iteration: 319577
loss: 0.9749380350112915,grad_norm: 0.9698962229742211, iteration: 319578
loss: 0.9787153005599976,grad_norm: 0.8110413905076101, iteration: 319579
loss: 1.0205681324005127,grad_norm: 0.999999222846881, iteration: 319580
loss: 1.022868275642395,grad_norm: 0.9999991423108446, iteration: 319581
loss: 1.053648591041565,grad_norm: 0.9999993068543096, iteration: 319582
loss: 1.009826421737671,grad_norm: 0.8127855369877298, iteration: 319583
loss: 1.1437451839447021,grad_norm: 0.8636887555100622, iteration: 319584
loss: 0.9966010451316833,grad_norm: 0.7817314940446488, iteration: 319585
loss: 1.0100914239883423,grad_norm: 0.9999990948145581, iteration: 319586
loss: 1.0301377773284912,grad_norm: 0.9999998680497907, iteration: 319587
loss: 0.9948017001152039,grad_norm: 0.8499110953360934, iteration: 319588
loss: 0.9876587986946106,grad_norm: 0.7523705682481051, iteration: 319589
loss: 1.029930830001831,grad_norm: 0.8665660425469696, iteration: 319590
loss: 1.0251744985580444,grad_norm: 0.9008182006957024, iteration: 319591
loss: 1.0111157894134521,grad_norm: 0.7924113493610594, iteration: 319592
loss: 0.9928833246231079,grad_norm: 0.8814745819899896, iteration: 319593
loss: 0.9669881463050842,grad_norm: 0.8973245897766341, iteration: 319594
loss: 0.9879966378211975,grad_norm: 0.9224227423885311, iteration: 319595
loss: 1.0065609216690063,grad_norm: 0.9999989748472307, iteration: 319596
loss: 0.9451577663421631,grad_norm: 0.7337532421421805, iteration: 319597
loss: 0.975074827671051,grad_norm: 0.9999990534741685, iteration: 319598
loss: 0.9720894694328308,grad_norm: 0.7980067686990928, iteration: 319599
loss: 1.0254701375961304,grad_norm: 0.8427053946770312, iteration: 319600
loss: 1.0288180112838745,grad_norm: 0.9999992097708391, iteration: 319601
loss: 1.0449854135513306,grad_norm: 1.0000001064841022, iteration: 319602
loss: 0.9917328953742981,grad_norm: 0.7958759351923678, iteration: 319603
loss: 1.0764671564102173,grad_norm: 0.9999993805909523, iteration: 319604
loss: 0.989288330078125,grad_norm: 0.9287608848820533, iteration: 319605
loss: 1.0133562088012695,grad_norm: 0.7778210520695459, iteration: 319606
loss: 0.9969608783721924,grad_norm: 0.8013304530907771, iteration: 319607
loss: 0.9588468670845032,grad_norm: 0.7736390395331222, iteration: 319608
loss: 1.0295377969741821,grad_norm: 0.9833417924728277, iteration: 319609
loss: 1.0249531269073486,grad_norm: 0.9738249479937712, iteration: 319610
loss: 1.0225008726119995,grad_norm: 0.9442162943752346, iteration: 319611
loss: 1.130003809928894,grad_norm: 0.9674751381153643, iteration: 319612
loss: 1.007582426071167,grad_norm: 0.859296535804958, iteration: 319613
loss: 0.9944698810577393,grad_norm: 0.8524351134200349, iteration: 319614
loss: 1.0046406984329224,grad_norm: 0.9298389828824093, iteration: 319615
loss: 1.024327039718628,grad_norm: 0.8526136359906156, iteration: 319616
loss: 0.9805154204368591,grad_norm: 0.8175642493332029, iteration: 319617
loss: 1.0099421739578247,grad_norm: 0.8352007728745696, iteration: 319618
loss: 1.01659095287323,grad_norm: 0.8355314200134293, iteration: 319619
loss: 0.9811360239982605,grad_norm: 0.8997355687641418, iteration: 319620
loss: 0.9946590662002563,grad_norm: 0.739831830428709, iteration: 319621
loss: 0.9811450839042664,grad_norm: 0.8983568397680799, iteration: 319622
loss: 0.9912087321281433,grad_norm: 0.9951750922414072, iteration: 319623
loss: 0.991826057434082,grad_norm: 0.8700792941703199, iteration: 319624
loss: 1.056060791015625,grad_norm: 0.9999990241501796, iteration: 319625
loss: 1.0359737873077393,grad_norm: 0.8405751986304844, iteration: 319626
loss: 1.0213055610656738,grad_norm: 0.9999995704484269, iteration: 319627
loss: 1.011950135231018,grad_norm: 0.8426017447766263, iteration: 319628
loss: 1.0120559930801392,grad_norm: 0.7582012745679733, iteration: 319629
loss: 1.0113109350204468,grad_norm: 0.8720979455542119, iteration: 319630
loss: 1.0268135070800781,grad_norm: 0.9530204200570748, iteration: 319631
loss: 1.0090726613998413,grad_norm: 0.8118260128607561, iteration: 319632
loss: 0.9895773530006409,grad_norm: 0.9367166999385188, iteration: 319633
loss: 0.979947566986084,grad_norm: 0.9999992271728692, iteration: 319634
loss: 0.9477842450141907,grad_norm: 0.7317710759071497, iteration: 319635
loss: 0.9799642562866211,grad_norm: 0.7273651350713745, iteration: 319636
loss: 1.0136228799819946,grad_norm: 0.82463737418585, iteration: 319637
loss: 0.9751285910606384,grad_norm: 0.8325910399504469, iteration: 319638
loss: 1.0282831192016602,grad_norm: 0.9999991144932053, iteration: 319639
loss: 0.9685817956924438,grad_norm: 0.8424146676474654, iteration: 319640
loss: 1.0346440076828003,grad_norm: 0.9999991171901303, iteration: 319641
loss: 1.0071488618850708,grad_norm: 0.9398240982493853, iteration: 319642
loss: 0.9737597107887268,grad_norm: 0.9226518751330749, iteration: 319643
loss: 1.0056045055389404,grad_norm: 0.7676655653356939, iteration: 319644
loss: 0.9970160722732544,grad_norm: 0.8561746564957153, iteration: 319645
loss: 0.9817398190498352,grad_norm: 0.7453032640449817, iteration: 319646
loss: 0.9722084403038025,grad_norm: 0.848969708742979, iteration: 319647
loss: 1.0085196495056152,grad_norm: 0.8513386009750065, iteration: 319648
loss: 1.033444881439209,grad_norm: 0.8355023655735428, iteration: 319649
loss: 1.019350528717041,grad_norm: 0.9697398793761695, iteration: 319650
loss: 0.9894437789916992,grad_norm: 0.9335114390793442, iteration: 319651
loss: 1.0186153650283813,grad_norm: 0.8980677225250108, iteration: 319652
loss: 1.012060284614563,grad_norm: 0.9474209867521333, iteration: 319653
loss: 0.9913766980171204,grad_norm: 0.9013702138922672, iteration: 319654
loss: 0.9999812841415405,grad_norm: 0.7456716954166653, iteration: 319655
loss: 0.9913317561149597,grad_norm: 0.8384727717149862, iteration: 319656
loss: 1.0220749378204346,grad_norm: 0.9689926062958154, iteration: 319657
loss: 0.9770480394363403,grad_norm: 0.8224864692392386, iteration: 319658
loss: 1.0007994174957275,grad_norm: 0.9999990351128842, iteration: 319659
loss: 0.9884922504425049,grad_norm: 0.9210003003774998, iteration: 319660
loss: 1.0327686071395874,grad_norm: 0.9999993484136129, iteration: 319661
loss: 0.9944266676902771,grad_norm: 0.8623806816147328, iteration: 319662
loss: 0.9551981091499329,grad_norm: 0.9332230920055303, iteration: 319663
loss: 1.0478793382644653,grad_norm: 0.8878140590623894, iteration: 319664
loss: 1.0358953475952148,grad_norm: 0.999999577586019, iteration: 319665
loss: 0.9932354688644409,grad_norm: 0.8254777883987586, iteration: 319666
loss: 1.006110668182373,grad_norm: 0.9601007866580527, iteration: 319667
loss: 0.9968186020851135,grad_norm: 0.782025482647388, iteration: 319668
loss: 1.0211671590805054,grad_norm: 0.8725072749365657, iteration: 319669
loss: 0.9903590679168701,grad_norm: 0.893178978872304, iteration: 319670
loss: 0.9850659966468811,grad_norm: 0.7888209185997929, iteration: 319671
loss: 0.9822994470596313,grad_norm: 0.8227321038499692, iteration: 319672
loss: 1.0816750526428223,grad_norm: 0.9999993559740731, iteration: 319673
loss: 1.0125843286514282,grad_norm: 0.7912241354529049, iteration: 319674
loss: 0.9640383124351501,grad_norm: 0.8213453216471657, iteration: 319675
loss: 1.1484720706939697,grad_norm: 0.9903514890449107, iteration: 319676
loss: 1.0251798629760742,grad_norm: 0.8129032875639493, iteration: 319677
loss: 0.9903684258460999,grad_norm: 0.8852779021294069, iteration: 319678
loss: 0.9940617680549622,grad_norm: 0.9999990098939826, iteration: 319679
loss: 0.9831759929656982,grad_norm: 0.7322466552641878, iteration: 319680
loss: 1.0407543182373047,grad_norm: 0.9505621104340317, iteration: 319681
loss: 0.9732418060302734,grad_norm: 0.8866859874203881, iteration: 319682
loss: 0.9883877038955688,grad_norm: 0.8400004171684178, iteration: 319683
loss: 1.0208004713058472,grad_norm: 0.9999996993924994, iteration: 319684
loss: 1.0067070722579956,grad_norm: 0.7305020230601444, iteration: 319685
loss: 1.0334484577178955,grad_norm: 0.9012901737727882, iteration: 319686
loss: 0.9913633465766907,grad_norm: 0.8479939630880322, iteration: 319687
loss: 1.0009533166885376,grad_norm: 0.747918146474939, iteration: 319688
loss: 0.9916951060295105,grad_norm: 0.7335682548665335, iteration: 319689
loss: 0.9996209740638733,grad_norm: 0.9157041674991696, iteration: 319690
loss: 0.9561917781829834,grad_norm: 0.6373993341574321, iteration: 319691
loss: 0.991640031337738,grad_norm: 0.6784066780080996, iteration: 319692
loss: 1.0163685083389282,grad_norm: 0.6969233489564312, iteration: 319693
loss: 1.028078317642212,grad_norm: 0.8661191633339251, iteration: 319694
loss: 1.0075417757034302,grad_norm: 0.9999992772887664, iteration: 319695
loss: 1.022051453590393,grad_norm: 0.7519355989214375, iteration: 319696
loss: 0.9378374814987183,grad_norm: 0.7983289306255913, iteration: 319697
loss: 1.04159414768219,grad_norm: 0.9999992460592309, iteration: 319698
loss: 1.0074403285980225,grad_norm: 0.6629637294190602, iteration: 319699
loss: 1.0126140117645264,grad_norm: 0.7585031510565462, iteration: 319700
loss: 1.0563222169876099,grad_norm: 0.9999994098824418, iteration: 319701
loss: 0.9829905033111572,grad_norm: 0.942428106037134, iteration: 319702
loss: 1.0665379762649536,grad_norm: 0.9999993198564651, iteration: 319703
loss: 1.0367013216018677,grad_norm: 0.8877528180995353, iteration: 319704
loss: 0.9873986840248108,grad_norm: 0.7896641618659532, iteration: 319705
loss: 0.9526503682136536,grad_norm: 0.9165985527791624, iteration: 319706
loss: 1.011496901512146,grad_norm: 0.796623825022611, iteration: 319707
loss: 1.0772963762283325,grad_norm: 0.8944195319559569, iteration: 319708
loss: 0.955390989780426,grad_norm: 0.8422709358326071, iteration: 319709
loss: 1.0258334875106812,grad_norm: 0.8016272169722947, iteration: 319710
loss: 0.994790256023407,grad_norm: 0.8952577524468338, iteration: 319711
loss: 0.9766573905944824,grad_norm: 0.8542730540630359, iteration: 319712
loss: 1.0500173568725586,grad_norm: 0.9999996218505449, iteration: 319713
loss: 1.032781720161438,grad_norm: 0.9999991454922703, iteration: 319714
loss: 1.0885826349258423,grad_norm: 0.941563030682696, iteration: 319715
loss: 0.9889563918113708,grad_norm: 0.9928480747613538, iteration: 319716
loss: 1.0262551307678223,grad_norm: 0.8588478512281614, iteration: 319717
loss: 1.0362770557403564,grad_norm: 0.9860950103377661, iteration: 319718
loss: 1.0060622692108154,grad_norm: 0.8059050621373981, iteration: 319719
loss: 1.0218896865844727,grad_norm: 0.9999991211344018, iteration: 319720
loss: 1.0443530082702637,grad_norm: 0.7970222811761347, iteration: 319721
loss: 1.055617094039917,grad_norm: 0.9999997521318742, iteration: 319722
loss: 0.9620929956436157,grad_norm: 0.824026767441682, iteration: 319723
loss: 0.9920634031295776,grad_norm: 0.7622848870550232, iteration: 319724
loss: 1.031985878944397,grad_norm: 0.8814091720421033, iteration: 319725
loss: 1.0438395738601685,grad_norm: 0.9529765705186576, iteration: 319726
loss: 0.9846735596656799,grad_norm: 0.9623478751349857, iteration: 319727
loss: 1.00515878200531,grad_norm: 0.938153437861448, iteration: 319728
loss: 0.9822785258293152,grad_norm: 0.7239746435462505, iteration: 319729
loss: 1.0056771039962769,grad_norm: 0.6514750022977039, iteration: 319730
loss: 1.0290024280548096,grad_norm: 0.9999995778389122, iteration: 319731
loss: 1.0061603784561157,grad_norm: 0.6996970084358225, iteration: 319732
loss: 0.977853000164032,grad_norm: 0.9752529500159896, iteration: 319733
loss: 1.020382285118103,grad_norm: 0.8079932073289962, iteration: 319734
loss: 1.0005183219909668,grad_norm: 0.8532450447124655, iteration: 319735
loss: 0.9875383973121643,grad_norm: 0.9999989278541636, iteration: 319736
loss: 0.9660078883171082,grad_norm: 0.8217281099826276, iteration: 319737
loss: 0.9904777407646179,grad_norm: 0.8598141700364842, iteration: 319738
loss: 1.1989967823028564,grad_norm: 0.9999994079925123, iteration: 319739
loss: 0.9825063943862915,grad_norm: 0.7078193571972401, iteration: 319740
loss: 0.9664372801780701,grad_norm: 0.8246586401137707, iteration: 319741
loss: 0.9721119403839111,grad_norm: 0.7780942686217347, iteration: 319742
loss: 0.9945089817047119,grad_norm: 0.9572535149193021, iteration: 319743
loss: 0.9984484314918518,grad_norm: 0.9240050553129736, iteration: 319744
loss: 0.9817590117454529,grad_norm: 0.7102770292963612, iteration: 319745
loss: 1.0198156833648682,grad_norm: 0.915212551369089, iteration: 319746
loss: 1.0060551166534424,grad_norm: 0.8581577794770474, iteration: 319747
loss: 0.9867526292800903,grad_norm: 0.8513604135010276, iteration: 319748
loss: 0.9972522258758545,grad_norm: 0.9999996126075488, iteration: 319749
loss: 1.0049606561660767,grad_norm: 0.7632675719681524, iteration: 319750
loss: 0.9928977489471436,grad_norm: 0.8605599046524158, iteration: 319751
loss: 1.0573506355285645,grad_norm: 0.9443610124530251, iteration: 319752
loss: 0.9944624900817871,grad_norm: 0.8638327741622639, iteration: 319753
loss: 1.0087076425552368,grad_norm: 0.9999998588387851, iteration: 319754
loss: 0.9895281195640564,grad_norm: 0.7722555419703728, iteration: 319755
loss: 0.9758867621421814,grad_norm: 0.7919066559079796, iteration: 319756
loss: 1.0124130249023438,grad_norm: 0.9433062997361839, iteration: 319757
loss: 0.9783424735069275,grad_norm: 0.7979969821213162, iteration: 319758
loss: 0.9832912683486938,grad_norm: 0.886317102249847, iteration: 319759
loss: 0.9636186957359314,grad_norm: 0.8355552932099851, iteration: 319760
loss: 0.9975797533988953,grad_norm: 0.9999992666765793, iteration: 319761
loss: 0.9990624785423279,grad_norm: 0.8450964718165315, iteration: 319762
loss: 1.0079270601272583,grad_norm: 0.7603865394547484, iteration: 319763
loss: 0.9697911143302917,grad_norm: 0.7870952739692415, iteration: 319764
loss: 1.0532864332199097,grad_norm: 0.8817290930070866, iteration: 319765
loss: 0.995398223400116,grad_norm: 0.7128430245479793, iteration: 319766
loss: 0.9951469898223877,grad_norm: 0.9999990339394704, iteration: 319767
loss: 0.9836029410362244,grad_norm: 0.7974886853866344, iteration: 319768
loss: 1.0302042961120605,grad_norm: 0.8571525822019013, iteration: 319769
loss: 0.9991139769554138,grad_norm: 0.8937896683649934, iteration: 319770
loss: 0.9915788769721985,grad_norm: 0.8716433052153282, iteration: 319771
loss: 1.034732460975647,grad_norm: 0.8392383852511994, iteration: 319772
loss: 0.9885681867599487,grad_norm: 0.7668336928906172, iteration: 319773
loss: 1.0161620378494263,grad_norm: 0.9249382491679589, iteration: 319774
loss: 0.9711093306541443,grad_norm: 0.8900849928223814, iteration: 319775
loss: 1.0024614334106445,grad_norm: 0.9999998882005577, iteration: 319776
loss: 1.032888650894165,grad_norm: 0.8255881490399114, iteration: 319777
loss: 1.0265724658966064,grad_norm: 0.9999997323752683, iteration: 319778
loss: 0.9837996363639832,grad_norm: 0.7738897458673457, iteration: 319779
loss: 1.0182631015777588,grad_norm: 0.9999992117262295, iteration: 319780
loss: 1.0139998197555542,grad_norm: 0.797360899532713, iteration: 319781
loss: 1.00055730342865,grad_norm: 0.8650111019391699, iteration: 319782
loss: 1.0019583702087402,grad_norm: 0.9217458365792544, iteration: 319783
loss: 1.0052552223205566,grad_norm: 0.9707826650624485, iteration: 319784
loss: 0.9500679969787598,grad_norm: 0.9510376253498054, iteration: 319785
loss: 0.9826221466064453,grad_norm: 0.7866696360321489, iteration: 319786
loss: 1.201930046081543,grad_norm: 0.999999552887553, iteration: 319787
loss: 1.00867760181427,grad_norm: 0.8646119486515794, iteration: 319788
loss: 1.004582405090332,grad_norm: 0.9999999043560527, iteration: 319789
loss: 1.0111714601516724,grad_norm: 0.9999991584707497, iteration: 319790
loss: 1.0144374370574951,grad_norm: 0.8438110687109287, iteration: 319791
loss: 0.9429674744606018,grad_norm: 0.770276011002337, iteration: 319792
loss: 1.0354286432266235,grad_norm: 0.9047514290776885, iteration: 319793
loss: 1.0779345035552979,grad_norm: 0.9999991245986098, iteration: 319794
loss: 0.9783185124397278,grad_norm: 0.9100924111676556, iteration: 319795
loss: 1.0218334197998047,grad_norm: 0.9994457094276902, iteration: 319796
loss: 1.1782041788101196,grad_norm: 0.8866124236644887, iteration: 319797
loss: 1.0524792671203613,grad_norm: 0.9802171997688063, iteration: 319798
loss: 0.9906737804412842,grad_norm: 0.8789687092617317, iteration: 319799
loss: 0.9663543105125427,grad_norm: 0.903833137611143, iteration: 319800
loss: 1.0322226285934448,grad_norm: 0.895451878553631, iteration: 319801
loss: 1.0842347145080566,grad_norm: 0.9999990802788923, iteration: 319802
loss: 1.0952221155166626,grad_norm: 0.9999995939174511, iteration: 319803
loss: 1.0222952365875244,grad_norm: 0.9999993088176725, iteration: 319804
loss: 1.027279019355774,grad_norm: 0.7616390094925184, iteration: 319805
loss: 1.033292293548584,grad_norm: 0.9999996420058765, iteration: 319806
loss: 1.1009141206741333,grad_norm: 0.9999992178691264, iteration: 319807
loss: 1.005530595779419,grad_norm: 0.8007253064466846, iteration: 319808
loss: 0.9715456962585449,grad_norm: 0.8111214492595694, iteration: 319809
loss: 0.9962472319602966,grad_norm: 0.9999999464887884, iteration: 319810
loss: 1.0526663064956665,grad_norm: 0.8343123704029215, iteration: 319811
loss: 0.9725951552391052,grad_norm: 0.7870539823606924, iteration: 319812
loss: 1.0398808717727661,grad_norm: 0.7736942709950763, iteration: 319813
loss: 1.0509833097457886,grad_norm: 0.9999991219025427, iteration: 319814
loss: 1.0773378610610962,grad_norm: 0.9999991314961155, iteration: 319815
loss: 0.9975352883338928,grad_norm: 0.999999223848545, iteration: 319816
loss: 1.0851056575775146,grad_norm: 0.999999706513113, iteration: 319817
loss: 0.9648409485816956,grad_norm: 0.8560292849199025, iteration: 319818
loss: 1.0547465085983276,grad_norm: 0.7837693018429208, iteration: 319819
loss: 1.0194482803344727,grad_norm: 0.9846006993932703, iteration: 319820
loss: 0.9717086553573608,grad_norm: 0.8175484817218263, iteration: 319821
loss: 0.9963868260383606,grad_norm: 0.6717437845840843, iteration: 319822
loss: 0.9978832602500916,grad_norm: 0.9999993260897055, iteration: 319823
loss: 1.0058937072753906,grad_norm: 0.7140579706252295, iteration: 319824
loss: 0.9984045028686523,grad_norm: 0.9627652777269826, iteration: 319825
loss: 0.9691181182861328,grad_norm: 0.7615153522837101, iteration: 319826
loss: 1.0004364252090454,grad_norm: 0.9729616161287854, iteration: 319827
loss: 1.0169295072555542,grad_norm: 0.9123451268120364, iteration: 319828
loss: 1.0197771787643433,grad_norm: 0.9999993717401191, iteration: 319829
loss: 0.9876368641853333,grad_norm: 0.9999990478611693, iteration: 319830
loss: 1.0328271389007568,grad_norm: 0.9999994464160914, iteration: 319831
loss: 0.9891625642776489,grad_norm: 0.8880362817719757, iteration: 319832
loss: 0.9923099279403687,grad_norm: 0.9058767918274145, iteration: 319833
loss: 0.9765878915786743,grad_norm: 0.6925179771094317, iteration: 319834
loss: 1.0356453657150269,grad_norm: 0.9415825385175453, iteration: 319835
loss: 1.0356624126434326,grad_norm: 0.84498994835896, iteration: 319836
loss: 1.0111244916915894,grad_norm: 0.9999990174717959, iteration: 319837
loss: 0.9522227048873901,grad_norm: 0.9999991467649242, iteration: 319838
loss: 0.9992666244506836,grad_norm: 0.698650374576006, iteration: 319839
loss: 1.072243332862854,grad_norm: 0.9999991197844202, iteration: 319840
loss: 1.050878882408142,grad_norm: 0.9165226684471817, iteration: 319841
loss: 0.9899490475654602,grad_norm: 0.909846984063524, iteration: 319842
loss: 0.9974104762077332,grad_norm: 0.7724998739674629, iteration: 319843
loss: 0.9757592082023621,grad_norm: 0.9385385599617556, iteration: 319844
loss: 1.015488624572754,grad_norm: 0.9999990009532755, iteration: 319845
loss: 1.0265246629714966,grad_norm: 0.8933364646257266, iteration: 319846
loss: 1.029455542564392,grad_norm: 0.9999999904376842, iteration: 319847
loss: 0.988036036491394,grad_norm: 0.8887963612375426, iteration: 319848
loss: 1.0369240045547485,grad_norm: 0.9999994688410556, iteration: 319849
loss: 1.0137908458709717,grad_norm: 0.8993476236418115, iteration: 319850
loss: 0.9851573705673218,grad_norm: 0.9588948471100042, iteration: 319851
loss: 1.0312845706939697,grad_norm: 0.8706485687567621, iteration: 319852
loss: 1.0018481016159058,grad_norm: 0.8127978026292896, iteration: 319853
loss: 1.0027797222137451,grad_norm: 0.9447987834362627, iteration: 319854
loss: 0.9738785028457642,grad_norm: 0.7563952277102478, iteration: 319855
loss: 1.0304193496704102,grad_norm: 0.9999990429907153, iteration: 319856
loss: 0.987149715423584,grad_norm: 0.8735650280617505, iteration: 319857
loss: 1.0071243047714233,grad_norm: 0.7379754427304779, iteration: 319858
loss: 1.0211102962493896,grad_norm: 0.7658841654901661, iteration: 319859
loss: 1.0535675287246704,grad_norm: 0.7697427224686125, iteration: 319860
loss: 1.02332603931427,grad_norm: 0.6322323390415217, iteration: 319861
loss: 0.9519074559211731,grad_norm: 0.9077133526084343, iteration: 319862
loss: 1.046051025390625,grad_norm: 0.9999991680019081, iteration: 319863
loss: 1.009418249130249,grad_norm: 0.9999991514346547, iteration: 319864
loss: 0.9792027473449707,grad_norm: 0.9204672041922908, iteration: 319865
loss: 1.0760564804077148,grad_norm: 0.9467788360301531, iteration: 319866
loss: 1.0754722356796265,grad_norm: 0.9999991123349515, iteration: 319867
loss: 0.9961869716644287,grad_norm: 0.826751023357304, iteration: 319868
loss: 1.0200412273406982,grad_norm: 0.8198171122282866, iteration: 319869
loss: 1.0283689498901367,grad_norm: 0.9999997722773668, iteration: 319870
loss: 1.014725685119629,grad_norm: 0.9999994726023282, iteration: 319871
loss: 1.0240525007247925,grad_norm: 0.9999994603855265, iteration: 319872
loss: 1.0286959409713745,grad_norm: 0.8502613317680859, iteration: 319873
loss: 0.977701723575592,grad_norm: 0.7973242322013451, iteration: 319874
loss: 1.0035302639007568,grad_norm: 0.8600636140940248, iteration: 319875
loss: 0.9672874808311462,grad_norm: 0.8254704921559183, iteration: 319876
loss: 1.0010578632354736,grad_norm: 0.8661640154620508, iteration: 319877
loss: 1.028501272201538,grad_norm: 0.9999991273894547, iteration: 319878
loss: 0.9888368844985962,grad_norm: 0.8778945730919112, iteration: 319879
loss: 0.9631145596504211,grad_norm: 0.8574377221667774, iteration: 319880
loss: 1.0388977527618408,grad_norm: 0.8673527899526675, iteration: 319881
loss: 1.003328800201416,grad_norm: 0.8638008178447171, iteration: 319882
loss: 1.0157171487808228,grad_norm: 0.7701659708838927, iteration: 319883
loss: 1.001246452331543,grad_norm: 0.7146680941452727, iteration: 319884
loss: 0.968705415725708,grad_norm: 0.8273292911060698, iteration: 319885
loss: 0.9764737486839294,grad_norm: 0.9999990054427016, iteration: 319886
loss: 0.985381007194519,grad_norm: 0.8572525877740896, iteration: 319887
loss: 1.0241729021072388,grad_norm: 0.8583130366931797, iteration: 319888
loss: 0.9904755353927612,grad_norm: 0.8256383580365128, iteration: 319889
loss: 0.9970689415931702,grad_norm: 0.8319659138627971, iteration: 319890
loss: 0.9945351481437683,grad_norm: 0.8901407572796305, iteration: 319891
loss: 0.9942267537117004,grad_norm: 0.9451126094381447, iteration: 319892
loss: 0.9738598465919495,grad_norm: 0.7192480822683202, iteration: 319893
loss: 1.0053497552871704,grad_norm: 0.9999990481345299, iteration: 319894
loss: 1.0020190477371216,grad_norm: 0.9109687592345512, iteration: 319895
loss: 1.0137808322906494,grad_norm: 0.884272816544106, iteration: 319896
loss: 0.9876959919929504,grad_norm: 0.8841363378100056, iteration: 319897
loss: 1.0471553802490234,grad_norm: 0.9999991111058464, iteration: 319898
loss: 0.976039469242096,grad_norm: 0.7700782348155565, iteration: 319899
loss: 0.9868411421775818,grad_norm: 0.6932793364011037, iteration: 319900
loss: 1.0481270551681519,grad_norm: 0.8963472037166845, iteration: 319901
loss: 1.0193819999694824,grad_norm: 0.9318779313852722, iteration: 319902
loss: 1.0340968370437622,grad_norm: 0.7808185687108397, iteration: 319903
loss: 1.0453236103057861,grad_norm: 0.9999997192270815, iteration: 319904
loss: 0.9702736139297485,grad_norm: 0.8025538694327089, iteration: 319905
loss: 0.9857951998710632,grad_norm: 0.7746864895025672, iteration: 319906
loss: 0.9944441914558411,grad_norm: 0.7278315437282235, iteration: 319907
loss: 0.9603666663169861,grad_norm: 0.7763434245850819, iteration: 319908
loss: 0.9925932884216309,grad_norm: 0.9506962048782973, iteration: 319909
loss: 1.01203453540802,grad_norm: 0.9428842725557561, iteration: 319910
loss: 0.9648236036300659,grad_norm: 0.8712457420519057, iteration: 319911
loss: 0.9899001717567444,grad_norm: 0.8343456327727223, iteration: 319912
loss: 0.9899718761444092,grad_norm: 0.9999990757313113, iteration: 319913
loss: 1.0057737827301025,grad_norm: 0.9999992006905688, iteration: 319914
loss: 0.9873231053352356,grad_norm: 0.7664924592782284, iteration: 319915
loss: 1.0323469638824463,grad_norm: 0.8628948618516231, iteration: 319916
loss: 1.025972604751587,grad_norm: 0.793595749889258, iteration: 319917
loss: 1.0074836015701294,grad_norm: 0.7273348156508567, iteration: 319918
loss: 1.0000141859054565,grad_norm: 0.7710290956465577, iteration: 319919
loss: 1.0282235145568848,grad_norm: 0.8139617408789601, iteration: 319920
loss: 1.0341987609863281,grad_norm: 0.9999992373360089, iteration: 319921
loss: 0.9996309280395508,grad_norm: 0.9913958916193341, iteration: 319922
loss: 1.0828075408935547,grad_norm: 0.9172863322613544, iteration: 319923
loss: 0.9990525245666504,grad_norm: 0.7708520903255837, iteration: 319924
loss: 1.0138261318206787,grad_norm: 0.9869832823601162, iteration: 319925
loss: 0.9814786911010742,grad_norm: 0.9999991106084333, iteration: 319926
loss: 1.003097414970398,grad_norm: 0.8591189566204667, iteration: 319927
loss: 0.9952897429466248,grad_norm: 0.7479693162368336, iteration: 319928
loss: 1.064035415649414,grad_norm: 0.9999990805352872, iteration: 319929
loss: 0.9904108643531799,grad_norm: 0.8289319482535994, iteration: 319930
loss: 1.0141981840133667,grad_norm: 0.7705914154762743, iteration: 319931
loss: 1.0270256996154785,grad_norm: 0.999999401001794, iteration: 319932
loss: 0.9684455990791321,grad_norm: 0.7427233657537333, iteration: 319933
loss: 0.9955697655677795,grad_norm: 0.8552193680082365, iteration: 319934
loss: 0.9795530438423157,grad_norm: 0.8741166867373136, iteration: 319935
loss: 0.9957277178764343,grad_norm: 0.9891497246266076, iteration: 319936
loss: 0.9777847528457642,grad_norm: 0.9068321880787706, iteration: 319937
loss: 0.9866328835487366,grad_norm: 0.7872404825617964, iteration: 319938
loss: 1.0118318796157837,grad_norm: 0.9999993453463778, iteration: 319939
loss: 1.0400793552398682,grad_norm: 0.9999992602468144, iteration: 319940
loss: 0.9901157021522522,grad_norm: 0.8360524153598323, iteration: 319941
loss: 0.967793345451355,grad_norm: 0.8252540812690736, iteration: 319942
loss: 0.9951961040496826,grad_norm: 0.8274096706475855, iteration: 319943
loss: 0.9817363023757935,grad_norm: 0.7488784784109982, iteration: 319944
loss: 0.9739246964454651,grad_norm: 0.8658063246246643, iteration: 319945
loss: 0.9637659192085266,grad_norm: 0.8570969109651658, iteration: 319946
loss: 0.9543346166610718,grad_norm: 0.785560644277594, iteration: 319947
loss: 1.0014315843582153,grad_norm: 0.92231943663179, iteration: 319948
loss: 0.9690110683441162,grad_norm: 0.9958122450990305, iteration: 319949
loss: 0.9979256987571716,grad_norm: 0.9999990620243324, iteration: 319950
loss: 0.9843343496322632,grad_norm: 0.8538886731067505, iteration: 319951
loss: 1.0035583972930908,grad_norm: 0.8891033100461824, iteration: 319952
loss: 1.0054974555969238,grad_norm: 0.9310370930496775, iteration: 319953
loss: 0.9957497715950012,grad_norm: 0.9319081679572468, iteration: 319954
loss: 0.9680787324905396,grad_norm: 0.7796738180093002, iteration: 319955
loss: 0.9579566717147827,grad_norm: 0.9390137445539389, iteration: 319956
loss: 1.004236102104187,grad_norm: 0.8922414623899201, iteration: 319957
loss: 0.980219841003418,grad_norm: 0.751713727795066, iteration: 319958
loss: 1.0191062688827515,grad_norm: 0.8561677552401974, iteration: 319959
loss: 1.0369739532470703,grad_norm: 0.9806224049252545, iteration: 319960
loss: 1.001552700996399,grad_norm: 0.8001685133096565, iteration: 319961
loss: 1.027661919593811,grad_norm: 0.9230462883724548, iteration: 319962
loss: 0.9993849992752075,grad_norm: 0.9999995345550189, iteration: 319963
loss: 0.9554749131202698,grad_norm: 0.9511670527856368, iteration: 319964
loss: 1.0064496994018555,grad_norm: 0.7212018599430959, iteration: 319965
loss: 0.9994741678237915,grad_norm: 0.7871638324927591, iteration: 319966
loss: 0.9560363292694092,grad_norm: 0.7832025251645265, iteration: 319967
loss: 0.9994056224822998,grad_norm: 0.7364197129093665, iteration: 319968
loss: 1.0199663639068604,grad_norm: 0.7203842767756059, iteration: 319969
loss: 1.0558401346206665,grad_norm: 0.9999990462693653, iteration: 319970
loss: 1.0084424018859863,grad_norm: 0.8280383461707546, iteration: 319971
loss: 0.9251095652580261,grad_norm: 0.9113494671325675, iteration: 319972
loss: 1.0600314140319824,grad_norm: 0.7992899004804788, iteration: 319973
loss: 1.0252026319503784,grad_norm: 0.9999991111510566, iteration: 319974
loss: 0.9887601137161255,grad_norm: 0.8631519897373024, iteration: 319975
loss: 0.9992885589599609,grad_norm: 0.999999128113353, iteration: 319976
loss: 0.9921061992645264,grad_norm: 0.8520770933037938, iteration: 319977
loss: 1.0149245262145996,grad_norm: 0.8747312303657256, iteration: 319978
loss: 1.027494192123413,grad_norm: 0.768444476029027, iteration: 319979
loss: 1.0176756381988525,grad_norm: 0.8679094735386562, iteration: 319980
loss: 0.9917569160461426,grad_norm: 0.9974197155179307, iteration: 319981
loss: 0.9934855103492737,grad_norm: 0.836172749914586, iteration: 319982
loss: 1.0106468200683594,grad_norm: 0.7968989928301727, iteration: 319983
loss: 1.0190503597259521,grad_norm: 0.6616290465284268, iteration: 319984
loss: 1.0001567602157593,grad_norm: 0.8345167944824957, iteration: 319985
loss: 1.0079224109649658,grad_norm: 0.9098658977131918, iteration: 319986
loss: 1.0095818042755127,grad_norm: 0.9090307207853474, iteration: 319987
loss: 1.0021237134933472,grad_norm: 0.9418354790254894, iteration: 319988
loss: 1.032764196395874,grad_norm: 0.9999991569170672, iteration: 319989
loss: 0.9765470623970032,grad_norm: 0.9999996268206068, iteration: 319990
loss: 0.9539012312889099,grad_norm: 0.9840602095100724, iteration: 319991
loss: 1.0092662572860718,grad_norm: 0.7815356698670273, iteration: 319992
loss: 1.0191823244094849,grad_norm: 0.9642386696743361, iteration: 319993
loss: 0.983228325843811,grad_norm: 0.862898835003204, iteration: 319994
loss: 1.0279244184494019,grad_norm: 0.9999992314460068, iteration: 319995
loss: 1.0554280281066895,grad_norm: 0.9048312182578433, iteration: 319996
loss: 0.9957651495933533,grad_norm: 0.9261689529958348, iteration: 319997
loss: 1.0235886573791504,grad_norm: 0.9084929256867388, iteration: 319998
loss: 0.9664523005485535,grad_norm: 0.9184781077682744, iteration: 319999
loss: 1.011549472808838,grad_norm: 0.8713723632565327, iteration: 320000
Evaluating at step 320000
{'val': 0.9949335996061563, 'test': 2.14350734122645}
loss: 0.9912611842155457,grad_norm: 0.686771129020578, iteration: 320001
loss: 0.9943797588348389,grad_norm: 0.9123125764474823, iteration: 320002
loss: 1.021870493888855,grad_norm: 0.7537811417406624, iteration: 320003
loss: 0.9868783354759216,grad_norm: 0.722344042471183, iteration: 320004
loss: 0.9767622947692871,grad_norm: 0.8325936087557657, iteration: 320005
loss: 1.0056958198547363,grad_norm: 0.6890813507774851, iteration: 320006
loss: 1.0266404151916504,grad_norm: 0.974042510480045, iteration: 320007
loss: 1.0130254030227661,grad_norm: 0.769262439500278, iteration: 320008
loss: 0.9936361312866211,grad_norm: 0.9344840956485901, iteration: 320009
loss: 1.0080926418304443,grad_norm: 0.7605077498487174, iteration: 320010
loss: 0.9849448204040527,grad_norm: 0.7831988105481632, iteration: 320011
loss: 1.0132423639297485,grad_norm: 0.9576517653450977, iteration: 320012
loss: 1.0020784139633179,grad_norm: 0.8448746537722533, iteration: 320013
loss: 1.0092321634292603,grad_norm: 0.8444091648425943, iteration: 320014
loss: 1.0124077796936035,grad_norm: 0.8207857894796723, iteration: 320015
loss: 0.991113543510437,grad_norm: 0.8093335164093978, iteration: 320016
loss: 0.979380190372467,grad_norm: 0.9999990069239972, iteration: 320017
loss: 1.0852304697036743,grad_norm: 0.9999994676563119, iteration: 320018
loss: 0.9951123595237732,grad_norm: 0.6814689051244427, iteration: 320019
loss: 0.9860508441925049,grad_norm: 0.795613899797544, iteration: 320020
loss: 0.9738288521766663,grad_norm: 0.9202831352950015, iteration: 320021
loss: 0.9841777682304382,grad_norm: 0.7195791426168814, iteration: 320022
loss: 1.0001037120819092,grad_norm: 0.7788489992063082, iteration: 320023
loss: 1.0046263933181763,grad_norm: 0.865705516313197, iteration: 320024
loss: 0.9813986420631409,grad_norm: 0.9838208862449631, iteration: 320025
loss: 0.9973315596580505,grad_norm: 0.8656994671619727, iteration: 320026
loss: 0.9994150400161743,grad_norm: 0.9999990878561635, iteration: 320027
loss: 1.007152795791626,grad_norm: 0.7755324551270667, iteration: 320028
loss: 1.007493495941162,grad_norm: 0.8615213789784913, iteration: 320029
loss: 1.0135612487792969,grad_norm: 0.9516287097416022, iteration: 320030
loss: 0.9773975014686584,grad_norm: 0.921404806345349, iteration: 320031
loss: 1.0236510038375854,grad_norm: 0.8590774009013556, iteration: 320032
loss: 1.0168988704681396,grad_norm: 0.6850935742028028, iteration: 320033
loss: 1.0319077968597412,grad_norm: 0.856278369601766, iteration: 320034
loss: 1.0162358283996582,grad_norm: 0.961002421724324, iteration: 320035
loss: 1.019716739654541,grad_norm: 0.8602396732921788, iteration: 320036
loss: 1.0223939418792725,grad_norm: 0.9999990910910054, iteration: 320037
loss: 0.9940097332000732,grad_norm: 0.9999992279401358, iteration: 320038
loss: 1.0291774272918701,grad_norm: 0.9109944883096002, iteration: 320039
loss: 1.031952142715454,grad_norm: 0.9999990767623963, iteration: 320040
loss: 0.9735839366912842,grad_norm: 0.742186596330854, iteration: 320041
loss: 1.023799180984497,grad_norm: 0.8504910124550479, iteration: 320042
loss: 0.9602713584899902,grad_norm: 0.8384565591183721, iteration: 320043
loss: 1.0315272808074951,grad_norm: 0.9999990744871112, iteration: 320044
loss: 0.9905416369438171,grad_norm: 0.8317219589882905, iteration: 320045
loss: 0.9803796410560608,grad_norm: 0.8119434063175123, iteration: 320046
loss: 0.99761563539505,grad_norm: 0.7866208043635196, iteration: 320047
loss: 0.9786958694458008,grad_norm: 0.7888027695870292, iteration: 320048
loss: 1.018446445465088,grad_norm: 0.6598165981497076, iteration: 320049
loss: 1.0024350881576538,grad_norm: 0.7523206158372614, iteration: 320050
loss: 0.9838102459907532,grad_norm: 0.8555098837098821, iteration: 320051
loss: 0.9702401161193848,grad_norm: 0.7262394584002808, iteration: 320052
loss: 1.013181447982788,grad_norm: 0.8479538502350294, iteration: 320053
loss: 0.9461961984634399,grad_norm: 0.9593345749138665, iteration: 320054
loss: 0.9928710460662842,grad_norm: 0.7743797112416523, iteration: 320055
loss: 1.0341904163360596,grad_norm: 0.9999991729152041, iteration: 320056
loss: 1.0740458965301514,grad_norm: 0.9999992744075276, iteration: 320057
loss: 0.961371123790741,grad_norm: 0.7642605492371359, iteration: 320058
loss: 0.9971915483474731,grad_norm: 0.9999990861292274, iteration: 320059
loss: 1.0109742879867554,grad_norm: 0.7491226380492393, iteration: 320060
loss: 0.982674777507782,grad_norm: 0.9391878621453804, iteration: 320061
loss: 1.0185929536819458,grad_norm: 0.9445917164899091, iteration: 320062
loss: 1.0181303024291992,grad_norm: 0.6979641667575683, iteration: 320063
loss: 0.9651175141334534,grad_norm: 0.8038008460496799, iteration: 320064
loss: 0.9838346242904663,grad_norm: 0.9999991264286113, iteration: 320065
loss: 1.0327086448669434,grad_norm: 0.8953866949126622, iteration: 320066
loss: 1.0066083669662476,grad_norm: 0.9430398136152797, iteration: 320067
loss: 0.9826259613037109,grad_norm: 0.9813241501742038, iteration: 320068
loss: 0.9971029162406921,grad_norm: 0.8950893886907892, iteration: 320069
loss: 0.9877252578735352,grad_norm: 0.8296684646524455, iteration: 320070
loss: 0.957057774066925,grad_norm: 0.9990515609041332, iteration: 320071
loss: 1.0286974906921387,grad_norm: 0.7755558389758075, iteration: 320072
loss: 0.9796600937843323,grad_norm: 0.803466482355911, iteration: 320073
loss: 1.0170007944107056,grad_norm: 0.77347568020938, iteration: 320074
loss: 1.0002732276916504,grad_norm: 0.8641761223686674, iteration: 320075
loss: 0.9981748461723328,grad_norm: 0.6963387913163542, iteration: 320076
loss: 0.9663156270980835,grad_norm: 0.8388112398388607, iteration: 320077
loss: 1.023823857307434,grad_norm: 0.9622529658110891, iteration: 320078
loss: 1.0141645669937134,grad_norm: 0.8998215572707012, iteration: 320079
loss: 0.9628682136535645,grad_norm: 0.7904006982220166, iteration: 320080
loss: 0.9643298387527466,grad_norm: 0.8313612473337696, iteration: 320081
loss: 0.9934216737747192,grad_norm: 0.745067414249321, iteration: 320082
loss: 1.0062230825424194,grad_norm: 0.9725925693735905, iteration: 320083
loss: 0.9656025767326355,grad_norm: 0.9999989697643092, iteration: 320084
loss: 1.008804202079773,grad_norm: 0.9999991171259245, iteration: 320085
loss: 0.9985251426696777,grad_norm: 0.7317772577498869, iteration: 320086
loss: 0.9753025770187378,grad_norm: 0.8056779887806459, iteration: 320087
loss: 0.9830629229545593,grad_norm: 0.7805035582161409, iteration: 320088
loss: 1.0306074619293213,grad_norm: 0.756292987740937, iteration: 320089
loss: 1.0134989023208618,grad_norm: 0.7899467506235562, iteration: 320090
loss: 0.9794402122497559,grad_norm: 0.8226242481799206, iteration: 320091
loss: 0.9920203685760498,grad_norm: 0.9611172869113692, iteration: 320092
loss: 0.9688920974731445,grad_norm: 0.7833588791241386, iteration: 320093
loss: 1.0111303329467773,grad_norm: 0.8360855739960538, iteration: 320094
loss: 1.0115911960601807,grad_norm: 0.8921056528347139, iteration: 320095
loss: 0.979145884513855,grad_norm: 0.9145592664899284, iteration: 320096
loss: 1.0292242765426636,grad_norm: 0.7880645347098013, iteration: 320097
loss: 0.986162543296814,grad_norm: 0.7732869404731977, iteration: 320098
loss: 1.0000444650650024,grad_norm: 0.7283364183272967, iteration: 320099
loss: 0.9832912087440491,grad_norm: 0.9625509094688134, iteration: 320100
loss: 1.0035985708236694,grad_norm: 0.9241049955498604, iteration: 320101
loss: 1.0070098638534546,grad_norm: 0.8367304113706244, iteration: 320102
loss: 1.0219614505767822,grad_norm: 0.8131330067636966, iteration: 320103
loss: 0.9667747020721436,grad_norm: 0.8175969717571571, iteration: 320104
loss: 1.0155333280563354,grad_norm: 0.999999071597846, iteration: 320105
loss: 1.03300142288208,grad_norm: 0.7662025636511582, iteration: 320106
loss: 0.983201265335083,grad_norm: 0.9919789109733977, iteration: 320107
loss: 1.002220869064331,grad_norm: 0.9999991961724342, iteration: 320108
loss: 1.0050371885299683,grad_norm: 0.8729093366550293, iteration: 320109
loss: 1.0385698080062866,grad_norm: 0.848899935853638, iteration: 320110
loss: 1.042401671409607,grad_norm: 0.9999990549966273, iteration: 320111
loss: 0.9784765839576721,grad_norm: 0.8321451267743595, iteration: 320112
loss: 0.9977080225944519,grad_norm: 0.8699753701984546, iteration: 320113
loss: 1.0093165636062622,grad_norm: 0.7295597072571904, iteration: 320114
loss: 0.9810935854911804,grad_norm: 0.9999992288303019, iteration: 320115
loss: 0.9998833537101746,grad_norm: 0.8651878231052843, iteration: 320116
loss: 1.0042871236801147,grad_norm: 0.7184989838320645, iteration: 320117
loss: 0.9776098728179932,grad_norm: 0.8951377945524094, iteration: 320118
loss: 0.983473539352417,grad_norm: 0.7738814291206203, iteration: 320119
loss: 1.0014104843139648,grad_norm: 0.8465645267291269, iteration: 320120
loss: 0.9959940314292908,grad_norm: 0.8867343288104073, iteration: 320121
loss: 0.9870941042900085,grad_norm: 0.8045855994932872, iteration: 320122
loss: 1.011396884918213,grad_norm: 0.7482558876640951, iteration: 320123
loss: 0.9805443286895752,grad_norm: 0.6869297812948729, iteration: 320124
loss: 1.0021406412124634,grad_norm: 0.7512590498129093, iteration: 320125
loss: 0.9953441023826599,grad_norm: 0.999998987424486, iteration: 320126
loss: 1.0030840635299683,grad_norm: 0.8434430428628266, iteration: 320127
loss: 0.9978147149085999,grad_norm: 0.8997004010561153, iteration: 320128
loss: 1.0485146045684814,grad_norm: 0.9999997191144608, iteration: 320129
loss: 0.9770010709762573,grad_norm: 0.8618420202727076, iteration: 320130
loss: 1.0213390588760376,grad_norm: 0.9370486142274744, iteration: 320131
loss: 1.0393975973129272,grad_norm: 0.7831787116727095, iteration: 320132
loss: 1.1585307121276855,grad_norm: 0.9999997090813837, iteration: 320133
loss: 1.0139994621276855,grad_norm: 0.8144495173165155, iteration: 320134
loss: 1.0181169509887695,grad_norm: 0.7759088329392683, iteration: 320135
loss: 0.990204393863678,grad_norm: 0.8714727324989446, iteration: 320136
loss: 0.9864651560783386,grad_norm: 0.9999991097878844, iteration: 320137
loss: 0.9629058241844177,grad_norm: 0.690384388696056, iteration: 320138
loss: 1.0033504962921143,grad_norm: 0.7431145210357994, iteration: 320139
loss: 1.0260518789291382,grad_norm: 0.8077591476911865, iteration: 320140
loss: 0.9627519249916077,grad_norm: 0.8151061103541617, iteration: 320141
loss: 1.0336027145385742,grad_norm: 0.759110084669042, iteration: 320142
loss: 1.0534160137176514,grad_norm: 0.8729103847886653, iteration: 320143
loss: 0.9785983562469482,grad_norm: 0.8268098928952664, iteration: 320144
loss: 1.0011903047561646,grad_norm: 0.9065082350013042, iteration: 320145
loss: 0.976952314376831,grad_norm: 0.7282288238536756, iteration: 320146
loss: 1.01765775680542,grad_norm: 0.8581719164928865, iteration: 320147
loss: 0.9998235106468201,grad_norm: 0.8742011280142089, iteration: 320148
loss: 1.0050333738327026,grad_norm: 0.8357816381405145, iteration: 320149
loss: 1.0056108236312866,grad_norm: 0.8535663632288581, iteration: 320150
loss: 0.9940525889396667,grad_norm: 0.8166121185138877, iteration: 320151
loss: 0.9733806252479553,grad_norm: 0.9814587697606969, iteration: 320152
loss: 1.0052937269210815,grad_norm: 0.9567947137207423, iteration: 320153
loss: 0.9792671203613281,grad_norm: 0.6951056889509298, iteration: 320154
loss: 1.0186915397644043,grad_norm: 0.8889871225986222, iteration: 320155
loss: 0.9855799674987793,grad_norm: 0.7855623174779495, iteration: 320156
loss: 1.015608549118042,grad_norm: 0.6997655454897737, iteration: 320157
loss: 0.9805653095245361,grad_norm: 0.7818717574417047, iteration: 320158
loss: 0.9673547148704529,grad_norm: 0.9999991827860693, iteration: 320159
loss: 0.9807248711585999,grad_norm: 0.9355699463172882, iteration: 320160
loss: 0.9833048582077026,grad_norm: 0.8008560616519297, iteration: 320161
loss: 1.010178565979004,grad_norm: 0.7629449555243487, iteration: 320162
loss: 0.9842135310173035,grad_norm: 0.7200045835553056, iteration: 320163
loss: 1.0036797523498535,grad_norm: 0.812782405520386, iteration: 320164
loss: 0.9897123575210571,grad_norm: 0.7986788160079413, iteration: 320165
loss: 1.00496506690979,grad_norm: 0.775886055590867, iteration: 320166
loss: 1.030483365058899,grad_norm: 0.6900716269124524, iteration: 320167
loss: 1.0657637119293213,grad_norm: 0.8498076178110008, iteration: 320168
loss: 0.9720536470413208,grad_norm: 0.9093723010755049, iteration: 320169
loss: 1.008697748184204,grad_norm: 0.8852996141292541, iteration: 320170
loss: 1.019913673400879,grad_norm: 0.9197855666278418, iteration: 320171
loss: 0.9692131876945496,grad_norm: 0.9999990861027016, iteration: 320172
loss: 0.9788296818733215,grad_norm: 0.8506524972752965, iteration: 320173
loss: 1.0193431377410889,grad_norm: 0.8858809040327079, iteration: 320174
loss: 0.990704357624054,grad_norm: 0.8440926429085553, iteration: 320175
loss: 1.0167961120605469,grad_norm: 0.8183274508815885, iteration: 320176
loss: 1.0408849716186523,grad_norm: 0.999999186527441, iteration: 320177
loss: 0.9440376162528992,grad_norm: 0.8100700106701796, iteration: 320178
loss: 1.0618137121200562,grad_norm: 0.9999997614246423, iteration: 320179
loss: 1.007246732711792,grad_norm: 0.9516785470614818, iteration: 320180
loss: 0.9782854914665222,grad_norm: 0.8908414924939191, iteration: 320181
loss: 1.030083417892456,grad_norm: 0.7712184846070107, iteration: 320182
loss: 1.0550581216812134,grad_norm: 0.9999995622043277, iteration: 320183
loss: 0.9933590292930603,grad_norm: 0.9203363208712099, iteration: 320184
loss: 1.0267229080200195,grad_norm: 0.7617825553946578, iteration: 320185
loss: 1.0157979726791382,grad_norm: 0.7702434128041041, iteration: 320186
loss: 0.9924401640892029,grad_norm: 0.9325491169619686, iteration: 320187
loss: 0.9821039438247681,grad_norm: 0.8354647651247247, iteration: 320188
loss: 1.0137165784835815,grad_norm: 0.8987121794607904, iteration: 320189
loss: 0.9987800121307373,grad_norm: 0.7699552140972945, iteration: 320190
loss: 0.9840601086616516,grad_norm: 0.8807532641117168, iteration: 320191
loss: 0.9864073395729065,grad_norm: 0.9999989644537325, iteration: 320192
loss: 1.0004281997680664,grad_norm: 0.9999998211290275, iteration: 320193
loss: 0.9781644344329834,grad_norm: 0.6730705137537413, iteration: 320194
loss: 1.0107004642486572,grad_norm: 0.9131973159042314, iteration: 320195
loss: 1.0186961889266968,grad_norm: 0.9456557988929465, iteration: 320196
loss: 1.010496973991394,grad_norm: 0.7705793280610839, iteration: 320197
loss: 0.9627621173858643,grad_norm: 0.8714435217072595, iteration: 320198
loss: 0.9915334582328796,grad_norm: 0.8655806732844089, iteration: 320199
loss: 0.97585129737854,grad_norm: 0.8397779465047287, iteration: 320200
loss: 0.9878719449043274,grad_norm: 0.9248940027024125, iteration: 320201
loss: 1.0360684394836426,grad_norm: 0.8082132304941312, iteration: 320202
loss: 1.0073357820510864,grad_norm: 0.7679763733634347, iteration: 320203
loss: 1.0113908052444458,grad_norm: 0.8608081033585687, iteration: 320204
loss: 0.9825602769851685,grad_norm: 0.8724154846351533, iteration: 320205
loss: 1.0158355236053467,grad_norm: 0.9204522875814818, iteration: 320206
loss: 1.0526373386383057,grad_norm: 0.9999998655245268, iteration: 320207
loss: 0.9928746819496155,grad_norm: 0.7885661331953803, iteration: 320208
loss: 0.9430328011512756,grad_norm: 0.8462672856898368, iteration: 320209
loss: 1.0374408960342407,grad_norm: 0.999999287250661, iteration: 320210
loss: 0.9821053743362427,grad_norm: 0.6628218298446293, iteration: 320211
loss: 0.9889983534812927,grad_norm: 0.895770191990789, iteration: 320212
loss: 1.0181750059127808,grad_norm: 0.9999990881293926, iteration: 320213
loss: 1.0098627805709839,grad_norm: 0.7899763016060775, iteration: 320214
loss: 0.9933304190635681,grad_norm: 0.9210361807843004, iteration: 320215
loss: 1.0282644033432007,grad_norm: 0.9124713231774852, iteration: 320216
loss: 0.964043378829956,grad_norm: 0.7473346569526585, iteration: 320217
loss: 1.0511841773986816,grad_norm: 0.8812315055583188, iteration: 320218
loss: 0.986570417881012,grad_norm: 0.7540461355429833, iteration: 320219
loss: 0.9817661643028259,grad_norm: 0.9417165778421732, iteration: 320220
loss: 1.025222897529602,grad_norm: 0.8786478362354341, iteration: 320221
loss: 1.0202761888504028,grad_norm: 0.9025525558370313, iteration: 320222
loss: 0.9921281337738037,grad_norm: 0.9129423777265513, iteration: 320223
loss: 0.9872056841850281,grad_norm: 0.9192101508181624, iteration: 320224
loss: 1.0292415618896484,grad_norm: 0.7932460937150805, iteration: 320225
loss: 0.9999688863754272,grad_norm: 0.8656173547207012, iteration: 320226
loss: 0.9825268387794495,grad_norm: 0.7885300285633547, iteration: 320227
loss: 1.0031378269195557,grad_norm: 0.8327427734636909, iteration: 320228
loss: 1.0169049501419067,grad_norm: 0.7539520827804614, iteration: 320229
loss: 1.0312520265579224,grad_norm: 0.8789142315869294, iteration: 320230
loss: 0.956892192363739,grad_norm: 0.8546690044812248, iteration: 320231
loss: 0.989989161491394,grad_norm: 0.8175896710428779, iteration: 320232
loss: 1.084556221961975,grad_norm: 0.9449295872343898, iteration: 320233
loss: 0.9706000685691833,grad_norm: 0.8444354568728311, iteration: 320234
loss: 0.985677182674408,grad_norm: 0.8651589207877041, iteration: 320235
loss: 0.9654057025909424,grad_norm: 0.8792643852474875, iteration: 320236
loss: 1.0089902877807617,grad_norm: 0.8336960133870235, iteration: 320237
loss: 0.9842890501022339,grad_norm: 0.8405451948481212, iteration: 320238
loss: 0.99428391456604,grad_norm: 0.8364667386334931, iteration: 320239
loss: 1.0144301652908325,grad_norm: 0.742170356810367, iteration: 320240
loss: 1.0176069736480713,grad_norm: 0.9999991274468262, iteration: 320241
loss: 0.9863212704658508,grad_norm: 0.9208691265512848, iteration: 320242
loss: 0.9892682433128357,grad_norm: 0.7811837094332867, iteration: 320243
loss: 0.9967779517173767,grad_norm: 0.7701430800029821, iteration: 320244
loss: 0.9842894673347473,grad_norm: 0.7925149257768449, iteration: 320245
loss: 1.0079691410064697,grad_norm: 0.9999998727486923, iteration: 320246
loss: 1.0207462310791016,grad_norm: 0.7502212651828515, iteration: 320247
loss: 0.9814203977584839,grad_norm: 0.8410250591247701, iteration: 320248
loss: 1.0108306407928467,grad_norm: 0.7470134801365595, iteration: 320249
loss: 0.9842613339424133,grad_norm: 0.9999995483089603, iteration: 320250
loss: 0.9794707298278809,grad_norm: 0.9088063200503141, iteration: 320251
loss: 1.006099820137024,grad_norm: 0.8452707657900284, iteration: 320252
loss: 0.9958001375198364,grad_norm: 0.7164991503974345, iteration: 320253
loss: 1.0283836126327515,grad_norm: 0.9999998330954712, iteration: 320254
loss: 0.9907870292663574,grad_norm: 0.658424144443345, iteration: 320255
loss: 1.0024793148040771,grad_norm: 0.8047309940429538, iteration: 320256
loss: 0.9922807812690735,grad_norm: 0.9050823028529812, iteration: 320257
loss: 1.0135334730148315,grad_norm: 0.8064566409269036, iteration: 320258
loss: 0.9756735563278198,grad_norm: 0.7122875606364513, iteration: 320259
loss: 0.9623531103134155,grad_norm: 0.8035360041377866, iteration: 320260
loss: 0.9773361086845398,grad_norm: 0.8314638162153298, iteration: 320261
loss: 0.9803045392036438,grad_norm: 0.9237783072683682, iteration: 320262
loss: 1.0041836500167847,grad_norm: 0.7131803780585985, iteration: 320263
loss: 1.0108706951141357,grad_norm: 0.7867712255905176, iteration: 320264
loss: 1.017873764038086,grad_norm: 0.652509165055346, iteration: 320265
loss: 0.9718779921531677,grad_norm: 0.9042700325464905, iteration: 320266
loss: 0.9950109720230103,grad_norm: 0.8382763013452961, iteration: 320267
loss: 0.9834436178207397,grad_norm: 0.791942219682002, iteration: 320268
loss: 0.9921320080757141,grad_norm: 0.9863352467535569, iteration: 320269
loss: 1.005182147026062,grad_norm: 0.958891209264333, iteration: 320270
loss: 1.006619930267334,grad_norm: 0.7531589219007578, iteration: 320271
loss: 0.9979829788208008,grad_norm: 0.8340184668898829, iteration: 320272
loss: 0.9925572276115417,grad_norm: 0.8570912311349584, iteration: 320273
loss: 1.0436924695968628,grad_norm: 0.9859493591047203, iteration: 320274
loss: 1.0184946060180664,grad_norm: 0.8305375262178585, iteration: 320275
loss: 0.9920768141746521,grad_norm: 0.9526087497818608, iteration: 320276
loss: 1.0096784830093384,grad_norm: 0.9884594227076788, iteration: 320277
loss: 0.9881723523139954,grad_norm: 0.777115477549621, iteration: 320278
loss: 1.0057897567749023,grad_norm: 0.8668642718959358, iteration: 320279
loss: 0.9842767119407654,grad_norm: 0.9566759374340986, iteration: 320280
loss: 1.003374457359314,grad_norm: 0.7262092756339968, iteration: 320281
loss: 1.0012034177780151,grad_norm: 0.9574267385774946, iteration: 320282
loss: 0.9728034734725952,grad_norm: 0.8113658291353112, iteration: 320283
loss: 1.0406975746154785,grad_norm: 0.9999992366055757, iteration: 320284
loss: 1.0363044738769531,grad_norm: 0.9999991558647465, iteration: 320285
loss: 0.9978951811790466,grad_norm: 0.9385471733244397, iteration: 320286
loss: 1.008890151977539,grad_norm: 0.8031893394387077, iteration: 320287
loss: 0.9790177941322327,grad_norm: 0.9102923727159741, iteration: 320288
loss: 1.016076683998108,grad_norm: 0.8972420510710593, iteration: 320289
loss: 1.0196988582611084,grad_norm: 0.7866191021613492, iteration: 320290
loss: 0.9695274829864502,grad_norm: 0.8132921778629438, iteration: 320291
loss: 0.97501540184021,grad_norm: 0.8775144863976816, iteration: 320292
loss: 1.013811707496643,grad_norm: 0.9705791360563703, iteration: 320293
loss: 0.993230402469635,grad_norm: 0.825705467894961, iteration: 320294
loss: 0.9988912343978882,grad_norm: 0.6614537706212278, iteration: 320295
loss: 0.9797422885894775,grad_norm: 0.7190038765420129, iteration: 320296
loss: 0.984804093837738,grad_norm: 0.7688919324594233, iteration: 320297
loss: 1.0021716356277466,grad_norm: 0.7205291856124922, iteration: 320298
loss: 0.9747370481491089,grad_norm: 0.8409253903869319, iteration: 320299
loss: 0.9909617900848389,grad_norm: 0.6863314791027154, iteration: 320300
loss: 1.0241531133651733,grad_norm: 0.7991479768068097, iteration: 320301
loss: 1.011372447013855,grad_norm: 0.8221719230090925, iteration: 320302
loss: 0.9902663230895996,grad_norm: 0.999999064298991, iteration: 320303
loss: 0.9889113903045654,grad_norm: 0.9999995249786514, iteration: 320304
loss: 1.0461769104003906,grad_norm: 0.8884525275493077, iteration: 320305
loss: 1.001868486404419,grad_norm: 0.9234991204930946, iteration: 320306
loss: 1.007381796836853,grad_norm: 0.9783633994225182, iteration: 320307
loss: 0.9544155597686768,grad_norm: 0.9144096558792313, iteration: 320308
loss: 0.9930628538131714,grad_norm: 0.9999991868931989, iteration: 320309
loss: 0.9666454792022705,grad_norm: 0.8627257550173638, iteration: 320310
loss: 0.9755285978317261,grad_norm: 0.7823962348707572, iteration: 320311
loss: 1.0143321752548218,grad_norm: 0.8360493112506017, iteration: 320312
loss: 1.018357515335083,grad_norm: 0.7801437340926787, iteration: 320313
loss: 0.9910537600517273,grad_norm: 0.9827523784219357, iteration: 320314
loss: 0.9856180548667908,grad_norm: 0.8370705087014124, iteration: 320315
loss: 1.0100103616714478,grad_norm: 0.9405884844983211, iteration: 320316
loss: 1.003917932510376,grad_norm: 0.765600478712429, iteration: 320317
loss: 0.9805493354797363,grad_norm: 0.8007703716931435, iteration: 320318
loss: 0.9962183833122253,grad_norm: 0.851474201041726, iteration: 320319
loss: 1.0284011363983154,grad_norm: 0.8634145215598151, iteration: 320320
loss: 1.0553748607635498,grad_norm: 0.905124614101488, iteration: 320321
loss: 0.9610128402709961,grad_norm: 0.8308378820394733, iteration: 320322
loss: 1.003925085067749,grad_norm: 0.8395963015250181, iteration: 320323
loss: 1.016964077949524,grad_norm: 0.999998884337239, iteration: 320324
loss: 0.9728196263313293,grad_norm: 0.8377025422815675, iteration: 320325
loss: 0.9913235902786255,grad_norm: 0.7671699055762835, iteration: 320326
loss: 0.9930688142776489,grad_norm: 0.8649604477070169, iteration: 320327
loss: 0.9745039939880371,grad_norm: 0.8528196722208689, iteration: 320328
loss: 1.0072236061096191,grad_norm: 0.8313532026540337, iteration: 320329
loss: 1.017511010169983,grad_norm: 0.7265446040805377, iteration: 320330
loss: 0.9756379127502441,grad_norm: 0.8894235940749782, iteration: 320331
loss: 0.986595094203949,grad_norm: 0.7510931972235623, iteration: 320332
loss: 1.0679312944412231,grad_norm: 0.9999991728072953, iteration: 320333
loss: 0.9741765856742859,grad_norm: 0.8352025843799401, iteration: 320334
loss: 0.9934331178665161,grad_norm: 0.7525015957476163, iteration: 320335
loss: 1.0469918251037598,grad_norm: 0.9999999623930046, iteration: 320336
loss: 0.9887667894363403,grad_norm: 0.9999994501089948, iteration: 320337
loss: 1.0008167028427124,grad_norm: 0.9999991846006209, iteration: 320338
loss: 0.9836263656616211,grad_norm: 0.732024716268388, iteration: 320339
loss: 1.0146011114120483,grad_norm: 0.8766405956941415, iteration: 320340
loss: 1.0077285766601562,grad_norm: 0.8389006330207102, iteration: 320341
loss: 1.0347567796707153,grad_norm: 0.9116710343742428, iteration: 320342
loss: 1.0137680768966675,grad_norm: 0.9999993149175692, iteration: 320343
loss: 0.9701247215270996,grad_norm: 0.9059706282972887, iteration: 320344
loss: 0.9622676968574524,grad_norm: 0.9605652648232241, iteration: 320345
loss: 0.9588761925697327,grad_norm: 0.9999989198873367, iteration: 320346
loss: 1.00004243850708,grad_norm: 0.949053418083226, iteration: 320347
loss: 0.9981582164764404,grad_norm: 0.9712392373958277, iteration: 320348
loss: 1.0737035274505615,grad_norm: 0.7715274553519123, iteration: 320349
loss: 0.968829870223999,grad_norm: 0.8322656812364871, iteration: 320350
loss: 0.9975045323371887,grad_norm: 0.8835000877179623, iteration: 320351
loss: 0.9922560453414917,grad_norm: 0.9999990198697409, iteration: 320352
loss: 0.9984602332115173,grad_norm: 0.9999991123578978, iteration: 320353
loss: 0.966463029384613,grad_norm: 0.889364605528645, iteration: 320354
loss: 1.0112253427505493,grad_norm: 0.965739771019209, iteration: 320355
loss: 0.9551411271095276,grad_norm: 0.8454102822090749, iteration: 320356
loss: 0.9707372188568115,grad_norm: 0.7967475405685669, iteration: 320357
loss: 0.9867368936538696,grad_norm: 0.7367750606236926, iteration: 320358
loss: 0.9611546397209167,grad_norm: 0.9999991099783834, iteration: 320359
loss: 0.9863341450691223,grad_norm: 0.8995818512514151, iteration: 320360
loss: 1.0347881317138672,grad_norm: 0.7831999572727473, iteration: 320361
loss: 1.001479983329773,grad_norm: 0.8443750027430513, iteration: 320362
loss: 1.0201846361160278,grad_norm: 0.8387551472393834, iteration: 320363
loss: 0.9842754602432251,grad_norm: 0.778758480637134, iteration: 320364
loss: 0.9936754703521729,grad_norm: 0.9999993543034923, iteration: 320365
loss: 0.9735473394393921,grad_norm: 0.838439614562155, iteration: 320366
loss: 1.025442123413086,grad_norm: 0.8301783209049794, iteration: 320367
loss: 0.9996897578239441,grad_norm: 0.8139422917668625, iteration: 320368
loss: 0.9979063868522644,grad_norm: 0.8707522231096448, iteration: 320369
loss: 0.9779270887374878,grad_norm: 0.8093185211681714, iteration: 320370
loss: 0.9990577697753906,grad_norm: 0.8167884169613182, iteration: 320371
loss: 0.9706431031227112,grad_norm: 0.793995401350627, iteration: 320372
loss: 1.0309607982635498,grad_norm: 0.9999992900336189, iteration: 320373
loss: 1.0034207105636597,grad_norm: 0.8056078210765812, iteration: 320374
loss: 0.9651708006858826,grad_norm: 0.8432563811985675, iteration: 320375
loss: 1.0058112144470215,grad_norm: 0.9444480542103236, iteration: 320376
loss: 1.0097728967666626,grad_norm: 0.9145051670388441, iteration: 320377
loss: 0.9715188145637512,grad_norm: 0.843876286786779, iteration: 320378
loss: 0.9848454594612122,grad_norm: 0.9999998034444615, iteration: 320379
loss: 0.9834576845169067,grad_norm: 0.9311342970845415, iteration: 320380
loss: 1.0081098079681396,grad_norm: 0.7953993511065706, iteration: 320381
loss: 1.0313035249710083,grad_norm: 0.6321141522576353, iteration: 320382
loss: 1.0041345357894897,grad_norm: 0.7887767145147317, iteration: 320383
loss: 0.978011429309845,grad_norm: 0.9646630656783667, iteration: 320384
loss: 0.9760227799415588,grad_norm: 0.9999998097910376, iteration: 320385
loss: 0.9956707954406738,grad_norm: 0.8667241121951561, iteration: 320386
loss: 1.0185548067092896,grad_norm: 0.9999991438992983, iteration: 320387
loss: 0.9952964782714844,grad_norm: 0.9999991657258483, iteration: 320388
loss: 0.9947454929351807,grad_norm: 0.659037562825826, iteration: 320389
loss: 1.0081881284713745,grad_norm: 0.8594966863013684, iteration: 320390
loss: 1.0099341869354248,grad_norm: 0.8909460055739332, iteration: 320391
loss: 1.046397089958191,grad_norm: 0.93289512495351, iteration: 320392
loss: 1.0225098133087158,grad_norm: 0.7316896828446974, iteration: 320393
loss: 1.0411510467529297,grad_norm: 0.8024959425296755, iteration: 320394
loss: 1.0190635919570923,grad_norm: 0.7813100313989514, iteration: 320395
loss: 0.9880832433700562,grad_norm: 0.8451951374961414, iteration: 320396
loss: 0.962202250957489,grad_norm: 0.7380322178550195, iteration: 320397
loss: 0.9931101202964783,grad_norm: 0.7687464357929421, iteration: 320398
loss: 1.0098886489868164,grad_norm: 0.8683748791530262, iteration: 320399
loss: 0.9864733815193176,grad_norm: 0.8021567111267957, iteration: 320400
loss: 1.0456995964050293,grad_norm: 0.9999989848248095, iteration: 320401
loss: 1.0296542644500732,grad_norm: 0.8361308807227193, iteration: 320402
loss: 0.99686598777771,grad_norm: 0.6791589575901353, iteration: 320403
loss: 1.0020219087600708,grad_norm: 0.7648372093385885, iteration: 320404
loss: 0.950777530670166,grad_norm: 0.9476242238736243, iteration: 320405
loss: 0.997473418712616,grad_norm: 0.8595267803109515, iteration: 320406
loss: 1.0305683612823486,grad_norm: 0.8150008316285917, iteration: 320407
loss: 0.9772211313247681,grad_norm: 0.8402929791187725, iteration: 320408
loss: 1.0092413425445557,grad_norm: 0.7766355697983226, iteration: 320409
loss: 0.9913323521614075,grad_norm: 0.6796784692961223, iteration: 320410
loss: 1.0045394897460938,grad_norm: 0.7838790997004506, iteration: 320411
loss: 0.9834953546524048,grad_norm: 0.9999998032940624, iteration: 320412
loss: 1.0535176992416382,grad_norm: 0.9999993708694945, iteration: 320413
loss: 1.0047860145568848,grad_norm: 0.8991270471248476, iteration: 320414
loss: 1.0157017707824707,grad_norm: 0.8474191589929612, iteration: 320415
loss: 0.9893311858177185,grad_norm: 0.8597672469138053, iteration: 320416
loss: 0.9667112827301025,grad_norm: 0.8814607764626667, iteration: 320417
loss: 0.9657111167907715,grad_norm: 0.9022680928709044, iteration: 320418
loss: 0.9785193800926208,grad_norm: 0.8626039627149936, iteration: 320419
loss: 1.0710841417312622,grad_norm: 0.8832924377243162, iteration: 320420
loss: 1.0599217414855957,grad_norm: 0.9999994670579857, iteration: 320421
loss: 1.0102347135543823,grad_norm: 0.869178012622406, iteration: 320422
loss: 0.9711428880691528,grad_norm: 0.9999990159346381, iteration: 320423
loss: 1.0166624784469604,grad_norm: 0.8454922312210341, iteration: 320424
loss: 0.9975684881210327,grad_norm: 0.9644046363342839, iteration: 320425
loss: 0.957711398601532,grad_norm: 0.7590275711381858, iteration: 320426
loss: 0.9873755574226379,grad_norm: 0.8086269150547919, iteration: 320427
loss: 1.0287022590637207,grad_norm: 0.9552300143470477, iteration: 320428
loss: 1.0142130851745605,grad_norm: 0.852741568186909, iteration: 320429
loss: 1.000716209411621,grad_norm: 0.9724438765035744, iteration: 320430
loss: 0.9985913038253784,grad_norm: 0.8119283019373423, iteration: 320431
loss: 0.9753172397613525,grad_norm: 0.9130994950289267, iteration: 320432
loss: 1.0163527727127075,grad_norm: 0.7438700170706505, iteration: 320433
loss: 0.9952135682106018,grad_norm: 0.9760025819565357, iteration: 320434
loss: 0.9754366278648376,grad_norm: 0.900858700956802, iteration: 320435
loss: 1.0316448211669922,grad_norm: 0.8777193525489058, iteration: 320436
loss: 1.2662885189056396,grad_norm: 0.9999992418759593, iteration: 320437
loss: 0.9940202832221985,grad_norm: 0.893954108004244, iteration: 320438
loss: 1.0239779949188232,grad_norm: 0.7298073266021926, iteration: 320439
loss: 1.0248799324035645,grad_norm: 0.758812234654186, iteration: 320440
loss: 1.0071930885314941,grad_norm: 0.806700962859274, iteration: 320441
loss: 1.0519964694976807,grad_norm: 0.8634225885063763, iteration: 320442
loss: 1.007097601890564,grad_norm: 0.8180910909131411, iteration: 320443
loss: 0.9581794142723083,grad_norm: 0.9544405713258971, iteration: 320444
loss: 1.0018479824066162,grad_norm: 0.999999265012129, iteration: 320445
loss: 1.033456563949585,grad_norm: 0.9664296526184702, iteration: 320446
loss: 1.006162166595459,grad_norm: 0.9608373273351802, iteration: 320447
loss: 0.9833418130874634,grad_norm: 0.8217572827665731, iteration: 320448
loss: 0.9958625435829163,grad_norm: 0.8734219410705977, iteration: 320449
loss: 1.0169739723205566,grad_norm: 0.921775558187479, iteration: 320450
loss: 1.0257083177566528,grad_norm: 0.7891406940308358, iteration: 320451
loss: 0.9844682812690735,grad_norm: 0.8992482914493242, iteration: 320452
loss: 1.0269091129302979,grad_norm: 0.9999997568497744, iteration: 320453
loss: 0.9810617566108704,grad_norm: 0.7760803790201969, iteration: 320454
loss: 0.9787826538085938,grad_norm: 0.8600463323724945, iteration: 320455
loss: 0.9832835793495178,grad_norm: 0.8561909416968252, iteration: 320456
loss: 0.9740811586380005,grad_norm: 0.767390469305196, iteration: 320457
loss: 1.0024725198745728,grad_norm: 0.9999991976324785, iteration: 320458
loss: 0.9773046970367432,grad_norm: 0.8617986063243481, iteration: 320459
loss: 0.9735387563705444,grad_norm: 0.9178758356371274, iteration: 320460
loss: 1.015795111656189,grad_norm: 0.8766810649853019, iteration: 320461
loss: 1.0128417015075684,grad_norm: 0.8407754346713376, iteration: 320462
loss: 1.02968430519104,grad_norm: 0.8566290056566865, iteration: 320463
loss: 1.0067200660705566,grad_norm: 0.9999991787928427, iteration: 320464
loss: 1.01055109500885,grad_norm: 0.8749308417745549, iteration: 320465
loss: 1.0342152118682861,grad_norm: 0.8368571494940149, iteration: 320466
loss: 1.0188974142074585,grad_norm: 0.8322734077090465, iteration: 320467
loss: 0.9984462261199951,grad_norm: 0.7221325188065644, iteration: 320468
loss: 1.0303219556808472,grad_norm: 0.9999994377933531, iteration: 320469
loss: 0.9864122271537781,grad_norm: 0.9999999307644876, iteration: 320470
loss: 1.0228549242019653,grad_norm: 0.7270319278903605, iteration: 320471
loss: 0.9657655954360962,grad_norm: 0.9178666800303551, iteration: 320472
loss: 0.9766826629638672,grad_norm: 0.7247290866219759, iteration: 320473
loss: 0.9973584413528442,grad_norm: 0.8115179925718928, iteration: 320474
loss: 0.9880844950675964,grad_norm: 0.7678708527180858, iteration: 320475
loss: 0.9748286008834839,grad_norm: 0.759502286706546, iteration: 320476
loss: 0.9612810611724854,grad_norm: 0.7884371410534543, iteration: 320477
loss: 0.9861920475959778,grad_norm: 0.8192518750751475, iteration: 320478
loss: 0.9976216554641724,grad_norm: 0.9999990977573765, iteration: 320479
loss: 1.0435994863510132,grad_norm: 0.9082033917681048, iteration: 320480
loss: 1.0209088325500488,grad_norm: 0.9188428676711222, iteration: 320481
loss: 0.9988544583320618,grad_norm: 0.8932299889508087, iteration: 320482
loss: 0.99141925573349,grad_norm: 0.9204647651229141, iteration: 320483
loss: 0.9981544017791748,grad_norm: 0.6987169835284744, iteration: 320484
loss: 0.999152421951294,grad_norm: 0.7437090959201577, iteration: 320485
loss: 0.9926424026489258,grad_norm: 0.9628735318205919, iteration: 320486
loss: 0.9629772901535034,grad_norm: 0.7034863474799224, iteration: 320487
loss: 0.9739263653755188,grad_norm: 0.7824759563991851, iteration: 320488
loss: 1.0390615463256836,grad_norm: 0.8648780540648341, iteration: 320489
loss: 0.9960840940475464,grad_norm: 0.7712981091691025, iteration: 320490
loss: 0.939599871635437,grad_norm: 0.8598785907475831, iteration: 320491
loss: 0.9659741520881653,grad_norm: 0.9185084124600938, iteration: 320492
loss: 0.9709378480911255,grad_norm: 0.9157159555459048, iteration: 320493
loss: 0.9947973489761353,grad_norm: 0.999999008844095, iteration: 320494
loss: 1.0030044317245483,grad_norm: 0.7187896649413956, iteration: 320495
loss: 0.9823289513587952,grad_norm: 0.825220863387827, iteration: 320496
loss: 0.9842733144760132,grad_norm: 0.7290934794349168, iteration: 320497
loss: 1.0082285404205322,grad_norm: 0.8094161562514225, iteration: 320498
loss: 0.9970927238464355,grad_norm: 0.9517788954400153, iteration: 320499
loss: 1.0984416007995605,grad_norm: 0.9999993164095332, iteration: 320500
loss: 1.0132008790969849,grad_norm: 0.8110787938993396, iteration: 320501
loss: 1.030684471130371,grad_norm: 0.828691284609819, iteration: 320502
loss: 0.994775116443634,grad_norm: 0.9142384738750169, iteration: 320503
loss: 1.0038657188415527,grad_norm: 0.8839441491861082, iteration: 320504
loss: 0.932504415512085,grad_norm: 0.872127971752169, iteration: 320505
loss: 1.0103552341461182,grad_norm: 0.8262864563681251, iteration: 320506
loss: 0.9725368618965149,grad_norm: 0.82110306491682, iteration: 320507
loss: 1.012415885925293,grad_norm: 0.8351910788244727, iteration: 320508
loss: 1.0187993049621582,grad_norm: 0.8489117003224647, iteration: 320509
loss: 1.0302544832229614,grad_norm: 0.9999995242212679, iteration: 320510
loss: 1.0142236948013306,grad_norm: 0.7707768228053917, iteration: 320511
loss: 0.9847049713134766,grad_norm: 0.8973854299053114, iteration: 320512
loss: 1.0653856992721558,grad_norm: 0.9999992409833274, iteration: 320513
loss: 0.9881901144981384,grad_norm: 0.9495709762403993, iteration: 320514
loss: 0.9744901061058044,grad_norm: 0.9085909027953522, iteration: 320515
loss: 1.0066825151443481,grad_norm: 0.7730025030800056, iteration: 320516
loss: 1.006626844406128,grad_norm: 0.7460136694972841, iteration: 320517
loss: 1.0095254182815552,grad_norm: 0.7432394154279061, iteration: 320518
loss: 0.9706445932388306,grad_norm: 0.8681791103480085, iteration: 320519
loss: 1.0217970609664917,grad_norm: 0.7849154021715488, iteration: 320520
loss: 1.0379390716552734,grad_norm: 0.837583436423255, iteration: 320521
loss: 1.0235514640808105,grad_norm: 0.8613710195107448, iteration: 320522
loss: 1.041725516319275,grad_norm: 0.7172271404558389, iteration: 320523
loss: 1.0052611827850342,grad_norm: 0.8590823155268096, iteration: 320524
loss: 0.9836903214454651,grad_norm: 0.9139896020065424, iteration: 320525
loss: 1.0044622421264648,grad_norm: 0.9242162525116097, iteration: 320526
loss: 1.0070302486419678,grad_norm: 0.932116198840491, iteration: 320527
loss: 0.9769090414047241,grad_norm: 0.8657113312878403, iteration: 320528
loss: 0.986997663974762,grad_norm: 0.9575742000359947, iteration: 320529
loss: 1.023331642150879,grad_norm: 0.9999992388260213, iteration: 320530
loss: 0.9751376509666443,grad_norm: 0.9433276706611932, iteration: 320531
loss: 0.9956958293914795,grad_norm: 0.7669313353748668, iteration: 320532
loss: 0.9918438196182251,grad_norm: 0.860575294988957, iteration: 320533
loss: 1.016036868095398,grad_norm: 0.8175833412608013, iteration: 320534
loss: 1.0431276559829712,grad_norm: 0.885106624869546, iteration: 320535
loss: 1.0123646259307861,grad_norm: 0.9244224191176638, iteration: 320536
loss: 0.9805677533149719,grad_norm: 0.7285336744242655, iteration: 320537
loss: 1.0065224170684814,grad_norm: 0.742454319429834, iteration: 320538
loss: 0.9973673224449158,grad_norm: 0.8809094226073687, iteration: 320539
loss: 0.9832063317298889,grad_norm: 0.7550903929484739, iteration: 320540
loss: 1.0098141431808472,grad_norm: 0.8404625278990034, iteration: 320541
loss: 1.015783429145813,grad_norm: 0.8777116361580859, iteration: 320542
loss: 0.9929791688919067,grad_norm: 0.9960750700058729, iteration: 320543
loss: 1.0195125341415405,grad_norm: 0.8181754943278103, iteration: 320544
loss: 0.992534875869751,grad_norm: 0.8679711429138566, iteration: 320545
loss: 0.984247624874115,grad_norm: 0.772125424094751, iteration: 320546
loss: 1.0112767219543457,grad_norm: 0.7626647646099209, iteration: 320547
loss: 1.0332794189453125,grad_norm: 0.7672629095549608, iteration: 320548
loss: 0.9962330460548401,grad_norm: 0.7012137523776502, iteration: 320549
loss: 0.9675740003585815,grad_norm: 0.7126204639890507, iteration: 320550
loss: 1.0286186933517456,grad_norm: 0.9999990935727774, iteration: 320551
loss: 1.0072695016860962,grad_norm: 0.8710641051241, iteration: 320552
loss: 1.001447081565857,grad_norm: 0.999999037523853, iteration: 320553
loss: 0.9492736458778381,grad_norm: 0.9584896495129247, iteration: 320554
loss: 0.9754401445388794,grad_norm: 0.9999989570675977, iteration: 320555
loss: 1.0568393468856812,grad_norm: 0.9999999958382794, iteration: 320556
loss: 0.9767197370529175,grad_norm: 0.7806823870386581, iteration: 320557
loss: 0.9916664958000183,grad_norm: 0.7289447269946793, iteration: 320558
loss: 0.9380795359611511,grad_norm: 0.9918793124220667, iteration: 320559
loss: 1.0008808374404907,grad_norm: 0.7440171728053405, iteration: 320560
loss: 1.0064321756362915,grad_norm: 0.8596192762694528, iteration: 320561
loss: 0.9774964451789856,grad_norm: 0.7696294200317786, iteration: 320562
loss: 0.9865163564682007,grad_norm: 0.8081561483495016, iteration: 320563
loss: 1.0510061979293823,grad_norm: 0.870486978650243, iteration: 320564
loss: 1.00544273853302,grad_norm: 0.9862633137238312, iteration: 320565
loss: 0.9999557733535767,grad_norm: 0.9118071517639171, iteration: 320566
loss: 0.9908953905105591,grad_norm: 0.8846051963691435, iteration: 320567
loss: 0.9861160516738892,grad_norm: 0.7829176570361208, iteration: 320568
loss: 1.0337836742401123,grad_norm: 0.9217228540283695, iteration: 320569
loss: 0.9817234873771667,grad_norm: 0.8725736097335783, iteration: 320570
loss: 0.9641307592391968,grad_norm: 0.9999991968814722, iteration: 320571
loss: 1.003809928894043,grad_norm: 0.7371683771675585, iteration: 320572
loss: 1.005527377128601,grad_norm: 0.848129173171206, iteration: 320573
loss: 1.034111499786377,grad_norm: 0.9999991165455634, iteration: 320574
loss: 1.0331120491027832,grad_norm: 0.9442940004870741, iteration: 320575
loss: 1.0056235790252686,grad_norm: 0.7067529613821014, iteration: 320576
loss: 1.0099068880081177,grad_norm: 0.8529467216698496, iteration: 320577
loss: 0.975626528263092,grad_norm: 0.9563602016263669, iteration: 320578
loss: 0.9657191634178162,grad_norm: 0.8293727618812308, iteration: 320579
loss: 0.9995008111000061,grad_norm: 0.730048998827138, iteration: 320580
loss: 1.0272341966629028,grad_norm: 0.9146519593186845, iteration: 320581
loss: 0.9771382212638855,grad_norm: 0.9999994743377976, iteration: 320582
loss: 0.9941299557685852,grad_norm: 0.9999991358080309, iteration: 320583
loss: 0.938446581363678,grad_norm: 0.8636993452492404, iteration: 320584
loss: 1.0230110883712769,grad_norm: 0.8593995677286296, iteration: 320585
loss: 1.004737138748169,grad_norm: 0.9999989649928365, iteration: 320586
loss: 1.0063551664352417,grad_norm: 0.9999996746025335, iteration: 320587
loss: 0.974503219127655,grad_norm: 0.7574801731275499, iteration: 320588
loss: 1.014689326286316,grad_norm: 0.8611946122389427, iteration: 320589
loss: 0.9660175442695618,grad_norm: 0.7570916920264249, iteration: 320590
loss: 0.9866689443588257,grad_norm: 0.9999990725658214, iteration: 320591
loss: 1.0303854942321777,grad_norm: 0.8574347661483618, iteration: 320592
loss: 0.9910570383071899,grad_norm: 0.8386226385698743, iteration: 320593
loss: 1.033164620399475,grad_norm: 0.8313874180709858, iteration: 320594
loss: 0.9741407036781311,grad_norm: 0.8726345659557405, iteration: 320595
loss: 1.0140674114227295,grad_norm: 0.9999990631482936, iteration: 320596
loss: 0.9735167026519775,grad_norm: 0.9105552040450362, iteration: 320597
loss: 1.0362550020217896,grad_norm: 0.8712252325212109, iteration: 320598
loss: 0.9728319644927979,grad_norm: 0.8792617479390745, iteration: 320599
loss: 0.9853456020355225,grad_norm: 0.7742672732754975, iteration: 320600
loss: 0.9873093962669373,grad_norm: 0.7348592360344336, iteration: 320601
loss: 1.0450329780578613,grad_norm: 0.921853389448705, iteration: 320602
loss: 1.0403952598571777,grad_norm: 0.9999999692960557, iteration: 320603
loss: 1.0130254030227661,grad_norm: 0.999999623086986, iteration: 320604
loss: 0.9992484450340271,grad_norm: 0.8648952163858457, iteration: 320605
loss: 1.0166362524032593,grad_norm: 0.8691289022827333, iteration: 320606
loss: 1.003231406211853,grad_norm: 0.999999172680404, iteration: 320607
loss: 1.0036919116973877,grad_norm: 0.7271953496055142, iteration: 320608
loss: 1.0294536352157593,grad_norm: 0.7260731864398497, iteration: 320609
loss: 1.016273856163025,grad_norm: 0.830523176387496, iteration: 320610
loss: 1.09114670753479,grad_norm: 0.8174651519572971, iteration: 320611
loss: 1.03727126121521,grad_norm: 0.7886899788614204, iteration: 320612
loss: 1.0386474132537842,grad_norm: 0.8146577056708038, iteration: 320613
loss: 0.9720832109451294,grad_norm: 0.9197625304314442, iteration: 320614
loss: 0.9797171354293823,grad_norm: 0.8173505329106823, iteration: 320615
loss: 1.0167378187179565,grad_norm: 0.879587556612217, iteration: 320616
loss: 0.9869261384010315,grad_norm: 0.8707240265710543, iteration: 320617
loss: 0.9908084273338318,grad_norm: 0.8467498955963322, iteration: 320618
loss: 1.016433835029602,grad_norm: 0.6816017242302065, iteration: 320619
loss: 0.9580824971199036,grad_norm: 0.6864612129556166, iteration: 320620
loss: 1.0217524766921997,grad_norm: 0.7779195852618583, iteration: 320621
loss: 0.9735502004623413,grad_norm: 0.8102230061225809, iteration: 320622
loss: 1.0312575101852417,grad_norm: 0.8017929318449257, iteration: 320623
loss: 0.9152361154556274,grad_norm: 0.875443455854224, iteration: 320624
loss: 1.0117675065994263,grad_norm: 0.954928037269913, iteration: 320625
loss: 1.012485384941101,grad_norm: 0.9935219162809716, iteration: 320626
loss: 0.9918702244758606,grad_norm: 0.7110136167794646, iteration: 320627
loss: 0.9796069264411926,grad_norm: 0.9999990486498537, iteration: 320628
loss: 0.964211106300354,grad_norm: 0.9803968039463294, iteration: 320629
loss: 1.0204155445098877,grad_norm: 0.7443191649569909, iteration: 320630
loss: 0.9956561923027039,grad_norm: 0.7767630833085742, iteration: 320631
loss: 1.0178834199905396,grad_norm: 0.8185099224212513, iteration: 320632
loss: 1.0031933784484863,grad_norm: 0.7563623000352114, iteration: 320633
loss: 1.0005619525909424,grad_norm: 0.7978842088121652, iteration: 320634
loss: 1.037588119506836,grad_norm: 0.9999991294057953, iteration: 320635
loss: 0.9658389687538147,grad_norm: 0.7683548288777137, iteration: 320636
loss: 0.9759232401847839,grad_norm: 0.908278596998552, iteration: 320637
loss: 0.9914749264717102,grad_norm: 0.8106629084554492, iteration: 320638
loss: 1.0163780450820923,grad_norm: 0.9470902573086696, iteration: 320639
loss: 1.0194528102874756,grad_norm: 0.705540477884007, iteration: 320640
loss: 1.0192126035690308,grad_norm: 0.9999998098192557, iteration: 320641
loss: 1.0259995460510254,grad_norm: 0.9999998176311288, iteration: 320642
loss: 0.9852155447006226,grad_norm: 0.7659723911644966, iteration: 320643
loss: 0.9942514896392822,grad_norm: 0.9755391415937101, iteration: 320644
loss: 0.9898785948753357,grad_norm: 0.7811625442826368, iteration: 320645
loss: 1.011961817741394,grad_norm: 0.9578880725069493, iteration: 320646
loss: 0.9628730416297913,grad_norm: 0.9731533582210142, iteration: 320647
loss: 0.9707168340682983,grad_norm: 0.8394318483365447, iteration: 320648
loss: 0.9984827637672424,grad_norm: 0.8910244462087532, iteration: 320649
loss: 0.9567628502845764,grad_norm: 0.7310585304945262, iteration: 320650
loss: 0.9961991906166077,grad_norm: 0.9326643292877393, iteration: 320651
loss: 0.9429187774658203,grad_norm: 0.9340659265826267, iteration: 320652
loss: 1.012037754058838,grad_norm: 0.9999992422559915, iteration: 320653
loss: 0.9546279311180115,grad_norm: 0.791268451904976, iteration: 320654
loss: 0.9971752166748047,grad_norm: 0.8991293128755169, iteration: 320655
loss: 1.0145620107650757,grad_norm: 0.9761870568536728, iteration: 320656
loss: 0.9917466044425964,grad_norm: 0.831529340321302, iteration: 320657
loss: 0.9741553068161011,grad_norm: 0.9873548064608979, iteration: 320658
loss: 1.0076004266738892,grad_norm: 0.7836343645764047, iteration: 320659
loss: 1.0247509479522705,grad_norm: 0.8689801959733641, iteration: 320660
loss: 1.017682433128357,grad_norm: 0.9163403885871897, iteration: 320661
loss: 0.9856082797050476,grad_norm: 0.9999990199008726, iteration: 320662
loss: 0.9782236814498901,grad_norm: 0.7074904183757788, iteration: 320663
loss: 1.077283263206482,grad_norm: 0.7739553287318475, iteration: 320664
loss: 0.9946045875549316,grad_norm: 0.8764184102072323, iteration: 320665
loss: 0.9568160176277161,grad_norm: 0.9884205547323194, iteration: 320666
loss: 1.0185145139694214,grad_norm: 0.8135609636924969, iteration: 320667
loss: 1.0098156929016113,grad_norm: 0.7325840970240067, iteration: 320668
loss: 0.9685657620429993,grad_norm: 0.8298596021757726, iteration: 320669
loss: 0.9812638163566589,grad_norm: 0.9999998198201355, iteration: 320670
loss: 0.9999396204948425,grad_norm: 0.8884426343218681, iteration: 320671
loss: 1.0187891721725464,grad_norm: 0.9250953981073555, iteration: 320672
loss: 0.9905668497085571,grad_norm: 0.9999991207727428, iteration: 320673
loss: 1.0109938383102417,grad_norm: 0.8045975483055238, iteration: 320674
loss: 1.024071455001831,grad_norm: 0.8312777839322943, iteration: 320675
loss: 1.0030306577682495,grad_norm: 0.8171368111721864, iteration: 320676
loss: 1.013404369354248,grad_norm: 0.9709697979200052, iteration: 320677
loss: 0.9589526653289795,grad_norm: 0.9573089299235716, iteration: 320678
loss: 0.9951900839805603,grad_norm: 0.9103760599825443, iteration: 320679
loss: 0.9792134165763855,grad_norm: 0.7450576805776433, iteration: 320680
loss: 0.9864824414253235,grad_norm: 0.850953558963184, iteration: 320681
loss: 1.0182265043258667,grad_norm: 0.8393646820923504, iteration: 320682
loss: 0.9879115223884583,grad_norm: 0.9551486404659953, iteration: 320683
loss: 1.0063581466674805,grad_norm: 0.9200174313586884, iteration: 320684
loss: 0.9991112947463989,grad_norm: 0.9805672552325069, iteration: 320685
loss: 0.9839786887168884,grad_norm: 0.6941459989172706, iteration: 320686
loss: 1.007317066192627,grad_norm: 0.8552849926412798, iteration: 320687
loss: 1.0104560852050781,grad_norm: 0.7922665429791119, iteration: 320688
loss: 1.0225142240524292,grad_norm: 0.8476528501164796, iteration: 320689
loss: 0.9928244948387146,grad_norm: 0.7644859383682178, iteration: 320690
loss: 0.9889501929283142,grad_norm: 0.8192678310557523, iteration: 320691
loss: 1.058427095413208,grad_norm: 0.8814948568928074, iteration: 320692
loss: 0.9791184663772583,grad_norm: 0.854355032098183, iteration: 320693
loss: 1.0063482522964478,grad_norm: 0.9999993219194997, iteration: 320694
loss: 1.0131335258483887,grad_norm: 0.8004579457966844, iteration: 320695
loss: 1.0270828008651733,grad_norm: 0.9770398866876211, iteration: 320696
loss: 1.0285977125167847,grad_norm: 0.8644634060167179, iteration: 320697
loss: 0.976486325263977,grad_norm: 0.9904779538230044, iteration: 320698
loss: 1.0128649473190308,grad_norm: 0.8562758062823769, iteration: 320699
loss: 1.0421282052993774,grad_norm: 0.9999998678633867, iteration: 320700
loss: 1.0467685461044312,grad_norm: 0.9999999288158407, iteration: 320701
loss: 1.0094794034957886,grad_norm: 0.8735307648783178, iteration: 320702
loss: 1.0366976261138916,grad_norm: 0.766724678834001, iteration: 320703
loss: 0.963701069355011,grad_norm: 0.984401567492124, iteration: 320704
loss: 0.976676344871521,grad_norm: 0.8035086780426441, iteration: 320705
loss: 0.9932277202606201,grad_norm: 0.774671684336956, iteration: 320706
loss: 0.9971184730529785,grad_norm: 0.9922819425859231, iteration: 320707
loss: 1.1039788722991943,grad_norm: 0.9999989876663771, iteration: 320708
loss: 1.004583477973938,grad_norm: 0.821896404115483, iteration: 320709
loss: 1.0156019926071167,grad_norm: 0.9461323307847497, iteration: 320710
loss: 1.0351572036743164,grad_norm: 0.9943357587384921, iteration: 320711
loss: 0.9826392531394958,grad_norm: 0.9999990022882105, iteration: 320712
loss: 1.0231504440307617,grad_norm: 0.8277802721470628, iteration: 320713
loss: 0.9910758137702942,grad_norm: 0.6921633435593593, iteration: 320714
loss: 0.9626333713531494,grad_norm: 0.7161270888185113, iteration: 320715
loss: 1.0338753461837769,grad_norm: 0.999999119597596, iteration: 320716
loss: 1.0130870342254639,grad_norm: 0.8645298502863575, iteration: 320717
loss: 1.0083332061767578,grad_norm: 0.8777815849404872, iteration: 320718
loss: 0.9909065961837769,grad_norm: 0.8991081695643984, iteration: 320719
loss: 1.0134903192520142,grad_norm: 0.797613215166969, iteration: 320720
loss: 0.9953024983406067,grad_norm: 0.8198185922165255, iteration: 320721
loss: 1.0073797702789307,grad_norm: 0.9979432890849933, iteration: 320722
loss: 0.9576117396354675,grad_norm: 0.8886348460263181, iteration: 320723
loss: 1.064490556716919,grad_norm: 0.9411177044282769, iteration: 320724
loss: 0.9936165809631348,grad_norm: 0.9332798272423396, iteration: 320725
loss: 0.9973635077476501,grad_norm: 0.8336746728377471, iteration: 320726
loss: 1.0032731294631958,grad_norm: 0.7810584147500327, iteration: 320727
loss: 1.0097665786743164,grad_norm: 0.6987275325639294, iteration: 320728
loss: 1.0084357261657715,grad_norm: 0.8019017057732029, iteration: 320729
loss: 1.005374789237976,grad_norm: 0.7771932334560235, iteration: 320730
loss: 0.9971756935119629,grad_norm: 0.7261918919270468, iteration: 320731
loss: 0.9973244071006775,grad_norm: 0.7692701402587678, iteration: 320732
loss: 0.9973586797714233,grad_norm: 0.7431691237843091, iteration: 320733
loss: 0.9502537846565247,grad_norm: 0.7907047514393323, iteration: 320734
loss: 0.9998531341552734,grad_norm: 0.7794217038629088, iteration: 320735
loss: 1.0179977416992188,grad_norm: 0.8497600011733739, iteration: 320736
loss: 1.0325087308883667,grad_norm: 0.9104173467090513, iteration: 320737
loss: 1.0088169574737549,grad_norm: 0.9130693353647196, iteration: 320738
loss: 1.0059994459152222,grad_norm: 0.6874821609705338, iteration: 320739
loss: 0.9858482480049133,grad_norm: 0.8572738985772683, iteration: 320740
loss: 0.9800623655319214,grad_norm: 0.9862074321085857, iteration: 320741
loss: 1.0187454223632812,grad_norm: 0.9132622207512383, iteration: 320742
loss: 0.9917219877243042,grad_norm: 0.8447437586107058, iteration: 320743
loss: 0.9510209560394287,grad_norm: 0.9999989696277972, iteration: 320744
loss: 1.0075231790542603,grad_norm: 0.8650161295183095, iteration: 320745
loss: 1.0016101598739624,grad_norm: 0.816943117464936, iteration: 320746
loss: 1.0230937004089355,grad_norm: 0.8884968414499373, iteration: 320747
loss: 0.9968650341033936,grad_norm: 0.9999991645608077, iteration: 320748
loss: 0.9859088659286499,grad_norm: 0.9526255910327269, iteration: 320749
loss: 1.0298150777816772,grad_norm: 0.8661450478773466, iteration: 320750
loss: 1.0089539289474487,grad_norm: 0.8713702437785912, iteration: 320751
loss: 1.009482741355896,grad_norm: 0.7865650414851223, iteration: 320752
loss: 0.9817606806755066,grad_norm: 0.8715299641030118, iteration: 320753
loss: 0.9831087589263916,grad_norm: 0.7164571879996111, iteration: 320754
loss: 1.0449514389038086,grad_norm: 0.9999997909982689, iteration: 320755
loss: 1.001818299293518,grad_norm: 0.7873330806881922, iteration: 320756
loss: 0.9804314970970154,grad_norm: 0.9008061140983209, iteration: 320757
loss: 1.0253124237060547,grad_norm: 0.7791425510922333, iteration: 320758
loss: 0.9806890487670898,grad_norm: 0.9434904593470443, iteration: 320759
loss: 0.9804942607879639,grad_norm: 0.7402223394268717, iteration: 320760
loss: 0.9826372861862183,grad_norm: 0.7370586831322571, iteration: 320761
loss: 0.9950141906738281,grad_norm: 0.9999999838593513, iteration: 320762
loss: 0.9659175276756287,grad_norm: 0.9155456523860844, iteration: 320763
loss: 1.0389716625213623,grad_norm: 0.805970714931328, iteration: 320764
loss: 1.0135499238967896,grad_norm: 0.8831437326663713, iteration: 320765
loss: 1.0702875852584839,grad_norm: 0.9999997192736183, iteration: 320766
loss: 0.9954591393470764,grad_norm: 0.8518521196739164, iteration: 320767
loss: 0.9715040326118469,grad_norm: 0.939179187166656, iteration: 320768
loss: 1.0046570301055908,grad_norm: 0.7596052995273378, iteration: 320769
loss: 1.0042682886123657,grad_norm: 0.854384687243203, iteration: 320770
loss: 0.9912177920341492,grad_norm: 0.9560911349257828, iteration: 320771
loss: 1.0104358196258545,grad_norm: 0.8421349568701779, iteration: 320772
loss: 0.9779295921325684,grad_norm: 0.7804367730877378, iteration: 320773
loss: 1.001340389251709,grad_norm: 0.8498697179009046, iteration: 320774
loss: 1.0079621076583862,grad_norm: 0.8701932090055734, iteration: 320775
loss: 0.9422374367713928,grad_norm: 0.8129673280667302, iteration: 320776
loss: 1.0315630435943604,grad_norm: 0.6780806718631583, iteration: 320777
loss: 1.00380277633667,grad_norm: 0.9161182005858322, iteration: 320778
loss: 0.9842903017997742,grad_norm: 0.7710230667453216, iteration: 320779
loss: 1.0130895376205444,grad_norm: 0.8521939194631594, iteration: 320780
loss: 1.0191242694854736,grad_norm: 0.9999991379772815, iteration: 320781
loss: 1.015115737915039,grad_norm: 0.9602965873867008, iteration: 320782
loss: 0.9733269810676575,grad_norm: 0.746022484350889, iteration: 320783
loss: 0.9817038178443909,grad_norm: 0.7795074597210806, iteration: 320784
loss: 1.029650092124939,grad_norm: 0.7774092288425091, iteration: 320785
loss: 0.9956952333450317,grad_norm: 0.773616767358072, iteration: 320786
loss: 1.0150953531265259,grad_norm: 0.9999995451181286, iteration: 320787
loss: 1.0041851997375488,grad_norm: 0.7712631184688546, iteration: 320788
loss: 0.982559084892273,grad_norm: 0.9149799287246878, iteration: 320789
loss: 0.9996830224990845,grad_norm: 0.8819064263565479, iteration: 320790
loss: 1.0181218385696411,grad_norm: 0.8381171973287328, iteration: 320791
loss: 0.9786871671676636,grad_norm: 0.6942408459861815, iteration: 320792
loss: 1.0493402481079102,grad_norm: 0.9532324345594528, iteration: 320793
loss: 0.9878742098808289,grad_norm: 0.836985769915664, iteration: 320794
loss: 1.0223883390426636,grad_norm: 0.9999991103138357, iteration: 320795
loss: 1.0146650075912476,grad_norm: 0.7576549844595124, iteration: 320796
loss: 1.0276910066604614,grad_norm: 0.9999994635684593, iteration: 320797
loss: 1.0701848268508911,grad_norm: 0.9999991459618366, iteration: 320798
loss: 0.9565269947052002,grad_norm: 0.9329803192918171, iteration: 320799
loss: 0.9636169075965881,grad_norm: 0.9999991842836781, iteration: 320800
loss: 0.9481366872787476,grad_norm: 0.8436907833459102, iteration: 320801
loss: 1.0044082403182983,grad_norm: 0.8233340294005925, iteration: 320802
loss: 1.0051560401916504,grad_norm: 0.9999999328974971, iteration: 320803
loss: 1.0398552417755127,grad_norm: 0.9670718427476213, iteration: 320804
loss: 1.0113558769226074,grad_norm: 0.7701413871181236, iteration: 320805
loss: 1.0444108247756958,grad_norm: 0.7773951492480353, iteration: 320806
loss: 0.9900779128074646,grad_norm: 0.7276505156591989, iteration: 320807
loss: 1.0107553005218506,grad_norm: 0.9185348730753404, iteration: 320808
loss: 0.9837815761566162,grad_norm: 0.7579422958046662, iteration: 320809
loss: 0.9831491112709045,grad_norm: 0.7613922032265323, iteration: 320810
loss: 1.0221061706542969,grad_norm: 1.0000000376657512, iteration: 320811
loss: 0.993529736995697,grad_norm: 0.8565041289766174, iteration: 320812
loss: 1.0458102226257324,grad_norm: 0.9999996866215938, iteration: 320813
loss: 0.9635282158851624,grad_norm: 0.9999991386224842, iteration: 320814
loss: 1.0100154876708984,grad_norm: 0.8633917712276545, iteration: 320815
loss: 0.977997362613678,grad_norm: 0.8284672636061315, iteration: 320816
loss: 1.0344369411468506,grad_norm: 0.9999999212791013, iteration: 320817
loss: 0.963249683380127,grad_norm: 0.8308120508059412, iteration: 320818
loss: 0.9772043824195862,grad_norm: 0.7393222551580925, iteration: 320819
loss: 1.0515828132629395,grad_norm: 0.9742704871243286, iteration: 320820
loss: 1.062315821647644,grad_norm: 0.9999992504964532, iteration: 320821
loss: 0.9770880341529846,grad_norm: 0.8887372158344512, iteration: 320822
loss: 0.9870840907096863,grad_norm: 0.8964108684989885, iteration: 320823
loss: 1.0266739130020142,grad_norm: 0.9495243848145243, iteration: 320824
loss: 0.9751470685005188,grad_norm: 0.7368197351495737, iteration: 320825
loss: 0.981620728969574,grad_norm: 0.999999593272663, iteration: 320826
loss: 1.0122294425964355,grad_norm: 0.9171233800697027, iteration: 320827
loss: 1.0032000541687012,grad_norm: 0.7899614146631547, iteration: 320828
loss: 1.0798934698104858,grad_norm: 0.9641419752737865, iteration: 320829
loss: 1.0009933710098267,grad_norm: 0.8506719446275657, iteration: 320830
loss: 1.0379395484924316,grad_norm: 0.8738316331539394, iteration: 320831
loss: 1.0268465280532837,grad_norm: 0.9999990502673265, iteration: 320832
loss: 1.0182725191116333,grad_norm: 0.8832797377014391, iteration: 320833
loss: 0.9774364233016968,grad_norm: 0.8132941599002491, iteration: 320834
loss: 0.9936392903327942,grad_norm: 0.9238548531540033, iteration: 320835
loss: 0.9694722890853882,grad_norm: 0.8804296489588403, iteration: 320836
loss: 0.9908139705657959,grad_norm: 0.6088767039690469, iteration: 320837
loss: 1.0002433061599731,grad_norm: 0.9739153556854825, iteration: 320838
loss: 0.9997254014015198,grad_norm: 0.9999996988126281, iteration: 320839
loss: 1.0652474164962769,grad_norm: 0.9999991040249173, iteration: 320840
loss: 1.0101886987686157,grad_norm: 0.9707424499556393, iteration: 320841
loss: 0.9850528240203857,grad_norm: 0.9053435752813979, iteration: 320842
loss: 0.9920204281806946,grad_norm: 0.6437980412226195, iteration: 320843
loss: 0.9995322823524475,grad_norm: 0.8426345855953803, iteration: 320844
loss: 1.004592776298523,grad_norm: 0.9999993294613464, iteration: 320845
loss: 0.996532142162323,grad_norm: 0.999999065572358, iteration: 320846
loss: 1.0057681798934937,grad_norm: 0.9476656847376901, iteration: 320847
loss: 1.0117249488830566,grad_norm: 0.8628827277712551, iteration: 320848
loss: 0.9786798357963562,grad_norm: 0.8261940366615588, iteration: 320849
loss: 0.971630334854126,grad_norm: 0.9021430247347099, iteration: 320850
loss: 1.002759337425232,grad_norm: 0.8262262515487905, iteration: 320851
loss: 1.0575299263000488,grad_norm: 0.8964695823258898, iteration: 320852
loss: 1.0135173797607422,grad_norm: 0.8462796658046109, iteration: 320853
loss: 1.0258135795593262,grad_norm: 0.9999991248829335, iteration: 320854
loss: 1.0254111289978027,grad_norm: 0.8978214805934932, iteration: 320855
loss: 0.9640145897865295,grad_norm: 0.890167615263998, iteration: 320856
loss: 1.0411909818649292,grad_norm: 0.9999994332324078, iteration: 320857
loss: 0.997612714767456,grad_norm: 0.8733096936410147, iteration: 320858
loss: 1.0031672716140747,grad_norm: 0.7542522624888467, iteration: 320859
loss: 1.0000754594802856,grad_norm: 0.8526881878398812, iteration: 320860
loss: 1.0620551109313965,grad_norm: 0.9640670203034863, iteration: 320861
loss: 0.9785633087158203,grad_norm: 1.0000000375492444, iteration: 320862
loss: 0.9868291020393372,grad_norm: 0.9651050120437892, iteration: 320863
loss: 1.023332953453064,grad_norm: 0.8336140182234303, iteration: 320864
loss: 1.0026472806930542,grad_norm: 0.9645736033713767, iteration: 320865
loss: 0.9933203458786011,grad_norm: 0.7892456559168475, iteration: 320866
loss: 0.9827578663825989,grad_norm: 0.9999993094504027, iteration: 320867
loss: 1.031743049621582,grad_norm: 0.9999996110836562, iteration: 320868
loss: 0.994109034538269,grad_norm: 0.682058868938772, iteration: 320869
loss: 0.9861233234405518,grad_norm: 0.8039883419801758, iteration: 320870
loss: 1.0325969457626343,grad_norm: 0.9999991057865772, iteration: 320871
loss: 0.983415961265564,grad_norm: 0.743714520664488, iteration: 320872
loss: 0.9741617441177368,grad_norm: 0.8235038696584859, iteration: 320873
loss: 0.9750909805297852,grad_norm: 0.758806825208277, iteration: 320874
loss: 1.0177327394485474,grad_norm: 0.9999993178764902, iteration: 320875
loss: 0.957979142665863,grad_norm: 0.8903867513109206, iteration: 320876
loss: 1.0345659255981445,grad_norm: 0.9516521404004004, iteration: 320877
loss: 0.9893800616264343,grad_norm: 0.8044973867767, iteration: 320878
loss: 1.0172544717788696,grad_norm: 0.9406299486882728, iteration: 320879
loss: 1.0026538372039795,grad_norm: 0.9243272397010189, iteration: 320880
loss: 0.9987249970436096,grad_norm: 0.9999991759387296, iteration: 320881
loss: 1.0507853031158447,grad_norm: 0.9877790125523044, iteration: 320882
loss: 1.0059359073638916,grad_norm: 0.9999998953727003, iteration: 320883
loss: 1.0013954639434814,grad_norm: 0.8310796234314229, iteration: 320884
loss: 1.0044642686843872,grad_norm: 0.7753602118223023, iteration: 320885
loss: 0.9805030226707458,grad_norm: 0.8122525780338131, iteration: 320886
loss: 1.015317440032959,grad_norm: 0.9242337817709734, iteration: 320887
loss: 0.9866309762001038,grad_norm: 0.852097995670205, iteration: 320888
loss: 1.025986671447754,grad_norm: 0.80461845492011, iteration: 320889
loss: 0.9807966351509094,grad_norm: 0.7710439522015746, iteration: 320890
loss: 1.0235517024993896,grad_norm: 0.7612908085941066, iteration: 320891
loss: 0.9918613433837891,grad_norm: 0.9999991704982824, iteration: 320892
loss: 1.0136796236038208,grad_norm: 0.9999991160948507, iteration: 320893
loss: 1.0129828453063965,grad_norm: 0.8180649949370273, iteration: 320894
loss: 1.0005093812942505,grad_norm: 0.9316014917381522, iteration: 320895
loss: 0.9901571273803711,grad_norm: 0.6998959704504143, iteration: 320896
loss: 0.9945149421691895,grad_norm: 0.7758768979615108, iteration: 320897
loss: 0.9853863716125488,grad_norm: 0.7507510773877635, iteration: 320898
loss: 0.9767925143241882,grad_norm: 0.7755967346102906, iteration: 320899
loss: 1.021095633506775,grad_norm: 0.8212425218206989, iteration: 320900
loss: 1.0513285398483276,grad_norm: 0.999999084468853, iteration: 320901
loss: 1.0151066780090332,grad_norm: 0.8135801497781248, iteration: 320902
loss: 1.0178295373916626,grad_norm: 0.859142310702665, iteration: 320903
loss: 0.9899691343307495,grad_norm: 0.9688373272807336, iteration: 320904
loss: 1.0327162742614746,grad_norm: 0.7901994018762504, iteration: 320905
loss: 1.0233204364776611,grad_norm: 0.8770817898390811, iteration: 320906
loss: 1.0071297883987427,grad_norm: 0.9235884012177933, iteration: 320907
loss: 1.0026862621307373,grad_norm: 0.8515410255083095, iteration: 320908
loss: 0.9804659485816956,grad_norm: 0.7381531211928354, iteration: 320909
loss: 1.0281614065170288,grad_norm: 0.7851493964206528, iteration: 320910
loss: 1.0007202625274658,grad_norm: 0.9403392299115715, iteration: 320911
loss: 1.0079491138458252,grad_norm: 0.729086195637717, iteration: 320912
loss: 0.9754706025123596,grad_norm: 0.8083174871833346, iteration: 320913
loss: 1.0254178047180176,grad_norm: 0.984221684938557, iteration: 320914
loss: 1.0038851499557495,grad_norm: 0.7894632224662614, iteration: 320915
loss: 1.0157098770141602,grad_norm: 0.9999990632363294, iteration: 320916
loss: 0.9508137106895447,grad_norm: 0.9673214005361307, iteration: 320917
loss: 0.9945011734962463,grad_norm: 0.7280189721706356, iteration: 320918
loss: 0.9765505194664001,grad_norm: 0.8104277756823637, iteration: 320919
loss: 0.9973781704902649,grad_norm: 0.7394871974449445, iteration: 320920
loss: 0.9971728920936584,grad_norm: 0.967333006719311, iteration: 320921
loss: 0.9619681239128113,grad_norm: 0.8630733616483693, iteration: 320922
loss: 1.0338444709777832,grad_norm: 0.8944294870628663, iteration: 320923
loss: 1.0223793983459473,grad_norm: 0.999999443006418, iteration: 320924
loss: 0.9859715104103088,grad_norm: 0.9071246134331963, iteration: 320925
loss: 0.9808859825134277,grad_norm: 0.9999993768028332, iteration: 320926
loss: 1.012665867805481,grad_norm: 0.8538547528085185, iteration: 320927
loss: 1.010291576385498,grad_norm: 0.8150363093426529, iteration: 320928
loss: 1.0049246549606323,grad_norm: 0.8556198394601268, iteration: 320929
loss: 1.0124531984329224,grad_norm: 0.9999999003877514, iteration: 320930
loss: 1.0154125690460205,grad_norm: 0.9284900944143122, iteration: 320931
loss: 0.9763224124908447,grad_norm: 0.800355980521295, iteration: 320932
loss: 0.9805564284324646,grad_norm: 0.9999991154129645, iteration: 320933
loss: 1.0218102931976318,grad_norm: 0.9047925545902881, iteration: 320934
loss: 1.0463001728057861,grad_norm: 0.999999905122923, iteration: 320935
loss: 0.9923062920570374,grad_norm: 0.857173425707975, iteration: 320936
loss: 0.9930258393287659,grad_norm: 0.8302189248199658, iteration: 320937
loss: 0.9974709153175354,grad_norm: 0.7066688253074018, iteration: 320938
loss: 0.9714453220367432,grad_norm: 0.8039500984219631, iteration: 320939
loss: 1.0357928276062012,grad_norm: 0.9999991958365884, iteration: 320940
loss: 0.9610194563865662,grad_norm: 0.8439293691651465, iteration: 320941
loss: 1.02036714553833,grad_norm: 0.7955412010513632, iteration: 320942
loss: 1.0091233253479004,grad_norm: 0.8463811596500503, iteration: 320943
loss: 1.0073843002319336,grad_norm: 0.7829813748016997, iteration: 320944
loss: 0.9773259162902832,grad_norm: 0.7587574161995253, iteration: 320945
loss: 0.9754670262336731,grad_norm: 0.8582381001775589, iteration: 320946
loss: 1.0059034824371338,grad_norm: 0.9990627519206378, iteration: 320947
loss: 1.0203381776809692,grad_norm: 0.9489515992041164, iteration: 320948
loss: 0.9967115521430969,grad_norm: 0.7155115627672383, iteration: 320949
loss: 1.0354443788528442,grad_norm: 0.9999992545789663, iteration: 320950
loss: 1.0627437829971313,grad_norm: 0.9999994097495101, iteration: 320951
loss: 1.095432996749878,grad_norm: 0.999999192790557, iteration: 320952
loss: 0.98710036277771,grad_norm: 0.9999990296466206, iteration: 320953
loss: 0.9593487977981567,grad_norm: 0.9303185485176966, iteration: 320954
loss: 0.9765084385871887,grad_norm: 0.9638296870582005, iteration: 320955
loss: 0.9905521869659424,grad_norm: 0.9406464040419548, iteration: 320956
loss: 1.0395948886871338,grad_norm: 0.8605185932529991, iteration: 320957
loss: 0.9975603222846985,grad_norm: 0.6525552172608204, iteration: 320958
loss: 0.9501678943634033,grad_norm: 0.830469628991153, iteration: 320959
loss: 0.983167827129364,grad_norm: 0.8027543037727483, iteration: 320960
loss: 1.0253958702087402,grad_norm: 0.8703352553589752, iteration: 320961
loss: 0.9830964207649231,grad_norm: 0.884079434562225, iteration: 320962
loss: 0.990287184715271,grad_norm: 0.9999994369115857, iteration: 320963
loss: 1.0647350549697876,grad_norm: 0.81410836141078, iteration: 320964
loss: 0.9988008141517639,grad_norm: 0.79536200228616, iteration: 320965
loss: 0.9801017642021179,grad_norm: 0.8082062056426959, iteration: 320966
loss: 0.9919047355651855,grad_norm: 0.9577192805609812, iteration: 320967
loss: 1.0134974718093872,grad_norm: 0.9999998159094639, iteration: 320968
loss: 1.0455464124679565,grad_norm: 0.9999992874809408, iteration: 320969
loss: 0.9743470549583435,grad_norm: 0.820533524386785, iteration: 320970
loss: 0.9978205561637878,grad_norm: 0.7578361929769541, iteration: 320971
loss: 1.0226967334747314,grad_norm: 0.7911961024103163, iteration: 320972
loss: 0.9985310435295105,grad_norm: 0.7346319707334604, iteration: 320973
loss: 0.9828785061836243,grad_norm: 0.7692860112176589, iteration: 320974
loss: 0.993131160736084,grad_norm: 0.9134045955968415, iteration: 320975
loss: 1.00880765914917,grad_norm: 0.8153381489208398, iteration: 320976
loss: 1.0356496572494507,grad_norm: 0.9999998519898781, iteration: 320977
loss: 0.9957214593887329,grad_norm: 0.893125851883153, iteration: 320978
loss: 1.0285929441452026,grad_norm: 0.9353989635605972, iteration: 320979
loss: 0.9822876453399658,grad_norm: 0.8408006028403138, iteration: 320980
loss: 1.002321481704712,grad_norm: 0.9999996753081012, iteration: 320981
loss: 1.0499504804611206,grad_norm: 0.9999989316001969, iteration: 320982
loss: 0.9929931163787842,grad_norm: 0.9332995968352865, iteration: 320983
loss: 1.0943106412887573,grad_norm: 0.9999995443245006, iteration: 320984
loss: 1.0181695222854614,grad_norm: 0.9999992550030021, iteration: 320985
loss: 1.0309895277023315,grad_norm: 0.9999991573887271, iteration: 320986
loss: 1.0346646308898926,grad_norm: 0.8420688280853204, iteration: 320987
loss: 1.0075193643569946,grad_norm: 0.9436861263853767, iteration: 320988
loss: 1.2175520658493042,grad_norm: 0.9999997655778228, iteration: 320989
loss: 1.1776418685913086,grad_norm: 0.9999996906380016, iteration: 320990
loss: 0.97926265001297,grad_norm: 0.8795611401319751, iteration: 320991
loss: 1.0583384037017822,grad_norm: 0.999999907802611, iteration: 320992
loss: 1.0479906797409058,grad_norm: 0.9999994887431063, iteration: 320993
loss: 0.9795578122138977,grad_norm: 0.9999991752387878, iteration: 320994
loss: 1.0074366331100464,grad_norm: 0.9999990514658175, iteration: 320995
loss: 1.1470606327056885,grad_norm: 0.9999993990593079, iteration: 320996
loss: 1.0181094408035278,grad_norm: 0.9999990947124144, iteration: 320997
loss: 0.9909744262695312,grad_norm: 0.8311748422327409, iteration: 320998
loss: 1.1259064674377441,grad_norm: 0.9999997086842904, iteration: 320999
loss: 0.9613351821899414,grad_norm: 0.9999990038718786, iteration: 321000
loss: 0.9981645345687866,grad_norm: 0.8320245890441097, iteration: 321001
loss: 0.9982004761695862,grad_norm: 0.9239145271323932, iteration: 321002
loss: 1.0136146545410156,grad_norm: 0.9999993915552048, iteration: 321003
loss: 1.2151594161987305,grad_norm: 0.9999992953911863, iteration: 321004
loss: 0.9849086999893188,grad_norm: 0.9999992398626422, iteration: 321005
loss: 1.0218379497528076,grad_norm: 0.8962039273345229, iteration: 321006
loss: 1.041276216506958,grad_norm: 0.9999993948005609, iteration: 321007
loss: 0.9974693059921265,grad_norm: 0.7668914390108685, iteration: 321008
loss: 0.9920466542243958,grad_norm: 0.818325138658655, iteration: 321009
loss: 1.0911056995391846,grad_norm: 0.9999997800763506, iteration: 321010
loss: 1.0384297370910645,grad_norm: 0.9999989244792528, iteration: 321011
loss: 1.0005724430084229,grad_norm: 0.9999992443125355, iteration: 321012
loss: 0.9984778761863708,grad_norm: 0.8054035567631473, iteration: 321013
loss: 1.0100089311599731,grad_norm: 0.7633507936444723, iteration: 321014
loss: 1.0867992639541626,grad_norm: 0.9999993725517056, iteration: 321015
loss: 0.9982784986495972,grad_norm: 0.7598388111124003, iteration: 321016
loss: 0.9897499084472656,grad_norm: 0.804423728636514, iteration: 321017
loss: 0.9681083559989929,grad_norm: 0.8059823256472937, iteration: 321018
loss: 0.9813066124916077,grad_norm: 0.9999991484338303, iteration: 321019
loss: 1.065065860748291,grad_norm: 0.9999997776328032, iteration: 321020
loss: 1.0180977582931519,grad_norm: 0.9483129295658079, iteration: 321021
loss: 1.009526252746582,grad_norm: 0.7329830952265877, iteration: 321022
loss: 1.0137712955474854,grad_norm: 0.8406748048098746, iteration: 321023
loss: 1.0352771282196045,grad_norm: 0.9999999249615255, iteration: 321024
loss: 1.0760455131530762,grad_norm: 0.9999991034589224, iteration: 321025
loss: 0.992859423160553,grad_norm: 0.999999318547631, iteration: 321026
loss: 1.050571084022522,grad_norm: 0.9999995190714777, iteration: 321027
loss: 1.055557131767273,grad_norm: 0.9999990195464248, iteration: 321028
loss: 1.069612741470337,grad_norm: 0.9999997567803056, iteration: 321029
loss: 1.0766688585281372,grad_norm: 0.9999996177239996, iteration: 321030
loss: 0.9922172427177429,grad_norm: 0.9261618518396727, iteration: 321031
loss: 0.9742264151573181,grad_norm: 0.8126400959345685, iteration: 321032
loss: 0.9389544129371643,grad_norm: 0.9010284016335385, iteration: 321033
loss: 1.0202890634536743,grad_norm: 0.9999992929997171, iteration: 321034
loss: 1.0487353801727295,grad_norm: 0.9999995043334052, iteration: 321035
loss: 0.9820445775985718,grad_norm: 0.9251553226862861, iteration: 321036
loss: 1.0880451202392578,grad_norm: 0.9999994886930532, iteration: 321037
loss: 1.015931248664856,grad_norm: 0.8337809973912923, iteration: 321038
loss: 1.003098487854004,grad_norm: 0.8540513251840861, iteration: 321039
loss: 0.9989748001098633,grad_norm: 0.7197298903541269, iteration: 321040
loss: 1.010840892791748,grad_norm: 0.9022984039304257, iteration: 321041
loss: 1.0179593563079834,grad_norm: 0.9999993137186935, iteration: 321042
loss: 1.003159761428833,grad_norm: 0.9999990997335299, iteration: 321043
loss: 1.0126876831054688,grad_norm: 0.9999994251072015, iteration: 321044
loss: 0.9634792804718018,grad_norm: 0.8197465545039821, iteration: 321045
loss: 1.0559122562408447,grad_norm: 0.9999989934484589, iteration: 321046
loss: 0.9849538207054138,grad_norm: 0.9999989505902624, iteration: 321047
loss: 1.0740352869033813,grad_norm: 0.9999998438184345, iteration: 321048
loss: 0.986072838306427,grad_norm: 0.7670880354246222, iteration: 321049
loss: 1.0014466047286987,grad_norm: 0.6949654340659654, iteration: 321050
loss: 1.0412633419036865,grad_norm: 0.999999541007487, iteration: 321051
loss: 1.091286301612854,grad_norm: 0.9999992310161476, iteration: 321052
loss: 0.9991627931594849,grad_norm: 0.7187872503944164, iteration: 321053
loss: 0.9936454892158508,grad_norm: 0.8677435662105317, iteration: 321054
loss: 0.9608913660049438,grad_norm: 0.9999990739475558, iteration: 321055
loss: 1.0210915803909302,grad_norm: 0.9999992159194516, iteration: 321056
loss: 1.0060423612594604,grad_norm: 0.7470156000767305, iteration: 321057
loss: 1.0681445598602295,grad_norm: 0.9999996524480252, iteration: 321058
loss: 1.1777430772781372,grad_norm: 0.9666971825613063, iteration: 321059
loss: 1.1104239225387573,grad_norm: 0.9999990278582683, iteration: 321060
loss: 1.000059962272644,grad_norm: 0.8997641707464782, iteration: 321061
loss: 0.9877538681030273,grad_norm: 0.8556373793071373, iteration: 321062
loss: 1.0424933433532715,grad_norm: 0.9921253586522902, iteration: 321063
loss: 0.9949817061424255,grad_norm: 0.9999997445135732, iteration: 321064
loss: 1.0860965251922607,grad_norm: 0.9999995989262698, iteration: 321065
loss: 1.235964059829712,grad_norm: 0.9999996838360433, iteration: 321066
loss: 1.0067371129989624,grad_norm: 0.9999998367368067, iteration: 321067
loss: 1.0594558715820312,grad_norm: 0.9999998772071752, iteration: 321068
loss: 1.102685809135437,grad_norm: 0.999999884605755, iteration: 321069
loss: 1.033129334449768,grad_norm: 0.8011477058642433, iteration: 321070
loss: 1.0597277879714966,grad_norm: 0.9999995576521034, iteration: 321071
loss: 1.081523060798645,grad_norm: 0.8059168691041788, iteration: 321072
loss: 1.091005802154541,grad_norm: 0.99999993641472, iteration: 321073
loss: 0.9982349276542664,grad_norm: 0.9999990872461164, iteration: 321074
loss: 1.0163416862487793,grad_norm: 0.9327124236993993, iteration: 321075
loss: 1.011394739151001,grad_norm: 0.9299204732800538, iteration: 321076
loss: 1.0298528671264648,grad_norm: 0.9999992052110243, iteration: 321077
loss: 0.981341540813446,grad_norm: 0.9101272927251864, iteration: 321078
loss: 0.9903577566146851,grad_norm: 0.9231622159024294, iteration: 321079
loss: 0.9856864809989929,grad_norm: 0.8561072239099169, iteration: 321080
loss: 1.0090399980545044,grad_norm: 0.9999995265129947, iteration: 321081
loss: 0.9839048981666565,grad_norm: 0.8422770014308265, iteration: 321082
loss: 1.0584436655044556,grad_norm: 0.9999995316256346, iteration: 321083
loss: 1.0337307453155518,grad_norm: 0.7677752500947621, iteration: 321084
loss: 1.0753916501998901,grad_norm: 0.9999998918047364, iteration: 321085
loss: 1.1235971450805664,grad_norm: 0.9999993506806656, iteration: 321086
loss: 1.0408935546875,grad_norm: 0.9999995044599307, iteration: 321087
loss: 0.9936985373497009,grad_norm: 0.9178371951947303, iteration: 321088
loss: 0.9816240668296814,grad_norm: 0.7885241316664369, iteration: 321089
loss: 0.9682977199554443,grad_norm: 0.8554083508044379, iteration: 321090
loss: 1.0329599380493164,grad_norm: 0.948279545507585, iteration: 321091
loss: 1.0328608751296997,grad_norm: 0.8602800492915229, iteration: 321092
loss: 0.9964851140975952,grad_norm: 0.7669335853459077, iteration: 321093
loss: 1.0089653730392456,grad_norm: 0.8743849376956201, iteration: 321094
loss: 0.991382360458374,grad_norm: 0.7991012265865636, iteration: 321095
loss: 0.9953437447547913,grad_norm: 0.6851079574711911, iteration: 321096
loss: 0.9887973666191101,grad_norm: 0.7290377222628176, iteration: 321097
loss: 1.0146738290786743,grad_norm: 0.8673107262493902, iteration: 321098
loss: 0.9629454016685486,grad_norm: 0.8936367899820108, iteration: 321099
loss: 1.0025027990341187,grad_norm: 0.8936247206569911, iteration: 321100
loss: 1.0360840559005737,grad_norm: 0.8405351198703268, iteration: 321101
loss: 1.0104467868804932,grad_norm: 0.7862312309864291, iteration: 321102
loss: 1.0880430936813354,grad_norm: 1.0000000089008365, iteration: 321103
loss: 0.9392769932746887,grad_norm: 0.9999992952106335, iteration: 321104
loss: 0.9879276752471924,grad_norm: 0.9612325881162258, iteration: 321105
loss: 1.1219736337661743,grad_norm: 0.9999999584619469, iteration: 321106
loss: 0.9772034883499146,grad_norm: 0.943379744606946, iteration: 321107
loss: 0.9837948083877563,grad_norm: 0.9388388042549822, iteration: 321108
loss: 0.9982062578201294,grad_norm: 0.8223396248446012, iteration: 321109
loss: 0.9975928664207458,grad_norm: 0.9300638860375188, iteration: 321110
loss: 1.0535222291946411,grad_norm: 0.8795892143535357, iteration: 321111
loss: 1.0304852724075317,grad_norm: 0.8186877930550318, iteration: 321112
loss: 0.9926241040229797,grad_norm: 0.8811561336665059, iteration: 321113
loss: 1.0116350650787354,grad_norm: 0.7466088492422807, iteration: 321114
loss: 0.9738906621932983,grad_norm: 0.8303097164899157, iteration: 321115
loss: 0.9778277277946472,grad_norm: 0.880922545773318, iteration: 321116
loss: 1.1359413862228394,grad_norm: 0.9216079599346844, iteration: 321117
loss: 1.006929874420166,grad_norm: 0.8058467114943019, iteration: 321118
loss: 1.0238343477249146,grad_norm: 0.7703430945869448, iteration: 321119
loss: 1.035828948020935,grad_norm: 0.9909445577133152, iteration: 321120
loss: 1.0215011835098267,grad_norm: 0.7106929893689988, iteration: 321121
loss: 0.9526832103729248,grad_norm: 0.821630009310744, iteration: 321122
loss: 1.0436099767684937,grad_norm: 0.7415628716927893, iteration: 321123
loss: 1.0570122003555298,grad_norm: 0.8898416098162015, iteration: 321124
loss: 1.0202925205230713,grad_norm: 0.9324963192303959, iteration: 321125
loss: 1.023511290550232,grad_norm: 0.9999992490647055, iteration: 321126
loss: 0.9616291522979736,grad_norm: 0.8033456523240654, iteration: 321127
loss: 1.0130531787872314,grad_norm: 0.9636223495743147, iteration: 321128
loss: 1.0350204706192017,grad_norm: 0.9050202332554091, iteration: 321129
loss: 1.0806362628936768,grad_norm: 0.9999990506545027, iteration: 321130
loss: 1.046781063079834,grad_norm: 0.9999994518972853, iteration: 321131
loss: 1.0209847688674927,grad_norm: 0.8613003098507679, iteration: 321132
loss: 0.9851865768432617,grad_norm: 0.8566854474850232, iteration: 321133
loss: 0.9664673209190369,grad_norm: 0.8936090554091092, iteration: 321134
loss: 0.9880751967430115,grad_norm: 0.7585641277239867, iteration: 321135
loss: 1.141396403312683,grad_norm: 0.9999999313885927, iteration: 321136
loss: 1.0178771018981934,grad_norm: 0.7369125872486271, iteration: 321137
loss: 1.0230677127838135,grad_norm: 0.931504655629101, iteration: 321138
loss: 0.9997997879981995,grad_norm: 0.9210870090725234, iteration: 321139
loss: 1.0170800685882568,grad_norm: 0.9818757922447262, iteration: 321140
loss: 0.9618839621543884,grad_norm: 0.9999994706147651, iteration: 321141
loss: 1.0169658660888672,grad_norm: 0.7613814148062563, iteration: 321142
loss: 1.0204168558120728,grad_norm: 0.910181963187765, iteration: 321143
loss: 0.9701524972915649,grad_norm: 0.7349774793864041, iteration: 321144
loss: 1.0070689916610718,grad_norm: 0.9999991833755852, iteration: 321145
loss: 1.0339982509613037,grad_norm: 0.9999993150393853, iteration: 321146
loss: 0.9935541749000549,grad_norm: 0.8477387682846191, iteration: 321147
loss: 1.0329017639160156,grad_norm: 0.8913068712534258, iteration: 321148
loss: 0.9815895557403564,grad_norm: 0.8170674448613007, iteration: 321149
loss: 1.0067558288574219,grad_norm: 0.7196830293712243, iteration: 321150
loss: 0.9958115220069885,grad_norm: 0.7599565088931921, iteration: 321151
loss: 1.1093530654907227,grad_norm: 0.9999991678755084, iteration: 321152
loss: 1.024563193321228,grad_norm: 0.999998980243359, iteration: 321153
loss: 1.011104702949524,grad_norm: 0.8294982916086023, iteration: 321154
loss: 1.0446826219558716,grad_norm: 0.9453105398893197, iteration: 321155
loss: 0.9810307621955872,grad_norm: 0.9538742553878925, iteration: 321156
loss: 1.0254613161087036,grad_norm: 0.9999995493281086, iteration: 321157
loss: 0.9810430407524109,grad_norm: 0.9999991291748244, iteration: 321158
loss: 1.0224038362503052,grad_norm: 0.8333637452967465, iteration: 321159
loss: 1.073852300643921,grad_norm: 0.9999994986560944, iteration: 321160
loss: 1.0346709489822388,grad_norm: 0.9262864124465877, iteration: 321161
loss: 1.0279780626296997,grad_norm: 0.9999991890190424, iteration: 321162
loss: 0.9805400371551514,grad_norm: 0.8991432910092759, iteration: 321163
loss: 0.9380143880844116,grad_norm: 0.7564639551615372, iteration: 321164
loss: 1.0016072988510132,grad_norm: 0.9999992095409804, iteration: 321165
loss: 0.9903713464736938,grad_norm: 0.8236269210172175, iteration: 321166
loss: 0.9934141039848328,grad_norm: 0.767901497416669, iteration: 321167
loss: 1.05431067943573,grad_norm: 0.999999735086338, iteration: 321168
loss: 1.041296124458313,grad_norm: 0.9688285988658962, iteration: 321169
loss: 1.1414040327072144,grad_norm: 0.9999992765114204, iteration: 321170
loss: 1.0331788063049316,grad_norm: 0.9999991771536145, iteration: 321171
loss: 0.9991673231124878,grad_norm: 0.9547534621825193, iteration: 321172
loss: 1.0093220472335815,grad_norm: 0.9780158229141968, iteration: 321173
loss: 1.0605939626693726,grad_norm: 0.9999994769317018, iteration: 321174
loss: 1.0234636068344116,grad_norm: 0.999999240993747, iteration: 321175
loss: 1.0194281339645386,grad_norm: 0.9108288000980994, iteration: 321176
loss: 0.9868552088737488,grad_norm: 0.942300130240355, iteration: 321177
loss: 1.001245141029358,grad_norm: 0.9300521156733181, iteration: 321178
loss: 0.986613929271698,grad_norm: 0.7633167095919206, iteration: 321179
loss: 1.0065864324569702,grad_norm: 0.8766660923481866, iteration: 321180
loss: 1.2112408876419067,grad_norm: 0.9999997641544308, iteration: 321181
loss: 0.9973000884056091,grad_norm: 0.819786939520843, iteration: 321182
loss: 0.9478273987770081,grad_norm: 0.7221575481865888, iteration: 321183
loss: 1.2486200332641602,grad_norm: 0.9999992601266866, iteration: 321184
loss: 0.9877205491065979,grad_norm: 0.8582167178588688, iteration: 321185
loss: 1.0087162256240845,grad_norm: 0.8584294567139541, iteration: 321186
loss: 1.054973840713501,grad_norm: 0.999998988011398, iteration: 321187
loss: 1.0247361660003662,grad_norm: 0.8914096818748746, iteration: 321188
loss: 1.0634610652923584,grad_norm: 0.9999990532199629, iteration: 321189
loss: 0.9719192385673523,grad_norm: 0.7886826634313052, iteration: 321190
loss: 1.0593883991241455,grad_norm: 0.8731627248292871, iteration: 321191
loss: 0.9786406755447388,grad_norm: 0.8654686753458759, iteration: 321192
loss: 1.0872342586517334,grad_norm: 0.999999172214987, iteration: 321193
loss: 1.0084933042526245,grad_norm: 0.9700453333217687, iteration: 321194
loss: 1.0006022453308105,grad_norm: 0.9999991025077191, iteration: 321195
loss: 1.028037428855896,grad_norm: 0.9999995868299213, iteration: 321196
loss: 0.9674394726753235,grad_norm: 0.8911252459635179, iteration: 321197
loss: 0.9933996796607971,grad_norm: 0.8438134663476704, iteration: 321198
loss: 1.0315049886703491,grad_norm: 0.933437158345146, iteration: 321199
loss: 1.0936212539672852,grad_norm: 0.9999990833034783, iteration: 321200
loss: 0.9945003390312195,grad_norm: 0.8349239928805131, iteration: 321201
loss: 1.0265905857086182,grad_norm: 0.999999199701861, iteration: 321202
loss: 1.065102219581604,grad_norm: 0.9999995870545938, iteration: 321203
loss: 1.0355033874511719,grad_norm: 0.9999991115025761, iteration: 321204
loss: 0.9808407425880432,grad_norm: 0.7959911674232448, iteration: 321205
loss: 0.9874330163002014,grad_norm: 0.737316108506145, iteration: 321206
loss: 1.048333764076233,grad_norm: 0.9999992866815319, iteration: 321207
loss: 1.0209695100784302,grad_norm: 0.857571041210382, iteration: 321208
loss: 1.0106009244918823,grad_norm: 0.8845935440744254, iteration: 321209
loss: 1.0576567649841309,grad_norm: 0.9767964536379947, iteration: 321210
loss: 1.0203683376312256,grad_norm: 0.9999992662414623, iteration: 321211
loss: 1.014934778213501,grad_norm: 0.9999997613800019, iteration: 321212
loss: 1.0068533420562744,grad_norm: 0.9999990662468979, iteration: 321213
loss: 0.9851067662239075,grad_norm: 0.9027634587069483, iteration: 321214
loss: 1.0887852907180786,grad_norm: 0.9999993185840218, iteration: 321215
loss: 1.044890284538269,grad_norm: 0.8491427909998778, iteration: 321216
loss: 0.9977484941482544,grad_norm: 0.9999993028012888, iteration: 321217
loss: 1.0532301664352417,grad_norm: 0.9999993867075286, iteration: 321218
loss: 0.9839583039283752,grad_norm: 0.9303369792899608, iteration: 321219
loss: 1.0213923454284668,grad_norm: 0.8324356411829221, iteration: 321220
loss: 0.9735640287399292,grad_norm: 0.7974399961637407, iteration: 321221
loss: 1.0278114080429077,grad_norm: 0.9230197539343314, iteration: 321222
loss: 1.0288207530975342,grad_norm: 0.9999999630583876, iteration: 321223
loss: 0.9531099200248718,grad_norm: 0.9999998660077535, iteration: 321224
loss: 1.0351812839508057,grad_norm: 0.9999995895757521, iteration: 321225
loss: 1.0070335865020752,grad_norm: 0.7493456857709423, iteration: 321226
loss: 0.9990764260292053,grad_norm: 0.999999120578589, iteration: 321227
loss: 0.9923222661018372,grad_norm: 0.752155588529153, iteration: 321228
loss: 1.0748323202133179,grad_norm: 0.9999996634103949, iteration: 321229
loss: 1.0533857345581055,grad_norm: 0.9999995943003159, iteration: 321230
loss: 0.9807285666465759,grad_norm: 0.7826345558402695, iteration: 321231
loss: 1.0086389780044556,grad_norm: 0.7311749384494481, iteration: 321232
loss: 1.0043662786483765,grad_norm: 0.675835546652879, iteration: 321233
loss: 1.0375587940216064,grad_norm: 0.8665414550464816, iteration: 321234
loss: 1.0596288442611694,grad_norm: 0.9999997721602966, iteration: 321235
loss: 1.0418064594268799,grad_norm: 0.8004426871785736, iteration: 321236
loss: 0.9999608397483826,grad_norm: 0.8564141815244657, iteration: 321237
loss: 0.9794572591781616,grad_norm: 0.999999153738661, iteration: 321238
loss: 1.0457501411437988,grad_norm: 0.9487232195558294, iteration: 321239
loss: 0.9951194524765015,grad_norm: 0.9767976896545807, iteration: 321240
loss: 1.0123404264450073,grad_norm: 0.8406723752196887, iteration: 321241
loss: 0.994874119758606,grad_norm: 0.8983141369849873, iteration: 321242
loss: 1.0092341899871826,grad_norm: 0.7194852182385753, iteration: 321243
loss: 0.9842395186424255,grad_norm: 0.7993466019577355, iteration: 321244
loss: 1.0172207355499268,grad_norm: 0.8299532372267792, iteration: 321245
loss: 0.9918438196182251,grad_norm: 0.9135750937291776, iteration: 321246
loss: 1.003774642944336,grad_norm: 0.8548982146494095, iteration: 321247
loss: 0.9878264665603638,grad_norm: 0.8679013776812479, iteration: 321248
loss: 1.0242273807525635,grad_norm: 0.8747554833505078, iteration: 321249
loss: 1.0193958282470703,grad_norm: 0.85743733712747, iteration: 321250
loss: 1.0125269889831543,grad_norm: 0.8043248346525617, iteration: 321251
loss: 1.0251251459121704,grad_norm: 0.9512416605325914, iteration: 321252
loss: 1.013702630996704,grad_norm: 0.974264448049265, iteration: 321253
loss: 0.9877967834472656,grad_norm: 0.8320833932010657, iteration: 321254
loss: 0.9874790906906128,grad_norm: 0.8506684578432326, iteration: 321255
loss: 0.9866679310798645,grad_norm: 0.7513350495180288, iteration: 321256
loss: 1.0008724927902222,grad_norm: 0.8813400945530369, iteration: 321257
loss: 0.9960072040557861,grad_norm: 0.7754057589392632, iteration: 321258
loss: 1.0349212884902954,grad_norm: 0.9999994407987299, iteration: 321259
loss: 1.0652018785476685,grad_norm: 0.8728155908135447, iteration: 321260
loss: 1.0086948871612549,grad_norm: 0.8788120399374013, iteration: 321261
loss: 1.00691819190979,grad_norm: 0.752058215320482, iteration: 321262
loss: 1.0243467092514038,grad_norm: 0.7399462548444921, iteration: 321263
loss: 1.0438216924667358,grad_norm: 0.999999293627803, iteration: 321264
loss: 1.0066437721252441,grad_norm: 0.9516253257930921, iteration: 321265
loss: 1.1427221298217773,grad_norm: 0.9999992475633126, iteration: 321266
loss: 1.0361169576644897,grad_norm: 0.9213181616259732, iteration: 321267
loss: 0.9908370971679688,grad_norm: 0.7563060278801998, iteration: 321268
loss: 1.0379375219345093,grad_norm: 0.9175906307088426, iteration: 321269
loss: 0.9925380349159241,grad_norm: 0.9405586471470835, iteration: 321270
loss: 0.9902608394622803,grad_norm: 0.7918714034134732, iteration: 321271
loss: 1.0597517490386963,grad_norm: 0.999999008279712, iteration: 321272
loss: 0.966098427772522,grad_norm: 0.9149625677368403, iteration: 321273
loss: 1.0533738136291504,grad_norm: 0.9999997233379275, iteration: 321274
loss: 1.0150139331817627,grad_norm: 0.8814587668524845, iteration: 321275
loss: 0.9850082993507385,grad_norm: 0.7697744458490928, iteration: 321276
loss: 1.0008058547973633,grad_norm: 0.8285537988224851, iteration: 321277
loss: 1.0049464702606201,grad_norm: 0.9999993141802391, iteration: 321278
loss: 1.026934027671814,grad_norm: 0.8305831054180937, iteration: 321279
loss: 1.0251619815826416,grad_norm: 0.9038115599120077, iteration: 321280
loss: 1.1255475282669067,grad_norm: 0.9999992586305945, iteration: 321281
loss: 1.0498924255371094,grad_norm: 0.9999996451737777, iteration: 321282
loss: 0.9994363188743591,grad_norm: 0.9208160518383514, iteration: 321283
loss: 0.9679271578788757,grad_norm: 0.8334871892830902, iteration: 321284
loss: 0.9634326696395874,grad_norm: 0.7924740272958263, iteration: 321285
loss: 1.0217714309692383,grad_norm: 0.913373246302924, iteration: 321286
loss: 1.0015758275985718,grad_norm: 0.68919920690831, iteration: 321287
loss: 0.990241527557373,grad_norm: 0.9092425804859359, iteration: 321288
loss: 0.999955952167511,grad_norm: 0.9355730213283763, iteration: 321289
loss: 1.018712043762207,grad_norm: 0.9999993155957685, iteration: 321290
loss: 0.9496741890907288,grad_norm: 0.8281928929695979, iteration: 321291
loss: 0.9865543842315674,grad_norm: 0.7106740890547758, iteration: 321292
loss: 1.0397995710372925,grad_norm: 0.9357090379331956, iteration: 321293
loss: 0.9741382598876953,grad_norm: 0.9999992284010613, iteration: 321294
loss: 0.9651750922203064,grad_norm: 0.7317790795274225, iteration: 321295
loss: 0.9900962710380554,grad_norm: 0.9999991437822312, iteration: 321296
loss: 1.0308891534805298,grad_norm: 0.8373453583208104, iteration: 321297
loss: 1.0064523220062256,grad_norm: 0.8435125660218769, iteration: 321298
loss: 1.0216975212097168,grad_norm: 0.9999988905095717, iteration: 321299
loss: 1.0194878578186035,grad_norm: 0.7647753239942136, iteration: 321300
loss: 0.9747151732444763,grad_norm: 0.661944430718711, iteration: 321301
loss: 1.0173439979553223,grad_norm: 0.9829320177909688, iteration: 321302
loss: 1.0096698999404907,grad_norm: 0.9999998743417604, iteration: 321303
loss: 1.0119941234588623,grad_norm: 0.999998980516663, iteration: 321304
loss: 1.0123581886291504,grad_norm: 0.7418013900413165, iteration: 321305
loss: 1.0416241884231567,grad_norm: 0.9999991799765477, iteration: 321306
loss: 0.9637138247489929,grad_norm: 0.8401297733827372, iteration: 321307
loss: 0.9712145924568176,grad_norm: 0.8728557231598918, iteration: 321308
loss: 1.056299090385437,grad_norm: 0.751348595826081, iteration: 321309
loss: 1.0340442657470703,grad_norm: 0.9999998477274235, iteration: 321310
loss: 0.9933695197105408,grad_norm: 0.8942973932056157, iteration: 321311
loss: 0.9976629614830017,grad_norm: 0.8270419016539747, iteration: 321312
loss: 0.9997639060020447,grad_norm: 0.848473915151175, iteration: 321313
loss: 1.0079360008239746,grad_norm: 0.7027613971151871, iteration: 321314
loss: 1.019472599029541,grad_norm: 0.8793437344578628, iteration: 321315
loss: 0.997275710105896,grad_norm: 0.9260882124714587, iteration: 321316
loss: 1.019701361656189,grad_norm: 0.7374653669381198, iteration: 321317
loss: 1.003159999847412,grad_norm: 0.7962080859964062, iteration: 321318
loss: 1.0067662000656128,grad_norm: 0.8469435691977399, iteration: 321319
loss: 1.0053390264511108,grad_norm: 0.9999988863779876, iteration: 321320
loss: 0.9919401407241821,grad_norm: 0.7266219750317439, iteration: 321321
loss: 1.0007762908935547,grad_norm: 0.9332756032299123, iteration: 321322
loss: 0.9988537430763245,grad_norm: 0.9532327051356188, iteration: 321323
loss: 1.0011643171310425,grad_norm: 0.7415712635557935, iteration: 321324
loss: 0.9973203539848328,grad_norm: 0.9999991468072452, iteration: 321325
loss: 1.0414375066757202,grad_norm: 0.8837549176518353, iteration: 321326
loss: 1.0026425123214722,grad_norm: 0.6997231376537423, iteration: 321327
loss: 1.005219578742981,grad_norm: 0.8403042334943742, iteration: 321328
loss: 1.0294252634048462,grad_norm: 0.9999992689817608, iteration: 321329
loss: 1.0296742916107178,grad_norm: 0.9070930200054821, iteration: 321330
loss: 1.030797004699707,grad_norm: 0.9999993077809294, iteration: 321331
loss: 1.0214650630950928,grad_norm: 0.8096775388751423, iteration: 321332
loss: 1.024205207824707,grad_norm: 0.9248970273655274, iteration: 321333
loss: 0.9827924966812134,grad_norm: 0.825197954059626, iteration: 321334
loss: 1.0066473484039307,grad_norm: 0.8399039750478694, iteration: 321335
loss: 1.0046263933181763,grad_norm: 0.9935186816785267, iteration: 321336
loss: 1.027448058128357,grad_norm: 0.9687148222110602, iteration: 321337
loss: 1.00984787940979,grad_norm: 0.8345637369784422, iteration: 321338
loss: 0.9973673224449158,grad_norm: 0.9999993764177525, iteration: 321339
loss: 1.011547565460205,grad_norm: 0.850709136305559, iteration: 321340
loss: 1.0069975852966309,grad_norm: 0.827566915368147, iteration: 321341
loss: 1.092987298965454,grad_norm: 0.9204060485645245, iteration: 321342
loss: 1.1037324666976929,grad_norm: 0.891808716276225, iteration: 321343
loss: 1.0028607845306396,grad_norm: 0.9999999790468219, iteration: 321344
loss: 1.0430914163589478,grad_norm: 0.9999990544280686, iteration: 321345
loss: 1.0210626125335693,grad_norm: 0.8706131804344035, iteration: 321346
loss: 1.042802333831787,grad_norm: 0.9999999401192513, iteration: 321347
loss: 0.9850929975509644,grad_norm: 0.9999999276630526, iteration: 321348
loss: 1.0768206119537354,grad_norm: 0.8729122015695198, iteration: 321349
loss: 1.0118752717971802,grad_norm: 0.8457797013990788, iteration: 321350
loss: 0.9728133082389832,grad_norm: 0.8680247289340042, iteration: 321351
loss: 1.0249207019805908,grad_norm: 0.8061467803622115, iteration: 321352
loss: 1.06566321849823,grad_norm: 0.9999999613600953, iteration: 321353
loss: 1.0170328617095947,grad_norm: 0.971360544742128, iteration: 321354
loss: 1.0276659727096558,grad_norm: 0.844704822268577, iteration: 321355
loss: 1.1245275735855103,grad_norm: 0.9999999381952087, iteration: 321356
loss: 1.0339268445968628,grad_norm: 1.0000000656911905, iteration: 321357
loss: 0.9933344721794128,grad_norm: 0.8796943230048092, iteration: 321358
loss: 1.0079922676086426,grad_norm: 0.8130589587793501, iteration: 321359
loss: 0.9906050562858582,grad_norm: 0.8336748840519813, iteration: 321360
loss: 0.9774789214134216,grad_norm: 0.82257481343869, iteration: 321361
loss: 1.0093168020248413,grad_norm: 0.7367553345324, iteration: 321362
loss: 1.0117911100387573,grad_norm: 0.7563764207376141, iteration: 321363
loss: 1.0087496042251587,grad_norm: 0.7976410414398752, iteration: 321364
loss: 1.0830501317977905,grad_norm: 0.9999995161285312, iteration: 321365
loss: 0.9805999398231506,grad_norm: 0.947900300819949, iteration: 321366
loss: 1.0104212760925293,grad_norm: 0.8027930884226673, iteration: 321367
loss: 0.9997166395187378,grad_norm: 0.8444043845606549, iteration: 321368
loss: 1.027137279510498,grad_norm: 0.8209950237294071, iteration: 321369
loss: 1.0842846632003784,grad_norm: 0.9999995168099766, iteration: 321370
loss: 1.010604739189148,grad_norm: 0.9999991054894907, iteration: 321371
loss: 0.9469778537750244,grad_norm: 0.9011074233914899, iteration: 321372
loss: 1.0715545415878296,grad_norm: 0.9999995188852686, iteration: 321373
loss: 1.0123270750045776,grad_norm: 0.8872533093742213, iteration: 321374
loss: 1.0677412748336792,grad_norm: 0.9999995265359516, iteration: 321375
loss: 1.0491187572479248,grad_norm: 0.965771096658007, iteration: 321376
loss: 1.0155593156814575,grad_norm: 0.7330179310914752, iteration: 321377
loss: 0.9714571833610535,grad_norm: 0.8874855137895142, iteration: 321378
loss: 0.9965662956237793,grad_norm: 0.7966564897440661, iteration: 321379
loss: 1.0348573923110962,grad_norm: 0.9999990479273743, iteration: 321380
loss: 0.9694845080375671,grad_norm: 0.9999993573051784, iteration: 321381
loss: 1.0318783521652222,grad_norm: 0.9255354297794007, iteration: 321382
loss: 1.0200351476669312,grad_norm: 0.9030311143012578, iteration: 321383
loss: 1.0171761512756348,grad_norm: 0.9999996969811766, iteration: 321384
loss: 0.9621694684028625,grad_norm: 0.8298877833644485, iteration: 321385
loss: 1.0134297609329224,grad_norm: 0.8302354947627052, iteration: 321386
loss: 0.972676157951355,grad_norm: 0.9364400953142046, iteration: 321387
loss: 1.0044268369674683,grad_norm: 0.7720145338465395, iteration: 321388
loss: 0.9728514552116394,grad_norm: 0.9013057745352144, iteration: 321389
loss: 0.9840351939201355,grad_norm: 0.7838218204823106, iteration: 321390
loss: 1.0070688724517822,grad_norm: 0.9999990532322347, iteration: 321391
loss: 1.010513186454773,grad_norm: 0.9999997719397478, iteration: 321392
loss: 0.9801276922225952,grad_norm: 0.8337698275501323, iteration: 321393
loss: 0.9913113117218018,grad_norm: 0.8484580987826682, iteration: 321394
loss: 1.0306134223937988,grad_norm: 0.8497264896757495, iteration: 321395
loss: 1.0019054412841797,grad_norm: 0.9187279796269787, iteration: 321396
loss: 1.0089205503463745,grad_norm: 0.9145407397895275, iteration: 321397
loss: 1.0206704139709473,grad_norm: 0.7219072328031378, iteration: 321398
loss: 1.0178061723709106,grad_norm: 0.8172098330393511, iteration: 321399
loss: 1.005013108253479,grad_norm: 0.9999990792338336, iteration: 321400
loss: 0.9942078590393066,grad_norm: 0.9788204279275324, iteration: 321401
loss: 1.0055522918701172,grad_norm: 0.8414025706220561, iteration: 321402
loss: 1.0096982717514038,grad_norm: 0.9999997751960554, iteration: 321403
loss: 1.2303953170776367,grad_norm: 0.9999997173205788, iteration: 321404
loss: 1.0524088144302368,grad_norm: 0.7328326502525947, iteration: 321405
loss: 0.9659455418586731,grad_norm: 0.8488065707052781, iteration: 321406
loss: 0.987640380859375,grad_norm: 0.8867546197493182, iteration: 321407
loss: 0.9732629060745239,grad_norm: 0.8944289303044899, iteration: 321408
loss: 1.018133521080017,grad_norm: 0.8393543014850787, iteration: 321409
loss: 0.9696285724639893,grad_norm: 0.8686115663924757, iteration: 321410
loss: 1.0189327001571655,grad_norm: 0.8646999319779269, iteration: 321411
loss: 1.0477321147918701,grad_norm: 0.9999997000955261, iteration: 321412
loss: 1.0050915479660034,grad_norm: 0.999999229745988, iteration: 321413
loss: 1.0524085760116577,grad_norm: 0.999999747828948, iteration: 321414
loss: 1.0079243183135986,grad_norm: 0.9999990076428174, iteration: 321415
loss: 1.0272672176361084,grad_norm: 0.9999990482303427, iteration: 321416
loss: 1.0213298797607422,grad_norm: 0.9999992138423114, iteration: 321417
loss: 1.0092633962631226,grad_norm: 0.7527754740206589, iteration: 321418
loss: 1.0013997554779053,grad_norm: 0.83754902730389, iteration: 321419
loss: 1.0208603143692017,grad_norm: 0.8603905638921596, iteration: 321420
loss: 0.9940947890281677,grad_norm: 0.7850437557859293, iteration: 321421
loss: 0.9632493853569031,grad_norm: 0.9136217766552919, iteration: 321422
loss: 0.9569054245948792,grad_norm: 0.7977482085248166, iteration: 321423
loss: 0.9712673425674438,grad_norm: 0.9999992870959108, iteration: 321424
loss: 0.9923675656318665,grad_norm: 0.87043385784466, iteration: 321425
loss: 1.1475211381912231,grad_norm: 0.9999999732795811, iteration: 321426
loss: 1.1175048351287842,grad_norm: 0.9999998623356783, iteration: 321427
loss: 1.144327163696289,grad_norm: 0.9999991886040904, iteration: 321428
loss: 1.0119760036468506,grad_norm: 0.8414428835456216, iteration: 321429
loss: 1.009499430656433,grad_norm: 0.7547027466722714, iteration: 321430
loss: 0.9669804573059082,grad_norm: 0.8906315121839854, iteration: 321431
loss: 1.0706863403320312,grad_norm: 0.7747502801533478, iteration: 321432
loss: 1.0418479442596436,grad_norm: 0.9999998889855803, iteration: 321433
loss: 0.9791329503059387,grad_norm: 0.8356932498572448, iteration: 321434
loss: 1.4441767930984497,grad_norm: 0.9999995452680995, iteration: 321435
loss: 0.998465359210968,grad_norm: 0.8398742655605158, iteration: 321436
loss: 0.9724674820899963,grad_norm: 0.9099572252522089, iteration: 321437
loss: 1.0751676559448242,grad_norm: 0.9995923416134258, iteration: 321438
loss: 1.01945161819458,grad_norm: 0.7501951695460753, iteration: 321439
loss: 1.0259674787521362,grad_norm: 0.9257460972941113, iteration: 321440
loss: 1.0805587768554688,grad_norm: 0.9999996494297437, iteration: 321441
loss: 1.0336573123931885,grad_norm: 0.9999995870603516, iteration: 321442
loss: 1.076441764831543,grad_norm: 0.9999996507403647, iteration: 321443
loss: 1.0131678581237793,grad_norm: 0.8563289598730734, iteration: 321444
loss: 1.1189221143722534,grad_norm: 0.9999990189155172, iteration: 321445
loss: 0.978603720664978,grad_norm: 0.9999992826998645, iteration: 321446
loss: 1.0333268642425537,grad_norm: 0.9999996076418993, iteration: 321447
loss: 0.9889414310455322,grad_norm: 0.9999990567030064, iteration: 321448
loss: 1.0088984966278076,grad_norm: 0.9511980383049411, iteration: 321449
loss: 1.023298978805542,grad_norm: 0.9999991903534678, iteration: 321450
loss: 1.0376014709472656,grad_norm: 0.9820864159381449, iteration: 321451
loss: 1.0078977346420288,grad_norm: 0.8356636621817539, iteration: 321452
loss: 0.9931522607803345,grad_norm: 0.9999999690297233, iteration: 321453
loss: 1.0400454998016357,grad_norm: 0.9999991258819139, iteration: 321454
loss: 1.0307905673980713,grad_norm: 0.996151100947022, iteration: 321455
loss: 1.0587316751480103,grad_norm: 0.9999996905951153, iteration: 321456
loss: 1.069852590560913,grad_norm: 0.871361165888673, iteration: 321457
loss: 1.019132137298584,grad_norm: 0.7490519991278445, iteration: 321458
loss: 1.032238483428955,grad_norm: 0.999999836510092, iteration: 321459
loss: 1.0630446672439575,grad_norm: 0.9999998219097574, iteration: 321460
loss: 1.0494595766067505,grad_norm: 0.9999998829712072, iteration: 321461
loss: 1.0433480739593506,grad_norm: 0.9999997899912053, iteration: 321462
loss: 0.9961310625076294,grad_norm: 0.7501917074641549, iteration: 321463
loss: 1.0612082481384277,grad_norm: 0.9999992268941399, iteration: 321464
loss: 1.0029678344726562,grad_norm: 0.9999996383213386, iteration: 321465
loss: 1.0129246711730957,grad_norm: 0.7191783383939124, iteration: 321466
loss: 1.1060994863510132,grad_norm: 0.9999993249405172, iteration: 321467
loss: 1.01530921459198,grad_norm: 0.9801034449531374, iteration: 321468
loss: 1.0539791584014893,grad_norm: 0.9999996132882978, iteration: 321469
loss: 1.0247575044631958,grad_norm: 0.9999993393114351, iteration: 321470
loss: 1.0353893041610718,grad_norm: 0.9272989846359059, iteration: 321471
loss: 1.0537045001983643,grad_norm: 0.9999990806245276, iteration: 321472
loss: 1.0708404779434204,grad_norm: 0.9999991395318651, iteration: 321473
loss: 0.989882230758667,grad_norm: 0.9999991040511778, iteration: 321474
loss: 1.0899879932403564,grad_norm: 0.9999999251841655, iteration: 321475
loss: 1.0215662717819214,grad_norm: 0.9999990820519563, iteration: 321476
loss: 1.045715093612671,grad_norm: 0.9999996718926979, iteration: 321477
loss: 1.019293189048767,grad_norm: 0.817297172863274, iteration: 321478
loss: 1.0824061632156372,grad_norm: 0.9999992883554403, iteration: 321479
loss: 0.9801979660987854,grad_norm: 0.7335150673268208, iteration: 321480
loss: 1.1554672718048096,grad_norm: 0.9999992306604989, iteration: 321481
loss: 1.001205325126648,grad_norm: 0.9999993644860866, iteration: 321482
loss: 1.1539281606674194,grad_norm: 0.9999999027952795, iteration: 321483
loss: 1.0046731233596802,grad_norm: 0.9999996587848836, iteration: 321484
loss: 1.1024762392044067,grad_norm: 0.9999993202547488, iteration: 321485
loss: 1.0069787502288818,grad_norm: 0.9999990594602404, iteration: 321486
loss: 1.0176687240600586,grad_norm: 0.7929595821187331, iteration: 321487
loss: 1.005461573600769,grad_norm: 0.7282370960985909, iteration: 321488
loss: 0.9985842704772949,grad_norm: 0.9360634524538577, iteration: 321489
loss: 1.0068116188049316,grad_norm: 0.9999993728013106, iteration: 321490
loss: 1.0704240798950195,grad_norm: 0.9999992042229555, iteration: 321491
loss: 1.1301642656326294,grad_norm: 0.9999997549318668, iteration: 321492
loss: 0.9656898975372314,grad_norm: 0.7965892893023541, iteration: 321493
loss: 1.0306493043899536,grad_norm: 0.9999992067125476, iteration: 321494
loss: 1.0983930826187134,grad_norm: 1.0000000881489737, iteration: 321495
loss: 1.0015928745269775,grad_norm: 0.9999997391988769, iteration: 321496
loss: 0.9777321815490723,grad_norm: 0.9999996620140007, iteration: 321497
loss: 1.0531235933303833,grad_norm: 0.9999992764518586, iteration: 321498
loss: 0.9993136525154114,grad_norm: 0.9999995600741676, iteration: 321499
loss: 1.0344210863113403,grad_norm: 0.9999996172153448, iteration: 321500
loss: 1.0295400619506836,grad_norm: 0.8668734388527631, iteration: 321501
loss: 1.0523637533187866,grad_norm: 0.8600352841283012, iteration: 321502
loss: 1.0899903774261475,grad_norm: 0.9999993258743836, iteration: 321503
loss: 1.0384327173233032,grad_norm: 0.9774009796315024, iteration: 321504
loss: 1.1804141998291016,grad_norm: 0.9999991387208583, iteration: 321505
loss: 0.984309732913971,grad_norm: 0.9307798956890532, iteration: 321506
loss: 1.1195136308670044,grad_norm: 0.9999998223191259, iteration: 321507
loss: 1.0881823301315308,grad_norm: 0.999999899998687, iteration: 321508
loss: 1.0201102495193481,grad_norm: 0.7877262250888779, iteration: 321509
loss: 1.0187138319015503,grad_norm: 0.7804048963580135, iteration: 321510
loss: 1.0992836952209473,grad_norm: 0.9773750280065248, iteration: 321511
loss: 1.01124906539917,grad_norm: 0.9999990760206914, iteration: 321512
loss: 0.981631875038147,grad_norm: 0.9999992993053985, iteration: 321513
loss: 1.1227543354034424,grad_norm: 0.9999997554392535, iteration: 321514
loss: 1.0224393606185913,grad_norm: 0.9999992771115913, iteration: 321515
loss: 1.0726053714752197,grad_norm: 0.9308921279095675, iteration: 321516
loss: 1.0425928831100464,grad_norm: 0.9999990467595018, iteration: 321517
loss: 0.9973793625831604,grad_norm: 0.8331826353425179, iteration: 321518
loss: 1.034379243850708,grad_norm: 0.9999991803254148, iteration: 321519
loss: 1.027694821357727,grad_norm: 0.9999989935073396, iteration: 321520
loss: 1.0352829694747925,grad_norm: 0.9611815055725537, iteration: 321521
loss: 1.0016915798187256,grad_norm: 0.9183761073700375, iteration: 321522
loss: 1.0421146154403687,grad_norm: 0.9999992247747894, iteration: 321523
loss: 0.9985504150390625,grad_norm: 0.8096439740667427, iteration: 321524
loss: 0.9552561044692993,grad_norm: 0.7099469743116673, iteration: 321525
loss: 0.982703447341919,grad_norm: 0.8488881461765743, iteration: 321526
loss: 1.0347956418991089,grad_norm: 0.999999827425895, iteration: 321527
loss: 0.9850587844848633,grad_norm: 0.8275000341791202, iteration: 321528
loss: 1.0078068971633911,grad_norm: 0.779438341979721, iteration: 321529
loss: 1.0445805788040161,grad_norm: 0.8901003709009389, iteration: 321530
loss: 0.9865599274635315,grad_norm: 0.9447085838144375, iteration: 321531
loss: 1.0410640239715576,grad_norm: 0.9539196324739655, iteration: 321532
loss: 1.1026452779769897,grad_norm: 0.9999990714055009, iteration: 321533
loss: 1.029474139213562,grad_norm: 0.999999949223322, iteration: 321534
loss: 1.0242773294448853,grad_norm: 0.9182203817007071, iteration: 321535
loss: 1.0484358072280884,grad_norm: 0.9999997665333342, iteration: 321536
loss: 1.0203454494476318,grad_norm: 0.7973606125543924, iteration: 321537
loss: 1.1020197868347168,grad_norm: 0.9999999377757838, iteration: 321538
loss: 1.0316269397735596,grad_norm: 1.0000000406297367, iteration: 321539
loss: 0.9943433403968811,grad_norm: 0.9999999764488104, iteration: 321540
loss: 1.0225695371627808,grad_norm: 0.7906058514201401, iteration: 321541
loss: 1.04135262966156,grad_norm: 0.9047238993778793, iteration: 321542
loss: 0.9731311202049255,grad_norm: 0.758970305714349, iteration: 321543
loss: 1.072148084640503,grad_norm: 0.919370214340302, iteration: 321544
loss: 0.9796509742736816,grad_norm: 0.9999991530955621, iteration: 321545
loss: 1.0185314416885376,grad_norm: 0.8595426261370273, iteration: 321546
loss: 1.0258420705795288,grad_norm: 0.9999992273762214, iteration: 321547
loss: 0.9953941106796265,grad_norm: 0.9999994112585371, iteration: 321548
loss: 0.9732863903045654,grad_norm: 0.999999902524469, iteration: 321549
loss: 1.0552095174789429,grad_norm: 0.999999117822806, iteration: 321550
loss: 1.0516382455825806,grad_norm: 0.8792353455095769, iteration: 321551
loss: 0.9756201505661011,grad_norm: 0.8210727058458713, iteration: 321552
loss: 0.9781692624092102,grad_norm: 0.9206036454755302, iteration: 321553
loss: 0.9829061627388,grad_norm: 0.9999993400465043, iteration: 321554
loss: 0.9769853949546814,grad_norm: 0.7942761457511851, iteration: 321555
loss: 1.0195510387420654,grad_norm: 0.7536564486560466, iteration: 321556
loss: 1.0404072999954224,grad_norm: 0.89871599768182, iteration: 321557
loss: 1.0097664594650269,grad_norm: 0.9097592888518795, iteration: 321558
loss: 1.0177208185195923,grad_norm: 0.8673062669146597, iteration: 321559
loss: 0.9905734658241272,grad_norm: 0.8586211600076148, iteration: 321560
loss: 1.0033457279205322,grad_norm: 0.8087964218072633, iteration: 321561
loss: 1.004941701889038,grad_norm: 0.7242495169838902, iteration: 321562
loss: 0.9708728194236755,grad_norm: 0.9433846543204393, iteration: 321563
loss: 1.0323350429534912,grad_norm: 0.9999991500623824, iteration: 321564
loss: 1.0342432260513306,grad_norm: 0.9139458471823205, iteration: 321565
loss: 1.0175610780715942,grad_norm: 0.9999990206367484, iteration: 321566
loss: 1.0179686546325684,grad_norm: 0.8927025035497033, iteration: 321567
loss: 1.023362398147583,grad_norm: 0.8729214939657755, iteration: 321568
loss: 0.9558513164520264,grad_norm: 0.7403578820545271, iteration: 321569
loss: 1.0047239065170288,grad_norm: 0.9762796293523388, iteration: 321570
loss: 0.9937092065811157,grad_norm: 0.8487295363660471, iteration: 321571
loss: 1.0329307317733765,grad_norm: 0.9327236196470915, iteration: 321572
loss: 1.0216889381408691,grad_norm: 0.987222127706207, iteration: 321573
loss: 0.998797595500946,grad_norm: 0.6783437231248742, iteration: 321574
loss: 1.0016777515411377,grad_norm: 0.86758106598709, iteration: 321575
loss: 1.0452187061309814,grad_norm: 0.9999996094996542, iteration: 321576
loss: 0.9831397533416748,grad_norm: 0.6392676248377149, iteration: 321577
loss: 0.9510339498519897,grad_norm: 0.7381564003564667, iteration: 321578
loss: 0.9853505492210388,grad_norm: 0.8728377370539895, iteration: 321579
loss: 1.022593379020691,grad_norm: 0.8363864038731847, iteration: 321580
loss: 0.9972402453422546,grad_norm: 0.775265837020382, iteration: 321581
loss: 1.0486969947814941,grad_norm: 0.9999991028545707, iteration: 321582
loss: 0.9980896711349487,grad_norm: 0.9199119913445326, iteration: 321583
loss: 1.0030770301818848,grad_norm: 0.9999991385742066, iteration: 321584
loss: 1.0350888967514038,grad_norm: 0.9521232722931563, iteration: 321585
loss: 0.9985416531562805,grad_norm: 0.8381717088078182, iteration: 321586
loss: 1.0284175872802734,grad_norm: 0.9999991314389217, iteration: 321587
loss: 1.1167877912521362,grad_norm: 0.9999999742303076, iteration: 321588
loss: 1.004326343536377,grad_norm: 0.7393817022855586, iteration: 321589
loss: 1.0094932317733765,grad_norm: 0.9642824099108345, iteration: 321590
loss: 1.017143726348877,grad_norm: 0.7657394356337379, iteration: 321591
loss: 1.0161116123199463,grad_norm: 0.9999995560321476, iteration: 321592
loss: 1.0555920600891113,grad_norm: 0.8476654969761871, iteration: 321593
loss: 1.0390117168426514,grad_norm: 0.8413232379727756, iteration: 321594
loss: 1.0010995864868164,grad_norm: 0.8834778033820238, iteration: 321595
loss: 1.0194084644317627,grad_norm: 0.9999991501922337, iteration: 321596
loss: 0.9666264653205872,grad_norm: 0.8310481991083692, iteration: 321597
loss: 1.221864104270935,grad_norm: 0.9999998790294617, iteration: 321598
loss: 0.9783204197883606,grad_norm: 0.836939049413811, iteration: 321599
loss: 0.9716362953186035,grad_norm: 0.7495167556189778, iteration: 321600
loss: 1.0603530406951904,grad_norm: 0.764794592168745, iteration: 321601
loss: 1.079389214515686,grad_norm: 0.9999991154759024, iteration: 321602
loss: 0.9803676605224609,grad_norm: 0.9312177446945804, iteration: 321603
loss: 1.005223035812378,grad_norm: 0.9024330769900565, iteration: 321604
loss: 1.0001167058944702,grad_norm: 0.999999033887153, iteration: 321605
loss: 1.010143518447876,grad_norm: 0.8152224532822994, iteration: 321606
loss: 0.9847366213798523,grad_norm: 0.7462629370613992, iteration: 321607
loss: 1.0110234022140503,grad_norm: 0.9801776384552506, iteration: 321608
loss: 0.9924346208572388,grad_norm: 0.8079388468827327, iteration: 321609
loss: 0.9750476479530334,grad_norm: 0.8759438996470429, iteration: 321610
loss: 0.994164228439331,grad_norm: 0.7460140005989139, iteration: 321611
loss: 1.0062311887741089,grad_norm: 0.7359475672635644, iteration: 321612
loss: 1.0224555730819702,grad_norm: 0.7937409824966013, iteration: 321613
loss: 0.9955083131790161,grad_norm: 0.8363522298272202, iteration: 321614
loss: 0.9793158769607544,grad_norm: 0.8167078063369907, iteration: 321615
loss: 1.0083050727844238,grad_norm: 0.8774937279635028, iteration: 321616
loss: 1.012136697769165,grad_norm: 0.9999993977562863, iteration: 321617
loss: 0.9789749383926392,grad_norm: 0.8263772058099507, iteration: 321618
loss: 0.9753987789154053,grad_norm: 0.7602268124729941, iteration: 321619
loss: 0.9995564222335815,grad_norm: 0.9999990773035713, iteration: 321620
loss: 1.0303226709365845,grad_norm: 0.999999177536259, iteration: 321621
loss: 1.0380618572235107,grad_norm: 0.7839564690195601, iteration: 321622
loss: 1.0139997005462646,grad_norm: 0.9999991215741985, iteration: 321623
loss: 1.04360032081604,grad_norm: 0.9999996609402882, iteration: 321624
loss: 0.9771479964256287,grad_norm: 0.9381890617087446, iteration: 321625
loss: 0.9981054067611694,grad_norm: 0.9999992592468714, iteration: 321626
loss: 0.9848895072937012,grad_norm: 0.7978642989385489, iteration: 321627
loss: 0.9603036046028137,grad_norm: 0.6751965005056799, iteration: 321628
loss: 0.9573180675506592,grad_norm: 0.6902208626185893, iteration: 321629
loss: 0.9622809886932373,grad_norm: 0.8664680872887275, iteration: 321630
loss: 1.0161458253860474,grad_norm: 0.8466746841887444, iteration: 321631
loss: 0.9932892322540283,grad_norm: 0.9999994548146034, iteration: 321632
loss: 0.9988093376159668,grad_norm: 0.8127817512432612, iteration: 321633
loss: 1.0437591075897217,grad_norm: 0.9999995008465239, iteration: 321634
loss: 1.02353036403656,grad_norm: 0.8280795791578259, iteration: 321635
loss: 1.0351332426071167,grad_norm: 0.8766237108560311, iteration: 321636
loss: 1.0538259744644165,grad_norm: 0.9999992553021618, iteration: 321637
loss: 0.995063841342926,grad_norm: 0.9484465070143584, iteration: 321638
loss: 0.9776712656021118,grad_norm: 0.8171845904966755, iteration: 321639
loss: 1.0390357971191406,grad_norm: 0.7557974826376913, iteration: 321640
loss: 0.9667447805404663,grad_norm: 0.9999989932271484, iteration: 321641
loss: 1.0282671451568604,grad_norm: 0.9999994838471415, iteration: 321642
loss: 0.9794867634773254,grad_norm: 0.8165306008951252, iteration: 321643
loss: 1.03046715259552,grad_norm: 0.9999993245312602, iteration: 321644
loss: 1.046736717224121,grad_norm: 0.8701455974552541, iteration: 321645
loss: 0.9981095790863037,grad_norm: 0.9999990653714486, iteration: 321646
loss: 1.0306203365325928,grad_norm: 0.8995283055386764, iteration: 321647
loss: 0.9514212012290955,grad_norm: 0.7530287594672275, iteration: 321648
loss: 1.004122257232666,grad_norm: 0.8398248130437427, iteration: 321649
loss: 1.0151528120040894,grad_norm: 0.7466593960199298, iteration: 321650
loss: 1.0176959037780762,grad_norm: 0.7058015035758259, iteration: 321651
loss: 0.9862337112426758,grad_norm: 0.9248095245060722, iteration: 321652
loss: 0.995042085647583,grad_norm: 0.7252780271285133, iteration: 321653
loss: 0.9902543425559998,grad_norm: 0.708667795140764, iteration: 321654
loss: 0.9969421029090881,grad_norm: 0.9597702940448833, iteration: 321655
loss: 0.983695387840271,grad_norm: 0.9999992689130157, iteration: 321656
loss: 1.0294458866119385,grad_norm: 0.9835968276440581, iteration: 321657
loss: 1.0051463842391968,grad_norm: 0.8451910178533233, iteration: 321658
loss: 0.9783154726028442,grad_norm: 0.889257894978743, iteration: 321659
loss: 0.968630313873291,grad_norm: 0.9458494420423752, iteration: 321660
loss: 1.0053611993789673,grad_norm: 0.7264569185810759, iteration: 321661
loss: 1.0254539251327515,grad_norm: 0.8811612459499778, iteration: 321662
loss: 1.0799052715301514,grad_norm: 0.9999993468530555, iteration: 321663
loss: 1.0154321193695068,grad_norm: 0.8368322249206853, iteration: 321664
loss: 0.9732347130775452,grad_norm: 0.7370813537211466, iteration: 321665
loss: 1.0366216897964478,grad_norm: 0.7987576692910733, iteration: 321666
loss: 1.039036750793457,grad_norm: 0.9052586447889345, iteration: 321667
loss: 0.9709745049476624,grad_norm: 0.9999992126770035, iteration: 321668
loss: 1.0307179689407349,grad_norm: 0.6574039875228896, iteration: 321669
loss: 1.089043378829956,grad_norm: 0.9441795567663843, iteration: 321670
loss: 1.0039061307907104,grad_norm: 0.7510223575795871, iteration: 321671
loss: 1.0038167238235474,grad_norm: 0.9999990638873175, iteration: 321672
loss: 1.0023301839828491,grad_norm: 0.7813328067657695, iteration: 321673
loss: 1.0133296251296997,grad_norm: 0.999999757402421, iteration: 321674
loss: 0.9795007109642029,grad_norm: 0.884386159113505, iteration: 321675
loss: 0.9787965416908264,grad_norm: 0.8632683746888168, iteration: 321676
loss: 1.0155974626541138,grad_norm: 0.9464868867236526, iteration: 321677
loss: 1.0156114101409912,grad_norm: 0.6978612484420565, iteration: 321678
loss: 0.9692087769508362,grad_norm: 0.909341897804819, iteration: 321679
loss: 0.9984561800956726,grad_norm: 0.7752081689691339, iteration: 321680
loss: 0.9790414571762085,grad_norm: 0.9063496830977901, iteration: 321681
loss: 1.0245410203933716,grad_norm: 0.8177960737704444, iteration: 321682
loss: 1.0076806545257568,grad_norm: 0.8778061266666877, iteration: 321683
loss: 1.0109676122665405,grad_norm: 0.8408358744881875, iteration: 321684
loss: 0.9807479977607727,grad_norm: 0.8787659546192622, iteration: 321685
loss: 0.9433107376098633,grad_norm: 0.8152077187365229, iteration: 321686
loss: 1.2863975763320923,grad_norm: 0.999999859998211, iteration: 321687
loss: 1.0463582277297974,grad_norm: 0.7913688756323296, iteration: 321688
loss: 1.019880771636963,grad_norm: 0.9747722666868894, iteration: 321689
loss: 1.036885142326355,grad_norm: 0.9999999784735599, iteration: 321690
loss: 0.9632630944252014,grad_norm: 0.881553769941073, iteration: 321691
loss: 1.045062780380249,grad_norm: 0.9733031376394958, iteration: 321692
loss: 1.0322914123535156,grad_norm: 0.9999998725456257, iteration: 321693
loss: 1.0217647552490234,grad_norm: 0.884149486092565, iteration: 321694
loss: 1.0383541584014893,grad_norm: 0.8785356047549757, iteration: 321695
loss: 1.0047485828399658,grad_norm: 0.8219049297196812, iteration: 321696
loss: 1.0344680547714233,grad_norm: 0.9208544940440495, iteration: 321697
loss: 1.0222890377044678,grad_norm: 0.9999999527178014, iteration: 321698
loss: 1.0416303873062134,grad_norm: 0.9999993469130937, iteration: 321699
loss: 1.1218898296356201,grad_norm: 0.999999716260715, iteration: 321700
loss: 1.0273089408874512,grad_norm: 0.9721060487225271, iteration: 321701
loss: 1.0106024742126465,grad_norm: 0.8241160569547988, iteration: 321702
loss: 1.0175148248672485,grad_norm: 0.7277808582840665, iteration: 321703
loss: 0.9639252424240112,grad_norm: 0.9999993950368101, iteration: 321704
loss: 1.0447837114334106,grad_norm: 0.9999991939126058, iteration: 321705
loss: 1.0174815654754639,grad_norm: 0.8145829377562547, iteration: 321706
loss: 0.9868698716163635,grad_norm: 0.9999995255197531, iteration: 321707
loss: 1.0774691104888916,grad_norm: 0.9984116303828098, iteration: 321708
loss: 0.9724443554878235,grad_norm: 0.8318997692926081, iteration: 321709
loss: 1.0735507011413574,grad_norm: 0.9417968682403787, iteration: 321710
loss: 1.004840612411499,grad_norm: 0.8379841957830511, iteration: 321711
loss: 1.0282987356185913,grad_norm: 0.9999990246474032, iteration: 321712
loss: 0.9891144037246704,grad_norm: 0.999999240315792, iteration: 321713
loss: 0.999805212020874,grad_norm: 0.9524003194935571, iteration: 321714
loss: 1.010625958442688,grad_norm: 0.9164578117052472, iteration: 321715
loss: 1.0076547861099243,grad_norm: 0.7905279865288364, iteration: 321716
loss: 1.045925498008728,grad_norm: 0.9999990628902938, iteration: 321717
loss: 1.0219882726669312,grad_norm: 0.9999993665787306, iteration: 321718
loss: 1.0570837259292603,grad_norm: 0.9999998491626927, iteration: 321719
loss: 0.9940933585166931,grad_norm: 0.9742384205276197, iteration: 321720
loss: 0.9706239104270935,grad_norm: 0.870270510188351, iteration: 321721
loss: 1.0063915252685547,grad_norm: 0.7995915413381925, iteration: 321722
loss: 1.0118712186813354,grad_norm: 0.893723919890355, iteration: 321723
loss: 1.0426952838897705,grad_norm: 0.9181486978864003, iteration: 321724
loss: 1.2668015956878662,grad_norm: 1.000000030488512, iteration: 321725
loss: 1.0300313234329224,grad_norm: 0.936036129223207, iteration: 321726
loss: 1.0560511350631714,grad_norm: 0.9999990909978819, iteration: 321727
loss: 1.0572701692581177,grad_norm: 0.9999994546982075, iteration: 321728
loss: 1.04475736618042,grad_norm: 0.9999996737939439, iteration: 321729
loss: 0.9862135052680969,grad_norm: 0.9851852585295067, iteration: 321730
loss: 0.9901798963546753,grad_norm: 0.8079238998731295, iteration: 321731
loss: 0.9632833003997803,grad_norm: 0.7678160248490378, iteration: 321732
loss: 1.062626838684082,grad_norm: 0.9999997014708023, iteration: 321733
loss: 1.0174763202667236,grad_norm: 0.7891896981478517, iteration: 321734
loss: 0.9806186556816101,grad_norm: 0.9999992332748542, iteration: 321735
loss: 1.0160943269729614,grad_norm: 0.9364254481807289, iteration: 321736
loss: 1.0915428400039673,grad_norm: 0.9999993182619782, iteration: 321737
loss: 1.0998836755752563,grad_norm: 0.9999992013604336, iteration: 321738
loss: 1.083587646484375,grad_norm: 0.999999416013979, iteration: 321739
loss: 1.0451370477676392,grad_norm: 0.9999990794824379, iteration: 321740
loss: 1.0713368654251099,grad_norm: 0.8598824189520495, iteration: 321741
loss: 0.989416241645813,grad_norm: 0.9432218327240763, iteration: 321742
loss: 1.0533990859985352,grad_norm: 0.8228587587739512, iteration: 321743
loss: 0.9507162570953369,grad_norm: 0.8305328285997885, iteration: 321744
loss: 1.0535519123077393,grad_norm: 0.9999992217021263, iteration: 321745
loss: 1.028980016708374,grad_norm: 0.9724872461088663, iteration: 321746
loss: 1.0332238674163818,grad_norm: 0.9945319448884322, iteration: 321747
loss: 0.973368763923645,grad_norm: 0.6737126104597306, iteration: 321748
loss: 0.9774665236473083,grad_norm: 0.8274202519068353, iteration: 321749
loss: 1.0289714336395264,grad_norm: 0.883055025649392, iteration: 321750
loss: 1.008248209953308,grad_norm: 0.9124501405635459, iteration: 321751
loss: 1.1530475616455078,grad_norm: 0.9999993458485052, iteration: 321752
loss: 1.0091382265090942,grad_norm: 0.8682812662472355, iteration: 321753
loss: 0.9894338250160217,grad_norm: 0.9999990175776314, iteration: 321754
loss: 1.0044715404510498,grad_norm: 0.9999992340896235, iteration: 321755
loss: 1.0450338125228882,grad_norm: 0.9999992595652681, iteration: 321756
loss: 0.9598676562309265,grad_norm: 0.9999991751548092, iteration: 321757
loss: 1.0005768537521362,grad_norm: 0.7337113916425918, iteration: 321758
loss: 1.0090078115463257,grad_norm: 0.7788281460915795, iteration: 321759
loss: 1.0179133415222168,grad_norm: 0.9999995983217506, iteration: 321760
loss: 0.9920473694801331,grad_norm: 0.9376461539791053, iteration: 321761
loss: 0.9985899329185486,grad_norm: 0.9999998806707543, iteration: 321762
loss: 1.0124417543411255,grad_norm: 0.9081041988182448, iteration: 321763
loss: 1.003800868988037,grad_norm: 0.7254284189200705, iteration: 321764
loss: 0.9875354766845703,grad_norm: 0.8200256436411759, iteration: 321765
loss: 1.0106631517410278,grad_norm: 0.7412785583538006, iteration: 321766
loss: 0.9895941615104675,grad_norm: 0.9999992279790946, iteration: 321767
loss: 1.0633376836776733,grad_norm: 0.9999993685187826, iteration: 321768
loss: 0.9708283543586731,grad_norm: 0.9999990568609873, iteration: 321769
loss: 1.0015729665756226,grad_norm: 0.722803634917374, iteration: 321770
loss: 1.0287846326828003,grad_norm: 0.9999998908817425, iteration: 321771
loss: 1.0861241817474365,grad_norm: 0.9999993337475731, iteration: 321772
loss: 1.0143425464630127,grad_norm: 0.9999998481510071, iteration: 321773
loss: 0.9829837083816528,grad_norm: 0.9999997164960605, iteration: 321774
loss: 1.0348572731018066,grad_norm: 0.9999998303129594, iteration: 321775
loss: 0.9619660377502441,grad_norm: 0.9006017007123179, iteration: 321776
loss: 1.0677293539047241,grad_norm: 0.99999925347383, iteration: 321777
loss: 1.0189048051834106,grad_norm: 0.8440756630570527, iteration: 321778
loss: 1.0127854347229004,grad_norm: 0.8323873894505888, iteration: 321779
loss: 1.0369012355804443,grad_norm: 0.9999997341496035, iteration: 321780
loss: 1.0357625484466553,grad_norm: 0.877390596165889, iteration: 321781
loss: 1.0233705043792725,grad_norm: 0.888310247109241, iteration: 321782
loss: 1.0240845680236816,grad_norm: 0.9999992482607682, iteration: 321783
loss: 1.0497440099716187,grad_norm: 0.9999996604568033, iteration: 321784
loss: 1.0046330690383911,grad_norm: 0.999999324273713, iteration: 321785
loss: 0.9758870601654053,grad_norm: 0.999999299206603, iteration: 321786
loss: 0.9782941341400146,grad_norm: 0.8900062269718257, iteration: 321787
loss: 1.010730266571045,grad_norm: 0.9999998810820753, iteration: 321788
loss: 0.9787452816963196,grad_norm: 0.9319340346208402, iteration: 321789
loss: 1.025322437286377,grad_norm: 0.993146887175511, iteration: 321790
loss: 1.0183206796646118,grad_norm: 0.8055012413250989, iteration: 321791
loss: 0.9750068187713623,grad_norm: 0.98848891489681, iteration: 321792
loss: 1.0017316341400146,grad_norm: 0.9999991861489231, iteration: 321793
loss: 1.0223954916000366,grad_norm: 0.7359515689139488, iteration: 321794
loss: 0.982856273651123,grad_norm: 0.9999992831895365, iteration: 321795
loss: 1.0335654020309448,grad_norm: 0.8743576184254358, iteration: 321796
loss: 1.0178930759429932,grad_norm: 0.8282940823548374, iteration: 321797
loss: 1.0125483274459839,grad_norm: 0.8860519782562167, iteration: 321798
loss: 1.0809460878372192,grad_norm: 0.9999992097811936, iteration: 321799
loss: 1.0032671689987183,grad_norm: 0.7119551900135256, iteration: 321800
loss: 1.0290011167526245,grad_norm: 0.8515051996822577, iteration: 321801
loss: 1.030405879020691,grad_norm: 0.7898959941807764, iteration: 321802
loss: 0.9727213382720947,grad_norm: 0.8684427191840529, iteration: 321803
loss: 0.9974371790885925,grad_norm: 0.8063872463524404, iteration: 321804
loss: 0.9955034852027893,grad_norm: 0.9999991422514939, iteration: 321805
loss: 1.0189446210861206,grad_norm: 0.75039171967464, iteration: 321806
loss: 1.0781916379928589,grad_norm: 0.999999929721916, iteration: 321807
loss: 0.9962167739868164,grad_norm: 0.7804291886422497, iteration: 321808
loss: 0.9973816275596619,grad_norm: 0.7978058925765307, iteration: 321809
loss: 1.0104091167449951,grad_norm: 0.7826325272211202, iteration: 321810
loss: 0.979957103729248,grad_norm: 0.8140791788022909, iteration: 321811
loss: 1.0755623579025269,grad_norm: 0.9999993520338705, iteration: 321812
loss: 1.0274114608764648,grad_norm: 0.9313770219652773, iteration: 321813
loss: 0.9974230527877808,grad_norm: 0.8325929544665767, iteration: 321814
loss: 0.9794005751609802,grad_norm: 0.6771948087611512, iteration: 321815
loss: 1.0234355926513672,grad_norm: 0.694276316547728, iteration: 321816
loss: 0.9829179644584656,grad_norm: 0.9999992038428781, iteration: 321817
loss: 1.0330145359039307,grad_norm: 0.8434764010537568, iteration: 321818
loss: 0.9833766222000122,grad_norm: 0.9070575504467807, iteration: 321819
loss: 1.0274869203567505,grad_norm: 0.8477798011965202, iteration: 321820
loss: 1.0036541223526,grad_norm: 0.7270501518888933, iteration: 321821
loss: 1.0276174545288086,grad_norm: 0.99999918448408, iteration: 321822
loss: 1.0330092906951904,grad_norm: 0.9999991963946224, iteration: 321823
loss: 0.9801280498504639,grad_norm: 0.9999996849577736, iteration: 321824
loss: 1.0722275972366333,grad_norm: 0.9999992469830856, iteration: 321825
loss: 1.1069259643554688,grad_norm: 0.9999996340974606, iteration: 321826
loss: 0.9684107899665833,grad_norm: 0.8695424437225565, iteration: 321827
loss: 0.9780052900314331,grad_norm: 0.8557897640065656, iteration: 321828
loss: 1.0119779109954834,grad_norm: 0.8567633675317978, iteration: 321829
loss: 1.093462586402893,grad_norm: 0.9999997497085922, iteration: 321830
loss: 1.028756856918335,grad_norm: 0.9999997771783973, iteration: 321831
loss: 0.9824239611625671,grad_norm: 0.9569045108059875, iteration: 321832
loss: 0.9690050482749939,grad_norm: 0.909880103783752, iteration: 321833
loss: 1.0007058382034302,grad_norm: 0.9581780354300466, iteration: 321834
loss: 0.9769364595413208,grad_norm: 0.9999992020736544, iteration: 321835
loss: 1.0090997219085693,grad_norm: 0.9999995879776239, iteration: 321836
loss: 1.011695384979248,grad_norm: 0.8985785514267538, iteration: 321837
loss: 1.098184585571289,grad_norm: 1.000000053830504, iteration: 321838
loss: 1.0321388244628906,grad_norm: 0.8921170827012731, iteration: 321839
loss: 1.020552158355713,grad_norm: 0.9999994811975044, iteration: 321840
loss: 0.9788650274276733,grad_norm: 0.9082895637233172, iteration: 321841
loss: 1.0055499076843262,grad_norm: 0.9999990369606974, iteration: 321842
loss: 1.067110538482666,grad_norm: 0.7906987170437759, iteration: 321843
loss: 1.0014313459396362,grad_norm: 0.7904004090407112, iteration: 321844
loss: 0.997929573059082,grad_norm: 0.6936897415502027, iteration: 321845
loss: 0.9853547215461731,grad_norm: 0.9224673132589499, iteration: 321846
loss: 1.010390043258667,grad_norm: 0.7173242081345512, iteration: 321847
loss: 1.0051485300064087,grad_norm: 0.9068734406747805, iteration: 321848
loss: 0.9604365229606628,grad_norm: 0.9855043602940805, iteration: 321849
loss: 0.9554452896118164,grad_norm: 0.9741900008899858, iteration: 321850
loss: 0.9897710084915161,grad_norm: 0.9825007297447006, iteration: 321851
loss: 1.0297431945800781,grad_norm: 0.9999992692396648, iteration: 321852
loss: 0.9532533288002014,grad_norm: 0.8903253783480037, iteration: 321853
loss: 0.9865065217018127,grad_norm: 0.8768311740122363, iteration: 321854
loss: 1.007097601890564,grad_norm: 0.7722557354753204, iteration: 321855
loss: 1.0070903301239014,grad_norm: 0.8038048933678544, iteration: 321856
loss: 1.0017424821853638,grad_norm: 0.7855176935258591, iteration: 321857
loss: 1.01217782497406,grad_norm: 0.8852643100436052, iteration: 321858
loss: 1.0039029121398926,grad_norm: 0.6926192221443808, iteration: 321859
loss: 0.9942315220832825,grad_norm: 0.780422285905202, iteration: 321860
loss: 0.9783201217651367,grad_norm: 0.7569701341358024, iteration: 321861
loss: 0.9994760751724243,grad_norm: 0.813322961618922, iteration: 321862
loss: 1.0190563201904297,grad_norm: 0.8569442404457673, iteration: 321863
loss: 0.9922705888748169,grad_norm: 0.9999992479316735, iteration: 321864
loss: 1.0019034147262573,grad_norm: 0.7611827763049492, iteration: 321865
loss: 0.9816686511039734,grad_norm: 0.7636949904764702, iteration: 321866
loss: 0.9694322943687439,grad_norm: 0.7858303650889111, iteration: 321867
loss: 1.0309420824050903,grad_norm: 0.7814999366362672, iteration: 321868
loss: 0.987074077129364,grad_norm: 0.783558444391705, iteration: 321869
loss: 1.0496052503585815,grad_norm: 0.9999995077985016, iteration: 321870
loss: 0.9919964075088501,grad_norm: 0.8411832827401413, iteration: 321871
loss: 1.0526480674743652,grad_norm: 0.7293824122788807, iteration: 321872
loss: 1.0121891498565674,grad_norm: 0.9097610883822601, iteration: 321873
loss: 0.9914869070053101,grad_norm: 0.9262903506986345, iteration: 321874
loss: 1.0399470329284668,grad_norm: 0.9999999380296509, iteration: 321875
loss: 1.0202631950378418,grad_norm: 0.999999208832654, iteration: 321876
loss: 0.992287814617157,grad_norm: 0.8517410286997962, iteration: 321877
loss: 0.9826693534851074,grad_norm: 0.9999992134067359, iteration: 321878
loss: 1.0356695652008057,grad_norm: 0.812082792438083, iteration: 321879
loss: 1.0172139406204224,grad_norm: 0.9999991402357701, iteration: 321880
loss: 1.0161709785461426,grad_norm: 0.8575803785727648, iteration: 321881
loss: 1.0147837400436401,grad_norm: 0.9076362307025918, iteration: 321882
loss: 1.1554925441741943,grad_norm: 0.9999992522042839, iteration: 321883
loss: 1.0283313989639282,grad_norm: 0.9660778901519117, iteration: 321884
loss: 0.9818047285079956,grad_norm: 0.9999992467217037, iteration: 321885
loss: 1.0137536525726318,grad_norm: 0.9999993266208468, iteration: 321886
loss: 0.9704437255859375,grad_norm: 0.8036754815591365, iteration: 321887
loss: 0.9936150908470154,grad_norm: 0.9014039954444218, iteration: 321888
loss: 0.9960474967956543,grad_norm: 0.90936305308605, iteration: 321889
loss: 1.0016343593597412,grad_norm: 0.7504078449802836, iteration: 321890
loss: 0.9832879304885864,grad_norm: 0.9999990462442895, iteration: 321891
loss: 1.0045017004013062,grad_norm: 0.9999991232155947, iteration: 321892
loss: 1.0213314294815063,grad_norm: 0.7465177757968228, iteration: 321893
loss: 0.9777911305427551,grad_norm: 0.9999990481985717, iteration: 321894
loss: 1.0207717418670654,grad_norm: 0.9581574766253813, iteration: 321895
loss: 1.0221741199493408,grad_norm: 0.9999992838604208, iteration: 321896
loss: 1.044906735420227,grad_norm: 0.9999995984790637, iteration: 321897
loss: 0.9876362085342407,grad_norm: 0.8392799783802509, iteration: 321898
loss: 1.002496361732483,grad_norm: 0.771652443072234, iteration: 321899
loss: 0.9975141286849976,grad_norm: 0.8411712758445024, iteration: 321900
loss: 1.0023531913757324,grad_norm: 0.8625100219822525, iteration: 321901
loss: 0.9610544443130493,grad_norm: 0.751534725820907, iteration: 321902
loss: 1.0163624286651611,grad_norm: 0.8519299839387825, iteration: 321903
loss: 0.9841561317443848,grad_norm: 0.7939804904217578, iteration: 321904
loss: 1.0365874767303467,grad_norm: 0.9999995433660701, iteration: 321905
loss: 0.9716360569000244,grad_norm: 0.8583396759761056, iteration: 321906
loss: 1.0083744525909424,grad_norm: 0.8350444589970566, iteration: 321907
loss: 1.044768214225769,grad_norm: 0.8885202449193742, iteration: 321908
loss: 1.059976577758789,grad_norm: 0.9999995060077264, iteration: 321909
loss: 0.9718003869056702,grad_norm: 0.8337081572108123, iteration: 321910
loss: 1.0005731582641602,grad_norm: 0.7596426809236102, iteration: 321911
loss: 1.069320797920227,grad_norm: 0.9999997089185304, iteration: 321912
loss: 0.9825370907783508,grad_norm: 0.8521806770520491, iteration: 321913
loss: 1.0102672576904297,grad_norm: 0.9958412024673312, iteration: 321914
loss: 1.0023255348205566,grad_norm: 0.9587963850297182, iteration: 321915
loss: 1.015354037284851,grad_norm: 0.999999178560594, iteration: 321916
loss: 0.9702444672584534,grad_norm: 0.7718463786234792, iteration: 321917
loss: 0.9888514876365662,grad_norm: 0.8831311765958156, iteration: 321918
loss: 1.071100115776062,grad_norm: 0.9999994323570099, iteration: 321919
loss: 1.1249948740005493,grad_norm: 0.9999993507843985, iteration: 321920
loss: 1.0582168102264404,grad_norm: 0.9999992070301811, iteration: 321921
loss: 1.0101267099380493,grad_norm: 0.896684748246105, iteration: 321922
loss: 1.0230885744094849,grad_norm: 0.8481516182466438, iteration: 321923
loss: 0.9787907004356384,grad_norm: 0.8038676718903982, iteration: 321924
loss: 1.0526847839355469,grad_norm: 0.9056858405163561, iteration: 321925
loss: 0.9978869557380676,grad_norm: 0.9210386172047573, iteration: 321926
loss: 0.9725438356399536,grad_norm: 0.9999989745340135, iteration: 321927
loss: 1.0000821352005005,grad_norm: 0.6845351902934648, iteration: 321928
loss: 0.9745770692825317,grad_norm: 0.7174119882321712, iteration: 321929
loss: 1.0151803493499756,grad_norm: 0.8397120686637097, iteration: 321930
loss: 1.036984920501709,grad_norm: 0.9999991414889431, iteration: 321931
loss: 1.0532410144805908,grad_norm: 0.9923057925721566, iteration: 321932
loss: 0.978545069694519,grad_norm: 0.9999995544899144, iteration: 321933
loss: 0.9928467869758606,grad_norm: 0.9278729583772626, iteration: 321934
loss: 1.0036954879760742,grad_norm: 0.7311708517962198, iteration: 321935
loss: 0.9983319044113159,grad_norm: 0.9100780546780294, iteration: 321936
loss: 1.0233081579208374,grad_norm: 0.7894772787432088, iteration: 321937
loss: 0.9940353035926819,grad_norm: 0.999999299455503, iteration: 321938
loss: 0.9805066585540771,grad_norm: 0.9031865319506548, iteration: 321939
loss: 0.9537330269813538,grad_norm: 0.8305678111190828, iteration: 321940
loss: 1.000186800956726,grad_norm: 0.8577636756552142, iteration: 321941
loss: 1.0571445226669312,grad_norm: 0.8914176431814077, iteration: 321942
loss: 1.0374590158462524,grad_norm: 0.8830225892032213, iteration: 321943
loss: 1.0410609245300293,grad_norm: 0.7121840337273553, iteration: 321944
loss: 1.0069153308868408,grad_norm: 0.8123717164977804, iteration: 321945
loss: 0.965849757194519,grad_norm: 0.9999990557102953, iteration: 321946
loss: 0.9941164255142212,grad_norm: 0.7791559590418885, iteration: 321947
loss: 0.9954867959022522,grad_norm: 0.832482346197323, iteration: 321948
loss: 0.9953317046165466,grad_norm: 0.764773257240785, iteration: 321949
loss: 0.9935183525085449,grad_norm: 0.9423262090642432, iteration: 321950
loss: 1.0421444177627563,grad_norm: 0.9999992950919138, iteration: 321951
loss: 0.986264705657959,grad_norm: 0.9999990999257284, iteration: 321952
loss: 1.0144388675689697,grad_norm: 0.8963480882800162, iteration: 321953
loss: 0.9832094311714172,grad_norm: 0.8134456986505432, iteration: 321954
loss: 0.9958148002624512,grad_norm: 0.7209890959928125, iteration: 321955
loss: 0.9736319780349731,grad_norm: 0.9999990349538024, iteration: 321956
loss: 1.012814998626709,grad_norm: 0.9057482466545385, iteration: 321957
loss: 1.0046485662460327,grad_norm: 0.8697583961446056, iteration: 321958
loss: 1.0020451545715332,grad_norm: 0.7960585462093421, iteration: 321959
loss: 1.0195053815841675,grad_norm: 0.9506925524832541, iteration: 321960
loss: 1.0013103485107422,grad_norm: 0.738860403481664, iteration: 321961
loss: 1.006439208984375,grad_norm: 0.8589666583036264, iteration: 321962
loss: 1.0001192092895508,grad_norm: 0.7976315880345891, iteration: 321963
loss: 0.9950008988380432,grad_norm: 0.6972647326382602, iteration: 321964
loss: 1.0001965761184692,grad_norm: 0.7979348695163386, iteration: 321965
loss: 0.9972043633460999,grad_norm: 0.7601211283219036, iteration: 321966
loss: 0.9759105443954468,grad_norm: 0.7135965412652026, iteration: 321967
loss: 1.0050206184387207,grad_norm: 0.8378993228654648, iteration: 321968
loss: 0.9942349195480347,grad_norm: 0.7859608176791101, iteration: 321969
loss: 1.018680214881897,grad_norm: 0.7735313344771778, iteration: 321970
loss: 0.9940318465232849,grad_norm: 0.9168733551619704, iteration: 321971
loss: 1.0192848443984985,grad_norm: 0.765125217202639, iteration: 321972
loss: 1.0539973974227905,grad_norm: 0.9999993101032079, iteration: 321973
loss: 0.9651200771331787,grad_norm: 0.7956958638132197, iteration: 321974
loss: 1.0135506391525269,grad_norm: 0.8027031809689834, iteration: 321975
loss: 1.0095057487487793,grad_norm: 0.7879687803413178, iteration: 321976
loss: 1.0150086879730225,grad_norm: 0.8989906045957354, iteration: 321977
loss: 1.0425114631652832,grad_norm: 0.9999994495314182, iteration: 321978
loss: 1.006090760231018,grad_norm: 0.8097709015351433, iteration: 321979
loss: 0.9834393858909607,grad_norm: 0.8837389287782863, iteration: 321980
loss: 1.010107159614563,grad_norm: 0.8948018726209233, iteration: 321981
loss: 1.003768801689148,grad_norm: 0.9999990205583965, iteration: 321982
loss: 0.9783570170402527,grad_norm: 0.7958716627004633, iteration: 321983
loss: 1.0017093420028687,grad_norm: 0.7432592503783048, iteration: 321984
loss: 1.0115872621536255,grad_norm: 0.9999994214316049, iteration: 321985
loss: 1.0003302097320557,grad_norm: 0.7486787746929215, iteration: 321986
loss: 0.997767984867096,grad_norm: 0.788383252646417, iteration: 321987
loss: 0.9500215649604797,grad_norm: 0.9218283003155497, iteration: 321988
loss: 0.9797168374061584,grad_norm: 0.9734472456330596, iteration: 321989
loss: 0.9594255089759827,grad_norm: 0.8871024251803189, iteration: 321990
loss: 0.9901626110076904,grad_norm: 0.7403839738478287, iteration: 321991
loss: 1.007980227470398,grad_norm: 0.9999990545386437, iteration: 321992
loss: 0.9771367311477661,grad_norm: 0.9999990990637454, iteration: 321993
loss: 1.0186909437179565,grad_norm: 0.9659570718737271, iteration: 321994
loss: 1.0424013137817383,grad_norm: 0.9999993912510315, iteration: 321995
loss: 0.9881027340888977,grad_norm: 0.7833515950437523, iteration: 321996
loss: 0.9917278289794922,grad_norm: 0.9322761865846954, iteration: 321997
loss: 1.0202194452285767,grad_norm: 0.9295807465688471, iteration: 321998
loss: 0.9639444351196289,grad_norm: 0.9099550153634504, iteration: 321999
loss: 1.006107211112976,grad_norm: 0.9999989615937407, iteration: 322000
loss: 1.021162748336792,grad_norm: 0.9347055312441476, iteration: 322001
loss: 0.9756788611412048,grad_norm: 0.8589398606415329, iteration: 322002
loss: 1.02680242061615,grad_norm: 0.8047940042899581, iteration: 322003
loss: 0.9987928867340088,grad_norm: 0.8565672295895626, iteration: 322004
loss: 1.011735200881958,grad_norm: 0.8233308642916092, iteration: 322005
loss: 0.9431604743003845,grad_norm: 0.9999991710680698, iteration: 322006
loss: 0.9985836148262024,grad_norm: 0.999999888838972, iteration: 322007
loss: 0.9872223138809204,grad_norm: 0.8595062554548926, iteration: 322008
loss: 0.978617787361145,grad_norm: 0.9235111836560507, iteration: 322009
loss: 1.026100993156433,grad_norm: 0.9999991659327864, iteration: 322010
loss: 0.9879292249679565,grad_norm: 0.7274610427313194, iteration: 322011
loss: 1.0265781879425049,grad_norm: 0.9999990797521786, iteration: 322012
loss: 0.9852365851402283,grad_norm: 0.7647628176762427, iteration: 322013
loss: 0.9960135817527771,grad_norm: 0.8003731446107297, iteration: 322014
loss: 0.9939162135124207,grad_norm: 0.9325787138191126, iteration: 322015
loss: 0.990807056427002,grad_norm: 0.9999996451437564, iteration: 322016
loss: 0.9950276613235474,grad_norm: 0.7927422557150179, iteration: 322017
loss: 0.9981346726417542,grad_norm: 0.9200217125471455, iteration: 322018
loss: 0.9836535453796387,grad_norm: 0.919230845715144, iteration: 322019
loss: 1.0304375886917114,grad_norm: 0.9815334788063459, iteration: 322020
loss: 1.00596022605896,grad_norm: 0.896292671588971, iteration: 322021
loss: 1.0084068775177002,grad_norm: 0.8101896441350764, iteration: 322022
loss: 1.0370244979858398,grad_norm: 0.7189923626096432, iteration: 322023
loss: 1.0188461542129517,grad_norm: 0.6955296529784943, iteration: 322024
loss: 1.0656272172927856,grad_norm: 0.9863108554748099, iteration: 322025
loss: 0.9659737944602966,grad_norm: 0.9999991496648828, iteration: 322026
loss: 1.0144482851028442,grad_norm: 0.8561026758364801, iteration: 322027
loss: 1.0327826738357544,grad_norm: 0.936112259246797, iteration: 322028
loss: 0.999640941619873,grad_norm: 0.8255625343882218, iteration: 322029
loss: 1.0651525259017944,grad_norm: 0.9999991097468096, iteration: 322030
loss: 1.0206985473632812,grad_norm: 0.7156188957682912, iteration: 322031
loss: 0.9884933233261108,grad_norm: 0.813641815138548, iteration: 322032
loss: 1.0130424499511719,grad_norm: 0.9999990072988055, iteration: 322033
loss: 1.0055296421051025,grad_norm: 0.9839281662357465, iteration: 322034
loss: 1.0025768280029297,grad_norm: 0.8961768159046427, iteration: 322035
loss: 1.0324126482009888,grad_norm: 0.7499603547265556, iteration: 322036
loss: 1.0208803415298462,grad_norm: 0.9999991711287965, iteration: 322037
loss: 1.038781762123108,grad_norm: 0.9846102915794521, iteration: 322038
loss: 1.1053794622421265,grad_norm: 0.999999756251464, iteration: 322039
loss: 0.9631608724594116,grad_norm: 0.8877099888513802, iteration: 322040
loss: 1.0387380123138428,grad_norm: 0.8240936606048307, iteration: 322041
loss: 1.0201125144958496,grad_norm: 0.827892824618448, iteration: 322042
loss: 0.9909857511520386,grad_norm: 0.7097295060557054, iteration: 322043
loss: 0.9814529418945312,grad_norm: 0.7413318731688994, iteration: 322044
loss: 1.0024546384811401,grad_norm: 0.7733003222215146, iteration: 322045
loss: 0.9747396111488342,grad_norm: 0.9999992754960406, iteration: 322046
loss: 0.9945981502532959,grad_norm: 0.780834613492151, iteration: 322047
loss: 1.0165187120437622,grad_norm: 0.9999995908403989, iteration: 322048
loss: 1.0122090578079224,grad_norm: 0.9754127492384111, iteration: 322049
loss: 1.0082708597183228,grad_norm: 0.8984458042463306, iteration: 322050
loss: 0.9883178472518921,grad_norm: 0.9999991709496383, iteration: 322051
loss: 1.0214896202087402,grad_norm: 0.7403178941212248, iteration: 322052
loss: 0.9539480209350586,grad_norm: 0.7425647943687459, iteration: 322053
loss: 0.9753405451774597,grad_norm: 0.9999993347018776, iteration: 322054
loss: 1.0318812131881714,grad_norm: 0.9528035100352598, iteration: 322055
loss: 1.0214496850967407,grad_norm: 0.7885494524076219, iteration: 322056
loss: 1.1010303497314453,grad_norm: 0.7796999359837131, iteration: 322057
loss: 1.0239921808242798,grad_norm: 0.7640499125159887, iteration: 322058
loss: 1.0039288997650146,grad_norm: 0.9999991842340733, iteration: 322059
loss: 1.0306326150894165,grad_norm: 0.9999991275621687, iteration: 322060
loss: 1.004437804222107,grad_norm: 0.8931272496339738, iteration: 322061
loss: 1.0194121599197388,grad_norm: 0.8691374203997562, iteration: 322062
loss: 1.1000316143035889,grad_norm: 0.9504066132943901, iteration: 322063
loss: 1.0034337043762207,grad_norm: 0.7783682515814326, iteration: 322064
loss: 1.01108980178833,grad_norm: 0.9193696336687014, iteration: 322065
loss: 1.0189844369888306,grad_norm: 0.9680690380804702, iteration: 322066
loss: 1.0077650547027588,grad_norm: 0.8943011494307261, iteration: 322067
loss: 1.0065382719039917,grad_norm: 0.8345914093097987, iteration: 322068
loss: 1.004095435142517,grad_norm: 0.7316495834964643, iteration: 322069
loss: 0.9928065538406372,grad_norm: 0.8657375819186092, iteration: 322070
loss: 1.082495093345642,grad_norm: 0.9999996978505096, iteration: 322071
loss: 0.9561220407485962,grad_norm: 0.838146624323884, iteration: 322072
loss: 1.0309971570968628,grad_norm: 0.7069764702475778, iteration: 322073
loss: 1.013393521308899,grad_norm: 0.8146037688328822, iteration: 322074
loss: 1.0003408193588257,grad_norm: 0.7793188636976661, iteration: 322075
loss: 0.9692840576171875,grad_norm: 0.971917868136596, iteration: 322076
loss: 1.0005046129226685,grad_norm: 0.8880720749382622, iteration: 322077
loss: 0.9552190899848938,grad_norm: 0.904724378526383, iteration: 322078
loss: 1.0005167722702026,grad_norm: 0.9999998437078551, iteration: 322079
loss: 0.9999849200248718,grad_norm: 0.8181043036744944, iteration: 322080
loss: 0.9939554929733276,grad_norm: 0.9999992306606803, iteration: 322081
loss: 1.0184671878814697,grad_norm: 0.9473127827707225, iteration: 322082
loss: 0.990105152130127,grad_norm: 0.8426675882535335, iteration: 322083
loss: 0.9878283143043518,grad_norm: 0.822043022949786, iteration: 322084
loss: 0.9784988760948181,grad_norm: 0.9178972622173184, iteration: 322085
loss: 1.0037938356399536,grad_norm: 0.7476033839826645, iteration: 322086
loss: 0.9719850420951843,grad_norm: 0.6920148634103843, iteration: 322087
loss: 0.9879629015922546,grad_norm: 0.9999989588156293, iteration: 322088
loss: 0.9997677206993103,grad_norm: 0.9999990845331489, iteration: 322089
loss: 1.0050206184387207,grad_norm: 0.9337977303154775, iteration: 322090
loss: 0.9856597185134888,grad_norm: 0.6392980339848845, iteration: 322091
loss: 1.0230255126953125,grad_norm: 0.8903449129806297, iteration: 322092
loss: 0.9662474393844604,grad_norm: 0.9110598085970982, iteration: 322093
loss: 0.9484809637069702,grad_norm: 0.7588553807989326, iteration: 322094
loss: 1.0678738355636597,grad_norm: 0.9999991912204064, iteration: 322095
loss: 1.0261967182159424,grad_norm: 0.8421774821883118, iteration: 322096
loss: 0.9943569302558899,grad_norm: 0.8662071848829558, iteration: 322097
loss: 1.0067015886306763,grad_norm: 0.7241746304081802, iteration: 322098
loss: 1.0391277074813843,grad_norm: 0.9693443184142102, iteration: 322099
loss: 0.9983397126197815,grad_norm: 0.8148274614021064, iteration: 322100
loss: 1.0172556638717651,grad_norm: 0.9999990299345952, iteration: 322101
loss: 1.0260426998138428,grad_norm: 0.6979775254118308, iteration: 322102
loss: 0.9525289535522461,grad_norm: 0.8926383618238176, iteration: 322103
loss: 0.9751104116439819,grad_norm: 0.9999990024359863, iteration: 322104
loss: 1.0048761367797852,grad_norm: 0.9142155019221134, iteration: 322105
loss: 1.0757977962493896,grad_norm: 0.9999997568836235, iteration: 322106
loss: 1.0488998889923096,grad_norm: 0.999999564980476, iteration: 322107
loss: 1.043186068534851,grad_norm: 0.8103273049624895, iteration: 322108
loss: 1.0261750221252441,grad_norm: 0.7939175233497376, iteration: 322109
loss: 0.9940874576568604,grad_norm: 0.7824125834006099, iteration: 322110
loss: 1.0141314268112183,grad_norm: 0.7645421336410454, iteration: 322111
loss: 0.9884464144706726,grad_norm: 0.91449163889182, iteration: 322112
loss: 1.0357645750045776,grad_norm: 0.9529613857011374, iteration: 322113
loss: 0.9754577875137329,grad_norm: 0.9317800701006614, iteration: 322114
loss: 0.9720475077629089,grad_norm: 0.8327802626878993, iteration: 322115
loss: 1.0710047483444214,grad_norm: 0.8818289142326269, iteration: 322116
loss: 0.9954037666320801,grad_norm: 0.8561068796595626, iteration: 322117
loss: 1.0226311683654785,grad_norm: 0.9661672702101327, iteration: 322118
loss: 0.9884873032569885,grad_norm: 0.7652135879718724, iteration: 322119
loss: 0.9824092388153076,grad_norm: 0.9999991643127691, iteration: 322120
loss: 1.0686115026474,grad_norm: 0.999999441988072, iteration: 322121
loss: 0.9599703550338745,grad_norm: 0.793606674362891, iteration: 322122
loss: 1.0721752643585205,grad_norm: 0.8402155682351545, iteration: 322123
loss: 1.0790904760360718,grad_norm: 0.799236953057301, iteration: 322124
loss: 1.0301282405853271,grad_norm: 0.9999995702641628, iteration: 322125
loss: 1.0083398818969727,grad_norm: 0.7254942029037303, iteration: 322126
loss: 0.9602164626121521,grad_norm: 0.8802476086968242, iteration: 322127
loss: 0.9534134268760681,grad_norm: 0.7305976765207679, iteration: 322128
loss: 0.9745703935623169,grad_norm: 0.7391740383599179, iteration: 322129
loss: 1.0033766031265259,grad_norm: 0.9806130513314522, iteration: 322130
loss: 0.9883602261543274,grad_norm: 0.9097233126765075, iteration: 322131
loss: 0.9918785095214844,grad_norm: 0.9054384588946094, iteration: 322132
loss: 1.0252995491027832,grad_norm: 0.999999179615791, iteration: 322133
loss: 1.1903326511383057,grad_norm: 0.9999996546749031, iteration: 322134
loss: 1.078783631324768,grad_norm: 0.9188157898489365, iteration: 322135
loss: 1.0177550315856934,grad_norm: 0.9999990710272875, iteration: 322136
loss: 0.9553757309913635,grad_norm: 0.8254821142434445, iteration: 322137
loss: 0.9976291060447693,grad_norm: 0.8264911653685735, iteration: 322138
loss: 1.0153871774673462,grad_norm: 0.957506115599779, iteration: 322139
loss: 0.9986634254455566,grad_norm: 0.8970840806178155, iteration: 322140
loss: 0.9871870875358582,grad_norm: 0.8077989073401267, iteration: 322141
loss: 0.9959899187088013,grad_norm: 0.8353367903634878, iteration: 322142
loss: 0.986499547958374,grad_norm: 0.9025040264025415, iteration: 322143
loss: 0.9895460605621338,grad_norm: 0.8005364369890483, iteration: 322144
loss: 1.0054303407669067,grad_norm: 0.7059338354383395, iteration: 322145
loss: 1.0339210033416748,grad_norm: 0.9999990355454562, iteration: 322146
loss: 1.0147336721420288,grad_norm: 0.8631530559497494, iteration: 322147
loss: 0.9995442032814026,grad_norm: 0.7767571436390389, iteration: 322148
loss: 1.1128746271133423,grad_norm: 0.9026065051358613, iteration: 322149
loss: 1.001223087310791,grad_norm: 0.7177147372734558, iteration: 322150
loss: 1.049946665763855,grad_norm: 0.9797819543975321, iteration: 322151
loss: 0.9945322871208191,grad_norm: 0.8287174421324963, iteration: 322152
loss: 1.0042436122894287,grad_norm: 0.9999991284592551, iteration: 322153
loss: 1.0100935697555542,grad_norm: 0.7911581251363466, iteration: 322154
loss: 1.0718801021575928,grad_norm: 1.0000000117487875, iteration: 322155
loss: 0.9867636561393738,grad_norm: 0.8160043809001902, iteration: 322156
loss: 1.0278571844100952,grad_norm: 0.9530575349640122, iteration: 322157
loss: 0.9883666634559631,grad_norm: 0.8559061595964776, iteration: 322158
loss: 1.0412317514419556,grad_norm: 0.9999995738799169, iteration: 322159
loss: 1.0054244995117188,grad_norm: 0.7196937860482925, iteration: 322160
loss: 1.0159837007522583,grad_norm: 0.733797391297706, iteration: 322161
loss: 0.9912806153297424,grad_norm: 0.6550776592297145, iteration: 322162
loss: 0.9710885286331177,grad_norm: 0.7846147427811302, iteration: 322163
loss: 1.031232237815857,grad_norm: 0.9999992866805226, iteration: 322164
loss: 1.043860673904419,grad_norm: 0.9999999671097086, iteration: 322165
loss: 1.007533073425293,grad_norm: 0.9074917537367527, iteration: 322166
loss: 0.9700945019721985,grad_norm: 0.9840669214426963, iteration: 322167
loss: 1.034825325012207,grad_norm: 0.7357256879192122, iteration: 322168
loss: 0.996404767036438,grad_norm: 0.7567016567072584, iteration: 322169
loss: 0.9878875017166138,grad_norm: 0.7747395419444032, iteration: 322170
loss: 1.031404972076416,grad_norm: 0.799416356540405, iteration: 322171
loss: 1.0446568727493286,grad_norm: 0.7448393133810388, iteration: 322172
loss: 1.0184051990509033,grad_norm: 0.7868390155494963, iteration: 322173
loss: 1.0782029628753662,grad_norm: 0.9641435908313124, iteration: 322174
loss: 0.9981870651245117,grad_norm: 0.8238604897714515, iteration: 322175
loss: 1.1787772178649902,grad_norm: 0.9999995917137084, iteration: 322176
loss: 1.1207003593444824,grad_norm: 0.9999998183852007, iteration: 322177
loss: 1.0479774475097656,grad_norm: 0.9999999408946606, iteration: 322178
loss: 1.0035492181777954,grad_norm: 0.9676472658035201, iteration: 322179
loss: 1.0349713563919067,grad_norm: 0.9999992674636425, iteration: 322180
loss: 0.9751031994819641,grad_norm: 0.9999992335083954, iteration: 322181
loss: 1.0096468925476074,grad_norm: 0.8843623666425564, iteration: 322182
loss: 1.0366120338439941,grad_norm: 0.9999990616076339, iteration: 322183
loss: 0.9590150117874146,grad_norm: 0.9274969087991162, iteration: 322184
loss: 1.0114725828170776,grad_norm: 0.8400016456138968, iteration: 322185
loss: 1.0057787895202637,grad_norm: 0.9999991569214662, iteration: 322186
loss: 1.0028170347213745,grad_norm: 0.9287439061679236, iteration: 322187
loss: 0.987618625164032,grad_norm: 0.9100836828751645, iteration: 322188
loss: 1.0064400434494019,grad_norm: 0.9560292334318412, iteration: 322189
loss: 0.977537214756012,grad_norm: 0.9181447746136236, iteration: 322190
loss: 1.060289978981018,grad_norm: 0.9999996747248439, iteration: 322191
loss: 0.9687472581863403,grad_norm: 0.7834741378048256, iteration: 322192
loss: 0.9727469682693481,grad_norm: 0.9999992640106673, iteration: 322193
loss: 1.0073648691177368,grad_norm: 0.9999997962528481, iteration: 322194
loss: 1.041974663734436,grad_norm: 0.9999990781649262, iteration: 322195
loss: 1.0181084871292114,grad_norm: 0.7395685701922049, iteration: 322196
loss: 0.9998562335968018,grad_norm: 0.9558586767143283, iteration: 322197
loss: 1.0018010139465332,grad_norm: 0.999999835649252, iteration: 322198
loss: 0.9937266111373901,grad_norm: 0.9269669615796867, iteration: 322199
loss: 1.0235563516616821,grad_norm: 0.9379939669999104, iteration: 322200
loss: 1.0372415781021118,grad_norm: 0.9351902484939144, iteration: 322201
loss: 0.9729856848716736,grad_norm: 0.8295293710244164, iteration: 322202
loss: 1.0002259016036987,grad_norm: 0.7512780742553995, iteration: 322203
loss: 0.9731107354164124,grad_norm: 0.9999990577980472, iteration: 322204
loss: 0.987522542476654,grad_norm: 0.765931634868482, iteration: 322205
loss: 1.0822705030441284,grad_norm: 0.9551949825503111, iteration: 322206
loss: 1.0205439329147339,grad_norm: 0.9999990266445866, iteration: 322207
loss: 1.037024736404419,grad_norm: 0.9999991476931543, iteration: 322208
loss: 1.1177500486373901,grad_norm: 0.9999999621862528, iteration: 322209
loss: 0.977215588092804,grad_norm: 0.9999992393912104, iteration: 322210
loss: 0.9964802861213684,grad_norm: 0.869736387586135, iteration: 322211
loss: 0.9984961152076721,grad_norm: 0.9623233462418442, iteration: 322212
loss: 1.0000654458999634,grad_norm: 0.8215227261142334, iteration: 322213
loss: 0.9885531663894653,grad_norm: 0.9999991425949946, iteration: 322214
loss: 1.0597659349441528,grad_norm: 0.9163494835560717, iteration: 322215
loss: 1.0133130550384521,grad_norm: 0.999999193040791, iteration: 322216
loss: 0.9643594622612,grad_norm: 0.7545477142180511, iteration: 322217
loss: 1.018629789352417,grad_norm: 0.8032827726446955, iteration: 322218
loss: 0.9971737861633301,grad_norm: 0.7932067614452543, iteration: 322219
loss: 1.0206555128097534,grad_norm: 0.9999995288556568, iteration: 322220
loss: 1.0310741662979126,grad_norm: 0.9999990748704712, iteration: 322221
loss: 1.03604257106781,grad_norm: 0.9999992328278854, iteration: 322222
loss: 0.9575398564338684,grad_norm: 0.7996286023245592, iteration: 322223
loss: 0.9946894645690918,grad_norm: 0.9999995453319209, iteration: 322224
loss: 1.0619298219680786,grad_norm: 0.8678280796435494, iteration: 322225
loss: 1.0053379535675049,grad_norm: 0.9551237786654011, iteration: 322226
loss: 1.0154030323028564,grad_norm: 0.9999992714173123, iteration: 322227
loss: 0.992000162601471,grad_norm: 0.9468256739090385, iteration: 322228
loss: 1.031204104423523,grad_norm: 0.8016781074801043, iteration: 322229
loss: 1.004662036895752,grad_norm: 0.8580871911065241, iteration: 322230
loss: 1.0059514045715332,grad_norm: 0.9010558286942727, iteration: 322231
loss: 0.9974071979522705,grad_norm: 0.8665427774766812, iteration: 322232
loss: 1.0052512884140015,grad_norm: 0.7575353178725466, iteration: 322233
loss: 1.0016026496887207,grad_norm: 0.8323870660733695, iteration: 322234
loss: 0.9882214069366455,grad_norm: 0.7429862122486002, iteration: 322235
loss: 1.020207166671753,grad_norm: 0.9593318668326395, iteration: 322236
loss: 1.0148239135742188,grad_norm: 0.7979116290701054, iteration: 322237
loss: 0.9870486855506897,grad_norm: 0.9118833361088866, iteration: 322238
loss: 1.011881709098816,grad_norm: 0.9174542381045238, iteration: 322239
loss: 1.015437126159668,grad_norm: 0.9318129413837949, iteration: 322240
loss: 0.9779583215713501,grad_norm: 0.7086047906340975, iteration: 322241
loss: 0.9929753541946411,grad_norm: 0.7668008835580585, iteration: 322242
loss: 1.0418931245803833,grad_norm: 0.9999991503586922, iteration: 322243
loss: 1.0181279182434082,grad_norm: 0.9999990043217637, iteration: 322244
loss: 0.9989361763000488,grad_norm: 0.938677400773921, iteration: 322245
loss: 0.9940522313117981,grad_norm: 0.8449653083155156, iteration: 322246
loss: 1.0003410577774048,grad_norm: 0.8668342868371164, iteration: 322247
loss: 1.0002657175064087,grad_norm: 0.7863728526146452, iteration: 322248
loss: 1.0042842626571655,grad_norm: 0.8022345773673518, iteration: 322249
loss: 1.0233973264694214,grad_norm: 0.9999990866487408, iteration: 322250
loss: 1.0217365026474,grad_norm: 0.9999993619184326, iteration: 322251
loss: 1.0061708688735962,grad_norm: 0.9999990664050677, iteration: 322252
loss: 0.9930479526519775,grad_norm: 0.9999996285232632, iteration: 322253
loss: 1.0137860774993896,grad_norm: 0.9999991570840008, iteration: 322254
loss: 1.0094830989837646,grad_norm: 0.7620407195656769, iteration: 322255
loss: 1.0947850942611694,grad_norm: 0.999999223715855, iteration: 322256
loss: 0.9696948528289795,grad_norm: 0.9013611047325656, iteration: 322257
loss: 0.9773014783859253,grad_norm: 0.9845479666650294, iteration: 322258
loss: 1.0087770223617554,grad_norm: 0.9999992801140664, iteration: 322259
loss: 1.0437804460525513,grad_norm: 0.9999995374890166, iteration: 322260
loss: 1.001908302307129,grad_norm: 0.7335072038099014, iteration: 322261
loss: 1.0377840995788574,grad_norm: 0.888552490865559, iteration: 322262
loss: 0.9946728944778442,grad_norm: 0.767122681563073, iteration: 322263
loss: 0.9732792377471924,grad_norm: 0.7999084802885411, iteration: 322264
loss: 0.9906489253044128,grad_norm: 0.9999990899992088, iteration: 322265
loss: 1.0021119117736816,grad_norm: 0.9999992329769335, iteration: 322266
loss: 1.0113654136657715,grad_norm: 0.8538736393350587, iteration: 322267
loss: 0.9925031065940857,grad_norm: 0.9364261361624077, iteration: 322268
loss: 0.9907296299934387,grad_norm: 0.7903940659969139, iteration: 322269
loss: 0.9997243285179138,grad_norm: 0.6972491121116953, iteration: 322270
loss: 0.991612434387207,grad_norm: 0.8575411258338032, iteration: 322271
loss: 1.0024585723876953,grad_norm: 0.7916797212026891, iteration: 322272
loss: 1.0180808305740356,grad_norm: 0.9129446851269949, iteration: 322273
loss: 1.0072680711746216,grad_norm: 0.9999990964846691, iteration: 322274
loss: 0.9890440106391907,grad_norm: 0.9252907843784673, iteration: 322275
loss: 0.9500647783279419,grad_norm: 0.8961179635438973, iteration: 322276
loss: 1.0151690244674683,grad_norm: 0.8308868047726667, iteration: 322277
loss: 1.003718376159668,grad_norm: 0.9226505243784976, iteration: 322278
loss: 0.9813968539237976,grad_norm: 0.8598412025295857, iteration: 322279
loss: 1.0271222591400146,grad_norm: 0.9712691474285354, iteration: 322280
loss: 1.0379462242126465,grad_norm: 0.8136703848540782, iteration: 322281
loss: 0.9786508083343506,grad_norm: 0.9999991881153786, iteration: 322282
loss: 1.0081521272659302,grad_norm: 0.898137837993332, iteration: 322283
loss: 1.0466252565383911,grad_norm: 0.9999990781356498, iteration: 322284
loss: 1.0033336877822876,grad_norm: 0.8640574294862174, iteration: 322285
loss: 1.00240957736969,grad_norm: 0.9302932042422853, iteration: 322286
loss: 0.9879111051559448,grad_norm: 0.8283360799886826, iteration: 322287
loss: 0.9963274002075195,grad_norm: 0.999999071705498, iteration: 322288
loss: 0.9766478538513184,grad_norm: 0.9655732148357474, iteration: 322289
loss: 1.004516363143921,grad_norm: 0.8914102291743508, iteration: 322290
loss: 1.0047948360443115,grad_norm: 0.7833367785536226, iteration: 322291
loss: 1.027611255645752,grad_norm: 0.8301498764599256, iteration: 322292
loss: 1.0590095520019531,grad_norm: 0.9999998161934917, iteration: 322293
loss: 0.9987429976463318,grad_norm: 0.8424181564662212, iteration: 322294
loss: 1.0164819955825806,grad_norm: 0.7653223337228133, iteration: 322295
loss: 0.9606162309646606,grad_norm: 0.9673184968048795, iteration: 322296
loss: 0.9970874786376953,grad_norm: 0.9254984646561135, iteration: 322297
loss: 0.9988576173782349,grad_norm: 0.8324700206328163, iteration: 322298
loss: 0.9664899110794067,grad_norm: 0.8803681948893086, iteration: 322299
loss: 0.9993353486061096,grad_norm: 0.8824286600604317, iteration: 322300
loss: 1.086812973022461,grad_norm: 0.8934361505509608, iteration: 322301
loss: 0.9914366602897644,grad_norm: 0.9159433147388586, iteration: 322302
loss: 1.0145100355148315,grad_norm: 0.897846816590585, iteration: 322303
loss: 1.0885024070739746,grad_norm: 0.9999991152818997, iteration: 322304
loss: 0.9982431530952454,grad_norm: 0.8563353089944158, iteration: 322305
loss: 1.0744374990463257,grad_norm: 0.9999992112161715, iteration: 322306
loss: 1.0253019332885742,grad_norm: 0.9999994253750862, iteration: 322307
loss: 1.0140961408615112,grad_norm: 0.820515937668114, iteration: 322308
loss: 0.966076135635376,grad_norm: 0.8003793783844746, iteration: 322309
loss: 0.9905390739440918,grad_norm: 0.8273858102300559, iteration: 322310
loss: 0.9995277523994446,grad_norm: 0.9828534385158779, iteration: 322311
loss: 0.9855919480323792,grad_norm: 0.8463382585338383, iteration: 322312
loss: 1.0629938840866089,grad_norm: 0.8963915971596691, iteration: 322313
loss: 1.007178783416748,grad_norm: 0.9683040242873262, iteration: 322314
loss: 0.9815578460693359,grad_norm: 0.9760450202746975, iteration: 322315
loss: 0.9837430119514465,grad_norm: 0.8422970582672232, iteration: 322316
loss: 0.9918147325515747,grad_norm: 0.8327807957670271, iteration: 322317
loss: 0.9624297022819519,grad_norm: 0.7364275632925036, iteration: 322318
loss: 1.0818589925765991,grad_norm: 0.9999994130153648, iteration: 322319
loss: 1.0937427282333374,grad_norm: 0.9999996431002125, iteration: 322320
loss: 0.9858987927436829,grad_norm: 0.8236533088896933, iteration: 322321
loss: 1.0179193019866943,grad_norm: 0.9999990292321489, iteration: 322322
loss: 0.9985212683677673,grad_norm: 0.9999998528133399, iteration: 322323
loss: 0.9701627492904663,grad_norm: 0.8082120907550786, iteration: 322324
loss: 1.0260415077209473,grad_norm: 0.7987268758007621, iteration: 322325
loss: 1.0086987018585205,grad_norm: 0.8518048408316181, iteration: 322326
loss: 1.0402826070785522,grad_norm: 0.7671392094962379, iteration: 322327
loss: 0.9876051545143127,grad_norm: 0.9910477292147694, iteration: 322328
loss: 0.9628911018371582,grad_norm: 0.8554928909954943, iteration: 322329
loss: 1.0011787414550781,grad_norm: 0.8993776693211605, iteration: 322330
loss: 1.0096255540847778,grad_norm: 0.8944125128613002, iteration: 322331
loss: 1.0157376527786255,grad_norm: 0.9999991866395715, iteration: 322332
loss: 1.0104602575302124,grad_norm: 0.9999992604655408, iteration: 322333
loss: 1.0095055103302002,grad_norm: 0.8378680296731263, iteration: 322334
loss: 1.007365107536316,grad_norm: 0.8366827690800417, iteration: 322335
loss: 1.0016546249389648,grad_norm: 0.8828033369719951, iteration: 322336
loss: 1.0303274393081665,grad_norm: 0.9999992062455855, iteration: 322337
loss: 1.017211675643921,grad_norm: 0.9668317971132724, iteration: 322338
loss: 1.0035446882247925,grad_norm: 0.8980331435853018, iteration: 322339
loss: 0.948337972164154,grad_norm: 0.8509762020001808, iteration: 322340
loss: 1.0070652961730957,grad_norm: 0.9118964216507848, iteration: 322341
loss: 0.9687178134918213,grad_norm: 0.8568174842687674, iteration: 322342
loss: 1.0202279090881348,grad_norm: 0.7573283629608576, iteration: 322343
loss: 0.9811879396438599,grad_norm: 0.9347782814063487, iteration: 322344
loss: 1.003294587135315,grad_norm: 0.8758667599948413, iteration: 322345
loss: 1.063456654548645,grad_norm: 0.9999989654321673, iteration: 322346
loss: 0.9803611636161804,grad_norm: 0.890594805521276, iteration: 322347
loss: 0.976381242275238,grad_norm: 0.8474220292794685, iteration: 322348
loss: 0.9951152801513672,grad_norm: 0.7023161746490686, iteration: 322349
loss: 1.0304898023605347,grad_norm: 0.9999990933926427, iteration: 322350
loss: 1.0054866075515747,grad_norm: 0.999999003024232, iteration: 322351
loss: 0.9821599125862122,grad_norm: 0.9999990347356499, iteration: 322352
loss: 1.0301949977874756,grad_norm: 0.999999113683674, iteration: 322353
loss: 1.0606242418289185,grad_norm: 0.999999326964535, iteration: 322354
loss: 0.9895671606063843,grad_norm: 0.9999991581611417, iteration: 322355
loss: 0.9836096167564392,grad_norm: 0.9124373307911428, iteration: 322356
loss: 0.9767744541168213,grad_norm: 0.9835155889398876, iteration: 322357
loss: 1.040940284729004,grad_norm: 0.9555719901551456, iteration: 322358
loss: 1.0591257810592651,grad_norm: 0.9999990999464918, iteration: 322359
loss: 0.9946827292442322,grad_norm: 0.7914122074572515, iteration: 322360
loss: 0.9581864476203918,grad_norm: 0.8491440018748598, iteration: 322361
loss: 1.0676450729370117,grad_norm: 0.9999992908252647, iteration: 322362
loss: 1.0004632472991943,grad_norm: 0.926143130009455, iteration: 322363
loss: 1.040380597114563,grad_norm: 0.9134543412505466, iteration: 322364
loss: 1.1107221841812134,grad_norm: 0.9999999494262085, iteration: 322365
loss: 0.9928244948387146,grad_norm: 0.974808570646218, iteration: 322366
loss: 1.0874319076538086,grad_norm: 0.8273969297175932, iteration: 322367
loss: 1.019466519355774,grad_norm: 0.8492218938617275, iteration: 322368
loss: 0.9351155161857605,grad_norm: 0.9159227250547055, iteration: 322369
loss: 1.009653091430664,grad_norm: 0.7406836582142924, iteration: 322370
loss: 1.0033825635910034,grad_norm: 0.7541276501253766, iteration: 322371
loss: 0.990179717540741,grad_norm: 0.8885685839703614, iteration: 322372
loss: 1.045779824256897,grad_norm: 0.8464424675944217, iteration: 322373
loss: 1.0109648704528809,grad_norm: 0.8297481580079408, iteration: 322374
loss: 0.9990089535713196,grad_norm: 0.8429177245474145, iteration: 322375
loss: 0.9949836730957031,grad_norm: 0.7460261104223896, iteration: 322376
loss: 1.0178592205047607,grad_norm: 0.8483089898259626, iteration: 322377
loss: 0.9708989262580872,grad_norm: 0.9999993784348856, iteration: 322378
loss: 1.010311484336853,grad_norm: 0.9999991429552559, iteration: 322379
loss: 1.0015075206756592,grad_norm: 0.7941632919928263, iteration: 322380
loss: 0.9786080718040466,grad_norm: 0.9300530913638041, iteration: 322381
loss: 0.9931361675262451,grad_norm: 0.999999035075362, iteration: 322382
loss: 1.0447192192077637,grad_norm: 0.8982035274401748, iteration: 322383
loss: 0.9784913063049316,grad_norm: 0.7912253591613578, iteration: 322384
loss: 1.0016963481903076,grad_norm: 0.7229939270873118, iteration: 322385
loss: 1.0098315477371216,grad_norm: 0.8532576863495984, iteration: 322386
loss: 1.0052660703659058,grad_norm: 0.745541766407519, iteration: 322387
loss: 1.037734866142273,grad_norm: 0.9999998209822103, iteration: 322388
loss: 1.0120935440063477,grad_norm: 0.8261151143973611, iteration: 322389
loss: 0.9979497790336609,grad_norm: 0.7290051320060738, iteration: 322390
loss: 0.9870704412460327,grad_norm: 0.9999991860593034, iteration: 322391
loss: 1.0419542789459229,grad_norm: 0.7118211034089584, iteration: 322392
loss: 1.0342646837234497,grad_norm: 0.8370830673513194, iteration: 322393
loss: 0.9889149069786072,grad_norm: 0.8765712196467494, iteration: 322394
loss: 1.005623698234558,grad_norm: 0.7703185272849477, iteration: 322395
loss: 0.9865126609802246,grad_norm: 0.9999990282421104, iteration: 322396
loss: 0.9832359552383423,grad_norm: 0.832288267644353, iteration: 322397
loss: 0.968746542930603,grad_norm: 0.8363008847098964, iteration: 322398
loss: 1.008103370666504,grad_norm: 0.9999995727415548, iteration: 322399
loss: 0.969090461730957,grad_norm: 0.7016825435252981, iteration: 322400
loss: 0.9675252437591553,grad_norm: 0.8072916573583325, iteration: 322401
loss: 0.9981205463409424,grad_norm: 0.85287776672304, iteration: 322402
loss: 1.0057477951049805,grad_norm: 0.9999991887533575, iteration: 322403
loss: 1.0009065866470337,grad_norm: 0.744144953017298, iteration: 322404
loss: 1.0086232423782349,grad_norm: 0.797448942963508, iteration: 322405
loss: 0.9955692887306213,grad_norm: 0.8372602254291956, iteration: 322406
loss: 0.9892923831939697,grad_norm: 0.7826110084914392, iteration: 322407
loss: 0.9923755526542664,grad_norm: 0.7840716910565017, iteration: 322408
loss: 0.98969566822052,grad_norm: 0.8239278555034703, iteration: 322409
loss: 0.985970139503479,grad_norm: 0.9214266399343518, iteration: 322410
loss: 1.0005725622177124,grad_norm: 0.9780180902029556, iteration: 322411
loss: 0.9842792749404907,grad_norm: 0.8039434483718245, iteration: 322412
loss: 1.017715573310852,grad_norm: 0.9073532853635069, iteration: 322413
loss: 1.0256649255752563,grad_norm: 0.8680728740251739, iteration: 322414
loss: 0.9841340184211731,grad_norm: 0.8277181828647894, iteration: 322415
loss: 1.015956997871399,grad_norm: 0.7764142434696122, iteration: 322416
loss: 1.0403923988342285,grad_norm: 0.887879135595033, iteration: 322417
loss: 1.0929386615753174,grad_norm: 0.9999999410212923, iteration: 322418
loss: 1.0078892707824707,grad_norm: 0.7320669032826524, iteration: 322419
loss: 0.9912655353546143,grad_norm: 0.6858475788326716, iteration: 322420
loss: 1.0049669742584229,grad_norm: 0.9003197783445891, iteration: 322421
loss: 1.0148577690124512,grad_norm: 0.9999994822344549, iteration: 322422
loss: 1.007120966911316,grad_norm: 0.9136108113934126, iteration: 322423
loss: 0.9858956933021545,grad_norm: 0.9346624062784552, iteration: 322424
loss: 0.9985837340354919,grad_norm: 0.9316483876226573, iteration: 322425
loss: 1.0248878002166748,grad_norm: 0.7353751608836118, iteration: 322426
loss: 1.0281500816345215,grad_norm: 0.8640707183332029, iteration: 322427
loss: 0.9523211717605591,grad_norm: 0.8626455803129913, iteration: 322428
loss: 0.9693154096603394,grad_norm: 0.7069021819999438, iteration: 322429
loss: 0.9913111329078674,grad_norm: 0.7834218547064896, iteration: 322430
loss: 1.0334346294403076,grad_norm: 0.8167638163557124, iteration: 322431
loss: 0.976271390914917,grad_norm: 0.8315660047534184, iteration: 322432
loss: 0.9899637699127197,grad_norm: 0.8230809936846776, iteration: 322433
loss: 1.0250025987625122,grad_norm: 0.9999993632658238, iteration: 322434
loss: 0.9978459477424622,grad_norm: 0.7958791601884155, iteration: 322435
loss: 0.9905180335044861,grad_norm: 0.7362546848281404, iteration: 322436
loss: 0.9564854502677917,grad_norm: 0.9968844608801276, iteration: 322437
loss: 0.9941647052764893,grad_norm: 0.8605706530117242, iteration: 322438
loss: 1.000799536705017,grad_norm: 0.7556439340868101, iteration: 322439
loss: 1.0020229816436768,grad_norm: 0.9021111624992244, iteration: 322440
loss: 0.9950171709060669,grad_norm: 0.7269778872234911, iteration: 322441
loss: 0.9863698482513428,grad_norm: 0.8582484899547441, iteration: 322442
loss: 0.9983159303665161,grad_norm: 0.6152063911755895, iteration: 322443
loss: 1.0469146966934204,grad_norm: 0.9918981057489619, iteration: 322444
loss: 1.0408239364624023,grad_norm: 0.9161914931743756, iteration: 322445
loss: 1.0326459407806396,grad_norm: 0.999999665782976, iteration: 322446
loss: 0.9748900532722473,grad_norm: 0.7542122404145377, iteration: 322447
loss: 1.0302270650863647,grad_norm: 0.9999990979434343, iteration: 322448
loss: 1.0001800060272217,grad_norm: 0.8654209179578042, iteration: 322449
loss: 1.0144535303115845,grad_norm: 0.8527874040371322, iteration: 322450
loss: 1.0290907621383667,grad_norm: 0.9999990775644293, iteration: 322451
loss: 0.9746803641319275,grad_norm: 0.8134462790906326, iteration: 322452
loss: 1.0334885120391846,grad_norm: 0.8332941197459187, iteration: 322453
loss: 1.0249147415161133,grad_norm: 0.8051994016703509, iteration: 322454
loss: 0.9767124652862549,grad_norm: 0.8000178473952749, iteration: 322455
loss: 1.0038365125656128,grad_norm: 0.7078854859478395, iteration: 322456
loss: 1.0047311782836914,grad_norm: 0.6917643810838077, iteration: 322457
loss: 0.9775704741477966,grad_norm: 0.802689845951904, iteration: 322458
loss: 0.9941026568412781,grad_norm: 0.8051427199600111, iteration: 322459
loss: 0.9629970788955688,grad_norm: 0.8156779116744612, iteration: 322460
loss: 1.0018476247787476,grad_norm: 0.6580210901275848, iteration: 322461
loss: 1.0259491205215454,grad_norm: 0.7634565162883852, iteration: 322462
loss: 0.9964008331298828,grad_norm: 0.9270758421794184, iteration: 322463
loss: 1.0092799663543701,grad_norm: 0.8427518339965423, iteration: 322464
loss: 1.011291742324829,grad_norm: 0.8915030662136485, iteration: 322465
loss: 0.9898731708526611,grad_norm: 0.8191290452525982, iteration: 322466
loss: 0.980643093585968,grad_norm: 0.9238105316342148, iteration: 322467
loss: 0.9527556896209717,grad_norm: 0.9172758102513064, iteration: 322468
loss: 1.0112472772598267,grad_norm: 0.9926891422285793, iteration: 322469
loss: 1.0070501565933228,grad_norm: 0.7746460051014254, iteration: 322470
loss: 1.02821683883667,grad_norm: 0.7380454536330964, iteration: 322471
loss: 1.0214804410934448,grad_norm: 0.902220368974606, iteration: 322472
loss: 1.0108988285064697,grad_norm: 0.7570432732374421, iteration: 322473
loss: 0.9612110257148743,grad_norm: 0.763053138813157, iteration: 322474
loss: 1.0399587154388428,grad_norm: 0.9999998856933002, iteration: 322475
loss: 1.005628228187561,grad_norm: 0.7310631387407382, iteration: 322476
loss: 1.0170598030090332,grad_norm: 0.8758724843785608, iteration: 322477
loss: 1.000470519065857,grad_norm: 0.7556177270347529, iteration: 322478
loss: 1.0080267190933228,grad_norm: 0.887871830513054, iteration: 322479
loss: 1.0149309635162354,grad_norm: 0.9073832591748704, iteration: 322480
loss: 1.0222851037979126,grad_norm: 0.8721096883173947, iteration: 322481
loss: 0.9925045371055603,grad_norm: 0.7717523790359085, iteration: 322482
loss: 1.0354087352752686,grad_norm: 0.8991110541716314, iteration: 322483
loss: 1.0130130052566528,grad_norm: 0.8339202916003888, iteration: 322484
loss: 0.9893046021461487,grad_norm: 0.7868685631408987, iteration: 322485
loss: 1.001375675201416,grad_norm: 0.8664237046926635, iteration: 322486
loss: 0.9912182688713074,grad_norm: 0.7595428325916994, iteration: 322487
loss: 0.9994624853134155,grad_norm: 0.8390938791734723, iteration: 322488
loss: 0.9956742525100708,grad_norm: 0.9512828285653434, iteration: 322489
loss: 0.9989174604415894,grad_norm: 0.868499715774913, iteration: 322490
loss: 1.0189735889434814,grad_norm: 0.8328950255437685, iteration: 322491
loss: 0.9781777262687683,grad_norm: 0.8395066219744146, iteration: 322492
loss: 0.9721763134002686,grad_norm: 0.8093299431689541, iteration: 322493
loss: 0.9896856546401978,grad_norm: 0.8941089235114926, iteration: 322494
loss: 1.0006102323532104,grad_norm: 0.8309323934806585, iteration: 322495
loss: 1.0240470170974731,grad_norm: 0.8798457431395902, iteration: 322496
loss: 0.9821115136146545,grad_norm: 0.7255599961875713, iteration: 322497
loss: 1.013441562652588,grad_norm: 0.8330946308462088, iteration: 322498
loss: 0.9792047142982483,grad_norm: 0.9422880577081659, iteration: 322499
loss: 1.037974238395691,grad_norm: 0.9905421313245646, iteration: 322500
loss: 1.0166271924972534,grad_norm: 0.7423816002037735, iteration: 322501
loss: 0.95998215675354,grad_norm: 0.9519850545024151, iteration: 322502
loss: 0.9946141839027405,grad_norm: 0.9487473304230208, iteration: 322503
loss: 1.0077005624771118,grad_norm: 0.8426620891755431, iteration: 322504
loss: 0.9617924690246582,grad_norm: 0.7870113789382704, iteration: 322505
loss: 0.9882919192314148,grad_norm: 0.9999994553013988, iteration: 322506
loss: 1.0052870512008667,grad_norm: 0.8064043476448418, iteration: 322507
loss: 1.0270612239837646,grad_norm: 0.7708664651760885, iteration: 322508
loss: 0.9832521677017212,grad_norm: 0.9777140308558293, iteration: 322509
loss: 1.0390229225158691,grad_norm: 0.924367932791412, iteration: 322510
loss: 0.9808830618858337,grad_norm: 0.8241296861243096, iteration: 322511
loss: 0.9982119202613831,grad_norm: 0.7216110081012655, iteration: 322512
loss: 0.9971842169761658,grad_norm: 0.7165454336014664, iteration: 322513
loss: 0.9847646951675415,grad_norm: 0.8107486209999972, iteration: 322514
loss: 1.151484489440918,grad_norm: 0.9999997560771111, iteration: 322515
loss: 0.9739516377449036,grad_norm: 0.7810433444913684, iteration: 322516
loss: 1.0132179260253906,grad_norm: 0.8833520627884924, iteration: 322517
loss: 0.9857798218727112,grad_norm: 0.8504657040238834, iteration: 322518
loss: 0.9603586792945862,grad_norm: 0.8080427389506415, iteration: 322519
loss: 1.0004976987838745,grad_norm: 0.8976182959064881, iteration: 322520
loss: 1.026086688041687,grad_norm: 0.8206468906246135, iteration: 322521
loss: 1.0132625102996826,grad_norm: 0.7119276351343288, iteration: 322522
loss: 1.0051226615905762,grad_norm: 0.813975894332776, iteration: 322523
loss: 0.9885302186012268,grad_norm: 0.846565427392081, iteration: 322524
loss: 1.011804461479187,grad_norm: 0.8098370693654329, iteration: 322525
loss: 1.02492094039917,grad_norm: 0.6392824187163737, iteration: 322526
loss: 0.9989530444145203,grad_norm: 0.9999989570782277, iteration: 322527
loss: 0.9963239431381226,grad_norm: 0.8907824598899231, iteration: 322528
loss: 1.028617024421692,grad_norm: 0.9626125477705337, iteration: 322529
loss: 0.9926635026931763,grad_norm: 0.8476342492230982, iteration: 322530
loss: 0.9721357226371765,grad_norm: 0.9736335856883831, iteration: 322531
loss: 1.0227655172348022,grad_norm: 0.9260633866074673, iteration: 322532
loss: 0.988593339920044,grad_norm: 0.8565778626195979, iteration: 322533
loss: 0.9945265054702759,grad_norm: 0.8657900047468948, iteration: 322534
loss: 0.9771177172660828,grad_norm: 0.6871386199965985, iteration: 322535
loss: 1.003401756286621,grad_norm: 0.9999999437835084, iteration: 322536
loss: 0.9928093552589417,grad_norm: 0.7814548532324813, iteration: 322537
loss: 1.0954608917236328,grad_norm: 0.7802653885697358, iteration: 322538
loss: 0.9791049957275391,grad_norm: 0.7934350786511102, iteration: 322539
loss: 1.0138061046600342,grad_norm: 0.7521951333770914, iteration: 322540
loss: 1.0462901592254639,grad_norm: 0.9999999189730228, iteration: 322541
loss: 0.9812420606613159,grad_norm: 0.9995727687461333, iteration: 322542
loss: 0.979780375957489,grad_norm: 0.9906408183528802, iteration: 322543
loss: 1.006864309310913,grad_norm: 0.8555544588706168, iteration: 322544
loss: 0.9955385327339172,grad_norm: 0.8678765598492654, iteration: 322545
loss: 0.9937067627906799,grad_norm: 0.7722957452528191, iteration: 322546
loss: 1.0119906663894653,grad_norm: 0.849842727159078, iteration: 322547
loss: 1.001659870147705,grad_norm: 0.7486751732017008, iteration: 322548
loss: 0.9735532999038696,grad_norm: 0.9999992498706667, iteration: 322549
loss: 0.9765228629112244,grad_norm: 0.8170413822380467, iteration: 322550
loss: 0.9990667700767517,grad_norm: 0.9999989653450007, iteration: 322551
loss: 1.0029267072677612,grad_norm: 0.8265651186346212, iteration: 322552
loss: 1.0029197931289673,grad_norm: 0.9999991254406156, iteration: 322553
loss: 1.0318952798843384,grad_norm: 0.8373569741476207, iteration: 322554
loss: 1.0033440589904785,grad_norm: 0.8734176820107317, iteration: 322555
loss: 0.9648402333259583,grad_norm: 0.9999997877958058, iteration: 322556
loss: 0.9777311682701111,grad_norm: 0.8082095067865216, iteration: 322557
loss: 1.0194637775421143,grad_norm: 0.6368412528233909, iteration: 322558
loss: 1.0020538568496704,grad_norm: 0.8583239646429474, iteration: 322559
loss: 1.0716097354888916,grad_norm: 0.9999999452515914, iteration: 322560
loss: 1.0275341272354126,grad_norm: 0.8862476033537231, iteration: 322561
loss: 0.9956019520759583,grad_norm: 0.8506352290517805, iteration: 322562
loss: 1.011727213859558,grad_norm: 0.8736410569136148, iteration: 322563
loss: 1.021247386932373,grad_norm: 0.9999988710262199, iteration: 322564
loss: 1.0107156038284302,grad_norm: 0.8094140394029467, iteration: 322565
loss: 1.0053825378417969,grad_norm: 0.8183125241833109, iteration: 322566
loss: 1.0056685209274292,grad_norm: 0.6840228619579863, iteration: 322567
loss: 0.9660163521766663,grad_norm: 0.8114580581278728, iteration: 322568
loss: 1.048714280128479,grad_norm: 0.9162106788553647, iteration: 322569
loss: 1.0146961212158203,grad_norm: 0.741556268601881, iteration: 322570
loss: 1.0064114332199097,grad_norm: 0.8364608601563728, iteration: 322571
loss: 1.025376319885254,grad_norm: 0.9999991465810134, iteration: 322572
loss: 0.9882078766822815,grad_norm: 0.7894545555924257, iteration: 322573
loss: 1.0712692737579346,grad_norm: 0.9999997151786925, iteration: 322574
loss: 1.00287926197052,grad_norm: 0.7966748352209839, iteration: 322575
loss: 1.0297931432724,grad_norm: 0.7501378719445077, iteration: 322576
loss: 0.9918915629386902,grad_norm: 0.9468897705083642, iteration: 322577
loss: 1.008551836013794,grad_norm: 0.9406292511050729, iteration: 322578
loss: 0.9501502513885498,grad_norm: 0.8614059248431536, iteration: 322579
loss: 0.9764596223831177,grad_norm: 0.9510187042950508, iteration: 322580
loss: 0.9924824237823486,grad_norm: 0.9999998251044081, iteration: 322581
loss: 1.0246132612228394,grad_norm: 0.9124417923178776, iteration: 322582
loss: 1.025765061378479,grad_norm: 0.9366960951385257, iteration: 322583
loss: 1.0141667127609253,grad_norm: 0.9133161743904141, iteration: 322584
loss: 0.9948187470436096,grad_norm: 0.8140739136468695, iteration: 322585
loss: 1.0146205425262451,grad_norm: 0.9351444428784509, iteration: 322586
loss: 1.004747986793518,grad_norm: 0.8011657063990122, iteration: 322587
loss: 1.0209616422653198,grad_norm: 0.8776155831807129, iteration: 322588
loss: 1.016047477722168,grad_norm: 0.8958661968668612, iteration: 322589
loss: 0.9893049597740173,grad_norm: 0.739119069697875, iteration: 322590
loss: 0.9895897507667542,grad_norm: 0.7909977713682658, iteration: 322591
loss: 1.0969833135604858,grad_norm: 0.7983045970859586, iteration: 322592
loss: 1.003348708152771,grad_norm: 0.8519966864689043, iteration: 322593
loss: 1.011626958847046,grad_norm: 0.9684930920455344, iteration: 322594
loss: 0.9743462800979614,grad_norm: 0.7790792662098669, iteration: 322595
loss: 0.9597951173782349,grad_norm: 0.8388573461690904, iteration: 322596
loss: 1.0296201705932617,grad_norm: 0.866363099319208, iteration: 322597
loss: 1.0435835123062134,grad_norm: 0.9032614455234496, iteration: 322598
loss: 0.984948456287384,grad_norm: 0.9329174189708083, iteration: 322599
loss: 1.0467827320098877,grad_norm: 0.8812625252483407, iteration: 322600
loss: 0.9882388710975647,grad_norm: 0.9019885282015292, iteration: 322601
loss: 0.9397384524345398,grad_norm: 0.7873294091987002, iteration: 322602
loss: 1.007675051689148,grad_norm: 0.9208003348495422, iteration: 322603
loss: 0.9817862510681152,grad_norm: 0.8188948136827863, iteration: 322604
loss: 0.9605221152305603,grad_norm: 0.8413178517387636, iteration: 322605
loss: 1.0215927362442017,grad_norm: 0.8780208714954475, iteration: 322606
loss: 1.0204284191131592,grad_norm: 0.9999997979198085, iteration: 322607
loss: 0.9964192509651184,grad_norm: 0.7749014657168318, iteration: 322608
loss: 1.0067206621170044,grad_norm: 0.8339075776509708, iteration: 322609
loss: 0.9928355813026428,grad_norm: 0.8886802726576069, iteration: 322610
loss: 1.0201218128204346,grad_norm: 0.9999994827587012, iteration: 322611
loss: 0.963974118232727,grad_norm: 0.8277493063071157, iteration: 322612
loss: 0.9719123840332031,grad_norm: 0.9189979948621009, iteration: 322613
loss: 0.9609441161155701,grad_norm: 0.8421377184624764, iteration: 322614
loss: 1.0269585847854614,grad_norm: 0.791636596337081, iteration: 322615
loss: 1.0035346746444702,grad_norm: 0.8093226975263491, iteration: 322616
loss: 1.0324223041534424,grad_norm: 0.7157326898044195, iteration: 322617
loss: 0.9841249585151672,grad_norm: 0.9514094831303082, iteration: 322618
loss: 0.9766485691070557,grad_norm: 0.9999993988640242, iteration: 322619
loss: 1.0078002214431763,grad_norm: 0.8860677808489407, iteration: 322620
loss: 1.039483904838562,grad_norm: 0.6624854364269509, iteration: 322621
loss: 0.946392297744751,grad_norm: 0.7474460778166399, iteration: 322622
loss: 1.0182067155838013,grad_norm: 0.7452255669713186, iteration: 322623
loss: 1.0070078372955322,grad_norm: 0.8441656206218187, iteration: 322624
loss: 1.0068122148513794,grad_norm: 0.8575912670321362, iteration: 322625
loss: 1.012528419494629,grad_norm: 0.9999989603046827, iteration: 322626
loss: 0.9853801727294922,grad_norm: 0.7875136686967215, iteration: 322627
loss: 1.0335674285888672,grad_norm: 0.9555820822979622, iteration: 322628
loss: 1.010479211807251,grad_norm: 0.7410990800567514, iteration: 322629
loss: 0.9964603781700134,grad_norm: 0.8272679995838521, iteration: 322630
loss: 1.003920555114746,grad_norm: 0.9999991544289919, iteration: 322631
loss: 0.9701147675514221,grad_norm: 0.8547801968440943, iteration: 322632
loss: 0.9880709052085876,grad_norm: 0.7551269190666852, iteration: 322633
loss: 1.0102653503417969,grad_norm: 0.7213536318780154, iteration: 322634
loss: 0.9935781359672546,grad_norm: 0.7355782047485626, iteration: 322635
loss: 1.0644859075546265,grad_norm: 0.9999999653007561, iteration: 322636
loss: 0.9784646034240723,grad_norm: 0.8652144487762824, iteration: 322637
loss: 1.0047663450241089,grad_norm: 0.8185475060777612, iteration: 322638
loss: 0.9959787726402283,grad_norm: 0.9636623463801958, iteration: 322639
loss: 0.9844976663589478,grad_norm: 0.7067800248985556, iteration: 322640
loss: 1.0046412944793701,grad_norm: 0.860787715240261, iteration: 322641
loss: 0.999807596206665,grad_norm: 0.899702184784895, iteration: 322642
loss: 1.0207644701004028,grad_norm: 0.9999991411558502, iteration: 322643
loss: 0.9779789447784424,grad_norm: 0.7414746144138032, iteration: 322644
loss: 1.0255026817321777,grad_norm: 0.8880316083311652, iteration: 322645
loss: 1.0245774984359741,grad_norm: 0.7933007482995331, iteration: 322646
loss: 0.9619187712669373,grad_norm: 0.9877975599977454, iteration: 322647
loss: 1.0015535354614258,grad_norm: 0.8324723268945945, iteration: 322648
loss: 1.0030426979064941,grad_norm: 0.6726723184962989, iteration: 322649
loss: 1.0066323280334473,grad_norm: 0.9999998546878027, iteration: 322650
loss: 0.943528413772583,grad_norm: 0.8230747330728638, iteration: 322651
loss: 0.9900608062744141,grad_norm: 0.8366040926208823, iteration: 322652
loss: 0.9804182052612305,grad_norm: 0.7690885835167671, iteration: 322653
loss: 0.9763686060905457,grad_norm: 0.8309492716664829, iteration: 322654
loss: 0.9723253846168518,grad_norm: 0.9054665435141358, iteration: 322655
loss: 1.0202912092208862,grad_norm: 0.8817995067495777, iteration: 322656
loss: 1.0142455101013184,grad_norm: 0.7901807427633986, iteration: 322657
loss: 0.9727819561958313,grad_norm: 0.8440842677796427, iteration: 322658
loss: 1.0476970672607422,grad_norm: 0.834057932587487, iteration: 322659
loss: 1.0004128217697144,grad_norm: 0.9184320448890378, iteration: 322660
loss: 1.0127723217010498,grad_norm: 0.9616037835574182, iteration: 322661
loss: 0.9848940968513489,grad_norm: 0.7281497412531287, iteration: 322662
loss: 1.1018730401992798,grad_norm: 0.9999993706550776, iteration: 322663
loss: 0.9799501299858093,grad_norm: 0.7797124444483965, iteration: 322664
loss: 1.0000807046890259,grad_norm: 0.7772055211724713, iteration: 322665
loss: 0.9708877205848694,grad_norm: 0.8071465231748611, iteration: 322666
loss: 1.0188689231872559,grad_norm: 0.9895725535114162, iteration: 322667
loss: 1.0145812034606934,grad_norm: 0.7683965660941162, iteration: 322668
loss: 0.989118754863739,grad_norm: 0.8160329342827835, iteration: 322669
loss: 1.0055428743362427,grad_norm: 0.790638688524409, iteration: 322670
loss: 0.994641900062561,grad_norm: 0.8790605501866017, iteration: 322671
loss: 1.0477430820465088,grad_norm: 0.9138234292909315, iteration: 322672
loss: 1.006766438484192,grad_norm: 0.814076368894565, iteration: 322673
loss: 1.005024790763855,grad_norm: 0.9999991906667464, iteration: 322674
loss: 1.0233114957809448,grad_norm: 0.9999991084082817, iteration: 322675
loss: 1.0507731437683105,grad_norm: 0.999999144388106, iteration: 322676
loss: 0.9866059422492981,grad_norm: 0.7425910695311737, iteration: 322677
loss: 1.015750765800476,grad_norm: 0.828749190418892, iteration: 322678
loss: 0.9446809887886047,grad_norm: 0.9922433173998354, iteration: 322679
loss: 1.014469861984253,grad_norm: 0.7408543811475317, iteration: 322680
loss: 1.02683424949646,grad_norm: 0.9376507672491128, iteration: 322681
loss: 0.9766806364059448,grad_norm: 0.7155178771648953, iteration: 322682
loss: 0.9878058433532715,grad_norm: 0.7129776707809402, iteration: 322683
loss: 0.9852993488311768,grad_norm: 0.6517087999932377, iteration: 322684
loss: 0.9924737215042114,grad_norm: 0.6499066287440401, iteration: 322685
loss: 1.0695099830627441,grad_norm: 0.9999992009822334, iteration: 322686
loss: 0.9665009379386902,grad_norm: 0.9999991177519495, iteration: 322687
loss: 0.980402410030365,grad_norm: 0.8209270146945219, iteration: 322688
loss: 1.0132410526275635,grad_norm: 0.7936566991032961, iteration: 322689
loss: 1.0067179203033447,grad_norm: 0.7473126755852466, iteration: 322690
loss: 0.9882551431655884,grad_norm: 0.8334776072192709, iteration: 322691
loss: 1.0055118799209595,grad_norm: 0.9999991801442648, iteration: 322692
loss: 0.9823811054229736,grad_norm: 0.8115132567972324, iteration: 322693
loss: 0.9778858423233032,grad_norm: 0.7761713918315293, iteration: 322694
loss: 1.018618106842041,grad_norm: 0.9999999216888026, iteration: 322695
loss: 0.9939026832580566,grad_norm: 0.7575392786126198, iteration: 322696
loss: 1.0300781726837158,grad_norm: 0.9530681601986661, iteration: 322697
loss: 1.0113781690597534,grad_norm: 0.9212721192336201, iteration: 322698
loss: 1.0095648765563965,grad_norm: 0.7204860648601538, iteration: 322699
loss: 0.986107349395752,grad_norm: 0.7487786631406828, iteration: 322700
loss: 0.980625331401825,grad_norm: 0.8675603790707295, iteration: 322701
loss: 1.006098985671997,grad_norm: 0.9999994438534974, iteration: 322702
loss: 0.9963707327842712,grad_norm: 0.8223925390620149, iteration: 322703
loss: 0.9971774816513062,grad_norm: 0.8423072083134088, iteration: 322704
loss: 0.9787113666534424,grad_norm: 0.7908566073593356, iteration: 322705
loss: 1.0161409378051758,grad_norm: 0.9355217730342212, iteration: 322706
loss: 1.0446739196777344,grad_norm: 0.853514809331462, iteration: 322707
loss: 0.9873540997505188,grad_norm: 0.9988057531225615, iteration: 322708
loss: 1.0308494567871094,grad_norm: 0.9999991558463441, iteration: 322709
loss: 1.0161902904510498,grad_norm: 0.9999991986665144, iteration: 322710
loss: 0.9932242631912231,grad_norm: 0.8698389536517686, iteration: 322711
loss: 1.0103864669799805,grad_norm: 0.7635086429399401, iteration: 322712
loss: 0.959575355052948,grad_norm: 0.9539672264608813, iteration: 322713
loss: 1.0490295886993408,grad_norm: 0.9625323677177392, iteration: 322714
loss: 1.0017526149749756,grad_norm: 0.829142493569467, iteration: 322715
loss: 1.005871295928955,grad_norm: 0.7953768488858906, iteration: 322716
loss: 0.9998313784599304,grad_norm: 0.7379745805216957, iteration: 322717
loss: 1.0985625982284546,grad_norm: 0.999999105262261, iteration: 322718
loss: 0.973201334476471,grad_norm: 0.8266789632450813, iteration: 322719
loss: 1.0477973222732544,grad_norm: 0.999999397818294, iteration: 322720
loss: 1.0810641050338745,grad_norm: 0.9999994002121281, iteration: 322721
loss: 0.9880839586257935,grad_norm: 0.7754810819963289, iteration: 322722
loss: 1.0072846412658691,grad_norm: 0.9422487847841601, iteration: 322723
loss: 1.0133705139160156,grad_norm: 0.9506908880130652, iteration: 322724
loss: 1.0493521690368652,grad_norm: 0.9999993328459809, iteration: 322725
loss: 1.0023095607757568,grad_norm: 0.9999990758693608, iteration: 322726
loss: 1.0266879796981812,grad_norm: 0.8989321430094444, iteration: 322727
loss: 1.0070874691009521,grad_norm: 0.8586007964883394, iteration: 322728
loss: 1.015756368637085,grad_norm: 0.9999991094836997, iteration: 322729
loss: 1.1022472381591797,grad_norm: 0.9618844400486473, iteration: 322730
loss: 1.046239972114563,grad_norm: 0.8184778493604036, iteration: 322731
loss: 0.9546720385551453,grad_norm: 0.7948686176713171, iteration: 322732
loss: 1.0393341779708862,grad_norm: 0.7971661644656934, iteration: 322733
loss: 1.013906717300415,grad_norm: 0.9277000705202111, iteration: 322734
loss: 1.0486189126968384,grad_norm: 0.7727430386586832, iteration: 322735
loss: 1.033812403678894,grad_norm: 0.7297276438958273, iteration: 322736
loss: 1.0675023794174194,grad_norm: 0.9999999658660835, iteration: 322737
loss: 1.0033291578292847,grad_norm: 0.9999996530045536, iteration: 322738
loss: 0.9991071820259094,grad_norm: 0.9949504721747249, iteration: 322739
loss: 1.0078349113464355,grad_norm: 0.9753502130891014, iteration: 322740
loss: 1.0720694065093994,grad_norm: 0.9999991034537957, iteration: 322741
loss: 1.0257806777954102,grad_norm: 0.9999990477452982, iteration: 322742
loss: 0.9439904093742371,grad_norm: 0.8687053551735073, iteration: 322743
loss: 1.042245626449585,grad_norm: 0.999999063171883, iteration: 322744
loss: 1.0909498929977417,grad_norm: 0.9999997179552623, iteration: 322745
loss: 0.96290123462677,grad_norm: 0.7831409602004759, iteration: 322746
loss: 0.9897322654724121,grad_norm: 0.9999998334817306, iteration: 322747
loss: 1.021260380744934,grad_norm: 0.9963861222682571, iteration: 322748
loss: 1.052679419517517,grad_norm: 0.9999999459729184, iteration: 322749
loss: 0.9839633107185364,grad_norm: 0.9264888131168358, iteration: 322750
loss: 1.0826172828674316,grad_norm: 0.999999480940255, iteration: 322751
loss: 0.9850180745124817,grad_norm: 0.8063894965323832, iteration: 322752
loss: 0.984643280506134,grad_norm: 0.8296597372486413, iteration: 322753
loss: 0.9559484124183655,grad_norm: 0.7871503646017364, iteration: 322754
loss: 0.9851318597793579,grad_norm: 0.7895162927693684, iteration: 322755
loss: 0.9771219491958618,grad_norm: 0.8319173524715608, iteration: 322756
loss: 0.9755637645721436,grad_norm: 0.9999992462209492, iteration: 322757
loss: 0.9846274852752686,grad_norm: 0.9704331495059736, iteration: 322758
loss: 0.988203227519989,grad_norm: 0.8087047359797939, iteration: 322759
loss: 1.0027767419815063,grad_norm: 0.7629003797379182, iteration: 322760
loss: 0.9851329922676086,grad_norm: 0.9047607083447796, iteration: 322761
loss: 0.9879883527755737,grad_norm: 0.9396427380818656, iteration: 322762
loss: 1.0050925016403198,grad_norm: 0.9453588966129317, iteration: 322763
loss: 1.0179246664047241,grad_norm: 0.9514995816176641, iteration: 322764
loss: 1.0512104034423828,grad_norm: 0.9999994411901244, iteration: 322765
loss: 0.9983710646629333,grad_norm: 0.8382829208829999, iteration: 322766
loss: 0.9908726215362549,grad_norm: 0.9999990153368539, iteration: 322767
loss: 0.9771836400032043,grad_norm: 0.8607475575139819, iteration: 322768
loss: 1.0687084197998047,grad_norm: 0.7736084516279582, iteration: 322769
loss: 1.043818473815918,grad_norm: 0.993148264736986, iteration: 322770
loss: 1.081134557723999,grad_norm: 0.9872921402394859, iteration: 322771
loss: 1.024340271949768,grad_norm: 0.8625925963265295, iteration: 322772
loss: 0.9941463470458984,grad_norm: 0.7959135938144714, iteration: 322773
loss: 1.0018645524978638,grad_norm: 0.8662243982569027, iteration: 322774
loss: 1.0136593580245972,grad_norm: 0.9999992182057287, iteration: 322775
loss: 1.0086781978607178,grad_norm: 0.9503579068540537, iteration: 322776
loss: 0.9867446422576904,grad_norm: 0.9727829644231516, iteration: 322777
loss: 1.0098949670791626,grad_norm: 0.9999991313649238, iteration: 322778
loss: 1.0198838710784912,grad_norm: 0.8096301979496914, iteration: 322779
loss: 1.0179275274276733,grad_norm: 0.6194223452466237, iteration: 322780
loss: 0.9833812713623047,grad_norm: 0.7540257728459074, iteration: 322781
loss: 1.0068423748016357,grad_norm: 0.9999991048538452, iteration: 322782
loss: 0.9991284012794495,grad_norm: 0.9790891043901391, iteration: 322783
loss: 1.2724955081939697,grad_norm: 0.999999949054524, iteration: 322784
loss: 1.1038486957550049,grad_norm: 0.9999998376359807, iteration: 322785
loss: 1.0142090320587158,grad_norm: 0.7995051907800833, iteration: 322786
loss: 1.018966794013977,grad_norm: 0.8043448164772798, iteration: 322787
loss: 0.9575851559638977,grad_norm: 0.9552984892084215, iteration: 322788
loss: 1.003753662109375,grad_norm: 0.7748365704212261, iteration: 322789
loss: 0.9912692904472351,grad_norm: 0.7157411398228612, iteration: 322790
loss: 1.019932746887207,grad_norm: 0.812767254172582, iteration: 322791
loss: 1.0009444952011108,grad_norm: 0.9999998153045498, iteration: 322792
loss: 1.0318738222122192,grad_norm: 0.8609059453249065, iteration: 322793
loss: 1.001947045326233,grad_norm: 0.8480138491134447, iteration: 322794
loss: 1.012157917022705,grad_norm: 0.9999992157921049, iteration: 322795
loss: 0.9897050261497498,grad_norm: 0.9999996886787595, iteration: 322796
loss: 1.0118844509124756,grad_norm: 0.8967062445325404, iteration: 322797
loss: 0.9501119256019592,grad_norm: 0.8371184972127848, iteration: 322798
loss: 0.9970055818557739,grad_norm: 0.8697966661625587, iteration: 322799
loss: 0.9744555950164795,grad_norm: 0.8590981093696494, iteration: 322800
loss: 0.9907190799713135,grad_norm: 0.9999999826683825, iteration: 322801
loss: 1.0348719358444214,grad_norm: 0.893067175282352, iteration: 322802
loss: 1.044027328491211,grad_norm: 0.9729056045370084, iteration: 322803
loss: 0.9920375347137451,grad_norm: 0.7110954432668156, iteration: 322804
loss: 0.9696592688560486,grad_norm: 0.9653144629209638, iteration: 322805
loss: 1.0171164274215698,grad_norm: 0.8335302835336007, iteration: 322806
loss: 1.0130870342254639,grad_norm: 0.9999991140748691, iteration: 322807
loss: 0.9703210592269897,grad_norm: 0.8653482743491888, iteration: 322808
loss: 0.9955982565879822,grad_norm: 0.9999991809766895, iteration: 322809
loss: 1.004995346069336,grad_norm: 0.8070402331876457, iteration: 322810
loss: 1.0378696918487549,grad_norm: 0.796773015900583, iteration: 322811
loss: 1.0119309425354004,grad_norm: 0.6820675147693924, iteration: 322812
loss: 1.0328933000564575,grad_norm: 0.8724574210069796, iteration: 322813
loss: 1.029914379119873,grad_norm: 0.8251713315620123, iteration: 322814
loss: 1.0476056337356567,grad_norm: 0.8387139413093793, iteration: 322815
loss: 0.9708586931228638,grad_norm: 0.8118350091859006, iteration: 322816
loss: 0.9902828931808472,grad_norm: 0.9999990791585885, iteration: 322817
loss: 1.0620166063308716,grad_norm: 0.9999997013280637, iteration: 322818
loss: 0.9799209833145142,grad_norm: 0.7743545423882554, iteration: 322819
loss: 1.0105913877487183,grad_norm: 0.8553361020320378, iteration: 322820
loss: 0.9968846440315247,grad_norm: 0.7552560906809526, iteration: 322821
loss: 1.0340169668197632,grad_norm: 0.9999991755041947, iteration: 322822
loss: 0.9730591773986816,grad_norm: 0.7713568917299218, iteration: 322823
loss: 1.023612141609192,grad_norm: 0.8412281950015872, iteration: 322824
loss: 1.0213967561721802,grad_norm: 0.9720725776061719, iteration: 322825
loss: 0.9929360747337341,grad_norm: 0.8964680899262465, iteration: 322826
loss: 1.041839599609375,grad_norm: 0.901040215082862, iteration: 322827
loss: 0.9902604818344116,grad_norm: 0.9813700545599717, iteration: 322828
loss: 0.994475781917572,grad_norm: 0.9788135320053846, iteration: 322829
loss: 1.0245879888534546,grad_norm: 0.9999991734811541, iteration: 322830
loss: 1.048690915107727,grad_norm: 0.832457995436915, iteration: 322831
loss: 1.039800763130188,grad_norm: 0.926595692437514, iteration: 322832
loss: 1.0068813562393188,grad_norm: 0.8590951237306879, iteration: 322833
loss: 1.0221118927001953,grad_norm: 0.956050687417485, iteration: 322834
loss: 0.9866073727607727,grad_norm: 0.64466181823732, iteration: 322835
loss: 1.0713932514190674,grad_norm: 0.9999993400275778, iteration: 322836
loss: 0.987821102142334,grad_norm: 0.8220643114249808, iteration: 322837
loss: 0.9764435887336731,grad_norm: 0.8949715487355436, iteration: 322838
loss: 1.0019681453704834,grad_norm: 0.8128677739327731, iteration: 322839
loss: 1.040446400642395,grad_norm: 0.9342700631865465, iteration: 322840
loss: 0.9906449913978577,grad_norm: 0.8499521350243795, iteration: 322841
loss: 1.0797678232192993,grad_norm: 0.9999996106186688, iteration: 322842
loss: 1.0006334781646729,grad_norm: 0.933123116476074, iteration: 322843
loss: 1.097586989402771,grad_norm: 0.999999138698445, iteration: 322844
loss: 0.9556611180305481,grad_norm: 0.7856217791779091, iteration: 322845
loss: 1.0404422283172607,grad_norm: 0.9999998812337925, iteration: 322846
loss: 0.9844239354133606,grad_norm: 0.7994584422913118, iteration: 322847
loss: 1.0230125188827515,grad_norm: 0.9063477683901559, iteration: 322848
loss: 1.015441656112671,grad_norm: 0.9999997954620188, iteration: 322849
loss: 1.0004743337631226,grad_norm: 0.7727850625280026, iteration: 322850
loss: 1.020995855331421,grad_norm: 0.9999989668729271, iteration: 322851
loss: 0.9987126588821411,grad_norm: 0.8907285574656109, iteration: 322852
loss: 1.0415066480636597,grad_norm: 0.9930090271965134, iteration: 322853
loss: 0.9921087026596069,grad_norm: 0.8337020235949393, iteration: 322854
loss: 1.0257108211517334,grad_norm: 0.8225745403523141, iteration: 322855
loss: 0.9960947632789612,grad_norm: 0.7390671092940447, iteration: 322856
loss: 1.0034881830215454,grad_norm: 0.9443136706257922, iteration: 322857
loss: 0.9519902467727661,grad_norm: 0.7972260603396565, iteration: 322858
loss: 0.994778573513031,grad_norm: 0.8980313150965932, iteration: 322859
loss: 1.0360335111618042,grad_norm: 0.9999989644115844, iteration: 322860
loss: 1.0054763555526733,grad_norm: 0.8210842950532679, iteration: 322861
loss: 0.9528374075889587,grad_norm: 0.8299612724489693, iteration: 322862
loss: 1.0476024150848389,grad_norm: 0.9738991708316928, iteration: 322863
loss: 1.0001410245895386,grad_norm: 0.8586279481232723, iteration: 322864
loss: 1.0447767972946167,grad_norm: 0.8939417247782331, iteration: 322865
loss: 1.0214593410491943,grad_norm: 0.9999996123057717, iteration: 322866
loss: 1.0212310552597046,grad_norm: 0.8265094885498478, iteration: 322867
loss: 0.9905096292495728,grad_norm: 0.7170786532359855, iteration: 322868
loss: 1.0019242763519287,grad_norm: 0.9596216551305473, iteration: 322869
loss: 1.0366811752319336,grad_norm: 0.9322254913616185, iteration: 322870
loss: 0.9978518486022949,grad_norm: 0.8551455454910882, iteration: 322871
loss: 1.032194972038269,grad_norm: 0.9905681981054593, iteration: 322872
loss: 1.0367351770401,grad_norm: 0.9999998832330599, iteration: 322873
loss: 1.0023856163024902,grad_norm: 0.9916659425143487, iteration: 322874
loss: 0.9928454756736755,grad_norm: 0.9186777758274097, iteration: 322875
loss: 0.9602952599525452,grad_norm: 0.7530180563587938, iteration: 322876
loss: 1.0405144691467285,grad_norm: 0.9999991143022422, iteration: 322877
loss: 1.015362024307251,grad_norm: 0.7962877891356798, iteration: 322878
loss: 0.9825189113616943,grad_norm: 0.8279856349301564, iteration: 322879
loss: 1.0468686819076538,grad_norm: 0.9999991619224675, iteration: 322880
loss: 0.9866237044334412,grad_norm: 0.7940275911517634, iteration: 322881
loss: 1.0342916250228882,grad_norm: 0.999999559015827, iteration: 322882
loss: 1.014993667602539,grad_norm: 0.8529048198831637, iteration: 322883
loss: 1.0176091194152832,grad_norm: 0.7659444971391687, iteration: 322884
loss: 1.0117595195770264,grad_norm: 0.8692713151373901, iteration: 322885
loss: 1.0166398286819458,grad_norm: 0.9362970962465914, iteration: 322886
loss: 1.0168428421020508,grad_norm: 0.7743791258632114, iteration: 322887
loss: 0.9861733317375183,grad_norm: 0.7622872903626354, iteration: 322888
loss: 1.135425090789795,grad_norm: 0.9982633446528253, iteration: 322889
loss: 1.0140353441238403,grad_norm: 0.8132487311019151, iteration: 322890
loss: 1.0560095310211182,grad_norm: 0.9918074919032794, iteration: 322891
loss: 0.993202805519104,grad_norm: 0.814633805932888, iteration: 322892
loss: 0.9987529516220093,grad_norm: 0.7854018078902031, iteration: 322893
loss: 0.9803480505943298,grad_norm: 0.9075249760161037, iteration: 322894
loss: 1.0023506879806519,grad_norm: 0.8484704844808593, iteration: 322895
loss: 1.0010569095611572,grad_norm: 0.8902354373162399, iteration: 322896
loss: 1.0191680192947388,grad_norm: 0.9604820438613144, iteration: 322897
loss: 1.1030076742172241,grad_norm: 0.9999990932477902, iteration: 322898
loss: 0.9910732507705688,grad_norm: 0.6403277072503399, iteration: 322899
loss: 0.9829146862030029,grad_norm: 0.9576584783713643, iteration: 322900
loss: 0.9913901686668396,grad_norm: 0.9065073045488393, iteration: 322901
loss: 1.0042909383773804,grad_norm: 0.7474495246368994, iteration: 322902
loss: 0.997532308101654,grad_norm: 0.7961546749740105, iteration: 322903
loss: 1.0081541538238525,grad_norm: 0.9999990694158791, iteration: 322904
loss: 1.023011565208435,grad_norm: 0.8902308135923619, iteration: 322905
loss: 0.9801806807518005,grad_norm: 0.9101162357132301, iteration: 322906
loss: 1.0261142253875732,grad_norm: 0.9882636972462187, iteration: 322907
loss: 1.1047672033309937,grad_norm: 0.9999996885232254, iteration: 322908
loss: 1.0200790166854858,grad_norm: 0.8725792765299712, iteration: 322909
loss: 1.0545605421066284,grad_norm: 0.9795060672785605, iteration: 322910
loss: 1.0109738111495972,grad_norm: 0.9999995674849904, iteration: 322911
loss: 1.0090258121490479,grad_norm: 0.7195395465538446, iteration: 322912
loss: 0.9900962114334106,grad_norm: 0.9347159041994964, iteration: 322913
loss: 1.0912781953811646,grad_norm: 1.0000000064882486, iteration: 322914
loss: 1.0246894359588623,grad_norm: 0.9215225228936276, iteration: 322915
loss: 0.9831601977348328,grad_norm: 0.8874650198487772, iteration: 322916
loss: 0.9676665663719177,grad_norm: 0.8756031099214795, iteration: 322917
loss: 1.0186265707015991,grad_norm: 0.9999991497127456, iteration: 322918
loss: 1.0271389484405518,grad_norm: 0.9999990880548006, iteration: 322919
loss: 1.0254145860671997,grad_norm: 0.8822513439073205, iteration: 322920
loss: 0.9909604787826538,grad_norm: 0.8404502382952883, iteration: 322921
loss: 1.0269848108291626,grad_norm: 0.7637604709688753, iteration: 322922
loss: 0.997600257396698,grad_norm: 0.8584522948347141, iteration: 322923
loss: 1.0511736869812012,grad_norm: 0.9999998011267668, iteration: 322924
loss: 1.028153657913208,grad_norm: 0.8760583066504541, iteration: 322925
loss: 0.9473409652709961,grad_norm: 0.8553292739775404, iteration: 322926
loss: 0.9882305264472961,grad_norm: 0.7731835075338345, iteration: 322927
loss: 1.0145866870880127,grad_norm: 0.8774168913067065, iteration: 322928
loss: 1.0225245952606201,grad_norm: 0.7853480810174732, iteration: 322929
loss: 1.006116509437561,grad_norm: 0.82577893862233, iteration: 322930
loss: 0.9929622411727905,grad_norm: 0.999999033027919, iteration: 322931
loss: 1.0067578554153442,grad_norm: 0.9707644610727499, iteration: 322932
loss: 0.9965943694114685,grad_norm: 0.99999896752945, iteration: 322933
loss: 0.9824916124343872,grad_norm: 0.831823150941549, iteration: 322934
loss: 0.972866952419281,grad_norm: 0.9377534648571694, iteration: 322935
loss: 1.007494330406189,grad_norm: 0.8888859020564417, iteration: 322936
loss: 1.0261718034744263,grad_norm: 0.8597905867193636, iteration: 322937
loss: 0.9836205244064331,grad_norm: 0.9530737967113171, iteration: 322938
loss: 0.9366917610168457,grad_norm: 0.9999989787884049, iteration: 322939
loss: 1.0266505479812622,grad_norm: 0.7893254281334956, iteration: 322940
loss: 0.983232319355011,grad_norm: 0.94886293468803, iteration: 322941
loss: 0.9680317640304565,grad_norm: 0.8326821496946304, iteration: 322942
loss: 1.080113410949707,grad_norm: 0.9999991429846526, iteration: 322943
loss: 1.0003381967544556,grad_norm: 0.9999992256897269, iteration: 322944
loss: 1.0189560651779175,grad_norm: 0.999999747603708, iteration: 322945
loss: 1.0007379055023193,grad_norm: 0.8571957883353086, iteration: 322946
loss: 1.0043562650680542,grad_norm: 0.7430735791122562, iteration: 322947
loss: 1.030507206916809,grad_norm: 0.9999999775025237, iteration: 322948
loss: 0.9950850605964661,grad_norm: 0.7682856719577257, iteration: 322949
loss: 0.9866594672203064,grad_norm: 0.7927882958830098, iteration: 322950
loss: 0.9922772645950317,grad_norm: 0.7858671645513889, iteration: 322951
loss: 1.001901388168335,grad_norm: 0.8557533246091242, iteration: 322952
loss: 1.0166420936584473,grad_norm: 0.7950579350073331, iteration: 322953
loss: 0.9794884920120239,grad_norm: 0.994931157802783, iteration: 322954
loss: 0.9870153069496155,grad_norm: 0.7958140462214519, iteration: 322955
loss: 1.0715829133987427,grad_norm: 0.9999994741965604, iteration: 322956
loss: 1.0132073163986206,grad_norm: 0.9863227554026313, iteration: 322957
loss: 1.0403341054916382,grad_norm: 0.9999993723183128, iteration: 322958
loss: 0.9990136027336121,grad_norm: 0.999999139561308, iteration: 322959
loss: 1.0279755592346191,grad_norm: 0.9999992840979058, iteration: 322960
loss: 1.0465185642242432,grad_norm: 0.7373858749834151, iteration: 322961
loss: 1.0821070671081543,grad_norm: 0.9999998930431407, iteration: 322962
loss: 0.9855681657791138,grad_norm: 0.8121646594704108, iteration: 322963
loss: 0.9947649240493774,grad_norm: 0.8392933717894948, iteration: 322964
loss: 0.9641968607902527,grad_norm: 0.8433046951756602, iteration: 322965
loss: 0.9930992722511292,grad_norm: 0.9999990989538907, iteration: 322966
loss: 1.0239850282669067,grad_norm: 0.8069884299503192, iteration: 322967
loss: 1.097785472869873,grad_norm: 0.9999997727515881, iteration: 322968
loss: 1.0210151672363281,grad_norm: 0.8056084636473326, iteration: 322969
loss: 0.9846326112747192,grad_norm: 0.9999998853310512, iteration: 322970
loss: 0.9670799374580383,grad_norm: 0.8617527390634963, iteration: 322971
loss: 1.003910779953003,grad_norm: 0.8034053106239194, iteration: 322972
loss: 0.9962913393974304,grad_norm: 0.8084726651291826, iteration: 322973
loss: 1.0020700693130493,grad_norm: 0.7914549888370264, iteration: 322974
loss: 0.9996857047080994,grad_norm: 0.7681874763037888, iteration: 322975
loss: 0.9828624725341797,grad_norm: 0.7301666997615496, iteration: 322976
loss: 1.0124543905258179,grad_norm: 0.7946371258244003, iteration: 322977
loss: 0.9710167050361633,grad_norm: 0.8797424524107669, iteration: 322978
loss: 1.045040488243103,grad_norm: 0.7067771006868268, iteration: 322979
loss: 0.9989664554595947,grad_norm: 0.786469077279335, iteration: 322980
loss: 0.9998975992202759,grad_norm: 0.9119008462082046, iteration: 322981
loss: 1.0487747192382812,grad_norm: 0.9999995690392949, iteration: 322982
loss: 0.9442809820175171,grad_norm: 0.9897083816564399, iteration: 322983
loss: 0.9930487275123596,grad_norm: 0.7888534258863144, iteration: 322984
loss: 0.9992851614952087,grad_norm: 0.9034689134147883, iteration: 322985
loss: 1.0160399675369263,grad_norm: 0.9412957588464027, iteration: 322986
loss: 0.947140634059906,grad_norm: 0.8106093792081047, iteration: 322987
loss: 0.9935938715934753,grad_norm: 0.9065919181708758, iteration: 322988
loss: 1.0138301849365234,grad_norm: 0.8624940482347367, iteration: 322989
loss: 1.0045615434646606,grad_norm: 0.6829799990512175, iteration: 322990
loss: 0.9970812201499939,grad_norm: 0.8891092473594435, iteration: 322991
loss: 0.9781999588012695,grad_norm: 0.95015753468793, iteration: 322992
loss: 0.9930919408798218,grad_norm: 0.7826564148055231, iteration: 322993
loss: 1.0011875629425049,grad_norm: 0.8834621069400383, iteration: 322994
loss: 0.9735313057899475,grad_norm: 0.9351250779732271, iteration: 322995
loss: 0.9895067811012268,grad_norm: 0.8287663345200634, iteration: 322996
loss: 1.0236237049102783,grad_norm: 0.9949736171153216, iteration: 322997
loss: 0.943975031375885,grad_norm: 0.71520069663768, iteration: 322998
loss: 0.9894627928733826,grad_norm: 0.7883211442846907, iteration: 322999
loss: 1.0220749378204346,grad_norm: 0.9044058675921424, iteration: 323000
loss: 0.9594536423683167,grad_norm: 0.7935569782084281, iteration: 323001
loss: 0.9999527335166931,grad_norm: 0.8679911409716646, iteration: 323002
loss: 0.95684415102005,grad_norm: 0.9999996972889128, iteration: 323003
loss: 1.022735357284546,grad_norm: 0.8926947872670143, iteration: 323004
loss: 1.0206100940704346,grad_norm: 0.7925957575964082, iteration: 323005
loss: 0.9774734973907471,grad_norm: 0.9610716294706014, iteration: 323006
loss: 1.0023714303970337,grad_norm: 0.8228327292066143, iteration: 323007
loss: 1.0191315412521362,grad_norm: 0.743286873776323, iteration: 323008
loss: 0.9970622658729553,grad_norm: 0.7747250412513852, iteration: 323009
loss: 1.0056605339050293,grad_norm: 0.8862922068807872, iteration: 323010
loss: 1.0254731178283691,grad_norm: 0.8038079912399945, iteration: 323011
loss: 0.9827134609222412,grad_norm: 0.7936175763843543, iteration: 323012
loss: 0.9672204852104187,grad_norm: 0.7898215343088003, iteration: 323013
loss: 1.0059677362442017,grad_norm: 0.8411046400535568, iteration: 323014
loss: 0.9743453860282898,grad_norm: 0.7687526692010449, iteration: 323015
loss: 1.0081056356430054,grad_norm: 0.8632729843984935, iteration: 323016
loss: 0.9911397099494934,grad_norm: 0.9153007516542174, iteration: 323017
loss: 1.0377211570739746,grad_norm: 0.7283482188625268, iteration: 323018
loss: 1.008863925933838,grad_norm: 0.9666248454674621, iteration: 323019
loss: 1.0207427740097046,grad_norm: 0.8530389857572019, iteration: 323020
loss: 1.0058588981628418,grad_norm: 0.7076149080852919, iteration: 323021
loss: 0.9940946102142334,grad_norm: 0.8074216749889954, iteration: 323022
loss: 0.9754647016525269,grad_norm: 0.999999112881161, iteration: 323023
loss: 1.1189203262329102,grad_norm: 0.9999997479770079, iteration: 323024
loss: 0.979264497756958,grad_norm: 0.8015860197375049, iteration: 323025
loss: 0.9971827268600464,grad_norm: 0.9665194921888464, iteration: 323026
loss: 0.9733625650405884,grad_norm: 0.9139044267769756, iteration: 323027
loss: 1.0137590169906616,grad_norm: 0.8606077758676547, iteration: 323028
loss: 0.9787795543670654,grad_norm: 0.7908141436879592, iteration: 323029
loss: 1.0379612445831299,grad_norm: 0.910827818790099, iteration: 323030
loss: 0.9989583492279053,grad_norm: 0.8893216586180624, iteration: 323031
loss: 1.026680588722229,grad_norm: 0.8308063912153083, iteration: 323032
loss: 1.0120099782943726,grad_norm: 0.9245208601063116, iteration: 323033
loss: 0.9963453412055969,grad_norm: 0.7739251578845867, iteration: 323034
loss: 1.001974105834961,grad_norm: 0.8544962534335724, iteration: 323035
loss: 1.0137863159179688,grad_norm: 0.8235209818309865, iteration: 323036
loss: 1.024371862411499,grad_norm: 0.8002805570744266, iteration: 323037
loss: 0.9789347648620605,grad_norm: 0.9345267629434705, iteration: 323038
loss: 0.9802965521812439,grad_norm: 0.7415776811370848, iteration: 323039
loss: 1.0058879852294922,grad_norm: 0.835669335344107, iteration: 323040
loss: 0.9895380139350891,grad_norm: 0.873867340550201, iteration: 323041
loss: 0.9635552763938904,grad_norm: 0.8115918564261168, iteration: 323042
loss: 1.0203348398208618,grad_norm: 0.9541495893611011, iteration: 323043
loss: 0.963143527507782,grad_norm: 0.780035628887693, iteration: 323044
loss: 1.0242892503738403,grad_norm: 0.8161564127586515, iteration: 323045
loss: 0.965025782585144,grad_norm: 0.7800255651957456, iteration: 323046
loss: 0.9944897890090942,grad_norm: 0.8280717054053057, iteration: 323047
loss: 1.0014172792434692,grad_norm: 0.8403804654646352, iteration: 323048
loss: 0.9883185029029846,grad_norm: 0.8571608701546057, iteration: 323049
loss: 0.9578539133071899,grad_norm: 0.9154640805647614, iteration: 323050
loss: 0.9905276894569397,grad_norm: 0.9999991377997748, iteration: 323051
loss: 0.9607582688331604,grad_norm: 0.9200708477560393, iteration: 323052
loss: 0.988591194152832,grad_norm: 0.9999991669359851, iteration: 323053
loss: 0.9961959719657898,grad_norm: 0.7161764738115532, iteration: 323054
loss: 0.9910716414451599,grad_norm: 0.8090132627566488, iteration: 323055
loss: 1.0072752237319946,grad_norm: 0.9054696587976938, iteration: 323056
loss: 1.007637858390808,grad_norm: 0.7773319816305326, iteration: 323057
loss: 0.9807139039039612,grad_norm: 0.7889625528600792, iteration: 323058
loss: 1.01656973361969,grad_norm: 0.9317443736325839, iteration: 323059
loss: 1.0204106569290161,grad_norm: 0.8789225836244012, iteration: 323060
loss: 0.979228675365448,grad_norm: 0.8937787185620365, iteration: 323061
loss: 1.0595906972885132,grad_norm: 0.9999993142853612, iteration: 323062
loss: 1.0699214935302734,grad_norm: 0.8963319669713573, iteration: 323063
loss: 1.0082608461380005,grad_norm: 0.8007723427187757, iteration: 323064
loss: 1.0045413970947266,grad_norm: 0.7708464640021877, iteration: 323065
loss: 1.028295636177063,grad_norm: 0.8832733533860706, iteration: 323066
loss: 0.9885916113853455,grad_norm: 0.9898522725251173, iteration: 323067
loss: 1.0317530632019043,grad_norm: 0.8764405153814201, iteration: 323068
loss: 1.018998384475708,grad_norm: 0.8087783062931938, iteration: 323069
loss: 1.0136375427246094,grad_norm: 0.8600803086220865, iteration: 323070
loss: 1.0276328325271606,grad_norm: 0.8915815444461536, iteration: 323071
loss: 0.9574471712112427,grad_norm: 0.956926441582298, iteration: 323072
loss: 0.992380678653717,grad_norm: 0.8387453851089889, iteration: 323073
loss: 1.0111005306243896,grad_norm: 0.7620086513413724, iteration: 323074
loss: 1.0159215927124023,grad_norm: 0.793446580070341, iteration: 323075
loss: 1.0393823385238647,grad_norm: 0.9999998768286391, iteration: 323076
loss: 0.9833477139472961,grad_norm: 0.7915718386589603, iteration: 323077
loss: 1.0104968547821045,grad_norm: 0.9999991465901942, iteration: 323078
loss: 1.0228923559188843,grad_norm: 0.7637289496530537, iteration: 323079
loss: 0.9958801865577698,grad_norm: 0.7373918841731159, iteration: 323080
loss: 0.9903966188430786,grad_norm: 0.9166285325520241, iteration: 323081
loss: 1.0349382162094116,grad_norm: 0.9561499914698065, iteration: 323082
loss: 1.0374460220336914,grad_norm: 0.8233119364825126, iteration: 323083
loss: 1.0124666690826416,grad_norm: 0.8074717066833739, iteration: 323084
loss: 1.0135440826416016,grad_norm: 0.8082107687813166, iteration: 323085
loss: 1.054531216621399,grad_norm: 0.9999996552793752, iteration: 323086
loss: 1.0327833890914917,grad_norm: 0.999999767097815, iteration: 323087
loss: 1.0918622016906738,grad_norm: 0.9831011874892898, iteration: 323088
loss: 1.0497101545333862,grad_norm: 0.8815374890568036, iteration: 323089
loss: 0.9843437075614929,grad_norm: 0.7783198475601014, iteration: 323090
loss: 0.9614278078079224,grad_norm: 0.9594595865049652, iteration: 323091
loss: 0.9902249574661255,grad_norm: 0.8345388661437437, iteration: 323092
loss: 0.9690614342689514,grad_norm: 0.9669537660167732, iteration: 323093
loss: 0.9939362406730652,grad_norm: 0.8464390470271899, iteration: 323094
loss: 0.9882822036743164,grad_norm: 0.8655960452821345, iteration: 323095
loss: 1.0104870796203613,grad_norm: 0.8227319244882558, iteration: 323096
loss: 0.9946210980415344,grad_norm: 0.7690544531280874, iteration: 323097
loss: 1.0456125736236572,grad_norm: 0.7380362952474339, iteration: 323098
loss: 0.9855921268463135,grad_norm: 0.8510720135996893, iteration: 323099
loss: 1.0161583423614502,grad_norm: 0.8485464240758867, iteration: 323100
loss: 1.0354857444763184,grad_norm: 0.779777027843947, iteration: 323101
loss: 1.0520113706588745,grad_norm: 0.999999575721978, iteration: 323102
loss: 0.985076904296875,grad_norm: 0.7740821440941754, iteration: 323103
loss: 0.9851043820381165,grad_norm: 0.8702663852681147, iteration: 323104
loss: 1.0245187282562256,grad_norm: 0.999999893363502, iteration: 323105
loss: 0.9965428113937378,grad_norm: 0.8832048800990873, iteration: 323106
loss: 0.9696714878082275,grad_norm: 0.8663627933093794, iteration: 323107
loss: 1.018314242362976,grad_norm: 0.9730485990047122, iteration: 323108
loss: 1.0411441326141357,grad_norm: 0.7533403363891533, iteration: 323109
loss: 0.9858540296554565,grad_norm: 0.9999989063063796, iteration: 323110
loss: 1.0193027257919312,grad_norm: 0.8339667091904203, iteration: 323111
loss: 1.0268113613128662,grad_norm: 0.851665380366843, iteration: 323112
loss: 0.9682654738426208,grad_norm: 0.7387075323055269, iteration: 323113
loss: 1.0129605531692505,grad_norm: 0.9487101930871177, iteration: 323114
loss: 1.0109455585479736,grad_norm: 0.8997387095875025, iteration: 323115
loss: 0.9825189113616943,grad_norm: 0.7886997717864264, iteration: 323116
loss: 0.991854190826416,grad_norm: 0.7379600123973562, iteration: 323117
loss: 0.9999781847000122,grad_norm: 0.7724472559498299, iteration: 323118
loss: 1.0150781869888306,grad_norm: 0.7333923730216886, iteration: 323119
loss: 0.9787808060646057,grad_norm: 0.7828045328414333, iteration: 323120
loss: 1.0408180952072144,grad_norm: 0.9148673220866962, iteration: 323121
loss: 1.0180193185806274,grad_norm: 0.8421902208299986, iteration: 323122
loss: 1.0250067710876465,grad_norm: 0.9999993026044932, iteration: 323123
loss: 0.9874700903892517,grad_norm: 0.8282945098486081, iteration: 323124
loss: 1.0534989833831787,grad_norm: 0.9999990296667859, iteration: 323125
loss: 1.0218065977096558,grad_norm: 0.7967847892032633, iteration: 323126
loss: 1.028619647026062,grad_norm: 0.999999005660114, iteration: 323127
loss: 1.0650492906570435,grad_norm: 0.9999992206293594, iteration: 323128
loss: 0.9781156182289124,grad_norm: 0.7853399145896274, iteration: 323129
loss: 1.0019069910049438,grad_norm: 0.898686694616528, iteration: 323130
loss: 1.0178197622299194,grad_norm: 0.999999160656115, iteration: 323131
loss: 1.039499044418335,grad_norm: 0.8437567868993635, iteration: 323132
loss: 1.0064948797225952,grad_norm: 0.7322125390596081, iteration: 323133
loss: 1.0066784620285034,grad_norm: 0.7433388697112709, iteration: 323134
loss: 1.0263155698776245,grad_norm: 0.9999992883650692, iteration: 323135
loss: 0.9623088836669922,grad_norm: 0.9999991859450753, iteration: 323136
loss: 0.9863348603248596,grad_norm: 0.9205761177748373, iteration: 323137
loss: 1.0850616693496704,grad_norm: 0.9999995333392933, iteration: 323138
loss: 1.0818219184875488,grad_norm: 0.9999990618535193, iteration: 323139
loss: 1.0735867023468018,grad_norm: 0.985350972850911, iteration: 323140
loss: 0.9924743175506592,grad_norm: 0.9999990902987189, iteration: 323141
loss: 1.007292628288269,grad_norm: 0.9999995535103496, iteration: 323142
loss: 0.9736316800117493,grad_norm: 0.8436915891427105, iteration: 323143
loss: 0.9759014248847961,grad_norm: 0.7178554265815478, iteration: 323144
loss: 0.9961692690849304,grad_norm: 0.9999999243923464, iteration: 323145
loss: 0.9691574573516846,grad_norm: 0.976824481562062, iteration: 323146
loss: 1.0115636587142944,grad_norm: 0.8263943002369695, iteration: 323147
loss: 0.9974033832550049,grad_norm: 0.9999995634133189, iteration: 323148
loss: 0.9847500920295715,grad_norm: 0.9999990539345364, iteration: 323149
loss: 1.029606819152832,grad_norm: 0.844769708064167, iteration: 323150
loss: 1.079374074935913,grad_norm: 0.9999996510098896, iteration: 323151
loss: 1.014351487159729,grad_norm: 0.8178091285843553, iteration: 323152
loss: 0.9991098046302795,grad_norm: 0.9255134440879501, iteration: 323153
loss: 0.9825461506843567,grad_norm: 0.6940798592810745, iteration: 323154
loss: 0.977537214756012,grad_norm: 0.8789387780732346, iteration: 323155
loss: 1.0059309005737305,grad_norm: 0.9999992559277238, iteration: 323156
loss: 1.0195175409317017,grad_norm: 0.858829153643984, iteration: 323157
loss: 1.0313303470611572,grad_norm: 0.9999992907897957, iteration: 323158
loss: 1.0286579132080078,grad_norm: 0.871929523349037, iteration: 323159
loss: 0.971638023853302,grad_norm: 0.8087361054280876, iteration: 323160
loss: 0.9852869510650635,grad_norm: 0.9020369768815336, iteration: 323161
loss: 0.9598231911659241,grad_norm: 0.874994738504341, iteration: 323162
loss: 1.0304590463638306,grad_norm: 0.872055845899168, iteration: 323163
loss: 1.0047824382781982,grad_norm: 0.9226704768639659, iteration: 323164
loss: 0.9817032814025879,grad_norm: 0.9999991983231916, iteration: 323165
loss: 1.0231070518493652,grad_norm: 0.8531279367798524, iteration: 323166
loss: 1.0445419549942017,grad_norm: 0.9999991792251732, iteration: 323167
loss: 1.0105003118515015,grad_norm: 0.9284226198877279, iteration: 323168
loss: 1.033211588859558,grad_norm: 0.9999990819598953, iteration: 323169
loss: 1.0062980651855469,grad_norm: 0.8314704596482333, iteration: 323170
loss: 0.9910391569137573,grad_norm: 0.841111199454126, iteration: 323171
loss: 1.0683221817016602,grad_norm: 0.9548389893625177, iteration: 323172
loss: 0.981765866279602,grad_norm: 0.9999994411168056, iteration: 323173
loss: 1.0024418830871582,grad_norm: 0.8572868377899927, iteration: 323174
loss: 1.049480676651001,grad_norm: 0.9999996020171013, iteration: 323175
loss: 1.0286561250686646,grad_norm: 0.9999994775032919, iteration: 323176
loss: 1.012419581413269,grad_norm: 0.9429575241445575, iteration: 323177
loss: 1.0202351808547974,grad_norm: 0.9999990785351069, iteration: 323178
loss: 0.9543989300727844,grad_norm: 0.7629221600421852, iteration: 323179
loss: 1.017374873161316,grad_norm: 0.6949697337525881, iteration: 323180
loss: 0.9837150573730469,grad_norm: 0.8044002507194289, iteration: 323181
loss: 0.9705146551132202,grad_norm: 0.7962220823548153, iteration: 323182
loss: 1.0371513366699219,grad_norm: 0.9999991167226281, iteration: 323183
loss: 1.001912236213684,grad_norm: 0.8454858766156522, iteration: 323184
loss: 0.9519442319869995,grad_norm: 0.9725768460155845, iteration: 323185
loss: 0.9920710325241089,grad_norm: 0.8897944371072194, iteration: 323186
loss: 1.021291732788086,grad_norm: 0.8605971303663077, iteration: 323187
loss: 0.9649897813796997,grad_norm: 0.9999996026741813, iteration: 323188
loss: 1.022425889968872,grad_norm: 0.9999991537268007, iteration: 323189
loss: 1.0416595935821533,grad_norm: 0.9999990309853707, iteration: 323190
loss: 0.9749226570129395,grad_norm: 0.8832007940897931, iteration: 323191
loss: 0.9932180047035217,grad_norm: 0.8197808072914041, iteration: 323192
loss: 0.96534264087677,grad_norm: 0.8248884351460212, iteration: 323193
loss: 1.0049537420272827,grad_norm: 0.8332918597316669, iteration: 323194
loss: 1.0183905363082886,grad_norm: 0.8168362369680724, iteration: 323195
loss: 1.0048246383666992,grad_norm: 0.8082900406123282, iteration: 323196
loss: 1.035738468170166,grad_norm: 0.9082424799326856, iteration: 323197
loss: 0.9909741878509521,grad_norm: 0.8809701824245046, iteration: 323198
loss: 1.0172909498214722,grad_norm: 0.9249733639887637, iteration: 323199
loss: 1.00482976436615,grad_norm: 0.802502791148952, iteration: 323200
loss: 0.9907962083816528,grad_norm: 0.7449524348198114, iteration: 323201
loss: 1.0087987184524536,grad_norm: 0.8961743315392979, iteration: 323202
loss: 1.0158950090408325,grad_norm: 0.77367511581435, iteration: 323203
loss: 1.0188733339309692,grad_norm: 0.6668263750577522, iteration: 323204
loss: 0.9931057691574097,grad_norm: 0.7690048794002551, iteration: 323205
loss: 0.9739534854888916,grad_norm: 0.9999992336402602, iteration: 323206
loss: 1.0190467834472656,grad_norm: 0.8853016048776465, iteration: 323207
loss: 0.9625847339630127,grad_norm: 0.7541429891473601, iteration: 323208
loss: 1.020538091659546,grad_norm: 0.8145810288059526, iteration: 323209
loss: 1.0484758615493774,grad_norm: 0.914812760426529, iteration: 323210
loss: 0.978736162185669,grad_norm: 0.7708388438356226, iteration: 323211
loss: 1.0098731517791748,grad_norm: 0.8110771651114485, iteration: 323212
loss: 0.982001781463623,grad_norm: 0.7863823229155825, iteration: 323213
loss: 1.0492675304412842,grad_norm: 0.8629570382620146, iteration: 323214
loss: 0.9825732707977295,grad_norm: 0.9058361433480754, iteration: 323215
loss: 0.9720578789710999,grad_norm: 0.8969698299072856, iteration: 323216
loss: 1.0186764001846313,grad_norm: 0.9012425860577966, iteration: 323217
loss: 1.0242260694503784,grad_norm: 0.8316407708380916, iteration: 323218
loss: 0.9623923301696777,grad_norm: 0.9207067613464868, iteration: 323219
loss: 1.0037920475006104,grad_norm: 0.9294485408469593, iteration: 323220
loss: 1.1544620990753174,grad_norm: 0.9999998692016254, iteration: 323221
loss: 0.9945672750473022,grad_norm: 0.7682981903148248, iteration: 323222
loss: 1.0060869455337524,grad_norm: 0.8078614495645274, iteration: 323223
loss: 1.0272243022918701,grad_norm: 0.7761452072040946, iteration: 323224
loss: 0.9646998643875122,grad_norm: 0.7697058700982474, iteration: 323225
loss: 0.9820799231529236,grad_norm: 0.9999995045822735, iteration: 323226
loss: 0.982140839099884,grad_norm: 0.999999406749644, iteration: 323227
loss: 0.9924324154853821,grad_norm: 0.6523063034425064, iteration: 323228
loss: 0.9846802353858948,grad_norm: 0.6862684926769297, iteration: 323229
loss: 1.003006100654602,grad_norm: 0.939505689064414, iteration: 323230
loss: 1.01056706905365,grad_norm: 0.7892074764528682, iteration: 323231
loss: 0.9907544851303101,grad_norm: 0.8249752553734048, iteration: 323232
loss: 0.9884853959083557,grad_norm: 0.874024467716579, iteration: 323233
loss: 1.0291264057159424,grad_norm: 0.9999991427693554, iteration: 323234
loss: 0.987375795841217,grad_norm: 0.9999992929181855, iteration: 323235
loss: 0.9969264268875122,grad_norm: 0.7855435725520191, iteration: 323236
loss: 1.013023018836975,grad_norm: 0.8630029613560461, iteration: 323237
loss: 1.025355339050293,grad_norm: 0.9999996778539769, iteration: 323238
loss: 0.9800804257392883,grad_norm: 0.7297857553562351, iteration: 323239
loss: 1.0028051137924194,grad_norm: 0.8872574432623636, iteration: 323240
loss: 1.0191320180892944,grad_norm: 0.9225607034386736, iteration: 323241
loss: 0.9733198285102844,grad_norm: 0.8389037427839088, iteration: 323242
loss: 0.9888287782669067,grad_norm: 0.8062784238234368, iteration: 323243
loss: 0.9713364243507385,grad_norm: 0.7797995006276384, iteration: 323244
loss: 1.0424585342407227,grad_norm: 0.9316479901871799, iteration: 323245
loss: 1.004844307899475,grad_norm: 0.8619186856415888, iteration: 323246
loss: 0.9960724115371704,grad_norm: 0.8599112520755318, iteration: 323247
loss: 1.0084912776947021,grad_norm: 0.7540570305513112, iteration: 323248
loss: 0.9834740161895752,grad_norm: 0.805292632194604, iteration: 323249
loss: 1.0130020380020142,grad_norm: 0.9999991131045565, iteration: 323250
loss: 1.0609557628631592,grad_norm: 0.8851215894802007, iteration: 323251
loss: 0.9705657958984375,grad_norm: 0.9156454469584341, iteration: 323252
loss: 1.034124732017517,grad_norm: 0.7354699967642042, iteration: 323253
loss: 1.016152262687683,grad_norm: 0.9281956427316759, iteration: 323254
loss: 1.002496600151062,grad_norm: 0.7847097752530555, iteration: 323255
loss: 1.0063550472259521,grad_norm: 0.8570230745207622, iteration: 323256
loss: 0.991651177406311,grad_norm: 0.8549975888989358, iteration: 323257
loss: 1.021148443222046,grad_norm: 0.9999991753328117, iteration: 323258
loss: 0.9916573762893677,grad_norm: 0.7743900525286392, iteration: 323259
loss: 1.013352870941162,grad_norm: 0.7123908472596397, iteration: 323260
loss: 0.9813161492347717,grad_norm: 0.9618083690838592, iteration: 323261
loss: 1.0085276365280151,grad_norm: 0.9364160337411069, iteration: 323262
loss: 0.9677655100822449,grad_norm: 0.9347793834980318, iteration: 323263
loss: 1.0495582818984985,grad_norm: 0.7906535820142432, iteration: 323264
loss: 1.0176628828048706,grad_norm: 0.7679527972460002, iteration: 323265
loss: 1.0052058696746826,grad_norm: 0.8701226063109591, iteration: 323266
loss: 0.9859718680381775,grad_norm: 0.9008685319279613, iteration: 323267
loss: 1.0124539136886597,grad_norm: 0.9999995029424212, iteration: 323268
loss: 1.0762537717819214,grad_norm: 0.9999993659955472, iteration: 323269
loss: 1.0289244651794434,grad_norm: 0.6281697098849152, iteration: 323270
loss: 0.9823383092880249,grad_norm: 0.7532922088119434, iteration: 323271
loss: 0.9820253252983093,grad_norm: 0.8139585863529746, iteration: 323272
loss: 0.9851023554801941,grad_norm: 0.9176773903789452, iteration: 323273
loss: 1.0113985538482666,grad_norm: 0.7268161137799116, iteration: 323274
loss: 1.0845760107040405,grad_norm: 0.8825687649660063, iteration: 323275
loss: 0.9999604821205139,grad_norm: 0.8504700600455751, iteration: 323276
loss: 1.0161590576171875,grad_norm: 0.8046555014442048, iteration: 323277
loss: 0.9857124090194702,grad_norm: 0.7075605216051865, iteration: 323278
loss: 1.024941086769104,grad_norm: 0.8339542859614374, iteration: 323279
loss: 1.0728366374969482,grad_norm: 0.8723536135237994, iteration: 323280
loss: 1.0214803218841553,grad_norm: 0.8707561794521974, iteration: 323281
loss: 1.0050761699676514,grad_norm: 0.8639746663242078, iteration: 323282
loss: 1.0399506092071533,grad_norm: 0.9020597161283448, iteration: 323283
loss: 0.9996859431266785,grad_norm: 0.8967445177634433, iteration: 323284
loss: 0.9821581840515137,grad_norm: 0.8189855817936593, iteration: 323285
loss: 1.024969458580017,grad_norm: 0.8254776743848695, iteration: 323286
loss: 0.9912474751472473,grad_norm: 0.7861760027297816, iteration: 323287
loss: 0.9915747046470642,grad_norm: 0.7313782063181451, iteration: 323288
loss: 1.005475640296936,grad_norm: 0.7462003096402404, iteration: 323289
loss: 0.9881352782249451,grad_norm: 0.904976223824697, iteration: 323290
loss: 0.9921748638153076,grad_norm: 0.6995802643373595, iteration: 323291
loss: 1.0377845764160156,grad_norm: 0.9275590666607562, iteration: 323292
loss: 1.031241774559021,grad_norm: 0.7941200671142378, iteration: 323293
loss: 1.001882553100586,grad_norm: 0.8684821646901879, iteration: 323294
loss: 0.9976254105567932,grad_norm: 0.7446528694243941, iteration: 323295
loss: 1.0159083604812622,grad_norm: 0.9999991775559848, iteration: 323296
loss: 0.9864723682403564,grad_norm: 0.9197087303310718, iteration: 323297
loss: 0.9622799754142761,grad_norm: 0.7997170381589582, iteration: 323298
loss: 1.0452609062194824,grad_norm: 0.8461447847838393, iteration: 323299
loss: 1.0104801654815674,grad_norm: 0.885506993538151, iteration: 323300
loss: 1.0043716430664062,grad_norm: 0.8622046667089303, iteration: 323301
loss: 0.9945955872535706,grad_norm: 0.8791596088084999, iteration: 323302
loss: 0.9809666275978088,grad_norm: 0.7912457700491682, iteration: 323303
loss: 0.9755552411079407,grad_norm: 0.8734581507354173, iteration: 323304
loss: 0.9673900604248047,grad_norm: 0.7613477197261846, iteration: 323305
loss: 0.9852395057678223,grad_norm: 0.8649097076035405, iteration: 323306
loss: 1.050734281539917,grad_norm: 0.911739937223647, iteration: 323307
loss: 0.9743722677230835,grad_norm: 0.6831129227305502, iteration: 323308
loss: 1.0100717544555664,grad_norm: 0.8904102964721474, iteration: 323309
loss: 1.0134570598602295,grad_norm: 0.8093603684413601, iteration: 323310
loss: 0.9752898812294006,grad_norm: 0.9027420073011636, iteration: 323311
loss: 0.9522132873535156,grad_norm: 0.673961135020384, iteration: 323312
loss: 0.9476861357688904,grad_norm: 0.8316841739196789, iteration: 323313
loss: 1.0080888271331787,grad_norm: 0.999999121491669, iteration: 323314
loss: 0.9675777554512024,grad_norm: 0.9999990169201829, iteration: 323315
loss: 0.9816297888755798,grad_norm: 0.8895056765119557, iteration: 323316
loss: 0.9897683262825012,grad_norm: 0.7762194730080285, iteration: 323317
loss: 1.0093896389007568,grad_norm: 0.8245678476844339, iteration: 323318
loss: 0.999809205532074,grad_norm: 0.9153832924271054, iteration: 323319
loss: 0.9989056587219238,grad_norm: 0.7971134880549942, iteration: 323320
loss: 1.031895637512207,grad_norm: 0.8218909265592658, iteration: 323321
loss: 0.9942153096199036,grad_norm: 0.8151103295538558, iteration: 323322
loss: 0.9884276390075684,grad_norm: 0.7153581999311704, iteration: 323323
loss: 0.9959063529968262,grad_norm: 0.8278268000417168, iteration: 323324
loss: 1.0650564432144165,grad_norm: 0.9488547678369257, iteration: 323325
loss: 0.9762278199195862,grad_norm: 0.8856214292727465, iteration: 323326
loss: 0.997011125087738,grad_norm: 0.8487894032171812, iteration: 323327
loss: 1.0280765295028687,grad_norm: 0.862648482610005, iteration: 323328
loss: 0.9912971258163452,grad_norm: 0.6710297677183827, iteration: 323329
loss: 0.9856904745101929,grad_norm: 0.8739134748037941, iteration: 323330
loss: 0.9819109439849854,grad_norm: 0.912325074430032, iteration: 323331
loss: 0.9828045964241028,grad_norm: 0.8595908857737673, iteration: 323332
loss: 1.0275976657867432,grad_norm: 0.8534867574342329, iteration: 323333
loss: 0.9996346235275269,grad_norm: 0.9999990025609757, iteration: 323334
loss: 1.001563310623169,grad_norm: 0.9523742108220429, iteration: 323335
loss: 0.9954990148544312,grad_norm: 0.7658993385626262, iteration: 323336
loss: 0.9634350538253784,grad_norm: 0.8741464949404395, iteration: 323337
loss: 1.0044944286346436,grad_norm: 0.8958867294667903, iteration: 323338
loss: 1.0006976127624512,grad_norm: 0.803536097442361, iteration: 323339
loss: 1.0127694606781006,grad_norm: 0.8474481464164685, iteration: 323340
loss: 0.9766316413879395,grad_norm: 0.999999082681065, iteration: 323341
loss: 1.0029139518737793,grad_norm: 0.8059271789948705, iteration: 323342
loss: 1.0246747732162476,grad_norm: 0.8947203463043103, iteration: 323343
loss: 1.005552053451538,grad_norm: 0.8588578700913134, iteration: 323344
loss: 0.9976515173912048,grad_norm: 0.8208875238399126, iteration: 323345
loss: 1.002642273902893,grad_norm: 0.769296447265233, iteration: 323346
loss: 0.9840168952941895,grad_norm: 0.9462747986796751, iteration: 323347
loss: 1.0149489641189575,grad_norm: 0.8854446471553078, iteration: 323348
loss: 1.0259114503860474,grad_norm: 0.9389674456620345, iteration: 323349
loss: 0.9800589084625244,grad_norm: 0.999999889802184, iteration: 323350
loss: 0.9919034838676453,grad_norm: 0.9506300025619534, iteration: 323351
loss: 0.9957456588745117,grad_norm: 0.820143485646358, iteration: 323352
loss: 1.003714919090271,grad_norm: 0.9353790456702338, iteration: 323353
loss: 0.9837453961372375,grad_norm: 0.7265167255582027, iteration: 323354
loss: 1.0313992500305176,grad_norm: 0.7874376735944087, iteration: 323355
loss: 0.9964313507080078,grad_norm: 0.7455657880419562, iteration: 323356
loss: 0.9813655614852905,grad_norm: 0.8933202695597414, iteration: 323357
loss: 0.9985642433166504,grad_norm: 0.941635230955803, iteration: 323358
loss: 0.9863762259483337,grad_norm: 0.7502399850892089, iteration: 323359
loss: 0.9889727234840393,grad_norm: 0.7408526521825456, iteration: 323360
loss: 1.0000026226043701,grad_norm: 0.8944986548935979, iteration: 323361
loss: 1.031708836555481,grad_norm: 0.8620519828855229, iteration: 323362
loss: 1.017275094985962,grad_norm: 0.7675615648215449, iteration: 323363
loss: 0.9816074967384338,grad_norm: 0.8714157240655832, iteration: 323364
loss: 1.002829909324646,grad_norm: 0.8178033064916772, iteration: 323365
loss: 1.0065919160842896,grad_norm: 0.7026968811265103, iteration: 323366
loss: 0.9770594835281372,grad_norm: 0.6876139042805987, iteration: 323367
loss: 1.0301750898361206,grad_norm: 0.8758393660293956, iteration: 323368
loss: 1.1040546894073486,grad_norm: 0.9999993358595757, iteration: 323369
loss: 1.016250491142273,grad_norm: 0.8660935022310926, iteration: 323370
loss: 1.0129314661026,grad_norm: 0.8360395201399062, iteration: 323371
loss: 0.9894086122512817,grad_norm: 0.9169219941261165, iteration: 323372
loss: 1.007232427597046,grad_norm: 0.7871732463549737, iteration: 323373
loss: 1.015581488609314,grad_norm: 0.9193027425715268, iteration: 323374
loss: 0.9792395234107971,grad_norm: 0.8132719358667895, iteration: 323375
loss: 1.0537978410720825,grad_norm: 0.9999997875608667, iteration: 323376
loss: 1.0091851949691772,grad_norm: 0.9026416808977954, iteration: 323377
loss: 1.0267870426177979,grad_norm: 0.9652070684413421, iteration: 323378
loss: 1.0348249673843384,grad_norm: 0.7811410560664284, iteration: 323379
loss: 0.9737760424613953,grad_norm: 0.7444288950237735, iteration: 323380
loss: 0.989387571811676,grad_norm: 0.9999988862547577, iteration: 323381
loss: 0.9891804456710815,grad_norm: 0.7703578131022304, iteration: 323382
loss: 1.0092711448669434,grad_norm: 0.8768915235521897, iteration: 323383
loss: 0.9771122336387634,grad_norm: 0.667737769292179, iteration: 323384
loss: 0.9702726602554321,grad_norm: 0.8959964222385005, iteration: 323385
loss: 1.0086420774459839,grad_norm: 0.7522705245045426, iteration: 323386
loss: 0.9507266879081726,grad_norm: 0.8122689364912636, iteration: 323387
loss: 1.0171846151351929,grad_norm: 0.7297460911917283, iteration: 323388
loss: 0.9526461362838745,grad_norm: 0.6817640109633634, iteration: 323389
loss: 1.0014623403549194,grad_norm: 0.959311862094049, iteration: 323390
loss: 1.0183188915252686,grad_norm: 0.7097010113559884, iteration: 323391
loss: 1.017406940460205,grad_norm: 0.7558566896648043, iteration: 323392
loss: 0.9790042638778687,grad_norm: 0.8744195565069143, iteration: 323393
loss: 1.0035746097564697,grad_norm: 0.8365601099115773, iteration: 323394
loss: 1.0085620880126953,grad_norm: 0.8715159571090877, iteration: 323395
loss: 1.0073384046554565,grad_norm: 0.9999995689688461, iteration: 323396
loss: 0.9750086069107056,grad_norm: 0.8354916169431129, iteration: 323397
loss: 1.0007774829864502,grad_norm: 0.7025068597636274, iteration: 323398
loss: 1.0130081176757812,grad_norm: 0.8470089510966068, iteration: 323399
loss: 1.0268651247024536,grad_norm: 0.8521814340206713, iteration: 323400
loss: 0.9789636731147766,grad_norm: 0.8731703126592909, iteration: 323401
loss: 1.0060780048370361,grad_norm: 0.977687528682915, iteration: 323402
loss: 0.9959273338317871,grad_norm: 0.8200446795350033, iteration: 323403
loss: 1.0033786296844482,grad_norm: 0.9545031633477238, iteration: 323404
loss: 0.9827561974525452,grad_norm: 0.9090400454671571, iteration: 323405
loss: 1.002596378326416,grad_norm: 0.9349025912577115, iteration: 323406
loss: 1.0020637512207031,grad_norm: 0.9999990321327842, iteration: 323407
loss: 1.0384327173233032,grad_norm: 0.9999995763165426, iteration: 323408
loss: 1.0117127895355225,grad_norm: 0.9999998971992541, iteration: 323409
loss: 1.0085489749908447,grad_norm: 0.8223453076738607, iteration: 323410
loss: 0.9674274921417236,grad_norm: 0.9165213699900225, iteration: 323411
loss: 0.9946281313896179,grad_norm: 0.7522537047814039, iteration: 323412
loss: 0.9775230884552002,grad_norm: 0.7812683659279892, iteration: 323413
loss: 0.9712516665458679,grad_norm: 0.702827377669194, iteration: 323414
loss: 0.9888587594032288,grad_norm: 0.9999991046592684, iteration: 323415
loss: 1.00204336643219,grad_norm: 0.8040675028533639, iteration: 323416
loss: 0.9659789800643921,grad_norm: 0.9847833692436163, iteration: 323417
loss: 1.0071384906768799,grad_norm: 0.9075065362649585, iteration: 323418
loss: 1.0170637369155884,grad_norm: 0.9350316688573291, iteration: 323419
loss: 0.9855825304985046,grad_norm: 0.8061020010790328, iteration: 323420
loss: 1.008010983467102,grad_norm: 0.9117782479057381, iteration: 323421
loss: 1.0176433324813843,grad_norm: 0.832838822982382, iteration: 323422
loss: 0.9715695381164551,grad_norm: 0.7595703101231414, iteration: 323423
loss: 1.0089188814163208,grad_norm: 0.9999990255341994, iteration: 323424
loss: 1.0000309944152832,grad_norm: 0.8133309038689371, iteration: 323425
loss: 0.9400673508644104,grad_norm: 0.9720515645933924, iteration: 323426
loss: 1.0215057134628296,grad_norm: 0.877502502976442, iteration: 323427
loss: 0.9767612814903259,grad_norm: 0.697151429582664, iteration: 323428
loss: 0.9792817234992981,grad_norm: 0.8282801371223601, iteration: 323429
loss: 0.9965381622314453,grad_norm: 0.9103558566155592, iteration: 323430
loss: 1.045690894126892,grad_norm: 0.999999122294891, iteration: 323431
loss: 1.0156787633895874,grad_norm: 0.7974781526632961, iteration: 323432
loss: 1.012236475944519,grad_norm: 0.8943574843308731, iteration: 323433
loss: 0.9718402028083801,grad_norm: 0.8531768509327425, iteration: 323434
loss: 1.0002199411392212,grad_norm: 0.7733228428479832, iteration: 323435
loss: 0.9898027777671814,grad_norm: 0.8463734152084795, iteration: 323436
loss: 0.990684986114502,grad_norm: 0.9999991230097403, iteration: 323437
loss: 0.9456468820571899,grad_norm: 0.9303739642048158, iteration: 323438
loss: 0.9647350907325745,grad_norm: 0.9960398225638769, iteration: 323439
loss: 0.9871358275413513,grad_norm: 0.755588304121902, iteration: 323440
loss: 1.0066606998443604,grad_norm: 0.7904069160272603, iteration: 323441
loss: 1.005507469177246,grad_norm: 0.8690723899018393, iteration: 323442
loss: 0.9814764857292175,grad_norm: 0.999999130233179, iteration: 323443
loss: 1.0191570520401,grad_norm: 0.8885273671749964, iteration: 323444
loss: 1.0247645378112793,grad_norm: 0.8634721211546025, iteration: 323445
loss: 1.0194742679595947,grad_norm: 0.9999990429283111, iteration: 323446
loss: 1.0089962482452393,grad_norm: 0.9525974594554482, iteration: 323447
loss: 1.0105390548706055,grad_norm: 0.7591705430055166, iteration: 323448
loss: 1.0128904581069946,grad_norm: 0.9999993421815969, iteration: 323449
loss: 1.0080628395080566,grad_norm: 0.9999989808274018, iteration: 323450
loss: 0.9877542853355408,grad_norm: 0.8932736539552918, iteration: 323451
loss: 1.0278948545455933,grad_norm: 0.9339173841887249, iteration: 323452
loss: 0.9837549328804016,grad_norm: 0.743733271270253, iteration: 323453
loss: 1.052351951599121,grad_norm: 0.8286625033896028, iteration: 323454
loss: 0.9834425449371338,grad_norm: 0.999999892269288, iteration: 323455
loss: 1.0173280239105225,grad_norm: 0.8472355314043586, iteration: 323456
loss: 0.9522029161453247,grad_norm: 0.9308797117527071, iteration: 323457
loss: 0.9929680228233337,grad_norm: 0.6859374471559034, iteration: 323458
loss: 1.0155701637268066,grad_norm: 0.9999990318197957, iteration: 323459
loss: 1.0263233184814453,grad_norm: 0.7334847542305734, iteration: 323460
loss: 1.0018290281295776,grad_norm: 0.9999990470117205, iteration: 323461
loss: 1.0087954998016357,grad_norm: 0.9999993898567258, iteration: 323462
loss: 1.0218346118927002,grad_norm: 0.7203143725872491, iteration: 323463
loss: 1.0102500915527344,grad_norm: 0.8722435244597776, iteration: 323464
loss: 1.050599455833435,grad_norm: 0.9313744582535883, iteration: 323465
loss: 1.031450867652893,grad_norm: 0.8973537471576543, iteration: 323466
loss: 0.9872955083847046,grad_norm: 0.9999993151910344, iteration: 323467
loss: 1.0054662227630615,grad_norm: 0.7964570697812239, iteration: 323468
loss: 0.9674104452133179,grad_norm: 0.8442820092164557, iteration: 323469
loss: 0.968804121017456,grad_norm: 0.9637762083572085, iteration: 323470
loss: 1.0080463886260986,grad_norm: 0.812048493291484, iteration: 323471
loss: 0.9680078625679016,grad_norm: 0.7741801708563902, iteration: 323472
loss: 1.145627737045288,grad_norm: 0.9999996445616645, iteration: 323473
loss: 0.9768242835998535,grad_norm: 0.8141105226279993, iteration: 323474
loss: 1.0090481042861938,grad_norm: 0.8441481697969498, iteration: 323475
loss: 0.9952583909034729,grad_norm: 0.8328716992600229, iteration: 323476
loss: 0.9963262677192688,grad_norm: 0.8304013280633333, iteration: 323477
loss: 1.050215482711792,grad_norm: 0.758849039545946, iteration: 323478
loss: 1.0145450830459595,grad_norm: 0.8923621132074415, iteration: 323479
loss: 1.0065897703170776,grad_norm: 0.7491975548283415, iteration: 323480
loss: 1.0318212509155273,grad_norm: 0.8906731243425954, iteration: 323481
loss: 1.0463371276855469,grad_norm: 0.8093918489902439, iteration: 323482
loss: 0.9878201484680176,grad_norm: 0.8148388171025903, iteration: 323483
loss: 0.9949889183044434,grad_norm: 0.9156960864200627, iteration: 323484
loss: 0.9685185551643372,grad_norm: 0.8523064055793352, iteration: 323485
loss: 1.0237653255462646,grad_norm: 0.8686744794504285, iteration: 323486
loss: 0.9895749092102051,grad_norm: 0.7956850899428268, iteration: 323487
loss: 0.986844003200531,grad_norm: 0.710576856465696, iteration: 323488
loss: 0.994365930557251,grad_norm: 0.9605505141735801, iteration: 323489
loss: 0.9968721270561218,grad_norm: 0.7038853327244047, iteration: 323490
loss: 1.1052732467651367,grad_norm: 0.9999997493213794, iteration: 323491
loss: 0.9872709512710571,grad_norm: 0.7494655429877499, iteration: 323492
loss: 1.048525094985962,grad_norm: 0.7744657869021471, iteration: 323493
loss: 1.0010881423950195,grad_norm: 0.8714370042989515, iteration: 323494
loss: 1.0044047832489014,grad_norm: 0.7376015918768354, iteration: 323495
loss: 1.0099846124649048,grad_norm: 0.8270038308696496, iteration: 323496
loss: 1.0082311630249023,grad_norm: 0.8848934714742093, iteration: 323497
loss: 1.0150760412216187,grad_norm: 0.999999486496668, iteration: 323498
loss: 1.0099226236343384,grad_norm: 0.7406449678422153, iteration: 323499
loss: 0.9592782855033875,grad_norm: 0.8396790938275412, iteration: 323500
loss: 1.014341950416565,grad_norm: 0.8022029614479277, iteration: 323501
loss: 1.046911358833313,grad_norm: 0.9171351578138996, iteration: 323502
loss: 0.9870927333831787,grad_norm: 0.8405700110289354, iteration: 323503
loss: 0.9970142841339111,grad_norm: 0.7016723071718429, iteration: 323504
loss: 1.028361201286316,grad_norm: 0.7997674837026588, iteration: 323505
loss: 1.0160082578659058,grad_norm: 0.768566720373182, iteration: 323506
loss: 1.0594178438186646,grad_norm: 0.9590483766731973, iteration: 323507
loss: 1.0160703659057617,grad_norm: 0.7695617240243375, iteration: 323508
loss: 0.9809533357620239,grad_norm: 0.9067375963507821, iteration: 323509
loss: 0.9982367753982544,grad_norm: 0.8287295833603637, iteration: 323510
loss: 0.9582099914550781,grad_norm: 0.8438373469548726, iteration: 323511
loss: 1.0274345874786377,grad_norm: 0.8111782363035593, iteration: 323512
loss: 0.9914241433143616,grad_norm: 0.7383179497021182, iteration: 323513
loss: 0.9733268022537231,grad_norm: 0.7809141603799933, iteration: 323514
loss: 0.9993084073066711,grad_norm: 0.8358174187019058, iteration: 323515
loss: 1.005749225616455,grad_norm: 0.8273816672607016, iteration: 323516
loss: 0.9894102215766907,grad_norm: 0.9999989567830075, iteration: 323517
loss: 1.0307527780532837,grad_norm: 0.9240482743741099, iteration: 323518
loss: 1.0192978382110596,grad_norm: 0.7511842832610865, iteration: 323519
loss: 1.0213172435760498,grad_norm: 0.8738995389490855, iteration: 323520
loss: 1.0060542821884155,grad_norm: 0.7694533870144885, iteration: 323521
loss: 1.0066274404525757,grad_norm: 0.8520756943363974, iteration: 323522
loss: 1.004745602607727,grad_norm: 0.8224871357231254, iteration: 323523
loss: 0.9816312193870544,grad_norm: 0.8420669171366715, iteration: 323524
loss: 1.0126804113388062,grad_norm: 0.7198713462444146, iteration: 323525
loss: 0.9757671356201172,grad_norm: 0.9298347047006326, iteration: 323526
loss: 0.9815836548805237,grad_norm: 0.8508491986545152, iteration: 323527
loss: 1.0091544389724731,grad_norm: 0.8159731285823413, iteration: 323528
loss: 1.0016125440597534,grad_norm: 0.858756596030647, iteration: 323529
loss: 0.9853630661964417,grad_norm: 0.8255284216783907, iteration: 323530
loss: 0.9838330149650574,grad_norm: 0.8878814049644536, iteration: 323531
loss: 1.0087300539016724,grad_norm: 0.7594645782086157, iteration: 323532
loss: 0.9958989024162292,grad_norm: 0.8255472682093064, iteration: 323533
loss: 1.015840768814087,grad_norm: 0.8097300512242163, iteration: 323534
loss: 0.9724544882774353,grad_norm: 0.8793283174633096, iteration: 323535
loss: 1.019796371459961,grad_norm: 0.8111613590050284, iteration: 323536
loss: 1.0123323202133179,grad_norm: 0.7936944013450921, iteration: 323537
loss: 0.9535991549491882,grad_norm: 0.7884941135479405, iteration: 323538
loss: 1.0062135457992554,grad_norm: 0.8948945300134564, iteration: 323539
loss: 0.9644068479537964,grad_norm: 0.8363953047648894, iteration: 323540
loss: 0.963517963886261,grad_norm: 0.826485527362557, iteration: 323541
loss: 0.9924241900444031,grad_norm: 0.8707875323367834, iteration: 323542
loss: 1.0198237895965576,grad_norm: 0.8919191961008102, iteration: 323543
loss: 0.9652087092399597,grad_norm: 0.7333700074355212, iteration: 323544
loss: 0.9937132000923157,grad_norm: 0.8821513729881911, iteration: 323545
loss: 1.0195434093475342,grad_norm: 0.8688369184576382, iteration: 323546
loss: 1.0079100131988525,grad_norm: 0.6754016825513764, iteration: 323547
loss: 1.003000259399414,grad_norm: 0.8731100255680928, iteration: 323548
loss: 0.9788512587547302,grad_norm: 0.8196364688614579, iteration: 323549
loss: 0.9735350012779236,grad_norm: 0.994477590106299, iteration: 323550
loss: 1.0432376861572266,grad_norm: 0.9599109065642347, iteration: 323551
loss: 0.999258279800415,grad_norm: 0.8084369787699981, iteration: 323552
loss: 1.0134589672088623,grad_norm: 0.9999991014117916, iteration: 323553
loss: 0.9969065189361572,grad_norm: 0.768519451777082, iteration: 323554
loss: 0.9882417917251587,grad_norm: 0.9999996075001382, iteration: 323555
loss: 0.9763805866241455,grad_norm: 0.8449478321413096, iteration: 323556
loss: 0.9756075739860535,grad_norm: 0.7566617721288583, iteration: 323557
loss: 1.0097150802612305,grad_norm: 0.9105886026000145, iteration: 323558
loss: 0.9751731753349304,grad_norm: 0.8654956026172019, iteration: 323559
loss: 1.0001804828643799,grad_norm: 0.8553895593638455, iteration: 323560
loss: 1.017655611038208,grad_norm: 0.8978346744146533, iteration: 323561
loss: 0.9976844787597656,grad_norm: 0.8635432534178065, iteration: 323562
loss: 0.9978058338165283,grad_norm: 0.80266920658569, iteration: 323563
loss: 1.0019105672836304,grad_norm: 0.8872472781027837, iteration: 323564
loss: 1.0101577043533325,grad_norm: 0.8214479832843127, iteration: 323565
loss: 0.9632920622825623,grad_norm: 0.9355463625055698, iteration: 323566
loss: 1.104071855545044,grad_norm: 0.9999993164744688, iteration: 323567
loss: 1.0135594606399536,grad_norm: 0.9644154029581495, iteration: 323568
loss: 0.9803643822669983,grad_norm: 0.7645303304831608, iteration: 323569
loss: 0.9948149919509888,grad_norm: 0.8408558099110881, iteration: 323570
loss: 0.9876619577407837,grad_norm: 0.6844431577793764, iteration: 323571
loss: 1.0404798984527588,grad_norm: 0.9999991302606063, iteration: 323572
loss: 1.0342609882354736,grad_norm: 0.9999996958606481, iteration: 323573
loss: 1.0067551136016846,grad_norm: 0.7537365494958772, iteration: 323574
loss: 1.035288691520691,grad_norm: 0.9360860583049284, iteration: 323575
loss: 0.9699389338493347,grad_norm: 0.8362177255905979, iteration: 323576
loss: 0.9544821381568909,grad_norm: 0.8177603145057446, iteration: 323577
loss: 0.9697688221931458,grad_norm: 0.829229102700507, iteration: 323578
loss: 1.0130581855773926,grad_norm: 0.8379314350623117, iteration: 323579
loss: 1.0011943578720093,grad_norm: 0.9006938294080362, iteration: 323580
loss: 0.9918453693389893,grad_norm: 0.7329521962502282, iteration: 323581
loss: 1.0219347476959229,grad_norm: 0.6479502556037058, iteration: 323582
loss: 0.9736461639404297,grad_norm: 0.85197401264178, iteration: 323583
loss: 1.0350786447525024,grad_norm: 0.7332110042119838, iteration: 323584
loss: 1.0199148654937744,grad_norm: 0.9999992159774963, iteration: 323585
loss: 0.987592339515686,grad_norm: 0.8471335984687172, iteration: 323586
loss: 0.9924049973487854,grad_norm: 0.9560925134194107, iteration: 323587
loss: 0.9862856268882751,grad_norm: 0.9982836819556474, iteration: 323588
loss: 0.9921920895576477,grad_norm: 0.7342625890007756, iteration: 323589
loss: 1.0013306140899658,grad_norm: 0.8318488019403566, iteration: 323590
loss: 0.9832984209060669,grad_norm: 0.8059697543375152, iteration: 323591
loss: 1.0044052600860596,grad_norm: 0.8194210776718791, iteration: 323592
loss: 1.0284171104431152,grad_norm: 0.8858139523030364, iteration: 323593
loss: 1.0050458908081055,grad_norm: 0.8129942064320569, iteration: 323594
loss: 1.0059622526168823,grad_norm: 0.7706937561681292, iteration: 323595
loss: 1.028701663017273,grad_norm: 0.8827406121961613, iteration: 323596
loss: 0.9545057415962219,grad_norm: 0.8446586695182001, iteration: 323597
loss: 1.0421169996261597,grad_norm: 0.9205626842216581, iteration: 323598
loss: 1.0124590396881104,grad_norm: 0.8908975274424223, iteration: 323599
loss: 1.051640272140503,grad_norm: 0.9999992605315308, iteration: 323600
loss: 0.9908732771873474,grad_norm: 0.9999989784938084, iteration: 323601
loss: 1.0026909112930298,grad_norm: 0.841603992081039, iteration: 323602
loss: 1.0155136585235596,grad_norm: 0.6763240301753999, iteration: 323603
loss: 1.000101089477539,grad_norm: 0.9606586093195613, iteration: 323604
loss: 1.015738844871521,grad_norm: 0.8050380684509093, iteration: 323605
loss: 0.9858294725418091,grad_norm: 0.999999089971251, iteration: 323606
loss: 1.012630581855774,grad_norm: 0.8677712415418474, iteration: 323607
loss: 0.9616712331771851,grad_norm: 0.8784570731223535, iteration: 323608
loss: 1.0011086463928223,grad_norm: 0.6898726042852429, iteration: 323609
loss: 1.0384869575500488,grad_norm: 0.7941999966093146, iteration: 323610
loss: 1.003650188446045,grad_norm: 0.9589027753194341, iteration: 323611
loss: 0.9854389429092407,grad_norm: 0.9341390855815896, iteration: 323612
loss: 0.9543977379798889,grad_norm: 0.8782822438946823, iteration: 323613
loss: 1.0126489400863647,grad_norm: 0.8720570336184388, iteration: 323614
loss: 1.004494547843933,grad_norm: 0.8978257277740407, iteration: 323615
loss: 1.0086941719055176,grad_norm: 0.8630049375083636, iteration: 323616
loss: 1.0064750909805298,grad_norm: 0.8106968869621108, iteration: 323617
loss: 0.963530957698822,grad_norm: 0.8248291230341595, iteration: 323618
loss: 0.986004114151001,grad_norm: 0.7988086534528965, iteration: 323619
loss: 0.9973137974739075,grad_norm: 0.9867341452893874, iteration: 323620
loss: 0.9738708734512329,grad_norm: 0.9999991591426628, iteration: 323621
loss: 0.983089029788971,grad_norm: 0.8432330002082874, iteration: 323622
loss: 1.0048513412475586,grad_norm: 0.8735004204280301, iteration: 323623
loss: 0.9942448735237122,grad_norm: 0.75879368420882, iteration: 323624
loss: 1.0655033588409424,grad_norm: 0.999999001610372, iteration: 323625
loss: 0.9835793972015381,grad_norm: 0.9264977267656171, iteration: 323626
loss: 1.0191621780395508,grad_norm: 0.7937587524522955, iteration: 323627
loss: 0.9900434613227844,grad_norm: 0.9169604397063243, iteration: 323628
loss: 1.0530047416687012,grad_norm: 0.7203843439262562, iteration: 323629
loss: 0.9861050844192505,grad_norm: 0.8804652875708607, iteration: 323630
loss: 1.0086236000061035,grad_norm: 0.9066209676431524, iteration: 323631
loss: 1.0070406198501587,grad_norm: 0.7423072028613813, iteration: 323632
loss: 1.0057029724121094,grad_norm: 0.9999992278713865, iteration: 323633
loss: 0.9784370064735413,grad_norm: 0.8835945140100088, iteration: 323634
loss: 0.9894756078720093,grad_norm: 0.7426668571287218, iteration: 323635
loss: 1.0200302600860596,grad_norm: 0.7498352152383057, iteration: 323636
loss: 0.9874237775802612,grad_norm: 0.9524324587687995, iteration: 323637
loss: 0.9888451099395752,grad_norm: 0.780343616879748, iteration: 323638
loss: 0.9900205731391907,grad_norm: 0.7776350264066283, iteration: 323639
loss: 1.0257532596588135,grad_norm: 0.8221654494926731, iteration: 323640
loss: 1.0232027769088745,grad_norm: 0.9282114057174969, iteration: 323641
loss: 1.052997350692749,grad_norm: 0.9999992420026578, iteration: 323642
loss: 0.980279803276062,grad_norm: 0.8450185790712073, iteration: 323643
loss: 1.0141721963882446,grad_norm: 0.9999994449164619, iteration: 323644
loss: 0.9688814282417297,grad_norm: 0.9021286998912883, iteration: 323645
loss: 0.9608017206192017,grad_norm: 0.8089463310445227, iteration: 323646
loss: 0.9469519257545471,grad_norm: 0.8035076264767882, iteration: 323647
loss: 1.006323218345642,grad_norm: 0.7915558890970115, iteration: 323648
loss: 0.9976188540458679,grad_norm: 0.7866966429560895, iteration: 323649
loss: 0.9970536828041077,grad_norm: 1.000000013437587, iteration: 323650
loss: 0.981641411781311,grad_norm: 0.796048478008652, iteration: 323651
loss: 1.028412103652954,grad_norm: 0.8849211083972799, iteration: 323652
loss: 1.0145269632339478,grad_norm: 0.6678200209248096, iteration: 323653
loss: 0.9696906805038452,grad_norm: 0.7741112007569684, iteration: 323654
loss: 1.0040735006332397,grad_norm: 0.8492356965813043, iteration: 323655
loss: 1.001430869102478,grad_norm: 0.8880575112333206, iteration: 323656
loss: 1.0159502029418945,grad_norm: 0.8696389069622684, iteration: 323657
loss: 1.032858967781067,grad_norm: 0.9999996604408632, iteration: 323658
loss: 1.0236090421676636,grad_norm: 0.853823943634361, iteration: 323659
loss: 1.001344084739685,grad_norm: 0.8476702226227373, iteration: 323660
loss: 0.9737709760665894,grad_norm: 0.9227274496642099, iteration: 323661
loss: 1.0120654106140137,grad_norm: 0.9840471281817101, iteration: 323662
loss: 1.0242722034454346,grad_norm: 0.9067032852059693, iteration: 323663
loss: 1.0118135213851929,grad_norm: 0.8581099712695188, iteration: 323664
loss: 0.9950340986251831,grad_norm: 0.8152003852226095, iteration: 323665
loss: 1.0112487077713013,grad_norm: 0.827708564804579, iteration: 323666
loss: 0.9747623801231384,grad_norm: 0.9444713496574966, iteration: 323667
loss: 0.9570373296737671,grad_norm: 0.8397349920923054, iteration: 323668
loss: 0.9870979189872742,grad_norm: 0.8321262735013614, iteration: 323669
loss: 1.0192393064498901,grad_norm: 0.7323191110462972, iteration: 323670
loss: 0.9949417114257812,grad_norm: 0.9168679632869116, iteration: 323671
loss: 0.9938825368881226,grad_norm: 0.8267881654117077, iteration: 323672
loss: 1.040982961654663,grad_norm: 0.9448857480005666, iteration: 323673
loss: 1.0097062587738037,grad_norm: 0.9999989386475381, iteration: 323674
loss: 1.0275185108184814,grad_norm: 0.8851186783282715, iteration: 323675
loss: 0.9957855939865112,grad_norm: 0.9999991225702605, iteration: 323676
loss: 1.0000752210617065,grad_norm: 0.9531008881319601, iteration: 323677
loss: 1.020864486694336,grad_norm: 0.8067456154157067, iteration: 323678
loss: 0.9924924969673157,grad_norm: 0.7735597923489396, iteration: 323679
loss: 1.0061297416687012,grad_norm: 0.787433331413409, iteration: 323680
loss: 1.006014108657837,grad_norm: 0.7410817091034795, iteration: 323681
loss: 1.012387990951538,grad_norm: 0.7445516846839494, iteration: 323682
loss: 1.0075353384017944,grad_norm: 0.9133180584819041, iteration: 323683
loss: 0.9911803603172302,grad_norm: 0.8862627188207269, iteration: 323684
loss: 0.9846792817115784,grad_norm: 0.9999992278291975, iteration: 323685
loss: 1.0157378911972046,grad_norm: 0.8334789209401164, iteration: 323686
loss: 1.0079056024551392,grad_norm: 0.8821341906410494, iteration: 323687
loss: 1.019846796989441,grad_norm: 0.8622185568602496, iteration: 323688
loss: 0.9960419535636902,grad_norm: 0.7976019074004694, iteration: 323689
loss: 1.0038917064666748,grad_norm: 0.9094094944952955, iteration: 323690
loss: 1.0289618968963623,grad_norm: 0.8306325400066564, iteration: 323691
loss: 0.9913130402565002,grad_norm: 0.9587554324509909, iteration: 323692
loss: 1.0101814270019531,grad_norm: 0.8546059500111438, iteration: 323693
loss: 1.003853678703308,grad_norm: 0.8061702355344319, iteration: 323694
loss: 1.0260406732559204,grad_norm: 0.888529499053855, iteration: 323695
loss: 0.9925097227096558,grad_norm: 0.842169073168905, iteration: 323696
loss: 0.9606344699859619,grad_norm: 0.8948173681626134, iteration: 323697
loss: 1.0002061128616333,grad_norm: 0.821423212568346, iteration: 323698
loss: 0.9830681681632996,grad_norm: 0.8763570531015372, iteration: 323699
loss: 1.0037643909454346,grad_norm: 0.7946829054375953, iteration: 323700
loss: 1.0119932889938354,grad_norm: 0.822816047290767, iteration: 323701
loss: 0.9950971007347107,grad_norm: 0.8304881025650812, iteration: 323702
loss: 1.0033138990402222,grad_norm: 0.8056491225751358, iteration: 323703
loss: 0.9791311621665955,grad_norm: 0.999999262990774, iteration: 323704
loss: 1.0074745416641235,grad_norm: 0.7028404201063319, iteration: 323705
loss: 1.0301350355148315,grad_norm: 0.7711168497764157, iteration: 323706
loss: 1.0047158002853394,grad_norm: 0.8379334728340104, iteration: 323707
loss: 1.0335091352462769,grad_norm: 0.8935133794557155, iteration: 323708
loss: 1.058014154434204,grad_norm: 0.8809112201295618, iteration: 323709
loss: 0.9702249765396118,grad_norm: 0.8559455860119711, iteration: 323710
loss: 1.0204511880874634,grad_norm: 0.8050908159618693, iteration: 323711
loss: 0.9890655875205994,grad_norm: 0.888915532936731, iteration: 323712
loss: 0.9929572939872742,grad_norm: 0.7735793195891537, iteration: 323713
loss: 0.9796446561813354,grad_norm: 0.80015149591404, iteration: 323714
loss: 0.9838508367538452,grad_norm: 0.8563827957525812, iteration: 323715
loss: 1.0095839500427246,grad_norm: 0.8756784236144332, iteration: 323716
loss: 1.0000576972961426,grad_norm: 0.744211869865305, iteration: 323717
loss: 1.0150127410888672,grad_norm: 0.9999990590733316, iteration: 323718
loss: 1.022535800933838,grad_norm: 0.857535526016233, iteration: 323719
loss: 0.9992221593856812,grad_norm: 0.9325837002360998, iteration: 323720
loss: 0.9992823600769043,grad_norm: 0.9999996283439484, iteration: 323721
loss: 0.9861392378807068,grad_norm: 0.908753464351446, iteration: 323722
loss: 1.0003834962844849,grad_norm: 0.7718153838221172, iteration: 323723
loss: 1.0342568159103394,grad_norm: 0.7702530134749599, iteration: 323724
loss: 0.9637428522109985,grad_norm: 0.7936577208269241, iteration: 323725
loss: 0.9776514768600464,grad_norm: 0.9835883176229523, iteration: 323726
loss: 1.001386284828186,grad_norm: 0.9544648313181199, iteration: 323727
loss: 0.9569070339202881,grad_norm: 0.6855795502893336, iteration: 323728
loss: 1.027249813079834,grad_norm: 0.9066004115185089, iteration: 323729
loss: 1.0207695960998535,grad_norm: 0.8842413885305469, iteration: 323730
loss: 1.0250872373580933,grad_norm: 0.9200690769484476, iteration: 323731
loss: 1.0656156539916992,grad_norm: 0.8218357592497719, iteration: 323732
loss: 0.9788758754730225,grad_norm: 0.7750320153638772, iteration: 323733
loss: 1.0066982507705688,grad_norm: 0.8749265437282719, iteration: 323734
loss: 1.0107477903366089,grad_norm: 0.8669048138558095, iteration: 323735
loss: 0.9956101775169373,grad_norm: 0.7343352407122854, iteration: 323736
loss: 1.0082533359527588,grad_norm: 0.6967993229472884, iteration: 323737
loss: 0.9773393869400024,grad_norm: 0.7916087044517327, iteration: 323738
loss: 0.9859369397163391,grad_norm: 0.740578908296802, iteration: 323739
loss: 1.0301746129989624,grad_norm: 0.888489134078955, iteration: 323740
loss: 1.039331078529358,grad_norm: 0.9999995696407992, iteration: 323741
loss: 1.0090030431747437,grad_norm: 0.7809145989928478, iteration: 323742
loss: 0.9946849942207336,grad_norm: 0.7046627755032749, iteration: 323743
loss: 1.0084072351455688,grad_norm: 0.9778727993308837, iteration: 323744
loss: 0.9731106758117676,grad_norm: 0.8230945270358162, iteration: 323745
loss: 0.9458367824554443,grad_norm: 0.92189713744485, iteration: 323746
loss: 0.9750971794128418,grad_norm: 0.8306222218424942, iteration: 323747
loss: 1.0050374269485474,grad_norm: 0.8177340692610997, iteration: 323748
loss: 0.9617467522621155,grad_norm: 0.9925976216774323, iteration: 323749
loss: 1.0112708806991577,grad_norm: 0.9240676957567749, iteration: 323750
loss: 1.032873272895813,grad_norm: 0.9999991191450862, iteration: 323751
loss: 0.9954115152359009,grad_norm: 0.8075649242265857, iteration: 323752
loss: 1.0214613676071167,grad_norm: 0.8504447481974718, iteration: 323753
loss: 1.040436863899231,grad_norm: 0.7381434503696996, iteration: 323754
loss: 1.0154104232788086,grad_norm: 0.776767374183079, iteration: 323755
loss: 0.9982702136039734,grad_norm: 0.999999469389673, iteration: 323756
loss: 1.0070523023605347,grad_norm: 0.9227166666698087, iteration: 323757
loss: 0.992922842502594,grad_norm: 0.8310433962657878, iteration: 323758
loss: 1.0297410488128662,grad_norm: 0.9504095520987048, iteration: 323759
loss: 0.9834383726119995,grad_norm: 0.8084030736769547, iteration: 323760
loss: 0.9854282140731812,grad_norm: 0.9999990915968865, iteration: 323761
loss: 1.0022469758987427,grad_norm: 0.7395594029983846, iteration: 323762
loss: 0.990070104598999,grad_norm: 0.8466631397077264, iteration: 323763
loss: 0.9975005984306335,grad_norm: 0.7657201314179957, iteration: 323764
loss: 0.9864637851715088,grad_norm: 0.8363485983489725, iteration: 323765
loss: 1.0090723037719727,grad_norm: 0.9722915556485094, iteration: 323766
loss: 0.9978505373001099,grad_norm: 0.9548270154125076, iteration: 323767
loss: 0.9761328101158142,grad_norm: 0.845558253432044, iteration: 323768
loss: 0.9674754738807678,grad_norm: 0.9328127732278504, iteration: 323769
loss: 1.0494025945663452,grad_norm: 0.8904636714998067, iteration: 323770
loss: 1.0216611623764038,grad_norm: 0.9361889205169006, iteration: 323771
loss: 1.0278325080871582,grad_norm: 0.7455782820956761, iteration: 323772
loss: 1.0176630020141602,grad_norm: 0.7159930013739016, iteration: 323773
loss: 1.0100927352905273,grad_norm: 0.9040287606360902, iteration: 323774
loss: 1.0044586658477783,grad_norm: 0.7160417891788319, iteration: 323775
loss: 1.0723696947097778,grad_norm: 0.7482273730716384, iteration: 323776
loss: 1.0208815336227417,grad_norm: 0.930952890414423, iteration: 323777
loss: 1.0054973363876343,grad_norm: 0.9999993402302203, iteration: 323778
loss: 1.0139392614364624,grad_norm: 0.923867447481089, iteration: 323779
loss: 1.0057446956634521,grad_norm: 0.9006324263708241, iteration: 323780
loss: 0.9880322217941284,grad_norm: 0.9999991851230536, iteration: 323781
loss: 1.003806710243225,grad_norm: 0.8172823434346849, iteration: 323782
loss: 1.0017691850662231,grad_norm: 0.8261202770772208, iteration: 323783
loss: 1.0226078033447266,grad_norm: 0.9107506984402198, iteration: 323784
loss: 1.0209500789642334,grad_norm: 0.8600565212324442, iteration: 323785
loss: 1.016108512878418,grad_norm: 0.8685877220786394, iteration: 323786
loss: 1.02255117893219,grad_norm: 0.6782993054092171, iteration: 323787
loss: 1.0035221576690674,grad_norm: 0.7394567872040689, iteration: 323788
loss: 1.0143451690673828,grad_norm: 0.9999991577152748, iteration: 323789
loss: 1.0179108381271362,grad_norm: 0.8921151969672166, iteration: 323790
loss: 1.0071043968200684,grad_norm: 0.7437470188031965, iteration: 323791
loss: 1.0047279596328735,grad_norm: 0.9496707583142691, iteration: 323792
loss: 1.0701581239700317,grad_norm: 0.9452831617417531, iteration: 323793
loss: 1.0134276151657104,grad_norm: 0.7077813568180673, iteration: 323794
loss: 1.021994948387146,grad_norm: 0.8918505510801092, iteration: 323795
loss: 1.0052576065063477,grad_norm: 0.8113231874656271, iteration: 323796
loss: 1.068906545639038,grad_norm: 0.8880639299465608, iteration: 323797
loss: 1.024968147277832,grad_norm: 0.9464889855298996, iteration: 323798
loss: 1.0409672260284424,grad_norm: 0.9999993034243345, iteration: 323799
loss: 1.0099360942840576,grad_norm: 0.738052347285856, iteration: 323800
loss: 1.0132704973220825,grad_norm: 0.6912876640374298, iteration: 323801
loss: 1.0027556419372559,grad_norm: 0.7501647567294968, iteration: 323802
loss: 0.9553731083869934,grad_norm: 0.8691382548491161, iteration: 323803
loss: 1.026496410369873,grad_norm: 0.7285828501358127, iteration: 323804
loss: 1.0091673135757446,grad_norm: 0.7832254940547103, iteration: 323805
loss: 0.9779048562049866,grad_norm: 0.8179568599513499, iteration: 323806
loss: 1.0002901554107666,grad_norm: 0.9999991486596657, iteration: 323807
loss: 0.9768939018249512,grad_norm: 0.6918264393985945, iteration: 323808
loss: 0.9566444158554077,grad_norm: 0.9999991312972875, iteration: 323809
loss: 1.0477639436721802,grad_norm: 0.9635103646367821, iteration: 323810
loss: 1.0338613986968994,grad_norm: 0.8016762697043015, iteration: 323811
loss: 0.9902138113975525,grad_norm: 0.74711252792158, iteration: 323812
loss: 0.9766889810562134,grad_norm: 0.7331092045727295, iteration: 323813
loss: 1.0223984718322754,grad_norm: 0.9685345297419675, iteration: 323814
loss: 1.0137913227081299,grad_norm: 0.9999996779591819, iteration: 323815
loss: 1.0013961791992188,grad_norm: 0.8038888989746361, iteration: 323816
loss: 1.0333019495010376,grad_norm: 0.878590636673451, iteration: 323817
loss: 0.9631286859512329,grad_norm: 0.6736395280897697, iteration: 323818
loss: 0.9751416444778442,grad_norm: 0.7247701199856713, iteration: 323819
loss: 0.996868908405304,grad_norm: 0.8527503075317551, iteration: 323820
loss: 0.9950547814369202,grad_norm: 0.9099426873537386, iteration: 323821
loss: 0.9779511094093323,grad_norm: 0.8359116380976357, iteration: 323822
loss: 0.9809783697128296,grad_norm: 0.7245308504809708, iteration: 323823
loss: 0.9723062515258789,grad_norm: 0.796639349721788, iteration: 323824
loss: 1.0000633001327515,grad_norm: 0.8268636702305403, iteration: 323825
loss: 0.9906539916992188,grad_norm: 0.9617814558025578, iteration: 323826
loss: 0.9927807450294495,grad_norm: 0.6804778276659544, iteration: 323827
loss: 0.9666208028793335,grad_norm: 0.8999629582495432, iteration: 323828
loss: 0.9750552773475647,grad_norm: 0.8064086604935039, iteration: 323829
loss: 1.0146660804748535,grad_norm: 0.8907615486576135, iteration: 323830
loss: 0.9795452952384949,grad_norm: 0.89988269087643, iteration: 323831
loss: 1.0205109119415283,grad_norm: 0.9999995888302604, iteration: 323832
loss: 0.9473577737808228,grad_norm: 0.8767995293797743, iteration: 323833
loss: 1.0124447345733643,grad_norm: 0.8843873930753997, iteration: 323834
loss: 1.0663355588912964,grad_norm: 0.9999991837514216, iteration: 323835
loss: 1.0340293645858765,grad_norm: 0.8546870185327721, iteration: 323836
loss: 1.002082347869873,grad_norm: 0.7461539478095, iteration: 323837
loss: 1.028696060180664,grad_norm: 0.9016229823430407, iteration: 323838
loss: 0.9796695113182068,grad_norm: 0.6638408952295144, iteration: 323839
loss: 1.001909852027893,grad_norm: 0.8013295828930462, iteration: 323840
loss: 0.983180820941925,grad_norm: 0.8682095762942229, iteration: 323841
loss: 0.9836158156394958,grad_norm: 0.999999808485611, iteration: 323842
loss: 0.9806541204452515,grad_norm: 0.7301642351980955, iteration: 323843
loss: 0.961030900478363,grad_norm: 0.9672499476518025, iteration: 323844
loss: 0.9751914739608765,grad_norm: 0.9829179135082121, iteration: 323845
loss: 1.0175257921218872,grad_norm: 0.8097630320657624, iteration: 323846
loss: 1.025518536567688,grad_norm: 0.82348430758668, iteration: 323847
loss: 0.9756072163581848,grad_norm: 0.9068376805736933, iteration: 323848
loss: 0.9824008345603943,grad_norm: 0.6793105682015257, iteration: 323849
loss: 0.9693849086761475,grad_norm: 0.7787853291684778, iteration: 323850
loss: 1.0304477214813232,grad_norm: 0.9999997809527899, iteration: 323851
loss: 0.9898841381072998,grad_norm: 0.9999991915562695, iteration: 323852
loss: 0.9655753970146179,grad_norm: 0.9162830149878199, iteration: 323853
loss: 0.9665123224258423,grad_norm: 0.9440388908366975, iteration: 323854
loss: 0.9895329475402832,grad_norm: 0.8848212570394536, iteration: 323855
loss: 1.0065613985061646,grad_norm: 0.8156224447362512, iteration: 323856
loss: 0.9904117584228516,grad_norm: 0.9955895429443226, iteration: 323857
loss: 0.9659662246704102,grad_norm: 0.9526581182382247, iteration: 323858
loss: 1.0035890340805054,grad_norm: 0.7678551439063076, iteration: 323859
loss: 0.9697940945625305,grad_norm: 0.9999992769124282, iteration: 323860
loss: 0.9964286684989929,grad_norm: 0.7919854963596464, iteration: 323861
loss: 0.9756803512573242,grad_norm: 0.7715809528037556, iteration: 323862
loss: 1.0072728395462036,grad_norm: 0.6192356944272125, iteration: 323863
loss: 1.0495661497116089,grad_norm: 0.9725378531095239, iteration: 323864
loss: 1.0000948905944824,grad_norm: 0.8935138607869794, iteration: 323865
loss: 0.9703242182731628,grad_norm: 0.8273842231598052, iteration: 323866
loss: 0.988114058971405,grad_norm: 0.8878103241024751, iteration: 323867
loss: 0.9911757111549377,grad_norm: 0.8977508086289765, iteration: 323868
loss: 1.0162245035171509,grad_norm: 0.7677713105797591, iteration: 323869
loss: 1.018160104751587,grad_norm: 0.8241624422171099, iteration: 323870
loss: 1.0347895622253418,grad_norm: 0.8222205428824133, iteration: 323871
loss: 1.0291670560836792,grad_norm: 0.7844264220378613, iteration: 323872
loss: 0.9931355118751526,grad_norm: 0.8776078677945233, iteration: 323873
loss: 1.0470653772354126,grad_norm: 0.9999998047893401, iteration: 323874
loss: 1.0209705829620361,grad_norm: 0.7262593716718231, iteration: 323875
loss: 0.9793865084648132,grad_norm: 0.7461469494242596, iteration: 323876
loss: 0.9657895565032959,grad_norm: 0.8701642350532144, iteration: 323877
loss: 1.0291492938995361,grad_norm: 0.9999992110798269, iteration: 323878
loss: 0.9921178221702576,grad_norm: 0.7392858403044033, iteration: 323879
loss: 0.9808411002159119,grad_norm: 0.8327104646892602, iteration: 323880
loss: 1.0301514863967896,grad_norm: 0.9999991140775703, iteration: 323881
loss: 1.050269365310669,grad_norm: 0.7482005694326767, iteration: 323882
loss: 1.0184738636016846,grad_norm: 0.7100198131413692, iteration: 323883
loss: 1.0467039346694946,grad_norm: 0.9248859621128399, iteration: 323884
loss: 1.0448287725448608,grad_norm: 0.9026788007436584, iteration: 323885
loss: 1.0013256072998047,grad_norm: 0.889795260641181, iteration: 323886
loss: 0.9921106696128845,grad_norm: 0.9472092222502576, iteration: 323887
loss: 1.013198971748352,grad_norm: 0.7504195256729509, iteration: 323888
loss: 1.0317214727401733,grad_norm: 0.8381310430223943, iteration: 323889
loss: 0.9771325588226318,grad_norm: 0.7771779053518392, iteration: 323890
loss: 1.0171287059783936,grad_norm: 0.8621870228716758, iteration: 323891
loss: 0.9739581942558289,grad_norm: 0.9493233046155002, iteration: 323892
loss: 0.982642650604248,grad_norm: 0.9999991857302054, iteration: 323893
loss: 0.9572370052337646,grad_norm: 0.724011386558445, iteration: 323894
loss: 1.0047016143798828,grad_norm: 0.8730579087976692, iteration: 323895
loss: 1.0088331699371338,grad_norm: 0.9345955611795067, iteration: 323896
loss: 1.0969507694244385,grad_norm: 0.9999993811374832, iteration: 323897
loss: 0.9803063273429871,grad_norm: 0.9999991814637867, iteration: 323898
loss: 0.9770848155021667,grad_norm: 0.7393691330736684, iteration: 323899
loss: 0.9808920621871948,grad_norm: 0.7927000574580167, iteration: 323900
loss: 0.985429048538208,grad_norm: 0.8750531236800567, iteration: 323901
loss: 1.0049619674682617,grad_norm: 0.8794417430720467, iteration: 323902
loss: 1.0164610147476196,grad_norm: 0.9084349043688849, iteration: 323903
loss: 0.9948306679725647,grad_norm: 0.7925768274663584, iteration: 323904
loss: 1.0173012018203735,grad_norm: 0.7078967275267415, iteration: 323905
loss: 0.9656041860580444,grad_norm: 0.7951277674990946, iteration: 323906
loss: 1.0381094217300415,grad_norm: 0.816664912899502, iteration: 323907
loss: 0.9847460985183716,grad_norm: 0.9999991612155771, iteration: 323908
loss: 0.9623697996139526,grad_norm: 0.8030514692666535, iteration: 323909
loss: 0.9776577949523926,grad_norm: 0.7608921119456359, iteration: 323910
loss: 0.9970884919166565,grad_norm: 0.7953693950097513, iteration: 323911
loss: 1.0206888914108276,grad_norm: 0.8166548317054944, iteration: 323912
loss: 0.9758058190345764,grad_norm: 0.7581359622839751, iteration: 323913
loss: 1.0030893087387085,grad_norm: 0.8832241344459565, iteration: 323914
loss: 0.9919783473014832,grad_norm: 0.7330354704278779, iteration: 323915
loss: 0.9852511882781982,grad_norm: 0.9801805140185187, iteration: 323916
loss: 0.9975017309188843,grad_norm: 0.8662216762431603, iteration: 323917
loss: 0.9970527291297913,grad_norm: 0.8961233031613735, iteration: 323918
loss: 0.9892822504043579,grad_norm: 0.7142989459448474, iteration: 323919
loss: 1.000969409942627,grad_norm: 0.9999989944661386, iteration: 323920
loss: 0.9919390678405762,grad_norm: 0.9999998816922067, iteration: 323921
loss: 1.019487977027893,grad_norm: 0.716123584530583, iteration: 323922
loss: 1.0207115411758423,grad_norm: 0.8533982857021977, iteration: 323923
loss: 1.028322696685791,grad_norm: 0.9999993471036324, iteration: 323924
loss: 0.9972364902496338,grad_norm: 0.9582916937038113, iteration: 323925
loss: 1.0127828121185303,grad_norm: 0.7578378932433002, iteration: 323926
loss: 0.9909085631370544,grad_norm: 0.7463218751789946, iteration: 323927
loss: 1.0345790386199951,grad_norm: 0.8204714975418044, iteration: 323928
loss: 1.049363613128662,grad_norm: 0.9999990962072526, iteration: 323929
loss: 0.9892563819885254,grad_norm: 0.7576005804175608, iteration: 323930
loss: 1.013390064239502,grad_norm: 0.7922003089372857, iteration: 323931
loss: 0.9759097099304199,grad_norm: 0.7951053061050875, iteration: 323932
loss: 0.987026572227478,grad_norm: 0.9999991638837106, iteration: 323933
loss: 1.031399130821228,grad_norm: 0.9999996925128601, iteration: 323934
loss: 1.0077893733978271,grad_norm: 0.999999180906351, iteration: 323935
loss: 1.0433528423309326,grad_norm: 0.7947505483126917, iteration: 323936
loss: 1.0228629112243652,grad_norm: 0.8661271432611596, iteration: 323937
loss: 1.00143301486969,grad_norm: 0.691904273596012, iteration: 323938
loss: 1.001212239265442,grad_norm: 0.8474630774613312, iteration: 323939
loss: 0.9858932495117188,grad_norm: 0.8934594231229515, iteration: 323940
loss: 0.9688912630081177,grad_norm: 0.890056426400237, iteration: 323941
loss: 0.9817450642585754,grad_norm: 0.7906322303150346, iteration: 323942
loss: 1.0083646774291992,grad_norm: 0.893179850617656, iteration: 323943
loss: 0.9972366094589233,grad_norm: 0.8142373735469276, iteration: 323944
loss: 0.9979380369186401,grad_norm: 0.9130955870966385, iteration: 323945
loss: 1.0345205068588257,grad_norm: 0.9713251862226453, iteration: 323946
loss: 1.002812147140503,grad_norm: 0.8561583436529323, iteration: 323947
loss: 1.0447356700897217,grad_norm: 0.9754925190887244, iteration: 323948
loss: 1.0060102939605713,grad_norm: 0.9007637177414286, iteration: 323949
loss: 0.9617568254470825,grad_norm: 0.8593143514298244, iteration: 323950
loss: 0.9994633197784424,grad_norm: 0.958926259022538, iteration: 323951
loss: 0.9682509303092957,grad_norm: 0.8874992552150772, iteration: 323952
loss: 0.9931656122207642,grad_norm: 0.7162025923663679, iteration: 323953
loss: 0.9863889813423157,grad_norm: 0.9999991416011449, iteration: 323954
loss: 0.9946000576019287,grad_norm: 0.9180805434008221, iteration: 323955
loss: 0.9974720478057861,grad_norm: 0.7865462068715391, iteration: 323956
loss: 0.9839050769805908,grad_norm: 0.8238120375912857, iteration: 323957
loss: 1.0448591709136963,grad_norm: 0.9314276879284401, iteration: 323958
loss: 0.990100085735321,grad_norm: 0.9999990791443666, iteration: 323959
loss: 1.0033862590789795,grad_norm: 0.9885455156018973, iteration: 323960
loss: 1.0148941278457642,grad_norm: 0.9999992146975436, iteration: 323961
loss: 1.0011450052261353,grad_norm: 0.9999990528034847, iteration: 323962
loss: 1.027256965637207,grad_norm: 0.944491951400049, iteration: 323963
loss: 0.993167519569397,grad_norm: 0.9318205508118388, iteration: 323964
loss: 1.1288015842437744,grad_norm: 0.9999993184959769, iteration: 323965
loss: 0.9866380095481873,grad_norm: 0.7555627969402009, iteration: 323966
loss: 1.0148112773895264,grad_norm: 0.911185097217938, iteration: 323967
loss: 0.9725000858306885,grad_norm: 0.7827773515557434, iteration: 323968
loss: 1.0148770809173584,grad_norm: 0.9817987603195093, iteration: 323969
loss: 0.9784824848175049,grad_norm: 0.7454294629784181, iteration: 323970
loss: 0.9647173881530762,grad_norm: 0.7751603096245601, iteration: 323971
loss: 1.00364351272583,grad_norm: 0.805868825537783, iteration: 323972
loss: 0.9847553968429565,grad_norm: 0.7921464661251765, iteration: 323973
loss: 0.9737014770507812,grad_norm: 0.7822318798474072, iteration: 323974
loss: 1.0298198461532593,grad_norm: 0.7181652931668462, iteration: 323975
loss: 1.002653956413269,grad_norm: 0.7453339680402609, iteration: 323976
loss: 1.0058008432388306,grad_norm: 0.9057035317072527, iteration: 323977
loss: 1.002238154411316,grad_norm: 0.8469343274336044, iteration: 323978
loss: 0.930648684501648,grad_norm: 0.8796616089224004, iteration: 323979
loss: 1.0339597463607788,grad_norm: 0.8625777058109121, iteration: 323980
loss: 1.0008376836776733,grad_norm: 0.7746844839129016, iteration: 323981
loss: 1.0351779460906982,grad_norm: 0.8235801344710373, iteration: 323982
loss: 1.0195839405059814,grad_norm: 0.9999991131592848, iteration: 323983
loss: 1.0051573514938354,grad_norm: 0.7460470042756343, iteration: 323984
loss: 0.9768745303153992,grad_norm: 0.7627836359942172, iteration: 323985
loss: 0.9768525958061218,grad_norm: 0.7415181031792832, iteration: 323986
loss: 0.9561351537704468,grad_norm: 0.9326770270710487, iteration: 323987
loss: 0.977948009967804,grad_norm: 0.9680650309212873, iteration: 323988
loss: 1.0004360675811768,grad_norm: 0.8455809085944533, iteration: 323989
loss: 0.9880964756011963,grad_norm: 0.8236553306118332, iteration: 323990
loss: 0.9966061115264893,grad_norm: 0.763518103233616, iteration: 323991
loss: 0.9790070056915283,grad_norm: 0.8309200518569352, iteration: 323992
loss: 1.015743613243103,grad_norm: 0.7579015736615974, iteration: 323993
loss: 0.9855937957763672,grad_norm: 0.9294899885072286, iteration: 323994
loss: 1.0158064365386963,grad_norm: 0.7581286227487918, iteration: 323995
loss: 0.9919059872627258,grad_norm: 0.8233066727645845, iteration: 323996
loss: 1.0091885328292847,grad_norm: 0.9236783288201891, iteration: 323997
loss: 1.0163029432296753,grad_norm: 0.7972333832754445, iteration: 323998
loss: 1.0226632356643677,grad_norm: 0.7322311172086237, iteration: 323999
loss: 1.003087043762207,grad_norm: 0.7800620383253204, iteration: 324000
loss: 0.9651538133621216,grad_norm: 0.8417052874337484, iteration: 324001
loss: 0.9990828633308411,grad_norm: 0.7420151345789264, iteration: 324002
loss: 0.9937753677368164,grad_norm: 0.8048266079942834, iteration: 324003
loss: 0.9939985871315002,grad_norm: 0.8926536813310244, iteration: 324004
loss: 1.0139470100402832,grad_norm: 0.8391750488661202, iteration: 324005
loss: 0.9892055988311768,grad_norm: 0.8280866915881144, iteration: 324006
loss: 1.0219359397888184,grad_norm: 0.8179876763900835, iteration: 324007
loss: 1.0059205293655396,grad_norm: 0.999999643277019, iteration: 324008
loss: 0.9809048771858215,grad_norm: 0.8347998920192812, iteration: 324009
loss: 1.045573353767395,grad_norm: 0.765940554958666, iteration: 324010
loss: 1.013895869255066,grad_norm: 0.8432423233377779, iteration: 324011
loss: 1.0147809982299805,grad_norm: 0.8397042197638525, iteration: 324012
loss: 1.0148532390594482,grad_norm: 0.9999991475464591, iteration: 324013
loss: 1.0110400915145874,grad_norm: 0.8651522726029826, iteration: 324014
loss: 1.0061304569244385,grad_norm: 0.7970050662369543, iteration: 324015
loss: 1.057665467262268,grad_norm: 0.7673435310412088, iteration: 324016
loss: 1.0084339380264282,grad_norm: 0.8086889078759093, iteration: 324017
loss: 0.992808997631073,grad_norm: 0.9423146375673342, iteration: 324018
loss: 0.954509437084198,grad_norm: 0.8262614951751278, iteration: 324019
loss: 0.9756495356559753,grad_norm: 0.7541893985707369, iteration: 324020
loss: 1.0032179355621338,grad_norm: 0.8529938568340469, iteration: 324021
loss: 0.9857836961746216,grad_norm: 0.8516277781410326, iteration: 324022
loss: 0.937186062335968,grad_norm: 0.9999989929129841, iteration: 324023
loss: 1.0313148498535156,grad_norm: 0.9999992294256853, iteration: 324024
loss: 1.0225133895874023,grad_norm: 0.8172687957194732, iteration: 324025
loss: 1.010410189628601,grad_norm: 0.6833499433367499, iteration: 324026
loss: 1.0175745487213135,grad_norm: 0.9999997024099009, iteration: 324027
loss: 0.9759652018547058,grad_norm: 0.8537682978398359, iteration: 324028
loss: 0.9987882971763611,grad_norm: 0.9544134472545572, iteration: 324029
loss: 1.1615557670593262,grad_norm: 0.9999997219724618, iteration: 324030
loss: 0.9989672899246216,grad_norm: 0.8385653653145781, iteration: 324031
loss: 0.9965682029724121,grad_norm: 0.950228798321295, iteration: 324032
loss: 0.998387336730957,grad_norm: 0.890290405124385, iteration: 324033
loss: 0.9868149757385254,grad_norm: 0.8832245565796218, iteration: 324034
loss: 0.981284499168396,grad_norm: 0.8843895813363167, iteration: 324035
loss: 1.0027174949645996,grad_norm: 0.8600871123935495, iteration: 324036
loss: 1.1192967891693115,grad_norm: 0.9180564429637377, iteration: 324037
loss: 1.0269771814346313,grad_norm: 0.7147844673133249, iteration: 324038
loss: 0.9760750532150269,grad_norm: 0.811632386696978, iteration: 324039
loss: 1.0044734477996826,grad_norm: 0.742660513873285, iteration: 324040
loss: 1.0142639875411987,grad_norm: 0.7806936341355697, iteration: 324041
loss: 1.0019962787628174,grad_norm: 0.9330524313683864, iteration: 324042
loss: 1.0266224145889282,grad_norm: 0.9999995118723407, iteration: 324043
loss: 1.0345290899276733,grad_norm: 0.8110583433346915, iteration: 324044
loss: 1.0295244455337524,grad_norm: 0.7937913384577904, iteration: 324045
loss: 0.9624047875404358,grad_norm: 0.7972408732471944, iteration: 324046
loss: 1.0004979372024536,grad_norm: 0.661313571359896, iteration: 324047
loss: 1.0331497192382812,grad_norm: 0.8050234032537195, iteration: 324048
loss: 1.0087014436721802,grad_norm: 0.9692620024282524, iteration: 324049
loss: 1.022006630897522,grad_norm: 0.9006375667488837, iteration: 324050
loss: 0.980523943901062,grad_norm: 0.9516313382570321, iteration: 324051
loss: 1.0025432109832764,grad_norm: 0.8414636300399199, iteration: 324052
loss: 1.020196557044983,grad_norm: 0.9999993858881372, iteration: 324053
loss: 1.0015469789505005,grad_norm: 0.8992688057808602, iteration: 324054
loss: 1.0118587017059326,grad_norm: 0.719808669169922, iteration: 324055
loss: 1.1803786754608154,grad_norm: 0.9999997159960351, iteration: 324056
loss: 1.0169284343719482,grad_norm: 0.6917014215886813, iteration: 324057
loss: 0.9908037781715393,grad_norm: 0.9534479155911815, iteration: 324058
loss: 1.0228586196899414,grad_norm: 0.9999991908830536, iteration: 324059
loss: 1.000130295753479,grad_norm: 0.8268099519015434, iteration: 324060
loss: 1.0323973894119263,grad_norm: 0.9103881955555635, iteration: 324061
loss: 1.0311903953552246,grad_norm: 0.8076716498860167, iteration: 324062
loss: 0.9920616745948792,grad_norm: 0.936000201499373, iteration: 324063
loss: 1.0075803995132446,grad_norm: 0.9586617674697909, iteration: 324064
loss: 1.016213059425354,grad_norm: 0.6854530127270071, iteration: 324065
loss: 1.022861361503601,grad_norm: 0.9396937115680748, iteration: 324066
loss: 0.9940266013145447,grad_norm: 0.9178341901464133, iteration: 324067
loss: 1.0347115993499756,grad_norm: 0.8405209257160121, iteration: 324068
loss: 1.0589083433151245,grad_norm: 0.7828951157505194, iteration: 324069
loss: 1.0081619024276733,grad_norm: 0.9533451587867715, iteration: 324070
loss: 0.9559154510498047,grad_norm: 0.8166538290109832, iteration: 324071
loss: 0.9701035022735596,grad_norm: 0.7858374888639672, iteration: 324072
loss: 1.0159714221954346,grad_norm: 0.7695288854522817, iteration: 324073
loss: 1.0122920274734497,grad_norm: 0.7200161219161719, iteration: 324074
loss: 1.024282455444336,grad_norm: 0.8888846561673697, iteration: 324075
loss: 1.0240970849990845,grad_norm: 0.8236226445419802, iteration: 324076
loss: 1.061063289642334,grad_norm: 0.9999990817233116, iteration: 324077
loss: 1.021774411201477,grad_norm: 0.9214844532994246, iteration: 324078
loss: 0.9577386379241943,grad_norm: 0.8741001855482139, iteration: 324079
loss: 1.0383613109588623,grad_norm: 0.8410431810346825, iteration: 324080
loss: 1.0013924837112427,grad_norm: 0.7944207133895596, iteration: 324081
loss: 0.9808016419410706,grad_norm: 0.7248727802217413, iteration: 324082
loss: 1.0199086666107178,grad_norm: 0.8667547966580788, iteration: 324083
loss: 0.952612042427063,grad_norm: 0.8066683102950136, iteration: 324084
loss: 0.9626826047897339,grad_norm: 0.7912426044745895, iteration: 324085
loss: 1.050038456916809,grad_norm: 0.9999996150223053, iteration: 324086
loss: 1.017610788345337,grad_norm: 0.8872932924797515, iteration: 324087
loss: 1.0178660154342651,grad_norm: 0.8624946058130633, iteration: 324088
loss: 1.0398539304733276,grad_norm: 0.8813433529700431, iteration: 324089
loss: 0.9779689311981201,grad_norm: 0.8079877561854996, iteration: 324090
loss: 1.0099056959152222,grad_norm: 0.7146178751420798, iteration: 324091
loss: 1.0121750831604004,grad_norm: 0.8917772976773731, iteration: 324092
loss: 1.0044537782669067,grad_norm: 0.8117523802843046, iteration: 324093
loss: 0.9917147159576416,grad_norm: 0.8083541327424764, iteration: 324094
loss: 1.0212498903274536,grad_norm: 0.8443011808587018, iteration: 324095
loss: 1.054513931274414,grad_norm: 0.9999994892983187, iteration: 324096
loss: 0.9909300208091736,grad_norm: 0.9812933612023894, iteration: 324097
loss: 0.9750460982322693,grad_norm: 0.8811649043990935, iteration: 324098
loss: 0.9854592680931091,grad_norm: 0.7430776100990588, iteration: 324099
loss: 1.0043212175369263,grad_norm: 0.8425175862436399, iteration: 324100
loss: 0.9861422777175903,grad_norm: 0.9999989965831546, iteration: 324101
loss: 0.9991073608398438,grad_norm: 0.938034431174136, iteration: 324102
loss: 0.9672221541404724,grad_norm: 0.928065679774714, iteration: 324103
loss: 0.9931365847587585,grad_norm: 0.824317900226422, iteration: 324104
loss: 0.9859265089035034,grad_norm: 0.8566855519432973, iteration: 324105
loss: 0.976718008518219,grad_norm: 0.787007309655921, iteration: 324106
loss: 1.0319099426269531,grad_norm: 0.7379424194989012, iteration: 324107
loss: 1.0056416988372803,grad_norm: 0.7164245032126222, iteration: 324108
loss: 1.0095903873443604,grad_norm: 0.8125785210357725, iteration: 324109
loss: 1.0036956071853638,grad_norm: 0.8306063740044665, iteration: 324110
loss: 1.0443280935287476,grad_norm: 0.8308071767478652, iteration: 324111
loss: 1.0044910907745361,grad_norm: 0.7238189488483066, iteration: 324112
loss: 0.973885178565979,grad_norm: 0.9289459854708294, iteration: 324113
loss: 1.0196855068206787,grad_norm: 0.877218100494068, iteration: 324114
loss: 0.985970675945282,grad_norm: 0.999999137427139, iteration: 324115
loss: 1.0148365497589111,grad_norm: 0.8441867061508891, iteration: 324116
loss: 1.1712082624435425,grad_norm: 0.9999991444365486, iteration: 324117
loss: 1.0196465253829956,grad_norm: 0.8378378622485425, iteration: 324118
loss: 1.021375298500061,grad_norm: 0.9633135745938474, iteration: 324119
loss: 0.9903172850608826,grad_norm: 0.7297553650374035, iteration: 324120
loss: 0.9912322163581848,grad_norm: 0.7720246263705342, iteration: 324121
loss: 0.9912262558937073,grad_norm: 0.8234079440232838, iteration: 324122
loss: 1.0130691528320312,grad_norm: 0.7862971390863418, iteration: 324123
loss: 1.022387981414795,grad_norm: 0.7678293917258383, iteration: 324124
loss: 0.9905652403831482,grad_norm: 0.722702213470053, iteration: 324125
loss: 1.0034462213516235,grad_norm: 0.8267859129518114, iteration: 324126
loss: 1.1659739017486572,grad_norm: 0.9999996801494605, iteration: 324127
loss: 1.0218638181686401,grad_norm: 0.7395787770518351, iteration: 324128
loss: 0.9977356791496277,grad_norm: 0.8869816328137723, iteration: 324129
loss: 0.9712635278701782,grad_norm: 0.7663733148426248, iteration: 324130
loss: 0.9966345429420471,grad_norm: 0.8403849906174122, iteration: 324131
loss: 1.0162838697433472,grad_norm: 0.7186695658503016, iteration: 324132
loss: 0.9955360293388367,grad_norm: 0.8711635905892647, iteration: 324133
loss: 1.0386821031570435,grad_norm: 0.9999992378347425, iteration: 324134
loss: 0.9733060598373413,grad_norm: 0.853663748654259, iteration: 324135
loss: 1.0053074359893799,grad_norm: 0.8027692776474148, iteration: 324136
loss: 1.0382225513458252,grad_norm: 0.9999991581898816, iteration: 324137
loss: 1.013257384300232,grad_norm: 0.8045497026507532, iteration: 324138
loss: 0.9861305952072144,grad_norm: 0.7643291342243882, iteration: 324139
loss: 0.9794559478759766,grad_norm: 0.930591760643499, iteration: 324140
loss: 1.0169585943222046,grad_norm: 0.7107101345655417, iteration: 324141
loss: 0.9821736216545105,grad_norm: 0.7489385531814841, iteration: 324142
loss: 1.030738353729248,grad_norm: 0.9999996726838786, iteration: 324143
loss: 0.994973361492157,grad_norm: 0.812157399862861, iteration: 324144
loss: 0.9536399245262146,grad_norm: 0.9364998252026152, iteration: 324145
loss: 0.9882426261901855,grad_norm: 0.7774103196676949, iteration: 324146
loss: 0.9940412640571594,grad_norm: 0.7978791816919547, iteration: 324147
loss: 1.0260707139968872,grad_norm: 0.732511734453284, iteration: 324148
loss: 0.9862027764320374,grad_norm: 0.6839888616015183, iteration: 324149
loss: 1.0128041505813599,grad_norm: 0.8557198243678698, iteration: 324150
loss: 0.9759187698364258,grad_norm: 0.9999989326006747, iteration: 324151
loss: 0.9923904538154602,grad_norm: 0.9045138335037163, iteration: 324152
loss: 0.9855777025222778,grad_norm: 0.8907796954408397, iteration: 324153
loss: 0.998199999332428,grad_norm: 0.9155679016175812, iteration: 324154
loss: 1.0040773153305054,grad_norm: 0.8041805297018761, iteration: 324155
loss: 1.0034748315811157,grad_norm: 0.9999992730578908, iteration: 324156
loss: 1.0244204998016357,grad_norm: 0.6953007452536404, iteration: 324157
loss: 1.0557861328125,grad_norm: 0.9999990387866898, iteration: 324158
loss: 0.9977641701698303,grad_norm: 0.7905854464080925, iteration: 324159
loss: 1.0141079425811768,grad_norm: 0.8029763844436161, iteration: 324160
loss: 1.006689429283142,grad_norm: 0.8880076760591025, iteration: 324161
loss: 1.0312631130218506,grad_norm: 0.8775605561513096, iteration: 324162
loss: 0.9919842481613159,grad_norm: 0.7885851911214391, iteration: 324163
loss: 0.9883459210395813,grad_norm: 0.9014811905715269, iteration: 324164
loss: 1.0268051624298096,grad_norm: 0.812034859666595, iteration: 324165
loss: 1.00047767162323,grad_norm: 0.9999989597876873, iteration: 324166
loss: 1.0003536939620972,grad_norm: 0.9222931341516946, iteration: 324167
loss: 1.0327521562576294,grad_norm: 0.8387801439537222, iteration: 324168
loss: 0.9980340600013733,grad_norm: 0.7771234787107968, iteration: 324169
loss: 0.9890012741088867,grad_norm: 0.946716891869474, iteration: 324170
loss: 1.0007209777832031,grad_norm: 0.8435808835490193, iteration: 324171
loss: 0.9917591214179993,grad_norm: 0.9999991485147407, iteration: 324172
loss: 1.0171763896942139,grad_norm: 0.7565271652694532, iteration: 324173
loss: 0.949548602104187,grad_norm: 0.8099235352266378, iteration: 324174
loss: 1.0087530612945557,grad_norm: 0.7862087727877429, iteration: 324175
loss: 1.0339490175247192,grad_norm: 0.9116917417294578, iteration: 324176
loss: 0.9523057341575623,grad_norm: 0.8287487138069591, iteration: 324177
loss: 0.9992344975471497,grad_norm: 0.9999990548523723, iteration: 324178
loss: 1.0621066093444824,grad_norm: 0.9850779509307199, iteration: 324179
loss: 0.994702160358429,grad_norm: 0.8325459985626974, iteration: 324180
loss: 0.9661523699760437,grad_norm: 0.7543194757412625, iteration: 324181
loss: 0.9809927344322205,grad_norm: 0.788555807020655, iteration: 324182
loss: 1.197422742843628,grad_norm: 0.9999995474029723, iteration: 324183
loss: 1.0452982187271118,grad_norm: 0.9254098988114067, iteration: 324184
loss: 1.0081804990768433,grad_norm: 0.8712503108370406, iteration: 324185
loss: 1.002914547920227,grad_norm: 0.6931085631961204, iteration: 324186
loss: 1.033657431602478,grad_norm: 0.946865938905517, iteration: 324187
loss: 0.9760478734970093,grad_norm: 0.7669564701589131, iteration: 324188
loss: 1.0109813213348389,grad_norm: 0.7567417350575609, iteration: 324189
loss: 0.9982249140739441,grad_norm: 0.7269991549956458, iteration: 324190
loss: 1.0157510042190552,grad_norm: 0.9999990667650694, iteration: 324191
loss: 0.9851630926132202,grad_norm: 0.855186237540049, iteration: 324192
loss: 0.9607828259468079,grad_norm: 0.6468158048659314, iteration: 324193
loss: 0.9593822360038757,grad_norm: 0.9999990473185998, iteration: 324194
loss: 1.0152239799499512,grad_norm: 0.9999990612417837, iteration: 324195
loss: 0.9706165194511414,grad_norm: 0.6857735743513029, iteration: 324196
loss: 0.9880452752113342,grad_norm: 0.9198779621005533, iteration: 324197
loss: 1.0429543256759644,grad_norm: 0.7354530257402018, iteration: 324198
loss: 0.9720771312713623,grad_norm: 0.8831739069012574, iteration: 324199
loss: 0.993293285369873,grad_norm: 0.7877731610488155, iteration: 324200
loss: 1.056681513786316,grad_norm: 0.861617655642526, iteration: 324201
loss: 1.0041507482528687,grad_norm: 0.8235914599402397, iteration: 324202
loss: 1.0089714527130127,grad_norm: 0.8528801679801488, iteration: 324203
loss: 1.0185531377792358,grad_norm: 0.9999991382117792, iteration: 324204
loss: 1.0039069652557373,grad_norm: 0.817091988742078, iteration: 324205
loss: 0.9914349317550659,grad_norm: 0.9999989874704965, iteration: 324206
loss: 0.9785813093185425,grad_norm: 0.817488455488875, iteration: 324207
loss: 0.9755515456199646,grad_norm: 0.8785512736144448, iteration: 324208
loss: 0.9662820100784302,grad_norm: 0.8828697511857986, iteration: 324209
loss: 1.03117036819458,grad_norm: 0.8696446555974167, iteration: 324210
loss: 1.1037284135818481,grad_norm: 0.876553203616861, iteration: 324211
loss: 0.9827179908752441,grad_norm: 0.7005192333357229, iteration: 324212
loss: 1.0142242908477783,grad_norm: 0.9999998000691819, iteration: 324213
loss: 0.9984843134880066,grad_norm: 0.6705482465861671, iteration: 324214
loss: 0.9998986124992371,grad_norm: 0.9999990009166352, iteration: 324215
loss: 0.9713720083236694,grad_norm: 0.8447006147233803, iteration: 324216
loss: 1.0195032358169556,grad_norm: 0.9999990832117008, iteration: 324217
loss: 0.9989539980888367,grad_norm: 0.9924669403717109, iteration: 324218
loss: 1.1657893657684326,grad_norm: 0.9999998628815543, iteration: 324219
loss: 0.9818093180656433,grad_norm: 0.9028681291499319, iteration: 324220
loss: 0.9712403416633606,grad_norm: 0.783092844236677, iteration: 324221
loss: 1.0317955017089844,grad_norm: 0.9210236118630822, iteration: 324222
loss: 0.9980413913726807,grad_norm: 0.9185831911196685, iteration: 324223
loss: 0.9862555265426636,grad_norm: 0.7829265292383498, iteration: 324224
loss: 1.005995512008667,grad_norm: 0.8423455607927678, iteration: 324225
loss: 0.9875543117523193,grad_norm: 0.9226095077002502, iteration: 324226
loss: 0.9925541281700134,grad_norm: 0.8238351185654732, iteration: 324227
loss: 1.0102511644363403,grad_norm: 0.7108083257308893, iteration: 324228
loss: 1.0123401880264282,grad_norm: 0.7491777380599589, iteration: 324229
loss: 0.9872730374336243,grad_norm: 0.8042397145419447, iteration: 324230
loss: 0.9852535128593445,grad_norm: 0.7825910017215348, iteration: 324231
loss: 0.956540584564209,grad_norm: 0.9207403985999243, iteration: 324232
loss: 1.045101284980774,grad_norm: 0.6874565668301429, iteration: 324233
loss: 1.005005955696106,grad_norm: 0.748795079007317, iteration: 324234
loss: 1.018320083618164,grad_norm: 0.8613690793728772, iteration: 324235
loss: 1.0774564743041992,grad_norm: 0.9851390873725517, iteration: 324236
loss: 1.2889792919158936,grad_norm: 0.9999995389492206, iteration: 324237
loss: 1.0280539989471436,grad_norm: 0.9999990212563876, iteration: 324238
loss: 1.0559730529785156,grad_norm: 0.9469699244952152, iteration: 324239
loss: 0.981528103351593,grad_norm: 0.9827631348869893, iteration: 324240
loss: 1.0406371355056763,grad_norm: 0.9392009450180671, iteration: 324241
loss: 1.0182337760925293,grad_norm: 0.845184939816371, iteration: 324242
loss: 1.0032038688659668,grad_norm: 0.9583108748406356, iteration: 324243
loss: 0.955901563167572,grad_norm: 0.9580443862914856, iteration: 324244
loss: 1.0096646547317505,grad_norm: 0.7151503727511325, iteration: 324245
loss: 0.9862247705459595,grad_norm: 0.7367644140178848, iteration: 324246
loss: 0.9701415300369263,grad_norm: 0.8745544071472993, iteration: 324247
loss: 1.0319396257400513,grad_norm: 0.8222491242319294, iteration: 324248
loss: 0.9955117702484131,grad_norm: 0.9375239650543015, iteration: 324249
loss: 0.9757198095321655,grad_norm: 0.7703700862077479, iteration: 324250
loss: 0.9861982464790344,grad_norm: 0.8836017647281831, iteration: 324251
loss: 0.9911233186721802,grad_norm: 0.7484103880715537, iteration: 324252
loss: 1.0286110639572144,grad_norm: 0.8052102553525228, iteration: 324253
loss: 1.0555016994476318,grad_norm: 0.9999997900602797, iteration: 324254
loss: 1.0186676979064941,grad_norm: 0.9815991624485036, iteration: 324255
loss: 0.9848310351371765,grad_norm: 0.9316327366804374, iteration: 324256
loss: 1.0282655954360962,grad_norm: 0.999999165820221, iteration: 324257
loss: 1.010227084159851,grad_norm: 0.9380746106115512, iteration: 324258
loss: 0.9808306097984314,grad_norm: 0.8275148799275335, iteration: 324259
loss: 1.0098412036895752,grad_norm: 0.7919434954752905, iteration: 324260
loss: 1.0264335870742798,grad_norm: 0.7481162828995498, iteration: 324261
loss: 1.0145915746688843,grad_norm: 0.8853045534131062, iteration: 324262
loss: 1.0266035795211792,grad_norm: 0.8665996667855197, iteration: 324263
loss: 1.093603491783142,grad_norm: 0.8615932194709498, iteration: 324264
loss: 1.1250941753387451,grad_norm: 0.8995901868465616, iteration: 324265
loss: 0.998356282711029,grad_norm: 0.9234481854351746, iteration: 324266
loss: 1.0350193977355957,grad_norm: 0.9999996426178843, iteration: 324267
loss: 1.02997624874115,grad_norm: 0.9643497650317315, iteration: 324268
loss: 1.0201671123504639,grad_norm: 0.844551044248209, iteration: 324269
loss: 1.0133943557739258,grad_norm: 0.9838064044796759, iteration: 324270
loss: 1.000437617301941,grad_norm: 0.7065996688226063, iteration: 324271
loss: 0.986531138420105,grad_norm: 0.843099460457918, iteration: 324272
loss: 1.0238367319107056,grad_norm: 0.7527372709684665, iteration: 324273
loss: 0.9892033338546753,grad_norm: 0.7702012992606528, iteration: 324274
loss: 0.9820740222930908,grad_norm: 0.878653880045768, iteration: 324275
loss: 1.0246244668960571,grad_norm: 0.8825298034642738, iteration: 324276
loss: 0.9923827648162842,grad_norm: 0.7910952901280979, iteration: 324277
loss: 1.0304762125015259,grad_norm: 0.8420531791186786, iteration: 324278
loss: 1.0406445264816284,grad_norm: 0.8945759456553644, iteration: 324279
loss: 1.033495545387268,grad_norm: 0.999998985998123, iteration: 324280
loss: 0.9946926236152649,grad_norm: 0.7582232072330194, iteration: 324281
loss: 1.0015449523925781,grad_norm: 0.8607936424592142, iteration: 324282
loss: 1.007903814315796,grad_norm: 0.8767252137573106, iteration: 324283
loss: 0.9830160737037659,grad_norm: 0.9369507620239842, iteration: 324284
loss: 1.0175412893295288,grad_norm: 0.970468921740262, iteration: 324285
loss: 1.059650182723999,grad_norm: 0.9999992580446269, iteration: 324286
loss: 1.0096651315689087,grad_norm: 0.8984129330761433, iteration: 324287
loss: 0.9900992512702942,grad_norm: 0.7950647707181051, iteration: 324288
loss: 1.013963222503662,grad_norm: 0.8988138134026734, iteration: 324289
loss: 0.9933803677558899,grad_norm: 0.9406293351832516, iteration: 324290
loss: 1.0026769638061523,grad_norm: 0.8631649249308453, iteration: 324291
loss: 1.0226454734802246,grad_norm: 0.9999994677765798, iteration: 324292
loss: 1.0201503038406372,grad_norm: 0.9999990520484484, iteration: 324293
loss: 0.9888118505477905,grad_norm: 0.7546723425013162, iteration: 324294
loss: 1.0000845193862915,grad_norm: 0.8116162442857038, iteration: 324295
loss: 1.0031200647354126,grad_norm: 0.9533551199858171, iteration: 324296
loss: 1.0372717380523682,grad_norm: 0.9814047599863449, iteration: 324297
loss: 1.0074973106384277,grad_norm: 0.7808926389314614, iteration: 324298
loss: 0.9904998540878296,grad_norm: 0.9999992152083937, iteration: 324299
loss: 1.0357487201690674,grad_norm: 0.9999994584544698, iteration: 324300
loss: 1.014276385307312,grad_norm: 0.8185265829311262, iteration: 324301
loss: 1.0018949508666992,grad_norm: 0.9725899345091532, iteration: 324302
loss: 1.0243926048278809,grad_norm: 0.989243745014676, iteration: 324303
loss: 1.0029420852661133,grad_norm: 0.8105321154737203, iteration: 324304
loss: 1.0276780128479004,grad_norm: 0.9824912252225934, iteration: 324305
loss: 0.9959287643432617,grad_norm: 0.8489867076176508, iteration: 324306
loss: 1.006210446357727,grad_norm: 0.8154853018370565, iteration: 324307
loss: 1.094367504119873,grad_norm: 0.9999991208259111, iteration: 324308
loss: 0.995615541934967,grad_norm: 0.6997008423655059, iteration: 324309
loss: 1.0371239185333252,grad_norm: 0.8702865531984096, iteration: 324310
loss: 1.013156533241272,grad_norm: 0.7587470851491925, iteration: 324311
loss: 0.9649326801300049,grad_norm: 0.7073032288822714, iteration: 324312
loss: 0.9848289489746094,grad_norm: 0.8443980696236729, iteration: 324313
loss: 0.9894961714744568,grad_norm: 0.7520491808088251, iteration: 324314
loss: 0.9736490249633789,grad_norm: 0.8241487394610051, iteration: 324315
loss: 0.9955761432647705,grad_norm: 0.7688394559163855, iteration: 324316
loss: 0.9760255813598633,grad_norm: 0.8734483910475451, iteration: 324317
loss: 0.9806930422782898,grad_norm: 0.7915255579452852, iteration: 324318
loss: 1.0144022703170776,grad_norm: 0.8252330706729281, iteration: 324319
loss: 0.9763825535774231,grad_norm: 0.9041855874913395, iteration: 324320
loss: 0.9912722110748291,grad_norm: 0.7988889324992932, iteration: 324321
loss: 1.001731038093567,grad_norm: 0.8825000554338318, iteration: 324322
loss: 1.0227587223052979,grad_norm: 0.9999994728441184, iteration: 324323
loss: 1.0022106170654297,grad_norm: 0.9291025516031801, iteration: 324324
loss: 1.0264476537704468,grad_norm: 0.8445556871597782, iteration: 324325
loss: 0.9696153402328491,grad_norm: 0.9563810208441371, iteration: 324326
loss: 0.9926698803901672,grad_norm: 0.6493078626708072, iteration: 324327
loss: 0.9483873248100281,grad_norm: 0.9116626590410989, iteration: 324328
loss: 1.0102440118789673,grad_norm: 0.9999991680805405, iteration: 324329
loss: 1.015593409538269,grad_norm: 0.9999992145894592, iteration: 324330
loss: 0.9797040224075317,grad_norm: 0.7981482217314408, iteration: 324331
loss: 0.9866561889648438,grad_norm: 0.9230672613207294, iteration: 324332
loss: 0.9853441715240479,grad_norm: 0.8614195804251791, iteration: 324333
loss: 1.0429404973983765,grad_norm: 0.9999995834496362, iteration: 324334
loss: 1.0118279457092285,grad_norm: 0.8283216267796317, iteration: 324335
loss: 1.0195964574813843,grad_norm: 0.9972667982110383, iteration: 324336
loss: 1.05880868434906,grad_norm: 1.0000000354894079, iteration: 324337
loss: 0.9683874249458313,grad_norm: 0.6844269930185192, iteration: 324338
loss: 1.0042166709899902,grad_norm: 0.8129159180013904, iteration: 324339
loss: 0.9897500276565552,grad_norm: 0.7653613253617293, iteration: 324340
loss: 1.0222899913787842,grad_norm: 0.8661167565321812, iteration: 324341
loss: 1.0294245481491089,grad_norm: 0.8436394713412074, iteration: 324342
loss: 1.010788083076477,grad_norm: 0.7875552437073742, iteration: 324343
loss: 1.011605143547058,grad_norm: 0.9999992835092057, iteration: 324344
loss: 0.9654408097267151,grad_norm: 0.7867719531327174, iteration: 324345
loss: 1.002395510673523,grad_norm: 0.7566665809089907, iteration: 324346
loss: 1.0198299884796143,grad_norm: 0.9005436312597578, iteration: 324347
loss: 0.9746887683868408,grad_norm: 0.7826382072643192, iteration: 324348
loss: 0.9724181294441223,grad_norm: 0.78381329933399, iteration: 324349
loss: 0.9946844577789307,grad_norm: 0.6847582686398203, iteration: 324350
loss: 1.0115323066711426,grad_norm: 0.8517003914794666, iteration: 324351
loss: 0.9802768230438232,grad_norm: 0.8708650647082697, iteration: 324352
loss: 1.0484769344329834,grad_norm: 0.9999994571241537, iteration: 324353
loss: 0.9676696062088013,grad_norm: 0.7758580460014805, iteration: 324354
loss: 0.9948522448539734,grad_norm: 0.7633444520554248, iteration: 324355
loss: 1.034490942955017,grad_norm: 0.7750683690460899, iteration: 324356
loss: 0.9845726490020752,grad_norm: 0.8071935348187385, iteration: 324357
loss: 0.97218918800354,grad_norm: 0.9333640427529512, iteration: 324358
loss: 1.0159504413604736,grad_norm: 0.7739229586252943, iteration: 324359
loss: 0.9595630168914795,grad_norm: 0.8293782535091478, iteration: 324360
loss: 1.0228615999221802,grad_norm: 0.7707937536728828, iteration: 324361
loss: 0.9821459054946899,grad_norm: 0.9998359296220265, iteration: 324362
loss: 0.9726499319076538,grad_norm: 0.7536239167179873, iteration: 324363
loss: 1.0259822607040405,grad_norm: 0.8005628031450552, iteration: 324364
loss: 1.0127805471420288,grad_norm: 0.871354825511855, iteration: 324365
loss: 1.0312001705169678,grad_norm: 0.9351814657964659, iteration: 324366
loss: 0.9920249581336975,grad_norm: 0.902820112932358, iteration: 324367
loss: 0.9844951033592224,grad_norm: 0.6547124664202679, iteration: 324368
loss: 0.9956538677215576,grad_norm: 0.8562789579979055, iteration: 324369
loss: 0.9961937069892883,grad_norm: 0.8238585123786686, iteration: 324370
loss: 0.9864213466644287,grad_norm: 0.9250272549705808, iteration: 324371
loss: 0.9961355924606323,grad_norm: 0.8988139361445354, iteration: 324372
loss: 0.9908016324043274,grad_norm: 0.7444922873331989, iteration: 324373
loss: 1.0121349096298218,grad_norm: 0.8223736839771375, iteration: 324374
loss: 1.0152835845947266,grad_norm: 0.896717588394083, iteration: 324375
loss: 0.9971617460250854,grad_norm: 0.8208072685444127, iteration: 324376
loss: 1.0161494016647339,grad_norm: 0.9530869809249911, iteration: 324377
loss: 1.034827470779419,grad_norm: 0.8759720081376854, iteration: 324378
loss: 1.0168981552124023,grad_norm: 0.945488220219337, iteration: 324379
loss: 1.0355942249298096,grad_norm: 0.8964299346407344, iteration: 324380
loss: 0.9867985248565674,grad_norm: 0.7370896134901125, iteration: 324381
loss: 0.9812789559364319,grad_norm: 0.9999992908121625, iteration: 324382
loss: 1.0089482069015503,grad_norm: 0.9097274540752751, iteration: 324383
loss: 0.9973211884498596,grad_norm: 0.8772275705143542, iteration: 324384
loss: 1.0171358585357666,grad_norm: 0.7968631197838839, iteration: 324385
loss: 1.0206024646759033,grad_norm: 0.995846096555728, iteration: 324386
loss: 0.97208172082901,grad_norm: 0.8222131080663682, iteration: 324387
loss: 1.00022554397583,grad_norm: 0.9999990627805655, iteration: 324388
loss: 1.0042638778686523,grad_norm: 0.9162627893598907, iteration: 324389
loss: 0.960060179233551,grad_norm: 0.9451936492725819, iteration: 324390
loss: 1.0372779369354248,grad_norm: 0.9999992175753193, iteration: 324391
loss: 1.0282248258590698,grad_norm: 0.9751852754457354, iteration: 324392
loss: 1.0647711753845215,grad_norm: 0.9999995588987911, iteration: 324393
loss: 0.96873939037323,grad_norm: 0.8312065252369851, iteration: 324394
loss: 1.0310537815093994,grad_norm: 0.7946068855756105, iteration: 324395
loss: 1.012648582458496,grad_norm: 0.8374074422057017, iteration: 324396
loss: 0.9558017253875732,grad_norm: 0.8673529808374363, iteration: 324397
loss: 0.9979154467582703,grad_norm: 0.7780356950484516, iteration: 324398
loss: 1.0033633708953857,grad_norm: 0.6657759768343681, iteration: 324399
loss: 1.0415884256362915,grad_norm: 0.9799896690853491, iteration: 324400
loss: 1.0155689716339111,grad_norm: 0.8403825741618234, iteration: 324401
loss: 1.0433648824691772,grad_norm: 0.8638522732959354, iteration: 324402
loss: 0.992120623588562,grad_norm: 0.8436558757365563, iteration: 324403
loss: 0.9972240924835205,grad_norm: 0.9458513195780061, iteration: 324404
loss: 1.0111790895462036,grad_norm: 0.8679947874411847, iteration: 324405
loss: 0.974431037902832,grad_norm: 0.9999991190338624, iteration: 324406
loss: 1.030259370803833,grad_norm: 0.6700575871522767, iteration: 324407
loss: 1.011484146118164,grad_norm: 0.859344957149332, iteration: 324408
loss: 1.003574252128601,grad_norm: 0.9999989619048042, iteration: 324409
loss: 0.9870015978813171,grad_norm: 0.8970031376812555, iteration: 324410
loss: 0.9786426424980164,grad_norm: 0.6815464940604691, iteration: 324411
loss: 0.9917202591896057,grad_norm: 0.9307338444690042, iteration: 324412
loss: 1.0118564367294312,grad_norm: 0.855950562855972, iteration: 324413
loss: 1.006408452987671,grad_norm: 0.6666871963715805, iteration: 324414
loss: 1.0186917781829834,grad_norm: 0.9999990504350487, iteration: 324415
loss: 1.001384973526001,grad_norm: 0.8934325786259717, iteration: 324416
loss: 0.9896799325942993,grad_norm: 0.752024518487605, iteration: 324417
loss: 1.024185061454773,grad_norm: 0.8583196471032055, iteration: 324418
loss: 0.9938216209411621,grad_norm: 0.8100901719332303, iteration: 324419
loss: 1.0286962985992432,grad_norm: 0.742568519805877, iteration: 324420
loss: 0.9644497036933899,grad_norm: 0.8362885304805887, iteration: 324421
loss: 0.9831587672233582,grad_norm: 0.7532009644239707, iteration: 324422
loss: 0.9946874976158142,grad_norm: 0.814670257022145, iteration: 324423
loss: 1.0126936435699463,grad_norm: 0.9999994990891705, iteration: 324424
loss: 0.9961672425270081,grad_norm: 0.9353299878993854, iteration: 324425
loss: 1.008934497833252,grad_norm: 0.831033474574017, iteration: 324426
loss: 1.0048496723175049,grad_norm: 0.9999997346260072, iteration: 324427
loss: 0.9935663938522339,grad_norm: 0.7560096468048763, iteration: 324428
loss: 1.0121315717697144,grad_norm: 0.9401731049118498, iteration: 324429
loss: 1.002889633178711,grad_norm: 0.8735185630917158, iteration: 324430
loss: 1.0417662858963013,grad_norm: 0.8171510654882684, iteration: 324431
loss: 0.9895820617675781,grad_norm: 0.9514603224709148, iteration: 324432
loss: 0.982011079788208,grad_norm: 0.7175229012986323, iteration: 324433
loss: 0.9995644092559814,grad_norm: 0.7421104834557473, iteration: 324434
loss: 0.9659031629562378,grad_norm: 0.7843043027706831, iteration: 324435
loss: 0.9915022253990173,grad_norm: 0.7140694742811288, iteration: 324436
loss: 0.9959895014762878,grad_norm: 0.796225294354422, iteration: 324437
loss: 1.0178354978561401,grad_norm: 0.9059607876932589, iteration: 324438
loss: 1.003176212310791,grad_norm: 0.8798794833292807, iteration: 324439
loss: 1.0119147300720215,grad_norm: 0.8295966655147226, iteration: 324440
loss: 1.0245050191879272,grad_norm: 0.7675771759141443, iteration: 324441
loss: 1.0082058906555176,grad_norm: 0.8505381149590495, iteration: 324442
loss: 0.992872416973114,grad_norm: 0.8707956637602768, iteration: 324443
loss: 1.0142149925231934,grad_norm: 0.916997565683211, iteration: 324444
loss: 0.9994648098945618,grad_norm: 0.9205257952913973, iteration: 324445
loss: 0.9890627264976501,grad_norm: 0.8356504702221624, iteration: 324446
loss: 1.0008858442306519,grad_norm: 0.9999989838726263, iteration: 324447
loss: 0.9566182494163513,grad_norm: 0.8091101363295212, iteration: 324448
loss: 1.0274549722671509,grad_norm: 0.8493225428419791, iteration: 324449
loss: 1.0031075477600098,grad_norm: 0.7889452344429645, iteration: 324450
loss: 1.0238022804260254,grad_norm: 0.9022053408411945, iteration: 324451
loss: 1.0119237899780273,grad_norm: 0.8120809061611441, iteration: 324452
loss: 1.0764644145965576,grad_norm: 0.9999997250754419, iteration: 324453
loss: 1.04291832447052,grad_norm: 0.8743273629816121, iteration: 324454
loss: 0.9913108348846436,grad_norm: 0.8478447001013161, iteration: 324455
loss: 1.0185819864273071,grad_norm: 0.7797361791298844, iteration: 324456
loss: 0.9915749430656433,grad_norm: 0.8864688748873014, iteration: 324457
loss: 0.9782706499099731,grad_norm: 0.6890253388729564, iteration: 324458
loss: 1.0023295879364014,grad_norm: 0.9091084965585964, iteration: 324459
loss: 0.965547502040863,grad_norm: 0.8315744166432818, iteration: 324460
loss: 0.9782373905181885,grad_norm: 0.6886128078310786, iteration: 324461
loss: 1.0192842483520508,grad_norm: 0.9999990186692544, iteration: 324462
loss: 1.2637702226638794,grad_norm: 0.9999994246906608, iteration: 324463
loss: 1.0276622772216797,grad_norm: 0.9999991901286714, iteration: 324464
loss: 0.9871501922607422,grad_norm: 0.7432416025941209, iteration: 324465
loss: 1.0213441848754883,grad_norm: 0.8329596205226666, iteration: 324466
loss: 0.981078565120697,grad_norm: 0.8256624808786516, iteration: 324467
loss: 1.0252902507781982,grad_norm: 0.8191330250099831, iteration: 324468
loss: 0.9650278687477112,grad_norm: 0.8112467317293446, iteration: 324469
loss: 1.0045337677001953,grad_norm: 0.9999991438073611, iteration: 324470
loss: 0.9983519911766052,grad_norm: 0.7942139393138644, iteration: 324471
loss: 1.053167462348938,grad_norm: 0.9999991341188786, iteration: 324472
loss: 0.9993571639060974,grad_norm: 0.7945192943263161, iteration: 324473
loss: 1.0051363706588745,grad_norm: 0.999999171905499, iteration: 324474
loss: 0.9583597779273987,grad_norm: 0.9868582557640797, iteration: 324475
loss: 1.2444888353347778,grad_norm: 0.9999999469886155, iteration: 324476
loss: 0.9913894534111023,grad_norm: 0.8701120933687836, iteration: 324477
loss: 1.0066027641296387,grad_norm: 0.7089747387147132, iteration: 324478
loss: 1.0267294645309448,grad_norm: 0.8268140957334157, iteration: 324479
loss: 0.9917049407958984,grad_norm: 0.8468665659768982, iteration: 324480
loss: 0.9859943389892578,grad_norm: 0.9553791743615159, iteration: 324481
loss: 1.0379564762115479,grad_norm: 0.9960567420342956, iteration: 324482
loss: 1.0000532865524292,grad_norm: 0.8502622610391714, iteration: 324483
loss: 1.0069769620895386,grad_norm: 0.8174305624749912, iteration: 324484
loss: 0.99102383852005,grad_norm: 0.8881890215159343, iteration: 324485
loss: 1.0179102420806885,grad_norm: 0.8105106821181549, iteration: 324486
loss: 0.9754733443260193,grad_norm: 0.8684207978338983, iteration: 324487
loss: 1.05795419216156,grad_norm: 0.8372028815100352, iteration: 324488
loss: 0.9958475232124329,grad_norm: 0.861801141323558, iteration: 324489
loss: 1.021798014640808,grad_norm: 0.7500208058519813, iteration: 324490
loss: 1.0062533617019653,grad_norm: 0.8376750369527849, iteration: 324491
loss: 0.986108660697937,grad_norm: 0.8297116874560627, iteration: 324492
loss: 1.0462695360183716,grad_norm: 0.9999991025821237, iteration: 324493
loss: 0.9703099131584167,grad_norm: 0.7711165805622406, iteration: 324494
loss: 0.9905356764793396,grad_norm: 0.8238641673430727, iteration: 324495
loss: 0.9755839109420776,grad_norm: 0.7661409742661561, iteration: 324496
loss: 1.0270851850509644,grad_norm: 0.8768286191690192, iteration: 324497
loss: 1.0158982276916504,grad_norm: 0.8671399365442346, iteration: 324498
loss: 0.9657902121543884,grad_norm: 0.9999991353545579, iteration: 324499
loss: 1.0081580877304077,grad_norm: 0.7882061865803756, iteration: 324500
loss: 0.9774883985519409,grad_norm: 0.7365047742730043, iteration: 324501
loss: 0.9661476612091064,grad_norm: 0.832557447954238, iteration: 324502
loss: 1.0151050090789795,grad_norm: 0.9999991958137557, iteration: 324503
loss: 1.0349310636520386,grad_norm: 0.8271544054174433, iteration: 324504
loss: 1.0201783180236816,grad_norm: 0.8747816174997269, iteration: 324505
loss: 0.9909831285476685,grad_norm: 0.8481927145605499, iteration: 324506
loss: 0.9740821123123169,grad_norm: 0.9004646209605451, iteration: 324507
loss: 1.009873867034912,grad_norm: 0.9954234589570456, iteration: 324508
loss: 0.954156219959259,grad_norm: 0.8513989647271696, iteration: 324509
loss: 0.9888597726821899,grad_norm: 0.8952763612107252, iteration: 324510
loss: 0.9886518120765686,grad_norm: 0.914515657053382, iteration: 324511
loss: 1.0178152322769165,grad_norm: 0.7377797489622283, iteration: 324512
loss: 1.0042438507080078,grad_norm: 0.9756121010797878, iteration: 324513
loss: 0.9967026710510254,grad_norm: 0.892451477560127, iteration: 324514
loss: 0.9840672612190247,grad_norm: 0.8606063721161603, iteration: 324515
loss: 0.9779361486434937,grad_norm: 0.9140115725311153, iteration: 324516
loss: 0.987223744392395,grad_norm: 0.7968194948167912, iteration: 324517
loss: 0.98308926820755,grad_norm: 0.7731352326250025, iteration: 324518
loss: 1.0067135095596313,grad_norm: 0.8961636298344724, iteration: 324519
loss: 1.0300655364990234,grad_norm: 0.9999998316513361, iteration: 324520
loss: 1.0118860006332397,grad_norm: 0.7993291198768866, iteration: 324521
loss: 0.993469774723053,grad_norm: 0.8589419495791693, iteration: 324522
loss: 0.9701880216598511,grad_norm: 0.7671700418772973, iteration: 324523
loss: 0.9871255159378052,grad_norm: 0.927975675891795, iteration: 324524
loss: 1.0082670450210571,grad_norm: 0.8711300314261914, iteration: 324525
loss: 0.9884541034698486,grad_norm: 0.8931704541733196, iteration: 324526
loss: 1.021507978439331,grad_norm: 0.8766035638702144, iteration: 324527
loss: 0.9967493414878845,grad_norm: 0.8910931774821536, iteration: 324528
loss: 1.005799412727356,grad_norm: 0.9127079916241061, iteration: 324529
loss: 1.019436001777649,grad_norm: 0.8560597711824401, iteration: 324530
loss: 1.0075563192367554,grad_norm: 0.763729540720259, iteration: 324531
loss: 0.9812436699867249,grad_norm: 0.9504218338982166, iteration: 324532
loss: 1.014653205871582,grad_norm: 0.9861961967299328, iteration: 324533
loss: 0.982715904712677,grad_norm: 0.9269813688128216, iteration: 324534
loss: 0.9900065660476685,grad_norm: 0.7969361316372607, iteration: 324535
loss: 1.007663607597351,grad_norm: 0.8846706968706113, iteration: 324536
loss: 0.9600650668144226,grad_norm: 0.999999615588291, iteration: 324537
loss: 1.0243585109710693,grad_norm: 0.7192897147849643, iteration: 324538
loss: 1.0183682441711426,grad_norm: 0.9124569213431643, iteration: 324539
loss: 1.045932412147522,grad_norm: 0.9222387316753502, iteration: 324540
loss: 0.9764483571052551,grad_norm: 0.9714305214564822, iteration: 324541
loss: 1.010972261428833,grad_norm: 0.8808390377577202, iteration: 324542
loss: 0.9784743189811707,grad_norm: 0.8730468034556432, iteration: 324543
loss: 0.9843591451644897,grad_norm: 0.7977396638807772, iteration: 324544
loss: 0.9897042512893677,grad_norm: 0.7952112050167486, iteration: 324545
loss: 1.0256285667419434,grad_norm: 0.8829608960128689, iteration: 324546
loss: 0.9869396686553955,grad_norm: 0.7326075650844517, iteration: 324547
loss: 1.0106570720672607,grad_norm: 0.7557691377044111, iteration: 324548
loss: 0.9822506308555603,grad_norm: 0.991553053746194, iteration: 324549
loss: 1.0512019395828247,grad_norm: 0.9999990342683714, iteration: 324550
loss: 1.0046296119689941,grad_norm: 0.8621655463774496, iteration: 324551
loss: 0.9904776811599731,grad_norm: 0.9096485885878755, iteration: 324552
loss: 1.0273663997650146,grad_norm: 0.9037205893698583, iteration: 324553
loss: 0.9895821213722229,grad_norm: 0.9999997356122718, iteration: 324554
loss: 0.9624892473220825,grad_norm: 0.7816985643707819, iteration: 324555
loss: 0.9871277809143066,grad_norm: 0.8323641622444692, iteration: 324556
loss: 1.0262117385864258,grad_norm: 0.7518757584765887, iteration: 324557
loss: 0.9927878379821777,grad_norm: 0.8173495871867993, iteration: 324558
loss: 0.9914997220039368,grad_norm: 0.8451575274953865, iteration: 324559
loss: 1.0070196390151978,grad_norm: 0.9484054419000919, iteration: 324560
loss: 1.0192419290542603,grad_norm: 0.9999990924005027, iteration: 324561
loss: 0.9936976432800293,grad_norm: 0.8947688504204889, iteration: 324562
loss: 1.029511570930481,grad_norm: 0.9265500397365484, iteration: 324563
loss: 0.9853202104568481,grad_norm: 0.9042693901373925, iteration: 324564
loss: 1.0040278434753418,grad_norm: 0.9236993295335381, iteration: 324565
loss: 1.0028705596923828,grad_norm: 0.8262713998652762, iteration: 324566
loss: 0.9896637201309204,grad_norm: 0.9004244544707837, iteration: 324567
loss: 1.008241057395935,grad_norm: 0.9750234639589453, iteration: 324568
loss: 1.001021146774292,grad_norm: 0.8136986972697156, iteration: 324569
loss: 1.0195114612579346,grad_norm: 0.8368180582225041, iteration: 324570
loss: 1.034346342086792,grad_norm: 0.9795915368648126, iteration: 324571
loss: 1.0009512901306152,grad_norm: 0.8364509242348975, iteration: 324572
loss: 1.0039668083190918,grad_norm: 0.8964339213253754, iteration: 324573
loss: 1.0062406063079834,grad_norm: 0.7484820943344828, iteration: 324574
loss: 0.9931236505508423,grad_norm: 0.7561383906500592, iteration: 324575
loss: 1.015741229057312,grad_norm: 0.912975250243484, iteration: 324576
loss: 1.0138614177703857,grad_norm: 0.8834752254223529, iteration: 324577
loss: 0.9929293990135193,grad_norm: 0.8392606995241851, iteration: 324578
loss: 1.0419580936431885,grad_norm: 0.9999997919706394, iteration: 324579
loss: 1.0027176141738892,grad_norm: 0.7841773234220403, iteration: 324580
loss: 1.0279324054718018,grad_norm: 0.9733430442709295, iteration: 324581
loss: 1.0138789415359497,grad_norm: 0.8133076015459195, iteration: 324582
loss: 1.0049636363983154,grad_norm: 0.9999992854731199, iteration: 324583
loss: 0.9779344797134399,grad_norm: 0.8927071739444097, iteration: 324584
loss: 1.0079501867294312,grad_norm: 0.778853455035688, iteration: 324585
loss: 1.0617287158966064,grad_norm: 0.9329673791488623, iteration: 324586
loss: 0.9810051918029785,grad_norm: 0.7423037578815093, iteration: 324587
loss: 0.9947612285614014,grad_norm: 0.7152674521634961, iteration: 324588
loss: 1.0223575830459595,grad_norm: 0.8346301947218442, iteration: 324589
loss: 0.9747767448425293,grad_norm: 0.8113385299881102, iteration: 324590
loss: 1.0112824440002441,grad_norm: 0.9999991116813738, iteration: 324591
loss: 0.9658648371696472,grad_norm: 0.7706931489859943, iteration: 324592
loss: 1.0151616334915161,grad_norm: 0.7370305611308094, iteration: 324593
loss: 0.9865655303001404,grad_norm: 0.7564553147236968, iteration: 324594
loss: 1.0457980632781982,grad_norm: 0.9282280250935988, iteration: 324595
loss: 1.0181946754455566,grad_norm: 0.865506868733443, iteration: 324596
loss: 0.9806486964225769,grad_norm: 0.6822431837932579, iteration: 324597
loss: 1.0009175539016724,grad_norm: 0.7992784433353672, iteration: 324598
loss: 1.0262103080749512,grad_norm: 0.9340303271288004, iteration: 324599
loss: 1.0197781324386597,grad_norm: 0.9204786895417265, iteration: 324600
loss: 0.9788799285888672,grad_norm: 0.8998060378806046, iteration: 324601
loss: 0.9821572303771973,grad_norm: 0.8929364007907713, iteration: 324602
loss: 1.0863481760025024,grad_norm: 0.9999998096605772, iteration: 324603
loss: 0.9763761162757874,grad_norm: 0.7769847042201521, iteration: 324604
loss: 1.006643533706665,grad_norm: 0.8062504078911332, iteration: 324605
loss: 0.9891568422317505,grad_norm: 0.7271108093741081, iteration: 324606
loss: 1.0092991590499878,grad_norm: 0.7998711153783198, iteration: 324607
loss: 1.0092302560806274,grad_norm: 0.7110177585089095, iteration: 324608
loss: 0.9756121635437012,grad_norm: 0.9555020190141579, iteration: 324609
loss: 1.0033786296844482,grad_norm: 0.8650363121164936, iteration: 324610
loss: 1.0132211446762085,grad_norm: 0.9544131557822046, iteration: 324611
loss: 0.9815180897712708,grad_norm: 0.8576885313508564, iteration: 324612
loss: 1.0041241645812988,grad_norm: 0.8523999279193858, iteration: 324613
loss: 1.0226954221725464,grad_norm: 0.8071594087869326, iteration: 324614
loss: 0.9850298166275024,grad_norm: 0.9061156579115646, iteration: 324615
loss: 1.0156607627868652,grad_norm: 0.7730150124236904, iteration: 324616
loss: 1.0077641010284424,grad_norm: 0.758881446006976, iteration: 324617
loss: 1.0032546520233154,grad_norm: 0.7594726791836289, iteration: 324618
loss: 0.9662575721740723,grad_norm: 0.7392031530353069, iteration: 324619
loss: 1.004438877105713,grad_norm: 0.8223935006401789, iteration: 324620
loss: 1.0004923343658447,grad_norm: 0.9265511714943045, iteration: 324621
loss: 1.0097872018814087,grad_norm: 0.8164400367193603, iteration: 324622
loss: 0.9592307806015015,grad_norm: 0.9202017156070419, iteration: 324623
loss: 1.169325590133667,grad_norm: 0.9999994133039698, iteration: 324624
loss: 1.0591202974319458,grad_norm: 0.9999992163492709, iteration: 324625
loss: 0.9999059438705444,grad_norm: 0.8900041219809033, iteration: 324626
loss: 0.9576321840286255,grad_norm: 0.8707569893973091, iteration: 324627
loss: 1.0316613912582397,grad_norm: 0.8919676621947475, iteration: 324628
loss: 0.9563801288604736,grad_norm: 0.658185010681211, iteration: 324629
loss: 1.0127125978469849,grad_norm: 0.750825475364194, iteration: 324630
loss: 0.9795133471488953,grad_norm: 0.9145950382219038, iteration: 324631
loss: 1.023842692375183,grad_norm: 0.8984768639662779, iteration: 324632
loss: 1.0120046138763428,grad_norm: 0.7821870540163754, iteration: 324633
loss: 0.9401284456253052,grad_norm: 0.9738609170965167, iteration: 324634
loss: 1.0399885177612305,grad_norm: 0.9999991057093497, iteration: 324635
loss: 0.9825426340103149,grad_norm: 0.8337811242219817, iteration: 324636
loss: 0.9734749794006348,grad_norm: 0.9102924173604781, iteration: 324637
loss: 0.9953915476799011,grad_norm: 0.8630214873607905, iteration: 324638
loss: 0.986799955368042,grad_norm: 1.0000000015838824, iteration: 324639
loss: 0.9944234490394592,grad_norm: 0.8677880653794597, iteration: 324640
loss: 0.9928008317947388,grad_norm: 0.7899079557163786, iteration: 324641
loss: 1.020495057106018,grad_norm: 0.8258244936189475, iteration: 324642
loss: 0.9821196794509888,grad_norm: 0.7575732193704061, iteration: 324643
loss: 0.972348153591156,grad_norm: 0.8693769056693499, iteration: 324644
loss: 1.0741347074508667,grad_norm: 0.9999997407206822, iteration: 324645
loss: 0.9822556376457214,grad_norm: 0.8721932640183726, iteration: 324646
loss: 1.0246176719665527,grad_norm: 0.945249876309552, iteration: 324647
loss: 0.9928572177886963,grad_norm: 0.9999988958371883, iteration: 324648
loss: 0.9816449880599976,grad_norm: 0.6775961989255591, iteration: 324649
loss: 0.9790229201316833,grad_norm: 0.7933439012033944, iteration: 324650
loss: 1.0051766633987427,grad_norm: 0.9535069950536865, iteration: 324651
loss: 0.990074098110199,grad_norm: 0.8474323864071934, iteration: 324652
loss: 0.9682497382164001,grad_norm: 0.7853377839700816, iteration: 324653
loss: 0.9578935503959656,grad_norm: 0.6752203526306934, iteration: 324654
loss: 0.9860378503799438,grad_norm: 0.9248202833477007, iteration: 324655
loss: 0.9909615516662598,grad_norm: 0.9008414085465474, iteration: 324656
loss: 1.0307385921478271,grad_norm: 0.9970654965316578, iteration: 324657
loss: 0.9881700873374939,grad_norm: 0.9708444587131434, iteration: 324658
loss: 0.9907138347625732,grad_norm: 0.9323189351786305, iteration: 324659
loss: 0.9879366755485535,grad_norm: 0.9999991416560885, iteration: 324660
loss: 1.0254029035568237,grad_norm: 0.7533630631047853, iteration: 324661
loss: 1.011643886566162,grad_norm: 0.7886131258687729, iteration: 324662
loss: 0.9670060873031616,grad_norm: 0.8009315681732476, iteration: 324663
loss: 1.048107624053955,grad_norm: 0.8774585723953979, iteration: 324664
loss: 0.9503033757209778,grad_norm: 0.9882273454735024, iteration: 324665
loss: 0.9745216369628906,grad_norm: 0.9741871893706141, iteration: 324666
loss: 1.0119200944900513,grad_norm: 0.6779348734999036, iteration: 324667
loss: 0.9815170764923096,grad_norm: 0.8810033861201322, iteration: 324668
loss: 0.9941992163658142,grad_norm: 0.9999991110721596, iteration: 324669
loss: 0.9639161229133606,grad_norm: 0.9999997579115221, iteration: 324670
loss: 0.9513174295425415,grad_norm: 0.7994658572536877, iteration: 324671
loss: 1.0194895267486572,grad_norm: 0.848107618736947, iteration: 324672
loss: 1.086311936378479,grad_norm: 0.9714975275173591, iteration: 324673
loss: 1.050866723060608,grad_norm: 0.9858462382820253, iteration: 324674
loss: 0.9863491058349609,grad_norm: 0.8386554834230152, iteration: 324675
loss: 0.9956967830657959,grad_norm: 0.8407250981729594, iteration: 324676
loss: 1.0110960006713867,grad_norm: 0.896100667006569, iteration: 324677
loss: 1.0044760704040527,grad_norm: 0.6926968617819432, iteration: 324678
loss: 1.0049940347671509,grad_norm: 0.8776540086726506, iteration: 324679
loss: 1.0122544765472412,grad_norm: 0.8959886691230099, iteration: 324680
loss: 0.9941070675849915,grad_norm: 0.8566541075802694, iteration: 324681
loss: 1.0007774829864502,grad_norm: 0.7767580684799391, iteration: 324682
loss: 0.9869358539581299,grad_norm: 0.8699932006656147, iteration: 324683
loss: 1.057352900505066,grad_norm: 0.9999997365613436, iteration: 324684
loss: 1.023102879524231,grad_norm: 0.9999991031939209, iteration: 324685
loss: 0.972233235836029,grad_norm: 0.9830240356163026, iteration: 324686
loss: 0.9866759777069092,grad_norm: 0.8872336681911819, iteration: 324687
loss: 0.9671087265014648,grad_norm: 0.9352942824183554, iteration: 324688
loss: 1.004778504371643,grad_norm: 0.8194315249318687, iteration: 324689
loss: 0.9781297445297241,grad_norm: 0.8029412982529853, iteration: 324690
loss: 1.044120192527771,grad_norm: 0.9999999186500227, iteration: 324691
loss: 1.0292870998382568,grad_norm: 0.7685223118404113, iteration: 324692
loss: 1.0188565254211426,grad_norm: 0.7795233836988058, iteration: 324693
loss: 1.0069869756698608,grad_norm: 0.8584958238697524, iteration: 324694
loss: 1.0204930305480957,grad_norm: 0.8717504428374068, iteration: 324695
loss: 1.0721521377563477,grad_norm: 0.8737613191136193, iteration: 324696
loss: 0.999232292175293,grad_norm: 0.9999990756573602, iteration: 324697
loss: 0.9595538973808289,grad_norm: 0.999998947234504, iteration: 324698
loss: 0.9774028658866882,grad_norm: 0.7873351544725501, iteration: 324699
loss: 1.0057506561279297,grad_norm: 0.8737682976537106, iteration: 324700
loss: 0.9840377569198608,grad_norm: 0.7088402708172906, iteration: 324701
loss: 1.022821307182312,grad_norm: 0.7475986284571512, iteration: 324702
loss: 1.0327417850494385,grad_norm: 0.9999992207591337, iteration: 324703
loss: 0.982727587223053,grad_norm: 0.8103208050899909, iteration: 324704
loss: 1.0366036891937256,grad_norm: 0.9148000239773367, iteration: 324705
loss: 1.0037057399749756,grad_norm: 0.7643603808002157, iteration: 324706
loss: 1.0052521228790283,grad_norm: 0.850148635777592, iteration: 324707
loss: 0.9634526968002319,grad_norm: 0.6718004002823393, iteration: 324708
loss: 1.0310574769973755,grad_norm: 0.9999990133884463, iteration: 324709
loss: 1.028103232383728,grad_norm: 0.9999993885693379, iteration: 324710
loss: 1.0024477243423462,grad_norm: 0.7960909057827685, iteration: 324711
loss: 0.9921079277992249,grad_norm: 0.8677047026880819, iteration: 324712
loss: 1.0244494676589966,grad_norm: 0.8797888361225789, iteration: 324713
loss: 1.0435525178909302,grad_norm: 0.9201884901681509, iteration: 324714
loss: 0.9974219799041748,grad_norm: 0.9878868852157285, iteration: 324715
loss: 0.9949791431427002,grad_norm: 0.873690716790298, iteration: 324716
loss: 0.9903072714805603,grad_norm: 0.8042903491519938, iteration: 324717
loss: 0.9637021422386169,grad_norm: 0.8250622304342402, iteration: 324718
loss: 1.0112961530685425,grad_norm: 0.8247708877546301, iteration: 324719
loss: 1.0180399417877197,grad_norm: 0.922730118382937, iteration: 324720
loss: 1.1373631954193115,grad_norm: 0.8849987270106803, iteration: 324721
loss: 1.0130856037139893,grad_norm: 0.9999990961907667, iteration: 324722
loss: 0.9977807402610779,grad_norm: 0.8439771778726078, iteration: 324723
loss: 0.9660562872886658,grad_norm: 0.7603182797201131, iteration: 324724
loss: 0.9841834306716919,grad_norm: 0.7917864309189163, iteration: 324725
loss: 1.015633225440979,grad_norm: 0.9071033391843587, iteration: 324726
loss: 0.9826341271400452,grad_norm: 0.8162728756815906, iteration: 324727
loss: 1.0061602592468262,grad_norm: 0.8528060778733316, iteration: 324728
loss: 0.9778942465782166,grad_norm: 0.8526838379776588, iteration: 324729
loss: 0.978887677192688,grad_norm: 0.9040153701260378, iteration: 324730
loss: 1.0247937440872192,grad_norm: 0.8793274254382676, iteration: 324731
loss: 1.0028083324432373,grad_norm: 0.9244872330915392, iteration: 324732
loss: 0.9918320178985596,grad_norm: 0.9999991208725512, iteration: 324733
loss: 1.0023659467697144,grad_norm: 0.8217348516040267, iteration: 324734
loss: 1.0339354276657104,grad_norm: 0.9627401524914612, iteration: 324735
loss: 0.9765908718109131,grad_norm: 0.999999084704388, iteration: 324736
loss: 1.0148506164550781,grad_norm: 0.9174154273336484, iteration: 324737
loss: 1.0312469005584717,grad_norm: 0.9988067788851851, iteration: 324738
loss: 1.0058984756469727,grad_norm: 0.8361135980231479, iteration: 324739
loss: 1.0059407949447632,grad_norm: 0.8807223504877222, iteration: 324740
loss: 0.972730278968811,grad_norm: 0.8337814740167735, iteration: 324741
loss: 0.9418303966522217,grad_norm: 0.8241459457789189, iteration: 324742
loss: 1.0239731073379517,grad_norm: 0.8476271551517935, iteration: 324743
loss: 0.9920778274536133,grad_norm: 0.8646846315379613, iteration: 324744
loss: 1.0033855438232422,grad_norm: 0.8208264452411927, iteration: 324745
loss: 0.9595159888267517,grad_norm: 0.8109964196762911, iteration: 324746
loss: 1.0131210088729858,grad_norm: 0.7813721244685523, iteration: 324747
loss: 0.9951372146606445,grad_norm: 0.8733259529953544, iteration: 324748
loss: 1.2887144088745117,grad_norm: 0.999999895962572, iteration: 324749
loss: 1.0299254655838013,grad_norm: 0.7980053202458824, iteration: 324750
loss: 1.0227080583572388,grad_norm: 0.7762100725046637, iteration: 324751
loss: 1.0060991048812866,grad_norm: 0.9008507693941569, iteration: 324752
loss: 0.9960219264030457,grad_norm: 0.9501950962801067, iteration: 324753
loss: 1.0078457593917847,grad_norm: 0.8817200966499308, iteration: 324754
loss: 0.9963478446006775,grad_norm: 0.7270938849559425, iteration: 324755
loss: 1.006765365600586,grad_norm: 0.652312174807221, iteration: 324756
loss: 1.0453757047653198,grad_norm: 0.999999062772658, iteration: 324757
loss: 0.9764946103096008,grad_norm: 0.9872719882043148, iteration: 324758
loss: 1.0345454216003418,grad_norm: 0.9999990348601843, iteration: 324759
loss: 1.0084922313690186,grad_norm: 0.8562660143428207, iteration: 324760
loss: 1.031388521194458,grad_norm: 0.8725115480021551, iteration: 324761
loss: 1.041458249092102,grad_norm: 0.7476048130458243, iteration: 324762
loss: 1.026682734489441,grad_norm: 0.999999650794938, iteration: 324763
loss: 1.0614218711853027,grad_norm: 0.8567694367464824, iteration: 324764
loss: 0.9765894412994385,grad_norm: 0.8922836400421358, iteration: 324765
loss: 1.1040908098220825,grad_norm: 0.91984030000301, iteration: 324766
loss: 1.0042986869812012,grad_norm: 0.7380177929160333, iteration: 324767
loss: 0.9866054058074951,grad_norm: 0.8396741676786877, iteration: 324768
loss: 1.0146253108978271,grad_norm: 0.9999990887220779, iteration: 324769
loss: 0.9783962368965149,grad_norm: 0.850121305307584, iteration: 324770
loss: 1.0094290971755981,grad_norm: 0.7997185602448007, iteration: 324771
loss: 0.9794560670852661,grad_norm: 0.8321704360807888, iteration: 324772
loss: 0.9816828370094299,grad_norm: 0.9961581801776431, iteration: 324773
loss: 1.0010743141174316,grad_norm: 0.9226877239302897, iteration: 324774
loss: 0.9807084202766418,grad_norm: 0.9999989377315882, iteration: 324775
loss: 0.9813793301582336,grad_norm: 0.8191052690451626, iteration: 324776
loss: 1.016270637512207,grad_norm: 0.8675639028737515, iteration: 324777
loss: 1.0122135877609253,grad_norm: 0.6932150995928039, iteration: 324778
loss: 0.9949821829795837,grad_norm: 0.8951972326510731, iteration: 324779
loss: 1.005672812461853,grad_norm: 0.7235755606503601, iteration: 324780
loss: 0.9855087995529175,grad_norm: 0.8343664467463472, iteration: 324781
loss: 0.9925979971885681,grad_norm: 0.8477746591274249, iteration: 324782
loss: 1.0116316080093384,grad_norm: 0.9151229023131437, iteration: 324783
loss: 0.9929183125495911,grad_norm: 0.7962518165304948, iteration: 324784
loss: 0.9488608837127686,grad_norm: 0.8221159673269126, iteration: 324785
loss: 1.0114269256591797,grad_norm: 0.9326838793248032, iteration: 324786
loss: 0.9861320853233337,grad_norm: 0.7655256624587123, iteration: 324787
loss: 0.9991077184677124,grad_norm: 0.8486757113765707, iteration: 324788
loss: 1.0113133192062378,grad_norm: 0.8428688878606378, iteration: 324789
loss: 0.9768445491790771,grad_norm: 0.7581219609279263, iteration: 324790
loss: 0.9796287417411804,grad_norm: 0.9325605012674371, iteration: 324791
loss: 0.9954475164413452,grad_norm: 0.6304458676579355, iteration: 324792
loss: 1.0217143297195435,grad_norm: 0.8980929626275511, iteration: 324793
loss: 0.9865899682044983,grad_norm: 0.9999990852847129, iteration: 324794
loss: 0.9854039549827576,grad_norm: 0.9463865585316984, iteration: 324795
loss: 1.0119725465774536,grad_norm: 0.8285365229661198, iteration: 324796
loss: 1.0270074605941772,grad_norm: 0.7315119984672243, iteration: 324797
loss: 1.0069458484649658,grad_norm: 0.8947075012140345, iteration: 324798
loss: 0.9625969529151917,grad_norm: 0.8883770544793681, iteration: 324799
loss: 1.0308963060379028,grad_norm: 0.7737229692409764, iteration: 324800
loss: 0.9502195715904236,grad_norm: 0.786903241376118, iteration: 324801
loss: 0.9781288504600525,grad_norm: 0.650211465459269, iteration: 324802
loss: 0.9610280990600586,grad_norm: 0.966606391213151, iteration: 324803
loss: 1.0133590698242188,grad_norm: 0.7977859673168826, iteration: 324804
loss: 1.0085086822509766,grad_norm: 0.8640474014885202, iteration: 324805
loss: 1.008102297782898,grad_norm: 0.9074152843274417, iteration: 324806
loss: 1.0050296783447266,grad_norm: 0.8351449043299014, iteration: 324807
loss: 1.0263409614562988,grad_norm: 0.9306919785332242, iteration: 324808
loss: 0.9671649932861328,grad_norm: 0.9043971228031171, iteration: 324809
loss: 0.9891111850738525,grad_norm: 0.7312886242959356, iteration: 324810
loss: 1.010895848274231,grad_norm: 0.9144796155363436, iteration: 324811
loss: 1.0230222940444946,grad_norm: 0.8725620319181061, iteration: 324812
loss: 1.0591708421707153,grad_norm: 0.9999991808850065, iteration: 324813
loss: 0.9748361706733704,grad_norm: 0.9451205381170168, iteration: 324814
loss: 0.9965449571609497,grad_norm: 0.9999995041871482, iteration: 324815
loss: 1.0034364461898804,grad_norm: 0.7839241207955842, iteration: 324816
loss: 1.0147570371627808,grad_norm: 0.9999997794919979, iteration: 324817
loss: 1.0309845209121704,grad_norm: 0.9999996052893516, iteration: 324818
loss: 0.9337745308876038,grad_norm: 0.9684246777075999, iteration: 324819
loss: 1.0512245893478394,grad_norm: 0.7952718782704443, iteration: 324820
loss: 0.9767467379570007,grad_norm: 0.8671480605784884, iteration: 324821
loss: 1.0160719156265259,grad_norm: 0.9999990605503173, iteration: 324822
loss: 0.982153594493866,grad_norm: 0.9999990975366061, iteration: 324823
loss: 0.9573794007301331,grad_norm: 0.8104661125058473, iteration: 324824
loss: 1.050804853439331,grad_norm: 0.9999991999264107, iteration: 324825
loss: 1.0038293600082397,grad_norm: 0.8346600057218966, iteration: 324826
loss: 0.9837378859519958,grad_norm: 0.882883982566804, iteration: 324827
loss: 1.0061007738113403,grad_norm: 0.9985018235822243, iteration: 324828
loss: 1.023158311843872,grad_norm: 0.8343055286102979, iteration: 324829
loss: 1.009440541267395,grad_norm: 0.9309466812290933, iteration: 324830
loss: 1.0406148433685303,grad_norm: 0.9999990125276198, iteration: 324831
loss: 0.9779629707336426,grad_norm: 0.7659727036488063, iteration: 324832
loss: 0.9854109287261963,grad_norm: 0.7775116662200133, iteration: 324833
loss: 0.9724321961402893,grad_norm: 0.7804422159720804, iteration: 324834
loss: 1.02359139919281,grad_norm: 0.786825347250086, iteration: 324835
loss: 1.0035223960876465,grad_norm: 0.8621313793569507, iteration: 324836
loss: 1.014701008796692,grad_norm: 0.7351608731656049, iteration: 324837
loss: 1.0103275775909424,grad_norm: 0.8878931013306555, iteration: 324838
loss: 1.0232911109924316,grad_norm: 0.8148332517857301, iteration: 324839
loss: 0.9957865476608276,grad_norm: 0.7468545871072781, iteration: 324840
loss: 1.0008147954940796,grad_norm: 0.8611075353148715, iteration: 324841
loss: 1.0277208089828491,grad_norm: 0.8354794115845127, iteration: 324842
loss: 1.0011608600616455,grad_norm: 0.7473400032274695, iteration: 324843
loss: 1.0441100597381592,grad_norm: 0.9799097536480387, iteration: 324844
loss: 1.0136845111846924,grad_norm: 0.7233313411793041, iteration: 324845
loss: 0.9864556193351746,grad_norm: 0.9999992200586455, iteration: 324846
loss: 0.9835498929023743,grad_norm: 0.8197865662299008, iteration: 324847
loss: 1.001565933227539,grad_norm: 0.675688756112017, iteration: 324848
loss: 1.0124390125274658,grad_norm: 0.9999992700966254, iteration: 324849
loss: 1.0092921257019043,grad_norm: 0.9999992781653425, iteration: 324850
loss: 1.0190824270248413,grad_norm: 0.8327201658046454, iteration: 324851
loss: 1.0106598138809204,grad_norm: 0.7578657715622055, iteration: 324852
loss: 1.0087376832962036,grad_norm: 0.9748653452962606, iteration: 324853
loss: 0.9889000058174133,grad_norm: 0.989473851275276, iteration: 324854
loss: 0.9824457764625549,grad_norm: 0.99764177906258, iteration: 324855
loss: 0.9678523540496826,grad_norm: 0.9376536116407496, iteration: 324856
loss: 0.9923136234283447,grad_norm: 0.9999992159523734, iteration: 324857
loss: 0.9571894407272339,grad_norm: 0.8801184878478618, iteration: 324858
loss: 1.0089657306671143,grad_norm: 0.8554099562194973, iteration: 324859
loss: 0.9874362945556641,grad_norm: 0.8845487225056347, iteration: 324860
loss: 0.9805415868759155,grad_norm: 0.8312538088853664, iteration: 324861
loss: 1.0072816610336304,grad_norm: 0.7523927377970326, iteration: 324862
loss: 0.9684155583381653,grad_norm: 0.8291002496669838, iteration: 324863
loss: 1.0240153074264526,grad_norm: 0.8605930900976283, iteration: 324864
loss: 0.9678317904472351,grad_norm: 0.8112927798597597, iteration: 324865
loss: 0.9880326390266418,grad_norm: 0.6995848958679824, iteration: 324866
loss: 1.0202794075012207,grad_norm: 0.872912249097362, iteration: 324867
loss: 0.9873311519622803,grad_norm: 0.8858988912629825, iteration: 324868
loss: 1.025272250175476,grad_norm: 0.8953033703354336, iteration: 324869
loss: 1.0217653512954712,grad_norm: 0.7336064850614537, iteration: 324870
loss: 1.065497875213623,grad_norm: 0.9999992356816441, iteration: 324871
loss: 0.990239679813385,grad_norm: 0.835338005502015, iteration: 324872
loss: 0.9648357033729553,grad_norm: 0.999999495926768, iteration: 324873
loss: 1.0117872953414917,grad_norm: 0.7750789800509479, iteration: 324874
loss: 0.9548875093460083,grad_norm: 0.9522620656057852, iteration: 324875
loss: 0.9935168027877808,grad_norm: 0.893080491019191, iteration: 324876
loss: 1.0237911939620972,grad_norm: 0.7199381374461922, iteration: 324877
loss: 1.009831190109253,grad_norm: 0.8983384352007138, iteration: 324878
loss: 1.015352487564087,grad_norm: 0.8261533613878485, iteration: 324879
loss: 1.0113346576690674,grad_norm: 0.8033871000070115, iteration: 324880
loss: 0.9461072683334351,grad_norm: 0.8866601057957096, iteration: 324881
loss: 0.9990304112434387,grad_norm: 0.999999111512033, iteration: 324882
loss: 1.0207247734069824,grad_norm: 0.6328077874543008, iteration: 324883
loss: 1.0090343952178955,grad_norm: 0.9379905160957941, iteration: 324884
loss: 0.9831365942955017,grad_norm: 0.7624339699627724, iteration: 324885
loss: 1.0459833145141602,grad_norm: 0.9999998717219669, iteration: 324886
loss: 1.0109270811080933,grad_norm: 0.8323142802716474, iteration: 324887
loss: 1.053787112236023,grad_norm: 0.967111785534252, iteration: 324888
loss: 0.989810585975647,grad_norm: 0.7415177996531167, iteration: 324889
loss: 1.023816466331482,grad_norm: 0.9999996434594223, iteration: 324890
loss: 1.0081740617752075,grad_norm: 0.9999992172694612, iteration: 324891
loss: 0.9789537191390991,grad_norm: 0.7775679646858327, iteration: 324892
loss: 0.9922751784324646,grad_norm: 0.7846924420629703, iteration: 324893
loss: 0.9791306257247925,grad_norm: 0.8513993363768976, iteration: 324894
loss: 0.9656791090965271,grad_norm: 0.8121516985157485, iteration: 324895
loss: 0.9507001638412476,grad_norm: 0.8491313414497509, iteration: 324896
loss: 0.9826704859733582,grad_norm: 0.8470200267529564, iteration: 324897
loss: 1.0363001823425293,grad_norm: 0.9999995047984541, iteration: 324898
loss: 1.0093683004379272,grad_norm: 0.9117959157621668, iteration: 324899
loss: 1.0197030305862427,grad_norm: 0.9047425903681117, iteration: 324900
loss: 0.9818071722984314,grad_norm: 0.8002371747160434, iteration: 324901
loss: 0.9959607124328613,grad_norm: 0.7613390254025836, iteration: 324902
loss: 1.1085861921310425,grad_norm: 0.9999996916575709, iteration: 324903
loss: 0.9651278257369995,grad_norm: 0.781809488215404, iteration: 324904
loss: 0.9663026928901672,grad_norm: 0.9999990782779516, iteration: 324905
loss: 1.0108039379119873,grad_norm: 0.7370632596526809, iteration: 324906
loss: 0.9979650378227234,grad_norm: 0.8076254598073247, iteration: 324907
loss: 1.011604905128479,grad_norm: 0.9542872792709315, iteration: 324908
loss: 0.9536940455436707,grad_norm: 0.9002958874514011, iteration: 324909
loss: 0.9748868346214294,grad_norm: 0.7907110585651761, iteration: 324910
loss: 1.0047049522399902,grad_norm: 0.7853971445738265, iteration: 324911
loss: 0.9828706979751587,grad_norm: 0.7303729269454899, iteration: 324912
loss: 1.0021734237670898,grad_norm: 0.7045215991995353, iteration: 324913
loss: 1.0480345487594604,grad_norm: 0.9001017737468799, iteration: 324914
loss: 0.9791024327278137,grad_norm: 0.9783615165526868, iteration: 324915
loss: 1.0013505220413208,grad_norm: 0.9101680917740239, iteration: 324916
loss: 1.0272976160049438,grad_norm: 0.9466112497193815, iteration: 324917
loss: 1.0081673860549927,grad_norm: 0.8396981422150397, iteration: 324918
loss: 1.0277830362319946,grad_norm: 0.9999989841845708, iteration: 324919
loss: 0.9977281093597412,grad_norm: 0.8022714016731833, iteration: 324920
loss: 1.0262811183929443,grad_norm: 0.9999990013692421, iteration: 324921
loss: 1.0028350353240967,grad_norm: 0.7468968285781217, iteration: 324922
loss: 1.0374789237976074,grad_norm: 0.9999990468578667, iteration: 324923
loss: 0.9967101812362671,grad_norm: 0.7890242014232308, iteration: 324924
loss: 1.0157934427261353,grad_norm: 0.9999990525394183, iteration: 324925
loss: 1.0179582834243774,grad_norm: 0.9999991567865283, iteration: 324926
loss: 0.9985848069190979,grad_norm: 0.9999990029842978, iteration: 324927
loss: 0.9878347516059875,grad_norm: 0.8707569665563331, iteration: 324928
loss: 0.9957637190818787,grad_norm: 0.8130989787657054, iteration: 324929
loss: 0.999021053314209,grad_norm: 0.953891135802416, iteration: 324930
loss: 0.9740695953369141,grad_norm: 0.8127050638729765, iteration: 324931
loss: 0.9737359881401062,grad_norm: 0.7924448625992657, iteration: 324932
loss: 1.024094820022583,grad_norm: 0.6798185901322134, iteration: 324933
loss: 0.9811450242996216,grad_norm: 0.9999994926077342, iteration: 324934
loss: 1.015540599822998,grad_norm: 0.7950591689725038, iteration: 324935
loss: 1.0244723558425903,grad_norm: 0.8341975904267603, iteration: 324936
loss: 0.9852687120437622,grad_norm: 0.973457842642987, iteration: 324937
loss: 0.9636009335517883,grad_norm: 0.9999990373131672, iteration: 324938
loss: 1.0019780397415161,grad_norm: 0.9999991125830641, iteration: 324939
loss: 0.9909685850143433,grad_norm: 0.7998281155370213, iteration: 324940
loss: 0.969492495059967,grad_norm: 0.7796769131681411, iteration: 324941
loss: 1.0177699327468872,grad_norm: 0.8887144326811959, iteration: 324942
loss: 1.0279555320739746,grad_norm: 0.8509024487368878, iteration: 324943
loss: 1.0262954235076904,grad_norm: 0.9999991511778575, iteration: 324944
loss: 0.9792929887771606,grad_norm: 0.7752336179732592, iteration: 324945
loss: 1.0547839403152466,grad_norm: 0.9999990826708711, iteration: 324946
loss: 0.9836445450782776,grad_norm: 0.7673061257285803, iteration: 324947
loss: 1.1397448778152466,grad_norm: 0.9999999963637256, iteration: 324948
loss: 0.9827322959899902,grad_norm: 0.7760866963093255, iteration: 324949
loss: 0.9983665347099304,grad_norm: 0.8978441843544002, iteration: 324950
loss: 1.0231094360351562,grad_norm: 0.8526488978207424, iteration: 324951
loss: 1.0905507802963257,grad_norm: 0.9999993581024831, iteration: 324952
loss: 1.006460428237915,grad_norm: 0.8434042631678103, iteration: 324953
loss: 0.9612390398979187,grad_norm: 0.7285312311592732, iteration: 324954
loss: 1.0383597612380981,grad_norm: 0.7491143890025924, iteration: 324955
loss: 0.9792563319206238,grad_norm: 0.7619449725008525, iteration: 324956
loss: 1.011248230934143,grad_norm: 0.8411919425079027, iteration: 324957
loss: 0.9805737733840942,grad_norm: 0.8498700185979965, iteration: 324958
loss: 0.9919558167457581,grad_norm: 0.8710418621942094, iteration: 324959
loss: 0.9822835922241211,grad_norm: 0.9738199001736892, iteration: 324960
loss: 1.0098237991333008,grad_norm: 0.9999992183949822, iteration: 324961
loss: 1.0086032152175903,grad_norm: 0.7708905511142143, iteration: 324962
loss: 1.012978434562683,grad_norm: 0.8159927827519692, iteration: 324963
loss: 0.9729817509651184,grad_norm: 0.8307611322458962, iteration: 324964
loss: 1.0318031311035156,grad_norm: 0.9999991529973132, iteration: 324965
loss: 0.961563229560852,grad_norm: 0.8920847927056603, iteration: 324966
loss: 1.0173135995864868,grad_norm: 0.713131647870316, iteration: 324967
loss: 0.9448547959327698,grad_norm: 0.8933449490568959, iteration: 324968
loss: 0.962759256362915,grad_norm: 0.9999991511134668, iteration: 324969
loss: 1.0197622776031494,grad_norm: 0.9999989471080788, iteration: 324970
loss: 0.9860347509384155,grad_norm: 0.7986393123920766, iteration: 324971
loss: 1.1161245107650757,grad_norm: 0.9999992186766078, iteration: 324972
loss: 0.9947153925895691,grad_norm: 0.7297576232557429, iteration: 324973
loss: 1.0114511251449585,grad_norm: 0.9408947806564314, iteration: 324974
loss: 1.0631572008132935,grad_norm: 0.9549729534300513, iteration: 324975
loss: 1.0744907855987549,grad_norm: 0.9999991775322655, iteration: 324976
loss: 1.0150443315505981,grad_norm: 0.843786480958007, iteration: 324977
loss: 1.008792757987976,grad_norm: 0.8559296940957227, iteration: 324978
loss: 1.0033540725708008,grad_norm: 0.7381758419471807, iteration: 324979
loss: 1.0440068244934082,grad_norm: 0.9013277080968711, iteration: 324980
loss: 0.9972543716430664,grad_norm: 0.9999991079454612, iteration: 324981
loss: 1.0019407272338867,grad_norm: 0.9999992662015934, iteration: 324982
loss: 1.0594028234481812,grad_norm: 0.9999996709219391, iteration: 324983
loss: 0.9768426418304443,grad_norm: 0.7864854254310084, iteration: 324984
loss: 0.9942758083343506,grad_norm: 0.811672383773226, iteration: 324985
loss: 0.9697983264923096,grad_norm: 0.8781043293998415, iteration: 324986
loss: 1.0207479000091553,grad_norm: 0.9471371968641127, iteration: 324987
loss: 0.975986123085022,grad_norm: 0.9454535711112342, iteration: 324988
loss: 0.9907092452049255,grad_norm: 0.9999999846825477, iteration: 324989
loss: 1.0955431461334229,grad_norm: 1.0000000467962027, iteration: 324990
loss: 1.0046480894088745,grad_norm: 0.8775906604289425, iteration: 324991
loss: 1.037993311882019,grad_norm: 0.9992810535868172, iteration: 324992
loss: 0.9660688638687134,grad_norm: 0.7830828114493206, iteration: 324993
loss: 0.9897204637527466,grad_norm: 0.8578089265721441, iteration: 324994
loss: 1.2320306301116943,grad_norm: 0.9999998532445067, iteration: 324995
loss: 1.012149691581726,grad_norm: 0.9999992742018416, iteration: 324996
loss: 1.0723466873168945,grad_norm: 0.9999995777470874, iteration: 324997
loss: 1.0032329559326172,grad_norm: 0.999999080403772, iteration: 324998
loss: 0.9971047639846802,grad_norm: 0.9999991335644385, iteration: 324999
loss: 0.9877697229385376,grad_norm: 0.9999990239748948, iteration: 325000
loss: 1.0368682146072388,grad_norm: 0.8984882957381426, iteration: 325001
loss: 1.0478719472885132,grad_norm: 0.837734633871601, iteration: 325002
loss: 0.9992406368255615,grad_norm: 0.7967799543513849, iteration: 325003
loss: 1.0411592721939087,grad_norm: 0.746842726912025, iteration: 325004
loss: 0.9892175793647766,grad_norm: 0.9756944970866069, iteration: 325005
loss: 1.0274627208709717,grad_norm: 0.9999989224423839, iteration: 325006
loss: 0.9923478960990906,grad_norm: 0.8298188025867135, iteration: 325007
loss: 0.9757757782936096,grad_norm: 0.8605734985420294, iteration: 325008
loss: 0.9676074981689453,grad_norm: 0.8248394722867294, iteration: 325009
loss: 1.0072124004364014,grad_norm: 0.9920386397529412, iteration: 325010
loss: 0.9971271753311157,grad_norm: 0.9179117211607715, iteration: 325011
loss: 0.9908679723739624,grad_norm: 0.8899761109037017, iteration: 325012
loss: 1.01390540599823,grad_norm: 0.9999991473589845, iteration: 325013
loss: 0.9932436347007751,grad_norm: 0.9999989906666249, iteration: 325014
loss: 0.9799477458000183,grad_norm: 0.9999990543597439, iteration: 325015
loss: 1.018149971961975,grad_norm: 0.8498808085481709, iteration: 325016
loss: 0.9819422960281372,grad_norm: 0.8196313286011558, iteration: 325017
loss: 0.9856796264648438,grad_norm: 0.7617153022524277, iteration: 325018
loss: 0.993435263633728,grad_norm: 0.9646329457146823, iteration: 325019
loss: 0.9827770590782166,grad_norm: 0.7378952598007009, iteration: 325020
loss: 0.9958183169364929,grad_norm: 0.8283080407124592, iteration: 325021
loss: 0.9737085700035095,grad_norm: 0.7756068296777693, iteration: 325022
loss: 0.9613484740257263,grad_norm: 0.9999994871674632, iteration: 325023
loss: 1.0156595706939697,grad_norm: 0.9281031333963718, iteration: 325024
loss: 1.0139535665512085,grad_norm: 0.9999994943151317, iteration: 325025
loss: 0.9948925375938416,grad_norm: 0.8612130596492794, iteration: 325026
loss: 0.9806303977966309,grad_norm: 0.6927197043298136, iteration: 325027
loss: 0.9806845784187317,grad_norm: 0.9324923923906232, iteration: 325028
loss: 1.0103809833526611,grad_norm: 0.9999991472918488, iteration: 325029
loss: 1.0247608423233032,grad_norm: 0.7145659561352807, iteration: 325030
loss: 0.977468729019165,grad_norm: 0.8545061102627858, iteration: 325031
loss: 0.9632753729820251,grad_norm: 0.9837837819741134, iteration: 325032
loss: 0.9589472413063049,grad_norm: 0.9619307511667553, iteration: 325033
loss: 1.0479443073272705,grad_norm: 0.9021313938919594, iteration: 325034
loss: 1.02276611328125,grad_norm: 0.9999993514192388, iteration: 325035
loss: 1.043845295906067,grad_norm: 0.7746908805967658, iteration: 325036
loss: 1.0380192995071411,grad_norm: 0.7832709069805288, iteration: 325037
loss: 1.0309994220733643,grad_norm: 0.955360635356616, iteration: 325038
loss: 0.9690556526184082,grad_norm: 0.8214502044270268, iteration: 325039
loss: 1.0050143003463745,grad_norm: 0.8389947259146032, iteration: 325040
loss: 0.9901465177536011,grad_norm: 0.8858286061133576, iteration: 325041
loss: 1.0225958824157715,grad_norm: 0.8349944744831845, iteration: 325042
loss: 1.0167231559753418,grad_norm: 0.8268093863012033, iteration: 325043
loss: 0.98785001039505,grad_norm: 0.8980824065530966, iteration: 325044
loss: 1.0068657398223877,grad_norm: 0.7999688619919565, iteration: 325045
loss: 1.0133002996444702,grad_norm: 0.9999997082269763, iteration: 325046
loss: 0.96296226978302,grad_norm: 0.6649477801169307, iteration: 325047
loss: 1.020379900932312,grad_norm: 0.7893458151059997, iteration: 325048
loss: 1.117639422416687,grad_norm: 0.9999994416413869, iteration: 325049
loss: 1.0436944961547852,grad_norm: 0.8282627280710265, iteration: 325050
loss: 1.000235915184021,grad_norm: 0.861241269739922, iteration: 325051
loss: 1.004774570465088,grad_norm: 0.8275783860557188, iteration: 325052
loss: 0.9741588234901428,grad_norm: 0.7975330454125507, iteration: 325053
loss: 1.0141136646270752,grad_norm: 0.9999991319668209, iteration: 325054
loss: 0.9996621608734131,grad_norm: 0.8236800458441805, iteration: 325055
loss: 1.0124237537384033,grad_norm: 0.9999989846977738, iteration: 325056
loss: 1.0281027555465698,grad_norm: 0.8715423133094274, iteration: 325057
loss: 0.9860531091690063,grad_norm: 0.7711125129667714, iteration: 325058
loss: 1.0069994926452637,grad_norm: 0.8539287272962142, iteration: 325059
loss: 1.006777048110962,grad_norm: 0.871094108354996, iteration: 325060
loss: 0.9856178164482117,grad_norm: 0.9999991864274195, iteration: 325061
loss: 0.9856389760971069,grad_norm: 0.8647087267029286, iteration: 325062
loss: 1.0224318504333496,grad_norm: 0.7999902082861251, iteration: 325063
loss: 1.0222911834716797,grad_norm: 0.8544331036414665, iteration: 325064
loss: 0.9929439425468445,grad_norm: 0.80355879269821, iteration: 325065
loss: 0.9926251173019409,grad_norm: 0.824971666322603, iteration: 325066
loss: 0.9842326641082764,grad_norm: 0.7143047940857558, iteration: 325067
loss: 0.9716390371322632,grad_norm: 0.8369373379846383, iteration: 325068
loss: 0.985416054725647,grad_norm: 0.801857069557536, iteration: 325069
loss: 1.1231330633163452,grad_norm: 0.9999990602711546, iteration: 325070
loss: 0.990180253982544,grad_norm: 0.7164926725398395, iteration: 325071
loss: 0.9730411171913147,grad_norm: 0.9999991854166805, iteration: 325072
loss: 0.9832956790924072,grad_norm: 0.9999999071192438, iteration: 325073
loss: 1.0077168941497803,grad_norm: 0.9563043080704517, iteration: 325074
loss: 1.0343729257583618,grad_norm: 0.8024712082727288, iteration: 325075
loss: 1.010726809501648,grad_norm: 0.9999991395128902, iteration: 325076
loss: 0.9842797517776489,grad_norm: 0.8768917620706259, iteration: 325077
loss: 0.9773539304733276,grad_norm: 0.7756192804864543, iteration: 325078
loss: 1.007729172706604,grad_norm: 0.8208595874898155, iteration: 325079
loss: 1.1240240335464478,grad_norm: 0.9999996251771919, iteration: 325080
loss: 1.0237337350845337,grad_norm: 0.7382376284362093, iteration: 325081
loss: 1.019985318183899,grad_norm: 0.788157048720657, iteration: 325082
loss: 0.9810307621955872,grad_norm: 0.8912464220341003, iteration: 325083
loss: 1.004244327545166,grad_norm: 0.8112200685134068, iteration: 325084
loss: 1.0019887685775757,grad_norm: 0.8657650172234144, iteration: 325085
loss: 0.9906250238418579,grad_norm: 0.8239144231552851, iteration: 325086
loss: 1.0192769765853882,grad_norm: 0.9999989895789747, iteration: 325087
loss: 1.0244402885437012,grad_norm: 0.927746709102015, iteration: 325088
loss: 0.9933837056159973,grad_norm: 0.8406587431401302, iteration: 325089
loss: 0.9748981595039368,grad_norm: 0.7755943440447298, iteration: 325090
loss: 0.9829526543617249,grad_norm: 0.8396464687842329, iteration: 325091
loss: 1.048372507095337,grad_norm: 0.947150629637944, iteration: 325092
loss: 0.9974817633628845,grad_norm: 0.8479825931715713, iteration: 325093
loss: 1.0081603527069092,grad_norm: 0.9810258981573585, iteration: 325094
loss: 1.0537275075912476,grad_norm: 0.999999313481276, iteration: 325095
loss: 1.0363281965255737,grad_norm: 0.9999995687822202, iteration: 325096
loss: 1.0310590267181396,grad_norm: 0.9999992824181432, iteration: 325097
loss: 1.0055899620056152,grad_norm: 0.8358714592181268, iteration: 325098
loss: 0.9821625351905823,grad_norm: 0.8174940664194462, iteration: 325099
loss: 1.0007625818252563,grad_norm: 0.97776481656328, iteration: 325100
loss: 1.0017436742782593,grad_norm: 0.8877098970156814, iteration: 325101
loss: 1.022598147392273,grad_norm: 0.9654916997120659, iteration: 325102
loss: 0.999821126461029,grad_norm: 0.7164689745491603, iteration: 325103
loss: 0.9721104502677917,grad_norm: 0.9999992559577733, iteration: 325104
loss: 0.9849306344985962,grad_norm: 0.8667603392355537, iteration: 325105
loss: 1.0137988328933716,grad_norm: 0.9999991067916177, iteration: 325106
loss: 1.0077928304672241,grad_norm: 0.8460364633803897, iteration: 325107
loss: 0.9969568848609924,grad_norm: 0.8969568673586557, iteration: 325108
loss: 0.9592047929763794,grad_norm: 0.8913639769554218, iteration: 325109
loss: 0.9706806540489197,grad_norm: 0.9999991373109757, iteration: 325110
loss: 0.9921459555625916,grad_norm: 0.9481923719660259, iteration: 325111
loss: 1.0078067779541016,grad_norm: 0.8159639121678195, iteration: 325112
loss: 1.0001928806304932,grad_norm: 0.830196812232252, iteration: 325113
loss: 1.0201536417007446,grad_norm: 0.9476108647033794, iteration: 325114
loss: 1.0210078954696655,grad_norm: 0.7916951986771493, iteration: 325115
loss: 1.0195049047470093,grad_norm: 0.8626675020499092, iteration: 325116
loss: 1.01810622215271,grad_norm: 0.8727540701448218, iteration: 325117
loss: 0.995846688747406,grad_norm: 0.8881157368116652, iteration: 325118
loss: 1.0179177522659302,grad_norm: 0.6994523130079742, iteration: 325119
loss: 0.9904808402061462,grad_norm: 0.7583546540356741, iteration: 325120
loss: 0.9920811057090759,grad_norm: 0.7735894118728706, iteration: 325121
loss: 0.9842585921287537,grad_norm: 0.9999998146865956, iteration: 325122
loss: 0.9910150766372681,grad_norm: 0.8026683672133025, iteration: 325123
loss: 1.0123473405838013,grad_norm: 0.9999992780847066, iteration: 325124
loss: 1.0001956224441528,grad_norm: 0.7857861824531921, iteration: 325125
loss: 0.9849604964256287,grad_norm: 0.720724096112381, iteration: 325126
loss: 1.0960053205490112,grad_norm: 0.9999997561668215, iteration: 325127
loss: 1.0050318241119385,grad_norm: 0.8592894537277589, iteration: 325128
loss: 1.0110515356063843,grad_norm: 0.9999991919953892, iteration: 325129
loss: 1.0417195558547974,grad_norm: 0.9999995273991573, iteration: 325130
loss: 0.9846909046173096,grad_norm: 0.8064155789641463, iteration: 325131
loss: 0.9877069592475891,grad_norm: 0.9355288894843293, iteration: 325132
loss: 0.9893175959587097,grad_norm: 0.8923221690576908, iteration: 325133
loss: 1.009657859802246,grad_norm: 0.9999994835967556, iteration: 325134
loss: 1.0549530982971191,grad_norm: 0.9999994142038933, iteration: 325135
loss: 0.9774916172027588,grad_norm: 0.6914721915819222, iteration: 325136
loss: 1.0137717723846436,grad_norm: 0.8467362502875279, iteration: 325137
loss: 0.9659010767936707,grad_norm: 0.8633717828127478, iteration: 325138
loss: 1.026999831199646,grad_norm: 0.6245656706964233, iteration: 325139
loss: 0.9937867522239685,grad_norm: 0.7728376571276191, iteration: 325140
loss: 0.9852030873298645,grad_norm: 0.8670063210672783, iteration: 325141
loss: 0.9814333319664001,grad_norm: 0.8354839328407541, iteration: 325142
loss: 1.0014551877975464,grad_norm: 0.7700577691365972, iteration: 325143
loss: 1.0221811532974243,grad_norm: 0.8278787970821617, iteration: 325144
loss: 0.9783201813697815,grad_norm: 0.8824881623784904, iteration: 325145
loss: 0.9916753768920898,grad_norm: 0.7346952596196005, iteration: 325146
loss: 0.9950494766235352,grad_norm: 0.8934278627804194, iteration: 325147
loss: 0.9536989331245422,grad_norm: 0.8318000413186827, iteration: 325148
loss: 1.0111937522888184,grad_norm: 0.9999993647445883, iteration: 325149
loss: 1.0148788690567017,grad_norm: 0.8880618705785397, iteration: 325150
loss: 1.0017411708831787,grad_norm: 0.7605620991152042, iteration: 325151
loss: 1.0012015104293823,grad_norm: 0.8556720322897184, iteration: 325152
loss: 0.9786520004272461,grad_norm: 0.81542365164623, iteration: 325153
loss: 1.013893485069275,grad_norm: 0.799040766449433, iteration: 325154
loss: 0.9762906432151794,grad_norm: 0.6708398938186213, iteration: 325155
loss: 1.0001285076141357,grad_norm: 0.7939900017469295, iteration: 325156
loss: 1.004853367805481,grad_norm: 0.753073469716276, iteration: 325157
loss: 0.9636750817298889,grad_norm: 0.8486970814437326, iteration: 325158
loss: 0.973215639591217,grad_norm: 0.80195957825564, iteration: 325159
loss: 1.0107536315917969,grad_norm: 0.9636612825809882, iteration: 325160
loss: 0.992717981338501,grad_norm: 0.8715662491188295, iteration: 325161
loss: 0.9971523284912109,grad_norm: 0.9593853560529663, iteration: 325162
loss: 1.026648759841919,grad_norm: 0.8597048041258624, iteration: 325163
loss: 1.0160845518112183,grad_norm: 0.999999242022822, iteration: 325164
loss: 1.0061017274856567,grad_norm: 0.8401717184913945, iteration: 325165
loss: 0.9829182624816895,grad_norm: 0.8728148570175356, iteration: 325166
loss: 0.990745484828949,grad_norm: 0.9999992029817595, iteration: 325167
loss: 0.9954805374145508,grad_norm: 0.774698633181709, iteration: 325168
loss: 0.9666042923927307,grad_norm: 0.7344112088975576, iteration: 325169
loss: 1.020467758178711,grad_norm: 0.8489675227446445, iteration: 325170
loss: 0.9955003261566162,grad_norm: 0.9137131223192138, iteration: 325171
loss: 1.00199556350708,grad_norm: 0.8406255120530163, iteration: 325172
loss: 0.9925463795661926,grad_norm: 0.9999990258032063, iteration: 325173
loss: 1.0597282648086548,grad_norm: 0.9999990596696171, iteration: 325174
loss: 0.9652448892593384,grad_norm: 0.8158748485729391, iteration: 325175
loss: 1.175698161125183,grad_norm: 1.0000000285216668, iteration: 325176
loss: 0.9804069995880127,grad_norm: 0.8123845176828586, iteration: 325177
loss: 1.0143828392028809,grad_norm: 0.8685657399110724, iteration: 325178
loss: 0.9798097610473633,grad_norm: 0.899328307940615, iteration: 325179
loss: 0.9984486699104309,grad_norm: 0.7823182023300612, iteration: 325180
loss: 1.0801085233688354,grad_norm: 0.9999996808012626, iteration: 325181
loss: 0.9953563809394836,grad_norm: 0.9999991671080782, iteration: 325182
loss: 0.9746556878089905,grad_norm: 0.7831828431864909, iteration: 325183
loss: 1.0101888179779053,grad_norm: 0.80732657626655, iteration: 325184
loss: 0.9963726997375488,grad_norm: 0.8203259810437629, iteration: 325185
loss: 0.9991490840911865,grad_norm: 0.9758272962414309, iteration: 325186
loss: 1.0162988901138306,grad_norm: 0.9761478794803833, iteration: 325187
loss: 0.9954610466957092,grad_norm: 0.7943065849759119, iteration: 325188
loss: 0.9966295957565308,grad_norm: 0.7649290319572709, iteration: 325189
loss: 1.019652009010315,grad_norm: 0.7627151901621552, iteration: 325190
loss: 0.9706243276596069,grad_norm: 0.8641404151905485, iteration: 325191
loss: 0.9819878339767456,grad_norm: 0.9374296703963407, iteration: 325192
loss: 0.9608494639396667,grad_norm: 0.7225786577650277, iteration: 325193
loss: 0.9872626662254333,grad_norm: 0.9185751759084961, iteration: 325194
loss: 0.9914698600769043,grad_norm: 0.9999991788545413, iteration: 325195
loss: 0.9635215997695923,grad_norm: 0.806253232200511, iteration: 325196
loss: 1.0121077299118042,grad_norm: 0.687208981979995, iteration: 325197
loss: 0.992627739906311,grad_norm: 0.7678116629015624, iteration: 325198
loss: 0.9960260391235352,grad_norm: 0.9999991731358014, iteration: 325199
loss: 0.9993903040885925,grad_norm: 0.9061154163824289, iteration: 325200
loss: 0.9830981492996216,grad_norm: 0.8029813343703949, iteration: 325201
loss: 0.9842245578765869,grad_norm: 0.99999940418996, iteration: 325202
loss: 0.9704664349555969,grad_norm: 0.9703771601392678, iteration: 325203
loss: 0.9989515542984009,grad_norm: 0.8118423889132287, iteration: 325204
loss: 0.9939323663711548,grad_norm: 0.9999991280587235, iteration: 325205
loss: 1.00121009349823,grad_norm: 0.8620776858684932, iteration: 325206
loss: 0.983518123626709,grad_norm: 0.7076603142088763, iteration: 325207
loss: 1.0070518255233765,grad_norm: 0.8207104186631737, iteration: 325208
loss: 0.9938786625862122,grad_norm: 0.7872467657789984, iteration: 325209
loss: 1.0331816673278809,grad_norm: 0.936938710839907, iteration: 325210
loss: 1.0459275245666504,grad_norm: 0.9999999324239328, iteration: 325211
loss: 0.9836114645004272,grad_norm: 0.7126672500487118, iteration: 325212
loss: 1.0507558584213257,grad_norm: 0.7808685917457502, iteration: 325213
loss: 0.9699760675430298,grad_norm: 0.7779446995641737, iteration: 325214
loss: 0.9850808382034302,grad_norm: 0.7576439710849912, iteration: 325215
loss: 1.0426487922668457,grad_norm: 0.764567605609897, iteration: 325216
loss: 0.98393315076828,grad_norm: 0.7998653487893387, iteration: 325217
loss: 1.0053529739379883,grad_norm: 0.9180836863294307, iteration: 325218
loss: 0.9969115257263184,grad_norm: 0.9999989458761368, iteration: 325219
loss: 0.9596893191337585,grad_norm: 0.9898248943526625, iteration: 325220
loss: 0.9786249995231628,grad_norm: 0.8935285476638969, iteration: 325221
loss: 1.0091201066970825,grad_norm: 0.7522560225757341, iteration: 325222
loss: 1.0185883045196533,grad_norm: 0.7880661085318873, iteration: 325223
loss: 0.9836036562919617,grad_norm: 0.826205664971234, iteration: 325224
loss: 0.9795546531677246,grad_norm: 0.8603936465753319, iteration: 325225
loss: 1.0256537199020386,grad_norm: 0.8932307146540506, iteration: 325226
loss: 0.9986125230789185,grad_norm: 0.9535523745412033, iteration: 325227
loss: 1.0304360389709473,grad_norm: 0.8262716931425395, iteration: 325228
loss: 1.002479910850525,grad_norm: 0.8628629841048062, iteration: 325229
loss: 1.0251225233078003,grad_norm: 0.9999990729308663, iteration: 325230
loss: 1.026018500328064,grad_norm: 0.8868933230189155, iteration: 325231
loss: 0.9960634112358093,grad_norm: 0.7920835327846077, iteration: 325232
loss: 1.0471299886703491,grad_norm: 0.9999996681978409, iteration: 325233
loss: 0.9303731918334961,grad_norm: 0.7639470853092672, iteration: 325234
loss: 0.9886685013771057,grad_norm: 0.9202546298033061, iteration: 325235
loss: 0.9972876906394958,grad_norm: 0.8043909821010083, iteration: 325236
loss: 1.0368949174880981,grad_norm: 0.8207984590372976, iteration: 325237
loss: 0.9890517592430115,grad_norm: 0.9999997468421831, iteration: 325238
loss: 1.0130168199539185,grad_norm: 0.9999994759756959, iteration: 325239
loss: 1.0457210540771484,grad_norm: 0.9999998647611591, iteration: 325240
loss: 0.9625237584114075,grad_norm: 0.8695457950573028, iteration: 325241
loss: 0.9742712378501892,grad_norm: 0.9999990157418385, iteration: 325242
loss: 0.9926708936691284,grad_norm: 0.9999994913285052, iteration: 325243
loss: 0.9968043565750122,grad_norm: 0.9265066316922325, iteration: 325244
loss: 0.991250216960907,grad_norm: 0.8720798622320073, iteration: 325245
loss: 0.9952948689460754,grad_norm: 0.8234669324156756, iteration: 325246
loss: 1.0454825162887573,grad_norm: 0.9999992746521459, iteration: 325247
loss: 1.0140700340270996,grad_norm: 0.999999014053455, iteration: 325248
loss: 0.9988526105880737,grad_norm: 0.7682018754827458, iteration: 325249
loss: 1.0178972482681274,grad_norm: 0.9383164094497266, iteration: 325250
loss: 1.0021088123321533,grad_norm: 0.846349441751239, iteration: 325251
loss: 0.9920437932014465,grad_norm: 0.8618693265186982, iteration: 325252
loss: 0.9680025577545166,grad_norm: 0.6908498977327866, iteration: 325253
loss: 0.9699528217315674,grad_norm: 0.83653046249215, iteration: 325254
loss: 0.9929684996604919,grad_norm: 0.7993347976645259, iteration: 325255
loss: 0.9966174960136414,grad_norm: 0.7083930609235246, iteration: 325256
loss: 0.9947078227996826,grad_norm: 0.8307876291540135, iteration: 325257
loss: 1.0233365297317505,grad_norm: 0.8902907975252985, iteration: 325258
loss: 1.0357862710952759,grad_norm: 0.9999994472786556, iteration: 325259
loss: 1.0200402736663818,grad_norm: 0.9408022894524397, iteration: 325260
loss: 0.9954497218132019,grad_norm: 0.7895316895906737, iteration: 325261
loss: 1.024406909942627,grad_norm: 0.9335606005867348, iteration: 325262
loss: 1.0089869499206543,grad_norm: 0.7715619879225026, iteration: 325263
loss: 1.015378713607788,grad_norm: 0.8146972434547907, iteration: 325264
loss: 1.0096025466918945,grad_norm: 0.9157127271142647, iteration: 325265
loss: 1.009755253791809,grad_norm: 0.9999998070562655, iteration: 325266
loss: 1.020841360092163,grad_norm: 0.9314735205635298, iteration: 325267
loss: 0.9796871542930603,grad_norm: 0.9999995688806279, iteration: 325268
loss: 1.0017884969711304,grad_norm: 0.9019643790037222, iteration: 325269
loss: 1.0134527683258057,grad_norm: 0.9391897024821833, iteration: 325270
loss: 0.9543834924697876,grad_norm: 0.7909297000331286, iteration: 325271
loss: 0.9763237237930298,grad_norm: 0.9999990523599943, iteration: 325272
loss: 0.979217529296875,grad_norm: 0.9276038730625598, iteration: 325273
loss: 1.0120853185653687,grad_norm: 0.8609077143537355, iteration: 325274
loss: 0.9635365009307861,grad_norm: 0.7612874336322956, iteration: 325275
loss: 1.0377004146575928,grad_norm: 0.7960369960298406, iteration: 325276
loss: 0.9949797987937927,grad_norm: 0.8188788772320955, iteration: 325277
loss: 1.004799485206604,grad_norm: 0.9999995817168207, iteration: 325278
loss: 1.0021733045578003,grad_norm: 0.8227236601792465, iteration: 325279
loss: 0.9985296130180359,grad_norm: 0.9752664580526675, iteration: 325280
loss: 1.0519939661026,grad_norm: 0.999999524837898, iteration: 325281
loss: 0.9721629619598389,grad_norm: 0.9999991267385618, iteration: 325282
loss: 1.0183978080749512,grad_norm: 0.8948353650539667, iteration: 325283
loss: 0.9936862587928772,grad_norm: 0.9518406237030007, iteration: 325284
loss: 0.9995641708374023,grad_norm: 0.8170670239310279, iteration: 325285
loss: 1.0206118822097778,grad_norm: 0.8790768428573944, iteration: 325286
loss: 1.0126926898956299,grad_norm: 0.7741787975462306, iteration: 325287
loss: 1.038289189338684,grad_norm: 0.9999996361820257, iteration: 325288
loss: 1.028499960899353,grad_norm: 0.8301691148878921, iteration: 325289
loss: 0.9789186716079712,grad_norm: 0.8699179379161442, iteration: 325290
loss: 1.0004088878631592,grad_norm: 0.7801265190210807, iteration: 325291
loss: 0.9813302159309387,grad_norm: 0.880704060297838, iteration: 325292
loss: 0.9856050610542297,grad_norm: 0.76673645791237, iteration: 325293
loss: 0.9592465162277222,grad_norm: 0.7651221165334134, iteration: 325294
loss: 1.042852759361267,grad_norm: 0.881430955792071, iteration: 325295
loss: 1.0005797147750854,grad_norm: 0.8517469942872231, iteration: 325296
loss: 0.9838851094245911,grad_norm: 0.8410429339909866, iteration: 325297
loss: 1.0025883913040161,grad_norm: 0.8105240912364085, iteration: 325298
loss: 0.9766111373901367,grad_norm: 0.8093697118546553, iteration: 325299
loss: 1.0124908685684204,grad_norm: 0.8858731705442984, iteration: 325300
loss: 1.0230885744094849,grad_norm: 0.8771101911689624, iteration: 325301
loss: 0.9896918535232544,grad_norm: 0.867674298921504, iteration: 325302
loss: 0.983054518699646,grad_norm: 0.8455812706132979, iteration: 325303
loss: 1.0144685506820679,grad_norm: 0.9999997828430602, iteration: 325304
loss: 0.9800930023193359,grad_norm: 0.9829012388268085, iteration: 325305
loss: 0.9832364916801453,grad_norm: 0.8582491691887189, iteration: 325306
loss: 1.0001311302185059,grad_norm: 0.8501124190189853, iteration: 325307
loss: 0.9752324223518372,grad_norm: 0.7538905212118282, iteration: 325308
loss: 0.9985820651054382,grad_norm: 0.681756636864143, iteration: 325309
loss: 1.0149351358413696,grad_norm: 0.9012855418738296, iteration: 325310
loss: 0.9975781440734863,grad_norm: 0.8984437547347521, iteration: 325311
loss: 1.071204423904419,grad_norm: 0.8113550301080888, iteration: 325312
loss: 1.0027014017105103,grad_norm: 0.807246453019465, iteration: 325313
loss: 1.027353048324585,grad_norm: 0.9999992077925055, iteration: 325314
loss: 1.0132331848144531,grad_norm: 0.9999993609746028, iteration: 325315
loss: 1.000542402267456,grad_norm: 0.7833360067900913, iteration: 325316
loss: 1.0188993215560913,grad_norm: 0.6846309387137834, iteration: 325317
loss: 0.9978870749473572,grad_norm: 0.8403131278975992, iteration: 325318
loss: 0.9462469220161438,grad_norm: 0.746202282270441, iteration: 325319
loss: 0.9370864629745483,grad_norm: 0.8297383808188944, iteration: 325320
loss: 1.0137710571289062,grad_norm: 0.8311448634181903, iteration: 325321
loss: 0.997930109500885,grad_norm: 0.9999992221061846, iteration: 325322
loss: 0.9768356680870056,grad_norm: 0.9006581390227306, iteration: 325323
loss: 1.0193318128585815,grad_norm: 0.7030755433449803, iteration: 325324
loss: 1.0318212509155273,grad_norm: 0.813763412065425, iteration: 325325
loss: 1.0211889743804932,grad_norm: 0.8390564564810072, iteration: 325326
loss: 1.0004252195358276,grad_norm: 0.9999991493638656, iteration: 325327
loss: 0.9854239821434021,grad_norm: 0.8549785182314901, iteration: 325328
loss: 0.9979613423347473,grad_norm: 0.8811380409222319, iteration: 325329
loss: 1.016911268234253,grad_norm: 0.9451277830959528, iteration: 325330
loss: 1.0447930097579956,grad_norm: 0.8893257112198573, iteration: 325331
loss: 0.9850939512252808,grad_norm: 0.8313333736917313, iteration: 325332
loss: 1.001423954963684,grad_norm: 0.8107605584758907, iteration: 325333
loss: 1.0308529138565063,grad_norm: 0.9566771762180117, iteration: 325334
loss: 0.996010422706604,grad_norm: 0.9276718518718057, iteration: 325335
loss: 0.9934077858924866,grad_norm: 0.7677005776161644, iteration: 325336
loss: 1.0044437646865845,grad_norm: 0.7828623771898432, iteration: 325337
loss: 1.0023534297943115,grad_norm: 0.9268305105424961, iteration: 325338
loss: 1.0279827117919922,grad_norm: 0.9121575245672688, iteration: 325339
loss: 0.9747185111045837,grad_norm: 0.8577690734006166, iteration: 325340
loss: 0.9808939695358276,grad_norm: 0.8792192178320122, iteration: 325341
loss: 1.0170897245407104,grad_norm: 0.9173941027167322, iteration: 325342
loss: 0.9940696954727173,grad_norm: 0.7744348369206068, iteration: 325343
loss: 0.9723665118217468,grad_norm: 0.8969311598178596, iteration: 325344
loss: 1.0178701877593994,grad_norm: 0.8742215242310059, iteration: 325345
loss: 1.011290192604065,grad_norm: 0.8062787839293833, iteration: 325346
loss: 1.0152782201766968,grad_norm: 0.8174562731905858, iteration: 325347
loss: 1.0742781162261963,grad_norm: 0.9999994340846124, iteration: 325348
loss: 1.0522342920303345,grad_norm: 0.9367284154838308, iteration: 325349
loss: 1.0093837976455688,grad_norm: 0.9999991164665603, iteration: 325350
loss: 1.0165783166885376,grad_norm: 0.9999990090910281, iteration: 325351
loss: 0.9969815611839294,grad_norm: 0.725038868658789, iteration: 325352
loss: 0.9993336200714111,grad_norm: 0.9999991213342326, iteration: 325353
loss: 1.0219568014144897,grad_norm: 0.6958779705890984, iteration: 325354
loss: 0.9916837811470032,grad_norm: 0.8610666557503143, iteration: 325355
loss: 0.9924683570861816,grad_norm: 0.8752428620104927, iteration: 325356
loss: 1.0001006126403809,grad_norm: 0.9324751127486688, iteration: 325357
loss: 1.0309538841247559,grad_norm: 0.8401408230821664, iteration: 325358
loss: 1.0113201141357422,grad_norm: 0.9565550432634701, iteration: 325359
loss: 1.0079774856567383,grad_norm: 0.9999992479664016, iteration: 325360
loss: 1.0136070251464844,grad_norm: 0.707351512224262, iteration: 325361
loss: 1.0341222286224365,grad_norm: 0.9999997515573464, iteration: 325362
loss: 1.0064901113510132,grad_norm: 0.6979759950335828, iteration: 325363
loss: 1.0198997259140015,grad_norm: 0.903918589909773, iteration: 325364
loss: 1.021175742149353,grad_norm: 0.9468053082397692, iteration: 325365
loss: 1.0099736452102661,grad_norm: 0.8014355136354279, iteration: 325366
loss: 0.9744809865951538,grad_norm: 0.7147093825971571, iteration: 325367
loss: 0.9780148863792419,grad_norm: 0.6902717662173017, iteration: 325368
loss: 0.9964386820793152,grad_norm: 0.7780052786938078, iteration: 325369
loss: 1.0383566617965698,grad_norm: 0.8202728300989344, iteration: 325370
loss: 0.9672713279724121,grad_norm: 0.8321746465970455, iteration: 325371
loss: 1.0048564672470093,grad_norm: 0.9751990326190003, iteration: 325372
loss: 1.0161765813827515,grad_norm: 0.8661315238587782, iteration: 325373
loss: 0.9905733466148376,grad_norm: 0.9751976613031254, iteration: 325374
loss: 1.0202330350875854,grad_norm: 0.7754519693668416, iteration: 325375
loss: 0.9784506559371948,grad_norm: 0.9999998968329937, iteration: 325376
loss: 1.0437273979187012,grad_norm: 0.8583073858632005, iteration: 325377
loss: 1.0039150714874268,grad_norm: 0.9999993255820767, iteration: 325378
loss: 0.9614458680152893,grad_norm: 0.9999989739288945, iteration: 325379
loss: 1.005348801612854,grad_norm: 0.8552920823702448, iteration: 325380
loss: 0.9966945052146912,grad_norm: 0.9105394314169092, iteration: 325381
loss: 0.9785555601119995,grad_norm: 0.9999992637756515, iteration: 325382
loss: 0.9846432209014893,grad_norm: 0.688418353912464, iteration: 325383
loss: 0.9805652499198914,grad_norm: 0.9999990867768678, iteration: 325384
loss: 1.0195586681365967,grad_norm: 0.971696228038804, iteration: 325385
loss: 1.0035922527313232,grad_norm: 0.8543957820191629, iteration: 325386
loss: 0.9936556220054626,grad_norm: 0.7991652665984746, iteration: 325387
loss: 0.9744094610214233,grad_norm: 0.9999993261681083, iteration: 325388
loss: 0.9942973256111145,grad_norm: 0.8071171829383437, iteration: 325389
loss: 1.0053534507751465,grad_norm: 0.8670174830529893, iteration: 325390
loss: 0.9972751140594482,grad_norm: 0.7517844928287435, iteration: 325391
loss: 1.0602850914001465,grad_norm: 0.9999994190415427, iteration: 325392
loss: 0.9673131108283997,grad_norm: 0.898366106898794, iteration: 325393
loss: 0.994589626789093,grad_norm: 0.7198680824217372, iteration: 325394
loss: 1.0173014402389526,grad_norm: 0.7367727571986553, iteration: 325395
loss: 0.962814450263977,grad_norm: 0.9999990239040439, iteration: 325396
loss: 1.011759638786316,grad_norm: 0.9335762454315878, iteration: 325397
loss: 0.9798760414123535,grad_norm: 0.8996730926881357, iteration: 325398
loss: 1.0002410411834717,grad_norm: 0.9197287295059907, iteration: 325399
loss: 1.012627363204956,grad_norm: 0.8315381418063762, iteration: 325400
loss: 1.0092713832855225,grad_norm: 0.9999992480829488, iteration: 325401
loss: 0.9755172729492188,grad_norm: 0.9768798089648444, iteration: 325402
loss: 0.9844932556152344,grad_norm: 0.7326407151672492, iteration: 325403
loss: 1.021751046180725,grad_norm: 0.7448492666399693, iteration: 325404
loss: 0.980815052986145,grad_norm: 0.8099427001249315, iteration: 325405
loss: 1.0172741413116455,grad_norm: 0.9999991617092995, iteration: 325406
loss: 0.9967790842056274,grad_norm: 0.758834874896897, iteration: 325407
loss: 1.0253714323043823,grad_norm: 0.7521750591899673, iteration: 325408
loss: 0.9833375811576843,grad_norm: 0.8966745132212962, iteration: 325409
loss: 1.0088647603988647,grad_norm: 0.9385774538037959, iteration: 325410
loss: 1.0236058235168457,grad_norm: 0.9999998314009185, iteration: 325411
loss: 0.9916220307350159,grad_norm: 0.7142489185519272, iteration: 325412
loss: 1.1389265060424805,grad_norm: 0.9999998712499562, iteration: 325413
loss: 0.9975738525390625,grad_norm: 0.9258065562417699, iteration: 325414
loss: 1.0208871364593506,grad_norm: 0.999999109790674, iteration: 325415
loss: 1.0174213647842407,grad_norm: 0.8842166241526325, iteration: 325416
loss: 0.9977824687957764,grad_norm: 0.9358413135251017, iteration: 325417
loss: 0.9720894694328308,grad_norm: 0.7441186853334628, iteration: 325418
loss: 0.9772419929504395,grad_norm: 0.7195550717514272, iteration: 325419
loss: 0.9908254146575928,grad_norm: 0.9229004880934117, iteration: 325420
loss: 1.01957106590271,grad_norm: 0.9452839587567451, iteration: 325421
loss: 1.0064154863357544,grad_norm: 0.9999989760640526, iteration: 325422
loss: 1.0052469968795776,grad_norm: 0.8423458525759553, iteration: 325423
loss: 0.9660735726356506,grad_norm: 0.774049363531247, iteration: 325424
loss: 0.9776327013969421,grad_norm: 0.8871030652819548, iteration: 325425
loss: 0.9881296157836914,grad_norm: 0.8291629346998937, iteration: 325426
loss: 1.0335876941680908,grad_norm: 0.9999990457911798, iteration: 325427
loss: 1.0280569791793823,grad_norm: 0.7925420791751829, iteration: 325428
loss: 0.9941572546958923,grad_norm: 0.7589335923283488, iteration: 325429
loss: 1.0377087593078613,grad_norm: 0.7693952937485132, iteration: 325430
loss: 1.0215920209884644,grad_norm: 0.6783590334010122, iteration: 325431
loss: 0.9924474954605103,grad_norm: 0.9999991042991777, iteration: 325432
loss: 1.080895185470581,grad_norm: 0.9616387129822976, iteration: 325433
loss: 0.9817824959754944,grad_norm: 0.7890701447269213, iteration: 325434
loss: 0.9991414546966553,grad_norm: 0.9116791476902505, iteration: 325435
loss: 0.9784745573997498,grad_norm: 0.7764135503452554, iteration: 325436
loss: 1.0013961791992188,grad_norm: 0.9595841115838956, iteration: 325437
loss: 1.0053746700286865,grad_norm: 0.8622473215283083, iteration: 325438
loss: 1.026474952697754,grad_norm: 1.0000000730835465, iteration: 325439
loss: 1.0484130382537842,grad_norm: 0.9999996738435586, iteration: 325440
loss: 0.9872649312019348,grad_norm: 0.9999992479988409, iteration: 325441
loss: 1.0176398754119873,grad_norm: 0.9699152048432222, iteration: 325442
loss: 1.031049370765686,grad_norm: 0.9311832917889983, iteration: 325443
loss: 1.0008946657180786,grad_norm: 0.8775389737808469, iteration: 325444
loss: 1.0068330764770508,grad_norm: 0.7897131364847494, iteration: 325445
loss: 1.0230860710144043,grad_norm: 0.7832756868759465, iteration: 325446
loss: 1.0374834537506104,grad_norm: 0.8849197803749492, iteration: 325447
loss: 0.9635403752326965,grad_norm: 0.8960551091101018, iteration: 325448
loss: 1.0374975204467773,grad_norm: 0.7198542516706252, iteration: 325449
loss: 1.0018084049224854,grad_norm: 0.8644995949911122, iteration: 325450
loss: 1.0194710493087769,grad_norm: 0.8151954395816405, iteration: 325451
loss: 1.064028024673462,grad_norm: 0.9999992026731325, iteration: 325452
loss: 1.0109262466430664,grad_norm: 0.9999998899789737, iteration: 325453
loss: 0.9699562191963196,grad_norm: 0.8704167382863014, iteration: 325454
loss: 1.003774881362915,grad_norm: 0.9374761685414861, iteration: 325455
loss: 0.9914149045944214,grad_norm: 0.8709444284592908, iteration: 325456
loss: 1.0836342573165894,grad_norm: 0.9999994772617776, iteration: 325457
loss: 0.9978846311569214,grad_norm: 0.9999991611078988, iteration: 325458
loss: 0.9972549676895142,grad_norm: 0.999999260587506, iteration: 325459
loss: 1.0021177530288696,grad_norm: 0.9999990738152293, iteration: 325460
loss: 1.030592679977417,grad_norm: 0.8114469757482883, iteration: 325461
loss: 0.9898788928985596,grad_norm: 0.7083714876785943, iteration: 325462
loss: 1.0357586145401,grad_norm: 0.930208367673004, iteration: 325463
loss: 1.0220309495925903,grad_norm: 0.7787090387793886, iteration: 325464
loss: 1.073198676109314,grad_norm: 0.9213373184586101, iteration: 325465
loss: 1.0658631324768066,grad_norm: 0.9999993454797103, iteration: 325466
loss: 1.0108810663223267,grad_norm: 0.8735444055390398, iteration: 325467
loss: 1.015566110610962,grad_norm: 0.8717219928253662, iteration: 325468
loss: 1.060775876045227,grad_norm: 0.999999613325045, iteration: 325469
loss: 1.0065799951553345,grad_norm: 0.8594959209920244, iteration: 325470
loss: 0.998602032661438,grad_norm: 0.8305286105160502, iteration: 325471
loss: 1.0039024353027344,grad_norm: 0.9999991719452462, iteration: 325472
loss: 0.9839164018630981,grad_norm: 0.9999996141428458, iteration: 325473
loss: 1.0872793197631836,grad_norm: 0.819799853419925, iteration: 325474
loss: 1.0912961959838867,grad_norm: 0.9999990491481707, iteration: 325475
loss: 0.9913170337677002,grad_norm: 0.9175324418224018, iteration: 325476
loss: 1.006840705871582,grad_norm: 0.788473385735348, iteration: 325477
loss: 1.0218069553375244,grad_norm: 0.8519500652840317, iteration: 325478
loss: 0.9895837903022766,grad_norm: 0.8229991553454974, iteration: 325479
loss: 0.9991139769554138,grad_norm: 0.8871016360345111, iteration: 325480
loss: 0.9764406085014343,grad_norm: 0.926137629929615, iteration: 325481
loss: 0.9764772057533264,grad_norm: 0.8123986088103949, iteration: 325482
loss: 1.0283819437026978,grad_norm: 0.7003354971279752, iteration: 325483
loss: 0.975207507610321,grad_norm: 0.999999194723101, iteration: 325484
loss: 1.0049331188201904,grad_norm: 0.9999990663245951, iteration: 325485
loss: 0.9897611737251282,grad_norm: 0.9999990572024766, iteration: 325486
loss: 0.9792601466178894,grad_norm: 0.757571085343857, iteration: 325487
loss: 1.0070827007293701,grad_norm: 0.887006853193896, iteration: 325488
loss: 0.9666667580604553,grad_norm: 0.8806885376171378, iteration: 325489
loss: 1.0126445293426514,grad_norm: 0.6993338091517599, iteration: 325490
loss: 0.9913042187690735,grad_norm: 0.8991804269380784, iteration: 325491
loss: 0.9648270010948181,grad_norm: 0.7483967483752825, iteration: 325492
loss: 0.9755982756614685,grad_norm: 0.946466339711816, iteration: 325493
loss: 1.0270954370498657,grad_norm: 0.8720152355168047, iteration: 325494
loss: 0.9948770403862,grad_norm: 0.801235832567091, iteration: 325495
loss: 0.9922306537628174,grad_norm: 0.9999994915387561, iteration: 325496
loss: 1.0394134521484375,grad_norm: 0.8930995962147508, iteration: 325497
loss: 1.0057988166809082,grad_norm: 0.7024284212653145, iteration: 325498
loss: 0.9844516515731812,grad_norm: 0.9541110895774031, iteration: 325499
loss: 0.9648513793945312,grad_norm: 0.8506371049827947, iteration: 325500
loss: 0.9838749170303345,grad_norm: 0.817610674129534, iteration: 325501
loss: 1.0038048028945923,grad_norm: 0.7089661146751564, iteration: 325502
loss: 0.9794600009918213,grad_norm: 0.9999990570518722, iteration: 325503
loss: 1.0048490762710571,grad_norm: 0.9999997040010675, iteration: 325504
loss: 1.0208625793457031,grad_norm: 0.7783527407384554, iteration: 325505
loss: 0.9936686158180237,grad_norm: 0.9999990752605245, iteration: 325506
loss: 1.0203646421432495,grad_norm: 0.7785540909729036, iteration: 325507
loss: 0.9520950317382812,grad_norm: 0.9999991720386789, iteration: 325508
loss: 0.9546602368354797,grad_norm: 0.7316197053676896, iteration: 325509
loss: 0.9782735109329224,grad_norm: 0.7231162122391355, iteration: 325510
loss: 0.9891875982284546,grad_norm: 0.9732986762164222, iteration: 325511
loss: 0.9745945334434509,grad_norm: 0.7990527461848366, iteration: 325512
loss: 0.9642167091369629,grad_norm: 0.8059224620767332, iteration: 325513
loss: 1.0052334070205688,grad_norm: 0.7826215541063255, iteration: 325514
loss: 1.028153419494629,grad_norm: 0.9999998933732822, iteration: 325515
loss: 0.9891608357429504,grad_norm: 0.9238072504773416, iteration: 325516
loss: 1.0086661577224731,grad_norm: 0.9911704405962314, iteration: 325517
loss: 1.0501904487609863,grad_norm: 0.9999994681304222, iteration: 325518
loss: 1.0229049921035767,grad_norm: 0.6842530746531574, iteration: 325519
loss: 1.0530431270599365,grad_norm: 0.9999998950896181, iteration: 325520
loss: 1.0650739669799805,grad_norm: 0.8774847627875071, iteration: 325521
loss: 0.9866438508033752,grad_norm: 0.8450212119637655, iteration: 325522
loss: 1.036452293395996,grad_norm: 0.9313288708924791, iteration: 325523
loss: 1.0116996765136719,grad_norm: 0.9999991169703807, iteration: 325524
loss: 0.9732407927513123,grad_norm: 0.9174541284449763, iteration: 325525
loss: 1.0228908061981201,grad_norm: 0.8657901843157938, iteration: 325526
loss: 0.9914236664772034,grad_norm: 0.8459002744241194, iteration: 325527
loss: 0.9959639310836792,grad_norm: 0.9999990231723024, iteration: 325528
loss: 1.0124515295028687,grad_norm: 0.8947470662774081, iteration: 325529
loss: 1.0058947801589966,grad_norm: 0.9444698243724435, iteration: 325530
loss: 1.0358675718307495,grad_norm: 0.9999990911839056, iteration: 325531
loss: 1.0523704290390015,grad_norm: 0.9999996598326193, iteration: 325532
loss: 0.9753910899162292,grad_norm: 0.905030537332925, iteration: 325533
loss: 0.9955491423606873,grad_norm: 0.780328490878138, iteration: 325534
loss: 0.9797994494438171,grad_norm: 0.996054385879027, iteration: 325535
loss: 1.0035772323608398,grad_norm: 0.8834547507926452, iteration: 325536
loss: 0.9980902671813965,grad_norm: 0.9999992090778335, iteration: 325537
loss: 1.00715970993042,grad_norm: 0.8008775859889135, iteration: 325538
loss: 0.9964135885238647,grad_norm: 0.7750605841821919, iteration: 325539
loss: 1.010379433631897,grad_norm: 0.9999990206041511, iteration: 325540
loss: 1.0377137660980225,grad_norm: 0.9798100500459374, iteration: 325541
loss: 1.0739226341247559,grad_norm: 0.999999838715436, iteration: 325542
loss: 0.9714547395706177,grad_norm: 0.9318666661448688, iteration: 325543
loss: 1.0017820596694946,grad_norm: 0.70724909042695, iteration: 325544
loss: 0.9648067951202393,grad_norm: 0.78335205371214, iteration: 325545
loss: 1.004420280456543,grad_norm: 0.7782710250704422, iteration: 325546
loss: 1.0278184413909912,grad_norm: 0.8264746779890425, iteration: 325547
loss: 1.0083928108215332,grad_norm: 0.8665435443951598, iteration: 325548
loss: 1.0151280164718628,grad_norm: 0.7287224902853429, iteration: 325549
loss: 1.037278175354004,grad_norm: 0.773119924231974, iteration: 325550
loss: 0.9831331968307495,grad_norm: 0.7830833666635755, iteration: 325551
loss: 0.9973691701889038,grad_norm: 0.7341520312354297, iteration: 325552
loss: 1.0194956064224243,grad_norm: 0.8695912030778746, iteration: 325553
loss: 0.9612205028533936,grad_norm: 0.9999990946321969, iteration: 325554
loss: 0.9704062342643738,grad_norm: 0.9215888829572765, iteration: 325555
loss: 0.9995359778404236,grad_norm: 0.9790178528394801, iteration: 325556
loss: 1.0009191036224365,grad_norm: 0.8993553669603229, iteration: 325557
loss: 0.9954550266265869,grad_norm: 0.8308626567788424, iteration: 325558
loss: 0.9955362677574158,grad_norm: 0.7719803737383892, iteration: 325559
loss: 0.9800580739974976,grad_norm: 0.9862594448147886, iteration: 325560
loss: 1.0480961799621582,grad_norm: 0.8470044852563111, iteration: 325561
loss: 1.0180885791778564,grad_norm: 0.7835224054683275, iteration: 325562
loss: 1.001725673675537,grad_norm: 0.9999990635365485, iteration: 325563
loss: 1.0624345541000366,grad_norm: 0.9999991738836241, iteration: 325564
loss: 1.035775065422058,grad_norm: 0.9999993155775309, iteration: 325565
loss: 0.9930868148803711,grad_norm: 0.8156425780857691, iteration: 325566
loss: 1.0187363624572754,grad_norm: 0.9999992440264613, iteration: 325567
loss: 1.0386849641799927,grad_norm: 0.7163445004699782, iteration: 325568
loss: 1.0239155292510986,grad_norm: 0.8776917998158703, iteration: 325569
loss: 1.1775416135787964,grad_norm: 0.9999995638351833, iteration: 325570
loss: 0.9962225556373596,grad_norm: 0.7216904067210198, iteration: 325571
loss: 1.0342012643814087,grad_norm: 0.8628597463728391, iteration: 325572
loss: 1.01129949092865,grad_norm: 0.8511303553934906, iteration: 325573
loss: 1.0076804161071777,grad_norm: 0.943109608397481, iteration: 325574
loss: 0.9937432408332825,grad_norm: 0.8975855683428006, iteration: 325575
loss: 1.1711745262145996,grad_norm: 0.9999993789084434, iteration: 325576
loss: 0.9733814001083374,grad_norm: 0.9999991059193134, iteration: 325577
loss: 1.03458833694458,grad_norm: 0.9361394282856088, iteration: 325578
loss: 0.9646435379981995,grad_norm: 0.7885972981397116, iteration: 325579
loss: 1.010101556777954,grad_norm: 0.9999998257698599, iteration: 325580
loss: 1.0541805028915405,grad_norm: 0.987304160097138, iteration: 325581
loss: 0.9857417345046997,grad_norm: 0.8973017024863856, iteration: 325582
loss: 1.0087323188781738,grad_norm: 0.9999998684302759, iteration: 325583
loss: 0.9860868453979492,grad_norm: 0.7752691207159098, iteration: 325584
loss: 1.0118173360824585,grad_norm: 0.7339101561105208, iteration: 325585
loss: 0.9856722354888916,grad_norm: 0.9531440563359407, iteration: 325586
loss: 1.0132068395614624,grad_norm: 0.8592880709757158, iteration: 325587
loss: 1.0074167251586914,grad_norm: 0.9103311596964178, iteration: 325588
loss: 0.9731560349464417,grad_norm: 0.9999990034870093, iteration: 325589
loss: 1.019509196281433,grad_norm: 0.7508461819521357, iteration: 325590
loss: 0.9877838492393494,grad_norm: 0.8676612191348648, iteration: 325591
loss: 1.0102884769439697,grad_norm: 0.9999992824039321, iteration: 325592
loss: 1.0400229692459106,grad_norm: 0.9999992013062045, iteration: 325593
loss: 1.024740219116211,grad_norm: 0.999999189393914, iteration: 325594
loss: 0.9782516956329346,grad_norm: 0.7985900461218955, iteration: 325595
loss: 1.014188289642334,grad_norm: 0.8790022465381488, iteration: 325596
loss: 0.9897049069404602,grad_norm: 0.8554043356056452, iteration: 325597
loss: 0.9975336790084839,grad_norm: 0.8913013200158945, iteration: 325598
loss: 1.0071758031845093,grad_norm: 0.8813733167420319, iteration: 325599
loss: 0.9659936428070068,grad_norm: 0.8235200846366337, iteration: 325600
loss: 0.9894422292709351,grad_norm: 0.9999991815477082, iteration: 325601
loss: 1.0901057720184326,grad_norm: 0.9999995709230244, iteration: 325602
loss: 1.0199356079101562,grad_norm: 0.8002195505750935, iteration: 325603
loss: 0.9714010953903198,grad_norm: 0.795356578191689, iteration: 325604
loss: 0.9829172492027283,grad_norm: 0.9999990677182284, iteration: 325605
loss: 0.9875813722610474,grad_norm: 0.8258677545421663, iteration: 325606
loss: 0.9706689119338989,grad_norm: 0.8157150319722862, iteration: 325607
loss: 0.9636275172233582,grad_norm: 0.782447945798891, iteration: 325608
loss: 1.0201365947723389,grad_norm: 0.8162268610536603, iteration: 325609
loss: 0.9926714301109314,grad_norm: 0.9999990312848034, iteration: 325610
loss: 1.1299500465393066,grad_norm: 0.8174006393594337, iteration: 325611
loss: 0.9947999119758606,grad_norm: 0.9999996537297638, iteration: 325612
loss: 1.0195116996765137,grad_norm: 0.9607517336240979, iteration: 325613
loss: 1.038997769355774,grad_norm: 0.9999992929974806, iteration: 325614
loss: 1.0112404823303223,grad_norm: 0.8889515495392977, iteration: 325615
loss: 0.9948595762252808,grad_norm: 0.9999990815163564, iteration: 325616
loss: 1.0092352628707886,grad_norm: 0.999999953545359, iteration: 325617
loss: 1.038179636001587,grad_norm: 0.9999997560103827, iteration: 325618
loss: 0.9566425085067749,grad_norm: 0.7846944220872899, iteration: 325619
loss: 1.015298843383789,grad_norm: 0.9375885610900081, iteration: 325620
loss: 1.0063650608062744,grad_norm: 0.7244472488486898, iteration: 325621
loss: 1.001252293586731,grad_norm: 0.8191445146258093, iteration: 325622
loss: 1.0069657564163208,grad_norm: 0.8885761182706282, iteration: 325623
loss: 1.0295453071594238,grad_norm: 0.9999998729247979, iteration: 325624
loss: 0.9815483689308167,grad_norm: 0.9420334041741468, iteration: 325625
loss: 0.9915595650672913,grad_norm: 0.7554939293715301, iteration: 325626
loss: 1.0104880332946777,grad_norm: 0.8646888363623304, iteration: 325627
loss: 0.9746875166893005,grad_norm: 0.8706323442581987, iteration: 325628
loss: 1.0268431901931763,grad_norm: 0.9999993895360055, iteration: 325629
loss: 1.002120852470398,grad_norm: 0.8324086202094915, iteration: 325630
loss: 1.0281490087509155,grad_norm: 0.9999992886595539, iteration: 325631
loss: 1.0121738910675049,grad_norm: 0.8027492502894417, iteration: 325632
loss: 0.9588004946708679,grad_norm: 0.8131303295244358, iteration: 325633
loss: 0.9845384359359741,grad_norm: 0.8707672073737798, iteration: 325634
loss: 0.9689761400222778,grad_norm: 0.9999990537166582, iteration: 325635
loss: 1.039906620979309,grad_norm: 0.9999991935862294, iteration: 325636
loss: 1.1430062055587769,grad_norm: 0.9999996955266611, iteration: 325637
loss: 0.9938068389892578,grad_norm: 0.751550441918255, iteration: 325638
loss: 0.9626679420471191,grad_norm: 0.7614594601762426, iteration: 325639
loss: 1.0240262746810913,grad_norm: 0.9999995468964719, iteration: 325640
loss: 1.0890198945999146,grad_norm: 0.9999992034126978, iteration: 325641
loss: 1.043502688407898,grad_norm: 0.9999992995353454, iteration: 325642
loss: 0.9635210633277893,grad_norm: 0.8374274934092574, iteration: 325643
loss: 1.0081886053085327,grad_norm: 0.9185889417413209, iteration: 325644
loss: 0.9749754071235657,grad_norm: 0.9006955923985822, iteration: 325645
loss: 0.9810212254524231,grad_norm: 0.8466763835424319, iteration: 325646
loss: 1.0082454681396484,grad_norm: 0.9999992081220147, iteration: 325647
loss: 0.9983060956001282,grad_norm: 0.6616586922094887, iteration: 325648
loss: 1.0124107599258423,grad_norm: 0.9999992012906462, iteration: 325649
loss: 0.948952853679657,grad_norm: 0.9683775381741038, iteration: 325650
loss: 1.034065842628479,grad_norm: 0.8825932127710516, iteration: 325651
loss: 1.001764178276062,grad_norm: 0.887424633618031, iteration: 325652
loss: 0.9998143315315247,grad_norm: 0.9380688081614474, iteration: 325653
loss: 0.988825798034668,grad_norm: 0.9064163939892541, iteration: 325654
loss: 1.0028343200683594,grad_norm: 0.8091997368401534, iteration: 325655
loss: 1.011284589767456,grad_norm: 0.9999993532591896, iteration: 325656
loss: 0.9857258796691895,grad_norm: 0.7482755889125504, iteration: 325657
loss: 1.028312087059021,grad_norm: 0.7250831674126875, iteration: 325658
loss: 1.0286380052566528,grad_norm: 0.8058100910263093, iteration: 325659
loss: 1.0416537523269653,grad_norm: 0.9852970231769922, iteration: 325660
loss: 1.0135034322738647,grad_norm: 0.8399944602689348, iteration: 325661
loss: 1.0605077743530273,grad_norm: 0.9999993320598702, iteration: 325662
loss: 0.9860404133796692,grad_norm: 0.9999996122737626, iteration: 325663
loss: 1.0062196254730225,grad_norm: 0.7080582141917008, iteration: 325664
loss: 1.0019733905792236,grad_norm: 0.8207231830264963, iteration: 325665
loss: 1.0842338800430298,grad_norm: 0.9999992523055607, iteration: 325666
loss: 0.9828250408172607,grad_norm: 0.7904733050655759, iteration: 325667
loss: 1.0296449661254883,grad_norm: 0.8969269648930913, iteration: 325668
loss: 0.9787850975990295,grad_norm: 0.816036134762686, iteration: 325669
loss: 1.0862531661987305,grad_norm: 0.9183699022081807, iteration: 325670
loss: 1.094985842704773,grad_norm: 0.8064827615840723, iteration: 325671
loss: 0.9672514796257019,grad_norm: 0.78724608492001, iteration: 325672
loss: 0.9871376752853394,grad_norm: 0.8324509442837211, iteration: 325673
loss: 1.0215293169021606,grad_norm: 0.7698276617115153, iteration: 325674
loss: 1.0150874853134155,grad_norm: 0.797840312997408, iteration: 325675
loss: 1.0235942602157593,grad_norm: 0.9999993478691293, iteration: 325676
loss: 1.024658441543579,grad_norm: 0.9310791318712224, iteration: 325677
loss: 0.9726800918579102,grad_norm: 0.9699867437050352, iteration: 325678
loss: 0.9894295930862427,grad_norm: 0.919439290919676, iteration: 325679
loss: 1.0029059648513794,grad_norm: 0.814575314524132, iteration: 325680
loss: 1.0084539651870728,grad_norm: 0.8088269156524098, iteration: 325681
loss: 0.9715408682823181,grad_norm: 0.8646389533537635, iteration: 325682
loss: 0.9779012203216553,grad_norm: 0.8158072618373179, iteration: 325683
loss: 1.022039771080017,grad_norm: 0.8457036447981652, iteration: 325684
loss: 0.9840573072433472,grad_norm: 0.7266094809260151, iteration: 325685
loss: 1.0219098329544067,grad_norm: 0.999999523213947, iteration: 325686
loss: 0.9907272458076477,grad_norm: 0.6962880847714149, iteration: 325687
loss: 1.0082727670669556,grad_norm: 0.9999993991559194, iteration: 325688
loss: 1.1689505577087402,grad_norm: 0.9999997685457525, iteration: 325689
loss: 0.9874032735824585,grad_norm: 0.9999991170042817, iteration: 325690
loss: 1.0282697677612305,grad_norm: 0.7648848280160939, iteration: 325691
loss: 0.9617866277694702,grad_norm: 0.8269276310280811, iteration: 325692
loss: 1.0243415832519531,grad_norm: 0.7785853167472832, iteration: 325693
loss: 0.9815904498100281,grad_norm: 0.7993133702607258, iteration: 325694
loss: 0.9776607155799866,grad_norm: 0.8103370123734152, iteration: 325695
loss: 1.0019937753677368,grad_norm: 0.9279605491012328, iteration: 325696
loss: 0.9828112721443176,grad_norm: 0.859734965440989, iteration: 325697
loss: 0.9916265606880188,grad_norm: 0.7612069704620564, iteration: 325698
loss: 0.9643118977546692,grad_norm: 0.8769841478028562, iteration: 325699
loss: 0.9922919273376465,grad_norm: 0.722006743858646, iteration: 325700
loss: 0.9898313879966736,grad_norm: 0.9120835371363903, iteration: 325701
loss: 1.0260791778564453,grad_norm: 0.8901847680076066, iteration: 325702
loss: 0.9685913324356079,grad_norm: 0.7655436165142553, iteration: 325703
loss: 1.1053926944732666,grad_norm: 0.999999853668879, iteration: 325704
loss: 0.9808557033538818,grad_norm: 0.8977600873622192, iteration: 325705
loss: 1.0184681415557861,grad_norm: 0.983969247556222, iteration: 325706
loss: 0.9950202107429504,grad_norm: 0.8319035214007385, iteration: 325707
loss: 1.010499358177185,grad_norm: 0.9898069923728208, iteration: 325708
loss: 0.9895052909851074,grad_norm: 0.848047095069713, iteration: 325709
loss: 1.0153279304504395,grad_norm: 0.7568201226884099, iteration: 325710
loss: 1.0087401866912842,grad_norm: 0.8702555886652891, iteration: 325711
loss: 0.9860538244247437,grad_norm: 0.885846735302572, iteration: 325712
loss: 0.9839190244674683,grad_norm: 0.7862258732268427, iteration: 325713
loss: 1.0343073606491089,grad_norm: 0.8492210682391977, iteration: 325714
loss: 0.9729756116867065,grad_norm: 0.8364749262517135, iteration: 325715
loss: 1.0069396495819092,grad_norm: 0.9544780422741249, iteration: 325716
loss: 1.002739429473877,grad_norm: 0.9999992715131909, iteration: 325717
loss: 1.0086599588394165,grad_norm: 0.7542854348115414, iteration: 325718
loss: 1.0117015838623047,grad_norm: 0.8046023243517078, iteration: 325719
loss: 1.008341670036316,grad_norm: 0.9325959023643529, iteration: 325720
loss: 0.9937088489532471,grad_norm: 0.8172161879951059, iteration: 325721
loss: 1.0191792249679565,grad_norm: 0.8131027371072534, iteration: 325722
loss: 1.0441654920578003,grad_norm: 0.999999987174245, iteration: 325723
loss: 1.0321204662322998,grad_norm: 0.8311150532824071, iteration: 325724
loss: 1.094649314880371,grad_norm: 0.9919202993541725, iteration: 325725
loss: 1.0867424011230469,grad_norm: 0.9608959206602755, iteration: 325726
loss: 1.0187177658081055,grad_norm: 0.9999992860843888, iteration: 325727
loss: 1.039492130279541,grad_norm: 0.8517118047903092, iteration: 325728
loss: 0.9827635884284973,grad_norm: 0.7056673727819317, iteration: 325729
loss: 1.0080169439315796,grad_norm: 0.9999994571446389, iteration: 325730
loss: 0.9817325472831726,grad_norm: 0.7512835498097283, iteration: 325731
loss: 1.0077800750732422,grad_norm: 0.99999897684594, iteration: 325732
loss: 1.0095903873443604,grad_norm: 0.7393270145272016, iteration: 325733
loss: 1.1134495735168457,grad_norm: 0.9999995083396623, iteration: 325734
loss: 1.0076338052749634,grad_norm: 0.8080909584090777, iteration: 325735
loss: 1.0425033569335938,grad_norm: 0.9999997256080704, iteration: 325736
loss: 1.0484156608581543,grad_norm: 0.9999993479565571, iteration: 325737
loss: 0.9953258633613586,grad_norm: 0.9364320871943727, iteration: 325738
loss: 0.9914373159408569,grad_norm: 0.8069172400424051, iteration: 325739
loss: 1.0697482824325562,grad_norm: 0.9999993495307882, iteration: 325740
loss: 0.9894723892211914,grad_norm: 0.6730706662418322, iteration: 325741
loss: 0.9593503475189209,grad_norm: 0.8372997941723322, iteration: 325742
loss: 1.0456031560897827,grad_norm: 0.9384585299298385, iteration: 325743
loss: 1.010029673576355,grad_norm: 0.7234747565403825, iteration: 325744
loss: 1.0445877313613892,grad_norm: 0.9999993856542118, iteration: 325745
loss: 0.9882389903068542,grad_norm: 0.8435212259484016, iteration: 325746
loss: 0.9923039674758911,grad_norm: 0.8913358497348539, iteration: 325747
loss: 0.9939625263214111,grad_norm: 0.8688686667217506, iteration: 325748
loss: 1.011315107345581,grad_norm: 0.7973746519235384, iteration: 325749
loss: 1.0193601846694946,grad_norm: 0.9077329974206888, iteration: 325750
loss: 0.9966384768486023,grad_norm: 0.7615887016690184, iteration: 325751
loss: 1.0088287591934204,grad_norm: 0.8194959375604861, iteration: 325752
loss: 1.0002517700195312,grad_norm: 0.7275413347486008, iteration: 325753
loss: 1.0162196159362793,grad_norm: 0.8974598944051962, iteration: 325754
loss: 0.9962997436523438,grad_norm: 0.853825524250527, iteration: 325755
loss: 1.0000184774398804,grad_norm: 0.927032277640439, iteration: 325756
loss: 1.137696385383606,grad_norm: 0.9999997246177784, iteration: 325757
loss: 0.9973114728927612,grad_norm: 0.9999992874485992, iteration: 325758
loss: 1.0173739194869995,grad_norm: 0.7875534422847994, iteration: 325759
loss: 0.985138475894928,grad_norm: 0.9493294146204551, iteration: 325760
loss: 1.081584095954895,grad_norm: 0.9999996157627365, iteration: 325761
loss: 1.0006654262542725,grad_norm: 0.8176911462650166, iteration: 325762
loss: 1.0007858276367188,grad_norm: 0.9999999432505747, iteration: 325763
loss: 0.9921448230743408,grad_norm: 0.9999991240204453, iteration: 325764
loss: 0.9858568906784058,grad_norm: 0.7929985418950565, iteration: 325765
loss: 0.9574571251869202,grad_norm: 0.9682914201065721, iteration: 325766
loss: 0.9743315577507019,grad_norm: 0.8463325242015703, iteration: 325767
loss: 0.9913082718849182,grad_norm: 0.963595069795743, iteration: 325768
loss: 0.9958068132400513,grad_norm: 0.7836342193473699, iteration: 325769
loss: 0.959644079208374,grad_norm: 0.9193652151471308, iteration: 325770
loss: 1.017146348953247,grad_norm: 0.7453231386162129, iteration: 325771
loss: 0.9947155714035034,grad_norm: 0.8474702427313658, iteration: 325772
loss: 0.9897643327713013,grad_norm: 0.6647564296480175, iteration: 325773
loss: 1.0103635787963867,grad_norm: 0.8415063958093361, iteration: 325774
loss: 0.9952970743179321,grad_norm: 0.9999991436058542, iteration: 325775
loss: 0.999358594417572,grad_norm: 0.8609167169519616, iteration: 325776
loss: 0.9757604598999023,grad_norm: 0.9359069572604695, iteration: 325777
loss: 1.010922908782959,grad_norm: 0.8619556423471908, iteration: 325778
loss: 0.9913153648376465,grad_norm: 0.7738935561724928, iteration: 325779
loss: 1.0159815549850464,grad_norm: 0.8441415733544958, iteration: 325780
loss: 0.9873088598251343,grad_norm: 0.8141609931415678, iteration: 325781
loss: 0.9677545428276062,grad_norm: 0.7651405466626362, iteration: 325782
loss: 1.0744733810424805,grad_norm: 0.8707980912807097, iteration: 325783
loss: 1.027153491973877,grad_norm: 0.999999374564496, iteration: 325784
loss: 1.0057415962219238,grad_norm: 0.99999911710216, iteration: 325785
loss: 0.9654222130775452,grad_norm: 0.6752742568236649, iteration: 325786
loss: 0.9887741804122925,grad_norm: 0.9399151577952838, iteration: 325787
loss: 0.9756471514701843,grad_norm: 0.8426580134657135, iteration: 325788
loss: 0.9987574815750122,grad_norm: 0.8395005215833249, iteration: 325789
loss: 1.0309679508209229,grad_norm: 0.8522212739916912, iteration: 325790
loss: 1.0528570413589478,grad_norm: 0.999999845549674, iteration: 325791
loss: 1.013548493385315,grad_norm: 0.8574954652115957, iteration: 325792
loss: 0.9637314081192017,grad_norm: 0.7770643923302948, iteration: 325793
loss: 0.9914399981498718,grad_norm: 0.8020715510080303, iteration: 325794
loss: 1.0116229057312012,grad_norm: 0.999999241522775, iteration: 325795
loss: 1.0123817920684814,grad_norm: 0.7550667007323326, iteration: 325796
loss: 0.9741989970207214,grad_norm: 0.742870500902496, iteration: 325797
loss: 1.0032581090927124,grad_norm: 0.8369075779386794, iteration: 325798
loss: 1.2170348167419434,grad_norm: 0.9999993853631788, iteration: 325799
loss: 1.0071133375167847,grad_norm: 0.7654092354166653, iteration: 325800
loss: 1.0057505369186401,grad_norm: 0.7613999218505323, iteration: 325801
loss: 1.0510776042938232,grad_norm: 0.9999992913688874, iteration: 325802
loss: 1.021462082862854,grad_norm: 0.9150659420377677, iteration: 325803
loss: 1.0050837993621826,grad_norm: 0.999999283278073, iteration: 325804
loss: 1.0332828760147095,grad_norm: 0.9806530397181438, iteration: 325805
loss: 1.0247087478637695,grad_norm: 0.8070243601347123, iteration: 325806
loss: 0.995480477809906,grad_norm: 0.9999999765599151, iteration: 325807
loss: 1.0097968578338623,grad_norm: 0.9757712982733452, iteration: 325808
loss: 1.0003435611724854,grad_norm: 0.8941046364027203, iteration: 325809
loss: 0.9984815716743469,grad_norm: 0.9520319860357316, iteration: 325810
loss: 1.0009868144989014,grad_norm: 0.9089781803356433, iteration: 325811
loss: 1.0126532316207886,grad_norm: 0.8055441807528815, iteration: 325812
loss: 0.9972014427185059,grad_norm: 0.8059232134688437, iteration: 325813
loss: 1.0248218774795532,grad_norm: 0.8528176660621721, iteration: 325814
loss: 0.9603754281997681,grad_norm: 0.748663569685136, iteration: 325815
loss: 1.0170750617980957,grad_norm: 0.9477082565664098, iteration: 325816
loss: 1.078495740890503,grad_norm: 0.9706486750128869, iteration: 325817
loss: 0.9874764084815979,grad_norm: 0.8250229002717725, iteration: 325818
loss: 0.9747043251991272,grad_norm: 0.889412613629289, iteration: 325819
loss: 0.9874789118766785,grad_norm: 0.8026297956793307, iteration: 325820
loss: 0.9818205833435059,grad_norm: 0.8149261049436175, iteration: 325821
loss: 0.9965198040008545,grad_norm: 0.6721025108321523, iteration: 325822
loss: 0.9970020651817322,grad_norm: 0.7587153906880476, iteration: 325823
loss: 0.9657865166664124,grad_norm: 0.8761167865767817, iteration: 325824
loss: 0.999470591545105,grad_norm: 0.7155814360461116, iteration: 325825
loss: 1.0150940418243408,grad_norm: 0.9999992243552821, iteration: 325826
loss: 0.9287092089653015,grad_norm: 0.8272698186362439, iteration: 325827
loss: 1.0052616596221924,grad_norm: 0.9471231635729949, iteration: 325828
loss: 1.0234075784683228,grad_norm: 0.6925778456972044, iteration: 325829
loss: 1.0124582052230835,grad_norm: 0.9999990112881817, iteration: 325830
loss: 1.0179388523101807,grad_norm: 0.9999999202954171, iteration: 325831
loss: 1.0481486320495605,grad_norm: 0.9999998474600176, iteration: 325832
loss: 0.9943926334381104,grad_norm: 0.7881682466237515, iteration: 325833
loss: 0.9512589573860168,grad_norm: 0.7379965866842938, iteration: 325834
loss: 0.9897245764732361,grad_norm: 0.9999992904807383, iteration: 325835
loss: 1.0139437913894653,grad_norm: 0.8782507828513486, iteration: 325836
loss: 1.0244368314743042,grad_norm: 0.9404210021342168, iteration: 325837
loss: 1.0362569093704224,grad_norm: 0.8750941639455762, iteration: 325838
loss: 0.9848774671554565,grad_norm: 0.8427177711550712, iteration: 325839
loss: 1.0140107870101929,grad_norm: 0.9999990290972304, iteration: 325840
loss: 1.065082311630249,grad_norm: 0.9226849757603964, iteration: 325841
loss: 0.9878014326095581,grad_norm: 0.6918530710487303, iteration: 325842
loss: 1.023102045059204,grad_norm: 0.8112603252540412, iteration: 325843
loss: 0.9699956774711609,grad_norm: 0.9269704248941113, iteration: 325844
loss: 1.0085382461547852,grad_norm: 0.9999999054007276, iteration: 325845
loss: 1.0711631774902344,grad_norm: 0.9514766540782841, iteration: 325846
loss: 0.987480640411377,grad_norm: 0.9999998826772446, iteration: 325847
loss: 1.0042496919631958,grad_norm: 0.8293559624632323, iteration: 325848
loss: 1.0335384607315063,grad_norm: 0.9196101098147234, iteration: 325849
loss: 0.9704005718231201,grad_norm: 0.8474733611368819, iteration: 325850
loss: 0.9992255568504333,grad_norm: 0.8300158632918386, iteration: 325851
loss: 0.9935035109519958,grad_norm: 0.8221283813617468, iteration: 325852
loss: 1.0194271802902222,grad_norm: 0.7786972901234045, iteration: 325853
loss: 1.022811770439148,grad_norm: 0.9414264633226359, iteration: 325854
loss: 0.981363832950592,grad_norm: 0.8504255293423016, iteration: 325855
loss: 1.014358639717102,grad_norm: 0.7102129502132281, iteration: 325856
loss: 0.9688234925270081,grad_norm: 0.8332139589492509, iteration: 325857
loss: 1.0721583366394043,grad_norm: 0.9999992081756568, iteration: 325858
loss: 1.017709732055664,grad_norm: 0.9547915295560532, iteration: 325859
loss: 0.9845696091651917,grad_norm: 0.8517851260954449, iteration: 325860
loss: 1.1604801416397095,grad_norm: 0.9999992573226383, iteration: 325861
loss: 1.0070925951004028,grad_norm: 0.9242269646573338, iteration: 325862
loss: 1.0032178163528442,grad_norm: 0.9646639720025619, iteration: 325863
loss: 1.011116862297058,grad_norm: 0.876716254108881, iteration: 325864
loss: 0.9913842082023621,grad_norm: 0.9010549595931882, iteration: 325865
loss: 0.9935051798820496,grad_norm: 0.8380602557458012, iteration: 325866
loss: 0.9991846084594727,grad_norm: 0.9999990327662079, iteration: 325867
loss: 0.9920214414596558,grad_norm: 0.8264045182289692, iteration: 325868
loss: 0.9940198659896851,grad_norm: 0.8935960140007893, iteration: 325869
loss: 0.9985358119010925,grad_norm: 0.8055833167929567, iteration: 325870
loss: 0.9937896728515625,grad_norm: 0.8631852547191884, iteration: 325871
loss: 1.0114350318908691,grad_norm: 0.966134898003399, iteration: 325872
loss: 0.9935259819030762,grad_norm: 0.8481361917858464, iteration: 325873
loss: 0.9854133129119873,grad_norm: 0.8374978243857628, iteration: 325874
loss: 1.0023747682571411,grad_norm: 0.9999998922164353, iteration: 325875
loss: 0.9834420680999756,grad_norm: 0.8479998279358365, iteration: 325876
loss: 1.0156352519989014,grad_norm: 0.858274163208953, iteration: 325877
loss: 1.0224251747131348,grad_norm: 0.8401887559475315, iteration: 325878
loss: 0.9737179279327393,grad_norm: 0.7091467470075402, iteration: 325879
loss: 0.9752818942070007,grad_norm: 0.7564601387770697, iteration: 325880
loss: 1.1105520725250244,grad_norm: 0.835714737554411, iteration: 325881
loss: 0.9583336710929871,grad_norm: 0.9546697586629784, iteration: 325882
loss: 1.0130103826522827,grad_norm: 0.6789083665573398, iteration: 325883
loss: 1.0222779512405396,grad_norm: 0.8145760841076366, iteration: 325884
loss: 1.0297709703445435,grad_norm: 0.7769818422045249, iteration: 325885
loss: 1.052735686302185,grad_norm: 0.9099818590301337, iteration: 325886
loss: 0.9873433113098145,grad_norm: 0.7621434265340106, iteration: 325887
loss: 0.9898186922073364,grad_norm: 0.9999993128570493, iteration: 325888
loss: 0.9845793843269348,grad_norm: 0.9264770328501931, iteration: 325889
loss: 1.0324546098709106,grad_norm: 0.853366600071222, iteration: 325890
loss: 0.9662835597991943,grad_norm: 0.8047539479410549, iteration: 325891
loss: 0.9966900944709778,grad_norm: 0.8052949871806053, iteration: 325892
loss: 0.9749923348426819,grad_norm: 0.9608599857904769, iteration: 325893
loss: 1.035398006439209,grad_norm: 0.8697726975792326, iteration: 325894
loss: 0.9788433909416199,grad_norm: 0.7111766506506075, iteration: 325895
loss: 0.9861788749694824,grad_norm: 0.9649274492357535, iteration: 325896
loss: 1.013598918914795,grad_norm: 0.7639745454058834, iteration: 325897
loss: 0.9803253412246704,grad_norm: 0.76977206129852, iteration: 325898
loss: 1.008777141571045,grad_norm: 0.8241384409989647, iteration: 325899
loss: 1.0128053426742554,grad_norm: 0.720557067417167, iteration: 325900
loss: 1.0500777959823608,grad_norm: 0.9554888508078978, iteration: 325901
loss: 1.047505259513855,grad_norm: 0.9999998673240421, iteration: 325902
loss: 0.9668877720832825,grad_norm: 0.9999996541831718, iteration: 325903
loss: 0.9915852546691895,grad_norm: 0.8699076737812377, iteration: 325904
loss: 1.0078105926513672,grad_norm: 0.8043106474698077, iteration: 325905
loss: 1.006576657295227,grad_norm: 0.9039170492302445, iteration: 325906
loss: 0.9986035227775574,grad_norm: 0.780394488849461, iteration: 325907
loss: 1.0031518936157227,grad_norm: 0.7497021738972514, iteration: 325908
loss: 1.0017180442810059,grad_norm: 0.8268105697827046, iteration: 325909
loss: 1.0808250904083252,grad_norm: 0.9999990852344237, iteration: 325910
loss: 1.021895408630371,grad_norm: 0.9999994785963119, iteration: 325911
loss: 1.0611573457717896,grad_norm: 0.961013704550893, iteration: 325912
loss: 1.0164716243743896,grad_norm: 0.9359126460773156, iteration: 325913
loss: 0.9896513819694519,grad_norm: 0.6837715532444836, iteration: 325914
loss: 1.0104758739471436,grad_norm: 0.6908344485801515, iteration: 325915
loss: 1.0252068042755127,grad_norm: 0.9115644364708672, iteration: 325916
loss: 0.9895375370979309,grad_norm: 0.9973532555055322, iteration: 325917
loss: 1.0120912790298462,grad_norm: 0.9317161362680074, iteration: 325918
loss: 1.0181856155395508,grad_norm: 0.7959448163115302, iteration: 325919
loss: 0.9953246712684631,grad_norm: 0.7870162439490971, iteration: 325920
loss: 0.974729597568512,grad_norm: 0.9999990695204012, iteration: 325921
loss: 0.9866504073143005,grad_norm: 0.9326744002714998, iteration: 325922
loss: 0.9652103781700134,grad_norm: 0.9999991304676575, iteration: 325923
loss: 1.0072144269943237,grad_norm: 0.8251461377648266, iteration: 325924
loss: 0.9957154989242554,grad_norm: 0.7857320183553269, iteration: 325925
loss: 1.0350979566574097,grad_norm: 0.7355003018583889, iteration: 325926
loss: 1.0144156217575073,grad_norm: 0.7641527102799108, iteration: 325927
loss: 0.9812003374099731,grad_norm: 0.7257804917826737, iteration: 325928
loss: 1.0231231451034546,grad_norm: 0.9235603629648037, iteration: 325929
loss: 1.0332698822021484,grad_norm: 0.8536336786911423, iteration: 325930
loss: 1.0232681035995483,grad_norm: 0.7899025737171925, iteration: 325931
loss: 0.9919953346252441,grad_norm: 0.8411477206755641, iteration: 325932
loss: 1.097176432609558,grad_norm: 0.8802280795798092, iteration: 325933
loss: 1.0569223165512085,grad_norm: 0.7518996118739104, iteration: 325934
loss: 1.0063486099243164,grad_norm: 0.7521082327958766, iteration: 325935
loss: 0.9982428550720215,grad_norm: 0.9894369787057717, iteration: 325936
loss: 1.0149550437927246,grad_norm: 0.9999992096276473, iteration: 325937
loss: 0.9652217030525208,grad_norm: 0.9797806687042607, iteration: 325938
loss: 1.09608793258667,grad_norm: 0.9999992207354084, iteration: 325939
loss: 1.0104835033416748,grad_norm: 0.890206574965521, iteration: 325940
loss: 1.0590873956680298,grad_norm: 0.7276681122716557, iteration: 325941
loss: 1.0166563987731934,grad_norm: 0.9999991424235531, iteration: 325942
loss: 1.00505793094635,grad_norm: 0.908408310049645, iteration: 325943
loss: 0.9940191507339478,grad_norm: 0.8413648795608702, iteration: 325944
loss: 0.9780756831169128,grad_norm: 0.8015000350051247, iteration: 325945
loss: 1.0146819353103638,grad_norm: 0.7982464638220824, iteration: 325946
loss: 1.0400782823562622,grad_norm: 0.9999991448327457, iteration: 325947
loss: 1.0280969142913818,grad_norm: 0.9419259042966718, iteration: 325948
loss: 1.0275053977966309,grad_norm: 0.9999989779108918, iteration: 325949
loss: 0.9967127442359924,grad_norm: 0.9999992064518127, iteration: 325950
loss: 0.9907365441322327,grad_norm: 0.9999990449377677, iteration: 325951
loss: 1.0497175455093384,grad_norm: 0.9999998387222504, iteration: 325952
loss: 1.0164481401443481,grad_norm: 0.9999991272990988, iteration: 325953
loss: 1.0631232261657715,grad_norm: 0.9999991902413031, iteration: 325954
loss: 1.0181623697280884,grad_norm: 0.9504163370832297, iteration: 325955
loss: 1.0678818225860596,grad_norm: 0.9999997633560956, iteration: 325956
loss: 1.023052453994751,grad_norm: 0.9270861502661751, iteration: 325957
loss: 1.0328176021575928,grad_norm: 0.9999992222309093, iteration: 325958
loss: 1.0802383422851562,grad_norm: 0.9999993830347137, iteration: 325959
loss: 0.9730113744735718,grad_norm: 0.6706312477523046, iteration: 325960
loss: 0.979970395565033,grad_norm: 0.7367183120205287, iteration: 325961
loss: 1.00225031375885,grad_norm: 0.8282657271339786, iteration: 325962
loss: 0.9689530730247498,grad_norm: 0.9242034918261556, iteration: 325963
loss: 1.015870213508606,grad_norm: 0.8488795613375341, iteration: 325964
loss: 0.9611884951591492,grad_norm: 0.9742901385506558, iteration: 325965
loss: 1.0385031700134277,grad_norm: 0.7135460024634487, iteration: 325966
loss: 1.0546647310256958,grad_norm: 0.9726180784653797, iteration: 325967
loss: 1.123677372932434,grad_norm: 0.9999993119669557, iteration: 325968
loss: 1.0887806415557861,grad_norm: 0.9999990894246087, iteration: 325969
loss: 0.988111674785614,grad_norm: 0.8002065095703346, iteration: 325970
loss: 1.0029174089431763,grad_norm: 0.8011403768850948, iteration: 325971
loss: 0.9739120006561279,grad_norm: 0.875302050476758, iteration: 325972
loss: 1.0298439264297485,grad_norm: 0.7163813969752296, iteration: 325973
loss: 1.0280094146728516,grad_norm: 0.9625469355527647, iteration: 325974
loss: 0.9933386445045471,grad_norm: 0.9999993202288554, iteration: 325975
loss: 0.9643418192863464,grad_norm: 0.839501866478419, iteration: 325976
loss: 1.0068905353546143,grad_norm: 0.9265190377546272, iteration: 325977
loss: 1.0140033960342407,grad_norm: 0.801723682298896, iteration: 325978
loss: 1.1292271614074707,grad_norm: 0.8416451695071938, iteration: 325979
loss: 1.0279936790466309,grad_norm: 0.8516366027470493, iteration: 325980
loss: 0.9705140590667725,grad_norm: 0.8058865846869832, iteration: 325981
loss: 0.9944021701812744,grad_norm: 0.9999991689014238, iteration: 325982
loss: 0.9799448847770691,grad_norm: 0.7983462836470618, iteration: 325983
loss: 0.9819973707199097,grad_norm: 0.8812509598361671, iteration: 325984
loss: 1.1004587411880493,grad_norm: 0.9999995956992968, iteration: 325985
loss: 1.0204378366470337,grad_norm: 0.9999993357397743, iteration: 325986
loss: 1.015372633934021,grad_norm: 0.8827968900711745, iteration: 325987
loss: 1.0321545600891113,grad_norm: 0.9289710106584821, iteration: 325988
loss: 0.990985631942749,grad_norm: 0.7305134169710118, iteration: 325989
loss: 0.9786028265953064,grad_norm: 0.8292659991790604, iteration: 325990
loss: 0.9903854131698608,grad_norm: 0.7775066266085052, iteration: 325991
loss: 0.9898126125335693,grad_norm: 0.928114902259558, iteration: 325992
loss: 0.990813672542572,grad_norm: 0.8951459935849126, iteration: 325993
loss: 0.9954619407653809,grad_norm: 0.9999991029019542, iteration: 325994
loss: 1.0031659603118896,grad_norm: 0.8885996749276476, iteration: 325995
loss: 1.024612307548523,grad_norm: 0.9248054867389744, iteration: 325996
loss: 1.0434545278549194,grad_norm: 0.9999991560292062, iteration: 325997
loss: 0.9646596312522888,grad_norm: 0.6511700505613184, iteration: 325998
loss: 1.0605342388153076,grad_norm: 0.8418069486492326, iteration: 325999
loss: 1.0049721002578735,grad_norm: 0.9999992137805506, iteration: 326000
loss: 0.9515731334686279,grad_norm: 0.8806469481831648, iteration: 326001
loss: 0.9880132675170898,grad_norm: 0.8917354627209678, iteration: 326002
loss: 1.0148475170135498,grad_norm: 0.8231867672631794, iteration: 326003
loss: 0.9929149150848389,grad_norm: 0.78745768109454, iteration: 326004
loss: 1.0325899124145508,grad_norm: 0.7177805927502938, iteration: 326005
loss: 0.9744492769241333,grad_norm: 0.9999997266617519, iteration: 326006
loss: 1.0096371173858643,grad_norm: 0.804289868306138, iteration: 326007
loss: 1.0137799978256226,grad_norm: 0.9999995443873148, iteration: 326008
loss: 1.014651894569397,grad_norm: 0.7265792392925737, iteration: 326009
loss: 0.9858260750770569,grad_norm: 0.847261196605125, iteration: 326010
loss: 1.0101940631866455,grad_norm: 0.8716147765564274, iteration: 326011
loss: 0.9997458457946777,grad_norm: 0.7761482868290646, iteration: 326012
loss: 0.9937689900398254,grad_norm: 0.902136463047588, iteration: 326013
loss: 1.0315179824829102,grad_norm: 0.9999991334565616, iteration: 326014
loss: 1.042585849761963,grad_norm: 0.9999991488674005, iteration: 326015
loss: 0.995851993560791,grad_norm: 0.8131602114538479, iteration: 326016
loss: 1.0070240497589111,grad_norm: 0.999999209585078, iteration: 326017
loss: 0.9947960376739502,grad_norm: 0.6878185637513341, iteration: 326018
loss: 1.0100619792938232,grad_norm: 0.8700425176604956, iteration: 326019
loss: 1.0117769241333008,grad_norm: 0.8092491472612804, iteration: 326020
loss: 0.9913697838783264,grad_norm: 0.9999992447598558, iteration: 326021
loss: 0.9882737994194031,grad_norm: 0.7718178716897631, iteration: 326022
loss: 1.0682405233383179,grad_norm: 0.7670589273555446, iteration: 326023
loss: 1.012014627456665,grad_norm: 0.7837196250188458, iteration: 326024
loss: 0.9875699877738953,grad_norm: 0.8214670880883947, iteration: 326025
loss: 0.9822607040405273,grad_norm: 0.8027768111756907, iteration: 326026
loss: 1.0285758972167969,grad_norm: 0.7688038146318605, iteration: 326027
loss: 1.0090699195861816,grad_norm: 0.9755350758380729, iteration: 326028
loss: 0.9884457588195801,grad_norm: 0.8114876610707422, iteration: 326029
loss: 1.0005779266357422,grad_norm: 0.6204035510803612, iteration: 326030
loss: 1.0639135837554932,grad_norm: 0.9999999569118613, iteration: 326031
loss: 0.993411123752594,grad_norm: 0.7529523416065613, iteration: 326032
loss: 1.002739667892456,grad_norm: 0.8008837853094856, iteration: 326033
loss: 1.0025922060012817,grad_norm: 0.9999992844067682, iteration: 326034
loss: 1.0024269819259644,grad_norm: 0.7006739537943745, iteration: 326035
loss: 0.9974437355995178,grad_norm: 0.9999991724983622, iteration: 326036
loss: 1.0310105085372925,grad_norm: 0.7574500267057211, iteration: 326037
loss: 1.00789475440979,grad_norm: 0.8970545077444175, iteration: 326038
loss: 0.996304988861084,grad_norm: 0.9999994002807665, iteration: 326039
loss: 1.0058603286743164,grad_norm: 0.7811368969757766, iteration: 326040
loss: 0.9528723955154419,grad_norm: 0.9298833394657442, iteration: 326041
loss: 0.9892574548721313,grad_norm: 0.8391373390994206, iteration: 326042
loss: 0.9358899593353271,grad_norm: 0.8100829416326836, iteration: 326043
loss: 0.9815628528594971,grad_norm: 0.7762566786562649, iteration: 326044
loss: 0.9681499600410461,grad_norm: 0.7695076189057939, iteration: 326045
loss: 0.977299690246582,grad_norm: 0.970095750332958, iteration: 326046
loss: 1.1305041313171387,grad_norm: 0.9841449915020382, iteration: 326047
loss: 1.0505993366241455,grad_norm: 0.9999993303589613, iteration: 326048
loss: 0.9877201318740845,grad_norm: 0.7998304420270709, iteration: 326049
loss: 1.0262125730514526,grad_norm: 0.8184243692039518, iteration: 326050
loss: 1.0309280157089233,grad_norm: 0.8137371955370092, iteration: 326051
loss: 1.027698278427124,grad_norm: 0.8109851662069679, iteration: 326052
loss: 0.9848337173461914,grad_norm: 0.7983192683346206, iteration: 326053
loss: 0.9933257102966309,grad_norm: 0.8817160496318938, iteration: 326054
loss: 1.1047180891036987,grad_norm: 0.9999997576203173, iteration: 326055
loss: 0.9910790920257568,grad_norm: 0.7274196271990248, iteration: 326056
loss: 0.9907833933830261,grad_norm: 0.999999154851622, iteration: 326057
loss: 1.0033143758773804,grad_norm: 0.7286711749142659, iteration: 326058
loss: 1.061819314956665,grad_norm: 0.999999783508279, iteration: 326059
loss: 0.9728778004646301,grad_norm: 0.8551624790386166, iteration: 326060
loss: 1.0858546495437622,grad_norm: 0.9999999609008803, iteration: 326061
loss: 1.0131617784500122,grad_norm: 0.7688169049436899, iteration: 326062
loss: 1.0055192708969116,grad_norm: 0.9999999263004362, iteration: 326063
loss: 1.0138746500015259,grad_norm: 0.9395464316314817, iteration: 326064
loss: 0.9514017701148987,grad_norm: 0.748127000720857, iteration: 326065
loss: 0.9815001487731934,grad_norm: 0.7954015363836525, iteration: 326066
loss: 1.0512787103652954,grad_norm: 0.9999990651607038, iteration: 326067
loss: 0.984514057636261,grad_norm: 0.8658222505764195, iteration: 326068
loss: 1.0110399723052979,grad_norm: 0.834184772472183, iteration: 326069
loss: 0.9857172966003418,grad_norm: 0.9999998539209947, iteration: 326070
loss: 1.009034276008606,grad_norm: 0.8712207100266055, iteration: 326071
loss: 0.996870756149292,grad_norm: 0.7736882514647816, iteration: 326072
loss: 1.004361867904663,grad_norm: 0.7492901550043726, iteration: 326073
loss: 0.9950566291809082,grad_norm: 0.8105939208383034, iteration: 326074
loss: 1.0079208612442017,grad_norm: 0.8169691482948803, iteration: 326075
loss: 1.0369166135787964,grad_norm: 0.8285857832347032, iteration: 326076
loss: 0.995991051197052,grad_norm: 0.8033183683937167, iteration: 326077
loss: 0.969011664390564,grad_norm: 0.8036801291972981, iteration: 326078
loss: 1.0109467506408691,grad_norm: 0.9579519098663936, iteration: 326079
loss: 1.0092655420303345,grad_norm: 0.8385637324162394, iteration: 326080
loss: 0.9661771059036255,grad_norm: 0.9415814230873868, iteration: 326081
loss: 0.9991007447242737,grad_norm: 0.7908023610872598, iteration: 326082
loss: 0.9998376369476318,grad_norm: 0.8214313276046458, iteration: 326083
loss: 0.9520833492279053,grad_norm: 0.8250026303949176, iteration: 326084
loss: 1.0355297327041626,grad_norm: 0.9999999779329856, iteration: 326085
loss: 1.0228112936019897,grad_norm: 0.9798891979636398, iteration: 326086
loss: 0.967936635017395,grad_norm: 0.8935298599282052, iteration: 326087
loss: 1.0171304941177368,grad_norm: 0.7908396304077352, iteration: 326088
loss: 1.0350340604782104,grad_norm: 0.999999055537202, iteration: 326089
loss: 0.9903777241706848,grad_norm: 0.8346786564887428, iteration: 326090
loss: 1.0146572589874268,grad_norm: 0.8605010813186832, iteration: 326091
loss: 0.9649842381477356,grad_norm: 0.7846449754964352, iteration: 326092
loss: 0.9899661540985107,grad_norm: 0.7873382271498024, iteration: 326093
loss: 1.000710129737854,grad_norm: 0.7941980818747313, iteration: 326094
loss: 0.9874160289764404,grad_norm: 0.7349881364482378, iteration: 326095
loss: 0.9986624121665955,grad_norm: 0.7347396222844803, iteration: 326096
loss: 1.0138018131256104,grad_norm: 0.8471525937047037, iteration: 326097
loss: 1.0177980661392212,grad_norm: 0.9017536657277946, iteration: 326098
loss: 0.9754034280776978,grad_norm: 0.8165318949521221, iteration: 326099
loss: 1.01491379737854,grad_norm: 0.8927126421298508, iteration: 326100
loss: 0.9806177616119385,grad_norm: 0.9215778298185804, iteration: 326101
loss: 1.0162323713302612,grad_norm: 0.9366470900270946, iteration: 326102
loss: 1.0015705823898315,grad_norm: 0.9999989991141185, iteration: 326103
loss: 0.9949670433998108,grad_norm: 0.7439646611350262, iteration: 326104
loss: 1.0058106184005737,grad_norm: 0.977611218632977, iteration: 326105
loss: 0.9747628569602966,grad_norm: 0.7138926103069879, iteration: 326106
loss: 1.0089126825332642,grad_norm: 0.7727112671313208, iteration: 326107
loss: 0.9859768152236938,grad_norm: 0.7823550174119067, iteration: 326108
loss: 1.0209434032440186,grad_norm: 0.9999996287502777, iteration: 326109
loss: 0.9522459506988525,grad_norm: 0.8246183943756328, iteration: 326110
loss: 1.01217782497406,grad_norm: 0.9417840773381545, iteration: 326111
loss: 1.02839994430542,grad_norm: 0.9999992848462044, iteration: 326112
loss: 1.0132718086242676,grad_norm: 0.7571043983911433, iteration: 326113
loss: 0.9896657466888428,grad_norm: 0.69347379596967, iteration: 326114
loss: 1.0038174390792847,grad_norm: 0.8236824445217406, iteration: 326115
loss: 1.1006618738174438,grad_norm: 0.8571516369424788, iteration: 326116
loss: 0.9840471744537354,grad_norm: 0.8337303343356912, iteration: 326117
loss: 0.9766753911972046,grad_norm: 0.8049109530171967, iteration: 326118
loss: 1.012731671333313,grad_norm: 0.9002435412598211, iteration: 326119
loss: 0.9889532327651978,grad_norm: 0.7802304530588581, iteration: 326120
loss: 1.0232727527618408,grad_norm: 0.7211151259991794, iteration: 326121
loss: 1.0180894136428833,grad_norm: 0.8166689829549667, iteration: 326122
loss: 1.0102921724319458,grad_norm: 0.8477484384900024, iteration: 326123
loss: 0.9765439629554749,grad_norm: 0.8236278816589285, iteration: 326124
loss: 0.9804781079292297,grad_norm: 0.7459359983215043, iteration: 326125
loss: 1.0046707391738892,grad_norm: 0.9161006107549308, iteration: 326126
loss: 1.0196962356567383,grad_norm: 0.7648024167556441, iteration: 326127
loss: 0.9777143001556396,grad_norm: 0.8586129866181997, iteration: 326128
loss: 1.0327974557876587,grad_norm: 0.9999992673797783, iteration: 326129
loss: 0.9995245933532715,grad_norm: 0.8957115665947656, iteration: 326130
loss: 0.9659743309020996,grad_norm: 0.7673763091943648, iteration: 326131
loss: 0.9894726276397705,grad_norm: 0.8641233293663457, iteration: 326132
loss: 0.9793974757194519,grad_norm: 0.9192631523936321, iteration: 326133
loss: 1.1280581951141357,grad_norm: 0.9999995247608888, iteration: 326134
loss: 1.0058716535568237,grad_norm: 0.9999999987623299, iteration: 326135
loss: 1.034705638885498,grad_norm: 0.7837345707221208, iteration: 326136
loss: 0.9799317121505737,grad_norm: 0.7967307997979123, iteration: 326137
loss: 0.9929364323616028,grad_norm: 0.8859508481594566, iteration: 326138
loss: 1.0200837850570679,grad_norm: 0.793630266319169, iteration: 326139
loss: 1.0051604509353638,grad_norm: 0.9286644972617354, iteration: 326140
loss: 0.9931957125663757,grad_norm: 0.7913312379777556, iteration: 326141
loss: 0.9763637781143188,grad_norm: 0.8610610744265487, iteration: 326142
loss: 1.0958970785140991,grad_norm: 0.9999990252764792, iteration: 326143
loss: 1.017996907234192,grad_norm: 0.7489038443269392, iteration: 326144
loss: 1.0322843790054321,grad_norm: 0.7350269488742737, iteration: 326145
loss: 0.9906943440437317,grad_norm: 0.9999991439161287, iteration: 326146
loss: 0.9756190776824951,grad_norm: 0.8613818005489426, iteration: 326147
loss: 0.9833509922027588,grad_norm: 0.8640384788383345, iteration: 326148
loss: 1.0182949304580688,grad_norm: 0.8365376844964194, iteration: 326149
loss: 1.0439178943634033,grad_norm: 0.9737833154328466, iteration: 326150
loss: 1.0038361549377441,grad_norm: 0.9246901134566077, iteration: 326151
loss: 1.0285507440567017,grad_norm: 0.9999990738931251, iteration: 326152
loss: 0.9799590706825256,grad_norm: 0.9535635188122927, iteration: 326153
loss: 0.9539436101913452,grad_norm: 0.8269741076940009, iteration: 326154
loss: 1.0059131383895874,grad_norm: 0.9999996205963811, iteration: 326155
loss: 0.9724274277687073,grad_norm: 0.8476163544075642, iteration: 326156
loss: 1.0087045431137085,grad_norm: 0.677473428033633, iteration: 326157
loss: 0.9895037412643433,grad_norm: 0.8250961472146783, iteration: 326158
loss: 0.973014235496521,grad_norm: 0.9999991666711717, iteration: 326159
loss: 0.9867689609527588,grad_norm: 0.818910789317297, iteration: 326160
loss: 1.0055999755859375,grad_norm: 0.9673723185867565, iteration: 326161
loss: 1.0588524341583252,grad_norm: 0.8469382430727274, iteration: 326162
loss: 0.9860453009605408,grad_norm: 0.7896883874128204, iteration: 326163
loss: 0.9929362535476685,grad_norm: 0.8248736655339179, iteration: 326164
loss: 1.0156899690628052,grad_norm: 0.8458823017251136, iteration: 326165
loss: 0.9913221597671509,grad_norm: 0.8160187399418674, iteration: 326166
loss: 1.0208380222320557,grad_norm: 0.8731460185261252, iteration: 326167
loss: 1.011936068534851,grad_norm: 0.7990044580816399, iteration: 326168
loss: 1.0334221124649048,grad_norm: 0.9241521914211397, iteration: 326169
loss: 0.9856657385826111,grad_norm: 0.8273216011681422, iteration: 326170
loss: 1.0213515758514404,grad_norm: 0.9753300564649279, iteration: 326171
loss: 1.0023746490478516,grad_norm: 0.8124929202621848, iteration: 326172
loss: 0.9780362248420715,grad_norm: 0.7254509892607188, iteration: 326173
loss: 1.0056706666946411,grad_norm: 0.8571513034916298, iteration: 326174
loss: 0.993222177028656,grad_norm: 0.9999994309098887, iteration: 326175
loss: 1.0345007181167603,grad_norm: 0.9999996327060207, iteration: 326176
loss: 1.135575771331787,grad_norm: 0.9999994886934003, iteration: 326177
loss: 0.9870991110801697,grad_norm: 0.8402796795707447, iteration: 326178
loss: 0.9998789429664612,grad_norm: 0.9035506357497938, iteration: 326179
loss: 1.022560715675354,grad_norm: 0.9999993126431088, iteration: 326180
loss: 1.0154460668563843,grad_norm: 0.7551832530549744, iteration: 326181
loss: 1.0041625499725342,grad_norm: 0.9392657541315975, iteration: 326182
loss: 0.9718748331069946,grad_norm: 0.8601066163971457, iteration: 326183
loss: 0.999229371547699,grad_norm: 0.9599883665075438, iteration: 326184
loss: 0.9975924491882324,grad_norm: 0.9138207852718805, iteration: 326185
loss: 1.033159613609314,grad_norm: 0.8230135188663951, iteration: 326186
loss: 0.9661304950714111,grad_norm: 0.8066840946164191, iteration: 326187
loss: 1.0189675092697144,grad_norm: 0.824146879592629, iteration: 326188
loss: 1.0321075916290283,grad_norm: 0.9999992549594683, iteration: 326189
loss: 1.0668480396270752,grad_norm: 0.9999999039178383, iteration: 326190
loss: 1.0035877227783203,grad_norm: 0.8934860187822714, iteration: 326191
loss: 1.0442869663238525,grad_norm: 0.7759960056680255, iteration: 326192
loss: 1.0067263841629028,grad_norm: 0.7770150963300235, iteration: 326193
loss: 0.9902198314666748,grad_norm: 0.867635491490011, iteration: 326194
loss: 0.9694664478302002,grad_norm: 0.7933674488856857, iteration: 326195
loss: 1.0137419700622559,grad_norm: 0.7423997386018808, iteration: 326196
loss: 1.0026702880859375,grad_norm: 0.8684465987314381, iteration: 326197
loss: 1.006443977355957,grad_norm: 0.8588683890899858, iteration: 326198
loss: 1.000680923461914,grad_norm: 0.7748248901682637, iteration: 326199
loss: 1.0682249069213867,grad_norm: 0.9999991841765676, iteration: 326200
loss: 1.0059033632278442,grad_norm: 0.9999998414194201, iteration: 326201
loss: 1.0084161758422852,grad_norm: 0.8211998980306922, iteration: 326202
loss: 0.9916541576385498,grad_norm: 0.8809325211388225, iteration: 326203
loss: 0.986072838306427,grad_norm: 0.6735266823941669, iteration: 326204
loss: 0.9992306232452393,grad_norm: 0.9734330295801011, iteration: 326205
loss: 1.0168285369873047,grad_norm: 0.8636873723675367, iteration: 326206
loss: 0.998779296875,grad_norm: 0.9999991015952174, iteration: 326207
loss: 1.017072081565857,grad_norm: 0.9562178361671236, iteration: 326208
loss: 1.000900387763977,grad_norm: 0.8068175872481265, iteration: 326209
loss: 0.9729230999946594,grad_norm: 0.8967900293725739, iteration: 326210
loss: 0.9983031153678894,grad_norm: 0.771095362336438, iteration: 326211
loss: 0.9909380674362183,grad_norm: 0.8105661621215321, iteration: 326212
loss: 1.0014889240264893,grad_norm: 0.8712695741378663, iteration: 326213
loss: 0.9914612174034119,grad_norm: 0.8808970831381049, iteration: 326214
loss: 0.970768928527832,grad_norm: 0.6551089961466046, iteration: 326215
loss: 0.9884485602378845,grad_norm: 0.8105182570972689, iteration: 326216
loss: 0.9678065776824951,grad_norm: 0.7867826728440172, iteration: 326217
loss: 1.0310436487197876,grad_norm: 0.9999992029609375, iteration: 326218
loss: 0.9835320115089417,grad_norm: 0.9999990944152949, iteration: 326219
loss: 0.9836238026618958,grad_norm: 0.8057497866024491, iteration: 326220
loss: 0.98213791847229,grad_norm: 0.801256742225451, iteration: 326221
loss: 0.9706572890281677,grad_norm: 0.7073196137533728, iteration: 326222
loss: 1.0294232368469238,grad_norm: 0.9999990092909968, iteration: 326223
loss: 0.9901044964790344,grad_norm: 0.8116266204615423, iteration: 326224
loss: 0.9905305504798889,grad_norm: 0.7806032425055226, iteration: 326225
loss: 1.0487889051437378,grad_norm: 0.9999991752486919, iteration: 326226
loss: 0.9881543517112732,grad_norm: 0.7219122869309943, iteration: 326227
loss: 1.03048837184906,grad_norm: 0.8589939276699524, iteration: 326228
loss: 1.0886884927749634,grad_norm: 0.9999990671287531, iteration: 326229
loss: 1.0223125219345093,grad_norm: 0.8656443210234082, iteration: 326230
loss: 1.0412886142730713,grad_norm: 0.9820008708309232, iteration: 326231
loss: 0.9813446998596191,grad_norm: 0.7989675959776815, iteration: 326232
loss: 0.9954378604888916,grad_norm: 0.9999997969542345, iteration: 326233
loss: 0.9779833555221558,grad_norm: 0.7377042100944247, iteration: 326234
loss: 1.0027093887329102,grad_norm: 0.843273413917366, iteration: 326235
loss: 0.9908545017242432,grad_norm: 0.7771044716119756, iteration: 326236
loss: 1.0079776048660278,grad_norm: 0.8593818720580302, iteration: 326237
loss: 1.009850263595581,grad_norm: 0.7934791166473147, iteration: 326238
loss: 1.001752257347107,grad_norm: 0.8711820762514028, iteration: 326239
loss: 1.0266132354736328,grad_norm: 0.660676409970507, iteration: 326240
loss: 0.9990702867507935,grad_norm: 0.9106095057356216, iteration: 326241
loss: 0.9811126589775085,grad_norm: 0.751313913832898, iteration: 326242
loss: 0.9747037887573242,grad_norm: 0.8837621861759145, iteration: 326243
loss: 1.0089895725250244,grad_norm: 0.8053496361822324, iteration: 326244
loss: 1.0617356300354004,grad_norm: 0.9999995657395352, iteration: 326245
loss: 1.0296010971069336,grad_norm: 0.7016462717978952, iteration: 326246
loss: 1.0274399518966675,grad_norm: 0.9609302174713074, iteration: 326247
loss: 0.9940450191497803,grad_norm: 0.9999998769883754, iteration: 326248
loss: 1.0426502227783203,grad_norm: 0.9999991725498026, iteration: 326249
loss: 0.9818164706230164,grad_norm: 0.8397632629813413, iteration: 326250
loss: 0.9812336564064026,grad_norm: 0.820759229058106, iteration: 326251
loss: 1.0046374797821045,grad_norm: 0.7436273404396301, iteration: 326252
loss: 1.0032960176467896,grad_norm: 0.9214639357541182, iteration: 326253
loss: 0.9830322265625,grad_norm: 0.7930567598771093, iteration: 326254
loss: 1.0248572826385498,grad_norm: 0.9999991337150851, iteration: 326255
loss: 1.0180572271347046,grad_norm: 0.7966813807767956, iteration: 326256
loss: 1.0096639394760132,grad_norm: 0.9999990838490072, iteration: 326257
loss: 0.993838369846344,grad_norm: 0.8311869450671442, iteration: 326258
loss: 0.9955395460128784,grad_norm: 0.9219466536096999, iteration: 326259
loss: 1.0065590143203735,grad_norm: 0.8223339439251444, iteration: 326260
loss: 0.9786590933799744,grad_norm: 0.7505513196649793, iteration: 326261
loss: 1.0090653896331787,grad_norm: 0.871767408748567, iteration: 326262
loss: 0.9821962118148804,grad_norm: 0.6677690515770892, iteration: 326263
loss: 0.991767168045044,grad_norm: 0.7957327721239632, iteration: 326264
loss: 1.0005179643630981,grad_norm: 0.9202605247856811, iteration: 326265
loss: 0.9965148568153381,grad_norm: 0.9005404404397775, iteration: 326266
loss: 0.9675987958908081,grad_norm: 0.746994628646399, iteration: 326267
loss: 0.9736329913139343,grad_norm: 0.9219896707162221, iteration: 326268
loss: 1.0287665128707886,grad_norm: 0.7753624363618117, iteration: 326269
loss: 0.9700356721878052,grad_norm: 0.8156237801023384, iteration: 326270
loss: 1.004564881324768,grad_norm: 0.861322635485453, iteration: 326271
loss: 1.0272353887557983,grad_norm: 0.7778527759091155, iteration: 326272
loss: 0.9552434682846069,grad_norm: 0.9999993043450681, iteration: 326273
loss: 0.9690414667129517,grad_norm: 0.8037187720844671, iteration: 326274
loss: 0.9695926308631897,grad_norm: 0.9983429385060276, iteration: 326275
loss: 0.92420893907547,grad_norm: 0.9309702624510365, iteration: 326276
loss: 1.0075758695602417,grad_norm: 0.7849269830774946, iteration: 326277
loss: 1.03616201877594,grad_norm: 0.8298561118458422, iteration: 326278
loss: 1.024964451789856,grad_norm: 0.8463577484091263, iteration: 326279
loss: 1.020630955696106,grad_norm: 0.9999997433610848, iteration: 326280
loss: 0.9806250929832458,grad_norm: 0.8525250730110225, iteration: 326281
loss: 1.0334864854812622,grad_norm: 0.9999995537233826, iteration: 326282
loss: 1.072618007659912,grad_norm: 0.895417231960252, iteration: 326283
loss: 0.9472942352294922,grad_norm: 0.9877053653146969, iteration: 326284
loss: 1.0392210483551025,grad_norm: 0.9999995848679086, iteration: 326285
loss: 0.9711053967475891,grad_norm: 0.9999998144919968, iteration: 326286
loss: 1.0410865545272827,grad_norm: 0.999999061795832, iteration: 326287
loss: 0.9650699496269226,grad_norm: 0.8811568855670644, iteration: 326288
loss: 1.051398754119873,grad_norm: 0.9999993143807666, iteration: 326289
loss: 0.9806129336357117,grad_norm: 0.8419059717476858, iteration: 326290
loss: 0.9991130828857422,grad_norm: 0.8638721710695008, iteration: 326291
loss: 1.0130561590194702,grad_norm: 0.7217754972556082, iteration: 326292
loss: 1.0106600522994995,grad_norm: 0.9999991298240002, iteration: 326293
loss: 0.9856486320495605,grad_norm: 0.9693836303682548, iteration: 326294
loss: 1.0057519674301147,grad_norm: 0.9629176457919392, iteration: 326295
loss: 1.0255018472671509,grad_norm: 0.7606898913390068, iteration: 326296
loss: 0.9904953241348267,grad_norm: 0.808443355192013, iteration: 326297
loss: 1.0378482341766357,grad_norm: 0.9126099161671947, iteration: 326298
loss: 0.9679638743400574,grad_norm: 0.7249310161180499, iteration: 326299
loss: 1.005806803703308,grad_norm: 0.835909899356682, iteration: 326300
loss: 0.9928160905838013,grad_norm: 0.803846478001596, iteration: 326301
loss: 1.0114638805389404,grad_norm: 0.9185490989436206, iteration: 326302
loss: 1.0186759233474731,grad_norm: 0.8324053225839778, iteration: 326303
loss: 1.0681374073028564,grad_norm: 0.9999990353133699, iteration: 326304
loss: 1.0071160793304443,grad_norm: 0.8550397767652125, iteration: 326305
loss: 0.9725511074066162,grad_norm: 0.7894337998689163, iteration: 326306
loss: 1.0729063749313354,grad_norm: 0.9999990962079689, iteration: 326307
loss: 0.9848921895027161,grad_norm: 0.8126016602651375, iteration: 326308
loss: 1.0129085779190063,grad_norm: 0.9999992362362425, iteration: 326309
loss: 0.99786376953125,grad_norm: 0.9299484513736698, iteration: 326310
loss: 1.000910997390747,grad_norm: 0.8724954999025785, iteration: 326311
loss: 1.0586174726486206,grad_norm: 0.9999991996956101, iteration: 326312
loss: 1.0136107206344604,grad_norm: 0.9823097659909568, iteration: 326313
loss: 0.992692232131958,grad_norm: 0.9999990956319713, iteration: 326314
loss: 0.9840009808540344,grad_norm: 0.9044379481005093, iteration: 326315
loss: 1.0133285522460938,grad_norm: 0.8796497831902074, iteration: 326316
loss: 1.0190813541412354,grad_norm: 0.9164574735140383, iteration: 326317
loss: 1.0558457374572754,grad_norm: 0.7789514424248718, iteration: 326318
loss: 1.0015959739685059,grad_norm: 0.9110611888636195, iteration: 326319
loss: 0.987755298614502,grad_norm: 0.8236983687879541, iteration: 326320
loss: 0.9995072484016418,grad_norm: 0.8001026442005639, iteration: 326321
loss: 0.9905676245689392,grad_norm: 0.8240931655443259, iteration: 326322
loss: 0.9806974530220032,grad_norm: 0.7757436764742621, iteration: 326323
loss: 0.9714125394821167,grad_norm: 0.9999991274985488, iteration: 326324
loss: 1.0370780229568481,grad_norm: 0.9183823141835188, iteration: 326325
loss: 1.0520334243774414,grad_norm: 0.9999996611677648, iteration: 326326
loss: 1.0663033723831177,grad_norm: 0.9334774703727795, iteration: 326327
loss: 0.9666683673858643,grad_norm: 0.8951693638701661, iteration: 326328
loss: 0.9955845475196838,grad_norm: 0.9847283384108397, iteration: 326329
loss: 1.0148757696151733,grad_norm: 0.7835437796656771, iteration: 326330
loss: 1.0401462316513062,grad_norm: 0.9762544727135911, iteration: 326331
loss: 1.001356840133667,grad_norm: 0.818671532903596, iteration: 326332
loss: 1.0710499286651611,grad_norm: 0.99999982608374, iteration: 326333
loss: 1.006396770477295,grad_norm: 0.9502879055043603, iteration: 326334
loss: 0.9878395199775696,grad_norm: 0.8471965363062118, iteration: 326335
loss: 1.0824594497680664,grad_norm: 0.9999991057507298, iteration: 326336
loss: 0.9942667484283447,grad_norm: 0.8310013999813184, iteration: 326337
loss: 1.0078314542770386,grad_norm: 0.9226608273333162, iteration: 326338
loss: 1.0129956007003784,grad_norm: 0.834547226445238, iteration: 326339
loss: 0.9709227085113525,grad_norm: 0.9152932302262807, iteration: 326340
loss: 1.055736780166626,grad_norm: 0.9653385947839097, iteration: 326341
loss: 1.0117722749710083,grad_norm: 0.8040885627998237, iteration: 326342
loss: 1.0126019716262817,grad_norm: 0.8552847665767244, iteration: 326343
loss: 1.019365906715393,grad_norm: 0.9004415652365049, iteration: 326344
loss: 1.0559202432632446,grad_norm: 0.9999993577798901, iteration: 326345
loss: 0.9888537526130676,grad_norm: 0.7867647851631839, iteration: 326346
loss: 0.9773289561271667,grad_norm: 0.8944726107051, iteration: 326347
loss: 1.0021096467971802,grad_norm: 0.9210221631440105, iteration: 326348
loss: 1.0132547616958618,grad_norm: 0.9069684414508926, iteration: 326349
loss: 0.9881950616836548,grad_norm: 0.9173882747041827, iteration: 326350
loss: 1.0273630619049072,grad_norm: 0.8956276265761098, iteration: 326351
loss: 1.054931402206421,grad_norm: 0.9999999803938178, iteration: 326352
loss: 0.983742892742157,grad_norm: 0.9628367698527182, iteration: 326353
loss: 0.9889981150627136,grad_norm: 0.7748176894363422, iteration: 326354
loss: 1.0407094955444336,grad_norm: 0.9999997641490144, iteration: 326355
loss: 1.049217700958252,grad_norm: 0.9999995876233609, iteration: 326356
loss: 1.013023853302002,grad_norm: 0.8261579356624276, iteration: 326357
loss: 0.9924153685569763,grad_norm: 0.8208191522627839, iteration: 326358
loss: 0.9972676038742065,grad_norm: 0.9102349539017071, iteration: 326359
loss: 0.9907388091087341,grad_norm: 0.8608916702838032, iteration: 326360
loss: 0.9637881517410278,grad_norm: 0.9120568169296518, iteration: 326361
loss: 0.9821686744689941,grad_norm: 0.9906282179492653, iteration: 326362
loss: 1.014097809791565,grad_norm: 0.9999989966831417, iteration: 326363
loss: 0.993595540523529,grad_norm: 0.7814196078728352, iteration: 326364
loss: 0.9727877974510193,grad_norm: 0.9999991185907101, iteration: 326365
loss: 1.0299038887023926,grad_norm: 0.9782000227774333, iteration: 326366
loss: 0.9966537356376648,grad_norm: 0.9348721536270453, iteration: 326367
loss: 1.0472476482391357,grad_norm: 0.8313763151711815, iteration: 326368
loss: 0.9945662617683411,grad_norm: 0.761508629610882, iteration: 326369
loss: 1.0066198110580444,grad_norm: 0.9842304616759269, iteration: 326370
loss: 1.0116679668426514,grad_norm: 0.9889154178746469, iteration: 326371
loss: 0.9883062243461609,grad_norm: 0.9029788993618654, iteration: 326372
loss: 0.9808411598205566,grad_norm: 0.911242946604508, iteration: 326373
loss: 1.0439321994781494,grad_norm: 0.9999998252116028, iteration: 326374
loss: 0.9959514141082764,grad_norm: 0.7154264866505917, iteration: 326375
loss: 1.0099719762802124,grad_norm: 0.9999990105847236, iteration: 326376
loss: 1.0157463550567627,grad_norm: 0.9529643238686135, iteration: 326377
loss: 1.0011000633239746,grad_norm: 0.7751511597582509, iteration: 326378
loss: 1.0373234748840332,grad_norm: 0.9129046449998584, iteration: 326379
loss: 1.0329233407974243,grad_norm: 0.9446347762212043, iteration: 326380
loss: 0.9770522713661194,grad_norm: 0.9374647380234682, iteration: 326381
loss: 1.014120101928711,grad_norm: 0.8272303904992799, iteration: 326382
loss: 0.9716486930847168,grad_norm: 0.8351466483409146, iteration: 326383
loss: 0.9780859351158142,grad_norm: 0.9999991613407503, iteration: 326384
loss: 0.9657562375068665,grad_norm: 0.9180728057364194, iteration: 326385
loss: 0.970001757144928,grad_norm: 0.999999075903261, iteration: 326386
loss: 1.0163414478302002,grad_norm: 0.9495776571470849, iteration: 326387
loss: 0.9603878259658813,grad_norm: 0.8464004976691657, iteration: 326388
loss: 0.9955155253410339,grad_norm: 0.8577083954652641, iteration: 326389
loss: 0.9940242767333984,grad_norm: 0.9999992020527954, iteration: 326390
loss: 1.003317952156067,grad_norm: 0.8320912489736779, iteration: 326391
loss: 1.0316736698150635,grad_norm: 0.8539739210730747, iteration: 326392
loss: 1.1078308820724487,grad_norm: 0.9999995387692834, iteration: 326393
loss: 0.9917961955070496,grad_norm: 0.7983398653701693, iteration: 326394
loss: 0.9739511609077454,grad_norm: 0.8533067134406441, iteration: 326395
loss: 0.9980835914611816,grad_norm: 0.7947455984140076, iteration: 326396
loss: 1.02015221118927,grad_norm: 0.7221899285961493, iteration: 326397
loss: 0.9887189865112305,grad_norm: 0.6737706991843614, iteration: 326398
loss: 0.9898979067802429,grad_norm: 0.8200135601878573, iteration: 326399
loss: 1.0222980976104736,grad_norm: 0.7857614615137757, iteration: 326400
loss: 1.0035347938537598,grad_norm: 0.8124495407003369, iteration: 326401
loss: 0.9946706295013428,grad_norm: 0.8796959797424234, iteration: 326402
loss: 0.9947535991668701,grad_norm: 0.8310688052078407, iteration: 326403
loss: 0.9688859581947327,grad_norm: 0.9289399062773547, iteration: 326404
loss: 0.9854254722595215,grad_norm: 0.8156224240133868, iteration: 326405
loss: 0.9861364960670471,grad_norm: 0.7904122227064729, iteration: 326406
loss: 0.9767580628395081,grad_norm: 0.963481856568505, iteration: 326407
loss: 1.1204180717468262,grad_norm: 0.9999999333963351, iteration: 326408
loss: 1.0192807912826538,grad_norm: 0.9067819986166281, iteration: 326409
loss: 1.0380442142486572,grad_norm: 0.999999467440503, iteration: 326410
loss: 0.9865344762802124,grad_norm: 0.826417193155662, iteration: 326411
loss: 0.9871630668640137,grad_norm: 0.8098626813685731, iteration: 326412
loss: 0.9884905815124512,grad_norm: 0.8147134998255257, iteration: 326413
loss: 1.0010251998901367,grad_norm: 0.9081599984029175, iteration: 326414
loss: 1.0084776878356934,grad_norm: 0.6508740832279261, iteration: 326415
loss: 1.0124945640563965,grad_norm: 0.8765329549844387, iteration: 326416
loss: 1.0211458206176758,grad_norm: 0.9999990888427683, iteration: 326417
loss: 1.0068472623825073,grad_norm: 0.9999991937435704, iteration: 326418
loss: 1.0217119455337524,grad_norm: 0.7938522763518818, iteration: 326419
loss: 0.9841863512992859,grad_norm: 0.98117755860814, iteration: 326420
loss: 1.0081746578216553,grad_norm: 0.8968590966411578, iteration: 326421
loss: 0.9961531758308411,grad_norm: 0.9712094730809626, iteration: 326422
loss: 0.9987586736679077,grad_norm: 0.9214870576273986, iteration: 326423
loss: 1.007409930229187,grad_norm: 0.8067710759153753, iteration: 326424
loss: 1.021870732307434,grad_norm: 0.9999991297403031, iteration: 326425
loss: 0.9603712558746338,grad_norm: 0.7969103244517008, iteration: 326426
loss: 1.0108559131622314,grad_norm: 0.891710357702801, iteration: 326427
loss: 0.973524272441864,grad_norm: 0.7885466906852024, iteration: 326428
loss: 0.9576963782310486,grad_norm: 0.9113227003296568, iteration: 326429
loss: 0.9619089961051941,grad_norm: 0.9331684726585856, iteration: 326430
loss: 1.0034513473510742,grad_norm: 0.8289892750261763, iteration: 326431
loss: 0.9693309664726257,grad_norm: 0.7975362634452476, iteration: 326432
loss: 0.9856749773025513,grad_norm: 0.8649936023239161, iteration: 326433
loss: 1.0211083889007568,grad_norm: 0.9999994160734034, iteration: 326434
loss: 0.9961262345314026,grad_norm: 0.7906242766911976, iteration: 326435
loss: 1.0314955711364746,grad_norm: 0.674602672130288, iteration: 326436
loss: 1.032705307006836,grad_norm: 0.9999992927446101, iteration: 326437
loss: 1.0365333557128906,grad_norm: 0.7745963066177409, iteration: 326438
loss: 1.115196943283081,grad_norm: 0.9999996586916626, iteration: 326439
loss: 0.9763615131378174,grad_norm: 0.8495249457734735, iteration: 326440
loss: 1.048032283782959,grad_norm: 0.743040234190672, iteration: 326441
loss: 1.0187212228775024,grad_norm: 0.8828411630542432, iteration: 326442
loss: 1.0343644618988037,grad_norm: 0.7371221330208568, iteration: 326443
loss: 1.2092527151107788,grad_norm: 0.9999991837600382, iteration: 326444
loss: 0.9765341877937317,grad_norm: 0.8954573889328374, iteration: 326445
loss: 1.0100719928741455,grad_norm: 0.8533328415979892, iteration: 326446
loss: 1.0461028814315796,grad_norm: 0.9999992534511428, iteration: 326447
loss: 1.0186679363250732,grad_norm: 0.9113309224946399, iteration: 326448
loss: 1.098141074180603,grad_norm: 0.9999996287899717, iteration: 326449
loss: 0.9968788623809814,grad_norm: 0.7853208736252032, iteration: 326450
loss: 1.0175015926361084,grad_norm: 0.7413242786271504, iteration: 326451
loss: 1.000791311264038,grad_norm: 0.9071157907360327, iteration: 326452
loss: 0.9624426960945129,grad_norm: 0.8249070751205299, iteration: 326453
loss: 1.0151335000991821,grad_norm: 0.7933622264722139, iteration: 326454
loss: 0.9989464282989502,grad_norm: 0.9003224934017123, iteration: 326455
loss: 0.9711686968803406,grad_norm: 0.9324886749832028, iteration: 326456
loss: 1.0077979564666748,grad_norm: 0.9999991201673438, iteration: 326457
loss: 1.002647042274475,grad_norm: 0.8861016740234599, iteration: 326458
loss: 0.9734190106391907,grad_norm: 0.7887121402165057, iteration: 326459
loss: 1.1328518390655518,grad_norm: 0.8861596788601701, iteration: 326460
loss: 0.9929333329200745,grad_norm: 0.8093319161781343, iteration: 326461
loss: 1.0022011995315552,grad_norm: 0.7060054223631426, iteration: 326462
loss: 1.0520894527435303,grad_norm: 0.9851697578548737, iteration: 326463
loss: 1.005192756652832,grad_norm: 0.7324678800533143, iteration: 326464
loss: 0.9999575614929199,grad_norm: 0.8731398038944715, iteration: 326465
loss: 0.99765545129776,grad_norm: 0.7951144746431165, iteration: 326466
loss: 0.9698640704154968,grad_norm: 0.870818609986088, iteration: 326467
loss: 1.088335633277893,grad_norm: 0.9865971069922592, iteration: 326468
loss: 1.1832937002182007,grad_norm: 0.9999993590490235, iteration: 326469
loss: 0.9941306114196777,grad_norm: 0.8545301019775032, iteration: 326470
loss: 1.009680986404419,grad_norm: 0.856564064436646, iteration: 326471
loss: 0.986585259437561,grad_norm: 0.8747881055001167, iteration: 326472
loss: 1.0430973768234253,grad_norm: 0.8679000594414495, iteration: 326473
loss: 1.013514518737793,grad_norm: 0.8456235101576935, iteration: 326474
loss: 1.039868950843811,grad_norm: 0.8772212241451472, iteration: 326475
loss: 0.9787977933883667,grad_norm: 0.8050875131959964, iteration: 326476
loss: 1.000797986984253,grad_norm: 0.6706921092618112, iteration: 326477
loss: 1.0253592729568481,grad_norm: 0.9535635947939334, iteration: 326478
loss: 0.9947750568389893,grad_norm: 0.8922133686257377, iteration: 326479
loss: 0.963475227355957,grad_norm: 0.7584356158706342, iteration: 326480
loss: 1.0791208744049072,grad_norm: 0.8777686577956273, iteration: 326481
loss: 0.9739869236946106,grad_norm: 0.8674296562427727, iteration: 326482
loss: 0.9954590201377869,grad_norm: 0.8071014934957044, iteration: 326483
loss: 0.9914537072181702,grad_norm: 0.8039320019916745, iteration: 326484
loss: 1.031056523323059,grad_norm: 0.7747861672878256, iteration: 326485
loss: 0.9756152033805847,grad_norm: 0.7863571778176454, iteration: 326486
loss: 1.0029629468917847,grad_norm: 0.8598923941453691, iteration: 326487
loss: 1.0389695167541504,grad_norm: 0.8482839362810751, iteration: 326488
loss: 0.9882842302322388,grad_norm: 0.9472646013679356, iteration: 326489
loss: 0.9855102896690369,grad_norm: 0.7715745112382262, iteration: 326490
loss: 0.9651485681533813,grad_norm: 0.9089025839186012, iteration: 326491
loss: 0.9787530899047852,grad_norm: 0.9771443884606431, iteration: 326492
loss: 1.0010546445846558,grad_norm: 0.9945909642938026, iteration: 326493
loss: 1.0380185842514038,grad_norm: 0.9576249225550428, iteration: 326494
loss: 1.0022457838058472,grad_norm: 0.99999989816665, iteration: 326495
loss: 1.0142059326171875,grad_norm: 0.8602384285610281, iteration: 326496
loss: 0.999639630317688,grad_norm: 0.8089508746390106, iteration: 326497
loss: 0.9728286862373352,grad_norm: 0.8602854480574457, iteration: 326498
loss: 0.9258169531822205,grad_norm: 0.8463206412764125, iteration: 326499
loss: 0.98509281873703,grad_norm: 0.8309424604421229, iteration: 326500
loss: 1.0002707242965698,grad_norm: 0.8293157592139402, iteration: 326501
loss: 0.9924027919769287,grad_norm: 0.8016323484555845, iteration: 326502
loss: 1.0253963470458984,grad_norm: 0.9999990507555286, iteration: 326503
loss: 0.9773592352867126,grad_norm: 0.9999991042145642, iteration: 326504
loss: 1.0193116664886475,grad_norm: 0.8478271722987446, iteration: 326505
loss: 0.9845138192176819,grad_norm: 0.9999998640335329, iteration: 326506
loss: 0.9765445590019226,grad_norm: 0.8028683360024652, iteration: 326507
loss: 1.001330018043518,grad_norm: 0.8340430999180664, iteration: 326508
loss: 1.0049777030944824,grad_norm: 0.8259258950014857, iteration: 326509
loss: 0.9974896907806396,grad_norm: 0.923382850425931, iteration: 326510
loss: 1.0020167827606201,grad_norm: 0.6574631315892026, iteration: 326511
loss: 1.0267616510391235,grad_norm: 0.9001949785728027, iteration: 326512
loss: 1.0416725873947144,grad_norm: 0.9999993157847011, iteration: 326513
loss: 1.014207363128662,grad_norm: 0.8543480367220287, iteration: 326514
loss: 0.9814456701278687,grad_norm: 0.81042897281078, iteration: 326515
loss: 0.9935353398323059,grad_norm: 0.9999990945642798, iteration: 326516
loss: 1.0157735347747803,grad_norm: 0.999999579484562, iteration: 326517
loss: 0.9947013258934021,grad_norm: 0.7752397187568868, iteration: 326518
loss: 1.0368776321411133,grad_norm: 0.7065563837338668, iteration: 326519
loss: 1.054976224899292,grad_norm: 0.8695396277960271, iteration: 326520
loss: 1.0630894899368286,grad_norm: 0.7706463676198166, iteration: 326521
loss: 0.9979054927825928,grad_norm: 0.8321829371010246, iteration: 326522
loss: 1.0018951892852783,grad_norm: 0.9099874456996011, iteration: 326523
loss: 1.0096691846847534,grad_norm: 0.8269906624164353, iteration: 326524
loss: 0.9997689127922058,grad_norm: 0.8864394227067596, iteration: 326525
loss: 0.9616401791572571,grad_norm: 0.8903143214166175, iteration: 326526
loss: 0.984447717666626,grad_norm: 0.9051289337435952, iteration: 326527
loss: 0.9905393719673157,grad_norm: 0.9177231812925282, iteration: 326528
loss: 1.017252802848816,grad_norm: 0.8134934874424354, iteration: 326529
loss: 1.105101227760315,grad_norm: 0.9999993136162012, iteration: 326530
loss: 1.0820670127868652,grad_norm: 0.9999995114412965, iteration: 326531
loss: 1.0289632081985474,grad_norm: 0.999999097106396, iteration: 326532
loss: 1.0081956386566162,grad_norm: 0.7712930662103415, iteration: 326533
loss: 0.9464859962463379,grad_norm: 0.7978839868385679, iteration: 326534
loss: 1.0032943487167358,grad_norm: 0.7872109852814128, iteration: 326535
loss: 0.9701914191246033,grad_norm: 0.7818014153770273, iteration: 326536
loss: 0.9612953662872314,grad_norm: 0.8745549975273579, iteration: 326537
loss: 0.974054217338562,grad_norm: 0.791580562222674, iteration: 326538
loss: 1.0146673917770386,grad_norm: 0.8209682088213961, iteration: 326539
loss: 1.0226268768310547,grad_norm: 0.906125758478315, iteration: 326540
loss: 1.0200709104537964,grad_norm: 0.896024730510254, iteration: 326541
loss: 1.009096384048462,grad_norm: 0.7502327028483465, iteration: 326542
loss: 0.9926342964172363,grad_norm: 0.6763033565424331, iteration: 326543
loss: 1.0961564779281616,grad_norm: 0.9999998991936409, iteration: 326544
loss: 1.0031589269638062,grad_norm: 0.9691604650653173, iteration: 326545
loss: 1.0103975534439087,grad_norm: 0.8297309580702964, iteration: 326546
loss: 0.9709835052490234,grad_norm: 0.9999992404956601, iteration: 326547
loss: 1.0225344896316528,grad_norm: 0.8827967088795481, iteration: 326548
loss: 1.0158135890960693,grad_norm: 0.943521254620703, iteration: 326549
loss: 0.9843746423721313,grad_norm: 0.804813592738273, iteration: 326550
loss: 0.9922639727592468,grad_norm: 0.9427183931286374, iteration: 326551
loss: 1.0019257068634033,grad_norm: 0.8183777855621837, iteration: 326552
loss: 1.0156716108322144,grad_norm: 0.9999993812298601, iteration: 326553
loss: 0.9959034323692322,grad_norm: 0.9519933869121918, iteration: 326554
loss: 0.9502931237220764,grad_norm: 0.9612431353379951, iteration: 326555
loss: 0.9997836351394653,grad_norm: 0.8095211587529261, iteration: 326556
loss: 0.9716437458992004,grad_norm: 0.6929427534114347, iteration: 326557
loss: 1.0303218364715576,grad_norm: 0.81008182214468, iteration: 326558
loss: 1.0145436525344849,grad_norm: 0.9999990015200249, iteration: 326559
loss: 1.1058729887008667,grad_norm: 0.9999998656989755, iteration: 326560
loss: 0.9347952604293823,grad_norm: 0.7676372611333019, iteration: 326561
loss: 0.9937664866447449,grad_norm: 0.7659199415722601, iteration: 326562
loss: 1.010919213294983,grad_norm: 0.8832428998158329, iteration: 326563
loss: 1.0058350563049316,grad_norm: 0.9327347426867972, iteration: 326564
loss: 0.9877980351448059,grad_norm: 0.7924695135360849, iteration: 326565
loss: 0.9975424408912659,grad_norm: 0.7536727121301672, iteration: 326566
loss: 0.9935046434402466,grad_norm: 0.9164554940933232, iteration: 326567
loss: 1.079587459564209,grad_norm: 0.9999993439446926, iteration: 326568
loss: 1.0098459720611572,grad_norm: 0.7763127400451229, iteration: 326569
loss: 1.0118889808654785,grad_norm: 0.7658893169006596, iteration: 326570
loss: 1.035079002380371,grad_norm: 0.999999124034611, iteration: 326571
loss: 0.9928147196769714,grad_norm: 0.8192806386403512, iteration: 326572
loss: 1.0057182312011719,grad_norm: 0.9050255947209469, iteration: 326573
loss: 1.0074067115783691,grad_norm: 0.818764300400383, iteration: 326574
loss: 0.9839526414871216,grad_norm: 0.8430578998719743, iteration: 326575
loss: 1.0109847784042358,grad_norm: 0.9999992092070842, iteration: 326576
loss: 0.9969297647476196,grad_norm: 0.7399891296976956, iteration: 326577
loss: 1.0054795742034912,grad_norm: 0.8687420576272926, iteration: 326578
loss: 1.010401725769043,grad_norm: 0.794651629106246, iteration: 326579
loss: 1.0906140804290771,grad_norm: 0.9999997422900934, iteration: 326580
loss: 1.0157511234283447,grad_norm: 0.8883872288500198, iteration: 326581
loss: 0.9517766833305359,grad_norm: 0.8807100989085891, iteration: 326582
loss: 1.0082179307937622,grad_norm: 0.7720934180307019, iteration: 326583
loss: 1.0077143907546997,grad_norm: 0.9999991891703895, iteration: 326584
loss: 0.9828101396560669,grad_norm: 0.9168273787505005, iteration: 326585
loss: 0.9993942379951477,grad_norm: 0.9108811339066829, iteration: 326586
loss: 1.055567979812622,grad_norm: 0.9999995599372822, iteration: 326587
loss: 0.9957183003425598,grad_norm: 0.809817328510936, iteration: 326588
loss: 0.9990451335906982,grad_norm: 0.8120229104310088, iteration: 326589
loss: 0.9804208874702454,grad_norm: 0.8246095579135042, iteration: 326590
loss: 0.9729320406913757,grad_norm: 0.8092361564436686, iteration: 326591
loss: 1.0251680612564087,grad_norm: 0.8821517268277946, iteration: 326592
loss: 0.9520533680915833,grad_norm: 0.7967853317114845, iteration: 326593
loss: 1.0254448652267456,grad_norm: 0.9531198999195518, iteration: 326594
loss: 1.0084599256515503,grad_norm: 0.9602066633660967, iteration: 326595
loss: 0.9716907143592834,grad_norm: 0.8915954436181808, iteration: 326596
loss: 1.0333842039108276,grad_norm: 0.7591757400094019, iteration: 326597
loss: 1.2519779205322266,grad_norm: 0.9999996218459181, iteration: 326598
loss: 1.0871893167495728,grad_norm: 0.9327970364811988, iteration: 326599
loss: 0.9901999831199646,grad_norm: 0.8115023122287799, iteration: 326600
loss: 0.9638951420783997,grad_norm: 0.8668235225040497, iteration: 326601
loss: 0.9526082873344421,grad_norm: 0.8956288146421673, iteration: 326602
loss: 0.9572173357009888,grad_norm: 0.8072491522451339, iteration: 326603
loss: 0.979902446269989,grad_norm: 0.7743579721101411, iteration: 326604
loss: 1.0398905277252197,grad_norm: 0.7463543037455941, iteration: 326605
loss: 1.0337022542953491,grad_norm: 0.7468501731490688, iteration: 326606
loss: 0.9976847767829895,grad_norm: 0.9723789843579178, iteration: 326607
loss: 1.0280944108963013,grad_norm: 0.9470297615993872, iteration: 326608
loss: 0.9484174847602844,grad_norm: 0.8717931071446492, iteration: 326609
loss: 0.9981253743171692,grad_norm: 0.8977174357363845, iteration: 326610
loss: 0.9838027358055115,grad_norm: 0.8098969828666025, iteration: 326611
loss: 0.9950209259986877,grad_norm: 0.9999990659646413, iteration: 326612
loss: 1.1856554746627808,grad_norm: 0.9999998516882146, iteration: 326613
loss: 1.091468334197998,grad_norm: 0.9999997179160183, iteration: 326614
loss: 1.0166412591934204,grad_norm: 0.8088520161927457, iteration: 326615
loss: 1.008034586906433,grad_norm: 0.9999990348976885, iteration: 326616
loss: 1.0051331520080566,grad_norm: 0.8661611335302217, iteration: 326617
loss: 1.0403019189834595,grad_norm: 0.8171838733502108, iteration: 326618
loss: 0.9997156858444214,grad_norm: 0.7062549200477193, iteration: 326619
loss: 0.9985604286193848,grad_norm: 0.7639560114202191, iteration: 326620
loss: 0.9853927493095398,grad_norm: 0.8295601745158672, iteration: 326621
loss: 0.9984093904495239,grad_norm: 0.7152569775939794, iteration: 326622
loss: 1.0157939195632935,grad_norm: 0.8422847056278899, iteration: 326623
loss: 1.0196425914764404,grad_norm: 0.7611084161601765, iteration: 326624
loss: 1.0082505941390991,grad_norm: 0.9200263847055284, iteration: 326625
loss: 1.0124354362487793,grad_norm: 0.9999052266605667, iteration: 326626
loss: 1.0301024913787842,grad_norm: 1.000000012175681, iteration: 326627
loss: 1.0297836065292358,grad_norm: 0.949503361953858, iteration: 326628
loss: 0.9909068942070007,grad_norm: 0.8422720112825165, iteration: 326629
loss: 0.9785078167915344,grad_norm: 0.7507209649591012, iteration: 326630
loss: 0.9917283654212952,grad_norm: 0.7667155167333644, iteration: 326631
loss: 0.9992440342903137,grad_norm: 0.8604715308200716, iteration: 326632
loss: 1.139575481414795,grad_norm: 0.9573805413689859, iteration: 326633
loss: 1.0005872249603271,grad_norm: 0.7959678417065994, iteration: 326634
loss: 0.9778707027435303,grad_norm: 0.8310692453277784, iteration: 326635
loss: 1.0321147441864014,grad_norm: 0.8769942570888564, iteration: 326636
loss: 1.0015606880187988,grad_norm: 0.8527272257321837, iteration: 326637
loss: 1.0091272592544556,grad_norm: 0.8488790558071562, iteration: 326638
loss: 1.0055515766143799,grad_norm: 0.9999995581133748, iteration: 326639
loss: 1.0031565427780151,grad_norm: 0.9605944364622904, iteration: 326640
loss: 1.0058577060699463,grad_norm: 0.8931358859856582, iteration: 326641
loss: 1.005398154258728,grad_norm: 0.6932182957392297, iteration: 326642
loss: 1.045082449913025,grad_norm: 0.8553730718214747, iteration: 326643
loss: 1.0203614234924316,grad_norm: 0.9999993854380179, iteration: 326644
loss: 1.0066698789596558,grad_norm: 0.999999025318989, iteration: 326645
loss: 0.9912295341491699,grad_norm: 0.787981413183876, iteration: 326646
loss: 0.9891768097877502,grad_norm: 0.8410538075071347, iteration: 326647
loss: 1.0090752840042114,grad_norm: 0.9999995016267939, iteration: 326648
loss: 1.001139760017395,grad_norm: 0.9175751189462549, iteration: 326649
loss: 1.0026686191558838,grad_norm: 0.7932016805854899, iteration: 326650
loss: 1.008968472480774,grad_norm: 0.999999232154583, iteration: 326651
loss: 0.995660662651062,grad_norm: 0.777501181797121, iteration: 326652
loss: 1.012373924255371,grad_norm: 0.7958505566963744, iteration: 326653
loss: 1.0574474334716797,grad_norm: 0.9999993489936405, iteration: 326654
loss: 1.0267200469970703,grad_norm: 0.8377024111801865, iteration: 326655
loss: 1.0010673999786377,grad_norm: 0.9429203605551897, iteration: 326656
loss: 1.0147002935409546,grad_norm: 0.9999995197931248, iteration: 326657
loss: 1.1894865036010742,grad_norm: 0.904956498069947, iteration: 326658
loss: 0.9704227447509766,grad_norm: 0.8678949092292646, iteration: 326659
loss: 0.9989007711410522,grad_norm: 0.8357092747280761, iteration: 326660
loss: 1.065004587173462,grad_norm: 0.8417956033933152, iteration: 326661
loss: 1.0003069639205933,grad_norm: 0.8064477951854457, iteration: 326662
loss: 1.0255299806594849,grad_norm: 0.786203268134926, iteration: 326663
loss: 1.014466404914856,grad_norm: 0.8773744332219829, iteration: 326664
loss: 1.0459355115890503,grad_norm: 0.9999996623651113, iteration: 326665
loss: 1.0293083190917969,grad_norm: 0.8883535329863222, iteration: 326666
loss: 0.9932270050048828,grad_norm: 0.691526468508207, iteration: 326667
loss: 0.9656458497047424,grad_norm: 0.7439408253953542, iteration: 326668
loss: 1.0182453393936157,grad_norm: 0.82878547520122, iteration: 326669
loss: 1.032837152481079,grad_norm: 0.8339263956017962, iteration: 326670
loss: 0.9988713264465332,grad_norm: 0.9999990227453212, iteration: 326671
loss: 1.102453589439392,grad_norm: 0.9999999046751352, iteration: 326672
loss: 0.9805824160575867,grad_norm: 0.9999991218880266, iteration: 326673
loss: 1.0071536302566528,grad_norm: 0.84942731031245, iteration: 326674
loss: 1.2191689014434814,grad_norm: 0.9999995023498494, iteration: 326675
loss: 0.9785794615745544,grad_norm: 0.8492458331407516, iteration: 326676
loss: 1.1144286394119263,grad_norm: 0.9999990032777782, iteration: 326677
loss: 1.0311729907989502,grad_norm: 0.78844971473685, iteration: 326678
loss: 0.9947230815887451,grad_norm: 0.922738726344954, iteration: 326679
loss: 1.0056017637252808,grad_norm: 0.7499773631083073, iteration: 326680
loss: 1.2304385900497437,grad_norm: 0.9999991966317042, iteration: 326681
loss: 1.0604908466339111,grad_norm: 0.9999992357651684, iteration: 326682
loss: 0.973293662071228,grad_norm: 0.9339559435587693, iteration: 326683
loss: 1.032689094543457,grad_norm: 0.9246484146432075, iteration: 326684
loss: 1.0452548265457153,grad_norm: 0.9999997335141584, iteration: 326685
loss: 0.9887321591377258,grad_norm: 0.8221173487663357, iteration: 326686
loss: 1.0234907865524292,grad_norm: 0.7239633566964623, iteration: 326687
loss: 1.0257774591445923,grad_norm: 0.8726800208220035, iteration: 326688
loss: 1.132620930671692,grad_norm: 0.9999996165419808, iteration: 326689
loss: 1.0562220811843872,grad_norm: 0.925492622296501, iteration: 326690
loss: 0.9956628680229187,grad_norm: 0.9999991678887649, iteration: 326691
loss: 1.0405712127685547,grad_norm: 0.8200265200635682, iteration: 326692
loss: 1.012919306755066,grad_norm: 0.9057685594603546, iteration: 326693
loss: 0.9385371804237366,grad_norm: 0.9756745890867913, iteration: 326694
loss: 0.9826527833938599,grad_norm: 0.9999989672090116, iteration: 326695
loss: 1.0128254890441895,grad_norm: 0.9312512034167504, iteration: 326696
loss: 1.0125596523284912,grad_norm: 0.7764772922357042, iteration: 326697
loss: 1.0190694332122803,grad_norm: 0.8662991989729387, iteration: 326698
loss: 0.9685779809951782,grad_norm: 0.716886958787841, iteration: 326699
loss: 1.034387469291687,grad_norm: 0.9999991669306981, iteration: 326700
loss: 1.0457502603530884,grad_norm: 0.9999991282863131, iteration: 326701
loss: 0.9950369596481323,grad_norm: 0.8154957754876921, iteration: 326702
loss: 1.0066845417022705,grad_norm: 0.9466672682639499, iteration: 326703
loss: 0.9764322638511658,grad_norm: 0.8432004046372793, iteration: 326704
loss: 0.9967637658119202,grad_norm: 0.8061376109234422, iteration: 326705
loss: 1.0738364458084106,grad_norm: 0.7585102469340844, iteration: 326706
loss: 1.00533127784729,grad_norm: 0.8700051852251359, iteration: 326707
loss: 1.09196138381958,grad_norm: 0.9999991276588831, iteration: 326708
loss: 1.0052433013916016,grad_norm: 0.7341441278497219, iteration: 326709
loss: 0.9939718246459961,grad_norm: 0.9052221880845006, iteration: 326710
loss: 1.0310146808624268,grad_norm: 0.8594470158197826, iteration: 326711
loss: 1.016060471534729,grad_norm: 0.9999998538470986, iteration: 326712
loss: 1.0009129047393799,grad_norm: 0.7805474791137375, iteration: 326713
loss: 0.9911539554595947,grad_norm: 0.8787810660554363, iteration: 326714
loss: 0.9967073202133179,grad_norm: 0.7950577192591798, iteration: 326715
loss: 0.9887723922729492,grad_norm: 0.6323905606816199, iteration: 326716
loss: 1.0026224851608276,grad_norm: 0.9999990214082272, iteration: 326717
loss: 0.9555541276931763,grad_norm: 0.8604701505336224, iteration: 326718
loss: 1.0350854396820068,grad_norm: 0.8480862003782355, iteration: 326719
loss: 0.996711254119873,grad_norm: 0.9120434066805311, iteration: 326720
loss: 1.0476802587509155,grad_norm: 0.933070244166688, iteration: 326721
loss: 0.9847647547721863,grad_norm: 0.8050906060068963, iteration: 326722
loss: 1.0243037939071655,grad_norm: 0.7683938155575307, iteration: 326723
loss: 1.037718415260315,grad_norm: 0.9046449324429, iteration: 326724
loss: 1.0306352376937866,grad_norm: 0.9999993284725989, iteration: 326725
loss: 0.9811951518058777,grad_norm: 0.8836769662019107, iteration: 326726
loss: 1.0330746173858643,grad_norm: 0.8257851063419459, iteration: 326727
loss: 0.9991185069084167,grad_norm: 0.8822085298235555, iteration: 326728
loss: 1.0205224752426147,grad_norm: 0.7479351857837155, iteration: 326729
loss: 1.0242847204208374,grad_norm: 0.8464167299391636, iteration: 326730
loss: 0.9800350069999695,grad_norm: 0.8261595561452499, iteration: 326731
loss: 0.9699974656105042,grad_norm: 0.9363120765012873, iteration: 326732
loss: 0.9893765449523926,grad_norm: 0.8497107204864272, iteration: 326733
loss: 0.9746675491333008,grad_norm: 0.8162287908618532, iteration: 326734
loss: 0.988218367099762,grad_norm: 0.7891831920671333, iteration: 326735
loss: 1.0145013332366943,grad_norm: 1.0000000984895256, iteration: 326736
loss: 0.9680916666984558,grad_norm: 0.9025529274873686, iteration: 326737
loss: 1.0183604955673218,grad_norm: 0.9999997317458349, iteration: 326738
loss: 1.0072475671768188,grad_norm: 0.8398766684730423, iteration: 326739
loss: 0.990412175655365,grad_norm: 0.9999995825532296, iteration: 326740
loss: 1.097261905670166,grad_norm: 0.9999992904527092, iteration: 326741
loss: 1.029302716255188,grad_norm: 0.9999993910742792, iteration: 326742
loss: 1.0567073822021484,grad_norm: 0.9999996796267568, iteration: 326743
loss: 1.0102412700653076,grad_norm: 0.7795365459078265, iteration: 326744
loss: 1.0858210325241089,grad_norm: 0.999999666976403, iteration: 326745
loss: 1.0275557041168213,grad_norm: 0.9644322551414194, iteration: 326746
loss: 1.0173557996749878,grad_norm: 0.9999991803704835, iteration: 326747
loss: 1.0194355249404907,grad_norm: 0.7486171871497754, iteration: 326748
loss: 0.9939257502555847,grad_norm: 0.8502163226311313, iteration: 326749
loss: 1.0687929391860962,grad_norm: 0.9999996863789057, iteration: 326750
loss: 1.0338358879089355,grad_norm: 0.8728349186759269, iteration: 326751
loss: 0.98008131980896,grad_norm: 0.8267075341276977, iteration: 326752
loss: 1.0088982582092285,grad_norm: 0.9999989746107478, iteration: 326753
loss: 1.0054075717926025,grad_norm: 0.9208202238720361, iteration: 326754
loss: 0.9995321035385132,grad_norm: 0.9999992125527963, iteration: 326755
loss: 1.0731066465377808,grad_norm: 0.9999990315195164, iteration: 326756
loss: 0.9453044533729553,grad_norm: 0.8187219414736977, iteration: 326757
loss: 1.001039981842041,grad_norm: 0.9550101551826752, iteration: 326758
loss: 0.9936856627464294,grad_norm: 0.8017355363082447, iteration: 326759
loss: 0.9943753480911255,grad_norm: 0.8145189563696817, iteration: 326760
loss: 1.0380264520645142,grad_norm: 0.9332915974753484, iteration: 326761
loss: 0.9980342984199524,grad_norm: 0.9999993773293604, iteration: 326762
loss: 0.9819753766059875,grad_norm: 0.9999994250543263, iteration: 326763
loss: 1.0274548530578613,grad_norm: 0.9999996262318956, iteration: 326764
loss: 1.025152564048767,grad_norm: 0.7685023629885991, iteration: 326765
loss: 0.9831695556640625,grad_norm: 0.9999990856619818, iteration: 326766
loss: 1.0081024169921875,grad_norm: 0.9046050019298106, iteration: 326767
loss: 1.0047504901885986,grad_norm: 0.999999317230372, iteration: 326768
loss: 1.01320481300354,grad_norm: 0.9489588430148591, iteration: 326769
loss: 0.9874510765075684,grad_norm: 0.7016948961807107, iteration: 326770
loss: 1.0787034034729004,grad_norm: 0.9999998327865648, iteration: 326771
loss: 1.0444698333740234,grad_norm: 0.8829395669833582, iteration: 326772
loss: 1.0299279689788818,grad_norm: 0.8346315852413987, iteration: 326773
loss: 1.0025523900985718,grad_norm: 0.999999911629057, iteration: 326774
loss: 1.0177828073501587,grad_norm: 0.9310194801385031, iteration: 326775
loss: 0.9983901977539062,grad_norm: 0.9999991561081318, iteration: 326776
loss: 0.9996753931045532,grad_norm: 0.7968547483673135, iteration: 326777
loss: 1.0498558282852173,grad_norm: 0.9999991725934096, iteration: 326778
loss: 0.9686229825019836,grad_norm: 0.84303837180362, iteration: 326779
loss: 0.982792317867279,grad_norm: 0.999999740759915, iteration: 326780
loss: 1.0265837907791138,grad_norm: 0.7457209885731807, iteration: 326781
loss: 1.0011645555496216,grad_norm: 0.8716936803049429, iteration: 326782
loss: 1.1989367008209229,grad_norm: 0.9999990162829528, iteration: 326783
loss: 1.0842453241348267,grad_norm: 0.8127480796091632, iteration: 326784
loss: 1.0570911169052124,grad_norm: 0.9613685044072764, iteration: 326785
loss: 0.9821797013282776,grad_norm: 0.8357166115984385, iteration: 326786
loss: 1.0144673585891724,grad_norm: 0.9999991030834474, iteration: 326787
loss: 1.0421587228775024,grad_norm: 0.96413671828402, iteration: 326788
loss: 0.9909138083457947,grad_norm: 0.9999999494626727, iteration: 326789
loss: 0.9735926985740662,grad_norm: 0.7265413885063435, iteration: 326790
loss: 1.0454902648925781,grad_norm: 0.9999998573225423, iteration: 326791
loss: 1.0054113864898682,grad_norm: 0.9999990722128593, iteration: 326792
loss: 1.0049638748168945,grad_norm: 0.8692395996761818, iteration: 326793
loss: 1.0495964288711548,grad_norm: 0.9999996132049613, iteration: 326794
loss: 0.964431881904602,grad_norm: 0.9844217035823681, iteration: 326795
loss: 0.9825038909912109,grad_norm: 0.7456098060681355, iteration: 326796
loss: 0.9938468337059021,grad_norm: 0.9999992495755726, iteration: 326797
loss: 1.0085644721984863,grad_norm: 0.9005980077415087, iteration: 326798
loss: 1.0044193267822266,grad_norm: 0.9073597191143192, iteration: 326799
loss: 1.0247822999954224,grad_norm: 0.8267098555127158, iteration: 326800
loss: 1.0574766397476196,grad_norm: 0.9882124784223216, iteration: 326801
loss: 1.0292167663574219,grad_norm: 0.9497025851235202, iteration: 326802
loss: 0.9917524456977844,grad_norm: 0.9086254247426269, iteration: 326803
loss: 1.031716227531433,grad_norm: 0.9999996875562516, iteration: 326804
loss: 0.9698758721351624,grad_norm: 0.7994455742726474, iteration: 326805
loss: 1.0581648349761963,grad_norm: 0.9999989557102901, iteration: 326806
loss: 1.0475925207138062,grad_norm: 0.999999373521311, iteration: 326807
loss: 1.0200684070587158,grad_norm: 0.834797771087957, iteration: 326808
loss: 1.0429593324661255,grad_norm: 0.9398018123950106, iteration: 326809
loss: 1.0502030849456787,grad_norm: 0.754349491337427, iteration: 326810
loss: 1.0055930614471436,grad_norm: 0.7997358729425688, iteration: 326811
loss: 1.0153685808181763,grad_norm: 0.8484419332377736, iteration: 326812
loss: 1.0029979944229126,grad_norm: 0.8942419812042127, iteration: 326813
loss: 0.9829518795013428,grad_norm: 0.8346661996477679, iteration: 326814
loss: 1.0512077808380127,grad_norm: 0.9999995060312525, iteration: 326815
loss: 1.0104197263717651,grad_norm: 0.7832374063648354, iteration: 326816
loss: 0.9800944924354553,grad_norm: 0.7967985747005366, iteration: 326817
loss: 1.0139495134353638,grad_norm: 0.9999998826197919, iteration: 326818
loss: 0.9642020463943481,grad_norm: 0.999999058896052, iteration: 326819
loss: 1.0145633220672607,grad_norm: 0.9816488283602115, iteration: 326820
loss: 1.0723768472671509,grad_norm: 0.8224056469791405, iteration: 326821
loss: 1.023189902305603,grad_norm: 0.7571464358351241, iteration: 326822
loss: 1.0165654420852661,grad_norm: 0.9999995927619536, iteration: 326823
loss: 0.99626225233078,grad_norm: 0.9400834677150848, iteration: 326824
loss: 1.000606894493103,grad_norm: 0.9999991794698397, iteration: 326825
loss: 0.9860239028930664,grad_norm: 0.9999997254539987, iteration: 326826
loss: 0.9885179996490479,grad_norm: 0.8942815737576667, iteration: 326827
loss: 0.9940610527992249,grad_norm: 0.6449044097521645, iteration: 326828
loss: 1.0055619478225708,grad_norm: 0.8261636553382522, iteration: 326829
loss: 1.1722944974899292,grad_norm: 1.0000000198986252, iteration: 326830
loss: 0.9924904108047485,grad_norm: 0.8620012386642887, iteration: 326831
loss: 0.9906617999076843,grad_norm: 0.9999994231803295, iteration: 326832
loss: 1.1174376010894775,grad_norm: 0.9999992380553318, iteration: 326833
loss: 1.0079452991485596,grad_norm: 0.8554630063416712, iteration: 326834
loss: 1.0235692262649536,grad_norm: 0.999999339796569, iteration: 326835
loss: 0.9895159006118774,grad_norm: 0.897595002064431, iteration: 326836
loss: 1.0044643878936768,grad_norm: 0.9093231794934665, iteration: 326837
loss: 1.0328055620193481,grad_norm: 0.8680573028830496, iteration: 326838
loss: 0.9633094668388367,grad_norm: 0.9810327211173907, iteration: 326839
loss: 1.0332601070404053,grad_norm: 0.9999992417271704, iteration: 326840
loss: 1.0449875593185425,grad_norm: 0.9999992535632433, iteration: 326841
loss: 0.9921619892120361,grad_norm: 0.8212344574845224, iteration: 326842
loss: 1.0005360841751099,grad_norm: 0.9999991505486284, iteration: 326843
loss: 0.9988626837730408,grad_norm: 0.7660203942854688, iteration: 326844
loss: 0.9996963143348694,grad_norm: 0.7526594977315967, iteration: 326845
loss: 1.0102633237838745,grad_norm: 0.9999990785572951, iteration: 326846
loss: 1.0060876607894897,grad_norm: 0.979206129061854, iteration: 326847
loss: 1.025931477546692,grad_norm: 0.8310699470866462, iteration: 326848
loss: 1.0474176406860352,grad_norm: 0.7588355708618963, iteration: 326849
loss: 0.979655921459198,grad_norm: 0.7990488528655296, iteration: 326850
loss: 1.0183804035186768,grad_norm: 0.6055750787751416, iteration: 326851
loss: 0.9676300287246704,grad_norm: 0.9629910385637227, iteration: 326852
loss: 1.0334993600845337,grad_norm: 0.9999993610545832, iteration: 326853
loss: 1.0239527225494385,grad_norm: 0.9999991337691483, iteration: 326854
loss: 1.0255606174468994,grad_norm: 0.820808075488037, iteration: 326855
loss: 0.9633707404136658,grad_norm: 0.8316475295530267, iteration: 326856
loss: 0.9983507990837097,grad_norm: 0.8023142010806082, iteration: 326857
loss: 0.994267463684082,grad_norm: 0.8164958735463901, iteration: 326858
loss: 1.0197829008102417,grad_norm: 0.6787276477306434, iteration: 326859
loss: 1.0169085264205933,grad_norm: 0.906411185601632, iteration: 326860
loss: 0.9821411967277527,grad_norm: 0.9946618763586964, iteration: 326861
loss: 0.9721888899803162,grad_norm: 0.8890988905188499, iteration: 326862
loss: 1.0040968656539917,grad_norm: 0.8702360760713234, iteration: 326863
loss: 0.9657256007194519,grad_norm: 0.7502525825415282, iteration: 326864
loss: 1.0044127702713013,grad_norm: 0.9999991549179248, iteration: 326865
loss: 0.9879934787750244,grad_norm: 0.7928414695491083, iteration: 326866
loss: 1.0204697847366333,grad_norm: 0.8105284914787607, iteration: 326867
loss: 1.001662254333496,grad_norm: 0.9808386051425266, iteration: 326868
loss: 1.0094761848449707,grad_norm: 0.9999999674719247, iteration: 326869
loss: 0.9945510625839233,grad_norm: 0.9068193151795679, iteration: 326870
loss: 0.9914960861206055,grad_norm: 0.7955144134689496, iteration: 326871
loss: 1.1003779172897339,grad_norm: 0.9999997116071957, iteration: 326872
loss: 0.9526570439338684,grad_norm: 0.84212124935363, iteration: 326873
loss: 0.9857615232467651,grad_norm: 0.9999991931796577, iteration: 326874
loss: 0.9748520255088806,grad_norm: 0.9999991349002993, iteration: 326875
loss: 1.054764747619629,grad_norm: 0.9999991546788968, iteration: 326876
loss: 1.0125163793563843,grad_norm: 0.806506465978535, iteration: 326877
loss: 0.9967233538627625,grad_norm: 0.8450105827689162, iteration: 326878
loss: 0.9920837879180908,grad_norm: 0.7826239729478279, iteration: 326879
loss: 1.0268999338150024,grad_norm: 0.9203436215946542, iteration: 326880
loss: 1.005316972732544,grad_norm: 0.907199834379202, iteration: 326881
loss: 0.9488265514373779,grad_norm: 0.99999903332361, iteration: 326882
loss: 1.038587212562561,grad_norm: 0.7802201259218763, iteration: 326883
loss: 1.0495189428329468,grad_norm: 0.9999999475186719, iteration: 326884
loss: 1.0319061279296875,grad_norm: 0.8256316216798484, iteration: 326885
loss: 1.0993162393569946,grad_norm: 0.9999998964042189, iteration: 326886
loss: 0.9845837950706482,grad_norm: 0.7469992570188883, iteration: 326887
loss: 0.9777902364730835,grad_norm: 0.7397818627210354, iteration: 326888
loss: 0.9901061654090881,grad_norm: 0.7401800313347561, iteration: 326889
loss: 1.0243136882781982,grad_norm: 0.7454268223251184, iteration: 326890
loss: 0.9603025317192078,grad_norm: 0.9455572317285199, iteration: 326891
loss: 0.9764448404312134,grad_norm: 0.7438179644498426, iteration: 326892
loss: 1.0235384702682495,grad_norm: 0.82365385625671, iteration: 326893
loss: 0.9858275651931763,grad_norm: 0.7179961526147953, iteration: 326894
loss: 0.9837095141410828,grad_norm: 0.763155018219042, iteration: 326895
loss: 1.0081697702407837,grad_norm: 0.822386726930267, iteration: 326896
loss: 0.995974600315094,grad_norm: 0.9003516465516753, iteration: 326897
loss: 0.9963112473487854,grad_norm: 0.9322758016169116, iteration: 326898
loss: 0.9997166991233826,grad_norm: 0.7459984485633546, iteration: 326899
loss: 1.0150209665298462,grad_norm: 0.9999990878810904, iteration: 326900
loss: 0.9897644519805908,grad_norm: 0.963718609878604, iteration: 326901
loss: 0.9810675382614136,grad_norm: 0.8072022895601977, iteration: 326902
loss: 1.0105429887771606,grad_norm: 0.7451578525894128, iteration: 326903
loss: 1.0328176021575928,grad_norm: 0.8494332610732908, iteration: 326904
loss: 1.0009658336639404,grad_norm: 0.9999997878897762, iteration: 326905
loss: 0.9635960459709167,grad_norm: 0.7782956065022438, iteration: 326906
loss: 1.013338565826416,grad_norm: 0.9859260544179013, iteration: 326907
loss: 1.006447196006775,grad_norm: 0.886070712136945, iteration: 326908
loss: 0.9911758303642273,grad_norm: 0.705629567793826, iteration: 326909
loss: 0.983069896697998,grad_norm: 0.9999990306631227, iteration: 326910
loss: 1.012285828590393,grad_norm: 0.9999990415886857, iteration: 326911
loss: 0.9945390224456787,grad_norm: 0.694601384251646, iteration: 326912
loss: 0.9968271851539612,grad_norm: 0.7397862726645676, iteration: 326913
loss: 1.0253443717956543,grad_norm: 0.7130794126345896, iteration: 326914
loss: 1.015122890472412,grad_norm: 0.7427021543639368, iteration: 326915
loss: 0.9815146923065186,grad_norm: 0.8410365111929713, iteration: 326916
loss: 1.0008611679077148,grad_norm: 0.9100180057408509, iteration: 326917
loss: 0.9568567276000977,grad_norm: 0.994125267621222, iteration: 326918
loss: 0.987607479095459,grad_norm: 0.8571272600271294, iteration: 326919
loss: 0.9493980407714844,grad_norm: 0.8368107969703467, iteration: 326920
loss: 1.0082939863204956,grad_norm: 0.7229335870494012, iteration: 326921
loss: 1.0239437818527222,grad_norm: 0.7397681327973007, iteration: 326922
loss: 0.992160439491272,grad_norm: 0.8826343553910901, iteration: 326923
loss: 0.994055449962616,grad_norm: 0.7130222157258633, iteration: 326924
loss: 1.0003644227981567,grad_norm: 0.9301966947475037, iteration: 326925
loss: 0.9941357374191284,grad_norm: 0.9999991378044303, iteration: 326926
loss: 0.9992328882217407,grad_norm: 0.8943188028650315, iteration: 326927
loss: 1.000815987586975,grad_norm: 0.7569491824043655, iteration: 326928
loss: 1.0258402824401855,grad_norm: 0.7410654992984014, iteration: 326929
loss: 1.0014747381210327,grad_norm: 0.9381686968172778, iteration: 326930
loss: 0.987375020980835,grad_norm: 0.8635804622410356, iteration: 326931
loss: 0.9977033138275146,grad_norm: 0.9146842155425827, iteration: 326932
loss: 0.9521077275276184,grad_norm: 0.9999992637859892, iteration: 326933
loss: 0.9794905185699463,grad_norm: 0.8198042371077876, iteration: 326934
loss: 0.9671055674552917,grad_norm: 0.7485018345068791, iteration: 326935
loss: 0.9876337647438049,grad_norm: 0.946314944254726, iteration: 326936
loss: 1.0132347345352173,grad_norm: 0.9294277261186591, iteration: 326937
loss: 0.9803816676139832,grad_norm: 0.8763401456705494, iteration: 326938
loss: 0.9685785174369812,grad_norm: 0.9999990507648443, iteration: 326939
loss: 1.015633225440979,grad_norm: 0.7701081793258652, iteration: 326940
loss: 0.9925445914268494,grad_norm: 0.7316764006537361, iteration: 326941
loss: 1.0021674633026123,grad_norm: 0.8235797733465805, iteration: 326942
loss: 1.0364792346954346,grad_norm: 0.9999997438480017, iteration: 326943
loss: 0.9991039633750916,grad_norm: 0.8774877796828708, iteration: 326944
loss: 0.9751348495483398,grad_norm: 0.9707118355361983, iteration: 326945
loss: 0.964728832244873,grad_norm: 0.7963558445469643, iteration: 326946
loss: 0.9858479499816895,grad_norm: 0.87423620239764, iteration: 326947
loss: 0.9927323460578918,grad_norm: 0.9665479304850872, iteration: 326948
loss: 0.9984048008918762,grad_norm: 0.8080736409717588, iteration: 326949
loss: 0.9852701425552368,grad_norm: 0.977621404715433, iteration: 326950
loss: 1.0037522315979004,grad_norm: 0.9022969617626252, iteration: 326951
loss: 1.0115935802459717,grad_norm: 0.8073732945845978, iteration: 326952
loss: 1.0353058576583862,grad_norm: 0.9999989629154565, iteration: 326953
loss: 1.0021682977676392,grad_norm: 0.7786908746041074, iteration: 326954
loss: 1.0002548694610596,grad_norm: 0.7439668270296554, iteration: 326955
loss: 0.9877218008041382,grad_norm: 0.9999991379637576, iteration: 326956
loss: 0.9902045130729675,grad_norm: 0.7988837159194406, iteration: 326957
loss: 1.0394785404205322,grad_norm: 0.9999993981420099, iteration: 326958
loss: 1.0355424880981445,grad_norm: 0.9030776965179146, iteration: 326959
loss: 0.9843247532844543,grad_norm: 0.9265567429690358, iteration: 326960
loss: 0.9629794359207153,grad_norm: 0.7752103744391438, iteration: 326961
loss: 1.0055660009384155,grad_norm: 0.749502906286728, iteration: 326962
loss: 1.0124098062515259,grad_norm: 0.8321286848255782, iteration: 326963
loss: 0.9772025942802429,grad_norm: 0.8588043554656352, iteration: 326964
loss: 1.1074743270874023,grad_norm: 0.9999995931069808, iteration: 326965
loss: 1.0421862602233887,grad_norm: 0.9999998922937984, iteration: 326966
loss: 0.9792606234550476,grad_norm: 0.8724246623651095, iteration: 326967
loss: 0.9913972020149231,grad_norm: 0.9055229287083687, iteration: 326968
loss: 1.0384825468063354,grad_norm: 0.9999991585195385, iteration: 326969
loss: 1.0104585886001587,grad_norm: 0.9999991258029597, iteration: 326970
loss: 1.0394377708435059,grad_norm: 0.7613176945756676, iteration: 326971
loss: 1.0247100591659546,grad_norm: 0.8956237988762159, iteration: 326972
loss: 1.0197540521621704,grad_norm: 0.9999996935324355, iteration: 326973
loss: 1.1671535968780518,grad_norm: 0.9999998540856452, iteration: 326974
loss: 1.0926073789596558,grad_norm: 0.9999995994418864, iteration: 326975
loss: 0.9914834499359131,grad_norm: 0.8460243454392182, iteration: 326976
loss: 0.9942827224731445,grad_norm: 0.9244519622209936, iteration: 326977
loss: 1.0271449089050293,grad_norm: 0.858725150779341, iteration: 326978
loss: 0.9682921767234802,grad_norm: 0.9999991987461901, iteration: 326979
loss: 0.9787985682487488,grad_norm: 0.8046379516260197, iteration: 326980
loss: 1.0150043964385986,grad_norm: 0.7293970447278517, iteration: 326981
loss: 0.9927616715431213,grad_norm: 0.8371310569504435, iteration: 326982
loss: 0.9762845039367676,grad_norm: 0.8625126256147244, iteration: 326983
loss: 0.993867814540863,grad_norm: 0.8871378810931994, iteration: 326984
loss: 1.0126736164093018,grad_norm: 0.899546104188188, iteration: 326985
loss: 0.9939701557159424,grad_norm: 0.8748688990372854, iteration: 326986
loss: 1.0024731159210205,grad_norm: 0.901642745163611, iteration: 326987
loss: 1.2326163053512573,grad_norm: 1.0000000092909083, iteration: 326988
loss: 0.9906805157661438,grad_norm: 0.9282460574595803, iteration: 326989
loss: 1.0464400053024292,grad_norm: 0.9978514204089716, iteration: 326990
loss: 0.9893385171890259,grad_norm: 0.9335845788246409, iteration: 326991
loss: 1.0097177028656006,grad_norm: 0.9999993382183204, iteration: 326992
loss: 1.016379475593567,grad_norm: 0.7527491581265716, iteration: 326993
loss: 0.9692298173904419,grad_norm: 0.7179638607085664, iteration: 326994
loss: 0.9683005809783936,grad_norm: 0.9268427727736518, iteration: 326995
loss: 1.0369884967803955,grad_norm: 0.9999990174764174, iteration: 326996
loss: 1.032108187675476,grad_norm: 0.8389190669067799, iteration: 326997
loss: 1.0270256996154785,grad_norm: 0.9999992627934577, iteration: 326998
loss: 0.9993935227394104,grad_norm: 0.8330201213489825, iteration: 326999
loss: 0.9743515253067017,grad_norm: 0.9044724088826452, iteration: 327000
loss: 1.023266315460205,grad_norm: 0.7618098826693698, iteration: 327001
loss: 0.9687060713768005,grad_norm: 0.7561059066412119, iteration: 327002
loss: 0.9776030778884888,grad_norm: 0.8120993237484113, iteration: 327003
loss: 1.1211639642715454,grad_norm: 0.9999997877628068, iteration: 327004
loss: 1.0089317560195923,grad_norm: 0.9439394154612294, iteration: 327005
loss: 1.0016292333602905,grad_norm: 0.8589907083535238, iteration: 327006
loss: 0.9788524508476257,grad_norm: 0.9615982802321451, iteration: 327007
loss: 0.9870009422302246,grad_norm: 0.9822324866626676, iteration: 327008
loss: 0.9873275756835938,grad_norm: 0.9999991812612388, iteration: 327009
loss: 1.0254169702529907,grad_norm: 0.9426672052705002, iteration: 327010
loss: 0.9430462718009949,grad_norm: 0.8712775511126871, iteration: 327011
loss: 1.0025441646575928,grad_norm: 0.9999993490171644, iteration: 327012
loss: 0.9454478621482849,grad_norm: 0.8019884729863425, iteration: 327013
loss: 0.9852966070175171,grad_norm: 0.7821078218830567, iteration: 327014
loss: 1.0054417848587036,grad_norm: 0.7068736397879878, iteration: 327015
loss: 1.0251866579055786,grad_norm: 0.7713174522093482, iteration: 327016
loss: 1.0400078296661377,grad_norm: 0.9999998423088426, iteration: 327017
loss: 1.0176182985305786,grad_norm: 0.8952806315840403, iteration: 327018
loss: 1.0267162322998047,grad_norm: 0.9999997881739433, iteration: 327019
loss: 1.0081630945205688,grad_norm: 0.8491222475098636, iteration: 327020
loss: 0.9970208406448364,grad_norm: 0.744033658301593, iteration: 327021
loss: 0.9897943139076233,grad_norm: 0.6811028059091511, iteration: 327022
loss: 1.1244990825653076,grad_norm: 0.9999991461512175, iteration: 327023
loss: 0.995134711265564,grad_norm: 0.8763510938566821, iteration: 327024
loss: 0.9895271062850952,grad_norm: 0.8192187635592013, iteration: 327025
loss: 1.0148342847824097,grad_norm: 0.7787242769554013, iteration: 327026
loss: 0.9941591620445251,grad_norm: 0.7402326409339571, iteration: 327027
loss: 1.0037096738815308,grad_norm: 0.9999988385803552, iteration: 327028
loss: 1.0300912857055664,grad_norm: 0.9999991139100822, iteration: 327029
loss: 1.0129191875457764,grad_norm: 0.9256750259287587, iteration: 327030
loss: 0.9921467304229736,grad_norm: 0.9013729171764451, iteration: 327031
loss: 1.001729965209961,grad_norm: 0.9999994359152589, iteration: 327032
loss: 1.0250061750411987,grad_norm: 0.9999990714332053, iteration: 327033
loss: 0.9902105927467346,grad_norm: 0.832913881745839, iteration: 327034
loss: 0.9761665463447571,grad_norm: 0.7773652659217468, iteration: 327035
loss: 1.0038782358169556,grad_norm: 0.9359904821579341, iteration: 327036
loss: 0.9986802935600281,grad_norm: 0.9816897125788245, iteration: 327037
loss: 1.1976958513259888,grad_norm: 0.9999997402393245, iteration: 327038
loss: 1.0052341222763062,grad_norm: 0.826723756693056, iteration: 327039
loss: 1.00969660282135,grad_norm: 0.9662875591235509, iteration: 327040
loss: 1.0019776821136475,grad_norm: 0.7659037084903475, iteration: 327041
loss: 1.0032538175582886,grad_norm: 0.8324582528519929, iteration: 327042
loss: 1.099226713180542,grad_norm: 0.9999990921364089, iteration: 327043
loss: 1.008273720741272,grad_norm: 0.9999993601146183, iteration: 327044
loss: 1.0352078676223755,grad_norm: 0.999999601023013, iteration: 327045
loss: 0.9830076694488525,grad_norm: 0.8919628104406643, iteration: 327046
loss: 1.0030137300491333,grad_norm: 0.9999992134520574, iteration: 327047
loss: 1.0042005777359009,grad_norm: 0.8875486552146105, iteration: 327048
loss: 0.9756191968917847,grad_norm: 0.8731100132264323, iteration: 327049
loss: 1.0632072687149048,grad_norm: 0.9999998837853951, iteration: 327050
loss: 0.9849863052368164,grad_norm: 0.8918486039741561, iteration: 327051
loss: 0.9774211049079895,grad_norm: 0.7149092850790594, iteration: 327052
loss: 0.987852156162262,grad_norm: 0.8436353254061235, iteration: 327053
loss: 1.0184656381607056,grad_norm: 0.9770962479899337, iteration: 327054
loss: 1.1490050554275513,grad_norm: 0.9646697098832706, iteration: 327055
loss: 0.9784516096115112,grad_norm: 0.6634417010186809, iteration: 327056
loss: 0.9978294968605042,grad_norm: 0.995969528720856, iteration: 327057
loss: 1.046897053718567,grad_norm: 0.9622418244105899, iteration: 327058
loss: 1.0452505350112915,grad_norm: 0.9999992275237686, iteration: 327059
loss: 0.9922844767570496,grad_norm: 0.8046347654001557, iteration: 327060
loss: 0.9881099462509155,grad_norm: 0.8303363935540081, iteration: 327061
loss: 0.9882168173789978,grad_norm: 0.9006487138545428, iteration: 327062
loss: 0.9818165898323059,grad_norm: 0.7489406031602259, iteration: 327063
loss: 1.0820308923721313,grad_norm: 0.8536578658316937, iteration: 327064
loss: 1.0629887580871582,grad_norm: 0.9943405142921319, iteration: 327065
loss: 0.9853819608688354,grad_norm: 0.7948593655853216, iteration: 327066
loss: 0.9714884161949158,grad_norm: 0.7094908708863608, iteration: 327067
loss: 0.9516319036483765,grad_norm: 0.8081234842405726, iteration: 327068
loss: 1.0490673780441284,grad_norm: 0.99999980600729, iteration: 327069
loss: 1.0826472043991089,grad_norm: 0.8870639076495943, iteration: 327070
loss: 1.0473679304122925,grad_norm: 0.9999990595113976, iteration: 327071
loss: 1.0037016868591309,grad_norm: 0.8352012173822929, iteration: 327072
loss: 0.9932006001472473,grad_norm: 0.8185666706072086, iteration: 327073
loss: 1.0104106664657593,grad_norm: 0.9999992902573593, iteration: 327074
loss: 0.9925927519798279,grad_norm: 0.8339119456053822, iteration: 327075
loss: 1.0035971403121948,grad_norm: 0.8870749401352545, iteration: 327076
loss: 1.0208195447921753,grad_norm: 0.8106803583496384, iteration: 327077
loss: 1.0104515552520752,grad_norm: 0.7436182546156382, iteration: 327078
loss: 1.0266461372375488,grad_norm: 0.8479797643914627, iteration: 327079
loss: 1.0262093544006348,grad_norm: 0.8180401961059353, iteration: 327080
loss: 1.0085920095443726,grad_norm: 0.9999996819760283, iteration: 327081
loss: 1.003926396369934,grad_norm: 0.6505292170095721, iteration: 327082
loss: 1.0093390941619873,grad_norm: 0.9043404296198202, iteration: 327083
loss: 0.9831858277320862,grad_norm: 0.704362140885867, iteration: 327084
loss: 0.9887259006500244,grad_norm: 0.8543574475330931, iteration: 327085
loss: 1.00208580493927,grad_norm: 0.8637396372594974, iteration: 327086
loss: 1.0027401447296143,grad_norm: 0.992692823951853, iteration: 327087
loss: 1.019417405128479,grad_norm: 0.8796765121828537, iteration: 327088
loss: 1.0140846967697144,grad_norm: 0.9768820623319177, iteration: 327089
loss: 1.0114421844482422,grad_norm: 0.9596039051434687, iteration: 327090
loss: 1.0065797567367554,grad_norm: 0.8231675511742332, iteration: 327091
loss: 0.9828351736068726,grad_norm: 0.8280367701131849, iteration: 327092
loss: 1.016581654548645,grad_norm: 0.858538349030763, iteration: 327093
loss: 0.9983656406402588,grad_norm: 0.9768030371308033, iteration: 327094
loss: 0.9796792268753052,grad_norm: 0.8981090149308556, iteration: 327095
loss: 0.9908201098442078,grad_norm: 0.9198679006173701, iteration: 327096
loss: 1.0084789991378784,grad_norm: 0.8562016516676347, iteration: 327097
loss: 1.0044225454330444,grad_norm: 0.8567993393128988, iteration: 327098
loss: 0.9899999499320984,grad_norm: 0.7993044996612991, iteration: 327099
loss: 0.9755726456642151,grad_norm: 0.8387260949862936, iteration: 327100
loss: 0.9887271523475647,grad_norm: 0.8959052536803108, iteration: 327101
loss: 1.2124817371368408,grad_norm: 0.9999998399977568, iteration: 327102
loss: 1.0498746633529663,grad_norm: 0.8948624958973151, iteration: 327103
loss: 1.0358107089996338,grad_norm: 0.9961325459569481, iteration: 327104
loss: 1.0043689012527466,grad_norm: 0.7721953700655159, iteration: 327105
loss: 1.0391582250595093,grad_norm: 0.9999991978789222, iteration: 327106
loss: 0.9908390045166016,grad_norm: 0.7053282887232608, iteration: 327107
loss: 0.9722523093223572,grad_norm: 0.8305562740020106, iteration: 327108
loss: 1.0145183801651,grad_norm: 0.7402695663611847, iteration: 327109
loss: 0.9798455834388733,grad_norm: 0.6778336057416938, iteration: 327110
loss: 0.9969877004623413,grad_norm: 0.9567144871191776, iteration: 327111
loss: 1.0662076473236084,grad_norm: 0.9999993645576567, iteration: 327112
loss: 1.0170817375183105,grad_norm: 0.6294756889524464, iteration: 327113
loss: 1.02019464969635,grad_norm: 0.7751712685558831, iteration: 327114
loss: 1.0078215599060059,grad_norm: 0.8762202616200296, iteration: 327115
loss: 0.9879065752029419,grad_norm: 0.7369569218187567, iteration: 327116
loss: 0.9897371530532837,grad_norm: 0.7016735835843217, iteration: 327117
loss: 1.0028403997421265,grad_norm: 0.9594425190332723, iteration: 327118
loss: 1.008173942565918,grad_norm: 0.8433891945247516, iteration: 327119
loss: 1.0047663450241089,grad_norm: 0.8520159827674013, iteration: 327120
loss: 1.0373636484146118,grad_norm: 0.7755817250323616, iteration: 327121
loss: 1.0013420581817627,grad_norm: 0.7802146389594362, iteration: 327122
loss: 1.0117638111114502,grad_norm: 0.999999384730897, iteration: 327123
loss: 1.0133427381515503,grad_norm: 0.799849952882922, iteration: 327124
loss: 0.9813523292541504,grad_norm: 0.819131022007439, iteration: 327125
loss: 1.0754445791244507,grad_norm: 1.0000000191500864, iteration: 327126
loss: 1.038037896156311,grad_norm: 0.8449459903638734, iteration: 327127
loss: 0.9935999512672424,grad_norm: 0.7661686551503804, iteration: 327128
loss: 1.0035842657089233,grad_norm: 0.8836153857516174, iteration: 327129
loss: 0.9902834296226501,grad_norm: 0.691840624260921, iteration: 327130
loss: 0.9919883012771606,grad_norm: 0.8547885195169013, iteration: 327131
loss: 0.9932891130447388,grad_norm: 0.656316757225438, iteration: 327132
loss: 1.0194711685180664,grad_norm: 0.8205898050394045, iteration: 327133
loss: 0.9987354874610901,grad_norm: 0.7869577065592, iteration: 327134
loss: 1.010818600654602,grad_norm: 0.788329355291806, iteration: 327135
loss: 1.0016268491744995,grad_norm: 0.7982789139545174, iteration: 327136
loss: 1.0279147624969482,grad_norm: 0.9999991344562091, iteration: 327137
loss: 1.00734543800354,grad_norm: 0.8228542481224702, iteration: 327138
loss: 0.9733511209487915,grad_norm: 0.999999892835441, iteration: 327139
loss: 0.9603608846664429,grad_norm: 0.7969557893987212, iteration: 327140
loss: 1.0389957427978516,grad_norm: 0.9999990037746316, iteration: 327141
loss: 0.9828903079032898,grad_norm: 0.7561553419155679, iteration: 327142
loss: 1.0154436826705933,grad_norm: 0.7047736911644668, iteration: 327143
loss: 1.074655532836914,grad_norm: 0.9999996762021428, iteration: 327144
loss: 1.0182209014892578,grad_norm: 0.8714586680018733, iteration: 327145
loss: 1.0304487943649292,grad_norm: 0.9605810346115328, iteration: 327146
loss: 0.9819376468658447,grad_norm: 0.8437483994698934, iteration: 327147
loss: 1.0609968900680542,grad_norm: 0.9999996805558424, iteration: 327148
loss: 0.9829285740852356,grad_norm: 0.8645705070212034, iteration: 327149
loss: 0.9853360652923584,grad_norm: 0.8390335581531738, iteration: 327150
loss: 0.9748860597610474,grad_norm: 0.864940793669678, iteration: 327151
loss: 1.0026906728744507,grad_norm: 0.7991429299210014, iteration: 327152
loss: 0.964970052242279,grad_norm: 0.8819586638506267, iteration: 327153
loss: 0.9673025012016296,grad_norm: 0.881018698930063, iteration: 327154
loss: 1.0198005437850952,grad_norm: 0.999999078463037, iteration: 327155
loss: 1.0114725828170776,grad_norm: 0.7465294735529154, iteration: 327156
loss: 0.9634238481521606,grad_norm: 0.9999990206120773, iteration: 327157
loss: 1.0088469982147217,grad_norm: 0.8832856562715905, iteration: 327158
loss: 0.9939980506896973,grad_norm: 0.7625542853474667, iteration: 327159
loss: 0.9961485266685486,grad_norm: 0.8911897801624455, iteration: 327160
loss: 1.0226192474365234,grad_norm: 0.7468733985026438, iteration: 327161
loss: 1.0219324827194214,grad_norm: 0.956897598864173, iteration: 327162
loss: 1.0049359798431396,grad_norm: 0.758779795109939, iteration: 327163
loss: 0.9927715063095093,grad_norm: 0.9668930387377734, iteration: 327164
loss: 0.9752603769302368,grad_norm: 0.8014620626723862, iteration: 327165
loss: 0.9842236042022705,grad_norm: 0.8276022395163126, iteration: 327166
loss: 1.0406489372253418,grad_norm: 0.8589293914559238, iteration: 327167
loss: 1.0070394277572632,grad_norm: 0.9999992323723041, iteration: 327168
loss: 0.9828026294708252,grad_norm: 0.8066582545293052, iteration: 327169
loss: 1.0025988817214966,grad_norm: 0.8147535314694184, iteration: 327170
loss: 1.0198595523834229,grad_norm: 0.9999990368328404, iteration: 327171
loss: 0.9817813634872437,grad_norm: 0.8379639351876574, iteration: 327172
loss: 1.0619004964828491,grad_norm: 0.924379824400767, iteration: 327173
loss: 1.0304317474365234,grad_norm: 0.9999990462870241, iteration: 327174
loss: 1.0817707777023315,grad_norm: 0.9999998988140275, iteration: 327175
loss: 0.9972038269042969,grad_norm: 0.999999599638969, iteration: 327176
loss: 0.9842360615730286,grad_norm: 0.833726471480524, iteration: 327177
loss: 0.994623064994812,grad_norm: 0.811604125580996, iteration: 327178
loss: 0.9834482073783875,grad_norm: 0.804081059088474, iteration: 327179
loss: 1.0374672412872314,grad_norm: 0.9663879946612979, iteration: 327180
loss: 1.0307022333145142,grad_norm: 0.9999991327880636, iteration: 327181
loss: 0.9773685336112976,grad_norm: 0.7684070776237881, iteration: 327182
loss: 0.9840065836906433,grad_norm: 0.7401314270868822, iteration: 327183
loss: 0.9678813815116882,grad_norm: 0.8261137823380236, iteration: 327184
loss: 1.0066535472869873,grad_norm: 0.8322725868868694, iteration: 327185
loss: 1.0127007961273193,grad_norm: 0.9999997764616412, iteration: 327186
loss: 1.0361095666885376,grad_norm: 0.9999996101928929, iteration: 327187
loss: 0.9647380709648132,grad_norm: 0.9999990923501592, iteration: 327188
loss: 1.0046900510787964,grad_norm: 0.7344932677126319, iteration: 327189
loss: 1.0483484268188477,grad_norm: 0.7959953306990264, iteration: 327190
loss: 0.9808987379074097,grad_norm: 0.7892470552449313, iteration: 327191
loss: 0.9937624335289001,grad_norm: 0.8550590939129745, iteration: 327192
loss: 0.9754090905189514,grad_norm: 0.8783346918302906, iteration: 327193
loss: 0.9856143593788147,grad_norm: 0.8951662653293384, iteration: 327194
loss: 1.0048376321792603,grad_norm: 0.8245619920581984, iteration: 327195
loss: 1.0310771465301514,grad_norm: 0.8573326274774977, iteration: 327196
loss: 0.9981992840766907,grad_norm: 0.999999181076944, iteration: 327197
loss: 0.9906906485557556,grad_norm: 0.999998934692889, iteration: 327198
loss: 1.0142494440078735,grad_norm: 0.7759860364958359, iteration: 327199
loss: 1.0048664808273315,grad_norm: 0.9411499398992604, iteration: 327200
loss: 1.0025795698165894,grad_norm: 0.8899794535529406, iteration: 327201
loss: 1.047128438949585,grad_norm: 0.9883300756593124, iteration: 327202
loss: 1.01780366897583,grad_norm: 0.9999999693067323, iteration: 327203
loss: 0.9724151492118835,grad_norm: 0.9810435460931032, iteration: 327204
loss: 0.9865157008171082,grad_norm: 0.7036757897065664, iteration: 327205
loss: 0.990753173828125,grad_norm: 0.6812137432545154, iteration: 327206
loss: 1.035109281539917,grad_norm: 0.9999998336376702, iteration: 327207
loss: 1.0483232736587524,grad_norm: 0.862990484005312, iteration: 327208
loss: 0.9990154504776001,grad_norm: 0.7913038665792713, iteration: 327209
loss: 0.9647595286369324,grad_norm: 0.8718600692686869, iteration: 327210
loss: 1.0047848224639893,grad_norm: 0.9429644479656125, iteration: 327211
loss: 0.9780423045158386,grad_norm: 0.8308291030139884, iteration: 327212
loss: 1.0089112520217896,grad_norm: 0.8341056559693689, iteration: 327213
loss: 0.9898334741592407,grad_norm: 0.8251676645231352, iteration: 327214
loss: 0.9867220520973206,grad_norm: 0.9155207615115848, iteration: 327215
loss: 1.0061640739440918,grad_norm: 0.6478891029482665, iteration: 327216
loss: 0.9944307804107666,grad_norm: 0.8838679433865406, iteration: 327217
loss: 0.992447555065155,grad_norm: 0.744770019716147, iteration: 327218
loss: 0.9828208088874817,grad_norm: 0.7841891714895692, iteration: 327219
loss: 0.9972530603408813,grad_norm: 0.963783252168441, iteration: 327220
loss: 0.9641404151916504,grad_norm: 0.7524294884621815, iteration: 327221
loss: 1.023276448249817,grad_norm: 0.7568933500728738, iteration: 327222
loss: 0.9906138181686401,grad_norm: 0.8735952765922366, iteration: 327223
loss: 1.0401301383972168,grad_norm: 0.9999995471651119, iteration: 327224
loss: 0.9845868349075317,grad_norm: 0.9084061167062079, iteration: 327225
loss: 0.9920462369918823,grad_norm: 0.9655895260379261, iteration: 327226
loss: 1.0214712619781494,grad_norm: 0.9096516598464361, iteration: 327227
loss: 1.011170506477356,grad_norm: 0.7968722399248707, iteration: 327228
loss: 1.01791250705719,grad_norm: 0.835528745791744, iteration: 327229
loss: 1.0462008714675903,grad_norm: 0.7150834630335279, iteration: 327230
loss: 1.0080474615097046,grad_norm: 0.9271851029024241, iteration: 327231
loss: 1.0233385562896729,grad_norm: 0.9999996037202177, iteration: 327232
loss: 1.003730058670044,grad_norm: 0.7778051549562853, iteration: 327233
loss: 1.0222986936569214,grad_norm: 0.8167338193079428, iteration: 327234
loss: 0.9961927533149719,grad_norm: 0.999999772864603, iteration: 327235
loss: 0.9963423609733582,grad_norm: 0.799153978465631, iteration: 327236
loss: 0.9947289228439331,grad_norm: 0.8257028594161098, iteration: 327237
loss: 0.9971930980682373,grad_norm: 0.8568156968087495, iteration: 327238
loss: 1.0020493268966675,grad_norm: 0.8706410251069486, iteration: 327239
loss: 0.9963065981864929,grad_norm: 0.9020331747712659, iteration: 327240
loss: 1.0070899724960327,grad_norm: 0.8340931870397185, iteration: 327241
loss: 0.9912607073783875,grad_norm: 0.9999990408816587, iteration: 327242
loss: 1.0098258256912231,grad_norm: 0.7612199173213902, iteration: 327243
loss: 1.0083588361740112,grad_norm: 0.8493793859754377, iteration: 327244
loss: 0.9894989132881165,grad_norm: 0.9616516118941456, iteration: 327245
loss: 0.9751554727554321,grad_norm: 0.9024502841632612, iteration: 327246
loss: 1.0061569213867188,grad_norm: 0.8175187407304947, iteration: 327247
loss: 0.9969476461410522,grad_norm: 0.8851841004232249, iteration: 327248
loss: 0.9932494759559631,grad_norm: 0.756626239492402, iteration: 327249
loss: 0.9794755578041077,grad_norm: 0.9191495876802911, iteration: 327250
loss: 0.9846527576446533,grad_norm: 0.9253694124411166, iteration: 327251
loss: 0.9840612411499023,grad_norm: 0.6942500335780849, iteration: 327252
loss: 1.0002716779708862,grad_norm: 0.6755908204230332, iteration: 327253
loss: 1.0241858959197998,grad_norm: 0.9999991235702191, iteration: 327254
loss: 1.0044327974319458,grad_norm: 0.727808168719595, iteration: 327255
loss: 1.0111445188522339,grad_norm: 0.8668237083366545, iteration: 327256
loss: 1.0062501430511475,grad_norm: 0.7233417947206497, iteration: 327257
loss: 1.0221145153045654,grad_norm: 0.999999472775884, iteration: 327258
loss: 1.0304514169692993,grad_norm: 0.8800374272779221, iteration: 327259
loss: 1.0150370597839355,grad_norm: 0.9999989865213427, iteration: 327260
loss: 1.0066368579864502,grad_norm: 0.8646126340180441, iteration: 327261
loss: 1.0088609457015991,grad_norm: 0.7689240337888049, iteration: 327262
loss: 1.00375235080719,grad_norm: 0.8222536333181734, iteration: 327263
loss: 1.0394574403762817,grad_norm: 0.9999991067717592, iteration: 327264
loss: 1.0107332468032837,grad_norm: 0.999999163083227, iteration: 327265
loss: 0.9860239028930664,grad_norm: 0.8541183675771067, iteration: 327266
loss: 1.0561351776123047,grad_norm: 0.8707863883512225, iteration: 327267
loss: 0.9786360263824463,grad_norm: 0.8565375708434542, iteration: 327268
loss: 1.033628225326538,grad_norm: 0.999999931386604, iteration: 327269
loss: 1.038701057434082,grad_norm: 0.9437621156097836, iteration: 327270
loss: 1.0037732124328613,grad_norm: 0.9677403481643035, iteration: 327271
loss: 1.0047791004180908,grad_norm: 0.7736868179807422, iteration: 327272
loss: 0.9775833487510681,grad_norm: 0.9624919065178749, iteration: 327273
loss: 0.964453399181366,grad_norm: 0.7980913503182118, iteration: 327274
loss: 1.0795024633407593,grad_norm: 0.9140330886499388, iteration: 327275
loss: 1.0124939680099487,grad_norm: 0.903732465067063, iteration: 327276
loss: 1.048959493637085,grad_norm: 0.999999078090561, iteration: 327277
loss: 0.9825312495231628,grad_norm: 0.9999991741281048, iteration: 327278
loss: 0.9939492344856262,grad_norm: 0.8621933249131816, iteration: 327279
loss: 0.9745703935623169,grad_norm: 0.8727196294216911, iteration: 327280
loss: 1.000504732131958,grad_norm: 0.9816701838964029, iteration: 327281
loss: 0.9686652421951294,grad_norm: 0.7785322926966728, iteration: 327282
loss: 0.9941450357437134,grad_norm: 0.8962768865489676, iteration: 327283
loss: 0.9920184016227722,grad_norm: 0.9340884849021954, iteration: 327284
loss: 0.9906789660453796,grad_norm: 0.924571769174311, iteration: 327285
loss: 0.9782683849334717,grad_norm: 0.7589661430810679, iteration: 327286
loss: 1.002263069152832,grad_norm: 0.7334347365178834, iteration: 327287
loss: 1.0062748193740845,grad_norm: 0.9480565622931559, iteration: 327288
loss: 1.026315450668335,grad_norm: 0.9837911859807296, iteration: 327289
loss: 1.023408055305481,grad_norm: 0.9146510205865952, iteration: 327290
loss: 1.017701268196106,grad_norm: 0.9585523677889136, iteration: 327291
loss: 1.0739741325378418,grad_norm: 0.9999995189299659, iteration: 327292
loss: 1.0192584991455078,grad_norm: 0.9816403585617498, iteration: 327293
loss: 1.027931571006775,grad_norm: 0.9118389504227892, iteration: 327294
loss: 1.0149524211883545,grad_norm: 0.8486916276812112, iteration: 327295
loss: 1.0045465230941772,grad_norm: 0.7840244256367763, iteration: 327296
loss: 1.00979745388031,grad_norm: 0.8908839224587279, iteration: 327297
loss: 0.9742552638053894,grad_norm: 0.7135459634243355, iteration: 327298
loss: 1.0158129930496216,grad_norm: 0.9999990287499115, iteration: 327299
loss: 0.9673833847045898,grad_norm: 0.7187801230052059, iteration: 327300
loss: 1.0232666730880737,grad_norm: 0.8664042070201955, iteration: 327301
loss: 0.9772474765777588,grad_norm: 0.750798694538175, iteration: 327302
loss: 0.9906410574913025,grad_norm: 0.8414491624003035, iteration: 327303
loss: 1.0175888538360596,grad_norm: 0.9339742389670738, iteration: 327304
loss: 1.0229078531265259,grad_norm: 0.8340901133565368, iteration: 327305
loss: 1.0085426568984985,grad_norm: 0.7571022977904425, iteration: 327306
loss: 0.9861845374107361,grad_norm: 0.9999989037969622, iteration: 327307
loss: 0.9675758481025696,grad_norm: 0.9700667917240757, iteration: 327308
loss: 1.0052226781845093,grad_norm: 0.926782171303345, iteration: 327309
loss: 0.9592857360839844,grad_norm: 0.8576321117280985, iteration: 327310
loss: 1.0292158126831055,grad_norm: 0.9999998769478657, iteration: 327311
loss: 0.9921004176139832,grad_norm: 0.8122897073337494, iteration: 327312
loss: 1.003433346748352,grad_norm: 0.816100769182634, iteration: 327313
loss: 1.013863205909729,grad_norm: 0.8902651616116252, iteration: 327314
loss: 0.9970059394836426,grad_norm: 0.8638342463114498, iteration: 327315
loss: 1.012171983718872,grad_norm: 0.8734319240099799, iteration: 327316
loss: 1.0064349174499512,grad_norm: 0.8229289570535135, iteration: 327317
loss: 0.9948745965957642,grad_norm: 0.7586902148609297, iteration: 327318
loss: 1.0088120698928833,grad_norm: 0.8998756474844362, iteration: 327319
loss: 1.0281498432159424,grad_norm: 0.9999990279548304, iteration: 327320
loss: 1.0139100551605225,grad_norm: 0.901718802616605, iteration: 327321
loss: 1.0187040567398071,grad_norm: 0.8004231702658839, iteration: 327322
loss: 0.9693202972412109,grad_norm: 0.9047270646683298, iteration: 327323
loss: 0.9764343500137329,grad_norm: 0.9056960544892285, iteration: 327324
loss: 1.0077111721038818,grad_norm: 0.8495946628696464, iteration: 327325
loss: 1.0409973859786987,grad_norm: 0.9439675540503862, iteration: 327326
loss: 1.005804419517517,grad_norm: 0.9649091725978077, iteration: 327327
loss: 1.0142085552215576,grad_norm: 0.9244322229032873, iteration: 327328
loss: 0.9866232872009277,grad_norm: 0.9623199209739488, iteration: 327329
loss: 1.0167911052703857,grad_norm: 0.7899239758556985, iteration: 327330
loss: 0.9816029667854309,grad_norm: 0.7648533606282729, iteration: 327331
loss: 0.9915571212768555,grad_norm: 0.9770771819781114, iteration: 327332
loss: 0.9796313643455505,grad_norm: 0.7416009039594411, iteration: 327333
loss: 0.9523032307624817,grad_norm: 0.916229023893146, iteration: 327334
loss: 1.0241504907608032,grad_norm: 0.9999993663216729, iteration: 327335
loss: 0.9742827415466309,grad_norm: 0.722290109934205, iteration: 327336
loss: 1.009270429611206,grad_norm: 0.9026802703900062, iteration: 327337
loss: 1.0145117044448853,grad_norm: 0.8329263066374278, iteration: 327338
loss: 1.0044972896575928,grad_norm: 0.7466653338598391, iteration: 327339
loss: 0.9874254465103149,grad_norm: 0.9193034586184368, iteration: 327340
loss: 1.0100451707839966,grad_norm: 0.7658202704986619, iteration: 327341
loss: 1.0313018560409546,grad_norm: 0.8156331565815218, iteration: 327342
loss: 1.0076652765274048,grad_norm: 0.8251360484043164, iteration: 327343
loss: 0.9929036498069763,grad_norm: 0.9999992676794726, iteration: 327344
loss: 0.98423171043396,grad_norm: 0.8741864302681897, iteration: 327345
loss: 0.9985629916191101,grad_norm: 0.999999570953212, iteration: 327346
loss: 1.0307468175888062,grad_norm: 0.8459602005887801, iteration: 327347
loss: 0.9713512659072876,grad_norm: 0.7437226950066584, iteration: 327348
loss: 0.9809346199035645,grad_norm: 0.7146627695034056, iteration: 327349
loss: 0.99461829662323,grad_norm: 0.7396118965698044, iteration: 327350
loss: 1.0040324926376343,grad_norm: 0.7422524855129853, iteration: 327351
loss: 1.0267333984375,grad_norm: 0.73250375260163, iteration: 327352
loss: 0.9843392968177795,grad_norm: 0.8946567937032619, iteration: 327353
loss: 1.0021450519561768,grad_norm: 0.8664071633818389, iteration: 327354
loss: 1.0046343803405762,grad_norm: 0.805180176349263, iteration: 327355
loss: 0.9517261385917664,grad_norm: 0.8993478958453778, iteration: 327356
loss: 1.0168160200119019,grad_norm: 0.9999991480460073, iteration: 327357
loss: 0.9978976845741272,grad_norm: 0.8506950259003664, iteration: 327358
loss: 1.004494309425354,grad_norm: 0.7427527040360388, iteration: 327359
loss: 0.9946584701538086,grad_norm: 0.7194946936017832, iteration: 327360
loss: 1.021723985671997,grad_norm: 0.8338906370157629, iteration: 327361
loss: 1.0112128257751465,grad_norm: 0.827646922210608, iteration: 327362
loss: 1.0117285251617432,grad_norm: 0.9999993439678103, iteration: 327363
loss: 1.0123893022537231,grad_norm: 0.7767157888640183, iteration: 327364
loss: 1.0157554149627686,grad_norm: 0.7863655681260364, iteration: 327365
loss: 0.9894329905509949,grad_norm: 0.8015419189402204, iteration: 327366
loss: 0.9911561608314514,grad_norm: 0.8684298308803312, iteration: 327367
loss: 0.9837811589241028,grad_norm: 0.7645100437685638, iteration: 327368
loss: 1.0018025636672974,grad_norm: 0.9081392515804726, iteration: 327369
loss: 0.9983062744140625,grad_norm: 0.8404363923963037, iteration: 327370
loss: 1.0116820335388184,grad_norm: 0.8784037394329463, iteration: 327371
loss: 0.9959603548049927,grad_norm: 0.98908452314154, iteration: 327372
loss: 0.9616823792457581,grad_norm: 0.8499248352687612, iteration: 327373
loss: 1.1042406558990479,grad_norm: 0.999999969876965, iteration: 327374
loss: 0.9679414629936218,grad_norm: 0.6939057927341716, iteration: 327375
loss: 0.9803368449211121,grad_norm: 0.7755038636687366, iteration: 327376
loss: 0.9803825616836548,grad_norm: 0.7703422191745675, iteration: 327377
loss: 1.0615476369857788,grad_norm: 0.7493225559958872, iteration: 327378
loss: 0.9959756135940552,grad_norm: 0.8263572289042278, iteration: 327379
loss: 1.0373320579528809,grad_norm: 0.8002411285152522, iteration: 327380
loss: 1.0489414930343628,grad_norm: 0.789391534110927, iteration: 327381
loss: 1.0201783180236816,grad_norm: 0.7580467484001396, iteration: 327382
loss: 1.1037757396697998,grad_norm: 0.9999990565973116, iteration: 327383
loss: 1.035560965538025,grad_norm: 0.9422793475220743, iteration: 327384
loss: 0.9617804288864136,grad_norm: 0.8618994165664702, iteration: 327385
loss: 0.9988818764686584,grad_norm: 0.9079218722419178, iteration: 327386
loss: 0.9739129543304443,grad_norm: 0.7763070869825904, iteration: 327387
loss: 1.0199487209320068,grad_norm: 0.921146879391139, iteration: 327388
loss: 0.9992112517356873,grad_norm: 0.9272180968860441, iteration: 327389
loss: 0.9726572632789612,grad_norm: 0.8802914674338136, iteration: 327390
loss: 0.9872817993164062,grad_norm: 0.9899798725271849, iteration: 327391
loss: 1.0149955749511719,grad_norm: 0.6832918221877746, iteration: 327392
loss: 0.9908068180084229,grad_norm: 0.8964435252869855, iteration: 327393
loss: 0.9745287299156189,grad_norm: 0.7780296760230253, iteration: 327394
loss: 0.9939109086990356,grad_norm: 0.8611778308900393, iteration: 327395
loss: 1.03621244430542,grad_norm: 0.999999421352876, iteration: 327396
loss: 1.0256156921386719,grad_norm: 0.8132461674395509, iteration: 327397
loss: 0.9942480325698853,grad_norm: 0.7746314196338798, iteration: 327398
loss: 1.002321720123291,grad_norm: 0.9425129661318429, iteration: 327399
loss: 0.9798312187194824,grad_norm: 0.8363299664864966, iteration: 327400
loss: 0.9954143762588501,grad_norm: 0.9999992228614266, iteration: 327401
loss: 0.9853164553642273,grad_norm: 0.8409860866503334, iteration: 327402
loss: 0.9818890690803528,grad_norm: 0.7193301757705607, iteration: 327403
loss: 0.9809679985046387,grad_norm: 0.923741352516102, iteration: 327404
loss: 1.0293196439743042,grad_norm: 0.7729429316628036, iteration: 327405
loss: 1.0091832876205444,grad_norm: 0.9935576244981656, iteration: 327406
loss: 0.9927671551704407,grad_norm: 0.9456632038047217, iteration: 327407
loss: 1.0464303493499756,grad_norm: 0.9999992398672741, iteration: 327408
loss: 1.059414267539978,grad_norm: 0.8583739741142581, iteration: 327409
loss: 1.0270121097564697,grad_norm: 0.8001359317791747, iteration: 327410
loss: 1.0826865434646606,grad_norm: 0.8936065968887329, iteration: 327411
loss: 1.0233343839645386,grad_norm: 0.9999993154726914, iteration: 327412
loss: 1.0118557214736938,grad_norm: 0.8889207210902522, iteration: 327413
loss: 1.0195544958114624,grad_norm: 0.7733523092664409, iteration: 327414
loss: 0.9886398911476135,grad_norm: 0.7716280228413985, iteration: 327415
loss: 0.9971038103103638,grad_norm: 0.7586574891603851, iteration: 327416
loss: 0.9951786398887634,grad_norm: 0.7986250724213291, iteration: 327417
loss: 0.979822039604187,grad_norm: 0.917175059431227, iteration: 327418
loss: 0.9853077530860901,grad_norm: 0.6844831771361681, iteration: 327419
loss: 1.012816071510315,grad_norm: 0.9011027390102665, iteration: 327420
loss: 0.9890395998954773,grad_norm: 0.7988058128934683, iteration: 327421
loss: 0.9946248531341553,grad_norm: 0.7210143526404321, iteration: 327422
loss: 0.9752407670021057,grad_norm: 0.9865599063332637, iteration: 327423
loss: 1.019148588180542,grad_norm: 0.9999990410190236, iteration: 327424
loss: 1.0060445070266724,grad_norm: 0.7514159976903922, iteration: 327425
loss: 0.9930750131607056,grad_norm: 0.8135342469934485, iteration: 327426
loss: 1.0111644268035889,grad_norm: 0.9361703258663255, iteration: 327427
loss: 0.9569013118743896,grad_norm: 0.7715118241398863, iteration: 327428
loss: 1.000547170639038,grad_norm: 0.7507827011191667, iteration: 327429
loss: 1.0022554397583008,grad_norm: 0.7547979430973826, iteration: 327430
loss: 0.9946718215942383,grad_norm: 0.6403641344626549, iteration: 327431
loss: 1.0363520383834839,grad_norm: 0.8370679772553148, iteration: 327432
loss: 1.0226389169692993,grad_norm: 0.8776343797236189, iteration: 327433
loss: 1.0111058950424194,grad_norm: 0.8924782144949223, iteration: 327434
loss: 0.9703224897384644,grad_norm: 0.992501639063873, iteration: 327435
loss: 1.010081171989441,grad_norm: 0.8915182861815016, iteration: 327436
loss: 1.0025752782821655,grad_norm: 0.9999991252406247, iteration: 327437
loss: 0.9851504564285278,grad_norm: 0.9999990971958445, iteration: 327438
loss: 1.018330454826355,grad_norm: 0.8243424556315101, iteration: 327439
loss: 0.994055986404419,grad_norm: 0.8805570320732139, iteration: 327440
loss: 0.9578667283058167,grad_norm: 0.9999991687052878, iteration: 327441
loss: 0.9839552640914917,grad_norm: 0.9805480775699544, iteration: 327442
loss: 0.9775935411453247,grad_norm: 0.8120291835645282, iteration: 327443
loss: 0.9946605563163757,grad_norm: 0.8514936441001834, iteration: 327444
loss: 0.9660181403160095,grad_norm: 0.8464001719001633, iteration: 327445
loss: 1.053292155265808,grad_norm: 0.9999990422754033, iteration: 327446
loss: 1.0079575777053833,grad_norm: 0.7947950726900291, iteration: 327447
loss: 1.0529967546463013,grad_norm: 0.9610387707796665, iteration: 327448
loss: 1.048757553100586,grad_norm: 0.8838562154736104, iteration: 327449
loss: 1.024906873703003,grad_norm: 0.9999999183971743, iteration: 327450
loss: 1.0148155689239502,grad_norm: 0.7175266028735092, iteration: 327451
loss: 0.9865742325782776,grad_norm: 0.8242173716670605, iteration: 327452
loss: 1.1349550485610962,grad_norm: 0.9999999103044281, iteration: 327453
loss: 0.9740946888923645,grad_norm: 0.9069372324813331, iteration: 327454
loss: 0.975887656211853,grad_norm: 0.8647513675186538, iteration: 327455
loss: 0.9854127168655396,grad_norm: 0.9103304433418469, iteration: 327456
loss: 0.9557234644889832,grad_norm: 0.8081743019965881, iteration: 327457
loss: 1.0138928890228271,grad_norm: 0.8436051977495796, iteration: 327458
loss: 1.0252952575683594,grad_norm: 0.8151484284751794, iteration: 327459
loss: 0.978502094745636,grad_norm: 0.7199108811656749, iteration: 327460
loss: 0.9898115396499634,grad_norm: 0.7255513528176377, iteration: 327461
loss: 1.0000380277633667,grad_norm: 0.7371771597823639, iteration: 327462
loss: 0.998558521270752,grad_norm: 0.9999992035216494, iteration: 327463
loss: 0.9936787486076355,grad_norm: 0.8980333161672353, iteration: 327464
loss: 1.0109983682632446,grad_norm: 0.8291016344650556, iteration: 327465
loss: 0.9655877351760864,grad_norm: 0.8957933862672643, iteration: 327466
loss: 1.0985716581344604,grad_norm: 0.9999991637753199, iteration: 327467
loss: 1.0080082416534424,grad_norm: 0.8313188671317866, iteration: 327468
loss: 0.99559485912323,grad_norm: 0.9066892557747888, iteration: 327469
loss: 0.9870579838752747,grad_norm: 0.9999991041813451, iteration: 327470
loss: 0.9997161626815796,grad_norm: 0.999999054883036, iteration: 327471
loss: 1.0091110467910767,grad_norm: 0.8516139249272241, iteration: 327472
loss: 0.9933571219444275,grad_norm: 0.9440879673362658, iteration: 327473
loss: 1.0428999662399292,grad_norm: 0.8205123686392141, iteration: 327474
loss: 1.0310075283050537,grad_norm: 0.8097060846111207, iteration: 327475
loss: 1.0060138702392578,grad_norm: 0.7138561928335988, iteration: 327476
loss: 1.0035483837127686,grad_norm: 0.9913527219544536, iteration: 327477
loss: 1.0097965002059937,grad_norm: 0.7384859281203667, iteration: 327478
loss: 1.0359727144241333,grad_norm: 0.7328366217936668, iteration: 327479
loss: 1.0094373226165771,grad_norm: 0.8841361372444327, iteration: 327480
loss: 0.9772037267684937,grad_norm: 0.8599607158028224, iteration: 327481
loss: 1.0027679204940796,grad_norm: 0.7396940402015585, iteration: 327482
loss: 1.00046968460083,grad_norm: 0.8114661131720087, iteration: 327483
loss: 1.006346583366394,grad_norm: 0.7075310270411977, iteration: 327484
loss: 1.035528540611267,grad_norm: 0.9999992061371272, iteration: 327485
loss: 1.0733166933059692,grad_norm: 0.7578207576689203, iteration: 327486
loss: 1.0074660778045654,grad_norm: 0.7355866257338776, iteration: 327487
loss: 0.981864869594574,grad_norm: 0.8754686220024456, iteration: 327488
loss: 0.9823225736618042,grad_norm: 0.8756953263740199, iteration: 327489
loss: 1.0031611919403076,grad_norm: 0.7227407519138866, iteration: 327490
loss: 0.973725438117981,grad_norm: 0.727288172533826, iteration: 327491
loss: 1.0059155225753784,grad_norm: 0.7303333830129425, iteration: 327492
loss: 0.9811687469482422,grad_norm: 0.8258024075661466, iteration: 327493
loss: 1.0219522714614868,grad_norm: 0.7249798012907985, iteration: 327494
loss: 0.9673731327056885,grad_norm: 0.74800753024517, iteration: 327495
loss: 0.9953539967536926,grad_norm: 0.8204865317736079, iteration: 327496
loss: 1.0154333114624023,grad_norm: 0.7499279606368063, iteration: 327497
loss: 1.0016168355941772,grad_norm: 0.7703179963026946, iteration: 327498
loss: 1.0030759572982788,grad_norm: 0.917252233896777, iteration: 327499
loss: 0.9705231785774231,grad_norm: 0.8383063279680145, iteration: 327500
loss: 1.0096036195755005,grad_norm: 0.9999991414832768, iteration: 327501
loss: 0.9982895851135254,grad_norm: 0.8380686723882198, iteration: 327502
loss: 1.0120383501052856,grad_norm: 0.7904733505817471, iteration: 327503
loss: 0.971246063709259,grad_norm: 0.8185941851059636, iteration: 327504
loss: 0.9748916029930115,grad_norm: 0.9999995026266947, iteration: 327505
loss: 0.9911739230155945,grad_norm: 0.9999990436523563, iteration: 327506
loss: 0.9748144149780273,grad_norm: 0.8329466275943711, iteration: 327507
loss: 0.9804602265357971,grad_norm: 0.7132307469966618, iteration: 327508
loss: 1.0151900053024292,grad_norm: 0.8222537801961703, iteration: 327509
loss: 1.0343565940856934,grad_norm: 0.9144931622750926, iteration: 327510
loss: 1.020963430404663,grad_norm: 0.8965096960333595, iteration: 327511
loss: 1.063818097114563,grad_norm: 0.9999997148303748, iteration: 327512
loss: 0.9760624766349792,grad_norm: 0.9266624632196125, iteration: 327513
loss: 0.9747160077095032,grad_norm: 0.97103887655165, iteration: 327514
loss: 0.9894333481788635,grad_norm: 0.7669857681490942, iteration: 327515
loss: 1.0086877346038818,grad_norm: 0.746126553087465, iteration: 327516
loss: 1.0163944959640503,grad_norm: 0.7980291597413749, iteration: 327517
loss: 0.961625337600708,grad_norm: 0.8285242817699224, iteration: 327518
loss: 0.9584238529205322,grad_norm: 0.9724232476006511, iteration: 327519
loss: 1.0096207857131958,grad_norm: 0.712532358111576, iteration: 327520
loss: 1.0232560634613037,grad_norm: 0.9999990585589645, iteration: 327521
loss: 1.0010058879852295,grad_norm: 0.99999908016141, iteration: 327522
loss: 0.9941206574440002,grad_norm: 0.9555247263805056, iteration: 327523
loss: 0.981854259967804,grad_norm: 0.8391259275353061, iteration: 327524
loss: 0.9979718327522278,grad_norm: 0.8413618576151214, iteration: 327525
loss: 0.9949713349342346,grad_norm: 0.8477920542206867, iteration: 327526
loss: 0.9811462759971619,grad_norm: 0.9484052100715636, iteration: 327527
loss: 0.9978916645050049,grad_norm: 0.9149521342143556, iteration: 327528
loss: 1.0236884355545044,grad_norm: 0.9999991200038145, iteration: 327529
loss: 0.9842717051506042,grad_norm: 0.926172454267942, iteration: 327530
loss: 1.0126060247421265,grad_norm: 0.8708415381237161, iteration: 327531
loss: 0.9784426689147949,grad_norm: 0.7798535706296307, iteration: 327532
loss: 1.043616771697998,grad_norm: 0.9999990509344934, iteration: 327533
loss: 1.002022385597229,grad_norm: 0.9105209654166982, iteration: 327534
loss: 1.0188885927200317,grad_norm: 0.9535253462736, iteration: 327535
loss: 1.0348764657974243,grad_norm: 0.8492737289210357, iteration: 327536
loss: 1.0122734308242798,grad_norm: 0.9449034731522102, iteration: 327537
loss: 0.9959217309951782,grad_norm: 0.7335163220867769, iteration: 327538
loss: 0.993651807308197,grad_norm: 0.846563877107143, iteration: 327539
loss: 1.005043625831604,grad_norm: 0.884599891414596, iteration: 327540
loss: 1.0152835845947266,grad_norm: 0.9999991051929049, iteration: 327541
loss: 0.9882224202156067,grad_norm: 0.9204291743550633, iteration: 327542
loss: 0.9680481553077698,grad_norm: 0.9514360805712652, iteration: 327543
loss: 0.9566444754600525,grad_norm: 0.9999991072214881, iteration: 327544
loss: 0.9972469806671143,grad_norm: 0.9999996257636671, iteration: 327545
loss: 1.0434478521347046,grad_norm: 0.9999995100117964, iteration: 327546
loss: 1.017912745475769,grad_norm: 0.9999994010821729, iteration: 327547
loss: 0.9459432363510132,grad_norm: 0.8677268281485414, iteration: 327548
loss: 0.9774859547615051,grad_norm: 0.8221735077851892, iteration: 327549
loss: 1.0027812719345093,grad_norm: 0.8971283194739945, iteration: 327550
loss: 0.982697069644928,grad_norm: 0.8960760195951791, iteration: 327551
loss: 0.9905989170074463,grad_norm: 0.871885336345258, iteration: 327552
loss: 0.9955927729606628,grad_norm: 0.7568969924771909, iteration: 327553
loss: 1.003615140914917,grad_norm: 0.8428794742860378, iteration: 327554
loss: 0.9910641312599182,grad_norm: 0.7940736636069052, iteration: 327555
loss: 0.9926604628562927,grad_norm: 0.8987496357474054, iteration: 327556
loss: 0.9489693641662598,grad_norm: 0.8304735551875947, iteration: 327557
loss: 0.970095694065094,grad_norm: 0.8873628640882809, iteration: 327558
loss: 1.0061593055725098,grad_norm: 0.7256146545052874, iteration: 327559
loss: 1.016128420829773,grad_norm: 0.9999992296599961, iteration: 327560
loss: 0.9880721569061279,grad_norm: 0.9999991175565554, iteration: 327561
loss: 1.079960584640503,grad_norm: 0.9999997520615674, iteration: 327562
loss: 1.0198746919631958,grad_norm: 0.8417544080786445, iteration: 327563
loss: 1.0445836782455444,grad_norm: 0.8295957073135383, iteration: 327564
loss: 1.0670478343963623,grad_norm: 0.972861606252309, iteration: 327565
loss: 1.0483812093734741,grad_norm: 0.925914047892072, iteration: 327566
loss: 1.0087192058563232,grad_norm: 0.8312349398826187, iteration: 327567
loss: 1.0020464658737183,grad_norm: 0.7370555916529307, iteration: 327568
loss: 1.0120586156845093,grad_norm: 0.7967136225421372, iteration: 327569
loss: 1.0174704790115356,grad_norm: 0.7113725075246206, iteration: 327570
loss: 1.0028904676437378,grad_norm: 0.9999989382818375, iteration: 327571
loss: 1.0008280277252197,grad_norm: 0.7422586116681694, iteration: 327572
loss: 1.0089937448501587,grad_norm: 0.797998868530764, iteration: 327573
loss: 0.9787066578865051,grad_norm: 0.819446962887015, iteration: 327574
loss: 0.9765532612800598,grad_norm: 0.7461348525694564, iteration: 327575
loss: 1.0023274421691895,grad_norm: 0.8338560731609307, iteration: 327576
loss: 1.0194144248962402,grad_norm: 0.8749882779922369, iteration: 327577
loss: 0.9931432008743286,grad_norm: 0.9172233422404855, iteration: 327578
loss: 1.0416967868804932,grad_norm: 0.999999482023045, iteration: 327579
loss: 1.0166709423065186,grad_norm: 0.6622426720164741, iteration: 327580
loss: 0.9849779009819031,grad_norm: 0.8322222550549642, iteration: 327581
loss: 1.0091997385025024,grad_norm: 0.7158504664818834, iteration: 327582
loss: 1.010018229484558,grad_norm: 0.7651281740796936, iteration: 327583
loss: 0.9885978102684021,grad_norm: 0.7616796254634085, iteration: 327584
loss: 1.0013201236724854,grad_norm: 0.9341065342615066, iteration: 327585
loss: 0.9703911542892456,grad_norm: 0.8116922883236287, iteration: 327586
loss: 0.9987359642982483,grad_norm: 0.8221949880991304, iteration: 327587
loss: 0.9951401948928833,grad_norm: 0.9666636329062398, iteration: 327588
loss: 0.9915427565574646,grad_norm: 0.6614903948237795, iteration: 327589
loss: 0.9752445816993713,grad_norm: 0.8216461914248273, iteration: 327590
loss: 1.0421336889266968,grad_norm: 0.9454001386692069, iteration: 327591
loss: 0.9708667993545532,grad_norm: 0.95508317381183, iteration: 327592
loss: 1.032470703125,grad_norm: 0.909205964352987, iteration: 327593
loss: 1.0030198097229004,grad_norm: 0.7699734935346457, iteration: 327594
loss: 0.950076699256897,grad_norm: 0.8544886396885529, iteration: 327595
loss: 1.0045907497406006,grad_norm: 0.9718434516263318, iteration: 327596
loss: 0.9657889008522034,grad_norm: 0.9188740121446111, iteration: 327597
loss: 1.0112966299057007,grad_norm: 0.6401149221939915, iteration: 327598
loss: 0.972711443901062,grad_norm: 0.7336135077084136, iteration: 327599
loss: 1.0119925737380981,grad_norm: 0.9999990189734277, iteration: 327600
loss: 0.9795924425125122,grad_norm: 0.9944109512707975, iteration: 327601
loss: 0.9947909116744995,grad_norm: 0.7609480266660492, iteration: 327602
loss: 0.9478020668029785,grad_norm: 0.8174218278398707, iteration: 327603
loss: 1.0298259258270264,grad_norm: 0.8045717494445728, iteration: 327604
loss: 0.9617440700531006,grad_norm: 0.9304037503082958, iteration: 327605
loss: 1.0094362497329712,grad_norm: 0.999999644507977, iteration: 327606
loss: 1.01186203956604,grad_norm: 0.7521132220735657, iteration: 327607
loss: 1.0484988689422607,grad_norm: 0.9999991307638797, iteration: 327608
loss: 0.9449712038040161,grad_norm: 0.7401927549714686, iteration: 327609
loss: 0.9783820509910583,grad_norm: 0.7019387077389577, iteration: 327610
loss: 0.9923312067985535,grad_norm: 0.860639293573804, iteration: 327611
loss: 1.013689398765564,grad_norm: 0.9999991414659809, iteration: 327612
loss: 1.0203946828842163,grad_norm: 0.917534344642756, iteration: 327613
loss: 0.9652131199836731,grad_norm: 0.9999991798310339, iteration: 327614
loss: 0.9977327585220337,grad_norm: 0.7325404622203804, iteration: 327615
loss: 1.0156856775283813,grad_norm: 0.9189834242459844, iteration: 327616
loss: 0.9495521187782288,grad_norm: 0.8126944213669952, iteration: 327617
loss: 0.9823148846626282,grad_norm: 0.7157443347291642, iteration: 327618
loss: 1.025645136833191,grad_norm: 0.9999997175020295, iteration: 327619
loss: 1.0169193744659424,grad_norm: 0.8530512838968577, iteration: 327620
loss: 1.0131795406341553,grad_norm: 0.7605391773468079, iteration: 327621
loss: 0.9468814730644226,grad_norm: 0.9999991145676395, iteration: 327622
loss: 1.0362898111343384,grad_norm: 0.9999994360165764, iteration: 327623
loss: 1.000196933746338,grad_norm: 0.8133741685536432, iteration: 327624
loss: 1.0097166299819946,grad_norm: 0.8663508650299269, iteration: 327625
loss: 1.0473504066467285,grad_norm: 0.9999997256774686, iteration: 327626
loss: 1.0162240266799927,grad_norm: 0.7585008744172822, iteration: 327627
loss: 1.0533770322799683,grad_norm: 0.8658369136013971, iteration: 327628
loss: 0.9801181554794312,grad_norm: 0.9587565862105888, iteration: 327629
loss: 1.0229706764221191,grad_norm: 0.999999034679462, iteration: 327630
loss: 0.9974075555801392,grad_norm: 0.9435006443051341, iteration: 327631
loss: 0.9803102612495422,grad_norm: 0.9265476418639684, iteration: 327632
loss: 1.032921552658081,grad_norm: 0.8364848967292191, iteration: 327633
loss: 0.9887722730636597,grad_norm: 0.9999990455998683, iteration: 327634
loss: 1.02800452709198,grad_norm: 0.8175982034385308, iteration: 327635
loss: 0.9988064169883728,grad_norm: 0.9699433310253175, iteration: 327636
loss: 0.9987296462059021,grad_norm: 0.7614535906311447, iteration: 327637
loss: 1.0088326930999756,grad_norm: 0.9392495280570424, iteration: 327638
loss: 0.9749979376792908,grad_norm: 0.9696396839748148, iteration: 327639
loss: 0.9958695769309998,grad_norm: 0.8800796153197833, iteration: 327640
loss: 1.024945616722107,grad_norm: 0.999999233471049, iteration: 327641
loss: 0.991694450378418,grad_norm: 0.9099822939516786, iteration: 327642
loss: 0.9544246792793274,grad_norm: 0.906748182117148, iteration: 327643
loss: 1.0083990097045898,grad_norm: 0.9825461150182996, iteration: 327644
loss: 0.9817957282066345,grad_norm: 0.8633504478289072, iteration: 327645
loss: 1.0281765460968018,grad_norm: 0.7470420158331953, iteration: 327646
loss: 1.0019299983978271,grad_norm: 0.7678364591170136, iteration: 327647
loss: 1.0547146797180176,grad_norm: 0.9999997640253501, iteration: 327648
loss: 1.013237714767456,grad_norm: 0.8826053697302576, iteration: 327649
loss: 0.9835941791534424,grad_norm: 0.8238402720396213, iteration: 327650
loss: 1.0282762050628662,grad_norm: 0.9999989892167822, iteration: 327651
loss: 1.0456019639968872,grad_norm: 0.9999991378697722, iteration: 327652
loss: 0.9999604225158691,grad_norm: 0.742097874256973, iteration: 327653
loss: 1.0392528772354126,grad_norm: 0.8197390558473033, iteration: 327654
loss: 0.9824559688568115,grad_norm: 0.7947243128479782, iteration: 327655
loss: 1.0086326599121094,grad_norm: 0.8108270879225739, iteration: 327656
loss: 0.9580121636390686,grad_norm: 0.7773825324436137, iteration: 327657
loss: 1.0408438444137573,grad_norm: 0.9306874534447667, iteration: 327658
loss: 0.9777789115905762,grad_norm: 0.7840938472544847, iteration: 327659
loss: 0.9933560490608215,grad_norm: 0.8918223966826634, iteration: 327660
loss: 0.9913778901100159,grad_norm: 0.7964659789708033, iteration: 327661
loss: 0.9598022699356079,grad_norm: 0.824260545921711, iteration: 327662
loss: 1.00151526927948,grad_norm: 0.7370313043502078, iteration: 327663
loss: 1.0152932405471802,grad_norm: 0.7824243617319638, iteration: 327664
loss: 1.0106616020202637,grad_norm: 0.8273092205168171, iteration: 327665
loss: 1.0839052200317383,grad_norm: 0.9612206276412397, iteration: 327666
loss: 0.9919160008430481,grad_norm: 0.7351053191515873, iteration: 327667
loss: 0.9723251461982727,grad_norm: 0.7481824944051476, iteration: 327668
loss: 1.002432942390442,grad_norm: 0.7927072782195146, iteration: 327669
loss: 1.0114384889602661,grad_norm: 0.93413574791107, iteration: 327670
loss: 0.9855427145957947,grad_norm: 0.9999992475079283, iteration: 327671
loss: 0.9802581071853638,grad_norm: 0.7913033692389987, iteration: 327672
loss: 0.9572376012802124,grad_norm: 0.7065981193111708, iteration: 327673
loss: 1.023950219154358,grad_norm: 0.7789301011469802, iteration: 327674
loss: 1.013216257095337,grad_norm: 0.7767539816558253, iteration: 327675
loss: 0.9813938140869141,grad_norm: 0.9999991396179835, iteration: 327676
loss: 1.0389796495437622,grad_norm: 0.9999993675193681, iteration: 327677
loss: 0.9828413724899292,grad_norm: 0.9849737575424465, iteration: 327678
loss: 0.9901743531227112,grad_norm: 0.9367443515464213, iteration: 327679
loss: 1.0254400968551636,grad_norm: 0.9999999104089119, iteration: 327680
loss: 1.027683973312378,grad_norm: 0.7225386773542836, iteration: 327681
loss: 1.0049002170562744,grad_norm: 0.8480467620224207, iteration: 327682
loss: 1.0198975801467896,grad_norm: 0.8236764957163971, iteration: 327683
loss: 0.9903796911239624,grad_norm: 0.875651501035498, iteration: 327684
loss: 1.0450156927108765,grad_norm: 0.9749353869550406, iteration: 327685
loss: 0.980951726436615,grad_norm: 0.8774025307649177, iteration: 327686
loss: 0.989147424697876,grad_norm: 0.9612730180275506, iteration: 327687
loss: 0.9713830351829529,grad_norm: 0.847613820648538, iteration: 327688
loss: 0.9601081609725952,grad_norm: 0.8508186741917679, iteration: 327689
loss: 1.0260097980499268,grad_norm: 0.7901938716978698, iteration: 327690
loss: 1.0114980936050415,grad_norm: 0.9238671264471863, iteration: 327691
loss: 1.021073818206787,grad_norm: 0.8447939544606642, iteration: 327692
loss: 0.9989772439002991,grad_norm: 0.8735236602834814, iteration: 327693
loss: 0.9717509746551514,grad_norm: 0.811824947786634, iteration: 327694
loss: 0.9700744152069092,grad_norm: 0.8544153860514476, iteration: 327695
loss: 0.9779868721961975,grad_norm: 0.9736445247594244, iteration: 327696
loss: 0.9959563612937927,grad_norm: 0.6466856956680666, iteration: 327697
loss: 1.0151612758636475,grad_norm: 0.924904830174317, iteration: 327698
loss: 0.9297822713851929,grad_norm: 0.7500908908600249, iteration: 327699
loss: 0.9943501353263855,grad_norm: 0.8923453730068586, iteration: 327700
loss: 0.9945575594902039,grad_norm: 0.8446049288940993, iteration: 327701
loss: 1.0054588317871094,grad_norm: 0.8686960235107531, iteration: 327702
loss: 0.9956039786338806,grad_norm: 0.7742184998377865, iteration: 327703
loss: 1.0004807710647583,grad_norm: 0.7960184343583998, iteration: 327704
loss: 0.9846340417861938,grad_norm: 0.850243479938708, iteration: 327705
loss: 1.027517557144165,grad_norm: 0.9805424549001323, iteration: 327706
loss: 0.998214066028595,grad_norm: 0.9287384311256978, iteration: 327707
loss: 0.9515214562416077,grad_norm: 0.7860052725225791, iteration: 327708
loss: 1.0048950910568237,grad_norm: 0.9176907099623303, iteration: 327709
loss: 0.9664279818534851,grad_norm: 0.9087116401527763, iteration: 327710
loss: 0.9924342036247253,grad_norm: 0.9562435150488132, iteration: 327711
loss: 1.005066156387329,grad_norm: 0.9169551488563482, iteration: 327712
loss: 0.9887424111366272,grad_norm: 0.999999237758257, iteration: 327713
loss: 0.9884576201438904,grad_norm: 0.8706611890086097, iteration: 327714
loss: 0.9988095164299011,grad_norm: 0.9249720511255871, iteration: 327715
loss: 1.0386145114898682,grad_norm: 0.8230128074609413, iteration: 327716
loss: 1.0221021175384521,grad_norm: 0.9999991071751477, iteration: 327717
loss: 1.038953185081482,grad_norm: 0.9967193169729202, iteration: 327718
loss: 0.9737504124641418,grad_norm: 0.7703106205930276, iteration: 327719
loss: 1.029341697692871,grad_norm: 0.7727159591402858, iteration: 327720
loss: 0.9643605351448059,grad_norm: 0.9999991774042515, iteration: 327721
loss: 1.0198384523391724,grad_norm: 0.7963417670898288, iteration: 327722
loss: 0.9968309998512268,grad_norm: 0.8875108159297977, iteration: 327723
loss: 0.978753924369812,grad_norm: 0.8159692276069098, iteration: 327724
loss: 1.0075429677963257,grad_norm: 0.8532468183264356, iteration: 327725
loss: 1.0056135654449463,grad_norm: 0.8206930152221806, iteration: 327726
loss: 1.0137830972671509,grad_norm: 0.9419250998197441, iteration: 327727
loss: 1.0154529809951782,grad_norm: 0.9999990177380271, iteration: 327728
loss: 1.09535551071167,grad_norm: 0.9398004187774447, iteration: 327729
loss: 0.9773745536804199,grad_norm: 0.8532358579450382, iteration: 327730
loss: 0.9809873700141907,grad_norm: 0.999998878881656, iteration: 327731
loss: 0.9968359470367432,grad_norm: 0.8967109832106099, iteration: 327732
loss: 1.0381635427474976,grad_norm: 0.9234570113528853, iteration: 327733
loss: 0.9714149832725525,grad_norm: 0.9140173441520334, iteration: 327734
loss: 1.0215164422988892,grad_norm: 0.7382252405929234, iteration: 327735
loss: 0.9842719435691833,grad_norm: 0.862488919369486, iteration: 327736
loss: 1.0361275672912598,grad_norm: 0.7721126793461937, iteration: 327737
loss: 1.0158382654190063,grad_norm: 0.7843965074940856, iteration: 327738
loss: 1.0122883319854736,grad_norm: 0.776761935845851, iteration: 327739
loss: 0.9831693768501282,grad_norm: 0.8580387616008645, iteration: 327740
loss: 0.9946014285087585,grad_norm: 0.7226982423983718, iteration: 327741
loss: 1.0592995882034302,grad_norm: 0.89444601075105, iteration: 327742
loss: 1.0044764280319214,grad_norm: 0.733083182714836, iteration: 327743
loss: 0.9855790138244629,grad_norm: 0.9999990760840555, iteration: 327744
loss: 1.0185271501541138,grad_norm: 0.9999991322550408, iteration: 327745
loss: 1.0513612031936646,grad_norm: 0.9488018178424071, iteration: 327746
loss: 0.9866946339607239,grad_norm: 0.8347309839669372, iteration: 327747
loss: 1.0064677000045776,grad_norm: 0.8683403625622419, iteration: 327748
loss: 1.056563377380371,grad_norm: 0.9097265771781511, iteration: 327749
loss: 1.0323801040649414,grad_norm: 0.9999992692266225, iteration: 327750
loss: 1.0205150842666626,grad_norm: 0.8943179412406996, iteration: 327751
loss: 0.9870077967643738,grad_norm: 0.8719418390329338, iteration: 327752
loss: 0.9487804174423218,grad_norm: 0.7595789357015588, iteration: 327753
loss: 0.9935488104820251,grad_norm: 0.792025760139055, iteration: 327754
loss: 1.0499736070632935,grad_norm: 0.9999989539409375, iteration: 327755
loss: 1.0071625709533691,grad_norm: 0.8852536499828304, iteration: 327756
loss: 1.003579020500183,grad_norm: 0.702738643338556, iteration: 327757
loss: 0.9662042856216431,grad_norm: 0.8953537262147138, iteration: 327758
loss: 0.975410521030426,grad_norm: 0.8621310187750153, iteration: 327759
loss: 0.9962208867073059,grad_norm: 0.8615816289019437, iteration: 327760
loss: 0.9610868692398071,grad_norm: 0.790731596781396, iteration: 327761
loss: 0.9586910605430603,grad_norm: 0.7623564198700183, iteration: 327762
loss: 1.0078030824661255,grad_norm: 0.9014434515130552, iteration: 327763
loss: 0.9857816696166992,grad_norm: 0.838397612989711, iteration: 327764
loss: 1.019959568977356,grad_norm: 0.940083056380495, iteration: 327765
loss: 0.9977774620056152,grad_norm: 0.9457453611459128, iteration: 327766
loss: 1.020880103111267,grad_norm: 0.82155161666598, iteration: 327767
loss: 0.9659345746040344,grad_norm: 0.8324672299581753, iteration: 327768
loss: 0.9666740298271179,grad_norm: 0.7868371005911259, iteration: 327769
loss: 1.0189398527145386,grad_norm: 0.7887417839494325, iteration: 327770
loss: 1.0074745416641235,grad_norm: 0.7832882719484225, iteration: 327771
loss: 0.9771586656570435,grad_norm: 0.7516547342085723, iteration: 327772
loss: 0.9902414083480835,grad_norm: 0.9484495666617102, iteration: 327773
loss: 0.9661043286323547,grad_norm: 0.9088196780532568, iteration: 327774
loss: 0.9766263961791992,grad_norm: 0.8381314979648402, iteration: 327775
loss: 0.9915017485618591,grad_norm: 0.8956078762846857, iteration: 327776
loss: 0.994300901889801,grad_norm: 0.7654967336675887, iteration: 327777
loss: 0.999318540096283,grad_norm: 0.7656165047149835, iteration: 327778
loss: 0.9954255223274231,grad_norm: 0.7525360305380011, iteration: 327779
loss: 0.96128910779953,grad_norm: 0.9370992934444234, iteration: 327780
loss: 0.9704904556274414,grad_norm: 0.8775353736389562, iteration: 327781
loss: 1.0275187492370605,grad_norm: 0.95595720771022, iteration: 327782
loss: 1.0022279024124146,grad_norm: 0.8311991827256212, iteration: 327783
loss: 1.0086385011672974,grad_norm: 0.7893977432688023, iteration: 327784
loss: 1.0182137489318848,grad_norm: 0.9652585351238074, iteration: 327785
loss: 1.0112810134887695,grad_norm: 0.861776582941487, iteration: 327786
loss: 0.9862813353538513,grad_norm: 0.8206964516545141, iteration: 327787
loss: 0.9950388669967651,grad_norm: 0.9707387864914547, iteration: 327788
loss: 1.0189807415008545,grad_norm: 0.7895134637625764, iteration: 327789
loss: 0.9886511564254761,grad_norm: 0.778332684550024, iteration: 327790
loss: 0.9561344385147095,grad_norm: 0.9302576318130386, iteration: 327791
loss: 0.9879641532897949,grad_norm: 0.9281867114651802, iteration: 327792
loss: 0.9877927899360657,grad_norm: 0.8433287141555552, iteration: 327793
loss: 1.00147545337677,grad_norm: 0.9999995730092899, iteration: 327794
loss: 0.9990661144256592,grad_norm: 0.8629699339157078, iteration: 327795
loss: 0.9999465346336365,grad_norm: 0.9999991218965307, iteration: 327796
loss: 0.9981074333190918,grad_norm: 0.8246764453901206, iteration: 327797
loss: 1.0193835496902466,grad_norm: 0.9999993869166558, iteration: 327798
loss: 1.0138094425201416,grad_norm: 0.8724620112327359, iteration: 327799
loss: 1.0022180080413818,grad_norm: 0.8251936563029132, iteration: 327800
loss: 0.9754868149757385,grad_norm: 0.7395557202167907, iteration: 327801
loss: 0.9711354374885559,grad_norm: 0.6313585592931771, iteration: 327802
loss: 0.9616085886955261,grad_norm: 0.9999990764438031, iteration: 327803
loss: 1.0013610124588013,grad_norm: 0.7906217245726103, iteration: 327804
loss: 1.018476963043213,grad_norm: 0.7154320952221677, iteration: 327805
loss: 0.9797626733779907,grad_norm: 0.9389607434538313, iteration: 327806
loss: 0.9672765135765076,grad_norm: 0.8952996902280975, iteration: 327807
loss: 0.9342204928398132,grad_norm: 0.7817771158905829, iteration: 327808
loss: 0.9987083673477173,grad_norm: 0.6468506632831748, iteration: 327809
loss: 0.9777776598930359,grad_norm: 0.882507540555221, iteration: 327810
loss: 1.029982566833496,grad_norm: 0.836704735853841, iteration: 327811
loss: 0.9871935248374939,grad_norm: 0.8570258810027314, iteration: 327812
loss: 1.0428338050842285,grad_norm: 0.9321714991134107, iteration: 327813
loss: 0.9954360723495483,grad_norm: 0.9043871709450757, iteration: 327814
loss: 0.9962013363838196,grad_norm: 0.8832060016996217, iteration: 327815
loss: 0.9704028367996216,grad_norm: 0.983854240382736, iteration: 327816
loss: 1.0477843284606934,grad_norm: 0.7096915940362669, iteration: 327817
loss: 0.9691892862319946,grad_norm: 0.8140548650981109, iteration: 327818
loss: 0.9942666292190552,grad_norm: 0.9211946898104894, iteration: 327819
loss: 0.9870990514755249,grad_norm: 0.9030804727663416, iteration: 327820
loss: 1.0081455707550049,grad_norm: 0.8564739055708444, iteration: 327821
loss: 1.0383836030960083,grad_norm: 0.8865507206004376, iteration: 327822
loss: 1.0132241249084473,grad_norm: 0.9999991779519085, iteration: 327823
loss: 1.016222596168518,grad_norm: 0.9842285350057588, iteration: 327824
loss: 0.9900087118148804,grad_norm: 0.8330981794631184, iteration: 327825
loss: 0.977425217628479,grad_norm: 0.9215194993786492, iteration: 327826
loss: 1.0125750303268433,grad_norm: 0.9109556856436722, iteration: 327827
loss: 0.9878726005554199,grad_norm: 0.7676817127415442, iteration: 327828
loss: 0.9651610255241394,grad_norm: 0.8476459445773726, iteration: 327829
loss: 1.0155465602874756,grad_norm: 0.8618022167614011, iteration: 327830
loss: 1.0185871124267578,grad_norm: 0.9866424836070191, iteration: 327831
loss: 0.9858313202857971,grad_norm: 0.8380346524577648, iteration: 327832
loss: 1.031033992767334,grad_norm: 0.7375621812849956, iteration: 327833
loss: 0.991710901260376,grad_norm: 0.8941567804888569, iteration: 327834
loss: 1.0213781595230103,grad_norm: 0.8839173132877883, iteration: 327835
loss: 0.9884345531463623,grad_norm: 0.8560185601078223, iteration: 327836
loss: 1.007243037223816,grad_norm: 0.8245926339349402, iteration: 327837
loss: 0.9690406322479248,grad_norm: 0.7359295760961305, iteration: 327838
loss: 0.9753196835517883,grad_norm: 0.8571038296776259, iteration: 327839
loss: 0.9837601184844971,grad_norm: 0.9425117555818648, iteration: 327840
loss: 1.0305447578430176,grad_norm: 0.9269857936310255, iteration: 327841
loss: 0.9849259853363037,grad_norm: 0.658678080882606, iteration: 327842
loss: 1.0142836570739746,grad_norm: 0.855642093652706, iteration: 327843
loss: 0.9917541146278381,grad_norm: 0.7772128215185826, iteration: 327844
loss: 0.9962260127067566,grad_norm: 0.8723793071500052, iteration: 327845
loss: 0.9810916781425476,grad_norm: 0.8282295520830165, iteration: 327846
loss: 0.9914107918739319,grad_norm: 0.6895295154633662, iteration: 327847
loss: 0.9943253397941589,grad_norm: 0.7761159698780226, iteration: 327848
loss: 0.9908707141876221,grad_norm: 0.7824190001072295, iteration: 327849
loss: 1.0269845724105835,grad_norm: 0.9372963549379278, iteration: 327850
loss: 1.015967845916748,grad_norm: 0.9659134581142665, iteration: 327851
loss: 1.0104063749313354,grad_norm: 0.825049212987224, iteration: 327852
loss: 1.0455278158187866,grad_norm: 0.9999994520582289, iteration: 327853
loss: 0.9248142242431641,grad_norm: 0.7169531922296695, iteration: 327854
loss: 1.0029085874557495,grad_norm: 0.8784944159261772, iteration: 327855
loss: 1.0132982730865479,grad_norm: 0.9399548159374164, iteration: 327856
loss: 0.9938086867332458,grad_norm: 0.7810825200216253, iteration: 327857
loss: 1.0174219608306885,grad_norm: 0.8378540082309313, iteration: 327858
loss: 1.002206563949585,grad_norm: 0.9548645876963424, iteration: 327859
loss: 0.9734390377998352,grad_norm: 0.9999998715559689, iteration: 327860
loss: 0.9938146471977234,grad_norm: 0.7215921698150161, iteration: 327861
loss: 0.9960417151451111,grad_norm: 0.8428348136438739, iteration: 327862
loss: 0.97347491979599,grad_norm: 0.8358138641074807, iteration: 327863
loss: 1.0498045682907104,grad_norm: 0.9975639281146426, iteration: 327864
loss: 0.9784450531005859,grad_norm: 0.7905208292937526, iteration: 327865
loss: 1.022333025932312,grad_norm: 0.7097056442852488, iteration: 327866
loss: 1.0224112272262573,grad_norm: 0.9990332222931353, iteration: 327867
loss: 1.006941795349121,grad_norm: 0.7559159828327863, iteration: 327868
loss: 0.9808260202407837,grad_norm: 0.8241460091215385, iteration: 327869
loss: 1.0115344524383545,grad_norm: 0.8599772731106611, iteration: 327870
loss: 0.9843063354492188,grad_norm: 0.9999991192135675, iteration: 327871
loss: 1.0459470748901367,grad_norm: 0.9469393907785837, iteration: 327872
loss: 0.9735492467880249,grad_norm: 0.8510267049154491, iteration: 327873
loss: 0.9698613882064819,grad_norm: 0.9999991358935089, iteration: 327874
loss: 0.9748067855834961,grad_norm: 0.8121003454940227, iteration: 327875
loss: 0.9923487305641174,grad_norm: 0.7734914042542037, iteration: 327876
loss: 0.9897132515907288,grad_norm: 0.8244249486879909, iteration: 327877
loss: 1.008755087852478,grad_norm: 0.7927380279395461, iteration: 327878
loss: 1.0167508125305176,grad_norm: 0.8838666958088031, iteration: 327879
loss: 0.9814034104347229,grad_norm: 0.7941809848962615, iteration: 327880
loss: 0.9877178072929382,grad_norm: 0.9814694141512047, iteration: 327881
loss: 1.0104739665985107,grad_norm: 0.9094487907310375, iteration: 327882
loss: 0.9769338369369507,grad_norm: 0.865115021057864, iteration: 327883
loss: 0.989213228225708,grad_norm: 0.9787898393229216, iteration: 327884
loss: 0.9773169159889221,grad_norm: 0.8920361422204094, iteration: 327885
loss: 0.9913114905357361,grad_norm: 0.7568581607803543, iteration: 327886
loss: 1.0099796056747437,grad_norm: 0.7382334530572985, iteration: 327887
loss: 1.0976029634475708,grad_norm: 0.9788732324753212, iteration: 327888
loss: 1.0261696577072144,grad_norm: 0.7206271402148074, iteration: 327889
loss: 1.0079783201217651,grad_norm: 0.9393453157557594, iteration: 327890
loss: 1.0327197313308716,grad_norm: 0.7520011918401722, iteration: 327891
loss: 1.0043865442276,grad_norm: 0.8436357891600309, iteration: 327892
loss: 1.0102006196975708,grad_norm: 0.8769916915964376, iteration: 327893
loss: 0.9634377360343933,grad_norm: 0.8790116145367091, iteration: 327894
loss: 0.9874308109283447,grad_norm: 0.8739590878843528, iteration: 327895
loss: 1.001167893409729,grad_norm: 0.8749445147906274, iteration: 327896
loss: 1.0117580890655518,grad_norm: 0.7561910486638953, iteration: 327897
loss: 1.0009214878082275,grad_norm: 0.9228183591890077, iteration: 327898
loss: 0.953764796257019,grad_norm: 0.7893134249595157, iteration: 327899
loss: 1.0062958002090454,grad_norm: 0.8725612868879522, iteration: 327900
loss: 0.992617130279541,grad_norm: 0.8335732533047752, iteration: 327901
loss: 1.0005522966384888,grad_norm: 0.9867664028096936, iteration: 327902
loss: 1.0127147436141968,grad_norm: 0.7508947424252217, iteration: 327903
loss: 1.0264919996261597,grad_norm: 0.8552309350321738, iteration: 327904
loss: 0.9859172701835632,grad_norm: 0.9999991212520918, iteration: 327905
loss: 1.0098826885223389,grad_norm: 0.7586249407917708, iteration: 327906
loss: 1.0065393447875977,grad_norm: 0.7241553806727778, iteration: 327907
loss: 0.9902210831642151,grad_norm: 0.9145487469873351, iteration: 327908
loss: 0.989546000957489,grad_norm: 0.7689974452493856, iteration: 327909
loss: 0.9791601896286011,grad_norm: 0.8690654773450581, iteration: 327910
loss: 0.995738685131073,grad_norm: 0.7777638193280882, iteration: 327911
loss: 0.9492989182472229,grad_norm: 0.8120433669447354, iteration: 327912
loss: 0.9800863265991211,grad_norm: 0.8893023332674148, iteration: 327913
loss: 1.036521315574646,grad_norm: 0.7655770265513339, iteration: 327914
loss: 0.988874614238739,grad_norm: 0.8194923293750019, iteration: 327915
loss: 0.9642193913459778,grad_norm: 0.749000654676037, iteration: 327916
loss: 0.9985694885253906,grad_norm: 0.9999990826768296, iteration: 327917
loss: 1.0071467161178589,grad_norm: 0.864074370375997, iteration: 327918
loss: 1.0299689769744873,grad_norm: 0.9764193692790387, iteration: 327919
loss: 0.9807938933372498,grad_norm: 0.8130166188695158, iteration: 327920
loss: 0.9587273001670837,grad_norm: 0.7876335177522334, iteration: 327921
loss: 0.9702666401863098,grad_norm: 0.8497374662272468, iteration: 327922
loss: 1.0456409454345703,grad_norm: 0.9999991696881322, iteration: 327923
loss: 0.9928847551345825,grad_norm: 0.999999188028608, iteration: 327924
loss: 0.953761637210846,grad_norm: 0.8605908274568791, iteration: 327925
loss: 0.9784173369407654,grad_norm: 0.8174264747110911, iteration: 327926
loss: 0.9784741997718811,grad_norm: 0.7678167546131937, iteration: 327927
loss: 1.000800371170044,grad_norm: 0.8328442134463278, iteration: 327928
loss: 1.0055323839187622,grad_norm: 0.7777590061538717, iteration: 327929
loss: 0.9878870844841003,grad_norm: 0.7944823718412577, iteration: 327930
loss: 1.033669114112854,grad_norm: 0.9229273379050301, iteration: 327931
loss: 1.0834418535232544,grad_norm: 0.9700381095283607, iteration: 327932
loss: 0.9450405836105347,grad_norm: 0.9806481041858567, iteration: 327933
loss: 0.9900296330451965,grad_norm: 0.9999992089985036, iteration: 327934
loss: 0.9658390283584595,grad_norm: 0.8162051730864667, iteration: 327935
loss: 1.0007514953613281,grad_norm: 0.7740226506340643, iteration: 327936
loss: 1.0207908153533936,grad_norm: 0.959413654840892, iteration: 327937
loss: 0.9865418076515198,grad_norm: 0.9999993217085748, iteration: 327938
loss: 0.9990955591201782,grad_norm: 0.9999990702272213, iteration: 327939
loss: 0.9752486348152161,grad_norm: 0.6763077855357171, iteration: 327940
loss: 0.9891300201416016,grad_norm: 0.8388971697720079, iteration: 327941
loss: 1.0286630392074585,grad_norm: 0.8885188534149916, iteration: 327942
loss: 0.9795864224433899,grad_norm: 0.999999127696592, iteration: 327943
loss: 1.0570532083511353,grad_norm: 0.9999989834872086, iteration: 327944
loss: 0.9727209806442261,grad_norm: 0.8136121253396907, iteration: 327945
loss: 1.0087858438491821,grad_norm: 0.8067272939271788, iteration: 327946
loss: 1.00209641456604,grad_norm: 0.9999995920270346, iteration: 327947
loss: 0.9997782111167908,grad_norm: 0.8705130379618835, iteration: 327948
loss: 1.0405296087265015,grad_norm: 0.9162099403201834, iteration: 327949
loss: 1.0498321056365967,grad_norm: 0.9140860082663922, iteration: 327950
loss: 0.9894664883613586,grad_norm: 0.8425515207351292, iteration: 327951
loss: 0.9745703339576721,grad_norm: 0.7994680828023274, iteration: 327952
loss: 1.008913516998291,grad_norm: 0.8360882568506499, iteration: 327953
loss: 1.0103665590286255,grad_norm: 0.9999990189319395, iteration: 327954
loss: 1.0075000524520874,grad_norm: 0.902116757776796, iteration: 327955
loss: 0.9986405372619629,grad_norm: 0.8211556317941409, iteration: 327956
loss: 0.9736091494560242,grad_norm: 0.8367271678452042, iteration: 327957
loss: 0.9685452580451965,grad_norm: 0.9720453231947624, iteration: 327958
loss: 0.9647854566574097,grad_norm: 0.8173284078698669, iteration: 327959
loss: 0.9674516916275024,grad_norm: 0.7777399920105906, iteration: 327960
loss: 1.0248186588287354,grad_norm: 0.9885155579025593, iteration: 327961
loss: 0.9758121371269226,grad_norm: 0.9573858301854851, iteration: 327962
loss: 1.0091935396194458,grad_norm: 0.7961058058078783, iteration: 327963
loss: 1.0329440832138062,grad_norm: 0.9999995811463506, iteration: 327964
loss: 1.0313677787780762,grad_norm: 0.7889093893379446, iteration: 327965
loss: 1.0326207876205444,grad_norm: 0.7328404698470418, iteration: 327966
loss: 1.0819181203842163,grad_norm: 0.9999990831494611, iteration: 327967
loss: 0.9791138768196106,grad_norm: 0.6641940051554149, iteration: 327968
loss: 1.0004751682281494,grad_norm: 0.7978228058664957, iteration: 327969
loss: 0.9944532513618469,grad_norm: 0.9211383096787289, iteration: 327970
loss: 0.9729708433151245,grad_norm: 0.8556894185332462, iteration: 327971
loss: 0.9712305665016174,grad_norm: 0.8118201948084304, iteration: 327972
loss: 1.0189454555511475,grad_norm: 0.9410161939284413, iteration: 327973
loss: 0.9674985408782959,grad_norm: 0.7900066607901525, iteration: 327974
loss: 0.972792387008667,grad_norm: 0.8289613691153573, iteration: 327975
loss: 1.0012520551681519,grad_norm: 0.9884564951605627, iteration: 327976
loss: 0.978852391242981,grad_norm: 0.8410136916290917, iteration: 327977
loss: 0.9692128300666809,grad_norm: 0.830517593850497, iteration: 327978
loss: 0.9912452101707458,grad_norm: 0.7559119464691004, iteration: 327979
loss: 1.0163793563842773,grad_norm: 0.7652440651736004, iteration: 327980
loss: 1.024117112159729,grad_norm: 0.8051231832110151, iteration: 327981
loss: 0.9448516964912415,grad_norm: 0.8759941211118717, iteration: 327982
loss: 0.993878960609436,grad_norm: 0.7956195716904111, iteration: 327983
loss: 1.0262869596481323,grad_norm: 0.8536032915977816, iteration: 327984
loss: 0.9797351360321045,grad_norm: 0.8170917665656614, iteration: 327985
loss: 0.9672723412513733,grad_norm: 0.8487956880991516, iteration: 327986
loss: 0.9583699107170105,grad_norm: 0.7224683068210441, iteration: 327987
loss: 0.9867010116577148,grad_norm: 0.8269109972514342, iteration: 327988
loss: 0.9903312921524048,grad_norm: 0.700888887665728, iteration: 327989
loss: 0.9645169377326965,grad_norm: 0.8150517616668347, iteration: 327990
loss: 0.9657602310180664,grad_norm: 0.8567845632827611, iteration: 327991
loss: 0.9976000785827637,grad_norm: 0.790872672428734, iteration: 327992
loss: 1.0173596143722534,grad_norm: 0.9242009194246582, iteration: 327993
loss: 1.0232129096984863,grad_norm: 0.8584717199668951, iteration: 327994
loss: 1.0297737121582031,grad_norm: 0.9999990668395627, iteration: 327995
loss: 1.0263657569885254,grad_norm: 0.9999990598343238, iteration: 327996
loss: 1.009595513343811,grad_norm: 0.9424690009981387, iteration: 327997
loss: 0.9933546781539917,grad_norm: 0.9176778096726995, iteration: 327998
loss: 0.9665740728378296,grad_norm: 0.7628056196657963, iteration: 327999
loss: 1.0144932270050049,grad_norm: 0.8126521490529408, iteration: 328000
loss: 0.9810788035392761,grad_norm: 0.8502346355801333, iteration: 328001
loss: 1.0019066333770752,grad_norm: 0.7514286795429742, iteration: 328002
loss: 1.0166466236114502,grad_norm: 0.7018178837567641, iteration: 328003
loss: 0.9859476089477539,grad_norm: 0.7539715712708326, iteration: 328004
loss: 1.0137848854064941,grad_norm: 0.9040713796165684, iteration: 328005
loss: 1.032720685005188,grad_norm: 0.9999991895783643, iteration: 328006
loss: 1.0124599933624268,grad_norm: 0.9049800275820209, iteration: 328007
loss: 0.9755881428718567,grad_norm: 0.8186380261959202, iteration: 328008
loss: 0.962135910987854,grad_norm: 0.9508514016379563, iteration: 328009
loss: 0.9889369010925293,grad_norm: 0.8648577532958046, iteration: 328010
loss: 0.9974755644798279,grad_norm: 0.7981524245714393, iteration: 328011
loss: 0.9653801918029785,grad_norm: 0.7950434400204098, iteration: 328012
loss: 1.0472372770309448,grad_norm: 0.891772542205924, iteration: 328013
loss: 0.996739387512207,grad_norm: 0.7940160290810262, iteration: 328014
loss: 1.0120213031768799,grad_norm: 0.9999997688094365, iteration: 328015
loss: 1.0092144012451172,grad_norm: 0.8976603350549357, iteration: 328016
loss: 1.0139942169189453,grad_norm: 0.9849408652486213, iteration: 328017
loss: 0.9826366901397705,grad_norm: 0.9031423171021141, iteration: 328018
loss: 0.964589536190033,grad_norm: 0.838761813591701, iteration: 328019
loss: 0.9723665118217468,grad_norm: 0.8183442974929086, iteration: 328020
loss: 0.9867032766342163,grad_norm: 0.6849622852221801, iteration: 328021
loss: 0.992199718952179,grad_norm: 0.9665899205386804, iteration: 328022
loss: 0.9694270491600037,grad_norm: 0.9731512534272774, iteration: 328023
loss: 0.989504873752594,grad_norm: 0.7851555622977878, iteration: 328024
loss: 0.9777958989143372,grad_norm: 0.9367353543222333, iteration: 328025
loss: 0.9952901601791382,grad_norm: 0.8691469164647334, iteration: 328026
loss: 0.98666912317276,grad_norm: 0.9227584582998393, iteration: 328027
loss: 1.0413001775741577,grad_norm: 0.8109904595914094, iteration: 328028
loss: 0.972659170627594,grad_norm: 0.7687186682135243, iteration: 328029
loss: 1.0246278047561646,grad_norm: 0.9999991061692591, iteration: 328030
loss: 0.9608914256095886,grad_norm: 0.7538302541072794, iteration: 328031
loss: 0.9858635663986206,grad_norm: 0.9999988727805551, iteration: 328032
loss: 0.9880725145339966,grad_norm: 0.9110460245657415, iteration: 328033
loss: 0.9983693361282349,grad_norm: 0.9124272873021853, iteration: 328034
loss: 1.010886549949646,grad_norm: 0.8779193710532864, iteration: 328035
loss: 0.9712632894515991,grad_norm: 0.9372138283191389, iteration: 328036
loss: 1.0518041849136353,grad_norm: 0.7094827575289111, iteration: 328037
loss: 1.0118262767791748,grad_norm: 0.8196323758653018, iteration: 328038
loss: 0.9866014122962952,grad_norm: 0.824622804959339, iteration: 328039
loss: 0.9812062978744507,grad_norm: 0.8423221737279201, iteration: 328040
loss: 0.989192545413971,grad_norm: 0.7766475126588089, iteration: 328041
loss: 1.0168383121490479,grad_norm: 0.9483009886904306, iteration: 328042
loss: 1.0152299404144287,grad_norm: 0.687815054906989, iteration: 328043
loss: 0.9997995495796204,grad_norm: 0.8804620178980556, iteration: 328044
loss: 1.0041747093200684,grad_norm: 0.9999990562554429, iteration: 328045
loss: 0.9898990988731384,grad_norm: 0.7788746260020262, iteration: 328046
loss: 0.9770506024360657,grad_norm: 0.7904457594537347, iteration: 328047
loss: 1.0216572284698486,grad_norm: 0.861249417802187, iteration: 328048
loss: 1.0376430749893188,grad_norm: 0.8925469210433713, iteration: 328049
loss: 1.006848692893982,grad_norm: 0.8342991443829753, iteration: 328050
loss: 1.028723120689392,grad_norm: 0.847617466244097, iteration: 328051
loss: 1.0077496767044067,grad_norm: 0.8770092159217417, iteration: 328052
loss: 0.967252254486084,grad_norm: 0.9145513759115358, iteration: 328053
loss: 0.9792009592056274,grad_norm: 0.81348284446142, iteration: 328054
loss: 1.0053527355194092,grad_norm: 0.9999997209215821, iteration: 328055
loss: 0.9940828084945679,grad_norm: 0.6591451542845357, iteration: 328056
loss: 0.9948955178260803,grad_norm: 0.7823442739694667, iteration: 328057
loss: 1.029861569404602,grad_norm: 0.8065027017747955, iteration: 328058
loss: 1.0031107664108276,grad_norm: 0.8724349304469762, iteration: 328059
loss: 1.0285271406173706,grad_norm: 0.9999990418028641, iteration: 328060
loss: 1.0243321657180786,grad_norm: 0.8804176290199204, iteration: 328061
loss: 1.0398695468902588,grad_norm: 0.8387959641034366, iteration: 328062
loss: 0.9629866480827332,grad_norm: 0.999999690537103, iteration: 328063
loss: 1.0093082189559937,grad_norm: 0.8020712477984001, iteration: 328064
loss: 0.9918422102928162,grad_norm: 0.7990228933610545, iteration: 328065
loss: 0.985191822052002,grad_norm: 0.888336199471282, iteration: 328066
loss: 0.9976359605789185,grad_norm: 0.9335757097577941, iteration: 328067
loss: 1.0077641010284424,grad_norm: 0.8889802272165812, iteration: 328068
loss: 1.0252388715744019,grad_norm: 0.7863605535342799, iteration: 328069
loss: 1.0300689935684204,grad_norm: 0.9177961407127497, iteration: 328070
loss: 1.0442653894424438,grad_norm: 0.7672012672623469, iteration: 328071
loss: 0.9839240908622742,grad_norm: 0.8476678611555717, iteration: 328072
loss: 1.0158343315124512,grad_norm: 0.9050946838844307, iteration: 328073
loss: 0.988914966583252,grad_norm: 0.7744434611089218, iteration: 328074
loss: 1.0212795734405518,grad_norm: 0.8913161178784043, iteration: 328075
loss: 0.9982492327690125,grad_norm: 0.8107137045550857, iteration: 328076
loss: 1.0819215774536133,grad_norm: 0.9999992641303092, iteration: 328077
loss: 1.0062031745910645,grad_norm: 0.8451916765121386, iteration: 328078
loss: 1.024692416191101,grad_norm: 0.7361259085987257, iteration: 328079
loss: 0.9942030310630798,grad_norm: 0.7627241298007622, iteration: 328080
loss: 1.019775152206421,grad_norm: 0.8060312200103916, iteration: 328081
loss: 1.080673098564148,grad_norm: 0.9999998566795821, iteration: 328082
loss: 1.1684623956680298,grad_norm: 0.9999998573295313, iteration: 328083
loss: 0.9854130744934082,grad_norm: 0.9097996782059712, iteration: 328084
loss: 0.9891000986099243,grad_norm: 0.9247584419154794, iteration: 328085
loss: 1.1671010255813599,grad_norm: 0.999999886362426, iteration: 328086
loss: 0.982522189617157,grad_norm: 0.8054819210270089, iteration: 328087
loss: 1.0446629524230957,grad_norm: 0.9999992221350505, iteration: 328088
loss: 0.9798386693000793,grad_norm: 0.9611641666835389, iteration: 328089
loss: 1.002313256263733,grad_norm: 0.7416016216697399, iteration: 328090
loss: 1.0165046453475952,grad_norm: 0.7860109388721, iteration: 328091
loss: 0.9992257952690125,grad_norm: 0.8393845534171311, iteration: 328092
loss: 0.993482768535614,grad_norm: 0.8467339878530517, iteration: 328093
loss: 1.0068720579147339,grad_norm: 0.7650144379122498, iteration: 328094
loss: 0.9908943772315979,grad_norm: 0.880854198039182, iteration: 328095
loss: 0.9867497682571411,grad_norm: 0.7992816911208696, iteration: 328096
loss: 0.9425608515739441,grad_norm: 0.7672798530671397, iteration: 328097
loss: 0.9658445715904236,grad_norm: 0.8231234315637587, iteration: 328098
loss: 1.029448390007019,grad_norm: 0.8213123227894504, iteration: 328099
loss: 1.0154353380203247,grad_norm: 0.8761207285837422, iteration: 328100
loss: 0.9870030879974365,grad_norm: 0.9999992027832174, iteration: 328101
loss: 1.0551552772521973,grad_norm: 0.9999999902934406, iteration: 328102
loss: 1.0160883665084839,grad_norm: 0.9999992455809806, iteration: 328103
loss: 0.9917599558830261,grad_norm: 0.9999991327230892, iteration: 328104
loss: 1.0048199892044067,grad_norm: 0.9003802730009948, iteration: 328105
loss: 0.9944454431533813,grad_norm: 0.810483236343356, iteration: 328106
loss: 0.9632312059402466,grad_norm: 0.6933246604715338, iteration: 328107
loss: 1.0601074695587158,grad_norm: 1.0000000035836865, iteration: 328108
loss: 0.9988837838172913,grad_norm: 0.8340989389285347, iteration: 328109
loss: 0.9907453656196594,grad_norm: 0.9999992801655468, iteration: 328110
loss: 1.0317422151565552,grad_norm: 0.8299159073825746, iteration: 328111
loss: 1.0041648149490356,grad_norm: 0.9999995969229892, iteration: 328112
loss: 0.9979001879692078,grad_norm: 0.8276385257826655, iteration: 328113
loss: 1.033245325088501,grad_norm: 0.8139247054058597, iteration: 328114
loss: 0.9985818266868591,grad_norm: 0.8520089703354503, iteration: 328115
loss: 0.9570299386978149,grad_norm: 0.8456359890281082, iteration: 328116
loss: 0.9832188487052917,grad_norm: 0.7921653928623978, iteration: 328117
loss: 0.964605450630188,grad_norm: 0.9555848416648612, iteration: 328118
loss: 1.0104039907455444,grad_norm: 0.8874987853768858, iteration: 328119
loss: 0.9941543936729431,grad_norm: 0.9022751942470939, iteration: 328120
loss: 0.9914156198501587,grad_norm: 0.8276897990707655, iteration: 328121
loss: 1.0240552425384521,grad_norm: 0.7236337597465505, iteration: 328122
loss: 1.0432065725326538,grad_norm: 0.9999996270347982, iteration: 328123
loss: 1.0581386089324951,grad_norm: 0.9999990636346613, iteration: 328124
loss: 1.0097259283065796,grad_norm: 0.879803252965197, iteration: 328125
loss: 0.9653252363204956,grad_norm: 0.9584122378324895, iteration: 328126
loss: 1.0074129104614258,grad_norm: 0.8101976770055762, iteration: 328127
loss: 0.9790754318237305,grad_norm: 0.751889422403732, iteration: 328128
loss: 0.9977811574935913,grad_norm: 0.7867568848365226, iteration: 328129
loss: 1.0119872093200684,grad_norm: 0.8418117705458448, iteration: 328130
loss: 0.9963967204093933,grad_norm: 0.7754703967274009, iteration: 328131
loss: 1.0199846029281616,grad_norm: 0.9379355721614545, iteration: 328132
loss: 1.007163166999817,grad_norm: 0.8398438321817527, iteration: 328133
loss: 0.9919880628585815,grad_norm: 0.8485210261275575, iteration: 328134
loss: 0.973538339138031,grad_norm: 0.9598456115162552, iteration: 328135
loss: 0.9955712556838989,grad_norm: 0.93889643951227, iteration: 328136
loss: 1.007949948310852,grad_norm: 0.9939586535815443, iteration: 328137
loss: 0.9898829460144043,grad_norm: 0.8007825166517321, iteration: 328138
loss: 1.0111546516418457,grad_norm: 0.9341366715985228, iteration: 328139
loss: 0.9969193935394287,grad_norm: 0.8790475027030706, iteration: 328140
loss: 1.0091294050216675,grad_norm: 0.9788881183589435, iteration: 328141
loss: 0.9965391755104065,grad_norm: 0.646293152123094, iteration: 328142
loss: 0.9695252180099487,grad_norm: 0.7699957548890806, iteration: 328143
loss: 0.9714414477348328,grad_norm: 0.9999990125954257, iteration: 328144
loss: 0.9300063848495483,grad_norm: 0.9159291126702745, iteration: 328145
loss: 0.9754893183708191,grad_norm: 0.9420699964045316, iteration: 328146
loss: 1.0215438604354858,grad_norm: 0.8940113749184875, iteration: 328147
loss: 0.9987822771072388,grad_norm: 0.9102677290244694, iteration: 328148
loss: 0.9905672073364258,grad_norm: 0.8520998175563368, iteration: 328149
loss: 0.9728453159332275,grad_norm: 0.9207266168055559, iteration: 328150
loss: 1.0382704734802246,grad_norm: 0.8674992672536717, iteration: 328151
loss: 1.0086028575897217,grad_norm: 0.77899986388635, iteration: 328152
loss: 1.0188044309616089,grad_norm: 0.7504329642894948, iteration: 328153
loss: 0.9919403195381165,grad_norm: 0.8425837562617506, iteration: 328154
loss: 1.0151492357254028,grad_norm: 0.7621097526545619, iteration: 328155
loss: 0.9818121194839478,grad_norm: 0.7856412111773015, iteration: 328156
loss: 0.9934021830558777,grad_norm: 0.7726527245232565, iteration: 328157
loss: 1.0100584030151367,grad_norm: 0.7240673910600205, iteration: 328158
loss: 0.9888969659805298,grad_norm: 0.9928997176949109, iteration: 328159
loss: 0.9801474809646606,grad_norm: 0.8681844861333117, iteration: 328160
loss: 0.9900667071342468,grad_norm: 0.8393999687122381, iteration: 328161
loss: 1.0716816186904907,grad_norm: 0.9999994858564062, iteration: 328162
loss: 0.9906395077705383,grad_norm: 0.7688869731112637, iteration: 328163
loss: 1.0378345251083374,grad_norm: 0.9999990801182533, iteration: 328164
loss: 0.9898477792739868,grad_norm: 0.9968836124708087, iteration: 328165
loss: 0.9543049335479736,grad_norm: 0.8190223344061592, iteration: 328166
loss: 0.9699058532714844,grad_norm: 0.8118449139043494, iteration: 328167
loss: 0.9946342706680298,grad_norm: 0.8247658844466405, iteration: 328168
loss: 0.9573566317558289,grad_norm: 0.8578434297812987, iteration: 328169
loss: 1.0191903114318848,grad_norm: 0.8107493970222023, iteration: 328170
loss: 0.9960505962371826,grad_norm: 0.8661980150194308, iteration: 328171
loss: 1.010414481163025,grad_norm: 0.7814939745158488, iteration: 328172
loss: 0.9635659456253052,grad_norm: 0.999999310468264, iteration: 328173
loss: 0.9913142323493958,grad_norm: 0.9999990742936775, iteration: 328174
loss: 1.032870888710022,grad_norm: 0.8108195439502532, iteration: 328175
loss: 1.010068655014038,grad_norm: 0.8889838648949269, iteration: 328176
loss: 0.9692206978797913,grad_norm: 0.8660555161488809, iteration: 328177
loss: 1.0140318870544434,grad_norm: 0.8916672288335058, iteration: 328178
loss: 0.9734954833984375,grad_norm: 0.8551848140263096, iteration: 328179
loss: 1.0397120714187622,grad_norm: 0.792444464606674, iteration: 328180
loss: 0.990601658821106,grad_norm: 0.8634053557188598, iteration: 328181
loss: 1.0254063606262207,grad_norm: 0.8698174662210909, iteration: 328182
loss: 1.0260305404663086,grad_norm: 0.7194034892786898, iteration: 328183
loss: 1.0374703407287598,grad_norm: 0.8349328735449384, iteration: 328184
loss: 1.0390819311141968,grad_norm: 0.9704965780796031, iteration: 328185
loss: 0.9693373441696167,grad_norm: 0.8076761206516329, iteration: 328186
loss: 0.9868997931480408,grad_norm: 0.9447656300410806, iteration: 328187
loss: 0.9832852482795715,grad_norm: 0.9146766064212652, iteration: 328188
loss: 1.0536245107650757,grad_norm: 0.8337933558786617, iteration: 328189
loss: 1.0454477071762085,grad_norm: 0.9999998232560745, iteration: 328190
loss: 1.0370243787765503,grad_norm: 0.8983115031002721, iteration: 328191
loss: 0.9546947479248047,grad_norm: 0.8427404359563123, iteration: 328192
loss: 0.9479794502258301,grad_norm: 0.9449629689369543, iteration: 328193
loss: 0.9973832368850708,grad_norm: 0.8374357155673948, iteration: 328194
loss: 1.0356025695800781,grad_norm: 0.9999995737198822, iteration: 328195
loss: 1.0200855731964111,grad_norm: 0.7474596927739844, iteration: 328196
loss: 1.010796308517456,grad_norm: 0.8464521883876295, iteration: 328197
loss: 0.9566966891288757,grad_norm: 0.8745556710029715, iteration: 328198
loss: 1.0325775146484375,grad_norm: 0.7794600311301357, iteration: 328199
loss: 0.9741798043251038,grad_norm: 0.926068975786527, iteration: 328200
loss: 1.018810510635376,grad_norm: 0.835647463982821, iteration: 328201
loss: 1.0223917961120605,grad_norm: 0.8168560812923051, iteration: 328202
loss: 1.0274543762207031,grad_norm: 0.8883894419399248, iteration: 328203
loss: 0.9883975386619568,grad_norm: 0.9999990049991299, iteration: 328204
loss: 0.988940417766571,grad_norm: 0.8680490929664784, iteration: 328205
loss: 1.022273063659668,grad_norm: 0.9023364685264675, iteration: 328206
loss: 1.029417872428894,grad_norm: 0.9999991447102159, iteration: 328207
loss: 0.9866636395454407,grad_norm: 0.9377189199344333, iteration: 328208
loss: 1.0558524131774902,grad_norm: 0.7594022465639527, iteration: 328209
loss: 0.9966878890991211,grad_norm: 0.8137490159236007, iteration: 328210
loss: 1.0296638011932373,grad_norm: 0.9999993377644673, iteration: 328211
loss: 0.9740116000175476,grad_norm: 0.7387545325266053, iteration: 328212
loss: 0.9571924805641174,grad_norm: 0.9999991377472168, iteration: 328213
loss: 0.973811686038971,grad_norm: 0.8921845131881633, iteration: 328214
loss: 1.0303715467453003,grad_norm: 0.8606965655122475, iteration: 328215
loss: 0.9985923767089844,grad_norm: 0.821707458998569, iteration: 328216
loss: 1.0041753053665161,grad_norm: 0.9999997762486456, iteration: 328217
loss: 0.9977269768714905,grad_norm: 0.9999990716318707, iteration: 328218
loss: 0.9810855388641357,grad_norm: 0.8473762014474018, iteration: 328219
loss: 1.035844326019287,grad_norm: 0.9198593291708064, iteration: 328220
loss: 1.0035446882247925,grad_norm: 0.8924094137535973, iteration: 328221
loss: 1.014816403388977,grad_norm: 0.870663230157231, iteration: 328222
loss: 1.0182300806045532,grad_norm: 0.9999990834688727, iteration: 328223
loss: 0.991706371307373,grad_norm: 0.9339695905088538, iteration: 328224
loss: 1.0400842428207397,grad_norm: 0.8850034387360414, iteration: 328225
loss: 0.9750659465789795,grad_norm: 0.8141856474019853, iteration: 328226
loss: 0.9807868599891663,grad_norm: 0.8332447425239901, iteration: 328227
loss: 0.9891074895858765,grad_norm: 0.8188039114477078, iteration: 328228
loss: 0.9981593489646912,grad_norm: 0.7677823165770319, iteration: 328229
loss: 0.9890398383140564,grad_norm: 0.8215383058943054, iteration: 328230
loss: 0.9675997495651245,grad_norm: 0.8263441984610597, iteration: 328231
loss: 1.063562273979187,grad_norm: 0.9088956119071904, iteration: 328232
loss: 1.0202940702438354,grad_norm: 0.9189107743677803, iteration: 328233
loss: 1.0013644695281982,grad_norm: 0.8203489367105047, iteration: 328234
loss: 0.9649745225906372,grad_norm: 0.9270734443352667, iteration: 328235
loss: 1.016929030418396,grad_norm: 0.7796087157877886, iteration: 328236
loss: 1.042410135269165,grad_norm: 0.848812391597064, iteration: 328237
loss: 0.9838913679122925,grad_norm: 0.8377277430540055, iteration: 328238
loss: 0.9875280261039734,grad_norm: 0.8385628353734316, iteration: 328239
loss: 1.0026367902755737,grad_norm: 0.9007891272724642, iteration: 328240
loss: 1.0124551057815552,grad_norm: 0.7445602619431869, iteration: 328241
loss: 0.9667549729347229,grad_norm: 0.9729500537673361, iteration: 328242
loss: 1.0711097717285156,grad_norm: 0.776706364843015, iteration: 328243
loss: 1.0030778646469116,grad_norm: 0.8669962038427937, iteration: 328244
loss: 1.0153425931930542,grad_norm: 0.9197020581715235, iteration: 328245
loss: 1.0069903135299683,grad_norm: 1.000000031535446, iteration: 328246
loss: 0.9898346066474915,grad_norm: 0.7925470668912988, iteration: 328247
loss: 1.0198644399642944,grad_norm: 0.9107887296787468, iteration: 328248
loss: 1.003618836402893,grad_norm: 0.7246969670539719, iteration: 328249
loss: 1.0056986808776855,grad_norm: 0.8564885427613456, iteration: 328250
loss: 0.9652184844017029,grad_norm: 0.8375958810848969, iteration: 328251
loss: 0.9823673367500305,grad_norm: 0.8646098878285939, iteration: 328252
loss: 0.9964175820350647,grad_norm: 0.9347229898570882, iteration: 328253
loss: 0.991477906703949,grad_norm: 0.7320691491187109, iteration: 328254
loss: 1.0372501611709595,grad_norm: 0.817053828015177, iteration: 328255
loss: 0.9628612399101257,grad_norm: 0.8209214958362283, iteration: 328256
loss: 1.0094316005706787,grad_norm: 0.85429609727833, iteration: 328257
loss: 0.9943667054176331,grad_norm: 0.8172681758793937, iteration: 328258
loss: 0.9980515241622925,grad_norm: 0.8253704523493249, iteration: 328259
loss: 0.9851709604263306,grad_norm: 0.7650087007253376, iteration: 328260
loss: 1.0287827253341675,grad_norm: 0.8505472322873067, iteration: 328261
loss: 1.0353602170944214,grad_norm: 0.9999996681628083, iteration: 328262
loss: 0.9931317567825317,grad_norm: 0.8624756449636259, iteration: 328263
loss: 0.9841835498809814,grad_norm: 0.6758956133880063, iteration: 328264
loss: 1.0092885494232178,grad_norm: 0.8630260020169331, iteration: 328265
loss: 1.0066192150115967,grad_norm: 0.8252551856258555, iteration: 328266
loss: 1.0179051160812378,grad_norm: 0.99999903518314, iteration: 328267
loss: 1.0255317687988281,grad_norm: 0.7451939650686175, iteration: 328268
loss: 0.9932968020439148,grad_norm: 0.7887143887222141, iteration: 328269
loss: 0.9602450728416443,grad_norm: 0.9164189248771375, iteration: 328270
loss: 1.0470566749572754,grad_norm: 0.7441427114619156, iteration: 328271
loss: 1.0226026773452759,grad_norm: 0.9999990950093764, iteration: 328272
loss: 1.010346531867981,grad_norm: 0.8443036199497377, iteration: 328273
loss: 0.9874823093414307,grad_norm: 0.9531409354733151, iteration: 328274
loss: 1.0027273893356323,grad_norm: 0.9999989425427656, iteration: 328275
loss: 0.9997420907020569,grad_norm: 0.7745956980252812, iteration: 328276
loss: 1.0167522430419922,grad_norm: 0.945097615662598, iteration: 328277
loss: 1.0095678567886353,grad_norm: 0.9025333018664374, iteration: 328278
loss: 0.9789643883705139,grad_norm: 0.7597991897225107, iteration: 328279
loss: 1.0159244537353516,grad_norm: 0.9999989820156366, iteration: 328280
loss: 0.9852192997932434,grad_norm: 0.7566482422626142, iteration: 328281
loss: 0.9710357189178467,grad_norm: 0.8329801566502482, iteration: 328282
loss: 0.9848983287811279,grad_norm: 0.9314744377944839, iteration: 328283
loss: 0.9962894916534424,grad_norm: 0.9999990309248964, iteration: 328284
loss: 1.0096503496170044,grad_norm: 0.8051758571616442, iteration: 328285
loss: 0.9652538299560547,grad_norm: 0.7983152048726209, iteration: 328286
loss: 1.070630669593811,grad_norm: 0.999999133682119, iteration: 328287
loss: 1.003868818283081,grad_norm: 0.7837451422769564, iteration: 328288
loss: 1.0273873805999756,grad_norm: 0.7599799426119024, iteration: 328289
loss: 0.9891538619995117,grad_norm: 0.8726173065591191, iteration: 328290
loss: 1.0132805109024048,grad_norm: 0.8552002360779382, iteration: 328291
loss: 0.9812875986099243,grad_norm: 0.6125341763218756, iteration: 328292
loss: 0.9980558156967163,grad_norm: 0.924504546415086, iteration: 328293
loss: 1.0234614610671997,grad_norm: 0.8561644440319615, iteration: 328294
loss: 1.0157275199890137,grad_norm: 0.8853205585256814, iteration: 328295
loss: 0.9863924980163574,grad_norm: 0.8202275311889607, iteration: 328296
loss: 0.9970861077308655,grad_norm: 0.873391460101103, iteration: 328297
loss: 1.0246046781539917,grad_norm: 0.9467051956168456, iteration: 328298
loss: 0.9781503677368164,grad_norm: 0.8146561272049269, iteration: 328299
loss: 1.006654143333435,grad_norm: 0.6988382778160936, iteration: 328300
loss: 1.0070370435714722,grad_norm: 0.7695492445578475, iteration: 328301
loss: 0.9905365705490112,grad_norm: 0.9999990926076754, iteration: 328302
loss: 0.9813372492790222,grad_norm: 0.9999991565938864, iteration: 328303
loss: 1.0120857954025269,grad_norm: 0.6977981055602973, iteration: 328304
loss: 0.996614396572113,grad_norm: 0.6894779240536806, iteration: 328305
loss: 0.9829753041267395,grad_norm: 0.8068601062518287, iteration: 328306
loss: 0.9991973638534546,grad_norm: 0.856722038820935, iteration: 328307
loss: 1.0069986581802368,grad_norm: 0.6973089222970961, iteration: 328308
loss: 0.9660930633544922,grad_norm: 0.9418642609057282, iteration: 328309
loss: 0.988880455493927,grad_norm: 0.8978324122746204, iteration: 328310
loss: 1.0028027296066284,grad_norm: 0.9999990921283427, iteration: 328311
loss: 0.9653398394584656,grad_norm: 0.8020444743656874, iteration: 328312
loss: 1.022125482559204,grad_norm: 0.8579917402034121, iteration: 328313
loss: 1.0243767499923706,grad_norm: 0.9999990528108406, iteration: 328314
loss: 1.0085296630859375,grad_norm: 0.8604100456886681, iteration: 328315
loss: 0.9759956002235413,grad_norm: 0.9269823699022791, iteration: 328316
loss: 1.0619360208511353,grad_norm: 0.8055611300477062, iteration: 328317
loss: 1.0278679132461548,grad_norm: 0.9209001765548829, iteration: 328318
loss: 1.0420572757720947,grad_norm: 0.9284373471850282, iteration: 328319
loss: 0.9955688118934631,grad_norm: 0.7058328688660603, iteration: 328320
loss: 1.0228866338729858,grad_norm: 0.8305470759950702, iteration: 328321
loss: 1.1105706691741943,grad_norm: 0.8084119417868624, iteration: 328322
loss: 1.0135242938995361,grad_norm: 0.9644766499024002, iteration: 328323
loss: 0.9988276362419128,grad_norm: 0.9188302172328129, iteration: 328324
loss: 1.0259528160095215,grad_norm: 0.6995828257048031, iteration: 328325
loss: 1.0062189102172852,grad_norm: 0.9502636143990334, iteration: 328326
loss: 0.983740508556366,grad_norm: 0.7996678025224322, iteration: 328327
loss: 0.9765238165855408,grad_norm: 0.8365907010424041, iteration: 328328
loss: 1.004914402961731,grad_norm: 0.9558882774011065, iteration: 328329
loss: 0.9930348992347717,grad_norm: 0.8706904782033537, iteration: 328330
loss: 1.0125458240509033,grad_norm: 0.9013626185629527, iteration: 328331
loss: 1.0031334161758423,grad_norm: 0.9999990698027104, iteration: 328332
loss: 0.9683375954627991,grad_norm: 0.8967797822651763, iteration: 328333
loss: 1.0550259351730347,grad_norm: 0.999999349710774, iteration: 328334
loss: 0.9881401658058167,grad_norm: 0.8631758013619644, iteration: 328335
loss: 1.0081098079681396,grad_norm: 0.9999994037442606, iteration: 328336
loss: 0.9955967664718628,grad_norm: 0.934607974327073, iteration: 328337
loss: 0.9877071380615234,grad_norm: 0.7890174317754494, iteration: 328338
loss: 0.9801912307739258,grad_norm: 0.9999990210745197, iteration: 328339
loss: 1.0505486726760864,grad_norm: 0.7911918526236489, iteration: 328340
loss: 1.018147587776184,grad_norm: 0.885175282349822, iteration: 328341
loss: 0.9865449070930481,grad_norm: 0.9786675618740066, iteration: 328342
loss: 0.9606173038482666,grad_norm: 0.7030012105809006, iteration: 328343
loss: 1.0041321516036987,grad_norm: 0.8875678728635118, iteration: 328344
loss: 1.0160578489303589,grad_norm: 0.9999996041513128, iteration: 328345
loss: 0.99187171459198,grad_norm: 0.7902225435974086, iteration: 328346
loss: 0.9909433126449585,grad_norm: 0.9999995313339622, iteration: 328347
loss: 0.9866767525672913,grad_norm: 0.7258740644087603, iteration: 328348
loss: 0.989425539970398,grad_norm: 0.8177548464708415, iteration: 328349
loss: 0.9720416069030762,grad_norm: 0.8002666091270273, iteration: 328350
loss: 1.0104418992996216,grad_norm: 0.8184993470479639, iteration: 328351
loss: 1.047737956047058,grad_norm: 0.9999991460047284, iteration: 328352
loss: 0.9901314377784729,grad_norm: 0.7669769744289148, iteration: 328353
loss: 1.0034821033477783,grad_norm: 0.9999990637703959, iteration: 328354
loss: 0.9816034436225891,grad_norm: 0.8487922927274687, iteration: 328355
loss: 0.9640973806381226,grad_norm: 0.9999994291765234, iteration: 328356
loss: 1.0008430480957031,grad_norm: 0.8503787799595861, iteration: 328357
loss: 1.0274733304977417,grad_norm: 0.739728467561545, iteration: 328358
loss: 0.9729869961738586,grad_norm: 0.9031368940881365, iteration: 328359
loss: 1.011811375617981,grad_norm: 0.8165221710331619, iteration: 328360
loss: 1.0236191749572754,grad_norm: 0.9071120361128537, iteration: 328361
loss: 1.0167274475097656,grad_norm: 0.9157686500489266, iteration: 328362
loss: 1.0158698558807373,grad_norm: 0.7083231474201336, iteration: 328363
loss: 1.0006968975067139,grad_norm: 0.7079507775754185, iteration: 328364
loss: 0.9989727735519409,grad_norm: 0.7918967730006062, iteration: 328365
loss: 0.9815819263458252,grad_norm: 0.7995892197063892, iteration: 328366
loss: 0.9954328536987305,grad_norm: 0.8793463973264192, iteration: 328367
loss: 1.0179470777511597,grad_norm: 0.7093591136241071, iteration: 328368
loss: 0.9780751466751099,grad_norm: 0.9420093484417833, iteration: 328369
loss: 0.9959802627563477,grad_norm: 0.9391596075985331, iteration: 328370
loss: 0.9860713481903076,grad_norm: 0.8089181550368919, iteration: 328371
loss: 1.0099482536315918,grad_norm: 0.7810967190368805, iteration: 328372
loss: 0.960117757320404,grad_norm: 0.8816102868632615, iteration: 328373
loss: 1.0249098539352417,grad_norm: 0.8448072135471194, iteration: 328374
loss: 0.9713881611824036,grad_norm: 0.9161459567493615, iteration: 328375
loss: 1.0220948457717896,grad_norm: 0.8122927772438105, iteration: 328376
loss: 0.9610018730163574,grad_norm: 0.8498884367391464, iteration: 328377
loss: 0.9853529930114746,grad_norm: 0.8482139797752652, iteration: 328378
loss: 1.0254820585250854,grad_norm: 0.9999991514321377, iteration: 328379
loss: 1.0127122402191162,grad_norm: 0.9999994472454439, iteration: 328380
loss: 1.023313283920288,grad_norm: 0.8639751522865596, iteration: 328381
loss: 1.020137906074524,grad_norm: 0.9898581236016057, iteration: 328382
loss: 1.0862635374069214,grad_norm: 0.9999996983656646, iteration: 328383
loss: 0.9998648166656494,grad_norm: 0.8997573527345263, iteration: 328384
loss: 0.9874233603477478,grad_norm: 0.7453546558375069, iteration: 328385
loss: 1.0124766826629639,grad_norm: 0.7460164194776013, iteration: 328386
loss: 1.0017906427383423,grad_norm: 0.9999990824403512, iteration: 328387
loss: 1.0354636907577515,grad_norm: 0.82001631907651, iteration: 328388
loss: 0.9872289896011353,grad_norm: 0.9999991304704818, iteration: 328389
loss: 1.0164533853530884,grad_norm: 0.985075435793892, iteration: 328390
loss: 0.9998825788497925,grad_norm: 0.9034904069390326, iteration: 328391
loss: 0.9950344562530518,grad_norm: 0.8018533874220882, iteration: 328392
loss: 1.0139923095703125,grad_norm: 0.8024098560249284, iteration: 328393
loss: 0.9887308478355408,grad_norm: 0.9009121205733452, iteration: 328394
loss: 1.1109129190444946,grad_norm: 0.9999996681757378, iteration: 328395
loss: 1.0306397676467896,grad_norm: 0.7191094346788077, iteration: 328396
loss: 1.0138763189315796,grad_norm: 0.9999993955136467, iteration: 328397
loss: 1.0149822235107422,grad_norm: 0.8902836141561249, iteration: 328398
loss: 1.018385410308838,grad_norm: 0.9061699187342426, iteration: 328399
loss: 0.9936887621879578,grad_norm: 0.818519733974312, iteration: 328400
loss: 1.0032882690429688,grad_norm: 0.8989213235410831, iteration: 328401
loss: 1.033207654953003,grad_norm: 0.9999994372026377, iteration: 328402
loss: 1.1107300519943237,grad_norm: 0.999999584042668, iteration: 328403
loss: 0.9851120114326477,grad_norm: 0.8306952691041511, iteration: 328404
loss: 1.0264010429382324,grad_norm: 0.8715983075647948, iteration: 328405
loss: 0.9855837821960449,grad_norm: 0.7907661180525161, iteration: 328406
loss: 0.9986611008644104,grad_norm: 0.8979586711290382, iteration: 328407
loss: 1.0331535339355469,grad_norm: 0.8357204857168476, iteration: 328408
loss: 0.9813581705093384,grad_norm: 0.9999991148176325, iteration: 328409
loss: 1.0001757144927979,grad_norm: 0.7711893630447467, iteration: 328410
loss: 0.9807826280593872,grad_norm: 0.9999999956542766, iteration: 328411
loss: 0.9989118576049805,grad_norm: 0.9999991447264486, iteration: 328412
loss: 1.0203949213027954,grad_norm: 0.9999999935953151, iteration: 328413
loss: 0.9496490955352783,grad_norm: 0.7277618063814588, iteration: 328414
loss: 0.9717316627502441,grad_norm: 0.9233857721910435, iteration: 328415
loss: 0.9668364524841309,grad_norm: 0.8907904233511208, iteration: 328416
loss: 0.9907557964324951,grad_norm: 0.8305170941353542, iteration: 328417
loss: 0.9784937500953674,grad_norm: 0.8009868463706781, iteration: 328418
loss: 0.9973400235176086,grad_norm: 0.8211465623303558, iteration: 328419
loss: 1.0421677827835083,grad_norm: 0.9806777453509959, iteration: 328420
loss: 1.0204483270645142,grad_norm: 0.7832110713101615, iteration: 328421
loss: 1.0026267766952515,grad_norm: 0.75449329275206, iteration: 328422
loss: 1.0253398418426514,grad_norm: 0.9455259160904388, iteration: 328423
loss: 0.9813512563705444,grad_norm: 0.9405934239903861, iteration: 328424
loss: 0.9718989133834839,grad_norm: 0.735850487070196, iteration: 328425
loss: 0.9822404980659485,grad_norm: 0.8383662551365029, iteration: 328426
loss: 1.0142016410827637,grad_norm: 0.8925150697986166, iteration: 328427
loss: 1.0289552211761475,grad_norm: 0.9555628791385714, iteration: 328428
loss: 1.0621353387832642,grad_norm: 0.8427753729464286, iteration: 328429
loss: 1.0317493677139282,grad_norm: 0.7676377895358192, iteration: 328430
loss: 0.9827821254730225,grad_norm: 0.9999991261423267, iteration: 328431
loss: 1.0453402996063232,grad_norm: 0.7878767460880574, iteration: 328432
loss: 1.0161607265472412,grad_norm: 0.8133655031745494, iteration: 328433
loss: 0.9873254895210266,grad_norm: 0.9999991162933043, iteration: 328434
loss: 1.0231947898864746,grad_norm: 0.8288375128679942, iteration: 328435
loss: 0.9554031491279602,grad_norm: 0.7954273376214127, iteration: 328436
loss: 0.9817030429840088,grad_norm: 0.7895094419519385, iteration: 328437
loss: 0.9702354073524475,grad_norm: 0.982321059856131, iteration: 328438
loss: 0.996384859085083,grad_norm: 0.8396845941396763, iteration: 328439
loss: 1.0074657201766968,grad_norm: 0.9620594740081281, iteration: 328440
loss: 0.9884039163589478,grad_norm: 0.7984443675876611, iteration: 328441
loss: 1.0156688690185547,grad_norm: 0.9278276597729219, iteration: 328442
loss: 0.9991239309310913,grad_norm: 0.7381933417059411, iteration: 328443
loss: 0.9993963837623596,grad_norm: 0.8966385880161247, iteration: 328444
loss: 1.0084333419799805,grad_norm: 0.927691315056101, iteration: 328445
loss: 1.0457932949066162,grad_norm: 0.9537350921019938, iteration: 328446
loss: 1.0135676860809326,grad_norm: 0.9052068449659169, iteration: 328447
loss: 1.0130616426467896,grad_norm: 0.8778589522180928, iteration: 328448
loss: 0.9943849444389343,grad_norm: 0.8912725324743338, iteration: 328449
loss: 0.9992004632949829,grad_norm: 0.7709926865868837, iteration: 328450
loss: 1.064268946647644,grad_norm: 0.999999819740525, iteration: 328451
loss: 1.0115638971328735,grad_norm: 0.722777098623937, iteration: 328452
loss: 1.007427453994751,grad_norm: 0.6955376333996544, iteration: 328453
loss: 0.9965108036994934,grad_norm: 0.6755781816846872, iteration: 328454
loss: 1.009044885635376,grad_norm: 0.999999076244798, iteration: 328455
loss: 1.0142518281936646,grad_norm: 0.9999991131610775, iteration: 328456
loss: 0.9747523665428162,grad_norm: 0.7155393405620181, iteration: 328457
loss: 0.9562376737594604,grad_norm: 0.9999997055084702, iteration: 328458
loss: 0.9893953204154968,grad_norm: 0.9999991492953317, iteration: 328459
loss: 1.0221011638641357,grad_norm: 0.9999991969323078, iteration: 328460
loss: 1.0047215223312378,grad_norm: 0.9681574717263799, iteration: 328461
loss: 1.0033595561981201,grad_norm: 0.9999997657740088, iteration: 328462
loss: 1.0106395483016968,grad_norm: 0.812212293451651, iteration: 328463
loss: 1.0382684469223022,grad_norm: 0.8116497760293044, iteration: 328464
loss: 1.0350924730300903,grad_norm: 0.9999992006388243, iteration: 328465
loss: 0.9888370633125305,grad_norm: 0.8097813378319184, iteration: 328466
loss: 0.9542356133460999,grad_norm: 0.9488798035528971, iteration: 328467
loss: 1.0131688117980957,grad_norm: 0.9999998950414978, iteration: 328468
loss: 1.0848695039749146,grad_norm: 0.9999991120831027, iteration: 328469
loss: 1.0180425643920898,grad_norm: 0.775320625078692, iteration: 328470
loss: 0.9668629169464111,grad_norm: 0.783884709396711, iteration: 328471
loss: 1.0204890966415405,grad_norm: 0.9727743556551675, iteration: 328472
loss: 0.9995782375335693,grad_norm: 0.8033165848135544, iteration: 328473
loss: 1.0260660648345947,grad_norm: 0.999999421107479, iteration: 328474
loss: 1.0339659452438354,grad_norm: 0.8373955916105899, iteration: 328475
loss: 1.0365376472473145,grad_norm: 0.7454732184075396, iteration: 328476
loss: 1.109133005142212,grad_norm: 0.9999993529370497, iteration: 328477
loss: 1.0251317024230957,grad_norm: 0.8149590121741239, iteration: 328478
loss: 0.9840312004089355,grad_norm: 0.9088958788340356, iteration: 328479
loss: 0.9642084240913391,grad_norm: 0.7868864892013289, iteration: 328480
loss: 0.9692417979240417,grad_norm: 0.8621751167035565, iteration: 328481
loss: 0.9849014282226562,grad_norm: 0.8371720704695651, iteration: 328482
loss: 1.028594732284546,grad_norm: 0.9999991137225784, iteration: 328483
loss: 1.068453311920166,grad_norm: 0.9999993319995597, iteration: 328484
loss: 0.9882481098175049,grad_norm: 0.7867685074223988, iteration: 328485
loss: 1.0163606405258179,grad_norm: 0.7508350062328052, iteration: 328486
loss: 0.9682213068008423,grad_norm: 0.6419193899217664, iteration: 328487
loss: 0.9829524755477905,grad_norm: 0.7295605147506892, iteration: 328488
loss: 0.9905846118927002,grad_norm: 0.9999992473664847, iteration: 328489
loss: 0.9971215128898621,grad_norm: 0.7750503840462057, iteration: 328490
loss: 0.9910066723823547,grad_norm: 0.84731273490475, iteration: 328491
loss: 0.986804187297821,grad_norm: 0.9004834102005828, iteration: 328492
loss: 1.0055314302444458,grad_norm: 0.9695180004078076, iteration: 328493
loss: 0.9845827221870422,grad_norm: 0.8580794406876873, iteration: 328494
loss: 0.995802104473114,grad_norm: 0.7404105009718437, iteration: 328495
loss: 1.0095106363296509,grad_norm: 0.8619993303232332, iteration: 328496
loss: 0.980448842048645,grad_norm: 0.9999999132317541, iteration: 328497
loss: 1.003360390663147,grad_norm: 0.9999992519026835, iteration: 328498
loss: 1.0530836582183838,grad_norm: 0.9999991750514712, iteration: 328499
loss: 0.9884874224662781,grad_norm: 0.8130709606945569, iteration: 328500
loss: 0.9961985945701599,grad_norm: 0.9980338059031533, iteration: 328501
loss: 0.9954936504364014,grad_norm: 0.9311532124882843, iteration: 328502
loss: 1.0300014019012451,grad_norm: 0.7260202771446425, iteration: 328503
loss: 1.0392611026763916,grad_norm: 0.999999463055045, iteration: 328504
loss: 1.0323786735534668,grad_norm: 0.8212447392391579, iteration: 328505
loss: 1.0215266942977905,grad_norm: 0.7583136636276137, iteration: 328506
loss: 0.9795320630073547,grad_norm: 0.8624204211132991, iteration: 328507
loss: 0.9811444878578186,grad_norm: 0.7946243745850093, iteration: 328508
loss: 1.0097016096115112,grad_norm: 0.8250457027077683, iteration: 328509
loss: 0.9982204437255859,grad_norm: 0.8228091283602521, iteration: 328510
loss: 1.0227835178375244,grad_norm: 0.9880246797159337, iteration: 328511
loss: 1.0251160860061646,grad_norm: 0.9999994132111584, iteration: 328512
loss: 0.9906824827194214,grad_norm: 0.9999990998544368, iteration: 328513
loss: 0.9754216074943542,grad_norm: 0.8820585630451672, iteration: 328514
loss: 0.9954893589019775,grad_norm: 0.9999993140116156, iteration: 328515
loss: 1.0147373676300049,grad_norm: 0.8649290568238762, iteration: 328516
loss: 1.0014532804489136,grad_norm: 0.9160308807479781, iteration: 328517
loss: 1.1041371822357178,grad_norm: 0.9999996796070321, iteration: 328518
loss: 0.9638210535049438,grad_norm: 0.8955190569259295, iteration: 328519
loss: 0.9747004508972168,grad_norm: 0.7212402973321129, iteration: 328520
loss: 1.035954236984253,grad_norm: 0.8804761267770014, iteration: 328521
loss: 0.9771570563316345,grad_norm: 0.840061366038968, iteration: 328522
loss: 0.9889679551124573,grad_norm: 0.7686814839633392, iteration: 328523
loss: 1.0083931684494019,grad_norm: 0.9999997036557259, iteration: 328524
loss: 0.9991527199745178,grad_norm: 0.857298732545068, iteration: 328525
loss: 0.9783081412315369,grad_norm: 0.9141292602089821, iteration: 328526
loss: 0.982327938079834,grad_norm: 0.8449387785028785, iteration: 328527
loss: 1.002171516418457,grad_norm: 0.9999991500062244, iteration: 328528
loss: 1.0282524824142456,grad_norm: 0.9757672988018771, iteration: 328529
loss: 1.0665608644485474,grad_norm: 0.7919191437126688, iteration: 328530
loss: 1.0156077146530151,grad_norm: 0.7811741040959349, iteration: 328531
loss: 0.9734959006309509,grad_norm: 0.8617365290674929, iteration: 328532
loss: 0.9749253392219543,grad_norm: 0.8445028903054632, iteration: 328533
loss: 1.0105401277542114,grad_norm: 0.7672311019680104, iteration: 328534
loss: 1.0398832559585571,grad_norm: 0.6931734872130318, iteration: 328535
loss: 1.0061612129211426,grad_norm: 0.7971446836867674, iteration: 328536
loss: 0.9716686010360718,grad_norm: 0.8130037841708825, iteration: 328537
loss: 1.0329203605651855,grad_norm: 0.9999993006504833, iteration: 328538
loss: 1.014877200126648,grad_norm: 0.9209129954557999, iteration: 328539
loss: 0.9786577820777893,grad_norm: 0.8727341785876606, iteration: 328540
loss: 1.0614572763442993,grad_norm: 0.9999990543755392, iteration: 328541
loss: 1.0101860761642456,grad_norm: 0.8552661444559047, iteration: 328542
loss: 1.0142675638198853,grad_norm: 0.7047732460676687, iteration: 328543
loss: 0.9958609938621521,grad_norm: 0.9390787392043408, iteration: 328544
loss: 1.0200241804122925,grad_norm: 0.9816942706656503, iteration: 328545
loss: 1.0175343751907349,grad_norm: 0.8464089186507807, iteration: 328546
loss: 0.9627481698989868,grad_norm: 0.9778012295730343, iteration: 328547
loss: 0.9287364482879639,grad_norm: 0.897051300643715, iteration: 328548
loss: 0.9922342896461487,grad_norm: 0.9999991939800384, iteration: 328549
loss: 0.9785143136978149,grad_norm: 0.999998986458376, iteration: 328550
loss: 1.015534520149231,grad_norm: 0.999999086482195, iteration: 328551
loss: 0.9891612529754639,grad_norm: 0.8418492881114896, iteration: 328552
loss: 1.0225977897644043,grad_norm: 0.9348503877981553, iteration: 328553
loss: 1.01177978515625,grad_norm: 0.8171050624859408, iteration: 328554
loss: 1.0210703611373901,grad_norm: 0.9999990979561051, iteration: 328555
loss: 1.0053328275680542,grad_norm: 0.9188796756538425, iteration: 328556
loss: 1.03035569190979,grad_norm: 0.9069018359402693, iteration: 328557
loss: 0.9604920744895935,grad_norm: 0.7891813661941194, iteration: 328558
loss: 0.9992384910583496,grad_norm: 0.7695074350093947, iteration: 328559
loss: 1.01214599609375,grad_norm: 0.9999990031004479, iteration: 328560
loss: 0.9947974681854248,grad_norm: 0.8078973307863269, iteration: 328561
loss: 1.0352102518081665,grad_norm: 0.9884453103165142, iteration: 328562
loss: 1.004388689994812,grad_norm: 0.7452855852876162, iteration: 328563
loss: 1.0076498985290527,grad_norm: 0.7193512503015296, iteration: 328564
loss: 1.0063297748565674,grad_norm: 0.8037276338004824, iteration: 328565
loss: 0.9886674880981445,grad_norm: 0.7782901374168362, iteration: 328566
loss: 1.0059869289398193,grad_norm: 0.9999990440322556, iteration: 328567
loss: 1.046149492263794,grad_norm: 0.815092925481789, iteration: 328568
loss: 0.9783561825752258,grad_norm: 0.7945044486556546, iteration: 328569
loss: 0.9898271560668945,grad_norm: 0.8378830119548623, iteration: 328570
loss: 0.9900237917900085,grad_norm: 0.7209850222429108, iteration: 328571
loss: 1.0071282386779785,grad_norm: 0.8608003528191683, iteration: 328572
loss: 0.9973932504653931,grad_norm: 0.9999992502781176, iteration: 328573
loss: 0.995589554309845,grad_norm: 0.9999991768416179, iteration: 328574
loss: 1.0011389255523682,grad_norm: 0.9999998078281758, iteration: 328575
loss: 0.9785788655281067,grad_norm: 0.8328505587372199, iteration: 328576
loss: 1.0291529893875122,grad_norm: 0.782300637819259, iteration: 328577
loss: 1.019140601158142,grad_norm: 0.8384999882244281, iteration: 328578
loss: 1.03043532371521,grad_norm: 0.9470357731207119, iteration: 328579
loss: 1.0091487169265747,grad_norm: 0.7885829240176595, iteration: 328580
loss: 1.0072330236434937,grad_norm: 0.7932944812775996, iteration: 328581
loss: 0.9746321439743042,grad_norm: 0.9999990671429639, iteration: 328582
loss: 0.9954620599746704,grad_norm: 0.8337055512821914, iteration: 328583
loss: 0.9798131585121155,grad_norm: 0.9464609485808804, iteration: 328584
loss: 1.0341212749481201,grad_norm: 0.8236367779688551, iteration: 328585
loss: 1.02714204788208,grad_norm: 0.7980382924155526, iteration: 328586
loss: 0.9824275970458984,grad_norm: 0.9092193924646196, iteration: 328587
loss: 0.9715597033500671,grad_norm: 0.9369616878422359, iteration: 328588
loss: 1.0412527322769165,grad_norm: 0.9148776763129483, iteration: 328589
loss: 0.97563236951828,grad_norm: 0.7404564815979828, iteration: 328590
loss: 0.9654076099395752,grad_norm: 0.9999993470452863, iteration: 328591
loss: 1.0000795125961304,grad_norm: 0.9950830691661862, iteration: 328592
loss: 0.9655655026435852,grad_norm: 0.7152981363942925, iteration: 328593
loss: 0.9695746302604675,grad_norm: 0.999998987097244, iteration: 328594
loss: 1.0426673889160156,grad_norm: 0.9999993093575782, iteration: 328595
loss: 0.9869811534881592,grad_norm: 0.7074731615066637, iteration: 328596
loss: 1.1351066827774048,grad_norm: 0.9999991712154332, iteration: 328597
loss: 1.0282623767852783,grad_norm: 0.8427866008679962, iteration: 328598
loss: 1.0072312355041504,grad_norm: 0.9375538269756734, iteration: 328599
loss: 0.9862208366394043,grad_norm: 0.8031606498971863, iteration: 328600
loss: 0.9991526007652283,grad_norm: 0.930115250989166, iteration: 328601
loss: 1.0191131830215454,grad_norm: 0.7636715926951726, iteration: 328602
loss: 0.9885143041610718,grad_norm: 0.7690860567318111, iteration: 328603
loss: 1.0956064462661743,grad_norm: 0.8771928111050392, iteration: 328604
loss: 0.982281506061554,grad_norm: 0.7034653336973113, iteration: 328605
loss: 1.017418622970581,grad_norm: 0.8459396049826335, iteration: 328606
loss: 1.0254039764404297,grad_norm: 0.9999992104239658, iteration: 328607
loss: 0.9861137866973877,grad_norm: 0.823201027253513, iteration: 328608
loss: 0.9912481904029846,grad_norm: 0.8521285435682701, iteration: 328609
loss: 1.002025842666626,grad_norm: 0.999999625593472, iteration: 328610
loss: 1.015886902809143,grad_norm: 0.9999993059810207, iteration: 328611
loss: 1.0075479745864868,grad_norm: 0.7983126172904008, iteration: 328612
loss: 0.991477906703949,grad_norm: 0.8080718045387203, iteration: 328613
loss: 0.9733084440231323,grad_norm: 0.8202743087576916, iteration: 328614
loss: 1.0062144994735718,grad_norm: 0.8489015527615144, iteration: 328615
loss: 0.9860578775405884,grad_norm: 0.8130446135007734, iteration: 328616
loss: 0.9822653532028198,grad_norm: 0.9999990881576764, iteration: 328617
loss: 1.0126922130584717,grad_norm: 0.8841742126890132, iteration: 328618
loss: 0.9686630368232727,grad_norm: 0.8038551570689784, iteration: 328619
loss: 0.9888386726379395,grad_norm: 0.9466856658257818, iteration: 328620
loss: 1.0016653537750244,grad_norm: 0.7469336274930966, iteration: 328621
loss: 0.9676359295845032,grad_norm: 0.7670071672390977, iteration: 328622
loss: 1.0055502653121948,grad_norm: 0.8240175160955048, iteration: 328623
loss: 0.9960679411888123,grad_norm: 0.7940877244487474, iteration: 328624
loss: 0.9852202534675598,grad_norm: 0.8551188692655523, iteration: 328625
loss: 1.0062838792800903,grad_norm: 0.9544537012055524, iteration: 328626
loss: 1.032413363456726,grad_norm: 0.9089881253114384, iteration: 328627
loss: 0.9881148934364319,grad_norm: 0.728899884591163, iteration: 328628
loss: 0.9804129600524902,grad_norm: 0.7639443357258225, iteration: 328629
loss: 1.0226833820343018,grad_norm: 0.7473862516113352, iteration: 328630
loss: 1.0313894748687744,grad_norm: 0.7428667539497962, iteration: 328631
loss: 1.0053871870040894,grad_norm: 0.868434935829856, iteration: 328632
loss: 1.0116137266159058,grad_norm: 0.7573273482487237, iteration: 328633
loss: 0.9984177947044373,grad_norm: 0.931017191220249, iteration: 328634
loss: 1.016968846321106,grad_norm: 0.9945889441316246, iteration: 328635
loss: 0.9914349317550659,grad_norm: 0.8995845477448182, iteration: 328636
loss: 0.9968686103820801,grad_norm: 0.8962011315188099, iteration: 328637
loss: 0.9905441999435425,grad_norm: 0.7765493430889745, iteration: 328638
loss: 0.9446600079536438,grad_norm: 0.8794052711099025, iteration: 328639
loss: 1.0289742946624756,grad_norm: 0.9999993891150593, iteration: 328640
loss: 0.9559200406074524,grad_norm: 0.9085893995832265, iteration: 328641
loss: 0.9655404090881348,grad_norm: 0.9147903131377361, iteration: 328642
loss: 1.003142237663269,grad_norm: 0.8120610405114577, iteration: 328643
loss: 0.9999738335609436,grad_norm: 0.7516922839023583, iteration: 328644
loss: 0.9878634810447693,grad_norm: 0.7463263474914945, iteration: 328645
loss: 1.0073935985565186,grad_norm: 0.7635589243727653, iteration: 328646
loss: 1.044715166091919,grad_norm: 0.9999991287960415, iteration: 328647
loss: 1.0332014560699463,grad_norm: 0.767364786752767, iteration: 328648
loss: 0.9659225940704346,grad_norm: 0.9280641553885571, iteration: 328649
loss: 0.9746551513671875,grad_norm: 0.8218559450503788, iteration: 328650
loss: 1.0130282640457153,grad_norm: 0.8969204749295332, iteration: 328651
loss: 0.987723708152771,grad_norm: 0.9999991531415939, iteration: 328652
loss: 0.9825953245162964,grad_norm: 0.941173205283745, iteration: 328653
loss: 0.9922097325325012,grad_norm: 0.9999990872638642, iteration: 328654
loss: 1.001927137374878,grad_norm: 0.7465101523809632, iteration: 328655
loss: 1.0375336408615112,grad_norm: 0.9660988476130855, iteration: 328656
loss: 1.0240540504455566,grad_norm: 0.954485016727, iteration: 328657
loss: 1.011192798614502,grad_norm: 0.9999992120424744, iteration: 328658
loss: 0.9849324226379395,grad_norm: 0.7818453423239222, iteration: 328659
loss: 1.015281319618225,grad_norm: 0.9999991015020908, iteration: 328660
loss: 0.9816968441009521,grad_norm: 0.7913513348031734, iteration: 328661
loss: 1.0495593547821045,grad_norm: 0.769088711054024, iteration: 328662
loss: 1.0089513063430786,grad_norm: 0.8604651563873226, iteration: 328663
loss: 1.0054278373718262,grad_norm: 0.8856058775296057, iteration: 328664
loss: 0.996709942817688,grad_norm: 0.8972834267472587, iteration: 328665
loss: 0.9686564207077026,grad_norm: 0.7146918176538996, iteration: 328666
loss: 0.9803188443183899,grad_norm: 0.9999992124939139, iteration: 328667
loss: 0.9934113621711731,grad_norm: 0.6718111203545125, iteration: 328668
loss: 0.9931353330612183,grad_norm: 0.9776994375985395, iteration: 328669
loss: 0.9996673464775085,grad_norm: 0.9999989941148731, iteration: 328670
loss: 0.991070032119751,grad_norm: 0.9036115032356471, iteration: 328671
loss: 0.9962775707244873,grad_norm: 0.7921190173200102, iteration: 328672
loss: 1.017745852470398,grad_norm: 0.7930782342434664, iteration: 328673
loss: 1.0198408365249634,grad_norm: 0.8261557472877157, iteration: 328674
loss: 1.00077223777771,grad_norm: 0.8566318577280255, iteration: 328675
loss: 0.9892871975898743,grad_norm: 0.7950235712103807, iteration: 328676
loss: 0.9726853966712952,grad_norm: 0.7076105872596128, iteration: 328677
loss: 0.991357147693634,grad_norm: 0.8368508123760071, iteration: 328678
loss: 1.0159728527069092,grad_norm: 0.9999998881689415, iteration: 328679
loss: 1.0024229288101196,grad_norm: 0.909614682716687, iteration: 328680
loss: 0.9691497683525085,grad_norm: 0.7457420518074028, iteration: 328681
loss: 1.1379859447479248,grad_norm: 0.9999999390585816, iteration: 328682
loss: 0.9928091168403625,grad_norm: 0.9999991796535345, iteration: 328683
loss: 0.980694055557251,grad_norm: 0.8411857797828411, iteration: 328684
loss: 1.0139901638031006,grad_norm: 0.7805582637959563, iteration: 328685
loss: 1.0400663614273071,grad_norm: 0.9999992226922548, iteration: 328686
loss: 1.0037773847579956,grad_norm: 0.730076577168545, iteration: 328687
loss: 1.0029009580612183,grad_norm: 0.7384967300925513, iteration: 328688
loss: 1.0224120616912842,grad_norm: 0.8266310235337619, iteration: 328689
loss: 0.9907852411270142,grad_norm: 0.8254771031326281, iteration: 328690
loss: 1.006899118423462,grad_norm: 0.773306269108233, iteration: 328691
loss: 1.0076605081558228,grad_norm: 0.8790806013909734, iteration: 328692
loss: 1.0276217460632324,grad_norm: 0.6455428211961651, iteration: 328693
loss: 0.98542720079422,grad_norm: 0.9878622581859001, iteration: 328694
loss: 1.0352529287338257,grad_norm: 0.7412950318725101, iteration: 328695
loss: 0.9979856610298157,grad_norm: 0.8278309055208676, iteration: 328696
loss: 0.9827244281768799,grad_norm: 0.7760575155639479, iteration: 328697
loss: 1.0099620819091797,grad_norm: 0.857677655997007, iteration: 328698
loss: 0.993160605430603,grad_norm: 0.8145500640494195, iteration: 328699
loss: 0.9989497661590576,grad_norm: 0.9985196116485031, iteration: 328700
loss: 1.0153090953826904,grad_norm: 0.8886006085822232, iteration: 328701
loss: 0.9837831258773804,grad_norm: 0.9612905827087529, iteration: 328702
loss: 0.9843668341636658,grad_norm: 0.6609555149222535, iteration: 328703
loss: 0.996599018573761,grad_norm: 0.9317391123124179, iteration: 328704
loss: 0.962353527545929,grad_norm: 0.8603382872971694, iteration: 328705
loss: 1.032381534576416,grad_norm: 0.9999992380446571, iteration: 328706
loss: 1.012050747871399,grad_norm: 0.7672569856187904, iteration: 328707
loss: 0.9454924464225769,grad_norm: 0.9760999085275003, iteration: 328708
loss: 1.00812566280365,grad_norm: 0.8925070698933684, iteration: 328709
loss: 0.9991148114204407,grad_norm: 0.7483220188921761, iteration: 328710
loss: 0.9915035367012024,grad_norm: 0.78031712283474, iteration: 328711
loss: 1.0195101499557495,grad_norm: 0.9999988727551488, iteration: 328712
loss: 0.9951602816581726,grad_norm: 0.9999991536505451, iteration: 328713
loss: 1.021769404411316,grad_norm: 0.9763807790647774, iteration: 328714
loss: 0.9990919828414917,grad_norm: 0.9682515395308803, iteration: 328715
loss: 0.991870641708374,grad_norm: 0.884020236113918, iteration: 328716
loss: 1.045849323272705,grad_norm: 0.9999991139721651, iteration: 328717
loss: 1.01876699924469,grad_norm: 0.9999993009863282, iteration: 328718
loss: 0.9749566316604614,grad_norm: 0.8264509679044918, iteration: 328719
loss: 1.0807926654815674,grad_norm: 0.9999991390957358, iteration: 328720
loss: 1.0055301189422607,grad_norm: 0.9999989561386412, iteration: 328721
loss: 1.0093722343444824,grad_norm: 0.9999995194180741, iteration: 328722
loss: 1.020857810974121,grad_norm: 0.7850901454792208, iteration: 328723
loss: 1.0018515586853027,grad_norm: 0.8582540289665347, iteration: 328724
loss: 0.9822379350662231,grad_norm: 0.7260905828396925, iteration: 328725
loss: 1.0795788764953613,grad_norm: 0.8139655073738383, iteration: 328726
loss: 0.9965841770172119,grad_norm: 0.9999989932891137, iteration: 328727
loss: 1.0321252346038818,grad_norm: 0.8318415596347328, iteration: 328728
loss: 1.0363975763320923,grad_norm: 0.9999991130646152, iteration: 328729
loss: 1.0139353275299072,grad_norm: 0.723068720715114, iteration: 328730
loss: 1.0177483558654785,grad_norm: 0.9999992002625535, iteration: 328731
loss: 1.0192097425460815,grad_norm: 0.7116239250085356, iteration: 328732
loss: 1.0106500387191772,grad_norm: 0.8218799770510142, iteration: 328733
loss: 0.9756072759628296,grad_norm: 0.8795594805197597, iteration: 328734
loss: 1.0158789157867432,grad_norm: 0.8749042646196538, iteration: 328735
loss: 0.9929155707359314,grad_norm: 0.7902650801572645, iteration: 328736
loss: 0.9922617673873901,grad_norm: 0.8985798035242414, iteration: 328737
loss: 0.964712917804718,grad_norm: 0.7270233965421339, iteration: 328738
loss: 1.0262483358383179,grad_norm: 0.8843836077322925, iteration: 328739
loss: 1.0511802434921265,grad_norm: 0.7668132720496227, iteration: 328740
loss: 0.9842931628227234,grad_norm: 0.8739028425321733, iteration: 328741
loss: 1.0890130996704102,grad_norm: 0.9316864477535282, iteration: 328742
loss: 0.9859658479690552,grad_norm: 0.6946893016936319, iteration: 328743
loss: 0.99473637342453,grad_norm: 0.7579069714196439, iteration: 328744
loss: 0.9897955060005188,grad_norm: 0.7932100660522943, iteration: 328745
loss: 1.0029315948486328,grad_norm: 0.8171646068046368, iteration: 328746
loss: 0.9872246980667114,grad_norm: 0.8064033255911955, iteration: 328747
loss: 1.0478053092956543,grad_norm: 0.7048518055372984, iteration: 328748
loss: 1.0290907621383667,grad_norm: 0.7413145201355309, iteration: 328749
loss: 1.0142608880996704,grad_norm: 0.6764946918806526, iteration: 328750
loss: 0.9823147058486938,grad_norm: 0.9795915768731543, iteration: 328751
loss: 1.0377224683761597,grad_norm: 0.8788197205795788, iteration: 328752
loss: 1.0146790742874146,grad_norm: 0.7283733682940597, iteration: 328753
loss: 0.98885178565979,grad_norm: 0.8458016363497187, iteration: 328754
loss: 1.038357138633728,grad_norm: 0.9999999401677845, iteration: 328755
loss: 0.9917435050010681,grad_norm: 0.7800452822575636, iteration: 328756
loss: 1.0090762376785278,grad_norm: 0.8343615487969607, iteration: 328757
loss: 0.9624713659286499,grad_norm: 0.8426023170218616, iteration: 328758
loss: 0.9619277119636536,grad_norm: 0.9999991098947885, iteration: 328759
loss: 0.996048629283905,grad_norm: 0.7302405698464901, iteration: 328760
loss: 1.0012394189834595,grad_norm: 0.8147187542681164, iteration: 328761
loss: 1.02265465259552,grad_norm: 0.8170868470885223, iteration: 328762
loss: 1.0158460140228271,grad_norm: 0.9265766593130536, iteration: 328763
loss: 1.03379225730896,grad_norm: 0.7578630875376731, iteration: 328764
loss: 0.9917646646499634,grad_norm: 0.8365504628522079, iteration: 328765
loss: 0.9725910425186157,grad_norm: 0.8633162042839065, iteration: 328766
loss: 0.9903543591499329,grad_norm: 0.8652499440188601, iteration: 328767
loss: 0.984759509563446,grad_norm: 0.8183287620290999, iteration: 328768
loss: 1.0377298593521118,grad_norm: 0.8753929696883116, iteration: 328769
loss: 1.0111037492752075,grad_norm: 0.9270872813215614, iteration: 328770
loss: 0.988337516784668,grad_norm: 0.8552871137332844, iteration: 328771
loss: 1.0296772718429565,grad_norm: 0.8841702059990657, iteration: 328772
loss: 0.9770386815071106,grad_norm: 0.8339008060041301, iteration: 328773
loss: 1.0181820392608643,grad_norm: 0.7523075578316237, iteration: 328774
loss: 1.0208561420440674,grad_norm: 0.826851742635458, iteration: 328775
loss: 0.9969412684440613,grad_norm: 0.8284702579246703, iteration: 328776
loss: 1.0064198970794678,grad_norm: 0.7731487307173922, iteration: 328777
loss: 0.9778982996940613,grad_norm: 0.90179876118538, iteration: 328778
loss: 1.0216739177703857,grad_norm: 0.8893386835540477, iteration: 328779
loss: 0.9841595888137817,grad_norm: 0.93593564167721, iteration: 328780
loss: 1.0600277185440063,grad_norm: 0.9539291860709238, iteration: 328781
loss: 0.9946249127388,grad_norm: 0.8432466415371472, iteration: 328782
loss: 1.0083309412002563,grad_norm: 0.7243670155633738, iteration: 328783
loss: 1.0176461935043335,grad_norm: 0.9999990348236781, iteration: 328784
loss: 0.9952961802482605,grad_norm: 0.8796686672189682, iteration: 328785
loss: 0.9919331073760986,grad_norm: 0.7840507873546732, iteration: 328786
loss: 1.026799201965332,grad_norm: 0.999999885873245, iteration: 328787
loss: 1.0014088153839111,grad_norm: 0.8063131848703753, iteration: 328788
loss: 1.0129814147949219,grad_norm: 0.7313202468094352, iteration: 328789
loss: 0.990484356880188,grad_norm: 0.8904475666280006, iteration: 328790
loss: 1.0011045932769775,grad_norm: 0.8152008596619817, iteration: 328791
loss: 0.9978097677230835,grad_norm: 0.6568488746161957, iteration: 328792
loss: 1.007200837135315,grad_norm: 0.8827004104173718, iteration: 328793
loss: 1.0507822036743164,grad_norm: 0.6477464883116062, iteration: 328794
loss: 0.9846718907356262,grad_norm: 0.8981518954772354, iteration: 328795
loss: 1.0017322301864624,grad_norm: 0.9009328149793941, iteration: 328796
loss: 1.0374833345413208,grad_norm: 0.8185067615522927, iteration: 328797
loss: 0.9847552180290222,grad_norm: 0.8162171055314608, iteration: 328798
loss: 1.0405418872833252,grad_norm: 0.9999990700951216, iteration: 328799
loss: 0.9979658126831055,grad_norm: 0.7685969760560916, iteration: 328800
loss: 0.984238862991333,grad_norm: 0.7797326892065647, iteration: 328801
loss: 1.0198429822921753,grad_norm: 0.677991129902073, iteration: 328802
loss: 1.0063676834106445,grad_norm: 0.8603114638722072, iteration: 328803
loss: 0.9654865264892578,grad_norm: 0.8325343783459066, iteration: 328804
loss: 1.0110177993774414,grad_norm: 0.8310841352343903, iteration: 328805
loss: 1.0129551887512207,grad_norm: 0.9999991492795391, iteration: 328806
loss: 0.9829677939414978,grad_norm: 0.9168297560367611, iteration: 328807
loss: 1.0215641260147095,grad_norm: 0.9096827713310507, iteration: 328808
loss: 0.9991207122802734,grad_norm: 0.9369847771872045, iteration: 328809
loss: 0.9762701988220215,grad_norm: 0.9999989622859732, iteration: 328810
loss: 1.024644136428833,grad_norm: 0.8355467849882741, iteration: 328811
loss: 0.9898839592933655,grad_norm: 0.8844267631768771, iteration: 328812
loss: 1.0067615509033203,grad_norm: 0.8246304213335415, iteration: 328813
loss: 1.023584246635437,grad_norm: 0.9656530525475194, iteration: 328814
loss: 0.9768168926239014,grad_norm: 0.8846355376382469, iteration: 328815
loss: 1.0158296823501587,grad_norm: 0.7857837885485967, iteration: 328816
loss: 0.9912751317024231,grad_norm: 0.8179901250966509, iteration: 328817
loss: 1.0027821063995361,grad_norm: 0.7827057828840132, iteration: 328818
loss: 1.0339566469192505,grad_norm: 0.8630332525934656, iteration: 328819
loss: 1.0085573196411133,grad_norm: 0.9044044053783705, iteration: 328820
loss: 0.9491423964500427,grad_norm: 0.8174093452985689, iteration: 328821
loss: 0.9796268939971924,grad_norm: 0.8756282316216837, iteration: 328822
loss: 0.9856266975402832,grad_norm: 0.999999100555574, iteration: 328823
loss: 0.9985707402229309,grad_norm: 0.9096285743340901, iteration: 328824
loss: 0.9973827600479126,grad_norm: 0.8421389081908662, iteration: 328825
loss: 0.9817017316818237,grad_norm: 0.7645622699788217, iteration: 328826
loss: 1.0686748027801514,grad_norm: 0.9999991893367266, iteration: 328827
loss: 1.0384794473648071,grad_norm: 0.999999125264301, iteration: 328828
loss: 1.0169143676757812,grad_norm: 0.9999999182051567, iteration: 328829
loss: 1.0350446701049805,grad_norm: 0.9999991924597523, iteration: 328830
loss: 0.9607766270637512,grad_norm: 0.726890362026863, iteration: 328831
loss: 1.064102053642273,grad_norm: 0.8409409314062942, iteration: 328832
loss: 1.0205498933792114,grad_norm: 0.8251052879006734, iteration: 328833
loss: 0.9734251499176025,grad_norm: 0.8101430200600017, iteration: 328834
loss: 0.9977113604545593,grad_norm: 0.7533642294097298, iteration: 328835
loss: 0.9872828125953674,grad_norm: 0.828608455213318, iteration: 328836
loss: 1.0058547258377075,grad_norm: 0.8279605699789074, iteration: 328837
loss: 1.026041030883789,grad_norm: 0.8266074754908334, iteration: 328838
loss: 0.9976375699043274,grad_norm: 0.7099372955637533, iteration: 328839
loss: 0.9960498213768005,grad_norm: 0.7633098604562877, iteration: 328840
loss: 0.9995979070663452,grad_norm: 0.6981387013318222, iteration: 328841
loss: 1.0207163095474243,grad_norm: 0.8906426108360509, iteration: 328842
loss: 1.0292445421218872,grad_norm: 0.9004207529938024, iteration: 328843
loss: 1.0381574630737305,grad_norm: 0.7488837592668856, iteration: 328844
loss: 0.9696498513221741,grad_norm: 0.7526658067368621, iteration: 328845
loss: 0.9772267937660217,grad_norm: 0.909627546778672, iteration: 328846
loss: 0.9913883805274963,grad_norm: 0.7630299682362501, iteration: 328847
loss: 0.9873259663581848,grad_norm: 0.9966174767974241, iteration: 328848
loss: 1.0118979215621948,grad_norm: 0.920976423450131, iteration: 328849
loss: 0.9869890809059143,grad_norm: 0.7736875983481295, iteration: 328850
loss: 0.9793522357940674,grad_norm: 0.7494406921929837, iteration: 328851
loss: 0.9945292472839355,grad_norm: 0.8227224866206616, iteration: 328852
loss: 1.0043919086456299,grad_norm: 0.7296620631022185, iteration: 328853
loss: 1.0663366317749023,grad_norm: 0.9999994029094538, iteration: 328854
loss: 1.013966679573059,grad_norm: 0.8497616937141692, iteration: 328855
loss: 0.9591431021690369,grad_norm: 0.7813014018592785, iteration: 328856
loss: 0.964740514755249,grad_norm: 0.9280784749337186, iteration: 328857
loss: 1.0072400569915771,grad_norm: 0.9091447376823079, iteration: 328858
loss: 1.0209076404571533,grad_norm: 0.8407048501498092, iteration: 328859
loss: 0.9553498029708862,grad_norm: 0.8438491730678428, iteration: 328860
loss: 0.9956982135772705,grad_norm: 0.7307136930906932, iteration: 328861
loss: 1.0234096050262451,grad_norm: 0.7342855294267483, iteration: 328862
loss: 0.9727023839950562,grad_norm: 0.938779360130388, iteration: 328863
loss: 1.0504298210144043,grad_norm: 0.9999996438440558, iteration: 328864
loss: 0.9857625961303711,grad_norm: 0.8581737351095701, iteration: 328865
loss: 1.0096436738967896,grad_norm: 0.7629723601178238, iteration: 328866
loss: 0.9947304129600525,grad_norm: 0.8322379293046624, iteration: 328867
loss: 1.0191093683242798,grad_norm: 0.840620665443475, iteration: 328868
loss: 0.9823248982429504,grad_norm: 0.9270531263908142, iteration: 328869
loss: 0.9893943667411804,grad_norm: 0.782725917860022, iteration: 328870
loss: 1.0670998096466064,grad_norm: 0.9999991067584258, iteration: 328871
loss: 1.0188658237457275,grad_norm: 0.9999989762629572, iteration: 328872
loss: 1.0012245178222656,grad_norm: 0.8203300246300061, iteration: 328873
loss: 1.0103003978729248,grad_norm: 0.9277514967895525, iteration: 328874
loss: 0.9982334971427917,grad_norm: 0.928627514683607, iteration: 328875
loss: 0.9882346391677856,grad_norm: 0.9454509528345313, iteration: 328876
loss: 1.06650710105896,grad_norm: 0.9999998976030635, iteration: 328877
loss: 1.0120664834976196,grad_norm: 0.7057368823569238, iteration: 328878
loss: 1.0048881769180298,grad_norm: 0.7875824574668295, iteration: 328879
loss: 1.0165194272994995,grad_norm: 0.8216760638104356, iteration: 328880
loss: 1.0010228157043457,grad_norm: 0.9495729613690331, iteration: 328881
loss: 0.9677720665931702,grad_norm: 0.9999991860453973, iteration: 328882
loss: 1.0329371690750122,grad_norm: 0.763197856909743, iteration: 328883
loss: 0.9852784872055054,grad_norm: 0.8790484484465426, iteration: 328884
loss: 0.9702264666557312,grad_norm: 0.7532737293637685, iteration: 328885
loss: 1.014968991279602,grad_norm: 0.9012289270124622, iteration: 328886
loss: 0.9679646492004395,grad_norm: 0.7106332771491283, iteration: 328887
loss: 1.0140527486801147,grad_norm: 0.8282230341148318, iteration: 328888
loss: 1.0141760110855103,grad_norm: 0.791446523049921, iteration: 328889
loss: 1.0243040323257446,grad_norm: 0.9450994551864079, iteration: 328890
loss: 0.998671293258667,grad_norm: 0.9913133583517487, iteration: 328891
loss: 0.9913876056671143,grad_norm: 0.7852629846751595, iteration: 328892
loss: 1.0135635137557983,grad_norm: 0.7935336372981391, iteration: 328893
loss: 1.0130537748336792,grad_norm: 0.7790176104039257, iteration: 328894
loss: 1.0399447679519653,grad_norm: 0.999999222075129, iteration: 328895
loss: 1.0030980110168457,grad_norm: 0.7165500807750026, iteration: 328896
loss: 1.0269663333892822,grad_norm: 0.9205951047494128, iteration: 328897
loss: 0.9907386302947998,grad_norm: 0.8292511758290281, iteration: 328898
loss: 0.9964766502380371,grad_norm: 0.9999993164861731, iteration: 328899
loss: 1.0032511949539185,grad_norm: 0.7849834858354495, iteration: 328900
loss: 0.9756176471710205,grad_norm: 0.8204884346395027, iteration: 328901
loss: 1.124703288078308,grad_norm: 0.9999998980862408, iteration: 328902
loss: 1.0118218660354614,grad_norm: 0.7208874435085767, iteration: 328903
loss: 0.9980387091636658,grad_norm: 0.9999999098486378, iteration: 328904
loss: 1.0029304027557373,grad_norm: 0.7838437828799497, iteration: 328905
loss: 1.0002247095108032,grad_norm: 0.8764322751112618, iteration: 328906
loss: 0.9779713153839111,grad_norm: 0.9089231801365593, iteration: 328907
loss: 1.0620566606521606,grad_norm: 0.9828369143014488, iteration: 328908
loss: 0.9863907694816589,grad_norm: 0.7625790273459682, iteration: 328909
loss: 1.084944486618042,grad_norm: 0.9999994738056182, iteration: 328910
loss: 0.994914174079895,grad_norm: 0.9999996760637577, iteration: 328911
loss: 1.0195142030715942,grad_norm: 0.9999996344424386, iteration: 328912
loss: 1.02093505859375,grad_norm: 0.8952347181125205, iteration: 328913
loss: 1.3488391637802124,grad_norm: 0.9999998728976228, iteration: 328914
loss: 0.9961540102958679,grad_norm: 0.9999992136902071, iteration: 328915
loss: 1.0297825336456299,grad_norm: 0.9359698616742219, iteration: 328916
loss: 0.9605931043624878,grad_norm: 0.7367050435761988, iteration: 328917
loss: 0.9771947860717773,grad_norm: 0.719647881999637, iteration: 328918
loss: 1.031333088874817,grad_norm: 0.9053683809727956, iteration: 328919
loss: 0.9988447427749634,grad_norm: 0.955475259996147, iteration: 328920
loss: 0.9698690176010132,grad_norm: 0.9021348474938199, iteration: 328921
loss: 1.0178288221359253,grad_norm: 0.7821838484653492, iteration: 328922
loss: 0.9865966439247131,grad_norm: 0.9724622564365905, iteration: 328923
loss: 1.0878732204437256,grad_norm: 0.9999997896507012, iteration: 328924
loss: 0.9966285824775696,grad_norm: 0.813447298716513, iteration: 328925
loss: 1.004334568977356,grad_norm: 0.7939693246702084, iteration: 328926
loss: 1.0369815826416016,grad_norm: 0.8424738795246774, iteration: 328927
loss: 1.0157628059387207,grad_norm: 0.8769333895308935, iteration: 328928
loss: 1.0004411935806274,grad_norm: 0.770199983174323, iteration: 328929
loss: 1.0433133840560913,grad_norm: 0.9999992938564993, iteration: 328930
loss: 1.020334005355835,grad_norm: 0.7863414108074814, iteration: 328931
loss: 0.9877446889877319,grad_norm: 0.8071673270535594, iteration: 328932
loss: 0.9840034246444702,grad_norm: 0.7553485903266449, iteration: 328933
loss: 1.0263696908950806,grad_norm: 0.6957743367226804, iteration: 328934
loss: 0.9796798229217529,grad_norm: 0.8895002084527104, iteration: 328935
loss: 0.9938777089118958,grad_norm: 0.8409449058984381, iteration: 328936
loss: 0.9916951060295105,grad_norm: 0.6632415883969436, iteration: 328937
loss: 0.9641352295875549,grad_norm: 0.9999990147950177, iteration: 328938
loss: 0.9925457835197449,grad_norm: 0.8563122683169552, iteration: 328939
loss: 1.0157690048217773,grad_norm: 0.9999991828239102, iteration: 328940
loss: 0.9934321045875549,grad_norm: 0.9790653289506818, iteration: 328941
loss: 1.0366485118865967,grad_norm: 0.9883284562093123, iteration: 328942
loss: 1.001075267791748,grad_norm: 0.7644291685435971, iteration: 328943
loss: 1.0917962789535522,grad_norm: 0.9999998114213124, iteration: 328944
loss: 0.9897741675376892,grad_norm: 0.7160333907501844, iteration: 328945
loss: 0.9686262607574463,grad_norm: 0.9336335906088398, iteration: 328946
loss: 0.9709616303443909,grad_norm: 0.8216608431426282, iteration: 328947
loss: 0.9765489101409912,grad_norm: 0.8889398213808476, iteration: 328948
loss: 1.1843574047088623,grad_norm: 0.9999999224217175, iteration: 328949
loss: 0.9943004846572876,grad_norm: 0.8543394079126075, iteration: 328950
loss: 0.9555274248123169,grad_norm: 0.7418318955236393, iteration: 328951
loss: 1.0041807889938354,grad_norm: 0.9999996620787874, iteration: 328952
loss: 1.0051867961883545,grad_norm: 0.9062012808354436, iteration: 328953
loss: 1.0732612609863281,grad_norm: 0.9999995565535519, iteration: 328954
loss: 1.0184247493743896,grad_norm: 0.9999996340309599, iteration: 328955
loss: 1.0479692220687866,grad_norm: 0.7066793170691446, iteration: 328956
loss: 0.9979217052459717,grad_norm: 0.786410733951884, iteration: 328957
loss: 0.9900867938995361,grad_norm: 0.8719459437261337, iteration: 328958
loss: 1.0204732418060303,grad_norm: 0.8912318781174009, iteration: 328959
loss: 1.0546526908874512,grad_norm: 0.9999993439649094, iteration: 328960
loss: 1.0452109575271606,grad_norm: 0.977720922628901, iteration: 328961
loss: 1.4083552360534668,grad_norm: 0.9999998040541044, iteration: 328962
loss: 1.0111972093582153,grad_norm: 0.7382926929257982, iteration: 328963
loss: 1.0247998237609863,grad_norm: 0.9999992835499274, iteration: 328964
loss: 1.0235975980758667,grad_norm: 0.9999996606678125, iteration: 328965
loss: 1.0066814422607422,grad_norm: 0.9314954220648397, iteration: 328966
loss: 0.991095244884491,grad_norm: 0.808858365105023, iteration: 328967
loss: 1.0791934728622437,grad_norm: 0.8050774938519057, iteration: 328968
loss: 1.1328527927398682,grad_norm: 0.9999997048996454, iteration: 328969
loss: 0.9970598220825195,grad_norm: 0.9999994235555602, iteration: 328970
loss: 1.050065040588379,grad_norm: 0.83146421483314, iteration: 328971
loss: 1.122251033782959,grad_norm: 0.9999996746794889, iteration: 328972
loss: 1.0886759757995605,grad_norm: 0.9999995394651298, iteration: 328973
loss: 1.0013729333877563,grad_norm: 0.9999995701728114, iteration: 328974
loss: 1.039109468460083,grad_norm: 0.9999993986651046, iteration: 328975
loss: 0.999247670173645,grad_norm: 0.7990785988467309, iteration: 328976
loss: 1.068274974822998,grad_norm: 0.8313770931398773, iteration: 328977
loss: 0.967369794845581,grad_norm: 0.730366762956765, iteration: 328978
loss: 1.0164793729782104,grad_norm: 0.9443175093793336, iteration: 328979
loss: 0.9964526295661926,grad_norm: 0.8958879564218172, iteration: 328980
loss: 1.0319262742996216,grad_norm: 0.7488779582866292, iteration: 328981
loss: 1.0129199028015137,grad_norm: 0.8052804064442142, iteration: 328982
loss: 0.9826974272727966,grad_norm: 0.9999996910522442, iteration: 328983
loss: 0.9996533393859863,grad_norm: 0.8385566719973409, iteration: 328984
loss: 0.9966216683387756,grad_norm: 0.8997456807945061, iteration: 328985
loss: 0.9780630469322205,grad_norm: 0.7389807228552902, iteration: 328986
loss: 1.0119091272354126,grad_norm: 0.9999995963512653, iteration: 328987
loss: 1.0453492403030396,grad_norm: 0.9999996124371303, iteration: 328988
loss: 1.0239075422286987,grad_norm: 0.9999997745295425, iteration: 328989
loss: 1.0705749988555908,grad_norm: 0.9999997721072255, iteration: 328990
loss: 1.0360339879989624,grad_norm: 0.9291279524481406, iteration: 328991
loss: 1.1366149187088013,grad_norm: 0.9999992650343559, iteration: 328992
loss: 1.0745792388916016,grad_norm: 0.999999420597369, iteration: 328993
loss: 1.039539098739624,grad_norm: 0.9999999555467428, iteration: 328994
loss: 1.2037334442138672,grad_norm: 0.9999991421778016, iteration: 328995
loss: 1.0514650344848633,grad_norm: 0.7963163604344863, iteration: 328996
loss: 0.9918060302734375,grad_norm: 0.818886255588233, iteration: 328997
loss: 1.0058420896530151,grad_norm: 0.794225529784978, iteration: 328998
loss: 0.9853224158287048,grad_norm: 0.9999990197764675, iteration: 328999
loss: 1.0224982500076294,grad_norm: 0.9999997287223933, iteration: 329000
loss: 1.0357531309127808,grad_norm: 0.9999991108617791, iteration: 329001
loss: 1.0012342929840088,grad_norm: 0.7832831787816796, iteration: 329002
loss: 1.0226470232009888,grad_norm: 0.9999995176688784, iteration: 329003
loss: 1.0343626737594604,grad_norm: 0.9312753230870614, iteration: 329004
loss: 1.0133516788482666,grad_norm: 0.991325848459103, iteration: 329005
loss: 1.0945566892623901,grad_norm: 0.9999991857846561, iteration: 329006
loss: 1.0314319133758545,grad_norm: 0.9999992018492629, iteration: 329007
loss: 1.2704448699951172,grad_norm: 0.9999995381584966, iteration: 329008
loss: 0.9908173680305481,grad_norm: 0.8413186948140722, iteration: 329009
loss: 1.1249727010726929,grad_norm: 0.9999989746624756, iteration: 329010
loss: 1.0447255373001099,grad_norm: 0.9999990288287063, iteration: 329011
loss: 1.0163077116012573,grad_norm: 0.858970516251284, iteration: 329012
loss: 0.9787818193435669,grad_norm: 0.7224647815419536, iteration: 329013
loss: 0.9651937484741211,grad_norm: 0.9999993345143068, iteration: 329014
loss: 0.9826079607009888,grad_norm: 0.8564707494698122, iteration: 329015
loss: 1.000881552696228,grad_norm: 0.8934220413011074, iteration: 329016
loss: 1.0131174325942993,grad_norm: 0.9955810324994798, iteration: 329017
loss: 1.038952350616455,grad_norm: 0.7787899431982798, iteration: 329018
loss: 0.9814102053642273,grad_norm: 0.9999998484903755, iteration: 329019
loss: 1.0109338760375977,grad_norm: 0.9999997926602325, iteration: 329020
loss: 1.0095430612564087,grad_norm: 0.8504504900269617, iteration: 329021
loss: 1.023727297782898,grad_norm: 0.972649160615391, iteration: 329022
loss: 0.9986820816993713,grad_norm: 0.9999993753570825, iteration: 329023
loss: 0.9948097467422485,grad_norm: 0.9109199701862133, iteration: 329024
loss: 1.0548030138015747,grad_norm: 0.9999999477393008, iteration: 329025
loss: 1.0634992122650146,grad_norm: 0.7817781659893414, iteration: 329026
loss: 1.0006119012832642,grad_norm: 0.9999997825257315, iteration: 329027
loss: 1.017924189567566,grad_norm: 0.7884826041952094, iteration: 329028
loss: 1.1057780981063843,grad_norm: 0.9878716957220488, iteration: 329029
loss: 1.030849814414978,grad_norm: 0.9999995914734486, iteration: 329030
loss: 0.9710680842399597,grad_norm: 0.761515967074608, iteration: 329031
loss: 0.975294828414917,grad_norm: 0.9999991508950686, iteration: 329032
loss: 1.0112770795822144,grad_norm: 0.9179532352075962, iteration: 329033
loss: 0.9644967913627625,grad_norm: 0.7733297708217545, iteration: 329034
loss: 0.9725827574729919,grad_norm: 0.8597577330839695, iteration: 329035
loss: 0.9744658470153809,grad_norm: 0.8419726041576261, iteration: 329036
loss: 1.018274188041687,grad_norm: 0.9999992324585221, iteration: 329037
loss: 1.084114670753479,grad_norm: 0.7941289983180402, iteration: 329038
loss: 0.970641553401947,grad_norm: 0.9152998765466593, iteration: 329039
loss: 1.0015612840652466,grad_norm: 0.9999992281588963, iteration: 329040
loss: 1.0117757320404053,grad_norm: 0.9999990139138578, iteration: 329041
loss: 1.2023082971572876,grad_norm: 0.9999997485981699, iteration: 329042
loss: 1.0615837574005127,grad_norm: 0.9999992687459387, iteration: 329043
loss: 0.9914345145225525,grad_norm: 0.9191474048119623, iteration: 329044
loss: 1.2173442840576172,grad_norm: 0.9999991895532238, iteration: 329045
loss: 1.0314445495605469,grad_norm: 0.9999991539770606, iteration: 329046
loss: 1.1179476976394653,grad_norm: 0.9999997016195096, iteration: 329047
loss: 1.1389234066009521,grad_norm: 0.9999998479374814, iteration: 329048
loss: 1.0498064756393433,grad_norm: 0.9313629455620687, iteration: 329049
loss: 1.0595688819885254,grad_norm: 0.9999998169755134, iteration: 329050
loss: 1.1678794622421265,grad_norm: 0.9999999278332734, iteration: 329051
loss: 1.011902928352356,grad_norm: 0.8073845690314849, iteration: 329052
loss: 1.0088777542114258,grad_norm: 0.7073961044235914, iteration: 329053
loss: 1.0873407125473022,grad_norm: 0.9999991676646622, iteration: 329054
loss: 1.0470712184906006,grad_norm: 0.9999996782667558, iteration: 329055
loss: 0.9572892785072327,grad_norm: 0.8631020883611661, iteration: 329056
loss: 1.0162110328674316,grad_norm: 0.8542425270912309, iteration: 329057
loss: 0.9896045923233032,grad_norm: 0.9999999287014655, iteration: 329058
loss: 1.0139058828353882,grad_norm: 0.7250374441316229, iteration: 329059
loss: 0.9933679103851318,grad_norm: 0.6989625574828674, iteration: 329060
loss: 0.9906793832778931,grad_norm: 0.7893119232114753, iteration: 329061
loss: 1.0062025785446167,grad_norm: 0.8718243660677856, iteration: 329062
loss: 1.0154838562011719,grad_norm: 0.7118402040572865, iteration: 329063
loss: 1.0223146677017212,grad_norm: 0.8786256178584936, iteration: 329064
loss: 1.0112955570220947,grad_norm: 0.7885240538234934, iteration: 329065
loss: 1.0398476123809814,grad_norm: 0.7971549974950264, iteration: 329066
loss: 0.9798636436462402,grad_norm: 0.9172152895655344, iteration: 329067
loss: 1.0345070362091064,grad_norm: 0.9999990384207728, iteration: 329068
loss: 0.9937195777893066,grad_norm: 0.8608378738261377, iteration: 329069
loss: 1.0348782539367676,grad_norm: 0.8922756826801134, iteration: 329070
loss: 1.0154823064804077,grad_norm: 0.999999505965571, iteration: 329071
loss: 1.0043656826019287,grad_norm: 0.7853677923551478, iteration: 329072
loss: 1.0263917446136475,grad_norm: 0.8011150776646863, iteration: 329073
loss: 0.9810535311698914,grad_norm: 0.9403051728372813, iteration: 329074
loss: 0.9897924661636353,grad_norm: 0.6387811809332735, iteration: 329075
loss: 1.0557212829589844,grad_norm: 0.952447373522704, iteration: 329076
loss: 1.0116420984268188,grad_norm: 0.8569817771623894, iteration: 329077
loss: 0.9931113123893738,grad_norm: 0.9702248559631554, iteration: 329078
loss: 1.010411024093628,grad_norm: 0.8336867152818563, iteration: 329079
loss: 0.9755612015724182,grad_norm: 0.9999991240970887, iteration: 329080
loss: 0.9825995564460754,grad_norm: 0.9999991772519946, iteration: 329081
loss: 0.9650858044624329,grad_norm: 0.8060826934646415, iteration: 329082
loss: 1.0350950956344604,grad_norm: 0.7523325175692602, iteration: 329083
loss: 1.0281323194503784,grad_norm: 0.9999998405754933, iteration: 329084
loss: 0.9932067394256592,grad_norm: 0.9470608276954596, iteration: 329085
loss: 0.977975606918335,grad_norm: 0.9999993992066838, iteration: 329086
loss: 1.0024640560150146,grad_norm: 0.960005949359338, iteration: 329087
loss: 0.9963663816452026,grad_norm: 0.7356668865980291, iteration: 329088
loss: 1.0040786266326904,grad_norm: 0.8460986180662061, iteration: 329089
loss: 0.9871188402175903,grad_norm: 0.7507698197515245, iteration: 329090
loss: 0.9881591200828552,grad_norm: 0.8763636438825732, iteration: 329091
loss: 1.0330215692520142,grad_norm: 0.9999999556624164, iteration: 329092
loss: 1.019317865371704,grad_norm: 0.9999993862613162, iteration: 329093
loss: 0.9929676651954651,grad_norm: 0.9338403702871093, iteration: 329094
loss: 1.0320175886154175,grad_norm: 0.9318000529329922, iteration: 329095
loss: 1.0226489305496216,grad_norm: 0.8410120819069156, iteration: 329096
loss: 1.0189118385314941,grad_norm: 0.8751178838806634, iteration: 329097
loss: 1.0090361833572388,grad_norm: 0.9464986886711823, iteration: 329098
loss: 0.992496907711029,grad_norm: 0.9085929584139034, iteration: 329099
loss: 0.9791486263275146,grad_norm: 0.6505342833575423, iteration: 329100
loss: 1.0202503204345703,grad_norm: 0.8535152742108123, iteration: 329101
loss: 0.9835627675056458,grad_norm: 0.9449294525785543, iteration: 329102
loss: 0.9868528842926025,grad_norm: 0.8165825330248239, iteration: 329103
loss: 1.0049487352371216,grad_norm: 0.8168985811156897, iteration: 329104
loss: 1.1900173425674438,grad_norm: 0.9999995270428125, iteration: 329105
loss: 1.0481704473495483,grad_norm: 0.8831631080157039, iteration: 329106
loss: 0.9804528951644897,grad_norm: 0.8617048489871106, iteration: 329107
loss: 1.016779899597168,grad_norm: 0.8358282816870204, iteration: 329108
loss: 0.9904744625091553,grad_norm: 0.9289521759810296, iteration: 329109
loss: 1.019451379776001,grad_norm: 0.999999146837644, iteration: 329110
loss: 0.999606192111969,grad_norm: 0.857149657646763, iteration: 329111
loss: 1.0320175886154175,grad_norm: 0.9701074178567151, iteration: 329112
loss: 1.012269377708435,grad_norm: 0.7025437780707438, iteration: 329113
loss: 1.0038129091262817,grad_norm: 0.7878666262559839, iteration: 329114
loss: 1.0405818223953247,grad_norm: 0.8180480408660373, iteration: 329115
loss: 1.0252699851989746,grad_norm: 0.9060908850006179, iteration: 329116
loss: 1.0098341703414917,grad_norm: 0.896747968222958, iteration: 329117
loss: 1.0349761247634888,grad_norm: 0.8735766689528058, iteration: 329118
loss: 1.093328833580017,grad_norm: 0.8691473258405803, iteration: 329119
loss: 1.0081396102905273,grad_norm: 0.763257731774894, iteration: 329120
loss: 1.0294984579086304,grad_norm: 0.7701469493214738, iteration: 329121
loss: 0.9797738194465637,grad_norm: 0.6937001269851054, iteration: 329122
loss: 0.9853286147117615,grad_norm: 0.8626854885237005, iteration: 329123
loss: 0.9620090126991272,grad_norm: 0.8699103516801553, iteration: 329124
loss: 0.9906582832336426,grad_norm: 0.9491189582516427, iteration: 329125
loss: 1.066776990890503,grad_norm: 0.8859707934343918, iteration: 329126
loss: 1.0052523612976074,grad_norm: 0.7094915720594693, iteration: 329127
loss: 1.0127289295196533,grad_norm: 0.956157713285848, iteration: 329128
loss: 1.0017101764678955,grad_norm: 0.9072965233759233, iteration: 329129
loss: 1.035854697227478,grad_norm: 0.8723741352733438, iteration: 329130
loss: 1.065144419670105,grad_norm: 0.9223894641185105, iteration: 329131
loss: 1.0118588209152222,grad_norm: 0.955058625554144, iteration: 329132
loss: 0.9749605059623718,grad_norm: 0.7668576383951128, iteration: 329133
loss: 1.1121665239334106,grad_norm: 0.9591478311093081, iteration: 329134
loss: 1.0085183382034302,grad_norm: 0.8485598945708683, iteration: 329135
loss: 1.0022578239440918,grad_norm: 0.9999993274494489, iteration: 329136
loss: 1.0005466938018799,grad_norm: 0.8725006743300584, iteration: 329137
loss: 1.0170729160308838,grad_norm: 0.9261453016766944, iteration: 329138
loss: 1.0053788423538208,grad_norm: 0.8527655877258283, iteration: 329139
loss: 1.0196999311447144,grad_norm: 0.7623786869679469, iteration: 329140
loss: 1.0140583515167236,grad_norm: 0.8394108763141799, iteration: 329141
loss: 1.112265706062317,grad_norm: 0.9999998603322942, iteration: 329142
loss: 1.0105475187301636,grad_norm: 0.9999993471517369, iteration: 329143
loss: 0.9704246520996094,grad_norm: 0.8883969811377385, iteration: 329144
loss: 0.9720452427864075,grad_norm: 0.9901900942106343, iteration: 329145
loss: 1.0082834959030151,grad_norm: 0.9023519798453447, iteration: 329146
loss: 1.0054577589035034,grad_norm: 0.9371848907604577, iteration: 329147
loss: 0.9938157796859741,grad_norm: 0.8783073202836243, iteration: 329148
loss: 0.9876857399940491,grad_norm: 0.999999143804311, iteration: 329149
loss: 1.0057870149612427,grad_norm: 0.7793016869010188, iteration: 329150
loss: 0.9601141810417175,grad_norm: 0.9307524020191528, iteration: 329151
loss: 0.9780987501144409,grad_norm: 0.8135173886921381, iteration: 329152
loss: 0.9991787075996399,grad_norm: 0.7559955597765662, iteration: 329153
loss: 1.024781584739685,grad_norm: 0.9999990886458616, iteration: 329154
loss: 1.0243380069732666,grad_norm: 0.9070690270113841, iteration: 329155
loss: 0.9954048991203308,grad_norm: 0.9999998611663262, iteration: 329156
loss: 1.0065494775772095,grad_norm: 0.8561593624834954, iteration: 329157
loss: 1.0031580924987793,grad_norm: 0.9999999121677468, iteration: 329158
loss: 0.9920001029968262,grad_norm: 0.8100671871825674, iteration: 329159
loss: 0.9899896383285522,grad_norm: 0.7637222622542925, iteration: 329160
loss: 0.9703480005264282,grad_norm: 0.8391851981664299, iteration: 329161
loss: 0.9878109097480774,grad_norm: 0.9174124192979619, iteration: 329162
loss: 0.9863021373748779,grad_norm: 0.890051150937125, iteration: 329163
loss: 0.9675106406211853,grad_norm: 0.8753009235141075, iteration: 329164
loss: 1.0117518901824951,grad_norm: 0.9266380648816522, iteration: 329165
loss: 0.9833418130874634,grad_norm: 0.9727616366039586, iteration: 329166
loss: 1.0463699102401733,grad_norm: 0.9999999189250985, iteration: 329167
loss: 0.9807884693145752,grad_norm: 0.8220267601538335, iteration: 329168
loss: 0.9602751135826111,grad_norm: 0.801816889609627, iteration: 329169
loss: 0.9881884455680847,grad_norm: 0.7183117966204233, iteration: 329170
loss: 0.9953233599662781,grad_norm: 0.9999991664543114, iteration: 329171
loss: 1.0164510011672974,grad_norm: 0.8301180756815114, iteration: 329172
loss: 1.0555998086929321,grad_norm: 0.9692698734173496, iteration: 329173
loss: 1.0189021825790405,grad_norm: 0.8276849475110136, iteration: 329174
loss: 0.9864529371261597,grad_norm: 0.999999677920675, iteration: 329175
loss: 0.993100643157959,grad_norm: 0.9935836676671654, iteration: 329176
loss: 0.9883920550346375,grad_norm: 0.7952469161820434, iteration: 329177
loss: 1.0931956768035889,grad_norm: 0.833249743229965, iteration: 329178
loss: 1.0288029909133911,grad_norm: 0.8188522948879596, iteration: 329179
loss: 1.0342121124267578,grad_norm: 0.9999992731423663, iteration: 329180
loss: 1.026090145111084,grad_norm: 0.9487237916722714, iteration: 329181
loss: 0.9642979502677917,grad_norm: 0.846420834903515, iteration: 329182
loss: 0.9673610925674438,grad_norm: 0.8741415896157243, iteration: 329183
loss: 1.0765682458877563,grad_norm: 0.9999991099060284, iteration: 329184
loss: 1.0326305627822876,grad_norm: 0.8375931340962092, iteration: 329185
loss: 0.980735182762146,grad_norm: 0.7308251593577187, iteration: 329186
loss: 0.9915040731430054,grad_norm: 0.9999991357855668, iteration: 329187
loss: 1.1185623407363892,grad_norm: 0.8953040131749995, iteration: 329188
loss: 1.0899308919906616,grad_norm: 0.9413581362245153, iteration: 329189
loss: 0.9513006806373596,grad_norm: 0.8739235834313448, iteration: 329190
loss: 1.0257869958877563,grad_norm: 0.9794756855602432, iteration: 329191
loss: 1.1166338920593262,grad_norm: 0.9999996097242909, iteration: 329192
loss: 1.0301835536956787,grad_norm: 0.9999995262737823, iteration: 329193
loss: 1.1510882377624512,grad_norm: 0.9999999629034836, iteration: 329194
loss: 1.0189237594604492,grad_norm: 0.9999999741099711, iteration: 329195
loss: 1.021018624305725,grad_norm: 0.9182751629556564, iteration: 329196
loss: 1.0190526247024536,grad_norm: 0.8528373204096031, iteration: 329197
loss: 1.0070542097091675,grad_norm: 0.8789670068660483, iteration: 329198
loss: 1.0062236785888672,grad_norm: 0.7963266900072531, iteration: 329199
loss: 1.013795256614685,grad_norm: 0.960022875331128, iteration: 329200
loss: 0.991426408290863,grad_norm: 0.999999171462094, iteration: 329201
loss: 0.9899057745933533,grad_norm: 0.9332380116992801, iteration: 329202
loss: 1.002098560333252,grad_norm: 0.9999995655982092, iteration: 329203
loss: 0.9943174719810486,grad_norm: 0.9999992515723384, iteration: 329204
loss: 0.9958345890045166,grad_norm: 0.9999991330048883, iteration: 329205
loss: 1.0651438236236572,grad_norm: 0.9939756528191235, iteration: 329206
loss: 1.100547432899475,grad_norm: 0.9999992598669825, iteration: 329207
loss: 1.0023884773254395,grad_norm: 0.926280523009541, iteration: 329208
loss: 0.9940794110298157,grad_norm: 0.884156429007987, iteration: 329209
loss: 1.0708914995193481,grad_norm: 0.9999999004397393, iteration: 329210
loss: 1.1261416673660278,grad_norm: 0.9999994579591672, iteration: 329211
loss: 1.0392743349075317,grad_norm: 0.9999992577032122, iteration: 329212
loss: 1.007889747619629,grad_norm: 0.9999996040752362, iteration: 329213
loss: 1.1103721857070923,grad_norm: 0.8425768902805367, iteration: 329214
loss: 0.9836310148239136,grad_norm: 0.7854459586280816, iteration: 329215
loss: 1.0613044500350952,grad_norm: 0.9835381175060098, iteration: 329216
loss: 1.0009973049163818,grad_norm: 0.828412918746725, iteration: 329217
loss: 1.0130008459091187,grad_norm: 0.9999991062943773, iteration: 329218
loss: 1.2596267461776733,grad_norm: 0.9999999597963987, iteration: 329219
loss: 0.9698942303657532,grad_norm: 0.7870516831218238, iteration: 329220
loss: 1.1927728652954102,grad_norm: 0.9999998394837838, iteration: 329221
loss: 1.146236538887024,grad_norm: 0.9999998229776668, iteration: 329222
loss: 0.9804263710975647,grad_norm: 0.9999992115676146, iteration: 329223
loss: 1.0431517362594604,grad_norm: 0.9999996783765339, iteration: 329224
loss: 1.0187180042266846,grad_norm: 0.8793126094541468, iteration: 329225
loss: 0.99310702085495,grad_norm: 0.7952766435545708, iteration: 329226
loss: 1.0149298906326294,grad_norm: 0.9959310167220907, iteration: 329227
loss: 1.0171613693237305,grad_norm: 0.8907000408030344, iteration: 329228
loss: 1.0924437046051025,grad_norm: 0.9999993838064619, iteration: 329229
loss: 1.0015203952789307,grad_norm: 0.9999998550086748, iteration: 329230
loss: 1.0054107904434204,grad_norm: 0.7851962430854363, iteration: 329231
loss: 1.0765721797943115,grad_norm: 0.9422488300691835, iteration: 329232
loss: 1.0264641046524048,grad_norm: 0.9999991696791208, iteration: 329233
loss: 0.9998795986175537,grad_norm: 0.9999997126563472, iteration: 329234
loss: 0.9857302308082581,grad_norm: 0.9999991833388382, iteration: 329235
loss: 0.9747239351272583,grad_norm: 0.7529674811363107, iteration: 329236
loss: 1.0358017683029175,grad_norm: 0.9999991780865001, iteration: 329237
loss: 0.9987977743148804,grad_norm: 0.9999997869426325, iteration: 329238
loss: 0.9810395836830139,grad_norm: 0.9999993131090381, iteration: 329239
loss: 0.9756060838699341,grad_norm: 0.7244417735825586, iteration: 329240
loss: 1.0268057584762573,grad_norm: 0.9915644335420728, iteration: 329241
loss: 1.0230404138565063,grad_norm: 0.7850335209906459, iteration: 329242
loss: 1.0292234420776367,grad_norm: 0.9999991843026724, iteration: 329243
loss: 1.0537570714950562,grad_norm: 0.8889344897448777, iteration: 329244
loss: 1.0554683208465576,grad_norm: 0.9999994133913891, iteration: 329245
loss: 1.0281015634536743,grad_norm: 0.9999999651858584, iteration: 329246
loss: 1.0980744361877441,grad_norm: 0.9999999570064576, iteration: 329247
loss: 1.022535800933838,grad_norm: 0.9999995470107612, iteration: 329248
loss: 0.9857733845710754,grad_norm: 0.9348962597307757, iteration: 329249
loss: 1.0649884939193726,grad_norm: 0.9999991262593468, iteration: 329250
loss: 1.1202356815338135,grad_norm: 0.9999998451695971, iteration: 329251
loss: 0.985439121723175,grad_norm: 0.764934535156788, iteration: 329252
loss: 1.0157948732376099,grad_norm: 0.9013662366767774, iteration: 329253
loss: 1.01738440990448,grad_norm: 0.9999993754885266, iteration: 329254
loss: 0.9911285042762756,grad_norm: 0.8323876975002018, iteration: 329255
loss: 1.0327892303466797,grad_norm: 0.9999993000960923, iteration: 329256
loss: 0.9764902591705322,grad_norm: 0.889511712822484, iteration: 329257
loss: 1.0459208488464355,grad_norm: 0.9999994372698147, iteration: 329258
loss: 1.0000077486038208,grad_norm: 0.9999992795811393, iteration: 329259
loss: 1.0198338031768799,grad_norm: 0.9999991438722741, iteration: 329260
loss: 0.9960660934448242,grad_norm: 0.7636593397202599, iteration: 329261
loss: 1.041029453277588,grad_norm: 0.8905784250301494, iteration: 329262
loss: 1.0086921453475952,grad_norm: 0.9805796074617883, iteration: 329263
loss: 0.9944521188735962,grad_norm: 0.8317508773795057, iteration: 329264
loss: 0.994136393070221,grad_norm: 0.9998896705897594, iteration: 329265
loss: 1.0110198259353638,grad_norm: 0.87994057636262, iteration: 329266
loss: 1.0720545053482056,grad_norm: 0.9999996620971731, iteration: 329267
loss: 1.0602049827575684,grad_norm: 0.9999997222371271, iteration: 329268
loss: 0.9833667874336243,grad_norm: 0.9999992175219526, iteration: 329269
loss: 1.007671594619751,grad_norm: 0.7667510595261936, iteration: 329270
loss: 0.9895378351211548,grad_norm: 0.8029553128980553, iteration: 329271
loss: 1.0100679397583008,grad_norm: 0.7735518289779333, iteration: 329272
loss: 1.0158525705337524,grad_norm: 0.9999993146925051, iteration: 329273
loss: 1.0025224685668945,grad_norm: 0.9999999769626295, iteration: 329274
loss: 1.0793278217315674,grad_norm: 0.9139576017268424, iteration: 329275
loss: 1.015425682067871,grad_norm: 0.9999990231385703, iteration: 329276
loss: 1.0308853387832642,grad_norm: 0.9999991780623818, iteration: 329277
loss: 1.0918720960617065,grad_norm: 0.9138366572406348, iteration: 329278
loss: 0.993440568447113,grad_norm: 0.9428672670701058, iteration: 329279
loss: 0.9857792854309082,grad_norm: 0.8306162219480763, iteration: 329280
loss: 1.1723976135253906,grad_norm: 0.9999999370920233, iteration: 329281
loss: 1.1006337404251099,grad_norm: 0.9999992341178345, iteration: 329282
loss: 1.0037903785705566,grad_norm: 0.9999990981783173, iteration: 329283
loss: 0.9778549075126648,grad_norm: 0.6986598015694347, iteration: 329284
loss: 1.0385462045669556,grad_norm: 0.9999992811511317, iteration: 329285
loss: 1.0048245191574097,grad_norm: 0.9999992191297479, iteration: 329286
loss: 1.008359432220459,grad_norm: 0.9281142550286975, iteration: 329287
loss: 1.0108567476272583,grad_norm: 0.7344794608372441, iteration: 329288
loss: 1.030297040939331,grad_norm: 0.706399571418, iteration: 329289
loss: 0.9931108355522156,grad_norm: 0.9609996278144771, iteration: 329290
loss: 1.0716197490692139,grad_norm: 0.9999993676418549, iteration: 329291
loss: 1.021883249282837,grad_norm: 0.8563604961930527, iteration: 329292
loss: 1.0214020013809204,grad_norm: 0.9999996455394722, iteration: 329293
loss: 0.990903913974762,grad_norm: 0.8877949131682485, iteration: 329294
loss: 1.0349363088607788,grad_norm: 0.8774613582910287, iteration: 329295
loss: 0.9889294505119324,grad_norm: 0.9914632754095156, iteration: 329296
loss: 0.9759451746940613,grad_norm: 0.7633275259014256, iteration: 329297
loss: 1.0744818449020386,grad_norm: 0.9999991749289694, iteration: 329298
loss: 1.0252349376678467,grad_norm: 0.8961147249803618, iteration: 329299
loss: 1.0537556409835815,grad_norm: 0.9999996241554603, iteration: 329300
loss: 1.1075165271759033,grad_norm: 0.9999998541991449, iteration: 329301
loss: 1.0148040056228638,grad_norm: 0.803540909647999, iteration: 329302
loss: 1.050538420677185,grad_norm: 0.9999991344579394, iteration: 329303
loss: 1.104883074760437,grad_norm: 0.9999998937152327, iteration: 329304
loss: 0.9707460999488831,grad_norm: 0.8991343995789097, iteration: 329305
loss: 1.0768471956253052,grad_norm: 0.9999996301833737, iteration: 329306
loss: 1.0145412683486938,grad_norm: 0.7311085987605157, iteration: 329307
loss: 1.0311048030853271,grad_norm: 0.9013584957493634, iteration: 329308
loss: 1.0778380632400513,grad_norm: 0.9999997882554101, iteration: 329309
loss: 1.0404819250106812,grad_norm: 0.9999994330075966, iteration: 329310
loss: 1.0276844501495361,grad_norm: 0.9999995816714433, iteration: 329311
loss: 0.9994587898254395,grad_norm: 0.9999994094859735, iteration: 329312
loss: 1.0283275842666626,grad_norm: 0.9999992191728817, iteration: 329313
loss: 1.0194697380065918,grad_norm: 0.8842612929554006, iteration: 329314
loss: 1.0005059242248535,grad_norm: 0.9999993303545495, iteration: 329315
loss: 0.9732834100723267,grad_norm: 0.8541024507439005, iteration: 329316
loss: 0.9914008975028992,grad_norm: 0.7318809389222702, iteration: 329317
loss: 1.0403181314468384,grad_norm: 0.9999990549122296, iteration: 329318
loss: 1.1637660264968872,grad_norm: 0.9999999086671242, iteration: 329319
loss: 1.0449928045272827,grad_norm: 0.7743139095690783, iteration: 329320
loss: 0.9880814552307129,grad_norm: 0.8515525011234286, iteration: 329321
loss: 0.9769890904426575,grad_norm: 0.8989802202261257, iteration: 329322
loss: 0.9826385974884033,grad_norm: 0.6600887879462661, iteration: 329323
loss: 0.9742335081100464,grad_norm: 0.8361897727791054, iteration: 329324
loss: 1.0049771070480347,grad_norm: 0.6804662974220902, iteration: 329325
loss: 1.0961964130401611,grad_norm: 0.9999994946930235, iteration: 329326
loss: 1.087862491607666,grad_norm: 0.9999997717962308, iteration: 329327
loss: 1.0435864925384521,grad_norm: 0.9999993846126551, iteration: 329328
loss: 0.9818387031555176,grad_norm: 0.839335534295247, iteration: 329329
loss: 1.0185201168060303,grad_norm: 0.9265640885321952, iteration: 329330
loss: 1.018720269203186,grad_norm: 0.7469313294131753, iteration: 329331
loss: 1.0031143426895142,grad_norm: 0.9114594704306507, iteration: 329332
loss: 0.9781092405319214,grad_norm: 0.9650145165325884, iteration: 329333
loss: 0.9950432181358337,grad_norm: 0.8152312065659524, iteration: 329334
loss: 1.0034600496292114,grad_norm: 0.7918563396731687, iteration: 329335
loss: 1.05385160446167,grad_norm: 0.9999998785737741, iteration: 329336
loss: 1.020603060722351,grad_norm: 0.6874116543339636, iteration: 329337
loss: 0.9923000931739807,grad_norm: 0.9799526533520995, iteration: 329338
loss: 1.0307039022445679,grad_norm: 0.9201428373944425, iteration: 329339
loss: 0.9603387117385864,grad_norm: 0.7873495292675651, iteration: 329340
loss: 1.0038361549377441,grad_norm: 0.9999992129995522, iteration: 329341
loss: 1.0027813911437988,grad_norm: 0.9999995357282825, iteration: 329342
loss: 1.0942143201828003,grad_norm: 0.9999990879400611, iteration: 329343
loss: 1.0811609029769897,grad_norm: 0.8047946319401278, iteration: 329344
loss: 0.9888424277305603,grad_norm: 0.7848671454377858, iteration: 329345
loss: 1.0211360454559326,grad_norm: 0.8349567293466347, iteration: 329346
loss: 0.9913448691368103,grad_norm: 0.7040036704383223, iteration: 329347
loss: 1.0226870775222778,grad_norm: 0.9509773305800394, iteration: 329348
loss: 0.9809237718582153,grad_norm: 0.8295204922524174, iteration: 329349
loss: 0.9809373617172241,grad_norm: 0.8922685652272265, iteration: 329350
loss: 1.046592354774475,grad_norm: 0.8686476951253275, iteration: 329351
loss: 1.144055724143982,grad_norm: 0.9999989820460488, iteration: 329352
loss: 0.965751051902771,grad_norm: 0.7755071739438991, iteration: 329353
loss: 1.0320011377334595,grad_norm: 0.8387537256489901, iteration: 329354
loss: 1.010279655456543,grad_norm: 0.8060927038839957, iteration: 329355
loss: 1.0104793310165405,grad_norm: 0.9999993312252444, iteration: 329356
loss: 1.0346169471740723,grad_norm: 0.9999991190924079, iteration: 329357
loss: 1.0245741605758667,grad_norm: 0.893442886580072, iteration: 329358
loss: 0.9843843579292297,grad_norm: 0.8382128734198736, iteration: 329359
loss: 1.0455735921859741,grad_norm: 0.9999992342245356, iteration: 329360
loss: 1.0047509670257568,grad_norm: 0.7594371224540197, iteration: 329361
loss: 0.9958193898200989,grad_norm: 0.7347899565487424, iteration: 329362
loss: 0.9914817214012146,grad_norm: 0.777853500958719, iteration: 329363
loss: 1.0121304988861084,grad_norm: 0.7978805968934519, iteration: 329364
loss: 0.9996421933174133,grad_norm: 0.9999993099570857, iteration: 329365
loss: 0.9968757033348083,grad_norm: 0.9153580057260279, iteration: 329366
loss: 1.002395510673523,grad_norm: 0.9664074438483062, iteration: 329367
loss: 1.0376529693603516,grad_norm: 0.9805404787889481, iteration: 329368
loss: 0.9789091348648071,grad_norm: 0.7965452568136614, iteration: 329369
loss: 1.0158175230026245,grad_norm: 0.9999998976817528, iteration: 329370
loss: 1.0175620317459106,grad_norm: 0.9122472766643542, iteration: 329371
loss: 0.9912856817245483,grad_norm: 0.746113999541752, iteration: 329372
loss: 1.0211337804794312,grad_norm: 0.9120781604861994, iteration: 329373
loss: 0.9938679933547974,grad_norm: 0.8176181729330666, iteration: 329374
loss: 1.0189979076385498,grad_norm: 0.9350269535985141, iteration: 329375
loss: 1.0097322463989258,grad_norm: 0.8479227829996049, iteration: 329376
loss: 1.0181068181991577,grad_norm: 0.7680766080864745, iteration: 329377
loss: 1.0384018421173096,grad_norm: 0.8306667903174143, iteration: 329378
loss: 0.9885690808296204,grad_norm: 0.9062705623164461, iteration: 329379
loss: 1.0834156274795532,grad_norm: 0.9999990742332889, iteration: 329380
loss: 0.9636368751525879,grad_norm: 0.8808238317351498, iteration: 329381
loss: 0.9838682413101196,grad_norm: 0.8883597957849574, iteration: 329382
loss: 1.0112080574035645,grad_norm: 0.8604206888361637, iteration: 329383
loss: 1.0730253458023071,grad_norm: 0.9999991367186072, iteration: 329384
loss: 0.9963112473487854,grad_norm: 0.7713005603268538, iteration: 329385
loss: 0.977849006652832,grad_norm: 0.8424083291745715, iteration: 329386
loss: 1.2504500150680542,grad_norm: 0.9999996079482355, iteration: 329387
loss: 1.0112600326538086,grad_norm: 0.9594225375716814, iteration: 329388
loss: 0.9629482626914978,grad_norm: 0.8028464093643596, iteration: 329389
loss: 0.9737401008605957,grad_norm: 0.7866780876602166, iteration: 329390
loss: 1.0142837762832642,grad_norm: 0.9689567528343281, iteration: 329391
loss: 1.0368983745574951,grad_norm: 0.8352098822129079, iteration: 329392
loss: 1.0044522285461426,grad_norm: 0.9999990490563353, iteration: 329393
loss: 1.0007133483886719,grad_norm: 0.9847495805127043, iteration: 329394
loss: 1.0568169355392456,grad_norm: 0.9999998038640215, iteration: 329395
loss: 1.152793526649475,grad_norm: 0.9999990984001823, iteration: 329396
loss: 0.9964548945426941,grad_norm: 0.8625335133154723, iteration: 329397
loss: 1.0071605443954468,grad_norm: 0.6710504009621575, iteration: 329398
loss: 1.054732084274292,grad_norm: 0.9999990833856591, iteration: 329399
loss: 1.0377742052078247,grad_norm: 0.8446156833227105, iteration: 329400
loss: 0.9766779541969299,grad_norm: 0.9999998751487497, iteration: 329401
loss: 1.044127345085144,grad_norm: 0.7380772839978526, iteration: 329402
loss: 1.0109879970550537,grad_norm: 0.9999990176314569, iteration: 329403
loss: 1.0294396877288818,grad_norm: 0.7170212637810097, iteration: 329404
loss: 0.9894170761108398,grad_norm: 0.8238625258173585, iteration: 329405
loss: 1.0025829076766968,grad_norm: 0.8476655006434268, iteration: 329406
loss: 1.0020699501037598,grad_norm: 0.7676465895821242, iteration: 329407
loss: 1.003013253211975,grad_norm: 0.9204641181575802, iteration: 329408
loss: 0.9955351948738098,grad_norm: 0.9021960429742872, iteration: 329409
loss: 1.0649683475494385,grad_norm: 0.9999992867799623, iteration: 329410
loss: 1.03001868724823,grad_norm: 0.9999999198804304, iteration: 329411
loss: 1.0302149057388306,grad_norm: 0.9999994837880429, iteration: 329412
loss: 0.9931375980377197,grad_norm: 0.8502210492511902, iteration: 329413
loss: 1.0212658643722534,grad_norm: 0.99999905185614, iteration: 329414
loss: 1.0190259218215942,grad_norm: 0.7248003122833622, iteration: 329415
loss: 0.9885038733482361,grad_norm: 0.8979382836065483, iteration: 329416
loss: 0.997732937335968,grad_norm: 0.7626804715620128, iteration: 329417
loss: 1.030411958694458,grad_norm: 0.9412134694533786, iteration: 329418
loss: 1.0227831602096558,grad_norm: 0.9999998391680708, iteration: 329419
loss: 1.0468999147415161,grad_norm: 0.999999712752137, iteration: 329420
loss: 0.9982418417930603,grad_norm: 0.8752012408853885, iteration: 329421
loss: 1.0272505283355713,grad_norm: 0.9647102315734788, iteration: 329422
loss: 1.0823307037353516,grad_norm: 0.8951599629744262, iteration: 329423
loss: 1.010202169418335,grad_norm: 0.7568045235675958, iteration: 329424
loss: 1.0616891384124756,grad_norm: 0.999999772779617, iteration: 329425
loss: 0.9865566492080688,grad_norm: 0.7585774746677023, iteration: 329426
loss: 0.9833172559738159,grad_norm: 0.8209354520216093, iteration: 329427
loss: 0.9999545216560364,grad_norm: 0.9999994597541816, iteration: 329428
loss: 0.9954646825790405,grad_norm: 0.7722488270501596, iteration: 329429
loss: 1.0533536672592163,grad_norm: 0.8054075107201123, iteration: 329430
loss: 0.9798583984375,grad_norm: 0.749303093166659, iteration: 329431
loss: 1.0295741558074951,grad_norm: 0.8683746078881249, iteration: 329432
loss: 0.9982070326805115,grad_norm: 0.780709603548438, iteration: 329433
loss: 0.9800014495849609,grad_norm: 0.9999991274659848, iteration: 329434
loss: 0.990375816822052,grad_norm: 0.9314960911007107, iteration: 329435
loss: 1.0011171102523804,grad_norm: 0.7649810356846765, iteration: 329436
loss: 1.0412538051605225,grad_norm: 0.9027794466890454, iteration: 329437
loss: 1.0472743511199951,grad_norm: 0.9999992608663819, iteration: 329438
loss: 1.1091991662979126,grad_norm: 0.9188266735267415, iteration: 329439
loss: 0.9919173121452332,grad_norm: 0.8014158681577148, iteration: 329440
loss: 0.9871718883514404,grad_norm: 0.9076614792076385, iteration: 329441
loss: 1.0119434595108032,grad_norm: 0.723452965362077, iteration: 329442
loss: 0.9999298453330994,grad_norm: 0.8128811657415579, iteration: 329443
loss: 0.967132031917572,grad_norm: 0.7995823928948261, iteration: 329444
loss: 0.9747108221054077,grad_norm: 0.9999990447779475, iteration: 329445
loss: 1.0042208433151245,grad_norm: 0.751948022597637, iteration: 329446
loss: 1.0108240842819214,grad_norm: 0.7498445126026264, iteration: 329447
loss: 1.02523672580719,grad_norm: 0.8162562284043091, iteration: 329448
loss: 1.0720789432525635,grad_norm: 0.9999997983537017, iteration: 329449
loss: 0.9805418848991394,grad_norm: 0.9332282325799958, iteration: 329450
loss: 0.9704867601394653,grad_norm: 0.8818848738904755, iteration: 329451
loss: 0.9830281734466553,grad_norm: 0.8009563681525839, iteration: 329452
loss: 1.0257329940795898,grad_norm: 0.7439812002050904, iteration: 329453
loss: 1.013662576675415,grad_norm: 0.8032704342993052, iteration: 329454
loss: 1.000208854675293,grad_norm: 0.9535861918295774, iteration: 329455
loss: 1.019938588142395,grad_norm: 0.953774576426185, iteration: 329456
loss: 0.987617015838623,grad_norm: 0.9364799972972389, iteration: 329457
loss: 0.985348641872406,grad_norm: 0.6881085315338811, iteration: 329458
loss: 1.025728464126587,grad_norm: 0.7675575998911022, iteration: 329459
loss: 0.991980791091919,grad_norm: 0.7928080563858705, iteration: 329460
loss: 0.967351496219635,grad_norm: 0.8549243638543598, iteration: 329461
loss: 0.9917356967926025,grad_norm: 0.8522074734281607, iteration: 329462
loss: 1.0053434371948242,grad_norm: 0.8420879941158331, iteration: 329463
loss: 0.9999791383743286,grad_norm: 0.9999992915607799, iteration: 329464
loss: 0.9943367838859558,grad_norm: 0.8238740386668993, iteration: 329465
loss: 0.9845201373100281,grad_norm: 0.8874374422713149, iteration: 329466
loss: 0.9822891354560852,grad_norm: 0.9054632856956704, iteration: 329467
loss: 1.0338099002838135,grad_norm: 0.9999990930253446, iteration: 329468
loss: 0.9872180223464966,grad_norm: 0.8709129453721668, iteration: 329469
loss: 0.9778010249137878,grad_norm: 0.9525675639354442, iteration: 329470
loss: 0.9826719164848328,grad_norm: 0.856381145553799, iteration: 329471
loss: 1.0023512840270996,grad_norm: 0.7297097612334383, iteration: 329472
loss: 0.9956275224685669,grad_norm: 0.9999995514028929, iteration: 329473
loss: 1.0112354755401611,grad_norm: 0.8649957052562719, iteration: 329474
loss: 1.0190519094467163,grad_norm: 0.7660546819073936, iteration: 329475
loss: 0.9715025424957275,grad_norm: 0.899141923623419, iteration: 329476
loss: 1.0356308221817017,grad_norm: 0.9999998255746856, iteration: 329477
loss: 1.0940799713134766,grad_norm: 0.9999995383442982, iteration: 329478
loss: 0.995243489742279,grad_norm: 0.8179057659269645, iteration: 329479
loss: 1.0660196542739868,grad_norm: 0.9999992948195835, iteration: 329480
loss: 0.9741894006729126,grad_norm: 0.9559102978137268, iteration: 329481
loss: 1.0120289325714111,grad_norm: 0.8541448190876553, iteration: 329482
loss: 0.977688193321228,grad_norm: 0.6827147736018858, iteration: 329483
loss: 0.9707241058349609,grad_norm: 0.9999990908512971, iteration: 329484
loss: 1.0325868129730225,grad_norm: 0.9999992878862156, iteration: 329485
loss: 1.0257827043533325,grad_norm: 0.9007615945994824, iteration: 329486
loss: 1.0133311748504639,grad_norm: 0.8735820874329386, iteration: 329487
loss: 0.9768632650375366,grad_norm: 0.9274842916536024, iteration: 329488
loss: 1.006758213043213,grad_norm: 0.7085739748677945, iteration: 329489
loss: 0.9756436944007874,grad_norm: 0.999998945526004, iteration: 329490
loss: 0.9724323153495789,grad_norm: 0.6760650963441374, iteration: 329491
loss: 1.024158000946045,grad_norm: 0.8180329715508448, iteration: 329492
loss: 1.0455743074417114,grad_norm: 0.7886033402701876, iteration: 329493
loss: 0.9862114191055298,grad_norm: 0.8604122926850849, iteration: 329494
loss: 0.9866474866867065,grad_norm: 0.7966265150913574, iteration: 329495
loss: 0.9962878823280334,grad_norm: 0.9999990961254618, iteration: 329496
loss: 1.0109543800354004,grad_norm: 0.8735947692363855, iteration: 329497
loss: 1.1252607107162476,grad_norm: 0.9999991892978514, iteration: 329498
loss: 0.969116747379303,grad_norm: 0.9616317002013545, iteration: 329499
loss: 1.0257295370101929,grad_norm: 1.000000001805567, iteration: 329500
loss: 0.980778157711029,grad_norm: 0.9795598642123039, iteration: 329501
loss: 1.0090776681900024,grad_norm: 0.9024524810240442, iteration: 329502
loss: 1.0532339811325073,grad_norm: 0.8649025734105721, iteration: 329503
loss: 1.0214866399765015,grad_norm: 0.9999991479357362, iteration: 329504
loss: 1.0124255418777466,grad_norm: 0.8754573570977074, iteration: 329505
loss: 0.9864266514778137,grad_norm: 0.999999167578949, iteration: 329506
loss: 1.0073505640029907,grad_norm: 0.939288619514925, iteration: 329507
loss: 0.9910871982574463,grad_norm: 0.8282947415070744, iteration: 329508
loss: 1.0019944906234741,grad_norm: 0.8827324273896172, iteration: 329509
loss: 1.032941460609436,grad_norm: 0.8877961589708612, iteration: 329510
loss: 1.0275236368179321,grad_norm: 0.7973283067796241, iteration: 329511
loss: 0.9952989816665649,grad_norm: 0.8322767065491885, iteration: 329512
loss: 1.059089183807373,grad_norm: 0.9999993130430824, iteration: 329513
loss: 0.980241060256958,grad_norm: 0.7936462524889661, iteration: 329514
loss: 1.058854341506958,grad_norm: 0.9091030762320983, iteration: 329515
loss: 1.044386625289917,grad_norm: 0.8453749919821376, iteration: 329516
loss: 1.0470787286758423,grad_norm: 0.8735490018606643, iteration: 329517
loss: 0.999085009098053,grad_norm: 0.9999996370042684, iteration: 329518
loss: 0.954956591129303,grad_norm: 0.9332040604737237, iteration: 329519
loss: 1.0219910144805908,grad_norm: 0.9999990500842049, iteration: 329520
loss: 0.9777316451072693,grad_norm: 0.8909203325788554, iteration: 329521
loss: 1.0576587915420532,grad_norm: 0.7458880450172257, iteration: 329522
loss: 0.983565628528595,grad_norm: 0.9999991515897854, iteration: 329523
loss: 1.0074222087860107,grad_norm: 0.999999076415876, iteration: 329524
loss: 1.0058550834655762,grad_norm: 0.724889314213378, iteration: 329525
loss: 0.9584680795669556,grad_norm: 0.7700616157004964, iteration: 329526
loss: 0.9907599687576294,grad_norm: 0.9207120483586899, iteration: 329527
loss: 0.9644955992698669,grad_norm: 0.9965770637424823, iteration: 329528
loss: 1.0596351623535156,grad_norm: 0.9999994976010407, iteration: 329529
loss: 1.004733681678772,grad_norm: 0.8740516808100051, iteration: 329530
loss: 0.9925270676612854,grad_norm: 0.9844599104164725, iteration: 329531
loss: 0.9816092252731323,grad_norm: 0.7743842800762546, iteration: 329532
loss: 1.0206490755081177,grad_norm: 0.758821428281337, iteration: 329533
loss: 0.9761170148849487,grad_norm: 0.8746916091520355, iteration: 329534
loss: 1.0093045234680176,grad_norm: 0.7468523470631827, iteration: 329535
loss: 1.0075663328170776,grad_norm: 0.9999989417068723, iteration: 329536
loss: 0.9800587892532349,grad_norm: 0.7335482276049144, iteration: 329537
loss: 0.9958789348602295,grad_norm: 0.8968890741032536, iteration: 329538
loss: 0.968548595905304,grad_norm: 0.7835833169105186, iteration: 329539
loss: 0.9802829027175903,grad_norm: 0.8666493751301242, iteration: 329540
loss: 0.9460662603378296,grad_norm: 0.9999991408328015, iteration: 329541
loss: 1.0069622993469238,grad_norm: 0.8804076534719855, iteration: 329542
loss: 1.0207138061523438,grad_norm: 0.8552930193786387, iteration: 329543
loss: 1.0046416521072388,grad_norm: 0.8207127927363882, iteration: 329544
loss: 0.9978801608085632,grad_norm: 0.8312116594846654, iteration: 329545
loss: 1.0288677215576172,grad_norm: 0.8553201368590289, iteration: 329546
loss: 1.0875468254089355,grad_norm: 0.9999989939020247, iteration: 329547
loss: 1.029675006866455,grad_norm: 0.7353945352199758, iteration: 329548
loss: 1.0085422992706299,grad_norm: 0.9848563777728694, iteration: 329549
loss: 1.0565202236175537,grad_norm: 0.9999993447070928, iteration: 329550
loss: 1.0322847366333008,grad_norm: 0.8367117267255998, iteration: 329551
loss: 0.9843889474868774,grad_norm: 0.8162546916077136, iteration: 329552
loss: 1.0289422273635864,grad_norm: 0.9676468385819398, iteration: 329553
loss: 1.0045812129974365,grad_norm: 0.7661499973598809, iteration: 329554
loss: 0.9488282203674316,grad_norm: 0.9999990829564889, iteration: 329555
loss: 0.987973153591156,grad_norm: 0.8563338169305059, iteration: 329556
loss: 1.011536717414856,grad_norm: 0.9999993984058491, iteration: 329557
loss: 0.9341561198234558,grad_norm: 0.9294876004910934, iteration: 329558
loss: 1.0568904876708984,grad_norm: 0.9999998089563463, iteration: 329559
loss: 0.966143012046814,grad_norm: 0.933612199656797, iteration: 329560
loss: 1.0796581506729126,grad_norm: 0.9416162253249858, iteration: 329561
loss: 1.1722729206085205,grad_norm: 0.999999850452384, iteration: 329562
loss: 1.0079559087753296,grad_norm: 0.9999990140616994, iteration: 329563
loss: 1.023504614830017,grad_norm: 0.7685693769436972, iteration: 329564
loss: 0.9659769535064697,grad_norm: 0.8755183146713207, iteration: 329565
loss: 1.0209226608276367,grad_norm: 0.9999994795621102, iteration: 329566
loss: 0.968683660030365,grad_norm: 0.8818734708800815, iteration: 329567
loss: 1.0373475551605225,grad_norm: 0.999999106319063, iteration: 329568
loss: 0.9713484644889832,grad_norm: 0.8577575861255851, iteration: 329569
loss: 0.9986920356750488,grad_norm: 0.9086655948835026, iteration: 329570
loss: 0.9660075306892395,grad_norm: 0.7308402225017238, iteration: 329571
loss: 1.004346489906311,grad_norm: 0.9999991996031404, iteration: 329572
loss: 1.0202016830444336,grad_norm: 0.972104188421899, iteration: 329573
loss: 0.992548406124115,grad_norm: 0.6984825806953564, iteration: 329574
loss: 0.951663076877594,grad_norm: 0.7204019990214401, iteration: 329575
loss: 0.9888402819633484,grad_norm: 0.8760466075029125, iteration: 329576
loss: 0.97596675157547,grad_norm: 0.7339997914001121, iteration: 329577
loss: 0.955205500125885,grad_norm: 0.8262365297329274, iteration: 329578
loss: 1.0461088418960571,grad_norm: 0.7702118638522821, iteration: 329579
loss: 0.9934565424919128,grad_norm: 0.9649250501112971, iteration: 329580
loss: 1.0151207447052002,grad_norm: 0.9148320044066978, iteration: 329581
loss: 1.0235702991485596,grad_norm: 0.9462719992186734, iteration: 329582
loss: 1.029432773590088,grad_norm: 0.9999992678542796, iteration: 329583
loss: 1.0279854536056519,grad_norm: 0.9896323092599479, iteration: 329584
loss: 1.0162922143936157,grad_norm: 0.9087170855613976, iteration: 329585
loss: 0.9995908141136169,grad_norm: 0.6598619860546392, iteration: 329586
loss: 1.0537805557250977,grad_norm: 0.9999994519399079, iteration: 329587
loss: 0.9888275861740112,grad_norm: 0.8820907165308144, iteration: 329588
loss: 0.9928575158119202,grad_norm: 0.9738401867991158, iteration: 329589
loss: 0.9794203639030457,grad_norm: 0.9137306695380532, iteration: 329590
loss: 1.0138213634490967,grad_norm: 0.99999939178285, iteration: 329591
loss: 1.0080220699310303,grad_norm: 0.9246292750726784, iteration: 329592
loss: 1.058476448059082,grad_norm: 0.9699348941856791, iteration: 329593
loss: 1.0106910467147827,grad_norm: 0.8548010873640229, iteration: 329594
loss: 0.9892030358314514,grad_norm: 0.6898778108394817, iteration: 329595
loss: 1.0661146640777588,grad_norm: 0.7763025185921838, iteration: 329596
loss: 1.0199732780456543,grad_norm: 0.9999992873215832, iteration: 329597
loss: 1.0076521635055542,grad_norm: 0.8762391225138224, iteration: 329598
loss: 1.0072786808013916,grad_norm: 0.9999989604542856, iteration: 329599
loss: 1.0611313581466675,grad_norm: 0.7400591622277118, iteration: 329600
loss: 1.0021848678588867,grad_norm: 0.878582238286812, iteration: 329601
loss: 1.113047480583191,grad_norm: 0.9999993288039394, iteration: 329602
loss: 1.0088900327682495,grad_norm: 0.9420027353996495, iteration: 329603
loss: 1.005154013633728,grad_norm: 0.8165498804346353, iteration: 329604
loss: 1.0018092393875122,grad_norm: 0.8048011307201184, iteration: 329605
loss: 1.0707873106002808,grad_norm: 0.9999998762190625, iteration: 329606
loss: 1.0011074542999268,grad_norm: 0.9999990656525577, iteration: 329607
loss: 1.0346494913101196,grad_norm: 0.8565800548923135, iteration: 329608
loss: 0.9887482523918152,grad_norm: 0.9633325664175205, iteration: 329609
loss: 1.0467383861541748,grad_norm: 0.999999731174984, iteration: 329610
loss: 1.0248574018478394,grad_norm: 0.9397408925784779, iteration: 329611
loss: 0.9885224103927612,grad_norm: 0.8073731206580119, iteration: 329612
loss: 0.9719282388687134,grad_norm: 0.9999990287020502, iteration: 329613
loss: 1.0149438381195068,grad_norm: 0.999999327631151, iteration: 329614
loss: 1.0011473894119263,grad_norm: 0.7525604563625724, iteration: 329615
loss: 1.0351887941360474,grad_norm: 0.999999331093407, iteration: 329616
loss: 0.9929953813552856,grad_norm: 0.8252935964282248, iteration: 329617
loss: 1.0970852375030518,grad_norm: 0.9999998784705225, iteration: 329618
loss: 1.0243091583251953,grad_norm: 0.7923813366547813, iteration: 329619
loss: 0.9800320863723755,grad_norm: 0.927573376356811, iteration: 329620
loss: 1.0419920682907104,grad_norm: 0.9999992543751535, iteration: 329621
loss: 1.018592357635498,grad_norm: 0.9028334716915374, iteration: 329622
loss: 1.006091833114624,grad_norm: 0.8542813135406463, iteration: 329623
loss: 0.9789212346076965,grad_norm: 0.7436835480661651, iteration: 329624
loss: 0.9880500435829163,grad_norm: 0.7537593979985968, iteration: 329625
loss: 1.0643093585968018,grad_norm: 0.8316416795875442, iteration: 329626
loss: 1.0177351236343384,grad_norm: 0.9999997329014975, iteration: 329627
loss: 1.011969804763794,grad_norm: 0.9999996130296687, iteration: 329628
loss: 1.0180938243865967,grad_norm: 0.7891090941539748, iteration: 329629
loss: 0.9972145557403564,grad_norm: 0.951634629019057, iteration: 329630
loss: 1.096584439277649,grad_norm: 0.999999766819023, iteration: 329631
loss: 1.0849288702011108,grad_norm: 0.9999997559518773, iteration: 329632
loss: 0.9862126708030701,grad_norm: 0.6832369855977911, iteration: 329633
loss: 1.0133709907531738,grad_norm: 0.8888216833983177, iteration: 329634
loss: 0.9538900852203369,grad_norm: 0.99999906331285, iteration: 329635
loss: 0.9710726737976074,grad_norm: 0.8361214084974411, iteration: 329636
loss: 0.9847050309181213,grad_norm: 0.9999989930903744, iteration: 329637
loss: 0.964152455329895,grad_norm: 0.7101596404810266, iteration: 329638
loss: 1.0266178846359253,grad_norm: 0.7983273962135891, iteration: 329639
loss: 0.9871296882629395,grad_norm: 0.7709908115323258, iteration: 329640
loss: 0.9989941120147705,grad_norm: 0.7713337420607341, iteration: 329641
loss: 0.9978256225585938,grad_norm: 0.8084511333850781, iteration: 329642
loss: 0.9682472348213196,grad_norm: 0.9090629567510853, iteration: 329643
loss: 1.0318965911865234,grad_norm: 0.9999991305970249, iteration: 329644
loss: 1.0036673545837402,grad_norm: 0.9612641589774694, iteration: 329645
loss: 0.9998548030853271,grad_norm: 0.8715914255257612, iteration: 329646
loss: 1.0249453783035278,grad_norm: 0.7635614621317593, iteration: 329647
loss: 1.0016685724258423,grad_norm: 0.8696460916322686, iteration: 329648
loss: 1.0412163734436035,grad_norm: 0.7747456449144772, iteration: 329649
loss: 1.0395703315734863,grad_norm: 0.8642658296280245, iteration: 329650
loss: 1.023354411125183,grad_norm: 0.9359568564603525, iteration: 329651
loss: 1.0178003311157227,grad_norm: 0.9094143224151288, iteration: 329652
loss: 0.9651026129722595,grad_norm: 0.794115632960435, iteration: 329653
loss: 0.9719023108482361,grad_norm: 0.9850543188742651, iteration: 329654
loss: 1.0076513290405273,grad_norm: 0.7420558329605044, iteration: 329655
loss: 0.9791970252990723,grad_norm: 0.9999991905274641, iteration: 329656
loss: 1.0400605201721191,grad_norm: 0.9999999254719711, iteration: 329657
loss: 0.9982728958129883,grad_norm: 0.9676812149543392, iteration: 329658
loss: 1.018352746963501,grad_norm: 0.8677785656876625, iteration: 329659
loss: 0.9928981065750122,grad_norm: 0.8859434806219905, iteration: 329660
loss: 1.0366175174713135,grad_norm: 0.9999990890345737, iteration: 329661
loss: 0.9679089784622192,grad_norm: 0.7189929422198994, iteration: 329662
loss: 0.9982236623764038,grad_norm: 0.8784653766809895, iteration: 329663
loss: 0.9859891533851624,grad_norm: 0.8032604330970368, iteration: 329664
loss: 1.0036529302597046,grad_norm: 0.6956295284425981, iteration: 329665
loss: 1.0509775876998901,grad_norm: 0.9999998968972417, iteration: 329666
loss: 1.0067145824432373,grad_norm: 0.9010030568104972, iteration: 329667
loss: 0.9859237670898438,grad_norm: 0.7930020667447659, iteration: 329668
loss: 0.9893251657485962,grad_norm: 0.8083230634992553, iteration: 329669
loss: 1.0595234632492065,grad_norm: 0.9656026166789251, iteration: 329670
loss: 1.0375375747680664,grad_norm: 0.9999997731094468, iteration: 329671
loss: 1.0176984071731567,grad_norm: 0.8638532826564781, iteration: 329672
loss: 1.0108619928359985,grad_norm: 0.8603855254060367, iteration: 329673
loss: 1.1658612489700317,grad_norm: 0.9999992648162133, iteration: 329674
loss: 0.9694104194641113,grad_norm: 0.6944964902752995, iteration: 329675
loss: 0.9897060394287109,grad_norm: 0.999999028894383, iteration: 329676
loss: 0.9713674783706665,grad_norm: 0.7931988638589732, iteration: 329677
loss: 0.9967136979103088,grad_norm: 0.8020824335679174, iteration: 329678
loss: 1.0232665538787842,grad_norm: 0.8510625537573258, iteration: 329679
loss: 1.0013681650161743,grad_norm: 0.9086077248171519, iteration: 329680
loss: 1.0006952285766602,grad_norm: 0.8619819167018867, iteration: 329681
loss: 1.0078803300857544,grad_norm: 0.7833527777609852, iteration: 329682
loss: 0.9856401085853577,grad_norm: 0.7438094809433907, iteration: 329683
loss: 1.0169892311096191,grad_norm: 0.9171607098905151, iteration: 329684
loss: 0.9769344925880432,grad_norm: 0.9999995346347623, iteration: 329685
loss: 1.009316325187683,grad_norm: 0.9999996186711735, iteration: 329686
loss: 0.9763575196266174,grad_norm: 0.8694884156291377, iteration: 329687
loss: 1.002118468284607,grad_norm: 0.8281227057732342, iteration: 329688
loss: 1.0047225952148438,grad_norm: 0.8336728202993204, iteration: 329689
loss: 0.9597520232200623,grad_norm: 0.8218857942744384, iteration: 329690
loss: 1.0030789375305176,grad_norm: 0.996733982383406, iteration: 329691
loss: 1.037585735321045,grad_norm: 0.9999990308537138, iteration: 329692
loss: 0.9944737553596497,grad_norm: 0.8221858332487989, iteration: 329693
loss: 1.033642053604126,grad_norm: 0.9178003550166901, iteration: 329694
loss: 1.0313799381256104,grad_norm: 0.8586651337549887, iteration: 329695
loss: 1.0694561004638672,grad_norm: 0.8651577452906695, iteration: 329696
loss: 0.9969272017478943,grad_norm: 0.9999996192654044, iteration: 329697
loss: 1.0111936330795288,grad_norm: 0.9486895462466702, iteration: 329698
loss: 1.02689528465271,grad_norm: 0.7944763462369826, iteration: 329699
loss: 1.0053890943527222,grad_norm: 0.780726138243434, iteration: 329700
loss: 1.0541551113128662,grad_norm: 0.999999644846176, iteration: 329701
loss: 0.9788538217544556,grad_norm: 0.8010186671308167, iteration: 329702
loss: 0.9845231175422668,grad_norm: 0.8880502250860095, iteration: 329703
loss: 0.9881930351257324,grad_norm: 0.9080438217530333, iteration: 329704
loss: 1.0509445667266846,grad_norm: 0.9999991272565426, iteration: 329705
loss: 1.09404718875885,grad_norm: 0.9999997300981722, iteration: 329706
loss: 1.0034409761428833,grad_norm: 0.7519642944599302, iteration: 329707
loss: 1.0430662631988525,grad_norm: 0.9381818910084538, iteration: 329708
loss: 1.1824631690979004,grad_norm: 0.999999845350588, iteration: 329709
loss: 0.9796651601791382,grad_norm: 0.9999991209582016, iteration: 329710
loss: 1.059464454650879,grad_norm: 0.9999992785719899, iteration: 329711
loss: 0.9679293036460876,grad_norm: 0.7485122823619333, iteration: 329712
loss: 0.9782872200012207,grad_norm: 0.9999990244231766, iteration: 329713
loss: 0.9607076048851013,grad_norm: 0.9999997103922916, iteration: 329714
loss: 1.0188039541244507,grad_norm: 0.9998393444654632, iteration: 329715
loss: 0.9866397380828857,grad_norm: 0.8670104563340716, iteration: 329716
loss: 0.9593571424484253,grad_norm: 0.8577220669154751, iteration: 329717
loss: 0.9690074324607849,grad_norm: 0.8502736155781213, iteration: 329718
loss: 1.0172299146652222,grad_norm: 0.8543145842372747, iteration: 329719
loss: 0.9659718871116638,grad_norm: 0.7303814805121805, iteration: 329720
loss: 1.0208336114883423,grad_norm: 0.8686021327447206, iteration: 329721
loss: 1.0292418003082275,grad_norm: 0.9999997986671263, iteration: 329722
loss: 1.1018767356872559,grad_norm: 0.999999982430507, iteration: 329723
loss: 0.9730363488197327,grad_norm: 0.993968725058161, iteration: 329724
loss: 1.0524848699569702,grad_norm: 0.9999996524673423, iteration: 329725
loss: 0.9812330007553101,grad_norm: 0.8828730403756024, iteration: 329726
loss: 0.9785597324371338,grad_norm: 0.8188768487395545, iteration: 329727
loss: 0.9981833696365356,grad_norm: 0.7851444760223991, iteration: 329728
loss: 0.9947717785835266,grad_norm: 0.9514145507530554, iteration: 329729
loss: 1.016319990158081,grad_norm: 0.999999145066942, iteration: 329730
loss: 1.0009069442749023,grad_norm: 0.9286277413494304, iteration: 329731
loss: 0.9737934470176697,grad_norm: 0.9580441069383991, iteration: 329732
loss: 1.0199973583221436,grad_norm: 0.6580022204124785, iteration: 329733
loss: 1.0233391523361206,grad_norm: 0.8759184049400165, iteration: 329734
loss: 1.0103046894073486,grad_norm: 0.9999993040223312, iteration: 329735
loss: 0.9740160703659058,grad_norm: 0.7586604357992595, iteration: 329736
loss: 1.022100806236267,grad_norm: 0.8918863454440833, iteration: 329737
loss: 0.9932065606117249,grad_norm: 0.9989398643100091, iteration: 329738
loss: 1.010583519935608,grad_norm: 0.8284671384767178, iteration: 329739
loss: 1.0053210258483887,grad_norm: 0.8633438249560013, iteration: 329740
loss: 1.0147145986557007,grad_norm: 0.9887292594598337, iteration: 329741
loss: 0.9952305555343628,grad_norm: 0.9999991007634358, iteration: 329742
loss: 0.9831922054290771,grad_norm: 0.8956986973725959, iteration: 329743
loss: 0.9747155904769897,grad_norm: 0.9999990954260399, iteration: 329744
loss: 0.9806904196739197,grad_norm: 0.7945799440072882, iteration: 329745
loss: 1.017090082168579,grad_norm: 0.821261791594244, iteration: 329746
loss: 1.0028988122940063,grad_norm: 0.9679254490203286, iteration: 329747
loss: 1.026110053062439,grad_norm: 0.9720090053762827, iteration: 329748
loss: 0.9976717233657837,grad_norm: 0.7204781223649582, iteration: 329749
loss: 1.008942723274231,grad_norm: 0.9999991024300484, iteration: 329750
loss: 1.0021705627441406,grad_norm: 0.9046543769997454, iteration: 329751
loss: 1.0523791313171387,grad_norm: 0.9999998629860701, iteration: 329752
loss: 1.0301116704940796,grad_norm: 0.8158843695923266, iteration: 329753
loss: 0.9712953567504883,grad_norm: 0.8431489364613107, iteration: 329754
loss: 1.0227032899856567,grad_norm: 0.8561035067130311, iteration: 329755
loss: 1.0048608779907227,grad_norm: 0.999999143450052, iteration: 329756
loss: 1.0252381563186646,grad_norm: 0.7153507865361116, iteration: 329757
loss: 1.0312285423278809,grad_norm: 0.7894865918353855, iteration: 329758
loss: 1.0425852537155151,grad_norm: 0.9999992623200212, iteration: 329759
loss: 1.0044536590576172,grad_norm: 0.6647091812654711, iteration: 329760
loss: 1.0123111009597778,grad_norm: 0.9999999125157459, iteration: 329761
loss: 0.9631451964378357,grad_norm: 0.9364796129555466, iteration: 329762
loss: 1.0261523723602295,grad_norm: 0.9999992741307161, iteration: 329763
loss: 1.0073410272598267,grad_norm: 0.9999995616472751, iteration: 329764
loss: 0.9795242547988892,grad_norm: 0.8677592631724625, iteration: 329765
loss: 1.0073542594909668,grad_norm: 0.7080773393560448, iteration: 329766
loss: 0.9955301284790039,grad_norm: 0.9999992700410639, iteration: 329767
loss: 1.0151983499526978,grad_norm: 0.9599973157838408, iteration: 329768
loss: 0.9672738313674927,grad_norm: 0.790885368403373, iteration: 329769
loss: 0.9879250526428223,grad_norm: 0.9999996195012838, iteration: 329770
loss: 0.9964369535446167,grad_norm: 0.9791692736541944, iteration: 329771
loss: 0.9772412776947021,grad_norm: 0.8018546600913582, iteration: 329772
loss: 1.0222724676132202,grad_norm: 0.8842770285594277, iteration: 329773
loss: 1.002273678779602,grad_norm: 0.9999992670399261, iteration: 329774
loss: 1.0047435760498047,grad_norm: 0.9999999474081053, iteration: 329775
loss: 0.9793604612350464,grad_norm: 0.8634133631872756, iteration: 329776
loss: 1.1401910781860352,grad_norm: 0.9999999194013847, iteration: 329777
loss: 1.0020434856414795,grad_norm: 0.8713803832108631, iteration: 329778
loss: 1.0384294986724854,grad_norm: 0.7995903861521364, iteration: 329779
loss: 1.0098282098770142,grad_norm: 0.7277338568136348, iteration: 329780
loss: 1.031773567199707,grad_norm: 0.8765449426860421, iteration: 329781
loss: 0.9924774169921875,grad_norm: 0.7610688973564088, iteration: 329782
loss: 1.0173439979553223,grad_norm: 0.9335389952319443, iteration: 329783
loss: 1.0036635398864746,grad_norm: 0.9914736682678754, iteration: 329784
loss: 0.997341513633728,grad_norm: 0.8072660012298477, iteration: 329785
loss: 1.014502763748169,grad_norm: 0.9083988818781566, iteration: 329786
loss: 0.9982972145080566,grad_norm: 0.8381788058318752, iteration: 329787
loss: 0.999921441078186,grad_norm: 0.7606472667297328, iteration: 329788
loss: 0.9681307077407837,grad_norm: 0.8718439735720604, iteration: 329789
loss: 1.0108416080474854,grad_norm: 0.765737065257857, iteration: 329790
loss: 0.9513265490531921,grad_norm: 0.7183403650179705, iteration: 329791
loss: 1.0104669332504272,grad_norm: 0.9084760609864858, iteration: 329792
loss: 1.037913203239441,grad_norm: 0.9361468577470352, iteration: 329793
loss: 0.9856883883476257,grad_norm: 0.9999990441353156, iteration: 329794
loss: 1.0184834003448486,grad_norm: 0.999999884562941, iteration: 329795
loss: 1.0166032314300537,grad_norm: 0.7758661278593646, iteration: 329796
loss: 0.9709281921386719,grad_norm: 0.9999993185285424, iteration: 329797
loss: 0.9985388517379761,grad_norm: 0.8017684234568245, iteration: 329798
loss: 1.0148448944091797,grad_norm: 0.9999999273024474, iteration: 329799
loss: 0.981728196144104,grad_norm: 0.7398708531812579, iteration: 329800
loss: 0.9899031519889832,grad_norm: 0.7385927648347033, iteration: 329801
loss: 0.9954323172569275,grad_norm: 0.8380688780587547, iteration: 329802
loss: 0.9901686906814575,grad_norm: 0.7809189584340144, iteration: 329803
loss: 1.0210580825805664,grad_norm: 0.937186317601851, iteration: 329804
loss: 1.0254664421081543,grad_norm: 0.9168791546540056, iteration: 329805
loss: 1.0420948266983032,grad_norm: 0.9999990506979393, iteration: 329806
loss: 1.0273280143737793,grad_norm: 0.732413863451882, iteration: 329807
loss: 1.0204877853393555,grad_norm: 0.9057061148832085, iteration: 329808
loss: 1.0005912780761719,grad_norm: 0.8558921052524203, iteration: 329809
loss: 1.0089799165725708,grad_norm: 0.7928005352538271, iteration: 329810
loss: 1.0086660385131836,grad_norm: 0.6735774215411711, iteration: 329811
loss: 1.0202361345291138,grad_norm: 0.7990142661897017, iteration: 329812
loss: 1.015103816986084,grad_norm: 0.9999990175212621, iteration: 329813
loss: 0.9762397408485413,grad_norm: 0.837669343961319, iteration: 329814
loss: 0.9909343719482422,grad_norm: 0.812029065047294, iteration: 329815
loss: 0.9598247408866882,grad_norm: 0.7954781432810909, iteration: 329816
loss: 0.9946170449256897,grad_norm: 0.9026158053469224, iteration: 329817
loss: 1.0150549411773682,grad_norm: 0.7731593546780917, iteration: 329818
loss: 1.0015888214111328,grad_norm: 0.6883036477605008, iteration: 329819
loss: 1.001988410949707,grad_norm: 0.7767703054162346, iteration: 329820
loss: 1.0040665864944458,grad_norm: 0.7679706642051003, iteration: 329821
loss: 1.0384262800216675,grad_norm: 0.7458645015183417, iteration: 329822
loss: 1.0212445259094238,grad_norm: 0.8136096663897346, iteration: 329823
loss: 1.0131136178970337,grad_norm: 0.7928352188181512, iteration: 329824
loss: 0.9604059457778931,grad_norm: 0.8483819377751, iteration: 329825
loss: 1.0009502172470093,grad_norm: 0.786546080893487, iteration: 329826
loss: 1.0181044340133667,grad_norm: 0.8507297042173058, iteration: 329827
loss: 0.986605703830719,grad_norm: 0.9999994061773675, iteration: 329828
loss: 0.9988197684288025,grad_norm: 0.895720734343864, iteration: 329829
loss: 0.9942557215690613,grad_norm: 0.7312244749034778, iteration: 329830
loss: 0.9838148355484009,grad_norm: 0.9584844292421255, iteration: 329831
loss: 1.0139521360397339,grad_norm: 0.9500465298555195, iteration: 329832
loss: 0.9860973358154297,grad_norm: 0.8425525077390632, iteration: 329833
loss: 0.9670133590698242,grad_norm: 0.8103436194278343, iteration: 329834
loss: 0.9417421221733093,grad_norm: 0.8045879263221162, iteration: 329835
loss: 1.011704683303833,grad_norm: 0.9282491617087901, iteration: 329836
loss: 0.9967436194419861,grad_norm: 0.9999990947207806, iteration: 329837
loss: 0.9957020878791809,grad_norm: 0.7676824682118079, iteration: 329838
loss: 1.0037661790847778,grad_norm: 0.7662899385652674, iteration: 329839
loss: 1.0350621938705444,grad_norm: 0.9999991438519908, iteration: 329840
loss: 1.0225902795791626,grad_norm: 0.8964889783803563, iteration: 329841
loss: 1.004787802696228,grad_norm: 0.8618460951168976, iteration: 329842
loss: 1.010738492012024,grad_norm: 0.8788938465592384, iteration: 329843
loss: 1.0129799842834473,grad_norm: 0.7812585667417146, iteration: 329844
loss: 1.1027185916900635,grad_norm: 0.9226463377503942, iteration: 329845
loss: 1.0557382106781006,grad_norm: 0.9999991632468894, iteration: 329846
loss: 1.0838204622268677,grad_norm: 0.8055684286874119, iteration: 329847
loss: 0.9831068515777588,grad_norm: 0.9999990726848638, iteration: 329848
loss: 1.0900871753692627,grad_norm: 0.99999913467778, iteration: 329849
loss: 0.9759439826011658,grad_norm: 0.7891268742139486, iteration: 329850
loss: 0.9740681052207947,grad_norm: 0.9555909712672844, iteration: 329851
loss: 1.0314233303070068,grad_norm: 0.8991171998149317, iteration: 329852
loss: 0.9938214421272278,grad_norm: 0.8370083406492569, iteration: 329853
loss: 0.9778058528900146,grad_norm: 0.9999992445488464, iteration: 329854
loss: 1.0004676580429077,grad_norm: 0.8925077131930544, iteration: 329855
loss: 0.9970865845680237,grad_norm: 0.7923478934162212, iteration: 329856
loss: 1.0408883094787598,grad_norm: 0.9999992503146798, iteration: 329857
loss: 1.0162454843521118,grad_norm: 0.8795887293921263, iteration: 329858
loss: 1.0164684057235718,grad_norm: 0.8721921944417996, iteration: 329859
loss: 1.0066527128219604,grad_norm: 0.9389674253224056, iteration: 329860
loss: 0.9681695103645325,grad_norm: 0.753658946069823, iteration: 329861
loss: 0.9549669027328491,grad_norm: 0.9999990029735821, iteration: 329862
loss: 1.1766153573989868,grad_norm: 0.9999997166861039, iteration: 329863
loss: 0.9937979578971863,grad_norm: 0.8544053906935222, iteration: 329864
loss: 1.023442029953003,grad_norm: 0.8172281984166304, iteration: 329865
loss: 0.9952588677406311,grad_norm: 0.862063657847227, iteration: 329866
loss: 1.0168726444244385,grad_norm: 0.8352625256334352, iteration: 329867
loss: 0.9901131987571716,grad_norm: 0.8086756206012935, iteration: 329868
loss: 1.002525806427002,grad_norm: 0.9999992306696243, iteration: 329869
loss: 1.0473918914794922,grad_norm: 0.9050970808036732, iteration: 329870
loss: 1.036728858947754,grad_norm: 0.9114130635779084, iteration: 329871
loss: 1.0168867111206055,grad_norm: 0.7882887384133199, iteration: 329872
loss: 0.9632752537727356,grad_norm: 0.8733037580495918, iteration: 329873
loss: 1.0428816080093384,grad_norm: 0.7836378316807919, iteration: 329874
loss: 0.9985471367835999,grad_norm: 0.9522436520610219, iteration: 329875
loss: 1.0278022289276123,grad_norm: 0.8648248413371484, iteration: 329876
loss: 0.9854702353477478,grad_norm: 0.8417912991334232, iteration: 329877
loss: 1.0098676681518555,grad_norm: 0.8689616987627617, iteration: 329878
loss: 0.9730625152587891,grad_norm: 0.8545407090818581, iteration: 329879
loss: 0.9528313279151917,grad_norm: 0.999999045772322, iteration: 329880
loss: 1.0083868503570557,grad_norm: 0.9999991643243666, iteration: 329881
loss: 1.0503486394882202,grad_norm: 0.7998341061645806, iteration: 329882
loss: 0.9957866072654724,grad_norm: 0.8489602819589017, iteration: 329883
loss: 1.0303233861923218,grad_norm: 0.897793438828906, iteration: 329884
loss: 1.0082803964614868,grad_norm: 0.7625691194882709, iteration: 329885
loss: 1.0031498670578003,grad_norm: 0.8418246031018564, iteration: 329886
loss: 0.9990812540054321,grad_norm: 0.8719574516250733, iteration: 329887
loss: 0.9942648410797119,grad_norm: 0.8782242013473218, iteration: 329888
loss: 0.9946470260620117,grad_norm: 0.9999999617051986, iteration: 329889
loss: 1.012730360031128,grad_norm: 0.9767722435306126, iteration: 329890
loss: 0.987643301486969,grad_norm: 0.9457475857395132, iteration: 329891
loss: 1.025710105895996,grad_norm: 0.8794475694626097, iteration: 329892
loss: 1.01353919506073,grad_norm: 0.6961526749022817, iteration: 329893
loss: 0.9771410822868347,grad_norm: 0.9290960442108274, iteration: 329894
loss: 1.0100510120391846,grad_norm: 0.8736719678597169, iteration: 329895
loss: 0.9984630346298218,grad_norm: 0.9909153688802752, iteration: 329896
loss: 1.0181881189346313,grad_norm: 0.8119135006057625, iteration: 329897
loss: 1.0004162788391113,grad_norm: 0.729141869365403, iteration: 329898
loss: 0.9804279208183289,grad_norm: 0.7063968211213391, iteration: 329899
loss: 0.9896870255470276,grad_norm: 0.7253752168977589, iteration: 329900
loss: 1.0044293403625488,grad_norm: 0.821414335867377, iteration: 329901
loss: 1.1125668287277222,grad_norm: 0.999999710898382, iteration: 329902
loss: 0.9714000821113586,grad_norm: 0.8077658167990799, iteration: 329903
loss: 1.0016964673995972,grad_norm: 0.6945275392660789, iteration: 329904
loss: 0.9890487790107727,grad_norm: 0.8841581913309331, iteration: 329905
loss: 0.987113893032074,grad_norm: 0.7349100394387817, iteration: 329906
loss: 1.0333689451217651,grad_norm: 0.7661561232864396, iteration: 329907
loss: 0.9885760545730591,grad_norm: 0.8924145086101366, iteration: 329908
loss: 1.0914770364761353,grad_norm: 0.9999998127724654, iteration: 329909
loss: 0.9792224168777466,grad_norm: 0.9031864584537965, iteration: 329910
loss: 0.9984664916992188,grad_norm: 0.9999993645702843, iteration: 329911
loss: 1.0107063055038452,grad_norm: 0.9043525065232476, iteration: 329912
loss: 1.0111393928527832,grad_norm: 0.868248158940154, iteration: 329913
loss: 0.9816954135894775,grad_norm: 0.9584080621959503, iteration: 329914
loss: 1.0217701196670532,grad_norm: 0.7778348275830655, iteration: 329915
loss: 1.0122184753417969,grad_norm: 0.9999997368324897, iteration: 329916
loss: 1.0038036108016968,grad_norm: 0.9999995759518238, iteration: 329917
loss: 0.9402337074279785,grad_norm: 0.8880592073774677, iteration: 329918
loss: 1.0273491144180298,grad_norm: 0.8652754137268387, iteration: 329919
loss: 0.9904822707176208,grad_norm: 0.8930210659909515, iteration: 329920
loss: 0.9894014596939087,grad_norm: 0.7401043606451879, iteration: 329921
loss: 1.026283860206604,grad_norm: 0.8646226681792656, iteration: 329922
loss: 0.9827730655670166,grad_norm: 0.9125210544912816, iteration: 329923
loss: 1.0339337587356567,grad_norm: 0.8949174730276112, iteration: 329924
loss: 1.0413672924041748,grad_norm: 0.999999197916192, iteration: 329925
loss: 1.0432087182998657,grad_norm: 0.8825686705530481, iteration: 329926
loss: 0.9742164015769958,grad_norm: 0.9999989264717815, iteration: 329927
loss: 0.988741397857666,grad_norm: 0.925202831945935, iteration: 329928
loss: 1.0233136415481567,grad_norm: 0.7549663028077621, iteration: 329929
loss: 0.9739848375320435,grad_norm: 0.930591771223216, iteration: 329930
loss: 0.960180938243866,grad_norm: 0.9765403533375918, iteration: 329931
loss: 0.9987725615501404,grad_norm: 0.9863385663600648, iteration: 329932
loss: 1.0008132457733154,grad_norm: 0.9999995280556488, iteration: 329933
loss: 0.9911282062530518,grad_norm: 0.8263764626150116, iteration: 329934
loss: 1.0398526191711426,grad_norm: 0.8598301823498209, iteration: 329935
loss: 0.96673184633255,grad_norm: 0.7536875753473466, iteration: 329936
loss: 1.0220026969909668,grad_norm: 0.9999995301158504, iteration: 329937
loss: 0.9558810591697693,grad_norm: 0.7359999439554371, iteration: 329938
loss: 0.9939653873443604,grad_norm: 0.927184716640314, iteration: 329939
loss: 0.9755301475524902,grad_norm: 0.7820276725916009, iteration: 329940
loss: 0.9983955025672913,grad_norm: 0.8251279607527215, iteration: 329941
loss: 1.0283606052398682,grad_norm: 0.9780675341172567, iteration: 329942
loss: 1.0009288787841797,grad_norm: 0.8907637371245378, iteration: 329943
loss: 1.0017695426940918,grad_norm: 0.7069332527482521, iteration: 329944
loss: 0.9951992034912109,grad_norm: 0.7304961474403686, iteration: 329945
loss: 1.0183039903640747,grad_norm: 0.9999994075186773, iteration: 329946
loss: 1.0143338441848755,grad_norm: 0.7839614442118862, iteration: 329947
loss: 0.9781419634819031,grad_norm: 0.9208689917706047, iteration: 329948
loss: 0.9988306164741516,grad_norm: 0.8664832317710961, iteration: 329949
loss: 1.0221242904663086,grad_norm: 0.999780446665791, iteration: 329950
loss: 0.9774670004844666,grad_norm: 0.9999991392278863, iteration: 329951
loss: 1.0045198202133179,grad_norm: 0.9999994076490727, iteration: 329952
loss: 1.0772842168807983,grad_norm: 0.9999998642816615, iteration: 329953
loss: 0.9625515341758728,grad_norm: 0.9999999299072673, iteration: 329954
loss: 0.9844063520431519,grad_norm: 0.9421324084022017, iteration: 329955
loss: 0.9962286949157715,grad_norm: 0.8637379960279289, iteration: 329956
loss: 1.1074362993240356,grad_norm: 0.9999996690280328, iteration: 329957
loss: 0.9823944568634033,grad_norm: 0.7438166523328549, iteration: 329958
loss: 1.00351881980896,grad_norm: 0.8445466933700839, iteration: 329959
loss: 1.046800971031189,grad_norm: 0.9999999371175902, iteration: 329960
loss: 1.0324528217315674,grad_norm: 0.7468929143294585, iteration: 329961
loss: 1.1171764135360718,grad_norm: 0.7607709877849654, iteration: 329962
loss: 0.9806463718414307,grad_norm: 0.7053534448071342, iteration: 329963
loss: 1.1545512676239014,grad_norm: 0.9311764707513447, iteration: 329964
loss: 1.0343533754348755,grad_norm: 0.8389326040899937, iteration: 329965
loss: 0.989702045917511,grad_norm: 0.9963154742309679, iteration: 329966
loss: 0.9955744743347168,grad_norm: 0.8912747392745738, iteration: 329967
loss: 0.999094545841217,grad_norm: 0.9999992024270944, iteration: 329968
loss: 1.0466026067733765,grad_norm: 0.9999991987584852, iteration: 329969
loss: 1.0069007873535156,grad_norm: 0.7227921702044497, iteration: 329970
loss: 0.9952698945999146,grad_norm: 0.8344762156587512, iteration: 329971
loss: 0.9886108636856079,grad_norm: 0.9999996812400634, iteration: 329972
loss: 1.0080437660217285,grad_norm: 0.8138997423500004, iteration: 329973
loss: 1.015018105506897,grad_norm: 0.9615836164026805, iteration: 329974
loss: 1.0268133878707886,grad_norm: 0.783750150009937, iteration: 329975
loss: 0.9969437718391418,grad_norm: 0.8771991335556706, iteration: 329976
loss: 1.0115994215011597,grad_norm: 0.8635718855638278, iteration: 329977
loss: 1.0065466165542603,grad_norm: 0.8636537930025862, iteration: 329978
loss: 1.0067176818847656,grad_norm: 0.9217952120418288, iteration: 329979
loss: 0.9674868583679199,grad_norm: 0.8090924761269969, iteration: 329980
loss: 1.013847827911377,grad_norm: 0.8447583553157032, iteration: 329981
loss: 0.9991688132286072,grad_norm: 0.8101043258740755, iteration: 329982
loss: 1.0178478956222534,grad_norm: 0.9999991843199835, iteration: 329983
loss: 1.0064172744750977,grad_norm: 0.8531521382820205, iteration: 329984
loss: 1.0180152654647827,grad_norm: 0.7856198800415171, iteration: 329985
loss: 1.123945951461792,grad_norm: 0.9999995953551183, iteration: 329986
loss: 0.9935089945793152,grad_norm: 0.8890048287390633, iteration: 329987
loss: 0.9826734662055969,grad_norm: 0.7458669372218669, iteration: 329988
loss: 1.0009570121765137,grad_norm: 0.9999991569735973, iteration: 329989
loss: 0.9974805116653442,grad_norm: 0.9587869962098508, iteration: 329990
loss: 1.0029221773147583,grad_norm: 0.8552193089535889, iteration: 329991
loss: 1.0218369960784912,grad_norm: 0.767959068153863, iteration: 329992
loss: 0.9945424795150757,grad_norm: 0.6984632945055811, iteration: 329993
loss: 1.0359539985656738,grad_norm: 0.9642924044819353, iteration: 329994
loss: 1.0346966981887817,grad_norm: 0.8304282892953608, iteration: 329995
loss: 1.0136408805847168,grad_norm: 0.8646166043282534, iteration: 329996
loss: 0.989015519618988,grad_norm: 0.807390143073578, iteration: 329997
loss: 1.0246169567108154,grad_norm: 0.853563939404573, iteration: 329998
loss: 1.0112227201461792,grad_norm: 0.8513278751849436, iteration: 329999
loss: 1.0357449054718018,grad_norm: 0.9253531089173175, iteration: 330000
Evaluating at step 330000
{'val': 0.9945234283804893, 'test': 2.3970723731068375}
loss: 1.0422590970993042,grad_norm: 0.8475908798220115, iteration: 330001
loss: 0.9769207835197449,grad_norm: 0.7637050203310299, iteration: 330002
loss: 1.0299677848815918,grad_norm: 0.8214429249057907, iteration: 330003
loss: 1.0031006336212158,grad_norm: 0.875036962497278, iteration: 330004
loss: 1.079671025276184,grad_norm: 0.9798493502078757, iteration: 330005
loss: 1.0014760494232178,grad_norm: 0.8395188054880538, iteration: 330006
loss: 0.9982222318649292,grad_norm: 0.8497191719277465, iteration: 330007
loss: 1.0606515407562256,grad_norm: 0.9999991346703051, iteration: 330008
loss: 0.9691805243492126,grad_norm: 0.8724139901615162, iteration: 330009
loss: 1.011035442352295,grad_norm: 0.9346200678411316, iteration: 330010
loss: 0.9956259727478027,grad_norm: 0.7909940718397679, iteration: 330011
loss: 1.0043869018554688,grad_norm: 0.9743387446081144, iteration: 330012
loss: 0.9856185913085938,grad_norm: 0.9999989892542779, iteration: 330013
loss: 1.0548605918884277,grad_norm: 0.999999357223387, iteration: 330014
loss: 0.9958829283714294,grad_norm: 0.9999992139877587, iteration: 330015
loss: 1.0458420515060425,grad_norm: 0.9999995469027587, iteration: 330016
loss: 1.0016902685165405,grad_norm: 0.9180224462030401, iteration: 330017
loss: 1.0219601392745972,grad_norm: 0.7783319402629203, iteration: 330018
loss: 0.9901686310768127,grad_norm: 0.8075910421889091, iteration: 330019
loss: 0.9717416167259216,grad_norm: 0.9999994599498597, iteration: 330020
loss: 1.0197901725769043,grad_norm: 0.8492905498373846, iteration: 330021
loss: 1.0412006378173828,grad_norm: 0.7776674922333221, iteration: 330022
loss: 0.9926729798316956,grad_norm: 0.7813033779328903, iteration: 330023
loss: 1.0377671718597412,grad_norm: 0.8976229558043121, iteration: 330024
loss: 0.9702698588371277,grad_norm: 0.9933594330192825, iteration: 330025
loss: 1.0408514738082886,grad_norm: 0.9999998119357968, iteration: 330026
loss: 0.9460510611534119,grad_norm: 0.8515625242190802, iteration: 330027
loss: 1.0170286893844604,grad_norm: 0.9999992013570548, iteration: 330028
loss: 0.9869229197502136,grad_norm: 0.8327359499211564, iteration: 330029
loss: 1.0272516012191772,grad_norm: 0.8506744963502352, iteration: 330030
loss: 0.9982820153236389,grad_norm: 0.9416616050346726, iteration: 330031
loss: 1.0375328063964844,grad_norm: 0.9999998479572039, iteration: 330032
loss: 0.9887955188751221,grad_norm: 0.9214742122136718, iteration: 330033
loss: 0.9692069292068481,grad_norm: 0.85457241855734, iteration: 330034
loss: 0.999165952205658,grad_norm: 0.7826928707979934, iteration: 330035
loss: 0.979794979095459,grad_norm: 0.861451840912714, iteration: 330036
loss: 0.9856101274490356,grad_norm: 0.8568593598523062, iteration: 330037
loss: 0.9731437563896179,grad_norm: 0.8837289427260657, iteration: 330038
loss: 0.9986732006072998,grad_norm: 0.8150226299280349, iteration: 330039
loss: 1.0229185819625854,grad_norm: 0.9999998532237323, iteration: 330040
loss: 1.0453875064849854,grad_norm: 0.7584296532977569, iteration: 330041
loss: 1.0172216892242432,grad_norm: 0.9421816459209821, iteration: 330042
loss: 0.9738918542861938,grad_norm: 0.9651147138042191, iteration: 330043
loss: 0.9808249473571777,grad_norm: 0.9999989708434747, iteration: 330044
loss: 0.9963455200195312,grad_norm: 0.99999903165792, iteration: 330045
loss: 0.997094452381134,grad_norm: 0.9999991267979588, iteration: 330046
loss: 0.9975075125694275,grad_norm: 0.7943057922220416, iteration: 330047
loss: 0.9798296689987183,grad_norm: 0.806374026776244, iteration: 330048
loss: 0.9860458970069885,grad_norm: 0.8658721952109435, iteration: 330049
loss: 1.0241729021072388,grad_norm: 0.8849542498874753, iteration: 330050
loss: 0.9620526432991028,grad_norm: 0.77634808600801, iteration: 330051
loss: 0.9618124961853027,grad_norm: 0.8002687213656952, iteration: 330052
loss: 0.9889040589332581,grad_norm: 0.8505928550505084, iteration: 330053
loss: 1.0215787887573242,grad_norm: 0.9529190536682426, iteration: 330054
loss: 1.0088778734207153,grad_norm: 0.8942062480019404, iteration: 330055
loss: 1.008309245109558,grad_norm: 0.9157515554075212, iteration: 330056
loss: 1.0159964561462402,grad_norm: 0.790370442736735, iteration: 330057
loss: 0.9966301918029785,grad_norm: 0.8110824882041591, iteration: 330058
loss: 1.0093770027160645,grad_norm: 0.7691073586969249, iteration: 330059
loss: 0.9392538666725159,grad_norm: 0.8427541210236769, iteration: 330060
loss: 0.9899418950080872,grad_norm: 0.9602370753124344, iteration: 330061
loss: 0.9927318692207336,grad_norm: 0.8056042798511502, iteration: 330062
loss: 0.9682458639144897,grad_norm: 0.9153874758793673, iteration: 330063
loss: 1.0019387006759644,grad_norm: 0.735890344258079, iteration: 330064
loss: 0.9728121161460876,grad_norm: 0.7488193875550931, iteration: 330065
loss: 1.0153268575668335,grad_norm: 0.660098688093112, iteration: 330066
loss: 1.020422339439392,grad_norm: 0.7580321648480691, iteration: 330067
loss: 0.9721391201019287,grad_norm: 0.846051257711424, iteration: 330068
loss: 0.9813783764839172,grad_norm: 0.8552945584657106, iteration: 330069
loss: 0.9980993270874023,grad_norm: 0.9999996359988885, iteration: 330070
loss: 1.016804575920105,grad_norm: 0.990495432870229, iteration: 330071
loss: 0.9931309819221497,grad_norm: 0.8118299339741953, iteration: 330072
loss: 1.0162328481674194,grad_norm: 0.9999998281324417, iteration: 330073
loss: 1.0018032789230347,grad_norm: 0.8273056093351427, iteration: 330074
loss: 1.0105918645858765,grad_norm: 0.7979821293614997, iteration: 330075
loss: 0.9912005066871643,grad_norm: 0.8659158865156538, iteration: 330076
loss: 1.027144193649292,grad_norm: 0.9724848244860111, iteration: 330077
loss: 1.04872465133667,grad_norm: 0.9999996641864801, iteration: 330078
loss: 0.9847589731216431,grad_norm: 0.8587405454446387, iteration: 330079
loss: 0.9939431548118591,grad_norm: 0.8898952130723364, iteration: 330080
loss: 0.9958023428916931,grad_norm: 0.8247423211749475, iteration: 330081
loss: 0.9803749322891235,grad_norm: 0.8328594351988019, iteration: 330082
loss: 0.9741174578666687,grad_norm: 0.9049912870048268, iteration: 330083
loss: 1.0326018333435059,grad_norm: 0.7832568421372064, iteration: 330084
loss: 0.9714809656143188,grad_norm: 0.8280442452267092, iteration: 330085
loss: 1.015058159828186,grad_norm: 0.9999992025318286, iteration: 330086
loss: 0.9769394993782043,grad_norm: 0.9130399904729714, iteration: 330087
loss: 1.0062698125839233,grad_norm: 0.7576663034176575, iteration: 330088
loss: 1.0082716941833496,grad_norm: 0.9126092520648539, iteration: 330089
loss: 1.0777873992919922,grad_norm: 0.9999993543578108, iteration: 330090
loss: 0.9941624999046326,grad_norm: 0.9999990819492823, iteration: 330091
loss: 0.9880889654159546,grad_norm: 0.7490540200973422, iteration: 330092
loss: 0.9838178753852844,grad_norm: 0.7858958086550729, iteration: 330093
loss: 1.0049251317977905,grad_norm: 0.799112418816307, iteration: 330094
loss: 0.9878086447715759,grad_norm: 0.7106646954767867, iteration: 330095
loss: 1.0192298889160156,grad_norm: 0.8220853560039277, iteration: 330096
loss: 1.0213228464126587,grad_norm: 0.9271333218667964, iteration: 330097
loss: 1.0224922895431519,grad_norm: 0.7905781850322594, iteration: 330098
loss: 1.0076813697814941,grad_norm: 0.8988574531787806, iteration: 330099
loss: 0.9935879707336426,grad_norm: 0.8542086497449354, iteration: 330100
loss: 0.9929619431495667,grad_norm: 0.8855323750178454, iteration: 330101
loss: 1.0118825435638428,grad_norm: 0.7436038117057217, iteration: 330102
loss: 0.9838367104530334,grad_norm: 0.8492446583316942, iteration: 330103
loss: 1.0376958847045898,grad_norm: 0.8468937945103744, iteration: 330104
loss: 1.000329852104187,grad_norm: 0.7730345679363065, iteration: 330105
loss: 1.0040377378463745,grad_norm: 0.9999990708308842, iteration: 330106
loss: 0.969565749168396,grad_norm: 0.9999990631112555, iteration: 330107
loss: 1.0154035091400146,grad_norm: 0.9280741308488318, iteration: 330108
loss: 1.0022095441818237,grad_norm: 0.9999997670970698, iteration: 330109
loss: 1.0088207721710205,grad_norm: 0.9999990778166454, iteration: 330110
loss: 0.996292233467102,grad_norm: 0.9646213794274857, iteration: 330111
loss: 0.9888048768043518,grad_norm: 0.9573211276737349, iteration: 330112
loss: 1.0044972896575928,grad_norm: 0.809539425539298, iteration: 330113
loss: 0.9458165168762207,grad_norm: 0.8833363205643382, iteration: 330114
loss: 1.012060284614563,grad_norm: 0.8570497456749026, iteration: 330115
loss: 1.0079954862594604,grad_norm: 0.9999996657547686, iteration: 330116
loss: 1.1016770601272583,grad_norm: 0.9999999279345604, iteration: 330117
loss: 1.0674185752868652,grad_norm: 0.9999994451311964, iteration: 330118
loss: 1.0545973777770996,grad_norm: 0.8027886511620408, iteration: 330119
loss: 0.9845783710479736,grad_norm: 0.8292104362905395, iteration: 330120
loss: 0.9674145579338074,grad_norm: 0.9999991469972263, iteration: 330121
loss: 0.9698044061660767,grad_norm: 0.7627450168454039, iteration: 330122
loss: 0.9915364384651184,grad_norm: 0.7500123059317793, iteration: 330123
loss: 1.0222582817077637,grad_norm: 0.8854741281882228, iteration: 330124
loss: 1.0116450786590576,grad_norm: 0.9851179243653001, iteration: 330125
loss: 1.0104111433029175,grad_norm: 0.7256583055747254, iteration: 330126
loss: 0.9984813928604126,grad_norm: 0.7953816697660019, iteration: 330127
loss: 1.0185348987579346,grad_norm: 0.7498338026740642, iteration: 330128
loss: 0.983863353729248,grad_norm: 0.7599122175602817, iteration: 330129
loss: 0.9986410140991211,grad_norm: 0.7899619609752592, iteration: 330130
loss: 0.9929611086845398,grad_norm: 0.8670564366486982, iteration: 330131
loss: 0.9888653755187988,grad_norm: 0.871232357093307, iteration: 330132
loss: 1.004712700843811,grad_norm: 0.9259654659383215, iteration: 330133
loss: 1.139527440071106,grad_norm: 0.9728904580250625, iteration: 330134
loss: 0.9862825870513916,grad_norm: 0.6758238102912, iteration: 330135
loss: 0.9930583834648132,grad_norm: 0.972712277204396, iteration: 330136
loss: 0.991913378238678,grad_norm: 0.8445502703770541, iteration: 330137
loss: 0.9911778569221497,grad_norm: 0.7164753552784295, iteration: 330138
loss: 0.9652502536773682,grad_norm: 0.7702741686357545, iteration: 330139
loss: 0.9968028664588928,grad_norm: 0.7238984021013893, iteration: 330140
loss: 0.9869338274002075,grad_norm: 0.9999992309179894, iteration: 330141
loss: 1.0059436559677124,grad_norm: 0.9362647986841671, iteration: 330142
loss: 1.0039640665054321,grad_norm: 0.8268046169706263, iteration: 330143
loss: 1.0643329620361328,grad_norm: 0.8190777166605471, iteration: 330144
loss: 1.0207600593566895,grad_norm: 0.8429065896768017, iteration: 330145
loss: 0.9976209402084351,grad_norm: 0.6045273578973154, iteration: 330146
loss: 1.0263954401016235,grad_norm: 0.9999997006742251, iteration: 330147
loss: 1.0132851600646973,grad_norm: 0.7764020039583626, iteration: 330148
loss: 1.0581798553466797,grad_norm: 0.9839059218466163, iteration: 330149
loss: 1.036276936531067,grad_norm: 0.8676169634993673, iteration: 330150
loss: 1.0178134441375732,grad_norm: 0.9999998360410984, iteration: 330151
loss: 1.0216186046600342,grad_norm: 0.9085703585336283, iteration: 330152
loss: 0.9852094650268555,grad_norm: 0.887163913162251, iteration: 330153
loss: 0.9700462222099304,grad_norm: 0.7173903694673331, iteration: 330154
loss: 0.9876777529716492,grad_norm: 0.9999992589430827, iteration: 330155
loss: 1.0719637870788574,grad_norm: 0.9999997634355018, iteration: 330156
loss: 0.9369056820869446,grad_norm: 0.8081458398214273, iteration: 330157
loss: 1.0060590505599976,grad_norm: 0.7683186923876753, iteration: 330158
loss: 0.9884598255157471,grad_norm: 0.6807993511427956, iteration: 330159
loss: 1.019688606262207,grad_norm: 0.9677771611039766, iteration: 330160
loss: 1.0180221796035767,grad_norm: 0.9999999966019857, iteration: 330161
loss: 0.9904663562774658,grad_norm: 0.8550394382769563, iteration: 330162
loss: 1.0104032754898071,grad_norm: 0.8421469289416217, iteration: 330163
loss: 0.9796252250671387,grad_norm: 0.8334505643366574, iteration: 330164
loss: 0.9873093366622925,grad_norm: 0.676781664702379, iteration: 330165
loss: 0.9992477893829346,grad_norm: 0.9840812387626482, iteration: 330166
loss: 1.0074570178985596,grad_norm: 0.7886556371476992, iteration: 330167
loss: 1.0174338817596436,grad_norm: 0.8568387841515552, iteration: 330168
loss: 0.9827715754508972,grad_norm: 0.8678616422767961, iteration: 330169
loss: 1.0039963722229004,grad_norm: 0.9999991120745794, iteration: 330170
loss: 1.0128859281539917,grad_norm: 0.886740905922301, iteration: 330171
loss: 1.0078171491622925,grad_norm: 0.8962704286927738, iteration: 330172
loss: 0.9998370409011841,grad_norm: 0.885950554109736, iteration: 330173
loss: 1.0232322216033936,grad_norm: 0.9310401670596704, iteration: 330174
loss: 0.9817888736724854,grad_norm: 0.7680909354812446, iteration: 330175
loss: 0.9933409094810486,grad_norm: 0.8701748895502179, iteration: 330176
loss: 0.941981315612793,grad_norm: 0.9604758884527513, iteration: 330177
loss: 1.0442208051681519,grad_norm: 0.9424612488560613, iteration: 330178
loss: 1.0186715126037598,grad_norm: 0.9999998768679011, iteration: 330179
loss: 0.9901267290115356,grad_norm: 0.941295541183337, iteration: 330180
loss: 1.004120945930481,grad_norm: 0.7608411576287399, iteration: 330181
loss: 1.0339148044586182,grad_norm: 0.8337655780535067, iteration: 330182
loss: 0.9993054270744324,grad_norm: 0.8071120586064913, iteration: 330183
loss: 1.0510421991348267,grad_norm: 0.9999991098780281, iteration: 330184
loss: 1.0054607391357422,grad_norm: 0.9999997568457529, iteration: 330185
loss: 0.9911620020866394,grad_norm: 0.9999990550703064, iteration: 330186
loss: 1.0022681951522827,grad_norm: 0.6614051085035944, iteration: 330187
loss: 1.0065511465072632,grad_norm: 0.9331379790750289, iteration: 330188
loss: 1.0035558938980103,grad_norm: 0.9346443331928086, iteration: 330189
loss: 1.0059152841567993,grad_norm: 0.7757695277721032, iteration: 330190
loss: 0.983830451965332,grad_norm: 0.8241352241056055, iteration: 330191
loss: 0.9906718730926514,grad_norm: 0.7398770465190969, iteration: 330192
loss: 0.9873183965682983,grad_norm: 0.9999991145881171, iteration: 330193
loss: 1.0218756198883057,grad_norm: 0.9999991929953531, iteration: 330194
loss: 1.0264919996261597,grad_norm: 0.7826346320558842, iteration: 330195
loss: 1.0408750772476196,grad_norm: 0.8027477505056986, iteration: 330196
loss: 1.0460301637649536,grad_norm: 0.9253077249332218, iteration: 330197
loss: 0.9952744841575623,grad_norm: 0.7827680204390428, iteration: 330198
loss: 0.9683460593223572,grad_norm: 0.9999995346487843, iteration: 330199
loss: 0.9983510971069336,grad_norm: 0.8289602226266456, iteration: 330200
loss: 1.036299705505371,grad_norm: 0.7398547326834775, iteration: 330201
loss: 0.9590291380882263,grad_norm: 0.7545694525717939, iteration: 330202
loss: 0.9945545196533203,grad_norm: 0.8601636213335644, iteration: 330203
loss: 0.9676100015640259,grad_norm: 0.9372296106732055, iteration: 330204
loss: 1.0029442310333252,grad_norm: 0.8506921467945311, iteration: 330205
loss: 1.0119304656982422,grad_norm: 0.7318409670902964, iteration: 330206
loss: 1.0165570974349976,grad_norm: 0.9456347400148435, iteration: 330207
loss: 0.989539623260498,grad_norm: 0.7500641774871819, iteration: 330208
loss: 0.9870378971099854,grad_norm: 0.922131597598912, iteration: 330209
loss: 0.9712696075439453,grad_norm: 0.9752610587004372, iteration: 330210
loss: 0.986565351486206,grad_norm: 0.8739029228730931, iteration: 330211
loss: 1.1352969408035278,grad_norm: 0.9999990550989717, iteration: 330212
loss: 1.0041810274124146,grad_norm: 0.73985777912892, iteration: 330213
loss: 1.0205010175704956,grad_norm: 0.6484140970016065, iteration: 330214
loss: 0.9919583797454834,grad_norm: 0.8122188238013303, iteration: 330215
loss: 0.9881770014762878,grad_norm: 0.8946383153205804, iteration: 330216
loss: 1.0425063371658325,grad_norm: 0.9999993227619106, iteration: 330217
loss: 0.9722697138786316,grad_norm: 0.9241487079408729, iteration: 330218
loss: 0.9658461809158325,grad_norm: 0.8861154940383462, iteration: 330219
loss: 0.973792552947998,grad_norm: 0.9899993573065493, iteration: 330220
loss: 0.9952946305274963,grad_norm: 0.8961650347374408, iteration: 330221
loss: 1.0342469215393066,grad_norm: 0.8027414856252356, iteration: 330222
loss: 1.008226990699768,grad_norm: 0.9999991769693858, iteration: 330223
loss: 1.033995509147644,grad_norm: 0.6955103851138538, iteration: 330224
loss: 1.0058848857879639,grad_norm: 0.7384432741403466, iteration: 330225
loss: 1.020519495010376,grad_norm: 0.725037057381909, iteration: 330226
loss: 0.9936807751655579,grad_norm: 0.9082240828358005, iteration: 330227
loss: 0.9546265602111816,grad_norm: 0.9003377810616358, iteration: 330228
loss: 0.9814749956130981,grad_norm: 0.9973850566704422, iteration: 330229
loss: 0.9944787621498108,grad_norm: 0.7449789016642897, iteration: 330230
loss: 1.0324479341506958,grad_norm: 0.9007303768009572, iteration: 330231
loss: 0.9597637057304382,grad_norm: 0.9101796996459222, iteration: 330232
loss: 0.995665431022644,grad_norm: 0.7617403271740457, iteration: 330233
loss: 0.9802665114402771,grad_norm: 0.960325534142064, iteration: 330234
loss: 1.0078516006469727,grad_norm: 0.8723007232123321, iteration: 330235
loss: 1.0317909717559814,grad_norm: 0.9999999268765462, iteration: 330236
loss: 0.9922873377799988,grad_norm: 0.8474082107604589, iteration: 330237
loss: 1.0110989809036255,grad_norm: 0.9657066012633385, iteration: 330238
loss: 0.9890028834342957,grad_norm: 0.8169530586269249, iteration: 330239
loss: 1.0077097415924072,grad_norm: 0.8640059475634457, iteration: 330240
loss: 1.12153160572052,grad_norm: 0.7928427478647507, iteration: 330241
loss: 0.9959371089935303,grad_norm: 0.7995998753651191, iteration: 330242
loss: 1.0084751844406128,grad_norm: 0.7077079788176458, iteration: 330243
loss: 1.0213347673416138,grad_norm: 0.7598036786319278, iteration: 330244
loss: 1.0081161260604858,grad_norm: 0.816254141217267, iteration: 330245
loss: 1.0275217294692993,grad_norm: 0.8065098623944621, iteration: 330246
loss: 1.0085257291793823,grad_norm: 0.7044860709300008, iteration: 330247
loss: 1.0073283910751343,grad_norm: 0.9999994341779121, iteration: 330248
loss: 0.9956327080726624,grad_norm: 0.7580142063831639, iteration: 330249
loss: 1.0795811414718628,grad_norm: 0.8386521768567103, iteration: 330250
loss: 0.9800354242324829,grad_norm: 0.8098153435582964, iteration: 330251
loss: 0.9900134801864624,grad_norm: 0.7787264254154646, iteration: 330252
loss: 0.9983049631118774,grad_norm: 0.6962030138501476, iteration: 330253
loss: 0.9836848974227905,grad_norm: 0.7927746820906562, iteration: 330254
loss: 1.0334951877593994,grad_norm: 0.8523062130102224, iteration: 330255
loss: 1.0106221437454224,grad_norm: 0.7453890884592705, iteration: 330256
loss: 1.0073800086975098,grad_norm: 0.9999990016996226, iteration: 330257
loss: 1.024631381034851,grad_norm: 0.7260190288855591, iteration: 330258
loss: 1.0028975009918213,grad_norm: 0.9625140710014787, iteration: 330259
loss: 0.9663183093070984,grad_norm: 0.8526873885570584, iteration: 330260
loss: 1.0029363632202148,grad_norm: 0.9049234007829278, iteration: 330261
loss: 0.9868502020835876,grad_norm: 0.7583804474175669, iteration: 330262
loss: 0.9689016938209534,grad_norm: 0.9999995741084788, iteration: 330263
loss: 1.0274875164031982,grad_norm: 0.8699994384459049, iteration: 330264
loss: 0.9962672591209412,grad_norm: 0.7465314541580517, iteration: 330265
loss: 1.0383293628692627,grad_norm: 0.9999991528440868, iteration: 330266
loss: 1.009454369544983,grad_norm: 0.9999990885764203, iteration: 330267
loss: 1.0147792100906372,grad_norm: 0.9064154391471604, iteration: 330268
loss: 1.0267642736434937,grad_norm: 0.7757544404405242, iteration: 330269
loss: 1.020567774772644,grad_norm: 0.822767590491016, iteration: 330270
loss: 1.0173550844192505,grad_norm: 0.6755799922246103, iteration: 330271
loss: 0.9926785826683044,grad_norm: 0.9164784747402599, iteration: 330272
loss: 1.0141104459762573,grad_norm: 0.9999990806035617, iteration: 330273
loss: 1.0054653882980347,grad_norm: 0.9684661520497753, iteration: 330274
loss: 0.9832391738891602,grad_norm: 0.8733886126497498, iteration: 330275
loss: 1.0490636825561523,grad_norm: 0.9315419297997561, iteration: 330276
loss: 0.9768955707550049,grad_norm: 0.9999990974134071, iteration: 330277
loss: 0.984026312828064,grad_norm: 0.7620634544654492, iteration: 330278
loss: 0.9664418697357178,grad_norm: 0.8885044682206086, iteration: 330279
loss: 0.9993663430213928,grad_norm: 0.7063067474112396, iteration: 330280
loss: 1.0032191276550293,grad_norm: 0.7729032167289693, iteration: 330281
loss: 1.0628107786178589,grad_norm: 0.8316625327705587, iteration: 330282
loss: 1.0475654602050781,grad_norm: 0.9999996466784574, iteration: 330283
loss: 0.9767646193504333,grad_norm: 0.8300083821178391, iteration: 330284
loss: 1.013942003250122,grad_norm: 0.8330148912121453, iteration: 330285
loss: 1.0393916368484497,grad_norm: 0.9999991757991449, iteration: 330286
loss: 0.9660309553146362,grad_norm: 0.7745866424095371, iteration: 330287
loss: 1.0229097604751587,grad_norm: 0.9999991576186658, iteration: 330288
loss: 0.9801369905471802,grad_norm: 0.8550178342540657, iteration: 330289
loss: 1.0142091512680054,grad_norm: 0.9999990609786287, iteration: 330290
loss: 0.9622812867164612,grad_norm: 0.8591784593146841, iteration: 330291
loss: 0.9914155602455139,grad_norm: 0.9059397577355034, iteration: 330292
loss: 0.9929203391075134,grad_norm: 0.7936110292063893, iteration: 330293
loss: 0.9849038124084473,grad_norm: 0.7945272877527825, iteration: 330294
loss: 0.9981169104576111,grad_norm: 0.7916878398593102, iteration: 330295
loss: 1.0142699480056763,grad_norm: 0.9047339285280259, iteration: 330296
loss: 1.0021238327026367,grad_norm: 0.9320381496981216, iteration: 330297
loss: 0.9933267831802368,grad_norm: 0.7292651769325628, iteration: 330298
loss: 1.0278016328811646,grad_norm: 0.8969105779485912, iteration: 330299
loss: 1.0248123407363892,grad_norm: 0.9052106539279897, iteration: 330300
loss: 0.9550012946128845,grad_norm: 0.7612744275776208, iteration: 330301
loss: 0.9723066687583923,grad_norm: 0.9999990379373781, iteration: 330302
loss: 1.0159343481063843,grad_norm: 0.7670708079122116, iteration: 330303
loss: 0.9959890842437744,grad_norm: 0.9395488599102095, iteration: 330304
loss: 0.98675537109375,grad_norm: 0.8784317149160318, iteration: 330305
loss: 1.0061057806015015,grad_norm: 0.8708873256553397, iteration: 330306
loss: 1.0075949430465698,grad_norm: 0.857374989876513, iteration: 330307
loss: 1.0210332870483398,grad_norm: 0.805413381245953, iteration: 330308
loss: 0.9952387809753418,grad_norm: 0.9003234911249769, iteration: 330309
loss: 0.9820635318756104,grad_norm: 0.8558910833166542, iteration: 330310
loss: 1.0430055856704712,grad_norm: 0.9085728663699485, iteration: 330311
loss: 1.0544297695159912,grad_norm: 0.8260412572821585, iteration: 330312
loss: 1.0086086988449097,grad_norm: 0.9999992230318833, iteration: 330313
loss: 0.972080409526825,grad_norm: 0.9040728771781128, iteration: 330314
loss: 0.9884796142578125,grad_norm: 0.9785300008541394, iteration: 330315
loss: 1.0310091972351074,grad_norm: 0.8893430695887516, iteration: 330316
loss: 1.0214787721633911,grad_norm: 0.8187775424945146, iteration: 330317
loss: 1.0246895551681519,grad_norm: 0.9522413419934755, iteration: 330318
loss: 0.9701725840568542,grad_norm: 0.8315836535624197, iteration: 330319
loss: 1.0266813039779663,grad_norm: 0.7845710650651747, iteration: 330320
loss: 1.012163758277893,grad_norm: 0.8974195069824461, iteration: 330321
loss: 0.9796146154403687,grad_norm: 0.9999990884114565, iteration: 330322
loss: 1.043331265449524,grad_norm: 0.9132292602197162, iteration: 330323
loss: 0.9892804026603699,grad_norm: 0.983655055525435, iteration: 330324
loss: 1.011102557182312,grad_norm: 0.6879745023738838, iteration: 330325
loss: 0.9664911031723022,grad_norm: 0.858634628759423, iteration: 330326
loss: 0.9959709048271179,grad_norm: 0.8615261804227475, iteration: 330327
loss: 0.9819241166114807,grad_norm: 0.996371045502413, iteration: 330328
loss: 0.9936949610710144,grad_norm: 0.7648946003810136, iteration: 330329
loss: 0.9859098196029663,grad_norm: 0.8466074198157415, iteration: 330330
loss: 1.0023351907730103,grad_norm: 0.7943078533371757, iteration: 330331
loss: 1.088111162185669,grad_norm: 0.9595147258639276, iteration: 330332
loss: 0.986847460269928,grad_norm: 0.8609042228838912, iteration: 330333
loss: 1.0204474925994873,grad_norm: 0.8986096346314337, iteration: 330334
loss: 0.9830842018127441,grad_norm: 0.8372602876186515, iteration: 330335
loss: 1.0585272312164307,grad_norm: 0.7765891771328524, iteration: 330336
loss: 1.032165288925171,grad_norm: 0.9999992960577441, iteration: 330337
loss: 0.9902607202529907,grad_norm: 0.8204120588595616, iteration: 330338
loss: 0.9668179750442505,grad_norm: 0.9999991385214124, iteration: 330339
loss: 0.9778569340705872,grad_norm: 0.808475522894415, iteration: 330340
loss: 0.9807563424110413,grad_norm: 0.7872596112334271, iteration: 330341
loss: 1.0943459272384644,grad_norm: 0.9596892389414473, iteration: 330342
loss: 1.0382131338119507,grad_norm: 0.8387763555599089, iteration: 330343
loss: 0.9741002321243286,grad_norm: 0.8713220339116419, iteration: 330344
loss: 0.969458818435669,grad_norm: 0.8498143567613275, iteration: 330345
loss: 1.006015419960022,grad_norm: 0.9999996959590285, iteration: 330346
loss: 1.0037007331848145,grad_norm: 0.8925966257938677, iteration: 330347
loss: 0.9514789581298828,grad_norm: 0.8704411625106117, iteration: 330348
loss: 1.022010087966919,grad_norm: 0.9999991339082689, iteration: 330349
loss: 1.0179493427276611,grad_norm: 0.7372070660092256, iteration: 330350
loss: 1.00423002243042,grad_norm: 0.9999991141605507, iteration: 330351
loss: 1.0108299255371094,grad_norm: 0.7820078993427408, iteration: 330352
loss: 0.9903952479362488,grad_norm: 0.9999992646261158, iteration: 330353
loss: 0.9953648447990417,grad_norm: 0.946830800534993, iteration: 330354
loss: 0.9518037438392639,grad_norm: 0.9247828072370157, iteration: 330355
loss: 0.9874759912490845,grad_norm: 0.8479593013729566, iteration: 330356
loss: 1.00191068649292,grad_norm: 0.7849985967961769, iteration: 330357
loss: 1.0217243432998657,grad_norm: 0.8672149805595776, iteration: 330358
loss: 1.001662015914917,grad_norm: 0.8987929655408824, iteration: 330359
loss: 1.0165321826934814,grad_norm: 0.9245316128415227, iteration: 330360
loss: 1.0297783613204956,grad_norm: 0.7570366490933833, iteration: 330361
loss: 0.9991783499717712,grad_norm: 0.9407985744328571, iteration: 330362
loss: 0.9899746775627136,grad_norm: 0.7356164649404126, iteration: 330363
loss: 0.9911126494407654,grad_norm: 0.9642712475046725, iteration: 330364
loss: 0.9738749861717224,grad_norm: 0.7512424119231533, iteration: 330365
loss: 0.9797818064689636,grad_norm: 0.8782581830816134, iteration: 330366
loss: 0.9464771747589111,grad_norm: 0.7602553178419889, iteration: 330367
loss: 1.0093826055526733,grad_norm: 0.7949452409709397, iteration: 330368
loss: 0.9673987030982971,grad_norm: 0.7825414418669633, iteration: 330369
loss: 1.041279673576355,grad_norm: 0.9999992700115269, iteration: 330370
loss: 0.9753958582878113,grad_norm: 0.9999998893645595, iteration: 330371
loss: 1.0159475803375244,grad_norm: 0.999999242891378, iteration: 330372
loss: 1.0135482549667358,grad_norm: 0.8349432237627171, iteration: 330373
loss: 0.9995285868644714,grad_norm: 0.7925009059540601, iteration: 330374
loss: 0.9932146072387695,grad_norm: 0.6437637168432321, iteration: 330375
loss: 1.0072638988494873,grad_norm: 0.8049147395620236, iteration: 330376
loss: 1.0086003541946411,grad_norm: 0.8544086933670124, iteration: 330377
loss: 1.0043668746948242,grad_norm: 0.9999992022816777, iteration: 330378
loss: 0.9982683062553406,grad_norm: 0.9999990934146541, iteration: 330379
loss: 0.991990327835083,grad_norm: 0.896681150322384, iteration: 330380
loss: 1.002293348312378,grad_norm: 0.9274094289944939, iteration: 330381
loss: 0.9829816222190857,grad_norm: 0.9379334257829814, iteration: 330382
loss: 1.000546932220459,grad_norm: 0.9607454671616107, iteration: 330383
loss: 0.9906004071235657,grad_norm: 0.8673052716705383, iteration: 330384
loss: 0.9991499185562134,grad_norm: 0.9706154695925051, iteration: 330385
loss: 1.0047944784164429,grad_norm: 0.8460090037491773, iteration: 330386
loss: 0.9909343123435974,grad_norm: 0.7754435293480424, iteration: 330387
loss: 0.9664807319641113,grad_norm: 0.9436123007705688, iteration: 330388
loss: 0.984765350818634,grad_norm: 0.8133445326315789, iteration: 330389
loss: 1.0031628608703613,grad_norm: 0.7369006590552473, iteration: 330390
loss: 1.0068856477737427,grad_norm: 0.8197291482143739, iteration: 330391
loss: 0.9929982423782349,grad_norm: 0.8633185766612046, iteration: 330392
loss: 1.0104730129241943,grad_norm: 0.9999991312844051, iteration: 330393
loss: 0.984653115272522,grad_norm: 0.8823787080094757, iteration: 330394
loss: 1.0677759647369385,grad_norm: 0.9348704120346384, iteration: 330395
loss: 1.0101468563079834,grad_norm: 0.9377414732524763, iteration: 330396
loss: 0.9597272276878357,grad_norm: 0.8491813752142388, iteration: 330397
loss: 1.0260742902755737,grad_norm: 0.7217366432502718, iteration: 330398
loss: 1.0585856437683105,grad_norm: 0.9999996251378479, iteration: 330399
loss: 1.0024559497833252,grad_norm: 0.7372942873502729, iteration: 330400
loss: 0.9893978834152222,grad_norm: 0.6271924108021171, iteration: 330401
loss: 0.9929642081260681,grad_norm: 0.8993474590126046, iteration: 330402
loss: 0.9396775364875793,grad_norm: 0.8435778604184082, iteration: 330403
loss: 0.9771955013275146,grad_norm: 0.7779114973356579, iteration: 330404
loss: 0.9653909206390381,grad_norm: 0.7327451343502114, iteration: 330405
loss: 0.99020916223526,grad_norm: 0.7617125707833654, iteration: 330406
loss: 1.001947283744812,grad_norm: 0.9976991657032697, iteration: 330407
loss: 1.0371476411819458,grad_norm: 0.9999990256752337, iteration: 330408
loss: 0.9650384783744812,grad_norm: 0.7371012491676674, iteration: 330409
loss: 0.9871907830238342,grad_norm: 0.8548654067044152, iteration: 330410
loss: 1.030524492263794,grad_norm: 0.9999989421141039, iteration: 330411
loss: 0.9833113551139832,grad_norm: 0.9026773634672821, iteration: 330412
loss: 1.0015228986740112,grad_norm: 0.8316925824647955, iteration: 330413
loss: 0.9936506152153015,grad_norm: 0.8175912253279835, iteration: 330414
loss: 1.0080242156982422,grad_norm: 0.7802227878499024, iteration: 330415
loss: 1.014236330986023,grad_norm: 0.999999163049983, iteration: 330416
loss: 0.9895381331443787,grad_norm: 0.8594229274818456, iteration: 330417
loss: 0.9812867045402527,grad_norm: 0.7910050671151907, iteration: 330418
loss: 0.9836018085479736,grad_norm: 0.99999981582461, iteration: 330419
loss: 0.9884649515151978,grad_norm: 0.74231181652716, iteration: 330420
loss: 0.9785559773445129,grad_norm: 0.7723916741005233, iteration: 330421
loss: 0.9603886604309082,grad_norm: 0.9397406895239346, iteration: 330422
loss: 0.9977849721908569,grad_norm: 0.8091934443563767, iteration: 330423
loss: 1.0075348615646362,grad_norm: 0.6976072311234921, iteration: 330424
loss: 1.002764344215393,grad_norm: 0.807501779582061, iteration: 330425
loss: 1.0289291143417358,grad_norm: 0.860717864093963, iteration: 330426
loss: 0.9552137851715088,grad_norm: 0.8429227393847429, iteration: 330427
loss: 0.9965872168540955,grad_norm: 0.8856760347342459, iteration: 330428
loss: 0.9926271438598633,grad_norm: 0.8269534961586174, iteration: 330429
loss: 1.0281802415847778,grad_norm: 0.7741559894943781, iteration: 330430
loss: 0.9805558323860168,grad_norm: 0.8870470169626344, iteration: 330431
loss: 1.0173985958099365,grad_norm: 0.8805824305935791, iteration: 330432
loss: 0.9666010141372681,grad_norm: 0.7695900037639407, iteration: 330433
loss: 1.006339192390442,grad_norm: 0.9609240554507511, iteration: 330434
loss: 1.0200996398925781,grad_norm: 0.770293096637897, iteration: 330435
loss: 1.0221737623214722,grad_norm: 0.8954299452638635, iteration: 330436
loss: 1.0159220695495605,grad_norm: 0.8560560816737974, iteration: 330437
loss: 1.0112687349319458,grad_norm: 0.9999989836666209, iteration: 330438
loss: 1.0098519325256348,grad_norm: 0.7376177459842163, iteration: 330439
loss: 0.9967545866966248,grad_norm: 0.8539575912202654, iteration: 330440
loss: 1.002293586730957,grad_norm: 0.8878329225556841, iteration: 330441
loss: 0.9746860861778259,grad_norm: 0.7749983178551033, iteration: 330442
loss: 0.9617759585380554,grad_norm: 0.8926174108597731, iteration: 330443
loss: 1.0060234069824219,grad_norm: 0.9999996689667704, iteration: 330444
loss: 1.0066148042678833,grad_norm: 0.8857147836421368, iteration: 330445
loss: 0.9835529327392578,grad_norm: 0.9315317857664472, iteration: 330446
loss: 1.010546088218689,grad_norm: 0.9043833841263336, iteration: 330447
loss: 1.0110410451889038,grad_norm: 0.7985129355634678, iteration: 330448
loss: 1.020675778388977,grad_norm: 0.7044944989240692, iteration: 330449
loss: 0.9892227649688721,grad_norm: 0.7851761752335408, iteration: 330450
loss: 0.959221363067627,grad_norm: 0.9578806628252233, iteration: 330451
loss: 1.0557574033737183,grad_norm: 0.9462074182257446, iteration: 330452
loss: 1.000989317893982,grad_norm: 0.7642334056244896, iteration: 330453
loss: 1.003157377243042,grad_norm: 0.9999991790298092, iteration: 330454
loss: 0.9699990749359131,grad_norm: 0.9074201788778768, iteration: 330455
loss: 0.9716089963912964,grad_norm: 0.8474263853356373, iteration: 330456
loss: 1.005007028579712,grad_norm: 0.7261283283378999, iteration: 330457
loss: 1.0006295442581177,grad_norm: 0.7604118508083132, iteration: 330458
loss: 0.9974369406700134,grad_norm: 0.7034610756974562, iteration: 330459
loss: 1.052392840385437,grad_norm: 0.9353483650506426, iteration: 330460
loss: 1.0019687414169312,grad_norm: 0.728414198442111, iteration: 330461
loss: 1.0229864120483398,grad_norm: 0.7942234252090496, iteration: 330462
loss: 0.9910046458244324,grad_norm: 0.7996166100321596, iteration: 330463
loss: 1.0451864004135132,grad_norm: 0.9999998839857734, iteration: 330464
loss: 0.9874804615974426,grad_norm: 0.7739620083458874, iteration: 330465
loss: 0.9652115106582642,grad_norm: 0.9091624247789063, iteration: 330466
loss: 0.9697296023368835,grad_norm: 0.8877385775374236, iteration: 330467
loss: 0.9741579294204712,grad_norm: 0.8834689904816763, iteration: 330468
loss: 1.036158800125122,grad_norm: 0.8206589832788749, iteration: 330469
loss: 1.0843753814697266,grad_norm: 0.9402881914647396, iteration: 330470
loss: 1.0344995260238647,grad_norm: 0.9596756116559234, iteration: 330471
loss: 0.9556418061256409,grad_norm: 0.820901632824332, iteration: 330472
loss: 1.0024956464767456,grad_norm: 0.9999991191273403, iteration: 330473
loss: 0.9780174493789673,grad_norm: 0.7685100237093575, iteration: 330474
loss: 1.0521351099014282,grad_norm: 0.835768348160554, iteration: 330475
loss: 0.9915520548820496,grad_norm: 0.8820877933630102, iteration: 330476
loss: 1.0171915292739868,grad_norm: 0.8868736161095818, iteration: 330477
loss: 1.0024762153625488,grad_norm: 0.7324862427014588, iteration: 330478
loss: 0.993360698223114,grad_norm: 0.9228793296548246, iteration: 330479
loss: 1.0202341079711914,grad_norm: 0.8206777330865935, iteration: 330480
loss: 0.9953672289848328,grad_norm: 0.818254395315729, iteration: 330481
loss: 1.0062135457992554,grad_norm: 0.8051173194641126, iteration: 330482
loss: 0.9796796441078186,grad_norm: 0.8076488595185836, iteration: 330483
loss: 0.9813998341560364,grad_norm: 0.7116592996554998, iteration: 330484
loss: 1.0326567888259888,grad_norm: 0.9135594033092849, iteration: 330485
loss: 1.00706148147583,grad_norm: 0.658315507857402, iteration: 330486
loss: 1.0014638900756836,grad_norm: 0.8384595626083924, iteration: 330487
loss: 1.0219953060150146,grad_norm: 0.9999991138851698, iteration: 330488
loss: 0.9989660382270813,grad_norm: 0.7648687140858672, iteration: 330489
loss: 1.036264181137085,grad_norm: 0.936462440809342, iteration: 330490
loss: 0.9947240948677063,grad_norm: 0.8607028834241034, iteration: 330491
loss: 1.0026086568832397,grad_norm: 0.7053840813583855, iteration: 330492
loss: 0.9536023736000061,grad_norm: 0.8649998358477909, iteration: 330493
loss: 0.9942048192024231,grad_norm: 0.8301132054670313, iteration: 330494
loss: 0.9953146576881409,grad_norm: 0.9382973559321237, iteration: 330495
loss: 1.040305733680725,grad_norm: 0.8965987332330726, iteration: 330496
loss: 1.0680567026138306,grad_norm: 0.9999998134650278, iteration: 330497
loss: 1.0249255895614624,grad_norm: 0.8151807549902188, iteration: 330498
loss: 1.055332899093628,grad_norm: 0.999999903021818, iteration: 330499
loss: 1.0121806859970093,grad_norm: 0.8001827602908065, iteration: 330500
loss: 1.0222070217132568,grad_norm: 0.874794738934611, iteration: 330501
loss: 0.974107563495636,grad_norm: 0.7362872431640503, iteration: 330502
loss: 1.020877718925476,grad_norm: 0.8165402905053204, iteration: 330503
loss: 1.004001259803772,grad_norm: 0.9562410095068358, iteration: 330504
loss: 1.0998213291168213,grad_norm: 0.853921841008387, iteration: 330505
loss: 0.9617685675621033,grad_norm: 0.7477220961943499, iteration: 330506
loss: 1.027725100517273,grad_norm: 0.999999222513272, iteration: 330507
loss: 1.044569492340088,grad_norm: 0.9814477910646751, iteration: 330508
loss: 1.0372852087020874,grad_norm: 0.9999992971423052, iteration: 330509
loss: 1.0193736553192139,grad_norm: 0.7751008977208497, iteration: 330510
loss: 0.9742889404296875,grad_norm: 0.8187032678912577, iteration: 330511
loss: 1.0001622438430786,grad_norm: 0.9574190491586888, iteration: 330512
loss: 1.0014398097991943,grad_norm: 0.9999990386581267, iteration: 330513
loss: 1.0062260627746582,grad_norm: 0.8227477654838962, iteration: 330514
loss: 0.98459392786026,grad_norm: 0.7002569804565879, iteration: 330515
loss: 1.0071110725402832,grad_norm: 0.6920356030105972, iteration: 330516
loss: 0.9748067259788513,grad_norm: 0.820254962283422, iteration: 330517
loss: 1.0132555961608887,grad_norm: 0.9999991777818883, iteration: 330518
loss: 1.0169681310653687,grad_norm: 0.9052746792383376, iteration: 330519
loss: 0.9820219278335571,grad_norm: 0.8098686968758665, iteration: 330520
loss: 1.0274889469146729,grad_norm: 0.9370041212532075, iteration: 330521
loss: 1.004974603652954,grad_norm: 0.7951171532845855, iteration: 330522
loss: 0.993102490901947,grad_norm: 0.8920121529031776, iteration: 330523
loss: 1.0405974388122559,grad_norm: 0.9564158253337351, iteration: 330524
loss: 0.9688059091567993,grad_norm: 0.8750184019970314, iteration: 330525
loss: 1.0214310884475708,grad_norm: 0.946152809660024, iteration: 330526
loss: 1.0493184328079224,grad_norm: 0.999999145492491, iteration: 330527
loss: 0.9942426085472107,grad_norm: 0.9734740360767457, iteration: 330528
loss: 1.0173157453536987,grad_norm: 0.9098068133805546, iteration: 330529
loss: 1.024932861328125,grad_norm: 0.9999990786720715, iteration: 330530
loss: 0.9837644696235657,grad_norm: 0.9999992737630274, iteration: 330531
loss: 0.9660158753395081,grad_norm: 0.8851444167317787, iteration: 330532
loss: 0.9990414977073669,grad_norm: 0.8231822143597414, iteration: 330533
loss: 1.0292445421218872,grad_norm: 0.9999990744636756, iteration: 330534
loss: 0.9604727625846863,grad_norm: 0.8348034610534392, iteration: 330535
loss: 0.9678042531013489,grad_norm: 0.9999990954980568, iteration: 330536
loss: 0.9737599492073059,grad_norm: 0.9999990516973485, iteration: 330537
loss: 1.0135656595230103,grad_norm: 0.7505301156272426, iteration: 330538
loss: 0.9920827150344849,grad_norm: 0.7225323650697861, iteration: 330539
loss: 1.0230780839920044,grad_norm: 0.9999990770574466, iteration: 330540
loss: 1.0431772470474243,grad_norm: 0.889713880893704, iteration: 330541
loss: 1.0130863189697266,grad_norm: 0.801006426041354, iteration: 330542
loss: 1.0182663202285767,grad_norm: 0.9999991950277972, iteration: 330543
loss: 1.1153351068496704,grad_norm: 0.9999990516765681, iteration: 330544
loss: 0.9897621870040894,grad_norm: 0.8526595126001686, iteration: 330545
loss: 1.022460699081421,grad_norm: 0.999999618615541, iteration: 330546
loss: 1.062099814414978,grad_norm: 0.9999992367979734, iteration: 330547
loss: 1.0088365077972412,grad_norm: 0.8629837146080823, iteration: 330548
loss: 1.0153131484985352,grad_norm: 0.884764659473162, iteration: 330549
loss: 0.999891459941864,grad_norm: 0.9999996896886268, iteration: 330550
loss: 0.9954461455345154,grad_norm: 0.6499752137846201, iteration: 330551
loss: 1.031822919845581,grad_norm: 0.9999990972556589, iteration: 330552
loss: 0.9601458311080933,grad_norm: 0.7399312378802337, iteration: 330553
loss: 1.0155141353607178,grad_norm: 0.7666011471789838, iteration: 330554
loss: 1.0139825344085693,grad_norm: 0.8839802744725261, iteration: 330555
loss: 0.9985901713371277,grad_norm: 0.8358691314791808, iteration: 330556
loss: 1.0018227100372314,grad_norm: 0.8550944594681796, iteration: 330557
loss: 0.9832255840301514,grad_norm: 0.8475137817178208, iteration: 330558
loss: 1.0151903629302979,grad_norm: 0.8261904290534556, iteration: 330559
loss: 1.0195813179016113,grad_norm: 0.9299811903570878, iteration: 330560
loss: 1.0629687309265137,grad_norm: 0.9999991247059704, iteration: 330561
loss: 0.9786280393600464,grad_norm: 0.9042020470347825, iteration: 330562
loss: 0.970932126045227,grad_norm: 0.9879565098107368, iteration: 330563
loss: 0.998018205165863,grad_norm: 0.8141889245186384, iteration: 330564
loss: 0.9628509879112244,grad_norm: 0.996644784079186, iteration: 330565
loss: 0.9969261884689331,grad_norm: 0.8501010229265462, iteration: 330566
loss: 0.9610106945037842,grad_norm: 0.7867629150908217, iteration: 330567
loss: 0.9830710291862488,grad_norm: 0.9447075940794805, iteration: 330568
loss: 1.0113654136657715,grad_norm: 0.8958074349104127, iteration: 330569
loss: 1.0267530679702759,grad_norm: 0.7315115477492677, iteration: 330570
loss: 0.9976046681404114,grad_norm: 0.920431485094909, iteration: 330571
loss: 1.0070295333862305,grad_norm: 0.9041598567181013, iteration: 330572
loss: 0.9915005564689636,grad_norm: 0.851641692645515, iteration: 330573
loss: 1.0034209489822388,grad_norm: 0.762040322195463, iteration: 330574
loss: 1.0037062168121338,grad_norm: 0.8343979698177728, iteration: 330575
loss: 0.9860528111457825,grad_norm: 0.8046839529882325, iteration: 330576
loss: 0.9529669880867004,grad_norm: 0.7691613122094353, iteration: 330577
loss: 1.0016447305679321,grad_norm: 0.8228606793416193, iteration: 330578
loss: 0.9965932369232178,grad_norm: 0.7445237579951492, iteration: 330579
loss: 1.0346715450286865,grad_norm: 0.9219544856834406, iteration: 330580
loss: 1.0002204179763794,grad_norm: 0.9999991203822748, iteration: 330581
loss: 0.9881380200386047,grad_norm: 0.775309253292197, iteration: 330582
loss: 1.0113378763198853,grad_norm: 0.9999991953596807, iteration: 330583
loss: 0.9878314733505249,grad_norm: 0.9204529463160059, iteration: 330584
loss: 0.9950721859931946,grad_norm: 0.817187703145221, iteration: 330585
loss: 0.9912184476852417,grad_norm: 0.7929607030055428, iteration: 330586
loss: 1.0253000259399414,grad_norm: 0.8872258231484014, iteration: 330587
loss: 0.9985826015472412,grad_norm: 0.7754227587369499, iteration: 330588
loss: 0.9920098781585693,grad_norm: 0.7881382493054319, iteration: 330589
loss: 1.0044077634811401,grad_norm: 0.9675609960552373, iteration: 330590
loss: 1.0053064823150635,grad_norm: 0.8891185500513645, iteration: 330591
loss: 1.0348111391067505,grad_norm: 0.8763749203706653, iteration: 330592
loss: 0.9769952893257141,grad_norm: 0.9292476084651164, iteration: 330593
loss: 1.152933955192566,grad_norm: 0.9999991535520449, iteration: 330594
loss: 0.976445198059082,grad_norm: 0.9628090682234971, iteration: 330595
loss: 1.0075178146362305,grad_norm: 0.8161392127320617, iteration: 330596
loss: 1.002052664756775,grad_norm: 0.797610085441944, iteration: 330597
loss: 1.0447465181350708,grad_norm: 0.999999769807174, iteration: 330598
loss: 1.0237106084823608,grad_norm: 0.9074920139377232, iteration: 330599
loss: 0.9771038293838501,grad_norm: 0.7855208881661138, iteration: 330600
loss: 1.0137875080108643,grad_norm: 0.7138800215075853, iteration: 330601
loss: 0.9836580157279968,grad_norm: 0.9999993470602125, iteration: 330602
loss: 0.979181706905365,grad_norm: 0.8218873332240879, iteration: 330603
loss: 0.9995188117027283,grad_norm: 0.8267438661796958, iteration: 330604
loss: 1.0056674480438232,grad_norm: 0.822430999805402, iteration: 330605
loss: 0.9808435440063477,grad_norm: 0.8295437620853549, iteration: 330606
loss: 1.0081623792648315,grad_norm: 0.8176221186911014, iteration: 330607
loss: 1.000239372253418,grad_norm: 0.8991507581089254, iteration: 330608
loss: 1.0069838762283325,grad_norm: 0.8851173001487704, iteration: 330609
loss: 1.013520359992981,grad_norm: 0.9999991970892772, iteration: 330610
loss: 0.9869751930236816,grad_norm: 0.8565844025396573, iteration: 330611
loss: 0.9847248792648315,grad_norm: 0.9025423566264615, iteration: 330612
loss: 1.0022345781326294,grad_norm: 0.7949453958038465, iteration: 330613
loss: 1.0423882007598877,grad_norm: 0.9999993267919466, iteration: 330614
loss: 1.014373540878296,grad_norm: 0.870884814651574, iteration: 330615
loss: 1.0157257318496704,grad_norm: 0.837544111907743, iteration: 330616
loss: 0.9682741761207581,grad_norm: 0.976373316393409, iteration: 330617
loss: 0.999751627445221,grad_norm: 0.8243799747723114, iteration: 330618
loss: 1.0414457321166992,grad_norm: 0.726162973777845, iteration: 330619
loss: 1.0364105701446533,grad_norm: 0.7459263962485334, iteration: 330620
loss: 0.9856168031692505,grad_norm: 0.7397772630820315, iteration: 330621
loss: 1.0111912488937378,grad_norm: 0.6401456217823427, iteration: 330622
loss: 1.0226213932037354,grad_norm: 0.9999999262225919, iteration: 330623
loss: 0.9780520796775818,grad_norm: 0.8325312930107595, iteration: 330624
loss: 1.0041049718856812,grad_norm: 0.9999992751916681, iteration: 330625
loss: 1.1343052387237549,grad_norm: 0.9999994675336575, iteration: 330626
loss: 1.0097311735153198,grad_norm: 0.826961708202627, iteration: 330627
loss: 0.988681972026825,grad_norm: 0.9999990707628571, iteration: 330628
loss: 1.0141098499298096,grad_norm: 0.9999999498582143, iteration: 330629
loss: 1.0171887874603271,grad_norm: 0.8535307929236354, iteration: 330630
loss: 1.0265181064605713,grad_norm: 0.8343519271638097, iteration: 330631
loss: 0.9783812761306763,grad_norm: 0.6856258991320066, iteration: 330632
loss: 0.9991564154624939,grad_norm: 0.786136852280155, iteration: 330633
loss: 1.0027966499328613,grad_norm: 0.7285684659697325, iteration: 330634
loss: 1.0171154737472534,grad_norm: 0.71661118185482, iteration: 330635
loss: 0.9932565093040466,grad_norm: 0.8658357339526169, iteration: 330636
loss: 1.019188642501831,grad_norm: 0.7973456874381757, iteration: 330637
loss: 1.1878392696380615,grad_norm: 0.9999998827614448, iteration: 330638
loss: 1.0383344888687134,grad_norm: 0.9516482582469378, iteration: 330639
loss: 0.970481276512146,grad_norm: 0.772576766828647, iteration: 330640
loss: 1.0045424699783325,grad_norm: 0.8835305449004565, iteration: 330641
loss: 0.9794453382492065,grad_norm: 0.9999991728714177, iteration: 330642
loss: 1.0036146640777588,grad_norm: 0.6551470643236518, iteration: 330643
loss: 1.0335588455200195,grad_norm: 0.7545190434317297, iteration: 330644
loss: 1.088843822479248,grad_norm: 0.9999995923336664, iteration: 330645
loss: 1.0084470510482788,grad_norm: 0.9330102115635958, iteration: 330646
loss: 1.0821082592010498,grad_norm: 0.9999992516584134, iteration: 330647
loss: 0.9834954142570496,grad_norm: 0.7843127789495706, iteration: 330648
loss: 1.0098166465759277,grad_norm: 0.8335211937302341, iteration: 330649
loss: 0.9709607362747192,grad_norm: 0.8173293458458345, iteration: 330650
loss: 1.0304670333862305,grad_norm: 0.8187507865601179, iteration: 330651
loss: 0.9822930693626404,grad_norm: 0.9999998974682519, iteration: 330652
loss: 0.9861546158790588,grad_norm: 0.7293183792153047, iteration: 330653
loss: 1.0507593154907227,grad_norm: 0.8687599308133723, iteration: 330654
loss: 1.0060687065124512,grad_norm: 0.9540170303367326, iteration: 330655
loss: 0.9594441056251526,grad_norm: 0.7894730713288557, iteration: 330656
loss: 0.9706295728683472,grad_norm: 0.7328760497450854, iteration: 330657
loss: 1.024782657623291,grad_norm: 0.9999991795006032, iteration: 330658
loss: 0.987993061542511,grad_norm: 0.8684824355341413, iteration: 330659
loss: 0.9893591403961182,grad_norm: 0.7934990567207484, iteration: 330660
loss: 0.9987441301345825,grad_norm: 0.8817418359069638, iteration: 330661
loss: 1.0794521570205688,grad_norm: 0.8820468589942141, iteration: 330662
loss: 0.9598493576049805,grad_norm: 0.9075912100469187, iteration: 330663
loss: 1.000411868095398,grad_norm: 0.9999991267869134, iteration: 330664
loss: 1.007851481437683,grad_norm: 0.8607309559358651, iteration: 330665
loss: 0.9937316179275513,grad_norm: 0.9999990912682689, iteration: 330666
loss: 0.9868908524513245,grad_norm: 0.9999990703434111, iteration: 330667
loss: 0.9841201305389404,grad_norm: 0.7871517141682085, iteration: 330668
loss: 0.9855968952178955,grad_norm: 0.7273171604401195, iteration: 330669
loss: 0.9972346425056458,grad_norm: 0.7750757201926006, iteration: 330670
loss: 1.0208765268325806,grad_norm: 0.8595727867352978, iteration: 330671
loss: 0.9951623678207397,grad_norm: 0.9617270686409247, iteration: 330672
loss: 1.0280463695526123,grad_norm: 0.9234369569718708, iteration: 330673
loss: 0.9527669548988342,grad_norm: 0.822602745904633, iteration: 330674
loss: 0.961711049079895,grad_norm: 0.806604046039238, iteration: 330675
loss: 0.998802125453949,grad_norm: 0.8564653789663486, iteration: 330676
loss: 1.0121506452560425,grad_norm: 0.9099456033721518, iteration: 330677
loss: 0.992384672164917,grad_norm: 0.8654353314316025, iteration: 330678
loss: 1.078876256942749,grad_norm: 0.9999995454788203, iteration: 330679
loss: 0.994151771068573,grad_norm: 0.8349392129234869, iteration: 330680
loss: 0.9748522043228149,grad_norm: 0.686058341417021, iteration: 330681
loss: 1.0220274925231934,grad_norm: 0.9896247714318976, iteration: 330682
loss: 1.0277130603790283,grad_norm: 0.8401974352096675, iteration: 330683
loss: 1.0094316005706787,grad_norm: 0.8121589630959524, iteration: 330684
loss: 0.9915708303451538,grad_norm: 0.8779933615979253, iteration: 330685
loss: 0.9973421096801758,grad_norm: 0.9539786132332139, iteration: 330686
loss: 1.067245602607727,grad_norm: 0.999999066169264, iteration: 330687
loss: 0.9984524250030518,grad_norm: 0.8070850834242048, iteration: 330688
loss: 0.9788553714752197,grad_norm: 0.7471389036833646, iteration: 330689
loss: 0.9626498222351074,grad_norm: 0.7294070142438455, iteration: 330690
loss: 1.0090789794921875,grad_norm: 0.824965453401401, iteration: 330691
loss: 1.0135163068771362,grad_norm: 0.8418699483451187, iteration: 330692
loss: 1.0312145948410034,grad_norm: 0.851930161040687, iteration: 330693
loss: 0.9742410778999329,grad_norm: 0.7785342772552933, iteration: 330694
loss: 0.9902598261833191,grad_norm: 0.9999989694358609, iteration: 330695
loss: 0.9991096258163452,grad_norm: 0.9999990172025625, iteration: 330696
loss: 1.0085617303848267,grad_norm: 0.9166524095221902, iteration: 330697
loss: 0.9877053499221802,grad_norm: 0.8138162378642034, iteration: 330698
loss: 0.99385666847229,grad_norm: 0.9644969705664115, iteration: 330699
loss: 1.0250250101089478,grad_norm: 0.9999992144988443, iteration: 330700
loss: 1.018134593963623,grad_norm: 0.7519939611022789, iteration: 330701
loss: 0.9924744963645935,grad_norm: 0.8823279644074494, iteration: 330702
loss: 1.0148593187332153,grad_norm: 0.8644229412631691, iteration: 330703
loss: 1.0057463645935059,grad_norm: 0.8439083737540166, iteration: 330704
loss: 1.0185554027557373,grad_norm: 0.999999150419743, iteration: 330705
loss: 0.9861650466918945,grad_norm: 0.7794412388459316, iteration: 330706
loss: 1.0119658708572388,grad_norm: 0.9999991523450458, iteration: 330707
loss: 1.0021198987960815,grad_norm: 0.8309020637374822, iteration: 330708
loss: 0.9637902975082397,grad_norm: 0.9999990304317897, iteration: 330709
loss: 0.9842177629470825,grad_norm: 0.7099686526533227, iteration: 330710
loss: 1.0025253295898438,grad_norm: 0.831741077373616, iteration: 330711
loss: 1.0079939365386963,grad_norm: 0.8793678617406488, iteration: 330712
loss: 1.1079671382904053,grad_norm: 0.9999999395623882, iteration: 330713
loss: 0.9976016283035278,grad_norm: 0.8583210424458292, iteration: 330714
loss: 1.0182090997695923,grad_norm: 0.9999993264597714, iteration: 330715
loss: 1.012406587600708,grad_norm: 0.8560048626684356, iteration: 330716
loss: 1.0356701612472534,grad_norm: 0.9999992427241062, iteration: 330717
loss: 0.9404959082603455,grad_norm: 0.8344577150610082, iteration: 330718
loss: 1.0094307661056519,grad_norm: 0.7542997397333342, iteration: 330719
loss: 0.9707145094871521,grad_norm: 0.9643420591922394, iteration: 330720
loss: 1.0303654670715332,grad_norm: 0.9426027094033563, iteration: 330721
loss: 0.994195282459259,grad_norm: 0.716006869233247, iteration: 330722
loss: 0.9880793690681458,grad_norm: 0.831576623015521, iteration: 330723
loss: 0.9845067262649536,grad_norm: 0.755192501538857, iteration: 330724
loss: 1.0274815559387207,grad_norm: 0.8146885730430551, iteration: 330725
loss: 0.9846887588500977,grad_norm: 0.8094930582180027, iteration: 330726
loss: 1.0090166330337524,grad_norm: 0.8405411296007163, iteration: 330727
loss: 1.0312381982803345,grad_norm: 0.7355534283301677, iteration: 330728
loss: 0.9735251665115356,grad_norm: 0.9572302804551399, iteration: 330729
loss: 0.9509922862052917,grad_norm: 0.8357339629150934, iteration: 330730
loss: 1.0446113348007202,grad_norm: 0.932042857220565, iteration: 330731
loss: 1.0009173154830933,grad_norm: 0.7857818336800225, iteration: 330732
loss: 1.0037559270858765,grad_norm: 0.9350578177329585, iteration: 330733
loss: 1.0546767711639404,grad_norm: 0.8828791300727215, iteration: 330734
loss: 1.02275550365448,grad_norm: 0.9999995544084457, iteration: 330735
loss: 1.0180037021636963,grad_norm: 0.6926267750045074, iteration: 330736
loss: 1.010890245437622,grad_norm: 0.9449474956784295, iteration: 330737
loss: 1.1212620735168457,grad_norm: 0.9698092791343716, iteration: 330738
loss: 1.025174856185913,grad_norm: 0.8246088144639863, iteration: 330739
loss: 1.0277366638183594,grad_norm: 0.861478088276133, iteration: 330740
loss: 0.9515010118484497,grad_norm: 0.8567560116999552, iteration: 330741
loss: 1.0014290809631348,grad_norm: 0.9475871412020256, iteration: 330742
loss: 1.0390671491622925,grad_norm: 0.9187711873344209, iteration: 330743
loss: 1.0046285390853882,grad_norm: 0.7702635603892322, iteration: 330744
loss: 0.9943524599075317,grad_norm: 0.7741310844528135, iteration: 330745
loss: 0.9708288311958313,grad_norm: 0.9783904086010309, iteration: 330746
loss: 1.0157452821731567,grad_norm: 0.7164442046408231, iteration: 330747
loss: 0.9801846146583557,grad_norm: 0.7834824987701691, iteration: 330748
loss: 0.9329261779785156,grad_norm: 0.8510099023249217, iteration: 330749
loss: 1.0074681043624878,grad_norm: 0.775603249510964, iteration: 330750
loss: 0.9688274264335632,grad_norm: 0.7986903327526043, iteration: 330751
loss: 1.0254037380218506,grad_norm: 0.9999997951476814, iteration: 330752
loss: 0.9811696410179138,grad_norm: 0.7454121171868302, iteration: 330753
loss: 0.9487212896347046,grad_norm: 0.821702008528734, iteration: 330754
loss: 0.9541430473327637,grad_norm: 0.822472954321787, iteration: 330755
loss: 1.026330590248108,grad_norm: 0.9999991063027526, iteration: 330756
loss: 0.9948594570159912,grad_norm: 0.8103868729484615, iteration: 330757
loss: 0.9893306493759155,grad_norm: 0.9999991214099067, iteration: 330758
loss: 1.0174951553344727,grad_norm: 0.8480801977445117, iteration: 330759
loss: 0.9592204093933105,grad_norm: 0.7476736476306269, iteration: 330760
loss: 1.0278599262237549,grad_norm: 0.9058520924047456, iteration: 330761
loss: 0.9697322845458984,grad_norm: 0.8606707479669607, iteration: 330762
loss: 0.9851542711257935,grad_norm: 0.8533118042715783, iteration: 330763
loss: 1.0033018589019775,grad_norm: 0.8449793573939074, iteration: 330764
loss: 1.0007734298706055,grad_norm: 0.9176453289871865, iteration: 330765
loss: 1.0371828079223633,grad_norm: 0.9358406116634581, iteration: 330766
loss: 0.9913272857666016,grad_norm: 0.9999990869321674, iteration: 330767
loss: 0.9636122584342957,grad_norm: 0.8807027693710886, iteration: 330768
loss: 1.0089967250823975,grad_norm: 0.8064286005273267, iteration: 330769
loss: 1.0387378931045532,grad_norm: 0.9253698233960025, iteration: 330770
loss: 0.9630085229873657,grad_norm: 0.942756908929175, iteration: 330771
loss: 0.9568244218826294,grad_norm: 0.724144690438652, iteration: 330772
loss: 1.0117287635803223,grad_norm: 0.9999995029632257, iteration: 330773
loss: 1.00043523311615,grad_norm: 0.999999456688805, iteration: 330774
loss: 0.9931226372718811,grad_norm: 0.7663701366220318, iteration: 330775
loss: 0.9301460385322571,grad_norm: 0.7375297301418906, iteration: 330776
loss: 1.0214080810546875,grad_norm: 0.8149580651866779, iteration: 330777
loss: 0.9928625822067261,grad_norm: 0.741261928548278, iteration: 330778
loss: 0.9612323641777039,grad_norm: 0.8308437534527516, iteration: 330779
loss: 1.0445934534072876,grad_norm: 1.0000000168538918, iteration: 330780
loss: 0.9915091395378113,grad_norm: 0.6903507335727177, iteration: 330781
loss: 1.0286725759506226,grad_norm: 0.859499698082329, iteration: 330782
loss: 1.0306257009506226,grad_norm: 0.8514537292991178, iteration: 330783
loss: 0.9894968867301941,grad_norm: 0.7439251203782283, iteration: 330784
loss: 1.0123226642608643,grad_norm: 0.94414002077511, iteration: 330785
loss: 1.0318353176116943,grad_norm: 0.8664090285286586, iteration: 330786
loss: 1.013948678970337,grad_norm: 0.9152801025403657, iteration: 330787
loss: 0.9949884414672852,grad_norm: 0.9999992340600324, iteration: 330788
loss: 0.980350136756897,grad_norm: 0.8997236600357582, iteration: 330789
loss: 1.010135531425476,grad_norm: 0.9999994086786652, iteration: 330790
loss: 0.9384671449661255,grad_norm: 0.7910671911753384, iteration: 330791
loss: 0.9848470687866211,grad_norm: 0.7320320764474445, iteration: 330792
loss: 1.0095868110656738,grad_norm: 0.8397398906902581, iteration: 330793
loss: 1.0330790281295776,grad_norm: 0.7695135914464575, iteration: 330794
loss: 0.9943671226501465,grad_norm: 0.8683779482660422, iteration: 330795
loss: 0.9725568890571594,grad_norm: 0.9932974155507968, iteration: 330796
loss: 1.0190889835357666,grad_norm: 0.9999996311564999, iteration: 330797
loss: 0.9639267921447754,grad_norm: 0.8748045472795154, iteration: 330798
loss: 1.0266391038894653,grad_norm: 0.9999991088261305, iteration: 330799
loss: 1.0253674983978271,grad_norm: 0.8508369294042942, iteration: 330800
loss: 1.0425511598587036,grad_norm: 0.941488037251793, iteration: 330801
loss: 1.0524795055389404,grad_norm: 0.9999990349286743, iteration: 330802
loss: 1.022693157196045,grad_norm: 0.9999991550113595, iteration: 330803
loss: 1.0071829557418823,grad_norm: 0.8246730201136677, iteration: 330804
loss: 0.9754393696784973,grad_norm: 0.7979742312236858, iteration: 330805
loss: 1.0019201040267944,grad_norm: 0.8708824150743629, iteration: 330806
loss: 0.978002667427063,grad_norm: 0.9974073018681019, iteration: 330807
loss: 1.0041590929031372,grad_norm: 0.8693118683779087, iteration: 330808
loss: 1.0042842626571655,grad_norm: 0.9064738824159744, iteration: 330809
loss: 1.0119417905807495,grad_norm: 0.7478983333446518, iteration: 330810
loss: 0.9779295325279236,grad_norm: 0.7756512700317092, iteration: 330811
loss: 0.9711741805076599,grad_norm: 0.9999990248289854, iteration: 330812
loss: 1.0157753229141235,grad_norm: 0.9276627920792069, iteration: 330813
loss: 1.000934362411499,grad_norm: 0.7646114279590721, iteration: 330814
loss: 1.0137327909469604,grad_norm: 0.999998982111324, iteration: 330815
loss: 0.9815510511398315,grad_norm: 0.6824188594261577, iteration: 330816
loss: 1.0341516733169556,grad_norm: 0.7422336205997735, iteration: 330817
loss: 1.019554615020752,grad_norm: 0.7612878715799706, iteration: 330818
loss: 1.001952886581421,grad_norm: 0.8656331986201651, iteration: 330819
loss: 1.010641098022461,grad_norm: 0.8487690696119069, iteration: 330820
loss: 1.0426315069198608,grad_norm: 0.7305635459827208, iteration: 330821
loss: 1.00175940990448,grad_norm: 0.7771422274685457, iteration: 330822
loss: 0.9876644611358643,grad_norm: 0.7400748891739503, iteration: 330823
loss: 1.0020778179168701,grad_norm: 0.9999990055868156, iteration: 330824
loss: 0.9998406767845154,grad_norm: 0.7806575742167919, iteration: 330825
loss: 0.9982147216796875,grad_norm: 0.9733422785063623, iteration: 330826
loss: 1.0326632261276245,grad_norm: 0.7401291709234857, iteration: 330827
loss: 1.0213607549667358,grad_norm: 0.7430882958265045, iteration: 330828
loss: 0.9821879863739014,grad_norm: 0.9451398350179993, iteration: 330829
loss: 1.1243927478790283,grad_norm: 0.9999997452383269, iteration: 330830
loss: 1.0119704008102417,grad_norm: 0.8805189744398817, iteration: 330831
loss: 0.9788310527801514,grad_norm: 0.8060404322506273, iteration: 330832
loss: 1.0421005487442017,grad_norm: 0.8328794847232861, iteration: 330833
loss: 0.9985069036483765,grad_norm: 0.8146489773469596, iteration: 330834
loss: 0.9984949231147766,grad_norm: 0.8750738334217022, iteration: 330835
loss: 0.9589561223983765,grad_norm: 0.8637171040988406, iteration: 330836
loss: 0.9894500970840454,grad_norm: 0.7418322576788169, iteration: 330837
loss: 1.0166398286819458,grad_norm: 0.8406779402780594, iteration: 330838
loss: 0.9789083003997803,grad_norm: 0.8361258377914504, iteration: 330839
loss: 1.0202051401138306,grad_norm: 0.9999991144791828, iteration: 330840
loss: 0.9576429128646851,grad_norm: 0.6984373155314715, iteration: 330841
loss: 1.019645094871521,grad_norm: 0.8662672566846039, iteration: 330842
loss: 1.0394399166107178,grad_norm: 0.9999993052535694, iteration: 330843
loss: 1.0280240774154663,grad_norm: 0.8237068659780898, iteration: 330844
loss: 0.9671271443367004,grad_norm: 0.9267703043996585, iteration: 330845
loss: 1.0064467191696167,grad_norm: 0.7491463418695066, iteration: 330846
loss: 0.9545623064041138,grad_norm: 0.8961599423701018, iteration: 330847
loss: 1.0014342069625854,grad_norm: 0.8816722046469171, iteration: 330848
loss: 0.963104248046875,grad_norm: 0.999999189193003, iteration: 330849
loss: 1.0028663873672485,grad_norm: 0.8198113333125147, iteration: 330850
loss: 0.9736437201499939,grad_norm: 0.7907611140833918, iteration: 330851
loss: 1.0134426355361938,grad_norm: 0.7243538960018981, iteration: 330852
loss: 0.9867545962333679,grad_norm: 0.8739243455872929, iteration: 330853
loss: 1.0262147188186646,grad_norm: 0.8042105782318595, iteration: 330854
loss: 1.023409366607666,grad_norm: 0.812401395890538, iteration: 330855
loss: 1.0157215595245361,grad_norm: 0.8475847351196076, iteration: 330856
loss: 1.0027767419815063,grad_norm: 0.8033132338365025, iteration: 330857
loss: 0.9969046115875244,grad_norm: 0.9999991044868858, iteration: 330858
loss: 0.9690311551094055,grad_norm: 0.7511967930405585, iteration: 330859
loss: 0.9822285771369934,grad_norm: 0.9684870917612045, iteration: 330860
loss: 1.0093636512756348,grad_norm: 0.9562780749822773, iteration: 330861
loss: 1.0242637395858765,grad_norm: 0.67780255760424, iteration: 330862
loss: 1.0121670961380005,grad_norm: 0.7678629405744384, iteration: 330863
loss: 0.9862052202224731,grad_norm: 0.7746599366973836, iteration: 330864
loss: 1.1540201902389526,grad_norm: 0.9999992160796723, iteration: 330865
loss: 1.000440239906311,grad_norm: 0.7780625960233638, iteration: 330866
loss: 1.0249146223068237,grad_norm: 0.8923185854388741, iteration: 330867
loss: 1.0123041868209839,grad_norm: 0.8240863814307805, iteration: 330868
loss: 0.9876084923744202,grad_norm: 0.725869866270667, iteration: 330869
loss: 1.0410479307174683,grad_norm: 0.9407505462721152, iteration: 330870
loss: 1.0241048336029053,grad_norm: 0.6497924748431778, iteration: 330871
loss: 0.9902499318122864,grad_norm: 0.9014160672935323, iteration: 330872
loss: 1.0113271474838257,grad_norm: 0.7900706095294021, iteration: 330873
loss: 1.0348464250564575,grad_norm: 0.7714350835384797, iteration: 330874
loss: 0.9869978427886963,grad_norm: 0.9788573039827435, iteration: 330875
loss: 0.9571515321731567,grad_norm: 0.9372373815904287, iteration: 330876
loss: 1.004156470298767,grad_norm: 0.7455672789591752, iteration: 330877
loss: 0.9722113609313965,grad_norm: 0.7351755846233338, iteration: 330878
loss: 0.9841609597206116,grad_norm: 0.7649961852919274, iteration: 330879
loss: 0.98900306224823,grad_norm: 0.9999989766663848, iteration: 330880
loss: 0.9847614765167236,grad_norm: 0.7953092446782114, iteration: 330881
loss: 1.0029841661453247,grad_norm: 0.9589970857936767, iteration: 330882
loss: 1.0297915935516357,grad_norm: 0.9999991670304536, iteration: 330883
loss: 0.9967936873435974,grad_norm: 0.9082012882114571, iteration: 330884
loss: 0.9771828651428223,grad_norm: 0.8698335466964101, iteration: 330885
loss: 0.9966983795166016,grad_norm: 0.8379322548349997, iteration: 330886
loss: 0.9472019672393799,grad_norm: 0.8952905170671835, iteration: 330887
loss: 1.0127453804016113,grad_norm: 0.8278099381634676, iteration: 330888
loss: 0.9818993806838989,grad_norm: 0.9714858032495893, iteration: 330889
loss: 0.9449924826622009,grad_norm: 0.8072268099137924, iteration: 330890
loss: 1.0883362293243408,grad_norm: 0.9178865231701514, iteration: 330891
loss: 1.0275906324386597,grad_norm: 0.8764848362542046, iteration: 330892
loss: 1.0105834007263184,grad_norm: 0.9835608247761831, iteration: 330893
loss: 1.0595110654830933,grad_norm: 0.9999992675153336, iteration: 330894
loss: 1.0003910064697266,grad_norm: 0.8835145925007512, iteration: 330895
loss: 1.0288021564483643,grad_norm: 0.9999999321145472, iteration: 330896
loss: 1.0765100717544556,grad_norm: 0.9999998423704638, iteration: 330897
loss: 1.229085087776184,grad_norm: 0.9999991857705568, iteration: 330898
loss: 0.9995941519737244,grad_norm: 0.9999990104762295, iteration: 330899
loss: 0.9887343049049377,grad_norm: 0.993989050791056, iteration: 330900
loss: 0.9798911213874817,grad_norm: 0.9517481162259426, iteration: 330901
loss: 0.9942649602890015,grad_norm: 0.890500429796105, iteration: 330902
loss: 1.0134413242340088,grad_norm: 0.8558542321351039, iteration: 330903
loss: 0.9849550127983093,grad_norm: 0.8642651185718904, iteration: 330904
loss: 1.0262389183044434,grad_norm: 0.9999990799790699, iteration: 330905
loss: 0.9922851920127869,grad_norm: 0.8516984439738104, iteration: 330906
loss: 1.0008925199508667,grad_norm: 0.7607705140778425, iteration: 330907
loss: 0.9826211929321289,grad_norm: 0.8425752777328257, iteration: 330908
loss: 0.985905647277832,grad_norm: 0.7396473862706093, iteration: 330909
loss: 0.9690025448799133,grad_norm: 0.9662444744878114, iteration: 330910
loss: 1.0164337158203125,grad_norm: 0.7375558954343925, iteration: 330911
loss: 0.9512903094291687,grad_norm: 0.8357167799658155, iteration: 330912
loss: 0.9869200587272644,grad_norm: 0.8311352944106213, iteration: 330913
loss: 1.0059254169464111,grad_norm: 0.7807510049484829, iteration: 330914
loss: 0.977230429649353,grad_norm: 0.759007021911008, iteration: 330915
loss: 0.9736400842666626,grad_norm: 0.8996017039452668, iteration: 330916
loss: 0.9988785982131958,grad_norm: 0.8407235585998681, iteration: 330917
loss: 0.998321533203125,grad_norm: 0.9583314093156263, iteration: 330918
loss: 0.9683055281639099,grad_norm: 0.7814049123321152, iteration: 330919
loss: 1.00592839717865,grad_norm: 0.7940158437887143, iteration: 330920
loss: 0.98716139793396,grad_norm: 0.7738899886594562, iteration: 330921
loss: 0.9664924740791321,grad_norm: 0.830180632085154, iteration: 330922
loss: 0.9819700717926025,grad_norm: 0.9141295627646414, iteration: 330923
loss: 0.9915187358856201,grad_norm: 0.9447813831263344, iteration: 330924
loss: 0.9740092754364014,grad_norm: 0.8731900829452647, iteration: 330925
loss: 1.015292763710022,grad_norm: 0.6822742844083725, iteration: 330926
loss: 0.9613599181175232,grad_norm: 0.9999991332914868, iteration: 330927
loss: 1.0008574724197388,grad_norm: 0.788291309148603, iteration: 330928
loss: 1.0007984638214111,grad_norm: 0.85869317462249, iteration: 330929
loss: 1.0610085725784302,grad_norm: 0.9970888025858139, iteration: 330930
loss: 0.990419864654541,grad_norm: 0.9999991252042377, iteration: 330931
loss: 0.9883319139480591,grad_norm: 0.9078480449767312, iteration: 330932
loss: 1.0345029830932617,grad_norm: 0.9999991015738856, iteration: 330933
loss: 0.9922798871994019,grad_norm: 0.8634784746280387, iteration: 330934
loss: 0.9644031524658203,grad_norm: 0.8186680411915552, iteration: 330935
loss: 1.0245859622955322,grad_norm: 0.8846818292638204, iteration: 330936
loss: 0.963654637336731,grad_norm: 0.8438624650441894, iteration: 330937
loss: 1.033458948135376,grad_norm: 0.9558139262874138, iteration: 330938
loss: 1.0711742639541626,grad_norm: 1.0000000378789482, iteration: 330939
loss: 1.003036379814148,grad_norm: 0.8432087667982764, iteration: 330940
loss: 0.9979450106620789,grad_norm: 0.8789276949533508, iteration: 330941
loss: 1.0116689205169678,grad_norm: 0.8359947816739172, iteration: 330942
loss: 0.985469400882721,grad_norm: 0.9812783053553554, iteration: 330943
loss: 0.9848535060882568,grad_norm: 0.7697817656899963, iteration: 330944
loss: 1.0074704885482788,grad_norm: 0.9559562341324426, iteration: 330945
loss: 1.0362128019332886,grad_norm: 0.9555891537336163, iteration: 330946
loss: 1.022587776184082,grad_norm: 0.7974395600536326, iteration: 330947
loss: 1.0108015537261963,grad_norm: 0.8035610669689398, iteration: 330948
loss: 0.9910941123962402,grad_norm: 0.9271966135486305, iteration: 330949
loss: 1.0540746450424194,grad_norm: 0.7192069928445091, iteration: 330950
loss: 1.01234769821167,grad_norm: 0.9495330726963198, iteration: 330951
loss: 1.0267255306243896,grad_norm: 0.9000236844600762, iteration: 330952
loss: 0.9895498156547546,grad_norm: 0.8303090565852614, iteration: 330953
loss: 0.9994333982467651,grad_norm: 0.7363121155294053, iteration: 330954
loss: 1.0292952060699463,grad_norm: 0.8253314132221522, iteration: 330955
loss: 0.97915118932724,grad_norm: 0.9999991699829684, iteration: 330956
loss: 0.9490960240364075,grad_norm: 0.8373774752099503, iteration: 330957
loss: 0.983914315700531,grad_norm: 0.766037591840946, iteration: 330958
loss: 0.9814863204956055,grad_norm: 0.9999990210602223, iteration: 330959
loss: 0.9719955325126648,grad_norm: 0.9999992954152596, iteration: 330960
loss: 0.9972019195556641,grad_norm: 0.929021711583431, iteration: 330961
loss: 1.0128624439239502,grad_norm: 0.8994180926561609, iteration: 330962
loss: 1.0117990970611572,grad_norm: 0.678829416749505, iteration: 330963
loss: 1.0137063264846802,grad_norm: 0.9999997717622363, iteration: 330964
loss: 1.0055439472198486,grad_norm: 0.7901598121615007, iteration: 330965
loss: 0.990976870059967,grad_norm: 0.7479571659615346, iteration: 330966
loss: 1.0354803800582886,grad_norm: 0.9999997648905609, iteration: 330967
loss: 1.0046836137771606,grad_norm: 0.9062230185632891, iteration: 330968
loss: 0.9904022216796875,grad_norm: 0.9999990693256605, iteration: 330969
loss: 0.9504714608192444,grad_norm: 0.926618697405371, iteration: 330970
loss: 1.0297127962112427,grad_norm: 0.9182498625979697, iteration: 330971
loss: 0.9673344492912292,grad_norm: 0.8464878550296188, iteration: 330972
loss: 0.9725099802017212,grad_norm: 0.9825452499919783, iteration: 330973
loss: 1.0884428024291992,grad_norm: 0.9999991421585949, iteration: 330974
loss: 1.0140377283096313,grad_norm: 0.903745030402233, iteration: 330975
loss: 1.0095233917236328,grad_norm: 0.9999997406108307, iteration: 330976
loss: 1.0575509071350098,grad_norm: 0.8358502457985684, iteration: 330977
loss: 1.0132681131362915,grad_norm: 0.9999992171115911, iteration: 330978
loss: 0.9920892119407654,grad_norm: 0.8542760024651577, iteration: 330979
loss: 0.9728712439537048,grad_norm: 0.7467354775730074, iteration: 330980
loss: 0.9868418574333191,grad_norm: 0.886150085471827, iteration: 330981
loss: 1.0419299602508545,grad_norm: 0.9999992192526749, iteration: 330982
loss: 1.0321574211120605,grad_norm: 0.9613532858624944, iteration: 330983
loss: 1.0075442790985107,grad_norm: 0.7634529461930459, iteration: 330984
loss: 0.9872823357582092,grad_norm: 0.7005901637753071, iteration: 330985
loss: 1.020763874053955,grad_norm: 0.7875096844140463, iteration: 330986
loss: 0.9922056794166565,grad_norm: 0.6954867758110383, iteration: 330987
loss: 0.9768533110618591,grad_norm: 0.788858631680974, iteration: 330988
loss: 0.9981852769851685,grad_norm: 0.9264291491336132, iteration: 330989
loss: 0.983860194683075,grad_norm: 0.742869508506355, iteration: 330990
loss: 1.0096683502197266,grad_norm: 0.8018268228222581, iteration: 330991
loss: 1.0481456518173218,grad_norm: 0.8797716995329219, iteration: 330992
loss: 0.9930402636528015,grad_norm: 0.8191879622674934, iteration: 330993
loss: 0.9559750556945801,grad_norm: 0.8819396711150724, iteration: 330994
loss: 0.9735913276672363,grad_norm: 0.8116186981710011, iteration: 330995
loss: 0.999198853969574,grad_norm: 0.7963388841617268, iteration: 330996
loss: 1.0819306373596191,grad_norm: 0.99999905642776, iteration: 330997
loss: 1.003951072692871,grad_norm: 0.8909158000786236, iteration: 330998
loss: 1.003147006034851,grad_norm: 0.9999997259735175, iteration: 330999
loss: 1.0447235107421875,grad_norm: 0.9575980266336398, iteration: 331000
loss: 0.9792770147323608,grad_norm: 0.7421359603134278, iteration: 331001
loss: 1.0140360593795776,grad_norm: 0.7391291686358876, iteration: 331002
loss: 0.9916295409202576,grad_norm: 0.8728890732623981, iteration: 331003
loss: 0.978209912776947,grad_norm: 0.8677003313113789, iteration: 331004
loss: 1.0446640253067017,grad_norm: 0.9999992085717275, iteration: 331005
loss: 0.9907098412513733,grad_norm: 0.9999995117538679, iteration: 331006
loss: 0.9937275052070618,grad_norm: 0.9171451891203969, iteration: 331007
loss: 1.0297526121139526,grad_norm: 0.7529999468335027, iteration: 331008
loss: 0.9872339367866516,grad_norm: 0.910155609033846, iteration: 331009
loss: 1.0063872337341309,grad_norm: 0.9999990279530155, iteration: 331010
loss: 1.0691180229187012,grad_norm: 0.8040483124713919, iteration: 331011
loss: 1.002912163734436,grad_norm: 0.84346768692345, iteration: 331012
loss: 1.06868577003479,grad_norm: 0.9999992569293251, iteration: 331013
loss: 0.9829349517822266,grad_norm: 0.7123072208165118, iteration: 331014
loss: 0.9800381064414978,grad_norm: 0.8008855112391383, iteration: 331015
loss: 0.9720255136489868,grad_norm: 0.816906741120747, iteration: 331016
loss: 0.9613322615623474,grad_norm: 0.8154592956453541, iteration: 331017
loss: 0.9977914690971375,grad_norm: 0.8156053396094204, iteration: 331018
loss: 0.9966940879821777,grad_norm: 0.7617073043705856, iteration: 331019
loss: 0.9629024267196655,grad_norm: 0.9999990533452322, iteration: 331020
loss: 1.028539776802063,grad_norm: 0.8164493927419199, iteration: 331021
loss: 1.0094325542449951,grad_norm: 0.8191528319490063, iteration: 331022
loss: 1.01914381980896,grad_norm: 0.8098568477114093, iteration: 331023
loss: 1.0064117908477783,grad_norm: 0.9999990597655531, iteration: 331024
loss: 1.0043144226074219,grad_norm: 0.9999991661423664, iteration: 331025
loss: 0.9620590209960938,grad_norm: 0.7487438341916206, iteration: 331026
loss: 1.0243620872497559,grad_norm: 0.9506770370079954, iteration: 331027
loss: 0.9628726243972778,grad_norm: 0.8743798204989626, iteration: 331028
loss: 0.9920439720153809,grad_norm: 0.728116450162652, iteration: 331029
loss: 0.9967106580734253,grad_norm: 0.8380849473423598, iteration: 331030
loss: 0.998828113079071,grad_norm: 0.744479376047725, iteration: 331031
loss: 0.9792775511741638,grad_norm: 0.8215826941192858, iteration: 331032
loss: 0.9997583627700806,grad_norm: 0.8272840011533241, iteration: 331033
loss: 1.0047876834869385,grad_norm: 0.8832819829138825, iteration: 331034
loss: 0.9675815105438232,grad_norm: 0.9053942568688772, iteration: 331035
loss: 1.0144011974334717,grad_norm: 0.8875529355753656, iteration: 331036
loss: 0.990583062171936,grad_norm: 0.7686321027245338, iteration: 331037
loss: 1.006399154663086,grad_norm: 0.8742697476241793, iteration: 331038
loss: 0.997534453868866,grad_norm: 0.8827037188711844, iteration: 331039
loss: 0.9557764530181885,grad_norm: 0.7925075606103328, iteration: 331040
loss: 1.0222009420394897,grad_norm: 0.7961917080748088, iteration: 331041
loss: 1.0905838012695312,grad_norm: 0.9999990296137102, iteration: 331042
loss: 1.0243520736694336,grad_norm: 0.9322175838257273, iteration: 331043
loss: 1.0035431385040283,grad_norm: 0.8247530492982069, iteration: 331044
loss: 1.0276141166687012,grad_norm: 0.9607462371523458, iteration: 331045
loss: 1.0333313941955566,grad_norm: 0.7670618572604623, iteration: 331046
loss: 1.0122050046920776,grad_norm: 0.9999990563203858, iteration: 331047
loss: 0.9952934980392456,grad_norm: 0.8000078401829476, iteration: 331048
loss: 0.9941967725753784,grad_norm: 0.8803429832421727, iteration: 331049
loss: 1.0230172872543335,grad_norm: 0.8516937564483718, iteration: 331050
loss: 0.9904278516769409,grad_norm: 0.9999992101924808, iteration: 331051
loss: 1.0195809602737427,grad_norm: 0.8371446489210402, iteration: 331052
loss: 0.9934775233268738,grad_norm: 0.73729229080041, iteration: 331053
loss: 0.985657274723053,grad_norm: 0.907654310920676, iteration: 331054
loss: 1.017749547958374,grad_norm: 0.9336407345608335, iteration: 331055
loss: 0.9939013123512268,grad_norm: 0.8075062704528927, iteration: 331056
loss: 1.0905104875564575,grad_norm: 0.9999996898550244, iteration: 331057
loss: 1.0086013078689575,grad_norm: 0.9731118465569474, iteration: 331058
loss: 0.9777379035949707,grad_norm: 0.9850229587727627, iteration: 331059
loss: 0.9915159344673157,grad_norm: 0.7845610374758085, iteration: 331060
loss: 0.9942981004714966,grad_norm: 0.9675215581346887, iteration: 331061
loss: 1.0170249938964844,grad_norm: 0.9034396430498168, iteration: 331062
loss: 0.9883937835693359,grad_norm: 0.994170428113072, iteration: 331063
loss: 1.0122660398483276,grad_norm: 0.894336774751251, iteration: 331064
loss: 1.0358139276504517,grad_norm: 0.8668618245571919, iteration: 331065
loss: 0.9897205233573914,grad_norm: 0.9062465718120587, iteration: 331066
loss: 0.9906269311904907,grad_norm: 0.9163256437273137, iteration: 331067
loss: 1.0135936737060547,grad_norm: 0.9999998858804466, iteration: 331068
loss: 0.9957612156867981,grad_norm: 0.8115403287528055, iteration: 331069
loss: 1.006572961807251,grad_norm: 0.9266480016729859, iteration: 331070
loss: 0.9933050870895386,grad_norm: 0.8452392380621132, iteration: 331071
loss: 0.9606437683105469,grad_norm: 0.7077644278775845, iteration: 331072
loss: 1.0243233442306519,grad_norm: 0.7880559517288818, iteration: 331073
loss: 1.0080699920654297,grad_norm: 0.8887138695295704, iteration: 331074
loss: 1.0108590126037598,grad_norm: 0.8798921371594524, iteration: 331075
loss: 0.9672802686691284,grad_norm: 0.7635324909071436, iteration: 331076
loss: 0.9892438054084778,grad_norm: 0.776702466844213, iteration: 331077
loss: 1.0081140995025635,grad_norm: 0.9012030313127877, iteration: 331078
loss: 1.0162960290908813,grad_norm: 0.7449300620380984, iteration: 331079
loss: 0.985730767250061,grad_norm: 0.983084391526415, iteration: 331080
loss: 0.9546109437942505,grad_norm: 0.9385564235228546, iteration: 331081
loss: 1.0257195234298706,grad_norm: 0.7383516677826937, iteration: 331082
loss: 1.0210728645324707,grad_norm: 0.9999992938867289, iteration: 331083
loss: 1.0350348949432373,grad_norm: 0.9999992762657532, iteration: 331084
loss: 1.004601001739502,grad_norm: 0.8106644609882195, iteration: 331085
loss: 1.0011577606201172,grad_norm: 0.7778930988349837, iteration: 331086
loss: 1.0079530477523804,grad_norm: 0.798034895491085, iteration: 331087
loss: 1.013594627380371,grad_norm: 0.9461302078403891, iteration: 331088
loss: 0.9945940971374512,grad_norm: 0.9999997653533976, iteration: 331089
loss: 0.9610850811004639,grad_norm: 0.7589474310873708, iteration: 331090
loss: 1.0298465490341187,grad_norm: 0.9680369351881213, iteration: 331091
loss: 0.9628772735595703,grad_norm: 0.8765414809923784, iteration: 331092
loss: 0.9754464030265808,grad_norm: 0.7673520345800637, iteration: 331093
loss: 1.0116071701049805,grad_norm: 0.9362071445424287, iteration: 331094
loss: 1.016991138458252,grad_norm: 0.999999874490031, iteration: 331095
loss: 0.9670848846435547,grad_norm: 0.8239643289714376, iteration: 331096
loss: 0.9922566413879395,grad_norm: 0.7500391962319092, iteration: 331097
loss: 1.044629693031311,grad_norm: 0.9999995900592206, iteration: 331098
loss: 1.0235390663146973,grad_norm: 0.999999162755631, iteration: 331099
loss: 0.985115110874176,grad_norm: 0.8683080993033244, iteration: 331100
loss: 1.0459213256835938,grad_norm: 0.9999991617298077, iteration: 331101
loss: 0.9810012578964233,grad_norm: 0.9999990555541695, iteration: 331102
loss: 1.0025614500045776,grad_norm: 0.7194125703991429, iteration: 331103
loss: 0.9958316087722778,grad_norm: 0.7788993773814359, iteration: 331104
loss: 1.0619306564331055,grad_norm: 0.9193752287381786, iteration: 331105
loss: 1.002963662147522,grad_norm: 0.8115732700910732, iteration: 331106
loss: 0.9626469612121582,grad_norm: 0.7878344081296808, iteration: 331107
loss: 0.9748497009277344,grad_norm: 0.9157023757530747, iteration: 331108
loss: 0.9747409224510193,grad_norm: 0.954581033775465, iteration: 331109
loss: 1.0113168954849243,grad_norm: 0.8843226152714124, iteration: 331110
loss: 1.003416895866394,grad_norm: 0.9999996574627963, iteration: 331111
loss: 0.9391977787017822,grad_norm: 0.9617016637052839, iteration: 331112
loss: 0.9739521145820618,grad_norm: 0.9999994744414267, iteration: 331113
loss: 0.9828208684921265,grad_norm: 0.8090980741574515, iteration: 331114
loss: 1.03389310836792,grad_norm: 0.9999990305868338, iteration: 331115
loss: 1.0256626605987549,grad_norm: 0.9049667614611052, iteration: 331116
loss: 0.9618138074874878,grad_norm: 0.9124656997942464, iteration: 331117
loss: 1.0167052745819092,grad_norm: 0.78192814982413, iteration: 331118
loss: 0.9839427471160889,grad_norm: 0.999999122873794, iteration: 331119
loss: 1.0177512168884277,grad_norm: 0.779942488845849, iteration: 331120
loss: 0.9894654154777527,grad_norm: 0.9999991572237498, iteration: 331121
loss: 0.9861404895782471,grad_norm: 0.9379877868013349, iteration: 331122
loss: 1.0168629884719849,grad_norm: 0.9999992184684253, iteration: 331123
loss: 1.0626155138015747,grad_norm: 0.798683650829161, iteration: 331124
loss: 0.9942153692245483,grad_norm: 0.8363362679898851, iteration: 331125
loss: 1.062796711921692,grad_norm: 0.8070399338773226, iteration: 331126
loss: 1.028611421585083,grad_norm: 0.9242828643343431, iteration: 331127
loss: 1.0125640630722046,grad_norm: 0.9788404762614302, iteration: 331128
loss: 1.0360751152038574,grad_norm: 0.9085819330925384, iteration: 331129
loss: 0.9902477860450745,grad_norm: 0.999508448534218, iteration: 331130
loss: 1.0590953826904297,grad_norm: 0.9742105497668423, iteration: 331131
loss: 1.0160688161849976,grad_norm: 0.8583192658791768, iteration: 331132
loss: 1.133603572845459,grad_norm: 0.8229195228123272, iteration: 331133
loss: 0.9888343214988708,grad_norm: 0.7662353070638422, iteration: 331134
loss: 0.9816079139709473,grad_norm: 0.6666737799689447, iteration: 331135
loss: 1.0135469436645508,grad_norm: 0.7437022001712893, iteration: 331136
loss: 1.113962173461914,grad_norm: 0.843601228193316, iteration: 331137
loss: 0.9863133430480957,grad_norm: 0.999999240109732, iteration: 331138
loss: 0.9739708304405212,grad_norm: 0.662856012853883, iteration: 331139
loss: 1.0332883596420288,grad_norm: 0.9999992996447019, iteration: 331140
loss: 0.9680665135383606,grad_norm: 0.6923033855475598, iteration: 331141
loss: 1.0362621545791626,grad_norm: 0.9999990859283024, iteration: 331142
loss: 0.9901556968688965,grad_norm: 0.8811256224937518, iteration: 331143
loss: 0.9812111854553223,grad_norm: 0.9999994413457978, iteration: 331144
loss: 1.0138598680496216,grad_norm: 0.7267598312274055, iteration: 331145
loss: 1.1522585153579712,grad_norm: 0.9999992137704597, iteration: 331146
loss: 1.023858904838562,grad_norm: 0.9999991345864562, iteration: 331147
loss: 1.029622197151184,grad_norm: 0.9999994554996567, iteration: 331148
loss: 1.0010126829147339,grad_norm: 0.9999990122888744, iteration: 331149
loss: 0.991003692150116,grad_norm: 0.7267014885102758, iteration: 331150
loss: 1.0495846271514893,grad_norm: 0.9468942390636947, iteration: 331151
loss: 1.0242903232574463,grad_norm: 0.9238546619378478, iteration: 331152
loss: 1.0209057331085205,grad_norm: 0.8901864216479459, iteration: 331153
loss: 1.0709490776062012,grad_norm: 0.9999999524911403, iteration: 331154
loss: 1.0142784118652344,grad_norm: 0.9999990693377174, iteration: 331155
loss: 1.010396122932434,grad_norm: 0.9357537655700199, iteration: 331156
loss: 1.0514813661575317,grad_norm: 0.9999991987584756, iteration: 331157
loss: 1.1066750288009644,grad_norm: 0.958093671559467, iteration: 331158
loss: 1.0478967428207397,grad_norm: 0.9999992904045385, iteration: 331159
loss: 1.0047502517700195,grad_norm: 0.8347095695590313, iteration: 331160
loss: 1.0140830278396606,grad_norm: 0.9999997539504708, iteration: 331161
loss: 0.9726267457008362,grad_norm: 0.8095277653694006, iteration: 331162
loss: 1.0435560941696167,grad_norm: 0.8316649782093639, iteration: 331163
loss: 1.0185891389846802,grad_norm: 0.9140433331420647, iteration: 331164
loss: 1.0223603248596191,grad_norm: 0.9999998663240001, iteration: 331165
loss: 1.0289890766143799,grad_norm: 0.7673867026299699, iteration: 331166
loss: 1.010310173034668,grad_norm: 0.9999998533352815, iteration: 331167
loss: 0.9794213771820068,grad_norm: 0.6791088435656314, iteration: 331168
loss: 0.9924623966217041,grad_norm: 0.8714359274931727, iteration: 331169
loss: 0.9716702103614807,grad_norm: 0.841609008132499, iteration: 331170
loss: 0.987781822681427,grad_norm: 0.9999994835110695, iteration: 331171
loss: 1.061948299407959,grad_norm: 0.9999992903263348, iteration: 331172
loss: 1.0709961652755737,grad_norm: 0.9999992656167429, iteration: 331173
loss: 0.9818756580352783,grad_norm: 0.9770906668801838, iteration: 331174
loss: 1.0012842416763306,grad_norm: 0.9060562176591589, iteration: 331175
loss: 1.0423527956008911,grad_norm: 0.9995341348319406, iteration: 331176
loss: 1.027816653251648,grad_norm: 0.8688881534199241, iteration: 331177
loss: 0.9848442077636719,grad_norm: 0.7485151595652282, iteration: 331178
loss: 1.14689040184021,grad_norm: 0.9999991733191324, iteration: 331179
loss: 1.0158601999282837,grad_norm: 0.9426512081499434, iteration: 331180
loss: 1.0147262811660767,grad_norm: 0.8288780017404065, iteration: 331181
loss: 0.9971575736999512,grad_norm: 0.7644315059810209, iteration: 331182
loss: 0.979611873626709,grad_norm: 0.99999955859225, iteration: 331183
loss: 1.0029765367507935,grad_norm: 0.890503359098264, iteration: 331184
loss: 1.1637085676193237,grad_norm: 0.9999993997778339, iteration: 331185
loss: 0.9891861081123352,grad_norm: 0.8221419751559822, iteration: 331186
loss: 0.9718292951583862,grad_norm: 0.7845785209474155, iteration: 331187
loss: 1.0654219388961792,grad_norm: 0.9353883366010712, iteration: 331188
loss: 0.9769638180732727,grad_norm: 0.6941348948644755, iteration: 331189
loss: 0.98974609375,grad_norm: 0.9999991363441892, iteration: 331190
loss: 1.048513650894165,grad_norm: 0.9999990399339924, iteration: 331191
loss: 0.9820963740348816,grad_norm: 0.9517397093957785, iteration: 331192
loss: 1.0182969570159912,grad_norm: 0.8416966183934175, iteration: 331193
loss: 0.9228172302246094,grad_norm: 0.8132352192711823, iteration: 331194
loss: 1.0200629234313965,grad_norm: 0.7298761262794717, iteration: 331195
loss: 1.1448719501495361,grad_norm: 0.9999997233392185, iteration: 331196
loss: 0.9669342041015625,grad_norm: 0.7100802819970968, iteration: 331197
loss: 1.0394114255905151,grad_norm: 0.9446052323465978, iteration: 331198
loss: 0.9662498831748962,grad_norm: 0.885898161434365, iteration: 331199
loss: 1.0272951126098633,grad_norm: 0.8960899301044759, iteration: 331200
loss: 0.9817743301391602,grad_norm: 0.8329059601120701, iteration: 331201
loss: 1.0128061771392822,grad_norm: 0.8042443698031039, iteration: 331202
loss: 1.0528799295425415,grad_norm: 0.9999991288012633, iteration: 331203
loss: 1.0104432106018066,grad_norm: 0.8416604954725637, iteration: 331204
loss: 1.0318576097488403,grad_norm: 0.9073126528232891, iteration: 331205
loss: 1.0521047115325928,grad_norm: 0.999999337225377, iteration: 331206
loss: 0.9827106595039368,grad_norm: 0.7094544460169044, iteration: 331207
loss: 0.9900424480438232,grad_norm: 0.764294118245311, iteration: 331208
loss: 1.0502718687057495,grad_norm: 0.99999990761785, iteration: 331209
loss: 1.0383999347686768,grad_norm: 0.9999992334229426, iteration: 331210
loss: 0.9785107374191284,grad_norm: 0.8385413383895313, iteration: 331211
loss: 1.004427194595337,grad_norm: 0.8567274011274857, iteration: 331212
loss: 1.0030720233917236,grad_norm: 0.8858641458230923, iteration: 331213
loss: 0.9983971118927002,grad_norm: 0.9999993569805959, iteration: 331214
loss: 1.0144548416137695,grad_norm: 0.9128915061597767, iteration: 331215
loss: 0.964089035987854,grad_norm: 0.7775901834043654, iteration: 331216
loss: 1.0350290536880493,grad_norm: 0.9999992022325063, iteration: 331217
loss: 1.0036203861236572,grad_norm: 0.9999991278428382, iteration: 331218
loss: 1.0070792436599731,grad_norm: 0.8530837940679015, iteration: 331219
loss: 1.0223498344421387,grad_norm: 0.7637609332527497, iteration: 331220
loss: 1.0365936756134033,grad_norm: 0.8463637727743546, iteration: 331221
loss: 0.9848806262016296,grad_norm: 0.8167783831521299, iteration: 331222
loss: 1.0079083442687988,grad_norm: 0.9999999433173998, iteration: 331223
loss: 1.0987683534622192,grad_norm: 0.9092589982277468, iteration: 331224
loss: 0.9792240262031555,grad_norm: 0.7577116919833585, iteration: 331225
loss: 1.0188740491867065,grad_norm: 0.9076149562283782, iteration: 331226
loss: 1.009623408317566,grad_norm: 0.8218975660813196, iteration: 331227
loss: 1.0021843910217285,grad_norm: 0.9367701137965179, iteration: 331228
loss: 0.9904768466949463,grad_norm: 0.9067576328375672, iteration: 331229
loss: 1.0234618186950684,grad_norm: 0.99999958733369, iteration: 331230
loss: 1.0214821100234985,grad_norm: 0.9077674726117606, iteration: 331231
loss: 0.9787600636482239,grad_norm: 0.7011614751531222, iteration: 331232
loss: 1.0012471675872803,grad_norm: 0.9999994005308, iteration: 331233
loss: 0.9885655045509338,grad_norm: 0.8545940776638655, iteration: 331234
loss: 1.008111596107483,grad_norm: 0.8207992459732588, iteration: 331235
loss: 1.044835090637207,grad_norm: 0.9999998984742318, iteration: 331236
loss: 0.9921271204948425,grad_norm: 0.7503031121891198, iteration: 331237
loss: 1.0157538652420044,grad_norm: 0.8161747830082421, iteration: 331238
loss: 1.0163099765777588,grad_norm: 0.8431633859812213, iteration: 331239
loss: 0.9687563180923462,grad_norm: 0.8078901923142086, iteration: 331240
loss: 0.983595073223114,grad_norm: 0.8138148395519129, iteration: 331241
loss: 0.9868568181991577,grad_norm: 0.9999990715750426, iteration: 331242
loss: 0.9879745841026306,grad_norm: 0.8770023631007311, iteration: 331243
loss: 0.9939171671867371,grad_norm: 0.8790197220091555, iteration: 331244
loss: 1.0702412128448486,grad_norm: 0.9999995687527032, iteration: 331245
loss: 0.9948328733444214,grad_norm: 0.9586204647419603, iteration: 331246
loss: 1.0275441408157349,grad_norm: 0.9999992522369622, iteration: 331247
loss: 1.0272917747497559,grad_norm: 0.999999446146932, iteration: 331248
loss: 1.003263235092163,grad_norm: 0.9968415650689848, iteration: 331249
loss: 0.9689180254936218,grad_norm: 0.7933281327210496, iteration: 331250
loss: 1.0689727067947388,grad_norm: 0.7675417511461382, iteration: 331251
loss: 1.0601575374603271,grad_norm: 0.9999994961790588, iteration: 331252
loss: 1.014967679977417,grad_norm: 0.8210186678594162, iteration: 331253
loss: 1.0530024766921997,grad_norm: 0.9356318486738688, iteration: 331254
loss: 1.0914133787155151,grad_norm: 0.999999230275374, iteration: 331255
loss: 0.9871736764907837,grad_norm: 0.9999989281002758, iteration: 331256
loss: 1.0174963474273682,grad_norm: 0.8882684117539996, iteration: 331257
loss: 0.9844242930412292,grad_norm: 0.9945712508388498, iteration: 331258
loss: 1.0373808145523071,grad_norm: 0.9132825166114775, iteration: 331259
loss: 1.027133822441101,grad_norm: 0.921733883072635, iteration: 331260
loss: 0.9515591859817505,grad_norm: 0.9638324319336031, iteration: 331261
loss: 1.0290671586990356,grad_norm: 0.7160238752772216, iteration: 331262
loss: 1.0265722274780273,grad_norm: 0.7876751322984296, iteration: 331263
loss: 1.0174118280410767,grad_norm: 0.9808212735389832, iteration: 331264
loss: 0.9966568946838379,grad_norm: 0.9999997152796749, iteration: 331265
loss: 0.9879260659217834,grad_norm: 0.8627547109987839, iteration: 331266
loss: 1.0097646713256836,grad_norm: 0.8853554335835411, iteration: 331267
loss: 0.9848688244819641,grad_norm: 0.8242449389446799, iteration: 331268
loss: 1.0120723247528076,grad_norm: 0.9277989105903982, iteration: 331269
loss: 1.0407848358154297,grad_norm: 0.9999990593508541, iteration: 331270
loss: 0.9707666635513306,grad_norm: 0.9999989989676391, iteration: 331271
loss: 1.0512521266937256,grad_norm: 0.9999991609007085, iteration: 331272
loss: 0.972986102104187,grad_norm: 0.7221410171531963, iteration: 331273
loss: 1.0295484066009521,grad_norm: 0.9304686072479292, iteration: 331274
loss: 0.9950775504112244,grad_norm: 0.9127915482404353, iteration: 331275
loss: 1.0502865314483643,grad_norm: 0.9999995831558784, iteration: 331276
loss: 1.0252224206924438,grad_norm: 0.781518608662476, iteration: 331277
loss: 1.0222113132476807,grad_norm: 0.8602186497010601, iteration: 331278
loss: 0.990705668926239,grad_norm: 0.6713230013962677, iteration: 331279
loss: 0.9693659543991089,grad_norm: 0.8990979761027446, iteration: 331280
loss: 1.0010855197906494,grad_norm: 0.9932368528439033, iteration: 331281
loss: 0.999868631362915,grad_norm: 0.9804122031047029, iteration: 331282
loss: 0.9997937083244324,grad_norm: 0.9138118038238545, iteration: 331283
loss: 0.9991806149482727,grad_norm: 0.7735846846259079, iteration: 331284
loss: 0.9674161076545715,grad_norm: 0.8927756005278478, iteration: 331285
loss: 1.0134172439575195,grad_norm: 0.8091305602484665, iteration: 331286
loss: 1.0165095329284668,grad_norm: 0.9999990781411128, iteration: 331287
loss: 0.9927627444267273,grad_norm: 0.6993973205760277, iteration: 331288
loss: 1.0319771766662598,grad_norm: 0.844663809861198, iteration: 331289
loss: 1.0264283418655396,grad_norm: 0.763908973873932, iteration: 331290
loss: 1.002142071723938,grad_norm: 0.7589959693289743, iteration: 331291
loss: 1.0254623889923096,grad_norm: 1.0000000195498349, iteration: 331292
loss: 1.0094726085662842,grad_norm: 0.8662847096617238, iteration: 331293
loss: 1.018613576889038,grad_norm: 0.837960812218822, iteration: 331294
loss: 0.9762440323829651,grad_norm: 0.8157135831948205, iteration: 331295
loss: 1.0324668884277344,grad_norm: 0.9999990415816008, iteration: 331296
loss: 1.0297019481658936,grad_norm: 0.9999993388271454, iteration: 331297
loss: 1.0079151391983032,grad_norm: 0.8955412201368242, iteration: 331298
loss: 0.9737682342529297,grad_norm: 0.8586801360613088, iteration: 331299
loss: 1.0045760869979858,grad_norm: 0.9999997874294554, iteration: 331300
loss: 1.0093973875045776,grad_norm: 0.8027848918199963, iteration: 331301
loss: 1.0123876333236694,grad_norm: 0.9999991611221105, iteration: 331302
loss: 1.0166709423065186,grad_norm: 0.786111024867283, iteration: 331303
loss: 1.0148545503616333,grad_norm: 0.7805277197511515, iteration: 331304
loss: 1.001847743988037,grad_norm: 0.8588099522023912, iteration: 331305
loss: 1.0098557472229004,grad_norm: 0.9999993269166096, iteration: 331306
loss: 0.9954715371131897,grad_norm: 0.7955447085308365, iteration: 331307
loss: 1.0031408071517944,grad_norm: 0.864473766557148, iteration: 331308
loss: 1.0141334533691406,grad_norm: 0.7830024498475995, iteration: 331309
loss: 1.0298155546188354,grad_norm: 0.9999994277050496, iteration: 331310
loss: 1.008266806602478,grad_norm: 0.7976606202188496, iteration: 331311
loss: 1.0273752212524414,grad_norm: 0.9999992448055451, iteration: 331312
loss: 1.000966191291809,grad_norm: 0.9999989903598618, iteration: 331313
loss: 1.0453081130981445,grad_norm: 0.9999999937946119, iteration: 331314
loss: 1.005076289176941,grad_norm: 0.999999765037419, iteration: 331315
loss: 1.0033595561981201,grad_norm: 0.8539927347204981, iteration: 331316
loss: 1.0781306028366089,grad_norm: 0.9999993495081941, iteration: 331317
loss: 0.9949019551277161,grad_norm: 0.9658575068196638, iteration: 331318
loss: 1.1042075157165527,grad_norm: 0.9999992797354007, iteration: 331319
loss: 1.0126937627792358,grad_norm: 0.8295548228285053, iteration: 331320
loss: 1.0941704511642456,grad_norm: 0.9999991473527557, iteration: 331321
loss: 1.0202138423919678,grad_norm: 0.9599924191958412, iteration: 331322
loss: 0.9752205014228821,grad_norm: 0.859361130607064, iteration: 331323
loss: 1.0073052644729614,grad_norm: 0.9999996038949315, iteration: 331324
loss: 1.0388513803482056,grad_norm: 0.8955195725537365, iteration: 331325
loss: 0.9951278567314148,grad_norm: 0.8339527948208987, iteration: 331326
loss: 0.9871223568916321,grad_norm: 0.7969234790213295, iteration: 331327
loss: 0.9914825558662415,grad_norm: 0.803516939549292, iteration: 331328
loss: 0.9964602589607239,grad_norm: 0.8982566694098428, iteration: 331329
loss: 1.0056393146514893,grad_norm: 0.9999991348636316, iteration: 331330
loss: 0.9927773475646973,grad_norm: 0.8741494243710963, iteration: 331331
loss: 0.9899854063987732,grad_norm: 0.7512966317045721, iteration: 331332
loss: 1.0085697174072266,grad_norm: 0.7561901543188356, iteration: 331333
loss: 1.004938006401062,grad_norm: 0.8831178752486329, iteration: 331334
loss: 1.00128173828125,grad_norm: 0.9359094189259471, iteration: 331335
loss: 1.0459526777267456,grad_norm: 0.9999992484996971, iteration: 331336
loss: 0.9837746620178223,grad_norm: 0.9647241560541975, iteration: 331337
loss: 0.9808211922645569,grad_norm: 0.99999921908271, iteration: 331338
loss: 0.9734100699424744,grad_norm: 0.9211839806738379, iteration: 331339
loss: 0.9767540097236633,grad_norm: 0.9898728412034983, iteration: 331340
loss: 1.0285310745239258,grad_norm: 0.9999997663629671, iteration: 331341
loss: 1.0969579219818115,grad_norm: 0.9999991087826325, iteration: 331342
loss: 0.9803869128227234,grad_norm: 0.6862932133831396, iteration: 331343
loss: 0.9569706916809082,grad_norm: 0.8685127994426581, iteration: 331344
loss: 1.004781723022461,grad_norm: 0.9956436555258905, iteration: 331345
loss: 0.9926444292068481,grad_norm: 0.8548486208542869, iteration: 331346
loss: 0.9955902695655823,grad_norm: 0.7734864600649889, iteration: 331347
loss: 0.9803617596626282,grad_norm: 0.9116831966619819, iteration: 331348
loss: 1.0173860788345337,grad_norm: 0.8280303747271036, iteration: 331349
loss: 1.0192859172821045,grad_norm: 0.9226848507392685, iteration: 331350
loss: 0.9843583106994629,grad_norm: 0.9284701212897157, iteration: 331351
loss: 0.9793992042541504,grad_norm: 0.9999996879311823, iteration: 331352
loss: 1.0107264518737793,grad_norm: 0.8328331871159138, iteration: 331353
loss: 1.0067377090454102,grad_norm: 0.8763470391541813, iteration: 331354
loss: 0.9990662336349487,grad_norm: 0.906053896776564, iteration: 331355
loss: 1.013448715209961,grad_norm: 0.7668810590793199, iteration: 331356
loss: 1.000704288482666,grad_norm: 0.770186604850496, iteration: 331357
loss: 0.9638460874557495,grad_norm: 0.8568953393997637, iteration: 331358
loss: 0.966640293598175,grad_norm: 0.7431354375646321, iteration: 331359
loss: 1.021847128868103,grad_norm: 0.8209685570533707, iteration: 331360
loss: 1.0102694034576416,grad_norm: 0.846927061017904, iteration: 331361
loss: 1.0072942972183228,grad_norm: 0.9744216311104582, iteration: 331362
loss: 1.0260800123214722,grad_norm: 0.9999989589923096, iteration: 331363
loss: 0.9809085130691528,grad_norm: 0.7471982190484713, iteration: 331364
loss: 1.007197380065918,grad_norm: 0.8565846522547085, iteration: 331365
loss: 0.983498215675354,grad_norm: 0.8443932867742583, iteration: 331366
loss: 0.9949858784675598,grad_norm: 0.9704490991837702, iteration: 331367
loss: 1.0016653537750244,grad_norm: 0.7399453720837579, iteration: 331368
loss: 1.066353440284729,grad_norm: 0.8875748622555427, iteration: 331369
loss: 1.008349061012268,grad_norm: 0.8700709509658844, iteration: 331370
loss: 1.0085679292678833,grad_norm: 0.7747928697011897, iteration: 331371
loss: 0.9953767657279968,grad_norm: 0.7890196073124562, iteration: 331372
loss: 1.0530718564987183,grad_norm: 0.9999990673004778, iteration: 331373
loss: 0.9951580762863159,grad_norm: 0.708525690030513, iteration: 331374
loss: 0.9897785186767578,grad_norm: 0.8463796567066165, iteration: 331375
loss: 1.035367727279663,grad_norm: 0.9554087366746007, iteration: 331376
loss: 1.0030714273452759,grad_norm: 0.999999728251871, iteration: 331377
loss: 0.9815240502357483,grad_norm: 0.8712064124374311, iteration: 331378
loss: 1.0529584884643555,grad_norm: 0.7420173247503681, iteration: 331379
loss: 1.007240653038025,grad_norm: 0.8938502243210142, iteration: 331380
loss: 1.0370315313339233,grad_norm: 0.9999992497025472, iteration: 331381
loss: 0.9810789227485657,grad_norm: 0.9183168135102934, iteration: 331382
loss: 1.0293223857879639,grad_norm: 0.7481143804172455, iteration: 331383
loss: 1.0237267017364502,grad_norm: 0.9999991735837851, iteration: 331384
loss: 1.0075252056121826,grad_norm: 0.9090562147017606, iteration: 331385
loss: 1.0743027925491333,grad_norm: 0.9999997951786626, iteration: 331386
loss: 0.9918028116226196,grad_norm: 0.9999995762932279, iteration: 331387
loss: 1.0326570272445679,grad_norm: 0.7837755000411933, iteration: 331388
loss: 0.9998410940170288,grad_norm: 0.826586661218796, iteration: 331389
loss: 0.9960095882415771,grad_norm: 0.8988256858159527, iteration: 331390
loss: 0.9943448305130005,grad_norm: 0.90711156098185, iteration: 331391
loss: 0.9895167946815491,grad_norm: 0.9156869407042654, iteration: 331392
loss: 1.0045093297958374,grad_norm: 0.8237851857584253, iteration: 331393
loss: 1.0020719766616821,grad_norm: 0.8485412973127986, iteration: 331394
loss: 1.0224213600158691,grad_norm: 0.9385714764567573, iteration: 331395
loss: 1.0118235349655151,grad_norm: 0.9999991837893145, iteration: 331396
loss: 0.9806041717529297,grad_norm: 0.917535065426967, iteration: 331397
loss: 1.000564694404602,grad_norm: 0.8518474559457615, iteration: 331398
loss: 1.0076439380645752,grad_norm: 0.921872192908305, iteration: 331399
loss: 1.0182546377182007,grad_norm: 0.7855211384364291, iteration: 331400
loss: 0.9975420832633972,grad_norm: 0.8424684881189124, iteration: 331401
loss: 1.22329843044281,grad_norm: 0.9999992228519113, iteration: 331402
loss: 1.000245451927185,grad_norm: 0.7646323300996337, iteration: 331403
loss: 1.038758397102356,grad_norm: 0.9999994847900819, iteration: 331404
loss: 1.026196837425232,grad_norm: 0.9999991388675568, iteration: 331405
loss: 1.0115951299667358,grad_norm: 0.723158962236849, iteration: 331406
loss: 0.9973439574241638,grad_norm: 0.8342550030116076, iteration: 331407
loss: 0.994871199131012,grad_norm: 0.8272684754382856, iteration: 331408
loss: 1.0007797479629517,grad_norm: 0.8152060703882761, iteration: 331409
loss: 0.9890831112861633,grad_norm: 0.9911521180770922, iteration: 331410
loss: 1.013428807258606,grad_norm: 0.9999992937059914, iteration: 331411
loss: 0.9966513514518738,grad_norm: 0.732964516054181, iteration: 331412
loss: 1.0048818588256836,grad_norm: 0.6623441766195365, iteration: 331413
loss: 1.0244113206863403,grad_norm: 0.8632837417302089, iteration: 331414
loss: 1.0160951614379883,grad_norm: 0.7773571642244845, iteration: 331415
loss: 0.9758681654930115,grad_norm: 0.8216565687552143, iteration: 331416
loss: 0.9728134274482727,grad_norm: 0.6987481178354973, iteration: 331417
loss: 1.0075268745422363,grad_norm: 0.8224532846775595, iteration: 331418
loss: 1.0386003255844116,grad_norm: 0.9999990026114176, iteration: 331419
loss: 1.015846848487854,grad_norm: 0.8795925222960137, iteration: 331420
loss: 0.9899254441261292,grad_norm: 0.9417756713621319, iteration: 331421
loss: 1.0305849313735962,grad_norm: 0.999999177586621, iteration: 331422
loss: 1.0144661664962769,grad_norm: 0.9999997077620109, iteration: 331423
loss: 0.9873625636100769,grad_norm: 0.9336100552355443, iteration: 331424
loss: 1.0012085437774658,grad_norm: 0.7156462101891348, iteration: 331425
loss: 1.003031611442566,grad_norm: 0.713581548805067, iteration: 331426
loss: 0.9954540133476257,grad_norm: 0.9538823702475797, iteration: 331427
loss: 1.1501901149749756,grad_norm: 0.9999994125314007, iteration: 331428
loss: 0.9532865881919861,grad_norm: 0.7964568739654617, iteration: 331429
loss: 0.9759466648101807,grad_norm: 0.8552276026497659, iteration: 331430
loss: 0.9752263426780701,grad_norm: 0.757080088668411, iteration: 331431
loss: 1.0962728261947632,grad_norm: 0.9999991993586869, iteration: 331432
loss: 1.0676699876785278,grad_norm: 0.9999990743136936, iteration: 331433
loss: 1.05402672290802,grad_norm: 0.8341811046930159, iteration: 331434
loss: 1.0274577140808105,grad_norm: 0.7312677736240173, iteration: 331435
loss: 0.9956192970275879,grad_norm: 0.9001981454264225, iteration: 331436
loss: 0.9887290000915527,grad_norm: 0.6889191014790813, iteration: 331437
loss: 1.0149025917053223,grad_norm: 0.8380793042966868, iteration: 331438
loss: 0.9871507287025452,grad_norm: 0.8825057684742128, iteration: 331439
loss: 1.07231867313385,grad_norm: 0.9999991222287078, iteration: 331440
loss: 0.9938367009162903,grad_norm: 0.8140185749809491, iteration: 331441
loss: 1.0504624843597412,grad_norm: 0.9999998534402219, iteration: 331442
loss: 0.982132613658905,grad_norm: 0.999999546105467, iteration: 331443
loss: 1.1438384056091309,grad_norm: 0.999999923766456, iteration: 331444
loss: 1.0116037130355835,grad_norm: 0.8572029870603393, iteration: 331445
loss: 1.0052180290222168,grad_norm: 0.788055103285005, iteration: 331446
loss: 1.2451610565185547,grad_norm: 0.9999998788459928, iteration: 331447
loss: 1.086676001548767,grad_norm: 0.999999491359558, iteration: 331448
loss: 1.050892949104309,grad_norm: 0.7884743421624426, iteration: 331449
loss: 1.0067188739776611,grad_norm: 0.9174499567512093, iteration: 331450
loss: 1.0399013757705688,grad_norm: 0.9999991206107428, iteration: 331451
loss: 1.0132209062576294,grad_norm: 0.8194724101836292, iteration: 331452
loss: 1.0186057090759277,grad_norm: 0.8553672714192665, iteration: 331453
loss: 0.9948480725288391,grad_norm: 0.9999995167718041, iteration: 331454
loss: 0.9955582022666931,grad_norm: 0.7449974864696933, iteration: 331455
loss: 1.0064483880996704,grad_norm: 0.8796875334853856, iteration: 331456
loss: 1.0593169927597046,grad_norm: 0.9999996283443927, iteration: 331457
loss: 0.9934610724449158,grad_norm: 0.9999991091060806, iteration: 331458
loss: 1.0119819641113281,grad_norm: 0.856424796182539, iteration: 331459
loss: 0.9985507130622864,grad_norm: 0.9115212836329103, iteration: 331460
loss: 0.9892309308052063,grad_norm: 0.7346577988933263, iteration: 331461
loss: 1.0167840719223022,grad_norm: 0.9999996942494291, iteration: 331462
loss: 1.015170693397522,grad_norm: 0.8715806393132524, iteration: 331463
loss: 0.974942147731781,grad_norm: 0.7974603682044198, iteration: 331464
loss: 1.0608487129211426,grad_norm: 1.0000000043142685, iteration: 331465
loss: 1.0107332468032837,grad_norm: 0.8603277946795648, iteration: 331466
loss: 0.9989436864852905,grad_norm: 0.7991049576868912, iteration: 331467
loss: 1.0136425495147705,grad_norm: 0.9999990981486022, iteration: 331468
loss: 1.009861946105957,grad_norm: 0.9247214431067576, iteration: 331469
loss: 1.0031628608703613,grad_norm: 0.9999993959680584, iteration: 331470
loss: 1.025777816772461,grad_norm: 0.7410341947769399, iteration: 331471
loss: 1.0069727897644043,grad_norm: 0.9080886202397996, iteration: 331472
loss: 1.0063115358352661,grad_norm: 0.8554459720587959, iteration: 331473
loss: 1.089094638824463,grad_norm: 0.9999993779264857, iteration: 331474
loss: 1.0155704021453857,grad_norm: 0.9999991049621136, iteration: 331475
loss: 1.0385361909866333,grad_norm: 0.783645150842526, iteration: 331476
loss: 1.0128237009048462,grad_norm: 0.908408140783194, iteration: 331477
loss: 1.0076258182525635,grad_norm: 0.9865982275735589, iteration: 331478
loss: 1.1017190217971802,grad_norm: 0.9284378756703048, iteration: 331479
loss: 0.971903920173645,grad_norm: 0.7435474673376757, iteration: 331480
loss: 1.0279251337051392,grad_norm: 0.9999991419904963, iteration: 331481
loss: 1.0838537216186523,grad_norm: 0.9999993050630611, iteration: 331482
loss: 1.0105136632919312,grad_norm: 0.9601418829533273, iteration: 331483
loss: 1.0302088260650635,grad_norm: 0.7641744822904734, iteration: 331484
loss: 1.0278035402297974,grad_norm: 0.9202911354295814, iteration: 331485
loss: 1.0269535779953003,grad_norm: 0.9120547127908284, iteration: 331486
loss: 1.007503628730774,grad_norm: 0.7130962076289435, iteration: 331487
loss: 1.1084530353546143,grad_norm: 0.9999990237529387, iteration: 331488
loss: 1.0200021266937256,grad_norm: 0.9661672618458772, iteration: 331489
loss: 1.0309809446334839,grad_norm: 0.9999996994894574, iteration: 331490
loss: 1.0370019674301147,grad_norm: 0.8729764466976956, iteration: 331491
loss: 1.0217660665512085,grad_norm: 0.768509860699864, iteration: 331492
loss: 0.9898213148117065,grad_norm: 0.8933512416697358, iteration: 331493
loss: 0.9782801270484924,grad_norm: 0.7105900351654845, iteration: 331494
loss: 1.0054450035095215,grad_norm: 0.7359992205079854, iteration: 331495
loss: 0.9781367182731628,grad_norm: 0.9167945359343708, iteration: 331496
loss: 1.010801911354065,grad_norm: 0.7034854718096171, iteration: 331497
loss: 0.9845731854438782,grad_norm: 0.7549826281863694, iteration: 331498
loss: 1.0233558416366577,grad_norm: 0.7358626159213433, iteration: 331499
loss: 0.9539315700531006,grad_norm: 0.7318702794386945, iteration: 331500
loss: 1.0017173290252686,grad_norm: 0.9999990231154362, iteration: 331501
loss: 1.0024751424789429,grad_norm: 0.8519104451736684, iteration: 331502
loss: 0.9937798380851746,grad_norm: 0.8890086137636047, iteration: 331503
loss: 1.0085614919662476,grad_norm: 0.9062243753508523, iteration: 331504
loss: 1.021314024925232,grad_norm: 0.7488058613656198, iteration: 331505
loss: 1.059983253479004,grad_norm: 0.9999994358144375, iteration: 331506
loss: 1.0027648210525513,grad_norm: 0.9326465002980601, iteration: 331507
loss: 0.9814724922180176,grad_norm: 0.7382783708151741, iteration: 331508
loss: 1.1067649126052856,grad_norm: 0.9464278849820192, iteration: 331509
loss: 1.0476038455963135,grad_norm: 0.9544725569094699, iteration: 331510
loss: 0.9839166402816772,grad_norm: 0.8271101974607988, iteration: 331511
loss: 0.9735868573188782,grad_norm: 0.8052526867101957, iteration: 331512
loss: 1.0270026922225952,grad_norm: 0.9015677810760954, iteration: 331513
loss: 0.9900777339935303,grad_norm: 0.823421496209446, iteration: 331514
loss: 1.0474787950515747,grad_norm: 0.9999997964827808, iteration: 331515
loss: 0.9993999004364014,grad_norm: 0.6360704925684858, iteration: 331516
loss: 1.0148565769195557,grad_norm: 0.9999990796575782, iteration: 331517
loss: 1.028656005859375,grad_norm: 0.931103288220388, iteration: 331518
loss: 0.9927574396133423,grad_norm: 0.7392801135805523, iteration: 331519
loss: 0.9763602018356323,grad_norm: 0.8787794792771181, iteration: 331520
loss: 1.0152193307876587,grad_norm: 0.9286643359905388, iteration: 331521
loss: 0.9883298873901367,grad_norm: 0.9999989928846655, iteration: 331522
loss: 0.9875136613845825,grad_norm: 0.846983541753889, iteration: 331523
loss: 1.0599359273910522,grad_norm: 0.9999990546140162, iteration: 331524
loss: 1.0136109590530396,grad_norm: 0.8003972258566624, iteration: 331525
loss: 1.0651814937591553,grad_norm: 0.9999997094964399, iteration: 331526
loss: 1.071580410003662,grad_norm: 0.8459068303994155, iteration: 331527
loss: 0.9639288783073425,grad_norm: 0.9999990511566473, iteration: 331528
loss: 0.9747570157051086,grad_norm: 0.9726693571051547, iteration: 331529
loss: 0.9955520033836365,grad_norm: 0.9999993571477017, iteration: 331530
loss: 1.0097923278808594,grad_norm: 0.7806208462629947, iteration: 331531
loss: 1.0877436399459839,grad_norm: 0.9999992520822255, iteration: 331532
loss: 1.0131511688232422,grad_norm: 0.8720281157910555, iteration: 331533
loss: 1.0169788599014282,grad_norm: 0.729190532234801, iteration: 331534
loss: 1.0739895105361938,grad_norm: 0.949352182461048, iteration: 331535
loss: 0.9924383759498596,grad_norm: 0.9324895503363594, iteration: 331536
loss: 1.0124200582504272,grad_norm: 0.9999990585781229, iteration: 331537
loss: 1.048134684562683,grad_norm: 0.7366743713404547, iteration: 331538
loss: 0.9810337424278259,grad_norm: 0.7407028071235121, iteration: 331539
loss: 1.0260086059570312,grad_norm: 0.86811329168581, iteration: 331540
loss: 0.997120201587677,grad_norm: 0.999999075621017, iteration: 331541
loss: 1.0033228397369385,grad_norm: 0.812706823323451, iteration: 331542
loss: 1.0142887830734253,grad_norm: 0.9934253155943671, iteration: 331543
loss: 0.9698063731193542,grad_norm: 0.9032639197223623, iteration: 331544
loss: 1.012679100036621,grad_norm: 0.8959567181183143, iteration: 331545
loss: 1.0097250938415527,grad_norm: 0.8340057115526407, iteration: 331546
loss: 1.023238182067871,grad_norm: 0.9786242426288343, iteration: 331547
loss: 0.9729072451591492,grad_norm: 0.86599506902581, iteration: 331548
loss: 1.0029412508010864,grad_norm: 0.8292365802929766, iteration: 331549
loss: 0.9871875643730164,grad_norm: 0.7179760874558652, iteration: 331550
loss: 1.012306809425354,grad_norm: 0.8907487024284152, iteration: 331551
loss: 1.0487940311431885,grad_norm: 0.9095372429244594, iteration: 331552
loss: 0.96004718542099,grad_norm: 0.7975095287961832, iteration: 331553
loss: 0.974463939666748,grad_norm: 0.8837811346576849, iteration: 331554
loss: 0.9721658229827881,grad_norm: 0.8366501884469765, iteration: 331555
loss: 0.9970971941947937,grad_norm: 0.7516019042771755, iteration: 331556
loss: 1.0247018337249756,grad_norm: 0.9999998886233201, iteration: 331557
loss: 1.1140884160995483,grad_norm: 0.9999996434272272, iteration: 331558
loss: 0.9904718399047852,grad_norm: 0.9999990827761562, iteration: 331559
loss: 1.0032421350479126,grad_norm: 0.8680481385304281, iteration: 331560
loss: 1.073003888130188,grad_norm: 0.9999991205499613, iteration: 331561
loss: 1.1567720174789429,grad_norm: 0.9999993620849907, iteration: 331562
loss: 1.0019582509994507,grad_norm: 0.826166717460018, iteration: 331563
loss: 1.1089799404144287,grad_norm: 0.9999990127269579, iteration: 331564
loss: 1.02252995967865,grad_norm: 0.8141699290065441, iteration: 331565
loss: 1.022148847579956,grad_norm: 0.8502017371633116, iteration: 331566
loss: 1.0021477937698364,grad_norm: 0.8609726277934823, iteration: 331567
loss: 1.0339362621307373,grad_norm: 0.7621452472310365, iteration: 331568
loss: 1.2789127826690674,grad_norm: 0.9999994154727024, iteration: 331569
loss: 1.2125277519226074,grad_norm: 0.9999994408110325, iteration: 331570
loss: 1.1307727098464966,grad_norm: 0.9999995014484803, iteration: 331571
loss: 0.9850927591323853,grad_norm: 0.8762203233517254, iteration: 331572
loss: 1.0917960405349731,grad_norm: 0.9372149147117078, iteration: 331573
loss: 1.0353021621704102,grad_norm: 0.9533304182876081, iteration: 331574
loss: 1.0334779024124146,grad_norm: 0.855432399790657, iteration: 331575
loss: 0.9707049131393433,grad_norm: 0.9999991885324265, iteration: 331576
loss: 1.059761643409729,grad_norm: 0.9999996910869425, iteration: 331577
loss: 0.974602460861206,grad_norm: 0.960167385066786, iteration: 331578
loss: 0.9733355641365051,grad_norm: 0.9999991535995595, iteration: 331579
loss: 1.0227032899856567,grad_norm: 0.999999856813101, iteration: 331580
loss: 0.9935996532440186,grad_norm: 0.9916537596439441, iteration: 331581
loss: 0.9740765690803528,grad_norm: 0.7738496467971006, iteration: 331582
loss: 1.0992140769958496,grad_norm: 1.0000001045333269, iteration: 331583
loss: 1.0044163465499878,grad_norm: 0.9628685530765588, iteration: 331584
loss: 1.2124080657958984,grad_norm: 0.99999980092372, iteration: 331585
loss: 0.9923257231712341,grad_norm: 0.8395369926610843, iteration: 331586
loss: 1.020235300064087,grad_norm: 0.999999176068657, iteration: 331587
loss: 0.9827474355697632,grad_norm: 0.7835217926352964, iteration: 331588
loss: 1.0731239318847656,grad_norm: 0.857754676383923, iteration: 331589
loss: 1.0626691579818726,grad_norm: 0.8840186664096341, iteration: 331590
loss: 1.0805970430374146,grad_norm: 0.999999416569056, iteration: 331591
loss: 0.9993285536766052,grad_norm: 0.810129591874604, iteration: 331592
loss: 1.053168773651123,grad_norm: 0.9999993600925351, iteration: 331593
loss: 0.9846956133842468,grad_norm: 0.9556958358505057, iteration: 331594
loss: 1.0097323656082153,grad_norm: 0.8346019844098145, iteration: 331595
loss: 1.0168811082839966,grad_norm: 0.7772264925604891, iteration: 331596
loss: 1.005650520324707,grad_norm: 0.879622594269236, iteration: 331597
loss: 1.0120255947113037,grad_norm: 0.8489155886797716, iteration: 331598
loss: 1.053317904472351,grad_norm: 0.9999993797301415, iteration: 331599
loss: 1.025154709815979,grad_norm: 0.8033075470005429, iteration: 331600
loss: 0.97376948595047,grad_norm: 0.9551246061267489, iteration: 331601
loss: 0.9730411171913147,grad_norm: 0.7393885015786559, iteration: 331602
loss: 1.010784387588501,grad_norm: 0.8560054206015408, iteration: 331603
loss: 0.9751958250999451,grad_norm: 0.9999992268637926, iteration: 331604
loss: 1.0133612155914307,grad_norm: 0.7774751612052356, iteration: 331605
loss: 1.0182708501815796,grad_norm: 0.8025213204885495, iteration: 331606
loss: 1.009659767150879,grad_norm: 0.8340425352558335, iteration: 331607
loss: 1.0363142490386963,grad_norm: 0.8053268410484022, iteration: 331608
loss: 0.9653111100196838,grad_norm: 0.7846734402659625, iteration: 331609
loss: 0.9883421063423157,grad_norm: 0.8887890005914842, iteration: 331610
loss: 0.9974027872085571,grad_norm: 0.9999991133397405, iteration: 331611
loss: 0.9679245352745056,grad_norm: 0.9999991726272108, iteration: 331612
loss: 0.9910416007041931,grad_norm: 0.9999993964442526, iteration: 331613
loss: 0.994164764881134,grad_norm: 0.8128444531464877, iteration: 331614
loss: 1.0022846460342407,grad_norm: 0.9999990596562671, iteration: 331615
loss: 0.9717125296592712,grad_norm: 0.9999989929632529, iteration: 331616
loss: 0.9873820543289185,grad_norm: 0.7857192967201255, iteration: 331617
loss: 0.9910597205162048,grad_norm: 0.8837103407269208, iteration: 331618
loss: 1.0047187805175781,grad_norm: 0.9866754400535483, iteration: 331619
loss: 0.9997987151145935,grad_norm: 0.9148038018493507, iteration: 331620
loss: 1.0603008270263672,grad_norm: 0.961007623335893, iteration: 331621
loss: 1.0102860927581787,grad_norm: 0.8404327743760668, iteration: 331622
loss: 0.9810893535614014,grad_norm: 0.8460634359193403, iteration: 331623
loss: 1.0235246419906616,grad_norm: 0.9999991799066068, iteration: 331624
loss: 0.987373948097229,grad_norm: 0.8139743340350657, iteration: 331625
loss: 1.0245846509933472,grad_norm: 0.9394473911508231, iteration: 331626
loss: 1.01288640499115,grad_norm: 0.9346516167919525, iteration: 331627
loss: 1.0070830583572388,grad_norm: 0.844349889281801, iteration: 331628
loss: 0.9696662425994873,grad_norm: 0.8813277161169867, iteration: 331629
loss: 1.0152639150619507,grad_norm: 0.9999991986829976, iteration: 331630
loss: 0.9942217469215393,grad_norm: 0.7055253788292287, iteration: 331631
loss: 1.0159838199615479,grad_norm: 0.8521748930754459, iteration: 331632
loss: 0.9950768351554871,grad_norm: 0.9999991714991774, iteration: 331633
loss: 1.0005055665969849,grad_norm: 0.8352973659474253, iteration: 331634
loss: 0.9674543738365173,grad_norm: 0.9554891115982524, iteration: 331635
loss: 1.013046383857727,grad_norm: 0.8055332625183655, iteration: 331636
loss: 1.0336025953292847,grad_norm: 0.8493499813223233, iteration: 331637
loss: 1.0446993112564087,grad_norm: 0.9999991264554745, iteration: 331638
loss: 0.9501272439956665,grad_norm: 0.8953281360127875, iteration: 331639
loss: 1.0404235124588013,grad_norm: 0.9635472861726707, iteration: 331640
loss: 0.9436084032058716,grad_norm: 0.7999929500607904, iteration: 331641
loss: 0.9909531474113464,grad_norm: 0.9147773771988943, iteration: 331642
loss: 1.0153076648712158,grad_norm: 0.821703369130118, iteration: 331643
loss: 1.0122421979904175,grad_norm: 0.8702296373235701, iteration: 331644
loss: 1.0273240804672241,grad_norm: 0.8447268639209544, iteration: 331645
loss: 1.0048885345458984,grad_norm: 0.8607115403800025, iteration: 331646
loss: 1.0048060417175293,grad_norm: 0.7554072690509372, iteration: 331647
loss: 1.0005078315734863,grad_norm: 0.8258275075449084, iteration: 331648
loss: 0.9856662154197693,grad_norm: 0.7577367640931397, iteration: 331649
loss: 0.9898378849029541,grad_norm: 0.9999991333112271, iteration: 331650
loss: 1.0032870769500732,grad_norm: 0.9999991480076378, iteration: 331651
loss: 1.0772225856781006,grad_norm: 0.9999997827678764, iteration: 331652
loss: 0.9931551218032837,grad_norm: 0.9848277428587935, iteration: 331653
loss: 1.017876386642456,grad_norm: 0.7942171646068562, iteration: 331654
loss: 1.0201749801635742,grad_norm: 0.999999641970536, iteration: 331655
loss: 0.9830766916275024,grad_norm: 0.7526671397991183, iteration: 331656
loss: 1.0013725757598877,grad_norm: 0.8111824581637237, iteration: 331657
loss: 0.9644498229026794,grad_norm: 0.8665920440941693, iteration: 331658
loss: 0.9862411022186279,grad_norm: 0.9048607435804968, iteration: 331659
loss: 1.0136982202529907,grad_norm: 0.9999991009380153, iteration: 331660
loss: 0.9973512887954712,grad_norm: 0.7275249092781834, iteration: 331661
loss: 1.021405577659607,grad_norm: 0.736440035212821, iteration: 331662
loss: 0.9914444088935852,grad_norm: 0.6986008650201355, iteration: 331663
loss: 1.05909264087677,grad_norm: 0.9999996591406528, iteration: 331664
loss: 1.015454649925232,grad_norm: 0.8405408793995204, iteration: 331665
loss: 0.9873526692390442,grad_norm: 0.7063742468126581, iteration: 331666
loss: 1.0162220001220703,grad_norm: 0.767153683069759, iteration: 331667
loss: 0.922528862953186,grad_norm: 0.9361041863577828, iteration: 331668
loss: 1.0071741342544556,grad_norm: 0.9999992194827638, iteration: 331669
loss: 0.9794520139694214,grad_norm: 0.8393158711178516, iteration: 331670
loss: 1.0527898073196411,grad_norm: 0.9999992956040142, iteration: 331671
loss: 1.0234962701797485,grad_norm: 0.743691748201744, iteration: 331672
loss: 0.9814895391464233,grad_norm: 0.7723225657960492, iteration: 331673
loss: 1.0146056413650513,grad_norm: 0.9999998445244382, iteration: 331674
loss: 1.0381300449371338,grad_norm: 0.9790351362374201, iteration: 331675
loss: 1.0138611793518066,grad_norm: 0.7208604287088519, iteration: 331676
loss: 0.9999369978904724,grad_norm: 0.7488402212841826, iteration: 331677
loss: 1.0040918588638306,grad_norm: 0.8978806673964532, iteration: 331678
loss: 0.9708474278450012,grad_norm: 0.7100361424980781, iteration: 331679
loss: 1.0149126052856445,grad_norm: 0.739261289236374, iteration: 331680
loss: 1.0072063207626343,grad_norm: 0.8239187031631993, iteration: 331681
loss: 0.9741281867027283,grad_norm: 0.8030993331457128, iteration: 331682
loss: 1.0069364309310913,grad_norm: 0.9999989768430773, iteration: 331683
loss: 0.9745886921882629,grad_norm: 0.7276734148995697, iteration: 331684
loss: 0.9940749406814575,grad_norm: 0.9805103467774241, iteration: 331685
loss: 0.9994003772735596,grad_norm: 0.787920433327073, iteration: 331686
loss: 0.9721421003341675,grad_norm: 0.8772878058678464, iteration: 331687
loss: 1.0439318418502808,grad_norm: 0.8086719699426066, iteration: 331688
loss: 0.9767659306526184,grad_norm: 0.9539325890465197, iteration: 331689
loss: 0.9924675226211548,grad_norm: 0.9999993764051198, iteration: 331690
loss: 0.990500807762146,grad_norm: 0.8073272871207537, iteration: 331691
loss: 1.0120935440063477,grad_norm: 0.9999993100547392, iteration: 331692
loss: 1.0152889490127563,grad_norm: 0.9999990869286758, iteration: 331693
loss: 1.0193758010864258,grad_norm: 0.6871763096839094, iteration: 331694
loss: 1.0010764598846436,grad_norm: 0.8607466878750826, iteration: 331695
loss: 0.9940981268882751,grad_norm: 0.7574986932169125, iteration: 331696
loss: 0.9400933384895325,grad_norm: 0.8370240631072102, iteration: 331697
loss: 1.0147526264190674,grad_norm: 0.9274318353683657, iteration: 331698
loss: 1.0230473279953003,grad_norm: 0.8488013570821388, iteration: 331699
loss: 1.0079623460769653,grad_norm: 0.9628985371464548, iteration: 331700
loss: 1.0115529298782349,grad_norm: 0.8534307212531008, iteration: 331701
loss: 0.9307074546813965,grad_norm: 0.72666782548781, iteration: 331702
loss: 1.0182228088378906,grad_norm: 0.7825893566866513, iteration: 331703
loss: 0.9972102642059326,grad_norm: 0.8808640271022246, iteration: 331704
loss: 0.9931523203849792,grad_norm: 0.9159782116832634, iteration: 331705
loss: 0.9856145977973938,grad_norm: 0.7642945938018433, iteration: 331706
loss: 1.0905476808547974,grad_norm: 0.7686716360695408, iteration: 331707
loss: 1.0181231498718262,grad_norm: 0.9141692177944671, iteration: 331708
loss: 1.026419758796692,grad_norm: 0.8757629627253676, iteration: 331709
loss: 1.025800347328186,grad_norm: 0.7316663632035588, iteration: 331710
loss: 1.013832449913025,grad_norm: 0.792184666190352, iteration: 331711
loss: 1.0164649486541748,grad_norm: 0.8785562514800616, iteration: 331712
loss: 1.0324586629867554,grad_norm: 0.7844880174604694, iteration: 331713
loss: 1.006745457649231,grad_norm: 0.9096122828624766, iteration: 331714
loss: 0.9805992841720581,grad_norm: 0.8252794241998652, iteration: 331715
loss: 0.9852197170257568,grad_norm: 0.7848029976694059, iteration: 331716
loss: 1.0742912292480469,grad_norm: 0.7684249412542122, iteration: 331717
loss: 1.021904706954956,grad_norm: 0.6814613297435107, iteration: 331718
loss: 1.0236977338790894,grad_norm: 0.9508980186316532, iteration: 331719
loss: 0.9843814373016357,grad_norm: 0.7531003209463473, iteration: 331720
loss: 0.9973876476287842,grad_norm: 0.8338649022356643, iteration: 331721
loss: 1.012108564376831,grad_norm: 0.7343201709728269, iteration: 331722
loss: 0.9938480257987976,grad_norm: 0.7611696060462152, iteration: 331723
loss: 1.0191106796264648,grad_norm: 0.681425263812266, iteration: 331724
loss: 0.9948205351829529,grad_norm: 0.9296322071322737, iteration: 331725
loss: 1.0215038061141968,grad_norm: 0.9999996942449397, iteration: 331726
loss: 0.9968568682670593,grad_norm: 0.8215910419811142, iteration: 331727
loss: 0.950454831123352,grad_norm: 0.9550057416559284, iteration: 331728
loss: 1.011521339416504,grad_norm: 0.7374532802445773, iteration: 331729
loss: 0.9585954546928406,grad_norm: 0.8466246167660689, iteration: 331730
loss: 1.0175350904464722,grad_norm: 0.8109196587669372, iteration: 331731
loss: 1.0287772417068481,grad_norm: 0.9999992084879104, iteration: 331732
loss: 0.956885814666748,grad_norm: 0.8034081952513749, iteration: 331733
loss: 0.9872889518737793,grad_norm: 0.7539159264759271, iteration: 331734
loss: 0.9756828546524048,grad_norm: 0.8487766130772775, iteration: 331735
loss: 1.0225856304168701,grad_norm: 0.8507536882236159, iteration: 331736
loss: 1.0017220973968506,grad_norm: 0.761267021661943, iteration: 331737
loss: 1.0427542924880981,grad_norm: 0.7898841924042685, iteration: 331738
loss: 1.0180894136428833,grad_norm: 0.8230295307567721, iteration: 331739
loss: 1.0772815942764282,grad_norm: 0.9999992672792544, iteration: 331740
loss: 0.9834671020507812,grad_norm: 0.7333778080864929, iteration: 331741
loss: 1.009872317314148,grad_norm: 0.8844865024411301, iteration: 331742
loss: 0.9535962343215942,grad_norm: 0.8654238586264354, iteration: 331743
loss: 0.987639844417572,grad_norm: 0.9696564905527454, iteration: 331744
loss: 1.006351113319397,grad_norm: 0.8969226942514938, iteration: 331745
loss: 1.0965538024902344,grad_norm: 0.99999909742771, iteration: 331746
loss: 1.0207786560058594,grad_norm: 0.8912926669351173, iteration: 331747
loss: 1.0270050764083862,grad_norm: 0.7620095100713002, iteration: 331748
loss: 0.9986394643783569,grad_norm: 0.7978338723910466, iteration: 331749
loss: 1.0088156461715698,grad_norm: 0.9999990099503013, iteration: 331750
loss: 0.9885114431381226,grad_norm: 0.7185873470914875, iteration: 331751
loss: 0.9906009435653687,grad_norm: 0.9999991857124539, iteration: 331752
loss: 1.056102991104126,grad_norm: 0.9999995058311484, iteration: 331753
loss: 0.9675163626670837,grad_norm: 0.7381962342050352, iteration: 331754
loss: 0.9977496266365051,grad_norm: 0.8365977342936307, iteration: 331755
loss: 0.9717910289764404,grad_norm: 0.8397781798182636, iteration: 331756
loss: 1.0148905515670776,grad_norm: 0.8027316290806219, iteration: 331757
loss: 1.0152071714401245,grad_norm: 0.9999990783657906, iteration: 331758
loss: 0.977494478225708,grad_norm: 0.6699864409057049, iteration: 331759
loss: 1.0157712697982788,grad_norm: 0.8495813561158887, iteration: 331760
loss: 0.9979472160339355,grad_norm: 0.7758388631307981, iteration: 331761
loss: 1.0243589878082275,grad_norm: 0.7624344992129058, iteration: 331762
loss: 0.979941725730896,grad_norm: 0.9869568074766872, iteration: 331763
loss: 1.0273689031600952,grad_norm: 0.8918177885824242, iteration: 331764
loss: 1.006638526916504,grad_norm: 0.7370551394364818, iteration: 331765
loss: 1.0804420709609985,grad_norm: 0.7527669786049395, iteration: 331766
loss: 1.0025465488433838,grad_norm: 0.7462622264308946, iteration: 331767
loss: 1.0091172456741333,grad_norm: 0.9218792652853648, iteration: 331768
loss: 1.036380648612976,grad_norm: 0.9999997407667884, iteration: 331769
loss: 0.9999594688415527,grad_norm: 0.8099542621908931, iteration: 331770
loss: 1.022598385810852,grad_norm: 0.9999991219458988, iteration: 331771
loss: 1.0314056873321533,grad_norm: 0.8099004139158614, iteration: 331772
loss: 1.0095977783203125,grad_norm: 0.9999995036546631, iteration: 331773
loss: 1.0326838493347168,grad_norm: 0.7882771826681183, iteration: 331774
loss: 0.9642935991287231,grad_norm: 0.8261828469415275, iteration: 331775
loss: 1.0142477750778198,grad_norm: 0.8398035896239491, iteration: 331776
loss: 0.9765265583992004,grad_norm: 0.8993677483519181, iteration: 331777
loss: 0.9836341738700867,grad_norm: 0.9117697148428322, iteration: 331778
loss: 1.0130289793014526,grad_norm: 0.7953561845333512, iteration: 331779
loss: 0.9913632273674011,grad_norm: 0.8186398791366692, iteration: 331780
loss: 1.0217669010162354,grad_norm: 0.637039174776084, iteration: 331781
loss: 1.0225201845169067,grad_norm: 0.8449776563815053, iteration: 331782
loss: 1.008219599723816,grad_norm: 0.9999990390369156, iteration: 331783
loss: 0.9928221106529236,grad_norm: 0.8661946806116637, iteration: 331784
loss: 0.9465093612670898,grad_norm: 0.913407148295202, iteration: 331785
loss: 1.0090129375457764,grad_norm: 0.9076443377028997, iteration: 331786
loss: 0.966503381729126,grad_norm: 0.8614825192650449, iteration: 331787
loss: 1.0136533975601196,grad_norm: 0.8653558284104927, iteration: 331788
loss: 1.023756742477417,grad_norm: 0.7599192399544492, iteration: 331789
loss: 0.9961920380592346,grad_norm: 0.7628368540685695, iteration: 331790
loss: 0.9405252933502197,grad_norm: 0.8456329698855544, iteration: 331791
loss: 0.969502866268158,grad_norm: 0.7649707458086135, iteration: 331792
loss: 0.9819746613502502,grad_norm: 0.7872475583908956, iteration: 331793
loss: 0.9710510969161987,grad_norm: 0.8764648169044693, iteration: 331794
loss: 0.9993367791175842,grad_norm: 0.8140598267199046, iteration: 331795
loss: 1.0218443870544434,grad_norm: 0.895999865197176, iteration: 331796
loss: 0.971575915813446,grad_norm: 0.8359547010900158, iteration: 331797
loss: 1.0081592798233032,grad_norm: 0.7900395724125113, iteration: 331798
loss: 0.9897580742835999,grad_norm: 0.8395488994373083, iteration: 331799
loss: 0.9969839453697205,grad_norm: 0.8116569686672204, iteration: 331800
loss: 1.043939232826233,grad_norm: 0.9999991667090103, iteration: 331801
loss: 0.9739147424697876,grad_norm: 0.8598110907779632, iteration: 331802
loss: 0.9748498201370239,grad_norm: 0.943521069277166, iteration: 331803
loss: 1.0325396060943604,grad_norm: 0.8924616945517131, iteration: 331804
loss: 1.0066440105438232,grad_norm: 0.9999991482618742, iteration: 331805
loss: 0.9846884608268738,grad_norm: 0.868797221745099, iteration: 331806
loss: 0.9711163640022278,grad_norm: 0.9999989974789205, iteration: 331807
loss: 0.9820399880409241,grad_norm: 0.6954531816627819, iteration: 331808
loss: 0.981235682964325,grad_norm: 0.8256229832721659, iteration: 331809
loss: 1.0697112083435059,grad_norm: 0.9999992565520052, iteration: 331810
loss: 1.0346873998641968,grad_norm: 0.8253579677767112, iteration: 331811
loss: 1.000059962272644,grad_norm: 0.8363730586976114, iteration: 331812
loss: 0.987795889377594,grad_norm: 0.7790561024039613, iteration: 331813
loss: 0.9942378401756287,grad_norm: 0.8266754317493911, iteration: 331814
loss: 1.033562183380127,grad_norm: 0.8801789961414891, iteration: 331815
loss: 1.0200316905975342,grad_norm: 0.8067541847256932, iteration: 331816
loss: 1.0566534996032715,grad_norm: 0.9170462629182291, iteration: 331817
loss: 0.9743202924728394,grad_norm: 0.9230027435421977, iteration: 331818
loss: 1.0224851369857788,grad_norm: 0.8508527658094035, iteration: 331819
loss: 1.0029538869857788,grad_norm: 0.8858095163068056, iteration: 331820
loss: 0.9403309226036072,grad_norm: 0.9999994017899305, iteration: 331821
loss: 0.9890437722206116,grad_norm: 0.8458048922133521, iteration: 331822
loss: 1.018939733505249,grad_norm: 0.7432115060380208, iteration: 331823
loss: 1.0398491621017456,grad_norm: 0.9254535856166819, iteration: 331824
loss: 0.9872786402702332,grad_norm: 0.7882209314229525, iteration: 331825
loss: 1.0358411073684692,grad_norm: 0.7330764075513491, iteration: 331826
loss: 1.0135759115219116,grad_norm: 0.9999992798095647, iteration: 331827
loss: 0.9936075806617737,grad_norm: 0.9999990505051636, iteration: 331828
loss: 1.0010908842086792,grad_norm: 0.8213003990699866, iteration: 331829
loss: 1.0212805271148682,grad_norm: 0.8682671511522463, iteration: 331830
loss: 1.0050477981567383,grad_norm: 0.9301076705314607, iteration: 331831
loss: 1.0043610334396362,grad_norm: 0.7399234540845188, iteration: 331832
loss: 0.9934220910072327,grad_norm: 0.9999990138883447, iteration: 331833
loss: 1.0107866525650024,grad_norm: 0.8055015874568088, iteration: 331834
loss: 1.0114831924438477,grad_norm: 0.7585161807299882, iteration: 331835
loss: 0.974481463432312,grad_norm: 0.8056231361605931, iteration: 331836
loss: 1.0047719478607178,grad_norm: 0.8236051141571269, iteration: 331837
loss: 0.9700105786323547,grad_norm: 0.9375736763519229, iteration: 331838
loss: 1.0083600282669067,grad_norm: 0.9999990546789332, iteration: 331839
loss: 1.0027836561203003,grad_norm: 0.9099152261455653, iteration: 331840
loss: 0.989365816116333,grad_norm: 0.8148329130440809, iteration: 331841
loss: 0.9882295727729797,grad_norm: 0.6866936585943236, iteration: 331842
loss: 0.9623457193374634,grad_norm: 0.8398549420935949, iteration: 331843
loss: 1.001586675643921,grad_norm: 0.8531361834122453, iteration: 331844
loss: 1.0252653360366821,grad_norm: 0.8265582730047646, iteration: 331845
loss: 0.9373407363891602,grad_norm: 0.8653296399920462, iteration: 331846
loss: 1.023386836051941,grad_norm: 0.8019172105848261, iteration: 331847
loss: 0.9698629379272461,grad_norm: 0.8961417621241542, iteration: 331848
loss: 1.0150755643844604,grad_norm: 0.7995526739861443, iteration: 331849
loss: 1.0030972957611084,grad_norm: 0.933460146251912, iteration: 331850
loss: 1.010520577430725,grad_norm: 0.8454704105198818, iteration: 331851
loss: 1.006066083908081,grad_norm: 0.8304603690904045, iteration: 331852
loss: 1.0164803266525269,grad_norm: 0.8920961988211881, iteration: 331853
loss: 1.0133857727050781,grad_norm: 0.6868147792409579, iteration: 331854
loss: 1.0040912628173828,grad_norm: 0.9561700117687125, iteration: 331855
loss: 0.9717645645141602,grad_norm: 0.9999991415053717, iteration: 331856
loss: 1.0198246240615845,grad_norm: 0.8952327683435088, iteration: 331857
loss: 0.9847458600997925,grad_norm: 0.8163577709031662, iteration: 331858
loss: 1.017110824584961,grad_norm: 0.839301612727147, iteration: 331859
loss: 0.9934604167938232,grad_norm: 0.9999995417817567, iteration: 331860
loss: 1.0022515058517456,grad_norm: 0.7961959617829818, iteration: 331861
loss: 1.0103986263275146,grad_norm: 0.7978696241254288, iteration: 331862
loss: 1.0338103771209717,grad_norm: 0.8104299733090471, iteration: 331863
loss: 0.9865965843200684,grad_norm: 0.9999996547410104, iteration: 331864
loss: 0.9779872894287109,grad_norm: 0.8235384005849543, iteration: 331865
loss: 1.077079176902771,grad_norm: 0.9999994798271, iteration: 331866
loss: 1.0165425539016724,grad_norm: 0.7982279436654415, iteration: 331867
loss: 1.0238405466079712,grad_norm: 0.9357915843705418, iteration: 331868
loss: 0.9986387491226196,grad_norm: 0.8133742071835107, iteration: 331869
loss: 1.0394566059112549,grad_norm: 0.863509183238994, iteration: 331870
loss: 1.0167797803878784,grad_norm: 0.7467788234979348, iteration: 331871
loss: 1.0218393802642822,grad_norm: 0.9783943495144899, iteration: 331872
loss: 1.0168806314468384,grad_norm: 0.6870720814528076, iteration: 331873
loss: 0.987711489200592,grad_norm: 0.9999990719143674, iteration: 331874
loss: 1.0403960943222046,grad_norm: 0.7529762202968798, iteration: 331875
loss: 1.0099663734436035,grad_norm: 0.8978040962900616, iteration: 331876
loss: 0.9756965637207031,grad_norm: 0.8542490635437687, iteration: 331877
loss: 1.0032936334609985,grad_norm: 0.859782241718445, iteration: 331878
loss: 1.0671334266662598,grad_norm: 0.9081562109107012, iteration: 331879
loss: 1.0016635656356812,grad_norm: 0.7082482885048353, iteration: 331880
loss: 0.9890098571777344,grad_norm: 0.8232332504221389, iteration: 331881
loss: 0.9998758435249329,grad_norm: 0.7812442848221895, iteration: 331882
loss: 1.0106247663497925,grad_norm: 0.8242925427533376, iteration: 331883
loss: 1.0231459140777588,grad_norm: 0.9999991194086305, iteration: 331884
loss: 0.9903478026390076,grad_norm: 0.8194977962059244, iteration: 331885
loss: 0.9908336400985718,grad_norm: 0.8214688582599189, iteration: 331886
loss: 0.9745911359786987,grad_norm: 0.727998551277043, iteration: 331887
loss: 1.0003255605697632,grad_norm: 0.8646609569618334, iteration: 331888
loss: 1.0012094974517822,grad_norm: 0.9999989922954235, iteration: 331889
loss: 1.0313388109207153,grad_norm: 0.9999999743729096, iteration: 331890
loss: 0.9819096922874451,grad_norm: 0.9616404378150245, iteration: 331891
loss: 1.006714105606079,grad_norm: 0.8096448623727374, iteration: 331892
loss: 1.0152051448822021,grad_norm: 0.8164181378834517, iteration: 331893
loss: 1.0106552839279175,grad_norm: 0.7669701089517688, iteration: 331894
loss: 1.012123465538025,grad_norm: 0.702303451656807, iteration: 331895
loss: 1.0055519342422485,grad_norm: 0.9296636997007603, iteration: 331896
loss: 0.9853144288063049,grad_norm: 0.7630522192158101, iteration: 331897
loss: 1.0228281021118164,grad_norm: 0.8858511510417287, iteration: 331898
loss: 0.9981672763824463,grad_norm: 0.7876607626015705, iteration: 331899
loss: 0.9905989170074463,grad_norm: 0.9999991294655663, iteration: 331900
loss: 0.9859400987625122,grad_norm: 0.9999990961090897, iteration: 331901
loss: 0.9631048440933228,grad_norm: 0.8497782405222909, iteration: 331902
loss: 1.0140074491500854,grad_norm: 0.7392594504105594, iteration: 331903
loss: 1.0008275508880615,grad_norm: 0.89969058320759, iteration: 331904
loss: 0.9921106100082397,grad_norm: 0.8285290372762472, iteration: 331905
loss: 0.9758323431015015,grad_norm: 0.9491989224381279, iteration: 331906
loss: 1.0148297548294067,grad_norm: 0.7927369697066899, iteration: 331907
loss: 1.0228533744812012,grad_norm: 0.7400105886417884, iteration: 331908
loss: 0.9889150261878967,grad_norm: 0.7527108901021383, iteration: 331909
loss: 0.9755779504776001,grad_norm: 0.8251242375405782, iteration: 331910
loss: 0.9735618829727173,grad_norm: 0.7458995224478605, iteration: 331911
loss: 1.0112812519073486,grad_norm: 0.824564276732715, iteration: 331912
loss: 1.0042883157730103,grad_norm: 0.8677357704686017, iteration: 331913
loss: 1.0138837099075317,grad_norm: 0.7132896717685521, iteration: 331914
loss: 0.9745671153068542,grad_norm: 0.7187987097945076, iteration: 331915
loss: 1.0507525205612183,grad_norm: 0.9999990866505712, iteration: 331916
loss: 1.0207659006118774,grad_norm: 0.9999990479677258, iteration: 331917
loss: 1.0410871505737305,grad_norm: 0.8964755960661527, iteration: 331918
loss: 1.0591366291046143,grad_norm: 0.9999992352688344, iteration: 331919
loss: 1.0032269954681396,grad_norm: 0.7982308159748994, iteration: 331920
loss: 0.979938805103302,grad_norm: 0.7789825141711064, iteration: 331921
loss: 0.9994259476661682,grad_norm: 0.8026567055161771, iteration: 331922
loss: 1.0212688446044922,grad_norm: 0.8978935444195998, iteration: 331923
loss: 0.9883997440338135,grad_norm: 0.8146988598873872, iteration: 331924
loss: 0.9669844508171082,grad_norm: 0.9344329367809563, iteration: 331925
loss: 1.0722898244857788,grad_norm: 0.999999298666502, iteration: 331926
loss: 0.9758055210113525,grad_norm: 0.7619927509741604, iteration: 331927
loss: 1.0187281370162964,grad_norm: 0.9999990332754934, iteration: 331928
loss: 0.9670588970184326,grad_norm: 0.8713067947279901, iteration: 331929
loss: 0.9788193106651306,grad_norm: 0.7707596366818391, iteration: 331930
loss: 0.9975944757461548,grad_norm: 0.7910847142108572, iteration: 331931
loss: 1.0036729574203491,grad_norm: 0.9999989358912836, iteration: 331932
loss: 0.9614078998565674,grad_norm: 0.8279707231868081, iteration: 331933
loss: 0.9904055595397949,grad_norm: 0.9999994360369198, iteration: 331934
loss: 0.9973215460777283,grad_norm: 0.7417192324183115, iteration: 331935
loss: 0.9905096292495728,grad_norm: 0.9999991095584434, iteration: 331936
loss: 1.0064036846160889,grad_norm: 0.9322737105519866, iteration: 331937
loss: 1.0309171676635742,grad_norm: 0.9298451193679771, iteration: 331938
loss: 0.9642635583877563,grad_norm: 0.8033845658686403, iteration: 331939
loss: 0.9793533682823181,grad_norm: 0.7319859473338306, iteration: 331940
loss: 1.0041390657424927,grad_norm: 0.94526763631511, iteration: 331941
loss: 1.0100971460342407,grad_norm: 0.6818793351166826, iteration: 331942
loss: 1.011275053024292,grad_norm: 0.8985099615509841, iteration: 331943
loss: 1.0161430835723877,grad_norm: 0.8478177044828568, iteration: 331944
loss: 1.007586121559143,grad_norm: 0.9285700210693681, iteration: 331945
loss: 0.9920511245727539,grad_norm: 0.8352928404194828, iteration: 331946
loss: 1.0409770011901855,grad_norm: 0.8488811873533133, iteration: 331947
loss: 1.0120458602905273,grad_norm: 0.7858240825888626, iteration: 331948
loss: 1.024134635925293,grad_norm: 0.9999998004539975, iteration: 331949
loss: 0.9911506772041321,grad_norm: 0.7911478067741655, iteration: 331950
loss: 0.9873974919319153,grad_norm: 0.7327021582331538, iteration: 331951
loss: 1.1143450736999512,grad_norm: 0.9999992032632999, iteration: 331952
loss: 1.0842790603637695,grad_norm: 0.8698380765047801, iteration: 331953
loss: 1.003703236579895,grad_norm: 0.8050590550129074, iteration: 331954
loss: 1.0233590602874756,grad_norm: 0.8432087108639298, iteration: 331955
loss: 0.9767369031906128,grad_norm: 0.8690146383574043, iteration: 331956
loss: 0.998212456703186,grad_norm: 0.7067657514697262, iteration: 331957
loss: 1.0248271226882935,grad_norm: 0.9999995152445438, iteration: 331958
loss: 1.0024328231811523,grad_norm: 0.8364253858639465, iteration: 331959
loss: 1.0026624202728271,grad_norm: 0.9233807928195644, iteration: 331960
loss: 1.0339202880859375,grad_norm: 0.8234438676426395, iteration: 331961
loss: 0.9604616165161133,grad_norm: 0.7907502917493, iteration: 331962
loss: 1.004942774772644,grad_norm: 0.7667919173091019, iteration: 331963
loss: 0.9991467595100403,grad_norm: 0.6756711281525882, iteration: 331964
loss: 1.0223108530044556,grad_norm: 0.8905327832422832, iteration: 331965
loss: 1.024835467338562,grad_norm: 0.999999552911665, iteration: 331966
loss: 1.0043106079101562,grad_norm: 0.9999992578719886, iteration: 331967
loss: 1.0251919031143188,grad_norm: 0.9355181605123922, iteration: 331968
loss: 1.0335930585861206,grad_norm: 0.8554391805441653, iteration: 331969
loss: 1.0709444284439087,grad_norm: 0.9999997781393828, iteration: 331970
loss: 0.948943018913269,grad_norm: 0.8132856261244641, iteration: 331971
loss: 1.0242328643798828,grad_norm: 0.7444752593357805, iteration: 331972
loss: 0.9953309893608093,grad_norm: 0.8867965274659411, iteration: 331973
loss: 1.0296627283096313,grad_norm: 0.9999998304087004, iteration: 331974
loss: 0.9923517107963562,grad_norm: 0.9897194783508938, iteration: 331975
loss: 0.9849196672439575,grad_norm: 0.693167515672778, iteration: 331976
loss: 1.0140111446380615,grad_norm: 0.9999991471120672, iteration: 331977
loss: 0.996487021446228,grad_norm: 0.7883214943957011, iteration: 331978
loss: 1.0380198955535889,grad_norm: 0.905312500022689, iteration: 331979
loss: 0.987508237361908,grad_norm: 0.8186455064303674, iteration: 331980
loss: 1.0187034606933594,grad_norm: 0.8669115278117129, iteration: 331981
loss: 0.9962168335914612,grad_norm: 0.8850252313477818, iteration: 331982
loss: 0.9985966086387634,grad_norm: 0.9364286709229374, iteration: 331983
loss: 1.0133733749389648,grad_norm: 0.9231312434871946, iteration: 331984
loss: 1.011365294456482,grad_norm: 0.9999996107511252, iteration: 331985
loss: 1.011726975440979,grad_norm: 0.89136367705803, iteration: 331986
loss: 1.0048586130142212,grad_norm: 0.8215631474115078, iteration: 331987
loss: 1.0303030014038086,grad_norm: 0.9999992522189886, iteration: 331988
loss: 0.9894194006919861,grad_norm: 0.935014817528899, iteration: 331989
loss: 1.0136204957962036,grad_norm: 0.8444997574151004, iteration: 331990
loss: 1.0212548971176147,grad_norm: 0.9462421705632066, iteration: 331991
loss: 0.966908872127533,grad_norm: 0.9243394745876927, iteration: 331992
loss: 1.0152394771575928,grad_norm: 0.9188932137809837, iteration: 331993
loss: 1.0080358982086182,grad_norm: 0.9431548762742034, iteration: 331994
loss: 1.0160398483276367,grad_norm: 0.8584410709955964, iteration: 331995
loss: 0.9983553886413574,grad_norm: 0.758311068575956, iteration: 331996
loss: 1.0138911008834839,grad_norm: 0.8344777803986851, iteration: 331997
loss: 1.0084830522537231,grad_norm: 0.77082118453574, iteration: 331998
loss: 0.9915668368339539,grad_norm: 0.9130605807254927, iteration: 331999
loss: 0.9886214733123779,grad_norm: 0.7543124376147052, iteration: 332000
loss: 1.011705756187439,grad_norm: 0.7177140529354998, iteration: 332001
loss: 1.0083266496658325,grad_norm: 0.7448609085758906, iteration: 332002
loss: 0.9958290457725525,grad_norm: 0.829100598352661, iteration: 332003
loss: 0.9976157546043396,grad_norm: 0.840824267473169, iteration: 332004
loss: 0.9780809283256531,grad_norm: 0.7413302309700304, iteration: 332005
loss: 0.9831432700157166,grad_norm: 0.983786771077432, iteration: 332006
loss: 0.9866475462913513,grad_norm: 0.7686009958099468, iteration: 332007
loss: 1.0478956699371338,grad_norm: 0.919997393046462, iteration: 332008
loss: 0.981433093547821,grad_norm: 0.964171942139521, iteration: 332009
loss: 0.9877890944480896,grad_norm: 0.743038565723368, iteration: 332010
loss: 0.9862251877784729,grad_norm: 0.9640200605592287, iteration: 332011
loss: 1.0074048042297363,grad_norm: 0.867872620776383, iteration: 332012
loss: 1.0031383037567139,grad_norm: 0.9991098256681868, iteration: 332013
loss: 0.9990362524986267,grad_norm: 0.7466469194284389, iteration: 332014
loss: 0.9831680655479431,grad_norm: 0.8513722006070812, iteration: 332015
loss: 0.9623816013336182,grad_norm: 0.9676112854648061, iteration: 332016
loss: 1.0084326267242432,grad_norm: 0.7995277676266225, iteration: 332017
loss: 0.9794960618019104,grad_norm: 0.7797247359819532, iteration: 332018
loss: 1.0251260995864868,grad_norm: 0.8625115126151243, iteration: 332019
loss: 1.025636076927185,grad_norm: 0.7495335302147258, iteration: 332020
loss: 1.0289543867111206,grad_norm: 0.7429250301090496, iteration: 332021
loss: 1.0172532796859741,grad_norm: 0.9999992214862093, iteration: 332022
loss: 1.127912998199463,grad_norm: 0.999999164518671, iteration: 332023
loss: 1.031264066696167,grad_norm: 0.9472300769561198, iteration: 332024
loss: 1.0267980098724365,grad_norm: 0.7092105718265482, iteration: 332025
loss: 1.0223735570907593,grad_norm: 0.8248012742563269, iteration: 332026
loss: 0.9786083698272705,grad_norm: 0.9999991957247717, iteration: 332027
loss: 0.9846000075340271,grad_norm: 0.8093636687633907, iteration: 332028
loss: 1.021915078163147,grad_norm: 0.9999990854705878, iteration: 332029
loss: 0.9948881268501282,grad_norm: 0.9999998385121476, iteration: 332030
loss: 0.9898813962936401,grad_norm: 0.81952891033925, iteration: 332031
loss: 0.9541105031967163,grad_norm: 0.7639577850757978, iteration: 332032
loss: 1.0211436748504639,grad_norm: 0.8024276954769063, iteration: 332033
loss: 0.9741998314857483,grad_norm: 0.8558592449528648, iteration: 332034
loss: 0.9792441725730896,grad_norm: 0.9436890029512631, iteration: 332035
loss: 0.9850438237190247,grad_norm: 0.9999988999406291, iteration: 332036
loss: 1.007422685623169,grad_norm: 0.9999991223923788, iteration: 332037
loss: 0.999943196773529,grad_norm: 0.7597334871947207, iteration: 332038
loss: 0.9976934194564819,grad_norm: 0.8573952037137863, iteration: 332039
loss: 1.0228796005249023,grad_norm: 0.6928185809093332, iteration: 332040
loss: 0.9837275147438049,grad_norm: 0.7922030135116002, iteration: 332041
loss: 0.9901797771453857,grad_norm: 0.9005167020782848, iteration: 332042
loss: 1.030946135520935,grad_norm: 0.9211870794444883, iteration: 332043
loss: 0.9822254776954651,grad_norm: 0.8057516993021202, iteration: 332044
loss: 1.000230073928833,grad_norm: 0.8931125620347536, iteration: 332045
loss: 0.9871588945388794,grad_norm: 0.9999994265213286, iteration: 332046
loss: 1.080319881439209,grad_norm: 0.9999991021285369, iteration: 332047
loss: 1.012600302696228,grad_norm: 0.9991418886363722, iteration: 332048
loss: 1.0185139179229736,grad_norm: 0.781859509412213, iteration: 332049
loss: 1.013143539428711,grad_norm: 0.7456223576362199, iteration: 332050
loss: 1.0212808847427368,grad_norm: 0.9992071205280164, iteration: 332051
loss: 1.0021247863769531,grad_norm: 0.9068156559220553, iteration: 332052
loss: 0.9961773157119751,grad_norm: 0.7421343585569473, iteration: 332053
loss: 1.0122109651565552,grad_norm: 0.999999236439901, iteration: 332054
loss: 0.9705043435096741,grad_norm: 0.9358848461668987, iteration: 332055
loss: 1.008497953414917,grad_norm: 0.8869314093416001, iteration: 332056
loss: 1.0026357173919678,grad_norm: 0.97808248981235, iteration: 332057
loss: 1.018378496170044,grad_norm: 0.9151948771276475, iteration: 332058
loss: 1.0496208667755127,grad_norm: 0.7099053787996725, iteration: 332059
loss: 1.0146304368972778,grad_norm: 0.9999990842660804, iteration: 332060
loss: 1.005968451499939,grad_norm: 0.7912350497103391, iteration: 332061
loss: 1.0251761674880981,grad_norm: 0.999999715087763, iteration: 332062
loss: 1.0120891332626343,grad_norm: 0.9599624406686706, iteration: 332063
loss: 1.0129541158676147,grad_norm: 0.8293419904652343, iteration: 332064
loss: 0.9403544664382935,grad_norm: 0.8921918306416584, iteration: 332065
loss: 1.0015052556991577,grad_norm: 0.8042616059565217, iteration: 332066
loss: 1.0247957706451416,grad_norm: 0.7343057566235687, iteration: 332067
loss: 0.9952657222747803,grad_norm: 0.7910918993966659, iteration: 332068
loss: 0.9984526038169861,grad_norm: 0.672797043214548, iteration: 332069
loss: 1.0075587034225464,grad_norm: 0.8598756601388673, iteration: 332070
loss: 1.0132266283035278,grad_norm: 0.7887553775735515, iteration: 332071
loss: 1.0404994487762451,grad_norm: 0.983987271703043, iteration: 332072
loss: 1.0252747535705566,grad_norm: 0.8416402876083371, iteration: 332073
loss: 0.9889845848083496,grad_norm: 0.8146571157808516, iteration: 332074
loss: 1.0013236999511719,grad_norm: 0.97399986679176, iteration: 332075
loss: 1.0251048803329468,grad_norm: 0.8305597944336491, iteration: 332076
loss: 0.9884852170944214,grad_norm: 0.8351154735280361, iteration: 332077
loss: 1.008978009223938,grad_norm: 0.8554301867033391, iteration: 332078
loss: 0.9758539199829102,grad_norm: 0.8682125886094235, iteration: 332079
loss: 0.9553560018539429,grad_norm: 0.974635352468017, iteration: 332080
loss: 0.9915717244148254,grad_norm: 0.7661412290519087, iteration: 332081
loss: 1.0030872821807861,grad_norm: 0.8291400336144898, iteration: 332082
loss: 0.9815963506698608,grad_norm: 0.8546666119965641, iteration: 332083
loss: 0.9949007034301758,grad_norm: 0.8825422266113847, iteration: 332084
loss: 0.9879236817359924,grad_norm: 0.8123244845828662, iteration: 332085
loss: 1.00886070728302,grad_norm: 0.752370673449381, iteration: 332086
loss: 1.00514554977417,grad_norm: 0.8492793014217761, iteration: 332087
loss: 1.0025348663330078,grad_norm: 0.8226650936476293, iteration: 332088
loss: 0.9997175335884094,grad_norm: 0.994795077721378, iteration: 332089
loss: 1.0489784479141235,grad_norm: 0.9999992167176042, iteration: 332090
loss: 1.0061548948287964,grad_norm: 0.7652129910512204, iteration: 332091
loss: 1.0085772275924683,grad_norm: 0.8879127695885312, iteration: 332092
loss: 0.9718396067619324,grad_norm: 0.7532133349196196, iteration: 332093
loss: 1.0298912525177002,grad_norm: 0.8374827021906521, iteration: 332094
loss: 1.010464072227478,grad_norm: 0.7500048710230535, iteration: 332095
loss: 0.9865705370903015,grad_norm: 0.8250840964833993, iteration: 332096
loss: 1.0031297206878662,grad_norm: 0.8654469842814136, iteration: 332097
loss: 1.0039498805999756,grad_norm: 0.8857402463642434, iteration: 332098
loss: 1.0205137729644775,grad_norm: 0.8109189802614348, iteration: 332099
loss: 1.0173715353012085,grad_norm: 0.9999992533519376, iteration: 332100
loss: 0.9795085787773132,grad_norm: 0.9072807585050547, iteration: 332101
loss: 1.0071417093276978,grad_norm: 0.9999991618390237, iteration: 332102
loss: 0.9660917520523071,grad_norm: 0.8280233646682007, iteration: 332103
loss: 0.999358057975769,grad_norm: 0.8490919770148176, iteration: 332104
loss: 0.9832693338394165,grad_norm: 0.897929829337887, iteration: 332105
loss: 1.016266107559204,grad_norm: 0.776196327500409, iteration: 332106
loss: 0.9832190275192261,grad_norm: 0.904864293552707, iteration: 332107
loss: 0.9905763268470764,grad_norm: 0.6929611800217937, iteration: 332108
loss: 0.962944507598877,grad_norm: 0.9776971016851898, iteration: 332109
loss: 1.0030364990234375,grad_norm: 0.7462962418328881, iteration: 332110
loss: 1.004568099975586,grad_norm: 0.9999991765133713, iteration: 332111
loss: 1.0178331136703491,grad_norm: 0.607805999703663, iteration: 332112
loss: 0.9632651805877686,grad_norm: 0.7758505532563889, iteration: 332113
loss: 0.9712824821472168,grad_norm: 0.7645457861670318, iteration: 332114
loss: 1.0476906299591064,grad_norm: 0.9999990755748596, iteration: 332115
loss: 1.0342044830322266,grad_norm: 0.924053531241404, iteration: 332116
loss: 1.0387823581695557,grad_norm: 0.8588120482664251, iteration: 332117
loss: 0.9804303646087646,grad_norm: 0.8777180882400971, iteration: 332118
loss: 1.0117394924163818,grad_norm: 0.9651355795874432, iteration: 332119
loss: 1.0567069053649902,grad_norm: 0.9999999742821548, iteration: 332120
loss: 1.0101613998413086,grad_norm: 0.8244323920139817, iteration: 332121
loss: 1.0105202198028564,grad_norm: 0.999999827991886, iteration: 332122
loss: 0.982201874256134,grad_norm: 0.8548981829435418, iteration: 332123
loss: 1.0250346660614014,grad_norm: 0.9960901157837078, iteration: 332124
loss: 1.0057851076126099,grad_norm: 0.7788237461851908, iteration: 332125
loss: 1.0261931419372559,grad_norm: 0.6800913931860436, iteration: 332126
loss: 0.9709157347679138,grad_norm: 0.9338702717344908, iteration: 332127
loss: 0.9686708450317383,grad_norm: 0.8080089051526321, iteration: 332128
loss: 0.9949897527694702,grad_norm: 0.9515969252284384, iteration: 332129
loss: 1.0661044120788574,grad_norm: 0.9999992605661372, iteration: 332130
loss: 0.9759190082550049,grad_norm: 0.7534519006286927, iteration: 332131
loss: 1.0058287382125854,grad_norm: 0.8212654462737217, iteration: 332132
loss: 0.9736191034317017,grad_norm: 0.9804052933561977, iteration: 332133
loss: 0.9916263222694397,grad_norm: 0.8561872213140802, iteration: 332134
loss: 1.0186697244644165,grad_norm: 0.7546958009145329, iteration: 332135
loss: 1.0172834396362305,grad_norm: 0.7931583829169915, iteration: 332136
loss: 0.9900881052017212,grad_norm: 0.9141607286963663, iteration: 332137
loss: 1.0084298849105835,grad_norm: 0.9999997818078002, iteration: 332138
loss: 1.0261335372924805,grad_norm: 0.906529822878977, iteration: 332139
loss: 1.0128173828125,grad_norm: 0.8085480045100609, iteration: 332140
loss: 0.9957209229469299,grad_norm: 0.7215526896116727, iteration: 332141
loss: 1.0718201398849487,grad_norm: 0.9999993033044638, iteration: 332142
loss: 0.9985706210136414,grad_norm: 0.9910349710949283, iteration: 332143
loss: 1.0602949857711792,grad_norm: 0.9999999270470397, iteration: 332144
loss: 1.0126622915267944,grad_norm: 0.9999994400258118, iteration: 332145
loss: 1.0263561010360718,grad_norm: 0.7083405628974248, iteration: 332146
loss: 1.0057135820388794,grad_norm: 0.8101890093976876, iteration: 332147
loss: 1.0173530578613281,grad_norm: 0.6924737592809022, iteration: 332148
loss: 1.029386281967163,grad_norm: 0.9999995657107403, iteration: 332149
loss: 1.0224398374557495,grad_norm: 0.8104930560585597, iteration: 332150
loss: 1.0175082683563232,grad_norm: 0.7374953599120091, iteration: 332151
loss: 1.012434720993042,grad_norm: 0.8787493800688811, iteration: 332152
loss: 1.0087592601776123,grad_norm: 0.8266413542229281, iteration: 332153
loss: 0.9473097324371338,grad_norm: 0.8296490139252795, iteration: 332154
loss: 0.9824638366699219,grad_norm: 0.8206990790690168, iteration: 332155
loss: 1.0213285684585571,grad_norm: 0.8345389799266438, iteration: 332156
loss: 1.015354871749878,grad_norm: 0.8675396155727428, iteration: 332157
loss: 1.028457522392273,grad_norm: 0.9999992373984524, iteration: 332158
loss: 1.0128660202026367,grad_norm: 0.8679992314739916, iteration: 332159
loss: 1.038287878036499,grad_norm: 0.8509272214514255, iteration: 332160
loss: 0.9801701307296753,grad_norm: 0.9062810388639204, iteration: 332161
loss: 1.0130257606506348,grad_norm: 0.9999996419674336, iteration: 332162
loss: 1.1085494756698608,grad_norm: 0.999999941431091, iteration: 332163
loss: 1.0148828029632568,grad_norm: 0.9999995442243953, iteration: 332164
loss: 1.0029243230819702,grad_norm: 0.7933826462656215, iteration: 332165
loss: 0.980118453502655,grad_norm: 0.8899469996886801, iteration: 332166
loss: 1.0037355422973633,grad_norm: 0.9999994288858526, iteration: 332167
loss: 0.9491058588027954,grad_norm: 0.7489354975900946, iteration: 332168
loss: 1.0273922681808472,grad_norm: 0.9999993293379273, iteration: 332169
loss: 1.0153145790100098,grad_norm: 0.9999992288514957, iteration: 332170
loss: 1.0125620365142822,grad_norm: 0.6628479707369767, iteration: 332171
loss: 1.0234348773956299,grad_norm: 0.9337228824175449, iteration: 332172
loss: 0.986189067363739,grad_norm: 0.6432097113318295, iteration: 332173
loss: 1.0240442752838135,grad_norm: 0.9107182074360808, iteration: 332174
loss: 1.0062878131866455,grad_norm: 0.9999996555095558, iteration: 332175
loss: 1.0057533979415894,grad_norm: 0.9999991534006224, iteration: 332176
loss: 1.0236544609069824,grad_norm: 0.893047501883573, iteration: 332177
loss: 1.0043857097625732,grad_norm: 0.8202075672461767, iteration: 332178
loss: 1.0198869705200195,grad_norm: 0.7223993834162945, iteration: 332179
loss: 1.043351411819458,grad_norm: 0.9999994304202512, iteration: 332180
loss: 0.9736917018890381,grad_norm: 0.7370942959511471, iteration: 332181
loss: 1.047834038734436,grad_norm: 0.9999997302016086, iteration: 332182
loss: 0.9912132024765015,grad_norm: 0.8688052162431217, iteration: 332183
loss: 0.9933721423149109,grad_norm: 0.9833413841160792, iteration: 332184
loss: 1.0086313486099243,grad_norm: 0.8201275070478268, iteration: 332185
loss: 1.0018820762634277,grad_norm: 0.7720840488260683, iteration: 332186
loss: 0.9880337119102478,grad_norm: 0.8521100590533145, iteration: 332187
loss: 0.9706260561943054,grad_norm: 0.8958917143877428, iteration: 332188
loss: 1.0127507448196411,grad_norm: 0.7777214607208157, iteration: 332189
loss: 0.9846885204315186,grad_norm: 0.9999991109884526, iteration: 332190
loss: 1.0153056383132935,grad_norm: 0.9150842013547232, iteration: 332191
loss: 0.9977586269378662,grad_norm: 0.6830428363687985, iteration: 332192
loss: 0.9707505106925964,grad_norm: 0.7121734627093926, iteration: 332193
loss: 1.0073062181472778,grad_norm: 0.9495492806054514, iteration: 332194
loss: 1.0203452110290527,grad_norm: 0.9185261842409693, iteration: 332195
loss: 1.012218713760376,grad_norm: 0.7739492741495102, iteration: 332196
loss: 1.0210806131362915,grad_norm: 0.9829766955461585, iteration: 332197
loss: 0.9985965490341187,grad_norm: 0.9999990893402063, iteration: 332198
loss: 1.0244829654693604,grad_norm: 0.9999989827564216, iteration: 332199
loss: 0.9914275407791138,grad_norm: 0.8122105749218834, iteration: 332200
loss: 0.9518502354621887,grad_norm: 0.9072710746066232, iteration: 332201
loss: 0.983283519744873,grad_norm: 0.9999990634931242, iteration: 332202
loss: 0.9694826006889343,grad_norm: 0.8404060775830124, iteration: 332203
loss: 0.9977166056632996,grad_norm: 0.8893624679572858, iteration: 332204
loss: 1.0456889867782593,grad_norm: 0.9924609451377306, iteration: 332205
loss: 1.0901750326156616,grad_norm: 0.9999990037099361, iteration: 332206
loss: 0.9767112731933594,grad_norm: 0.8719641436603965, iteration: 332207
loss: 0.9955028295516968,grad_norm: 0.7160142272177591, iteration: 332208
loss: 1.0076738595962524,grad_norm: 0.7815978235025793, iteration: 332209
loss: 0.9796195030212402,grad_norm: 0.848950958105118, iteration: 332210
loss: 1.0304239988327026,grad_norm: 0.7986282883761325, iteration: 332211
loss: 0.9842381477355957,grad_norm: 0.9999992241343962, iteration: 332212
loss: 1.0087189674377441,grad_norm: 0.9081950485691132, iteration: 332213
loss: 1.0902659893035889,grad_norm: 0.9999999170212762, iteration: 332214
loss: 1.0394048690795898,grad_norm: 0.9999996642538884, iteration: 332215
loss: 0.9821653366088867,grad_norm: 0.8621011435135587, iteration: 332216
loss: 1.0091277360916138,grad_norm: 0.7139036902882107, iteration: 332217
loss: 0.9926537275314331,grad_norm: 0.8380492023143056, iteration: 332218
loss: 1.0301822423934937,grad_norm: 0.9999990052289376, iteration: 332219
loss: 0.9746847748756409,grad_norm: 0.8448350576740625, iteration: 332220
loss: 1.0078048706054688,grad_norm: 0.9999994395808348, iteration: 332221
loss: 1.010143518447876,grad_norm: 0.930562972393016, iteration: 332222
loss: 1.0071667432785034,grad_norm: 0.9999989855560174, iteration: 332223
loss: 1.016581416130066,grad_norm: 0.9429905305672536, iteration: 332224
loss: 1.0827410221099854,grad_norm: 0.9999998358986311, iteration: 332225
loss: 1.002020001411438,grad_norm: 0.9228063945964882, iteration: 332226
loss: 1.023777961730957,grad_norm: 0.8179428037854032, iteration: 332227
loss: 0.9848841428756714,grad_norm: 0.8333249293655255, iteration: 332228
loss: 1.0272762775421143,grad_norm: 0.9999996136331575, iteration: 332229
loss: 1.0347967147827148,grad_norm: 0.9999997621987027, iteration: 332230
loss: 1.0134150981903076,grad_norm: 0.8859920678330132, iteration: 332231
loss: 1.009244680404663,grad_norm: 0.9443150007797383, iteration: 332232
loss: 1.0004936456680298,grad_norm: 0.7139728267698616, iteration: 332233
loss: 1.0097756385803223,grad_norm: 0.9999991730274139, iteration: 332234
loss: 1.0091723203659058,grad_norm: 0.7746669194677408, iteration: 332235
loss: 1.0196808576583862,grad_norm: 0.9999997078304959, iteration: 332236
loss: 0.9421606659889221,grad_norm: 0.8978640879094809, iteration: 332237
loss: 0.992195725440979,grad_norm: 0.7733737791062122, iteration: 332238
loss: 1.0007363557815552,grad_norm: 0.8457056288035877, iteration: 332239
loss: 0.9554035663604736,grad_norm: 0.9999991226155958, iteration: 332240
loss: 0.9758803844451904,grad_norm: 0.7537033304029219, iteration: 332241
loss: 0.9852709174156189,grad_norm: 0.8642744230747369, iteration: 332242
loss: 1.0263848304748535,grad_norm: 0.8662035339789819, iteration: 332243
loss: 1.0037609338760376,grad_norm: 0.7698723288199636, iteration: 332244
loss: 1.0370049476623535,grad_norm: 0.8660711196596809, iteration: 332245
loss: 1.0277684926986694,grad_norm: 0.9999998664739836, iteration: 332246
loss: 0.9604780673980713,grad_norm: 0.9999992807950862, iteration: 332247
loss: 1.0113176107406616,grad_norm: 0.9558427826496109, iteration: 332248
loss: 1.0366487503051758,grad_norm: 0.8645237600084692, iteration: 332249
loss: 1.004059910774231,grad_norm: 0.7952726273879591, iteration: 332250
loss: 0.9543963670730591,grad_norm: 0.8800902882808175, iteration: 332251
loss: 1.0057183504104614,grad_norm: 0.9578014753097376, iteration: 332252
loss: 0.986584484577179,grad_norm: 0.9999990624420463, iteration: 332253
loss: 1.040412187576294,grad_norm: 0.7982788886825962, iteration: 332254
loss: 0.9985123872756958,grad_norm: 0.760470536069648, iteration: 332255
loss: 0.9731838703155518,grad_norm: 0.8814439295766596, iteration: 332256
loss: 0.9967154264450073,grad_norm: 0.9138744070255276, iteration: 332257
loss: 1.0975029468536377,grad_norm: 0.9999998734024464, iteration: 332258
loss: 1.014516830444336,grad_norm: 0.7577480437407901, iteration: 332259
loss: 1.0618137121200562,grad_norm: 0.9999992313951327, iteration: 332260
loss: 0.9866622090339661,grad_norm: 0.9585964634483783, iteration: 332261
loss: 0.9796976447105408,grad_norm: 0.7705807173673307, iteration: 332262
loss: 1.0406384468078613,grad_norm: 0.7094493217600921, iteration: 332263
loss: 1.0158560276031494,grad_norm: 0.9999997043709729, iteration: 332264
loss: 1.0105551481246948,grad_norm: 0.8616428662990349, iteration: 332265
loss: 1.0031380653381348,grad_norm: 0.7837307481375684, iteration: 332266
loss: 0.9895131587982178,grad_norm: 0.8751561412053899, iteration: 332267
loss: 1.0115550756454468,grad_norm: 0.8289302161440302, iteration: 332268
loss: 1.0223222970962524,grad_norm: 0.9999998443989093, iteration: 332269
loss: 0.9741081595420837,grad_norm: 0.9999990817921219, iteration: 332270
loss: 0.9915887713432312,grad_norm: 0.9999996432935788, iteration: 332271
loss: 0.9894431829452515,grad_norm: 0.8450445813194515, iteration: 332272
loss: 1.0221600532531738,grad_norm: 0.9336631137001663, iteration: 332273
loss: 0.9487376809120178,grad_norm: 0.9806561377420189, iteration: 332274
loss: 1.0295979976654053,grad_norm: 0.9320223966813136, iteration: 332275
loss: 1.0113297700881958,grad_norm: 0.9999992512347795, iteration: 332276
loss: 0.9913116693496704,grad_norm: 0.858077767634542, iteration: 332277
loss: 1.0136089324951172,grad_norm: 0.9999997299321021, iteration: 332278
loss: 0.9518742561340332,grad_norm: 0.753888982155952, iteration: 332279
loss: 0.9945423603057861,grad_norm: 0.7582124469806628, iteration: 332280
loss: 1.0051747560501099,grad_norm: 0.7282214052494554, iteration: 332281
loss: 1.0768003463745117,grad_norm: 0.9999997120565656, iteration: 332282
loss: 0.9940151572227478,grad_norm: 0.6930813412724083, iteration: 332283
loss: 1.0155596733093262,grad_norm: 0.8764384174267184, iteration: 332284
loss: 0.958017110824585,grad_norm: 0.7893965570281422, iteration: 332285
loss: 0.9814709424972534,grad_norm: 0.8136415806924197, iteration: 332286
loss: 0.9746333956718445,grad_norm: 0.7919368815453878, iteration: 332287
loss: 1.0194333791732788,grad_norm: 0.9251732023953271, iteration: 332288
loss: 0.9975997805595398,grad_norm: 0.7520919811192124, iteration: 332289
loss: 1.0065010786056519,grad_norm: 0.9421414380937232, iteration: 332290
loss: 0.9946538209915161,grad_norm: 0.8967740410580607, iteration: 332291
loss: 0.99274080991745,grad_norm: 0.8508009721262194, iteration: 332292
loss: 0.9873762726783752,grad_norm: 0.8050227055104826, iteration: 332293
loss: 1.0465068817138672,grad_norm: 0.9084427489870498, iteration: 332294
loss: 0.9821919202804565,grad_norm: 0.9999989059338112, iteration: 332295
loss: 0.9942975640296936,grad_norm: 0.9999999306517962, iteration: 332296
loss: 0.971159040927887,grad_norm: 0.9824761578239463, iteration: 332297
loss: 1.1299434900283813,grad_norm: 0.9999991569866878, iteration: 332298
loss: 1.0195168256759644,grad_norm: 0.9191394284883607, iteration: 332299
loss: 1.0062350034713745,grad_norm: 0.8681974089205275, iteration: 332300
loss: 0.9954912066459656,grad_norm: 0.6921053551036143, iteration: 332301
loss: 0.9905220866203308,grad_norm: 0.8980000600339061, iteration: 332302
loss: 1.2103371620178223,grad_norm: 0.999999123458068, iteration: 332303
loss: 1.031874656677246,grad_norm: 0.8228217989750306, iteration: 332304
loss: 1.0297209024429321,grad_norm: 0.7736652960709843, iteration: 332305
loss: 1.0643391609191895,grad_norm: 0.9650207914335354, iteration: 332306
loss: 0.9718360900878906,grad_norm: 0.6936350468441675, iteration: 332307
loss: 1.0851472616195679,grad_norm: 0.9767305247371246, iteration: 332308
loss: 1.0226819515228271,grad_norm: 0.9370494899145094, iteration: 332309
loss: 0.9595673680305481,grad_norm: 0.8396821327720466, iteration: 332310
loss: 0.9796640276908875,grad_norm: 0.8234249910126487, iteration: 332311
loss: 0.9906381964683533,grad_norm: 0.999999111005616, iteration: 332312
loss: 1.0258660316467285,grad_norm: 0.8060674113551737, iteration: 332313
loss: 1.0374813079833984,grad_norm: 0.9999992339367212, iteration: 332314
loss: 1.0279120206832886,grad_norm: 0.9948576169477733, iteration: 332315
loss: 0.9863157868385315,grad_norm: 0.9999993487472137, iteration: 332316
loss: 1.0653337240219116,grad_norm: 0.8282182882521485, iteration: 332317
loss: 1.0242047309875488,grad_norm: 0.8151447030078047, iteration: 332318
loss: 0.9680659770965576,grad_norm: 0.85991326216037, iteration: 332319
loss: 1.0173659324645996,grad_norm: 0.9259561762244197, iteration: 332320
loss: 0.998196542263031,grad_norm: 0.8554127405078218, iteration: 332321
loss: 0.9813489317893982,grad_norm: 0.9074523173412277, iteration: 332322
loss: 1.0011686086654663,grad_norm: 0.7778237615616485, iteration: 332323
loss: 1.0231117010116577,grad_norm: 0.851735611236108, iteration: 332324
loss: 1.0155538320541382,grad_norm: 0.8198717147147827, iteration: 332325
loss: 1.0387858152389526,grad_norm: 0.8346270029643117, iteration: 332326
loss: 1.0210984945297241,grad_norm: 0.6445137575018893, iteration: 332327
loss: 1.020677924156189,grad_norm: 0.827970689921063, iteration: 332328
loss: 0.953335165977478,grad_norm: 0.7999458195083732, iteration: 332329
loss: 0.9792332649230957,grad_norm: 0.8617843789470772, iteration: 332330
loss: 1.0378096103668213,grad_norm: 0.8483634312319296, iteration: 332331
loss: 1.019514799118042,grad_norm: 0.8035726813444987, iteration: 332332
loss: 1.0144481658935547,grad_norm: 0.7953469996841037, iteration: 332333
loss: 0.9978175759315491,grad_norm: 0.8873730248271505, iteration: 332334
loss: 1.0955705642700195,grad_norm: 0.9999992048434285, iteration: 332335
loss: 0.9836393594741821,grad_norm: 0.8347404409978774, iteration: 332336
loss: 1.0014575719833374,grad_norm: 0.9999990879459567, iteration: 332337
loss: 1.0046658515930176,grad_norm: 0.7797304089707049, iteration: 332338
loss: 1.0376414060592651,grad_norm: 0.880270577527282, iteration: 332339
loss: 0.9989046454429626,grad_norm: 0.9769410450477353, iteration: 332340
loss: 0.9985920786857605,grad_norm: 0.999998931383633, iteration: 332341
loss: 1.0710058212280273,grad_norm: 0.8701627153094645, iteration: 332342
loss: 0.9887140393257141,grad_norm: 0.8439785788178873, iteration: 332343
loss: 1.0055418014526367,grad_norm: 0.757517564612113, iteration: 332344
loss: 1.049376130104065,grad_norm: 0.8474617793783025, iteration: 332345
loss: 1.034589171409607,grad_norm: 0.9697818179567113, iteration: 332346
loss: 0.986663818359375,grad_norm: 0.9075114140661812, iteration: 332347
loss: 0.9721785187721252,grad_norm: 0.8777461128442796, iteration: 332348
loss: 0.9852181077003479,grad_norm: 0.8105195829538533, iteration: 332349
loss: 0.9888051152229309,grad_norm: 0.9028780756197997, iteration: 332350
loss: 0.9723679423332214,grad_norm: 0.7445739061388874, iteration: 332351
loss: 1.053302526473999,grad_norm: 0.7920592177906275, iteration: 332352
loss: 0.9702072143554688,grad_norm: 0.888330294335529, iteration: 332353
loss: 0.9876765608787537,grad_norm: 0.8864352726023352, iteration: 332354
loss: 0.9861487746238708,grad_norm: 0.9999990848867896, iteration: 332355
loss: 1.1532329320907593,grad_norm: 0.9999993849317727, iteration: 332356
loss: 0.9891777634620667,grad_norm: 0.8924226000637032, iteration: 332357
loss: 0.9731594920158386,grad_norm: 0.9144997444984977, iteration: 332358
loss: 1.064051866531372,grad_norm: 0.9999993857356008, iteration: 332359
loss: 1.1488423347473145,grad_norm: 0.9999998192753498, iteration: 332360
loss: 1.0030094385147095,grad_norm: 0.9999992203153634, iteration: 332361
loss: 1.0053662061691284,grad_norm: 0.786159657852337, iteration: 332362
loss: 1.0200183391571045,grad_norm: 0.9362699752904428, iteration: 332363
loss: 1.0122935771942139,grad_norm: 0.6853537276428696, iteration: 332364
loss: 1.0049656629562378,grad_norm: 0.7554096013998473, iteration: 332365
loss: 0.9882591366767883,grad_norm: 0.9350077005238969, iteration: 332366
loss: 1.0164715051651,grad_norm: 0.7931572367309755, iteration: 332367
loss: 1.0387662649154663,grad_norm: 0.9999999668060642, iteration: 332368
loss: 1.0305017232894897,grad_norm: 0.8447898263399679, iteration: 332369
loss: 1.0054619312286377,grad_norm: 0.6365459444167403, iteration: 332370
loss: 0.9839307069778442,grad_norm: 0.859328834942515, iteration: 332371
loss: 1.0139851570129395,grad_norm: 0.9175259364529348, iteration: 332372
loss: 1.0296614170074463,grad_norm: 0.9999997955928506, iteration: 332373
loss: 0.9900400042533875,grad_norm: 0.9999991613254566, iteration: 332374
loss: 1.0254443883895874,grad_norm: 0.7989797405929623, iteration: 332375
loss: 1.0091575384140015,grad_norm: 0.781346154881257, iteration: 332376
loss: 0.9681487083435059,grad_norm: 0.8702127647421022, iteration: 332377
loss: 1.0089997053146362,grad_norm: 0.8882646247160595, iteration: 332378
loss: 0.9928762912750244,grad_norm: 0.7658111085982997, iteration: 332379
loss: 0.9916055798530579,grad_norm: 0.8049924977828861, iteration: 332380
loss: 0.9545959234237671,grad_norm: 0.7699206628284502, iteration: 332381
loss: 0.9827507138252258,grad_norm: 0.9220548037977943, iteration: 332382
loss: 1.0165764093399048,grad_norm: 0.9999992973817169, iteration: 332383
loss: 0.9844626188278198,grad_norm: 0.9999990850399776, iteration: 332384
loss: 1.0256115198135376,grad_norm: 0.8501264499192921, iteration: 332385
loss: 0.9914336800575256,grad_norm: 0.8140897056382178, iteration: 332386
loss: 0.9619535803794861,grad_norm: 0.7336474512605615, iteration: 332387
loss: 1.0228060483932495,grad_norm: 0.651287654948649, iteration: 332388
loss: 1.0142946243286133,grad_norm: 0.9214232781145819, iteration: 332389
loss: 1.044250726699829,grad_norm: 0.8736408292044927, iteration: 332390
loss: 0.9987533092498779,grad_norm: 0.778654339414374, iteration: 332391
loss: 1.176361083984375,grad_norm: 0.9999992809874458, iteration: 332392
loss: 1.0033464431762695,grad_norm: 0.9999991721571612, iteration: 332393
loss: 0.9541006088256836,grad_norm: 0.8222600477196226, iteration: 332394
loss: 1.0527012348175049,grad_norm: 0.922205149184236, iteration: 332395
loss: 1.043794870376587,grad_norm: 0.9999995633968181, iteration: 332396
loss: 1.0346381664276123,grad_norm: 0.900306352212965, iteration: 332397
loss: 1.0648772716522217,grad_norm: 0.9999990859948158, iteration: 332398
loss: 0.9878454804420471,grad_norm: 0.7710990839551617, iteration: 332399
loss: 1.0398699045181274,grad_norm: 0.999999311358251, iteration: 332400
loss: 1.0061787366867065,grad_norm: 0.999999289186311, iteration: 332401
loss: 1.070190191268921,grad_norm: 0.9827643925684286, iteration: 332402
loss: 0.9667515158653259,grad_norm: 0.8946536287393061, iteration: 332403
loss: 1.0337709188461304,grad_norm: 0.9280818764242202, iteration: 332404
loss: 1.0382760763168335,grad_norm: 0.7730547434564, iteration: 332405
loss: 0.9978547692298889,grad_norm: 0.9676823902876369, iteration: 332406
loss: 1.0104727745056152,grad_norm: 0.7606495368532799, iteration: 332407
loss: 1.023239254951477,grad_norm: 0.9999999407231518, iteration: 332408
loss: 0.9688401222229004,grad_norm: 0.8588581268159451, iteration: 332409
loss: 0.9974112510681152,grad_norm: 0.7593471784735102, iteration: 332410
loss: 1.005142331123352,grad_norm: 0.7913091909611191, iteration: 332411
loss: 1.1499755382537842,grad_norm: 0.8717606101985776, iteration: 332412
loss: 0.9852376580238342,grad_norm: 0.9999989734941829, iteration: 332413
loss: 1.0216845273971558,grad_norm: 0.659686919970812, iteration: 332414
loss: 1.0145987272262573,grad_norm: 0.999999293717409, iteration: 332415
loss: 1.0115553140640259,grad_norm: 0.8428472517026514, iteration: 332416
loss: 0.9667261242866516,grad_norm: 0.965948554226046, iteration: 332417
loss: 1.0020084381103516,grad_norm: 0.9014487514127849, iteration: 332418
loss: 1.026607632637024,grad_norm: 0.9593945445092179, iteration: 332419
loss: 1.0039013624191284,grad_norm: 0.9999995598411109, iteration: 332420
loss: 0.9574814438819885,grad_norm: 0.7718314143148977, iteration: 332421
loss: 0.9874239563941956,grad_norm: 0.9342732919697089, iteration: 332422
loss: 0.993171751499176,grad_norm: 0.7892275939190302, iteration: 332423
loss: 1.013333797454834,grad_norm: 0.8526412563340675, iteration: 332424
loss: 1.025190830230713,grad_norm: 0.7605818671739072, iteration: 332425
loss: 0.9914193749427795,grad_norm: 0.9999989823753109, iteration: 332426
loss: 0.966653048992157,grad_norm: 0.8277382840500074, iteration: 332427
loss: 0.9884921312332153,grad_norm: 0.9999999505348041, iteration: 332428
loss: 0.9782312512397766,grad_norm: 0.7436998856951369, iteration: 332429
loss: 1.0380690097808838,grad_norm: 0.9094374260634874, iteration: 332430
loss: 0.9668341279029846,grad_norm: 0.999998937183158, iteration: 332431
loss: 1.0027649402618408,grad_norm: 0.825979953824104, iteration: 332432
loss: 1.0038032531738281,grad_norm: 0.7404738991380452, iteration: 332433
loss: 1.0226091146469116,grad_norm: 0.8724161784231468, iteration: 332434
loss: 1.0012948513031006,grad_norm: 0.9542575117281099, iteration: 332435
loss: 0.9996145963668823,grad_norm: 0.8579397999324706, iteration: 332436
loss: 1.0319743156433105,grad_norm: 0.7794575768087268, iteration: 332437
loss: 1.1062480211257935,grad_norm: 0.9999991988411808, iteration: 332438
loss: 0.9519749283790588,grad_norm: 0.8162974385811479, iteration: 332439
loss: 0.9780605435371399,grad_norm: 0.9470897131044644, iteration: 332440
loss: 1.0144869089126587,grad_norm: 0.7402220774100087, iteration: 332441
loss: 1.0433915853500366,grad_norm: 0.984142848049334, iteration: 332442
loss: 0.9616798162460327,grad_norm: 0.7343419383133414, iteration: 332443
loss: 0.9935411810874939,grad_norm: 0.7936074790226725, iteration: 332444
loss: 1.0072245597839355,grad_norm: 0.7318218288649903, iteration: 332445
loss: 0.9837825298309326,grad_norm: 0.657714473983704, iteration: 332446
loss: 1.0759360790252686,grad_norm: 0.8699212047386119, iteration: 332447
loss: 1.0074794292449951,grad_norm: 0.8612380012308016, iteration: 332448
loss: 0.989338755607605,grad_norm: 0.8393456241461965, iteration: 332449
loss: 0.9757544994354248,grad_norm: 0.8006988630813753, iteration: 332450
loss: 1.0874691009521484,grad_norm: 0.9999990273911149, iteration: 332451
loss: 1.0051097869873047,grad_norm: 0.9999992241376435, iteration: 332452
loss: 0.9653774499893188,grad_norm: 0.8087111557256402, iteration: 332453
loss: 0.9775530695915222,grad_norm: 0.7801922393415374, iteration: 332454
loss: 1.0218300819396973,grad_norm: 0.8825707622845966, iteration: 332455
loss: 0.9760676622390747,grad_norm: 0.719142724784461, iteration: 332456
loss: 0.9995148777961731,grad_norm: 0.9943848674046971, iteration: 332457
loss: 1.0103510618209839,grad_norm: 0.9728518228322665, iteration: 332458
loss: 1.006712794303894,grad_norm: 0.9789459052044529, iteration: 332459
loss: 1.00508713722229,grad_norm: 0.8823018308798358, iteration: 332460
loss: 1.008970856666565,grad_norm: 0.8579118531147877, iteration: 332461
loss: 0.9559584259986877,grad_norm: 0.8337741095075857, iteration: 332462
loss: 0.9846364259719849,grad_norm: 0.8776318610507509, iteration: 332463
loss: 0.975456178188324,grad_norm: 0.8130399693327656, iteration: 332464
loss: 1.02542245388031,grad_norm: 0.9999991724092244, iteration: 332465
loss: 1.0236762762069702,grad_norm: 0.9025207488218367, iteration: 332466
loss: 1.044241189956665,grad_norm: 0.7940928564828105, iteration: 332467
loss: 1.0063095092773438,grad_norm: 0.8275258930948306, iteration: 332468
loss: 0.9883211851119995,grad_norm: 0.9581878695719663, iteration: 332469
loss: 1.0289790630340576,grad_norm: 0.8685935022475783, iteration: 332470
loss: 1.0043941736221313,grad_norm: 0.7286880810733589, iteration: 332471
loss: 1.0354223251342773,grad_norm: 0.9999993086153469, iteration: 332472
loss: 0.9681034088134766,grad_norm: 0.7182327538806196, iteration: 332473
loss: 1.087480902671814,grad_norm: 0.9999991269877951, iteration: 332474
loss: 1.0095199346542358,grad_norm: 0.7456331768491449, iteration: 332475
loss: 0.9783833026885986,grad_norm: 0.9030041622634434, iteration: 332476
loss: 1.0046120882034302,grad_norm: 0.7406506774256603, iteration: 332477
loss: 1.0507733821868896,grad_norm: 0.9999992670194721, iteration: 332478
loss: 1.0000485181808472,grad_norm: 0.9138582540829377, iteration: 332479
loss: 1.0036840438842773,grad_norm: 0.9239171163167537, iteration: 332480
loss: 0.9787416458129883,grad_norm: 0.8794269198104525, iteration: 332481
loss: 0.9899947643280029,grad_norm: 0.863302551553732, iteration: 332482
loss: 0.9978674650192261,grad_norm: 0.9023947684781871, iteration: 332483
loss: 1.004981517791748,grad_norm: 0.9290411393273929, iteration: 332484
loss: 1.0329293012619019,grad_norm: 0.9999991348927398, iteration: 332485
loss: 0.9914321899414062,grad_norm: 0.8530693658178078, iteration: 332486
loss: 1.0427755117416382,grad_norm: 0.9642589798147689, iteration: 332487
loss: 0.9506563544273376,grad_norm: 0.8584026508658481, iteration: 332488
loss: 0.991306722164154,grad_norm: 0.7106334851191641, iteration: 332489
loss: 0.9546007513999939,grad_norm: 0.8425315209522182, iteration: 332490
loss: 0.9900943636894226,grad_norm: 0.7971951198424556, iteration: 332491
loss: 1.0030518770217896,grad_norm: 0.7643634075298315, iteration: 332492
loss: 1.0529707670211792,grad_norm: 0.9511065964201089, iteration: 332493
loss: 1.0738967657089233,grad_norm: 0.9158058670530377, iteration: 332494
loss: 1.017591953277588,grad_norm: 0.9999991251038224, iteration: 332495
loss: 0.9675515294075012,grad_norm: 0.8632758488345834, iteration: 332496
loss: 1.0072330236434937,grad_norm: 0.6351297623033267, iteration: 332497
loss: 0.978944718837738,grad_norm: 0.8948304052160883, iteration: 332498
loss: 1.0226739645004272,grad_norm: 0.8131150526892812, iteration: 332499
loss: 0.997446596622467,grad_norm: 0.9999990232949494, iteration: 332500
loss: 1.0186020135879517,grad_norm: 0.820825225390159, iteration: 332501
loss: 1.0266791582107544,grad_norm: 0.9999991521897275, iteration: 332502
loss: 0.9871387481689453,grad_norm: 0.9999999430195975, iteration: 332503
loss: 0.9984971284866333,grad_norm: 0.9999990497757034, iteration: 332504
loss: 1.0125443935394287,grad_norm: 0.8296877969840802, iteration: 332505
loss: 1.0173699855804443,grad_norm: 0.9268919761569785, iteration: 332506
loss: 0.9772413372993469,grad_norm: 0.8949126530501097, iteration: 332507
loss: 0.9865949749946594,grad_norm: 0.7569353179495582, iteration: 332508
loss: 0.9519016742706299,grad_norm: 0.8929551042102087, iteration: 332509
loss: 1.0203582048416138,grad_norm: 0.921267688474232, iteration: 332510
loss: 0.9645395874977112,grad_norm: 0.7502970131213444, iteration: 332511
loss: 0.991453230381012,grad_norm: 0.7927383232103553, iteration: 332512
loss: 1.089560866355896,grad_norm: 0.9999996204569358, iteration: 332513
loss: 1.0087404251098633,grad_norm: 0.8507209717149836, iteration: 332514
loss: 1.019234299659729,grad_norm: 0.9735265240182018, iteration: 332515
loss: 1.0574439764022827,grad_norm: 0.9705565629033651, iteration: 332516
loss: 1.0074412822723389,grad_norm: 0.7363329680872969, iteration: 332517
loss: 0.9989165663719177,grad_norm: 0.9999991079395754, iteration: 332518
loss: 0.963010847568512,grad_norm: 0.9525159483742156, iteration: 332519
loss: 1.0238008499145508,grad_norm: 0.8937871212981121, iteration: 332520
loss: 0.9565523862838745,grad_norm: 0.9330290863209036, iteration: 332521
loss: 1.0542577505111694,grad_norm: 0.9999996060510644, iteration: 332522
loss: 0.9885095953941345,grad_norm: 0.7244848528036799, iteration: 332523
loss: 1.0230988264083862,grad_norm: 0.694850803851222, iteration: 332524
loss: 0.9919498562812805,grad_norm: 0.8503742603302115, iteration: 332525
loss: 0.9918050169944763,grad_norm: 0.8494805288713458, iteration: 332526
loss: 1.018454909324646,grad_norm: 0.7409656903225711, iteration: 332527
loss: 1.0573030710220337,grad_norm: 0.8101273339150883, iteration: 332528
loss: 0.9995947480201721,grad_norm: 0.8096931906199982, iteration: 332529
loss: 0.9388784170150757,grad_norm: 0.7698956447265317, iteration: 332530
loss: 0.9780715703964233,grad_norm: 0.8557721222889242, iteration: 332531
loss: 1.0400642156600952,grad_norm: 0.9999990969075622, iteration: 332532
loss: 0.9840664267539978,grad_norm: 0.7586822237374765, iteration: 332533
loss: 1.0121111869812012,grad_norm: 0.7513147143984118, iteration: 332534
loss: 0.9875169396400452,grad_norm: 0.8464590881035655, iteration: 332535
loss: 0.976659893989563,grad_norm: 0.7872190981069238, iteration: 332536
loss: 0.996090829372406,grad_norm: 0.7752120735065641, iteration: 332537
loss: 1.0368543863296509,grad_norm: 0.999999941772007, iteration: 332538
loss: 1.0005782842636108,grad_norm: 0.8818225702731672, iteration: 332539
loss: 1.0046918392181396,grad_norm: 0.6368667980352538, iteration: 332540
loss: 0.9920468330383301,grad_norm: 0.8475135984412712, iteration: 332541
loss: 1.0241166353225708,grad_norm: 0.9999996577727627, iteration: 332542
loss: 1.0474518537521362,grad_norm: 0.9171092427529829, iteration: 332543
loss: 1.0732697248458862,grad_norm: 0.8058891270289102, iteration: 332544
loss: 0.9805606007575989,grad_norm: 0.8702227675289286, iteration: 332545
loss: 1.0168951749801636,grad_norm: 0.8407080068517891, iteration: 332546
loss: 1.016359567642212,grad_norm: 0.7501400437172133, iteration: 332547
loss: 1.0133469104766846,grad_norm: 0.8818972453426621, iteration: 332548
loss: 0.9572291970252991,grad_norm: 0.6837895073581066, iteration: 332549
loss: 1.0152618885040283,grad_norm: 0.8122995186611738, iteration: 332550
loss: 1.0071252584457397,grad_norm: 0.8604656398357778, iteration: 332551
loss: 0.9862056970596313,grad_norm: 0.8387477174502774, iteration: 332552
loss: 0.9866247177124023,grad_norm: 0.9504672328302455, iteration: 332553
loss: 0.9732298851013184,grad_norm: 0.9083249065216262, iteration: 332554
loss: 0.9924129247665405,grad_norm: 0.7386791835022493, iteration: 332555
loss: 0.9988965392112732,grad_norm: 0.8279267715242331, iteration: 332556
loss: 0.9985222220420837,grad_norm: 0.8194537196190795, iteration: 332557
loss: 0.983898937702179,grad_norm: 0.8886910995533392, iteration: 332558
loss: 1.0032556056976318,grad_norm: 0.843101351789498, iteration: 332559
loss: 0.9964969158172607,grad_norm: 0.9259439579251034, iteration: 332560
loss: 1.018897294998169,grad_norm: 0.7264992962161647, iteration: 332561
loss: 1.005191683769226,grad_norm: 0.7643416072789364, iteration: 332562
loss: 0.9676269292831421,grad_norm: 0.857218554578669, iteration: 332563
loss: 1.035622000694275,grad_norm: 0.7749672653929881, iteration: 332564
loss: 0.9869656562805176,grad_norm: 0.925070463013877, iteration: 332565
loss: 0.9767894148826599,grad_norm: 0.9839871679469481, iteration: 332566
loss: 0.9974097609519958,grad_norm: 0.783564523112569, iteration: 332567
loss: 0.9967095851898193,grad_norm: 0.8149808670155296, iteration: 332568
loss: 1.0000191926956177,grad_norm: 0.8086781053770574, iteration: 332569
loss: 1.0042707920074463,grad_norm: 0.8570472544492337, iteration: 332570
loss: 0.9885352849960327,grad_norm: 0.8220960623136958, iteration: 332571
loss: 1.0256187915802002,grad_norm: 0.9999998887159538, iteration: 332572
loss: 0.9922279715538025,grad_norm: 0.8882148999530831, iteration: 332573
loss: 1.0386935472488403,grad_norm: 0.9999998127848944, iteration: 332574
loss: 1.0058234930038452,grad_norm: 0.7986175545712532, iteration: 332575
loss: 1.0255175828933716,grad_norm: 0.8114868304950511, iteration: 332576
loss: 0.9533794522285461,grad_norm: 0.7716016169193292, iteration: 332577
loss: 1.0192575454711914,grad_norm: 0.8585367195949564, iteration: 332578
loss: 0.9766656756401062,grad_norm: 0.8190431913725931, iteration: 332579
loss: 1.0381674766540527,grad_norm: 0.9999990118860662, iteration: 332580
loss: 0.966275155544281,grad_norm: 0.8392280516477039, iteration: 332581
loss: 1.1270561218261719,grad_norm: 1.000000017957557, iteration: 332582
loss: 1.012527346611023,grad_norm: 0.9658110752545935, iteration: 332583
loss: 0.9769344925880432,grad_norm: 0.7635037810692085, iteration: 332584
loss: 1.0204076766967773,grad_norm: 0.9796231940164286, iteration: 332585
loss: 0.9983407258987427,grad_norm: 0.9999992064409277, iteration: 332586
loss: 1.006761908531189,grad_norm: 0.8202509900899507, iteration: 332587
loss: 1.0102144479751587,grad_norm: 0.9112208042300867, iteration: 332588
loss: 1.0483968257904053,grad_norm: 0.9534869005306159, iteration: 332589
loss: 1.0159697532653809,grad_norm: 0.9999994618141681, iteration: 332590
loss: 0.9939785003662109,grad_norm: 0.6981227095652507, iteration: 332591
loss: 1.0075758695602417,grad_norm: 0.9487412580383161, iteration: 332592
loss: 0.9897421002388,grad_norm: 0.8528423270456558, iteration: 332593
loss: 1.0233882665634155,grad_norm: 0.8105904755528094, iteration: 332594
loss: 1.0177663564682007,grad_norm: 0.8425408104715714, iteration: 332595
loss: 0.9970940351486206,grad_norm: 0.8131364499286075, iteration: 332596
loss: 1.0317806005477905,grad_norm: 0.840889004264161, iteration: 332597
loss: 0.9919971823692322,grad_norm: 0.8328380655576049, iteration: 332598
loss: 0.9973748326301575,grad_norm: 0.7960081211985925, iteration: 332599
loss: 0.953201949596405,grad_norm: 0.8881314256613103, iteration: 332600
loss: 0.9726291298866272,grad_norm: 0.9304348174374374, iteration: 332601
loss: 0.9872156381607056,grad_norm: 0.7196485095970641, iteration: 332602
loss: 1.0561712980270386,grad_norm: 0.7637492930815549, iteration: 332603
loss: 0.9568997025489807,grad_norm: 0.7810552350675869, iteration: 332604
loss: 1.0143979787826538,grad_norm: 0.9969633791791113, iteration: 332605
loss: 0.947490930557251,grad_norm: 0.9999996154815787, iteration: 332606
loss: 1.0387171506881714,grad_norm: 0.8699334429965384, iteration: 332607
loss: 1.0103546380996704,grad_norm: 0.7498135527705576, iteration: 332608
loss: 1.012850046157837,grad_norm: 0.8993395544408208, iteration: 332609
loss: 0.9710692763328552,grad_norm: 0.7493745325933188, iteration: 332610
loss: 0.9593713879585266,grad_norm: 0.8254478002009457, iteration: 332611
loss: 1.0132845640182495,grad_norm: 0.6935984701777554, iteration: 332612
loss: 1.0177754163742065,grad_norm: 0.8285885785042878, iteration: 332613
loss: 1.008832335472107,grad_norm: 0.767415701228328, iteration: 332614
loss: 1.0771287679672241,grad_norm: 0.9999999447207426, iteration: 332615
loss: 0.9771538376808167,grad_norm: 0.7873057117568232, iteration: 332616
loss: 0.9950956702232361,grad_norm: 0.7781525177990057, iteration: 332617
loss: 1.0097788572311401,grad_norm: 0.9374378123351529, iteration: 332618
loss: 1.0002261400222778,grad_norm: 0.9999998096401657, iteration: 332619
loss: 1.0115959644317627,grad_norm: 0.7387538548539213, iteration: 332620
loss: 1.0174200534820557,grad_norm: 0.8317211861750695, iteration: 332621
loss: 0.9872708916664124,grad_norm: 0.9959041459828692, iteration: 332622
loss: 0.9747638702392578,grad_norm: 0.9999991943072418, iteration: 332623
loss: 0.9964134693145752,grad_norm: 0.8486274817653982, iteration: 332624
loss: 0.963189959526062,grad_norm: 0.860537590792154, iteration: 332625
loss: 1.0079050064086914,grad_norm: 0.835119525496621, iteration: 332626
loss: 1.0047873258590698,grad_norm: 0.7572304817454945, iteration: 332627
loss: 1.0279597043991089,grad_norm: 0.9999990967911564, iteration: 332628
loss: 0.9809394478797913,grad_norm: 0.8278601049051537, iteration: 332629
loss: 0.9878307580947876,grad_norm: 0.902086330929406, iteration: 332630
loss: 0.9965909123420715,grad_norm: 0.7783154565926352, iteration: 332631
loss: 0.9921010732650757,grad_norm: 0.9999990262062248, iteration: 332632
loss: 1.0255578756332397,grad_norm: 0.9999990265035514, iteration: 332633
loss: 0.9975087642669678,grad_norm: 0.8705409747938649, iteration: 332634
loss: 1.0087082386016846,grad_norm: 0.9392108857161341, iteration: 332635
loss: 1.0125497579574585,grad_norm: 0.9999990991114056, iteration: 332636
loss: 1.0202994346618652,grad_norm: 0.9419946997468438, iteration: 332637
loss: 0.9876086115837097,grad_norm: 0.8902194257436512, iteration: 332638
loss: 1.0561237335205078,grad_norm: 0.9393143176575067, iteration: 332639
loss: 1.0165183544158936,grad_norm: 0.9999991046560555, iteration: 332640
loss: 0.9830111265182495,grad_norm: 0.8940266994409106, iteration: 332641
loss: 1.0072153806686401,grad_norm: 0.7679145878325719, iteration: 332642
loss: 0.9824975728988647,grad_norm: 0.893046114447537, iteration: 332643
loss: 0.9691563248634338,grad_norm: 0.8078330277497138, iteration: 332644
loss: 0.9708466529846191,grad_norm: 0.9011076861644487, iteration: 332645
loss: 1.011810302734375,grad_norm: 0.7385240355177971, iteration: 332646
loss: 0.9929994940757751,grad_norm: 0.768124705552764, iteration: 332647
loss: 0.9904268980026245,grad_norm: 0.9999991471036276, iteration: 332648
loss: 0.9906646609306335,grad_norm: 0.9472084137959795, iteration: 332649
loss: 0.9987289905548096,grad_norm: 0.7749961339922768, iteration: 332650
loss: 0.9634755253791809,grad_norm: 0.7899345022112687, iteration: 332651
loss: 1.007158875465393,grad_norm: 0.9671551958269637, iteration: 332652
loss: 0.9931290745735168,grad_norm: 0.7758971134458711, iteration: 332653
loss: 0.9800455570220947,grad_norm: 0.6883577858950763, iteration: 332654
loss: 1.006723165512085,grad_norm: 0.5531618876397164, iteration: 332655
loss: 0.9708755612373352,grad_norm: 0.9999991778004592, iteration: 332656
loss: 0.988452672958374,grad_norm: 0.9103884966642043, iteration: 332657
loss: 1.0162678956985474,grad_norm: 0.7577601423209015, iteration: 332658
loss: 0.9810718894004822,grad_norm: 0.7594907067790693, iteration: 332659
loss: 1.0243055820465088,grad_norm: 0.7803094390633194, iteration: 332660
loss: 0.986385703086853,grad_norm: 0.999999079869675, iteration: 332661
loss: 1.0403594970703125,grad_norm: 0.8021016080238786, iteration: 332662
loss: 0.9864574074745178,grad_norm: 0.7176477488536929, iteration: 332663
loss: 1.0349056720733643,grad_norm: 0.8227831187126339, iteration: 332664
loss: 1.0079631805419922,grad_norm: 0.849349361384013, iteration: 332665
loss: 0.9858113527297974,grad_norm: 0.9792087710093109, iteration: 332666
loss: 1.011966586112976,grad_norm: 0.9999999104196239, iteration: 332667
loss: 1.0122339725494385,grad_norm: 0.7909931214989597, iteration: 332668
loss: 0.9947369694709778,grad_norm: 0.7551717531562676, iteration: 332669
loss: 0.9985169768333435,grad_norm: 0.8269744384195622, iteration: 332670
loss: 1.0035731792449951,grad_norm: 0.8132353461081483, iteration: 332671
loss: 0.9640697836875916,grad_norm: 0.9999996134081566, iteration: 332672
loss: 1.0628103017807007,grad_norm: 0.9999998476581782, iteration: 332673
loss: 0.9989693760871887,grad_norm: 0.999999423851464, iteration: 332674
loss: 1.0121363401412964,grad_norm: 0.7735074945902163, iteration: 332675
loss: 1.1847467422485352,grad_norm: 0.9999997468938452, iteration: 332676
loss: 0.997417688369751,grad_norm: 0.8355893749967387, iteration: 332677
loss: 0.9733654260635376,grad_norm: 0.792096784434093, iteration: 332678
loss: 0.9684363007545471,grad_norm: 0.777545196471287, iteration: 332679
loss: 1.0291492938995361,grad_norm: 0.8109222702090887, iteration: 332680
loss: 0.9913487434387207,grad_norm: 0.7173860552263479, iteration: 332681
loss: 1.0332564115524292,grad_norm: 0.9999999500256375, iteration: 332682
loss: 0.9636175036430359,grad_norm: 0.7602849381570634, iteration: 332683
loss: 0.9967954754829407,grad_norm: 0.8015305897043037, iteration: 332684
loss: 0.9761248826980591,grad_norm: 0.8358801256826677, iteration: 332685
loss: 1.0119751691818237,grad_norm: 0.9474229004390029, iteration: 332686
loss: 1.1258647441864014,grad_norm: 0.999999306238001, iteration: 332687
loss: 0.96429842710495,grad_norm: 0.9999998865717651, iteration: 332688
loss: 0.952041745185852,grad_norm: 0.9847850119839745, iteration: 332689
loss: 1.0054453611373901,grad_norm: 0.9999991492962692, iteration: 332690
loss: 1.1616778373718262,grad_norm: 0.9999991320299325, iteration: 332691
loss: 0.9873954653739929,grad_norm: 0.8507296989792589, iteration: 332692
loss: 1.022420048713684,grad_norm: 0.8529602034351277, iteration: 332693
loss: 1.033361554145813,grad_norm: 0.9320785296757593, iteration: 332694
loss: 0.9754654765129089,grad_norm: 0.7545914963195492, iteration: 332695
loss: 0.9881957769393921,grad_norm: 0.8652187569258676, iteration: 332696
loss: 1.0492186546325684,grad_norm: 0.9999998148639001, iteration: 332697
loss: 0.9968112111091614,grad_norm: 0.9325132967842036, iteration: 332698
loss: 0.9758607149124146,grad_norm: 0.8308098760187241, iteration: 332699
loss: 1.0117360353469849,grad_norm: 0.8526145925833486, iteration: 332700
loss: 1.0710035562515259,grad_norm: 0.7519920879641514, iteration: 332701
loss: 1.0705511569976807,grad_norm: 0.9999997437311685, iteration: 332702
loss: 0.9817390441894531,grad_norm: 0.9261159878656144, iteration: 332703
loss: 1.033705472946167,grad_norm: 0.788522133811564, iteration: 332704
loss: 1.007664442062378,grad_norm: 0.999998997265036, iteration: 332705
loss: 1.0185344219207764,grad_norm: 0.8431221048010135, iteration: 332706
loss: 0.9946674704551697,grad_norm: 0.9364076046510948, iteration: 332707
loss: 0.9746062755584717,grad_norm: 0.8564937477049118, iteration: 332708
loss: 1.0434767007827759,grad_norm: 0.9999991355932185, iteration: 332709
loss: 1.021374225616455,grad_norm: 0.8397621633787283, iteration: 332710
loss: 1.031101942062378,grad_norm: 0.9522389741174512, iteration: 332711
loss: 0.9564767479896545,grad_norm: 0.7694210058003912, iteration: 332712
loss: 1.032171607017517,grad_norm: 0.9999991068495543, iteration: 332713
loss: 1.0576871633529663,grad_norm: 1.0000000048167872, iteration: 332714
loss: 0.9426486492156982,grad_norm: 0.7628322312500426, iteration: 332715
loss: 1.0221601724624634,grad_norm: 0.9999990747038778, iteration: 332716
loss: 1.0311588048934937,grad_norm: 0.8457970490060882, iteration: 332717
loss: 1.0054117441177368,grad_norm: 0.9106229781958965, iteration: 332718
loss: 1.0066028833389282,grad_norm: 0.9240887659659403, iteration: 332719
loss: 0.9820384383201599,grad_norm: 0.8901510908249335, iteration: 332720
loss: 1.022284746170044,grad_norm: 0.9094624498745539, iteration: 332721
loss: 1.0668696165084839,grad_norm: 0.9999991519723336, iteration: 332722
loss: 1.0258994102478027,grad_norm: 0.9999995886755265, iteration: 332723
loss: 1.0420445203781128,grad_norm: 0.9009860853663796, iteration: 332724
loss: 1.0132659673690796,grad_norm: 0.731344558669618, iteration: 332725
loss: 1.0032144784927368,grad_norm: 0.9999990982227639, iteration: 332726
loss: 1.0169693231582642,grad_norm: 0.9125544136130679, iteration: 332727
loss: 1.0162696838378906,grad_norm: 0.7709992162945375, iteration: 332728
loss: 0.9951308965682983,grad_norm: 0.7774943091363501, iteration: 332729
loss: 1.071431040763855,grad_norm: 0.9999994874297503, iteration: 332730
loss: 1.0081218481063843,grad_norm: 0.7295183849523913, iteration: 332731
loss: 1.005922555923462,grad_norm: 0.8403325929092743, iteration: 332732
loss: 0.9890821576118469,grad_norm: 0.7133143602598278, iteration: 332733
loss: 1.0085422992706299,grad_norm: 0.9999997718510296, iteration: 332734
loss: 0.9735935926437378,grad_norm: 0.9169815616360814, iteration: 332735
loss: 1.071225881576538,grad_norm: 0.999998997483852, iteration: 332736
loss: 1.009831190109253,grad_norm: 0.891204996806419, iteration: 332737
loss: 1.032554268836975,grad_norm: 0.9999996654655126, iteration: 332738
loss: 1.0062283277511597,grad_norm: 0.9999990407675143, iteration: 332739
loss: 1.0651298761367798,grad_norm: 0.9547475662204972, iteration: 332740
loss: 0.9914666414260864,grad_norm: 0.9360155848291584, iteration: 332741
loss: 1.0287501811981201,grad_norm: 0.8514908655269703, iteration: 332742
loss: 0.9968510866165161,grad_norm: 0.8529997045829746, iteration: 332743
loss: 1.020511269569397,grad_norm: 0.819160144902444, iteration: 332744
loss: 1.0069776773452759,grad_norm: 0.7916141158756242, iteration: 332745
loss: 0.993798017501831,grad_norm: 0.7406710194072869, iteration: 332746
loss: 1.0382955074310303,grad_norm: 0.9039085408794807, iteration: 332747
loss: 1.0118250846862793,grad_norm: 0.8208673926120725, iteration: 332748
loss: 0.9944363832473755,grad_norm: 0.9999992231700299, iteration: 332749
loss: 1.0102769136428833,grad_norm: 0.7501927084333201, iteration: 332750
loss: 1.0043752193450928,grad_norm: 0.9576282175510158, iteration: 332751
loss: 1.0154814720153809,grad_norm: 0.9866193572512729, iteration: 332752
loss: 1.0058715343475342,grad_norm: 0.7260115986617593, iteration: 332753
loss: 1.0086767673492432,grad_norm: 0.7892761103459301, iteration: 332754
loss: 1.004913330078125,grad_norm: 0.9999993989390158, iteration: 332755
loss: 1.0032955408096313,grad_norm: 0.8537832986149287, iteration: 332756
loss: 0.9839884638786316,grad_norm: 0.7639497212787962, iteration: 332757
loss: 1.0225263833999634,grad_norm: 0.8994371194867617, iteration: 332758
loss: 1.006374478340149,grad_norm: 0.9722741039457636, iteration: 332759
loss: 1.0185468196868896,grad_norm: 0.8414350934457713, iteration: 332760
loss: 0.989780068397522,grad_norm: 0.8401101535548313, iteration: 332761
loss: 0.9996417164802551,grad_norm: 0.9999990858329929, iteration: 332762
loss: 0.975078284740448,grad_norm: 0.7599199162727582, iteration: 332763
loss: 0.9736231565475464,grad_norm: 0.814827188547874, iteration: 332764
loss: 1.0538780689239502,grad_norm: 0.9999997062218943, iteration: 332765
loss: 1.0241378545761108,grad_norm: 0.9905568166758352, iteration: 332766
loss: 1.0241940021514893,grad_norm: 0.8708201643352648, iteration: 332767
loss: 0.9829860329627991,grad_norm: 0.6606840754353737, iteration: 332768
loss: 1.1406512260437012,grad_norm: 0.9999993400595828, iteration: 332769
loss: 1.0027869939804077,grad_norm: 0.8811929931189886, iteration: 332770
loss: 0.988502025604248,grad_norm: 0.9538841151308359, iteration: 332771
loss: 0.9654393792152405,grad_norm: 0.9999990810453308, iteration: 332772
loss: 1.0154160261154175,grad_norm: 0.8245759328706961, iteration: 332773
loss: 0.9838035106658936,grad_norm: 0.7271916121413935, iteration: 332774
loss: 1.0314584970474243,grad_norm: 0.9249851091400862, iteration: 332775
loss: 0.9900283813476562,grad_norm: 0.8229783892917105, iteration: 332776
loss: 1.0065057277679443,grad_norm: 0.8357577947541859, iteration: 332777
loss: 1.025169014930725,grad_norm: 0.9999995920804272, iteration: 332778
loss: 1.0039459466934204,grad_norm: 0.9026986748752137, iteration: 332779
loss: 1.0635021924972534,grad_norm: 0.8591318372910214, iteration: 332780
loss: 1.0213303565979004,grad_norm: 0.9999991249914875, iteration: 332781
loss: 0.9950712323188782,grad_norm: 0.9999991272852378, iteration: 332782
loss: 0.9614641666412354,grad_norm: 0.8822789437124589, iteration: 332783
loss: 1.0154774188995361,grad_norm: 0.918331762307487, iteration: 332784
loss: 0.9932768940925598,grad_norm: 0.8944257572188478, iteration: 332785
loss: 1.024429202079773,grad_norm: 0.9881770404349425, iteration: 332786
loss: 1.035043478012085,grad_norm: 0.7410012995709375, iteration: 332787
loss: 0.9991114139556885,grad_norm: 0.9999991070298866, iteration: 332788
loss: 1.0288337469100952,grad_norm: 0.8105307262774055, iteration: 332789
loss: 1.0804070234298706,grad_norm: 0.9999998155915767, iteration: 332790
loss: 0.9962210655212402,grad_norm: 0.9497305540530862, iteration: 332791
loss: 0.9927707314491272,grad_norm: 0.8393817139453815, iteration: 332792
loss: 0.9688816666603088,grad_norm: 0.999999013073012, iteration: 332793
loss: 0.987579345703125,grad_norm: 0.7845554184133033, iteration: 332794
loss: 1.0185948610305786,grad_norm: 0.9518892656262758, iteration: 332795
loss: 1.0918563604354858,grad_norm: 0.9999994922411889, iteration: 332796
loss: 1.000653624534607,grad_norm: 0.7709703345396414, iteration: 332797
loss: 1.011098861694336,grad_norm: 0.723035575030222, iteration: 332798
loss: 1.0321458578109741,grad_norm: 0.7529974734534016, iteration: 332799
loss: 0.9540433883666992,grad_norm: 0.7343868823519565, iteration: 332800
loss: 0.959252119064331,grad_norm: 0.9999990168765718, iteration: 332801
loss: 1.0286071300506592,grad_norm: 0.9225399117429378, iteration: 332802
loss: 1.0623424053192139,grad_norm: 0.8699764706337461, iteration: 332803
loss: 1.1077831983566284,grad_norm: 0.9999991701000831, iteration: 332804
loss: 1.1050043106079102,grad_norm: 0.9999993910133155, iteration: 332805
loss: 0.9536465406417847,grad_norm: 0.9056878515041069, iteration: 332806
loss: 1.0095040798187256,grad_norm: 0.983050568244745, iteration: 332807
loss: 0.9888137578964233,grad_norm: 0.8211466525030351, iteration: 332808
loss: 1.0030215978622437,grad_norm: 0.6979157614544418, iteration: 332809
loss: 1.0363659858703613,grad_norm: 0.9997655574625313, iteration: 332810
loss: 1.0087639093399048,grad_norm: 0.8842463717516721, iteration: 332811
loss: 0.9714111089706421,grad_norm: 0.8441656609651681, iteration: 332812
loss: 0.9825998544692993,grad_norm: 0.7697212459076517, iteration: 332813
loss: 1.0768128633499146,grad_norm: 0.9999990101118253, iteration: 332814
loss: 1.0384882688522339,grad_norm: 0.9999999514785206, iteration: 332815
loss: 1.040269374847412,grad_norm: 0.999999176820263, iteration: 332816
loss: 1.0795468091964722,grad_norm: 0.9999992190459578, iteration: 332817
loss: 1.1018027067184448,grad_norm: 0.9999999409019584, iteration: 332818
loss: 0.9955653548240662,grad_norm: 0.9999994400141351, iteration: 332819
loss: 1.007094383239746,grad_norm: 0.9999991771390138, iteration: 332820
loss: 1.0010194778442383,grad_norm: 0.9999998130282305, iteration: 332821
loss: 1.0156497955322266,grad_norm: 0.9558857221801519, iteration: 332822
loss: 1.0152591466903687,grad_norm: 0.7755179288140417, iteration: 332823
loss: 1.067549467086792,grad_norm: 0.9999996325755311, iteration: 332824
loss: 0.980094313621521,grad_norm: 0.8539202538868811, iteration: 332825
loss: 1.0132169723510742,grad_norm: 0.8649583911617489, iteration: 332826
loss: 0.9703770279884338,grad_norm: 0.8387976696780842, iteration: 332827
loss: 0.996590256690979,grad_norm: 0.9999996426523775, iteration: 332828
loss: 0.9784786701202393,grad_norm: 0.9999993651003269, iteration: 332829
loss: 1.025136113166809,grad_norm: 0.7816251778642406, iteration: 332830
loss: 0.9964723587036133,grad_norm: 0.6971024014081463, iteration: 332831
loss: 0.9950644373893738,grad_norm: 0.9999990976051663, iteration: 332832
loss: 0.9945758581161499,grad_norm: 0.7966036577791822, iteration: 332833
loss: 1.0318173170089722,grad_norm: 0.9999994349311111, iteration: 332834
loss: 1.0280330181121826,grad_norm: 0.9999991130277257, iteration: 332835
loss: 0.995276689529419,grad_norm: 0.7909236656300356, iteration: 332836
loss: 1.0015629529953003,grad_norm: 0.9369307013710668, iteration: 332837
loss: 1.1159725189208984,grad_norm: 1.0000000218982636, iteration: 332838
loss: 1.0264495611190796,grad_norm: 0.9999992140337268, iteration: 332839
loss: 1.0238081216812134,grad_norm: 0.9999997622612049, iteration: 332840
loss: 0.9840850830078125,grad_norm: 0.8110534215984558, iteration: 332841
loss: 0.9959713816642761,grad_norm: 0.9999991287640976, iteration: 332842
loss: 1.0090678930282593,grad_norm: 0.999999113291587, iteration: 332843
loss: 0.9846633076667786,grad_norm: 0.8558681368620479, iteration: 332844
loss: 1.0146009922027588,grad_norm: 0.7359551364235707, iteration: 332845
loss: 0.9821653366088867,grad_norm: 0.7080882436710537, iteration: 332846
loss: 1.018885850906372,grad_norm: 0.9312778482222751, iteration: 332847
loss: 1.0038706064224243,grad_norm: 0.9999991233394814, iteration: 332848
loss: 1.0061566829681396,grad_norm: 0.7448923287580513, iteration: 332849
loss: 0.9481894969940186,grad_norm: 0.7674849627814025, iteration: 332850
loss: 0.9803690910339355,grad_norm: 0.9233967185760659, iteration: 332851
loss: 1.1038068532943726,grad_norm: 0.9999997117277091, iteration: 332852
loss: 1.01271653175354,grad_norm: 0.999999141101268, iteration: 332853
loss: 0.9915990829467773,grad_norm: 0.740530545860583, iteration: 332854
loss: 1.0686841011047363,grad_norm: 0.9999991872445961, iteration: 332855
loss: 1.0544465780258179,grad_norm: 0.999999740067984, iteration: 332856
loss: 1.00649893283844,grad_norm: 0.8497811970192584, iteration: 332857
loss: 1.0049926042556763,grad_norm: 0.7857272799194182, iteration: 332858
loss: 0.9926465749740601,grad_norm: 0.7897724353039921, iteration: 332859
loss: 1.0731111764907837,grad_norm: 0.9999994970188723, iteration: 332860
loss: 1.0398062467575073,grad_norm: 0.9999995568994826, iteration: 332861
loss: 1.016983985900879,grad_norm: 0.9999991536635258, iteration: 332862
loss: 1.0642670392990112,grad_norm: 0.9999993180127532, iteration: 332863
loss: 0.9934535026550293,grad_norm: 0.9999996235751756, iteration: 332864
loss: 1.0311815738677979,grad_norm: 0.9218834465098827, iteration: 332865
loss: 0.9951558113098145,grad_norm: 0.9999991542723171, iteration: 332866
loss: 1.00914466381073,grad_norm: 0.8232741729989937, iteration: 332867
loss: 0.9747490882873535,grad_norm: 0.9999990554730657, iteration: 332868
loss: 0.9949624538421631,grad_norm: 0.6823114146767008, iteration: 332869
loss: 1.0088447332382202,grad_norm: 0.9999999690095823, iteration: 332870
loss: 1.0148687362670898,grad_norm: 0.6988503270075833, iteration: 332871
loss: 0.9926618933677673,grad_norm: 0.9300338808618771, iteration: 332872
loss: 1.032400131225586,grad_norm: 0.9999993533013785, iteration: 332873
loss: 0.977822482585907,grad_norm: 0.8276537396886352, iteration: 332874
loss: 0.9838235974311829,grad_norm: 0.7507258032527406, iteration: 332875
loss: 1.066537857055664,grad_norm: 0.9999997291013548, iteration: 332876
loss: 0.997628927230835,grad_norm: 0.8146250913484633, iteration: 332877
loss: 0.9628389477729797,grad_norm: 0.8731664824025688, iteration: 332878
loss: 0.9673525094985962,grad_norm: 0.8329742242428796, iteration: 332879
loss: 0.9982526898384094,grad_norm: 0.9474251304173621, iteration: 332880
loss: 1.0340681076049805,grad_norm: 0.9999993994246621, iteration: 332881
loss: 1.0405144691467285,grad_norm: 0.9999994509262904, iteration: 332882
loss: 1.0242836475372314,grad_norm: 0.6614310683384433, iteration: 332883
loss: 0.9816811084747314,grad_norm: 0.7205021920185459, iteration: 332884
loss: 1.0066490173339844,grad_norm: 0.7558591515987726, iteration: 332885
loss: 1.0124619007110596,grad_norm: 0.9999998785687358, iteration: 332886
loss: 1.01677405834198,grad_norm: 0.8034857509639893, iteration: 332887
loss: 1.1001811027526855,grad_norm: 0.9999995947456753, iteration: 332888
loss: 0.9881342053413391,grad_norm: 0.8513292016605304, iteration: 332889
loss: 0.9734076261520386,grad_norm: 0.7767465219067831, iteration: 332890
loss: 1.1141011714935303,grad_norm: 0.9999997241011896, iteration: 332891
loss: 1.0654001235961914,grad_norm: 0.9875409747907516, iteration: 332892
loss: 1.0058175325393677,grad_norm: 0.880679661427161, iteration: 332893
loss: 1.0388171672821045,grad_norm: 0.839360045650556, iteration: 332894
loss: 1.0318233966827393,grad_norm: 0.9544457016370438, iteration: 332895
loss: 1.1282532215118408,grad_norm: 0.9414335091983695, iteration: 332896
loss: 1.0452537536621094,grad_norm: 0.9999991631131349, iteration: 332897
loss: 1.0048927068710327,grad_norm: 0.9169077650326053, iteration: 332898
loss: 0.9961005449295044,grad_norm: 0.8048050223515736, iteration: 332899
loss: 1.0945465564727783,grad_norm: 0.8117702763998391, iteration: 332900
loss: 0.9802749752998352,grad_norm: 0.9417196278490899, iteration: 332901
loss: 0.9791532754898071,grad_norm: 0.8643675766778413, iteration: 332902
loss: 0.989871084690094,grad_norm: 0.9999991351291313, iteration: 332903
loss: 0.9698567390441895,grad_norm: 0.9999994092159312, iteration: 332904
loss: 1.0278123617172241,grad_norm: 0.9999991728179312, iteration: 332905
loss: 0.9869418740272522,grad_norm: 0.8227428742446846, iteration: 332906
loss: 1.0427182912826538,grad_norm: 0.8157464482368443, iteration: 332907
loss: 0.9865303635597229,grad_norm: 0.8380842166897401, iteration: 332908
loss: 1.0056344270706177,grad_norm: 0.8412115197603952, iteration: 332909
loss: 1.0090429782867432,grad_norm: 0.999998977155027, iteration: 332910
loss: 1.0081171989440918,grad_norm: 0.9278183028765334, iteration: 332911
loss: 1.037920355796814,grad_norm: 0.9999995309037566, iteration: 332912
loss: 0.9952476024627686,grad_norm: 0.9999998848049237, iteration: 332913
loss: 1.0128097534179688,grad_norm: 0.9999992364895902, iteration: 332914
loss: 0.9653844833374023,grad_norm: 0.8849295574643804, iteration: 332915
loss: 1.002875804901123,grad_norm: 0.8313528992357095, iteration: 332916
loss: 0.9912826418876648,grad_norm: 0.9228525775216354, iteration: 332917
loss: 1.0863621234893799,grad_norm: 0.999999322963382, iteration: 332918
loss: 1.0850837230682373,grad_norm: 0.9999991503765401, iteration: 332919
loss: 1.039750576019287,grad_norm: 0.999999699179142, iteration: 332920
loss: 1.0542577505111694,grad_norm: 0.9104007323114505, iteration: 332921
loss: 1.0473378896713257,grad_norm: 0.9915631715560682, iteration: 332922
loss: 0.9985378980636597,grad_norm: 0.960638065253861, iteration: 332923
loss: 1.045386791229248,grad_norm: 0.9999996491955718, iteration: 332924
loss: 1.0159993171691895,grad_norm: 0.8685462013598777, iteration: 332925
loss: 1.0183837413787842,grad_norm: 0.999999358831161, iteration: 332926
loss: 1.0114400386810303,grad_norm: 0.9156785151604037, iteration: 332927
loss: 1.021183729171753,grad_norm: 0.7924086168298441, iteration: 332928
loss: 1.0105880498886108,grad_norm: 0.8910810090402876, iteration: 332929
loss: 1.006178379058838,grad_norm: 0.6953674536163842, iteration: 332930
loss: 1.0018922090530396,grad_norm: 0.8507933395122191, iteration: 332931
loss: 1.041998028755188,grad_norm: 0.999999271783245, iteration: 332932
loss: 0.953403890132904,grad_norm: 0.9547100264058189, iteration: 332933
loss: 0.9996768832206726,grad_norm: 0.8339455414840878, iteration: 332934
loss: 0.954566478729248,grad_norm: 0.8674560940429346, iteration: 332935
loss: 0.9649448990821838,grad_norm: 0.8556145608695466, iteration: 332936
loss: 0.9835231304168701,grad_norm: 0.6619557524594812, iteration: 332937
loss: 0.9689170122146606,grad_norm: 0.8553905175844072, iteration: 332938
loss: 1.0016990900039673,grad_norm: 0.9999995642859995, iteration: 332939
loss: 0.9833256006240845,grad_norm: 0.9999989881874966, iteration: 332940
loss: 1.004855751991272,grad_norm: 0.7828515568773035, iteration: 332941
loss: 1.0005865097045898,grad_norm: 0.7973178316335586, iteration: 332942
loss: 0.9472733736038208,grad_norm: 0.8281252414523689, iteration: 332943
loss: 1.0061962604522705,grad_norm: 0.8704321497763469, iteration: 332944
loss: 0.9980757236480713,grad_norm: 0.7553882861438269, iteration: 332945
loss: 1.0156751871109009,grad_norm: 0.9999993424200685, iteration: 332946
loss: 0.9884780645370483,grad_norm: 0.9999991824811479, iteration: 332947
loss: 1.0346187353134155,grad_norm: 0.7145842767286706, iteration: 332948
loss: 0.9926250576972961,grad_norm: 0.7813457592253443, iteration: 332949
loss: 0.975020170211792,grad_norm: 0.7635574652616823, iteration: 332950
loss: 1.0201075077056885,grad_norm: 0.7811730550531495, iteration: 332951
loss: 0.9814805388450623,grad_norm: 0.9999991601676167, iteration: 332952
loss: 0.9864177107810974,grad_norm: 0.922099489565961, iteration: 332953
loss: 0.9972200989723206,grad_norm: 0.7725177429832721, iteration: 332954
loss: 1.0617585182189941,grad_norm: 0.9128266355940768, iteration: 332955
loss: 0.9965353012084961,grad_norm: 0.8333797397386068, iteration: 332956
loss: 1.029642939567566,grad_norm: 0.8387732886993023, iteration: 332957
loss: 0.980501115322113,grad_norm: 0.7908732322970291, iteration: 332958
loss: 0.975721001625061,grad_norm: 0.9999990934413553, iteration: 332959
loss: 0.9800223112106323,grad_norm: 0.7847794744234202, iteration: 332960
loss: 1.0096865892410278,grad_norm: 0.9329808718236295, iteration: 332961
loss: 1.0011378526687622,grad_norm: 0.8893070990026607, iteration: 332962
loss: 0.9762241840362549,grad_norm: 0.9999992122996202, iteration: 332963
loss: 1.0313202142715454,grad_norm: 0.9243306776559815, iteration: 332964
loss: 1.0032490491867065,grad_norm: 0.7592760326125361, iteration: 332965
loss: 0.9973989725112915,grad_norm: 0.9999996280778596, iteration: 332966
loss: 1.0016618967056274,grad_norm: 0.9013657704700044, iteration: 332967
loss: 1.013303279876709,grad_norm: 0.7547752772431328, iteration: 332968
loss: 1.0268628597259521,grad_norm: 0.9999996142462017, iteration: 332969
loss: 1.0481046438217163,grad_norm: 0.9999993003606147, iteration: 332970
loss: 1.0346206426620483,grad_norm: 0.9757166793100106, iteration: 332971
loss: 1.0183497667312622,grad_norm: 0.8827790956089999, iteration: 332972
loss: 1.0690699815750122,grad_norm: 0.9999994823653715, iteration: 332973
loss: 1.018211841583252,grad_norm: 0.8764740714475607, iteration: 332974
loss: 1.0036368370056152,grad_norm: 0.9999990075730355, iteration: 332975
loss: 1.0114880800247192,grad_norm: 0.7945739452795928, iteration: 332976
loss: 0.9905493259429932,grad_norm: 0.8996582351129531, iteration: 332977
loss: 0.994796097278595,grad_norm: 0.8266536067591289, iteration: 332978
loss: 1.0543729066848755,grad_norm: 0.9999992833758925, iteration: 332979
loss: 1.0416351556777954,grad_norm: 0.9999998771561763, iteration: 332980
loss: 1.0060880184173584,grad_norm: 0.9268135178894678, iteration: 332981
loss: 1.0150307416915894,grad_norm: 0.8671076432267699, iteration: 332982
loss: 1.0149823427200317,grad_norm: 0.8471778060110923, iteration: 332983
loss: 1.0458041429519653,grad_norm: 0.9999993972643572, iteration: 332984
loss: 1.0782285928726196,grad_norm: 0.999999458876305, iteration: 332985
loss: 1.0333508253097534,grad_norm: 0.8581087033247757, iteration: 332986
loss: 1.021929144859314,grad_norm: 0.9760150747494949, iteration: 332987
loss: 1.1011319160461426,grad_norm: 0.9999999588448316, iteration: 332988
loss: 0.9803958535194397,grad_norm: 0.7989854397237567, iteration: 332989
loss: 0.9799663424491882,grad_norm: 0.8132590525512489, iteration: 332990
loss: 1.0055423974990845,grad_norm: 0.8840118406099756, iteration: 332991
loss: 1.0336270332336426,grad_norm: 0.760709148938373, iteration: 332992
loss: 0.9668220281600952,grad_norm: 0.764211421415138, iteration: 332993
loss: 0.9738152027130127,grad_norm: 0.9336964642808682, iteration: 332994
loss: 1.0437949895858765,grad_norm: 0.999999802259214, iteration: 332995
loss: 0.9893249273300171,grad_norm: 0.8335791588289024, iteration: 332996
loss: 0.9816553592681885,grad_norm: 0.8515626497715683, iteration: 332997
loss: 1.080299973487854,grad_norm: 0.9402901107555091, iteration: 332998
loss: 1.0284900665283203,grad_norm: 0.999999170002644, iteration: 332999
loss: 0.9658694267272949,grad_norm: 0.9999991597861156, iteration: 333000
loss: 0.986454963684082,grad_norm: 0.7330102257249318, iteration: 333001
loss: 1.0103026628494263,grad_norm: 0.9999994661108865, iteration: 333002
loss: 1.0335932970046997,grad_norm: 0.9999992050788145, iteration: 333003
loss: 0.9896293878555298,grad_norm: 0.7531277534833718, iteration: 333004
loss: 1.01271653175354,grad_norm: 0.8210592286786277, iteration: 333005
loss: 0.9952904582023621,grad_norm: 0.7033754644066502, iteration: 333006
loss: 0.9759303331375122,grad_norm: 0.9383786862166915, iteration: 333007
loss: 1.1307932138442993,grad_norm: 0.9999994737658314, iteration: 333008
loss: 0.9967100024223328,grad_norm: 0.7357274532153324, iteration: 333009
loss: 0.9982739686965942,grad_norm: 0.7145664921451006, iteration: 333010
loss: 1.0243653059005737,grad_norm: 0.7914336179595162, iteration: 333011
loss: 1.0285388231277466,grad_norm: 0.8602322612355049, iteration: 333012
loss: 1.028777003288269,grad_norm: 0.99999912639335, iteration: 333013
loss: 0.9941189885139465,grad_norm: 0.999999332348501, iteration: 333014
loss: 1.008002758026123,grad_norm: 0.8770740902014111, iteration: 333015
loss: 1.0082018375396729,grad_norm: 0.9999990483299489, iteration: 333016
loss: 0.9783148765563965,grad_norm: 0.9059357335797762, iteration: 333017
loss: 1.0645405054092407,grad_norm: 0.9999993066883472, iteration: 333018
loss: 1.001550555229187,grad_norm: 0.677268933104454, iteration: 333019
loss: 0.9855175614356995,grad_norm: 0.8614938641557868, iteration: 333020
loss: 0.9987287521362305,grad_norm: 0.9999995055523159, iteration: 333021
loss: 0.9876120686531067,grad_norm: 0.9999993147574423, iteration: 333022
loss: 0.9705907106399536,grad_norm: 0.8195888925553692, iteration: 333023
loss: 0.9918150305747986,grad_norm: 0.9111320078632317, iteration: 333024
loss: 1.03028404712677,grad_norm: 0.8770836442558263, iteration: 333025
loss: 1.0121465921401978,grad_norm: 0.8175282999532221, iteration: 333026
loss: 0.9884859919548035,grad_norm: 0.9744851535589184, iteration: 333027
loss: 0.9870063662528992,grad_norm: 0.8625052831897732, iteration: 333028
loss: 0.987309455871582,grad_norm: 0.9543512076779082, iteration: 333029
loss: 0.9935302734375,grad_norm: 0.8903120886787391, iteration: 333030
loss: 0.9962736964225769,grad_norm: 0.9999992174007936, iteration: 333031
loss: 1.019953727722168,grad_norm: 1.0000000455462645, iteration: 333032
loss: 0.9526634812355042,grad_norm: 0.8099943580852218, iteration: 333033
loss: 0.976292610168457,grad_norm: 0.8783172413071666, iteration: 333034
loss: 0.9556024074554443,grad_norm: 0.8072925639568714, iteration: 333035
loss: 0.9761735796928406,grad_norm: 0.7789980242345236, iteration: 333036
loss: 0.9734119772911072,grad_norm: 0.9139287247919893, iteration: 333037
loss: 1.0214502811431885,grad_norm: 0.9353953829622622, iteration: 333038
loss: 1.0144007205963135,grad_norm: 0.7210643409239299, iteration: 333039
loss: 0.9733684062957764,grad_norm: 0.7127813944809703, iteration: 333040
loss: 1.0043714046478271,grad_norm: 0.9684529321943218, iteration: 333041
loss: 1.0043504238128662,grad_norm: 0.7997748503482094, iteration: 333042
loss: 0.9867904782295227,grad_norm: 0.7887889744432542, iteration: 333043
loss: 1.0133708715438843,grad_norm: 0.8887564107559326, iteration: 333044
loss: 1.0150728225708008,grad_norm: 0.9695753489112005, iteration: 333045
loss: 0.9914397597312927,grad_norm: 0.9999996762358397, iteration: 333046
loss: 1.0141633749008179,grad_norm: 0.9146692310030577, iteration: 333047
loss: 1.0041999816894531,grad_norm: 0.7797222520484085, iteration: 333048
loss: 0.9934676885604858,grad_norm: 0.8954212619443657, iteration: 333049
loss: 1.0613641738891602,grad_norm: 0.9999994538754953, iteration: 333050
loss: 1.0144827365875244,grad_norm: 0.9999991547919561, iteration: 333051
loss: 1.031484842300415,grad_norm: 0.9999992601274548, iteration: 333052
loss: 1.0503836870193481,grad_norm: 0.7331804837089648, iteration: 333053
loss: 0.9987685680389404,grad_norm: 0.8421975468436437, iteration: 333054
loss: 0.9777414798736572,grad_norm: 0.9999993012063371, iteration: 333055
loss: 1.0185126066207886,grad_norm: 0.8970783184913489, iteration: 333056
loss: 0.9602840542793274,grad_norm: 0.7947756704337399, iteration: 333057
loss: 0.9759728908538818,grad_norm: 0.8783368175323997, iteration: 333058
loss: 1.0302929878234863,grad_norm: 0.9840682922912006, iteration: 333059
loss: 1.0398519039154053,grad_norm: 0.9999990686899154, iteration: 333060
loss: 1.1499103307724,grad_norm: 0.9999999527562634, iteration: 333061
loss: 1.0875476598739624,grad_norm: 0.9999995573144731, iteration: 333062
loss: 0.9614776968955994,grad_norm: 0.9999989688974031, iteration: 333063
loss: 1.0159578323364258,grad_norm: 0.9999999706799989, iteration: 333064
loss: 1.0303466320037842,grad_norm: 0.9481905836532005, iteration: 333065
loss: 0.97409588098526,grad_norm: 0.8603198083937078, iteration: 333066
loss: 1.0382572412490845,grad_norm: 0.9999995187039523, iteration: 333067
loss: 0.9757381677627563,grad_norm: 0.9999996409891566, iteration: 333068
loss: 1.0200856924057007,grad_norm: 0.8703509406197921, iteration: 333069
loss: 1.0140957832336426,grad_norm: 0.8740823177999139, iteration: 333070
loss: 1.0068926811218262,grad_norm: 0.9999991749742234, iteration: 333071
loss: 0.9739630222320557,grad_norm: 0.9513038210183541, iteration: 333072
loss: 0.9725605845451355,grad_norm: 0.8901539486951416, iteration: 333073
loss: 0.9766665101051331,grad_norm: 0.857121189372869, iteration: 333074
loss: 1.0670440196990967,grad_norm: 0.8582829919512766, iteration: 333075
loss: 1.0082457065582275,grad_norm: 0.7572601126156643, iteration: 333076
loss: 1.0056602954864502,grad_norm: 0.6715467301136306, iteration: 333077
loss: 1.0432695150375366,grad_norm: 0.9999996908584455, iteration: 333078
loss: 0.9980414509773254,grad_norm: 0.7190017493085107, iteration: 333079
loss: 1.000545620918274,grad_norm: 0.7058640794048728, iteration: 333080
loss: 1.0364258289337158,grad_norm: 0.9999992109911597, iteration: 333081
loss: 1.012563943862915,grad_norm: 0.9241424776984517, iteration: 333082
loss: 1.0280888080596924,grad_norm: 0.9999995499017548, iteration: 333083
loss: 0.9698799252510071,grad_norm: 0.8506034222902371, iteration: 333084
loss: 0.9703406691551208,grad_norm: 0.8300958212955004, iteration: 333085
loss: 0.9640907049179077,grad_norm: 0.8715309855164535, iteration: 333086
loss: 1.0595506429672241,grad_norm: 0.7574909197423937, iteration: 333087
loss: 0.9818293452262878,grad_norm: 0.8967661282642937, iteration: 333088
loss: 1.0070520639419556,grad_norm: 0.9999993146073501, iteration: 333089
loss: 0.9797841310501099,grad_norm: 0.6920218078741974, iteration: 333090
loss: 1.0218356847763062,grad_norm: 0.9999999420518965, iteration: 333091
loss: 0.9909033179283142,grad_norm: 0.999999307399798, iteration: 333092
loss: 0.9598660469055176,grad_norm: 0.9595181710980518, iteration: 333093
loss: 0.9987736940383911,grad_norm: 0.8957650496697938, iteration: 333094
loss: 1.0000723600387573,grad_norm: 0.7304938041496084, iteration: 333095
loss: 1.0014148950576782,grad_norm: 0.9832864702089563, iteration: 333096
loss: 0.9975040555000305,grad_norm: 0.8662633131257657, iteration: 333097
loss: 1.007655382156372,grad_norm: 0.8978561071506885, iteration: 333098
loss: 0.9869737029075623,grad_norm: 0.999999122765817, iteration: 333099
loss: 0.9986764788627625,grad_norm: 0.7370381955609585, iteration: 333100
loss: 0.9748464226722717,grad_norm: 0.898527206446757, iteration: 333101
loss: 0.9913590550422668,grad_norm: 0.855282447577057, iteration: 333102
loss: 0.9897012710571289,grad_norm: 0.8532757077876324, iteration: 333103
loss: 1.0374011993408203,grad_norm: 0.9999996954374496, iteration: 333104
loss: 0.9841096997261047,grad_norm: 0.7870086404724631, iteration: 333105
loss: 1.022903323173523,grad_norm: 0.9999998100830171, iteration: 333106
loss: 1.0258219242095947,grad_norm: 0.9999993570037199, iteration: 333107
loss: 1.0204600095748901,grad_norm: 0.837208379598872, iteration: 333108
loss: 0.9983707070350647,grad_norm: 0.9999996455578156, iteration: 333109
loss: 0.9932702779769897,grad_norm: 0.7075653618497539, iteration: 333110
loss: 0.9753360152244568,grad_norm: 0.9999991552961661, iteration: 333111
loss: 0.9839179515838623,grad_norm: 0.7133403376470651, iteration: 333112
loss: 0.9936961531639099,grad_norm: 0.7671903803402184, iteration: 333113
loss: 0.9920047521591187,grad_norm: 0.7066253484995203, iteration: 333114
loss: 1.0181488990783691,grad_norm: 0.999999054168061, iteration: 333115
loss: 0.9869529008865356,grad_norm: 0.7181912033869999, iteration: 333116
loss: 0.9852730631828308,grad_norm: 0.999999226510347, iteration: 333117
loss: 1.0044134855270386,grad_norm: 0.8610055531728353, iteration: 333118
loss: 0.9999939799308777,grad_norm: 0.9088808342743778, iteration: 333119
loss: 1.019085168838501,grad_norm: 0.6903477469787974, iteration: 333120
loss: 1.033333659172058,grad_norm: 0.9999997423359063, iteration: 333121
loss: 1.0060255527496338,grad_norm: 0.7791352658546544, iteration: 333122
loss: 1.0321277379989624,grad_norm: 0.9599836772512448, iteration: 333123
loss: 0.9848812222480774,grad_norm: 0.8758917669488915, iteration: 333124
loss: 0.9955055117607117,grad_norm: 0.8671894954471449, iteration: 333125
loss: 0.9690337777137756,grad_norm: 0.8034019839035507, iteration: 333126
loss: 1.0460954904556274,grad_norm: 0.814653140923775, iteration: 333127
loss: 0.9937648773193359,grad_norm: 0.9316236773370865, iteration: 333128
loss: 1.0418611764907837,grad_norm: 0.8566040315803073, iteration: 333129
loss: 1.0323647260665894,grad_norm: 0.8779992310685641, iteration: 333130
loss: 0.9750898480415344,grad_norm: 0.7658173750791294, iteration: 333131
loss: 1.0054959058761597,grad_norm: 0.8133121095111588, iteration: 333132
loss: 1.0136134624481201,grad_norm: 0.8237034448541773, iteration: 333133
loss: 1.0053379535675049,grad_norm: 0.8661028058426924, iteration: 333134
loss: 1.001530408859253,grad_norm: 0.8649160656332776, iteration: 333135
loss: 0.9950554966926575,grad_norm: 0.8855664526295692, iteration: 333136
loss: 0.9948756098747253,grad_norm: 0.7519183712152638, iteration: 333137
loss: 0.9937995672225952,grad_norm: 0.9956202344855132, iteration: 333138
loss: 0.9710186719894409,grad_norm: 0.8669951650382215, iteration: 333139
loss: 1.0059598684310913,grad_norm: 0.7935304690153573, iteration: 333140
loss: 0.9659705758094788,grad_norm: 0.9264324035677, iteration: 333141
loss: 0.9799449443817139,grad_norm: 0.7461679773903297, iteration: 333142
loss: 1.0094351768493652,grad_norm: 0.889454525809715, iteration: 333143
loss: 0.9971219897270203,grad_norm: 0.7985775850045047, iteration: 333144
loss: 0.9723801016807556,grad_norm: 0.8261916036462326, iteration: 333145
loss: 0.9887674450874329,grad_norm: 0.8239283055781623, iteration: 333146
loss: 1.0003485679626465,grad_norm: 0.8802636094384745, iteration: 333147
loss: 0.9622933864593506,grad_norm: 0.8446555334432865, iteration: 333148
loss: 0.9901532530784607,grad_norm: 0.9153371170235747, iteration: 333149
loss: 1.0146650075912476,grad_norm: 0.9260075038179437, iteration: 333150
loss: 0.9859654307365417,grad_norm: 0.8861068430577771, iteration: 333151
loss: 0.9878975749015808,grad_norm: 0.8168972602799841, iteration: 333152
loss: 1.0011993646621704,grad_norm: 0.7980471124448697, iteration: 333153
loss: 0.9931537508964539,grad_norm: 0.9999998416529235, iteration: 333154
loss: 0.9872553944587708,grad_norm: 0.9495454124409595, iteration: 333155
loss: 1.1653251647949219,grad_norm: 0.999999943403058, iteration: 333156
loss: 1.004003643989563,grad_norm: 0.9999989918121933, iteration: 333157
loss: 1.0181313753128052,grad_norm: 0.8835351981342286, iteration: 333158
loss: 1.0225847959518433,grad_norm: 0.925913589189224, iteration: 333159
loss: 1.038792371749878,grad_norm: 0.8506065383927769, iteration: 333160
loss: 0.9728009700775146,grad_norm: 0.8047600682332919, iteration: 333161
loss: 1.0322269201278687,grad_norm: 0.7415246471738821, iteration: 333162
loss: 0.9769221544265747,grad_norm: 0.8009763144888496, iteration: 333163
loss: 1.042994737625122,grad_norm: 0.9999993024498843, iteration: 333164
loss: 0.9879605174064636,grad_norm: 0.9584124124475087, iteration: 333165
loss: 1.05052649974823,grad_norm: 0.9999999319947739, iteration: 333166
loss: 0.9651917815208435,grad_norm: 0.8860484104819021, iteration: 333167
loss: 0.9477936625480652,grad_norm: 0.8552233080104819, iteration: 333168
loss: 0.9900874495506287,grad_norm: 0.9889600330420767, iteration: 333169
loss: 0.9906666278839111,grad_norm: 0.7696025844606895, iteration: 333170
loss: 0.9893867373466492,grad_norm: 0.9355321482925801, iteration: 333171
loss: 1.0152403116226196,grad_norm: 0.9485398952725844, iteration: 333172
loss: 0.9602078199386597,grad_norm: 0.9103793304294647, iteration: 333173
loss: 0.9956228137016296,grad_norm: 0.8597368702973223, iteration: 333174
loss: 1.0515192747116089,grad_norm: 0.8883947854212979, iteration: 333175
loss: 1.0275633335113525,grad_norm: 0.9999999399668019, iteration: 333176
loss: 1.0333473682403564,grad_norm: 0.999999937730831, iteration: 333177
loss: 0.9843519330024719,grad_norm: 0.7368297647271517, iteration: 333178
loss: 0.994837760925293,grad_norm: 0.8320457700984984, iteration: 333179
loss: 1.0206488370895386,grad_norm: 0.9128926318852124, iteration: 333180
loss: 1.0621426105499268,grad_norm: 0.8007013277755747, iteration: 333181
loss: 0.9883856177330017,grad_norm: 0.7544983565984275, iteration: 333182
loss: 0.9987336993217468,grad_norm: 0.8587900614323094, iteration: 333183
loss: 1.0328565835952759,grad_norm: 0.8337404944971526, iteration: 333184
loss: 0.9845530986785889,grad_norm: 0.7416691161050823, iteration: 333185
loss: 1.0019927024841309,grad_norm: 0.8316379930452914, iteration: 333186
loss: 0.9818373918533325,grad_norm: 0.8914781865256739, iteration: 333187
loss: 1.0188730955123901,grad_norm: 0.8861503910639247, iteration: 333188
loss: 0.9833062291145325,grad_norm: 0.8869251838868599, iteration: 333189
loss: 1.0270788669586182,grad_norm: 0.862626166507646, iteration: 333190
loss: 1.1210697889328003,grad_norm: 0.8569974859096833, iteration: 333191
loss: 1.0017427206039429,grad_norm: 0.7950983807992072, iteration: 333192
loss: 1.067823886871338,grad_norm: 0.999999653009062, iteration: 333193
loss: 1.0047675371170044,grad_norm: 0.8404675795522251, iteration: 333194
loss: 0.9866266250610352,grad_norm: 0.9999995423025527, iteration: 333195
loss: 0.9659814238548279,grad_norm: 0.8381101427997578, iteration: 333196
loss: 0.9719149470329285,grad_norm: 0.7865687247021804, iteration: 333197
loss: 1.0112621784210205,grad_norm: 0.9089426834458839, iteration: 333198
loss: 0.9848099946975708,grad_norm: 0.9492213962281241, iteration: 333199
loss: 1.0031734704971313,grad_norm: 0.9358052201483986, iteration: 333200
loss: 1.0853066444396973,grad_norm: 0.9999990492724348, iteration: 333201
loss: 1.0760040283203125,grad_norm: 0.9973021874342523, iteration: 333202
loss: 0.9470856189727783,grad_norm: 0.809727412609475, iteration: 333203
loss: 1.1431937217712402,grad_norm: 0.9999993357360093, iteration: 333204
loss: 1.0431187152862549,grad_norm: 0.8831439706177073, iteration: 333205
loss: 1.014864206314087,grad_norm: 0.9999992348617244, iteration: 333206
loss: 0.9888780117034912,grad_norm: 0.8724022971486615, iteration: 333207
loss: 1.0996630191802979,grad_norm: 0.9999995645797246, iteration: 333208
loss: 0.9901288747787476,grad_norm: 0.8403572988514133, iteration: 333209
loss: 1.0061638355255127,grad_norm: 0.8082811893127364, iteration: 333210
loss: 1.0314685106277466,grad_norm: 0.9912019576643197, iteration: 333211
loss: 1.0172756910324097,grad_norm: 0.7336311670526017, iteration: 333212
loss: 0.9845387935638428,grad_norm: 0.9999992818795859, iteration: 333213
loss: 1.0325050354003906,grad_norm: 0.9165832867638657, iteration: 333214
loss: 1.0113893747329712,grad_norm: 0.9289110951857175, iteration: 333215
loss: 1.0415724515914917,grad_norm: 0.9999991262425558, iteration: 333216
loss: 0.9803788661956787,grad_norm: 0.8704787922260969, iteration: 333217
loss: 1.0106369256973267,grad_norm: 0.8050649414534734, iteration: 333218
loss: 1.0041754245758057,grad_norm: 0.8206439520755494, iteration: 333219
loss: 0.9770820140838623,grad_norm: 0.6815346815572152, iteration: 333220
loss: 1.041618824005127,grad_norm: 0.999999828767693, iteration: 333221
loss: 0.9929602742195129,grad_norm: 0.9999990685795739, iteration: 333222
loss: 1.0839322805404663,grad_norm: 0.9358852455913154, iteration: 333223
loss: 1.0101416110992432,grad_norm: 0.8997895303877536, iteration: 333224
loss: 0.9899373054504395,grad_norm: 0.9999992670970644, iteration: 333225
loss: 1.0181564092636108,grad_norm: 0.9186185949266591, iteration: 333226
loss: 0.9870825409889221,grad_norm: 0.9999992159563617, iteration: 333227
loss: 0.9691813588142395,grad_norm: 0.8458376175779017, iteration: 333228
loss: 0.997948944568634,grad_norm: 0.9999992429454464, iteration: 333229
loss: 1.016240119934082,grad_norm: 0.8363695480472361, iteration: 333230
loss: 1.043774962425232,grad_norm: 0.9028176921125812, iteration: 333231
loss: 0.9792956709861755,grad_norm: 0.9830774951094549, iteration: 333232
loss: 1.0522485971450806,grad_norm: 0.8837990527800632, iteration: 333233
loss: 0.9879215359687805,grad_norm: 0.9195823989945816, iteration: 333234
loss: 0.9797247648239136,grad_norm: 0.9999992731572521, iteration: 333235
loss: 0.9867292642593384,grad_norm: 0.7418629748713974, iteration: 333236
loss: 1.0020616054534912,grad_norm: 0.8842885506386334, iteration: 333237
loss: 1.0358233451843262,grad_norm: 0.9999992345235248, iteration: 333238
loss: 0.992464542388916,grad_norm: 0.8199955804366661, iteration: 333239
loss: 0.996029257774353,grad_norm: 0.9999992406387102, iteration: 333240
loss: 1.010862112045288,grad_norm: 0.8523568283366906, iteration: 333241
loss: 1.0029520988464355,grad_norm: 0.949730635731785, iteration: 333242
loss: 0.9973462224006653,grad_norm: 0.9999992783104672, iteration: 333243
loss: 1.0608662366867065,grad_norm: 0.9999993929475962, iteration: 333244
loss: 0.9443196058273315,grad_norm: 0.99999910300326, iteration: 333245
loss: 0.9814974069595337,grad_norm: 0.7785794649287457, iteration: 333246
loss: 0.9798992872238159,grad_norm: 0.7392541043161565, iteration: 333247
loss: 0.9962456822395325,grad_norm: 0.8936193298109392, iteration: 333248
loss: 1.1145426034927368,grad_norm: 0.9999998462528514, iteration: 333249
loss: 1.012405514717102,grad_norm: 0.7428838588643953, iteration: 333250
loss: 1.0024396181106567,grad_norm: 0.7219493823089139, iteration: 333251
loss: 1.018459439277649,grad_norm: 0.9009112521602984, iteration: 333252
loss: 1.0346808433532715,grad_norm: 0.9999994216143284, iteration: 333253
loss: 0.9883344173431396,grad_norm: 0.870829442705759, iteration: 333254
loss: 1.0054070949554443,grad_norm: 0.8747572205765023, iteration: 333255
loss: 1.0515211820602417,grad_norm: 0.8094559317453812, iteration: 333256
loss: 1.0178508758544922,grad_norm: 0.7710860181273612, iteration: 333257
loss: 1.0304217338562012,grad_norm: 0.8821419368120608, iteration: 333258
loss: 0.9858811497688293,grad_norm: 0.8342100383180726, iteration: 333259
loss: 0.9926649332046509,grad_norm: 0.7017985640138334, iteration: 333260
loss: 1.0083190202713013,grad_norm: 0.9999991191751934, iteration: 333261
loss: 0.9796436429023743,grad_norm: 0.7644079580405763, iteration: 333262
loss: 1.0001708269119263,grad_norm: 0.8614550020323146, iteration: 333263
loss: 1.0541731119155884,grad_norm: 0.8182674117259969, iteration: 333264
loss: 0.9829598069190979,grad_norm: 0.767737736860975, iteration: 333265
loss: 1.0107601881027222,grad_norm: 0.8469550943546343, iteration: 333266
loss: 1.001785159111023,grad_norm: 0.9999994510390251, iteration: 333267
loss: 0.9921600222587585,grad_norm: 0.770993370027861, iteration: 333268
loss: 1.0226720571517944,grad_norm: 0.9909649486111942, iteration: 333269
loss: 0.9685521721839905,grad_norm: 0.9999991719546932, iteration: 333270
loss: 0.9839444160461426,grad_norm: 0.9999994392524592, iteration: 333271
loss: 0.9948325753211975,grad_norm: 0.8212222020340113, iteration: 333272
loss: 1.0777759552001953,grad_norm: 0.9999997317949443, iteration: 333273
loss: 1.0554591417312622,grad_norm: 0.9999998955227556, iteration: 333274
loss: 1.0265661478042603,grad_norm: 0.9999992691273915, iteration: 333275
loss: 1.000587821006775,grad_norm: 0.7916046724538878, iteration: 333276
loss: 0.9517627954483032,grad_norm: 0.9999991156284107, iteration: 333277
loss: 1.060234785079956,grad_norm: 0.9999998984856258, iteration: 333278
loss: 0.998354434967041,grad_norm: 0.8087767141065075, iteration: 333279
loss: 0.9651616811752319,grad_norm: 0.7380863895703148, iteration: 333280
loss: 1.0113260746002197,grad_norm: 0.9999999860345938, iteration: 333281
loss: 1.0455994606018066,grad_norm: 0.8435769821670602, iteration: 333282
loss: 0.945804238319397,grad_norm: 0.7683899569811117, iteration: 333283
loss: 1.0677300691604614,grad_norm: 0.9999993836385065, iteration: 333284
loss: 1.0671179294586182,grad_norm: 0.9999990441863722, iteration: 333285
loss: 1.0311552286148071,grad_norm: 0.9999997642245849, iteration: 333286
loss: 1.0482349395751953,grad_norm: 0.9999991560126635, iteration: 333287
loss: 1.0189238786697388,grad_norm: 0.7953071662897193, iteration: 333288
loss: 1.0944210290908813,grad_norm: 0.9999999511359909, iteration: 333289
loss: 1.0021179914474487,grad_norm: 0.871448432882809, iteration: 333290
loss: 0.9856706261634827,grad_norm: 0.7563964177662535, iteration: 333291
loss: 1.070630669593811,grad_norm: 0.9999992686791858, iteration: 333292
loss: 1.014424443244934,grad_norm: 0.9999988992529297, iteration: 333293
loss: 0.9836467504501343,grad_norm: 0.9678200074635745, iteration: 333294
loss: 1.0184338092803955,grad_norm: 0.705564686552366, iteration: 333295
loss: 0.9953132271766663,grad_norm: 0.9990127735724091, iteration: 333296
loss: 0.9888671636581421,grad_norm: 0.9030056693897714, iteration: 333297
loss: 1.0103169679641724,grad_norm: 0.9218277612818953, iteration: 333298
loss: 1.0301649570465088,grad_norm: 0.8960127351899365, iteration: 333299
loss: 1.0203227996826172,grad_norm: 0.9999990581006317, iteration: 333300
loss: 0.9997948408126831,grad_norm: 0.6750860017202274, iteration: 333301
loss: 1.019564151763916,grad_norm: 0.9763909991577924, iteration: 333302
loss: 1.004607915878296,grad_norm: 0.8614951463748036, iteration: 333303
loss: 0.98722243309021,grad_norm: 0.9952615893142243, iteration: 333304
loss: 0.9879989624023438,grad_norm: 0.9999995058156087, iteration: 333305
loss: 1.015196442604065,grad_norm: 0.9891245184966453, iteration: 333306
loss: 1.0808584690093994,grad_norm: 0.999999279480577, iteration: 333307
loss: 1.0080455541610718,grad_norm: 0.8689588516231087, iteration: 333308
loss: 1.0221428871154785,grad_norm: 0.902632909109548, iteration: 333309
loss: 0.9711910486221313,grad_norm: 0.981604211404196, iteration: 333310
loss: 0.983726441860199,grad_norm: 0.728997240360118, iteration: 333311
loss: 1.001772403717041,grad_norm: 0.8367720718828854, iteration: 333312
loss: 0.9967833161354065,grad_norm: 0.9186211223775578, iteration: 333313
loss: 1.0106121301651,grad_norm: 0.9999991559721688, iteration: 333314
loss: 1.007475733757019,grad_norm: 0.9999989861545149, iteration: 333315
loss: 1.0143039226531982,grad_norm: 0.9999996132067258, iteration: 333316
loss: 0.999201774597168,grad_norm: 0.9183249726211228, iteration: 333317
loss: 1.0278810262680054,grad_norm: 0.9999999402008658, iteration: 333318
loss: 1.020258903503418,grad_norm: 0.8711106041715282, iteration: 333319
loss: 0.9843412637710571,grad_norm: 0.8224056678399857, iteration: 333320
loss: 0.9958564639091492,grad_norm: 0.9923034072391217, iteration: 333321
loss: 1.0103243589401245,grad_norm: 0.7955029085336334, iteration: 333322
loss: 0.9923684000968933,grad_norm: 0.8480199529479009, iteration: 333323
loss: 1.0722575187683105,grad_norm: 0.9893144297558166, iteration: 333324
loss: 1.0241179466247559,grad_norm: 0.8995462318092294, iteration: 333325
loss: 0.9651938080787659,grad_norm: 0.8225959717524599, iteration: 333326
loss: 1.0496855974197388,grad_norm: 0.8329170892369241, iteration: 333327
loss: 1.0522258281707764,grad_norm: 0.8341107437437661, iteration: 333328
loss: 1.040755033493042,grad_norm: 0.9499551142328365, iteration: 333329
loss: 0.9966185092926025,grad_norm: 0.8167550147752646, iteration: 333330
loss: 1.0202302932739258,grad_norm: 0.9999991518076402, iteration: 333331
loss: 1.0189045667648315,grad_norm: 0.8243229864759855, iteration: 333332
loss: 1.0151736736297607,grad_norm: 0.7794800246085203, iteration: 333333
loss: 1.0232386589050293,grad_norm: 0.8633447650652321, iteration: 333334
loss: 0.9726380705833435,grad_norm: 0.8447926676889895, iteration: 333335
loss: 1.0014170408248901,grad_norm: 0.8280409946127519, iteration: 333336
loss: 1.0175426006317139,grad_norm: 0.9105081706551338, iteration: 333337
loss: 1.015505075454712,grad_norm: 0.9871934234732698, iteration: 333338
loss: 1.022169589996338,grad_norm: 0.9999990540025702, iteration: 333339
loss: 0.9573134779930115,grad_norm: 0.7437855766687664, iteration: 333340
loss: 0.9784883856773376,grad_norm: 0.7111646350404612, iteration: 333341
loss: 1.0723683834075928,grad_norm: 0.8645952026752349, iteration: 333342
loss: 0.9974543452262878,grad_norm: 0.753841203011131, iteration: 333343
loss: 0.9725333452224731,grad_norm: 0.7820537588144691, iteration: 333344
loss: 1.0118128061294556,grad_norm: 0.8005280487939996, iteration: 333345
loss: 1.0148050785064697,grad_norm: 0.9999995240564744, iteration: 333346
loss: 1.0032336711883545,grad_norm: 0.9151872301767046, iteration: 333347
loss: 0.9715158343315125,grad_norm: 0.7933360462892431, iteration: 333348
loss: 1.0215833187103271,grad_norm: 0.9804400514065982, iteration: 333349
loss: 1.0047053098678589,grad_norm: 0.7708432840319551, iteration: 333350
loss: 1.0183809995651245,grad_norm: 0.9999992105846963, iteration: 333351
loss: 1.0035456418991089,grad_norm: 0.9999989725853188, iteration: 333352
loss: 0.9857708215713501,grad_norm: 0.8685817954363715, iteration: 333353
loss: 0.9949257373809814,grad_norm: 0.8793721610291709, iteration: 333354
loss: 0.9987916350364685,grad_norm: 0.9667491159198544, iteration: 333355
loss: 0.9978207945823669,grad_norm: 0.7653256851700928, iteration: 333356
loss: 0.9714552760124207,grad_norm: 0.8467679383872156, iteration: 333357
loss: 0.985552966594696,grad_norm: 0.7720119113068838, iteration: 333358
loss: 0.9890995025634766,grad_norm: 0.9455767741627683, iteration: 333359
loss: 0.9974948763847351,grad_norm: 0.7665866938668855, iteration: 333360
loss: 0.989155650138855,grad_norm: 0.7905433132521236, iteration: 333361
loss: 0.9873281717300415,grad_norm: 0.979989996793207, iteration: 333362
loss: 0.9942472577095032,grad_norm: 0.8225093100374161, iteration: 333363
loss: 0.9994516372680664,grad_norm: 0.933476292583073, iteration: 333364
loss: 0.9799754023551941,grad_norm: 0.9081107683293975, iteration: 333365
loss: 0.9759716987609863,grad_norm: 0.8949133832033865, iteration: 333366
loss: 1.0252208709716797,grad_norm: 0.9392698595092269, iteration: 333367
loss: 1.0222100019454956,grad_norm: 0.6971362975558614, iteration: 333368
loss: 0.9685598611831665,grad_norm: 0.9999992977456188, iteration: 333369
loss: 0.9895322322845459,grad_norm: 0.9999990785028218, iteration: 333370
loss: 0.9565824866294861,grad_norm: 0.771977579065348, iteration: 333371
loss: 1.0422813892364502,grad_norm: 0.9999991167535097, iteration: 333372
loss: 0.9983651041984558,grad_norm: 0.8372150004467408, iteration: 333373
loss: 0.9856230020523071,grad_norm: 0.744286490852926, iteration: 333374
loss: 0.9845093488693237,grad_norm: 0.8494629665410519, iteration: 333375
loss: 0.9585264325141907,grad_norm: 0.9999991208719657, iteration: 333376
loss: 0.9464460015296936,grad_norm: 0.9999990845779195, iteration: 333377
loss: 1.0300745964050293,grad_norm: 0.9339513491778683, iteration: 333378
loss: 1.0080386400222778,grad_norm: 0.889389366007825, iteration: 333379
loss: 0.9782741069793701,grad_norm: 0.8419391080908335, iteration: 333380
loss: 1.0001887083053589,grad_norm: 0.9999991619658414, iteration: 333381
loss: 0.9858024716377258,grad_norm: 0.9577178487990106, iteration: 333382
loss: 1.033689022064209,grad_norm: 0.9999999564299326, iteration: 333383
loss: 1.0123217105865479,grad_norm: 0.9999990047145666, iteration: 333384
loss: 1.0292876958847046,grad_norm: 0.8062791786693257, iteration: 333385
loss: 1.0169180631637573,grad_norm: 0.9999992391342168, iteration: 333386
loss: 0.982159435749054,grad_norm: 0.9552091437764201, iteration: 333387
loss: 0.958153486251831,grad_norm: 0.7275021601951112, iteration: 333388
loss: 1.0073864459991455,grad_norm: 0.9999993203179735, iteration: 333389
loss: 1.0210965871810913,grad_norm: 0.9453308827131833, iteration: 333390
loss: 0.9959223866462708,grad_norm: 0.7944374401200853, iteration: 333391
loss: 0.9379850029945374,grad_norm: 0.809976342634139, iteration: 333392
loss: 1.0146853923797607,grad_norm: 0.9067696805628113, iteration: 333393
loss: 1.0314136743545532,grad_norm: 0.7987608029024876, iteration: 333394
loss: 1.0210402011871338,grad_norm: 0.911476823482441, iteration: 333395
loss: 1.02217435836792,grad_norm: 0.9034078141553556, iteration: 333396
loss: 0.9822140336036682,grad_norm: 0.8427382730986319, iteration: 333397
loss: 1.0223307609558105,grad_norm: 0.9362240785511847, iteration: 333398
loss: 1.0030616521835327,grad_norm: 0.6316045367515724, iteration: 333399
loss: 1.0029582977294922,grad_norm: 0.8491275175754879, iteration: 333400
loss: 1.02603280544281,grad_norm: 0.7735135772855364, iteration: 333401
loss: 1.039691686630249,grad_norm: 0.8240832621491774, iteration: 333402
loss: 1.0051831007003784,grad_norm: 0.9237861478870244, iteration: 333403
loss: 1.0125123262405396,grad_norm: 0.9999999446251007, iteration: 333404
loss: 1.0042355060577393,grad_norm: 0.9999992037223554, iteration: 333405
loss: 0.9614425897598267,grad_norm: 0.9780434829501388, iteration: 333406
loss: 0.9875352382659912,grad_norm: 0.7236659965594509, iteration: 333407
loss: 1.0297688245773315,grad_norm: 0.8702326913406215, iteration: 333408
loss: 0.9938856959342957,grad_norm: 0.8709100870983123, iteration: 333409
loss: 0.9812914133071899,grad_norm: 0.8856502224490116, iteration: 333410
loss: 1.0059856176376343,grad_norm: 0.8643989479925744, iteration: 333411
loss: 1.0734870433807373,grad_norm: 0.9322553668197205, iteration: 333412
loss: 1.018005132675171,grad_norm: 0.9999991827152939, iteration: 333413
loss: 0.9764793515205383,grad_norm: 0.841155828150423, iteration: 333414
loss: 1.0001029968261719,grad_norm: 0.7982649108766414, iteration: 333415
loss: 0.9687268137931824,grad_norm: 0.8467263004523439, iteration: 333416
loss: 1.0693265199661255,grad_norm: 0.999999168161877, iteration: 333417
loss: 1.0319626331329346,grad_norm: 0.7978863674602404, iteration: 333418
loss: 1.0147329568862915,grad_norm: 0.9839359890687572, iteration: 333419
loss: 0.9982866048812866,grad_norm: 0.8956783616975639, iteration: 333420
loss: 1.0127280950546265,grad_norm: 0.9999998114927015, iteration: 333421
loss: 0.9789934158325195,grad_norm: 0.9999989867552619, iteration: 333422
loss: 1.0297969579696655,grad_norm: 0.984885365713191, iteration: 333423
loss: 0.9854071140289307,grad_norm: 0.9999992589984427, iteration: 333424
loss: 1.0126312971115112,grad_norm: 0.7431231876856397, iteration: 333425
loss: 0.9863794445991516,grad_norm: 0.7752674780379303, iteration: 333426
loss: 1.0068480968475342,grad_norm: 0.9276209792486807, iteration: 333427
loss: 0.9642938375473022,grad_norm: 0.8138971848069003, iteration: 333428
loss: 0.9798116087913513,grad_norm: 0.9921923295991283, iteration: 333429
loss: 1.0883922576904297,grad_norm: 0.8703957105687509, iteration: 333430
loss: 0.9898534417152405,grad_norm: 0.8644082831793758, iteration: 333431
loss: 0.9918720126152039,grad_norm: 0.9026452007782437, iteration: 333432
loss: 1.0580421686172485,grad_norm: 0.9999995407054697, iteration: 333433
loss: 0.969693660736084,grad_norm: 0.8010941806349802, iteration: 333434
loss: 0.9986349940299988,grad_norm: 0.9999996696729266, iteration: 333435
loss: 1.0027415752410889,grad_norm: 0.8031513685428608, iteration: 333436
loss: 1.0301767587661743,grad_norm: 0.8175428678778789, iteration: 333437
loss: 1.1170532703399658,grad_norm: 0.9515443397000792, iteration: 333438
loss: 1.047716498374939,grad_norm: 0.9999995041918809, iteration: 333439
loss: 0.977985143661499,grad_norm: 0.9475593646163522, iteration: 333440
loss: 0.9711151123046875,grad_norm: 0.8288876162678134, iteration: 333441
loss: 1.0583560466766357,grad_norm: 0.9999993298536609, iteration: 333442
loss: 0.9714304804801941,grad_norm: 0.8417067672834907, iteration: 333443
loss: 1.0263158082962036,grad_norm: 0.7592414919815558, iteration: 333444
loss: 1.0140748023986816,grad_norm: 0.9512850416448438, iteration: 333445
loss: 1.0080268383026123,grad_norm: 0.9999993288954856, iteration: 333446
loss: 1.0075156688690186,grad_norm: 0.9999992915090005, iteration: 333447
loss: 1.0318794250488281,grad_norm: 0.9209769049765351, iteration: 333448
loss: 1.0035845041275024,grad_norm: 0.9587268409623533, iteration: 333449
loss: 1.0062607526779175,grad_norm: 0.7289891615672849, iteration: 333450
loss: 1.0185850858688354,grad_norm: 0.8040915115221461, iteration: 333451
loss: 1.00709068775177,grad_norm: 0.7870492050102474, iteration: 333452
loss: 1.019996166229248,grad_norm: 0.865006101689191, iteration: 333453
loss: 0.9570755362510681,grad_norm: 0.7835599102490617, iteration: 333454
loss: 0.9982129335403442,grad_norm: 0.7926018341625793, iteration: 333455
loss: 1.0119296312332153,grad_norm: 0.9654615050868662, iteration: 333456
loss: 1.0285992622375488,grad_norm: 0.7790127227439407, iteration: 333457
loss: 0.9695611000061035,grad_norm: 0.9141666404386278, iteration: 333458
loss: 1.0133416652679443,grad_norm: 0.7297491991135701, iteration: 333459
loss: 1.0037057399749756,grad_norm: 0.8206308119888758, iteration: 333460
loss: 0.9644378423690796,grad_norm: 0.8138555698126754, iteration: 333461
loss: 1.0004680156707764,grad_norm: 0.999999457792102, iteration: 333462
loss: 0.982171356678009,grad_norm: 0.9345255368922442, iteration: 333463
loss: 0.9814727306365967,grad_norm: 0.9999990260844988, iteration: 333464
loss: 1.022196888923645,grad_norm: 0.9487458782701854, iteration: 333465
loss: 1.0145843029022217,grad_norm: 0.9999991202321988, iteration: 333466
loss: 0.9951132535934448,grad_norm: 0.9720367811144359, iteration: 333467
loss: 0.9725226759910583,grad_norm: 0.8045212291986015, iteration: 333468
loss: 1.0018470287322998,grad_norm: 0.8024406767527426, iteration: 333469
loss: 0.9913882613182068,grad_norm: 0.8318143736572173, iteration: 333470
loss: 1.0220661163330078,grad_norm: 0.7990439952356428, iteration: 333471
loss: 0.9764845967292786,grad_norm: 0.7966306408840007, iteration: 333472
loss: 1.0183045864105225,grad_norm: 0.9564920983235784, iteration: 333473
loss: 0.9707750678062439,grad_norm: 0.8816638504546185, iteration: 333474
loss: 0.9949225187301636,grad_norm: 0.670417160387955, iteration: 333475
loss: 1.0010454654693604,grad_norm: 0.9226960449739654, iteration: 333476
loss: 0.9953024387359619,grad_norm: 0.7789115138852798, iteration: 333477
loss: 0.9750645756721497,grad_norm: 0.9428974477080148, iteration: 333478
loss: 1.019855260848999,grad_norm: 0.9475103975467956, iteration: 333479
loss: 0.9970642328262329,grad_norm: 0.7670245452708696, iteration: 333480
loss: 0.9956832528114319,grad_norm: 0.7709859828922658, iteration: 333481
loss: 1.0281665325164795,grad_norm: 0.9999990688534762, iteration: 333482
loss: 1.0097401142120361,grad_norm: 0.9349212333037858, iteration: 333483
loss: 1.0410919189453125,grad_norm: 0.9999991828445509, iteration: 333484
loss: 0.9859318733215332,grad_norm: 0.835108035953897, iteration: 333485
loss: 0.9914326667785645,grad_norm: 0.9999990657817848, iteration: 333486
loss: 1.1812310218811035,grad_norm: 0.9649300618332625, iteration: 333487
loss: 0.9941860437393188,grad_norm: 0.8977783644594843, iteration: 333488
loss: 1.003615379333496,grad_norm: 0.8205588630811032, iteration: 333489
loss: 0.9989085793495178,grad_norm: 0.7999909599622843, iteration: 333490
loss: 1.0085844993591309,grad_norm: 0.9744917833390833, iteration: 333491
loss: 1.006730556488037,grad_norm: 0.8958094045884358, iteration: 333492
loss: 1.0172494649887085,grad_norm: 0.7726666613599563, iteration: 333493
loss: 1.011465072631836,grad_norm: 0.6355166393415957, iteration: 333494
loss: 1.0003708600997925,grad_norm: 0.8588347175172834, iteration: 333495
loss: 1.0001804828643799,grad_norm: 0.7913688053940149, iteration: 333496
loss: 0.9819673895835876,grad_norm: 0.8017671937970343, iteration: 333497
loss: 0.9816654324531555,grad_norm: 0.9017528918623378, iteration: 333498
loss: 1.0642986297607422,grad_norm: 0.9999994430260556, iteration: 333499
loss: 0.992813766002655,grad_norm: 0.6950257344488465, iteration: 333500
loss: 1.0108355283737183,grad_norm: 0.8163501497011192, iteration: 333501
loss: 1.0270333290100098,grad_norm: 0.7560104424484837, iteration: 333502
loss: 0.998847246170044,grad_norm: 0.8934011921795727, iteration: 333503
loss: 1.003099799156189,grad_norm: 0.697903443612717, iteration: 333504
loss: 0.9879232048988342,grad_norm: 0.7950456122639504, iteration: 333505
loss: 0.9977700710296631,grad_norm: 0.6797411515813708, iteration: 333506
loss: 0.9879415035247803,grad_norm: 0.8183605960375091, iteration: 333507
loss: 0.9771509170532227,grad_norm: 0.7666185840132331, iteration: 333508
loss: 0.9862063527107239,grad_norm: 0.8855957876797075, iteration: 333509
loss: 0.9905166029930115,grad_norm: 0.861267522038025, iteration: 333510
loss: 0.9923292398452759,grad_norm: 0.9999990313913327, iteration: 333511
loss: 1.0090192556381226,grad_norm: 0.8986116723726294, iteration: 333512
loss: 0.9706701636314392,grad_norm: 0.7714209620044361, iteration: 333513
loss: 1.1508809328079224,grad_norm: 0.9999998816883422, iteration: 333514
loss: 1.005353569984436,grad_norm: 0.8722758813525453, iteration: 333515
loss: 1.0071545839309692,grad_norm: 0.9256527932819543, iteration: 333516
loss: 0.9880700707435608,grad_norm: 0.6920830355292468, iteration: 333517
loss: 1.004190444946289,grad_norm: 0.6710599532304633, iteration: 333518
loss: 0.99373859167099,grad_norm: 0.7433118825854546, iteration: 333519
loss: 1.0071258544921875,grad_norm: 0.8212220907590423, iteration: 333520
loss: 0.999535322189331,grad_norm: 0.8405937230573834, iteration: 333521
loss: 0.9881471991539001,grad_norm: 0.8636036909166116, iteration: 333522
loss: 1.0077085494995117,grad_norm: 0.9278990838740154, iteration: 333523
loss: 1.0103018283843994,grad_norm: 0.9272275957304533, iteration: 333524
loss: 0.9790531992912292,grad_norm: 0.8507619151258378, iteration: 333525
loss: 1.0093640089035034,grad_norm: 0.9358760214392675, iteration: 333526
loss: 1.0797184705734253,grad_norm: 0.904261045418789, iteration: 333527
loss: 0.9993236064910889,grad_norm: 0.7115757336792629, iteration: 333528
loss: 1.0107896327972412,grad_norm: 0.9190604506548051, iteration: 333529
loss: 0.9893822073936462,grad_norm: 0.8373887361978479, iteration: 333530
loss: 0.9683473110198975,grad_norm: 0.9074144301359113, iteration: 333531
loss: 0.9995002150535583,grad_norm: 0.9535808103013255, iteration: 333532
loss: 0.9949336647987366,grad_norm: 0.9999991332193106, iteration: 333533
loss: 1.007790207862854,grad_norm: 0.725158465060225, iteration: 333534
loss: 1.0358529090881348,grad_norm: 0.9999991385902879, iteration: 333535
loss: 1.0021086931228638,grad_norm: 0.7912139665390506, iteration: 333536
loss: 1.037117600440979,grad_norm: 0.8065944764316967, iteration: 333537
loss: 1.0027698278427124,grad_norm: 0.7613970432146717, iteration: 333538
loss: 0.9966162443161011,grad_norm: 0.9396104745210643, iteration: 333539
loss: 0.9563960433006287,grad_norm: 0.9240506412283604, iteration: 333540
loss: 0.9797962307929993,grad_norm: 0.8918671502208042, iteration: 333541
loss: 1.011705994606018,grad_norm: 0.9115783250095582, iteration: 333542
loss: 0.9682779908180237,grad_norm: 0.9999991025579018, iteration: 333543
loss: 0.9767593741416931,grad_norm: 0.8266993239869327, iteration: 333544
loss: 0.9882715344429016,grad_norm: 0.8580629046091929, iteration: 333545
loss: 1.010867714881897,grad_norm: 0.7943187234245361, iteration: 333546
loss: 1.005013346672058,grad_norm: 0.9293845295536484, iteration: 333547
loss: 0.9773601293563843,grad_norm: 0.9787556866443355, iteration: 333548
loss: 0.9941604733467102,grad_norm: 0.9945003223366116, iteration: 333549
loss: 0.9463088512420654,grad_norm: 0.9852581994834321, iteration: 333550
loss: 1.0115419626235962,grad_norm: 0.8932983884740906, iteration: 333551
loss: 0.9678414463996887,grad_norm: 0.9553271326640027, iteration: 333552
loss: 0.9895303249359131,grad_norm: 0.8273295456278384, iteration: 333553
loss: 0.9948047399520874,grad_norm: 0.8934695341723718, iteration: 333554
loss: 1.0263673067092896,grad_norm: 0.999999797801831, iteration: 333555
loss: 0.9983790516853333,grad_norm: 0.9784715000990543, iteration: 333556
loss: 1.04192316532135,grad_norm: 0.999999549293779, iteration: 333557
loss: 0.988466739654541,grad_norm: 0.706713618982632, iteration: 333558
loss: 0.9855452179908752,grad_norm: 0.8635446344367671, iteration: 333559
loss: 0.9648603796958923,grad_norm: 0.8725411691413947, iteration: 333560
loss: 0.9888946413993835,grad_norm: 0.805275838692099, iteration: 333561
loss: 1.0384169816970825,grad_norm: 0.9999999139193082, iteration: 333562
loss: 1.016142725944519,grad_norm: 0.9120601001600358, iteration: 333563
loss: 1.0179136991500854,grad_norm: 0.9195371974900783, iteration: 333564
loss: 1.0341883897781372,grad_norm: 0.9661284836517008, iteration: 333565
loss: 1.0227497816085815,grad_norm: 0.7237895451086289, iteration: 333566
loss: 0.994907021522522,grad_norm: 0.7658176118289504, iteration: 333567
loss: 0.9569717049598694,grad_norm: 0.8110733909515228, iteration: 333568
loss: 1.0166658163070679,grad_norm: 0.9999996333050287, iteration: 333569
loss: 0.994550883769989,grad_norm: 0.7916286492906355, iteration: 333570
loss: 1.0210919380187988,grad_norm: 0.8388124467986386, iteration: 333571
loss: 0.9745275974273682,grad_norm: 0.7572833097905665, iteration: 333572
loss: 0.9770996570587158,grad_norm: 0.9257825696767132, iteration: 333573
loss: 1.0057075023651123,grad_norm: 0.9638145689529283, iteration: 333574
loss: 1.0157372951507568,grad_norm: 0.8918014681898481, iteration: 333575
loss: 0.9739276766777039,grad_norm: 0.9999989005337053, iteration: 333576
loss: 0.9834646582603455,grad_norm: 0.9999993375970819, iteration: 333577
loss: 0.9940506815910339,grad_norm: 0.9226294879047388, iteration: 333578
loss: 0.9646995663642883,grad_norm: 0.9099404020628817, iteration: 333579
loss: 1.0365705490112305,grad_norm: 0.8869609813980428, iteration: 333580
loss: 1.0155278444290161,grad_norm: 0.7412990092348072, iteration: 333581
loss: 1.0138367414474487,grad_norm: 0.8581000214789654, iteration: 333582
loss: 0.9947211742401123,grad_norm: 0.7805545941863229, iteration: 333583
loss: 0.9986053109169006,grad_norm: 0.8549922873173195, iteration: 333584
loss: 0.9864373207092285,grad_norm: 0.9822436827283367, iteration: 333585
loss: 0.9943609833717346,grad_norm: 0.8211561397377707, iteration: 333586
loss: 0.9654495716094971,grad_norm: 0.7094972436073363, iteration: 333587
loss: 1.0014070272445679,grad_norm: 0.765562311167208, iteration: 333588
loss: 1.000894546508789,grad_norm: 0.6647899346365663, iteration: 333589
loss: 1.0296056270599365,grad_norm: 0.698388168445522, iteration: 333590
loss: 0.9877519607543945,grad_norm: 0.8488870335625065, iteration: 333591
loss: 0.9803953766822815,grad_norm: 0.8784194516424084, iteration: 333592
loss: 1.0162432193756104,grad_norm: 0.8557184338621228, iteration: 333593
loss: 0.9690717458724976,grad_norm: 0.8258373110090036, iteration: 333594
loss: 1.0751385688781738,grad_norm: 0.9999991494080749, iteration: 333595
loss: 1.0080785751342773,grad_norm: 0.8483079653616753, iteration: 333596
loss: 0.9772452712059021,grad_norm: 0.8747567324463436, iteration: 333597
loss: 1.0030988454818726,grad_norm: 0.783209908071751, iteration: 333598
loss: 1.0437043905258179,grad_norm: 0.7739709345356697, iteration: 333599
loss: 0.9606490731239319,grad_norm: 0.7420885464164644, iteration: 333600
loss: 1.1141165494918823,grad_norm: 0.9999999919731232, iteration: 333601
loss: 0.9996734857559204,grad_norm: 0.8819349317008095, iteration: 333602
loss: 0.9724541902542114,grad_norm: 0.7661604451418813, iteration: 333603
loss: 1.015872836112976,grad_norm: 0.8757278814304831, iteration: 333604
loss: 0.9673485159873962,grad_norm: 0.8389261236539779, iteration: 333605
loss: 0.980635941028595,grad_norm: 0.9999991484593227, iteration: 333606
loss: 1.0552008152008057,grad_norm: 0.9934070358079772, iteration: 333607
loss: 1.0013257265090942,grad_norm: 0.81845270679698, iteration: 333608
loss: 0.9685352444648743,grad_norm: 0.7614385498523498, iteration: 333609
loss: 0.9894561767578125,grad_norm: 0.8681239752170604, iteration: 333610
loss: 1.0547112226486206,grad_norm: 0.9159283857990441, iteration: 333611
loss: 0.9693506360054016,grad_norm: 0.9062504079386824, iteration: 333612
loss: 0.9541170597076416,grad_norm: 0.794088347183315, iteration: 333613
loss: 0.9796056151390076,grad_norm: 0.9382207753723478, iteration: 333614
loss: 0.9963341355323792,grad_norm: 0.8297876828825015, iteration: 333615
loss: 1.0394753217697144,grad_norm: 0.9193944870166386, iteration: 333616
loss: 0.9857876896858215,grad_norm: 0.7410987246623874, iteration: 333617
loss: 0.9797056317329407,grad_norm: 0.7346447285509478, iteration: 333618
loss: 1.0114891529083252,grad_norm: 0.8506278581778909, iteration: 333619
loss: 0.9689078330993652,grad_norm: 0.9156080073757175, iteration: 333620
loss: 0.9677656292915344,grad_norm: 0.7897907458999587, iteration: 333621
loss: 0.9932851195335388,grad_norm: 0.8428541122123877, iteration: 333622
loss: 0.986372709274292,grad_norm: 0.7740096305783133, iteration: 333623
loss: 1.0281269550323486,grad_norm: 0.8667324820616903, iteration: 333624
loss: 1.0043164491653442,grad_norm: 0.8073212401207038, iteration: 333625
loss: 1.0323485136032104,grad_norm: 0.9131838301009622, iteration: 333626
loss: 1.0008180141448975,grad_norm: 0.71573124271076, iteration: 333627
loss: 0.9706830382347107,grad_norm: 0.7667239078516481, iteration: 333628
loss: 1.0375194549560547,grad_norm: 0.7863102056247365, iteration: 333629
loss: 1.0040422677993774,grad_norm: 0.7307880978728698, iteration: 333630
loss: 0.9935826063156128,grad_norm: 0.8862715715656893, iteration: 333631
loss: 1.016120433807373,grad_norm: 0.7389530068851031, iteration: 333632
loss: 1.0087343454360962,grad_norm: 0.913315666050132, iteration: 333633
loss: 0.9884359240531921,grad_norm: 0.8380354495497765, iteration: 333634
loss: 1.0321192741394043,grad_norm: 0.7055081190899276, iteration: 333635
loss: 1.0476975440979004,grad_norm: 0.9999995918619227, iteration: 333636
loss: 0.979751467704773,grad_norm: 0.9999989723570143, iteration: 333637
loss: 1.0197051763534546,grad_norm: 0.788652329812774, iteration: 333638
loss: 1.0188159942626953,grad_norm: 0.876356380947811, iteration: 333639
loss: 0.9929125905036926,grad_norm: 0.8337152408876818, iteration: 333640
loss: 0.9933939576148987,grad_norm: 0.7687608994801235, iteration: 333641
loss: 1.0095337629318237,grad_norm: 0.9999991915580814, iteration: 333642
loss: 1.0017298460006714,grad_norm: 0.7379567784823311, iteration: 333643
loss: 1.0336179733276367,grad_norm: 0.9999994944040942, iteration: 333644
loss: 1.012015700340271,grad_norm: 0.9592595832915825, iteration: 333645
loss: 1.0052855014801025,grad_norm: 0.9999990855374865, iteration: 333646
loss: 1.000886082649231,grad_norm: 0.953933791636338, iteration: 333647
loss: 1.1389696598052979,grad_norm: 0.9999995964542954, iteration: 333648
loss: 0.9805001020431519,grad_norm: 0.8525710578005077, iteration: 333649
loss: 1.0028374195098877,grad_norm: 0.9073083732593105, iteration: 333650
loss: 1.0947914123535156,grad_norm: 0.9999995673287085, iteration: 333651
loss: 1.0059068202972412,grad_norm: 0.9999991978114122, iteration: 333652
loss: 1.022274136543274,grad_norm: 0.7288636766872912, iteration: 333653
loss: 0.958035409450531,grad_norm: 0.9945034922420437, iteration: 333654
loss: 0.9854583144187927,grad_norm: 0.7559384112752321, iteration: 333655
loss: 1.059080958366394,grad_norm: 0.9999990302559217, iteration: 333656
loss: 1.0570770502090454,grad_norm: 0.8290243993639622, iteration: 333657
loss: 1.028017282485962,grad_norm: 0.8916295742291762, iteration: 333658
loss: 0.979900062084198,grad_norm: 0.7858307511420515, iteration: 333659
loss: 0.9796537160873413,grad_norm: 0.8469840339787547, iteration: 333660
loss: 1.0579451322555542,grad_norm: 0.9041842440032014, iteration: 333661
loss: 1.0000801086425781,grad_norm: 0.8760869857911199, iteration: 333662
loss: 0.9895641803741455,grad_norm: 0.8285863857237684, iteration: 333663
loss: 0.9987645745277405,grad_norm: 0.7451639968239511, iteration: 333664
loss: 1.0118029117584229,grad_norm: 0.6784819389833459, iteration: 333665
loss: 0.9924570918083191,grad_norm: 0.9022644224640907, iteration: 333666
loss: 1.0075663328170776,grad_norm: 0.9818672698573341, iteration: 333667
loss: 1.0299628973007202,grad_norm: 0.884623741162182, iteration: 333668
loss: 1.0121711492538452,grad_norm: 0.8395660501945494, iteration: 333669
loss: 1.0326370000839233,grad_norm: 0.9999991135153461, iteration: 333670
loss: 1.0073944330215454,grad_norm: 0.7255263420824726, iteration: 333671
loss: 0.9894662499427795,grad_norm: 0.77809463048283, iteration: 333672
loss: 1.0061959028244019,grad_norm: 0.9242998770943472, iteration: 333673
loss: 0.9947529435157776,grad_norm: 0.8011293656647938, iteration: 333674
loss: 1.0710865259170532,grad_norm: 0.9999991338777392, iteration: 333675
loss: 1.0014045238494873,grad_norm: 0.9605334611380367, iteration: 333676
loss: 1.003674864768982,grad_norm: 0.8976173690560562, iteration: 333677
loss: 0.995334267616272,grad_norm: 0.8204077424132357, iteration: 333678
loss: 1.0126923322677612,grad_norm: 0.7838332678436332, iteration: 333679
loss: 0.9911574721336365,grad_norm: 0.7448407295439231, iteration: 333680
loss: 0.9929436445236206,grad_norm: 0.8175160651860147, iteration: 333681
loss: 1.017896294593811,grad_norm: 0.7843204827158994, iteration: 333682
loss: 1.0119655132293701,grad_norm: 0.9097217436876189, iteration: 333683
loss: 1.0128192901611328,grad_norm: 0.8244971865295136, iteration: 333684
loss: 1.0051202774047852,grad_norm: 0.9999991957437071, iteration: 333685
loss: 1.03227961063385,grad_norm: 0.974210941374008, iteration: 333686
loss: 0.9962313771247864,grad_norm: 0.7057938651342753, iteration: 333687
loss: 0.9982134103775024,grad_norm: 0.7839601946081087, iteration: 333688
loss: 1.015687108039856,grad_norm: 0.9999990960258677, iteration: 333689
loss: 0.9837437868118286,grad_norm: 0.8029067744739977, iteration: 333690
loss: 1.006353735923767,grad_norm: 0.8253035397470779, iteration: 333691
loss: 0.9778040051460266,grad_norm: 0.9653245987739975, iteration: 333692
loss: 1.0263851881027222,grad_norm: 0.7449446888394883, iteration: 333693
loss: 1.0499391555786133,grad_norm: 0.7515925748789436, iteration: 333694
loss: 1.015104055404663,grad_norm: 0.727287512479004, iteration: 333695
loss: 1.0076267719268799,grad_norm: 0.7829735864628171, iteration: 333696
loss: 0.9784295558929443,grad_norm: 0.7637365611362357, iteration: 333697
loss: 1.0115461349487305,grad_norm: 0.8738744954831377, iteration: 333698
loss: 0.9671432971954346,grad_norm: 0.7396204045713235, iteration: 333699
loss: 0.989190399646759,grad_norm: 0.7958700302027187, iteration: 333700
loss: 0.9803428053855896,grad_norm: 0.72430866018955, iteration: 333701
loss: 1.0184037685394287,grad_norm: 0.7730214419091563, iteration: 333702
loss: 1.0096062421798706,grad_norm: 0.7683163926821784, iteration: 333703
loss: 1.035430669784546,grad_norm: 0.9999997238136511, iteration: 333704
loss: 1.0285027027130127,grad_norm: 0.6819745384404053, iteration: 333705
loss: 1.0036208629608154,grad_norm: 0.9528213352827533, iteration: 333706
loss: 0.9661006927490234,grad_norm: 0.8329097914353283, iteration: 333707
loss: 0.980406641960144,grad_norm: 0.834240500555096, iteration: 333708
loss: 0.997000515460968,grad_norm: 0.9999989443967282, iteration: 333709
loss: 0.9745252132415771,grad_norm: 0.9184473877122605, iteration: 333710
loss: 1.063867449760437,grad_norm: 0.9999993133001638, iteration: 333711
loss: 1.0075963735580444,grad_norm: 0.9999993268388487, iteration: 333712
loss: 0.998058021068573,grad_norm: 0.9999990269505007, iteration: 333713
loss: 0.9968940615653992,grad_norm: 0.7216169413431244, iteration: 333714
loss: 1.0311832427978516,grad_norm: 0.9999993637493472, iteration: 333715
loss: 0.9550540447235107,grad_norm: 0.9290614204686637, iteration: 333716
loss: 1.0636671781539917,grad_norm: 0.9999993348780059, iteration: 333717
loss: 1.0035117864608765,grad_norm: 0.7158964459397791, iteration: 333718
loss: 0.990415632724762,grad_norm: 0.8286067576844836, iteration: 333719
loss: 1.0401309728622437,grad_norm: 0.9999998567534121, iteration: 333720
loss: 1.0819989442825317,grad_norm: 1.0000000042264912, iteration: 333721
loss: 1.0074769258499146,grad_norm: 0.8343834845161152, iteration: 333722
loss: 1.0103703737258911,grad_norm: 0.7716634998563393, iteration: 333723
loss: 0.9896174073219299,grad_norm: 0.9999992517446848, iteration: 333724
loss: 0.9835458993911743,grad_norm: 0.9999990807057227, iteration: 333725
loss: 1.0181721448898315,grad_norm: 0.9440010671553113, iteration: 333726
loss: 1.0146864652633667,grad_norm: 0.999999196296907, iteration: 333727
loss: 0.995342493057251,grad_norm: 0.9521267548297933, iteration: 333728
loss: 0.9943467974662781,grad_norm: 0.7440657201277837, iteration: 333729
loss: 0.984500527381897,grad_norm: 0.9000606239263065, iteration: 333730
loss: 1.0054359436035156,grad_norm: 0.7831112375996775, iteration: 333731
loss: 1.019148349761963,grad_norm: 0.9999995222385398, iteration: 333732
loss: 1.0067646503448486,grad_norm: 0.9999990190588604, iteration: 333733
loss: 0.998064398765564,grad_norm: 0.9648727965444949, iteration: 333734
loss: 0.9945752620697021,grad_norm: 0.7492432313664048, iteration: 333735
loss: 1.007123589515686,grad_norm: 0.8120533973990213, iteration: 333736
loss: 0.9815211892127991,grad_norm: 0.8683120801755339, iteration: 333737
loss: 1.0089354515075684,grad_norm: 0.9875557785991893, iteration: 333738
loss: 0.9645291566848755,grad_norm: 0.7435855588914213, iteration: 333739
loss: 1.0155408382415771,grad_norm: 0.9999993303483866, iteration: 333740
loss: 0.9685960412025452,grad_norm: 0.9181053920234329, iteration: 333741
loss: 1.0455145835876465,grad_norm: 0.9996269320180177, iteration: 333742
loss: 1.0020978450775146,grad_norm: 0.9999999087740657, iteration: 333743
loss: 0.9776366949081421,grad_norm: 0.7788410444938801, iteration: 333744
loss: 0.9793444275856018,grad_norm: 0.9057103432542055, iteration: 333745
loss: 1.020167350769043,grad_norm: 0.8056506695048327, iteration: 333746
loss: 0.9993448853492737,grad_norm: 0.7622226680960883, iteration: 333747
loss: 1.0310657024383545,grad_norm: 0.9999991162739968, iteration: 333748
loss: 0.9716587662696838,grad_norm: 0.9999991168114729, iteration: 333749
loss: 1.0024141073226929,grad_norm: 0.9226617877896688, iteration: 333750
loss: 1.0234487056732178,grad_norm: 0.8755435377670907, iteration: 333751
loss: 1.0034140348434448,grad_norm: 0.7555114473543428, iteration: 333752
loss: 1.0116093158721924,grad_norm: 0.8344401813536905, iteration: 333753
loss: 0.9942281246185303,grad_norm: 0.896942851341967, iteration: 333754
loss: 0.9718952178955078,grad_norm: 0.7261246944527212, iteration: 333755
loss: 0.9863670468330383,grad_norm: 0.7609766499678111, iteration: 333756
loss: 0.9782128930091858,grad_norm: 0.7197421427033917, iteration: 333757
loss: 0.9832155704498291,grad_norm: 0.9999989861807925, iteration: 333758
loss: 0.9819181561470032,grad_norm: 0.8309950358075123, iteration: 333759
loss: 0.9928264617919922,grad_norm: 0.7322280307580645, iteration: 333760
loss: 0.9690256118774414,grad_norm: 0.8645562045784589, iteration: 333761
loss: 1.0046249628067017,grad_norm: 0.8162737886424943, iteration: 333762
loss: 1.0033949613571167,grad_norm: 0.7185341140198429, iteration: 333763
loss: 0.9911810755729675,grad_norm: 0.815403083660156, iteration: 333764
loss: 0.9930123686790466,grad_norm: 0.9559503098220917, iteration: 333765
loss: 1.0010547637939453,grad_norm: 0.8439384215356208, iteration: 333766
loss: 1.0576122999191284,grad_norm: 0.9683599261278784, iteration: 333767
loss: 1.0192756652832031,grad_norm: 0.9057599993553437, iteration: 333768
loss: 0.9650771021842957,grad_norm: 0.9617848410048713, iteration: 333769
loss: 1.0251368284225464,grad_norm: 0.999999278453479, iteration: 333770
loss: 0.9972803592681885,grad_norm: 0.816093445229853, iteration: 333771
loss: 1.0162134170532227,grad_norm: 0.827161972691202, iteration: 333772
loss: 0.9903954863548279,grad_norm: 0.8174246040300347, iteration: 333773
loss: 1.0313823223114014,grad_norm: 0.8260965680614617, iteration: 333774
loss: 1.0045729875564575,grad_norm: 0.792934907469809, iteration: 333775
loss: 0.9696927666664124,grad_norm: 0.9299978274393522, iteration: 333776
loss: 1.040205955505371,grad_norm: 0.8543818854666663, iteration: 333777
loss: 0.9889503717422485,grad_norm: 0.8239111121500926, iteration: 333778
loss: 0.9652329683303833,grad_norm: 0.9292485750640155, iteration: 333779
loss: 0.9905542135238647,grad_norm: 0.9250074664853021, iteration: 333780
loss: 0.9773979783058167,grad_norm: 0.8896437270386798, iteration: 333781
loss: 0.9793869853019714,grad_norm: 0.9999993373709347, iteration: 333782
loss: 0.979557454586029,grad_norm: 0.7875504463706322, iteration: 333783
loss: 1.0063626766204834,grad_norm: 0.8251752225249372, iteration: 333784
loss: 1.0409601926803589,grad_norm: 0.7880162593868647, iteration: 333785
loss: 1.009334683418274,grad_norm: 0.7631172020496647, iteration: 333786
loss: 0.9987331628799438,grad_norm: 0.7651008901130796, iteration: 333787
loss: 0.9953936338424683,grad_norm: 0.9333246836035897, iteration: 333788
loss: 1.00706946849823,grad_norm: 0.9999990938259073, iteration: 333789
loss: 1.0020643472671509,grad_norm: 0.9426899296677282, iteration: 333790
loss: 0.9743145108222961,grad_norm: 0.8320977672570359, iteration: 333791
loss: 1.0210926532745361,grad_norm: 0.9999993315873962, iteration: 333792
loss: 0.9843606948852539,grad_norm: 0.8516577895341907, iteration: 333793
loss: 1.0308589935302734,grad_norm: 0.9811069731872908, iteration: 333794
loss: 0.9671130776405334,grad_norm: 0.8051109833393915, iteration: 333795
loss: 0.9819105863571167,grad_norm: 0.7878503457850575, iteration: 333796
loss: 0.9766790270805359,grad_norm: 0.9401502969781644, iteration: 333797
loss: 0.9954369068145752,grad_norm: 0.9472773773588449, iteration: 333798
loss: 1.0051674842834473,grad_norm: 0.8206374500458241, iteration: 333799
loss: 1.0855448246002197,grad_norm: 0.9999998706510477, iteration: 333800
loss: 0.9564992189407349,grad_norm: 0.8859189718830013, iteration: 333801
loss: 1.0723527669906616,grad_norm: 0.9290425605541783, iteration: 333802
loss: 1.0026841163635254,grad_norm: 0.7989609727934847, iteration: 333803
loss: 1.0001225471496582,grad_norm: 0.8013561516276032, iteration: 333804
loss: 1.0232858657836914,grad_norm: 0.8978692586992891, iteration: 333805
loss: 1.0089514255523682,grad_norm: 0.7161015979807107, iteration: 333806
loss: 0.9358986020088196,grad_norm: 0.8274535444146449, iteration: 333807
loss: 1.0042184591293335,grad_norm: 0.7172040933043928, iteration: 333808
loss: 0.9835830926895142,grad_norm: 0.6768374390984645, iteration: 333809
loss: 0.970424473285675,grad_norm: 0.8189968164981725, iteration: 333810
loss: 0.9987363219261169,grad_norm: 0.9908953566309476, iteration: 333811
loss: 0.985552966594696,grad_norm: 0.6896215664171347, iteration: 333812
loss: 0.9828532934188843,grad_norm: 0.8472178281051932, iteration: 333813
loss: 1.0290732383728027,grad_norm: 0.804888484924363, iteration: 333814
loss: 1.0223908424377441,grad_norm: 0.9999997831943965, iteration: 333815
loss: 1.0039050579071045,grad_norm: 0.9101647621597133, iteration: 333816
loss: 1.0352106094360352,grad_norm: 0.999999135934347, iteration: 333817
loss: 0.9753068685531616,grad_norm: 0.7976757103750347, iteration: 333818
loss: 1.0194010734558105,grad_norm: 0.8388533526969415, iteration: 333819
loss: 1.001033902168274,grad_norm: 0.7053596392768734, iteration: 333820
loss: 0.9919520020484924,grad_norm: 0.9649572114296096, iteration: 333821
loss: 0.9708747863769531,grad_norm: 0.8367065560035759, iteration: 333822
loss: 0.9956004619598389,grad_norm: 0.9626334890340419, iteration: 333823
loss: 1.0136620998382568,grad_norm: 0.9268370021019469, iteration: 333824
loss: 0.9985672235488892,grad_norm: 0.7736174197636589, iteration: 333825
loss: 0.983441948890686,grad_norm: 0.7535601358818638, iteration: 333826
loss: 1.0171812772750854,grad_norm: 0.8005293556195674, iteration: 333827
loss: 1.0110173225402832,grad_norm: 0.8927179126711875, iteration: 333828
loss: 1.0223357677459717,grad_norm: 0.9970930875826287, iteration: 333829
loss: 1.025008201599121,grad_norm: 0.9999992914305565, iteration: 333830
loss: 0.9929559230804443,grad_norm: 0.9999991580305532, iteration: 333831
loss: 0.9754670858383179,grad_norm: 0.7419542050557176, iteration: 333832
loss: 1.0361502170562744,grad_norm: 0.7930040334068335, iteration: 333833
loss: 0.9756240248680115,grad_norm: 0.8668412514722844, iteration: 333834
loss: 0.9819033145904541,grad_norm: 0.999999067647931, iteration: 333835
loss: 0.9931594133377075,grad_norm: 0.7373474883132485, iteration: 333836
loss: 0.9976208209991455,grad_norm: 0.6922294166288048, iteration: 333837
loss: 1.0146496295928955,grad_norm: 0.9124875880204487, iteration: 333838
loss: 0.9795673489570618,grad_norm: 0.918541249256601, iteration: 333839
loss: 0.9958997964859009,grad_norm: 0.7957152652886654, iteration: 333840
loss: 1.0085786581039429,grad_norm: 0.8250158443339297, iteration: 333841
loss: 0.9711722731590271,grad_norm: 0.9999990395388146, iteration: 333842
loss: 1.0147935152053833,grad_norm: 0.8187135136396066, iteration: 333843
loss: 0.9766300916671753,grad_norm: 0.7805221496792019, iteration: 333844
loss: 0.9977568984031677,grad_norm: 0.6280494130417382, iteration: 333845
loss: 1.0022549629211426,grad_norm: 0.8945257234157399, iteration: 333846
loss: 1.0709638595581055,grad_norm: 0.9969626561496583, iteration: 333847
loss: 1.025231957435608,grad_norm: 0.8058230505883446, iteration: 333848
loss: 0.98036128282547,grad_norm: 0.8248073869938505, iteration: 333849
loss: 0.9906715750694275,grad_norm: 0.7846851693810875, iteration: 333850
loss: 1.0810192823410034,grad_norm: 0.8018714811144865, iteration: 333851
loss: 0.9763615131378174,grad_norm: 0.8375949668722968, iteration: 333852
loss: 1.0125521421432495,grad_norm: 0.9999994543896217, iteration: 333853
loss: 0.923583447933197,grad_norm: 0.9999989933724701, iteration: 333854
loss: 0.9636972546577454,grad_norm: 0.9379687649876493, iteration: 333855
loss: 1.0095235109329224,grad_norm: 0.9999991492073275, iteration: 333856
loss: 0.9914310574531555,grad_norm: 0.7978753663623271, iteration: 333857
loss: 1.0027354955673218,grad_norm: 0.7926346883004077, iteration: 333858
loss: 1.03216552734375,grad_norm: 0.7661891429963282, iteration: 333859
loss: 0.9613494873046875,grad_norm: 0.8646161829911003, iteration: 333860
loss: 1.0131384134292603,grad_norm: 0.7851877844335866, iteration: 333861
loss: 1.008122205734253,grad_norm: 0.969078862553894, iteration: 333862
loss: 0.9841150045394897,grad_norm: 0.7734400215545986, iteration: 333863
loss: 1.012058973312378,grad_norm: 0.8256442779228595, iteration: 333864
loss: 1.0012551546096802,grad_norm: 0.8174774577127162, iteration: 333865
loss: 0.9744402766227722,grad_norm: 0.7160204514107029, iteration: 333866
loss: 1.0065799951553345,grad_norm: 0.8310650804323275, iteration: 333867
loss: 0.9961439967155457,grad_norm: 0.8335542480221357, iteration: 333868
loss: 1.0277292728424072,grad_norm: 0.9773459391407215, iteration: 333869
loss: 1.007858157157898,grad_norm: 0.8366786349980346, iteration: 333870
loss: 1.0354034900665283,grad_norm: 0.9999997542202247, iteration: 333871
loss: 0.9933199286460876,grad_norm: 0.8338586616156267, iteration: 333872
loss: 0.9942671656608582,grad_norm: 0.7095516422548351, iteration: 333873
loss: 1.011335015296936,grad_norm: 0.7441236401495772, iteration: 333874
loss: 1.0191563367843628,grad_norm: 0.8431564845318757, iteration: 333875
loss: 1.0025334358215332,grad_norm: 0.7527790753880018, iteration: 333876
loss: 0.9997327923774719,grad_norm: 0.7630810225781512, iteration: 333877
loss: 1.0018097162246704,grad_norm: 0.9420702059552435, iteration: 333878
loss: 0.9834890961647034,grad_norm: 0.7554555974562353, iteration: 333879
loss: 0.9803879261016846,grad_norm: 0.8354532520873871, iteration: 333880
loss: 1.100752592086792,grad_norm: 0.9131690335417559, iteration: 333881
loss: 1.010176181793213,grad_norm: 0.895591147026026, iteration: 333882
loss: 1.0070796012878418,grad_norm: 0.9297670323336124, iteration: 333883
loss: 1.0782678127288818,grad_norm: 0.9999990708945264, iteration: 333884
loss: 1.0215728282928467,grad_norm: 0.9805820032256563, iteration: 333885
loss: 1.0147746801376343,grad_norm: 0.9999991383009791, iteration: 333886
loss: 1.001692771911621,grad_norm: 0.8092123538673593, iteration: 333887
loss: 1.0084463357925415,grad_norm: 0.8644056754487704, iteration: 333888
loss: 1.0523653030395508,grad_norm: 0.99999906018823, iteration: 333889
loss: 1.0214627981185913,grad_norm: 0.8225451306507826, iteration: 333890
loss: 0.993365466594696,grad_norm: 0.7205450513637095, iteration: 333891
loss: 1.0033702850341797,grad_norm: 0.8208227586257227, iteration: 333892
loss: 1.0143139362335205,grad_norm: 0.7795493619584556, iteration: 333893
loss: 0.9925320148468018,grad_norm: 0.8965983361139214, iteration: 333894
loss: 1.0143699645996094,grad_norm: 0.7767695630820696, iteration: 333895
loss: 0.9709802269935608,grad_norm: 0.8133080919949636, iteration: 333896
loss: 1.006056785583496,grad_norm: 0.8095163901852187, iteration: 333897
loss: 0.9955259561538696,grad_norm: 0.805625941301927, iteration: 333898
loss: 1.0061920881271362,grad_norm: 0.9202371170938479, iteration: 333899
loss: 1.0528010129928589,grad_norm: 0.9233147644282832, iteration: 333900
loss: 1.0106559991836548,grad_norm: 0.8949373895485842, iteration: 333901
loss: 1.0093178749084473,grad_norm: 0.9999996353029598, iteration: 333902
loss: 0.9927122592926025,grad_norm: 0.9999989289160718, iteration: 333903
loss: 1.075838565826416,grad_norm: 0.6903454560576698, iteration: 333904
loss: 0.988560676574707,grad_norm: 0.9216909649775674, iteration: 333905
loss: 0.9745084047317505,grad_norm: 0.8813788106073637, iteration: 333906
loss: 1.0888221263885498,grad_norm: 0.8767902571326484, iteration: 333907
loss: 1.073262095451355,grad_norm: 0.9999992727557833, iteration: 333908
loss: 0.9708632230758667,grad_norm: 0.9999989937539614, iteration: 333909
loss: 0.9894436001777649,grad_norm: 0.928544616442303, iteration: 333910
loss: 1.0079227685928345,grad_norm: 0.8266127375669311, iteration: 333911
loss: 1.01611328125,grad_norm: 0.8917041302874943, iteration: 333912
loss: 1.0087206363677979,grad_norm: 0.9806860710636555, iteration: 333913
loss: 0.9663118124008179,grad_norm: 0.8434739288174805, iteration: 333914
loss: 0.9793814420700073,grad_norm: 0.9735991322515255, iteration: 333915
loss: 1.0624539852142334,grad_norm: 0.9999995500438877, iteration: 333916
loss: 1.0037131309509277,grad_norm: 0.7305910687523676, iteration: 333917
loss: 1.0442132949829102,grad_norm: 0.8674036213051473, iteration: 333918
loss: 0.9852229952812195,grad_norm: 0.7743040828261807, iteration: 333919
loss: 0.9673497676849365,grad_norm: 0.7371381470984228, iteration: 333920
loss: 0.9767570495605469,grad_norm: 0.610799098547182, iteration: 333921
loss: 0.9632441401481628,grad_norm: 0.7788455408261385, iteration: 333922
loss: 1.0113602876663208,grad_norm: 0.8585281709073571, iteration: 333923
loss: 1.056483507156372,grad_norm: 0.915552039896463, iteration: 333924
loss: 0.987432599067688,grad_norm: 0.8925781001647635, iteration: 333925
loss: 1.0142474174499512,grad_norm: 0.9999994350794714, iteration: 333926
loss: 1.0771435499191284,grad_norm: 0.9999992574119898, iteration: 333927
loss: 1.0084738731384277,grad_norm: 0.8677891913846443, iteration: 333928
loss: 1.0505050420761108,grad_norm: 0.8127040344852559, iteration: 333929
loss: 0.9604411721229553,grad_norm: 0.8678969443008894, iteration: 333930
loss: 1.0263348817825317,grad_norm: 0.954466363429367, iteration: 333931
loss: 0.9381257891654968,grad_norm: 0.8347535850375858, iteration: 333932
loss: 0.9696709513664246,grad_norm: 0.8174778774883623, iteration: 333933
loss: 1.0209076404571533,grad_norm: 0.9881255927543906, iteration: 333934
loss: 1.0170401334762573,grad_norm: 0.8506687979537888, iteration: 333935
loss: 1.00092351436615,grad_norm: 0.8033702107947086, iteration: 333936
loss: 1.0129108428955078,grad_norm: 0.9999991520010879, iteration: 333937
loss: 1.0056484937667847,grad_norm: 0.8693705505701892, iteration: 333938
loss: 0.9914563894271851,grad_norm: 0.8136993515598445, iteration: 333939
loss: 1.0219805240631104,grad_norm: 0.7812325287958227, iteration: 333940
loss: 1.0128916501998901,grad_norm: 0.7834517316507734, iteration: 333941
loss: 0.9973000288009644,grad_norm: 0.8428328610327538, iteration: 333942
loss: 1.0018901824951172,grad_norm: 0.9999998665230356, iteration: 333943
loss: 0.9814803600311279,grad_norm: 0.999999363435244, iteration: 333944
loss: 0.9779860377311707,grad_norm: 0.7198581292033045, iteration: 333945
loss: 0.9959882497787476,grad_norm: 0.7146175576148878, iteration: 333946
loss: 0.9647105932235718,grad_norm: 0.8847450933739499, iteration: 333947
loss: 1.0036858320236206,grad_norm: 0.7330308256150081, iteration: 333948
loss: 0.9875732064247131,grad_norm: 0.8304962248588837, iteration: 333949
loss: 1.0655957460403442,grad_norm: 0.9999989829098341, iteration: 333950
loss: 1.0669395923614502,grad_norm: 0.9033043571221563, iteration: 333951
loss: 1.068415880203247,grad_norm: 0.929825459591286, iteration: 333952
loss: 1.071664810180664,grad_norm: 0.9999999417255544, iteration: 333953
loss: 1.055754542350769,grad_norm: 0.9402647919897921, iteration: 333954
loss: 1.101684331893921,grad_norm: 0.9999997072387296, iteration: 333955
loss: 1.0467277765274048,grad_norm: 0.9829152689232135, iteration: 333956
loss: 1.0437957048416138,grad_norm: 0.9999995873693783, iteration: 333957
loss: 1.0112518072128296,grad_norm: 0.8076848822023173, iteration: 333958
loss: 1.0098717212677002,grad_norm: 0.826166569749431, iteration: 333959
loss: 1.0326144695281982,grad_norm: 0.8910482226518711, iteration: 333960
loss: 0.984157919883728,grad_norm: 0.9999992020473726, iteration: 333961
loss: 1.0204538106918335,grad_norm: 0.7562834083734306, iteration: 333962
loss: 0.991081953048706,grad_norm: 0.9613310784111051, iteration: 333963
loss: 1.008360505104065,grad_norm: 0.8007198914491874, iteration: 333964
loss: 1.0434727668762207,grad_norm: 0.8477432583603188, iteration: 333965
loss: 1.0311965942382812,grad_norm: 0.8274301902219569, iteration: 333966
loss: 1.020565152168274,grad_norm: 0.894169820164611, iteration: 333967
loss: 1.0094267129898071,grad_norm: 0.7741292656875576, iteration: 333968
loss: 1.0844587087631226,grad_norm: 0.99999995134829, iteration: 333969
loss: 1.0107522010803223,grad_norm: 0.9037916121179239, iteration: 333970
loss: 1.0088735818862915,grad_norm: 0.9999995675293696, iteration: 333971
loss: 0.9941681623458862,grad_norm: 0.9063223732524908, iteration: 333972
loss: 0.9988701939582825,grad_norm: 0.9945015381507523, iteration: 333973
loss: 1.001606822013855,grad_norm: 0.8489532579361917, iteration: 333974
loss: 1.0355032682418823,grad_norm: 0.9198253992731824, iteration: 333975
loss: 1.0287784337997437,grad_norm: 0.8426640086302809, iteration: 333976
loss: 1.0397766828536987,grad_norm: 0.8599478973585606, iteration: 333977
loss: 0.960067093372345,grad_norm: 0.8133084834568383, iteration: 333978
loss: 1.0456242561340332,grad_norm: 0.9999999646698912, iteration: 333979
loss: 1.0265614986419678,grad_norm: 0.9999991615553181, iteration: 333980
loss: 1.0110418796539307,grad_norm: 0.8673681283749161, iteration: 333981
loss: 0.9953517317771912,grad_norm: 0.9266274785587733, iteration: 333982
loss: 1.0006568431854248,grad_norm: 0.7530056639446138, iteration: 333983
loss: 0.986575722694397,grad_norm: 0.7709336097047036, iteration: 333984
loss: 1.0042624473571777,grad_norm: 0.9999991720269971, iteration: 333985
loss: 1.024179458618164,grad_norm: 0.8437631288393874, iteration: 333986
loss: 0.9865857362747192,grad_norm: 0.8039996921485958, iteration: 333987
loss: 0.9867763519287109,grad_norm: 0.9422028953495186, iteration: 333988
loss: 0.9912315011024475,grad_norm: 0.9999996961996626, iteration: 333989
loss: 1.0032289028167725,grad_norm: 0.7789750460746465, iteration: 333990
loss: 1.0206350088119507,grad_norm: 0.9999990576030211, iteration: 333991
loss: 1.0176292657852173,grad_norm: 0.9999991219778862, iteration: 333992
loss: 0.9727873206138611,grad_norm: 0.9573044681345597, iteration: 333993
loss: 1.0128849744796753,grad_norm: 0.9684332887379559, iteration: 333994
loss: 0.9916530847549438,grad_norm: 0.8004522806112426, iteration: 333995
loss: 0.9802712798118591,grad_norm: 0.8610230143637939, iteration: 333996
loss: 0.9893385767936707,grad_norm: 0.7115866003598152, iteration: 333997
loss: 1.0171842575073242,grad_norm: 0.6954656342910385, iteration: 333998
loss: 1.0263800621032715,grad_norm: 0.9635812066557093, iteration: 333999
loss: 0.9652851819992065,grad_norm: 0.7058212670643289, iteration: 334000
loss: 0.9820809364318848,grad_norm: 0.8905011387858736, iteration: 334001
loss: 0.9861716628074646,grad_norm: 0.85322416429417, iteration: 334002
loss: 1.0327848196029663,grad_norm: 0.8203372614350182, iteration: 334003
loss: 1.0027645826339722,grad_norm: 0.7743947366778368, iteration: 334004
loss: 0.9744241237640381,grad_norm: 0.8442458481482528, iteration: 334005
loss: 1.011303424835205,grad_norm: 0.9999997021377298, iteration: 334006
loss: 0.9950152635574341,grad_norm: 0.81419984748808, iteration: 334007
loss: 1.0259299278259277,grad_norm: 0.8061070329555371, iteration: 334008
loss: 0.9911860227584839,grad_norm: 0.8925033239492087, iteration: 334009
loss: 1.0027152299880981,grad_norm: 0.9999994293567044, iteration: 334010
loss: 0.9942703247070312,grad_norm: 0.7424121268153759, iteration: 334011
loss: 0.9736797213554382,grad_norm: 0.7174927981318343, iteration: 334012
loss: 1.0457749366760254,grad_norm: 0.9081282582981224, iteration: 334013
loss: 1.0048593282699585,grad_norm: 0.9704368373503339, iteration: 334014
loss: 0.9998543858528137,grad_norm: 0.8062812127942885, iteration: 334015
loss: 1.051496148109436,grad_norm: 0.8469690908809124, iteration: 334016
loss: 1.0104268789291382,grad_norm: 0.999999842394379, iteration: 334017
loss: 0.9927057027816772,grad_norm: 0.7311401265542024, iteration: 334018
loss: 1.000968337059021,grad_norm: 0.8559373043845957, iteration: 334019
loss: 1.0074790716171265,grad_norm: 0.8995048444297548, iteration: 334020
loss: 1.003074049949646,grad_norm: 0.8453242582563923, iteration: 334021
loss: 0.9696268439292908,grad_norm: 0.7733901844816029, iteration: 334022
loss: 1.0274707078933716,grad_norm: 0.850440355566141, iteration: 334023
loss: 1.036104679107666,grad_norm: 0.7006133553032139, iteration: 334024
loss: 1.0256481170654297,grad_norm: 0.9533705498270911, iteration: 334025
loss: 1.0445317029953003,grad_norm: 0.8497464738789949, iteration: 334026
loss: 0.987066388130188,grad_norm: 0.8213932987547654, iteration: 334027
loss: 0.9922598600387573,grad_norm: 0.8302235315243424, iteration: 334028
loss: 0.9874716401100159,grad_norm: 0.926081973055678, iteration: 334029
loss: 1.0046544075012207,grad_norm: 0.7679396769898301, iteration: 334030
loss: 0.982302725315094,grad_norm: 0.7222493696698623, iteration: 334031
loss: 0.9779229164123535,grad_norm: 0.8469062137505794, iteration: 334032
loss: 0.9894108772277832,grad_norm: 0.9353780220570995, iteration: 334033
loss: 1.0032610893249512,grad_norm: 0.843196226400203, iteration: 334034
loss: 0.9667782783508301,grad_norm: 0.7332895467153328, iteration: 334035
loss: 1.0145998001098633,grad_norm: 0.9902205677532429, iteration: 334036
loss: 0.9674535393714905,grad_norm: 0.88200965672091, iteration: 334037
loss: 1.0089043378829956,grad_norm: 0.9412407253368396, iteration: 334038
loss: 0.9883169531822205,grad_norm: 0.9999991278348015, iteration: 334039
loss: 1.0137425661087036,grad_norm: 0.7265282781109197, iteration: 334040
loss: 0.9962143898010254,grad_norm: 0.6972386503739858, iteration: 334041
loss: 0.9977636933326721,grad_norm: 0.7840411485045695, iteration: 334042
loss: 0.9922410845756531,grad_norm: 0.8653206336808723, iteration: 334043
loss: 1.0199542045593262,grad_norm: 0.8339938973942924, iteration: 334044
loss: 1.0192762613296509,grad_norm: 0.8272782644797428, iteration: 334045
loss: 0.9600628614425659,grad_norm: 0.7929441870270847, iteration: 334046
loss: 1.0158929824829102,grad_norm: 0.9999999050505687, iteration: 334047
loss: 1.0696076154708862,grad_norm: 0.9999995773868483, iteration: 334048
loss: 1.0855319499969482,grad_norm: 0.8993233802205747, iteration: 334049
loss: 0.9765276908874512,grad_norm: 0.8290432150136705, iteration: 334050
loss: 1.0151907205581665,grad_norm: 0.8092460526353451, iteration: 334051
loss: 1.0119390487670898,grad_norm: 0.6639324800791379, iteration: 334052
loss: 1.0113130807876587,grad_norm: 0.946245685262508, iteration: 334053
loss: 0.9397397637367249,grad_norm: 0.9081864788332743, iteration: 334054
loss: 0.999580979347229,grad_norm: 0.8851634319379641, iteration: 334055
loss: 0.9901641607284546,grad_norm: 0.7063608838759433, iteration: 334056
loss: 1.0436742305755615,grad_norm: 0.931591303302888, iteration: 334057
loss: 1.10722815990448,grad_norm: 0.9999996839570671, iteration: 334058
loss: 0.9808604121208191,grad_norm: 0.8461904869412105, iteration: 334059
loss: 0.9960455298423767,grad_norm: 0.7472393733119125, iteration: 334060
loss: 1.0143804550170898,grad_norm: 0.7515131044956919, iteration: 334061
loss: 1.005312442779541,grad_norm: 0.7540426481138452, iteration: 334062
loss: 0.9749813079833984,grad_norm: 0.8343840078619927, iteration: 334063
loss: 0.9554170966148376,grad_norm: 0.9999990605983734, iteration: 334064
loss: 0.9830819368362427,grad_norm: 0.8425258322169794, iteration: 334065
loss: 1.0286743640899658,grad_norm: 0.9978672780551882, iteration: 334066
loss: 1.0497602224349976,grad_norm: 0.9999998994881266, iteration: 334067
loss: 1.0131081342697144,grad_norm: 0.7921687685181887, iteration: 334068
loss: 1.0844736099243164,grad_norm: 0.9999998414366159, iteration: 334069
loss: 1.0219900608062744,grad_norm: 0.9369450361596509, iteration: 334070
loss: 0.9491721987724304,grad_norm: 0.8372642349629958, iteration: 334071
loss: 1.0298545360565186,grad_norm: 0.9421721991382549, iteration: 334072
loss: 0.9731040000915527,grad_norm: 0.9789034513567828, iteration: 334073
loss: 0.9904150366783142,grad_norm: 0.7180380498146545, iteration: 334074
loss: 0.9645694494247437,grad_norm: 0.9999999916076503, iteration: 334075
loss: 0.9825713038444519,grad_norm: 0.7146527783875993, iteration: 334076
loss: 1.0107145309448242,grad_norm: 0.8401359347966512, iteration: 334077
loss: 1.0336612462997437,grad_norm: 0.729731481083303, iteration: 334078
loss: 1.0218578577041626,grad_norm: 0.9139711691133029, iteration: 334079
loss: 1.0155688524246216,grad_norm: 0.8609890006908455, iteration: 334080
loss: 1.0129036903381348,grad_norm: 0.7711101825020785, iteration: 334081
loss: 1.0217543840408325,grad_norm: 0.8896700662981493, iteration: 334082
loss: 0.9490396976470947,grad_norm: 0.8341861511948945, iteration: 334083
loss: 0.9843021035194397,grad_norm: 0.8420952087248764, iteration: 334084
loss: 0.9713132381439209,grad_norm: 0.9346694795018478, iteration: 334085
loss: 1.0396968126296997,grad_norm: 0.9535022122608818, iteration: 334086
loss: 0.9924816489219666,grad_norm: 0.7704846308639248, iteration: 334087
loss: 1.048014760017395,grad_norm: 0.9999998455858161, iteration: 334088
loss: 1.0117521286010742,grad_norm: 0.8826444664381401, iteration: 334089
loss: 0.9475598335266113,grad_norm: 0.9265170257985722, iteration: 334090
loss: 1.0089067220687866,grad_norm: 0.9999991983770242, iteration: 334091
loss: 0.9694832563400269,grad_norm: 0.9999992952446503, iteration: 334092
loss: 1.0473167896270752,grad_norm: 0.9344329543028668, iteration: 334093
loss: 1.0147639513015747,grad_norm: 0.8243578555616198, iteration: 334094
loss: 1.0110275745391846,grad_norm: 0.6979742711141541, iteration: 334095
loss: 0.9891615509986877,grad_norm: 0.9977238562017309, iteration: 334096
loss: 1.0517704486846924,grad_norm: 0.9999991180088262, iteration: 334097
loss: 1.009867787361145,grad_norm: 0.8395406086607785, iteration: 334098
loss: 1.0268956422805786,grad_norm: 0.999999240232625, iteration: 334099
loss: 1.0442982912063599,grad_norm: 0.8011120362713532, iteration: 334100
loss: 0.9820280075073242,grad_norm: 0.9999991394077405, iteration: 334101
loss: 1.013131856918335,grad_norm: 0.7891108152318359, iteration: 334102
loss: 0.9971979260444641,grad_norm: 0.9512350345707365, iteration: 334103
loss: 0.9792947769165039,grad_norm: 0.9664503302990125, iteration: 334104
loss: 1.0588902235031128,grad_norm: 0.9999992431123427, iteration: 334105
loss: 1.0086538791656494,grad_norm: 0.7757829849950801, iteration: 334106
loss: 0.999066948890686,grad_norm: 0.7959031629551585, iteration: 334107
loss: 1.0698590278625488,grad_norm: 0.9450426179030293, iteration: 334108
loss: 1.0718055963516235,grad_norm: 0.9395562889131681, iteration: 334109
loss: 1.1193420886993408,grad_norm: 0.9999997877626073, iteration: 334110
loss: 1.0003830194473267,grad_norm: 0.9377669031228706, iteration: 334111
loss: 0.9500285387039185,grad_norm: 0.8681540651675729, iteration: 334112
loss: 0.9957571625709534,grad_norm: 0.7910290523530396, iteration: 334113
loss: 1.0276212692260742,grad_norm: 0.9338187061558209, iteration: 334114
loss: 0.9832976460456848,grad_norm: 0.7174958881419621, iteration: 334115
loss: 1.0266042947769165,grad_norm: 0.9999990846639131, iteration: 334116
loss: 1.002415657043457,grad_norm: 0.9999991453292794, iteration: 334117
loss: 0.9784064888954163,grad_norm: 0.9570624299265333, iteration: 334118
loss: 1.0448464155197144,grad_norm: 0.9999994096621961, iteration: 334119
loss: 1.0089071989059448,grad_norm: 0.8311238351158214, iteration: 334120
loss: 1.0123094320297241,grad_norm: 0.8274383032799374, iteration: 334121
loss: 0.9921209216117859,grad_norm: 0.8299705135593648, iteration: 334122
loss: 1.0051283836364746,grad_norm: 0.7720912413995819, iteration: 334123
loss: 0.9873682856559753,grad_norm: 0.8951946334461497, iteration: 334124
loss: 0.9754799604415894,grad_norm: 0.6939635359421654, iteration: 334125
loss: 0.9877602458000183,grad_norm: 0.7302136003744164, iteration: 334126
loss: 1.0338106155395508,grad_norm: 0.9848921357138316, iteration: 334127
loss: 0.9671979546546936,grad_norm: 0.8146459196716277, iteration: 334128
loss: 1.0554121732711792,grad_norm: 0.9999996302666042, iteration: 334129
loss: 0.9772164821624756,grad_norm: 0.9201320243022993, iteration: 334130
loss: 1.0074185132980347,grad_norm: 0.8718261832378879, iteration: 334131
loss: 1.0275639295578003,grad_norm: 0.7619262694907011, iteration: 334132
loss: 0.9831118583679199,grad_norm: 0.835444041244101, iteration: 334133
loss: 1.0123834609985352,grad_norm: 0.9784319917742417, iteration: 334134
loss: 1.0034335851669312,grad_norm: 0.8444104334223739, iteration: 334135
loss: 1.1085224151611328,grad_norm: 0.9999999293598126, iteration: 334136
loss: 0.9856058955192566,grad_norm: 0.9567807903006503, iteration: 334137
loss: 0.9718527793884277,grad_norm: 0.9999992258617881, iteration: 334138
loss: 1.0117453336715698,grad_norm: 0.7674302975031473, iteration: 334139
loss: 1.010034203529358,grad_norm: 0.834050313298918, iteration: 334140
loss: 1.0033093690872192,grad_norm: 0.8736189326348013, iteration: 334141
loss: 1.0062766075134277,grad_norm: 0.9999989844458278, iteration: 334142
loss: 0.9730470776557922,grad_norm: 0.8238168631137679, iteration: 334143
loss: 0.9958683848381042,grad_norm: 0.9068420518830057, iteration: 334144
loss: 0.9847913384437561,grad_norm: 0.8747165187878184, iteration: 334145
loss: 0.9759591817855835,grad_norm: 0.9046809524352571, iteration: 334146
loss: 0.998431384563446,grad_norm: 0.9999992301531833, iteration: 334147
loss: 1.0076841115951538,grad_norm: 0.9999997786738658, iteration: 334148
loss: 1.0002164840698242,grad_norm: 0.8773227303975096, iteration: 334149
loss: 1.025513768196106,grad_norm: 0.8525534739375534, iteration: 334150
loss: 1.0470242500305176,grad_norm: 0.8576753604386789, iteration: 334151
loss: 1.0354076623916626,grad_norm: 0.9999991263442334, iteration: 334152
loss: 1.0137885808944702,grad_norm: 0.9999990547835106, iteration: 334153
loss: 1.0225906372070312,grad_norm: 0.9999996509939297, iteration: 334154
loss: 0.9751297831535339,grad_norm: 0.9999991822617035, iteration: 334155
loss: 0.9671319723129272,grad_norm: 0.7927801086024027, iteration: 334156
loss: 0.9646636843681335,grad_norm: 0.7877670716452762, iteration: 334157
loss: 1.0339264869689941,grad_norm: 0.7796758297433755, iteration: 334158
loss: 1.035309076309204,grad_norm: 0.9999994855025613, iteration: 334159
loss: 0.9995907545089722,grad_norm: 0.7873991405021658, iteration: 334160
loss: 1.0230451822280884,grad_norm: 0.9999998907225374, iteration: 334161
loss: 0.9965043663978577,grad_norm: 0.9142049347110268, iteration: 334162
loss: 0.9850388169288635,grad_norm: 0.8889254403274535, iteration: 334163
loss: 1.042855978012085,grad_norm: 0.9999993032986656, iteration: 334164
loss: 0.9944431781768799,grad_norm: 0.6203104109123999, iteration: 334165
loss: 1.0438892841339111,grad_norm: 0.8483308258885004, iteration: 334166
loss: 0.9990481734275818,grad_norm: 0.999999129584027, iteration: 334167
loss: 1.0089465379714966,grad_norm: 0.9608254706169055, iteration: 334168
loss: 0.9854909181594849,grad_norm: 0.9999989621521915, iteration: 334169
loss: 1.041913390159607,grad_norm: 0.9495073257721065, iteration: 334170
loss: 0.9787713289260864,grad_norm: 0.7564434922820149, iteration: 334171
loss: 1.054337978363037,grad_norm: 0.7087139397252809, iteration: 334172
loss: 0.9906529188156128,grad_norm: 0.8344959224692842, iteration: 334173
loss: 1.0421086549758911,grad_norm: 0.9999990796507258, iteration: 334174
loss: 0.9920473694801331,grad_norm: 0.7126542954238579, iteration: 334175
loss: 0.9943089485168457,grad_norm: 0.7683614676757972, iteration: 334176
loss: 0.9891960024833679,grad_norm: 0.9999998810235833, iteration: 334177
loss: 0.9809376001358032,grad_norm: 0.9999994228343765, iteration: 334178
loss: 1.004288673400879,grad_norm: 0.8438597294737018, iteration: 334179
loss: 1.0363720655441284,grad_norm: 0.8597772254876407, iteration: 334180
loss: 0.9951907396316528,grad_norm: 0.8475763074125011, iteration: 334181
loss: 1.0360077619552612,grad_norm: 0.999999101734447, iteration: 334182
loss: 1.036543846130371,grad_norm: 0.9999998402247344, iteration: 334183
loss: 1.0139174461364746,grad_norm: 0.8121757832187311, iteration: 334184
loss: 1.00603449344635,grad_norm: 0.7226391556935238, iteration: 334185
loss: 1.0178937911987305,grad_norm: 0.9321526754410516, iteration: 334186
loss: 0.9873196482658386,grad_norm: 0.9999990621750625, iteration: 334187
loss: 0.9619776010513306,grad_norm: 0.9927440367537271, iteration: 334188
loss: 1.0215829610824585,grad_norm: 0.8863814890372895, iteration: 334189
loss: 1.0038386583328247,grad_norm: 0.8199020273546171, iteration: 334190
loss: 0.9933226108551025,grad_norm: 0.960613404839881, iteration: 334191
loss: 0.9919406771659851,grad_norm: 0.8045235860738589, iteration: 334192
loss: 1.0149363279342651,grad_norm: 0.9187872762458794, iteration: 334193
loss: 1.0638315677642822,grad_norm: 0.9999989932724492, iteration: 334194
loss: 1.0083197355270386,grad_norm: 0.9999994511209052, iteration: 334195
loss: 1.0153472423553467,grad_norm: 0.795772568401701, iteration: 334196
loss: 1.0129716396331787,grad_norm: 0.9432666064845732, iteration: 334197
loss: 1.019882082939148,grad_norm: 0.9999996634037073, iteration: 334198
loss: 1.0815995931625366,grad_norm: 0.9999991995238255, iteration: 334199
loss: 1.0511913299560547,grad_norm: 0.7628177375364081, iteration: 334200
loss: 1.0112649202346802,grad_norm: 0.9005472205377226, iteration: 334201
loss: 1.119761347770691,grad_norm: 0.9428286258244551, iteration: 334202
loss: 1.0405281782150269,grad_norm: 0.6995282599105713, iteration: 334203
loss: 1.0022794008255005,grad_norm: 0.8762218751002904, iteration: 334204
loss: 1.0228720903396606,grad_norm: 0.9999997019004959, iteration: 334205
loss: 1.0040793418884277,grad_norm: 0.7554999286226546, iteration: 334206
loss: 0.9948480725288391,grad_norm: 0.9260027483507404, iteration: 334207
loss: 0.9887932538986206,grad_norm: 0.9999995667885637, iteration: 334208
loss: 0.9832057356834412,grad_norm: 0.8850564530579207, iteration: 334209
loss: 0.9970409274101257,grad_norm: 0.8314727630453053, iteration: 334210
loss: 1.0014064311981201,grad_norm: 0.9004003406426828, iteration: 334211
loss: 1.0084365606307983,grad_norm: 0.9389162197661602, iteration: 334212
loss: 1.0099135637283325,grad_norm: 0.9999998336705238, iteration: 334213
loss: 1.0538712739944458,grad_norm: 0.9999995311075949, iteration: 334214
loss: 1.0148382186889648,grad_norm: 0.898197310385198, iteration: 334215
loss: 1.0286900997161865,grad_norm: 0.7056975138458, iteration: 334216
loss: 0.9652368426322937,grad_norm: 0.99999913150692, iteration: 334217
loss: 1.0271719694137573,grad_norm: 0.8352905318374255, iteration: 334218
loss: 1.0181059837341309,grad_norm: 0.8311902280332015, iteration: 334219
loss: 1.0204538106918335,grad_norm: 0.7466173357949125, iteration: 334220
loss: 1.0734648704528809,grad_norm: 0.783729409752223, iteration: 334221
loss: 0.9767975807189941,grad_norm: 0.9043069652923804, iteration: 334222
loss: 1.0056105852127075,grad_norm: 0.8035110983175608, iteration: 334223
loss: 1.0237723588943481,grad_norm: 0.7944025543580253, iteration: 334224
loss: 1.0193476676940918,grad_norm: 0.7724902504156428, iteration: 334225
loss: 0.970641016960144,grad_norm: 0.9241907065732197, iteration: 334226
loss: 0.9909899830818176,grad_norm: 0.8033772470715215, iteration: 334227
loss: 1.0109001398086548,grad_norm: 0.9999992040847494, iteration: 334228
loss: 1.0200473070144653,grad_norm: 0.9999997648910387, iteration: 334229
loss: 0.9734275341033936,grad_norm: 0.9110866179241256, iteration: 334230
loss: 1.017330288887024,grad_norm: 0.827088097054635, iteration: 334231
loss: 0.984403133392334,grad_norm: 0.6611902584245563, iteration: 334232
loss: 1.0075922012329102,grad_norm: 0.77532351468848, iteration: 334233
loss: 1.0028901100158691,grad_norm: 0.7404997166916492, iteration: 334234
loss: 0.9889980554580688,grad_norm: 0.8676765062496637, iteration: 334235
loss: 0.9796211123466492,grad_norm: 0.8358254058163467, iteration: 334236
loss: 0.9935157299041748,grad_norm: 0.803415710056925, iteration: 334237
loss: 1.0129563808441162,grad_norm: 0.87229782342389, iteration: 334238
loss: 1.0117337703704834,grad_norm: 0.9010148456217997, iteration: 334239
loss: 0.9997972846031189,grad_norm: 0.9310493660445879, iteration: 334240
loss: 1.0180883407592773,grad_norm: 0.8551526591551238, iteration: 334241
loss: 1.0083932876586914,grad_norm: 0.8561642365052152, iteration: 334242
loss: 1.0264970064163208,grad_norm: 0.9335835331631468, iteration: 334243
loss: 0.9793241620063782,grad_norm: 0.9999991221270433, iteration: 334244
loss: 1.0208089351654053,grad_norm: 1.0000000733829522, iteration: 334245
loss: 0.9884868264198303,grad_norm: 0.7788537554986429, iteration: 334246
loss: 1.0030992031097412,grad_norm: 0.9354546673389703, iteration: 334247
loss: 1.029836654663086,grad_norm: 0.8566699689670977, iteration: 334248
loss: 0.956214189529419,grad_norm: 0.8432948922395517, iteration: 334249
loss: 1.0029926300048828,grad_norm: 0.9133821970157482, iteration: 334250
loss: 1.0039870738983154,grad_norm: 0.7801293928178161, iteration: 334251
loss: 1.0182863473892212,grad_norm: 0.8795180561219301, iteration: 334252
loss: 0.9648464322090149,grad_norm: 0.9450114467912369, iteration: 334253
loss: 1.019814133644104,grad_norm: 0.9178575615805051, iteration: 334254
loss: 1.0228238105773926,grad_norm: 0.7785890357929651, iteration: 334255
loss: 0.996200680732727,grad_norm: 0.9703915914316881, iteration: 334256
loss: 0.9750876426696777,grad_norm: 0.7432730406034954, iteration: 334257
loss: 1.0077953338623047,grad_norm: 0.8285803173206845, iteration: 334258
loss: 1.004509687423706,grad_norm: 0.8800313969424055, iteration: 334259
loss: 0.9790351986885071,grad_norm: 0.9718972868829278, iteration: 334260
loss: 0.9951992034912109,grad_norm: 0.6800741814838462, iteration: 334261
loss: 1.042352318763733,grad_norm: 0.8640114587564636, iteration: 334262
loss: 0.9888229966163635,grad_norm: 0.8300751534793127, iteration: 334263
loss: 1.0225735902786255,grad_norm: 0.9910168974015098, iteration: 334264
loss: 1.0173872709274292,grad_norm: 0.8237226181177646, iteration: 334265
loss: 0.9762486815452576,grad_norm: 0.7503868301819508, iteration: 334266
loss: 0.9674521088600159,grad_norm: 0.9999999733467664, iteration: 334267
loss: 1.0195986032485962,grad_norm: 0.999999034394454, iteration: 334268
loss: 0.9894640445709229,grad_norm: 0.9246489058048233, iteration: 334269
loss: 0.9977027177810669,grad_norm: 0.9228505960234157, iteration: 334270
loss: 0.9708727598190308,grad_norm: 0.8476050664573115, iteration: 334271
loss: 0.9808075428009033,grad_norm: 0.8305702760258394, iteration: 334272
loss: 0.981939435005188,grad_norm: 0.8459749275781003, iteration: 334273
loss: 1.0077701807022095,grad_norm: 0.8019269922473227, iteration: 334274
loss: 0.9888846278190613,grad_norm: 0.7379907195257394, iteration: 334275
loss: 1.0478068590164185,grad_norm: 0.8015940331758712, iteration: 334276
loss: 0.9692572951316833,grad_norm: 0.8216763478957748, iteration: 334277
loss: 0.9715756177902222,grad_norm: 0.7909274246654313, iteration: 334278
loss: 0.9885879158973694,grad_norm: 0.9146466664943973, iteration: 334279
loss: 1.0242117643356323,grad_norm: 0.7324324642508352, iteration: 334280
loss: 1.080233097076416,grad_norm: 0.9909910316122083, iteration: 334281
loss: 0.9992210865020752,grad_norm: 0.8735843989150263, iteration: 334282
loss: 0.96860671043396,grad_norm: 0.8225782896012804, iteration: 334283
loss: 1.0753077268600464,grad_norm: 0.818725607001389, iteration: 334284
loss: 1.0764881372451782,grad_norm: 0.9999993334696737, iteration: 334285
loss: 0.9834476709365845,grad_norm: 0.9999992158841509, iteration: 334286
loss: 1.024870753288269,grad_norm: 0.9999993533983157, iteration: 334287
loss: 0.998586118221283,grad_norm: 0.9884420922564495, iteration: 334288
loss: 0.9849132895469666,grad_norm: 0.8078786793644702, iteration: 334289
loss: 0.9990714192390442,grad_norm: 0.8043935557305023, iteration: 334290
loss: 0.9998088479042053,grad_norm: 0.9999996729054714, iteration: 334291
loss: 1.0003471374511719,grad_norm: 0.9999990176661309, iteration: 334292
loss: 1.0325310230255127,grad_norm: 0.9181355366815533, iteration: 334293
loss: 1.0132344961166382,grad_norm: 0.832778036936505, iteration: 334294
loss: 1.0942497253417969,grad_norm: 0.9999991110914584, iteration: 334295
loss: 1.0096001625061035,grad_norm: 0.9409192611796439, iteration: 334296
loss: 1.237579584121704,grad_norm: 0.9999990128687088, iteration: 334297
loss: 1.0030128955841064,grad_norm: 0.7192899210621505, iteration: 334298
loss: 1.0690921545028687,grad_norm: 0.9999998988169638, iteration: 334299
loss: 0.9544946551322937,grad_norm: 0.8360930190217658, iteration: 334300
loss: 0.9883074164390564,grad_norm: 0.7792483108839056, iteration: 334301
loss: 0.9978758096694946,grad_norm: 0.7848058340828725, iteration: 334302
loss: 0.9722687602043152,grad_norm: 0.7773403760541779, iteration: 334303
loss: 0.96861732006073,grad_norm: 0.7396391534399696, iteration: 334304
loss: 1.013520359992981,grad_norm: 0.9371883829944204, iteration: 334305
loss: 0.9979410171508789,grad_norm: 0.9406195129800553, iteration: 334306
loss: 0.9643932580947876,grad_norm: 0.7794944863749248, iteration: 334307
loss: 0.9873278141021729,grad_norm: 0.8973575038350423, iteration: 334308
loss: 1.0043439865112305,grad_norm: 0.9841150116594702, iteration: 334309
loss: 1.0760027170181274,grad_norm: 0.9992104935402816, iteration: 334310
loss: 1.031288743019104,grad_norm: 0.7555829067359938, iteration: 334311
loss: 0.99019855260849,grad_norm: 0.8634043007615672, iteration: 334312
loss: 1.009605050086975,grad_norm: 0.9999990714317959, iteration: 334313
loss: 1.0369255542755127,grad_norm: 0.9121564633336353, iteration: 334314
loss: 0.995524525642395,grad_norm: 0.8435747054404303, iteration: 334315
loss: 0.9842106103897095,grad_norm: 0.8513690777467787, iteration: 334316
loss: 1.0061225891113281,grad_norm: 0.8408684866937184, iteration: 334317
loss: 1.000734567642212,grad_norm: 0.8104711275217773, iteration: 334318
loss: 0.9872068166732788,grad_norm: 0.8204581132119183, iteration: 334319
loss: 1.0110880136489868,grad_norm: 0.7015362468402025, iteration: 334320
loss: 1.011946201324463,grad_norm: 0.7012140723984632, iteration: 334321
loss: 1.0666544437408447,grad_norm: 0.9999997181874096, iteration: 334322
loss: 1.0107301473617554,grad_norm: 0.7251198710211071, iteration: 334323
loss: 0.9973509311676025,grad_norm: 0.7820937870536542, iteration: 334324
loss: 0.9911118149757385,grad_norm: 0.8712359725404275, iteration: 334325
loss: 0.9571748375892639,grad_norm: 0.6538166640406973, iteration: 334326
loss: 0.9579046964645386,grad_norm: 0.9427699041710681, iteration: 334327
loss: 0.996964693069458,grad_norm: 0.6715322123434747, iteration: 334328
loss: 1.0201480388641357,grad_norm: 0.8431203793027944, iteration: 334329
loss: 0.9987502694129944,grad_norm: 0.8032700519029055, iteration: 334330
loss: 1.0106759071350098,grad_norm: 0.8450588562503708, iteration: 334331
loss: 0.9961197972297668,grad_norm: 0.8017126977166634, iteration: 334332
loss: 1.0026148557662964,grad_norm: 0.775343714994955, iteration: 334333
loss: 0.993371844291687,grad_norm: 0.9833126204577796, iteration: 334334
loss: 1.0123404264450073,grad_norm: 0.8497596216803959, iteration: 334335
loss: 1.0040167570114136,grad_norm: 0.6765931108505832, iteration: 334336
loss: 0.9973771572113037,grad_norm: 0.9496574513618271, iteration: 334337
loss: 1.0031607151031494,grad_norm: 0.7075010277032707, iteration: 334338
loss: 1.0038769245147705,grad_norm: 0.8047026379336617, iteration: 334339
loss: 0.9865342974662781,grad_norm: 0.7833895122502256, iteration: 334340
loss: 1.0008841753005981,grad_norm: 0.9999991256049245, iteration: 334341
loss: 0.99330735206604,grad_norm: 0.8783998244645627, iteration: 334342
loss: 0.9749805927276611,grad_norm: 0.6978127468545008, iteration: 334343
loss: 0.9824403524398804,grad_norm: 0.9189127077448914, iteration: 334344
loss: 1.0387643575668335,grad_norm: 0.8170571615165578, iteration: 334345
loss: 1.0079978704452515,grad_norm: 0.8887979868856679, iteration: 334346
loss: 0.9992619752883911,grad_norm: 0.7999223045994549, iteration: 334347
loss: 1.1328731775283813,grad_norm: 0.9999992915962019, iteration: 334348
loss: 0.9788155555725098,grad_norm: 0.8497719019578022, iteration: 334349
loss: 1.0162981748580933,grad_norm: 0.889354254356109, iteration: 334350
loss: 0.9794944524765015,grad_norm: 0.8940920639740879, iteration: 334351
loss: 1.0918580293655396,grad_norm: 0.9999997413195023, iteration: 334352
loss: 1.0162830352783203,grad_norm: 0.9437715347701411, iteration: 334353
loss: 1.011745810508728,grad_norm: 0.8675500442319769, iteration: 334354
loss: 1.0445224046707153,grad_norm: 0.9999992715829576, iteration: 334355
loss: 0.9865270853042603,grad_norm: 0.74888237090855, iteration: 334356
loss: 1.0404969453811646,grad_norm: 0.9999989515435241, iteration: 334357
loss: 0.9763727188110352,grad_norm: 0.7410206282299181, iteration: 334358
loss: 1.0438588857650757,grad_norm: 0.8605321423395799, iteration: 334359
loss: 1.0124210119247437,grad_norm: 0.7069048903790041, iteration: 334360
loss: 1.0197962522506714,grad_norm: 0.9913307848062167, iteration: 334361
loss: 1.0697591304779053,grad_norm: 0.8292025244322693, iteration: 334362
loss: 0.9742725491523743,grad_norm: 0.927499134761943, iteration: 334363
loss: 0.9779669642448425,grad_norm: 0.6994317072183307, iteration: 334364
loss: 0.9762061834335327,grad_norm: 0.8284791865457244, iteration: 334365
loss: 1.0176348686218262,grad_norm: 0.8616690740669911, iteration: 334366
loss: 1.0225623846054077,grad_norm: 0.9402575184585064, iteration: 334367
loss: 0.9824263453483582,grad_norm: 0.8514398515765459, iteration: 334368
loss: 1.018127202987671,grad_norm: 0.9999995246352327, iteration: 334369
loss: 0.9857584834098816,grad_norm: 0.7203974033633948, iteration: 334370
loss: 0.9792880415916443,grad_norm: 0.8148408984022177, iteration: 334371
loss: 1.0668989419937134,grad_norm: 0.9999991435687786, iteration: 334372
loss: 1.0277754068374634,grad_norm: 0.9470193146540821, iteration: 334373
loss: 0.9772458076477051,grad_norm: 0.9999991559531344, iteration: 334374
loss: 1.0041804313659668,grad_norm: 0.8057064840097945, iteration: 334375
loss: 0.9970000386238098,grad_norm: 0.9068803220394738, iteration: 334376
loss: 0.9704207181930542,grad_norm: 0.9999989889675218, iteration: 334377
loss: 0.9857050180435181,grad_norm: 0.786627291331168, iteration: 334378
loss: 1.0000511407852173,grad_norm: 0.9999996108905722, iteration: 334379
loss: 0.972368597984314,grad_norm: 0.6866535393279267, iteration: 334380
loss: 1.0190476179122925,grad_norm: 0.8615582739182165, iteration: 334381
loss: 0.9744179844856262,grad_norm: 0.8895354156026071, iteration: 334382
loss: 0.9635131359100342,grad_norm: 0.9604867600766809, iteration: 334383
loss: 0.9895079135894775,grad_norm: 0.8952277274923865, iteration: 334384
loss: 1.0020318031311035,grad_norm: 0.7904357599874396, iteration: 334385
loss: 1.1005767583847046,grad_norm: 0.8151438231102823, iteration: 334386
loss: 1.0948251485824585,grad_norm: 0.9999998451221515, iteration: 334387
loss: 1.0347837209701538,grad_norm: 0.6798060315212915, iteration: 334388
loss: 1.0300105810165405,grad_norm: 0.7375669674526213, iteration: 334389
loss: 1.009179711341858,grad_norm: 0.9999991119592297, iteration: 334390
loss: 0.9945873022079468,grad_norm: 0.7439766049171596, iteration: 334391
loss: 0.9803795218467712,grad_norm: 0.9110601739310569, iteration: 334392
loss: 0.9933319091796875,grad_norm: 0.8510314638381947, iteration: 334393
loss: 1.051700234413147,grad_norm: 0.9459961506403481, iteration: 334394
loss: 1.0205104351043701,grad_norm: 0.900237176267486, iteration: 334395
loss: 0.9928966164588928,grad_norm: 0.8109406558691733, iteration: 334396
loss: 1.0127989053726196,grad_norm: 0.9999995719602478, iteration: 334397
loss: 1.0472815036773682,grad_norm: 0.9999993256397848, iteration: 334398
loss: 0.9957224726676941,grad_norm: 0.7141811426159772, iteration: 334399
loss: 1.0184123516082764,grad_norm: 0.7465372932064136, iteration: 334400
loss: 1.0269834995269775,grad_norm: 0.9356399649993786, iteration: 334401
loss: 1.0013445615768433,grad_norm: 0.8549083125538482, iteration: 334402
loss: 1.0070315599441528,grad_norm: 0.9246744704451332, iteration: 334403
loss: 1.0078297853469849,grad_norm: 0.9999992394168503, iteration: 334404
loss: 1.0639337301254272,grad_norm: 0.827603780857997, iteration: 334405
loss: 0.9936954975128174,grad_norm: 0.7720433350869571, iteration: 334406
loss: 1.0284639596939087,grad_norm: 0.749507130421094, iteration: 334407
loss: 1.0123810768127441,grad_norm: 0.892092318732124, iteration: 334408
loss: 0.9832781553268433,grad_norm: 0.9255596022450864, iteration: 334409
loss: 1.0064878463745117,grad_norm: 0.8417124451527029, iteration: 334410
loss: 1.1066397428512573,grad_norm: 0.9999994263410413, iteration: 334411
loss: 0.9795321822166443,grad_norm: 0.9139129742315132, iteration: 334412
loss: 1.0017997026443481,grad_norm: 0.9999998837833041, iteration: 334413
loss: 1.0693161487579346,grad_norm: 0.9999992581999877, iteration: 334414
loss: 1.0286972522735596,grad_norm: 0.9999992459212546, iteration: 334415
loss: 0.9965611696243286,grad_norm: 0.7591177608193906, iteration: 334416
loss: 0.9903902411460876,grad_norm: 0.7611248177798569, iteration: 334417
loss: 0.9857956171035767,grad_norm: 0.8588535084147207, iteration: 334418
loss: 0.997899055480957,grad_norm: 0.939714293378801, iteration: 334419
loss: 1.0310324430465698,grad_norm: 0.863534709841597, iteration: 334420
loss: 1.066870927810669,grad_norm: 0.9999992229033421, iteration: 334421
loss: 1.020877718925476,grad_norm: 0.7739476731369778, iteration: 334422
loss: 1.0121848583221436,grad_norm: 0.7627043306847111, iteration: 334423
loss: 0.9820773005485535,grad_norm: 0.8487393715949193, iteration: 334424
loss: 1.0561282634735107,grad_norm: 0.999999146138136, iteration: 334425
loss: 0.9794304370880127,grad_norm: 0.7682905352321043, iteration: 334426
loss: 0.9927505850791931,grad_norm: 0.7172096057190579, iteration: 334427
loss: 1.0281057357788086,grad_norm: 0.9792428632026848, iteration: 334428
loss: 0.9954171180725098,grad_norm: 0.9999992026974545, iteration: 334429
loss: 1.0030863285064697,grad_norm: 0.795449204499317, iteration: 334430
loss: 1.0409584045410156,grad_norm: 0.9999994273520425, iteration: 334431
loss: 0.9338516592979431,grad_norm: 0.9999990081738777, iteration: 334432
loss: 0.9865967035293579,grad_norm: 0.8767816146075061, iteration: 334433
loss: 1.0546610355377197,grad_norm: 0.8522261958497177, iteration: 334434
loss: 1.106539249420166,grad_norm: 0.9999993223434784, iteration: 334435
loss: 0.9822497963905334,grad_norm: 0.7865650533692256, iteration: 334436
loss: 1.0257827043533325,grad_norm: 0.9999996530240088, iteration: 334437
loss: 0.9703962206840515,grad_norm: 0.8733142048967634, iteration: 334438
loss: 1.0264123678207397,grad_norm: 0.99999980910367, iteration: 334439
loss: 1.0144470930099487,grad_norm: 0.8433713761815957, iteration: 334440
loss: 1.0277152061462402,grad_norm: 0.8592657656560454, iteration: 334441
loss: 1.0419310331344604,grad_norm: 0.999999577921416, iteration: 334442
loss: 1.0994662046432495,grad_norm: 0.9999998811562385, iteration: 334443
loss: 0.9797009825706482,grad_norm: 0.8529329944322185, iteration: 334444
loss: 1.0021865367889404,grad_norm: 0.9358470871751255, iteration: 334445
loss: 1.0206109285354614,grad_norm: 0.854134213062183, iteration: 334446
loss: 0.9961172938346863,grad_norm: 0.8475007887478346, iteration: 334447
loss: 0.9988616108894348,grad_norm: 0.8837806656275329, iteration: 334448
loss: 0.9990087151527405,grad_norm: 0.8612196379602446, iteration: 334449
loss: 1.0277159214019775,grad_norm: 0.9999990311799752, iteration: 334450
loss: 0.9785680174827576,grad_norm: 0.8570976795149509, iteration: 334451
loss: 1.0291075706481934,grad_norm: 0.9015634148842009, iteration: 334452
loss: 1.0041577816009521,grad_norm: 0.9999998497043278, iteration: 334453
loss: 1.028921365737915,grad_norm: 0.8580900845904962, iteration: 334454
loss: 0.9904995560646057,grad_norm: 0.8103472746919899, iteration: 334455
loss: 0.9802572727203369,grad_norm: 0.839666080456503, iteration: 334456
loss: 0.9708189368247986,grad_norm: 0.9544053691148389, iteration: 334457
loss: 0.993358850479126,grad_norm: 0.6638876859461681, iteration: 334458
loss: 1.0071065425872803,grad_norm: 0.7835804016989483, iteration: 334459
loss: 1.0577352046966553,grad_norm: 0.9134099261620545, iteration: 334460
loss: 1.25558340549469,grad_norm: 1.00000000073065, iteration: 334461
loss: 1.0094870328903198,grad_norm: 0.7347785916656987, iteration: 334462
loss: 0.9667327404022217,grad_norm: 0.8114442899507534, iteration: 334463
loss: 1.0460067987442017,grad_norm: 0.999999434616118, iteration: 334464
loss: 0.9416152834892273,grad_norm: 0.9012196957151848, iteration: 334465
loss: 1.0350232124328613,grad_norm: 0.750412891374635, iteration: 334466
loss: 0.9723564982414246,grad_norm: 0.7847438226948107, iteration: 334467
loss: 0.9986419677734375,grad_norm: 0.8641087709392089, iteration: 334468
loss: 1.0280733108520508,grad_norm: 0.9999992482601789, iteration: 334469
loss: 0.9828053116798401,grad_norm: 0.7283417972972202, iteration: 334470
loss: 1.0208306312561035,grad_norm: 0.999999067945465, iteration: 334471
loss: 1.0149099826812744,grad_norm: 0.8967484490678009, iteration: 334472
loss: 1.0269451141357422,grad_norm: 0.9756926820453846, iteration: 334473
loss: 0.988497793674469,grad_norm: 0.7951420164198312, iteration: 334474
loss: 0.997101902961731,grad_norm: 0.7380304786214884, iteration: 334475
loss: 0.9837471842765808,grad_norm: 0.7494323318253452, iteration: 334476
loss: 1.0235198736190796,grad_norm: 0.8943719792150613, iteration: 334477
loss: 1.0453596115112305,grad_norm: 0.9999992132665885, iteration: 334478
loss: 1.0237791538238525,grad_norm: 0.7631880518471196, iteration: 334479
loss: 1.0226129293441772,grad_norm: 0.8456283783062589, iteration: 334480
loss: 1.0014396905899048,grad_norm: 0.9733578080299657, iteration: 334481
loss: 1.000510334968567,grad_norm: 0.8645208154955797, iteration: 334482
loss: 1.1906100511550903,grad_norm: 0.9999999366697162, iteration: 334483
loss: 1.0173044204711914,grad_norm: 0.8775236264004007, iteration: 334484
loss: 1.079488754272461,grad_norm: 0.8889968173455441, iteration: 334485
loss: 0.9894788265228271,grad_norm: 0.784059126758909, iteration: 334486
loss: 1.0190720558166504,grad_norm: 0.9999995989625128, iteration: 334487
loss: 0.9911606311798096,grad_norm: 0.8803362643144437, iteration: 334488
loss: 1.051039457321167,grad_norm: 0.8654753949137083, iteration: 334489
loss: 1.0277084112167358,grad_norm: 0.9999993851545039, iteration: 334490
loss: 1.0341640710830688,grad_norm: 0.8188965302717661, iteration: 334491
loss: 0.9939961433410645,grad_norm: 0.7417307315188123, iteration: 334492
loss: 1.045870304107666,grad_norm: 0.9627159623461226, iteration: 334493
loss: 1.0106637477874756,grad_norm: 0.819965779954059, iteration: 334494
loss: 1.031594157218933,grad_norm: 0.9482685682640604, iteration: 334495
loss: 0.9784287214279175,grad_norm: 0.8513581586205612, iteration: 334496
loss: 1.031118392944336,grad_norm: 0.8941661907215149, iteration: 334497
loss: 1.010560393333435,grad_norm: 0.9150305136312177, iteration: 334498
loss: 1.0077964067459106,grad_norm: 0.8217370815043582, iteration: 334499
loss: 0.9738490581512451,grad_norm: 0.7566917075725004, iteration: 334500
loss: 1.0119903087615967,grad_norm: 0.7819128105660571, iteration: 334501
loss: 1.0094155073165894,grad_norm: 0.8780176686732382, iteration: 334502
loss: 0.9849280118942261,grad_norm: 0.8611941210201401, iteration: 334503
loss: 0.9871585965156555,grad_norm: 0.7112867980290715, iteration: 334504
loss: 1.037207841873169,grad_norm: 0.934342732129245, iteration: 334505
loss: 0.9692163467407227,grad_norm: 0.8761569348835622, iteration: 334506
loss: 0.9509750008583069,grad_norm: 0.9999990347580997, iteration: 334507
loss: 1.0060886144638062,grad_norm: 0.9477038028337845, iteration: 334508
loss: 1.076735258102417,grad_norm: 0.8566656511913247, iteration: 334509
loss: 0.9975212216377258,grad_norm: 0.8547352596325911, iteration: 334510
loss: 1.0157084465026855,grad_norm: 0.787359299036309, iteration: 334511
loss: 0.9821860194206238,grad_norm: 0.7620420850088029, iteration: 334512
loss: 0.9971762299537659,grad_norm: 0.7228365791740596, iteration: 334513
loss: 0.9906700849533081,grad_norm: 0.8534172138258643, iteration: 334514
loss: 1.0864362716674805,grad_norm: 0.988220375167307, iteration: 334515
loss: 0.9997109174728394,grad_norm: 0.9339350679732545, iteration: 334516
loss: 1.0088409185409546,grad_norm: 0.9496015601179595, iteration: 334517
loss: 1.0109502077102661,grad_norm: 0.952292904397301, iteration: 334518
loss: 1.030859351158142,grad_norm: 0.9431609276278373, iteration: 334519
loss: 1.0030081272125244,grad_norm: 0.7464074786414736, iteration: 334520
loss: 0.9490055441856384,grad_norm: 0.7502668522975183, iteration: 334521
loss: 0.9963806867599487,grad_norm: 0.8679294595310126, iteration: 334522
loss: 1.029147982597351,grad_norm: 0.8376936326977968, iteration: 334523
loss: 0.9808089137077332,grad_norm: 0.8542413607825226, iteration: 334524
loss: 1.0241643190383911,grad_norm: 0.9999993977182741, iteration: 334525
loss: 0.9798349738121033,grad_norm: 0.7539434999638132, iteration: 334526
loss: 1.0017726421356201,grad_norm: 0.8918217855876941, iteration: 334527
loss: 1.0236941576004028,grad_norm: 0.9999990014141867, iteration: 334528
loss: 1.0004968643188477,grad_norm: 0.7247163146117842, iteration: 334529
loss: 0.9975790977478027,grad_norm: 0.7493986112290506, iteration: 334530
loss: 0.989726185798645,grad_norm: 0.692726483733124, iteration: 334531
loss: 1.033957839012146,grad_norm: 0.9999992197405024, iteration: 334532
loss: 1.0019307136535645,grad_norm: 0.8993487013734438, iteration: 334533
loss: 1.030015230178833,grad_norm: 0.9244551227642537, iteration: 334534
loss: 0.9763608574867249,grad_norm: 0.8048695346434886, iteration: 334535
loss: 0.9880076050758362,grad_norm: 0.7703333447005186, iteration: 334536
loss: 1.0072009563446045,grad_norm: 0.9999999463316143, iteration: 334537
loss: 0.9796777963638306,grad_norm: 0.7864124423983118, iteration: 334538
loss: 0.9580495357513428,grad_norm: 0.95571309642057, iteration: 334539
loss: 1.0138119459152222,grad_norm: 0.8223394532984328, iteration: 334540
loss: 1.022845983505249,grad_norm: 0.8746682627999427, iteration: 334541
loss: 1.0128121376037598,grad_norm: 0.9999990999075049, iteration: 334542
loss: 1.019173502922058,grad_norm: 0.8825797212930937, iteration: 334543
loss: 0.9821215271949768,grad_norm: 0.8415670764058193, iteration: 334544
loss: 1.0097706317901611,grad_norm: 0.8398970379935534, iteration: 334545
loss: 1.0482149124145508,grad_norm: 0.9999996299691712, iteration: 334546
loss: 0.9964886903762817,grad_norm: 0.7546145758745386, iteration: 334547
loss: 0.9851322174072266,grad_norm: 0.945034782646974, iteration: 334548
loss: 1.028004765510559,grad_norm: 0.8316924599481494, iteration: 334549
loss: 1.0307515859603882,grad_norm: 0.8967925337387953, iteration: 334550
loss: 1.0017552375793457,grad_norm: 0.8167284115961899, iteration: 334551
loss: 0.9848617315292358,grad_norm: 0.8105424681330182, iteration: 334552
loss: 1.004831075668335,grad_norm: 0.7188352329007702, iteration: 334553
loss: 1.0363342761993408,grad_norm: 0.813153742347737, iteration: 334554
loss: 1.0074148178100586,grad_norm: 0.9043546585860803, iteration: 334555
loss: 1.0555694103240967,grad_norm: 0.8326853217347964, iteration: 334556
loss: 1.0857980251312256,grad_norm: 0.9999997879854159, iteration: 334557
loss: 1.0158084630966187,grad_norm: 0.757079940126993, iteration: 334558
loss: 0.998616099357605,grad_norm: 0.8189670646819418, iteration: 334559
loss: 0.9840453863143921,grad_norm: 0.9448126568025633, iteration: 334560
loss: 0.9995361566543579,grad_norm: 0.9999991677465673, iteration: 334561
loss: 0.9709466695785522,grad_norm: 0.7629919595329713, iteration: 334562
loss: 1.004451036453247,grad_norm: 0.8653167541664551, iteration: 334563
loss: 0.9925873875617981,grad_norm: 0.7765063384262039, iteration: 334564
loss: 1.0476304292678833,grad_norm: 0.9777804329873342, iteration: 334565
loss: 1.0075080394744873,grad_norm: 0.8913571292160978, iteration: 334566
loss: 1.0498595237731934,grad_norm: 0.8182685045772631, iteration: 334567
loss: 1.0859498977661133,grad_norm: 0.8192007974412319, iteration: 334568
loss: 0.9660358428955078,grad_norm: 0.675063849398213, iteration: 334569
loss: 1.0030715465545654,grad_norm: 0.8585664678260381, iteration: 334570
loss: 1.1914141178131104,grad_norm: 0.9999994892663596, iteration: 334571
loss: 0.9802270531654358,grad_norm: 0.8336872624723978, iteration: 334572
loss: 0.9866617321968079,grad_norm: 0.7634742126936652, iteration: 334573
loss: 1.0637248754501343,grad_norm: 0.9999994476776491, iteration: 334574
loss: 0.9865384101867676,grad_norm: 0.8088459297616305, iteration: 334575
loss: 1.01973557472229,grad_norm: 0.9114744975761956, iteration: 334576
loss: 1.0485488176345825,grad_norm: 0.8428196329714143, iteration: 334577
loss: 1.0298445224761963,grad_norm: 0.7822273447121124, iteration: 334578
loss: 0.9628077745437622,grad_norm: 0.9526480168066745, iteration: 334579
loss: 0.9969202876091003,grad_norm: 0.9999991355696799, iteration: 334580
loss: 0.9593378901481628,grad_norm: 0.9353009558849654, iteration: 334581
loss: 0.9975370168685913,grad_norm: 0.8424512983827742, iteration: 334582
loss: 1.1422169208526611,grad_norm: 0.9999994611418292, iteration: 334583
loss: 1.0520766973495483,grad_norm: 0.9452555500712214, iteration: 334584
loss: 1.0202045440673828,grad_norm: 0.7971745779647478, iteration: 334585
loss: 0.9657309055328369,grad_norm: 0.8229207116076427, iteration: 334586
loss: 1.009108066558838,grad_norm: 0.8637541486441943, iteration: 334587
loss: 0.9868957996368408,grad_norm: 0.735061520505084, iteration: 334588
loss: 1.069700837135315,grad_norm: 0.9999996101506913, iteration: 334589
loss: 0.9774572253227234,grad_norm: 0.8477208160548044, iteration: 334590
loss: 0.9764770269393921,grad_norm: 0.9004865905784637, iteration: 334591
loss: 1.0106909275054932,grad_norm: 0.833495202631643, iteration: 334592
loss: 1.0033323764801025,grad_norm: 0.8902487512923511, iteration: 334593
loss: 0.9989626407623291,grad_norm: 0.9999995164847111, iteration: 334594
loss: 1.016026496887207,grad_norm: 0.9450996697296122, iteration: 334595
loss: 0.9919177889823914,grad_norm: 0.7425165246935491, iteration: 334596
loss: 1.0187358856201172,grad_norm: 0.8286037353715611, iteration: 334597
loss: 1.0099778175354004,grad_norm: 0.8274913775635168, iteration: 334598
loss: 0.9981881976127625,grad_norm: 0.7907887257442309, iteration: 334599
loss: 0.9988968968391418,grad_norm: 0.7327549381724225, iteration: 334600
loss: 1.0029724836349487,grad_norm: 0.9227176285992131, iteration: 334601
loss: 1.0463228225708008,grad_norm: 0.9982839494681431, iteration: 334602
loss: 1.2077994346618652,grad_norm: 0.9999991504525084, iteration: 334603
loss: 1.0031769275665283,grad_norm: 0.9092437985535294, iteration: 334604
loss: 1.1222904920578003,grad_norm: 0.9999999359598023, iteration: 334605
loss: 0.968473494052887,grad_norm: 0.7677673473445992, iteration: 334606
loss: 1.0021356344223022,grad_norm: 0.8364742854742582, iteration: 334607
loss: 0.9985172748565674,grad_norm: 0.9953882498788096, iteration: 334608
loss: 0.9998410940170288,grad_norm: 0.7928970500367405, iteration: 334609
loss: 0.9998433589935303,grad_norm: 0.8241541254610665, iteration: 334610
loss: 0.9980366230010986,grad_norm: 0.6655434259975205, iteration: 334611
loss: 1.0269907712936401,grad_norm: 0.8739330443896856, iteration: 334612
loss: 1.0360171794891357,grad_norm: 0.8178829777746949, iteration: 334613
loss: 1.0104079246520996,grad_norm: 0.9999997580511996, iteration: 334614
loss: 1.0776841640472412,grad_norm: 0.9999992030768708, iteration: 334615
loss: 0.9886910915374756,grad_norm: 0.7401652205167271, iteration: 334616
loss: 0.9976716041564941,grad_norm: 0.7622987841208855, iteration: 334617
loss: 0.9986653327941895,grad_norm: 0.7892223958311725, iteration: 334618
loss: 1.0131251811981201,grad_norm: 0.8808411243208468, iteration: 334619
loss: 0.9651519060134888,grad_norm: 0.7198733936546005, iteration: 334620
loss: 1.0163425207138062,grad_norm: 0.8098859700136666, iteration: 334621
loss: 0.9915773272514343,grad_norm: 0.7914841660660501, iteration: 334622
loss: 1.0088324546813965,grad_norm: 0.7341893065371181, iteration: 334623
loss: 1.0638601779937744,grad_norm: 0.9999991075241902, iteration: 334624
loss: 0.9819766879081726,grad_norm: 0.7117231300717648, iteration: 334625
loss: 1.029991865158081,grad_norm: 0.960357621707621, iteration: 334626
loss: 0.9800814390182495,grad_norm: 0.8813202301027573, iteration: 334627
loss: 1.0097423791885376,grad_norm: 0.8293994191266687, iteration: 334628
loss: 0.940579891204834,grad_norm: 0.7304171566369524, iteration: 334629
loss: 1.012882947921753,grad_norm: 0.999999353611207, iteration: 334630
loss: 0.986660897731781,grad_norm: 0.874249588675945, iteration: 334631
loss: 0.9873716831207275,grad_norm: 0.8688215173457884, iteration: 334632
loss: 1.0001862049102783,grad_norm: 0.9999997824961298, iteration: 334633
loss: 1.0050815343856812,grad_norm: 0.7597543250323106, iteration: 334634
loss: 1.0274100303649902,grad_norm: 0.9999990620837561, iteration: 334635
loss: 1.006438136100769,grad_norm: 0.7361448965094873, iteration: 334636
loss: 1.151180624961853,grad_norm: 0.9999990942821976, iteration: 334637
loss: 0.9695403575897217,grad_norm: 0.8535714713190153, iteration: 334638
loss: 1.0044492483139038,grad_norm: 0.7745272232347961, iteration: 334639
loss: 0.9730308055877686,grad_norm: 0.8427259711829603, iteration: 334640
loss: 0.9814372658729553,grad_norm: 0.904308942576681, iteration: 334641
loss: 1.0230313539505005,grad_norm: 0.8794621856145043, iteration: 334642
loss: 0.9685535430908203,grad_norm: 0.7799348918554273, iteration: 334643
loss: 1.0343081951141357,grad_norm: 0.7902091563479395, iteration: 334644
loss: 1.0048065185546875,grad_norm: 0.7822188883748333, iteration: 334645
loss: 1.021738052368164,grad_norm: 0.9999997435481599, iteration: 334646
loss: 1.0034353733062744,grad_norm: 0.7960812967879737, iteration: 334647
loss: 1.0099306106567383,grad_norm: 0.7623620241326879, iteration: 334648
loss: 1.019098162651062,grad_norm: 0.8807240733725812, iteration: 334649
loss: 1.005569577217102,grad_norm: 0.8193568067482434, iteration: 334650
loss: 1.0161226987838745,grad_norm: 0.7818383435223187, iteration: 334651
loss: 0.9870930910110474,grad_norm: 0.7330180256782958, iteration: 334652
loss: 1.0174177885055542,grad_norm: 0.7552317126068354, iteration: 334653
loss: 1.0537668466567993,grad_norm: 0.9999991768342059, iteration: 334654
loss: 1.0182347297668457,grad_norm: 0.9999992318282347, iteration: 334655
loss: 1.0019062757492065,grad_norm: 0.9345419733155886, iteration: 334656
loss: 1.016284465789795,grad_norm: 0.9999991201554322, iteration: 334657
loss: 0.994887113571167,grad_norm: 0.8046014357022506, iteration: 334658
loss: 1.0583776235580444,grad_norm: 0.999999377580636, iteration: 334659
loss: 1.0523265600204468,grad_norm: 0.9171764467530189, iteration: 334660
loss: 0.9695962071418762,grad_norm: 0.849306715090142, iteration: 334661
loss: 1.001694679260254,grad_norm: 0.9999998039325477, iteration: 334662
loss: 1.0292669534683228,grad_norm: 0.7133471190522237, iteration: 334663
loss: 1.0002343654632568,grad_norm: 0.8464576604880903, iteration: 334664
loss: 1.037841558456421,grad_norm: 0.9503673850925487, iteration: 334665
loss: 0.9998785257339478,grad_norm: 0.8478705551225341, iteration: 334666
loss: 1.0820955038070679,grad_norm: 0.9999996852060711, iteration: 334667
loss: 0.9771958589553833,grad_norm: 0.8549497050656211, iteration: 334668
loss: 0.9968280792236328,grad_norm: 0.9999995436731777, iteration: 334669
loss: 0.9897371530532837,grad_norm: 0.7171567369048227, iteration: 334670
loss: 0.9845869541168213,grad_norm: 0.9999989591845742, iteration: 334671
loss: 1.014249324798584,grad_norm: 0.7272231379414255, iteration: 334672
loss: 1.0251412391662598,grad_norm: 0.9264860382460095, iteration: 334673
loss: 1.026169776916504,grad_norm: 0.8788121851002989, iteration: 334674
loss: 1.017756462097168,grad_norm: 0.7898117593305445, iteration: 334675
loss: 0.9622632265090942,grad_norm: 0.6961725054745046, iteration: 334676
loss: 0.9780856966972351,grad_norm: 0.9012190166621404, iteration: 334677
loss: 0.9851853847503662,grad_norm: 0.9732059904835239, iteration: 334678
loss: 1.1540483236312866,grad_norm: 0.9999997277274546, iteration: 334679
loss: 1.049699306488037,grad_norm: 0.9762772686140323, iteration: 334680
loss: 1.0151938199996948,grad_norm: 0.9999997014930959, iteration: 334681
loss: 1.0522263050079346,grad_norm: 0.999999757214953, iteration: 334682
loss: 0.9629157781600952,grad_norm: 0.77967908015869, iteration: 334683
loss: 1.0046379566192627,grad_norm: 0.8393692716150745, iteration: 334684
loss: 1.0163018703460693,grad_norm: 0.9654341457483551, iteration: 334685
loss: 0.9819453954696655,grad_norm: 0.7944817694060559, iteration: 334686
loss: 0.9811806678771973,grad_norm: 0.8173517114718823, iteration: 334687
loss: 1.013725996017456,grad_norm: 0.9999992117942661, iteration: 334688
loss: 0.9869552850723267,grad_norm: 0.9999989600928264, iteration: 334689
loss: 1.0263327360153198,grad_norm: 0.7249791683101134, iteration: 334690
loss: 0.9987916946411133,grad_norm: 0.688841292341514, iteration: 334691
loss: 1.0056424140930176,grad_norm: 0.9234552930024402, iteration: 334692
loss: 0.9919701218605042,grad_norm: 0.9746915703530059, iteration: 334693
loss: 1.0284547805786133,grad_norm: 0.7517150343795008, iteration: 334694
loss: 1.0302350521087646,grad_norm: 0.8288671480138909, iteration: 334695
loss: 0.9885302186012268,grad_norm: 0.7863518292089301, iteration: 334696
loss: 0.982214629650116,grad_norm: 0.7718059297966507, iteration: 334697
loss: 0.9946441650390625,grad_norm: 0.9455721366204957, iteration: 334698
loss: 0.9989086985588074,grad_norm: 0.9304804305058163, iteration: 334699
loss: 1.014884114265442,grad_norm: 0.8942592787578648, iteration: 334700
loss: 0.9863182306289673,grad_norm: 0.8729494584517772, iteration: 334701
loss: 1.0392502546310425,grad_norm: 0.9999998769025987, iteration: 334702
loss: 0.9549692869186401,grad_norm: 0.98307531423703, iteration: 334703
loss: 1.0128449201583862,grad_norm: 0.8105238366884869, iteration: 334704
loss: 1.0437984466552734,grad_norm: 0.8925396552995588, iteration: 334705
loss: 1.008386492729187,grad_norm: 0.8016172402198676, iteration: 334706
loss: 1.010702133178711,grad_norm: 0.8755066905477336, iteration: 334707
loss: 1.0156500339508057,grad_norm: 0.6911648324412127, iteration: 334708
loss: 1.0096092224121094,grad_norm: 0.711729289008077, iteration: 334709
loss: 0.9695623517036438,grad_norm: 0.9908572884988511, iteration: 334710
loss: 1.0011652708053589,grad_norm: 0.739975952168999, iteration: 334711
loss: 1.0064237117767334,grad_norm: 0.9999993048433123, iteration: 334712
loss: 0.9785133600234985,grad_norm: 0.8943037043701524, iteration: 334713
loss: 1.008002758026123,grad_norm: 0.8217034400442714, iteration: 334714
loss: 1.1268091201782227,grad_norm: 0.9926302019817621, iteration: 334715
loss: 1.0000593662261963,grad_norm: 0.9999991001251949, iteration: 334716
loss: 0.9703487753868103,grad_norm: 0.755772262177539, iteration: 334717
loss: 0.9613152146339417,grad_norm: 0.9225409788044002, iteration: 334718
loss: 1.0498015880584717,grad_norm: 0.8684478124712863, iteration: 334719
loss: 1.1058496236801147,grad_norm: 0.9999991781334249, iteration: 334720
loss: 0.9990103840827942,grad_norm: 0.8405952124695322, iteration: 334721
loss: 0.9826911091804504,grad_norm: 0.8263089530849982, iteration: 334722
loss: 1.0045807361602783,grad_norm: 0.6244456228190824, iteration: 334723
loss: 1.0033299922943115,grad_norm: 0.9999989942006299, iteration: 334724
loss: 0.9954898357391357,grad_norm: 0.7987725841143254, iteration: 334725
loss: 0.9758246541023254,grad_norm: 0.7395489297468809, iteration: 334726
loss: 1.0346215963363647,grad_norm: 0.9568559004812536, iteration: 334727
loss: 1.0103274583816528,grad_norm: 0.781477614947729, iteration: 334728
loss: 1.0239533185958862,grad_norm: 0.8373825110797766, iteration: 334729
loss: 0.9619536995887756,grad_norm: 0.8571471046357418, iteration: 334730
loss: 0.9741939306259155,grad_norm: 0.8574885460599874, iteration: 334731
loss: 0.993163526058197,grad_norm: 0.8549446319922124, iteration: 334732
loss: 1.0081729888916016,grad_norm: 0.8907497882963766, iteration: 334733
loss: 0.9571925401687622,grad_norm: 0.8247355551467475, iteration: 334734
loss: 1.0142409801483154,grad_norm: 0.999999968676087, iteration: 334735
loss: 0.9681447148323059,grad_norm: 0.8634277806104419, iteration: 334736
loss: 1.0037115812301636,grad_norm: 0.7808237343350919, iteration: 334737
loss: 0.9932939410209656,grad_norm: 0.7843282159470661, iteration: 334738
loss: 0.9386160969734192,grad_norm: 0.7975618242500112, iteration: 334739
loss: 0.9726889729499817,grad_norm: 0.7927725808832793, iteration: 334740
loss: 1.0295493602752686,grad_norm: 0.8752723460892445, iteration: 334741
loss: 1.0378164052963257,grad_norm: 0.9999995540756719, iteration: 334742
loss: 1.0015062093734741,grad_norm: 0.810203201715671, iteration: 334743
loss: 0.9649166464805603,grad_norm: 0.9999990644307287, iteration: 334744
loss: 0.9875178337097168,grad_norm: 0.8158521340730831, iteration: 334745
loss: 0.999061644077301,grad_norm: 0.7497401175053177, iteration: 334746
loss: 0.9954122304916382,grad_norm: 0.8310624043284922, iteration: 334747
loss: 0.9854236841201782,grad_norm: 0.99999924184118, iteration: 334748
loss: 1.0414323806762695,grad_norm: 0.9999997249756262, iteration: 334749
loss: 0.9636538624763489,grad_norm: 0.9999990603994254, iteration: 334750
loss: 1.0026925802230835,grad_norm: 0.860506593218288, iteration: 334751
loss: 1.0018898248672485,grad_norm: 0.8510774787982935, iteration: 334752
loss: 1.0799763202667236,grad_norm: 0.9999999421389555, iteration: 334753
loss: 0.9950004816055298,grad_norm: 0.9999991028738485, iteration: 334754
loss: 0.9718475341796875,grad_norm: 0.8454799139098556, iteration: 334755
loss: 0.9775491952896118,grad_norm: 0.8933570298547173, iteration: 334756
loss: 1.034156084060669,grad_norm: 0.999999067116508, iteration: 334757
loss: 1.0011858940124512,grad_norm: 0.8931978658700945, iteration: 334758
loss: 0.9780485033988953,grad_norm: 0.9781353071577864, iteration: 334759
loss: 1.019642949104309,grad_norm: 0.8814180042922705, iteration: 334760
loss: 1.001757025718689,grad_norm: 0.99999905475219, iteration: 334761
loss: 0.9806279540061951,grad_norm: 0.836170057525583, iteration: 334762
loss: 1.0345667600631714,grad_norm: 0.9467329751267624, iteration: 334763
loss: 1.0080351829528809,grad_norm: 0.9999989884631041, iteration: 334764
loss: 1.0144339799880981,grad_norm: 0.9055171486810114, iteration: 334765
loss: 0.9942216873168945,grad_norm: 0.8886410415862439, iteration: 334766
loss: 0.9862585067749023,grad_norm: 0.9055123503926799, iteration: 334767
loss: 1.1783745288848877,grad_norm: 0.9999991382486036, iteration: 334768
loss: 1.039592981338501,grad_norm: 0.8138029363486461, iteration: 334769
loss: 0.9837892651557922,grad_norm: 0.8901542323518693, iteration: 334770
loss: 1.008859395980835,grad_norm: 0.7049136935734525, iteration: 334771
loss: 0.9222330451011658,grad_norm: 0.9777029439750546, iteration: 334772
loss: 1.0772068500518799,grad_norm: 0.9428898027634974, iteration: 334773
loss: 0.9712309837341309,grad_norm: 0.7445509855596361, iteration: 334774
loss: 1.0525883436203003,grad_norm: 0.9999993919354769, iteration: 334775
loss: 0.9735977649688721,grad_norm: 0.8901391753379163, iteration: 334776
loss: 1.0404689311981201,grad_norm: 0.8620243288430826, iteration: 334777
loss: 0.9682992696762085,grad_norm: 0.7891367328628224, iteration: 334778
loss: 0.9774535894393921,grad_norm: 0.7995676566553854, iteration: 334779
loss: 1.0000131130218506,grad_norm: 0.7743099696192253, iteration: 334780
loss: 1.008269190788269,grad_norm: 0.8252145258728623, iteration: 334781
loss: 0.9859586358070374,grad_norm: 0.839275462851239, iteration: 334782
loss: 1.0044184923171997,grad_norm: 0.813343950915603, iteration: 334783
loss: 1.0178594589233398,grad_norm: 0.8509942485131097, iteration: 334784
loss: 1.011281967163086,grad_norm: 0.7510987567471329, iteration: 334785
loss: 0.9985527396202087,grad_norm: 0.788922789972572, iteration: 334786
loss: 0.9594950079917908,grad_norm: 0.8692359540353697, iteration: 334787
loss: 0.9879714846611023,grad_norm: 0.7704658782327135, iteration: 334788
loss: 1.0070909261703491,grad_norm: 0.9167530001004949, iteration: 334789
loss: 1.0125219821929932,grad_norm: 0.825295362800279, iteration: 334790
loss: 1.022621512413025,grad_norm: 0.7376577081945918, iteration: 334791
loss: 0.9714322090148926,grad_norm: 0.7300897371768794, iteration: 334792
loss: 1.0117688179016113,grad_norm: 0.9999991098051171, iteration: 334793
loss: 1.0237294435501099,grad_norm: 0.675444289323349, iteration: 334794
loss: 0.9934988617897034,grad_norm: 0.9999994202193769, iteration: 334795
loss: 0.9641701579093933,grad_norm: 0.8354209627591508, iteration: 334796
loss: 1.0583373308181763,grad_norm: 0.9411306085740635, iteration: 334797
loss: 1.0155338048934937,grad_norm: 0.9948996447711715, iteration: 334798
loss: 1.0202945470809937,grad_norm: 0.9089289651235155, iteration: 334799
loss: 1.0391647815704346,grad_norm: 0.7719384662924879, iteration: 334800
loss: 0.9890912175178528,grad_norm: 0.7778169466351552, iteration: 334801
loss: 1.0201398134231567,grad_norm: 0.6037926252114095, iteration: 334802
loss: 1.0111991167068481,grad_norm: 0.9875520738010322, iteration: 334803
loss: 0.9939864277839661,grad_norm: 0.7846018884890856, iteration: 334804
loss: 1.001990795135498,grad_norm: 0.9999991956677091, iteration: 334805
loss: 1.018431544303894,grad_norm: 0.8609250017591072, iteration: 334806
loss: 1.0152404308319092,grad_norm: 0.9287957964677678, iteration: 334807
loss: 0.9854545593261719,grad_norm: 0.8386139713267426, iteration: 334808
loss: 0.997855007648468,grad_norm: 0.7175362476919328, iteration: 334809
loss: 1.006705403327942,grad_norm: 0.9786878702038889, iteration: 334810
loss: 1.0532052516937256,grad_norm: 0.9999998564428285, iteration: 334811
loss: 0.9753494262695312,grad_norm: 0.9999990386694929, iteration: 334812
loss: 0.9810677170753479,grad_norm: 0.8001639134622818, iteration: 334813
loss: 0.9887610077857971,grad_norm: 0.9999990185741906, iteration: 334814
loss: 1.0110167264938354,grad_norm: 0.7157831806900199, iteration: 334815
loss: 1.02332603931427,grad_norm: 0.9999991333203996, iteration: 334816
loss: 0.9985931515693665,grad_norm: 0.9085667482404233, iteration: 334817
loss: 0.9933066368103027,grad_norm: 0.776284679028176, iteration: 334818
loss: 0.9661169648170471,grad_norm: 0.8531753123397742, iteration: 334819
loss: 1.0945165157318115,grad_norm: 0.7604240688564321, iteration: 334820
loss: 0.9772546291351318,grad_norm: 0.8716339366667943, iteration: 334821
loss: 1.0216971635818481,grad_norm: 0.884955155952485, iteration: 334822
loss: 0.9628937840461731,grad_norm: 0.8995848103635613, iteration: 334823
loss: 1.0394032001495361,grad_norm: 0.8722394763411913, iteration: 334824
loss: 1.0121899843215942,grad_norm: 0.7297869214701629, iteration: 334825
loss: 1.1105365753173828,grad_norm: 0.7863488100906927, iteration: 334826
loss: 1.04072105884552,grad_norm: 0.9999999673754029, iteration: 334827
loss: 1.0042634010314941,grad_norm: 0.9999998838902817, iteration: 334828
loss: 1.003260850906372,grad_norm: 0.8718735276821986, iteration: 334829
loss: 0.9844318628311157,grad_norm: 0.9999991697295992, iteration: 334830
loss: 1.0010966062545776,grad_norm: 0.8676973859330026, iteration: 334831
loss: 0.967869758605957,grad_norm: 0.8511664832437822, iteration: 334832
loss: 1.0673424005508423,grad_norm: 0.9999993474126166, iteration: 334833
loss: 0.96992427110672,grad_norm: 0.7107320270660284, iteration: 334834
loss: 1.0083982944488525,grad_norm: 0.7740825850470103, iteration: 334835
loss: 1.0177267789840698,grad_norm: 0.7989476044174924, iteration: 334836
loss: 0.9570577144622803,grad_norm: 0.8686579784538813, iteration: 334837
loss: 0.9952096343040466,grad_norm: 0.782132236144855, iteration: 334838
loss: 0.9912805557250977,grad_norm: 0.7992517974457848, iteration: 334839
loss: 0.9764976501464844,grad_norm: 0.7089753472426168, iteration: 334840
loss: 0.9920148253440857,grad_norm: 0.999999102451723, iteration: 334841
loss: 1.0083454847335815,grad_norm: 0.7438222628703614, iteration: 334842
loss: 0.9985771179199219,grad_norm: 0.9072970477254878, iteration: 334843
loss: 0.9818175435066223,grad_norm: 0.8700777347599562, iteration: 334844
loss: 1.004352331161499,grad_norm: 0.8356882714759066, iteration: 334845
loss: 0.9769050478935242,grad_norm: 0.9999995357749883, iteration: 334846
loss: 0.9805251359939575,grad_norm: 0.7321617551367323, iteration: 334847
loss: 1.0077035427093506,grad_norm: 0.791855540077456, iteration: 334848
loss: 0.9850724339485168,grad_norm: 0.844519845454674, iteration: 334849
loss: 1.0067119598388672,grad_norm: 0.9905446858313431, iteration: 334850
loss: 0.9926754832267761,grad_norm: 0.9999991303825169, iteration: 334851
loss: 1.0079251527786255,grad_norm: 0.9637547626453321, iteration: 334852
loss: 1.0054961442947388,grad_norm: 0.8075173097171732, iteration: 334853
loss: 0.9840667247772217,grad_norm: 0.9853126722501693, iteration: 334854
loss: 0.9914035201072693,grad_norm: 0.9047153010708444, iteration: 334855
loss: 1.029044508934021,grad_norm: 0.9999991553766768, iteration: 334856
loss: 0.9916296005249023,grad_norm: 0.9732190974154278, iteration: 334857
loss: 0.9870335459709167,grad_norm: 0.6965794980396172, iteration: 334858
loss: 0.9944032430648804,grad_norm: 0.851253704303693, iteration: 334859
loss: 1.028282642364502,grad_norm: 0.8694813373398186, iteration: 334860
loss: 0.9800087809562683,grad_norm: 0.9442844603781895, iteration: 334861
loss: 1.0036022663116455,grad_norm: 0.9139319137285923, iteration: 334862
loss: 0.9676979184150696,grad_norm: 0.8537063954136276, iteration: 334863
loss: 1.0978963375091553,grad_norm: 0.8788619353166408, iteration: 334864
loss: 0.9754050374031067,grad_norm: 0.9012549609984543, iteration: 334865
loss: 0.9742481112480164,grad_norm: 0.9108553107407792, iteration: 334866
loss: 0.9877856969833374,grad_norm: 0.8385408286731161, iteration: 334867
loss: 0.9766607284545898,grad_norm: 0.6664444411625186, iteration: 334868
loss: 1.0086045265197754,grad_norm: 0.9078581233906522, iteration: 334869
loss: 0.9896163940429688,grad_norm: 0.9100235823955921, iteration: 334870
loss: 0.9636666178703308,grad_norm: 0.8615408855143805, iteration: 334871
loss: 0.9597222208976746,grad_norm: 0.8474220563024014, iteration: 334872
loss: 0.9798529148101807,grad_norm: 0.9072749628584692, iteration: 334873
loss: 1.0182571411132812,grad_norm: 0.8987262800647489, iteration: 334874
loss: 0.9990864396095276,grad_norm: 0.7384640303628691, iteration: 334875
loss: 0.9653691053390503,grad_norm: 0.8071480375827546, iteration: 334876
loss: 1.0333513021469116,grad_norm: 0.776815421271086, iteration: 334877
loss: 0.9918123483657837,grad_norm: 0.8914643113842223, iteration: 334878
loss: 1.0068559646606445,grad_norm: 0.9999992163144822, iteration: 334879
loss: 0.9843912124633789,grad_norm: 0.8064318539868736, iteration: 334880
loss: 1.014910340309143,grad_norm: 0.9999989417295877, iteration: 334881
loss: 1.0342096090316772,grad_norm: 0.8781842970000233, iteration: 334882
loss: 0.9546210765838623,grad_norm: 0.9999990274534762, iteration: 334883
loss: 1.0078604221343994,grad_norm: 0.9502727036640284, iteration: 334884
loss: 1.0124047994613647,grad_norm: 0.8132250049886041, iteration: 334885
loss: 1.0057963132858276,grad_norm: 0.9999996031275518, iteration: 334886
loss: 0.9991059303283691,grad_norm: 0.8410513142162495, iteration: 334887
loss: 0.9781434535980225,grad_norm: 0.8987866004596482, iteration: 334888
loss: 1.0108230113983154,grad_norm: 0.8489088187288952, iteration: 334889
loss: 0.9462637901306152,grad_norm: 0.9202152804524633, iteration: 334890
loss: 1.0008385181427002,grad_norm: 0.7357927124789115, iteration: 334891
loss: 0.9819756746292114,grad_norm: 0.8589321692275008, iteration: 334892
loss: 0.9699264764785767,grad_norm: 0.7658565930972789, iteration: 334893
loss: 1.0025265216827393,grad_norm: 0.9342521001644835, iteration: 334894
loss: 0.995363712310791,grad_norm: 0.8193408339432607, iteration: 334895
loss: 0.9875211119651794,grad_norm: 0.8542486774974308, iteration: 334896
loss: 1.0469783544540405,grad_norm: 0.9999995974128546, iteration: 334897
loss: 1.054202675819397,grad_norm: 0.9999990498167347, iteration: 334898
loss: 1.0074501037597656,grad_norm: 0.8621395997319615, iteration: 334899
loss: 1.0144469738006592,grad_norm: 0.8817622390625661, iteration: 334900
loss: 1.0148316621780396,grad_norm: 0.8815594406144266, iteration: 334901
loss: 1.004799485206604,grad_norm: 0.7158630184250376, iteration: 334902
loss: 0.9916739463806152,grad_norm: 0.8211929342559582, iteration: 334903
loss: 0.9883415102958679,grad_norm: 0.737009342676308, iteration: 334904
loss: 1.0019383430480957,grad_norm: 0.8039817155488551, iteration: 334905
loss: 1.010822057723999,grad_norm: 0.7623187116602189, iteration: 334906
loss: 0.9894493222236633,grad_norm: 0.999999064026328, iteration: 334907
loss: 0.9994977116584778,grad_norm: 0.897282252294555, iteration: 334908
loss: 0.9980010986328125,grad_norm: 0.7979583853625447, iteration: 334909
loss: 1.0704437494277954,grad_norm: 0.9375158440244851, iteration: 334910
loss: 1.0095571279525757,grad_norm: 0.8554167369457876, iteration: 334911
loss: 1.0078613758087158,grad_norm: 0.7626366092339023, iteration: 334912
loss: 1.005033254623413,grad_norm: 0.9791371811724666, iteration: 334913
loss: 1.0214356184005737,grad_norm: 0.8325723175594131, iteration: 334914
loss: 1.0053625106811523,grad_norm: 0.8859687690269187, iteration: 334915
loss: 0.999001681804657,grad_norm: 0.9999996464353593, iteration: 334916
loss: 0.9451660513877869,grad_norm: 0.8768452801215298, iteration: 334917
loss: 1.0304906368255615,grad_norm: 0.8863988160279614, iteration: 334918
loss: 1.0060971975326538,grad_norm: 0.7913357345007764, iteration: 334919
loss: 1.0041953325271606,grad_norm: 0.9999989463531288, iteration: 334920
loss: 0.9802336692810059,grad_norm: 0.9896317859464958, iteration: 334921
loss: 0.9666287302970886,grad_norm: 0.9999991345337944, iteration: 334922
loss: 0.9941955804824829,grad_norm: 0.6417295894019104, iteration: 334923
loss: 1.0169745683670044,grad_norm: 0.8299711166400932, iteration: 334924
loss: 1.0375741720199585,grad_norm: 0.9909386832119867, iteration: 334925
loss: 1.0124645233154297,grad_norm: 0.8620670921566729, iteration: 334926
loss: 1.0086597204208374,grad_norm: 0.9999995963251513, iteration: 334927
loss: 1.04621422290802,grad_norm: 0.8648717982402842, iteration: 334928
loss: 0.9732087254524231,grad_norm: 0.7687150932082153, iteration: 334929
loss: 0.9749647974967957,grad_norm: 0.8859560214708802, iteration: 334930
loss: 0.9694864153862,grad_norm: 0.7977968997115299, iteration: 334931
loss: 0.9553458094596863,grad_norm: 0.9817107510980185, iteration: 334932
loss: 1.0364443063735962,grad_norm: 0.7958803429944947, iteration: 334933
loss: 1.023255467414856,grad_norm: 0.9479970915505481, iteration: 334934
loss: 1.0101301670074463,grad_norm: 0.8320918089485435, iteration: 334935
loss: 0.996429979801178,grad_norm: 0.8341125158238122, iteration: 334936
loss: 0.9990295767784119,grad_norm: 0.9999992731383087, iteration: 334937
loss: 0.9888788461685181,grad_norm: 0.7636484318458001, iteration: 334938
loss: 1.0195324420928955,grad_norm: 0.8242936679155173, iteration: 334939
loss: 1.0067905187606812,grad_norm: 0.9763423801837713, iteration: 334940
loss: 0.9681057333946228,grad_norm: 0.9244854295964048, iteration: 334941
loss: 0.9876328706741333,grad_norm: 0.7960400564833425, iteration: 334942
loss: 1.0032286643981934,grad_norm: 0.6121151341894763, iteration: 334943
loss: 1.0084457397460938,grad_norm: 0.850979226259932, iteration: 334944
loss: 0.9390885233879089,grad_norm: 0.8914547616876974, iteration: 334945
loss: 0.9993159174919128,grad_norm: 0.8899929462931498, iteration: 334946
loss: 1.0229252576828003,grad_norm: 0.708582872647368, iteration: 334947
loss: 0.9862729907035828,grad_norm: 0.8899363883900036, iteration: 334948
loss: 0.9992811679840088,grad_norm: 0.8911575948446699, iteration: 334949
loss: 1.0024230480194092,grad_norm: 0.7964550030367963, iteration: 334950
loss: 1.0314990282058716,grad_norm: 0.999999652942012, iteration: 334951
loss: 1.020437479019165,grad_norm: 0.6841202367916732, iteration: 334952
loss: 1.0161875486373901,grad_norm: 0.8183951731160231, iteration: 334953
loss: 0.9935411810874939,grad_norm: 0.8240759010906544, iteration: 334954
loss: 0.9657331705093384,grad_norm: 0.9692536350422062, iteration: 334955
loss: 0.9837232232093811,grad_norm: 0.8909274697146216, iteration: 334956
loss: 1.0157772302627563,grad_norm: 0.9999995010329972, iteration: 334957
loss: 1.1328204870224,grad_norm: 0.9999998703990507, iteration: 334958
loss: 1.1673742532730103,grad_norm: 0.9999999574241554, iteration: 334959
loss: 0.9881635308265686,grad_norm: 0.9299318444847258, iteration: 334960
loss: 1.0192556381225586,grad_norm: 0.8040747065109192, iteration: 334961
loss: 1.0316832065582275,grad_norm: 0.7287200309618744, iteration: 334962
loss: 1.0248353481292725,grad_norm: 0.8326326035284827, iteration: 334963
loss: 1.018970251083374,grad_norm: 0.9999994743426038, iteration: 334964
loss: 0.9998076558113098,grad_norm: 0.6606702265269835, iteration: 334965
loss: 0.9936791062355042,grad_norm: 0.7433068206519132, iteration: 334966
loss: 1.1075429916381836,grad_norm: 0.9999991750055398, iteration: 334967
loss: 1.0165002346038818,grad_norm: 0.7864646749087535, iteration: 334968
loss: 0.9987882971763611,grad_norm: 0.7207350398691345, iteration: 334969
loss: 0.9819008111953735,grad_norm: 0.9687789410928785, iteration: 334970
loss: 1.3638412952423096,grad_norm: 0.9999994046967562, iteration: 334971
loss: 1.0069535970687866,grad_norm: 0.9999990679861787, iteration: 334972
loss: 1.0028650760650635,grad_norm: 0.8346675612593567, iteration: 334973
loss: 1.0891295671463013,grad_norm: 0.9999991488242795, iteration: 334974
loss: 1.0410280227661133,grad_norm: 0.9999997540558744, iteration: 334975
loss: 1.0056023597717285,grad_norm: 0.8163007277932436, iteration: 334976
loss: 1.0375748872756958,grad_norm: 0.8770153360094326, iteration: 334977
loss: 1.639351487159729,grad_norm: 0.9999997500417518, iteration: 334978
loss: 1.031898856163025,grad_norm: 0.8070868670952253, iteration: 334979
loss: 1.063452124595642,grad_norm: 0.999999207835127, iteration: 334980
loss: 1.019823670387268,grad_norm: 0.9999997277144955, iteration: 334981
loss: 1.1358869075775146,grad_norm: 1.000000018555884, iteration: 334982
loss: 1.1480278968811035,grad_norm: 0.9999996363257642, iteration: 334983
loss: 1.2255700826644897,grad_norm: 0.9999998979530146, iteration: 334984
loss: 1.0383350849151611,grad_norm: 0.9999997747279342, iteration: 334985
loss: 1.0094438791275024,grad_norm: 0.7196393262512317, iteration: 334986
loss: 0.9753338694572449,grad_norm: 0.9695203127985935, iteration: 334987
loss: 0.9969760775566101,grad_norm: 0.8569435227926875, iteration: 334988
loss: 0.9714154005050659,grad_norm: 0.9511731888324768, iteration: 334989
loss: 0.9756971001625061,grad_norm: 0.7399212937670436, iteration: 334990
loss: 1.0215871334075928,grad_norm: 0.8430595619782627, iteration: 334991
loss: 1.3452225923538208,grad_norm: 0.9999998332908081, iteration: 334992
loss: 1.0458520650863647,grad_norm: 0.7418781766040391, iteration: 334993
loss: 1.1617854833602905,grad_norm: 0.9999995307399507, iteration: 334994
loss: 1.0000942945480347,grad_norm: 0.9999996666163777, iteration: 334995
loss: 1.0000786781311035,grad_norm: 0.9048368992513314, iteration: 334996
loss: 0.9922783374786377,grad_norm: 0.7903086209951018, iteration: 334997
loss: 0.9812057614326477,grad_norm: 0.7091633939569593, iteration: 334998
loss: 0.9969046711921692,grad_norm: 0.7915106889020666, iteration: 334999
loss: 1.0330466032028198,grad_norm: 0.9999994892071199, iteration: 335000
loss: 1.1823750734329224,grad_norm: 0.999999674761236, iteration: 335001
loss: 1.0119065046310425,grad_norm: 0.9002737935743022, iteration: 335002
loss: 1.0316245555877686,grad_norm: 0.9999990468256017, iteration: 335003
loss: 0.9908183217048645,grad_norm: 0.9450296227448751, iteration: 335004
loss: 1.0705522298812866,grad_norm: 0.9999996017547976, iteration: 335005
loss: 1.042490005493164,grad_norm: 0.8684653961355627, iteration: 335006
loss: 1.019999623298645,grad_norm: 0.7663085435608791, iteration: 335007
loss: 0.9824603796005249,grad_norm: 0.8179334948161806, iteration: 335008
loss: 1.0229774713516235,grad_norm: 0.7655776217473033, iteration: 335009
loss: 1.046927809715271,grad_norm: 0.7861749332755213, iteration: 335010
loss: 1.038859248161316,grad_norm: 0.8552205196388823, iteration: 335011
loss: 1.0017125606536865,grad_norm: 0.8180765114649864, iteration: 335012
loss: 0.993880569934845,grad_norm: 0.8505407448990667, iteration: 335013
loss: 0.9977067112922668,grad_norm: 1.000000007806743, iteration: 335014
loss: 1.0038166046142578,grad_norm: 0.6509644573148727, iteration: 335015
loss: 1.0512384176254272,grad_norm: 0.7104017185166277, iteration: 335016
loss: 0.9997205138206482,grad_norm: 0.9999992324991733, iteration: 335017
loss: 0.9966742396354675,grad_norm: 0.9999992254203897, iteration: 335018
loss: 1.0000691413879395,grad_norm: 0.7787754914316534, iteration: 335019
loss: 0.9909732937812805,grad_norm: 0.7298132816361876, iteration: 335020
loss: 1.152342438697815,grad_norm: 0.9518913964403652, iteration: 335021
loss: 1.0057569742202759,grad_norm: 0.9999991015994393, iteration: 335022
loss: 1.0073691606521606,grad_norm: 0.8392482667333023, iteration: 335023
loss: 0.9997040033340454,grad_norm: 0.7457205621324272, iteration: 335024
loss: 0.9709504246711731,grad_norm: 0.7253562196013935, iteration: 335025
loss: 0.9919983744621277,grad_norm: 0.748959313530626, iteration: 335026
loss: 0.9715806841850281,grad_norm: 0.8449550603163725, iteration: 335027
loss: 1.0046676397323608,grad_norm: 0.7356087506153816, iteration: 335028
loss: 1.004486083984375,grad_norm: 0.7811811503185834, iteration: 335029
loss: 1.0293819904327393,grad_norm: 0.9999998450189939, iteration: 335030
loss: 1.0394920110702515,grad_norm: 0.9999992186517805, iteration: 335031
loss: 1.0287946462631226,grad_norm: 0.9110184078567707, iteration: 335032
loss: 1.0042325258255005,grad_norm: 0.9424826285484098, iteration: 335033
loss: 0.9939162135124207,grad_norm: 0.8415966574900127, iteration: 335034
loss: 1.012061357498169,grad_norm: 0.9999992711181673, iteration: 335035
loss: 1.0445630550384521,grad_norm: 0.9999998885941767, iteration: 335036
loss: 0.9885859489440918,grad_norm: 0.6721066527081863, iteration: 335037
loss: 0.9998661279678345,grad_norm: 0.8192310523117238, iteration: 335038
loss: 0.9900791645050049,grad_norm: 0.9262106092890224, iteration: 335039
loss: 1.1038615703582764,grad_norm: 0.9348121701136537, iteration: 335040
loss: 1.0473798513412476,grad_norm: 0.9999998610599157, iteration: 335041
loss: 1.0040465593338013,grad_norm: 0.7999816808979967, iteration: 335042
loss: 0.9662221074104309,grad_norm: 0.8674056376741822, iteration: 335043
loss: 1.0018932819366455,grad_norm: 0.7064044455461673, iteration: 335044
loss: 1.0169768333435059,grad_norm: 0.6905160249381526, iteration: 335045
loss: 1.0520427227020264,grad_norm: 0.9999991507618292, iteration: 335046
loss: 1.0524451732635498,grad_norm: 0.9999996260561681, iteration: 335047
loss: 0.9955239295959473,grad_norm: 0.9999992599506587, iteration: 335048
loss: 0.992469310760498,grad_norm: 0.9104144805966412, iteration: 335049
loss: 0.9808292388916016,grad_norm: 0.8460376058778668, iteration: 335050
loss: 0.9957935810089111,grad_norm: 0.9999990883448221, iteration: 335051
loss: 1.0302751064300537,grad_norm: 0.999999324683378, iteration: 335052
loss: 1.0121737718582153,grad_norm: 0.9144440064789909, iteration: 335053
loss: 1.0045651197433472,grad_norm: 0.932724049424894, iteration: 335054
loss: 0.9755219221115112,grad_norm: 0.8166431289170233, iteration: 335055
loss: 0.9998809099197388,grad_norm: 0.7687963962716878, iteration: 335056
loss: 1.0423054695129395,grad_norm: 0.7942824026605805, iteration: 335057
loss: 1.060339331626892,grad_norm: 0.9999994225202198, iteration: 335058
loss: 1.0055204629898071,grad_norm: 0.9084500108237269, iteration: 335059
loss: 0.9937078356742859,grad_norm: 0.8404027643096634, iteration: 335060
loss: 1.0023512840270996,grad_norm: 0.8187007479434494, iteration: 335061
loss: 0.9941291809082031,grad_norm: 0.9999993584300555, iteration: 335062
loss: 0.9838297963142395,grad_norm: 0.7166928564258034, iteration: 335063
loss: 1.0023506879806519,grad_norm: 0.9384354781231434, iteration: 335064
loss: 1.0331257581710815,grad_norm: 0.9999990999957618, iteration: 335065
loss: 1.0153629779815674,grad_norm: 0.7521539176268334, iteration: 335066
loss: 0.9884322881698608,grad_norm: 0.9999991180535859, iteration: 335067
loss: 0.9919347167015076,grad_norm: 0.8647635302687257, iteration: 335068
loss: 1.0250327587127686,grad_norm: 0.8802997439248054, iteration: 335069
loss: 1.0007308721542358,grad_norm: 0.7823352181247145, iteration: 335070
loss: 0.9522538781166077,grad_norm: 0.7695457748779503, iteration: 335071
loss: 0.9787381291389465,grad_norm: 0.8173270278271177, iteration: 335072
loss: 1.0159000158309937,grad_norm: 0.7825456736721061, iteration: 335073
loss: 1.0163952112197876,grad_norm: 0.9946980643887139, iteration: 335074
loss: 1.0152133703231812,grad_norm: 0.6971198529375505, iteration: 335075
loss: 1.0123369693756104,grad_norm: 0.9601124960410442, iteration: 335076
loss: 0.9866277575492859,grad_norm: 0.7806255158290892, iteration: 335077
loss: 0.9817045331001282,grad_norm: 0.7543059751435052, iteration: 335078
loss: 1.0402535200119019,grad_norm: 0.9634878868255506, iteration: 335079
loss: 1.0115677118301392,grad_norm: 0.7794964775560563, iteration: 335080
loss: 0.9591075778007507,grad_norm: 0.9839717815995648, iteration: 335081
loss: 0.9984340667724609,grad_norm: 0.7112593696609738, iteration: 335082
loss: 0.9858549237251282,grad_norm: 0.9999998520996547, iteration: 335083
loss: 0.9861801266670227,grad_norm: 0.9439170852798371, iteration: 335084
loss: 0.9928311109542847,grad_norm: 0.7386036231989643, iteration: 335085
loss: 1.1421499252319336,grad_norm: 0.9999999073468636, iteration: 335086
loss: 1.0246630907058716,grad_norm: 0.9999993538568044, iteration: 335087
loss: 0.9888594150543213,grad_norm: 0.7967898727312164, iteration: 335088
loss: 1.0095880031585693,grad_norm: 0.9999998561062394, iteration: 335089
loss: 0.9792588949203491,grad_norm: 0.8071921217069475, iteration: 335090
loss: 1.068676233291626,grad_norm: 0.9999999940068779, iteration: 335091
loss: 1.0216395854949951,grad_norm: 0.8765635927269518, iteration: 335092
loss: 0.9925031065940857,grad_norm: 0.8246782492248448, iteration: 335093
loss: 1.0004558563232422,grad_norm: 0.9999991244149047, iteration: 335094
loss: 0.974156379699707,grad_norm: 0.8290744764697368, iteration: 335095
loss: 1.0182660818099976,grad_norm: 0.7727532789903965, iteration: 335096
loss: 0.9958244562149048,grad_norm: 0.9999993234697893, iteration: 335097
loss: 1.076050043106079,grad_norm: 0.9999991469843262, iteration: 335098
loss: 0.9776612520217896,grad_norm: 0.8530132256806147, iteration: 335099
loss: 1.0146368741989136,grad_norm: 0.7483453558241278, iteration: 335100
loss: 0.9883497357368469,grad_norm: 0.8202517776002307, iteration: 335101
loss: 1.0192065238952637,grad_norm: 0.9286054453526188, iteration: 335102
loss: 0.9719746708869934,grad_norm: 0.77196759996107, iteration: 335103
loss: 1.0106842517852783,grad_norm: 0.7992451881770172, iteration: 335104
loss: 1.0202444791793823,grad_norm: 0.8509591764882567, iteration: 335105
loss: 1.0068994760513306,grad_norm: 0.7130178731376231, iteration: 335106
loss: 0.9701573848724365,grad_norm: 0.9880821809547659, iteration: 335107
loss: 0.9950308203697205,grad_norm: 0.9999998885042003, iteration: 335108
loss: 1.0587495565414429,grad_norm: 1.0000000370543154, iteration: 335109
loss: 1.0236393213272095,grad_norm: 0.9999990540787921, iteration: 335110
loss: 1.0368764400482178,grad_norm: 0.9999998160376642, iteration: 335111
loss: 1.0119032859802246,grad_norm: 0.8659193617330823, iteration: 335112
loss: 1.0235145092010498,grad_norm: 0.8586003241967484, iteration: 335113
loss: 1.0070297718048096,grad_norm: 0.756464275971879, iteration: 335114
loss: 1.0016686916351318,grad_norm: 0.9488285146791036, iteration: 335115
loss: 1.009172797203064,grad_norm: 0.9152121972102076, iteration: 335116
loss: 0.9913915991783142,grad_norm: 0.8383766733337542, iteration: 335117
loss: 0.9878801107406616,grad_norm: 0.9999992584376576, iteration: 335118
loss: 0.9957408308982849,grad_norm: 0.9074991839173259, iteration: 335119
loss: 1.0200244188308716,grad_norm: 0.7571028172613056, iteration: 335120
loss: 1.0120303630828857,grad_norm: 0.8337918619680653, iteration: 335121
loss: 1.022189736366272,grad_norm: 0.999999108153886, iteration: 335122
loss: 1.2095633745193481,grad_norm: 0.954700771273333, iteration: 335123
loss: 1.007770299911499,grad_norm: 0.887804169310751, iteration: 335124
loss: 0.9762807488441467,grad_norm: 0.8390616260692774, iteration: 335125
loss: 1.0173962116241455,grad_norm: 0.9092463051526763, iteration: 335126
loss: 0.9952314496040344,grad_norm: 0.884615459763474, iteration: 335127
loss: 1.0679417848587036,grad_norm: 0.9999994389413625, iteration: 335128
loss: 0.9962003827095032,grad_norm: 0.8203200183360084, iteration: 335129
loss: 1.0211936235427856,grad_norm: 0.9481050483164809, iteration: 335130
loss: 0.9826515316963196,grad_norm: 0.8660064037299087, iteration: 335131
loss: 0.9880529642105103,grad_norm: 0.743678089121916, iteration: 335132
loss: 1.007954478263855,grad_norm: 0.8936474192134205, iteration: 335133
loss: 0.9631638526916504,grad_norm: 0.9999998427850214, iteration: 335134
loss: 0.9834054112434387,grad_norm: 0.740429981341726, iteration: 335135
loss: 0.988794207572937,grad_norm: 0.9418818878968506, iteration: 335136
loss: 0.9831423759460449,grad_norm: 0.762257712572881, iteration: 335137
loss: 1.0343891382217407,grad_norm: 0.9999990528349888, iteration: 335138
loss: 0.9850397109985352,grad_norm: 0.9999991608435183, iteration: 335139
loss: 1.0509752035140991,grad_norm: 0.9189359117300131, iteration: 335140
loss: 1.0232572555541992,grad_norm: 0.8441792992535957, iteration: 335141
loss: 1.0235683917999268,grad_norm: 0.806772520186108, iteration: 335142
loss: 1.0297585725784302,grad_norm: 0.9279904036171199, iteration: 335143
loss: 1.0977106094360352,grad_norm: 0.9999990659709276, iteration: 335144
loss: 0.9873871803283691,grad_norm: 0.9353125058962394, iteration: 335145
loss: 0.9986388683319092,grad_norm: 0.8633804978787348, iteration: 335146
loss: 1.0294524431228638,grad_norm: 0.9999991434794643, iteration: 335147
loss: 1.0318220853805542,grad_norm: 0.9839932507120175, iteration: 335148
loss: 1.0027495622634888,grad_norm: 0.999999097883021, iteration: 335149
loss: 1.0095630884170532,grad_norm: 0.6337604988316352, iteration: 335150
loss: 0.9608051180839539,grad_norm: 0.7668310600434466, iteration: 335151
loss: 1.0108757019042969,grad_norm: 0.8971458167728783, iteration: 335152
loss: 0.9895136952400208,grad_norm: 0.8009457947861187, iteration: 335153
loss: 0.9639451503753662,grad_norm: 0.7859477793020716, iteration: 335154
loss: 1.0203014612197876,grad_norm: 0.7916015041173542, iteration: 335155
loss: 0.9934937953948975,grad_norm: 0.7162454073480516, iteration: 335156
loss: 0.9935470819473267,grad_norm: 0.7274394192128081, iteration: 335157
loss: 0.9954936504364014,grad_norm: 0.9999990642967711, iteration: 335158
loss: 1.0453763008117676,grad_norm: 0.9083069592999177, iteration: 335159
loss: 1.0317829847335815,grad_norm: 0.9999994382808225, iteration: 335160
loss: 0.9974943995475769,grad_norm: 0.8246482558714512, iteration: 335161
loss: 0.9851481914520264,grad_norm: 0.9999996384382516, iteration: 335162
loss: 1.0107660293579102,grad_norm: 0.9084934564840897, iteration: 335163
loss: 1.0115382671356201,grad_norm: 0.7847066692464237, iteration: 335164
loss: 1.0692106485366821,grad_norm: 0.9999991858469879, iteration: 335165
loss: 0.9815990924835205,grad_norm: 0.7528955485437251, iteration: 335166
loss: 1.104355812072754,grad_norm: 0.8244490791210807, iteration: 335167
loss: 1.0339360237121582,grad_norm: 0.9999991241870214, iteration: 335168
loss: 1.0134549140930176,grad_norm: 0.8792248461313417, iteration: 335169
loss: 1.0016878843307495,grad_norm: 0.8592656208362062, iteration: 335170
loss: 0.9572654366493225,grad_norm: 0.9811656659164465, iteration: 335171
loss: 0.9768342971801758,grad_norm: 0.9999990551666633, iteration: 335172
loss: 1.114555835723877,grad_norm: 0.7413619703330582, iteration: 335173
loss: 1.0360721349716187,grad_norm: 0.999999258119877, iteration: 335174
loss: 1.0076305866241455,grad_norm: 0.8718020000957668, iteration: 335175
loss: 1.0542930364608765,grad_norm: 0.9999993510300642, iteration: 335176
loss: 0.9999310374259949,grad_norm: 0.7485572974836956, iteration: 335177
loss: 0.968127965927124,grad_norm: 0.7861504899580282, iteration: 335178
loss: 1.0435148477554321,grad_norm: 0.9358438146051835, iteration: 335179
loss: 1.0082776546478271,grad_norm: 0.8997766713594866, iteration: 335180
loss: 1.0615038871765137,grad_norm: 0.999999149869789, iteration: 335181
loss: 0.9660314321517944,grad_norm: 0.7999312204643616, iteration: 335182
loss: 1.0036174058914185,grad_norm: 0.9648007100465982, iteration: 335183
loss: 0.9908331632614136,grad_norm: 0.9412330784804555, iteration: 335184
loss: 1.112723469734192,grad_norm: 0.9999991189947511, iteration: 335185
loss: 1.0851967334747314,grad_norm: 0.9999991372549983, iteration: 335186
loss: 1.009723424911499,grad_norm: 0.7662132887962592, iteration: 335187
loss: 0.9773492217063904,grad_norm: 0.7984836025581145, iteration: 335188
loss: 1.0763484239578247,grad_norm: 0.999999140656371, iteration: 335189
loss: 0.9858522415161133,grad_norm: 0.9734991089021594, iteration: 335190
loss: 0.9696163535118103,grad_norm: 0.8386841289279752, iteration: 335191
loss: 1.0048760175704956,grad_norm: 0.8941670406571148, iteration: 335192
loss: 1.010504961013794,grad_norm: 0.6710263256909699, iteration: 335193
loss: 1.0063753128051758,grad_norm: 0.9999990815363661, iteration: 335194
loss: 0.9861180186271667,grad_norm: 0.7559497673289569, iteration: 335195
loss: 1.0670570135116577,grad_norm: 0.9999990135078196, iteration: 335196
loss: 1.0195738077163696,grad_norm: 0.7767866223599906, iteration: 335197
loss: 1.0393891334533691,grad_norm: 0.8929472540787408, iteration: 335198
loss: 1.0615861415863037,grad_norm: 0.9727010570697432, iteration: 335199
loss: 1.0663598775863647,grad_norm: 0.9999993072466109, iteration: 335200
loss: 1.0658924579620361,grad_norm: 0.9599792841869311, iteration: 335201
loss: 1.0174306631088257,grad_norm: 0.9999990564634845, iteration: 335202
loss: 0.9912664890289307,grad_norm: 0.8013273984420306, iteration: 335203
loss: 1.0021185874938965,grad_norm: 0.7813979262617052, iteration: 335204
loss: 1.034887671470642,grad_norm: 0.9999998055346323, iteration: 335205
loss: 1.0265294313430786,grad_norm: 0.99999950765763, iteration: 335206
loss: 1.0029091835021973,grad_norm: 0.7159395006806755, iteration: 335207
loss: 0.9940987825393677,grad_norm: 0.7610972380493779, iteration: 335208
loss: 0.9956139326095581,grad_norm: 0.8234933834555364, iteration: 335209
loss: 0.943271815776825,grad_norm: 0.9407489130198726, iteration: 335210
loss: 0.9682906270027161,grad_norm: 0.7291535172326248, iteration: 335211
loss: 0.9586113095283508,grad_norm: 0.8097877874297315, iteration: 335212
loss: 1.1130449771881104,grad_norm: 0.9999992869392536, iteration: 335213
loss: 0.9586932063102722,grad_norm: 0.9853612556180426, iteration: 335214
loss: 1.0241894721984863,grad_norm: 0.9202368148568598, iteration: 335215
loss: 1.101384162902832,grad_norm: 0.9999998265617875, iteration: 335216
loss: 1.1427867412567139,grad_norm: 0.9999993776307704, iteration: 335217
loss: 1.1453841924667358,grad_norm: 0.9999992779530893, iteration: 335218
loss: 1.1902743577957153,grad_norm: 0.9999999876180203, iteration: 335219
loss: 1.0326226949691772,grad_norm: 0.8833733748796496, iteration: 335220
loss: 1.1656134128570557,grad_norm: 0.99999926192934, iteration: 335221
loss: 1.0327568054199219,grad_norm: 0.8920570144964517, iteration: 335222
loss: 1.178154468536377,grad_norm: 0.9999994417480232, iteration: 335223
loss: 1.0049479007720947,grad_norm: 0.9999993555453981, iteration: 335224
loss: 0.9896297454833984,grad_norm: 0.84512051776476, iteration: 335225
loss: 1.1580066680908203,grad_norm: 0.9999997905115322, iteration: 335226
loss: 1.0076899528503418,grad_norm: 0.7887033594084506, iteration: 335227
loss: 1.0654566287994385,grad_norm: 0.9999998476180725, iteration: 335228
loss: 1.0749331712722778,grad_norm: 0.9455779356202673, iteration: 335229
loss: 1.1324604749679565,grad_norm: 0.8126353226144514, iteration: 335230
loss: 1.109019160270691,grad_norm: 1.0000000129904574, iteration: 335231
loss: 1.1219886541366577,grad_norm: 0.9999993205247327, iteration: 335232
loss: 1.129762053489685,grad_norm: 0.9999998487816416, iteration: 335233
loss: 1.0992323160171509,grad_norm: 0.9999991849737083, iteration: 335234
loss: 1.048862338066101,grad_norm: 0.8016926166103772, iteration: 335235
loss: 1.0678536891937256,grad_norm: 0.9999991024694753, iteration: 335236
loss: 1.0455322265625,grad_norm: 0.9999999283244047, iteration: 335237
loss: 1.055269718170166,grad_norm: 0.9999997831345154, iteration: 335238
loss: 0.9932109713554382,grad_norm: 0.9605633398257503, iteration: 335239
loss: 1.2195799350738525,grad_norm: 0.9999996900819733, iteration: 335240
loss: 1.1223474740982056,grad_norm: 0.9999992336356548, iteration: 335241
loss: 1.032413125038147,grad_norm: 0.9999999662843244, iteration: 335242
loss: 0.9577277898788452,grad_norm: 0.9752539583996519, iteration: 335243
loss: 1.0933963060379028,grad_norm: 0.9999996377789158, iteration: 335244
loss: 0.9727834463119507,grad_norm: 0.9272273911126795, iteration: 335245
loss: 1.1029343605041504,grad_norm: 0.99999955686141, iteration: 335246
loss: 1.0952134132385254,grad_norm: 0.9999998796743995, iteration: 335247
loss: 1.172905445098877,grad_norm: 0.9999994770959839, iteration: 335248
loss: 1.1762793064117432,grad_norm: 1.0000000297546563, iteration: 335249
loss: 1.045791506767273,grad_norm: 0.9166929991784571, iteration: 335250
loss: 1.0045472383499146,grad_norm: 0.7648981140576298, iteration: 335251
loss: 0.9857553839683533,grad_norm: 0.9999993300938882, iteration: 335252
loss: 1.0206804275512695,grad_norm: 0.9999995770432653, iteration: 335253
loss: 0.9825953841209412,grad_norm: 0.9383290036977262, iteration: 335254
loss: 0.9774942994117737,grad_norm: 0.8398939312519227, iteration: 335255
loss: 1.0292637348175049,grad_norm: 0.910647766121214, iteration: 335256
loss: 1.0660375356674194,grad_norm: 0.9999997880060385, iteration: 335257
loss: 1.0511425733566284,grad_norm: 0.9999994099366918, iteration: 335258
loss: 1.096606731414795,grad_norm: 0.9999998268670762, iteration: 335259
loss: 0.9989814162254333,grad_norm: 0.837117926982928, iteration: 335260
loss: 0.9893813729286194,grad_norm: 0.8394652885780678, iteration: 335261
loss: 1.002379298210144,grad_norm: 0.8018250437693031, iteration: 335262
loss: 0.9796959757804871,grad_norm: 0.8937997890893128, iteration: 335263
loss: 0.9842406511306763,grad_norm: 0.8649487061837955, iteration: 335264
loss: 1.0214515924453735,grad_norm: 0.7451811613046226, iteration: 335265
loss: 1.0483027696609497,grad_norm: 0.7671589860199926, iteration: 335266
loss: 0.9568571448326111,grad_norm: 0.8525414349058749, iteration: 335267
loss: 1.0712436437606812,grad_norm: 0.9999990926296684, iteration: 335268
loss: 0.9905020594596863,grad_norm: 0.8162714107264991, iteration: 335269
loss: 1.0061640739440918,grad_norm: 0.885901669246143, iteration: 335270
loss: 1.0279169082641602,grad_norm: 0.9845848204337215, iteration: 335271
loss: 0.9986289143562317,grad_norm: 0.9999996944965138, iteration: 335272
loss: 1.037938117980957,grad_norm: 0.806043523582017, iteration: 335273
loss: 1.0511953830718994,grad_norm: 0.9999998906436423, iteration: 335274
loss: 1.020207166671753,grad_norm: 0.9999991296471993, iteration: 335275
loss: 0.9529179930686951,grad_norm: 0.766499025390938, iteration: 335276
loss: 1.0053150653839111,grad_norm: 0.9531414742512315, iteration: 335277
loss: 0.9871132373809814,grad_norm: 0.7954542707330742, iteration: 335278
loss: 0.9781562685966492,grad_norm: 0.8163900745808995, iteration: 335279
loss: 0.9787688255310059,grad_norm: 0.8072222438909324, iteration: 335280
loss: 1.077104091644287,grad_norm: 0.9089089030325149, iteration: 335281
loss: 1.0080146789550781,grad_norm: 0.7732896338589642, iteration: 335282
loss: 1.0205273628234863,grad_norm: 0.8329293410529771, iteration: 335283
loss: 1.035611867904663,grad_norm: 0.9659401115581822, iteration: 335284
loss: 1.0210233926773071,grad_norm: 0.8497945633873547, iteration: 335285
loss: 1.0426442623138428,grad_norm: 0.9999991033668211, iteration: 335286
loss: 1.0089161396026611,grad_norm: 0.9999991797582154, iteration: 335287
loss: 1.0025900602340698,grad_norm: 0.7421055296780517, iteration: 335288
loss: 1.0178321599960327,grad_norm: 0.9999995652998154, iteration: 335289
loss: 1.0017181634902954,grad_norm: 0.9999996248023196, iteration: 335290
loss: 0.9988190531730652,grad_norm: 0.9999992130649539, iteration: 335291
loss: 0.9951866269111633,grad_norm: 0.8888701553292883, iteration: 335292
loss: 1.0136116743087769,grad_norm: 0.814383522127763, iteration: 335293
loss: 0.9745464324951172,grad_norm: 0.6757617586085582, iteration: 335294
loss: 1.020797610282898,grad_norm: 0.8091856749277495, iteration: 335295
loss: 1.085852861404419,grad_norm: 1.0000000144716013, iteration: 335296
loss: 0.9972546696662903,grad_norm: 0.8396498943680325, iteration: 335297
loss: 0.9719803929328918,grad_norm: 0.7955038358262675, iteration: 335298
loss: 1.0081616640090942,grad_norm: 0.9844530327633596, iteration: 335299
loss: 0.9919020533561707,grad_norm: 0.9397739558080161, iteration: 335300
loss: 1.016095519065857,grad_norm: 0.7352622593610458, iteration: 335301
loss: 1.0164133310317993,grad_norm: 0.8384632799574981, iteration: 335302
loss: 1.0581650733947754,grad_norm: 0.8366687062077393, iteration: 335303
loss: 0.9956390857696533,grad_norm: 0.7363687241588674, iteration: 335304
loss: 0.9599027037620544,grad_norm: 0.7634177655004017, iteration: 335305
loss: 0.9856429100036621,grad_norm: 0.9695987129401991, iteration: 335306
loss: 1.0503615140914917,grad_norm: 0.9999994445702973, iteration: 335307
loss: 1.0650482177734375,grad_norm: 0.9999990931854053, iteration: 335308
loss: 0.990483820438385,grad_norm: 0.7469419985724594, iteration: 335309
loss: 1.0580418109893799,grad_norm: 0.782274362373578, iteration: 335310
loss: 0.9757199287414551,grad_norm: 0.9955902079705489, iteration: 335311
loss: 1.1098675727844238,grad_norm: 0.9999994815901126, iteration: 335312
loss: 1.0005730390548706,grad_norm: 0.8646635004249784, iteration: 335313
loss: 1.0120153427124023,grad_norm: 0.7505772900324688, iteration: 335314
loss: 0.9566858410835266,grad_norm: 0.7332479730253654, iteration: 335315
loss: 0.9775155186653137,grad_norm: 0.907204841838014, iteration: 335316
loss: 1.0107797384262085,grad_norm: 0.9311271728454555, iteration: 335317
loss: 1.0630594491958618,grad_norm: 0.9999990912521577, iteration: 335318
loss: 0.9771802425384521,grad_norm: 0.8040528991456676, iteration: 335319
loss: 1.0137656927108765,grad_norm: 0.8738943072873047, iteration: 335320
loss: 1.0337163209915161,grad_norm: 0.9999991336755059, iteration: 335321
loss: 0.9741957783699036,grad_norm: 0.9999991501089497, iteration: 335322
loss: 0.9763747453689575,grad_norm: 0.9624052387410229, iteration: 335323
loss: 1.0109704732894897,grad_norm: 0.9112940883327205, iteration: 335324
loss: 1.0053510665893555,grad_norm: 0.8308660772965079, iteration: 335325
loss: 0.9983265399932861,grad_norm: 0.7192729672973815, iteration: 335326
loss: 1.022506833076477,grad_norm: 0.9999996463657922, iteration: 335327
loss: 1.0699753761291504,grad_norm: 0.9999998859933203, iteration: 335328
loss: 0.9990911483764648,grad_norm: 0.7857666955202423, iteration: 335329
loss: 0.9694563746452332,grad_norm: 0.8749563613142481, iteration: 335330
loss: 1.0303986072540283,grad_norm: 0.8906639614513773, iteration: 335331
loss: 1.0323643684387207,grad_norm: 0.9999998505557673, iteration: 335332
loss: 1.0046250820159912,grad_norm: 0.7207035569304912, iteration: 335333
loss: 0.9651654958724976,grad_norm: 0.8138477861136818, iteration: 335334
loss: 0.9903735518455505,grad_norm: 0.8980834095771675, iteration: 335335
loss: 0.9918984770774841,grad_norm: 0.8950021285456332, iteration: 335336
loss: 1.0009123086929321,grad_norm: 0.9997853849485961, iteration: 335337
loss: 1.0224634408950806,grad_norm: 0.812589148061121, iteration: 335338
loss: 0.9754500985145569,grad_norm: 0.8253537801522997, iteration: 335339
loss: 1.0269137620925903,grad_norm: 0.9999998412299858, iteration: 335340
loss: 1.1127574443817139,grad_norm: 0.9999994743425752, iteration: 335341
loss: 1.0034745931625366,grad_norm: 0.9942611773678339, iteration: 335342
loss: 0.986298680305481,grad_norm: 0.9418709437555922, iteration: 335343
loss: 0.9760759472846985,grad_norm: 0.7449254296050118, iteration: 335344
loss: 1.0276602506637573,grad_norm: 0.8727418840547484, iteration: 335345
loss: 1.2590751647949219,grad_norm: 0.9999999420301298, iteration: 335346
loss: 0.9849350452423096,grad_norm: 0.8527729182972175, iteration: 335347
loss: 1.0495001077651978,grad_norm: 0.986871936823496, iteration: 335348
loss: 0.991699755191803,grad_norm: 0.8695434048906121, iteration: 335349
loss: 1.1241586208343506,grad_norm: 0.9999994428513512, iteration: 335350
loss: 1.0096418857574463,grad_norm: 0.8169354031129167, iteration: 335351
loss: 1.0069149732589722,grad_norm: 0.8918067185887808, iteration: 335352
loss: 0.9992900490760803,grad_norm: 0.8439172714071929, iteration: 335353
loss: 1.0132132768630981,grad_norm: 0.6968938539797848, iteration: 335354
loss: 1.0415303707122803,grad_norm: 0.6992088122852742, iteration: 335355
loss: 1.1043223142623901,grad_norm: 0.9999991619366779, iteration: 335356
loss: 1.0513533353805542,grad_norm: 0.9999992525490101, iteration: 335357
loss: 1.0244979858398438,grad_norm: 0.7831214684799817, iteration: 335358
loss: 1.015994906425476,grad_norm: 0.9431329429136559, iteration: 335359
loss: 1.039367437362671,grad_norm: 0.9532525060508256, iteration: 335360
loss: 0.9831631779670715,grad_norm: 0.910194214847134, iteration: 335361
loss: 1.0454715490341187,grad_norm: 0.9999990918697387, iteration: 335362
loss: 0.9772844910621643,grad_norm: 0.787171693276817, iteration: 335363
loss: 1.2121115922927856,grad_norm: 1.0000000358419892, iteration: 335364
loss: 1.0089585781097412,grad_norm: 0.7790154592499531, iteration: 335365
loss: 1.0080636739730835,grad_norm: 0.8441069660605873, iteration: 335366
loss: 1.0127373933792114,grad_norm: 0.7671716968498441, iteration: 335367
loss: 0.960252583026886,grad_norm: 0.8302649564119339, iteration: 335368
loss: 1.0311870574951172,grad_norm: 0.762777051635611, iteration: 335369
loss: 1.024446725845337,grad_norm: 0.739122925848448, iteration: 335370
loss: 0.9793208837509155,grad_norm: 0.7939278141005985, iteration: 335371
loss: 1.0211231708526611,grad_norm: 0.780685946848315, iteration: 335372
loss: 1.0178866386413574,grad_norm: 0.940763171234687, iteration: 335373
loss: 1.0214972496032715,grad_norm: 0.9999998811861435, iteration: 335374
loss: 0.9941667318344116,grad_norm: 0.8630467435422315, iteration: 335375
loss: 1.0082402229309082,grad_norm: 0.9253750713960157, iteration: 335376
loss: 1.024482011795044,grad_norm: 0.9999999900982659, iteration: 335377
loss: 1.0405220985412598,grad_norm: 0.9999999711168273, iteration: 335378
loss: 1.0029184818267822,grad_norm: 0.8669317905607616, iteration: 335379
loss: 1.0772799253463745,grad_norm: 0.999999590148214, iteration: 335380
loss: 1.0303874015808105,grad_norm: 0.7630805551732133, iteration: 335381
loss: 1.014430046081543,grad_norm: 0.7988352318229854, iteration: 335382
loss: 1.0669986009597778,grad_norm: 0.9999998552506013, iteration: 335383
loss: 1.1079729795455933,grad_norm: 0.9999991471736714, iteration: 335384
loss: 0.9914116263389587,grad_norm: 0.8291927084096443, iteration: 335385
loss: 1.0035455226898193,grad_norm: 0.7847785371824065, iteration: 335386
loss: 1.0197588205337524,grad_norm: 0.9168716198035417, iteration: 335387
loss: 1.0098968744277954,grad_norm: 0.7384236622160615, iteration: 335388
loss: 1.0076425075531006,grad_norm: 0.9861734963529276, iteration: 335389
loss: 1.0580065250396729,grad_norm: 0.9999996943492223, iteration: 335390
loss: 1.0092912912368774,grad_norm: 0.9429978330308257, iteration: 335391
loss: 1.0029314756393433,grad_norm: 0.9214876803606179, iteration: 335392
loss: 0.9975691437721252,grad_norm: 0.8638569097060516, iteration: 335393
loss: 0.9622713923454285,grad_norm: 0.8850513991454765, iteration: 335394
loss: 1.0046677589416504,grad_norm: 0.9999990694204002, iteration: 335395
loss: 1.1206448078155518,grad_norm: 0.999999587967572, iteration: 335396
loss: 1.0798434019088745,grad_norm: 0.946456573862219, iteration: 335397
loss: 1.2618962526321411,grad_norm: 0.999999109783194, iteration: 335398
loss: 1.0029658079147339,grad_norm: 0.999999093946277, iteration: 335399
loss: 0.9889335036277771,grad_norm: 0.9999991681029059, iteration: 335400
loss: 1.0213021039962769,grad_norm: 0.8166652777245116, iteration: 335401
loss: 1.0837831497192383,grad_norm: 0.9999998865561792, iteration: 335402
loss: 1.0489751100540161,grad_norm: 0.999999406100624, iteration: 335403
loss: 1.0400938987731934,grad_norm: 0.9999991643790945, iteration: 335404
loss: 1.0399751663208008,grad_norm: 0.7923417603732179, iteration: 335405
loss: 0.9928299784660339,grad_norm: 0.9657940080247104, iteration: 335406
loss: 1.0406049489974976,grad_norm: 0.9999991493648835, iteration: 335407
loss: 0.9729059934616089,grad_norm: 0.8885425851560868, iteration: 335408
loss: 0.9823026061058044,grad_norm: 0.7989912816636208, iteration: 335409
loss: 1.0214146375656128,grad_norm: 0.9999990622636027, iteration: 335410
loss: 1.005878210067749,grad_norm: 0.7917882245543346, iteration: 335411
loss: 1.0391360521316528,grad_norm: 0.9999994337344091, iteration: 335412
loss: 1.0198798179626465,grad_norm: 0.9334598699035215, iteration: 335413
loss: 1.001123070716858,grad_norm: 0.9162135126557298, iteration: 335414
loss: 1.0530298948287964,grad_norm: 0.999999288752047, iteration: 335415
loss: 0.995146632194519,grad_norm: 0.9120134268498917, iteration: 335416
loss: 1.0022106170654297,grad_norm: 0.8238193686029595, iteration: 335417
loss: 1.0239845514297485,grad_norm: 0.9999995905083907, iteration: 335418
loss: 1.0233099460601807,grad_norm: 0.8006594880315542, iteration: 335419
loss: 0.9991446733474731,grad_norm: 0.952522742897414, iteration: 335420
loss: 1.1634738445281982,grad_norm: 0.9999997130223901, iteration: 335421
loss: 0.9764924645423889,grad_norm: 0.9533409301577316, iteration: 335422
loss: 0.9638040661811829,grad_norm: 0.8414334882935232, iteration: 335423
loss: 1.011549472808838,grad_norm: 0.7482842144793022, iteration: 335424
loss: 0.9949061274528503,grad_norm: 0.865700090723319, iteration: 335425
loss: 0.9910880327224731,grad_norm: 0.930365303233756, iteration: 335426
loss: 0.9847011566162109,grad_norm: 0.8287367321716603, iteration: 335427
loss: 1.0310382843017578,grad_norm: 0.9999990108104257, iteration: 335428
loss: 0.9945801496505737,grad_norm: 0.7815615160499275, iteration: 335429
loss: 1.1104342937469482,grad_norm: 0.9999994380303112, iteration: 335430
loss: 1.026602029800415,grad_norm: 0.999999163574051, iteration: 335431
loss: 0.9645711779594421,grad_norm: 0.9999992389855502, iteration: 335432
loss: 1.0071355104446411,grad_norm: 0.9999993323780095, iteration: 335433
loss: 1.097709059715271,grad_norm: 0.9999995939607096, iteration: 335434
loss: 1.0703258514404297,grad_norm: 0.9999999901397751, iteration: 335435
loss: 0.983476996421814,grad_norm: 0.8631823555317244, iteration: 335436
loss: 0.9926440715789795,grad_norm: 0.9999993034898552, iteration: 335437
loss: 0.997934877872467,grad_norm: 0.9999991501042589, iteration: 335438
loss: 1.0043542385101318,grad_norm: 0.9999999291941898, iteration: 335439
loss: 0.9733842611312866,grad_norm: 0.8216541728060338, iteration: 335440
loss: 0.9912381768226624,grad_norm: 0.6840077219485368, iteration: 335441
loss: 0.9635699391365051,grad_norm: 0.9581823453474321, iteration: 335442
loss: 0.9932814240455627,grad_norm: 0.9019734939386983, iteration: 335443
loss: 0.9985612034797668,grad_norm: 0.9999990488034336, iteration: 335444
loss: 1.0300508737564087,grad_norm: 0.9999997733242963, iteration: 335445
loss: 1.0204263925552368,grad_norm: 0.9999993296889904, iteration: 335446
loss: 0.9730780124664307,grad_norm: 0.722341430600852, iteration: 335447
loss: 1.0404695272445679,grad_norm: 0.9999991837388205, iteration: 335448
loss: 1.0006887912750244,grad_norm: 0.9158942873090944, iteration: 335449
loss: 0.9904863834381104,grad_norm: 0.7061815361395265, iteration: 335450
loss: 1.1642744541168213,grad_norm: 0.9999994595497887, iteration: 335451
loss: 1.0036147832870483,grad_norm: 0.9999997089880542, iteration: 335452
loss: 1.0167038440704346,grad_norm: 0.9999991684431645, iteration: 335453
loss: 1.0033388137817383,grad_norm: 0.9999999269664829, iteration: 335454
loss: 1.1087424755096436,grad_norm: 0.9999997894333138, iteration: 335455
loss: 1.0208553075790405,grad_norm: 0.9062685972232135, iteration: 335456
loss: 0.9954458475112915,grad_norm: 0.9999999259115655, iteration: 335457
loss: 1.0262869596481323,grad_norm: 0.7953201586076054, iteration: 335458
loss: 1.0292178392410278,grad_norm: 0.9781671659603941, iteration: 335459
loss: 1.0443922281265259,grad_norm: 0.9999999776647712, iteration: 335460
loss: 0.9759754538536072,grad_norm: 0.7417130709699804, iteration: 335461
loss: 1.0142135620117188,grad_norm: 0.8925922996986062, iteration: 335462
loss: 1.0600924491882324,grad_norm: 0.999999781223378, iteration: 335463
loss: 0.9906548857688904,grad_norm: 0.9485563658650076, iteration: 335464
loss: 1.017622709274292,grad_norm: 0.834961922635392, iteration: 335465
loss: 0.9933499097824097,grad_norm: 0.7128551066353976, iteration: 335466
loss: 1.0515896081924438,grad_norm: 0.8320242902722784, iteration: 335467
loss: 1.02937912940979,grad_norm: 0.871220210634869, iteration: 335468
loss: 1.0279396772384644,grad_norm: 0.8694479791073092, iteration: 335469
loss: 1.0024391412734985,grad_norm: 0.7556351482919382, iteration: 335470
loss: 1.2531042098999023,grad_norm: 0.999999413733261, iteration: 335471
loss: 1.0493735074996948,grad_norm: 0.9562935146561377, iteration: 335472
loss: 1.093338966369629,grad_norm: 0.9999990401613741, iteration: 335473
loss: 1.042677402496338,grad_norm: 0.9999999925660521, iteration: 335474
loss: 1.015920877456665,grad_norm: 0.7297315136991053, iteration: 335475
loss: 1.04128098487854,grad_norm: 0.9999992128570684, iteration: 335476
loss: 1.018434762954712,grad_norm: 0.9999996040052005, iteration: 335477
loss: 0.9888525009155273,grad_norm: 0.9999996361184692, iteration: 335478
loss: 0.9987404346466064,grad_norm: 0.9999993058672052, iteration: 335479
loss: 0.9902351498603821,grad_norm: 0.7388247790545545, iteration: 335480
loss: 0.9993120431900024,grad_norm: 0.9999991045671066, iteration: 335481
loss: 1.0059057474136353,grad_norm: 0.8157135733251734, iteration: 335482
loss: 1.0347309112548828,grad_norm: 0.8451482425979693, iteration: 335483
loss: 1.0199849605560303,grad_norm: 0.8069541137220186, iteration: 335484
loss: 1.1716395616531372,grad_norm: 0.9999998006869503, iteration: 335485
loss: 0.9286812543869019,grad_norm: 0.999999471320563, iteration: 335486
loss: 0.9887977242469788,grad_norm: 0.9999991109970361, iteration: 335487
loss: 1.0354952812194824,grad_norm: 0.7498988032034765, iteration: 335488
loss: 1.0011104345321655,grad_norm: 0.9999992077340782, iteration: 335489
loss: 1.004022479057312,grad_norm: 0.7701035399190359, iteration: 335490
loss: 1.020336389541626,grad_norm: 0.9999990204172097, iteration: 335491
loss: 1.0197696685791016,grad_norm: 0.8926325284805017, iteration: 335492
loss: 1.0795108079910278,grad_norm: 0.9999993292383238, iteration: 335493
loss: 1.1148669719696045,grad_norm: 0.9999997499904464, iteration: 335494
loss: 1.1196444034576416,grad_norm: 0.9999997617339293, iteration: 335495
loss: 0.9982307553291321,grad_norm: 0.74827953047496, iteration: 335496
loss: 1.0980623960494995,grad_norm: 0.8204738726585921, iteration: 335497
loss: 1.0235875844955444,grad_norm: 0.8070507370629902, iteration: 335498
loss: 1.0969520807266235,grad_norm: 0.9999992237560077, iteration: 335499
loss: 0.9899036884307861,grad_norm: 0.7739727503388467, iteration: 335500
loss: 0.9794207215309143,grad_norm: 0.8207593693268157, iteration: 335501
loss: 1.0188262462615967,grad_norm: 0.77195272330734, iteration: 335502
loss: 1.0690687894821167,grad_norm: 0.853558928997363, iteration: 335503
loss: 1.0164521932601929,grad_norm: 0.9999991057132639, iteration: 335504
loss: 1.0356518030166626,grad_norm: 0.9716618163718976, iteration: 335505
loss: 1.0079046487808228,grad_norm: 0.8537278735151782, iteration: 335506
loss: 1.035095453262329,grad_norm: 0.9668602961721512, iteration: 335507
loss: 1.043418288230896,grad_norm: 0.9256158296164744, iteration: 335508
loss: 1.115755558013916,grad_norm: 0.9464443757626281, iteration: 335509
loss: 0.9623710513114929,grad_norm: 0.8381899840500149, iteration: 335510
loss: 0.9873831272125244,grad_norm: 0.8125377790203194, iteration: 335511
loss: 1.043559193611145,grad_norm: 0.9999998655029534, iteration: 335512
loss: 1.0200570821762085,grad_norm: 0.8656405029116052, iteration: 335513
loss: 1.0173896551132202,grad_norm: 0.999999039775414, iteration: 335514
loss: 0.9930598139762878,grad_norm: 0.8291823986766771, iteration: 335515
loss: 1.0187901258468628,grad_norm: 0.9999991970819745, iteration: 335516
loss: 0.9816162586212158,grad_norm: 0.8502556576421675, iteration: 335517
loss: 1.0432976484298706,grad_norm: 0.9019730383343114, iteration: 335518
loss: 0.9794034957885742,grad_norm: 0.8563495228438436, iteration: 335519
loss: 1.0500767230987549,grad_norm: 0.9999991140009811, iteration: 335520
loss: 1.017914056777954,grad_norm: 0.8718092651233323, iteration: 335521
loss: 0.9647584557533264,grad_norm: 0.7677300329148293, iteration: 335522
loss: 1.0003339052200317,grad_norm: 0.7995538882128241, iteration: 335523
loss: 0.991273820400238,grad_norm: 0.999999445118843, iteration: 335524
loss: 1.0865721702575684,grad_norm: 0.9999997258489349, iteration: 335525
loss: 1.1593796014785767,grad_norm: 0.9999992577801979, iteration: 335526
loss: 1.0187187194824219,grad_norm: 0.9999993887108407, iteration: 335527
loss: 0.9718117117881775,grad_norm: 0.9999993466004891, iteration: 335528
loss: 1.0846744775772095,grad_norm: 0.8498866392212833, iteration: 335529
loss: 0.9680737853050232,grad_norm: 0.8737235964523816, iteration: 335530
loss: 0.9730375409126282,grad_norm: 0.7577529538243482, iteration: 335531
loss: 1.0318199396133423,grad_norm: 0.9999998783929679, iteration: 335532
loss: 1.0563324689865112,grad_norm: 0.9999995143406337, iteration: 335533
loss: 0.9977291226387024,grad_norm: 0.9999991773555547, iteration: 335534
loss: 0.990145742893219,grad_norm: 0.7398354350500455, iteration: 335535
loss: 1.0290595293045044,grad_norm: 0.9999993214353295, iteration: 335536
loss: 0.9516171216964722,grad_norm: 0.8607935508453588, iteration: 335537
loss: 1.0227410793304443,grad_norm: 0.9007612299727729, iteration: 335538
loss: 0.9904274940490723,grad_norm: 0.791129823548008, iteration: 335539
loss: 0.9878373146057129,grad_norm: 0.8975635110392759, iteration: 335540
loss: 1.1537877321243286,grad_norm: 0.9999996404962261, iteration: 335541
loss: 1.067654013633728,grad_norm: 0.9671272674279356, iteration: 335542
loss: 1.0884841680526733,grad_norm: 0.9999998992729254, iteration: 335543
loss: 0.99848473072052,grad_norm: 0.8009616404609154, iteration: 335544
loss: 1.0492924451828003,grad_norm: 0.9512822644550415, iteration: 335545
loss: 1.0524101257324219,grad_norm: 0.9999992006678021, iteration: 335546
loss: 0.9965168833732605,grad_norm: 0.8576489185608233, iteration: 335547
loss: 1.1013855934143066,grad_norm: 0.9999991027795855, iteration: 335548
loss: 1.0422463417053223,grad_norm: 0.999999561370972, iteration: 335549
loss: 0.9815141558647156,grad_norm: 0.6522871269713013, iteration: 335550
loss: 1.049537181854248,grad_norm: 0.999999477648584, iteration: 335551
loss: 1.117081642150879,grad_norm: 0.9999994849830641, iteration: 335552
loss: 1.0287666320800781,grad_norm: 0.7144485098640478, iteration: 335553
loss: 1.0176514387130737,grad_norm: 0.8605545479994855, iteration: 335554
loss: 0.9838875532150269,grad_norm: 0.7569552694368176, iteration: 335555
loss: 1.0048807859420776,grad_norm: 0.9999998918917664, iteration: 335556
loss: 1.0053383111953735,grad_norm: 0.8411184181391123, iteration: 335557
loss: 0.9765344858169556,grad_norm: 0.9999989968140016, iteration: 335558
loss: 1.0419646501541138,grad_norm: 0.9999994615656155, iteration: 335559
loss: 1.0035804510116577,grad_norm: 0.8235286248358605, iteration: 335560
loss: 1.028070330619812,grad_norm: 0.9494968356516436, iteration: 335561
loss: 0.9612388014793396,grad_norm: 0.8953247659718299, iteration: 335562
loss: 1.0500587224960327,grad_norm: 0.9999992604348751, iteration: 335563
loss: 0.9476498961448669,grad_norm: 0.8150890993102426, iteration: 335564
loss: 0.9902371764183044,grad_norm: 0.9082914249751268, iteration: 335565
loss: 0.9940477013587952,grad_norm: 0.937073060357182, iteration: 335566
loss: 1.0327774286270142,grad_norm: 0.8733808908276146, iteration: 335567
loss: 1.0283581018447876,grad_norm: 0.7317633961987708, iteration: 335568
loss: 0.9346653819084167,grad_norm: 0.7321124101647836, iteration: 335569
loss: 0.999444305896759,grad_norm: 0.9999993311152726, iteration: 335570
loss: 1.1217188835144043,grad_norm: 0.9999999965092679, iteration: 335571
loss: 1.0235233306884766,grad_norm: 0.7961761958075996, iteration: 335572
loss: 1.0215331315994263,grad_norm: 0.7927855889404457, iteration: 335573
loss: 0.9906164407730103,grad_norm: 0.898963714233832, iteration: 335574
loss: 1.012715220451355,grad_norm: 0.734460599859654, iteration: 335575
loss: 1.0605401992797852,grad_norm: 0.9999994576425536, iteration: 335576
loss: 0.966100811958313,grad_norm: 0.835524118113877, iteration: 335577
loss: 0.9941820502281189,grad_norm: 0.8561725007044978, iteration: 335578
loss: 0.995112419128418,grad_norm: 0.9999989764634166, iteration: 335579
loss: 0.9954033493995667,grad_norm: 0.8252307347060175, iteration: 335580
loss: 1.0214309692382812,grad_norm: 0.8240861021425047, iteration: 335581
loss: 0.9730091691017151,grad_norm: 0.8709203714157442, iteration: 335582
loss: 1.0026681423187256,grad_norm: 0.9999997759146351, iteration: 335583
loss: 1.1274648904800415,grad_norm: 0.9999998015990488, iteration: 335584
loss: 1.1925325393676758,grad_norm: 0.9999998363809873, iteration: 335585
loss: 1.0172948837280273,grad_norm: 0.9943704758871662, iteration: 335586
loss: 1.0085813999176025,grad_norm: 0.8663363032003856, iteration: 335587
loss: 1.0495175123214722,grad_norm: 0.874648222171748, iteration: 335588
loss: 0.9940394759178162,grad_norm: 0.8466308717186278, iteration: 335589
loss: 1.0713379383087158,grad_norm: 0.9999997543359316, iteration: 335590
loss: 0.9844384789466858,grad_norm: 0.6900551357286567, iteration: 335591
loss: 1.0081288814544678,grad_norm: 0.999999462345236, iteration: 335592
loss: 1.0536442995071411,grad_norm: 0.9706255683745265, iteration: 335593
loss: 1.0378798246383667,grad_norm: 0.9999994928926309, iteration: 335594
loss: 1.0108013153076172,grad_norm: 0.6532109339026119, iteration: 335595
loss: 0.9740269184112549,grad_norm: 0.6438682580901829, iteration: 335596
loss: 1.0353413820266724,grad_norm: 0.9999998764850286, iteration: 335597
loss: 1.025152564048767,grad_norm: 0.7883986756369447, iteration: 335598
loss: 1.0318015813827515,grad_norm: 0.7700744100953227, iteration: 335599
loss: 1.0250813961029053,grad_norm: 0.829064016284385, iteration: 335600
loss: 0.9867972731590271,grad_norm: 0.9999998521712458, iteration: 335601
loss: 1.0769739151000977,grad_norm: 0.999999983800009, iteration: 335602
loss: 1.0219500064849854,grad_norm: 0.8825147446663477, iteration: 335603
loss: 1.045711874961853,grad_norm: 0.9999997454550683, iteration: 335604
loss: 0.9852572679519653,grad_norm: 0.9999996876084359, iteration: 335605
loss: 1.0976707935333252,grad_norm: 0.9050761150018735, iteration: 335606
loss: 1.0270345211029053,grad_norm: 0.9999996585031854, iteration: 335607
loss: 1.1375080347061157,grad_norm: 0.9999999257491228, iteration: 335608
loss: 1.0268449783325195,grad_norm: 0.9999993895378824, iteration: 335609
loss: 1.030838966369629,grad_norm: 0.7798366053215553, iteration: 335610
loss: 0.9959844350814819,grad_norm: 0.9999991449409869, iteration: 335611
loss: 0.9775905609130859,grad_norm: 0.7073831071718798, iteration: 335612
loss: 1.0771771669387817,grad_norm: 0.9999997236623586, iteration: 335613
loss: 1.087681770324707,grad_norm: 0.9999992276549097, iteration: 335614
loss: 1.0168421268463135,grad_norm: 0.7614319241354589, iteration: 335615
loss: 1.0737086534500122,grad_norm: 0.9999998794437102, iteration: 335616
loss: 0.9969138503074646,grad_norm: 0.8214175947185339, iteration: 335617
loss: 0.9893957376480103,grad_norm: 0.9999990713826034, iteration: 335618
loss: 1.0726275444030762,grad_norm: 0.9999999637331998, iteration: 335619
loss: 1.0000569820404053,grad_norm: 0.7547975235913511, iteration: 335620
loss: 0.9903149008750916,grad_norm: 0.8142289154188123, iteration: 335621
loss: 1.052365779876709,grad_norm: 0.9622081766043884, iteration: 335622
loss: 0.9812265634536743,grad_norm: 0.9999993131529854, iteration: 335623
loss: 1.0342421531677246,grad_norm: 0.9999999225105592, iteration: 335624
loss: 0.9956879615783691,grad_norm: 0.7171228814394276, iteration: 335625
loss: 1.0586230754852295,grad_norm: 0.9999995717002844, iteration: 335626
loss: 1.0491201877593994,grad_norm: 0.8586948890093712, iteration: 335627
loss: 1.0404893159866333,grad_norm: 0.6938238313641019, iteration: 335628
loss: 0.9944459199905396,grad_norm: 0.9077241454362095, iteration: 335629
loss: 1.0822373628616333,grad_norm: 0.9999998534022184, iteration: 335630
loss: 1.0206619501113892,grad_norm: 0.9103870353857435, iteration: 335631
loss: 1.0971527099609375,grad_norm: 0.9999999960155753, iteration: 335632
loss: 1.0117443799972534,grad_norm: 0.7027537195557745, iteration: 335633
loss: 1.0683033466339111,grad_norm: 0.9999997285897159, iteration: 335634
loss: 0.9890235662460327,grad_norm: 0.8045449299528369, iteration: 335635
loss: 0.9940453171730042,grad_norm: 0.999999072127538, iteration: 335636
loss: 1.0909913778305054,grad_norm: 0.9999995976265383, iteration: 335637
loss: 1.0370243787765503,grad_norm: 0.9999992073412737, iteration: 335638
loss: 1.0187807083129883,grad_norm: 0.9999991097363283, iteration: 335639
loss: 1.0304425954818726,grad_norm: 0.7631491489981971, iteration: 335640
loss: 1.1063238382339478,grad_norm: 0.9999997694707947, iteration: 335641
loss: 1.106072187423706,grad_norm: 0.9999993094865072, iteration: 335642
loss: 0.9599172472953796,grad_norm: 0.8580855411331128, iteration: 335643
loss: 0.9571930766105652,grad_norm: 0.9999992555704259, iteration: 335644
loss: 1.0781126022338867,grad_norm: 0.8144909852373874, iteration: 335645
loss: 1.1151491403579712,grad_norm: 0.9999993744364112, iteration: 335646
loss: 1.0388679504394531,grad_norm: 0.9999999820969452, iteration: 335647
loss: 1.101291537284851,grad_norm: 0.9999991601788766, iteration: 335648
loss: 0.992179274559021,grad_norm: 0.9999996639112431, iteration: 335649
loss: 1.3966389894485474,grad_norm: 0.9999996329186818, iteration: 335650
loss: 1.2552380561828613,grad_norm: 0.9999999016047174, iteration: 335651
loss: 1.1558688879013062,grad_norm: 1.0000000854926685, iteration: 335652
loss: 1.0288033485412598,grad_norm: 0.9999992784933578, iteration: 335653
loss: 1.0274072885513306,grad_norm: 0.7220023740050943, iteration: 335654
loss: 0.974181056022644,grad_norm: 0.7558011689354079, iteration: 335655
loss: 0.9991042017936707,grad_norm: 0.9999989895953133, iteration: 335656
loss: 1.1253623962402344,grad_norm: 0.9999998484612513, iteration: 335657
loss: 1.1094274520874023,grad_norm: 0.9999999928248363, iteration: 335658
loss: 1.0483053922653198,grad_norm: 0.9999997544463313, iteration: 335659
loss: 1.2885798215866089,grad_norm: 0.9999999619856638, iteration: 335660
loss: 1.114376187324524,grad_norm: 0.9999998693016344, iteration: 335661
loss: 1.4125447273254395,grad_norm: 0.9999998935328923, iteration: 335662
loss: 1.181423306465149,grad_norm: 0.99999971322911, iteration: 335663
loss: 1.1323397159576416,grad_norm: 0.999999352900305, iteration: 335664
loss: 1.0555120706558228,grad_norm: 0.99999999657808, iteration: 335665
loss: 1.216552972793579,grad_norm: 1.0000000053884208, iteration: 335666
loss: 1.1461938619613647,grad_norm: 0.9999994621427054, iteration: 335667
loss: 1.1617628335952759,grad_norm: 0.9999996207083143, iteration: 335668
loss: 1.219799280166626,grad_norm: 0.999999802326806, iteration: 335669
loss: 1.3001281023025513,grad_norm: 0.9999999297379093, iteration: 335670
loss: 1.156011939048767,grad_norm: 0.9999996946052832, iteration: 335671
loss: 1.0724729299545288,grad_norm: 0.9999990514775655, iteration: 335672
loss: 0.9985888004302979,grad_norm: 0.8472372015410139, iteration: 335673
loss: 1.1538994312286377,grad_norm: 0.9999998705024024, iteration: 335674
loss: 1.0403324365615845,grad_norm: 0.9999996469013207, iteration: 335675
loss: 1.1281471252441406,grad_norm: 0.9999992910575938, iteration: 335676
loss: 1.1239370107650757,grad_norm: 0.9999998028939457, iteration: 335677
loss: 1.0205235481262207,grad_norm: 0.9999993836633767, iteration: 335678
loss: 1.0076531171798706,grad_norm: 0.8400415441196573, iteration: 335679
loss: 1.026157021522522,grad_norm: 0.7823826363356159, iteration: 335680
loss: 1.0858803987503052,grad_norm: 0.9999994959254819, iteration: 335681
loss: 1.1082301139831543,grad_norm: 0.9999995605992057, iteration: 335682
loss: 1.027486801147461,grad_norm: 0.8144374898390613, iteration: 335683
loss: 0.9923495650291443,grad_norm: 0.8182340643242093, iteration: 335684
loss: 1.0489106178283691,grad_norm: 0.9999995271612588, iteration: 335685
loss: 0.9909918904304504,grad_norm: 0.7658049713810668, iteration: 335686
loss: 1.053879976272583,grad_norm: 0.9999992178007193, iteration: 335687
loss: 1.053055763244629,grad_norm: 0.9999995773500074, iteration: 335688
loss: 1.0335984230041504,grad_norm: 0.9999992783099896, iteration: 335689
loss: 1.0685045719146729,grad_norm: 0.9999991418613405, iteration: 335690
loss: 1.064896583557129,grad_norm: 0.8913569881292484, iteration: 335691
loss: 1.0326000452041626,grad_norm: 0.9999990482006074, iteration: 335692
loss: 1.044005274772644,grad_norm: 0.999999746033343, iteration: 335693
loss: 1.0228770971298218,grad_norm: 0.9999994037125978, iteration: 335694
loss: 1.0460253953933716,grad_norm: 0.9999998202331328, iteration: 335695
loss: 1.0552570819854736,grad_norm: 0.9999993482116937, iteration: 335696
loss: 1.0329241752624512,grad_norm: 0.9999999420144848, iteration: 335697
loss: 1.0377802848815918,grad_norm: 0.9999993984005339, iteration: 335698
loss: 0.9805564284324646,grad_norm: 0.9923353070907036, iteration: 335699
loss: 0.9667555689811707,grad_norm: 0.8567960227836112, iteration: 335700
loss: 1.012542486190796,grad_norm: 0.999999382940376, iteration: 335701
loss: 1.1380091905593872,grad_norm: 0.9999998943488819, iteration: 335702
loss: 0.9978212714195251,grad_norm: 0.8509732814775226, iteration: 335703
loss: 1.04567289352417,grad_norm: 0.9553169242137777, iteration: 335704
loss: 1.015312910079956,grad_norm: 0.8634391405971965, iteration: 335705
loss: 1.019257664680481,grad_norm: 0.9999991225933971, iteration: 335706
loss: 1.0789470672607422,grad_norm: 0.9705558038592834, iteration: 335707
loss: 1.0929044485092163,grad_norm: 0.8457863980836059, iteration: 335708
loss: 0.995676577091217,grad_norm: 0.9999997168741667, iteration: 335709
loss: 0.9923714399337769,grad_norm: 0.9999999708128141, iteration: 335710
loss: 0.9683135747909546,grad_norm: 0.7565785074122762, iteration: 335711
loss: 1.0258128643035889,grad_norm: 0.9395769042421954, iteration: 335712
loss: 0.9968849420547485,grad_norm: 0.918586037676805, iteration: 335713
loss: 0.9930022954940796,grad_norm: 0.7177228431651582, iteration: 335714
loss: 1.0214903354644775,grad_norm: 0.8282959643190367, iteration: 335715
loss: 1.0089282989501953,grad_norm: 0.9999991085597876, iteration: 335716
loss: 1.0254583358764648,grad_norm: 0.8498114471771445, iteration: 335717
loss: 0.9605203866958618,grad_norm: 0.8205727402622389, iteration: 335718
loss: 1.0311157703399658,grad_norm: 0.9999997801739792, iteration: 335719
loss: 1.0216816663742065,grad_norm: 0.9999994259641211, iteration: 335720
loss: 1.0705350637435913,grad_norm: 0.8042674017649417, iteration: 335721
loss: 1.0593509674072266,grad_norm: 0.9999995644152943, iteration: 335722
loss: 1.0688929557800293,grad_norm: 0.9999991511916381, iteration: 335723
loss: 1.0438601970672607,grad_norm: 0.9999991481675486, iteration: 335724
loss: 0.9964364767074585,grad_norm: 0.9111593096003415, iteration: 335725
loss: 1.0762133598327637,grad_norm: 0.9999991292409558, iteration: 335726
loss: 1.0327062606811523,grad_norm: 0.9185582525127773, iteration: 335727
loss: 0.9901596307754517,grad_norm: 0.9999996145265706, iteration: 335728
loss: 1.0507291555404663,grad_norm: 0.9999991923947701, iteration: 335729
loss: 0.9767785668373108,grad_norm: 0.9019890436393948, iteration: 335730
loss: 0.9954732656478882,grad_norm: 0.8591863382764405, iteration: 335731
loss: 1.0328176021575928,grad_norm: 0.9999996880388543, iteration: 335732
loss: 1.0614045858383179,grad_norm: 0.7358290418984625, iteration: 335733
loss: 1.002124547958374,grad_norm: 0.9999995784351592, iteration: 335734
loss: 0.9872369170188904,grad_norm: 0.8632104512766592, iteration: 335735
loss: 1.0168488025665283,grad_norm: 0.7339351131062993, iteration: 335736
loss: 0.974021315574646,grad_norm: 0.7642542456574715, iteration: 335737
loss: 1.0084341764450073,grad_norm: 0.9334791410133488, iteration: 335738
loss: 1.0009076595306396,grad_norm: 0.9655550284044592, iteration: 335739
loss: 1.0104715824127197,grad_norm: 0.9999999802480849, iteration: 335740
loss: 1.014513611793518,grad_norm: 0.8657896904947658, iteration: 335741
loss: 0.9851005673408508,grad_norm: 0.8972713667863372, iteration: 335742
loss: 0.9890233278274536,grad_norm: 0.7468271782428626, iteration: 335743
loss: 0.9958238005638123,grad_norm: 0.822665785905041, iteration: 335744
loss: 1.0253013372421265,grad_norm: 0.9999996720330342, iteration: 335745
loss: 1.0029245615005493,grad_norm: 0.7469909892799812, iteration: 335746
loss: 0.9706560969352722,grad_norm: 0.8694131649535167, iteration: 335747
loss: 1.0316163301467896,grad_norm: 0.999999509947387, iteration: 335748
loss: 1.047464370727539,grad_norm: 0.9999990706129028, iteration: 335749
loss: 1.0151033401489258,grad_norm: 0.8863903389580445, iteration: 335750
loss: 1.012259602546692,grad_norm: 0.7631579537163395, iteration: 335751
loss: 0.9650347828865051,grad_norm: 0.9044806553206942, iteration: 335752
loss: 0.9982820153236389,grad_norm: 0.8734764650914754, iteration: 335753
loss: 0.9753091931343079,grad_norm: 0.7794881433892346, iteration: 335754
loss: 0.9902998208999634,grad_norm: 0.9039670469542537, iteration: 335755
loss: 0.9995817542076111,grad_norm: 0.7275842706408965, iteration: 335756
loss: 1.0182769298553467,grad_norm: 0.7822556437544957, iteration: 335757
loss: 0.9833290576934814,grad_norm: 0.8473152909580867, iteration: 335758
loss: 1.1313892602920532,grad_norm: 0.9999999920621259, iteration: 335759
loss: 1.0821120738983154,grad_norm: 0.960777855191289, iteration: 335760
loss: 0.9975036382675171,grad_norm: 0.901128867971646, iteration: 335761
loss: 0.9848692417144775,grad_norm: 0.7835778396666281, iteration: 335762
loss: 1.0067623853683472,grad_norm: 0.8256464972946075, iteration: 335763
loss: 1.1400171518325806,grad_norm: 0.9999997991287579, iteration: 335764
loss: 1.0314555168151855,grad_norm: 0.9999990910937928, iteration: 335765
loss: 0.9665180444717407,grad_norm: 0.8191796481420814, iteration: 335766
loss: 0.9948787689208984,grad_norm: 0.9124614807147159, iteration: 335767
loss: 1.0073001384735107,grad_norm: 0.999999683469122, iteration: 335768
loss: 0.9834280610084534,grad_norm: 0.7827353408429383, iteration: 335769
loss: 0.9754204750061035,grad_norm: 0.9850162862319162, iteration: 335770
loss: 1.03598952293396,grad_norm: 0.7224200788863835, iteration: 335771
loss: 1.0107824802398682,grad_norm: 0.8335437319427894, iteration: 335772
loss: 1.163738489151001,grad_norm: 0.9999999317146604, iteration: 335773
loss: 1.0169507265090942,grad_norm: 0.756119494572602, iteration: 335774
loss: 0.9902715682983398,grad_norm: 0.9433593336725135, iteration: 335775
loss: 0.9949725270271301,grad_norm: 0.9487535321460316, iteration: 335776
loss: 1.0136553049087524,grad_norm: 0.9999991124328059, iteration: 335777
loss: 0.9816126823425293,grad_norm: 0.8858892303081329, iteration: 335778
loss: 1.0193736553192139,grad_norm: 0.812956555104823, iteration: 335779
loss: 1.009031057357788,grad_norm: 0.9999991881477258, iteration: 335780
loss: 1.0052459239959717,grad_norm: 0.9999996569102086, iteration: 335781
loss: 1.0376044511795044,grad_norm: 0.8832826083404299, iteration: 335782
loss: 0.9663407206535339,grad_norm: 0.8037244977997869, iteration: 335783
loss: 1.0222586393356323,grad_norm: 0.9999991587289055, iteration: 335784
loss: 1.0272471904754639,grad_norm: 0.8010282903971359, iteration: 335785
loss: 1.0197001695632935,grad_norm: 0.8086824394279665, iteration: 335786
loss: 0.9857072234153748,grad_norm: 0.8510658580233081, iteration: 335787
loss: 1.0778793096542358,grad_norm: 0.943383712039716, iteration: 335788
loss: 1.0021920204162598,grad_norm: 0.9380540615322213, iteration: 335789
loss: 0.9809460043907166,grad_norm: 0.8440823124908317, iteration: 335790
loss: 0.9778546094894409,grad_norm: 0.8804652336426401, iteration: 335791
loss: 1.0055919885635376,grad_norm: 0.9351773686355623, iteration: 335792
loss: 1.010723352432251,grad_norm: 0.9565499960064641, iteration: 335793
loss: 0.9617084860801697,grad_norm: 0.8046208637070892, iteration: 335794
loss: 1.001759648323059,grad_norm: 0.7118358573141517, iteration: 335795
loss: 1.0514949560165405,grad_norm: 0.9707773365883613, iteration: 335796
loss: 1.0172072649002075,grad_norm: 0.9977314744999896, iteration: 335797
loss: 0.9767840504646301,grad_norm: 0.9802559762621763, iteration: 335798
loss: 0.9902392029762268,grad_norm: 0.7737047956640155, iteration: 335799
loss: 0.9710410833358765,grad_norm: 0.8310575610998714, iteration: 335800
loss: 1.0173677206039429,grad_norm: 0.8424276550563732, iteration: 335801
loss: 1.027967929840088,grad_norm: 0.7922339947223128, iteration: 335802
loss: 1.0098915100097656,grad_norm: 0.7960489206312786, iteration: 335803
loss: 0.9922838807106018,grad_norm: 0.8937597964803174, iteration: 335804
loss: 1.1723889112472534,grad_norm: 0.9999995298690622, iteration: 335805
loss: 0.993201732635498,grad_norm: 0.9844901544294116, iteration: 335806
loss: 0.9850984215736389,grad_norm: 0.8941079843936579, iteration: 335807
loss: 1.0096746683120728,grad_norm: 0.9675149736960194, iteration: 335808
loss: 1.037765622138977,grad_norm: 0.8429081889847936, iteration: 335809
loss: 1.0012946128845215,grad_norm: 0.9999999418555313, iteration: 335810
loss: 0.9954518675804138,grad_norm: 0.7736569466698264, iteration: 335811
loss: 0.9869067668914795,grad_norm: 0.8295861487527061, iteration: 335812
loss: 0.9905814528465271,grad_norm: 0.9246245095338431, iteration: 335813
loss: 0.9715489745140076,grad_norm: 0.9317750363288355, iteration: 335814
loss: 0.9778777360916138,grad_norm: 0.9792586606495015, iteration: 335815
loss: 0.9688253402709961,grad_norm: 0.8295655923379616, iteration: 335816
loss: 0.997775137424469,grad_norm: 0.8466634346031293, iteration: 335817
loss: 0.9846501350402832,grad_norm: 0.9315293393383266, iteration: 335818
loss: 1.0068094730377197,grad_norm: 0.8456296907806542, iteration: 335819
loss: 1.0154484510421753,grad_norm: 0.9056668603156985, iteration: 335820
loss: 0.9885792136192322,grad_norm: 0.8580777643507682, iteration: 335821
loss: 1.0343085527420044,grad_norm: 0.9336304660609012, iteration: 335822
loss: 0.9804664850234985,grad_norm: 0.7029185734598034, iteration: 335823
loss: 1.0043045282363892,grad_norm: 0.821309522409105, iteration: 335824
loss: 1.0077019929885864,grad_norm: 0.9999995196591015, iteration: 335825
loss: 1.148444652557373,grad_norm: 0.9999997931662579, iteration: 335826
loss: 1.024346947669983,grad_norm: 0.7700054145924936, iteration: 335827
loss: 1.007538080215454,grad_norm: 0.9999990460024215, iteration: 335828
loss: 0.9935137033462524,grad_norm: 0.8318536264268468, iteration: 335829
loss: 0.9747912883758545,grad_norm: 0.7962062146851575, iteration: 335830
loss: 1.0564348697662354,grad_norm: 0.9403004260469033, iteration: 335831
loss: 0.95682692527771,grad_norm: 0.869828339985918, iteration: 335832
loss: 1.021359920501709,grad_norm: 0.7971223069844005, iteration: 335833
loss: 1.020365595817566,grad_norm: 0.9092254696146513, iteration: 335834
loss: 1.0133607387542725,grad_norm: 0.9297775998218715, iteration: 335835
loss: 1.0061370134353638,grad_norm: 0.99999929143279, iteration: 335836
loss: 1.0640461444854736,grad_norm: 0.9999997501825122, iteration: 335837
loss: 1.0107887983322144,grad_norm: 0.9782862212885297, iteration: 335838
loss: 1.0124421119689941,grad_norm: 0.6419414482401756, iteration: 335839
loss: 0.9708526134490967,grad_norm: 0.9160837755282624, iteration: 335840
loss: 1.0788980722427368,grad_norm: 0.960011713044826, iteration: 335841
loss: 0.9894959330558777,grad_norm: 0.7649081126643917, iteration: 335842
loss: 1.0747359991073608,grad_norm: 0.9999996519651148, iteration: 335843
loss: 1.0104291439056396,grad_norm: 0.8476776930091449, iteration: 335844
loss: 1.0346243381500244,grad_norm: 0.8682101323666178, iteration: 335845
loss: 1.019700288772583,grad_norm: 0.9999990848051492, iteration: 335846
loss: 0.983016312122345,grad_norm: 0.8468884561451685, iteration: 335847
loss: 1.0689435005187988,grad_norm: 0.8482504693840827, iteration: 335848
loss: 0.9938278198242188,grad_norm: 0.8690772682752047, iteration: 335849
loss: 0.9694972634315491,grad_norm: 0.8453452723779169, iteration: 335850
loss: 1.0079576969146729,grad_norm: 0.819409809654772, iteration: 335851
loss: 1.028232216835022,grad_norm: 0.7926809137743003, iteration: 335852
loss: 0.9848785400390625,grad_norm: 0.9058593923376443, iteration: 335853
loss: 1.0459120273590088,grad_norm: 0.9999999214355124, iteration: 335854
loss: 0.9777694344520569,grad_norm: 0.7677151423068433, iteration: 335855
loss: 1.0320614576339722,grad_norm: 0.9999998258828321, iteration: 335856
loss: 1.0221284627914429,grad_norm: 0.8914880032016322, iteration: 335857
loss: 0.9873887896537781,grad_norm: 0.7762599269311362, iteration: 335858
loss: 0.9706870913505554,grad_norm: 0.9054437137330834, iteration: 335859
loss: 1.050490140914917,grad_norm: 0.9999998357322173, iteration: 335860
loss: 0.9924314618110657,grad_norm: 0.7543400272628825, iteration: 335861
loss: 0.9905376434326172,grad_norm: 0.9999990698620579, iteration: 335862
loss: 1.0755529403686523,grad_norm: 0.9999996780324097, iteration: 335863
loss: 0.9849096536636353,grad_norm: 0.7953817817047099, iteration: 335864
loss: 1.003050684928894,grad_norm: 0.8891461236347411, iteration: 335865
loss: 0.9638727307319641,grad_norm: 0.9633384891889297, iteration: 335866
loss: 0.9998831748962402,grad_norm: 0.944362918561978, iteration: 335867
loss: 1.0071791410446167,grad_norm: 0.9990966275293706, iteration: 335868
loss: 0.974768340587616,grad_norm: 0.7864689590735906, iteration: 335869
loss: 1.0771034955978394,grad_norm: 0.9999998409016807, iteration: 335870
loss: 1.0003992319107056,grad_norm: 0.9084315718294886, iteration: 335871
loss: 0.9986063838005066,grad_norm: 0.732924710610045, iteration: 335872
loss: 1.0061713457107544,grad_norm: 0.7452220925470588, iteration: 335873
loss: 1.0030980110168457,grad_norm: 0.7788160794818392, iteration: 335874
loss: 0.9995019435882568,grad_norm: 0.7972103906011551, iteration: 335875
loss: 0.9906198382377625,grad_norm: 0.7788101856896235, iteration: 335876
loss: 1.0342464447021484,grad_norm: 0.9999990569346531, iteration: 335877
loss: 1.0103414058685303,grad_norm: 0.938711233166925, iteration: 335878
loss: 0.9732004404067993,grad_norm: 0.9999994144813534, iteration: 335879
loss: 0.9873307943344116,grad_norm: 0.9999991853289023, iteration: 335880
loss: 0.9969497919082642,grad_norm: 0.7881831619928613, iteration: 335881
loss: 1.0247561931610107,grad_norm: 0.9820409146000704, iteration: 335882
loss: 1.0322037935256958,grad_norm: 0.9999994711555172, iteration: 335883
loss: 1.0234254598617554,grad_norm: 0.9784147138252465, iteration: 335884
loss: 1.0341893434524536,grad_norm: 0.9954157590584111, iteration: 335885
loss: 0.9764705896377563,grad_norm: 0.896378627553409, iteration: 335886
loss: 0.9772855639457703,grad_norm: 0.9999994317894623, iteration: 335887
loss: 0.9934979677200317,grad_norm: 0.7410171859148007, iteration: 335888
loss: 1.0806018114089966,grad_norm: 0.9999999361628191, iteration: 335889
loss: 1.0274429321289062,grad_norm: 0.7476639618789293, iteration: 335890
loss: 0.9678702354431152,grad_norm: 0.9999991945277148, iteration: 335891
loss: 1.0077788829803467,grad_norm: 0.7801319337331653, iteration: 335892
loss: 1.0005213022232056,grad_norm: 0.921914067835652, iteration: 335893
loss: 0.9869120717048645,grad_norm: 0.9999994241954058, iteration: 335894
loss: 0.9924137592315674,grad_norm: 0.8729824428110206, iteration: 335895
loss: 1.0036616325378418,grad_norm: 0.9861796361853546, iteration: 335896
loss: 0.973143994808197,grad_norm: 0.7698425673676158, iteration: 335897
loss: 1.0500216484069824,grad_norm: 0.8564629112623307, iteration: 335898
loss: 1.040729284286499,grad_norm: 0.9512383280588445, iteration: 335899
loss: 1.1153886318206787,grad_norm: 0.9999991292492212, iteration: 335900
loss: 1.1326637268066406,grad_norm: 0.9999999042415916, iteration: 335901
loss: 1.092625617980957,grad_norm: 0.9999992977165826, iteration: 335902
loss: 1.0039875507354736,grad_norm: 0.747107177508769, iteration: 335903
loss: 1.0290751457214355,grad_norm: 0.9999990321641412, iteration: 335904
loss: 0.9849271774291992,grad_norm: 0.7287278961680403, iteration: 335905
loss: 0.9898873567581177,grad_norm: 0.8875435818067299, iteration: 335906
loss: 1.0244818925857544,grad_norm: 0.8064319110787461, iteration: 335907
loss: 1.0063329935073853,grad_norm: 0.7250342430246363, iteration: 335908
loss: 1.0104962587356567,grad_norm: 0.9518636330390593, iteration: 335909
loss: 1.0094001293182373,grad_norm: 0.9957386856878553, iteration: 335910
loss: 1.0068855285644531,grad_norm: 0.9637657190839537, iteration: 335911
loss: 0.9696715474128723,grad_norm: 0.8039417008731623, iteration: 335912
loss: 1.020365595817566,grad_norm: 0.999999266402007, iteration: 335913
loss: 1.037270188331604,grad_norm: 0.8563996897421434, iteration: 335914
loss: 0.9738811254501343,grad_norm: 0.7220874806001295, iteration: 335915
loss: 0.9889665246009827,grad_norm: 0.7244580669365915, iteration: 335916
loss: 0.9792224764823914,grad_norm: 0.9066760135851571, iteration: 335917
loss: 0.994210958480835,grad_norm: 0.9999999725966663, iteration: 335918
loss: 0.9889580011367798,grad_norm: 0.836210791822365, iteration: 335919
loss: 0.998293936252594,grad_norm: 0.7818339140912159, iteration: 335920
loss: 1.016103982925415,grad_norm: 0.8940223302299097, iteration: 335921
loss: 1.001589059829712,grad_norm: 0.9999993577232972, iteration: 335922
loss: 1.0231492519378662,grad_norm: 0.7911116824185908, iteration: 335923
loss: 0.9783455729484558,grad_norm: 0.9270158792798129, iteration: 335924
loss: 1.0075215101242065,grad_norm: 0.9525582812752914, iteration: 335925
loss: 1.0724618434906006,grad_norm: 0.999999274685272, iteration: 335926
loss: 0.9753214120864868,grad_norm: 0.8761122073662447, iteration: 335927
loss: 1.0173944234848022,grad_norm: 0.7348410504558345, iteration: 335928
loss: 0.9896965622901917,grad_norm: 0.8173641337045953, iteration: 335929
loss: 0.9946544766426086,grad_norm: 0.811999629002106, iteration: 335930
loss: 0.9807801842689514,grad_norm: 0.7957015629182198, iteration: 335931
loss: 0.9844846129417419,grad_norm: 0.8497146300181722, iteration: 335932
loss: 0.9865251183509827,grad_norm: 0.9581532090369648, iteration: 335933
loss: 1.0433762073516846,grad_norm: 0.7915584456075551, iteration: 335934
loss: 0.9824753403663635,grad_norm: 0.7708816943523433, iteration: 335935
loss: 1.0105395317077637,grad_norm: 0.7734736111223588, iteration: 335936
loss: 0.9626821279525757,grad_norm: 0.9335248331310756, iteration: 335937
loss: 1.0227479934692383,grad_norm: 0.9999995955174912, iteration: 335938
loss: 1.0284473896026611,grad_norm: 0.7601305642715868, iteration: 335939
loss: 1.0165473222732544,grad_norm: 0.9155113593827972, iteration: 335940
loss: 0.9998919367790222,grad_norm: 0.9999990484687841, iteration: 335941
loss: 1.0449851751327515,grad_norm: 0.9999994963659321, iteration: 335942
loss: 0.9774072766304016,grad_norm: 0.8270650639824124, iteration: 335943
loss: 1.0159789323806763,grad_norm: 0.8748566496726233, iteration: 335944
loss: 1.0425432920455933,grad_norm: 0.9624862537655543, iteration: 335945
loss: 1.003994107246399,grad_norm: 0.8694979680388218, iteration: 335946
loss: 0.9947764873504639,grad_norm: 0.7591452883669456, iteration: 335947
loss: 1.0263198614120483,grad_norm: 0.7466006525528206, iteration: 335948
loss: 1.0023200511932373,grad_norm: 0.7085678557734715, iteration: 335949
loss: 1.0155376195907593,grad_norm: 0.8101929484151521, iteration: 335950
loss: 0.9875629544258118,grad_norm: 0.9999989684643839, iteration: 335951
loss: 1.0321543216705322,grad_norm: 0.8937687751986981, iteration: 335952
loss: 0.9991048574447632,grad_norm: 0.9999995684914714, iteration: 335953
loss: 1.0069924592971802,grad_norm: 0.7309182482055478, iteration: 335954
loss: 1.0139696598052979,grad_norm: 0.8734680504234655, iteration: 335955
loss: 1.0348615646362305,grad_norm: 0.7914368455033147, iteration: 335956
loss: 1.0091581344604492,grad_norm: 0.7681324574349139, iteration: 335957
loss: 0.9728512763977051,grad_norm: 0.80532063072453, iteration: 335958
loss: 1.0136446952819824,grad_norm: 0.8813046811096263, iteration: 335959
loss: 0.9698877334594727,grad_norm: 0.9245921472447627, iteration: 335960
loss: 1.0630571842193604,grad_norm: 0.9631523006453596, iteration: 335961
loss: 1.0199451446533203,grad_norm: 0.7016752234443472, iteration: 335962
loss: 1.006115436553955,grad_norm: 0.9240109463672345, iteration: 335963
loss: 0.9973559975624084,grad_norm: 0.960972062313272, iteration: 335964
loss: 0.9909063577651978,grad_norm: 0.797165263267482, iteration: 335965
loss: 0.9927115440368652,grad_norm: 0.904390059935627, iteration: 335966
loss: 1.0085773468017578,grad_norm: 0.9318278284724917, iteration: 335967
loss: 1.0014570951461792,grad_norm: 0.8331062235328426, iteration: 335968
loss: 0.9781280159950256,grad_norm: 0.8474949802566757, iteration: 335969
loss: 0.9843115210533142,grad_norm: 0.82336174007646, iteration: 335970
loss: 1.0075600147247314,grad_norm: 0.9999991438791904, iteration: 335971
loss: 1.0024113655090332,grad_norm: 0.8158483701493371, iteration: 335972
loss: 1.0213000774383545,grad_norm: 0.9999990157822581, iteration: 335973
loss: 1.0338352918624878,grad_norm: 0.7939851904695359, iteration: 335974
loss: 0.9690452218055725,grad_norm: 0.8015605561228998, iteration: 335975
loss: 1.0258456468582153,grad_norm: 0.9085012368078971, iteration: 335976
loss: 0.9892237186431885,grad_norm: 0.8679779013800316, iteration: 335977
loss: 1.0131804943084717,grad_norm: 0.9310897380650462, iteration: 335978
loss: 1.0088683366775513,grad_norm: 0.8104130700956351, iteration: 335979
loss: 0.9721531271934509,grad_norm: 0.8543390429833506, iteration: 335980
loss: 1.024939775466919,grad_norm: 0.9999993274127312, iteration: 335981
loss: 0.9835851788520813,grad_norm: 0.7472750648869367, iteration: 335982
loss: 0.9807668328285217,grad_norm: 0.8098080146739737, iteration: 335983
loss: 0.9970657229423523,grad_norm: 0.8756972978044958, iteration: 335984
loss: 0.9952747821807861,grad_norm: 0.9999991150310674, iteration: 335985
loss: 1.0157536268234253,grad_norm: 0.778419151596744, iteration: 335986
loss: 1.0310237407684326,grad_norm: 0.928388476391945, iteration: 335987
loss: 1.0867382287979126,grad_norm: 0.9665151939720995, iteration: 335988
loss: 1.0099517107009888,grad_norm: 0.9074094735492557, iteration: 335989
loss: 0.9979310035705566,grad_norm: 0.8970616544528365, iteration: 335990
loss: 0.9950190186500549,grad_norm: 0.8884169011386215, iteration: 335991
loss: 1.0244449377059937,grad_norm: 0.8695683612300918, iteration: 335992
loss: 0.9860823750495911,grad_norm: 0.7617423463377544, iteration: 335993
loss: 0.9865735769271851,grad_norm: 0.8461290053485258, iteration: 335994
loss: 0.9688324928283691,grad_norm: 0.7396103170736257, iteration: 335995
loss: 1.1443434953689575,grad_norm: 0.9999993901840353, iteration: 335996
loss: 1.03569757938385,grad_norm: 0.9999998189047933, iteration: 335997
loss: 0.9902014136314392,grad_norm: 0.7615757585947466, iteration: 335998
loss: 0.9616515040397644,grad_norm: 0.8363372896087355, iteration: 335999
loss: 1.1310564279556274,grad_norm: 0.9999990756775627, iteration: 336000
loss: 0.9648880958557129,grad_norm: 0.7826759885962731, iteration: 336001
loss: 1.0041277408599854,grad_norm: 0.9240855894321567, iteration: 336002
loss: 0.9928624629974365,grad_norm: 0.7388178559574562, iteration: 336003
loss: 0.9694288372993469,grad_norm: 0.8734957340970448, iteration: 336004
loss: 1.016570806503296,grad_norm: 0.9119451941248345, iteration: 336005
loss: 1.0813648700714111,grad_norm: 0.8861190598390445, iteration: 336006
loss: 1.0144929885864258,grad_norm: 0.9999990567633514, iteration: 336007
loss: 1.0359045267105103,grad_norm: 1.0000000260536277, iteration: 336008
loss: 1.0324617624282837,grad_norm: 0.8747787516947203, iteration: 336009
loss: 1.008868932723999,grad_norm: 0.8990391949208775, iteration: 336010
loss: 0.9713279604911804,grad_norm: 0.8336259522899131, iteration: 336011
loss: 1.0137112140655518,grad_norm: 0.7034577085152636, iteration: 336012
loss: 0.9654225707054138,grad_norm: 0.9324662738836615, iteration: 336013
loss: 1.0323325395584106,grad_norm: 0.8353839606752628, iteration: 336014
loss: 1.0103706121444702,grad_norm: 0.8137483025147312, iteration: 336015
loss: 1.0171332359313965,grad_norm: 0.7898860687110006, iteration: 336016
loss: 1.0309075117111206,grad_norm: 0.7977467149388315, iteration: 336017
loss: 0.9935892820358276,grad_norm: 0.7822378831198739, iteration: 336018
loss: 1.050861120223999,grad_norm: 0.7286731624898656, iteration: 336019
loss: 1.0231635570526123,grad_norm: 0.9999993288022314, iteration: 336020
loss: 1.0019004344940186,grad_norm: 0.8527702433639358, iteration: 336021
loss: 0.9916172027587891,grad_norm: 0.9999991053560697, iteration: 336022
loss: 0.9872764945030212,grad_norm: 0.943162421368845, iteration: 336023
loss: 1.036676049232483,grad_norm: 0.9999994823085596, iteration: 336024
loss: 1.0214861631393433,grad_norm: 0.8408023188595725, iteration: 336025
loss: 0.9715887308120728,grad_norm: 0.8442141387133942, iteration: 336026
loss: 0.9872318506240845,grad_norm: 0.7862279570075302, iteration: 336027
loss: 0.9816188812255859,grad_norm: 0.8161779068814862, iteration: 336028
loss: 1.0062775611877441,grad_norm: 0.9999990914660108, iteration: 336029
loss: 1.0147850513458252,grad_norm: 0.8072971810177775, iteration: 336030
loss: 0.9741966724395752,grad_norm: 0.7543749257374706, iteration: 336031
loss: 1.0107662677764893,grad_norm: 0.8161533966466821, iteration: 336032
loss: 1.0198800563812256,grad_norm: 0.903313148792404, iteration: 336033
loss: 0.9753527641296387,grad_norm: 0.9960264712316398, iteration: 336034
loss: 0.9502835273742676,grad_norm: 0.7679397591128427, iteration: 336035
loss: 0.9969126582145691,grad_norm: 0.7728210909878725, iteration: 336036
loss: 1.0059874057769775,grad_norm: 0.6571544944759722, iteration: 336037
loss: 0.9690232276916504,grad_norm: 0.8078562569859326, iteration: 336038
loss: 0.9917251467704773,grad_norm: 0.7902688254652347, iteration: 336039
loss: 0.9991503953933716,grad_norm: 0.8476666814764742, iteration: 336040
loss: 1.0143927335739136,grad_norm: 0.9999995793493466, iteration: 336041
loss: 1.0505626201629639,grad_norm: 0.8848710843922286, iteration: 336042
loss: 1.0741147994995117,grad_norm: 0.7878091239458146, iteration: 336043
loss: 0.9820360541343689,grad_norm: 0.7646935744527706, iteration: 336044
loss: 0.9564948678016663,grad_norm: 0.800931459432885, iteration: 336045
loss: 1.027923583984375,grad_norm: 0.9999998300261383, iteration: 336046
loss: 0.9926425814628601,grad_norm: 0.9590033027846063, iteration: 336047
loss: 0.9977338314056396,grad_norm: 0.6674160577610653, iteration: 336048
loss: 1.0431923866271973,grad_norm: 0.999999067819817, iteration: 336049
loss: 1.0110032558441162,grad_norm: 0.9122437684834184, iteration: 336050
loss: 1.0022610425949097,grad_norm: 0.7812861332155981, iteration: 336051
loss: 1.0059062242507935,grad_norm: 0.7685270398053929, iteration: 336052
loss: 1.0272164344787598,grad_norm: 0.747146375803305, iteration: 336053
loss: 0.9353480339050293,grad_norm: 0.8600038723858053, iteration: 336054
loss: 1.0087584257125854,grad_norm: 0.8993155185252543, iteration: 336055
loss: 1.046640396118164,grad_norm: 0.9108788746213138, iteration: 336056
loss: 1.0873994827270508,grad_norm: 0.999999896568234, iteration: 336057
loss: 1.0194913148880005,grad_norm: 0.7256533721063023, iteration: 336058
loss: 1.0101217031478882,grad_norm: 0.761635869599505, iteration: 336059
loss: 0.9954679608345032,grad_norm: 0.6933857205832037, iteration: 336060
loss: 0.9564107656478882,grad_norm: 0.9929102266102675, iteration: 336061
loss: 0.9548471570014954,grad_norm: 0.750874923085541, iteration: 336062
loss: 1.0012634992599487,grad_norm: 0.8694256930492154, iteration: 336063
loss: 0.9969626665115356,grad_norm: 0.9972484362546529, iteration: 336064
loss: 1.1476589441299438,grad_norm: 0.9999990584085707, iteration: 336065
loss: 1.0004767179489136,grad_norm: 0.7545689753924087, iteration: 336066
loss: 1.0286637544631958,grad_norm: 0.9999998548269187, iteration: 336067
loss: 1.0191928148269653,grad_norm: 0.8719600489919619, iteration: 336068
loss: 0.984930157661438,grad_norm: 0.8929085953314544, iteration: 336069
loss: 0.9624086618423462,grad_norm: 0.9197868716935983, iteration: 336070
loss: 0.9936860799789429,grad_norm: 0.7730778215519897, iteration: 336071
loss: 1.0047626495361328,grad_norm: 0.9041894182414456, iteration: 336072
loss: 0.9978505373001099,grad_norm: 0.8810492890501542, iteration: 336073
loss: 0.9967867136001587,grad_norm: 0.9999997242084515, iteration: 336074
loss: 1.05235755443573,grad_norm: 0.955613829592388, iteration: 336075
loss: 0.9848303198814392,grad_norm: 0.6886013418798139, iteration: 336076
loss: 1.0638364553451538,grad_norm: 0.9999992151042109, iteration: 336077
loss: 1.0035121440887451,grad_norm: 0.936032444334977, iteration: 336078
loss: 1.0531765222549438,grad_norm: 0.9999992245719646, iteration: 336079
loss: 0.9811293482780457,grad_norm: 0.9661658674522939, iteration: 336080
loss: 1.0190285444259644,grad_norm: 0.8797468983795607, iteration: 336081
loss: 1.070084810256958,grad_norm: 0.9999996857804578, iteration: 336082
loss: 1.0201818943023682,grad_norm: 0.7165950226587653, iteration: 336083
loss: 0.994583010673523,grad_norm: 0.8693750119734756, iteration: 336084
loss: 1.0665150880813599,grad_norm: 0.9999992821368623, iteration: 336085
loss: 0.9998680949211121,grad_norm: 0.6543239033430387, iteration: 336086
loss: 0.9561727643013,grad_norm: 0.9749332845456563, iteration: 336087
loss: 1.0021268129348755,grad_norm: 0.9112167314772978, iteration: 336088
loss: 0.9624024629592896,grad_norm: 0.777774551671772, iteration: 336089
loss: 0.9984018802642822,grad_norm: 0.9999992092700872, iteration: 336090
loss: 1.0087542533874512,grad_norm: 0.9069430889577599, iteration: 336091
loss: 0.9559198617935181,grad_norm: 0.7808204389202582, iteration: 336092
loss: 1.0050082206726074,grad_norm: 0.8161429914611766, iteration: 336093
loss: 0.9658427238464355,grad_norm: 0.7667142667963259, iteration: 336094
loss: 0.9953683614730835,grad_norm: 0.7398693509444598, iteration: 336095
loss: 1.0021711587905884,grad_norm: 0.8152598134216079, iteration: 336096
loss: 1.0045889616012573,grad_norm: 0.7968309128446505, iteration: 336097
loss: 0.9971572160720825,grad_norm: 0.7906377444443657, iteration: 336098
loss: 0.982615053653717,grad_norm: 0.8050008122386855, iteration: 336099
loss: 1.0211652517318726,grad_norm: 0.7804975119941118, iteration: 336100
loss: 1.0942399501800537,grad_norm: 0.7737951668856154, iteration: 336101
loss: 1.0740339756011963,grad_norm: 0.9999996370334806, iteration: 336102
loss: 1.0485163927078247,grad_norm: 0.9005770904756447, iteration: 336103
loss: 0.9942002892494202,grad_norm: 0.7442739971658514, iteration: 336104
loss: 1.0183545351028442,grad_norm: 0.8170330759194271, iteration: 336105
loss: 0.9464787840843201,grad_norm: 0.9922267779910785, iteration: 336106
loss: 1.013271689414978,grad_norm: 0.8561132988808443, iteration: 336107
loss: 1.108461618423462,grad_norm: 0.9999995710256484, iteration: 336108
loss: 1.0483990907669067,grad_norm: 0.7145088346821868, iteration: 336109
loss: 0.9463672637939453,grad_norm: 0.9928326011100934, iteration: 336110
loss: 1.0508511066436768,grad_norm: 0.883239372257571, iteration: 336111
loss: 1.0108243227005005,grad_norm: 0.9451614659718027, iteration: 336112
loss: 1.0179059505462646,grad_norm: 0.7929932526676354, iteration: 336113
loss: 1.0096435546875,grad_norm: 0.8930403652344406, iteration: 336114
loss: 0.9775837659835815,grad_norm: 0.7172070234902921, iteration: 336115
loss: 0.9898291230201721,grad_norm: 0.8215769039925032, iteration: 336116
loss: 1.005462884902954,grad_norm: 0.7978095774869401, iteration: 336117
loss: 1.0490611791610718,grad_norm: 0.7537051943063803, iteration: 336118
loss: 0.9775890111923218,grad_norm: 0.8913550096016106, iteration: 336119
loss: 0.9562491178512573,grad_norm: 0.7899669264375415, iteration: 336120
loss: 1.0281622409820557,grad_norm: 0.9999998504264695, iteration: 336121
loss: 1.0215263366699219,grad_norm: 0.9600526633345756, iteration: 336122
loss: 0.9802306294441223,grad_norm: 0.7139103349942802, iteration: 336123
loss: 1.0531046390533447,grad_norm: 0.9999991851497372, iteration: 336124
loss: 1.0025864839553833,grad_norm: 0.7469011967273664, iteration: 336125
loss: 1.0858433246612549,grad_norm: 0.9415656610885936, iteration: 336126
loss: 1.0243349075317383,grad_norm: 0.9999990775597086, iteration: 336127
loss: 1.0114762783050537,grad_norm: 0.7277183797367602, iteration: 336128
loss: 1.0179532766342163,grad_norm: 0.9862800605682968, iteration: 336129
loss: 1.0404009819030762,grad_norm: 0.9999991464977117, iteration: 336130
loss: 0.9979724287986755,grad_norm: 0.6388413885612855, iteration: 336131
loss: 0.9986680150032043,grad_norm: 0.9170660247600438, iteration: 336132
loss: 0.978635847568512,grad_norm: 0.7321088874741988, iteration: 336133
loss: 1.078404426574707,grad_norm: 0.8402881427888085, iteration: 336134
loss: 0.9796985983848572,grad_norm: 0.8561161228681908, iteration: 336135
loss: 1.0762081146240234,grad_norm: 0.7828035788726514, iteration: 336136
loss: 0.9893134236335754,grad_norm: 0.8928024043112658, iteration: 336137
loss: 1.1309009790420532,grad_norm: 0.8679936403989285, iteration: 336138
loss: 0.9987825751304626,grad_norm: 0.8181384546098386, iteration: 336139
loss: 1.0416382551193237,grad_norm: 0.8595832761276216, iteration: 336140
loss: 1.005066990852356,grad_norm: 0.8871434874747476, iteration: 336141
loss: 0.9884918332099915,grad_norm: 0.7925658352961911, iteration: 336142
loss: 0.9570244550704956,grad_norm: 0.9152014926103387, iteration: 336143
loss: 1.0086712837219238,grad_norm: 0.9999997774631594, iteration: 336144
loss: 1.0355743169784546,grad_norm: 0.9999996741094234, iteration: 336145
loss: 1.0486468076705933,grad_norm: 0.8010564128459862, iteration: 336146
loss: 1.0060231685638428,grad_norm: 0.9999990851569519, iteration: 336147
loss: 1.0351272821426392,grad_norm: 0.8300316323597203, iteration: 336148
loss: 0.9860957860946655,grad_norm: 0.850287250800812, iteration: 336149
loss: 1.0019774436950684,grad_norm: 0.7831969134215906, iteration: 336150
loss: 0.9896451234817505,grad_norm: 0.8556990774140188, iteration: 336151
loss: 1.0181763172149658,grad_norm: 0.918096927806154, iteration: 336152
loss: 1.0852464437484741,grad_norm: 0.9999992988197828, iteration: 336153
loss: 1.004217505455017,grad_norm: 0.8368400476467506, iteration: 336154
loss: 1.0048203468322754,grad_norm: 0.7574503402232259, iteration: 336155
loss: 1.0590752363204956,grad_norm: 0.9999991198720176, iteration: 336156
loss: 1.0375622510910034,grad_norm: 0.9999991174073912, iteration: 336157
loss: 1.0089938640594482,grad_norm: 0.9232795332633004, iteration: 336158
loss: 1.029973030090332,grad_norm: 0.790631973327582, iteration: 336159
loss: 0.995442807674408,grad_norm: 0.7811905868417326, iteration: 336160
loss: 0.9732325077056885,grad_norm: 0.855765817235528, iteration: 336161
loss: 1.0158296823501587,grad_norm: 0.7597976477009686, iteration: 336162
loss: 0.9876318573951721,grad_norm: 0.719582080255173, iteration: 336163
loss: 0.9723538756370544,grad_norm: 0.999999585855321, iteration: 336164
loss: 1.0233205556869507,grad_norm: 0.9999992406975254, iteration: 336165
loss: 0.9925423264503479,grad_norm: 0.8581615081150563, iteration: 336166
loss: 0.9872080683708191,grad_norm: 0.9000756208924034, iteration: 336167
loss: 0.9910690784454346,grad_norm: 0.9999990588392517, iteration: 336168
loss: 1.0261403322219849,grad_norm: 0.8546301170139933, iteration: 336169
loss: 1.0036011934280396,grad_norm: 0.9806183736299429, iteration: 336170
loss: 1.0017625093460083,grad_norm: 0.6497402526410122, iteration: 336171
loss: 1.0436291694641113,grad_norm: 0.9999995851671649, iteration: 336172
loss: 0.998103141784668,grad_norm: 0.9999994254669641, iteration: 336173
loss: 1.0620700120925903,grad_norm: 0.9999999172739834, iteration: 336174
loss: 0.9957118034362793,grad_norm: 0.6770521431852807, iteration: 336175
loss: 1.0118608474731445,grad_norm: 0.8600048789230123, iteration: 336176
loss: 1.0047986507415771,grad_norm: 0.9295412219340975, iteration: 336177
loss: 1.0268534421920776,grad_norm: 0.8495142480103246, iteration: 336178
loss: 0.9906348586082458,grad_norm: 0.7740158476807907, iteration: 336179
loss: 1.0184073448181152,grad_norm: 0.8764087420280563, iteration: 336180
loss: 1.01077401638031,grad_norm: 0.7699492361772591, iteration: 336181
loss: 1.0540636777877808,grad_norm: 0.896787403146999, iteration: 336182
loss: 1.0379493236541748,grad_norm: 0.8863514360280272, iteration: 336183
loss: 0.9914138317108154,grad_norm: 0.7613303383547827, iteration: 336184
loss: 0.9876319766044617,grad_norm: 0.7844609042985141, iteration: 336185
loss: 1.0745939016342163,grad_norm: 0.9684085404051103, iteration: 336186
loss: 0.9879416823387146,grad_norm: 0.836737345940209, iteration: 336187
loss: 1.0893120765686035,grad_norm: 0.9999989707376256, iteration: 336188
loss: 1.0714333057403564,grad_norm: 0.9999991665201311, iteration: 336189
loss: 1.0298490524291992,grad_norm: 0.8608212077516751, iteration: 336190
loss: 0.9790855050086975,grad_norm: 0.7327560460917825, iteration: 336191
loss: 0.9879461526870728,grad_norm: 0.9535938553174241, iteration: 336192
loss: 1.0404897928237915,grad_norm: 0.9985397634671738, iteration: 336193
loss: 1.001011610031128,grad_norm: 0.8486931934517443, iteration: 336194
loss: 1.066627025604248,grad_norm: 0.7880047938429221, iteration: 336195
loss: 1.0208115577697754,grad_norm: 0.9999990510167659, iteration: 336196
loss: 1.0253074169158936,grad_norm: 0.9999992274262567, iteration: 336197
loss: 0.9671568870544434,grad_norm: 0.8278269378124824, iteration: 336198
loss: 1.096771001815796,grad_norm: 0.9293104219360856, iteration: 336199
loss: 0.9860745668411255,grad_norm: 0.8780644020450941, iteration: 336200
loss: 1.0149105787277222,grad_norm: 0.8336777022390449, iteration: 336201
loss: 0.997581422328949,grad_norm: 0.9842377056818621, iteration: 336202
loss: 1.0664175748825073,grad_norm: 0.9538980216402343, iteration: 336203
loss: 0.9678249359130859,grad_norm: 0.9999990514406886, iteration: 336204
loss: 0.9986994862556458,grad_norm: 0.8378376633651389, iteration: 336205
loss: 1.0732510089874268,grad_norm: 0.8552508149639562, iteration: 336206
loss: 1.0014888048171997,grad_norm: 0.7201362095411192, iteration: 336207
loss: 1.0564117431640625,grad_norm: 0.9628706695133445, iteration: 336208
loss: 0.996137797832489,grad_norm: 0.8639934999912181, iteration: 336209
loss: 0.967043936252594,grad_norm: 0.7845252025259347, iteration: 336210
loss: 0.9685712456703186,grad_norm: 0.9145817041787474, iteration: 336211
loss: 1.009755253791809,grad_norm: 0.7840113210390608, iteration: 336212
loss: 0.9554589986801147,grad_norm: 0.999999098627329, iteration: 336213
loss: 1.032345175743103,grad_norm: 0.9819425162360421, iteration: 336214
loss: 1.0588427782058716,grad_norm: 0.8504070907140143, iteration: 336215
loss: 1.096343755722046,grad_norm: 0.8301478274970379, iteration: 336216
loss: 1.0416098833084106,grad_norm: 0.9999993142227701, iteration: 336217
loss: 1.0888067483901978,grad_norm: 0.9999990933868255, iteration: 336218
loss: 1.0159305334091187,grad_norm: 0.8505048683977008, iteration: 336219
loss: 1.0343642234802246,grad_norm: 0.6971349744665291, iteration: 336220
loss: 1.024495005607605,grad_norm: 0.7831584447315308, iteration: 336221
loss: 0.9889951348304749,grad_norm: 0.7906151242090416, iteration: 336222
loss: 1.0730270147323608,grad_norm: 0.9999993932521596, iteration: 336223
loss: 1.023659110069275,grad_norm: 0.9186783517144501, iteration: 336224
loss: 0.9603582620620728,grad_norm: 0.8255061239453758, iteration: 336225
loss: 0.9941419363021851,grad_norm: 0.8579916613556383, iteration: 336226
loss: 1.0971252918243408,grad_norm: 0.9999993801656819, iteration: 336227
loss: 1.0016170740127563,grad_norm: 0.999999964254303, iteration: 336228
loss: 1.0260857343673706,grad_norm: 0.8686250792686482, iteration: 336229
loss: 1.0237390995025635,grad_norm: 0.9413143848045908, iteration: 336230
loss: 0.9635404348373413,grad_norm: 0.9999990472332558, iteration: 336231
loss: 1.006237506866455,grad_norm: 0.774398707069747, iteration: 336232
loss: 0.9960533976554871,grad_norm: 0.6722223468110231, iteration: 336233
loss: 1.0564446449279785,grad_norm: 0.933166966729504, iteration: 336234
loss: 1.0330811738967896,grad_norm: 0.7179846046286087, iteration: 336235
loss: 1.0528244972229004,grad_norm: 0.9419885928843668, iteration: 336236
loss: 0.9940054416656494,grad_norm: 0.9999991914901726, iteration: 336237
loss: 1.0537447929382324,grad_norm: 0.9999991659777487, iteration: 336238
loss: 0.9947521090507507,grad_norm: 0.781304968260833, iteration: 336239
loss: 1.0859947204589844,grad_norm: 0.9999994988969924, iteration: 336240
loss: 1.004805564880371,grad_norm: 0.9999995020533948, iteration: 336241
loss: 1.0855456590652466,grad_norm: 0.7813869986538557, iteration: 336242
loss: 0.9550879001617432,grad_norm: 0.7891983618403119, iteration: 336243
loss: 1.190003514289856,grad_norm: 0.8563889240590539, iteration: 336244
loss: 1.031152606010437,grad_norm: 0.8454094238927743, iteration: 336245
loss: 0.9807515740394592,grad_norm: 0.6542436464328082, iteration: 336246
loss: 1.016983985900879,grad_norm: 0.9999992068550069, iteration: 336247
loss: 0.9865190982818604,grad_norm: 0.7809850809929298, iteration: 336248
loss: 1.0349854230880737,grad_norm: 0.8554337394971149, iteration: 336249
loss: 1.0197362899780273,grad_norm: 0.9999999245804285, iteration: 336250
loss: 0.9967765212059021,grad_norm: 0.9755433409132517, iteration: 336251
loss: 1.0100703239440918,grad_norm: 0.9504277968459832, iteration: 336252
loss: 1.075137972831726,grad_norm: 0.9999995920581232, iteration: 336253
loss: 1.0695713758468628,grad_norm: 0.999999699859755, iteration: 336254
loss: 1.0437288284301758,grad_norm: 0.9165030376395525, iteration: 336255
loss: 1.034581184387207,grad_norm: 0.8986102707041385, iteration: 336256
loss: 1.0257236957550049,grad_norm: 0.7682177065207052, iteration: 336257
loss: 0.9862449765205383,grad_norm: 0.6385419497082159, iteration: 336258
loss: 0.9944896697998047,grad_norm: 0.771808182656541, iteration: 336259
loss: 0.9859185218811035,grad_norm: 0.7626896152408135, iteration: 336260
loss: 1.0570399761199951,grad_norm: 0.8327618098518879, iteration: 336261
loss: 1.0496958494186401,grad_norm: 0.9999998422131775, iteration: 336262
loss: 1.0208711624145508,grad_norm: 1.0000000092886714, iteration: 336263
loss: 1.04188871383667,grad_norm: 0.9999999912662181, iteration: 336264
loss: 1.000357747077942,grad_norm: 0.7284043244882686, iteration: 336265
loss: 1.0548745393753052,grad_norm: 0.9999997096916298, iteration: 336266
loss: 1.0491572618484497,grad_norm: 0.9656522862030668, iteration: 336267
loss: 1.0195919275283813,grad_norm: 0.9999990629124675, iteration: 336268
loss: 0.9915552735328674,grad_norm: 0.9999990098031336, iteration: 336269
loss: 1.0866483449935913,grad_norm: 0.8722019165897492, iteration: 336270
loss: 1.1860185861587524,grad_norm: 0.9999990542335876, iteration: 336271
loss: 0.9953099489212036,grad_norm: 0.6633476364735705, iteration: 336272
loss: 1.0465880632400513,grad_norm: 0.9521078919979489, iteration: 336273
loss: 1.0457390546798706,grad_norm: 0.9999993243790642, iteration: 336274
loss: 0.9986139535903931,grad_norm: 0.9251354935335058, iteration: 336275
loss: 1.0301488637924194,grad_norm: 0.8848026240670462, iteration: 336276
loss: 1.0130022764205933,grad_norm: 0.9195052177439699, iteration: 336277
loss: 0.9997889995574951,grad_norm: 0.9999992468727458, iteration: 336278
loss: 1.0417612791061401,grad_norm: 0.8936122906362993, iteration: 336279
loss: 1.0098859071731567,grad_norm: 0.8085419804969834, iteration: 336280
loss: 1.014171838760376,grad_norm: 0.658367006620894, iteration: 336281
loss: 1.0554299354553223,grad_norm: 0.9237134094510694, iteration: 336282
loss: 1.0886578559875488,grad_norm: 0.999999307460988, iteration: 336283
loss: 1.01423978805542,grad_norm: 0.9999991181514465, iteration: 336284
loss: 1.02093505859375,grad_norm: 0.8335748480393118, iteration: 336285
loss: 0.9595391154289246,grad_norm: 0.9624558793388766, iteration: 336286
loss: 0.9997514486312866,grad_norm: 0.7578257852101847, iteration: 336287
loss: 1.0737662315368652,grad_norm: 0.9999992364746212, iteration: 336288
loss: 1.0010261535644531,grad_norm: 0.9108923751383889, iteration: 336289
loss: 1.054319143295288,grad_norm: 0.8540982493475201, iteration: 336290
loss: 0.9934408068656921,grad_norm: 0.94377084363579, iteration: 336291
loss: 1.034265398979187,grad_norm: 0.8925432148757686, iteration: 336292
loss: 0.9725906252861023,grad_norm: 0.9664018895727229, iteration: 336293
loss: 0.9824930429458618,grad_norm: 0.7498504334380448, iteration: 336294
loss: 1.035372018814087,grad_norm: 0.8893674483887403, iteration: 336295
loss: 0.984221875667572,grad_norm: 0.8787757417612443, iteration: 336296
loss: 1.0006476640701294,grad_norm: 0.884383764653997, iteration: 336297
loss: 0.9883307218551636,grad_norm: 0.8459230115897511, iteration: 336298
loss: 1.0171667337417603,grad_norm: 0.9156647494295285, iteration: 336299
loss: 0.9758009910583496,grad_norm: 0.7373359086727618, iteration: 336300
loss: 0.9552226662635803,grad_norm: 0.9058773117771264, iteration: 336301
loss: 0.9880532026290894,grad_norm: 0.7430461813824691, iteration: 336302
loss: 1.027800440788269,grad_norm: 0.8686775556404845, iteration: 336303
loss: 1.0145225524902344,grad_norm: 0.7341526031515476, iteration: 336304
loss: 0.9616106152534485,grad_norm: 0.7976819088593081, iteration: 336305
loss: 1.0069748163223267,grad_norm: 0.9999995312681277, iteration: 336306
loss: 1.00220787525177,grad_norm: 0.9582071867651345, iteration: 336307
loss: 0.9969224333763123,grad_norm: 0.8415701417074363, iteration: 336308
loss: 1.1212376356124878,grad_norm: 0.9999999549622239, iteration: 336309
loss: 1.031227469444275,grad_norm: 0.9047762942086313, iteration: 336310
loss: 1.0201513767242432,grad_norm: 0.778534825731437, iteration: 336311
loss: 0.9778587222099304,grad_norm: 0.818514414979039, iteration: 336312
loss: 0.981511116027832,grad_norm: 0.7577082282356726, iteration: 336313
loss: 0.9894181489944458,grad_norm: 0.999999058973444, iteration: 336314
loss: 1.014460802078247,grad_norm: 0.7509218415586886, iteration: 336315
loss: 1.0070126056671143,grad_norm: 0.8167194109900283, iteration: 336316
loss: 0.998786449432373,grad_norm: 0.7776997253939454, iteration: 336317
loss: 1.039263367652893,grad_norm: 0.9999990194703915, iteration: 336318
loss: 0.9991209506988525,grad_norm: 0.803845857329644, iteration: 336319
loss: 1.0253690481185913,grad_norm: 0.8292313907708343, iteration: 336320
loss: 1.0267977714538574,grad_norm: 0.9835054163281289, iteration: 336321
loss: 1.0010167360305786,grad_norm: 0.8811658966799663, iteration: 336322
loss: 1.0372780561447144,grad_norm: 0.9958537594251565, iteration: 336323
loss: 1.0124692916870117,grad_norm: 0.8108781115899292, iteration: 336324
loss: 0.9932906031608582,grad_norm: 0.8376630866976184, iteration: 336325
loss: 0.9974154829978943,grad_norm: 0.8993268387419774, iteration: 336326
loss: 1.0811244249343872,grad_norm: 0.847670655313757, iteration: 336327
loss: 0.9611075520515442,grad_norm: 0.910183785451769, iteration: 336328
loss: 1.0178407430648804,grad_norm: 0.9677877991830788, iteration: 336329
loss: 1.0571671724319458,grad_norm: 0.9999999003074849, iteration: 336330
loss: 1.0056416988372803,grad_norm: 0.7693395765543595, iteration: 336331
loss: 0.9957390427589417,grad_norm: 0.8586203426721298, iteration: 336332
loss: 0.953597366809845,grad_norm: 0.8154993965666404, iteration: 336333
loss: 1.0291078090667725,grad_norm: 0.824453215955099, iteration: 336334
loss: 1.0413422584533691,grad_norm: 0.9999999515474883, iteration: 336335
loss: 0.9889361262321472,grad_norm: 0.8748574009379834, iteration: 336336
loss: 1.0298649072647095,grad_norm: 0.9999995399667283, iteration: 336337
loss: 1.0377572774887085,grad_norm: 0.8072083334328571, iteration: 336338
loss: 1.0145207643508911,grad_norm: 0.8751256845991761, iteration: 336339
loss: 1.0193103551864624,grad_norm: 0.9852338112560896, iteration: 336340
loss: 1.0005784034729004,grad_norm: 0.8222296032584001, iteration: 336341
loss: 1.0160244703292847,grad_norm: 0.7820929388688113, iteration: 336342
loss: 1.0385373830795288,grad_norm: 0.9999991783766675, iteration: 336343
loss: 0.9938332438468933,grad_norm: 0.9999992311754444, iteration: 336344
loss: 1.054904580116272,grad_norm: 0.9563518042403344, iteration: 336345
loss: 1.0172690153121948,grad_norm: 0.7894039119408437, iteration: 336346
loss: 1.010576605796814,grad_norm: 0.9999994124987865, iteration: 336347
loss: 1.0447520017623901,grad_norm: 0.7710975501653187, iteration: 336348
loss: 0.9811619520187378,grad_norm: 0.8423623118517696, iteration: 336349
loss: 0.9363846182823181,grad_norm: 0.9999992092532346, iteration: 336350
loss: 1.01279616355896,grad_norm: 0.778402836089592, iteration: 336351
loss: 0.9718270897865295,grad_norm: 0.8289324128693166, iteration: 336352
loss: 0.9944987893104553,grad_norm: 0.9999997195646051, iteration: 336353
loss: 0.9870771765708923,grad_norm: 0.9520724824282243, iteration: 336354
loss: 1.0607956647872925,grad_norm: 0.9999995040006899, iteration: 336355
loss: 1.009870171546936,grad_norm: 0.9999998447664612, iteration: 336356
loss: 1.03764009475708,grad_norm: 0.8734068311545301, iteration: 336357
loss: 1.009645700454712,grad_norm: 0.8696067658452261, iteration: 336358
loss: 0.9943824410438538,grad_norm: 0.8223949017141851, iteration: 336359
loss: 0.9925013780593872,grad_norm: 0.7264029922380538, iteration: 336360
loss: 1.0069082975387573,grad_norm: 0.9130879294908126, iteration: 336361
loss: 0.9922870397567749,grad_norm: 0.9999990038189486, iteration: 336362
loss: 1.0489193201065063,grad_norm: 0.999999958456764, iteration: 336363
loss: 0.9846420884132385,grad_norm: 0.7808216745580423, iteration: 336364
loss: 1.0080963373184204,grad_norm: 0.7302104704274955, iteration: 336365
loss: 1.0057536363601685,grad_norm: 0.9999995424266989, iteration: 336366
loss: 0.9766777157783508,grad_norm: 0.8974640072732492, iteration: 336367
loss: 1.0037487745285034,grad_norm: 0.9999996064702522, iteration: 336368
loss: 1.0102274417877197,grad_norm: 0.9790975460916747, iteration: 336369
loss: 0.9923729300498962,grad_norm: 0.7933028423115666, iteration: 336370
loss: 0.996959924697876,grad_norm: 0.821522424136572, iteration: 336371
loss: 0.9983933568000793,grad_norm: 0.8065506655897159, iteration: 336372
loss: 1.0251063108444214,grad_norm: 0.7622194095994406, iteration: 336373
loss: 0.9828441739082336,grad_norm: 0.8426761935358458, iteration: 336374
loss: 0.9807700514793396,grad_norm: 0.8748065865316794, iteration: 336375
loss: 1.0065369606018066,grad_norm: 0.719409826890045, iteration: 336376
loss: 0.995729923248291,grad_norm: 0.916689775597234, iteration: 336377
loss: 0.9855306148529053,grad_norm: 0.8231797280981988, iteration: 336378
loss: 1.022682547569275,grad_norm: 0.669797514033733, iteration: 336379
loss: 0.9863653779029846,grad_norm: 0.6984528430590425, iteration: 336380
loss: 0.982628583908081,grad_norm: 0.7412613838782162, iteration: 336381
loss: 1.0031886100769043,grad_norm: 0.7330882415104178, iteration: 336382
loss: 0.973739743232727,grad_norm: 0.9105397839087009, iteration: 336383
loss: 1.0088229179382324,grad_norm: 0.9999990027225785, iteration: 336384
loss: 0.9937018752098083,grad_norm: 0.7373657049128504, iteration: 336385
loss: 1.0270031690597534,grad_norm: 0.9417641145094535, iteration: 336386
loss: 1.0032111406326294,grad_norm: 0.8819803493431907, iteration: 336387
loss: 0.994671642780304,grad_norm: 0.9742389732777427, iteration: 336388
loss: 1.1128098964691162,grad_norm: 0.9999994969945344, iteration: 336389
loss: 0.9677342176437378,grad_norm: 0.7678719315544119, iteration: 336390
loss: 1.00490403175354,grad_norm: 0.688966491419276, iteration: 336391
loss: 0.9633678197860718,grad_norm: 0.9186447506714003, iteration: 336392
loss: 1.172758936882019,grad_norm: 0.9999991122869715, iteration: 336393
loss: 0.9999852180480957,grad_norm: 0.8958505834299665, iteration: 336394
loss: 0.9779914617538452,grad_norm: 0.8406383850655305, iteration: 336395
loss: 1.010540246963501,grad_norm: 0.756233041937084, iteration: 336396
loss: 1.0135339498519897,grad_norm: 0.9999991884019445, iteration: 336397
loss: 1.0171762704849243,grad_norm: 0.8236746575247392, iteration: 336398
loss: 1.0176664590835571,grad_norm: 0.7845157833105947, iteration: 336399
loss: 0.9851044416427612,grad_norm: 0.9363000179707229, iteration: 336400
loss: 0.9933595061302185,grad_norm: 0.9999991260184328, iteration: 336401
loss: 1.077986478805542,grad_norm: 0.9394473030410075, iteration: 336402
loss: 1.036947250366211,grad_norm: 0.8873205332808461, iteration: 336403
loss: 1.0127958059310913,grad_norm: 0.9506131623499054, iteration: 336404
loss: 0.9759852290153503,grad_norm: 0.8000482165989806, iteration: 336405
loss: 0.9967207312583923,grad_norm: 0.8669834980175742, iteration: 336406
loss: 1.0079509019851685,grad_norm: 0.856444253037251, iteration: 336407
loss: 1.0053412914276123,grad_norm: 0.7992876091166797, iteration: 336408
loss: 0.9954254031181335,grad_norm: 0.7974872672555268, iteration: 336409
loss: 0.9678770899772644,grad_norm: 0.8701826088720586, iteration: 336410
loss: 0.978119432926178,grad_norm: 0.7851911014709237, iteration: 336411
loss: 0.9803838133811951,grad_norm: 0.8491373805359653, iteration: 336412
loss: 1.0200037956237793,grad_norm: 0.8128722785312071, iteration: 336413
loss: 1.0534061193466187,grad_norm: 0.7728294527096706, iteration: 336414
loss: 0.9842528700828552,grad_norm: 0.9855399252770565, iteration: 336415
loss: 0.9903662800788879,grad_norm: 0.7310331602463516, iteration: 336416
loss: 1.0582754611968994,grad_norm: 0.9999999167683312, iteration: 336417
loss: 1.017828345298767,grad_norm: 0.940576416487998, iteration: 336418
loss: 0.9593965411186218,grad_norm: 0.9517209239387026, iteration: 336419
loss: 1.012953281402588,grad_norm: 0.999999113921186, iteration: 336420
loss: 1.0001004934310913,grad_norm: 0.7442206405223138, iteration: 336421
loss: 1.0238109827041626,grad_norm: 0.8605909715779471, iteration: 336422
loss: 1.0238720178604126,grad_norm: 0.8359397110988881, iteration: 336423
loss: 1.000951886177063,grad_norm: 0.8271467744415529, iteration: 336424
loss: 0.9844621419906616,grad_norm: 0.8703768216775138, iteration: 336425
loss: 0.9908316731452942,grad_norm: 0.8606789558763707, iteration: 336426
loss: 0.9990454912185669,grad_norm: 0.9509552268879978, iteration: 336427
loss: 1.1619702577590942,grad_norm: 0.9999998349831248, iteration: 336428
loss: 1.0088441371917725,grad_norm: 0.9074517503871695, iteration: 336429
loss: 1.0309584140777588,grad_norm: 0.9999989939236202, iteration: 336430
loss: 0.9904028177261353,grad_norm: 0.8064933176082003, iteration: 336431
loss: 1.0634000301361084,grad_norm: 0.901001671064884, iteration: 336432
loss: 1.0133024454116821,grad_norm: 0.8840619100713757, iteration: 336433
loss: 0.9928351640701294,grad_norm: 0.7092699294666104, iteration: 336434
loss: 1.0441232919692993,grad_norm: 0.7614303312226184, iteration: 336435
loss: 1.058376669883728,grad_norm: 0.8225711123335683, iteration: 336436
loss: 0.9891963005065918,grad_norm: 0.9999990623416402, iteration: 336437
loss: 1.0372984409332275,grad_norm: 0.7782146630394253, iteration: 336438
loss: 0.9788342118263245,grad_norm: 0.852954337792275, iteration: 336439
loss: 1.0186395645141602,grad_norm: 0.7778585429578412, iteration: 336440
loss: 0.9775702357292175,grad_norm: 0.7877415006152492, iteration: 336441
loss: 1.043845295906067,grad_norm: 0.9999996404987006, iteration: 336442
loss: 0.9892160296440125,grad_norm: 0.8540859092516123, iteration: 336443
loss: 1.0105743408203125,grad_norm: 0.7515751010050777, iteration: 336444
loss: 1.002625823020935,grad_norm: 0.9999993725115772, iteration: 336445
loss: 1.1469181776046753,grad_norm: 0.9999990842227692, iteration: 336446
loss: 1.009909987449646,grad_norm: 0.9999993725470535, iteration: 336447
loss: 1.057802677154541,grad_norm: 0.9999992163950565, iteration: 336448
loss: 1.1094107627868652,grad_norm: 1.0000000854433793, iteration: 336449
loss: 0.9944813251495361,grad_norm: 0.8517231986291848, iteration: 336450
loss: 1.004961371421814,grad_norm: 0.8699587489873123, iteration: 336451
loss: 0.9999688267707825,grad_norm: 0.9999991032231763, iteration: 336452
loss: 0.9904884099960327,grad_norm: 0.9293596335184827, iteration: 336453
loss: 1.0660231113433838,grad_norm: 0.9999995695752404, iteration: 336454
loss: 1.002970576286316,grad_norm: 0.8517258245892968, iteration: 336455
loss: 0.9892582893371582,grad_norm: 0.8730912926344192, iteration: 336456
loss: 0.9939457178115845,grad_norm: 0.6676463805031527, iteration: 336457
loss: 0.9662264585494995,grad_norm: 0.8454127014153154, iteration: 336458
loss: 0.9877934455871582,grad_norm: 0.7461147939085861, iteration: 336459
loss: 1.0133084058761597,grad_norm: 0.7860966876697237, iteration: 336460
loss: 1.0113385915756226,grad_norm: 0.7558782636566465, iteration: 336461
loss: 1.1234937906265259,grad_norm: 0.9899405051016218, iteration: 336462
loss: 1.0090652704238892,grad_norm: 0.6939499057253019, iteration: 336463
loss: 0.992919921875,grad_norm: 0.9999991240225542, iteration: 336464
loss: 1.017888069152832,grad_norm: 0.8923590520788234, iteration: 336465
loss: 0.9921373128890991,grad_norm: 0.8681450004361273, iteration: 336466
loss: 0.9912548661231995,grad_norm: 0.7685194798002418, iteration: 336467
loss: 1.0003362894058228,grad_norm: 0.9999993639484535, iteration: 336468
loss: 1.0067585706710815,grad_norm: 0.9999999228049041, iteration: 336469
loss: 0.9887746572494507,grad_norm: 0.9046994381769473, iteration: 336470
loss: 0.985349714756012,grad_norm: 0.8056794837222513, iteration: 336471
loss: 1.0101786851882935,grad_norm: 0.8031738497730352, iteration: 336472
loss: 1.0305217504501343,grad_norm: 0.7893195602659386, iteration: 336473
loss: 1.008326530456543,grad_norm: 0.7431970080654839, iteration: 336474
loss: 0.9609577655792236,grad_norm: 0.7541184897858003, iteration: 336475
loss: 0.9911810159683228,grad_norm: 0.7337788938039684, iteration: 336476
loss: 0.9820933938026428,grad_norm: 0.8666199169578548, iteration: 336477
loss: 1.026647686958313,grad_norm: 0.9999992164742703, iteration: 336478
loss: 0.994417667388916,grad_norm: 0.7786839744549272, iteration: 336479
loss: 1.0024601221084595,grad_norm: 0.9999990791559537, iteration: 336480
loss: 1.0033124685287476,grad_norm: 0.8680964210426189, iteration: 336481
loss: 1.003498911857605,grad_norm: 0.902008019772587, iteration: 336482
loss: 0.9964606761932373,grad_norm: 0.9760871537891225, iteration: 336483
loss: 0.9964854717254639,grad_norm: 0.8052407325105074, iteration: 336484
loss: 0.971634030342102,grad_norm: 0.910012408454381, iteration: 336485
loss: 1.0298768281936646,grad_norm: 0.7511790516869746, iteration: 336486
loss: 0.9874019622802734,grad_norm: 0.7575060894824106, iteration: 336487
loss: 1.007417917251587,grad_norm: 0.8442428453931307, iteration: 336488
loss: 0.989260733127594,grad_norm: 0.8208648669106929, iteration: 336489
loss: 1.0632853507995605,grad_norm: 0.9999999629379211, iteration: 336490
loss: 0.9858723282814026,grad_norm: 0.8588619971811088, iteration: 336491
loss: 0.9730520844459534,grad_norm: 0.7998227047366496, iteration: 336492
loss: 1.0038830041885376,grad_norm: 0.8568435353198094, iteration: 336493
loss: 1.028719425201416,grad_norm: 0.9999998868461575, iteration: 336494
loss: 1.0463411808013916,grad_norm: 0.8773503523815276, iteration: 336495
loss: 1.0448591709136963,grad_norm: 0.9949978923363468, iteration: 336496
loss: 0.9801041483879089,grad_norm: 0.9999991519958086, iteration: 336497
loss: 0.9806045293807983,grad_norm: 0.8913640493321074, iteration: 336498
loss: 0.9895517826080322,grad_norm: 0.780972184805046, iteration: 336499
loss: 0.9892358779907227,grad_norm: 0.9999992220201289, iteration: 336500
loss: 1.0163291692733765,grad_norm: 0.7193846114156212, iteration: 336501
loss: 1.0211197137832642,grad_norm: 0.8234034788020971, iteration: 336502
loss: 1.0236150026321411,grad_norm: 0.836849343076644, iteration: 336503
loss: 1.0698639154434204,grad_norm: 0.9284474307377188, iteration: 336504
loss: 1.000092625617981,grad_norm: 0.8332827115495771, iteration: 336505
loss: 1.008366346359253,grad_norm: 0.9379895424285343, iteration: 336506
loss: 0.9883631467819214,grad_norm: 0.8718091644566679, iteration: 336507
loss: 0.9961073398590088,grad_norm: 0.8767626579536851, iteration: 336508
loss: 1.009761929512024,grad_norm: 0.9999991973287452, iteration: 336509
loss: 0.9823526740074158,grad_norm: 0.9631395775132404, iteration: 336510
loss: 0.9836034774780273,grad_norm: 0.8109770725828686, iteration: 336511
loss: 1.0657988786697388,grad_norm: 0.9999990454172295, iteration: 336512
loss: 1.008053183555603,grad_norm: 0.9041630958521155, iteration: 336513
loss: 1.0018471479415894,grad_norm: 0.9999991417726869, iteration: 336514
loss: 0.9883677363395691,grad_norm: 0.7468473114554682, iteration: 336515
loss: 1.0096039772033691,grad_norm: 0.9003353627015998, iteration: 336516
loss: 1.0347715616226196,grad_norm: 0.985758372942329, iteration: 336517
loss: 1.0520931482315063,grad_norm: 0.999999425827577, iteration: 336518
loss: 1.0046205520629883,grad_norm: 0.9999999888193507, iteration: 336519
loss: 0.9964914917945862,grad_norm: 0.8050273936887454, iteration: 336520
loss: 1.013508915901184,grad_norm: 0.9300912121759605, iteration: 336521
loss: 1.0193225145339966,grad_norm: 0.9999992190878191, iteration: 336522
loss: 1.0142978429794312,grad_norm: 0.7887840569837936, iteration: 336523
loss: 0.9942378401756287,grad_norm: 0.8005923317825205, iteration: 336524
loss: 1.0397242307662964,grad_norm: 0.8762156715424265, iteration: 336525
loss: 1.001930594444275,grad_norm: 0.9336924705997508, iteration: 336526
loss: 0.9988045692443848,grad_norm: 0.7892584999034289, iteration: 336527
loss: 0.9978023171424866,grad_norm: 0.9999991684815526, iteration: 336528
loss: 1.039698839187622,grad_norm: 0.9999993603829703, iteration: 336529
loss: 0.9969646334648132,grad_norm: 0.7487664459255553, iteration: 336530
loss: 1.0095949172973633,grad_norm: 0.9936709769121355, iteration: 336531
loss: 1.0086809396743774,grad_norm: 0.999999362201985, iteration: 336532
loss: 0.9846144318580627,grad_norm: 0.8509464165927354, iteration: 336533
loss: 0.9967950582504272,grad_norm: 0.829103419848401, iteration: 336534
loss: 1.014457106590271,grad_norm: 0.9654404053580271, iteration: 336535
loss: 1.0216929912567139,grad_norm: 0.7703402867628341, iteration: 336536
loss: 1.0447973012924194,grad_norm: 0.8367260340312745, iteration: 336537
loss: 0.996225118637085,grad_norm: 0.8265920828688892, iteration: 336538
loss: 0.9891652464866638,grad_norm: 0.8264770481303553, iteration: 336539
loss: 1.0098838806152344,grad_norm: 0.7854037182184178, iteration: 336540
loss: 0.97149258852005,grad_norm: 0.8830520790518358, iteration: 336541
loss: 0.9944347143173218,grad_norm: 0.794727495631381, iteration: 336542
loss: 1.0065138339996338,grad_norm: 0.7042241686163392, iteration: 336543
loss: 1.002862811088562,grad_norm: 0.7147000384200267, iteration: 336544
loss: 1.019171953201294,grad_norm: 0.8408880327698784, iteration: 336545
loss: 0.9869545102119446,grad_norm: 0.8117748632352844, iteration: 336546
loss: 1.0587483644485474,grad_norm: 0.9999996556946082, iteration: 336547
loss: 1.042289137840271,grad_norm: 0.7709845400085786, iteration: 336548
loss: 1.017414927482605,grad_norm: 0.7882782407924784, iteration: 336549
loss: 0.9807512164115906,grad_norm: 0.7489245709090294, iteration: 336550
loss: 0.9828743934631348,grad_norm: 0.9999990256953492, iteration: 336551
loss: 1.0004699230194092,grad_norm: 0.7608596068738399, iteration: 336552
loss: 0.9748026728630066,grad_norm: 0.7066566159930686, iteration: 336553
loss: 0.9943623542785645,grad_norm: 0.6628509275267238, iteration: 336554
loss: 1.0337884426116943,grad_norm: 0.8080365225326316, iteration: 336555
loss: 1.0093255043029785,grad_norm: 0.7122993798705256, iteration: 336556
loss: 1.0691512823104858,grad_norm: 0.8602145222691504, iteration: 336557
loss: 1.1430323123931885,grad_norm: 0.9999998670031538, iteration: 336558
loss: 1.0049519538879395,grad_norm: 0.7920441491369119, iteration: 336559
loss: 0.9911791682243347,grad_norm: 0.9883933834122094, iteration: 336560
loss: 0.9623798131942749,grad_norm: 0.7317294354255582, iteration: 336561
loss: 1.0078293085098267,grad_norm: 0.7386335986487005, iteration: 336562
loss: 0.989246666431427,grad_norm: 0.9999992271723954, iteration: 336563
loss: 1.0874147415161133,grad_norm: 0.9999990390815502, iteration: 336564
loss: 0.9996944665908813,grad_norm: 0.8489111371232139, iteration: 336565
loss: 0.9771996140480042,grad_norm: 0.9999997188479446, iteration: 336566
loss: 0.9838871359825134,grad_norm: 0.9999998587583777, iteration: 336567
loss: 1.0191189050674438,grad_norm: 0.822933813812615, iteration: 336568
loss: 0.9855372905731201,grad_norm: 0.8945054318339775, iteration: 336569
loss: 0.9950962066650391,grad_norm: 0.8965431185978441, iteration: 336570
loss: 1.1567178964614868,grad_norm: 0.9999998538823711, iteration: 336571
loss: 1.0013165473937988,grad_norm: 0.740778029978237, iteration: 336572
loss: 1.0059564113616943,grad_norm: 0.767103111784954, iteration: 336573
loss: 1.0019218921661377,grad_norm: 0.5602773817882394, iteration: 336574
loss: 1.0076199769973755,grad_norm: 0.8704338564929758, iteration: 336575
loss: 1.0129631757736206,grad_norm: 0.773833298554578, iteration: 336576
loss: 1.0243330001831055,grad_norm: 0.8245436918876264, iteration: 336577
loss: 1.032394289970398,grad_norm: 0.9999998258368135, iteration: 336578
loss: 0.9850409626960754,grad_norm: 0.849383754168727, iteration: 336579
loss: 0.9949167370796204,grad_norm: 0.9999998430624887, iteration: 336580
loss: 1.0008103847503662,grad_norm: 0.9112819911472541, iteration: 336581
loss: 1.0086493492126465,grad_norm: 0.9677518604756122, iteration: 336582
loss: 0.989758312702179,grad_norm: 0.6984467442975719, iteration: 336583
loss: 0.9683095216751099,grad_norm: 0.8712320242803462, iteration: 336584
loss: 0.9893205165863037,grad_norm: 0.7073557996771329, iteration: 336585
loss: 0.9715579748153687,grad_norm: 0.8706603580956319, iteration: 336586
loss: 0.9518905878067017,grad_norm: 0.9302646200633063, iteration: 336587
loss: 1.0688012838363647,grad_norm: 0.7601290142550381, iteration: 336588
loss: 0.9875765442848206,grad_norm: 0.9408562099305234, iteration: 336589
loss: 0.9824323058128357,grad_norm: 0.9168552856773354, iteration: 336590
loss: 1.022631049156189,grad_norm: 0.7581258884732078, iteration: 336591
loss: 0.9911403656005859,grad_norm: 0.7150943313229923, iteration: 336592
loss: 0.9715743064880371,grad_norm: 0.9999990315810326, iteration: 336593
loss: 1.0046387910842896,grad_norm: 0.723791562925048, iteration: 336594
loss: 0.9669451713562012,grad_norm: 0.6845065295383244, iteration: 336595
loss: 0.9782878160476685,grad_norm: 0.973891422809412, iteration: 336596
loss: 0.9675248861312866,grad_norm: 0.7508063451770267, iteration: 336597
loss: 0.9815080165863037,grad_norm: 0.8086490082863417, iteration: 336598
loss: 1.0117392539978027,grad_norm: 0.7038339000278663, iteration: 336599
loss: 0.9733300805091858,grad_norm: 0.7452987415814862, iteration: 336600
loss: 1.0973631143569946,grad_norm: 0.738390189469073, iteration: 336601
loss: 0.9777253866195679,grad_norm: 0.9999999240250151, iteration: 336602
loss: 1.010909914970398,grad_norm: 0.7473734782587645, iteration: 336603
loss: 1.0108418464660645,grad_norm: 0.7775004802976032, iteration: 336604
loss: 1.0169957876205444,grad_norm: 0.8870601074359263, iteration: 336605
loss: 0.9839708209037781,grad_norm: 0.7704775097331412, iteration: 336606
loss: 1.0118223428726196,grad_norm: 0.9322959108907417, iteration: 336607
loss: 0.9794250726699829,grad_norm: 0.9094933283408454, iteration: 336608
loss: 0.9890848398208618,grad_norm: 0.7973543372456255, iteration: 336609
loss: 1.0125070810317993,grad_norm: 0.9999993026350735, iteration: 336610
loss: 0.9811946153640747,grad_norm: 0.707556150318308, iteration: 336611
loss: 0.9896975755691528,grad_norm: 0.9999990683562544, iteration: 336612
loss: 1.0024992227554321,grad_norm: 0.718012062371672, iteration: 336613
loss: 0.9982101917266846,grad_norm: 0.919382005565311, iteration: 336614
loss: 1.0065988302230835,grad_norm: 0.9352825649876437, iteration: 336615
loss: 1.0289125442504883,grad_norm: 0.8954569470695724, iteration: 336616
loss: 1.0291149616241455,grad_norm: 0.8377176189405074, iteration: 336617
loss: 0.9684734344482422,grad_norm: 0.8873016404785832, iteration: 336618
loss: 0.9966067671775818,grad_norm: 0.971205014846238, iteration: 336619
loss: 1.016459345817566,grad_norm: 0.8840551575389924, iteration: 336620
loss: 0.9811134934425354,grad_norm: 0.9295681010379211, iteration: 336621
loss: 1.0246597528457642,grad_norm: 0.7293777738122447, iteration: 336622
loss: 1.0219578742980957,grad_norm: 0.9526567990022518, iteration: 336623
loss: 0.9968956112861633,grad_norm: 0.7156321142469022, iteration: 336624
loss: 1.0108366012573242,grad_norm: 0.9258792513207091, iteration: 336625
loss: 0.9616438150405884,grad_norm: 0.9252093442513405, iteration: 336626
loss: 1.0001989603042603,grad_norm: 0.786015976442479, iteration: 336627
loss: 0.9788158535957336,grad_norm: 0.8510197317024477, iteration: 336628
loss: 1.0171029567718506,grad_norm: 0.9999993978760519, iteration: 336629
loss: 0.983379602432251,grad_norm: 0.8919136467185494, iteration: 336630
loss: 1.0063339471817017,grad_norm: 0.7689284418235062, iteration: 336631
loss: 0.9891358017921448,grad_norm: 0.8682651621616629, iteration: 336632
loss: 1.0066840648651123,grad_norm: 0.9999990890688211, iteration: 336633
loss: 0.9901601672172546,grad_norm: 0.7275633733772732, iteration: 336634
loss: 0.9985414743423462,grad_norm: 0.6461716234156845, iteration: 336635
loss: 0.9950711131095886,grad_norm: 0.7729502833142069, iteration: 336636
loss: 0.9902660250663757,grad_norm: 0.8636175697728219, iteration: 336637
loss: 0.9704102277755737,grad_norm: 0.8862791928307417, iteration: 336638
loss: 1.021465539932251,grad_norm: 0.9999991947847524, iteration: 336639
loss: 0.9437530040740967,grad_norm: 0.9081817330938671, iteration: 336640
loss: 0.9459207653999329,grad_norm: 0.7649869070081826, iteration: 336641
loss: 1.0029902458190918,grad_norm: 0.990764331662443, iteration: 336642
loss: 0.9819231033325195,grad_norm: 0.8761065409312001, iteration: 336643
loss: 1.016435980796814,grad_norm: 0.8529089252924541, iteration: 336644
loss: 1.04867422580719,grad_norm: 0.9999996257408368, iteration: 336645
loss: 1.0759197473526,grad_norm: 0.9331236709077873, iteration: 336646
loss: 0.9962882399559021,grad_norm: 0.678040760182005, iteration: 336647
loss: 1.0374246835708618,grad_norm: 0.9999990444702864, iteration: 336648
loss: 0.9883719086647034,grad_norm: 0.6905627427818422, iteration: 336649
loss: 1.004399299621582,grad_norm: 0.7499822459231985, iteration: 336650
loss: 0.9933968186378479,grad_norm: 0.8886786034010364, iteration: 336651
loss: 1.0118439197540283,grad_norm: 0.7778691247411214, iteration: 336652
loss: 1.0221439599990845,grad_norm: 0.9999991904924591, iteration: 336653
loss: 1.0318683385849,grad_norm: 0.9305335985580743, iteration: 336654
loss: 1.0010255575180054,grad_norm: 0.8455188254609035, iteration: 336655
loss: 0.9598316550254822,grad_norm: 0.8979671169266347, iteration: 336656
loss: 1.026804804801941,grad_norm: 0.7599854947687695, iteration: 336657
loss: 0.9553725719451904,grad_norm: 0.7423254517737163, iteration: 336658
loss: 1.0045890808105469,grad_norm: 0.6312310935552393, iteration: 336659
loss: 1.0686465501785278,grad_norm: 0.8469164557431504, iteration: 336660
loss: 1.0275729894638062,grad_norm: 0.8273346407834822, iteration: 336661
loss: 0.990057647228241,grad_norm: 0.8378204785040455, iteration: 336662
loss: 1.0172297954559326,grad_norm: 0.9999991712587063, iteration: 336663
loss: 1.0133267641067505,grad_norm: 0.9999992240432025, iteration: 336664
loss: 1.0096056461334229,grad_norm: 0.8846069886525549, iteration: 336665
loss: 0.9894417524337769,grad_norm: 0.8795106928367857, iteration: 336666
loss: 0.9470992684364319,grad_norm: 0.8695371324126661, iteration: 336667
loss: 1.0231205224990845,grad_norm: 0.9999992325990104, iteration: 336668
loss: 0.9957071542739868,grad_norm: 0.873644849384638, iteration: 336669
loss: 1.0404012203216553,grad_norm: 0.759902166434364, iteration: 336670
loss: 0.9844353199005127,grad_norm: 0.9063122335867677, iteration: 336671
loss: 1.0007483959197998,grad_norm: 0.9999991809079238, iteration: 336672
loss: 0.9744008779525757,grad_norm: 0.6978926950646633, iteration: 336673
loss: 1.029675841331482,grad_norm: 0.8861535641808138, iteration: 336674
loss: 0.9971435070037842,grad_norm: 0.7410294993825349, iteration: 336675
loss: 0.9480311870574951,grad_norm: 0.8723408557029356, iteration: 336676
loss: 1.0101780891418457,grad_norm: 0.7785654425925107, iteration: 336677
loss: 0.9804478883743286,grad_norm: 0.7275748669031954, iteration: 336678
loss: 0.9855890274047852,grad_norm: 0.6470628352387974, iteration: 336679
loss: 1.0034219026565552,grad_norm: 0.9999996467228579, iteration: 336680
loss: 0.992983341217041,grad_norm: 0.8891536707855664, iteration: 336681
loss: 0.987297773361206,grad_norm: 0.7996819777819545, iteration: 336682
loss: 1.058943271636963,grad_norm: 0.8451102449268312, iteration: 336683
loss: 0.9810582399368286,grad_norm: 0.8245242935551322, iteration: 336684
loss: 1.0144802331924438,grad_norm: 0.7963225903962852, iteration: 336685
loss: 0.9974265098571777,grad_norm: 0.8149174414677101, iteration: 336686
loss: 0.9946860671043396,grad_norm: 0.7825078744526248, iteration: 336687
loss: 0.9921514391899109,grad_norm: 0.898624133872895, iteration: 336688
loss: 1.0510493516921997,grad_norm: 0.9999991884423906, iteration: 336689
loss: 1.0094548463821411,grad_norm: 0.9999992331018657, iteration: 336690
loss: 0.977340817451477,grad_norm: 0.8720425235591086, iteration: 336691
loss: 0.9645866751670837,grad_norm: 0.8060731077835317, iteration: 336692
loss: 0.991461992263794,grad_norm: 0.7969394330372113, iteration: 336693
loss: 0.9965255260467529,grad_norm: 0.7452069454987187, iteration: 336694
loss: 1.0484989881515503,grad_norm: 0.963662726661434, iteration: 336695
loss: 1.0141057968139648,grad_norm: 0.8997829297860697, iteration: 336696
loss: 0.9556252956390381,grad_norm: 0.9999995095301231, iteration: 336697
loss: 1.025743842124939,grad_norm: 0.8172275144347233, iteration: 336698
loss: 1.0147583484649658,grad_norm: 0.8203234013530718, iteration: 336699
loss: 0.9553959369659424,grad_norm: 0.8100059756508963, iteration: 336700
loss: 0.9920335412025452,grad_norm: 0.8141938686492934, iteration: 336701
loss: 0.9760962128639221,grad_norm: 0.8336057211246934, iteration: 336702
loss: 1.0102308988571167,grad_norm: 0.7765375559247482, iteration: 336703
loss: 0.9991826415061951,grad_norm: 0.7975536092703535, iteration: 336704
loss: 0.9940239787101746,grad_norm: 0.9020359442270991, iteration: 336705
loss: 1.0140236616134644,grad_norm: 0.8284532165803516, iteration: 336706
loss: 0.9902065396308899,grad_norm: 0.9851545884231998, iteration: 336707
loss: 1.0463391542434692,grad_norm: 0.8242116312710844, iteration: 336708
loss: 0.986885130405426,grad_norm: 0.8555370846515958, iteration: 336709
loss: 0.9968275427818298,grad_norm: 0.7730168511824721, iteration: 336710
loss: 0.9988330602645874,grad_norm: 0.9999990526054711, iteration: 336711
loss: 0.950661838054657,grad_norm: 0.7072668349525666, iteration: 336712
loss: 1.0262937545776367,grad_norm: 0.9999998947300618, iteration: 336713
loss: 0.9839940667152405,grad_norm: 0.9504688842589823, iteration: 336714
loss: 0.9743517637252808,grad_norm: 0.8124624911121465, iteration: 336715
loss: 1.0257972478866577,grad_norm: 0.8402590954892909, iteration: 336716
loss: 1.0094307661056519,grad_norm: 0.7767829856970674, iteration: 336717
loss: 0.9933675527572632,grad_norm: 0.9258225048735015, iteration: 336718
loss: 0.9971472024917603,grad_norm: 0.853056148963046, iteration: 336719
loss: 0.9983065128326416,grad_norm: 0.8151282551185012, iteration: 336720
loss: 1.015602707862854,grad_norm: 0.8326635847531014, iteration: 336721
loss: 0.9872262477874756,grad_norm: 0.7762236821635413, iteration: 336722
loss: 0.989765465259552,grad_norm: 0.9300820540717347, iteration: 336723
loss: 0.998175859451294,grad_norm: 0.7570239380176387, iteration: 336724
loss: 1.0432698726654053,grad_norm: 0.999999060219075, iteration: 336725
loss: 0.9702220559120178,grad_norm: 0.7495046567044036, iteration: 336726
loss: 1.0769388675689697,grad_norm: 0.9999999483774765, iteration: 336727
loss: 0.995564341545105,grad_norm: 0.7837058827098209, iteration: 336728
loss: 0.9852054715156555,grad_norm: 0.8711870388344748, iteration: 336729
loss: 0.9811000227928162,grad_norm: 0.9019000110165771, iteration: 336730
loss: 1.0138294696807861,grad_norm: 0.7570784197735719, iteration: 336731
loss: 0.9593818187713623,grad_norm: 0.6828433349273035, iteration: 336732
loss: 1.0117791891098022,grad_norm: 0.7978284593099182, iteration: 336733
loss: 1.02086341381073,grad_norm: 0.7455644335062366, iteration: 336734
loss: 1.0019879341125488,grad_norm: 0.9999990145703003, iteration: 336735
loss: 1.020708680152893,grad_norm: 0.9999990361271598, iteration: 336736
loss: 0.9883666038513184,grad_norm: 0.8078567996567672, iteration: 336737
loss: 0.9539327025413513,grad_norm: 0.870765082184083, iteration: 336738
loss: 0.9961850047111511,grad_norm: 0.9999992681527241, iteration: 336739
loss: 1.0428647994995117,grad_norm: 0.9999997742608906, iteration: 336740
loss: 0.9980632662773132,grad_norm: 0.8065875794855817, iteration: 336741
loss: 0.989382803440094,grad_norm: 0.8232074564889822, iteration: 336742
loss: 0.9783817529678345,grad_norm: 0.9277284723129929, iteration: 336743
loss: 1.0076640844345093,grad_norm: 0.8735866090495177, iteration: 336744
loss: 0.9381476640701294,grad_norm: 0.7550234789848408, iteration: 336745
loss: 1.0572198629379272,grad_norm: 0.8310104849284935, iteration: 336746
loss: 1.057289481163025,grad_norm: 0.9999998587770407, iteration: 336747
loss: 1.0439647436141968,grad_norm: 0.754062091918299, iteration: 336748
loss: 1.0151594877243042,grad_norm: 0.8918019060453918, iteration: 336749
loss: 0.9655312895774841,grad_norm: 0.9368654852272987, iteration: 336750
loss: 1.0179256200790405,grad_norm: 0.7083748000249293, iteration: 336751
loss: 0.996500551700592,grad_norm: 0.9999997770315618, iteration: 336752
loss: 0.9846625924110413,grad_norm: 0.7267410493766879, iteration: 336753
loss: 0.991466760635376,grad_norm: 0.7140260494283726, iteration: 336754
loss: 1.005722165107727,grad_norm: 0.7590183314448613, iteration: 336755
loss: 0.9590680599212646,grad_norm: 0.7722494814737632, iteration: 336756
loss: 0.9560621976852417,grad_norm: 0.8025579025867793, iteration: 336757
loss: 1.0766698122024536,grad_norm: 0.9999997374598613, iteration: 336758
loss: 0.9914206862449646,grad_norm: 0.7165115388576284, iteration: 336759
loss: 1.018202543258667,grad_norm: 0.999999460230106, iteration: 336760
loss: 1.0316028594970703,grad_norm: 0.8817809432573446, iteration: 336761
loss: 0.9825266599655151,grad_norm: 0.8397919619623676, iteration: 336762
loss: 1.008252501487732,grad_norm: 0.8926513890468485, iteration: 336763
loss: 1.0046088695526123,grad_norm: 0.9716211047659512, iteration: 336764
loss: 1.0076335668563843,grad_norm: 0.8030237800899113, iteration: 336765
loss: 0.93401700258255,grad_norm: 0.7890193486622603, iteration: 336766
loss: 1.008852243423462,grad_norm: 0.8700179081970073, iteration: 336767
loss: 0.9486858248710632,grad_norm: 0.809682833325557, iteration: 336768
loss: 1.027572512626648,grad_norm: 0.8348716865046413, iteration: 336769
loss: 1.011337161064148,grad_norm: 0.7080027882138769, iteration: 336770
loss: 0.9683181047439575,grad_norm: 0.7808280731294043, iteration: 336771
loss: 0.9988780617713928,grad_norm: 0.8457999233569748, iteration: 336772
loss: 0.9635550379753113,grad_norm: 0.8335830630277731, iteration: 336773
loss: 0.996530294418335,grad_norm: 0.856477607818872, iteration: 336774
loss: 1.0372222661972046,grad_norm: 0.9999997928305923, iteration: 336775
loss: 0.9794546961784363,grad_norm: 0.8237107036210293, iteration: 336776
loss: 0.9839792251586914,grad_norm: 0.7853272479863609, iteration: 336777
loss: 1.02117121219635,grad_norm: 0.8529314117928383, iteration: 336778
loss: 0.9822453856468201,grad_norm: 0.8303910118872205, iteration: 336779
loss: 1.0307202339172363,grad_norm: 0.9999991397827638, iteration: 336780
loss: 1.0327497720718384,grad_norm: 0.9999992639095161, iteration: 336781
loss: 1.0028760433197021,grad_norm: 0.8165877011635827, iteration: 336782
loss: 0.9863066673278809,grad_norm: 0.8619908920363168, iteration: 336783
loss: 0.9667696356773376,grad_norm: 0.9470164610342685, iteration: 336784
loss: 0.9630831480026245,grad_norm: 0.9001492056653214, iteration: 336785
loss: 1.0202523469924927,grad_norm: 0.9964650450328283, iteration: 336786
loss: 1.0265147686004639,grad_norm: 0.8439427377647153, iteration: 336787
loss: 1.0140385627746582,grad_norm: 0.8278964640468157, iteration: 336788
loss: 0.9572585225105286,grad_norm: 0.9070778527421117, iteration: 336789
loss: 1.0409793853759766,grad_norm: 0.8953497053159308, iteration: 336790
loss: 1.002793788909912,grad_norm: 0.8633485827340182, iteration: 336791
loss: 1.0157428979873657,grad_norm: 0.8631450371749092, iteration: 336792
loss: 0.9929302334785461,grad_norm: 0.8538406007056915, iteration: 336793
loss: 1.002060890197754,grad_norm: 0.9999992361083505, iteration: 336794
loss: 0.9829193353652954,grad_norm: 0.8702842914336532, iteration: 336795
loss: 0.9866135716438293,grad_norm: 0.8454369872380485, iteration: 336796
loss: 0.9734092950820923,grad_norm: 0.692436409935804, iteration: 336797
loss: 1.0116891860961914,grad_norm: 0.7962546371912438, iteration: 336798
loss: 1.029262661933899,grad_norm: 0.7416627662742107, iteration: 336799
loss: 0.9562673568725586,grad_norm: 0.9661316722620433, iteration: 336800
loss: 1.0242283344268799,grad_norm: 0.7154551461760538, iteration: 336801
loss: 0.9681826829910278,grad_norm: 0.8403720665009015, iteration: 336802
loss: 1.0074706077575684,grad_norm: 0.9121468761448184, iteration: 336803
loss: 1.0039787292480469,grad_norm: 0.7696599066986449, iteration: 336804
loss: 0.9777542352676392,grad_norm: 0.7228589893076177, iteration: 336805
loss: 0.9998045563697815,grad_norm: 0.9999991278551005, iteration: 336806
loss: 1.0102983713150024,grad_norm: 0.6946211952106184, iteration: 336807
loss: 0.9980794191360474,grad_norm: 0.8219499397809451, iteration: 336808
loss: 0.9926248788833618,grad_norm: 0.7219380499140778, iteration: 336809
loss: 1.0023256540298462,grad_norm: 0.9886056063988518, iteration: 336810
loss: 1.0615123510360718,grad_norm: 0.7955077215769397, iteration: 336811
loss: 1.0124701261520386,grad_norm: 0.9762122169988077, iteration: 336812
loss: 0.9869285225868225,grad_norm: 0.8053945989872814, iteration: 336813
loss: 0.9931570291519165,grad_norm: 0.8886444679513732, iteration: 336814
loss: 1.0131573677062988,grad_norm: 0.843089910113639, iteration: 336815
loss: 1.0119950771331787,grad_norm: 0.999999027395028, iteration: 336816
loss: 0.9779635667800903,grad_norm: 0.6331994928795412, iteration: 336817
loss: 1.0103869438171387,grad_norm: 0.7857843910037386, iteration: 336818
loss: 1.0294303894042969,grad_norm: 0.8005144862315542, iteration: 336819
loss: 0.9875144362449646,grad_norm: 0.7043322951841989, iteration: 336820
loss: 1.014277696609497,grad_norm: 0.9157383656282005, iteration: 336821
loss: 1.1877877712249756,grad_norm: 0.9999999388676051, iteration: 336822
loss: 0.9797714352607727,grad_norm: 0.8838998582211803, iteration: 336823
loss: 0.9867597222328186,grad_norm: 0.8620185223221992, iteration: 336824
loss: 0.9846944808959961,grad_norm: 0.8397343478696497, iteration: 336825
loss: 1.028024673461914,grad_norm: 0.9099714939970063, iteration: 336826
loss: 0.9687541127204895,grad_norm: 0.8650029300852591, iteration: 336827
loss: 1.0462987422943115,grad_norm: 0.9999998571475278, iteration: 336828
loss: 0.9980109333992004,grad_norm: 0.8946987922900139, iteration: 336829
loss: 1.0192792415618896,grad_norm: 0.7524920347673444, iteration: 336830
loss: 0.9918848872184753,grad_norm: 0.7591370565286091, iteration: 336831
loss: 0.9792664051055908,grad_norm: 0.9016552862578607, iteration: 336832
loss: 0.9991714954376221,grad_norm: 0.9750920733634386, iteration: 336833
loss: 0.9623458981513977,grad_norm: 0.9074958085605151, iteration: 336834
loss: 0.9918429851531982,grad_norm: 0.839376327234905, iteration: 336835
loss: 1.0214728116989136,grad_norm: 0.8229816645506283, iteration: 336836
loss: 1.0007802248001099,grad_norm: 0.8152378298882138, iteration: 336837
loss: 1.0193387269973755,grad_norm: 0.7738903750407493, iteration: 336838
loss: 1.0202393531799316,grad_norm: 0.7957188398364566, iteration: 336839
loss: 1.0467422008514404,grad_norm: 0.9513168410952988, iteration: 336840
loss: 0.9595508575439453,grad_norm: 0.7177410143391852, iteration: 336841
loss: 1.0106037855148315,grad_norm: 0.8411134750112227, iteration: 336842
loss: 1.0350620746612549,grad_norm: 0.9623896217054461, iteration: 336843
loss: 1.0590687990188599,grad_norm: 0.9999994772659515, iteration: 336844
loss: 1.031870722770691,grad_norm: 0.8968029771886602, iteration: 336845
loss: 0.9942931532859802,grad_norm: 0.7941280331181498, iteration: 336846
loss: 1.0068602561950684,grad_norm: 0.7651417495851981, iteration: 336847
loss: 1.0091605186462402,grad_norm: 0.9999993142335192, iteration: 336848
loss: 1.0039589405059814,grad_norm: 0.7575432356488162, iteration: 336849
loss: 0.9802998304367065,grad_norm: 0.7913230115524595, iteration: 336850
loss: 1.028844952583313,grad_norm: 0.9697272888315219, iteration: 336851
loss: 1.0258705615997314,grad_norm: 0.9268229615087492, iteration: 336852
loss: 1.0165082216262817,grad_norm: 0.8956480114587767, iteration: 336853
loss: 0.9752408862113953,grad_norm: 0.7606568850720983, iteration: 336854
loss: 0.9950292110443115,grad_norm: 0.8978671831108651, iteration: 336855
loss: 1.025691270828247,grad_norm: 0.9271144781136885, iteration: 336856
loss: 0.9917812347412109,grad_norm: 0.9205284866623543, iteration: 336857
loss: 1.0220308303833008,grad_norm: 0.8094190062681672, iteration: 336858
loss: 0.9884589910507202,grad_norm: 0.737761400301071, iteration: 336859
loss: 0.9302050471305847,grad_norm: 0.7739010880032267, iteration: 336860
loss: 1.0100123882293701,grad_norm: 0.8290825503427248, iteration: 336861
loss: 1.0154935121536255,grad_norm: 0.8321011819993214, iteration: 336862
loss: 1.0306798219680786,grad_norm: 0.8399578214440637, iteration: 336863
loss: 0.9823688268661499,grad_norm: 0.7890320207677688, iteration: 336864
loss: 1.06081223487854,grad_norm: 0.9999991587653123, iteration: 336865
loss: 0.9863225221633911,grad_norm: 0.9999990022874984, iteration: 336866
loss: 1.0286082029342651,grad_norm: 0.9999992596511855, iteration: 336867
loss: 0.9737327694892883,grad_norm: 0.8810295523522719, iteration: 336868
loss: 1.0221134424209595,grad_norm: 0.9999992320527366, iteration: 336869
loss: 1.0740678310394287,grad_norm: 0.9999999083885736, iteration: 336870
loss: 0.980800449848175,grad_norm: 0.8709161850592627, iteration: 336871
loss: 1.0256404876708984,grad_norm: 0.7910897705185057, iteration: 336872
loss: 0.9965835809707642,grad_norm: 0.8203042981568289, iteration: 336873
loss: 1.0008248090744019,grad_norm: 0.9557012772994298, iteration: 336874
loss: 1.021364688873291,grad_norm: 0.9999992247820556, iteration: 336875
loss: 0.994360089302063,grad_norm: 0.8882799124781933, iteration: 336876
loss: 0.9986843466758728,grad_norm: 0.8918903877580289, iteration: 336877
loss: 1.0117138624191284,grad_norm: 0.7593576865453213, iteration: 336878
loss: 1.0234731435775757,grad_norm: 0.7182710299891978, iteration: 336879
loss: 0.9586468935012817,grad_norm: 0.9009366994180338, iteration: 336880
loss: 1.0021581649780273,grad_norm: 0.8703111398429094, iteration: 336881
loss: 0.9972258806228638,grad_norm: 0.7813839149846212, iteration: 336882
loss: 0.9837203025817871,grad_norm: 0.7829428139104884, iteration: 336883
loss: 0.9930997490882874,grad_norm: 0.9999995461259396, iteration: 336884
loss: 1.0253121852874756,grad_norm: 0.9999991680515297, iteration: 336885
loss: 0.9932703375816345,grad_norm: 0.8941079250894421, iteration: 336886
loss: 0.9801832437515259,grad_norm: 0.8667690704937562, iteration: 336887
loss: 1.0444875955581665,grad_norm: 0.7503723088536391, iteration: 336888
loss: 0.9725523591041565,grad_norm: 0.8223919576441044, iteration: 336889
loss: 0.9771592020988464,grad_norm: 0.9999990487350754, iteration: 336890
loss: 1.000159502029419,grad_norm: 0.9999991925841005, iteration: 336891
loss: 1.0271779298782349,grad_norm: 0.9999991974199209, iteration: 336892
loss: 1.0001606941223145,grad_norm: 0.7935993921693562, iteration: 336893
loss: 0.9588606953620911,grad_norm: 0.7962785056402756, iteration: 336894
loss: 0.991897702217102,grad_norm: 0.907926386080174, iteration: 336895
loss: 0.9795150756835938,grad_norm: 0.8292096491041557, iteration: 336896
loss: 1.0365084409713745,grad_norm: 0.7874565431236502, iteration: 336897
loss: 1.0554935932159424,grad_norm: 0.9999999149133829, iteration: 336898
loss: 1.0115410089492798,grad_norm: 0.7433140437379315, iteration: 336899
loss: 0.9520100355148315,grad_norm: 0.8484089154113874, iteration: 336900
loss: 0.9835408926010132,grad_norm: 0.9133576524911035, iteration: 336901
loss: 1.0161751508712769,grad_norm: 0.9846584217172268, iteration: 336902
loss: 0.9812684059143066,grad_norm: 0.8023685314145858, iteration: 336903
loss: 1.02376389503479,grad_norm: 0.7869625263504146, iteration: 336904
loss: 0.9909398555755615,grad_norm: 0.8221270675031297, iteration: 336905
loss: 1.0685598850250244,grad_norm: 0.9999991323188513, iteration: 336906
loss: 1.0342602729797363,grad_norm: 0.8278718029825367, iteration: 336907
loss: 0.9754565358161926,grad_norm: 0.8166144860284691, iteration: 336908
loss: 0.9523776769638062,grad_norm: 0.9058195878677446, iteration: 336909
loss: 0.9724398255348206,grad_norm: 0.7887095140421421, iteration: 336910
loss: 1.012457251548767,grad_norm: 0.933956501721696, iteration: 336911
loss: 1.041999340057373,grad_norm: 0.8476319726960586, iteration: 336912
loss: 1.2420724630355835,grad_norm: 0.9999992855204872, iteration: 336913
loss: 0.9695520997047424,grad_norm: 0.8683400023357236, iteration: 336914
loss: 0.9690759181976318,grad_norm: 0.7782398244651929, iteration: 336915
loss: 1.0152356624603271,grad_norm: 0.8697650679004194, iteration: 336916
loss: 0.9936535954475403,grad_norm: 0.727144667928195, iteration: 336917
loss: 0.9919527769088745,grad_norm: 0.8578316490085854, iteration: 336918
loss: 1.0571194887161255,grad_norm: 0.9999997630388425, iteration: 336919
loss: 1.0225874185562134,grad_norm: 0.8209398047883365, iteration: 336920
loss: 0.962284505367279,grad_norm: 0.7985643306280802, iteration: 336921
loss: 1.0093637704849243,grad_norm: 0.8606891105175534, iteration: 336922
loss: 0.9884448051452637,grad_norm: 0.8264224411833719, iteration: 336923
loss: 1.0846378803253174,grad_norm: 0.7637814564440103, iteration: 336924
loss: 1.0632764101028442,grad_norm: 0.9999994444019863, iteration: 336925
loss: 1.049849510192871,grad_norm: 0.6853403354521702, iteration: 336926
loss: 1.0070245265960693,grad_norm: 0.8913622471783804, iteration: 336927
loss: 0.9881017804145813,grad_norm: 0.7080302572499444, iteration: 336928
loss: 0.9723812341690063,grad_norm: 0.9172219781018873, iteration: 336929
loss: 0.9921665787696838,grad_norm: 0.9532776039291748, iteration: 336930
loss: 0.9881439208984375,grad_norm: 0.8490845700750714, iteration: 336931
loss: 1.012734055519104,grad_norm: 0.7783229175725651, iteration: 336932
loss: 0.9978203773498535,grad_norm: 0.7035539209949322, iteration: 336933
loss: 1.0713984966278076,grad_norm: 0.9999994226536478, iteration: 336934
loss: 1.019035816192627,grad_norm: 0.9083751215871322, iteration: 336935
loss: 0.9887312650680542,grad_norm: 0.7846382446086252, iteration: 336936
loss: 0.9800439476966858,grad_norm: 0.999999101911021, iteration: 336937
loss: 1.0175137519836426,grad_norm: 0.7930730086050394, iteration: 336938
loss: 1.0198822021484375,grad_norm: 0.8392551766515195, iteration: 336939
loss: 0.9949826598167419,grad_norm: 0.76945812882837, iteration: 336940
loss: 0.9572331309318542,grad_norm: 0.8149410010699698, iteration: 336941
loss: 0.9813894629478455,grad_norm: 0.8192948343061434, iteration: 336942
loss: 1.0120495557785034,grad_norm: 0.9999995530121394, iteration: 336943
loss: 0.9995253682136536,grad_norm: 0.6923342385807857, iteration: 336944
loss: 0.9895370602607727,grad_norm: 0.6707875864905024, iteration: 336945
loss: 1.0417594909667969,grad_norm: 0.9034518169580446, iteration: 336946
loss: 0.9874448180198669,grad_norm: 0.8914149599431497, iteration: 336947
loss: 0.9802495241165161,grad_norm: 0.9214717225237464, iteration: 336948
loss: 0.961921751499176,grad_norm: 0.7548301178755104, iteration: 336949
loss: 0.9859934449195862,grad_norm: 0.7619752317293595, iteration: 336950
loss: 0.9978764653205872,grad_norm: 0.7123805940348527, iteration: 336951
loss: 0.9792001843452454,grad_norm: 0.9999991003162753, iteration: 336952
loss: 0.983475923538208,grad_norm: 0.7816836471561971, iteration: 336953
loss: 0.9955721497535706,grad_norm: 0.8849497547835777, iteration: 336954
loss: 0.972483217716217,grad_norm: 0.7458780158643088, iteration: 336955
loss: 1.1737993955612183,grad_norm: 0.9930880677545751, iteration: 336956
loss: 0.968828558921814,grad_norm: 0.7400406811036542, iteration: 336957
loss: 0.9614160656929016,grad_norm: 0.9564850386166666, iteration: 336958
loss: 1.0292173624038696,grad_norm: 0.8694854224078453, iteration: 336959
loss: 0.9773392677307129,grad_norm: 0.8776839074900675, iteration: 336960
loss: 1.0199885368347168,grad_norm: 0.9999991230857223, iteration: 336961
loss: 1.0098659992218018,grad_norm: 0.79001686947809, iteration: 336962
loss: 0.9771699905395508,grad_norm: 0.8603564824444679, iteration: 336963
loss: 1.002051830291748,grad_norm: 0.911052563152812, iteration: 336964
loss: 1.0034921169281006,grad_norm: 0.7920700631735161, iteration: 336965
loss: 1.0885835886001587,grad_norm: 0.9999998790212814, iteration: 336966
loss: 1.015012264251709,grad_norm: 0.9899798219006531, iteration: 336967
loss: 1.0288078784942627,grad_norm: 0.9635059975524631, iteration: 336968
loss: 0.9789931774139404,grad_norm: 0.9818545236469911, iteration: 336969
loss: 0.981930673122406,grad_norm: 0.9575312529739353, iteration: 336970
loss: 1.0008584260940552,grad_norm: 0.9999991667998026, iteration: 336971
loss: 1.004175066947937,grad_norm: 0.9999990424141605, iteration: 336972
loss: 0.9798067808151245,grad_norm: 0.7761248173698694, iteration: 336973
loss: 0.9864470958709717,grad_norm: 0.9178776745499871, iteration: 336974
loss: 0.9733888506889343,grad_norm: 0.7881618303408922, iteration: 336975
loss: 0.9669011831283569,grad_norm: 0.8142499430842032, iteration: 336976
loss: 1.050112247467041,grad_norm: 0.9999990907497177, iteration: 336977
loss: 1.0878170728683472,grad_norm: 0.9999992981267879, iteration: 336978
loss: 1.0216434001922607,grad_norm: 0.865046218194296, iteration: 336979
loss: 0.9797467589378357,grad_norm: 0.8219174744854374, iteration: 336980
loss: 0.9911574125289917,grad_norm: 0.9201640100647889, iteration: 336981
loss: 0.9964635968208313,grad_norm: 0.7889520460459861, iteration: 336982
loss: 0.9813732504844666,grad_norm: 0.8653879308279055, iteration: 336983
loss: 1.0175514221191406,grad_norm: 0.809989886465129, iteration: 336984
loss: 1.0121517181396484,grad_norm: 0.7104248853041317, iteration: 336985
loss: 0.9940898418426514,grad_norm: 1.0000000061219472, iteration: 336986
loss: 0.9815775156021118,grad_norm: 0.7792103202803938, iteration: 336987
loss: 0.9658945798873901,grad_norm: 0.7414978061594295, iteration: 336988
loss: 0.9830620288848877,grad_norm: 0.8273460071099323, iteration: 336989
loss: 1.0413384437561035,grad_norm: 0.7836233536545707, iteration: 336990
loss: 1.0339834690093994,grad_norm: 0.8340232184944164, iteration: 336991
loss: 1.0009257793426514,grad_norm: 0.8026757364076254, iteration: 336992
loss: 0.9554779529571533,grad_norm: 0.7731500484134869, iteration: 336993
loss: 1.012223482131958,grad_norm: 0.9999992289623054, iteration: 336994
loss: 1.017950415611267,grad_norm: 0.8081810198688164, iteration: 336995
loss: 1.0262202024459839,grad_norm: 0.8042762792800636, iteration: 336996
loss: 0.9744207859039307,grad_norm: 0.673884483494289, iteration: 336997
loss: 0.9666441082954407,grad_norm: 0.7363413498903362, iteration: 336998
loss: 1.1046050786972046,grad_norm: 0.9828961949119615, iteration: 336999
loss: 0.9687055349349976,grad_norm: 0.8148340246805713, iteration: 337000
loss: 0.957159161567688,grad_norm: 0.8485986888218561, iteration: 337001
loss: 1.0315659046173096,grad_norm: 0.9501696473598567, iteration: 337002
loss: 1.0008569955825806,grad_norm: 0.9287199222950921, iteration: 337003
loss: 0.9864190220832825,grad_norm: 0.8332420746424405, iteration: 337004
loss: 0.991290271282196,grad_norm: 0.8874551886720086, iteration: 337005
loss: 0.9964306950569153,grad_norm: 0.8544724453342106, iteration: 337006
loss: 0.9897491931915283,grad_norm: 0.8550880314573572, iteration: 337007
loss: 1.0276479721069336,grad_norm: 0.8574647207301906, iteration: 337008
loss: 0.9809186458587646,grad_norm: 0.7602747510107993, iteration: 337009
loss: 1.0218772888183594,grad_norm: 0.7769257940570697, iteration: 337010
loss: 1.0246646404266357,grad_norm: 0.8122443970729749, iteration: 337011
loss: 0.9539000988006592,grad_norm: 0.8824268833289378, iteration: 337012
loss: 0.9959204792976379,grad_norm: 0.7223732289938619, iteration: 337013
loss: 1.026680827140808,grad_norm: 0.9999997226936421, iteration: 337014
loss: 1.0111421346664429,grad_norm: 0.9999990667185231, iteration: 337015
loss: 1.01603364944458,grad_norm: 0.5872972431928069, iteration: 337016
loss: 1.0081087350845337,grad_norm: 0.8985620920454753, iteration: 337017
loss: 1.0192527770996094,grad_norm: 0.7531789483539894, iteration: 337018
loss: 1.0413990020751953,grad_norm: 0.9288504291330355, iteration: 337019
loss: 0.9866368174552917,grad_norm: 0.881556353689369, iteration: 337020
loss: 0.9703689217567444,grad_norm: 0.8846978308534192, iteration: 337021
loss: 1.0296334028244019,grad_norm: 0.7003002897459445, iteration: 337022
loss: 0.991369366645813,grad_norm: 0.8529020175159207, iteration: 337023
loss: 1.0157772302627563,grad_norm: 0.7491353827782299, iteration: 337024
loss: 0.9798755645751953,grad_norm: 0.8131083225033688, iteration: 337025
loss: 1.017694354057312,grad_norm: 0.7172446411696752, iteration: 337026
loss: 0.9819232225418091,grad_norm: 0.812953764024589, iteration: 337027
loss: 1.020445704460144,grad_norm: 0.9234041330373557, iteration: 337028
loss: 1.0012924671173096,grad_norm: 0.9141769068483747, iteration: 337029
loss: 1.057784914970398,grad_norm: 0.815650392788411, iteration: 337030
loss: 0.9873926639556885,grad_norm: 0.8155784507126389, iteration: 337031
loss: 1.0265425443649292,grad_norm: 0.8223014203186773, iteration: 337032
loss: 1.008216381072998,grad_norm: 0.8102826409981282, iteration: 337033
loss: 0.9769573211669922,grad_norm: 0.8221517543489103, iteration: 337034
loss: 0.9878735542297363,grad_norm: 0.8456220064323193, iteration: 337035
loss: 0.9674941301345825,grad_norm: 0.9214944015620713, iteration: 337036
loss: 0.9989001154899597,grad_norm: 0.9251419604099753, iteration: 337037
loss: 1.0197453498840332,grad_norm: 0.9919432641845375, iteration: 337038
loss: 1.0074200630187988,grad_norm: 0.9999990672244996, iteration: 337039
loss: 1.0176033973693848,grad_norm: 0.8516961936757549, iteration: 337040
loss: 1.0337588787078857,grad_norm: 0.8739272919708452, iteration: 337041
loss: 1.0829436779022217,grad_norm: 0.9999991140077903, iteration: 337042
loss: 0.987928569316864,grad_norm: 0.7834781531613388, iteration: 337043
loss: 1.0205708742141724,grad_norm: 0.9999990065931432, iteration: 337044
loss: 1.0145763158798218,grad_norm: 0.9194958132242099, iteration: 337045
loss: 1.0070570707321167,grad_norm: 0.9999991702584468, iteration: 337046
loss: 1.0072935819625854,grad_norm: 0.9095310999113995, iteration: 337047
loss: 0.9903309345245361,grad_norm: 0.999999066909215, iteration: 337048
loss: 0.9854310154914856,grad_norm: 0.7885827927063681, iteration: 337049
loss: 0.9881609678268433,grad_norm: 0.7164164412380029, iteration: 337050
loss: 1.014670729637146,grad_norm: 0.7385539737795495, iteration: 337051
loss: 0.988071620464325,grad_norm: 0.6999204028072733, iteration: 337052
loss: 1.0214749574661255,grad_norm: 0.7914329485217427, iteration: 337053
loss: 0.9568064212799072,grad_norm: 0.6747271203343104, iteration: 337054
loss: 0.9869803190231323,grad_norm: 0.8943788358182523, iteration: 337055
loss: 1.0014910697937012,grad_norm: 0.7551526751033011, iteration: 337056
loss: 1.0390348434448242,grad_norm: 0.9421728950663568, iteration: 337057
loss: 0.9863361716270447,grad_norm: 0.9511820908942332, iteration: 337058
loss: 1.0087337493896484,grad_norm: 0.856936071226239, iteration: 337059
loss: 1.0171420574188232,grad_norm: 0.86053550177623, iteration: 337060
loss: 0.9909693598747253,grad_norm: 0.8986296814351435, iteration: 337061
loss: 1.0153883695602417,grad_norm: 0.8423787054636335, iteration: 337062
loss: 0.989764928817749,grad_norm: 0.8526594004210395, iteration: 337063
loss: 0.9943616986274719,grad_norm: 0.7785030234198848, iteration: 337064
loss: 1.0033732652664185,grad_norm: 0.7913436248787725, iteration: 337065
loss: 0.9641536474227905,grad_norm: 0.9999994609653672, iteration: 337066
loss: 1.0172977447509766,grad_norm: 0.7673838500032092, iteration: 337067
loss: 1.0176583528518677,grad_norm: 0.8717667026153466, iteration: 337068
loss: 0.9791035652160645,grad_norm: 0.9999993275440106, iteration: 337069
loss: 0.9955852627754211,grad_norm: 0.9206502316698845, iteration: 337070
loss: 1.0331926345825195,grad_norm: 0.8878211277525234, iteration: 337071
loss: 0.9875019788742065,grad_norm: 0.7911599410873409, iteration: 337072
loss: 0.9781990647315979,grad_norm: 0.7928671709746498, iteration: 337073
loss: 0.9743993282318115,grad_norm: 0.8075305042047626, iteration: 337074
loss: 1.038623332977295,grad_norm: 0.9999992831896793, iteration: 337075
loss: 1.0299715995788574,grad_norm: 0.8678994973656784, iteration: 337076
loss: 0.9708702564239502,grad_norm: 0.8098576725291298, iteration: 337077
loss: 0.9978609681129456,grad_norm: 0.8219351292942969, iteration: 337078
loss: 1.0010520219802856,grad_norm: 0.8546204060890912, iteration: 337079
loss: 1.0746941566467285,grad_norm: 0.9999995840492732, iteration: 337080
loss: 1.011443853378296,grad_norm: 0.9999994464663221, iteration: 337081
loss: 0.9849233627319336,grad_norm: 0.9426960269043398, iteration: 337082
loss: 1.0049818754196167,grad_norm: 0.8752979484924392, iteration: 337083
loss: 0.9931432008743286,grad_norm: 0.9999989967302405, iteration: 337084
loss: 1.012329339981079,grad_norm: 0.7342415731555066, iteration: 337085
loss: 0.9838337898254395,grad_norm: 0.8160910598848565, iteration: 337086
loss: 1.023725986480713,grad_norm: 0.7278380597217046, iteration: 337087
loss: 0.9805009365081787,grad_norm: 0.9179016345232425, iteration: 337088
loss: 1.0104409456253052,grad_norm: 0.9999990564195198, iteration: 337089
loss: 0.9996693134307861,grad_norm: 0.9323438212651369, iteration: 337090
loss: 1.0527534484863281,grad_norm: 0.9999992750393102, iteration: 337091
loss: 1.0161799192428589,grad_norm: 0.7974208119962859, iteration: 337092
loss: 1.0249680280685425,grad_norm: 0.9999996667620336, iteration: 337093
loss: 0.9788960814476013,grad_norm: 0.8621359881223354, iteration: 337094
loss: 0.9631127119064331,grad_norm: 0.8214884170903439, iteration: 337095
loss: 0.9821847677230835,grad_norm: 0.9732765323070901, iteration: 337096
loss: 1.021324634552002,grad_norm: 0.7936493091284221, iteration: 337097
loss: 1.038766622543335,grad_norm: 0.9189795449287529, iteration: 337098
loss: 1.0160865783691406,grad_norm: 0.9621961902157966, iteration: 337099
loss: 0.9812282919883728,grad_norm: 0.8310797713310408, iteration: 337100
loss: 0.9875891208648682,grad_norm: 0.9999992463223475, iteration: 337101
loss: 0.9853255748748779,grad_norm: 0.7961042806907681, iteration: 337102
loss: 0.9727230072021484,grad_norm: 0.9448370030962173, iteration: 337103
loss: 1.0210379362106323,grad_norm: 0.8584866698873622, iteration: 337104
loss: 1.0031884908676147,grad_norm: 0.9938110488383483, iteration: 337105
loss: 0.9861288070678711,grad_norm: 0.6846908643694598, iteration: 337106
loss: 1.025735855102539,grad_norm: 0.8433999945773736, iteration: 337107
loss: 0.9587053060531616,grad_norm: 0.9657079476492286, iteration: 337108
loss: 1.0016175508499146,grad_norm: 0.7634200657349741, iteration: 337109
loss: 1.0445871353149414,grad_norm: 0.99999926696395, iteration: 337110
loss: 0.9707502126693726,grad_norm: 0.7759462016767464, iteration: 337111
loss: 1.0349316596984863,grad_norm: 0.7723533742196004, iteration: 337112
loss: 1.0113102197647095,grad_norm: 0.8424399597659852, iteration: 337113
loss: 0.9733203053474426,grad_norm: 0.8698464299943692, iteration: 337114
loss: 1.013543725013733,grad_norm: 0.6619000327651527, iteration: 337115
loss: 0.9648809432983398,grad_norm: 0.7893861165553513, iteration: 337116
loss: 0.9921285510063171,grad_norm: 0.7336925441644443, iteration: 337117
loss: 0.9855008125305176,grad_norm: 0.7261719987406748, iteration: 337118
loss: 0.9766899347305298,grad_norm: 0.8033301363381257, iteration: 337119
loss: 0.9639456272125244,grad_norm: 0.8759711396593898, iteration: 337120
loss: 1.0073062181472778,grad_norm: 0.7965729321540448, iteration: 337121
loss: 0.9913105368614197,grad_norm: 0.9999991232948193, iteration: 337122
loss: 1.006160855293274,grad_norm: 0.9540108145851784, iteration: 337123
loss: 0.9877775311470032,grad_norm: 0.7334569391115516, iteration: 337124
loss: 0.9673340320587158,grad_norm: 0.8122037853964541, iteration: 337125
loss: 0.9799123406410217,grad_norm: 0.7401949318263977, iteration: 337126
loss: 0.9950531721115112,grad_norm: 0.8638263229170012, iteration: 337127
loss: 1.0242918729782104,grad_norm: 0.7785827784060109, iteration: 337128
loss: 1.0325334072113037,grad_norm: 0.9999990576582554, iteration: 337129
loss: 0.9634144306182861,grad_norm: 0.94370799152341, iteration: 337130
loss: 0.9615221619606018,grad_norm: 0.8924213618662571, iteration: 337131
loss: 1.0215013027191162,grad_norm: 0.9792228166340298, iteration: 337132
loss: 1.0351706743240356,grad_norm: 0.9999997816650001, iteration: 337133
loss: 0.981481671333313,grad_norm: 0.8610002697157778, iteration: 337134
loss: 0.999075174331665,grad_norm: 0.9125224077182671, iteration: 337135
loss: 1.0111082792282104,grad_norm: 0.8343301714950279, iteration: 337136
loss: 1.0103638172149658,grad_norm: 0.8540421919705984, iteration: 337137
loss: 1.0297966003417969,grad_norm: 0.9999991488678565, iteration: 337138
loss: 1.0116989612579346,grad_norm: 0.8331306133040132, iteration: 337139
loss: 1.0269570350646973,grad_norm: 0.8424440934338662, iteration: 337140
loss: 0.9998092651367188,grad_norm: 0.7469164071230945, iteration: 337141
loss: 1.0267151594161987,grad_norm: 0.8392242668441925, iteration: 337142
loss: 1.0031098127365112,grad_norm: 0.7835148458457452, iteration: 337143
loss: 0.9657002091407776,grad_norm: 0.9943481509586092, iteration: 337144
loss: 1.0004398822784424,grad_norm: 0.8819340111019548, iteration: 337145
loss: 0.9846195578575134,grad_norm: 0.7726190645722917, iteration: 337146
loss: 1.0265034437179565,grad_norm: 0.8923037332216096, iteration: 337147
loss: 1.0162675380706787,grad_norm: 0.8340667924136262, iteration: 337148
loss: 1.0295398235321045,grad_norm: 0.9999991212031134, iteration: 337149
loss: 0.9680657386779785,grad_norm: 0.9728046208981748, iteration: 337150
loss: 0.998003363609314,grad_norm: 0.9922487504598659, iteration: 337151
loss: 1.0044684410095215,grad_norm: 0.7775213380851533, iteration: 337152
loss: 1.000109314918518,grad_norm: 0.9975446256262261, iteration: 337153
loss: 1.0105865001678467,grad_norm: 0.6909255563281689, iteration: 337154
loss: 1.0254287719726562,grad_norm: 0.9516931867346201, iteration: 337155
loss: 0.9902589321136475,grad_norm: 0.7151776627811102, iteration: 337156
loss: 0.9901992082595825,grad_norm: 0.8626200365536244, iteration: 337157
loss: 0.956244945526123,grad_norm: 0.9155875875759294, iteration: 337158
loss: 1.006447196006775,grad_norm: 0.9276296446106767, iteration: 337159
loss: 1.0342316627502441,grad_norm: 0.8687680739635733, iteration: 337160
loss: 1.0141204595565796,grad_norm: 0.823674712815349, iteration: 337161
loss: 1.0061031579971313,grad_norm: 0.9999991618023424, iteration: 337162
loss: 1.0781069993972778,grad_norm: 0.71964369240489, iteration: 337163
loss: 0.9861671328544617,grad_norm: 0.7009096134882493, iteration: 337164
loss: 1.0001046657562256,grad_norm: 0.8570014919017432, iteration: 337165
loss: 1.0041898488998413,grad_norm: 0.8879228398732552, iteration: 337166
loss: 1.021674633026123,grad_norm: 0.9042070988119071, iteration: 337167
loss: 1.0174018144607544,grad_norm: 0.9361867171136792, iteration: 337168
loss: 1.0250155925750732,grad_norm: 0.8471834805923849, iteration: 337169
loss: 1.0267016887664795,grad_norm: 0.7878890990272029, iteration: 337170
loss: 0.9805890917778015,grad_norm: 0.9158728288362711, iteration: 337171
loss: 1.0083268880844116,grad_norm: 0.8249967407170357, iteration: 337172
loss: 1.0163853168487549,grad_norm: 0.9343706941073374, iteration: 337173
loss: 0.9930764436721802,grad_norm: 0.8756797392672412, iteration: 337174
loss: 0.9847303628921509,grad_norm: 0.7286518114518162, iteration: 337175
loss: 0.9914900064468384,grad_norm: 0.8988305429969717, iteration: 337176
loss: 0.9416687488555908,grad_norm: 0.9790318252516681, iteration: 337177
loss: 0.9682409167289734,grad_norm: 0.7139046081890142, iteration: 337178
loss: 1.009947419166565,grad_norm: 0.7535220560252222, iteration: 337179
loss: 1.0176522731781006,grad_norm: 0.8415261940104726, iteration: 337180
loss: 1.014463186264038,grad_norm: 0.9126798560592493, iteration: 337181
loss: 0.9930947422981262,grad_norm: 0.8309654994842531, iteration: 337182
loss: 0.9963339567184448,grad_norm: 0.9100513303849598, iteration: 337183
loss: 0.9626051783561707,grad_norm: 0.8348321500440845, iteration: 337184
loss: 1.005774974822998,grad_norm: 0.7994832842606137, iteration: 337185
loss: 0.9766380786895752,grad_norm: 0.7505607095130764, iteration: 337186
loss: 1.0185619592666626,grad_norm: 0.6709155543236158, iteration: 337187
loss: 0.9624359011650085,grad_norm: 0.8591148914153828, iteration: 337188
loss: 0.9846259355545044,grad_norm: 0.7245541587351721, iteration: 337189
loss: 1.0100128650665283,grad_norm: 0.7569884209526729, iteration: 337190
loss: 0.9737281203269958,grad_norm: 0.9999991792430236, iteration: 337191
loss: 0.9836631417274475,grad_norm: 0.8690531361102293, iteration: 337192
loss: 0.9777956008911133,grad_norm: 0.9537635935613215, iteration: 337193
loss: 0.9825356006622314,grad_norm: 0.8065586545385602, iteration: 337194
loss: 1.0063003301620483,grad_norm: 0.9999991686138691, iteration: 337195
loss: 0.988960862159729,grad_norm: 0.7448380368137351, iteration: 337196
loss: 0.9884663820266724,grad_norm: 0.888190164652637, iteration: 337197
loss: 1.0021193027496338,grad_norm: 0.9999994873375267, iteration: 337198
loss: 1.0145031213760376,grad_norm: 0.6660055432629735, iteration: 337199
loss: 0.9724902510643005,grad_norm: 0.8318278188411449, iteration: 337200
loss: 1.0112677812576294,grad_norm: 0.7446585258766352, iteration: 337201
loss: 1.0020347833633423,grad_norm: 0.7792924607101387, iteration: 337202
loss: 0.9519810676574707,grad_norm: 0.7870983391189885, iteration: 337203
loss: 0.962875485420227,grad_norm: 0.9999990547680833, iteration: 337204
loss: 0.9896447062492371,grad_norm: 0.798462375422154, iteration: 337205
loss: 1.0239795446395874,grad_norm: 0.7246035049693625, iteration: 337206
loss: 0.9968048930168152,grad_norm: 0.716978044574704, iteration: 337207
loss: 1.0104070901870728,grad_norm: 0.754110580017943, iteration: 337208
loss: 0.9948013424873352,grad_norm: 0.9822362663188847, iteration: 337209
loss: 0.9765090346336365,grad_norm: 0.9030762978090515, iteration: 337210
loss: 0.9930056929588318,grad_norm: 0.890033893519611, iteration: 337211
loss: 0.9806731343269348,grad_norm: 0.9800215189627758, iteration: 337212
loss: 0.9855775833129883,grad_norm: 0.9479931948887192, iteration: 337213
loss: 1.0332995653152466,grad_norm: 0.9229752393135574, iteration: 337214
loss: 1.06657874584198,grad_norm: 0.9999993606614227, iteration: 337215
loss: 0.9969760179519653,grad_norm: 0.8024735045345256, iteration: 337216
loss: 0.9738544821739197,grad_norm: 0.8951854567941524, iteration: 337217
loss: 0.988021731376648,grad_norm: 0.8638254623869192, iteration: 337218
loss: 1.0180771350860596,grad_norm: 0.8785001173023671, iteration: 337219
loss: 0.9942918419837952,grad_norm: 0.8323424653583922, iteration: 337220
loss: 0.9778705835342407,grad_norm: 0.7374503287708031, iteration: 337221
loss: 1.0042346715927124,grad_norm: 0.9999989655525414, iteration: 337222
loss: 0.9980573058128357,grad_norm: 0.7445527650911632, iteration: 337223
loss: 0.9462931156158447,grad_norm: 0.9999991124399976, iteration: 337224
loss: 0.9955275654792786,grad_norm: 0.7718617612833918, iteration: 337225
loss: 1.0109896659851074,grad_norm: 0.7936736560385368, iteration: 337226
loss: 1.014824628829956,grad_norm: 0.8017925158903169, iteration: 337227
loss: 1.0111998319625854,grad_norm: 0.8199986539562918, iteration: 337228
loss: 1.0041677951812744,grad_norm: 0.7869171446726781, iteration: 337229
loss: 0.9800527095794678,grad_norm: 0.801214806181187, iteration: 337230
loss: 0.9879777431488037,grad_norm: 0.7875732230562258, iteration: 337231
loss: 0.9798712730407715,grad_norm: 0.8477878520954757, iteration: 337232
loss: 1.0132192373275757,grad_norm: 0.8610624808958195, iteration: 337233
loss: 0.9760362505912781,grad_norm: 0.8259520103676905, iteration: 337234
loss: 1.0019546747207642,grad_norm: 0.7671196006147081, iteration: 337235
loss: 0.9956660270690918,grad_norm: 0.703581902287878, iteration: 337236
loss: 1.0211001634597778,grad_norm: 0.9999993596087635, iteration: 337237
loss: 0.9973130822181702,grad_norm: 0.9999990240333424, iteration: 337238
loss: 1.0121937990188599,grad_norm: 0.9999991050498657, iteration: 337239
loss: 0.990502655506134,grad_norm: 0.8068470099383254, iteration: 337240
loss: 1.066373348236084,grad_norm: 0.789832577270778, iteration: 337241
loss: 1.0338139533996582,grad_norm: 0.9999991926288241, iteration: 337242
loss: 1.0298234224319458,grad_norm: 0.7882113861019695, iteration: 337243
loss: 0.9826943874359131,grad_norm: 0.7603691977431006, iteration: 337244
loss: 1.040622353553772,grad_norm: 0.999999095050202, iteration: 337245
loss: 0.9727292656898499,grad_norm: 0.8510410053764104, iteration: 337246
loss: 1.0192080736160278,grad_norm: 0.9999991856515115, iteration: 337247
loss: 0.9919605851173401,grad_norm: 0.9194708311717367, iteration: 337248
loss: 0.9865533113479614,grad_norm: 0.7835947415588675, iteration: 337249
loss: 1.0036216974258423,grad_norm: 0.8312111276925187, iteration: 337250
loss: 0.976530134677887,grad_norm: 0.9613250128231776, iteration: 337251
loss: 1.0378506183624268,grad_norm: 0.9999994979992204, iteration: 337252
loss: 1.0418338775634766,grad_norm: 0.7236926445521616, iteration: 337253
loss: 1.0213159322738647,grad_norm: 0.757414741931155, iteration: 337254
loss: 1.0145823955535889,grad_norm: 0.8952676607209902, iteration: 337255
loss: 0.9843443036079407,grad_norm: 0.9889378214524773, iteration: 337256
loss: 1.0372912883758545,grad_norm: 0.9999992795061968, iteration: 337257
loss: 0.9770321846008301,grad_norm: 0.7032854140647425, iteration: 337258
loss: 0.9876483678817749,grad_norm: 0.7300920775662525, iteration: 337259
loss: 1.0204095840454102,grad_norm: 0.9274532389818794, iteration: 337260
loss: 1.030405879020691,grad_norm: 0.8138268257708221, iteration: 337261
loss: 1.0407280921936035,grad_norm: 0.9264158314962396, iteration: 337262
loss: 0.9715328812599182,grad_norm: 0.8480679721914871, iteration: 337263
loss: 0.9577168822288513,grad_norm: 0.7810771136537994, iteration: 337264
loss: 1.0296365022659302,grad_norm: 0.8184024630499281, iteration: 337265
loss: 0.9972603917121887,grad_norm: 0.6723045396788504, iteration: 337266
loss: 1.0253236293792725,grad_norm: 0.751812906924809, iteration: 337267
loss: 0.9735714197158813,grad_norm: 0.6462072193015885, iteration: 337268
loss: 0.9842868447303772,grad_norm: 0.775759269722179, iteration: 337269
loss: 0.9819384813308716,grad_norm: 0.949713924449018, iteration: 337270
loss: 1.0342906713485718,grad_norm: 0.9999990493756346, iteration: 337271
loss: 1.0374739170074463,grad_norm: 0.9534156123195816, iteration: 337272
loss: 0.9905250072479248,grad_norm: 0.9866054301930911, iteration: 337273
loss: 1.0247691869735718,grad_norm: 0.9117556546002208, iteration: 337274
loss: 1.0121225118637085,grad_norm: 0.70721814887406, iteration: 337275
loss: 1.0299506187438965,grad_norm: 0.9999992415475125, iteration: 337276
loss: 1.0008193254470825,grad_norm: 0.8432753623337851, iteration: 337277
loss: 0.9945359826087952,grad_norm: 1.0000000047034676, iteration: 337278
loss: 0.9992267489433289,grad_norm: 0.7854004279678328, iteration: 337279
loss: 1.0484514236450195,grad_norm: 0.8962291950124107, iteration: 337280
loss: 0.9760324358940125,grad_norm: 0.8790229841206938, iteration: 337281
loss: 1.0146026611328125,grad_norm: 0.8863965127304949, iteration: 337282
loss: 0.9563403129577637,grad_norm: 0.7188379870180257, iteration: 337283
loss: 0.9861771464347839,grad_norm: 0.9066016000550325, iteration: 337284
loss: 1.0123027563095093,grad_norm: 0.848025240492239, iteration: 337285
loss: 0.9753322005271912,grad_norm: 0.8569419636318341, iteration: 337286
loss: 1.0682367086410522,grad_norm: 0.83770027206475, iteration: 337287
loss: 1.0120283365249634,grad_norm: 0.7567921759413674, iteration: 337288
loss: 1.0287116765975952,grad_norm: 0.8352386104697458, iteration: 337289
loss: 0.9606578350067139,grad_norm: 0.7942072609689838, iteration: 337290
loss: 1.0086053609848022,grad_norm: 0.7674970045181703, iteration: 337291
loss: 0.9860283136367798,grad_norm: 0.803418178381644, iteration: 337292
loss: 1.0366026163101196,grad_norm: 0.6739548524966963, iteration: 337293
loss: 1.027514100074768,grad_norm: 0.8980228686996548, iteration: 337294
loss: 1.007564663887024,grad_norm: 0.8824015420469504, iteration: 337295
loss: 0.9821439385414124,grad_norm: 0.9058070695495847, iteration: 337296
loss: 0.9943252205848694,grad_norm: 0.8802108766744987, iteration: 337297
loss: 1.0069128274917603,grad_norm: 0.7811299424725434, iteration: 337298
loss: 1.006483554840088,grad_norm: 0.9106891304722144, iteration: 337299
loss: 1.0019582509994507,grad_norm: 0.7245823255490929, iteration: 337300
loss: 1.016741156578064,grad_norm: 0.9677712383987, iteration: 337301
loss: 0.9918582439422607,grad_norm: 0.7189869710339135, iteration: 337302
loss: 1.0261024236679077,grad_norm: 0.8582532023782742, iteration: 337303
loss: 0.9863349199295044,grad_norm: 0.7603741443083132, iteration: 337304
loss: 1.0279042720794678,grad_norm: 0.7346899957481325, iteration: 337305
loss: 0.9677257537841797,grad_norm: 0.9276689836969486, iteration: 337306
loss: 1.0008679628372192,grad_norm: 0.7513870852578916, iteration: 337307
loss: 0.9813098311424255,grad_norm: 0.7307905040655113, iteration: 337308
loss: 1.0307986736297607,grad_norm: 0.9999990954275164, iteration: 337309
loss: 0.9963453412055969,grad_norm: 0.9999991193417717, iteration: 337310
loss: 1.0209174156188965,grad_norm: 0.7798457446570096, iteration: 337311
loss: 1.0139832496643066,grad_norm: 0.8223103107918233, iteration: 337312
loss: 1.0024110078811646,grad_norm: 0.7494571196262979, iteration: 337313
loss: 0.9785683155059814,grad_norm: 0.8331967128631604, iteration: 337314
loss: 1.0460858345031738,grad_norm: 0.7901840002973733, iteration: 337315
loss: 1.0232245922088623,grad_norm: 0.956700683556048, iteration: 337316
loss: 1.0328947305679321,grad_norm: 0.8515157630552413, iteration: 337317
loss: 1.0028407573699951,grad_norm: 0.7542399290944592, iteration: 337318
loss: 0.9965264797210693,grad_norm: 0.8065587712224468, iteration: 337319
loss: 1.0148462057113647,grad_norm: 0.8411624729443227, iteration: 337320
loss: 0.9575778245925903,grad_norm: 0.9730910313981956, iteration: 337321
loss: 0.9723951816558838,grad_norm: 0.8443492553296218, iteration: 337322
loss: 0.9771153330802917,grad_norm: 0.9999991338426188, iteration: 337323
loss: 0.9882559776306152,grad_norm: 0.9197195761897884, iteration: 337324
loss: 0.9842875599861145,grad_norm: 0.7507861542065072, iteration: 337325
loss: 0.9831605553627014,grad_norm: 0.7632944640520554, iteration: 337326
loss: 1.031563401222229,grad_norm: 0.9340498658153051, iteration: 337327
loss: 1.0135068893432617,grad_norm: 0.8435263839087214, iteration: 337328
loss: 1.0720667839050293,grad_norm: 0.9999992583290626, iteration: 337329
loss: 0.9441647529602051,grad_norm: 0.8476209770226499, iteration: 337330
loss: 0.9833874702453613,grad_norm: 0.6640889677586213, iteration: 337331
loss: 0.984718382358551,grad_norm: 0.8041279389226614, iteration: 337332
loss: 0.9980146884918213,grad_norm: 0.7868780455027441, iteration: 337333
loss: 1.027884840965271,grad_norm: 0.7366632195072339, iteration: 337334
loss: 0.9959871172904968,grad_norm: 0.82859769830524, iteration: 337335
loss: 1.012627124786377,grad_norm: 0.9338113995121581, iteration: 337336
loss: 0.9935195446014404,grad_norm: 0.8634380657344087, iteration: 337337
loss: 1.0181951522827148,grad_norm: 0.7414296183027276, iteration: 337338
loss: 1.0005583763122559,grad_norm: 0.8332952243393227, iteration: 337339
loss: 1.0053308010101318,grad_norm: 0.9137337189602667, iteration: 337340
loss: 1.0144004821777344,grad_norm: 0.7954630739336116, iteration: 337341
loss: 0.9662761092185974,grad_norm: 0.8318838048093585, iteration: 337342
loss: 1.010028600692749,grad_norm: 0.7671342239003446, iteration: 337343
loss: 0.994704008102417,grad_norm: 0.9135187492623296, iteration: 337344
loss: 1.014989972114563,grad_norm: 0.8141165222225732, iteration: 337345
loss: 1.0051960945129395,grad_norm: 0.8369744221251821, iteration: 337346
loss: 0.9957612752914429,grad_norm: 0.9176186364862351, iteration: 337347
loss: 0.9987438917160034,grad_norm: 0.7939003252996158, iteration: 337348
loss: 1.0124112367630005,grad_norm: 0.9999999203843832, iteration: 337349
loss: 1.0744256973266602,grad_norm: 0.7120926669362133, iteration: 337350
loss: 1.0065706968307495,grad_norm: 0.8334662387515454, iteration: 337351
loss: 1.0374932289123535,grad_norm: 0.8435660352275167, iteration: 337352
loss: 0.9843268990516663,grad_norm: 0.8575521165239992, iteration: 337353
loss: 1.011799693107605,grad_norm: 0.9464381796107255, iteration: 337354
loss: 0.9746133685112,grad_norm: 0.7456832251204315, iteration: 337355
loss: 1.0023118257522583,grad_norm: 0.6851291365523781, iteration: 337356
loss: 0.9946363568305969,grad_norm: 0.8738101993127896, iteration: 337357
loss: 0.9953551292419434,grad_norm: 0.9676124580437271, iteration: 337358
loss: 1.0089892148971558,grad_norm: 0.9939323585204964, iteration: 337359
loss: 0.9820734262466431,grad_norm: 0.7566899784735729, iteration: 337360
loss: 0.9911578893661499,grad_norm: 0.7032004453751783, iteration: 337361
loss: 1.0109156370162964,grad_norm: 0.8567342674796297, iteration: 337362
loss: 0.9990285038948059,grad_norm: 0.8398319355306403, iteration: 337363
loss: 0.9891814589500427,grad_norm: 0.8633695530690324, iteration: 337364
loss: 0.9847534894943237,grad_norm: 0.7284826319356891, iteration: 337365
loss: 1.0229144096374512,grad_norm: 0.6215258023640396, iteration: 337366
loss: 0.9963605403900146,grad_norm: 0.912677840169021, iteration: 337367
loss: 1.0340458154678345,grad_norm: 0.850295920850808, iteration: 337368
loss: 0.9715145826339722,grad_norm: 0.9151628523825942, iteration: 337369
loss: 0.9737027883529663,grad_norm: 0.8295695251279308, iteration: 337370
loss: 1.0399645566940308,grad_norm: 0.9999999210800867, iteration: 337371
loss: 1.0166850090026855,grad_norm: 0.9442981666674425, iteration: 337372
loss: 1.0107759237289429,grad_norm: 0.7099000873122892, iteration: 337373
loss: 0.9886043667793274,grad_norm: 0.6973873452312958, iteration: 337374
loss: 0.989955723285675,grad_norm: 0.7806144884481675, iteration: 337375
loss: 1.0265721082687378,grad_norm: 0.9999992265973675, iteration: 337376
loss: 0.9699490666389465,grad_norm: 0.7251827861527291, iteration: 337377
loss: 0.9711752533912659,grad_norm: 0.8223572269696507, iteration: 337378
loss: 0.986650288105011,grad_norm: 0.9144492970044461, iteration: 337379
loss: 1.009663701057434,grad_norm: 0.8659326021643189, iteration: 337380
loss: 1.1028311252593994,grad_norm: 0.9004780002811231, iteration: 337381
loss: 1.0274778604507446,grad_norm: 0.9999990558074517, iteration: 337382
loss: 1.0272530317306519,grad_norm: 0.7935137891609839, iteration: 337383
loss: 0.9514778852462769,grad_norm: 0.8349184361111139, iteration: 337384
loss: 1.0037319660186768,grad_norm: 0.850815840360301, iteration: 337385
loss: 1.0137981176376343,grad_norm: 0.732375264465171, iteration: 337386
loss: 1.011176586151123,grad_norm: 0.9999991147585728, iteration: 337387
loss: 1.0113064050674438,grad_norm: 0.8159931685889114, iteration: 337388
loss: 1.3797545433044434,grad_norm: 0.9999993399728392, iteration: 337389
loss: 1.024259090423584,grad_norm: 0.9066320174403014, iteration: 337390
loss: 1.0066115856170654,grad_norm: 0.943455232172737, iteration: 337391
loss: 0.9880479574203491,grad_norm: 0.7572594016688918, iteration: 337392
loss: 0.9958749413490295,grad_norm: 0.9999990416164893, iteration: 337393
loss: 0.9657442569732666,grad_norm: 0.9999990911604687, iteration: 337394
loss: 1.0122793912887573,grad_norm: 0.7021142028090792, iteration: 337395
loss: 0.9770941734313965,grad_norm: 0.8338410303293171, iteration: 337396
loss: 0.9659354090690613,grad_norm: 0.8638502195341915, iteration: 337397
loss: 0.9506372213363647,grad_norm: 0.9999990807163276, iteration: 337398
loss: 0.9881055951118469,grad_norm: 0.7770093567712542, iteration: 337399
loss: 0.9834675788879395,grad_norm: 0.8583556918166225, iteration: 337400
loss: 1.0194834470748901,grad_norm: 0.7214957611327888, iteration: 337401
loss: 1.0164070129394531,grad_norm: 0.8242650745644992, iteration: 337402
loss: 0.9813008904457092,grad_norm: 0.7789460407332759, iteration: 337403
loss: 1.0309932231903076,grad_norm: 0.8262114687224712, iteration: 337404
loss: 1.0063221454620361,grad_norm: 0.8890731218619919, iteration: 337405
loss: 1.0784859657287598,grad_norm: 0.8725893826124063, iteration: 337406
loss: 1.008890151977539,grad_norm: 0.7160222950657222, iteration: 337407
loss: 1.009002685546875,grad_norm: 0.9999991161547898, iteration: 337408
loss: 0.976491391658783,grad_norm: 0.9926026688719589, iteration: 337409
loss: 0.9986740350723267,grad_norm: 0.8430874270243762, iteration: 337410
loss: 1.001092791557312,grad_norm: 0.7793395118412785, iteration: 337411
loss: 0.9919329285621643,grad_norm: 0.8896886236346834, iteration: 337412
loss: 1.022567868232727,grad_norm: 0.8728703841370217, iteration: 337413
loss: 0.9951487183570862,grad_norm: 0.8473626429019572, iteration: 337414
loss: 0.9990396499633789,grad_norm: 0.8931633237771025, iteration: 337415
loss: 1.002411127090454,grad_norm: 0.9999989264406441, iteration: 337416
loss: 1.0364233255386353,grad_norm: 0.8284378926283792, iteration: 337417
loss: 0.9836009740829468,grad_norm: 0.9220425662529166, iteration: 337418
loss: 0.9662991762161255,grad_norm: 0.9999990331417432, iteration: 337419
loss: 1.027350664138794,grad_norm: 0.9170661496377398, iteration: 337420
loss: 1.002848505973816,grad_norm: 0.8722190494491449, iteration: 337421
loss: 1.0106102228164673,grad_norm: 0.8789056259803616, iteration: 337422
loss: 1.011444091796875,grad_norm: 0.8674483215091418, iteration: 337423
loss: 1.0174087285995483,grad_norm: 0.8736172137609239, iteration: 337424
loss: 0.9979010820388794,grad_norm: 0.9862744182534016, iteration: 337425
loss: 1.020672082901001,grad_norm: 0.7800572882562042, iteration: 337426
loss: 0.9879615306854248,grad_norm: 0.9361612026268356, iteration: 337427
loss: 1.0239055156707764,grad_norm: 0.9999993077728031, iteration: 337428
loss: 0.9803739190101624,grad_norm: 0.9218937004935612, iteration: 337429
loss: 0.978507936000824,grad_norm: 0.8996029181308516, iteration: 337430
loss: 1.036423683166504,grad_norm: 0.9999990642539882, iteration: 337431
loss: 0.9919223189353943,grad_norm: 0.8252324850402563, iteration: 337432
loss: 0.9884876012802124,grad_norm: 0.9999995452998832, iteration: 337433
loss: 0.9782994389533997,grad_norm: 0.923643622508219, iteration: 337434
loss: 1.0207760334014893,grad_norm: 0.9999992153496462, iteration: 337435
loss: 1.0001132488250732,grad_norm: 0.8091458904254576, iteration: 337436
loss: 1.0020664930343628,grad_norm: 0.7564188917029586, iteration: 337437
loss: 1.0151249170303345,grad_norm: 0.922063900212113, iteration: 337438
loss: 1.0029369592666626,grad_norm: 0.8258746679251792, iteration: 337439
loss: 0.9474228024482727,grad_norm: 0.8010612466667081, iteration: 337440
loss: 1.0120246410369873,grad_norm: 0.7524590320712883, iteration: 337441
loss: 1.0343763828277588,grad_norm: 0.9999993587857272, iteration: 337442
loss: 0.9794460535049438,grad_norm: 0.9879043273730556, iteration: 337443
loss: 0.995568037033081,grad_norm: 0.8948245622978166, iteration: 337444
loss: 0.993269681930542,grad_norm: 0.9430467962530338, iteration: 337445
loss: 0.997541606426239,grad_norm: 0.7242842679788566, iteration: 337446
loss: 1.0272082090377808,grad_norm: 0.8451447924537286, iteration: 337447
loss: 0.9890612363815308,grad_norm: 0.8453392757153124, iteration: 337448
loss: 1.0451033115386963,grad_norm: 0.9406124381355283, iteration: 337449
loss: 0.9949396848678589,grad_norm: 0.820062322579657, iteration: 337450
loss: 0.9533171653747559,grad_norm: 0.8451688687032266, iteration: 337451
loss: 1.0098040103912354,grad_norm: 0.8265148519208998, iteration: 337452
loss: 0.9673628807067871,grad_norm: 0.7911202138954333, iteration: 337453
loss: 0.9668719172477722,grad_norm: 0.7657026495222226, iteration: 337454
loss: 1.0193980932235718,grad_norm: 0.8190584893909112, iteration: 337455
loss: 1.0295803546905518,grad_norm: 0.7895467877873574, iteration: 337456
loss: 1.0152573585510254,grad_norm: 0.8667929537670508, iteration: 337457
loss: 1.0153460502624512,grad_norm: 0.9528312158404534, iteration: 337458
loss: 0.9711492657661438,grad_norm: 0.7868048999612602, iteration: 337459
loss: 1.0192316770553589,grad_norm: 0.7721044267976588, iteration: 337460
loss: 0.9752384424209595,grad_norm: 0.7176242924371763, iteration: 337461
loss: 1.0090157985687256,grad_norm: 0.873046155852314, iteration: 337462
loss: 0.9886651635169983,grad_norm: 0.9599307390933145, iteration: 337463
loss: 0.9706488251686096,grad_norm: 0.9381053658873088, iteration: 337464
loss: 0.9921983480453491,grad_norm: 0.8046870787832571, iteration: 337465
loss: 0.9895211458206177,grad_norm: 0.7625040020442652, iteration: 337466
loss: 1.0154435634613037,grad_norm: 0.9871078313924222, iteration: 337467
loss: 1.0917472839355469,grad_norm: 0.9999991556300546, iteration: 337468
loss: 1.0336045026779175,grad_norm: 0.8193002989301705, iteration: 337469
loss: 1.0138049125671387,grad_norm: 0.7126024709754814, iteration: 337470
loss: 0.9824498891830444,grad_norm: 0.812431762606912, iteration: 337471
loss: 0.9880858063697815,grad_norm: 0.9999993449274507, iteration: 337472
loss: 0.9516685605049133,grad_norm: 0.7566718113872958, iteration: 337473
loss: 1.0376883745193481,grad_norm: 0.7650281677381959, iteration: 337474
loss: 0.993194043636322,grad_norm: 0.9999992725975887, iteration: 337475
loss: 1.0042822360992432,grad_norm: 0.8553540532803452, iteration: 337476
loss: 0.9885273575782776,grad_norm: 0.8128761088424135, iteration: 337477
loss: 0.9624097943305969,grad_norm: 0.7514190734060756, iteration: 337478
loss: 0.994321346282959,grad_norm: 0.8824202261777697, iteration: 337479
loss: 0.9861863851547241,grad_norm: 0.8831617718194537, iteration: 337480
loss: 1.0337092876434326,grad_norm: 0.9999992652609169, iteration: 337481
loss: 1.0271507501602173,grad_norm: 0.8369167214394553, iteration: 337482
loss: 1.033510446548462,grad_norm: 0.8661617325836432, iteration: 337483
loss: 0.9821740984916687,grad_norm: 0.6546631642128243, iteration: 337484
loss: 1.044317603111267,grad_norm: 0.9999991902102869, iteration: 337485
loss: 1.0114303827285767,grad_norm: 0.9999997852434817, iteration: 337486
loss: 0.9776242971420288,grad_norm: 0.8393070753218124, iteration: 337487
loss: 1.0316129922866821,grad_norm: 0.8109508761387044, iteration: 337488
loss: 0.9945040345191956,grad_norm: 0.9123891156502648, iteration: 337489
loss: 0.9956023097038269,grad_norm: 0.8313793838332582, iteration: 337490
loss: 0.9989500641822815,grad_norm: 0.8395590214402134, iteration: 337491
loss: 0.9638485312461853,grad_norm: 0.833586156553699, iteration: 337492
loss: 1.0024384260177612,grad_norm: 0.9999993749501036, iteration: 337493
loss: 0.9961634278297424,grad_norm: 0.8197713005880236, iteration: 337494
loss: 0.988382875919342,grad_norm: 0.6982996588381465, iteration: 337495
loss: 0.9561460018157959,grad_norm: 0.9999999528854788, iteration: 337496
loss: 0.9885933995246887,grad_norm: 0.8443528826785132, iteration: 337497
loss: 1.003470540046692,grad_norm: 0.9277814414434876, iteration: 337498
loss: 0.9705439209938049,grad_norm: 0.8714362960609234, iteration: 337499
loss: 1.0215935707092285,grad_norm: 0.7927012954232401, iteration: 337500
loss: 1.0484780073165894,grad_norm: 0.96604807138551, iteration: 337501
loss: 1.0153450965881348,grad_norm: 0.8198084127291912, iteration: 337502
loss: 0.9685778021812439,grad_norm: 0.8211116378202544, iteration: 337503
loss: 1.0235000848770142,grad_norm: 0.8908574611585764, iteration: 337504
loss: 0.999722957611084,grad_norm: 0.9999991118342407, iteration: 337505
loss: 1.0681830644607544,grad_norm: 0.9412145007796421, iteration: 337506
loss: 0.9669784903526306,grad_norm: 0.7636540597552295, iteration: 337507
loss: 1.0256059169769287,grad_norm: 0.8490976456553126, iteration: 337508
loss: 0.9722698926925659,grad_norm: 0.7335378638349495, iteration: 337509
loss: 1.000062346458435,grad_norm: 0.8993595619809006, iteration: 337510
loss: 1.1610478162765503,grad_norm: 0.9999995760077848, iteration: 337511
loss: 0.9821943640708923,grad_norm: 0.7730952666935906, iteration: 337512
loss: 1.0550183057785034,grad_norm: 0.800112401283734, iteration: 337513
loss: 1.01746666431427,grad_norm: 0.738849160170883, iteration: 337514
loss: 1.0125057697296143,grad_norm: 0.9999991598265763, iteration: 337515
loss: 0.9988133311271667,grad_norm: 0.7782542092056349, iteration: 337516
loss: 0.9879747033119202,grad_norm: 0.8214696881017418, iteration: 337517
loss: 0.9863187670707703,grad_norm: 0.9999989682020213, iteration: 337518
loss: 0.9989874362945557,grad_norm: 0.6567676645799657, iteration: 337519
loss: 1.000376582145691,grad_norm: 0.9999994757264407, iteration: 337520
loss: 1.0060348510742188,grad_norm: 0.8937601437785605, iteration: 337521
loss: 1.0049822330474854,grad_norm: 0.9031991525772177, iteration: 337522
loss: 0.992133617401123,grad_norm: 0.9058339591072136, iteration: 337523
loss: 1.0189449787139893,grad_norm: 0.8044851831713293, iteration: 337524
loss: 1.0410451889038086,grad_norm: 0.8287749659963038, iteration: 337525
loss: 0.9628584980964661,grad_norm: 0.8424637432733744, iteration: 337526
loss: 1.0142461061477661,grad_norm: 0.9999990766926669, iteration: 337527
loss: 1.0127559900283813,grad_norm: 0.9639131542734071, iteration: 337528
loss: 0.9809298515319824,grad_norm: 0.856932371222976, iteration: 337529
loss: 1.014582872390747,grad_norm: 0.9137009751666876, iteration: 337530
loss: 0.9574862718582153,grad_norm: 0.868485032107063, iteration: 337531
loss: 0.9708243608474731,grad_norm: 0.7650925365777315, iteration: 337532
loss: 1.0816400051116943,grad_norm: 0.9999990362622672, iteration: 337533
loss: 0.952580451965332,grad_norm: 0.9920769409451435, iteration: 337534
loss: 0.9997780323028564,grad_norm: 0.8413117548546833, iteration: 337535
loss: 0.98348069190979,grad_norm: 0.9435244167590114, iteration: 337536
loss: 1.0156301259994507,grad_norm: 0.9999998243879203, iteration: 337537
loss: 0.9922255873680115,grad_norm: 0.7704749024418627, iteration: 337538
loss: 0.9809194207191467,grad_norm: 0.9999993936714282, iteration: 337539
loss: 1.0361601114273071,grad_norm: 0.9999990205765364, iteration: 337540
loss: 1.0505094528198242,grad_norm: 0.908489899863087, iteration: 337541
loss: 0.9719074368476868,grad_norm: 0.982446742458846, iteration: 337542
loss: 0.9512596130371094,grad_norm: 0.847220026575394, iteration: 337543
loss: 0.994660496711731,grad_norm: 0.9275689157706561, iteration: 337544
loss: 0.9986326098442078,grad_norm: 0.9226199682182485, iteration: 337545
loss: 0.9939651489257812,grad_norm: 0.9093090306453273, iteration: 337546
loss: 0.9998605251312256,grad_norm: 0.7980163682960952, iteration: 337547
loss: 0.9632134437561035,grad_norm: 0.901675881918122, iteration: 337548
loss: 1.006258487701416,grad_norm: 0.9999990054261135, iteration: 337549
loss: 1.0349334478378296,grad_norm: 0.920243668689967, iteration: 337550
loss: 1.001275897026062,grad_norm: 0.9999999351433179, iteration: 337551
loss: 1.045996904373169,grad_norm: 0.9999991125210297, iteration: 337552
loss: 0.9879844784736633,grad_norm: 0.8870715347044418, iteration: 337553
loss: 1.0245094299316406,grad_norm: 0.9999997763841095, iteration: 337554
loss: 0.9960713982582092,grad_norm: 0.9999990845977789, iteration: 337555
loss: 1.0241953134536743,grad_norm: 0.8294781873791695, iteration: 337556
loss: 1.003738284111023,grad_norm: 0.7746492851452093, iteration: 337557
loss: 0.9999202489852905,grad_norm: 0.915881078582834, iteration: 337558
loss: 1.047835350036621,grad_norm: 0.9999998861970328, iteration: 337559
loss: 0.9646165370941162,grad_norm: 0.9087632088168165, iteration: 337560
loss: 1.0037504434585571,grad_norm: 0.8809538219335183, iteration: 337561
loss: 0.9983835220336914,grad_norm: 0.8476238335918684, iteration: 337562
loss: 0.9938448667526245,grad_norm: 0.8905259680331664, iteration: 337563
loss: 1.0754239559173584,grad_norm: 0.9999993898684196, iteration: 337564
loss: 0.968374490737915,grad_norm: 0.758637162786057, iteration: 337565
loss: 1.0075868368148804,grad_norm: 0.9761828181029962, iteration: 337566
loss: 0.9965595602989197,grad_norm: 0.7814894386782295, iteration: 337567
loss: 0.9791056513786316,grad_norm: 0.8755162140746581, iteration: 337568
loss: 0.9973604083061218,grad_norm: 0.818418451293634, iteration: 337569
loss: 0.9453395009040833,grad_norm: 0.8469699132098621, iteration: 337570
loss: 0.9885531067848206,grad_norm: 0.8705502620754623, iteration: 337571
loss: 1.0115604400634766,grad_norm: 0.8215636690772772, iteration: 337572
loss: 1.028028130531311,grad_norm: 0.7462824961153397, iteration: 337573
loss: 1.0567090511322021,grad_norm: 0.9999992621847246, iteration: 337574
loss: 0.9840232729911804,grad_norm: 0.7282346828245221, iteration: 337575
loss: 1.017537236213684,grad_norm: 0.7703543314153197, iteration: 337576
loss: 1.0421671867370605,grad_norm: 0.7741724545863876, iteration: 337577
loss: 0.9852773547172546,grad_norm: 0.9999991453876077, iteration: 337578
loss: 1.0409642457962036,grad_norm: 0.8655142783746881, iteration: 337579
loss: 1.0212957859039307,grad_norm: 0.8181875121696671, iteration: 337580
loss: 1.002642273902893,grad_norm: 0.945262968001793, iteration: 337581
loss: 0.9760321974754333,grad_norm: 0.8033680974297328, iteration: 337582
loss: 0.9513852000236511,grad_norm: 0.7720102660919207, iteration: 337583
loss: 1.0088813304901123,grad_norm: 0.6642776093962551, iteration: 337584
loss: 0.9707742929458618,grad_norm: 0.7110661760737433, iteration: 337585
loss: 1.0010226964950562,grad_norm: 0.7061898500628608, iteration: 337586
loss: 0.9793687462806702,grad_norm: 0.7211224649757234, iteration: 337587
loss: 1.0277814865112305,grad_norm: 0.8743264644774381, iteration: 337588
loss: 1.0168895721435547,grad_norm: 0.772397592526594, iteration: 337589
loss: 0.9895715117454529,grad_norm: 0.9227894264531581, iteration: 337590
loss: 1.006432056427002,grad_norm: 0.8527166658300064, iteration: 337591
loss: 0.9967189431190491,grad_norm: 0.7280443439940238, iteration: 337592
loss: 0.9744446873664856,grad_norm: 0.7374262547509728, iteration: 337593
loss: 1.0227694511413574,grad_norm: 0.9437898591079873, iteration: 337594
loss: 0.9976460337638855,grad_norm: 0.7356769954143931, iteration: 337595
loss: 0.9935835003852844,grad_norm: 0.8020853976621944, iteration: 337596
loss: 0.9891221523284912,grad_norm: 0.8575734171482549, iteration: 337597
loss: 0.9810810089111328,grad_norm: 0.9288328019995853, iteration: 337598
loss: 1.131212830543518,grad_norm: 0.999999662813319, iteration: 337599
loss: 1.0271415710449219,grad_norm: 0.929702640021484, iteration: 337600
loss: 1.020404577255249,grad_norm: 0.7590650060937646, iteration: 337601
loss: 0.9978315234184265,grad_norm: 0.9744188140355592, iteration: 337602
loss: 0.9840853214263916,grad_norm: 0.8873014505987804, iteration: 337603
loss: 0.9771429300308228,grad_norm: 0.8571794351965328, iteration: 337604
loss: 0.9684279561042786,grad_norm: 0.8128519157882259, iteration: 337605
loss: 0.9847555756568909,grad_norm: 0.7834057254744062, iteration: 337606
loss: 0.9849043488502502,grad_norm: 0.9888124129031661, iteration: 337607
loss: 1.056566596031189,grad_norm: 0.9999991239880327, iteration: 337608
loss: 0.987751305103302,grad_norm: 0.7509790154254453, iteration: 337609
loss: 1.014103889465332,grad_norm: 0.7170731411027718, iteration: 337610
loss: 1.013883352279663,grad_norm: 0.8050060105881122, iteration: 337611
loss: 0.9977806210517883,grad_norm: 0.9161760422097028, iteration: 337612
loss: 0.9696117043495178,grad_norm: 0.8640390777867515, iteration: 337613
loss: 0.9867941737174988,grad_norm: 0.781717494444499, iteration: 337614
loss: 0.992807149887085,grad_norm: 0.999999637215722, iteration: 337615
loss: 0.9824215173721313,grad_norm: 0.8416140063900257, iteration: 337616
loss: 0.9939510822296143,grad_norm: 0.7504402042601477, iteration: 337617
loss: 1.0311483144760132,grad_norm: 0.8224057630763444, iteration: 337618
loss: 1.007336974143982,grad_norm: 0.9999992454468822, iteration: 337619
loss: 1.0114331245422363,grad_norm: 0.6631691343688296, iteration: 337620
loss: 1.0336884260177612,grad_norm: 0.7284174173302636, iteration: 337621
loss: 0.9845520853996277,grad_norm: 0.9999991314092247, iteration: 337622
loss: 1.004284381866455,grad_norm: 0.8527036661325392, iteration: 337623
loss: 1.000803828239441,grad_norm: 0.7647507360627778, iteration: 337624
loss: 1.0264984369277954,grad_norm: 0.7901297969755071, iteration: 337625
loss: 0.9626298546791077,grad_norm: 0.7480953913079356, iteration: 337626
loss: 0.9821677803993225,grad_norm: 0.8363213702829299, iteration: 337627
loss: 1.0291435718536377,grad_norm: 0.7952160259026397, iteration: 337628
loss: 0.9791551232337952,grad_norm: 0.8195818111504358, iteration: 337629
loss: 0.9979205131530762,grad_norm: 0.9999990253267877, iteration: 337630
loss: 1.2965235710144043,grad_norm: 0.9999999199359746, iteration: 337631
loss: 0.9688510894775391,grad_norm: 0.9999990832525011, iteration: 337632
loss: 1.051782488822937,grad_norm: 0.99999967666629, iteration: 337633
loss: 1.0274426937103271,grad_norm: 0.9811777250009113, iteration: 337634
loss: 0.9941352605819702,grad_norm: 0.7327710166965237, iteration: 337635
loss: 1.0258269309997559,grad_norm: 0.8726850697076299, iteration: 337636
loss: 0.9919853806495667,grad_norm: 0.6456123583957581, iteration: 337637
loss: 0.9869093894958496,grad_norm: 0.8941419617292712, iteration: 337638
loss: 1.0154304504394531,grad_norm: 0.7751835983044822, iteration: 337639
loss: 1.0510061979293823,grad_norm: 0.8060770742559815, iteration: 337640
loss: 1.020885705947876,grad_norm: 0.9727374465909864, iteration: 337641
loss: 1.0040242671966553,grad_norm: 0.8414581555767024, iteration: 337642
loss: 0.9674439430236816,grad_norm: 0.9265797714690954, iteration: 337643
loss: 1.0308332443237305,grad_norm: 0.8119462516694632, iteration: 337644
loss: 1.0333949327468872,grad_norm: 0.9200303751973917, iteration: 337645
loss: 1.0242384672164917,grad_norm: 0.999999104625069, iteration: 337646
loss: 0.9914825558662415,grad_norm: 0.7996328409960984, iteration: 337647
loss: 0.9748310446739197,grad_norm: 0.7892891784009965, iteration: 337648
loss: 1.0337193012237549,grad_norm: 0.9999991065807547, iteration: 337649
loss: 1.0090450048446655,grad_norm: 0.6219837446295967, iteration: 337650
loss: 0.9927974343299866,grad_norm: 0.7742943641432731, iteration: 337651
loss: 0.9728091359138489,grad_norm: 0.9392132755376837, iteration: 337652
loss: 1.0146397352218628,grad_norm: 0.6976271659504539, iteration: 337653
loss: 1.0203405618667603,grad_norm: 0.9999993890700538, iteration: 337654
loss: 1.0535086393356323,grad_norm: 0.9999995667466633, iteration: 337655
loss: 1.0068848133087158,grad_norm: 0.8769517232464263, iteration: 337656
loss: 1.0065618753433228,grad_norm: 0.8323018350402841, iteration: 337657
loss: 1.0039433240890503,grad_norm: 0.7939726392482088, iteration: 337658
loss: 1.0000195503234863,grad_norm: 0.7031030686065524, iteration: 337659
loss: 1.0454906225204468,grad_norm: 0.8827123447307139, iteration: 337660
loss: 0.9946697354316711,grad_norm: 0.8036652964637343, iteration: 337661
loss: 1.0187784433364868,grad_norm: 0.8134358449043978, iteration: 337662
loss: 1.0431840419769287,grad_norm: 0.8838572097814846, iteration: 337663
loss: 0.9919929504394531,grad_norm: 0.8131892204163201, iteration: 337664
loss: 0.9750681519508362,grad_norm: 0.7658858549141456, iteration: 337665
loss: 1.0104975700378418,grad_norm: 0.7459433279262199, iteration: 337666
loss: 1.021894931793213,grad_norm: 0.8012040805585111, iteration: 337667
loss: 1.0051002502441406,grad_norm: 0.8407612627343737, iteration: 337668
loss: 1.0317262411117554,grad_norm: 0.694245108361603, iteration: 337669
loss: 1.008313536643982,grad_norm: 0.7871407796483654, iteration: 337670
loss: 1.02329421043396,grad_norm: 0.8990645688373593, iteration: 337671
loss: 1.0192387104034424,grad_norm: 0.9999990782295382, iteration: 337672
loss: 0.9829998016357422,grad_norm: 0.8556659285721027, iteration: 337673
loss: 0.9867442846298218,grad_norm: 0.8076445103418208, iteration: 337674
loss: 1.003514051437378,grad_norm: 0.8750967043818779, iteration: 337675
loss: 1.0087820291519165,grad_norm: 0.7858361088050128, iteration: 337676
loss: 0.9798741936683655,grad_norm: 0.9471700873045928, iteration: 337677
loss: 1.010740876197815,grad_norm: 0.7580491403932359, iteration: 337678
loss: 1.0361613035202026,grad_norm: 0.9604220732640381, iteration: 337679
loss: 1.2178993225097656,grad_norm: 0.9999993364518285, iteration: 337680
loss: 1.012662410736084,grad_norm: 0.7861369148839172, iteration: 337681
loss: 1.0300410985946655,grad_norm: 0.9221955773030008, iteration: 337682
loss: 0.9784460067749023,grad_norm: 0.8875025143788412, iteration: 337683
loss: 1.0811280012130737,grad_norm: 0.9999996638767388, iteration: 337684
loss: 0.9958476424217224,grad_norm: 0.6682418361104284, iteration: 337685
loss: 1.0879387855529785,grad_norm: 0.9999998979219842, iteration: 337686
loss: 0.9703776836395264,grad_norm: 0.8650184748764709, iteration: 337687
loss: 1.0358473062515259,grad_norm: 0.8392430879252185, iteration: 337688
loss: 0.9811331033706665,grad_norm: 0.7216693698313966, iteration: 337689
loss: 1.0124309062957764,grad_norm: 0.9238749397258688, iteration: 337690
loss: 1.0257219076156616,grad_norm: 0.8916430031565035, iteration: 337691
loss: 1.0136263370513916,grad_norm: 0.9594019177435671, iteration: 337692
loss: 0.9737964272499084,grad_norm: 0.7480744723538617, iteration: 337693
loss: 0.9897434115409851,grad_norm: 0.9999994644140082, iteration: 337694
loss: 0.9849222898483276,grad_norm: 0.8562868828516939, iteration: 337695
loss: 0.987154483795166,grad_norm: 0.8138819563097539, iteration: 337696
loss: 1.0029371976852417,grad_norm: 0.8889790167756744, iteration: 337697
loss: 1.0193766355514526,grad_norm: 0.9706616447933936, iteration: 337698
loss: 1.0074310302734375,grad_norm: 0.8469759424167095, iteration: 337699
loss: 1.0103323459625244,grad_norm: 0.9906552843819084, iteration: 337700
loss: 1.0094407796859741,grad_norm: 0.9809598605272225, iteration: 337701
loss: 1.029691457748413,grad_norm: 0.9826124124843381, iteration: 337702
loss: 0.9946205615997314,grad_norm: 0.8588683863001386, iteration: 337703
loss: 0.9942466616630554,grad_norm: 0.9684310270906914, iteration: 337704
loss: 1.0219818353652954,grad_norm: 0.8634820807161223, iteration: 337705
loss: 1.0284897089004517,grad_norm: 0.7953714960622069, iteration: 337706
loss: 0.9949246048927307,grad_norm: 0.8418713935653946, iteration: 337707
loss: 0.9724664688110352,grad_norm: 0.866024925804064, iteration: 337708
loss: 1.008332371711731,grad_norm: 0.760484990585084, iteration: 337709
loss: 1.0190680027008057,grad_norm: 0.7202429106867013, iteration: 337710
loss: 1.0079680681228638,grad_norm: 0.9276925523049436, iteration: 337711
loss: 1.0086122751235962,grad_norm: 0.8651838531387345, iteration: 337712
loss: 1.0333240032196045,grad_norm: 0.999999440981675, iteration: 337713
loss: 0.9997666478157043,grad_norm: 0.8193295409584336, iteration: 337714
loss: 0.9808508157730103,grad_norm: 0.8587076572075478, iteration: 337715
loss: 0.9854927659034729,grad_norm: 0.889911424420217, iteration: 337716
loss: 1.0937042236328125,grad_norm: 0.9999996022828883, iteration: 337717
loss: 0.9902693629264832,grad_norm: 0.8883584425691902, iteration: 337718
loss: 0.9984065890312195,grad_norm: 0.9999990353467278, iteration: 337719
loss: 1.0117816925048828,grad_norm: 0.6894571891527588, iteration: 337720
loss: 1.0865932703018188,grad_norm: 0.7694480909653981, iteration: 337721
loss: 1.0728110074996948,grad_norm: 0.9721381710961842, iteration: 337722
loss: 1.0118337869644165,grad_norm: 0.9363711697246422, iteration: 337723
loss: 1.001457929611206,grad_norm: 0.8535377335514867, iteration: 337724
loss: 1.0126954317092896,grad_norm: 0.9784663674932095, iteration: 337725
loss: 0.9871631264686584,grad_norm: 0.867924622736762, iteration: 337726
loss: 0.9676325917243958,grad_norm: 0.9206656787610684, iteration: 337727
loss: 1.0662246942520142,grad_norm: 0.9999991118586853, iteration: 337728
loss: 1.0399473905563354,grad_norm: 0.9999989564791181, iteration: 337729
loss: 1.000614881515503,grad_norm: 0.7601016004109549, iteration: 337730
loss: 1.018663763999939,grad_norm: 0.7042808421434854, iteration: 337731
loss: 1.0044524669647217,grad_norm: 0.7880953076479096, iteration: 337732
loss: 1.0185471773147583,grad_norm: 0.8918299968343566, iteration: 337733
loss: 0.9667274355888367,grad_norm: 0.7509162631400685, iteration: 337734
loss: 1.0260846614837646,grad_norm: 0.7581284073601849, iteration: 337735
loss: 0.9803667068481445,grad_norm: 0.9244199548293268, iteration: 337736
loss: 1.027593731880188,grad_norm: 0.674325421105868, iteration: 337737
loss: 1.006960153579712,grad_norm: 0.8269467925228666, iteration: 337738
loss: 0.9973351359367371,grad_norm: 0.9999990723298199, iteration: 337739
loss: 0.9783802032470703,grad_norm: 0.8140944852154263, iteration: 337740
loss: 1.0601905584335327,grad_norm: 0.9999997167527687, iteration: 337741
loss: 0.971002995967865,grad_norm: 0.8182911236976185, iteration: 337742
loss: 0.9790990948677063,grad_norm: 0.7435089519331279, iteration: 337743
loss: 1.0269620418548584,grad_norm: 0.9999991759993613, iteration: 337744
loss: 0.9985562562942505,grad_norm: 0.8658653948891716, iteration: 337745
loss: 1.0166860818862915,grad_norm: 0.8452546682639427, iteration: 337746
loss: 0.9969862103462219,grad_norm: 0.9056412086152709, iteration: 337747
loss: 1.0064817667007446,grad_norm: 0.8021339774251968, iteration: 337748
loss: 0.9983964562416077,grad_norm: 0.7108104951373572, iteration: 337749
loss: 1.0101670026779175,grad_norm: 0.8735038316101216, iteration: 337750
loss: 1.0208419561386108,grad_norm: 0.9042747929402399, iteration: 337751
loss: 1.0016639232635498,grad_norm: 0.7360149344323637, iteration: 337752
loss: 1.0416127443313599,grad_norm: 0.8475034521117826, iteration: 337753
loss: 1.0952816009521484,grad_norm: 0.999999464772617, iteration: 337754
loss: 0.9880715608596802,grad_norm: 0.7327897714997904, iteration: 337755
loss: 1.0146198272705078,grad_norm: 0.930281506825894, iteration: 337756
loss: 1.0184392929077148,grad_norm: 0.903674096496112, iteration: 337757
loss: 0.9903476238250732,grad_norm: 0.8693682969085805, iteration: 337758
loss: 0.9747422337532043,grad_norm: 0.8550023739389746, iteration: 337759
loss: 1.0020304918289185,grad_norm: 0.8708841672483918, iteration: 337760
loss: 0.9895473718643188,grad_norm: 0.9117288491547975, iteration: 337761
loss: 0.9924332499504089,grad_norm: 0.9112668403151065, iteration: 337762
loss: 1.043524980545044,grad_norm: 0.9999997991668311, iteration: 337763
loss: 0.9896013140678406,grad_norm: 0.7991623585480052, iteration: 337764
loss: 1.0086381435394287,grad_norm: 0.9666753832617802, iteration: 337765
loss: 1.1540371179580688,grad_norm: 0.9999999061281979, iteration: 337766
loss: 0.9884584546089172,grad_norm: 0.9999990892213563, iteration: 337767
loss: 0.9730982184410095,grad_norm: 0.7725913586212017, iteration: 337768
loss: 0.990731418132782,grad_norm: 0.7679781394613736, iteration: 337769
loss: 0.9764912724494934,grad_norm: 0.7759649316326557, iteration: 337770
loss: 0.992133378982544,grad_norm: 0.7493309609489458, iteration: 337771
loss: 0.9965670108795166,grad_norm: 0.8291356746707573, iteration: 337772
loss: 1.0091019868850708,grad_norm: 0.8051653707727333, iteration: 337773
loss: 0.9724576473236084,grad_norm: 0.8860329565070263, iteration: 337774
loss: 1.0098317861557007,grad_norm: 0.8544115278831573, iteration: 337775
loss: 0.9990721940994263,grad_norm: 0.7934519614277523, iteration: 337776
loss: 0.9462026953697205,grad_norm: 0.7722606991742819, iteration: 337777
loss: 1.0006458759307861,grad_norm: 0.9287387332125258, iteration: 337778
loss: 1.0059459209442139,grad_norm: 0.9919040063213547, iteration: 337779
loss: 0.9913733601570129,grad_norm: 0.9999991639699077, iteration: 337780
loss: 0.963801920413971,grad_norm: 0.9928401184964102, iteration: 337781
loss: 0.9738907814025879,grad_norm: 0.7037976652663038, iteration: 337782
loss: 1.0286130905151367,grad_norm: 0.7702411530258457, iteration: 337783
loss: 1.021566390991211,grad_norm: 0.8425090333251043, iteration: 337784
loss: 1.016727089881897,grad_norm: 0.9549171329639825, iteration: 337785
loss: 0.9976403117179871,grad_norm: 0.9999990596202143, iteration: 337786
loss: 1.0008913278579712,grad_norm: 0.9999991914299565, iteration: 337787
loss: 0.978340744972229,grad_norm: 0.9999991292010943, iteration: 337788
loss: 1.004913568496704,grad_norm: 0.8471505482910933, iteration: 337789
loss: 0.9802454710006714,grad_norm: 0.8079157812616136, iteration: 337790
loss: 1.0183545351028442,grad_norm: 0.8499995624359246, iteration: 337791
loss: 1.0212310552597046,grad_norm: 0.8303971607920423, iteration: 337792
loss: 1.077471137046814,grad_norm: 0.9999997689838521, iteration: 337793
loss: 0.9650968909263611,grad_norm: 0.7350088100162278, iteration: 337794
loss: 0.9664008021354675,grad_norm: 0.999999239385361, iteration: 337795
loss: 0.9735208749771118,grad_norm: 0.9999991394328385, iteration: 337796
loss: 1.0009520053863525,grad_norm: 0.8944272215870133, iteration: 337797
loss: 0.991162896156311,grad_norm: 0.9999991070028018, iteration: 337798
loss: 0.9708002805709839,grad_norm: 0.8184664115292956, iteration: 337799
loss: 0.9854919910430908,grad_norm: 0.7543134873208218, iteration: 337800
loss: 1.0309081077575684,grad_norm: 0.9804780094274523, iteration: 337801
loss: 0.9819160103797913,grad_norm: 0.999999651410835, iteration: 337802
loss: 1.0271610021591187,grad_norm: 0.9059392620000817, iteration: 337803
loss: 1.0094716548919678,grad_norm: 0.7595558410328358, iteration: 337804
loss: 0.9889115691184998,grad_norm: 0.8324009323556353, iteration: 337805
loss: 0.9499710202217102,grad_norm: 0.8448568187365644, iteration: 337806
loss: 1.0196864604949951,grad_norm: 0.881530968909518, iteration: 337807
loss: 0.9988558292388916,grad_norm: 0.8152762552211585, iteration: 337808
loss: 1.0389397144317627,grad_norm: 0.968532988077685, iteration: 337809
loss: 0.993294358253479,grad_norm: 0.9227779392796148, iteration: 337810
loss: 0.9961172342300415,grad_norm: 0.8309040934011804, iteration: 337811
loss: 1.0364803075790405,grad_norm: 0.7623707963935176, iteration: 337812
loss: 1.0003328323364258,grad_norm: 0.7969385285201109, iteration: 337813
loss: 1.0117144584655762,grad_norm: 1.0000000470561035, iteration: 337814
loss: 0.9668554663658142,grad_norm: 0.8590957464461175, iteration: 337815
loss: 0.9864739179611206,grad_norm: 0.9999991875621347, iteration: 337816
loss: 0.9580490589141846,grad_norm: 0.7226205293549045, iteration: 337817
loss: 0.9861755967140198,grad_norm: 0.7381929784506276, iteration: 337818
loss: 1.0109822750091553,grad_norm: 0.7930066888914962, iteration: 337819
loss: 0.9979175925254822,grad_norm: 0.749931949418446, iteration: 337820
loss: 1.066617488861084,grad_norm: 0.9999995307929892, iteration: 337821
loss: 0.9904885292053223,grad_norm: 0.8706331416088837, iteration: 337822
loss: 1.0262693166732788,grad_norm: 1.0000000235849664, iteration: 337823
loss: 0.982042670249939,grad_norm: 0.8264636090653432, iteration: 337824
loss: 0.9888758659362793,grad_norm: 0.9999991528931923, iteration: 337825
loss: 0.9947636723518372,grad_norm: 0.8102099407727592, iteration: 337826
loss: 0.9858346581459045,grad_norm: 0.8377853896796754, iteration: 337827
loss: 1.0196936130523682,grad_norm: 0.8540850004564331, iteration: 337828
loss: 0.9974119067192078,grad_norm: 0.9712034450263982, iteration: 337829
loss: 1.0375490188598633,grad_norm: 0.9999995954305255, iteration: 337830
loss: 1.003275990486145,grad_norm: 0.9560644047533793, iteration: 337831
loss: 1.010980248451233,grad_norm: 0.978843851722283, iteration: 337832
loss: 1.0372127294540405,grad_norm: 0.853345659222049, iteration: 337833
loss: 1.0313622951507568,grad_norm: 0.999999207570667, iteration: 337834
loss: 1.0155820846557617,grad_norm: 0.9133929284218494, iteration: 337835
loss: 1.0008530616760254,grad_norm: 0.9500942314270937, iteration: 337836
loss: 1.1284029483795166,grad_norm: 0.9999995306785087, iteration: 337837
loss: 0.9817970991134644,grad_norm: 0.7943698192052545, iteration: 337838
loss: 1.0022447109222412,grad_norm: 0.853497898436363, iteration: 337839
loss: 0.990866482257843,grad_norm: 0.999999394820589, iteration: 337840
loss: 1.0836762189865112,grad_norm: 0.9999999619346945, iteration: 337841
loss: 0.993381142616272,grad_norm: 0.9999996721354819, iteration: 337842
loss: 1.0112100839614868,grad_norm: 0.9999990875489313, iteration: 337843
loss: 0.9977130889892578,grad_norm: 0.9999995177326133, iteration: 337844
loss: 1.0519077777862549,grad_norm: 0.9751249758653205, iteration: 337845
loss: 1.1194958686828613,grad_norm: 0.9999991255132995, iteration: 337846
loss: 1.0341367721557617,grad_norm: 0.9999991535172097, iteration: 337847
loss: 1.0254536867141724,grad_norm: 0.9361669085009369, iteration: 337848
loss: 1.0089668035507202,grad_norm: 0.9999993030107612, iteration: 337849
loss: 1.0298959016799927,grad_norm: 0.9861344443993036, iteration: 337850
loss: 1.3035342693328857,grad_norm: 0.9999997959679457, iteration: 337851
loss: 1.1245983839035034,grad_norm: 0.9999998303231126, iteration: 337852
loss: 0.995811402797699,grad_norm: 0.9999995530837221, iteration: 337853
loss: 1.0342007875442505,grad_norm: 0.8007764169213742, iteration: 337854
loss: 1.1295101642608643,grad_norm: 0.842769803300446, iteration: 337855
loss: 1.0194971561431885,grad_norm: 0.8782858056139194, iteration: 337856
loss: 1.0106167793273926,grad_norm: 0.9259009642781995, iteration: 337857
loss: 0.9862171411514282,grad_norm: 0.9999999234973108, iteration: 337858
loss: 0.9846317768096924,grad_norm: 0.7475886295426473, iteration: 337859
loss: 0.992994487285614,grad_norm: 0.8702592868344049, iteration: 337860
loss: 0.9874023795127869,grad_norm: 0.896864751638999, iteration: 337861
loss: 0.9964408874511719,grad_norm: 0.7522354073569814, iteration: 337862
loss: 0.9805546998977661,grad_norm: 0.9581642385584948, iteration: 337863
loss: 1.0011850595474243,grad_norm: 0.7803986342975834, iteration: 337864
loss: 1.0129836797714233,grad_norm: 0.810089287126167, iteration: 337865
loss: 1.0056618452072144,grad_norm: 0.7295391229889503, iteration: 337866
loss: 0.9864255785942078,grad_norm: 0.9999991151820816, iteration: 337867
loss: 1.0131267309188843,grad_norm: 0.9990195181035125, iteration: 337868
loss: 0.9879568815231323,grad_norm: 0.8483818070280673, iteration: 337869
loss: 0.9690887331962585,grad_norm: 0.8190612676555444, iteration: 337870
loss: 1.0026177167892456,grad_norm: 0.9999992732922613, iteration: 337871
loss: 1.0032596588134766,grad_norm: 0.6718278016077918, iteration: 337872
loss: 1.0378164052963257,grad_norm: 0.9999998293322288, iteration: 337873
loss: 0.9965320229530334,grad_norm: 0.7994474522236895, iteration: 337874
loss: 1.010310173034668,grad_norm: 0.9293869495039109, iteration: 337875
loss: 0.9970836639404297,grad_norm: 0.8948100611267162, iteration: 337876
loss: 1.0245649814605713,grad_norm: 0.7076540815521354, iteration: 337877
loss: 1.0056734085083008,grad_norm: 0.7747251847047351, iteration: 337878
loss: 0.9635839462280273,grad_norm: 0.8418721444893447, iteration: 337879
loss: 1.0030299425125122,grad_norm: 0.7673358835099822, iteration: 337880
loss: 0.9775372743606567,grad_norm: 0.7959973375125858, iteration: 337881
loss: 0.9805784821510315,grad_norm: 0.9999993054627675, iteration: 337882
loss: 1.006643295288086,grad_norm: 0.9999998488233854, iteration: 337883
loss: 0.9896060824394226,grad_norm: 0.8410333765531842, iteration: 337884
loss: 1.0712156295776367,grad_norm: 0.9999992781144867, iteration: 337885
loss: 1.0079925060272217,grad_norm: 0.8974331976334537, iteration: 337886
loss: 1.114395260810852,grad_norm: 0.9999995171775721, iteration: 337887
loss: 1.0022321939468384,grad_norm: 0.8804982023506385, iteration: 337888
loss: 0.9898024201393127,grad_norm: 0.9092562292168206, iteration: 337889
loss: 0.9841712117195129,grad_norm: 0.885942188644022, iteration: 337890
loss: 0.9933380484580994,grad_norm: 0.8326939922957176, iteration: 337891
loss: 0.9908289313316345,grad_norm: 0.8760365317441354, iteration: 337892
loss: 1.0303107500076294,grad_norm: 0.5985631620848818, iteration: 337893
loss: 1.0168601274490356,grad_norm: 0.6842874241567696, iteration: 337894
loss: 0.9975240230560303,grad_norm: 0.8167419473263944, iteration: 337895
loss: 1.0168471336364746,grad_norm: 0.9113893914253152, iteration: 337896
loss: 1.019399881362915,grad_norm: 0.9961893952015275, iteration: 337897
loss: 0.9784174561500549,grad_norm: 0.8269900573781319, iteration: 337898
loss: 1.0013593435287476,grad_norm: 0.852751427728069, iteration: 337899
loss: 0.9824442863464355,grad_norm: 0.9999990532474823, iteration: 337900
loss: 1.0561068058013916,grad_norm: 0.999999556642821, iteration: 337901
loss: 1.0857588052749634,grad_norm: 0.9999992591993719, iteration: 337902
loss: 0.9899925589561462,grad_norm: 0.7474267075921763, iteration: 337903
loss: 0.9636518359184265,grad_norm: 0.9827792799039272, iteration: 337904
loss: 0.9790732264518738,grad_norm: 0.679835671493167, iteration: 337905
loss: 0.98292475938797,grad_norm: 0.999999210597677, iteration: 337906
loss: 1.0332996845245361,grad_norm: 0.8766361919308214, iteration: 337907
loss: 0.9413255453109741,grad_norm: 0.7776325376842878, iteration: 337908
loss: 1.0083287954330444,grad_norm: 0.8745470836002314, iteration: 337909
loss: 0.9570070505142212,grad_norm: 0.7942398585073966, iteration: 337910
loss: 1.0033321380615234,grad_norm: 0.9548003988041219, iteration: 337911
loss: 0.991507887840271,grad_norm: 0.7587769086834463, iteration: 337912
loss: 1.0063128471374512,grad_norm: 0.8751136935862676, iteration: 337913
loss: 0.9977973103523254,grad_norm: 0.9102089642340905, iteration: 337914
loss: 1.0213236808776855,grad_norm: 0.7034749472830106, iteration: 337915
loss: 0.9956008791923523,grad_norm: 0.9999993364090576, iteration: 337916
loss: 0.991237223148346,grad_norm: 0.8427385233693473, iteration: 337917
loss: 0.99417644739151,grad_norm: 0.8430273625520041, iteration: 337918
loss: 1.0085424184799194,grad_norm: 0.8544323503381642, iteration: 337919
loss: 1.015940546989441,grad_norm: 0.9999999170245074, iteration: 337920
loss: 0.9586513638496399,grad_norm: 0.6446820533722946, iteration: 337921
loss: 0.9896194934844971,grad_norm: 0.8678636012099803, iteration: 337922
loss: 1.0431360006332397,grad_norm: 0.9999997039825994, iteration: 337923
loss: 0.9952083230018616,grad_norm: 0.7720289294185407, iteration: 337924
loss: 0.9931818842887878,grad_norm: 0.8359124265929776, iteration: 337925
loss: 1.028122901916504,grad_norm: 0.9253774929250086, iteration: 337926
loss: 1.0558668375015259,grad_norm: 0.9999999683499127, iteration: 337927
loss: 1.0060851573944092,grad_norm: 0.7608220156879202, iteration: 337928
loss: 1.0534389019012451,grad_norm: 0.892192728698217, iteration: 337929
loss: 1.056741714477539,grad_norm: 0.9999995444272628, iteration: 337930
loss: 0.9981358051300049,grad_norm: 0.8223347419788918, iteration: 337931
loss: 1.0315430164337158,grad_norm: 0.8498595791689441, iteration: 337932
loss: 1.031011939048767,grad_norm: 0.9905716271186151, iteration: 337933
loss: 0.9688763618469238,grad_norm: 0.7776183420186705, iteration: 337934
loss: 0.9767458438873291,grad_norm: 0.6979240959558295, iteration: 337935
loss: 0.9539942145347595,grad_norm: 0.715124850814458, iteration: 337936
loss: 1.0178648233413696,grad_norm: 0.8899623420660903, iteration: 337937
loss: 1.0222750902175903,grad_norm: 0.7729212178881848, iteration: 337938
loss: 0.9850703477859497,grad_norm: 0.9999994751206654, iteration: 337939
loss: 1.0098448991775513,grad_norm: 0.9999992683416086, iteration: 337940
loss: 1.012534499168396,grad_norm: 0.8541539893390879, iteration: 337941
loss: 0.9923956990242004,grad_norm: 0.9095388831387108, iteration: 337942
loss: 1.0340533256530762,grad_norm: 0.9999993575726547, iteration: 337943
loss: 1.0231494903564453,grad_norm: 0.9773024085769243, iteration: 337944
loss: 1.0522520542144775,grad_norm: 0.8354621077495705, iteration: 337945
loss: 1.0373814105987549,grad_norm: 0.7345325952184485, iteration: 337946
loss: 0.9901864528656006,grad_norm: 0.7830887262527004, iteration: 337947
loss: 1.0882940292358398,grad_norm: 0.896932801875645, iteration: 337948
loss: 1.219505786895752,grad_norm: 0.9999998331247834, iteration: 337949
loss: 1.093599796295166,grad_norm: 0.9331111031612733, iteration: 337950
loss: 1.036070704460144,grad_norm: 0.9170811168941686, iteration: 337951
loss: 0.9923670291900635,grad_norm: 0.8395977089442249, iteration: 337952
loss: 1.0356563329696655,grad_norm: 0.9999994392589301, iteration: 337953
loss: 1.0278313159942627,grad_norm: 0.9427182144494682, iteration: 337954
loss: 0.9861375093460083,grad_norm: 0.9999989482057162, iteration: 337955
loss: 1.0514987707138062,grad_norm: 0.9680448828700325, iteration: 337956
loss: 1.0369535684585571,grad_norm: 0.9025253271038254, iteration: 337957
loss: 1.019911527633667,grad_norm: 0.9999990886861333, iteration: 337958
loss: 0.9836786389350891,grad_norm: 0.7553492069887658, iteration: 337959
loss: 0.9989776611328125,grad_norm: 0.7861759749185078, iteration: 337960
loss: 0.9790853261947632,grad_norm: 0.9361958945979174, iteration: 337961
loss: 1.0244773626327515,grad_norm: 0.9364612588246022, iteration: 337962
loss: 0.9932461380958557,grad_norm: 0.8330728035683036, iteration: 337963
loss: 0.979766845703125,grad_norm: 0.7644381086953891, iteration: 337964
loss: 1.0211143493652344,grad_norm: 0.999999289796129, iteration: 337965
loss: 1.0280792713165283,grad_norm: 0.8507936493949668, iteration: 337966
loss: 1.0135185718536377,grad_norm: 0.7398466096991724, iteration: 337967
loss: 0.9484217762947083,grad_norm: 0.9635570885749417, iteration: 337968
loss: 0.9821705222129822,grad_norm: 0.7458554437832089, iteration: 337969
loss: 0.977706253528595,grad_norm: 0.9255155684903968, iteration: 337970
loss: 1.0315883159637451,grad_norm: 0.7338316831669516, iteration: 337971
loss: 0.9545575976371765,grad_norm: 0.9999992361849988, iteration: 337972
loss: 1.014975666999817,grad_norm: 0.8774950326640453, iteration: 337973
loss: 0.9824800491333008,grad_norm: 0.9999990802116685, iteration: 337974
loss: 0.9858096837997437,grad_norm: 0.7549416699882797, iteration: 337975
loss: 1.0186514854431152,grad_norm: 0.9878855803616762, iteration: 337976
loss: 1.0076687335968018,grad_norm: 0.7979609982365327, iteration: 337977
loss: 1.0642518997192383,grad_norm: 0.99999963276947, iteration: 337978
loss: 1.002907395362854,grad_norm: 0.7209829577347041, iteration: 337979
loss: 0.974058985710144,grad_norm: 0.8766897094760774, iteration: 337980
loss: 1.0041204690933228,grad_norm: 0.8353778641663295, iteration: 337981
loss: 1.023401141166687,grad_norm: 0.9791804022731208, iteration: 337982
loss: 0.9825496673583984,grad_norm: 0.920012004149237, iteration: 337983
loss: 1.0106481313705444,grad_norm: 0.9999992161235524, iteration: 337984
loss: 0.9961742758750916,grad_norm: 0.9405338753997776, iteration: 337985
loss: 1.012195348739624,grad_norm: 0.8192043609306612, iteration: 337986
loss: 1.0381883382797241,grad_norm: 0.9957822186776325, iteration: 337987
loss: 1.00828218460083,grad_norm: 0.9202924708295214, iteration: 337988
loss: 1.3244131803512573,grad_norm: 0.9999997651709968, iteration: 337989
loss: 1.041536569595337,grad_norm: 0.750779308524069, iteration: 337990
loss: 1.0015671253204346,grad_norm: 0.9301019565735777, iteration: 337991
loss: 0.9872539043426514,grad_norm: 0.6814988294229468, iteration: 337992
loss: 0.997836172580719,grad_norm: 0.8006562847620275, iteration: 337993
loss: 0.931843101978302,grad_norm: 0.7892771062743915, iteration: 337994
loss: 1.0013422966003418,grad_norm: 0.8460941245514821, iteration: 337995
loss: 1.0144836902618408,grad_norm: 0.8448046216068015, iteration: 337996
loss: 0.9917455911636353,grad_norm: 0.8503433850279604, iteration: 337997
loss: 1.0299304723739624,grad_norm: 0.8286602654433476, iteration: 337998
loss: 0.9869306683540344,grad_norm: 0.8125958323183288, iteration: 337999
loss: 0.9830053448677063,grad_norm: 0.8144363412644384, iteration: 338000
loss: 1.0684891939163208,grad_norm: 0.9999991449212778, iteration: 338001
loss: 1.0229979753494263,grad_norm: 0.7236925443940273, iteration: 338002
loss: 1.0066245794296265,grad_norm: 0.7934544962009199, iteration: 338003
loss: 1.0395530462265015,grad_norm: 0.9999998083837546, iteration: 338004
loss: 0.9870595932006836,grad_norm: 0.9705299686234936, iteration: 338005
loss: 1.0089914798736572,grad_norm: 0.7158348570957236, iteration: 338006
loss: 0.9698885679244995,grad_norm: 0.7199379788676387, iteration: 338007
loss: 0.9563631415367126,grad_norm: 0.8102979914968971, iteration: 338008
loss: 0.9882640242576599,grad_norm: 0.8142677410467255, iteration: 338009
loss: 1.005373477935791,grad_norm: 0.8785309798918236, iteration: 338010
loss: 0.973192572593689,grad_norm: 0.8453109700183511, iteration: 338011
loss: 1.0128657817840576,grad_norm: 0.8233570006969707, iteration: 338012
loss: 1.0330337285995483,grad_norm: 0.8464543667629583, iteration: 338013
loss: 0.9859820008277893,grad_norm: 0.8305085238987922, iteration: 338014
loss: 1.0213532447814941,grad_norm: 0.9224805428484754, iteration: 338015
loss: 1.0040277242660522,grad_norm: 0.9069179692451138, iteration: 338016
loss: 1.0017504692077637,grad_norm: 0.8954470446444254, iteration: 338017
loss: 1.0244539976119995,grad_norm: 0.8067365370272972, iteration: 338018
loss: 0.9902414679527283,grad_norm: 0.9999991149943818, iteration: 338019
loss: 0.9865002036094666,grad_norm: 0.8336298945255796, iteration: 338020
loss: 0.9588553309440613,grad_norm: 0.8418293073631082, iteration: 338021
loss: 0.9860743284225464,grad_norm: 0.8138395187753823, iteration: 338022
loss: 0.999220609664917,grad_norm: 0.8109893300812747, iteration: 338023
loss: 0.9954049587249756,grad_norm: 0.7287072830155733, iteration: 338024
loss: 1.0102763175964355,grad_norm: 0.8177425691923262, iteration: 338025
loss: 1.0120058059692383,grad_norm: 0.7618097410292526, iteration: 338026
loss: 0.9636942148208618,grad_norm: 0.970327429435535, iteration: 338027
loss: 0.9638076424598694,grad_norm: 0.8684020977852444, iteration: 338028
loss: 0.9750382900238037,grad_norm: 0.8639419091656477, iteration: 338029
loss: 1.0202358961105347,grad_norm: 0.9422192943546722, iteration: 338030
loss: 0.9979209303855896,grad_norm: 0.8257913770686034, iteration: 338031
loss: 1.1136274337768555,grad_norm: 0.9999991479585586, iteration: 338032
loss: 0.999204158782959,grad_norm: 0.7861074680148609, iteration: 338033
loss: 1.0327465534210205,grad_norm: 0.9999994585845318, iteration: 338034
loss: 0.9987668991088867,grad_norm: 0.9499203416014667, iteration: 338035
loss: 1.0200529098510742,grad_norm: 0.8909680973074442, iteration: 338036
loss: 1.0436094999313354,grad_norm: 0.8081310917348057, iteration: 338037
loss: 1.0321125984191895,grad_norm: 0.9999997500970582, iteration: 338038
loss: 0.9967444539070129,grad_norm: 0.7571827008585607, iteration: 338039
loss: 0.9820351004600525,grad_norm: 0.7289764612301702, iteration: 338040
loss: 1.0006332397460938,grad_norm: 0.7108645121316365, iteration: 338041
loss: 0.9846437573432922,grad_norm: 0.9999999555409915, iteration: 338042
loss: 1.0465275049209595,grad_norm: 0.8239556623313323, iteration: 338043
loss: 1.0943713188171387,grad_norm: 0.9999997962916316, iteration: 338044
loss: 0.987579345703125,grad_norm: 0.9999990608461047, iteration: 338045
loss: 0.9934298992156982,grad_norm: 0.8659775962624916, iteration: 338046
loss: 0.9889937043190002,grad_norm: 0.9241693475157663, iteration: 338047
loss: 1.013852834701538,grad_norm: 0.9641427760598239, iteration: 338048
loss: 0.9870911836624146,grad_norm: 0.790905503679311, iteration: 338049
loss: 0.9972865581512451,grad_norm: 0.9208259190552233, iteration: 338050
loss: 0.9877835512161255,grad_norm: 0.7230815707104801, iteration: 338051
loss: 1.0714516639709473,grad_norm: 0.9999999537939824, iteration: 338052
loss: 1.037415862083435,grad_norm: 0.9999993428111968, iteration: 338053
loss: 1.0298758745193481,grad_norm: 0.7575198937588551, iteration: 338054
loss: 1.0041512250900269,grad_norm: 0.7592162477621648, iteration: 338055
loss: 0.9771889448165894,grad_norm: 0.7286861800389702, iteration: 338056
loss: 0.9911651611328125,grad_norm: 0.832809257831737, iteration: 338057
loss: 1.006618857383728,grad_norm: 0.7681146665593708, iteration: 338058
loss: 1.0395690202713013,grad_norm: 0.8218637128860566, iteration: 338059
loss: 0.9795940518379211,grad_norm: 0.9039760350396714, iteration: 338060
loss: 1.0155638456344604,grad_norm: 0.7736897114095277, iteration: 338061
loss: 0.9964959025382996,grad_norm: 0.9371451730961466, iteration: 338062
loss: 0.9761118292808533,grad_norm: 0.8948605467126476, iteration: 338063
loss: 1.0257982015609741,grad_norm: 0.7684676640435245, iteration: 338064
loss: 1.0010368824005127,grad_norm: 0.80877353980672, iteration: 338065
loss: 0.9834830164909363,grad_norm: 0.8117857538476856, iteration: 338066
loss: 0.9805905818939209,grad_norm: 0.856399730495832, iteration: 338067
loss: 1.0155876874923706,grad_norm: 0.7589551470435766, iteration: 338068
loss: 1.0144736766815186,grad_norm: 0.9999994386225594, iteration: 338069
loss: 0.9640368223190308,grad_norm: 0.7203681691623007, iteration: 338070
loss: 0.9948854446411133,grad_norm: 0.805661333049647, iteration: 338071
loss: 0.9850727915763855,grad_norm: 0.9937268587100024, iteration: 338072
loss: 0.9726147651672363,grad_norm: 0.7569617595091375, iteration: 338073
loss: 0.9542902112007141,grad_norm: 0.883897412008256, iteration: 338074
loss: 0.9911205172538757,grad_norm: 0.7426614783967384, iteration: 338075
loss: 1.080283761024475,grad_norm: 0.9292289437097943, iteration: 338076
loss: 0.9885116815567017,grad_norm: 0.9761962057632328, iteration: 338077
loss: 0.9588443040847778,grad_norm: 0.8816295176114756, iteration: 338078
loss: 1.008298397064209,grad_norm: 0.7829373726523526, iteration: 338079
loss: 0.9847466349601746,grad_norm: 0.9170024967300121, iteration: 338080
loss: 0.9855548739433289,grad_norm: 0.7133759491490238, iteration: 338081
loss: 0.9917986392974854,grad_norm: 0.8364692709172371, iteration: 338082
loss: 1.0266979932785034,grad_norm: 0.9999998873674191, iteration: 338083
loss: 1.0091243982315063,grad_norm: 0.9264545999819481, iteration: 338084
loss: 1.0053411722183228,grad_norm: 0.8865093033274482, iteration: 338085
loss: 1.0030113458633423,grad_norm: 0.9254938544122322, iteration: 338086
loss: 1.035741925239563,grad_norm: 0.9999991094325045, iteration: 338087
loss: 0.9995636940002441,grad_norm: 0.9999995292151982, iteration: 338088
loss: 1.006947636604309,grad_norm: 0.8842139146827662, iteration: 338089
loss: 0.9780279994010925,grad_norm: 0.7827396754707182, iteration: 338090
loss: 1.0222965478897095,grad_norm: 0.7309613843907173, iteration: 338091
loss: 1.0087229013442993,grad_norm: 0.9027873384226932, iteration: 338092
loss: 1.0110399723052979,grad_norm: 0.7906515397966194, iteration: 338093
loss: 1.0152900218963623,grad_norm: 0.823928553744252, iteration: 338094
loss: 1.0559405088424683,grad_norm: 0.9999990667195128, iteration: 338095
loss: 1.0111850500106812,grad_norm: 0.9935378494816933, iteration: 338096
loss: 0.977174699306488,grad_norm: 0.83049060012643, iteration: 338097
loss: 1.012934923171997,grad_norm: 0.9006146289922065, iteration: 338098
loss: 1.006116271018982,grad_norm: 0.7744043188404117, iteration: 338099
loss: 1.0360866785049438,grad_norm: 0.8038650544811465, iteration: 338100
loss: 1.0065433979034424,grad_norm: 0.8775591688999327, iteration: 338101
loss: 0.9900826811790466,grad_norm: 0.9246244074378925, iteration: 338102
loss: 1.028024435043335,grad_norm: 0.7823832045620788, iteration: 338103
loss: 0.9864480495452881,grad_norm: 0.784383646382333, iteration: 338104
loss: 1.011269211769104,grad_norm: 0.762016222122175, iteration: 338105
loss: 1.0024057626724243,grad_norm: 0.7957458112427309, iteration: 338106
loss: 1.033416509628296,grad_norm: 0.7534870826786475, iteration: 338107
loss: 0.9977777004241943,grad_norm: 0.8690363620671945, iteration: 338108
loss: 1.0002013444900513,grad_norm: 0.8436644756979775, iteration: 338109
loss: 0.9922418594360352,grad_norm: 0.8228278116657746, iteration: 338110
loss: 0.9742100834846497,grad_norm: 0.8129872844814366, iteration: 338111
loss: 0.9855049252510071,grad_norm: 0.7122657346725398, iteration: 338112
loss: 0.9893726110458374,grad_norm: 0.8905100693470273, iteration: 338113
loss: 1.0119893550872803,grad_norm: 0.7604178290799788, iteration: 338114
loss: 1.0251535177230835,grad_norm: 0.8211309940889645, iteration: 338115
loss: 1.008489727973938,grad_norm: 0.7698283017953786, iteration: 338116
loss: 1.0080195665359497,grad_norm: 0.8751780459527544, iteration: 338117
loss: 1.0004048347473145,grad_norm: 0.7000746619212062, iteration: 338118
loss: 0.9737243056297302,grad_norm: 0.6666064822581388, iteration: 338119
loss: 1.0093199014663696,grad_norm: 0.9999992169614834, iteration: 338120
loss: 1.0038180351257324,grad_norm: 0.7294460289026056, iteration: 338121
loss: 1.0789926052093506,grad_norm: 0.9999999195387359, iteration: 338122
loss: 0.9843112230300903,grad_norm: 0.8301048522949371, iteration: 338123
loss: 1.0377068519592285,grad_norm: 0.7956586092318247, iteration: 338124
loss: 1.0360928773880005,grad_norm: 0.999999929522518, iteration: 338125
loss: 0.9627580046653748,grad_norm: 0.7705128169311086, iteration: 338126
loss: 0.9921136498451233,grad_norm: 0.6259257115819231, iteration: 338127
loss: 1.0069657564163208,grad_norm: 0.8012324097371903, iteration: 338128
loss: 1.0144774913787842,grad_norm: 0.9999991995518054, iteration: 338129
loss: 1.0248788595199585,grad_norm: 0.8619297441322759, iteration: 338130
loss: 0.96735018491745,grad_norm: 0.8685661331869547, iteration: 338131
loss: 0.9924051761627197,grad_norm: 0.7324886581810811, iteration: 338132
loss: 1.036506175994873,grad_norm: 0.9326886003325052, iteration: 338133
loss: 0.9847643971443176,grad_norm: 0.8568649621243984, iteration: 338134
loss: 0.978547990322113,grad_norm: 0.7948856930738345, iteration: 338135
loss: 0.9930509924888611,grad_norm: 0.8233317897109556, iteration: 338136
loss: 1.012180209159851,grad_norm: 0.8039022083863648, iteration: 338137
loss: 1.001751184463501,grad_norm: 0.9999990766980434, iteration: 338138
loss: 1.0037885904312134,grad_norm: 0.8670075946709521, iteration: 338139
loss: 0.9599735140800476,grad_norm: 0.7891029164576403, iteration: 338140
loss: 0.9964163303375244,grad_norm: 0.9062274423972413, iteration: 338141
loss: 0.9949778318405151,grad_norm: 0.8448657938206716, iteration: 338142
loss: 0.9912986159324646,grad_norm: 0.7943118514184181, iteration: 338143
loss: 1.0615063905715942,grad_norm: 0.9999991548717755, iteration: 338144
loss: 0.9974876642227173,grad_norm: 0.8475687013686642, iteration: 338145
loss: 0.9440994262695312,grad_norm: 0.9849498865653799, iteration: 338146
loss: 1.0128388404846191,grad_norm: 0.999999237860143, iteration: 338147
loss: 0.9517859220504761,grad_norm: 0.8019475644087735, iteration: 338148
loss: 1.022294044494629,grad_norm: 0.8953707924821261, iteration: 338149
loss: 0.9721605777740479,grad_norm: 0.8647098716280995, iteration: 338150
loss: 1.0403244495391846,grad_norm: 0.8248510674095053, iteration: 338151
loss: 1.0278993844985962,grad_norm: 0.9999992534545062, iteration: 338152
loss: 1.030967354774475,grad_norm: 0.8465168228414034, iteration: 338153
loss: 1.0148792266845703,grad_norm: 0.9495678521142991, iteration: 338154
loss: 0.9663402438163757,grad_norm: 0.7698300201472433, iteration: 338155
loss: 0.9966481328010559,grad_norm: 0.8265478658457771, iteration: 338156
loss: 1.0215576887130737,grad_norm: 0.8948860722702736, iteration: 338157
loss: 0.98097163438797,grad_norm: 0.7659950176903, iteration: 338158
loss: 0.9903395771980286,grad_norm: 0.7376940609070793, iteration: 338159
loss: 1.0377726554870605,grad_norm: 0.9072756073628127, iteration: 338160
loss: 1.0003297328948975,grad_norm: 0.718717640863936, iteration: 338161
loss: 1.0273256301879883,grad_norm: 0.8390432341921694, iteration: 338162
loss: 1.0029667615890503,grad_norm: 0.8949552533404104, iteration: 338163
loss: 1.0157568454742432,grad_norm: 0.9550445025961207, iteration: 338164
loss: 1.0318411588668823,grad_norm: 0.7386297430084512, iteration: 338165
loss: 1.0005484819412231,grad_norm: 0.8936031678590116, iteration: 338166
loss: 1.0432240962982178,grad_norm: 0.7423932565927615, iteration: 338167
loss: 0.9894063472747803,grad_norm: 0.87636886963615, iteration: 338168
loss: 1.000585675239563,grad_norm: 0.7594085766205223, iteration: 338169
loss: 1.0362389087677002,grad_norm: 0.8823066438176419, iteration: 338170
loss: 0.9915905594825745,grad_norm: 0.9446268670938395, iteration: 338171
loss: 1.017974615097046,grad_norm: 0.873115020095562, iteration: 338172
loss: 1.0075135231018066,grad_norm: 0.8000809298559333, iteration: 338173
loss: 1.015043020248413,grad_norm: 0.9299126182292782, iteration: 338174
loss: 1.063768744468689,grad_norm: 0.9999991757035184, iteration: 338175
loss: 0.9903773069381714,grad_norm: 0.9999996981438208, iteration: 338176
loss: 1.0021991729736328,grad_norm: 0.8814436840088171, iteration: 338177
loss: 0.9698020815849304,grad_norm: 0.8721273050922095, iteration: 338178
loss: 0.993004560470581,grad_norm: 0.9135758439062767, iteration: 338179
loss: 1.0027750730514526,grad_norm: 0.7088686238262717, iteration: 338180
loss: 1.0172615051269531,grad_norm: 0.8275943586825538, iteration: 338181
loss: 1.0102483034133911,grad_norm: 0.9999990886094827, iteration: 338182
loss: 0.9909493923187256,grad_norm: 0.7825800720708627, iteration: 338183
loss: 1.0241305828094482,grad_norm: 0.7093044511804149, iteration: 338184
loss: 0.9804728031158447,grad_norm: 0.7399233619068661, iteration: 338185
loss: 0.9951126575469971,grad_norm: 0.7462674842336434, iteration: 338186
loss: 0.9857388734817505,grad_norm: 0.7028457095701867, iteration: 338187
loss: 0.9948269724845886,grad_norm: 0.8760713697352, iteration: 338188
loss: 1.0069754123687744,grad_norm: 0.9999991938439211, iteration: 338189
loss: 0.9845145344734192,grad_norm: 0.8027769191557167, iteration: 338190
loss: 0.9949408769607544,grad_norm: 0.7920402220708784, iteration: 338191
loss: 0.9697548151016235,grad_norm: 0.8750343526402882, iteration: 338192
loss: 0.970744788646698,grad_norm: 0.8067581539784335, iteration: 338193
loss: 0.9945627450942993,grad_norm: 0.8656374638821247, iteration: 338194
loss: 0.9898191094398499,grad_norm: 0.9318999003079834, iteration: 338195
loss: 0.9702885746955872,grad_norm: 0.9252203927029682, iteration: 338196
loss: 0.9756508469581604,grad_norm: 0.7775675492278964, iteration: 338197
loss: 1.0251299142837524,grad_norm: 0.8208893875226196, iteration: 338198
loss: 1.0070526599884033,grad_norm: 0.8426285033676875, iteration: 338199
loss: 0.9621769785881042,grad_norm: 0.7684319046348012, iteration: 338200
loss: 0.9903662800788879,grad_norm: 0.8889347718678083, iteration: 338201
loss: 1.0230426788330078,grad_norm: 0.7828185472862308, iteration: 338202
loss: 1.0281001329421997,grad_norm: 0.8070004800605233, iteration: 338203
loss: 0.9828943014144897,grad_norm: 0.6903297579372607, iteration: 338204
loss: 0.9943444728851318,grad_norm: 0.8197066881065612, iteration: 338205
loss: 1.0005656480789185,grad_norm: 0.7163686969142719, iteration: 338206
loss: 0.9943544268608093,grad_norm: 0.8608628604689154, iteration: 338207
loss: 0.9925737977027893,grad_norm: 0.7859209570059492, iteration: 338208
loss: 1.0124006271362305,grad_norm: 0.7481175780936445, iteration: 338209
loss: 1.0166243314743042,grad_norm: 0.8333867641053275, iteration: 338210
loss: 0.9671525955200195,grad_norm: 0.8079187179973552, iteration: 338211
loss: 1.0577019453048706,grad_norm: 0.7486903446185297, iteration: 338212
loss: 1.0128674507141113,grad_norm: 0.8518399303063782, iteration: 338213
loss: 1.0116777420043945,grad_norm: 0.9088313407904437, iteration: 338214
loss: 0.9375717639923096,grad_norm: 0.929592836212947, iteration: 338215
loss: 1.007947564125061,grad_norm: 0.7498342581443285, iteration: 338216
loss: 1.0094542503356934,grad_norm: 0.9999990157992334, iteration: 338217
loss: 1.0385206937789917,grad_norm: 0.9515944858689591, iteration: 338218
loss: 0.9902485609054565,grad_norm: 0.9999998241429998, iteration: 338219
loss: 1.0120980739593506,grad_norm: 0.8819238944996285, iteration: 338220
loss: 1.0200879573822021,grad_norm: 0.9134605690116728, iteration: 338221
loss: 1.0317462682724,grad_norm: 0.7222188671605736, iteration: 338222
loss: 1.0238686800003052,grad_norm: 0.7695261772944929, iteration: 338223
loss: 1.018011212348938,grad_norm: 0.7922401303347162, iteration: 338224
loss: 0.9942567944526672,grad_norm: 0.8063089916563697, iteration: 338225
loss: 0.9883801937103271,grad_norm: 0.7077170225601672, iteration: 338226
loss: 0.9703805446624756,grad_norm: 0.9386809314516921, iteration: 338227
loss: 0.9898138046264648,grad_norm: 0.9555071153843373, iteration: 338228
loss: 1.0077202320098877,grad_norm: 0.999999564023293, iteration: 338229
loss: 0.9985784292221069,grad_norm: 0.7665171778999063, iteration: 338230
loss: 0.9976063966751099,grad_norm: 0.8845936171137251, iteration: 338231
loss: 0.9992199540138245,grad_norm: 0.9999990591099843, iteration: 338232
loss: 1.0000213384628296,grad_norm: 0.7575015657614692, iteration: 338233
loss: 1.0169605016708374,grad_norm: 0.7996102414104764, iteration: 338234
loss: 1.0157907009124756,grad_norm: 0.8370030211667475, iteration: 338235
loss: 1.0069631338119507,grad_norm: 0.9754892504178904, iteration: 338236
loss: 1.0181971788406372,grad_norm: 0.831326322042124, iteration: 338237
loss: 1.0126451253890991,grad_norm: 0.9969018957456608, iteration: 338238
loss: 0.987510085105896,grad_norm: 0.7860136864647039, iteration: 338239
loss: 1.0272542238235474,grad_norm: 0.7976059688524997, iteration: 338240
loss: 1.106524109840393,grad_norm: 0.9999992226021229, iteration: 338241
loss: 0.9888208508491516,grad_norm: 0.8204034652471269, iteration: 338242
loss: 0.9600361585617065,grad_norm: 0.8468137462232868, iteration: 338243
loss: 1.0051801204681396,grad_norm: 0.7792009782835382, iteration: 338244
loss: 0.9868266582489014,grad_norm: 0.9780732962367913, iteration: 338245
loss: 1.0214498043060303,grad_norm: 0.8189860773372056, iteration: 338246
loss: 1.0011813640594482,grad_norm: 0.7222045730861527, iteration: 338247
loss: 0.9616067409515381,grad_norm: 0.9999992661915411, iteration: 338248
loss: 1.046374797821045,grad_norm: 0.9999999091811007, iteration: 338249
loss: 1.0044989585876465,grad_norm: 0.8397943238366897, iteration: 338250
loss: 0.9700274467468262,grad_norm: 0.8157443630761152, iteration: 338251
loss: 1.0037976503372192,grad_norm: 0.9999991827625403, iteration: 338252
loss: 1.068780779838562,grad_norm: 0.9999998319204788, iteration: 338253
loss: 1.0937166213989258,grad_norm: 0.9903594196736228, iteration: 338254
loss: 1.0022910833358765,grad_norm: 0.9999994903114464, iteration: 338255
loss: 0.9817966222763062,grad_norm: 0.7469281281055146, iteration: 338256
loss: 1.0124807357788086,grad_norm: 0.8193535137335396, iteration: 338257
loss: 0.9923999905586243,grad_norm: 0.855455262520183, iteration: 338258
loss: 0.9680703282356262,grad_norm: 0.7941219536927976, iteration: 338259
loss: 0.9852674007415771,grad_norm: 0.6864060866899596, iteration: 338260
loss: 0.9808290004730225,grad_norm: 0.851671392963691, iteration: 338261
loss: 1.0073150396347046,grad_norm: 0.8457572417112369, iteration: 338262
loss: 1.021634817123413,grad_norm: 0.9999990904737961, iteration: 338263
loss: 1.0303348302841187,grad_norm: 0.8870778806108371, iteration: 338264
loss: 0.9935334920883179,grad_norm: 0.9999998528283457, iteration: 338265
loss: 0.9818155169487,grad_norm: 0.8056274981368724, iteration: 338266
loss: 0.9923509955406189,grad_norm: 0.9999992628679415, iteration: 338267
loss: 1.036242961883545,grad_norm: 0.9999992729877167, iteration: 338268
loss: 0.9952130317687988,grad_norm: 0.7654831240701868, iteration: 338269
loss: 1.023133635520935,grad_norm: 0.9999995263322053, iteration: 338270
loss: 0.9781033992767334,grad_norm: 0.9789076576250666, iteration: 338271
loss: 1.0095922946929932,grad_norm: 0.8355894726167006, iteration: 338272
loss: 0.9773215055465698,grad_norm: 0.8648735271364912, iteration: 338273
loss: 1.0406006574630737,grad_norm: 0.9999993891162913, iteration: 338274
loss: 0.9905782341957092,grad_norm: 0.7822934408926631, iteration: 338275
loss: 1.0139355659484863,grad_norm: 0.8310219696730271, iteration: 338276
loss: 1.0235660076141357,grad_norm: 0.9208951223247613, iteration: 338277
loss: 1.0038820505142212,grad_norm: 0.814227708749244, iteration: 338278
loss: 1.1193510293960571,grad_norm: 0.9999995615493793, iteration: 338279
loss: 0.9723206162452698,grad_norm: 0.8520846130030394, iteration: 338280
loss: 1.046822190284729,grad_norm: 0.8263909857096583, iteration: 338281
loss: 1.0248117446899414,grad_norm: 0.9999990698848727, iteration: 338282
loss: 0.9897032380104065,grad_norm: 0.7655383434770349, iteration: 338283
loss: 1.002560019493103,grad_norm: 0.7728974709676293, iteration: 338284
loss: 1.0261236429214478,grad_norm: 0.9999990349373826, iteration: 338285
loss: 0.9883208870887756,grad_norm: 0.7306426978468841, iteration: 338286
loss: 0.9808773994445801,grad_norm: 0.8578701064076224, iteration: 338287
loss: 1.022037148475647,grad_norm: 0.7036982799470093, iteration: 338288
loss: 1.078913688659668,grad_norm: 0.9999994670001523, iteration: 338289
loss: 1.0236269235610962,grad_norm: 0.9307289646425742, iteration: 338290
loss: 0.991940438747406,grad_norm: 0.9560433069777438, iteration: 338291
loss: 0.9923112988471985,grad_norm: 0.739351367587395, iteration: 338292
loss: 1.0009821653366089,grad_norm: 0.7067558704056224, iteration: 338293
loss: 0.9765466451644897,grad_norm: 0.9545598582367282, iteration: 338294
loss: 1.0166314840316772,grad_norm: 0.9303065347598144, iteration: 338295
loss: 0.9985780715942383,grad_norm: 0.834167304511154, iteration: 338296
loss: 0.9994668960571289,grad_norm: 0.8284846754626386, iteration: 338297
loss: 0.9997592568397522,grad_norm: 0.7824323774981014, iteration: 338298
loss: 1.040862798690796,grad_norm: 0.9853429564573265, iteration: 338299
loss: 0.9819104075431824,grad_norm: 0.8531419396551528, iteration: 338300
loss: 1.0121840238571167,grad_norm: 0.8689853088273154, iteration: 338301
loss: 1.000038981437683,grad_norm: 0.9999993986439699, iteration: 338302
loss: 1.0302544832229614,grad_norm: 0.9254662889873214, iteration: 338303
loss: 1.0093693733215332,grad_norm: 0.7333554647408915, iteration: 338304
loss: 1.0178035497665405,grad_norm: 0.8938938442046459, iteration: 338305
loss: 1.00753915309906,grad_norm: 0.9113590604896917, iteration: 338306
loss: 0.9723604321479797,grad_norm: 0.8527659133393709, iteration: 338307
loss: 0.9794256091117859,grad_norm: 0.8225558286737865, iteration: 338308
loss: 0.9828814268112183,grad_norm: 0.8774251641062757, iteration: 338309
loss: 0.9833637475967407,grad_norm: 0.8237471771605568, iteration: 338310
loss: 0.9978926777839661,grad_norm: 0.75379546987228, iteration: 338311
loss: 1.020226001739502,grad_norm: 0.8103974341001492, iteration: 338312
loss: 0.9883354306221008,grad_norm: 0.9371821703931296, iteration: 338313
loss: 0.9901635050773621,grad_norm: 0.9999991098653872, iteration: 338314
loss: 0.9633209705352783,grad_norm: 0.7990395996202087, iteration: 338315
loss: 0.9961760640144348,grad_norm: 0.8369362079324323, iteration: 338316
loss: 0.956754744052887,grad_norm: 0.7659647552126027, iteration: 338317
loss: 1.0045610666275024,grad_norm: 0.9760879872058863, iteration: 338318
loss: 0.9965368509292603,grad_norm: 0.9908674300450955, iteration: 338319
loss: 1.0630009174346924,grad_norm: 0.9999998622988393, iteration: 338320
loss: 0.9905657768249512,grad_norm: 0.8210275630369483, iteration: 338321
loss: 1.0328352451324463,grad_norm: 0.8272872576761515, iteration: 338322
loss: 0.9888387322425842,grad_norm: 0.7481625285989995, iteration: 338323
loss: 0.9900408387184143,grad_norm: 0.7288446716373147, iteration: 338324
loss: 1.0112234354019165,grad_norm: 0.7183340656649361, iteration: 338325
loss: 1.015283226966858,grad_norm: 0.8060278280791593, iteration: 338326
loss: 1.0219389200210571,grad_norm: 0.8630221356161618, iteration: 338327
loss: 0.9725527763366699,grad_norm: 0.9252507211356813, iteration: 338328
loss: 1.0726593732833862,grad_norm: 0.9602038963351505, iteration: 338329
loss: 1.0122450590133667,grad_norm: 0.8222824842127647, iteration: 338330
loss: 1.0002673864364624,grad_norm: 0.9786527775269678, iteration: 338331
loss: 0.9999567866325378,grad_norm: 0.9863867594509583, iteration: 338332
loss: 1.0362638235092163,grad_norm: 0.8191244370126927, iteration: 338333
loss: 1.019698977470398,grad_norm: 0.9434721357438375, iteration: 338334
loss: 1.1094285249710083,grad_norm: 0.9999992784441613, iteration: 338335
loss: 1.0472036600112915,grad_norm: 0.9999999735413561, iteration: 338336
loss: 0.9705507159233093,grad_norm: 0.7934100799060747, iteration: 338337
loss: 0.9821298718452454,grad_norm: 0.7717127214741923, iteration: 338338
loss: 1.0104789733886719,grad_norm: 0.81966809738601, iteration: 338339
loss: 0.9759940505027771,grad_norm: 0.9281926701812707, iteration: 338340
loss: 1.0189958810806274,grad_norm: 0.9999995772804088, iteration: 338341
loss: 1.0210068225860596,grad_norm: 0.8849825593470118, iteration: 338342
loss: 1.0209527015686035,grad_norm: 0.8097093618069054, iteration: 338343
loss: 0.9683287143707275,grad_norm: 0.7342590298875846, iteration: 338344
loss: 0.9921609163284302,grad_norm: 0.8890312543942633, iteration: 338345
loss: 1.00234055519104,grad_norm: 0.7133643598342663, iteration: 338346
loss: 0.9860485792160034,grad_norm: 0.9423548127615246, iteration: 338347
loss: 0.9681699872016907,grad_norm: 0.8240298555189934, iteration: 338348
loss: 1.0146031379699707,grad_norm: 0.9999992075004506, iteration: 338349
loss: 1.0081813335418701,grad_norm: 0.972852093189034, iteration: 338350
loss: 1.0476703643798828,grad_norm: 0.9999998224107123, iteration: 338351
loss: 1.0087708234786987,grad_norm: 0.7871821864106587, iteration: 338352
loss: 1.0045692920684814,grad_norm: 0.9511194171058344, iteration: 338353
loss: 0.9788329005241394,grad_norm: 0.77677534199814, iteration: 338354
loss: 1.0123238563537598,grad_norm: 0.7602049646456861, iteration: 338355
loss: 1.036455750465393,grad_norm: 0.7979071181635224, iteration: 338356
loss: 1.1409026384353638,grad_norm: 0.9999991482878071, iteration: 338357
loss: 0.9831366539001465,grad_norm: 0.8747369405101294, iteration: 338358
loss: 0.9854841232299805,grad_norm: 0.9350251532877132, iteration: 338359
loss: 1.0143957138061523,grad_norm: 0.7883565662407893, iteration: 338360
loss: 0.9795341491699219,grad_norm: 0.9999991459541717, iteration: 338361
loss: 1.0106563568115234,grad_norm: 0.8940214738320502, iteration: 338362
loss: 0.992683470249176,grad_norm: 0.9613013662765306, iteration: 338363
loss: 0.982751190662384,grad_norm: 0.9988301699547694, iteration: 338364
loss: 0.9852015972137451,grad_norm: 0.8112751251110049, iteration: 338365
loss: 1.031760573387146,grad_norm: 0.9999993521202608, iteration: 338366
loss: 1.010635256767273,grad_norm: 0.8770383784309402, iteration: 338367
loss: 0.9841451644897461,grad_norm: 0.9999994293744858, iteration: 338368
loss: 0.9799357652664185,grad_norm: 0.9999996654695488, iteration: 338369
loss: 0.9869745969772339,grad_norm: 0.783972655394165, iteration: 338370
loss: 1.028841495513916,grad_norm: 0.7778434082698257, iteration: 338371
loss: 1.0262727737426758,grad_norm: 0.8085235092863561, iteration: 338372
loss: 0.9981858730316162,grad_norm: 0.8472028696572954, iteration: 338373
loss: 0.9785712957382202,grad_norm: 0.7728511160453126, iteration: 338374
loss: 1.008384108543396,grad_norm: 0.8638886638460582, iteration: 338375
loss: 0.9874703884124756,grad_norm: 0.7930003977914906, iteration: 338376
loss: 1.0767713785171509,grad_norm: 0.9999997219468113, iteration: 338377
loss: 0.9740713834762573,grad_norm: 0.9999992541851721, iteration: 338378
loss: 0.9894552230834961,grad_norm: 0.9897880439184861, iteration: 338379
loss: 1.0118434429168701,grad_norm: 0.9524249451398495, iteration: 338380
loss: 1.0409722328186035,grad_norm: 0.7881881718200072, iteration: 338381
loss: 0.9895336627960205,grad_norm: 0.9999999351054396, iteration: 338382
loss: 0.943572461605072,grad_norm: 0.775810036020335, iteration: 338383
loss: 0.9900560975074768,grad_norm: 0.8385625822132347, iteration: 338384
loss: 0.9880762696266174,grad_norm: 0.8370209373196427, iteration: 338385
loss: 0.9910817742347717,grad_norm: 0.6835772898095268, iteration: 338386
loss: 1.0216772556304932,grad_norm: 0.7119947868331002, iteration: 338387
loss: 1.070468544960022,grad_norm: 0.9984616119510041, iteration: 338388
loss: 1.0484559535980225,grad_norm: 0.9999998332574163, iteration: 338389
loss: 0.9599283337593079,grad_norm: 0.8314117839580218, iteration: 338390
loss: 1.2350627183914185,grad_norm: 0.99999997954756, iteration: 338391
loss: 1.0395293235778809,grad_norm: 0.9146575886168155, iteration: 338392
loss: 1.006706953048706,grad_norm: 0.7787665909649786, iteration: 338393
loss: 1.0431146621704102,grad_norm: 0.9999990389704726, iteration: 338394
loss: 1.150179386138916,grad_norm: 0.9999998227002093, iteration: 338395
loss: 0.9714149236679077,grad_norm: 0.6730269545433398, iteration: 338396
loss: 1.0037285089492798,grad_norm: 0.9999996275966684, iteration: 338397
loss: 1.0119521617889404,grad_norm: 0.9999991104293299, iteration: 338398
loss: 0.9962260723114014,grad_norm: 0.9999993019131392, iteration: 338399
loss: 1.0767050981521606,grad_norm: 0.9999991299749516, iteration: 338400
loss: 0.9732916951179504,grad_norm: 0.8883219235335666, iteration: 338401
loss: 1.046412706375122,grad_norm: 0.8806097828719734, iteration: 338402
loss: 0.9967575073242188,grad_norm: 0.7726097112681266, iteration: 338403
loss: 1.0865063667297363,grad_norm: 0.9999994806689414, iteration: 338404
loss: 1.0203630924224854,grad_norm: 0.9999990981190318, iteration: 338405
loss: 1.0134639739990234,grad_norm: 0.9036464730987156, iteration: 338406
loss: 1.021935224533081,grad_norm: 0.8584015579388946, iteration: 338407
loss: 0.9855191707611084,grad_norm: 0.9999991690883397, iteration: 338408
loss: 1.1286232471466064,grad_norm: 0.9999996776192925, iteration: 338409
loss: 1.0716906785964966,grad_norm: 0.9999992465378748, iteration: 338410
loss: 1.0081528425216675,grad_norm: 0.7502545019064074, iteration: 338411
loss: 1.090015172958374,grad_norm: 0.9999994891839346, iteration: 338412
loss: 1.005054235458374,grad_norm: 0.8381716641077719, iteration: 338413
loss: 0.993898332118988,grad_norm: 0.9895496661946964, iteration: 338414
loss: 1.0258015394210815,grad_norm: 0.9999996064180107, iteration: 338415
loss: 1.0963612794876099,grad_norm: 0.9999997238813798, iteration: 338416
loss: 1.0116944313049316,grad_norm: 0.7343467602409539, iteration: 338417
loss: 1.0417499542236328,grad_norm: 0.9687354115126978, iteration: 338418
loss: 1.0344047546386719,grad_norm: 0.6460735334809652, iteration: 338419
loss: 1.5541342496871948,grad_norm: 0.9999998050297357, iteration: 338420
loss: 1.1376230716705322,grad_norm: 0.9999999530657161, iteration: 338421
loss: 1.0653986930847168,grad_norm: 0.9999991190269005, iteration: 338422
loss: 1.0295275449752808,grad_norm: 0.8306412620102052, iteration: 338423
loss: 1.0103325843811035,grad_norm: 0.9669249428684829, iteration: 338424
loss: 0.9944064617156982,grad_norm: 0.8855296500830009, iteration: 338425
loss: 1.0251027345657349,grad_norm: 0.999999334103381, iteration: 338426
loss: 1.039591908454895,grad_norm: 0.7523366629206584, iteration: 338427
loss: 1.0483859777450562,grad_norm: 0.935002458183212, iteration: 338428
loss: 1.0626935958862305,grad_norm: 0.9865255392183809, iteration: 338429
loss: 1.0135159492492676,grad_norm: 0.8406799798128577, iteration: 338430
loss: 1.0486822128295898,grad_norm: 0.8036817544508169, iteration: 338431
loss: 1.030378818511963,grad_norm: 0.9999999468487906, iteration: 338432
loss: 1.0187453031539917,grad_norm: 0.6989639644945663, iteration: 338433
loss: 1.0695371627807617,grad_norm: 0.8757890242402256, iteration: 338434
loss: 0.9822263121604919,grad_norm: 0.9999991231861788, iteration: 338435
loss: 0.9975404143333435,grad_norm: 0.9999997856365275, iteration: 338436
loss: 0.9955995082855225,grad_norm: 0.8857737626866169, iteration: 338437
loss: 1.0142844915390015,grad_norm: 0.9373419534999227, iteration: 338438
loss: 0.9642950892448425,grad_norm: 0.8130571783996129, iteration: 338439
loss: 1.0172938108444214,grad_norm: 0.9641404957380261, iteration: 338440
loss: 1.0441261529922485,grad_norm: 0.9999990144170638, iteration: 338441
loss: 1.0743998289108276,grad_norm: 0.999999054914523, iteration: 338442
loss: 1.0042529106140137,grad_norm: 0.9999991565116619, iteration: 338443
loss: 1.0022860765457153,grad_norm: 0.9999994842558867, iteration: 338444
loss: 0.9759305119514465,grad_norm: 0.9294837941018912, iteration: 338445
loss: 1.0155969858169556,grad_norm: 0.766929870520572, iteration: 338446
loss: 0.987770140171051,grad_norm: 0.9481297687551297, iteration: 338447
loss: 0.999908447265625,grad_norm: 0.9999997459128106, iteration: 338448
loss: 1.0344855785369873,grad_norm: 0.9999998312015576, iteration: 338449
loss: 1.0821549892425537,grad_norm: 0.9999991762064852, iteration: 338450
loss: 0.9893560409545898,grad_norm: 0.8213581068801048, iteration: 338451
loss: 1.010557770729065,grad_norm: 0.9999990590594668, iteration: 338452
loss: 1.0503720045089722,grad_norm: 0.9999993661144604, iteration: 338453
loss: 1.0200492143630981,grad_norm: 0.8966546151788313, iteration: 338454
loss: 0.9924850463867188,grad_norm: 0.9999990948217005, iteration: 338455
loss: 0.9978075623512268,grad_norm: 0.8933475938989517, iteration: 338456
loss: 1.0040385723114014,grad_norm: 0.9999990486182437, iteration: 338457
loss: 1.1627612113952637,grad_norm: 0.9999996281782777, iteration: 338458
loss: 1.079569697380066,grad_norm: 0.9999994929215927, iteration: 338459
loss: 0.9552035331726074,grad_norm: 0.9216812700472313, iteration: 338460
loss: 1.048944115638733,grad_norm: 0.8695455572698061, iteration: 338461
loss: 1.0323755741119385,grad_norm: 0.9999997650387424, iteration: 338462
loss: 0.97861248254776,grad_norm: 0.99999912824008, iteration: 338463
loss: 1.0205448865890503,grad_norm: 0.7835009718311805, iteration: 338464
loss: 1.0565506219863892,grad_norm: 0.9999996035674786, iteration: 338465
loss: 0.9933258295059204,grad_norm: 0.999999208276359, iteration: 338466
loss: 0.9873057007789612,grad_norm: 0.9999996959503011, iteration: 338467
loss: 1.0154188871383667,grad_norm: 0.8978162398650555, iteration: 338468
loss: 1.0561542510986328,grad_norm: 0.9999994136746843, iteration: 338469
loss: 0.9724702835083008,grad_norm: 0.7449162899275379, iteration: 338470
loss: 1.0328887701034546,grad_norm: 0.9071279323750903, iteration: 338471
loss: 1.1071404218673706,grad_norm: 0.9999998672107799, iteration: 338472
loss: 1.0385525226593018,grad_norm: 0.9999991812047415, iteration: 338473
loss: 1.0145318508148193,grad_norm: 0.8280326568868175, iteration: 338474
loss: 1.0564268827438354,grad_norm: 0.9999996319782557, iteration: 338475
loss: 1.0215317010879517,grad_norm: 0.9526371349099608, iteration: 338476
loss: 1.0033107995986938,grad_norm: 0.7957177457670357, iteration: 338477
loss: 1.0425479412078857,grad_norm: 0.7915405894091478, iteration: 338478
loss: 1.0045793056488037,grad_norm: 0.9999994042566908, iteration: 338479
loss: 1.0137434005737305,grad_norm: 0.9999995695662999, iteration: 338480
loss: 1.0272129774093628,grad_norm: 0.9008578229204338, iteration: 338481
loss: 0.9797858595848083,grad_norm: 0.8990088078576695, iteration: 338482
loss: 0.9643081426620483,grad_norm: 0.9792822822423711, iteration: 338483
loss: 1.07793390750885,grad_norm: 0.9999991127337422, iteration: 338484
loss: 1.0166209936141968,grad_norm: 0.7937288237313564, iteration: 338485
loss: 0.97756427526474,grad_norm: 0.9213796663186533, iteration: 338486
loss: 1.0475574731826782,grad_norm: 0.9227493073204159, iteration: 338487
loss: 1.0115524530410767,grad_norm: 0.8954240774072834, iteration: 338488
loss: 1.0044915676116943,grad_norm: 0.795597522034016, iteration: 338489
loss: 0.9985785484313965,grad_norm: 0.7862103988807533, iteration: 338490
loss: 1.019066333770752,grad_norm: 0.9999990763959808, iteration: 338491
loss: 1.0353045463562012,grad_norm: 0.9999994581988094, iteration: 338492
loss: 1.0130997896194458,grad_norm: 0.7160702803660033, iteration: 338493
loss: 0.9901078343391418,grad_norm: 0.8498414750466735, iteration: 338494
loss: 0.9755285978317261,grad_norm: 0.9412922626668961, iteration: 338495
loss: 1.010461688041687,grad_norm: 0.8548987346306479, iteration: 338496
loss: 1.019588828086853,grad_norm: 0.7987028306857084, iteration: 338497
loss: 1.064611554145813,grad_norm: 0.9999997195493142, iteration: 338498
loss: 1.1598594188690186,grad_norm: 0.9999999113705876, iteration: 338499
loss: 0.9767774939537048,grad_norm: 0.7492881042678555, iteration: 338500
loss: 0.9831002354621887,grad_norm: 0.8568597348698848, iteration: 338501
loss: 0.9762927889823914,grad_norm: 0.7316679290514205, iteration: 338502
loss: 1.0164633989334106,grad_norm: 0.9999998639101024, iteration: 338503
loss: 1.0712894201278687,grad_norm: 0.999999219542287, iteration: 338504
loss: 1.0627657175064087,grad_norm: 0.9999996676447437, iteration: 338505
loss: 1.0054833889007568,grad_norm: 0.6696051949292087, iteration: 338506
loss: 0.9671269655227661,grad_norm: 0.999998902150231, iteration: 338507
loss: 0.9720597267150879,grad_norm: 0.8112044410409527, iteration: 338508
loss: 1.0154523849487305,grad_norm: 0.9999991616624296, iteration: 338509
loss: 1.1028780937194824,grad_norm: 0.9999997577842143, iteration: 338510
loss: 1.0017354488372803,grad_norm: 0.8924008392807118, iteration: 338511
loss: 0.9796550869941711,grad_norm: 0.8814166779424539, iteration: 338512
loss: 1.0160596370697021,grad_norm: 0.7770513251207403, iteration: 338513
loss: 1.0055654048919678,grad_norm: 0.9513593170258933, iteration: 338514
loss: 1.0109355449676514,grad_norm: 0.7634248686703231, iteration: 338515
loss: 1.0184366703033447,grad_norm: 0.836182008859459, iteration: 338516
loss: 0.9840966463088989,grad_norm: 0.9999997855627579, iteration: 338517
loss: 1.0090872049331665,grad_norm: 0.7964907512348813, iteration: 338518
loss: 0.9953727722167969,grad_norm: 0.7680439399100919, iteration: 338519
loss: 1.0269502401351929,grad_norm: 0.8590754517254756, iteration: 338520
loss: 0.9944198131561279,grad_norm: 0.7862111535736475, iteration: 338521
loss: 1.038142442703247,grad_norm: 0.8042127619507118, iteration: 338522
loss: 1.0677205324172974,grad_norm: 0.7713960446182412, iteration: 338523
loss: 1.0618205070495605,grad_norm: 0.9999994956832912, iteration: 338524
loss: 1.1117901802062988,grad_norm: 0.9999994147221392, iteration: 338525
loss: 1.0732322931289673,grad_norm: 0.9999993049739837, iteration: 338526
loss: 1.0255789756774902,grad_norm: 0.9999990735766668, iteration: 338527
loss: 1.1113195419311523,grad_norm: 1.0000000320231848, iteration: 338528
loss: 0.9859128594398499,grad_norm: 0.8498272464451472, iteration: 338529
loss: 1.044353723526001,grad_norm: 0.9999999317409922, iteration: 338530
loss: 0.9737652540206909,grad_norm: 0.7478180748863079, iteration: 338531
loss: 0.9819077253341675,grad_norm: 0.8414783830196882, iteration: 338532
loss: 0.9807180762290955,grad_norm: 0.8876649206914904, iteration: 338533
loss: 1.0326141119003296,grad_norm: 0.904436637782135, iteration: 338534
loss: 1.1042922735214233,grad_norm: 0.9999992974343216, iteration: 338535
loss: 1.0307750701904297,grad_norm: 0.9999992171534582, iteration: 338536
loss: 0.9969382882118225,grad_norm: 0.966638946631773, iteration: 338537
loss: 1.1051044464111328,grad_norm: 0.9999990077811128, iteration: 338538
loss: 0.9856067895889282,grad_norm: 0.8888311721606162, iteration: 338539
loss: 1.008720874786377,grad_norm: 0.9999989980468881, iteration: 338540
loss: 1.016181230545044,grad_norm: 0.8947960185175954, iteration: 338541
loss: 1.071557879447937,grad_norm: 0.9999993511804316, iteration: 338542
loss: 1.02288818359375,grad_norm: 0.9999995334170054, iteration: 338543
loss: 1.0180444717407227,grad_norm: 0.9999997507674964, iteration: 338544
loss: 1.0286749601364136,grad_norm: 0.9999991961048343, iteration: 338545
loss: 1.020513653755188,grad_norm: 0.9999991365449319, iteration: 338546
loss: 1.023685336112976,grad_norm: 0.8866839736838461, iteration: 338547
loss: 0.9961743354797363,grad_norm: 0.8596675821792532, iteration: 338548
loss: 1.0099300146102905,grad_norm: 0.6550499901345694, iteration: 338549
loss: 1.0081534385681152,grad_norm: 0.8774837032640731, iteration: 338550
loss: 0.9644132852554321,grad_norm: 0.9999991355451514, iteration: 338551
loss: 1.0163226127624512,grad_norm: 0.9104461057729902, iteration: 338552
loss: 1.0214895009994507,grad_norm: 0.9288995222302463, iteration: 338553
loss: 0.9872320890426636,grad_norm: 0.7438818610801918, iteration: 338554
loss: 0.9609184861183167,grad_norm: 0.8726802494902282, iteration: 338555
loss: 0.9415388107299805,grad_norm: 0.7801179768998597, iteration: 338556
loss: 0.9890493750572205,grad_norm: 0.8850506924098194, iteration: 338557
loss: 0.9691148996353149,grad_norm: 0.7955233220036916, iteration: 338558
loss: 1.0053582191467285,grad_norm: 0.8533139467780775, iteration: 338559
loss: 0.966336727142334,grad_norm: 0.6974234064433108, iteration: 338560
loss: 1.0221614837646484,grad_norm: 0.9646780926235538, iteration: 338561
loss: 0.997427761554718,grad_norm: 0.8117425957516851, iteration: 338562
loss: 0.9767225384712219,grad_norm: 0.8969869047348422, iteration: 338563
loss: 1.0292549133300781,grad_norm: 0.842206927522547, iteration: 338564
loss: 1.0075429677963257,grad_norm: 0.799906214794203, iteration: 338565
loss: 0.9520065784454346,grad_norm: 0.8305189289673909, iteration: 338566
loss: 0.9741599559783936,grad_norm: 0.9170834785270827, iteration: 338567
loss: 1.004737377166748,grad_norm: 0.847099157859942, iteration: 338568
loss: 1.0218291282653809,grad_norm: 0.9500226431243235, iteration: 338569
loss: 0.9998453259468079,grad_norm: 0.7871178486845819, iteration: 338570
loss: 1.0519031286239624,grad_norm: 0.8631132732559958, iteration: 338571
loss: 1.046878457069397,grad_norm: 0.999998994176745, iteration: 338572
loss: 0.9831395745277405,grad_norm: 0.8976886177928816, iteration: 338573
loss: 1.1278634071350098,grad_norm: 0.999999744765165, iteration: 338574
loss: 0.9679082632064819,grad_norm: 0.9130265596520626, iteration: 338575
loss: 1.1093627214431763,grad_norm: 0.9588573633779377, iteration: 338576
loss: 0.9774932265281677,grad_norm: 0.7224727687206, iteration: 338577
loss: 1.0585124492645264,grad_norm: 0.9999998486602345, iteration: 338578
loss: 0.9851599335670471,grad_norm: 0.7866330842448874, iteration: 338579
loss: 0.99807208776474,grad_norm: 0.9274528716699192, iteration: 338580
loss: 1.011937141418457,grad_norm: 0.9999990642999814, iteration: 338581
loss: 1.005414366722107,grad_norm: 0.859245141287521, iteration: 338582
loss: 1.0598888397216797,grad_norm: 0.9371202313586867, iteration: 338583
loss: 1.0052739381790161,grad_norm: 0.903866014422708, iteration: 338584
loss: 1.0051517486572266,grad_norm: 0.9791473442246234, iteration: 338585
loss: 0.98939448595047,grad_norm: 0.9722212938182005, iteration: 338586
loss: 0.9868003726005554,grad_norm: 0.7664224574023005, iteration: 338587
loss: 0.9736601114273071,grad_norm: 0.7808580613202698, iteration: 338588
loss: 1.0321601629257202,grad_norm: 0.9999992997808682, iteration: 338589
loss: 0.9691323637962341,grad_norm: 0.9999991777701605, iteration: 338590
loss: 0.9627001881599426,grad_norm: 0.8981158206328412, iteration: 338591
loss: 1.0038269758224487,grad_norm: 0.8363576018335185, iteration: 338592
loss: 0.9979405403137207,grad_norm: 0.9999995285298394, iteration: 338593
loss: 1.0530530214309692,grad_norm: 0.9999993632288557, iteration: 338594
loss: 0.9763548374176025,grad_norm: 0.7207271814362908, iteration: 338595
loss: 1.0119353532791138,grad_norm: 0.9999991224714098, iteration: 338596
loss: 1.0010974407196045,grad_norm: 0.9164605747762332, iteration: 338597
loss: 1.016704797744751,grad_norm: 0.999999218157186, iteration: 338598
loss: 1.0388351678848267,grad_norm: 0.9999991636602487, iteration: 338599
loss: 0.9880490303039551,grad_norm: 0.8434434416823052, iteration: 338600
loss: 0.9819002747535706,grad_norm: 0.999999155038853, iteration: 338601
loss: 0.9617611169815063,grad_norm: 0.9287740923939924, iteration: 338602
loss: 1.0019710063934326,grad_norm: 0.9168040202742676, iteration: 338603
loss: 0.9929371476173401,grad_norm: 0.8063008344121096, iteration: 338604
loss: 1.0382827520370483,grad_norm: 0.7784434464207306, iteration: 338605
loss: 0.9674607515335083,grad_norm: 0.7745979645302092, iteration: 338606
loss: 1.0553317070007324,grad_norm: 0.9999993526051645, iteration: 338607
loss: 1.0052679777145386,grad_norm: 0.795494127545956, iteration: 338608
loss: 0.9843028783798218,grad_norm: 0.8341399970629794, iteration: 338609
loss: 0.9624566435813904,grad_norm: 0.7770339749135576, iteration: 338610
loss: 0.9868683815002441,grad_norm: 0.9413796736366413, iteration: 338611
loss: 1.0729479789733887,grad_norm: 0.999999150112089, iteration: 338612
loss: 0.9969611763954163,grad_norm: 0.9619703922891142, iteration: 338613
loss: 1.103063702583313,grad_norm: 0.9999992555693165, iteration: 338614
loss: 1.0301380157470703,grad_norm: 0.762088925384034, iteration: 338615
loss: 1.0147062540054321,grad_norm: 0.8322648747841634, iteration: 338616
loss: 1.0266772508621216,grad_norm: 0.9999993149921411, iteration: 338617
loss: 1.0198125839233398,grad_norm: 0.7759501467305813, iteration: 338618
loss: 0.9708022475242615,grad_norm: 0.9191152009881064, iteration: 338619
loss: 1.0087772607803345,grad_norm: 0.9999997250611584, iteration: 338620
loss: 1.0039587020874023,grad_norm: 0.9939571230881706, iteration: 338621
loss: 0.9757742285728455,grad_norm: 0.8716274205595477, iteration: 338622
loss: 1.0132372379302979,grad_norm: 0.8580815639242764, iteration: 338623
loss: 1.0175502300262451,grad_norm: 0.9538601844819965, iteration: 338624
loss: 0.9927189946174622,grad_norm: 0.9999991330475207, iteration: 338625
loss: 0.9710278511047363,grad_norm: 0.7894429083411313, iteration: 338626
loss: 0.9892137050628662,grad_norm: 0.7577811427237569, iteration: 338627
loss: 1.065996766090393,grad_norm: 0.9720439392384741, iteration: 338628
loss: 0.9859276413917542,grad_norm: 0.8448266062941281, iteration: 338629
loss: 1.0288667678833008,grad_norm: 0.999999409978909, iteration: 338630
loss: 1.0167139768600464,grad_norm: 0.9999992011511474, iteration: 338631
loss: 0.9560485482215881,grad_norm: 0.7041007714132724, iteration: 338632
loss: 0.9909063577651978,grad_norm: 0.7335034353907129, iteration: 338633
loss: 0.9965494275093079,grad_norm: 0.9193572920614042, iteration: 338634
loss: 1.0028367042541504,grad_norm: 0.9135610770161909, iteration: 338635
loss: 1.0008130073547363,grad_norm: 0.84394620068069, iteration: 338636
loss: 1.0315790176391602,grad_norm: 0.987132255587154, iteration: 338637
loss: 1.0070186853408813,grad_norm: 0.9727085049882097, iteration: 338638
loss: 0.9990128874778748,grad_norm: 0.9999991016881027, iteration: 338639
loss: 1.0709218978881836,grad_norm: 1.0000000125575514, iteration: 338640
loss: 1.0115503072738647,grad_norm: 0.8847384820773027, iteration: 338641
loss: 0.9896390438079834,grad_norm: 0.8543152859271895, iteration: 338642
loss: 1.006555438041687,grad_norm: 0.916141372147593, iteration: 338643
loss: 1.0005773305892944,grad_norm: 0.7952928413601796, iteration: 338644
loss: 1.1011406183242798,grad_norm: 0.9384880967952093, iteration: 338645
loss: 1.080243468284607,grad_norm: 0.7674006995803854, iteration: 338646
loss: 1.0037696361541748,grad_norm: 0.9172817853176891, iteration: 338647
loss: 0.9954236745834351,grad_norm: 0.8530153988323368, iteration: 338648
loss: 0.9331247806549072,grad_norm: 0.735403731191643, iteration: 338649
loss: 0.9853286743164062,grad_norm: 0.7536254831301836, iteration: 338650
loss: 1.0311490297317505,grad_norm: 0.9289766496637137, iteration: 338651
loss: 0.9928497076034546,grad_norm: 0.9999992754865223, iteration: 338652
loss: 1.004316806793213,grad_norm: 0.9788674363609366, iteration: 338653
loss: 1.0254050493240356,grad_norm: 0.9733335762919346, iteration: 338654
loss: 0.9866378903388977,grad_norm: 0.861439293924785, iteration: 338655
loss: 0.9969663023948669,grad_norm: 0.7079564988523261, iteration: 338656
loss: 0.9971694946289062,grad_norm: 0.6770762174822306, iteration: 338657
loss: 1.004075527191162,grad_norm: 0.902822253948199, iteration: 338658
loss: 1.082374095916748,grad_norm: 0.9999999632955349, iteration: 338659
loss: 0.9836248755455017,grad_norm: 0.876765368198092, iteration: 338660
loss: 0.9831727743148804,grad_norm: 0.883481285324676, iteration: 338661
loss: 1.0400488376617432,grad_norm: 0.999999174055555, iteration: 338662
loss: 0.9797621965408325,grad_norm: 0.6747415312308027, iteration: 338663
loss: 1.0467571020126343,grad_norm: 0.8776294975399516, iteration: 338664
loss: 1.0721380710601807,grad_norm: 0.8123758228538182, iteration: 338665
loss: 1.0039496421813965,grad_norm: 0.7999826867165607, iteration: 338666
loss: 1.0117605924606323,grad_norm: 0.8221489511351547, iteration: 338667
loss: 1.01871657371521,grad_norm: 0.9752485137743822, iteration: 338668
loss: 0.9741581678390503,grad_norm: 0.9426490771313041, iteration: 338669
loss: 1.0191956758499146,grad_norm: 0.9999997067928047, iteration: 338670
loss: 0.9786783456802368,grad_norm: 0.8374422899903055, iteration: 338671
loss: 0.9687709212303162,grad_norm: 0.8372958323522792, iteration: 338672
loss: 1.0399932861328125,grad_norm: 0.9999997642478021, iteration: 338673
loss: 1.1189138889312744,grad_norm: 0.9999997159311107, iteration: 338674
loss: 1.0330613851547241,grad_norm: 0.8219860283251837, iteration: 338675
loss: 1.0543054342269897,grad_norm: 0.9999997889248479, iteration: 338676
loss: 1.0070178508758545,grad_norm: 0.9433267595237963, iteration: 338677
loss: 1.0221744775772095,grad_norm: 0.8762386159604847, iteration: 338678
loss: 1.0197116136550903,grad_norm: 0.8258931644956521, iteration: 338679
loss: 1.067874550819397,grad_norm: 0.9999995897640834, iteration: 338680
loss: 1.0934349298477173,grad_norm: 0.9999996051406475, iteration: 338681
loss: 1.1174707412719727,grad_norm: 0.999999106918431, iteration: 338682
loss: 1.0060113668441772,grad_norm: 0.9999990895647518, iteration: 338683
loss: 0.9866361021995544,grad_norm: 0.7102217516604232, iteration: 338684
loss: 1.0318316221237183,grad_norm: 0.9999994910063198, iteration: 338685
loss: 1.019830584526062,grad_norm: 0.9999995269638444, iteration: 338686
loss: 1.0623199939727783,grad_norm: 0.9944662880369868, iteration: 338687
loss: 1.0628992319107056,grad_norm: 0.9107271243349156, iteration: 338688
loss: 1.0265085697174072,grad_norm: 0.9999990523167877, iteration: 338689
loss: 0.9886363744735718,grad_norm: 0.6673031748160769, iteration: 338690
loss: 1.03346848487854,grad_norm: 0.999999499059537, iteration: 338691
loss: 0.9985636472702026,grad_norm: 0.7679673397514775, iteration: 338692
loss: 0.9849632382392883,grad_norm: 0.8267131949556531, iteration: 338693
loss: 1.0038021802902222,grad_norm: 0.9999991311621724, iteration: 338694
loss: 0.9697052240371704,grad_norm: 0.8886141071775342, iteration: 338695
loss: 0.9933310747146606,grad_norm: 0.9999991247390121, iteration: 338696
loss: 1.1055208444595337,grad_norm: 0.9999993123695756, iteration: 338697
loss: 1.0896226167678833,grad_norm: 0.9019208707900314, iteration: 338698
loss: 1.0071300268173218,grad_norm: 0.6727322188486667, iteration: 338699
loss: 0.982060968875885,grad_norm: 0.8957252713290913, iteration: 338700
loss: 0.9884492754936218,grad_norm: 0.7161972165539722, iteration: 338701
loss: 0.9662200808525085,grad_norm: 0.9501468294507446, iteration: 338702
loss: 1.0016696453094482,grad_norm: 0.7908910620653583, iteration: 338703
loss: 1.0104707479476929,grad_norm: 0.9999992637967942, iteration: 338704
loss: 1.1228985786437988,grad_norm: 0.9999995006789396, iteration: 338705
loss: 0.9774686694145203,grad_norm: 0.840967299031084, iteration: 338706
loss: 1.0656027793884277,grad_norm: 0.9491501101816432, iteration: 338707
loss: 0.9565341472625732,grad_norm: 0.7523074126448837, iteration: 338708
loss: 0.976179838180542,grad_norm: 0.7809137834423908, iteration: 338709
loss: 1.011961817741394,grad_norm: 0.9999990313860266, iteration: 338710
loss: 1.0019866228103638,grad_norm: 0.9015721343826973, iteration: 338711
loss: 1.045250415802002,grad_norm: 0.9999991650204796, iteration: 338712
loss: 1.0464816093444824,grad_norm: 0.9857442399703624, iteration: 338713
loss: 1.0133495330810547,grad_norm: 0.9999996455607042, iteration: 338714
loss: 1.1073397397994995,grad_norm: 0.9999995682301283, iteration: 338715
loss: 0.9955142140388489,grad_norm: 0.8243800070889292, iteration: 338716
loss: 0.9865946173667908,grad_norm: 0.7754898787480031, iteration: 338717
loss: 1.0252115726470947,grad_norm: 0.9081156878334269, iteration: 338718
loss: 1.0025407075881958,grad_norm: 0.7653260959852096, iteration: 338719
loss: 1.040960431098938,grad_norm: 0.9999992437251504, iteration: 338720
loss: 1.046902060508728,grad_norm: 0.9999990248236716, iteration: 338721
loss: 1.0337873697280884,grad_norm: 0.9999991352177606, iteration: 338722
loss: 0.9639632701873779,grad_norm: 0.9115314116841196, iteration: 338723
loss: 1.0594251155853271,grad_norm: 0.836232906182074, iteration: 338724
loss: 1.0257959365844727,grad_norm: 0.9996002039876064, iteration: 338725
loss: 0.9997084140777588,grad_norm: 0.711274319712472, iteration: 338726
loss: 1.039212703704834,grad_norm: 0.9005652953715073, iteration: 338727
loss: 1.021659255027771,grad_norm: 0.9999991731088198, iteration: 338728
loss: 0.971743106842041,grad_norm: 0.8637013576541567, iteration: 338729
loss: 1.0047227144241333,grad_norm: 0.8696818901902512, iteration: 338730
loss: 1.001129150390625,grad_norm: 0.7699084239655049, iteration: 338731
loss: 0.9836409091949463,grad_norm: 0.9228005839808081, iteration: 338732
loss: 0.9971057772636414,grad_norm: 0.8911823291758492, iteration: 338733
loss: 1.0077202320098877,grad_norm: 0.8132460149546915, iteration: 338734
loss: 0.9991102814674377,grad_norm: 0.8827038967272159, iteration: 338735
loss: 1.0075136423110962,grad_norm: 0.6981302448702381, iteration: 338736
loss: 1.017438292503357,grad_norm: 0.9999995274161689, iteration: 338737
loss: 1.0343960523605347,grad_norm: 0.9999999132893669, iteration: 338738
loss: 1.0049806833267212,grad_norm: 0.8924914771040657, iteration: 338739
loss: 0.9813314080238342,grad_norm: 0.8853418850224125, iteration: 338740
loss: 0.9677980542182922,grad_norm: 0.8040273411850982, iteration: 338741
loss: 1.0118169784545898,grad_norm: 0.858900623058189, iteration: 338742
loss: 1.0276894569396973,grad_norm: 0.9719321350285628, iteration: 338743
loss: 0.972978949546814,grad_norm: 0.752926501284813, iteration: 338744
loss: 0.9960166215896606,grad_norm: 0.9767306650785573, iteration: 338745
loss: 1.0946826934814453,grad_norm: 0.9999994386452606, iteration: 338746
loss: 1.024081826210022,grad_norm: 0.978448403736803, iteration: 338747
loss: 1.0317662954330444,grad_norm: 0.8505574977448098, iteration: 338748
loss: 1.0153775215148926,grad_norm: 0.8772071820400841, iteration: 338749
loss: 1.0129501819610596,grad_norm: 0.7687425599265025, iteration: 338750
loss: 0.9783949255943298,grad_norm: 0.7476035121487198, iteration: 338751
loss: 1.0271679162979126,grad_norm: 0.9999989797000864, iteration: 338752
loss: 1.0111651420593262,grad_norm: 0.9999997467345259, iteration: 338753
loss: 1.009721040725708,grad_norm: 0.9240459427477629, iteration: 338754
loss: 1.0764546394348145,grad_norm: 0.9999995484608973, iteration: 338755
loss: 0.9908651113510132,grad_norm: 0.8077246657992176, iteration: 338756
loss: 1.0347727537155151,grad_norm: 0.9999998515316467, iteration: 338757
loss: 0.9511493444442749,grad_norm: 0.9713939211100915, iteration: 338758
loss: 0.9911332726478577,grad_norm: 0.7126286462255755, iteration: 338759
loss: 0.9915750026702881,grad_norm: 0.8302170905022006, iteration: 338760
loss: 1.0226407051086426,grad_norm: 0.8013571171926258, iteration: 338761
loss: 0.9795255064964294,grad_norm: 0.8979261264166977, iteration: 338762
loss: 0.9617525935173035,grad_norm: 0.8744720581447954, iteration: 338763
loss: 1.0336487293243408,grad_norm: 0.8057609594790567, iteration: 338764
loss: 1.0250160694122314,grad_norm: 0.8338060890992822, iteration: 338765
loss: 0.9790534377098083,grad_norm: 0.8356163803401917, iteration: 338766
loss: 1.0137356519699097,grad_norm: 0.9999992006095637, iteration: 338767
loss: 0.9686642289161682,grad_norm: 0.8479630713593629, iteration: 338768
loss: 1.0592986345291138,grad_norm: 0.999999526626041, iteration: 338769
loss: 1.0043326616287231,grad_norm: 0.7763157716100382, iteration: 338770
loss: 1.0108739137649536,grad_norm: 0.7487162006921908, iteration: 338771
loss: 1.003288745880127,grad_norm: 0.7238030265115587, iteration: 338772
loss: 1.000844120979309,grad_norm: 0.8052316622722452, iteration: 338773
loss: 1.0342453718185425,grad_norm: 0.70980212582054, iteration: 338774
loss: 0.9885916113853455,grad_norm: 0.9128011932660516, iteration: 338775
loss: 1.0084497928619385,grad_norm: 0.9999992239984593, iteration: 338776
loss: 0.9863120317459106,grad_norm: 0.9999992278128023, iteration: 338777
loss: 0.9830590486526489,grad_norm: 0.7796707843367358, iteration: 338778
loss: 1.0289247035980225,grad_norm: 0.9596908073447475, iteration: 338779
loss: 0.9941386580467224,grad_norm: 0.9057188951067956, iteration: 338780
loss: 1.0812324285507202,grad_norm: 0.9999999919314653, iteration: 338781
loss: 0.9820488691329956,grad_norm: 0.8007279211382542, iteration: 338782
loss: 1.0258339643478394,grad_norm: 0.9765459787197188, iteration: 338783
loss: 1.006089210510254,grad_norm: 0.9032818319534444, iteration: 338784
loss: 0.9876940250396729,grad_norm: 0.8612501726898345, iteration: 338785
loss: 0.9922757148742676,grad_norm: 0.9204776635299541, iteration: 338786
loss: 0.9882295727729797,grad_norm: 0.9999994887774817, iteration: 338787
loss: 0.9954570531845093,grad_norm: 0.8816710877838446, iteration: 338788
loss: 0.9896998405456543,grad_norm: 0.9999991343836869, iteration: 338789
loss: 0.9927442073822021,grad_norm: 0.7668042276119194, iteration: 338790
loss: 1.0157172679901123,grad_norm: 0.6611940373024633, iteration: 338791
loss: 1.0182255506515503,grad_norm: 0.9999991649454574, iteration: 338792
loss: 0.9904575943946838,grad_norm: 0.9668085920493786, iteration: 338793
loss: 0.9948076009750366,grad_norm: 0.9404523802112085, iteration: 338794
loss: 1.0005743503570557,grad_norm: 0.8595677802495162, iteration: 338795
loss: 1.0172532796859741,grad_norm: 0.9999992827485448, iteration: 338796
loss: 0.9975532293319702,grad_norm: 0.8118543709370669, iteration: 338797
loss: 0.98537278175354,grad_norm: 0.8969260514726699, iteration: 338798
loss: 1.0394313335418701,grad_norm: 0.9999995698892025, iteration: 338799
loss: 1.0455081462860107,grad_norm: 0.6302590936679201, iteration: 338800
loss: 1.0255414247512817,grad_norm: 0.8584898055241227, iteration: 338801
loss: 1.015401005744934,grad_norm: 0.9999991736369961, iteration: 338802
loss: 1.0069665908813477,grad_norm: 0.9872560938772167, iteration: 338803
loss: 1.0059623718261719,grad_norm: 0.8662188210561889, iteration: 338804
loss: 1.0495151281356812,grad_norm: 0.789169569528527, iteration: 338805
loss: 1.0322186946868896,grad_norm: 0.8036739078445366, iteration: 338806
loss: 1.0115872621536255,grad_norm: 0.7321716762261129, iteration: 338807
loss: 1.0088523626327515,grad_norm: 0.7938568164834886, iteration: 338808
loss: 0.9946443438529968,grad_norm: 0.9503984185877288, iteration: 338809
loss: 1.0303550958633423,grad_norm: 0.9960373056904968, iteration: 338810
loss: 1.0310198068618774,grad_norm: 0.999999587391825, iteration: 338811
loss: 1.049881935119629,grad_norm: 0.9999998776303828, iteration: 338812
loss: 0.9918613433837891,grad_norm: 0.8653262814338866, iteration: 338813
loss: 1.0208882093429565,grad_norm: 0.9999992309976135, iteration: 338814
loss: 1.0289901494979858,grad_norm: 0.9462598063512684, iteration: 338815
loss: 1.0119167566299438,grad_norm: 0.91478379827346, iteration: 338816
loss: 1.0089609622955322,grad_norm: 0.7613082738107302, iteration: 338817
loss: 1.00791597366333,grad_norm: 0.9344251724077368, iteration: 338818
loss: 1.025510549545288,grad_norm: 0.8885764045068695, iteration: 338819
loss: 1.0103133916854858,grad_norm: 0.7186014387440636, iteration: 338820
loss: 0.9972381591796875,grad_norm: 0.7710699452190604, iteration: 338821
loss: 1.014945387840271,grad_norm: 0.6898641031953675, iteration: 338822
loss: 0.977252721786499,grad_norm: 0.8208358876134496, iteration: 338823
loss: 0.9778841137886047,grad_norm: 0.7084702037595061, iteration: 338824
loss: 1.0066795349121094,grad_norm: 0.8571580171670488, iteration: 338825
loss: 0.9930973052978516,grad_norm: 0.7258952347322261, iteration: 338826
loss: 1.0094146728515625,grad_norm: 0.952655477979333, iteration: 338827
loss: 1.0334904193878174,grad_norm: 0.7354001289303386, iteration: 338828
loss: 1.0053907632827759,grad_norm: 0.7610940722342618, iteration: 338829
loss: 0.9653818011283875,grad_norm: 0.7805354884232812, iteration: 338830
loss: 1.012382984161377,grad_norm: 0.8836848860513146, iteration: 338831
loss: 1.0326441526412964,grad_norm: 0.9555627265488038, iteration: 338832
loss: 1.0331922769546509,grad_norm: 0.7854773185437542, iteration: 338833
loss: 0.9881360530853271,grad_norm: 0.9999991218536594, iteration: 338834
loss: 0.9703835844993591,grad_norm: 0.8356354222073983, iteration: 338835
loss: 1.0354773998260498,grad_norm: 0.9999990700101885, iteration: 338836
loss: 1.0194454193115234,grad_norm: 0.8440633639972057, iteration: 338837
loss: 1.0222423076629639,grad_norm: 0.8095769147849321, iteration: 338838
loss: 0.992871880531311,grad_norm: 0.8201519728816842, iteration: 338839
loss: 1.019063115119934,grad_norm: 0.8352704458195104, iteration: 338840
loss: 0.9656527042388916,grad_norm: 0.7529329934748212, iteration: 338841
loss: 0.9853225946426392,grad_norm: 0.8598208949905978, iteration: 338842
loss: 0.9928692579269409,grad_norm: 0.8430132403986431, iteration: 338843
loss: 0.9400500655174255,grad_norm: 0.8049499489516339, iteration: 338844
loss: 1.129426121711731,grad_norm: 0.999999171391101, iteration: 338845
loss: 0.9847769737243652,grad_norm: 0.9165411502170976, iteration: 338846
loss: 0.982914924621582,grad_norm: 0.7305960656201809, iteration: 338847
loss: 1.001034140586853,grad_norm: 0.8647766321322093, iteration: 338848
loss: 1.0266146659851074,grad_norm: 0.8823150825413697, iteration: 338849
loss: 0.9902766346931458,grad_norm: 0.9091215269647256, iteration: 338850
loss: 0.9842497706413269,grad_norm: 0.803611266221599, iteration: 338851
loss: 1.0063464641571045,grad_norm: 0.9999991443241492, iteration: 338852
loss: 1.0021339654922485,grad_norm: 0.823610500450756, iteration: 338853
loss: 0.9543718099594116,grad_norm: 0.7627790116793464, iteration: 338854
loss: 0.9886441230773926,grad_norm: 0.8591991858464495, iteration: 338855
loss: 0.9898127317428589,grad_norm: 0.7544813762598189, iteration: 338856
loss: 1.017851710319519,grad_norm: 0.9999992863529851, iteration: 338857
loss: 0.9899470806121826,grad_norm: 0.700108278759764, iteration: 338858
loss: 1.0124483108520508,grad_norm: 0.8883750295152357, iteration: 338859
loss: 0.9585623741149902,grad_norm: 0.7800891056974285, iteration: 338860
loss: 0.9883667826652527,grad_norm: 0.8983719432884726, iteration: 338861
loss: 1.1053144931793213,grad_norm: 0.9999992774880218, iteration: 338862
loss: 0.9903826117515564,grad_norm: 0.7114048453803393, iteration: 338863
loss: 1.022139310836792,grad_norm: 0.7547378087662339, iteration: 338864
loss: 1.0151822566986084,grad_norm: 0.8301109656479019, iteration: 338865
loss: 0.9657083749771118,grad_norm: 0.8247143363430751, iteration: 338866
loss: 0.9880082607269287,grad_norm: 0.8064115796113166, iteration: 338867
loss: 0.9582735300064087,grad_norm: 0.8660761441588007, iteration: 338868
loss: 0.9853905439376831,grad_norm: 0.8552761579534247, iteration: 338869
loss: 0.9898110032081604,grad_norm: 0.7657187094420411, iteration: 338870
loss: 0.9891986846923828,grad_norm: 0.6627401464140551, iteration: 338871
loss: 1.0260262489318848,grad_norm: 0.9942698907284782, iteration: 338872
loss: 0.9981227517127991,grad_norm: 0.7735916058899328, iteration: 338873
loss: 0.9968166947364807,grad_norm: 0.8281380668993533, iteration: 338874
loss: 0.9861993193626404,grad_norm: 0.8708886788998249, iteration: 338875
loss: 1.0281282663345337,grad_norm: 0.9999991846629319, iteration: 338876
loss: 1.0085225105285645,grad_norm: 0.8135001226805566, iteration: 338877
loss: 0.9651168584823608,grad_norm: 0.798719115499273, iteration: 338878
loss: 0.9977587461471558,grad_norm: 0.8831836926821269, iteration: 338879
loss: 1.0779073238372803,grad_norm: 0.9999994378434546, iteration: 338880
loss: 1.0076640844345093,grad_norm: 0.9999992194326287, iteration: 338881
loss: 1.0437681674957275,grad_norm: 0.9999996174444375, iteration: 338882
loss: 1.0212311744689941,grad_norm: 0.8458274064634895, iteration: 338883
loss: 1.0161226987838745,grad_norm: 0.855722513193979, iteration: 338884
loss: 0.9953645467758179,grad_norm: 0.8561611296076963, iteration: 338885
loss: 1.0153377056121826,grad_norm: 0.9968604855463078, iteration: 338886
loss: 1.0193790197372437,grad_norm: 0.8370850847172453, iteration: 338887
loss: 1.0019606351852417,grad_norm: 0.7658066273806844, iteration: 338888
loss: 0.9901957511901855,grad_norm: 0.9021233992719273, iteration: 338889
loss: 1.0261759757995605,grad_norm: 0.9999998022399345, iteration: 338890
loss: 1.000797152519226,grad_norm: 0.8962758381811292, iteration: 338891
loss: 1.0301352739334106,grad_norm: 0.7542465747833038, iteration: 338892
loss: 1.0129964351654053,grad_norm: 0.736923765022259, iteration: 338893
loss: 1.0255422592163086,grad_norm: 0.8160677190726829, iteration: 338894
loss: 1.000579595565796,grad_norm: 0.7957272159339086, iteration: 338895
loss: 1.0041953325271606,grad_norm: 0.9999992395944788, iteration: 338896
loss: 0.9868385195732117,grad_norm: 0.9999997078171505, iteration: 338897
loss: 0.9985786080360413,grad_norm: 0.8756730415735384, iteration: 338898
loss: 0.990053117275238,grad_norm: 0.8544534589269267, iteration: 338899
loss: 1.0211325883865356,grad_norm: 0.8165743788396626, iteration: 338900
loss: 0.9908620119094849,grad_norm: 0.7197013228229636, iteration: 338901
loss: 1.0043377876281738,grad_norm: 0.8035612663850907, iteration: 338902
loss: 1.0158939361572266,grad_norm: 0.9999995590180224, iteration: 338903
loss: 1.0091303586959839,grad_norm: 0.6932939802437904, iteration: 338904
loss: 1.0120220184326172,grad_norm: 0.8204680286646954, iteration: 338905
loss: 0.9766678214073181,grad_norm: 0.8742299947545472, iteration: 338906
loss: 0.9901638627052307,grad_norm: 0.9848821696161004, iteration: 338907
loss: 1.0042928457260132,grad_norm: 0.9396485325054416, iteration: 338908
loss: 1.0307644605636597,grad_norm: 0.8365028070387082, iteration: 338909
loss: 0.9767894148826599,grad_norm: 0.8382291727442688, iteration: 338910
loss: 1.0084213018417358,grad_norm: 0.8108999209995607, iteration: 338911
loss: 1.03853440284729,grad_norm: 0.8122888747025698, iteration: 338912
loss: 1.0661755800247192,grad_norm: 0.9999997507265963, iteration: 338913
loss: 0.9995571374893188,grad_norm: 0.8055285555673057, iteration: 338914
loss: 1.0175198316574097,grad_norm: 0.8771137803507245, iteration: 338915
loss: 0.9933136701583862,grad_norm: 0.791288491742107, iteration: 338916
loss: 1.0368304252624512,grad_norm: 0.8070890489998044, iteration: 338917
loss: 1.0472097396850586,grad_norm: 0.9999992143495082, iteration: 338918
loss: 1.047371745109558,grad_norm: 0.9999999712692174, iteration: 338919
loss: 0.9869093298912048,grad_norm: 0.9234467160426557, iteration: 338920
loss: 1.0128227472305298,grad_norm: 0.8520811391161143, iteration: 338921
loss: 1.008195400238037,grad_norm: 0.7312846881146239, iteration: 338922
loss: 0.9908621311187744,grad_norm: 0.7889020911200073, iteration: 338923
loss: 1.0112484693527222,grad_norm: 0.8386602786484516, iteration: 338924
loss: 1.0574567317962646,grad_norm: 0.9999991319221798, iteration: 338925
loss: 0.9944589138031006,grad_norm: 0.8476749352992821, iteration: 338926
loss: 0.9806753993034363,grad_norm: 0.721516692238728, iteration: 338927
loss: 0.9805748462677002,grad_norm: 0.9850969918747355, iteration: 338928
loss: 0.9788705110549927,grad_norm: 0.9082837829892485, iteration: 338929
loss: 0.9950090646743774,grad_norm: 0.8468123121421709, iteration: 338930
loss: 1.0247961282730103,grad_norm: 0.8122772328428971, iteration: 338931
loss: 1.0056259632110596,grad_norm: 0.7490246396713662, iteration: 338932
loss: 0.9914214611053467,grad_norm: 0.7884935795233948, iteration: 338933
loss: 0.9835876226425171,grad_norm: 0.9999995884005617, iteration: 338934
loss: 1.0137745141983032,grad_norm: 0.9999992571920724, iteration: 338935
loss: 0.9839431643486023,grad_norm: 0.8332744604752912, iteration: 338936
loss: 1.0130082368850708,grad_norm: 0.7799736180128087, iteration: 338937
loss: 1.0509657859802246,grad_norm: 0.9769759635673635, iteration: 338938
loss: 1.0371861457824707,grad_norm: 0.9815073071425777, iteration: 338939
loss: 0.9431822299957275,grad_norm: 0.9999997433479659, iteration: 338940
loss: 1.0077732801437378,grad_norm: 0.9038128315303203, iteration: 338941
loss: 1.0033084154129028,grad_norm: 0.9999997051126509, iteration: 338942
loss: 0.989492654800415,grad_norm: 0.9166958647553701, iteration: 338943
loss: 1.032074213027954,grad_norm: 0.7937195242067764, iteration: 338944
loss: 1.0457515716552734,grad_norm: 0.9747188155368297, iteration: 338945
loss: 0.9892174601554871,grad_norm: 0.9807848644000681, iteration: 338946
loss: 1.0009520053863525,grad_norm: 0.7742779047892804, iteration: 338947
loss: 0.975593626499176,grad_norm: 0.7921922441930286, iteration: 338948
loss: 0.9725930690765381,grad_norm: 0.857235236592943, iteration: 338949
loss: 0.9916625618934631,grad_norm: 0.9999992796911046, iteration: 338950
loss: 0.9795222282409668,grad_norm: 0.8716846571669472, iteration: 338951
loss: 1.0397512912750244,grad_norm: 0.7470456149185876, iteration: 338952
loss: 0.9947348237037659,grad_norm: 0.7490630971472885, iteration: 338953
loss: 0.972747802734375,grad_norm: 0.8249442754379382, iteration: 338954
loss: 0.9874578714370728,grad_norm: 0.9999999240773316, iteration: 338955
loss: 1.032233476638794,grad_norm: 0.9001491781094046, iteration: 338956
loss: 1.0071333646774292,grad_norm: 0.7442395681500675, iteration: 338957
loss: 1.0254462957382202,grad_norm: 0.7588671534657168, iteration: 338958
loss: 0.9840130805969238,grad_norm: 0.9137050078476823, iteration: 338959
loss: 1.0313924551010132,grad_norm: 0.8823199744773035, iteration: 338960
loss: 0.9901502728462219,grad_norm: 0.7079542075719715, iteration: 338961
loss: 0.9936552047729492,grad_norm: 0.7304961880215518, iteration: 338962
loss: 0.9755417704582214,grad_norm: 0.8548134834350702, iteration: 338963
loss: 1.0445953607559204,grad_norm: 0.9999995117413716, iteration: 338964
loss: 1.0083590745925903,grad_norm: 0.7675143947386481, iteration: 338965
loss: 0.993432879447937,grad_norm: 0.9835255340982638, iteration: 338966
loss: 0.9994521141052246,grad_norm: 0.9370754521796066, iteration: 338967
loss: 1.0118975639343262,grad_norm: 0.7764286713755403, iteration: 338968
loss: 1.0175843238830566,grad_norm: 0.9999996329813738, iteration: 338969
loss: 0.9880539774894714,grad_norm: 0.7115506048119894, iteration: 338970
loss: 1.070401668548584,grad_norm: 0.9999998868912391, iteration: 338971
loss: 1.032878041267395,grad_norm: 0.8816281991934819, iteration: 338972
loss: 1.0222907066345215,grad_norm: 0.7577763902377419, iteration: 338973
loss: 0.9985486268997192,grad_norm: 0.923282255184207, iteration: 338974
loss: 1.0087358951568604,grad_norm: 0.9312440493469051, iteration: 338975
loss: 1.0076947212219238,grad_norm: 0.999999230098757, iteration: 338976
loss: 1.0193769931793213,grad_norm: 0.7818027470549342, iteration: 338977
loss: 1.0042657852172852,grad_norm: 0.7905711026975023, iteration: 338978
loss: 1.0029199123382568,grad_norm: 0.9999990792136394, iteration: 338979
loss: 0.9964300990104675,grad_norm: 0.9001864715374294, iteration: 338980
loss: 0.9937557578086853,grad_norm: 0.7879447037776821, iteration: 338981
loss: 0.9885566234588623,grad_norm: 0.9999991050600738, iteration: 338982
loss: 0.9914940595626831,grad_norm: 0.8709029156924321, iteration: 338983
loss: 0.9605703949928284,grad_norm: 0.7238582108541177, iteration: 338984
loss: 0.9798316359519958,grad_norm: 0.77112433775764, iteration: 338985
loss: 0.9795175194740295,grad_norm: 0.9832347401319421, iteration: 338986
loss: 1.06673002243042,grad_norm: 0.9999997165090062, iteration: 338987
loss: 0.9450060725212097,grad_norm: 0.9999991403288688, iteration: 338988
loss: 1.0048002004623413,grad_norm: 0.8753800094598795, iteration: 338989
loss: 1.0501224994659424,grad_norm: 0.7939683093418908, iteration: 338990
loss: 1.1602290868759155,grad_norm: 0.9999990255887345, iteration: 338991
loss: 1.0277230739593506,grad_norm: 0.7788208374530747, iteration: 338992
loss: 0.9857851266860962,grad_norm: 0.6543691995011877, iteration: 338993
loss: 1.0016659498214722,grad_norm: 0.7092690156853672, iteration: 338994
loss: 1.0200637578964233,grad_norm: 0.8393437283962694, iteration: 338995
loss: 1.0394021272659302,grad_norm: 0.9999997996944118, iteration: 338996
loss: 1.0146061182022095,grad_norm: 0.9999991120812356, iteration: 338997
loss: 1.0114538669586182,grad_norm: 0.7454294975210419, iteration: 338998
loss: 1.0435069799423218,grad_norm: 0.9999996269226212, iteration: 338999
loss: 1.0107223987579346,grad_norm: 0.8917233090312177, iteration: 339000
loss: 0.9803198575973511,grad_norm: 0.8682218443677281, iteration: 339001
loss: 1.1068847179412842,grad_norm: 0.9999997756066792, iteration: 339002
loss: 0.9798147082328796,grad_norm: 0.8193611454503217, iteration: 339003
loss: 0.9773991703987122,grad_norm: 0.9999991416739178, iteration: 339004
loss: 1.0132055282592773,grad_norm: 0.9145509711765298, iteration: 339005
loss: 1.007298469543457,grad_norm: 0.6982783374179885, iteration: 339006
loss: 0.9632606506347656,grad_norm: 0.7799403399148082, iteration: 339007
loss: 1.0162023305892944,grad_norm: 0.9999993270713161, iteration: 339008
loss: 1.0117734670639038,grad_norm: 0.9999990908996299, iteration: 339009
loss: 0.9935170412063599,grad_norm: 0.9999991696309647, iteration: 339010
loss: 1.0010411739349365,grad_norm: 0.8403691440798229, iteration: 339011
loss: 0.994288980960846,grad_norm: 0.9009280348661629, iteration: 339012
loss: 1.0260084867477417,grad_norm: 0.9120722653642978, iteration: 339013
loss: 1.0000848770141602,grad_norm: 0.7189120048672014, iteration: 339014
loss: 0.9734845161437988,grad_norm: 0.9925717947575231, iteration: 339015
loss: 0.9819550514221191,grad_norm: 0.836895807471447, iteration: 339016
loss: 1.0031436681747437,grad_norm: 0.6628669221112233, iteration: 339017
loss: 1.0180399417877197,grad_norm: 0.7857551199662414, iteration: 339018
loss: 0.9829424619674683,grad_norm: 0.820082487848363, iteration: 339019
loss: 1.0141780376434326,grad_norm: 0.8388246325112909, iteration: 339020
loss: 0.9965916872024536,grad_norm: 0.7598472174021458, iteration: 339021
loss: 1.0418400764465332,grad_norm: 0.9137188382446565, iteration: 339022
loss: 1.0241105556488037,grad_norm: 0.837385043288815, iteration: 339023
loss: 0.9987797737121582,grad_norm: 0.7375162649635445, iteration: 339024
loss: 0.9848407506942749,grad_norm: 0.7908019080234292, iteration: 339025
loss: 1.000003695487976,grad_norm: 0.8495329497866166, iteration: 339026
loss: 1.0106037855148315,grad_norm: 0.890876990478481, iteration: 339027
loss: 1.0597399473190308,grad_norm: 0.7942555279246517, iteration: 339028
loss: 1.0053523778915405,grad_norm: 0.7480137806829664, iteration: 339029
loss: 0.9960310459136963,grad_norm: 0.8349653238919221, iteration: 339030
loss: 1.0013989210128784,grad_norm: 0.9999990625734092, iteration: 339031
loss: 0.9990188479423523,grad_norm: 0.9999990247255808, iteration: 339032
loss: 1.022525429725647,grad_norm: 0.8169069699684053, iteration: 339033
loss: 0.9751021265983582,grad_norm: 0.6970701112670682, iteration: 339034
loss: 0.988317608833313,grad_norm: 0.8724152943434693, iteration: 339035
loss: 1.0011652708053589,grad_norm: 0.9128083872585138, iteration: 339036
loss: 0.995399534702301,grad_norm: 0.7685242017031167, iteration: 339037
loss: 1.0124263763427734,grad_norm: 0.7898618838706253, iteration: 339038
loss: 1.0666604042053223,grad_norm: 0.9999992564327441, iteration: 339039
loss: 0.9860358834266663,grad_norm: 0.7480152830670812, iteration: 339040
loss: 0.988439679145813,grad_norm: 0.8252764885179262, iteration: 339041
loss: 1.0059630870819092,grad_norm: 0.8652554073257949, iteration: 339042
loss: 1.0293749570846558,grad_norm: 0.9999994147380085, iteration: 339043
loss: 0.9885880351066589,grad_norm: 0.8632670987885537, iteration: 339044
loss: 1.001236081123352,grad_norm: 0.7933392871933419, iteration: 339045
loss: 1.010223150253296,grad_norm: 0.7230308940269914, iteration: 339046
loss: 1.0272098779678345,grad_norm: 0.9999997357182838, iteration: 339047
loss: 0.978020429611206,grad_norm: 0.9019904652323434, iteration: 339048
loss: 1.0049948692321777,grad_norm: 0.9999991805164784, iteration: 339049
loss: 1.0017542839050293,grad_norm: 0.8740908845593977, iteration: 339050
loss: 0.977878749370575,grad_norm: 0.8373796077763082, iteration: 339051
loss: 0.965559184551239,grad_norm: 0.7873089165823073, iteration: 339052
loss: 0.9703705906867981,grad_norm: 0.8949824071368817, iteration: 339053
loss: 0.9977302551269531,grad_norm: 0.8321475018754185, iteration: 339054
loss: 1.035253882408142,grad_norm: 0.8231848414711038, iteration: 339055
loss: 1.0116424560546875,grad_norm: 0.8304060031279646, iteration: 339056
loss: 1.0133262872695923,grad_norm: 0.8820661262585884, iteration: 339057
loss: 0.9673153162002563,grad_norm: 0.8354077025673959, iteration: 339058
loss: 0.9887436628341675,grad_norm: 0.9024880102159336, iteration: 339059
loss: 0.9793687462806702,grad_norm: 0.7313876797316949, iteration: 339060
loss: 1.0135396718978882,grad_norm: 0.7345658476281969, iteration: 339061
loss: 0.9887734651565552,grad_norm: 0.8491279039521133, iteration: 339062
loss: 0.9709230661392212,grad_norm: 0.8340939787605292, iteration: 339063
loss: 0.9956472516059875,grad_norm: 0.6848840114585125, iteration: 339064
loss: 0.988123893737793,grad_norm: 0.7241916512268647, iteration: 339065
loss: 1.0118350982666016,grad_norm: 0.860086520345964, iteration: 339066
loss: 1.0043604373931885,grad_norm: 0.8120186206991581, iteration: 339067
loss: 0.9711143374443054,grad_norm: 0.7673153276937402, iteration: 339068
loss: 1.0030018091201782,grad_norm: 0.9552678590194746, iteration: 339069
loss: 1.0011250972747803,grad_norm: 0.9228913241717764, iteration: 339070
loss: 1.0262959003448486,grad_norm: 0.7074741217346694, iteration: 339071
loss: 1.0180317163467407,grad_norm: 0.7505454610504546, iteration: 339072
loss: 0.962412416934967,grad_norm: 0.7134123028341169, iteration: 339073
loss: 0.9931927919387817,grad_norm: 0.8867216504826276, iteration: 339074
loss: 0.9646782875061035,grad_norm: 0.9054915300141974, iteration: 339075
loss: 0.9844681620597839,grad_norm: 0.8322324148147262, iteration: 339076
loss: 0.9890350103378296,grad_norm: 0.8284689301980595, iteration: 339077
loss: 0.9569129943847656,grad_norm: 0.88519901526356, iteration: 339078
loss: 0.9925847053527832,grad_norm: 0.9999991227700933, iteration: 339079
loss: 0.990958571434021,grad_norm: 0.823690620740259, iteration: 339080
loss: 1.0226271152496338,grad_norm: 0.8902086321018614, iteration: 339081
loss: 0.9865502119064331,grad_norm: 0.809613953155628, iteration: 339082
loss: 0.9572582840919495,grad_norm: 0.8615176801254578, iteration: 339083
loss: 0.9714545607566833,grad_norm: 0.8104725506586188, iteration: 339084
loss: 0.959841787815094,grad_norm: 0.7759604902381653, iteration: 339085
loss: 1.0267455577850342,grad_norm: 0.7374640989531143, iteration: 339086
loss: 1.0474926233291626,grad_norm: 0.8748040533894857, iteration: 339087
loss: 0.9786989092826843,grad_norm: 0.9043150966273784, iteration: 339088
loss: 1.0000255107879639,grad_norm: 0.7905724481798807, iteration: 339089
loss: 0.9835861921310425,grad_norm: 0.9999991019580927, iteration: 339090
loss: 1.0084995031356812,grad_norm: 0.8150052085521317, iteration: 339091
loss: 0.9792137145996094,grad_norm: 0.9999991840057614, iteration: 339092
loss: 1.1133644580841064,grad_norm: 0.9999997446981057, iteration: 339093
loss: 1.0025854110717773,grad_norm: 0.9999992535811482, iteration: 339094
loss: 0.9798969030380249,grad_norm: 0.9999990130149475, iteration: 339095
loss: 1.0022238492965698,grad_norm: 0.7895589051660391, iteration: 339096
loss: 0.980318009853363,grad_norm: 0.7887490356687138, iteration: 339097
loss: 0.9524301290512085,grad_norm: 0.9292419832197761, iteration: 339098
loss: 1.018136739730835,grad_norm: 0.8369195914604948, iteration: 339099
loss: 1.0025153160095215,grad_norm: 0.9987884646603501, iteration: 339100
loss: 1.024856686592102,grad_norm: 0.88410815566725, iteration: 339101
loss: 0.997107207775116,grad_norm: 0.7134589042923589, iteration: 339102
loss: 0.9747114181518555,grad_norm: 0.8373103230422834, iteration: 339103
loss: 1.0212697982788086,grad_norm: 0.92492908516825, iteration: 339104
loss: 1.0288947820663452,grad_norm: 0.6417689333694099, iteration: 339105
loss: 1.0515156984329224,grad_norm: 0.9999992177412127, iteration: 339106
loss: 1.0216180086135864,grad_norm: 0.8126845242497048, iteration: 339107
loss: 0.9510697722434998,grad_norm: 0.795663989389936, iteration: 339108
loss: 0.980025053024292,grad_norm: 0.9257729585258265, iteration: 339109
loss: 1.010847806930542,grad_norm: 0.7552832666272868, iteration: 339110
loss: 1.016631007194519,grad_norm: 0.795309975309507, iteration: 339111
loss: 0.9863606691360474,grad_norm: 0.7269793603336374, iteration: 339112
loss: 1.0147442817687988,grad_norm: 0.8279423033687865, iteration: 339113
loss: 1.011179804801941,grad_norm: 0.7643988014749667, iteration: 339114
loss: 0.985939621925354,grad_norm: 0.7767250300862629, iteration: 339115
loss: 1.006422758102417,grad_norm: 0.7058563685578928, iteration: 339116
loss: 1.0087170600891113,grad_norm: 0.9999991303002611, iteration: 339117
loss: 0.998841404914856,grad_norm: 0.7169344444511835, iteration: 339118
loss: 1.0042434930801392,grad_norm: 0.9564810578295192, iteration: 339119
loss: 0.9989253878593445,grad_norm: 0.8423086188905687, iteration: 339120
loss: 0.97637939453125,grad_norm: 0.9999989967113386, iteration: 339121
loss: 1.0101209878921509,grad_norm: 0.9999990123996076, iteration: 339122
loss: 1.0319172143936157,grad_norm: 0.9999995511947118, iteration: 339123
loss: 0.9959819316864014,grad_norm: 0.723737020284173, iteration: 339124
loss: 0.9939864873886108,grad_norm: 0.9999991692736697, iteration: 339125
loss: 1.0072418451309204,grad_norm: 0.8048119654142233, iteration: 339126
loss: 1.0208431482315063,grad_norm: 0.8185184243380795, iteration: 339127
loss: 0.9813739657402039,grad_norm: 0.843656952979151, iteration: 339128
loss: 0.9699753522872925,grad_norm: 0.8237303233132354, iteration: 339129
loss: 1.0188335180282593,grad_norm: 0.7784956169598024, iteration: 339130
loss: 1.0220519304275513,grad_norm: 0.9999990351459174, iteration: 339131
loss: 1.0243858098983765,grad_norm: 0.8077278972872413, iteration: 339132
loss: 0.9978957772254944,grad_norm: 0.8719847651212134, iteration: 339133
loss: 0.9918996095657349,grad_norm: 0.817811441552231, iteration: 339134
loss: 0.9713542461395264,grad_norm: 0.7397136412879123, iteration: 339135
loss: 1.0222630500793457,grad_norm: 0.7005970934098495, iteration: 339136
loss: 1.0057626962661743,grad_norm: 0.7415876197793808, iteration: 339137
loss: 1.0430881977081299,grad_norm: 0.9846720082985874, iteration: 339138
loss: 1.019789695739746,grad_norm: 0.9075460458330278, iteration: 339139
loss: 0.9993353486061096,grad_norm: 0.7572321027545242, iteration: 339140
loss: 0.9883413910865784,grad_norm: 0.8304020125618027, iteration: 339141
loss: 1.0568345785140991,grad_norm: 0.9958736322936561, iteration: 339142
loss: 1.0048904418945312,grad_norm: 0.8773642789723105, iteration: 339143
loss: 1.0015913248062134,grad_norm: 0.751704875979536, iteration: 339144
loss: 0.984646201133728,grad_norm: 0.925123282985645, iteration: 339145
loss: 0.9848366379737854,grad_norm: 0.8086525450300078, iteration: 339146
loss: 0.9914068579673767,grad_norm: 0.9537543267480694, iteration: 339147
loss: 0.9897775053977966,grad_norm: 0.7884918991594848, iteration: 339148
loss: 1.0060667991638184,grad_norm: 0.7683545690515328, iteration: 339149
loss: 1.041159987449646,grad_norm: 0.8787399368470468, iteration: 339150
loss: 0.9723952412605286,grad_norm: 0.8819434183613318, iteration: 339151
loss: 1.0370573997497559,grad_norm: 0.8255672657594494, iteration: 339152
loss: 1.0211131572723389,grad_norm: 0.8659624026992219, iteration: 339153
loss: 0.9976953864097595,grad_norm: 0.9196925627773449, iteration: 339154
loss: 0.9919570088386536,grad_norm: 0.8121826403087246, iteration: 339155
loss: 1.0087854862213135,grad_norm: 0.8619388903370673, iteration: 339156
loss: 1.0193232297897339,grad_norm: 0.7592141004758559, iteration: 339157
loss: 1.001650333404541,grad_norm: 0.9066360271069224, iteration: 339158
loss: 1.019002079963684,grad_norm: 0.7528555805868024, iteration: 339159
loss: 0.9743077754974365,grad_norm: 0.8809684924018739, iteration: 339160
loss: 0.9765067100524902,grad_norm: 0.7994524305589592, iteration: 339161
loss: 0.9931147694587708,grad_norm: 0.9180341395930708, iteration: 339162
loss: 1.0490802526474,grad_norm: 0.6702492502485771, iteration: 339163
loss: 0.9815388321876526,grad_norm: 0.8763313991711141, iteration: 339164
loss: 0.9888964891433716,grad_norm: 0.6781824129084763, iteration: 339165
loss: 0.9583624005317688,grad_norm: 0.694956348502807, iteration: 339166
loss: 1.002152442932129,grad_norm: 0.9273438694585617, iteration: 339167
loss: 0.9804590940475464,grad_norm: 0.851672390048778, iteration: 339168
loss: 1.012281894683838,grad_norm: 0.9999991324973341, iteration: 339169
loss: 0.9946296811103821,grad_norm: 0.8025925106604627, iteration: 339170
loss: 0.9732146263122559,grad_norm: 0.9999990426997989, iteration: 339171
loss: 0.9933463335037231,grad_norm: 0.9999991286142961, iteration: 339172
loss: 0.9963710308074951,grad_norm: 0.7159056965736681, iteration: 339173
loss: 1.0351580381393433,grad_norm: 0.8879151374030372, iteration: 339174
loss: 1.014551043510437,grad_norm: 0.9999991641794528, iteration: 339175
loss: 0.9651454091072083,grad_norm: 0.7239591597341151, iteration: 339176
loss: 0.9890028238296509,grad_norm: 0.7110176842848146, iteration: 339177
loss: 1.0435645580291748,grad_norm: 0.9999996082262934, iteration: 339178
loss: 1.0412085056304932,grad_norm: 0.9999996896814713, iteration: 339179
loss: 1.0190176963806152,grad_norm: 0.8897768661800051, iteration: 339180
loss: 0.9592946767807007,grad_norm: 0.8428551028312667, iteration: 339181
loss: 0.9801743626594543,grad_norm: 0.6975561334720267, iteration: 339182
loss: 1.0197123289108276,grad_norm: 0.839552174709955, iteration: 339183
loss: 1.0011733770370483,grad_norm: 0.6791511583616192, iteration: 339184
loss: 1.051253318786621,grad_norm: 0.9034913193219415, iteration: 339185
loss: 1.0064548254013062,grad_norm: 0.741210382794153, iteration: 339186
loss: 1.0184897184371948,grad_norm: 0.9472028173263156, iteration: 339187
loss: 0.96126788854599,grad_norm: 0.7048459131066297, iteration: 339188
loss: 0.9725918173789978,grad_norm: 0.6690897329655138, iteration: 339189
loss: 1.0052410364151,grad_norm: 0.6955388880000406, iteration: 339190
loss: 0.9868082404136658,grad_norm: 0.8355271572351519, iteration: 339191
loss: 0.9771199822425842,grad_norm: 0.9159675524026795, iteration: 339192
loss: 0.985606849193573,grad_norm: 0.9999989744128135, iteration: 339193
loss: 1.0070496797561646,grad_norm: 0.7889755595837942, iteration: 339194
loss: 0.9896233677864075,grad_norm: 0.9601311470588358, iteration: 339195
loss: 0.9911586046218872,grad_norm: 0.9005301477721341, iteration: 339196
loss: 1.0418360233306885,grad_norm: 0.7352964713895479, iteration: 339197
loss: 0.9986387491226196,grad_norm: 0.904505736039228, iteration: 339198
loss: 1.0106960535049438,grad_norm: 0.8926321032558387, iteration: 339199
loss: 1.0212130546569824,grad_norm: 0.9550489820687602, iteration: 339200
loss: 0.9836482405662537,grad_norm: 0.9192990994845334, iteration: 339201
loss: 1.0169512033462524,grad_norm: 0.6555547168912, iteration: 339202
loss: 1.0332636833190918,grad_norm: 0.999999167955272, iteration: 339203
loss: 1.0043025016784668,grad_norm: 0.805098831782266, iteration: 339204
loss: 0.9950591921806335,grad_norm: 0.8773236346377842, iteration: 339205
loss: 1.0647237300872803,grad_norm: 0.8174642341997821, iteration: 339206
loss: 1.004814863204956,grad_norm: 0.7779672403613526, iteration: 339207
loss: 1.0544648170471191,grad_norm: 0.9999995819824402, iteration: 339208
loss: 0.9919294118881226,grad_norm: 0.7203408784894557, iteration: 339209
loss: 0.9996871948242188,grad_norm: 0.9616134766720033, iteration: 339210
loss: 0.9862037301063538,grad_norm: 0.8163118148904295, iteration: 339211
loss: 1.0271049737930298,grad_norm: 0.9999991966354653, iteration: 339212
loss: 0.961155891418457,grad_norm: 0.8142171997326877, iteration: 339213
loss: 0.9822582006454468,grad_norm: 0.8284115709808607, iteration: 339214
loss: 1.0082658529281616,grad_norm: 0.9515295396207186, iteration: 339215
loss: 0.9871440529823303,grad_norm: 0.8060685150035805, iteration: 339216
loss: 1.0136330127716064,grad_norm: 0.7718832235625295, iteration: 339217
loss: 0.9966261386871338,grad_norm: 0.6661836595669501, iteration: 339218
loss: 1.0014543533325195,grad_norm: 0.9517239524004272, iteration: 339219
loss: 0.9952720999717712,grad_norm: 0.6744407616801179, iteration: 339220
loss: 0.9873170256614685,grad_norm: 0.8677584134592309, iteration: 339221
loss: 1.0222625732421875,grad_norm: 0.8246439283501206, iteration: 339222
loss: 0.9423180818557739,grad_norm: 0.7749489664442366, iteration: 339223
loss: 0.9767135977745056,grad_norm: 0.9220195933136355, iteration: 339224
loss: 1.0109610557556152,grad_norm: 0.8278777165841279, iteration: 339225
loss: 0.9757885932922363,grad_norm: 0.6694925243597208, iteration: 339226
loss: 0.9932184219360352,grad_norm: 0.8165572546315832, iteration: 339227
loss: 0.9952676296234131,grad_norm: 0.8688618543370891, iteration: 339228
loss: 0.9774693250656128,grad_norm: 0.8382088876918639, iteration: 339229
loss: 1.0051074028015137,grad_norm: 0.769462542577851, iteration: 339230
loss: 0.9900302886962891,grad_norm: 0.9999991933493243, iteration: 339231
loss: 1.0103285312652588,grad_norm: 0.9194693969813639, iteration: 339232
loss: 1.0199220180511475,grad_norm: 0.887194166708555, iteration: 339233
loss: 1.0430899858474731,grad_norm: 0.9999997754252796, iteration: 339234
loss: 0.9873649477958679,grad_norm: 0.9157461925482048, iteration: 339235
loss: 1.0163713693618774,grad_norm: 0.9892861829889443, iteration: 339236
loss: 1.023741364479065,grad_norm: 0.8478485879725101, iteration: 339237
loss: 0.9886152148246765,grad_norm: 0.7583935176351929, iteration: 339238
loss: 1.0298638343811035,grad_norm: 0.6918686969174153, iteration: 339239
loss: 0.9969722032546997,grad_norm: 0.8822165413525823, iteration: 339240
loss: 1.0552035570144653,grad_norm: 0.9999999414913623, iteration: 339241
loss: 0.9974127411842346,grad_norm: 0.9079206495306884, iteration: 339242
loss: 0.974627673625946,grad_norm: 0.939823804285546, iteration: 339243
loss: 1.004486322402954,grad_norm: 0.7873087189232207, iteration: 339244
loss: 0.9936762452125549,grad_norm: 0.8553008076276811, iteration: 339245
loss: 0.985752522945404,grad_norm: 0.7203713690557926, iteration: 339246
loss: 1.0171869993209839,grad_norm: 0.7881853339547586, iteration: 339247
loss: 0.9456726312637329,grad_norm: 0.7379232558108896, iteration: 339248
loss: 1.0351135730743408,grad_norm: 0.9999994069492957, iteration: 339249
loss: 0.9698660969734192,grad_norm: 0.8443157007622917, iteration: 339250
loss: 0.9828481078147888,grad_norm: 0.9999989557698741, iteration: 339251
loss: 0.9759113192558289,grad_norm: 0.7801283720700605, iteration: 339252
loss: 0.995293140411377,grad_norm: 0.766457324576031, iteration: 339253
loss: 1.0538041591644287,grad_norm: 0.8260260479806434, iteration: 339254
loss: 1.002731442451477,grad_norm: 0.9999990813648246, iteration: 339255
loss: 1.0395233631134033,grad_norm: 0.8145882388777969, iteration: 339256
loss: 1.0114940404891968,grad_norm: 0.9999990550907404, iteration: 339257
loss: 1.012807011604309,grad_norm: 0.844802451454881, iteration: 339258
loss: 0.9834489226341248,grad_norm: 0.9999990487702412, iteration: 339259
loss: 0.9915882349014282,grad_norm: 0.7899585865455616, iteration: 339260
loss: 1.0172404050827026,grad_norm: 0.8421712044321189, iteration: 339261
loss: 1.015981674194336,grad_norm: 0.9417377225990468, iteration: 339262
loss: 0.973580002784729,grad_norm: 0.7994659142093039, iteration: 339263
loss: 0.995622456073761,grad_norm: 0.6387058850195099, iteration: 339264
loss: 0.9752939939498901,grad_norm: 0.6995411401093329, iteration: 339265
loss: 0.9985003471374512,grad_norm: 0.9094743585929616, iteration: 339266
loss: 0.9642215371131897,grad_norm: 0.9999990795944328, iteration: 339267
loss: 0.9845600128173828,grad_norm: 0.7830570906982468, iteration: 339268
loss: 1.017433524131775,grad_norm: 0.7226364926423304, iteration: 339269
loss: 0.9954819679260254,grad_norm: 0.7907405159204018, iteration: 339270
loss: 1.0020328760147095,grad_norm: 0.7579005527606577, iteration: 339271
loss: 1.0113722085952759,grad_norm: 0.9050038123322742, iteration: 339272
loss: 0.993182897567749,grad_norm: 0.9846650522184982, iteration: 339273
loss: 1.0239388942718506,grad_norm: 0.8536309421663876, iteration: 339274
loss: 0.9922475814819336,grad_norm: 0.7923662506975301, iteration: 339275
loss: 1.005409598350525,grad_norm: 0.9191195904882202, iteration: 339276
loss: 1.0441820621490479,grad_norm: 0.8825901420640597, iteration: 339277
loss: 1.0135411024093628,grad_norm: 0.8137813915993627, iteration: 339278
loss: 1.034691333770752,grad_norm: 0.6430111702430243, iteration: 339279
loss: 0.9580544829368591,grad_norm: 0.7358110382861073, iteration: 339280
loss: 1.036839246749878,grad_norm: 0.8476048290257133, iteration: 339281
loss: 0.9937349557876587,grad_norm: 0.720891478841557, iteration: 339282
loss: 0.9784780144691467,grad_norm: 0.7928737113984019, iteration: 339283
loss: 1.0122569799423218,grad_norm: 0.9542991770156698, iteration: 339284
loss: 1.0009639263153076,grad_norm: 0.804570187598413, iteration: 339285
loss: 1.022117018699646,grad_norm: 0.7428469370305464, iteration: 339286
loss: 1.0178072452545166,grad_norm: 0.949525328628916, iteration: 339287
loss: 0.9990127682685852,grad_norm: 0.8316482105891301, iteration: 339288
loss: 1.0147669315338135,grad_norm: 0.8141583998300267, iteration: 339289
loss: 0.9656042456626892,grad_norm: 0.8717106126332427, iteration: 339290
loss: 1.0341418981552124,grad_norm: 0.9999990263306591, iteration: 339291
loss: 0.9887964725494385,grad_norm: 0.6785564386597582, iteration: 339292
loss: 1.0059171915054321,grad_norm: 0.8158574178669808, iteration: 339293
loss: 1.0124008655548096,grad_norm: 0.7443343216798252, iteration: 339294
loss: 0.9627876877784729,grad_norm: 0.9736937638857046, iteration: 339295
loss: 1.0043946504592896,grad_norm: 0.8685404109066546, iteration: 339296
loss: 1.0049209594726562,grad_norm: 0.762541908338241, iteration: 339297
loss: 0.9933509230613708,grad_norm: 0.8375701071051863, iteration: 339298
loss: 0.983150064945221,grad_norm: 0.8037663725510847, iteration: 339299
loss: 1.0244189500808716,grad_norm: 0.8491308257119893, iteration: 339300
loss: 1.0192747116088867,grad_norm: 0.798211915879868, iteration: 339301
loss: 0.999309778213501,grad_norm: 0.9813716895925245, iteration: 339302
loss: 1.0089313983917236,grad_norm: 0.9999991557076329, iteration: 339303
loss: 1.0080246925354004,grad_norm: 0.8068603354071925, iteration: 339304
loss: 0.9943246841430664,grad_norm: 0.8537485073390586, iteration: 339305
loss: 0.9944949150085449,grad_norm: 0.8240700214820412, iteration: 339306
loss: 1.0146223306655884,grad_norm: 0.7558688651104144, iteration: 339307
loss: 0.9616726636886597,grad_norm: 0.8704072681752587, iteration: 339308
loss: 0.9619737863540649,grad_norm: 0.8571750802738574, iteration: 339309
loss: 0.9746488332748413,grad_norm: 0.7724213370481477, iteration: 339310
loss: 0.996455192565918,grad_norm: 0.8648552457254189, iteration: 339311
loss: 1.0085982084274292,grad_norm: 0.7750529962166403, iteration: 339312
loss: 0.9746743440628052,grad_norm: 0.7682423135188057, iteration: 339313
loss: 1.0002585649490356,grad_norm: 0.993689499848116, iteration: 339314
loss: 0.9981602430343628,grad_norm: 0.9592131607617495, iteration: 339315
loss: 0.9784209132194519,grad_norm: 0.7309037421761111, iteration: 339316
loss: 0.9861975908279419,grad_norm: 0.8733347808525677, iteration: 339317
loss: 1.010646939277649,grad_norm: 0.8009970910183183, iteration: 339318
loss: 0.9840506315231323,grad_norm: 0.7810300942006423, iteration: 339319
loss: 1.027927279472351,grad_norm: 0.7814143197369379, iteration: 339320
loss: 0.9677915573120117,grad_norm: 0.9183364766962697, iteration: 339321
loss: 0.9936506152153015,grad_norm: 0.9245748018443674, iteration: 339322
loss: 0.9994027018547058,grad_norm: 0.8901960492865424, iteration: 339323
loss: 1.0200417041778564,grad_norm: 0.7760914384083129, iteration: 339324
loss: 0.9745192527770996,grad_norm: 0.7313642637798832, iteration: 339325
loss: 0.9915428161621094,grad_norm: 0.7987249976791185, iteration: 339326
loss: 1.0063927173614502,grad_norm: 0.6534303844644112, iteration: 339327
loss: 0.988063633441925,grad_norm: 0.8799284130791272, iteration: 339328
loss: 0.9986668825149536,grad_norm: 0.8179385882543156, iteration: 339329
loss: 0.9485399127006531,grad_norm: 0.7610504023076405, iteration: 339330
loss: 0.9829767346382141,grad_norm: 0.7818775922922917, iteration: 339331
loss: 1.0274384021759033,grad_norm: 0.8846069275061827, iteration: 339332
loss: 0.9883701205253601,grad_norm: 0.999999154789961, iteration: 339333
loss: 0.9953870177268982,grad_norm: 0.7483748333372737, iteration: 339334
loss: 1.023500919342041,grad_norm: 0.8984260550277999, iteration: 339335
loss: 1.0032963752746582,grad_norm: 0.9436863222501762, iteration: 339336
loss: 0.9937721490859985,grad_norm: 0.9404775144106796, iteration: 339337
loss: 1.005746841430664,grad_norm: 0.9639410258811696, iteration: 339338
loss: 0.9678518176078796,grad_norm: 0.779728516143503, iteration: 339339
loss: 1.0392011404037476,grad_norm: 0.8130862485297018, iteration: 339340
loss: 0.956642746925354,grad_norm: 0.8533253838486694, iteration: 339341
loss: 0.9688266515731812,grad_norm: 0.7291268954073316, iteration: 339342
loss: 1.0079575777053833,grad_norm: 0.800457186122878, iteration: 339343
loss: 0.9867735505104065,grad_norm: 0.7419870470255677, iteration: 339344
loss: 1.0045627355575562,grad_norm: 0.8146407783945061, iteration: 339345
loss: 1.0142217874526978,grad_norm: 0.9999993469265224, iteration: 339346
loss: 0.9677943587303162,grad_norm: 0.8448992210876213, iteration: 339347
loss: 1.0213358402252197,grad_norm: 0.8664531066547004, iteration: 339348
loss: 1.0090419054031372,grad_norm: 0.7564447609381085, iteration: 339349
loss: 0.9833157658576965,grad_norm: 0.7744870250406096, iteration: 339350
loss: 0.9832638502120972,grad_norm: 0.6754667686922301, iteration: 339351
loss: 0.9643362164497375,grad_norm: 0.837330404188611, iteration: 339352
loss: 1.0113365650177002,grad_norm: 0.9999996641519858, iteration: 339353
loss: 0.995905339717865,grad_norm: 0.9999991060843458, iteration: 339354
loss: 1.0050994157791138,grad_norm: 0.8013751858870574, iteration: 339355
loss: 0.9947288036346436,grad_norm: 0.8108676876635829, iteration: 339356
loss: 1.00579035282135,grad_norm: 0.6839843105203346, iteration: 339357
loss: 1.01528799533844,grad_norm: 0.9070202700891592, iteration: 339358
loss: 1.0237889289855957,grad_norm: 0.7753989714998618, iteration: 339359
loss: 0.989780068397522,grad_norm: 0.741608678472797, iteration: 339360
loss: 1.01776123046875,grad_norm: 0.7304377719738305, iteration: 339361
loss: 0.9810079336166382,grad_norm: 0.8103763654586377, iteration: 339362
loss: 1.006243348121643,grad_norm: 0.7068793959749268, iteration: 339363
loss: 1.0203056335449219,grad_norm: 0.7998822709959761, iteration: 339364
loss: 0.9967643618583679,grad_norm: 0.9999991874631322, iteration: 339365
loss: 0.9968300461769104,grad_norm: 0.7992169377736307, iteration: 339366
loss: 0.9832484722137451,grad_norm: 0.8779392622871334, iteration: 339367
loss: 0.9710809588432312,grad_norm: 0.9136692382178426, iteration: 339368
loss: 0.9972708225250244,grad_norm: 0.7090331556166418, iteration: 339369
loss: 1.0087006092071533,grad_norm: 0.8234296400290825, iteration: 339370
loss: 1.010366678237915,grad_norm: 0.807729533534108, iteration: 339371
loss: 1.034330129623413,grad_norm: 0.7161604189836153, iteration: 339372
loss: 0.9651921987533569,grad_norm: 0.7551569061009784, iteration: 339373
loss: 1.0252801179885864,grad_norm: 0.8888279281806734, iteration: 339374
loss: 0.9826169013977051,grad_norm: 0.8366925290294045, iteration: 339375
loss: 0.9682403206825256,grad_norm: 0.8379808264794201, iteration: 339376
loss: 1.000810980796814,grad_norm: 0.9999992774271038, iteration: 339377
loss: 1.0335118770599365,grad_norm: 0.9999991894762883, iteration: 339378
loss: 1.008352518081665,grad_norm: 0.9999998678468391, iteration: 339379
loss: 1.0316716432571411,grad_norm: 0.7008215446951486, iteration: 339380
loss: 0.9882094264030457,grad_norm: 0.9164370219929121, iteration: 339381
loss: 1.037587285041809,grad_norm: 0.9999991076433415, iteration: 339382
loss: 1.0059123039245605,grad_norm: 0.889210851196324, iteration: 339383
loss: 1.001150131225586,grad_norm: 0.7974149629827646, iteration: 339384
loss: 0.9981224536895752,grad_norm: 0.7997511807742297, iteration: 339385
loss: 1.013047456741333,grad_norm: 0.788661954380941, iteration: 339386
loss: 0.9560742974281311,grad_norm: 0.9555428296234009, iteration: 339387
loss: 1.0827480554580688,grad_norm: 0.9529774779227954, iteration: 339388
loss: 0.9978098273277283,grad_norm: 0.761436552047794, iteration: 339389
loss: 1.045448899269104,grad_norm: 0.799532918024962, iteration: 339390
loss: 1.036429524421692,grad_norm: 0.8407074090889326, iteration: 339391
loss: 0.9672739505767822,grad_norm: 0.6978966242816939, iteration: 339392
loss: 0.9681651592254639,grad_norm: 0.8228761386190875, iteration: 339393
loss: 1.0419381856918335,grad_norm: 0.999998870491927, iteration: 339394
loss: 0.9862143993377686,grad_norm: 0.9999990306816001, iteration: 339395
loss: 1.0368154048919678,grad_norm: 0.7273005453077663, iteration: 339396
loss: 1.025990605354309,grad_norm: 0.9999991721549383, iteration: 339397
loss: 0.9871160387992859,grad_norm: 0.8593243895283832, iteration: 339398
loss: 0.9869495630264282,grad_norm: 0.7079532297682564, iteration: 339399
loss: 0.9729140996932983,grad_norm: 0.7511903831003128, iteration: 339400
loss: 0.9973816871643066,grad_norm: 0.838545704672481, iteration: 339401
loss: 0.9857388138771057,grad_norm: 0.8566381849362341, iteration: 339402
loss: 0.9746076464653015,grad_norm: 0.7641578145438876, iteration: 339403
loss: 0.9687957763671875,grad_norm: 0.7286409780595398, iteration: 339404
loss: 0.9921029806137085,grad_norm: 0.8300374968963682, iteration: 339405
loss: 1.0044527053833008,grad_norm: 0.9205135409758052, iteration: 339406
loss: 1.0271459817886353,grad_norm: 0.8920708585953894, iteration: 339407
loss: 1.023388147354126,grad_norm: 0.790601435132384, iteration: 339408
loss: 1.0016083717346191,grad_norm: 0.7816089725074172, iteration: 339409
loss: 0.9944044351577759,grad_norm: 0.9649444702953405, iteration: 339410
loss: 0.9679641723632812,grad_norm: 0.7997829250072714, iteration: 339411
loss: 0.972519040107727,grad_norm: 0.7915724245888422, iteration: 339412
loss: 0.9856143593788147,grad_norm: 0.8634918470439787, iteration: 339413
loss: 0.9989824295043945,grad_norm: 0.7040844948006918, iteration: 339414
loss: 0.9562246203422546,grad_norm: 0.8754378675821146, iteration: 339415
loss: 0.9435521960258484,grad_norm: 0.8295715418618262, iteration: 339416
loss: 1.0073312520980835,grad_norm: 0.8981731403496476, iteration: 339417
loss: 0.9834927320480347,grad_norm: 0.8309538683033906, iteration: 339418
loss: 0.9917770624160767,grad_norm: 0.9999992267586401, iteration: 339419
loss: 0.9884771108627319,grad_norm: 0.9756569189675721, iteration: 339420
loss: 0.995971143245697,grad_norm: 0.7641218666846227, iteration: 339421
loss: 0.9785627126693726,grad_norm: 0.8956467592124199, iteration: 339422
loss: 0.9732856750488281,grad_norm: 0.724128647329465, iteration: 339423
loss: 1.0385710000991821,grad_norm: 0.935420719334078, iteration: 339424
loss: 0.9761881232261658,grad_norm: 0.8600131965817427, iteration: 339425
loss: 1.0279178619384766,grad_norm: 0.9999992893057172, iteration: 339426
loss: 0.993539571762085,grad_norm: 0.7164956699987982, iteration: 339427
loss: 0.9975863695144653,grad_norm: 0.9247491480397727, iteration: 339428
loss: 1.013502597808838,grad_norm: 0.7329062842819688, iteration: 339429
loss: 1.006213665008545,grad_norm: 0.9999998160569142, iteration: 339430
loss: 1.0233287811279297,grad_norm: 0.7882743733851384, iteration: 339431
loss: 0.9889608025550842,grad_norm: 0.8347962940527718, iteration: 339432
loss: 1.0178091526031494,grad_norm: 0.9999995846436721, iteration: 339433
loss: 1.0747684240341187,grad_norm: 0.999999713347406, iteration: 339434
loss: 1.051702618598938,grad_norm: 0.9999993149478734, iteration: 339435
loss: 1.0118271112442017,grad_norm: 0.7523273891883986, iteration: 339436
loss: 0.9942270517349243,grad_norm: 0.8762451071418643, iteration: 339437
loss: 0.9732846617698669,grad_norm: 0.9999999238098118, iteration: 339438
loss: 0.9924370050430298,grad_norm: 0.8792188153903664, iteration: 339439
loss: 1.022618055343628,grad_norm: 0.8103729836774833, iteration: 339440
loss: 0.9954929351806641,grad_norm: 0.9107790461357623, iteration: 339441
loss: 1.023353099822998,grad_norm: 0.9305886826524012, iteration: 339442
loss: 0.9818832874298096,grad_norm: 0.7316324873758451, iteration: 339443
loss: 0.9653962850570679,grad_norm: 0.7574250557867405, iteration: 339444
loss: 1.0044138431549072,grad_norm: 0.8236290203472082, iteration: 339445
loss: 0.9840856194496155,grad_norm: 0.9172043482769908, iteration: 339446
loss: 0.99655681848526,grad_norm: 0.8135666577438557, iteration: 339447
loss: 0.9954193830490112,grad_norm: 0.9999991067824933, iteration: 339448
loss: 1.012532114982605,grad_norm: 0.8544929259322467, iteration: 339449
loss: 1.009851098060608,grad_norm: 0.7173001046795768, iteration: 339450
loss: 0.9984287619590759,grad_norm: 0.8583051315199179, iteration: 339451
loss: 0.9980205297470093,grad_norm: 0.6580859427328498, iteration: 339452
loss: 1.0077561140060425,grad_norm: 0.9308592358292344, iteration: 339453
loss: 0.9909066557884216,grad_norm: 0.7538010149658585, iteration: 339454
loss: 0.9811391234397888,grad_norm: 0.9049733441557324, iteration: 339455
loss: 0.9866377711296082,grad_norm: 0.971584483853894, iteration: 339456
loss: 1.0156821012496948,grad_norm: 0.8075113194787394, iteration: 339457
loss: 1.0248949527740479,grad_norm: 0.7959122700921571, iteration: 339458
loss: 1.0347552299499512,grad_norm: 0.9999991811743955, iteration: 339459
loss: 0.9762057662010193,grad_norm: 0.864084496819617, iteration: 339460
loss: 0.9911085963249207,grad_norm: 0.6874724277595444, iteration: 339461
loss: 0.9823471307754517,grad_norm: 0.9608543980633193, iteration: 339462
loss: 0.9938289523124695,grad_norm: 0.9999993115453027, iteration: 339463
loss: 0.9504216313362122,grad_norm: 0.8471716534416953, iteration: 339464
loss: 1.0128734111785889,grad_norm: 0.9395377162484052, iteration: 339465
loss: 1.0025075674057007,grad_norm: 0.6763283075825448, iteration: 339466
loss: 0.9747869968414307,grad_norm: 0.7638876844131279, iteration: 339467
loss: 0.9448128938674927,grad_norm: 0.8131101646107929, iteration: 339468
loss: 1.0598901510238647,grad_norm: 0.8857816027493965, iteration: 339469
loss: 1.0275180339813232,grad_norm: 0.708853727105313, iteration: 339470
loss: 0.9824082255363464,grad_norm: 0.9999991001862848, iteration: 339471
loss: 0.9883298277854919,grad_norm: 0.8872056294611236, iteration: 339472
loss: 0.9824984073638916,grad_norm: 0.9999993253990115, iteration: 339473
loss: 1.0046530961990356,grad_norm: 0.7858345977463539, iteration: 339474
loss: 1.0056445598602295,grad_norm: 0.8731016011588644, iteration: 339475
loss: 1.016197919845581,grad_norm: 0.8307861178306988, iteration: 339476
loss: 1.020196557044983,grad_norm: 0.8488004433715756, iteration: 339477
loss: 0.9998051524162292,grad_norm: 0.8286308182187575, iteration: 339478
loss: 1.0140976905822754,grad_norm: 0.8728704138433508, iteration: 339479
loss: 0.9556719064712524,grad_norm: 0.8831455455186394, iteration: 339480
loss: 1.036075234413147,grad_norm: 0.999999174325116, iteration: 339481
loss: 0.9920086860656738,grad_norm: 0.782667416575618, iteration: 339482
loss: 0.9853891730308533,grad_norm: 0.9658474068964691, iteration: 339483
loss: 1.009581446647644,grad_norm: 0.8503318023839379, iteration: 339484
loss: 1.0254417657852173,grad_norm: 0.9999991556921345, iteration: 339485
loss: 1.0098737478256226,grad_norm: 0.9999991491741799, iteration: 339486
loss: 1.006858468055725,grad_norm: 0.975607107857844, iteration: 339487
loss: 1.0352332592010498,grad_norm: 0.8013907272921196, iteration: 339488
loss: 0.9821246266365051,grad_norm: 0.6849888687136788, iteration: 339489
loss: 0.9843650460243225,grad_norm: 0.8515012313898399, iteration: 339490
loss: 0.9509769082069397,grad_norm: 0.9999991923180312, iteration: 339491
loss: 1.0986912250518799,grad_norm: 0.9999989801770904, iteration: 339492
loss: 1.003901481628418,grad_norm: 0.7865362710305648, iteration: 339493
loss: 1.0133426189422607,grad_norm: 0.660808902236316, iteration: 339494
loss: 0.9706599712371826,grad_norm: 0.9276332210253705, iteration: 339495
loss: 0.9779754877090454,grad_norm: 0.7045247638422921, iteration: 339496
loss: 1.00398588180542,grad_norm: 0.752557160978862, iteration: 339497
loss: 0.9502357840538025,grad_norm: 0.8255661092717858, iteration: 339498
loss: 1.0074666738510132,grad_norm: 0.7606490472244414, iteration: 339499
loss: 0.9775381684303284,grad_norm: 0.7185576165446727, iteration: 339500
loss: 0.9998956322669983,grad_norm: 0.873705198274763, iteration: 339501
loss: 1.0084503889083862,grad_norm: 0.8603975149772383, iteration: 339502
loss: 0.999047040939331,grad_norm: 0.8625690090392353, iteration: 339503
loss: 1.0209158658981323,grad_norm: 0.8994932965166696, iteration: 339504
loss: 1.0615715980529785,grad_norm: 0.9999991386558095, iteration: 339505
loss: 1.0260509252548218,grad_norm: 0.8006066019327667, iteration: 339506
loss: 0.9898717403411865,grad_norm: 0.9999993284103305, iteration: 339507
loss: 0.9960364699363708,grad_norm: 0.7901071835054408, iteration: 339508
loss: 0.9942866563796997,grad_norm: 0.8828293882191145, iteration: 339509
loss: 0.9879297018051147,grad_norm: 0.805124025365786, iteration: 339510
loss: 1.0037119388580322,grad_norm: 0.8872282671564903, iteration: 339511
loss: 1.0041487216949463,grad_norm: 0.8134310774761908, iteration: 339512
loss: 1.0173808336257935,grad_norm: 0.7295245686176497, iteration: 339513
loss: 1.0169123411178589,grad_norm: 0.9999998909387092, iteration: 339514
loss: 0.9998394250869751,grad_norm: 0.7857180142858678, iteration: 339515
loss: 0.9839476346969604,grad_norm: 0.9076900410591906, iteration: 339516
loss: 1.0007632970809937,grad_norm: 0.893729434547572, iteration: 339517
loss: 1.014248251914978,grad_norm: 0.9999992947483753, iteration: 339518
loss: 0.9993849992752075,grad_norm: 0.6913264128230067, iteration: 339519
loss: 1.0052318572998047,grad_norm: 0.7435038925691982, iteration: 339520
loss: 1.0176821947097778,grad_norm: 0.8411684919693684, iteration: 339521
loss: 0.95377516746521,grad_norm: 0.8454455179667291, iteration: 339522
loss: 0.9569932818412781,grad_norm: 0.7626414303557185, iteration: 339523
loss: 1.026572585105896,grad_norm: 0.8413560951805009, iteration: 339524
loss: 1.0006043910980225,grad_norm: 0.8710948533158263, iteration: 339525
loss: 1.029417634010315,grad_norm: 0.8386098122003283, iteration: 339526
loss: 1.012251615524292,grad_norm: 0.869402508525422, iteration: 339527
loss: 0.9883771538734436,grad_norm: 0.7934225523568967, iteration: 339528
loss: 0.9901659488677979,grad_norm: 0.7061046882653397, iteration: 339529
loss: 1.0275835990905762,grad_norm: 0.7806739403251437, iteration: 339530
loss: 1.0086185932159424,grad_norm: 0.824004062674642, iteration: 339531
loss: 0.998659074306488,grad_norm: 0.7768472493095148, iteration: 339532
loss: 0.9458686113357544,grad_norm: 0.8215127688072311, iteration: 339533
loss: 1.0546467304229736,grad_norm: 0.9999996733135904, iteration: 339534
loss: 1.0149321556091309,grad_norm: 0.8541057626772113, iteration: 339535
loss: 0.9898116588592529,grad_norm: 0.987066022934786, iteration: 339536
loss: 1.0021295547485352,grad_norm: 0.891345640041766, iteration: 339537
loss: 1.0926669836044312,grad_norm: 0.9999991271021369, iteration: 339538
loss: 0.9983310699462891,grad_norm: 0.8993767785959025, iteration: 339539
loss: 0.981474757194519,grad_norm: 0.7298362998757881, iteration: 339540
loss: 0.9876444339752197,grad_norm: 0.9234651117539102, iteration: 339541
loss: 0.9948846697807312,grad_norm: 0.8444171537820178, iteration: 339542
loss: 1.025521993637085,grad_norm: 0.9637711740673989, iteration: 339543
loss: 1.03741455078125,grad_norm: 0.8339046046117714, iteration: 339544
loss: 1.0613237619400024,grad_norm: 0.9999991842025819, iteration: 339545
loss: 1.0448330640792847,grad_norm: 0.8407276593798207, iteration: 339546
loss: 0.9927165508270264,grad_norm: 0.8329522794051434, iteration: 339547
loss: 1.0423526763916016,grad_norm: 0.999999175260859, iteration: 339548
loss: 0.9427578449249268,grad_norm: 0.8278765034298031, iteration: 339549
loss: 1.0801795721054077,grad_norm: 0.9999992045299848, iteration: 339550
loss: 0.9761000871658325,grad_norm: 0.7877540187686045, iteration: 339551
loss: 1.1368317604064941,grad_norm: 0.999999715156735, iteration: 339552
loss: 0.9778865575790405,grad_norm: 0.7741490192607544, iteration: 339553
loss: 0.9886491298675537,grad_norm: 0.8549708684204949, iteration: 339554
loss: 1.0037248134613037,grad_norm: 0.9219647939162752, iteration: 339555
loss: 1.0017975568771362,grad_norm: 0.8333515638745261, iteration: 339556
loss: 0.9879550337791443,grad_norm: 0.7524821478788896, iteration: 339557
loss: 1.023126482963562,grad_norm: 0.9241275189619789, iteration: 339558
loss: 0.974566638469696,grad_norm: 0.9297166827320915, iteration: 339559
loss: 0.9592630863189697,grad_norm: 0.9569734756914603, iteration: 339560
loss: 1.0446157455444336,grad_norm: 0.9999997310346544, iteration: 339561
loss: 0.9701665043830872,grad_norm: 0.999998923385837, iteration: 339562
loss: 1.0215301513671875,grad_norm: 0.6542895826449927, iteration: 339563
loss: 1.0032740831375122,grad_norm: 0.8145225808499347, iteration: 339564
loss: 1.002368688583374,grad_norm: 0.6648440637596077, iteration: 339565
loss: 1.0066372156143188,grad_norm: 0.9956536632877946, iteration: 339566
loss: 1.0021101236343384,grad_norm: 0.8371023931335472, iteration: 339567
loss: 0.9772641062736511,grad_norm: 0.7057490768293623, iteration: 339568
loss: 0.9680843949317932,grad_norm: 0.8659973321784624, iteration: 339569
loss: 0.9984409213066101,grad_norm: 0.7846505215174611, iteration: 339570
loss: 0.9955952763557434,grad_norm: 0.9236708275477412, iteration: 339571
loss: 1.0176712274551392,grad_norm: 0.8428786508378558, iteration: 339572
loss: 1.0407769680023193,grad_norm: 0.7120963730566723, iteration: 339573
loss: 1.005088448524475,grad_norm: 0.8215353916780476, iteration: 339574
loss: 1.0182117223739624,grad_norm: 0.9808795476466596, iteration: 339575
loss: 1.0481795072555542,grad_norm: 0.9999998875110744, iteration: 339576
loss: 0.9984370470046997,grad_norm: 0.8353140245177472, iteration: 339577
loss: 1.036275863647461,grad_norm: 0.6524063377785422, iteration: 339578
loss: 0.9998714923858643,grad_norm: 0.9373471617850112, iteration: 339579
loss: 0.9766437411308289,grad_norm: 0.7759847334292527, iteration: 339580
loss: 1.0520097017288208,grad_norm: 0.9893852669515445, iteration: 339581
loss: 0.9559988379478455,grad_norm: 0.8133207244241396, iteration: 339582
loss: 0.9794138669967651,grad_norm: 0.7601894620869519, iteration: 339583
loss: 1.0417380332946777,grad_norm: 0.9110503014452216, iteration: 339584
loss: 1.022146463394165,grad_norm: 0.805610145720744, iteration: 339585
loss: 0.9627498984336853,grad_norm: 0.8727500349103882, iteration: 339586
loss: 0.9995670318603516,grad_norm: 0.8475393921296789, iteration: 339587
loss: 0.9906718730926514,grad_norm: 0.7748780713505524, iteration: 339588
loss: 0.9804778099060059,grad_norm: 0.8018996073272256, iteration: 339589
loss: 1.0153614282608032,grad_norm: 0.7042263408746509, iteration: 339590
loss: 0.9895510673522949,grad_norm: 0.8375876296823572, iteration: 339591
loss: 0.9996306300163269,grad_norm: 0.8673147028285774, iteration: 339592
loss: 0.962913990020752,grad_norm: 0.8908955975263362, iteration: 339593
loss: 1.0004785060882568,grad_norm: 0.7488645585032673, iteration: 339594
loss: 0.9850775003433228,grad_norm: 0.8071630486170354, iteration: 339595
loss: 1.021756649017334,grad_norm: 0.7735500929702149, iteration: 339596
loss: 1.029679536819458,grad_norm: 0.7999512151821575, iteration: 339597
loss: 0.9833813905715942,grad_norm: 0.8921424913865256, iteration: 339598
loss: 0.9892982840538025,grad_norm: 0.8474369696571574, iteration: 339599
loss: 0.9987829327583313,grad_norm: 0.7759983800448604, iteration: 339600
loss: 0.9652687907218933,grad_norm: 0.8674023628936126, iteration: 339601
loss: 0.9879704713821411,grad_norm: 0.8042841567222713, iteration: 339602
loss: 0.9836519956588745,grad_norm: 0.9684124102526854, iteration: 339603
loss: 0.9901343584060669,grad_norm: 0.8494499422314566, iteration: 339604
loss: 1.0514163970947266,grad_norm: 0.938677128329074, iteration: 339605
loss: 1.0133695602416992,grad_norm: 0.9999991671312336, iteration: 339606
loss: 0.9596124291419983,grad_norm: 0.7998725660292839, iteration: 339607
loss: 1.0004892349243164,grad_norm: 0.8640867300693792, iteration: 339608
loss: 1.0476455688476562,grad_norm: 0.9090725002696813, iteration: 339609
loss: 1.0524985790252686,grad_norm: 0.9999993898632497, iteration: 339610
loss: 0.9683908820152283,grad_norm: 0.7451631269087978, iteration: 339611
loss: 1.004106044769287,grad_norm: 0.8275751603362285, iteration: 339612
loss: 0.9833084940910339,grad_norm: 0.7735682558015805, iteration: 339613
loss: 1.00637686252594,grad_norm: 0.9850205925267888, iteration: 339614
loss: 1.0394377708435059,grad_norm: 0.951704909272426, iteration: 339615
loss: 0.9999557733535767,grad_norm: 0.6624097497213933, iteration: 339616
loss: 1.083725094795227,grad_norm: 0.9999996767164886, iteration: 339617
loss: 1.000728726387024,grad_norm: 0.9243545711600806, iteration: 339618
loss: 1.040297269821167,grad_norm: 0.9246897750719585, iteration: 339619
loss: 1.030820369720459,grad_norm: 0.7754691765454416, iteration: 339620
loss: 0.9865133166313171,grad_norm: 0.9999991367229877, iteration: 339621
loss: 0.9883140921592712,grad_norm: 0.7737017572760672, iteration: 339622
loss: 1.0199270248413086,grad_norm: 0.7808374543980192, iteration: 339623
loss: 1.0101314783096313,grad_norm: 0.754814233337443, iteration: 339624
loss: 1.0128772258758545,grad_norm: 0.8730655210835444, iteration: 339625
loss: 0.9840187430381775,grad_norm: 0.8462554464903519, iteration: 339626
loss: 0.9724677205085754,grad_norm: 0.7486653244942988, iteration: 339627
loss: 1.017501711845398,grad_norm: 0.9100213293247718, iteration: 339628
loss: 1.0208667516708374,grad_norm: 0.8144119735441372, iteration: 339629
loss: 1.0813429355621338,grad_norm: 0.8468239031219655, iteration: 339630
loss: 1.0242488384246826,grad_norm: 0.9315194027856334, iteration: 339631
loss: 1.001457929611206,grad_norm: 0.8994441259874181, iteration: 339632
loss: 0.9883366823196411,grad_norm: 0.8102349366442178, iteration: 339633
loss: 1.0098662376403809,grad_norm: 0.7667157722220727, iteration: 339634
loss: 1.0413607358932495,grad_norm: 0.7503035558759049, iteration: 339635
loss: 1.0078521966934204,grad_norm: 0.969700748138157, iteration: 339636
loss: 0.9792811870574951,grad_norm: 0.9528742402072116, iteration: 339637
loss: 0.9824694991111755,grad_norm: 0.7609139080230469, iteration: 339638
loss: 1.020682692527771,grad_norm: 0.8381705669055383, iteration: 339639
loss: 1.075363278388977,grad_norm: 0.9292436154892224, iteration: 339640
loss: 1.005004644393921,grad_norm: 0.792202008108344, iteration: 339641
loss: 1.0330551862716675,grad_norm: 0.6675433020514466, iteration: 339642
loss: 1.0208384990692139,grad_norm: 0.8849935438045305, iteration: 339643
loss: 0.97232586145401,grad_norm: 0.7583818580206946, iteration: 339644
loss: 1.017547607421875,grad_norm: 0.830061534776598, iteration: 339645
loss: 1.0018681287765503,grad_norm: 0.7847248680534528, iteration: 339646
loss: 1.092990517616272,grad_norm: 0.9999997047118899, iteration: 339647
loss: 1.0232998132705688,grad_norm: 0.8576264134810306, iteration: 339648
loss: 1.0208443403244019,grad_norm: 0.8877808166491059, iteration: 339649
loss: 1.0267912149429321,grad_norm: 0.8535101330013912, iteration: 339650
loss: 0.9759247303009033,grad_norm: 0.9118264896816357, iteration: 339651
loss: 1.0079764127731323,grad_norm: 0.8439972339294041, iteration: 339652
loss: 0.9915592670440674,grad_norm: 0.889611277345911, iteration: 339653
loss: 0.9655991196632385,grad_norm: 0.7802924948505637, iteration: 339654
loss: 1.0101251602172852,grad_norm: 0.8511519267233483, iteration: 339655
loss: 1.019212245941162,grad_norm: 0.9999990530627385, iteration: 339656
loss: 0.9634402990341187,grad_norm: 0.9164782137950387, iteration: 339657
loss: 0.9845461249351501,grad_norm: 0.7137280503948908, iteration: 339658
loss: 1.0510259866714478,grad_norm: 0.8132934512711028, iteration: 339659
loss: 0.9944184422492981,grad_norm: 0.8038050577893786, iteration: 339660
loss: 1.0261430740356445,grad_norm: 0.7359896342848664, iteration: 339661
loss: 1.0964165925979614,grad_norm: 0.9999994132619985, iteration: 339662
loss: 1.015424132347107,grad_norm: 0.8371023659089903, iteration: 339663
loss: 0.9947672486305237,grad_norm: 0.9288416312366967, iteration: 339664
loss: 1.008008599281311,grad_norm: 0.8942265136431192, iteration: 339665
loss: 0.972040057182312,grad_norm: 0.7461640426921449, iteration: 339666
loss: 1.009925365447998,grad_norm: 0.9999992581896688, iteration: 339667
loss: 1.0715428590774536,grad_norm: 0.9673301772976612, iteration: 339668
loss: 1.0055128335952759,grad_norm: 0.723029095111591, iteration: 339669
loss: 0.9720232486724854,grad_norm: 0.9392241697969761, iteration: 339670
loss: 1.0044645071029663,grad_norm: 0.8828624513068641, iteration: 339671
loss: 1.0447288751602173,grad_norm: 0.9999999152487365, iteration: 339672
loss: 0.9758865237236023,grad_norm: 0.7359381500885146, iteration: 339673
loss: 1.0174009799957275,grad_norm: 0.7966102521243216, iteration: 339674
loss: 1.0110055208206177,grad_norm: 0.741073861086965, iteration: 339675
loss: 1.0096501111984253,grad_norm: 0.7547547597333842, iteration: 339676
loss: 1.0001572370529175,grad_norm: 0.8649592033631353, iteration: 339677
loss: 0.9813363552093506,grad_norm: 0.7274207706737205, iteration: 339678
loss: 0.9592760801315308,grad_norm: 0.9621075048346903, iteration: 339679
loss: 1.0178824663162231,grad_norm: 0.9999992624230134, iteration: 339680
loss: 0.9803711175918579,grad_norm: 0.7753014226789696, iteration: 339681
loss: 0.9917178750038147,grad_norm: 0.8515145561813308, iteration: 339682
loss: 0.9897613525390625,grad_norm: 0.7727791115251427, iteration: 339683
loss: 0.9995464086532593,grad_norm: 0.9075051943217384, iteration: 339684
loss: 1.0026127099990845,grad_norm: 0.9122163687308991, iteration: 339685
loss: 1.0050091743469238,grad_norm: 0.6567287481072138, iteration: 339686
loss: 1.0103683471679688,grad_norm: 0.7482067199930877, iteration: 339687
loss: 0.9624229073524475,grad_norm: 0.741381575587407, iteration: 339688
loss: 1.033532738685608,grad_norm: 0.7359797500484144, iteration: 339689
loss: 1.0254963636398315,grad_norm: 0.898690966024802, iteration: 339690
loss: 1.0209197998046875,grad_norm: 0.9999988864466142, iteration: 339691
loss: 1.0262421369552612,grad_norm: 0.9999990650197326, iteration: 339692
loss: 1.0165917873382568,grad_norm: 0.8946935595431725, iteration: 339693
loss: 1.0093353986740112,grad_norm: 0.8414699732560159, iteration: 339694
loss: 1.0262850522994995,grad_norm: 0.7838332405154479, iteration: 339695
loss: 1.030422568321228,grad_norm: 0.9999990477805709, iteration: 339696
loss: 1.070504069328308,grad_norm: 0.8951415863853025, iteration: 339697
loss: 0.9816785454750061,grad_norm: 0.8881359102638455, iteration: 339698
loss: 1.0289098024368286,grad_norm: 0.9999997669902032, iteration: 339699
loss: 0.9748345017433167,grad_norm: 0.9999991348177044, iteration: 339700
loss: 1.0479967594146729,grad_norm: 0.9999991378800596, iteration: 339701
loss: 1.0710418224334717,grad_norm: 0.9999994096222689, iteration: 339702
loss: 0.9992772936820984,grad_norm: 0.9135456214650364, iteration: 339703
loss: 0.9803125262260437,grad_norm: 0.8536663487558505, iteration: 339704
loss: 1.0289252996444702,grad_norm: 0.9999992380243152, iteration: 339705
loss: 0.9733617305755615,grad_norm: 0.968442067265423, iteration: 339706
loss: 0.966555655002594,grad_norm: 0.7625372242457439, iteration: 339707
loss: 1.003388524055481,grad_norm: 0.7004058911685144, iteration: 339708
loss: 1.0063620805740356,grad_norm: 0.7887382491787506, iteration: 339709
loss: 0.9815342426300049,grad_norm: 0.6558328071563493, iteration: 339710
loss: 1.0314162969589233,grad_norm: 0.7908829219987527, iteration: 339711
loss: 1.013009786605835,grad_norm: 0.7651686618883394, iteration: 339712
loss: 1.0027858018875122,grad_norm: 0.7031671933821809, iteration: 339713
loss: 0.9855734705924988,grad_norm: 0.9068154035466393, iteration: 339714
loss: 0.9750415682792664,grad_norm: 0.692813282896456, iteration: 339715
loss: 1.012425422668457,grad_norm: 0.9824065164526333, iteration: 339716
loss: 1.0672175884246826,grad_norm: 0.9999993709799312, iteration: 339717
loss: 1.0307340621948242,grad_norm: 0.9653417183936683, iteration: 339718
loss: 0.9975756406784058,grad_norm: 0.99999964873907, iteration: 339719
loss: 1.016116976737976,grad_norm: 0.94997591283875, iteration: 339720
loss: 1.086838960647583,grad_norm: 0.9999994301775101, iteration: 339721
loss: 1.0147597789764404,grad_norm: 0.9999991578334095, iteration: 339722
loss: 1.107591986656189,grad_norm: 0.9999994541803882, iteration: 339723
loss: 1.0107039213180542,grad_norm: 0.7352099781150977, iteration: 339724
loss: 1.0160521268844604,grad_norm: 0.9999993780177374, iteration: 339725
loss: 1.0026496648788452,grad_norm: 0.9428307396036265, iteration: 339726
loss: 0.9933339357376099,grad_norm: 0.852383211028705, iteration: 339727
loss: 0.9920892715454102,grad_norm: 0.9441936125458323, iteration: 339728
loss: 1.0667498111724854,grad_norm: 0.8141607955748816, iteration: 339729
loss: 1.027163028717041,grad_norm: 0.8561237869183344, iteration: 339730
loss: 1.0303661823272705,grad_norm: 0.9999989803025109, iteration: 339731
loss: 1.066486120223999,grad_norm: 0.7901092517092375, iteration: 339732
loss: 0.9726738333702087,grad_norm: 0.851149567760436, iteration: 339733
loss: 1.0035276412963867,grad_norm: 0.8834154492549284, iteration: 339734
loss: 1.0164353847503662,grad_norm: 0.7827695438886083, iteration: 339735
loss: 0.9981207251548767,grad_norm: 0.9357618696514529, iteration: 339736
loss: 1.0558497905731201,grad_norm: 1.0000000277074144, iteration: 339737
loss: 0.9937251210212708,grad_norm: 0.8192581954731748, iteration: 339738
loss: 1.0715113878250122,grad_norm: 0.9999991705164427, iteration: 339739
loss: 1.0668373107910156,grad_norm: 0.99999936290006, iteration: 339740
loss: 1.026548981666565,grad_norm: 0.9084482894391995, iteration: 339741
loss: 0.9990633130073547,grad_norm: 0.7099260828787176, iteration: 339742
loss: 1.0118557214736938,grad_norm: 0.999999127366155, iteration: 339743
loss: 0.9872687458992004,grad_norm: 0.9999990904200158, iteration: 339744
loss: 1.0049694776535034,grad_norm: 0.7311683107654942, iteration: 339745
loss: 1.0000370740890503,grad_norm: 0.999999183297538, iteration: 339746
loss: 0.9853231906890869,grad_norm: 0.7867894162597413, iteration: 339747
loss: 1.034757375717163,grad_norm: 0.9999995297962128, iteration: 339748
loss: 1.0089689493179321,grad_norm: 0.8698824757466215, iteration: 339749
loss: 0.9809901118278503,grad_norm: 0.8483930535836269, iteration: 339750
loss: 1.0072333812713623,grad_norm: 0.8912653007798039, iteration: 339751
loss: 1.0053714513778687,grad_norm: 0.8062909406752924, iteration: 339752
loss: 0.9818956851959229,grad_norm: 0.7199906372143039, iteration: 339753
loss: 0.9969404339790344,grad_norm: 0.8536991652842199, iteration: 339754
loss: 1.049911618232727,grad_norm: 0.9999990825988504, iteration: 339755
loss: 1.023158311843872,grad_norm: 0.91074363279817, iteration: 339756
loss: 1.006393551826477,grad_norm: 0.7958067541823638, iteration: 339757
loss: 1.0463180541992188,grad_norm: 0.9999992777370119, iteration: 339758
loss: 1.0169806480407715,grad_norm: 0.9298230338816583, iteration: 339759
loss: 1.006091833114624,grad_norm: 0.9999994427520216, iteration: 339760
loss: 1.0092366933822632,grad_norm: 0.7164962163825305, iteration: 339761
loss: 1.0183064937591553,grad_norm: 0.7583314176688902, iteration: 339762
loss: 1.0012189149856567,grad_norm: 0.9478847472137654, iteration: 339763
loss: 0.9959073662757874,grad_norm: 0.9766436499043855, iteration: 339764
loss: 0.9627633094787598,grad_norm: 0.9527160484441463, iteration: 339765
loss: 0.9818778038024902,grad_norm: 0.8319112951746371, iteration: 339766
loss: 0.9764474034309387,grad_norm: 0.9999992029683743, iteration: 339767
loss: 0.9963394403457642,grad_norm: 0.7947351239344186, iteration: 339768
loss: 1.0091814994812012,grad_norm: 0.9999990501520037, iteration: 339769
loss: 0.9843183159828186,grad_norm: 0.6729833064203852, iteration: 339770
loss: 1.0269862413406372,grad_norm: 0.9984136133164766, iteration: 339771
loss: 1.0113532543182373,grad_norm: 0.9897470215439041, iteration: 339772
loss: 0.9954376816749573,grad_norm: 0.9658794904340633, iteration: 339773
loss: 1.0042695999145508,grad_norm: 0.8953461082875428, iteration: 339774
loss: 1.0795279741287231,grad_norm: 0.9999999113564447, iteration: 339775
loss: 1.0065847635269165,grad_norm: 0.9574840330927185, iteration: 339776
loss: 0.9586523771286011,grad_norm: 0.7809056252926613, iteration: 339777
loss: 1.0149085521697998,grad_norm: 0.7860699833476908, iteration: 339778
loss: 0.9828430414199829,grad_norm: 0.7010221793722207, iteration: 339779
loss: 0.9748200178146362,grad_norm: 0.8173524678796813, iteration: 339780
loss: 0.9875907301902771,grad_norm: 0.947367564562072, iteration: 339781
loss: 1.0334750413894653,grad_norm: 0.97409789974683, iteration: 339782
loss: 1.0251144170761108,grad_norm: 0.9827417265259915, iteration: 339783
loss: 1.0072721242904663,grad_norm: 0.7185795902985694, iteration: 339784
loss: 1.0203533172607422,grad_norm: 0.7452713608291233, iteration: 339785
loss: 1.0380241870880127,grad_norm: 0.9289872002178172, iteration: 339786
loss: 1.0139387845993042,grad_norm: 0.9999996961602131, iteration: 339787
loss: 1.0218969583511353,grad_norm: 0.9653579927881722, iteration: 339788
loss: 1.0372079610824585,grad_norm: 0.8590092183946112, iteration: 339789
loss: 1.009581208229065,grad_norm: 0.7319426679922005, iteration: 339790
loss: 0.9755554795265198,grad_norm: 0.6800722226153284, iteration: 339791
loss: 0.9679903388023376,grad_norm: 0.9535312058905834, iteration: 339792
loss: 0.9865405559539795,grad_norm: 0.9999990615992312, iteration: 339793
loss: 1.021045207977295,grad_norm: 0.7899139121113229, iteration: 339794
loss: 0.9899946451187134,grad_norm: 0.9999992635963282, iteration: 339795
loss: 0.9989641904830933,grad_norm: 0.655586705196564, iteration: 339796
loss: 0.9923360347747803,grad_norm: 0.8601261997693935, iteration: 339797
loss: 0.989821195602417,grad_norm: 0.9322074681964762, iteration: 339798
loss: 0.9826629757881165,grad_norm: 0.834961441969665, iteration: 339799
loss: 0.9879108667373657,grad_norm: 0.7761625617276575, iteration: 339800
loss: 1.2129899263381958,grad_norm: 0.9999994436476599, iteration: 339801
loss: 1.0107530355453491,grad_norm: 0.8087377408229924, iteration: 339802
loss: 1.095933437347412,grad_norm: 0.9999997080979776, iteration: 339803
loss: 0.9963997006416321,grad_norm: 0.81112624966374, iteration: 339804
loss: 0.989956259727478,grad_norm: 0.8167004640893722, iteration: 339805
loss: 1.0017417669296265,grad_norm: 0.9999991881334905, iteration: 339806
loss: 1.0215983390808105,grad_norm: 0.9503118171686242, iteration: 339807
loss: 0.9885919690132141,grad_norm: 0.8818269562533915, iteration: 339808
loss: 1.0206912755966187,grad_norm: 0.9999997063915064, iteration: 339809
loss: 0.9869633316993713,grad_norm: 0.8971483532530249, iteration: 339810
loss: 0.9776661992073059,grad_norm: 0.9041518704234611, iteration: 339811
loss: 0.9792003035545349,grad_norm: 0.7991161859212113, iteration: 339812
loss: 0.9306043386459351,grad_norm: 0.8798085197636037, iteration: 339813
loss: 0.9923133850097656,grad_norm: 0.671310157106894, iteration: 339814
loss: 0.9923169612884521,grad_norm: 0.7007602604410328, iteration: 339815
loss: 1.007878303527832,grad_norm: 0.9780430648814342, iteration: 339816
loss: 1.0252009630203247,grad_norm: 0.8835813813911854, iteration: 339817
loss: 1.016108512878418,grad_norm: 0.7344781532641255, iteration: 339818
loss: 1.0097039937973022,grad_norm: 0.8616349394076203, iteration: 339819
loss: 0.9814333319664001,grad_norm: 0.90059719853954, iteration: 339820
loss: 1.000307559967041,grad_norm: 0.7095463334861846, iteration: 339821
loss: 0.9898663759231567,grad_norm: 0.9598746431723898, iteration: 339822
loss: 1.0017067193984985,grad_norm: 0.8552444082635497, iteration: 339823
loss: 1.0354763269424438,grad_norm: 0.9999991583442069, iteration: 339824
loss: 0.9693635702133179,grad_norm: 0.7631727715393645, iteration: 339825
loss: 0.9956339001655579,grad_norm: 0.6610738248549405, iteration: 339826
loss: 1.0627237558364868,grad_norm: 0.9999998234035344, iteration: 339827
loss: 0.9992564916610718,grad_norm: 0.9350608957742497, iteration: 339828
loss: 0.9947084188461304,grad_norm: 0.9377730053265412, iteration: 339829
loss: 0.9985461235046387,grad_norm: 0.8371095133795388, iteration: 339830
loss: 1.0136600732803345,grad_norm: 0.7691622981631355, iteration: 339831
loss: 0.9786773324012756,grad_norm: 0.8763059045710248, iteration: 339832
loss: 0.9767654538154602,grad_norm: 0.9257566720600244, iteration: 339833
loss: 1.018673062324524,grad_norm: 0.7842384107589042, iteration: 339834
loss: 0.9721289277076721,grad_norm: 0.8280074790268773, iteration: 339835
loss: 1.0489706993103027,grad_norm: 0.9999992292648191, iteration: 339836
loss: 1.002698302268982,grad_norm: 0.7131234548769854, iteration: 339837
loss: 1.006723403930664,grad_norm: 0.7116126592047582, iteration: 339838
loss: 0.9866279363632202,grad_norm: 0.8247677731401568, iteration: 339839
loss: 1.0037494897842407,grad_norm: 0.9683955009987215, iteration: 339840
loss: 1.0396021604537964,grad_norm: 0.9967638942044351, iteration: 339841
loss: 1.00632643699646,grad_norm: 0.8621269852642249, iteration: 339842
loss: 1.0242563486099243,grad_norm: 0.9041451912405548, iteration: 339843
loss: 0.9895991683006287,grad_norm: 0.7748565031336986, iteration: 339844
loss: 1.0248605012893677,grad_norm: 0.9467726781402274, iteration: 339845
loss: 1.0006523132324219,grad_norm: 0.9451034723818115, iteration: 339846
loss: 1.0311354398727417,grad_norm: 0.9999991564336844, iteration: 339847
loss: 0.9868478178977966,grad_norm: 0.833444441176764, iteration: 339848
loss: 1.0467736721038818,grad_norm: 0.9272105786923736, iteration: 339849
loss: 1.028314232826233,grad_norm: 0.8708698844402855, iteration: 339850
loss: 0.9492622017860413,grad_norm: 0.7591805851301087, iteration: 339851
loss: 0.9973522424697876,grad_norm: 0.9999998385850399, iteration: 339852
loss: 0.9568246006965637,grad_norm: 0.8747658667754407, iteration: 339853
loss: 0.9967528581619263,grad_norm: 0.7981821866821812, iteration: 339854
loss: 1.0057833194732666,grad_norm: 0.8074584418219392, iteration: 339855
loss: 1.0099848508834839,grad_norm: 0.8528310325308575, iteration: 339856
loss: 1.0176451206207275,grad_norm: 0.9145478399848853, iteration: 339857
loss: 0.9949117302894592,grad_norm: 0.8796527698857747, iteration: 339858
loss: 0.9790357947349548,grad_norm: 0.8352930199593022, iteration: 339859
loss: 1.0014115571975708,grad_norm: 0.9999992404698087, iteration: 339860
loss: 1.008745789527893,grad_norm: 0.888339588341327, iteration: 339861
loss: 0.9881011843681335,grad_norm: 0.85045841538148, iteration: 339862
loss: 0.9797526001930237,grad_norm: 0.9343960486830217, iteration: 339863
loss: 1.0169520378112793,grad_norm: 0.7310062907955232, iteration: 339864
loss: 1.0166268348693848,grad_norm: 0.9804498128619361, iteration: 339865
loss: 0.9981539845466614,grad_norm: 0.7882802551765957, iteration: 339866
loss: 1.0111700296401978,grad_norm: 0.937220236924912, iteration: 339867
loss: 0.9722925424575806,grad_norm: 0.8187709775315553, iteration: 339868
loss: 1.00454843044281,grad_norm: 0.8738071725763702, iteration: 339869
loss: 1.0185067653656006,grad_norm: 0.8204511176413417, iteration: 339870
loss: 1.04610276222229,grad_norm: 0.8346773877274094, iteration: 339871
loss: 0.9645252227783203,grad_norm: 0.8041425602287654, iteration: 339872
loss: 1.0176516771316528,grad_norm: 0.8821295015071198, iteration: 339873
loss: 0.9790918827056885,grad_norm: 0.7167952846959215, iteration: 339874
loss: 0.9900702238082886,grad_norm: 0.8019187439625065, iteration: 339875
loss: 1.0071351528167725,grad_norm: 0.8046247017541415, iteration: 339876
loss: 0.9772089123725891,grad_norm: 0.7949818877619383, iteration: 339877
loss: 1.0172693729400635,grad_norm: 0.8408069472375888, iteration: 339878
loss: 1.0103548765182495,grad_norm: 0.9339812419794974, iteration: 339879
loss: 1.0206470489501953,grad_norm: 0.8274831650830811, iteration: 339880
loss: 1.0088014602661133,grad_norm: 0.8691136510132291, iteration: 339881
loss: 1.0232371091842651,grad_norm: 0.9201263242368843, iteration: 339882
loss: 1.0048240423202515,grad_norm: 0.8389487844358517, iteration: 339883
loss: 0.977995753288269,grad_norm: 0.9999990674531134, iteration: 339884
loss: 1.025895357131958,grad_norm: 0.9999992129276216, iteration: 339885
loss: 0.9914562106132507,grad_norm: 0.6813243435328024, iteration: 339886
loss: 1.0445449352264404,grad_norm: 0.9999991249190281, iteration: 339887
loss: 1.0197466611862183,grad_norm: 0.9969136506049907, iteration: 339888
loss: 1.0317269563674927,grad_norm: 0.9778965128609332, iteration: 339889
loss: 1.0146710872650146,grad_norm: 0.7741917589789491, iteration: 339890
loss: 0.9857941269874573,grad_norm: 0.9279463194349389, iteration: 339891
loss: 1.025009274482727,grad_norm: 0.9229214577357697, iteration: 339892
loss: 1.0111414194107056,grad_norm: 0.8795652545465771, iteration: 339893
loss: 0.9813922643661499,grad_norm: 0.8528903430889687, iteration: 339894
loss: 0.9870736002922058,grad_norm: 0.9107591752365507, iteration: 339895
loss: 1.0137121677398682,grad_norm: 0.7699136413190335, iteration: 339896
loss: 1.045148491859436,grad_norm: 0.9999990917452413, iteration: 339897
loss: 0.994277834892273,grad_norm: 0.8060706688658223, iteration: 339898
loss: 0.9891433715820312,grad_norm: 0.7812670657594806, iteration: 339899
loss: 0.9862129092216492,grad_norm: 0.7705052495615696, iteration: 339900
loss: 0.9811651110649109,grad_norm: 0.779689951737612, iteration: 339901
loss: 0.9747469425201416,grad_norm: 0.8008743642344822, iteration: 339902
loss: 1.0067527294158936,grad_norm: 0.7883609070659517, iteration: 339903
loss: 0.9937968850135803,grad_norm: 0.9999998321232845, iteration: 339904
loss: 1.0308510065078735,grad_norm: 0.8561629920268015, iteration: 339905
loss: 0.978175938129425,grad_norm: 0.823892941358892, iteration: 339906
loss: 0.9992469549179077,grad_norm: 0.8493342574563523, iteration: 339907
loss: 1.0271950960159302,grad_norm: 0.9104733524986747, iteration: 339908
loss: 1.054744839668274,grad_norm: 0.9286602313021253, iteration: 339909
loss: 1.0042195320129395,grad_norm: 0.9999991527605641, iteration: 339910
loss: 1.0076318979263306,grad_norm: 0.9425443829165054, iteration: 339911
loss: 1.0261306762695312,grad_norm: 0.7914271908666923, iteration: 339912
loss: 1.0322107076644897,grad_norm: 0.9276470497197707, iteration: 339913
loss: 0.9777533411979675,grad_norm: 0.7983409885996995, iteration: 339914
loss: 0.9893084764480591,grad_norm: 0.9216152612854907, iteration: 339915
loss: 0.9827410578727722,grad_norm: 0.8671623134123446, iteration: 339916
loss: 1.0774766206741333,grad_norm: 0.9999999196049848, iteration: 339917
loss: 0.9902123212814331,grad_norm: 0.9999991917135596, iteration: 339918
loss: 1.000653862953186,grad_norm: 0.8444142940879826, iteration: 339919
loss: 1.0168890953063965,grad_norm: 0.7827623220843031, iteration: 339920
loss: 0.9812154769897461,grad_norm: 0.7837926358501872, iteration: 339921
loss: 0.9573654532432556,grad_norm: 0.809828048916926, iteration: 339922
loss: 0.9908791184425354,grad_norm: 0.8228060824714925, iteration: 339923
loss: 1.046985149383545,grad_norm: 0.9562640039180956, iteration: 339924
loss: 1.0062068700790405,grad_norm: 0.8008780853881244, iteration: 339925
loss: 1.011383056640625,grad_norm: 0.689787889175503, iteration: 339926
loss: 1.0181913375854492,grad_norm: 0.7541824526206665, iteration: 339927
loss: 1.011752724647522,grad_norm: 0.8526369466041785, iteration: 339928
loss: 0.9765861630439758,grad_norm: 0.7943025064948637, iteration: 339929
loss: 1.0216891765594482,grad_norm: 0.5952054655333737, iteration: 339930
loss: 1.0010648965835571,grad_norm: 0.9103645841010376, iteration: 339931
loss: 1.0121639966964722,grad_norm: 0.7285315366404778, iteration: 339932
loss: 1.0030847787857056,grad_norm: 0.8161280980860831, iteration: 339933
loss: 1.0233386754989624,grad_norm: 0.8185546739167943, iteration: 339934
loss: 0.9839740991592407,grad_norm: 0.7790159198941649, iteration: 339935
loss: 1.1858657598495483,grad_norm: 0.9999992749705604, iteration: 339936
loss: 1.0417755842208862,grad_norm: 0.947538465292505, iteration: 339937
loss: 0.9671434164047241,grad_norm: 0.9114404441175612, iteration: 339938
loss: 0.9697402119636536,grad_norm: 0.9525938793613021, iteration: 339939
loss: 1.004058837890625,grad_norm: 0.803543806713513, iteration: 339940
loss: 0.98666912317276,grad_norm: 0.8721383147805458, iteration: 339941
loss: 1.0263047218322754,grad_norm: 0.7964458014456782, iteration: 339942
loss: 1.0375502109527588,grad_norm: 0.8676270178797436, iteration: 339943
loss: 1.0303162336349487,grad_norm: 0.9999993419890424, iteration: 339944
loss: 0.9709072709083557,grad_norm: 0.7951644232841273, iteration: 339945
loss: 1.0012974739074707,grad_norm: 0.9440754604616218, iteration: 339946
loss: 0.9935817122459412,grad_norm: 0.816581851764886, iteration: 339947
loss: 1.0040004253387451,grad_norm: 0.8529542198543177, iteration: 339948
loss: 1.075834035873413,grad_norm: 0.9999993414581408, iteration: 339949
loss: 0.9868637323379517,grad_norm: 0.8511127927699741, iteration: 339950
loss: 0.9922242164611816,grad_norm: 0.7271910004543046, iteration: 339951
loss: 0.9932210445404053,grad_norm: 0.9098643879127455, iteration: 339952
loss: 0.9985925555229187,grad_norm: 0.9070069172781667, iteration: 339953
loss: 0.9918537735939026,grad_norm: 0.8254973594510036, iteration: 339954
loss: 0.9982010722160339,grad_norm: 0.8907347516047521, iteration: 339955
loss: 0.9910937547683716,grad_norm: 0.8620411216594557, iteration: 339956
loss: 1.0207180976867676,grad_norm: 0.9168183000651371, iteration: 339957
loss: 1.038711667060852,grad_norm: 0.9999999639069663, iteration: 339958
loss: 0.9667471647262573,grad_norm: 0.7810050087772249, iteration: 339959
loss: 0.9999793171882629,grad_norm: 0.8275159528017544, iteration: 339960
loss: 0.9947479367256165,grad_norm: 0.669960467716125, iteration: 339961
loss: 0.9914628863334656,grad_norm: 0.6957146640246524, iteration: 339962
loss: 1.0390617847442627,grad_norm: 0.9716384661579481, iteration: 339963
loss: 0.9783644676208496,grad_norm: 0.8827968627456718, iteration: 339964
loss: 1.0965425968170166,grad_norm: 0.9999999306792531, iteration: 339965
loss: 1.0035947561264038,grad_norm: 0.9098064842385761, iteration: 339966
loss: 0.9855543375015259,grad_norm: 0.801909989387492, iteration: 339967
loss: 0.9892868995666504,grad_norm: 0.9999991915822006, iteration: 339968
loss: 0.9937704205513,grad_norm: 0.9999992032583905, iteration: 339969
loss: 1.022674560546875,grad_norm: 0.8752815196172714, iteration: 339970
loss: 0.9899080395698547,grad_norm: 0.9923872229332856, iteration: 339971
loss: 1.032909870147705,grad_norm: 0.8117951075622319, iteration: 339972
loss: 0.9986798167228699,grad_norm: 0.8538252966836531, iteration: 339973
loss: 1.0167253017425537,grad_norm: 0.8268035476349449, iteration: 339974
loss: 1.0228312015533447,grad_norm: 0.9999999170653366, iteration: 339975
loss: 1.0181121826171875,grad_norm: 0.9205067709485228, iteration: 339976
loss: 1.0266867876052856,grad_norm: 0.8560092769568308, iteration: 339977
loss: 1.0063270330429077,grad_norm: 0.9350734718719523, iteration: 339978
loss: 1.0071332454681396,grad_norm: 0.9999990939347815, iteration: 339979
loss: 0.991810142993927,grad_norm: 0.6787294875488746, iteration: 339980
loss: 1.011932611465454,grad_norm: 0.8412245705403859, iteration: 339981
loss: 1.0218032598495483,grad_norm: 0.8094977795905373, iteration: 339982
loss: 1.0331709384918213,grad_norm: 0.9999992457700799, iteration: 339983
loss: 1.0159270763397217,grad_norm: 0.7784484762146999, iteration: 339984
loss: 0.9869444966316223,grad_norm: 0.9076948733153055, iteration: 339985
loss: 0.9638060927391052,grad_norm: 0.8395643900823065, iteration: 339986
loss: 0.9945169687271118,grad_norm: 0.9132855148907146, iteration: 339987
loss: 1.0276981592178345,grad_norm: 0.9999995860415599, iteration: 339988
loss: 1.002759575843811,grad_norm: 0.8792874410351368, iteration: 339989
loss: 0.9958895444869995,grad_norm: 0.8948145087960826, iteration: 339990
loss: 1.0058289766311646,grad_norm: 0.9999990795945796, iteration: 339991
loss: 0.9529759883880615,grad_norm: 0.8400890767606095, iteration: 339992
loss: 0.986510694026947,grad_norm: 0.9602224254856327, iteration: 339993
loss: 1.012511968612671,grad_norm: 0.9999992236088928, iteration: 339994
loss: 1.0296087265014648,grad_norm: 0.7665297107463663, iteration: 339995
loss: 1.0053595304489136,grad_norm: 0.9999991652302, iteration: 339996
loss: 0.9719874262809753,grad_norm: 0.797492408454212, iteration: 339997
loss: 0.9902228116989136,grad_norm: 0.7809481119633823, iteration: 339998
loss: 0.9371494054794312,grad_norm: 0.7906492209435905, iteration: 339999
loss: 0.992290735244751,grad_norm: 0.8328260864946633, iteration: 340000
Evaluating at step 340000
{'val': 0.9955324772745371, 'test': 1.9966234170590726}
loss: 0.9974703192710876,grad_norm: 0.9116735114470489, iteration: 340001
loss: 1.1649482250213623,grad_norm: 0.9999994842510699, iteration: 340002
loss: 0.9975994825363159,grad_norm: 0.9999995490069633, iteration: 340003
loss: 0.9955703020095825,grad_norm: 0.9999991499259977, iteration: 340004
loss: 1.0486148595809937,grad_norm: 0.9999999513388538, iteration: 340005
loss: 1.0041643381118774,grad_norm: 0.832388526251254, iteration: 340006
loss: 0.978620707988739,grad_norm: 0.8174474925024038, iteration: 340007
loss: 0.9715023040771484,grad_norm: 0.9589413470488395, iteration: 340008
loss: 0.9845765233039856,grad_norm: 0.854309930027729, iteration: 340009
loss: 0.9932801127433777,grad_norm: 0.7634908120122083, iteration: 340010
loss: 0.9817832708358765,grad_norm: 0.7186744916301407, iteration: 340011
loss: 0.9968461990356445,grad_norm: 0.7676509492405821, iteration: 340012
loss: 1.0189672708511353,grad_norm: 0.8620390625125806, iteration: 340013
loss: 1.0127774477005005,grad_norm: 0.6644493025616818, iteration: 340014
loss: 1.0150268077850342,grad_norm: 0.999999048426795, iteration: 340015
loss: 1.094648003578186,grad_norm: 0.9999997884768757, iteration: 340016
loss: 0.9931648373603821,grad_norm: 0.7225668788094116, iteration: 340017
loss: 0.9984328746795654,grad_norm: 0.7653148503637135, iteration: 340018
loss: 1.0494862794876099,grad_norm: 0.9999993192517584, iteration: 340019
loss: 1.0463535785675049,grad_norm: 0.9160585260270069, iteration: 340020
loss: 1.0030412673950195,grad_norm: 0.6355777614024268, iteration: 340021
loss: 0.9975789189338684,grad_norm: 0.9339227443359508, iteration: 340022
loss: 0.9768025279045105,grad_norm: 0.9494603672801613, iteration: 340023
loss: 1.0122398138046265,grad_norm: 0.677921645939388, iteration: 340024
loss: 1.1855711936950684,grad_norm: 0.9999997726718717, iteration: 340025
loss: 1.0195093154907227,grad_norm: 0.8592473347169842, iteration: 340026
loss: 1.0024447441101074,grad_norm: 0.8109410697340954, iteration: 340027
loss: 1.0061092376708984,grad_norm: 0.8836381895368473, iteration: 340028
loss: 1.0163105726242065,grad_norm: 0.7166287080674397, iteration: 340029
loss: 1.0027432441711426,grad_norm: 0.875436902718423, iteration: 340030
loss: 0.9703882932662964,grad_norm: 0.8167125743120275, iteration: 340031
loss: 1.0124566555023193,grad_norm: 0.8476712243939435, iteration: 340032
loss: 0.9910768866539001,grad_norm: 0.7383524126783346, iteration: 340033
loss: 1.0063750743865967,grad_norm: 0.8824296170978929, iteration: 340034
loss: 1.0345344543457031,grad_norm: 0.9999997530785534, iteration: 340035
loss: 1.0033535957336426,grad_norm: 0.6642040754263395, iteration: 340036
loss: 0.985434353351593,grad_norm: 0.6935910036211167, iteration: 340037
loss: 1.0026925802230835,grad_norm: 0.9201732260342784, iteration: 340038
loss: 0.9786827564239502,grad_norm: 0.7784627626500106, iteration: 340039
loss: 0.9988954067230225,grad_norm: 0.751873325794654, iteration: 340040
loss: 0.9752824902534485,grad_norm: 0.8627764717506319, iteration: 340041
loss: 1.0010377168655396,grad_norm: 0.6792634897894525, iteration: 340042
loss: 0.9822399616241455,grad_norm: 0.9018329528557495, iteration: 340043
loss: 0.9815998077392578,grad_norm: 0.8497006924910007, iteration: 340044
loss: 0.9812991619110107,grad_norm: 0.8146768847583218, iteration: 340045
loss: 1.0126217603683472,grad_norm: 0.7276707207520536, iteration: 340046
loss: 1.0103552341461182,grad_norm: 0.9999994093730025, iteration: 340047
loss: 1.0445626974105835,grad_norm: 0.9024173764012202, iteration: 340048
loss: 0.981779932975769,grad_norm: 0.7393272084701433, iteration: 340049
loss: 1.014349102973938,grad_norm: 0.9999998861083049, iteration: 340050
loss: 0.9701150059700012,grad_norm: 0.9159778074718399, iteration: 340051
loss: 1.0176265239715576,grad_norm: 0.8255939941443058, iteration: 340052
loss: 1.0212761163711548,grad_norm: 0.9999999243084199, iteration: 340053
loss: 1.0692588090896606,grad_norm: 0.9999994136109966, iteration: 340054
loss: 1.031111240386963,grad_norm: 0.779716847697388, iteration: 340055
loss: 1.1135382652282715,grad_norm: 0.9689715266342701, iteration: 340056
loss: 1.0221949815750122,grad_norm: 0.9134501506722221, iteration: 340057
loss: 1.008446216583252,grad_norm: 0.7914780780544518, iteration: 340058
loss: 0.9737308621406555,grad_norm: 0.9839676930799394, iteration: 340059
loss: 0.9872135519981384,grad_norm: 0.8660067331156437, iteration: 340060
loss: 1.0244210958480835,grad_norm: 0.6590433632283899, iteration: 340061
loss: 1.0147947072982788,grad_norm: 0.8996181554466314, iteration: 340062
loss: 1.003501534461975,grad_norm: 0.9251763233892127, iteration: 340063
loss: 1.0357918739318848,grad_norm: 0.8726996245659556, iteration: 340064
loss: 0.998956561088562,grad_norm: 0.9901199079386249, iteration: 340065
loss: 0.9631507396697998,grad_norm: 0.8878341271891383, iteration: 340066
loss: 0.9882193803787231,grad_norm: 0.7076653094587894, iteration: 340067
loss: 0.9979037642478943,grad_norm: 0.8675339196876992, iteration: 340068
loss: 1.031399130821228,grad_norm: 0.9999996647987844, iteration: 340069
loss: 1.0144191980361938,grad_norm: 0.7945162391325234, iteration: 340070
loss: 1.0043342113494873,grad_norm: 0.9649569656067125, iteration: 340071
loss: 0.9850291609764099,grad_norm: 0.8624751357791195, iteration: 340072
loss: 0.9899435639381409,grad_norm: 0.8250300925122529, iteration: 340073
loss: 0.9767081141471863,grad_norm: 0.953528929265037, iteration: 340074
loss: 1.0521514415740967,grad_norm: 0.9999993008392669, iteration: 340075
loss: 1.0032356977462769,grad_norm: 0.8389225320301456, iteration: 340076
loss: 1.0102763175964355,grad_norm: 0.9408322568906725, iteration: 340077
loss: 0.9529801607131958,grad_norm: 0.8935951406773373, iteration: 340078
loss: 0.9775997996330261,grad_norm: 0.7751230728559176, iteration: 340079
loss: 1.0167593955993652,grad_norm: 0.7223405009642413, iteration: 340080
loss: 0.996305525302887,grad_norm: 0.7713109833711151, iteration: 340081
loss: 1.006704568862915,grad_norm: 0.982677668681607, iteration: 340082
loss: 1.004763126373291,grad_norm: 0.9999991349820481, iteration: 340083
loss: 1.003127098083496,grad_norm: 0.7393402808583094, iteration: 340084
loss: 0.9923617839813232,grad_norm: 0.7806522358148057, iteration: 340085
loss: 0.9904599785804749,grad_norm: 0.8491205925312848, iteration: 340086
loss: 1.0350520610809326,grad_norm: 0.9999991197477417, iteration: 340087
loss: 1.017170786857605,grad_norm: 0.9107048162052978, iteration: 340088
loss: 1.0125088691711426,grad_norm: 0.8124152977079567, iteration: 340089
loss: 1.0227807760238647,grad_norm: 0.9390472321358754, iteration: 340090
loss: 0.9986962080001831,grad_norm: 0.8386359936434692, iteration: 340091
loss: 0.9807666540145874,grad_norm: 0.7458064007460757, iteration: 340092
loss: 0.9960671663284302,grad_norm: 0.6925811530489377, iteration: 340093
loss: 1.017376184463501,grad_norm: 0.7597461506331981, iteration: 340094
loss: 1.0005539655685425,grad_norm: 0.9459365286786272, iteration: 340095
loss: 0.9955419301986694,grad_norm: 0.943345211136753, iteration: 340096
loss: 1.0033656358718872,grad_norm: 0.8079237664162604, iteration: 340097
loss: 1.021342396736145,grad_norm: 0.8223751031875466, iteration: 340098
loss: 1.0219488143920898,grad_norm: 0.9999996115912475, iteration: 340099
loss: 1.0114349126815796,grad_norm: 0.8962257843859972, iteration: 340100
loss: 0.9851171970367432,grad_norm: 0.9999991351225389, iteration: 340101
loss: 1.0093939304351807,grad_norm: 0.8540173574823309, iteration: 340102
loss: 0.9942280650138855,grad_norm: 0.7960334518240865, iteration: 340103
loss: 0.9680764675140381,grad_norm: 0.8181418408225712, iteration: 340104
loss: 0.9852694869041443,grad_norm: 0.8603968100448733, iteration: 340105
loss: 1.0249781608581543,grad_norm: 0.9783564063072006, iteration: 340106
loss: 0.9656389355659485,grad_norm: 0.8696049482766544, iteration: 340107
loss: 0.9975817203521729,grad_norm: 0.8723059548249583, iteration: 340108
loss: 1.0308728218078613,grad_norm: 0.9999996656303013, iteration: 340109
loss: 0.9499644637107849,grad_norm: 0.7871678024778844, iteration: 340110
loss: 0.9758482575416565,grad_norm: 0.7820267172285947, iteration: 340111
loss: 0.9493336081504822,grad_norm: 0.7967160842187535, iteration: 340112
loss: 0.9824679493904114,grad_norm: 0.8148385768689569, iteration: 340113
loss: 0.997682511806488,grad_norm: 0.7099780858188507, iteration: 340114
loss: 0.9606397747993469,grad_norm: 0.7826980071486712, iteration: 340115
loss: 0.9824600219726562,grad_norm: 0.7624084705055159, iteration: 340116
loss: 1.002650260925293,grad_norm: 0.832934078738947, iteration: 340117
loss: 0.9746423959732056,grad_norm: 0.9069751459182764, iteration: 340118
loss: 0.9932472705841064,grad_norm: 0.8144082171154365, iteration: 340119
loss: 0.9640485644340515,grad_norm: 0.856403861523754, iteration: 340120
loss: 1.0113930702209473,grad_norm: 0.7952028279177462, iteration: 340121
loss: 0.9942643046379089,grad_norm: 0.760945736334987, iteration: 340122
loss: 0.9908947944641113,grad_norm: 0.8719533694395595, iteration: 340123
loss: 1.015618920326233,grad_norm: 0.7272843735850459, iteration: 340124
loss: 0.9986605644226074,grad_norm: 0.796050535697412, iteration: 340125
loss: 1.0038827657699585,grad_norm: 0.9999989840677862, iteration: 340126
loss: 1.0008821487426758,grad_norm: 0.7016872277058747, iteration: 340127
loss: 0.9995773434638977,grad_norm: 0.9545236508651308, iteration: 340128
loss: 0.947729229927063,grad_norm: 0.8200683267783562, iteration: 340129
loss: 1.0167534351348877,grad_norm: 0.9831973349163062, iteration: 340130
loss: 0.9611438512802124,grad_norm: 0.7717716906378606, iteration: 340131
loss: 1.0322660207748413,grad_norm: 0.8620332005927502, iteration: 340132
loss: 0.9915454983711243,grad_norm: 0.9999998918144101, iteration: 340133
loss: 0.9813738465309143,grad_norm: 0.8034914728160576, iteration: 340134
loss: 1.0193356275558472,grad_norm: 0.8127350829327058, iteration: 340135
loss: 0.9862340688705444,grad_norm: 0.9586684569902613, iteration: 340136
loss: 1.1066222190856934,grad_norm: 0.9999989671768706, iteration: 340137
loss: 0.9736185669898987,grad_norm: 0.9337251864305024, iteration: 340138
loss: 1.0276546478271484,grad_norm: 0.8799546375280064, iteration: 340139
loss: 1.0020151138305664,grad_norm: 0.6885350218597505, iteration: 340140
loss: 0.9621335864067078,grad_norm: 0.9468548017126343, iteration: 340141
loss: 0.9904960989952087,grad_norm: 0.7126474004267704, iteration: 340142
loss: 1.016384243965149,grad_norm: 0.9999992773325291, iteration: 340143
loss: 0.9974421858787537,grad_norm: 0.7189740063594556, iteration: 340144
loss: 1.001325249671936,grad_norm: 0.9999992805558806, iteration: 340145
loss: 1.0028072595596313,grad_norm: 0.9026768810385547, iteration: 340146
loss: 1.0029293298721313,grad_norm: 0.6940724905444117, iteration: 340147
loss: 1.0213940143585205,grad_norm: 0.7062739339500802, iteration: 340148
loss: 0.980344295501709,grad_norm: 0.8582254529914626, iteration: 340149
loss: 0.9636510014533997,grad_norm: 0.7858594690895507, iteration: 340150
loss: 0.9970749616622925,grad_norm: 0.7690869956413561, iteration: 340151
loss: 0.9730775356292725,grad_norm: 0.8531534392363009, iteration: 340152
loss: 1.0061036348342896,grad_norm: 0.7993095855106647, iteration: 340153
loss: 1.007006287574768,grad_norm: 0.7590219927372951, iteration: 340154
loss: 0.9498720169067383,grad_norm: 0.7821258475625894, iteration: 340155
loss: 1.0117336511611938,grad_norm: 0.845277483313183, iteration: 340156
loss: 0.9650479555130005,grad_norm: 0.9035670783874594, iteration: 340157
loss: 1.0261753797531128,grad_norm: 0.7823204701553935, iteration: 340158
loss: 1.0200127363204956,grad_norm: 0.8110614559752447, iteration: 340159
loss: 0.9813506007194519,grad_norm: 0.845446886054824, iteration: 340160
loss: 0.99269038438797,grad_norm: 0.9999991328006721, iteration: 340161
loss: 1.0273219347000122,grad_norm: 0.6797056502210874, iteration: 340162
loss: 1.0467016696929932,grad_norm: 0.8740536167981074, iteration: 340163
loss: 0.99174565076828,grad_norm: 0.7696753680142039, iteration: 340164
loss: 0.9907104969024658,grad_norm: 0.9063531248031321, iteration: 340165
loss: 0.990997314453125,grad_norm: 0.9999990748099892, iteration: 340166
loss: 0.9844837784767151,grad_norm: 0.9161813339180646, iteration: 340167
loss: 1.0047680139541626,grad_norm: 0.8254510421018413, iteration: 340168
loss: 1.0248740911483765,grad_norm: 0.8046158125562879, iteration: 340169
loss: 1.0132280588150024,grad_norm: 0.7722734382505033, iteration: 340170
loss: 1.0205014944076538,grad_norm: 0.8139028881391287, iteration: 340171
loss: 0.9739876389503479,grad_norm: 0.7918207591041919, iteration: 340172
loss: 0.9960953593254089,grad_norm: 0.8639704296824512, iteration: 340173
loss: 0.9825153350830078,grad_norm: 0.8306741168114239, iteration: 340174
loss: 0.9914419651031494,grad_norm: 0.8323112134033771, iteration: 340175
loss: 0.9963117241859436,grad_norm: 0.9989926374812278, iteration: 340176
loss: 0.9620137214660645,grad_norm: 0.7709643345245645, iteration: 340177
loss: 1.0256938934326172,grad_norm: 0.9342587717925223, iteration: 340178
loss: 0.9742043018341064,grad_norm: 0.9999991009065148, iteration: 340179
loss: 1.0349057912826538,grad_norm: 0.9721256324302855, iteration: 340180
loss: 1.0101633071899414,grad_norm: 0.9665662014339702, iteration: 340181
loss: 1.0501182079315186,grad_norm: 0.9999991028245518, iteration: 340182
loss: 0.99638831615448,grad_norm: 0.6885351510133367, iteration: 340183
loss: 0.9986145496368408,grad_norm: 0.8515115132496306, iteration: 340184
loss: 0.9920364618301392,grad_norm: 0.9902059518272767, iteration: 340185
loss: 0.9756193161010742,grad_norm: 0.7745691793838965, iteration: 340186
loss: 1.0114734172821045,grad_norm: 0.9299192145663449, iteration: 340187
loss: 0.9885894060134888,grad_norm: 0.9999991282723141, iteration: 340188
loss: 1.0146428346633911,grad_norm: 0.9999998861915916, iteration: 340189
loss: 1.0180490016937256,grad_norm: 0.8642721272107026, iteration: 340190
loss: 1.031903862953186,grad_norm: 0.8290966555042357, iteration: 340191
loss: 1.020272135734558,grad_norm: 0.9999992131607573, iteration: 340192
loss: 0.9994893074035645,grad_norm: 0.7400028611662554, iteration: 340193
loss: 1.0078634023666382,grad_norm: 0.8250255914768446, iteration: 340194
loss: 0.9804276823997498,grad_norm: 0.7337943443961099, iteration: 340195
loss: 0.9896971583366394,grad_norm: 0.7602292534107604, iteration: 340196
loss: 0.9830134510993958,grad_norm: 0.7849473922873152, iteration: 340197
loss: 1.01619291305542,grad_norm: 0.9999990020930957, iteration: 340198
loss: 0.9816873669624329,grad_norm: 0.7368757699424152, iteration: 340199
loss: 1.0184932947158813,grad_norm: 0.8597531350759684, iteration: 340200
loss: 0.9906534552574158,grad_norm: 0.8738388466754149, iteration: 340201
loss: 1.0159711837768555,grad_norm: 0.9999990820880311, iteration: 340202
loss: 0.99506014585495,grad_norm: 0.7849797487207976, iteration: 340203
loss: 1.0064047574996948,grad_norm: 0.7763216426491684, iteration: 340204
loss: 1.0273001194000244,grad_norm: 0.8708958871106148, iteration: 340205
loss: 1.010036587715149,grad_norm: 0.7530657530259453, iteration: 340206
loss: 1.003463625907898,grad_norm: 0.9999989772754697, iteration: 340207
loss: 1.0065540075302124,grad_norm: 0.9573297970498365, iteration: 340208
loss: 1.0096360445022583,grad_norm: 0.8236036134725357, iteration: 340209
loss: 0.9913815259933472,grad_norm: 0.7875603485875983, iteration: 340210
loss: 1.0228962898254395,grad_norm: 0.9302165271943176, iteration: 340211
loss: 0.989645779132843,grad_norm: 0.8924479912562777, iteration: 340212
loss: 0.985279381275177,grad_norm: 0.8892059925635719, iteration: 340213
loss: 1.026597261428833,grad_norm: 0.8588875062657813, iteration: 340214
loss: 1.0177010297775269,grad_norm: 0.6827379854071619, iteration: 340215
loss: 1.0290675163269043,grad_norm: 0.7178334913042534, iteration: 340216
loss: 0.9858829379081726,grad_norm: 0.8437936077399232, iteration: 340217
loss: 1.013811469078064,grad_norm: 0.9413856167981763, iteration: 340218
loss: 0.9973011016845703,grad_norm: 0.8927591152342668, iteration: 340219
loss: 1.0111322402954102,grad_norm: 0.8081595645506059, iteration: 340220
loss: 1.0300147533416748,grad_norm: 0.8897403731233596, iteration: 340221
loss: 0.9887188076972961,grad_norm: 0.8195838229634341, iteration: 340222
loss: 0.9687473773956299,grad_norm: 0.8565220762401152, iteration: 340223
loss: 0.9952647686004639,grad_norm: 0.7415250753948519, iteration: 340224
loss: 0.9965111613273621,grad_norm: 0.7201147234597434, iteration: 340225
loss: 1.0008081197738647,grad_norm: 0.652218515456648, iteration: 340226
loss: 1.0117493867874146,grad_norm: 0.9999994134025387, iteration: 340227
loss: 1.0074747800827026,grad_norm: 0.9646909977450152, iteration: 340228
loss: 1.001067042350769,grad_norm: 0.9173048599494535, iteration: 340229
loss: 1.0058445930480957,grad_norm: 0.8209680481178923, iteration: 340230
loss: 0.995975136756897,grad_norm: 0.7443296337210455, iteration: 340231
loss: 0.978460431098938,grad_norm: 0.8047578599610228, iteration: 340232
loss: 0.9777480959892273,grad_norm: 0.7418769451717437, iteration: 340233
loss: 0.9828401803970337,grad_norm: 0.689730571600938, iteration: 340234
loss: 1.0046427249908447,grad_norm: 0.9656109731892949, iteration: 340235
loss: 1.0088642835617065,grad_norm: 0.8288050350989159, iteration: 340236
loss: 1.037177562713623,grad_norm: 0.7892625338129109, iteration: 340237
loss: 1.007736325263977,grad_norm: 0.7924616444956233, iteration: 340238
loss: 1.020845890045166,grad_norm: 0.7508022160737293, iteration: 340239
loss: 0.9825760126113892,grad_norm: 0.742793389744455, iteration: 340240
loss: 1.0073860883712769,grad_norm: 0.8678498966814538, iteration: 340241
loss: 1.0447360277175903,grad_norm: 0.9999994398965045, iteration: 340242
loss: 0.9850414395332336,grad_norm: 0.7430132062821024, iteration: 340243
loss: 0.9860138297080994,grad_norm: 0.873457433709899, iteration: 340244
loss: 1.0155153274536133,grad_norm: 0.8144530693119986, iteration: 340245
loss: 0.9807964563369751,grad_norm: 0.9028481028383589, iteration: 340246
loss: 0.986286997795105,grad_norm: 0.9012968700275421, iteration: 340247
loss: 1.0150970220565796,grad_norm: 0.7051489952526663, iteration: 340248
loss: 1.0254721641540527,grad_norm: 0.7241467566857892, iteration: 340249
loss: 0.9947147369384766,grad_norm: 0.9999990631158471, iteration: 340250
loss: 1.0002492666244507,grad_norm: 0.8724394919178149, iteration: 340251
loss: 0.9997090101242065,grad_norm: 0.9999990894970476, iteration: 340252
loss: 0.968816339969635,grad_norm: 0.7806636170663849, iteration: 340253
loss: 1.0008755922317505,grad_norm: 0.7509762660135713, iteration: 340254
loss: 1.0064224004745483,grad_norm: 0.834153640948068, iteration: 340255
loss: 0.9708515405654907,grad_norm: 0.7435802899918146, iteration: 340256
loss: 1.0067713260650635,grad_norm: 0.9999988637063342, iteration: 340257
loss: 0.9759962558746338,grad_norm: 0.7860081732807409, iteration: 340258
loss: 0.986771821975708,grad_norm: 0.7843108011956248, iteration: 340259
loss: 1.0019663572311401,grad_norm: 0.99999969309618, iteration: 340260
loss: 1.0048471689224243,grad_norm: 0.7685726675774248, iteration: 340261
loss: 1.0230926275253296,grad_norm: 0.9999991163827706, iteration: 340262
loss: 0.9903920888900757,grad_norm: 0.7322371108864093, iteration: 340263
loss: 0.9593967795372009,grad_norm: 0.7318933177351494, iteration: 340264
loss: 1.0448697805404663,grad_norm: 0.9464145842223703, iteration: 340265
loss: 0.985181987285614,grad_norm: 0.9999989913162791, iteration: 340266
loss: 0.9652629494667053,grad_norm: 0.966469593904398, iteration: 340267
loss: 0.9878160357475281,grad_norm: 0.8512395722956575, iteration: 340268
loss: 0.9859517812728882,grad_norm: 0.6657784800759642, iteration: 340269
loss: 1.0084868669509888,grad_norm: 0.910500217491564, iteration: 340270
loss: 1.0057674646377563,grad_norm: 0.979200645556947, iteration: 340271
loss: 0.9964125752449036,grad_norm: 0.8323112197952098, iteration: 340272
loss: 1.0201914310455322,grad_norm: 0.8421973087748204, iteration: 340273
loss: 0.9793748259544373,grad_norm: 0.7432564828218008, iteration: 340274
loss: 1.0199607610702515,grad_norm: 0.8476780866036954, iteration: 340275
loss: 0.9958913922309875,grad_norm: 0.9675359072185693, iteration: 340276
loss: 0.9948440790176392,grad_norm: 0.885498350518974, iteration: 340277
loss: 0.9951561689376831,grad_norm: 0.7576467545341639, iteration: 340278
loss: 0.9971269965171814,grad_norm: 0.9999990457964378, iteration: 340279
loss: 1.0324753522872925,grad_norm: 1.0000000265928073, iteration: 340280
loss: 1.0325044393539429,grad_norm: 0.7177096213310267, iteration: 340281
loss: 1.016184687614441,grad_norm: 0.9865919496081683, iteration: 340282
loss: 1.023543357849121,grad_norm: 0.9999993890505512, iteration: 340283
loss: 1.0038679838180542,grad_norm: 0.9999989448874472, iteration: 340284
loss: 1.019223928451538,grad_norm: 0.9999997621907134, iteration: 340285
loss: 0.9621213674545288,grad_norm: 0.9369569419597777, iteration: 340286
loss: 0.994069516658783,grad_norm: 0.7463680069300895, iteration: 340287
loss: 0.9878671765327454,grad_norm: 0.732603404190871, iteration: 340288
loss: 0.9911905527114868,grad_norm: 0.8748283427724176, iteration: 340289
loss: 0.9967532753944397,grad_norm: 0.760728191063058, iteration: 340290
loss: 0.9873339533805847,grad_norm: 0.8269557745553273, iteration: 340291
loss: 1.025316596031189,grad_norm: 0.897426383801499, iteration: 340292
loss: 1.0056058168411255,grad_norm: 0.7422265920833381, iteration: 340293
loss: 1.0171979665756226,grad_norm: 0.7490613320776492, iteration: 340294
loss: 1.0083986520767212,grad_norm: 0.778263358904796, iteration: 340295
loss: 0.9810865521430969,grad_norm: 0.7519691343358679, iteration: 340296
loss: 1.0163991451263428,grad_norm: 0.7502715368235985, iteration: 340297
loss: 1.0121396780014038,grad_norm: 0.9390285617156808, iteration: 340298
loss: 1.0098240375518799,grad_norm: 0.8545143146595395, iteration: 340299
loss: 1.016135573387146,grad_norm: 0.8532171054871969, iteration: 340300
loss: 1.004908561706543,grad_norm: 0.9999993147161348, iteration: 340301
loss: 1.0491890907287598,grad_norm: 0.7960052447066204, iteration: 340302
loss: 1.0123134851455688,grad_norm: 0.9432034812407666, iteration: 340303
loss: 0.9992154240608215,grad_norm: 0.7761645798018576, iteration: 340304
loss: 0.9937724471092224,grad_norm: 0.7415289058010059, iteration: 340305
loss: 1.0050047636032104,grad_norm: 0.8916233281440268, iteration: 340306
loss: 1.0177940130233765,grad_norm: 0.9999995422724061, iteration: 340307
loss: 1.002461552619934,grad_norm: 0.7739370492627736, iteration: 340308
loss: 0.996769368648529,grad_norm: 0.815276592132417, iteration: 340309
loss: 0.9984825849533081,grad_norm: 0.7895040222806342, iteration: 340310
loss: 1.0094342231750488,grad_norm: 0.7649315201875463, iteration: 340311
loss: 0.9728152751922607,grad_norm: 0.8969884055105215, iteration: 340312
loss: 0.9758528470993042,grad_norm: 0.8676206018155108, iteration: 340313
loss: 1.0288066864013672,grad_norm: 0.9999991150824995, iteration: 340314
loss: 0.9813396334648132,grad_norm: 0.9999990195910001, iteration: 340315
loss: 1.040761113166809,grad_norm: 0.710643634308487, iteration: 340316
loss: 0.9680970311164856,grad_norm: 0.8175789654010627, iteration: 340317
loss: 1.00074303150177,grad_norm: 0.7014061217377843, iteration: 340318
loss: 0.9891713857650757,grad_norm: 0.7223335447403838, iteration: 340319
loss: 1.0003533363342285,grad_norm: 0.8313159209610236, iteration: 340320
loss: 0.9869265556335449,grad_norm: 0.7518559440723833, iteration: 340321
loss: 0.9831120371818542,grad_norm: 0.8399376846228163, iteration: 340322
loss: 1.0113276243209839,grad_norm: 0.7647113014297499, iteration: 340323
loss: 0.9896782636642456,grad_norm: 0.7979098321835548, iteration: 340324
loss: 1.0376944541931152,grad_norm: 0.903082817899846, iteration: 340325
loss: 1.0280567407608032,grad_norm: 0.9999991068355724, iteration: 340326
loss: 0.9353449940681458,grad_norm: 0.8095131677742763, iteration: 340327
loss: 0.9887744784355164,grad_norm: 0.7319585342698867, iteration: 340328
loss: 1.0266114473342896,grad_norm: 0.9080688256540391, iteration: 340329
loss: 1.0224169492721558,grad_norm: 0.9038256517750562, iteration: 340330
loss: 0.9611608982086182,grad_norm: 0.7460183284574764, iteration: 340331
loss: 0.9950345754623413,grad_norm: 0.8606884893101986, iteration: 340332
loss: 1.000197172164917,grad_norm: 0.8172454703698631, iteration: 340333
loss: 1.0147955417633057,grad_norm: 0.8151547083173015, iteration: 340334
loss: 0.9982753992080688,grad_norm: 0.9112252891630657, iteration: 340335
loss: 1.0130109786987305,grad_norm: 0.7634745911072882, iteration: 340336
loss: 1.0046982765197754,grad_norm: 0.7710998981150462, iteration: 340337
loss: 0.9678120613098145,grad_norm: 0.9251473505019697, iteration: 340338
loss: 0.9902581572532654,grad_norm: 0.7333980453143509, iteration: 340339
loss: 0.9919070601463318,grad_norm: 0.7431888044637377, iteration: 340340
loss: 0.9913448095321655,grad_norm: 0.7058315909239284, iteration: 340341
loss: 0.9467480778694153,grad_norm: 0.8955270177693528, iteration: 340342
loss: 1.0687382221221924,grad_norm: 0.9999991754529249, iteration: 340343
loss: 1.0485363006591797,grad_norm: 0.9006124746855841, iteration: 340344
loss: 1.0054688453674316,grad_norm: 0.8805891630400178, iteration: 340345
loss: 0.9472907185554504,grad_norm: 0.6959396515579711, iteration: 340346
loss: 1.0011001825332642,grad_norm: 0.8965266413343108, iteration: 340347
loss: 1.023572325706482,grad_norm: 0.7362640138259722, iteration: 340348
loss: 0.9672759771347046,grad_norm: 0.9358109492870944, iteration: 340349
loss: 0.9771854877471924,grad_norm: 0.8310506248488542, iteration: 340350
loss: 0.9661298990249634,grad_norm: 0.8615455208367764, iteration: 340351
loss: 0.9498826861381531,grad_norm: 0.999999051371496, iteration: 340352
loss: 0.9979885816574097,grad_norm: 0.8349602513007447, iteration: 340353
loss: 0.989210307598114,grad_norm: 0.7527391316215565, iteration: 340354
loss: 0.9753406643867493,grad_norm: 0.7971994639501483, iteration: 340355
loss: 0.9940786361694336,grad_norm: 0.8870288203937059, iteration: 340356
loss: 0.9869015216827393,grad_norm: 0.9236020841513847, iteration: 340357
loss: 1.0057379007339478,grad_norm: 0.8172380019277117, iteration: 340358
loss: 1.0007004737854004,grad_norm: 0.7615302735896536, iteration: 340359
loss: 0.9279379844665527,grad_norm: 0.9418046011943877, iteration: 340360
loss: 1.0102440118789673,grad_norm: 0.7877769157653838, iteration: 340361
loss: 1.026358962059021,grad_norm: 0.8540398504899606, iteration: 340362
loss: 1.0207730531692505,grad_norm: 0.9286989442325861, iteration: 340363
loss: 1.0019251108169556,grad_norm: 0.8924034968188326, iteration: 340364
loss: 0.9907539486885071,grad_norm: 0.9057476440602342, iteration: 340365
loss: 1.0745298862457275,grad_norm: 0.7399302753887927, iteration: 340366
loss: 1.0056419372558594,grad_norm: 0.8416781592868178, iteration: 340367
loss: 0.9964856505393982,grad_norm: 0.7380536080959429, iteration: 340368
loss: 1.0179263353347778,grad_norm: 0.7070830928677199, iteration: 340369
loss: 1.0123423337936401,grad_norm: 0.9389415641929608, iteration: 340370
loss: 1.0027492046356201,grad_norm: 0.8901746609375055, iteration: 340371
loss: 0.9968224167823792,grad_norm: 0.7571084511667727, iteration: 340372
loss: 0.9832035303115845,grad_norm: 0.7136322389353642, iteration: 340373
loss: 0.9798808693885803,grad_norm: 0.8104682166211608, iteration: 340374
loss: 0.974176824092865,grad_norm: 0.8703105603091148, iteration: 340375
loss: 0.9607192873954773,grad_norm: 0.8216523912423737, iteration: 340376
loss: 1.006531834602356,grad_norm: 0.999999087950961, iteration: 340377
loss: 1.0040558576583862,grad_norm: 0.7198458290240769, iteration: 340378
loss: 1.0077266693115234,grad_norm: 0.7843858076391571, iteration: 340379
loss: 0.9907938838005066,grad_norm: 0.8008589591514903, iteration: 340380
loss: 0.9955616593360901,grad_norm: 0.8875320733035149, iteration: 340381
loss: 0.9776395559310913,grad_norm: 0.931104759462178, iteration: 340382
loss: 0.9897713661193848,grad_norm: 0.9999997057405267, iteration: 340383
loss: 0.9970806241035461,grad_norm: 0.8530948796570245, iteration: 340384
loss: 1.0156868696212769,grad_norm: 0.9999991229893817, iteration: 340385
loss: 0.9681146740913391,grad_norm: 0.8859361782765726, iteration: 340386
loss: 0.9876089096069336,grad_norm: 0.8628779470355864, iteration: 340387
loss: 0.9720994234085083,grad_norm: 0.821023280084299, iteration: 340388
loss: 1.0085794925689697,grad_norm: 0.8702290182433158, iteration: 340389
loss: 1.0020463466644287,grad_norm: 0.8589470592865633, iteration: 340390
loss: 1.0220764875411987,grad_norm: 0.9999990407864462, iteration: 340391
loss: 0.9864103198051453,grad_norm: 0.8809653991863611, iteration: 340392
loss: 0.9697220325469971,grad_norm: 0.7514156822186892, iteration: 340393
loss: 0.9800446629524231,grad_norm: 0.7741544896701141, iteration: 340394
loss: 1.029837965965271,grad_norm: 0.7437120819826853, iteration: 340395
loss: 0.9928329586982727,grad_norm: 0.8279089713878443, iteration: 340396
loss: 1.0107498168945312,grad_norm: 0.7256478431093316, iteration: 340397
loss: 0.9771784543991089,grad_norm: 0.9169295483113653, iteration: 340398
loss: 0.9985989332199097,grad_norm: 0.9999991244651462, iteration: 340399
loss: 0.9949113726615906,grad_norm: 0.931818762276376, iteration: 340400
loss: 0.9897365570068359,grad_norm: 0.8151656112415855, iteration: 340401
loss: 0.9725922346115112,grad_norm: 0.7816015173043932, iteration: 340402
loss: 0.9860942363739014,grad_norm: 0.8254347425568073, iteration: 340403
loss: 0.9693614840507507,grad_norm: 0.6843227292961958, iteration: 340404
loss: 1.0014228820800781,grad_norm: 0.8624557132734637, iteration: 340405
loss: 1.0545941591262817,grad_norm: 0.9999998776383523, iteration: 340406
loss: 1.0058023929595947,grad_norm: 0.8529717990532821, iteration: 340407
loss: 0.9448503255844116,grad_norm: 0.7642368510343599, iteration: 340408
loss: 1.0104029178619385,grad_norm: 0.8652225201370315, iteration: 340409
loss: 1.0152347087860107,grad_norm: 0.8245166822307614, iteration: 340410
loss: 1.0110796689987183,grad_norm: 0.7451882646174549, iteration: 340411
loss: 0.9898489713668823,grad_norm: 0.9458645071588206, iteration: 340412
loss: 1.0066460371017456,grad_norm: 0.7409536516198092, iteration: 340413
loss: 0.9981535077095032,grad_norm: 0.8789391028592091, iteration: 340414
loss: 1.059342384338379,grad_norm: 0.8770120107767606, iteration: 340415
loss: 0.9731068015098572,grad_norm: 0.8216003521816777, iteration: 340416
loss: 0.9879211783409119,grad_norm: 0.9482836906479284, iteration: 340417
loss: 1.0124256610870361,grad_norm: 0.9999992809048834, iteration: 340418
loss: 1.0013538599014282,grad_norm: 0.8891844045655419, iteration: 340419
loss: 0.999699592590332,grad_norm: 0.9422495792642676, iteration: 340420
loss: 0.9832557439804077,grad_norm: 0.8056434607663822, iteration: 340421
loss: 1.0038201808929443,grad_norm: 0.8856957125307431, iteration: 340422
loss: 0.9625987410545349,grad_norm: 0.7470251286991192, iteration: 340423
loss: 1.0051584243774414,grad_norm: 0.822310359250906, iteration: 340424
loss: 1.0433717966079712,grad_norm: 0.9999992470889038, iteration: 340425
loss: 1.1770111322402954,grad_norm: 0.9999991202301972, iteration: 340426
loss: 0.9843054413795471,grad_norm: 0.8841289099171217, iteration: 340427
loss: 0.9835829734802246,grad_norm: 0.8497238191704996, iteration: 340428
loss: 1.0314345359802246,grad_norm: 0.7982068918736428, iteration: 340429
loss: 0.9924657940864563,grad_norm: 0.8318764124372934, iteration: 340430
loss: 1.0090558528900146,grad_norm: 0.9524817218523702, iteration: 340431
loss: 0.9640941619873047,grad_norm: 0.8811808169249483, iteration: 340432
loss: 1.0279945135116577,grad_norm: 0.7267817184528043, iteration: 340433
loss: 1.0015709400177002,grad_norm: 0.8463650730251091, iteration: 340434
loss: 0.9959084391593933,grad_norm: 0.739975991087041, iteration: 340435
loss: 0.9640020728111267,grad_norm: 0.9052603819136309, iteration: 340436
loss: 0.99433434009552,grad_norm: 0.778880870192718, iteration: 340437
loss: 1.0069934129714966,grad_norm: 0.748931239526988, iteration: 340438
loss: 1.0008543729782104,grad_norm: 0.7626447737392245, iteration: 340439
loss: 0.9971641898155212,grad_norm: 0.9541387766730741, iteration: 340440
loss: 0.9958558678627014,grad_norm: 0.931663827354329, iteration: 340441
loss: 1.0616143941879272,grad_norm: 0.9999995045547813, iteration: 340442
loss: 1.0071765184402466,grad_norm: 0.7943744711708678, iteration: 340443
loss: 0.9854776859283447,grad_norm: 0.9999989783987806, iteration: 340444
loss: 1.0233426094055176,grad_norm: 0.7477367780444283, iteration: 340445
loss: 1.0449517965316772,grad_norm: 0.9999998686825932, iteration: 340446
loss: 0.9802366495132446,grad_norm: 0.7098073419894395, iteration: 340447
loss: 0.9526756405830383,grad_norm: 0.7450746718863162, iteration: 340448
loss: 1.0088087320327759,grad_norm: 0.9999992087749162, iteration: 340449
loss: 1.0014259815216064,grad_norm: 0.862052382297452, iteration: 340450
loss: 1.0109333992004395,grad_norm: 0.9694875159975075, iteration: 340451
loss: 1.033631443977356,grad_norm: 0.9407754611632789, iteration: 340452
loss: 0.9929773807525635,grad_norm: 0.9185723395200969, iteration: 340453
loss: 1.0096856355667114,grad_norm: 0.9623881200711083, iteration: 340454
loss: 1.0608186721801758,grad_norm: 0.7389808327107769, iteration: 340455
loss: 0.9990190267562866,grad_norm: 0.9999990535421829, iteration: 340456
loss: 1.0009621381759644,grad_norm: 0.7032323059437583, iteration: 340457
loss: 1.1210674047470093,grad_norm: 0.9999998312184986, iteration: 340458
loss: 0.9993330240249634,grad_norm: 0.8057263090029244, iteration: 340459
loss: 1.0028191804885864,grad_norm: 0.8805389466330542, iteration: 340460
loss: 0.9617816209793091,grad_norm: 0.7582588570418712, iteration: 340461
loss: 1.0220673084259033,grad_norm: 0.9050614352811087, iteration: 340462
loss: 0.9868471026420593,grad_norm: 0.9999992722058304, iteration: 340463
loss: 0.9936103820800781,grad_norm: 0.9762736862713896, iteration: 340464
loss: 0.9880659580230713,grad_norm: 0.9999990530911301, iteration: 340465
loss: 0.9946164488792419,grad_norm: 0.8291229661260294, iteration: 340466
loss: 1.0060768127441406,grad_norm: 0.8315995439020316, iteration: 340467
loss: 1.0193618535995483,grad_norm: 0.9999992705865655, iteration: 340468
loss: 0.9401799440383911,grad_norm: 0.9055715567613815, iteration: 340469
loss: 1.0264993906021118,grad_norm: 0.9300830932618801, iteration: 340470
loss: 1.01914644241333,grad_norm: 0.9135831662798695, iteration: 340471
loss: 0.9887164831161499,grad_norm: 0.7435167428842948, iteration: 340472
loss: 1.010381817817688,grad_norm: 0.8692403355056026, iteration: 340473
loss: 1.015122413635254,grad_norm: 0.8075997300567829, iteration: 340474
loss: 1.0232417583465576,grad_norm: 0.8392229280832401, iteration: 340475
loss: 0.9797097444534302,grad_norm: 0.9999991786294646, iteration: 340476
loss: 0.9922762513160706,grad_norm: 0.9212729838623409, iteration: 340477
loss: 0.9968590140342712,grad_norm: 0.7510908760828902, iteration: 340478
loss: 0.9992019534111023,grad_norm: 0.8987512139633774, iteration: 340479
loss: 1.0205395221710205,grad_norm: 0.8401984401973355, iteration: 340480
loss: 1.0224685668945312,grad_norm: 0.999999387613123, iteration: 340481
loss: 1.0074576139450073,grad_norm: 0.863479091052665, iteration: 340482
loss: 1.0144023895263672,grad_norm: 0.7483328386183482, iteration: 340483
loss: 0.9804671406745911,grad_norm: 0.88100029956005, iteration: 340484
loss: 0.996326744556427,grad_norm: 0.7673882966299376, iteration: 340485
loss: 1.0046440362930298,grad_norm: 0.8613550359287977, iteration: 340486
loss: 1.0888042449951172,grad_norm: 0.9999992474780465, iteration: 340487
loss: 0.9945502281188965,grad_norm: 0.8630986089363185, iteration: 340488
loss: 0.9865450859069824,grad_norm: 0.8667585936024218, iteration: 340489
loss: 0.9913274049758911,grad_norm: 0.9999994525830306, iteration: 340490
loss: 1.0203944444656372,grad_norm: 0.8196385027693428, iteration: 340491
loss: 1.0124850273132324,grad_norm: 0.9968015227392956, iteration: 340492
loss: 1.0025479793548584,grad_norm: 0.9999991966949124, iteration: 340493
loss: 0.9775288701057434,grad_norm: 0.7602447523299327, iteration: 340494
loss: 1.0048437118530273,grad_norm: 0.8230098693975673, iteration: 340495
loss: 0.994452178478241,grad_norm: 0.7569350621513469, iteration: 340496
loss: 0.9980723857879639,grad_norm: 0.9999990539462107, iteration: 340497
loss: 0.9950408935546875,grad_norm: 0.8658583556698864, iteration: 340498
loss: 1.022373080253601,grad_norm: 0.742435715239855, iteration: 340499
loss: 1.0479822158813477,grad_norm: 0.9736138350038229, iteration: 340500
loss: 0.9946829676628113,grad_norm: 0.7709057955822105, iteration: 340501
loss: 0.9726791977882385,grad_norm: 0.7855386063902351, iteration: 340502
loss: 0.9994513392448425,grad_norm: 0.8996298239478722, iteration: 340503
loss: 1.0134109258651733,grad_norm: 0.7515294955988756, iteration: 340504
loss: 0.9636243581771851,grad_norm: 0.8143000047319199, iteration: 340505
loss: 1.0285710096359253,grad_norm: 0.959479068745918, iteration: 340506
loss: 0.9894543886184692,grad_norm: 0.7134824846295132, iteration: 340507
loss: 1.0877492427825928,grad_norm: 0.9999998897462307, iteration: 340508
loss: 1.0404049158096313,grad_norm: 0.8062766865081296, iteration: 340509
loss: 0.9869701862335205,grad_norm: 0.7949419408044134, iteration: 340510
loss: 1.0002152919769287,grad_norm: 0.8602952701301053, iteration: 340511
loss: 1.014330506324768,grad_norm: 0.7285743245264555, iteration: 340512
loss: 1.004633903503418,grad_norm: 0.7391117649976322, iteration: 340513
loss: 1.0191357135772705,grad_norm: 0.9029140628903168, iteration: 340514
loss: 0.988426148891449,grad_norm: 0.9999994030906231, iteration: 340515
loss: 0.9789095520973206,grad_norm: 0.7398255300663176, iteration: 340516
loss: 1.0262402296066284,grad_norm: 0.8556752074538198, iteration: 340517
loss: 1.0068104267120361,grad_norm: 0.8919696811958379, iteration: 340518
loss: 0.9854491949081421,grad_norm: 0.7607482888765099, iteration: 340519
loss: 1.0272014141082764,grad_norm: 0.9296021663028988, iteration: 340520
loss: 0.9903550744056702,grad_norm: 0.7688544348768036, iteration: 340521
loss: 0.9921138286590576,grad_norm: 0.737142336948607, iteration: 340522
loss: 0.9886109232902527,grad_norm: 0.7634248525529242, iteration: 340523
loss: 1.0096880197525024,grad_norm: 0.8072556116272648, iteration: 340524
loss: 1.0078060626983643,grad_norm: 0.8628862101503354, iteration: 340525
loss: 0.9599609971046448,grad_norm: 0.8729028416087958, iteration: 340526
loss: 0.9657081961631775,grad_norm: 0.9102285640307758, iteration: 340527
loss: 0.996684730052948,grad_norm: 0.8411826422220806, iteration: 340528
loss: 0.9697470664978027,grad_norm: 0.9071617939672982, iteration: 340529
loss: 1.0494335889816284,grad_norm: 0.7582656258848575, iteration: 340530
loss: 0.9663197994232178,grad_norm: 0.9414310714310857, iteration: 340531
loss: 1.0236015319824219,grad_norm: 0.7065627571957023, iteration: 340532
loss: 0.997713565826416,grad_norm: 0.7783499342736323, iteration: 340533
loss: 0.9999994039535522,grad_norm: 0.8742721603901833, iteration: 340534
loss: 0.9747400879859924,grad_norm: 0.877922585233388, iteration: 340535
loss: 1.012261986732483,grad_norm: 0.8190099283596789, iteration: 340536
loss: 1.0230119228363037,grad_norm: 0.8708483005319713, iteration: 340537
loss: 1.0693804025650024,grad_norm: 0.9999994120590425, iteration: 340538
loss: 1.0140763521194458,grad_norm: 0.7919086463528522, iteration: 340539
loss: 0.9911919236183167,grad_norm: 0.853632144481089, iteration: 340540
loss: 0.9855560064315796,grad_norm: 0.9999990713638958, iteration: 340541
loss: 1.0828372240066528,grad_norm: 0.9010330731625708, iteration: 340542
loss: 1.0158698558807373,grad_norm: 0.912371800861382, iteration: 340543
loss: 1.0244122743606567,grad_norm: 0.888359806090087, iteration: 340544
loss: 1.0110942125320435,grad_norm: 0.8165740199556596, iteration: 340545
loss: 1.0390784740447998,grad_norm: 0.9999995927486147, iteration: 340546
loss: 1.0286427736282349,grad_norm: 0.7763365210064758, iteration: 340547
loss: 1.0063714981079102,grad_norm: 0.8792065788763345, iteration: 340548
loss: 1.006718635559082,grad_norm: 0.8149727324332582, iteration: 340549
loss: 0.9982897043228149,grad_norm: 0.8445129198464288, iteration: 340550
loss: 1.0057601928710938,grad_norm: 0.845473950507036, iteration: 340551
loss: 0.988760232925415,grad_norm: 0.947592491100903, iteration: 340552
loss: 1.000460147857666,grad_norm: 0.9913026596291151, iteration: 340553
loss: 0.9705676436424255,grad_norm: 0.8088225656341264, iteration: 340554
loss: 0.9655187726020813,grad_norm: 0.884012702213434, iteration: 340555
loss: 1.0132489204406738,grad_norm: 0.8194932319910169, iteration: 340556
loss: 1.0056884288787842,grad_norm: 0.856507264228025, iteration: 340557
loss: 0.9724068641662598,grad_norm: 0.98492614913269, iteration: 340558
loss: 1.004055142402649,grad_norm: 0.8650138586951996, iteration: 340559
loss: 0.987282395362854,grad_norm: 0.9999991328458598, iteration: 340560
loss: 1.0205825567245483,grad_norm: 0.776772438016864, iteration: 340561
loss: 0.9755371809005737,grad_norm: 0.7997877761866041, iteration: 340562
loss: 0.9704328775405884,grad_norm: 0.854558188085565, iteration: 340563
loss: 1.0031431913375854,grad_norm: 0.7238107059453589, iteration: 340564
loss: 0.9908291101455688,grad_norm: 0.8249911847302827, iteration: 340565
loss: 0.9807835817337036,grad_norm: 0.9026501456379697, iteration: 340566
loss: 1.0100759267807007,grad_norm: 0.7564555543274376, iteration: 340567
loss: 1.0835232734680176,grad_norm: 0.7913444835270633, iteration: 340568
loss: 1.0598232746124268,grad_norm: 0.9999995804822892, iteration: 340569
loss: 0.9680477380752563,grad_norm: 0.833587900504181, iteration: 340570
loss: 1.0259836912155151,grad_norm: 0.7905112329063915, iteration: 340571
loss: 1.0020357370376587,grad_norm: 0.9999991525929443, iteration: 340572
loss: 0.9958403706550598,grad_norm: 0.9387558743363582, iteration: 340573
loss: 1.0352277755737305,grad_norm: 0.9999990869854465, iteration: 340574
loss: 0.9772579073905945,grad_norm: 0.856776362005647, iteration: 340575
loss: 1.0171688795089722,grad_norm: 0.9914711677308293, iteration: 340576
loss: 1.0577336549758911,grad_norm: 0.790471599567323, iteration: 340577
loss: 1.0224052667617798,grad_norm: 0.9999990046756747, iteration: 340578
loss: 0.9794408082962036,grad_norm: 0.7938090997936671, iteration: 340579
loss: 0.9837165474891663,grad_norm: 0.6204792266155889, iteration: 340580
loss: 0.9846187233924866,grad_norm: 0.7673559694408173, iteration: 340581
loss: 1.0266695022583008,grad_norm: 0.8000547351287186, iteration: 340582
loss: 1.0153131484985352,grad_norm: 0.9999993261160453, iteration: 340583
loss: 0.9437586665153503,grad_norm: 0.8288282766828864, iteration: 340584
loss: 0.9919233322143555,grad_norm: 0.6896330851877398, iteration: 340585
loss: 0.9810211658477783,grad_norm: 0.8463081929284566, iteration: 340586
loss: 1.078955054283142,grad_norm: 0.9999994743162053, iteration: 340587
loss: 1.022337794303894,grad_norm: 0.7456777061717124, iteration: 340588
loss: 1.015892744064331,grad_norm: 0.7465789097057296, iteration: 340589
loss: 0.9891505241394043,grad_norm: 0.8735951048820895, iteration: 340590
loss: 1.0175786018371582,grad_norm: 0.7462568296388922, iteration: 340591
loss: 1.0440027713775635,grad_norm: 0.876747541592305, iteration: 340592
loss: 1.0140231847763062,grad_norm: 0.8860106984464404, iteration: 340593
loss: 0.9866270422935486,grad_norm: 0.8344039259086087, iteration: 340594
loss: 0.9351751804351807,grad_norm: 0.9102024197037991, iteration: 340595
loss: 1.0119913816452026,grad_norm: 0.9860166136035664, iteration: 340596
loss: 0.9865175485610962,grad_norm: 0.8580706572951451, iteration: 340597
loss: 0.9879034757614136,grad_norm: 0.724892380744441, iteration: 340598
loss: 1.0206997394561768,grad_norm: 0.7841018807066379, iteration: 340599
loss: 1.012518048286438,grad_norm: 0.7412388153303414, iteration: 340600
loss: 1.0141221284866333,grad_norm: 0.8731002113785176, iteration: 340601
loss: 1.0160564184188843,grad_norm: 0.9999990535114884, iteration: 340602
loss: 0.9795354008674622,grad_norm: 0.999999796434223, iteration: 340603
loss: 0.9488088488578796,grad_norm: 0.7366392460726497, iteration: 340604
loss: 1.0355029106140137,grad_norm: 0.9999995756589837, iteration: 340605
loss: 0.9756804704666138,grad_norm: 0.7258041286544408, iteration: 340606
loss: 0.9580849409103394,grad_norm: 0.8512863519672411, iteration: 340607
loss: 1.0056884288787842,grad_norm: 0.7485278021668617, iteration: 340608
loss: 0.9540969133377075,grad_norm: 0.9999991299220857, iteration: 340609
loss: 0.9895972609519958,grad_norm: 0.7607170269540873, iteration: 340610
loss: 0.998054563999176,grad_norm: 0.9854091494982171, iteration: 340611
loss: 0.9950025677680969,grad_norm: 0.7952160116972011, iteration: 340612
loss: 0.9970982074737549,grad_norm: 0.872384516993668, iteration: 340613
loss: 1.000669240951538,grad_norm: 0.8705896675202691, iteration: 340614
loss: 0.9811737537384033,grad_norm: 0.9851577508369878, iteration: 340615
loss: 0.9950541853904724,grad_norm: 0.9787283210049399, iteration: 340616
loss: 1.0106383562088013,grad_norm: 0.7944842564346798, iteration: 340617
loss: 0.9961481094360352,grad_norm: 0.9569818884605341, iteration: 340618
loss: 1.0356656312942505,grad_norm: 0.8680444833635083, iteration: 340619
loss: 1.0085407495498657,grad_norm: 0.9999993544689667, iteration: 340620
loss: 1.012136697769165,grad_norm: 0.9534515795663909, iteration: 340621
loss: 0.9887860417366028,grad_norm: 0.8360821244596247, iteration: 340622
loss: 1.0071735382080078,grad_norm: 0.849511600023929, iteration: 340623
loss: 1.0034648180007935,grad_norm: 0.9999994419041632, iteration: 340624
loss: 0.9971104860305786,grad_norm: 0.826167897970347, iteration: 340625
loss: 0.9847407341003418,grad_norm: 0.9474655019036531, iteration: 340626
loss: 1.0089713335037231,grad_norm: 0.9999991259375022, iteration: 340627
loss: 0.9847233891487122,grad_norm: 0.8802792056805154, iteration: 340628
loss: 0.9995089769363403,grad_norm: 0.8322068802499679, iteration: 340629
loss: 1.0457031726837158,grad_norm: 0.9298958360472155, iteration: 340630
loss: 1.0165603160858154,grad_norm: 0.763600126794756, iteration: 340631
loss: 0.9896592497825623,grad_norm: 0.9999990542276669, iteration: 340632
loss: 1.0072860717773438,grad_norm: 0.9059270727348062, iteration: 340633
loss: 1.139135718345642,grad_norm: 0.9999997652482964, iteration: 340634
loss: 1.000689148902893,grad_norm: 0.8812480294078262, iteration: 340635
loss: 1.0013611316680908,grad_norm: 0.9999992224429435, iteration: 340636
loss: 1.0103260278701782,grad_norm: 0.8286114810762133, iteration: 340637
loss: 0.9937453866004944,grad_norm: 0.8487138556854845, iteration: 340638
loss: 0.9877002239227295,grad_norm: 0.8212713086974157, iteration: 340639
loss: 1.0446851253509521,grad_norm: 0.7563937577428534, iteration: 340640
loss: 0.9957785606384277,grad_norm: 0.7433538671955584, iteration: 340641
loss: 1.022553563117981,grad_norm: 0.9009048172041652, iteration: 340642
loss: 1.0479594469070435,grad_norm: 0.9999993854765807, iteration: 340643
loss: 1.0440078973770142,grad_norm: 0.9999992959460092, iteration: 340644
loss: 1.0525366067886353,grad_norm: 0.9021905563389075, iteration: 340645
loss: 0.9891781806945801,grad_norm: 0.8354593707181316, iteration: 340646
loss: 0.981684684753418,grad_norm: 0.9999991540515614, iteration: 340647
loss: 1.0146278142929077,grad_norm: 0.852307270989567, iteration: 340648
loss: 0.9894626140594482,grad_norm: 0.9963893849651657, iteration: 340649
loss: 0.9885731339454651,grad_norm: 0.816343801983101, iteration: 340650
loss: 0.9925022721290588,grad_norm: 0.8142740443344321, iteration: 340651
loss: 1.0031033754348755,grad_norm: 0.8023256984294522, iteration: 340652
loss: 1.0094817876815796,grad_norm: 0.7932455006570938, iteration: 340653
loss: 0.9973207116127014,grad_norm: 0.8159958599634128, iteration: 340654
loss: 1.0016521215438843,grad_norm: 0.7507816506109909, iteration: 340655
loss: 1.0282831192016602,grad_norm: 0.9999996813280209, iteration: 340656
loss: 0.9733599424362183,grad_norm: 0.7656821255117109, iteration: 340657
loss: 0.9732595682144165,grad_norm: 0.7600876551714775, iteration: 340658
loss: 0.9723886251449585,grad_norm: 0.7669325026176244, iteration: 340659
loss: 0.995303213596344,grad_norm: 0.8656580841649686, iteration: 340660
loss: 1.0018713474273682,grad_norm: 0.8481405553915714, iteration: 340661
loss: 0.9858904480934143,grad_norm: 0.9191850444562026, iteration: 340662
loss: 0.9643797874450684,grad_norm: 0.9999999519362299, iteration: 340663
loss: 0.991064727306366,grad_norm: 0.9322199812574578, iteration: 340664
loss: 1.0308218002319336,grad_norm: 0.7665842718406404, iteration: 340665
loss: 1.0281941890716553,grad_norm: 0.889592763114082, iteration: 340666
loss: 0.9940423369407654,grad_norm: 0.8341388260941646, iteration: 340667
loss: 1.0933126211166382,grad_norm: 1.0000000580741726, iteration: 340668
loss: 1.0355583429336548,grad_norm: 0.8291347788720099, iteration: 340669
loss: 1.0119506120681763,grad_norm: 0.7917534267920957, iteration: 340670
loss: 1.0028998851776123,grad_norm: 0.8125011223265582, iteration: 340671
loss: 1.0973544120788574,grad_norm: 0.9999998276174085, iteration: 340672
loss: 1.0103076696395874,grad_norm: 0.8044756598120794, iteration: 340673
loss: 0.9789425730705261,grad_norm: 0.8143005069358091, iteration: 340674
loss: 0.9987758994102478,grad_norm: 0.8659140066633478, iteration: 340675
loss: 1.0281280279159546,grad_norm: 0.8656689401914978, iteration: 340676
loss: 0.9794842600822449,grad_norm: 0.8754790878274895, iteration: 340677
loss: 0.9942920804023743,grad_norm: 0.9418060731590757, iteration: 340678
loss: 0.9535987377166748,grad_norm: 0.9999991466395599, iteration: 340679
loss: 1.0188478231430054,grad_norm: 0.7825623474672136, iteration: 340680
loss: 1.0245449542999268,grad_norm: 0.7923462011385546, iteration: 340681
loss: 0.9665939211845398,grad_norm: 0.9999990584085753, iteration: 340682
loss: 1.032628059387207,grad_norm: 0.9730127295574196, iteration: 340683
loss: 0.9725579619407654,grad_norm: 0.8459061184859026, iteration: 340684
loss: 1.021284818649292,grad_norm: 0.8753750487179695, iteration: 340685
loss: 1.0064314603805542,grad_norm: 0.9774069608470323, iteration: 340686
loss: 0.9935469627380371,grad_norm: 0.8393857946912611, iteration: 340687
loss: 1.0493422746658325,grad_norm: 0.9073956511345638, iteration: 340688
loss: 0.9569110870361328,grad_norm: 0.8435299857552128, iteration: 340689
loss: 0.9764140844345093,grad_norm: 0.7922521078697845, iteration: 340690
loss: 1.0005929470062256,grad_norm: 0.9472241242478284, iteration: 340691
loss: 1.0524688959121704,grad_norm: 0.8415684709968655, iteration: 340692
loss: 1.0101895332336426,grad_norm: 0.853869326648931, iteration: 340693
loss: 0.9396744966506958,grad_norm: 0.9250536965306823, iteration: 340694
loss: 1.0110560655593872,grad_norm: 0.7438234692867729, iteration: 340695
loss: 0.9979456663131714,grad_norm: 0.7912836464276745, iteration: 340696
loss: 0.9571409821510315,grad_norm: 0.8682441354331191, iteration: 340697
loss: 0.9992442727088928,grad_norm: 0.7585955060660345, iteration: 340698
loss: 0.9944145083427429,grad_norm: 0.9999990393135906, iteration: 340699
loss: 0.9990947246551514,grad_norm: 0.8444506639262305, iteration: 340700
loss: 0.9909567832946777,grad_norm: 0.9999992010765888, iteration: 340701
loss: 0.9684982299804688,grad_norm: 0.8922904604320617, iteration: 340702
loss: 0.9963445663452148,grad_norm: 0.9609254922424477, iteration: 340703
loss: 0.9907330870628357,grad_norm: 0.9999993062834425, iteration: 340704
loss: 0.9870595335960388,grad_norm: 0.8740237835003354, iteration: 340705
loss: 1.000522255897522,grad_norm: 0.9256915854674035, iteration: 340706
loss: 0.9993339776992798,grad_norm: 0.9151658841859558, iteration: 340707
loss: 1.0347381830215454,grad_norm: 0.9482910080307226, iteration: 340708
loss: 1.104715347290039,grad_norm: 0.8530821676387459, iteration: 340709
loss: 1.0177044868469238,grad_norm: 0.7710210305180866, iteration: 340710
loss: 0.9302671551704407,grad_norm: 0.9528054121225786, iteration: 340711
loss: 1.0477628707885742,grad_norm: 0.9999991245941863, iteration: 340712
loss: 0.9672381281852722,grad_norm: 0.971123733612444, iteration: 340713
loss: 1.097680687904358,grad_norm: 0.9999993113082267, iteration: 340714
loss: 1.0031660795211792,grad_norm: 0.9591353511778357, iteration: 340715
loss: 1.02292001247406,grad_norm: 0.999999938573516, iteration: 340716
loss: 1.030606746673584,grad_norm: 0.7521253028200978, iteration: 340717
loss: 1.049652099609375,grad_norm: 0.9999999995819644, iteration: 340718
loss: 1.0202975273132324,grad_norm: 0.9999996576819504, iteration: 340719
loss: 1.0654246807098389,grad_norm: 0.9999998821688106, iteration: 340720
loss: 0.9832012057304382,grad_norm: 0.8787085203084178, iteration: 340721
loss: 0.9805712699890137,grad_norm: 0.8572616977489402, iteration: 340722
loss: 1.0438209772109985,grad_norm: 0.8844551200138064, iteration: 340723
loss: 0.9992849826812744,grad_norm: 0.880890952743773, iteration: 340724
loss: 0.9926665425300598,grad_norm: 0.9878139569482346, iteration: 340725
loss: 1.1172503232955933,grad_norm: 0.9999999639814213, iteration: 340726
loss: 0.9797508120536804,grad_norm: 0.8622771244159791, iteration: 340727
loss: 1.01510488986969,grad_norm: 0.8378099329349109, iteration: 340728
loss: 1.041141152381897,grad_norm: 0.9999999460654493, iteration: 340729
loss: 1.1227917671203613,grad_norm: 0.8975320704077209, iteration: 340730
loss: 0.9737697839736938,grad_norm: 0.843330273801911, iteration: 340731
loss: 1.044089913368225,grad_norm: 0.8717187193970029, iteration: 340732
loss: 1.0343438386917114,grad_norm: 0.8540674845572783, iteration: 340733
loss: 1.0180425643920898,grad_norm: 0.9697793688153487, iteration: 340734
loss: 1.021447777748108,grad_norm: 1.0000000048751656, iteration: 340735
loss: 0.9676079750061035,grad_norm: 0.86549887361841, iteration: 340736
loss: 1.0208029747009277,grad_norm: 0.7698122505007267, iteration: 340737
loss: 1.0028676986694336,grad_norm: 0.840825915213087, iteration: 340738
loss: 1.013830542564392,grad_norm: 0.8372468567864468, iteration: 340739
loss: 1.0224109888076782,grad_norm: 0.7800517157424974, iteration: 340740
loss: 1.1976045370101929,grad_norm: 0.9999996027055351, iteration: 340741
loss: 0.9941275119781494,grad_norm: 0.8356200063606621, iteration: 340742
loss: 1.0130255222320557,grad_norm: 0.7434877029601218, iteration: 340743
loss: 0.9642257690429688,grad_norm: 0.7714720541155922, iteration: 340744
loss: 1.0095899105072021,grad_norm: 0.9676658038183702, iteration: 340745
loss: 1.0095617771148682,grad_norm: 0.9999995761000768, iteration: 340746
loss: 1.0314990282058716,grad_norm: 0.9480481668374111, iteration: 340747
loss: 1.0804144144058228,grad_norm: 0.9999992014743767, iteration: 340748
loss: 0.9813330173492432,grad_norm: 0.8178332224847081, iteration: 340749
loss: 0.9768669605255127,grad_norm: 0.8311271141642657, iteration: 340750
loss: 1.0433199405670166,grad_norm: 0.9023457090200299, iteration: 340751
loss: 1.021135926246643,grad_norm: 0.7153354514846185, iteration: 340752
loss: 1.0205519199371338,grad_norm: 0.9706568363229401, iteration: 340753
loss: 0.9721513390541077,grad_norm: 0.8343573058478083, iteration: 340754
loss: 1.0134872198104858,grad_norm: 0.9999991079325106, iteration: 340755
loss: 0.9517349600791931,grad_norm: 0.9070471507542031, iteration: 340756
loss: 1.0043580532073975,grad_norm: 0.8578534337879525, iteration: 340757
loss: 1.0080493688583374,grad_norm: 0.8988978442599126, iteration: 340758
loss: 1.0259621143341064,grad_norm: 0.7522286724551449, iteration: 340759
loss: 1.0588252544403076,grad_norm: 0.8481100725146139, iteration: 340760
loss: 1.000380039215088,grad_norm: 0.7753811253705875, iteration: 340761
loss: 1.0085960626602173,grad_norm: 0.8852038781885536, iteration: 340762
loss: 1.0110597610473633,grad_norm: 0.9999998363808311, iteration: 340763
loss: 1.002764344215393,grad_norm: 0.8478469538278617, iteration: 340764
loss: 1.01552152633667,grad_norm: 0.8945312056176344, iteration: 340765
loss: 1.0379924774169922,grad_norm: 0.9999993503321998, iteration: 340766
loss: 1.0188748836517334,grad_norm: 0.7456014401373185, iteration: 340767
loss: 1.0206608772277832,grad_norm: 0.7602073455267847, iteration: 340768
loss: 0.9711059927940369,grad_norm: 0.9543268422785847, iteration: 340769
loss: 1.0183402299880981,grad_norm: 0.9999996519336988, iteration: 340770
loss: 0.9804935455322266,grad_norm: 0.7784701478045897, iteration: 340771
loss: 1.0066218376159668,grad_norm: 0.7644436584979211, iteration: 340772
loss: 1.0251644849777222,grad_norm: 0.870848108453319, iteration: 340773
loss: 0.9587602615356445,grad_norm: 0.8899856137696688, iteration: 340774
loss: 1.0380101203918457,grad_norm: 0.9999997064991174, iteration: 340775
loss: 1.010155439376831,grad_norm: 0.9552632983440624, iteration: 340776
loss: 1.034129023551941,grad_norm: 0.9274939674372298, iteration: 340777
loss: 1.013028860092163,grad_norm: 0.859466446345868, iteration: 340778
loss: 1.0214653015136719,grad_norm: 0.7614779556022337, iteration: 340779
loss: 0.995309591293335,grad_norm: 0.9999997855079579, iteration: 340780
loss: 1.0501363277435303,grad_norm: 0.9999997992457527, iteration: 340781
loss: 0.9660840630531311,grad_norm: 0.7791650442531285, iteration: 340782
loss: 0.9691172242164612,grad_norm: 0.750010816511457, iteration: 340783
loss: 1.033926010131836,grad_norm: 0.9999997983963533, iteration: 340784
loss: 0.950054407119751,grad_norm: 0.8184275279671028, iteration: 340785
loss: 0.9912801384925842,grad_norm: 0.9756457681210853, iteration: 340786
loss: 0.9912338256835938,grad_norm: 0.9999992022542038, iteration: 340787
loss: 0.9839699268341064,grad_norm: 0.8801625078720263, iteration: 340788
loss: 0.9818622469902039,grad_norm: 0.8398642015939974, iteration: 340789
loss: 0.9517093300819397,grad_norm: 0.7796412701384973, iteration: 340790
loss: 1.0528963804244995,grad_norm: 0.9173157271266916, iteration: 340791
loss: 1.0489305257797241,grad_norm: 0.9064259511719626, iteration: 340792
loss: 1.0066306591033936,grad_norm: 0.8013819824283558, iteration: 340793
loss: 0.9323843121528625,grad_norm: 0.7918158526394588, iteration: 340794
loss: 1.0520607233047485,grad_norm: 0.9999991815018058, iteration: 340795
loss: 1.009440302848816,grad_norm: 0.999999132566669, iteration: 340796
loss: 1.0037614107131958,grad_norm: 0.7460335340080182, iteration: 340797
loss: 1.0528019666671753,grad_norm: 0.9999999399358234, iteration: 340798
loss: 1.0143224000930786,grad_norm: 0.8315079591381361, iteration: 340799
loss: 0.9872803688049316,grad_norm: 0.8499640827313314, iteration: 340800
loss: 0.9868815541267395,grad_norm: 0.927826657035507, iteration: 340801
loss: 0.993080735206604,grad_norm: 0.7865053726162156, iteration: 340802
loss: 1.023878812789917,grad_norm: 0.9999997106544426, iteration: 340803
loss: 1.0038604736328125,grad_norm: 0.8307610333467893, iteration: 340804
loss: 1.020653486251831,grad_norm: 0.8124599309633409, iteration: 340805
loss: 0.9733242392539978,grad_norm: 0.8635274468858237, iteration: 340806
loss: 1.002267599105835,grad_norm: 0.9634073110509099, iteration: 340807
loss: 0.9969992637634277,grad_norm: 0.9999990608925909, iteration: 340808
loss: 1.1584453582763672,grad_norm: 0.999999728397937, iteration: 340809
loss: 0.9897742867469788,grad_norm: 0.8620825895384078, iteration: 340810
loss: 1.0139557123184204,grad_norm: 0.6819604473561943, iteration: 340811
loss: 0.9988566040992737,grad_norm: 0.82744896957896, iteration: 340812
loss: 0.9826127886772156,grad_norm: 0.8609542794727463, iteration: 340813
loss: 1.098413348197937,grad_norm: 0.9999991922469449, iteration: 340814
loss: 0.9942940473556519,grad_norm: 0.7563054438286847, iteration: 340815
loss: 1.212166428565979,grad_norm: 0.9999998740539813, iteration: 340816
loss: 0.9904221296310425,grad_norm: 0.7378468944698297, iteration: 340817
loss: 1.1098308563232422,grad_norm: 0.9999996515631103, iteration: 340818
loss: 0.9790199398994446,grad_norm: 0.8181118573197577, iteration: 340819
loss: 0.9843770265579224,grad_norm: 0.9999993439889117, iteration: 340820
loss: 0.9997813105583191,grad_norm: 0.9999991421193283, iteration: 340821
loss: 1.0552278757095337,grad_norm: 0.9999996263597257, iteration: 340822
loss: 1.0070744752883911,grad_norm: 0.8551672008632505, iteration: 340823
loss: 0.9815362095832825,grad_norm: 0.8250808622048665, iteration: 340824
loss: 0.9775495529174805,grad_norm: 0.9999991857740939, iteration: 340825
loss: 1.0050345659255981,grad_norm: 0.9999996483309938, iteration: 340826
loss: 0.9596055150032043,grad_norm: 0.9617408972965661, iteration: 340827
loss: 1.003674030303955,grad_norm: 0.9999996544822851, iteration: 340828
loss: 1.0380781888961792,grad_norm: 0.9999994799115395, iteration: 340829
loss: 1.0025620460510254,grad_norm: 0.9599947416091175, iteration: 340830
loss: 1.0127114057540894,grad_norm: 0.6999762108414377, iteration: 340831
loss: 0.9597474932670593,grad_norm: 0.949575033058547, iteration: 340832
loss: 1.0085595846176147,grad_norm: 0.7452306047658113, iteration: 340833
loss: 1.1111140251159668,grad_norm: 0.9999992265245162, iteration: 340834
loss: 0.9862760305404663,grad_norm: 0.7595887373898341, iteration: 340835
loss: 0.9942813515663147,grad_norm: 0.9999995069481501, iteration: 340836
loss: 0.9659755229949951,grad_norm: 0.892600142262713, iteration: 340837
loss: 1.030569314956665,grad_norm: 0.8206728931716939, iteration: 340838
loss: 1.0543503761291504,grad_norm: 0.9437656601680244, iteration: 340839
loss: 0.9728721380233765,grad_norm: 0.9168388152994412, iteration: 340840
loss: 0.984428346157074,grad_norm: 0.9526328184262218, iteration: 340841
loss: 1.0043262243270874,grad_norm: 0.8591108969688431, iteration: 340842
loss: 1.0477795600891113,grad_norm: 0.833615774447803, iteration: 340843
loss: 1.020311951637268,grad_norm: 0.8569841559660883, iteration: 340844
loss: 0.9665895700454712,grad_norm: 0.9059854958216169, iteration: 340845
loss: 1.0345194339752197,grad_norm: 0.999999577111881, iteration: 340846
loss: 1.0118629932403564,grad_norm: 0.9050506758472041, iteration: 340847
loss: 0.9786584973335266,grad_norm: 0.8122045740225484, iteration: 340848
loss: 1.0052982568740845,grad_norm: 0.8548502787693578, iteration: 340849
loss: 1.0477091073989868,grad_norm: 0.7866294314643395, iteration: 340850
loss: 1.0467075109481812,grad_norm: 0.9999999373237362, iteration: 340851
loss: 1.030517578125,grad_norm: 0.9999992362244882, iteration: 340852
loss: 1.1048555374145508,grad_norm: 0.9999991675498803, iteration: 340853
loss: 0.9816615581512451,grad_norm: 0.7442325060623974, iteration: 340854
loss: 1.073778510093689,grad_norm: 0.8604938814827782, iteration: 340855
loss: 1.0682086944580078,grad_norm: 0.9999996944970893, iteration: 340856
loss: 1.008458137512207,grad_norm: 0.9999994534172474, iteration: 340857
loss: 0.986934244632721,grad_norm: 0.9298002556931482, iteration: 340858
loss: 0.9985358119010925,grad_norm: 0.707849713998805, iteration: 340859
loss: 1.0199108123779297,grad_norm: 0.8230339032732854, iteration: 340860
loss: 0.9476343393325806,grad_norm: 0.7900916483668723, iteration: 340861
loss: 0.9863417148590088,grad_norm: 0.7254618774595125, iteration: 340862
loss: 0.9710842370986938,grad_norm: 0.7965347737755757, iteration: 340863
loss: 0.9792375564575195,grad_norm: 0.9124362798357853, iteration: 340864
loss: 1.0034288167953491,grad_norm: 0.8919930263478633, iteration: 340865
loss: 1.0140633583068848,grad_norm: 0.9242485010445688, iteration: 340866
loss: 1.0356545448303223,grad_norm: 0.8523863901221352, iteration: 340867
loss: 0.9992476105690002,grad_norm: 0.9999992769806924, iteration: 340868
loss: 1.0587769746780396,grad_norm: 0.9729470606757873, iteration: 340869
loss: 1.0011560916900635,grad_norm: 0.9883968284921003, iteration: 340870
loss: 0.9996012449264526,grad_norm: 0.8876827141340179, iteration: 340871
loss: 1.0154739618301392,grad_norm: 0.8677312995359051, iteration: 340872
loss: 0.9953505396842957,grad_norm: 0.8397211271138211, iteration: 340873
loss: 1.0038565397262573,grad_norm: 0.8065248183148481, iteration: 340874
loss: 1.0612562894821167,grad_norm: 0.9999999207934214, iteration: 340875
loss: 1.0181069374084473,grad_norm: 0.9999991314654586, iteration: 340876
loss: 1.0381239652633667,grad_norm: 0.9999999991948378, iteration: 340877
loss: 0.9852330684661865,grad_norm: 0.8411459757472038, iteration: 340878
loss: 1.011208415031433,grad_norm: 0.7925604846297776, iteration: 340879
loss: 1.001084804534912,grad_norm: 0.9205206815701147, iteration: 340880
loss: 0.9876585006713867,grad_norm: 0.7832225584239411, iteration: 340881
loss: 0.9945840239524841,grad_norm: 0.9999999449413198, iteration: 340882
loss: 0.9509244561195374,grad_norm: 0.7563792840980665, iteration: 340883
loss: 1.0079094171524048,grad_norm: 0.9570909475683282, iteration: 340884
loss: 0.98577880859375,grad_norm: 0.7629142860971057, iteration: 340885
loss: 1.0133789777755737,grad_norm: 0.9999991949815492, iteration: 340886
loss: 0.9727334380149841,grad_norm: 0.9216175335598986, iteration: 340887
loss: 0.9863912463188171,grad_norm: 0.9999990429998632, iteration: 340888
loss: 1.0606956481933594,grad_norm: 0.9999994768491623, iteration: 340889
loss: 0.9874656796455383,grad_norm: 0.7939704025838074, iteration: 340890
loss: 1.006809115409851,grad_norm: 0.9023421886876648, iteration: 340891
loss: 1.0017496347427368,grad_norm: 0.9124524193672471, iteration: 340892
loss: 0.971013605594635,grad_norm: 0.8530162250000135, iteration: 340893
loss: 0.9844359755516052,grad_norm: 0.7827776990217306, iteration: 340894
loss: 0.9977733492851257,grad_norm: 0.8566449181553439, iteration: 340895
loss: 0.9762080907821655,grad_norm: 0.7954583034356488, iteration: 340896
loss: 0.9948601722717285,grad_norm: 0.9341453501830134, iteration: 340897
loss: 1.0045669078826904,grad_norm: 0.7634961622731335, iteration: 340898
loss: 0.9664140939712524,grad_norm: 0.999999244254879, iteration: 340899
loss: 1.0319803953170776,grad_norm: 0.8101210336502074, iteration: 340900
loss: 1.031801462173462,grad_norm: 0.81025885141765, iteration: 340901
loss: 0.96132892370224,grad_norm: 0.8306599017802729, iteration: 340902
loss: 0.9812418222427368,grad_norm: 0.7427425723737875, iteration: 340903
loss: 0.9462559819221497,grad_norm: 0.9030913611730965, iteration: 340904
loss: 0.9926210641860962,grad_norm: 0.9265092567982914, iteration: 340905
loss: 1.0153142213821411,grad_norm: 0.9190814174900572, iteration: 340906
loss: 1.0523344278335571,grad_norm: 0.9249478462500509, iteration: 340907
loss: 0.9654068946838379,grad_norm: 0.8838205596990832, iteration: 340908
loss: 0.9915054440498352,grad_norm: 0.7079321684484453, iteration: 340909
loss: 1.0157859325408936,grad_norm: 0.8110247834771384, iteration: 340910
loss: 0.9921441078186035,grad_norm: 0.8916679341875057, iteration: 340911
loss: 0.9896066784858704,grad_norm: 0.7047582701195897, iteration: 340912
loss: 0.9753533601760864,grad_norm: 0.7636934901283111, iteration: 340913
loss: 0.9685148000717163,grad_norm: 0.9527843058794466, iteration: 340914
loss: 1.0210789442062378,grad_norm: 0.999999181932953, iteration: 340915
loss: 0.9676886796951294,grad_norm: 0.7865188314166613, iteration: 340916
loss: 1.0313746929168701,grad_norm: 0.8866712395768152, iteration: 340917
loss: 1.0249882936477661,grad_norm: 0.999999260567672, iteration: 340918
loss: 0.9765466451644897,grad_norm: 0.8527996513316526, iteration: 340919
loss: 0.9730387330055237,grad_norm: 0.9096847467134237, iteration: 340920
loss: 1.009732961654663,grad_norm: 0.7589122962008462, iteration: 340921
loss: 0.9941752552986145,grad_norm: 0.7022919432403016, iteration: 340922
loss: 0.9932220578193665,grad_norm: 0.7931075600507319, iteration: 340923
loss: 1.02655827999115,grad_norm: 0.8707713285876023, iteration: 340924
loss: 1.009528398513794,grad_norm: 0.8530417820535814, iteration: 340925
loss: 1.0080969333648682,grad_norm: 0.7020064687833198, iteration: 340926
loss: 1.026667594909668,grad_norm: 0.7526517041489444, iteration: 340927
loss: 0.9970734715461731,grad_norm: 0.7795826066036311, iteration: 340928
loss: 1.0065960884094238,grad_norm: 0.7045844597857672, iteration: 340929
loss: 1.0005507469177246,grad_norm: 0.7732760413210401, iteration: 340930
loss: 1.0111483335494995,grad_norm: 0.9999990398314698, iteration: 340931
loss: 1.1096657514572144,grad_norm: 0.9999995445959946, iteration: 340932
loss: 1.0231308937072754,grad_norm: 0.9855900501101316, iteration: 340933
loss: 1.0243984460830688,grad_norm: 0.8456682780256353, iteration: 340934
loss: 0.9939461350440979,grad_norm: 0.7945508768650872, iteration: 340935
loss: 1.0247331857681274,grad_norm: 0.8609087219065718, iteration: 340936
loss: 0.9950611591339111,grad_norm: 0.8337241908571619, iteration: 340937
loss: 0.9698377847671509,grad_norm: 0.8403841405055628, iteration: 340938
loss: 0.9836521744728088,grad_norm: 0.9551016754979157, iteration: 340939
loss: 1.0096925497055054,grad_norm: 0.7312731994996471, iteration: 340940
loss: 0.9985866546630859,grad_norm: 0.7997454470713864, iteration: 340941
loss: 1.0153530836105347,grad_norm: 0.8335488473655712, iteration: 340942
loss: 1.004753589630127,grad_norm: 0.9999995557270718, iteration: 340943
loss: 0.9900534749031067,grad_norm: 0.8836753298728328, iteration: 340944
loss: 0.9936816692352295,grad_norm: 0.8983452761031735, iteration: 340945
loss: 0.9933875799179077,grad_norm: 0.9999989297210115, iteration: 340946
loss: 0.9998650550842285,grad_norm: 0.9999991491794373, iteration: 340947
loss: 0.9937324523925781,grad_norm: 0.965983246284341, iteration: 340948
loss: 0.9843253493309021,grad_norm: 0.9425446156669947, iteration: 340949
loss: 0.9925588965415955,grad_norm: 0.8561307958527039, iteration: 340950
loss: 0.9975772500038147,grad_norm: 0.8211801352052928, iteration: 340951
loss: 1.005414366722107,grad_norm: 0.6676204262874967, iteration: 340952
loss: 0.9920408129692078,grad_norm: 0.6686226687297119, iteration: 340953
loss: 0.9712945818901062,grad_norm: 0.6365667486190447, iteration: 340954
loss: 0.9787147641181946,grad_norm: 0.8192090568127862, iteration: 340955
loss: 0.962467610836029,grad_norm: 0.9999991629696917, iteration: 340956
loss: 0.9798080921173096,grad_norm: 0.6994276960137161, iteration: 340957
loss: 0.9977728724479675,grad_norm: 0.7709696375952524, iteration: 340958
loss: 1.0547055006027222,grad_norm: 0.8541309785792996, iteration: 340959
loss: 1.0026090145111084,grad_norm: 0.786854960079198, iteration: 340960
loss: 0.9973998665809631,grad_norm: 0.8502231983549274, iteration: 340961
loss: 0.9861719608306885,grad_norm: 0.9040137214376754, iteration: 340962
loss: 0.9932883977890015,grad_norm: 0.7185759027550909, iteration: 340963
loss: 1.0166219472885132,grad_norm: 0.9336707848835603, iteration: 340964
loss: 1.000615119934082,grad_norm: 0.861942489360338, iteration: 340965
loss: 1.02236008644104,grad_norm: 0.7818672499278139, iteration: 340966
loss: 1.014601230621338,grad_norm: 0.9632034060635977, iteration: 340967
loss: 1.0556459426879883,grad_norm: 0.9999991736147866, iteration: 340968
loss: 0.973536491394043,grad_norm: 0.8582102682729141, iteration: 340969
loss: 1.0219396352767944,grad_norm: 0.8118039553557548, iteration: 340970
loss: 0.9521228671073914,grad_norm: 0.7568706706211948, iteration: 340971
loss: 0.9664071202278137,grad_norm: 0.755882573530864, iteration: 340972
loss: 0.9984144568443298,grad_norm: 0.7710336537937565, iteration: 340973
loss: 0.9821799993515015,grad_norm: 0.8390148968232629, iteration: 340974
loss: 1.0121469497680664,grad_norm: 0.7825397952579676, iteration: 340975
loss: 1.0129228830337524,grad_norm: 0.9239730065271159, iteration: 340976
loss: 1.0464924573898315,grad_norm: 0.9782961307831999, iteration: 340977
loss: 0.9754535555839539,grad_norm: 0.9345778850038675, iteration: 340978
loss: 0.9712314605712891,grad_norm: 0.9999991362611638, iteration: 340979
loss: 1.0005766153335571,grad_norm: 0.7181270171642361, iteration: 340980
loss: 1.0104498863220215,grad_norm: 0.7754342373863432, iteration: 340981
loss: 1.0124505758285522,grad_norm: 0.8387469434522875, iteration: 340982
loss: 0.988860547542572,grad_norm: 0.8773469966347582, iteration: 340983
loss: 0.9773164987564087,grad_norm: 0.8479080278921323, iteration: 340984
loss: 0.9751505851745605,grad_norm: 0.8218209725717314, iteration: 340985
loss: 1.0426559448242188,grad_norm: 0.7586332889307615, iteration: 340986
loss: 0.9762594699859619,grad_norm: 0.7122836058197118, iteration: 340987
loss: 0.989798367023468,grad_norm: 0.7981317755458698, iteration: 340988
loss: 1.0012389421463013,grad_norm: 0.851279343989094, iteration: 340989
loss: 1.0048635005950928,grad_norm: 0.6594115810896309, iteration: 340990
loss: 0.9943482279777527,grad_norm: 0.926318592256414, iteration: 340991
loss: 0.9888206124305725,grad_norm: 0.7293505596354776, iteration: 340992
loss: 1.0136083364486694,grad_norm: 0.9869595071096638, iteration: 340993
loss: 1.0288305282592773,grad_norm: 0.9324793339447245, iteration: 340994
loss: 0.9817362427711487,grad_norm: 0.8570416253416155, iteration: 340995
loss: 1.0455117225646973,grad_norm: 0.7713529856094373, iteration: 340996
loss: 1.0584332942962646,grad_norm: 0.9999991712152609, iteration: 340997
loss: 1.0794512033462524,grad_norm: 0.9999992153419824, iteration: 340998
loss: 1.0273516178131104,grad_norm: 0.8089796456135964, iteration: 340999
loss: 1.019194483757019,grad_norm: 0.8654138095062615, iteration: 341000
loss: 0.9906522631645203,grad_norm: 0.9999991441795847, iteration: 341001
loss: 1.0110136270523071,grad_norm: 0.732787898075503, iteration: 341002
loss: 0.9487888813018799,grad_norm: 0.8868706772718814, iteration: 341003
loss: 1.0158393383026123,grad_norm: 0.9639725912468105, iteration: 341004
loss: 0.9825591444969177,grad_norm: 0.8512871464033095, iteration: 341005
loss: 0.9584319591522217,grad_norm: 0.8001147992152589, iteration: 341006
loss: 1.0340596437454224,grad_norm: 0.9999995802272666, iteration: 341007
loss: 1.0524327754974365,grad_norm: 0.9726332583096832, iteration: 341008
loss: 0.9991307258605957,grad_norm: 0.7953588133555629, iteration: 341009
loss: 0.9950686693191528,grad_norm: 0.8796300587666439, iteration: 341010
loss: 1.0327465534210205,grad_norm: 0.881729309264451, iteration: 341011
loss: 0.9927324056625366,grad_norm: 0.8475009962725857, iteration: 341012
loss: 0.9916179180145264,grad_norm: 0.9999996631874646, iteration: 341013
loss: 0.9989203214645386,grad_norm: 0.9941899064610796, iteration: 341014
loss: 1.0415022373199463,grad_norm: 0.9905852140687273, iteration: 341015
loss: 1.0072273015975952,grad_norm: 0.8096958568956171, iteration: 341016
loss: 1.0295454263687134,grad_norm: 0.8500799713230928, iteration: 341017
loss: 0.9983094334602356,grad_norm: 0.9999996081593698, iteration: 341018
loss: 0.9738959074020386,grad_norm: 0.9999992459617049, iteration: 341019
loss: 1.011595606803894,grad_norm: 0.9999993472459828, iteration: 341020
loss: 0.9824914336204529,grad_norm: 0.8659616734385173, iteration: 341021
loss: 1.0143821239471436,grad_norm: 0.7896161763716381, iteration: 341022
loss: 1.005371332168579,grad_norm: 0.7916603740201338, iteration: 341023
loss: 0.9977771043777466,grad_norm: 0.6859852351592809, iteration: 341024
loss: 1.0079275369644165,grad_norm: 0.8855616756021384, iteration: 341025
loss: 0.9722338914871216,grad_norm: 0.8883281775485766, iteration: 341026
loss: 0.9798344969749451,grad_norm: 0.8293320738258371, iteration: 341027
loss: 1.2128112316131592,grad_norm: 0.9999990384108994, iteration: 341028
loss: 0.9955450892448425,grad_norm: 0.9776265216235688, iteration: 341029
loss: 1.0305484533309937,grad_norm: 0.9411294845776635, iteration: 341030
loss: 0.9764075875282288,grad_norm: 0.7827917145380446, iteration: 341031
loss: 1.0233628749847412,grad_norm: 0.885308085869464, iteration: 341032
loss: 1.0629606246948242,grad_norm: 0.9999994439905492, iteration: 341033
loss: 1.025580883026123,grad_norm: 0.9999991351335409, iteration: 341034
loss: 1.0069657564163208,grad_norm: 0.8316397507261094, iteration: 341035
loss: 0.9925814270973206,grad_norm: 0.7058375115717143, iteration: 341036
loss: 1.0094144344329834,grad_norm: 0.8465483832468013, iteration: 341037
loss: 1.1710091829299927,grad_norm: 0.9999994093229523, iteration: 341038
loss: 0.9691646099090576,grad_norm: 0.8208869156977812, iteration: 341039
loss: 0.9884581565856934,grad_norm: 0.7731594721303697, iteration: 341040
loss: 1.0128514766693115,grad_norm: 0.8257681553854732, iteration: 341041
loss: 1.0091272592544556,grad_norm: 0.9114471051917494, iteration: 341042
loss: 1.0821939706802368,grad_norm: 0.8063215317212311, iteration: 341043
loss: 0.9897763133049011,grad_norm: 0.8962050311303278, iteration: 341044
loss: 1.0131655931472778,grad_norm: 0.8196752007863924, iteration: 341045
loss: 1.0818392038345337,grad_norm: 0.9999997874488435, iteration: 341046
loss: 0.9754816293716431,grad_norm: 0.7937462886746828, iteration: 341047
loss: 0.985065758228302,grad_norm: 0.7786583564614705, iteration: 341048
loss: 0.9933413863182068,grad_norm: 0.7234661156146153, iteration: 341049
loss: 1.0135278701782227,grad_norm: 0.9015697616210089, iteration: 341050
loss: 0.976159393787384,grad_norm: 0.6728852050278358, iteration: 341051
loss: 0.9974135160446167,grad_norm: 0.9272978584938658, iteration: 341052
loss: 0.9892321825027466,grad_norm: 0.778584031466614, iteration: 341053
loss: 1.0276142358779907,grad_norm: 0.9999991661618554, iteration: 341054
loss: 0.9762088656425476,grad_norm: 0.7954428390339595, iteration: 341055
loss: 1.0039620399475098,grad_norm: 0.7650111663467821, iteration: 341056
loss: 1.0229376554489136,grad_norm: 0.9999992262216534, iteration: 341057
loss: 0.9920942783355713,grad_norm: 0.8575673309957028, iteration: 341058
loss: 1.0033843517303467,grad_norm: 0.8702660458298761, iteration: 341059
loss: 1.0234471559524536,grad_norm: 0.913046142322885, iteration: 341060
loss: 1.0158828496932983,grad_norm: 0.9999997051913946, iteration: 341061
loss: 1.0617982149124146,grad_norm: 0.8275635749704806, iteration: 341062
loss: 0.9651204347610474,grad_norm: 0.869723990060967, iteration: 341063
loss: 1.017642855644226,grad_norm: 0.8852530551513803, iteration: 341064
loss: 1.003548264503479,grad_norm: 0.7773500007567319, iteration: 341065
loss: 0.973067581653595,grad_norm: 0.806779513433835, iteration: 341066
loss: 1.0176671743392944,grad_norm: 0.7666957588252084, iteration: 341067
loss: 0.9746949672698975,grad_norm: 0.6989238868716849, iteration: 341068
loss: 1.026877999305725,grad_norm: 0.8093324673991592, iteration: 341069
loss: 0.9762996435165405,grad_norm: 0.9051376231513969, iteration: 341070
loss: 1.0388742685317993,grad_norm: 0.9999991106907459, iteration: 341071
loss: 0.9661290049552917,grad_norm: 0.8359673939143764, iteration: 341072
loss: 1.0188195705413818,grad_norm: 0.7509805176672627, iteration: 341073
loss: 1.0674176216125488,grad_norm: 0.9999991379835702, iteration: 341074
loss: 0.9795078039169312,grad_norm: 0.7814307374659999, iteration: 341075
loss: 0.9736104607582092,grad_norm: 0.8806899268638444, iteration: 341076
loss: 0.9875620007514954,grad_norm: 0.742141793334376, iteration: 341077
loss: 0.9558796286582947,grad_norm: 0.8112013588007025, iteration: 341078
loss: 0.9929245114326477,grad_norm: 0.7376227916076475, iteration: 341079
loss: 1.0753591060638428,grad_norm: 0.9999993721086994, iteration: 341080
loss: 1.0522608757019043,grad_norm: 0.9999997361854452, iteration: 341081
loss: 1.070716142654419,grad_norm: 0.9999999399809381, iteration: 341082
loss: 0.987825334072113,grad_norm: 0.9654344351561505, iteration: 341083
loss: 1.0294735431671143,grad_norm: 0.880037277122005, iteration: 341084
loss: 1.0112344026565552,grad_norm: 0.9333597376899044, iteration: 341085
loss: 1.0013277530670166,grad_norm: 0.7117344699871384, iteration: 341086
loss: 1.0025911331176758,grad_norm: 0.8776283035149441, iteration: 341087
loss: 0.9968881011009216,grad_norm: 0.8518912893623174, iteration: 341088
loss: 0.9967858195304871,grad_norm: 0.7486584337621351, iteration: 341089
loss: 1.1937363147735596,grad_norm: 0.9999996927644114, iteration: 341090
loss: 1.0189223289489746,grad_norm: 0.980001430821858, iteration: 341091
loss: 0.9890714883804321,grad_norm: 0.9265777477566656, iteration: 341092
loss: 1.0008032321929932,grad_norm: 0.8659948849021266, iteration: 341093
loss: 1.0076121091842651,grad_norm: 0.910279356397899, iteration: 341094
loss: 1.0331952571868896,grad_norm: 0.9649333646250492, iteration: 341095
loss: 0.9896233081817627,grad_norm: 0.7615495871342245, iteration: 341096
loss: 1.0297220945358276,grad_norm: 0.9545420051804413, iteration: 341097
loss: 1.0787255764007568,grad_norm: 0.9999998358216733, iteration: 341098
loss: 1.2041523456573486,grad_norm: 0.9999995989322379, iteration: 341099
loss: 1.0015017986297607,grad_norm: 0.8853110534705466, iteration: 341100
loss: 1.0545833110809326,grad_norm: 0.901648927435514, iteration: 341101
loss: 1.0520089864730835,grad_norm: 0.8816667106748256, iteration: 341102
loss: 0.9855976104736328,grad_norm: 0.819269678899143, iteration: 341103
loss: 0.9760661721229553,grad_norm: 0.980042807649739, iteration: 341104
loss: 1.0346416234970093,grad_norm: 0.9999991568688662, iteration: 341105
loss: 1.0294945240020752,grad_norm: 0.7820400184762154, iteration: 341106
loss: 1.0687819719314575,grad_norm: 0.999999666867275, iteration: 341107
loss: 1.1540502309799194,grad_norm: 0.9167596037420499, iteration: 341108
loss: 1.115072250366211,grad_norm: 0.9999991287258622, iteration: 341109
loss: 1.014994502067566,grad_norm: 0.9999991145187652, iteration: 341110
loss: 1.1089078187942505,grad_norm: 0.9999992420022943, iteration: 341111
loss: 1.0197256803512573,grad_norm: 0.7771439764398306, iteration: 341112
loss: 0.9931962490081787,grad_norm: 0.8811867947650757, iteration: 341113
loss: 0.9790364503860474,grad_norm: 0.8655744660668722, iteration: 341114
loss: 0.9658321738243103,grad_norm: 0.9742802196437061, iteration: 341115
loss: 1.0074173212051392,grad_norm: 0.9398861744696134, iteration: 341116
loss: 1.005729079246521,grad_norm: 0.7754260728932365, iteration: 341117
loss: 1.0958243608474731,grad_norm: 0.9999990456123786, iteration: 341118
loss: 1.0205844640731812,grad_norm: 0.8240400936895487, iteration: 341119
loss: 1.0487697124481201,grad_norm: 0.999999296276408, iteration: 341120
loss: 0.9749005436897278,grad_norm: 0.8469678159448135, iteration: 341121
loss: 1.0627102851867676,grad_norm: 0.8157617303374728, iteration: 341122
loss: 1.0182303190231323,grad_norm: 0.9999998683650901, iteration: 341123
loss: 0.997672975063324,grad_norm: 0.7586454184045367, iteration: 341124
loss: 0.9293010830879211,grad_norm: 0.7757830433981342, iteration: 341125
loss: 0.9656361937522888,grad_norm: 0.8681486253209919, iteration: 341126
loss: 1.0063931941986084,grad_norm: 0.8770676230000829, iteration: 341127
loss: 0.9945434331893921,grad_norm: 0.9590806521463572, iteration: 341128
loss: 0.9955815672874451,grad_norm: 0.7717407336739078, iteration: 341129
loss: 0.9978696703910828,grad_norm: 0.934804685401269, iteration: 341130
loss: 1.01941978931427,grad_norm: 0.9156464945200072, iteration: 341131
loss: 0.9975007176399231,grad_norm: 0.9061546707667238, iteration: 341132
loss: 0.9495935440063477,grad_norm: 0.7541780578088673, iteration: 341133
loss: 1.0288653373718262,grad_norm: 0.9999993865096646, iteration: 341134
loss: 1.0112732648849487,grad_norm: 0.7898912548976059, iteration: 341135
loss: 0.9961482882499695,grad_norm: 0.7403517536439134, iteration: 341136
loss: 1.002025842666626,grad_norm: 0.9610833740173207, iteration: 341137
loss: 1.0271432399749756,grad_norm: 0.8793461171257551, iteration: 341138
loss: 1.0144118070602417,grad_norm: 0.9999992267623071, iteration: 341139
loss: 1.0326800346374512,grad_norm: 0.9999989930976115, iteration: 341140
loss: 1.0042527914047241,grad_norm: 0.7516706064855053, iteration: 341141
loss: 1.0465726852416992,grad_norm: 0.9999994424977577, iteration: 341142
loss: 1.0089452266693115,grad_norm: 0.9001975015683003, iteration: 341143
loss: 1.018058180809021,grad_norm: 0.9535269378453098, iteration: 341144
loss: 1.1109102964401245,grad_norm: 0.9999993988216364, iteration: 341145
loss: 1.0464296340942383,grad_norm: 0.7483658868755954, iteration: 341146
loss: 0.9714373350143433,grad_norm: 0.9999997468869927, iteration: 341147
loss: 0.9913399815559387,grad_norm: 0.7672800613650052, iteration: 341148
loss: 1.0124760866165161,grad_norm: 0.7315512172175478, iteration: 341149
loss: 0.9894679188728333,grad_norm: 0.9520932715719714, iteration: 341150
loss: 1.0229992866516113,grad_norm: 0.7675087243108493, iteration: 341151
loss: 1.0372906923294067,grad_norm: 0.9999996312578981, iteration: 341152
loss: 0.9917375445365906,grad_norm: 0.718129878124217, iteration: 341153
loss: 1.0113824605941772,grad_norm: 0.8027465876075427, iteration: 341154
loss: 0.9373467564582825,grad_norm: 0.8114710901332693, iteration: 341155
loss: 1.038716435432434,grad_norm: 0.8754329811284446, iteration: 341156
loss: 0.9762415289878845,grad_norm: 0.9121464370434138, iteration: 341157
loss: 1.0730904340744019,grad_norm: 0.9273794900703299, iteration: 341158
loss: 0.9800854325294495,grad_norm: 0.777299322436745, iteration: 341159
loss: 0.9782213568687439,grad_norm: 0.8472010922759396, iteration: 341160
loss: 1.005623459815979,grad_norm: 0.855634308602461, iteration: 341161
loss: 1.029903531074524,grad_norm: 0.9999990913943764, iteration: 341162
loss: 0.994037389755249,grad_norm: 0.8202191726621982, iteration: 341163
loss: 1.065763235092163,grad_norm: 0.8968788829784237, iteration: 341164
loss: 0.9960057139396667,grad_norm: 0.9999990176291211, iteration: 341165
loss: 0.9811345934867859,grad_norm: 0.873572596148397, iteration: 341166
loss: 0.9840589165687561,grad_norm: 0.8632569577531609, iteration: 341167
loss: 0.9936928749084473,grad_norm: 0.8014811656798605, iteration: 341168
loss: 1.0099469423294067,grad_norm: 0.7737742355851668, iteration: 341169
loss: 1.0072996616363525,grad_norm: 0.8013746329683946, iteration: 341170
loss: 1.007501244544983,grad_norm: 0.999999157772549, iteration: 341171
loss: 1.0596107244491577,grad_norm: 0.9999999605541485, iteration: 341172
loss: 0.9794634580612183,grad_norm: 0.7075475893436127, iteration: 341173
loss: 1.1018483638763428,grad_norm: 0.999999860779082, iteration: 341174
loss: 1.0968152284622192,grad_norm: 0.99999946372482, iteration: 341175
loss: 1.0138978958129883,grad_norm: 0.999999872339001, iteration: 341176
loss: 0.9860129952430725,grad_norm: 0.9085924622481071, iteration: 341177
loss: 1.02275812625885,grad_norm: 0.9999993156631644, iteration: 341178
loss: 0.9762053489685059,grad_norm: 0.7265428931188789, iteration: 341179
loss: 0.9875797629356384,grad_norm: 0.7734780452419012, iteration: 341180
loss: 1.016564965248108,grad_norm: 0.8001569097493098, iteration: 341181
loss: 0.9925253391265869,grad_norm: 0.8781180236381279, iteration: 341182
loss: 0.9956343770027161,grad_norm: 0.9999990833539277, iteration: 341183
loss: 0.9899133443832397,grad_norm: 0.8269761026815896, iteration: 341184
loss: 1.0147594213485718,grad_norm: 0.9999992611848443, iteration: 341185
loss: 0.998813271522522,grad_norm: 0.9565235461749024, iteration: 341186
loss: 1.0657072067260742,grad_norm: 0.9999995875655563, iteration: 341187
loss: 1.0095821619033813,grad_norm: 0.8199866791649852, iteration: 341188
loss: 1.0408819913864136,grad_norm: 0.796238660312807, iteration: 341189
loss: 1.1143550872802734,grad_norm: 0.8649540045464219, iteration: 341190
loss: 0.9958326816558838,grad_norm: 0.7863410514782649, iteration: 341191
loss: 0.9978395700454712,grad_norm: 0.9999994277366548, iteration: 341192
loss: 1.0098358392715454,grad_norm: 0.8275288249338573, iteration: 341193
loss: 1.0143659114837646,grad_norm: 0.7042077625300868, iteration: 341194
loss: 0.993297815322876,grad_norm: 0.9011310895583952, iteration: 341195
loss: 1.1406837701797485,grad_norm: 0.9999992848983377, iteration: 341196
loss: 1.0219658613204956,grad_norm: 0.9999989852147211, iteration: 341197
loss: 0.9459550380706787,grad_norm: 0.9999990542583943, iteration: 341198
loss: 1.1316468715667725,grad_norm: 0.9420276764179649, iteration: 341199
loss: 1.0032604932785034,grad_norm: 0.6987389638402449, iteration: 341200
loss: 1.0416758060455322,grad_norm: 0.8094223316709881, iteration: 341201
loss: 1.005050539970398,grad_norm: 0.7973172854553346, iteration: 341202
loss: 1.029452919960022,grad_norm: 0.9999991160903421, iteration: 341203
loss: 1.0210684537887573,grad_norm: 0.9999990425498605, iteration: 341204
loss: 0.9711726903915405,grad_norm: 0.8019092743258536, iteration: 341205
loss: 1.0658141374588013,grad_norm: 0.9999996031791353, iteration: 341206
loss: 0.9751881957054138,grad_norm: 0.8199678606761158, iteration: 341207
loss: 0.9589545130729675,grad_norm: 0.86842882076283, iteration: 341208
loss: 1.0199121236801147,grad_norm: 0.8888911941642345, iteration: 341209
loss: 1.0002715587615967,grad_norm: 0.8226825811511765, iteration: 341210
loss: 0.9909407496452332,grad_norm: 0.9897773282792776, iteration: 341211
loss: 0.9982093572616577,grad_norm: 0.9999993569560313, iteration: 341212
loss: 1.0098943710327148,grad_norm: 0.8293431726175614, iteration: 341213
loss: 0.9874683022499084,grad_norm: 0.7132951912006963, iteration: 341214
loss: 1.0125970840454102,grad_norm: 0.7581596683678254, iteration: 341215
loss: 0.9886049032211304,grad_norm: 0.8301220451354905, iteration: 341216
loss: 1.0334889888763428,grad_norm: 0.8228146178191442, iteration: 341217
loss: 0.9931972026824951,grad_norm: 0.7957547355868925, iteration: 341218
loss: 0.9818382263183594,grad_norm: 0.9999990995978908, iteration: 341219
loss: 0.9887546300888062,grad_norm: 0.8758648928370435, iteration: 341220
loss: 1.008100152015686,grad_norm: 0.8037452234466514, iteration: 341221
loss: 0.994849681854248,grad_norm: 0.8970081818016922, iteration: 341222
loss: 1.0232282876968384,grad_norm: 0.8293520255417023, iteration: 341223
loss: 1.0147154331207275,grad_norm: 0.7140105530792438, iteration: 341224
loss: 1.0438501834869385,grad_norm: 0.7061937448163607, iteration: 341225
loss: 0.9979631304740906,grad_norm: 0.8937359150984797, iteration: 341226
loss: 1.0129891633987427,grad_norm: 0.8515163886370607, iteration: 341227
loss: 0.9928166270256042,grad_norm: 0.9838855292094884, iteration: 341228
loss: 1.017876148223877,grad_norm: 0.8622200743072859, iteration: 341229
loss: 0.9885751605033875,grad_norm: 0.9068940561990535, iteration: 341230
loss: 1.0213818550109863,grad_norm: 0.8327179176669627, iteration: 341231
loss: 0.9926831722259521,grad_norm: 0.7308834610873053, iteration: 341232
loss: 0.9787933826446533,grad_norm: 0.999999863049218, iteration: 341233
loss: 1.0086655616760254,grad_norm: 0.954206570677238, iteration: 341234
loss: 1.0333690643310547,grad_norm: 0.9999998251443277, iteration: 341235
loss: 1.001706600189209,grad_norm: 0.7959155616787542, iteration: 341236
loss: 0.9939152598381042,grad_norm: 0.9999990926947352, iteration: 341237
loss: 1.0121299028396606,grad_norm: 0.9999999242959203, iteration: 341238
loss: 0.9928011894226074,grad_norm: 0.8224890744316788, iteration: 341239
loss: 1.0171492099761963,grad_norm: 0.7714448203368687, iteration: 341240
loss: 0.9580543041229248,grad_norm: 0.8609533695214958, iteration: 341241
loss: 0.9843931198120117,grad_norm: 0.8105668540473162, iteration: 341242
loss: 0.9767007827758789,grad_norm: 0.8845092070408492, iteration: 341243
loss: 1.0024211406707764,grad_norm: 0.8496966476429708, iteration: 341244
loss: 1.0138812065124512,grad_norm: 0.8513975609477057, iteration: 341245
loss: 0.985405683517456,grad_norm: 0.8478174658201043, iteration: 341246
loss: 1.016991138458252,grad_norm: 0.9383560909958916, iteration: 341247
loss: 1.0999298095703125,grad_norm: 0.9999995413193296, iteration: 341248
loss: 1.0198280811309814,grad_norm: 0.8434204387861209, iteration: 341249
loss: 1.003343105316162,grad_norm: 0.9999992706848713, iteration: 341250
loss: 1.0149601697921753,grad_norm: 0.798245669969188, iteration: 341251
loss: 1.0028897523880005,grad_norm: 0.9999999111333471, iteration: 341252
loss: 0.9983725547790527,grad_norm: 0.772543779676296, iteration: 341253
loss: 0.9942469000816345,grad_norm: 0.972008784523712, iteration: 341254
loss: 1.0053907632827759,grad_norm: 0.6876835907339167, iteration: 341255
loss: 1.0243823528289795,grad_norm: 0.761315104677407, iteration: 341256
loss: 1.0224241018295288,grad_norm: 0.9012557826576219, iteration: 341257
loss: 1.0333223342895508,grad_norm: 0.6963434114642586, iteration: 341258
loss: 0.9875530004501343,grad_norm: 0.6959900140078438, iteration: 341259
loss: 0.98326575756073,grad_norm: 0.8695790994790672, iteration: 341260
loss: 1.0052419900894165,grad_norm: 0.8274469481951274, iteration: 341261
loss: 1.0366429090499878,grad_norm: 0.9999990690396967, iteration: 341262
loss: 1.0186246633529663,grad_norm: 0.9999992545724653, iteration: 341263
loss: 1.028685212135315,grad_norm: 0.8813427775098802, iteration: 341264
loss: 1.0070090293884277,grad_norm: 0.817889105514854, iteration: 341265
loss: 0.9755955338478088,grad_norm: 0.8603244500027141, iteration: 341266
loss: 1.0101875066757202,grad_norm: 0.7952436310402026, iteration: 341267
loss: 1.0025618076324463,grad_norm: 0.9064493967027657, iteration: 341268
loss: 1.0407006740570068,grad_norm: 0.8601656503026951, iteration: 341269
loss: 1.01227605342865,grad_norm: 0.9376747384022728, iteration: 341270
loss: 1.027179479598999,grad_norm: 0.739411879580442, iteration: 341271
loss: 0.9873033165931702,grad_norm: 0.8505004740772871, iteration: 341272
loss: 0.9832030534744263,grad_norm: 0.8420349257154965, iteration: 341273
loss: 1.015732765197754,grad_norm: 0.9039849111179951, iteration: 341274
loss: 0.9870696663856506,grad_norm: 0.8371014448899274, iteration: 341275
loss: 1.0021220445632935,grad_norm: 0.8012678215717787, iteration: 341276
loss: 0.9983179569244385,grad_norm: 0.7822591151957671, iteration: 341277
loss: 1.0667378902435303,grad_norm: 0.9999997887102938, iteration: 341278
loss: 0.9778316020965576,grad_norm: 0.7298894525379565, iteration: 341279
loss: 1.0250860452651978,grad_norm: 0.8937724649555381, iteration: 341280
loss: 0.9641952514648438,grad_norm: 0.7620744036325948, iteration: 341281
loss: 1.0046230554580688,grad_norm: 0.8540242478123714, iteration: 341282
loss: 1.0168462991714478,grad_norm: 0.8457766323042603, iteration: 341283
loss: 1.0403656959533691,grad_norm: 0.8764403851624525, iteration: 341284
loss: 1.0137267112731934,grad_norm: 0.8878919932454201, iteration: 341285
loss: 1.0200953483581543,grad_norm: 0.9999990971462182, iteration: 341286
loss: 0.9745779037475586,grad_norm: 0.9027264881266601, iteration: 341287
loss: 1.0082273483276367,grad_norm: 0.8694505711330647, iteration: 341288
loss: 0.9828866124153137,grad_norm: 0.7383448768113899, iteration: 341289
loss: 1.0582914352416992,grad_norm: 0.978934439569535, iteration: 341290
loss: 1.0140832662582397,grad_norm: 0.8320926571353547, iteration: 341291
loss: 1.0095070600509644,grad_norm: 0.9999991914600131, iteration: 341292
loss: 0.9895055294036865,grad_norm: 0.8483528259435942, iteration: 341293
loss: 0.9943327307701111,grad_norm: 0.9249877626840962, iteration: 341294
loss: 1.0305273532867432,grad_norm: 0.8886812528950853, iteration: 341295
loss: 0.9761287569999695,grad_norm: 0.9170397407551149, iteration: 341296
loss: 1.0024845600128174,grad_norm: 0.9634450051822679, iteration: 341297
loss: 1.0286656618118286,grad_norm: 0.9567585527950032, iteration: 341298
loss: 1.0520232915878296,grad_norm: 0.8353699673493483, iteration: 341299
loss: 1.006222128868103,grad_norm: 0.8208008486620396, iteration: 341300
loss: 1.0071673393249512,grad_norm: 0.7398474690113673, iteration: 341301
loss: 1.0720837116241455,grad_norm: 0.9999990755879617, iteration: 341302
loss: 0.9820373058319092,grad_norm: 0.7494979004006097, iteration: 341303
loss: 0.9748161435127258,grad_norm: 0.8399743173287962, iteration: 341304
loss: 1.0086030960083008,grad_norm: 0.9999991543591795, iteration: 341305
loss: 0.9948170185089111,grad_norm: 0.9100842656101306, iteration: 341306
loss: 0.9736511707305908,grad_norm: 0.882105038044723, iteration: 341307
loss: 0.9924920201301575,grad_norm: 0.8260614451182278, iteration: 341308
loss: 0.9613738059997559,grad_norm: 0.9999998389984791, iteration: 341309
loss: 0.9705241322517395,grad_norm: 0.8523240247529515, iteration: 341310
loss: 1.0086241960525513,grad_norm: 0.7413096590530966, iteration: 341311
loss: 0.9983731508255005,grad_norm: 0.7257531691675936, iteration: 341312
loss: 0.9918086528778076,grad_norm: 0.7928104796655847, iteration: 341313
loss: 0.9951692819595337,grad_norm: 0.9025070739351452, iteration: 341314
loss: 0.960807740688324,grad_norm: 0.8385724805026084, iteration: 341315
loss: 0.9975162744522095,grad_norm: 0.817119987169336, iteration: 341316
loss: 1.0053088665008545,grad_norm: 0.7148865040907749, iteration: 341317
loss: 1.0191574096679688,grad_norm: 0.9999992182081665, iteration: 341318
loss: 0.9962403178215027,grad_norm: 0.7831068728943615, iteration: 341319
loss: 0.9909909963607788,grad_norm: 0.8704025759329784, iteration: 341320
loss: 1.0314072370529175,grad_norm: 0.999999033126075, iteration: 341321
loss: 0.9934505224227905,grad_norm: 0.9999992134310489, iteration: 341322
loss: 1.0023877620697021,grad_norm: 0.7441325094359211, iteration: 341323
loss: 0.9840164184570312,grad_norm: 0.8392354453160008, iteration: 341324
loss: 1.0220192670822144,grad_norm: 0.9790006484451608, iteration: 341325
loss: 0.9940804839134216,grad_norm: 0.7621150954144931, iteration: 341326
loss: 1.050478458404541,grad_norm: 0.9999993172690954, iteration: 341327
loss: 1.0179463624954224,grad_norm: 0.9999995879798649, iteration: 341328
loss: 1.0038188695907593,grad_norm: 0.6456175043109552, iteration: 341329
loss: 0.982429563999176,grad_norm: 0.7880841407352857, iteration: 341330
loss: 1.0330555438995361,grad_norm: 0.7810452343795881, iteration: 341331
loss: 1.0104824304580688,grad_norm: 0.7650189061065306, iteration: 341332
loss: 1.086074709892273,grad_norm: 0.9999994615543704, iteration: 341333
loss: 0.9935566186904907,grad_norm: 0.9366436715331489, iteration: 341334
loss: 1.0002495050430298,grad_norm: 0.7282386454647559, iteration: 341335
loss: 0.9965689778327942,grad_norm: 0.7416799181015508, iteration: 341336
loss: 1.0034607648849487,grad_norm: 0.8206034655168116, iteration: 341337
loss: 0.9481405019760132,grad_norm: 0.939964576170059, iteration: 341338
loss: 0.9948341250419617,grad_norm: 0.7229558758259114, iteration: 341339
loss: 0.9932690262794495,grad_norm: 0.7617219911372803, iteration: 341340
loss: 1.0053441524505615,grad_norm: 0.9999992801760531, iteration: 341341
loss: 1.0067397356033325,grad_norm: 0.8169146606585547, iteration: 341342
loss: 0.9645220041275024,grad_norm: 0.8772028460044287, iteration: 341343
loss: 0.9991616010665894,grad_norm: 0.8466450472128983, iteration: 341344
loss: 0.9971775412559509,grad_norm: 0.9999992154183208, iteration: 341345
loss: 1.00579035282135,grad_norm: 0.8996065234418242, iteration: 341346
loss: 1.0034551620483398,grad_norm: 0.9999992463508959, iteration: 341347
loss: 0.9396432042121887,grad_norm: 0.932002054760635, iteration: 341348
loss: 0.9867742657661438,grad_norm: 0.8747739703503549, iteration: 341349
loss: 1.0166263580322266,grad_norm: 0.9999994018155323, iteration: 341350
loss: 1.0522972345352173,grad_norm: 0.9999993248881281, iteration: 341351
loss: 0.9705405831336975,grad_norm: 0.8361545386735795, iteration: 341352
loss: 1.001968264579773,grad_norm: 0.9999993203230515, iteration: 341353
loss: 0.9846491813659668,grad_norm: 0.7991323883997207, iteration: 341354
loss: 0.9850345253944397,grad_norm: 0.8429687895295489, iteration: 341355
loss: 1.0164743661880493,grad_norm: 0.8189698199016952, iteration: 341356
loss: 0.9990714192390442,grad_norm: 0.789008788095567, iteration: 341357
loss: 0.9777476787567139,grad_norm: 0.7123494509654107, iteration: 341358
loss: 0.9944669008255005,grad_norm: 0.7935798333584673, iteration: 341359
loss: 0.981447696685791,grad_norm: 0.9166669292658428, iteration: 341360
loss: 0.9819115400314331,grad_norm: 0.9925949357438616, iteration: 341361
loss: 0.9921479225158691,grad_norm: 0.8393659139805074, iteration: 341362
loss: 1.194517970085144,grad_norm: 0.9999990625468634, iteration: 341363
loss: 0.9873806238174438,grad_norm: 0.9209378279606668, iteration: 341364
loss: 1.0077959299087524,grad_norm: 0.6946815286409666, iteration: 341365
loss: 1.077468752861023,grad_norm: 0.9999997622731421, iteration: 341366
loss: 0.998099148273468,grad_norm: 0.8380364035703072, iteration: 341367
loss: 0.9977769255638123,grad_norm: 0.7545078751738976, iteration: 341368
loss: 0.9788638353347778,grad_norm: 0.6893217805053228, iteration: 341369
loss: 1.0192573070526123,grad_norm: 0.9999990439294032, iteration: 341370
loss: 0.9972349405288696,grad_norm: 0.774114806247498, iteration: 341371
loss: 0.9965519905090332,grad_norm: 0.8172462324231776, iteration: 341372
loss: 1.0056971311569214,grad_norm: 0.9999990871960678, iteration: 341373
loss: 0.9798353314399719,grad_norm: 0.7582725810180393, iteration: 341374
loss: 1.0026271343231201,grad_norm: 0.8901315577066348, iteration: 341375
loss: 0.9973685145378113,grad_norm: 0.999999280489151, iteration: 341376
loss: 1.0030348300933838,grad_norm: 0.9999990255896073, iteration: 341377
loss: 0.9824426770210266,grad_norm: 0.959317808560486, iteration: 341378
loss: 1.0082224607467651,grad_norm: 0.8843510224510084, iteration: 341379
loss: 1.0082321166992188,grad_norm: 0.8101200489979451, iteration: 341380
loss: 1.176715612411499,grad_norm: 0.9999989854026465, iteration: 341381
loss: 0.954789936542511,grad_norm: 0.7672442794435314, iteration: 341382
loss: 0.9931385517120361,grad_norm: 0.8670929433653938, iteration: 341383
loss: 1.01400887966156,grad_norm: 0.9999994463929509, iteration: 341384
loss: 1.0027600526809692,grad_norm: 0.8498079133679207, iteration: 341385
loss: 0.9598506093025208,grad_norm: 0.9958459469747857, iteration: 341386
loss: 0.9557210803031921,grad_norm: 0.8222886418706916, iteration: 341387
loss: 1.0264439582824707,grad_norm: 0.8582356583332722, iteration: 341388
loss: 1.0079503059387207,grad_norm: 0.8058228538878328, iteration: 341389
loss: 1.0274832248687744,grad_norm: 0.798399554162651, iteration: 341390
loss: 1.019779086112976,grad_norm: 0.7324496029079098, iteration: 341391
loss: 1.0074572563171387,grad_norm: 0.744701294871385, iteration: 341392
loss: 1.0210521221160889,grad_norm: 0.7052140521256737, iteration: 341393
loss: 0.9884946346282959,grad_norm: 0.7337547568422739, iteration: 341394
loss: 1.052069902420044,grad_norm: 0.999999919173668, iteration: 341395
loss: 1.027022361755371,grad_norm: 0.8534845638172599, iteration: 341396
loss: 1.1663501262664795,grad_norm: 0.9999996314969475, iteration: 341397
loss: 1.0316671133041382,grad_norm: 0.8512057622198101, iteration: 341398
loss: 0.9783037900924683,grad_norm: 0.8535151671515354, iteration: 341399
loss: 1.0952739715576172,grad_norm: 0.9999994083230314, iteration: 341400
loss: 0.9875411987304688,grad_norm: 0.8232064446674118, iteration: 341401
loss: 1.0613254308700562,grad_norm: 0.9999997128001871, iteration: 341402
loss: 1.0361734628677368,grad_norm: 0.856387849136933, iteration: 341403
loss: 0.9700720310211182,grad_norm: 0.7449269088097508, iteration: 341404
loss: 1.0645604133605957,grad_norm: 0.9632587635536267, iteration: 341405
loss: 0.9825615882873535,grad_norm: 0.8889962411939655, iteration: 341406
loss: 0.9864198565483093,grad_norm: 0.9366375881081421, iteration: 341407
loss: 1.0140974521636963,grad_norm: 0.9186647313217826, iteration: 341408
loss: 0.9913839101791382,grad_norm: 0.7479985699784464, iteration: 341409
loss: 0.9984689950942993,grad_norm: 0.7201777346760234, iteration: 341410
loss: 1.0325164794921875,grad_norm: 0.7970608261813206, iteration: 341411
loss: 1.0241360664367676,grad_norm: 0.8196868839149275, iteration: 341412
loss: 0.949760377407074,grad_norm: 0.824448581508183, iteration: 341413
loss: 1.0036113262176514,grad_norm: 0.8257128779367676, iteration: 341414
loss: 0.9821855425834656,grad_norm: 0.8659754220173627, iteration: 341415
loss: 1.0310792922973633,grad_norm: 0.9999993864837142, iteration: 341416
loss: 0.9807730317115784,grad_norm: 0.8554938924595072, iteration: 341417
loss: 1.0095188617706299,grad_norm: 0.9999991859421671, iteration: 341418
loss: 1.0146530866622925,grad_norm: 0.7724138112386509, iteration: 341419
loss: 1.0216076374053955,grad_norm: 0.9999994410577093, iteration: 341420
loss: 0.9855128526687622,grad_norm: 0.719672404349398, iteration: 341421
loss: 1.004752516746521,grad_norm: 0.9942104106457875, iteration: 341422
loss: 0.9944116473197937,grad_norm: 0.9002771848747649, iteration: 341423
loss: 1.0917080640792847,grad_norm: 0.9999995719660114, iteration: 341424
loss: 1.077298879623413,grad_norm: 0.8609435208786894, iteration: 341425
loss: 1.0045069456100464,grad_norm: 0.9999998866760313, iteration: 341426
loss: 1.0771071910858154,grad_norm: 0.9005785709004434, iteration: 341427
loss: 1.115779161453247,grad_norm: 0.9999989699189606, iteration: 341428
loss: 1.0216537714004517,grad_norm: 0.8926876764040205, iteration: 341429
loss: 0.9875795841217041,grad_norm: 0.7217750181726826, iteration: 341430
loss: 1.0125222206115723,grad_norm: 0.7989116916416401, iteration: 341431
loss: 1.038528561592102,grad_norm: 0.999999182969376, iteration: 341432
loss: 1.0557196140289307,grad_norm: 0.9999997212607968, iteration: 341433
loss: 1.019473671913147,grad_norm: 0.9297311976281608, iteration: 341434
loss: 1.0180931091308594,grad_norm: 0.9998307594027274, iteration: 341435
loss: 1.067935824394226,grad_norm: 0.9999999186073883, iteration: 341436
loss: 1.0001096725463867,grad_norm: 0.9786141868597462, iteration: 341437
loss: 1.051085352897644,grad_norm: 0.9999990661628992, iteration: 341438
loss: 0.9995837807655334,grad_norm: 0.8563047921112796, iteration: 341439
loss: 1.0033698081970215,grad_norm: 0.753111076469168, iteration: 341440
loss: 1.1077783107757568,grad_norm: 0.9999993769082977, iteration: 341441
loss: 1.0205025672912598,grad_norm: 0.9999991920963279, iteration: 341442
loss: 1.020169973373413,grad_norm: 0.9883512850380886, iteration: 341443
loss: 1.0111154317855835,grad_norm: 0.8860515088391939, iteration: 341444
loss: 0.9984098672866821,grad_norm: 0.969211320840868, iteration: 341445
loss: 1.0543440580368042,grad_norm: 0.9999991578638017, iteration: 341446
loss: 0.9962016344070435,grad_norm: 0.8976339405230983, iteration: 341447
loss: 1.0636051893234253,grad_norm: 0.9999997334248123, iteration: 341448
loss: 0.9791309833526611,grad_norm: 0.8549036975444361, iteration: 341449
loss: 1.013645887374878,grad_norm: 0.9164508385442712, iteration: 341450
loss: 1.059604287147522,grad_norm: 0.8859987576445562, iteration: 341451
loss: 0.9846174120903015,grad_norm: 0.9557866747289407, iteration: 341452
loss: 1.013029932975769,grad_norm: 0.7155083003684832, iteration: 341453
loss: 0.9613674283027649,grad_norm: 0.854539463925052, iteration: 341454
loss: 1.1018602848052979,grad_norm: 0.9509674830670162, iteration: 341455
loss: 1.0471546649932861,grad_norm: 0.9999992031367426, iteration: 341456
loss: 0.9791228771209717,grad_norm: 0.7490348595200995, iteration: 341457
loss: 1.1767308712005615,grad_norm: 0.9999999793780975, iteration: 341458
loss: 1.1771705150604248,grad_norm: 0.9957485357377399, iteration: 341459
loss: 0.9938849806785583,grad_norm: 0.7371906278241813, iteration: 341460
loss: 1.017140507698059,grad_norm: 0.9999995472313162, iteration: 341461
loss: 1.1856575012207031,grad_norm: 0.9999995056640799, iteration: 341462
loss: 0.9971342086791992,grad_norm: 0.999998979161743, iteration: 341463
loss: 1.001888394355774,grad_norm: 0.6366442488093308, iteration: 341464
loss: 0.9892342686653137,grad_norm: 0.9999998048874003, iteration: 341465
loss: 0.9959560036659241,grad_norm: 0.8272254231938544, iteration: 341466
loss: 1.0110138654708862,grad_norm: 0.9999994070451116, iteration: 341467
loss: 1.085408329963684,grad_norm: 0.9999997101438777, iteration: 341468
loss: 1.0444872379302979,grad_norm: 0.9999998865113225, iteration: 341469
loss: 1.0668622255325317,grad_norm: 0.9999994330899878, iteration: 341470
loss: 1.2097532749176025,grad_norm: 0.9999996038351939, iteration: 341471
loss: 1.0600208044052124,grad_norm: 0.999999314087515, iteration: 341472
loss: 1.1204379796981812,grad_norm: 0.9999998982185155, iteration: 341473
loss: 1.1276401281356812,grad_norm: 0.9999997273561126, iteration: 341474
loss: 1.1015303134918213,grad_norm: 1.0000000009143115, iteration: 341475
loss: 1.055114507675171,grad_norm: 0.9999991753926802, iteration: 341476
loss: 1.1040996313095093,grad_norm: 0.9999994250967473, iteration: 341477
loss: 1.1206461191177368,grad_norm: 0.9999993716254104, iteration: 341478
loss: 1.0285329818725586,grad_norm: 0.7180324029103997, iteration: 341479
loss: 1.1800870895385742,grad_norm: 0.9999993388891112, iteration: 341480
loss: 1.0377014875411987,grad_norm: 0.9999992040913159, iteration: 341481
loss: 1.1208912134170532,grad_norm: 0.9999991313864444, iteration: 341482
loss: 1.0260207653045654,grad_norm: 0.9999998464602313, iteration: 341483
loss: 1.151026725769043,grad_norm: 0.9999995876054635, iteration: 341484
loss: 1.050896167755127,grad_norm: 0.9999993802575426, iteration: 341485
loss: 1.0936241149902344,grad_norm: 0.9999993723518746, iteration: 341486
loss: 1.0353286266326904,grad_norm: 0.9999991720171308, iteration: 341487
loss: 1.0661096572875977,grad_norm: 0.9999993451855547, iteration: 341488
loss: 1.0298664569854736,grad_norm: 0.9999991278162117, iteration: 341489
loss: 1.0220993757247925,grad_norm: 0.8929275195108843, iteration: 341490
loss: 1.069686770439148,grad_norm: 0.9999997240415809, iteration: 341491
loss: 1.1202243566513062,grad_norm: 0.9999994400856759, iteration: 341492
loss: 1.0846078395843506,grad_norm: 0.9999994965606986, iteration: 341493
loss: 1.0639677047729492,grad_norm: 0.8897454522446221, iteration: 341494
loss: 1.2039732933044434,grad_norm: 0.9999995970449648, iteration: 341495
loss: 1.0526776313781738,grad_norm: 0.9182479930600113, iteration: 341496
loss: 1.048148274421692,grad_norm: 0.8734259614899943, iteration: 341497
loss: 1.1310745477676392,grad_norm: 0.9999998389525256, iteration: 341498
loss: 1.1103310585021973,grad_norm: 0.9999999832834462, iteration: 341499
loss: 1.0834953784942627,grad_norm: 0.9999990819072304, iteration: 341500
loss: 1.0525686740875244,grad_norm: 0.999999057478779, iteration: 341501
loss: 1.0464411973953247,grad_norm: 0.9004018698116758, iteration: 341502
loss: 1.1249895095825195,grad_norm: 0.9999994038990155, iteration: 341503
loss: 1.067524790763855,grad_norm: 0.9448570611729797, iteration: 341504
loss: 1.0223217010498047,grad_norm: 0.9999991168178264, iteration: 341505
loss: 1.0363754034042358,grad_norm: 0.9999997136380298, iteration: 341506
loss: 0.994843602180481,grad_norm: 0.9079568836343694, iteration: 341507
loss: 1.0080316066741943,grad_norm: 0.8261070929335121, iteration: 341508
loss: 1.0290353298187256,grad_norm: 0.9999991446731302, iteration: 341509
loss: 0.9735410809516907,grad_norm: 0.9999991156667519, iteration: 341510
loss: 1.0862114429473877,grad_norm: 0.9999994378148086, iteration: 341511
loss: 1.0463813543319702,grad_norm: 0.8324591246198949, iteration: 341512
loss: 1.1387851238250732,grad_norm: 0.9999993524739988, iteration: 341513
loss: 0.9881503582000732,grad_norm: 0.7846634140660352, iteration: 341514
loss: 0.9957420229911804,grad_norm: 0.9619197039394283, iteration: 341515
loss: 1.032053828239441,grad_norm: 0.9999996446948028, iteration: 341516
loss: 1.0305111408233643,grad_norm: 0.7435653122969127, iteration: 341517
loss: 0.9859025478363037,grad_norm: 0.9246271877626966, iteration: 341518
loss: 1.1302331686019897,grad_norm: 0.9999998070969496, iteration: 341519
loss: 1.0781375169754028,grad_norm: 0.9999995501547105, iteration: 341520
loss: 1.031341314315796,grad_norm: 0.9999991267807681, iteration: 341521
loss: 1.1955087184906006,grad_norm: 0.9999991884581918, iteration: 341522
loss: 1.0509748458862305,grad_norm: 0.9999996910745931, iteration: 341523
loss: 1.0038434267044067,grad_norm: 0.9632050382993839, iteration: 341524
loss: 1.0598136186599731,grad_norm: 0.9160113713494449, iteration: 341525
loss: 1.0167285203933716,grad_norm: 0.748337589817349, iteration: 341526
loss: 1.0902507305145264,grad_norm: 0.9465224487999938, iteration: 341527
loss: 1.026812195777893,grad_norm: 0.9999991509254098, iteration: 341528
loss: 1.0417133569717407,grad_norm: 0.9999995941981581, iteration: 341529
loss: 1.0156404972076416,grad_norm: 0.8697348258251153, iteration: 341530
loss: 1.0350712537765503,grad_norm: 0.9157706168879349, iteration: 341531
loss: 1.071592926979065,grad_norm: 0.9999995422445787, iteration: 341532
loss: 1.0455957651138306,grad_norm: 0.8864381973531055, iteration: 341533
loss: 1.162388801574707,grad_norm: 0.9999999557975263, iteration: 341534
loss: 0.998235285282135,grad_norm: 0.9535717185713467, iteration: 341535
loss: 0.9688088893890381,grad_norm: 0.9101381199590848, iteration: 341536
loss: 1.0074739456176758,grad_norm: 0.9227544907289981, iteration: 341537
loss: 1.0497416257858276,grad_norm: 0.7633502992201383, iteration: 341538
loss: 0.9950912594795227,grad_norm: 0.9999991421918436, iteration: 341539
loss: 0.9843181371688843,grad_norm: 0.9836602023010397, iteration: 341540
loss: 1.1730797290802002,grad_norm: 0.9999996041067225, iteration: 341541
loss: 1.0487271547317505,grad_norm: 0.9759531671126004, iteration: 341542
loss: 0.9998809695243835,grad_norm: 0.9645763159914708, iteration: 341543
loss: 1.006949782371521,grad_norm: 0.999999264748022, iteration: 341544
loss: 1.0136463642120361,grad_norm: 0.779968870120533, iteration: 341545
loss: 1.0364713668823242,grad_norm: 0.9589182733939706, iteration: 341546
loss: 1.0317609310150146,grad_norm: 0.8688032632928702, iteration: 341547
loss: 0.9963354468345642,grad_norm: 0.9999990109065766, iteration: 341548
loss: 1.079561710357666,grad_norm: 0.9999995846226971, iteration: 341549
loss: 1.0893481969833374,grad_norm: 0.9999993508573266, iteration: 341550
loss: 1.0376101732254028,grad_norm: 0.9999990733758507, iteration: 341551
loss: 1.0875502824783325,grad_norm: 0.9999994937075236, iteration: 341552
loss: 1.1252224445343018,grad_norm: 0.9999994916499739, iteration: 341553
loss: 1.076789140701294,grad_norm: 0.9999991116230925, iteration: 341554
loss: 1.0179561376571655,grad_norm: 0.999999389604818, iteration: 341555
loss: 1.063417673110962,grad_norm: 0.9999992320242497, iteration: 341556
loss: 1.0794285535812378,grad_norm: 0.9999997331858902, iteration: 341557
loss: 1.033686637878418,grad_norm: 0.8725822404556389, iteration: 341558
loss: 1.0677598714828491,grad_norm: 0.9575348363998706, iteration: 341559
loss: 1.0393576622009277,grad_norm: 0.9999991129424166, iteration: 341560
loss: 0.9868909120559692,grad_norm: 0.8966462320187379, iteration: 341561
loss: 1.0433329343795776,grad_norm: 0.9999993499923087, iteration: 341562
loss: 1.0843894481658936,grad_norm: 0.8763954257631612, iteration: 341563
loss: 1.0893765687942505,grad_norm: 0.9999994378659857, iteration: 341564
loss: 1.0078046321868896,grad_norm: 0.9981047198261072, iteration: 341565
loss: 1.056341528892517,grad_norm: 0.894984373921215, iteration: 341566
loss: 1.0974863767623901,grad_norm: 0.999999643147729, iteration: 341567
loss: 1.073913335800171,grad_norm: 0.9999994660897256, iteration: 341568
loss: 1.0471861362457275,grad_norm: 0.9999993723912641, iteration: 341569
loss: 1.0196515321731567,grad_norm: 0.9999997280516905, iteration: 341570
loss: 1.0984491109848022,grad_norm: 0.9999999087660937, iteration: 341571
loss: 1.0802574157714844,grad_norm: 0.9999993049292971, iteration: 341572
loss: 1.0411003828048706,grad_norm: 0.9999995763695863, iteration: 341573
loss: 1.1315019130706787,grad_norm: 0.9999998744532086, iteration: 341574
loss: 1.0192193984985352,grad_norm: 0.9999991059799069, iteration: 341575
loss: 1.0039137601852417,grad_norm: 0.7988803165923412, iteration: 341576
loss: 1.0915648937225342,grad_norm: 0.9418176134188913, iteration: 341577
loss: 1.090167760848999,grad_norm: 0.9999990067655122, iteration: 341578
loss: 1.0056487321853638,grad_norm: 0.9212943888795719, iteration: 341579
loss: 1.0496110916137695,grad_norm: 0.9999993535265654, iteration: 341580
loss: 1.189704179763794,grad_norm: 0.9999993563084945, iteration: 341581
loss: 1.0414619445800781,grad_norm: 0.7097616680059675, iteration: 341582
loss: 1.2815313339233398,grad_norm: 0.9999998129939796, iteration: 341583
loss: 1.025036334991455,grad_norm: 0.7716112616567463, iteration: 341584
loss: 1.022876262664795,grad_norm: 0.9999996736313546, iteration: 341585
loss: 1.0852209329605103,grad_norm: 0.9999992437911336, iteration: 341586
loss: 1.0326244831085205,grad_norm: 0.937913828060637, iteration: 341587
loss: 1.0247156620025635,grad_norm: 0.841097882374256, iteration: 341588
loss: 1.0473171472549438,grad_norm: 0.9999990764402207, iteration: 341589
loss: 0.9990769624710083,grad_norm: 0.9834232943853111, iteration: 341590
loss: 1.1080045700073242,grad_norm: 0.9999993937305723, iteration: 341591
loss: 1.0033009052276611,grad_norm: 0.9576591744450039, iteration: 341592
loss: 1.0164669752120972,grad_norm: 0.999999228779127, iteration: 341593
loss: 1.0170036554336548,grad_norm: 0.8596740794213646, iteration: 341594
loss: 1.0571777820587158,grad_norm: 0.999999659814673, iteration: 341595
loss: 1.0429130792617798,grad_norm: 0.91693918316374, iteration: 341596
loss: 1.0316826105117798,grad_norm: 0.737485844040882, iteration: 341597
loss: 1.047108769416809,grad_norm: 0.9999992521825013, iteration: 341598
loss: 1.0062967538833618,grad_norm: 0.8321885231613312, iteration: 341599
loss: 1.002342939376831,grad_norm: 0.9999991041275781, iteration: 341600
loss: 1.0153990983963013,grad_norm: 0.9859645672866983, iteration: 341601
loss: 1.211505651473999,grad_norm: 0.9999997576540248, iteration: 341602
loss: 1.1284300088882446,grad_norm: 0.9999994072831941, iteration: 341603
loss: 0.987415075302124,grad_norm: 0.9999990560016362, iteration: 341604
loss: 1.015804648399353,grad_norm: 0.9999992179093844, iteration: 341605
loss: 1.051965355873108,grad_norm: 0.915291164373006, iteration: 341606
loss: 1.0419057607650757,grad_norm: 0.9999996062950786, iteration: 341607
loss: 1.059343695640564,grad_norm: 0.8758711887891427, iteration: 341608
loss: 1.0663273334503174,grad_norm: 0.8536979196804276, iteration: 341609
loss: 1.2062486410140991,grad_norm: 1.0000000364047443, iteration: 341610
loss: 1.0150578022003174,grad_norm: 0.9999995342496276, iteration: 341611
loss: 1.0278475284576416,grad_norm: 0.9999991414795651, iteration: 341612
loss: 1.1320991516113281,grad_norm: 0.9999997867721286, iteration: 341613
loss: 0.9805753827095032,grad_norm: 0.8691147619951074, iteration: 341614
loss: 0.9815813899040222,grad_norm: 0.9999991917757249, iteration: 341615
loss: 1.067925214767456,grad_norm: 0.807948945665149, iteration: 341616
loss: 1.1453227996826172,grad_norm: 0.9999999400014268, iteration: 341617
loss: 1.1344757080078125,grad_norm: 0.9999993737749631, iteration: 341618
loss: 1.0125510692596436,grad_norm: 0.9999991314845315, iteration: 341619
loss: 1.0229160785675049,grad_norm: 0.8567035049601571, iteration: 341620
loss: 1.065919280052185,grad_norm: 0.8757646785220571, iteration: 341621
loss: 1.0022993087768555,grad_norm: 0.8190418145542788, iteration: 341622
loss: 1.0253986120224,grad_norm: 0.9999999897740723, iteration: 341623
loss: 1.1210819482803345,grad_norm: 0.9999991068821299, iteration: 341624
loss: 1.1055490970611572,grad_norm: 0.8684021759754612, iteration: 341625
loss: 1.036281943321228,grad_norm: 0.8631277193711602, iteration: 341626
loss: 1.074141502380371,grad_norm: 0.9999996358670027, iteration: 341627
loss: 1.1649048328399658,grad_norm: 0.9999997536810709, iteration: 341628
loss: 0.9997541904449463,grad_norm: 0.999999130499388, iteration: 341629
loss: 1.5018470287322998,grad_norm: 0.9999992365915389, iteration: 341630
loss: 1.0434465408325195,grad_norm: 0.8897051647630085, iteration: 341631
loss: 1.3120152950286865,grad_norm: 0.9999991270641113, iteration: 341632
loss: 1.1051750183105469,grad_norm: 0.9999996924256653, iteration: 341633
loss: 1.009864091873169,grad_norm: 0.8632687282548392, iteration: 341634
loss: 1.051816463470459,grad_norm: 0.9999992328244591, iteration: 341635
loss: 1.105023980140686,grad_norm: 0.9999992994937247, iteration: 341636
loss: 1.1101216077804565,grad_norm: 0.8985145142617991, iteration: 341637
loss: 1.0391275882720947,grad_norm: 0.9999993076535914, iteration: 341638
loss: 1.0501289367675781,grad_norm: 0.9999990522432458, iteration: 341639
loss: 1.0414398908615112,grad_norm: 0.9999992143020732, iteration: 341640
loss: 1.3400135040283203,grad_norm: 1.0000000785053604, iteration: 341641
loss: 1.1270732879638672,grad_norm: 0.9999992132148483, iteration: 341642
loss: 1.0090879201889038,grad_norm: 0.8996216292221558, iteration: 341643
loss: 1.1431642770767212,grad_norm: 0.9999997689248216, iteration: 341644
loss: 1.2628121376037598,grad_norm: 0.9999996645609266, iteration: 341645
loss: 1.0171622037887573,grad_norm: 0.9999992087227049, iteration: 341646
loss: 0.9923222661018372,grad_norm: 0.8541476843576324, iteration: 341647
loss: 1.057065725326538,grad_norm: 0.9999994404666985, iteration: 341648
loss: 1.0705538988113403,grad_norm: 0.9999999063025612, iteration: 341649
loss: 1.0172107219696045,grad_norm: 0.9999993509213327, iteration: 341650
loss: 1.0154224634170532,grad_norm: 0.8302222727308386, iteration: 341651
loss: 1.0974284410476685,grad_norm: 0.9999996154529768, iteration: 341652
loss: 1.0451258420944214,grad_norm: 0.9999990791839488, iteration: 341653
loss: 1.0391480922698975,grad_norm: 0.9999992699436493, iteration: 341654
loss: 1.0340579748153687,grad_norm: 0.6440434166503641, iteration: 341655
loss: 1.1471436023712158,grad_norm: 0.9999996256334489, iteration: 341656
loss: 1.05291748046875,grad_norm: 0.999999074281316, iteration: 341657
loss: 1.11289644241333,grad_norm: 0.9999996338880848, iteration: 341658
loss: 0.9976470470428467,grad_norm: 0.909772844610957, iteration: 341659
loss: 1.022189736366272,grad_norm: 0.7512349655256552, iteration: 341660
loss: 1.0272690057754517,grad_norm: 0.9999991013629977, iteration: 341661
loss: 1.292284369468689,grad_norm: 0.9999999856041644, iteration: 341662
loss: 1.367873191833496,grad_norm: 0.9999997178070859, iteration: 341663
loss: 1.189802646636963,grad_norm: 0.9999995786737861, iteration: 341664
loss: 1.4350215196609497,grad_norm: 0.9999998866853448, iteration: 341665
loss: 1.1243280172348022,grad_norm: 0.9999991872176165, iteration: 341666
loss: 1.0398626327514648,grad_norm: 0.9999999581280222, iteration: 341667
loss: 1.1186630725860596,grad_norm: 0.9999992564521344, iteration: 341668
loss: 1.0543955564498901,grad_norm: 0.999999236620328, iteration: 341669
loss: 1.2412220239639282,grad_norm: 0.9999993999381721, iteration: 341670
loss: 1.0262930393218994,grad_norm: 0.9846419525921097, iteration: 341671
loss: 1.0145155191421509,grad_norm: 0.9999990952004951, iteration: 341672
loss: 1.0255106687545776,grad_norm: 0.9269676006362239, iteration: 341673
loss: 1.056653380393982,grad_norm: 0.8609677933660879, iteration: 341674
loss: 1.0091413259506226,grad_norm: 0.7203291332651588, iteration: 341675
loss: 1.0982906818389893,grad_norm: 0.99999984960262, iteration: 341676
loss: 1.1277772188186646,grad_norm: 0.9999991036666636, iteration: 341677
loss: 1.0802613496780396,grad_norm: 0.999999593343963, iteration: 341678
loss: 0.9910258650779724,grad_norm: 0.8686568475617775, iteration: 341679
loss: 1.0188323259353638,grad_norm: 0.8568461068038257, iteration: 341680
loss: 1.0967419147491455,grad_norm: 0.999999809021039, iteration: 341681
loss: 1.0144942998886108,grad_norm: 0.8972096320798494, iteration: 341682
loss: 1.0403765439987183,grad_norm: 0.9999990643338755, iteration: 341683
loss: 1.0933834314346313,grad_norm: 0.9999992649218389, iteration: 341684
loss: 1.061034917831421,grad_norm: 0.9999992290477736, iteration: 341685
loss: 1.1306548118591309,grad_norm: 0.9999995675863037, iteration: 341686
loss: 1.0328538417816162,grad_norm: 0.9999994201048485, iteration: 341687
loss: 1.088814377784729,grad_norm: 0.8928043842706521, iteration: 341688
loss: 1.0601431131362915,grad_norm: 0.925897441501408, iteration: 341689
loss: 1.111194372177124,grad_norm: 0.9999993454956285, iteration: 341690
loss: 1.0266004800796509,grad_norm: 0.9873018157143286, iteration: 341691
loss: 1.0241047143936157,grad_norm: 0.7331870916285711, iteration: 341692
loss: 1.0195964574813843,grad_norm: 0.9437742856487906, iteration: 341693
loss: 1.0528569221496582,grad_norm: 0.8028434740206533, iteration: 341694
loss: 1.0023207664489746,grad_norm: 0.9999992909478167, iteration: 341695
loss: 1.0278877019882202,grad_norm: 0.8329151579026731, iteration: 341696
loss: 1.0820043087005615,grad_norm: 0.8881642841242287, iteration: 341697
loss: 1.0388331413269043,grad_norm: 0.815222746720618, iteration: 341698
loss: 1.0436272621154785,grad_norm: 0.9999991321354803, iteration: 341699
loss: 1.0503902435302734,grad_norm: 0.8083210089542483, iteration: 341700
loss: 1.0130914449691772,grad_norm: 0.9999991455164283, iteration: 341701
loss: 1.0296711921691895,grad_norm: 0.9574624957460732, iteration: 341702
loss: 1.096520185470581,grad_norm: 0.9999991933755095, iteration: 341703
loss: 1.0494422912597656,grad_norm: 0.9999991973257899, iteration: 341704
loss: 1.0951751470565796,grad_norm: 0.9999999958314502, iteration: 341705
loss: 1.0538170337677002,grad_norm: 0.9999990051409366, iteration: 341706
loss: 1.0405694246292114,grad_norm: 0.9999991556593237, iteration: 341707
loss: 1.0302252769470215,grad_norm: 0.9622218690123764, iteration: 341708
loss: 0.9748101830482483,grad_norm: 0.8817542063368558, iteration: 341709
loss: 1.0869767665863037,grad_norm: 0.9999993885575069, iteration: 341710
loss: 1.0177550315856934,grad_norm: 0.8094668202868329, iteration: 341711
loss: 1.0148524045944214,grad_norm: 0.7822111586413133, iteration: 341712
loss: 0.98421710729599,grad_norm: 0.9279156228346546, iteration: 341713
loss: 1.006382703781128,grad_norm: 0.9999989134102099, iteration: 341714
loss: 1.0103241205215454,grad_norm: 0.9999991885172683, iteration: 341715
loss: 1.054084300994873,grad_norm: 0.92560335275438, iteration: 341716
loss: 1.105047583580017,grad_norm: 0.9999991096088904, iteration: 341717
loss: 0.9934126138687134,grad_norm: 0.8237041175901094, iteration: 341718
loss: 1.0656311511993408,grad_norm: 0.999999870548053, iteration: 341719
loss: 1.0692741870880127,grad_norm: 0.9999993955858348, iteration: 341720
loss: 1.0020198822021484,grad_norm: 0.8098159576269045, iteration: 341721
loss: 1.0127503871917725,grad_norm: 0.7966559194165231, iteration: 341722
loss: 1.0197160243988037,grad_norm: 0.8778611295277572, iteration: 341723
loss: 1.101463794708252,grad_norm: 0.8349366546469672, iteration: 341724
loss: 1.0543503761291504,grad_norm: 0.999999071618947, iteration: 341725
loss: 1.0302331447601318,grad_norm: 0.906286125508662, iteration: 341726
loss: 1.2127630710601807,grad_norm: 0.9999997619871446, iteration: 341727
loss: 1.076499342918396,grad_norm: 0.9999993396344009, iteration: 341728
loss: 1.0432261228561401,grad_norm: 0.9710008704323, iteration: 341729
loss: 1.037420630455017,grad_norm: 0.7781928528987914, iteration: 341730
loss: 1.0554465055465698,grad_norm: 0.913815744152435, iteration: 341731
loss: 1.0331165790557861,grad_norm: 0.9999991866071751, iteration: 341732
loss: 1.0302144289016724,grad_norm: 0.8941895474965406, iteration: 341733
loss: 1.062079668045044,grad_norm: 0.8377262480270197, iteration: 341734
loss: 1.0259934663772583,grad_norm: 0.8566824988374351, iteration: 341735
loss: 1.0413970947265625,grad_norm: 0.8343114008357854, iteration: 341736
loss: 1.0172080993652344,grad_norm: 0.9499752457245331, iteration: 341737
loss: 1.019739031791687,grad_norm: 0.9283031356146172, iteration: 341738
loss: 1.0028105974197388,grad_norm: 0.9105992640249205, iteration: 341739
loss: 1.0604774951934814,grad_norm: 0.9955035527692754, iteration: 341740
loss: 1.0505105257034302,grad_norm: 0.9999990828858932, iteration: 341741
loss: 0.9802995920181274,grad_norm: 0.9641602073144033, iteration: 341742
loss: 1.0485392808914185,grad_norm: 0.9999997385547065, iteration: 341743
loss: 1.0210034847259521,grad_norm: 0.9182536391684349, iteration: 341744
loss: 1.0334105491638184,grad_norm: 0.9999992877363304, iteration: 341745
loss: 1.0377886295318604,grad_norm: 0.9660433941705416, iteration: 341746
loss: 1.0402710437774658,grad_norm: 0.9999990646132525, iteration: 341747
loss: 1.0440884828567505,grad_norm: 0.9490587971894183, iteration: 341748
loss: 0.9944691061973572,grad_norm: 0.942003643951442, iteration: 341749
loss: 1.0921283960342407,grad_norm: 0.999999115896982, iteration: 341750
loss: 1.0119073390960693,grad_norm: 0.9999991254628716, iteration: 341751
loss: 1.0112615823745728,grad_norm: 0.8281521764126032, iteration: 341752
loss: 1.005492925643921,grad_norm: 0.9106270381378213, iteration: 341753
loss: 1.0554511547088623,grad_norm: 0.9809269327922846, iteration: 341754
loss: 0.9919369220733643,grad_norm: 0.8771683543534609, iteration: 341755
loss: 1.032050609588623,grad_norm: 0.9999992820249526, iteration: 341756
loss: 1.0588563680648804,grad_norm: 0.8148949535399268, iteration: 341757
loss: 1.060499668121338,grad_norm: 0.9999994293577775, iteration: 341758
loss: 1.0274611711502075,grad_norm: 0.8975730458256129, iteration: 341759
loss: 1.0239207744598389,grad_norm: 0.9999991968825398, iteration: 341760
loss: 0.9759763479232788,grad_norm: 0.8689889904212488, iteration: 341761
loss: 1.083647608757019,grad_norm: 0.9999994650912577, iteration: 341762
loss: 1.0411337614059448,grad_norm: 0.9233204475065088, iteration: 341763
loss: 1.0739020109176636,grad_norm: 0.8582874622107504, iteration: 341764
loss: 0.9896867275238037,grad_norm: 0.7494208607686554, iteration: 341765
loss: 1.028254747390747,grad_norm: 0.9999993350203914, iteration: 341766
loss: 1.019785761833191,grad_norm: 0.9987004063899616, iteration: 341767
loss: 1.2579963207244873,grad_norm: 0.9999994469335612, iteration: 341768
loss: 1.0020813941955566,grad_norm: 0.9349121588911261, iteration: 341769
loss: 1.0397648811340332,grad_norm: 0.9999992565628936, iteration: 341770
loss: 1.0842511653900146,grad_norm: 0.9999991670243923, iteration: 341771
loss: 0.989911675453186,grad_norm: 0.851658819154296, iteration: 341772
loss: 1.02421236038208,grad_norm: 0.9999990897354122, iteration: 341773
loss: 1.027858853340149,grad_norm: 0.7822248543847793, iteration: 341774
loss: 1.0145680904388428,grad_norm: 0.9890133616113564, iteration: 341775
loss: 1.0515161752700806,grad_norm: 0.9999993838756317, iteration: 341776
loss: 1.0494475364685059,grad_norm: 0.9877699934051658, iteration: 341777
loss: 1.0169366598129272,grad_norm: 0.8474091763598043, iteration: 341778
loss: 1.0138832330703735,grad_norm: 0.7718376069625356, iteration: 341779
loss: 0.9835721254348755,grad_norm: 0.8838817731488998, iteration: 341780
loss: 1.0135679244995117,grad_norm: 0.9999990049377284, iteration: 341781
loss: 0.9801355004310608,grad_norm: 0.9903636011128613, iteration: 341782
loss: 1.0178351402282715,grad_norm: 0.7768230470205392, iteration: 341783
loss: 1.036949872970581,grad_norm: 0.856506994390691, iteration: 341784
loss: 1.0616586208343506,grad_norm: 0.9999996096432474, iteration: 341785
loss: 1.0651935338974,grad_norm: 0.9126767109509396, iteration: 341786
loss: 1.0535072088241577,grad_norm: 0.9182478819787196, iteration: 341787
loss: 1.0467385053634644,grad_norm: 0.8246563836878391, iteration: 341788
loss: 1.017950177192688,grad_norm: 0.9999993945233583, iteration: 341789
loss: 1.029558777809143,grad_norm: 0.9482721137694871, iteration: 341790
loss: 1.1390124559402466,grad_norm: 0.9999993405314743, iteration: 341791
loss: 1.0439250469207764,grad_norm: 0.9494561243770693, iteration: 341792
loss: 1.0717053413391113,grad_norm: 0.9999993492003689, iteration: 341793
loss: 1.0507586002349854,grad_norm: 0.9579436930746543, iteration: 341794
loss: 1.1644821166992188,grad_norm: 0.9999995288024188, iteration: 341795
loss: 0.9889461994171143,grad_norm: 0.9999991225286665, iteration: 341796
loss: 1.0554683208465576,grad_norm: 0.9759709390112555, iteration: 341797
loss: 1.0053306818008423,grad_norm: 0.9999990345720712, iteration: 341798
loss: 1.093313455581665,grad_norm: 0.9985093415149853, iteration: 341799
loss: 1.0443042516708374,grad_norm: 0.9999996145638901, iteration: 341800
loss: 1.0297807455062866,grad_norm: 0.9999997383083166, iteration: 341801
loss: 1.0369822978973389,grad_norm: 0.9999994272052486, iteration: 341802
loss: 1.0459884405136108,grad_norm: 0.8534228443587561, iteration: 341803
loss: 1.0838135480880737,grad_norm: 0.9999990602478143, iteration: 341804
loss: 1.0380098819732666,grad_norm: 0.9999993484572625, iteration: 341805
loss: 1.0003737211227417,grad_norm: 0.9652007887576086, iteration: 341806
loss: 1.0089209079742432,grad_norm: 0.9382207920372111, iteration: 341807
loss: 1.1068148612976074,grad_norm: 0.9266110313465995, iteration: 341808
loss: 1.0608571767807007,grad_norm: 0.9584024826299776, iteration: 341809
loss: 0.980943500995636,grad_norm: 0.9999993640430416, iteration: 341810
loss: 1.0364998579025269,grad_norm: 0.736587488275366, iteration: 341811
loss: 0.9800370931625366,grad_norm: 0.9353605128290374, iteration: 341812
loss: 1.0223931074142456,grad_norm: 0.7720700878675385, iteration: 341813
loss: 1.039888858795166,grad_norm: 0.9999992271569679, iteration: 341814
loss: 1.0446161031723022,grad_norm: 0.8453736716772281, iteration: 341815
loss: 0.9985538721084595,grad_norm: 0.8866114733047034, iteration: 341816
loss: 1.0247758626937866,grad_norm: 0.8456010924642174, iteration: 341817
loss: 1.0781660079956055,grad_norm: 0.9452749331157314, iteration: 341818
loss: 1.0154447555541992,grad_norm: 0.7778995604187773, iteration: 341819
loss: 1.0293371677398682,grad_norm: 0.9698889543077592, iteration: 341820
loss: 1.026566743850708,grad_norm: 0.8802852474413839, iteration: 341821
loss: 0.9784876704216003,grad_norm: 0.9999994823605381, iteration: 341822
loss: 0.9993114471435547,grad_norm: 0.8975728235645487, iteration: 341823
loss: 1.0537221431732178,grad_norm: 0.8467932362252348, iteration: 341824
loss: 1.0467358827590942,grad_norm: 0.9999993136068349, iteration: 341825
loss: 1.1851463317871094,grad_norm: 0.9999999330102274, iteration: 341826
loss: 1.2429982423782349,grad_norm: 0.9999994415641518, iteration: 341827
loss: 0.9723621010780334,grad_norm: 0.7347487156040909, iteration: 341828
loss: 1.0381102561950684,grad_norm: 0.8399747609882258, iteration: 341829
loss: 1.0304995775222778,grad_norm: 0.8463506515063148, iteration: 341830
loss: 1.019221544265747,grad_norm: 0.8693756254263111, iteration: 341831
loss: 1.0026191473007202,grad_norm: 0.7708205127799725, iteration: 341832
loss: 1.0479635000228882,grad_norm: 0.9999997500964309, iteration: 341833
loss: 1.0318217277526855,grad_norm: 0.9999995827140412, iteration: 341834
loss: 0.9960657358169556,grad_norm: 0.9999996823378827, iteration: 341835
loss: 1.0299793481826782,grad_norm: 0.9999993452162218, iteration: 341836
loss: 1.1881083250045776,grad_norm: 0.9999997527482858, iteration: 341837
loss: 1.0483627319335938,grad_norm: 0.8346793716274234, iteration: 341838
loss: 1.115071415901184,grad_norm: 0.9999997371553879, iteration: 341839
loss: 1.039977788925171,grad_norm: 0.782449926046011, iteration: 341840
loss: 1.0604524612426758,grad_norm: 0.9999994595756463, iteration: 341841
loss: 1.1765602827072144,grad_norm: 0.999999984527023, iteration: 341842
loss: 1.055648922920227,grad_norm: 0.9999990411286654, iteration: 341843
loss: 1.017030954360962,grad_norm: 0.9424234172641036, iteration: 341844
loss: 1.0155882835388184,grad_norm: 0.9035451671431047, iteration: 341845
loss: 1.0347026586532593,grad_norm: 0.9999991519053902, iteration: 341846
loss: 1.0592641830444336,grad_norm: 0.9999990640824807, iteration: 341847
loss: 1.0679664611816406,grad_norm: 0.9123387300345451, iteration: 341848
loss: 1.1199266910552979,grad_norm: 0.9999991914793885, iteration: 341849
loss: 1.0536448955535889,grad_norm: 0.9999995486219557, iteration: 341850
loss: 1.0084973573684692,grad_norm: 0.9332416533795816, iteration: 341851
loss: 1.07793390750885,grad_norm: 0.999999294245388, iteration: 341852
loss: 0.9837880730628967,grad_norm: 0.8925411201039691, iteration: 341853
loss: 1.0400179624557495,grad_norm: 0.9095517077671952, iteration: 341854
loss: 1.0373011827468872,grad_norm: 0.8355232522646083, iteration: 341855
loss: 1.0285078287124634,grad_norm: 0.9999998534128093, iteration: 341856
loss: 0.9938355684280396,grad_norm: 0.7647155350606601, iteration: 341857
loss: 1.2717883586883545,grad_norm: 0.999999907968798, iteration: 341858
loss: 1.0818514823913574,grad_norm: 0.9999996423350512, iteration: 341859
loss: 1.0213875770568848,grad_norm: 0.8562610599764395, iteration: 341860
loss: 1.021139144897461,grad_norm: 0.8759017774604713, iteration: 341861
loss: 0.9949655532836914,grad_norm: 0.7334015131151902, iteration: 341862
loss: 1.0570218563079834,grad_norm: 0.999999081658862, iteration: 341863
loss: 0.982616662979126,grad_norm: 0.9999996190416748, iteration: 341864
loss: 1.0359553098678589,grad_norm: 0.999999166613643, iteration: 341865
loss: 1.0808913707733154,grad_norm: 0.9871773136608739, iteration: 341866
loss: 1.0140717029571533,grad_norm: 0.8804964011441343, iteration: 341867
loss: 0.9999008178710938,grad_norm: 0.9999998992731972, iteration: 341868
loss: 1.1539926528930664,grad_norm: 0.9999993830763232, iteration: 341869
loss: 1.0770390033721924,grad_norm: 0.7934062216573656, iteration: 341870
loss: 1.1623080968856812,grad_norm: 0.9999999349413458, iteration: 341871
loss: 1.1100300550460815,grad_norm: 0.9999995686190052, iteration: 341872
loss: 1.1134968996047974,grad_norm: 0.9665930595674898, iteration: 341873
loss: 1.010492205619812,grad_norm: 0.8810708144484902, iteration: 341874
loss: 1.1404448747634888,grad_norm: 0.9999999297876526, iteration: 341875
loss: 1.0464235544204712,grad_norm: 0.785864307825879, iteration: 341876
loss: 1.1952515840530396,grad_norm: 0.9999990395729514, iteration: 341877
loss: 1.0438437461853027,grad_norm: 0.9999990909128953, iteration: 341878
loss: 1.0597196817398071,grad_norm: 0.9999992257682814, iteration: 341879
loss: 1.0421326160430908,grad_norm: 0.7468299498492416, iteration: 341880
loss: 1.054081916809082,grad_norm: 0.9275347039109096, iteration: 341881
loss: 1.088973045349121,grad_norm: 0.999999359978021, iteration: 341882
loss: 1.0841467380523682,grad_norm: 0.9716294020573034, iteration: 341883
loss: 1.0778417587280273,grad_norm: 0.9999992536742032, iteration: 341884
loss: 1.0801068544387817,grad_norm: 0.7835142341618355, iteration: 341885
loss: 1.0550448894500732,grad_norm: 0.9999995719191942, iteration: 341886
loss: 1.0983115434646606,grad_norm: 0.9999992907107926, iteration: 341887
loss: 1.040687084197998,grad_norm: 0.9999993870838827, iteration: 341888
loss: 1.0566768646240234,grad_norm: 0.8819065980512335, iteration: 341889
loss: 1.0955532789230347,grad_norm: 0.9999995942698217, iteration: 341890
loss: 1.1775503158569336,grad_norm: 0.9999994081702018, iteration: 341891
loss: 1.0638656616210938,grad_norm: 0.9999996812606369, iteration: 341892
loss: 1.039492130279541,grad_norm: 0.99999934225164, iteration: 341893
loss: 1.1384409666061401,grad_norm: 0.9999995497887425, iteration: 341894
loss: 1.0584986209869385,grad_norm: 0.9421947031588579, iteration: 341895
loss: 1.0473897457122803,grad_norm: 0.9512333261932956, iteration: 341896
loss: 1.0850017070770264,grad_norm: 0.9999993298128401, iteration: 341897
loss: 1.0119479894638062,grad_norm: 0.8993189347260777, iteration: 341898
loss: 1.083290934562683,grad_norm: 0.999999187940899, iteration: 341899
loss: 1.031488060951233,grad_norm: 0.7988605672601761, iteration: 341900
loss: 1.0524928569793701,grad_norm: 0.816498302270816, iteration: 341901
loss: 1.103989839553833,grad_norm: 0.9999998280967174, iteration: 341902
loss: 1.2279599905014038,grad_norm: 0.9999991415786696, iteration: 341903
loss: 1.0841238498687744,grad_norm: 0.8511913302093426, iteration: 341904
loss: 1.1818163394927979,grad_norm: 0.9999997711482551, iteration: 341905
loss: 1.1823385953903198,grad_norm: 0.9999989949408872, iteration: 341906
loss: 1.0886507034301758,grad_norm: 0.9999990954853151, iteration: 341907
loss: 1.0429039001464844,grad_norm: 0.8717314825955306, iteration: 341908
loss: 1.0367927551269531,grad_norm: 0.8610129217027046, iteration: 341909
loss: 0.9960731267929077,grad_norm: 0.9999991959005676, iteration: 341910
loss: 1.077738881111145,grad_norm: 0.8581250298857582, iteration: 341911
loss: 1.12495756149292,grad_norm: 0.9999990993612924, iteration: 341912
loss: 1.056735634803772,grad_norm: 0.9999998369296864, iteration: 341913
loss: 1.1739546060562134,grad_norm: 0.9999998157438922, iteration: 341914
loss: 1.0393660068511963,grad_norm: 0.9193033469730907, iteration: 341915
loss: 1.0243067741394043,grad_norm: 0.9060659963520802, iteration: 341916
loss: 0.9991941452026367,grad_norm: 0.9999995929905109, iteration: 341917
loss: 1.0228220224380493,grad_norm: 0.8938931199801678, iteration: 341918
loss: 1.0319788455963135,grad_norm: 0.9591389986895452, iteration: 341919
loss: 1.0701909065246582,grad_norm: 0.9999993503082893, iteration: 341920
loss: 1.1135720014572144,grad_norm: 0.9026358038823081, iteration: 341921
loss: 1.077054500579834,grad_norm: 0.9999999656511847, iteration: 341922
loss: 1.0193142890930176,grad_norm: 0.9559436126120738, iteration: 341923
loss: 1.007556676864624,grad_norm: 0.9394684162623472, iteration: 341924
loss: 1.0458111763000488,grad_norm: 0.9999992741204805, iteration: 341925
loss: 1.033963680267334,grad_norm: 0.8679771581200775, iteration: 341926
loss: 1.107459306716919,grad_norm: 0.9999998225429023, iteration: 341927
loss: 1.0373132228851318,grad_norm: 0.9999992016136174, iteration: 341928
loss: 1.0608904361724854,grad_norm: 0.9266438853010037, iteration: 341929
loss: 1.04877507686615,grad_norm: 0.962383837066156, iteration: 341930
loss: 1.0831300020217896,grad_norm: 0.9999997261567726, iteration: 341931
loss: 1.0592268705368042,grad_norm: 0.9999993214550267, iteration: 341932
loss: 1.0412780046463013,grad_norm: 0.95654128509434, iteration: 341933
loss: 1.0845286846160889,grad_norm: 0.9999998734029532, iteration: 341934
loss: 1.48087477684021,grad_norm: 0.999999472364078, iteration: 341935
loss: 1.021314263343811,grad_norm: 0.875484638460497, iteration: 341936
loss: 1.0479732751846313,grad_norm: 0.9999992220949301, iteration: 341937
loss: 1.0239087343215942,grad_norm: 0.8516268956099494, iteration: 341938
loss: 1.070671796798706,grad_norm: 0.978872516442255, iteration: 341939
loss: 1.0540034770965576,grad_norm: 0.9999999304979323, iteration: 341940
loss: 1.0593793392181396,grad_norm: 0.9999993403222617, iteration: 341941
loss: 1.056221604347229,grad_norm: 0.9999992213686278, iteration: 341942
loss: 1.144905924797058,grad_norm: 0.9999998380275045, iteration: 341943
loss: 1.0810117721557617,grad_norm: 0.9326232224053826, iteration: 341944
loss: 1.042362093925476,grad_norm: 0.9999995290375084, iteration: 341945
loss: 1.0674760341644287,grad_norm: 0.9866796333832923, iteration: 341946
loss: 1.01300048828125,grad_norm: 0.9999990005757331, iteration: 341947
loss: 1.2122581005096436,grad_norm: 0.9999999830994067, iteration: 341948
loss: 1.121529221534729,grad_norm: 0.9999997715636979, iteration: 341949
loss: 1.0601001977920532,grad_norm: 0.9999995567213011, iteration: 341950
loss: 1.2532023191452026,grad_norm: 0.9999996073873424, iteration: 341951
loss: 1.2654318809509277,grad_norm: 0.9999998018457149, iteration: 341952
loss: 1.0932888984680176,grad_norm: 0.9999992473246876, iteration: 341953
loss: 1.1561223268508911,grad_norm: 0.9999990548967056, iteration: 341954
loss: 0.993179976940155,grad_norm: 0.850926868767287, iteration: 341955
loss: 1.1132944822311401,grad_norm: 0.9999993858250839, iteration: 341956
loss: 1.0230661630630493,grad_norm: 0.9999992119096474, iteration: 341957
loss: 1.0389378070831299,grad_norm: 0.9274321627821064, iteration: 341958
loss: 1.1897194385528564,grad_norm: 0.9999998770917662, iteration: 341959
loss: 1.0820902585983276,grad_norm: 0.999999477982329, iteration: 341960
loss: 1.2542952299118042,grad_norm: 0.9999998107955964, iteration: 341961
loss: 1.113976001739502,grad_norm: 0.9999995599519301, iteration: 341962
loss: 1.0256986618041992,grad_norm: 0.9999992821853598, iteration: 341963
loss: 1.1054860353469849,grad_norm: 0.9999996667983745, iteration: 341964
loss: 1.01580810546875,grad_norm: 0.9950539509953055, iteration: 341965
loss: 1.045312762260437,grad_norm: 0.9999990993102202, iteration: 341966
loss: 1.1538548469543457,grad_norm: 0.9999997330837349, iteration: 341967
loss: 1.1961421966552734,grad_norm: 0.9999995334641808, iteration: 341968
loss: 1.1866577863693237,grad_norm: 0.9999994709166392, iteration: 341969
loss: 1.0226794481277466,grad_norm: 0.9999990491541163, iteration: 341970
loss: 1.0648341178894043,grad_norm: 1.0000000143929195, iteration: 341971
loss: 1.0362390279769897,grad_norm: 0.9999990911483094, iteration: 341972
loss: 1.0924772024154663,grad_norm: 0.9897977398621308, iteration: 341973
loss: 1.011813759803772,grad_norm: 0.9999993083048643, iteration: 341974
loss: 1.1120823621749878,grad_norm: 0.9999996810636005, iteration: 341975
loss: 1.018959879875183,grad_norm: 0.9158612047162312, iteration: 341976
loss: 1.0369690656661987,grad_norm: 0.9162455863396055, iteration: 341977
loss: 0.9964087009429932,grad_norm: 0.9779830706312385, iteration: 341978
loss: 1.0954691171646118,grad_norm: 0.9724303715836308, iteration: 341979
loss: 1.0385721921920776,grad_norm: 0.9468304958007544, iteration: 341980
loss: 1.0160664319992065,grad_norm: 0.8333273328160903, iteration: 341981
loss: 1.0110644102096558,grad_norm: 0.8444104547502371, iteration: 341982
loss: 1.0279804468154907,grad_norm: 0.9999990508574713, iteration: 341983
loss: 1.1066884994506836,grad_norm: 0.9999996701674033, iteration: 341984
loss: 1.1020973920822144,grad_norm: 0.9999998434426577, iteration: 341985
loss: 1.0291639566421509,grad_norm: 0.7524339749437469, iteration: 341986
loss: 1.0023192167282104,grad_norm: 0.8850768028694647, iteration: 341987
loss: 1.086650013923645,grad_norm: 0.9999997039449827, iteration: 341988
loss: 1.0402350425720215,grad_norm: 0.8251654584505905, iteration: 341989
loss: 1.0181472301483154,grad_norm: 0.9999996148414411, iteration: 341990
loss: 1.034656286239624,grad_norm: 0.9999993098518483, iteration: 341991
loss: 1.322317361831665,grad_norm: 0.9999995316534951, iteration: 341992
loss: 1.1186788082122803,grad_norm: 0.9999997897223376, iteration: 341993
loss: 1.0543859004974365,grad_norm: 0.9997742982107254, iteration: 341994
loss: 1.2377898693084717,grad_norm: 0.9999999073225728, iteration: 341995
loss: 1.0534731149673462,grad_norm: 0.9999990725611045, iteration: 341996
loss: 1.0564178228378296,grad_norm: 0.9999996564492304, iteration: 341997
loss: 1.0228177309036255,grad_norm: 0.9999995869349795, iteration: 341998
loss: 1.0148500204086304,grad_norm: 0.8512423989131331, iteration: 341999
loss: 1.3314719200134277,grad_norm: 0.9999995739455045, iteration: 342000
loss: 1.0464367866516113,grad_norm: 0.9999997464382389, iteration: 342001
loss: 1.0304603576660156,grad_norm: 0.9252631650263186, iteration: 342002
loss: 1.0248812437057495,grad_norm: 0.9999994487802161, iteration: 342003
loss: 1.0964219570159912,grad_norm: 0.9999996859277015, iteration: 342004
loss: 1.0251033306121826,grad_norm: 0.9581784815220946, iteration: 342005
loss: 1.1432979106903076,grad_norm: 0.9999992293057779, iteration: 342006
loss: 1.088364839553833,grad_norm: 0.9999993630097814, iteration: 342007
loss: 1.022590160369873,grad_norm: 0.7710534695025758, iteration: 342008
loss: 1.0601320266723633,grad_norm: 0.9999989547791809, iteration: 342009
loss: 1.0520650148391724,grad_norm: 0.8147992443888838, iteration: 342010
loss: 1.03452730178833,grad_norm: 0.8736047628133201, iteration: 342011
loss: 0.9880357980728149,grad_norm: 0.7471482890228953, iteration: 342012
loss: 1.0418105125427246,grad_norm: 0.877510151739363, iteration: 342013
loss: 1.0355372428894043,grad_norm: 0.999999878872588, iteration: 342014
loss: 1.00948965549469,grad_norm: 0.9999991676839591, iteration: 342015
loss: 1.027091145515442,grad_norm: 0.8761110842417117, iteration: 342016
loss: 1.0202714204788208,grad_norm: 0.9999994122311299, iteration: 342017
loss: 1.019465684890747,grad_norm: 0.9999992082289229, iteration: 342018
loss: 1.0437135696411133,grad_norm: 0.8064704658546926, iteration: 342019
loss: 0.9953070282936096,grad_norm: 0.905615099612414, iteration: 342020
loss: 1.076387643814087,grad_norm: 0.8602046889938696, iteration: 342021
loss: 1.0144548416137695,grad_norm: 0.7799572741177621, iteration: 342022
loss: 1.070257544517517,grad_norm: 0.982492219549042, iteration: 342023
loss: 1.0677602291107178,grad_norm: 0.744753252738597, iteration: 342024
loss: 1.072983741760254,grad_norm: 0.9999996013178798, iteration: 342025
loss: 1.0511428117752075,grad_norm: 0.9999997551482506, iteration: 342026
loss: 1.0300770998001099,grad_norm: 0.999999155878154, iteration: 342027
loss: 1.0998852252960205,grad_norm: 0.890520129022191, iteration: 342028
loss: 1.0710798501968384,grad_norm: 0.9999990817686922, iteration: 342029
loss: 1.0426321029663086,grad_norm: 0.9999990915358166, iteration: 342030
loss: 1.0779614448547363,grad_norm: 0.9999991262908038, iteration: 342031
loss: 1.0356700420379639,grad_norm: 0.9232919543392455, iteration: 342032
loss: 1.0172505378723145,grad_norm: 0.9999996059962765, iteration: 342033
loss: 1.0965111255645752,grad_norm: 0.9182258015944009, iteration: 342034
loss: 1.0714945793151855,grad_norm: 0.9999996212110993, iteration: 342035
loss: 1.0440657138824463,grad_norm: 0.7341947992700482, iteration: 342036
loss: 1.0265512466430664,grad_norm: 0.9999992508092788, iteration: 342037
loss: 1.0664491653442383,grad_norm: 0.9999992819496833, iteration: 342038
loss: 1.0933115482330322,grad_norm: 1.0000000022639912, iteration: 342039
loss: 1.0580732822418213,grad_norm: 0.999999419376682, iteration: 342040
loss: 1.005352258682251,grad_norm: 0.8811492936200285, iteration: 342041
loss: 1.1208256483078003,grad_norm: 0.9999998248174818, iteration: 342042
loss: 1.135892391204834,grad_norm: 0.9999997999795961, iteration: 342043
loss: 1.0674279928207397,grad_norm: 0.9999990436359467, iteration: 342044
loss: 1.106173038482666,grad_norm: 0.9999992740851655, iteration: 342045
loss: 1.158328652381897,grad_norm: 0.9999999508272096, iteration: 342046
loss: 1.1937470436096191,grad_norm: 0.9999997802428566, iteration: 342047
loss: 1.0479439496994019,grad_norm: 0.9999993242253647, iteration: 342048
loss: 1.0539820194244385,grad_norm: 0.8501918143243149, iteration: 342049
loss: 1.001194953918457,grad_norm: 0.8172763476397588, iteration: 342050
loss: 0.9929036498069763,grad_norm: 0.8302481614225605, iteration: 342051
loss: 1.084175705909729,grad_norm: 0.8749399381388832, iteration: 342052
loss: 1.0800875425338745,grad_norm: 0.9999996892857284, iteration: 342053
loss: 1.0911760330200195,grad_norm: 0.9988543655385191, iteration: 342054
loss: 1.0709327459335327,grad_norm: 0.9983035163315708, iteration: 342055
loss: 1.0220425128936768,grad_norm: 0.9999991044250248, iteration: 342056
loss: 1.024379014968872,grad_norm: 0.999999318209018, iteration: 342057
loss: 1.1503939628601074,grad_norm: 0.9999992356371983, iteration: 342058
loss: 1.018074870109558,grad_norm: 0.9379898983679769, iteration: 342059
loss: 0.9966896772384644,grad_norm: 0.9999991449395832, iteration: 342060
loss: 0.9977303743362427,grad_norm: 0.8270391279372578, iteration: 342061
loss: 0.9978374242782593,grad_norm: 0.874458901240393, iteration: 342062
loss: 1.0581269264221191,grad_norm: 0.999999084989296, iteration: 342063
loss: 0.9867810606956482,grad_norm: 0.843535334392828, iteration: 342064
loss: 0.9888374209403992,grad_norm: 0.7945421232545241, iteration: 342065
loss: 1.0116634368896484,grad_norm: 0.999999474934033, iteration: 342066
loss: 1.050848364830017,grad_norm: 0.9999995851849564, iteration: 342067
loss: 1.0355556011199951,grad_norm: 0.9683949840726365, iteration: 342068
loss: 1.0636800527572632,grad_norm: 0.953558857103852, iteration: 342069
loss: 1.0407785177230835,grad_norm: 0.7552481927168057, iteration: 342070
loss: 1.0155423879623413,grad_norm: 0.8648326530622558, iteration: 342071
loss: 1.03264319896698,grad_norm: 0.9999997670802522, iteration: 342072
loss: 1.1698105335235596,grad_norm: 0.999999427835851, iteration: 342073
loss: 1.0234715938568115,grad_norm: 0.7696034687827091, iteration: 342074
loss: 0.9822734594345093,grad_norm: 0.9211528291563881, iteration: 342075
loss: 0.981667160987854,grad_norm: 0.989750438182042, iteration: 342076
loss: 1.047438383102417,grad_norm: 0.744271408966763, iteration: 342077
loss: 1.0457820892333984,grad_norm: 0.777683457917677, iteration: 342078
loss: 0.9939176440238953,grad_norm: 0.9346308789926169, iteration: 342079
loss: 1.0269920825958252,grad_norm: 0.8801254991555321, iteration: 342080
loss: 1.086440920829773,grad_norm: 0.8476067160132646, iteration: 342081
loss: 1.0372520685195923,grad_norm: 0.9999998942815197, iteration: 342082
loss: 1.0770068168640137,grad_norm: 0.9004514396828421, iteration: 342083
loss: 1.0699560642242432,grad_norm: 0.9231107282092476, iteration: 342084
loss: 1.0596457719802856,grad_norm: 0.8447810325901065, iteration: 342085
loss: 0.9840617179870605,grad_norm: 0.8428345446305329, iteration: 342086
loss: 1.016047477722168,grad_norm: 0.8751889962415959, iteration: 342087
loss: 1.011102557182312,grad_norm: 0.8592505976569548, iteration: 342088
loss: 1.0236656665802002,grad_norm: 0.9959793890484315, iteration: 342089
loss: 0.989862859249115,grad_norm: 0.8178998140147429, iteration: 342090
loss: 1.0383388996124268,grad_norm: 0.9999998581068548, iteration: 342091
loss: 1.0155854225158691,grad_norm: 0.7808751930091802, iteration: 342092
loss: 1.0442728996276855,grad_norm: 0.6702609180066256, iteration: 342093
loss: 0.9943374991416931,grad_norm: 0.8110633423211763, iteration: 342094
loss: 0.927829384803772,grad_norm: 0.8296961448377342, iteration: 342095
loss: 1.02545166015625,grad_norm: 0.8076599782433533, iteration: 342096
loss: 1.0176818370819092,grad_norm: 0.9999999401406343, iteration: 342097
loss: 1.0304421186447144,grad_norm: 0.846818280919337, iteration: 342098
loss: 1.0412262678146362,grad_norm: 0.9999990041864807, iteration: 342099
loss: 1.1217013597488403,grad_norm: 0.9999999242933537, iteration: 342100
loss: 1.0817490816116333,grad_norm: 0.999999038415021, iteration: 342101
loss: 1.0374764204025269,grad_norm: 0.9999993256814858, iteration: 342102
loss: 1.1351864337921143,grad_norm: 0.999999533254441, iteration: 342103
loss: 1.0672544240951538,grad_norm: 0.9074138969835371, iteration: 342104
loss: 1.0716873407363892,grad_norm: 0.9131510651142609, iteration: 342105
loss: 1.0125489234924316,grad_norm: 0.8357108742552357, iteration: 342106
loss: 1.0505820512771606,grad_norm: 0.9559981664333977, iteration: 342107
loss: 1.0408719778060913,grad_norm: 0.9999990728815445, iteration: 342108
loss: 1.0259830951690674,grad_norm: 0.8034066262688908, iteration: 342109
loss: 1.018816351890564,grad_norm: 0.9999991870364766, iteration: 342110
loss: 1.0561162233352661,grad_norm: 0.9395862185059201, iteration: 342111
loss: 1.053451418876648,grad_norm: 0.9999998487122574, iteration: 342112
loss: 1.0511952638626099,grad_norm: 0.9999998016155084, iteration: 342113
loss: 1.0532772541046143,grad_norm: 0.99999901871876, iteration: 342114
loss: 1.0596983432769775,grad_norm: 0.8668498655993948, iteration: 342115
loss: 0.9867706894874573,grad_norm: 0.7909682813597475, iteration: 342116
loss: 1.0096240043640137,grad_norm: 0.9873012253287469, iteration: 342117
loss: 1.036179542541504,grad_norm: 0.982870935952292, iteration: 342118
loss: 1.0305304527282715,grad_norm: 0.9399685013154525, iteration: 342119
loss: 1.0549733638763428,grad_norm: 0.9999991529523148, iteration: 342120
loss: 1.0829248428344727,grad_norm: 0.9999991391642785, iteration: 342121
loss: 1.0694549083709717,grad_norm: 0.8072877649992891, iteration: 342122
loss: 1.1100623607635498,grad_norm: 0.9999995977222162, iteration: 342123
loss: 1.0664011240005493,grad_norm: 0.8838632505417517, iteration: 342124
loss: 1.2230823040008545,grad_norm: 0.9999995945736256, iteration: 342125
loss: 1.0941252708435059,grad_norm: 0.9999998377030855, iteration: 342126
loss: 1.0819988250732422,grad_norm: 0.8796226090669687, iteration: 342127
loss: 1.033000111579895,grad_norm: 0.9999992986562288, iteration: 342128
loss: 1.1907479763031006,grad_norm: 0.9999997579044034, iteration: 342129
loss: 1.0260725021362305,grad_norm: 0.7419847838653751, iteration: 342130
loss: 1.0079673528671265,grad_norm: 0.908369633202872, iteration: 342131
loss: 1.159666657447815,grad_norm: 0.9999995696663821, iteration: 342132
loss: 0.98199462890625,grad_norm: 0.9999990730053919, iteration: 342133
loss: 1.0964478254318237,grad_norm: 0.9999998837912403, iteration: 342134
loss: 1.0710972547531128,grad_norm: 0.9999990648769604, iteration: 342135
loss: 1.0410802364349365,grad_norm: 0.9051990220485474, iteration: 342136
loss: 1.0253698825836182,grad_norm: 0.9999991524469666, iteration: 342137
loss: 1.0907962322235107,grad_norm: 0.9072279679175501, iteration: 342138
loss: 1.0904357433319092,grad_norm: 0.9999998094227377, iteration: 342139
loss: 1.1046680212020874,grad_norm: 0.9999998374362952, iteration: 342140
loss: 1.0257023572921753,grad_norm: 0.9999998878526819, iteration: 342141
loss: 1.012641191482544,grad_norm: 0.9847131621313454, iteration: 342142
loss: 1.0250694751739502,grad_norm: 0.8757952608902362, iteration: 342143
loss: 1.0373741388320923,grad_norm: 0.9999998758735364, iteration: 342144
loss: 1.1002591848373413,grad_norm: 0.9999996099218066, iteration: 342145
loss: 0.991008460521698,grad_norm: 0.9999993678056392, iteration: 342146
loss: 1.0544824600219727,grad_norm: 0.8708122913091053, iteration: 342147
loss: 1.018539309501648,grad_norm: 0.8682935116397548, iteration: 342148
loss: 1.070968747138977,grad_norm: 0.9999992793631509, iteration: 342149
loss: 1.0225423574447632,grad_norm: 0.8238344053987166, iteration: 342150
loss: 1.0015813112258911,grad_norm: 0.9999989163398911, iteration: 342151
loss: 1.049015760421753,grad_norm: 0.9999991985506984, iteration: 342152
loss: 1.0104026794433594,grad_norm: 0.9024008131535455, iteration: 342153
loss: 1.065515398979187,grad_norm: 0.9999990672415261, iteration: 342154
loss: 0.971250593662262,grad_norm: 0.9507536175480655, iteration: 342155
loss: 1.0115054845809937,grad_norm: 0.8643383959629409, iteration: 342156
loss: 1.0001308917999268,grad_norm: 0.9244163362957617, iteration: 342157
loss: 0.9931288361549377,grad_norm: 0.740689440585115, iteration: 342158
loss: 1.1175974607467651,grad_norm: 0.999999332789739, iteration: 342159
loss: 1.039785623550415,grad_norm: 0.9856851172003478, iteration: 342160
loss: 1.0331624746322632,grad_norm: 0.8777710176669242, iteration: 342161
loss: 1.0247746706008911,grad_norm: 0.9657996685483486, iteration: 342162
loss: 0.9978481531143188,grad_norm: 0.9999991302924076, iteration: 342163
loss: 1.02669095993042,grad_norm: 0.8820739580083038, iteration: 342164
loss: 0.9640870690345764,grad_norm: 0.891037278877639, iteration: 342165
loss: 0.9805825352668762,grad_norm: 0.9999990499867251, iteration: 342166
loss: 1.1031094789505005,grad_norm: 0.9999997497369455, iteration: 342167
loss: 1.078950047492981,grad_norm: 0.8551916346597903, iteration: 342168
loss: 1.0410059690475464,grad_norm: 0.9078302292238253, iteration: 342169
loss: 1.047116756439209,grad_norm: 0.8729968042798735, iteration: 342170
loss: 1.0039031505584717,grad_norm: 0.7638305431492826, iteration: 342171
loss: 1.071945071220398,grad_norm: 0.9999996170883151, iteration: 342172
loss: 1.041213035583496,grad_norm: 0.881373820724156, iteration: 342173
loss: 1.1177785396575928,grad_norm: 0.9882412049989373, iteration: 342174
loss: 1.0369489192962646,grad_norm: 0.9756993039850212, iteration: 342175
loss: 1.0175892114639282,grad_norm: 0.8121065820700328, iteration: 342176
loss: 1.0256584882736206,grad_norm: 0.7944379174612693, iteration: 342177
loss: 1.0691454410552979,grad_norm: 0.9999992799695044, iteration: 342178
loss: 0.9939606189727783,grad_norm: 0.9144709364195703, iteration: 342179
loss: 1.1076403856277466,grad_norm: 0.9999996578137745, iteration: 342180
loss: 1.114410400390625,grad_norm: 0.8816743896913275, iteration: 342181
loss: 1.0919972658157349,grad_norm: 0.9626298135717909, iteration: 342182
loss: 0.9921340942382812,grad_norm: 0.9698334963076579, iteration: 342183
loss: 0.9980295896530151,grad_norm: 0.838721004532312, iteration: 342184
loss: 1.0970944166183472,grad_norm: 0.9999997671761157, iteration: 342185
loss: 1.0754084587097168,grad_norm: 0.9999998178022391, iteration: 342186
loss: 1.0707606077194214,grad_norm: 0.8093770393456554, iteration: 342187
loss: 1.0839183330535889,grad_norm: 0.9999995191911806, iteration: 342188
loss: 1.0198758840560913,grad_norm: 0.7615618741707245, iteration: 342189
loss: 1.0367623567581177,grad_norm: 0.7635508448831142, iteration: 342190
loss: 1.0162431001663208,grad_norm: 0.7956681518410473, iteration: 342191
loss: 1.0189975500106812,grad_norm: 0.9999995257866514, iteration: 342192
loss: 1.027247428894043,grad_norm: 0.9999996497405874, iteration: 342193
loss: 1.03256356716156,grad_norm: 0.804909652031031, iteration: 342194
loss: 1.108109474182129,grad_norm: 0.9999994029501562, iteration: 342195
loss: 1.1084327697753906,grad_norm: 0.9999999483439146, iteration: 342196
loss: 1.0285682678222656,grad_norm: 0.9630744210062403, iteration: 342197
loss: 1.0849425792694092,grad_norm: 0.9860033132606254, iteration: 342198
loss: 1.050593614578247,grad_norm: 0.75274473174648, iteration: 342199
loss: 1.0619914531707764,grad_norm: 0.9999988993739987, iteration: 342200
loss: 1.0504839420318604,grad_norm: 0.9999991889010024, iteration: 342201
loss: 1.0256679058074951,grad_norm: 0.8427488770057953, iteration: 342202
loss: 1.3652617931365967,grad_norm: 0.9999999120725273, iteration: 342203
loss: 0.98775714635849,grad_norm: 0.7947649992990753, iteration: 342204
loss: 1.0420156717300415,grad_norm: 0.7716217771738103, iteration: 342205
loss: 1.1070281267166138,grad_norm: 0.882792807593599, iteration: 342206
loss: 1.0154832601547241,grad_norm: 0.7969663284931094, iteration: 342207
loss: 0.9993392825126648,grad_norm: 0.9140876993299728, iteration: 342208
loss: 1.0756973028182983,grad_norm: 0.9024723908222309, iteration: 342209
loss: 1.0757120847702026,grad_norm: 0.9999991782906084, iteration: 342210
loss: 0.9850744009017944,grad_norm: 0.8934310669147333, iteration: 342211
loss: 0.9874483942985535,grad_norm: 0.8061254799168494, iteration: 342212
loss: 1.0344970226287842,grad_norm: 0.9999991246083708, iteration: 342213
loss: 1.0775303840637207,grad_norm: 0.7841920646071114, iteration: 342214
loss: 1.0821521282196045,grad_norm: 0.8489259711373476, iteration: 342215
loss: 1.0273064374923706,grad_norm: 0.7891942726538768, iteration: 342216
loss: 1.0189143419265747,grad_norm: 0.8143374553450266, iteration: 342217
loss: 1.0268770456314087,grad_norm: 0.8156169788525539, iteration: 342218
loss: 1.0640536546707153,grad_norm: 0.9999996595535389, iteration: 342219
loss: 1.0180455446243286,grad_norm: 0.8520377327662124, iteration: 342220
loss: 1.2109348773956299,grad_norm: 0.9999995389619271, iteration: 342221
loss: 0.978258490562439,grad_norm: 0.8719762451145793, iteration: 342222
loss: 1.0998563766479492,grad_norm: 0.999999509825475, iteration: 342223
loss: 1.1039950847625732,grad_norm: 0.9270836359340272, iteration: 342224
loss: 1.009647250175476,grad_norm: 0.9306906922157713, iteration: 342225
loss: 1.049269676208496,grad_norm: 0.9999991293568401, iteration: 342226
loss: 1.3126556873321533,grad_norm: 1.0000001272693027, iteration: 342227
loss: 1.1707262992858887,grad_norm: 0.9999998136065339, iteration: 342228
loss: 1.1001144647598267,grad_norm: 0.9999995044938759, iteration: 342229
loss: 1.059388279914856,grad_norm: 0.9999999641982581, iteration: 342230
loss: 1.0107769966125488,grad_norm: 0.9289249173096772, iteration: 342231
loss: 1.0855807065963745,grad_norm: 0.9999994086182697, iteration: 342232
loss: 1.0145392417907715,grad_norm: 0.8161461141405645, iteration: 342233
loss: 1.0329538583755493,grad_norm: 0.8599395771189241, iteration: 342234
loss: 1.0347574949264526,grad_norm: 0.9435386739516075, iteration: 342235
loss: 1.079201579093933,grad_norm: 0.9999993904577126, iteration: 342236
loss: 1.0389797687530518,grad_norm: 0.9999994042670928, iteration: 342237
loss: 1.0243360996246338,grad_norm: 0.9999996479507909, iteration: 342238
loss: 1.010940670967102,grad_norm: 0.8503773593123224, iteration: 342239
loss: 1.0523200035095215,grad_norm: 0.9999994364907955, iteration: 342240
loss: 1.1426841020584106,grad_norm: 0.8817509943342122, iteration: 342241
loss: 1.1105891466140747,grad_norm: 0.9999993365073813, iteration: 342242
loss: 1.0508157014846802,grad_norm: 0.999998943355756, iteration: 342243
loss: 1.184775948524475,grad_norm: 0.9999991875848416, iteration: 342244
loss: 1.0287096500396729,grad_norm: 0.981755670916247, iteration: 342245
loss: 1.0106709003448486,grad_norm: 0.9999993102046733, iteration: 342246
loss: 1.0398274660110474,grad_norm: 0.9999990171442875, iteration: 342247
loss: 1.0449669361114502,grad_norm: 0.9999994499695141, iteration: 342248
loss: 1.1133500337600708,grad_norm: 0.9944490648559623, iteration: 342249
loss: 0.9939892888069153,grad_norm: 0.9999996391202477, iteration: 342250
loss: 1.0946122407913208,grad_norm: 0.8678797255001417, iteration: 342251
loss: 1.1282529830932617,grad_norm: 0.9999996635080185, iteration: 342252
loss: 1.077591061592102,grad_norm: 0.9350858304035787, iteration: 342253
loss: 1.015570044517517,grad_norm: 0.8073447686412777, iteration: 342254
loss: 1.0333516597747803,grad_norm: 0.9344292115457409, iteration: 342255
loss: 1.030830979347229,grad_norm: 0.9999991175274054, iteration: 342256
loss: 1.0301755666732788,grad_norm: 0.9999995440257962, iteration: 342257
loss: 1.004687786102295,grad_norm: 0.9999999418637563, iteration: 342258
loss: 1.0769883394241333,grad_norm: 0.9999997220041147, iteration: 342259
loss: 1.0601941347122192,grad_norm: 0.9999997822556684, iteration: 342260
loss: 1.0858322381973267,grad_norm: 0.9265586058962499, iteration: 342261
loss: 1.0208942890167236,grad_norm: 0.9999992901840253, iteration: 342262
loss: 1.0581305027008057,grad_norm: 0.91585986720078, iteration: 342263
loss: 0.9909054636955261,grad_norm: 0.7830868724654687, iteration: 342264
loss: 1.0513384342193604,grad_norm: 0.874074359344981, iteration: 342265
loss: 1.0623998641967773,grad_norm: 0.9999999488254473, iteration: 342266
loss: 1.0339869260787964,grad_norm: 0.9999998492260699, iteration: 342267
loss: 1.0235449075698853,grad_norm: 0.6994884704185075, iteration: 342268
loss: 1.0011996030807495,grad_norm: 0.8952946387476944, iteration: 342269
loss: 1.0393836498260498,grad_norm: 0.99999906581158, iteration: 342270
loss: 0.9806296825408936,grad_norm: 0.7332457379940973, iteration: 342271
loss: 1.0655823945999146,grad_norm: 0.8661711476505176, iteration: 342272
loss: 1.053813099861145,grad_norm: 0.9999991245419754, iteration: 342273
loss: 1.1185681819915771,grad_norm: 0.9999994833042046, iteration: 342274
loss: 1.0226283073425293,grad_norm: 0.9868927656517656, iteration: 342275
loss: 1.055413842201233,grad_norm: 0.999999373764123, iteration: 342276
loss: 1.1717349290847778,grad_norm: 0.9999993458857482, iteration: 342277
loss: 1.0211001634597778,grad_norm: 0.8655347863167723, iteration: 342278
loss: 0.9958897233009338,grad_norm: 0.8918279146183994, iteration: 342279
loss: 1.0658950805664062,grad_norm: 1.0000000085445089, iteration: 342280
loss: 0.9717495441436768,grad_norm: 0.9999995675877176, iteration: 342281
loss: 1.0138070583343506,grad_norm: 0.7772909290716982, iteration: 342282
loss: 1.012797236442566,grad_norm: 0.8552752935310654, iteration: 342283
loss: 1.0121291875839233,grad_norm: 0.8697902767543656, iteration: 342284
loss: 0.9938285946846008,grad_norm: 0.8047804474932035, iteration: 342285
loss: 1.0248658657073975,grad_norm: 0.9999994086374064, iteration: 342286
loss: 1.0706316232681274,grad_norm: 0.9999991371899996, iteration: 342287
loss: 1.0718843936920166,grad_norm: 0.999999731223321, iteration: 342288
loss: 1.0310389995574951,grad_norm: 0.9276736526863176, iteration: 342289
loss: 1.1641874313354492,grad_norm: 0.9999991957190836, iteration: 342290
loss: 1.0987697839736938,grad_norm: 0.970048663738629, iteration: 342291
loss: 1.1661046743392944,grad_norm: 0.9999993725092379, iteration: 342292
loss: 1.0157454013824463,grad_norm: 0.9999990521770193, iteration: 342293
loss: 0.9813504219055176,grad_norm: 0.8524684071591846, iteration: 342294
loss: 1.0993454456329346,grad_norm: 0.9999999090061699, iteration: 342295
loss: 0.9583685994148254,grad_norm: 0.87060501017471, iteration: 342296
loss: 1.08800208568573,grad_norm: 0.9999994980811211, iteration: 342297
loss: 1.0182299613952637,grad_norm: 0.77769663492286, iteration: 342298
loss: 1.043452262878418,grad_norm: 0.9999992285990836, iteration: 342299
loss: 1.0485154390335083,grad_norm: 0.9308417570596345, iteration: 342300
loss: 1.0277931690216064,grad_norm: 0.7494370466819557, iteration: 342301
loss: 1.0199458599090576,grad_norm: 0.7755157957393015, iteration: 342302
loss: 1.0554454326629639,grad_norm: 0.9999992240197859, iteration: 342303
loss: 1.0391777753829956,grad_norm: 0.9417491781370696, iteration: 342304
loss: 0.972724199295044,grad_norm: 0.7913976053682057, iteration: 342305
loss: 1.0022608041763306,grad_norm: 0.7136543460570159, iteration: 342306
loss: 1.0457236766815186,grad_norm: 0.9999991211905299, iteration: 342307
loss: 0.982424795627594,grad_norm: 0.8284137958646614, iteration: 342308
loss: 1.0242705345153809,grad_norm: 0.9999990309353202, iteration: 342309
loss: 1.1483668088912964,grad_norm: 0.986637961364376, iteration: 342310
loss: 0.9967581033706665,grad_norm: 0.9444876456189875, iteration: 342311
loss: 1.0737636089324951,grad_norm: 0.9999993049364791, iteration: 342312
loss: 0.9893839359283447,grad_norm: 0.9999997765960609, iteration: 342313
loss: 1.086472511291504,grad_norm: 0.9201912012439344, iteration: 342314
loss: 1.073782205581665,grad_norm: 0.9999996682498965, iteration: 342315
loss: 0.9834035038948059,grad_norm: 0.8104215813908091, iteration: 342316
loss: 1.061966061592102,grad_norm: 0.9999996366163165, iteration: 342317
loss: 1.0404376983642578,grad_norm: 0.9999994383046691, iteration: 342318
loss: 1.0308719873428345,grad_norm: 0.8423318401966308, iteration: 342319
loss: 1.0084409713745117,grad_norm: 0.9999995551352275, iteration: 342320
loss: 1.008517861366272,grad_norm: 0.7555956895760839, iteration: 342321
loss: 1.0170899629592896,grad_norm: 0.7802874709834203, iteration: 342322
loss: 0.9700852036476135,grad_norm: 0.8214654902933645, iteration: 342323
loss: 1.0335208177566528,grad_norm: 0.7480824358865898, iteration: 342324
loss: 1.0003303289413452,grad_norm: 0.8598168982454893, iteration: 342325
loss: 0.9944794178009033,grad_norm: 0.684272803927049, iteration: 342326
loss: 1.1693516969680786,grad_norm: 0.9999998496272157, iteration: 342327
loss: 1.0856283903121948,grad_norm: 0.889962079551826, iteration: 342328
loss: 0.9835003614425659,grad_norm: 0.908426233417845, iteration: 342329
loss: 0.9793945550918579,grad_norm: 0.9999990053816328, iteration: 342330
loss: 1.0230456590652466,grad_norm: 0.833096214763958, iteration: 342331
loss: 1.0164817571640015,grad_norm: 0.7959884643103374, iteration: 342332
loss: 0.9729458689689636,grad_norm: 0.8347027326324666, iteration: 342333
loss: 0.9984274506568909,grad_norm: 0.877371368077131, iteration: 342334
loss: 1.0280386209487915,grad_norm: 0.823439615327821, iteration: 342335
loss: 0.9863074421882629,grad_norm: 0.9209663151324208, iteration: 342336
loss: 0.996705174446106,grad_norm: 0.6940424668892902, iteration: 342337
loss: 0.9826249480247498,grad_norm: 0.8615983954270543, iteration: 342338
loss: 1.0664111375808716,grad_norm: 0.9999992645665539, iteration: 342339
loss: 0.9825735092163086,grad_norm: 0.7423610601753947, iteration: 342340
loss: 1.0170246362686157,grad_norm: 0.8085109628197711, iteration: 342341
loss: 1.0172722339630127,grad_norm: 0.7882134003813988, iteration: 342342
loss: 1.047762155532837,grad_norm: 0.9999991138398366, iteration: 342343
loss: 1.0056917667388916,grad_norm: 0.9999989843758484, iteration: 342344
loss: 0.97587651014328,grad_norm: 0.9999999632981867, iteration: 342345
loss: 0.979682981967926,grad_norm: 0.8900333637483179, iteration: 342346
loss: 1.0435676574707031,grad_norm: 0.9999994062038045, iteration: 342347
loss: 1.0104832649230957,grad_norm: 0.999999555150109, iteration: 342348
loss: 1.001105546951294,grad_norm: 0.8638423326565297, iteration: 342349
loss: 0.9899000525474548,grad_norm: 0.894959403551709, iteration: 342350
loss: 0.9745386242866516,grad_norm: 0.7895381627181658, iteration: 342351
loss: 1.0336323976516724,grad_norm: 0.8326354234464066, iteration: 342352
loss: 1.0162270069122314,grad_norm: 0.7051337280794203, iteration: 342353
loss: 1.041812539100647,grad_norm: 0.8929794761542879, iteration: 342354
loss: 1.0891327857971191,grad_norm: 0.8613463317210203, iteration: 342355
loss: 0.9915059208869934,grad_norm: 0.8467696078210529, iteration: 342356
loss: 1.027387022972107,grad_norm: 0.8751583824181232, iteration: 342357
loss: 0.9668270349502563,grad_norm: 0.7581221028007138, iteration: 342358
loss: 1.0187867879867554,grad_norm: 0.8106984644917561, iteration: 342359
loss: 1.020481824874878,grad_norm: 0.7171831834554283, iteration: 342360
loss: 1.006302833557129,grad_norm: 0.8370253653899351, iteration: 342361
loss: 1.0459413528442383,grad_norm: 0.8459692040887518, iteration: 342362
loss: 0.9916450381278992,grad_norm: 0.868306671080072, iteration: 342363
loss: 0.9665842056274414,grad_norm: 0.8264520407345285, iteration: 342364
loss: 1.0013920068740845,grad_norm: 0.7406311073397849, iteration: 342365
loss: 1.0387508869171143,grad_norm: 0.8266293993904722, iteration: 342366
loss: 0.9827431440353394,grad_norm: 0.8903236568223967, iteration: 342367
loss: 1.052055835723877,grad_norm: 0.817580889368067, iteration: 342368
loss: 0.9876102805137634,grad_norm: 0.8521898405927975, iteration: 342369
loss: 1.1110504865646362,grad_norm: 0.9999994223509661, iteration: 342370
loss: 0.9849452376365662,grad_norm: 0.7881012596477798, iteration: 342371
loss: 0.9938728213310242,grad_norm: 0.9999999106257181, iteration: 342372
loss: 0.9485837817192078,grad_norm: 0.8026161534433299, iteration: 342373
loss: 0.9904277324676514,grad_norm: 0.82131577206155, iteration: 342374
loss: 1.0173900127410889,grad_norm: 0.8643385339155057, iteration: 342375
loss: 0.9717192649841309,grad_norm: 0.8446648803242661, iteration: 342376
loss: 1.0031285285949707,grad_norm: 0.733664923009493, iteration: 342377
loss: 0.9946097731590271,grad_norm: 0.7625914066778358, iteration: 342378
loss: 1.0291718244552612,grad_norm: 0.730307465009292, iteration: 342379
loss: 1.010856032371521,grad_norm: 0.9999992540294703, iteration: 342380
loss: 1.0019663572311401,grad_norm: 0.7753562302935503, iteration: 342381
loss: 1.0164281129837036,grad_norm: 0.9999989436444696, iteration: 342382
loss: 1.03020179271698,grad_norm: 0.7569807878116165, iteration: 342383
loss: 1.011670470237732,grad_norm: 0.9371149213432228, iteration: 342384
loss: 0.9871152639389038,grad_norm: 0.7722508103209774, iteration: 342385
loss: 1.0729656219482422,grad_norm: 0.9999991032394812, iteration: 342386
loss: 0.988972544670105,grad_norm: 0.8884086300357661, iteration: 342387
loss: 1.074742317199707,grad_norm: 0.9999992329439666, iteration: 342388
loss: 1.0206642150878906,grad_norm: 0.9999996945413534, iteration: 342389
loss: 1.0265634059906006,grad_norm: 0.787952551437856, iteration: 342390
loss: 0.9880850911140442,grad_norm: 0.869782947791858, iteration: 342391
loss: 1.0252079963684082,grad_norm: 0.9758452780388531, iteration: 342392
loss: 1.0114803314208984,grad_norm: 0.7515232642220461, iteration: 342393
loss: 1.0018954277038574,grad_norm: 0.6959641082904217, iteration: 342394
loss: 1.0294159650802612,grad_norm: 0.999999881174396, iteration: 342395
loss: 0.9699131846427917,grad_norm: 0.9999989636793908, iteration: 342396
loss: 0.9797912836074829,grad_norm: 0.829087753789211, iteration: 342397
loss: 1.0036430358886719,grad_norm: 0.7520163727024228, iteration: 342398
loss: 1.0180573463439941,grad_norm: 0.9999990879030275, iteration: 342399
loss: 1.058964729309082,grad_norm: 0.8369065908293987, iteration: 342400
loss: 1.0049470663070679,grad_norm: 0.8024262683350052, iteration: 342401
loss: 0.9946064949035645,grad_norm: 0.7456766563810941, iteration: 342402
loss: 0.9794412851333618,grad_norm: 0.7760553939897695, iteration: 342403
loss: 0.9720864295959473,grad_norm: 0.9016083608320286, iteration: 342404
loss: 1.005668044090271,grad_norm: 0.727730332501889, iteration: 342405
loss: 1.0033462047576904,grad_norm: 0.9462837532596866, iteration: 342406
loss: 1.0328755378723145,grad_norm: 0.9999998649932128, iteration: 342407
loss: 1.028544545173645,grad_norm: 0.7661496458231801, iteration: 342408
loss: 1.0013980865478516,grad_norm: 0.8516040098736121, iteration: 342409
loss: 1.004267692565918,grad_norm: 0.8982794896074507, iteration: 342410
loss: 1.0763472318649292,grad_norm: 0.9999992473017664, iteration: 342411
loss: 0.9615708589553833,grad_norm: 0.8853028609549812, iteration: 342412
loss: 1.0801514387130737,grad_norm: 0.999999825776842, iteration: 342413
loss: 0.9531258344650269,grad_norm: 0.9040767173232032, iteration: 342414
loss: 0.983041524887085,grad_norm: 0.8886499583608062, iteration: 342415
loss: 1.0098670721054077,grad_norm: 0.7642333145764383, iteration: 342416
loss: 0.9899508357048035,grad_norm: 0.7665875785370144, iteration: 342417
loss: 0.9587293863296509,grad_norm: 0.7512753414652715, iteration: 342418
loss: 0.9494141340255737,grad_norm: 0.7412292295802606, iteration: 342419
loss: 1.0053824186325073,grad_norm: 0.8238560158673824, iteration: 342420
loss: 0.9998522400856018,grad_norm: 0.8443367186569407, iteration: 342421
loss: 1.0000112056732178,grad_norm: 0.8771402514568685, iteration: 342422
loss: 1.0058345794677734,grad_norm: 0.9152634309508969, iteration: 342423
loss: 1.069900631904602,grad_norm: 0.9999995613105914, iteration: 342424
loss: 1.0333658456802368,grad_norm: 0.9999991969223738, iteration: 342425
loss: 1.0134830474853516,grad_norm: 0.7140153548062056, iteration: 342426
loss: 1.0080496072769165,grad_norm: 0.7777293042866844, iteration: 342427
loss: 0.9787741899490356,grad_norm: 0.7084380251208225, iteration: 342428
loss: 1.0101946592330933,grad_norm: 0.7990233551269096, iteration: 342429
loss: 1.0115379095077515,grad_norm: 0.7980889735718341, iteration: 342430
loss: 0.9727515578269958,grad_norm: 0.9815854719014112, iteration: 342431
loss: 1.0225379467010498,grad_norm: 0.9999992095429417, iteration: 342432
loss: 1.0104038715362549,grad_norm: 0.8006586074605397, iteration: 342433
loss: 1.0063172578811646,grad_norm: 0.8142836136418845, iteration: 342434
loss: 1.0107687711715698,grad_norm: 0.9999994258469096, iteration: 342435
loss: 0.9724740982055664,grad_norm: 0.7872831816443905, iteration: 342436
loss: 1.0266592502593994,grad_norm: 0.7926148461794353, iteration: 342437
loss: 0.9759382009506226,grad_norm: 0.7960396963401745, iteration: 342438
loss: 0.9898452162742615,grad_norm: 0.907210579324544, iteration: 342439
loss: 0.9990273118019104,grad_norm: 0.7251103144688441, iteration: 342440
loss: 1.0062267780303955,grad_norm: 0.7907539617741485, iteration: 342441
loss: 1.020804524421692,grad_norm: 0.8311220591777709, iteration: 342442
loss: 0.9739187359809875,grad_norm: 0.9946162313520953, iteration: 342443
loss: 1.0253297090530396,grad_norm: 0.7804597011707707, iteration: 342444
loss: 1.0088527202606201,grad_norm: 0.7848684183859997, iteration: 342445
loss: 1.0102148056030273,grad_norm: 0.9845570093925309, iteration: 342446
loss: 1.0249468088150024,grad_norm: 0.7337046006125449, iteration: 342447
loss: 0.9955313205718994,grad_norm: 0.7459813101426718, iteration: 342448
loss: 0.9953093528747559,grad_norm: 0.7697681140463074, iteration: 342449
loss: 1.0141570568084717,grad_norm: 0.8376380581277363, iteration: 342450
loss: 1.0018483400344849,grad_norm: 0.8501828336775472, iteration: 342451
loss: 0.9824001789093018,grad_norm: 0.7433092117650177, iteration: 342452
loss: 0.9962974786758423,grad_norm: 0.7470114451587163, iteration: 342453
loss: 1.0173218250274658,grad_norm: 0.7823846578329741, iteration: 342454
loss: 0.9711028933525085,grad_norm: 0.824258685333429, iteration: 342455
loss: 1.004391074180603,grad_norm: 0.6860245665740321, iteration: 342456
loss: 0.9904680848121643,grad_norm: 0.7108193277629736, iteration: 342457
loss: 1.0392591953277588,grad_norm: 0.7788440433265851, iteration: 342458
loss: 1.0080244541168213,grad_norm: 0.9477692720056492, iteration: 342459
loss: 0.9787180423736572,grad_norm: 0.7102657238742559, iteration: 342460
loss: 1.011614441871643,grad_norm: 0.8511565448539292, iteration: 342461
loss: 1.0183862447738647,grad_norm: 0.9289354292374219, iteration: 342462
loss: 1.0171319246292114,grad_norm: 0.750750131541758, iteration: 342463
loss: 1.0269616842269897,grad_norm: 0.9882073035193402, iteration: 342464
loss: 0.9923087954521179,grad_norm: 0.6518143920699931, iteration: 342465
loss: 1.0084699392318726,grad_norm: 0.755539729392397, iteration: 342466
loss: 1.0060064792633057,grad_norm: 0.8349714562508899, iteration: 342467
loss: 0.9841269254684448,grad_norm: 0.8948156742754588, iteration: 342468
loss: 0.998178243637085,grad_norm: 0.8545277666615845, iteration: 342469
loss: 0.9722688794136047,grad_norm: 0.8456495168513406, iteration: 342470
loss: 1.0184370279312134,grad_norm: 0.822596508189934, iteration: 342471
loss: 0.9835487008094788,grad_norm: 0.7739917423349293, iteration: 342472
loss: 1.0442543029785156,grad_norm: 0.8320953878740648, iteration: 342473
loss: 0.9916555881500244,grad_norm: 0.87964479135061, iteration: 342474
loss: 1.024815320968628,grad_norm: 0.9816750056966951, iteration: 342475
loss: 0.9962568283081055,grad_norm: 0.791346823937068, iteration: 342476
loss: 1.14827561378479,grad_norm: 0.9999998319279019, iteration: 342477
loss: 1.012891173362732,grad_norm: 0.9457814016574176, iteration: 342478
loss: 1.015330195426941,grad_norm: 0.8823377008444577, iteration: 342479
loss: 0.9897862076759338,grad_norm: 0.9880361750563471, iteration: 342480
loss: 0.9643983244895935,grad_norm: 0.8257029142289098, iteration: 342481
loss: 0.9803067445755005,grad_norm: 0.848215781319474, iteration: 342482
loss: 1.053584098815918,grad_norm: 0.8658737152831791, iteration: 342483
loss: 0.9946774244308472,grad_norm: 0.835892259923007, iteration: 342484
loss: 1.1104520559310913,grad_norm: 0.9999998181029642, iteration: 342485
loss: 0.9944076538085938,grad_norm: 0.810990449036249, iteration: 342486
loss: 0.9769524335861206,grad_norm: 0.945076809414304, iteration: 342487
loss: 1.0242106914520264,grad_norm: 0.8829053894708441, iteration: 342488
loss: 1.0100001096725464,grad_norm: 0.7139209864992039, iteration: 342489
loss: 0.9711938500404358,grad_norm: 0.7166558144721246, iteration: 342490
loss: 1.056734323501587,grad_norm: 0.999999159184506, iteration: 342491
loss: 0.9839507341384888,grad_norm: 0.7464352445502097, iteration: 342492
loss: 1.0018285512924194,grad_norm: 0.9770729130448882, iteration: 342493
loss: 1.0647211074829102,grad_norm: 0.9999990895978466, iteration: 342494
loss: 1.04364013671875,grad_norm: 0.7417728913777393, iteration: 342495
loss: 0.962314248085022,grad_norm: 0.8893444581235096, iteration: 342496
loss: 1.0849230289459229,grad_norm: 0.97806577955844, iteration: 342497
loss: 1.1232126951217651,grad_norm: 0.9587002248103842, iteration: 342498
loss: 1.0151928663253784,grad_norm: 0.6612748079621116, iteration: 342499
loss: 0.9779389500617981,grad_norm: 0.999999280343328, iteration: 342500
loss: 1.0514663457870483,grad_norm: 0.9999991653335548, iteration: 342501
loss: 1.0033854246139526,grad_norm: 0.999999075823714, iteration: 342502
loss: 0.980977475643158,grad_norm: 0.899396501954006, iteration: 342503
loss: 1.0629616975784302,grad_norm: 0.9806138183888751, iteration: 342504
loss: 0.9941644668579102,grad_norm: 0.7818858823549095, iteration: 342505
loss: 1.0065654516220093,grad_norm: 0.7854478374057513, iteration: 342506
loss: 0.9887086153030396,grad_norm: 0.8710270269363304, iteration: 342507
loss: 1.015564203262329,grad_norm: 0.751151972074439, iteration: 342508
loss: 1.0064139366149902,grad_norm: 0.9999991641993364, iteration: 342509
loss: 1.0190902948379517,grad_norm: 0.8335332854350463, iteration: 342510
loss: 1.0001720190048218,grad_norm: 0.9745526237979072, iteration: 342511
loss: 1.0234631299972534,grad_norm: 0.8206825067049827, iteration: 342512
loss: 0.9772610068321228,grad_norm: 0.7931872908544975, iteration: 342513
loss: 1.0063607692718506,grad_norm: 0.8243026721522058, iteration: 342514
loss: 1.0436369180679321,grad_norm: 0.9999994336354984, iteration: 342515
loss: 1.007114291191101,grad_norm: 0.9364856004799955, iteration: 342516
loss: 1.01968514919281,grad_norm: 0.8391521120021306, iteration: 342517
loss: 0.9941360354423523,grad_norm: 0.9999997146962549, iteration: 342518
loss: 1.0172032117843628,grad_norm: 0.9999994885968627, iteration: 342519
loss: 1.0529649257659912,grad_norm: 0.9419214000286399, iteration: 342520
loss: 0.995359480381012,grad_norm: 0.8346381085252083, iteration: 342521
loss: 1.123673915863037,grad_norm: 0.9999999447289873, iteration: 342522
loss: 1.0013275146484375,grad_norm: 0.7808883913717545, iteration: 342523
loss: 0.9976046085357666,grad_norm: 0.8737661021742077, iteration: 342524
loss: 1.07610285282135,grad_norm: 0.8885886003442166, iteration: 342525
loss: 0.9859105944633484,grad_norm: 0.8368184451955331, iteration: 342526
loss: 0.9998687505722046,grad_norm: 0.8259318431768211, iteration: 342527
loss: 0.9477911591529846,grad_norm: 0.832986379742777, iteration: 342528
loss: 1.3548859357833862,grad_norm: 0.9999997306631642, iteration: 342529
loss: 0.9746724367141724,grad_norm: 0.8875878884386915, iteration: 342530
loss: 1.0685746669769287,grad_norm: 0.9999996449583991, iteration: 342531
loss: 0.9755185842514038,grad_norm: 0.8006482842084365, iteration: 342532
loss: 1.0205129384994507,grad_norm: 0.9999999187726749, iteration: 342533
loss: 1.0095083713531494,grad_norm: 0.8481638956608513, iteration: 342534
loss: 1.03513503074646,grad_norm: 0.8502108774330099, iteration: 342535
loss: 1.150194764137268,grad_norm: 0.9999992981975732, iteration: 342536
loss: 1.1616483926773071,grad_norm: 0.9999998719032898, iteration: 342537
loss: 0.9942062497138977,grad_norm: 0.8267358340602685, iteration: 342538
loss: 0.9962427020072937,grad_norm: 0.6932508920357859, iteration: 342539
loss: 1.012589454650879,grad_norm: 0.999999054181899, iteration: 342540
loss: 1.0513287782669067,grad_norm: 0.8054398205813056, iteration: 342541
loss: 1.0220195055007935,grad_norm: 0.8585107210954641, iteration: 342542
loss: 1.0547906160354614,grad_norm: 0.7746192861704093, iteration: 342543
loss: 0.9607840180397034,grad_norm: 0.9890457065646004, iteration: 342544
loss: 1.0587718486785889,grad_norm: 0.9999991678429253, iteration: 342545
loss: 0.9974335432052612,grad_norm: 0.9299793855351505, iteration: 342546
loss: 0.9747055172920227,grad_norm: 0.8478282709013079, iteration: 342547
loss: 0.9916082620620728,grad_norm: 0.678766814153247, iteration: 342548
loss: 0.9666842818260193,grad_norm: 0.9031540678885894, iteration: 342549
loss: 1.0268645286560059,grad_norm: 0.9464139989765533, iteration: 342550
loss: 0.9972279667854309,grad_norm: 0.7413968079915347, iteration: 342551
loss: 0.9990673661231995,grad_norm: 0.897441511248943, iteration: 342552
loss: 1.0094102621078491,grad_norm: 0.9663857158035261, iteration: 342553
loss: 0.9731918573379517,grad_norm: 0.8555348877544822, iteration: 342554
loss: 1.0563981533050537,grad_norm: 0.7803652728715592, iteration: 342555
loss: 1.0125453472137451,grad_norm: 0.7451095009154556, iteration: 342556
loss: 1.0019241571426392,grad_norm: 0.8919875950537227, iteration: 342557
loss: 0.9828417301177979,grad_norm: 0.9337075661001517, iteration: 342558
loss: 0.9974209070205688,grad_norm: 0.9378535174568124, iteration: 342559
loss: 1.0171740055084229,grad_norm: 0.831648722268154, iteration: 342560
loss: 1.0093191862106323,grad_norm: 0.817418778233789, iteration: 342561
loss: 1.001732349395752,grad_norm: 0.999999884541813, iteration: 342562
loss: 0.9681788682937622,grad_norm: 0.807765472034156, iteration: 342563
loss: 0.9710667729377747,grad_norm: 0.9620817327078571, iteration: 342564
loss: 1.0493277311325073,grad_norm: 0.9999997245910233, iteration: 342565
loss: 0.9948546886444092,grad_norm: 0.7204156907905181, iteration: 342566
loss: 1.0094226598739624,grad_norm: 0.7889260189974375, iteration: 342567
loss: 0.9723110795021057,grad_norm: 0.863319677870541, iteration: 342568
loss: 0.9987059831619263,grad_norm: 0.9040714606442659, iteration: 342569
loss: 1.0206685066223145,grad_norm: 0.9471714191903712, iteration: 342570
loss: 1.2708357572555542,grad_norm: 0.9999991906188489, iteration: 342571
loss: 0.967566728591919,grad_norm: 0.7625079755046776, iteration: 342572
loss: 0.9614874720573425,grad_norm: 0.8713131331939702, iteration: 342573
loss: 1.0099612474441528,grad_norm: 0.8620280472780133, iteration: 342574
loss: 1.0349069833755493,grad_norm: 0.9999996053206012, iteration: 342575
loss: 1.0430099964141846,grad_norm: 0.7025293480587312, iteration: 342576
loss: 0.9719407558441162,grad_norm: 0.9845727172044334, iteration: 342577
loss: 1.1856780052185059,grad_norm: 0.9999993687237513, iteration: 342578
loss: 0.9758108854293823,grad_norm: 0.8788763563640248, iteration: 342579
loss: 0.9985371232032776,grad_norm: 0.8219209388589097, iteration: 342580
loss: 1.0072600841522217,grad_norm: 0.6782353184761843, iteration: 342581
loss: 1.1067357063293457,grad_norm: 0.9999998281279178, iteration: 342582
loss: 1.0094012022018433,grad_norm: 0.8511433371307249, iteration: 342583
loss: 0.9948844313621521,grad_norm: 0.7778710153788041, iteration: 342584
loss: 1.0196164846420288,grad_norm: 0.9999993888126146, iteration: 342585
loss: 0.9964814186096191,grad_norm: 0.8309813976686808, iteration: 342586
loss: 0.9945329427719116,grad_norm: 0.9999992170887025, iteration: 342587
loss: 0.9667122960090637,grad_norm: 0.7578017380929962, iteration: 342588
loss: 0.979787290096283,grad_norm: 0.781772048113842, iteration: 342589
loss: 1.0294462442398071,grad_norm: 0.7378246757985509, iteration: 342590
loss: 1.0007705688476562,grad_norm: 0.8668852921456673, iteration: 342591
loss: 0.9838889837265015,grad_norm: 0.9999991977901822, iteration: 342592
loss: 0.9833846092224121,grad_norm: 0.8033555423349552, iteration: 342593
loss: 1.0076113939285278,grad_norm: 0.8695997629695159, iteration: 342594
loss: 0.9591423273086548,grad_norm: 0.9999991156665587, iteration: 342595
loss: 0.9878780841827393,grad_norm: 0.9999997979287657, iteration: 342596
loss: 1.0194789171218872,grad_norm: 0.7311623960386759, iteration: 342597
loss: 1.0016939640045166,grad_norm: 0.8594724467606181, iteration: 342598
loss: 0.9807224273681641,grad_norm: 0.8194507474436586, iteration: 342599
loss: 0.997319757938385,grad_norm: 0.7216390852712572, iteration: 342600
loss: 1.0450563430786133,grad_norm: 0.999999112849487, iteration: 342601
loss: 0.971486508846283,grad_norm: 0.8376414766277874, iteration: 342602
loss: 1.1048558950424194,grad_norm: 0.999999391098095, iteration: 342603
loss: 0.9722712635993958,grad_norm: 0.9364872802702731, iteration: 342604
loss: 0.9903751015663147,grad_norm: 0.9999998880013902, iteration: 342605
loss: 1.0022114515304565,grad_norm: 0.9999992200935061, iteration: 342606
loss: 1.066528558731079,grad_norm: 0.9434866331756528, iteration: 342607
loss: 0.9731164574623108,grad_norm: 0.7912282903435148, iteration: 342608
loss: 0.9904303550720215,grad_norm: 0.9792505715912685, iteration: 342609
loss: 0.9977574348449707,grad_norm: 0.7999777127290321, iteration: 342610
loss: 1.045712947845459,grad_norm: 0.8638567036425999, iteration: 342611
loss: 0.9858115911483765,grad_norm: 0.9999991813748719, iteration: 342612
loss: 1.0244126319885254,grad_norm: 0.760171652517081, iteration: 342613
loss: 1.0170490741729736,grad_norm: 0.8485400080630706, iteration: 342614
loss: 1.0571078062057495,grad_norm: 0.9999993706834598, iteration: 342615
loss: 1.0132923126220703,grad_norm: 0.667918808828452, iteration: 342616
loss: 1.0181201696395874,grad_norm: 0.7871845746654987, iteration: 342617
loss: 1.0063284635543823,grad_norm: 0.8230111197098687, iteration: 342618
loss: 0.9953457713127136,grad_norm: 0.7721870310936213, iteration: 342619
loss: 1.0189006328582764,grad_norm: 0.6945198623095133, iteration: 342620
loss: 1.0437663793563843,grad_norm: 0.8357138741998933, iteration: 342621
loss: 1.13628089427948,grad_norm: 0.9999994541233702, iteration: 342622
loss: 1.0017305612564087,grad_norm: 0.8125349876435841, iteration: 342623
loss: 1.0404943227767944,grad_norm: 0.878985255293497, iteration: 342624
loss: 1.005064606666565,grad_norm: 0.9999990626924016, iteration: 342625
loss: 1.0123080015182495,grad_norm: 0.9999991982760367, iteration: 342626
loss: 1.0350910425186157,grad_norm: 0.9999992719096281, iteration: 342627
loss: 0.9851407408714294,grad_norm: 0.9999990379560536, iteration: 342628
loss: 1.0443111658096313,grad_norm: 0.8440011343061169, iteration: 342629
loss: 1.035522222518921,grad_norm: 0.999999100727534, iteration: 342630
loss: 1.0182267427444458,grad_norm: 0.7797219654076935, iteration: 342631
loss: 1.0215038061141968,grad_norm: 0.8100855152180294, iteration: 342632
loss: 0.9943065047264099,grad_norm: 0.7020069596132527, iteration: 342633
loss: 1.0018186569213867,grad_norm: 0.9999991571026878, iteration: 342634
loss: 0.9749318957328796,grad_norm: 0.8344876303046427, iteration: 342635
loss: 0.953423023223877,grad_norm: 0.763855689674957, iteration: 342636
loss: 0.9874619245529175,grad_norm: 0.9009053516074633, iteration: 342637
loss: 1.069170594215393,grad_norm: 0.9999996826385319, iteration: 342638
loss: 0.9964609742164612,grad_norm: 0.7027848265493539, iteration: 342639
loss: 1.0153926610946655,grad_norm: 0.9999997685952586, iteration: 342640
loss: 0.9674175381660461,grad_norm: 0.7516268277574683, iteration: 342641
loss: 1.0097798109054565,grad_norm: 0.8074166222037436, iteration: 342642
loss: 0.9637119174003601,grad_norm: 0.8452746506215945, iteration: 342643
loss: 1.0208892822265625,grad_norm: 0.834407207398788, iteration: 342644
loss: 1.0023130178451538,grad_norm: 0.8550159098200114, iteration: 342645
loss: 1.0157415866851807,grad_norm: 0.8325326413194021, iteration: 342646
loss: 0.9978708624839783,grad_norm: 0.7711959917567079, iteration: 342647
loss: 1.0093926191329956,grad_norm: 0.7290054637529955, iteration: 342648
loss: 1.0167235136032104,grad_norm: 0.8907877348970599, iteration: 342649
loss: 1.0116544961929321,grad_norm: 0.7680373115118401, iteration: 342650
loss: 1.0077104568481445,grad_norm: 0.9999992123838018, iteration: 342651
loss: 1.0379215478897095,grad_norm: 0.9999999082594705, iteration: 342652
loss: 1.0639592409133911,grad_norm: 0.9999990319648068, iteration: 342653
loss: 1.0065035820007324,grad_norm: 0.8849070075372424, iteration: 342654
loss: 1.0449997186660767,grad_norm: 0.9999992222485039, iteration: 342655
loss: 1.0247793197631836,grad_norm: 1.0000000128281186, iteration: 342656
loss: 0.9863675832748413,grad_norm: 0.7981821344198199, iteration: 342657
loss: 0.9618630409240723,grad_norm: 0.7228745008189644, iteration: 342658
loss: 1.0163087844848633,grad_norm: 0.7900280117018221, iteration: 342659
loss: 0.9795395731925964,grad_norm: 0.897000708603353, iteration: 342660
loss: 0.9860988855361938,grad_norm: 0.9999990169357843, iteration: 342661
loss: 0.9845831990242004,grad_norm: 0.7428287123816627, iteration: 342662
loss: 1.0355303287506104,grad_norm: 0.8952869009512977, iteration: 342663
loss: 0.9795629382133484,grad_norm: 0.9999990680204258, iteration: 342664
loss: 0.988493025302887,grad_norm: 0.9999991481329549, iteration: 342665
loss: 1.0244534015655518,grad_norm: 0.8005082108718437, iteration: 342666
loss: 1.0563186407089233,grad_norm: 0.8801595099476116, iteration: 342667
loss: 1.0838288068771362,grad_norm: 0.8978518120144724, iteration: 342668
loss: 0.9926175475120544,grad_norm: 0.8187089679776157, iteration: 342669
loss: 0.9721306562423706,grad_norm: 0.6982500090279041, iteration: 342670
loss: 1.007641315460205,grad_norm: 0.6953256722372119, iteration: 342671
loss: 1.073182463645935,grad_norm: 0.9999996992334387, iteration: 342672
loss: 0.9632590413093567,grad_norm: 0.8300346687460687, iteration: 342673
loss: 1.0250599384307861,grad_norm: 0.9999999039523216, iteration: 342674
loss: 1.0244020223617554,grad_norm: 0.8627412412799194, iteration: 342675
loss: 0.9691773056983948,grad_norm: 0.7466900812591416, iteration: 342676
loss: 1.006098985671997,grad_norm: 0.999999015365765, iteration: 342677
loss: 1.017760157585144,grad_norm: 0.8154427211307258, iteration: 342678
loss: 0.9997199773788452,grad_norm: 0.9080106712732193, iteration: 342679
loss: 1.0271364450454712,grad_norm: 0.9499598366605996, iteration: 342680
loss: 0.9981825351715088,grad_norm: 0.7651990926077858, iteration: 342681
loss: 1.0177576541900635,grad_norm: 0.7434533016951744, iteration: 342682
loss: 1.0062415599822998,grad_norm: 0.8384650668230594, iteration: 342683
loss: 0.9830557703971863,grad_norm: 0.8353040300530462, iteration: 342684
loss: 1.027805209159851,grad_norm: 0.9999996296894221, iteration: 342685
loss: 0.984914243221283,grad_norm: 0.9819441608642497, iteration: 342686
loss: 0.9695289134979248,grad_norm: 0.9999998394332212, iteration: 342687
loss: 1.1211742162704468,grad_norm: 0.9999996632305186, iteration: 342688
loss: 0.9894701838493347,grad_norm: 0.9999993150979198, iteration: 342689
loss: 0.9601899981498718,grad_norm: 0.7634002766837045, iteration: 342690
loss: 1.0499029159545898,grad_norm: 0.9999996286183024, iteration: 342691
loss: 0.962005615234375,grad_norm: 0.7907377827302502, iteration: 342692
loss: 1.0175243616104126,grad_norm: 0.9568986476512001, iteration: 342693
loss: 0.9495660066604614,grad_norm: 0.9594593379624611, iteration: 342694
loss: 0.9733746647834778,grad_norm: 0.8359389649225072, iteration: 342695
loss: 0.9764031171798706,grad_norm: 0.9708808091527485, iteration: 342696
loss: 1.012379765510559,grad_norm: 0.7852078449077285, iteration: 342697
loss: 1.0280064344406128,grad_norm: 0.8139176350879344, iteration: 342698
loss: 1.009655475616455,grad_norm: 0.9999990896246115, iteration: 342699
loss: 1.059556484222412,grad_norm: 0.8316343511605478, iteration: 342700
loss: 1.0030725002288818,grad_norm: 0.9873704495348851, iteration: 342701
loss: 1.0149061679840088,grad_norm: 0.7812159377954783, iteration: 342702
loss: 0.9835411906242371,grad_norm: 0.8240470962601408, iteration: 342703
loss: 1.0026735067367554,grad_norm: 0.7640009636925632, iteration: 342704
loss: 1.0120701789855957,grad_norm: 0.9999989986120472, iteration: 342705
loss: 1.021746277809143,grad_norm: 0.999999932424106, iteration: 342706
loss: 0.9827673435211182,grad_norm: 0.6987412561072527, iteration: 342707
loss: 0.9753859043121338,grad_norm: 0.9999997999328151, iteration: 342708
loss: 0.9939619302749634,grad_norm: 0.735279901705888, iteration: 342709
loss: 0.976543664932251,grad_norm: 0.7685559060376272, iteration: 342710
loss: 1.0089280605316162,grad_norm: 0.9254376846679014, iteration: 342711
loss: 1.0267714262008667,grad_norm: 0.9999991507778728, iteration: 342712
loss: 1.0265798568725586,grad_norm: 0.7994335574556888, iteration: 342713
loss: 0.9991872310638428,grad_norm: 0.8149618296856952, iteration: 342714
loss: 1.0431580543518066,grad_norm: 0.9999997219154829, iteration: 342715
loss: 0.9848838448524475,grad_norm: 0.8094071417289935, iteration: 342716
loss: 1.0030946731567383,grad_norm: 0.7001894436009111, iteration: 342717
loss: 1.001677393913269,grad_norm: 0.8584501622897173, iteration: 342718
loss: 1.018908977508545,grad_norm: 0.7725241706006241, iteration: 342719
loss: 1.0659339427947998,grad_norm: 0.9999997375649924, iteration: 342720
loss: 1.030535340309143,grad_norm: 0.8806419507672192, iteration: 342721
loss: 0.9746123552322388,grad_norm: 0.879327681562269, iteration: 342722
loss: 1.097508430480957,grad_norm: 0.9648108754873161, iteration: 342723
loss: 1.032293438911438,grad_norm: 0.9999993766574607, iteration: 342724
loss: 0.9977330565452576,grad_norm: 0.745932750624241, iteration: 342725
loss: 0.9610313773155212,grad_norm: 0.8895231138453815, iteration: 342726
loss: 1.035075306892395,grad_norm: 0.9770755632900676, iteration: 342727
loss: 0.9977400302886963,grad_norm: 0.9075804735885071, iteration: 342728
loss: 0.9754035472869873,grad_norm: 0.8035446601820094, iteration: 342729
loss: 1.0067708492279053,grad_norm: 0.7788416288113175, iteration: 342730
loss: 1.0231940746307373,grad_norm: 0.9999991297052242, iteration: 342731
loss: 0.9666232466697693,grad_norm: 0.8773740156036394, iteration: 342732
loss: 1.0039113759994507,grad_norm: 0.7920282623162269, iteration: 342733
loss: 1.0124818086624146,grad_norm: 0.798734043955864, iteration: 342734
loss: 0.9963687658309937,grad_norm: 0.7790477584379226, iteration: 342735
loss: 0.9937931299209595,grad_norm: 0.8378032152338816, iteration: 342736
loss: 1.0205719470977783,grad_norm: 0.789570709607251, iteration: 342737
loss: 0.9957930445671082,grad_norm: 0.8377416281172327, iteration: 342738
loss: 0.9517560601234436,grad_norm: 0.756675538337477, iteration: 342739
loss: 1.058089017868042,grad_norm: 0.8374579735610431, iteration: 342740
loss: 1.003799557685852,grad_norm: 0.8099441385832562, iteration: 342741
loss: 0.990157425403595,grad_norm: 0.9561370444834564, iteration: 342742
loss: 0.9959937334060669,grad_norm: 0.9711452283284812, iteration: 342743
loss: 1.004072666168213,grad_norm: 0.9550874751385455, iteration: 342744
loss: 0.9866849184036255,grad_norm: 0.9999989422414156, iteration: 342745
loss: 1.0117427110671997,grad_norm: 0.999999461919204, iteration: 342746
loss: 0.9858279824256897,grad_norm: 0.8702195214073004, iteration: 342747
loss: 0.9699364304542542,grad_norm: 0.709667209869673, iteration: 342748
loss: 1.0113049745559692,grad_norm: 0.9999994040957062, iteration: 342749
loss: 0.970115065574646,grad_norm: 0.9229000300756163, iteration: 342750
loss: 1.0024235248565674,grad_norm: 0.8264988283105974, iteration: 342751
loss: 1.044373869895935,grad_norm: 0.9044003197105469, iteration: 342752
loss: 1.0149518251419067,grad_norm: 0.916948779437969, iteration: 342753
loss: 0.9985265135765076,grad_norm: 0.9600229782282073, iteration: 342754
loss: 1.0542658567428589,grad_norm: 0.9999995254802494, iteration: 342755
loss: 0.9956772327423096,grad_norm: 0.999999019143655, iteration: 342756
loss: 1.002055287361145,grad_norm: 0.8679427658704942, iteration: 342757
loss: 1.0118968486785889,grad_norm: 0.915951530795329, iteration: 342758
loss: 1.0157233476638794,grad_norm: 0.9999994931512016, iteration: 342759
loss: 1.0285632610321045,grad_norm: 0.8085688775774654, iteration: 342760
loss: 0.9926385283470154,grad_norm: 0.9112075094898338, iteration: 342761
loss: 0.9975708723068237,grad_norm: 0.7926418466387029, iteration: 342762
loss: 0.9591086506843567,grad_norm: 0.8335010843423065, iteration: 342763
loss: 1.0120328664779663,grad_norm: 0.7381597265160509, iteration: 342764
loss: 1.006129264831543,grad_norm: 0.9410705286834389, iteration: 342765
loss: 1.0026851892471313,grad_norm: 0.9999994551573601, iteration: 342766
loss: 1.0404322147369385,grad_norm: 0.9999998025406971, iteration: 342767
loss: 1.0647530555725098,grad_norm: 0.9999994119939586, iteration: 342768
loss: 1.0069003105163574,grad_norm: 0.8631923711169555, iteration: 342769
loss: 0.9560027122497559,grad_norm: 0.6732627791729503, iteration: 342770
loss: 1.0661715269088745,grad_norm: 0.9999999606689041, iteration: 342771
loss: 0.9958743453025818,grad_norm: 0.7737365686020004, iteration: 342772
loss: 1.0282108783721924,grad_norm: 0.8571460177616874, iteration: 342773
loss: 0.9889819622039795,grad_norm: 0.7921966729276764, iteration: 342774
loss: 1.0252351760864258,grad_norm: 0.7826637198336455, iteration: 342775
loss: 0.9602324366569519,grad_norm: 0.9064088122767042, iteration: 342776
loss: 0.9840790629386902,grad_norm: 0.9999999333573557, iteration: 342777
loss: 1.0013734102249146,grad_norm: 0.8473378146698309, iteration: 342778
loss: 0.9375775456428528,grad_norm: 0.9764690629913403, iteration: 342779
loss: 1.0109878778457642,grad_norm: 0.831076752016541, iteration: 342780
loss: 1.154980182647705,grad_norm: 0.9999996225807641, iteration: 342781
loss: 1.0237617492675781,grad_norm: 0.9999993586272383, iteration: 342782
loss: 1.0839455127716064,grad_norm: 0.9524320933457425, iteration: 342783
loss: 1.0343372821807861,grad_norm: 0.9156016249930586, iteration: 342784
loss: 1.031258463859558,grad_norm: 0.9752240117990614, iteration: 342785
loss: 0.9903514981269836,grad_norm: 0.977714374016656, iteration: 342786
loss: 0.9989987015724182,grad_norm: 0.7509063645637615, iteration: 342787
loss: 1.0014910697937012,grad_norm: 0.898035747107984, iteration: 342788
loss: 1.0292636156082153,grad_norm: 0.8839090346816296, iteration: 342789
loss: 0.9737260937690735,grad_norm: 0.826102105024471, iteration: 342790
loss: 0.99054354429245,grad_norm: 0.7461912060566718, iteration: 342791
loss: 1.055745005607605,grad_norm: 0.9162251065753413, iteration: 342792
loss: 1.0575268268585205,grad_norm: 0.9999998752255927, iteration: 342793
loss: 1.066760540008545,grad_norm: 0.9999998267385201, iteration: 342794
loss: 0.9762269854545593,grad_norm: 0.8216169283583663, iteration: 342795
loss: 0.9959878325462341,grad_norm: 0.672989027707625, iteration: 342796
loss: 1.0004348754882812,grad_norm: 0.7246320770421042, iteration: 342797
loss: 0.9554945230484009,grad_norm: 0.7947052753517126, iteration: 342798
loss: 1.0427659749984741,grad_norm: 0.7994099491410869, iteration: 342799
loss: 1.0129035711288452,grad_norm: 0.7909883037335106, iteration: 342800
loss: 1.0101760625839233,grad_norm: 0.9295064358609603, iteration: 342801
loss: 1.0040946006774902,grad_norm: 0.8980046427597156, iteration: 342802
loss: 1.05341637134552,grad_norm: 0.9999999019502187, iteration: 342803
loss: 1.0645211935043335,grad_norm: 0.9440310168330484, iteration: 342804
loss: 1.0516310930252075,grad_norm: 0.9913358410800786, iteration: 342805
loss: 1.0275248289108276,grad_norm: 0.9870590839218575, iteration: 342806
loss: 1.0653703212738037,grad_norm: 0.8979126665364772, iteration: 342807
loss: 1.0302608013153076,grad_norm: 0.7949691325504459, iteration: 342808
loss: 1.03600013256073,grad_norm: 0.9155133518487611, iteration: 342809
loss: 1.0080630779266357,grad_norm: 0.8329810603850883, iteration: 342810
loss: 1.1049779653549194,grad_norm: 0.999999392996349, iteration: 342811
loss: 0.9725733995437622,grad_norm: 0.7613616639801912, iteration: 342812
loss: 1.060303807258606,grad_norm: 0.9999997920585694, iteration: 342813
loss: 1.0241568088531494,grad_norm: 0.9999995030441697, iteration: 342814
loss: 1.0120644569396973,grad_norm: 0.9102785835143398, iteration: 342815
loss: 1.0938973426818848,grad_norm: 0.9999993315944952, iteration: 342816
loss: 1.0821322202682495,grad_norm: 0.8304804632319149, iteration: 342817
loss: 1.0230002403259277,grad_norm: 0.8436571023453623, iteration: 342818
loss: 1.0358855724334717,grad_norm: 0.7247070751931952, iteration: 342819
loss: 1.0662486553192139,grad_norm: 0.9999993070935892, iteration: 342820
loss: 0.9887466430664062,grad_norm: 0.951094619288665, iteration: 342821
loss: 1.0491300821304321,grad_norm: 0.9999997641250177, iteration: 342822
loss: 0.943072497844696,grad_norm: 0.9383949269428105, iteration: 342823
loss: 1.0276845693588257,grad_norm: 0.9557992425449413, iteration: 342824
loss: 1.0495535135269165,grad_norm: 0.9999995831316429, iteration: 342825
loss: 1.0076299905776978,grad_norm: 0.8039132931635863, iteration: 342826
loss: 1.000037670135498,grad_norm: 0.8131648066599426, iteration: 342827
loss: 1.1732683181762695,grad_norm: 0.999999543935265, iteration: 342828
loss: 1.1149898767471313,grad_norm: 0.9999998393163199, iteration: 342829
loss: 1.1664282083511353,grad_norm: 0.9999997271204142, iteration: 342830
loss: 0.9558769464492798,grad_norm: 0.8976337871163212, iteration: 342831
loss: 1.0020465850830078,grad_norm: 0.9369917261352713, iteration: 342832
loss: 1.0775949954986572,grad_norm: 0.9534376456361073, iteration: 342833
loss: 1.0818407535552979,grad_norm: 0.9999993823647807, iteration: 342834
loss: 1.024364709854126,grad_norm: 0.8161866651740626, iteration: 342835
loss: 1.052879810333252,grad_norm: 0.9214503336083472, iteration: 342836
loss: 1.044851541519165,grad_norm: 0.9999999037486234, iteration: 342837
loss: 0.987460196018219,grad_norm: 0.7523395042600246, iteration: 342838
loss: 1.0355174541473389,grad_norm: 0.9999992826563104, iteration: 342839
loss: 1.0329967737197876,grad_norm: 0.9999992531993783, iteration: 342840
loss: 1.0648376941680908,grad_norm: 0.972661143259488, iteration: 342841
loss: 0.9801395535469055,grad_norm: 0.9999992073715833, iteration: 342842
loss: 1.1868137121200562,grad_norm: 0.9999994497483452, iteration: 342843
loss: 1.0170619487762451,grad_norm: 0.8994138388155427, iteration: 342844
loss: 1.0310423374176025,grad_norm: 0.999999260406648, iteration: 342845
loss: 1.0646562576293945,grad_norm: 0.9999993110387745, iteration: 342846
loss: 1.0607746839523315,grad_norm: 0.9895159350885981, iteration: 342847
loss: 0.9594736099243164,grad_norm: 0.7349337455715348, iteration: 342848
loss: 1.093990445137024,grad_norm: 0.9999998443136705, iteration: 342849
loss: 0.9721660614013672,grad_norm: 0.8014103384983337, iteration: 342850
loss: 1.0330870151519775,grad_norm: 0.961438609312006, iteration: 342851
loss: 1.001656174659729,grad_norm: 0.7765101923600777, iteration: 342852
loss: 1.1051690578460693,grad_norm: 0.9757924709276873, iteration: 342853
loss: 1.0309288501739502,grad_norm: 0.9999997627444775, iteration: 342854
loss: 0.9715982675552368,grad_norm: 0.9282667881686691, iteration: 342855
loss: 1.0266367197036743,grad_norm: 0.9999999302696629, iteration: 342856
loss: 1.021621823310852,grad_norm: 0.6860677642115297, iteration: 342857
loss: 0.9972419142723083,grad_norm: 0.6701534218923595, iteration: 342858
loss: 1.0011789798736572,grad_norm: 0.9842719917653419, iteration: 342859
loss: 0.9724958539009094,grad_norm: 0.7851764579353344, iteration: 342860
loss: 0.9758935570716858,grad_norm: 0.7769149491591091, iteration: 342861
loss: 1.1703788042068481,grad_norm: 0.9999998357806229, iteration: 342862
loss: 1.0186395645141602,grad_norm: 0.9064946894504654, iteration: 342863
loss: 0.9736323356628418,grad_norm: 0.9016704470570175, iteration: 342864
loss: 0.9975335001945496,grad_norm: 0.999999828395634, iteration: 342865
loss: 1.0530545711517334,grad_norm: 0.999999186247346, iteration: 342866
loss: 0.995415449142456,grad_norm: 0.9999996188168788, iteration: 342867
loss: 1.0575554370880127,grad_norm: 0.7321727392412607, iteration: 342868
loss: 1.0387903451919556,grad_norm: 0.9999991525873662, iteration: 342869
loss: 1.2421481609344482,grad_norm: 0.999999593415162, iteration: 342870
loss: 1.0839091539382935,grad_norm: 0.999999502694454, iteration: 342871
loss: 0.9701706171035767,grad_norm: 0.8628813192231518, iteration: 342872
loss: 1.0161422491073608,grad_norm: 0.8490343119645671, iteration: 342873
loss: 1.1952458620071411,grad_norm: 0.9999997070397804, iteration: 342874
loss: 0.9863851070404053,grad_norm: 0.9578536998980067, iteration: 342875
loss: 1.0161789655685425,grad_norm: 0.9161942717413196, iteration: 342876
loss: 1.0181750059127808,grad_norm: 0.9999995816255243, iteration: 342877
loss: 1.1420862674713135,grad_norm: 0.9999993186310067, iteration: 342878
loss: 1.034730076789856,grad_norm: 0.9986990688428209, iteration: 342879
loss: 1.014541506767273,grad_norm: 0.8429358522111975, iteration: 342880
loss: 1.0864747762680054,grad_norm: 0.9771189902124258, iteration: 342881
loss: 1.0347968339920044,grad_norm: 0.7233114843280214, iteration: 342882
loss: 1.0307731628417969,grad_norm: 0.9931333289806942, iteration: 342883
loss: 0.994574785232544,grad_norm: 0.9151752541803653, iteration: 342884
loss: 1.0287621021270752,grad_norm: 0.7931415408983131, iteration: 342885
loss: 0.9693456292152405,grad_norm: 0.8688885152234469, iteration: 342886
loss: 0.9888681769371033,grad_norm: 0.9333475638412039, iteration: 342887
loss: 1.076892614364624,grad_norm: 0.8963012356309181, iteration: 342888
loss: 1.0969123840332031,grad_norm: 0.9999990432997719, iteration: 342889
loss: 1.0592291355133057,grad_norm: 0.9999993004206038, iteration: 342890
loss: 1.084077000617981,grad_norm: 0.9999998964330173, iteration: 342891
loss: 0.9854073524475098,grad_norm: 0.9915338678067451, iteration: 342892
loss: 1.085569143295288,grad_norm: 0.7905507992209485, iteration: 342893
loss: 1.0303922891616821,grad_norm: 0.999999171502372, iteration: 342894
loss: 1.0095808506011963,grad_norm: 0.9969231650890688, iteration: 342895
loss: 1.0767332315444946,grad_norm: 0.8871291052129111, iteration: 342896
loss: 0.9774765968322754,grad_norm: 0.845178394969768, iteration: 342897
loss: 1.0736340284347534,grad_norm: 0.9813836071076913, iteration: 342898
loss: 1.0141669511795044,grad_norm: 0.9676969293251795, iteration: 342899
loss: 1.0721064805984497,grad_norm: 0.9000038582233622, iteration: 342900
loss: 1.01981782913208,grad_norm: 0.7416701568479074, iteration: 342901
loss: 1.001499056816101,grad_norm: 0.8330232337941376, iteration: 342902
loss: 0.9712837338447571,grad_norm: 0.9999998396390426, iteration: 342903
loss: 0.988929033279419,grad_norm: 0.8317369869135883, iteration: 342904
loss: 1.0363901853561401,grad_norm: 0.9999998287954267, iteration: 342905
loss: 0.9920295476913452,grad_norm: 0.8456032794545822, iteration: 342906
loss: 0.9896363019943237,grad_norm: 0.7585230519440309, iteration: 342907
loss: 0.9974680542945862,grad_norm: 0.8873521689087268, iteration: 342908
loss: 0.9795422554016113,grad_norm: 0.756630536646516, iteration: 342909
loss: 1.1182360649108887,grad_norm: 0.9999999290385249, iteration: 342910
loss: 1.0373152494430542,grad_norm: 0.9999997546057771, iteration: 342911
loss: 0.9999897480010986,grad_norm: 0.9999991985651163, iteration: 342912
loss: 1.0346616506576538,grad_norm: 0.9999997080496086, iteration: 342913
loss: 1.0486340522766113,grad_norm: 0.9999999628589578, iteration: 342914
loss: 1.0119174718856812,grad_norm: 0.9466289584354826, iteration: 342915
loss: 0.9855402708053589,grad_norm: 0.9999996144277716, iteration: 342916
loss: 0.9676224589347839,grad_norm: 0.8096712012723456, iteration: 342917
loss: 1.0019488334655762,grad_norm: 0.7282754537737824, iteration: 342918
loss: 1.0022392272949219,grad_norm: 0.9019838234034452, iteration: 342919
loss: 0.9674267172813416,grad_norm: 0.8474618615732774, iteration: 342920
loss: 0.9874699115753174,grad_norm: 0.8982752608170932, iteration: 342921
loss: 1.045096516609192,grad_norm: 0.999999069463511, iteration: 342922
loss: 0.9810011982917786,grad_norm: 0.8604977772420583, iteration: 342923
loss: 1.046924114227295,grad_norm: 0.9999993378845314, iteration: 342924
loss: 0.9581661820411682,grad_norm: 0.7103449404334645, iteration: 342925
loss: 1.0866289138793945,grad_norm: 0.9999994044528575, iteration: 342926
loss: 1.0002778768539429,grad_norm: 0.8138273188596058, iteration: 342927
loss: 1.0156598091125488,grad_norm: 0.9746679085313487, iteration: 342928
loss: 1.0237709283828735,grad_norm: 0.9999990957701211, iteration: 342929
loss: 1.0389902591705322,grad_norm: 0.7251829852869573, iteration: 342930
loss: 1.0211325883865356,grad_norm: 0.9999994445806779, iteration: 342931
loss: 1.0262607336044312,grad_norm: 0.9999994115389679, iteration: 342932
loss: 0.9881242513656616,grad_norm: 0.9038571699776099, iteration: 342933
loss: 0.985171914100647,grad_norm: 0.7427527622069025, iteration: 342934
loss: 1.0475192070007324,grad_norm: 0.9999991464138726, iteration: 342935
loss: 1.0199228525161743,grad_norm: 0.7908371749905656, iteration: 342936
loss: 1.0234370231628418,grad_norm: 0.8799325113644028, iteration: 342937
loss: 1.0618834495544434,grad_norm: 0.8770368268622818, iteration: 342938
loss: 1.1252264976501465,grad_norm: 0.9999995608358184, iteration: 342939
loss: 1.006068229675293,grad_norm: 0.7099525706429094, iteration: 342940
loss: 1.0060346126556396,grad_norm: 0.7663606753376414, iteration: 342941
loss: 1.0560673475265503,grad_norm: 0.9999992972647834, iteration: 342942
loss: 1.0173349380493164,grad_norm: 0.9999996699044015, iteration: 342943
loss: 1.0208474397659302,grad_norm: 0.7870565792738871, iteration: 342944
loss: 0.9689556956291199,grad_norm: 0.8048770299387401, iteration: 342945
loss: 1.0025075674057007,grad_norm: 0.8611984215055777, iteration: 342946
loss: 1.0445712804794312,grad_norm: 0.7129399479015647, iteration: 342947
loss: 1.0723518133163452,grad_norm: 0.9999993296938904, iteration: 342948
loss: 1.0490928888320923,grad_norm: 0.9999992802387119, iteration: 342949
loss: 1.0282021760940552,grad_norm: 0.9999997517193439, iteration: 342950
loss: 1.0143940448760986,grad_norm: 0.9888024375737791, iteration: 342951
loss: 1.0048171281814575,grad_norm: 0.7798113789846285, iteration: 342952
loss: 1.0502310991287231,grad_norm: 0.9216611358191676, iteration: 342953
loss: 1.0036168098449707,grad_norm: 0.9999993026741301, iteration: 342954
loss: 1.0144131183624268,grad_norm: 0.9999990098744662, iteration: 342955
loss: 1.05177903175354,grad_norm: 0.7582279006887964, iteration: 342956
loss: 1.0903949737548828,grad_norm: 0.9999993184559193, iteration: 342957
loss: 1.0565190315246582,grad_norm: 0.9999999170981784, iteration: 342958
loss: 1.404110074043274,grad_norm: 0.9999994324032369, iteration: 342959
loss: 0.9894236922264099,grad_norm: 0.8686921378482833, iteration: 342960
loss: 1.0323785543441772,grad_norm: 0.9999990730407682, iteration: 342961
loss: 1.160853385925293,grad_norm: 0.9879160687943593, iteration: 342962
loss: 0.9904659986495972,grad_norm: 0.93095072198749, iteration: 342963
loss: 1.1319069862365723,grad_norm: 0.999999561851131, iteration: 342964
loss: 0.9876362681388855,grad_norm: 0.8301403384746869, iteration: 342965
loss: 0.9853296875953674,grad_norm: 0.7946610559036683, iteration: 342966
loss: 1.0250866413116455,grad_norm: 0.8430184883263676, iteration: 342967
loss: 1.06277334690094,grad_norm: 0.8930853307529973, iteration: 342968
loss: 1.0138185024261475,grad_norm: 0.9999990239204744, iteration: 342969
loss: 1.149746298789978,grad_norm: 0.9999992444004138, iteration: 342970
loss: 0.9821577668190002,grad_norm: 0.7215555407847758, iteration: 342971
loss: 1.0305017232894897,grad_norm: 0.8863811347277115, iteration: 342972
loss: 1.0677258968353271,grad_norm: 0.925886530692046, iteration: 342973
loss: 1.156309962272644,grad_norm: 0.9999993139935042, iteration: 342974
loss: 1.001932144165039,grad_norm: 0.8479392651094522, iteration: 342975
loss: 1.0375776290893555,grad_norm: 0.9999991348582503, iteration: 342976
loss: 1.0923519134521484,grad_norm: 0.9999991106331629, iteration: 342977
loss: 1.0844707489013672,grad_norm: 0.9999991468224739, iteration: 342978
loss: 1.016385793685913,grad_norm: 0.9999999379922161, iteration: 342979
loss: 1.024362325668335,grad_norm: 0.7668101527595188, iteration: 342980
loss: 1.0452288389205933,grad_norm: 0.9999991297508098, iteration: 342981
loss: 1.0623656511306763,grad_norm: 0.9999992352349368, iteration: 342982
loss: 1.0034981966018677,grad_norm: 0.9999995612387633, iteration: 342983
loss: 1.0180282592773438,grad_norm: 0.7887420219376915, iteration: 342984
loss: 1.0013052225112915,grad_norm: 0.9061461642854131, iteration: 342985
loss: 1.017727017402649,grad_norm: 0.9249851297739399, iteration: 342986
loss: 1.2002397775650024,grad_norm: 0.9999998618185018, iteration: 342987
loss: 1.0133769512176514,grad_norm: 0.9999992562569049, iteration: 342988
loss: 0.9995483160018921,grad_norm: 0.740521962786174, iteration: 342989
loss: 1.0468363761901855,grad_norm: 0.9999993971519859, iteration: 342990
loss: 1.0520321130752563,grad_norm: 0.9999993565432976, iteration: 342991
loss: 0.989870011806488,grad_norm: 0.7685183729483052, iteration: 342992
loss: 0.9949842095375061,grad_norm: 0.9651279737718631, iteration: 342993
loss: 1.0284671783447266,grad_norm: 0.799847601315217, iteration: 342994
loss: 1.1213176250457764,grad_norm: 0.9999991088929109, iteration: 342995
loss: 0.9629765748977661,grad_norm: 0.9013070483655231, iteration: 342996
loss: 0.9945188164710999,grad_norm: 0.8807998647753208, iteration: 342997
loss: 0.9949571490287781,grad_norm: 0.8709868265796282, iteration: 342998
loss: 0.9890130162239075,grad_norm: 0.8697498472838415, iteration: 342999
loss: 0.9635214805603027,grad_norm: 0.7234146616038912, iteration: 343000
loss: 0.974563479423523,grad_norm: 0.7466684575710719, iteration: 343001
loss: 1.0236408710479736,grad_norm: 0.9999994641988468, iteration: 343002
loss: 1.0240168571472168,grad_norm: 0.8639742932706674, iteration: 343003
loss: 0.9910334944725037,grad_norm: 0.8750864126220076, iteration: 343004
loss: 1.067254900932312,grad_norm: 0.9999994738778228, iteration: 343005
loss: 0.9640758633613586,grad_norm: 0.8618226867942759, iteration: 343006
loss: 0.9534186124801636,grad_norm: 0.8314956613905977, iteration: 343007
loss: 0.9989100694656372,grad_norm: 0.7766581326458273, iteration: 343008
loss: 1.1568942070007324,grad_norm: 0.999999580214283, iteration: 343009
loss: 0.9735795855522156,grad_norm: 0.8294715502387389, iteration: 343010
loss: 1.0605602264404297,grad_norm: 0.7788863546570687, iteration: 343011
loss: 1.0771349668502808,grad_norm: 0.9999999201816415, iteration: 343012
loss: 0.9897420406341553,grad_norm: 0.8901519459544069, iteration: 343013
loss: 1.0400266647338867,grad_norm: 0.9999998991139527, iteration: 343014
loss: 0.9911102652549744,grad_norm: 0.6422895258310843, iteration: 343015
loss: 1.0149385929107666,grad_norm: 0.9134897125715831, iteration: 343016
loss: 1.0533769130706787,grad_norm: 0.9999997980062483, iteration: 343017
loss: 1.0256675481796265,grad_norm: 0.8653061118023194, iteration: 343018
loss: 1.0571507215499878,grad_norm: 0.9999995216284223, iteration: 343019
loss: 0.9916811585426331,grad_norm: 0.7430844558769916, iteration: 343020
loss: 1.0005037784576416,grad_norm: 0.6891284020810856, iteration: 343021
loss: 1.0561416149139404,grad_norm: 0.9999999206380774, iteration: 343022
loss: 1.0675677061080933,grad_norm: 0.9999998112440431, iteration: 343023
loss: 1.0676350593566895,grad_norm: 0.9999999827750135, iteration: 343024
loss: 1.057976484298706,grad_norm: 1.00000010680184, iteration: 343025
loss: 1.0014166831970215,grad_norm: 0.8663426056353166, iteration: 343026
loss: 0.9774458408355713,grad_norm: 0.7471964116281321, iteration: 343027
loss: 0.9878088235855103,grad_norm: 0.919781759965478, iteration: 343028
loss: 1.007383108139038,grad_norm: 0.6816914159915806, iteration: 343029
loss: 1.075778603553772,grad_norm: 0.9999989947641134, iteration: 343030
loss: 0.9901615977287292,grad_norm: 0.9999992146121949, iteration: 343031
loss: 1.0134425163269043,grad_norm: 0.7514692747360912, iteration: 343032
loss: 1.0174407958984375,grad_norm: 0.8517154141463525, iteration: 343033
loss: 1.0543913841247559,grad_norm: 0.9854256261722213, iteration: 343034
loss: 0.9866447448730469,grad_norm: 0.8087811747325535, iteration: 343035
loss: 1.0041424036026,grad_norm: 0.8440757997353805, iteration: 343036
loss: 1.0136100053787231,grad_norm: 0.9999998301121007, iteration: 343037
loss: 1.0314499139785767,grad_norm: 0.8189783213576708, iteration: 343038
loss: 0.9818878173828125,grad_norm: 0.9164470842385335, iteration: 343039
loss: 1.0302960872650146,grad_norm: 0.7880911216042975, iteration: 343040
loss: 0.9762526154518127,grad_norm: 0.8599740641091272, iteration: 343041
loss: 0.9759166836738586,grad_norm: 0.6975709910310266, iteration: 343042
loss: 1.0882164239883423,grad_norm: 0.9999990892764798, iteration: 343043
loss: 1.0939632654190063,grad_norm: 0.9999994764520131, iteration: 343044
loss: 0.9994240403175354,grad_norm: 0.9999993768372285, iteration: 343045
loss: 0.9507074952125549,grad_norm: 0.9999991447636679, iteration: 343046
loss: 1.0915955305099487,grad_norm: 0.9999990083753111, iteration: 343047
loss: 1.0133799314498901,grad_norm: 0.6576963138695374, iteration: 343048
loss: 1.0022116899490356,grad_norm: 0.9999991274366662, iteration: 343049
loss: 1.0208860635757446,grad_norm: 0.6797735356925726, iteration: 343050
loss: 1.0021800994873047,grad_norm: 0.7863682354736664, iteration: 343051
loss: 1.0212727785110474,grad_norm: 0.9999995019349475, iteration: 343052
loss: 1.0039548873901367,grad_norm: 0.7824498004939322, iteration: 343053
loss: 1.0103754997253418,grad_norm: 0.9999995914379538, iteration: 343054
loss: 0.978773295879364,grad_norm: 0.7281526120632219, iteration: 343055
loss: 0.9930446743965149,grad_norm: 0.8773864128216101, iteration: 343056
loss: 0.9863978028297424,grad_norm: 0.9999990660898026, iteration: 343057
loss: 1.0135178565979004,grad_norm: 0.6535095931057191, iteration: 343058
loss: 0.9592987895011902,grad_norm: 0.940696352439776, iteration: 343059
loss: 1.0310125350952148,grad_norm: 0.7574680663271106, iteration: 343060
loss: 0.9798331260681152,grad_norm: 0.8129708743939441, iteration: 343061
loss: 1.0702966451644897,grad_norm: 0.9025216761114233, iteration: 343062
loss: 1.0071343183517456,grad_norm: 0.6817591675639287, iteration: 343063
loss: 0.9992640018463135,grad_norm: 0.7884385092891683, iteration: 343064
loss: 0.9989975690841675,grad_norm: 0.8089226251359406, iteration: 343065
loss: 1.0120418071746826,grad_norm: 0.8901901611734269, iteration: 343066
loss: 0.9661129117012024,grad_norm: 0.8833258271200577, iteration: 343067
loss: 0.9616169333457947,grad_norm: 0.9898594963943587, iteration: 343068
loss: 0.9871357083320618,grad_norm: 0.9220606336863074, iteration: 343069
loss: 0.9638429284095764,grad_norm: 0.742094529301156, iteration: 343070
loss: 1.0050239562988281,grad_norm: 0.9999993876277181, iteration: 343071
loss: 0.9867374300956726,grad_norm: 0.7758105494321811, iteration: 343072
loss: 1.0404343605041504,grad_norm: 0.9999991115648721, iteration: 343073
loss: 1.0024964809417725,grad_norm: 0.872652440971039, iteration: 343074
loss: 1.0301564931869507,grad_norm: 0.8944177891415355, iteration: 343075
loss: 1.0287046432495117,grad_norm: 0.9666785515041777, iteration: 343076
loss: 0.9940393567085266,grad_norm: 0.9096513111951393, iteration: 343077
loss: 1.037771463394165,grad_norm: 0.9999993338346654, iteration: 343078
loss: 1.023769736289978,grad_norm: 0.9622650950670351, iteration: 343079
loss: 0.9993652105331421,grad_norm: 0.8682680283864808, iteration: 343080
loss: 0.9813750982284546,grad_norm: 0.8618810313301578, iteration: 343081
loss: 0.9729790687561035,grad_norm: 0.8843524187683329, iteration: 343082
loss: 1.030272126197815,grad_norm: 0.8580808325730601, iteration: 343083
loss: 0.9833753705024719,grad_norm: 0.7820413759166847, iteration: 343084
loss: 0.966422975063324,grad_norm: 0.9451462435897514, iteration: 343085
loss: 0.9930574297904968,grad_norm: 0.9999991238119572, iteration: 343086
loss: 1.0074739456176758,grad_norm: 0.8528184254621817, iteration: 343087
loss: 1.017946481704712,grad_norm: 0.8104204396809271, iteration: 343088
loss: 0.9629414081573486,grad_norm: 0.7609371991193665, iteration: 343089
loss: 0.9965466856956482,grad_norm: 0.7396043579823358, iteration: 343090
loss: 1.0024534463882446,grad_norm: 0.8211616549432309, iteration: 343091
loss: 0.9906022548675537,grad_norm: 0.8318752569305711, iteration: 343092
loss: 1.0145024061203003,grad_norm: 0.6633938756821411, iteration: 343093
loss: 0.9749727845191956,grad_norm: 0.999999122035153, iteration: 343094
loss: 1.0108393430709839,grad_norm: 0.7241322558203117, iteration: 343095
loss: 0.9959071278572083,grad_norm: 0.6219632420745886, iteration: 343096
loss: 0.997490644454956,grad_norm: 0.7497166467744417, iteration: 343097
loss: 0.976273238658905,grad_norm: 0.741702678231194, iteration: 343098
loss: 0.9966660737991333,grad_norm: 0.8955812657853102, iteration: 343099
loss: 1.0117294788360596,grad_norm: 0.7501239939314824, iteration: 343100
loss: 1.0223708152770996,grad_norm: 0.7571648524019968, iteration: 343101
loss: 1.0134305953979492,grad_norm: 0.9221388261407246, iteration: 343102
loss: 1.063076376914978,grad_norm: 0.8231729700976044, iteration: 343103
loss: 0.998293936252594,grad_norm: 0.8905903490717832, iteration: 343104
loss: 0.9974532723426819,grad_norm: 0.9739366091172913, iteration: 343105
loss: 0.9905228018760681,grad_norm: 0.9179304058977502, iteration: 343106
loss: 1.0016088485717773,grad_norm: 0.8586475618555716, iteration: 343107
loss: 1.020675539970398,grad_norm: 0.9439751371975067, iteration: 343108
loss: 1.0363198518753052,grad_norm: 0.7909242534729438, iteration: 343109
loss: 0.9913133382797241,grad_norm: 0.7469111765497238, iteration: 343110
loss: 0.991593062877655,grad_norm: 0.8895233740087362, iteration: 343111
loss: 1.0202980041503906,grad_norm: 0.999999129192674, iteration: 343112
loss: 1.024125099182129,grad_norm: 0.7321139642077872, iteration: 343113
loss: 1.0121294260025024,grad_norm: 0.8194806884303584, iteration: 343114
loss: 0.9913467168807983,grad_norm: 0.9999994039075106, iteration: 343115
loss: 1.005197286605835,grad_norm: 0.8699965697103536, iteration: 343116
loss: 0.9833070635795593,grad_norm: 0.9604432297317612, iteration: 343117
loss: 1.0243592262268066,grad_norm: 0.9999998687724109, iteration: 343118
loss: 1.0034867525100708,grad_norm: 0.9999990208335799, iteration: 343119
loss: 0.9853113293647766,grad_norm: 0.7888590805115749, iteration: 343120
loss: 1.0082870721817017,grad_norm: 0.7211820078576261, iteration: 343121
loss: 0.9648004174232483,grad_norm: 0.880452067231405, iteration: 343122
loss: 0.9813005924224854,grad_norm: 0.8026890061320973, iteration: 343123
loss: 0.9898474812507629,grad_norm: 0.8565202071470092, iteration: 343124
loss: 0.969563364982605,grad_norm: 0.816012845978288, iteration: 343125
loss: 1.0205111503601074,grad_norm: 0.8624925263086543, iteration: 343126
loss: 1.059658169746399,grad_norm: 0.8824224072558305, iteration: 343127
loss: 1.0221635103225708,grad_norm: 0.7814693119272277, iteration: 343128
loss: 1.0100077390670776,grad_norm: 0.9133065258540503, iteration: 343129
loss: 1.0187395811080933,grad_norm: 0.7642626820086486, iteration: 343130
loss: 1.0108156204223633,grad_norm: 0.8103083538358021, iteration: 343131
loss: 0.9582892656326294,grad_norm: 0.898910470827895, iteration: 343132
loss: 0.9735724925994873,grad_norm: 0.7510054827786249, iteration: 343133
loss: 0.9833824038505554,grad_norm: 0.9999990058976784, iteration: 343134
loss: 1.020121693611145,grad_norm: 0.9590485864549395, iteration: 343135
loss: 1.0164114236831665,grad_norm: 0.8418106360215295, iteration: 343136
loss: 1.0147111415863037,grad_norm: 0.96716128146163, iteration: 343137
loss: 1.0230228900909424,grad_norm: 0.9999992955385238, iteration: 343138
loss: 1.0062187910079956,grad_norm: 0.9674527578564229, iteration: 343139
loss: 1.0004501342773438,grad_norm: 0.9999989934060007, iteration: 343140
loss: 1.03383207321167,grad_norm: 0.999999634915484, iteration: 343141
loss: 1.0162030458450317,grad_norm: 0.6534564474313737, iteration: 343142
loss: 1.0037227869033813,grad_norm: 0.9999998138113113, iteration: 343143
loss: 1.014874815940857,grad_norm: 0.9062510451142654, iteration: 343144
loss: 0.9898546934127808,grad_norm: 0.9481281786790583, iteration: 343145
loss: 1.0042604207992554,grad_norm: 0.7067897201549244, iteration: 343146
loss: 1.0125749111175537,grad_norm: 0.9449278369624949, iteration: 343147
loss: 0.9906792640686035,grad_norm: 0.9389305305872891, iteration: 343148
loss: 1.007421612739563,grad_norm: 0.9466619406811984, iteration: 343149
loss: 1.012512445449829,grad_norm: 0.892160772008611, iteration: 343150
loss: 1.0416572093963623,grad_norm: 0.9418631886732601, iteration: 343151
loss: 1.0067532062530518,grad_norm: 0.799829434298082, iteration: 343152
loss: 1.0078641176223755,grad_norm: 0.8143709125751587, iteration: 343153
loss: 0.980580747127533,grad_norm: 0.8344382933515467, iteration: 343154
loss: 0.9879350662231445,grad_norm: 0.7498473326939886, iteration: 343155
loss: 0.9723673462867737,grad_norm: 0.7453546563643985, iteration: 343156
loss: 0.9872992634773254,grad_norm: 0.8299451936474528, iteration: 343157
loss: 0.9966638684272766,grad_norm: 0.8142091140901707, iteration: 343158
loss: 1.063624382019043,grad_norm: 0.9999995574985104, iteration: 343159
loss: 0.9899856448173523,grad_norm: 0.7425841161865522, iteration: 343160
loss: 1.0075596570968628,grad_norm: 0.8954108535567853, iteration: 343161
loss: 0.9901999831199646,grad_norm: 0.8648414106366782, iteration: 343162
loss: 1.0234239101409912,grad_norm: 0.9214516092169789, iteration: 343163
loss: 1.0357496738433838,grad_norm: 0.9999990838525469, iteration: 343164
loss: 0.991255521774292,grad_norm: 0.8978139534584414, iteration: 343165
loss: 1.006454348564148,grad_norm: 0.8357852506256696, iteration: 343166
loss: 1.003300428390503,grad_norm: 0.9999996989777235, iteration: 343167
loss: 0.9752241373062134,grad_norm: 0.8078736756451801, iteration: 343168
loss: 1.0045764446258545,grad_norm: 0.9999994185683062, iteration: 343169
loss: 1.0090769529342651,grad_norm: 0.8351475980779207, iteration: 343170
loss: 0.9811616539955139,grad_norm: 0.8475833852079558, iteration: 343171
loss: 0.9694311022758484,grad_norm: 0.7026896080001398, iteration: 343172
loss: 1.0560349225997925,grad_norm: 0.9999996384330498, iteration: 343173
loss: 0.9900631308555603,grad_norm: 0.7539287161824968, iteration: 343174
loss: 0.9972696900367737,grad_norm: 0.9999995143094902, iteration: 343175
loss: 1.015015959739685,grad_norm: 0.949624399466861, iteration: 343176
loss: 1.0787365436553955,grad_norm: 0.9999992049097921, iteration: 343177
loss: 1.005715012550354,grad_norm: 0.8731460282657241, iteration: 343178
loss: 1.0049335956573486,grad_norm: 0.8882613232730363, iteration: 343179
loss: 1.011778712272644,grad_norm: 0.7953535094345515, iteration: 343180
loss: 0.994843602180481,grad_norm: 0.7832653617528735, iteration: 343181
loss: 1.0049636363983154,grad_norm: 0.9999992705365193, iteration: 343182
loss: 1.1757712364196777,grad_norm: 0.9999998417773628, iteration: 343183
loss: 0.9688345789909363,grad_norm: 0.8880873483618967, iteration: 343184
loss: 1.009321928024292,grad_norm: 0.9999992111627887, iteration: 343185
loss: 1.0536201000213623,grad_norm: 0.9999994278983146, iteration: 343186
loss: 0.9918888807296753,grad_norm: 0.7308278844144798, iteration: 343187
loss: 0.9797477126121521,grad_norm: 0.9423603183547908, iteration: 343188
loss: 0.9741420745849609,grad_norm: 0.8512278603161251, iteration: 343189
loss: 1.0042438507080078,grad_norm: 0.9007057726472332, iteration: 343190
loss: 1.0293827056884766,grad_norm: 0.7241364416161078, iteration: 343191
loss: 0.9846847653388977,grad_norm: 0.8166198312519857, iteration: 343192
loss: 0.988489031791687,grad_norm: 0.8247235023403673, iteration: 343193
loss: 0.9803317189216614,grad_norm: 0.8026381902420043, iteration: 343194
loss: 0.9905417561531067,grad_norm: 0.9016969938121293, iteration: 343195
loss: 1.026474118232727,grad_norm: 0.9999997329916496, iteration: 343196
loss: 1.018479347229004,grad_norm: 0.8763084179540227, iteration: 343197
loss: 1.0020815134048462,grad_norm: 0.7124257260049439, iteration: 343198
loss: 0.9564971923828125,grad_norm: 0.857809703178259, iteration: 343199
loss: 1.01224946975708,grad_norm: 0.9999996117540789, iteration: 343200
loss: 1.0394549369812012,grad_norm: 0.8284937115997443, iteration: 343201
loss: 1.0238611698150635,grad_norm: 0.9999991144524558, iteration: 343202
loss: 1.0122472047805786,grad_norm: 0.6962920896051908, iteration: 343203
loss: 0.9717252254486084,grad_norm: 0.7011994440176649, iteration: 343204
loss: 0.9865737557411194,grad_norm: 0.8282865916842552, iteration: 343205
loss: 1.0420446395874023,grad_norm: 0.8228230152370326, iteration: 343206
loss: 1.0032389163970947,grad_norm: 0.7253773401236605, iteration: 343207
loss: 0.971522331237793,grad_norm: 0.9195233675700661, iteration: 343208
loss: 1.0315570831298828,grad_norm: 0.9999990592311409, iteration: 343209
loss: 0.9955223202705383,grad_norm: 0.6727901680853903, iteration: 343210
loss: 1.0418884754180908,grad_norm: 0.9222480771345598, iteration: 343211
loss: 1.0714261531829834,grad_norm: 0.9999993597857363, iteration: 343212
loss: 0.9857035279273987,grad_norm: 0.8912774988422184, iteration: 343213
loss: 0.9936110973358154,grad_norm: 0.7834668649813633, iteration: 343214
loss: 1.0002763271331787,grad_norm: 0.795560447536765, iteration: 343215
loss: 0.9626573920249939,grad_norm: 0.7800135387689163, iteration: 343216
loss: 1.0068625211715698,grad_norm: 0.9238168801907868, iteration: 343217
loss: 0.9870056509971619,grad_norm: 0.9802085633319303, iteration: 343218
loss: 1.0172065496444702,grad_norm: 0.8201523221463145, iteration: 343219
loss: 1.0440669059753418,grad_norm: 0.9999993654398986, iteration: 343220
loss: 1.0342546701431274,grad_norm: 0.7401642195520833, iteration: 343221
loss: 0.9864627122879028,grad_norm: 0.7860344298829975, iteration: 343222
loss: 0.9850462675094604,grad_norm: 0.7962878777005091, iteration: 343223
loss: 1.0968868732452393,grad_norm: 0.8630648553447403, iteration: 343224
loss: 1.0206295251846313,grad_norm: 0.8302536011900452, iteration: 343225
loss: 1.0432541370391846,grad_norm: 0.9999999782472211, iteration: 343226
loss: 1.0738664865493774,grad_norm: 0.847236783652378, iteration: 343227
loss: 1.0371944904327393,grad_norm: 0.7860206231205884, iteration: 343228
loss: 0.9629420638084412,grad_norm: 0.8509835867510306, iteration: 343229
loss: 1.0388531684875488,grad_norm: 0.9999991194846445, iteration: 343230
loss: 1.0076940059661865,grad_norm: 0.8261472011151733, iteration: 343231
loss: 1.0447394847869873,grad_norm: 0.796680995104392, iteration: 343232
loss: 1.03118896484375,grad_norm: 0.999999942242872, iteration: 343233
loss: 0.9957929253578186,grad_norm: 0.7699279690053974, iteration: 343234
loss: 0.957512378692627,grad_norm: 0.7597896730813652, iteration: 343235
loss: 0.9885052442550659,grad_norm: 0.8667966055974452, iteration: 343236
loss: 0.9799128770828247,grad_norm: 0.9999991492027651, iteration: 343237
loss: 0.9949503540992737,grad_norm: 0.6678120115113845, iteration: 343238
loss: 1.009442925453186,grad_norm: 0.999999693819818, iteration: 343239
loss: 0.9536442160606384,grad_norm: 0.9309161521099064, iteration: 343240
loss: 1.0332204103469849,grad_norm: 0.894525981714231, iteration: 343241
loss: 1.0002679824829102,grad_norm: 0.7900687845975785, iteration: 343242
loss: 0.9940582513809204,grad_norm: 0.7961509560035602, iteration: 343243
loss: 0.9932395219802856,grad_norm: 0.9626669800642307, iteration: 343244
loss: 1.0043631792068481,grad_norm: 0.9634926424653287, iteration: 343245
loss: 0.9919516444206238,grad_norm: 0.8036682911386211, iteration: 343246
loss: 0.9993208050727844,grad_norm: 0.9999991241209286, iteration: 343247
loss: 1.0168735980987549,grad_norm: 0.705462950160099, iteration: 343248
loss: 1.0168129205703735,grad_norm: 0.7899612352567474, iteration: 343249
loss: 1.0168150663375854,grad_norm: 0.9999998413442183, iteration: 343250
loss: 0.9949007034301758,grad_norm: 0.9999992029719532, iteration: 343251
loss: 1.0199528932571411,grad_norm: 0.9999991467127006, iteration: 343252
loss: 1.0166536569595337,grad_norm: 0.8617211951921349, iteration: 343253
loss: 1.076452612876892,grad_norm: 0.8832709080920489, iteration: 343254
loss: 1.1207886934280396,grad_norm: 0.9999997832537396, iteration: 343255
loss: 1.0544471740722656,grad_norm: 0.726135732437313, iteration: 343256
loss: 1.0047614574432373,grad_norm: 0.9795820491103097, iteration: 343257
loss: 0.9780844449996948,grad_norm: 0.8461330168967744, iteration: 343258
loss: 0.9972754716873169,grad_norm: 0.915307600445368, iteration: 343259
loss: 1.027130126953125,grad_norm: 0.7126381526200093, iteration: 343260
loss: 0.9995956420898438,grad_norm: 0.9999990741246655, iteration: 343261
loss: 1.0111851692199707,grad_norm: 0.9999990734547975, iteration: 343262
loss: 1.0764684677124023,grad_norm: 0.9999998371570279, iteration: 343263
loss: 1.0094635486602783,grad_norm: 0.7122632729696214, iteration: 343264
loss: 1.0452489852905273,grad_norm: 0.965412725507233, iteration: 343265
loss: 0.9753268361091614,grad_norm: 0.9500223645282166, iteration: 343266
loss: 0.9616249799728394,grad_norm: 0.7185315936755957, iteration: 343267
loss: 1.032874345779419,grad_norm: 0.9225542338444128, iteration: 343268
loss: 1.0586559772491455,grad_norm: 0.7971361898786835, iteration: 343269
loss: 0.9836109280586243,grad_norm: 0.7900670687459792, iteration: 343270
loss: 1.0368174314498901,grad_norm: 0.7822857840527982, iteration: 343271
loss: 0.9831079244613647,grad_norm: 0.8493464744287016, iteration: 343272
loss: 0.9853323101997375,grad_norm: 0.8882788114709158, iteration: 343273
loss: 1.022139072418213,grad_norm: 0.7612957530698297, iteration: 343274
loss: 1.0186082124710083,grad_norm: 0.9999997749334728, iteration: 343275
loss: 0.9830543994903564,grad_norm: 0.6786730142723121, iteration: 343276
loss: 1.0354835987091064,grad_norm: 0.999999375368275, iteration: 343277
loss: 1.0181382894515991,grad_norm: 0.8768215416323096, iteration: 343278
loss: 0.9870541095733643,grad_norm: 0.8657519198888257, iteration: 343279
loss: 0.9838254451751709,grad_norm: 0.747223139382688, iteration: 343280
loss: 1.042822241783142,grad_norm: 0.9999997370418663, iteration: 343281
loss: 0.9663596749305725,grad_norm: 0.7185166283015993, iteration: 343282
loss: 0.9808727502822876,grad_norm: 0.8073261454936288, iteration: 343283
loss: 1.007319450378418,grad_norm: 0.9336923513872685, iteration: 343284
loss: 1.01813542842865,grad_norm: 0.8382351099019919, iteration: 343285
loss: 1.0286226272583008,grad_norm: 0.7185624037527233, iteration: 343286
loss: 0.9815928936004639,grad_norm: 0.8506662493520176, iteration: 343287
loss: 1.0043749809265137,grad_norm: 0.7291856151096107, iteration: 343288
loss: 1.0151492357254028,grad_norm: 0.7335995724575156, iteration: 343289
loss: 1.2019915580749512,grad_norm: 0.9999998438150931, iteration: 343290
loss: 1.0208014249801636,grad_norm: 0.9999997860110758, iteration: 343291
loss: 1.001463532447815,grad_norm: 0.9999991922999791, iteration: 343292
loss: 1.0139883756637573,grad_norm: 0.8294504424544297, iteration: 343293
loss: 1.0001195669174194,grad_norm: 0.917926621989563, iteration: 343294
loss: 0.9848705530166626,grad_norm: 0.7926800206635566, iteration: 343295
loss: 1.0259915590286255,grad_norm: 0.8798670835984072, iteration: 343296
loss: 0.9736728072166443,grad_norm: 0.7757647264883456, iteration: 343297
loss: 0.9783284664154053,grad_norm: 0.8336201260067045, iteration: 343298
loss: 1.036637783050537,grad_norm: 0.919282600617169, iteration: 343299
loss: 1.0269391536712646,grad_norm: 0.999999918622992, iteration: 343300
loss: 1.0014123916625977,grad_norm: 0.9359768265539545, iteration: 343301
loss: 1.0200637578964233,grad_norm: 0.7662333565563204, iteration: 343302
loss: 0.9899616241455078,grad_norm: 0.7418761392526945, iteration: 343303
loss: 1.0189800262451172,grad_norm: 0.6821879789342201, iteration: 343304
loss: 1.0191045999526978,grad_norm: 0.747289408264086, iteration: 343305
loss: 0.9858801364898682,grad_norm: 0.9999991437297444, iteration: 343306
loss: 0.9539072513580322,grad_norm: 0.7995841642095165, iteration: 343307
loss: 0.9996933937072754,grad_norm: 0.9014273490685507, iteration: 343308
loss: 1.0167070627212524,grad_norm: 0.9514992018334015, iteration: 343309
loss: 1.0069918632507324,grad_norm: 0.8789578310415018, iteration: 343310
loss: 1.0380357503890991,grad_norm: 0.8668920318674973, iteration: 343311
loss: 1.0054594278335571,grad_norm: 0.8118680954966759, iteration: 343312
loss: 0.9503356218338013,grad_norm: 0.9192762350161591, iteration: 343313
loss: 0.9946983456611633,grad_norm: 0.7857864233307933, iteration: 343314
loss: 0.9662496447563171,grad_norm: 0.9050230888771501, iteration: 343315
loss: 1.0124354362487793,grad_norm: 0.8263371729016519, iteration: 343316
loss: 1.128013253211975,grad_norm: 0.999999175356645, iteration: 343317
loss: 0.9856476187705994,grad_norm: 0.7003705808215074, iteration: 343318
loss: 0.9677527546882629,grad_norm: 0.806754359595821, iteration: 343319
loss: 0.9977596998214722,grad_norm: 0.9999990508957176, iteration: 343320
loss: 0.986335039138794,grad_norm: 0.8141189555080341, iteration: 343321
loss: 1.023766279220581,grad_norm: 0.8439870831521744, iteration: 343322
loss: 1.0205215215682983,grad_norm: 0.8586719498756382, iteration: 343323
loss: 0.9930217862129211,grad_norm: 0.7872965803011123, iteration: 343324
loss: 0.9926785826683044,grad_norm: 0.808183970469261, iteration: 343325
loss: 0.9865573048591614,grad_norm: 0.8061217460762061, iteration: 343326
loss: 1.0046896934509277,grad_norm: 0.8317178629980732, iteration: 343327
loss: 1.0248719453811646,grad_norm: 0.7750903587991121, iteration: 343328
loss: 0.9522280693054199,grad_norm: 0.7682887459243111, iteration: 343329
loss: 0.9737588167190552,grad_norm: 0.7390663453235643, iteration: 343330
loss: 1.0483167171478271,grad_norm: 0.9999991351884328, iteration: 343331
loss: 1.035078763961792,grad_norm: 0.7789866783812712, iteration: 343332
loss: 0.9962401986122131,grad_norm: 0.7936470976266154, iteration: 343333
loss: 1.0290136337280273,grad_norm: 0.999999480662898, iteration: 343334
loss: 1.0009480714797974,grad_norm: 0.7715780045589132, iteration: 343335
loss: 0.9828277826309204,grad_norm: 0.8617268457964709, iteration: 343336
loss: 1.0258293151855469,grad_norm: 0.8105239039546365, iteration: 343337
loss: 1.0092803239822388,grad_norm: 0.8608286791281216, iteration: 343338
loss: 1.0179071426391602,grad_norm: 0.8335334375758189, iteration: 343339
loss: 1.0165599584579468,grad_norm: 0.7345897920978713, iteration: 343340
loss: 1.0209635496139526,grad_norm: 0.9999995198456852, iteration: 343341
loss: 1.0194562673568726,grad_norm: 0.7856982309718595, iteration: 343342
loss: 1.0376605987548828,grad_norm: 0.9999994777732418, iteration: 343343
loss: 1.0226690769195557,grad_norm: 0.9999997477752344, iteration: 343344
loss: 0.9713006615638733,grad_norm: 0.7051728811480229, iteration: 343345
loss: 1.024848461151123,grad_norm: 0.9999992097558467, iteration: 343346
loss: 0.9973825812339783,grad_norm: 0.7875042576132153, iteration: 343347
loss: 1.0266528129577637,grad_norm: 0.80712870974439, iteration: 343348
loss: 0.9639297723770142,grad_norm: 0.9543248200937151, iteration: 343349
loss: 0.9415180683135986,grad_norm: 0.9240102924980905, iteration: 343350
loss: 0.9763253331184387,grad_norm: 0.7232493118670955, iteration: 343351
loss: 0.9934440851211548,grad_norm: 0.7531573483959464, iteration: 343352
loss: 1.0666685104370117,grad_norm: 0.9999993862722615, iteration: 343353
loss: 1.0030357837677002,grad_norm: 0.8011629613999666, iteration: 343354
loss: 0.982383131980896,grad_norm: 0.8227460753936682, iteration: 343355
loss: 1.000315546989441,grad_norm: 0.8893524097174975, iteration: 343356
loss: 0.9966496229171753,grad_norm: 0.7666678217296164, iteration: 343357
loss: 0.9840223789215088,grad_norm: 0.8764303522937428, iteration: 343358
loss: 0.9939979314804077,grad_norm: 0.8404397169393678, iteration: 343359
loss: 1.0730323791503906,grad_norm: 0.9999991731502506, iteration: 343360
loss: 1.006328821182251,grad_norm: 0.845550725884932, iteration: 343361
loss: 1.0147379636764526,grad_norm: 0.9999989655849206, iteration: 343362
loss: 1.0119937658309937,grad_norm: 0.8326973119894243, iteration: 343363
loss: 1.0242164134979248,grad_norm: 0.8406724749484542, iteration: 343364
loss: 1.05647873878479,grad_norm: 0.9999996817933906, iteration: 343365
loss: 1.0055257081985474,grad_norm: 0.9999997437552847, iteration: 343366
loss: 0.9952613711357117,grad_norm: 0.9999991283458313, iteration: 343367
loss: 0.9926236271858215,grad_norm: 0.9246772795017588, iteration: 343368
loss: 0.9553451538085938,grad_norm: 0.8204451499846115, iteration: 343369
loss: 0.9694281220436096,grad_norm: 0.8009782480694477, iteration: 343370
loss: 1.0062593221664429,grad_norm: 0.8409239362746087, iteration: 343371
loss: 1.0159720182418823,grad_norm: 0.9879093382675529, iteration: 343372
loss: 0.9796055555343628,grad_norm: 0.796626749445519, iteration: 343373
loss: 0.9885163307189941,grad_norm: 0.8789138102975205, iteration: 343374
loss: 0.9890382289886475,grad_norm: 0.6803146712200843, iteration: 343375
loss: 0.9816290140151978,grad_norm: 0.8528185870427867, iteration: 343376
loss: 0.9995652437210083,grad_norm: 0.8553915899440383, iteration: 343377
loss: 1.0080145597457886,grad_norm: 0.8369997432338541, iteration: 343378
loss: 0.9669742584228516,grad_norm: 0.8216851859250883, iteration: 343379
loss: 0.9929096102714539,grad_norm: 0.9409955186901635, iteration: 343380
loss: 0.9559893608093262,grad_norm: 0.8250725337083779, iteration: 343381
loss: 1.0309525728225708,grad_norm: 0.6112835544750684, iteration: 343382
loss: 1.007607340812683,grad_norm: 0.7307337918168556, iteration: 343383
loss: 1.0063644647598267,grad_norm: 0.7649411802557322, iteration: 343384
loss: 0.9471037983894348,grad_norm: 0.8425259383698573, iteration: 343385
loss: 0.9811627268791199,grad_norm: 0.7293964208633008, iteration: 343386
loss: 0.9865192770957947,grad_norm: 0.7852993132306858, iteration: 343387
loss: 0.98260098695755,grad_norm: 0.7479387334296984, iteration: 343388
loss: 0.9764197468757629,grad_norm: 0.7640489966038019, iteration: 343389
loss: 0.9968658089637756,grad_norm: 0.9971050434791116, iteration: 343390
loss: 1.0246162414550781,grad_norm: 0.9010041319812426, iteration: 343391
loss: 1.0012222528457642,grad_norm: 0.7202908127108746, iteration: 343392
loss: 0.9802020788192749,grad_norm: 0.9999991312494253, iteration: 343393
loss: 1.0055522918701172,grad_norm: 0.904039821788441, iteration: 343394
loss: 1.000852346420288,grad_norm: 0.8876645920092381, iteration: 343395
loss: 0.9579603672027588,grad_norm: 0.9233214429871242, iteration: 343396
loss: 1.014212727546692,grad_norm: 0.7733800420974188, iteration: 343397
loss: 0.9920172095298767,grad_norm: 0.8719429286307276, iteration: 343398
loss: 1.0178310871124268,grad_norm: 0.7533687276097827, iteration: 343399
loss: 0.9553911089897156,grad_norm: 0.8163785512945168, iteration: 343400
loss: 1.026171326637268,grad_norm: 0.8274585126960657, iteration: 343401
loss: 0.9867797493934631,grad_norm: 0.8121765911864777, iteration: 343402
loss: 0.9732968211174011,grad_norm: 0.8498739895856868, iteration: 343403
loss: 0.9851994514465332,grad_norm: 0.93048684348982, iteration: 343404
loss: 1.0236849784851074,grad_norm: 0.8363405911506524, iteration: 343405
loss: 0.9698740839958191,grad_norm: 0.867073913529387, iteration: 343406
loss: 0.9677512049674988,grad_norm: 0.9328375719966235, iteration: 343407
loss: 0.9896238446235657,grad_norm: 0.9999994683649844, iteration: 343408
loss: 1.0147228240966797,grad_norm: 0.8325488285083369, iteration: 343409
loss: 0.9946203827857971,grad_norm: 0.9824141280874703, iteration: 343410
loss: 0.998779296875,grad_norm: 0.9999992391601857, iteration: 343411
loss: 0.9857020378112793,grad_norm: 0.7679917102895005, iteration: 343412
loss: 1.0041775703430176,grad_norm: 0.780080829690614, iteration: 343413
loss: 1.0288161039352417,grad_norm: 0.7525468146856341, iteration: 343414
loss: 0.9896199107170105,grad_norm: 0.847141590667807, iteration: 343415
loss: 1.0309875011444092,grad_norm: 0.9613202125394678, iteration: 343416
loss: 1.0305969715118408,grad_norm: 0.8725533005920918, iteration: 343417
loss: 1.02451753616333,grad_norm: 0.6850225280321013, iteration: 343418
loss: 1.0204923152923584,grad_norm: 0.7435115531459776, iteration: 343419
loss: 1.0134333372116089,grad_norm: 0.9065078638166716, iteration: 343420
loss: 0.9813432693481445,grad_norm: 0.9999991800020295, iteration: 343421
loss: 0.9836318492889404,grad_norm: 0.9999990381970444, iteration: 343422
loss: 1.010607123374939,grad_norm: 0.8600376163439013, iteration: 343423
loss: 1.019881010055542,grad_norm: 0.853453817514378, iteration: 343424
loss: 1.0208901166915894,grad_norm: 0.8924094641303025, iteration: 343425
loss: 1.062519907951355,grad_norm: 0.9999993238417005, iteration: 343426
loss: 1.0144331455230713,grad_norm: 0.9201383262768554, iteration: 343427
loss: 1.0244226455688477,grad_norm: 0.6864763069064165, iteration: 343428
loss: 0.9887782335281372,grad_norm: 0.7638332980799719, iteration: 343429
loss: 0.9757708311080933,grad_norm: 0.9999995684551427, iteration: 343430
loss: 0.9571479558944702,grad_norm: 0.7702804798653998, iteration: 343431
loss: 1.013047695159912,grad_norm: 0.7727316225662473, iteration: 343432
loss: 1.0737255811691284,grad_norm: 0.9999998664069797, iteration: 343433
loss: 1.0319277048110962,grad_norm: 0.9999995639903136, iteration: 343434
loss: 1.0259020328521729,grad_norm: 0.8491397548060378, iteration: 343435
loss: 0.9740307331085205,grad_norm: 0.9280282801183013, iteration: 343436
loss: 1.0172243118286133,grad_norm: 0.862923801520783, iteration: 343437
loss: 0.9764866828918457,grad_norm: 0.7710208329171067, iteration: 343438
loss: 1.003302812576294,grad_norm: 0.9869441073175153, iteration: 343439
loss: 0.9999113082885742,grad_norm: 0.7699624347716807, iteration: 343440
loss: 1.015380620956421,grad_norm: 0.8007122465167875, iteration: 343441
loss: 0.994968831539154,grad_norm: 0.9999991722536371, iteration: 343442
loss: 0.9846807718276978,grad_norm: 0.8741554734932457, iteration: 343443
loss: 1.0246608257293701,grad_norm: 0.7980183107353401, iteration: 343444
loss: 1.0208313465118408,grad_norm: 0.9137772981379292, iteration: 343445
loss: 0.9715530872344971,grad_norm: 0.9166489570889361, iteration: 343446
loss: 0.9902569055557251,grad_norm: 0.9095036778878457, iteration: 343447
loss: 1.0040937662124634,grad_norm: 0.892496735494994, iteration: 343448
loss: 0.9687963724136353,grad_norm: 0.7927999711395894, iteration: 343449
loss: 1.0209522247314453,grad_norm: 0.9999991845313553, iteration: 343450
loss: 1.0126668214797974,grad_norm: 0.9999991141431478, iteration: 343451
loss: 1.0814194679260254,grad_norm: 0.9999998750838055, iteration: 343452
loss: 0.9815531373023987,grad_norm: 0.8463044832416083, iteration: 343453
loss: 0.9844878315925598,grad_norm: 0.7864396773966283, iteration: 343454
loss: 0.9990870952606201,grad_norm: 0.9057746972894763, iteration: 343455
loss: 0.9920079112052917,grad_norm: 0.7804758655401097, iteration: 343456
loss: 1.0469039678573608,grad_norm: 0.999999880828444, iteration: 343457
loss: 1.016254186630249,grad_norm: 0.9999993951482736, iteration: 343458
loss: 1.027323842048645,grad_norm: 0.8038175995810427, iteration: 343459
loss: 1.0202350616455078,grad_norm: 0.9432306075485432, iteration: 343460
loss: 0.9956666827201843,grad_norm: 0.6677206987572792, iteration: 343461
loss: 0.986819863319397,grad_norm: 0.8273923733423221, iteration: 343462
loss: 0.9972301125526428,grad_norm: 0.9624600816757695, iteration: 343463
loss: 0.9986334443092346,grad_norm: 0.8285504835279004, iteration: 343464
loss: 1.0518946647644043,grad_norm: 0.8818783110891961, iteration: 343465
loss: 0.9670662879943848,grad_norm: 0.7756124469165455, iteration: 343466
loss: 1.0400694608688354,grad_norm: 0.7365048266379989, iteration: 343467
loss: 0.985837459564209,grad_norm: 0.7473538312329447, iteration: 343468
loss: 1.0022220611572266,grad_norm: 0.7625348409568969, iteration: 343469
loss: 1.0267727375030518,grad_norm: 0.9359171479028215, iteration: 343470
loss: 1.0158164501190186,grad_norm: 0.8722336198368319, iteration: 343471
loss: 1.0113226175308228,grad_norm: 0.8782169773793654, iteration: 343472
loss: 1.0149019956588745,grad_norm: 0.999999025491171, iteration: 343473
loss: 0.9876874685287476,grad_norm: 0.8435279745676916, iteration: 343474
loss: 1.0222251415252686,grad_norm: 0.9999994112799381, iteration: 343475
loss: 1.0216867923736572,grad_norm: 0.7121393031298274, iteration: 343476
loss: 1.0459398031234741,grad_norm: 0.8794951619717453, iteration: 343477
loss: 1.0187125205993652,grad_norm: 0.7588257622508903, iteration: 343478
loss: 0.9749918580055237,grad_norm: 0.8249113293983563, iteration: 343479
loss: 0.9930652379989624,grad_norm: 0.9128919826277933, iteration: 343480
loss: 1.0143587589263916,grad_norm: 0.7355401957014401, iteration: 343481
loss: 1.0275176763534546,grad_norm: 0.8023650617595363, iteration: 343482
loss: 0.9980984330177307,grad_norm: 0.8084488066330104, iteration: 343483
loss: 1.0054786205291748,grad_norm: 0.8887450403249475, iteration: 343484
loss: 1.012411117553711,grad_norm: 0.9999992341691999, iteration: 343485
loss: 0.9698835611343384,grad_norm: 0.8575483517760336, iteration: 343486
loss: 0.9829902648925781,grad_norm: 0.9030681561755769, iteration: 343487
loss: 1.0255919694900513,grad_norm: 0.6487599512629724, iteration: 343488
loss: 0.9921685457229614,grad_norm: 0.9999992387658526, iteration: 343489
loss: 1.011374592781067,grad_norm: 0.9999990498446688, iteration: 343490
loss: 1.0029221773147583,grad_norm: 0.9010392731896866, iteration: 343491
loss: 1.055423617362976,grad_norm: 0.9999993137684381, iteration: 343492
loss: 0.9963870048522949,grad_norm: 0.8322361263847032, iteration: 343493
loss: 0.970206618309021,grad_norm: 0.7658421737838114, iteration: 343494
loss: 0.9952024221420288,grad_norm: 0.7896024018864829, iteration: 343495
loss: 1.053053855895996,grad_norm: 0.8905517549301813, iteration: 343496
loss: 1.035649061203003,grad_norm: 0.9999991082650348, iteration: 343497
loss: 0.9979807138442993,grad_norm: 0.8133804457198163, iteration: 343498
loss: 1.0169066190719604,grad_norm: 0.7752432216795874, iteration: 343499
loss: 1.0032522678375244,grad_norm: 0.9999991345484185, iteration: 343500
loss: 1.0051772594451904,grad_norm: 0.9113081173144447, iteration: 343501
loss: 1.0336824655532837,grad_norm: 0.8220316510486375, iteration: 343502
loss: 0.94887375831604,grad_norm: 0.8382849168198611, iteration: 343503
loss: 1.0175344944000244,grad_norm: 0.8039942846921846, iteration: 343504
loss: 0.9790884852409363,grad_norm: 0.8381005895777272, iteration: 343505
loss: 0.9710875749588013,grad_norm: 0.9999994037186556, iteration: 343506
loss: 0.9961972236633301,grad_norm: 0.85779418246266, iteration: 343507
loss: 0.9917481541633606,grad_norm: 0.99692322987991, iteration: 343508
loss: 1.013029932975769,grad_norm: 0.8232423865198333, iteration: 343509
loss: 1.0064785480499268,grad_norm: 0.6976357918819139, iteration: 343510
loss: 0.9754337072372437,grad_norm: 0.9485885148660128, iteration: 343511
loss: 0.9741483330726624,grad_norm: 0.7320153887062653, iteration: 343512
loss: 0.9999872446060181,grad_norm: 0.9217849941480036, iteration: 343513
loss: 1.038004994392395,grad_norm: 0.9999991306888358, iteration: 343514
loss: 1.0091464519500732,grad_norm: 0.679473173480411, iteration: 343515
loss: 1.0066170692443848,grad_norm: 0.9189004745670605, iteration: 343516
loss: 1.0182462930679321,grad_norm: 0.7397038632313131, iteration: 343517
loss: 0.9740810990333557,grad_norm: 0.7763126494547387, iteration: 343518
loss: 1.0500751733779907,grad_norm: 0.7459703442057669, iteration: 343519
loss: 0.9775140881538391,grad_norm: 0.8085742945245911, iteration: 343520
loss: 0.9913445115089417,grad_norm: 0.8400819163658542, iteration: 343521
loss: 1.0625181198120117,grad_norm: 0.9999995565635234, iteration: 343522
loss: 0.9910187125205994,grad_norm: 0.7079888812034549, iteration: 343523
loss: 1.0151327848434448,grad_norm: 0.7976773267750878, iteration: 343524
loss: 0.9918470978736877,grad_norm: 0.9999990590683469, iteration: 343525
loss: 0.9879436492919922,grad_norm: 0.7649569497295785, iteration: 343526
loss: 1.0944305658340454,grad_norm: 0.9999993900600869, iteration: 343527
loss: 1.032562017440796,grad_norm: 0.9999990242288083, iteration: 343528
loss: 0.9806249141693115,grad_norm: 0.6709707917439014, iteration: 343529
loss: 0.9939650893211365,grad_norm: 0.771677096364399, iteration: 343530
loss: 0.9930484294891357,grad_norm: 0.7704382120876467, iteration: 343531
loss: 0.9995711445808411,grad_norm: 0.9999992092839934, iteration: 343532
loss: 1.0238317251205444,grad_norm: 0.7476965242761823, iteration: 343533
loss: 0.9875708222389221,grad_norm: 0.6927005404419362, iteration: 343534
loss: 0.9941614270210266,grad_norm: 0.7289647990710435, iteration: 343535
loss: 0.9894217252731323,grad_norm: 0.6883299131504759, iteration: 343536
loss: 0.9954882264137268,grad_norm: 0.8052029887056178, iteration: 343537
loss: 1.0280417203903198,grad_norm: 0.806561296311082, iteration: 343538
loss: 1.0238358974456787,grad_norm: 0.6780307420188163, iteration: 343539
loss: 1.0015971660614014,grad_norm: 0.914108375930582, iteration: 343540
loss: 0.9735593199729919,grad_norm: 0.7685689815364039, iteration: 343541
loss: 1.0252659320831299,grad_norm: 0.9124283178402529, iteration: 343542
loss: 1.0083200931549072,grad_norm: 0.795361536751623, iteration: 343543
loss: 1.0101568698883057,grad_norm: 0.7559900060656807, iteration: 343544
loss: 1.0034728050231934,grad_norm: 0.9551064805948989, iteration: 343545
loss: 0.996271550655365,grad_norm: 0.8713088005434962, iteration: 343546
loss: 1.0676418542861938,grad_norm: 0.862279734765393, iteration: 343547
loss: 0.9941673278808594,grad_norm: 0.8222383772447279, iteration: 343548
loss: 1.032414436340332,grad_norm: 0.9174331442383173, iteration: 343549
loss: 1.017452597618103,grad_norm: 0.7560906235742892, iteration: 343550
loss: 0.9730678796768188,grad_norm: 0.9999995898582809, iteration: 343551
loss: 0.9462366700172424,grad_norm: 0.742754225736474, iteration: 343552
loss: 0.9921324849128723,grad_norm: 0.9999990518552765, iteration: 343553
loss: 1.1737536191940308,grad_norm: 0.8331239312783082, iteration: 343554
loss: 1.013547420501709,grad_norm: 0.8824462256706135, iteration: 343555
loss: 0.9940758347511292,grad_norm: 0.881912686549095, iteration: 343556
loss: 0.9833645224571228,grad_norm: 0.8557055435814617, iteration: 343557
loss: 0.998079776763916,grad_norm: 0.7647413447523137, iteration: 343558
loss: 1.0090147256851196,grad_norm: 0.8152878911060223, iteration: 343559
loss: 1.0717458724975586,grad_norm: 0.8939052145905391, iteration: 343560
loss: 0.9643689393997192,grad_norm: 0.7940728747305736, iteration: 343561
loss: 1.007770299911499,grad_norm: 0.9999994792973117, iteration: 343562
loss: 1.1276838779449463,grad_norm: 0.9999991870643831, iteration: 343563
loss: 1.0144330263137817,grad_norm: 0.9999999082116523, iteration: 343564
loss: 0.9869645833969116,grad_norm: 0.9672823298845192, iteration: 343565
loss: 1.0200092792510986,grad_norm: 0.9999996035811576, iteration: 343566
loss: 1.0081771612167358,grad_norm: 0.9999990134832425, iteration: 343567
loss: 1.0599534511566162,grad_norm: 0.9600635684718946, iteration: 343568
loss: 0.979547917842865,grad_norm: 0.8936884880948849, iteration: 343569
loss: 0.9911085367202759,grad_norm: 0.9321349851713373, iteration: 343570
loss: 1.0069161653518677,grad_norm: 0.9276133067639676, iteration: 343571
loss: 1.0202441215515137,grad_norm: 0.7818467411076115, iteration: 343572
loss: 1.0029101371765137,grad_norm: 0.9358350582786613, iteration: 343573
loss: 1.004626750946045,grad_norm: 0.7644381810123528, iteration: 343574
loss: 0.9729512929916382,grad_norm: 0.9002233552976726, iteration: 343575
loss: 0.9909875988960266,grad_norm: 0.9999990829092054, iteration: 343576
loss: 0.9851381182670593,grad_norm: 0.999999139414761, iteration: 343577
loss: 1.0034599304199219,grad_norm: 0.8730605735168132, iteration: 343578
loss: 1.0017013549804688,grad_norm: 0.6552645941439751, iteration: 343579
loss: 1.005129337310791,grad_norm: 0.7119175605707263, iteration: 343580
loss: 0.9562497138977051,grad_norm: 0.807250521096565, iteration: 343581
loss: 1.0251140594482422,grad_norm: 0.8961157465700496, iteration: 343582
loss: 0.9766548275947571,grad_norm: 0.8687814218816373, iteration: 343583
loss: 0.9806966781616211,grad_norm: 0.9999990645949433, iteration: 343584
loss: 1.046717882156372,grad_norm: 0.9101300464766495, iteration: 343585
loss: 0.9956569075584412,grad_norm: 0.9566514100769608, iteration: 343586
loss: 0.9726898074150085,grad_norm: 0.7596378119731513, iteration: 343587
loss: 1.011708378791809,grad_norm: 0.7737701745490022, iteration: 343588
loss: 0.966856837272644,grad_norm: 0.8822833813812849, iteration: 343589
loss: 0.980402946472168,grad_norm: 0.999999170993151, iteration: 343590
loss: 0.9983365535736084,grad_norm: 0.9999995405835203, iteration: 343591
loss: 1.0216666460037231,grad_norm: 0.9999991399981597, iteration: 343592
loss: 1.059845209121704,grad_norm: 0.9999999834044369, iteration: 343593
loss: 1.0105646848678589,grad_norm: 0.8072218868401086, iteration: 343594
loss: 0.97078937292099,grad_norm: 0.8651069432738568, iteration: 343595
loss: 1.0098214149475098,grad_norm: 0.9999990097581599, iteration: 343596
loss: 1.0587483644485474,grad_norm: 0.8764024272342416, iteration: 343597
loss: 0.956416130065918,grad_norm: 0.7019712231220111, iteration: 343598
loss: 1.013812780380249,grad_norm: 0.9319873273239556, iteration: 343599
loss: 1.0618126392364502,grad_norm: 0.9999991755817426, iteration: 343600
loss: 1.013680338859558,grad_norm: 0.7990276744366842, iteration: 343601
loss: 0.9635031819343567,grad_norm: 0.7481044960712873, iteration: 343602
loss: 1.0047619342803955,grad_norm: 0.8939982367110544, iteration: 343603
loss: 0.9852175116539001,grad_norm: 0.9856249905239937, iteration: 343604
loss: 1.0109262466430664,grad_norm: 0.8318941076090062, iteration: 343605
loss: 0.9942485094070435,grad_norm: 0.8650422309380745, iteration: 343606
loss: 1.024835467338562,grad_norm: 0.9151504524007367, iteration: 343607
loss: 1.0337247848510742,grad_norm: 0.7942010097259383, iteration: 343608
loss: 0.9871509671211243,grad_norm: 0.744457091660645, iteration: 343609
loss: 0.9963395595550537,grad_norm: 0.7332177634724708, iteration: 343610
loss: 0.987490177154541,grad_norm: 0.8522731510451743, iteration: 343611
loss: 1.0111995935440063,grad_norm: 0.8682320932257681, iteration: 343612
loss: 1.002975344657898,grad_norm: 0.9376404669552276, iteration: 343613
loss: 0.9673092365264893,grad_norm: 0.8343721920662058, iteration: 343614
loss: 0.9669673442840576,grad_norm: 0.7742930670751944, iteration: 343615
loss: 0.9840474724769592,grad_norm: 0.9611309550021259, iteration: 343616
loss: 1.158338189125061,grad_norm: 0.9999996881002133, iteration: 343617
loss: 1.002360224723816,grad_norm: 0.9688322791559338, iteration: 343618
loss: 0.9846989512443542,grad_norm: 0.9287383255342782, iteration: 343619
loss: 1.019418716430664,grad_norm: 0.8062870238084529, iteration: 343620
loss: 1.0238373279571533,grad_norm: 0.890206559621787, iteration: 343621
loss: 0.9810827374458313,grad_norm: 0.999999179707279, iteration: 343622
loss: 1.0259648561477661,grad_norm: 0.7275820800836473, iteration: 343623
loss: 1.0067903995513916,grad_norm: 0.7427559483221751, iteration: 343624
loss: 0.9965769052505493,grad_norm: 0.945091505019882, iteration: 343625
loss: 1.0018489360809326,grad_norm: 0.8905966766925022, iteration: 343626
loss: 1.0291379690170288,grad_norm: 0.7264195380108466, iteration: 343627
loss: 0.9724199175834656,grad_norm: 0.8702086612959173, iteration: 343628
loss: 1.022836446762085,grad_norm: 0.910864236502478, iteration: 343629
loss: 1.0407227277755737,grad_norm: 0.9999991099290172, iteration: 343630
loss: 1.0068295001983643,grad_norm: 0.7653068165481444, iteration: 343631
loss: 1.003125548362732,grad_norm: 0.8458086535724243, iteration: 343632
loss: 1.0198761224746704,grad_norm: 0.9999990850197011, iteration: 343633
loss: 1.0101467370986938,grad_norm: 0.8805976559886668, iteration: 343634
loss: 1.0076472759246826,grad_norm: 0.9665077247236351, iteration: 343635
loss: 1.073511004447937,grad_norm: 0.8917441032336408, iteration: 343636
loss: 1.0416722297668457,grad_norm: 0.8303346709040837, iteration: 343637
loss: 1.0395092964172363,grad_norm: 0.8449262471293371, iteration: 343638
loss: 1.0456095933914185,grad_norm: 0.9999999713001105, iteration: 343639
loss: 1.032248616218567,grad_norm: 0.999999415236067, iteration: 343640
loss: 1.0015549659729004,grad_norm: 0.803804095819517, iteration: 343641
loss: 1.0253087282180786,grad_norm: 0.9999991333940929, iteration: 343642
loss: 1.0040663480758667,grad_norm: 0.7958440198454991, iteration: 343643
loss: 1.0136109590530396,grad_norm: 0.8774561928986329, iteration: 343644
loss: 1.0164412260055542,grad_norm: 0.7948832481377188, iteration: 343645
loss: 0.9684922099113464,grad_norm: 0.751801782975873, iteration: 343646
loss: 1.0330004692077637,grad_norm: 0.7101776489368564, iteration: 343647
loss: 1.0036653280258179,grad_norm: 0.8014565621496457, iteration: 343648
loss: 1.0025620460510254,grad_norm: 0.8724576426640813, iteration: 343649
loss: 1.024244785308838,grad_norm: 0.961592522338863, iteration: 343650
loss: 1.013658881187439,grad_norm: 0.773099964676385, iteration: 343651
loss: 1.0215812921524048,grad_norm: 0.8172172403763083, iteration: 343652
loss: 1.039349913597107,grad_norm: 0.9999994061213839, iteration: 343653
loss: 1.0125997066497803,grad_norm: 0.9729421020877765, iteration: 343654
loss: 0.9852774143218994,grad_norm: 0.9024125356583503, iteration: 343655
loss: 1.0338495969772339,grad_norm: 0.7318757616098267, iteration: 343656
loss: 1.0160644054412842,grad_norm: 0.9491095024638883, iteration: 343657
loss: 0.9940247535705566,grad_norm: 0.6245142102955193, iteration: 343658
loss: 1.0276888608932495,grad_norm: 0.8175961046281893, iteration: 343659
loss: 1.0108106136322021,grad_norm: 0.7594757426495473, iteration: 343660
loss: 0.967978298664093,grad_norm: 0.6968570444784327, iteration: 343661
loss: 1.005001425743103,grad_norm: 0.6860556063815361, iteration: 343662
loss: 1.0413386821746826,grad_norm: 0.8443486247522838, iteration: 343663
loss: 0.9601419568061829,grad_norm: 0.8131432746427139, iteration: 343664
loss: 1.0455763339996338,grad_norm: 0.9999992943629498, iteration: 343665
loss: 0.9896101951599121,grad_norm: 0.7414986228333104, iteration: 343666
loss: 0.9640061259269714,grad_norm: 0.7324559441408153, iteration: 343667
loss: 0.9952085614204407,grad_norm: 0.8434224509729753, iteration: 343668
loss: 0.998638927936554,grad_norm: 0.8485083703486499, iteration: 343669
loss: 1.0158010721206665,grad_norm: 0.9489662269679836, iteration: 343670
loss: 0.9780876040458679,grad_norm: 0.7112525888783902, iteration: 343671
loss: 1.0192513465881348,grad_norm: 0.7644928644726955, iteration: 343672
loss: 0.9809833765029907,grad_norm: 0.8072685500134695, iteration: 343673
loss: 0.9984352588653564,grad_norm: 0.7770847050928421, iteration: 343674
loss: 0.9997663497924805,grad_norm: 0.9081304144851511, iteration: 343675
loss: 1.0086086988449097,grad_norm: 0.7286384332121126, iteration: 343676
loss: 1.0136213302612305,grad_norm: 0.8848132466834208, iteration: 343677
loss: 1.0137526988983154,grad_norm: 0.7594349813796801, iteration: 343678
loss: 1.0226686000823975,grad_norm: 0.7474863734801188, iteration: 343679
loss: 1.014042615890503,grad_norm: 0.9108341744870784, iteration: 343680
loss: 1.1583154201507568,grad_norm: 0.999999726988909, iteration: 343681
loss: 1.0071293115615845,grad_norm: 0.8009649707900653, iteration: 343682
loss: 1.0510225296020508,grad_norm: 0.7016450364961628, iteration: 343683
loss: 1.0102417469024658,grad_norm: 0.999999011773025, iteration: 343684
loss: 0.9837177991867065,grad_norm: 0.7059078279851855, iteration: 343685
loss: 1.0126690864562988,grad_norm: 0.7675009795910483, iteration: 343686
loss: 0.9951947331428528,grad_norm: 0.8545405374184029, iteration: 343687
loss: 1.017785906791687,grad_norm: 0.7642168123637272, iteration: 343688
loss: 0.9786568880081177,grad_norm: 0.8590408736622854, iteration: 343689
loss: 1.0054666996002197,grad_norm: 0.6687397337405251, iteration: 343690
loss: 0.999345600605011,grad_norm: 0.7350650239486672, iteration: 343691
loss: 0.9990251660346985,grad_norm: 0.6629683134381353, iteration: 343692
loss: 0.9638294577598572,grad_norm: 0.8238446701677162, iteration: 343693
loss: 0.9904215931892395,grad_norm: 0.8415110678392609, iteration: 343694
loss: 1.0031077861785889,grad_norm: 0.6593388627407214, iteration: 343695
loss: 1.0264453887939453,grad_norm: 0.8655746747968637, iteration: 343696
loss: 0.9956256151199341,grad_norm: 0.9999990452310562, iteration: 343697
loss: 0.9960194230079651,grad_norm: 0.8398337981378703, iteration: 343698
loss: 1.013681173324585,grad_norm: 0.8855357172845209, iteration: 343699
loss: 1.0080208778381348,grad_norm: 0.873698243739244, iteration: 343700
loss: 1.0031336545944214,grad_norm: 0.9976252324954586, iteration: 343701
loss: 0.9719517230987549,grad_norm: 0.8640100203724321, iteration: 343702
loss: 1.0387697219848633,grad_norm: 0.8899630823362646, iteration: 343703
loss: 0.9546754956245422,grad_norm: 0.9343938599881496, iteration: 343704
loss: 0.9918105006217957,grad_norm: 0.7918640583465609, iteration: 343705
loss: 0.9815768599510193,grad_norm: 0.7223876465525423, iteration: 343706
loss: 0.9782440066337585,grad_norm: 0.8425461067576708, iteration: 343707
loss: 0.9506401419639587,grad_norm: 0.6369340107057618, iteration: 343708
loss: 0.9712586998939514,grad_norm: 0.7256176886572595, iteration: 343709
loss: 0.9956782460212708,grad_norm: 0.8099838411455632, iteration: 343710
loss: 1.0879780054092407,grad_norm: 0.9999991753623767, iteration: 343711
loss: 0.9833033084869385,grad_norm: 0.8619611899800893, iteration: 343712
loss: 1.0175782442092896,grad_norm: 0.7893541513972308, iteration: 343713
loss: 1.0073511600494385,grad_norm: 0.8716466210777634, iteration: 343714
loss: 1.0086252689361572,grad_norm: 0.7713484702044733, iteration: 343715
loss: 1.0060890913009644,grad_norm: 0.7608943646980916, iteration: 343716
loss: 1.003370761871338,grad_norm: 0.9283215136733186, iteration: 343717
loss: 1.009643793106079,grad_norm: 0.6643255275129109, iteration: 343718
loss: 0.9756186604499817,grad_norm: 0.8282685581417601, iteration: 343719
loss: 0.9626837968826294,grad_norm: 0.9423479132077304, iteration: 343720
loss: 1.0108535289764404,grad_norm: 0.6675597139880042, iteration: 343721
loss: 1.0048149824142456,grad_norm: 0.7854423177081667, iteration: 343722
loss: 1.133107304573059,grad_norm: 0.9999993258821743, iteration: 343723
loss: 1.0263259410858154,grad_norm: 0.8770076171294641, iteration: 343724
loss: 1.002150058746338,grad_norm: 0.8446346218113829, iteration: 343725
loss: 0.9924836754798889,grad_norm: 0.8161000830850123, iteration: 343726
loss: 1.0002952814102173,grad_norm: 0.856717374714296, iteration: 343727
loss: 1.013587236404419,grad_norm: 0.8437680096193338, iteration: 343728
loss: 1.0535560846328735,grad_norm: 0.8396828250332259, iteration: 343729
loss: 0.9682432413101196,grad_norm: 0.9999991144777761, iteration: 343730
loss: 0.9893277883529663,grad_norm: 0.8853355207701634, iteration: 343731
loss: 0.9966268539428711,grad_norm: 0.9456134827481913, iteration: 343732
loss: 1.006722331047058,grad_norm: 0.7510237947694393, iteration: 343733
loss: 0.9724503755569458,grad_norm: 0.8327205874268422, iteration: 343734
loss: 0.9658666253089905,grad_norm: 0.8295861106523775, iteration: 343735
loss: 1.0267078876495361,grad_norm: 0.8505422189982885, iteration: 343736
loss: 1.021141767501831,grad_norm: 0.8893819298868132, iteration: 343737
loss: 0.963463544845581,grad_norm: 0.7699624514653997, iteration: 343738
loss: 0.9560871124267578,grad_norm: 0.7226023145818596, iteration: 343739
loss: 0.957100510597229,grad_norm: 0.846975941245567, iteration: 343740
loss: 1.008087396621704,grad_norm: 0.9999997853832291, iteration: 343741
loss: 1.0201184749603271,grad_norm: 0.8092533381649313, iteration: 343742
loss: 0.996131181716919,grad_norm: 0.7651614045590858, iteration: 343743
loss: 0.9899096488952637,grad_norm: 0.6752175139287776, iteration: 343744
loss: 0.9671285152435303,grad_norm: 0.8669638568522388, iteration: 343745
loss: 1.023647427558899,grad_norm: 0.8486086825123204, iteration: 343746
loss: 0.995543897151947,grad_norm: 0.7530838866914802, iteration: 343747
loss: 0.9806659817695618,grad_norm: 0.8989353099380611, iteration: 343748
loss: 0.9765256643295288,grad_norm: 0.7798344812999962, iteration: 343749
loss: 1.0123056173324585,grad_norm: 0.9530168157902088, iteration: 343750
loss: 1.0281000137329102,grad_norm: 0.7459490429233387, iteration: 343751
loss: 0.973092257976532,grad_norm: 0.9856442201655747, iteration: 343752
loss: 0.990856945514679,grad_norm: 0.780994330226404, iteration: 343753
loss: 1.0424062013626099,grad_norm: 0.9999991956628016, iteration: 343754
loss: 0.9643777012825012,grad_norm: 0.9999577964102615, iteration: 343755
loss: 0.958782434463501,grad_norm: 0.8039221457106184, iteration: 343756
loss: 1.030222773551941,grad_norm: 0.7887492063936001, iteration: 343757
loss: 1.0002046823501587,grad_norm: 0.904587917046142, iteration: 343758
loss: 1.0066148042678833,grad_norm: 0.8882384264970401, iteration: 343759
loss: 0.9886343479156494,grad_norm: 0.7139554064080299, iteration: 343760
loss: 0.9720255732536316,grad_norm: 0.7151755505774601, iteration: 343761
loss: 0.9460397958755493,grad_norm: 0.8730343196567989, iteration: 343762
loss: 0.972033679485321,grad_norm: 0.9999992889765256, iteration: 343763
loss: 0.9808997511863708,grad_norm: 0.9280545435660895, iteration: 343764
loss: 1.0079917907714844,grad_norm: 0.8166380191847231, iteration: 343765
loss: 0.9704368710517883,grad_norm: 0.8503673114904152, iteration: 343766
loss: 1.0280699729919434,grad_norm: 0.9833868220767986, iteration: 343767
loss: 1.0184848308563232,grad_norm: 0.7325715433585899, iteration: 343768
loss: 1.016804575920105,grad_norm: 0.6504970566672879, iteration: 343769
loss: 1.0086009502410889,grad_norm: 0.860027855059827, iteration: 343770
loss: 1.008790135383606,grad_norm: 0.7291626298263831, iteration: 343771
loss: 0.9969667196273804,grad_norm: 0.8113979499485027, iteration: 343772
loss: 0.965857207775116,grad_norm: 0.8540188744423172, iteration: 343773
loss: 0.9792464375495911,grad_norm: 0.7278940087702508, iteration: 343774
loss: 0.9902876615524292,grad_norm: 0.9999990772170037, iteration: 343775
loss: 0.9753219485282898,grad_norm: 0.9171355630472331, iteration: 343776
loss: 1.0948269367218018,grad_norm: 0.9999996196987864, iteration: 343777
loss: 0.9997037649154663,grad_norm: 0.7464144718321641, iteration: 343778
loss: 0.9777054786682129,grad_norm: 0.9507314671847817, iteration: 343779
loss: 1.04160737991333,grad_norm: 0.8585036827821254, iteration: 343780
loss: 1.0088611841201782,grad_norm: 0.887112201377661, iteration: 343781
loss: 0.9937300086021423,grad_norm: 0.7918355103027421, iteration: 343782
loss: 0.9804538488388062,grad_norm: 0.7573967238639296, iteration: 343783
loss: 0.9803746342658997,grad_norm: 0.8283752953689146, iteration: 343784
loss: 1.0131542682647705,grad_norm: 0.8217282505469795, iteration: 343785
loss: 1.0495012998580933,grad_norm: 0.9999993448374999, iteration: 343786
loss: 1.0474637746810913,grad_norm: 0.915170209442724, iteration: 343787
loss: 1.0394295454025269,grad_norm: 0.9999995398607417, iteration: 343788
loss: 1.0125540494918823,grad_norm: 0.7701535844454296, iteration: 343789
loss: 0.9769763350486755,grad_norm: 0.9999999181060577, iteration: 343790
loss: 0.9528182744979858,grad_norm: 0.916120349020325, iteration: 343791
loss: 1.0168852806091309,grad_norm: 0.7937686672535084, iteration: 343792
loss: 1.0253719091415405,grad_norm: 0.9999995891333169, iteration: 343793
loss: 1.0057852268218994,grad_norm: 0.8019939794167242, iteration: 343794
loss: 0.9769978523254395,grad_norm: 0.8899898586541196, iteration: 343795
loss: 1.1182323694229126,grad_norm: 0.9999990638526846, iteration: 343796
loss: 0.9740368723869324,grad_norm: 0.9999991287515608, iteration: 343797
loss: 0.9953153133392334,grad_norm: 0.8553008159271644, iteration: 343798
loss: 0.985821545124054,grad_norm: 0.8366428582353861, iteration: 343799
loss: 0.9818106293678284,grad_norm: 0.9999992234277317, iteration: 343800
loss: 1.0315324068069458,grad_norm: 0.8754261764301753, iteration: 343801
loss: 0.9606758952140808,grad_norm: 0.9200438810167126, iteration: 343802
loss: 0.9809558987617493,grad_norm: 0.9271512277191235, iteration: 343803
loss: 1.0237880945205688,grad_norm: 0.7568127599017062, iteration: 343804
loss: 1.0280271768569946,grad_norm: 0.7325581637708106, iteration: 343805
loss: 0.9980921149253845,grad_norm: 0.9999992289023953, iteration: 343806
loss: 1.0004162788391113,grad_norm: 0.9999995128438286, iteration: 343807
loss: 1.006410002708435,grad_norm: 0.675901807385908, iteration: 343808
loss: 0.9825556874275208,grad_norm: 0.8788298381212187, iteration: 343809
loss: 0.9585272669792175,grad_norm: 0.9510446060877157, iteration: 343810
loss: 0.9934450387954712,grad_norm: 0.8027315249429193, iteration: 343811
loss: 1.0165846347808838,grad_norm: 0.99999922348706, iteration: 343812
loss: 0.9969639182090759,grad_norm: 0.7257798797140927, iteration: 343813
loss: 0.9861166477203369,grad_norm: 0.8763500480001826, iteration: 343814
loss: 0.9732187390327454,grad_norm: 0.7029647761306425, iteration: 343815
loss: 0.9570710062980652,grad_norm: 0.9269827817952703, iteration: 343816
loss: 1.015012264251709,grad_norm: 0.7913732934273451, iteration: 343817
loss: 1.020981788635254,grad_norm: 0.8049426449781725, iteration: 343818
loss: 1.0739409923553467,grad_norm: 0.9999992365888215, iteration: 343819
loss: 1.1232720613479614,grad_norm: 0.9999994757847321, iteration: 343820
loss: 0.9876686930656433,grad_norm: 0.992266028514602, iteration: 343821
loss: 1.011496663093567,grad_norm: 0.6818766400208015, iteration: 343822
loss: 0.9709687829017639,grad_norm: 0.7302687488566565, iteration: 343823
loss: 0.9707820415496826,grad_norm: 0.8423457288566429, iteration: 343824
loss: 1.0046192407608032,grad_norm: 0.741053206761177, iteration: 343825
loss: 0.941875696182251,grad_norm: 0.9999990982640508, iteration: 343826
loss: 1.002456545829773,grad_norm: 0.8109693954174516, iteration: 343827
loss: 0.966185450553894,grad_norm: 0.826585861898783, iteration: 343828
loss: 0.9805333614349365,grad_norm: 0.8323478946586689, iteration: 343829
loss: 1.024093508720398,grad_norm: 0.9695253407599977, iteration: 343830
loss: 1.0373647212982178,grad_norm: 0.9999993094934865, iteration: 343831
loss: 1.0322835445404053,grad_norm: 0.7434733085460938, iteration: 343832
loss: 1.0012540817260742,grad_norm: 0.8995975948513774, iteration: 343833
loss: 0.9895763397216797,grad_norm: 0.8188816112047613, iteration: 343834
loss: 1.0075199604034424,grad_norm: 0.871848466698092, iteration: 343835
loss: 1.0347926616668701,grad_norm: 0.8328542186809446, iteration: 343836
loss: 1.0033148527145386,grad_norm: 0.9926972314382082, iteration: 343837
loss: 1.0453822612762451,grad_norm: 0.7703475901532625, iteration: 343838
loss: 1.0161672830581665,grad_norm: 1.0000000086914171, iteration: 343839
loss: 0.9787726998329163,grad_norm: 0.834598059336783, iteration: 343840
loss: 1.1223183870315552,grad_norm: 0.9999995527647686, iteration: 343841
loss: 1.004058837890625,grad_norm: 0.9205213784915924, iteration: 343842
loss: 1.0628057718276978,grad_norm: 0.9999992131316711, iteration: 343843
loss: 1.0106487274169922,grad_norm: 0.9586415844751108, iteration: 343844
loss: 0.9919170141220093,grad_norm: 0.9401821594496311, iteration: 343845
loss: 0.992307186126709,grad_norm: 0.9010492637437858, iteration: 343846
loss: 0.975283682346344,grad_norm: 0.750420654854772, iteration: 343847
loss: 0.9732874035835266,grad_norm: 0.7383195556309081, iteration: 343848
loss: 0.979241132736206,grad_norm: 0.8283519043515126, iteration: 343849
loss: 0.971596360206604,grad_norm: 0.7384382750141923, iteration: 343850
loss: 1.0139230489730835,grad_norm: 0.8306614044497311, iteration: 343851
loss: 0.9876539707183838,grad_norm: 0.6902869645010918, iteration: 343852
loss: 1.0091906785964966,grad_norm: 0.9999999352863146, iteration: 343853
loss: 0.9481246471405029,grad_norm: 0.7883158327279187, iteration: 343854
loss: 0.9713031053543091,grad_norm: 0.8285364443842987, iteration: 343855
loss: 1.029884934425354,grad_norm: 0.9999999216054765, iteration: 343856
loss: 0.9878031015396118,grad_norm: 0.7548986610022369, iteration: 343857
loss: 1.083335280418396,grad_norm: 0.999999227721917, iteration: 343858
loss: 1.0062670707702637,grad_norm: 0.9488782478736906, iteration: 343859
loss: 0.9702944755554199,grad_norm: 0.7661127621330786, iteration: 343860
loss: 1.046877384185791,grad_norm: 0.9003735730358065, iteration: 343861
loss: 1.0214362144470215,grad_norm: 0.9999999164830987, iteration: 343862
loss: 1.0400868654251099,grad_norm: 0.6982959758263179, iteration: 343863
loss: 0.9731240272521973,grad_norm: 0.8766804272451937, iteration: 343864
loss: 1.0202773809432983,grad_norm: 0.7228053249288174, iteration: 343865
loss: 0.9806414842605591,grad_norm: 0.7856614718927382, iteration: 343866
loss: 1.0247975587844849,grad_norm: 0.9529886239510686, iteration: 343867
loss: 1.0200921297073364,grad_norm: 0.8139130791284939, iteration: 343868
loss: 1.0501723289489746,grad_norm: 0.9373629076863581, iteration: 343869
loss: 0.9590367078781128,grad_norm: 0.8717261929142569, iteration: 343870
loss: 1.0051031112670898,grad_norm: 0.7751596727203379, iteration: 343871
loss: 0.9877936840057373,grad_norm: 0.6743408790901582, iteration: 343872
loss: 1.0040678977966309,grad_norm: 0.9106182180966808, iteration: 343873
loss: 1.0325462818145752,grad_norm: 0.8281554174934143, iteration: 343874
loss: 0.9878116846084595,grad_norm: 0.6541519464339758, iteration: 343875
loss: 1.0058655738830566,grad_norm: 0.9999992724209184, iteration: 343876
loss: 1.0212572813034058,grad_norm: 0.8580171707671227, iteration: 343877
loss: 1.0022811889648438,grad_norm: 0.8089621388437244, iteration: 343878
loss: 1.008142113685608,grad_norm: 0.7862128539330505, iteration: 343879
loss: 1.000492811203003,grad_norm: 0.8110242704105404, iteration: 343880
loss: 1.0124903917312622,grad_norm: 0.9999993691445829, iteration: 343881
loss: 1.0182889699935913,grad_norm: 0.8335208243152447, iteration: 343882
loss: 0.994515061378479,grad_norm: 0.7100183044549293, iteration: 343883
loss: 1.033865213394165,grad_norm: 0.8412526704948077, iteration: 343884
loss: 0.9657339453697205,grad_norm: 0.7425462883363411, iteration: 343885
loss: 0.9645853638648987,grad_norm: 0.8228826245829538, iteration: 343886
loss: 1.0313360691070557,grad_norm: 0.9417973328244631, iteration: 343887
loss: 1.0260754823684692,grad_norm: 0.7541678046500838, iteration: 343888
loss: 1.0026519298553467,grad_norm: 0.799541157217491, iteration: 343889
loss: 1.006101131439209,grad_norm: 0.82077471980651, iteration: 343890
loss: 1.0104035139083862,grad_norm: 0.9999991776257124, iteration: 343891
loss: 1.0401026010513306,grad_norm: 0.9999997835356361, iteration: 343892
loss: 0.9879528880119324,grad_norm: 0.8246279755032878, iteration: 343893
loss: 1.003118872642517,grad_norm: 0.7478702827443688, iteration: 343894
loss: 0.9913078546524048,grad_norm: 0.8093744482037724, iteration: 343895
loss: 1.000930905342102,grad_norm: 0.9522820221259195, iteration: 343896
loss: 1.013116478919983,grad_norm: 0.9999993397878209, iteration: 343897
loss: 1.03632390499115,grad_norm: 0.9999993095948234, iteration: 343898
loss: 1.083310604095459,grad_norm: 0.9999995125451373, iteration: 343899
loss: 0.9864077568054199,grad_norm: 0.9768057144032655, iteration: 343900
loss: 1.010672926902771,grad_norm: 0.9029976582451477, iteration: 343901
loss: 0.9806516766548157,grad_norm: 0.8561110140907805, iteration: 343902
loss: 0.943806529045105,grad_norm: 0.6903401391168112, iteration: 343903
loss: 0.9925620555877686,grad_norm: 0.8484094233738866, iteration: 343904
loss: 0.9814647436141968,grad_norm: 0.9818637647298617, iteration: 343905
loss: 1.000783085823059,grad_norm: 0.7592431168356838, iteration: 343906
loss: 0.9890896677970886,grad_norm: 0.8092004741873047, iteration: 343907
loss: 1.076505422592163,grad_norm: 0.9999991690876685, iteration: 343908
loss: 0.9550374746322632,grad_norm: 0.9468609133753721, iteration: 343909
loss: 1.0211308002471924,grad_norm: 0.7900921985323623, iteration: 343910
loss: 1.0558263063430786,grad_norm: 0.9999991928169238, iteration: 343911
loss: 0.9592921137809753,grad_norm: 0.8379387443049146, iteration: 343912
loss: 1.0146281719207764,grad_norm: 0.8794622792191671, iteration: 343913
loss: 0.9478604793548584,grad_norm: 0.7867906027291157, iteration: 343914
loss: 0.9659372568130493,grad_norm: 0.8212759806928639, iteration: 343915
loss: 1.0209200382232666,grad_norm: 0.7908578067080333, iteration: 343916
loss: 1.0033584833145142,grad_norm: 0.7923222802737264, iteration: 343917
loss: 0.9857296347618103,grad_norm: 0.8335738841927397, iteration: 343918
loss: 0.9947044849395752,grad_norm: 0.9005784023274686, iteration: 343919
loss: 1.001631736755371,grad_norm: 0.9999996528176843, iteration: 343920
loss: 0.9952214956283569,grad_norm: 0.9999991491887041, iteration: 343921
loss: 1.1139051914215088,grad_norm: 0.9999993041304872, iteration: 343922
loss: 0.9715314507484436,grad_norm: 0.8832204919176483, iteration: 343923
loss: 0.9812236428260803,grad_norm: 0.8229870961376938, iteration: 343924
loss: 0.983847975730896,grad_norm: 0.8953695773196008, iteration: 343925
loss: 1.01313316822052,grad_norm: 0.6732949933019415, iteration: 343926
loss: 0.9805750250816345,grad_norm: 0.8447170085021602, iteration: 343927
loss: 1.011626958847046,grad_norm: 0.9169758913295987, iteration: 343928
loss: 0.9869357943534851,grad_norm: 0.7556921927985019, iteration: 343929
loss: 1.0076311826705933,grad_norm: 0.9999989929692271, iteration: 343930
loss: 1.0106648206710815,grad_norm: 0.7930209425705146, iteration: 343931
loss: 1.003201961517334,grad_norm: 0.8569987908799691, iteration: 343932
loss: 1.09279465675354,grad_norm: 0.9999989520448335, iteration: 343933
loss: 0.9845689535140991,grad_norm: 0.856400827503645, iteration: 343934
loss: 1.0977352857589722,grad_norm: 0.9964405961952084, iteration: 343935
loss: 1.0218063592910767,grad_norm: 0.8882704922445775, iteration: 343936
loss: 0.9813570380210876,grad_norm: 0.8215555922265046, iteration: 343937
loss: 0.986177384853363,grad_norm: 0.7045875557142341, iteration: 343938
loss: 1.0052610635757446,grad_norm: 0.9049874709783651, iteration: 343939
loss: 0.9790164828300476,grad_norm: 0.8956135072341027, iteration: 343940
loss: 0.991970956325531,grad_norm: 0.8699197962284709, iteration: 343941
loss: 0.9862944483757019,grad_norm: 0.8913710644407213, iteration: 343942
loss: 1.026925802230835,grad_norm: 0.6835390037452673, iteration: 343943
loss: 1.0040572881698608,grad_norm: 0.9999991333423579, iteration: 343944
loss: 0.9780420064926147,grad_norm: 0.9081326207800907, iteration: 343945
loss: 0.9953111410140991,grad_norm: 0.8722445431740259, iteration: 343946
loss: 1.0077069997787476,grad_norm: 0.7592453427645669, iteration: 343947
loss: 1.0224825143814087,grad_norm: 0.7947223326096664, iteration: 343948
loss: 1.0059467554092407,grad_norm: 0.7016509450821196, iteration: 343949
loss: 0.9866303205490112,grad_norm: 0.999999100341611, iteration: 343950
loss: 0.97394198179245,grad_norm: 0.7612581646133262, iteration: 343951
loss: 1.0482335090637207,grad_norm: 0.7396057582800766, iteration: 343952
loss: 1.0014939308166504,grad_norm: 0.8556545448993269, iteration: 343953
loss: 1.0178521871566772,grad_norm: 0.657963585014636, iteration: 343954
loss: 0.9925228953361511,grad_norm: 0.841010933423421, iteration: 343955
loss: 1.0053139925003052,grad_norm: 0.8536159629224025, iteration: 343956
loss: 1.0249263048171997,grad_norm: 0.9324151454030655, iteration: 343957
loss: 1.0294291973114014,grad_norm: 0.6468395470595716, iteration: 343958
loss: 1.0194542407989502,grad_norm: 0.9762342208278552, iteration: 343959
loss: 1.0021071434020996,grad_norm: 0.7784789191639931, iteration: 343960
loss: 1.0298631191253662,grad_norm: 0.8371063732769772, iteration: 343961
loss: 0.9678325653076172,grad_norm: 0.9295266794426718, iteration: 343962
loss: 0.9814480543136597,grad_norm: 0.9999995645025406, iteration: 343963
loss: 1.0264947414398193,grad_norm: 0.8210905976439408, iteration: 343964
loss: 1.0164191722869873,grad_norm: 0.7618578639426019, iteration: 343965
loss: 0.9963350892066956,grad_norm: 0.8274274795652888, iteration: 343966
loss: 1.0734864473342896,grad_norm: 0.999999834848391, iteration: 343967
loss: 0.9936339259147644,grad_norm: 0.8917385004948803, iteration: 343968
loss: 1.0553998947143555,grad_norm: 0.8242532890667531, iteration: 343969
loss: 0.9761929512023926,grad_norm: 0.9775233589563183, iteration: 343970
loss: 1.0112375020980835,grad_norm: 0.8142553805826819, iteration: 343971
loss: 1.0023682117462158,grad_norm: 0.7471936289925101, iteration: 343972
loss: 1.027683138847351,grad_norm: 0.937919332609429, iteration: 343973
loss: 1.0746761560440063,grad_norm: 0.8781090752330923, iteration: 343974
loss: 1.0400179624557495,grad_norm: 0.9999989125956192, iteration: 343975
loss: 1.0236932039260864,grad_norm: 0.9706601898622287, iteration: 343976
loss: 1.0101768970489502,grad_norm: 0.8811587117002042, iteration: 343977
loss: 0.9693313837051392,grad_norm: 0.9195292591450145, iteration: 343978
loss: 0.9421009421348572,grad_norm: 0.6637639037289267, iteration: 343979
loss: 1.0019590854644775,grad_norm: 0.7871350072786287, iteration: 343980
loss: 1.0139418840408325,grad_norm: 0.7685543710013284, iteration: 343981
loss: 1.0446490049362183,grad_norm: 0.8232647522781352, iteration: 343982
loss: 0.9923983812332153,grad_norm: 0.7098166669084942, iteration: 343983
loss: 0.9947935342788696,grad_norm: 0.8286407247759782, iteration: 343984
loss: 0.9920064210891724,grad_norm: 0.8606006839331791, iteration: 343985
loss: 1.011407494544983,grad_norm: 0.9999991368001607, iteration: 343986
loss: 1.0316680669784546,grad_norm: 0.8149985462264833, iteration: 343987
loss: 1.0156302452087402,grad_norm: 0.980423497703673, iteration: 343988
loss: 1.0024700164794922,grad_norm: 0.7991082307729028, iteration: 343989
loss: 1.0104275941848755,grad_norm: 0.7879368069910015, iteration: 343990
loss: 1.0001754760742188,grad_norm: 0.8332807451908434, iteration: 343991
loss: 1.0056822299957275,grad_norm: 0.8305700267950767, iteration: 343992
loss: 1.001992106437683,grad_norm: 0.8879069678220932, iteration: 343993
loss: 0.9935845732688904,grad_norm: 0.9999991432106399, iteration: 343994
loss: 0.9981133937835693,grad_norm: 0.8353312572058468, iteration: 343995
loss: 0.9696201086044312,grad_norm: 0.7331041203008921, iteration: 343996
loss: 1.0439059734344482,grad_norm: 0.9703158376738451, iteration: 343997
loss: 1.0033372640609741,grad_norm: 0.8228756981240699, iteration: 343998
loss: 0.9821743369102478,grad_norm: 0.7723507062439983, iteration: 343999
loss: 0.9711258411407471,grad_norm: 0.8645956942559494, iteration: 344000
loss: 1.1294660568237305,grad_norm: 0.9999992145662118, iteration: 344001
loss: 0.9967526793479919,grad_norm: 0.7685271665775505, iteration: 344002
loss: 1.0518382787704468,grad_norm: 0.9999997750334838, iteration: 344003
loss: 0.980151891708374,grad_norm: 0.8174801513265866, iteration: 344004
loss: 1.0135550498962402,grad_norm: 0.8540656254792793, iteration: 344005
loss: 1.01681649684906,grad_norm: 0.7403641739022697, iteration: 344006
loss: 1.0115582942962646,grad_norm: 0.9999998514745296, iteration: 344007
loss: 0.9681370854377747,grad_norm: 0.7839536213488083, iteration: 344008
loss: 0.9891876578330994,grad_norm: 0.7986258096959326, iteration: 344009
loss: 0.9985893964767456,grad_norm: 0.8528129804112952, iteration: 344010
loss: 0.9774337410926819,grad_norm: 0.741961261897447, iteration: 344011
loss: 1.001150131225586,grad_norm: 0.8140054148650506, iteration: 344012
loss: 0.956243634223938,grad_norm: 0.9999990331049718, iteration: 344013
loss: 0.987186074256897,grad_norm: 0.9999992397877376, iteration: 344014
loss: 0.9919751286506653,grad_norm: 0.9852520429146248, iteration: 344015
loss: 1.0082155466079712,grad_norm: 0.7195862585969786, iteration: 344016
loss: 1.0043833255767822,grad_norm: 0.8070167465959226, iteration: 344017
loss: 1.034247636795044,grad_norm: 0.7376760283364636, iteration: 344018
loss: 1.0795178413391113,grad_norm: 0.9141611736978966, iteration: 344019
loss: 0.9887785315513611,grad_norm: 0.8858061281901809, iteration: 344020
loss: 1.0477131605148315,grad_norm: 0.8520234489379771, iteration: 344021
loss: 1.010306715965271,grad_norm: 0.9258399221082924, iteration: 344022
loss: 0.9932659864425659,grad_norm: 0.8113049824279774, iteration: 344023
loss: 0.9776726365089417,grad_norm: 0.7179272358432001, iteration: 344024
loss: 1.0293880701065063,grad_norm: 0.9219405870990661, iteration: 344025
loss: 1.0229623317718506,grad_norm: 0.8660164606327867, iteration: 344026
loss: 1.0226619243621826,grad_norm: 0.7500230932648349, iteration: 344027
loss: 1.0238170623779297,grad_norm: 0.8670676046606678, iteration: 344028
loss: 1.030176043510437,grad_norm: 0.9999992982575259, iteration: 344029
loss: 0.9774799346923828,grad_norm: 0.7742366512602046, iteration: 344030
loss: 1.0023832321166992,grad_norm: 0.7828608416854809, iteration: 344031
loss: 0.9471122026443481,grad_norm: 0.9999990390096303, iteration: 344032
loss: 1.1204050779342651,grad_norm: 0.9403071951648625, iteration: 344033
loss: 0.9984537959098816,grad_norm: 0.6521736735495322, iteration: 344034
loss: 1.0237833261489868,grad_norm: 0.8058537534843343, iteration: 344035
loss: 1.0194370746612549,grad_norm: 0.9523350829675135, iteration: 344036
loss: 0.9933955669403076,grad_norm: 0.8186912683209853, iteration: 344037
loss: 0.9774914979934692,grad_norm: 0.760048045086796, iteration: 344038
loss: 0.9998459815979004,grad_norm: 0.8881138656190588, iteration: 344039
loss: 1.027777075767517,grad_norm: 0.7422623817295229, iteration: 344040
loss: 1.0129963159561157,grad_norm: 0.8241450962386452, iteration: 344041
loss: 1.0278818607330322,grad_norm: 0.9704562558245369, iteration: 344042
loss: 1.0017319917678833,grad_norm: 0.7603584095600772, iteration: 344043
loss: 0.9392238855361938,grad_norm: 0.8845424118913128, iteration: 344044
loss: 0.9501866102218628,grad_norm: 0.7909006294188433, iteration: 344045
loss: 1.0179513692855835,grad_norm: 0.8082555356255309, iteration: 344046
loss: 0.9963837265968323,grad_norm: 0.7431405070522292, iteration: 344047
loss: 1.0150803327560425,grad_norm: 0.9938586729849552, iteration: 344048
loss: 0.9792411923408508,grad_norm: 0.7911418446760323, iteration: 344049
loss: 1.0040637254714966,grad_norm: 0.7666156858321824, iteration: 344050
loss: 1.0105525255203247,grad_norm: 0.8836178911160913, iteration: 344051
loss: 0.9932079911231995,grad_norm: 0.8972122388474816, iteration: 344052
loss: 1.0414122343063354,grad_norm: 0.8748881592557026, iteration: 344053
loss: 0.9992405772209167,grad_norm: 0.825269908986072, iteration: 344054
loss: 0.959827184677124,grad_norm: 0.9999999856670743, iteration: 344055
loss: 1.0264054536819458,grad_norm: 0.9125269344273711, iteration: 344056
loss: 0.9704525470733643,grad_norm: 0.7502511735170658, iteration: 344057
loss: 1.0225965976715088,grad_norm: 0.9999990349408939, iteration: 344058
loss: 0.9968529343605042,grad_norm: 0.8886409597065958, iteration: 344059
loss: 0.9899263381958008,grad_norm: 0.8083808909890355, iteration: 344060
loss: 0.9884901642799377,grad_norm: 0.663149118804893, iteration: 344061
loss: 0.9809983372688293,grad_norm: 0.7735908854255157, iteration: 344062
loss: 1.007153034210205,grad_norm: 0.8120258763249354, iteration: 344063
loss: 1.017216444015503,grad_norm: 0.7627440263812619, iteration: 344064
loss: 0.9986129403114319,grad_norm: 0.6989210307324442, iteration: 344065
loss: 1.07708740234375,grad_norm: 0.9999999526090738, iteration: 344066
loss: 1.020527720451355,grad_norm: 0.8162626097519613, iteration: 344067
loss: 0.9644414782524109,grad_norm: 0.9999993438948316, iteration: 344068
loss: 1.0203568935394287,grad_norm: 0.9999991852538167, iteration: 344069
loss: 1.0161947011947632,grad_norm: 0.6962685805238825, iteration: 344070
loss: 1.0097060203552246,grad_norm: 0.7885325932422874, iteration: 344071
loss: 0.987838089466095,grad_norm: 0.8189051315800958, iteration: 344072
loss: 0.9923494458198547,grad_norm: 0.8757744964897226, iteration: 344073
loss: 0.986358642578125,grad_norm: 0.8578649936010324, iteration: 344074
loss: 0.9972023963928223,grad_norm: 0.7988791320463793, iteration: 344075
loss: 1.017642617225647,grad_norm: 0.8864142648099266, iteration: 344076
loss: 1.0246832370758057,grad_norm: 0.8590216005170379, iteration: 344077
loss: 1.0072910785675049,grad_norm: 0.9999992307482806, iteration: 344078
loss: 0.9651819467544556,grad_norm: 0.7982162224465579, iteration: 344079
loss: 1.0086878538131714,grad_norm: 0.9120495834465087, iteration: 344080
loss: 1.0145760774612427,grad_norm: 0.8373115102509314, iteration: 344081
loss: 0.9962438941001892,grad_norm: 0.8412809297956577, iteration: 344082
loss: 1.0019360780715942,grad_norm: 0.9999996362115338, iteration: 344083
loss: 1.059661865234375,grad_norm: 0.9999994493302498, iteration: 344084
loss: 1.0102829933166504,grad_norm: 0.8479774400429504, iteration: 344085
loss: 1.0039838552474976,grad_norm: 0.7499389437154015, iteration: 344086
loss: 0.975368320941925,grad_norm: 0.7957368380668253, iteration: 344087
loss: 0.9673579335212708,grad_norm: 0.7472334226858653, iteration: 344088
loss: 0.9915351271629333,grad_norm: 0.8679728029243193, iteration: 344089
loss: 0.986639142036438,grad_norm: 0.9999991635263759, iteration: 344090
loss: 1.0041790008544922,grad_norm: 0.7909670864772821, iteration: 344091
loss: 1.0190738439559937,grad_norm: 0.9051386544018959, iteration: 344092
loss: 1.0153145790100098,grad_norm: 0.9219582471109747, iteration: 344093
loss: 1.0118122100830078,grad_norm: 0.7991740625730241, iteration: 344094
loss: 1.0082662105560303,grad_norm: 0.833563060915788, iteration: 344095
loss: 1.0044270753860474,grad_norm: 0.6699186515109725, iteration: 344096
loss: 1.0488687753677368,grad_norm: 0.9608889674042387, iteration: 344097
loss: 0.9672542214393616,grad_norm: 0.9999990755425583, iteration: 344098
loss: 0.995409369468689,grad_norm: 0.8990612909840325, iteration: 344099
loss: 1.027039885520935,grad_norm: 0.9999990696970517, iteration: 344100
loss: 0.9864031672477722,grad_norm: 0.8291135756146956, iteration: 344101
loss: 1.0837061405181885,grad_norm: 0.9087827830523756, iteration: 344102
loss: 0.9998782873153687,grad_norm: 0.9859138097543133, iteration: 344103
loss: 1.158508539199829,grad_norm: 0.9608014908925648, iteration: 344104
loss: 1.0131336450576782,grad_norm: 0.999999234028642, iteration: 344105
loss: 1.0186291933059692,grad_norm: 0.9999997616419029, iteration: 344106
loss: 0.993482768535614,grad_norm: 0.8294435214018492, iteration: 344107
loss: 1.0111483335494995,grad_norm: 0.9002495216564684, iteration: 344108
loss: 1.0836892127990723,grad_norm: 0.8857855537001954, iteration: 344109
loss: 1.0069637298583984,grad_norm: 0.9139162227501142, iteration: 344110
loss: 0.9995861053466797,grad_norm: 0.8870510841309607, iteration: 344111
loss: 1.0139625072479248,grad_norm: 0.9186278811469757, iteration: 344112
loss: 1.0106831789016724,grad_norm: 0.9999991075714839, iteration: 344113
loss: 1.020708680152893,grad_norm: 0.8292805489367874, iteration: 344114
loss: 1.0312283039093018,grad_norm: 0.9216190493465483, iteration: 344115
loss: 1.0144580602645874,grad_norm: 0.7563299363944576, iteration: 344116
loss: 1.016128659248352,grad_norm: 0.7226010438213909, iteration: 344117
loss: 1.0054502487182617,grad_norm: 0.9999991892400154, iteration: 344118
loss: 0.9931257367134094,grad_norm: 0.6180233736844581, iteration: 344119
loss: 1.0149016380310059,grad_norm: 0.8227120440372131, iteration: 344120
loss: 1.0280460119247437,grad_norm: 0.833796749722191, iteration: 344121
loss: 0.9890909790992737,grad_norm: 0.9061275649159555, iteration: 344122
loss: 0.9631195664405823,grad_norm: 0.6963536056385878, iteration: 344123
loss: 1.0560309886932373,grad_norm: 0.9999994467188222, iteration: 344124
loss: 1.0171223878860474,grad_norm: 0.8900510556991996, iteration: 344125
loss: 1.0173553228378296,grad_norm: 0.8491507105849629, iteration: 344126
loss: 1.0045983791351318,grad_norm: 0.792165431087187, iteration: 344127
loss: 0.981120765209198,grad_norm: 0.7683557596876838, iteration: 344128
loss: 1.004143238067627,grad_norm: 0.7614853403189537, iteration: 344129
loss: 0.9836339950561523,grad_norm: 0.7639320486252481, iteration: 344130
loss: 0.9774196743965149,grad_norm: 0.7060778940973779, iteration: 344131
loss: 0.9824112057685852,grad_norm: 0.8334060077085181, iteration: 344132
loss: 0.9630577564239502,grad_norm: 0.7742259337573358, iteration: 344133
loss: 1.0227844715118408,grad_norm: 0.9999991077248924, iteration: 344134
loss: 0.9639741778373718,grad_norm: 0.8139801710011058, iteration: 344135
loss: 1.0100222826004028,grad_norm: 0.9880215017501064, iteration: 344136
loss: 0.9834592938423157,grad_norm: 0.999999023433028, iteration: 344137
loss: 0.9865049123764038,grad_norm: 0.9999991873466497, iteration: 344138
loss: 1.0509151220321655,grad_norm: 0.8750843165867618, iteration: 344139
loss: 1.0022937059402466,grad_norm: 0.7785923107861378, iteration: 344140
loss: 1.0231263637542725,grad_norm: 0.9947833072166408, iteration: 344141
loss: 0.9813655614852905,grad_norm: 0.9999991329941431, iteration: 344142
loss: 0.991544783115387,grad_norm: 0.8463986286158879, iteration: 344143
loss: 1.036529541015625,grad_norm: 0.7204130190594016, iteration: 344144
loss: 0.9730002880096436,grad_norm: 0.7020264353272698, iteration: 344145
loss: 0.9700968265533447,grad_norm: 0.8784289280924941, iteration: 344146
loss: 1.090282917022705,grad_norm: 0.9692247324294403, iteration: 344147
loss: 1.0355241298675537,grad_norm: 0.9957081208283913, iteration: 344148
loss: 0.9778001308441162,grad_norm: 0.9037445332307075, iteration: 344149
loss: 0.9774218201637268,grad_norm: 0.887330516854962, iteration: 344150
loss: 1.0129899978637695,grad_norm: 0.7681430062261987, iteration: 344151
loss: 0.9979273676872253,grad_norm: 0.742510280885125, iteration: 344152
loss: 0.981268048286438,grad_norm: 0.7021451899654787, iteration: 344153
loss: 1.0129939317703247,grad_norm: 0.6954597705543394, iteration: 344154
loss: 0.9849250912666321,grad_norm: 0.8318205021930722, iteration: 344155
loss: 0.9789706468582153,grad_norm: 0.8268291717916422, iteration: 344156
loss: 0.9779688715934753,grad_norm: 0.9513098221390112, iteration: 344157
loss: 1.0386850833892822,grad_norm: 0.9768015856546035, iteration: 344158
loss: 1.0199834108352661,grad_norm: 0.8471553116556354, iteration: 344159
loss: 0.9802482724189758,grad_norm: 0.7776558027505706, iteration: 344160
loss: 1.0258618593215942,grad_norm: 0.8865636299726339, iteration: 344161
loss: 1.0371397733688354,grad_norm: 0.9999998292832201, iteration: 344162
loss: 0.9951037168502808,grad_norm: 0.7869527506189615, iteration: 344163
loss: 1.018856406211853,grad_norm: 0.9618426752580539, iteration: 344164
loss: 1.09644615650177,grad_norm: 0.9999991463521756, iteration: 344165
loss: 0.962902843952179,grad_norm: 0.7655760213038999, iteration: 344166
loss: 1.0422395467758179,grad_norm: 0.9150036058280773, iteration: 344167
loss: 1.0162250995635986,grad_norm: 0.9999996070829209, iteration: 344168
loss: 0.9695479869842529,grad_norm: 0.8321631838721293, iteration: 344169
loss: 1.0861090421676636,grad_norm: 0.9544446223918966, iteration: 344170
loss: 1.0152827501296997,grad_norm: 0.9058311006081642, iteration: 344171
loss: 0.9882882237434387,grad_norm: 0.8013675678266, iteration: 344172
loss: 1.0110501050949097,grad_norm: 0.7917424402388126, iteration: 344173
loss: 1.0253227949142456,grad_norm: 0.9196428659204937, iteration: 344174
loss: 1.0011889934539795,grad_norm: 0.8727212840406013, iteration: 344175
loss: 1.013606309890747,grad_norm: 0.8849505212298692, iteration: 344176
loss: 0.967379093170166,grad_norm: 0.8254808630808402, iteration: 344177
loss: 1.0115708112716675,grad_norm: 0.7520751611233755, iteration: 344178
loss: 1.002310037612915,grad_norm: 0.9004083794037258, iteration: 344179
loss: 0.9877628087997437,grad_norm: 0.6756337792194863, iteration: 344180
loss: 0.9818187952041626,grad_norm: 0.7294157880205361, iteration: 344181
loss: 0.9843240976333618,grad_norm: 0.9999990763579827, iteration: 344182
loss: 1.024240493774414,grad_norm: 0.9999991335011789, iteration: 344183
loss: 1.0074111223220825,grad_norm: 0.9755227763490553, iteration: 344184
loss: 0.9994474649429321,grad_norm: 0.7439323679758435, iteration: 344185
loss: 1.0880035161972046,grad_norm: 0.9999998261299599, iteration: 344186
loss: 0.983192503452301,grad_norm: 0.9566154611601637, iteration: 344187
loss: 0.996700644493103,grad_norm: 0.7327024209878141, iteration: 344188
loss: 0.9954198002815247,grad_norm: 0.8612995755088676, iteration: 344189
loss: 0.9779476523399353,grad_norm: 0.7530682115404845, iteration: 344190
loss: 1.0239380598068237,grad_norm: 0.8069513317009874, iteration: 344191
loss: 1.0118498802185059,grad_norm: 0.7697095921247344, iteration: 344192
loss: 1.0389587879180908,grad_norm: 0.9999995772973506, iteration: 344193
loss: 1.0060791969299316,grad_norm: 0.9030883854447734, iteration: 344194
loss: 1.0391254425048828,grad_norm: 0.8816465890844852, iteration: 344195
loss: 1.0369527339935303,grad_norm: 0.8161873283240468, iteration: 344196
loss: 0.9412978887557983,grad_norm: 0.8186943455753325, iteration: 344197
loss: 1.029373049736023,grad_norm: 0.9999998936413024, iteration: 344198
loss: 0.9796336889266968,grad_norm: 0.9090855273794266, iteration: 344199
loss: 1.0459210872650146,grad_norm: 0.8395195281160901, iteration: 344200
loss: 0.973541259765625,grad_norm: 0.7511127867886529, iteration: 344201
loss: 0.9761698842048645,grad_norm: 0.8771924830017283, iteration: 344202
loss: 0.9975816011428833,grad_norm: 0.873631105422974, iteration: 344203
loss: 1.0376091003417969,grad_norm: 0.7928798308408994, iteration: 344204
loss: 1.0192161798477173,grad_norm: 0.8461839898772648, iteration: 344205
loss: 1.0006824731826782,grad_norm: 0.6597513961385766, iteration: 344206
loss: 1.1470158100128174,grad_norm: 0.9999997075041266, iteration: 344207
loss: 1.0335333347320557,grad_norm: 0.716755754521033, iteration: 344208
loss: 1.0051363706588745,grad_norm: 0.8481167392446732, iteration: 344209
loss: 1.062782883644104,grad_norm: 0.9999990899183853, iteration: 344210
loss: 1.00080144405365,grad_norm: 0.8485498012631654, iteration: 344211
loss: 0.9917604327201843,grad_norm: 0.7503428789419144, iteration: 344212
loss: 1.0136256217956543,grad_norm: 0.9897948956421644, iteration: 344213
loss: 0.9834319353103638,grad_norm: 0.8058230930366242, iteration: 344214
loss: 1.0139460563659668,grad_norm: 0.7878865198862756, iteration: 344215
loss: 1.0005247592926025,grad_norm: 0.8264184077365526, iteration: 344216
loss: 0.9407813549041748,grad_norm: 0.9999990895397374, iteration: 344217
loss: 1.0225694179534912,grad_norm: 0.8821251456790705, iteration: 344218
loss: 1.0245306491851807,grad_norm: 0.7238325721726345, iteration: 344219
loss: 0.9946998357772827,grad_norm: 0.9667460124393406, iteration: 344220
loss: 0.9734246134757996,grad_norm: 0.8571800701152552, iteration: 344221
loss: 0.9944493174552917,grad_norm: 0.8385088452108344, iteration: 344222
loss: 0.9872496724128723,grad_norm: 0.7927473827546304, iteration: 344223
loss: 1.0164345502853394,grad_norm: 0.7416679507660855, iteration: 344224
loss: 1.0470939874649048,grad_norm: 0.9317807092188253, iteration: 344225
loss: 1.003602385520935,grad_norm: 0.9999991941320553, iteration: 344226
loss: 0.9881604313850403,grad_norm: 0.7496634354145993, iteration: 344227
loss: 0.9711158275604248,grad_norm: 0.8147748924686388, iteration: 344228
loss: 1.1275749206542969,grad_norm: 0.9999998415865399, iteration: 344229
loss: 0.9977685809135437,grad_norm: 0.7498060195236014, iteration: 344230
loss: 0.9778326153755188,grad_norm: 0.8944768441391239, iteration: 344231
loss: 0.9986699819564819,grad_norm: 0.8509789910602469, iteration: 344232
loss: 0.9849647283554077,grad_norm: 0.7547615978405816, iteration: 344233
loss: 0.9967526793479919,grad_norm: 0.6788445290021587, iteration: 344234
loss: 1.0069196224212646,grad_norm: 0.7528909133775346, iteration: 344235
loss: 1.0042396783828735,grad_norm: 0.9999992388971168, iteration: 344236
loss: 1.0515637397766113,grad_norm: 0.78479099149484, iteration: 344237
loss: 1.0037882328033447,grad_norm: 0.8658683777478056, iteration: 344238
loss: 1.0111786127090454,grad_norm: 0.8645141530166196, iteration: 344239
loss: 1.0587339401245117,grad_norm: 0.9999999944470042, iteration: 344240
loss: 0.9966325163841248,grad_norm: 0.7516643968349649, iteration: 344241
loss: 1.0423861742019653,grad_norm: 0.9320150374292501, iteration: 344242
loss: 1.003191351890564,grad_norm: 0.9999990332279886, iteration: 344243
loss: 0.9901797771453857,grad_norm: 0.7700930068907149, iteration: 344244
loss: 1.031949520111084,grad_norm: 0.7891184482958927, iteration: 344245
loss: 0.9884663224220276,grad_norm: 0.7388394889654066, iteration: 344246
loss: 1.0053893327713013,grad_norm: 0.999999258408318, iteration: 344247
loss: 1.0040326118469238,grad_norm: 0.6782147010226058, iteration: 344248
loss: 0.9912241697311401,grad_norm: 0.8377764770351788, iteration: 344249
loss: 1.014180064201355,grad_norm: 0.7894639122432665, iteration: 344250
loss: 1.0020238161087036,grad_norm: 0.8290167403826487, iteration: 344251
loss: 0.9771948456764221,grad_norm: 0.808276199344541, iteration: 344252
loss: 1.0047286748886108,grad_norm: 0.7188555819294311, iteration: 344253
loss: 1.0279006958007812,grad_norm: 0.9688653495941993, iteration: 344254
loss: 1.0624825954437256,grad_norm: 0.9999998763770973, iteration: 344255
loss: 0.9910896420478821,grad_norm: 0.8719675232497539, iteration: 344256
loss: 1.0249550342559814,grad_norm: 0.8703611711865269, iteration: 344257
loss: 1.0182223320007324,grad_norm: 0.9148877837027766, iteration: 344258
loss: 0.9900314211845398,grad_norm: 0.7713159480867815, iteration: 344259
loss: 0.977115273475647,grad_norm: 0.7342834671825139, iteration: 344260
loss: 1.0493483543395996,grad_norm: 0.9999998648670076, iteration: 344261
loss: 0.9985520839691162,grad_norm: 0.7947676578184247, iteration: 344262
loss: 0.9901096224784851,grad_norm: 0.7879574784651828, iteration: 344263
loss: 0.9980379343032837,grad_norm: 0.8329592798298403, iteration: 344264
loss: 0.9894883632659912,grad_norm: 0.8342908477403377, iteration: 344265
loss: 1.0275253057479858,grad_norm: 0.7409473219101994, iteration: 344266
loss: 1.015124797821045,grad_norm: 0.9929542386376388, iteration: 344267
loss: 1.046364188194275,grad_norm: 0.7334618263918837, iteration: 344268
loss: 0.9859039783477783,grad_norm: 0.7478822675298654, iteration: 344269
loss: 1.0322895050048828,grad_norm: 0.8929462366315738, iteration: 344270
loss: 0.9960176944732666,grad_norm: 0.897760223847517, iteration: 344271
loss: 1.0159069299697876,grad_norm: 0.8833389418065983, iteration: 344272
loss: 0.9968104362487793,grad_norm: 0.8918997078085145, iteration: 344273
loss: 0.9796925187110901,grad_norm: 0.7952613366409632, iteration: 344274
loss: 0.9967828989028931,grad_norm: 0.9999989750526668, iteration: 344275
loss: 0.9981195330619812,grad_norm: 0.9999995917874003, iteration: 344276
loss: 1.008338451385498,grad_norm: 0.685955097096111, iteration: 344277
loss: 0.9843326210975647,grad_norm: 0.7971508092600611, iteration: 344278
loss: 0.9898843169212341,grad_norm: 0.8621629569620801, iteration: 344279
loss: 1.0453557968139648,grad_norm: 0.9999998861112869, iteration: 344280
loss: 0.9818214178085327,grad_norm: 0.7235412551716247, iteration: 344281
loss: 1.0186398029327393,grad_norm: 0.8574836630499636, iteration: 344282
loss: 0.9787901043891907,grad_norm: 0.93744239004162, iteration: 344283
loss: 1.0791624784469604,grad_norm: 0.9999991363846431, iteration: 344284
loss: 0.990604817867279,grad_norm: 0.8673549038393601, iteration: 344285
loss: 0.9795233011245728,grad_norm: 0.7936989070716624, iteration: 344286
loss: 1.0062443017959595,grad_norm: 0.8474004171708909, iteration: 344287
loss: 0.969142496585846,grad_norm: 0.8523593052104445, iteration: 344288
loss: 1.0418436527252197,grad_norm: 0.8944134895214815, iteration: 344289
loss: 1.0142617225646973,grad_norm: 0.9999990195653619, iteration: 344290
loss: 0.993421733379364,grad_norm: 0.7547570788805995, iteration: 344291
loss: 0.9857833981513977,grad_norm: 0.9451334694360984, iteration: 344292
loss: 1.003551959991455,grad_norm: 0.8248820098665688, iteration: 344293
loss: 1.0130293369293213,grad_norm: 0.8974918138938679, iteration: 344294
loss: 0.9886490106582642,grad_norm: 0.7878408239932972, iteration: 344295
loss: 0.977149248123169,grad_norm: 0.9999995467576692, iteration: 344296
loss: 1.1290408372879028,grad_norm: 0.9999996406648841, iteration: 344297
loss: 0.991671621799469,grad_norm: 0.8390772671047062, iteration: 344298
loss: 0.9938883185386658,grad_norm: 0.9502588433846366, iteration: 344299
loss: 1.0011168718338013,grad_norm: 0.7815643520467996, iteration: 344300
loss: 0.9586132168769836,grad_norm: 0.786900330678391, iteration: 344301
loss: 1.0255287885665894,grad_norm: 0.8817333989001443, iteration: 344302
loss: 0.9959713220596313,grad_norm: 0.8283697824719882, iteration: 344303
loss: 1.0311689376831055,grad_norm: 0.8309786845712763, iteration: 344304
loss: 0.9928408861160278,grad_norm: 0.793041569496428, iteration: 344305
loss: 0.9899093508720398,grad_norm: 0.9656817204280664, iteration: 344306
loss: 1.0426069498062134,grad_norm: 0.9999990610372417, iteration: 344307
loss: 1.013137936592102,grad_norm: 0.7358557788188665, iteration: 344308
loss: 0.9802427887916565,grad_norm: 0.7765907914696074, iteration: 344309
loss: 1.00655996799469,grad_norm: 0.9999992956726598, iteration: 344310
loss: 1.0151180028915405,grad_norm: 0.9090211287374026, iteration: 344311
loss: 0.9988999366760254,grad_norm: 0.7850405189848562, iteration: 344312
loss: 1.0014210939407349,grad_norm: 0.8896352004840136, iteration: 344313
loss: 1.002996802330017,grad_norm: 0.778697135896737, iteration: 344314
loss: 1.000827431678772,grad_norm: 0.9999989926961227, iteration: 344315
loss: 1.0026456117630005,grad_norm: 0.999999112700703, iteration: 344316
loss: 0.9974476099014282,grad_norm: 0.8944599207972658, iteration: 344317
loss: 1.0347238779067993,grad_norm: 0.9861132546280161, iteration: 344318
loss: 0.9557464122772217,grad_norm: 0.8930347471898493, iteration: 344319
loss: 0.9875651001930237,grad_norm: 0.8441429967147914, iteration: 344320
loss: 1.0197352170944214,grad_norm: 0.847984197005547, iteration: 344321
loss: 1.017295241355896,grad_norm: 0.7869418893059186, iteration: 344322
loss: 0.9984912872314453,grad_norm: 0.7346395734204172, iteration: 344323
loss: 0.9721688628196716,grad_norm: 0.8566685968238192, iteration: 344324
loss: 0.9954251050949097,grad_norm: 0.7839068125397429, iteration: 344325
loss: 1.010876178741455,grad_norm: 0.8306157064631352, iteration: 344326
loss: 0.9520143270492554,grad_norm: 0.7383280799684363, iteration: 344327
loss: 1.0779443979263306,grad_norm: 0.999999474037968, iteration: 344328
loss: 1.1041474342346191,grad_norm: 0.8432423258546404, iteration: 344329
loss: 1.1187176704406738,grad_norm: 0.9999991414098096, iteration: 344330
loss: 0.9919816255569458,grad_norm: 0.8918819965816107, iteration: 344331
loss: 1.031605839729309,grad_norm: 0.9999999703563861, iteration: 344332
loss: 1.0108821392059326,grad_norm: 0.7205003608072726, iteration: 344333
loss: 1.0075041055679321,grad_norm: 0.9078777244789631, iteration: 344334
loss: 1.0152347087860107,grad_norm: 0.9431943307704846, iteration: 344335
loss: 1.0041745901107788,grad_norm: 0.8640398367202134, iteration: 344336
loss: 0.9668917059898376,grad_norm: 0.9011317707536214, iteration: 344337
loss: 1.1998786926269531,grad_norm: 0.9999997925828622, iteration: 344338
loss: 0.9847288727760315,grad_norm: 0.8532912859382524, iteration: 344339
loss: 0.9963743686676025,grad_norm: 0.7188661154365935, iteration: 344340
loss: 1.017765998840332,grad_norm: 0.816161164174513, iteration: 344341
loss: 1.00568425655365,grad_norm: 0.8768171523501324, iteration: 344342
loss: 1.0078281164169312,grad_norm: 0.7044608316443276, iteration: 344343
loss: 1.209457278251648,grad_norm: 0.9999997983907581, iteration: 344344
loss: 0.9987562298774719,grad_norm: 0.8987645946053783, iteration: 344345
loss: 1.0135095119476318,grad_norm: 0.9504501379539696, iteration: 344346
loss: 1.0645955801010132,grad_norm: 0.7790761006713602, iteration: 344347
loss: 0.9604634046554565,grad_norm: 0.6334159917030145, iteration: 344348
loss: 1.0101817846298218,grad_norm: 0.9999990400411657, iteration: 344349
loss: 0.9877943992614746,grad_norm: 0.7550297437672093, iteration: 344350
loss: 1.0015885829925537,grad_norm: 0.8302023627658885, iteration: 344351
loss: 1.0228599309921265,grad_norm: 1.0000000508732638, iteration: 344352
loss: 0.9911593198776245,grad_norm: 0.7420441279862475, iteration: 344353
loss: 1.0005958080291748,grad_norm: 0.969120051635582, iteration: 344354
loss: 1.0621978044509888,grad_norm: 0.9999999134632888, iteration: 344355
loss: 1.044109582901001,grad_norm: 0.9999992042317667, iteration: 344356
loss: 1.0073968172073364,grad_norm: 0.9999999017164676, iteration: 344357
loss: 0.977776050567627,grad_norm: 0.7050987232316416, iteration: 344358
loss: 1.003391146659851,grad_norm: 0.7132049330203835, iteration: 344359
loss: 1.0088930130004883,grad_norm: 0.7384095258265131, iteration: 344360
loss: 1.0229939222335815,grad_norm: 0.9927919733639387, iteration: 344361
loss: 0.9709375500679016,grad_norm: 0.8786707597315027, iteration: 344362
loss: 0.9835948944091797,grad_norm: 0.7594949229282733, iteration: 344363
loss: 1.0563112497329712,grad_norm: 0.9604865689103156, iteration: 344364
loss: 0.9941049814224243,grad_norm: 0.7930725468377925, iteration: 344365
loss: 1.0057475566864014,grad_norm: 0.8157634174802683, iteration: 344366
loss: 0.9887286424636841,grad_norm: 0.6754922309522067, iteration: 344367
loss: 1.0043655633926392,grad_norm: 0.9999997168051974, iteration: 344368
loss: 1.0366400480270386,grad_norm: 0.9999995170741134, iteration: 344369
loss: 1.0021171569824219,grad_norm: 0.7719622074923216, iteration: 344370
loss: 1.0240031480789185,grad_norm: 0.7966834650721569, iteration: 344371
loss: 1.0231592655181885,grad_norm: 0.9836633099064814, iteration: 344372
loss: 0.993079662322998,grad_norm: 0.9999991194946984, iteration: 344373
loss: 1.0344592332839966,grad_norm: 0.9999991299659817, iteration: 344374
loss: 0.9671051502227783,grad_norm: 0.8341717535157737, iteration: 344375
loss: 0.9830829501152039,grad_norm: 0.82543684691946, iteration: 344376
loss: 1.0048449039459229,grad_norm: 0.9782120543356279, iteration: 344377
loss: 0.984982430934906,grad_norm: 0.69830109190307, iteration: 344378
loss: 1.0065165758132935,grad_norm: 0.9999995549362418, iteration: 344379
loss: 1.0703024864196777,grad_norm: 0.999999567214451, iteration: 344380
loss: 1.0826925039291382,grad_norm: 0.9999992190875351, iteration: 344381
loss: 1.0661616325378418,grad_norm: 0.8368913379926304, iteration: 344382
loss: 0.9965842366218567,grad_norm: 0.7789484424045718, iteration: 344383
loss: 1.009740948677063,grad_norm: 0.7277649639411773, iteration: 344384
loss: 0.9971709847450256,grad_norm: 0.8993279636663, iteration: 344385
loss: 1.1020822525024414,grad_norm: 0.9999996565452581, iteration: 344386
loss: 0.9951516389846802,grad_norm: 0.999999122960996, iteration: 344387
loss: 1.0218515396118164,grad_norm: 0.8151855000016451, iteration: 344388
loss: 0.9851134419441223,grad_norm: 0.7265867372637701, iteration: 344389
loss: 1.007175087928772,grad_norm: 0.8283506828820436, iteration: 344390
loss: 1.0006383657455444,grad_norm: 0.8065517933777651, iteration: 344391
loss: 0.9806848168373108,grad_norm: 0.9526587298212646, iteration: 344392
loss: 1.0110201835632324,grad_norm: 0.806747226619196, iteration: 344393
loss: 0.958702564239502,grad_norm: 0.6958744966390303, iteration: 344394
loss: 0.9916583299636841,grad_norm: 0.9398319191929643, iteration: 344395
loss: 0.9932115077972412,grad_norm: 0.996195270307888, iteration: 344396
loss: 0.9616608023643494,grad_norm: 0.7856330025147027, iteration: 344397
loss: 1.0183826684951782,grad_norm: 0.9999995403640006, iteration: 344398
loss: 1.0055954456329346,grad_norm: 0.7227415663970599, iteration: 344399
loss: 0.9883019924163818,grad_norm: 0.7371709267150517, iteration: 344400
loss: 1.007318139076233,grad_norm: 0.6867696179124914, iteration: 344401
loss: 1.0897395610809326,grad_norm: 0.9999995093021645, iteration: 344402
loss: 1.0069751739501953,grad_norm: 0.8232632840259655, iteration: 344403
loss: 0.9909716844558716,grad_norm: 0.9999994316616289, iteration: 344404
loss: 1.1290388107299805,grad_norm: 0.999999524113601, iteration: 344405
loss: 1.038430094718933,grad_norm: 0.8962401705276373, iteration: 344406
loss: 1.1143330335617065,grad_norm: 0.9264331868423995, iteration: 344407
loss: 0.996656060218811,grad_norm: 0.9839713630194019, iteration: 344408
loss: 0.980436384677887,grad_norm: 0.8524718204050612, iteration: 344409
loss: 1.0342191457748413,grad_norm: 0.8923097721658829, iteration: 344410
loss: 1.2765700817108154,grad_norm: 0.9999993539251818, iteration: 344411
loss: 1.0002765655517578,grad_norm: 0.9392572688462791, iteration: 344412
loss: 1.1256647109985352,grad_norm: 0.9999994993616549, iteration: 344413
loss: 1.0051524639129639,grad_norm: 0.7998867222014393, iteration: 344414
loss: 1.0731531381607056,grad_norm: 0.9999994703429032, iteration: 344415
loss: 0.9824210405349731,grad_norm: 0.784305636752447, iteration: 344416
loss: 1.0213505029678345,grad_norm: 0.9367373042879182, iteration: 344417
loss: 1.02345871925354,grad_norm: 0.8728355444261119, iteration: 344418
loss: 0.998014509677887,grad_norm: 0.852874004972697, iteration: 344419
loss: 1.0215022563934326,grad_norm: 0.9983401463529572, iteration: 344420
loss: 0.9746277332305908,grad_norm: 0.7166048440319356, iteration: 344421
loss: 1.0362337827682495,grad_norm: 0.8123678063794328, iteration: 344422
loss: 0.9922682046890259,grad_norm: 0.6516887482257557, iteration: 344423
loss: 1.1453742980957031,grad_norm: 0.9047165970104881, iteration: 344424
loss: 1.020445704460144,grad_norm: 0.6622310889507518, iteration: 344425
loss: 1.0024142265319824,grad_norm: 0.8627757422038645, iteration: 344426
loss: 1.0024627447128296,grad_norm: 0.9995642823776958, iteration: 344427
loss: 1.0164828300476074,grad_norm: 0.9999993174548351, iteration: 344428
loss: 1.0043739080429077,grad_norm: 0.8331007615816415, iteration: 344429
loss: 1.0058015584945679,grad_norm: 0.6877380707949501, iteration: 344430
loss: 1.0133576393127441,grad_norm: 0.821848936622378, iteration: 344431
loss: 1.0827593803405762,grad_norm: 0.9811494444570955, iteration: 344432
loss: 1.0135700702667236,grad_norm: 0.8220629222745445, iteration: 344433
loss: 0.9967515468597412,grad_norm: 0.8630102397136695, iteration: 344434
loss: 1.0120131969451904,grad_norm: 0.8325008022153704, iteration: 344435
loss: 1.0404413938522339,grad_norm: 0.9999995012653672, iteration: 344436
loss: 1.0819077491760254,grad_norm: 0.9804375870761394, iteration: 344437
loss: 1.0174533128738403,grad_norm: 0.7760644895320741, iteration: 344438
loss: 1.005774736404419,grad_norm: 0.7814161079643263, iteration: 344439
loss: 1.0455964803695679,grad_norm: 0.8679108660516495, iteration: 344440
loss: 0.974786639213562,grad_norm: 0.7760776223522093, iteration: 344441
loss: 0.9612730145454407,grad_norm: 0.885535576486506, iteration: 344442
loss: 0.9494954347610474,grad_norm: 0.8065704331122784, iteration: 344443
loss: 1.1207756996154785,grad_norm: 0.9999990294357527, iteration: 344444
loss: 1.0316890478134155,grad_norm: 0.8845214773796848, iteration: 344445
loss: 1.0166138410568237,grad_norm: 0.9999990986063044, iteration: 344446
loss: 1.0124335289001465,grad_norm: 0.7783795218826534, iteration: 344447
loss: 0.9626808166503906,grad_norm: 0.9999990202419371, iteration: 344448
loss: 1.0539730787277222,grad_norm: 0.999999120678973, iteration: 344449
loss: 0.9935523867607117,grad_norm: 0.8785085989312028, iteration: 344450
loss: 0.984911322593689,grad_norm: 0.8279928639848816, iteration: 344451
loss: 1.0432709455490112,grad_norm: 0.8010397729524661, iteration: 344452
loss: 1.034696340560913,grad_norm: 0.9235438628763134, iteration: 344453
loss: 1.0198169946670532,grad_norm: 0.7983864005115926, iteration: 344454
loss: 1.0535165071487427,grad_norm: 0.9999991131727762, iteration: 344455
loss: 0.9791266322135925,grad_norm: 0.9026302891901961, iteration: 344456
loss: 1.016313076019287,grad_norm: 0.9999991076344763, iteration: 344457
loss: 1.070236086845398,grad_norm: 0.9999999303744618, iteration: 344458
loss: 1.0676963329315186,grad_norm: 0.9999992217907775, iteration: 344459
loss: 1.053175687789917,grad_norm: 0.9999992612357367, iteration: 344460
loss: 1.1610270738601685,grad_norm: 0.9999995993895857, iteration: 344461
loss: 1.0088977813720703,grad_norm: 0.9999991939361353, iteration: 344462
loss: 1.0261651277542114,grad_norm: 0.9999997301973388, iteration: 344463
loss: 0.9878446459770203,grad_norm: 0.7005661946898286, iteration: 344464
loss: 1.0644094944000244,grad_norm: 0.7713735005364474, iteration: 344465
loss: 1.136148452758789,grad_norm: 0.9999997113911264, iteration: 344466
loss: 1.017653465270996,grad_norm: 0.8015190564211522, iteration: 344467
loss: 1.0066436529159546,grad_norm: 0.7435930801347111, iteration: 344468
loss: 0.9919151663780212,grad_norm: 0.8373527600220415, iteration: 344469
loss: 1.0053788423538208,grad_norm: 0.9999990690379569, iteration: 344470
loss: 1.0154134035110474,grad_norm: 0.6459761499475374, iteration: 344471
loss: 0.975859522819519,grad_norm: 0.910772681554903, iteration: 344472
loss: 1.0146914720535278,grad_norm: 0.943424971608501, iteration: 344473
loss: 0.9989724159240723,grad_norm: 0.8004186048437494, iteration: 344474
loss: 1.007278561592102,grad_norm: 0.6198033402379087, iteration: 344475
loss: 1.0134021043777466,grad_norm: 0.8059649132111646, iteration: 344476
loss: 1.0127400159835815,grad_norm: 0.7425216161962088, iteration: 344477
loss: 1.0985870361328125,grad_norm: 0.8934451639243643, iteration: 344478
loss: 1.0172017812728882,grad_norm: 0.924235612333555, iteration: 344479
loss: 0.9825252294540405,grad_norm: 0.895764671024261, iteration: 344480
loss: 0.9925010204315186,grad_norm: 0.8909992404782311, iteration: 344481
loss: 1.037106990814209,grad_norm: 0.999999664173876, iteration: 344482
loss: 1.0314910411834717,grad_norm: 0.8361030906865292, iteration: 344483
loss: 0.9703145027160645,grad_norm: 0.7842500934672795, iteration: 344484
loss: 0.9866390824317932,grad_norm: 0.7696651126784815, iteration: 344485
loss: 1.0350369215011597,grad_norm: 0.999999613650977, iteration: 344486
loss: 0.98419189453125,grad_norm: 0.7995883440682787, iteration: 344487
loss: 1.0258017778396606,grad_norm: 0.9952279438199177, iteration: 344488
loss: 1.0392135381698608,grad_norm: 0.8049888811724428, iteration: 344489
loss: 1.017592430114746,grad_norm: 0.6864921749885938, iteration: 344490
loss: 0.989756166934967,grad_norm: 0.8542928059288191, iteration: 344491
loss: 1.0212918519973755,grad_norm: 0.9999996263634495, iteration: 344492
loss: 0.9766684174537659,grad_norm: 0.8077849339390393, iteration: 344493
loss: 0.9995115995407104,grad_norm: 0.999999481431693, iteration: 344494
loss: 1.2189911603927612,grad_norm: 1.0000000389043138, iteration: 344495
loss: 0.9639067053794861,grad_norm: 0.7650023738085282, iteration: 344496
loss: 1.011330008506775,grad_norm: 0.6652957352213169, iteration: 344497
loss: 1.0226831436157227,grad_norm: 0.9999991872374017, iteration: 344498
loss: 1.0211092233657837,grad_norm: 0.8193324049081985, iteration: 344499
loss: 0.98360276222229,grad_norm: 0.9999989848974185, iteration: 344500
loss: 1.0013535022735596,grad_norm: 0.8861032395754002, iteration: 344501
loss: 1.0079925060272217,grad_norm: 0.8872127620717184, iteration: 344502
loss: 0.9969432353973389,grad_norm: 0.7356288121054332, iteration: 344503
loss: 0.9732828140258789,grad_norm: 0.822946991151811, iteration: 344504
loss: 1.012527346611023,grad_norm: 0.9030810492275887, iteration: 344505
loss: 0.9832290410995483,grad_norm: 0.7911005484938938, iteration: 344506
loss: 0.9942210912704468,grad_norm: 0.7381744032123397, iteration: 344507
loss: 0.9871498346328735,grad_norm: 0.717752311035127, iteration: 344508
loss: 1.0139491558074951,grad_norm: 0.9083501125047491, iteration: 344509
loss: 1.009155035018921,grad_norm: 0.7648962481136204, iteration: 344510
loss: 0.9620542526245117,grad_norm: 0.6774427259142946, iteration: 344511
loss: 1.0925135612487793,grad_norm: 0.9999996819762743, iteration: 344512
loss: 1.0105690956115723,grad_norm: 0.7508407993427952, iteration: 344513
loss: 1.0019482374191284,grad_norm: 0.87681565782987, iteration: 344514
loss: 1.029181718826294,grad_norm: 0.9460801586029184, iteration: 344515
loss: 1.008284330368042,grad_norm: 0.8934521476118782, iteration: 344516
loss: 1.0013625621795654,grad_norm: 0.8263351302745683, iteration: 344517
loss: 0.9927425980567932,grad_norm: 0.8635133926831892, iteration: 344518
loss: 0.9921219944953918,grad_norm: 0.8888253208258282, iteration: 344519
loss: 1.04812753200531,grad_norm: 0.9519342409639776, iteration: 344520
loss: 0.9582536220550537,grad_norm: 0.7885498440073909, iteration: 344521
loss: 1.0027763843536377,grad_norm: 0.8380792272599283, iteration: 344522
loss: 1.0436261892318726,grad_norm: 0.916016150332618, iteration: 344523
loss: 1.0029375553131104,grad_norm: 0.9061653779151421, iteration: 344524
loss: 0.9894012808799744,grad_norm: 0.7632129781470458, iteration: 344525
loss: 0.9947658181190491,grad_norm: 0.9492265051719383, iteration: 344526
loss: 1.019425392150879,grad_norm: 0.7003677521726346, iteration: 344527
loss: 0.9816439151763916,grad_norm: 0.6765274829754083, iteration: 344528
loss: 1.0917696952819824,grad_norm: 0.9999999577222128, iteration: 344529
loss: 1.0091400146484375,grad_norm: 0.8992036183478188, iteration: 344530
loss: 1.006890058517456,grad_norm: 0.9999991005313241, iteration: 344531
loss: 0.9672436714172363,grad_norm: 0.9999994133921829, iteration: 344532
loss: 1.0005016326904297,grad_norm: 0.7189139308861106, iteration: 344533
loss: 1.026901364326477,grad_norm: 0.8036531232602031, iteration: 344534
loss: 1.001011848449707,grad_norm: 0.9999998559075571, iteration: 344535
loss: 0.9977702498435974,grad_norm: 0.8804197358492727, iteration: 344536
loss: 1.065340518951416,grad_norm: 0.8900059796425043, iteration: 344537
loss: 1.0087685585021973,grad_norm: 0.7903448902951828, iteration: 344538
loss: 1.0269887447357178,grad_norm: 0.8064793123285598, iteration: 344539
loss: 0.9867066144943237,grad_norm: 0.7527276627046051, iteration: 344540
loss: 1.057162880897522,grad_norm: 0.8314554730033141, iteration: 344541
loss: 1.0450724363327026,grad_norm: 0.912257939650886, iteration: 344542
loss: 1.020019292831421,grad_norm: 0.9999996402231556, iteration: 344543
loss: 1.0811740159988403,grad_norm: 0.9999998803696427, iteration: 344544
loss: 1.1404733657836914,grad_norm: 0.999999320426122, iteration: 344545
loss: 1.0338855981826782,grad_norm: 0.7885530163645927, iteration: 344546
loss: 0.9416396021842957,grad_norm: 0.897162262675522, iteration: 344547
loss: 0.9787726998329163,grad_norm: 0.7398991686675221, iteration: 344548
loss: 0.9996612071990967,grad_norm: 0.9759867412479981, iteration: 344549
loss: 0.9930630326271057,grad_norm: 0.8486569857414101, iteration: 344550
loss: 0.9973559975624084,grad_norm: 0.7565874522326204, iteration: 344551
loss: 1.0492337942123413,grad_norm: 0.9999992928793597, iteration: 344552
loss: 0.966160774230957,grad_norm: 0.858153098429873, iteration: 344553
loss: 0.992118239402771,grad_norm: 0.8847418285596336, iteration: 344554
loss: 1.0355204343795776,grad_norm: 0.7605945751486607, iteration: 344555
loss: 0.9693913459777832,grad_norm: 0.8734533651911407, iteration: 344556
loss: 1.0193620920181274,grad_norm: 0.8163388205462319, iteration: 344557
loss: 0.9951077699661255,grad_norm: 0.9999991780237206, iteration: 344558
loss: 1.024641990661621,grad_norm: 0.9999992013855956, iteration: 344559
loss: 0.9771841168403625,grad_norm: 0.7581295933279543, iteration: 344560
loss: 1.0126246213912964,grad_norm: 0.7830610716576875, iteration: 344561
loss: 0.9827468991279602,grad_norm: 0.7470197195603457, iteration: 344562
loss: 1.0582444667816162,grad_norm: 0.9999991367238789, iteration: 344563
loss: 0.9918689131736755,grad_norm: 0.9999990426646717, iteration: 344564
loss: 1.009854793548584,grad_norm: 0.7301374642888312, iteration: 344565
loss: 0.9851827621459961,grad_norm: 0.7062443726538524, iteration: 344566
loss: 1.0077383518218994,grad_norm: 0.8304343619581247, iteration: 344567
loss: 1.1531349420547485,grad_norm: 0.8545195131805461, iteration: 344568
loss: 1.0346393585205078,grad_norm: 0.8748505690186307, iteration: 344569
loss: 0.9856258630752563,grad_norm: 0.7444834063302854, iteration: 344570
loss: 1.0074777603149414,grad_norm: 0.7984946713888509, iteration: 344571
loss: 1.1270482540130615,grad_norm: 0.9999997451621065, iteration: 344572
loss: 0.9889165759086609,grad_norm: 0.9392438026604213, iteration: 344573
loss: 1.0300606489181519,grad_norm: 0.7997801545400163, iteration: 344574
loss: 0.9902351498603821,grad_norm: 0.9806659738636845, iteration: 344575
loss: 0.9961556196212769,grad_norm: 0.9999991112246105, iteration: 344576
loss: 0.9820090532302856,grad_norm: 0.6318349541962321, iteration: 344577
loss: 1.0085347890853882,grad_norm: 0.7423359734760906, iteration: 344578
loss: 1.006740927696228,grad_norm: 0.7856324653259711, iteration: 344579
loss: 0.9950730204582214,grad_norm: 0.9733926772694148, iteration: 344580
loss: 0.9794222712516785,grad_norm: 0.8648081408488787, iteration: 344581
loss: 0.9844610691070557,grad_norm: 0.6578014187416253, iteration: 344582
loss: 1.0302814245224,grad_norm: 0.7643826429987794, iteration: 344583
loss: 1.023339867591858,grad_norm: 0.826844485142778, iteration: 344584
loss: 0.9689597487449646,grad_norm: 0.9423047730312571, iteration: 344585
loss: 1.0166789293289185,grad_norm: 0.9413815005786684, iteration: 344586
loss: 0.9789631962776184,grad_norm: 0.7948985679712298, iteration: 344587
loss: 1.0234928131103516,grad_norm: 0.7697488099648621, iteration: 344588
loss: 1.0248003005981445,grad_norm: 0.9871232561894513, iteration: 344589
loss: 0.9925873875617981,grad_norm: 0.8173763157749625, iteration: 344590
loss: 1.028936743736267,grad_norm: 0.999999937355869, iteration: 344591
loss: 0.9854623079299927,grad_norm: 0.7112980459702403, iteration: 344592
loss: 0.9809854626655579,grad_norm: 0.8008590048427411, iteration: 344593
loss: 1.005150556564331,grad_norm: 0.9999993119118379, iteration: 344594
loss: 0.9933587908744812,grad_norm: 0.7029114195048795, iteration: 344595
loss: 0.9754458665847778,grad_norm: 0.9999994802061273, iteration: 344596
loss: 1.0273175239562988,grad_norm: 0.8410397036002115, iteration: 344597
loss: 1.007873773574829,grad_norm: 0.9747059975202955, iteration: 344598
loss: 0.971051037311554,grad_norm: 0.8219701614652539, iteration: 344599
loss: 1.0052331686019897,grad_norm: 0.7998785222988192, iteration: 344600
loss: 1.0620307922363281,grad_norm: 0.9287674260181705, iteration: 344601
loss: 1.069930076599121,grad_norm: 0.999999526998457, iteration: 344602
loss: 0.982423722743988,grad_norm: 0.8406678374571166, iteration: 344603
loss: 0.9845080971717834,grad_norm: 0.8030053383554803, iteration: 344604
loss: 1.0859014987945557,grad_norm: 0.7957190503244346, iteration: 344605
loss: 1.0096110105514526,grad_norm: 0.9110232651107775, iteration: 344606
loss: 0.9471974968910217,grad_norm: 0.8620828388352526, iteration: 344607
loss: 1.0733014345169067,grad_norm: 0.8341815775105601, iteration: 344608
loss: 1.050087332725525,grad_norm: 0.904940916619259, iteration: 344609
loss: 0.9930794835090637,grad_norm: 0.6836273630160048, iteration: 344610
loss: 1.0419964790344238,grad_norm: 0.9999991835441276, iteration: 344611
loss: 1.0088279247283936,grad_norm: 0.7349468530369089, iteration: 344612
loss: 0.9910110235214233,grad_norm: 0.7689231636444616, iteration: 344613
loss: 0.9921720623970032,grad_norm: 0.7667095222009975, iteration: 344614
loss: 0.9934399127960205,grad_norm: 0.8995252335110293, iteration: 344615
loss: 0.9919703602790833,grad_norm: 0.705282578992112, iteration: 344616
loss: 1.0686129331588745,grad_norm: 0.9999998544721457, iteration: 344617
loss: 0.9824144840240479,grad_norm: 0.9710609320340458, iteration: 344618
loss: 0.972312867641449,grad_norm: 0.7548779864838253, iteration: 344619
loss: 0.9839811325073242,grad_norm: 0.7608718989913836, iteration: 344620
loss: 1.0469613075256348,grad_norm: 0.9999989972928773, iteration: 344621
loss: 0.991034209728241,grad_norm: 0.9999989881120979, iteration: 344622
loss: 0.9939517378807068,grad_norm: 0.9999993192166171, iteration: 344623
loss: 1.0038325786590576,grad_norm: 0.9999990654325204, iteration: 344624
loss: 1.0097880363464355,grad_norm: 0.9999999254723484, iteration: 344625
loss: 1.0444496870040894,grad_norm: 0.9449477692323752, iteration: 344626
loss: 1.1435500383377075,grad_norm: 0.9999990578383503, iteration: 344627
loss: 1.1122459173202515,grad_norm: 0.9999999108196269, iteration: 344628
loss: 1.050706148147583,grad_norm: 0.9999998223573338, iteration: 344629
loss: 0.965514600276947,grad_norm: 0.9999991481293838, iteration: 344630
loss: 1.0965924263000488,grad_norm: 0.9999989918798875, iteration: 344631
loss: 0.95377516746521,grad_norm: 0.8135036684926268, iteration: 344632
loss: 0.9506019949913025,grad_norm: 0.8719145124539414, iteration: 344633
loss: 1.0278675556182861,grad_norm: 0.8815254209162835, iteration: 344634
loss: 0.9992135763168335,grad_norm: 0.8133083025430248, iteration: 344635
loss: 1.0467337369918823,grad_norm: 0.8177295060011758, iteration: 344636
loss: 1.0260968208312988,grad_norm: 0.9872361140186477, iteration: 344637
loss: 1.0169960260391235,grad_norm: 0.9999990826705336, iteration: 344638
loss: 1.0594451427459717,grad_norm: 0.7704147790954552, iteration: 344639
loss: 0.9772432446479797,grad_norm: 0.9937845770506804, iteration: 344640
loss: 1.0367374420166016,grad_norm: 0.6590226171263682, iteration: 344641
loss: 1.0149856805801392,grad_norm: 0.8131089621223586, iteration: 344642
loss: 0.9707273244857788,grad_norm: 0.8882052507766817, iteration: 344643
loss: 0.9812962412834167,grad_norm: 0.7258557626946998, iteration: 344644
loss: 0.9867317080497742,grad_norm: 0.830139256515164, iteration: 344645
loss: 1.0961579084396362,grad_norm: 0.999999525532052, iteration: 344646
loss: 0.9621298909187317,grad_norm: 0.8314316971436855, iteration: 344647
loss: 0.9609354138374329,grad_norm: 0.6665795960327348, iteration: 344648
loss: 0.9837541580200195,grad_norm: 0.8160837011334392, iteration: 344649
loss: 1.0084877014160156,grad_norm: 0.9999996557805396, iteration: 344650
loss: 0.9949015974998474,grad_norm: 0.6976365576787246, iteration: 344651
loss: 0.955608069896698,grad_norm: 0.999999157069401, iteration: 344652
loss: 1.0194947719573975,grad_norm: 0.826280983772988, iteration: 344653
loss: 1.0319539308547974,grad_norm: 0.9381381804235628, iteration: 344654
loss: 1.0352013111114502,grad_norm: 0.9267748690011309, iteration: 344655
loss: 0.9931319355964661,grad_norm: 0.8225246560685807, iteration: 344656
loss: 1.0060911178588867,grad_norm: 0.9999989818768183, iteration: 344657
loss: 1.0224761962890625,grad_norm: 0.7785678166370051, iteration: 344658
loss: 0.9913617372512817,grad_norm: 0.9999990987058384, iteration: 344659
loss: 1.0431468486785889,grad_norm: 0.7977538388776874, iteration: 344660
loss: 0.9703943133354187,grad_norm: 0.8299334082596689, iteration: 344661
loss: 0.9827439785003662,grad_norm: 0.8126242586222042, iteration: 344662
loss: 0.9991437196731567,grad_norm: 0.7717506232392662, iteration: 344663
loss: 0.9941470623016357,grad_norm: 0.9258847291791006, iteration: 344664
loss: 1.0107619762420654,grad_norm: 0.7749717564815236, iteration: 344665
loss: 1.0196059942245483,grad_norm: 0.6919086687080466, iteration: 344666
loss: 1.0414249897003174,grad_norm: 0.8707709328450697, iteration: 344667
loss: 1.0769569873809814,grad_norm: 0.9999992968425471, iteration: 344668
loss: 1.005400538444519,grad_norm: 0.753573279997804, iteration: 344669
loss: 1.0056992769241333,grad_norm: 0.9999990256177537, iteration: 344670
loss: 1.04547119140625,grad_norm: 0.8733995061252768, iteration: 344671
loss: 0.9986949563026428,grad_norm: 0.8355050945856347, iteration: 344672
loss: 1.0078661441802979,grad_norm: 0.995495280205591, iteration: 344673
loss: 0.9768364429473877,grad_norm: 0.7771966460413055, iteration: 344674
loss: 1.0172092914581299,grad_norm: 0.9173474023358033, iteration: 344675
loss: 1.0160815715789795,grad_norm: 0.9165037517877197, iteration: 344676
loss: 0.9844648838043213,grad_norm: 0.8225636199657901, iteration: 344677
loss: 1.0022814273834229,grad_norm: 0.7481461559992327, iteration: 344678
loss: 1.0373728275299072,grad_norm: 0.9999992921266948, iteration: 344679
loss: 1.0045082569122314,grad_norm: 0.8296955045572705, iteration: 344680
loss: 0.9942464232444763,grad_norm: 0.8137559491655203, iteration: 344681
loss: 1.013245701789856,grad_norm: 0.7507424293353357, iteration: 344682
loss: 0.9813917279243469,grad_norm: 0.9999990027851341, iteration: 344683
loss: 0.9404565691947937,grad_norm: 0.8607877753590264, iteration: 344684
loss: 0.9828843474388123,grad_norm: 0.7904118013139756, iteration: 344685
loss: 0.98309326171875,grad_norm: 0.6885380034282379, iteration: 344686
loss: 1.0109775066375732,grad_norm: 0.8470903185054166, iteration: 344687
loss: 0.9674926996231079,grad_norm: 0.8879240564580831, iteration: 344688
loss: 1.0175331830978394,grad_norm: 0.9999991095052618, iteration: 344689
loss: 0.9625924229621887,grad_norm: 0.7992988392761468, iteration: 344690
loss: 1.0526565313339233,grad_norm: 0.8020921677044166, iteration: 344691
loss: 1.009404182434082,grad_norm: 0.9999992557889806, iteration: 344692
loss: 1.031238079071045,grad_norm: 0.7601879069667442, iteration: 344693
loss: 0.9892143607139587,grad_norm: 0.9999991378302724, iteration: 344694
loss: 0.981497049331665,grad_norm: 0.6829665091436989, iteration: 344695
loss: 1.0151277780532837,grad_norm: 0.773370259863579, iteration: 344696
loss: 0.9901756048202515,grad_norm: 0.8306587232754361, iteration: 344697
loss: 1.2591339349746704,grad_norm: 0.9999990986683099, iteration: 344698
loss: 1.0076003074645996,grad_norm: 0.7394465135003631, iteration: 344699
loss: 1.049187183380127,grad_norm: 0.8803321329810974, iteration: 344700
loss: 1.0159882307052612,grad_norm: 0.9999990471885544, iteration: 344701
loss: 1.015525460243225,grad_norm: 0.7589700898937293, iteration: 344702
loss: 0.9730707406997681,grad_norm: 0.7051030095285723, iteration: 344703
loss: 0.9481256604194641,grad_norm: 0.7923262974078558, iteration: 344704
loss: 0.9961991906166077,grad_norm: 0.734589853394607, iteration: 344705
loss: 0.9632851481437683,grad_norm: 0.8601294055124067, iteration: 344706
loss: 1.022969126701355,grad_norm: 0.74513116127316, iteration: 344707
loss: 1.3073748350143433,grad_norm: 0.9999997957386275, iteration: 344708
loss: 1.0053092241287231,grad_norm: 0.9999993142794277, iteration: 344709
loss: 0.9849129319190979,grad_norm: 0.7870868516004818, iteration: 344710
loss: 1.096718430519104,grad_norm: 0.9999989477746373, iteration: 344711
loss: 0.998984694480896,grad_norm: 0.859297419872064, iteration: 344712
loss: 0.9932917356491089,grad_norm: 0.6918764545130652, iteration: 344713
loss: 1.0306318998336792,grad_norm: 0.9894042018512309, iteration: 344714
loss: 1.0633132457733154,grad_norm: 0.9999998455416751, iteration: 344715
loss: 0.9790727496147156,grad_norm: 0.7834904756205264, iteration: 344716
loss: 0.9654747843742371,grad_norm: 0.9925835190674585, iteration: 344717
loss: 1.007881760597229,grad_norm: 0.8877497381718581, iteration: 344718
loss: 1.028066873550415,grad_norm: 0.971722714348689, iteration: 344719
loss: 1.002617597579956,grad_norm: 0.9999993159072227, iteration: 344720
loss: 0.9813829660415649,grad_norm: 0.9431322812093281, iteration: 344721
loss: 1.0324881076812744,grad_norm: 0.8283950146757999, iteration: 344722
loss: 0.9989792108535767,grad_norm: 0.999999426189637, iteration: 344723
loss: 1.0154106616973877,grad_norm: 0.7441735575158812, iteration: 344724
loss: 1.0505616664886475,grad_norm: 0.9999995380916014, iteration: 344725
loss: 0.98005610704422,grad_norm: 0.7528173114816444, iteration: 344726
loss: 1.0227364301681519,grad_norm: 0.7339043830659512, iteration: 344727
loss: 1.003939151763916,grad_norm: 0.999999991028273, iteration: 344728
loss: 1.0077378749847412,grad_norm: 0.9999992894522947, iteration: 344729
loss: 1.0088173151016235,grad_norm: 0.8288234947391622, iteration: 344730
loss: 0.9924702048301697,grad_norm: 0.8745503355244119, iteration: 344731
loss: 0.9884830713272095,grad_norm: 0.8683979705596655, iteration: 344732
loss: 1.0407079458236694,grad_norm: 0.7986692403266389, iteration: 344733
loss: 0.9767943620681763,grad_norm: 0.999999747819851, iteration: 344734
loss: 1.0064277648925781,grad_norm: 0.7783802529472484, iteration: 344735
loss: 0.9793429970741272,grad_norm: 0.7023547912464828, iteration: 344736
loss: 0.9774744510650635,grad_norm: 0.8974085007826361, iteration: 344737
loss: 1.0212481021881104,grad_norm: 0.851366938181153, iteration: 344738
loss: 0.989605724811554,grad_norm: 0.8316689798465583, iteration: 344739
loss: 0.9837462902069092,grad_norm: 0.8204658654272456, iteration: 344740
loss: 0.9430417418479919,grad_norm: 0.7604698231005652, iteration: 344741
loss: 1.0180022716522217,grad_norm: 0.7047234798766648, iteration: 344742
loss: 0.9828459620475769,grad_norm: 0.9692889739249677, iteration: 344743
loss: 1.0212044715881348,grad_norm: 0.8679614094691649, iteration: 344744
loss: 0.9991858601570129,grad_norm: 0.7890469462440899, iteration: 344745
loss: 0.9850109219551086,grad_norm: 0.9999989832518564, iteration: 344746
loss: 0.996794581413269,grad_norm: 0.7883915304983758, iteration: 344747
loss: 0.956470251083374,grad_norm: 0.8970718900983131, iteration: 344748
loss: 1.0006814002990723,grad_norm: 0.9861146330299264, iteration: 344749
loss: 0.9831150770187378,grad_norm: 0.6477322677565843, iteration: 344750
loss: 0.9738567471504211,grad_norm: 0.6578686563967783, iteration: 344751
loss: 0.966093897819519,grad_norm: 0.7813175686811302, iteration: 344752
loss: 0.9959741830825806,grad_norm: 0.8498750194112065, iteration: 344753
loss: 0.9825922846794128,grad_norm: 0.758300325000555, iteration: 344754
loss: 1.0088533163070679,grad_norm: 0.789684243104234, iteration: 344755
loss: 0.9927700161933899,grad_norm: 0.8171845836506799, iteration: 344756
loss: 1.0085726976394653,grad_norm: 0.7479620107356996, iteration: 344757
loss: 1.0192680358886719,grad_norm: 0.874463392674365, iteration: 344758
loss: 0.9947118759155273,grad_norm: 0.7865716845074354, iteration: 344759
loss: 1.0541212558746338,grad_norm: 0.835220894693392, iteration: 344760
loss: 0.9822764992713928,grad_norm: 0.999999616086684, iteration: 344761
loss: 0.9739781618118286,grad_norm: 0.7767378957329227, iteration: 344762
loss: 1.0111595392227173,grad_norm: 0.9062191410962976, iteration: 344763
loss: 0.9514181613922119,grad_norm: 0.8240894381758015, iteration: 344764
loss: 0.9874656200408936,grad_norm: 0.8177205133659482, iteration: 344765
loss: 0.9641456604003906,grad_norm: 0.6926361709387315, iteration: 344766
loss: 0.9781299233436584,grad_norm: 0.959487968036353, iteration: 344767
loss: 0.9615381956100464,grad_norm: 0.8255853118612005, iteration: 344768
loss: 1.006374716758728,grad_norm: 0.872115540118392, iteration: 344769
loss: 1.0524060726165771,grad_norm: 0.9999997918716433, iteration: 344770
loss: 0.9915672540664673,grad_norm: 0.8789917509786274, iteration: 344771
loss: 0.9915902018547058,grad_norm: 0.8556604383221882, iteration: 344772
loss: 1.0057076215744019,grad_norm: 0.8268272478654237, iteration: 344773
loss: 0.9711613655090332,grad_norm: 0.715289399577088, iteration: 344774
loss: 0.9682435393333435,grad_norm: 0.7793419151607869, iteration: 344775
loss: 0.9913901090621948,grad_norm: 0.827978073826084, iteration: 344776
loss: 0.9875228404998779,grad_norm: 0.8315289185988793, iteration: 344777
loss: 0.980970025062561,grad_norm: 0.7681494528658652, iteration: 344778
loss: 1.002621054649353,grad_norm: 0.8196672514057933, iteration: 344779
loss: 0.9819946885108948,grad_norm: 0.8101879644165731, iteration: 344780
loss: 0.9853984117507935,grad_norm: 0.8979378882025962, iteration: 344781
loss: 0.9891010522842407,grad_norm: 0.8469351055390894, iteration: 344782
loss: 1.078390121459961,grad_norm: 0.99999983156582, iteration: 344783
loss: 1.005603313446045,grad_norm: 0.9291247636946, iteration: 344784
loss: 1.0093967914581299,grad_norm: 0.8585066685680773, iteration: 344785
loss: 1.0007001161575317,grad_norm: 0.9999990673679456, iteration: 344786
loss: 0.9906947016716003,grad_norm: 0.7721831993796684, iteration: 344787
loss: 0.964633584022522,grad_norm: 0.9012284757090187, iteration: 344788
loss: 0.9798456430435181,grad_norm: 0.753251240595114, iteration: 344789
loss: 1.0121357440948486,grad_norm: 0.7770615812267919, iteration: 344790
loss: 1.0825508832931519,grad_norm: 0.9999996901731951, iteration: 344791
loss: 1.0836927890777588,grad_norm: 0.9999995758703905, iteration: 344792
loss: 0.973883867263794,grad_norm: 0.8181434780878538, iteration: 344793
loss: 1.0110257863998413,grad_norm: 0.7779892635031208, iteration: 344794
loss: 0.9833818674087524,grad_norm: 0.6881270430952191, iteration: 344795
loss: 1.0077714920043945,grad_norm: 0.8103191432390506, iteration: 344796
loss: 0.9662255644798279,grad_norm: 0.6832004580664062, iteration: 344797
loss: 0.9828459620475769,grad_norm: 0.7290766289323241, iteration: 344798
loss: 0.9736375212669373,grad_norm: 0.8730510667119256, iteration: 344799
loss: 1.048592209815979,grad_norm: 0.9999991283621027, iteration: 344800
loss: 1.0344356298446655,grad_norm: 0.9041414055527238, iteration: 344801
loss: 0.9971640110015869,grad_norm: 0.7163102775782869, iteration: 344802
loss: 1.019790530204773,grad_norm: 0.946297550410553, iteration: 344803
loss: 1.0014533996582031,grad_norm: 0.9841649479242814, iteration: 344804
loss: 1.0146580934524536,grad_norm: 0.8677224800250587, iteration: 344805
loss: 1.0094736814498901,grad_norm: 0.9777123240725564, iteration: 344806
loss: 1.0390284061431885,grad_norm: 0.7430359710871378, iteration: 344807
loss: 1.0868747234344482,grad_norm: 0.9999993410780563, iteration: 344808
loss: 0.9801949858665466,grad_norm: 0.999999097952504, iteration: 344809
loss: 1.0597429275512695,grad_norm: 0.9999998318693251, iteration: 344810
loss: 0.9942729473114014,grad_norm: 0.7901504209332213, iteration: 344811
loss: 0.9838032126426697,grad_norm: 0.760084286037512, iteration: 344812
loss: 1.072012186050415,grad_norm: 1.0000000438232384, iteration: 344813
loss: 0.9812904596328735,grad_norm: 0.7327980430148344, iteration: 344814
loss: 0.9698009490966797,grad_norm: 0.999999917210386, iteration: 344815
loss: 1.0041395425796509,grad_norm: 0.7861534942429225, iteration: 344816
loss: 0.9936245083808899,grad_norm: 0.6633253960038135, iteration: 344817
loss: 0.9902467131614685,grad_norm: 0.6613435171103879, iteration: 344818
loss: 1.014792799949646,grad_norm: 0.9819296712135893, iteration: 344819
loss: 1.0887696743011475,grad_norm: 0.9999990435977734, iteration: 344820
loss: 0.998668909072876,grad_norm: 0.7211167933874275, iteration: 344821
loss: 0.972265899181366,grad_norm: 0.7776159153391875, iteration: 344822
loss: 1.0155466794967651,grad_norm: 0.8813275985909698, iteration: 344823
loss: 1.0068572759628296,grad_norm: 0.7516708100294502, iteration: 344824
loss: 0.9969378113746643,grad_norm: 0.9303028733869759, iteration: 344825
loss: 0.9984032511711121,grad_norm: 0.9475177761903653, iteration: 344826
loss: 1.0028274059295654,grad_norm: 0.8834578491992156, iteration: 344827
loss: 1.0364396572113037,grad_norm: 0.9999991462380572, iteration: 344828
loss: 0.9962559342384338,grad_norm: 0.7460397309782899, iteration: 344829
loss: 1.0251073837280273,grad_norm: 0.7992064615437521, iteration: 344830
loss: 1.0266786813735962,grad_norm: 0.9999991460296731, iteration: 344831
loss: 0.961875855922699,grad_norm: 0.7739940699083677, iteration: 344832
loss: 0.9825865030288696,grad_norm: 0.7533722413685577, iteration: 344833
loss: 0.988692045211792,grad_norm: 0.8419825830521579, iteration: 344834
loss: 1.1303600072860718,grad_norm: 0.9999992438617924, iteration: 344835
loss: 0.9737016558647156,grad_norm: 0.7759153664997953, iteration: 344836
loss: 1.0232787132263184,grad_norm: 0.8095943891065183, iteration: 344837
loss: 1.0202504396438599,grad_norm: 0.7555849461484481, iteration: 344838
loss: 1.011508584022522,grad_norm: 0.772876450232505, iteration: 344839
loss: 0.9741088151931763,grad_norm: 0.820140498913345, iteration: 344840
loss: 1.0335307121276855,grad_norm: 0.8814129229766325, iteration: 344841
loss: 1.0087517499923706,grad_norm: 0.8291105823343257, iteration: 344842
loss: 1.008237361907959,grad_norm: 0.8201338658753126, iteration: 344843
loss: 0.9908738732337952,grad_norm: 0.7850438913691676, iteration: 344844
loss: 1.0058648586273193,grad_norm: 0.7785832491183776, iteration: 344845
loss: 0.9912254810333252,grad_norm: 0.8469362516306074, iteration: 344846
loss: 1.011433720588684,grad_norm: 0.8484527732508245, iteration: 344847
loss: 1.079849362373352,grad_norm: 0.999999513360768, iteration: 344848
loss: 0.9928579330444336,grad_norm: 0.7240075033623242, iteration: 344849
loss: 0.9941970705986023,grad_norm: 0.9675726377935341, iteration: 344850
loss: 1.008794903755188,grad_norm: 0.8015714939262015, iteration: 344851
loss: 1.0004544258117676,grad_norm: 0.7253425215567082, iteration: 344852
loss: 1.0303592681884766,grad_norm: 0.7423724957115081, iteration: 344853
loss: 1.0041980743408203,grad_norm: 0.8168303362944651, iteration: 344854
loss: 1.0148296356201172,grad_norm: 0.9999991859548604, iteration: 344855
loss: 1.0369696617126465,grad_norm: 0.9999998250881721, iteration: 344856
loss: 1.0003737211227417,grad_norm: 0.7360239514258847, iteration: 344857
loss: 1.0193432569503784,grad_norm: 0.8277894028684871, iteration: 344858
loss: 0.9795936942100525,grad_norm: 0.79694638724036, iteration: 344859
loss: 1.0158406496047974,grad_norm: 0.999999315778389, iteration: 344860
loss: 0.9673140048980713,grad_norm: 0.9648272845051988, iteration: 344861
loss: 1.0695182085037231,grad_norm: 0.9999991255805035, iteration: 344862
loss: 1.0240875482559204,grad_norm: 0.8861513175859508, iteration: 344863
loss: 1.009660005569458,grad_norm: 0.9762739855373795, iteration: 344864
loss: 1.0196775197982788,grad_norm: 0.703807031659507, iteration: 344865
loss: 0.974968433380127,grad_norm: 0.6651438894957211, iteration: 344866
loss: 0.9761884212493896,grad_norm: 0.8784098046752145, iteration: 344867
loss: 1.0326316356658936,grad_norm: 0.8849525718213448, iteration: 344868
loss: 1.0103566646575928,grad_norm: 0.9049687154947762, iteration: 344869
loss: 1.0021780729293823,grad_norm: 0.702596720131583, iteration: 344870
loss: 1.0240930318832397,grad_norm: 0.6502253112206827, iteration: 344871
loss: 1.0009108781814575,grad_norm: 0.9260093905596001, iteration: 344872
loss: 1.0450137853622437,grad_norm: 0.7147137810069196, iteration: 344873
loss: 0.9912976026535034,grad_norm: 0.85946030341477, iteration: 344874
loss: 0.9723362326622009,grad_norm: 0.7155414169232076, iteration: 344875
loss: 0.9983062744140625,grad_norm: 0.888617153544561, iteration: 344876
loss: 0.984985888004303,grad_norm: 0.8455352507394207, iteration: 344877
loss: 0.9855772852897644,grad_norm: 0.8673526686884533, iteration: 344878
loss: 1.0141421556472778,grad_norm: 0.9999994132332269, iteration: 344879
loss: 1.0076156854629517,grad_norm: 0.9353249963046305, iteration: 344880
loss: 1.004939317703247,grad_norm: 0.7370972001809204, iteration: 344881
loss: 0.9893990159034729,grad_norm: 0.8019082923986549, iteration: 344882
loss: 1.1399378776550293,grad_norm: 0.999999122622192, iteration: 344883
loss: 1.0129022598266602,grad_norm: 0.7608101565056451, iteration: 344884
loss: 1.010116696357727,grad_norm: 0.8360919933379373, iteration: 344885
loss: 0.9984021186828613,grad_norm: 0.8364045860192083, iteration: 344886
loss: 1.0276191234588623,grad_norm: 0.9999992714901431, iteration: 344887
loss: 0.963777482509613,grad_norm: 0.9842424932607157, iteration: 344888
loss: 1.0348848104476929,grad_norm: 0.9999997670521841, iteration: 344889
loss: 0.9995169639587402,grad_norm: 0.9180779758551165, iteration: 344890
loss: 0.9765785336494446,grad_norm: 0.8836612926274161, iteration: 344891
loss: 0.9980101585388184,grad_norm: 0.8145324484612084, iteration: 344892
loss: 1.0456950664520264,grad_norm: 0.8950268677893589, iteration: 344893
loss: 1.0034339427947998,grad_norm: 0.8976935932057839, iteration: 344894
loss: 1.0117448568344116,grad_norm: 0.7996236992256938, iteration: 344895
loss: 1.031227469444275,grad_norm: 0.7874354564318635, iteration: 344896
loss: 1.0165671110153198,grad_norm: 0.8693196880753856, iteration: 344897
loss: 1.017357349395752,grad_norm: 0.8061437538171049, iteration: 344898
loss: 1.0133241415023804,grad_norm: 0.7461341703129467, iteration: 344899
loss: 1.0011353492736816,grad_norm: 0.9999990539249817, iteration: 344900
loss: 0.9620081782341003,grad_norm: 0.8195403312884172, iteration: 344901
loss: 1.058764934539795,grad_norm: 0.9493762622134934, iteration: 344902
loss: 1.040078043937683,grad_norm: 0.9999996081185347, iteration: 344903
loss: 0.9916790723800659,grad_norm: 0.7328859553387773, iteration: 344904
loss: 0.9782810807228088,grad_norm: 0.8327828022633601, iteration: 344905
loss: 1.0320125818252563,grad_norm: 0.9999993531410112, iteration: 344906
loss: 1.007808804512024,grad_norm: 0.911608111517946, iteration: 344907
loss: 1.0786911249160767,grad_norm: 0.9999997392147854, iteration: 344908
loss: 1.0361957550048828,grad_norm: 0.8574324929162351, iteration: 344909
loss: 1.060238242149353,grad_norm: 0.9999993634135602, iteration: 344910
loss: 1.0139615535736084,grad_norm: 0.8565402847192474, iteration: 344911
loss: 1.008633017539978,grad_norm: 0.9999991429231453, iteration: 344912
loss: 0.9916892647743225,grad_norm: 0.6175584447788447, iteration: 344913
loss: 1.0194612741470337,grad_norm: 0.8486937473644893, iteration: 344914
loss: 0.9842002391815186,grad_norm: 0.7581382322817141, iteration: 344915
loss: 0.9949041604995728,grad_norm: 0.9218651041987426, iteration: 344916
loss: 1.088881015777588,grad_norm: 0.8741099666572418, iteration: 344917
loss: 0.9690764546394348,grad_norm: 0.7178977371660299, iteration: 344918
loss: 0.9738426208496094,grad_norm: 0.7707809050666152, iteration: 344919
loss: 0.9539276361465454,grad_norm: 0.783665703068404, iteration: 344920
loss: 0.9823391437530518,grad_norm: 0.7972874502626293, iteration: 344921
loss: 0.990278422832489,grad_norm: 0.8328908829654826, iteration: 344922
loss: 1.0063618421554565,grad_norm: 0.9438545078623611, iteration: 344923
loss: 0.9815083742141724,grad_norm: 0.707356382891275, iteration: 344924
loss: 1.051455020904541,grad_norm: 0.9999996197204205, iteration: 344925
loss: 1.0317158699035645,grad_norm: 0.9999991409092703, iteration: 344926
loss: 1.001344919204712,grad_norm: 0.7042189673487367, iteration: 344927
loss: 1.01600182056427,grad_norm: 0.78619524491205, iteration: 344928
loss: 1.0049535036087036,grad_norm: 0.9124904705242675, iteration: 344929
loss: 0.9605684876441956,grad_norm: 0.9631035971320203, iteration: 344930
loss: 0.9974786043167114,grad_norm: 0.8628755915249432, iteration: 344931
loss: 0.9916858673095703,grad_norm: 0.9999991101447796, iteration: 344932
loss: 1.0002312660217285,grad_norm: 0.720042975592186, iteration: 344933
loss: 0.9862335920333862,grad_norm: 0.8675037521459464, iteration: 344934
loss: 0.9828585982322693,grad_norm: 0.9999994151780319, iteration: 344935
loss: 0.9997352957725525,grad_norm: 0.7396522362789564, iteration: 344936
loss: 0.9828611016273499,grad_norm: 0.9258783400883027, iteration: 344937
loss: 0.9966870546340942,grad_norm: 0.9020491411076387, iteration: 344938
loss: 0.9791960120201111,grad_norm: 0.7878876742754235, iteration: 344939
loss: 1.0423481464385986,grad_norm: 0.9999998307185277, iteration: 344940
loss: 1.050776720046997,grad_norm: 0.8724546316634698, iteration: 344941
loss: 1.008204460144043,grad_norm: 0.9999993684265727, iteration: 344942
loss: 0.9789672493934631,grad_norm: 0.9633330867678797, iteration: 344943
loss: 1.026755928993225,grad_norm: 0.9999991448550203, iteration: 344944
loss: 1.0056456327438354,grad_norm: 0.8855982640572587, iteration: 344945
loss: 1.000447154045105,grad_norm: 0.8346823603806488, iteration: 344946
loss: 1.0063716173171997,grad_norm: 0.9844492890404052, iteration: 344947
loss: 0.968438982963562,grad_norm: 0.9302231431039357, iteration: 344948
loss: 0.9778099060058594,grad_norm: 0.8294004909680632, iteration: 344949
loss: 1.0224007368087769,grad_norm: 0.8596082399130892, iteration: 344950
loss: 1.0312955379486084,grad_norm: 1.0000000555433366, iteration: 344951
loss: 0.9865128993988037,grad_norm: 0.8556813321975876, iteration: 344952
loss: 0.9977196455001831,grad_norm: 0.6999908955144484, iteration: 344953
loss: 0.9942348003387451,grad_norm: 0.9741925033348829, iteration: 344954
loss: 1.0308120250701904,grad_norm: 0.7016829439507273, iteration: 344955
loss: 0.9854546189308167,grad_norm: 0.7201668972687347, iteration: 344956
loss: 1.0201834440231323,grad_norm: 0.8219680057934987, iteration: 344957
loss: 0.9871197938919067,grad_norm: 0.8912033668302622, iteration: 344958
loss: 1.011360764503479,grad_norm: 0.7875132189348603, iteration: 344959
loss: 1.0111709833145142,grad_norm: 0.8732340028100167, iteration: 344960
loss: 0.9841137528419495,grad_norm: 0.9999991012129834, iteration: 344961
loss: 0.9717116355895996,grad_norm: 0.7842872484542851, iteration: 344962
loss: 0.9873151183128357,grad_norm: 0.8396208267668598, iteration: 344963
loss: 0.9917578101158142,grad_norm: 0.9038385486780304, iteration: 344964
loss: 1.0206390619277954,grad_norm: 0.99999907403465, iteration: 344965
loss: 1.003873586654663,grad_norm: 0.8488682438796018, iteration: 344966
loss: 1.0242507457733154,grad_norm: 0.9999994501554267, iteration: 344967
loss: 0.9764841794967651,grad_norm: 0.993395191122226, iteration: 344968
loss: 0.9699267148971558,grad_norm: 0.8573740845927242, iteration: 344969
loss: 1.0089890956878662,grad_norm: 0.7425487059048916, iteration: 344970
loss: 0.9832831025123596,grad_norm: 0.9073105397568857, iteration: 344971
loss: 1.022012710571289,grad_norm: 0.8628876892821036, iteration: 344972
loss: 1.0010578632354736,grad_norm: 0.9192067094106395, iteration: 344973
loss: 0.9735107421875,grad_norm: 0.7332895242914796, iteration: 344974
loss: 0.9980305433273315,grad_norm: 0.7551039634200348, iteration: 344975
loss: 1.0612828731536865,grad_norm: 0.8409444566163665, iteration: 344976
loss: 0.9944896101951599,grad_norm: 0.7680256666930961, iteration: 344977
loss: 0.9789769053459167,grad_norm: 0.8131197958592994, iteration: 344978
loss: 0.9605032205581665,grad_norm: 0.8117738469006712, iteration: 344979
loss: 1.026724934577942,grad_norm: 0.9545695548269559, iteration: 344980
loss: 1.0205448865890503,grad_norm: 0.8268570027025096, iteration: 344981
loss: 0.9995890259742737,grad_norm: 0.7701853737201415, iteration: 344982
loss: 0.9679694771766663,grad_norm: 0.748041512910531, iteration: 344983
loss: 1.0155267715454102,grad_norm: 0.8670120541323855, iteration: 344984
loss: 1.001579999923706,grad_norm: 0.9309088060204526, iteration: 344985
loss: 1.0061242580413818,grad_norm: 0.7624307693689144, iteration: 344986
loss: 1.0088480710983276,grad_norm: 0.6989878983483628, iteration: 344987
loss: 1.0263054370880127,grad_norm: 0.9999996578609576, iteration: 344988
loss: 1.0407326221466064,grad_norm: 0.9999991588938846, iteration: 344989
loss: 0.986712634563446,grad_norm: 0.8429719740435685, iteration: 344990
loss: 0.982136070728302,grad_norm: 0.8216183683690846, iteration: 344991
loss: 0.9988762140274048,grad_norm: 0.7348390237235615, iteration: 344992
loss: 0.9704699516296387,grad_norm: 0.9999991104988815, iteration: 344993
loss: 1.0499056577682495,grad_norm: 0.9999997192198349, iteration: 344994
loss: 0.9784737825393677,grad_norm: 0.8956652490748229, iteration: 344995
loss: 0.9972244501113892,grad_norm: 0.8983744476067833, iteration: 344996
loss: 0.9724259972572327,grad_norm: 0.9999992535090346, iteration: 344997
loss: 0.979455292224884,grad_norm: 0.7481507163429575, iteration: 344998
loss: 1.0156890153884888,grad_norm: 0.9999993250011419, iteration: 344999
loss: 0.9808149933815002,grad_norm: 0.9498036181913982, iteration: 345000
loss: 1.0225838422775269,grad_norm: 0.8622010672986968, iteration: 345001
loss: 1.0098503828048706,grad_norm: 0.9522990611781139, iteration: 345002
loss: 1.0050897598266602,grad_norm: 0.9999990230968917, iteration: 345003
loss: 1.0165656805038452,grad_norm: 0.9999989271458535, iteration: 345004
loss: 0.9825291037559509,grad_norm: 0.7742039689477173, iteration: 345005
loss: 0.998242199420929,grad_norm: 0.8567695270295516, iteration: 345006
loss: 0.9792088866233826,grad_norm: 0.803978749303113, iteration: 345007
loss: 0.9860144853591919,grad_norm: 0.808817782862623, iteration: 345008
loss: 1.0107448101043701,grad_norm: 0.8131131473107001, iteration: 345009
loss: 1.0341395139694214,grad_norm: 0.9758708151073712, iteration: 345010
loss: 1.0032638311386108,grad_norm: 0.8329955499227734, iteration: 345011
loss: 0.9937883019447327,grad_norm: 0.771595945982718, iteration: 345012
loss: 0.9809882044792175,grad_norm: 0.8943026557423293, iteration: 345013
loss: 1.028741478919983,grad_norm: 0.9999997988865362, iteration: 345014
loss: 0.9695450067520142,grad_norm: 0.8206063414976016, iteration: 345015
loss: 0.9954885840415955,grad_norm: 0.7357603909671133, iteration: 345016
loss: 0.9950847625732422,grad_norm: 0.8626994113898644, iteration: 345017
loss: 0.9589401483535767,grad_norm: 0.8566078123242884, iteration: 345018
loss: 0.9708878397941589,grad_norm: 0.9291971723969894, iteration: 345019
loss: 1.0095945596694946,grad_norm: 0.8697166025931403, iteration: 345020
loss: 0.9981796145439148,grad_norm: 0.8805158556408322, iteration: 345021
loss: 1.0283756256103516,grad_norm: 0.8509933078819573, iteration: 345022
loss: 1.0110934972763062,grad_norm: 0.9999992637709352, iteration: 345023
loss: 0.9736250042915344,grad_norm: 0.7663592428786005, iteration: 345024
loss: 1.0194289684295654,grad_norm: 0.9681846731308714, iteration: 345025
loss: 1.029944658279419,grad_norm: 0.9638008112072081, iteration: 345026
loss: 1.042085886001587,grad_norm: 0.9999998925143697, iteration: 345027
loss: 0.9488964080810547,grad_norm: 0.8454007181934968, iteration: 345028
loss: 1.0001243352890015,grad_norm: 0.6969008222888673, iteration: 345029
loss: 0.9765385389328003,grad_norm: 0.8869549683169938, iteration: 345030
loss: 1.0567870140075684,grad_norm: 0.8751404078480004, iteration: 345031
loss: 1.0358800888061523,grad_norm: 0.9999990358263671, iteration: 345032
loss: 1.0346213579177856,grad_norm: 0.8981340834314926, iteration: 345033
loss: 1.0095993280410767,grad_norm: 0.9472429574995003, iteration: 345034
loss: 1.0102200508117676,grad_norm: 0.9999994385743737, iteration: 345035
loss: 1.0225396156311035,grad_norm: 0.9809873881412178, iteration: 345036
loss: 1.045664668083191,grad_norm: 0.9999988896914503, iteration: 345037
loss: 0.9937126636505127,grad_norm: 0.8956454097327182, iteration: 345038
loss: 0.9815365076065063,grad_norm: 0.811913416341813, iteration: 345039
loss: 0.9836381077766418,grad_norm: 0.999999023334942, iteration: 345040
loss: 0.9913100600242615,grad_norm: 0.8199070287370549, iteration: 345041
loss: 0.9915343523025513,grad_norm: 0.9999991646909911, iteration: 345042
loss: 1.0328902006149292,grad_norm: 0.9999990708413031, iteration: 345043
loss: 0.9960988759994507,grad_norm: 0.7207544049457761, iteration: 345044
loss: 0.9976832270622253,grad_norm: 0.9476371104817555, iteration: 345045
loss: 1.0030168294906616,grad_norm: 0.7857754918140475, iteration: 345046
loss: 1.1640454530715942,grad_norm: 0.9999998496901653, iteration: 345047
loss: 0.9903478622436523,grad_norm: 0.9999990567782536, iteration: 345048
loss: 1.0013022422790527,grad_norm: 0.7644734720323204, iteration: 345049
loss: 0.9992244839668274,grad_norm: 0.897259483406731, iteration: 345050
loss: 1.0165431499481201,grad_norm: 0.9999992694776149, iteration: 345051
loss: 1.0286657810211182,grad_norm: 0.9999993805767441, iteration: 345052
loss: 0.9867289662361145,grad_norm: 0.856495633401521, iteration: 345053
loss: 1.0527540445327759,grad_norm: 0.9999994572208838, iteration: 345054
loss: 0.9967819452285767,grad_norm: 0.9999991976921889, iteration: 345055
loss: 0.9988719820976257,grad_norm: 0.8834681519788422, iteration: 345056
loss: 0.9968795776367188,grad_norm: 0.820267151656278, iteration: 345057
loss: 1.011357069015503,grad_norm: 0.8571634434452289, iteration: 345058
loss: 0.9625194072723389,grad_norm: 0.8401357477826806, iteration: 345059
loss: 1.0605266094207764,grad_norm: 0.9999999661620125, iteration: 345060
loss: 1.0102260112762451,grad_norm: 0.8604350082429213, iteration: 345061
loss: 1.0000852346420288,grad_norm: 0.7141127936261954, iteration: 345062
loss: 0.979604959487915,grad_norm: 0.8078828285958521, iteration: 345063
loss: 0.9787065386772156,grad_norm: 0.7021291383079707, iteration: 345064
loss: 1.0027737617492676,grad_norm: 0.695713975255218, iteration: 345065
loss: 1.0938788652420044,grad_norm: 0.9999997650722777, iteration: 345066
loss: 1.0047516822814941,grad_norm: 0.9999994957260468, iteration: 345067
loss: 1.0223100185394287,grad_norm: 0.9999994647542645, iteration: 345068
loss: 1.016675353050232,grad_norm: 0.7859775364784629, iteration: 345069
loss: 1.0116045475006104,grad_norm: 0.7789848789880542, iteration: 345070
loss: 1.4164338111877441,grad_norm: 0.9999997581213612, iteration: 345071
loss: 1.0056103467941284,grad_norm: 0.7986437656491656, iteration: 345072
loss: 1.0169456005096436,grad_norm: 0.9999991005720426, iteration: 345073
loss: 0.9758486747741699,grad_norm: 0.7396460943903658, iteration: 345074
loss: 1.042343020439148,grad_norm: 0.7651041789514152, iteration: 345075
loss: 1.0436781644821167,grad_norm: 0.9463755164079809, iteration: 345076
loss: 1.0141159296035767,grad_norm: 0.99999993652033, iteration: 345077
loss: 1.0067832469940186,grad_norm: 0.7896686914903354, iteration: 345078
loss: 1.0137850046157837,grad_norm: 0.777528179148324, iteration: 345079
loss: 1.0071231126785278,grad_norm: 0.999999278321934, iteration: 345080
loss: 1.0168949365615845,grad_norm: 0.9999996520487375, iteration: 345081
loss: 1.0103704929351807,grad_norm: 0.7958074491546866, iteration: 345082
loss: 0.9868323802947998,grad_norm: 0.9506856523914373, iteration: 345083
loss: 0.9786795973777771,grad_norm: 0.9058713868079227, iteration: 345084
loss: 0.9561368227005005,grad_norm: 0.9201043463939074, iteration: 345085
loss: 1.018155574798584,grad_norm: 0.8519766572164268, iteration: 345086
loss: 1.00116765499115,grad_norm: 0.9999990479952852, iteration: 345087
loss: 1.0111274719238281,grad_norm: 0.9741490687835812, iteration: 345088
loss: 0.9732417464256287,grad_norm: 0.9025395916798096, iteration: 345089
loss: 0.9907353520393372,grad_norm: 0.9999992216922786, iteration: 345090
loss: 1.0043714046478271,grad_norm: 0.9165148491901688, iteration: 345091
loss: 1.0630494356155396,grad_norm: 0.9999999955881617, iteration: 345092
loss: 0.9759578704833984,grad_norm: 0.841514790501635, iteration: 345093
loss: 0.9671160578727722,grad_norm: 0.9249218902483483, iteration: 345094
loss: 0.9724909067153931,grad_norm: 0.8016958958754644, iteration: 345095
loss: 1.0128735303878784,grad_norm: 0.7669931859699344, iteration: 345096
loss: 1.010643720626831,grad_norm: 0.7706288948749255, iteration: 345097
loss: 1.019134283065796,grad_norm: 0.9999991417329858, iteration: 345098
loss: 1.0110994577407837,grad_norm: 0.8015186686766688, iteration: 345099
loss: 1.019010305404663,grad_norm: 0.6998475972949023, iteration: 345100
loss: 0.9917759299278259,grad_norm: 0.9117105490661452, iteration: 345101
loss: 0.985761284828186,grad_norm: 0.999999855766817, iteration: 345102
loss: 1.0165293216705322,grad_norm: 0.7033895211646038, iteration: 345103
loss: 0.9820172786712646,grad_norm: 0.7678016166173588, iteration: 345104
loss: 0.999299168586731,grad_norm: 0.7299468374599665, iteration: 345105
loss: 1.0370113849639893,grad_norm: 0.9532433103396953, iteration: 345106
loss: 0.9530759453773499,grad_norm: 0.7265137611580154, iteration: 345107
loss: 1.0079371929168701,grad_norm: 0.971166189808618, iteration: 345108
loss: 0.9938742518424988,grad_norm: 0.8134844118820679, iteration: 345109
loss: 1.0103124380111694,grad_norm: 0.734727231358906, iteration: 345110
loss: 1.0173202753067017,grad_norm: 0.9143235667249429, iteration: 345111
loss: 1.04625403881073,grad_norm: 0.8141456697247522, iteration: 345112
loss: 1.0049386024475098,grad_norm: 0.715133133804264, iteration: 345113
loss: 0.9965575933456421,grad_norm: 0.9999992329888875, iteration: 345114
loss: 1.0322295427322388,grad_norm: 0.9094171277249911, iteration: 345115
loss: 1.0699589252471924,grad_norm: 0.9999991761370295, iteration: 345116
loss: 0.9886414408683777,grad_norm: 0.8896328745645612, iteration: 345117
loss: 1.0121511220932007,grad_norm: 0.9999996100074955, iteration: 345118
loss: 0.9678520560264587,grad_norm: 0.8628524205088132, iteration: 345119
loss: 1.047234058380127,grad_norm: 0.9999993067729817, iteration: 345120
loss: 1.0033334493637085,grad_norm: 0.7941927199576326, iteration: 345121
loss: 0.9885262250900269,grad_norm: 0.6678008560505797, iteration: 345122
loss: 1.0004419088363647,grad_norm: 0.8745716046530451, iteration: 345123
loss: 0.947291374206543,grad_norm: 0.9999992433688029, iteration: 345124
loss: 1.008929967880249,grad_norm: 0.7993024658128713, iteration: 345125
loss: 1.0278762578964233,grad_norm: 0.9999991324409383, iteration: 345126
loss: 1.029147744178772,grad_norm: 0.7647796801095905, iteration: 345127
loss: 0.9995862245559692,grad_norm: 0.7788393962522437, iteration: 345128
loss: 1.0000983476638794,grad_norm: 0.946711764272484, iteration: 345129
loss: 0.9997647404670715,grad_norm: 0.7344124911597759, iteration: 345130
loss: 0.9995743036270142,grad_norm: 0.9999990610469294, iteration: 345131
loss: 1.007006287574768,grad_norm: 0.9999994681154105, iteration: 345132
loss: 1.0037622451782227,grad_norm: 0.9092446333924106, iteration: 345133
loss: 1.0174278020858765,grad_norm: 0.8134232592991499, iteration: 345134
loss: 0.9598819017410278,grad_norm: 0.8622838535491233, iteration: 345135
loss: 0.9991088509559631,grad_norm: 0.7309594645364947, iteration: 345136
loss: 0.9682828187942505,grad_norm: 0.8129176808900055, iteration: 345137
loss: 1.033635139465332,grad_norm: 0.9092813020503057, iteration: 345138
loss: 1.0515871047973633,grad_norm: 0.8368691436342786, iteration: 345139
loss: 0.989355206489563,grad_norm: 0.8658429051079876, iteration: 345140
loss: 0.9636417031288147,grad_norm: 0.7434319798741081, iteration: 345141
loss: 0.9616003632545471,grad_norm: 0.7274994371958868, iteration: 345142
loss: 0.9747249484062195,grad_norm: 0.7702726600623105, iteration: 345143
loss: 1.0216728448867798,grad_norm: 0.9999993284191956, iteration: 345144
loss: 1.0887738466262817,grad_norm: 0.9851813194331316, iteration: 345145
loss: 0.9949777126312256,grad_norm: 0.8564082142250791, iteration: 345146
loss: 1.0037469863891602,grad_norm: 0.9763057635857623, iteration: 345147
loss: 1.0167430639266968,grad_norm: 0.8952667046060919, iteration: 345148
loss: 1.003151535987854,grad_norm: 0.8982582995076204, iteration: 345149
loss: 1.0590087175369263,grad_norm: 0.9999990637851763, iteration: 345150
loss: 1.003189206123352,grad_norm: 0.9174947510765183, iteration: 345151
loss: 0.985086977481842,grad_norm: 0.8526459486229495, iteration: 345152
loss: 1.0154139995574951,grad_norm: 0.7051765317995253, iteration: 345153
loss: 0.9879469871520996,grad_norm: 0.9856151343474961, iteration: 345154
loss: 1.0088587999343872,grad_norm: 0.9999998285329484, iteration: 345155
loss: 0.9850788116455078,grad_norm: 0.7474549129051914, iteration: 345156
loss: 1.0366568565368652,grad_norm: 0.850498610763512, iteration: 345157
loss: 1.0169020891189575,grad_norm: 0.70019458527222, iteration: 345158
loss: 1.0009815692901611,grad_norm: 0.8598556048067237, iteration: 345159
loss: 0.9749178290367126,grad_norm: 0.8852807696258449, iteration: 345160
loss: 1.0284603834152222,grad_norm: 0.8988440359538741, iteration: 345161
loss: 0.9823555946350098,grad_norm: 0.749802781045505, iteration: 345162
loss: 1.0215743780136108,grad_norm: 0.812851512693391, iteration: 345163
loss: 0.9989227652549744,grad_norm: 0.9720769126068745, iteration: 345164
loss: 1.0220996141433716,grad_norm: 0.7736835325956107, iteration: 345165
loss: 1.012123703956604,grad_norm: 0.9913250631384867, iteration: 345166
loss: 1.0101006031036377,grad_norm: 0.9999991642913739, iteration: 345167
loss: 1.0196620225906372,grad_norm: 0.9999995088010822, iteration: 345168
loss: 0.9792289733886719,grad_norm: 0.7896124936337381, iteration: 345169
loss: 1.010918140411377,grad_norm: 0.78090458621903, iteration: 345170
loss: 0.9964773058891296,grad_norm: 0.7883882848212026, iteration: 345171
loss: 0.9982423186302185,grad_norm: 0.8638730323649206, iteration: 345172
loss: 0.989609956741333,grad_norm: 0.9999991566764067, iteration: 345173
loss: 1.05303156375885,grad_norm: 0.9999991662608392, iteration: 345174
loss: 1.021347165107727,grad_norm: 0.9227056410440663, iteration: 345175
loss: 1.0348962545394897,grad_norm: 0.9999997507458787, iteration: 345176
loss: 1.0163406133651733,grad_norm: 0.7289423370034426, iteration: 345177
loss: 0.9757680296897888,grad_norm: 0.815355820368421, iteration: 345178
loss: 1.0355767011642456,grad_norm: 0.9054681463774243, iteration: 345179
loss: 0.9824804663658142,grad_norm: 0.9422846921429151, iteration: 345180
loss: 0.960188627243042,grad_norm: 0.8835810655286769, iteration: 345181
loss: 1.0073355436325073,grad_norm: 0.9999992345775864, iteration: 345182
loss: 0.9843224883079529,grad_norm: 0.9999999476382229, iteration: 345183
loss: 0.968184232711792,grad_norm: 0.9079073043258433, iteration: 345184
loss: 1.0052188634872437,grad_norm: 0.8393019492025561, iteration: 345185
loss: 1.005238652229309,grad_norm: 0.9999990878048991, iteration: 345186
loss: 1.0272670984268188,grad_norm: 0.8393603703849428, iteration: 345187
loss: 1.0487922430038452,grad_norm: 0.9999992047598082, iteration: 345188
loss: 1.1258395910263062,grad_norm: 0.9999992577659939, iteration: 345189
loss: 0.9955215454101562,grad_norm: 0.9363303250754015, iteration: 345190
loss: 0.9966973066329956,grad_norm: 0.9999990916181423, iteration: 345191
loss: 0.9941235780715942,grad_norm: 0.7307681790854134, iteration: 345192
loss: 1.0053437948226929,grad_norm: 0.9999989693785336, iteration: 345193
loss: 0.9691939353942871,grad_norm: 0.7605189577430485, iteration: 345194
loss: 0.9952772259712219,grad_norm: 0.7532133647488505, iteration: 345195
loss: 0.9732271432876587,grad_norm: 0.8941984133601804, iteration: 345196
loss: 1.0288656949996948,grad_norm: 0.99999912299569, iteration: 345197
loss: 0.9884551167488098,grad_norm: 0.920632151536387, iteration: 345198
loss: 1.0328726768493652,grad_norm: 0.8441244115015707, iteration: 345199
loss: 0.9772675037384033,grad_norm: 0.814418139522558, iteration: 345200
loss: 1.006241798400879,grad_norm: 0.6365567955211605, iteration: 345201
loss: 0.9906904697418213,grad_norm: 0.7821372717507097, iteration: 345202
loss: 1.0572972297668457,grad_norm: 0.8859033253171693, iteration: 345203
loss: 0.9852842092514038,grad_norm: 0.9955256496059447, iteration: 345204
loss: 1.0055561065673828,grad_norm: 0.8554106824513552, iteration: 345205
loss: 1.02897047996521,grad_norm: 0.6697903397797166, iteration: 345206
loss: 1.0228270292282104,grad_norm: 0.8562568934429634, iteration: 345207
loss: 1.0201866626739502,grad_norm: 0.8262112002300264, iteration: 345208
loss: 1.0232264995574951,grad_norm: 0.898677586377624, iteration: 345209
loss: 0.9764377474784851,grad_norm: 0.7986348746445067, iteration: 345210
loss: 1.0296739339828491,grad_norm: 0.9999995458158892, iteration: 345211
loss: 0.9901754260063171,grad_norm: 0.8209912377466532, iteration: 345212
loss: 0.9395366907119751,grad_norm: 0.9880983059485449, iteration: 345213
loss: 1.0374172925949097,grad_norm: 0.9999998888246785, iteration: 345214
loss: 0.9945607781410217,grad_norm: 0.7983120038532656, iteration: 345215
loss: 0.9813327193260193,grad_norm: 0.7487812159238917, iteration: 345216
loss: 0.9815194010734558,grad_norm: 0.7473625278528995, iteration: 345217
loss: 1.0214191675186157,grad_norm: 0.8813380493268457, iteration: 345218
loss: 1.0130895376205444,grad_norm: 0.8778783793227226, iteration: 345219
loss: 0.9660753011703491,grad_norm: 0.999999010026694, iteration: 345220
loss: 1.0475475788116455,grad_norm: 0.8823775605345651, iteration: 345221
loss: 1.0112003087997437,grad_norm: 0.9999999268077817, iteration: 345222
loss: 0.9915024638175964,grad_norm: 0.7844505633378259, iteration: 345223
loss: 0.9992426037788391,grad_norm: 0.9999994929061784, iteration: 345224
loss: 1.0252578258514404,grad_norm: 0.9999993073055726, iteration: 345225
loss: 0.9490505456924438,grad_norm: 0.7272702607044256, iteration: 345226
loss: 1.032821774482727,grad_norm: 0.9999991125745337, iteration: 345227
loss: 1.1641192436218262,grad_norm: 1.0000000130147952, iteration: 345228
loss: 1.055587649345398,grad_norm: 0.999999944159322, iteration: 345229
loss: 0.9613135457038879,grad_norm: 0.9999991117480297, iteration: 345230
loss: 1.00742506980896,grad_norm: 0.999999106116333, iteration: 345231
loss: 1.0016188621520996,grad_norm: 0.8023078255589398, iteration: 345232
loss: 1.0120280981063843,grad_norm: 0.8507032947066028, iteration: 345233
loss: 1.002810001373291,grad_norm: 0.825781032467726, iteration: 345234
loss: 1.0335485935211182,grad_norm: 0.9396437482779995, iteration: 345235
loss: 0.975070059299469,grad_norm: 0.8782814184397059, iteration: 345236
loss: 0.9973824620246887,grad_norm: 0.7762471742417679, iteration: 345237
loss: 0.9729479551315308,grad_norm: 0.7284055932512272, iteration: 345238
loss: 0.9882218241691589,grad_norm: 0.9999990312468845, iteration: 345239
loss: 0.970575749874115,grad_norm: 0.8740098157591998, iteration: 345240
loss: 0.9903486371040344,grad_norm: 0.9164115253682114, iteration: 345241
loss: 0.9905189275741577,grad_norm: 0.7402490905830013, iteration: 345242
loss: 1.009052038192749,grad_norm: 0.9999990501383256, iteration: 345243
loss: 0.9775441884994507,grad_norm: 0.822931918483808, iteration: 345244
loss: 1.073621153831482,grad_norm: 0.9999999284644513, iteration: 345245
loss: 0.9779312014579773,grad_norm: 0.9999993081801254, iteration: 345246
loss: 0.9901517629623413,grad_norm: 0.8815437662581148, iteration: 345247
loss: 0.9705637693405151,grad_norm: 0.8495650870664574, iteration: 345248
loss: 0.9695581793785095,grad_norm: 0.8351039707711386, iteration: 345249
loss: 1.010776162147522,grad_norm: 0.7481922330722364, iteration: 345250
loss: 0.9866185784339905,grad_norm: 0.9835867183080078, iteration: 345251
loss: 1.0311143398284912,grad_norm: 0.8044309498785451, iteration: 345252
loss: 0.9550132155418396,grad_norm: 0.8302627393823508, iteration: 345253
loss: 1.0658087730407715,grad_norm: 0.8687570478345279, iteration: 345254
loss: 1.0137691497802734,grad_norm: 0.8266728374792613, iteration: 345255
loss: 0.9887243509292603,grad_norm: 0.7623127405967779, iteration: 345256
loss: 1.0112738609313965,grad_norm: 0.8736846708402405, iteration: 345257
loss: 1.0353717803955078,grad_norm: 0.999999218545022, iteration: 345258
loss: 1.014130711555481,grad_norm: 0.9999990258609488, iteration: 345259
loss: 0.9915153384208679,grad_norm: 0.9467256649378172, iteration: 345260
loss: 0.9445996284484863,grad_norm: 0.9653809146429537, iteration: 345261
loss: 0.992130696773529,grad_norm: 0.7672407273713993, iteration: 345262
loss: 0.9617806077003479,grad_norm: 0.8424469714250927, iteration: 345263
loss: 0.9990079402923584,grad_norm: 0.8200941490584629, iteration: 345264
loss: 1.0529603958129883,grad_norm: 0.9263637236703124, iteration: 345265
loss: 0.996764063835144,grad_norm: 0.9531657389148493, iteration: 345266
loss: 1.0189021825790405,grad_norm: 0.9999998812859713, iteration: 345267
loss: 1.167592167854309,grad_norm: 0.9999998734051051, iteration: 345268
loss: 1.0367435216903687,grad_norm: 0.9999996393175344, iteration: 345269
loss: 0.9772481918334961,grad_norm: 0.8387234965855067, iteration: 345270
loss: 1.068721055984497,grad_norm: 0.9999998058564673, iteration: 345271
loss: 0.9831352233886719,grad_norm: 0.7198128014209793, iteration: 345272
loss: 0.99215167760849,grad_norm: 0.7886227088790305, iteration: 345273
loss: 0.9862415194511414,grad_norm: 0.6924898269256213, iteration: 345274
loss: 0.994359016418457,grad_norm: 0.6625066683639881, iteration: 345275
loss: 1.0573643445968628,grad_norm: 0.7752254685244351, iteration: 345276
loss: 1.0268065929412842,grad_norm: 0.9999996928866614, iteration: 345277
loss: 0.9873633980751038,grad_norm: 0.7070904059763985, iteration: 345278
loss: 0.9999033212661743,grad_norm: 0.8383038846625951, iteration: 345279
loss: 0.9945743680000305,grad_norm: 0.7918233682245841, iteration: 345280
loss: 1.00626540184021,grad_norm: 0.8443521684403057, iteration: 345281
loss: 1.0356606245040894,grad_norm: 0.9999992226376404, iteration: 345282
loss: 1.037538766860962,grad_norm: 0.9077267136141484, iteration: 345283
loss: 1.0046557188034058,grad_norm: 0.8496074443763676, iteration: 345284
loss: 1.0231966972351074,grad_norm: 0.842242314741081, iteration: 345285
loss: 0.9980826377868652,grad_norm: 0.9999997444961353, iteration: 345286
loss: 1.0145893096923828,grad_norm: 0.9637645189175374, iteration: 345287
loss: 0.9889788031578064,grad_norm: 0.9999991278796259, iteration: 345288
loss: 1.106806755065918,grad_norm: 0.9999998010934277, iteration: 345289
loss: 0.98590487241745,grad_norm: 0.740794329174457, iteration: 345290
loss: 0.9686707854270935,grad_norm: 0.9999991074249623, iteration: 345291
loss: 1.0765738487243652,grad_norm: 0.8799864197280421, iteration: 345292
loss: 1.030136227607727,grad_norm: 0.9999997480663716, iteration: 345293
loss: 1.0037550926208496,grad_norm: 0.7422596165611307, iteration: 345294
loss: 0.9915664196014404,grad_norm: 0.9999990082374837, iteration: 345295
loss: 0.9966676235198975,grad_norm: 0.9999995150755054, iteration: 345296
loss: 1.0191200971603394,grad_norm: 0.9597758408400464, iteration: 345297
loss: 1.0023561716079712,grad_norm: 0.9630394167317731, iteration: 345298
loss: 1.0160175561904907,grad_norm: 0.7924431931975384, iteration: 345299
loss: 0.9845341444015503,grad_norm: 0.7826318157416313, iteration: 345300
loss: 0.9890956878662109,grad_norm: 0.9317791798251532, iteration: 345301
loss: 1.0018471479415894,grad_norm: 0.8561333443603609, iteration: 345302
loss: 0.9940659999847412,grad_norm: 0.79627006964744, iteration: 345303
loss: 1.0255823135375977,grad_norm: 0.8783256999234847, iteration: 345304
loss: 1.0188714265823364,grad_norm: 0.7944363854863927, iteration: 345305
loss: 1.0229743719100952,grad_norm: 0.999999244542755, iteration: 345306
loss: 1.014074444770813,grad_norm: 0.7520674283068333, iteration: 345307
loss: 1.0493565797805786,grad_norm: 0.8904095500744255, iteration: 345308
loss: 1.0061919689178467,grad_norm: 0.7797896085388255, iteration: 345309
loss: 1.0301254987716675,grad_norm: 0.7511230929574323, iteration: 345310
loss: 1.0331330299377441,grad_norm: 0.8715176396284101, iteration: 345311
loss: 1.0105865001678467,grad_norm: 0.7671643783312716, iteration: 345312
loss: 0.9970340728759766,grad_norm: 0.837150790946619, iteration: 345313
loss: 1.0580614805221558,grad_norm: 0.9741205650404512, iteration: 345314
loss: 0.9941014051437378,grad_norm: 0.8088939406551884, iteration: 345315
loss: 1.0009957551956177,grad_norm: 0.9999993443106548, iteration: 345316
loss: 0.9773700833320618,grad_norm: 0.8160461706183624, iteration: 345317
loss: 1.0305202007293701,grad_norm: 0.9438826107634494, iteration: 345318
loss: 1.071631908416748,grad_norm: 0.8856276763367795, iteration: 345319
loss: 0.9956423044204712,grad_norm: 0.7394486383181195, iteration: 345320
loss: 0.9913350939750671,grad_norm: 0.8222881437090802, iteration: 345321
loss: 0.9627953767776489,grad_norm: 0.9999997449846028, iteration: 345322
loss: 1.0288857221603394,grad_norm: 0.7558516903617847, iteration: 345323
loss: 1.0084577798843384,grad_norm: 0.9875270185878245, iteration: 345324
loss: 1.0062631368637085,grad_norm: 0.9999989680409993, iteration: 345325
loss: 1.0570107698440552,grad_norm: 0.9999993691637116, iteration: 345326
loss: 0.9867990016937256,grad_norm: 0.7637849043892836, iteration: 345327
loss: 1.018385887145996,grad_norm: 0.9915115320234633, iteration: 345328
loss: 1.0413297414779663,grad_norm: 0.99999922687062, iteration: 345329
loss: 1.02016019821167,grad_norm: 0.8676482479035621, iteration: 345330
loss: 1.0134224891662598,grad_norm: 0.868166843825345, iteration: 345331
loss: 1.0333787202835083,grad_norm: 0.6501288248065453, iteration: 345332
loss: 1.0316438674926758,grad_norm: 0.8522629442436803, iteration: 345333
loss: 1.016876220703125,grad_norm: 0.9197875794422564, iteration: 345334
loss: 1.0013434886932373,grad_norm: 0.7397828849188062, iteration: 345335
loss: 1.0188438892364502,grad_norm: 0.7485288320161401, iteration: 345336
loss: 1.007603645324707,grad_norm: 0.8003155230165035, iteration: 345337
loss: 1.0022488832473755,grad_norm: 0.8528619514310688, iteration: 345338
loss: 1.0283831357955933,grad_norm: 0.9999990630062832, iteration: 345339
loss: 1.0224659442901611,grad_norm: 0.7889592390505517, iteration: 345340
loss: 0.9809817671775818,grad_norm: 0.9229917077773906, iteration: 345341
loss: 1.0156941413879395,grad_norm: 0.9999991377167029, iteration: 345342
loss: 1.0138940811157227,grad_norm: 0.8393786961103878, iteration: 345343
loss: 1.0464621782302856,grad_norm: 0.999999894274182, iteration: 345344
loss: 0.9995288252830505,grad_norm: 0.7726311765319731, iteration: 345345
loss: 0.9621497392654419,grad_norm: 0.8269200515475624, iteration: 345346
loss: 0.9850647449493408,grad_norm: 0.7638134171102613, iteration: 345347
loss: 0.9818312525749207,grad_norm: 0.8819598531037947, iteration: 345348
loss: 1.0135996341705322,grad_norm: 0.868318381284755, iteration: 345349
loss: 0.9876435995101929,grad_norm: 0.8216678154946956, iteration: 345350
loss: 0.9882304072380066,grad_norm: 0.8655345503349874, iteration: 345351
loss: 0.9980313181877136,grad_norm: 0.9999997423417077, iteration: 345352
loss: 0.9977506399154663,grad_norm: 0.8539217214603593, iteration: 345353
loss: 1.0555164813995361,grad_norm: 0.9999996535871365, iteration: 345354
loss: 1.0619215965270996,grad_norm: 0.9424225378104273, iteration: 345355
loss: 1.0125831365585327,grad_norm: 0.9458534262802394, iteration: 345356
loss: 0.989000141620636,grad_norm: 0.8863323884699784, iteration: 345357
loss: 1.0273689031600952,grad_norm: 0.999999455410212, iteration: 345358
loss: 1.029905080795288,grad_norm: 0.7741824260983747, iteration: 345359
loss: 1.0204273462295532,grad_norm: 0.9999995029417046, iteration: 345360
loss: 1.0314698219299316,grad_norm: 0.8383493885169593, iteration: 345361
loss: 0.9839578866958618,grad_norm: 0.8662807199591628, iteration: 345362
loss: 1.0728274583816528,grad_norm: 0.7233555919271971, iteration: 345363
loss: 0.9633145332336426,grad_norm: 0.7457070586373638, iteration: 345364
loss: 1.039116382598877,grad_norm: 0.7468362698040989, iteration: 345365
loss: 1.0572179555892944,grad_norm: 0.7604467763339002, iteration: 345366
loss: 1.0576956272125244,grad_norm: 0.8970239271224657, iteration: 345367
loss: 1.0027996301651,grad_norm: 0.9224360260089547, iteration: 345368
loss: 1.0135128498077393,grad_norm: 0.7847625404448583, iteration: 345369
loss: 1.0728434324264526,grad_norm: 0.8974018145194791, iteration: 345370
loss: 0.9875801205635071,grad_norm: 0.9999991548492905, iteration: 345371
loss: 1.110640048980713,grad_norm: 0.9999991586290151, iteration: 345372
loss: 0.9917405247688293,grad_norm: 0.7314905083223655, iteration: 345373
loss: 0.9716235995292664,grad_norm: 0.9999998104229485, iteration: 345374
loss: 1.0071830749511719,grad_norm: 0.7529432306262522, iteration: 345375
loss: 1.057152509689331,grad_norm: 0.9999995466002294, iteration: 345376
loss: 1.087138056755066,grad_norm: 0.9999995442542705, iteration: 345377
loss: 1.1394915580749512,grad_norm: 0.8806664251005023, iteration: 345378
loss: 0.9999613761901855,grad_norm: 0.8312085300302217, iteration: 345379
loss: 1.0250121355056763,grad_norm: 0.9999993965905801, iteration: 345380
loss: 0.9975836277008057,grad_norm: 0.9178328906617834, iteration: 345381
loss: 1.1724987030029297,grad_norm: 0.9999993740770566, iteration: 345382
loss: 1.0076615810394287,grad_norm: 0.7799395794217033, iteration: 345383
loss: 0.9956262111663818,grad_norm: 0.9262199957554598, iteration: 345384
loss: 1.1722086668014526,grad_norm: 0.9999993290200698, iteration: 345385
loss: 1.0651323795318604,grad_norm: 0.9999991691169667, iteration: 345386
loss: 1.0716986656188965,grad_norm: 0.9999996671441314, iteration: 345387
loss: 1.002530574798584,grad_norm: 0.7801454707744676, iteration: 345388
loss: 1.0351691246032715,grad_norm: 0.6893875019523604, iteration: 345389
loss: 0.9532300233840942,grad_norm: 0.8351127057063825, iteration: 345390
loss: 0.9803429841995239,grad_norm: 0.8296066894811127, iteration: 345391
loss: 1.0413329601287842,grad_norm: 0.9999991911121359, iteration: 345392
loss: 1.0085811614990234,grad_norm: 0.9369704180868867, iteration: 345393
loss: 1.0503636598587036,grad_norm: 0.9999996868833151, iteration: 345394
loss: 1.0158566236495972,grad_norm: 0.8166060733968316, iteration: 345395
loss: 1.0353208780288696,grad_norm: 0.9999999685570765, iteration: 345396
loss: 0.990765392780304,grad_norm: 0.8769558103782708, iteration: 345397
loss: 1.0203367471694946,grad_norm: 0.9999993877544355, iteration: 345398
loss: 0.9786614179611206,grad_norm: 0.999999744919012, iteration: 345399
loss: 1.0542032718658447,grad_norm: 0.999999417268321, iteration: 345400
loss: 1.0469541549682617,grad_norm: 0.9001633206456598, iteration: 345401
loss: 0.9801656007766724,grad_norm: 0.9292893712266854, iteration: 345402
loss: 1.0246491432189941,grad_norm: 0.999999359514064, iteration: 345403
loss: 0.961242139339447,grad_norm: 0.7772914047093692, iteration: 345404
loss: 1.0015535354614258,grad_norm: 0.8341903879447882, iteration: 345405
loss: 0.9739964008331299,grad_norm: 0.7629308861005811, iteration: 345406
loss: 1.0014015436172485,grad_norm: 0.7354401339548645, iteration: 345407
loss: 0.9773585200309753,grad_norm: 0.766423988061106, iteration: 345408
loss: 1.0387890338897705,grad_norm: 0.9999991126856346, iteration: 345409
loss: 1.0040229558944702,grad_norm: 0.9999997332524944, iteration: 345410
loss: 1.0202171802520752,grad_norm: 0.9999991087176978, iteration: 345411
loss: 1.0384674072265625,grad_norm: 0.891822561839719, iteration: 345412
loss: 1.0292260646820068,grad_norm: 0.7257505847212667, iteration: 345413
loss: 0.9744986295700073,grad_norm: 0.7815761718615525, iteration: 345414
loss: 1.0192792415618896,grad_norm: 0.6697051796370483, iteration: 345415
loss: 0.9993852972984314,grad_norm: 0.7086047205946242, iteration: 345416
loss: 1.0147695541381836,grad_norm: 0.9999991565183747, iteration: 345417
loss: 0.9958754777908325,grad_norm: 0.9667637003018199, iteration: 345418
loss: 1.0188426971435547,grad_norm: 0.724341755856548, iteration: 345419
loss: 0.9914731979370117,grad_norm: 0.9999991726650721, iteration: 345420
loss: 1.0058836936950684,grad_norm: 0.6515673102788389, iteration: 345421
loss: 1.0312503576278687,grad_norm: 0.7715634022311161, iteration: 345422
loss: 1.0792213678359985,grad_norm: 0.9999997406315452, iteration: 345423
loss: 1.036047101020813,grad_norm: 0.9308754596319866, iteration: 345424
loss: 1.0181081295013428,grad_norm: 0.9514264564476703, iteration: 345425
loss: 1.0248807668685913,grad_norm: 0.8684632077244139, iteration: 345426
loss: 1.0035816431045532,grad_norm: 0.9999994870562335, iteration: 345427
loss: 1.1361767053604126,grad_norm: 0.8724955728023414, iteration: 345428
loss: 0.9943746328353882,grad_norm: 0.9660817617742999, iteration: 345429
loss: 0.9768450856208801,grad_norm: 0.9999992281480846, iteration: 345430
loss: 1.0124177932739258,grad_norm: 0.7701226477933963, iteration: 345431
loss: 0.9987781047821045,grad_norm: 0.9133265840099751, iteration: 345432
loss: 1.0095168352127075,grad_norm: 0.8822522664224489, iteration: 345433
loss: 1.0755189657211304,grad_norm: 0.9999994689600057, iteration: 345434
loss: 0.9944639205932617,grad_norm: 0.8850757483816862, iteration: 345435
loss: 1.1071797609329224,grad_norm: 0.9589406475507546, iteration: 345436
loss: 1.0284806489944458,grad_norm: 0.9460621460638093, iteration: 345437
loss: 0.9533341526985168,grad_norm: 0.9826336167127454, iteration: 345438
loss: 0.9742137789726257,grad_norm: 0.9999994955421109, iteration: 345439
loss: 0.990952730178833,grad_norm: 0.7962562238778081, iteration: 345440
loss: 1.0151617527008057,grad_norm: 0.9999993270691289, iteration: 345441
loss: 0.9771469235420227,grad_norm: 0.8569376846584763, iteration: 345442
loss: 0.9907130599021912,grad_norm: 0.8909030028643825, iteration: 345443
loss: 0.9997861385345459,grad_norm: 0.7552847454894961, iteration: 345444
loss: 1.0677376985549927,grad_norm: 0.9999994020483682, iteration: 345445
loss: 1.0021603107452393,grad_norm: 0.7164949625102549, iteration: 345446
loss: 1.0231993198394775,grad_norm: 0.8713025204661665, iteration: 345447
loss: 0.9892171025276184,grad_norm: 0.8834528816770194, iteration: 345448
loss: 0.9966535568237305,grad_norm: 0.8814480041570643, iteration: 345449
loss: 0.9597029089927673,grad_norm: 0.8036802679756898, iteration: 345450
loss: 1.0184440612792969,grad_norm: 0.7174174729590953, iteration: 345451
loss: 0.9967042803764343,grad_norm: 0.8633756847500242, iteration: 345452
loss: 1.0093846321105957,grad_norm: 0.8462101937913281, iteration: 345453
loss: 0.9876289367675781,grad_norm: 0.8422527691962984, iteration: 345454
loss: 0.9682210683822632,grad_norm: 0.7669524927458542, iteration: 345455
loss: 0.9936531186103821,grad_norm: 0.8507528086499846, iteration: 345456
loss: 0.9872139096260071,grad_norm: 0.7035013910790593, iteration: 345457
loss: 1.0171266794204712,grad_norm: 0.8359005923622864, iteration: 345458
loss: 0.9893768429756165,grad_norm: 0.9999992423252914, iteration: 345459
loss: 1.0122560262680054,grad_norm: 0.9258645815383801, iteration: 345460
loss: 0.9958968162536621,grad_norm: 0.9856533609773027, iteration: 345461
loss: 0.9819502830505371,grad_norm: 0.8668231911066783, iteration: 345462
loss: 1.0072795152664185,grad_norm: 0.8738630294172248, iteration: 345463
loss: 0.9984462857246399,grad_norm: 0.795597430661393, iteration: 345464
loss: 0.9915570020675659,grad_norm: 0.9999990847479674, iteration: 345465
loss: 1.015399694442749,grad_norm: 0.6541647663345844, iteration: 345466
loss: 1.0168381929397583,grad_norm: 0.924933554131909, iteration: 345467
loss: 0.9906609654426575,grad_norm: 0.8164483008560295, iteration: 345468
loss: 1.1236096620559692,grad_norm: 0.9999991175314359, iteration: 345469
loss: 0.9696677923202515,grad_norm: 0.9999996892647688, iteration: 345470
loss: 0.9990007877349854,grad_norm: 0.7819369986671342, iteration: 345471
loss: 1.024031400680542,grad_norm: 0.9310832516133839, iteration: 345472
loss: 1.026012659072876,grad_norm: 0.9999997317215168, iteration: 345473
loss: 0.9994615316390991,grad_norm: 0.9891228586530364, iteration: 345474
loss: 0.9605096578598022,grad_norm: 0.8328880841718841, iteration: 345475
loss: 1.0015757083892822,grad_norm: 0.7475474162472493, iteration: 345476
loss: 1.0070605278015137,grad_norm: 0.8956729698262021, iteration: 345477
loss: 1.0045605897903442,grad_norm: 0.999999600011754, iteration: 345478
loss: 0.987885057926178,grad_norm: 0.7537098349431823, iteration: 345479
loss: 0.9919652342796326,grad_norm: 0.795055275028021, iteration: 345480
loss: 0.962934672832489,grad_norm: 0.8250249776745596, iteration: 345481
loss: 0.9809166193008423,grad_norm: 0.8414402748301019, iteration: 345482
loss: 0.9920663237571716,grad_norm: 0.9999990958479463, iteration: 345483
loss: 1.0544204711914062,grad_norm: 0.9999991326997579, iteration: 345484
loss: 1.024271845817566,grad_norm: 0.8405965515073333, iteration: 345485
loss: 1.0266227722167969,grad_norm: 0.8694514512335232, iteration: 345486
loss: 1.0210559368133545,grad_norm: 0.859633624191989, iteration: 345487
loss: 1.0208739042282104,grad_norm: 0.7863123594062404, iteration: 345488
loss: 1.0244474411010742,grad_norm: 0.9331412397985837, iteration: 345489
loss: 1.0161869525909424,grad_norm: 0.8621671973368964, iteration: 345490
loss: 0.996160626411438,grad_norm: 0.904913091882391, iteration: 345491
loss: 1.0120662450790405,grad_norm: 0.987977696920561, iteration: 345492
loss: 1.045127511024475,grad_norm: 0.8193166105184669, iteration: 345493
loss: 0.9748198986053467,grad_norm: 0.7245614941363752, iteration: 345494
loss: 1.0114810466766357,grad_norm: 0.9148876448030997, iteration: 345495
loss: 1.034332036972046,grad_norm: 0.8853478166724796, iteration: 345496
loss: 0.9823259711265564,grad_norm: 0.8410721944785111, iteration: 345497
loss: 1.0294350385665894,grad_norm: 0.9204372581408153, iteration: 345498
loss: 0.9741315841674805,grad_norm: 0.7634438053933549, iteration: 345499
loss: 0.9731302857398987,grad_norm: 0.999053504528984, iteration: 345500
loss: 0.9835965037345886,grad_norm: 0.7769705079629136, iteration: 345501
loss: 0.9966152906417847,grad_norm: 0.9275292414503876, iteration: 345502
loss: 0.9568239450454712,grad_norm: 0.8588893187419496, iteration: 345503
loss: 1.0250895023345947,grad_norm: 0.9625119263307652, iteration: 345504
loss: 1.0041179656982422,grad_norm: 0.8625652832400909, iteration: 345505
loss: 1.0077247619628906,grad_norm: 0.9105299229217251, iteration: 345506
loss: 0.967688262462616,grad_norm: 0.7552440170324165, iteration: 345507
loss: 1.0001778602600098,grad_norm: 0.8648124703207776, iteration: 345508
loss: 1.0087295770645142,grad_norm: 0.9124512699562046, iteration: 345509
loss: 1.0078483819961548,grad_norm: 0.8427922148435593, iteration: 345510
loss: 0.9976590871810913,grad_norm: 0.9994314836362223, iteration: 345511
loss: 0.9934743642807007,grad_norm: 0.6541015882898534, iteration: 345512
loss: 1.006101131439209,grad_norm: 0.7036802041918504, iteration: 345513
loss: 1.024692416191101,grad_norm: 0.926995432520435, iteration: 345514
loss: 1.0031754970550537,grad_norm: 0.7494234589435512, iteration: 345515
loss: 1.0046278238296509,grad_norm: 0.7178913259181094, iteration: 345516
loss: 0.977022647857666,grad_norm: 0.7331492098224098, iteration: 345517
loss: 1.0744255781173706,grad_norm: 0.9999993579342863, iteration: 345518
loss: 1.0233144760131836,grad_norm: 0.8000083034298152, iteration: 345519
loss: 1.0225985050201416,grad_norm: 0.8698395149087181, iteration: 345520
loss: 0.9454764127731323,grad_norm: 0.9528319128283069, iteration: 345521
loss: 0.9869925379753113,grad_norm: 0.7595986255950972, iteration: 345522
loss: 1.0100785493850708,grad_norm: 0.8873863715314372, iteration: 345523
loss: 1.0456547737121582,grad_norm: 0.9999990947066288, iteration: 345524
loss: 0.9947848320007324,grad_norm: 0.7969146984599297, iteration: 345525
loss: 1.0118499994277954,grad_norm: 0.9362910922542006, iteration: 345526
loss: 1.0117716789245605,grad_norm: 0.7933845194704813, iteration: 345527
loss: 1.0888419151306152,grad_norm: 0.9999998995320626, iteration: 345528
loss: 1.0131028890609741,grad_norm: 0.9999996563425834, iteration: 345529
loss: 1.122106671333313,grad_norm: 0.9999993303188394, iteration: 345530
loss: 0.9691881537437439,grad_norm: 0.8842599782451713, iteration: 345531
loss: 0.9824986457824707,grad_norm: 0.7545699253587085, iteration: 345532
loss: 0.9905237555503845,grad_norm: 0.7878456481161229, iteration: 345533
loss: 0.9986281394958496,grad_norm: 0.8489132809738336, iteration: 345534
loss: 1.0060944557189941,grad_norm: 0.8372713790380433, iteration: 345535
loss: 1.014643669128418,grad_norm: 0.7319455503622696, iteration: 345536
loss: 0.993400514125824,grad_norm: 0.9999991637090618, iteration: 345537
loss: 0.9928784370422363,grad_norm: 0.8497429731366369, iteration: 345538
loss: 0.975398600101471,grad_norm: 0.8477858810577812, iteration: 345539
loss: 0.9768128991127014,grad_norm: 0.6718109359951384, iteration: 345540
loss: 0.986724853515625,grad_norm: 0.7632669394838154, iteration: 345541
loss: 0.9803221225738525,grad_norm: 0.999999372216323, iteration: 345542
loss: 1.0073961019515991,grad_norm: 0.9262446442375551, iteration: 345543
loss: 0.980238139629364,grad_norm: 0.7448279556236159, iteration: 345544
loss: 1.0166215896606445,grad_norm: 0.9999989775092583, iteration: 345545
loss: 1.0099722146987915,grad_norm: 0.8529668369510546, iteration: 345546
loss: 1.0250667333602905,grad_norm: 0.9999990553056526, iteration: 345547
loss: 1.033839225769043,grad_norm: 0.9999993243308068, iteration: 345548
loss: 1.012742280960083,grad_norm: 0.9770118589201513, iteration: 345549
loss: 0.9837765693664551,grad_norm: 0.6933971058765148, iteration: 345550
loss: 0.9997223615646362,grad_norm: 0.8302695018280185, iteration: 345551
loss: 1.012004017829895,grad_norm: 0.7776302196936007, iteration: 345552
loss: 0.9808313846588135,grad_norm: 0.7126529583017642, iteration: 345553
loss: 1.0034905672073364,grad_norm: 0.843014469548467, iteration: 345554
loss: 1.0593823194503784,grad_norm: 0.7312063993859147, iteration: 345555
loss: 0.989961564540863,grad_norm: 0.9999990288281769, iteration: 345556
loss: 0.9997262358665466,grad_norm: 0.7675613979425553, iteration: 345557
loss: 1.0423749685287476,grad_norm: 0.8749231764386256, iteration: 345558
loss: 0.9934992790222168,grad_norm: 0.9309774364014426, iteration: 345559
loss: 0.9717977643013,grad_norm: 0.8931858836330694, iteration: 345560
loss: 1.0062891244888306,grad_norm: 0.8346640038707928, iteration: 345561
loss: 1.0495011806488037,grad_norm: 0.8654569729120821, iteration: 345562
loss: 0.9881771206855774,grad_norm: 0.7793314484621441, iteration: 345563
loss: 0.9819852113723755,grad_norm: 0.8832592673981934, iteration: 345564
loss: 1.0372883081436157,grad_norm: 0.999999677639474, iteration: 345565
loss: 0.9756153225898743,grad_norm: 0.858181409633272, iteration: 345566
loss: 1.0419509410858154,grad_norm: 0.9999994286697552, iteration: 345567
loss: 1.0397931337356567,grad_norm: 0.9204990253779061, iteration: 345568
loss: 1.032192587852478,grad_norm: 0.8870146998742633, iteration: 345569
loss: 0.9965353012084961,grad_norm: 0.8615853553135376, iteration: 345570
loss: 1.0005966424942017,grad_norm: 0.7016038286824501, iteration: 345571
loss: 0.9899875521659851,grad_norm: 0.8876232079172538, iteration: 345572
loss: 1.0040370225906372,grad_norm: 0.7607361635913671, iteration: 345573
loss: 0.9771421551704407,grad_norm: 0.9775099657954808, iteration: 345574
loss: 1.0031541585922241,grad_norm: 0.818195680957307, iteration: 345575
loss: 0.9599469304084778,grad_norm: 0.8404148819319093, iteration: 345576
loss: 1.0012439489364624,grad_norm: 0.9076598172311797, iteration: 345577
loss: 0.970515251159668,grad_norm: 0.8651837060786213, iteration: 345578
loss: 0.9797377586364746,grad_norm: 0.8529237642145279, iteration: 345579
loss: 1.027811050415039,grad_norm: 0.8176930368946462, iteration: 345580
loss: 1.0134634971618652,grad_norm: 0.7394588772824499, iteration: 345581
loss: 1.0192034244537354,grad_norm: 0.9999991101710647, iteration: 345582
loss: 1.0165499448776245,grad_norm: 0.8008664014461631, iteration: 345583
loss: 1.0145418643951416,grad_norm: 0.8870912828461097, iteration: 345584
loss: 1.0311510562896729,grad_norm: 0.9407010926974381, iteration: 345585
loss: 1.006441593170166,grad_norm: 0.8405259796695529, iteration: 345586
loss: 1.0259416103363037,grad_norm: 0.7787729804258693, iteration: 345587
loss: 1.0568156242370605,grad_norm: 0.7537265456204976, iteration: 345588
loss: 1.0156073570251465,grad_norm: 0.9225795792113712, iteration: 345589
loss: 0.9830413460731506,grad_norm: 0.7714804748705607, iteration: 345590
loss: 1.0054453611373901,grad_norm: 0.8078271323266005, iteration: 345591
loss: 1.002989411354065,grad_norm: 0.7310127413569063, iteration: 345592
loss: 0.981003999710083,grad_norm: 0.9035067938412403, iteration: 345593
loss: 0.9905034899711609,grad_norm: 0.8134559890341703, iteration: 345594
loss: 0.9669537544250488,grad_norm: 0.7800369557814897, iteration: 345595
loss: 1.0301618576049805,grad_norm: 0.8692213947781108, iteration: 345596
loss: 1.1873494386672974,grad_norm: 0.9999996541148706, iteration: 345597
loss: 0.9619919061660767,grad_norm: 0.7873366890058607, iteration: 345598
loss: 1.0324416160583496,grad_norm: 0.8126718546283, iteration: 345599
loss: 0.9914113879203796,grad_norm: 0.9999999545072398, iteration: 345600
loss: 1.0051851272583008,grad_norm: 0.7819583081532081, iteration: 345601
loss: 0.972521960735321,grad_norm: 0.8588583283316703, iteration: 345602
loss: 0.9910229444503784,grad_norm: 0.8198039183817376, iteration: 345603
loss: 0.9858962893486023,grad_norm: 0.7710850229478452, iteration: 345604
loss: 1.0096118450164795,grad_norm: 0.7809295026767492, iteration: 345605
loss: 1.0200374126434326,grad_norm: 0.9243563966270147, iteration: 345606
loss: 1.029094934463501,grad_norm: 0.9999994212694896, iteration: 345607
loss: 0.980951189994812,grad_norm: 0.7080549888656669, iteration: 345608
loss: 1.087512731552124,grad_norm: 0.8994570179809741, iteration: 345609
loss: 0.9805567264556885,grad_norm: 0.9416686043817438, iteration: 345610
loss: 0.9592866897583008,grad_norm: 0.8366373967887355, iteration: 345611
loss: 0.9711575508117676,grad_norm: 0.9152487890506611, iteration: 345612
loss: 1.036425232887268,grad_norm: 0.9970186010711626, iteration: 345613
loss: 0.9909297823905945,grad_norm: 0.9563389843181284, iteration: 345614
loss: 0.9815021753311157,grad_norm: 0.9507665732497024, iteration: 345615
loss: 0.9830667972564697,grad_norm: 0.8604882601629062, iteration: 345616
loss: 1.0302817821502686,grad_norm: 0.8837275845138749, iteration: 345617
loss: 1.0242578983306885,grad_norm: 0.7897947380701769, iteration: 345618
loss: 1.0402774810791016,grad_norm: 0.65336414493764, iteration: 345619
loss: 1.0045298337936401,grad_norm: 0.9618211479836476, iteration: 345620
loss: 0.9484728574752808,grad_norm: 0.8279823711554579, iteration: 345621
loss: 1.0133904218673706,grad_norm: 0.9248389520906656, iteration: 345622
loss: 1.006871223449707,grad_norm: 0.8504743756599282, iteration: 345623
loss: 0.9920254349708557,grad_norm: 0.6802551199940974, iteration: 345624
loss: 1.00149667263031,grad_norm: 0.8712983678470684, iteration: 345625
loss: 0.957735002040863,grad_norm: 0.7684568912139378, iteration: 345626
loss: 1.0349371433258057,grad_norm: 0.8468710221029778, iteration: 345627
loss: 1.0124703645706177,grad_norm: 0.9999990872785158, iteration: 345628
loss: 0.9845438599586487,grad_norm: 0.729460537349465, iteration: 345629
loss: 1.0148587226867676,grad_norm: 0.849273513771159, iteration: 345630
loss: 1.0379363298416138,grad_norm: 0.9846673291151428, iteration: 345631
loss: 1.148862361907959,grad_norm: 0.9999998281797914, iteration: 345632
loss: 1.0646638870239258,grad_norm: 0.9999992897520168, iteration: 345633
loss: 0.9849250912666321,grad_norm: 0.9345805825247714, iteration: 345634
loss: 1.0131862163543701,grad_norm: 0.9999992875536937, iteration: 345635
loss: 1.0131455659866333,grad_norm: 0.9999989511959635, iteration: 345636
loss: 1.0663412809371948,grad_norm: 0.999999307047915, iteration: 345637
loss: 0.964311420917511,grad_norm: 0.9612243132294035, iteration: 345638
loss: 0.9951912760734558,grad_norm: 0.7739343684766191, iteration: 345639
loss: 1.0094759464263916,grad_norm: 0.7226961221783849, iteration: 345640
loss: 1.0689457654953003,grad_norm: 0.9063580679789865, iteration: 345641
loss: 1.0288606882095337,grad_norm: 0.7886164290000995, iteration: 345642
loss: 1.054863691329956,grad_norm: 0.8525634653543256, iteration: 345643
loss: 0.998788058757782,grad_norm: 0.9341346934439171, iteration: 345644
loss: 0.996809184551239,grad_norm: 0.7913970664270096, iteration: 345645
loss: 0.9853383302688599,grad_norm: 0.8536495381290534, iteration: 345646
loss: 1.017085075378418,grad_norm: 0.8738916758148739, iteration: 345647
loss: 0.979214608669281,grad_norm: 0.9999991280720827, iteration: 345648
loss: 1.0034892559051514,grad_norm: 0.9999991051068269, iteration: 345649
loss: 1.0150448083877563,grad_norm: 0.8908608674401437, iteration: 345650
loss: 0.9720457792282104,grad_norm: 0.9999989886197558, iteration: 345651
loss: 1.0026012659072876,grad_norm: 0.8362672961327703, iteration: 345652
loss: 1.0006076097488403,grad_norm: 0.999999630228855, iteration: 345653
loss: 1.0492221117019653,grad_norm: 0.8687507224241456, iteration: 345654
loss: 1.062569260597229,grad_norm: 0.999999793306575, iteration: 345655
loss: 1.0815677642822266,grad_norm: 0.8739417248336744, iteration: 345656
loss: 0.9871284365653992,grad_norm: 0.9577243226783629, iteration: 345657
loss: 0.9557806253433228,grad_norm: 0.8573844654666315, iteration: 345658
loss: 0.994174063205719,grad_norm: 0.7839899558708876, iteration: 345659
loss: 0.9823954105377197,grad_norm: 0.9884677270410128, iteration: 345660
loss: 1.0394624471664429,grad_norm: 0.7806864347300935, iteration: 345661
loss: 1.0336226224899292,grad_norm: 0.7855431424933503, iteration: 345662
loss: 1.0139813423156738,grad_norm: 0.8746676808766787, iteration: 345663
loss: 1.0089870691299438,grad_norm: 0.6772767171384751, iteration: 345664
loss: 1.0181219577789307,grad_norm: 0.8590540617265078, iteration: 345665
loss: 0.9854438304901123,grad_norm: 0.8645344843747717, iteration: 345666
loss: 1.0094741582870483,grad_norm: 0.7298066230927375, iteration: 345667
loss: 0.9984051585197449,grad_norm: 0.8145706010301317, iteration: 345668
loss: 1.012255072593689,grad_norm: 0.6927288885845333, iteration: 345669
loss: 0.9766045808792114,grad_norm: 0.7617924521522013, iteration: 345670
loss: 0.9353303909301758,grad_norm: 0.8335831804449112, iteration: 345671
loss: 1.0236722230911255,grad_norm: 0.8960068399929149, iteration: 345672
loss: 0.9815304279327393,grad_norm: 0.6799439789838531, iteration: 345673
loss: 1.0651909112930298,grad_norm: 0.8562634103032803, iteration: 345674
loss: 1.1239149570465088,grad_norm: 0.9999997400837956, iteration: 345675
loss: 0.9835733771324158,grad_norm: 0.7275194084694216, iteration: 345676
loss: 1.0105431079864502,grad_norm: 0.8208042916138957, iteration: 345677
loss: 0.9771357178688049,grad_norm: 0.8524643220880704, iteration: 345678
loss: 0.9737066030502319,grad_norm: 0.8741296421339841, iteration: 345679
loss: 0.9937754273414612,grad_norm: 0.7883464661646763, iteration: 345680
loss: 1.0094727277755737,grad_norm: 0.8938800922048591, iteration: 345681
loss: 0.9956080913543701,grad_norm: 0.9999991148354231, iteration: 345682
loss: 1.0102131366729736,grad_norm: 0.7625504013453244, iteration: 345683
loss: 0.9815251231193542,grad_norm: 0.8188042689269558, iteration: 345684
loss: 1.0346447229385376,grad_norm: 0.7895989795711543, iteration: 345685
loss: 1.008913516998291,grad_norm: 0.9393869257127756, iteration: 345686
loss: 1.0344586372375488,grad_norm: 0.7384176081156241, iteration: 345687
loss: 1.032076120376587,grad_norm: 0.999999072514631, iteration: 345688
loss: 1.0404107570648193,grad_norm: 0.7291238865619026, iteration: 345689
loss: 1.0236362218856812,grad_norm: 0.9999993032498239, iteration: 345690
loss: 1.0411707162857056,grad_norm: 0.7356412764930292, iteration: 345691
loss: 0.9715814590454102,grad_norm: 0.7604751693581859, iteration: 345692
loss: 0.9918156862258911,grad_norm: 0.866626916664014, iteration: 345693
loss: 1.0075089931488037,grad_norm: 0.7407794893357581, iteration: 345694
loss: 1.03947114944458,grad_norm: 0.895046676564589, iteration: 345695
loss: 1.019734263420105,grad_norm: 0.8249803204654074, iteration: 345696
loss: 1.0485599040985107,grad_norm: 0.7853793902580432, iteration: 345697
loss: 0.9366477727890015,grad_norm: 0.8674843714192797, iteration: 345698
loss: 0.9839830994606018,grad_norm: 0.8928990047905542, iteration: 345699
loss: 1.0349866151809692,grad_norm: 0.9999990014547019, iteration: 345700
loss: 1.0338135957717896,grad_norm: 0.9948979419774715, iteration: 345701
loss: 0.9964055418968201,grad_norm: 0.8665007072280912, iteration: 345702
loss: 1.0407521724700928,grad_norm: 0.9554189442528862, iteration: 345703
loss: 0.9942071437835693,grad_norm: 0.8168730843748233, iteration: 345704
loss: 0.9883313775062561,grad_norm: 0.8703154725934115, iteration: 345705
loss: 1.0153511762619019,grad_norm: 0.7646502335648037, iteration: 345706
loss: 1.011243224143982,grad_norm: 0.8124639916706281, iteration: 345707
loss: 0.9483606219291687,grad_norm: 0.7497175448443621, iteration: 345708
loss: 0.9932500720024109,grad_norm: 0.9972692044163529, iteration: 345709
loss: 1.0446834564208984,grad_norm: 0.9999993401221059, iteration: 345710
loss: 1.0341103076934814,grad_norm: 0.7628592172157456, iteration: 345711
loss: 1.0161911249160767,grad_norm: 0.7984534787407139, iteration: 345712
loss: 1.0077098608016968,grad_norm: 0.7392190253010288, iteration: 345713
loss: 0.9947482347488403,grad_norm: 0.7639296754334957, iteration: 345714
loss: 0.9979966282844543,grad_norm: 0.9999990511265228, iteration: 345715
loss: 0.9881446361541748,grad_norm: 0.8416022729956071, iteration: 345716
loss: 1.0414289236068726,grad_norm: 0.8588318455035956, iteration: 345717
loss: 1.006496548652649,grad_norm: 0.7936821696547164, iteration: 345718
loss: 0.9997571706771851,grad_norm: 0.8771245042469299, iteration: 345719
loss: 1.0269396305084229,grad_norm: 0.9347568500214661, iteration: 345720
loss: 0.9977911114692688,grad_norm: 0.7290017199786181, iteration: 345721
loss: 0.9893816113471985,grad_norm: 0.7877791851600928, iteration: 345722
loss: 0.9797134399414062,grad_norm: 0.8407180712866782, iteration: 345723
loss: 1.018471121788025,grad_norm: 0.8017179705704472, iteration: 345724
loss: 1.0184699296951294,grad_norm: 0.8581083573220006, iteration: 345725
loss: 1.0344659090042114,grad_norm: 0.9999998872449058, iteration: 345726
loss: 1.0084694623947144,grad_norm: 0.8002203113681344, iteration: 345727
loss: 1.0155105590820312,grad_norm: 0.7766574781894385, iteration: 345728
loss: 1.051002025604248,grad_norm: 0.8030853554822114, iteration: 345729
loss: 0.9930905103683472,grad_norm: 0.6842086291438583, iteration: 345730
loss: 1.003893256187439,grad_norm: 0.7085811541868818, iteration: 345731
loss: 0.9926344156265259,grad_norm: 0.813222004253594, iteration: 345732
loss: 1.0202022790908813,grad_norm: 0.7327820610938264, iteration: 345733
loss: 0.9825341105461121,grad_norm: 0.7261535369494648, iteration: 345734
loss: 0.9883710145950317,grad_norm: 0.8670538949590268, iteration: 345735
loss: 1.0105705261230469,grad_norm: 0.7542979643931749, iteration: 345736
loss: 1.0103567838668823,grad_norm: 0.9187906324746714, iteration: 345737
loss: 0.9955165386199951,grad_norm: 0.8826308363629604, iteration: 345738
loss: 1.0111054182052612,grad_norm: 0.9468307333873335, iteration: 345739
loss: 0.9764452576637268,grad_norm: 0.8073622036157124, iteration: 345740
loss: 0.9902347326278687,grad_norm: 0.8417427712550111, iteration: 345741
loss: 1.0190867185592651,grad_norm: 0.8188739939497951, iteration: 345742
loss: 0.9906401038169861,grad_norm: 0.7197200140478843, iteration: 345743
loss: 1.0315766334533691,grad_norm: 0.7882174357034473, iteration: 345744
loss: 1.026389479637146,grad_norm: 0.8209782801992581, iteration: 345745
loss: 0.997480571269989,grad_norm: 0.9259677004997283, iteration: 345746
loss: 1.0642331838607788,grad_norm: 0.8063173263972704, iteration: 345747
loss: 0.9728842377662659,grad_norm: 0.9547989518499276, iteration: 345748
loss: 0.9999831318855286,grad_norm: 0.8191399453367398, iteration: 345749
loss: 1.0052993297576904,grad_norm: 0.7529848644101977, iteration: 345750
loss: 0.97214674949646,grad_norm: 0.8617459221687552, iteration: 345751
loss: 0.9825454354286194,grad_norm: 0.8057524627531432, iteration: 345752
loss: 1.0252004861831665,grad_norm: 0.9254993730566211, iteration: 345753
loss: 0.9714248776435852,grad_norm: 0.7011683649653175, iteration: 345754
loss: 1.0124719142913818,grad_norm: 0.8055151132179088, iteration: 345755
loss: 1.0006009340286255,grad_norm: 0.9565061739701024, iteration: 345756
loss: 1.009835958480835,grad_norm: 0.877810367423998, iteration: 345757
loss: 1.0370770692825317,grad_norm: 0.9557485096781574, iteration: 345758
loss: 1.0183428525924683,grad_norm: 0.8199501920999981, iteration: 345759
loss: 0.9751467704772949,grad_norm: 0.9645061129350718, iteration: 345760
loss: 0.9821512699127197,grad_norm: 0.7191336893638902, iteration: 345761
loss: 0.9902955889701843,grad_norm: 0.837170109042441, iteration: 345762
loss: 1.023970603942871,grad_norm: 0.8957532471220855, iteration: 345763
loss: 0.9536662697792053,grad_norm: 0.8772586757447215, iteration: 345764
loss: 1.0482443571090698,grad_norm: 0.811837583681226, iteration: 345765
loss: 0.9908635020256042,grad_norm: 0.8111986904362432, iteration: 345766
loss: 0.9724745154380798,grad_norm: 0.7709290080390655, iteration: 345767
loss: 1.0004252195358276,grad_norm: 0.8609705420684547, iteration: 345768
loss: 0.9968129992485046,grad_norm: 0.7712255143253087, iteration: 345769
loss: 1.0149599313735962,grad_norm: 0.8435443087628212, iteration: 345770
loss: 1.0597354173660278,grad_norm: 0.8726139256297518, iteration: 345771
loss: 1.0033913850784302,grad_norm: 0.8535759545768241, iteration: 345772
loss: 0.9845064878463745,grad_norm: 0.9999990389745155, iteration: 345773
loss: 1.0272154808044434,grad_norm: 0.7692138619860069, iteration: 345774
loss: 1.0082881450653076,grad_norm: 0.8770719660130846, iteration: 345775
loss: 0.965808093547821,grad_norm: 0.9088860444148258, iteration: 345776
loss: 0.9807071685791016,grad_norm: 0.9305087396550028, iteration: 345777
loss: 0.9859504699707031,grad_norm: 0.8428202619068497, iteration: 345778
loss: 0.9894777536392212,grad_norm: 0.8717002758491349, iteration: 345779
loss: 1.0059480667114258,grad_norm: 0.9999991884550762, iteration: 345780
loss: 0.9574390053749084,grad_norm: 0.7406638401692346, iteration: 345781
loss: 0.9504446983337402,grad_norm: 0.759473008809269, iteration: 345782
loss: 0.9887620806694031,grad_norm: 0.8949485928977453, iteration: 345783
loss: 1.0011272430419922,grad_norm: 0.6924999216288914, iteration: 345784
loss: 0.9890971779823303,grad_norm: 0.7675886491428546, iteration: 345785
loss: 0.9844356179237366,grad_norm: 0.7360746802031318, iteration: 345786
loss: 0.9897392988204956,grad_norm: 0.7221643902171156, iteration: 345787
loss: 1.0061229467391968,grad_norm: 0.8951503750390013, iteration: 345788
loss: 1.0385921001434326,grad_norm: 0.9867664379439122, iteration: 345789
loss: 1.0433025360107422,grad_norm: 0.9414956961862427, iteration: 345790
loss: 0.9991438388824463,grad_norm: 0.8648249332125055, iteration: 345791
loss: 1.0372557640075684,grad_norm: 0.8908830818535515, iteration: 345792
loss: 1.0268831253051758,grad_norm: 0.9899796639023728, iteration: 345793
loss: 0.9745815396308899,grad_norm: 0.800229365927404, iteration: 345794
loss: 0.9802477955818176,grad_norm: 0.8045089111444668, iteration: 345795
loss: 1.0261629819869995,grad_norm: 0.8564403711498536, iteration: 345796
loss: 1.2449636459350586,grad_norm: 0.9999992574830148, iteration: 345797
loss: 1.016687273979187,grad_norm: 0.8431503142757265, iteration: 345798
loss: 1.0428221225738525,grad_norm: 0.8352136498941107, iteration: 345799
loss: 1.0085999965667725,grad_norm: 0.7684612242450682, iteration: 345800
loss: 0.9819331169128418,grad_norm: 0.7416875873443765, iteration: 345801
loss: 1.0133780241012573,grad_norm: 0.9116564797244544, iteration: 345802
loss: 1.014595627784729,grad_norm: 0.7284210033556797, iteration: 345803
loss: 0.9790112376213074,grad_norm: 0.8802915002594306, iteration: 345804
loss: 0.9933938980102539,grad_norm: 0.9999992200484787, iteration: 345805
loss: 0.9961660504341125,grad_norm: 0.9575387564154563, iteration: 345806
loss: 0.965573787689209,grad_norm: 0.9570774646938266, iteration: 345807
loss: 1.153330683708191,grad_norm: 0.9999991510335644, iteration: 345808
loss: 1.0055029392242432,grad_norm: 0.7416935999528258, iteration: 345809
loss: 1.016391634941101,grad_norm: 0.9276034833282262, iteration: 345810
loss: 0.9869170188903809,grad_norm: 0.8757528939281832, iteration: 345811
loss: 0.962369978427887,grad_norm: 0.9108054533504245, iteration: 345812
loss: 0.9929542541503906,grad_norm: 0.8071170305587644, iteration: 345813
loss: 1.025903582572937,grad_norm: 0.8441424324242502, iteration: 345814
loss: 0.9984809756278992,grad_norm: 0.8678758547182608, iteration: 345815
loss: 1.0053894519805908,grad_norm: 0.9457467023125933, iteration: 345816
loss: 0.9949113726615906,grad_norm: 0.8698929759968618, iteration: 345817
loss: 0.9463041424751282,grad_norm: 0.8297183698587297, iteration: 345818
loss: 0.9816471934318542,grad_norm: 0.8191016124505932, iteration: 345819
loss: 1.0041369199752808,grad_norm: 0.8041800979191629, iteration: 345820
loss: 1.028025507926941,grad_norm: 0.9907599560763729, iteration: 345821
loss: 1.001654028892517,grad_norm: 0.6945921370808223, iteration: 345822
loss: 0.9864745140075684,grad_norm: 0.9010036091820256, iteration: 345823
loss: 0.9747607707977295,grad_norm: 0.8869727169668897, iteration: 345824
loss: 1.0009846687316895,grad_norm: 0.9258692469758979, iteration: 345825
loss: 1.0773131847381592,grad_norm: 0.8503058728018564, iteration: 345826
loss: 1.0216622352600098,grad_norm: 0.9999998566399081, iteration: 345827
loss: 1.0064855813980103,grad_norm: 0.8976524834197034, iteration: 345828
loss: 1.0265365839004517,grad_norm: 0.8019121261274231, iteration: 345829
loss: 0.9923014640808105,grad_norm: 0.7702907479070832, iteration: 345830
loss: 1.0087900161743164,grad_norm: 0.9999991150281095, iteration: 345831
loss: 1.0229607820510864,grad_norm: 0.8680177828541873, iteration: 345832
loss: 1.002590298652649,grad_norm: 0.8551325996853452, iteration: 345833
loss: 1.0205793380737305,grad_norm: 0.7225990918232463, iteration: 345834
loss: 0.993391215801239,grad_norm: 0.8639351177276762, iteration: 345835
loss: 0.984109103679657,grad_norm: 0.9313832197212442, iteration: 345836
loss: 1.054219126701355,grad_norm: 0.7946200436062055, iteration: 345837
loss: 1.0146783590316772,grad_norm: 0.7557888584336553, iteration: 345838
loss: 1.0273842811584473,grad_norm: 0.9999991256107542, iteration: 345839
loss: 0.9807413220405579,grad_norm: 0.7172277893858683, iteration: 345840
loss: 1.024011254310608,grad_norm: 0.9999998449005656, iteration: 345841
loss: 0.9796554446220398,grad_norm: 0.737314857702454, iteration: 345842
loss: 1.0119918584823608,grad_norm: 0.8196172339192935, iteration: 345843
loss: 1.0249592065811157,grad_norm: 0.9210285356093264, iteration: 345844
loss: 0.9731632471084595,grad_norm: 0.7792290196977195, iteration: 345845
loss: 1.009641408920288,grad_norm: 0.6721393600536191, iteration: 345846
loss: 1.030523657798767,grad_norm: 0.7652156211547751, iteration: 345847
loss: 1.0293899774551392,grad_norm: 0.6829939788063252, iteration: 345848
loss: 1.0207773447036743,grad_norm: 0.9320188056815688, iteration: 345849
loss: 0.9852182269096375,grad_norm: 0.8265831063201345, iteration: 345850
loss: 0.9958851933479309,grad_norm: 0.9320595399661725, iteration: 345851
loss: 1.0095672607421875,grad_norm: 0.8775331163700868, iteration: 345852
loss: 0.989946722984314,grad_norm: 0.9416400151533332, iteration: 345853
loss: 0.9798663854598999,grad_norm: 0.7800543361879105, iteration: 345854
loss: 0.9911142587661743,grad_norm: 0.8167354423422127, iteration: 345855
loss: 0.9764949083328247,grad_norm: 0.8967343379544969, iteration: 345856
loss: 0.9936739206314087,grad_norm: 0.8577141545091727, iteration: 345857
loss: 0.9599807858467102,grad_norm: 0.8601436968341843, iteration: 345858
loss: 0.9896007180213928,grad_norm: 0.8285684623549635, iteration: 345859
loss: 1.0012205839157104,grad_norm: 0.8350321671833598, iteration: 345860
loss: 1.012660264968872,grad_norm: 0.9999991909685031, iteration: 345861
loss: 0.9925352931022644,grad_norm: 0.6802461866316074, iteration: 345862
loss: 0.9868204593658447,grad_norm: 0.8459068888753298, iteration: 345863
loss: 1.0012476444244385,grad_norm: 0.8960507331308069, iteration: 345864
loss: 0.9923523664474487,grad_norm: 0.8300373919075279, iteration: 345865
loss: 0.98301762342453,grad_norm: 0.9999991246458415, iteration: 345866
loss: 1.0147901773452759,grad_norm: 0.9999990051368696, iteration: 345867
loss: 0.9835586547851562,grad_norm: 0.7826911064608856, iteration: 345868
loss: 0.9488391876220703,grad_norm: 0.8225459020214039, iteration: 345869
loss: 0.9888466596603394,grad_norm: 0.8763221761817622, iteration: 345870
loss: 0.9960139393806458,grad_norm: 0.6861347004882142, iteration: 345871
loss: 0.9964300990104675,grad_norm: 0.8773053737444069, iteration: 345872
loss: 0.9885618686676025,grad_norm: 0.8128767942349897, iteration: 345873
loss: 1.0041996240615845,grad_norm: 0.6989784672315947, iteration: 345874
loss: 0.9838690161705017,grad_norm: 0.8431846735859282, iteration: 345875
loss: 0.9939594268798828,grad_norm: 0.7197372973972301, iteration: 345876
loss: 1.000690221786499,grad_norm: 0.9277743411644812, iteration: 345877
loss: 1.0023468732833862,grad_norm: 0.8473098914758996, iteration: 345878
loss: 0.9937766790390015,grad_norm: 0.7763621037792856, iteration: 345879
loss: 0.9912298917770386,grad_norm: 0.9999989996601325, iteration: 345880
loss: 0.9745864272117615,grad_norm: 0.7947509161296671, iteration: 345881
loss: 1.0112231969833374,grad_norm: 0.7402845836604792, iteration: 345882
loss: 0.9845575094223022,grad_norm: 0.7499586345201225, iteration: 345883
loss: 1.0014841556549072,grad_norm: 0.7891008938998735, iteration: 345884
loss: 1.0164380073547363,grad_norm: 0.9999991956539145, iteration: 345885
loss: 0.9805788397789001,grad_norm: 0.9596651629279501, iteration: 345886
loss: 1.0191253423690796,grad_norm: 0.7657321470567817, iteration: 345887
loss: 0.991796612739563,grad_norm: 0.9999995818995149, iteration: 345888
loss: 1.0556347370147705,grad_norm: 0.839671908559257, iteration: 345889
loss: 1.0150247812271118,grad_norm: 0.8082789815738167, iteration: 345890
loss: 1.001720666885376,grad_norm: 0.7313798304818547, iteration: 345891
loss: 0.989597737789154,grad_norm: 0.9193077373888785, iteration: 345892
loss: 1.0442628860473633,grad_norm: 0.7942548638810227, iteration: 345893
loss: 0.9761962294578552,grad_norm: 0.7453180213320739, iteration: 345894
loss: 1.0339773893356323,grad_norm: 0.9165814249489977, iteration: 345895
loss: 0.969870924949646,grad_norm: 0.8615872632418758, iteration: 345896
loss: 1.007346749305725,grad_norm: 0.769418693971438, iteration: 345897
loss: 1.0026483535766602,grad_norm: 0.8441508509237978, iteration: 345898
loss: 0.9742872714996338,grad_norm: 0.8490679698924802, iteration: 345899
loss: 1.0420001745224,grad_norm: 0.9999992057039885, iteration: 345900
loss: 0.9949038624763489,grad_norm: 0.863316229116098, iteration: 345901
loss: 1.0149580240249634,grad_norm: 0.9779564610109763, iteration: 345902
loss: 1.0326191186904907,grad_norm: 0.8566112791632565, iteration: 345903
loss: 0.9918197393417358,grad_norm: 0.756776988692649, iteration: 345904
loss: 1.0067412853240967,grad_norm: 0.7447764440664999, iteration: 345905
loss: 0.9785938262939453,grad_norm: 0.7058594425730674, iteration: 345906
loss: 1.0164779424667358,grad_norm: 0.6423726835633549, iteration: 345907
loss: 1.0622916221618652,grad_norm: 0.999999635334382, iteration: 345908
loss: 1.0303459167480469,grad_norm: 0.9999992126019642, iteration: 345909
loss: 0.9786775708198547,grad_norm: 0.8435645419541947, iteration: 345910
loss: 1.0137759447097778,grad_norm: 0.901730271393061, iteration: 345911
loss: 1.0096224546432495,grad_norm: 0.8940551460267371, iteration: 345912
loss: 1.0121617317199707,grad_norm: 0.7899838933822979, iteration: 345913
loss: 1.025128960609436,grad_norm: 0.9439538575458748, iteration: 345914
loss: 1.0146011114120483,grad_norm: 0.9999989784548974, iteration: 345915
loss: 0.9990901947021484,grad_norm: 0.999999448990158, iteration: 345916
loss: 0.9834752678871155,grad_norm: 0.7924962198238573, iteration: 345917
loss: 1.064184308052063,grad_norm: 0.92014164922326, iteration: 345918
loss: 1.0403640270233154,grad_norm: 0.9012104250214764, iteration: 345919
loss: 0.9908082485198975,grad_norm: 0.954962745472244, iteration: 345920
loss: 0.9770188331604004,grad_norm: 0.6987263264186465, iteration: 345921
loss: 0.9848318696022034,grad_norm: 0.8254501349079306, iteration: 345922
loss: 1.0602902173995972,grad_norm: 0.9999991322892738, iteration: 345923
loss: 1.0026522874832153,grad_norm: 0.8192775029588198, iteration: 345924
loss: 0.9610604643821716,grad_norm: 0.8255153198022975, iteration: 345925
loss: 1.0503872632980347,grad_norm: 0.9356289888764258, iteration: 345926
loss: 1.0014678239822388,grad_norm: 0.7586216425877559, iteration: 345927
loss: 0.991003692150116,grad_norm: 0.8856497165689083, iteration: 345928
loss: 1.0225821733474731,grad_norm: 0.9622137145199768, iteration: 345929
loss: 1.0119765996932983,grad_norm: 0.9141361621158054, iteration: 345930
loss: 1.025589942932129,grad_norm: 0.8526903314391002, iteration: 345931
loss: 0.9973536133766174,grad_norm: 0.999999202633322, iteration: 345932
loss: 0.9957507848739624,grad_norm: 0.8947904059971132, iteration: 345933
loss: 1.0020484924316406,grad_norm: 0.8401510340825619, iteration: 345934
loss: 0.9790037274360657,grad_norm: 0.9132953237825057, iteration: 345935
loss: 1.027157187461853,grad_norm: 0.999999557606531, iteration: 345936
loss: 1.0052845478057861,grad_norm: 0.831176151548291, iteration: 345937
loss: 1.02154541015625,grad_norm: 0.9999990696096741, iteration: 345938
loss: 0.9917194843292236,grad_norm: 0.9999993657601757, iteration: 345939
loss: 1.0024926662445068,grad_norm: 0.795111551430796, iteration: 345940
loss: 1.0102726221084595,grad_norm: 0.9999994086902089, iteration: 345941
loss: 1.1652462482452393,grad_norm: 0.999999416570308, iteration: 345942
loss: 0.9959753751754761,grad_norm: 0.6325715369470405, iteration: 345943
loss: 1.0374499559402466,grad_norm: 0.8918233573465597, iteration: 345944
loss: 0.9708439707756042,grad_norm: 0.9999995186699097, iteration: 345945
loss: 0.9863237738609314,grad_norm: 0.9135785571233618, iteration: 345946
loss: 0.9825197458267212,grad_norm: 0.8448415646648901, iteration: 345947
loss: 1.0564854145050049,grad_norm: 0.9083993807800865, iteration: 345948
loss: 1.0561628341674805,grad_norm: 0.8741501379283568, iteration: 345949
loss: 1.031502604484558,grad_norm: 0.9999991837617848, iteration: 345950
loss: 1.025151252746582,grad_norm: 0.9999992353501346, iteration: 345951
loss: 1.1159237623214722,grad_norm: 0.7369050306855339, iteration: 345952
loss: 0.9904909729957581,grad_norm: 0.9466119309293476, iteration: 345953
loss: 1.0789133310317993,grad_norm: 1.0000000709018986, iteration: 345954
loss: 1.0104185342788696,grad_norm: 0.6114628995420603, iteration: 345955
loss: 1.0815110206604004,grad_norm: 0.9999991842985266, iteration: 345956
loss: 1.0165241956710815,grad_norm: 0.9999994266872894, iteration: 345957
loss: 1.010779857635498,grad_norm: 0.9057530865300604, iteration: 345958
loss: 1.0714256763458252,grad_norm: 0.9999994005121544, iteration: 345959
loss: 1.0750112533569336,grad_norm: 0.8567078713230512, iteration: 345960
loss: 1.0222781896591187,grad_norm: 0.9999990765548845, iteration: 345961
loss: 0.9646578431129456,grad_norm: 0.8719266567830769, iteration: 345962
loss: 0.9947074055671692,grad_norm: 0.7703247770489542, iteration: 345963
loss: 0.9906768798828125,grad_norm: 0.6801156392433748, iteration: 345964
loss: 0.9740967154502869,grad_norm: 0.9080720048215932, iteration: 345965
loss: 1.0358227491378784,grad_norm: 0.9999990139162143, iteration: 345966
loss: 0.994114100933075,grad_norm: 0.862381836244209, iteration: 345967
loss: 1.0263035297393799,grad_norm: 0.9999993396211111, iteration: 345968
loss: 0.9947356581687927,grad_norm: 0.9999991164046269, iteration: 345969
loss: 0.9982684254646301,grad_norm: 0.8771106606782311, iteration: 345970
loss: 0.9997150897979736,grad_norm: 0.9999990770834284, iteration: 345971
loss: 1.0183416604995728,grad_norm: 0.7974427558439949, iteration: 345972
loss: 0.972438395023346,grad_norm: 0.9496748845137241, iteration: 345973
loss: 1.0052263736724854,grad_norm: 0.9999991344240489, iteration: 345974
loss: 0.9850606322288513,grad_norm: 0.8449140434405821, iteration: 345975
loss: 1.013586163520813,grad_norm: 0.8436475394012523, iteration: 345976
loss: 1.0225211381912231,grad_norm: 0.750994977283526, iteration: 345977
loss: 0.9487326741218567,grad_norm: 0.9094859316449517, iteration: 345978
loss: 1.0090774297714233,grad_norm: 0.8880907874521655, iteration: 345979
loss: 1.0243308544158936,grad_norm: 0.8656578297847839, iteration: 345980
loss: 0.9731438159942627,grad_norm: 0.7564337115823027, iteration: 345981
loss: 1.017168402671814,grad_norm: 0.9788773031039086, iteration: 345982
loss: 0.9926037192344666,grad_norm: 0.999999284037115, iteration: 345983
loss: 1.0232044458389282,grad_norm: 0.8963271068321463, iteration: 345984
loss: 0.9634727239608765,grad_norm: 0.8143054201996837, iteration: 345985
loss: 1.0090305805206299,grad_norm: 0.9999991356486717, iteration: 345986
loss: 0.9995170831680298,grad_norm: 0.9965508479050329, iteration: 345987
loss: 0.9921843409538269,grad_norm: 0.7592330259726634, iteration: 345988
loss: 1.0309457778930664,grad_norm: 0.863423898833448, iteration: 345989
loss: 0.9860169291496277,grad_norm: 0.8161367009904993, iteration: 345990
loss: 1.0036762952804565,grad_norm: 0.9419780564867073, iteration: 345991
loss: 0.9741775989532471,grad_norm: 0.8638055015068791, iteration: 345992
loss: 0.9963948726654053,grad_norm: 0.8133214713820786, iteration: 345993
loss: 1.0083569288253784,grad_norm: 0.8697809509380692, iteration: 345994
loss: 1.0982649326324463,grad_norm: 0.9999999981301608, iteration: 345995
loss: 1.0242975950241089,grad_norm: 0.7718678866599147, iteration: 345996
loss: 0.9930547475814819,grad_norm: 0.747841807693921, iteration: 345997
loss: 1.0719504356384277,grad_norm: 0.8385021215399485, iteration: 345998
loss: 1.0221483707427979,grad_norm: 0.999999796790252, iteration: 345999
loss: 1.013499140739441,grad_norm: 0.8043234179012966, iteration: 346000
loss: 0.9809081554412842,grad_norm: 0.8027525644111645, iteration: 346001
loss: 1.021324634552002,grad_norm: 0.9999991499728215, iteration: 346002
loss: 1.0310214757919312,grad_norm: 0.9437177263337617, iteration: 346003
loss: 1.0387409925460815,grad_norm: 0.7638536815667848, iteration: 346004
loss: 1.0697336196899414,grad_norm: 0.8762689456319483, iteration: 346005
loss: 0.9571698307991028,grad_norm: 0.7994316514008735, iteration: 346006
loss: 1.0134202241897583,grad_norm: 0.8877099812254508, iteration: 346007
loss: 0.9917460680007935,grad_norm: 0.8502088077908453, iteration: 346008
loss: 0.9957616925239563,grad_norm: 0.8420321348853694, iteration: 346009
loss: 1.079580307006836,grad_norm: 0.9988575467876433, iteration: 346010
loss: 1.019643783569336,grad_norm: 0.8723362218522702, iteration: 346011
loss: 0.9798341989517212,grad_norm: 0.837535142351919, iteration: 346012
loss: 1.0160939693450928,grad_norm: 0.9999998158923462, iteration: 346013
loss: 1.013612985610962,grad_norm: 0.9809513528898557, iteration: 346014
loss: 1.0528088808059692,grad_norm: 0.9999992542105355, iteration: 346015
loss: 0.9988338947296143,grad_norm: 0.9999994648725034, iteration: 346016
loss: 1.0021945238113403,grad_norm: 0.9586463757051298, iteration: 346017
loss: 0.9677425622940063,grad_norm: 0.8538364361772443, iteration: 346018
loss: 1.0016652345657349,grad_norm: 0.8856069271039916, iteration: 346019
loss: 0.9924046397209167,grad_norm: 0.7393656783624502, iteration: 346020
loss: 0.9837082624435425,grad_norm: 0.9999990437979005, iteration: 346021
loss: 1.0259274244308472,grad_norm: 0.7613747844255672, iteration: 346022
loss: 0.9957325458526611,grad_norm: 0.9365321202805093, iteration: 346023
loss: 0.9951900839805603,grad_norm: 0.9999993289601112, iteration: 346024
loss: 0.9721890091896057,grad_norm: 0.8609298891794273, iteration: 346025
loss: 0.9835528135299683,grad_norm: 0.8906916684821088, iteration: 346026
loss: 1.0193995237350464,grad_norm: 0.7862848011519151, iteration: 346027
loss: 1.0064756870269775,grad_norm: 0.9999997083925294, iteration: 346028
loss: 1.0371379852294922,grad_norm: 0.999999906564384, iteration: 346029
loss: 1.0200921297073364,grad_norm: 0.9999994669036437, iteration: 346030
loss: 1.0181633234024048,grad_norm: 0.9087297587956056, iteration: 346031
loss: 1.0135937929153442,grad_norm: 0.9144899200927346, iteration: 346032
loss: 1.080845832824707,grad_norm: 0.9999993244706109, iteration: 346033
loss: 0.9979960918426514,grad_norm: 0.7989361665125082, iteration: 346034
loss: 1.0326194763183594,grad_norm: 0.8587242488333906, iteration: 346035
loss: 1.0010236501693726,grad_norm: 0.9568182795884698, iteration: 346036
loss: 0.9768996834754944,grad_norm: 0.8058660344946894, iteration: 346037
loss: 1.0036965608596802,grad_norm: 0.7452417954546113, iteration: 346038
loss: 1.226248025894165,grad_norm: 0.9999999850086592, iteration: 346039
loss: 1.0328402519226074,grad_norm: 0.787978556366304, iteration: 346040
loss: 1.0064263343811035,grad_norm: 0.7454979888204201, iteration: 346041
loss: 1.0290088653564453,grad_norm: 0.9031102888678361, iteration: 346042
loss: 1.042600154876709,grad_norm: 0.9999991426082031, iteration: 346043
loss: 0.9562334418296814,grad_norm: 0.771321277183868, iteration: 346044
loss: 0.9741701483726501,grad_norm: 0.8279447870424238, iteration: 346045
loss: 1.018419623374939,grad_norm: 0.7438102436312053, iteration: 346046
loss: 0.999393105506897,grad_norm: 0.7822827871492201, iteration: 346047
loss: 1.058876633644104,grad_norm: 0.8107927975485925, iteration: 346048
loss: 1.027305006980896,grad_norm: 0.8786923767658884, iteration: 346049
loss: 0.9986380934715271,grad_norm: 0.7429697597909555, iteration: 346050
loss: 1.002018928527832,grad_norm: 0.758284768125656, iteration: 346051
loss: 0.9931942224502563,grad_norm: 0.8254563429969737, iteration: 346052
loss: 1.0278315544128418,grad_norm: 0.7422109581636485, iteration: 346053
loss: 0.9708454608917236,grad_norm: 0.6406598399071833, iteration: 346054
loss: 0.9898340106010437,grad_norm: 0.7262737269959769, iteration: 346055
loss: 1.0365922451019287,grad_norm: 0.8437825573603727, iteration: 346056
loss: 1.0194822549819946,grad_norm: 0.8171352687721302, iteration: 346057
loss: 0.9787821769714355,grad_norm: 0.9999993101835761, iteration: 346058
loss: 1.0039384365081787,grad_norm: 0.8424816840664362, iteration: 346059
loss: 0.9850440621376038,grad_norm: 0.7971064223632391, iteration: 346060
loss: 0.9752625226974487,grad_norm: 0.6999778856276264, iteration: 346061
loss: 1.002605676651001,grad_norm: 0.8940209349603236, iteration: 346062
loss: 1.039657711982727,grad_norm: 1.000000031913223, iteration: 346063
loss: 0.998257040977478,grad_norm: 0.7934928103258059, iteration: 346064
loss: 1.0023082494735718,grad_norm: 0.78598053220702, iteration: 346065
loss: 1.043350338935852,grad_norm: 0.805949428165033, iteration: 346066
loss: 1.0231027603149414,grad_norm: 0.7881828855190001, iteration: 346067
loss: 1.0537934303283691,grad_norm: 0.9999990241046148, iteration: 346068
loss: 0.993686854839325,grad_norm: 0.8797906401993599, iteration: 346069
loss: 0.9936599135398865,grad_norm: 0.8346929273346566, iteration: 346070
loss: 0.9951695203781128,grad_norm: 0.7683421245307281, iteration: 346071
loss: 0.9791861176490784,grad_norm: 0.8147666866997376, iteration: 346072
loss: 0.9730671644210815,grad_norm: 0.8742218671245314, iteration: 346073
loss: 0.9847153425216675,grad_norm: 0.8734942348754657, iteration: 346074
loss: 0.9892790913581848,grad_norm: 0.9999991181374969, iteration: 346075
loss: 1.0882521867752075,grad_norm: 0.9999993292499174, iteration: 346076
loss: 1.0321366786956787,grad_norm: 0.9999998441834925, iteration: 346077
loss: 0.9917434453964233,grad_norm: 0.7554031875339328, iteration: 346078
loss: 1.01047945022583,grad_norm: 0.9064063691600852, iteration: 346079
loss: 0.9816194772720337,grad_norm: 0.6651451909424972, iteration: 346080
loss: 0.9992126822471619,grad_norm: 0.8130621998810477, iteration: 346081
loss: 0.9895644783973694,grad_norm: 0.8496197530278269, iteration: 346082
loss: 0.9836450219154358,grad_norm: 0.775583884047665, iteration: 346083
loss: 0.9960983395576477,grad_norm: 0.9999997705957523, iteration: 346084
loss: 1.0082495212554932,grad_norm: 0.8259033736112931, iteration: 346085
loss: 1.0276094675064087,grad_norm: 0.7984956073285958, iteration: 346086
loss: 1.0035514831542969,grad_norm: 0.8474679879841795, iteration: 346087
loss: 1.1286230087280273,grad_norm: 0.9999990905625291, iteration: 346088
loss: 0.9325535297393799,grad_norm: 0.9270577758741495, iteration: 346089
loss: 0.978110671043396,grad_norm: 0.8201449529554042, iteration: 346090
loss: 0.9735820889472961,grad_norm: 0.8864217389173872, iteration: 346091
loss: 0.9677973985671997,grad_norm: 0.8373988770704459, iteration: 346092
loss: 0.9814518690109253,grad_norm: 0.8283412332190954, iteration: 346093
loss: 1.0141031742095947,grad_norm: 0.8464912292473581, iteration: 346094
loss: 0.9988579154014587,grad_norm: 0.8465345142015388, iteration: 346095
loss: 1.018306851387024,grad_norm: 0.7855464803281577, iteration: 346096
loss: 1.081199049949646,grad_norm: 0.9999992103940658, iteration: 346097
loss: 0.985838770866394,grad_norm: 0.9392942528684475, iteration: 346098
loss: 1.0019296407699585,grad_norm: 0.9999991092354076, iteration: 346099
loss: 1.0330809354782104,grad_norm: 0.8155430746968472, iteration: 346100
loss: 0.9420347213745117,grad_norm: 0.8408252531775418, iteration: 346101
loss: 1.0251773595809937,grad_norm: 0.9999991625609344, iteration: 346102
loss: 1.001345157623291,grad_norm: 0.7090828774425076, iteration: 346103
loss: 1.013525128364563,grad_norm: 0.7810704231176179, iteration: 346104
loss: 1.0142343044281006,grad_norm: 0.903482785346497, iteration: 346105
loss: 0.9576435089111328,grad_norm: 0.8908043727723753, iteration: 346106
loss: 0.9844143390655518,grad_norm: 0.878148462411854, iteration: 346107
loss: 1.0006159543991089,grad_norm: 0.7922031822511775, iteration: 346108
loss: 0.9994460344314575,grad_norm: 0.9647941108380417, iteration: 346109
loss: 1.0249137878417969,grad_norm: 0.8416528418693182, iteration: 346110
loss: 1.019407033920288,grad_norm: 0.7383793192386207, iteration: 346111
loss: 1.069374918937683,grad_norm: 0.9999995264552921, iteration: 346112
loss: 0.9874709844589233,grad_norm: 0.9011971064000168, iteration: 346113
loss: 0.9961745142936707,grad_norm: 0.8003285464022702, iteration: 346114
loss: 0.9672074317932129,grad_norm: 0.838135002439193, iteration: 346115
loss: 0.9958866834640503,grad_norm: 0.7436097664600693, iteration: 346116
loss: 0.9930137395858765,grad_norm: 0.67521455358453, iteration: 346117
loss: 0.967359185218811,grad_norm: 0.8952685583876184, iteration: 346118
loss: 1.008135437965393,grad_norm: 0.8867113121973014, iteration: 346119
loss: 1.0065569877624512,grad_norm: 0.8753667907543642, iteration: 346120
loss: 0.9923931360244751,grad_norm: 0.7743384135369981, iteration: 346121
loss: 0.9567206501960754,grad_norm: 0.8061760494182031, iteration: 346122
loss: 1.058179259300232,grad_norm: 0.999999961869363, iteration: 346123
loss: 0.9670067429542542,grad_norm: 0.929240918694539, iteration: 346124
loss: 1.055606484413147,grad_norm: 0.9699285181826782, iteration: 346125
loss: 0.9965440630912781,grad_norm: 0.6827196795302278, iteration: 346126
loss: 1.0092732906341553,grad_norm: 0.9035190117514027, iteration: 346127
loss: 0.9872424006462097,grad_norm: 0.8318986566263379, iteration: 346128
loss: 0.9810500144958496,grad_norm: 0.9420848257138691, iteration: 346129
loss: 1.0330735445022583,grad_norm: 0.7738845652539841, iteration: 346130
loss: 0.9716578125953674,grad_norm: 0.7441132478478449, iteration: 346131
loss: 1.018312931060791,grad_norm: 0.9999990375533777, iteration: 346132
loss: 1.0230504274368286,grad_norm: 0.7592492639696381, iteration: 346133
loss: 0.9867298603057861,grad_norm: 0.8769828475624064, iteration: 346134
loss: 1.0283197164535522,grad_norm: 0.9999994209588695, iteration: 346135
loss: 1.0036728382110596,grad_norm: 0.9289711651179825, iteration: 346136
loss: 0.9725597500801086,grad_norm: 0.9999992498918427, iteration: 346137
loss: 0.9743608236312866,grad_norm: 0.9999991678907316, iteration: 346138
loss: 0.967782735824585,grad_norm: 0.7904144847488577, iteration: 346139
loss: 0.9541007280349731,grad_norm: 0.7081391055424763, iteration: 346140
loss: 0.9904707074165344,grad_norm: 0.9956981699197397, iteration: 346141
loss: 0.9830304980278015,grad_norm: 0.8910232618801507, iteration: 346142
loss: 1.0579767227172852,grad_norm: 0.9999998465652498, iteration: 346143
loss: 1.0479910373687744,grad_norm: 0.9999998335592545, iteration: 346144
loss: 0.9914336204528809,grad_norm: 0.8270383056565191, iteration: 346145
loss: 1.0640015602111816,grad_norm: 0.9711465315361413, iteration: 346146
loss: 1.0162789821624756,grad_norm: 0.7401660902812015, iteration: 346147
loss: 0.9943835735321045,grad_norm: 0.8343295079033849, iteration: 346148
loss: 0.972809910774231,grad_norm: 0.8828407938905265, iteration: 346149
loss: 1.0381591320037842,grad_norm: 0.9999990612138261, iteration: 346150
loss: 1.082727313041687,grad_norm: 0.9999998587458141, iteration: 346151
loss: 1.0451810359954834,grad_norm: 0.8744091050065407, iteration: 346152
loss: 1.0269949436187744,grad_norm: 0.7706218546472736, iteration: 346153
loss: 1.0248517990112305,grad_norm: 0.8938729687501795, iteration: 346154
loss: 1.1411923170089722,grad_norm: 0.8710199927152437, iteration: 346155
loss: 1.0378942489624023,grad_norm: 0.999999169658593, iteration: 346156
loss: 1.034891963005066,grad_norm: 0.9999997674029784, iteration: 346157
loss: 1.0029760599136353,grad_norm: 0.8528935714941178, iteration: 346158
loss: 1.0584033727645874,grad_norm: 0.9284071937476212, iteration: 346159
loss: 1.0204945802688599,grad_norm: 0.7168952620303839, iteration: 346160
loss: 1.024487018585205,grad_norm: 0.7401214165675587, iteration: 346161
loss: 1.013088583946228,grad_norm: 0.8394779938114846, iteration: 346162
loss: 0.9800267219543457,grad_norm: 0.999999144968265, iteration: 346163
loss: 1.0087403059005737,grad_norm: 0.7931640028656411, iteration: 346164
loss: 1.0162984132766724,grad_norm: 0.757418007509704, iteration: 346165
loss: 1.180051565170288,grad_norm: 0.9999992542756088, iteration: 346166
loss: 0.9909092783927917,grad_norm: 0.9915689677203458, iteration: 346167
loss: 0.9955371022224426,grad_norm: 0.8103867236653484, iteration: 346168
loss: 1.0001881122589111,grad_norm: 0.7455681762464125, iteration: 346169
loss: 0.9561643004417419,grad_norm: 0.8860303893527524, iteration: 346170
loss: 0.9738196730613708,grad_norm: 0.8276042361211385, iteration: 346171
loss: 0.9923778772354126,grad_norm: 0.761787193888156, iteration: 346172
loss: 0.9943364262580872,grad_norm: 0.9252161639686414, iteration: 346173
loss: 1.0303624868392944,grad_norm: 0.8084732992352615, iteration: 346174
loss: 0.9949156641960144,grad_norm: 0.8659827552056539, iteration: 346175
loss: 0.965562105178833,grad_norm: 0.9999990552472854, iteration: 346176
loss: 0.9952605962753296,grad_norm: 0.977008538127021, iteration: 346177
loss: 1.021723747253418,grad_norm: 0.8714708689048647, iteration: 346178
loss: 1.0871021747589111,grad_norm: 0.9999994568331109, iteration: 346179
loss: 0.9849290251731873,grad_norm: 0.8915858330444845, iteration: 346180
loss: 1.1136342287063599,grad_norm: 0.9999998914117736, iteration: 346181
loss: 1.0225573778152466,grad_norm: 0.8373902003208893, iteration: 346182
loss: 0.9979846477508545,grad_norm: 0.8555910122273281, iteration: 346183
loss: 1.1465754508972168,grad_norm: 0.9999997856192883, iteration: 346184
loss: 0.9788506031036377,grad_norm: 0.7789766280784185, iteration: 346185
loss: 0.9854062795639038,grad_norm: 0.9999997924791714, iteration: 346186
loss: 0.9598261713981628,grad_norm: 0.835268127523019, iteration: 346187
loss: 0.9872413277626038,grad_norm: 0.9458667852418846, iteration: 346188
loss: 1.1668171882629395,grad_norm: 0.999999770350508, iteration: 346189
loss: 1.0236546993255615,grad_norm: 0.9999994852531574, iteration: 346190
loss: 0.9863339066505432,grad_norm: 0.8211295928966206, iteration: 346191
loss: 0.9941533803939819,grad_norm: 0.8459760042513514, iteration: 346192
loss: 1.0015214681625366,grad_norm: 0.9034686844672435, iteration: 346193
loss: 0.970707356929779,grad_norm: 0.9529048452351413, iteration: 346194
loss: 0.9784205555915833,grad_norm: 0.8440685368476515, iteration: 346195
loss: 0.9866443872451782,grad_norm: 0.8007228146551035, iteration: 346196
loss: 0.9876473546028137,grad_norm: 0.9196163036579115, iteration: 346197
loss: 0.9932907819747925,grad_norm: 0.8629702607252148, iteration: 346198
loss: 1.0357130765914917,grad_norm: 0.9008133623174783, iteration: 346199
loss: 1.033732533454895,grad_norm: 0.772801694629849, iteration: 346200
loss: 1.0211122035980225,grad_norm: 0.7364453338848239, iteration: 346201
loss: 1.0156373977661133,grad_norm: 0.8628544059227831, iteration: 346202
loss: 0.9944498538970947,grad_norm: 0.8459302282307105, iteration: 346203
loss: 0.997346818447113,grad_norm: 0.8941615586126082, iteration: 346204
loss: 1.0055813789367676,grad_norm: 0.8015612290713235, iteration: 346205
loss: 0.9721292853355408,grad_norm: 0.8367989463746945, iteration: 346206
loss: 0.9985184669494629,grad_norm: 0.9307150893917796, iteration: 346207
loss: 1.0496113300323486,grad_norm: 0.9999993381141826, iteration: 346208
loss: 0.94393390417099,grad_norm: 0.7608471965500747, iteration: 346209
loss: 1.0555288791656494,grad_norm: 0.9999997756038047, iteration: 346210
loss: 0.986370325088501,grad_norm: 0.8408541171759444, iteration: 346211
loss: 1.0226976871490479,grad_norm: 0.7849195679978862, iteration: 346212
loss: 1.0093353986740112,grad_norm: 0.9397767097955414, iteration: 346213
loss: 1.0118709802627563,grad_norm: 0.7463485101352678, iteration: 346214
loss: 1.0333253145217896,grad_norm: 0.8963411429086438, iteration: 346215
loss: 0.9783896803855896,grad_norm: 0.7256524234695061, iteration: 346216
loss: 1.0177531242370605,grad_norm: 0.817301754766125, iteration: 346217
loss: 1.0056079626083374,grad_norm: 0.9999990503659708, iteration: 346218
loss: 0.9707685708999634,grad_norm: 0.9999990673772952, iteration: 346219
loss: 0.9783945083618164,grad_norm: 0.6559009749237088, iteration: 346220
loss: 1.018537163734436,grad_norm: 0.8145980029458689, iteration: 346221
loss: 0.9914320707321167,grad_norm: 0.9378473444514238, iteration: 346222
loss: 1.0146805047988892,grad_norm: 0.9441386070626815, iteration: 346223
loss: 1.0013492107391357,grad_norm: 0.9039760517876034, iteration: 346224
loss: 1.0107550621032715,grad_norm: 0.8216193606296411, iteration: 346225
loss: 1.009719967842102,grad_norm: 0.6750196527252456, iteration: 346226
loss: 1.0443658828735352,grad_norm: 0.9999995729560673, iteration: 346227
loss: 1.0171936750411987,grad_norm: 0.7610599731628305, iteration: 346228
loss: 0.9771748781204224,grad_norm: 0.9999992869152954, iteration: 346229
loss: 0.9861431121826172,grad_norm: 0.795974071274803, iteration: 346230
loss: 1.032335638999939,grad_norm: 0.8911150324066835, iteration: 346231
loss: 1.003711223602295,grad_norm: 0.7280173432163025, iteration: 346232
loss: 0.987169086933136,grad_norm: 0.9456434674088033, iteration: 346233
loss: 1.0056945085525513,grad_norm: 0.8214283433368035, iteration: 346234
loss: 1.033144235610962,grad_norm: 0.8931755028267636, iteration: 346235
loss: 1.0005956888198853,grad_norm: 0.6212457695266653, iteration: 346236
loss: 1.0229560136795044,grad_norm: 0.927139144029961, iteration: 346237
loss: 0.975758969783783,grad_norm: 0.7678293206545405, iteration: 346238
loss: 0.9639275074005127,grad_norm: 0.798597044374207, iteration: 346239
loss: 1.0066258907318115,grad_norm: 0.9551975773498769, iteration: 346240
loss: 0.9919533133506775,grad_norm: 0.7657963256027865, iteration: 346241
loss: 1.0225943326950073,grad_norm: 0.8612233568858175, iteration: 346242
loss: 1.0309659242630005,grad_norm: 0.7017125218042151, iteration: 346243
loss: 1.090216875076294,grad_norm: 0.9655572906127071, iteration: 346244
loss: 1.0151069164276123,grad_norm: 0.9746699543337073, iteration: 346245
loss: 0.9604748487472534,grad_norm: 0.7662353559789826, iteration: 346246
loss: 1.0088469982147217,grad_norm: 0.9616180597494363, iteration: 346247
loss: 0.9466715455055237,grad_norm: 0.7455065150483856, iteration: 346248
loss: 1.0754789113998413,grad_norm: 0.9999992533181924, iteration: 346249
loss: 0.9951323866844177,grad_norm: 0.8652967865286987, iteration: 346250
loss: 0.9858483076095581,grad_norm: 0.8278325755177205, iteration: 346251
loss: 1.0326513051986694,grad_norm: 0.9999998863555293, iteration: 346252
loss: 0.9727123379707336,grad_norm: 0.9479757028062022, iteration: 346253
loss: 0.9936423301696777,grad_norm: 0.9999992906617194, iteration: 346254
loss: 0.9957854747772217,grad_norm: 0.8945267854389412, iteration: 346255
loss: 1.013063907623291,grad_norm: 0.8155273046697867, iteration: 346256
loss: 1.0319888591766357,grad_norm: 0.9999993908554199, iteration: 346257
loss: 1.0014290809631348,grad_norm: 0.795966724103674, iteration: 346258
loss: 1.020304560661316,grad_norm: 0.8712576397081712, iteration: 346259
loss: 0.9953888654708862,grad_norm: 0.6962999076052393, iteration: 346260
loss: 1.0145677328109741,grad_norm: 0.7300216408743535, iteration: 346261
loss: 1.0048853158950806,grad_norm: 0.7178964791580539, iteration: 346262
loss: 1.0219876766204834,grad_norm: 0.9999998349935771, iteration: 346263
loss: 0.9649418592453003,grad_norm: 0.9639542159363841, iteration: 346264
loss: 0.9678359627723694,grad_norm: 0.8146698923949365, iteration: 346265
loss: 0.9578424096107483,grad_norm: 0.7743424680942036, iteration: 346266
loss: 1.002488374710083,grad_norm: 0.7527509633675253, iteration: 346267
loss: 1.026776909828186,grad_norm: 0.8214232027688264, iteration: 346268
loss: 0.9804106950759888,grad_norm: 0.7963365372403532, iteration: 346269
loss: 1.0434037446975708,grad_norm: 0.8353184666351156, iteration: 346270
loss: 1.025192141532898,grad_norm: 0.782401487733165, iteration: 346271
loss: 0.9800665974617004,grad_norm: 0.6951394969322142, iteration: 346272
loss: 0.9917255640029907,grad_norm: 0.858080875456217, iteration: 346273
loss: 0.9888719320297241,grad_norm: 0.8978012849776726, iteration: 346274
loss: 0.9817778468132019,grad_norm: 0.7591990180537018, iteration: 346275
loss: 0.9498523473739624,grad_norm: 0.6944677131783236, iteration: 346276
loss: 0.9869934320449829,grad_norm: 0.9999991364681883, iteration: 346277
loss: 1.0366591215133667,grad_norm: 0.8563336357782584, iteration: 346278
loss: 1.0106700658798218,grad_norm: 0.758770816038114, iteration: 346279
loss: 0.9664093255996704,grad_norm: 0.8682370159973318, iteration: 346280
loss: 1.0351927280426025,grad_norm: 0.818247661111457, iteration: 346281
loss: 0.9828545451164246,grad_norm: 0.9999990794166739, iteration: 346282
loss: 0.9898121356964111,grad_norm: 0.798889180213201, iteration: 346283
loss: 1.0475852489471436,grad_norm: 0.8328863458074522, iteration: 346284
loss: 1.0024482011795044,grad_norm: 0.9366392743401065, iteration: 346285
loss: 0.9842149615287781,grad_norm: 0.8329203966084782, iteration: 346286
loss: 1.0570255517959595,grad_norm: 0.9243788285372225, iteration: 346287
loss: 0.9815413355827332,grad_norm: 0.8401663303029087, iteration: 346288
loss: 0.9454149007797241,grad_norm: 0.821029600015894, iteration: 346289
loss: 0.9721056222915649,grad_norm: 0.9419720758081805, iteration: 346290
loss: 0.9780071973800659,grad_norm: 0.9999989769047085, iteration: 346291
loss: 0.9992216229438782,grad_norm: 0.9989456033439366, iteration: 346292
loss: 0.9937964677810669,grad_norm: 0.9999992429570198, iteration: 346293
loss: 1.0333377122879028,grad_norm: 0.9999991473930125, iteration: 346294
loss: 1.0092278718948364,grad_norm: 0.7978825729591698, iteration: 346295
loss: 0.9759615659713745,grad_norm: 0.820039802496113, iteration: 346296
loss: 1.0068714618682861,grad_norm: 0.8819224208749478, iteration: 346297
loss: 1.0001918077468872,grad_norm: 0.9342829281641398, iteration: 346298
loss: 0.9725943803787231,grad_norm: 0.9529617491526033, iteration: 346299
loss: 1.0271220207214355,grad_norm: 0.6707665629652817, iteration: 346300
loss: 0.9915844202041626,grad_norm: 0.8549567450635126, iteration: 346301
loss: 1.0770671367645264,grad_norm: 0.9999991643671744, iteration: 346302
loss: 1.0814141035079956,grad_norm: 0.9999992179707656, iteration: 346303
loss: 1.0378077030181885,grad_norm: 0.742665697682704, iteration: 346304
loss: 1.0138758420944214,grad_norm: 0.8104800215471756, iteration: 346305
loss: 1.0096688270568848,grad_norm: 0.8154595082374095, iteration: 346306
loss: 0.9959519505500793,grad_norm: 0.888573313347764, iteration: 346307
loss: 0.9557579159736633,grad_norm: 0.772432163676278, iteration: 346308
loss: 0.9789896607398987,grad_norm: 0.8024716307774763, iteration: 346309
loss: 0.9757910370826721,grad_norm: 0.8180248360400393, iteration: 346310
loss: 0.9877488017082214,grad_norm: 0.9762424287704016, iteration: 346311
loss: 0.9762687087059021,grad_norm: 0.7939399353021456, iteration: 346312
loss: 0.9309877157211304,grad_norm: 0.8361114745052822, iteration: 346313
loss: 1.0264514684677124,grad_norm: 0.818666729593019, iteration: 346314
loss: 1.0162335634231567,grad_norm: 0.9999991251322051, iteration: 346315
loss: 0.9838046431541443,grad_norm: 0.8235582043377885, iteration: 346316
loss: 0.9767434597015381,grad_norm: 0.7038631191216727, iteration: 346317
loss: 1.0155936479568481,grad_norm: 0.8233462578968902, iteration: 346318
loss: 0.9790723919868469,grad_norm: 0.8186592106071188, iteration: 346319
loss: 0.9771172404289246,grad_norm: 0.6529715996895611, iteration: 346320
loss: 1.0204527378082275,grad_norm: 0.9999993755255256, iteration: 346321
loss: 1.0087908506393433,grad_norm: 0.7974894958684946, iteration: 346322
loss: 1.00068998336792,grad_norm: 0.735431870594198, iteration: 346323
loss: 0.9860406517982483,grad_norm: 0.7413527653044031, iteration: 346324
loss: 0.9654687643051147,grad_norm: 0.8914984418765561, iteration: 346325
loss: 0.9766923785209656,grad_norm: 0.7657742697225055, iteration: 346326
loss: 1.0199679136276245,grad_norm: 0.7860402321826351, iteration: 346327
loss: 0.9900416135787964,grad_norm: 0.8678076442616904, iteration: 346328
loss: 0.9838891625404358,grad_norm: 0.8894627747976555, iteration: 346329
loss: 0.9948671460151672,grad_norm: 0.8226453244214843, iteration: 346330
loss: 0.9865753054618835,grad_norm: 0.9067588901619185, iteration: 346331
loss: 0.9862868189811707,grad_norm: 0.8243101921523586, iteration: 346332
loss: 1.0169485807418823,grad_norm: 0.9999990428505412, iteration: 346333
loss: 1.03466796875,grad_norm: 0.8710958372237528, iteration: 346334
loss: 0.9907169342041016,grad_norm: 0.7339012139665667, iteration: 346335
loss: 1.001231074333191,grad_norm: 0.7463584983974298, iteration: 346336
loss: 1.0104371309280396,grad_norm: 0.9999996063712825, iteration: 346337
loss: 0.9924721717834473,grad_norm: 0.8650648405553859, iteration: 346338
loss: 1.0196845531463623,grad_norm: 0.9723103061396783, iteration: 346339
loss: 0.9753018021583557,grad_norm: 0.7806325552972068, iteration: 346340
loss: 0.9897552728652954,grad_norm: 0.8245904174627023, iteration: 346341
loss: 1.0315380096435547,grad_norm: 0.8587534474378639, iteration: 346342
loss: 1.0501993894577026,grad_norm: 0.9999991552338596, iteration: 346343
loss: 1.0203663110733032,grad_norm: 0.8549497229471256, iteration: 346344
loss: 0.9941543340682983,grad_norm: 0.8042979275339778, iteration: 346345
loss: 0.975408673286438,grad_norm: 0.7955903397876497, iteration: 346346
loss: 1.0945639610290527,grad_norm: 0.9999995539007722, iteration: 346347
loss: 0.9733976125717163,grad_norm: 0.7361013206281111, iteration: 346348
loss: 1.0261240005493164,grad_norm: 0.9999991825752923, iteration: 346349
loss: 1.0306789875030518,grad_norm: 0.9999996705160129, iteration: 346350
loss: 0.9737415909767151,grad_norm: 0.8742065137067694, iteration: 346351
loss: 1.0132967233657837,grad_norm: 0.741663547907715, iteration: 346352
loss: 1.0347645282745361,grad_norm: 0.8442957520498984, iteration: 346353
loss: 0.9877969622612,grad_norm: 0.8523196844528167, iteration: 346354
loss: 1.0249645709991455,grad_norm: 0.7844758373805305, iteration: 346355
loss: 0.9840193390846252,grad_norm: 0.8216104112603132, iteration: 346356
loss: 0.9830412864685059,grad_norm: 0.7726123201950944, iteration: 346357
loss: 1.033226728439331,grad_norm: 0.8886852302868679, iteration: 346358
loss: 1.0133565664291382,grad_norm: 0.9192087923444128, iteration: 346359
loss: 1.0871120691299438,grad_norm: 0.96628402408232, iteration: 346360
loss: 1.0041316747665405,grad_norm: 0.8613766149698732, iteration: 346361
loss: 0.9675120711326599,grad_norm: 0.9999991568591036, iteration: 346362
loss: 0.9818329811096191,grad_norm: 0.8735312819348607, iteration: 346363
loss: 1.0452995300292969,grad_norm: 0.9999995776606146, iteration: 346364
loss: 1.0089609622955322,grad_norm: 0.8041554680595479, iteration: 346365
loss: 1.0344027280807495,grad_norm: 0.9999992444408018, iteration: 346366
loss: 1.0055168867111206,grad_norm: 0.7175729586480704, iteration: 346367
loss: 0.9766830801963806,grad_norm: 0.8539209105574298, iteration: 346368
loss: 0.9793376922607422,grad_norm: 0.8127011021383374, iteration: 346369
loss: 0.9812377095222473,grad_norm: 0.6440568302221673, iteration: 346370
loss: 0.9745290279388428,grad_norm: 0.7202770693940471, iteration: 346371
loss: 0.9959889054298401,grad_norm: 0.8571386651583374, iteration: 346372
loss: 1.006308674812317,grad_norm: 0.7609089175935783, iteration: 346373
loss: 1.0485259294509888,grad_norm: 0.9999992911434054, iteration: 346374
loss: 1.1338772773742676,grad_norm: 0.999999814283126, iteration: 346375
loss: 1.0101730823516846,grad_norm: 0.9999993855293312, iteration: 346376
loss: 1.025099277496338,grad_norm: 0.8960619797127253, iteration: 346377
loss: 1.0422955751419067,grad_norm: 0.8968438924668536, iteration: 346378
loss: 1.0528305768966675,grad_norm: 0.9999990849465392, iteration: 346379
loss: 1.0240356922149658,grad_norm: 0.7991862812282398, iteration: 346380
loss: 1.017993450164795,grad_norm: 0.8364158302095817, iteration: 346381
loss: 1.0254830121994019,grad_norm: 0.7971640829548923, iteration: 346382
loss: 1.0727245807647705,grad_norm: 0.9999997487865792, iteration: 346383
loss: 0.9625244736671448,grad_norm: 0.8430663836776203, iteration: 346384
loss: 0.9871197938919067,grad_norm: 0.7068395736101112, iteration: 346385
loss: 1.0188692808151245,grad_norm: 0.9999992438854475, iteration: 346386
loss: 0.9922411441802979,grad_norm: 0.7958890399060773, iteration: 346387
loss: 1.0067708492279053,grad_norm: 0.8129987584300171, iteration: 346388
loss: 0.977695882320404,grad_norm: 0.6897685908826552, iteration: 346389
loss: 1.0166547298431396,grad_norm: 0.9131359401150189, iteration: 346390
loss: 1.0781387090682983,grad_norm: 0.801225274951436, iteration: 346391
loss: 1.0039445161819458,grad_norm: 0.6787228199958438, iteration: 346392
loss: 1.0383398532867432,grad_norm: 0.7514632318960531, iteration: 346393
loss: 0.9864718914031982,grad_norm: 0.9999991635615393, iteration: 346394
loss: 1.0024096965789795,grad_norm: 0.9577465963455449, iteration: 346395
loss: 1.0049657821655273,grad_norm: 0.7117742792776283, iteration: 346396
loss: 0.9908252358436584,grad_norm: 0.7829318433790146, iteration: 346397
loss: 0.9869402647018433,grad_norm: 0.7211742446143191, iteration: 346398
loss: 1.0055203437805176,grad_norm: 0.8388107476494278, iteration: 346399
loss: 1.0206845998764038,grad_norm: 0.7874607043660377, iteration: 346400
loss: 0.9937641024589539,grad_norm: 0.6625109455622928, iteration: 346401
loss: 1.028028130531311,grad_norm: 0.9773471973676207, iteration: 346402
loss: 1.1134493350982666,grad_norm: 0.9999991626139163, iteration: 346403
loss: 1.0071171522140503,grad_norm: 0.7905761118679733, iteration: 346404
loss: 1.0211235284805298,grad_norm: 0.7929491124030817, iteration: 346405
loss: 1.0010441541671753,grad_norm: 0.8585327512102933, iteration: 346406
loss: 1.002274751663208,grad_norm: 0.8299728836720454, iteration: 346407
loss: 1.0384492874145508,grad_norm: 0.9292217029663137, iteration: 346408
loss: 1.0834048986434937,grad_norm: 0.8172576757725907, iteration: 346409
loss: 1.0170577764511108,grad_norm: 0.8633656514111437, iteration: 346410
loss: 1.0314807891845703,grad_norm: 0.9999993391317381, iteration: 346411
loss: 0.991521418094635,grad_norm: 0.7068852312231344, iteration: 346412
loss: 1.0706610679626465,grad_norm: 0.956142773463539, iteration: 346413
loss: 1.0058454275131226,grad_norm: 0.8923400219201879, iteration: 346414
loss: 1.0515460968017578,grad_norm: 0.9999998005684094, iteration: 346415
loss: 1.0060359239578247,grad_norm: 0.8096117101136324, iteration: 346416
loss: 0.9728134274482727,grad_norm: 0.828137076717195, iteration: 346417
loss: 0.9759337902069092,grad_norm: 0.9999993594048796, iteration: 346418
loss: 0.9858224391937256,grad_norm: 0.9003253947488663, iteration: 346419
loss: 0.9809197783470154,grad_norm: 0.8008647500232204, iteration: 346420
loss: 1.0818151235580444,grad_norm: 0.976120353104469, iteration: 346421
loss: 1.1137709617614746,grad_norm: 0.9999996162965953, iteration: 346422
loss: 1.018359661102295,grad_norm: 0.9999992449027887, iteration: 346423
loss: 0.9866840243339539,grad_norm: 0.8166781228137843, iteration: 346424
loss: 0.9717561602592468,grad_norm: 0.8233709098587901, iteration: 346425
loss: 1.042786717414856,grad_norm: 0.9999996646133892, iteration: 346426
loss: 0.9673853516578674,grad_norm: 0.8206036801110197, iteration: 346427
loss: 0.9698267579078674,grad_norm: 0.7405510175166273, iteration: 346428
loss: 0.9996998310089111,grad_norm: 0.9999994357173315, iteration: 346429
loss: 0.9866613149642944,grad_norm: 0.8376383504848713, iteration: 346430
loss: 1.0213271379470825,grad_norm: 0.8270562527374624, iteration: 346431
loss: 0.9989369511604309,grad_norm: 0.9244889601109951, iteration: 346432
loss: 1.0438231229782104,grad_norm: 0.9194607668656258, iteration: 346433
loss: 0.9943718910217285,grad_norm: 0.6762597537369158, iteration: 346434
loss: 1.041115164756775,grad_norm: 0.9999990988854538, iteration: 346435
loss: 1.0036070346832275,grad_norm: 0.638496142420123, iteration: 346436
loss: 1.0159755945205688,grad_norm: 0.8890065488549846, iteration: 346437
loss: 1.028376579284668,grad_norm: 0.8797481161957327, iteration: 346438
loss: 0.9589022397994995,grad_norm: 0.9525800343990205, iteration: 346439
loss: 1.0905777215957642,grad_norm: 0.9999996210806775, iteration: 346440
loss: 1.0158195495605469,grad_norm: 0.7215967858363016, iteration: 346441
loss: 0.9852576851844788,grad_norm: 0.8092543970885964, iteration: 346442
loss: 0.990370512008667,grad_norm: 0.8340878540234101, iteration: 346443
loss: 0.9864205121994019,grad_norm: 0.7864671063407308, iteration: 346444
loss: 1.0279947519302368,grad_norm: 0.8334601812152834, iteration: 346445
loss: 0.9886593222618103,grad_norm: 0.9477180490370146, iteration: 346446
loss: 1.032989501953125,grad_norm: 0.9999996765794494, iteration: 346447
loss: 1.0837351083755493,grad_norm: 0.9999994678021268, iteration: 346448
loss: 0.9804315567016602,grad_norm: 0.8782644788350644, iteration: 346449
loss: 1.0016580820083618,grad_norm: 0.7773424150356633, iteration: 346450
loss: 1.0476269721984863,grad_norm: 0.8651601906681464, iteration: 346451
loss: 1.0428022146224976,grad_norm: 0.9251493416025538, iteration: 346452
loss: 1.094427227973938,grad_norm: 0.9999997899873954, iteration: 346453
loss: 0.957387387752533,grad_norm: 0.9267680041691992, iteration: 346454
loss: 1.0329563617706299,grad_norm: 0.9999998077295319, iteration: 346455
loss: 0.9661815762519836,grad_norm: 0.745076699324358, iteration: 346456
loss: 0.9838502407073975,grad_norm: 0.8497960883190011, iteration: 346457
loss: 1.0306190252304077,grad_norm: 0.8931915709201167, iteration: 346458
loss: 1.0234254598617554,grad_norm: 0.8456355909625561, iteration: 346459
loss: 0.982807993888855,grad_norm: 0.9442598180683759, iteration: 346460
loss: 1.0693563222885132,grad_norm: 0.9999995957323358, iteration: 346461
loss: 1.021655797958374,grad_norm: 0.999999105848926, iteration: 346462
loss: 0.9691126346588135,grad_norm: 0.9190503225338725, iteration: 346463
loss: 0.9616987109184265,grad_norm: 0.8902897024165496, iteration: 346464
loss: 1.0780490636825562,grad_norm: 0.9999991954464731, iteration: 346465
loss: 0.9696366190910339,grad_norm: 0.852188244948851, iteration: 346466
loss: 0.9774383306503296,grad_norm: 0.6971087819371005, iteration: 346467
loss: 1.035819411277771,grad_norm: 0.908282039068151, iteration: 346468
loss: 1.003294825553894,grad_norm: 0.8747008695067676, iteration: 346469
loss: 0.979287326335907,grad_norm: 0.8630423180550484, iteration: 346470
loss: 0.995080828666687,grad_norm: 0.7741560173000037, iteration: 346471
loss: 1.0152859687805176,grad_norm: 0.7475950493934547, iteration: 346472
loss: 0.9680302143096924,grad_norm: 0.8220348113769844, iteration: 346473
loss: 1.0105096101760864,grad_norm: 0.9687941803395586, iteration: 346474
loss: 1.0833944082260132,grad_norm: 0.9999995807156947, iteration: 346475
loss: 0.9881982207298279,grad_norm: 0.7515698008463444, iteration: 346476
loss: 1.009617805480957,grad_norm: 0.7809309135549917, iteration: 346477
loss: 0.9885056018829346,grad_norm: 0.9566884250812002, iteration: 346478
loss: 1.010503888130188,grad_norm: 0.999999207690649, iteration: 346479
loss: 1.002974271774292,grad_norm: 0.9999989755254705, iteration: 346480
loss: 1.0621769428253174,grad_norm: 0.9999995426901243, iteration: 346481
loss: 1.0122302770614624,grad_norm: 0.8257689787137211, iteration: 346482
loss: 1.0046852827072144,grad_norm: 0.7827993118034436, iteration: 346483
loss: 1.0049065351486206,grad_norm: 0.999999241410731, iteration: 346484
loss: 1.003761887550354,grad_norm: 0.9887760220922229, iteration: 346485
loss: 0.990997314453125,grad_norm: 0.9595054898267585, iteration: 346486
loss: 1.0272201299667358,grad_norm: 0.9999990053329889, iteration: 346487
loss: 1.0023643970489502,grad_norm: 0.7882191950570241, iteration: 346488
loss: 1.1094417572021484,grad_norm: 0.9999995697142084, iteration: 346489
loss: 1.0852051973342896,grad_norm: 0.9999998991161526, iteration: 346490
loss: 1.0176655054092407,grad_norm: 0.8102649791441305, iteration: 346491
loss: 1.0193012952804565,grad_norm: 0.8944611775932754, iteration: 346492
loss: 1.1398359537124634,grad_norm: 0.9999998585657385, iteration: 346493
loss: 0.9915904402732849,grad_norm: 0.8992181616391401, iteration: 346494
loss: 1.0070289373397827,grad_norm: 0.9854161521224142, iteration: 346495
loss: 1.004958152770996,grad_norm: 0.9863675747420668, iteration: 346496
loss: 1.02609121799469,grad_norm: 0.862781277865488, iteration: 346497
loss: 0.977067232131958,grad_norm: 0.8073089633665562, iteration: 346498
loss: 1.0665894746780396,grad_norm: 0.9400077456174523, iteration: 346499
loss: 1.0485239028930664,grad_norm: 0.9999996954827862, iteration: 346500
loss: 1.1578072309494019,grad_norm: 0.9999997087284535, iteration: 346501
loss: 1.0105905532836914,grad_norm: 0.7923583435641149, iteration: 346502
loss: 1.006670355796814,grad_norm: 0.9999992075326822, iteration: 346503
loss: 0.978971540927887,grad_norm: 0.8562896695754045, iteration: 346504
loss: 1.0026286840438843,grad_norm: 0.8615300001098746, iteration: 346505
loss: 0.9892972111701965,grad_norm: 0.7757260408514963, iteration: 346506
loss: 1.0709171295166016,grad_norm: 0.8044629494852606, iteration: 346507
loss: 1.0594619512557983,grad_norm: 0.9999997965908491, iteration: 346508
loss: 1.0129483938217163,grad_norm: 0.7283769229008465, iteration: 346509
loss: 0.9984538555145264,grad_norm: 0.9999993201119024, iteration: 346510
loss: 1.014951467514038,grad_norm: 0.8629438354275814, iteration: 346511
loss: 0.9927793741226196,grad_norm: 0.9999996233068301, iteration: 346512
loss: 1.0111721754074097,grad_norm: 0.676727175975157, iteration: 346513
loss: 1.0126237869262695,grad_norm: 0.9041188030815622, iteration: 346514
loss: 0.9706825017929077,grad_norm: 0.9999991828126923, iteration: 346515
loss: 0.9830715656280518,grad_norm: 0.8363813073198558, iteration: 346516
loss: 1.018738865852356,grad_norm: 0.8225571730903137, iteration: 346517
loss: 0.9826788306236267,grad_norm: 0.8853234967147275, iteration: 346518
loss: 1.1003001928329468,grad_norm: 0.9999997289264396, iteration: 346519
loss: 1.0628348588943481,grad_norm: 0.8019555352018966, iteration: 346520
loss: 1.0313770771026611,grad_norm: 0.9999999235571747, iteration: 346521
loss: 1.1050771474838257,grad_norm: 0.9999994762021518, iteration: 346522
loss: 1.0440726280212402,grad_norm: 0.9999990455266156, iteration: 346523
loss: 1.0499532222747803,grad_norm: 0.9520491209849767, iteration: 346524
loss: 0.9759680032730103,grad_norm: 0.7020283658413375, iteration: 346525
loss: 0.9665143489837646,grad_norm: 0.7676602782549116, iteration: 346526
loss: 0.9676482677459717,grad_norm: 0.8331839144998663, iteration: 346527
loss: 1.1403166055679321,grad_norm: 0.937052734541142, iteration: 346528
loss: 1.0630712509155273,grad_norm: 0.729752901626707, iteration: 346529
loss: 1.032214641571045,grad_norm: 0.9999994088402724, iteration: 346530
loss: 0.9676188826560974,grad_norm: 0.9542743785809604, iteration: 346531
loss: 1.0198535919189453,grad_norm: 0.7462926813470174, iteration: 346532
loss: 1.026993989944458,grad_norm: 1.0000000620436627, iteration: 346533
loss: 0.9598736763000488,grad_norm: 0.7635323354708172, iteration: 346534
loss: 1.0374021530151367,grad_norm: 1.0000000276885117, iteration: 346535
loss: 1.0289595127105713,grad_norm: 0.987857918846383, iteration: 346536
loss: 0.9940998554229736,grad_norm: 0.7913897493148252, iteration: 346537
loss: 0.9662538170814514,grad_norm: 0.8628379932770174, iteration: 346538
loss: 1.036219835281372,grad_norm: 0.7273597285265523, iteration: 346539
loss: 1.0238704681396484,grad_norm: 0.8036208383723181, iteration: 346540
loss: 1.0254613161087036,grad_norm: 0.8064531696695669, iteration: 346541
loss: 0.9871723055839539,grad_norm: 0.7836249185581271, iteration: 346542
loss: 1.0424622297286987,grad_norm: 0.8031462457104637, iteration: 346543
loss: 1.0406795740127563,grad_norm: 0.7789668580962088, iteration: 346544
loss: 1.0015780925750732,grad_norm: 0.7912247999803041, iteration: 346545
loss: 0.991936206817627,grad_norm: 0.7125698543825254, iteration: 346546
loss: 0.9967741370201111,grad_norm: 0.9999995044149766, iteration: 346547
loss: 1.0031921863555908,grad_norm: 0.8434519076664682, iteration: 346548
loss: 0.9874624013900757,grad_norm: 0.8570598005922176, iteration: 346549
loss: 1.0152883529663086,grad_norm: 0.8478832534077793, iteration: 346550
loss: 1.0283247232437134,grad_norm: 0.7915924979543659, iteration: 346551
loss: 1.0389164686203003,grad_norm: 0.9999991948487537, iteration: 346552
loss: 1.005879521369934,grad_norm: 0.7455293589677728, iteration: 346553
loss: 0.9548801779747009,grad_norm: 0.91681497774832, iteration: 346554
loss: 1.0055409669876099,grad_norm: 0.8538052410865693, iteration: 346555
loss: 0.9865793585777283,grad_norm: 0.6950352099550816, iteration: 346556
loss: 1.0130491256713867,grad_norm: 0.7814954969987028, iteration: 346557
loss: 1.0819929838180542,grad_norm: 0.9244844968035846, iteration: 346558
loss: 0.9989336133003235,grad_norm: 0.7807177474605481, iteration: 346559
loss: 0.9989949464797974,grad_norm: 0.8667279065439135, iteration: 346560
loss: 1.0040910243988037,grad_norm: 0.9218704199932439, iteration: 346561
loss: 0.9565185308456421,grad_norm: 0.7046054203042411, iteration: 346562
loss: 0.9700806736946106,grad_norm: 0.6879904240377265, iteration: 346563
loss: 0.9985038042068481,grad_norm: 0.8007309333076411, iteration: 346564
loss: 1.0424871444702148,grad_norm: 0.9999993595650138, iteration: 346565
loss: 1.0187079906463623,grad_norm: 0.785861851890153, iteration: 346566
loss: 1.0169517993927002,grad_norm: 0.8265513827493254, iteration: 346567
loss: 1.0131691694259644,grad_norm: 0.8339145965341399, iteration: 346568
loss: 1.005129098892212,grad_norm: 0.9999996260255732, iteration: 346569
loss: 1.0023159980773926,grad_norm: 0.8933928395931937, iteration: 346570
loss: 1.0036416053771973,grad_norm: 0.9081074623354997, iteration: 346571
loss: 1.024696707725525,grad_norm: 0.8160999897829745, iteration: 346572
loss: 0.9591432213783264,grad_norm: 0.9151561782984832, iteration: 346573
loss: 0.9674530029296875,grad_norm: 0.9067669238953411, iteration: 346574
loss: 1.0021778345108032,grad_norm: 0.987023551067082, iteration: 346575
loss: 0.9868123531341553,grad_norm: 0.7061682943887975, iteration: 346576
loss: 0.9637897610664368,grad_norm: 0.8618642543768723, iteration: 346577
loss: 1.0920497179031372,grad_norm: 0.9999994163969932, iteration: 346578
loss: 1.0024663209915161,grad_norm: 0.7652433422512255, iteration: 346579
loss: 0.9504148960113525,grad_norm: 0.999999205542054, iteration: 346580
loss: 1.0111695528030396,grad_norm: 0.9999994019294446, iteration: 346581
loss: 0.9811181426048279,grad_norm: 0.9999991659917912, iteration: 346582
loss: 0.9689927101135254,grad_norm: 0.8786302078692895, iteration: 346583
loss: 1.0012301206588745,grad_norm: 0.9999993038641439, iteration: 346584
loss: 1.0398329496383667,grad_norm: 0.8323189350569381, iteration: 346585
loss: 0.9929757714271545,grad_norm: 0.8025961075394433, iteration: 346586
loss: 0.9749664664268494,grad_norm: 0.8172809752353867, iteration: 346587
loss: 1.0361192226409912,grad_norm: 0.831990523792561, iteration: 346588
loss: 1.0000476837158203,grad_norm: 0.9098802889696993, iteration: 346589
loss: 0.9989317059516907,grad_norm: 0.7711225401674979, iteration: 346590
loss: 1.0765268802642822,grad_norm: 0.9999996554928171, iteration: 346591
loss: 1.0225651264190674,grad_norm: 0.7697105322341585, iteration: 346592
loss: 0.969658613204956,grad_norm: 0.9797644630715953, iteration: 346593
loss: 1.0052533149719238,grad_norm: 0.7697891255183226, iteration: 346594
loss: 1.0460975170135498,grad_norm: 0.999999365235783, iteration: 346595
loss: 0.9799351692199707,grad_norm: 0.8101252864328969, iteration: 346596
loss: 1.0099846124649048,grad_norm: 0.704302851608058, iteration: 346597
loss: 0.9730821251869202,grad_norm: 0.957237805807322, iteration: 346598
loss: 0.9873788356781006,grad_norm: 0.721769096246276, iteration: 346599
loss: 1.0010637044906616,grad_norm: 0.7435157997017134, iteration: 346600
loss: 1.018846035003662,grad_norm: 0.9673138535547731, iteration: 346601
loss: 1.0344077348709106,grad_norm: 0.7933204816430732, iteration: 346602
loss: 0.9435491561889648,grad_norm: 0.9999991290755654, iteration: 346603
loss: 0.9971635341644287,grad_norm: 0.9574408527241971, iteration: 346604
loss: 0.9460486173629761,grad_norm: 0.9254040345569111, iteration: 346605
loss: 1.0190718173980713,grad_norm: 0.9999990868555302, iteration: 346606
loss: 1.0050654411315918,grad_norm: 0.8624792496907864, iteration: 346607
loss: 1.0133671760559082,grad_norm: 1.0000000600231491, iteration: 346608
loss: 0.99756920337677,grad_norm: 0.9513405246612754, iteration: 346609
loss: 0.9811774492263794,grad_norm: 0.7718112022286095, iteration: 346610
loss: 1.0500787496566772,grad_norm: 0.8081074813222252, iteration: 346611
loss: 0.9783715605735779,grad_norm: 0.9459654919671516, iteration: 346612
loss: 1.0015101432800293,grad_norm: 0.7966296837465061, iteration: 346613
loss: 0.9607710242271423,grad_norm: 0.7525577789601205, iteration: 346614
loss: 1.0170875787734985,grad_norm: 0.7978480111076346, iteration: 346615
loss: 0.9727923274040222,grad_norm: 0.7270522674100091, iteration: 346616
loss: 0.9748255014419556,grad_norm: 0.8969765332968137, iteration: 346617
loss: 0.968848466873169,grad_norm: 0.8479049583101439, iteration: 346618
loss: 1.0650627613067627,grad_norm: 0.9999996687153491, iteration: 346619
loss: 1.0108544826507568,grad_norm: 0.8640394882128491, iteration: 346620
loss: 1.045395851135254,grad_norm: 0.9999993772853585, iteration: 346621
loss: 0.997484028339386,grad_norm: 0.7707080013079766, iteration: 346622
loss: 0.9943299889564514,grad_norm: 0.8976365606861851, iteration: 346623
loss: 0.9868918061256409,grad_norm: 0.9844873255848654, iteration: 346624
loss: 1.0840349197387695,grad_norm: 0.8012066962208053, iteration: 346625
loss: 1.0429526567459106,grad_norm: 0.8590228943032631, iteration: 346626
loss: 1.0102150440216064,grad_norm: 0.8515417302868622, iteration: 346627
loss: 0.993074357509613,grad_norm: 0.9472142826623005, iteration: 346628
loss: 0.9850316047668457,grad_norm: 0.8585415257354551, iteration: 346629
loss: 0.9826450347900391,grad_norm: 0.9506543633012686, iteration: 346630
loss: 0.9691426753997803,grad_norm: 0.6881741362846908, iteration: 346631
loss: 1.0070236921310425,grad_norm: 0.7561820822889922, iteration: 346632
loss: 1.0196181535720825,grad_norm: 0.6964920407979058, iteration: 346633
loss: 0.9943495988845825,grad_norm: 0.7886614818262608, iteration: 346634
loss: 1.0126999616622925,grad_norm: 0.9070905446664366, iteration: 346635
loss: 0.9795204401016235,grad_norm: 0.9999996248824461, iteration: 346636
loss: 1.0039793252944946,grad_norm: 0.7098344260427332, iteration: 346637
loss: 0.9885268807411194,grad_norm: 0.936757750052146, iteration: 346638
loss: 1.0397331714630127,grad_norm: 0.8018224082449212, iteration: 346639
loss: 0.9600708484649658,grad_norm: 0.8545317203227761, iteration: 346640
loss: 1.0320206880569458,grad_norm: 0.9264867696279443, iteration: 346641
loss: 1.0105266571044922,grad_norm: 0.8780633602417086, iteration: 346642
loss: 0.986746609210968,grad_norm: 0.8813667595735901, iteration: 346643
loss: 1.0008553266525269,grad_norm: 0.9999991057286645, iteration: 346644
loss: 1.030476450920105,grad_norm: 0.8442067605917838, iteration: 346645
loss: 1.0027966499328613,grad_norm: 0.7796087268992391, iteration: 346646
loss: 1.0075644254684448,grad_norm: 0.7273370330771853, iteration: 346647
loss: 0.9945963621139526,grad_norm: 0.7863671076529851, iteration: 346648
loss: 0.9905164241790771,grad_norm: 0.849144610760579, iteration: 346649
loss: 0.9382861852645874,grad_norm: 0.9603714675687649, iteration: 346650
loss: 1.0050524473190308,grad_norm: 0.7237657126037101, iteration: 346651
loss: 1.0221713781356812,grad_norm: 0.9779217428114185, iteration: 346652
loss: 0.9925709962844849,grad_norm: 0.9999996752717304, iteration: 346653
loss: 1.0128188133239746,grad_norm: 0.999999869018638, iteration: 346654
loss: 0.9856548309326172,grad_norm: 0.7869484225674972, iteration: 346655
loss: 0.9923304319381714,grad_norm: 0.8238418553237499, iteration: 346656
loss: 1.0169230699539185,grad_norm: 0.971276412665116, iteration: 346657
loss: 1.012746810913086,grad_norm: 0.853736680762932, iteration: 346658
loss: 0.9712984561920166,grad_norm: 0.7642062505206196, iteration: 346659
loss: 0.9885020852088928,grad_norm: 0.7413053043531992, iteration: 346660
loss: 1.0359092950820923,grad_norm: 0.804198110637266, iteration: 346661
loss: 0.9724046587944031,grad_norm: 0.8044028302948605, iteration: 346662
loss: 1.0256606340408325,grad_norm: 0.8202818751383939, iteration: 346663
loss: 1.007264494895935,grad_norm: 0.9999994178222006, iteration: 346664
loss: 1.021953821182251,grad_norm: 0.8527586039100263, iteration: 346665
loss: 0.9983884692192078,grad_norm: 0.7104632508878874, iteration: 346666
loss: 1.0214473009109497,grad_norm: 0.9570994598317522, iteration: 346667
loss: 0.9927893877029419,grad_norm: 0.9808785546834052, iteration: 346668
loss: 0.9825037121772766,grad_norm: 0.8955633669495994, iteration: 346669
loss: 0.9924336075782776,grad_norm: 0.7207566351594759, iteration: 346670
loss: 0.994693398475647,grad_norm: 0.7541016452637426, iteration: 346671
loss: 1.0095131397247314,grad_norm: 0.9592826550198172, iteration: 346672
loss: 0.9933938980102539,grad_norm: 0.844981160107903, iteration: 346673
loss: 1.0105388164520264,grad_norm: 0.6916821554593373, iteration: 346674
loss: 1.006155014038086,grad_norm: 0.7495160266964187, iteration: 346675
loss: 1.0065420866012573,grad_norm: 0.7312160635874516, iteration: 346676
loss: 0.9888048768043518,grad_norm: 0.8538104848483796, iteration: 346677
loss: 1.0748637914657593,grad_norm: 0.9999992534694061, iteration: 346678
loss: 1.0243583917617798,grad_norm: 0.820767220520799, iteration: 346679
loss: 0.987088143825531,grad_norm: 0.8150260113744345, iteration: 346680
loss: 1.0632803440093994,grad_norm: 0.9312550964337584, iteration: 346681
loss: 1.0094361305236816,grad_norm: 0.7904506049338368, iteration: 346682
loss: 0.9784299731254578,grad_norm: 0.9962434231785181, iteration: 346683
loss: 1.0183597803115845,grad_norm: 0.9999993544739523, iteration: 346684
loss: 1.0310254096984863,grad_norm: 0.7593266259549033, iteration: 346685
loss: 0.9537956118583679,grad_norm: 0.9999991664940642, iteration: 346686
loss: 0.9804052710533142,grad_norm: 0.7124483311867243, iteration: 346687
loss: 0.9787564277648926,grad_norm: 0.738109667514638, iteration: 346688
loss: 1.0353811979293823,grad_norm: 0.8323357418864669, iteration: 346689
loss: 0.972028911113739,grad_norm: 0.8078331559813423, iteration: 346690
loss: 1.0656687021255493,grad_norm: 0.9999998576145053, iteration: 346691
loss: 1.0246641635894775,grad_norm: 0.7966118212175188, iteration: 346692
loss: 1.0026825666427612,grad_norm: 0.9860156298604112, iteration: 346693
loss: 0.9963420033454895,grad_norm: 0.8640226167116493, iteration: 346694
loss: 0.9676659107208252,grad_norm: 0.9036590542841927, iteration: 346695
loss: 1.0156954526901245,grad_norm: 0.7981671923126261, iteration: 346696
loss: 1.0359877347946167,grad_norm: 0.9999996833701672, iteration: 346697
loss: 1.0427371263504028,grad_norm: 0.8255709323512019, iteration: 346698
loss: 1.0266879796981812,grad_norm: 0.7268128661888456, iteration: 346699
loss: 1.0385351181030273,grad_norm: 0.999999353973486, iteration: 346700
loss: 0.9900751709938049,grad_norm: 0.8731946317893029, iteration: 346701
loss: 0.9990924596786499,grad_norm: 0.7160676390354719, iteration: 346702
loss: 1.0084693431854248,grad_norm: 0.968835497096029, iteration: 346703
loss: 0.9868629574775696,grad_norm: 0.8795474589243261, iteration: 346704
loss: 0.9828119874000549,grad_norm: 0.8364853858754048, iteration: 346705
loss: 1.0202206373214722,grad_norm: 0.7761075262787436, iteration: 346706
loss: 1.0901509523391724,grad_norm: 0.9999989186269054, iteration: 346707
loss: 1.031489372253418,grad_norm: 0.9999992455758752, iteration: 346708
loss: 0.996307373046875,grad_norm: 0.6839812506479008, iteration: 346709
loss: 0.9669012427330017,grad_norm: 0.864819266785931, iteration: 346710
loss: 0.9906262159347534,grad_norm: 0.8933720603808967, iteration: 346711
loss: 1.0150666236877441,grad_norm: 0.8654772567687696, iteration: 346712
loss: 1.0152589082717896,grad_norm: 0.9652395516496378, iteration: 346713
loss: 0.9839633107185364,grad_norm: 0.9999993174621762, iteration: 346714
loss: 1.0107359886169434,grad_norm: 0.787280491626391, iteration: 346715
loss: 1.0030022859573364,grad_norm: 0.7516864547132746, iteration: 346716
loss: 1.0449320077896118,grad_norm: 0.8019256901110974, iteration: 346717
loss: 1.0295209884643555,grad_norm: 0.8812436427803931, iteration: 346718
loss: 0.9915966987609863,grad_norm: 0.8392733524720557, iteration: 346719
loss: 1.015384554862976,grad_norm: 0.928074122946436, iteration: 346720
loss: 1.0065902471542358,grad_norm: 0.7504976350408333, iteration: 346721
loss: 1.0301655530929565,grad_norm: 0.9237678746228326, iteration: 346722
loss: 0.991433322429657,grad_norm: 0.8160224298199221, iteration: 346723
loss: 1.0120337009429932,grad_norm: 0.8982377251599336, iteration: 346724
loss: 0.9774492383003235,grad_norm: 0.8884453854304802, iteration: 346725
loss: 0.9908730983734131,grad_norm: 0.7240810665441729, iteration: 346726
loss: 1.0018590688705444,grad_norm: 0.8749002802663619, iteration: 346727
loss: 0.9928499460220337,grad_norm: 0.7362654247365875, iteration: 346728
loss: 0.9822871088981628,grad_norm: 0.9999991536609051, iteration: 346729
loss: 1.115875482559204,grad_norm: 0.999999712566782, iteration: 346730
loss: 0.9780785441398621,grad_norm: 0.8502983352937034, iteration: 346731
loss: 1.0191254615783691,grad_norm: 0.9706003721718445, iteration: 346732
loss: 1.016416072845459,grad_norm: 0.9999997001963523, iteration: 346733
loss: 1.0430344343185425,grad_norm: 0.9999994895314398, iteration: 346734
loss: 0.9840981960296631,grad_norm: 0.7649199323354937, iteration: 346735
loss: 0.9902831315994263,grad_norm: 0.765549072447654, iteration: 346736
loss: 1.0184211730957031,grad_norm: 0.8770868742504532, iteration: 346737
loss: 0.9872693419456482,grad_norm: 0.7948981686204799, iteration: 346738
loss: 1.0059374570846558,grad_norm: 0.9621973616841909, iteration: 346739
loss: 1.0043331384658813,grad_norm: 0.8041836204733626, iteration: 346740
loss: 0.9844545722007751,grad_norm: 0.9999994720870619, iteration: 346741
loss: 0.9923605918884277,grad_norm: 0.9543995971593665, iteration: 346742
loss: 1.0197468996047974,grad_norm: 0.7810895357796612, iteration: 346743
loss: 0.9899933934211731,grad_norm: 0.9999991001666757, iteration: 346744
loss: 1.0456922054290771,grad_norm: 0.9907943629342871, iteration: 346745
loss: 0.9758792519569397,grad_norm: 0.7962037767134313, iteration: 346746
loss: 0.9581666588783264,grad_norm: 0.7507921035830197, iteration: 346747
loss: 1.0345542430877686,grad_norm: 0.9999994397772871, iteration: 346748
loss: 1.0668020248413086,grad_norm: 0.9999998469339973, iteration: 346749
loss: 0.9946811199188232,grad_norm: 0.8063544038376252, iteration: 346750
loss: 0.9850813150405884,grad_norm: 0.8876733298770159, iteration: 346751
loss: 1.0300604104995728,grad_norm: 0.8007364834218003, iteration: 346752
loss: 1.0042626857757568,grad_norm: 0.844664429085303, iteration: 346753
loss: 0.949785590171814,grad_norm: 0.9450450594472867, iteration: 346754
loss: 1.0405478477478027,grad_norm: 0.7302507325776924, iteration: 346755
loss: 0.9716593623161316,grad_norm: 0.9161099345392159, iteration: 346756
loss: 1.0329362154006958,grad_norm: 0.7233017080866299, iteration: 346757
loss: 0.9805116653442383,grad_norm: 0.7812580916421334, iteration: 346758
loss: 1.0064499378204346,grad_norm: 0.9999992887219301, iteration: 346759
loss: 1.0256191492080688,grad_norm: 0.9999993067189512, iteration: 346760
loss: 1.018080234527588,grad_norm: 0.9972326302433085, iteration: 346761
loss: 0.999053955078125,grad_norm: 0.8420985491410795, iteration: 346762
loss: 0.9868124127388,grad_norm: 0.8598562049984112, iteration: 346763
loss: 1.0023682117462158,grad_norm: 0.8733586353033002, iteration: 346764
loss: 1.0035043954849243,grad_norm: 0.7069180429785958, iteration: 346765
loss: 0.991430401802063,grad_norm: 0.832489593439503, iteration: 346766
loss: 0.9834997057914734,grad_norm: 0.8274320935796317, iteration: 346767
loss: 0.9807589650154114,grad_norm: 0.6939749836125363, iteration: 346768
loss: 1.010069489479065,grad_norm: 0.8034422464305805, iteration: 346769
loss: 0.9874168038368225,grad_norm: 0.7108864255875919, iteration: 346770
loss: 0.9935340881347656,grad_norm: 0.9999994107832025, iteration: 346771
loss: 1.0011637210845947,grad_norm: 0.6773788070801084, iteration: 346772
loss: 1.0363481044769287,grad_norm: 0.7210795189192362, iteration: 346773
loss: 1.0382716655731201,grad_norm: 0.9999997501475935, iteration: 346774
loss: 1.0282803773880005,grad_norm: 0.8746030104843426, iteration: 346775
loss: 0.9905781745910645,grad_norm: 0.9920857173188637, iteration: 346776
loss: 0.9860701560974121,grad_norm: 0.7560750687200629, iteration: 346777
loss: 0.9913678765296936,grad_norm: 0.7705300908032623, iteration: 346778
loss: 0.9654150605201721,grad_norm: 0.9999991472563388, iteration: 346779
loss: 0.984988808631897,grad_norm: 0.7226737206573015, iteration: 346780
loss: 1.0244140625,grad_norm: 0.8279380018515294, iteration: 346781
loss: 1.0296168327331543,grad_norm: 0.999998960744298, iteration: 346782
loss: 0.9703875184059143,grad_norm: 0.9392963204232064, iteration: 346783
loss: 0.9888810515403748,grad_norm: 0.9335419099136207, iteration: 346784
loss: 0.95331209897995,grad_norm: 0.7740607018625928, iteration: 346785
loss: 0.9880160689353943,grad_norm: 0.9409509955252892, iteration: 346786
loss: 0.9895895719528198,grad_norm: 0.8477768698694306, iteration: 346787
loss: 0.9786984324455261,grad_norm: 0.9272074068833537, iteration: 346788
loss: 0.9855449199676514,grad_norm: 0.6697628029713665, iteration: 346789
loss: 1.0757724046707153,grad_norm: 0.9379300663731176, iteration: 346790
loss: 1.0251744985580444,grad_norm: 0.9475807790199926, iteration: 346791
loss: 0.958181619644165,grad_norm: 0.9357329428198542, iteration: 346792
loss: 0.9599999785423279,grad_norm: 0.7563817252113157, iteration: 346793
loss: 0.9773954153060913,grad_norm: 0.8243283586423807, iteration: 346794
loss: 0.9866279363632202,grad_norm: 0.9290106388542991, iteration: 346795
loss: 0.9772475361824036,grad_norm: 0.8062907337499635, iteration: 346796
loss: 1.0317952632904053,grad_norm: 0.8377581574584005, iteration: 346797
loss: 1.0330774784088135,grad_norm: 0.9999990936519065, iteration: 346798
loss: 1.045122504234314,grad_norm: 0.6985466449049075, iteration: 346799
loss: 0.9791574478149414,grad_norm: 0.779892228734649, iteration: 346800
loss: 0.9938473701477051,grad_norm: 0.9042102405651119, iteration: 346801
loss: 0.9479659199714661,grad_norm: 0.7744622066142102, iteration: 346802
loss: 1.0027029514312744,grad_norm: 0.8567521386632788, iteration: 346803
loss: 0.9685071110725403,grad_norm: 0.7156244487425367, iteration: 346804
loss: 0.9835265278816223,grad_norm: 0.7592776220046835, iteration: 346805
loss: 0.9984380006790161,grad_norm: 0.6894494375853097, iteration: 346806
loss: 1.0405025482177734,grad_norm: 0.6812998147177154, iteration: 346807
loss: 0.9945497512817383,grad_norm: 0.8789850454299146, iteration: 346808
loss: 1.0259099006652832,grad_norm: 0.8585162256271996, iteration: 346809
loss: 0.9892350435256958,grad_norm: 0.9512820980291662, iteration: 346810
loss: 1.0512819290161133,grad_norm: 0.8417239516943649, iteration: 346811
loss: 1.0076699256896973,grad_norm: 0.8383468052327977, iteration: 346812
loss: 1.1166149377822876,grad_norm: 0.9999999090765197, iteration: 346813
loss: 0.9402006268501282,grad_norm: 0.7954220383209171, iteration: 346814
loss: 0.9933786392211914,grad_norm: 0.9294930911396402, iteration: 346815
loss: 1.0113639831542969,grad_norm: 0.9015625641945665, iteration: 346816
loss: 0.970867395401001,grad_norm: 0.8618008486494148, iteration: 346817
loss: 0.9760986566543579,grad_norm: 0.7404011872505208, iteration: 346818
loss: 1.018310546875,grad_norm: 0.8497653489307909, iteration: 346819
loss: 1.0183844566345215,grad_norm: 0.768585122484755, iteration: 346820
loss: 0.9896594285964966,grad_norm: 0.7443179555404725, iteration: 346821
loss: 1.0153039693832397,grad_norm: 0.8299398612523355, iteration: 346822
loss: 0.9669798016548157,grad_norm: 0.8923998650175464, iteration: 346823
loss: 0.9806589484214783,grad_norm: 0.8708393653013673, iteration: 346824
loss: 1.0000996589660645,grad_norm: 0.7632280847951854, iteration: 346825
loss: 1.0514781475067139,grad_norm: 0.8897324131789393, iteration: 346826
loss: 0.9820257425308228,grad_norm: 0.7899315461712688, iteration: 346827
loss: 1.013113260269165,grad_norm: 0.9004945294159618, iteration: 346828
loss: 0.9970154166221619,grad_norm: 0.8002166901046597, iteration: 346829
loss: 0.9747549295425415,grad_norm: 0.790996899623098, iteration: 346830
loss: 1.0268579721450806,grad_norm: 0.8470663018264323, iteration: 346831
loss: 0.994082510471344,grad_norm: 0.9527078086414648, iteration: 346832
loss: 1.0372952222824097,grad_norm: 0.8407247026912186, iteration: 346833
loss: 1.0331474542617798,grad_norm: 0.9527332495446245, iteration: 346834
loss: 1.024185299873352,grad_norm: 0.8006247122628684, iteration: 346835
loss: 1.0211381912231445,grad_norm: 0.8920487446794189, iteration: 346836
loss: 0.9642947316169739,grad_norm: 0.7479175498039543, iteration: 346837
loss: 0.9991241097450256,grad_norm: 0.7887789443570239, iteration: 346838
loss: 1.013622522354126,grad_norm: 0.8797895356665351, iteration: 346839
loss: 1.0031445026397705,grad_norm: 0.8770330500382884, iteration: 346840
loss: 1.0218563079833984,grad_norm: 0.8618838947323619, iteration: 346841
loss: 1.0046874284744263,grad_norm: 0.8737379304980765, iteration: 346842
loss: 0.9729442000389099,grad_norm: 0.9844503646793212, iteration: 346843
loss: 0.9828100800514221,grad_norm: 0.744389114974976, iteration: 346844
loss: 1.036145806312561,grad_norm: 0.9680990398480204, iteration: 346845
loss: 0.9869837164878845,grad_norm: 0.8188277890000278, iteration: 346846
loss: 1.088564395904541,grad_norm: 0.8073190364903591, iteration: 346847
loss: 1.0020935535430908,grad_norm: 0.9131389027864263, iteration: 346848
loss: 1.0594528913497925,grad_norm: 0.9999998035308078, iteration: 346849
loss: 1.0447680950164795,grad_norm: 0.999999620809538, iteration: 346850
loss: 1.0666486024856567,grad_norm: 0.9290095178737776, iteration: 346851
loss: 0.9864246249198914,grad_norm: 0.9068094522366134, iteration: 346852
loss: 0.9796159863471985,grad_norm: 0.8946146278531955, iteration: 346853
loss: 0.9848188161849976,grad_norm: 0.9200991906265183, iteration: 346854
loss: 1.0151516199111938,grad_norm: 0.7239230481167165, iteration: 346855
loss: 1.0412858724594116,grad_norm: 0.9169881189855796, iteration: 346856
loss: 0.966541051864624,grad_norm: 0.7656714702503422, iteration: 346857
loss: 0.9813379645347595,grad_norm: 0.760869168081409, iteration: 346858
loss: 0.9825670123100281,grad_norm: 0.8438359905015395, iteration: 346859
loss: 0.9999287724494934,grad_norm: 0.7203576820035831, iteration: 346860
loss: 1.008777379989624,grad_norm: 0.9815094635393422, iteration: 346861
loss: 1.0451610088348389,grad_norm: 0.8679401306920016, iteration: 346862
loss: 1.0160918235778809,grad_norm: 0.8738905439254745, iteration: 346863
loss: 0.9980260729789734,grad_norm: 0.9999990740922072, iteration: 346864
loss: 1.011549711227417,grad_norm: 0.7512943500445952, iteration: 346865
loss: 0.9826552271842957,grad_norm: 0.7474362314196982, iteration: 346866
loss: 1.0367202758789062,grad_norm: 0.9999999009558813, iteration: 346867
loss: 0.9912139177322388,grad_norm: 0.9999996752376874, iteration: 346868
loss: 1.007258415222168,grad_norm: 0.7057032988354712, iteration: 346869
loss: 1.0239672660827637,grad_norm: 0.8181753438457657, iteration: 346870
loss: 0.9421752691268921,grad_norm: 0.9872416901353874, iteration: 346871
loss: 1.0555486679077148,grad_norm: 0.7528766783635027, iteration: 346872
loss: 0.9708641171455383,grad_norm: 0.7692624158847813, iteration: 346873
loss: 1.0198293924331665,grad_norm: 0.9999995356278849, iteration: 346874
loss: 0.9972689151763916,grad_norm: 0.9449862274423481, iteration: 346875
loss: 0.9749148488044739,grad_norm: 0.9999989956156514, iteration: 346876
loss: 1.0456931591033936,grad_norm: 0.9999995205772313, iteration: 346877
loss: 1.003812551498413,grad_norm: 0.8071257769457306, iteration: 346878
loss: 0.9955069422721863,grad_norm: 0.8951763779301078, iteration: 346879
loss: 0.9879545569419861,grad_norm: 0.7251339185739895, iteration: 346880
loss: 1.002568006515503,grad_norm: 0.9999989648234864, iteration: 346881
loss: 1.013865351676941,grad_norm: 0.7987404235073858, iteration: 346882
loss: 1.0137182474136353,grad_norm: 0.7639745429852461, iteration: 346883
loss: 1.0031955242156982,grad_norm: 0.6084837925816349, iteration: 346884
loss: 0.996391236782074,grad_norm: 0.8311436396445784, iteration: 346885
loss: 0.9830736517906189,grad_norm: 0.811266545789115, iteration: 346886
loss: 1.0031362771987915,grad_norm: 0.9999989726733526, iteration: 346887
loss: 0.9918671250343323,grad_norm: 0.9988424912719133, iteration: 346888
loss: 1.0042510032653809,grad_norm: 0.7275819640474619, iteration: 346889
loss: 1.0232012271881104,grad_norm: 0.7493431941921979, iteration: 346890
loss: 0.9974710941314697,grad_norm: 0.7983564154168038, iteration: 346891
loss: 1.0253239870071411,grad_norm: 0.9001675607217117, iteration: 346892
loss: 1.013327956199646,grad_norm: 0.8389525664161098, iteration: 346893
loss: 0.9763895273208618,grad_norm: 0.8558074095281525, iteration: 346894
loss: 1.035739779472351,grad_norm: 0.7136920969920622, iteration: 346895
loss: 1.0998588800430298,grad_norm: 0.9999991747476897, iteration: 346896
loss: 1.0105798244476318,grad_norm: 0.7603742033176564, iteration: 346897
loss: 0.9821544885635376,grad_norm: 0.8223216626400939, iteration: 346898
loss: 1.0023847818374634,grad_norm: 0.7152226348308734, iteration: 346899
loss: 0.9961594343185425,grad_norm: 0.9089871335468623, iteration: 346900
loss: 0.994499683380127,grad_norm: 0.8501446779012531, iteration: 346901
loss: 0.9881075024604797,grad_norm: 0.8057674957223502, iteration: 346902
loss: 0.9943556189537048,grad_norm: 0.8676503377914723, iteration: 346903
loss: 0.9810286164283752,grad_norm: 0.7869556694231615, iteration: 346904
loss: 0.9957236051559448,grad_norm: 0.912556324702265, iteration: 346905
loss: 0.9977125525474548,grad_norm: 0.8022034364213736, iteration: 346906
loss: 1.0002738237380981,grad_norm: 0.8367611085738886, iteration: 346907
loss: 0.9885220527648926,grad_norm: 0.7992318855022648, iteration: 346908
loss: 0.9840236902236938,grad_norm: 0.6632698586489565, iteration: 346909
loss: 0.9685912132263184,grad_norm: 0.8854349316547695, iteration: 346910
loss: 0.9882122278213501,grad_norm: 0.7253680061800052, iteration: 346911
loss: 0.9894580245018005,grad_norm: 0.8908716836990392, iteration: 346912
loss: 0.985401451587677,grad_norm: 0.7303747122201759, iteration: 346913
loss: 1.0197478532791138,grad_norm: 0.9231336136778642, iteration: 346914
loss: 1.0585026741027832,grad_norm: 0.9999995423755765, iteration: 346915
loss: 1.0093225240707397,grad_norm: 0.8063979431445468, iteration: 346916
loss: 1.0137333869934082,grad_norm: 0.7358157106487363, iteration: 346917
loss: 0.9648388624191284,grad_norm: 0.8697695817777708, iteration: 346918
loss: 0.9729551076889038,grad_norm: 0.7247972865878205, iteration: 346919
loss: 0.9965008497238159,grad_norm: 0.7344199786553279, iteration: 346920
loss: 1.0029555559158325,grad_norm: 0.7202401727618775, iteration: 346921
loss: 0.980499267578125,grad_norm: 0.7939315653946301, iteration: 346922
loss: 1.0039745569229126,grad_norm: 0.8157922619288301, iteration: 346923
loss: 0.9675204157829285,grad_norm: 0.7866934135491944, iteration: 346924
loss: 1.0250076055526733,grad_norm: 1.0000000153616673, iteration: 346925
loss: 1.0127898454666138,grad_norm: 0.8930907085350952, iteration: 346926
loss: 1.047014832496643,grad_norm: 0.884168921119329, iteration: 346927
loss: 1.0004522800445557,grad_norm: 0.8202638839734526, iteration: 346928
loss: 1.023160696029663,grad_norm: 0.8199203554293215, iteration: 346929
loss: 0.9976919293403625,grad_norm: 0.86737058775215, iteration: 346930
loss: 0.990430474281311,grad_norm: 0.8085553238604031, iteration: 346931
loss: 0.9592592716217041,grad_norm: 0.7195257002915084, iteration: 346932
loss: 1.034927248954773,grad_norm: 0.852083620629752, iteration: 346933
loss: 1.0343103408813477,grad_norm: 0.7245378245995155, iteration: 346934
loss: 1.0557116270065308,grad_norm: 0.9999993980622474, iteration: 346935
loss: 0.9623790383338928,grad_norm: 0.9999991502358795, iteration: 346936
loss: 1.0010323524475098,grad_norm: 0.8469362098333652, iteration: 346937
loss: 0.9665384292602539,grad_norm: 0.8212557307323048, iteration: 346938
loss: 1.017422080039978,grad_norm: 0.7031441088858816, iteration: 346939
loss: 0.9850577712059021,grad_norm: 0.9436276822131265, iteration: 346940
loss: 0.9694333076477051,grad_norm: 0.7945082270955085, iteration: 346941
loss: 1.025451898574829,grad_norm: 0.8250720242847863, iteration: 346942
loss: 0.9909380078315735,grad_norm: 0.7919312403567634, iteration: 346943
loss: 0.9835594296455383,grad_norm: 0.8566530933174544, iteration: 346944
loss: 0.9652416110038757,grad_norm: 0.8987919470204563, iteration: 346945
loss: 1.0053343772888184,grad_norm: 0.7496578194741058, iteration: 346946
loss: 0.9874172806739807,grad_norm: 0.8474367632620438, iteration: 346947
loss: 0.9731748700141907,grad_norm: 0.719604127872321, iteration: 346948
loss: 1.0125311613082886,grad_norm: 0.8619783461705807, iteration: 346949
loss: 0.9931702613830566,grad_norm: 0.9999997263563973, iteration: 346950
loss: 1.0116363763809204,grad_norm: 0.906712332052385, iteration: 346951
loss: 1.0085300207138062,grad_norm: 0.9999991771243474, iteration: 346952
loss: 1.0051425695419312,grad_norm: 0.8812304577280713, iteration: 346953
loss: 0.998487651348114,grad_norm: 0.9591139091758571, iteration: 346954
loss: 1.010563611984253,grad_norm: 0.9999998525147676, iteration: 346955
loss: 0.9676362872123718,grad_norm: 0.8609363289316876, iteration: 346956
loss: 0.98462975025177,grad_norm: 0.9769084045735426, iteration: 346957
loss: 0.9910149574279785,grad_norm: 0.7659386625024808, iteration: 346958
loss: 1.0309898853302002,grad_norm: 0.7513390692564922, iteration: 346959
loss: 1.011217713356018,grad_norm: 0.774706695667223, iteration: 346960
loss: 1.066579818725586,grad_norm: 0.7165924693031481, iteration: 346961
loss: 1.0024473667144775,grad_norm: 0.8416539690067586, iteration: 346962
loss: 1.0434749126434326,grad_norm: 0.9999999334713432, iteration: 346963
loss: 0.9894972443580627,grad_norm: 0.6757952286453165, iteration: 346964
loss: 1.022922158241272,grad_norm: 0.8791825973933605, iteration: 346965
loss: 1.0147876739501953,grad_norm: 0.7812772602356853, iteration: 346966
loss: 1.0052682161331177,grad_norm: 0.9999996202227706, iteration: 346967
loss: 0.9978909492492676,grad_norm: 0.7884381799846416, iteration: 346968
loss: 0.9312586784362793,grad_norm: 0.712707231489533, iteration: 346969
loss: 0.9939537644386292,grad_norm: 0.7872939624437523, iteration: 346970
loss: 0.9934907555580139,grad_norm: 0.804222155402403, iteration: 346971
loss: 1.0018078088760376,grad_norm: 0.7403132273998089, iteration: 346972
loss: 0.9800582528114319,grad_norm: 0.7916307510632749, iteration: 346973
loss: 0.9644502401351929,grad_norm: 0.7511512629954963, iteration: 346974
loss: 1.00380539894104,grad_norm: 0.7100450225566138, iteration: 346975
loss: 0.9784180521965027,grad_norm: 0.7231408297307481, iteration: 346976
loss: 1.0266317129135132,grad_norm: 0.9883737819393147, iteration: 346977
loss: 1.0228203535079956,grad_norm: 0.6978822751727943, iteration: 346978
loss: 1.0320746898651123,grad_norm: 0.8475519611402268, iteration: 346979
loss: 0.9606474041938782,grad_norm: 0.7143155561957926, iteration: 346980
loss: 1.0129956007003784,grad_norm: 0.667704256638554, iteration: 346981
loss: 1.007353663444519,grad_norm: 0.6058862061682423, iteration: 346982
loss: 1.00057053565979,grad_norm: 0.8180157989732864, iteration: 346983
loss: 0.9662464261054993,grad_norm: 0.8689528395600337, iteration: 346984
loss: 0.975456714630127,grad_norm: 0.9231832488743729, iteration: 346985
loss: 1.0457971096038818,grad_norm: 0.8421102031932287, iteration: 346986
loss: 0.9872356653213501,grad_norm: 0.9811591745902688, iteration: 346987
loss: 1.0022313594818115,grad_norm: 0.8790068424456315, iteration: 346988
loss: 1.0373642444610596,grad_norm: 0.8833022542834217, iteration: 346989
loss: 0.9734094738960266,grad_norm: 0.8355334375439482, iteration: 346990
loss: 1.001973032951355,grad_norm: 0.8836926729683605, iteration: 346991
loss: 1.0043498277664185,grad_norm: 0.8610400359202482, iteration: 346992
loss: 0.9949597120285034,grad_norm: 0.8947065734376864, iteration: 346993
loss: 0.9715621471405029,grad_norm: 0.8734685932194293, iteration: 346994
loss: 1.0007405281066895,grad_norm: 0.7550128713881427, iteration: 346995
loss: 0.9641742706298828,grad_norm: 0.7701115548490383, iteration: 346996
loss: 1.0295320749282837,grad_norm: 0.8952824659759966, iteration: 346997
loss: 0.9876386523246765,grad_norm: 0.816608868792622, iteration: 346998
loss: 0.9890250563621521,grad_norm: 0.7951061539284898, iteration: 346999
loss: 0.9523366689682007,grad_norm: 0.7661445156636046, iteration: 347000
loss: 0.9921191334724426,grad_norm: 0.7808430090330221, iteration: 347001
loss: 1.0194854736328125,grad_norm: 0.8895341492440709, iteration: 347002
loss: 1.0087029933929443,grad_norm: 0.9999991899151256, iteration: 347003
loss: 0.9704608917236328,grad_norm: 0.9382105422018148, iteration: 347004
loss: 1.024143934249878,grad_norm: 0.7467987276119171, iteration: 347005
loss: 0.9574365019798279,grad_norm: 0.7767260969973492, iteration: 347006
loss: 1.007775068283081,grad_norm: 0.8199507999008213, iteration: 347007
loss: 1.0086716413497925,grad_norm: 0.813910165036209, iteration: 347008
loss: 1.030393123626709,grad_norm: 0.8567875868089009, iteration: 347009
loss: 0.975218653678894,grad_norm: 0.8567085346374773, iteration: 347010
loss: 1.0086215734481812,grad_norm: 0.904845256605717, iteration: 347011
loss: 1.0019339323043823,grad_norm: 0.7895542214530953, iteration: 347012
loss: 0.9913262724876404,grad_norm: 0.7921855288422531, iteration: 347013
loss: 0.9764270186424255,grad_norm: 0.8374688701869919, iteration: 347014
loss: 1.0141348838806152,grad_norm: 0.999999775582723, iteration: 347015
loss: 0.9632932543754578,grad_norm: 0.812222584299924, iteration: 347016
loss: 1.0148338079452515,grad_norm: 0.9858012294535696, iteration: 347017
loss: 1.0021625757217407,grad_norm: 0.9185508458243217, iteration: 347018
loss: 0.9743357300758362,grad_norm: 0.9593015960424922, iteration: 347019
loss: 1.040995478630066,grad_norm: 0.9202544913009798, iteration: 347020
loss: 1.0188068151474,grad_norm: 0.8154730261148759, iteration: 347021
loss: 1.0008560419082642,grad_norm: 0.943513489449471, iteration: 347022
loss: 1.0003992319107056,grad_norm: 0.9999997323011843, iteration: 347023
loss: 0.9795142412185669,grad_norm: 0.9999992421124736, iteration: 347024
loss: 0.9847349524497986,grad_norm: 0.9999991044567355, iteration: 347025
loss: 1.0200914144515991,grad_norm: 0.9075353219623246, iteration: 347026
loss: 0.9670422673225403,grad_norm: 0.8619633636553201, iteration: 347027
loss: 0.9860191345214844,grad_norm: 0.7512668736617565, iteration: 347028
loss: 1.0298831462860107,grad_norm: 0.7737481244680982, iteration: 347029
loss: 1.0135140419006348,grad_norm: 0.7601771145642054, iteration: 347030
loss: 1.028868556022644,grad_norm: 0.9186300074789802, iteration: 347031
loss: 0.981716513633728,grad_norm: 0.6219116719218022, iteration: 347032
loss: 0.9978659749031067,grad_norm: 0.8469827111150408, iteration: 347033
loss: 0.9974742531776428,grad_norm: 0.7946455527126002, iteration: 347034
loss: 1.0108808279037476,grad_norm: 0.8899426223258593, iteration: 347035
loss: 1.0200661420822144,grad_norm: 0.9264569128572677, iteration: 347036
loss: 0.9929932951927185,grad_norm: 0.854453958657663, iteration: 347037
loss: 0.9895884990692139,grad_norm: 0.8302040840899969, iteration: 347038
loss: 1.0145165920257568,grad_norm: 0.997463373135029, iteration: 347039
loss: 0.982957661151886,grad_norm: 0.7398713771877438, iteration: 347040
loss: 0.9995162487030029,grad_norm: 0.8558986491050218, iteration: 347041
loss: 0.9719080924987793,grad_norm: 0.8201560533590456, iteration: 347042
loss: 1.0130290985107422,grad_norm: 0.6693086621923953, iteration: 347043
loss: 1.0175846815109253,grad_norm: 0.9813873965124115, iteration: 347044
loss: 1.014288067817688,grad_norm: 0.8070973306221829, iteration: 347045
loss: 0.9816714525222778,grad_norm: 0.933091164587146, iteration: 347046
loss: 1.0148974657058716,grad_norm: 0.8328179896442951, iteration: 347047
loss: 0.9985648393630981,grad_norm: 0.6848659612311979, iteration: 347048
loss: 0.9850276112556458,grad_norm: 0.9223465152076534, iteration: 347049
loss: 0.99897301197052,grad_norm: 0.9999990533023448, iteration: 347050
loss: 1.0025603771209717,grad_norm: 0.8884887606039542, iteration: 347051
loss: 0.9710872769355774,grad_norm: 0.9999991370847087, iteration: 347052
loss: 1.0267889499664307,grad_norm: 0.999999730408961, iteration: 347053
loss: 1.0098899602890015,grad_norm: 0.8701462992697424, iteration: 347054
loss: 0.9886499047279358,grad_norm: 0.7578005996828336, iteration: 347055
loss: 1.0490094423294067,grad_norm: 0.7807914015546067, iteration: 347056
loss: 0.9902374744415283,grad_norm: 0.7556122780773161, iteration: 347057
loss: 0.9624753594398499,grad_norm: 0.734412870368697, iteration: 347058
loss: 0.9935327172279358,grad_norm: 0.8556406848298979, iteration: 347059
loss: 1.0184311866760254,grad_norm: 0.73856902204455, iteration: 347060
loss: 0.9639933109283447,grad_norm: 0.7758134675532482, iteration: 347061
loss: 0.9854326248168945,grad_norm: 0.6757474447287886, iteration: 347062
loss: 1.0593153238296509,grad_norm: 0.83191423635153, iteration: 347063
loss: 1.0013715028762817,grad_norm: 0.905493994536911, iteration: 347064
loss: 0.9518693089485168,grad_norm: 0.9032894537880036, iteration: 347065
loss: 1.0180720090866089,grad_norm: 0.8002672953298116, iteration: 347066
loss: 0.9867557287216187,grad_norm: 0.9259322723543754, iteration: 347067
loss: 0.9714206457138062,grad_norm: 0.8922046161694338, iteration: 347068
loss: 0.9901828169822693,grad_norm: 0.8968761369798687, iteration: 347069
loss: 1.0132187604904175,grad_norm: 0.772103764320142, iteration: 347070
loss: 1.003962755203247,grad_norm: 0.8555836150178333, iteration: 347071
loss: 0.9920132160186768,grad_norm: 0.7024135704883527, iteration: 347072
loss: 0.9928433895111084,grad_norm: 0.9999992184553262, iteration: 347073
loss: 1.0089218616485596,grad_norm: 0.6693285400734237, iteration: 347074
loss: 0.9683029055595398,grad_norm: 0.8864662329928372, iteration: 347075
loss: 0.9575816988945007,grad_norm: 0.9141476953474429, iteration: 347076
loss: 1.0041393041610718,grad_norm: 0.8494181175226115, iteration: 347077
loss: 0.9867374300956726,grad_norm: 0.80382553576845, iteration: 347078
loss: 1.005159616470337,grad_norm: 0.8679506960714225, iteration: 347079
loss: 1.0221482515335083,grad_norm: 0.7243129043205492, iteration: 347080
loss: 0.9900092482566833,grad_norm: 0.7552529908135132, iteration: 347081
loss: 1.0012367963790894,grad_norm: 0.7988055081279688, iteration: 347082
loss: 1.0071686506271362,grad_norm: 0.6814148707894531, iteration: 347083
loss: 1.0070278644561768,grad_norm: 0.8450138934445754, iteration: 347084
loss: 0.9887796640396118,grad_norm: 0.8530771875350552, iteration: 347085
loss: 1.057944655418396,grad_norm: 0.9999995900059394, iteration: 347086
loss: 1.0090446472167969,grad_norm: 0.8256151782155002, iteration: 347087
loss: 1.0279273986816406,grad_norm: 0.9706515245692284, iteration: 347088
loss: 0.9896386861801147,grad_norm: 0.7539855038675537, iteration: 347089
loss: 0.9766969680786133,grad_norm: 0.9548847128426665, iteration: 347090
loss: 1.0375139713287354,grad_norm: 0.8402746705573029, iteration: 347091
loss: 1.0110751390457153,grad_norm: 0.7221133829807082, iteration: 347092
loss: 0.9877954721450806,grad_norm: 0.9753323612592856, iteration: 347093
loss: 1.000424861907959,grad_norm: 0.8459238940727882, iteration: 347094
loss: 1.0099724531173706,grad_norm: 0.9878080629836455, iteration: 347095
loss: 1.0107675790786743,grad_norm: 0.8900131836366132, iteration: 347096
loss: 0.9746026992797852,grad_norm: 0.9376132601262923, iteration: 347097
loss: 1.00214684009552,grad_norm: 0.7501696629232552, iteration: 347098
loss: 1.0420455932617188,grad_norm: 0.8781044960547236, iteration: 347099
loss: 0.9840165376663208,grad_norm: 0.9999991616683506, iteration: 347100
loss: 1.0048457384109497,grad_norm: 0.801826288181249, iteration: 347101
loss: 0.9748174548149109,grad_norm: 0.7608199752975064, iteration: 347102
loss: 0.9722992181777954,grad_norm: 0.9999998752560982, iteration: 347103
loss: 0.9750568866729736,grad_norm: 0.831027829746294, iteration: 347104
loss: 1.0413134098052979,grad_norm: 0.9999991301639385, iteration: 347105
loss: 0.9663742184638977,grad_norm: 0.6305310001098121, iteration: 347106
loss: 0.9968398809432983,grad_norm: 0.9348971963466725, iteration: 347107
loss: 1.0104217529296875,grad_norm: 0.7123333660369505, iteration: 347108
loss: 0.9402300119400024,grad_norm: 0.8873324278567483, iteration: 347109
loss: 0.9684880375862122,grad_norm: 0.6979189855382728, iteration: 347110
loss: 0.98507159948349,grad_norm: 0.7738734403619112, iteration: 347111
loss: 0.9745039343833923,grad_norm: 0.7140389451530023, iteration: 347112
loss: 0.9935229420661926,grad_norm: 0.8207263345349112, iteration: 347113
loss: 1.0170111656188965,grad_norm: 0.9286235635852057, iteration: 347114
loss: 1.1064573526382446,grad_norm: 0.9999992186954204, iteration: 347115
loss: 0.9849870800971985,grad_norm: 0.9999992161167665, iteration: 347116
loss: 1.0229412317276,grad_norm: 0.9925889885335165, iteration: 347117
loss: 0.9869436025619507,grad_norm: 0.813328321428537, iteration: 347118
loss: 0.9979652762413025,grad_norm: 0.8123674168216329, iteration: 347119
loss: 0.9973247647285461,grad_norm: 0.8160947017028793, iteration: 347120
loss: 1.0066285133361816,grad_norm: 0.7291892526942667, iteration: 347121
loss: 0.98956298828125,grad_norm: 0.8918434709218979, iteration: 347122
loss: 0.9890931248664856,grad_norm: 0.8167905982302963, iteration: 347123
loss: 0.9620737433433533,grad_norm: 0.8428987257969701, iteration: 347124
loss: 1.0029973983764648,grad_norm: 0.9999995632603468, iteration: 347125
loss: 0.9584516882896423,grad_norm: 0.8877048650856911, iteration: 347126
loss: 0.9768227338790894,grad_norm: 0.8250885251516255, iteration: 347127
loss: 0.9961740374565125,grad_norm: 0.7381625791091019, iteration: 347128
loss: 0.9832463264465332,grad_norm: 0.7642396117630631, iteration: 347129
loss: 0.995762825012207,grad_norm: 0.7584029802804444, iteration: 347130
loss: 1.0265501737594604,grad_norm: 0.9999990339073139, iteration: 347131
loss: 0.9705674648284912,grad_norm: 0.9380824129645451, iteration: 347132
loss: 1.0221185684204102,grad_norm: 0.7023940163785264, iteration: 347133
loss: 1.007091760635376,grad_norm: 0.8573814447906073, iteration: 347134
loss: 0.9773740172386169,grad_norm: 0.7990807138621633, iteration: 347135
loss: 0.9816821813583374,grad_norm: 0.9495122084042431, iteration: 347136
loss: 1.0122417211532593,grad_norm: 0.7615599582400572, iteration: 347137
loss: 0.9781542420387268,grad_norm: 0.8835280993221115, iteration: 347138
loss: 1.0566611289978027,grad_norm: 0.9999991985627014, iteration: 347139
loss: 0.9709888100624084,grad_norm: 0.7016410420538206, iteration: 347140
loss: 1.004658579826355,grad_norm: 0.7095689386276031, iteration: 347141
loss: 0.9854487180709839,grad_norm: 0.846548845248866, iteration: 347142
loss: 0.9947431087493896,grad_norm: 0.8985639452461633, iteration: 347143
loss: 1.0001939535140991,grad_norm: 0.7643391589572043, iteration: 347144
loss: 0.9879639148712158,grad_norm: 0.9999990463466684, iteration: 347145
loss: 0.9838609099388123,grad_norm: 0.9889822902564118, iteration: 347146
loss: 1.057823657989502,grad_norm: 0.9259735812870206, iteration: 347147
loss: 0.9759568572044373,grad_norm: 0.8390076200646005, iteration: 347148
loss: 0.9743638038635254,grad_norm: 0.850018747912586, iteration: 347149
loss: 1.0153803825378418,grad_norm: 0.6756947287607644, iteration: 347150
loss: 1.0403971672058105,grad_norm: 0.9103738694529168, iteration: 347151
loss: 0.9874365329742432,grad_norm: 0.8705185556799583, iteration: 347152
loss: 0.9944753050804138,grad_norm: 0.7921451775935884, iteration: 347153
loss: 1.0058292150497437,grad_norm: 0.8144216610016027, iteration: 347154
loss: 0.9936832189559937,grad_norm: 0.9165326453085684, iteration: 347155
loss: 1.0145363807678223,grad_norm: 0.854833946559468, iteration: 347156
loss: 0.9770306348800659,grad_norm: 0.7937233423741812, iteration: 347157
loss: 0.9738871455192566,grad_norm: 0.9999990822671229, iteration: 347158
loss: 1.0083309412002563,grad_norm: 0.7153769725438516, iteration: 347159
loss: 1.0358436107635498,grad_norm: 0.965555954355834, iteration: 347160
loss: 1.0011563301086426,grad_norm: 0.6708204367913043, iteration: 347161
loss: 1.003135085105896,grad_norm: 0.7778767438496678, iteration: 347162
loss: 1.0204967260360718,grad_norm: 0.7875625367269877, iteration: 347163
loss: 0.9771022200584412,grad_norm: 0.7879776634589679, iteration: 347164
loss: 1.0141444206237793,grad_norm: 0.7244603326187117, iteration: 347165
loss: 0.9846575260162354,grad_norm: 0.9206890067372157, iteration: 347166
loss: 1.181947112083435,grad_norm: 0.9999993597790647, iteration: 347167
loss: 1.0002473592758179,grad_norm: 0.9999990718399023, iteration: 347168
loss: 0.9679239988327026,grad_norm: 0.960002069226541, iteration: 347169
loss: 1.0241895914077759,grad_norm: 0.9999991065306129, iteration: 347170
loss: 1.034820556640625,grad_norm: 0.88612901365293, iteration: 347171
loss: 1.0170718431472778,grad_norm: 0.8166787760449364, iteration: 347172
loss: 0.9891458749771118,grad_norm: 0.9269098831540561, iteration: 347173
loss: 0.9776349663734436,grad_norm: 0.8577986215909702, iteration: 347174
loss: 0.9890140295028687,grad_norm: 0.9079788072327855, iteration: 347175
loss: 0.9444301724433899,grad_norm: 0.8296728156032712, iteration: 347176
loss: 0.9716586470603943,grad_norm: 0.8022079787412953, iteration: 347177
loss: 0.9998850226402283,grad_norm: 0.9220090082698383, iteration: 347178
loss: 1.010338544845581,grad_norm: 0.7688678364353073, iteration: 347179
loss: 1.0057644844055176,grad_norm: 0.8175522177462115, iteration: 347180
loss: 1.0089722871780396,grad_norm: 0.9023810708621508, iteration: 347181
loss: 1.00831139087677,grad_norm: 0.9124859787632915, iteration: 347182
loss: 1.0104637145996094,grad_norm: 0.9769654081776536, iteration: 347183
loss: 1.0310564041137695,grad_norm: 0.7418083156201133, iteration: 347184
loss: 1.0262490510940552,grad_norm: 0.8390859724263735, iteration: 347185
loss: 0.9701172113418579,grad_norm: 0.9497876131631715, iteration: 347186
loss: 1.0045781135559082,grad_norm: 0.7838837156433427, iteration: 347187
loss: 0.9793615341186523,grad_norm: 0.8740809372580503, iteration: 347188
loss: 0.9977937340736389,grad_norm: 0.9999995544100773, iteration: 347189
loss: 1.0389724969863892,grad_norm: 0.7117664990013715, iteration: 347190
loss: 1.003832221031189,grad_norm: 0.8294375809232779, iteration: 347191
loss: 1.0106267929077148,grad_norm: 0.8220541452579291, iteration: 347192
loss: 0.9619011282920837,grad_norm: 0.9900790347915175, iteration: 347193
loss: 1.0503439903259277,grad_norm: 0.9999991383733241, iteration: 347194
loss: 0.9802342057228088,grad_norm: 0.992686390866153, iteration: 347195
loss: 1.0015943050384521,grad_norm: 0.8510432696676232, iteration: 347196
loss: 0.9899589419364929,grad_norm: 0.8741264272962095, iteration: 347197
loss: 0.9863520860671997,grad_norm: 0.717784446549718, iteration: 347198
loss: 0.9917909502983093,grad_norm: 0.925935588557117, iteration: 347199
loss: 0.966328501701355,grad_norm: 0.7570805560828706, iteration: 347200
loss: 1.0445202589035034,grad_norm: 0.8588981360424388, iteration: 347201
loss: 1.0126253366470337,grad_norm: 0.801025103052285, iteration: 347202
loss: 1.0072425603866577,grad_norm: 0.9690343723662593, iteration: 347203
loss: 1.021978735923767,grad_norm: 0.9999990761299071, iteration: 347204
loss: 0.9824091792106628,grad_norm: 0.7054215827197525, iteration: 347205
loss: 1.0157132148742676,grad_norm: 0.9150552266938884, iteration: 347206
loss: 1.001686453819275,grad_norm: 0.9253595764612771, iteration: 347207
loss: 0.9857540130615234,grad_norm: 0.7584040220013172, iteration: 347208
loss: 1.0028362274169922,grad_norm: 0.8477352930817937, iteration: 347209
loss: 0.9949120879173279,grad_norm: 0.7992540959746629, iteration: 347210
loss: 1.035742163658142,grad_norm: 0.9217381597362957, iteration: 347211
loss: 0.9715155363082886,grad_norm: 0.7766442321515046, iteration: 347212
loss: 0.9749504327774048,grad_norm: 0.66963404682996, iteration: 347213
loss: 0.9992550611495972,grad_norm: 0.6843163909496834, iteration: 347214
loss: 1.0295653343200684,grad_norm: 0.8345357414954547, iteration: 347215
loss: 0.9911866188049316,grad_norm: 0.8232293250717424, iteration: 347216
loss: 1.0609734058380127,grad_norm: 0.8596386765902823, iteration: 347217
loss: 1.0255929231643677,grad_norm: 0.7040462923978694, iteration: 347218
loss: 1.0282422304153442,grad_norm: 0.8348794423613899, iteration: 347219
loss: 0.9951000213623047,grad_norm: 0.8165905764307645, iteration: 347220
loss: 1.0000097751617432,grad_norm: 0.8810460773217582, iteration: 347221
loss: 0.9640950560569763,grad_norm: 0.8284933975863604, iteration: 347222
loss: 1.0174884796142578,grad_norm: 0.7731224470195244, iteration: 347223
loss: 1.018214225769043,grad_norm: 0.9123238737043027, iteration: 347224
loss: 0.9714360237121582,grad_norm: 0.7930705454169359, iteration: 347225
loss: 0.9777610301971436,grad_norm: 0.7558012803582259, iteration: 347226
loss: 0.9838690757751465,grad_norm: 0.8077086169018803, iteration: 347227
loss: 0.96844482421875,grad_norm: 0.7846634787277968, iteration: 347228
loss: 0.9581350088119507,grad_norm: 0.8155417338829599, iteration: 347229
loss: 0.9959542751312256,grad_norm: 0.9033544251234221, iteration: 347230
loss: 0.9629932045936584,grad_norm: 0.7424600388730432, iteration: 347231
loss: 0.9937112331390381,grad_norm: 0.8101048462265218, iteration: 347232
loss: 1.0147722959518433,grad_norm: 0.999999176416395, iteration: 347233
loss: 1.001358985900879,grad_norm: 0.7103782632403893, iteration: 347234
loss: 1.0201904773712158,grad_norm: 0.7582626072052276, iteration: 347235
loss: 0.9798924326896667,grad_norm: 0.8190252181091741, iteration: 347236
loss: 1.0360547304153442,grad_norm: 0.8560274426102386, iteration: 347237
loss: 0.9910428524017334,grad_norm: 0.8023285443128342, iteration: 347238
loss: 1.014461874961853,grad_norm: 0.8030116564954206, iteration: 347239
loss: 0.9821510910987854,grad_norm: 0.7232151025115556, iteration: 347240
loss: 1.0300813913345337,grad_norm: 0.8131201026867005, iteration: 347241
loss: 0.9744645953178406,grad_norm: 0.7677616946562674, iteration: 347242
loss: 0.9947611093521118,grad_norm: 0.8102579202428482, iteration: 347243
loss: 1.007976770401001,grad_norm: 0.8599974466251219, iteration: 347244
loss: 1.0164291858673096,grad_norm: 0.8028214286426623, iteration: 347245
loss: 0.973352313041687,grad_norm: 0.7291224031142819, iteration: 347246
loss: 0.9920503497123718,grad_norm: 0.902681581476962, iteration: 347247
loss: 0.961631715297699,grad_norm: 0.8935096858991228, iteration: 347248
loss: 0.9752665758132935,grad_norm: 0.6784310974683306, iteration: 347249
loss: 1.017273187637329,grad_norm: 0.8563502698273242, iteration: 347250
loss: 1.0077992677688599,grad_norm: 0.6424733456457872, iteration: 347251
loss: 0.9781431555747986,grad_norm: 0.8080708102304348, iteration: 347252
loss: 1.0031242370605469,grad_norm: 0.9999990973640898, iteration: 347253
loss: 0.9832136631011963,grad_norm: 0.8252246174275126, iteration: 347254
loss: 0.9959585666656494,grad_norm: 0.7643559615200352, iteration: 347255
loss: 0.9883036613464355,grad_norm: 0.999998933403738, iteration: 347256
loss: 1.0680434703826904,grad_norm: 0.8736577616578552, iteration: 347257
loss: 1.0213830471038818,grad_norm: 0.9113507900514106, iteration: 347258
loss: 0.9905326962471008,grad_norm: 0.7804581096949896, iteration: 347259
loss: 0.9609954953193665,grad_norm: 0.7557864402003913, iteration: 347260
loss: 1.0182812213897705,grad_norm: 0.8990069504337188, iteration: 347261
loss: 0.9999706745147705,grad_norm: 0.9769230596735083, iteration: 347262
loss: 1.0071309804916382,grad_norm: 0.7387929857131256, iteration: 347263
loss: 1.0282061100006104,grad_norm: 0.6990723779680071, iteration: 347264
loss: 1.0120437145233154,grad_norm: 0.8821321534345073, iteration: 347265
loss: 1.0241971015930176,grad_norm: 0.782122181351086, iteration: 347266
loss: 0.9913181066513062,grad_norm: 0.9999990625552043, iteration: 347267
loss: 0.9775089025497437,grad_norm: 0.796720575961382, iteration: 347268
loss: 1.0223290920257568,grad_norm: 0.8254913873170521, iteration: 347269
loss: 1.0373679399490356,grad_norm: 0.9442520627310352, iteration: 347270
loss: 1.0146719217300415,grad_norm: 0.9999995509991361, iteration: 347271
loss: 0.9572568535804749,grad_norm: 0.911623584930545, iteration: 347272
loss: 1.081404209136963,grad_norm: 0.9239703921330421, iteration: 347273
loss: 1.0021957159042358,grad_norm: 0.9402352863447331, iteration: 347274
loss: 1.0367331504821777,grad_norm: 0.999999145985648, iteration: 347275
loss: 0.9989665746688843,grad_norm: 0.8281743162466582, iteration: 347276
loss: 1.0498546361923218,grad_norm: 0.999999262985927, iteration: 347277
loss: 1.0025427341461182,grad_norm: 0.7233857504998968, iteration: 347278
loss: 0.9874139428138733,grad_norm: 0.6972613264228181, iteration: 347279
loss: 1.045926809310913,grad_norm: 0.8758163211848187, iteration: 347280
loss: 1.0295828580856323,grad_norm: 0.9540987060785586, iteration: 347281
loss: 1.0382272005081177,grad_norm: 0.8712753405935991, iteration: 347282
loss: 0.9590981602668762,grad_norm: 0.7272054820154147, iteration: 347283
loss: 1.021510362625122,grad_norm: 0.8255981535758738, iteration: 347284
loss: 1.0072834491729736,grad_norm: 0.7311540610974836, iteration: 347285
loss: 1.0211807489395142,grad_norm: 0.7713869106381598, iteration: 347286
loss: 1.005349040031433,grad_norm: 0.867090830557407, iteration: 347287
loss: 0.9468302726745605,grad_norm: 0.8208063664273688, iteration: 347288
loss: 0.9573504328727722,grad_norm: 0.8067465553464064, iteration: 347289
loss: 0.9694148898124695,grad_norm: 0.9999992182679873, iteration: 347290
loss: 1.009700059890747,grad_norm: 0.8335910583260164, iteration: 347291
loss: 0.9973775744438171,grad_norm: 0.6589880249687738, iteration: 347292
loss: 0.9857799410820007,grad_norm: 0.989461365434024, iteration: 347293
loss: 1.0100059509277344,grad_norm: 0.7354511297196391, iteration: 347294
loss: 0.9972135424613953,grad_norm: 0.7298762812865265, iteration: 347295
loss: 0.9850813150405884,grad_norm: 0.6897469955371438, iteration: 347296
loss: 0.9857686758041382,grad_norm: 0.7981293255408263, iteration: 347297
loss: 1.0221606492996216,grad_norm: 0.7169273023579634, iteration: 347298
loss: 1.0646684169769287,grad_norm: 0.9999989442773296, iteration: 347299
loss: 1.0012038946151733,grad_norm: 0.722487709381101, iteration: 347300
loss: 1.0040078163146973,grad_norm: 0.6757998122839446, iteration: 347301
loss: 0.9872633814811707,grad_norm: 0.9554471523466141, iteration: 347302
loss: 0.9809871315956116,grad_norm: 0.7426696204998496, iteration: 347303
loss: 1.0624191761016846,grad_norm: 0.9999993438775333, iteration: 347304
loss: 1.0027827024459839,grad_norm: 0.9999991209318339, iteration: 347305
loss: 0.9961898326873779,grad_norm: 0.8094998946712874, iteration: 347306
loss: 1.0024974346160889,grad_norm: 0.8812072793513078, iteration: 347307
loss: 1.0017346143722534,grad_norm: 0.7593754036432603, iteration: 347308
loss: 1.0171818733215332,grad_norm: 0.8172517847310707, iteration: 347309
loss: 1.0314024686813354,grad_norm: 0.9999997591299776, iteration: 347310
loss: 1.0021157264709473,grad_norm: 0.826661860293798, iteration: 347311
loss: 0.9923056364059448,grad_norm: 0.80835571102242, iteration: 347312
loss: 0.9709012508392334,grad_norm: 0.6900468279431284, iteration: 347313
loss: 0.9933990240097046,grad_norm: 0.7513475944442084, iteration: 347314
loss: 1.0059616565704346,grad_norm: 0.8455522464552444, iteration: 347315
loss: 0.9664842486381531,grad_norm: 0.7951628481388815, iteration: 347316
loss: 1.0344825983047485,grad_norm: 0.8092879249746348, iteration: 347317
loss: 0.9923149347305298,grad_norm: 0.7561041819247295, iteration: 347318
loss: 0.9878816604614258,grad_norm: 0.8698454128380816, iteration: 347319
loss: 1.0249030590057373,grad_norm: 0.7296422505505127, iteration: 347320
loss: 1.0451182126998901,grad_norm: 0.858329458452041, iteration: 347321
loss: 0.9591121673583984,grad_norm: 0.7505931884837812, iteration: 347322
loss: 0.9977124333381653,grad_norm: 0.715788594932343, iteration: 347323
loss: 1.0012069940567017,grad_norm: 0.7954239150765969, iteration: 347324
loss: 1.0159962177276611,grad_norm: 0.8199730434088289, iteration: 347325
loss: 0.9965248107910156,grad_norm: 0.7591043636834047, iteration: 347326
loss: 0.9777713418006897,grad_norm: 0.6565581419457143, iteration: 347327
loss: 1.0021494626998901,grad_norm: 0.7732244637652443, iteration: 347328
loss: 1.0293580293655396,grad_norm: 0.7829367414311218, iteration: 347329
loss: 1.0063974857330322,grad_norm: 0.8758034319145703, iteration: 347330
loss: 0.9972688555717468,grad_norm: 0.9999995552700827, iteration: 347331
loss: 0.9484139084815979,grad_norm: 0.8627771060345794, iteration: 347332
loss: 1.0449278354644775,grad_norm: 0.9951014896906208, iteration: 347333
loss: 0.9886549115180969,grad_norm: 0.8189596239021828, iteration: 347334
loss: 0.9942876696586609,grad_norm: 0.6776565271454894, iteration: 347335
loss: 1.008001446723938,grad_norm: 0.8636149443820619, iteration: 347336
loss: 0.9896358251571655,grad_norm: 0.8488063192633939, iteration: 347337
loss: 1.0045536756515503,grad_norm: 0.685849444721793, iteration: 347338
loss: 1.0351521968841553,grad_norm: 0.7450017401820715, iteration: 347339
loss: 0.9592686891555786,grad_norm: 0.761630920659477, iteration: 347340
loss: 1.0108468532562256,grad_norm: 0.8695228514918889, iteration: 347341
loss: 0.9817841649055481,grad_norm: 0.8504593404745527, iteration: 347342
loss: 1.0001354217529297,grad_norm: 0.8298275428927545, iteration: 347343
loss: 0.9911198616027832,grad_norm: 0.8553507937517292, iteration: 347344
loss: 0.9873517751693726,grad_norm: 0.8420105613593123, iteration: 347345
loss: 0.9686124920845032,grad_norm: 0.8767386638055938, iteration: 347346
loss: 1.024150013923645,grad_norm: 0.9999996935234043, iteration: 347347
loss: 1.0118411779403687,grad_norm: 0.9890946102937289, iteration: 347348
loss: 0.9922019839286804,grad_norm: 0.8839985463581809, iteration: 347349
loss: 0.9882949590682983,grad_norm: 0.8042594621477914, iteration: 347350
loss: 1.0060025453567505,grad_norm: 0.7889768759871292, iteration: 347351
loss: 1.0026028156280518,grad_norm: 0.9973091777855602, iteration: 347352
loss: 0.9993427395820618,grad_norm: 0.9999990830189315, iteration: 347353
loss: 0.9934113025665283,grad_norm: 0.9451672925201788, iteration: 347354
loss: 1.0125494003295898,grad_norm: 0.6789704682532276, iteration: 347355
loss: 1.0107005834579468,grad_norm: 0.8151802536916416, iteration: 347356
loss: 1.0374579429626465,grad_norm: 0.9906094492002033, iteration: 347357
loss: 1.04971182346344,grad_norm: 0.9999997768288523, iteration: 347358
loss: 1.0184643268585205,grad_norm: 0.8556552450402697, iteration: 347359
loss: 1.0101075172424316,grad_norm: 0.7266709178853671, iteration: 347360
loss: 1.0039395093917847,grad_norm: 0.7919135587300046, iteration: 347361
loss: 1.047963261604309,grad_norm: 0.9602733256878504, iteration: 347362
loss: 1.0058188438415527,grad_norm: 0.9197024336853983, iteration: 347363
loss: 1.0609694719314575,grad_norm: 0.9999999433064669, iteration: 347364
loss: 1.036120057106018,grad_norm: 1.0000000557046707, iteration: 347365
loss: 1.1248408555984497,grad_norm: 0.9681979823345945, iteration: 347366
loss: 0.9478510618209839,grad_norm: 0.8460630770809705, iteration: 347367
loss: 1.0124320983886719,grad_norm: 0.8246192371531026, iteration: 347368
loss: 0.9815146327018738,grad_norm: 0.9999990271439511, iteration: 347369
loss: 0.9858928918838501,grad_norm: 0.7529418105319189, iteration: 347370
loss: 0.9718597531318665,grad_norm: 0.740244141153428, iteration: 347371
loss: 0.9898785352706909,grad_norm: 0.9319434251782756, iteration: 347372
loss: 1.0188801288604736,grad_norm: 0.8066023100773244, iteration: 347373
loss: 0.9848347902297974,grad_norm: 0.8954595336699914, iteration: 347374
loss: 1.0031259059906006,grad_norm: 0.9483722517899075, iteration: 347375
loss: 1.0060385465621948,grad_norm: 0.9480338972391714, iteration: 347376
loss: 0.9754632115364075,grad_norm: 0.8209819894403207, iteration: 347377
loss: 0.9807876944541931,grad_norm: 0.8126339935998003, iteration: 347378
loss: 0.9888019561767578,grad_norm: 0.8785687228510289, iteration: 347379
loss: 1.0183333158493042,grad_norm: 0.9139240995764611, iteration: 347380
loss: 0.9797267913818359,grad_norm: 0.8342828888869679, iteration: 347381
loss: 1.0308589935302734,grad_norm: 0.7960700688672729, iteration: 347382
loss: 0.9855687022209167,grad_norm: 0.7576833653143232, iteration: 347383
loss: 0.9771450757980347,grad_norm: 0.8290558359812321, iteration: 347384
loss: 1.0422393083572388,grad_norm: 0.7916994329137713, iteration: 347385
loss: 0.9841504693031311,grad_norm: 0.8790929046170132, iteration: 347386
loss: 1.0407545566558838,grad_norm: 0.8699347727982444, iteration: 347387
loss: 1.0013351440429688,grad_norm: 0.847962661468645, iteration: 347388
loss: 1.0207427740097046,grad_norm: 0.6679209790173306, iteration: 347389
loss: 1.0139943361282349,grad_norm: 0.7060938668220089, iteration: 347390
loss: 1.0422723293304443,grad_norm: 0.9999994969021755, iteration: 347391
loss: 1.032652497291565,grad_norm: 0.9999998512092183, iteration: 347392
loss: 0.9586926102638245,grad_norm: 0.8621331410378098, iteration: 347393
loss: 0.9640563726425171,grad_norm: 0.9435409957486152, iteration: 347394
loss: 0.9683464765548706,grad_norm: 0.8713029004318774, iteration: 347395
loss: 1.0270556211471558,grad_norm: 0.8653155828225486, iteration: 347396
loss: 0.9862242341041565,grad_norm: 0.6633179250824344, iteration: 347397
loss: 0.9920420050621033,grad_norm: 0.9999991608185712, iteration: 347398
loss: 0.9939818978309631,grad_norm: 0.8746145392508073, iteration: 347399
loss: 1.0820140838623047,grad_norm: 0.8384329490180241, iteration: 347400
loss: 0.9812259078025818,grad_norm: 0.9999998801082906, iteration: 347401
loss: 1.0716463327407837,grad_norm: 0.9999999056778246, iteration: 347402
loss: 0.9854028820991516,grad_norm: 0.7840344290977906, iteration: 347403
loss: 0.9813964366912842,grad_norm: 0.8305503634486484, iteration: 347404
loss: 1.0399054288864136,grad_norm: 0.9999991019984096, iteration: 347405
loss: 0.999666690826416,grad_norm: 0.9430126252599497, iteration: 347406
loss: 1.0009913444519043,grad_norm: 0.7800016674349085, iteration: 347407
loss: 1.0247784852981567,grad_norm: 0.7928716498753168, iteration: 347408
loss: 1.0041728019714355,grad_norm: 0.9570494686697419, iteration: 347409
loss: 1.036032795906067,grad_norm: 0.9847975326332156, iteration: 347410
loss: 1.0050272941589355,grad_norm: 0.7359973114941349, iteration: 347411
loss: 1.0275599956512451,grad_norm: 0.7854604915420503, iteration: 347412
loss: 0.9856348633766174,grad_norm: 0.9999992943925149, iteration: 347413
loss: 0.9735147953033447,grad_norm: 0.8811291373733844, iteration: 347414
loss: 1.0058120489120483,grad_norm: 0.8076365201053082, iteration: 347415
loss: 1.0113857984542847,grad_norm: 0.7306324495947402, iteration: 347416
loss: 1.004010558128357,grad_norm: 0.6360819300330142, iteration: 347417
loss: 1.0210338830947876,grad_norm: 0.8034073142629955, iteration: 347418
loss: 0.9954870343208313,grad_norm: 0.738535287590297, iteration: 347419
loss: 0.9894565343856812,grad_norm: 0.9999994613690384, iteration: 347420
loss: 0.9728929996490479,grad_norm: 0.7179650927618555, iteration: 347421
loss: 0.9863383769989014,grad_norm: 0.8041849802048205, iteration: 347422
loss: 1.0004388093948364,grad_norm: 0.7610591619961015, iteration: 347423
loss: 1.0638036727905273,grad_norm: 0.7562178169874347, iteration: 347424
loss: 1.0334699153900146,grad_norm: 0.8706841000610761, iteration: 347425
loss: 0.9639943242073059,grad_norm: 0.9531085119593075, iteration: 347426
loss: 1.0139473676681519,grad_norm: 0.8784931026871957, iteration: 347427
loss: 0.972195565700531,grad_norm: 0.8411590544488458, iteration: 347428
loss: 1.003735065460205,grad_norm: 0.9014321589343457, iteration: 347429
loss: 0.9939167499542236,grad_norm: 0.7689368324140327, iteration: 347430
loss: 0.9733980894088745,grad_norm: 0.8164385858963269, iteration: 347431
loss: 0.9800835251808167,grad_norm: 0.8861128146249303, iteration: 347432
loss: 1.0155887603759766,grad_norm: 0.8419946927111684, iteration: 347433
loss: 1.0583090782165527,grad_norm: 0.8360751270587304, iteration: 347434
loss: 0.9992987513542175,grad_norm: 0.8934040700177258, iteration: 347435
loss: 1.0053510665893555,grad_norm: 0.9999992959058389, iteration: 347436
loss: 0.9944713115692139,grad_norm: 0.8604875720434197, iteration: 347437
loss: 1.018093466758728,grad_norm: 0.8611659235861862, iteration: 347438
loss: 1.0255470275878906,grad_norm: 0.798088914202826, iteration: 347439
loss: 0.9690175652503967,grad_norm: 0.8019082970675191, iteration: 347440
loss: 1.0652353763580322,grad_norm: 0.8388337104194092, iteration: 347441
loss: 1.0009665489196777,grad_norm: 0.9056931797196488, iteration: 347442
loss: 1.0201040506362915,grad_norm: 0.727694046104245, iteration: 347443
loss: 0.9968107342720032,grad_norm: 0.9999991989542842, iteration: 347444
loss: 0.9535048604011536,grad_norm: 0.9056517310492583, iteration: 347445
loss: 0.9450833201408386,grad_norm: 0.9036691577109845, iteration: 347446
loss: 0.9826647639274597,grad_norm: 0.9029850130366808, iteration: 347447
loss: 0.9982078671455383,grad_norm: 0.8328988551587286, iteration: 347448
loss: 0.9617235064506531,grad_norm: 0.8058395134558108, iteration: 347449
loss: 0.9850196838378906,grad_norm: 0.6970675670554204, iteration: 347450
loss: 0.9699098467826843,grad_norm: 0.7715987174786358, iteration: 347451
loss: 1.0154668092727661,grad_norm: 0.9999993701003653, iteration: 347452
loss: 1.0125902891159058,grad_norm: 0.9698020575126726, iteration: 347453
loss: 1.000614881515503,grad_norm: 0.7837795979433414, iteration: 347454
loss: 1.0012160539627075,grad_norm: 0.9105413671830456, iteration: 347455
loss: 0.9546417593955994,grad_norm: 0.8053407282620026, iteration: 347456
loss: 0.9760692119598389,grad_norm: 0.8280857259046437, iteration: 347457
loss: 1.0092742443084717,grad_norm: 0.7851540912291043, iteration: 347458
loss: 1.0021166801452637,grad_norm: 0.8758037832399783, iteration: 347459
loss: 1.0437047481536865,grad_norm: 0.8837591675878674, iteration: 347460
loss: 1.0055233240127563,grad_norm: 0.7693128967714479, iteration: 347461
loss: 1.01148521900177,grad_norm: 0.8309470793593536, iteration: 347462
loss: 1.0114635229110718,grad_norm: 0.9615751641377639, iteration: 347463
loss: 0.9962543845176697,grad_norm: 0.6760331054643349, iteration: 347464
loss: 0.9858661890029907,grad_norm: 0.8097138682525732, iteration: 347465
loss: 0.9801998734474182,grad_norm: 0.9798888302279782, iteration: 347466
loss: 0.9880032539367676,grad_norm: 0.9883203688730067, iteration: 347467
loss: 1.0051825046539307,grad_norm: 0.8142065943881481, iteration: 347468
loss: 1.0352413654327393,grad_norm: 0.7187812337961438, iteration: 347469
loss: 0.9613910913467407,grad_norm: 0.8473471336547258, iteration: 347470
loss: 0.9770886301994324,grad_norm: 0.7364456659538392, iteration: 347471
loss: 1.0263491868972778,grad_norm: 0.9342295869204831, iteration: 347472
loss: 1.0006675720214844,grad_norm: 0.7068327919429122, iteration: 347473
loss: 0.9945685267448425,grad_norm: 0.9959020143298526, iteration: 347474
loss: 1.0241284370422363,grad_norm: 0.7875656302310629, iteration: 347475
loss: 1.002668857574463,grad_norm: 0.7271238956453732, iteration: 347476
loss: 0.9766457080841064,grad_norm: 0.7278943334887028, iteration: 347477
loss: 0.973763644695282,grad_norm: 0.8761334923813446, iteration: 347478
loss: 1.0717273950576782,grad_norm: 0.7830689279654784, iteration: 347479
loss: 0.9325898289680481,grad_norm: 0.9322433340670042, iteration: 347480
loss: 1.0056830644607544,grad_norm: 0.7515283951680459, iteration: 347481
loss: 1.004977822303772,grad_norm: 0.7297486426973514, iteration: 347482
loss: 1.0145645141601562,grad_norm: 0.8139334639557091, iteration: 347483
loss: 0.9621535539627075,grad_norm: 0.8737904756919969, iteration: 347484
loss: 1.0161325931549072,grad_norm: 0.9415379351304953, iteration: 347485
loss: 1.0087915658950806,grad_norm: 0.9228552897279306, iteration: 347486
loss: 0.9849521517753601,grad_norm: 0.6945490471914221, iteration: 347487
loss: 0.9921308159828186,grad_norm: 0.7558055582025378, iteration: 347488
loss: 1.0467729568481445,grad_norm: 0.9999992850643579, iteration: 347489
loss: 0.9666911959648132,grad_norm: 0.9999991306254222, iteration: 347490
loss: 1.0010532140731812,grad_norm: 0.9357120461272569, iteration: 347491
loss: 1.0185095071792603,grad_norm: 0.9191685427926982, iteration: 347492
loss: 0.971503496170044,grad_norm: 0.7347971387544964, iteration: 347493
loss: 1.019370198249817,grad_norm: 0.7971127577981546, iteration: 347494
loss: 1.0143296718597412,grad_norm: 0.9999994169621174, iteration: 347495
loss: 0.9899888634681702,grad_norm: 0.7684168671613935, iteration: 347496
loss: 1.0244102478027344,grad_norm: 0.8612962778061652, iteration: 347497
loss: 1.0263144969940186,grad_norm: 0.8823796846965838, iteration: 347498
loss: 0.9846485257148743,grad_norm: 0.8582672999413876, iteration: 347499
loss: 0.9664129614830017,grad_norm: 0.7845679970955193, iteration: 347500
loss: 1.0417605638504028,grad_norm: 0.8941747569378947, iteration: 347501
loss: 1.0306774377822876,grad_norm: 0.8595079782433701, iteration: 347502
loss: 1.0406789779663086,grad_norm: 0.8636104168272832, iteration: 347503
loss: 0.9987154603004456,grad_norm: 0.8254432791117774, iteration: 347504
loss: 1.0004122257232666,grad_norm: 0.864580208234832, iteration: 347505
loss: 0.9864065647125244,grad_norm: 0.8652925764743393, iteration: 347506
loss: 0.9898638129234314,grad_norm: 0.67289130952507, iteration: 347507
loss: 1.0078924894332886,grad_norm: 0.8641385016384795, iteration: 347508
loss: 1.0554282665252686,grad_norm: 0.9999989747625111, iteration: 347509
loss: 0.9713435769081116,grad_norm: 0.9999992955481083, iteration: 347510
loss: 1.0209157466888428,grad_norm: 0.7995928784747418, iteration: 347511
loss: 0.9860548377037048,grad_norm: 0.7595198613623195, iteration: 347512
loss: 0.9978066682815552,grad_norm: 0.911286863914023, iteration: 347513
loss: 0.9774909615516663,grad_norm: 0.6812228186231649, iteration: 347514
loss: 1.0531704425811768,grad_norm: 0.9999990865135666, iteration: 347515
loss: 1.0047962665557861,grad_norm: 0.9557667425694174, iteration: 347516
loss: 0.9895575046539307,grad_norm: 0.7796310050376279, iteration: 347517
loss: 1.0524524450302124,grad_norm: 0.8470824186418836, iteration: 347518
loss: 0.9818601608276367,grad_norm: 0.8433426801054592, iteration: 347519
loss: 0.9954522252082825,grad_norm: 0.8898957311367972, iteration: 347520
loss: 0.9891815185546875,grad_norm: 0.8708747882311185, iteration: 347521
loss: 1.0773775577545166,grad_norm: 0.9999994528144621, iteration: 347522
loss: 0.9846155047416687,grad_norm: 0.6959985050337187, iteration: 347523
loss: 1.0367320775985718,grad_norm: 0.7879161024347126, iteration: 347524
loss: 0.980251669883728,grad_norm: 0.7712995339025862, iteration: 347525
loss: 1.0055240392684937,grad_norm: 0.7109278967157725, iteration: 347526
loss: 0.9705464243888855,grad_norm: 0.8571329911643171, iteration: 347527
loss: 0.9860402941703796,grad_norm: 0.8714199785387514, iteration: 347528
loss: 1.024061679840088,grad_norm: 0.9999999470726834, iteration: 347529
loss: 1.0116419792175293,grad_norm: 0.7222174081616501, iteration: 347530
loss: 0.9843841791152954,grad_norm: 0.6959384048106352, iteration: 347531
loss: 0.9820632338523865,grad_norm: 0.9289920840762714, iteration: 347532
loss: 0.9817481637001038,grad_norm: 0.8214757149989016, iteration: 347533
loss: 1.0328538417816162,grad_norm: 0.9238532760459511, iteration: 347534
loss: 1.0725955963134766,grad_norm: 0.9574484995931907, iteration: 347535
loss: 1.0310765504837036,grad_norm: 0.8377090831268069, iteration: 347536
loss: 1.0247883796691895,grad_norm: 0.9999998585536003, iteration: 347537
loss: 1.00980806350708,grad_norm: 0.7421393777481006, iteration: 347538
loss: 1.0284534692764282,grad_norm: 0.8301438351172868, iteration: 347539
loss: 0.9784621596336365,grad_norm: 0.7293473109975713, iteration: 347540
loss: 1.003448247909546,grad_norm: 0.9000213284895587, iteration: 347541
loss: 1.0572110414505005,grad_norm: 0.7962350525570968, iteration: 347542
loss: 1.0170289278030396,grad_norm: 0.8530073602413256, iteration: 347543
loss: 0.9745304584503174,grad_norm: 0.9999989794635168, iteration: 347544
loss: 1.0242207050323486,grad_norm: 0.8153238400832226, iteration: 347545
loss: 0.9871302843093872,grad_norm: 0.7713758768607535, iteration: 347546
loss: 0.9822800755500793,grad_norm: 0.7505309048793871, iteration: 347547
loss: 0.9533422589302063,grad_norm: 0.8142164465052456, iteration: 347548
loss: 1.0020121335983276,grad_norm: 0.9287152995897088, iteration: 347549
loss: 0.9756044745445251,grad_norm: 0.7814792046886523, iteration: 347550
loss: 0.9878457188606262,grad_norm: 0.8408954533761067, iteration: 347551
loss: 1.0031096935272217,grad_norm: 0.7891777675428845, iteration: 347552
loss: 1.0023589134216309,grad_norm: 0.9999989895175653, iteration: 347553
loss: 1.0141652822494507,grad_norm: 0.7359557213730975, iteration: 347554
loss: 1.0094244480133057,grad_norm: 0.8048613472730205, iteration: 347555
loss: 1.0016692876815796,grad_norm: 0.9999992200441538, iteration: 347556
loss: 1.0325732231140137,grad_norm: 0.999999098811089, iteration: 347557
loss: 1.0090556144714355,grad_norm: 0.8256120497882843, iteration: 347558
loss: 1.097406268119812,grad_norm: 0.9999990714756016, iteration: 347559
loss: 0.963939905166626,grad_norm: 0.9231141598111168, iteration: 347560
loss: 1.0182619094848633,grad_norm: 0.8523834097724241, iteration: 347561
loss: 0.9901749491691589,grad_norm: 0.7854860522444836, iteration: 347562
loss: 1.035380482673645,grad_norm: 0.8359157745489438, iteration: 347563
loss: 0.9805636405944824,grad_norm: 0.7324200406649142, iteration: 347564
loss: 0.9776884913444519,grad_norm: 0.7695491847211148, iteration: 347565
loss: 1.0136656761169434,grad_norm: 0.9695090631862257, iteration: 347566
loss: 0.9849075078964233,grad_norm: 0.7502722794837255, iteration: 347567
loss: 0.99785315990448,grad_norm: 0.8485644404341629, iteration: 347568
loss: 0.9943455457687378,grad_norm: 0.9999993116300638, iteration: 347569
loss: 0.9976492524147034,grad_norm: 0.708128909748642, iteration: 347570
loss: 1.0042003393173218,grad_norm: 0.7891735279809935, iteration: 347571
loss: 1.009663701057434,grad_norm: 0.9999999299396017, iteration: 347572
loss: 0.974242091178894,grad_norm: 0.8460247124334376, iteration: 347573
loss: 1.0379000902175903,grad_norm: 0.8780562159234222, iteration: 347574
loss: 0.965173065662384,grad_norm: 0.736820301148384, iteration: 347575
loss: 0.9890809059143066,grad_norm: 0.9013330367544012, iteration: 347576
loss: 1.0105619430541992,grad_norm: 0.8334878243661474, iteration: 347577
loss: 0.9924273490905762,grad_norm: 0.8260650313621881, iteration: 347578
loss: 1.0024908781051636,grad_norm: 0.8311820886569815, iteration: 347579
loss: 1.0443813800811768,grad_norm: 0.9999990970842764, iteration: 347580
loss: 0.9948649406433105,grad_norm: 0.789268839126344, iteration: 347581
loss: 1.036928653717041,grad_norm: 0.9999996105139789, iteration: 347582
loss: 1.003476858139038,grad_norm: 0.8023356742832424, iteration: 347583
loss: 1.0042918920516968,grad_norm: 0.9439897756077911, iteration: 347584
loss: 1.0176684856414795,grad_norm: 0.9473973227957202, iteration: 347585
loss: 1.0527116060256958,grad_norm: 0.9999990283353137, iteration: 347586
loss: 0.9844059348106384,grad_norm: 0.6917227820145925, iteration: 347587
loss: 0.9890533685684204,grad_norm: 0.7772120377067544, iteration: 347588
loss: 1.0229636430740356,grad_norm: 0.8472256004105981, iteration: 347589
loss: 0.9539093375205994,grad_norm: 0.8349259952899714, iteration: 347590
loss: 1.0257859230041504,grad_norm: 0.8883069105525508, iteration: 347591
loss: 0.981415867805481,grad_norm: 0.9689914269749775, iteration: 347592
loss: 0.9913033246994019,grad_norm: 0.8454387284594037, iteration: 347593
loss: 0.991249680519104,grad_norm: 0.7671351210330715, iteration: 347594
loss: 0.9694779515266418,grad_norm: 0.8417436288948639, iteration: 347595
loss: 0.9926590919494629,grad_norm: 0.8400239428306706, iteration: 347596
loss: 1.0084774494171143,grad_norm: 0.764272422743231, iteration: 347597
loss: 0.9776315093040466,grad_norm: 0.9999990660454018, iteration: 347598
loss: 1.0000783205032349,grad_norm: 0.852363340108002, iteration: 347599
loss: 1.041140079498291,grad_norm: 0.9999993206064006, iteration: 347600
loss: 1.0786970853805542,grad_norm: 0.8608256385194499, iteration: 347601
loss: 1.000609278678894,grad_norm: 0.7982445133474942, iteration: 347602
loss: 1.0203359127044678,grad_norm: 0.6526832953606766, iteration: 347603
loss: 0.9886537194252014,grad_norm: 0.7548856613332545, iteration: 347604
loss: 0.9548973441123962,grad_norm: 0.9539992731230519, iteration: 347605
loss: 1.0335798263549805,grad_norm: 0.9208263916346845, iteration: 347606
loss: 1.0313020944595337,grad_norm: 0.999999303450912, iteration: 347607
loss: 0.991033673286438,grad_norm: 0.8068037905913129, iteration: 347608
loss: 1.0278891324996948,grad_norm: 0.850612697166744, iteration: 347609
loss: 1.025126576423645,grad_norm: 0.8352097358781613, iteration: 347610
loss: 1.0116609334945679,grad_norm: 0.7233557422678815, iteration: 347611
loss: 0.9883492588996887,grad_norm: 0.822149738307307, iteration: 347612
loss: 1.0249441862106323,grad_norm: 0.8047083060303142, iteration: 347613
loss: 0.9991030097007751,grad_norm: 0.6914554444324161, iteration: 347614
loss: 0.989516019821167,grad_norm: 0.8481539961973547, iteration: 347615
loss: 0.9966461658477783,grad_norm: 0.80668400918492, iteration: 347616
loss: 1.0140067338943481,grad_norm: 0.8147474801012285, iteration: 347617
loss: 1.0384687185287476,grad_norm: 0.9463902142484982, iteration: 347618
loss: 0.9545212388038635,grad_norm: 0.9289372476910615, iteration: 347619
loss: 1.025749683380127,grad_norm: 0.7965733727629019, iteration: 347620
loss: 0.9732934236526489,grad_norm: 0.9977384133090438, iteration: 347621
loss: 1.0500298738479614,grad_norm: 0.8017815335173621, iteration: 347622
loss: 0.9819953441619873,grad_norm: 0.8319622270592821, iteration: 347623
loss: 1.0187435150146484,grad_norm: 0.8851381837187243, iteration: 347624
loss: 1.0660134553909302,grad_norm: 0.8603073076322265, iteration: 347625
loss: 1.0353995561599731,grad_norm: 0.8495514589577778, iteration: 347626
loss: 0.9606154561042786,grad_norm: 0.849864893550431, iteration: 347627
loss: 0.9813922643661499,grad_norm: 0.883367064203324, iteration: 347628
loss: 0.9965196251869202,grad_norm: 0.8759294579201783, iteration: 347629
loss: 0.9947996139526367,grad_norm: 0.8787040088191187, iteration: 347630
loss: 0.978909969329834,grad_norm: 0.8356070372031981, iteration: 347631
loss: 0.9904312491416931,grad_norm: 0.6153104795699801, iteration: 347632
loss: 1.017215609550476,grad_norm: 0.9999990642897535, iteration: 347633
loss: 1.0237786769866943,grad_norm: 0.9999997759556305, iteration: 347634
loss: 1.2178425788879395,grad_norm: 0.9999997132913173, iteration: 347635
loss: 0.9921983480453491,grad_norm: 0.8840307422391969, iteration: 347636
loss: 1.007369041442871,grad_norm: 0.9325524898353501, iteration: 347637
loss: 1.0099999904632568,grad_norm: 0.9999991287877436, iteration: 347638
loss: 0.9811946153640747,grad_norm: 0.7355589992246224, iteration: 347639
loss: 0.9945853352546692,grad_norm: 0.7842792889039149, iteration: 347640
loss: 0.9694356322288513,grad_norm: 0.8484615372995097, iteration: 347641
loss: 0.9403852224349976,grad_norm: 0.810517698344507, iteration: 347642
loss: 0.9944948554039001,grad_norm: 0.8282787944694292, iteration: 347643
loss: 1.029075026512146,grad_norm: 0.8618129264814213, iteration: 347644
loss: 0.9632689356803894,grad_norm: 0.8078313340226699, iteration: 347645
loss: 0.9986078143119812,grad_norm: 0.7710207802521286, iteration: 347646
loss: 1.051247239112854,grad_norm: 0.8059362851201347, iteration: 347647
loss: 1.0282962322235107,grad_norm: 0.9784584782254655, iteration: 347648
loss: 0.9557262659072876,grad_norm: 0.8759957956171007, iteration: 347649
loss: 1.0216176509857178,grad_norm: 0.9449474312602809, iteration: 347650
loss: 0.9742173552513123,grad_norm: 0.8080270642301954, iteration: 347651
loss: 0.9700387716293335,grad_norm: 0.867065741981708, iteration: 347652
loss: 1.0829652547836304,grad_norm: 0.9999994020150201, iteration: 347653
loss: 1.0682480335235596,grad_norm: 0.8033129039930998, iteration: 347654
loss: 1.0281213521957397,grad_norm: 0.8826024077723846, iteration: 347655
loss: 1.0069466829299927,grad_norm: 0.8575145637967002, iteration: 347656
loss: 0.9894246459007263,grad_norm: 0.9999991557817308, iteration: 347657
loss: 1.0838723182678223,grad_norm: 0.9999995595814981, iteration: 347658
loss: 0.9913495182991028,grad_norm: 0.7562691586731679, iteration: 347659
loss: 1.0427417755126953,grad_norm: 0.9999991327758635, iteration: 347660
loss: 1.0121824741363525,grad_norm: 0.7806574732488897, iteration: 347661
loss: 0.9990718364715576,grad_norm: 0.9999990750809659, iteration: 347662
loss: 0.9854826331138611,grad_norm: 0.7241769574612976, iteration: 347663
loss: 0.9889118075370789,grad_norm: 0.99999976414806, iteration: 347664
loss: 1.007881760597229,grad_norm: 0.9999991114606138, iteration: 347665
loss: 1.012658953666687,grad_norm: 0.8507653778261381, iteration: 347666
loss: 0.9858831167221069,grad_norm: 0.9999991467203411, iteration: 347667
loss: 1.0371930599212646,grad_norm: 0.9594435038398672, iteration: 347668
loss: 1.0236401557922363,grad_norm: 0.8020429250440513, iteration: 347669
loss: 0.960792601108551,grad_norm: 0.9703984081514192, iteration: 347670
loss: 1.0177723169326782,grad_norm: 0.7390704544241874, iteration: 347671
loss: 1.0842616558074951,grad_norm: 0.9999995208896022, iteration: 347672
loss: 1.0056339502334595,grad_norm: 0.731908638875876, iteration: 347673
loss: 0.9820435643196106,grad_norm: 0.8338426364394068, iteration: 347674
loss: 0.9994069337844849,grad_norm: 0.9999991005010294, iteration: 347675
loss: 1.0106093883514404,grad_norm: 0.8150986750514613, iteration: 347676
loss: 1.0349066257476807,grad_norm: 0.7754020318902046, iteration: 347677
loss: 1.0181912183761597,grad_norm: 0.7748853234009371, iteration: 347678
loss: 0.985816240310669,grad_norm: 0.9316337337472037, iteration: 347679
loss: 0.9487188458442688,grad_norm: 0.8357581630720184, iteration: 347680
loss: 1.020633578300476,grad_norm: 0.7637379045256202, iteration: 347681
loss: 1.014041781425476,grad_norm: 0.977577059836582, iteration: 347682
loss: 1.0255118608474731,grad_norm: 0.9999989888300875, iteration: 347683
loss: 1.0365480184555054,grad_norm: 0.9999991687510679, iteration: 347684
loss: 1.0110787153244019,grad_norm: 0.7844759339045047, iteration: 347685
loss: 0.9822307229042053,grad_norm: 0.9999999070546184, iteration: 347686
loss: 1.0187259912490845,grad_norm: 0.899304230865885, iteration: 347687
loss: 0.9786928296089172,grad_norm: 0.8596362947036797, iteration: 347688
loss: 1.0012233257293701,grad_norm: 0.9975575857134942, iteration: 347689
loss: 1.0047849416732788,grad_norm: 0.7526248119072084, iteration: 347690
loss: 1.0147297382354736,grad_norm: 0.999999596313103, iteration: 347691
loss: 0.9819828867912292,grad_norm: 0.8372318218840472, iteration: 347692
loss: 1.0022903680801392,grad_norm: 0.8760512443557225, iteration: 347693
loss: 1.0112704038619995,grad_norm: 0.8869827020939589, iteration: 347694
loss: 1.0367910861968994,grad_norm: 0.8053680772518806, iteration: 347695
loss: 0.9956408739089966,grad_norm: 0.9999992682110523, iteration: 347696
loss: 1.0072640180587769,grad_norm: 0.7718136519972237, iteration: 347697
loss: 0.9756806492805481,grad_norm: 0.8609401401815204, iteration: 347698
loss: 1.420707106590271,grad_norm: 0.9999999518532944, iteration: 347699
loss: 1.0061473846435547,grad_norm: 0.7188171724731846, iteration: 347700
loss: 0.996049702167511,grad_norm: 0.8397902913822245, iteration: 347701
loss: 0.9797249436378479,grad_norm: 0.8982207498915238, iteration: 347702
loss: 1.0118204355239868,grad_norm: 0.9999996584184367, iteration: 347703
loss: 0.9825775027275085,grad_norm: 0.9224065645753655, iteration: 347704
loss: 1.0186927318572998,grad_norm: 0.9633044360064315, iteration: 347705
loss: 0.9797222018241882,grad_norm: 0.8192215030453717, iteration: 347706
loss: 1.0768314599990845,grad_norm: 0.975876215662783, iteration: 347707
loss: 1.0356167554855347,grad_norm: 0.866159354100753, iteration: 347708
loss: 1.0334745645523071,grad_norm: 0.9999993682851971, iteration: 347709
loss: 1.0101841688156128,grad_norm: 0.894766524711809, iteration: 347710
loss: 0.9728615283966064,grad_norm: 0.999999195662548, iteration: 347711
loss: 1.013418197631836,grad_norm: 0.7660051855448943, iteration: 347712
loss: 0.9863117933273315,grad_norm: 0.872420375846998, iteration: 347713
loss: 0.9981679916381836,grad_norm: 0.9999992199930263, iteration: 347714
loss: 0.988973081111908,grad_norm: 0.9999991943060791, iteration: 347715
loss: 0.9882941246032715,grad_norm: 0.9694560477897679, iteration: 347716
loss: 0.9811562895774841,grad_norm: 0.9033038316218033, iteration: 347717
loss: 1.091614842414856,grad_norm: 0.9999995118874915, iteration: 347718
loss: 1.0065271854400635,grad_norm: 0.7894022940724126, iteration: 347719
loss: 1.0080533027648926,grad_norm: 0.9504070342864998, iteration: 347720
loss: 0.9913466572761536,grad_norm: 0.6411426157500293, iteration: 347721
loss: 1.0582494735717773,grad_norm: 0.8676405951574163, iteration: 347722
loss: 1.0492894649505615,grad_norm: 0.9999990915065256, iteration: 347723
loss: 0.9856613278388977,grad_norm: 0.7823013184922285, iteration: 347724
loss: 1.0013929605484009,grad_norm: 0.9999993359310415, iteration: 347725
loss: 1.034138798713684,grad_norm: 0.8091401695494278, iteration: 347726
loss: 1.0410029888153076,grad_norm: 0.9999994371270542, iteration: 347727
loss: 1.0014816522598267,grad_norm: 0.7245936846076534, iteration: 347728
loss: 0.9910511374473572,grad_norm: 0.9999989984490476, iteration: 347729
loss: 1.0097650289535522,grad_norm: 0.9999993262428666, iteration: 347730
loss: 0.9775015711784363,grad_norm: 0.8317281278254762, iteration: 347731
loss: 0.993929386138916,grad_norm: 0.8889522256347678, iteration: 347732
loss: 1.0434422492980957,grad_norm: 0.9999990890927194, iteration: 347733
loss: 1.02117121219635,grad_norm: 0.7568309015285904, iteration: 347734
loss: 1.0324153900146484,grad_norm: 0.8653876112616371, iteration: 347735
loss: 0.9855255484580994,grad_norm: 0.6776194632957512, iteration: 347736
loss: 1.0096057653427124,grad_norm: 0.719273223153235, iteration: 347737
loss: 1.0738413333892822,grad_norm: 0.8868441002422278, iteration: 347738
loss: 1.0068110227584839,grad_norm: 0.917701168218615, iteration: 347739
loss: 0.9984073638916016,grad_norm: 0.8132274041165369, iteration: 347740
loss: 1.0138893127441406,grad_norm: 0.7210180145963879, iteration: 347741
loss: 1.0016099214553833,grad_norm: 0.8936926544425134, iteration: 347742
loss: 0.9870016574859619,grad_norm: 0.8523488420404892, iteration: 347743
loss: 0.9361199736595154,grad_norm: 0.7885732797172567, iteration: 347744
loss: 1.1946508884429932,grad_norm: 0.9999997057899024, iteration: 347745
loss: 1.1569141149520874,grad_norm: 0.9589405401952528, iteration: 347746
loss: 0.9960926175117493,grad_norm: 0.8234839551840147, iteration: 347747
loss: 0.9582042694091797,grad_norm: 0.8434865243147845, iteration: 347748
loss: 1.0538067817687988,grad_norm: 0.9999993960153634, iteration: 347749
loss: 1.0161513090133667,grad_norm: 0.8045858692743276, iteration: 347750
loss: 1.1683015823364258,grad_norm: 0.99999969095732, iteration: 347751
loss: 0.9960903525352478,grad_norm: 0.6070054489055723, iteration: 347752
loss: 1.0146647691726685,grad_norm: 0.8125685610492518, iteration: 347753
loss: 1.0269057750701904,grad_norm: 0.9269279909690437, iteration: 347754
loss: 1.0178070068359375,grad_norm: 0.9364105197674888, iteration: 347755
loss: 0.9876278638839722,grad_norm: 0.7712319671364598, iteration: 347756
loss: 1.007614254951477,grad_norm: 0.6406107430839737, iteration: 347757
loss: 0.9882442951202393,grad_norm: 0.7801086490983417, iteration: 347758
loss: 0.986771821975708,grad_norm: 0.791114471087128, iteration: 347759
loss: 0.9672402143478394,grad_norm: 0.8142962367732473, iteration: 347760
loss: 0.9816543459892273,grad_norm: 0.7715114196011461, iteration: 347761
loss: 0.9684305787086487,grad_norm: 0.9999999340055566, iteration: 347762
loss: 0.9786733388900757,grad_norm: 0.9300387430443993, iteration: 347763
loss: 1.1299409866333008,grad_norm: 0.9999998187524013, iteration: 347764
loss: 1.0760568380355835,grad_norm: 0.8775787796925233, iteration: 347765
loss: 1.065644383430481,grad_norm: 0.9999993820792289, iteration: 347766
loss: 0.9980638027191162,grad_norm: 0.6296128510434086, iteration: 347767
loss: 0.989726185798645,grad_norm: 0.960239531577157, iteration: 347768
loss: 0.9880011677742004,grad_norm: 0.9591097285008733, iteration: 347769
loss: 0.9879379868507385,grad_norm: 0.6940684452069735, iteration: 347770
loss: 1.065894603729248,grad_norm: 0.9999997975139399, iteration: 347771
loss: 1.0130484104156494,grad_norm: 0.7401978448558063, iteration: 347772
loss: 0.9738056659698486,grad_norm: 0.7846329790231065, iteration: 347773
loss: 1.183774471282959,grad_norm: 0.9999999797624392, iteration: 347774
loss: 1.0121524333953857,grad_norm: 0.8029228015656319, iteration: 347775
loss: 1.0080175399780273,grad_norm: 0.8271775110368416, iteration: 347776
loss: 0.9921004176139832,grad_norm: 0.7752685368474515, iteration: 347777
loss: 1.0046683549880981,grad_norm: 0.7658039478204303, iteration: 347778
loss: 0.9975050091743469,grad_norm: 0.9163451650863474, iteration: 347779
loss: 1.014621376991272,grad_norm: 0.8582188105226869, iteration: 347780
loss: 0.9963366389274597,grad_norm: 0.8926493604537841, iteration: 347781
loss: 0.9927072525024414,grad_norm: 0.7557220047332438, iteration: 347782
loss: 1.0066165924072266,grad_norm: 0.5890346602991461, iteration: 347783
loss: 0.9934692978858948,grad_norm: 0.6681781040623457, iteration: 347784
loss: 0.9844855666160583,grad_norm: 0.7225781219487692, iteration: 347785
loss: 1.063897967338562,grad_norm: 0.9999995122883298, iteration: 347786
loss: 1.05852472782135,grad_norm: 0.7887974051598791, iteration: 347787
loss: 0.9822917580604553,grad_norm: 0.7934647979091158, iteration: 347788
loss: 0.9891523122787476,grad_norm: 0.7486689208770313, iteration: 347789
loss: 1.0032411813735962,grad_norm: 0.7491763293140336, iteration: 347790
loss: 0.9929774403572083,grad_norm: 0.9157431761907213, iteration: 347791
loss: 0.9511810541152954,grad_norm: 0.9380963817037726, iteration: 347792
loss: 0.9995063543319702,grad_norm: 0.8710734237974879, iteration: 347793
loss: 1.0269806385040283,grad_norm: 0.7079494335474531, iteration: 347794
loss: 0.9678018093109131,grad_norm: 0.7593356723602324, iteration: 347795
loss: 1.0158641338348389,grad_norm: 0.8452373740281337, iteration: 347796
loss: 0.9710949659347534,grad_norm: 0.8094839316406327, iteration: 347797
loss: 1.0293474197387695,grad_norm: 0.999999403203317, iteration: 347798
loss: 1.0334315299987793,grad_norm: 0.7903216698351047, iteration: 347799
loss: 0.9916351437568665,grad_norm: 0.906659870910938, iteration: 347800
loss: 1.0257236957550049,grad_norm: 0.7074977097511076, iteration: 347801
loss: 1.030626654624939,grad_norm: 0.9792617672682917, iteration: 347802
loss: 1.0378632545471191,grad_norm: 0.6993273764946584, iteration: 347803
loss: 1.0091944932937622,grad_norm: 0.9999989930735014, iteration: 347804
loss: 0.9202015399932861,grad_norm: 0.9172990739260445, iteration: 347805
loss: 0.9908111095428467,grad_norm: 0.779915633145035, iteration: 347806
loss: 1.0083526372909546,grad_norm: 0.761361332441152, iteration: 347807
loss: 0.9835189580917358,grad_norm: 0.9999991972673511, iteration: 347808
loss: 1.018664836883545,grad_norm: 0.9999997884097579, iteration: 347809
loss: 1.0873510837554932,grad_norm: 0.9999991592457694, iteration: 347810
loss: 1.0113295316696167,grad_norm: 0.7607137382255565, iteration: 347811
loss: 0.9647822976112366,grad_norm: 0.9705561287626184, iteration: 347812
loss: 0.9536334872245789,grad_norm: 0.9805010787473255, iteration: 347813
loss: 0.97835373878479,grad_norm: 0.7548954485544944, iteration: 347814
loss: 1.0796526670455933,grad_norm: 0.9999990329770589, iteration: 347815
loss: 1.0211946964263916,grad_norm: 0.9780990240723171, iteration: 347816
loss: 1.023064136505127,grad_norm: 0.9331803280356599, iteration: 347817
loss: 1.0107909440994263,grad_norm: 0.8317695387692567, iteration: 347818
loss: 0.9889340996742249,grad_norm: 0.8415314782654904, iteration: 347819
loss: 0.9887250065803528,grad_norm: 0.7787688320184428, iteration: 347820
loss: 0.994158148765564,grad_norm: 0.999999154547315, iteration: 347821
loss: 0.9897263050079346,grad_norm: 0.7450665698968961, iteration: 347822
loss: 1.0018353462219238,grad_norm: 0.9096112294932625, iteration: 347823
loss: 1.0305039882659912,grad_norm: 0.9492441342289097, iteration: 347824
loss: 0.9828026294708252,grad_norm: 0.8631992527815148, iteration: 347825
loss: 0.9600114822387695,grad_norm: 0.8059937193053054, iteration: 347826
loss: 1.048700213432312,grad_norm: 0.9866740203494014, iteration: 347827
loss: 1.0048179626464844,grad_norm: 0.8589489561931651, iteration: 347828
loss: 0.9812284111976624,grad_norm: 0.75991360712457, iteration: 347829
loss: 0.9728718400001526,grad_norm: 0.8541105368713384, iteration: 347830
loss: 1.011136531829834,grad_norm: 0.9999994838357373, iteration: 347831
loss: 0.9973093271255493,grad_norm: 0.8418770690764878, iteration: 347832
loss: 1.0092500448226929,grad_norm: 0.909333009635246, iteration: 347833
loss: 0.9908251166343689,grad_norm: 0.7546847541720594, iteration: 347834
loss: 1.0123199224472046,grad_norm: 0.9999990894088778, iteration: 347835
loss: 1.0856670141220093,grad_norm: 0.7434488915567341, iteration: 347836
loss: 1.1492438316345215,grad_norm: 0.999999350959582, iteration: 347837
loss: 0.9986231923103333,grad_norm: 0.8561136533826872, iteration: 347838
loss: 1.0221643447875977,grad_norm: 0.90794537876225, iteration: 347839
loss: 0.9730841517448425,grad_norm: 0.7332156578517032, iteration: 347840
loss: 0.9692557454109192,grad_norm: 0.731885594329682, iteration: 347841
loss: 1.0061559677124023,grad_norm: 0.7859146004818881, iteration: 347842
loss: 0.969616711139679,grad_norm: 0.7584434779730637, iteration: 347843
loss: 1.0379645824432373,grad_norm: 0.8044300152717776, iteration: 347844
loss: 0.9921477437019348,grad_norm: 0.8816976101126736, iteration: 347845
loss: 1.0064185857772827,grad_norm: 0.8051063710402995, iteration: 347846
loss: 1.0101540088653564,grad_norm: 0.6680732423366853, iteration: 347847
loss: 0.9754375219345093,grad_norm: 0.8743448073115841, iteration: 347848
loss: 1.0144990682601929,grad_norm: 0.7658423526345529, iteration: 347849
loss: 1.0432401895523071,grad_norm: 0.9932082198120644, iteration: 347850
loss: 1.088697075843811,grad_norm: 0.9999994006665694, iteration: 347851
loss: 0.9811563491821289,grad_norm: 0.8611238541808864, iteration: 347852
loss: 0.9899139404296875,grad_norm: 0.7899052426363221, iteration: 347853
loss: 0.995772659778595,grad_norm: 0.7653074111551276, iteration: 347854
loss: 1.037554383277893,grad_norm: 0.7755358378919552, iteration: 347855
loss: 0.9901205897331238,grad_norm: 0.7104415233080865, iteration: 347856
loss: 1.0121208429336548,grad_norm: 0.9166791894558074, iteration: 347857
loss: 1.0128436088562012,grad_norm: 0.8750729437074689, iteration: 347858
loss: 1.0134001970291138,grad_norm: 0.7638501517944718, iteration: 347859
loss: 1.0157296657562256,grad_norm: 0.966573621523496, iteration: 347860
loss: 1.0286219120025635,grad_norm: 0.801758603813714, iteration: 347861
loss: 1.0266205072402954,grad_norm: 0.8086524560871122, iteration: 347862
loss: 0.995427131652832,grad_norm: 0.6754148302555031, iteration: 347863
loss: 1.0231623649597168,grad_norm: 0.7665120613738662, iteration: 347864
loss: 0.999301016330719,grad_norm: 0.7666081482985697, iteration: 347865
loss: 1.011235237121582,grad_norm: 0.6586171285441874, iteration: 347866
loss: 0.9857243299484253,grad_norm: 0.8095238785822798, iteration: 347867
loss: 1.0103075504302979,grad_norm: 0.8806185170491749, iteration: 347868
loss: 0.9693578481674194,grad_norm: 0.7285609264358615, iteration: 347869
loss: 1.0089380741119385,grad_norm: 0.8189397240259302, iteration: 347870
loss: 1.0016151666641235,grad_norm: 0.8251889001805661, iteration: 347871
loss: 1.0166527032852173,grad_norm: 0.8106409781800731, iteration: 347872
loss: 1.0244876146316528,grad_norm: 0.9999992365023769, iteration: 347873
loss: 0.9793177843093872,grad_norm: 0.7917502851397152, iteration: 347874
loss: 1.0151277780532837,grad_norm: 0.8545692831912831, iteration: 347875
loss: 0.9860876202583313,grad_norm: 0.9150136175689976, iteration: 347876
loss: 1.0018175840377808,grad_norm: 0.8222255026734658, iteration: 347877
loss: 1.0019066333770752,grad_norm: 0.999999416807842, iteration: 347878
loss: 1.0066070556640625,grad_norm: 0.8758001988783619, iteration: 347879
loss: 0.956967294216156,grad_norm: 0.8073387059422272, iteration: 347880
loss: 0.99289470911026,grad_norm: 0.9047435971042154, iteration: 347881
loss: 1.001175045967102,grad_norm: 0.9324186146391834, iteration: 347882
loss: 0.9912338256835938,grad_norm: 0.9999989595448986, iteration: 347883
loss: 0.9823849201202393,grad_norm: 0.7490823491971842, iteration: 347884
loss: 1.0198214054107666,grad_norm: 0.8579203139673326, iteration: 347885
loss: 0.9957624077796936,grad_norm: 0.9974373329587787, iteration: 347886
loss: 1.0167112350463867,grad_norm: 0.8223304726611832, iteration: 347887
loss: 1.0061893463134766,grad_norm: 0.7237360335067329, iteration: 347888
loss: 0.9819520115852356,grad_norm: 0.9377178122950028, iteration: 347889
loss: 1.0242581367492676,grad_norm: 0.8734912229405566, iteration: 347890
loss: 0.9755232334136963,grad_norm: 0.928068250439575, iteration: 347891
loss: 1.0742709636688232,grad_norm: 0.9999996438808544, iteration: 347892
loss: 1.0422579050064087,grad_norm: 0.8702859638530981, iteration: 347893
loss: 0.9866798520088196,grad_norm: 0.9999991207211185, iteration: 347894
loss: 0.9907624125480652,grad_norm: 0.8572027634247511, iteration: 347895
loss: 0.9579145312309265,grad_norm: 0.8460193520558318, iteration: 347896
loss: 1.1107392311096191,grad_norm: 0.9999994906411876, iteration: 347897
loss: 1.0081526041030884,grad_norm: 0.8319483989601886, iteration: 347898
loss: 0.9998252987861633,grad_norm: 0.8044394941862284, iteration: 347899
loss: 1.0194237232208252,grad_norm: 0.7229050727778606, iteration: 347900
loss: 0.9632107615470886,grad_norm: 0.8555086142475993, iteration: 347901
loss: 0.9318954944610596,grad_norm: 0.9715391691289188, iteration: 347902
loss: 0.9745264053344727,grad_norm: 0.7825723453831652, iteration: 347903
loss: 0.9989821314811707,grad_norm: 0.9521671777057089, iteration: 347904
loss: 0.9929471611976624,grad_norm: 0.7368031916967712, iteration: 347905
loss: 0.9810923337936401,grad_norm: 0.7808565045712971, iteration: 347906
loss: 0.9920656085014343,grad_norm: 0.8374559723184724, iteration: 347907
loss: 1.0102126598358154,grad_norm: 0.7273146921759248, iteration: 347908
loss: 0.9656461477279663,grad_norm: 0.7376406899912156, iteration: 347909
loss: 0.9752570390701294,grad_norm: 0.8230438577953005, iteration: 347910
loss: 1.1246236562728882,grad_norm: 0.9999992599697024, iteration: 347911
loss: 1.0204073190689087,grad_norm: 0.8855809905622982, iteration: 347912
loss: 1.0290648937225342,grad_norm: 0.8138710016109525, iteration: 347913
loss: 0.9880831837654114,grad_norm: 0.8450430173526441, iteration: 347914
loss: 0.9808788299560547,grad_norm: 0.685591304009971, iteration: 347915
loss: 1.0072455406188965,grad_norm: 0.820309949600008, iteration: 347916
loss: 0.9713317155838013,grad_norm: 0.7945982147600243, iteration: 347917
loss: 0.9858384132385254,grad_norm: 0.7835136414081558, iteration: 347918
loss: 0.9986754059791565,grad_norm: 0.9857535450341709, iteration: 347919
loss: 0.9910283088684082,grad_norm: 0.8426018039230682, iteration: 347920
loss: 1.0023373365402222,grad_norm: 0.7499018336330849, iteration: 347921
loss: 0.953141450881958,grad_norm: 0.7768385706623322, iteration: 347922
loss: 0.9919304251670837,grad_norm: 0.7905888775111979, iteration: 347923
loss: 0.9714279770851135,grad_norm: 0.7595695468481072, iteration: 347924
loss: 1.0325616598129272,grad_norm: 0.7383045902166784, iteration: 347925
loss: 0.976715087890625,grad_norm: 0.9025408915805981, iteration: 347926
loss: 1.0126583576202393,grad_norm: 0.8063594820013157, iteration: 347927
loss: 1.0154248476028442,grad_norm: 0.9999991847974622, iteration: 347928
loss: 0.9616249203681946,grad_norm: 0.9246804634397409, iteration: 347929
loss: 1.0945756435394287,grad_norm: 0.9999991814535873, iteration: 347930
loss: 0.984459638595581,grad_norm: 0.7331533515240343, iteration: 347931
loss: 1.0023661851882935,grad_norm: 0.8219343979774864, iteration: 347932
loss: 0.9757121801376343,grad_norm: 0.8891539627309024, iteration: 347933
loss: 0.9877684712409973,grad_norm: 0.6465615924517482, iteration: 347934
loss: 0.9904689788818359,grad_norm: 0.7169800034089524, iteration: 347935
loss: 1.0196418762207031,grad_norm: 0.9124346493378573, iteration: 347936
loss: 1.0009411573410034,grad_norm: 0.7003606465505672, iteration: 347937
loss: 0.9971348643302917,grad_norm: 0.7550344622127997, iteration: 347938
loss: 1.024213194847107,grad_norm: 0.9316111976788178, iteration: 347939
loss: 1.0530115365982056,grad_norm: 0.913200256543951, iteration: 347940
loss: 0.9899547696113586,grad_norm: 0.7990823336082985, iteration: 347941
loss: 0.985253632068634,grad_norm: 0.7931813712605237, iteration: 347942
loss: 0.9936477541923523,grad_norm: 0.8484352777071669, iteration: 347943
loss: 1.0183970928192139,grad_norm: 0.7694498408269016, iteration: 347944
loss: 0.995895266532898,grad_norm: 0.7514741069492633, iteration: 347945
loss: 0.9710249900817871,grad_norm: 0.8163729757723237, iteration: 347946
loss: 0.9882897734642029,grad_norm: 0.7517272041266811, iteration: 347947
loss: 0.9709823131561279,grad_norm: 0.7077697530189058, iteration: 347948
loss: 1.0028082132339478,grad_norm: 0.7388834660064916, iteration: 347949
loss: 1.0123305320739746,grad_norm: 0.7170616223049265, iteration: 347950
loss: 1.0134152173995972,grad_norm: 0.7640981999012811, iteration: 347951
loss: 0.996570885181427,grad_norm: 0.8506666642044165, iteration: 347952
loss: 0.9863803386688232,grad_norm: 0.8205020640843214, iteration: 347953
loss: 1.002647042274475,grad_norm: 0.9217687393165436, iteration: 347954
loss: 1.051749587059021,grad_norm: 0.9999996611438511, iteration: 347955
loss: 0.9575675129890442,grad_norm: 0.9999992556077022, iteration: 347956
loss: 0.9964807033538818,grad_norm: 0.8132742336114227, iteration: 347957
loss: 1.0476932525634766,grad_norm: 0.7667501078032789, iteration: 347958
loss: 0.9962180256843567,grad_norm: 0.6940756167190725, iteration: 347959
loss: 1.0064241886138916,grad_norm: 0.8272348388909487, iteration: 347960
loss: 1.011041283607483,grad_norm: 0.8936080835183752, iteration: 347961
loss: 0.9905921220779419,grad_norm: 0.8807514952007206, iteration: 347962
loss: 0.9887682795524597,grad_norm: 0.7581306068030643, iteration: 347963
loss: 0.9817527532577515,grad_norm: 0.7450591059084711, iteration: 347964
loss: 1.0349193811416626,grad_norm: 0.8849465616110068, iteration: 347965
loss: 1.0352839231491089,grad_norm: 0.6875154399489019, iteration: 347966
loss: 0.993767261505127,grad_norm: 0.912519136596907, iteration: 347967
loss: 1.0831962823867798,grad_norm: 0.9999993817176595, iteration: 347968
loss: 0.9899880290031433,grad_norm: 0.7532506271625413, iteration: 347969
loss: 1.0105645656585693,grad_norm: 0.9131869851204578, iteration: 347970
loss: 0.9916445016860962,grad_norm: 0.8210097340511125, iteration: 347971
loss: 0.9993822574615479,grad_norm: 0.7860656042077474, iteration: 347972
loss: 0.9833484292030334,grad_norm: 0.9516907416374656, iteration: 347973
loss: 1.002886176109314,grad_norm: 0.7390735470786849, iteration: 347974
loss: 1.049136757850647,grad_norm: 0.9999989984391275, iteration: 347975
loss: 0.9747228026390076,grad_norm: 0.7668429605873016, iteration: 347976
loss: 0.9866026043891907,grad_norm: 0.9976154980546049, iteration: 347977
loss: 0.9929497838020325,grad_norm: 0.8025350957021203, iteration: 347978
loss: 1.0238009691238403,grad_norm: 0.8124274686975195, iteration: 347979
loss: 1.030442476272583,grad_norm: 0.8938131994433925, iteration: 347980
loss: 0.9934149384498596,grad_norm: 0.8167139963436203, iteration: 347981
loss: 0.9949931502342224,grad_norm: 0.6898926972460239, iteration: 347982
loss: 1.0102685689926147,grad_norm: 0.9999990320027908, iteration: 347983
loss: 0.9968950748443604,grad_norm: 0.7858763219977822, iteration: 347984
loss: 1.0551691055297852,grad_norm: 0.9999995738658731, iteration: 347985
loss: 1.009177565574646,grad_norm: 0.8623092917581899, iteration: 347986
loss: 1.007428526878357,grad_norm: 0.7702099518552252, iteration: 347987
loss: 1.0256445407867432,grad_norm: 0.8607822108027254, iteration: 347988
loss: 0.9954671263694763,grad_norm: 0.8721480597299642, iteration: 347989
loss: 0.9984385967254639,grad_norm: 0.807867576330445, iteration: 347990
loss: 1.0192782878875732,grad_norm: 0.9999991354722614, iteration: 347991
loss: 1.016174554824829,grad_norm: 0.8886483121075621, iteration: 347992
loss: 0.9689683318138123,grad_norm: 0.8196478876617151, iteration: 347993
loss: 0.978493332862854,grad_norm: 0.626153361798726, iteration: 347994
loss: 1.0156902074813843,grad_norm: 0.8153113428245434, iteration: 347995
loss: 0.9850772619247437,grad_norm: 0.9421868752394827, iteration: 347996
loss: 1.003021240234375,grad_norm: 0.8736539474965109, iteration: 347997
loss: 0.9943254590034485,grad_norm: 0.7635170475392665, iteration: 347998
loss: 1.0202330350875854,grad_norm: 0.9999991213956998, iteration: 347999
loss: 1.0011041164398193,grad_norm: 0.9999991538677949, iteration: 348000
loss: 1.0090200901031494,grad_norm: 0.6683795226320748, iteration: 348001
loss: 0.988146185874939,grad_norm: 0.9173225554732128, iteration: 348002
loss: 1.0306607484817505,grad_norm: 0.9999998766417708, iteration: 348003
loss: 0.9928842186927795,grad_norm: 0.8656019185232815, iteration: 348004
loss: 1.007843255996704,grad_norm: 0.8959490320753226, iteration: 348005
loss: 0.9860961437225342,grad_norm: 0.781780889505342, iteration: 348006
loss: 0.9729048609733582,grad_norm: 0.8615918753075422, iteration: 348007
loss: 1.012816071510315,grad_norm: 0.8397014956414285, iteration: 348008
loss: 0.9883378148078918,grad_norm: 0.744845372863314, iteration: 348009
loss: 0.9894704818725586,grad_norm: 0.8960160604254036, iteration: 348010
loss: 1.0068467855453491,grad_norm: 0.7997905597215741, iteration: 348011
loss: 0.9133081436157227,grad_norm: 0.8478084488304568, iteration: 348012
loss: 0.9524895548820496,grad_norm: 0.9999990596008081, iteration: 348013
loss: 0.9897028803825378,grad_norm: 0.9999992196170697, iteration: 348014
loss: 1.0008388757705688,grad_norm: 0.9999992501652055, iteration: 348015
loss: 0.9749422669410706,grad_norm: 0.8571355869859504, iteration: 348016
loss: 1.003991961479187,grad_norm: 0.6937328252479678, iteration: 348017
loss: 0.9681660532951355,grad_norm: 0.9371075058366334, iteration: 348018
loss: 0.9656586647033691,grad_norm: 0.8369235806677976, iteration: 348019
loss: 0.981371283531189,grad_norm: 0.8492966732306086, iteration: 348020
loss: 0.9517291188240051,grad_norm: 0.9329995021223813, iteration: 348021
loss: 1.0054645538330078,grad_norm: 0.6949416495482, iteration: 348022
loss: 0.9936053156852722,grad_norm: 0.8738421449097182, iteration: 348023
loss: 0.9877701997756958,grad_norm: 0.9756295471478931, iteration: 348024
loss: 1.0067704916000366,grad_norm: 0.846988335647399, iteration: 348025
loss: 0.994844377040863,grad_norm: 0.8296960595709819, iteration: 348026
loss: 1.113347053527832,grad_norm: 0.9436645000763546, iteration: 348027
loss: 0.9691576361656189,grad_norm: 0.76042070663446, iteration: 348028
loss: 0.9910004138946533,grad_norm: 0.8351030336976532, iteration: 348029
loss: 0.9627419114112854,grad_norm: 0.9999999159714942, iteration: 348030
loss: 0.9867839813232422,grad_norm: 0.8169981110337887, iteration: 348031
loss: 1.0174955129623413,grad_norm: 0.9858804347175891, iteration: 348032
loss: 1.0826388597488403,grad_norm: 0.999999883210833, iteration: 348033
loss: 0.965376615524292,grad_norm: 0.6935844567980466, iteration: 348034
loss: 0.9867361783981323,grad_norm: 0.9711646806422638, iteration: 348035
loss: 0.9575964212417603,grad_norm: 0.7352926722998158, iteration: 348036
loss: 0.989490270614624,grad_norm: 0.8757364683712481, iteration: 348037
loss: 0.9700052738189697,grad_norm: 0.7500653411844134, iteration: 348038
loss: 0.9954935312271118,grad_norm: 0.8289463102260435, iteration: 348039
loss: 0.990243136882782,grad_norm: 0.9624791766400206, iteration: 348040
loss: 1.004127860069275,grad_norm: 0.7442337243243959, iteration: 348041
loss: 0.9944976568222046,grad_norm: 0.8832718420189076, iteration: 348042
loss: 1.0699769258499146,grad_norm: 1.0000000242224711, iteration: 348043
loss: 1.0178838968276978,grad_norm: 0.9999993804112571, iteration: 348044
loss: 0.9889698624610901,grad_norm: 0.9459194839808283, iteration: 348045
loss: 1.0432802438735962,grad_norm: 0.785330285887193, iteration: 348046
loss: 0.9767847061157227,grad_norm: 0.866283116267134, iteration: 348047
loss: 1.0014948844909668,grad_norm: 0.9999994001579805, iteration: 348048
loss: 0.9800132513046265,grad_norm: 0.7590817844641, iteration: 348049
loss: 1.0024466514587402,grad_norm: 0.7100438346957807, iteration: 348050
loss: 0.9807529449462891,grad_norm: 0.7665236928579146, iteration: 348051
loss: 1.0329636335372925,grad_norm: 0.7423650190353128, iteration: 348052
loss: 1.039528489112854,grad_norm: 0.8729894258615226, iteration: 348053
loss: 0.9844022989273071,grad_norm: 0.6722877900308953, iteration: 348054
loss: 1.0209996700286865,grad_norm: 0.8423999066625559, iteration: 348055
loss: 1.0147713422775269,grad_norm: 0.9999997084349249, iteration: 348056
loss: 1.034995198249817,grad_norm: 0.8010792663882665, iteration: 348057
loss: 1.0122015476226807,grad_norm: 0.7737336179009774, iteration: 348058
loss: 0.9712375402450562,grad_norm: 0.8101543680007677, iteration: 348059
loss: 0.9986521005630493,grad_norm: 0.8574483761173874, iteration: 348060
loss: 1.0176184177398682,grad_norm: 0.7795975759197843, iteration: 348061
loss: 0.989521861076355,grad_norm: 0.6629932622257295, iteration: 348062
loss: 0.9902728796005249,grad_norm: 0.8610511028620415, iteration: 348063
loss: 0.9843943119049072,grad_norm: 0.8418091559808067, iteration: 348064
loss: 1.0034641027450562,grad_norm: 0.7892179081191995, iteration: 348065
loss: 0.9964812994003296,grad_norm: 0.6970474041316522, iteration: 348066
loss: 1.0012210607528687,grad_norm: 0.8563269121910826, iteration: 348067
loss: 1.0061532258987427,grad_norm: 0.999999063333546, iteration: 348068
loss: 0.9777211546897888,grad_norm: 0.8114731374996036, iteration: 348069
loss: 1.0111961364746094,grad_norm: 0.9172972638400411, iteration: 348070
loss: 1.005265474319458,grad_norm: 0.7410823662087554, iteration: 348071
loss: 0.9874163269996643,grad_norm: 0.811964299397502, iteration: 348072
loss: 0.9702360033988953,grad_norm: 0.734419404274262, iteration: 348073
loss: 0.9782094359397888,grad_norm: 0.7968272666160068, iteration: 348074
loss: 1.0204745531082153,grad_norm: 0.9108032331019922, iteration: 348075
loss: 1.0217185020446777,grad_norm: 0.8771344533894165, iteration: 348076
loss: 1.0096325874328613,grad_norm: 0.806278970557343, iteration: 348077
loss: 1.0052731037139893,grad_norm: 0.8100897937390166, iteration: 348078
loss: 1.0014288425445557,grad_norm: 0.9999998812489922, iteration: 348079
loss: 1.0021692514419556,grad_norm: 0.8771594536321905, iteration: 348080
loss: 1.0073513984680176,grad_norm: 0.7310581486204736, iteration: 348081
loss: 0.9809891581535339,grad_norm: 0.7028977859704305, iteration: 348082
loss: 1.0118860006332397,grad_norm: 0.8076624420992188, iteration: 348083
loss: 1.0430951118469238,grad_norm: 0.8819994943518101, iteration: 348084
loss: 1.0274280309677124,grad_norm: 0.8652034725693015, iteration: 348085
loss: 0.9902292490005493,grad_norm: 0.7674311464737464, iteration: 348086
loss: 1.1205857992172241,grad_norm: 0.9979452902288789, iteration: 348087
loss: 1.0245693922042847,grad_norm: 0.8397149126440381, iteration: 348088
loss: 1.0470737218856812,grad_norm: 0.9999993335457288, iteration: 348089
loss: 0.9714525938034058,grad_norm: 0.8007376828894726, iteration: 348090
loss: 0.99592524766922,grad_norm: 0.9999991931426583, iteration: 348091
loss: 1.0406683683395386,grad_norm: 0.9999991318940081, iteration: 348092
loss: 1.00270414352417,grad_norm: 0.9999990619237756, iteration: 348093
loss: 1.000742793083191,grad_norm: 0.6647185851887663, iteration: 348094
loss: 1.001476526260376,grad_norm: 0.9999997136600952, iteration: 348095
loss: 0.9734981060028076,grad_norm: 0.7244535673415748, iteration: 348096
loss: 1.0293354988098145,grad_norm: 0.9999991456824815, iteration: 348097
loss: 0.9844872355461121,grad_norm: 0.9386040684490832, iteration: 348098
loss: 0.9731069803237915,grad_norm: 0.9209122551300758, iteration: 348099
loss: 0.9938822388648987,grad_norm: 0.93815189603865, iteration: 348100
loss: 1.0792427062988281,grad_norm: 0.9999999331796172, iteration: 348101
loss: 1.0198293924331665,grad_norm: 0.7827152293636269, iteration: 348102
loss: 0.9990956783294678,grad_norm: 0.7053502063163063, iteration: 348103
loss: 1.0356818437576294,grad_norm: 0.8723007790486824, iteration: 348104
loss: 1.0325120687484741,grad_norm: 0.9999996946624637, iteration: 348105
loss: 0.9585020542144775,grad_norm: 0.681660320696208, iteration: 348106
loss: 0.9941062331199646,grad_norm: 0.7570683553195152, iteration: 348107
loss: 0.991957426071167,grad_norm: 0.8262999740274103, iteration: 348108
loss: 1.046450138092041,grad_norm: 0.9999998757722773, iteration: 348109
loss: 1.0277719497680664,grad_norm: 0.7983890089420193, iteration: 348110
loss: 0.9998968243598938,grad_norm: 0.8386687918691165, iteration: 348111
loss: 1.0158671140670776,grad_norm: 0.821368870480295, iteration: 348112
loss: 0.9640849828720093,grad_norm: 0.962760148284298, iteration: 348113
loss: 1.0095832347869873,grad_norm: 0.8454352901196865, iteration: 348114
loss: 1.0184484720230103,grad_norm: 0.9999991960369493, iteration: 348115
loss: 1.0293062925338745,grad_norm: 0.8225591928730486, iteration: 348116
loss: 1.0048195123672485,grad_norm: 0.8763033480402522, iteration: 348117
loss: 1.0580697059631348,grad_norm: 0.9999998321606636, iteration: 348118
loss: 0.9840211868286133,grad_norm: 1.0000000376402978, iteration: 348119
loss: 0.9732452630996704,grad_norm: 0.8376498786908332, iteration: 348120
loss: 1.0070018768310547,grad_norm: 0.8910715642836533, iteration: 348121
loss: 0.9824637174606323,grad_norm: 0.9188911742012477, iteration: 348122
loss: 1.0006533861160278,grad_norm: 0.7929910267830973, iteration: 348123
loss: 1.0074435472488403,grad_norm: 0.791174526930924, iteration: 348124
loss: 0.9793058037757874,grad_norm: 0.9999990695782515, iteration: 348125
loss: 1.0197701454162598,grad_norm: 0.9999994375552178, iteration: 348126
loss: 0.9781227707862854,grad_norm: 0.9999993202554928, iteration: 348127
loss: 1.0155731439590454,grad_norm: 0.9999990817116268, iteration: 348128
loss: 0.9804906845092773,grad_norm: 0.8252127290196037, iteration: 348129
loss: 1.022201657295227,grad_norm: 0.7706380365550567, iteration: 348130
loss: 1.002307415008545,grad_norm: 0.7545077919729359, iteration: 348131
loss: 1.0130528211593628,grad_norm: 0.6992363289335499, iteration: 348132
loss: 1.019384503364563,grad_norm: 0.8708528319632315, iteration: 348133
loss: 0.9984769225120544,grad_norm: 0.6580239431213631, iteration: 348134
loss: 0.9936338663101196,grad_norm: 0.9999995621415693, iteration: 348135
loss: 0.9682534337043762,grad_norm: 0.7706815054671813, iteration: 348136
loss: 1.0321792364120483,grad_norm: 0.9999990069065801, iteration: 348137
loss: 1.0015404224395752,grad_norm: 0.8250314916130539, iteration: 348138
loss: 1.0013114213943481,grad_norm: 0.9569302184965373, iteration: 348139
loss: 1.0222654342651367,grad_norm: 0.8849078713150229, iteration: 348140
loss: 0.9961151480674744,grad_norm: 0.9630580179018937, iteration: 348141
loss: 0.9944944977760315,grad_norm: 0.7043547298594895, iteration: 348142
loss: 1.032699704170227,grad_norm: 0.8500178153548942, iteration: 348143
loss: 1.0099011659622192,grad_norm: 0.7784755137796535, iteration: 348144
loss: 1.0544939041137695,grad_norm: 0.7275096851984011, iteration: 348145
loss: 1.0140305757522583,grad_norm: 0.8796716889833539, iteration: 348146
loss: 1.0306280851364136,grad_norm: 0.8720056803545259, iteration: 348147
loss: 1.0234805345535278,grad_norm: 0.9681233454395093, iteration: 348148
loss: 1.0239310264587402,grad_norm: 0.7334928763477271, iteration: 348149
loss: 1.018019199371338,grad_norm: 0.7069553212021602, iteration: 348150
loss: 0.95387864112854,grad_norm: 0.999999192876411, iteration: 348151
loss: 1.0194345712661743,grad_norm: 0.9402831230915456, iteration: 348152
loss: 1.0655124187469482,grad_norm: 0.9999997200889603, iteration: 348153
loss: 1.0190629959106445,grad_norm: 0.7489321890162761, iteration: 348154
loss: 0.9847733974456787,grad_norm: 0.9999991305991262, iteration: 348155
loss: 1.0278053283691406,grad_norm: 0.9999994253133848, iteration: 348156
loss: 0.9692245125770569,grad_norm: 0.8290656005079662, iteration: 348157
loss: 1.0022263526916504,grad_norm: 0.999999066112051, iteration: 348158
loss: 1.0035767555236816,grad_norm: 0.7324924987165793, iteration: 348159
loss: 1.002456784248352,grad_norm: 0.9999993588910348, iteration: 348160
loss: 0.983339250087738,grad_norm: 0.9795440302041258, iteration: 348161
loss: 0.9810206294059753,grad_norm: 0.8780604264745744, iteration: 348162
loss: 0.9993722438812256,grad_norm: 0.7506820818391646, iteration: 348163
loss: 0.9942187070846558,grad_norm: 0.7755875416945023, iteration: 348164
loss: 0.9916208982467651,grad_norm: 0.780197298593544, iteration: 348165
loss: 0.9847869873046875,grad_norm: 0.9999989959120824, iteration: 348166
loss: 1.004421353340149,grad_norm: 0.6437435927034505, iteration: 348167
loss: 0.974265456199646,grad_norm: 0.908057092914474, iteration: 348168
loss: 1.0267353057861328,grad_norm: 0.7893130153939817, iteration: 348169
loss: 1.0238264799118042,grad_norm: 0.9759570284344428, iteration: 348170
loss: 1.0088262557983398,grad_norm: 0.7709584249569602, iteration: 348171
loss: 1.02150297164917,grad_norm: 0.9044028977176557, iteration: 348172
loss: 0.9802941679954529,grad_norm: 0.8681713455144101, iteration: 348173
loss: 1.0582003593444824,grad_norm: 0.9999993910690549, iteration: 348174
loss: 1.0232605934143066,grad_norm: 0.8657553827588079, iteration: 348175
loss: 1.0158230066299438,grad_norm: 0.9999989640323244, iteration: 348176
loss: 0.9927684664726257,grad_norm: 0.7638787080153259, iteration: 348177
loss: 1.0183249711990356,grad_norm: 0.9999990400743001, iteration: 348178
loss: 1.0025959014892578,grad_norm: 0.7472965889485184, iteration: 348179
loss: 1.0535424947738647,grad_norm: 0.7546038580543666, iteration: 348180
loss: 0.9746473431587219,grad_norm: 0.9339337235951647, iteration: 348181
loss: 1.020110845565796,grad_norm: 0.9999998105443839, iteration: 348182
loss: 1.0083671808242798,grad_norm: 0.7445328960738044, iteration: 348183
loss: 0.9931349754333496,grad_norm: 0.8577257716396764, iteration: 348184
loss: 0.9919188618659973,grad_norm: 0.9555526512520984, iteration: 348185
loss: 0.9733499884605408,grad_norm: 0.7192503135059013, iteration: 348186
loss: 1.0119119882583618,grad_norm: 0.9999989832823681, iteration: 348187
loss: 0.9462159276008606,grad_norm: 0.6333294962810336, iteration: 348188
loss: 1.0238662958145142,grad_norm: 0.8455287242172178, iteration: 348189
loss: 1.0014255046844482,grad_norm: 0.7912651722268437, iteration: 348190
loss: 0.9736384749412537,grad_norm: 0.8481390020939805, iteration: 348191
loss: 1.0138956308364868,grad_norm: 0.9987073943047196, iteration: 348192
loss: 1.0016638040542603,grad_norm: 0.6506559465489941, iteration: 348193
loss: 0.9469607472419739,grad_norm: 0.96578054378083, iteration: 348194
loss: 0.9864698052406311,grad_norm: 0.7321815020693005, iteration: 348195
loss: 0.9566716551780701,grad_norm: 0.8406261579624915, iteration: 348196
loss: 0.9825460314750671,grad_norm: 0.8565855228071038, iteration: 348197
loss: 1.0323253870010376,grad_norm: 0.9999993422382715, iteration: 348198
loss: 0.973832368850708,grad_norm: 0.7510880863611765, iteration: 348199
loss: 0.9999396204948425,grad_norm: 0.8003491559997779, iteration: 348200
loss: 1.0248031616210938,grad_norm: 0.8018748065744882, iteration: 348201
loss: 0.9877472519874573,grad_norm: 0.8483975967957963, iteration: 348202
loss: 1.019101858139038,grad_norm: 0.8409046610607875, iteration: 348203
loss: 0.9983252882957458,grad_norm: 0.9999990361560126, iteration: 348204
loss: 1.0069806575775146,grad_norm: 0.8922808019310505, iteration: 348205
loss: 1.0049017667770386,grad_norm: 0.9999992265497087, iteration: 348206
loss: 1.0490552186965942,grad_norm: 0.8683016877810211, iteration: 348207
loss: 0.9867096543312073,grad_norm: 0.759278359407298, iteration: 348208
loss: 1.016732931137085,grad_norm: 0.8493785762763301, iteration: 348209
loss: 0.9729404449462891,grad_norm: 0.6949226945398168, iteration: 348210
loss: 0.9781036972999573,grad_norm: 0.7380886010406135, iteration: 348211
loss: 0.9833185076713562,grad_norm: 0.8484292997314737, iteration: 348212
loss: 0.9792273640632629,grad_norm: 0.8098658961410209, iteration: 348213
loss: 0.9980061054229736,grad_norm: 0.9999998995973336, iteration: 348214
loss: 1.0158501863479614,grad_norm: 0.9999991071601497, iteration: 348215
loss: 1.001860499382019,grad_norm: 0.7175576043826936, iteration: 348216
loss: 1.00369393825531,grad_norm: 0.9999994078288176, iteration: 348217
loss: 0.9562755823135376,grad_norm: 0.7874079428666665, iteration: 348218
loss: 1.0079338550567627,grad_norm: 0.8213348993910105, iteration: 348219
loss: 1.0068336725234985,grad_norm: 0.9203844967842622, iteration: 348220
loss: 1.0098775625228882,grad_norm: 0.9999991169676956, iteration: 348221
loss: 1.0183740854263306,grad_norm: 0.7644013222072205, iteration: 348222
loss: 1.0270241498947144,grad_norm: 0.7483588607033197, iteration: 348223
loss: 1.0317248106002808,grad_norm: 0.9472909481627146, iteration: 348224
loss: 1.04391348361969,grad_norm: 0.8514652636509581, iteration: 348225
loss: 1.0374728441238403,grad_norm: 0.7438327748487534, iteration: 348226
loss: 0.9715594053268433,grad_norm: 0.8167493771284753, iteration: 348227
loss: 0.9981217384338379,grad_norm: 0.7593022404137539, iteration: 348228
loss: 0.9635983109474182,grad_norm: 0.7686135285823246, iteration: 348229
loss: 0.9816403985023499,grad_norm: 0.9057435863959533, iteration: 348230
loss: 1.002802848815918,grad_norm: 0.759857445999473, iteration: 348231
loss: 1.0251784324645996,grad_norm: 0.7800467649623885, iteration: 348232
loss: 1.0023077726364136,grad_norm: 0.9779290552100572, iteration: 348233
loss: 1.0313032865524292,grad_norm: 0.8668540751919518, iteration: 348234
loss: 0.9945154190063477,grad_norm: 0.7098740299578572, iteration: 348235
loss: 0.9805595874786377,grad_norm: 0.8977505796871308, iteration: 348236
loss: 0.9757658839225769,grad_norm: 0.7978737388660542, iteration: 348237
loss: 0.9897789359092712,grad_norm: 0.7437243616303025, iteration: 348238
loss: 0.9784548282623291,grad_norm: 0.9698034306967269, iteration: 348239
loss: 0.9730823040008545,grad_norm: 0.7445299381034363, iteration: 348240
loss: 0.9653720259666443,grad_norm: 0.8873725999704718, iteration: 348241
loss: 1.0120713710784912,grad_norm: 0.8365715809081997, iteration: 348242
loss: 1.0109657049179077,grad_norm: 0.7162312953226816, iteration: 348243
loss: 1.0028105974197388,grad_norm: 0.9568600658868986, iteration: 348244
loss: 0.9989375472068787,grad_norm: 0.7578062721619212, iteration: 348245
loss: 1.0747853517532349,grad_norm: 0.9999991460255984, iteration: 348246
loss: 1.0079243183135986,grad_norm: 0.9149307798116357, iteration: 348247
loss: 1.0063554048538208,grad_norm: 0.9999990543768521, iteration: 348248
loss: 1.001844048500061,grad_norm: 0.863891370455723, iteration: 348249
loss: 1.014852523803711,grad_norm: 0.9187003065330492, iteration: 348250
loss: 1.070668339729309,grad_norm: 0.9999991166807097, iteration: 348251
loss: 0.9830141067504883,grad_norm: 0.7766244806611718, iteration: 348252
loss: 1.0026440620422363,grad_norm: 0.8515172059210688, iteration: 348253
loss: 1.0091769695281982,grad_norm: 0.7702272135666858, iteration: 348254
loss: 1.1195231676101685,grad_norm: 0.9999996066454021, iteration: 348255
loss: 0.9755749702453613,grad_norm: 0.9999994552230234, iteration: 348256
loss: 0.984338641166687,grad_norm: 0.7575554712285737, iteration: 348257
loss: 1.0454590320587158,grad_norm: 0.7848434890923146, iteration: 348258
loss: 0.999208927154541,grad_norm: 0.8463026376379041, iteration: 348259
loss: 0.9752949476242065,grad_norm: 0.8124908012010773, iteration: 348260
loss: 1.0200657844543457,grad_norm: 0.8577307119741487, iteration: 348261
loss: 1.0375375747680664,grad_norm: 0.978880968787549, iteration: 348262
loss: 1.0220742225646973,grad_norm: 0.7376377344200474, iteration: 348263
loss: 0.9889228343963623,grad_norm: 0.8419708723500641, iteration: 348264
loss: 1.0005227327346802,grad_norm: 0.7843027330664495, iteration: 348265
loss: 0.9915685653686523,grad_norm: 0.7715573374822813, iteration: 348266
loss: 0.9949337840080261,grad_norm: 0.7571709539172882, iteration: 348267
loss: 1.0281870365142822,grad_norm: 0.8952962131400167, iteration: 348268
loss: 0.9771748185157776,grad_norm: 0.9664426051498707, iteration: 348269
loss: 1.0263512134552002,grad_norm: 0.8240066635584453, iteration: 348270
loss: 0.9838985800743103,grad_norm: 0.9999992885751646, iteration: 348271
loss: 1.0106087923049927,grad_norm: 0.6551838840783516, iteration: 348272
loss: 0.9701244831085205,grad_norm: 0.9014145218565587, iteration: 348273
loss: 0.9495818018913269,grad_norm: 0.7104743967149266, iteration: 348274
loss: 0.9779939651489258,grad_norm: 0.9999991900407903, iteration: 348275
loss: 0.9710917472839355,grad_norm: 0.9999997421482165, iteration: 348276
loss: 0.9736528396606445,grad_norm: 0.833216385587948, iteration: 348277
loss: 1.0042251348495483,grad_norm: 0.8945716243032411, iteration: 348278
loss: 1.0144579410552979,grad_norm: 0.8653938553410752, iteration: 348279
loss: 1.0173423290252686,grad_norm: 0.8849007983000793, iteration: 348280
loss: 0.9997560381889343,grad_norm: 0.8024084739933548, iteration: 348281
loss: 1.006949782371521,grad_norm: 0.6934986795046477, iteration: 348282
loss: 0.9865222573280334,grad_norm: 0.8693201701596845, iteration: 348283
loss: 1.0096975564956665,grad_norm: 0.8048359723209382, iteration: 348284
loss: 0.9961446523666382,grad_norm: 0.7350659529470003, iteration: 348285
loss: 0.980259120464325,grad_norm: 0.942314966412189, iteration: 348286
loss: 0.9892330765724182,grad_norm: 0.7298622759199453, iteration: 348287
loss: 1.0153157711029053,grad_norm: 0.8662736337302825, iteration: 348288
loss: 1.0111067295074463,grad_norm: 0.864135292802322, iteration: 348289
loss: 0.9741801619529724,grad_norm: 0.826041532212874, iteration: 348290
loss: 0.9691367745399475,grad_norm: 0.9999991425049235, iteration: 348291
loss: 0.9635784029960632,grad_norm: 0.8571616101213763, iteration: 348292
loss: 1.0154250860214233,grad_norm: 0.9392590850006048, iteration: 348293
loss: 1.0238600969314575,grad_norm: 0.9999990785168511, iteration: 348294
loss: 0.9905704855918884,grad_norm: 0.7738065501713141, iteration: 348295
loss: 1.0089876651763916,grad_norm: 0.8081341681895403, iteration: 348296
loss: 0.9999698996543884,grad_norm: 0.9999997301394429, iteration: 348297
loss: 0.9576292037963867,grad_norm: 0.9604466765859943, iteration: 348298
loss: 0.9796669483184814,grad_norm: 0.8884416728255567, iteration: 348299
loss: 1.001837968826294,grad_norm: 0.8404312220650257, iteration: 348300
loss: 1.0182863473892212,grad_norm: 0.9152518532173927, iteration: 348301
loss: 0.9870786070823669,grad_norm: 0.7585095893390806, iteration: 348302
loss: 0.9838078022003174,grad_norm: 0.7306900750547326, iteration: 348303
loss: 0.9664648771286011,grad_norm: 0.9999993377395133, iteration: 348304
loss: 0.9680918455123901,grad_norm: 0.9339411926066664, iteration: 348305
loss: 0.9939274191856384,grad_norm: 0.8760588931275219, iteration: 348306
loss: 0.9925681352615356,grad_norm: 0.673045413305117, iteration: 348307
loss: 0.9804152250289917,grad_norm: 0.9999990899695803, iteration: 348308
loss: 1.0043065547943115,grad_norm: 0.812256591697537, iteration: 348309
loss: 1.0121148824691772,grad_norm: 0.747438846524483, iteration: 348310
loss: 0.9876723289489746,grad_norm: 0.8366222724129154, iteration: 348311
loss: 0.9824826121330261,grad_norm: 0.8425708209026911, iteration: 348312
loss: 0.9931706786155701,grad_norm: 0.8133403739153586, iteration: 348313
loss: 1.0172851085662842,grad_norm: 0.8529953264480211, iteration: 348314
loss: 0.9936255812644958,grad_norm: 0.7788372771402593, iteration: 348315
loss: 1.029333472251892,grad_norm: 0.9999995561416726, iteration: 348316
loss: 0.9973016381263733,grad_norm: 0.8484435081264616, iteration: 348317
loss: 0.9611554741859436,grad_norm: 0.9283959297777242, iteration: 348318
loss: 1.002992868423462,grad_norm: 0.8391008879348372, iteration: 348319
loss: 1.0015407800674438,grad_norm: 0.8733926131163768, iteration: 348320
loss: 0.9545099139213562,grad_norm: 0.8820222799271551, iteration: 348321
loss: 1.0522596836090088,grad_norm: 0.8568821762627202, iteration: 348322
loss: 0.9852429032325745,grad_norm: 0.7343172472455672, iteration: 348323
loss: 0.9925903081893921,grad_norm: 0.8435972143069005, iteration: 348324
loss: 1.0193896293640137,grad_norm: 0.9403852034540752, iteration: 348325
loss: 0.9914439916610718,grad_norm: 0.8459926573792943, iteration: 348326
loss: 0.9823806285858154,grad_norm: 0.7884569790152662, iteration: 348327
loss: 1.030395269393921,grad_norm: 0.933536925492133, iteration: 348328
loss: 0.9955301880836487,grad_norm: 0.7550543643010882, iteration: 348329
loss: 1.0040757656097412,grad_norm: 0.8791703813851963, iteration: 348330
loss: 0.9801971316337585,grad_norm: 0.796676841563557, iteration: 348331
loss: 1.004045009613037,grad_norm: 0.8155145325334987, iteration: 348332
loss: 0.9592928886413574,grad_norm: 0.7809432553453216, iteration: 348333
loss: 1.0249061584472656,grad_norm: 0.9592809104410689, iteration: 348334
loss: 0.9698235392570496,grad_norm: 0.8088609128577181, iteration: 348335
loss: 0.9707597494125366,grad_norm: 0.9259309553949306, iteration: 348336
loss: 1.0095125436782837,grad_norm: 0.9999990586144919, iteration: 348337
loss: 1.026618242263794,grad_norm: 0.9659054534849398, iteration: 348338
loss: 1.005103588104248,grad_norm: 0.7549104905699634, iteration: 348339
loss: 0.9832834601402283,grad_norm: 0.8165158500600291, iteration: 348340
loss: 0.9876695275306702,grad_norm: 0.8709008838997863, iteration: 348341
loss: 1.0260560512542725,grad_norm: 0.9999998602641107, iteration: 348342
loss: 0.9811293482780457,grad_norm: 0.7342012683205303, iteration: 348343
loss: 0.9800918102264404,grad_norm: 0.8564302269625085, iteration: 348344
loss: 1.0259963274002075,grad_norm: 0.9044648893805926, iteration: 348345
loss: 0.9982503652572632,grad_norm: 0.9188151001984163, iteration: 348346
loss: 0.9956552982330322,grad_norm: 0.8131364847838161, iteration: 348347
loss: 1.0338549613952637,grad_norm: 0.7030503062323262, iteration: 348348
loss: 0.9758960008621216,grad_norm: 0.9360480403280582, iteration: 348349
loss: 0.9946815967559814,grad_norm: 0.7770484577144733, iteration: 348350
loss: 0.9813724756240845,grad_norm: 0.87217862611708, iteration: 348351
loss: 1.0015897750854492,grad_norm: 0.9015641583560067, iteration: 348352
loss: 0.9677702784538269,grad_norm: 0.8831039696408642, iteration: 348353
loss: 1.0167993307113647,grad_norm: 0.859615157548632, iteration: 348354
loss: 1.0068645477294922,grad_norm: 0.9027548339603149, iteration: 348355
loss: 1.0230505466461182,grad_norm: 0.8654069636750035, iteration: 348356
loss: 0.979407548904419,grad_norm: 0.8476371588792229, iteration: 348357
loss: 0.9972511529922485,grad_norm: 0.6704961874429907, iteration: 348358
loss: 1.013352632522583,grad_norm: 0.9327660601314461, iteration: 348359
loss: 1.0011062622070312,grad_norm: 0.7770523159750536, iteration: 348360
loss: 1.0092180967330933,grad_norm: 0.7921170337548878, iteration: 348361
loss: 0.9837866425514221,grad_norm: 0.7974416654515084, iteration: 348362
loss: 0.9959378838539124,grad_norm: 0.999999009746836, iteration: 348363
loss: 0.9547325968742371,grad_norm: 0.8793567607333174, iteration: 348364
loss: 0.9858996272087097,grad_norm: 0.886496978003394, iteration: 348365
loss: 1.002692699432373,grad_norm: 0.7533265556229571, iteration: 348366
loss: 0.9599978923797607,grad_norm: 0.9999990707437316, iteration: 348367
loss: 1.0046930313110352,grad_norm: 0.8118746174899363, iteration: 348368
loss: 1.0448311567306519,grad_norm: 0.8080145688439934, iteration: 348369
loss: 1.0146510601043701,grad_norm: 0.8984359991644166, iteration: 348370
loss: 0.9783506393432617,grad_norm: 0.8795246369424714, iteration: 348371
loss: 1.0356539487838745,grad_norm: 0.9999999886934064, iteration: 348372
loss: 1.005907416343689,grad_norm: 0.7050613931550694, iteration: 348373
loss: 1.0188992023468018,grad_norm: 0.7200600393250199, iteration: 348374
loss: 1.0337915420532227,grad_norm: 0.8507864732405389, iteration: 348375
loss: 1.0111783742904663,grad_norm: 0.8788122422028114, iteration: 348376
loss: 1.003739356994629,grad_norm: 0.6919481123243323, iteration: 348377
loss: 0.9737129211425781,grad_norm: 0.867022791632732, iteration: 348378
loss: 1.0250906944274902,grad_norm: 0.7276075819452351, iteration: 348379
loss: 1.0161736011505127,grad_norm: 0.9999997660716354, iteration: 348380
loss: 1.2133848667144775,grad_norm: 0.9999994525889254, iteration: 348381
loss: 1.0395747423171997,grad_norm: 0.9265011606266679, iteration: 348382
loss: 0.9939214587211609,grad_norm: 0.7726637579109027, iteration: 348383
loss: 1.0090422630310059,grad_norm: 0.7548528015684367, iteration: 348384
loss: 0.957291841506958,grad_norm: 0.6815648266487826, iteration: 348385
loss: 0.9835450649261475,grad_norm: 0.743054171282673, iteration: 348386
loss: 0.9793343544006348,grad_norm: 0.912659882193322, iteration: 348387
loss: 0.9911603331565857,grad_norm: 0.9999991161690445, iteration: 348388
loss: 0.9794945120811462,grad_norm: 0.7480003633215256, iteration: 348389
loss: 0.978959858417511,grad_norm: 0.7853418070301647, iteration: 348390
loss: 1.025051236152649,grad_norm: 0.7724854533801279, iteration: 348391
loss: 1.0048003196716309,grad_norm: 0.9463678599581702, iteration: 348392
loss: 0.9941385388374329,grad_norm: 0.958347154094545, iteration: 348393
loss: 0.9974544644355774,grad_norm: 0.8971484726744596, iteration: 348394
loss: 1.0149517059326172,grad_norm: 0.7902313039520803, iteration: 348395
loss: 1.0151373147964478,grad_norm: 0.9999989773107113, iteration: 348396
loss: 0.985619306564331,grad_norm: 0.7337713816210002, iteration: 348397
loss: 1.0212923288345337,grad_norm: 0.6326116738465908, iteration: 348398
loss: 0.9918552041053772,grad_norm: 0.7264119349922554, iteration: 348399
loss: 0.9763412475585938,grad_norm: 0.6965498212728014, iteration: 348400
loss: 0.9954748749732971,grad_norm: 0.6786043009841424, iteration: 348401
loss: 1.0257961750030518,grad_norm: 0.9999998700874733, iteration: 348402
loss: 1.0325849056243896,grad_norm: 0.997442249028527, iteration: 348403
loss: 1.0222926139831543,grad_norm: 0.8618077507124925, iteration: 348404
loss: 1.0210819244384766,grad_norm: 0.8210141558050976, iteration: 348405
loss: 0.979709267616272,grad_norm: 0.8248931608704074, iteration: 348406
loss: 1.0082286596298218,grad_norm: 0.7419022790271494, iteration: 348407
loss: 0.9739052653312683,grad_norm: 0.8413491177708635, iteration: 348408
loss: 1.0597162246704102,grad_norm: 0.999999325000122, iteration: 348409
loss: 0.9955328106880188,grad_norm: 0.8257079744255493, iteration: 348410
loss: 0.9937757253646851,grad_norm: 0.7370838574683047, iteration: 348411
loss: 0.9845552444458008,grad_norm: 0.8317900780263362, iteration: 348412
loss: 0.9929537177085876,grad_norm: 0.8831689833703044, iteration: 348413
loss: 1.0452196598052979,grad_norm: 0.7095793068037837, iteration: 348414
loss: 0.9970006346702576,grad_norm: 0.9071761854854535, iteration: 348415
loss: 1.1099625825881958,grad_norm: 0.9892250045366987, iteration: 348416
loss: 1.014054775238037,grad_norm: 0.8138828610064988, iteration: 348417
loss: 1.009129285812378,grad_norm: 0.8726186298001574, iteration: 348418
loss: 0.965971052646637,grad_norm: 0.8055896980477107, iteration: 348419
loss: 0.9893601536750793,grad_norm: 0.9859371978825119, iteration: 348420
loss: 0.9895168542861938,grad_norm: 0.8616734203707017, iteration: 348421
loss: 1.031620740890503,grad_norm: 0.9518290638795929, iteration: 348422
loss: 0.9957108497619629,grad_norm: 0.8372898075595694, iteration: 348423
loss: 1.005247712135315,grad_norm: 0.7474898276681367, iteration: 348424
loss: 1.0932683944702148,grad_norm: 0.7567266252610361, iteration: 348425
loss: 1.0184606313705444,grad_norm: 0.9999991928213284, iteration: 348426
loss: 0.9886141419410706,grad_norm: 0.8833386404436947, iteration: 348427
loss: 1.003582239151001,grad_norm: 0.7307607616791574, iteration: 348428
loss: 1.0143386125564575,grad_norm: 0.9118637027485932, iteration: 348429
loss: 0.9764581322669983,grad_norm: 0.6637898964871594, iteration: 348430
loss: 1.0262749195098877,grad_norm: 0.7065884097246318, iteration: 348431
loss: 0.9693671464920044,grad_norm: 0.9015942136123063, iteration: 348432
loss: 1.0248640775680542,grad_norm: 0.8147888288072743, iteration: 348433
loss: 1.012152910232544,grad_norm: 0.9999991840054752, iteration: 348434
loss: 1.010330080986023,grad_norm: 0.9140851162950268, iteration: 348435
loss: 0.987304151058197,grad_norm: 0.7209125829019988, iteration: 348436
loss: 1.031731367111206,grad_norm: 0.7933379822655402, iteration: 348437
loss: 0.9434031248092651,grad_norm: 0.8355611565910677, iteration: 348438
loss: 0.9944552779197693,grad_norm: 0.8379697485254493, iteration: 348439
loss: 0.9827616214752197,grad_norm: 0.8382430291123142, iteration: 348440
loss: 0.9839296340942383,grad_norm: 0.7620435733313095, iteration: 348441
loss: 1.1140183210372925,grad_norm: 0.9999990752528425, iteration: 348442
loss: 1.0035213232040405,grad_norm: 0.9999996181142388, iteration: 348443
loss: 0.9910934567451477,grad_norm: 0.8659127831253117, iteration: 348444
loss: 0.9955487847328186,grad_norm: 0.9326046205387774, iteration: 348445
loss: 0.9577793478965759,grad_norm: 0.9252362518238836, iteration: 348446
loss: 0.9968024492263794,grad_norm: 0.9999991544311727, iteration: 348447
loss: 1.0022354125976562,grad_norm: 0.7718090643708672, iteration: 348448
loss: 0.9659479260444641,grad_norm: 0.9999990109232754, iteration: 348449
loss: 1.0059008598327637,grad_norm: 0.7103380449493758, iteration: 348450
loss: 1.0107942819595337,grad_norm: 0.6476883293329477, iteration: 348451
loss: 0.9716223478317261,grad_norm: 0.8904666202731315, iteration: 348452
loss: 1.0139789581298828,grad_norm: 0.9999992209973313, iteration: 348453
loss: 1.0043909549713135,grad_norm: 0.9999989685043259, iteration: 348454
loss: 1.0979679822921753,grad_norm: 0.9114186156554454, iteration: 348455
loss: 1.046565055847168,grad_norm: 0.7783790373269928, iteration: 348456
loss: 1.0371840000152588,grad_norm: 0.9213814329553326, iteration: 348457
loss: 1.0370126962661743,grad_norm: 0.8970536408954455, iteration: 348458
loss: 0.9712499380111694,grad_norm: 0.7337312065516975, iteration: 348459
loss: 0.9729558229446411,grad_norm: 0.8625559556930393, iteration: 348460
loss: 0.9637749195098877,grad_norm: 0.9182076547963613, iteration: 348461
loss: 0.9568220376968384,grad_norm: 0.8081644757453548, iteration: 348462
loss: 1.053730845451355,grad_norm: 0.848746395401305, iteration: 348463
loss: 1.0015767812728882,grad_norm: 0.8338762978895424, iteration: 348464
loss: 0.984273374080658,grad_norm: 0.9999990633295458, iteration: 348465
loss: 0.984792947769165,grad_norm: 0.7572710631741499, iteration: 348466
loss: 0.9946145415306091,grad_norm: 0.9999989942418748, iteration: 348467
loss: 0.9768681526184082,grad_norm: 0.747066687769919, iteration: 348468
loss: 1.0052417516708374,grad_norm: 0.8177660012382527, iteration: 348469
loss: 1.001212477684021,grad_norm: 0.940761419109189, iteration: 348470
loss: 0.9967572689056396,grad_norm: 0.7403879142534201, iteration: 348471
loss: 1.0038424730300903,grad_norm: 0.763251646308128, iteration: 348472
loss: 1.032807469367981,grad_norm: 0.9999991572962138, iteration: 348473
loss: 0.9773327708244324,grad_norm: 0.9999990424678173, iteration: 348474
loss: 1.0012010335922241,grad_norm: 0.9258945631862879, iteration: 348475
loss: 0.9871591329574585,grad_norm: 0.7357058907884033, iteration: 348476
loss: 1.0731399059295654,grad_norm: 0.8391137828581368, iteration: 348477
loss: 0.9860977530479431,grad_norm: 0.7870263610189981, iteration: 348478
loss: 0.9778328537940979,grad_norm: 0.7821851716104189, iteration: 348479
loss: 1.0367634296417236,grad_norm: 0.8270063283007083, iteration: 348480
loss: 1.0028951168060303,grad_norm: 0.8031041327909147, iteration: 348481
loss: 1.0112653970718384,grad_norm: 0.7450914727759147, iteration: 348482
loss: 0.9856956005096436,grad_norm: 0.9398536862551542, iteration: 348483
loss: 1.0095460414886475,grad_norm: 0.9033912021752044, iteration: 348484
loss: 1.0188727378845215,grad_norm: 0.7928870722015589, iteration: 348485
loss: 1.0020694732666016,grad_norm: 0.7532725613613801, iteration: 348486
loss: 1.0238326787948608,grad_norm: 0.6709061312348505, iteration: 348487
loss: 0.98140949010849,grad_norm: 0.782005809208387, iteration: 348488
loss: 1.014780879020691,grad_norm: 0.9092289729612772, iteration: 348489
loss: 0.9555858969688416,grad_norm: 0.8539022637715721, iteration: 348490
loss: 0.9639712572097778,grad_norm: 0.7773900645088834, iteration: 348491
loss: 1.0442689657211304,grad_norm: 0.8754331040634744, iteration: 348492
loss: 0.9921306371688843,grad_norm: 0.9999995027816704, iteration: 348493
loss: 1.0152109861373901,grad_norm: 0.9999990650832496, iteration: 348494
loss: 1.1070793867111206,grad_norm: 0.9999991353041233, iteration: 348495
loss: 1.0204856395721436,grad_norm: 0.8504783218059018, iteration: 348496
loss: 0.9933232069015503,grad_norm: 0.8808675001834244, iteration: 348497
loss: 1.0079550743103027,grad_norm: 0.9343150689972696, iteration: 348498
loss: 0.9887372255325317,grad_norm: 0.7354093097040204, iteration: 348499
loss: 0.9813676476478577,grad_norm: 0.7502106505937371, iteration: 348500
loss: 1.0231741666793823,grad_norm: 0.9150938820537529, iteration: 348501
loss: 1.0258750915527344,grad_norm: 0.6389007442777822, iteration: 348502
loss: 0.9749379754066467,grad_norm: 0.8277148119315318, iteration: 348503
loss: 0.9337759017944336,grad_norm: 0.7724178020207162, iteration: 348504
loss: 0.9787322282791138,grad_norm: 0.9746092432685618, iteration: 348505
loss: 0.9878039956092834,grad_norm: 0.8134661503260496, iteration: 348506
loss: 0.976818323135376,grad_norm: 0.8412139988983869, iteration: 348507
loss: 1.0044370889663696,grad_norm: 0.7064708767370007, iteration: 348508
loss: 1.0227396488189697,grad_norm: 0.9269552915119986, iteration: 348509
loss: 1.0147920846939087,grad_norm: 0.921597494555035, iteration: 348510
loss: 0.9899099469184875,grad_norm: 0.7464847573926369, iteration: 348511
loss: 1.0151969194412231,grad_norm: 0.8418104420163253, iteration: 348512
loss: 1.018291711807251,grad_norm: 0.9390474800472239, iteration: 348513
loss: 1.0103336572647095,grad_norm: 0.99294094645238, iteration: 348514
loss: 0.9829906821250916,grad_norm: 0.6309431863866454, iteration: 348515
loss: 1.0067368745803833,grad_norm: 0.7942437402030625, iteration: 348516
loss: 1.0320301055908203,grad_norm: 0.8290154728925464, iteration: 348517
loss: 0.9865062236785889,grad_norm: 0.8070717321178762, iteration: 348518
loss: 1.0116957426071167,grad_norm: 0.7595088953319269, iteration: 348519
loss: 0.9861597418785095,grad_norm: 0.8366246909226809, iteration: 348520
loss: 0.9929313063621521,grad_norm: 0.8628661103523542, iteration: 348521
loss: 0.9752153754234314,grad_norm: 0.8157339662400386, iteration: 348522
loss: 0.9922856092453003,grad_norm: 0.9229959291935117, iteration: 348523
loss: 1.0016858577728271,grad_norm: 0.999999235105714, iteration: 348524
loss: 1.0585756301879883,grad_norm: 0.7751109519361156, iteration: 348525
loss: 0.9702917337417603,grad_norm: 0.9999990953912299, iteration: 348526
loss: 0.9660646319389343,grad_norm: 0.7982414345827351, iteration: 348527
loss: 0.9864512085914612,grad_norm: 0.8681581921824922, iteration: 348528
loss: 1.0113290548324585,grad_norm: 0.7565083504211884, iteration: 348529
loss: 0.9841768145561218,grad_norm: 0.9999994067810688, iteration: 348530
loss: 0.9994426369667053,grad_norm: 0.7797457490781567, iteration: 348531
loss: 0.9677644371986389,grad_norm: 0.7586729071885353, iteration: 348532
loss: 0.9734335541725159,grad_norm: 0.7540741076406141, iteration: 348533
loss: 0.9890742301940918,grad_norm: 0.8272955946041449, iteration: 348534
loss: 0.9997554421424866,grad_norm: 0.8810406131869536, iteration: 348535
loss: 0.9914463758468628,grad_norm: 0.7867996592518636, iteration: 348536
loss: 0.9628732800483704,grad_norm: 0.7789911866520058, iteration: 348537
loss: 1.057394027709961,grad_norm: 0.9999993856214257, iteration: 348538
loss: 1.0451545715332031,grad_norm: 0.7118592062610901, iteration: 348539
loss: 0.9956991672515869,grad_norm: 0.7881676245167011, iteration: 348540
loss: 1.027530550956726,grad_norm: 0.805699202648628, iteration: 348541
loss: 1.014863133430481,grad_norm: 0.8294999668468067, iteration: 348542
loss: 0.9956651926040649,grad_norm: 0.7208029600805475, iteration: 348543
loss: 0.9560303688049316,grad_norm: 0.8065420262992506, iteration: 348544
loss: 0.9990342259407043,grad_norm: 0.956443968822047, iteration: 348545
loss: 1.0353130102157593,grad_norm: 0.7044585433113132, iteration: 348546
loss: 0.9627906680107117,grad_norm: 0.7672440362982726, iteration: 348547
loss: 0.9838098883628845,grad_norm: 0.7497074484686814, iteration: 348548
loss: 1.000553846359253,grad_norm: 0.9732142996964561, iteration: 348549
loss: 1.0322974920272827,grad_norm: 0.80627906097442, iteration: 348550
loss: 1.0047266483306885,grad_norm: 0.7095893741222622, iteration: 348551
loss: 0.9878329634666443,grad_norm: 0.9527257355698493, iteration: 348552
loss: 0.9830334782600403,grad_norm: 0.9171663107491467, iteration: 348553
loss: 1.0156503915786743,grad_norm: 0.7923053949343373, iteration: 348554
loss: 0.9918394088745117,grad_norm: 0.7297335406214893, iteration: 348555
loss: 0.9997771382331848,grad_norm: 0.7615812056293364, iteration: 348556
loss: 0.9998522996902466,grad_norm: 0.6971604549098934, iteration: 348557
loss: 1.0060746669769287,grad_norm: 0.9999992597124925, iteration: 348558
loss: 1.016007900238037,grad_norm: 0.7680194879197677, iteration: 348559
loss: 0.9836974740028381,grad_norm: 0.8660611994051676, iteration: 348560
loss: 1.0169408321380615,grad_norm: 0.9999991389550562, iteration: 348561
loss: 1.0074950456619263,grad_norm: 0.9346761223186951, iteration: 348562
loss: 0.9787248373031616,grad_norm: 0.7932865493834116, iteration: 348563
loss: 0.9862216114997864,grad_norm: 0.838550789557057, iteration: 348564
loss: 1.0330909490585327,grad_norm: 0.917986875908352, iteration: 348565
loss: 1.0529955625534058,grad_norm: 0.7137130092669702, iteration: 348566
loss: 1.0338555574417114,grad_norm: 0.7812151117198919, iteration: 348567
loss: 1.035773754119873,grad_norm: 0.9999990458813534, iteration: 348568
loss: 0.9813276529312134,grad_norm: 0.9297022835207713, iteration: 348569
loss: 0.9881768226623535,grad_norm: 0.9944748233280248, iteration: 348570
loss: 1.0253413915634155,grad_norm: 0.7267939285861551, iteration: 348571
loss: 0.97194504737854,grad_norm: 0.9011450235817097, iteration: 348572
loss: 0.9526268243789673,grad_norm: 0.9113690686980076, iteration: 348573
loss: 0.9998315572738647,grad_norm: 0.9999991055883476, iteration: 348574
loss: 0.9987176656723022,grad_norm: 0.7710837299579713, iteration: 348575
loss: 0.9988394975662231,grad_norm: 0.874702828285799, iteration: 348576
loss: 0.9895815253257751,grad_norm: 0.7433625120331472, iteration: 348577
loss: 1.0114104747772217,grad_norm: 0.9999997960146615, iteration: 348578
loss: 1.0171184539794922,grad_norm: 0.8137018790758774, iteration: 348579
loss: 0.9828304052352905,grad_norm: 0.9999996011200855, iteration: 348580
loss: 1.0098540782928467,grad_norm: 0.9760118463391606, iteration: 348581
loss: 0.977167010307312,grad_norm: 0.8446761956154792, iteration: 348582
loss: 0.9892764091491699,grad_norm: 0.7975252778679633, iteration: 348583
loss: 0.9466779828071594,grad_norm: 0.9263705774832458, iteration: 348584
loss: 0.9634414315223694,grad_norm: 0.8218709665458723, iteration: 348585
loss: 0.9700983762741089,grad_norm: 0.7523571700901491, iteration: 348586
loss: 1.0106277465820312,grad_norm: 0.9448206247449266, iteration: 348587
loss: 1.0348920822143555,grad_norm: 0.8297350340599422, iteration: 348588
loss: 0.9575490355491638,grad_norm: 0.8439087778215933, iteration: 348589
loss: 1.0239571332931519,grad_norm: 0.8121576392271396, iteration: 348590
loss: 1.0345239639282227,grad_norm: 0.9999993970646417, iteration: 348591
loss: 1.0066481828689575,grad_norm: 0.7115279380762745, iteration: 348592
loss: 1.0030869245529175,grad_norm: 0.7672121640534697, iteration: 348593
loss: 1.2271125316619873,grad_norm: 0.9999997121761623, iteration: 348594
loss: 1.024600625038147,grad_norm: 0.794148052051173, iteration: 348595
loss: 0.9807544946670532,grad_norm: 0.7373230873494492, iteration: 348596
loss: 1.0154166221618652,grad_norm: 0.7695283642294122, iteration: 348597
loss: 1.000055193901062,grad_norm: 0.9066511337930776, iteration: 348598
loss: 0.9871156215667725,grad_norm: 0.968068688526833, iteration: 348599
loss: 0.9884489178657532,grad_norm: 0.7727227795519083, iteration: 348600
loss: 1.0485554933547974,grad_norm: 1.0000000129241655, iteration: 348601
loss: 1.0047608613967896,grad_norm: 0.9137497344836438, iteration: 348602
loss: 0.9926374554634094,grad_norm: 0.8127707932308446, iteration: 348603
loss: 1.0115176439285278,grad_norm: 0.7932252325310603, iteration: 348604
loss: 0.996457040309906,grad_norm: 0.8292999946296437, iteration: 348605
loss: 0.9983304142951965,grad_norm: 0.7992773433970433, iteration: 348606
loss: 0.9871966242790222,grad_norm: 0.9069715041441595, iteration: 348607
loss: 1.0254034996032715,grad_norm: 0.7167662798328762, iteration: 348608
loss: 1.0127533674240112,grad_norm: 0.6908752133882226, iteration: 348609
loss: 0.9774551391601562,grad_norm: 0.6786891705269115, iteration: 348610
loss: 0.9979507327079773,grad_norm: 0.7542612466902141, iteration: 348611
loss: 1.0183992385864258,grad_norm: 0.8664035069795233, iteration: 348612
loss: 0.9796879887580872,grad_norm: 0.8893170969919246, iteration: 348613
loss: 0.9275244474411011,grad_norm: 0.7301975508287414, iteration: 348614
loss: 0.9862068891525269,grad_norm: 0.9098621976739453, iteration: 348615
loss: 0.998113214969635,grad_norm: 0.8782637642918446, iteration: 348616
loss: 1.0046381950378418,grad_norm: 0.8061134330289934, iteration: 348617
loss: 1.0004414319992065,grad_norm: 0.7511742209656522, iteration: 348618
loss: 1.024391770362854,grad_norm: 0.83053991000298, iteration: 348619
loss: 1.017770767211914,grad_norm: 0.7860374131155915, iteration: 348620
loss: 1.0108040571212769,grad_norm: 0.8448999331197654, iteration: 348621
loss: 0.963712751865387,grad_norm: 0.9999994206828371, iteration: 348622
loss: 0.9683094024658203,grad_norm: 0.7461423511455754, iteration: 348623
loss: 0.9838870167732239,grad_norm: 0.7331472788909524, iteration: 348624
loss: 1.0105915069580078,grad_norm: 0.9999992547397996, iteration: 348625
loss: 0.9918773770332336,grad_norm: 0.9999993977949301, iteration: 348626
loss: 1.0258272886276245,grad_norm: 0.8469988128590235, iteration: 348627
loss: 0.980955183506012,grad_norm: 0.8715223664018171, iteration: 348628
loss: 1.0035998821258545,grad_norm: 0.9999992136837257, iteration: 348629
loss: 1.0225414037704468,grad_norm: 0.872119542044209, iteration: 348630
loss: 1.0226242542266846,grad_norm: 0.9151890228501922, iteration: 348631
loss: 1.0627025365829468,grad_norm: 0.7162676055338902, iteration: 348632
loss: 0.9665448665618896,grad_norm: 0.8403297212878513, iteration: 348633
loss: 1.0148403644561768,grad_norm: 0.9838390287727605, iteration: 348634
loss: 1.045933723449707,grad_norm: 0.9999998105832815, iteration: 348635
loss: 0.9820249676704407,grad_norm: 0.7823462414416327, iteration: 348636
loss: 0.9860509037971497,grad_norm: 0.76435487448775, iteration: 348637
loss: 0.9916005730628967,grad_norm: 0.714923332922859, iteration: 348638
loss: 1.0010888576507568,grad_norm: 0.8432493906499761, iteration: 348639
loss: 1.003908634185791,grad_norm: 0.9163575203462825, iteration: 348640
loss: 1.0005640983581543,grad_norm: 0.8458308710968944, iteration: 348641
loss: 0.984258770942688,grad_norm: 0.806119999254798, iteration: 348642
loss: 1.0224961042404175,grad_norm: 0.9276716885602371, iteration: 348643
loss: 0.9968969821929932,grad_norm: 0.8644369315482576, iteration: 348644
loss: 1.0678515434265137,grad_norm: 0.897765222858672, iteration: 348645
loss: 0.992692768573761,grad_norm: 0.682738388928764, iteration: 348646
loss: 0.9724637269973755,grad_norm: 0.8276604077899279, iteration: 348647
loss: 1.0010969638824463,grad_norm: 0.7517034854684578, iteration: 348648
loss: 0.9933955073356628,grad_norm: 0.7064998536917548, iteration: 348649
loss: 1.0177736282348633,grad_norm: 0.7827766351401293, iteration: 348650
loss: 1.0244224071502686,grad_norm: 0.9987233008601459, iteration: 348651
loss: 1.0042834281921387,grad_norm: 0.8525488074150109, iteration: 348652
loss: 1.0264263153076172,grad_norm: 0.6221538901997177, iteration: 348653
loss: 1.0311912298202515,grad_norm: 0.7830115746080439, iteration: 348654
loss: 0.966286838054657,grad_norm: 0.7541842150074853, iteration: 348655
loss: 1.0066336393356323,grad_norm: 0.8838670163256873, iteration: 348656
loss: 1.0078269243240356,grad_norm: 0.795448741229868, iteration: 348657
loss: 1.0824984312057495,grad_norm: 0.9999990382906834, iteration: 348658
loss: 1.0328720808029175,grad_norm: 0.8182121864376558, iteration: 348659
loss: 0.9898750185966492,grad_norm: 0.86899602554187, iteration: 348660
loss: 1.016882300376892,grad_norm: 0.7426668148796407, iteration: 348661
loss: 1.0143760442733765,grad_norm: 0.8779595136512974, iteration: 348662
loss: 0.9997288584709167,grad_norm: 0.9528515094501453, iteration: 348663
loss: 0.9729931354522705,grad_norm: 0.999999222646041, iteration: 348664
loss: 1.0861459970474243,grad_norm: 0.9999991787684223, iteration: 348665
loss: 0.9981475472450256,grad_norm: 0.8780999570261167, iteration: 348666
loss: 0.9885243773460388,grad_norm: 0.8624875530986217, iteration: 348667
loss: 0.9788600206375122,grad_norm: 0.7557753200173467, iteration: 348668
loss: 0.9808590412139893,grad_norm: 0.8376628627072271, iteration: 348669
loss: 0.9841960072517395,grad_norm: 0.789333086005659, iteration: 348670
loss: 1.034710168838501,grad_norm: 0.9999989974012328, iteration: 348671
loss: 1.0001142024993896,grad_norm: 0.9134417365903726, iteration: 348672
loss: 1.0189615488052368,grad_norm: 0.7442968223839213, iteration: 348673
loss: 1.001044511795044,grad_norm: 0.9999991661020785, iteration: 348674
loss: 1.0444732904434204,grad_norm: 0.7148333273972177, iteration: 348675
loss: 1.033286452293396,grad_norm: 0.7552509385260281, iteration: 348676
loss: 0.9768267869949341,grad_norm: 0.8182726953031036, iteration: 348677
loss: 0.9798595309257507,grad_norm: 0.8533314145432697, iteration: 348678
loss: 0.9567211866378784,grad_norm: 0.9168508275390567, iteration: 348679
loss: 0.9955892562866211,grad_norm: 0.9416350688745915, iteration: 348680
loss: 0.9996415376663208,grad_norm: 0.9999990187334558, iteration: 348681
loss: 1.0086060762405396,grad_norm: 0.7458697494228851, iteration: 348682
loss: 1.0167932510375977,grad_norm: 0.7462104455376841, iteration: 348683
loss: 0.9632991552352905,grad_norm: 0.7503043169784164, iteration: 348684
loss: 0.9989045262336731,grad_norm: 0.6882781022068467, iteration: 348685
loss: 1.0829432010650635,grad_norm: 0.7934992022134839, iteration: 348686
loss: 0.9923039674758911,grad_norm: 0.7719897254349108, iteration: 348687
loss: 0.9691792726516724,grad_norm: 0.7080048938238447, iteration: 348688
loss: 0.9912640452384949,grad_norm: 0.7380504658470367, iteration: 348689
loss: 1.0072262287139893,grad_norm: 0.8950779675338253, iteration: 348690
loss: 1.0288822650909424,grad_norm: 0.7850449832074048, iteration: 348691
loss: 0.9754218459129333,grad_norm: 0.7531652422866124, iteration: 348692
loss: 1.0139952898025513,grad_norm: 0.8884468804346831, iteration: 348693
loss: 0.9976376891136169,grad_norm: 0.8217597370997385, iteration: 348694
loss: 0.9985149502754211,grad_norm: 0.8822186514409205, iteration: 348695
loss: 1.0114880800247192,grad_norm: 0.9999989766627638, iteration: 348696
loss: 1.0187379121780396,grad_norm: 0.8204606059950982, iteration: 348697
loss: 1.016675591468811,grad_norm: 0.8761214111136306, iteration: 348698
loss: 1.0220739841461182,grad_norm: 0.7469599299137165, iteration: 348699
loss: 1.0199183225631714,grad_norm: 0.7330972871334428, iteration: 348700
loss: 1.0628843307495117,grad_norm: 0.999999884527232, iteration: 348701
loss: 1.0142043828964233,grad_norm: 0.9999997952187744, iteration: 348702
loss: 0.9971317052841187,grad_norm: 0.9571433581415407, iteration: 348703
loss: 0.968330442905426,grad_norm: 0.7612712825906158, iteration: 348704
loss: 0.9759570956230164,grad_norm: 0.9846379239754369, iteration: 348705
loss: 1.0137159824371338,grad_norm: 0.8274956697769577, iteration: 348706
loss: 1.0100462436676025,grad_norm: 0.7067672641307714, iteration: 348707
loss: 0.9996339678764343,grad_norm: 0.7819392523496098, iteration: 348708
loss: 0.9661855101585388,grad_norm: 0.9156229842188395, iteration: 348709
loss: 0.9788785576820374,grad_norm: 0.9267269702455306, iteration: 348710
loss: 1.0208959579467773,grad_norm: 0.9568975224756291, iteration: 348711
loss: 1.0140068531036377,grad_norm: 0.7178431541050914, iteration: 348712
loss: 1.0076956748962402,grad_norm: 0.8842909068843714, iteration: 348713
loss: 1.0110902786254883,grad_norm: 0.8299383297353594, iteration: 348714
loss: 0.9591856598854065,grad_norm: 0.8453563984881608, iteration: 348715
loss: 1.0038641691207886,grad_norm: 0.7383169716543514, iteration: 348716
loss: 1.0022329092025757,grad_norm: 0.7628603460242136, iteration: 348717
loss: 0.9783345460891724,grad_norm: 0.8959595687365275, iteration: 348718
loss: 0.977026104927063,grad_norm: 0.8290514646257197, iteration: 348719
loss: 0.9995427131652832,grad_norm: 0.8354364315819097, iteration: 348720
loss: 0.9739056825637817,grad_norm: 0.7722315792042157, iteration: 348721
loss: 1.0025725364685059,grad_norm: 0.906706528951424, iteration: 348722
loss: 1.00287663936615,grad_norm: 0.7392378400995236, iteration: 348723
loss: 1.013972282409668,grad_norm: 0.7803703326543581, iteration: 348724
loss: 0.9821822643280029,grad_norm: 0.7477505202217345, iteration: 348725
loss: 1.0020043849945068,grad_norm: 0.8093772249909634, iteration: 348726
loss: 0.998636782169342,grad_norm: 0.7298566939103062, iteration: 348727
loss: 0.9763554334640503,grad_norm: 0.7346437802226219, iteration: 348728
loss: 0.991844654083252,grad_norm: 0.742020849587875, iteration: 348729
loss: 1.0356903076171875,grad_norm: 0.8444175476595226, iteration: 348730
loss: 1.010211706161499,grad_norm: 0.9072527158064114, iteration: 348731
loss: 0.9953629374504089,grad_norm: 0.8635568564980731, iteration: 348732
loss: 1.013454556465149,grad_norm: 0.7830860583181686, iteration: 348733
loss: 1.0295919179916382,grad_norm: 0.8124151867463836, iteration: 348734
loss: 1.0558586120605469,grad_norm: 0.8892993847383197, iteration: 348735
loss: 0.9627504348754883,grad_norm: 0.8338757996120393, iteration: 348736
loss: 0.9676032662391663,grad_norm: 0.9999991777174824, iteration: 348737
loss: 0.9826939702033997,grad_norm: 0.7521620119004828, iteration: 348738
loss: 0.9591943025588989,grad_norm: 0.8170221673277143, iteration: 348739
loss: 0.9919292330741882,grad_norm: 0.7852424692151743, iteration: 348740
loss: 0.9952494502067566,grad_norm: 0.8616606336633467, iteration: 348741
loss: 1.001597285270691,grad_norm: 0.7497000956514129, iteration: 348742
loss: 0.9654222130775452,grad_norm: 0.8510456372708508, iteration: 348743
loss: 0.984221339225769,grad_norm: 0.7251892969637519, iteration: 348744
loss: 1.0040061473846436,grad_norm: 0.7285458891235882, iteration: 348745
loss: 1.0014432668685913,grad_norm: 0.8346301062913367, iteration: 348746
loss: 1.0282119512557983,grad_norm: 0.9999991910166507, iteration: 348747
loss: 0.9551185965538025,grad_norm: 0.7810682865739138, iteration: 348748
loss: 1.0162979364395142,grad_norm: 0.9298072663557577, iteration: 348749
loss: 0.9853146076202393,grad_norm: 0.9024483330820182, iteration: 348750
loss: 0.9749060869216919,grad_norm: 0.9231967111474377, iteration: 348751
loss: 0.9913777709007263,grad_norm: 0.7811593614433022, iteration: 348752
loss: 0.9979042410850525,grad_norm: 0.7430415699332235, iteration: 348753
loss: 0.9670450687408447,grad_norm: 0.8926734749635638, iteration: 348754
loss: 0.9645363688468933,grad_norm: 0.8400059668509273, iteration: 348755
loss: 0.9524008631706238,grad_norm: 0.8337559674031682, iteration: 348756
loss: 1.0026164054870605,grad_norm: 0.8029069136279089, iteration: 348757
loss: 0.9974513053894043,grad_norm: 0.9893628569604165, iteration: 348758
loss: 0.9831368923187256,grad_norm: 0.7963534052198269, iteration: 348759
loss: 1.0197948217391968,grad_norm: 0.9077928628072872, iteration: 348760
loss: 1.0070226192474365,grad_norm: 0.8249227482665055, iteration: 348761
loss: 0.9934743642807007,grad_norm: 0.7691088482475447, iteration: 348762
loss: 0.9973700046539307,grad_norm: 0.7817450714368349, iteration: 348763
loss: 0.9845640659332275,grad_norm: 0.7520778406132624, iteration: 348764
loss: 0.9769757390022278,grad_norm: 0.7594827389161763, iteration: 348765
loss: 1.0189666748046875,grad_norm: 0.9999995529355994, iteration: 348766
loss: 1.033859372138977,grad_norm: 0.9443587957412507, iteration: 348767
loss: 0.9853523373603821,grad_norm: 0.7056607555208239, iteration: 348768
loss: 1.0118154287338257,grad_norm: 0.667190059650629, iteration: 348769
loss: 1.0032336711883545,grad_norm: 0.796060484247881, iteration: 348770
loss: 1.0082602500915527,grad_norm: 0.7824024768001392, iteration: 348771
loss: 0.9827106595039368,grad_norm: 0.8890429815551429, iteration: 348772
loss: 0.9991674423217773,grad_norm: 0.7923120750036411, iteration: 348773
loss: 0.9820032119750977,grad_norm: 0.7581220333293915, iteration: 348774
loss: 0.9462170600891113,grad_norm: 0.9096248607142128, iteration: 348775
loss: 1.0008364915847778,grad_norm: 0.7048497943226465, iteration: 348776
loss: 0.9660073518753052,grad_norm: 0.8393889809795186, iteration: 348777
loss: 1.0127332210540771,grad_norm: 0.7427667410575578, iteration: 348778
loss: 1.0168578624725342,grad_norm: 0.9999999789801042, iteration: 348779
loss: 0.9543014764785767,grad_norm: 0.7552833640187745, iteration: 348780
loss: 1.0265699625015259,grad_norm: 0.7594307720367699, iteration: 348781
loss: 1.0059417486190796,grad_norm: 0.852204667050975, iteration: 348782
loss: 1.025826334953308,grad_norm: 0.7960682159338573, iteration: 348783
loss: 0.9871141314506531,grad_norm: 0.7644973335267201, iteration: 348784
loss: 0.9984586834907532,grad_norm: 0.711218256825177, iteration: 348785
loss: 0.9911580681800842,grad_norm: 0.8882127159394665, iteration: 348786
loss: 1.0491485595703125,grad_norm: 0.8900243321632502, iteration: 348787
loss: 0.9879282116889954,grad_norm: 0.8215522112752947, iteration: 348788
loss: 0.997349202632904,grad_norm: 0.7627506924076249, iteration: 348789
loss: 0.9855349659919739,grad_norm: 0.6886113168316906, iteration: 348790
loss: 0.9895327687263489,grad_norm: 0.9999989701510373, iteration: 348791
loss: 1.023301362991333,grad_norm: 0.942443537114919, iteration: 348792
loss: 1.0148427486419678,grad_norm: 0.6514807443400323, iteration: 348793
loss: 1.0036510229110718,grad_norm: 0.69063650694242, iteration: 348794
loss: 0.9911696314811707,grad_norm: 0.8222846600952025, iteration: 348795
loss: 1.0363675355911255,grad_norm: 0.7845819143534408, iteration: 348796
loss: 0.9881054162979126,grad_norm: 0.9368452472458402, iteration: 348797
loss: 0.9992397427558899,grad_norm: 0.8622057216266217, iteration: 348798
loss: 1.0140475034713745,grad_norm: 0.754637226398523, iteration: 348799
loss: 1.0192475318908691,grad_norm: 0.8509432301554559, iteration: 348800
loss: 0.9707204103469849,grad_norm: 0.8065474294079994, iteration: 348801
loss: 1.004635214805603,grad_norm: 0.8300105856713598, iteration: 348802
loss: 0.9922153949737549,grad_norm: 0.9312408858764186, iteration: 348803
loss: 0.9835987687110901,grad_norm: 0.91137256545257, iteration: 348804
loss: 0.9901362657546997,grad_norm: 0.9260157952870403, iteration: 348805
loss: 0.9921800494194031,grad_norm: 0.9999992037058967, iteration: 348806
loss: 0.9699105024337769,grad_norm: 0.8026489075644097, iteration: 348807
loss: 1.0484118461608887,grad_norm: 0.9999991536290075, iteration: 348808
loss: 0.9819680452346802,grad_norm: 0.9999989523353771, iteration: 348809
loss: 1.0374064445495605,grad_norm: 0.7692493208821456, iteration: 348810
loss: 1.0191043615341187,grad_norm: 0.8782813398119538, iteration: 348811
loss: 1.031795859336853,grad_norm: 0.804854368715733, iteration: 348812
loss: 0.9901579022407532,grad_norm: 0.768944605966132, iteration: 348813
loss: 0.9824196100234985,grad_norm: 0.8872294451819351, iteration: 348814
loss: 0.9953754544258118,grad_norm: 0.7446324141680289, iteration: 348815
loss: 0.9963425397872925,grad_norm: 0.7578263113246209, iteration: 348816
loss: 1.0151755809783936,grad_norm: 0.6433118827852169, iteration: 348817
loss: 1.0086661577224731,grad_norm: 0.7824406593592388, iteration: 348818
loss: 1.0284596681594849,grad_norm: 0.858221784597633, iteration: 348819
loss: 0.9738755822181702,grad_norm: 0.9168639751817182, iteration: 348820
loss: 1.0039490461349487,grad_norm: 0.8484726177942137, iteration: 348821
loss: 1.008723258972168,grad_norm: 0.695913377164879, iteration: 348822
loss: 1.059411644935608,grad_norm: 0.9999996837609804, iteration: 348823
loss: 1.0321240425109863,grad_norm: 0.727558873409836, iteration: 348824
loss: 0.988381564617157,grad_norm: 0.8055763742819193, iteration: 348825
loss: 0.9915786385536194,grad_norm: 0.7950324494716258, iteration: 348826
loss: 1.0460174083709717,grad_norm: 0.9239161985112456, iteration: 348827
loss: 1.0009156465530396,grad_norm: 0.7885879061358946, iteration: 348828
loss: 0.9749025702476501,grad_norm: 0.792270828836941, iteration: 348829
loss: 1.0175914764404297,grad_norm: 0.8127788206112138, iteration: 348830
loss: 1.0305123329162598,grad_norm: 0.7855954962824881, iteration: 348831
loss: 0.9927176833152771,grad_norm: 0.79712293951602, iteration: 348832
loss: 1.0382053852081299,grad_norm: 0.7278355080715371, iteration: 348833
loss: 0.961717426776886,grad_norm: 0.7382184157718927, iteration: 348834
loss: 1.125473141670227,grad_norm: 0.8672984784637269, iteration: 348835
loss: 0.9787941575050354,grad_norm: 0.9032611739346301, iteration: 348836
loss: 0.9839501976966858,grad_norm: 0.7663670808326785, iteration: 348837
loss: 1.024929404258728,grad_norm: 0.7264659551775041, iteration: 348838
loss: 1.0212615728378296,grad_norm: 0.7562642644879222, iteration: 348839
loss: 0.9893513321876526,grad_norm: 0.8615044915056022, iteration: 348840
loss: 1.0054371356964111,grad_norm: 0.7423764103298772, iteration: 348841
loss: 1.016217589378357,grad_norm: 0.8684569052236918, iteration: 348842
loss: 0.9943653345108032,grad_norm: 0.7575016851631139, iteration: 348843
loss: 1.007421851158142,grad_norm: 0.83274517035901, iteration: 348844
loss: 1.005475640296936,grad_norm: 0.910222379031253, iteration: 348845
loss: 1.0184876918792725,grad_norm: 0.9999996986630423, iteration: 348846
loss: 0.9753966331481934,grad_norm: 0.8286471798194289, iteration: 348847
loss: 1.183774709701538,grad_norm: 0.9999999256252342, iteration: 348848
loss: 1.0271693468093872,grad_norm: 0.9273306661263745, iteration: 348849
loss: 0.9997380375862122,grad_norm: 0.7301991674166723, iteration: 348850
loss: 0.9874858260154724,grad_norm: 0.7044873136548778, iteration: 348851
loss: 1.0184298753738403,grad_norm: 0.9999991380927449, iteration: 348852
loss: 1.0075112581253052,grad_norm: 0.9100530484063376, iteration: 348853
loss: 0.98906010389328,grad_norm: 0.8901230156775223, iteration: 348854
loss: 1.0178413391113281,grad_norm: 0.6919517222803904, iteration: 348855
loss: 0.9912035465240479,grad_norm: 0.812657213973956, iteration: 348856
loss: 1.015250563621521,grad_norm: 0.759689307193151, iteration: 348857
loss: 1.0133092403411865,grad_norm: 0.9222464630279098, iteration: 348858
loss: 0.9923844933509827,grad_norm: 0.6475013320017143, iteration: 348859
loss: 1.036659598350525,grad_norm: 0.8629979269811102, iteration: 348860
loss: 0.9768084287643433,grad_norm: 0.7834917885816266, iteration: 348861
loss: 0.9646927118301392,grad_norm: 0.9965180398154033, iteration: 348862
loss: 0.9966035485267639,grad_norm: 0.7669960239998019, iteration: 348863
loss: 1.0077979564666748,grad_norm: 0.9273873067874151, iteration: 348864
loss: 0.9593386054039001,grad_norm: 0.797466845700966, iteration: 348865
loss: 1.0067239999771118,grad_norm: 0.9999991910807879, iteration: 348866
loss: 0.9747019410133362,grad_norm: 0.8137406203473981, iteration: 348867
loss: 1.038164734840393,grad_norm: 0.7655422431014781, iteration: 348868
loss: 0.9966580867767334,grad_norm: 0.8570861511537098, iteration: 348869
loss: 1.0450278520584106,grad_norm: 0.7469366145512569, iteration: 348870
loss: 1.0082683563232422,grad_norm: 0.8445762658468225, iteration: 348871
loss: 0.9946101307868958,grad_norm: 0.9026219391159425, iteration: 348872
loss: 0.9843577742576599,grad_norm: 0.8518009035896376, iteration: 348873
loss: 1.006033182144165,grad_norm: 0.7821482647841163, iteration: 348874
loss: 1.0312007665634155,grad_norm: 0.9565645972776766, iteration: 348875
loss: 1.0410058498382568,grad_norm: 0.9003265199374708, iteration: 348876
loss: 1.00861394405365,grad_norm: 0.8313866757275811, iteration: 348877
loss: 1.0050610303878784,grad_norm: 0.8924873942000484, iteration: 348878
loss: 0.9957394003868103,grad_norm: 0.8121344566194678, iteration: 348879
loss: 1.0195457935333252,grad_norm: 0.7633021249539341, iteration: 348880
loss: 1.0090144872665405,grad_norm: 0.9702389463849587, iteration: 348881
loss: 1.020365595817566,grad_norm: 0.8394526297154126, iteration: 348882
loss: 1.0099490880966187,grad_norm: 0.8871625811133631, iteration: 348883
loss: 1.0232290029525757,grad_norm: 0.9338244909479333, iteration: 348884
loss: 0.9812009334564209,grad_norm: 0.8454637740896228, iteration: 348885
loss: 1.0140477418899536,grad_norm: 0.9717011193402252, iteration: 348886
loss: 1.0032275915145874,grad_norm: 0.7798485482079632, iteration: 348887
loss: 0.960166335105896,grad_norm: 0.8533882389883608, iteration: 348888
loss: 0.9880396723747253,grad_norm: 0.7555639796128927, iteration: 348889
loss: 1.012112021446228,grad_norm: 0.8494976054018174, iteration: 348890
loss: 0.9892730116844177,grad_norm: 0.7669528275349188, iteration: 348891
loss: 0.9862357378005981,grad_norm: 0.7892593718322739, iteration: 348892
loss: 0.999819815158844,grad_norm: 0.9336113886145351, iteration: 348893
loss: 0.9630815386772156,grad_norm: 0.7896181998862221, iteration: 348894
loss: 1.0371946096420288,grad_norm: 0.9496773493575511, iteration: 348895
loss: 0.9719433188438416,grad_norm: 0.7645605788651062, iteration: 348896
loss: 1.0256932973861694,grad_norm: 0.9235985784394549, iteration: 348897
loss: 1.0082387924194336,grad_norm: 0.800301135148758, iteration: 348898
loss: 1.1209644079208374,grad_norm: 0.999999426669638, iteration: 348899
loss: 1.015328288078308,grad_norm: 0.7854085935768358, iteration: 348900
loss: 0.9969784617424011,grad_norm: 0.825046700596917, iteration: 348901
loss: 0.9764105081558228,grad_norm: 0.8599137917721427, iteration: 348902
loss: 0.9710819125175476,grad_norm: 0.9832377413992728, iteration: 348903
loss: 1.0153992176055908,grad_norm: 0.6642424907813296, iteration: 348904
loss: 0.9986565709114075,grad_norm: 0.9639114311306705, iteration: 348905
loss: 0.998262345790863,grad_norm: 0.708040461554569, iteration: 348906
loss: 1.0385230779647827,grad_norm: 0.9999991335220434, iteration: 348907
loss: 1.0050122737884521,grad_norm: 0.9999999517250137, iteration: 348908
loss: 0.9842161536216736,grad_norm: 0.7901705866389702, iteration: 348909
loss: 1.0030897855758667,grad_norm: 0.9999991461881945, iteration: 348910
loss: 1.0759217739105225,grad_norm: 0.9340353125075809, iteration: 348911
loss: 0.9884733557701111,grad_norm: 0.9060760948987028, iteration: 348912
loss: 0.9915351271629333,grad_norm: 0.8784962194170962, iteration: 348913
loss: 0.9835748076438904,grad_norm: 0.8326614644145742, iteration: 348914
loss: 1.0109426975250244,grad_norm: 0.8261388681107306, iteration: 348915
loss: 1.0061209201812744,grad_norm: 0.7824816611485303, iteration: 348916
loss: 0.979343831539154,grad_norm: 0.968529447556678, iteration: 348917
loss: 0.9970716834068298,grad_norm: 0.9675202749592161, iteration: 348918
loss: 1.0168946981430054,grad_norm: 0.9999992097856918, iteration: 348919
loss: 0.9953650236129761,grad_norm: 0.7307984352447018, iteration: 348920
loss: 0.9998800158500671,grad_norm: 0.9271429834265253, iteration: 348921
loss: 1.0128397941589355,grad_norm: 0.8138863437303017, iteration: 348922
loss: 1.0848309993743896,grad_norm: 0.9999998507494336, iteration: 348923
loss: 0.9897658824920654,grad_norm: 0.9005445194209506, iteration: 348924
loss: 1.0330239534378052,grad_norm: 0.9919249357880946, iteration: 348925
loss: 0.9834607839584351,grad_norm: 0.9989506605845655, iteration: 348926
loss: 0.9982616305351257,grad_norm: 0.8999132908688736, iteration: 348927
loss: 1.011719822883606,grad_norm: 0.9264610831254123, iteration: 348928
loss: 1.0098658800125122,grad_norm: 0.88990765411619, iteration: 348929
loss: 1.0026918649673462,grad_norm: 0.8798135535522749, iteration: 348930
loss: 1.040343165397644,grad_norm: 0.6981080456349794, iteration: 348931
loss: 1.0672600269317627,grad_norm: 0.9999990339664973, iteration: 348932
loss: 1.0197954177856445,grad_norm: 0.858079382371703, iteration: 348933
loss: 0.9884148240089417,grad_norm: 0.970891467793259, iteration: 348934
loss: 0.9870567917823792,grad_norm: 0.7800078006247114, iteration: 348935
loss: 0.9989767670631409,grad_norm: 0.8830667539559769, iteration: 348936
loss: 1.0293370485305786,grad_norm: 0.9543654508490669, iteration: 348937
loss: 1.0421197414398193,grad_norm: 0.9999990696609914, iteration: 348938
loss: 0.992730438709259,grad_norm: 0.7357038673973567, iteration: 348939
loss: 1.0107731819152832,grad_norm: 0.9999996982239963, iteration: 348940
loss: 0.9834256768226624,grad_norm: 0.7411729420541492, iteration: 348941
loss: 0.9766244888305664,grad_norm: 0.9573630568508588, iteration: 348942
loss: 0.9903227686882019,grad_norm: 0.7563098609098828, iteration: 348943
loss: 0.998167097568512,grad_norm: 0.9266991413403987, iteration: 348944
loss: 0.9620997905731201,grad_norm: 0.9886882731564905, iteration: 348945
loss: 0.9906325340270996,grad_norm: 0.7002173880884965, iteration: 348946
loss: 1.022688627243042,grad_norm: 0.7247531382165204, iteration: 348947
loss: 1.0054149627685547,grad_norm: 0.9195338149710973, iteration: 348948
loss: 1.004459261894226,grad_norm: 0.8143588918156717, iteration: 348949
loss: 0.9853432178497314,grad_norm: 0.8137424252108005, iteration: 348950
loss: 1.005372166633606,grad_norm: 0.7959654044409342, iteration: 348951
loss: 1.0191723108291626,grad_norm: 0.7963358710511073, iteration: 348952
loss: 0.9964828491210938,grad_norm: 0.7506884510041362, iteration: 348953
loss: 1.0044699907302856,grad_norm: 0.8361664430295732, iteration: 348954
loss: 1.0159809589385986,grad_norm: 0.9999999887225236, iteration: 348955
loss: 1.0058391094207764,grad_norm: 0.8897818921034062, iteration: 348956
loss: 1.0108132362365723,grad_norm: 0.8951973465232544, iteration: 348957
loss: 0.9907311797142029,grad_norm: 0.965049931777241, iteration: 348958
loss: 1.0179378986358643,grad_norm: 0.7026945474893788, iteration: 348959
loss: 0.9890519976615906,grad_norm: 0.8664694068177986, iteration: 348960
loss: 0.9888014197349548,grad_norm: 0.9292420164380911, iteration: 348961
loss: 1.0697675943374634,grad_norm: 0.9999993482119566, iteration: 348962
loss: 1.0438231229782104,grad_norm: 0.8157966302159876, iteration: 348963
loss: 0.9664150476455688,grad_norm: 0.8283066881157676, iteration: 348964
loss: 0.9909796714782715,grad_norm: 0.8130270424585735, iteration: 348965
loss: 0.9977783560752869,grad_norm: 0.7913170336375871, iteration: 348966
loss: 1.034758448600769,grad_norm: 0.9999991201998888, iteration: 348967
loss: 1.0150178670883179,grad_norm: 0.916619879068278, iteration: 348968
loss: 0.9964305758476257,grad_norm: 0.8375676355453784, iteration: 348969
loss: 0.9946914911270142,grad_norm: 0.7543623102973036, iteration: 348970
loss: 0.9850007891654968,grad_norm: 0.7506747625033116, iteration: 348971
loss: 0.9780110716819763,grad_norm: 0.8717571278977068, iteration: 348972
loss: 0.9997951984405518,grad_norm: 0.822224417677311, iteration: 348973
loss: 1.024381160736084,grad_norm: 0.7836242452053392, iteration: 348974
loss: 0.9905369281768799,grad_norm: 0.7833737682758908, iteration: 348975
loss: 1.032199740409851,grad_norm: 0.9192623084382366, iteration: 348976
loss: 1.008102536201477,grad_norm: 0.6841337701300573, iteration: 348977
loss: 0.9766089916229248,grad_norm: 0.8166301078971445, iteration: 348978
loss: 0.9860291481018066,grad_norm: 0.7629345287634979, iteration: 348979
loss: 0.9700830578804016,grad_norm: 0.8055686523755662, iteration: 348980
loss: 1.0150487422943115,grad_norm: 0.722936670196909, iteration: 348981
loss: 0.9701060652732849,grad_norm: 0.8764904083068105, iteration: 348982
loss: 0.965778112411499,grad_norm: 0.6669382778801783, iteration: 348983
loss: 0.9738545417785645,grad_norm: 0.7699128275703363, iteration: 348984
loss: 0.999795138835907,grad_norm: 0.8465537216952866, iteration: 348985
loss: 0.9951428174972534,grad_norm: 0.9999989704807924, iteration: 348986
loss: 1.0165197849273682,grad_norm: 0.8985699532056638, iteration: 348987
loss: 1.027635097503662,grad_norm: 0.9999992905938748, iteration: 348988
loss: 1.0316041707992554,grad_norm: 0.9999990837259192, iteration: 348989
loss: 1.0240983963012695,grad_norm: 0.8463529664103542, iteration: 348990
loss: 1.024166226387024,grad_norm: 0.9745883057691712, iteration: 348991
loss: 0.973511278629303,grad_norm: 0.8670400225193404, iteration: 348992
loss: 1.0280271768569946,grad_norm: 0.9999994529399128, iteration: 348993
loss: 0.9815568327903748,grad_norm: 0.9165376028495827, iteration: 348994
loss: 1.0177099704742432,grad_norm: 0.9999989027403353, iteration: 348995
loss: 1.0151857137680054,grad_norm: 0.8889657090821421, iteration: 348996
loss: 1.010488510131836,grad_norm: 0.7434940112471168, iteration: 348997
loss: 1.0822159051895142,grad_norm: 0.999999067372072, iteration: 348998
loss: 1.0007069110870361,grad_norm: 0.94778792146167, iteration: 348999
loss: 0.9940024018287659,grad_norm: 0.9999992275734431, iteration: 349000
loss: 1.0418686866760254,grad_norm: 0.999999861377523, iteration: 349001
loss: 1.136834979057312,grad_norm: 0.9999998211632398, iteration: 349002
loss: 0.9911755919456482,grad_norm: 0.9028445321540653, iteration: 349003
loss: 0.9819220900535583,grad_norm: 0.7293302689381483, iteration: 349004
loss: 0.9819380640983582,grad_norm: 0.817686404214393, iteration: 349005
loss: 1.0729535818099976,grad_norm: 0.9181940549305168, iteration: 349006
loss: 0.9991649985313416,grad_norm: 0.7925598918069838, iteration: 349007
loss: 0.9906321167945862,grad_norm: 0.7323089877167258, iteration: 349008
loss: 0.9876975417137146,grad_norm: 0.8569564430008767, iteration: 349009
loss: 0.967767059803009,grad_norm: 0.8468037038150095, iteration: 349010
loss: 1.0323408842086792,grad_norm: 0.8294597926741969, iteration: 349011
loss: 0.9872083067893982,grad_norm: 0.8422385776751945, iteration: 349012
loss: 1.1830415725708008,grad_norm: 0.9999998818971336, iteration: 349013
loss: 0.9934241771697998,grad_norm: 0.7178310342264411, iteration: 349014
loss: 0.9852813482284546,grad_norm: 0.779002055205865, iteration: 349015
loss: 1.0161032676696777,grad_norm: 0.7579306344588014, iteration: 349016
loss: 1.0247654914855957,grad_norm: 0.9999993744068159, iteration: 349017
loss: 0.9605851173400879,grad_norm: 0.8535757237688707, iteration: 349018
loss: 1.0878077745437622,grad_norm: 0.9999996622873079, iteration: 349019
loss: 0.9997483491897583,grad_norm: 0.7446251775333104, iteration: 349020
loss: 1.0248812437057495,grad_norm: 0.965501062842486, iteration: 349021
loss: 0.9867437481880188,grad_norm: 0.9999992735085442, iteration: 349022
loss: 0.9775524139404297,grad_norm: 0.9102790060420468, iteration: 349023
loss: 1.009751796722412,grad_norm: 0.8161271285775622, iteration: 349024
loss: 0.9798036217689514,grad_norm: 0.7247315961749995, iteration: 349025
loss: 1.0687183141708374,grad_norm: 0.9485813884578639, iteration: 349026
loss: 0.9590694308280945,grad_norm: 0.7891187795301093, iteration: 349027
loss: 1.0182658433914185,grad_norm: 0.7589704704891459, iteration: 349028
loss: 1.0205613374710083,grad_norm: 0.982475825123208, iteration: 349029
loss: 1.0315468311309814,grad_norm: 0.7653178154197241, iteration: 349030
loss: 0.9801193475723267,grad_norm: 0.81840435510774, iteration: 349031
loss: 1.0018177032470703,grad_norm: 0.8000882051783358, iteration: 349032
loss: 0.9899974465370178,grad_norm: 0.8219025957507039, iteration: 349033
loss: 1.0235480070114136,grad_norm: 0.999999110703446, iteration: 349034
loss: 1.0184115171432495,grad_norm: 0.8151496458015219, iteration: 349035
loss: 0.9873956441879272,grad_norm: 0.9373240264209229, iteration: 349036
loss: 1.1079814434051514,grad_norm: 0.8852265488947405, iteration: 349037
loss: 0.9867086410522461,grad_norm: 0.9999993237199968, iteration: 349038
loss: 1.1112325191497803,grad_norm: 0.9161400736322904, iteration: 349039
loss: 0.9829788208007812,grad_norm: 0.8849055929603802, iteration: 349040
loss: 0.9690625071525574,grad_norm: 0.9999990892572644, iteration: 349041
loss: 1.0036441087722778,grad_norm: 0.999999112175771, iteration: 349042
loss: 1.0094010829925537,grad_norm: 0.9999998019929761, iteration: 349043
loss: 1.0486326217651367,grad_norm: 0.8271071536354581, iteration: 349044
loss: 0.9989805817604065,grad_norm: 0.773065404460918, iteration: 349045
loss: 0.9961683750152588,grad_norm: 0.7903649532132933, iteration: 349046
loss: 0.9714301824569702,grad_norm: 0.9657234243211048, iteration: 349047
loss: 1.0327322483062744,grad_norm: 0.999999524318505, iteration: 349048
loss: 1.022283673286438,grad_norm: 0.8555118865311442, iteration: 349049
loss: 1.00178062915802,grad_norm: 0.9825113309413017, iteration: 349050
loss: 0.9623693823814392,grad_norm: 0.8263082381850892, iteration: 349051
loss: 0.9780452251434326,grad_norm: 0.7257983777417055, iteration: 349052
loss: 1.0261095762252808,grad_norm: 0.9999990009725993, iteration: 349053
loss: 1.0073754787445068,grad_norm: 0.824949742793302, iteration: 349054
loss: 1.0139098167419434,grad_norm: 0.8099263074874682, iteration: 349055
loss: 1.0223733186721802,grad_norm: 0.8206007796578846, iteration: 349056
loss: 1.0037184953689575,grad_norm: 0.883076602418044, iteration: 349057
loss: 0.9971483945846558,grad_norm: 0.8154356559480377, iteration: 349058
loss: 1.0015828609466553,grad_norm: 0.7925150318692911, iteration: 349059
loss: 1.035954236984253,grad_norm: 0.9211359318903293, iteration: 349060
loss: 1.0385472774505615,grad_norm: 0.9999991911689015, iteration: 349061
loss: 0.968603253364563,grad_norm: 0.8293779916825121, iteration: 349062
loss: 1.0085043907165527,grad_norm: 0.8041751745253146, iteration: 349063
loss: 1.0934686660766602,grad_norm: 0.9999999226593796, iteration: 349064
loss: 0.9930999279022217,grad_norm: 0.9034309832626408, iteration: 349065
loss: 1.0128852128982544,grad_norm: 0.851072512330587, iteration: 349066
loss: 0.9959782361984253,grad_norm: 0.8700410800915261, iteration: 349067
loss: 1.0372775793075562,grad_norm: 0.7618321425560898, iteration: 349068
loss: 1.001311182975769,grad_norm: 0.7647025302426378, iteration: 349069
loss: 1.0179182291030884,grad_norm: 0.9739443228136034, iteration: 349070
loss: 1.0825169086456299,grad_norm: 0.99999981683761, iteration: 349071
loss: 1.019340991973877,grad_norm: 0.8164654518941581, iteration: 349072
loss: 0.9860053658485413,grad_norm: 0.7733469707942341, iteration: 349073
loss: 1.0037237405776978,grad_norm: 0.7919148933693626, iteration: 349074
loss: 1.0293809175491333,grad_norm: 0.9999999144097114, iteration: 349075
loss: 1.0006428956985474,grad_norm: 0.9154364438158797, iteration: 349076
loss: 0.9909529685974121,grad_norm: 0.7475842144335472, iteration: 349077
loss: 0.9928824305534363,grad_norm: 0.7264957779010127, iteration: 349078
loss: 0.9786245226860046,grad_norm: 0.9488030100807529, iteration: 349079
loss: 1.0008271932601929,grad_norm: 0.6704461679770289, iteration: 349080
loss: 1.0034527778625488,grad_norm: 0.9518583270320251, iteration: 349081
loss: 1.0154656171798706,grad_norm: 0.8728612023369872, iteration: 349082
loss: 0.9891203045845032,grad_norm: 0.9479599999634346, iteration: 349083
loss: 1.0036566257476807,grad_norm: 0.7704426585243489, iteration: 349084
loss: 1.0047320127487183,grad_norm: 0.9194750612918701, iteration: 349085
loss: 1.0061135292053223,grad_norm: 0.9551872418735499, iteration: 349086
loss: 0.9637725949287415,grad_norm: 0.7085070523691671, iteration: 349087
loss: 1.0251485109329224,grad_norm: 0.9999992684477103, iteration: 349088
loss: 1.0077810287475586,grad_norm: 0.9999991471218793, iteration: 349089
loss: 1.0042893886566162,grad_norm: 0.9999994057860476, iteration: 349090
loss: 1.0490376949310303,grad_norm: 0.8066566313292405, iteration: 349091
loss: 1.0092601776123047,grad_norm: 0.9672852457849259, iteration: 349092
loss: 0.9605873227119446,grad_norm: 0.9999993183806353, iteration: 349093
loss: 0.997704267501831,grad_norm: 0.9083149975617203, iteration: 349094
loss: 1.0216723680496216,grad_norm: 0.8613852564639555, iteration: 349095
loss: 0.9926576614379883,grad_norm: 0.8070674584313617, iteration: 349096
loss: 0.9892687797546387,grad_norm: 0.8893003599905441, iteration: 349097
loss: 1.0420070886611938,grad_norm: 0.8088323799934898, iteration: 349098
loss: 0.9907654523849487,grad_norm: 0.9124862099993374, iteration: 349099
loss: 1.0071895122528076,grad_norm: 0.9198586670010775, iteration: 349100
loss: 1.0210254192352295,grad_norm: 0.7525520585815145, iteration: 349101
loss: 0.9776383638381958,grad_norm: 0.7652139075530069, iteration: 349102
loss: 1.0169553756713867,grad_norm: 0.9999989796623423, iteration: 349103
loss: 1.047802209854126,grad_norm: 1.000000064380658, iteration: 349104
loss: 0.987589955329895,grad_norm: 0.7952150428905129, iteration: 349105
loss: 0.9827560782432556,grad_norm: 0.9999992497792606, iteration: 349106
loss: 1.0366475582122803,grad_norm: 0.9999992600528044, iteration: 349107
loss: 0.9769049286842346,grad_norm: 0.9999992452318612, iteration: 349108
loss: 0.9799490571022034,grad_norm: 0.9023482086464908, iteration: 349109
loss: 1.0260334014892578,grad_norm: 0.9999997440116233, iteration: 349110
loss: 1.034349799156189,grad_norm: 0.8190723744102014, iteration: 349111
loss: 0.9961956739425659,grad_norm: 0.999999114245475, iteration: 349112
loss: 1.0186890363693237,grad_norm: 0.9200231102544708, iteration: 349113
loss: 0.9799045324325562,grad_norm: 0.902926074108206, iteration: 349114
loss: 0.9424922466278076,grad_norm: 0.9102235751647331, iteration: 349115
loss: 1.0101213455200195,grad_norm: 0.8687588934732176, iteration: 349116
loss: 1.0969046354293823,grad_norm: 0.999999012410163, iteration: 349117
loss: 0.9823445677757263,grad_norm: 0.7643244549145157, iteration: 349118
loss: 1.0190218687057495,grad_norm: 0.9999990620870838, iteration: 349119
loss: 0.9912149310112,grad_norm: 0.7061460241761797, iteration: 349120
loss: 0.9808391332626343,grad_norm: 0.7826089763294687, iteration: 349121
loss: 0.9696035981178284,grad_norm: 0.7258194033508912, iteration: 349122
loss: 0.9564037322998047,grad_norm: 0.8451914581594934, iteration: 349123
loss: 0.9923309087753296,grad_norm: 0.8102195172409371, iteration: 349124
loss: 1.023985743522644,grad_norm: 0.7947033705998655, iteration: 349125
loss: 0.9931716918945312,grad_norm: 0.9999995412405142, iteration: 349126
loss: 1.015737771987915,grad_norm: 0.7542344954579603, iteration: 349127
loss: 0.9676370620727539,grad_norm: 0.9320793701241927, iteration: 349128
loss: 1.0217608213424683,grad_norm: 0.7209902516786358, iteration: 349129
loss: 1.050016164779663,grad_norm: 0.7905704729423514, iteration: 349130
loss: 0.9812726378440857,grad_norm: 0.819192912408949, iteration: 349131
loss: 0.9799820780754089,grad_norm: 0.7693092863213066, iteration: 349132
loss: 0.9747133255004883,grad_norm: 0.7510500924948891, iteration: 349133
loss: 1.022823452949524,grad_norm: 0.9046451332170851, iteration: 349134
loss: 0.9902225136756897,grad_norm: 0.7546093624236885, iteration: 349135
loss: 0.9840465784072876,grad_norm: 0.7911005684100946, iteration: 349136
loss: 0.9655371308326721,grad_norm: 0.83079712630881, iteration: 349137
loss: 1.0490520000457764,grad_norm: 0.850639846180922, iteration: 349138
loss: 1.0027302503585815,grad_norm: 0.8458269758172434, iteration: 349139
loss: 0.987181544303894,grad_norm: 0.7416026926233872, iteration: 349140
loss: 0.9934528470039368,grad_norm: 0.7018984571284803, iteration: 349141
loss: 1.0133090019226074,grad_norm: 0.9313873657411594, iteration: 349142
loss: 1.0245293378829956,grad_norm: 0.9619361260625037, iteration: 349143
loss: 1.0380220413208008,grad_norm: 0.9999990890939465, iteration: 349144
loss: 1.0123376846313477,grad_norm: 0.8193980370861856, iteration: 349145
loss: 0.9852724075317383,grad_norm: 0.85013672687417, iteration: 349146
loss: 0.9671648740768433,grad_norm: 0.8538158003369027, iteration: 349147
loss: 0.967342734336853,grad_norm: 0.7614464106699659, iteration: 349148
loss: 1.0013664960861206,grad_norm: 0.8181525263032849, iteration: 349149
loss: 1.0051606893539429,grad_norm: 0.6717675785461368, iteration: 349150
loss: 1.003430724143982,grad_norm: 0.8613302878114635, iteration: 349151
loss: 1.0105679035186768,grad_norm: 0.7802103501069209, iteration: 349152
loss: 0.9870499968528748,grad_norm: 0.7417141501833489, iteration: 349153
loss: 1.0184274911880493,grad_norm: 0.8511762665283442, iteration: 349154
loss: 1.0003801584243774,grad_norm: 0.8418668922731333, iteration: 349155
loss: 0.9779626727104187,grad_norm: 0.7994375089331388, iteration: 349156
loss: 1.0296223163604736,grad_norm: 0.8185503797739204, iteration: 349157
loss: 1.0067297220230103,grad_norm: 0.9393280862069732, iteration: 349158
loss: 0.9753592014312744,grad_norm: 0.8784611436374251, iteration: 349159
loss: 1.0611525774002075,grad_norm: 0.999999520254894, iteration: 349160
loss: 0.9758386611938477,grad_norm: 0.9504155347072049, iteration: 349161
loss: 1.0141546726226807,grad_norm: 0.6336772371366984, iteration: 349162
loss: 0.9781695604324341,grad_norm: 0.8031639476945113, iteration: 349163
loss: 0.9981184005737305,grad_norm: 0.8843891653644926, iteration: 349164
loss: 1.0289026498794556,grad_norm: 0.964963289892991, iteration: 349165
loss: 0.9924502372741699,grad_norm: 0.8843333722993313, iteration: 349166
loss: 0.9879950881004333,grad_norm: 0.7749777915421399, iteration: 349167
loss: 1.000325322151184,grad_norm: 0.9999995863712758, iteration: 349168
loss: 0.9951209425926208,grad_norm: 0.7331687896464537, iteration: 349169
loss: 0.9960911870002747,grad_norm: 0.6955573107886913, iteration: 349170
loss: 0.9941251277923584,grad_norm: 0.818272479601895, iteration: 349171
loss: 1.0319373607635498,grad_norm: 0.9661453054098756, iteration: 349172
loss: 1.1213873624801636,grad_norm: 0.9999997761804943, iteration: 349173
loss: 1.0082268714904785,grad_norm: 0.7945477212188098, iteration: 349174
loss: 1.0033949613571167,grad_norm: 0.862678854158534, iteration: 349175
loss: 1.059486746788025,grad_norm: 0.7865939991859434, iteration: 349176
loss: 0.9853233695030212,grad_norm: 0.8986104644232563, iteration: 349177
loss: 0.9780831933021545,grad_norm: 0.7518349502223021, iteration: 349178
loss: 0.9956540465354919,grad_norm: 0.8523774969626304, iteration: 349179
loss: 1.0037639141082764,grad_norm: 0.7759467110982545, iteration: 349180
loss: 1.020979642868042,grad_norm: 0.9999992107791548, iteration: 349181
loss: 0.9786682724952698,grad_norm: 0.8516873123126906, iteration: 349182
loss: 0.9898861050605774,grad_norm: 0.7478751334113121, iteration: 349183
loss: 0.9513610601425171,grad_norm: 0.7074239506132777, iteration: 349184
loss: 0.9933721423149109,grad_norm: 0.7108772974839074, iteration: 349185
loss: 1.003308892250061,grad_norm: 0.805837097025996, iteration: 349186
loss: 0.994097113609314,grad_norm: 0.6772401078764474, iteration: 349187
loss: 1.0244139432907104,grad_norm: 0.8040804784473673, iteration: 349188
loss: 1.0829458236694336,grad_norm: 0.9999994848138775, iteration: 349189
loss: 1.016337275505066,grad_norm: 0.8030368940951903, iteration: 349190
loss: 0.9879295825958252,grad_norm: 0.8007971089597439, iteration: 349191
loss: 1.0047245025634766,grad_norm: 0.7958751689987106, iteration: 349192
loss: 0.9577564597129822,grad_norm: 0.7665841445529493, iteration: 349193
loss: 1.0256882905960083,grad_norm: 0.7548826843886944, iteration: 349194
loss: 1.0122452974319458,grad_norm: 0.9087672726410205, iteration: 349195
loss: 1.0352089405059814,grad_norm: 0.999999896490029, iteration: 349196
loss: 1.003213882446289,grad_norm: 0.9999993376980465, iteration: 349197
loss: 1.035345435142517,grad_norm: 0.9155796856338045, iteration: 349198
loss: 0.9917536973953247,grad_norm: 0.9999992017836401, iteration: 349199
loss: 0.9647045135498047,grad_norm: 0.8045117658680726, iteration: 349200
loss: 0.9938344955444336,grad_norm: 0.988959395121194, iteration: 349201
loss: 1.0631552934646606,grad_norm: 0.7786195491074339, iteration: 349202
loss: 0.9914098978042603,grad_norm: 0.9999991741270303, iteration: 349203
loss: 1.0029476881027222,grad_norm: 0.7895923833027229, iteration: 349204
loss: 1.0022506713867188,grad_norm: 0.8327978394974969, iteration: 349205
loss: 1.0354676246643066,grad_norm: 0.999999103735238, iteration: 349206
loss: 1.1065688133239746,grad_norm: 0.999999620437099, iteration: 349207
loss: 1.0476375818252563,grad_norm: 0.9999990129574714, iteration: 349208
loss: 0.99418705701828,grad_norm: 0.8453275862674613, iteration: 349209
loss: 0.9126928448677063,grad_norm: 0.9703298020022706, iteration: 349210
loss: 1.0363191366195679,grad_norm: 0.8236223102129575, iteration: 349211
loss: 0.9994086027145386,grad_norm: 0.9999998866907333, iteration: 349212
loss: 1.0021154880523682,grad_norm: 0.6882211662653189, iteration: 349213
loss: 1.009629487991333,grad_norm: 0.6500892784506728, iteration: 349214
loss: 0.9984388947486877,grad_norm: 0.8597650719624489, iteration: 349215
loss: 0.9937021732330322,grad_norm: 0.7172641755965309, iteration: 349216
loss: 0.996300220489502,grad_norm: 0.8692061324180985, iteration: 349217
loss: 1.0269404649734497,grad_norm: 0.9999990600745795, iteration: 349218
loss: 0.9829907417297363,grad_norm: 0.7199806924930894, iteration: 349219
loss: 1.0165051221847534,grad_norm: 0.9181906588488786, iteration: 349220
loss: 1.0198063850402832,grad_norm: 0.7559745723947621, iteration: 349221
loss: 1.0271995067596436,grad_norm: 0.8168189358089446, iteration: 349222
loss: 0.9905547499656677,grad_norm: 0.7798053438721695, iteration: 349223
loss: 0.9748870730400085,grad_norm: 0.802780163867351, iteration: 349224
loss: 0.9901861548423767,grad_norm: 0.8560380080492273, iteration: 349225
loss: 1.0049594640731812,grad_norm: 0.8142734246259693, iteration: 349226
loss: 0.9754664301872253,grad_norm: 0.8341361915194383, iteration: 349227
loss: 0.9985408782958984,grad_norm: 0.7302459163716732, iteration: 349228
loss: 1.0274593830108643,grad_norm: 0.9301511162456283, iteration: 349229
loss: 1.0531996488571167,grad_norm: 0.9999993663476051, iteration: 349230
loss: 1.0082409381866455,grad_norm: 0.8688352151996492, iteration: 349231
loss: 0.9826211333274841,grad_norm: 0.7776495181231882, iteration: 349232
loss: 0.973673403263092,grad_norm: 0.7356104073016375, iteration: 349233
loss: 1.028694748878479,grad_norm: 0.9999990948576493, iteration: 349234
loss: 1.0144984722137451,grad_norm: 0.7054029941264643, iteration: 349235
loss: 0.980258584022522,grad_norm: 0.9999991719604925, iteration: 349236
loss: 0.9682994484901428,grad_norm: 0.7687385047847518, iteration: 349237
loss: 0.9670722484588623,grad_norm: 0.724155645605845, iteration: 349238
loss: 1.01917564868927,grad_norm: 0.766725876877959, iteration: 349239
loss: 1.0009621381759644,grad_norm: 0.8216364633161792, iteration: 349240
loss: 1.0360177755355835,grad_norm: 0.9999992920404466, iteration: 349241
loss: 1.0127774477005005,grad_norm: 0.807405749199105, iteration: 349242
loss: 0.9940178990364075,grad_norm: 0.8892675549925296, iteration: 349243
loss: 0.9944975972175598,grad_norm: 0.7960240108842395, iteration: 349244
loss: 1.0178956985473633,grad_norm: 0.784032425569904, iteration: 349245
loss: 0.9830499887466431,grad_norm: 0.7034562723660823, iteration: 349246
loss: 0.979484498500824,grad_norm: 0.7630558421205422, iteration: 349247
loss: 0.9590349793434143,grad_norm: 0.8433962232069654, iteration: 349248
loss: 1.0035598278045654,grad_norm: 0.8645072900479767, iteration: 349249
loss: 0.9623184204101562,grad_norm: 0.8642995552749152, iteration: 349250
loss: 1.0034775733947754,grad_norm: 0.9999998399834392, iteration: 349251
loss: 1.011997103691101,grad_norm: 0.6798258365837856, iteration: 349252
loss: 0.9866431355476379,grad_norm: 0.8183359851678083, iteration: 349253
loss: 0.9895100593566895,grad_norm: 0.9060722526818211, iteration: 349254
loss: 1.0144612789154053,grad_norm: 0.6752150863469142, iteration: 349255
loss: 1.0257933139801025,grad_norm: 0.9277666518909853, iteration: 349256
loss: 1.0057930946350098,grad_norm: 0.7867525603458022, iteration: 349257
loss: 0.9689881801605225,grad_norm: 0.7971145287534295, iteration: 349258
loss: 0.9950375556945801,grad_norm: 0.9999991945631461, iteration: 349259
loss: 0.9936127662658691,grad_norm: 0.826349357257178, iteration: 349260
loss: 1.008517861366272,grad_norm: 0.7048150426371336, iteration: 349261
loss: 1.000924825668335,grad_norm: 0.8524398093518272, iteration: 349262
loss: 0.990821123123169,grad_norm: 0.7640618932317524, iteration: 349263
loss: 1.0021731853485107,grad_norm: 0.9999997535796883, iteration: 349264
loss: 1.0318310260772705,grad_norm: 0.8451173925815286, iteration: 349265
loss: 1.0007365942001343,grad_norm: 0.9466441129201206, iteration: 349266
loss: 1.013472557067871,grad_norm: 0.8953505864174176, iteration: 349267
loss: 0.9872134327888489,grad_norm: 0.8010778741765185, iteration: 349268
loss: 0.9809432029724121,grad_norm: 0.8816798747308529, iteration: 349269
loss: 1.0085957050323486,grad_norm: 0.883373649054602, iteration: 349270
loss: 0.9952687621116638,grad_norm: 0.9413255533496101, iteration: 349271
loss: 1.0197187662124634,grad_norm: 0.8265406025872002, iteration: 349272
loss: 0.9903519153594971,grad_norm: 0.8392281572482004, iteration: 349273
loss: 0.9936109185218811,grad_norm: 0.8557805233814901, iteration: 349274
loss: 0.9804179072380066,grad_norm: 0.7523485702761459, iteration: 349275
loss: 0.9786708950996399,grad_norm: 0.8354450966388184, iteration: 349276
loss: 1.0243864059448242,grad_norm: 0.7765520466409582, iteration: 349277
loss: 0.9836311936378479,grad_norm: 0.9999989896103035, iteration: 349278
loss: 1.0208430290222168,grad_norm: 0.6891495959084524, iteration: 349279
loss: 0.9907603859901428,grad_norm: 0.9999991556882836, iteration: 349280
loss: 1.0292717218399048,grad_norm: 0.894315177554062, iteration: 349281
loss: 0.9781352877616882,grad_norm: 0.7809451700184991, iteration: 349282
loss: 1.0266685485839844,grad_norm: 0.8180245549035473, iteration: 349283
loss: 0.9737578630447388,grad_norm: 0.8295072067389813, iteration: 349284
loss: 0.9999878406524658,grad_norm: 0.9999996004037154, iteration: 349285
loss: 0.9986902475357056,grad_norm: 0.7466384678954839, iteration: 349286
loss: 1.0383288860321045,grad_norm: 0.7651755682859043, iteration: 349287
loss: 0.9974048733711243,grad_norm: 0.9999992032728753, iteration: 349288
loss: 0.984234094619751,grad_norm: 0.6481767925266252, iteration: 349289
loss: 0.9985606074333191,grad_norm: 0.9025224648236269, iteration: 349290
loss: 1.0227195024490356,grad_norm: 0.894583403518105, iteration: 349291
loss: 1.0130797624588013,grad_norm: 0.8534350478016748, iteration: 349292
loss: 0.9699893593788147,grad_norm: 0.8023938510895363, iteration: 349293
loss: 0.9806699752807617,grad_norm: 0.6862446082842116, iteration: 349294
loss: 1.025368094444275,grad_norm: 0.7649919448945133, iteration: 349295
loss: 0.977813720703125,grad_norm: 0.840820777062252, iteration: 349296
loss: 1.006455659866333,grad_norm: 0.8772845770398028, iteration: 349297
loss: 0.9515236616134644,grad_norm: 0.8142243408320403, iteration: 349298
loss: 0.9954684972763062,grad_norm: 0.9483563915927391, iteration: 349299
loss: 0.9792676568031311,grad_norm: 0.9483333577006855, iteration: 349300
loss: 0.9930814504623413,grad_norm: 0.7364347849230357, iteration: 349301
loss: 1.0000793933868408,grad_norm: 0.6917171489622383, iteration: 349302
loss: 1.0034703016281128,grad_norm: 0.7531602016837429, iteration: 349303
loss: 0.9864301681518555,grad_norm: 0.8735468846975757, iteration: 349304
loss: 0.9785445928573608,grad_norm: 0.87081609571961, iteration: 349305
loss: 0.9573239088058472,grad_norm: 0.7873942846954843, iteration: 349306
loss: 0.9938743114471436,grad_norm: 0.7867742703726437, iteration: 349307
loss: 1.0354300737380981,grad_norm: 0.9999996212851104, iteration: 349308
loss: 0.9793891906738281,grad_norm: 0.8262449723016246, iteration: 349309
loss: 0.9705812335014343,grad_norm: 0.8423270860231756, iteration: 349310
loss: 0.9893871545791626,grad_norm: 0.8443292271113361, iteration: 349311
loss: 0.9980627298355103,grad_norm: 0.8947032845338134, iteration: 349312
loss: 1.0157510042190552,grad_norm: 0.6822747646298091, iteration: 349313
loss: 1.0015954971313477,grad_norm: 0.8103230260752357, iteration: 349314
loss: 0.9954981207847595,grad_norm: 0.7100524473106309, iteration: 349315
loss: 1.0128626823425293,grad_norm: 0.7732879787896515, iteration: 349316
loss: 0.9968093633651733,grad_norm: 0.8418056064190576, iteration: 349317
loss: 0.9686458110809326,grad_norm: 0.8068581163876826, iteration: 349318
loss: 1.0081384181976318,grad_norm: 0.7512245496901037, iteration: 349319
loss: 1.0149751901626587,grad_norm: 0.6953359143555816, iteration: 349320
loss: 1.0047762393951416,grad_norm: 0.7697976168377975, iteration: 349321
loss: 0.9921705722808838,grad_norm: 0.8841489607539169, iteration: 349322
loss: 1.0583001375198364,grad_norm: 0.9999991062146903, iteration: 349323
loss: 0.9495328664779663,grad_norm: 0.8326660875284169, iteration: 349324
loss: 0.9706018567085266,grad_norm: 0.9999994227531278, iteration: 349325
loss: 1.035556435585022,grad_norm: 0.7313658616681602, iteration: 349326
loss: 1.0025948286056519,grad_norm: 0.7905150697934288, iteration: 349327
loss: 0.9879100322723389,grad_norm: 0.9304432792846302, iteration: 349328
loss: 1.047115683555603,grad_norm: 0.8238550243262347, iteration: 349329
loss: 1.0209382772445679,grad_norm: 0.8144469307910462, iteration: 349330
loss: 0.9989709258079529,grad_norm: 0.8491219748484428, iteration: 349331
loss: 0.9945712089538574,grad_norm: 0.7934705928743332, iteration: 349332
loss: 1.0292117595672607,grad_norm: 0.9056653740033842, iteration: 349333
loss: 1.0153743028640747,grad_norm: 0.8911577417526839, iteration: 349334
loss: 1.0064189434051514,grad_norm: 0.9273689320694962, iteration: 349335
loss: 0.9782661199569702,grad_norm: 0.8700620801680994, iteration: 349336
loss: 0.9695858955383301,grad_norm: 0.732086784884383, iteration: 349337
loss: 1.0139206647872925,grad_norm: 0.7533232501830486, iteration: 349338
loss: 0.9846240878105164,grad_norm: 0.7669130341704101, iteration: 349339
loss: 0.9807087779045105,grad_norm: 0.8501155947173721, iteration: 349340
loss: 1.003967046737671,grad_norm: 0.6952970985029128, iteration: 349341
loss: 1.0013567209243774,grad_norm: 0.999999952057056, iteration: 349342
loss: 1.0350407361984253,grad_norm: 0.8721200503175204, iteration: 349343
loss: 0.9920621514320374,grad_norm: 0.8494803349642148, iteration: 349344
loss: 1.0108073949813843,grad_norm: 0.8958101594687187, iteration: 349345
loss: 0.9970866441726685,grad_norm: 0.8288233823266178, iteration: 349346
loss: 1.0093053579330444,grad_norm: 0.9894521871124389, iteration: 349347
loss: 0.9668082594871521,grad_norm: 0.7767536779786636, iteration: 349348
loss: 1.03126060962677,grad_norm: 0.9883654812828236, iteration: 349349
loss: 1.0090678930282593,grad_norm: 0.7449697465389401, iteration: 349350
loss: 1.0326231718063354,grad_norm: 0.9999991314781536, iteration: 349351
loss: 1.0021554231643677,grad_norm: 0.8952610631397951, iteration: 349352
loss: 1.0515037775039673,grad_norm: 0.8109920001305376, iteration: 349353
loss: 1.021013617515564,grad_norm: 0.7684957730839423, iteration: 349354
loss: 1.0192925930023193,grad_norm: 0.9404153438324904, iteration: 349355
loss: 0.9932236075401306,grad_norm: 0.697160399502811, iteration: 349356
loss: 0.9524865746498108,grad_norm: 0.8506847999007469, iteration: 349357
loss: 0.9922939538955688,grad_norm: 0.8781408042646351, iteration: 349358
loss: 1.0195199251174927,grad_norm: 0.9411870573870333, iteration: 349359
loss: 1.0236512422561646,grad_norm: 0.8478057659842057, iteration: 349360
loss: 1.004149317741394,grad_norm: 0.6589640334770017, iteration: 349361
loss: 0.9886990785598755,grad_norm: 0.7972593153964204, iteration: 349362
loss: 0.9819199442863464,grad_norm: 0.7988278806558823, iteration: 349363
loss: 1.0226558446884155,grad_norm: 0.9999998352307297, iteration: 349364
loss: 0.9929991364479065,grad_norm: 0.7465782016865241, iteration: 349365
loss: 0.9811527729034424,grad_norm: 0.7287286883189193, iteration: 349366
loss: 1.003275990486145,grad_norm: 0.7781219609865389, iteration: 349367
loss: 1.03876531124115,grad_norm: 0.9999994498676463, iteration: 349368
loss: 1.0103470087051392,grad_norm: 0.7888782725036187, iteration: 349369
loss: 1.004886507987976,grad_norm: 0.6462183829383833, iteration: 349370
loss: 0.9894703030586243,grad_norm: 0.6953081065075988, iteration: 349371
loss: 1.0162150859832764,grad_norm: 0.7747220192780292, iteration: 349372
loss: 0.9969902634620667,grad_norm: 0.9999991675405342, iteration: 349373
loss: 0.9794093370437622,grad_norm: 0.8828615453647438, iteration: 349374
loss: 0.9944277405738831,grad_norm: 0.7811907445920291, iteration: 349375
loss: 1.0649502277374268,grad_norm: 0.882728135239811, iteration: 349376
loss: 0.9751555919647217,grad_norm: 0.7247586333522671, iteration: 349377
loss: 1.003061056137085,grad_norm: 0.8019486772995091, iteration: 349378
loss: 0.9467536807060242,grad_norm: 0.8535284102232633, iteration: 349379
loss: 1.0409563779830933,grad_norm: 0.8565422743016204, iteration: 349380
loss: 1.027830719947815,grad_norm: 0.9999998676289112, iteration: 349381
loss: 1.0156164169311523,grad_norm: 0.9884989793579494, iteration: 349382
loss: 1.0450254678726196,grad_norm: 0.8812628027762439, iteration: 349383
loss: 0.9839739799499512,grad_norm: 0.6899588074394569, iteration: 349384
loss: 0.9866357445716858,grad_norm: 0.7837647480839822, iteration: 349385
loss: 0.9862539768218994,grad_norm: 0.7282675934241816, iteration: 349386
loss: 1.0249860286712646,grad_norm: 0.8989702529454787, iteration: 349387
loss: 1.025327444076538,grad_norm: 0.9999995782851675, iteration: 349388
loss: 1.082446813583374,grad_norm: 0.9193203113886708, iteration: 349389
loss: 1.0114518404006958,grad_norm: 0.8800973503218397, iteration: 349390
loss: 1.0158939361572266,grad_norm: 0.8902595335486575, iteration: 349391
loss: 0.9688993692398071,grad_norm: 0.8158263306980089, iteration: 349392
loss: 1.0312081575393677,grad_norm: 0.8439057480593282, iteration: 349393
loss: 1.0399186611175537,grad_norm: 0.9999991042394997, iteration: 349394
loss: 1.006386637687683,grad_norm: 0.8520305642975978, iteration: 349395
loss: 1.0117101669311523,grad_norm: 0.8009956685169973, iteration: 349396
loss: 1.0342196226119995,grad_norm: 0.8927294203040426, iteration: 349397
loss: 0.9764549136161804,grad_norm: 0.8154517195076301, iteration: 349398
loss: 1.1072801351547241,grad_norm: 0.9185182563931645, iteration: 349399
loss: 0.9741029739379883,grad_norm: 0.8439211172391368, iteration: 349400
loss: 0.9811697006225586,grad_norm: 0.9715182997002558, iteration: 349401
loss: 0.979025661945343,grad_norm: 0.7344053929214008, iteration: 349402
loss: 0.9986165165901184,grad_norm: 0.8469133024773162, iteration: 349403
loss: 0.9654980897903442,grad_norm: 0.8594772860951789, iteration: 349404
loss: 1.0121792554855347,grad_norm: 0.7260799660418558, iteration: 349405
loss: 1.0060492753982544,grad_norm: 0.9999990579212056, iteration: 349406
loss: 1.0163122415542603,grad_norm: 0.7707055210684659, iteration: 349407
loss: 1.0061075687408447,grad_norm: 0.8631757223177181, iteration: 349408
loss: 1.0215457677841187,grad_norm: 0.8985255968206929, iteration: 349409
loss: 0.9980006814002991,grad_norm: 0.8554367185751715, iteration: 349410
loss: 0.9722460508346558,grad_norm: 0.893083746884971, iteration: 349411
loss: 1.004717230796814,grad_norm: 0.999999024843251, iteration: 349412
loss: 1.0141534805297852,grad_norm: 0.702698461377385, iteration: 349413
loss: 1.0095525979995728,grad_norm: 0.7145769008063307, iteration: 349414
loss: 0.9801853895187378,grad_norm: 0.6245726817053536, iteration: 349415
loss: 0.9516040682792664,grad_norm: 0.8055848265626391, iteration: 349416
loss: 1.0164858102798462,grad_norm: 0.9921113938630722, iteration: 349417
loss: 1.075999140739441,grad_norm: 0.9999990936927211, iteration: 349418
loss: 1.0433331727981567,grad_norm: 0.9919856960524838, iteration: 349419
loss: 1.0126980543136597,grad_norm: 0.7361297247297146, iteration: 349420
loss: 1.0020909309387207,grad_norm: 0.9999991069031862, iteration: 349421
loss: 0.9757171273231506,grad_norm: 0.7746366260464612, iteration: 349422
loss: 1.0281133651733398,grad_norm: 0.7419759280692739, iteration: 349423
loss: 1.0279877185821533,grad_norm: 0.9999991319945161, iteration: 349424
loss: 0.9966119527816772,grad_norm: 0.8599330727673857, iteration: 349425
loss: 1.0733369588851929,grad_norm: 0.8788226549674648, iteration: 349426
loss: 1.0000672340393066,grad_norm: 0.7861428031443154, iteration: 349427
loss: 0.987117350101471,grad_norm: 0.810956898979963, iteration: 349428
loss: 1.0166294574737549,grad_norm: 0.7684305812550796, iteration: 349429
loss: 0.9964916110038757,grad_norm: 0.9999996559006518, iteration: 349430
loss: 1.0281548500061035,grad_norm: 0.802309351259818, iteration: 349431
loss: 0.9811998605728149,grad_norm: 0.8024728541262004, iteration: 349432
loss: 1.0144702196121216,grad_norm: 0.9999998338818213, iteration: 349433
loss: 0.9989007711410522,grad_norm: 0.8724166251447223, iteration: 349434
loss: 0.9893807768821716,grad_norm: 0.7329889827450862, iteration: 349435
loss: 1.0029444694519043,grad_norm: 0.9999991763762625, iteration: 349436
loss: 1.0030479431152344,grad_norm: 0.9086463924285083, iteration: 349437
loss: 0.970737874507904,grad_norm: 0.830607118311174, iteration: 349438
loss: 1.02462899684906,grad_norm: 0.9362259302514762, iteration: 349439
loss: 1.0162783861160278,grad_norm: 0.8537084532553713, iteration: 349440
loss: 0.9823322892189026,grad_norm: 0.9999990607644665, iteration: 349441
loss: 0.9782851338386536,grad_norm: 0.9389692020487925, iteration: 349442
loss: 1.0361183881759644,grad_norm: 0.8598915646670099, iteration: 349443
loss: 0.9832277894020081,grad_norm: 0.9120421974708978, iteration: 349444
loss: 0.9781551361083984,grad_norm: 0.8365168109135377, iteration: 349445
loss: 1.0400110483169556,grad_norm: 0.9832135728760308, iteration: 349446
loss: 1.018563151359558,grad_norm: 0.807169025516997, iteration: 349447
loss: 0.9786710739135742,grad_norm: 0.7437949287008508, iteration: 349448
loss: 1.000411033630371,grad_norm: 0.8255317923986776, iteration: 349449
loss: 1.043323278427124,grad_norm: 0.8509619254031373, iteration: 349450
loss: 0.9804180860519409,grad_norm: 0.8175084595343692, iteration: 349451
loss: 1.030167579650879,grad_norm: 0.9999991095251651, iteration: 349452
loss: 0.977849543094635,grad_norm: 0.729556646335618, iteration: 349453
loss: 1.0026134252548218,grad_norm: 0.7467965109730408, iteration: 349454
loss: 0.9919977784156799,grad_norm: 0.756454361667626, iteration: 349455
loss: 1.0155680179595947,grad_norm: 0.73884512399859, iteration: 349456
loss: 1.044151782989502,grad_norm: 0.7574295714196653, iteration: 349457
loss: 0.9785815477371216,grad_norm: 0.7975007242139102, iteration: 349458
loss: 0.9823505878448486,grad_norm: 0.7418541809294853, iteration: 349459
loss: 0.9776953458786011,grad_norm: 0.7166204877004283, iteration: 349460
loss: 0.996845543384552,grad_norm: 0.7145232726416497, iteration: 349461
loss: 1.0215890407562256,grad_norm: 0.8244602481249447, iteration: 349462
loss: 0.9681441783905029,grad_norm: 0.7884340361764635, iteration: 349463
loss: 0.976172149181366,grad_norm: 0.7092724289271802, iteration: 349464
loss: 0.9939461350440979,grad_norm: 0.9363529162426755, iteration: 349465
loss: 0.9827580451965332,grad_norm: 0.9999997680106028, iteration: 349466
loss: 1.005575180053711,grad_norm: 0.8596452062902297, iteration: 349467
loss: 0.9859681725502014,grad_norm: 0.9831954637342382, iteration: 349468
loss: 1.038411021232605,grad_norm: 0.8049133875671461, iteration: 349469
loss: 0.9583953619003296,grad_norm: 0.833834160184532, iteration: 349470
loss: 1.047186017036438,grad_norm: 0.9999995231233728, iteration: 349471
loss: 0.9768112897872925,grad_norm: 0.7426894752785875, iteration: 349472
loss: 0.9915571808815002,grad_norm: 0.835946273394171, iteration: 349473
loss: 1.0285240411758423,grad_norm: 0.912564445552203, iteration: 349474
loss: 1.0189517736434937,grad_norm: 0.999998986512125, iteration: 349475
loss: 1.0150964260101318,grad_norm: 0.9999991687240882, iteration: 349476
loss: 0.967295229434967,grad_norm: 0.708330954916124, iteration: 349477
loss: 0.9576660990715027,grad_norm: 0.9285861142925954, iteration: 349478
loss: 1.035948634147644,grad_norm: 0.9999994220950765, iteration: 349479
loss: 0.9958369731903076,grad_norm: 0.8038699077139364, iteration: 349480
loss: 0.9919272661209106,grad_norm: 0.8102811464484437, iteration: 349481
loss: 0.9657639861106873,grad_norm: 0.7874737863285405, iteration: 349482
loss: 1.0049984455108643,grad_norm: 0.8205984465708438, iteration: 349483
loss: 1.0209966897964478,grad_norm: 0.8501072742620048, iteration: 349484
loss: 1.004763126373291,grad_norm: 0.9287024502689512, iteration: 349485
loss: 0.9599296450614929,grad_norm: 0.7987605472668245, iteration: 349486
loss: 1.0307856798171997,grad_norm: 0.8417103004511817, iteration: 349487
loss: 1.0260897874832153,grad_norm: 0.8421724469924575, iteration: 349488
loss: 1.014139175415039,grad_norm: 0.9827167798140067, iteration: 349489
loss: 0.9863061904907227,grad_norm: 0.9836514835720098, iteration: 349490
loss: 1.0473620891571045,grad_norm: 0.878898405446974, iteration: 349491
loss: 1.010862112045288,grad_norm: 0.7104290435934179, iteration: 349492
loss: 0.9803043603897095,grad_norm: 0.866898953341508, iteration: 349493
loss: 0.9968097805976868,grad_norm: 0.9999989977614244, iteration: 349494
loss: 1.022297739982605,grad_norm: 0.9999996804869989, iteration: 349495
loss: 1.0010480880737305,grad_norm: 0.9999991901833505, iteration: 349496
loss: 1.0258686542510986,grad_norm: 0.9416160786979343, iteration: 349497
loss: 1.0001797676086426,grad_norm: 0.8319723736325749, iteration: 349498
loss: 1.0270676612854004,grad_norm: 0.9027778972539214, iteration: 349499
loss: 0.9807645082473755,grad_norm: 0.9917107282778791, iteration: 349500
loss: 1.0000964403152466,grad_norm: 0.768006649266446, iteration: 349501
loss: 0.9862833023071289,grad_norm: 0.7827308856056265, iteration: 349502
loss: 1.0035706758499146,grad_norm: 0.999999407875959, iteration: 349503
loss: 1.0205249786376953,grad_norm: 0.9999991564865848, iteration: 349504
loss: 0.9925956130027771,grad_norm: 0.8589857290371709, iteration: 349505
loss: 0.9946811199188232,grad_norm: 0.8029042843202979, iteration: 349506
loss: 0.9726774096488953,grad_norm: 0.7574663633188844, iteration: 349507
loss: 1.0032813549041748,grad_norm: 0.797773143353184, iteration: 349508
loss: 1.0291153192520142,grad_norm: 0.9480980359171515, iteration: 349509
loss: 1.0240412950515747,grad_norm: 0.9999991134768187, iteration: 349510
loss: 0.9868683218955994,grad_norm: 0.7053891284827293, iteration: 349511
loss: 0.9842346906661987,grad_norm: 0.8564884026869944, iteration: 349512
loss: 0.9826722145080566,grad_norm: 0.8588072624952526, iteration: 349513
loss: 1.0245723724365234,grad_norm: 0.9999991033716672, iteration: 349514
loss: 1.0402073860168457,grad_norm: 0.9257511993899584, iteration: 349515
loss: 1.0058356523513794,grad_norm: 0.9999991926849675, iteration: 349516
loss: 1.0640308856964111,grad_norm: 0.9999991282282826, iteration: 349517
loss: 0.9960694313049316,grad_norm: 0.7256312193419159, iteration: 349518
loss: 0.9995338916778564,grad_norm: 0.9999994551935477, iteration: 349519
loss: 1.0160062313079834,grad_norm: 0.7401474521791735, iteration: 349520
loss: 0.996984601020813,grad_norm: 0.7725048058719169, iteration: 349521
loss: 0.9820870161056519,grad_norm: 0.9290397798049772, iteration: 349522
loss: 0.9971712231636047,grad_norm: 0.7261715856678759, iteration: 349523
loss: 0.957038164138794,grad_norm: 0.881622182456737, iteration: 349524
loss: 0.9757546186447144,grad_norm: 0.9625546047935634, iteration: 349525
loss: 0.9885014295578003,grad_norm: 0.7769570565403098, iteration: 349526
loss: 0.9890532493591309,grad_norm: 0.8917967867665438, iteration: 349527
loss: 1.0490021705627441,grad_norm: 0.9999994508906054, iteration: 349528
loss: 0.9971063137054443,grad_norm: 0.8343526464262906, iteration: 349529
loss: 0.9841569662094116,grad_norm: 0.7910817600995332, iteration: 349530
loss: 1.0115587711334229,grad_norm: 0.8697668990892827, iteration: 349531
loss: 1.0216916799545288,grad_norm: 0.9999989822494741, iteration: 349532
loss: 0.9733949303627014,grad_norm: 0.9033337205956864, iteration: 349533
loss: 1.058591604232788,grad_norm: 0.9999998386711353, iteration: 349534
loss: 0.9681812524795532,grad_norm: 0.7441576380829, iteration: 349535
loss: 1.0663446187973022,grad_norm: 0.9999994416244364, iteration: 349536
loss: 0.9788407683372498,grad_norm: 0.7511185729233412, iteration: 349537
loss: 0.9857370257377625,grad_norm: 0.9379337362422765, iteration: 349538
loss: 0.9342958331108093,grad_norm: 0.9292510935483308, iteration: 349539
loss: 1.0010391473770142,grad_norm: 0.9999993747539135, iteration: 349540
loss: 0.9998377561569214,grad_norm: 0.6648502393123724, iteration: 349541
loss: 1.004482626914978,grad_norm: 0.8296128872751357, iteration: 349542
loss: 0.9971418380737305,grad_norm: 0.9999990400553916, iteration: 349543
loss: 0.9903753399848938,grad_norm: 0.630642493643046, iteration: 349544
loss: 0.9826448559761047,grad_norm: 0.7683861808216091, iteration: 349545
loss: 1.023087739944458,grad_norm: 0.7958380713437554, iteration: 349546
loss: 0.967212975025177,grad_norm: 0.8879742770348296, iteration: 349547
loss: 1.0227344036102295,grad_norm: 0.8236417476291074, iteration: 349548
loss: 0.9838171601295471,grad_norm: 0.9999992254133359, iteration: 349549
loss: 1.0003044605255127,grad_norm: 0.7317429629066324, iteration: 349550
loss: 0.961566686630249,grad_norm: 0.8115460973389618, iteration: 349551
loss: 0.9883986115455627,grad_norm: 0.9999991839441401, iteration: 349552
loss: 0.9858523607254028,grad_norm: 0.6162900115732868, iteration: 349553
loss: 0.9999874830245972,grad_norm: 0.763315932425971, iteration: 349554
loss: 1.0261610746383667,grad_norm: 0.8472262567649256, iteration: 349555
loss: 1.0352189540863037,grad_norm: 0.8046404706390744, iteration: 349556
loss: 1.0226953029632568,grad_norm: 0.8546633934523598, iteration: 349557
loss: 0.9895310401916504,grad_norm: 0.8136634530286306, iteration: 349558
loss: 0.9991039037704468,grad_norm: 0.8602306211815182, iteration: 349559
loss: 1.0013350248336792,grad_norm: 0.7883579200332149, iteration: 349560
loss: 0.9994500279426575,grad_norm: 0.8316573353532206, iteration: 349561
loss: 0.9830762147903442,grad_norm: 0.9541296690577876, iteration: 349562
loss: 1.0043425559997559,grad_norm: 0.7684055755329254, iteration: 349563
loss: 0.9992154240608215,grad_norm: 0.9999993288744392, iteration: 349564
loss: 0.9985679984092712,grad_norm: 0.9731790941732199, iteration: 349565
loss: 0.9970159530639648,grad_norm: 0.9467231204553573, iteration: 349566
loss: 0.998608410358429,grad_norm: 0.7877159160718044, iteration: 349567
loss: 0.9725536108016968,grad_norm: 0.8783149000533349, iteration: 349568
loss: 0.9641436338424683,grad_norm: 0.7195108762869816, iteration: 349569
loss: 0.9576185345649719,grad_norm: 0.7493527269612865, iteration: 349570
loss: 1.0260528326034546,grad_norm: 0.8335795726602646, iteration: 349571
loss: 1.0126090049743652,grad_norm: 0.9242605629167938, iteration: 349572
loss: 1.0163519382476807,grad_norm: 0.7486081433592827, iteration: 349573
loss: 0.9635134339332581,grad_norm: 0.8480451213020818, iteration: 349574
loss: 0.9940467476844788,grad_norm: 0.8126137497130267, iteration: 349575
loss: 0.9960440993309021,grad_norm: 0.703922152395123, iteration: 349576
loss: 1.0368393659591675,grad_norm: 0.993549084838607, iteration: 349577
loss: 1.0489641427993774,grad_norm: 0.999999745786871, iteration: 349578
loss: 0.9768128395080566,grad_norm: 0.9610069443652544, iteration: 349579
loss: 0.990553617477417,grad_norm: 0.9999992410898169, iteration: 349580
loss: 1.0143297910690308,grad_norm: 0.8990957669982126, iteration: 349581
loss: 0.9967285394668579,grad_norm: 0.7474254029803332, iteration: 349582
loss: 1.0086925029754639,grad_norm: 0.8554942813641265, iteration: 349583
loss: 1.0115830898284912,grad_norm: 0.7800291675701747, iteration: 349584
loss: 0.9765247702598572,grad_norm: 0.7388234478313501, iteration: 349585
loss: 0.9746994376182556,grad_norm: 0.9608101609181459, iteration: 349586
loss: 1.0096789598464966,grad_norm: 0.9999991493885267, iteration: 349587
loss: 1.0436569452285767,grad_norm: 0.9999993746844905, iteration: 349588
loss: 1.0018662214279175,grad_norm: 0.7027928509003057, iteration: 349589
loss: 0.9991078972816467,grad_norm: 0.9519726980140957, iteration: 349590
loss: 1.0150171518325806,grad_norm: 0.8915940314575169, iteration: 349591
loss: 1.0670297145843506,grad_norm: 0.9999993076817147, iteration: 349592
loss: 1.007826328277588,grad_norm: 0.6719241717229876, iteration: 349593
loss: 1.090309739112854,grad_norm: 0.8678589653083549, iteration: 349594
loss: 1.0280429124832153,grad_norm: 0.928867120290488, iteration: 349595
loss: 1.0008636713027954,grad_norm: 0.8192206796391361, iteration: 349596
loss: 1.0336012840270996,grad_norm: 0.94883521163517, iteration: 349597
loss: 0.9787275195121765,grad_norm: 0.7609755559852182, iteration: 349598
loss: 1.0149013996124268,grad_norm: 0.9999990291456822, iteration: 349599
loss: 0.992719829082489,grad_norm: 0.8899946031591927, iteration: 349600
loss: 1.0038275718688965,grad_norm: 0.9818067438906897, iteration: 349601
loss: 1.0146832466125488,grad_norm: 0.7826914440994414, iteration: 349602
loss: 0.9544578194618225,grad_norm: 0.8447445056726445, iteration: 349603
loss: 0.9999042749404907,grad_norm: 0.8549684697800571, iteration: 349604
loss: 1.0128917694091797,grad_norm: 0.9999996411887736, iteration: 349605
loss: 1.0208890438079834,grad_norm: 0.7012610932598422, iteration: 349606
loss: 1.0069340467453003,grad_norm: 0.8360152796310013, iteration: 349607
loss: 1.03844153881073,grad_norm: 0.7107664343532345, iteration: 349608
loss: 0.9737011194229126,grad_norm: 0.781141589718717, iteration: 349609
loss: 0.9790613651275635,grad_norm: 0.8544665358598104, iteration: 349610
loss: 0.9879898428916931,grad_norm: 0.9727262517589474, iteration: 349611
loss: 1.039094090461731,grad_norm: 0.9393300865650336, iteration: 349612
loss: 0.9939030408859253,grad_norm: 0.9851916417369072, iteration: 349613
loss: 0.989448070526123,grad_norm: 0.7710378425705338, iteration: 349614
loss: 0.9516290426254272,grad_norm: 0.9999990732745471, iteration: 349615
loss: 0.9939402937889099,grad_norm: 0.9999989686295482, iteration: 349616
loss: 1.087708830833435,grad_norm: 0.9999992430662568, iteration: 349617
loss: 1.0119507312774658,grad_norm: 0.9758767948228064, iteration: 349618
loss: 0.966870129108429,grad_norm: 0.9090331292429954, iteration: 349619
loss: 1.017846941947937,grad_norm: 0.860646928211542, iteration: 349620
loss: 1.0463272333145142,grad_norm: 0.9999992675240066, iteration: 349621
loss: 0.9825124740600586,grad_norm: 0.6536104520392717, iteration: 349622
loss: 0.9551733136177063,grad_norm: 0.747102817244323, iteration: 349623
loss: 1.0308101177215576,grad_norm: 0.7721699333903022, iteration: 349624
loss: 1.0061172246932983,grad_norm: 0.7300670194771305, iteration: 349625
loss: 1.015615463256836,grad_norm: 0.7240782066636037, iteration: 349626
loss: 1.0282022953033447,grad_norm: 0.9999989104690291, iteration: 349627
loss: 0.968859851360321,grad_norm: 0.9352380929732069, iteration: 349628
loss: 0.99784255027771,grad_norm: 0.9532771810096529, iteration: 349629
loss: 1.0244836807250977,grad_norm: 0.8531948349298843, iteration: 349630
loss: 1.0329856872558594,grad_norm: 0.9590472855781127, iteration: 349631
loss: 1.0072492361068726,grad_norm: 0.8165627355472449, iteration: 349632
loss: 1.0804630517959595,grad_norm: 0.9999997791737024, iteration: 349633
loss: 1.0177381038665771,grad_norm: 0.7638244290030082, iteration: 349634
loss: 1.0039769411087036,grad_norm: 0.7637313607368919, iteration: 349635
loss: 0.9792764782905579,grad_norm: 0.8917703728537444, iteration: 349636
loss: 1.0518347024917603,grad_norm: 0.9999998736507637, iteration: 349637
loss: 0.9809468984603882,grad_norm: 0.9685510738639818, iteration: 349638
loss: 0.9926754236221313,grad_norm: 0.7813431040436675, iteration: 349639
loss: 0.9565607905387878,grad_norm: 0.9037183601934985, iteration: 349640
loss: 1.0129414796829224,grad_norm: 0.8780179452913708, iteration: 349641
loss: 1.0569850206375122,grad_norm: 0.9999994492966863, iteration: 349642
loss: 1.0057414770126343,grad_norm: 0.9057804432635231, iteration: 349643
loss: 0.973450779914856,grad_norm: 0.8870657355669328, iteration: 349644
loss: 0.9480547308921814,grad_norm: 0.908172472845457, iteration: 349645
loss: 0.9838442206382751,grad_norm: 0.7993403475197329, iteration: 349646
loss: 0.9978414177894592,grad_norm: 0.7888498143146082, iteration: 349647
loss: 1.002517580986023,grad_norm: 0.7865467142081193, iteration: 349648
loss: 0.9690284729003906,grad_norm: 0.8113607887999305, iteration: 349649
loss: 1.0028117895126343,grad_norm: 0.781222086166517, iteration: 349650
loss: 0.998742938041687,grad_norm: 0.9999993011695483, iteration: 349651
loss: 1.1441105604171753,grad_norm: 0.9999999169630769, iteration: 349652
loss: 1.0099469423294067,grad_norm: 0.9039422607197296, iteration: 349653
loss: 1.028056025505066,grad_norm: 0.9414138645463894, iteration: 349654
loss: 1.0064641237258911,grad_norm: 0.8504962631992824, iteration: 349655
loss: 1.0341787338256836,grad_norm: 0.9999993894886758, iteration: 349656
loss: 1.0554779767990112,grad_norm: 0.9999991830708874, iteration: 349657
loss: 1.171730875968933,grad_norm: 1.0000001163738332, iteration: 349658
loss: 1.0030733346939087,grad_norm: 0.8132076424175292, iteration: 349659
loss: 1.1055731773376465,grad_norm: 0.9999995036988651, iteration: 349660
loss: 0.9458943605422974,grad_norm: 0.8331575726105158, iteration: 349661
loss: 1.0499337911605835,grad_norm: 0.9999997713201703, iteration: 349662
loss: 1.0329219102859497,grad_norm: 0.8056996002926781, iteration: 349663
loss: 1.0427377223968506,grad_norm: 0.9999989993280257, iteration: 349664
loss: 1.0212947130203247,grad_norm: 0.9280415033506626, iteration: 349665
loss: 0.994370698928833,grad_norm: 0.8435632129224184, iteration: 349666
loss: 1.0026805400848389,grad_norm: 0.9420454320868711, iteration: 349667
loss: 0.9955641031265259,grad_norm: 0.8741566118604199, iteration: 349668
loss: 0.9270848631858826,grad_norm: 0.8529360055262714, iteration: 349669
loss: 1.004210114479065,grad_norm: 0.8578617121894108, iteration: 349670
loss: 0.9905632138252258,grad_norm: 0.7920090354857996, iteration: 349671
loss: 0.9235221743583679,grad_norm: 0.7624452499277232, iteration: 349672
loss: 0.9677600264549255,grad_norm: 0.841315504146681, iteration: 349673
loss: 1.0046550035476685,grad_norm: 0.8971898805143285, iteration: 349674
loss: 0.978318989276886,grad_norm: 0.9085807783982043, iteration: 349675
loss: 1.009900689125061,grad_norm: 0.794090232276373, iteration: 349676
loss: 0.9668078422546387,grad_norm: 0.8551515579164934, iteration: 349677
loss: 1.0428601503372192,grad_norm: 0.9559839360139875, iteration: 349678
loss: 0.9917536377906799,grad_norm: 0.7737685774283266, iteration: 349679
loss: 1.0152751207351685,grad_norm: 0.8103393824315482, iteration: 349680
loss: 0.9593141674995422,grad_norm: 0.7610919403187362, iteration: 349681
loss: 0.9723007082939148,grad_norm: 0.8608074580737608, iteration: 349682
loss: 1.0290781259536743,grad_norm: 0.8593130689450084, iteration: 349683
loss: 0.9727864265441895,grad_norm: 0.8513333623550302, iteration: 349684
loss: 1.0098955631256104,grad_norm: 0.8595766439298012, iteration: 349685
loss: 0.9793243408203125,grad_norm: 0.9614694859038565, iteration: 349686
loss: 0.9895859360694885,grad_norm: 0.7357438160904496, iteration: 349687
loss: 1.0231019258499146,grad_norm: 0.6449274949858725, iteration: 349688
loss: 1.000050663948059,grad_norm: 0.8820681077228119, iteration: 349689
loss: 0.9921132922172546,grad_norm: 0.8841246497365955, iteration: 349690
loss: 0.9996165037155151,grad_norm: 0.751870853816886, iteration: 349691
loss: 0.9625791907310486,grad_norm: 0.9999992611962768, iteration: 349692
loss: 0.9787583947181702,grad_norm: 0.7086928789013771, iteration: 349693
loss: 1.002406120300293,grad_norm: 0.9270744283342259, iteration: 349694
loss: 1.0069612264633179,grad_norm: 0.8831349764643406, iteration: 349695
loss: 1.0160657167434692,grad_norm: 0.9049413455699744, iteration: 349696
loss: 1.0073341131210327,grad_norm: 0.7410858066826181, iteration: 349697
loss: 1.0345659255981445,grad_norm: 0.7899448784186769, iteration: 349698
loss: 0.9807330369949341,grad_norm: 0.782473981688864, iteration: 349699
loss: 0.9839307069778442,grad_norm: 0.7739243141130303, iteration: 349700
loss: 1.0184131860733032,grad_norm: 0.8254508904535861, iteration: 349701
loss: 1.0431172847747803,grad_norm: 0.9536755203331089, iteration: 349702
loss: 0.9947723150253296,grad_norm: 0.8604932024583493, iteration: 349703
loss: 1.0283589363098145,grad_norm: 0.9999992817739859, iteration: 349704
loss: 0.9946814179420471,grad_norm: 0.949908447031937, iteration: 349705
loss: 1.0090489387512207,grad_norm: 0.8211164958608205, iteration: 349706
loss: 0.9779025316238403,grad_norm: 0.6834885491672847, iteration: 349707
loss: 1.0342124700546265,grad_norm: 0.9588498478020535, iteration: 349708
loss: 0.9713214039802551,grad_norm: 0.8890975873632038, iteration: 349709
loss: 1.001883864402771,grad_norm: 0.8927695446665853, iteration: 349710
loss: 1.0134894847869873,grad_norm: 0.8767955673681997, iteration: 349711
loss: 1.1356583833694458,grad_norm: 0.9999995756771063, iteration: 349712
loss: 0.9986690878868103,grad_norm: 0.7723926126305705, iteration: 349713
loss: 1.0283395051956177,grad_norm: 0.9108440216430546, iteration: 349714
loss: 1.0184985399246216,grad_norm: 0.8053759493966849, iteration: 349715
loss: 1.0324705839157104,grad_norm: 0.8763802335538114, iteration: 349716
loss: 1.0152511596679688,grad_norm: 0.6671388667960645, iteration: 349717
loss: 0.9812962412834167,grad_norm: 0.8040452942989798, iteration: 349718
loss: 0.9726604223251343,grad_norm: 0.78482706257864, iteration: 349719
loss: 0.9806342124938965,grad_norm: 0.6456606722897643, iteration: 349720
loss: 0.9988077878952026,grad_norm: 0.7096673857720734, iteration: 349721
loss: 0.9825459718704224,grad_norm: 0.8627049782560573, iteration: 349722
loss: 1.0005459785461426,grad_norm: 0.8782277018389846, iteration: 349723
loss: 1.0182994604110718,grad_norm: 0.8067975076941601, iteration: 349724
loss: 0.996763288974762,grad_norm: 0.84533475142626, iteration: 349725
loss: 1.0262129306793213,grad_norm: 0.6995512037785054, iteration: 349726
loss: 0.9967688918113708,grad_norm: 0.9254185855994872, iteration: 349727
loss: 1.0081250667572021,grad_norm: 0.7885957185357193, iteration: 349728
loss: 1.0481382608413696,grad_norm: 0.8617970566483336, iteration: 349729
loss: 1.0168951749801636,grad_norm: 0.8120278345226446, iteration: 349730
loss: 0.9891151189804077,grad_norm: 0.9550224982602551, iteration: 349731
loss: 0.9687240719795227,grad_norm: 0.7844276848883588, iteration: 349732
loss: 0.9989540576934814,grad_norm: 0.8879590683462237, iteration: 349733
loss: 0.9992275238037109,grad_norm: 0.8814029725547394, iteration: 349734
loss: 0.995519757270813,grad_norm: 0.7255143612317314, iteration: 349735
loss: 0.9870962500572205,grad_norm: 0.8848888637407997, iteration: 349736
loss: 0.9816856384277344,grad_norm: 0.7614227522258403, iteration: 349737
loss: 0.9896559715270996,grad_norm: 0.7240355924059442, iteration: 349738
loss: 0.99212247133255,grad_norm: 0.8530911469742559, iteration: 349739
loss: 0.9931122660636902,grad_norm: 0.8530267721970439, iteration: 349740
loss: 1.0414279699325562,grad_norm: 0.6903624910409395, iteration: 349741
loss: 1.043694019317627,grad_norm: 0.7534778883749594, iteration: 349742
loss: 0.9734654426574707,grad_norm: 0.7812466023237777, iteration: 349743
loss: 1.2937980890274048,grad_norm: 0.9999993756064033, iteration: 349744
loss: 0.9928621053695679,grad_norm: 0.7560016417700369, iteration: 349745
loss: 1.025219202041626,grad_norm: 0.9999996260752702, iteration: 349746
loss: 0.9952766299247742,grad_norm: 0.7408987904789841, iteration: 349747
loss: 0.9903810024261475,grad_norm: 0.9999992771209653, iteration: 349748
loss: 1.0164490938186646,grad_norm: 0.9999991399312284, iteration: 349749
loss: 0.9327840805053711,grad_norm: 0.9050000949021989, iteration: 349750
loss: 1.0040849447250366,grad_norm: 0.7183577959216622, iteration: 349751
loss: 0.9770393967628479,grad_norm: 0.7587931881621185, iteration: 349752
loss: 0.9839658737182617,grad_norm: 0.9693063306566236, iteration: 349753
loss: 0.9821037650108337,grad_norm: 0.9999991211469589, iteration: 349754
loss: 1.0036519765853882,grad_norm: 0.9999997919975702, iteration: 349755
loss: 1.0059916973114014,grad_norm: 0.8362681096639364, iteration: 349756
loss: 0.9965596795082092,grad_norm: 0.7871383616628322, iteration: 349757
loss: 1.0266566276550293,grad_norm: 0.9999990654587387, iteration: 349758
loss: 1.0559788942337036,grad_norm: 0.9999996283864566, iteration: 349759
loss: 0.9790382981300354,grad_norm: 0.857077977922877, iteration: 349760
loss: 1.0049395561218262,grad_norm: 0.7466218695517102, iteration: 349761
loss: 1.0458208322525024,grad_norm: 0.9999990696759596, iteration: 349762
loss: 1.0235061645507812,grad_norm: 0.9817915673978417, iteration: 349763
loss: 0.9838316440582275,grad_norm: 0.8508170025556261, iteration: 349764
loss: 1.0019299983978271,grad_norm: 0.9137252063136567, iteration: 349765
loss: 1.0474318265914917,grad_norm: 0.9495873603194694, iteration: 349766
loss: 1.0227270126342773,grad_norm: 0.9999996843012176, iteration: 349767
loss: 0.9817073941230774,grad_norm: 0.9686140084048465, iteration: 349768
loss: 1.0047032833099365,grad_norm: 0.9999991967715478, iteration: 349769
loss: 0.9795325398445129,grad_norm: 0.8132728379452978, iteration: 349770
loss: 0.9748792052268982,grad_norm: 0.7758396503413745, iteration: 349771
loss: 0.981991708278656,grad_norm: 0.9170540559686051, iteration: 349772
loss: 0.9977421164512634,grad_norm: 0.861158644274725, iteration: 349773
loss: 1.0328418016433716,grad_norm: 0.7890345877802528, iteration: 349774
loss: 1.017284870147705,grad_norm: 0.890937274275154, iteration: 349775
loss: 1.0142829418182373,grad_norm: 0.8354787827006909, iteration: 349776
loss: 1.0406097173690796,grad_norm: 0.7524518035208274, iteration: 349777
loss: 1.0109046697616577,grad_norm: 0.8230715351884991, iteration: 349778
loss: 1.0613048076629639,grad_norm: 0.9999997382923265, iteration: 349779
loss: 0.980355441570282,grad_norm: 0.7630273640680935, iteration: 349780
loss: 1.026103138923645,grad_norm: 0.934274735422607, iteration: 349781
loss: 0.9814437031745911,grad_norm: 0.8871962799938984, iteration: 349782
loss: 0.9581056237220764,grad_norm: 0.7404943111959849, iteration: 349783
loss: 1.0248327255249023,grad_norm: 0.713381209176303, iteration: 349784
loss: 1.0087683200836182,grad_norm: 0.7855074964675562, iteration: 349785
loss: 0.9851027727127075,grad_norm: 0.7204858443003744, iteration: 349786
loss: 1.0160678625106812,grad_norm: 0.9999994729309339, iteration: 349787
loss: 0.9819097518920898,grad_norm: 0.8382357902204313, iteration: 349788
loss: 0.9962429404258728,grad_norm: 0.9803406730848168, iteration: 349789
loss: 1.014039397239685,grad_norm: 0.8163344096340903, iteration: 349790
loss: 1.061659812927246,grad_norm: 0.9999990591658923, iteration: 349791
loss: 1.0069150924682617,grad_norm: 0.7774352947084979, iteration: 349792
loss: 0.991050124168396,grad_norm: 0.9999990881866834, iteration: 349793
loss: 1.029531478881836,grad_norm: 0.8149957606256505, iteration: 349794
loss: 1.0365302562713623,grad_norm: 0.9999998500323805, iteration: 349795
loss: 1.0690158605575562,grad_norm: 0.9999998417865031, iteration: 349796
loss: 1.0243394374847412,grad_norm: 0.9999997066305241, iteration: 349797
loss: 1.110957145690918,grad_norm: 0.9999995045762625, iteration: 349798
loss: 0.9929887056350708,grad_norm: 0.8236653937683319, iteration: 349799
loss: 1.001744270324707,grad_norm: 0.825676079679752, iteration: 349800
loss: 0.9976187348365784,grad_norm: 0.6912814908041266, iteration: 349801
loss: 0.9878694415092468,grad_norm: 0.9778120898249842, iteration: 349802
loss: 1.0110256671905518,grad_norm: 0.7892400173528616, iteration: 349803
loss: 1.0149203538894653,grad_norm: 0.8317397313527285, iteration: 349804
loss: 0.9670051336288452,grad_norm: 0.773348042434239, iteration: 349805
loss: 1.0276907682418823,grad_norm: 0.9999995330416968, iteration: 349806
loss: 0.9680768847465515,grad_norm: 0.8341245389922528, iteration: 349807
loss: 0.9945980906486511,grad_norm: 0.8486400018190019, iteration: 349808
loss: 1.1468572616577148,grad_norm: 0.9999995736394266, iteration: 349809
loss: 0.9749888777732849,grad_norm: 0.8182904052760344, iteration: 349810
loss: 0.9661812782287598,grad_norm: 0.748041426811997, iteration: 349811
loss: 0.9837133288383484,grad_norm: 0.7419357205987351, iteration: 349812
loss: 0.9791207909584045,grad_norm: 0.8507136957437905, iteration: 349813
loss: 0.994970440864563,grad_norm: 0.8359277833697982, iteration: 349814
loss: 1.038140892982483,grad_norm: 0.8431352257840772, iteration: 349815
loss: 0.9531547427177429,grad_norm: 0.834186231040895, iteration: 349816
loss: 0.9969132542610168,grad_norm: 0.977489171599781, iteration: 349817
loss: 1.0028085708618164,grad_norm: 0.7803025299816, iteration: 349818
loss: 1.0050145387649536,grad_norm: 0.9999992726627831, iteration: 349819
loss: 1.0841090679168701,grad_norm: 0.8335424775898929, iteration: 349820
loss: 1.0013790130615234,grad_norm: 0.8585513645992021, iteration: 349821
loss: 0.9685052037239075,grad_norm: 0.9645162044218734, iteration: 349822
loss: 1.024491786956787,grad_norm: 0.7773662975940431, iteration: 349823
loss: 1.0006664991378784,grad_norm: 0.8232365675327956, iteration: 349824
loss: 1.02106773853302,grad_norm: 0.9849797489216046, iteration: 349825
loss: 0.9937049746513367,grad_norm: 0.6836056321208291, iteration: 349826
loss: 1.0166070461273193,grad_norm: 1.0000000105909204, iteration: 349827
loss: 0.9946872591972351,grad_norm: 0.8473707959481511, iteration: 349828
loss: 0.9950839281082153,grad_norm: 0.7606534892789182, iteration: 349829
loss: 0.9795551300048828,grad_norm: 0.8514075876954863, iteration: 349830
loss: 1.0985386371612549,grad_norm: 0.9999995716210942, iteration: 349831
loss: 1.0371010303497314,grad_norm: 0.7178608853004725, iteration: 349832
loss: 1.0119271278381348,grad_norm: 0.7902887724379503, iteration: 349833
loss: 1.0028691291809082,grad_norm: 0.8092785998535189, iteration: 349834
loss: 1.005446434020996,grad_norm: 0.8259267780603137, iteration: 349835
loss: 0.948749840259552,grad_norm: 0.9107786239936366, iteration: 349836
loss: 0.9989551901817322,grad_norm: 0.8908978821199284, iteration: 349837
loss: 0.9978393912315369,grad_norm: 0.7548066105969573, iteration: 349838
loss: 1.0110797882080078,grad_norm: 0.7612728635802918, iteration: 349839
loss: 1.2217907905578613,grad_norm: 0.9999997952686521, iteration: 349840
loss: 0.9724445343017578,grad_norm: 0.8650168234494519, iteration: 349841
loss: 1.0045480728149414,grad_norm: 0.7760038261004688, iteration: 349842
loss: 0.9974263310432434,grad_norm: 0.893959072855544, iteration: 349843
loss: 1.035470724105835,grad_norm: 0.7396049394542803, iteration: 349844
loss: 0.9620823264122009,grad_norm: 0.8035983882705316, iteration: 349845
loss: 0.9717864990234375,grad_norm: 0.9999995747949384, iteration: 349846
loss: 1.0742897987365723,grad_norm: 0.8920874675206318, iteration: 349847
loss: 0.96741783618927,grad_norm: 0.8795248611120647, iteration: 349848
loss: 1.0340783596038818,grad_norm: 0.7930919787567738, iteration: 349849
loss: 1.0441259145736694,grad_norm: 0.8378673822624565, iteration: 349850
loss: 0.9950699210166931,grad_norm: 0.9999990771853057, iteration: 349851
loss: 1.0321234464645386,grad_norm: 0.8050148422329713, iteration: 349852
loss: 1.0462923049926758,grad_norm: 0.900201414534237, iteration: 349853
loss: 1.00214684009552,grad_norm: 0.8562398715542727, iteration: 349854
loss: 0.971777617931366,grad_norm: 0.9329551228394507, iteration: 349855
loss: 1.3235634565353394,grad_norm: 0.9999995221682351, iteration: 349856
loss: 0.9913008213043213,grad_norm: 0.8208115756110776, iteration: 349857
loss: 1.012727975845337,grad_norm: 0.8010891251490703, iteration: 349858
loss: 0.9729814529418945,grad_norm: 0.9999992540423546, iteration: 349859
loss: 1.0030215978622437,grad_norm: 0.8319909727551698, iteration: 349860
loss: 0.9918673634529114,grad_norm: 0.9076033193619538, iteration: 349861
loss: 1.035135269165039,grad_norm: 0.999999371065218, iteration: 349862
loss: 0.9734477400779724,grad_norm: 0.9353336244727987, iteration: 349863
loss: 0.9830125570297241,grad_norm: 0.886935662762741, iteration: 349864
loss: 1.0980991125106812,grad_norm: 0.9999998202368512, iteration: 349865
loss: 1.115290641784668,grad_norm: 0.9999997037595785, iteration: 349866
loss: 1.0232784748077393,grad_norm: 0.9999991327244433, iteration: 349867
loss: 0.9676398038864136,grad_norm: 0.8232472243625418, iteration: 349868
loss: 1.0362318754196167,grad_norm: 0.8974952233949101, iteration: 349869
loss: 0.9981521368026733,grad_norm: 0.8613198023321352, iteration: 349870
loss: 1.0119197368621826,grad_norm: 0.9193997153880951, iteration: 349871
loss: 1.0093700885772705,grad_norm: 0.932123930831652, iteration: 349872
loss: 0.9749597311019897,grad_norm: 0.7677042183382669, iteration: 349873
loss: 1.03935968875885,grad_norm: 0.8436960012929933, iteration: 349874
loss: 1.0755252838134766,grad_norm: 0.9999991016422649, iteration: 349875
loss: 1.0026404857635498,grad_norm: 0.6658644964833033, iteration: 349876
loss: 1.1557748317718506,grad_norm: 0.8310051623587074, iteration: 349877
loss: 1.0070723295211792,grad_norm: 0.7383596398235498, iteration: 349878
loss: 0.9829338788986206,grad_norm: 0.8462553673379861, iteration: 349879
loss: 0.9512989521026611,grad_norm: 0.9999992762294029, iteration: 349880
loss: 1.0207105875015259,grad_norm: 0.999999338923374, iteration: 349881
loss: 1.0295647382736206,grad_norm: 0.7592637692777943, iteration: 349882
loss: 1.004374623298645,grad_norm: 0.6914835645829929, iteration: 349883
loss: 1.0155888795852661,grad_norm: 0.9330127565293472, iteration: 349884
loss: 0.9934535026550293,grad_norm: 0.9999991761260802, iteration: 349885
loss: 0.9800483584403992,grad_norm: 0.9629730523124761, iteration: 349886
loss: 1.0135107040405273,grad_norm: 0.8333325665112687, iteration: 349887
loss: 0.9840832948684692,grad_norm: 0.8377084579405357, iteration: 349888
loss: 1.0097084045410156,grad_norm: 0.7756967841225172, iteration: 349889
loss: 1.010132074356079,grad_norm: 0.8375981234495443, iteration: 349890
loss: 0.9836676716804504,grad_norm: 0.8233338865653047, iteration: 349891
loss: 1.0249618291854858,grad_norm: 1.000000074944224, iteration: 349892
loss: 1.0236624479293823,grad_norm: 0.9999990083550723, iteration: 349893
loss: 1.0261125564575195,grad_norm: 0.8985330263285888, iteration: 349894
loss: 1.027132272720337,grad_norm: 0.7469640227225841, iteration: 349895
loss: 1.0025964975357056,grad_norm: 0.7595381808920922, iteration: 349896
loss: 1.0828953981399536,grad_norm: 0.960588571531677, iteration: 349897
loss: 0.9871395230293274,grad_norm: 0.8790173174873955, iteration: 349898
loss: 1.0294222831726074,grad_norm: 0.9999990075514488, iteration: 349899
loss: 1.0064773559570312,grad_norm: 0.9280195539882867, iteration: 349900
loss: 0.9752245545387268,grad_norm: 0.7292517224778893, iteration: 349901
loss: 1.2934846878051758,grad_norm: 0.9999998451171476, iteration: 349902
loss: 0.9824689030647278,grad_norm: 0.999999187088116, iteration: 349903
loss: 0.9578154683113098,grad_norm: 0.9760679847424554, iteration: 349904
loss: 1.0204594135284424,grad_norm: 0.9999994362957781, iteration: 349905
loss: 1.0228279829025269,grad_norm: 0.7645201792796762, iteration: 349906
loss: 1.0727797746658325,grad_norm: 0.8483961711539846, iteration: 349907
loss: 0.9844075441360474,grad_norm: 0.8691518324774987, iteration: 349908
loss: 1.008236050605774,grad_norm: 0.7001116166070696, iteration: 349909
loss: 1.042069673538208,grad_norm: 0.8826613818601315, iteration: 349910
loss: 1.0002994537353516,grad_norm: 0.8090880156213159, iteration: 349911
loss: 0.9734140038490295,grad_norm: 0.9055313689023378, iteration: 349912
loss: 0.9844902753829956,grad_norm: 0.8305854548820001, iteration: 349913
loss: 1.0096042156219482,grad_norm: 0.8402820107512254, iteration: 349914
loss: 1.019693374633789,grad_norm: 0.7240987397167078, iteration: 349915
loss: 1.0111262798309326,grad_norm: 0.88857855239815, iteration: 349916
loss: 0.95195472240448,grad_norm: 0.7940247425387085, iteration: 349917
loss: 1.0065100193023682,grad_norm: 0.7981796239847532, iteration: 349918
loss: 0.9755299687385559,grad_norm: 0.9470868520583795, iteration: 349919
loss: 0.9987102150917053,grad_norm: 0.7814539708354682, iteration: 349920
loss: 0.9771090149879456,grad_norm: 0.8282785317015423, iteration: 349921
loss: 1.017802119255066,grad_norm: 0.8025086261884288, iteration: 349922
loss: 1.023226261138916,grad_norm: 0.8580629855547584, iteration: 349923
loss: 1.050238847732544,grad_norm: 0.9322048648332946, iteration: 349924
loss: 1.0155069828033447,grad_norm: 0.7882766352604781, iteration: 349925
loss: 1.037239909172058,grad_norm: 0.9609556699102937, iteration: 349926
loss: 0.9985063672065735,grad_norm: 0.7637804275348637, iteration: 349927
loss: 0.9980893731117249,grad_norm: 0.8463844819170827, iteration: 349928
loss: 0.9916759133338928,grad_norm: 0.6958432955996955, iteration: 349929
loss: 0.9966975450515747,grad_norm: 0.9264679904235279, iteration: 349930
loss: 1.0471240282058716,grad_norm: 0.6994715225877189, iteration: 349931
loss: 0.9656457304954529,grad_norm: 0.8877746828974475, iteration: 349932
loss: 1.00888991355896,grad_norm: 0.7910679977820148, iteration: 349933
loss: 1.0308133363723755,grad_norm: 0.775906843749007, iteration: 349934
loss: 0.9910535216331482,grad_norm: 0.8191108940107097, iteration: 349935
loss: 0.9508773684501648,grad_norm: 0.9205650658150022, iteration: 349936
loss: 1.1848434209823608,grad_norm: 0.9999991483923953, iteration: 349937
loss: 0.9999632835388184,grad_norm: 0.8517617792243304, iteration: 349938
loss: 0.9879693984985352,grad_norm: 0.8263964282686077, iteration: 349939
loss: 1.0083541870117188,grad_norm: 0.9721745242914377, iteration: 349940
loss: 1.0006111860275269,grad_norm: 0.8052708094524379, iteration: 349941
loss: 1.0000200271606445,grad_norm: 0.9196082843449089, iteration: 349942
loss: 1.0444188117980957,grad_norm: 0.8301990061495178, iteration: 349943
loss: 1.008373737335205,grad_norm: 0.6438030672561444, iteration: 349944
loss: 1.007566213607788,grad_norm: 0.802011354461417, iteration: 349945
loss: 1.0359177589416504,grad_norm: 0.9030275366376296, iteration: 349946
loss: 0.9863410592079163,grad_norm: 0.7965590301676698, iteration: 349947
loss: 1.0213168859481812,grad_norm: 0.806608365549329, iteration: 349948
loss: 1.147223711013794,grad_norm: 0.8413213507627962, iteration: 349949
loss: 1.0011876821517944,grad_norm: 0.8465381121297122, iteration: 349950
loss: 1.0085780620574951,grad_norm: 0.7392115813287305, iteration: 349951
loss: 0.9910845756530762,grad_norm: 0.8696784096131485, iteration: 349952
loss: 1.0298917293548584,grad_norm: 0.9999998626424434, iteration: 349953
loss: 0.9908102750778198,grad_norm: 0.8617821814987106, iteration: 349954
loss: 1.0562129020690918,grad_norm: 0.9999996110187405, iteration: 349955
loss: 1.0524152517318726,grad_norm: 0.772082457893284, iteration: 349956
loss: 0.9811245799064636,grad_norm: 0.8625627344533393, iteration: 349957
loss: 0.9949268698692322,grad_norm: 0.8503683615120566, iteration: 349958
loss: 0.9750874042510986,grad_norm: 0.7567312152614621, iteration: 349959
loss: 0.9788662791252136,grad_norm: 0.949772099733404, iteration: 349960
loss: 1.0039616823196411,grad_norm: 0.9746411472482497, iteration: 349961
loss: 1.0041074752807617,grad_norm: 0.936409294943474, iteration: 349962
loss: 0.9947232604026794,grad_norm: 0.7714992389379085, iteration: 349963
loss: 0.9812449216842651,grad_norm: 0.7514354452405231, iteration: 349964
loss: 0.9914682507514954,grad_norm: 0.8312815092008, iteration: 349965
loss: 1.002738118171692,grad_norm: 0.9999990335413064, iteration: 349966
loss: 0.9936285614967346,grad_norm: 0.9999990676505337, iteration: 349967
loss: 1.0181245803833008,grad_norm: 0.9171321087222275, iteration: 349968
loss: 1.0314140319824219,grad_norm: 0.7958245769592541, iteration: 349969
loss: 0.9817318916320801,grad_norm: 0.7438592702701402, iteration: 349970
loss: 0.99016273021698,grad_norm: 0.8421087209629918, iteration: 349971
loss: 1.000887393951416,grad_norm: 0.995282667492178, iteration: 349972
loss: 1.0364375114440918,grad_norm: 0.7442211308897985, iteration: 349973
loss: 1.0439567565917969,grad_norm: 0.9999993390890123, iteration: 349974
loss: 1.0095218420028687,grad_norm: 0.841077129699926, iteration: 349975
loss: 1.009610891342163,grad_norm: 0.8281264834855103, iteration: 349976
loss: 0.9765551090240479,grad_norm: 0.6752332428693557, iteration: 349977
loss: 1.0009136199951172,grad_norm: 0.7727399685477787, iteration: 349978
loss: 0.9837914705276489,grad_norm: 0.7648426528847931, iteration: 349979
loss: 0.9893741011619568,grad_norm: 0.999999067178548, iteration: 349980
loss: 1.020392656326294,grad_norm: 0.6817717666926346, iteration: 349981
loss: 0.9810676574707031,grad_norm: 0.7110901557059567, iteration: 349982
loss: 1.0106381177902222,grad_norm: 0.9241703380697811, iteration: 349983
loss: 1.0039246082305908,grad_norm: 0.7546147402144023, iteration: 349984
loss: 0.9485327005386353,grad_norm: 0.9451239695066302, iteration: 349985
loss: 0.9770927429199219,grad_norm: 0.76100488579359, iteration: 349986
loss: 1.0111663341522217,grad_norm: 0.7630263708479756, iteration: 349987
loss: 0.9983108639717102,grad_norm: 0.8654201296766498, iteration: 349988
loss: 1.0260592699050903,grad_norm: 0.7612837330728922, iteration: 349989
loss: 1.0046939849853516,grad_norm: 0.8807589229776535, iteration: 349990
loss: 1.0131874084472656,grad_norm: 0.8332410193542991, iteration: 349991
loss: 0.9733492136001587,grad_norm: 0.7930171625293785, iteration: 349992
loss: 0.9578555226325989,grad_norm: 0.800191106106922, iteration: 349993
loss: 1.0328683853149414,grad_norm: 0.6843690215856194, iteration: 349994
loss: 0.9946198463439941,grad_norm: 0.8518091751259462, iteration: 349995
loss: 0.9899525046348572,grad_norm: 0.8631205325427532, iteration: 349996
loss: 1.012512445449829,grad_norm: 0.7588300946755125, iteration: 349997
loss: 1.0271961688995361,grad_norm: 0.9999998431904252, iteration: 349998
loss: 0.9775264859199524,grad_norm: 0.6574320306293032, iteration: 349999
loss: 1.0141277313232422,grad_norm: 0.9468016830917627, iteration: 350000
Evaluating at step 350000
{'val': 0.9953861869871616, 'test': 2.21886824038082}
loss: 0.9725775718688965,grad_norm: 0.7634349422281692, iteration: 350001
loss: 0.9949501156806946,grad_norm: 0.7736984384412955, iteration: 350002
loss: 1.0287855863571167,grad_norm: 0.999999656587718, iteration: 350003
loss: 1.0164002180099487,grad_norm: 0.8300287035150039, iteration: 350004
loss: 0.9847946166992188,grad_norm: 0.8970241305692526, iteration: 350005
loss: 1.010103702545166,grad_norm: 0.9012382299992917, iteration: 350006
loss: 1.0246812105178833,grad_norm: 0.8760476676335359, iteration: 350007
loss: 0.9900407791137695,grad_norm: 0.7551994364211947, iteration: 350008
loss: 0.99430251121521,grad_norm: 0.9099496731214367, iteration: 350009
loss: 0.9677990674972534,grad_norm: 0.7848936658307275, iteration: 350010
loss: 1.033389687538147,grad_norm: 0.8374040459071331, iteration: 350011
loss: 1.0169588327407837,grad_norm: 0.8394762055814463, iteration: 350012
loss: 1.0175020694732666,grad_norm: 0.7183502913102655, iteration: 350013
loss: 1.0330188274383545,grad_norm: 0.7188875345017132, iteration: 350014
loss: 1.085445523262024,grad_norm: 0.8557968151061116, iteration: 350015
loss: 0.9977448582649231,grad_norm: 0.7543633285413933, iteration: 350016
loss: 0.9865020513534546,grad_norm: 0.7096659645060606, iteration: 350017
loss: 0.9980450868606567,grad_norm: 0.6586999443731689, iteration: 350018
loss: 1.0289995670318604,grad_norm: 0.7379039723622058, iteration: 350019
loss: 0.9865057468414307,grad_norm: 0.8102416399053749, iteration: 350020
loss: 1.005039095878601,grad_norm: 0.8716972222568476, iteration: 350021
loss: 0.9585615992546082,grad_norm: 0.8018320938728021, iteration: 350022
loss: 1.020353078842163,grad_norm: 0.9999988925601959, iteration: 350023
loss: 1.016973614692688,grad_norm: 0.8651144492168404, iteration: 350024
loss: 1.0458306074142456,grad_norm: 0.9999994195299877, iteration: 350025
loss: 0.9909355044364929,grad_norm: 0.8153647045487359, iteration: 350026
loss: 1.0151528120040894,grad_norm: 0.9999995788259409, iteration: 350027
loss: 0.9944425821304321,grad_norm: 0.9174774643453848, iteration: 350028
loss: 0.9747852683067322,grad_norm: 0.7536560531494643, iteration: 350029
loss: 1.0434155464172363,grad_norm: 0.9999998579584368, iteration: 350030
loss: 1.018668532371521,grad_norm: 0.7460507790095934, iteration: 350031
loss: 0.9951333999633789,grad_norm: 0.828657625806515, iteration: 350032
loss: 0.9953025579452515,grad_norm: 0.5996481349852039, iteration: 350033
loss: 1.0264830589294434,grad_norm: 0.9231071120659554, iteration: 350034
loss: 1.02875816822052,grad_norm: 0.7595074376782369, iteration: 350035
loss: 0.9690938591957092,grad_norm: 0.8841183514684822, iteration: 350036
loss: 1.0201297998428345,grad_norm: 0.8694371261304055, iteration: 350037
loss: 0.9994567036628723,grad_norm: 0.9045482300731467, iteration: 350038
loss: 1.023629069328308,grad_norm: 0.7903330823131222, iteration: 350039
loss: 1.059456706047058,grad_norm: 0.9091635402679281, iteration: 350040
loss: 0.9827553629875183,grad_norm: 0.9122405306665955, iteration: 350041
loss: 0.9917401671409607,grad_norm: 0.8000740026488912, iteration: 350042
loss: 0.969375729560852,grad_norm: 0.999999049539361, iteration: 350043
loss: 1.0539665222167969,grad_norm: 0.999999166411901, iteration: 350044
loss: 0.9897927641868591,grad_norm: 0.9937919900541013, iteration: 350045
loss: 0.9909207224845886,grad_norm: 0.6649365302566688, iteration: 350046
loss: 1.0086194276809692,grad_norm: 0.8873187117536387, iteration: 350047
loss: 1.0097931623458862,grad_norm: 0.7968353830089108, iteration: 350048
loss: 1.0874874591827393,grad_norm: 0.9999998127982829, iteration: 350049
loss: 1.079623818397522,grad_norm: 0.9815704564545049, iteration: 350050
loss: 1.014517903327942,grad_norm: 0.7532920127635173, iteration: 350051
loss: 0.9945430159568787,grad_norm: 0.8026973929224146, iteration: 350052
loss: 0.9994050860404968,grad_norm: 0.810204796361463, iteration: 350053
loss: 0.9938084483146667,grad_norm: 0.7652269146918551, iteration: 350054
loss: 0.9854884147644043,grad_norm: 0.9677553564538719, iteration: 350055
loss: 1.0379400253295898,grad_norm: 0.9274451363806527, iteration: 350056
loss: 1.0278644561767578,grad_norm: 0.9999994544551677, iteration: 350057
loss: 0.9590283632278442,grad_norm: 0.8330738898834055, iteration: 350058
loss: 1.0710736513137817,grad_norm: 0.8818794897636258, iteration: 350059
loss: 1.0098261833190918,grad_norm: 0.9999999677642291, iteration: 350060
loss: 1.014296293258667,grad_norm: 0.6199012814994432, iteration: 350061
loss: 1.0225980281829834,grad_norm: 0.7019770353298254, iteration: 350062
loss: 1.1539723873138428,grad_norm: 0.9964978707577141, iteration: 350063
loss: 1.0909156799316406,grad_norm: 0.9999996374206809, iteration: 350064
loss: 1.1132678985595703,grad_norm: 0.9999998983675543, iteration: 350065
loss: 0.998327374458313,grad_norm: 0.9375351032097398, iteration: 350066
loss: 1.0116939544677734,grad_norm: 0.8791296552857214, iteration: 350067
loss: 1.0037881135940552,grad_norm: 0.9999997914107945, iteration: 350068
loss: 1.0158687829971313,grad_norm: 0.9999993995517091, iteration: 350069
loss: 1.0754719972610474,grad_norm: 0.842404508617978, iteration: 350070
loss: 0.9800246357917786,grad_norm: 0.7193703416825902, iteration: 350071
loss: 0.9579995274543762,grad_norm: 0.7829386793539146, iteration: 350072
loss: 1.0349693298339844,grad_norm: 0.7052229362247322, iteration: 350073
loss: 1.016055941581726,grad_norm: 0.6420331722769003, iteration: 350074
loss: 0.9934661984443665,grad_norm: 0.8835032250020969, iteration: 350075
loss: 0.9880052804946899,grad_norm: 0.9999998450206717, iteration: 350076
loss: 1.0336776971817017,grad_norm: 0.7572605646534232, iteration: 350077
loss: 1.0578083992004395,grad_norm: 0.9999991868591616, iteration: 350078
loss: 0.9914807677268982,grad_norm: 0.7865325491788543, iteration: 350079
loss: 1.0165014266967773,grad_norm: 0.8113525047710367, iteration: 350080
loss: 0.995317816734314,grad_norm: 0.8755055609996059, iteration: 350081
loss: 1.0519667863845825,grad_norm: 0.9999999938726436, iteration: 350082
loss: 0.9957754611968994,grad_norm: 0.844116025448588, iteration: 350083
loss: 1.0443967580795288,grad_norm: 0.999999355395262, iteration: 350084
loss: 1.0362200736999512,grad_norm: 0.8932527534840466, iteration: 350085
loss: 1.0464448928833008,grad_norm: 0.9686938110004373, iteration: 350086
loss: 0.9933643341064453,grad_norm: 0.9999990943951614, iteration: 350087
loss: 0.9867907166481018,grad_norm: 0.8304311948548093, iteration: 350088
loss: 1.063125491142273,grad_norm: 0.9999991428514045, iteration: 350089
loss: 0.9717236161231995,grad_norm: 0.9551605016897197, iteration: 350090
loss: 1.0407187938690186,grad_norm: 0.9890281656556617, iteration: 350091
loss: 0.9928190112113953,grad_norm: 0.8626123968266253, iteration: 350092
loss: 1.1000136137008667,grad_norm: 0.9784465890012669, iteration: 350093
loss: 1.0141979455947876,grad_norm: 0.6686304965647353, iteration: 350094
loss: 0.9880515933036804,grad_norm: 0.7403318321075, iteration: 350095
loss: 1.0106083154678345,grad_norm: 0.8451039054652082, iteration: 350096
loss: 0.9807315468788147,grad_norm: 0.7541064267286668, iteration: 350097
loss: 0.9579565525054932,grad_norm: 0.9641240673061775, iteration: 350098
loss: 1.015441656112671,grad_norm: 0.9492344325005841, iteration: 350099
loss: 0.9715978503227234,grad_norm: 0.8591856345147568, iteration: 350100
loss: 1.0220307111740112,grad_norm: 0.804011238733061, iteration: 350101
loss: 0.9842422008514404,grad_norm: 0.9008231066848812, iteration: 350102
loss: 1.004177212715149,grad_norm: 0.8900048519720115, iteration: 350103
loss: 0.9876115918159485,grad_norm: 0.9445210252464434, iteration: 350104
loss: 0.9987895488739014,grad_norm: 0.8332001474681988, iteration: 350105
loss: 0.9704071283340454,grad_norm: 0.9999992143000038, iteration: 350106
loss: 1.0287400484085083,grad_norm: 0.9999992252449006, iteration: 350107
loss: 1.0524890422821045,grad_norm: 0.999999107824459, iteration: 350108
loss: 1.0174524784088135,grad_norm: 0.9999999547559524, iteration: 350109
loss: 0.9805643558502197,grad_norm: 0.7588618554101848, iteration: 350110
loss: 0.9683693051338196,grad_norm: 0.8860503173884037, iteration: 350111
loss: 1.013210654258728,grad_norm: 0.7358600085778425, iteration: 350112
loss: 0.9719936847686768,grad_norm: 0.8229633891677093, iteration: 350113
loss: 0.9933599233627319,grad_norm: 0.7192807084027426, iteration: 350114
loss: 1.015756607055664,grad_norm: 0.8251694379690202, iteration: 350115
loss: 0.9885289669036865,grad_norm: 0.8356494805192929, iteration: 350116
loss: 1.0136975049972534,grad_norm: 0.9999995492360185, iteration: 350117
loss: 0.9809291958808899,grad_norm: 0.9431448116282987, iteration: 350118
loss: 1.0274410247802734,grad_norm: 0.8913646284456606, iteration: 350119
loss: 1.0170650482177734,grad_norm: 0.6920195160522221, iteration: 350120
loss: 1.0161346197128296,grad_norm: 0.7283518864895422, iteration: 350121
loss: 1.0247606039047241,grad_norm: 0.8424043062834332, iteration: 350122
loss: 0.9876427054405212,grad_norm: 0.7473527765443735, iteration: 350123
loss: 1.1041585206985474,grad_norm: 0.9999992397212693, iteration: 350124
loss: 1.0275366306304932,grad_norm: 0.952291469887178, iteration: 350125
loss: 1.0329506397247314,grad_norm: 0.7208205058714447, iteration: 350126
loss: 1.0118675231933594,grad_norm: 0.9705286861134854, iteration: 350127
loss: 0.9817473292350769,grad_norm: 0.6848570397662177, iteration: 350128
loss: 0.9948279857635498,grad_norm: 0.8941778887746802, iteration: 350129
loss: 0.9749923944473267,grad_norm: 0.7596468720186664, iteration: 350130
loss: 0.9649354815483093,grad_norm: 0.9725814385204076, iteration: 350131
loss: 0.9862794876098633,grad_norm: 0.8000896289934267, iteration: 350132
loss: 0.9875235557556152,grad_norm: 0.7734050871451225, iteration: 350133
loss: 1.030314326286316,grad_norm: 0.9039561526542074, iteration: 350134
loss: 1.0815749168395996,grad_norm: 0.8174670380332528, iteration: 350135
loss: 0.9894924759864807,grad_norm: 0.9999993015258726, iteration: 350136
loss: 1.0178439617156982,grad_norm: 0.9999999276637039, iteration: 350137
loss: 1.0373060703277588,grad_norm: 0.7734600962157318, iteration: 350138
loss: 1.0026699304580688,grad_norm: 0.8391355502066533, iteration: 350139
loss: 1.0179619789123535,grad_norm: 0.8779324077034446, iteration: 350140
loss: 0.999136209487915,grad_norm: 0.7371787126274548, iteration: 350141
loss: 0.9921944737434387,grad_norm: 0.7889149974890871, iteration: 350142
loss: 0.9875807762145996,grad_norm: 0.9078804102629416, iteration: 350143
loss: 0.9961943030357361,grad_norm: 0.8949805264348472, iteration: 350144
loss: 1.009946584701538,grad_norm: 0.7882500673225574, iteration: 350145
loss: 0.9749954342842102,grad_norm: 0.8812579498251536, iteration: 350146
loss: 1.0737751722335815,grad_norm: 0.9536539258793351, iteration: 350147
loss: 1.031686544418335,grad_norm: 0.8226110392494015, iteration: 350148
loss: 1.0396761894226074,grad_norm: 0.9057169680662535, iteration: 350149
loss: 0.9699798226356506,grad_norm: 0.7605413252260954, iteration: 350150
loss: 0.9549520015716553,grad_norm: 0.8805577762507233, iteration: 350151
loss: 0.9628694653511047,grad_norm: 0.9261715565953554, iteration: 350152
loss: 0.9847390055656433,grad_norm: 0.6971681993399761, iteration: 350153
loss: 0.9973344206809998,grad_norm: 0.8712365978730171, iteration: 350154
loss: 1.0145128965377808,grad_norm: 0.6971530902357306, iteration: 350155
loss: 1.040935754776001,grad_norm: 0.9336901408099213, iteration: 350156
loss: 0.9892061352729797,grad_norm: 0.81949984390879, iteration: 350157
loss: 0.9994940757751465,grad_norm: 0.999999125838122, iteration: 350158
loss: 1.0167053937911987,grad_norm: 0.7120830077354193, iteration: 350159
loss: 0.9920369982719421,grad_norm: 0.9999992266360089, iteration: 350160
loss: 1.0211329460144043,grad_norm: 0.6992665378723016, iteration: 350161
loss: 0.9837515354156494,grad_norm: 0.6982134681331161, iteration: 350162
loss: 0.9795790314674377,grad_norm: 0.8306611908394077, iteration: 350163
loss: 1.0221384763717651,grad_norm: 0.843551155910007, iteration: 350164
loss: 1.0027177333831787,grad_norm: 0.8778289563216054, iteration: 350165
loss: 1.0527775287628174,grad_norm: 0.99999965968501, iteration: 350166
loss: 1.0091497898101807,grad_norm: 0.9095510738036735, iteration: 350167
loss: 0.9682615399360657,grad_norm: 0.7233485442471332, iteration: 350168
loss: 0.9819778800010681,grad_norm: 0.9359278695340637, iteration: 350169
loss: 1.0199307203292847,grad_norm: 0.9999998568727475, iteration: 350170
loss: 0.9779800772666931,grad_norm: 0.9349903585440286, iteration: 350171
loss: 1.0278898477554321,grad_norm: 0.6863362883310524, iteration: 350172
loss: 1.0134161710739136,grad_norm: 0.8217753695251956, iteration: 350173
loss: 1.0270975828170776,grad_norm: 0.7074659787192623, iteration: 350174
loss: 0.9801312685012817,grad_norm: 0.7803993561038443, iteration: 350175
loss: 0.9998714327812195,grad_norm: 0.9180728789438242, iteration: 350176
loss: 1.0135663747787476,grad_norm: 0.6834731710723397, iteration: 350177
loss: 1.2511930465698242,grad_norm: 0.999999398869344, iteration: 350178
loss: 1.0021288394927979,grad_norm: 0.9999992245402676, iteration: 350179
loss: 1.00407075881958,grad_norm: 0.7313851156948145, iteration: 350180
loss: 1.016497015953064,grad_norm: 0.76149168051724, iteration: 350181
loss: 1.0742619037628174,grad_norm: 0.9999992896262344, iteration: 350182
loss: 0.9972081780433655,grad_norm: 0.8325157105365486, iteration: 350183
loss: 1.0011835098266602,grad_norm: 0.9388817589968403, iteration: 350184
loss: 1.023518443107605,grad_norm: 0.9714502053109069, iteration: 350185
loss: 0.9821603298187256,grad_norm: 0.8710112974685689, iteration: 350186
loss: 0.9925530552864075,grad_norm: 0.7051231677175662, iteration: 350187
loss: 0.990210235118866,grad_norm: 0.749316112288064, iteration: 350188
loss: 1.0281487703323364,grad_norm: 0.9507646878642887, iteration: 350189
loss: 1.000356912612915,grad_norm: 0.8376258415309038, iteration: 350190
loss: 0.9815724492073059,grad_norm: 0.8931930705940282, iteration: 350191
loss: 0.9757654070854187,grad_norm: 0.7919237135485309, iteration: 350192
loss: 1.0144598484039307,grad_norm: 0.999999086507877, iteration: 350193
loss: 1.0368553400039673,grad_norm: 0.7666994332422472, iteration: 350194
loss: 0.9835704565048218,grad_norm: 0.6863133157651308, iteration: 350195
loss: 1.0067811012268066,grad_norm: 0.8231898213907743, iteration: 350196
loss: 0.9612807035446167,grad_norm: 0.9999990084510455, iteration: 350197
loss: 0.9736136198043823,grad_norm: 0.7430401426929483, iteration: 350198
loss: 0.9968883991241455,grad_norm: 0.8318277341874907, iteration: 350199
loss: 0.9944239258766174,grad_norm: 0.9999995187432714, iteration: 350200
loss: 1.0197889804840088,grad_norm: 0.869213242367592, iteration: 350201
loss: 0.9707213640213013,grad_norm: 0.8260131933732106, iteration: 350202
loss: 0.9920096397399902,grad_norm: 0.6632662245415938, iteration: 350203
loss: 1.1000126600265503,grad_norm: 0.8278541060234537, iteration: 350204
loss: 0.9887925982475281,grad_norm: 0.9865922465964705, iteration: 350205
loss: 1.0044251680374146,grad_norm: 0.988790357595059, iteration: 350206
loss: 1.0111888647079468,grad_norm: 0.7028964749338018, iteration: 350207
loss: 0.9933456778526306,grad_norm: 0.9137296523251055, iteration: 350208
loss: 0.978529155254364,grad_norm: 0.9053391969699716, iteration: 350209
loss: 0.9823296666145325,grad_norm: 0.8953577450575592, iteration: 350210
loss: 0.9786006808280945,grad_norm: 0.9352954656111502, iteration: 350211
loss: 0.9590282440185547,grad_norm: 0.763921186010578, iteration: 350212
loss: 0.9950659871101379,grad_norm: 0.8585651276887446, iteration: 350213
loss: 0.98516845703125,grad_norm: 0.7508407057181353, iteration: 350214
loss: 1.0369222164154053,grad_norm: 0.9999997394176695, iteration: 350215
loss: 0.9552330374717712,grad_norm: 0.6945867394206487, iteration: 350216
loss: 1.0720977783203125,grad_norm: 0.9999997451019554, iteration: 350217
loss: 1.0167189836502075,grad_norm: 0.713837971403818, iteration: 350218
loss: 1.0067845582962036,grad_norm: 0.9999993548301396, iteration: 350219
loss: 1.0189216136932373,grad_norm: 0.9550196594748933, iteration: 350220
loss: 0.9812829494476318,grad_norm: 0.8613714400317992, iteration: 350221
loss: 0.9588094353675842,grad_norm: 0.956571692621246, iteration: 350222
loss: 1.0048444271087646,grad_norm: 0.9371385972816342, iteration: 350223
loss: 0.9727796912193298,grad_norm: 0.9013199260830927, iteration: 350224
loss: 1.00762939453125,grad_norm: 0.8884944961096205, iteration: 350225
loss: 1.01974618434906,grad_norm: 0.7285860082442616, iteration: 350226
loss: 1.0280630588531494,grad_norm: 0.9329186464621964, iteration: 350227
loss: 0.9739804863929749,grad_norm: 0.8890849690689906, iteration: 350228
loss: 0.9911400675773621,grad_norm: 0.6828957131385206, iteration: 350229
loss: 0.9543085098266602,grad_norm: 0.891238048309555, iteration: 350230
loss: 0.9746330380439758,grad_norm: 0.7395502469842895, iteration: 350231
loss: 1.0378164052963257,grad_norm: 0.8827770118037628, iteration: 350232
loss: 1.0044013261795044,grad_norm: 0.8152235447042292, iteration: 350233
loss: 1.0214630365371704,grad_norm: 0.8592771064348851, iteration: 350234
loss: 1.009485125541687,grad_norm: 0.9107450594787005, iteration: 350235
loss: 1.0237312316894531,grad_norm: 0.8387475988338308, iteration: 350236
loss: 1.0106806755065918,grad_norm: 0.9204226770455606, iteration: 350237
loss: 0.9965978264808655,grad_norm: 0.8452156646353584, iteration: 350238
loss: 0.9979785680770874,grad_norm: 0.9802898359946334, iteration: 350239
loss: 1.0683767795562744,grad_norm: 0.9999999275968451, iteration: 350240
loss: 1.0132901668548584,grad_norm: 0.912493052329451, iteration: 350241
loss: 1.0137546062469482,grad_norm: 0.9999990665299942, iteration: 350242
loss: 0.9796546697616577,grad_norm: 0.8507513018455929, iteration: 350243
loss: 1.0262823104858398,grad_norm: 0.929011205697037, iteration: 350244
loss: 0.9960691928863525,grad_norm: 0.7914958950071254, iteration: 350245
loss: 1.0112954378128052,grad_norm: 0.756772015010229, iteration: 350246
loss: 0.9617581963539124,grad_norm: 0.8207099976079159, iteration: 350247
loss: 1.0038460493087769,grad_norm: 0.7227933431797198, iteration: 350248
loss: 1.004917860031128,grad_norm: 0.8190984174276701, iteration: 350249
loss: 0.9364683628082275,grad_norm: 0.9100991302711117, iteration: 350250
loss: 1.0047370195388794,grad_norm: 0.7890855844694484, iteration: 350251
loss: 0.9933671951293945,grad_norm: 0.9014817737803711, iteration: 350252
loss: 1.1047301292419434,grad_norm: 0.9999992237660583, iteration: 350253
loss: 0.9849565029144287,grad_norm: 0.9999996141553269, iteration: 350254
loss: 0.9701095819473267,grad_norm: 0.842056196856468, iteration: 350255
loss: 1.0096465349197388,grad_norm: 0.8996504583256255, iteration: 350256
loss: 1.0251628160476685,grad_norm: 0.8561642915341995, iteration: 350257
loss: 0.9975772500038147,grad_norm: 0.9999993262177088, iteration: 350258
loss: 1.0124316215515137,grad_norm: 0.8819439029408952, iteration: 350259
loss: 1.0187524557113647,grad_norm: 0.9999992179214702, iteration: 350260
loss: 1.0576820373535156,grad_norm: 0.8772055438104531, iteration: 350261
loss: 1.0162867307662964,grad_norm: 0.9999991680974238, iteration: 350262
loss: 0.9721820950508118,grad_norm: 0.830479338200446, iteration: 350263
loss: 1.0390939712524414,grad_norm: 0.9999996330307699, iteration: 350264
loss: 0.9949012398719788,grad_norm: 0.7077908467853722, iteration: 350265
loss: 0.9856035113334656,grad_norm: 0.7759694678125699, iteration: 350266
loss: 0.9937905669212341,grad_norm: 0.8092426286368116, iteration: 350267
loss: 1.0245299339294434,grad_norm: 0.8124537007919177, iteration: 350268
loss: 0.9726572632789612,grad_norm: 0.7508947515376968, iteration: 350269
loss: 0.9832682609558105,grad_norm: 0.9538855194259774, iteration: 350270
loss: 0.9970734715461731,grad_norm: 0.99999988653388, iteration: 350271
loss: 1.004120945930481,grad_norm: 0.789740960785303, iteration: 350272
loss: 0.9710009098052979,grad_norm: 0.6989824930234845, iteration: 350273
loss: 1.0147830247879028,grad_norm: 0.7671555292912052, iteration: 350274
loss: 0.9826743602752686,grad_norm: 0.7900575631608344, iteration: 350275
loss: 1.0339223146438599,grad_norm: 0.7850147413264612, iteration: 350276
loss: 0.9870510101318359,grad_norm: 0.7132980507091153, iteration: 350277
loss: 0.9651188850402832,grad_norm: 0.8311339937623877, iteration: 350278
loss: 0.9948964715003967,grad_norm: 0.8433662804167305, iteration: 350279
loss: 1.017132043838501,grad_norm: 0.8944789631838044, iteration: 350280
loss: 1.0274243354797363,grad_norm: 0.7664180017184519, iteration: 350281
loss: 0.9745796918869019,grad_norm: 0.863376341473505, iteration: 350282
loss: 0.9894663095474243,grad_norm: 0.7777559234093895, iteration: 350283
loss: 1.0098413228988647,grad_norm: 0.8301356419387417, iteration: 350284
loss: 1.0131498575210571,grad_norm: 0.8421749884079953, iteration: 350285
loss: 1.0863035917282104,grad_norm: 0.999999617711311, iteration: 350286
loss: 1.030868411064148,grad_norm: 0.9069928477271504, iteration: 350287
loss: 0.9940656423568726,grad_norm: 0.9999994510809626, iteration: 350288
loss: 1.051546573638916,grad_norm: 0.9999998616950672, iteration: 350289
loss: 1.0046939849853516,grad_norm: 0.878547098545273, iteration: 350290
loss: 0.9651923179626465,grad_norm: 0.6715674413459334, iteration: 350291
loss: 1.0357147455215454,grad_norm: 0.8092965529776726, iteration: 350292
loss: 1.0036516189575195,grad_norm: 0.8720869755537427, iteration: 350293
loss: 1.0138180255889893,grad_norm: 0.9146173029278857, iteration: 350294
loss: 1.0259004831314087,grad_norm: 0.9999990951594531, iteration: 350295
loss: 0.9890819191932678,grad_norm: 0.8099623627071785, iteration: 350296
loss: 0.9926875829696655,grad_norm: 0.8832693938657297, iteration: 350297
loss: 0.977620542049408,grad_norm: 0.9999992002545344, iteration: 350298
loss: 1.007326364517212,grad_norm: 0.6991376799932602, iteration: 350299
loss: 1.0123590230941772,grad_norm: 0.748007265656643, iteration: 350300
loss: 1.0232704877853394,grad_norm: 0.725315119565994, iteration: 350301
loss: 0.9755436778068542,grad_norm: 0.999999736530156, iteration: 350302
loss: 1.0237400531768799,grad_norm: 0.9999991000006792, iteration: 350303
loss: 0.9887091517448425,grad_norm: 0.9954816283354788, iteration: 350304
loss: 1.021633267402649,grad_norm: 0.9886297874629825, iteration: 350305
loss: 0.9595915675163269,grad_norm: 0.8577334681698373, iteration: 350306
loss: 1.0206810235977173,grad_norm: 0.8360435458856489, iteration: 350307
loss: 0.9878379702568054,grad_norm: 0.8064822741283487, iteration: 350308
loss: 1.00881826877594,grad_norm: 0.9450740005458659, iteration: 350309
loss: 1.0012050867080688,grad_norm: 0.9171300406532547, iteration: 350310
loss: 1.0002411603927612,grad_norm: 0.776001630989222, iteration: 350311
loss: 0.9818737506866455,grad_norm: 0.9198609364825768, iteration: 350312
loss: 1.0108311176300049,grad_norm: 0.7962053224451263, iteration: 350313
loss: 0.9871492385864258,grad_norm: 0.6804817962866182, iteration: 350314
loss: 1.0165542364120483,grad_norm: 0.9747623983039769, iteration: 350315
loss: 1.0272899866104126,grad_norm: 0.9999989811693307, iteration: 350316
loss: 1.0236001014709473,grad_norm: 0.7231593701894746, iteration: 350317
loss: 0.9755421280860901,grad_norm: 0.999999167843635, iteration: 350318
loss: 0.998504102230072,grad_norm: 0.8765065518071815, iteration: 350319
loss: 0.9817410111427307,grad_norm: 0.8833659674824562, iteration: 350320
loss: 1.0002537965774536,grad_norm: 0.8235050105437733, iteration: 350321
loss: 0.978637158870697,grad_norm: 0.958874959719212, iteration: 350322
loss: 0.9810961484909058,grad_norm: 0.7505451839901564, iteration: 350323
loss: 0.982721209526062,grad_norm: 0.9999992142426359, iteration: 350324
loss: 1.011977195739746,grad_norm: 0.7700679395725578, iteration: 350325
loss: 1.0495315790176392,grad_norm: 0.9999991321643298, iteration: 350326
loss: 0.9933724403381348,grad_norm: 0.7385026045856059, iteration: 350327
loss: 1.0091732740402222,grad_norm: 0.8019117749170038, iteration: 350328
loss: 0.9962305426597595,grad_norm: 0.6826353947483982, iteration: 350329
loss: 1.036304235458374,grad_norm: 0.9999992258606936, iteration: 350330
loss: 0.976819634437561,grad_norm: 0.6986763823238503, iteration: 350331
loss: 1.0076298713684082,grad_norm: 0.7992761523062814, iteration: 350332
loss: 0.9670570492744446,grad_norm: 0.8994990811399549, iteration: 350333
loss: 1.0587519407272339,grad_norm: 0.9546278103090129, iteration: 350334
loss: 0.9748303890228271,grad_norm: 0.6554541478609127, iteration: 350335
loss: 1.0175412893295288,grad_norm: 0.7049085631824816, iteration: 350336
loss: 1.007024884223938,grad_norm: 0.9364888675028853, iteration: 350337
loss: 0.9816892147064209,grad_norm: 0.7436113347689003, iteration: 350338
loss: 0.9757744073867798,grad_norm: 0.8057571830042587, iteration: 350339
loss: 1.0031683444976807,grad_norm: 0.8765064584771863, iteration: 350340
loss: 1.0215060710906982,grad_norm: 0.7435961400330348, iteration: 350341
loss: 0.9838672280311584,grad_norm: 0.6254192558617114, iteration: 350342
loss: 1.0018302202224731,grad_norm: 0.8612486962042901, iteration: 350343
loss: 0.9622814059257507,grad_norm: 0.775957632399834, iteration: 350344
loss: 0.9928134679794312,grad_norm: 0.7164930935858737, iteration: 350345
loss: 0.9855976700782776,grad_norm: 0.7847489207845545, iteration: 350346
loss: 0.9579723477363586,grad_norm: 0.9918530867987243, iteration: 350347
loss: 0.9645224213600159,grad_norm: 0.8381616912971398, iteration: 350348
loss: 1.0028095245361328,grad_norm: 0.8610290806448624, iteration: 350349
loss: 0.9792267084121704,grad_norm: 0.9999993026135299, iteration: 350350
loss: 0.9994906783103943,grad_norm: 0.7209029055016956, iteration: 350351
loss: 1.0289363861083984,grad_norm: 0.793341437798093, iteration: 350352
loss: 1.0134685039520264,grad_norm: 0.856358445159865, iteration: 350353
loss: 0.9730726480484009,grad_norm: 0.8766551393503277, iteration: 350354
loss: 0.998303234577179,grad_norm: 0.8421055167279062, iteration: 350355
loss: 1.0127440690994263,grad_norm: 0.9495406239410524, iteration: 350356
loss: 0.9643523693084717,grad_norm: 0.955852577583303, iteration: 350357
loss: 1.0014597177505493,grad_norm: 0.9396741695774743, iteration: 350358
loss: 1.0171748399734497,grad_norm: 0.9999990977028989, iteration: 350359
loss: 1.016539216041565,grad_norm: 0.7358251135109084, iteration: 350360
loss: 1.0659362077713013,grad_norm: 0.9999992460710375, iteration: 350361
loss: 0.992046058177948,grad_norm: 0.675953753250211, iteration: 350362
loss: 1.0187348127365112,grad_norm: 0.813788539552437, iteration: 350363
loss: 0.9706287980079651,grad_norm: 0.8118020961201515, iteration: 350364
loss: 0.9984403848648071,grad_norm: 0.7003426463046529, iteration: 350365
loss: 1.030116081237793,grad_norm: 0.7085911712118131, iteration: 350366
loss: 0.9894445538520813,grad_norm: 0.7961168464727013, iteration: 350367
loss: 1.0160582065582275,grad_norm: 0.8646216863016803, iteration: 350368
loss: 0.9795295596122742,grad_norm: 0.9999991109678508, iteration: 350369
loss: 1.023190975189209,grad_norm: 0.9999998388831145, iteration: 350370
loss: 0.9996216297149658,grad_norm: 0.6193449980196032, iteration: 350371
loss: 0.9909350872039795,grad_norm: 0.7620666233492775, iteration: 350372
loss: 0.9958458542823792,grad_norm: 0.7886806963589128, iteration: 350373
loss: 1.0148426294326782,grad_norm: 0.8596660716213846, iteration: 350374
loss: 1.0092241764068604,grad_norm: 0.8276492055311117, iteration: 350375
loss: 1.0140128135681152,grad_norm: 0.6982172143983855, iteration: 350376
loss: 0.9896292686462402,grad_norm: 0.8265852811792647, iteration: 350377
loss: 0.9996767640113831,grad_norm: 0.6795623914735409, iteration: 350378
loss: 1.0264673233032227,grad_norm: 0.9994204698775032, iteration: 350379
loss: 1.000874638557434,grad_norm: 0.9110243279942414, iteration: 350380
loss: 0.9756600856781006,grad_norm: 0.7688682549004368, iteration: 350381
loss: 1.008825421333313,grad_norm: 0.8277133642030143, iteration: 350382
loss: 1.0001907348632812,grad_norm: 0.9999996072269085, iteration: 350383
loss: 1.008968472480774,grad_norm: 0.7583908512218867, iteration: 350384
loss: 0.948239266872406,grad_norm: 0.8924980829057464, iteration: 350385
loss: 1.0373021364212036,grad_norm: 0.9999991544662046, iteration: 350386
loss: 1.030677318572998,grad_norm: 0.8811200315875053, iteration: 350387
loss: 0.9711347222328186,grad_norm: 0.6844273832792301, iteration: 350388
loss: 0.9816567897796631,grad_norm: 0.6948430047117024, iteration: 350389
loss: 1.0324651002883911,grad_norm: 0.7902675949607694, iteration: 350390
loss: 0.9843321442604065,grad_norm: 0.8597851975088842, iteration: 350391
loss: 1.1436840295791626,grad_norm: 0.9161523411887534, iteration: 350392
loss: 0.9983051419258118,grad_norm: 0.8432518538417875, iteration: 350393
loss: 1.0351966619491577,grad_norm: 0.7562924937818862, iteration: 350394
loss: 0.9703845381736755,grad_norm: 0.83631494196512, iteration: 350395
loss: 1.0476957559585571,grad_norm: 0.9999990207432573, iteration: 350396
loss: 0.9911487698554993,grad_norm: 0.811591486356862, iteration: 350397
loss: 1.0066869258880615,grad_norm: 0.8829694150351515, iteration: 350398
loss: 0.9826268553733826,grad_norm: 0.78160253908275, iteration: 350399
loss: 0.9900478720664978,grad_norm: 0.7586667107311402, iteration: 350400
loss: 1.0069228410720825,grad_norm: 0.9566116752183311, iteration: 350401
loss: 1.0399380922317505,grad_norm: 0.9999997983287188, iteration: 350402
loss: 0.9816219210624695,grad_norm: 0.8726982897988962, iteration: 350403
loss: 0.9760677814483643,grad_norm: 0.7637117235595056, iteration: 350404
loss: 1.0327399969100952,grad_norm: 0.7828662658607524, iteration: 350405
loss: 1.0230467319488525,grad_norm: 0.8937865073841333, iteration: 350406
loss: 1.0142452716827393,grad_norm: 0.9999992265710332, iteration: 350407
loss: 1.02432382106781,grad_norm: 0.8571186469876584, iteration: 350408
loss: 1.0104562044143677,grad_norm: 0.8621316989715103, iteration: 350409
loss: 0.9930693507194519,grad_norm: 0.8643559332154132, iteration: 350410
loss: 1.0046744346618652,grad_norm: 0.6817288568187132, iteration: 350411
loss: 1.0177793502807617,grad_norm: 0.8192534036619084, iteration: 350412
loss: 1.0187140703201294,grad_norm: 0.9999989892197969, iteration: 350413
loss: 0.990791380405426,grad_norm: 0.9999989533470321, iteration: 350414
loss: 1.0278327465057373,grad_norm: 0.7697856140085219, iteration: 350415
loss: 0.9690399169921875,grad_norm: 0.7411517093593275, iteration: 350416
loss: 0.9949673414230347,grad_norm: 0.7196684117828376, iteration: 350417
loss: 0.9532474279403687,grad_norm: 0.9005198420436441, iteration: 350418
loss: 0.976715624332428,grad_norm: 0.7608924009603539, iteration: 350419
loss: 1.0304595232009888,grad_norm: 0.9999999157852756, iteration: 350420
loss: 0.9986938834190369,grad_norm: 0.9999996684067143, iteration: 350421
loss: 0.9999019503593445,grad_norm: 0.6883897415669077, iteration: 350422
loss: 0.996170163154602,grad_norm: 0.8856927678730064, iteration: 350423
loss: 1.0000334978103638,grad_norm: 0.819135490553796, iteration: 350424
loss: 1.0316860675811768,grad_norm: 0.8446577171084622, iteration: 350425
loss: 1.0093084573745728,grad_norm: 0.8188696959673454, iteration: 350426
loss: 0.9940909147262573,grad_norm: 0.9358068296440328, iteration: 350427
loss: 1.0073167085647583,grad_norm: 0.7484366815617426, iteration: 350428
loss: 0.9938928484916687,grad_norm: 0.7389326941357508, iteration: 350429
loss: 1.0016944408416748,grad_norm: 0.8323429490459348, iteration: 350430
loss: 0.9706626534461975,grad_norm: 0.8364938342155377, iteration: 350431
loss: 0.9668489098548889,grad_norm: 0.8970049429131591, iteration: 350432
loss: 1.002986192703247,grad_norm: 0.952786716609135, iteration: 350433
loss: 1.007339358329773,grad_norm: 0.9999996475464032, iteration: 350434
loss: 1.0135271549224854,grad_norm: 0.7291408341866097, iteration: 350435
loss: 0.9725901484489441,grad_norm: 0.8627247602576145, iteration: 350436
loss: 0.9968267679214478,grad_norm: 0.7137550804155789, iteration: 350437
loss: 0.9985806941986084,grad_norm: 0.8204379913294686, iteration: 350438
loss: 1.0191985368728638,grad_norm: 0.865563394587099, iteration: 350439
loss: 0.9485408663749695,grad_norm: 0.8214125061763506, iteration: 350440
loss: 1.012500286102295,grad_norm: 0.8235293629766897, iteration: 350441
loss: 1.0031859874725342,grad_norm: 0.8001309841210313, iteration: 350442
loss: 0.992743194103241,grad_norm: 0.8250045679365228, iteration: 350443
loss: 0.9925780892372131,grad_norm: 0.7285338949355652, iteration: 350444
loss: 0.9824296236038208,grad_norm: 0.8850464309424708, iteration: 350445
loss: 0.9476742744445801,grad_norm: 0.750855969321514, iteration: 350446
loss: 1.0631369352340698,grad_norm: 0.8119816990744061, iteration: 350447
loss: 1.012223720550537,grad_norm: 0.8397593960209694, iteration: 350448
loss: 1.007930040359497,grad_norm: 0.6748554818238381, iteration: 350449
loss: 0.9954344630241394,grad_norm: 0.8553452608929321, iteration: 350450
loss: 1.013881802558899,grad_norm: 0.6358551084500514, iteration: 350451
loss: 0.9791567921638489,grad_norm: 0.9999991333242986, iteration: 350452
loss: 1.0276507139205933,grad_norm: 0.8607660649986718, iteration: 350453
loss: 0.9544515013694763,grad_norm: 0.9999994797727957, iteration: 350454
loss: 1.0245811939239502,grad_norm: 0.7690652223077445, iteration: 350455
loss: 0.9986766576766968,grad_norm: 0.7707093501281654, iteration: 350456
loss: 1.0152103900909424,grad_norm: 0.6667174948638127, iteration: 350457
loss: 0.979168176651001,grad_norm: 0.9632245948287106, iteration: 350458
loss: 1.0159485340118408,grad_norm: 0.9999990539129383, iteration: 350459
loss: 0.9840267300605774,grad_norm: 0.7043515187697534, iteration: 350460
loss: 0.9693371057510376,grad_norm: 0.6723677800527809, iteration: 350461
loss: 1.0091488361358643,grad_norm: 0.7364903188897592, iteration: 350462
loss: 1.0229160785675049,grad_norm: 0.8077683890194735, iteration: 350463
loss: 0.9977988600730896,grad_norm: 0.5444447356641109, iteration: 350464
loss: 0.9882875084877014,grad_norm: 0.890015640145136, iteration: 350465
loss: 0.9920104742050171,grad_norm: 0.6713211475653528, iteration: 350466
loss: 1.0141633749008179,grad_norm: 0.9979948283117168, iteration: 350467
loss: 1.0003994703292847,grad_norm: 0.7557770184744623, iteration: 350468
loss: 1.2220865488052368,grad_norm: 0.9999994455365432, iteration: 350469
loss: 1.0174020528793335,grad_norm: 0.9999993312976593, iteration: 350470
loss: 1.0435124635696411,grad_norm: 0.955467931815562, iteration: 350471
loss: 1.0286988019943237,grad_norm: 0.7795187625749121, iteration: 350472
loss: 1.0061087608337402,grad_norm: 0.8024851283922826, iteration: 350473
loss: 1.0179121494293213,grad_norm: 0.8001691234742349, iteration: 350474
loss: 1.0213385820388794,grad_norm: 0.8710215548726818, iteration: 350475
loss: 1.01143479347229,grad_norm: 0.682641826216142, iteration: 350476
loss: 0.9891265034675598,grad_norm: 0.913730720171388, iteration: 350477
loss: 1.0185339450836182,grad_norm: 0.8140015769304946, iteration: 350478
loss: 0.9972238540649414,grad_norm: 0.8068751537416646, iteration: 350479
loss: 0.9819483757019043,grad_norm: 0.852646223903125, iteration: 350480
loss: 1.0099602937698364,grad_norm: 0.8649747217413314, iteration: 350481
loss: 0.9772178530693054,grad_norm: 0.7207924627338841, iteration: 350482
loss: 0.9935197234153748,grad_norm: 0.8972306259990078, iteration: 350483
loss: 1.0696145296096802,grad_norm: 0.8667300979353342, iteration: 350484
loss: 1.117578387260437,grad_norm: 0.9999997936370968, iteration: 350485
loss: 0.9907355308532715,grad_norm: 0.9248172084012941, iteration: 350486
loss: 0.966212272644043,grad_norm: 0.8384744292931806, iteration: 350487
loss: 1.0180344581604004,grad_norm: 0.9999990378234349, iteration: 350488
loss: 0.9785130023956299,grad_norm: 0.9437753484650008, iteration: 350489
loss: 0.9662313461303711,grad_norm: 0.8325570208551819, iteration: 350490
loss: 0.9883040189743042,grad_norm: 0.7930709221675748, iteration: 350491
loss: 0.9913668036460876,grad_norm: 0.7611125543637456, iteration: 350492
loss: 0.9831727147102356,grad_norm: 0.9999991458840883, iteration: 350493
loss: 0.9503383040428162,grad_norm: 0.966430953572144, iteration: 350494
loss: 1.0395292043685913,grad_norm: 0.9999992752361813, iteration: 350495
loss: 1.0054317712783813,grad_norm: 0.9206028561393559, iteration: 350496
loss: 0.9933939576148987,grad_norm: 0.8222041810849606, iteration: 350497
loss: 0.9619977474212646,grad_norm: 0.7842181459485111, iteration: 350498
loss: 1.025192379951477,grad_norm: 0.8116029308059242, iteration: 350499
loss: 0.9767840504646301,grad_norm: 0.8138485512662444, iteration: 350500
loss: 0.9797953367233276,grad_norm: 0.6881783455069107, iteration: 350501
loss: 1.0044258832931519,grad_norm: 0.8099958476176399, iteration: 350502
loss: 1.0124350786209106,grad_norm: 0.7970624187698465, iteration: 350503
loss: 0.9908140301704407,grad_norm: 0.8450239073909449, iteration: 350504
loss: 1.0260045528411865,grad_norm: 0.9999996991886602, iteration: 350505
loss: 1.0271321535110474,grad_norm: 0.8046514440673277, iteration: 350506
loss: 1.0234593152999878,grad_norm: 0.7590259031216211, iteration: 350507
loss: 0.9745248556137085,grad_norm: 0.9115392796837802, iteration: 350508
loss: 0.9972315430641174,grad_norm: 0.9999991405184924, iteration: 350509
loss: 0.9919276237487793,grad_norm: 0.9020818314896313, iteration: 350510
loss: 1.0037763118743896,grad_norm: 0.7017177685208198, iteration: 350511
loss: 0.9865628480911255,grad_norm: 0.8432608827002295, iteration: 350512
loss: 0.9969891905784607,grad_norm: 0.8803546463522206, iteration: 350513
loss: 1.0020182132720947,grad_norm: 0.7577337392926865, iteration: 350514
loss: 0.9598785042762756,grad_norm: 0.7768147698437647, iteration: 350515
loss: 0.9812085628509521,grad_norm: 0.6526950270944933, iteration: 350516
loss: 1.0187013149261475,grad_norm: 0.9999990997128914, iteration: 350517
loss: 1.021551251411438,grad_norm: 0.8189062113049697, iteration: 350518
loss: 1.1520003080368042,grad_norm: 0.9999992955438758, iteration: 350519
loss: 1.0288413763046265,grad_norm: 0.9694436105688943, iteration: 350520
loss: 0.9773293137550354,grad_norm: 0.6522606737332862, iteration: 350521
loss: 0.9774011373519897,grad_norm: 0.9055598391953507, iteration: 350522
loss: 1.0277166366577148,grad_norm: 0.7557136305245074, iteration: 350523
loss: 0.986731231212616,grad_norm: 0.750173498676199, iteration: 350524
loss: 1.0115026235580444,grad_norm: 0.7953367158814393, iteration: 350525
loss: 0.9761386513710022,grad_norm: 0.8718005676051416, iteration: 350526
loss: 0.9961123466491699,grad_norm: 0.7302528509878065, iteration: 350527
loss: 1.0296982526779175,grad_norm: 0.9999992371263553, iteration: 350528
loss: 0.9723336100578308,grad_norm: 0.999999164266029, iteration: 350529
loss: 0.9600196480751038,grad_norm: 0.831573800914608, iteration: 350530
loss: 0.9893103241920471,grad_norm: 0.8039991759032995, iteration: 350531
loss: 0.9981870651245117,grad_norm: 0.9999997743334056, iteration: 350532
loss: 1.0174373388290405,grad_norm: 0.7949373205015025, iteration: 350533
loss: 1.0063570737838745,grad_norm: 0.814835900356045, iteration: 350534
loss: 1.0259441137313843,grad_norm: 0.8317461310123677, iteration: 350535
loss: 0.9776290059089661,grad_norm: 0.7543001514033429, iteration: 350536
loss: 0.9851505756378174,grad_norm: 0.6883483871720926, iteration: 350537
loss: 1.0033992528915405,grad_norm: 0.7750408717831889, iteration: 350538
loss: 1.0132569074630737,grad_norm: 0.8670683990096495, iteration: 350539
loss: 0.9745286703109741,grad_norm: 0.8843315567579437, iteration: 350540
loss: 0.9547299146652222,grad_norm: 0.7986590424855745, iteration: 350541
loss: 1.022202491760254,grad_norm: 0.7784993697721336, iteration: 350542
loss: 1.0109978914260864,grad_norm: 0.862512405256673, iteration: 350543
loss: 1.0042259693145752,grad_norm: 0.8266381161249394, iteration: 350544
loss: 1.0150071382522583,grad_norm: 0.7479534556869081, iteration: 350545
loss: 1.0610980987548828,grad_norm: 0.9009378387492747, iteration: 350546
loss: 0.9847555160522461,grad_norm: 0.8335968358945304, iteration: 350547
loss: 1.0041675567626953,grad_norm: 0.6739827730713139, iteration: 350548
loss: 1.0083892345428467,grad_norm: 0.7369076317109765, iteration: 350549
loss: 1.0038667917251587,grad_norm: 0.8028347710895755, iteration: 350550
loss: 1.0104540586471558,grad_norm: 0.9999991585016188, iteration: 350551
loss: 0.9886958599090576,grad_norm: 0.8938401256392718, iteration: 350552
loss: 0.9886716604232788,grad_norm: 0.7560964020375257, iteration: 350553
loss: 1.0045995712280273,grad_norm: 0.968014162019968, iteration: 350554
loss: 1.0093474388122559,grad_norm: 0.9999989844253705, iteration: 350555
loss: 1.0054874420166016,grad_norm: 0.8529506311815087, iteration: 350556
loss: 1.0153684616088867,grad_norm: 0.9999992373934087, iteration: 350557
loss: 0.977687418460846,grad_norm: 0.7182676105093241, iteration: 350558
loss: 1.025917410850525,grad_norm: 0.8976495849016746, iteration: 350559
loss: 0.9809573888778687,grad_norm: 0.8017863779119395, iteration: 350560
loss: 0.9843345880508423,grad_norm: 0.8703124705680519, iteration: 350561
loss: 1.0066791772842407,grad_norm: 0.7911638755455399, iteration: 350562
loss: 1.0362658500671387,grad_norm: 0.833986057123481, iteration: 350563
loss: 1.0120600461959839,grad_norm: 0.8272714185101168, iteration: 350564
loss: 0.9975426197052002,grad_norm: 0.8473496151782263, iteration: 350565
loss: 1.0023881196975708,grad_norm: 0.9999998460864791, iteration: 350566
loss: 0.9618650078773499,grad_norm: 0.7886972733153869, iteration: 350567
loss: 0.9554700255393982,grad_norm: 0.9908399660017012, iteration: 350568
loss: 1.1150108575820923,grad_norm: 0.9243085439134995, iteration: 350569
loss: 0.989514946937561,grad_norm: 0.935710959223559, iteration: 350570
loss: 1.0300846099853516,grad_norm: 0.6866638363742708, iteration: 350571
loss: 1.0324368476867676,grad_norm: 0.8392941525861465, iteration: 350572
loss: 0.9940670728683472,grad_norm: 0.7848151336758604, iteration: 350573
loss: 1.0144158601760864,grad_norm: 0.6803089105428803, iteration: 350574
loss: 1.0061306953430176,grad_norm: 0.8555041177540217, iteration: 350575
loss: 1.0187349319458008,grad_norm: 0.9024394767564936, iteration: 350576
loss: 0.9869903922080994,grad_norm: 0.9999990331919936, iteration: 350577
loss: 1.0329753160476685,grad_norm: 0.9275317083067903, iteration: 350578
loss: 0.9752735495567322,grad_norm: 0.8577248583055391, iteration: 350579
loss: 1.046791672706604,grad_norm: 0.7891491531005017, iteration: 350580
loss: 0.9862879514694214,grad_norm: 0.8152321935209255, iteration: 350581
loss: 0.9901297688484192,grad_norm: 0.8717188324937727, iteration: 350582
loss: 0.9654058814048767,grad_norm: 0.7272390598007492, iteration: 350583
loss: 0.9948035478591919,grad_norm: 0.7590041459613345, iteration: 350584
loss: 1.0076216459274292,grad_norm: 0.7810268325142877, iteration: 350585
loss: 0.9944860935211182,grad_norm: 0.7344504379061768, iteration: 350586
loss: 1.003144383430481,grad_norm: 0.9179689650902512, iteration: 350587
loss: 1.0839425325393677,grad_norm: 0.9999992339159173, iteration: 350588
loss: 1.013953447341919,grad_norm: 0.6839761324524603, iteration: 350589
loss: 0.9782599806785583,grad_norm: 0.8707894306354625, iteration: 350590
loss: 0.9943803548812866,grad_norm: 0.7709880112615137, iteration: 350591
loss: 0.9674727320671082,grad_norm: 0.8217740632802409, iteration: 350592
loss: 0.9861050248146057,grad_norm: 0.7800694297480137, iteration: 350593
loss: 1.0081150531768799,grad_norm: 0.8660451440134441, iteration: 350594
loss: 1.0217053890228271,grad_norm: 0.999999262574469, iteration: 350595
loss: 1.0063437223434448,grad_norm: 0.8874737550054296, iteration: 350596
loss: 1.0223554372787476,grad_norm: 0.7760140400335657, iteration: 350597
loss: 1.0002259016036987,grad_norm: 0.9165750288812607, iteration: 350598
loss: 1.030702829360962,grad_norm: 0.9999994257451069, iteration: 350599
loss: 0.992059051990509,grad_norm: 0.929340557496028, iteration: 350600
loss: 0.9841073155403137,grad_norm: 0.8799971274806591, iteration: 350601
loss: 0.98670893907547,grad_norm: 0.8531876717854655, iteration: 350602
loss: 0.9873778820037842,grad_norm: 0.9611698030934679, iteration: 350603
loss: 0.9804412722587585,grad_norm: 0.7310584808817566, iteration: 350604
loss: 0.9539588093757629,grad_norm: 0.9999990648474381, iteration: 350605
loss: 0.9793026447296143,grad_norm: 0.6243650228016925, iteration: 350606
loss: 0.9513027667999268,grad_norm: 0.8230832371778702, iteration: 350607
loss: 0.9711647629737854,grad_norm: 0.8970641105810612, iteration: 350608
loss: 1.0139551162719727,grad_norm: 0.8957541785037308, iteration: 350609
loss: 0.9987877011299133,grad_norm: 0.7545384046125164, iteration: 350610
loss: 0.9844800233840942,grad_norm: 0.8175141033901644, iteration: 350611
loss: 1.1240564584732056,grad_norm: 0.9999998477475907, iteration: 350612
loss: 0.9802385568618774,grad_norm: 0.8910686839870848, iteration: 350613
loss: 1.0568794012069702,grad_norm: 0.8185422582904253, iteration: 350614
loss: 1.0014897584915161,grad_norm: 0.8552255492401796, iteration: 350615
loss: 0.9622606039047241,grad_norm: 0.8169676417875094, iteration: 350616
loss: 1.006583333015442,grad_norm: 0.8841485546559316, iteration: 350617
loss: 1.056367039680481,grad_norm: 0.9999994472386605, iteration: 350618
loss: 1.0624133348464966,grad_norm: 0.8236537022638667, iteration: 350619
loss: 0.9718724489212036,grad_norm: 0.8826166340572961, iteration: 350620
loss: 0.9793761372566223,grad_norm: 0.7813223772063537, iteration: 350621
loss: 1.0893443822860718,grad_norm: 0.8226673701800614, iteration: 350622
loss: 0.9910774827003479,grad_norm: 0.9476328542514553, iteration: 350623
loss: 1.0352922677993774,grad_norm: 0.9901082623421237, iteration: 350624
loss: 1.0224186182022095,grad_norm: 0.7531699768116452, iteration: 350625
loss: 1.0846991539001465,grad_norm: 0.9999990151993214, iteration: 350626
loss: 0.9749938249588013,grad_norm: 0.9136860936781815, iteration: 350627
loss: 1.08863365650177,grad_norm: 0.6542698859473631, iteration: 350628
loss: 1.0321749448776245,grad_norm: 0.8874486496462543, iteration: 350629
loss: 1.0066816806793213,grad_norm: 0.8698924835016639, iteration: 350630
loss: 0.9929248094558716,grad_norm: 0.8163433770793606, iteration: 350631
loss: 0.9744328260421753,grad_norm: 0.8483276931151955, iteration: 350632
loss: 1.0180003643035889,grad_norm: 0.9999991439253577, iteration: 350633
loss: 1.0231199264526367,grad_norm: 0.7754272089010045, iteration: 350634
loss: 1.0389423370361328,grad_norm: 0.9999996089348253, iteration: 350635
loss: 0.9854390621185303,grad_norm: 0.7136787828650362, iteration: 350636
loss: 0.9888641238212585,grad_norm: 0.7086301643328843, iteration: 350637
loss: 1.0064023733139038,grad_norm: 0.7012350562566777, iteration: 350638
loss: 1.0166467428207397,grad_norm: 0.6624154469258718, iteration: 350639
loss: 0.9804648756980896,grad_norm: 0.7202613059237848, iteration: 350640
loss: 0.9726016521453857,grad_norm: 0.8196650399362092, iteration: 350641
loss: 0.994655191898346,grad_norm: 0.9999989898381213, iteration: 350642
loss: 1.0238277912139893,grad_norm: 0.9618007571090201, iteration: 350643
loss: 1.0192357301712036,grad_norm: 0.845315452659455, iteration: 350644
loss: 1.0763524770736694,grad_norm: 0.7976531311042435, iteration: 350645
loss: 0.9876673221588135,grad_norm: 0.812484861640785, iteration: 350646
loss: 0.9795370101928711,grad_norm: 0.8823196780231675, iteration: 350647
loss: 1.0152941942214966,grad_norm: 0.8413434770245131, iteration: 350648
loss: 0.9812130331993103,grad_norm: 0.8591059118712903, iteration: 350649
loss: 1.0230613946914673,grad_norm: 0.626355911464599, iteration: 350650
loss: 0.9844558835029602,grad_norm: 0.6736864410555415, iteration: 350651
loss: 1.0219582319259644,grad_norm: 0.8339071881419929, iteration: 350652
loss: 1.023851752281189,grad_norm: 0.8914309898891186, iteration: 350653
loss: 0.9865795373916626,grad_norm: 0.7241660787673401, iteration: 350654
loss: 1.0017527341842651,grad_norm: 0.8324306621998534, iteration: 350655
loss: 0.9888869524002075,grad_norm: 0.7772568232130445, iteration: 350656
loss: 0.9848753809928894,grad_norm: 0.8304679424642635, iteration: 350657
loss: 1.007441520690918,grad_norm: 0.8552750542741019, iteration: 350658
loss: 1.0078349113464355,grad_norm: 0.6313212790707384, iteration: 350659
loss: 1.2431044578552246,grad_norm: 0.9999994790620897, iteration: 350660
loss: 0.9891848564147949,grad_norm: 0.7839893838311619, iteration: 350661
loss: 0.9538070559501648,grad_norm: 0.8816576073368454, iteration: 350662
loss: 1.0269633531570435,grad_norm: 0.8614256259146992, iteration: 350663
loss: 1.0214883089065552,grad_norm: 0.7641926438664898, iteration: 350664
loss: 1.0309537649154663,grad_norm: 0.9258041273171942, iteration: 350665
loss: 1.0399771928787231,grad_norm: 0.7928526860730049, iteration: 350666
loss: 0.9896638989448547,grad_norm: 0.6928458152410111, iteration: 350667
loss: 1.0484639406204224,grad_norm: 0.7822223832632397, iteration: 350668
loss: 0.9986847639083862,grad_norm: 0.7842670900494256, iteration: 350669
loss: 0.9539788365364075,grad_norm: 0.847951210768412, iteration: 350670
loss: 1.0065020322799683,grad_norm: 0.8714810127371785, iteration: 350671
loss: 0.9996660351753235,grad_norm: 0.7646882512376932, iteration: 350672
loss: 0.9925455451011658,grad_norm: 0.6734162866198068, iteration: 350673
loss: 1.0225528478622437,grad_norm: 0.999999196882484, iteration: 350674
loss: 0.985130250453949,grad_norm: 0.6870025164435085, iteration: 350675
loss: 1.0412116050720215,grad_norm: 0.9999997971300995, iteration: 350676
loss: 1.0120346546173096,grad_norm: 0.8902350521152386, iteration: 350677
loss: 1.023482322692871,grad_norm: 0.868302823187492, iteration: 350678
loss: 0.9957616925239563,grad_norm: 0.8496326200348058, iteration: 350679
loss: 1.0007330179214478,grad_norm: 0.7497242281484936, iteration: 350680
loss: 1.02036452293396,grad_norm: 0.751936506511697, iteration: 350681
loss: 1.0345031023025513,grad_norm: 0.9999998377980993, iteration: 350682
loss: 1.0438323020935059,grad_norm: 0.7708161472712239, iteration: 350683
loss: 1.0083085298538208,grad_norm: 0.9999993603643508, iteration: 350684
loss: 0.9853999614715576,grad_norm: 0.7324892318560382, iteration: 350685
loss: 0.966742992401123,grad_norm: 0.9999991501753562, iteration: 350686
loss: 1.0002919435501099,grad_norm: 0.814319140257566, iteration: 350687
loss: 1.0022425651550293,grad_norm: 0.8933214634431532, iteration: 350688
loss: 1.0091969966888428,grad_norm: 0.8809319647011584, iteration: 350689
loss: 1.028180718421936,grad_norm: 0.9190870444354249, iteration: 350690
loss: 0.9984886646270752,grad_norm: 0.8649581853917891, iteration: 350691
loss: 1.0084573030471802,grad_norm: 0.7215851535231392, iteration: 350692
loss: 1.0048173666000366,grad_norm: 0.8846292484849378, iteration: 350693
loss: 1.0267254114151,grad_norm: 0.8600983935182029, iteration: 350694
loss: 0.9854158163070679,grad_norm: 0.7073357035292694, iteration: 350695
loss: 0.9832478761672974,grad_norm: 0.6881636768319155, iteration: 350696
loss: 1.0084213018417358,grad_norm: 0.7646694859184416, iteration: 350697
loss: 1.0011378526687622,grad_norm: 0.7992327729009007, iteration: 350698
loss: 0.9786256551742554,grad_norm: 0.9999990025326017, iteration: 350699
loss: 1.0087682008743286,grad_norm: 0.8873839406910694, iteration: 350700
loss: 1.01093590259552,grad_norm: 0.8120140166249106, iteration: 350701
loss: 0.9891884326934814,grad_norm: 0.9999992145912204, iteration: 350702
loss: 0.9709452390670776,grad_norm: 0.8830743283898929, iteration: 350703
loss: 0.9935198426246643,grad_norm: 0.8043941062930935, iteration: 350704
loss: 1.0017695426940918,grad_norm: 0.8687282954464866, iteration: 350705
loss: 0.992579460144043,grad_norm: 0.7322551082434352, iteration: 350706
loss: 1.0453773736953735,grad_norm: 0.9999990918888247, iteration: 350707
loss: 1.0080556869506836,grad_norm: 0.8276621200532143, iteration: 350708
loss: 1.0161818265914917,grad_norm: 0.9784594168313583, iteration: 350709
loss: 1.0292154550552368,grad_norm: 0.8284159811990531, iteration: 350710
loss: 1.0047128200531006,grad_norm: 0.6987387313688539, iteration: 350711
loss: 1.0023127794265747,grad_norm: 0.764682360693707, iteration: 350712
loss: 0.9965952038764954,grad_norm: 0.9884021098256432, iteration: 350713
loss: 0.9683743119239807,grad_norm: 0.7683778316165851, iteration: 350714
loss: 1.0806784629821777,grad_norm: 0.924142087568999, iteration: 350715
loss: 1.0210750102996826,grad_norm: 0.9426710646016471, iteration: 350716
loss: 1.0247681140899658,grad_norm: 0.7865000996479959, iteration: 350717
loss: 1.0126407146453857,grad_norm: 0.7917200759651225, iteration: 350718
loss: 0.9949418902397156,grad_norm: 0.9999990338704875, iteration: 350719
loss: 1.080276370048523,grad_norm: 0.9999992113534574, iteration: 350720
loss: 0.9779024124145508,grad_norm: 0.9570028737823824, iteration: 350721
loss: 1.0190801620483398,grad_norm: 0.8528997464824304, iteration: 350722
loss: 0.9782695770263672,grad_norm: 0.851692510268088, iteration: 350723
loss: 1.0114400386810303,grad_norm: 0.9252766565274685, iteration: 350724
loss: 1.0823410749435425,grad_norm: 0.9999991049840062, iteration: 350725
loss: 0.9668119549751282,grad_norm: 0.8162052756435969, iteration: 350726
loss: 0.9716116189956665,grad_norm: 0.8845567184561662, iteration: 350727
loss: 1.0321691036224365,grad_norm: 0.9999990240017531, iteration: 350728
loss: 1.0267664194107056,grad_norm: 0.7969299119611521, iteration: 350729
loss: 1.0159993171691895,grad_norm: 0.9999991582915186, iteration: 350730
loss: 1.0305227041244507,grad_norm: 0.8594955295905344, iteration: 350731
loss: 1.0009888410568237,grad_norm: 0.7211381996238122, iteration: 350732
loss: 0.9649562239646912,grad_norm: 0.6902630001788724, iteration: 350733
loss: 1.0251176357269287,grad_norm: 0.6882102161079958, iteration: 350734
loss: 0.996786892414093,grad_norm: 0.8202345076787876, iteration: 350735
loss: 0.9783281683921814,grad_norm: 0.8159729470611079, iteration: 350736
loss: 1.0668343305587769,grad_norm: 0.9999999173864687, iteration: 350737
loss: 0.9681462049484253,grad_norm: 0.8236807524397638, iteration: 350738
loss: 0.9867140650749207,grad_norm: 0.8118535635427473, iteration: 350739
loss: 1.0661578178405762,grad_norm: 0.9999995361575764, iteration: 350740
loss: 1.0175658464431763,grad_norm: 0.8946413496207747, iteration: 350741
loss: 1.0042887926101685,grad_norm: 0.8440428432200042, iteration: 350742
loss: 1.0040273666381836,grad_norm: 0.8021632574468964, iteration: 350743
loss: 0.9800668954849243,grad_norm: 0.7005082275067593, iteration: 350744
loss: 0.9891000390052795,grad_norm: 0.9999994417826875, iteration: 350745
loss: 0.9892252087593079,grad_norm: 0.8067511110170213, iteration: 350746
loss: 0.9834402799606323,grad_norm: 0.9991660627274443, iteration: 350747
loss: 1.0066934823989868,grad_norm: 0.99999945802687, iteration: 350748
loss: 0.9675887823104858,grad_norm: 0.8578547695943227, iteration: 350749
loss: 1.004887580871582,grad_norm: 0.8063415645578593, iteration: 350750
loss: 1.0208566188812256,grad_norm: 0.8000494565941886, iteration: 350751
loss: 1.043277621269226,grad_norm: 0.9999999183517148, iteration: 350752
loss: 0.9998524188995361,grad_norm: 0.7776863920791564, iteration: 350753
loss: 1.0119963884353638,grad_norm: 0.8883436323419102, iteration: 350754
loss: 1.0181727409362793,grad_norm: 0.9085984636203089, iteration: 350755
loss: 1.0195586681365967,grad_norm: 0.9999999098693292, iteration: 350756
loss: 0.9945864081382751,grad_norm: 0.800634872804752, iteration: 350757
loss: 1.0372076034545898,grad_norm: 0.9999990910430736, iteration: 350758
loss: 1.0137709379196167,grad_norm: 0.8791406376515016, iteration: 350759
loss: 0.9836692214012146,grad_norm: 0.8804526799817279, iteration: 350760
loss: 0.9783254861831665,grad_norm: 0.7238147994171498, iteration: 350761
loss: 0.9967631697654724,grad_norm: 0.7828347689679551, iteration: 350762
loss: 0.9900473952293396,grad_norm: 0.7649191600688735, iteration: 350763
loss: 1.020639181137085,grad_norm: 0.7380013433593058, iteration: 350764
loss: 1.0716255903244019,grad_norm: 0.7663352240779728, iteration: 350765
loss: 0.9759213924407959,grad_norm: 0.9999990157947202, iteration: 350766
loss: 1.0005772113800049,grad_norm: 0.9999991899887886, iteration: 350767
loss: 1.0290676355361938,grad_norm: 0.9999991997677278, iteration: 350768
loss: 1.0062471628189087,grad_norm: 0.8402814000011246, iteration: 350769
loss: 0.970774233341217,grad_norm: 0.8396455722192089, iteration: 350770
loss: 0.9820454120635986,grad_norm: 0.8363026602791014, iteration: 350771
loss: 1.0122016668319702,grad_norm: 0.9999991650058794, iteration: 350772
loss: 1.0165190696716309,grad_norm: 0.9573343286070571, iteration: 350773
loss: 1.0112147331237793,grad_norm: 0.7889346187820294, iteration: 350774
loss: 1.1660624742507935,grad_norm: 0.9999997080920061, iteration: 350775
loss: 1.008600115776062,grad_norm: 0.8890833880993065, iteration: 350776
loss: 0.992911159992218,grad_norm: 0.8552349217749085, iteration: 350777
loss: 1.01032555103302,grad_norm: 0.7607843564095519, iteration: 350778
loss: 0.9755635857582092,grad_norm: 0.8397133944542411, iteration: 350779
loss: 1.0099469423294067,grad_norm: 0.7846374868830348, iteration: 350780
loss: 0.9811130166053772,grad_norm: 0.7367598131554073, iteration: 350781
loss: 1.0115609169006348,grad_norm: 0.9999990838719962, iteration: 350782
loss: 0.9811243414878845,grad_norm: 0.8144181859614379, iteration: 350783
loss: 0.9875679612159729,grad_norm: 0.7206350909055831, iteration: 350784
loss: 0.9717419743537903,grad_norm: 0.7354659851627581, iteration: 350785
loss: 0.9537533521652222,grad_norm: 0.9629580513268217, iteration: 350786
loss: 1.0035207271575928,grad_norm: 0.8048386693640438, iteration: 350787
loss: 0.9818536043167114,grad_norm: 0.6843355214481407, iteration: 350788
loss: 0.9795675277709961,grad_norm: 0.9475743125001095, iteration: 350789
loss: 1.0145008563995361,grad_norm: 0.9999994533826324, iteration: 350790
loss: 1.0006272792816162,grad_norm: 0.8666304666985103, iteration: 350791
loss: 1.0374979972839355,grad_norm: 0.6823630088488266, iteration: 350792
loss: 1.0160006284713745,grad_norm: 0.9080806941151093, iteration: 350793
loss: 1.0560309886932373,grad_norm: 0.9999990699404913, iteration: 350794
loss: 1.0205501317977905,grad_norm: 0.6226237104301283, iteration: 350795
loss: 0.9896411895751953,grad_norm: 0.8657252362432384, iteration: 350796
loss: 0.9865416884422302,grad_norm: 0.8939086007153756, iteration: 350797
loss: 1.0170316696166992,grad_norm: 0.7193663829558905, iteration: 350798
loss: 1.010671854019165,grad_norm: 0.7928825805799532, iteration: 350799
loss: 1.0163683891296387,grad_norm: 0.8322750316489173, iteration: 350800
loss: 1.026666283607483,grad_norm: 0.9999994514899871, iteration: 350801
loss: 1.0295130014419556,grad_norm: 0.9582095377378245, iteration: 350802
loss: 0.9936701059341431,grad_norm: 0.999999199919771, iteration: 350803
loss: 0.9829679131507874,grad_norm: 0.9999991831152014, iteration: 350804
loss: 1.0986218452453613,grad_norm: 0.9999999091422166, iteration: 350805
loss: 1.012874722480774,grad_norm: 0.7655173548071376, iteration: 350806
loss: 0.9845457077026367,grad_norm: 0.6981262866615882, iteration: 350807
loss: 1.0208653211593628,grad_norm: 0.8737532712616434, iteration: 350808
loss: 0.9987080097198486,grad_norm: 0.9168045291166211, iteration: 350809
loss: 0.97169029712677,grad_norm: 0.8140251658946225, iteration: 350810
loss: 1.041213870048523,grad_norm: 0.9999996416343917, iteration: 350811
loss: 1.0208353996276855,grad_norm: 0.7049126673505295, iteration: 350812
loss: 0.966479480266571,grad_norm: 0.8180190275808454, iteration: 350813
loss: 1.0177332162857056,grad_norm: 0.7902846957909102, iteration: 350814
loss: 0.9789347648620605,grad_norm: 0.68457866063835, iteration: 350815
loss: 1.0050801038742065,grad_norm: 0.8480720839763554, iteration: 350816
loss: 0.9883114099502563,grad_norm: 0.7836937706655621, iteration: 350817
loss: 0.985766589641571,grad_norm: 0.8552325690612991, iteration: 350818
loss: 1.014503836631775,grad_norm: 0.7305631973755861, iteration: 350819
loss: 0.989482581615448,grad_norm: 0.7298512093864145, iteration: 350820
loss: 1.041566252708435,grad_norm: 0.9999992581366804, iteration: 350821
loss: 1.0038295984268188,grad_norm: 0.9999991180245914, iteration: 350822
loss: 1.014436960220337,grad_norm: 0.8980942820258578, iteration: 350823
loss: 1.0122467279434204,grad_norm: 0.8147821219374217, iteration: 350824
loss: 1.2987375259399414,grad_norm: 0.9999998549333499, iteration: 350825
loss: 1.0097185373306274,grad_norm: 0.999999065837338, iteration: 350826
loss: 0.9957068562507629,grad_norm: 0.8075843862633594, iteration: 350827
loss: 1.009257435798645,grad_norm: 0.9999994006804026, iteration: 350828
loss: 1.0051180124282837,grad_norm: 0.8846626730085345, iteration: 350829
loss: 0.950480043888092,grad_norm: 0.9313162163832551, iteration: 350830
loss: 0.9879189729690552,grad_norm: 0.973640958554554, iteration: 350831
loss: 0.9770588278770447,grad_norm: 0.8610780231222924, iteration: 350832
loss: 1.0038957595825195,grad_norm: 0.9999994158753984, iteration: 350833
loss: 1.0122708082199097,grad_norm: 0.86189892953512, iteration: 350834
loss: 1.00802743434906,grad_norm: 0.7298368272544558, iteration: 350835
loss: 1.052314043045044,grad_norm: 0.8696293083099641, iteration: 350836
loss: 1.027008295059204,grad_norm: 0.7826565272583843, iteration: 350837
loss: 0.9941141605377197,grad_norm: 0.8034628392133163, iteration: 350838
loss: 1.0085179805755615,grad_norm: 0.9249072810731117, iteration: 350839
loss: 1.11746084690094,grad_norm: 0.9200048631925127, iteration: 350840
loss: 0.9982840418815613,grad_norm: 0.9999990942512019, iteration: 350841
loss: 0.9979435801506042,grad_norm: 0.7612300092620643, iteration: 350842
loss: 0.9771077036857605,grad_norm: 0.9999990287139282, iteration: 350843
loss: 0.9827679395675659,grad_norm: 0.9291823351227172, iteration: 350844
loss: 0.9893926382064819,grad_norm: 0.7380901963044945, iteration: 350845
loss: 0.9857326745986938,grad_norm: 0.7464124088587575, iteration: 350846
loss: 0.9918399453163147,grad_norm: 0.7552881988865798, iteration: 350847
loss: 1.0499320030212402,grad_norm: 0.8158020090121653, iteration: 350848
loss: 1.0125633478164673,grad_norm: 0.9999990132614479, iteration: 350849
loss: 0.9991633296012878,grad_norm: 0.7636735503280531, iteration: 350850
loss: 1.0167583227157593,grad_norm: 0.8945704185162285, iteration: 350851
loss: 1.0210070610046387,grad_norm: 0.7991842789196473, iteration: 350852
loss: 0.9870634078979492,grad_norm: 0.8879245378400944, iteration: 350853
loss: 0.9753808379173279,grad_norm: 0.814102096204663, iteration: 350854
loss: 1.015815019607544,grad_norm: 0.7434137893594575, iteration: 350855
loss: 1.0561662912368774,grad_norm: 0.9999996515775431, iteration: 350856
loss: 1.025118112564087,grad_norm: 0.694251598635857, iteration: 350857
loss: 1.0132662057876587,grad_norm: 0.8097590056577173, iteration: 350858
loss: 1.0040274858474731,grad_norm: 0.8441259190840431, iteration: 350859
loss: 0.985334575176239,grad_norm: 0.7338302283891444, iteration: 350860
loss: 0.994156539440155,grad_norm: 0.8173440563270337, iteration: 350861
loss: 1.0047249794006348,grad_norm: 0.7054818910120637, iteration: 350862
loss: 1.0319629907608032,grad_norm: 0.9999989573930621, iteration: 350863
loss: 1.050565242767334,grad_norm: 0.8813029462342074, iteration: 350864
loss: 0.9911603927612305,grad_norm: 0.7399813954789225, iteration: 350865
loss: 0.9934470653533936,grad_norm: 0.8942899166419653, iteration: 350866
loss: 1.0214532613754272,grad_norm: 0.941084209343337, iteration: 350867
loss: 0.9935391545295715,grad_norm: 0.7575090496332917, iteration: 350868
loss: 1.074096918106079,grad_norm: 0.965265072984047, iteration: 350869
loss: 1.0391557216644287,grad_norm: 0.7505276574068437, iteration: 350870
loss: 1.0811799764633179,grad_norm: 0.9999998118583593, iteration: 350871
loss: 0.9924458265304565,grad_norm: 0.8900109297159838, iteration: 350872
loss: 0.9970883727073669,grad_norm: 0.9541512301982398, iteration: 350873
loss: 1.000119924545288,grad_norm: 0.8376602187235554, iteration: 350874
loss: 0.9697495102882385,grad_norm: 0.7389501628965879, iteration: 350875
loss: 0.981834888458252,grad_norm: 0.883823397821209, iteration: 350876
loss: 0.9855323433876038,grad_norm: 0.998295507571344, iteration: 350877
loss: 1.017214298248291,grad_norm: 0.8199700421471431, iteration: 350878
loss: 1.045509696006775,grad_norm: 0.9749030496041037, iteration: 350879
loss: 1.0119743347167969,grad_norm: 0.8774481928651016, iteration: 350880
loss: 0.9881501793861389,grad_norm: 0.7137979355476953, iteration: 350881
loss: 0.9583951234817505,grad_norm: 0.8843031062915488, iteration: 350882
loss: 0.982659101486206,grad_norm: 0.9999991513110266, iteration: 350883
loss: 0.9675513505935669,grad_norm: 0.6873780618217789, iteration: 350884
loss: 1.009529709815979,grad_norm: 0.9649453007823419, iteration: 350885
loss: 1.018471121788025,grad_norm: 0.8526438148602372, iteration: 350886
loss: 0.9776650071144104,grad_norm: 0.7173278238027511, iteration: 350887
loss: 0.9987488985061646,grad_norm: 0.9360356633292427, iteration: 350888
loss: 1.0267831087112427,grad_norm: 0.7905198954959266, iteration: 350889
loss: 1.093320608139038,grad_norm: 0.9999990653104629, iteration: 350890
loss: 0.978951096534729,grad_norm: 0.768217498600252, iteration: 350891
loss: 0.9784706830978394,grad_norm: 0.8354366276049966, iteration: 350892
loss: 0.9987289905548096,grad_norm: 0.9433692487142081, iteration: 350893
loss: 0.9910126328468323,grad_norm: 0.8119887181469652, iteration: 350894
loss: 0.9731959700584412,grad_norm: 0.8981674162750447, iteration: 350895
loss: 1.052829384803772,grad_norm: 0.9476466953449274, iteration: 350896
loss: 0.9812171459197998,grad_norm: 0.8468692203953146, iteration: 350897
loss: 0.9598715305328369,grad_norm: 0.8597907792151517, iteration: 350898
loss: 0.9776324033737183,grad_norm: 0.9999991801877149, iteration: 350899
loss: 0.9832801222801208,grad_norm: 0.7461522947919106, iteration: 350900
loss: 1.013558030128479,grad_norm: 0.895657266908124, iteration: 350901
loss: 1.0025854110717773,grad_norm: 0.9099101357053244, iteration: 350902
loss: 0.9574472308158875,grad_norm: 0.8689037187002315, iteration: 350903
loss: 1.0110039710998535,grad_norm: 0.818800284453836, iteration: 350904
loss: 0.9918994903564453,grad_norm: 0.8529415951775272, iteration: 350905
loss: 1.0177397727966309,grad_norm: 0.9229240188009902, iteration: 350906
loss: 0.9825006127357483,grad_norm: 0.99343287966613, iteration: 350907
loss: 1.0513137578964233,grad_norm: 0.8469995625209811, iteration: 350908
loss: 0.993506908416748,grad_norm: 0.8296673650769887, iteration: 350909
loss: 0.9576210379600525,grad_norm: 0.814310656144004, iteration: 350910
loss: 1.0123831033706665,grad_norm: 0.6913954977670349, iteration: 350911
loss: 1.01063072681427,grad_norm: 0.6056593657309726, iteration: 350912
loss: 1.005291223526001,grad_norm: 0.6784704734859034, iteration: 350913
loss: 1.0145994424819946,grad_norm: 0.9057844990024257, iteration: 350914
loss: 1.0008070468902588,grad_norm: 0.7470268022603171, iteration: 350915
loss: 0.9916594624519348,grad_norm: 0.9999992391214506, iteration: 350916
loss: 0.9620981812477112,grad_norm: 0.9042910020836518, iteration: 350917
loss: 1.0166385173797607,grad_norm: 0.6649445874574952, iteration: 350918
loss: 0.9895002245903015,grad_norm: 0.8788837321685309, iteration: 350919
loss: 0.9877467155456543,grad_norm: 0.7990254017404576, iteration: 350920
loss: 1.0661964416503906,grad_norm: 0.9686244514727461, iteration: 350921
loss: 1.0250376462936401,grad_norm: 0.9999993721759838, iteration: 350922
loss: 0.9760311841964722,grad_norm: 0.6873382407603201, iteration: 350923
loss: 1.0889195203781128,grad_norm: 0.9999997818930584, iteration: 350924
loss: 0.9687244892120361,grad_norm: 0.9086379249315376, iteration: 350925
loss: 0.9875458478927612,grad_norm: 0.7692568352111118, iteration: 350926
loss: 1.0366772413253784,grad_norm: 0.7892631669174257, iteration: 350927
loss: 1.0703948736190796,grad_norm: 0.9999991924387643, iteration: 350928
loss: 0.9993094801902771,grad_norm: 0.9999998266369234, iteration: 350929
loss: 0.9751912951469421,grad_norm: 0.9999989527387385, iteration: 350930
loss: 1.0075702667236328,grad_norm: 0.999999097268253, iteration: 350931
loss: 1.0191105604171753,grad_norm: 0.7264990593573598, iteration: 350932
loss: 1.0389506816864014,grad_norm: 0.8311387162855881, iteration: 350933
loss: 0.9993754625320435,grad_norm: 0.9636518012267999, iteration: 350934
loss: 0.9962626695632935,grad_norm: 0.9220424591531463, iteration: 350935
loss: 0.9931334853172302,grad_norm: 0.753691455668505, iteration: 350936
loss: 1.0267407894134521,grad_norm: 0.7400655015811837, iteration: 350937
loss: 1.0082592964172363,grad_norm: 0.7740673023544474, iteration: 350938
loss: 1.126489520072937,grad_norm: 0.911229143419192, iteration: 350939
loss: 1.0065494775772095,grad_norm: 0.9341308805200909, iteration: 350940
loss: 1.0042144060134888,grad_norm: 0.9999996921388489, iteration: 350941
loss: 1.0073630809783936,grad_norm: 0.9756457476188415, iteration: 350942
loss: 1.007153034210205,grad_norm: 0.727655404782616, iteration: 350943
loss: 1.0242799520492554,grad_norm: 0.8633545130232868, iteration: 350944
loss: 1.0591199398040771,grad_norm: 0.9999992005820421, iteration: 350945
loss: 1.0057623386383057,grad_norm: 0.8312020193268478, iteration: 350946
loss: 0.9794859886169434,grad_norm: 0.9120165805149402, iteration: 350947
loss: 1.0393894910812378,grad_norm: 0.9999995618755767, iteration: 350948
loss: 1.0021188259124756,grad_norm: 0.9999996527029898, iteration: 350949
loss: 0.9551050662994385,grad_norm: 0.6373141593582775, iteration: 350950
loss: 0.9973258376121521,grad_norm: 0.7906862381863689, iteration: 350951
loss: 1.0057408809661865,grad_norm: 0.8484578179797655, iteration: 350952
loss: 1.0214594602584839,grad_norm: 0.7211881536306388, iteration: 350953
loss: 1.043127417564392,grad_norm: 0.8842427959215342, iteration: 350954
loss: 0.9620155096054077,grad_norm: 0.8487613947680881, iteration: 350955
loss: 1.012085199356079,grad_norm: 0.7966101719704566, iteration: 350956
loss: 1.0850967168807983,grad_norm: 0.7546431388404023, iteration: 350957
loss: 1.0092259645462036,grad_norm: 0.7607015173088885, iteration: 350958
loss: 0.9895358085632324,grad_norm: 0.847802876728773, iteration: 350959
loss: 1.0275458097457886,grad_norm: 0.9815825821441969, iteration: 350960
loss: 1.0066176652908325,grad_norm: 0.9999991271140563, iteration: 350961
loss: 1.0135185718536377,grad_norm: 0.8174422092003664, iteration: 350962
loss: 0.9981635212898254,grad_norm: 0.9958249674743741, iteration: 350963
loss: 0.9895552396774292,grad_norm: 0.9570712679712844, iteration: 350964
loss: 1.108115315437317,grad_norm: 0.8856385526417672, iteration: 350965
loss: 1.0203183889389038,grad_norm: 0.6698609815641772, iteration: 350966
loss: 0.9668300747871399,grad_norm: 0.6271660090882464, iteration: 350967
loss: 1.0451738834381104,grad_norm: 0.7882681414162442, iteration: 350968
loss: 1.0148537158966064,grad_norm: 0.8302422111265362, iteration: 350969
loss: 1.010850191116333,grad_norm: 0.7353632181864921, iteration: 350970
loss: 0.9919077754020691,grad_norm: 0.948044288317204, iteration: 350971
loss: 1.062312364578247,grad_norm: 0.9999993849018689, iteration: 350972
loss: 0.9974733591079712,grad_norm: 0.9149360425839203, iteration: 350973
loss: 1.0203790664672852,grad_norm: 0.8902604112754491, iteration: 350974
loss: 0.9890486598014832,grad_norm: 0.999999212791428, iteration: 350975
loss: 0.9934715628623962,grad_norm: 0.7671953105208189, iteration: 350976
loss: 0.9986436367034912,grad_norm: 0.9015682023011439, iteration: 350977
loss: 1.007593035697937,grad_norm: 0.9999990540915454, iteration: 350978
loss: 1.0677220821380615,grad_norm: 0.9579340277210653, iteration: 350979
loss: 1.0118407011032104,grad_norm: 0.9999991281309212, iteration: 350980
loss: 1.0802816152572632,grad_norm: 0.9999995973609744, iteration: 350981
loss: 0.9821982383728027,grad_norm: 0.7040145219513817, iteration: 350982
loss: 1.0206234455108643,grad_norm: 0.9999990071257879, iteration: 350983
loss: 0.9732580780982971,grad_norm: 0.6990924857485218, iteration: 350984
loss: 1.0194282531738281,grad_norm: 0.8967621531508896, iteration: 350985
loss: 0.9987473487854004,grad_norm: 0.7805570333389841, iteration: 350986
loss: 1.0017284154891968,grad_norm: 0.7416440077391743, iteration: 350987
loss: 1.0117754936218262,grad_norm: 0.9988782513568751, iteration: 350988
loss: 0.9953200221061707,grad_norm: 0.8610710179676061, iteration: 350989
loss: 1.0371342897415161,grad_norm: 0.9796243626482841, iteration: 350990
loss: 0.9897879958152771,grad_norm: 0.9999992480381985, iteration: 350991
loss: 1.0026792287826538,grad_norm: 0.8512038608852492, iteration: 350992
loss: 1.0265777111053467,grad_norm: 0.8449653537156617, iteration: 350993
loss: 1.0655003786087036,grad_norm: 0.9999999176474172, iteration: 350994
loss: 0.9914356470108032,grad_norm: 0.8118583905840306, iteration: 350995
loss: 0.9912892580032349,grad_norm: 0.9999993027983098, iteration: 350996
loss: 1.0261738300323486,grad_norm: 0.8070700913052719, iteration: 350997
loss: 0.9975044131278992,grad_norm: 0.9479066475770307, iteration: 350998
loss: 1.0191805362701416,grad_norm: 0.8401129424415051, iteration: 350999
loss: 1.0097333192825317,grad_norm: 0.9832536147839116, iteration: 351000
loss: 0.9755697846412659,grad_norm: 0.8046957390262847, iteration: 351001
loss: 0.9910809993743896,grad_norm: 0.8522850769821694, iteration: 351002
loss: 1.0196119546890259,grad_norm: 0.8514471763556596, iteration: 351003
loss: 1.014312505722046,grad_norm: 0.7204338577261503, iteration: 351004
loss: 0.9601482152938843,grad_norm: 0.8008815516761877, iteration: 351005
loss: 1.0124064683914185,grad_norm: 0.9999990664143187, iteration: 351006
loss: 1.0022915601730347,grad_norm: 0.8999820613882857, iteration: 351007
loss: 0.9882234334945679,grad_norm: 0.871122965708236, iteration: 351008
loss: 0.9741654992103577,grad_norm: 0.8176142927408803, iteration: 351009
loss: 1.0299150943756104,grad_norm: 0.9999991580140867, iteration: 351010
loss: 0.9738859534263611,grad_norm: 0.9317596781082167, iteration: 351011
loss: 1.030200481414795,grad_norm: 0.8498865837644978, iteration: 351012
loss: 1.0061200857162476,grad_norm: 0.7039357957806902, iteration: 351013
loss: 0.9827693104743958,grad_norm: 0.713905418290058, iteration: 351014
loss: 1.0241107940673828,grad_norm: 0.8012508276450807, iteration: 351015
loss: 1.0019162893295288,grad_norm: 0.8136597370274541, iteration: 351016
loss: 1.0285691022872925,grad_norm: 0.723828642683885, iteration: 351017
loss: 1.015468955039978,grad_norm: 0.9099228313742072, iteration: 351018
loss: 1.0198689699172974,grad_norm: 0.9561774997002178, iteration: 351019
loss: 0.9980562925338745,grad_norm: 0.820315658367309, iteration: 351020
loss: 1.0269742012023926,grad_norm: 0.712611454796375, iteration: 351021
loss: 1.0689921379089355,grad_norm: 0.9999993876513091, iteration: 351022
loss: 0.987482488155365,grad_norm: 0.8205556632823114, iteration: 351023
loss: 0.9801479578018188,grad_norm: 0.9930264059899221, iteration: 351024
loss: 0.9912684559822083,grad_norm: 0.65436686107661, iteration: 351025
loss: 1.0483109951019287,grad_norm: 0.9999994954021034, iteration: 351026
loss: 1.0090978145599365,grad_norm: 0.9999991068606333, iteration: 351027
loss: 0.9887977242469788,grad_norm: 0.8555499715122689, iteration: 351028
loss: 0.9984138607978821,grad_norm: 0.7049843565639059, iteration: 351029
loss: 1.1279226541519165,grad_norm: 0.9999999459456925, iteration: 351030
loss: 1.054956078529358,grad_norm: 0.8219123688367949, iteration: 351031
loss: 1.0173087120056152,grad_norm: 0.6350815681286704, iteration: 351032
loss: 1.0062189102172852,grad_norm: 0.8648663391061381, iteration: 351033
loss: 0.9771466851234436,grad_norm: 0.7390265197277958, iteration: 351034
loss: 1.0134763717651367,grad_norm: 0.8896566937921137, iteration: 351035
loss: 0.9938675165176392,grad_norm: 0.8337936954731707, iteration: 351036
loss: 0.9985635876655579,grad_norm: 0.7503874597316815, iteration: 351037
loss: 1.045493245124817,grad_norm: 0.999999879687511, iteration: 351038
loss: 1.0134862661361694,grad_norm: 0.7817665051336571, iteration: 351039
loss: 1.0166298151016235,grad_norm: 0.8781330089946013, iteration: 351040
loss: 1.0099120140075684,grad_norm: 0.9062120391742322, iteration: 351041
loss: 0.9545504450798035,grad_norm: 0.9999991814468728, iteration: 351042
loss: 0.9725916385650635,grad_norm: 0.999999227693503, iteration: 351043
loss: 0.9972805976867676,grad_norm: 0.8276123753588749, iteration: 351044
loss: 1.0000265836715698,grad_norm: 0.6372620131452197, iteration: 351045
loss: 0.9903684854507446,grad_norm: 0.9999992995369029, iteration: 351046
loss: 1.042891502380371,grad_norm: 0.9033923151574599, iteration: 351047
loss: 0.9868834018707275,grad_norm: 0.7796951138089206, iteration: 351048
loss: 1.0000828504562378,grad_norm: 0.7909077329208215, iteration: 351049
loss: 0.9707423448562622,grad_norm: 0.7462935078777149, iteration: 351050
loss: 1.017663598060608,grad_norm: 0.6988596683954252, iteration: 351051
loss: 0.9658644199371338,grad_norm: 0.8926437876356369, iteration: 351052
loss: 1.0186498165130615,grad_norm: 0.999999712616304, iteration: 351053
loss: 0.9988312721252441,grad_norm: 0.7309803597198044, iteration: 351054
loss: 0.9977097511291504,grad_norm: 0.7833600217002638, iteration: 351055
loss: 1.0193313360214233,grad_norm: 0.9999995221054659, iteration: 351056
loss: 0.991706371307373,grad_norm: 0.9999998445625484, iteration: 351057
loss: 0.9973558187484741,grad_norm: 0.8005643481898198, iteration: 351058
loss: 0.989619255065918,grad_norm: 0.8696278774144038, iteration: 351059
loss: 0.9882821440696716,grad_norm: 0.9999994622709161, iteration: 351060
loss: 0.9818289279937744,grad_norm: 0.7948724679080829, iteration: 351061
loss: 0.9967629909515381,grad_norm: 0.9598143363374781, iteration: 351062
loss: 1.00255286693573,grad_norm: 0.8931253241837961, iteration: 351063
loss: 0.9669073224067688,grad_norm: 0.9122922036275126, iteration: 351064
loss: 1.0545313358306885,grad_norm: 0.9999998563559597, iteration: 351065
loss: 1.0041301250457764,grad_norm: 0.8810128686891792, iteration: 351066
loss: 1.0189204216003418,grad_norm: 0.9283313121464062, iteration: 351067
loss: 1.0082027912139893,grad_norm: 0.8872201175944856, iteration: 351068
loss: 1.0833091735839844,grad_norm: 0.9999998425529963, iteration: 351069
loss: 0.9839237332344055,grad_norm: 0.8285236219488742, iteration: 351070
loss: 1.0200812816619873,grad_norm: 0.9037502727780647, iteration: 351071
loss: 1.035101294517517,grad_norm: 0.9999996069529102, iteration: 351072
loss: 1.047082781791687,grad_norm: 0.7648267909012761, iteration: 351073
loss: 0.9826657176017761,grad_norm: 0.8016152998676449, iteration: 351074
loss: 0.9901919364929199,grad_norm: 0.7818355753385362, iteration: 351075
loss: 0.9952199459075928,grad_norm: 0.7045601668607354, iteration: 351076
loss: 0.9986856579780579,grad_norm: 0.8781647640716416, iteration: 351077
loss: 1.048890471458435,grad_norm: 0.9999998260077247, iteration: 351078
loss: 0.9752539396286011,grad_norm: 0.9999990927920972, iteration: 351079
loss: 0.9839103817939758,grad_norm: 0.7364790977552749, iteration: 351080
loss: 1.0063488483428955,grad_norm: 0.7525111263735215, iteration: 351081
loss: 1.0362836122512817,grad_norm: 0.9201973106343165, iteration: 351082
loss: 1.056895136833191,grad_norm: 0.9263279873763391, iteration: 351083
loss: 0.998987078666687,grad_norm: 0.9999993562950025, iteration: 351084
loss: 1.0043631792068481,grad_norm: 0.8300975107604762, iteration: 351085
loss: 1.003915548324585,grad_norm: 0.644651423808579, iteration: 351086
loss: 0.9933332204818726,grad_norm: 0.816374866006954, iteration: 351087
loss: 1.0306529998779297,grad_norm: 0.7447723776742196, iteration: 351088
loss: 0.9874001145362854,grad_norm: 0.8619323768681417, iteration: 351089
loss: 0.980457603931427,grad_norm: 0.9999991689718364, iteration: 351090
loss: 1.0318132638931274,grad_norm: 0.999999416256741, iteration: 351091
loss: 0.968458890914917,grad_norm: 0.9066653860575307, iteration: 351092
loss: 1.1613359451293945,grad_norm: 0.9999992066781089, iteration: 351093
loss: 0.9689651131629944,grad_norm: 0.743105876368863, iteration: 351094
loss: 1.0334556102752686,grad_norm: 0.9999992318928309, iteration: 351095
loss: 1.098611831665039,grad_norm: 0.9999995048197329, iteration: 351096
loss: 0.971382737159729,grad_norm: 0.8463510999635528, iteration: 351097
loss: 1.0449925661087036,grad_norm: 0.908459429387542, iteration: 351098
loss: 0.9851261377334595,grad_norm: 0.9315339063428592, iteration: 351099
loss: 1.0460107326507568,grad_norm: 0.7929359912430081, iteration: 351100
loss: 1.017913579940796,grad_norm: 0.7755277371468228, iteration: 351101
loss: 1.0309284925460815,grad_norm: 0.7444321140457193, iteration: 351102
loss: 0.980206310749054,grad_norm: 0.7320393486467487, iteration: 351103
loss: 1.0018707513809204,grad_norm: 0.7705213001014908, iteration: 351104
loss: 1.0066436529159546,grad_norm: 0.7713402304475927, iteration: 351105
loss: 1.0627849102020264,grad_norm: 0.9999990377450932, iteration: 351106
loss: 0.9612653851509094,grad_norm: 0.7823283575674124, iteration: 351107
loss: 0.9585992693901062,grad_norm: 0.8678138107711145, iteration: 351108
loss: 1.0235557556152344,grad_norm: 0.9999998452871941, iteration: 351109
loss: 0.9888323545455933,grad_norm: 0.737029907821791, iteration: 351110
loss: 1.0195668935775757,grad_norm: 0.9908167114543047, iteration: 351111
loss: 1.0307655334472656,grad_norm: 0.7784524358288865, iteration: 351112
loss: 0.9900553226470947,grad_norm: 0.9301107278543982, iteration: 351113
loss: 0.9942086338996887,grad_norm: 0.6281454852260564, iteration: 351114
loss: 0.9972249269485474,grad_norm: 0.8193775802108592, iteration: 351115
loss: 1.0063025951385498,grad_norm: 0.7913328822393026, iteration: 351116
loss: 0.9487636685371399,grad_norm: 0.7531989265431808, iteration: 351117
loss: 0.9789370894432068,grad_norm: 0.9331086054759812, iteration: 351118
loss: 0.9873184561729431,grad_norm: 0.7816256519013701, iteration: 351119
loss: 1.1120505332946777,grad_norm: 0.839176325581905, iteration: 351120
loss: 0.9804133176803589,grad_norm: 0.9999990368251453, iteration: 351121
loss: 0.9932462573051453,grad_norm: 0.9999999011664841, iteration: 351122
loss: 1.1276414394378662,grad_norm: 0.999999512041805, iteration: 351123
loss: 1.0098390579223633,grad_norm: 0.7290794950776394, iteration: 351124
loss: 1.1151502132415771,grad_norm: 0.9999997071712068, iteration: 351125
loss: 1.1257630586624146,grad_norm: 0.999999492454733, iteration: 351126
loss: 0.9734358787536621,grad_norm: 0.9414064117974695, iteration: 351127
loss: 0.9997474551200867,grad_norm: 0.7986369817680401, iteration: 351128
loss: 0.9695454835891724,grad_norm: 0.92424055138069, iteration: 351129
loss: 1.023184061050415,grad_norm: 0.8872794447893592, iteration: 351130
loss: 0.9896252155303955,grad_norm: 0.712656962488709, iteration: 351131
loss: 1.006486415863037,grad_norm: 0.7498060504115059, iteration: 351132
loss: 1.1912330389022827,grad_norm: 0.9999992578601825, iteration: 351133
loss: 1.0420799255371094,grad_norm: 0.8829996942871798, iteration: 351134
loss: 1.042658805847168,grad_norm: 0.9448805120866152, iteration: 351135
loss: 1.0029898881912231,grad_norm: 0.9999989925295897, iteration: 351136
loss: 0.972644567489624,grad_norm: 0.8287581909380419, iteration: 351137
loss: 1.0169655084609985,grad_norm: 0.7437695554766086, iteration: 351138
loss: 1.032614827156067,grad_norm: 0.945865550234197, iteration: 351139
loss: 1.1266025304794312,grad_norm: 0.9999999128351253, iteration: 351140
loss: 1.1775233745574951,grad_norm: 0.9999992630290307, iteration: 351141
loss: 1.0161525011062622,grad_norm: 0.8006473678121514, iteration: 351142
loss: 0.9972712397575378,grad_norm: 0.7289985353651942, iteration: 351143
loss: 0.9910984039306641,grad_norm: 0.9243463067723547, iteration: 351144
loss: 1.0903346538543701,grad_norm: 0.9999996714943393, iteration: 351145
loss: 1.0289753675460815,grad_norm: 0.999999857579938, iteration: 351146
loss: 1.0151258707046509,grad_norm: 0.9011251986613469, iteration: 351147
loss: 1.1320627927780151,grad_norm: 0.9999996303689322, iteration: 351148
loss: 0.9875304698944092,grad_norm: 0.9999994628386505, iteration: 351149
loss: 1.0686216354370117,grad_norm: 0.8808020330301436, iteration: 351150
loss: 0.9733563661575317,grad_norm: 0.7671385783549292, iteration: 351151
loss: 1.0122175216674805,grad_norm: 0.7143309017693532, iteration: 351152
loss: 1.0650473833084106,grad_norm: 0.9999997591058021, iteration: 351153
loss: 1.0227084159851074,grad_norm: 0.9123486059120277, iteration: 351154
loss: 0.9912833571434021,grad_norm: 0.8255013511651259, iteration: 351155
loss: 1.0498546361923218,grad_norm: 0.9999991749714714, iteration: 351156
loss: 1.2395933866500854,grad_norm: 0.999999770137363, iteration: 351157
loss: 1.0432007312774658,grad_norm: 0.9999991628165369, iteration: 351158
loss: 1.0193089246749878,grad_norm: 0.8544724941708297, iteration: 351159
loss: 1.0194264650344849,grad_norm: 0.9999992425247431, iteration: 351160
loss: 1.1653809547424316,grad_norm: 0.9999995454812506, iteration: 351161
loss: 1.1995395421981812,grad_norm: 0.9999998325966984, iteration: 351162
loss: 1.1266417503356934,grad_norm: 0.9999997774623598, iteration: 351163
loss: 0.992464542388916,grad_norm: 0.999999070432582, iteration: 351164
loss: 1.0395967960357666,grad_norm: 0.9999990667361829, iteration: 351165
loss: 1.0538338422775269,grad_norm: 0.932390950714387, iteration: 351166
loss: 1.0285067558288574,grad_norm: 0.9999991951613918, iteration: 351167
loss: 1.0296130180358887,grad_norm: 0.8623259436116804, iteration: 351168
loss: 0.9824320673942566,grad_norm: 0.8292770580296032, iteration: 351169
loss: 1.0665019750595093,grad_norm: 0.9999999215822931, iteration: 351170
loss: 1.1131659746170044,grad_norm: 0.9618580467255012, iteration: 351171
loss: 0.9672086238861084,grad_norm: 0.9735711101191049, iteration: 351172
loss: 0.9967939853668213,grad_norm: 0.9081172551624213, iteration: 351173
loss: 1.0749728679656982,grad_norm: 0.8770619502462353, iteration: 351174
loss: 1.0935797691345215,grad_norm: 0.9748297431956829, iteration: 351175
loss: 1.0663928985595703,grad_norm: 0.9999991750362232, iteration: 351176
loss: 1.0148488283157349,grad_norm: 0.7167681052929662, iteration: 351177
loss: 0.9886832237243652,grad_norm: 0.7348514655308981, iteration: 351178
loss: 0.9941490888595581,grad_norm: 0.8453668337684574, iteration: 351179
loss: 0.981103777885437,grad_norm: 0.9999991241357088, iteration: 351180
loss: 1.0040688514709473,grad_norm: 0.9797646201907206, iteration: 351181
loss: 0.9907868504524231,grad_norm: 0.8881842972539465, iteration: 351182
loss: 0.9711619019508362,grad_norm: 0.7850067502121014, iteration: 351183
loss: 1.0202938318252563,grad_norm: 0.8508007485630016, iteration: 351184
loss: 1.0172816514968872,grad_norm: 0.825801869468912, iteration: 351185
loss: 0.9762274026870728,grad_norm: 0.9653278886939503, iteration: 351186
loss: 1.0157482624053955,grad_norm: 0.9999997146649753, iteration: 351187
loss: 0.9894500374794006,grad_norm: 0.9999991753070658, iteration: 351188
loss: 1.016464352607727,grad_norm: 0.9852575299769342, iteration: 351189
loss: 1.0389553308486938,grad_norm: 0.7176712644175219, iteration: 351190
loss: 0.9703079462051392,grad_norm: 0.8590156454773814, iteration: 351191
loss: 0.9878270626068115,grad_norm: 0.7513767008770811, iteration: 351192
loss: 1.0182992219924927,grad_norm: 0.7779018778648047, iteration: 351193
loss: 1.0880669355392456,grad_norm: 0.9999991252381969, iteration: 351194
loss: 0.9905286431312561,grad_norm: 0.7986916787153693, iteration: 351195
loss: 1.055109977722168,grad_norm: 0.8865799750499892, iteration: 351196
loss: 1.1170439720153809,grad_norm: 0.999999451987475, iteration: 351197
loss: 1.0639833211898804,grad_norm: 0.90124066899442, iteration: 351198
loss: 1.0083431005477905,grad_norm: 0.8210774905535798, iteration: 351199
loss: 1.0012435913085938,grad_norm: 0.7486003061584745, iteration: 351200
loss: 1.013533115386963,grad_norm: 0.8268311624690776, iteration: 351201
loss: 0.9996384382247925,grad_norm: 0.7347887994871783, iteration: 351202
loss: 0.9281387329101562,grad_norm: 0.8701310440469726, iteration: 351203
loss: 1.0230128765106201,grad_norm: 0.9999997312652421, iteration: 351204
loss: 0.9793892502784729,grad_norm: 0.7626559265651232, iteration: 351205
loss: 0.9761877059936523,grad_norm: 0.9198307626130962, iteration: 351206
loss: 1.0103998184204102,grad_norm: 0.8021946187479986, iteration: 351207
loss: 1.1442335844039917,grad_norm: 0.8600672798028433, iteration: 351208
loss: 0.9897236227989197,grad_norm: 0.6969412392960372, iteration: 351209
loss: 0.9832831025123596,grad_norm: 0.9005556526715992, iteration: 351210
loss: 1.0011693239212036,grad_norm: 0.874497526459534, iteration: 351211
loss: 0.9952673316001892,grad_norm: 0.8019737869442096, iteration: 351212
loss: 1.0094796419143677,grad_norm: 0.814530754828895, iteration: 351213
loss: 0.9997589588165283,grad_norm: 0.7054881832769094, iteration: 351214
loss: 1.0215171575546265,grad_norm: 0.7873364469615352, iteration: 351215
loss: 0.9518849849700928,grad_norm: 0.999999113643069, iteration: 351216
loss: 1.0146229267120361,grad_norm: 0.8178614350866137, iteration: 351217
loss: 0.9824041128158569,grad_norm: 0.9579152277751807, iteration: 351218
loss: 1.0266121625900269,grad_norm: 0.9626063168151443, iteration: 351219
loss: 0.9713158011436462,grad_norm: 0.8394652734617658, iteration: 351220
loss: 0.9855457544326782,grad_norm: 0.8049020952187691, iteration: 351221
loss: 1.0377999544143677,grad_norm: 0.9999990672028244, iteration: 351222
loss: 1.0198734998703003,grad_norm: 0.8279094422229918, iteration: 351223
loss: 1.0218201875686646,grad_norm: 0.837927986894361, iteration: 351224
loss: 0.9958494305610657,grad_norm: 0.9850510707592283, iteration: 351225
loss: 1.0012987852096558,grad_norm: 0.738362095745916, iteration: 351226
loss: 1.0103169679641724,grad_norm: 0.9999995871523583, iteration: 351227
loss: 1.0175273418426514,grad_norm: 0.9222771802755741, iteration: 351228
loss: 1.015188217163086,grad_norm: 0.6365790455380785, iteration: 351229
loss: 0.976873517036438,grad_norm: 0.7892170079794256, iteration: 351230
loss: 0.9521323442459106,grad_norm: 0.942166228640469, iteration: 351231
loss: 1.0249813795089722,grad_norm: 0.8139688546084499, iteration: 351232
loss: 1.005967140197754,grad_norm: 0.803422405523352, iteration: 351233
loss: 0.9776282906532288,grad_norm: 0.803690100056118, iteration: 351234
loss: 1.0118484497070312,grad_norm: 0.9999990921003192, iteration: 351235
loss: 1.0422030687332153,grad_norm: 0.9999993562669953, iteration: 351236
loss: 1.0023616552352905,grad_norm: 0.7529332425212163, iteration: 351237
loss: 1.0533747673034668,grad_norm: 0.999999452803415, iteration: 351238
loss: 1.023189902305603,grad_norm: 0.8951382539491081, iteration: 351239
loss: 0.9688159227371216,grad_norm: 0.8455422082428593, iteration: 351240
loss: 1.0633292198181152,grad_norm: 0.8287516538254028, iteration: 351241
loss: 1.0270830392837524,grad_norm: 0.9514943950982362, iteration: 351242
loss: 0.9810804128646851,grad_norm: 0.7846636296848124, iteration: 351243
loss: 0.9829440712928772,grad_norm: 0.943362484817614, iteration: 351244
loss: 1.0305620431900024,grad_norm: 0.9490051006373653, iteration: 351245
loss: 0.982791006565094,grad_norm: 0.6529917468863813, iteration: 351246
loss: 0.9668607711791992,grad_norm: 0.9597390289390934, iteration: 351247
loss: 1.0249567031860352,grad_norm: 0.8424783130964869, iteration: 351248
loss: 1.002510905265808,grad_norm: 0.9251955608670539, iteration: 351249
loss: 0.9735257625579834,grad_norm: 0.7190249189147215, iteration: 351250
loss: 1.039933204650879,grad_norm: 0.9235955493009974, iteration: 351251
loss: 1.0244944095611572,grad_norm: 0.737035375475069, iteration: 351252
loss: 0.9791310429573059,grad_norm: 0.7761913636378903, iteration: 351253
loss: 1.0057950019836426,grad_norm: 0.9201754271562926, iteration: 351254
loss: 1.0081260204315186,grad_norm: 0.6867687006973746, iteration: 351255
loss: 1.0097991228103638,grad_norm: 0.7890403797058441, iteration: 351256
loss: 1.0602794885635376,grad_norm: 0.7648354515507229, iteration: 351257
loss: 0.9679038524627686,grad_norm: 0.7549754438265388, iteration: 351258
loss: 1.0067729949951172,grad_norm: 0.999999105540693, iteration: 351259
loss: 1.0101211071014404,grad_norm: 0.793352771078112, iteration: 351260
loss: 1.0114582777023315,grad_norm: 0.8150080522671562, iteration: 351261
loss: 1.0156090259552002,grad_norm: 0.7614218588294153, iteration: 351262
loss: 0.9809687733650208,grad_norm: 0.9772372845391624, iteration: 351263
loss: 0.9826889038085938,grad_norm: 0.7696898287124349, iteration: 351264
loss: 0.968727707862854,grad_norm: 0.8822298668023673, iteration: 351265
loss: 0.9937949776649475,grad_norm: 0.9999998871666593, iteration: 351266
loss: 1.022085189819336,grad_norm: 0.8178087305491282, iteration: 351267
loss: 1.0134282112121582,grad_norm: 0.8142033392658717, iteration: 351268
loss: 1.0082833766937256,grad_norm: 0.7910823117993789, iteration: 351269
loss: 0.9680985808372498,grad_norm: 0.8445749822635575, iteration: 351270
loss: 0.9945908784866333,grad_norm: 0.9053013707022043, iteration: 351271
loss: 0.980222761631012,grad_norm: 0.7573094001442172, iteration: 351272
loss: 1.0421090126037598,grad_norm: 0.8213178917521359, iteration: 351273
loss: 1.048940658569336,grad_norm: 0.912555293183072, iteration: 351274
loss: 0.9611402750015259,grad_norm: 0.7995997074831998, iteration: 351275
loss: 1.018141269683838,grad_norm: 0.800313185156392, iteration: 351276
loss: 1.012615442276001,grad_norm: 0.6432621301716679, iteration: 351277
loss: 0.9837695956230164,grad_norm: 0.7652391693981259, iteration: 351278
loss: 1.0060652494430542,grad_norm: 0.7611367044380627, iteration: 351279
loss: 1.0131510496139526,grad_norm: 0.8225510260486707, iteration: 351280
loss: 0.9991182088851929,grad_norm: 0.9999993862801714, iteration: 351281
loss: 0.9705743193626404,grad_norm: 0.858037242823091, iteration: 351282
loss: 1.0055880546569824,grad_norm: 0.8622431629717343, iteration: 351283
loss: 0.9923796653747559,grad_norm: 0.7787105474375946, iteration: 351284
loss: 0.9678171277046204,grad_norm: 0.7918327022445978, iteration: 351285
loss: 0.990172803401947,grad_norm: 0.8241573316405479, iteration: 351286
loss: 0.9820045828819275,grad_norm: 0.7080362885638707, iteration: 351287
loss: 1.0281740427017212,grad_norm: 0.7895156206919667, iteration: 351288
loss: 0.9863983392715454,grad_norm: 0.773252282533563, iteration: 351289
loss: 1.0277591943740845,grad_norm: 0.9999997891233549, iteration: 351290
loss: 1.0129542350769043,grad_norm: 0.9388298362903993, iteration: 351291
loss: 1.013542652130127,grad_norm: 0.8309431398498869, iteration: 351292
loss: 1.0113130807876587,grad_norm: 0.777569754327386, iteration: 351293
loss: 0.9782320261001587,grad_norm: 0.7822500136700049, iteration: 351294
loss: 1.0077242851257324,grad_norm: 0.8935335954091587, iteration: 351295
loss: 1.0174129009246826,grad_norm: 0.9999996147073694, iteration: 351296
loss: 0.9553975462913513,grad_norm: 0.855220373149158, iteration: 351297
loss: 0.9921435713768005,grad_norm: 0.8294836669922953, iteration: 351298
loss: 1.017493486404419,grad_norm: 0.863842496996748, iteration: 351299
loss: 1.006651520729065,grad_norm: 0.8790391692350471, iteration: 351300
loss: 0.9750174880027771,grad_norm: 0.8710083889903957, iteration: 351301
loss: 1.0036832094192505,grad_norm: 0.8709777183819392, iteration: 351302
loss: 1.0272216796875,grad_norm: 0.8896188728752646, iteration: 351303
loss: 1.0118145942687988,grad_norm: 0.7528082373450175, iteration: 351304
loss: 1.0417226552963257,grad_norm: 0.7473561325162636, iteration: 351305
loss: 1.0562169551849365,grad_norm: 0.8018965243092387, iteration: 351306
loss: 1.053476095199585,grad_norm: 0.8336897984154485, iteration: 351307
loss: 0.9809203743934631,grad_norm: 0.8783433881604625, iteration: 351308
loss: 0.9987969398498535,grad_norm: 0.6958933515435558, iteration: 351309
loss: 1.0090326070785522,grad_norm: 0.6787774995712254, iteration: 351310
loss: 1.0102293491363525,grad_norm: 0.8736831677466519, iteration: 351311
loss: 1.054445743560791,grad_norm: 0.9999991964588211, iteration: 351312
loss: 0.9784014225006104,grad_norm: 0.8827599953672117, iteration: 351313
loss: 0.9883723855018616,grad_norm: 0.8119432341098869, iteration: 351314
loss: 1.2036951780319214,grad_norm: 0.9999991314028587, iteration: 351315
loss: 0.9861701726913452,grad_norm: 0.8002823622212656, iteration: 351316
loss: 0.9873767495155334,grad_norm: 0.9415218642347853, iteration: 351317
loss: 0.9915831089019775,grad_norm: 0.9425509018222207, iteration: 351318
loss: 1.0027402639389038,grad_norm: 0.8194565733284316, iteration: 351319
loss: 0.9677191376686096,grad_norm: 0.7213923919092029, iteration: 351320
loss: 0.9777604341506958,grad_norm: 0.9218018181841595, iteration: 351321
loss: 0.9901660680770874,grad_norm: 0.6501946595217146, iteration: 351322
loss: 0.9883895516395569,grad_norm: 0.9999993271466032, iteration: 351323
loss: 1.090366244316101,grad_norm: 0.9999999471750254, iteration: 351324
loss: 1.0369939804077148,grad_norm: 0.8624232482582369, iteration: 351325
loss: 1.030950903892517,grad_norm: 0.7468337203831413, iteration: 351326
loss: 0.9791422486305237,grad_norm: 0.9999996398547022, iteration: 351327
loss: 0.9991586208343506,grad_norm: 0.8203221977145111, iteration: 351328
loss: 0.9726841449737549,grad_norm: 0.7172669834778959, iteration: 351329
loss: 0.9568120241165161,grad_norm: 0.9999990012348685, iteration: 351330
loss: 0.9713804125785828,grad_norm: 0.9179634654217321, iteration: 351331
loss: 1.102063775062561,grad_norm: 0.999999817152609, iteration: 351332
loss: 0.9958454370498657,grad_norm: 0.7436351044177039, iteration: 351333
loss: 1.030755877494812,grad_norm: 0.9934755070982659, iteration: 351334
loss: 0.9864176511764526,grad_norm: 0.7943541331707706, iteration: 351335
loss: 1.0045464038848877,grad_norm: 0.7998596509339274, iteration: 351336
loss: 0.9852078557014465,grad_norm: 0.7219680355400969, iteration: 351337
loss: 1.0168083906173706,grad_norm: 0.9999989723651987, iteration: 351338
loss: 0.9704038500785828,grad_norm: 0.6934757345094827, iteration: 351339
loss: 0.984596848487854,grad_norm: 0.8426976069163978, iteration: 351340
loss: 1.0123919248580933,grad_norm: 0.9481220231145308, iteration: 351341
loss: 1.0338267087936401,grad_norm: 0.9999996136807661, iteration: 351342
loss: 1.0263227224349976,grad_norm: 0.7305690030348329, iteration: 351343
loss: 0.9955856800079346,grad_norm: 0.6826957445673728, iteration: 351344
loss: 1.0117141008377075,grad_norm: 0.7101758908369376, iteration: 351345
loss: 1.0082793235778809,grad_norm: 0.9993795626758749, iteration: 351346
loss: 0.9575507640838623,grad_norm: 0.7763984685507095, iteration: 351347
loss: 0.9874194860458374,grad_norm: 0.9750817136099682, iteration: 351348
loss: 1.0107618570327759,grad_norm: 0.7848823754465439, iteration: 351349
loss: 1.0016214847564697,grad_norm: 0.7902062807240344, iteration: 351350
loss: 1.0127172470092773,grad_norm: 0.843682342242447, iteration: 351351
loss: 1.007352352142334,grad_norm: 0.8257259104287379, iteration: 351352
loss: 1.0134533643722534,grad_norm: 0.9582155437448118, iteration: 351353
loss: 1.0429039001464844,grad_norm: 0.9999990124969202, iteration: 351354
loss: 1.017673134803772,grad_norm: 0.6984965161455917, iteration: 351355
loss: 1.0097341537475586,grad_norm: 0.9999990236324636, iteration: 351356
loss: 0.9903916120529175,grad_norm: 0.9999990918093982, iteration: 351357
loss: 1.1773039102554321,grad_norm: 0.9999994400499697, iteration: 351358
loss: 1.0018820762634277,grad_norm: 0.7235055338009884, iteration: 351359
loss: 1.0250076055526733,grad_norm: 0.9999993722473288, iteration: 351360
loss: 1.0171034336090088,grad_norm: 0.8633229565667078, iteration: 351361
loss: 0.9835020303726196,grad_norm: 0.6942027740284598, iteration: 351362
loss: 1.021667242050171,grad_norm: 0.7708792987958082, iteration: 351363
loss: 1.002781867980957,grad_norm: 0.8049474600669719, iteration: 351364
loss: 0.9631720185279846,grad_norm: 0.8873471466245443, iteration: 351365
loss: 1.0196926593780518,grad_norm: 0.7722930376363646, iteration: 351366
loss: 0.9824742078781128,grad_norm: 0.8573063475769278, iteration: 351367
loss: 1.003092646598816,grad_norm: 0.698915857549387, iteration: 351368
loss: 0.9440409541130066,grad_norm: 0.9931184703246357, iteration: 351369
loss: 0.9795292615890503,grad_norm: 0.8887001877855927, iteration: 351370
loss: 1.0796122550964355,grad_norm: 0.7863792502250296, iteration: 351371
loss: 0.9753526449203491,grad_norm: 0.7406328567914794, iteration: 351372
loss: 1.03135347366333,grad_norm: 0.8039466914047855, iteration: 351373
loss: 0.9835959076881409,grad_norm: 0.7536818406167801, iteration: 351374
loss: 1.0204354524612427,grad_norm: 0.900171674001911, iteration: 351375
loss: 0.9980121850967407,grad_norm: 0.8513208456305342, iteration: 351376
loss: 0.9869681596755981,grad_norm: 0.8113290100654746, iteration: 351377
loss: 0.9884196519851685,grad_norm: 0.9129215100317739, iteration: 351378
loss: 1.0355052947998047,grad_norm: 0.8695426788878244, iteration: 351379
loss: 1.011195182800293,grad_norm: 0.8339604922655347, iteration: 351380
loss: 1.0115810632705688,grad_norm: 0.9999989923187892, iteration: 351381
loss: 0.972810685634613,grad_norm: 0.7838820824875259, iteration: 351382
loss: 1.1024034023284912,grad_norm: 0.9910678494528236, iteration: 351383
loss: 1.0269235372543335,grad_norm: 0.9245052407056897, iteration: 351384
loss: 1.0126327276229858,grad_norm: 0.8638593239708782, iteration: 351385
loss: 0.992877185344696,grad_norm: 0.9056156574545287, iteration: 351386
loss: 0.9866621494293213,grad_norm: 0.7726035854876084, iteration: 351387
loss: 1.0969650745391846,grad_norm: 0.9999998722279265, iteration: 351388
loss: 1.0197192430496216,grad_norm: 0.7593442926058863, iteration: 351389
loss: 1.0313975811004639,grad_norm: 0.8256516298333191, iteration: 351390
loss: 0.9972695112228394,grad_norm: 0.8675855573986533, iteration: 351391
loss: 1.0075613260269165,grad_norm: 0.7355783575626205, iteration: 351392
loss: 1.0102556943893433,grad_norm: 0.9999991963085003, iteration: 351393
loss: 0.9786478877067566,grad_norm: 0.677763986284629, iteration: 351394
loss: 1.1015286445617676,grad_norm: 0.9999998686212778, iteration: 351395
loss: 0.9790051579475403,grad_norm: 0.8859659682190418, iteration: 351396
loss: 1.021296501159668,grad_norm: 0.9999990548069163, iteration: 351397
loss: 1.0035619735717773,grad_norm: 0.8507283492547589, iteration: 351398
loss: 0.9617568850517273,grad_norm: 0.9078107698547605, iteration: 351399
loss: 1.0051906108856201,grad_norm: 0.6826169925609135, iteration: 351400
loss: 0.9828441143035889,grad_norm: 0.9929686825164835, iteration: 351401
loss: 1.0357106924057007,grad_norm: 0.8421284345008294, iteration: 351402
loss: 1.061456561088562,grad_norm: 0.853307559176864, iteration: 351403
loss: 1.0196521282196045,grad_norm: 0.9858052079971895, iteration: 351404
loss: 1.0083513259887695,grad_norm: 0.8767594842134127, iteration: 351405
loss: 1.0160456895828247,grad_norm: 0.7208986546337228, iteration: 351406
loss: 0.9796285629272461,grad_norm: 0.8838839986312389, iteration: 351407
loss: 0.9800300598144531,grad_norm: 0.884760091809901, iteration: 351408
loss: 0.9714572429656982,grad_norm: 0.8109586734081592, iteration: 351409
loss: 0.9628146290779114,grad_norm: 0.9081116361609847, iteration: 351410
loss: 0.9937095046043396,grad_norm: 0.8092942918454821, iteration: 351411
loss: 0.9948793053627014,grad_norm: 0.8565466704279183, iteration: 351412
loss: 1.0537065267562866,grad_norm: 0.7761892027036627, iteration: 351413
loss: 1.019726276397705,grad_norm: 0.7045158987412617, iteration: 351414
loss: 1.0455596446990967,grad_norm: 0.7960686489631075, iteration: 351415
loss: 1.0056133270263672,grad_norm: 0.6982733086602285, iteration: 351416
loss: 0.9834877252578735,grad_norm: 0.8087373018239391, iteration: 351417
loss: 0.9763099551200867,grad_norm: 0.8703918397283056, iteration: 351418
loss: 1.0114442110061646,grad_norm: 0.8330314694147127, iteration: 351419
loss: 0.9971749782562256,grad_norm: 0.8791118971013056, iteration: 351420
loss: 1.1348012685775757,grad_norm: 0.9999991512004206, iteration: 351421
loss: 1.194800615310669,grad_norm: 0.9999992080033905, iteration: 351422
loss: 0.9974895715713501,grad_norm: 0.9368607017665279, iteration: 351423
loss: 1.0151029825210571,grad_norm: 0.9157120698750258, iteration: 351424
loss: 1.0322760343551636,grad_norm: 0.9999991542742386, iteration: 351425
loss: 0.989090621471405,grad_norm: 0.9058441219726152, iteration: 351426
loss: 0.9721684455871582,grad_norm: 0.7335693864546031, iteration: 351427
loss: 0.9902411103248596,grad_norm: 0.9415371299892286, iteration: 351428
loss: 0.9939284324645996,grad_norm: 0.8739329522402527, iteration: 351429
loss: 1.0089267492294312,grad_norm: 0.7928278263743314, iteration: 351430
loss: 0.9936695694923401,grad_norm: 0.8534425459555615, iteration: 351431
loss: 1.0029464960098267,grad_norm: 0.8184325995074839, iteration: 351432
loss: 1.0283859968185425,grad_norm: 0.6189072772728443, iteration: 351433
loss: 0.9855406880378723,grad_norm: 0.7102377858949882, iteration: 351434
loss: 0.9787463545799255,grad_norm: 0.9166998113619139, iteration: 351435
loss: 0.9727094173431396,grad_norm: 0.7492104805131133, iteration: 351436
loss: 1.0338051319122314,grad_norm: 0.7736567075717025, iteration: 351437
loss: 0.9914892315864563,grad_norm: 0.7035556793242327, iteration: 351438
loss: 1.002810001373291,grad_norm: 0.7972377261631941, iteration: 351439
loss: 0.9748117923736572,grad_norm: 0.833991720841309, iteration: 351440
loss: 1.00400972366333,grad_norm: 0.9999991273173511, iteration: 351441
loss: 0.9824392199516296,grad_norm: 0.7720145623227015, iteration: 351442
loss: 1.0040485858917236,grad_norm: 0.9999993476319645, iteration: 351443
loss: 1.022205114364624,grad_norm: 0.9999990307962027, iteration: 351444
loss: 0.9863765835762024,grad_norm: 0.8456982506657234, iteration: 351445
loss: 0.9825077056884766,grad_norm: 0.8574445538333658, iteration: 351446
loss: 1.0199183225631714,grad_norm: 0.7912483021451402, iteration: 351447
loss: 1.0099037885665894,grad_norm: 0.7827646546483129, iteration: 351448
loss: 1.0243555307388306,grad_norm: 0.8469675780124399, iteration: 351449
loss: 1.0396254062652588,grad_norm: 0.9999998186040341, iteration: 351450
loss: 0.9887997508049011,grad_norm: 0.9999991018924353, iteration: 351451
loss: 1.0004841089248657,grad_norm: 0.8088206605251378, iteration: 351452
loss: 0.9945364594459534,grad_norm: 0.6008426983255707, iteration: 351453
loss: 0.9647209048271179,grad_norm: 0.8199766146876125, iteration: 351454
loss: 1.013744592666626,grad_norm: 0.7405380914021232, iteration: 351455
loss: 1.0061452388763428,grad_norm: 0.7711076140575968, iteration: 351456
loss: 1.0432533025741577,grad_norm: 0.9246619678691624, iteration: 351457
loss: 0.9654406905174255,grad_norm: 0.9999991075831257, iteration: 351458
loss: 1.023917555809021,grad_norm: 0.8935404157452341, iteration: 351459
loss: 1.0366144180297852,grad_norm: 0.999999315379308, iteration: 351460
loss: 0.9873790740966797,grad_norm: 0.9999992156404922, iteration: 351461
loss: 0.9882773756980896,grad_norm: 0.8039630039951166, iteration: 351462
loss: 0.9845665693283081,grad_norm: 0.8889166516392368, iteration: 351463
loss: 0.9603221416473389,grad_norm: 0.803674460438289, iteration: 351464
loss: 1.0663983821868896,grad_norm: 0.9513989350507329, iteration: 351465
loss: 1.0612735748291016,grad_norm: 0.965863084519274, iteration: 351466
loss: 1.0211478471755981,grad_norm: 0.958232256279965, iteration: 351467
loss: 0.9849250316619873,grad_norm: 0.9472889719145504, iteration: 351468
loss: 1.0139833688735962,grad_norm: 0.8766310974057367, iteration: 351469
loss: 0.9977740049362183,grad_norm: 0.8334263644390544, iteration: 351470
loss: 1.025884747505188,grad_norm: 0.9085619237815324, iteration: 351471
loss: 0.9964872598648071,grad_norm: 0.7365258981816531, iteration: 351472
loss: 1.0565829277038574,grad_norm: 0.804105846214706, iteration: 351473
loss: 0.9964251518249512,grad_norm: 0.8112487771610387, iteration: 351474
loss: 1.008156180381775,grad_norm: 0.8149501317795808, iteration: 351475
loss: 0.9905971884727478,grad_norm: 0.909722427608549, iteration: 351476
loss: 1.024821162223816,grad_norm: 0.7706774472992403, iteration: 351477
loss: 0.9780807495117188,grad_norm: 0.7499903107020612, iteration: 351478
loss: 0.9823191165924072,grad_norm: 0.9999999149907375, iteration: 351479
loss: 1.0390247106552124,grad_norm: 0.9999990261306138, iteration: 351480
loss: 1.0148814916610718,grad_norm: 0.6769720861377843, iteration: 351481
loss: 0.9950907230377197,grad_norm: 0.7308081095019524, iteration: 351482
loss: 0.9980599284172058,grad_norm: 0.8752921372575803, iteration: 351483
loss: 0.9832841157913208,grad_norm: 0.9012630986973613, iteration: 351484
loss: 1.0093814134597778,grad_norm: 0.735527698311003, iteration: 351485
loss: 1.002028226852417,grad_norm: 0.8166347984391467, iteration: 351486
loss: 1.0070000886917114,grad_norm: 0.8322022033398673, iteration: 351487
loss: 1.0182256698608398,grad_norm: 0.9999995271027183, iteration: 351488
loss: 1.023754358291626,grad_norm: 0.7524650875992445, iteration: 351489
loss: 1.0655819177627563,grad_norm: 0.8798916890369719, iteration: 351490
loss: 0.9848651885986328,grad_norm: 0.663514658039112, iteration: 351491
loss: 0.9905641674995422,grad_norm: 0.9790700299535663, iteration: 351492
loss: 0.9919041991233826,grad_norm: 0.8494592598824368, iteration: 351493
loss: 1.0163267850875854,grad_norm: 0.7713204013561583, iteration: 351494
loss: 0.9742054343223572,grad_norm: 0.999999184182335, iteration: 351495
loss: 1.0695439577102661,grad_norm: 0.9177293074593716, iteration: 351496
loss: 1.0013738870620728,grad_norm: 0.6765699334191141, iteration: 351497
loss: 0.9930092096328735,grad_norm: 0.9133384401285295, iteration: 351498
loss: 1.008811593055725,grad_norm: 0.8169196017280114, iteration: 351499
loss: 0.9836634993553162,grad_norm: 0.677675911015267, iteration: 351500
loss: 1.0776662826538086,grad_norm: 0.999999647896157, iteration: 351501
loss: 1.0067963600158691,grad_norm: 0.8629036113825936, iteration: 351502
loss: 1.021386742591858,grad_norm: 0.7519045559352274, iteration: 351503
loss: 1.0047037601470947,grad_norm: 0.7129597366253745, iteration: 351504
loss: 1.0859355926513672,grad_norm: 0.8334065871664351, iteration: 351505
loss: 1.0153876543045044,grad_norm: 0.8010953515028227, iteration: 351506
loss: 1.0145602226257324,grad_norm: 0.8174304730241158, iteration: 351507
loss: 1.0173124074935913,grad_norm: 0.6383113914068647, iteration: 351508
loss: 1.1544334888458252,grad_norm: 0.9999997985762163, iteration: 351509
loss: 1.0064269304275513,grad_norm: 0.9249620473044636, iteration: 351510
loss: 0.9536015391349792,grad_norm: 0.7781975721777772, iteration: 351511
loss: 0.9954988956451416,grad_norm: 0.7478906038563835, iteration: 351512
loss: 1.0438429117202759,grad_norm: 0.9872240793094061, iteration: 351513
loss: 0.9821218848228455,grad_norm: 0.7886129943246866, iteration: 351514
loss: 1.0182853937149048,grad_norm: 0.8233671817466426, iteration: 351515
loss: 1.0122809410095215,grad_norm: 0.8903276690249313, iteration: 351516
loss: 0.9615906476974487,grad_norm: 0.8178757076939493, iteration: 351517
loss: 0.9907305240631104,grad_norm: 0.7255552575729892, iteration: 351518
loss: 0.9664574861526489,grad_norm: 0.9574925770511212, iteration: 351519
loss: 1.011234998703003,grad_norm: 0.839555464957998, iteration: 351520
loss: 1.0240111351013184,grad_norm: 0.7774532238769959, iteration: 351521
loss: 1.0368417501449585,grad_norm: 0.8728765623148506, iteration: 351522
loss: 0.9562385678291321,grad_norm: 0.8472806847174019, iteration: 351523
loss: 1.0092424154281616,grad_norm: 0.9999991641099958, iteration: 351524
loss: 0.9906278848648071,grad_norm: 0.7781134794203758, iteration: 351525
loss: 0.9890923500061035,grad_norm: 0.9849048087951892, iteration: 351526
loss: 1.1228501796722412,grad_norm: 0.9999997822238553, iteration: 351527
loss: 1.0012729167938232,grad_norm: 0.9999991309239451, iteration: 351528
loss: 0.9595472812652588,grad_norm: 0.8861092857351237, iteration: 351529
loss: 1.0385645627975464,grad_norm: 0.999999139358423, iteration: 351530
loss: 0.9794691205024719,grad_norm: 0.7265801085099766, iteration: 351531
loss: 0.9757573008537292,grad_norm: 0.6636682436470659, iteration: 351532
loss: 0.9615132808685303,grad_norm: 0.768019682115044, iteration: 351533
loss: 1.0221855640411377,grad_norm: 0.8188266167755427, iteration: 351534
loss: 1.0398768186569214,grad_norm: 0.9042210146174693, iteration: 351535
loss: 1.0315991640090942,grad_norm: 0.9999993551556469, iteration: 351536
loss: 1.1212968826293945,grad_norm: 0.9999989316569866, iteration: 351537
loss: 1.007605791091919,grad_norm: 0.999999082278413, iteration: 351538
loss: 1.0010483264923096,grad_norm: 0.7537676126230808, iteration: 351539
loss: 0.9731360077857971,grad_norm: 0.8713666931185413, iteration: 351540
loss: 0.963454008102417,grad_norm: 0.8912220361823876, iteration: 351541
loss: 0.9855496287345886,grad_norm: 0.8144954713967743, iteration: 351542
loss: 1.025191307067871,grad_norm: 0.8004284122015395, iteration: 351543
loss: 1.0123000144958496,grad_norm: 0.8248399739679846, iteration: 351544
loss: 1.1035985946655273,grad_norm: 0.8525342989651676, iteration: 351545
loss: 1.0284712314605713,grad_norm: 0.9999998109394101, iteration: 351546
loss: 0.9623374342918396,grad_norm: 0.8963885618226621, iteration: 351547
loss: 1.0153533220291138,grad_norm: 0.9221019421330393, iteration: 351548
loss: 0.9990221261978149,grad_norm: 0.8576314015637515, iteration: 351549
loss: 1.0002696514129639,grad_norm: 0.8379412890364183, iteration: 351550
loss: 1.0174610614776611,grad_norm: 0.9999996605717959, iteration: 351551
loss: 0.9940982460975647,grad_norm: 0.9211897947373433, iteration: 351552
loss: 1.024988055229187,grad_norm: 0.9999992335596163, iteration: 351553
loss: 1.0502442121505737,grad_norm: 0.9134251356408707, iteration: 351554
loss: 1.0575525760650635,grad_norm: 0.9999996434987252, iteration: 351555
loss: 1.0842396020889282,grad_norm: 0.9999991985360769, iteration: 351556
loss: 0.9853364825248718,grad_norm: 0.8388709069286541, iteration: 351557
loss: 0.9962234497070312,grad_norm: 0.9999992743290316, iteration: 351558
loss: 0.9977896809577942,grad_norm: 0.7826197808947143, iteration: 351559
loss: 1.0419377088546753,grad_norm: 0.7271467483165417, iteration: 351560
loss: 0.9994215965270996,grad_norm: 0.9240004374020617, iteration: 351561
loss: 1.0018426179885864,grad_norm: 0.9999990515549754, iteration: 351562
loss: 1.0139676332473755,grad_norm: 0.6619483946794821, iteration: 351563
loss: 1.045486569404602,grad_norm: 0.7881600769927215, iteration: 351564
loss: 0.9926059246063232,grad_norm: 0.8853790142347034, iteration: 351565
loss: 0.9538007974624634,grad_norm: 0.8428184407618898, iteration: 351566
loss: 0.970107913017273,grad_norm: 0.855315966564255, iteration: 351567
loss: 0.9977974891662598,grad_norm: 0.8456576005665568, iteration: 351568
loss: 1.0039290189743042,grad_norm: 0.8062970335648181, iteration: 351569
loss: 0.9892082810401917,grad_norm: 0.6466472992107919, iteration: 351570
loss: 1.0279290676116943,grad_norm: 0.8069286169784375, iteration: 351571
loss: 0.9764139652252197,grad_norm: 0.8911391230532977, iteration: 351572
loss: 1.0368813276290894,grad_norm: 0.9022224299261348, iteration: 351573
loss: 0.9926118850708008,grad_norm: 0.9328859428857819, iteration: 351574
loss: 0.9965519309043884,grad_norm: 0.7076733121758566, iteration: 351575
loss: 1.01228928565979,grad_norm: 0.842673708769548, iteration: 351576
loss: 0.9800464510917664,grad_norm: 0.7864464084724921, iteration: 351577
loss: 0.9924625158309937,grad_norm: 0.8262716641478609, iteration: 351578
loss: 1.0370466709136963,grad_norm: 0.808719960704023, iteration: 351579
loss: 0.9950563311576843,grad_norm: 0.8306968279995579, iteration: 351580
loss: 0.9706945419311523,grad_norm: 0.7705211197540192, iteration: 351581
loss: 1.1182451248168945,grad_norm: 0.8243289772058141, iteration: 351582
loss: 0.9850540161132812,grad_norm: 0.6781800182571754, iteration: 351583
loss: 0.9996179938316345,grad_norm: 0.9251073863630612, iteration: 351584
loss: 1.002928376197815,grad_norm: 0.9060993079218757, iteration: 351585
loss: 1.1237454414367676,grad_norm: 0.9999998922546158, iteration: 351586
loss: 0.956136167049408,grad_norm: 0.810949825351356, iteration: 351587
loss: 0.9778176546096802,grad_norm: 0.8389379125972964, iteration: 351588
loss: 1.0172991752624512,grad_norm: 0.7166531595551496, iteration: 351589
loss: 1.0099644660949707,grad_norm: 0.7846563980847534, iteration: 351590
loss: 1.008366584777832,grad_norm: 0.9162251169693958, iteration: 351591
loss: 0.9916502237319946,grad_norm: 0.8170382051159881, iteration: 351592
loss: 1.0335959196090698,grad_norm: 0.8003495177067833, iteration: 351593
loss: 1.0348317623138428,grad_norm: 0.8440616773475639, iteration: 351594
loss: 1.0268832445144653,grad_norm: 0.8461476508013964, iteration: 351595
loss: 0.9501527547836304,grad_norm: 0.8702084352916979, iteration: 351596
loss: 1.0016553401947021,grad_norm: 0.8509839467031968, iteration: 351597
loss: 0.9989724159240723,grad_norm: 0.8271993267613611, iteration: 351598
loss: 1.00075101852417,grad_norm: 0.801797677728898, iteration: 351599
loss: 1.0044691562652588,grad_norm: 0.8002050170784638, iteration: 351600
loss: 0.9956509470939636,grad_norm: 0.8002994531536733, iteration: 351601
loss: 1.0360360145568848,grad_norm: 0.8417141922095362, iteration: 351602
loss: 1.0019398927688599,grad_norm: 0.7771717801731298, iteration: 351603
loss: 0.9800576567649841,grad_norm: 0.9513397556659754, iteration: 351604
loss: 1.008636474609375,grad_norm: 0.8245952984851297, iteration: 351605
loss: 1.0052945613861084,grad_norm: 0.9274523993656109, iteration: 351606
loss: 1.0275973081588745,grad_norm: 0.9948654821943677, iteration: 351607
loss: 1.0042140483856201,grad_norm: 0.8477177293952652, iteration: 351608
loss: 1.0189740657806396,grad_norm: 0.7722361254152429, iteration: 351609
loss: 0.9996857643127441,grad_norm: 0.7351833501537082, iteration: 351610
loss: 0.9953299760818481,grad_norm: 0.7565737074562006, iteration: 351611
loss: 0.9689063429832458,grad_norm: 0.8594984699939267, iteration: 351612
loss: 1.0039578676223755,grad_norm: 0.9999998272860258, iteration: 351613
loss: 1.0267928838729858,grad_norm: 0.9291326835777034, iteration: 351614
loss: 0.9935719966888428,grad_norm: 0.7525348816113875, iteration: 351615
loss: 0.9557613730430603,grad_norm: 0.8063087942786548, iteration: 351616
loss: 1.0085049867630005,grad_norm: 0.7242906995706693, iteration: 351617
loss: 0.9791868329048157,grad_norm: 0.6555956837880902, iteration: 351618
loss: 1.0007861852645874,grad_norm: 0.8540490623656528, iteration: 351619
loss: 1.0103925466537476,grad_norm: 0.7463320306460575, iteration: 351620
loss: 1.0192196369171143,grad_norm: 0.8540954347352188, iteration: 351621
loss: 0.9708375334739685,grad_norm: 0.8112207724975746, iteration: 351622
loss: 0.9769099354743958,grad_norm: 0.893922639451111, iteration: 351623
loss: 1.0185445547103882,grad_norm: 0.9999992899418483, iteration: 351624
loss: 1.006011962890625,grad_norm: 0.8229214282016852, iteration: 351625
loss: 0.9989647269248962,grad_norm: 0.8714667121805042, iteration: 351626
loss: 1.0327719449996948,grad_norm: 0.8021415915169817, iteration: 351627
loss: 0.9644677042961121,grad_norm: 0.8352541473794477, iteration: 351628
loss: 1.051787257194519,grad_norm: 0.824893437341824, iteration: 351629
loss: 0.9967222809791565,grad_norm: 0.7892218453342008, iteration: 351630
loss: 1.1172906160354614,grad_norm: 0.7997248277616842, iteration: 351631
loss: 0.9954651594161987,grad_norm: 0.8469640339566185, iteration: 351632
loss: 1.014752984046936,grad_norm: 0.8928233792368965, iteration: 351633
loss: 1.0199284553527832,grad_norm: 0.831833940714151, iteration: 351634
loss: 1.0047823190689087,grad_norm: 0.7539465509741125, iteration: 351635
loss: 0.98366379737854,grad_norm: 0.9337239795696493, iteration: 351636
loss: 0.9809943437576294,grad_norm: 0.8437259846756349, iteration: 351637
loss: 0.9666284322738647,grad_norm: 0.8722436186396052, iteration: 351638
loss: 1.0350531339645386,grad_norm: 0.9766793841504454, iteration: 351639
loss: 0.9709697961807251,grad_norm: 0.678929765052148, iteration: 351640
loss: 1.019556999206543,grad_norm: 0.698933158211159, iteration: 351641
loss: 1.023708462715149,grad_norm: 0.7139467568483436, iteration: 351642
loss: 0.984470784664154,grad_norm: 0.845987167534423, iteration: 351643
loss: 1.0455412864685059,grad_norm: 0.7309422079997665, iteration: 351644
loss: 1.0150758028030396,grad_norm: 0.9999989878533263, iteration: 351645
loss: 0.973538339138031,grad_norm: 0.7876630254448925, iteration: 351646
loss: 0.9662044644355774,grad_norm: 0.7698990254937859, iteration: 351647
loss: 0.9892624616622925,grad_norm: 0.8154318142033947, iteration: 351648
loss: 1.0241628885269165,grad_norm: 0.7617988165074622, iteration: 351649
loss: 1.0142722129821777,grad_norm: 0.7089229524260043, iteration: 351650
loss: 1.0479520559310913,grad_norm: 0.9204268884867484, iteration: 351651
loss: 1.1092692613601685,grad_norm: 0.9114593882224001, iteration: 351652
loss: 0.9776493310928345,grad_norm: 0.9999991131311262, iteration: 351653
loss: 0.9831416010856628,grad_norm: 0.815104653072022, iteration: 351654
loss: 0.9908685088157654,grad_norm: 0.8706364928238909, iteration: 351655
loss: 0.9816790819168091,grad_norm: 0.784294878989744, iteration: 351656
loss: 1.0054523944854736,grad_norm: 0.8199817589055215, iteration: 351657
loss: 1.0851478576660156,grad_norm: 0.9999999686542711, iteration: 351658
loss: 0.9576642513275146,grad_norm: 0.9458472576674025, iteration: 351659
loss: 1.0090290307998657,grad_norm: 0.7961549726126644, iteration: 351660
loss: 0.982835590839386,grad_norm: 0.8321490476857631, iteration: 351661
loss: 1.0091180801391602,grad_norm: 0.7658872332110619, iteration: 351662
loss: 1.0103626251220703,grad_norm: 0.8895323033540955, iteration: 351663
loss: 0.9536162614822388,grad_norm: 0.8942266583483754, iteration: 351664
loss: 0.9742202162742615,grad_norm: 0.8196307690619954, iteration: 351665
loss: 1.0208834409713745,grad_norm: 0.8317832356344456, iteration: 351666
loss: 1.0415928363800049,grad_norm: 0.9999990737724468, iteration: 351667
loss: 1.040878176689148,grad_norm: 0.8680213466144945, iteration: 351668
loss: 1.085080623626709,grad_norm: 0.9999999189257422, iteration: 351669
loss: 0.9932122826576233,grad_norm: 0.8133015854551601, iteration: 351670
loss: 1.0068589448928833,grad_norm: 0.7071778373793182, iteration: 351671
loss: 1.0146775245666504,grad_norm: 0.6930077501584279, iteration: 351672
loss: 0.975593626499176,grad_norm: 0.8895196110337075, iteration: 351673
loss: 0.9921970963478088,grad_norm: 0.6912361894972014, iteration: 351674
loss: 0.9787704944610596,grad_norm: 0.9999992313256347, iteration: 351675
loss: 0.9904531240463257,grad_norm: 0.8118264990702393, iteration: 351676
loss: 0.9669013023376465,grad_norm: 0.8286943125450503, iteration: 351677
loss: 0.9815475940704346,grad_norm: 0.7719894218518475, iteration: 351678
loss: 0.9651455879211426,grad_norm: 0.8785410282363205, iteration: 351679
loss: 1.0017844438552856,grad_norm: 0.7830670334981628, iteration: 351680
loss: 1.0105534791946411,grad_norm: 0.7613047045252646, iteration: 351681
loss: 1.0316684246063232,grad_norm: 0.6484890025339982, iteration: 351682
loss: 0.9672077894210815,grad_norm: 0.9287845741982543, iteration: 351683
loss: 1.0463308095932007,grad_norm: 0.9789406581470291, iteration: 351684
loss: 1.0033060312271118,grad_norm: 0.9044947228481605, iteration: 351685
loss: 0.9805435538291931,grad_norm: 0.7844876779479125, iteration: 351686
loss: 1.0330966711044312,grad_norm: 0.8533098907871252, iteration: 351687
loss: 1.0254360437393188,grad_norm: 0.8088647227400156, iteration: 351688
loss: 0.9557441473007202,grad_norm: 0.8019414735587258, iteration: 351689
loss: 0.980868399143219,grad_norm: 0.7358147986075197, iteration: 351690
loss: 1.0082697868347168,grad_norm: 0.8453886011031789, iteration: 351691
loss: 0.9856486916542053,grad_norm: 0.9964137950582672, iteration: 351692
loss: 1.0131415128707886,grad_norm: 0.999999031956221, iteration: 351693
loss: 1.0126959085464478,grad_norm: 0.8899010054796337, iteration: 351694
loss: 0.9836696982383728,grad_norm: 0.6667819228017482, iteration: 351695
loss: 0.9919333457946777,grad_norm: 0.827741832706462, iteration: 351696
loss: 0.9730649590492249,grad_norm: 0.9212488106239972, iteration: 351697
loss: 0.9972703456878662,grad_norm: 0.8390524751785988, iteration: 351698
loss: 0.9804387092590332,grad_norm: 0.7686722616988046, iteration: 351699
loss: 1.0126793384552002,grad_norm: 0.7398893588398756, iteration: 351700
loss: 1.0285824537277222,grad_norm: 0.6634721215838075, iteration: 351701
loss: 0.9651193022727966,grad_norm: 0.835999072188418, iteration: 351702
loss: 1.0156224966049194,grad_norm: 0.7568230122212166, iteration: 351703
loss: 1.0022093057632446,grad_norm: 0.7324529290059287, iteration: 351704
loss: 0.9730654358863831,grad_norm: 0.7789457188347014, iteration: 351705
loss: 0.9849774241447449,grad_norm: 0.8288807975545263, iteration: 351706
loss: 1.0070589780807495,grad_norm: 0.8320003692113714, iteration: 351707
loss: 0.9759079813957214,grad_norm: 0.8830093784816213, iteration: 351708
loss: 1.0296753644943237,grad_norm: 0.9999990374795701, iteration: 351709
loss: 0.9987984299659729,grad_norm: 0.7953293529215861, iteration: 351710
loss: 0.9815829992294312,grad_norm: 0.9999990872743311, iteration: 351711
loss: 1.0098776817321777,grad_norm: 0.8941980714087411, iteration: 351712
loss: 0.9899597764015198,grad_norm: 0.7652785512824677, iteration: 351713
loss: 0.9408077001571655,grad_norm: 0.783886185234567, iteration: 351714
loss: 0.9892492890357971,grad_norm: 0.7733705484902781, iteration: 351715
loss: 0.9528770446777344,grad_norm: 0.7855960656699692, iteration: 351716
loss: 1.043469786643982,grad_norm: 0.919410414027928, iteration: 351717
loss: 0.9711201786994934,grad_norm: 0.8387255635973081, iteration: 351718
loss: 0.9763914346694946,grad_norm: 0.7814129659949278, iteration: 351719
loss: 1.0056685209274292,grad_norm: 0.8373949978002776, iteration: 351720
loss: 0.9883852601051331,grad_norm: 0.7209138642941542, iteration: 351721
loss: 1.014854907989502,grad_norm: 0.7741136597730742, iteration: 351722
loss: 0.9553461074829102,grad_norm: 0.7563781009313, iteration: 351723
loss: 1.0176396369934082,grad_norm: 0.9075374794982806, iteration: 351724
loss: 0.9494585990905762,grad_norm: 0.7538172477313997, iteration: 351725
loss: 1.0042089223861694,grad_norm: 0.8038263420936401, iteration: 351726
loss: 1.006805658340454,grad_norm: 0.8167498892681705, iteration: 351727
loss: 0.9884719252586365,grad_norm: 0.999999048832623, iteration: 351728
loss: 1.0099759101867676,grad_norm: 0.9572683336124485, iteration: 351729
loss: 0.9984219670295715,grad_norm: 0.6202240534489349, iteration: 351730
loss: 0.998690128326416,grad_norm: 0.88214735882834, iteration: 351731
loss: 0.9701696634292603,grad_norm: 0.7588303903156511, iteration: 351732
loss: 1.0050048828125,grad_norm: 0.9331272564948674, iteration: 351733
loss: 1.0248289108276367,grad_norm: 0.956888188977446, iteration: 351734
loss: 0.9943514466285706,grad_norm: 0.8281420631154504, iteration: 351735
loss: 1.008942723274231,grad_norm: 0.7846614874347302, iteration: 351736
loss: 0.9939730763435364,grad_norm: 0.9999997195821554, iteration: 351737
loss: 1.0222433805465698,grad_norm: 0.9945920227451478, iteration: 351738
loss: 1.0069113969802856,grad_norm: 0.8786961473983609, iteration: 351739
loss: 1.0033591985702515,grad_norm: 0.8322244549520187, iteration: 351740
loss: 1.0021514892578125,grad_norm: 0.9999991151490781, iteration: 351741
loss: 1.0331919193267822,grad_norm: 0.741648327981437, iteration: 351742
loss: 1.006469964981079,grad_norm: 0.6793743722002796, iteration: 351743
loss: 1.0177299976348877,grad_norm: 0.6742833655103072, iteration: 351744
loss: 1.0298744440078735,grad_norm: 0.8871768323182793, iteration: 351745
loss: 1.023730754852295,grad_norm: 0.9999999493261419, iteration: 351746
loss: 1.0157593488693237,grad_norm: 0.9366484577058252, iteration: 351747
loss: 0.9944795370101929,grad_norm: 0.8356860132140789, iteration: 351748
loss: 0.98979651927948,grad_norm: 0.8895951371492605, iteration: 351749
loss: 0.9692093729972839,grad_norm: 0.9648497981020602, iteration: 351750
loss: 0.9760293364524841,grad_norm: 0.8485258380988897, iteration: 351751
loss: 0.9651041030883789,grad_norm: 0.8977691502704881, iteration: 351752
loss: 1.0195287466049194,grad_norm: 0.9732439613292514, iteration: 351753
loss: 0.9641699194908142,grad_norm: 0.7491395471575371, iteration: 351754
loss: 1.003326177597046,grad_norm: 0.7818452241106076, iteration: 351755
loss: 0.9892950654029846,grad_norm: 0.8682458374958671, iteration: 351756
loss: 1.003684639930725,grad_norm: 0.7158256111371937, iteration: 351757
loss: 1.0072773694992065,grad_norm: 0.8745964319238632, iteration: 351758
loss: 1.027665376663208,grad_norm: 0.8868315856381064, iteration: 351759
loss: 1.00413179397583,grad_norm: 0.7970896885987975, iteration: 351760
loss: 0.9943224787712097,grad_norm: 0.907875727680337, iteration: 351761
loss: 1.0100455284118652,grad_norm: 0.7088179748726116, iteration: 351762
loss: 0.9955087304115295,grad_norm: 0.7747250748663593, iteration: 351763
loss: 1.046826720237732,grad_norm: 0.9268815401179281, iteration: 351764
loss: 1.019457221031189,grad_norm: 0.6962315355022849, iteration: 351765
loss: 1.0160179138183594,grad_norm: 0.8711537373644777, iteration: 351766
loss: 1.0107876062393188,grad_norm: 0.8425014371046547, iteration: 351767
loss: 0.9928882122039795,grad_norm: 0.9329523988690493, iteration: 351768
loss: 0.9721499681472778,grad_norm: 0.7280260145837417, iteration: 351769
loss: 0.9828978180885315,grad_norm: 0.9999992638395324, iteration: 351770
loss: 0.9848716855049133,grad_norm: 0.7641064428060539, iteration: 351771
loss: 0.9849397540092468,grad_norm: 0.8756718019391493, iteration: 351772
loss: 1.0143706798553467,grad_norm: 0.7339503867386324, iteration: 351773
loss: 1.005195140838623,grad_norm: 1.0000000834729277, iteration: 351774
loss: 0.9937283992767334,grad_norm: 0.8515760461136099, iteration: 351775
loss: 0.975795328617096,grad_norm: 0.7345700097848044, iteration: 351776
loss: 1.0020208358764648,grad_norm: 0.7957125344408988, iteration: 351777
loss: 1.0196624994277954,grad_norm: 0.6683815482705692, iteration: 351778
loss: 0.9653740525245667,grad_norm: 0.8642281500656138, iteration: 351779
loss: 1.0622533559799194,grad_norm: 0.8441038291192411, iteration: 351780
loss: 1.0268079042434692,grad_norm: 0.8042762736981072, iteration: 351781
loss: 0.9738105535507202,grad_norm: 0.7505127180829195, iteration: 351782
loss: 1.021863579750061,grad_norm: 0.7070798744143952, iteration: 351783
loss: 0.9886307120323181,grad_norm: 0.9999990004405538, iteration: 351784
loss: 0.9973757863044739,grad_norm: 0.6673800290039593, iteration: 351785
loss: 1.0118924379348755,grad_norm: 0.9110782810699909, iteration: 351786
loss: 1.0269043445587158,grad_norm: 0.9029836377695559, iteration: 351787
loss: 0.9712255597114563,grad_norm: 0.7373236281333837, iteration: 351788
loss: 1.1255972385406494,grad_norm: 0.8393293115745467, iteration: 351789
loss: 0.9865493178367615,grad_norm: 0.7628400556598661, iteration: 351790
loss: 0.9780114889144897,grad_norm: 0.8106273062750012, iteration: 351791
loss: 0.9874334335327148,grad_norm: 0.7960503073662423, iteration: 351792
loss: 1.0340745449066162,grad_norm: 0.9999995945662675, iteration: 351793
loss: 0.9738779067993164,grad_norm: 0.9077657227059984, iteration: 351794
loss: 0.9684489369392395,grad_norm: 0.7933395183683905, iteration: 351795
loss: 0.9704728126525879,grad_norm: 0.8456524870348245, iteration: 351796
loss: 0.9580034017562866,grad_norm: 0.7184539763818892, iteration: 351797
loss: 0.9763781428337097,grad_norm: 0.7414980243487903, iteration: 351798
loss: 0.9902650117874146,grad_norm: 0.7879939289649646, iteration: 351799
loss: 0.9928140640258789,grad_norm: 0.7318705777118815, iteration: 351800
loss: 1.0362521409988403,grad_norm: 0.9999990257927974, iteration: 351801
loss: 0.9904903173446655,grad_norm: 0.8950685823125779, iteration: 351802
loss: 0.982096791267395,grad_norm: 0.8420422342415899, iteration: 351803
loss: 0.9590237140655518,grad_norm: 0.8154595420001796, iteration: 351804
loss: 0.982331395149231,grad_norm: 0.8077727460852476, iteration: 351805
loss: 0.9822993278503418,grad_norm: 0.6884408917205638, iteration: 351806
loss: 0.9768006801605225,grad_norm: 0.9366329933516234, iteration: 351807
loss: 1.0065847635269165,grad_norm: 0.7488831812268555, iteration: 351808
loss: 0.9805740714073181,grad_norm: 0.8464860079211758, iteration: 351809
loss: 0.9832366704940796,grad_norm: 0.8513006200807725, iteration: 351810
loss: 0.9940494894981384,grad_norm: 0.6935590220835249, iteration: 351811
loss: 0.9824420809745789,grad_norm: 0.9132261081816401, iteration: 351812
loss: 1.017043948173523,grad_norm: 0.8853745097049888, iteration: 351813
loss: 0.9732567667961121,grad_norm: 0.7804403602986725, iteration: 351814
loss: 1.0093586444854736,grad_norm: 0.7747112661100173, iteration: 351815
loss: 0.9756721258163452,grad_norm: 0.846522320287685, iteration: 351816
loss: 0.9985111355781555,grad_norm: 0.6705287419432214, iteration: 351817
loss: 0.9905481338500977,grad_norm: 0.7487756000215785, iteration: 351818
loss: 1.0017451047897339,grad_norm: 0.6903799022820705, iteration: 351819
loss: 1.058748722076416,grad_norm: 0.8863161913290161, iteration: 351820
loss: 1.097786784172058,grad_norm: 0.8973689803342917, iteration: 351821
loss: 1.0301569700241089,grad_norm: 0.7596632079922886, iteration: 351822
loss: 0.9978310465812683,grad_norm: 0.867313180624099, iteration: 351823
loss: 0.9887928366661072,grad_norm: 0.85520532835888, iteration: 351824
loss: 1.0069648027420044,grad_norm: 0.676094038533765, iteration: 351825
loss: 1.049942970275879,grad_norm: 0.9669193922412891, iteration: 351826
loss: 1.0291672945022583,grad_norm: 0.9371687148433776, iteration: 351827
loss: 0.9811829924583435,grad_norm: 0.8874871008420024, iteration: 351828
loss: 1.0114089250564575,grad_norm: 0.9354216493473424, iteration: 351829
loss: 1.0201724767684937,grad_norm: 0.8090700782041712, iteration: 351830
loss: 0.9807232618331909,grad_norm: 0.8241254327054245, iteration: 351831
loss: 0.97174471616745,grad_norm: 0.7343281495390722, iteration: 351832
loss: 1.0276298522949219,grad_norm: 0.7947110816473689, iteration: 351833
loss: 0.9626703262329102,grad_norm: 0.8767445204079135, iteration: 351834
loss: 1.0093251466751099,grad_norm: 0.7997667494505244, iteration: 351835
loss: 1.0013059377670288,grad_norm: 0.8836468599845078, iteration: 351836
loss: 0.9952822923660278,grad_norm: 0.7619718442704542, iteration: 351837
loss: 1.0371321439743042,grad_norm: 0.9725065534044308, iteration: 351838
loss: 1.136997103691101,grad_norm: 0.9393433510421737, iteration: 351839
loss: 1.023756980895996,grad_norm: 0.8107895294970452, iteration: 351840
loss: 1.039318323135376,grad_norm: 0.905293862631222, iteration: 351841
loss: 0.977096438407898,grad_norm: 0.7256541082585667, iteration: 351842
loss: 0.9658516645431519,grad_norm: 0.7513077042154774, iteration: 351843
loss: 1.0260632038116455,grad_norm: 0.8964627310988947, iteration: 351844
loss: 1.0691355466842651,grad_norm: 0.9999997200152596, iteration: 351845
loss: 0.9908596873283386,grad_norm: 0.773030609574126, iteration: 351846
loss: 0.9631879925727844,grad_norm: 0.8808541895582777, iteration: 351847
loss: 1.0073027610778809,grad_norm: 0.8416404913096815, iteration: 351848
loss: 1.0058621168136597,grad_norm: 0.999999200974613, iteration: 351849
loss: 1.1596707105636597,grad_norm: 0.8438956148836699, iteration: 351850
loss: 0.9844051599502563,grad_norm: 0.999999149695974, iteration: 351851
loss: 0.9726963043212891,grad_norm: 0.7108401433084643, iteration: 351852
loss: 1.063953161239624,grad_norm: 0.8697460002921279, iteration: 351853
loss: 0.9893861413002014,grad_norm: 0.9094900381378183, iteration: 351854
loss: 0.9888402223587036,grad_norm: 0.6964110054362055, iteration: 351855
loss: 0.9888834953308105,grad_norm: 0.8823438688151587, iteration: 351856
loss: 1.0185449123382568,grad_norm: 0.906114453904332, iteration: 351857
loss: 1.001699686050415,grad_norm: 0.9569876327524209, iteration: 351858
loss: 0.9877723455429077,grad_norm: 0.9085759930474177, iteration: 351859
loss: 1.1174076795578003,grad_norm: 0.9999991357657392, iteration: 351860
loss: 1.0237078666687012,grad_norm: 0.8451925641582446, iteration: 351861
loss: 0.9537768959999084,grad_norm: 0.8895505538469968, iteration: 351862
loss: 1.0134202241897583,grad_norm: 0.9443968229778031, iteration: 351863
loss: 1.000349760055542,grad_norm: 0.8235832257816812, iteration: 351864
loss: 1.007830262184143,grad_norm: 0.8280756170941208, iteration: 351865
loss: 1.053060531616211,grad_norm: 0.9999999606036155, iteration: 351866
loss: 0.9663715362548828,grad_norm: 0.7310025343671259, iteration: 351867
loss: 1.0207713842391968,grad_norm: 0.9999990083722634, iteration: 351868
loss: 0.9682528972625732,grad_norm: 0.7345140572148604, iteration: 351869
loss: 0.9776936769485474,grad_norm: 0.9999997780121828, iteration: 351870
loss: 0.9981130361557007,grad_norm: 0.8982259021562607, iteration: 351871
loss: 0.9855617880821228,grad_norm: 0.8156676300776431, iteration: 351872
loss: 0.9731555581092834,grad_norm: 0.8382096193746782, iteration: 351873
loss: 1.0090259313583374,grad_norm: 0.8890400370201355, iteration: 351874
loss: 1.0333774089813232,grad_norm: 0.8008471486745066, iteration: 351875
loss: 1.0620200634002686,grad_norm: 0.9090021693526249, iteration: 351876
loss: 1.018424153327942,grad_norm: 0.6397238435532873, iteration: 351877
loss: 1.0107512474060059,grad_norm: 0.6905493465816387, iteration: 351878
loss: 0.9809688329696655,grad_norm: 0.8639187629034771, iteration: 351879
loss: 1.003537893295288,grad_norm: 0.780756589129737, iteration: 351880
loss: 0.9711543917655945,grad_norm: 0.9999989981746688, iteration: 351881
loss: 0.9899821877479553,grad_norm: 0.8047785971393389, iteration: 351882
loss: 0.9773831963539124,grad_norm: 0.8016149286780239, iteration: 351883
loss: 1.0176705121994019,grad_norm: 0.8319770345916898, iteration: 351884
loss: 1.0118019580841064,grad_norm: 0.999999139522525, iteration: 351885
loss: 1.0015517473220825,grad_norm: 0.6825142017904171, iteration: 351886
loss: 0.9969795942306519,grad_norm: 0.789512006549734, iteration: 351887
loss: 0.9957749843597412,grad_norm: 0.7437376255275698, iteration: 351888
loss: 1.0322812795639038,grad_norm: 0.7973219730691288, iteration: 351889
loss: 1.0016872882843018,grad_norm: 0.7829185872585092, iteration: 351890
loss: 1.0199766159057617,grad_norm: 0.9999990837217712, iteration: 351891
loss: 1.0025320053100586,grad_norm: 0.7436963907035973, iteration: 351892
loss: 1.0170305967330933,grad_norm: 0.9999992823661047, iteration: 351893
loss: 1.0132598876953125,grad_norm: 0.853185339164549, iteration: 351894
loss: 1.007035255432129,grad_norm: 0.7357969467995531, iteration: 351895
loss: 0.9546082615852356,grad_norm: 0.9505699965123848, iteration: 351896
loss: 0.9972347617149353,grad_norm: 0.8294968847571899, iteration: 351897
loss: 0.9818373918533325,grad_norm: 0.8176722027262826, iteration: 351898
loss: 0.9966757893562317,grad_norm: 0.7532235848811203, iteration: 351899
loss: 1.2405844926834106,grad_norm: 0.9999990649126473, iteration: 351900
loss: 0.9773718118667603,grad_norm: 0.8487580929113422, iteration: 351901
loss: 1.0355342626571655,grad_norm: 0.8083403102136613, iteration: 351902
loss: 1.0185391902923584,grad_norm: 0.9041958925587626, iteration: 351903
loss: 1.0120460987091064,grad_norm: 0.9725400794102173, iteration: 351904
loss: 0.9917681217193604,grad_norm: 0.6282631761916746, iteration: 351905
loss: 1.0081895589828491,grad_norm: 0.9999999424606939, iteration: 351906
loss: 0.9922358393669128,grad_norm: 0.7491731659717197, iteration: 351907
loss: 1.0207990407943726,grad_norm: 0.8069352595999679, iteration: 351908
loss: 1.014316439628601,grad_norm: 0.8200340196959813, iteration: 351909
loss: 1.046829342842102,grad_norm: 0.9373403383819315, iteration: 351910
loss: 1.0103685855865479,grad_norm: 0.88430383504737, iteration: 351911
loss: 0.9335028529167175,grad_norm: 0.8253004448394954, iteration: 351912
loss: 0.9725017547607422,grad_norm: 0.6979479670442921, iteration: 351913
loss: 1.0198030471801758,grad_norm: 0.8827490386687573, iteration: 351914
loss: 1.0025399923324585,grad_norm: 0.699543859293246, iteration: 351915
loss: 1.0252392292022705,grad_norm: 0.9999991354206397, iteration: 351916
loss: 1.0044214725494385,grad_norm: 0.6964249455445279, iteration: 351917
loss: 0.9840932488441467,grad_norm: 0.9953925439663368, iteration: 351918
loss: 0.97799152135849,grad_norm: 0.9999989713760662, iteration: 351919
loss: 0.9994623064994812,grad_norm: 0.9999995112773197, iteration: 351920
loss: 0.9698330163955688,grad_norm: 0.9191238074708076, iteration: 351921
loss: 0.9941611289978027,grad_norm: 0.7229235135307879, iteration: 351922
loss: 1.0007708072662354,grad_norm: 0.9186457099314231, iteration: 351923
loss: 0.9884809851646423,grad_norm: 0.9999991136105075, iteration: 351924
loss: 1.010119915008545,grad_norm: 0.7157517983493246, iteration: 351925
loss: 0.9919658899307251,grad_norm: 0.7371403474176244, iteration: 351926
loss: 0.9984458088874817,grad_norm: 0.7292195554795851, iteration: 351927
loss: 1.018300175666809,grad_norm: 0.8123566549206449, iteration: 351928
loss: 1.009486436843872,grad_norm: 0.8701957674842801, iteration: 351929
loss: 1.0348589420318604,grad_norm: 0.9999991791879868, iteration: 351930
loss: 0.994422435760498,grad_norm: 0.9999992321743999, iteration: 351931
loss: 0.9905275106430054,grad_norm: 0.8404338294447757, iteration: 351932
loss: 0.998593270778656,grad_norm: 0.7504520397009318, iteration: 351933
loss: 0.9649021625518799,grad_norm: 0.942883064097245, iteration: 351934
loss: 0.9770329594612122,grad_norm: 0.8828256604968189, iteration: 351935
loss: 1.0007379055023193,grad_norm: 0.729119814362269, iteration: 351936
loss: 1.024200201034546,grad_norm: 0.7546186684184986, iteration: 351937
loss: 0.9810661673545837,grad_norm: 0.9999990560059532, iteration: 351938
loss: 1.0056958198547363,grad_norm: 0.9114297633788274, iteration: 351939
loss: 1.0054618120193481,grad_norm: 0.9086712401754159, iteration: 351940
loss: 0.9966721534729004,grad_norm: 0.8693034027111006, iteration: 351941
loss: 1.032510757446289,grad_norm: 0.7887134747763465, iteration: 351942
loss: 1.0006842613220215,grad_norm: 0.711162119331321, iteration: 351943
loss: 1.015714406967163,grad_norm: 0.8689245111004417, iteration: 351944
loss: 1.0143319368362427,grad_norm: 0.7937547355312209, iteration: 351945
loss: 1.008431315422058,grad_norm: 0.6986422817511927, iteration: 351946
loss: 0.9866589307785034,grad_norm: 0.9999992296133162, iteration: 351947
loss: 0.9971187710762024,grad_norm: 0.6711473793550091, iteration: 351948
loss: 0.9846174716949463,grad_norm: 0.7141456741491631, iteration: 351949
loss: 1.023518443107605,grad_norm: 0.9999992815588262, iteration: 351950
loss: 1.034932017326355,grad_norm: 0.7516767006232289, iteration: 351951
loss: 0.9726981520652771,grad_norm: 0.8886713388094384, iteration: 351952
loss: 1.026477336883545,grad_norm: 0.8292531426153825, iteration: 351953
loss: 0.9878811240196228,grad_norm: 0.8253543367395282, iteration: 351954
loss: 1.0055190324783325,grad_norm: 0.9406255201274389, iteration: 351955
loss: 1.018217921257019,grad_norm: 0.8838474775624751, iteration: 351956
loss: 1.0216469764709473,grad_norm: 0.8619220731856218, iteration: 351957
loss: 0.9783039093017578,grad_norm: 0.8402842772870798, iteration: 351958
loss: 0.9589336514472961,grad_norm: 0.795262204835442, iteration: 351959
loss: 0.9860731959342957,grad_norm: 0.8319168600881535, iteration: 351960
loss: 0.9801743030548096,grad_norm: 0.9999990576239788, iteration: 351961
loss: 1.0025744438171387,grad_norm: 0.9999989882962614, iteration: 351962
loss: 1.0289227962493896,grad_norm: 0.999999157480456, iteration: 351963
loss: 1.0162962675094604,grad_norm: 0.9999993417926569, iteration: 351964
loss: 1.0012396574020386,grad_norm: 0.9860196811175737, iteration: 351965
loss: 1.0570513010025024,grad_norm: 0.9999990239088808, iteration: 351966
loss: 0.9789537787437439,grad_norm: 0.9798328195172298, iteration: 351967
loss: 1.000162124633789,grad_norm: 0.80044372282906, iteration: 351968
loss: 0.9871105551719666,grad_norm: 0.9615554811943847, iteration: 351969
loss: 0.9913720488548279,grad_norm: 0.9999990377594009, iteration: 351970
loss: 1.0013550519943237,grad_norm: 0.8812699263843515, iteration: 351971
loss: 0.9913485646247864,grad_norm: 0.9999992816972889, iteration: 351972
loss: 1.0396426916122437,grad_norm: 0.9270053372374726, iteration: 351973
loss: 1.012834906578064,grad_norm: 0.8394633371803701, iteration: 351974
loss: 1.008258581161499,grad_norm: 0.9999993428632484, iteration: 351975
loss: 0.9950302243232727,grad_norm: 0.7702267975202922, iteration: 351976
loss: 0.9931489825248718,grad_norm: 0.7824986652573541, iteration: 351977
loss: 1.0040004253387451,grad_norm: 0.7829522262564007, iteration: 351978
loss: 1.0103719234466553,grad_norm: 0.834438018108098, iteration: 351979
loss: 0.9767579436302185,grad_norm: 0.8595279193240172, iteration: 351980
loss: 0.9829124808311462,grad_norm: 0.8094877341684102, iteration: 351981
loss: 0.9948654174804688,grad_norm: 0.7393956784344513, iteration: 351982
loss: 1.0271724462509155,grad_norm: 0.7601754268106212, iteration: 351983
loss: 1.0020443201065063,grad_norm: 0.8292483947558159, iteration: 351984
loss: 0.9719128608703613,grad_norm: 0.7988538755256681, iteration: 351985
loss: 0.9905020594596863,grad_norm: 0.6845753912209089, iteration: 351986
loss: 0.9812554121017456,grad_norm: 0.8405542297002275, iteration: 351987
loss: 0.9655858874320984,grad_norm: 0.8645904627539777, iteration: 351988
loss: 0.9796079993247986,grad_norm: 0.7880909591722768, iteration: 351989
loss: 1.005359172821045,grad_norm: 0.7591232888825149, iteration: 351990
loss: 0.9812174439430237,grad_norm: 0.816639911524269, iteration: 351991
loss: 1.0397061109542847,grad_norm: 0.9514752732102577, iteration: 351992
loss: 0.9644117951393127,grad_norm: 0.8999171367430657, iteration: 351993
loss: 1.0026508569717407,grad_norm: 0.7952922992085544, iteration: 351994
loss: 0.9717738032341003,grad_norm: 0.9724552277260166, iteration: 351995
loss: 0.9822769165039062,grad_norm: 0.9999991959231099, iteration: 351996
loss: 1.0210397243499756,grad_norm: 0.8445877226204443, iteration: 351997
loss: 1.0268149375915527,grad_norm: 0.7950417004842242, iteration: 351998
loss: 1.0160194635391235,grad_norm: 0.9780935182785857, iteration: 351999
loss: 0.9985997080802917,grad_norm: 0.9972588867833317, iteration: 352000
loss: 0.9324901103973389,grad_norm: 0.7966022454730347, iteration: 352001
loss: 0.9900073409080505,grad_norm: 0.8899617327619189, iteration: 352002
loss: 0.9726153016090393,grad_norm: 0.9371053428306289, iteration: 352003
loss: 1.0851835012435913,grad_norm: 0.9999990953863771, iteration: 352004
loss: 0.9936652779579163,grad_norm: 0.8544962285277095, iteration: 352005
loss: 1.000515341758728,grad_norm: 0.8827034730464005, iteration: 352006
loss: 0.9829214215278625,grad_norm: 0.8599136269059873, iteration: 352007
loss: 1.0023324489593506,grad_norm: 0.9524830778536469, iteration: 352008
loss: 0.9935899376869202,grad_norm: 0.8993530579765648, iteration: 352009
loss: 0.9926954507827759,grad_norm: 0.8786423071778525, iteration: 352010
loss: 1.0103296041488647,grad_norm: 0.7235057521961639, iteration: 352011
loss: 1.012161374092102,grad_norm: 0.9999991314690878, iteration: 352012
loss: 1.0430022478103638,grad_norm: 0.9999999863521543, iteration: 352013
loss: 0.9918667078018188,grad_norm: 0.9195028323870822, iteration: 352014
loss: 1.0112802982330322,grad_norm: 0.8618403424576269, iteration: 352015
loss: 0.9952778816223145,grad_norm: 0.9999995268651707, iteration: 352016
loss: 1.0119117498397827,grad_norm: 0.7954133623577083, iteration: 352017
loss: 0.988243818283081,grad_norm: 0.7463804670012831, iteration: 352018
loss: 0.9887104034423828,grad_norm: 0.6975205431308958, iteration: 352019
loss: 0.984910249710083,grad_norm: 0.9999997219852752, iteration: 352020
loss: 0.9960391521453857,grad_norm: 0.7780110385987979, iteration: 352021
loss: 1.0222530364990234,grad_norm: 0.7074772203963623, iteration: 352022
loss: 0.9777131080627441,grad_norm: 0.8906884694065745, iteration: 352023
loss: 0.9508380889892578,grad_norm: 0.9999989586676555, iteration: 352024
loss: 1.0219395160675049,grad_norm: 0.904469942591503, iteration: 352025
loss: 1.0025194883346558,grad_norm: 0.6931535531746639, iteration: 352026
loss: 0.9803879857063293,grad_norm: 0.8068097968422298, iteration: 352027
loss: 0.9906725883483887,grad_norm: 0.8832976664319254, iteration: 352028
loss: 1.1650559902191162,grad_norm: 0.9999996573537601, iteration: 352029
loss: 0.9851061105728149,grad_norm: 0.7146637532156426, iteration: 352030
loss: 1.0041072368621826,grad_norm: 0.8007534777768559, iteration: 352031
loss: 1.0121979713439941,grad_norm: 0.9999994053077005, iteration: 352032
loss: 1.007737159729004,grad_norm: 0.708439237250847, iteration: 352033
loss: 1.0060222148895264,grad_norm: 0.6641353421065127, iteration: 352034
loss: 0.9903640747070312,grad_norm: 0.7794191630355917, iteration: 352035
loss: 0.9899241924285889,grad_norm: 0.8848045462705287, iteration: 352036
loss: 1.0136607885360718,grad_norm: 0.7496638985953401, iteration: 352037
loss: 1.0144197940826416,grad_norm: 0.8719259737245231, iteration: 352038
loss: 0.980995237827301,grad_norm: 0.696993589606053, iteration: 352039
loss: 1.0334100723266602,grad_norm: 0.8250307681613263, iteration: 352040
loss: 0.9666255116462708,grad_norm: 0.7619921481479085, iteration: 352041
loss: 0.9862146973609924,grad_norm: 0.9330312126751295, iteration: 352042
loss: 0.9980819821357727,grad_norm: 0.755844624410978, iteration: 352043
loss: 0.9830671548843384,grad_norm: 0.8646157910218442, iteration: 352044
loss: 1.0340921878814697,grad_norm: 0.7930826440740687, iteration: 352045
loss: 1.0808649063110352,grad_norm: 0.9166014434057672, iteration: 352046
loss: 0.9736316204071045,grad_norm: 0.9999993264671724, iteration: 352047
loss: 0.9974936842918396,grad_norm: 0.7546100437664872, iteration: 352048
loss: 1.0130581855773926,grad_norm: 0.776727204555348, iteration: 352049
loss: 0.9782171845436096,grad_norm: 0.9599815185453495, iteration: 352050
loss: 0.9626277685165405,grad_norm: 0.9999992099998091, iteration: 352051
loss: 0.9780828356742859,grad_norm: 0.8913922133268338, iteration: 352052
loss: 0.9851619601249695,grad_norm: 0.8526394761112134, iteration: 352053
loss: 0.9509380459785461,grad_norm: 0.8739106734258492, iteration: 352054
loss: 1.031567931175232,grad_norm: 0.70724367038689, iteration: 352055
loss: 0.9680909514427185,grad_norm: 0.726869192178202, iteration: 352056
loss: 1.0120428800582886,grad_norm: 0.7427807328979973, iteration: 352057
loss: 1.0056859254837036,grad_norm: 0.8041241129533299, iteration: 352058
loss: 1.173866629600525,grad_norm: 0.9999991184211123, iteration: 352059
loss: 0.9933522939682007,grad_norm: 0.7769916887199877, iteration: 352060
loss: 0.992911696434021,grad_norm: 0.7706255007258797, iteration: 352061
loss: 1.0173343420028687,grad_norm: 0.9068808564567384, iteration: 352062
loss: 1.0011543035507202,grad_norm: 0.8941106217499153, iteration: 352063
loss: 0.9600667953491211,grad_norm: 0.8481054023282885, iteration: 352064
loss: 0.975545346736908,grad_norm: 0.7041162018725446, iteration: 352065
loss: 0.998407781124115,grad_norm: 0.8633464038563997, iteration: 352066
loss: 0.9713574647903442,grad_norm: 0.6797728103157, iteration: 352067
loss: 1.0061208009719849,grad_norm: 0.723511504591066, iteration: 352068
loss: 0.9643723964691162,grad_norm: 0.8191966044761786, iteration: 352069
loss: 0.9676637053489685,grad_norm: 0.6613629362143637, iteration: 352070
loss: 1.0041040182113647,grad_norm: 0.8320445123605378, iteration: 352071
loss: 1.0279425382614136,grad_norm: 0.6795039361296894, iteration: 352072
loss: 1.0133826732635498,grad_norm: 0.7567133309593451, iteration: 352073
loss: 0.9698349833488464,grad_norm: 0.8312627589834274, iteration: 352074
loss: 1.0067230463027954,grad_norm: 0.8543206062429055, iteration: 352075
loss: 1.0139011144638062,grad_norm: 0.8309479882505181, iteration: 352076
loss: 0.9841758012771606,grad_norm: 0.8766725335771839, iteration: 352077
loss: 0.96942538022995,grad_norm: 0.8721258917023648, iteration: 352078
loss: 1.044136643409729,grad_norm: 0.8747169074332013, iteration: 352079
loss: 0.9909723997116089,grad_norm: 0.9544548972563346, iteration: 352080
loss: 0.9998252391815186,grad_norm: 0.7797514320648073, iteration: 352081
loss: 1.0361926555633545,grad_norm: 0.7884138741448865, iteration: 352082
loss: 0.9910027384757996,grad_norm: 0.6839093630287685, iteration: 352083
loss: 0.9737118482589722,grad_norm: 0.7449477259969215, iteration: 352084
loss: 0.9979891180992126,grad_norm: 0.7516463089463665, iteration: 352085
loss: 0.9878418445587158,grad_norm: 0.8418248032182353, iteration: 352086
loss: 1.0231194496154785,grad_norm: 0.9999997349823758, iteration: 352087
loss: 0.9961423277854919,grad_norm: 0.79617820950516, iteration: 352088
loss: 0.9926379323005676,grad_norm: 0.9620233239836256, iteration: 352089
loss: 1.0267338752746582,grad_norm: 0.8502764750038226, iteration: 352090
loss: 1.0433436632156372,grad_norm: 0.9753026458372817, iteration: 352091
loss: 1.0082226991653442,grad_norm: 0.935519334307684, iteration: 352092
loss: 0.9688313603401184,grad_norm: 0.8022994306501934, iteration: 352093
loss: 1.020919919013977,grad_norm: 0.692967913164864, iteration: 352094
loss: 1.0128380060195923,grad_norm: 0.8020605908392759, iteration: 352095
loss: 1.0173799991607666,grad_norm: 0.7590654266289116, iteration: 352096
loss: 1.0262969732284546,grad_norm: 0.8825106236217165, iteration: 352097
loss: 1.1056170463562012,grad_norm: 0.9114671959389753, iteration: 352098
loss: 1.0191457271575928,grad_norm: 0.968309452098227, iteration: 352099
loss: 1.0100432634353638,grad_norm: 0.9525201372561429, iteration: 352100
loss: 0.982871949672699,grad_norm: 0.8463051138593153, iteration: 352101
loss: 1.0323219299316406,grad_norm: 0.8125441499781648, iteration: 352102
loss: 0.9953902363777161,grad_norm: 0.8620551745585149, iteration: 352103
loss: 0.9766001105308533,grad_norm: 0.909866858102692, iteration: 352104
loss: 0.9856690764427185,grad_norm: 0.7737240151775936, iteration: 352105
loss: 1.0041176080703735,grad_norm: 0.8321857711654407, iteration: 352106
loss: 0.9894876480102539,grad_norm: 0.8229872288057886, iteration: 352107
loss: 0.9894129633903503,grad_norm: 0.8670607972200645, iteration: 352108
loss: 1.0476875305175781,grad_norm: 0.8836439526356399, iteration: 352109
loss: 0.9600562453269958,grad_norm: 0.7459828530009338, iteration: 352110
loss: 0.9873652458190918,grad_norm: 0.703651135410831, iteration: 352111
loss: 0.9831190705299377,grad_norm: 0.7468323638556987, iteration: 352112
loss: 0.9915087223052979,grad_norm: 0.77131018309529, iteration: 352113
loss: 0.9680159091949463,grad_norm: 0.7994431473309431, iteration: 352114
loss: 0.9768577814102173,grad_norm: 0.9642601415850585, iteration: 352115
loss: 0.9771138429641724,grad_norm: 0.8016736312150097, iteration: 352116
loss: 0.9802668690681458,grad_norm: 0.7807692263973623, iteration: 352117
loss: 1.0169501304626465,grad_norm: 0.7757917298469478, iteration: 352118
loss: 1.0050222873687744,grad_norm: 0.9030296499705882, iteration: 352119
loss: 1.0027347803115845,grad_norm: 0.7063301179631452, iteration: 352120
loss: 1.032106876373291,grad_norm: 0.8811755851116029, iteration: 352121
loss: 1.0079760551452637,grad_norm: 0.7256172542062271, iteration: 352122
loss: 1.000260829925537,grad_norm: 0.9999993282341844, iteration: 352123
loss: 0.9754313230514526,grad_norm: 0.7397972559004348, iteration: 352124
loss: 1.0006893873214722,grad_norm: 0.778904893632324, iteration: 352125
loss: 1.0060944557189941,grad_norm: 0.8605779914334794, iteration: 352126
loss: 0.9857991337776184,grad_norm: 0.8947885522238137, iteration: 352127
loss: 1.0121138095855713,grad_norm: 0.9766291082305281, iteration: 352128
loss: 1.0078448057174683,grad_norm: 0.904440464612372, iteration: 352129
loss: 0.9938907027244568,grad_norm: 0.6710793459702327, iteration: 352130
loss: 1.014647364616394,grad_norm: 0.805791901755636, iteration: 352131
loss: 0.9968117475509644,grad_norm: 0.9999990389011809, iteration: 352132
loss: 0.9983147382736206,grad_norm: 0.7456888293015023, iteration: 352133
loss: 1.005362868309021,grad_norm: 0.8466324678833955, iteration: 352134
loss: 1.016358494758606,grad_norm: 0.9244845811618364, iteration: 352135
loss: 1.0036848783493042,grad_norm: 0.7944282564443657, iteration: 352136
loss: 1.0271743535995483,grad_norm: 0.8899985304579858, iteration: 352137
loss: 0.9930639863014221,grad_norm: 0.7255057372573025, iteration: 352138
loss: 0.9993727803230286,grad_norm: 0.8195007546048849, iteration: 352139
loss: 1.0683950185775757,grad_norm: 0.9999990784619978, iteration: 352140
loss: 0.9908694624900818,grad_norm: 0.8174862866336092, iteration: 352141
loss: 0.9863197207450867,grad_norm: 0.7405602344262332, iteration: 352142
loss: 1.0255577564239502,grad_norm: 0.839959155845708, iteration: 352143
loss: 1.0036745071411133,grad_norm: 0.7574938217209196, iteration: 352144
loss: 0.987952709197998,grad_norm: 0.789085253143363, iteration: 352145
loss: 1.0099138021469116,grad_norm: 0.6585051645996409, iteration: 352146
loss: 1.046950340270996,grad_norm: 0.6988350095941832, iteration: 352147
loss: 0.9935818910598755,grad_norm: 0.7389507052801831, iteration: 352148
loss: 0.9808169603347778,grad_norm: 0.8408918775955456, iteration: 352149
loss: 1.122253656387329,grad_norm: 0.9999999166471057, iteration: 352150
loss: 0.9836965799331665,grad_norm: 0.8345580282023015, iteration: 352151
loss: 0.9875195026397705,grad_norm: 0.8669385413401698, iteration: 352152
loss: 0.9865954518318176,grad_norm: 0.7948071339473453, iteration: 352153
loss: 1.0069502592086792,grad_norm: 0.8767296488247145, iteration: 352154
loss: 1.0058621168136597,grad_norm: 0.9999998744379618, iteration: 352155
loss: 1.0204776525497437,grad_norm: 0.8467941048449158, iteration: 352156
loss: 1.0060646533966064,grad_norm: 0.877932565734317, iteration: 352157
loss: 0.9927260875701904,grad_norm: 0.8262187601800036, iteration: 352158
loss: 1.0007656812667847,grad_norm: 0.8839086374081475, iteration: 352159
loss: 0.9520193934440613,grad_norm: 0.779309226476939, iteration: 352160
loss: 1.0298105478286743,grad_norm: 0.9999991195059286, iteration: 352161
loss: 1.002004623413086,grad_norm: 0.7815887314224723, iteration: 352162
loss: 0.9937930107116699,grad_norm: 0.7319466123518845, iteration: 352163
loss: 1.0200843811035156,grad_norm: 0.8843048696997152, iteration: 352164
loss: 0.9605897665023804,grad_norm: 0.7708704248524314, iteration: 352165
loss: 0.9932143688201904,grad_norm: 0.8533185460893769, iteration: 352166
loss: 1.0039362907409668,grad_norm: 0.7459394188928324, iteration: 352167
loss: 0.9985051155090332,grad_norm: 0.7280769239091409, iteration: 352168
loss: 1.0121968984603882,grad_norm: 0.999999383854605, iteration: 352169
loss: 1.0503736734390259,grad_norm: 0.7954798121397713, iteration: 352170
loss: 1.036612868309021,grad_norm: 0.8480739963304568, iteration: 352171
loss: 0.9920017123222351,grad_norm: 0.7836099781025629, iteration: 352172
loss: 0.9961724281311035,grad_norm: 0.8063263447980079, iteration: 352173
loss: 0.9883497357368469,grad_norm: 0.8951248400433766, iteration: 352174
loss: 0.9666423201560974,grad_norm: 0.7439007421872041, iteration: 352175
loss: 0.9961963295936584,grad_norm: 0.7187312848764594, iteration: 352176
loss: 0.9699430465698242,grad_norm: 0.9999990340558627, iteration: 352177
loss: 1.0263437032699585,grad_norm: 0.8411086272136482, iteration: 352178
loss: 1.001386284828186,grad_norm: 0.8210379935382812, iteration: 352179
loss: 0.9790238738059998,grad_norm: 0.7056691848749556, iteration: 352180
loss: 1.015501856803894,grad_norm: 0.809056338069828, iteration: 352181
loss: 0.972093403339386,grad_norm: 0.6644817068124681, iteration: 352182
loss: 0.9857291579246521,grad_norm: 0.7612997668028257, iteration: 352183
loss: 1.0338608026504517,grad_norm: 0.7672841052621371, iteration: 352184
loss: 1.019041657447815,grad_norm: 0.6756425523956808, iteration: 352185
loss: 0.9746508002281189,grad_norm: 0.6817811624760471, iteration: 352186
loss: 0.9957079887390137,grad_norm: 0.8706120040053387, iteration: 352187
loss: 0.9654324650764465,grad_norm: 0.8621060586073898, iteration: 352188
loss: 0.9886053800582886,grad_norm: 0.7418386083971978, iteration: 352189
loss: 0.9891965985298157,grad_norm: 0.7861288539541085, iteration: 352190
loss: 0.961656391620636,grad_norm: 0.8015076745015696, iteration: 352191
loss: 0.9930489659309387,grad_norm: 0.9999996962405411, iteration: 352192
loss: 0.9680227041244507,grad_norm: 0.6751489403157277, iteration: 352193
loss: 1.0190045833587646,grad_norm: 0.7043035569368028, iteration: 352194
loss: 0.9791566729545593,grad_norm: 0.8253760135772359, iteration: 352195
loss: 1.0134516954421997,grad_norm: 0.8126260249808664, iteration: 352196
loss: 0.9860040545463562,grad_norm: 0.8247780504199198, iteration: 352197
loss: 1.0084408521652222,grad_norm: 0.7637377233324778, iteration: 352198
loss: 1.0023521184921265,grad_norm: 0.9999991900472119, iteration: 352199
loss: 0.9699026942253113,grad_norm: 0.8004729831691163, iteration: 352200
loss: 0.9567405581474304,grad_norm: 0.8116483873925181, iteration: 352201
loss: 1.0148333311080933,grad_norm: 0.6669040118498798, iteration: 352202
loss: 1.007053256034851,grad_norm: 0.9999996534796378, iteration: 352203
loss: 0.9976749420166016,grad_norm: 0.8474118890497272, iteration: 352204
loss: 1.0148557424545288,grad_norm: 0.6234717450448821, iteration: 352205
loss: 1.002244472503662,grad_norm: 0.767193076230409, iteration: 352206
loss: 1.0076969861984253,grad_norm: 0.9010954072488506, iteration: 352207
loss: 1.0264313220977783,grad_norm: 0.9999991912021274, iteration: 352208
loss: 1.14356529712677,grad_norm: 0.9999994985741172, iteration: 352209
loss: 1.1282554864883423,grad_norm: 0.9999998566385784, iteration: 352210
loss: 1.003623127937317,grad_norm: 0.9984903958437896, iteration: 352211
loss: 1.000527262687683,grad_norm: 0.8414887405729987, iteration: 352212
loss: 1.0040934085845947,grad_norm: 0.6829635768817558, iteration: 352213
loss: 1.0409131050109863,grad_norm: 0.8336238090934225, iteration: 352214
loss: 0.9729446768760681,grad_norm: 0.8809983384622788, iteration: 352215
loss: 0.9984253644943237,grad_norm: 0.8277411179084673, iteration: 352216
loss: 0.9909834265708923,grad_norm: 0.8407805381982346, iteration: 352217
loss: 1.00650954246521,grad_norm: 0.8490563222720836, iteration: 352218
loss: 1.0257748365402222,grad_norm: 0.9621754376461359, iteration: 352219
loss: 1.0413496494293213,grad_norm: 0.8514269130685924, iteration: 352220
loss: 1.0113589763641357,grad_norm: 0.7798624854248437, iteration: 352221
loss: 1.0302207469940186,grad_norm: 0.7367473527822331, iteration: 352222
loss: 0.9861787557601929,grad_norm: 0.8508977898793462, iteration: 352223
loss: 1.0212360620498657,grad_norm: 0.9999991153515171, iteration: 352224
loss: 1.0185799598693848,grad_norm: 0.7153601666485928, iteration: 352225
loss: 0.9961222410202026,grad_norm: 0.7751391216390284, iteration: 352226
loss: 0.9837548136711121,grad_norm: 0.8199141699198141, iteration: 352227
loss: 0.9794177412986755,grad_norm: 0.8184198180265604, iteration: 352228
loss: 1.0051164627075195,grad_norm: 0.9162107366805302, iteration: 352229
loss: 0.971347987651825,grad_norm: 0.8388461218107194, iteration: 352230
loss: 0.9760622382164001,grad_norm: 0.8762693202641424, iteration: 352231
loss: 0.9946727752685547,grad_norm: 0.7739400693783794, iteration: 352232
loss: 1.001841425895691,grad_norm: 0.7306274539479009, iteration: 352233
loss: 0.9972400069236755,grad_norm: 0.9481640101704157, iteration: 352234
loss: 0.9892305135726929,grad_norm: 0.8731533912712721, iteration: 352235
loss: 0.9962618350982666,grad_norm: 0.8333443074143108, iteration: 352236
loss: 0.9753949046134949,grad_norm: 0.8810079084256673, iteration: 352237
loss: 1.0296101570129395,grad_norm: 0.7810357534609008, iteration: 352238
loss: 0.9861655235290527,grad_norm: 0.8626149088105751, iteration: 352239
loss: 0.9536553025245667,grad_norm: 0.7961604536690053, iteration: 352240
loss: 0.9935843348503113,grad_norm: 0.7643647875453435, iteration: 352241
loss: 0.9939919114112854,grad_norm: 0.8425052389515658, iteration: 352242
loss: 0.9600657224655151,grad_norm: 0.8313506189204564, iteration: 352243
loss: 1.0124374628067017,grad_norm: 0.9999996827687055, iteration: 352244
loss: 0.9943668842315674,grad_norm: 0.8116444484126165, iteration: 352245
loss: 1.0322096347808838,grad_norm: 0.9999992985820291, iteration: 352246
loss: 1.048975944519043,grad_norm: 0.7744008079870788, iteration: 352247
loss: 0.9789248704910278,grad_norm: 0.6910567532075985, iteration: 352248
loss: 0.9815672039985657,grad_norm: 0.9999991231758415, iteration: 352249
loss: 1.0100253820419312,grad_norm: 0.8659297283061502, iteration: 352250
loss: 0.9732709527015686,grad_norm: 0.709004490923061, iteration: 352251
loss: 1.0853277444839478,grad_norm: 0.9999994261507843, iteration: 352252
loss: 0.972329318523407,grad_norm: 0.8191102496483723, iteration: 352253
loss: 1.0144360065460205,grad_norm: 0.7915437180842276, iteration: 352254
loss: 0.9945911169052124,grad_norm: 0.8319599612779583, iteration: 352255
loss: 1.008291482925415,grad_norm: 0.8123120079060971, iteration: 352256
loss: 1.0125012397766113,grad_norm: 0.8482945683203775, iteration: 352257
loss: 1.0326814651489258,grad_norm: 0.9445858670325951, iteration: 352258
loss: 1.0003397464752197,grad_norm: 0.7958368741938576, iteration: 352259
loss: 1.023063063621521,grad_norm: 0.8140308400082259, iteration: 352260
loss: 1.015730619430542,grad_norm: 0.8711170771946577, iteration: 352261
loss: 1.0164073705673218,grad_norm: 0.955646767371909, iteration: 352262
loss: 0.973935067653656,grad_norm: 0.9999990395519506, iteration: 352263
loss: 0.9906900525093079,grad_norm: 0.9999990881306481, iteration: 352264
loss: 0.9981331825256348,grad_norm: 0.686052403619082, iteration: 352265
loss: 0.9752824306488037,grad_norm: 0.7438854324130529, iteration: 352266
loss: 0.9814643263816833,grad_norm: 0.7093814376474026, iteration: 352267
loss: 1.010316252708435,grad_norm: 0.9393641310165308, iteration: 352268
loss: 1.0140889883041382,grad_norm: 0.8711955867907151, iteration: 352269
loss: 1.0163328647613525,grad_norm: 0.8604276021156183, iteration: 352270
loss: 1.0107831954956055,grad_norm: 0.7781668972634888, iteration: 352271
loss: 1.0410951375961304,grad_norm: 0.8506464346577703, iteration: 352272
loss: 0.9855495691299438,grad_norm: 0.8071242905807159, iteration: 352273
loss: 0.9757170081138611,grad_norm: 0.7970889157428165, iteration: 352274
loss: 0.9740666151046753,grad_norm: 0.8724447002289002, iteration: 352275
loss: 1.0233485698699951,grad_norm: 0.8872071138751145, iteration: 352276
loss: 1.0153608322143555,grad_norm: 0.7509892476365971, iteration: 352277
loss: 1.0249134302139282,grad_norm: 0.9999992106564713, iteration: 352278
loss: 1.00771963596344,grad_norm: 0.8139423326470466, iteration: 352279
loss: 1.0249741077423096,grad_norm: 0.9999991137379296, iteration: 352280
loss: 0.9988366365432739,grad_norm: 0.8643949627366058, iteration: 352281
loss: 0.999394416809082,grad_norm: 0.6600151767016483, iteration: 352282
loss: 1.0168622732162476,grad_norm: 0.999999144313385, iteration: 352283
loss: 0.9671314358711243,grad_norm: 0.8285182430441839, iteration: 352284
loss: 1.0452274084091187,grad_norm: 0.8012249866819436, iteration: 352285
loss: 0.9935967922210693,grad_norm: 0.9458167480979147, iteration: 352286
loss: 1.005079984664917,grad_norm: 0.698445474207646, iteration: 352287
loss: 0.9812893271446228,grad_norm: 0.9999992110231563, iteration: 352288
loss: 0.9884498119354248,grad_norm: 0.8337093953922355, iteration: 352289
loss: 1.0115400552749634,grad_norm: 0.9072712834357104, iteration: 352290
loss: 1.0183671712875366,grad_norm: 0.6774185171275209, iteration: 352291
loss: 1.0344034433364868,grad_norm: 0.9999999201312051, iteration: 352292
loss: 0.9873835444450378,grad_norm: 0.7852152851898955, iteration: 352293
loss: 0.9586803913116455,grad_norm: 0.8156097832851912, iteration: 352294
loss: 1.041335105895996,grad_norm: 0.7964228751622734, iteration: 352295
loss: 0.9580657482147217,grad_norm: 0.8904247931328562, iteration: 352296
loss: 1.0381046533584595,grad_norm: 0.9999992520804147, iteration: 352297
loss: 0.9631410837173462,grad_norm: 0.8198713485571039, iteration: 352298
loss: 0.9993525147438049,grad_norm: 0.9430842720577642, iteration: 352299
loss: 0.9901020526885986,grad_norm: 0.864687211748722, iteration: 352300
loss: 1.0262318849563599,grad_norm: 0.78542051886505, iteration: 352301
loss: 1.0147149562835693,grad_norm: 0.7556174996832681, iteration: 352302
loss: 1.0082008838653564,grad_norm: 0.8477997229957952, iteration: 352303
loss: 0.9665766358375549,grad_norm: 0.9127717865559479, iteration: 352304
loss: 0.975287139415741,grad_norm: 0.9641110477886482, iteration: 352305
loss: 0.9761136770248413,grad_norm: 0.7061146697753301, iteration: 352306
loss: 0.9764290452003479,grad_norm: 0.7253202703479477, iteration: 352307
loss: 0.9855060577392578,grad_norm: 0.8807301907273499, iteration: 352308
loss: 0.9738219380378723,grad_norm: 0.9354407820124593, iteration: 352309
loss: 0.987126886844635,grad_norm: 0.7727944440781427, iteration: 352310
loss: 0.9998348951339722,grad_norm: 0.9003437548658767, iteration: 352311
loss: 0.9913806319236755,grad_norm: 0.9226304044106655, iteration: 352312
loss: 1.0226272344589233,grad_norm: 0.872914709759324, iteration: 352313
loss: 1.0294103622436523,grad_norm: 0.700269496860051, iteration: 352314
loss: 0.9986236691474915,grad_norm: 0.7081452015426509, iteration: 352315
loss: 1.0235395431518555,grad_norm: 0.9999993202368213, iteration: 352316
loss: 0.9932686686515808,grad_norm: 0.8818162366654756, iteration: 352317
loss: 0.9889633059501648,grad_norm: 0.8914106922933239, iteration: 352318
loss: 1.0174028873443604,grad_norm: 0.8004094899924257, iteration: 352319
loss: 1.0152478218078613,grad_norm: 0.7740170733637554, iteration: 352320
loss: 1.0140186548233032,grad_norm: 0.8303727273330674, iteration: 352321
loss: 0.9822551012039185,grad_norm: 0.9529298676401504, iteration: 352322
loss: 1.0348435640335083,grad_norm: 0.9601999776491215, iteration: 352323
loss: 0.9796236157417297,grad_norm: 0.7610560681810943, iteration: 352324
loss: 0.982390820980072,grad_norm: 0.7599872880206683, iteration: 352325
loss: 1.0006386041641235,grad_norm: 0.7346699947421853, iteration: 352326
loss: 0.9913209676742554,grad_norm: 0.8463035408710979, iteration: 352327
loss: 0.989510178565979,grad_norm: 0.8737558711054348, iteration: 352328
loss: 0.9510957598686218,grad_norm: 0.8451116623511422, iteration: 352329
loss: 0.9498885869979858,grad_norm: 0.9999989687315701, iteration: 352330
loss: 0.9911542534828186,grad_norm: 0.8480721337960357, iteration: 352331
loss: 0.9888959527015686,grad_norm: 0.6745153220234381, iteration: 352332
loss: 0.9749026298522949,grad_norm: 0.8969077238458459, iteration: 352333
loss: 0.9867607951164246,grad_norm: 0.9813071993611687, iteration: 352334
loss: 0.9837056994438171,grad_norm: 0.9066432471074878, iteration: 352335
loss: 0.9691981673240662,grad_norm: 0.7106121544030743, iteration: 352336
loss: 0.9929794073104858,grad_norm: 0.8259898296365242, iteration: 352337
loss: 0.9881210327148438,grad_norm: 0.8236061722874343, iteration: 352338
loss: 0.9941564798355103,grad_norm: 0.7722772655810468, iteration: 352339
loss: 0.9994866847991943,grad_norm: 0.8857330196982424, iteration: 352340
loss: 0.9806363582611084,grad_norm: 0.7814591348414779, iteration: 352341
loss: 1.0816843509674072,grad_norm: 0.847497196650158, iteration: 352342
loss: 1.0243451595306396,grad_norm: 0.7692899877858222, iteration: 352343
loss: 0.9981387853622437,grad_norm: 0.796249545508619, iteration: 352344
loss: 1.0292836427688599,grad_norm: 0.9999990421577745, iteration: 352345
loss: 0.9621891975402832,grad_norm: 0.8325258165297268, iteration: 352346
loss: 1.0181868076324463,grad_norm: 0.8368375641533432, iteration: 352347
loss: 1.0253766775131226,grad_norm: 0.6481338416130048, iteration: 352348
loss: 0.9796518683433533,grad_norm: 0.769367058941586, iteration: 352349
loss: 0.9803690314292908,grad_norm: 0.8502000388314469, iteration: 352350
loss: 0.9827632904052734,grad_norm: 0.6600867089551924, iteration: 352351
loss: 1.0114319324493408,grad_norm: 0.7225050804928274, iteration: 352352
loss: 0.9937921762466431,grad_norm: 0.7033583697460718, iteration: 352353
loss: 1.0048894882202148,grad_norm: 0.7792829075481124, iteration: 352354
loss: 1.0612565279006958,grad_norm: 0.9999998676443227, iteration: 352355
loss: 0.9945684671401978,grad_norm: 0.8226182360547957, iteration: 352356
loss: 0.9586573243141174,grad_norm: 0.7955517121372837, iteration: 352357
loss: 0.9988137483596802,grad_norm: 0.7469834678070453, iteration: 352358
loss: 0.9825273156166077,grad_norm: 0.9694907928842086, iteration: 352359
loss: 0.9739307165145874,grad_norm: 0.7883216608900614, iteration: 352360
loss: 1.0115243196487427,grad_norm: 0.8667680431556302, iteration: 352361
loss: 0.9983928203582764,grad_norm: 0.9999991291856551, iteration: 352362
loss: 0.9972973465919495,grad_norm: 0.9367819509176532, iteration: 352363
loss: 0.9838656783103943,grad_norm: 0.7700458755583561, iteration: 352364
loss: 1.0010828971862793,grad_norm: 0.6993448823705329, iteration: 352365
loss: 1.0332106351852417,grad_norm: 0.9999999344267583, iteration: 352366
loss: 0.9940841794013977,grad_norm: 0.8096170284163777, iteration: 352367
loss: 1.0142016410827637,grad_norm: 0.893972153055351, iteration: 352368
loss: 0.9804021716117859,grad_norm: 0.885689994550214, iteration: 352369
loss: 0.9847550988197327,grad_norm: 0.7317964757918899, iteration: 352370
loss: 1.0167828798294067,grad_norm: 0.7722580575739081, iteration: 352371
loss: 0.9987772703170776,grad_norm: 0.8372664238781189, iteration: 352372
loss: 1.0210222005844116,grad_norm: 0.7464083223875586, iteration: 352373
loss: 0.9876084327697754,grad_norm: 0.774911431142814, iteration: 352374
loss: 0.9606286883354187,grad_norm: 0.7217979993886904, iteration: 352375
loss: 0.9896537065505981,grad_norm: 0.7974015521424125, iteration: 352376
loss: 1.0168944597244263,grad_norm: 0.8001401827566277, iteration: 352377
loss: 1.0026211738586426,grad_norm: 0.9999992276805355, iteration: 352378
loss: 0.96535724401474,grad_norm: 0.7977772091774435, iteration: 352379
loss: 1.0069398880004883,grad_norm: 0.8393017973854704, iteration: 352380
loss: 0.9926509261131287,grad_norm: 0.9175651225651131, iteration: 352381
loss: 1.004136085510254,grad_norm: 0.9243016914864683, iteration: 352382
loss: 1.0004740953445435,grad_norm: 0.7445036393900503, iteration: 352383
loss: 0.9768482446670532,grad_norm: 0.8018851952642786, iteration: 352384
loss: 1.0506583452224731,grad_norm: 0.7528771206518018, iteration: 352385
loss: 0.9649795293807983,grad_norm: 0.8030270491873693, iteration: 352386
loss: 1.000251293182373,grad_norm: 0.8900498187853726, iteration: 352387
loss: 0.9738783240318298,grad_norm: 0.8731779151171601, iteration: 352388
loss: 1.043169617652893,grad_norm: 0.8307190600971227, iteration: 352389
loss: 1.042500376701355,grad_norm: 0.8348702919846368, iteration: 352390
loss: 1.017807960510254,grad_norm: 0.7022772941183143, iteration: 352391
loss: 0.9717103838920593,grad_norm: 0.8551236033019176, iteration: 352392
loss: 1.0122206211090088,grad_norm: 0.7638062412136625, iteration: 352393
loss: 0.9724858999252319,grad_norm: 0.8506574255933171, iteration: 352394
loss: 0.9893270134925842,grad_norm: 0.8042428367424949, iteration: 352395
loss: 0.988568902015686,grad_norm: 0.8603215220578035, iteration: 352396
loss: 1.0070576667785645,grad_norm: 0.7886323103797722, iteration: 352397
loss: 0.9770630598068237,grad_norm: 0.9789302358117068, iteration: 352398
loss: 0.9830725789070129,grad_norm: 0.875639981805016, iteration: 352399
loss: 1.0012552738189697,grad_norm: 0.7140711939534959, iteration: 352400
loss: 1.0017598867416382,grad_norm: 0.905040573085845, iteration: 352401
loss: 0.9696763157844543,grad_norm: 0.7186070991731921, iteration: 352402
loss: 1.0141806602478027,grad_norm: 0.8245428469383619, iteration: 352403
loss: 1.018471360206604,grad_norm: 0.9999992605347041, iteration: 352404
loss: 0.9866162538528442,grad_norm: 0.8841546619159206, iteration: 352405
loss: 0.9626734852790833,grad_norm: 0.7175436169413875, iteration: 352406
loss: 1.0157582759857178,grad_norm: 0.7986733460677429, iteration: 352407
loss: 0.9890340566635132,grad_norm: 0.6948212945832437, iteration: 352408
loss: 1.000321626663208,grad_norm: 0.8022545683130539, iteration: 352409
loss: 1.0044347047805786,grad_norm: 0.8518306488747347, iteration: 352410
loss: 1.0124248266220093,grad_norm: 0.7612079873092441, iteration: 352411
loss: 0.9850146174430847,grad_norm: 0.8420265786404291, iteration: 352412
loss: 1.0021893978118896,grad_norm: 0.8306455914931009, iteration: 352413
loss: 0.9933404326438904,grad_norm: 0.7510357361536832, iteration: 352414
loss: 0.9852826595306396,grad_norm: 0.8575302013336245, iteration: 352415
loss: 1.052552342414856,grad_norm: 0.9999998465866038, iteration: 352416
loss: 0.9784116744995117,grad_norm: 0.8154700971069789, iteration: 352417
loss: 1.0125951766967773,grad_norm: 0.6727790034913063, iteration: 352418
loss: 1.023306131362915,grad_norm: 0.9999993713211459, iteration: 352419
loss: 0.9882038831710815,grad_norm: 0.8421471412028898, iteration: 352420
loss: 0.9983884692192078,grad_norm: 0.9999991278119271, iteration: 352421
loss: 0.9993628859519958,grad_norm: 0.9999998164253607, iteration: 352422
loss: 1.0205410718917847,grad_norm: 0.7650051372925691, iteration: 352423
loss: 0.9711969494819641,grad_norm: 0.8086326063215703, iteration: 352424
loss: 0.9868121147155762,grad_norm: 0.9033429343289072, iteration: 352425
loss: 0.9819590449333191,grad_norm: 0.7392665548078308, iteration: 352426
loss: 0.9994131326675415,grad_norm: 0.8560661761284687, iteration: 352427
loss: 1.0080488920211792,grad_norm: 0.999999149753594, iteration: 352428
loss: 1.0097453594207764,grad_norm: 0.9937477888832655, iteration: 352429
loss: 0.9734746217727661,grad_norm: 0.711501857195468, iteration: 352430
loss: 0.9922558665275574,grad_norm: 0.7249469960064702, iteration: 352431
loss: 0.9931693077087402,grad_norm: 0.8006889515661487, iteration: 352432
loss: 1.0219252109527588,grad_norm: 0.7441269219029262, iteration: 352433
loss: 1.0331705808639526,grad_norm: 0.9999991369934954, iteration: 352434
loss: 1.0032942295074463,grad_norm: 0.9076780053847997, iteration: 352435
loss: 1.0371323823928833,grad_norm: 0.9999997226953179, iteration: 352436
loss: 1.002098798751831,grad_norm: 0.7470798112039728, iteration: 352437
loss: 0.979712724685669,grad_norm: 0.760463622742441, iteration: 352438
loss: 0.9927079677581787,grad_norm: 0.9999996845309806, iteration: 352439
loss: 0.9696186184883118,grad_norm: 0.9999990765722774, iteration: 352440
loss: 1.0057079792022705,grad_norm: 0.8577960279801093, iteration: 352441
loss: 0.9864082932472229,grad_norm: 0.835727575659086, iteration: 352442
loss: 0.967659056186676,grad_norm: 0.7846882901809809, iteration: 352443
loss: 1.0066953897476196,grad_norm: 0.8856853421106987, iteration: 352444
loss: 0.945039689540863,grad_norm: 0.8450650760533734, iteration: 352445
loss: 1.0401010513305664,grad_norm: 0.706816477919147, iteration: 352446
loss: 0.9934029579162598,grad_norm: 0.8169969075735695, iteration: 352447
loss: 0.9679338335990906,grad_norm: 0.9571790086818385, iteration: 352448
loss: 0.9725212454795837,grad_norm: 0.7434117861330412, iteration: 352449
loss: 1.0277061462402344,grad_norm: 0.8184103530250132, iteration: 352450
loss: 0.996936559677124,grad_norm: 0.7871456453853899, iteration: 352451
loss: 1.0195388793945312,grad_norm: 0.9051604877695321, iteration: 352452
loss: 0.949307382106781,grad_norm: 0.8735357446248235, iteration: 352453
loss: 1.0077815055847168,grad_norm: 0.9999991321699092, iteration: 352454
loss: 1.0047338008880615,grad_norm: 0.8394732059940299, iteration: 352455
loss: 1.0100748538970947,grad_norm: 0.7395106043674187, iteration: 352456
loss: 0.9852337837219238,grad_norm: 0.7426898254671722, iteration: 352457
loss: 1.0171526670455933,grad_norm: 0.8839307241502453, iteration: 352458
loss: 1.0260286331176758,grad_norm: 0.7800718022602532, iteration: 352459
loss: 1.0089777708053589,grad_norm: 0.9393469118234158, iteration: 352460
loss: 1.0319565534591675,grad_norm: 0.7051097853403596, iteration: 352461
loss: 1.0955215692520142,grad_norm: 0.9999996682259751, iteration: 352462
loss: 1.0016419887542725,grad_norm: 0.999998995595253, iteration: 352463
loss: 1.0560423135757446,grad_norm: 0.9999990400753461, iteration: 352464
loss: 1.0388450622558594,grad_norm: 0.8903124928477713, iteration: 352465
loss: 1.0480526685714722,grad_norm: 0.746784201471635, iteration: 352466
loss: 1.0854089260101318,grad_norm: 0.7428386227585175, iteration: 352467
loss: 1.0394866466522217,grad_norm: 0.9999997259850861, iteration: 352468
loss: 0.9861076474189758,grad_norm: 0.6635610384179174, iteration: 352469
loss: 0.9574479460716248,grad_norm: 0.8579111996494608, iteration: 352470
loss: 1.009625792503357,grad_norm: 0.8822520960697019, iteration: 352471
loss: 0.9962374567985535,grad_norm: 0.8112764574293654, iteration: 352472
loss: 1.0265001058578491,grad_norm: 0.8642179697122104, iteration: 352473
loss: 0.9764461517333984,grad_norm: 0.8342618985159038, iteration: 352474
loss: 0.9852085709571838,grad_norm: 0.7108447466144917, iteration: 352475
loss: 0.9824292659759521,grad_norm: 0.7987547472598224, iteration: 352476
loss: 1.008607268333435,grad_norm: 0.7406086805590443, iteration: 352477
loss: 0.9553230404853821,grad_norm: 0.8172733226888252, iteration: 352478
loss: 1.0076158046722412,grad_norm: 0.9743253343423609, iteration: 352479
loss: 1.0031850337982178,grad_norm: 0.8602995017091336, iteration: 352480
loss: 0.9789640307426453,grad_norm: 0.7656210216084448, iteration: 352481
loss: 1.019456386566162,grad_norm: 0.9999996982054772, iteration: 352482
loss: 0.9679682850837708,grad_norm: 0.671729557667384, iteration: 352483
loss: 0.9909050464630127,grad_norm: 0.7659309723591925, iteration: 352484
loss: 1.0268540382385254,grad_norm: 0.9999991603993642, iteration: 352485
loss: 1.0278823375701904,grad_norm: 0.9722772861818022, iteration: 352486
loss: 0.9558330178260803,grad_norm: 0.7248673705433567, iteration: 352487
loss: 1.0135515928268433,grad_norm: 0.792128450471148, iteration: 352488
loss: 0.9969740509986877,grad_norm: 0.829811562313899, iteration: 352489
loss: 0.9856756925582886,grad_norm: 0.7575305153275951, iteration: 352490
loss: 0.9557472467422485,grad_norm: 0.8638874076874402, iteration: 352491
loss: 1.0072647333145142,grad_norm: 0.7603332930084185, iteration: 352492
loss: 0.9871647357940674,grad_norm: 0.7501969322211445, iteration: 352493
loss: 1.0026226043701172,grad_norm: 0.918336958473553, iteration: 352494
loss: 0.9991658329963684,grad_norm: 0.6907092449095416, iteration: 352495
loss: 1.0625779628753662,grad_norm: 1.0000000419585662, iteration: 352496
loss: 1.0047475099563599,grad_norm: 0.8436060761224745, iteration: 352497
loss: 1.0050370693206787,grad_norm: 0.7013175185049243, iteration: 352498
loss: 0.983713686466217,grad_norm: 0.6684668943783924, iteration: 352499
loss: 0.98867267370224,grad_norm: 0.6826387451474865, iteration: 352500
loss: 1.0272514820098877,grad_norm: 0.9999992652107191, iteration: 352501
loss: 1.0003365278244019,grad_norm: 0.7353343164298447, iteration: 352502
loss: 0.9980877041816711,grad_norm: 0.7048837301767117, iteration: 352503
loss: 0.9823254942893982,grad_norm: 0.9289819353871511, iteration: 352504
loss: 0.943840503692627,grad_norm: 0.9357238342324629, iteration: 352505
loss: 0.9881733059883118,grad_norm: 0.7089389445716275, iteration: 352506
loss: 0.9953451156616211,grad_norm: 0.8849178337246977, iteration: 352507
loss: 1.0045219659805298,grad_norm: 0.8744540565220765, iteration: 352508
loss: 0.9822841286659241,grad_norm: 0.6936630732816955, iteration: 352509
loss: 1.003221869468689,grad_norm: 0.9221877792525787, iteration: 352510
loss: 0.9961776733398438,grad_norm: 0.9999993344906392, iteration: 352511
loss: 1.0036718845367432,grad_norm: 0.8726499319275497, iteration: 352512
loss: 1.013553261756897,grad_norm: 0.7839677188565949, iteration: 352513
loss: 1.0154814720153809,grad_norm: 0.9054354424611274, iteration: 352514
loss: 1.0264184474945068,grad_norm: 0.7895102176715361, iteration: 352515
loss: 1.079092264175415,grad_norm: 0.9087981612812708, iteration: 352516
loss: 1.0171419382095337,grad_norm: 0.8692383347345036, iteration: 352517
loss: 1.0031156539916992,grad_norm: 0.8220454184297424, iteration: 352518
loss: 0.9983144402503967,grad_norm: 0.8087266074521442, iteration: 352519
loss: 1.0052505731582642,grad_norm: 0.9034555122986617, iteration: 352520
loss: 1.005428671836853,grad_norm: 0.7933196690617106, iteration: 352521
loss: 0.9874244928359985,grad_norm: 0.8700377595763213, iteration: 352522
loss: 0.9968386292457581,grad_norm: 0.8800700144976766, iteration: 352523
loss: 1.0283024311065674,grad_norm: 0.7418412692958787, iteration: 352524
loss: 0.9999287724494934,grad_norm: 0.8955793302089035, iteration: 352525
loss: 1.0066204071044922,grad_norm: 0.8107787365146454, iteration: 352526
loss: 1.0106481313705444,grad_norm: 0.8754002791421227, iteration: 352527
loss: 1.0293716192245483,grad_norm: 0.999999688943737, iteration: 352528
loss: 0.9931694865226746,grad_norm: 0.8444915740200133, iteration: 352529
loss: 0.9811356067657471,grad_norm: 0.8611143164247315, iteration: 352530
loss: 0.9834170341491699,grad_norm: 0.844127050979098, iteration: 352531
loss: 0.999758243560791,grad_norm: 0.8011072508821894, iteration: 352532
loss: 0.9960505366325378,grad_norm: 0.8541185240059661, iteration: 352533
loss: 1.0170918703079224,grad_norm: 0.8897840519028622, iteration: 352534
loss: 0.9964618682861328,grad_norm: 0.9999993646137743, iteration: 352535
loss: 1.0044599771499634,grad_norm: 0.8681452981978081, iteration: 352536
loss: 0.9870405793190002,grad_norm: 0.9136525742889021, iteration: 352537
loss: 0.9778710007667542,grad_norm: 0.9475042024467778, iteration: 352538
loss: 1.0192855596542358,grad_norm: 0.966203075260767, iteration: 352539
loss: 1.017179250717163,grad_norm: 0.7730058654676017, iteration: 352540
loss: 1.015089750289917,grad_norm: 0.7040074341692121, iteration: 352541
loss: 0.96015465259552,grad_norm: 0.880644512993065, iteration: 352542
loss: 1.0174057483673096,grad_norm: 0.9999991269369508, iteration: 352543
loss: 0.9719233512878418,grad_norm: 0.6547506502837708, iteration: 352544
loss: 1.0649515390396118,grad_norm: 0.9356289851639416, iteration: 352545
loss: 0.989616334438324,grad_norm: 0.8592955449034794, iteration: 352546
loss: 0.9637904167175293,grad_norm: 0.9999990478155903, iteration: 352547
loss: 0.9722878336906433,grad_norm: 0.6890118146028008, iteration: 352548
loss: 0.9805541634559631,grad_norm: 0.8581135153739703, iteration: 352549
loss: 1.007570505142212,grad_norm: 0.9169989067400632, iteration: 352550
loss: 1.000230312347412,grad_norm: 0.7478583054648996, iteration: 352551
loss: 0.9997332692146301,grad_norm: 0.6966186559381818, iteration: 352552
loss: 0.9994472861289978,grad_norm: 0.8801092326991563, iteration: 352553
loss: 1.000217080116272,grad_norm: 0.8962073869449878, iteration: 352554
loss: 0.9866426587104797,grad_norm: 0.8911670323878261, iteration: 352555
loss: 0.986107587814331,grad_norm: 0.9999991661697266, iteration: 352556
loss: 1.0454801321029663,grad_norm: 0.9461891085000113, iteration: 352557
loss: 1.0031235218048096,grad_norm: 0.8234857540081609, iteration: 352558
loss: 0.9890674948692322,grad_norm: 0.7739253803907039, iteration: 352559
loss: 1.0225260257720947,grad_norm: 0.8193990550305453, iteration: 352560
loss: 1.0338423252105713,grad_norm: 0.7083948699039823, iteration: 352561
loss: 0.9896366000175476,grad_norm: 0.8396460955481816, iteration: 352562
loss: 1.0040286779403687,grad_norm: 0.7312886629208541, iteration: 352563
loss: 0.9677602648735046,grad_norm: 0.6944569376271231, iteration: 352564
loss: 1.0090551376342773,grad_norm: 0.7894137761234348, iteration: 352565
loss: 1.0320218801498413,grad_norm: 0.9999995313195685, iteration: 352566
loss: 0.9867047667503357,grad_norm: 0.9256631000835509, iteration: 352567
loss: 0.9734656810760498,grad_norm: 0.694679263859397, iteration: 352568
loss: 0.9985414743423462,grad_norm: 0.9999994384911131, iteration: 352569
loss: 1.0302680730819702,grad_norm: 0.8477199328727644, iteration: 352570
loss: 1.0062146186828613,grad_norm: 0.7432872032501835, iteration: 352571
loss: 0.982231080532074,grad_norm: 0.7700340798106778, iteration: 352572
loss: 1.0117040872573853,grad_norm: 0.6324334684616928, iteration: 352573
loss: 1.0021653175354004,grad_norm: 0.7356887007283426, iteration: 352574
loss: 1.000868797302246,grad_norm: 0.9999991066382111, iteration: 352575
loss: 1.0201220512390137,grad_norm: 0.9999989435253626, iteration: 352576
loss: 0.9954008460044861,grad_norm: 0.9546469847162368, iteration: 352577
loss: 1.0306081771850586,grad_norm: 0.7823959161270624, iteration: 352578
loss: 1.0317519903182983,grad_norm: 0.9999993324715534, iteration: 352579
loss: 0.9750939607620239,grad_norm: 0.7665000233935741, iteration: 352580
loss: 1.0091830492019653,grad_norm: 0.9999996156054025, iteration: 352581
loss: 1.00861656665802,grad_norm: 0.7512932117299765, iteration: 352582
loss: 0.9831434488296509,grad_norm: 0.8370947092382326, iteration: 352583
loss: 1.0604952573776245,grad_norm: 0.999999711718512, iteration: 352584
loss: 0.9826840162277222,grad_norm: 0.8694088778456842, iteration: 352585
loss: 0.9822220206260681,grad_norm: 0.7205675421160862, iteration: 352586
loss: 1.047884464263916,grad_norm: 0.9999996082931177, iteration: 352587
loss: 0.9631030559539795,grad_norm: 0.8030466203719997, iteration: 352588
loss: 0.952945351600647,grad_norm: 0.9556311238403536, iteration: 352589
loss: 1.0349445343017578,grad_norm: 0.794463163139759, iteration: 352590
loss: 0.9770419597625732,grad_norm: 0.7760970960449094, iteration: 352591
loss: 1.0320085287094116,grad_norm: 0.9999991454518598, iteration: 352592
loss: 1.0036684274673462,grad_norm: 0.7642547138879001, iteration: 352593
loss: 1.0004831552505493,grad_norm: 0.8068152829485333, iteration: 352594
loss: 1.0492264032363892,grad_norm: 0.9525158444698213, iteration: 352595
loss: 0.9720534682273865,grad_norm: 0.7489472220513936, iteration: 352596
loss: 1.0055322647094727,grad_norm: 0.6702306995986514, iteration: 352597
loss: 1.0397528409957886,grad_norm: 0.8458816576088598, iteration: 352598
loss: 0.9791889190673828,grad_norm: 0.7819148127097714, iteration: 352599
loss: 1.050371766090393,grad_norm: 0.9999991142455713, iteration: 352600
loss: 1.0264348983764648,grad_norm: 0.9999998759603135, iteration: 352601
loss: 1.0505621433258057,grad_norm: 0.7963107162764018, iteration: 352602
loss: 1.0157034397125244,grad_norm: 0.8148034867280517, iteration: 352603
loss: 1.1482043266296387,grad_norm: 0.9999998427687541, iteration: 352604
loss: 1.0147874355316162,grad_norm: 0.7292081943533294, iteration: 352605
loss: 0.9844409823417664,grad_norm: 0.8165263176062617, iteration: 352606
loss: 1.040690541267395,grad_norm: 0.9366048691837477, iteration: 352607
loss: 1.1067789793014526,grad_norm: 0.9999992562972899, iteration: 352608
loss: 1.0616379976272583,grad_norm: 0.999999367208836, iteration: 352609
loss: 1.0173437595367432,grad_norm: 0.8839288494076432, iteration: 352610
loss: 1.0636310577392578,grad_norm: 0.9999995600774756, iteration: 352611
loss: 0.9869477152824402,grad_norm: 0.8190493770825111, iteration: 352612
loss: 0.9876734614372253,grad_norm: 0.8925387971369652, iteration: 352613
loss: 0.9833576679229736,grad_norm: 0.7854326492258852, iteration: 352614
loss: 1.0848017930984497,grad_norm: 0.9601831114475932, iteration: 352615
loss: 1.001785159111023,grad_norm: 0.9310826059561093, iteration: 352616
loss: 1.0173653364181519,grad_norm: 0.8481222022066515, iteration: 352617
loss: 0.9816773533821106,grad_norm: 0.7800623193856204, iteration: 352618
loss: 1.0203272104263306,grad_norm: 0.7827102312206602, iteration: 352619
loss: 1.005469560623169,grad_norm: 0.9999990625487548, iteration: 352620
loss: 1.0003859996795654,grad_norm: 0.8698355484475325, iteration: 352621
loss: 1.020043134689331,grad_norm: 0.7928593440271583, iteration: 352622
loss: 1.0530879497528076,grad_norm: 0.9673465455365414, iteration: 352623
loss: 1.0232584476470947,grad_norm: 0.8393743024776174, iteration: 352624
loss: 0.9948103427886963,grad_norm: 0.857490505342197, iteration: 352625
loss: 1.013136863708496,grad_norm: 0.7962809390183944, iteration: 352626
loss: 0.9693329930305481,grad_norm: 0.8136326773440056, iteration: 352627
loss: 1.0190858840942383,grad_norm: 0.8994909845396376, iteration: 352628
loss: 0.9937705397605896,grad_norm: 0.7791699667605384, iteration: 352629
loss: 0.9906933307647705,grad_norm: 0.8578021478345196, iteration: 352630
loss: 0.9730009436607361,grad_norm: 0.8592200362581899, iteration: 352631
loss: 1.0097509622573853,grad_norm: 0.8524643248963142, iteration: 352632
loss: 1.0560246706008911,grad_norm: 0.8092486855243738, iteration: 352633
loss: 0.9895907044410706,grad_norm: 0.843409606952612, iteration: 352634
loss: 1.036755084991455,grad_norm: 0.8645794661740153, iteration: 352635
loss: 0.9899091720581055,grad_norm: 0.9214214797703333, iteration: 352636
loss: 1.0036118030548096,grad_norm: 0.6685837689501278, iteration: 352637
loss: 1.005285382270813,grad_norm: 0.8407348604262913, iteration: 352638
loss: 0.9728348851203918,grad_norm: 0.8202514897824573, iteration: 352639
loss: 0.9435459971427917,grad_norm: 0.9381177277876881, iteration: 352640
loss: 0.9969090223312378,grad_norm: 0.8715925385859101, iteration: 352641
loss: 1.010279655456543,grad_norm: 0.7768111603180428, iteration: 352642
loss: 0.9954107999801636,grad_norm: 0.7569551996178965, iteration: 352643
loss: 0.9804308414459229,grad_norm: 0.7551923874893696, iteration: 352644
loss: 1.0017192363739014,grad_norm: 0.849267288754055, iteration: 352645
loss: 1.0262433290481567,grad_norm: 0.7336306354248618, iteration: 352646
loss: 1.010269284248352,grad_norm: 0.762736614452225, iteration: 352647
loss: 1.0355043411254883,grad_norm: 0.9679893503444246, iteration: 352648
loss: 0.9843876361846924,grad_norm: 0.8160743899706264, iteration: 352649
loss: 0.9942547678947449,grad_norm: 0.7828845956588275, iteration: 352650
loss: 1.0134577751159668,grad_norm: 0.779174620644522, iteration: 352651
loss: 1.0094561576843262,grad_norm: 0.9726525307784961, iteration: 352652
loss: 1.0207239389419556,grad_norm: 0.7334346048559429, iteration: 352653
loss: 0.9858525395393372,grad_norm: 0.7725894313245187, iteration: 352654
loss: 1.0102874040603638,grad_norm: 0.9999998391463438, iteration: 352655
loss: 0.9669719338417053,grad_norm: 0.9295381115037907, iteration: 352656
loss: 1.0147935152053833,grad_norm: 0.9999990617275899, iteration: 352657
loss: 0.9999810457229614,grad_norm: 0.9999997153122789, iteration: 352658
loss: 0.9851916432380676,grad_norm: 0.8149566794680261, iteration: 352659
loss: 1.0787363052368164,grad_norm: 0.9999994455758087, iteration: 352660
loss: 1.0074328184127808,grad_norm: 0.9648464484241347, iteration: 352661
loss: 1.128991723060608,grad_norm: 0.9999995246891589, iteration: 352662
loss: 1.015460729598999,grad_norm: 0.9999993764963073, iteration: 352663
loss: 0.9952216744422913,grad_norm: 0.9554268876322721, iteration: 352664
loss: 0.9914594292640686,grad_norm: 0.8646255221863215, iteration: 352665
loss: 1.049257755279541,grad_norm: 0.9999991297441152, iteration: 352666
loss: 1.011672019958496,grad_norm: 0.7705649611147736, iteration: 352667
loss: 0.9974151849746704,grad_norm: 0.9999995588794323, iteration: 352668
loss: 1.0048681497573853,grad_norm: 0.8922246869004213, iteration: 352669
loss: 0.9895932674407959,grad_norm: 0.7509330256894772, iteration: 352670
loss: 0.9975262880325317,grad_norm: 0.8083429705100781, iteration: 352671
loss: 0.9752635359764099,grad_norm: 0.8949892298486395, iteration: 352672
loss: 0.9774560928344727,grad_norm: 0.7670504850629238, iteration: 352673
loss: 0.9711968302726746,grad_norm: 0.9676109533780135, iteration: 352674
loss: 0.9725248217582703,grad_norm: 0.7832641968286609, iteration: 352675
loss: 1.0470448732376099,grad_norm: 0.9741151524064131, iteration: 352676
loss: 0.9812368154525757,grad_norm: 0.9368712796282832, iteration: 352677
loss: 0.9938079118728638,grad_norm: 0.7650592725432724, iteration: 352678
loss: 1.0236763954162598,grad_norm: 0.9999994123444592, iteration: 352679
loss: 0.9973911643028259,grad_norm: 0.735776289544104, iteration: 352680
loss: 0.9812147617340088,grad_norm: 0.8847468431938359, iteration: 352681
loss: 0.9951494336128235,grad_norm: 0.7992370987174445, iteration: 352682
loss: 0.9903545379638672,grad_norm: 0.9999995306618136, iteration: 352683
loss: 1.0109707117080688,grad_norm: 0.9999995741505804, iteration: 352684
loss: 0.9741424322128296,grad_norm: 0.7706593372780703, iteration: 352685
loss: 0.9843910932540894,grad_norm: 0.7169620164595958, iteration: 352686
loss: 1.0558470487594604,grad_norm: 0.9999989924684057, iteration: 352687
loss: 0.9746972322463989,grad_norm: 0.7775181022362002, iteration: 352688
loss: 1.0157424211502075,grad_norm: 0.8143905078169932, iteration: 352689
loss: 1.0247968435287476,grad_norm: 0.7957749540285901, iteration: 352690
loss: 1.0092395544052124,grad_norm: 0.7228485717628887, iteration: 352691
loss: 1.0000088214874268,grad_norm: 0.7967044738770028, iteration: 352692
loss: 1.018214464187622,grad_norm: 0.9527448611060751, iteration: 352693
loss: 0.9641321301460266,grad_norm: 0.7619153004412709, iteration: 352694
loss: 0.9610360264778137,grad_norm: 0.8438951313823305, iteration: 352695
loss: 0.9637402296066284,grad_norm: 0.8846486570074761, iteration: 352696
loss: 1.0418462753295898,grad_norm: 0.8433750216130584, iteration: 352697
loss: 0.9958233833312988,grad_norm: 0.9999990893402588, iteration: 352698
loss: 1.0147740840911865,grad_norm: 0.7649131437468628, iteration: 352699
loss: 0.9976279735565186,grad_norm: 0.8776071456171561, iteration: 352700
loss: 1.0096426010131836,grad_norm: 0.8786718551640956, iteration: 352701
loss: 1.0306793451309204,grad_norm: 0.9799968687992893, iteration: 352702
loss: 0.9677909016609192,grad_norm: 0.8843297764056387, iteration: 352703
loss: 1.0057976245880127,grad_norm: 0.9999991458455533, iteration: 352704
loss: 0.9859538078308105,grad_norm: 0.8878316546122341, iteration: 352705
loss: 1.0402038097381592,grad_norm: 0.9999992266772139, iteration: 352706
loss: 0.9990171194076538,grad_norm: 0.9616525352243994, iteration: 352707
loss: 1.0124796628952026,grad_norm: 0.7532289330079824, iteration: 352708
loss: 1.0149396657943726,grad_norm: 0.9999996861248281, iteration: 352709
loss: 0.9684898853302002,grad_norm: 0.6845896758922358, iteration: 352710
loss: 0.9924941062927246,grad_norm: 0.7731961744090367, iteration: 352711
loss: 1.0106310844421387,grad_norm: 0.9999991027381897, iteration: 352712
loss: 0.987011730670929,grad_norm: 0.7162051235201863, iteration: 352713
loss: 1.0113784074783325,grad_norm: 0.6868200006967957, iteration: 352714
loss: 0.9913884401321411,grad_norm: 0.7554136254404541, iteration: 352715
loss: 0.9821879267692566,grad_norm: 0.8716966084997042, iteration: 352716
loss: 0.9393865466117859,grad_norm: 0.8198517792758802, iteration: 352717
loss: 0.9972620606422424,grad_norm: 0.6811677499797422, iteration: 352718
loss: 1.0601311922073364,grad_norm: 0.7979600377696869, iteration: 352719
loss: 1.016711711883545,grad_norm: 0.9268607952928112, iteration: 352720
loss: 0.992567777633667,grad_norm: 0.775728387998029, iteration: 352721
loss: 0.9925806522369385,grad_norm: 0.9829716673925791, iteration: 352722
loss: 0.9994435906410217,grad_norm: 0.8375475644640172, iteration: 352723
loss: 1.0142484903335571,grad_norm: 0.6942149519556694, iteration: 352724
loss: 0.9891339540481567,grad_norm: 0.8253743974797251, iteration: 352725
loss: 1.0102742910385132,grad_norm: 0.8980624633667446, iteration: 352726
loss: 0.9624460935592651,grad_norm: 0.8328080447087641, iteration: 352727
loss: 0.9815322756767273,grad_norm: 0.7195970573530623, iteration: 352728
loss: 1.0472697019577026,grad_norm: 0.9295068203757045, iteration: 352729
loss: 1.028628945350647,grad_norm: 0.8077837966681484, iteration: 352730
loss: 1.0047165155410767,grad_norm: 0.918206963627481, iteration: 352731
loss: 1.0022413730621338,grad_norm: 0.889139993889454, iteration: 352732
loss: 0.9914491176605225,grad_norm: 0.7781157152231032, iteration: 352733
loss: 1.0509401559829712,grad_norm: 0.9815794990339235, iteration: 352734
loss: 0.9929524064064026,grad_norm: 0.9248080260802418, iteration: 352735
loss: 0.9820992350578308,grad_norm: 0.8099284857258235, iteration: 352736
loss: 0.9518579840660095,grad_norm: 0.9035802272488087, iteration: 352737
loss: 1.0069422721862793,grad_norm: 0.7326446723900183, iteration: 352738
loss: 1.0134979486465454,grad_norm: 0.8643470041315983, iteration: 352739
loss: 0.9993760585784912,grad_norm: 0.8573772384368971, iteration: 352740
loss: 0.9712433815002441,grad_norm: 0.9469705646973154, iteration: 352741
loss: 0.9423189163208008,grad_norm: 0.7490994351768089, iteration: 352742
loss: 1.0149035453796387,grad_norm: 0.6445753231595657, iteration: 352743
loss: 1.015470027923584,grad_norm: 0.9999993589683902, iteration: 352744
loss: 1.05604088306427,grad_norm: 0.8929527076271631, iteration: 352745
loss: 0.9835120439529419,grad_norm: 0.6523756618913858, iteration: 352746
loss: 1.0318703651428223,grad_norm: 0.8890810466180168, iteration: 352747
loss: 0.9977554082870483,grad_norm: 0.8507902217489063, iteration: 352748
loss: 1.005109429359436,grad_norm: 0.8535579738196708, iteration: 352749
loss: 0.980008602142334,grad_norm: 0.9999995750766209, iteration: 352750
loss: 1.0154390335083008,grad_norm: 0.7256268083908207, iteration: 352751
loss: 1.0028737783432007,grad_norm: 0.7246497668667096, iteration: 352752
loss: 1.012056827545166,grad_norm: 0.7961522314294497, iteration: 352753
loss: 0.9983908534049988,grad_norm: 0.6970471898911244, iteration: 352754
loss: 0.947116494178772,grad_norm: 0.8994222474068506, iteration: 352755
loss: 0.9759005308151245,grad_norm: 0.99999917647598, iteration: 352756
loss: 1.010195016860962,grad_norm: 0.764178462343213, iteration: 352757
loss: 0.9847923517227173,grad_norm: 0.7828036810054948, iteration: 352758
loss: 0.9974153637886047,grad_norm: 0.9371663522717039, iteration: 352759
loss: 0.9671586155891418,grad_norm: 0.7526131911522338, iteration: 352760
loss: 1.009089469909668,grad_norm: 0.7294268176423108, iteration: 352761
loss: 1.0299859046936035,grad_norm: 0.8499986656529313, iteration: 352762
loss: 1.023389458656311,grad_norm: 0.9671145525243262, iteration: 352763
loss: 1.026822566986084,grad_norm: 0.9375770094882492, iteration: 352764
loss: 1.0261667966842651,grad_norm: 0.9868785266251873, iteration: 352765
loss: 1.0008480548858643,grad_norm: 0.8961702552614056, iteration: 352766
loss: 1.0312554836273193,grad_norm: 0.8909268471711761, iteration: 352767
loss: 1.0100668668746948,grad_norm: 0.9999991254523792, iteration: 352768
loss: 0.9842860102653503,grad_norm: 0.9370577798079788, iteration: 352769
loss: 0.9575623273849487,grad_norm: 0.7681226210233922, iteration: 352770
loss: 1.0058659315109253,grad_norm: 0.9982012457309282, iteration: 352771
loss: 0.9594977498054504,grad_norm: 0.838808423763718, iteration: 352772
loss: 1.0074721574783325,grad_norm: 0.8397000407522692, iteration: 352773
loss: 0.9663133025169373,grad_norm: 0.8187078787853279, iteration: 352774
loss: 1.0226924419403076,grad_norm: 0.897635618965812, iteration: 352775
loss: 1.0026962757110596,grad_norm: 0.949356226518596, iteration: 352776
loss: 1.001767873764038,grad_norm: 0.7716573697539698, iteration: 352777
loss: 1.0006777048110962,grad_norm: 0.7682332547979511, iteration: 352778
loss: 1.0081459283828735,grad_norm: 0.9999989556894808, iteration: 352779
loss: 1.0253753662109375,grad_norm: 0.8840707335553221, iteration: 352780
loss: 1.2133973836898804,grad_norm: 0.9999996622469417, iteration: 352781
loss: 1.00238037109375,grad_norm: 0.7455779752282627, iteration: 352782
loss: 1.0160578489303589,grad_norm: 0.7621049099354524, iteration: 352783
loss: 0.9687880873680115,grad_norm: 0.7741762315574394, iteration: 352784
loss: 1.0680333375930786,grad_norm: 0.9276961897860659, iteration: 352785
loss: 0.9496240019798279,grad_norm: 0.9524228397013594, iteration: 352786
loss: 1.0189582109451294,grad_norm: 0.999999101649254, iteration: 352787
loss: 1.026089072227478,grad_norm: 0.8777334385526975, iteration: 352788
loss: 0.9959617853164673,grad_norm: 0.7448842470936934, iteration: 352789
loss: 0.9892789721488953,grad_norm: 0.7156315150742563, iteration: 352790
loss: 0.9982084631919861,grad_norm: 0.9999998151438474, iteration: 352791
loss: 1.0254828929901123,grad_norm: 0.999999928840936, iteration: 352792
loss: 0.980807363986969,grad_norm: 0.8395776418727328, iteration: 352793
loss: 1.008742094039917,grad_norm: 0.7958126699257677, iteration: 352794
loss: 0.9898104667663574,grad_norm: 0.80891208166585, iteration: 352795
loss: 1.009730577468872,grad_norm: 0.7031872066989339, iteration: 352796
loss: 0.9629228711128235,grad_norm: 0.8172863589958966, iteration: 352797
loss: 1.0438532829284668,grad_norm: 0.999999065420357, iteration: 352798
loss: 0.9589414000511169,grad_norm: 0.8707612563735554, iteration: 352799
loss: 0.9880924224853516,grad_norm: 0.6582198506986363, iteration: 352800
loss: 0.9603000283241272,grad_norm: 0.8752337169952352, iteration: 352801
loss: 0.9977098107337952,grad_norm: 0.8190721117213908, iteration: 352802
loss: 1.0483628511428833,grad_norm: 0.9999996090244131, iteration: 352803
loss: 1.0175114870071411,grad_norm: 0.8437578245065539, iteration: 352804
loss: 0.9864866733551025,grad_norm: 0.7611881064336309, iteration: 352805
loss: 1.0300143957138062,grad_norm: 0.9999998636131989, iteration: 352806
loss: 1.0388007164001465,grad_norm: 0.9999996875876097, iteration: 352807
loss: 1.0070018768310547,grad_norm: 0.7402275923173249, iteration: 352808
loss: 1.0972613096237183,grad_norm: 1.000000001640284, iteration: 352809
loss: 1.019325613975525,grad_norm: 0.9351991413907327, iteration: 352810
loss: 1.1681358814239502,grad_norm: 0.9999993886150863, iteration: 352811
loss: 1.0290064811706543,grad_norm: 0.676727329823481, iteration: 352812
loss: 1.059038519859314,grad_norm: 0.9178622839353764, iteration: 352813
loss: 1.0014907121658325,grad_norm: 0.927283713315182, iteration: 352814
loss: 1.001956820487976,grad_norm: 0.8126736921298006, iteration: 352815
loss: 1.0177628993988037,grad_norm: 0.7135485677024979, iteration: 352816
loss: 1.0084660053253174,grad_norm: 0.9004319930859164, iteration: 352817
loss: 1.048333764076233,grad_norm: 0.7617423957110662, iteration: 352818
loss: 0.9902141690254211,grad_norm: 0.7696689579386155, iteration: 352819
loss: 1.0274336338043213,grad_norm: 0.6926447554714413, iteration: 352820
loss: 1.028344988822937,grad_norm: 0.9999996313997751, iteration: 352821
loss: 1.035902738571167,grad_norm: 0.8048978820525531, iteration: 352822
loss: 0.9913685917854309,grad_norm: 0.7993627438008081, iteration: 352823
loss: 1.0054938793182373,grad_norm: 0.7281093147926668, iteration: 352824
loss: 1.0440741777420044,grad_norm: 0.847642599403273, iteration: 352825
loss: 1.0016239881515503,grad_norm: 0.8625247648474942, iteration: 352826
loss: 1.0201802253723145,grad_norm: 0.7611661421409025, iteration: 352827
loss: 0.9983065724372864,grad_norm: 0.8609950157968355, iteration: 352828
loss: 1.0594913959503174,grad_norm: 0.9999996235004848, iteration: 352829
loss: 1.0402544736862183,grad_norm: 0.9511465052266543, iteration: 352830
loss: 0.9975401759147644,grad_norm: 0.861232835354266, iteration: 352831
loss: 0.9915472865104675,grad_norm: 0.768691746019286, iteration: 352832
loss: 0.9739590883255005,grad_norm: 0.999998944215843, iteration: 352833
loss: 1.0211451053619385,grad_norm: 0.8729563902167529, iteration: 352834
loss: 0.9476125240325928,grad_norm: 0.8155197069580856, iteration: 352835
loss: 0.9781332015991211,grad_norm: 0.8841854999546547, iteration: 352836
loss: 0.998765766620636,grad_norm: 0.9999990983380526, iteration: 352837
loss: 1.0148046016693115,grad_norm: 0.7061653547569018, iteration: 352838
loss: 0.9908475875854492,grad_norm: 0.6881561510256695, iteration: 352839
loss: 0.9887097477912903,grad_norm: 0.7610842329296089, iteration: 352840
loss: 1.0298383235931396,grad_norm: 0.9999992010144554, iteration: 352841
loss: 0.99260014295578,grad_norm: 0.9601824933967845, iteration: 352842
loss: 0.9982158541679382,grad_norm: 0.8412228262087277, iteration: 352843
loss: 1.0073128938674927,grad_norm: 0.9310218341360612, iteration: 352844
loss: 1.0027117729187012,grad_norm: 0.8754188736804435, iteration: 352845
loss: 0.9772984981536865,grad_norm: 0.7241732409411852, iteration: 352846
loss: 0.983989953994751,grad_norm: 0.8751339448001352, iteration: 352847
loss: 1.0205696821212769,grad_norm: 0.8222540705707045, iteration: 352848
loss: 0.95941162109375,grad_norm: 0.8284035353734613, iteration: 352849
loss: 0.9648680090904236,grad_norm: 0.8508814860381707, iteration: 352850
loss: 0.989170253276825,grad_norm: 0.693646560251704, iteration: 352851
loss: 0.9924582839012146,grad_norm: 0.819609759057412, iteration: 352852
loss: 1.0114151239395142,grad_norm: 0.920428199003842, iteration: 352853
loss: 0.9690527319908142,grad_norm: 0.9167327115566096, iteration: 352854
loss: 1.0277293920516968,grad_norm: 0.945147919904685, iteration: 352855
loss: 1.0287601947784424,grad_norm: 0.9999993575842497, iteration: 352856
loss: 1.192517876625061,grad_norm: 0.9999995463025917, iteration: 352857
loss: 1.0364078283309937,grad_norm: 0.9302641905992918, iteration: 352858
loss: 0.9719621539115906,grad_norm: 0.8544620624487176, iteration: 352859
loss: 0.9891319274902344,grad_norm: 0.8716830561189607, iteration: 352860
loss: 0.9705786108970642,grad_norm: 0.7870455348972402, iteration: 352861
loss: 1.0133874416351318,grad_norm: 0.7553641057611599, iteration: 352862
loss: 0.9744809865951538,grad_norm: 0.9561670707122917, iteration: 352863
loss: 1.0163050889968872,grad_norm: 0.8371057555314493, iteration: 352864
loss: 0.9755417704582214,grad_norm: 0.7377828968395369, iteration: 352865
loss: 1.0436128377914429,grad_norm: 0.7415224970798642, iteration: 352866
loss: 1.013648509979248,grad_norm: 0.7885822109394898, iteration: 352867
loss: 1.0110183954238892,grad_norm: 0.6318885730676258, iteration: 352868
loss: 0.9847039580345154,grad_norm: 0.7657801071947936, iteration: 352869
loss: 1.0447099208831787,grad_norm: 0.7953694283720334, iteration: 352870
loss: 0.9926859140396118,grad_norm: 0.8420325202359737, iteration: 352871
loss: 0.974646806716919,grad_norm: 0.73922098747451, iteration: 352872
loss: 0.9940786361694336,grad_norm: 0.7535374257432321, iteration: 352873
loss: 1.015593409538269,grad_norm: 0.8067432877276548, iteration: 352874
loss: 0.9500640630722046,grad_norm: 0.9999991267799438, iteration: 352875
loss: 0.998528242111206,grad_norm: 0.7604708474895835, iteration: 352876
loss: 0.9949803352355957,grad_norm: 0.8364181311273122, iteration: 352877
loss: 0.9822084903717041,grad_norm: 0.8162089149907472, iteration: 352878
loss: 0.9429534077644348,grad_norm: 0.7787029830741744, iteration: 352879
loss: 0.9928176999092102,grad_norm: 0.8813657511542062, iteration: 352880
loss: 1.002137303352356,grad_norm: 0.7323189525707298, iteration: 352881
loss: 0.9575321078300476,grad_norm: 0.7239911769668474, iteration: 352882
loss: 0.9808896780014038,grad_norm: 0.8418692276439939, iteration: 352883
loss: 0.9811919331550598,grad_norm: 0.8802488656273569, iteration: 352884
loss: 0.9743480682373047,grad_norm: 0.925576525015974, iteration: 352885
loss: 0.9190200567245483,grad_norm: 0.9999991266479801, iteration: 352886
loss: 0.9717487692832947,grad_norm: 0.8700258662165914, iteration: 352887
loss: 0.9618704915046692,grad_norm: 0.8614521014324832, iteration: 352888
loss: 1.0275927782058716,grad_norm: 0.75585393252709, iteration: 352889
loss: 0.9832685589790344,grad_norm: 0.7927993042621483, iteration: 352890
loss: 1.0302537679672241,grad_norm: 0.9471855044651933, iteration: 352891
loss: 0.9869768023490906,grad_norm: 0.7033856603836083, iteration: 352892
loss: 0.9862688779830933,grad_norm: 0.6420245289997819, iteration: 352893
loss: 1.0125257968902588,grad_norm: 0.9999990657552381, iteration: 352894
loss: 0.9803494215011597,grad_norm: 0.7975035366035885, iteration: 352895
loss: 1.0197656154632568,grad_norm: 0.8674410680349828, iteration: 352896
loss: 0.9528155326843262,grad_norm: 0.7789559960736949, iteration: 352897
loss: 0.9582921862602234,grad_norm: 0.8305797397431679, iteration: 352898
loss: 1.071790099143982,grad_norm: 0.9170593224075062, iteration: 352899
loss: 1.0105568170547485,grad_norm: 0.7588634003598497, iteration: 352900
loss: 1.0188676118850708,grad_norm: 0.835226355722518, iteration: 352901
loss: 0.995701789855957,grad_norm: 0.7977077148962817, iteration: 352902
loss: 0.9833041429519653,grad_norm: 0.7251036514295669, iteration: 352903
loss: 0.9994741678237915,grad_norm: 0.943734457101937, iteration: 352904
loss: 1.0161758661270142,grad_norm: 0.8516383746620335, iteration: 352905
loss: 0.9783033728599548,grad_norm: 0.7884701819443407, iteration: 352906
loss: 1.001441240310669,grad_norm: 0.9999990518274074, iteration: 352907
loss: 1.0226411819458008,grad_norm: 0.7525154974767645, iteration: 352908
loss: 1.0103180408477783,grad_norm: 0.7873404826069833, iteration: 352909
loss: 1.0277161598205566,grad_norm: 0.8661810478551153, iteration: 352910
loss: 1.006986379623413,grad_norm: 0.9999990711226712, iteration: 352911
loss: 0.9762659668922424,grad_norm: 0.8415607463738792, iteration: 352912
loss: 0.9759430289268494,grad_norm: 0.7612243828996078, iteration: 352913
loss: 1.1770482063293457,grad_norm: 0.9999992215442381, iteration: 352914
loss: 1.0313414335250854,grad_norm: 0.7938197771711644, iteration: 352915
loss: 0.9871360063552856,grad_norm: 0.7701779011624964, iteration: 352916
loss: 1.0176374912261963,grad_norm: 0.7167671132278288, iteration: 352917
loss: 0.966200053691864,grad_norm: 0.8524788648906005, iteration: 352918
loss: 0.9569457173347473,grad_norm: 0.990004120688807, iteration: 352919
loss: 1.012860655784607,grad_norm: 0.7799089700311282, iteration: 352920
loss: 1.0153563022613525,grad_norm: 0.7981872325636982, iteration: 352921
loss: 1.001909613609314,grad_norm: 0.6899135891305161, iteration: 352922
loss: 0.9886969327926636,grad_norm: 0.8865430269015041, iteration: 352923
loss: 1.00636625289917,grad_norm: 0.8715953298412478, iteration: 352924
loss: 0.9889843463897705,grad_norm: 0.846208625400242, iteration: 352925
loss: 0.9746974110603333,grad_norm: 0.9091503562938934, iteration: 352926
loss: 1.0051422119140625,grad_norm: 0.7651258115204745, iteration: 352927
loss: 1.0240918397903442,grad_norm: 0.946517594738872, iteration: 352928
loss: 1.0064276456832886,grad_norm: 0.6700942183346261, iteration: 352929
loss: 0.9811406135559082,grad_norm: 0.7684781142751514, iteration: 352930
loss: 1.0091536045074463,grad_norm: 0.7964699230461367, iteration: 352931
loss: 1.0244357585906982,grad_norm: 0.7380353503709071, iteration: 352932
loss: 1.0104566812515259,grad_norm: 0.7995592842730901, iteration: 352933
loss: 0.9945977926254272,grad_norm: 0.6306788099335021, iteration: 352934
loss: 0.9879506826400757,grad_norm: 0.8009908955087212, iteration: 352935
loss: 1.0557078123092651,grad_norm: 0.9999998932811572, iteration: 352936
loss: 1.0050162076950073,grad_norm: 0.9999990403025905, iteration: 352937
loss: 0.9839330315589905,grad_norm: 0.8406125592657263, iteration: 352938
loss: 1.0327818393707275,grad_norm: 0.7134292912972952, iteration: 352939
loss: 0.9757371544837952,grad_norm: 0.7240520209963092, iteration: 352940
loss: 0.9551381468772888,grad_norm: 0.8341852931456023, iteration: 352941
loss: 0.9688423871994019,grad_norm: 0.7528811522303076, iteration: 352942
loss: 0.9986650347709656,grad_norm: 0.9999992874705748, iteration: 352943
loss: 0.979276180267334,grad_norm: 0.8179387067881443, iteration: 352944
loss: 0.9777847528457642,grad_norm: 0.7938467830986368, iteration: 352945
loss: 0.9957601428031921,grad_norm: 0.7741192129443877, iteration: 352946
loss: 1.000869870185852,grad_norm: 0.8549646745988354, iteration: 352947
loss: 0.9644543528556824,grad_norm: 0.9333650467436643, iteration: 352948
loss: 0.9684683680534363,grad_norm: 0.8180477003319249, iteration: 352949
loss: 1.027761459350586,grad_norm: 0.775761736680254, iteration: 352950
loss: 0.9797966480255127,grad_norm: 0.8265272195062408, iteration: 352951
loss: 0.9906241297721863,grad_norm: 0.7037809305465971, iteration: 352952
loss: 0.9828193187713623,grad_norm: 0.9181780810281548, iteration: 352953
loss: 1.012120246887207,grad_norm: 0.8211960739041858, iteration: 352954
loss: 1.058515191078186,grad_norm: 0.8585484048415484, iteration: 352955
loss: 0.9878649711608887,grad_norm: 0.7214069873563523, iteration: 352956
loss: 0.9492922425270081,grad_norm: 0.6755612368932586, iteration: 352957
loss: 1.0236141681671143,grad_norm: 0.9142580971263237, iteration: 352958
loss: 1.0487290620803833,grad_norm: 0.8709280830428978, iteration: 352959
loss: 1.0093274116516113,grad_norm: 0.6721036259434396, iteration: 352960
loss: 1.0136914253234863,grad_norm: 0.7030506593695834, iteration: 352961
loss: 0.9440968632698059,grad_norm: 0.8311304150741309, iteration: 352962
loss: 1.0189582109451294,grad_norm: 0.9377746642984581, iteration: 352963
loss: 1.0071933269500732,grad_norm: 0.775230890026925, iteration: 352964
loss: 1.016981840133667,grad_norm: 0.8440609546194694, iteration: 352965
loss: 1.020206093788147,grad_norm: 0.9999991062359495, iteration: 352966
loss: 0.994014322757721,grad_norm: 0.9790403405148408, iteration: 352967
loss: 1.0209715366363525,grad_norm: 0.8676227146129067, iteration: 352968
loss: 0.9561090469360352,grad_norm: 0.7045484445021561, iteration: 352969
loss: 0.9962686896324158,grad_norm: 0.8304924767140222, iteration: 352970
loss: 1.0098564624786377,grad_norm: 0.7647252702277724, iteration: 352971
loss: 0.9868257641792297,grad_norm: 0.7381089677286654, iteration: 352972
loss: 1.0143734216690063,grad_norm: 0.8051261049941766, iteration: 352973
loss: 0.9571663737297058,grad_norm: 0.8402622468339596, iteration: 352974
loss: 1.0155788660049438,grad_norm: 0.833883679657024, iteration: 352975
loss: 0.9887649416923523,grad_norm: 0.9999990747734373, iteration: 352976
loss: 1.0517655611038208,grad_norm: 0.9999997406431953, iteration: 352977
loss: 0.9857481122016907,grad_norm: 0.9999990436056592, iteration: 352978
loss: 0.9786406755447388,grad_norm: 0.9160589169732872, iteration: 352979
loss: 1.0017026662826538,grad_norm: 0.8820093197572257, iteration: 352980
loss: 1.0424094200134277,grad_norm: 0.6902925495005217, iteration: 352981
loss: 0.9983658194541931,grad_norm: 0.7604192017864913, iteration: 352982
loss: 1.0013927221298218,grad_norm: 0.6999881172466516, iteration: 352983
loss: 0.9926899075508118,grad_norm: 0.8801848680196868, iteration: 352984
loss: 0.9985772967338562,grad_norm: 0.7942587887869265, iteration: 352985
loss: 0.9648340344429016,grad_norm: 0.7817429109939333, iteration: 352986
loss: 1.0219032764434814,grad_norm: 0.7279621256273602, iteration: 352987
loss: 1.009279489517212,grad_norm: 0.8839229937482532, iteration: 352988
loss: 0.9721660614013672,grad_norm: 0.9999990845450353, iteration: 352989
loss: 1.0372095108032227,grad_norm: 0.9496055168166782, iteration: 352990
loss: 1.0018008947372437,grad_norm: 0.7729768633915315, iteration: 352991
loss: 0.9726936221122742,grad_norm: 0.7844426104537519, iteration: 352992
loss: 1.0145796537399292,grad_norm: 0.753245976946041, iteration: 352993
loss: 0.9988906383514404,grad_norm: 0.7804472758922859, iteration: 352994
loss: 1.018291711807251,grad_norm: 0.9233441838112473, iteration: 352995
loss: 1.0268620252609253,grad_norm: 0.833362568819229, iteration: 352996
loss: 1.010088324546814,grad_norm: 0.8573152258730627, iteration: 352997
loss: 0.9990078210830688,grad_norm: 0.7501540687757281, iteration: 352998
loss: 0.9984151124954224,grad_norm: 0.9267513871992097, iteration: 352999
loss: 1.000229001045227,grad_norm: 0.8626562279300638, iteration: 353000
loss: 1.0099507570266724,grad_norm: 0.7160709274100021, iteration: 353001
loss: 1.023130178451538,grad_norm: 0.9500153025575974, iteration: 353002
loss: 0.9651004672050476,grad_norm: 0.8006895312027552, iteration: 353003
loss: 1.0243927240371704,grad_norm: 0.7165894958862089, iteration: 353004
loss: 1.051245093345642,grad_norm: 0.8916341319871596, iteration: 353005
loss: 0.9845044612884521,grad_norm: 0.9071448254048081, iteration: 353006
loss: 0.9663429856300354,grad_norm: 0.7573028322126898, iteration: 353007
loss: 1.0064383745193481,grad_norm: 0.7818058631413325, iteration: 353008
loss: 0.9782196879386902,grad_norm: 0.7216098655734751, iteration: 353009
loss: 1.0042062997817993,grad_norm: 0.9229399308445182, iteration: 353010
loss: 0.9773641228675842,grad_norm: 0.6845956488244571, iteration: 353011
loss: 0.9811471104621887,grad_norm: 0.9428310955654763, iteration: 353012
loss: 1.0212730169296265,grad_norm: 0.912217704438495, iteration: 353013
loss: 1.0146623849868774,grad_norm: 0.8688056433115874, iteration: 353014
loss: 1.0035051107406616,grad_norm: 0.9370525862343352, iteration: 353015
loss: 0.9906560778617859,grad_norm: 0.9285072334823011, iteration: 353016
loss: 1.1067198514938354,grad_norm: 1.000000027105864, iteration: 353017
loss: 1.0634136199951172,grad_norm: 0.9999993524527319, iteration: 353018
loss: 1.0009562969207764,grad_norm: 0.858482364292235, iteration: 353019
loss: 1.0615460872650146,grad_norm: 0.9999997640288131, iteration: 353020
loss: 0.9645751118659973,grad_norm: 0.7903447563429038, iteration: 353021
loss: 0.9910609722137451,grad_norm: 0.9517061080913541, iteration: 353022
loss: 1.059027075767517,grad_norm: 0.9999990129972033, iteration: 353023
loss: 0.9770409464836121,grad_norm: 0.7454219598372477, iteration: 353024
loss: 1.0201517343521118,grad_norm: 0.7677554008105523, iteration: 353025
loss: 0.9798300862312317,grad_norm: 0.7998315075401129, iteration: 353026
loss: 1.0109566450119019,grad_norm: 0.7153030778826124, iteration: 353027
loss: 1.0402857065200806,grad_norm: 0.8404665984259758, iteration: 353028
loss: 0.9854761958122253,grad_norm: 0.8544015597530874, iteration: 353029
loss: 1.003282904624939,grad_norm: 0.7197891629081117, iteration: 353030
loss: 1.0137860774993896,grad_norm: 0.7235598428388806, iteration: 353031
loss: 0.9976242184638977,grad_norm: 0.9999995931834128, iteration: 353032
loss: 0.9615881443023682,grad_norm: 0.9999992155189151, iteration: 353033
loss: 0.9897611737251282,grad_norm: 0.7419797808492153, iteration: 353034
loss: 0.9737458229064941,grad_norm: 0.9999991426286564, iteration: 353035
loss: 1.1154232025146484,grad_norm: 0.9999994674192272, iteration: 353036
loss: 0.9974368214607239,grad_norm: 0.7644980999452368, iteration: 353037
loss: 1.0650618076324463,grad_norm: 0.8547668010758257, iteration: 353038
loss: 0.9934724569320679,grad_norm: 0.8411261664083112, iteration: 353039
loss: 0.9596946835517883,grad_norm: 0.8474782793535596, iteration: 353040
loss: 1.042847990989685,grad_norm: 0.9991734072310673, iteration: 353041
loss: 0.9842756390571594,grad_norm: 0.9999991111430471, iteration: 353042
loss: 1.0218873023986816,grad_norm: 0.8306080569203566, iteration: 353043
loss: 0.9967637062072754,grad_norm: 0.9036864336458882, iteration: 353044
loss: 1.0036994218826294,grad_norm: 0.8056504778517855, iteration: 353045
loss: 0.9970796704292297,grad_norm: 0.7474186377059941, iteration: 353046
loss: 0.979303240776062,grad_norm: 0.9999990872618751, iteration: 353047
loss: 0.9902071952819824,grad_norm: 0.8511075030278676, iteration: 353048
loss: 1.399883508682251,grad_norm: 0.9999999029569979, iteration: 353049
loss: 0.984041690826416,grad_norm: 0.7736528958274782, iteration: 353050
loss: 1.0344170331954956,grad_norm: 0.7256123881141798, iteration: 353051
loss: 1.1413131952285767,grad_norm: 0.8558639118718514, iteration: 353052
loss: 0.9350003004074097,grad_norm: 0.8786699959412867, iteration: 353053
loss: 1.0337235927581787,grad_norm: 0.9004226710899663, iteration: 353054
loss: 0.9507358074188232,grad_norm: 0.9177688240243116, iteration: 353055
loss: 0.9998716711997986,grad_norm: 0.8737251418433448, iteration: 353056
loss: 1.0021586418151855,grad_norm: 0.6994878671454502, iteration: 353057
loss: 1.0086419582366943,grad_norm: 0.9575887612456829, iteration: 353058
loss: 0.9910941123962402,grad_norm: 0.8046197705956633, iteration: 353059
loss: 1.0351580381393433,grad_norm: 0.6585811610094056, iteration: 353060
loss: 1.0237185955047607,grad_norm: 0.9999995503703109, iteration: 353061
loss: 1.2546902894973755,grad_norm: 0.9999996590032088, iteration: 353062
loss: 1.0128874778747559,grad_norm: 0.9999993130219563, iteration: 353063
loss: 0.9929211139678955,grad_norm: 0.9338913249283449, iteration: 353064
loss: 1.00290846824646,grad_norm: 0.8289251551416331, iteration: 353065
loss: 1.0683276653289795,grad_norm: 1.0000000329676948, iteration: 353066
loss: 1.1153709888458252,grad_norm: 0.9999995427610432, iteration: 353067
loss: 1.0287806987762451,grad_norm: 0.8089136500314223, iteration: 353068
loss: 1.0316122770309448,grad_norm: 0.7836715180677275, iteration: 353069
loss: 0.9890974760055542,grad_norm: 0.9305573340633703, iteration: 353070
loss: 1.0157997608184814,grad_norm: 0.9999994377440968, iteration: 353071
loss: 1.178217887878418,grad_norm: 0.9999994889883101, iteration: 353072
loss: 1.1830835342407227,grad_norm: 0.9999996708471168, iteration: 353073
loss: 0.9752673506736755,grad_norm: 0.7864088603136844, iteration: 353074
loss: 0.9672841429710388,grad_norm: 0.9998212851806931, iteration: 353075
loss: 1.0721666812896729,grad_norm: 0.9999994153992494, iteration: 353076
loss: 1.159918189048767,grad_norm: 0.999999317142014, iteration: 353077
loss: 1.0390340089797974,grad_norm: 0.953682950712257, iteration: 353078
loss: 0.9607462286949158,grad_norm: 0.9999995919374257, iteration: 353079
loss: 0.9905322194099426,grad_norm: 0.8625195890001766, iteration: 353080
loss: 1.002606987953186,grad_norm: 0.9191725601840608, iteration: 353081
loss: 1.029923439025879,grad_norm: 0.9342701623151121, iteration: 353082
loss: 1.0077240467071533,grad_norm: 0.8268508613838655, iteration: 353083
loss: 1.0124400854110718,grad_norm: 0.689384482130944, iteration: 353084
loss: 0.9974571466445923,grad_norm: 0.8658820138630733, iteration: 353085
loss: 1.008862853050232,grad_norm: 0.9999989273571697, iteration: 353086
loss: 0.9700748920440674,grad_norm: 0.7604516609813353, iteration: 353087
loss: 0.9557433128356934,grad_norm: 0.8973626439880815, iteration: 353088
loss: 0.9724825024604797,grad_norm: 0.7728006498189744, iteration: 353089
loss: 0.9347813725471497,grad_norm: 0.9999989984700434, iteration: 353090
loss: 1.0218682289123535,grad_norm: 0.8768704331600792, iteration: 353091
loss: 1.0179210901260376,grad_norm: 0.8449560589665427, iteration: 353092
loss: 0.97316575050354,grad_norm: 0.7028591535292451, iteration: 353093
loss: 1.0194841623306274,grad_norm: 0.9240615243857613, iteration: 353094
loss: 1.065910816192627,grad_norm: 0.9999999570773953, iteration: 353095
loss: 0.9658759236335754,grad_norm: 0.8366665447300703, iteration: 353096
loss: 1.0023142099380493,grad_norm: 0.9999999893438318, iteration: 353097
loss: 1.0863800048828125,grad_norm: 0.9428136927649803, iteration: 353098
loss: 1.01448392868042,grad_norm: 0.8406517032262193, iteration: 353099
loss: 1.0226768255233765,grad_norm: 0.9543754204054862, iteration: 353100
loss: 1.0071595907211304,grad_norm: 0.7346411211996438, iteration: 353101
loss: 0.9852924346923828,grad_norm: 0.8733622165816916, iteration: 353102
loss: 1.0210206508636475,grad_norm: 0.7889179226964589, iteration: 353103
loss: 1.0077906847000122,grad_norm: 0.747472527753325, iteration: 353104
loss: 1.0471957921981812,grad_norm: 0.999999064671047, iteration: 353105
loss: 1.0330328941345215,grad_norm: 0.9930076409910246, iteration: 353106
loss: 0.9718364477157593,grad_norm: 0.9123468501231423, iteration: 353107
loss: 0.9985544681549072,grad_norm: 0.7359003616674833, iteration: 353108
loss: 0.9838663935661316,grad_norm: 0.8067483964826254, iteration: 353109
loss: 1.0041483640670776,grad_norm: 0.8750098860090688, iteration: 353110
loss: 1.0141404867172241,grad_norm: 0.8692077474764612, iteration: 353111
loss: 0.9983131289482117,grad_norm: 0.8774590283435414, iteration: 353112
loss: 0.9903690814971924,grad_norm: 0.985469172797594, iteration: 353113
loss: 1.0457429885864258,grad_norm: 0.9999991008577022, iteration: 353114
loss: 0.9778323769569397,grad_norm: 0.7318187150820721, iteration: 353115
loss: 1.0192513465881348,grad_norm: 0.872319679233849, iteration: 353116
loss: 1.0293244123458862,grad_norm: 0.9999997704071019, iteration: 353117
loss: 1.0547727346420288,grad_norm: 0.9999995548176024, iteration: 353118
loss: 0.9903755784034729,grad_norm: 0.9999997174354902, iteration: 353119
loss: 0.9896401762962341,grad_norm: 0.9999993787112528, iteration: 353120
loss: 0.9721856117248535,grad_norm: 0.83977249040692, iteration: 353121
loss: 0.9612153172492981,grad_norm: 0.9999999734137438, iteration: 353122
loss: 0.953568160533905,grad_norm: 0.8683357321773976, iteration: 353123
loss: 1.0016242265701294,grad_norm: 0.9142924235095086, iteration: 353124
loss: 0.9830847978591919,grad_norm: 0.7697908138191029, iteration: 353125
loss: 1.0299488306045532,grad_norm: 0.9999990566380328, iteration: 353126
loss: 1.0498652458190918,grad_norm: 0.9999989849889643, iteration: 353127
loss: 0.9992517828941345,grad_norm: 0.9046069746614587, iteration: 353128
loss: 0.9998482465744019,grad_norm: 0.8314269881628726, iteration: 353129
loss: 1.0232752561569214,grad_norm: 0.8191289077575249, iteration: 353130
loss: 0.9973307847976685,grad_norm: 0.7325526797902733, iteration: 353131
loss: 1.0611814260482788,grad_norm: 0.9999991201920253, iteration: 353132
loss: 0.9701594114303589,grad_norm: 0.776051712617462, iteration: 353133
loss: 1.012649416923523,grad_norm: 0.900297177528579, iteration: 353134
loss: 1.0250259637832642,grad_norm: 0.8469249166297477, iteration: 353135
loss: 1.0064678192138672,grad_norm: 0.756449543910916, iteration: 353136
loss: 0.9924401640892029,grad_norm: 0.8463141673474095, iteration: 353137
loss: 1.0359209775924683,grad_norm: 0.999999385144149, iteration: 353138
loss: 0.995933473110199,grad_norm: 0.7056538376392053, iteration: 353139
loss: 1.034445881843567,grad_norm: 0.9999994418111824, iteration: 353140
loss: 1.060992956161499,grad_norm: 0.9999993758937663, iteration: 353141
loss: 1.0459632873535156,grad_norm: 0.9999998189683778, iteration: 353142
loss: 1.0246336460113525,grad_norm: 0.739153992989708, iteration: 353143
loss: 0.9913110733032227,grad_norm: 0.9790282092553758, iteration: 353144
loss: 1.015245795249939,grad_norm: 0.8936006677429845, iteration: 353145
loss: 1.023587703704834,grad_norm: 0.7886941400766478, iteration: 353146
loss: 1.0008033514022827,grad_norm: 0.950099040914858, iteration: 353147
loss: 1.0172053575515747,grad_norm: 0.8071671084229479, iteration: 353148
loss: 1.028649091720581,grad_norm: 0.9999991689287788, iteration: 353149
loss: 0.999963641166687,grad_norm: 0.9053165642555426, iteration: 353150
loss: 0.9737374186515808,grad_norm: 0.8644473559549302, iteration: 353151
loss: 1.0586912631988525,grad_norm: 0.9999995385994774, iteration: 353152
loss: 1.041567325592041,grad_norm: 0.9999990980231125, iteration: 353153
loss: 1.0235003232955933,grad_norm: 0.7644800390093617, iteration: 353154
loss: 0.9788665771484375,grad_norm: 0.6921986030443554, iteration: 353155
loss: 0.9861115217208862,grad_norm: 0.8449250875095959, iteration: 353156
loss: 0.9938440918922424,grad_norm: 0.8381863036386825, iteration: 353157
loss: 0.9811385869979858,grad_norm: 0.7639184333447651, iteration: 353158
loss: 1.0126465559005737,grad_norm: 0.8769596231011901, iteration: 353159
loss: 1.0143508911132812,grad_norm: 0.9999991588762247, iteration: 353160
loss: 1.0124571323394775,grad_norm: 0.8144662526227279, iteration: 353161
loss: 1.0491290092468262,grad_norm: 0.999999013426036, iteration: 353162
loss: 1.0392247438430786,grad_norm: 0.8050157035237187, iteration: 353163
loss: 1.0222796201705933,grad_norm: 0.858172909674408, iteration: 353164
loss: 1.0323176383972168,grad_norm: 0.7987083540075995, iteration: 353165
loss: 0.942391037940979,grad_norm: 0.830494189485216, iteration: 353166
loss: 1.0345722436904907,grad_norm: 0.9878057756471794, iteration: 353167
loss: 0.9694714546203613,grad_norm: 0.9131740820219145, iteration: 353168
loss: 1.0058887004852295,grad_norm: 0.9907142275283881, iteration: 353169
loss: 0.9789249897003174,grad_norm: 0.7293837231450337, iteration: 353170
loss: 1.0589087009429932,grad_norm: 0.8127568813647784, iteration: 353171
loss: 1.0613470077514648,grad_norm: 0.9999991982253478, iteration: 353172
loss: 1.011386513710022,grad_norm: 0.8674263029311798, iteration: 353173
loss: 1.0242104530334473,grad_norm: 0.662630052860407, iteration: 353174
loss: 0.9685772657394409,grad_norm: 0.8553490509374144, iteration: 353175
loss: 1.015008568763733,grad_norm: 0.9328698899522381, iteration: 353176
loss: 1.0114984512329102,grad_norm: 0.7353653211522783, iteration: 353177
loss: 1.0065991878509521,grad_norm: 0.878386335077249, iteration: 353178
loss: 1.0327832698822021,grad_norm: 0.8914938989430515, iteration: 353179
loss: 0.9980661273002625,grad_norm: 0.9999991335291837, iteration: 353180
loss: 0.9455051422119141,grad_norm: 0.7075960173557464, iteration: 353181
loss: 1.008763313293457,grad_norm: 0.6965681278093274, iteration: 353182
loss: 0.9947093725204468,grad_norm: 0.9300794607309052, iteration: 353183
loss: 0.9831059575080872,grad_norm: 0.9388669022264069, iteration: 353184
loss: 0.9848282933235168,grad_norm: 0.8158806488206053, iteration: 353185
loss: 1.031583547592163,grad_norm: 0.9999993990238001, iteration: 353186
loss: 1.0153158903121948,grad_norm: 0.7290342495375919, iteration: 353187
loss: 1.0202407836914062,grad_norm: 0.9462173353938945, iteration: 353188
loss: 1.0021754503250122,grad_norm: 0.7530164692113404, iteration: 353189
loss: 0.9967010021209717,grad_norm: 0.8302871553680928, iteration: 353190
loss: 1.0156714916229248,grad_norm: 0.6854676513251269, iteration: 353191
loss: 0.9505130648612976,grad_norm: 0.7340633498899996, iteration: 353192
loss: 0.9963775277137756,grad_norm: 0.9014074756583627, iteration: 353193
loss: 1.0126030445098877,grad_norm: 0.9322097806719314, iteration: 353194
loss: 0.9828795790672302,grad_norm: 0.7677027097985921, iteration: 353195
loss: 0.9897715449333191,grad_norm: 0.7292379277205997, iteration: 353196
loss: 1.012364387512207,grad_norm: 0.9112403153040763, iteration: 353197
loss: 1.0309804677963257,grad_norm: 0.9999990576834216, iteration: 353198
loss: 0.9946486353874207,grad_norm: 0.8643693259628653, iteration: 353199
loss: 1.0105006694793701,grad_norm: 0.9879899383680253, iteration: 353200
loss: 0.9937061667442322,grad_norm: 0.8739445503579543, iteration: 353201
loss: 1.0117120742797852,grad_norm: 0.8846406058456296, iteration: 353202
loss: 0.9943935871124268,grad_norm: 0.7192553931264246, iteration: 353203
loss: 0.9888537526130676,grad_norm: 0.782867509370056, iteration: 353204
loss: 1.0591293573379517,grad_norm: 0.9074399747936137, iteration: 353205
loss: 0.984696090221405,grad_norm: 0.9446369788333195, iteration: 353206
loss: 0.9940573573112488,grad_norm: 0.7991492716190765, iteration: 353207
loss: 1.0172412395477295,grad_norm: 0.8327090327714431, iteration: 353208
loss: 0.9912298321723938,grad_norm: 0.6557624918405215, iteration: 353209
loss: 0.9711311459541321,grad_norm: 0.7418330384877773, iteration: 353210
loss: 1.0138696432113647,grad_norm: 0.779555019458918, iteration: 353211
loss: 0.9841126799583435,grad_norm: 0.7028885654960086, iteration: 353212
loss: 1.0518816709518433,grad_norm: 0.7789060781252342, iteration: 353213
loss: 0.9845525622367859,grad_norm: 0.7345493767544585, iteration: 353214
loss: 0.9809756875038147,grad_norm: 0.9353885539074366, iteration: 353215
loss: 1.05915105342865,grad_norm: 0.9999998215748803, iteration: 353216
loss: 0.9929056763648987,grad_norm: 0.767728748622514, iteration: 353217
loss: 1.0065447092056274,grad_norm: 0.7783052000659306, iteration: 353218
loss: 1.0192530155181885,grad_norm: 0.8226746294863189, iteration: 353219
loss: 1.0186774730682373,grad_norm: 0.6215491021537579, iteration: 353220
loss: 0.9867912530899048,grad_norm: 0.7595755514725807, iteration: 353221
loss: 1.012281894683838,grad_norm: 0.8148961871512808, iteration: 353222
loss: 1.017451524734497,grad_norm: 0.6936839049133481, iteration: 353223
loss: 0.9891419410705566,grad_norm: 0.9438532033189505, iteration: 353224
loss: 1.0459380149841309,grad_norm: 0.9999999753674306, iteration: 353225
loss: 1.0084588527679443,grad_norm: 0.9999994361983123, iteration: 353226
loss: 1.1106857061386108,grad_norm: 0.7716119182628475, iteration: 353227
loss: 0.9535608887672424,grad_norm: 0.9454035663494239, iteration: 353228
loss: 0.9909864068031311,grad_norm: 0.690558264260598, iteration: 353229
loss: 0.99128258228302,grad_norm: 0.943444546942066, iteration: 353230
loss: 0.9963458776473999,grad_norm: 0.8558242736158592, iteration: 353231
loss: 0.986141562461853,grad_norm: 0.9160129286479792, iteration: 353232
loss: 1.0135936737060547,grad_norm: 0.9844058979369659, iteration: 353233
loss: 1.046008586883545,grad_norm: 0.999999200138789, iteration: 353234
loss: 0.9828836917877197,grad_norm: 0.8359974266795562, iteration: 353235
loss: 1.0132192373275757,grad_norm: 0.7652085577814571, iteration: 353236
loss: 1.0369316339492798,grad_norm: 0.9999996596300172, iteration: 353237
loss: 1.007006287574768,grad_norm: 0.8998766794557947, iteration: 353238
loss: 1.0049264430999756,grad_norm: 0.7780824955350581, iteration: 353239
loss: 0.998253345489502,grad_norm: 0.896129418226601, iteration: 353240
loss: 1.004906177520752,grad_norm: 0.8826832826545328, iteration: 353241
loss: 0.9617411494255066,grad_norm: 0.9209679015911353, iteration: 353242
loss: 0.9881085157394409,grad_norm: 0.7505690554630107, iteration: 353243
loss: 0.9937078356742859,grad_norm: 0.9286975157131117, iteration: 353244
loss: 1.0082699060440063,grad_norm: 0.8023498711838762, iteration: 353245
loss: 1.0024856328964233,grad_norm: 0.7657334045366667, iteration: 353246
loss: 1.1122969388961792,grad_norm: 0.8460801322578091, iteration: 353247
loss: 1.0550419092178345,grad_norm: 0.9999998513206555, iteration: 353248
loss: 1.042810082435608,grad_norm: 0.799831288376345, iteration: 353249
loss: 0.9869604110717773,grad_norm: 0.7710374139882511, iteration: 353250
loss: 1.0230399370193481,grad_norm: 0.8025287954637333, iteration: 353251
loss: 0.9789513349533081,grad_norm: 0.7724278508557119, iteration: 353252
loss: 0.9852393865585327,grad_norm: 0.9999991366560858, iteration: 353253
loss: 1.0074886083602905,grad_norm: 0.9999999061423818, iteration: 353254
loss: 0.9873425364494324,grad_norm: 0.9590358279539773, iteration: 353255
loss: 1.0422459840774536,grad_norm: 0.7813910280945888, iteration: 353256
loss: 0.9872149229049683,grad_norm: 0.8933679636086425, iteration: 353257
loss: 1.0047065019607544,grad_norm: 0.772182991320211, iteration: 353258
loss: 0.9927399158477783,grad_norm: 0.906484676721498, iteration: 353259
loss: 1.0018751621246338,grad_norm: 0.9357407808375595, iteration: 353260
loss: 0.9666772484779358,grad_norm: 0.7510333287730387, iteration: 353261
loss: 1.0001354217529297,grad_norm: 0.8229069936966713, iteration: 353262
loss: 1.0124250650405884,grad_norm: 0.8574878286721378, iteration: 353263
loss: 0.9976619482040405,grad_norm: 0.8607652213360414, iteration: 353264
loss: 0.9948956370353699,grad_norm: 0.9160323924240364, iteration: 353265
loss: 0.9933377504348755,grad_norm: 0.8011646815000723, iteration: 353266
loss: 1.0143837928771973,grad_norm: 0.9706550023812729, iteration: 353267
loss: 0.9978925585746765,grad_norm: 0.869029228972534, iteration: 353268
loss: 0.963714063167572,grad_norm: 0.8466083829633606, iteration: 353269
loss: 0.9819148778915405,grad_norm: 0.7814712684882208, iteration: 353270
loss: 1.0040818452835083,grad_norm: 0.93713965038353, iteration: 353271
loss: 1.0529752969741821,grad_norm: 0.7456244460477489, iteration: 353272
loss: 0.9983099102973938,grad_norm: 0.7638779009652004, iteration: 353273
loss: 1.0172020196914673,grad_norm: 0.8887057213855054, iteration: 353274
loss: 1.034077763557434,grad_norm: 0.8388605515529254, iteration: 353275
loss: 0.9924145936965942,grad_norm: 0.7945113585748258, iteration: 353276
loss: 1.0556126832962036,grad_norm: 0.9999998449745662, iteration: 353277
loss: 0.9852442741394043,grad_norm: 0.7083535469958601, iteration: 353278
loss: 0.9651745557785034,grad_norm: 0.735364676213902, iteration: 353279
loss: 0.9702402949333191,grad_norm: 0.8660475656716426, iteration: 353280
loss: 0.9899395108222961,grad_norm: 0.9992224091203507, iteration: 353281
loss: 1.0647332668304443,grad_norm: 0.7268391579869328, iteration: 353282
loss: 0.9846452474594116,grad_norm: 0.719095325094418, iteration: 353283
loss: 1.016885757446289,grad_norm: 0.8495288094529482, iteration: 353284
loss: 1.0050023794174194,grad_norm: 0.7961608055120567, iteration: 353285
loss: 1.0276135206222534,grad_norm: 0.7732508905739653, iteration: 353286
loss: 1.0636186599731445,grad_norm: 0.9999995722984789, iteration: 353287
loss: 1.0034477710723877,grad_norm: 0.884438811960989, iteration: 353288
loss: 1.0130829811096191,grad_norm: 0.7762270417909228, iteration: 353289
loss: 0.9708974361419678,grad_norm: 0.9189962183402851, iteration: 353290
loss: 0.9554374814033508,grad_norm: 0.8106631763204453, iteration: 353291
loss: 0.9785796999931335,grad_norm: 0.9221242926677856, iteration: 353292
loss: 0.9935861229896545,grad_norm: 0.8654355786778085, iteration: 353293
loss: 1.06107497215271,grad_norm: 0.9999999184139998, iteration: 353294
loss: 0.9982686042785645,grad_norm: 0.8910538749884259, iteration: 353295
loss: 1.0173701047897339,grad_norm: 0.9999993905291191, iteration: 353296
loss: 0.9925789833068848,grad_norm: 0.8064357661989567, iteration: 353297
loss: 0.9760655760765076,grad_norm: 0.7820713735182248, iteration: 353298
loss: 1.0366415977478027,grad_norm: 0.866731098858503, iteration: 353299
loss: 1.032455563545227,grad_norm: 0.8552669754244239, iteration: 353300
loss: 1.087191104888916,grad_norm: 0.9999991581871954, iteration: 353301
loss: 1.0008538961410522,grad_norm: 0.8286838566952656, iteration: 353302
loss: 1.001413345336914,grad_norm: 0.9980685879558127, iteration: 353303
loss: 0.9791967868804932,grad_norm: 0.9999992569885905, iteration: 353304
loss: 1.0069193840026855,grad_norm: 0.7256337995246112, iteration: 353305
loss: 1.0087709426879883,grad_norm: 0.8607607166889554, iteration: 353306
loss: 1.023473858833313,grad_norm: 0.9999990711528401, iteration: 353307
loss: 0.9898802638053894,grad_norm: 0.8715183463429442, iteration: 353308
loss: 0.9951984882354736,grad_norm: 0.8640416017447754, iteration: 353309
loss: 1.0078405141830444,grad_norm: 0.7498138644697222, iteration: 353310
loss: 1.012222170829773,grad_norm: 0.73998024339386, iteration: 353311
loss: 1.0025746822357178,grad_norm: 0.7125112845303393, iteration: 353312
loss: 0.978036642074585,grad_norm: 0.7414542988070716, iteration: 353313
loss: 0.9832630157470703,grad_norm: 0.7624021727477222, iteration: 353314
loss: 0.9893825650215149,grad_norm: 0.7530117916434578, iteration: 353315
loss: 1.0012696981430054,grad_norm: 0.6765189647422551, iteration: 353316
loss: 0.9924283623695374,grad_norm: 0.9324568651602528, iteration: 353317
loss: 0.9690353274345398,grad_norm: 0.7352420298666532, iteration: 353318
loss: 0.9888504147529602,grad_norm: 0.7569350352840175, iteration: 353319
loss: 1.0085394382476807,grad_norm: 0.9625016842336863, iteration: 353320
loss: 1.0151814222335815,grad_norm: 0.8289635560417142, iteration: 353321
loss: 1.0155881643295288,grad_norm: 0.9999990804895694, iteration: 353322
loss: 0.9906171560287476,grad_norm: 0.8310545270385739, iteration: 353323
loss: 1.0100947618484497,grad_norm: 0.9999994786415497, iteration: 353324
loss: 1.0216830968856812,grad_norm: 0.8399580523233772, iteration: 353325
loss: 0.9697344303131104,grad_norm: 0.7843492708845828, iteration: 353326
loss: 1.0199469327926636,grad_norm: 0.8478982539780746, iteration: 353327
loss: 1.0275460481643677,grad_norm: 0.8309049977756707, iteration: 353328
loss: 0.9919692873954773,grad_norm: 0.7048196257850041, iteration: 353329
loss: 1.032716155052185,grad_norm: 0.8138484430469997, iteration: 353330
loss: 1.0004215240478516,grad_norm: 0.7853640852037119, iteration: 353331
loss: 0.9885993003845215,grad_norm: 0.999999046230342, iteration: 353332
loss: 1.034199833869934,grad_norm: 0.9524007715746317, iteration: 353333
loss: 0.9653360843658447,grad_norm: 0.7585539267820618, iteration: 353334
loss: 0.990936815738678,grad_norm: 0.7537234167686744, iteration: 353335
loss: 0.9985970854759216,grad_norm: 0.8730898311126308, iteration: 353336
loss: 1.0328316688537598,grad_norm: 0.9075513746166286, iteration: 353337
loss: 1.0346037149429321,grad_norm: 0.9818462792231699, iteration: 353338
loss: 0.9858343005180359,grad_norm: 0.7156155982832529, iteration: 353339
loss: 0.9609925746917725,grad_norm: 0.7930485041167258, iteration: 353340
loss: 0.980032205581665,grad_norm: 0.905795357023134, iteration: 353341
loss: 1.0102624893188477,grad_norm: 0.7116521047823843, iteration: 353342
loss: 1.0152932405471802,grad_norm: 0.8498505257930626, iteration: 353343
loss: 1.018467664718628,grad_norm: 0.7452153502289831, iteration: 353344
loss: 1.0146057605743408,grad_norm: 0.8617950001740107, iteration: 353345
loss: 0.9841215014457703,grad_norm: 0.8148091183845044, iteration: 353346
loss: 1.095641016960144,grad_norm: 0.9124024158659649, iteration: 353347
loss: 0.9777553677558899,grad_norm: 0.8625035482219294, iteration: 353348
loss: 0.9800043702125549,grad_norm: 0.7890844607642732, iteration: 353349
loss: 1.0131248235702515,grad_norm: 0.8362087841631427, iteration: 353350
loss: 1.0046476125717163,grad_norm: 0.7133929495730976, iteration: 353351
loss: 1.0032806396484375,grad_norm: 0.7670107571900661, iteration: 353352
loss: 1.0150721073150635,grad_norm: 0.8224010768339554, iteration: 353353
loss: 0.9836692214012146,grad_norm: 0.8391639770441182, iteration: 353354
loss: 0.981415331363678,grad_norm: 0.8088204796320211, iteration: 353355
loss: 1.0214388370513916,grad_norm: 0.8588439723008918, iteration: 353356
loss: 0.991852879524231,grad_norm: 0.9478470362753004, iteration: 353357
loss: 1.018941879272461,grad_norm: 0.99999906114783, iteration: 353358
loss: 1.0849335193634033,grad_norm: 0.9999997488113888, iteration: 353359
loss: 0.9907700419425964,grad_norm: 0.7596603616658698, iteration: 353360
loss: 0.9707757830619812,grad_norm: 0.7588217260545752, iteration: 353361
loss: 0.9795638918876648,grad_norm: 0.8073105775737257, iteration: 353362
loss: 1.0107171535491943,grad_norm: 0.9555931947702474, iteration: 353363
loss: 0.9790894389152527,grad_norm: 0.7590172538995537, iteration: 353364
loss: 0.9938673377037048,grad_norm: 0.7379510388004058, iteration: 353365
loss: 0.9912857413291931,grad_norm: 0.8278888405826378, iteration: 353366
loss: 0.9694409370422363,grad_norm: 0.9999990199821166, iteration: 353367
loss: 0.9342648386955261,grad_norm: 0.9046105722636533, iteration: 353368
loss: 0.9803428053855896,grad_norm: 0.9999991901042888, iteration: 353369
loss: 0.9993644952774048,grad_norm: 0.7324716850780747, iteration: 353370
loss: 1.0101250410079956,grad_norm: 0.8318153943382414, iteration: 353371
loss: 1.0625108480453491,grad_norm: 0.9020099706084935, iteration: 353372
loss: 1.033307671546936,grad_norm: 0.9999990502225736, iteration: 353373
loss: 1.0095411539077759,grad_norm: 0.9124068575458932, iteration: 353374
loss: 0.9911863207817078,grad_norm: 0.9999990767546122, iteration: 353375
loss: 0.9642428159713745,grad_norm: 0.6864679421956118, iteration: 353376
loss: 0.9787417054176331,grad_norm: 0.9481990042122453, iteration: 353377
loss: 1.1106581687927246,grad_norm: 0.9999991272688163, iteration: 353378
loss: 1.007716417312622,grad_norm: 0.8956042390392233, iteration: 353379
loss: 1.0304945707321167,grad_norm: 0.8790459996817745, iteration: 353380
loss: 1.037672996520996,grad_norm: 0.8017360419457737, iteration: 353381
loss: 1.0029048919677734,grad_norm: 0.9546789427929767, iteration: 353382
loss: 1.0140846967697144,grad_norm: 0.7202265977045467, iteration: 353383
loss: 0.9873050451278687,grad_norm: 0.9581110397791006, iteration: 353384
loss: 1.204736590385437,grad_norm: 0.9999996793662277, iteration: 353385
loss: 1.021972417831421,grad_norm: 0.8362551789147064, iteration: 353386
loss: 0.9600990414619446,grad_norm: 0.9130862794015552, iteration: 353387
loss: 0.9647574424743652,grad_norm: 0.8019085403369375, iteration: 353388
loss: 0.9530854821205139,grad_norm: 0.7048333500825562, iteration: 353389
loss: 0.9604374766349792,grad_norm: 0.6934506796895874, iteration: 353390
loss: 1.0222021341323853,grad_norm: 0.7098691541920421, iteration: 353391
loss: 0.9885305166244507,grad_norm: 0.8552342820549159, iteration: 353392
loss: 0.9944031238555908,grad_norm: 0.9999991457052505, iteration: 353393
loss: 1.0423533916473389,grad_norm: 0.92859897279173, iteration: 353394
loss: 0.9811729788780212,grad_norm: 0.8614588432493986, iteration: 353395
loss: 0.9951772689819336,grad_norm: 0.8645887569848627, iteration: 353396
loss: 0.987113893032074,grad_norm: 0.7628920084173122, iteration: 353397
loss: 0.9849492311477661,grad_norm: 0.7812741897306298, iteration: 353398
loss: 1.0227669477462769,grad_norm: 0.9999995359907006, iteration: 353399
loss: 1.0089643001556396,grad_norm: 0.7229134416079802, iteration: 353400
loss: 1.0124231576919556,grad_norm: 0.9281375841949044, iteration: 353401
loss: 0.9868272542953491,grad_norm: 0.8914670852067244, iteration: 353402
loss: 0.9456658363342285,grad_norm: 0.80198305022709, iteration: 353403
loss: 0.9767979383468628,grad_norm: 0.733705568216534, iteration: 353404
loss: 1.0170761346817017,grad_norm: 0.7805176716983453, iteration: 353405
loss: 0.9705025553703308,grad_norm: 0.7971898457287563, iteration: 353406
loss: 1.066793441772461,grad_norm: 0.9999992087694574, iteration: 353407
loss: 0.9859783053398132,grad_norm: 0.772893464942613, iteration: 353408
loss: 0.9654389023780823,grad_norm: 0.7429948187494233, iteration: 353409
loss: 0.9574454426765442,grad_norm: 0.7015561750086872, iteration: 353410
loss: 1.0281575918197632,grad_norm: 0.8247413802943043, iteration: 353411
loss: 1.0018398761749268,grad_norm: 0.9505883011293432, iteration: 353412
loss: 0.9942703247070312,grad_norm: 0.8386498122984221, iteration: 353413
loss: 1.02337646484375,grad_norm: 0.9999994679821494, iteration: 353414
loss: 1.0041478872299194,grad_norm: 0.9687213579777483, iteration: 353415
loss: 0.9801951050758362,grad_norm: 0.8847078657908656, iteration: 353416
loss: 0.9750673770904541,grad_norm: 0.8251434355304357, iteration: 353417
loss: 0.9966195225715637,grad_norm: 0.8655015676951251, iteration: 353418
loss: 1.0217686891555786,grad_norm: 0.9486068415466375, iteration: 353419
loss: 1.0375312566757202,grad_norm: 0.7571418600163358, iteration: 353420
loss: 1.0194429159164429,grad_norm: 0.952721634750179, iteration: 353421
loss: 1.0000638961791992,grad_norm: 0.7869298584102212, iteration: 353422
loss: 1.0104848146438599,grad_norm: 0.8042089129187043, iteration: 353423
loss: 1.0138427019119263,grad_norm: 0.7554881261670133, iteration: 353424
loss: 0.9883947372436523,grad_norm: 0.7777560358554738, iteration: 353425
loss: 1.0030384063720703,grad_norm: 0.9999999523934627, iteration: 353426
loss: 1.0575944185256958,grad_norm: 0.9999994147683503, iteration: 353427
loss: 0.9919084906578064,grad_norm: 0.6437970327969853, iteration: 353428
loss: 1.0157545804977417,grad_norm: 0.9999991152324453, iteration: 353429
loss: 1.0131549835205078,grad_norm: 0.734057495567616, iteration: 353430
loss: 1.0602797269821167,grad_norm: 0.9821617382807045, iteration: 353431
loss: 1.0337927341461182,grad_norm: 0.804493610904534, iteration: 353432
loss: 0.9849855303764343,grad_norm: 0.9313637398529614, iteration: 353433
loss: 0.9684884548187256,grad_norm: 0.9930485729379457, iteration: 353434
loss: 1.0229190587997437,grad_norm: 0.7402498154027427, iteration: 353435
loss: 1.0610370635986328,grad_norm: 0.7551376024738495, iteration: 353436
loss: 1.049612045288086,grad_norm: 0.9999997830961509, iteration: 353437
loss: 1.0129425525665283,grad_norm: 0.881177138613463, iteration: 353438
loss: 0.9786316752433777,grad_norm: 0.9999998632493008, iteration: 353439
loss: 1.3278521299362183,grad_norm: 0.999999671469113, iteration: 353440
loss: 1.010262131690979,grad_norm: 0.9039185221752668, iteration: 353441
loss: 0.9692603349685669,grad_norm: 0.9999990984167461, iteration: 353442
loss: 1.0166103839874268,grad_norm: 0.8445463495313892, iteration: 353443
loss: 0.9848846197128296,grad_norm: 0.7847189874228373, iteration: 353444
loss: 1.0071475505828857,grad_norm: 0.9999991702778821, iteration: 353445
loss: 1.0135023593902588,grad_norm: 0.9999999425866638, iteration: 353446
loss: 0.9709392786026001,grad_norm: 0.7895733413010144, iteration: 353447
loss: 0.9867035150527954,grad_norm: 0.8276376872935965, iteration: 353448
loss: 1.2295444011688232,grad_norm: 0.9999995279743936, iteration: 353449
loss: 1.0364863872528076,grad_norm: 0.9999999069782745, iteration: 353450
loss: 1.1457599401474,grad_norm: 0.9999998301234372, iteration: 353451
loss: 1.0181121826171875,grad_norm: 0.999999115313435, iteration: 353452
loss: 0.9947519898414612,grad_norm: 0.7765165034888358, iteration: 353453
loss: 0.9776961803436279,grad_norm: 0.7850922784779981, iteration: 353454
loss: 0.9987112879753113,grad_norm: 0.8312465613514324, iteration: 353455
loss: 1.1813557147979736,grad_norm: 0.9999998492249933, iteration: 353456
loss: 0.9884471297264099,grad_norm: 0.885573902209385, iteration: 353457
loss: 1.0743104219436646,grad_norm: 0.9999994841252823, iteration: 353458
loss: 1.118548035621643,grad_norm: 0.9999998771251304, iteration: 353459
loss: 0.9842983484268188,grad_norm: 0.9053449990299399, iteration: 353460
loss: 0.9830285310745239,grad_norm: 0.9999992525384942, iteration: 353461
loss: 0.9996405243873596,grad_norm: 0.8310719411079012, iteration: 353462
loss: 1.0248072147369385,grad_norm: 0.7823979311567729, iteration: 353463
loss: 0.9671428799629211,grad_norm: 0.6644577841383017, iteration: 353464
loss: 1.0309107303619385,grad_norm: 0.9999998005775502, iteration: 353465
loss: 1.0234897136688232,grad_norm: 0.9219332358070594, iteration: 353466
loss: 1.03010892868042,grad_norm: 0.9999993353573285, iteration: 353467
loss: 1.0016937255859375,grad_norm: 0.9999990716759897, iteration: 353468
loss: 1.0321630239486694,grad_norm: 0.9999994142800689, iteration: 353469
loss: 1.098170280456543,grad_norm: 0.8083484995102629, iteration: 353470
loss: 1.11260187625885,grad_norm: 0.9378633138348638, iteration: 353471
loss: 1.017482876777649,grad_norm: 0.6896424228382114, iteration: 353472
loss: 1.013451337814331,grad_norm: 0.9999990076647424, iteration: 353473
loss: 0.9848873615264893,grad_norm: 0.9362843093377634, iteration: 353474
loss: 1.0031089782714844,grad_norm: 0.8240937337477864, iteration: 353475
loss: 1.023230791091919,grad_norm: 0.9999991924150595, iteration: 353476
loss: 1.0075377225875854,grad_norm: 0.7457954257219293, iteration: 353477
loss: 0.9952571988105774,grad_norm: 0.9999992047475063, iteration: 353478
loss: 1.0043221712112427,grad_norm: 0.8158283689421193, iteration: 353479
loss: 1.0041433572769165,grad_norm: 0.6627413390919076, iteration: 353480
loss: 0.9775241017341614,grad_norm: 0.9208927833005033, iteration: 353481
loss: 1.0821704864501953,grad_norm: 0.8460855901516814, iteration: 353482
loss: 0.9859947562217712,grad_norm: 0.7958360505001216, iteration: 353483
loss: 0.9798955321311951,grad_norm: 0.7660227280291615, iteration: 353484
loss: 1.0025678873062134,grad_norm: 0.9628623501875104, iteration: 353485
loss: 1.0891146659851074,grad_norm: 0.9999994677165301, iteration: 353486
loss: 1.0255520343780518,grad_norm: 0.864487067926967, iteration: 353487
loss: 1.0519806146621704,grad_norm: 0.9331692974283551, iteration: 353488
loss: 0.9686468839645386,grad_norm: 0.8212257768676122, iteration: 353489
loss: 1.0286171436309814,grad_norm: 0.8976353087491173, iteration: 353490
loss: 1.0002135038375854,grad_norm: 0.8263067802564796, iteration: 353491
loss: 1.0018506050109863,grad_norm: 0.7343979732852075, iteration: 353492
loss: 0.9969339966773987,grad_norm: 0.9999995918236158, iteration: 353493
loss: 1.1746290922164917,grad_norm: 0.9999997508812762, iteration: 353494
loss: 1.050246000289917,grad_norm: 0.9675671195611688, iteration: 353495
loss: 1.0110361576080322,grad_norm: 0.9999991930787895, iteration: 353496
loss: 0.992368757724762,grad_norm: 0.7653536175447246, iteration: 353497
loss: 1.022782564163208,grad_norm: 0.999999373931315, iteration: 353498
loss: 0.9702816605567932,grad_norm: 0.7567262824807934, iteration: 353499
loss: 0.9996369481086731,grad_norm: 0.8182255701714171, iteration: 353500
loss: 1.033434510231018,grad_norm: 0.7458107366209882, iteration: 353501
loss: 1.1896027326583862,grad_norm: 0.9999995578426337, iteration: 353502
loss: 1.002137303352356,grad_norm: 0.9510594044283124, iteration: 353503
loss: 1.0068156719207764,grad_norm: 0.8684608678116633, iteration: 353504
loss: 1.041859745979309,grad_norm: 0.7633038610532088, iteration: 353505
loss: 1.0013082027435303,grad_norm: 0.874199021024693, iteration: 353506
loss: 1.0043624639511108,grad_norm: 0.7737787838626842, iteration: 353507
loss: 1.0356104373931885,grad_norm: 0.774351348896463, iteration: 353508
loss: 0.9961205720901489,grad_norm: 0.6661667765963089, iteration: 353509
loss: 1.0089473724365234,grad_norm: 0.9563836045512754, iteration: 353510
loss: 1.0226186513900757,grad_norm: 0.8279496229786057, iteration: 353511
loss: 0.9678903222084045,grad_norm: 0.9999990356387345, iteration: 353512
loss: 1.0121568441390991,grad_norm: 0.9008915116135222, iteration: 353513
loss: 0.9781132936477661,grad_norm: 0.8137989456821936, iteration: 353514
loss: 0.9841743111610413,grad_norm: 0.7888272761827331, iteration: 353515
loss: 0.9892714023590088,grad_norm: 0.805440779391292, iteration: 353516
loss: 0.981296181678772,grad_norm: 0.7050269761053466, iteration: 353517
loss: 0.981105625629425,grad_norm: 0.864012541131362, iteration: 353518
loss: 1.0767349004745483,grad_norm: 0.999999852788847, iteration: 353519
loss: 1.0070509910583496,grad_norm: 0.811524317744834, iteration: 353520
loss: 1.0197455883026123,grad_norm: 0.8204571829823293, iteration: 353521
loss: 0.9887512922286987,grad_norm: 0.8690979749189525, iteration: 353522
loss: 1.0399092435836792,grad_norm: 0.6460154426330758, iteration: 353523
loss: 1.0160431861877441,grad_norm: 0.9999990938504646, iteration: 353524
loss: 1.0148489475250244,grad_norm: 0.7794033029044499, iteration: 353525
loss: 1.0250757932662964,grad_norm: 0.7279853502746314, iteration: 353526
loss: 0.9997647404670715,grad_norm: 0.7938924992009282, iteration: 353527
loss: 0.9744202494621277,grad_norm: 0.9326401821745063, iteration: 353528
loss: 1.0513489246368408,grad_norm: 0.814814418059388, iteration: 353529
loss: 0.9654749631881714,grad_norm: 0.7754530896786472, iteration: 353530
loss: 1.0004316568374634,grad_norm: 0.9100901975380444, iteration: 353531
loss: 0.9850643873214722,grad_norm: 0.6771366514570095, iteration: 353532
loss: 0.962805449962616,grad_norm: 0.780074038520324, iteration: 353533
loss: 1.0541131496429443,grad_norm: 0.8670853052877016, iteration: 353534
loss: 0.9798475503921509,grad_norm: 0.7557739780340302, iteration: 353535
loss: 0.989811897277832,grad_norm: 0.7009826146939658, iteration: 353536
loss: 1.0214905738830566,grad_norm: 0.8138320096012585, iteration: 353537
loss: 1.0391684770584106,grad_norm: 0.771610031577064, iteration: 353538
loss: 1.0792670249938965,grad_norm: 0.9999998456157305, iteration: 353539
loss: 1.0035138130187988,grad_norm: 0.8539497314354145, iteration: 353540
loss: 0.9974474906921387,grad_norm: 0.8130364345318588, iteration: 353541
loss: 0.9820472002029419,grad_norm: 0.8102099890697375, iteration: 353542
loss: 1.0011472702026367,grad_norm: 0.9463916647120845, iteration: 353543
loss: 1.0311808586120605,grad_norm: 0.810213582455955, iteration: 353544
loss: 1.0187158584594727,grad_norm: 0.6965591608756825, iteration: 353545
loss: 1.004967212677002,grad_norm: 0.7253955672874304, iteration: 353546
loss: 1.0373159646987915,grad_norm: 0.8433056562576908, iteration: 353547
loss: 0.992933988571167,grad_norm: 0.8412449863426422, iteration: 353548
loss: 0.9939591288566589,grad_norm: 0.802134176545839, iteration: 353549
loss: 0.9810711145401001,grad_norm: 0.9999998761217732, iteration: 353550
loss: 1.013961911201477,grad_norm: 0.6984484122927297, iteration: 353551
loss: 1.0476794242858887,grad_norm: 0.9999992789502048, iteration: 353552
loss: 1.031806468963623,grad_norm: 0.9999995213649041, iteration: 353553
loss: 0.9824382066726685,grad_norm: 0.8739693434188816, iteration: 353554
loss: 1.0379246473312378,grad_norm: 0.9999991507621362, iteration: 353555
loss: 1.032667875289917,grad_norm: 0.8607665021945982, iteration: 353556
loss: 1.0091137886047363,grad_norm: 0.7949842639719905, iteration: 353557
loss: 0.982557475566864,grad_norm: 0.7445233817981484, iteration: 353558
loss: 0.9900684356689453,grad_norm: 0.9999990985841987, iteration: 353559
loss: 0.9509353637695312,grad_norm: 0.8087830775523462, iteration: 353560
loss: 1.0120364427566528,grad_norm: 0.7360814505637417, iteration: 353561
loss: 0.9982526898384094,grad_norm: 0.9999994908757003, iteration: 353562
loss: 1.002143144607544,grad_norm: 0.7700927692929218, iteration: 353563
loss: 1.0205535888671875,grad_norm: 0.7981314201924746, iteration: 353564
loss: 1.0059646368026733,grad_norm: 0.7862475697272384, iteration: 353565
loss: 1.0282771587371826,grad_norm: 0.9999991646198104, iteration: 353566
loss: 0.9703821539878845,grad_norm: 0.7720290051788612, iteration: 353567
loss: 0.9868974089622498,grad_norm: 0.905196840547698, iteration: 353568
loss: 1.0133570432662964,grad_norm: 0.7423493892469278, iteration: 353569
loss: 0.9942877292633057,grad_norm: 0.8166448582910324, iteration: 353570
loss: 1.0025585889816284,grad_norm: 0.864741800490197, iteration: 353571
loss: 1.024353265762329,grad_norm: 0.9999996218863845, iteration: 353572
loss: 0.9902286529541016,grad_norm: 0.738550705122954, iteration: 353573
loss: 1.0065873861312866,grad_norm: 0.795168581582041, iteration: 353574
loss: 1.0122456550598145,grad_norm: 0.9554971500975851, iteration: 353575
loss: 0.9947236180305481,grad_norm: 0.8783350682401538, iteration: 353576
loss: 0.9822176694869995,grad_norm: 0.7873743349233278, iteration: 353577
loss: 1.000329613685608,grad_norm: 0.977377222823598, iteration: 353578
loss: 1.0070345401763916,grad_norm: 0.6750556963460251, iteration: 353579
loss: 1.1014409065246582,grad_norm: 0.9999994152141684, iteration: 353580
loss: 1.0409126281738281,grad_norm: 0.7617819212642432, iteration: 353581
loss: 1.1722491979599,grad_norm: 0.8814475666571439, iteration: 353582
loss: 1.0111472606658936,grad_norm: 0.7747412356002557, iteration: 353583
loss: 0.9726075530052185,grad_norm: 0.6956720624299166, iteration: 353584
loss: 0.9751519560813904,grad_norm: 0.7926855623169397, iteration: 353585
loss: 1.0559494495391846,grad_norm: 0.8302948978151684, iteration: 353586
loss: 1.0062322616577148,grad_norm: 0.9999990526908898, iteration: 353587
loss: 1.0015380382537842,grad_norm: 0.8343110041130616, iteration: 353588
loss: 0.9870660901069641,grad_norm: 0.828995811193919, iteration: 353589
loss: 0.9897734522819519,grad_norm: 0.7951589924181566, iteration: 353590
loss: 1.0044448375701904,grad_norm: 0.8247155393675664, iteration: 353591
loss: 1.0079853534698486,grad_norm: 0.7622680263993559, iteration: 353592
loss: 1.0244166851043701,grad_norm: 0.7528094865120366, iteration: 353593
loss: 0.9935322999954224,grad_norm: 0.82118550868193, iteration: 353594
loss: 0.9950645565986633,grad_norm: 0.7974159526427069, iteration: 353595
loss: 1.0000585317611694,grad_norm: 0.8311436388146181, iteration: 353596
loss: 1.0837193727493286,grad_norm: 0.9999992370820211, iteration: 353597
loss: 1.0705829858779907,grad_norm: 0.9999999638431487, iteration: 353598
loss: 1.000004768371582,grad_norm: 0.882312291843695, iteration: 353599
loss: 0.9879549741744995,grad_norm: 0.8672436674456466, iteration: 353600
loss: 0.9793193340301514,grad_norm: 0.7873811594496466, iteration: 353601
loss: 1.0369023084640503,grad_norm: 0.9999991858149163, iteration: 353602
loss: 1.0473114252090454,grad_norm: 0.9999993609020293, iteration: 353603
loss: 0.9947736859321594,grad_norm: 0.7577405061885012, iteration: 353604
loss: 1.036989450454712,grad_norm: 0.9999995321752158, iteration: 353605
loss: 0.9874810576438904,grad_norm: 0.7499557490274172, iteration: 353606
loss: 1.0265147686004639,grad_norm: 0.8810201816046923, iteration: 353607
loss: 1.030290961265564,grad_norm: 0.7973430819291083, iteration: 353608
loss: 0.9960191249847412,grad_norm: 0.9028341513395367, iteration: 353609
loss: 1.0161393880844116,grad_norm: 0.999999211520831, iteration: 353610
loss: 1.0664942264556885,grad_norm: 0.9999995729064622, iteration: 353611
loss: 0.982965350151062,grad_norm: 0.7823979810425887, iteration: 353612
loss: 1.038957118988037,grad_norm: 0.8665318867910903, iteration: 353613
loss: 0.9881515502929688,grad_norm: 0.7360528290427566, iteration: 353614
loss: 1.0240883827209473,grad_norm: 0.8591776825217972, iteration: 353615
loss: 1.0026366710662842,grad_norm: 0.8089042254176352, iteration: 353616
loss: 1.0003249645233154,grad_norm: 0.7512459671968457, iteration: 353617
loss: 0.9946150183677673,grad_norm: 0.7656877723022856, iteration: 353618
loss: 0.9865885376930237,grad_norm: 0.8824375600502452, iteration: 353619
loss: 1.0124179124832153,grad_norm: 0.8695113167940064, iteration: 353620
loss: 1.0179941654205322,grad_norm: 0.7412064985243845, iteration: 353621
loss: 1.0185878276824951,grad_norm: 0.9999991130633352, iteration: 353622
loss: 1.0210646390914917,grad_norm: 0.8630415733725574, iteration: 353623
loss: 1.0049934387207031,grad_norm: 0.8207526816413406, iteration: 353624
loss: 1.1200557947158813,grad_norm: 0.9999991792966847, iteration: 353625
loss: 0.9684833884239197,grad_norm: 0.8304712425436058, iteration: 353626
loss: 1.0153757333755493,grad_norm: 0.7455784202361773, iteration: 353627
loss: 0.9830297231674194,grad_norm: 0.9999989715843552, iteration: 353628
loss: 1.0132447481155396,grad_norm: 0.8050162702593847, iteration: 353629
loss: 1.016223669052124,grad_norm: 0.976224401653946, iteration: 353630
loss: 0.9719883799552917,grad_norm: 0.9325588765177656, iteration: 353631
loss: 0.9930511116981506,grad_norm: 0.8783046527242343, iteration: 353632
loss: 1.021848440170288,grad_norm: 0.7542790586565119, iteration: 353633
loss: 1.0822781324386597,grad_norm: 0.9999998810416332, iteration: 353634
loss: 1.0072166919708252,grad_norm: 0.9999997129366985, iteration: 353635
loss: 1.0423861742019653,grad_norm: 0.9999997185965572, iteration: 353636
loss: 1.0093520879745483,grad_norm: 0.9336706413199035, iteration: 353637
loss: 1.076196551322937,grad_norm: 0.9224961292300141, iteration: 353638
loss: 0.978120744228363,grad_norm: 0.7527490950910675, iteration: 353639
loss: 0.9848504066467285,grad_norm: 0.6200809180482548, iteration: 353640
loss: 0.9805532097816467,grad_norm: 0.8393883104553124, iteration: 353641
loss: 0.9704293608665466,grad_norm: 0.7013664892214665, iteration: 353642
loss: 1.1342887878417969,grad_norm: 0.9999998947339863, iteration: 353643
loss: 0.9722287058830261,grad_norm: 0.8367551063780294, iteration: 353644
loss: 0.9892215132713318,grad_norm: 0.8302445895018988, iteration: 353645
loss: 1.065953254699707,grad_norm: 0.9104091209643322, iteration: 353646
loss: 0.9779044389724731,grad_norm: 0.9244326464426662, iteration: 353647
loss: 0.9505656361579895,grad_norm: 0.8878655498383542, iteration: 353648
loss: 0.9606212973594666,grad_norm: 0.9689920096554611, iteration: 353649
loss: 0.9892650246620178,grad_norm: 0.994539275207252, iteration: 353650
loss: 1.1003905534744263,grad_norm: 0.9999990972269532, iteration: 353651
loss: 1.0243072509765625,grad_norm: 0.7383109004302565, iteration: 353652
loss: 1.0449247360229492,grad_norm: 0.9999992049392219, iteration: 353653
loss: 0.9711216688156128,grad_norm: 0.9999991487616678, iteration: 353654
loss: 0.9884741902351379,grad_norm: 0.850588210942451, iteration: 353655
loss: 1.055250883102417,grad_norm: 0.9999994451411635, iteration: 353656
loss: 0.9716845154762268,grad_norm: 0.8014499084317084, iteration: 353657
loss: 1.0028232336044312,grad_norm: 0.7305513359541164, iteration: 353658
loss: 0.9988541603088379,grad_norm: 0.9830345519098572, iteration: 353659
loss: 1.0704830884933472,grad_norm: 0.8277104507443462, iteration: 353660
loss: 1.0647355318069458,grad_norm: 0.7799354440599894, iteration: 353661
loss: 1.0743660926818848,grad_norm: 0.9172667434590641, iteration: 353662
loss: 0.9725764989852905,grad_norm: 0.8462395717649062, iteration: 353663
loss: 1.0406681299209595,grad_norm: 0.9999992803143457, iteration: 353664
loss: 0.99106764793396,grad_norm: 0.8541649063893173, iteration: 353665
loss: 1.0219835042953491,grad_norm: 0.8565267891714067, iteration: 353666
loss: 1.0116404294967651,grad_norm: 0.8942015201012936, iteration: 353667
loss: 1.0032836198806763,grad_norm: 0.7072030252092865, iteration: 353668
loss: 1.017187476158142,grad_norm: 0.7961497106673671, iteration: 353669
loss: 1.0661134719848633,grad_norm: 0.8763992328849508, iteration: 353670
loss: 1.009434700012207,grad_norm: 0.9999991918883889, iteration: 353671
loss: 0.9989215731620789,grad_norm: 0.7947151078694449, iteration: 353672
loss: 1.0382423400878906,grad_norm: 0.9243950253917868, iteration: 353673
loss: 1.0021440982818604,grad_norm: 0.9731991466276285, iteration: 353674
loss: 1.0703930854797363,grad_norm: 0.9999994772230316, iteration: 353675
loss: 1.0677074193954468,grad_norm: 0.9999992714999069, iteration: 353676
loss: 0.9923260807991028,grad_norm: 0.7706302153002746, iteration: 353677
loss: 0.9972116947174072,grad_norm: 0.8459101566939359, iteration: 353678
loss: 1.0003247261047363,grad_norm: 0.9200108548344738, iteration: 353679
loss: 1.008752465248108,grad_norm: 0.844923492471805, iteration: 353680
loss: 0.9955223202705383,grad_norm: 0.7922648316761394, iteration: 353681
loss: 0.9869366884231567,grad_norm: 0.7493190076352064, iteration: 353682
loss: 1.0069659948349,grad_norm: 0.8743113208331019, iteration: 353683
loss: 0.9715681076049805,grad_norm: 0.9999993210114936, iteration: 353684
loss: 1.0244442224502563,grad_norm: 0.8695915558560781, iteration: 353685
loss: 1.0294466018676758,grad_norm: 0.8292452533865925, iteration: 353686
loss: 1.000192403793335,grad_norm: 0.6973350035389644, iteration: 353687
loss: 1.0264595746994019,grad_norm: 0.9999994286950843, iteration: 353688
loss: 0.9734215140342712,grad_norm: 0.8012845994472687, iteration: 353689
loss: 1.0227047204971313,grad_norm: 0.889573781353891, iteration: 353690
loss: 1.0645766258239746,grad_norm: 0.778457345957789, iteration: 353691
loss: 1.0537898540496826,grad_norm: 0.8421193600851341, iteration: 353692
loss: 0.9794082045555115,grad_norm: 0.812678805371956, iteration: 353693
loss: 1.000688910484314,grad_norm: 0.8947878427389407, iteration: 353694
loss: 1.0375300645828247,grad_norm: 0.999999797342557, iteration: 353695
loss: 0.9922778010368347,grad_norm: 0.8405598992004396, iteration: 353696
loss: 1.04105544090271,grad_norm: 0.9035435361745963, iteration: 353697
loss: 0.9775596857070923,grad_norm: 0.8669931535455845, iteration: 353698
loss: 1.0076889991760254,grad_norm: 0.8808473189300381, iteration: 353699
loss: 0.993927001953125,grad_norm: 0.9999998359623206, iteration: 353700
loss: 0.9972692728042603,grad_norm: 0.9001742523868407, iteration: 353701
loss: 1.0350689888000488,grad_norm: 0.7959660641281676, iteration: 353702
loss: 0.9759218096733093,grad_norm: 0.8716257999623114, iteration: 353703
loss: 0.9900624752044678,grad_norm: 0.8580639341328546, iteration: 353704
loss: 1.0043822526931763,grad_norm: 0.9999990505828565, iteration: 353705
loss: 0.9913190007209778,grad_norm: 0.9999995514915169, iteration: 353706
loss: 1.0079138278961182,grad_norm: 0.7324367286812736, iteration: 353707
loss: 0.9933426380157471,grad_norm: 0.8299939389367271, iteration: 353708
loss: 1.021730661392212,grad_norm: 0.7890836557763422, iteration: 353709
loss: 0.9666394591331482,grad_norm: 0.7939508977023484, iteration: 353710
loss: 0.9971403479576111,grad_norm: 0.8587456388583335, iteration: 353711
loss: 1.0653388500213623,grad_norm: 0.8465213703155938, iteration: 353712
loss: 0.980373740196228,grad_norm: 0.7202558815928065, iteration: 353713
loss: 1.01712167263031,grad_norm: 0.9999992925705283, iteration: 353714
loss: 0.9915712475776672,grad_norm: 0.8586247567566867, iteration: 353715
loss: 1.007367491722107,grad_norm: 0.9202387185764969, iteration: 353716
loss: 1.0327905416488647,grad_norm: 0.9953993732472036, iteration: 353717
loss: 1.0033884048461914,grad_norm: 0.9999996042618718, iteration: 353718
loss: 0.9974406957626343,grad_norm: 0.8088997230173418, iteration: 353719
loss: 0.9898231029510498,grad_norm: 0.7279740728744557, iteration: 353720
loss: 0.9714600443840027,grad_norm: 0.9999991371815413, iteration: 353721
loss: 1.0496824979782104,grad_norm: 0.999999373340523, iteration: 353722
loss: 0.9692751169204712,grad_norm: 0.7771751638539628, iteration: 353723
loss: 1.0144721269607544,grad_norm: 0.9892085107450761, iteration: 353724
loss: 0.9921520352363586,grad_norm: 0.700468775810852, iteration: 353725
loss: 0.9889546632766724,grad_norm: 0.760800849449868, iteration: 353726
loss: 0.9875279068946838,grad_norm: 0.7643015995037956, iteration: 353727
loss: 1.0160151720046997,grad_norm: 0.6972306012360765, iteration: 353728
loss: 0.9611247181892395,grad_norm: 0.9095211145959193, iteration: 353729
loss: 1.061622142791748,grad_norm: 0.6762545928219931, iteration: 353730
loss: 1.021183729171753,grad_norm: 0.9598107195442377, iteration: 353731
loss: 1.0225229263305664,grad_norm: 0.9709503030636213, iteration: 353732
loss: 1.0004913806915283,grad_norm: 0.8356629884890189, iteration: 353733
loss: 1.0072691440582275,grad_norm: 0.9999990147727823, iteration: 353734
loss: 1.0080997943878174,grad_norm: 0.9796774542955484, iteration: 353735
loss: 1.0132429599761963,grad_norm: 0.9999995263493722, iteration: 353736
loss: 1.0309653282165527,grad_norm: 0.794341177302623, iteration: 353737
loss: 1.040380597114563,grad_norm: 0.8522097300858706, iteration: 353738
loss: 1.0141136646270752,grad_norm: 0.999999490415294, iteration: 353739
loss: 0.9704559445381165,grad_norm: 0.9999993703901547, iteration: 353740
loss: 1.0286318063735962,grad_norm: 0.9014457650660815, iteration: 353741
loss: 1.022592306137085,grad_norm: 0.7359824207275164, iteration: 353742
loss: 1.0181814432144165,grad_norm: 0.7337996523655108, iteration: 353743
loss: 1.018619179725647,grad_norm: 0.8135277391643752, iteration: 353744
loss: 0.9927952289581299,grad_norm: 0.7796310808265825, iteration: 353745
loss: 0.9878143072128296,grad_norm: 0.939867787581244, iteration: 353746
loss: 1.0121210813522339,grad_norm: 0.8513344801493615, iteration: 353747
loss: 0.9777302742004395,grad_norm: 0.9999997109415145, iteration: 353748
loss: 0.9918330907821655,grad_norm: 0.8615436965615866, iteration: 353749
loss: 1.020160436630249,grad_norm: 0.644085345844081, iteration: 353750
loss: 0.9952090382575989,grad_norm: 0.9900953196337919, iteration: 353751
loss: 0.9802204370498657,grad_norm: 0.7835213621133714, iteration: 353752
loss: 1.0016616582870483,grad_norm: 0.7594982263161061, iteration: 353753
loss: 0.9723841547966003,grad_norm: 0.8297247921105035, iteration: 353754
loss: 1.0222995281219482,grad_norm: 0.8494763308772298, iteration: 353755
loss: 1.0067275762557983,grad_norm: 0.8202021508826618, iteration: 353756
loss: 1.0171536207199097,grad_norm: 0.9726629032529639, iteration: 353757
loss: 1.00680410861969,grad_norm: 0.751799012279218, iteration: 353758
loss: 1.0039870738983154,grad_norm: 0.8156122978472733, iteration: 353759
loss: 0.9628767371177673,grad_norm: 0.9030660094374324, iteration: 353760
loss: 1.0109128952026367,grad_norm: 0.844371326609837, iteration: 353761
loss: 0.9986407160758972,grad_norm: 0.7562771616283996, iteration: 353762
loss: 1.0411726236343384,grad_norm: 0.9999995121364087, iteration: 353763
loss: 1.001355528831482,grad_norm: 0.6782471417699198, iteration: 353764
loss: 1.0076162815093994,grad_norm: 0.7956172923156895, iteration: 353765
loss: 0.9825994968414307,grad_norm: 0.7495159885550633, iteration: 353766
loss: 0.9940872192382812,grad_norm: 0.8336901605795894, iteration: 353767
loss: 1.0219708681106567,grad_norm: 0.9999993881844613, iteration: 353768
loss: 0.9913766980171204,grad_norm: 0.8031544265715898, iteration: 353769
loss: 0.9933780431747437,grad_norm: 0.7664581505922635, iteration: 353770
loss: 0.9800541400909424,grad_norm: 0.9999991607586988, iteration: 353771
loss: 0.9786136150360107,grad_norm: 0.7981854357559628, iteration: 353772
loss: 0.981179416179657,grad_norm: 0.8306922111074193, iteration: 353773
loss: 0.942237913608551,grad_norm: 0.7728476172306926, iteration: 353774
loss: 1.0001521110534668,grad_norm: 0.8403479999394304, iteration: 353775
loss: 0.9922128915786743,grad_norm: 0.7642316418692594, iteration: 353776
loss: 0.9955510497093201,grad_norm: 0.8714516341606069, iteration: 353777
loss: 1.0271360874176025,grad_norm: 0.7174015953235446, iteration: 353778
loss: 0.9796081781387329,grad_norm: 0.9173354527608838, iteration: 353779
loss: 1.0551087856292725,grad_norm: 0.927585001903328, iteration: 353780
loss: 1.028714895248413,grad_norm: 0.999999476227342, iteration: 353781
loss: 1.020046353340149,grad_norm: 0.9300817045066233, iteration: 353782
loss: 1.0156532526016235,grad_norm: 0.9322281867711952, iteration: 353783
loss: 0.9771596193313599,grad_norm: 0.9993802246385424, iteration: 353784
loss: 0.9904783368110657,grad_norm: 0.9999990526006932, iteration: 353785
loss: 1.0139492750167847,grad_norm: 0.9999991884621322, iteration: 353786
loss: 1.0601930618286133,grad_norm: 0.9999993979963134, iteration: 353787
loss: 1.023962140083313,grad_norm: 0.9116377275080906, iteration: 353788
loss: 1.0077276229858398,grad_norm: 0.7868844170615672, iteration: 353789
loss: 0.999488115310669,grad_norm: 0.8596827215660183, iteration: 353790
loss: 1.016595721244812,grad_norm: 0.979394768325626, iteration: 353791
loss: 1.0042247772216797,grad_norm: 0.7811877022889907, iteration: 353792
loss: 1.0145771503448486,grad_norm: 0.9999991156385636, iteration: 353793
loss: 0.9791107773780823,grad_norm: 0.8375302240564513, iteration: 353794
loss: 0.9906603097915649,grad_norm: 0.9729058232496635, iteration: 353795
loss: 0.9934223294258118,grad_norm: 0.8160965292973154, iteration: 353796
loss: 0.9894423484802246,grad_norm: 0.8836765857570942, iteration: 353797
loss: 1.0061349868774414,grad_norm: 0.7792929835553564, iteration: 353798
loss: 0.9861992597579956,grad_norm: 0.8429202627726803, iteration: 353799
loss: 0.9933239221572876,grad_norm: 0.9169866815033827, iteration: 353800
loss: 1.0078188180923462,grad_norm: 0.8518242268170768, iteration: 353801
loss: 0.9804056882858276,grad_norm: 0.7369859050232097, iteration: 353802
loss: 0.9576299786567688,grad_norm: 0.7507781738768464, iteration: 353803
loss: 1.0224601030349731,grad_norm: 0.8406949998851591, iteration: 353804
loss: 0.9910247921943665,grad_norm: 0.8346394799443702, iteration: 353805
loss: 0.9914682507514954,grad_norm: 0.919362929027785, iteration: 353806
loss: 1.0008983612060547,grad_norm: 0.9999990837799865, iteration: 353807
loss: 0.9686011672019958,grad_norm: 0.8552527126930998, iteration: 353808
loss: 1.0171966552734375,grad_norm: 0.8080157910915501, iteration: 353809
loss: 1.0160045623779297,grad_norm: 0.6720663843894599, iteration: 353810
loss: 0.9895926117897034,grad_norm: 0.999999311653119, iteration: 353811
loss: 1.0266488790512085,grad_norm: 0.7451715709790435, iteration: 353812
loss: 0.9582758545875549,grad_norm: 0.7593290691942327, iteration: 353813
loss: 1.005447506904602,grad_norm: 0.7842629769708487, iteration: 353814
loss: 0.9339686632156372,grad_norm: 0.9995921434220272, iteration: 353815
loss: 1.0150861740112305,grad_norm: 0.8430540265028227, iteration: 353816
loss: 0.9762718081474304,grad_norm: 0.9999990755993526, iteration: 353817
loss: 1.012757420539856,grad_norm: 0.9999993851746679, iteration: 353818
loss: 1.043906569480896,grad_norm: 0.9756469119838146, iteration: 353819
loss: 1.006934404373169,grad_norm: 0.7590631887210485, iteration: 353820
loss: 1.028666377067566,grad_norm: 0.889448090191233, iteration: 353821
loss: 1.028646469116211,grad_norm: 0.9534161270186957, iteration: 353822
loss: 0.9933387041091919,grad_norm: 0.9999999075420304, iteration: 353823
loss: 0.9991171360015869,grad_norm: 0.9999997373417963, iteration: 353824
loss: 1.0042102336883545,grad_norm: 0.9497558204378823, iteration: 353825
loss: 1.0213279724121094,grad_norm: 0.9999992228265491, iteration: 353826
loss: 0.9818571209907532,grad_norm: 0.8756812983950171, iteration: 353827
loss: 0.9958407282829285,grad_norm: 0.999999678535677, iteration: 353828
loss: 1.0072062015533447,grad_norm: 0.8689703895780607, iteration: 353829
loss: 1.0277900695800781,grad_norm: 0.8666078160175142, iteration: 353830
loss: 0.9781120419502258,grad_norm: 0.8436497473456972, iteration: 353831
loss: 0.9903275966644287,grad_norm: 0.8273662654412565, iteration: 353832
loss: 1.0423965454101562,grad_norm: 0.8792265800381955, iteration: 353833
loss: 1.005836844444275,grad_norm: 0.776214023136339, iteration: 353834
loss: 0.9931361079216003,grad_norm: 0.8532590613526296, iteration: 353835
loss: 0.9859911203384399,grad_norm: 0.8091395548311214, iteration: 353836
loss: 1.0166683197021484,grad_norm: 0.8080777218951665, iteration: 353837
loss: 1.005302906036377,grad_norm: 0.8165169385372771, iteration: 353838
loss: 0.9898720383644104,grad_norm: 0.7700121901515703, iteration: 353839
loss: 1.0050702095031738,grad_norm: 0.7583135482919626, iteration: 353840
loss: 1.0036540031433105,grad_norm: 0.7813229462427721, iteration: 353841
loss: 0.9677833318710327,grad_norm: 0.9411886473793768, iteration: 353842
loss: 0.9628541469573975,grad_norm: 0.7950808172033583, iteration: 353843
loss: 0.9654829502105713,grad_norm: 0.9104180033971075, iteration: 353844
loss: 1.0621730089187622,grad_norm: 0.9999997642726418, iteration: 353845
loss: 1.169419527053833,grad_norm: 0.9999990442526145, iteration: 353846
loss: 0.992944598197937,grad_norm: 0.8765686119155884, iteration: 353847
loss: 1.018721103668213,grad_norm: 0.8460667769779806, iteration: 353848
loss: 0.9960425496101379,grad_norm: 0.7889205829022254, iteration: 353849
loss: 1.101054310798645,grad_norm: 0.9999995265738995, iteration: 353850
loss: 1.0031040906906128,grad_norm: 0.8061420689309547, iteration: 353851
loss: 1.0630254745483398,grad_norm: 0.7798310810825759, iteration: 353852
loss: 0.9624487161636353,grad_norm: 0.9621923654244574, iteration: 353853
loss: 1.003450632095337,grad_norm: 0.7305463625502917, iteration: 353854
loss: 1.0228283405303955,grad_norm: 0.9205367696524421, iteration: 353855
loss: 1.0044935941696167,grad_norm: 0.8783168827484563, iteration: 353856
loss: 0.9953131675720215,grad_norm: 0.8771861385635116, iteration: 353857
loss: 1.0291569232940674,grad_norm: 0.7824566495861427, iteration: 353858
loss: 1.0461137294769287,grad_norm: 0.7196329655947215, iteration: 353859
loss: 1.0176395177841187,grad_norm: 0.7807158469482219, iteration: 353860
loss: 0.9957557320594788,grad_norm: 0.9708122947734176, iteration: 353861
loss: 1.013096570968628,grad_norm: 0.7438461894519292, iteration: 353862
loss: 1.006260633468628,grad_norm: 0.6850582478054232, iteration: 353863
loss: 0.9908875226974487,grad_norm: 0.6620066555076907, iteration: 353864
loss: 1.0021324157714844,grad_norm: 0.9999990894724999, iteration: 353865
loss: 1.041754961013794,grad_norm: 0.9323941146513873, iteration: 353866
loss: 1.0027694702148438,grad_norm: 0.9999995240344807, iteration: 353867
loss: 1.0649412870407104,grad_norm: 0.8059243687863429, iteration: 353868
loss: 0.9908300042152405,grad_norm: 0.8447604159501496, iteration: 353869
loss: 1.0151069164276123,grad_norm: 0.6778103005847876, iteration: 353870
loss: 1.0268714427947998,grad_norm: 0.9302681465533117, iteration: 353871
loss: 1.0379291772842407,grad_norm: 0.8938969218773788, iteration: 353872
loss: 1.0796774625778198,grad_norm: 0.7816340567969894, iteration: 353873
loss: 0.9696748852729797,grad_norm: 0.757015616585765, iteration: 353874
loss: 0.9774785041809082,grad_norm: 0.7885301549943239, iteration: 353875
loss: 0.9913343191146851,grad_norm: 0.8105427889979119, iteration: 353876
loss: 0.9846591949462891,grad_norm: 0.6974289492955805, iteration: 353877
loss: 0.9680854678153992,grad_norm: 0.8525155045886657, iteration: 353878
loss: 0.9845693111419678,grad_norm: 0.8728105941769692, iteration: 353879
loss: 1.0132927894592285,grad_norm: 0.6395829906560219, iteration: 353880
loss: 1.025223970413208,grad_norm: 0.9999990772303997, iteration: 353881
loss: 1.029669165611267,grad_norm: 0.7649950156192826, iteration: 353882
loss: 1.0016343593597412,grad_norm: 0.6863164950121401, iteration: 353883
loss: 1.018117904663086,grad_norm: 0.6923212081478057, iteration: 353884
loss: 0.976421058177948,grad_norm: 0.9426461290100326, iteration: 353885
loss: 0.9690174460411072,grad_norm: 0.8124870096363235, iteration: 353886
loss: 1.004734754562378,grad_norm: 0.8521806205242952, iteration: 353887
loss: 1.038024663925171,grad_norm: 0.7580186074362338, iteration: 353888
loss: 1.032965898513794,grad_norm: 0.8515674808762141, iteration: 353889
loss: 0.9552730321884155,grad_norm: 0.788032023050353, iteration: 353890
loss: 0.9884135723114014,grad_norm: 0.9999991909825646, iteration: 353891
loss: 0.9928131103515625,grad_norm: 0.9999992690736842, iteration: 353892
loss: 1.0224268436431885,grad_norm: 0.7574406917258945, iteration: 353893
loss: 1.0090619325637817,grad_norm: 0.898074570998534, iteration: 353894
loss: 1.0399788618087769,grad_norm: 0.9999992221598113, iteration: 353895
loss: 1.0180824995040894,grad_norm: 0.7529455740818325, iteration: 353896
loss: 1.0331343412399292,grad_norm: 0.9999992986381703, iteration: 353897
loss: 1.0052932500839233,grad_norm: 0.7941129033396624, iteration: 353898
loss: 1.0188488960266113,grad_norm: 0.9999991695554452, iteration: 353899
loss: 1.0032011270523071,grad_norm: 0.9168407426882855, iteration: 353900
loss: 1.00682532787323,grad_norm: 0.9552827686634049, iteration: 353901
loss: 0.9948838353157043,grad_norm: 0.8766847265173778, iteration: 353902
loss: 0.9757570624351501,grad_norm: 0.9206080845729187, iteration: 353903
loss: 0.9758602976799011,grad_norm: 0.7402882288349693, iteration: 353904
loss: 0.9900997877120972,grad_norm: 0.8682594634067349, iteration: 353905
loss: 1.0163493156433105,grad_norm: 0.999999430054903, iteration: 353906
loss: 1.0396336317062378,grad_norm: 0.8414904929030057, iteration: 353907
loss: 1.0280022621154785,grad_norm: 0.9356531171666289, iteration: 353908
loss: 0.9686167240142822,grad_norm: 0.7655112171523236, iteration: 353909
loss: 1.0036245584487915,grad_norm: 0.797788547276899, iteration: 353910
loss: 1.0430632829666138,grad_norm: 0.9387461126857504, iteration: 353911
loss: 0.9733540415763855,grad_norm: 0.875043481376248, iteration: 353912
loss: 0.9960095286369324,grad_norm: 0.9853333126143711, iteration: 353913
loss: 1.0808744430541992,grad_norm: 0.969701518414339, iteration: 353914
loss: 1.0175251960754395,grad_norm: 0.7924721954651777, iteration: 353915
loss: 1.0096184015274048,grad_norm: 0.8826599969179143, iteration: 353916
loss: 1.1732501983642578,grad_norm: 0.9938087943034859, iteration: 353917
loss: 0.9905240535736084,grad_norm: 0.7537767643286293, iteration: 353918
loss: 1.00790274143219,grad_norm: 0.6516528513833209, iteration: 353919
loss: 1.0209317207336426,grad_norm: 0.9114192237174676, iteration: 353920
loss: 1.020916223526001,grad_norm: 0.8809396558398455, iteration: 353921
loss: 1.0026891231536865,grad_norm: 0.7233402825651843, iteration: 353922
loss: 0.9942585229873657,grad_norm: 0.8001536036155361, iteration: 353923
loss: 1.015539526939392,grad_norm: 0.8014166842723695, iteration: 353924
loss: 0.991642951965332,grad_norm: 0.7919538886502092, iteration: 353925
loss: 0.9806289076805115,grad_norm: 0.6674521404943261, iteration: 353926
loss: 0.9767597317695618,grad_norm: 0.9999996078340306, iteration: 353927
loss: 1.0143026113510132,grad_norm: 0.9215884984588961, iteration: 353928
loss: 1.0153073072433472,grad_norm: 0.7614766835761418, iteration: 353929
loss: 1.0383721590042114,grad_norm: 0.7693019628946528, iteration: 353930
loss: 0.9932848215103149,grad_norm: 0.9999992782436039, iteration: 353931
loss: 1.006883144378662,grad_norm: 0.838971480466503, iteration: 353932
loss: 1.0061086416244507,grad_norm: 0.6543685287111655, iteration: 353933
loss: 1.0310016870498657,grad_norm: 0.9999998402315954, iteration: 353934
loss: 0.9963030815124512,grad_norm: 0.7917626308300342, iteration: 353935
loss: 1.011395812034607,grad_norm: 0.7616818776539727, iteration: 353936
loss: 1.0047919750213623,grad_norm: 0.8671603968488645, iteration: 353937
loss: 0.9759907126426697,grad_norm: 0.6851369408720216, iteration: 353938
loss: 1.0062217712402344,grad_norm: 0.7780538802418224, iteration: 353939
loss: 1.0288375616073608,grad_norm: 0.9383620031861496, iteration: 353940
loss: 1.0609768629074097,grad_norm: 0.99999993343308, iteration: 353941
loss: 1.0146408081054688,grad_norm: 0.948180649222694, iteration: 353942
loss: 1.0119134187698364,grad_norm: 0.941628637734242, iteration: 353943
loss: 1.01958167552948,grad_norm: 0.9743895459788174, iteration: 353944
loss: 1.0016196966171265,grad_norm: 0.773766791086607, iteration: 353945
loss: 1.015946626663208,grad_norm: 0.8200920576517783, iteration: 353946
loss: 1.0129817724227905,grad_norm: 0.8291002354561975, iteration: 353947
loss: 1.0110636949539185,grad_norm: 0.752838622538982, iteration: 353948
loss: 1.0119545459747314,grad_norm: 0.7219855844949826, iteration: 353949
loss: 1.006047010421753,grad_norm: 0.7511810718709131, iteration: 353950
loss: 1.010633945465088,grad_norm: 0.7482623389761246, iteration: 353951
loss: 0.9961358904838562,grad_norm: 0.8179882262795702, iteration: 353952
loss: 1.0047224760055542,grad_norm: 0.9654162209373657, iteration: 353953
loss: 0.9735067486763,grad_norm: 0.7654068527235948, iteration: 353954
loss: 0.9977759718894958,grad_norm: 0.8456418813612887, iteration: 353955
loss: 0.9929721355438232,grad_norm: 0.8534915532703438, iteration: 353956
loss: 0.9872297048568726,grad_norm: 0.8722390597525622, iteration: 353957
loss: 1.0097826719284058,grad_norm: 0.7598072983326946, iteration: 353958
loss: 1.0188987255096436,grad_norm: 0.9999991126544644, iteration: 353959
loss: 1.0693084001541138,grad_norm: 0.9999990997641625, iteration: 353960
loss: 0.9854595065116882,grad_norm: 0.8298515280977363, iteration: 353961
loss: 1.0268586874008179,grad_norm: 0.8607468623389399, iteration: 353962
loss: 0.9998824596405029,grad_norm: 0.9999990892420023, iteration: 353963
loss: 0.9800732731819153,grad_norm: 0.692039027740934, iteration: 353964
loss: 1.03389573097229,grad_norm: 0.7529492740747177, iteration: 353965
loss: 0.994014322757721,grad_norm: 0.8479660822487257, iteration: 353966
loss: 1.0289028882980347,grad_norm: 0.7364333959170081, iteration: 353967
loss: 0.9674398899078369,grad_norm: 0.7508936153565464, iteration: 353968
loss: 1.0008289813995361,grad_norm: 0.8394190296390084, iteration: 353969
loss: 0.9936562180519104,grad_norm: 0.8217045485284945, iteration: 353970
loss: 0.9800947308540344,grad_norm: 0.8767231007925756, iteration: 353971
loss: 0.9861395955085754,grad_norm: 0.8024146722184186, iteration: 353972
loss: 0.9719807505607605,grad_norm: 0.8598274153727024, iteration: 353973
loss: 0.9903841614723206,grad_norm: 0.7196038553034874, iteration: 353974
loss: 1.0326935052871704,grad_norm: 0.9999991789040756, iteration: 353975
loss: 1.0206142663955688,grad_norm: 0.7995653650261539, iteration: 353976
loss: 0.9957612156867981,grad_norm: 0.7409917171360865, iteration: 353977
loss: 0.9985942244529724,grad_norm: 0.8671543262454463, iteration: 353978
loss: 0.989778459072113,grad_norm: 0.9999992462209986, iteration: 353979
loss: 1.0217341184616089,grad_norm: 0.7828582988792607, iteration: 353980
loss: 0.9951260685920715,grad_norm: 0.7129328101521423, iteration: 353981
loss: 1.0505064725875854,grad_norm: 0.9999995973163559, iteration: 353982
loss: 1.0149060487747192,grad_norm: 0.8881229088604581, iteration: 353983
loss: 0.976206362247467,grad_norm: 0.7246141046821758, iteration: 353984
loss: 1.105163335800171,grad_norm: 0.9999995273232691, iteration: 353985
loss: 1.0166728496551514,grad_norm: 0.9999998466324571, iteration: 353986
loss: 1.0623401403427124,grad_norm: 0.999999717537853, iteration: 353987
loss: 1.000808835029602,grad_norm: 0.9405876829687658, iteration: 353988
loss: 0.9670589566230774,grad_norm: 0.8655500130705708, iteration: 353989
loss: 0.9784095883369446,grad_norm: 0.834321665877982, iteration: 353990
loss: 1.0259168148040771,grad_norm: 0.9999992652811944, iteration: 353991
loss: 1.1446164846420288,grad_norm: 0.9726030472856038, iteration: 353992
loss: 0.9838376641273499,grad_norm: 0.9999991903872638, iteration: 353993
loss: 1.0206478834152222,grad_norm: 0.9999991723300501, iteration: 353994
loss: 1.0134464502334595,grad_norm: 0.7034470677536299, iteration: 353995
loss: 1.018627643585205,grad_norm: 0.9453245995690701, iteration: 353996
loss: 1.0086381435394287,grad_norm: 0.9999994021049221, iteration: 353997
loss: 1.0064798593521118,grad_norm: 0.866878723985183, iteration: 353998
loss: 0.9946267008781433,grad_norm: 0.805970321506394, iteration: 353999
loss: 0.9934546947479248,grad_norm: 0.7972707828001991, iteration: 354000
loss: 1.026176929473877,grad_norm: 0.799685481744309, iteration: 354001
loss: 1.0339189767837524,grad_norm: 0.8471433242244757, iteration: 354002
loss: 0.9915264248847961,grad_norm: 0.8256657687446017, iteration: 354003
loss: 1.0590007305145264,grad_norm: 0.9999997868499529, iteration: 354004
loss: 1.0076388120651245,grad_norm: 0.7761996069712538, iteration: 354005
loss: 0.9713512659072876,grad_norm: 0.8953186373338218, iteration: 354006
loss: 1.001814842224121,grad_norm: 0.9999993465677552, iteration: 354007
loss: 1.0398660898208618,grad_norm: 0.9075778108221051, iteration: 354008
loss: 1.0171834230422974,grad_norm: 0.8646082219326403, iteration: 354009
loss: 1.0482697486877441,grad_norm: 0.8641188761734486, iteration: 354010
loss: 0.978204607963562,grad_norm: 0.8672885837930567, iteration: 354011
loss: 1.010876178741455,grad_norm: 0.6876304696105618, iteration: 354012
loss: 1.022548794746399,grad_norm: 0.9999996406436101, iteration: 354013
loss: 1.0596692562103271,grad_norm: 0.9999990281775863, iteration: 354014
loss: 1.0092440843582153,grad_norm: 0.9240175831403529, iteration: 354015
loss: 1.0011250972747803,grad_norm: 0.8711687579000046, iteration: 354016
loss: 0.9746189117431641,grad_norm: 0.8033230630757907, iteration: 354017
loss: 1.068770170211792,grad_norm: 0.9999990751014257, iteration: 354018
loss: 1.0001217126846313,grad_norm: 0.8977350482954697, iteration: 354019
loss: 0.9965547323226929,grad_norm: 0.8937892410140793, iteration: 354020
loss: 1.008725881576538,grad_norm: 0.8566635549307668, iteration: 354021
loss: 1.0074509382247925,grad_norm: 0.8652996071239767, iteration: 354022
loss: 1.0379414558410645,grad_norm: 0.9999990286865718, iteration: 354023
loss: 1.0010602474212646,grad_norm: 0.6767261054501988, iteration: 354024
loss: 1.0007095336914062,grad_norm: 0.890683551396734, iteration: 354025
loss: 0.9692923426628113,grad_norm: 0.9999993550679158, iteration: 354026
loss: 0.9518213272094727,grad_norm: 0.6812588842566234, iteration: 354027
loss: 1.026592493057251,grad_norm: 0.8896939629039131, iteration: 354028
loss: 0.9700710773468018,grad_norm: 0.7045617729096544, iteration: 354029
loss: 1.0033835172653198,grad_norm: 0.9999993959321484, iteration: 354030
loss: 0.9949386715888977,grad_norm: 0.999999509662692, iteration: 354031
loss: 1.0168033838272095,grad_norm: 0.7691869411271317, iteration: 354032
loss: 1.0266259908676147,grad_norm: 0.8644770567361023, iteration: 354033
loss: 1.0439943075180054,grad_norm: 0.999999843804514, iteration: 354034
loss: 0.9852991104125977,grad_norm: 0.9566323079470731, iteration: 354035
loss: 1.0048987865447998,grad_norm: 0.8838402892505128, iteration: 354036
loss: 1.0081794261932373,grad_norm: 0.7525758398137116, iteration: 354037
loss: 1.0239828824996948,grad_norm: 0.7770034402160272, iteration: 354038
loss: 1.0190415382385254,grad_norm: 0.9999992293291198, iteration: 354039
loss: 0.9757885336875916,grad_norm: 0.8782957483643967, iteration: 354040
loss: 0.999485969543457,grad_norm: 0.8155771897624956, iteration: 354041
loss: 0.9986572265625,grad_norm: 0.7639824920877587, iteration: 354042
loss: 1.0068000555038452,grad_norm: 0.8570660123717343, iteration: 354043
loss: 1.0103249549865723,grad_norm: 0.8529341641480156, iteration: 354044
loss: 1.0107804536819458,grad_norm: 0.7663090334539997, iteration: 354045
loss: 1.0090934038162231,grad_norm: 0.7955895403232524, iteration: 354046
loss: 1.0537554025650024,grad_norm: 0.999999993041183, iteration: 354047
loss: 1.0024036169052124,grad_norm: 0.7584111806865766, iteration: 354048
loss: 0.9871866703033447,grad_norm: 0.8468344780095594, iteration: 354049
loss: 1.0193225145339966,grad_norm: 0.8176976613854634, iteration: 354050
loss: 1.067507266998291,grad_norm: 0.9999998636402659, iteration: 354051
loss: 0.9707069993019104,grad_norm: 0.7489390703776607, iteration: 354052
loss: 0.9778943657875061,grad_norm: 0.945091977127842, iteration: 354053
loss: 1.0278384685516357,grad_norm: 0.9999995065890646, iteration: 354054
loss: 0.9622644782066345,grad_norm: 0.7275352588598276, iteration: 354055
loss: 1.0147087574005127,grad_norm: 0.8889991131148589, iteration: 354056
loss: 0.9990841150283813,grad_norm: 0.6916810522843995, iteration: 354057
loss: 1.1120011806488037,grad_norm: 0.9999997962725556, iteration: 354058
loss: 0.9920417666435242,grad_norm: 0.766777738696215, iteration: 354059
loss: 0.9861459136009216,grad_norm: 0.9999991570538287, iteration: 354060
loss: 0.9689192175865173,grad_norm: 0.7596753425594654, iteration: 354061
loss: 1.0717304944992065,grad_norm: 0.9895387864865987, iteration: 354062
loss: 0.9486904144287109,grad_norm: 0.9999990602539315, iteration: 354063
loss: 1.0374386310577393,grad_norm: 1.0000000368906947, iteration: 354064
loss: 1.0022966861724854,grad_norm: 0.954871814542737, iteration: 354065
loss: 1.0080020427703857,grad_norm: 0.7459809694470594, iteration: 354066
loss: 1.1067471504211426,grad_norm: 0.9550315446948469, iteration: 354067
loss: 0.9631125330924988,grad_norm: 0.810845938141395, iteration: 354068
loss: 0.9934564828872681,grad_norm: 0.90171750027146, iteration: 354069
loss: 1.0161525011062622,grad_norm: 0.8662133662783106, iteration: 354070
loss: 0.9873077869415283,grad_norm: 0.7442490270900292, iteration: 354071
loss: 0.9939873814582825,grad_norm: 0.7521954664121957, iteration: 354072
loss: 1.0056763887405396,grad_norm: 0.7432618402510538, iteration: 354073
loss: 1.008580207824707,grad_norm: 0.9999991972574931, iteration: 354074
loss: 1.0456491708755493,grad_norm: 0.8206707673967201, iteration: 354075
loss: 1.0193512439727783,grad_norm: 0.7453274696648992, iteration: 354076
loss: 1.0025742053985596,grad_norm: 0.7764373778328046, iteration: 354077
loss: 1.0201289653778076,grad_norm: 0.6563815776477697, iteration: 354078
loss: 0.9533810615539551,grad_norm: 0.795515312867779, iteration: 354079
loss: 0.9625015258789062,grad_norm: 0.750235106246723, iteration: 354080
loss: 1.0307694673538208,grad_norm: 0.8595348332484486, iteration: 354081
loss: 1.0057920217514038,grad_norm: 0.7427667968068932, iteration: 354082
loss: 0.9889694452285767,grad_norm: 0.7403500984219348, iteration: 354083
loss: 1.1488735675811768,grad_norm: 0.9999992393287369, iteration: 354084
loss: 1.0111006498336792,grad_norm: 0.9106177699570555, iteration: 354085
loss: 1.086607575416565,grad_norm: 0.9999999214242402, iteration: 354086
loss: 1.0852924585342407,grad_norm: 0.7998704186301019, iteration: 354087
loss: 0.9861679077148438,grad_norm: 0.7845044045916251, iteration: 354088
loss: 1.0136399269104004,grad_norm: 0.7692386208904616, iteration: 354089
loss: 1.011129379272461,grad_norm: 0.9665668833502068, iteration: 354090
loss: 0.9729688763618469,grad_norm: 0.8443081720782568, iteration: 354091
loss: 1.1353135108947754,grad_norm: 0.9999991913695482, iteration: 354092
loss: 0.9934240579605103,grad_norm: 0.9999991928298201, iteration: 354093
loss: 1.0446652173995972,grad_norm: 0.803797285503482, iteration: 354094
loss: 1.2190357446670532,grad_norm: 0.9999997838216734, iteration: 354095
loss: 1.0768252611160278,grad_norm: 0.9999990814621763, iteration: 354096
loss: 1.0558748245239258,grad_norm: 0.9999990349283282, iteration: 354097
loss: 1.1344265937805176,grad_norm: 0.8883538347500637, iteration: 354098
loss: 1.1399561166763306,grad_norm: 0.9999991543740002, iteration: 354099
loss: 1.0787073373794556,grad_norm: 0.9426401457302169, iteration: 354100
loss: 1.007347822189331,grad_norm: 0.9560093034079841, iteration: 354101
loss: 0.976557731628418,grad_norm: 0.9999998003986494, iteration: 354102
loss: 1.0489003658294678,grad_norm: 0.9999994462959183, iteration: 354103
loss: 1.0786257982254028,grad_norm: 0.9999996150728137, iteration: 354104
loss: 0.9744546413421631,grad_norm: 0.7949896098834259, iteration: 354105
loss: 1.0306040048599243,grad_norm: 0.9999992871887491, iteration: 354106
loss: 1.0288163423538208,grad_norm: 0.9999989294723987, iteration: 354107
loss: 1.051710605621338,grad_norm: 0.8584010855834069, iteration: 354108
loss: 1.0376546382904053,grad_norm: 0.8634400382697989, iteration: 354109
loss: 1.0724865198135376,grad_norm: 0.7684186050499876, iteration: 354110
loss: 0.9723698496818542,grad_norm: 0.7702791624034488, iteration: 354111
loss: 1.0461230278015137,grad_norm: 0.8071245375421855, iteration: 354112
loss: 1.0350134372711182,grad_norm: 0.8291236035086803, iteration: 354113
loss: 1.0094091892242432,grad_norm: 0.9999996026805741, iteration: 354114
loss: 0.9975059032440186,grad_norm: 0.7787027916218189, iteration: 354115
loss: 1.024933099746704,grad_norm: 0.9999995458449668, iteration: 354116
loss: 1.045906662940979,grad_norm: 0.9999996066795298, iteration: 354117
loss: 1.0037198066711426,grad_norm: 0.9999990802344381, iteration: 354118
loss: 1.0621381998062134,grad_norm: 0.7844866613731873, iteration: 354119
loss: 0.9963597059249878,grad_norm: 0.7876652734770153, iteration: 354120
loss: 1.1144163608551025,grad_norm: 0.9638089982123453, iteration: 354121
loss: 1.0405181646347046,grad_norm: 0.9999990793648533, iteration: 354122
loss: 1.0079327821731567,grad_norm: 0.9999992080002935, iteration: 354123
loss: 0.9790446758270264,grad_norm: 0.9010937269659427, iteration: 354124
loss: 1.0169531106948853,grad_norm: 0.9999990761459018, iteration: 354125
loss: 1.0511482954025269,grad_norm: 0.9999998346042166, iteration: 354126
loss: 0.9968180656433105,grad_norm: 0.7977175969163325, iteration: 354127
loss: 1.0270190238952637,grad_norm: 0.7795305724782696, iteration: 354128
loss: 1.0012633800506592,grad_norm: 0.9999992458318214, iteration: 354129
loss: 1.0437170267105103,grad_norm: 0.8696898553084489, iteration: 354130
loss: 0.9770116806030273,grad_norm: 0.7678011400939453, iteration: 354131
loss: 1.0353899002075195,grad_norm: 0.9256463120623376, iteration: 354132
loss: 0.9683433175086975,grad_norm: 0.7488603731719045, iteration: 354133
loss: 1.0166517496109009,grad_norm: 0.8688999599032894, iteration: 354134
loss: 0.9786674976348877,grad_norm: 0.9028306693285942, iteration: 354135
loss: 1.0307508707046509,grad_norm: 0.9018635108520132, iteration: 354136
loss: 1.0090725421905518,grad_norm: 0.9999992467910979, iteration: 354137
loss: 0.9837712645530701,grad_norm: 0.7708830860981027, iteration: 354138
loss: 1.1061004400253296,grad_norm: 0.8825078069634171, iteration: 354139
loss: 1.0244182348251343,grad_norm: 0.9999992345659867, iteration: 354140
loss: 0.9855964183807373,grad_norm: 0.8049535750220048, iteration: 354141
loss: 1.1304030418395996,grad_norm: 0.9999991428371369, iteration: 354142
loss: 0.9793825149536133,grad_norm: 0.8710679745228058, iteration: 354143
loss: 1.012539267539978,grad_norm: 0.9429170640371644, iteration: 354144
loss: 1.0654925107955933,grad_norm: 0.9352128805228299, iteration: 354145
loss: 1.0252156257629395,grad_norm: 0.8683400389604445, iteration: 354146
loss: 0.9762850403785706,grad_norm: 0.896602548927858, iteration: 354147
loss: 0.9688957929611206,grad_norm: 0.8496280241801138, iteration: 354148
loss: 1.0426727533340454,grad_norm: 0.9999996420353953, iteration: 354149
loss: 1.0187842845916748,grad_norm: 0.897531823079614, iteration: 354150
loss: 1.0211938619613647,grad_norm: 0.8318808754181772, iteration: 354151
loss: 1.0553417205810547,grad_norm: 0.9999992330332731, iteration: 354152
loss: 1.0422685146331787,grad_norm: 0.845675972283637, iteration: 354153
loss: 1.0084906816482544,grad_norm: 0.7607605325523434, iteration: 354154
loss: 1.0330851078033447,grad_norm: 0.9999991872538565, iteration: 354155
loss: 0.9945907592773438,grad_norm: 0.8242396482597846, iteration: 354156
loss: 0.986900806427002,grad_norm: 0.7476708001324224, iteration: 354157
loss: 0.9981259703636169,grad_norm: 0.8565716293803532, iteration: 354158
loss: 1.0246957540512085,grad_norm: 0.9181971549785294, iteration: 354159
loss: 0.9639124274253845,grad_norm: 0.9999990815206815, iteration: 354160
loss: 0.9859797954559326,grad_norm: 0.8262690340697727, iteration: 354161
loss: 0.995596706867218,grad_norm: 0.8238111127310301, iteration: 354162
loss: 1.028554081916809,grad_norm: 0.7758255795321183, iteration: 354163
loss: 1.0037389993667603,grad_norm: 0.9999991901196632, iteration: 354164
loss: 1.0628875494003296,grad_norm: 0.776306541784909, iteration: 354165
loss: 0.9421595335006714,grad_norm: 0.8285460714017892, iteration: 354166
loss: 1.0185097455978394,grad_norm: 1.000000016607289, iteration: 354167
loss: 1.0521916151046753,grad_norm: 0.9999992937331259, iteration: 354168
loss: 1.0003548860549927,grad_norm: 0.8848429136731569, iteration: 354169
loss: 1.0336291790008545,grad_norm: 0.6925511780669009, iteration: 354170
loss: 1.075992226600647,grad_norm: 0.9999996736975824, iteration: 354171
loss: 1.0208429098129272,grad_norm: 0.9999993694612196, iteration: 354172
loss: 0.996299147605896,grad_norm: 0.9999998847680525, iteration: 354173
loss: 1.014686107635498,grad_norm: 0.9999993410980643, iteration: 354174
loss: 1.0328742265701294,grad_norm: 0.9015210922970031, iteration: 354175
loss: 1.0063937902450562,grad_norm: 0.8239760938425479, iteration: 354176
loss: 0.9763596653938293,grad_norm: 0.8912784544307343, iteration: 354177
loss: 1.0430080890655518,grad_norm: 0.9965307735884105, iteration: 354178
loss: 1.0219213962554932,grad_norm: 0.8688695026723519, iteration: 354179
loss: 0.9902188181877136,grad_norm: 0.8816627724868288, iteration: 354180
loss: 1.0756518840789795,grad_norm: 0.9266568893644056, iteration: 354181
loss: 1.1272085905075073,grad_norm: 0.8483783303587498, iteration: 354182
loss: 1.039039134979248,grad_norm: 0.9999993332397276, iteration: 354183
loss: 1.0416772365570068,grad_norm: 0.8704268355125787, iteration: 354184
loss: 1.0005443096160889,grad_norm: 0.9999993009293662, iteration: 354185
loss: 1.056972861289978,grad_norm: 0.9999990738976193, iteration: 354186
loss: 1.0403414964675903,grad_norm: 0.9999992634986188, iteration: 354187
loss: 1.074966549873352,grad_norm: 0.9877215169286097, iteration: 354188
loss: 1.005111813545227,grad_norm: 0.999999690690251, iteration: 354189
loss: 1.0082277059555054,grad_norm: 0.9999991998816368, iteration: 354190
loss: 1.073355793952942,grad_norm: 0.9999998528960737, iteration: 354191
loss: 1.004743218421936,grad_norm: 0.9999990227234156, iteration: 354192
loss: 1.0074478387832642,grad_norm: 0.7761872127501446, iteration: 354193
loss: 1.0865161418914795,grad_norm: 0.9999999821532793, iteration: 354194
loss: 1.0776541233062744,grad_norm: 0.999999125928558, iteration: 354195
loss: 1.0066193342208862,grad_norm: 0.9999992494341218, iteration: 354196
loss: 0.9962474703788757,grad_norm: 0.8152943186994884, iteration: 354197
loss: 1.1135303974151611,grad_norm: 0.9999991495469528, iteration: 354198
loss: 1.0466015338897705,grad_norm: 0.9577769945428595, iteration: 354199
loss: 1.034087061882019,grad_norm: 0.9999991348463467, iteration: 354200
loss: 1.0903370380401611,grad_norm: 1.0000000681002803, iteration: 354201
loss: 1.0092086791992188,grad_norm: 0.9999992976374011, iteration: 354202
loss: 0.9990971088409424,grad_norm: 0.7678122655436824, iteration: 354203
loss: 0.977362871170044,grad_norm: 0.9935797849805126, iteration: 354204
loss: 1.1181212663650513,grad_norm: 0.9999990203631769, iteration: 354205
loss: 1.003571629524231,grad_norm: 0.9999989880306023, iteration: 354206
loss: 1.0185613632202148,grad_norm: 0.6633077490257475, iteration: 354207
loss: 0.9997082352638245,grad_norm: 0.9344258191309196, iteration: 354208
loss: 1.0809284448623657,grad_norm: 0.9999991127537826, iteration: 354209
loss: 1.0133131742477417,grad_norm: 0.8065993507094151, iteration: 354210
loss: 1.0002281665802002,grad_norm: 0.9999993954086218, iteration: 354211
loss: 0.9894073605537415,grad_norm: 0.848004335195655, iteration: 354212
loss: 1.0198804140090942,grad_norm: 0.9999992074283027, iteration: 354213
loss: 1.0360552072525024,grad_norm: 0.8458402581296731, iteration: 354214
loss: 1.048382043838501,grad_norm: 0.9999997413793982, iteration: 354215
loss: 1.0206228494644165,grad_norm: 0.999999283675502, iteration: 354216
loss: 1.010514259338379,grad_norm: 0.8179340782418015, iteration: 354217
loss: 0.9834535121917725,grad_norm: 0.8468620687499705, iteration: 354218
loss: 1.0266010761260986,grad_norm: 0.9655040544280133, iteration: 354219
loss: 1.0523905754089355,grad_norm: 0.8240480890040087, iteration: 354220
loss: 1.080544114112854,grad_norm: 0.9999995989414774, iteration: 354221
loss: 1.2203341722488403,grad_norm: 0.9999999576084514, iteration: 354222
loss: 0.9932027459144592,grad_norm: 0.8105707959685782, iteration: 354223
loss: 0.9966203570365906,grad_norm: 0.7986883153592768, iteration: 354224
loss: 1.135905146598816,grad_norm: 0.9999991462720362, iteration: 354225
loss: 1.0673879384994507,grad_norm: 0.999999464724393, iteration: 354226
loss: 0.9996131658554077,grad_norm: 0.9357482893414144, iteration: 354227
loss: 1.1823502779006958,grad_norm: 0.9999990529571935, iteration: 354228
loss: 1.0430326461791992,grad_norm: 0.9999989622691342, iteration: 354229
loss: 0.9790791273117065,grad_norm: 0.8474254484734339, iteration: 354230
loss: 1.0764671564102173,grad_norm: 0.9999998318696268, iteration: 354231
loss: 1.0139154195785522,grad_norm: 0.9343326798310964, iteration: 354232
loss: 1.1105504035949707,grad_norm: 0.9999991869777632, iteration: 354233
loss: 1.165042757987976,grad_norm: 0.9999996383925361, iteration: 354234
loss: 1.2079942226409912,grad_norm: 0.9999998453511542, iteration: 354235
loss: 1.0011919736862183,grad_norm: 0.8140786512623641, iteration: 354236
loss: 0.9891558289527893,grad_norm: 0.9187141557529528, iteration: 354237
loss: 1.0080413818359375,grad_norm: 0.8696627252278181, iteration: 354238
loss: 1.035625696182251,grad_norm: 0.7519695158156393, iteration: 354239
loss: 1.0410927534103394,grad_norm: 0.9999990581185646, iteration: 354240
loss: 1.009382963180542,grad_norm: 0.8252748146148864, iteration: 354241
loss: 1.0868353843688965,grad_norm: 0.9999993484787459, iteration: 354242
loss: 0.9827396869659424,grad_norm: 0.9999991229981242, iteration: 354243
loss: 1.1042284965515137,grad_norm: 0.9999999910691773, iteration: 354244
loss: 0.9966842532157898,grad_norm: 0.8824568030424234, iteration: 354245
loss: 1.0017383098602295,grad_norm: 0.7542836117079729, iteration: 354246
loss: 0.9841722846031189,grad_norm: 0.9616205599437176, iteration: 354247
loss: 1.0053240060806274,grad_norm: 0.8449500874175866, iteration: 354248
loss: 1.0241914987564087,grad_norm: 0.9377210333440673, iteration: 354249
loss: 1.0212342739105225,grad_norm: 0.8281630910556397, iteration: 354250
loss: 1.0266785621643066,grad_norm: 0.8997704369346462, iteration: 354251
loss: 1.004508376121521,grad_norm: 0.8101341138107372, iteration: 354252
loss: 1.0216991901397705,grad_norm: 0.9999992117484175, iteration: 354253
loss: 1.019126057624817,grad_norm: 0.9873362193070363, iteration: 354254
loss: 1.0264052152633667,grad_norm: 0.9624054318681333, iteration: 354255
loss: 0.9881982803344727,grad_norm: 0.9999991953282893, iteration: 354256
loss: 1.0407522916793823,grad_norm: 0.9999993276473049, iteration: 354257
loss: 1.0009154081344604,grad_norm: 0.8474993961423476, iteration: 354258
loss: 1.0367761850357056,grad_norm: 0.9220422047172943, iteration: 354259
loss: 1.1123915910720825,grad_norm: 0.9999991157916903, iteration: 354260
loss: 1.0265326499938965,grad_norm: 0.9999998552931324, iteration: 354261
loss: 1.0537503957748413,grad_norm: 0.9999998778769876, iteration: 354262
loss: 1.029855728149414,grad_norm: 0.8190090973857886, iteration: 354263
loss: 1.0245516300201416,grad_norm: 0.9999990813353035, iteration: 354264
loss: 0.9768191576004028,grad_norm: 0.8534284590654938, iteration: 354265
loss: 1.0016982555389404,grad_norm: 0.836045578395739, iteration: 354266
loss: 1.0500801801681519,grad_norm: 0.8800281978752663, iteration: 354267
loss: 1.0092202425003052,grad_norm: 0.9262208684835178, iteration: 354268
loss: 0.9398842453956604,grad_norm: 0.747910404533994, iteration: 354269
loss: 0.9901589751243591,grad_norm: 0.8249282347006026, iteration: 354270
loss: 1.0129038095474243,grad_norm: 0.9999992934444487, iteration: 354271
loss: 0.9737200140953064,grad_norm: 0.7997989511587892, iteration: 354272
loss: 0.9624194502830505,grad_norm: 0.7574493987443801, iteration: 354273
loss: 1.0366308689117432,grad_norm: 0.7251335412871538, iteration: 354274
loss: 0.990172266960144,grad_norm: 0.92330208066398, iteration: 354275
loss: 1.026445984840393,grad_norm: 0.999999795072286, iteration: 354276
loss: 1.0375651121139526,grad_norm: 0.9999996522528268, iteration: 354277
loss: 1.0190021991729736,grad_norm: 0.9718361247851153, iteration: 354278
loss: 0.9934669733047485,grad_norm: 0.979916489335286, iteration: 354279
loss: 1.2302443981170654,grad_norm: 0.9999997199961571, iteration: 354280
loss: 1.0035490989685059,grad_norm: 0.9999992376881278, iteration: 354281
loss: 0.9846969246864319,grad_norm: 0.8005953403564711, iteration: 354282
loss: 1.0050286054611206,grad_norm: 0.8097194486511627, iteration: 354283
loss: 0.9924098253250122,grad_norm: 0.8509170168416613, iteration: 354284
loss: 1.0392051935195923,grad_norm: 0.7410105905820463, iteration: 354285
loss: 0.995788037776947,grad_norm: 0.7962452399387866, iteration: 354286
loss: 0.9782987833023071,grad_norm: 0.8262717040306437, iteration: 354287
loss: 0.9658892750740051,grad_norm: 0.7944987698479792, iteration: 354288
loss: 1.0243226289749146,grad_norm: 0.7688149696372433, iteration: 354289
loss: 1.0204648971557617,grad_norm: 0.8455012755543818, iteration: 354290
loss: 0.9812855124473572,grad_norm: 0.9546939828316062, iteration: 354291
loss: 0.9836300015449524,grad_norm: 0.9999992456415039, iteration: 354292
loss: 1.0236858129501343,grad_norm: 0.7671322153267462, iteration: 354293
loss: 1.0147101879119873,grad_norm: 0.9999994905378689, iteration: 354294
loss: 1.0070830583572388,grad_norm: 0.9533766818157665, iteration: 354295
loss: 0.9993596076965332,grad_norm: 0.8305428804589677, iteration: 354296
loss: 0.9916781187057495,grad_norm: 0.9195110755622726, iteration: 354297
loss: 1.0128403902053833,grad_norm: 0.7729894865316419, iteration: 354298
loss: 1.0243216753005981,grad_norm: 0.657823931330216, iteration: 354299
loss: 1.120215892791748,grad_norm: 0.9999993922768488, iteration: 354300
loss: 1.0038158893585205,grad_norm: 0.9999993619170215, iteration: 354301
loss: 1.0084028244018555,grad_norm: 0.858624119864205, iteration: 354302
loss: 1.0042979717254639,grad_norm: 0.999999328383962, iteration: 354303
loss: 1.0748602151870728,grad_norm: 0.9602730150293295, iteration: 354304
loss: 1.0039674043655396,grad_norm: 0.9999999048216005, iteration: 354305
loss: 1.0177327394485474,grad_norm: 0.8739657012500992, iteration: 354306
loss: 1.0570992231369019,grad_norm: 0.9999996679104803, iteration: 354307
loss: 1.0076236724853516,grad_norm: 0.9999999947086685, iteration: 354308
loss: 1.0468401908874512,grad_norm: 0.9999990409009468, iteration: 354309
loss: 0.9866242408752441,grad_norm: 0.7992874020472633, iteration: 354310
loss: 0.9758312702178955,grad_norm: 0.7982491871367682, iteration: 354311
loss: 0.9829108715057373,grad_norm: 0.812322196502998, iteration: 354312
loss: 0.97307288646698,grad_norm: 0.9282623359449672, iteration: 354313
loss: 0.9770411252975464,grad_norm: 0.77416242885553, iteration: 354314
loss: 0.9802395701408386,grad_norm: 0.8639449146033729, iteration: 354315
loss: 1.0139498710632324,grad_norm: 0.9449262676144149, iteration: 354316
loss: 1.047211766242981,grad_norm: 0.9891639737720558, iteration: 354317
loss: 1.005289077758789,grad_norm: 0.7984192715362243, iteration: 354318
loss: 1.0358208417892456,grad_norm: 0.8321201055855548, iteration: 354319
loss: 0.9909604787826538,grad_norm: 0.9069816503704025, iteration: 354320
loss: 0.9848247170448303,grad_norm: 0.8425279684961207, iteration: 354321
loss: 0.9899812340736389,grad_norm: 0.6921950866134464, iteration: 354322
loss: 0.9764084219932556,grad_norm: 0.6891706232495325, iteration: 354323
loss: 0.9969783425331116,grad_norm: 0.8680657025031088, iteration: 354324
loss: 1.0047993659973145,grad_norm: 0.7987662557423568, iteration: 354325
loss: 1.0024491548538208,grad_norm: 0.8494641728123753, iteration: 354326
loss: 1.012499451637268,grad_norm: 0.8627567282074563, iteration: 354327
loss: 1.002517819404602,grad_norm: 0.9999990589645432, iteration: 354328
loss: 1.0077753067016602,grad_norm: 0.9379016920371024, iteration: 354329
loss: 0.9916847348213196,grad_norm: 0.8052777928156125, iteration: 354330
loss: 1.0278576612472534,grad_norm: 0.9759308433820956, iteration: 354331
loss: 0.9877274632453918,grad_norm: 0.7333753479024316, iteration: 354332
loss: 1.0523803234100342,grad_norm: 0.9999990616082164, iteration: 354333
loss: 1.0046665668487549,grad_norm: 0.9913116469463791, iteration: 354334
loss: 0.9651955962181091,grad_norm: 0.9266552347192942, iteration: 354335
loss: 1.0070115327835083,grad_norm: 0.788702401797287, iteration: 354336
loss: 0.9865498542785645,grad_norm: 0.9001415248920791, iteration: 354337
loss: 1.0071523189544678,grad_norm: 0.9999993762163613, iteration: 354338
loss: 1.0294499397277832,grad_norm: 0.7989111136396465, iteration: 354339
loss: 0.9867016673088074,grad_norm: 0.7731184255663325, iteration: 354340
loss: 1.014750361442566,grad_norm: 0.7585775742088109, iteration: 354341
loss: 1.0090150833129883,grad_norm: 0.8554739688306562, iteration: 354342
loss: 1.0298713445663452,grad_norm: 0.9013265930617491, iteration: 354343
loss: 0.9811449646949768,grad_norm: 0.7673633947009312, iteration: 354344
loss: 1.0002082586288452,grad_norm: 0.8383800668448563, iteration: 354345
loss: 0.9941959977149963,grad_norm: 0.6707805950722195, iteration: 354346
loss: 1.0075021982192993,grad_norm: 0.664451116910459, iteration: 354347
loss: 1.0132654905319214,grad_norm: 0.7757612024577416, iteration: 354348
loss: 1.0252747535705566,grad_norm: 0.7791007028256347, iteration: 354349
loss: 1.005722165107727,grad_norm: 0.9236782323608541, iteration: 354350
loss: 0.9667945504188538,grad_norm: 0.780526471287121, iteration: 354351
loss: 1.0046137571334839,grad_norm: 0.9296355782208057, iteration: 354352
loss: 0.9867481589317322,grad_norm: 0.791174615941412, iteration: 354353
loss: 1.004271388053894,grad_norm: 0.8881764439763944, iteration: 354354
loss: 0.9781724810600281,grad_norm: 0.8119103687965276, iteration: 354355
loss: 1.0281529426574707,grad_norm: 0.9908415059878524, iteration: 354356
loss: 1.0067121982574463,grad_norm: 0.8481517478670372, iteration: 354357
loss: 0.9774482250213623,grad_norm: 0.9367303481751903, iteration: 354358
loss: 0.9962871670722961,grad_norm: 0.9565401049001815, iteration: 354359
loss: 1.001336693763733,grad_norm: 0.811103252432639, iteration: 354360
loss: 1.0274018049240112,grad_norm: 0.866446735966397, iteration: 354361
loss: 0.9904646277427673,grad_norm: 0.9999990756960648, iteration: 354362
loss: 1.041551947593689,grad_norm: 0.9999991416464828, iteration: 354363
loss: 1.0330140590667725,grad_norm: 0.9999991845777614, iteration: 354364
loss: 1.012226939201355,grad_norm: 0.9065062964941519, iteration: 354365
loss: 1.0276660919189453,grad_norm: 0.9999997634579528, iteration: 354366
loss: 1.0407004356384277,grad_norm: 0.7359411876392137, iteration: 354367
loss: 1.0092663764953613,grad_norm: 0.9457196295078719, iteration: 354368
loss: 1.0072728395462036,grad_norm: 0.8003336596110475, iteration: 354369
loss: 0.9974727630615234,grad_norm: 0.7414681548687168, iteration: 354370
loss: 0.991305947303772,grad_norm: 0.8728391429185387, iteration: 354371
loss: 0.9953194856643677,grad_norm: 0.8174486921909646, iteration: 354372
loss: 1.0032732486724854,grad_norm: 0.7784031984111832, iteration: 354373
loss: 1.0620089769363403,grad_norm: 0.9311452567402477, iteration: 354374
loss: 1.0308716297149658,grad_norm: 0.9499080765181231, iteration: 354375
loss: 0.9801260828971863,grad_norm: 0.7985308243586492, iteration: 354376
loss: 0.9843521118164062,grad_norm: 0.8449608973538116, iteration: 354377
loss: 0.9562532305717468,grad_norm: 0.8114821452570316, iteration: 354378
loss: 0.9586702585220337,grad_norm: 0.8609328960009544, iteration: 354379
loss: 1.0204133987426758,grad_norm: 0.7689686611000754, iteration: 354380
loss: 1.0111925601959229,grad_norm: 0.722918467970846, iteration: 354381
loss: 1.0997118949890137,grad_norm: 0.8254314217958046, iteration: 354382
loss: 0.9954447746276855,grad_norm: 0.8235685336138348, iteration: 354383
loss: 1.0368213653564453,grad_norm: 0.9999997244379448, iteration: 354384
loss: 1.0140365362167358,grad_norm: 0.7535375534310234, iteration: 354385
loss: 1.016351342201233,grad_norm: 0.66026891734955, iteration: 354386
loss: 0.9660277366638184,grad_norm: 0.973407501067153, iteration: 354387
loss: 0.9981533288955688,grad_norm: 0.8369287343960942, iteration: 354388
loss: 1.0471290349960327,grad_norm: 0.9999990425275662, iteration: 354389
loss: 1.0185437202453613,grad_norm: 0.8257362260588075, iteration: 354390
loss: 1.0434203147888184,grad_norm: 0.7407841975425417, iteration: 354391
loss: 0.9834790825843811,grad_norm: 0.6564551515527991, iteration: 354392
loss: 1.0245366096496582,grad_norm: 0.8858615112963527, iteration: 354393
loss: 0.9452996850013733,grad_norm: 0.9485060011333607, iteration: 354394
loss: 0.9953816533088684,grad_norm: 0.8743836950007982, iteration: 354395
loss: 0.9967805743217468,grad_norm: 0.7996302496656641, iteration: 354396
loss: 1.0202685594558716,grad_norm: 0.9999993655560554, iteration: 354397
loss: 0.9910620450973511,grad_norm: 0.8759097976944551, iteration: 354398
loss: 0.9812939167022705,grad_norm: 0.8644710849855007, iteration: 354399
loss: 1.026849627494812,grad_norm: 0.999999655023657, iteration: 354400
loss: 0.9672513604164124,grad_norm: 0.8184564698831257, iteration: 354401
loss: 1.0265504121780396,grad_norm: 0.866411314052769, iteration: 354402
loss: 1.0284504890441895,grad_norm: 0.9052380088041095, iteration: 354403
loss: 1.0038021802902222,grad_norm: 0.9999989660203571, iteration: 354404
loss: 0.9977874755859375,grad_norm: 0.7918312043689156, iteration: 354405
loss: 0.9707320332527161,grad_norm: 0.9214832336637129, iteration: 354406
loss: 1.051980972290039,grad_norm: 0.857677581660188, iteration: 354407
loss: 1.0051978826522827,grad_norm: 0.8570015175568271, iteration: 354408
loss: 0.9942766427993774,grad_norm: 0.7797694540890214, iteration: 354409
loss: 1.0858649015426636,grad_norm: 0.9999997534521481, iteration: 354410
loss: 0.9971262812614441,grad_norm: 0.8118068082618648, iteration: 354411
loss: 1.0357683897018433,grad_norm: 0.9999997872280831, iteration: 354412
loss: 1.0230169296264648,grad_norm: 0.868438161769099, iteration: 354413
loss: 0.998589813709259,grad_norm: 0.8796073685525113, iteration: 354414
loss: 1.004345417022705,grad_norm: 0.7105383094511504, iteration: 354415
loss: 0.9714668393135071,grad_norm: 0.8558297680440019, iteration: 354416
loss: 0.9750441908836365,grad_norm: 0.8936827262910368, iteration: 354417
loss: 1.0525270700454712,grad_norm: 0.8288306481235266, iteration: 354418
loss: 0.9787947535514832,grad_norm: 0.9022755886146315, iteration: 354419
loss: 1.011359691619873,grad_norm: 0.7476834511601107, iteration: 354420
loss: 1.0218991041183472,grad_norm: 0.7803689222901514, iteration: 354421
loss: 0.9948181509971619,grad_norm: 0.8931113010587045, iteration: 354422
loss: 1.0250053405761719,grad_norm: 0.9001541893023561, iteration: 354423
loss: 1.0090093612670898,grad_norm: 0.9999990323197525, iteration: 354424
loss: 0.9693001508712769,grad_norm: 0.8603328683249627, iteration: 354425
loss: 1.0079652070999146,grad_norm: 0.8931783821629053, iteration: 354426
loss: 0.9595561623573303,grad_norm: 0.7609437417101593, iteration: 354427
loss: 0.9711148142814636,grad_norm: 0.7163170623258056, iteration: 354428
loss: 0.9780417084693909,grad_norm: 0.7820820376675222, iteration: 354429
loss: 1.0142908096313477,grad_norm: 0.8817894601530384, iteration: 354430
loss: 1.0245506763458252,grad_norm: 0.7823356372141684, iteration: 354431
loss: 1.000505805015564,grad_norm: 0.8774889764761167, iteration: 354432
loss: 1.0120562314987183,grad_norm: 0.7929329373185398, iteration: 354433
loss: 1.007093906402588,grad_norm: 0.6852937240427719, iteration: 354434
loss: 1.0050081014633179,grad_norm: 0.999999276427976, iteration: 354435
loss: 0.9933710098266602,grad_norm: 0.7704259397416672, iteration: 354436
loss: 0.9702807664871216,grad_norm: 0.9008009742477776, iteration: 354437
loss: 0.9395747184753418,grad_norm: 0.8170348186205145, iteration: 354438
loss: 1.0215396881103516,grad_norm: 0.9999993136301928, iteration: 354439
loss: 1.0467742681503296,grad_norm: 0.9636872708225177, iteration: 354440
loss: 1.019559621810913,grad_norm: 0.8932101928002834, iteration: 354441
loss: 1.0727497339248657,grad_norm: 0.8427220539462866, iteration: 354442
loss: 0.9878683686256409,grad_norm: 0.7517725020846124, iteration: 354443
loss: 1.0107617378234863,grad_norm: 0.9999990745129008, iteration: 354444
loss: 1.0335817337036133,grad_norm: 0.9468242968131525, iteration: 354445
loss: 1.0101563930511475,grad_norm: 0.8403563191382163, iteration: 354446
loss: 0.9830231070518494,grad_norm: 0.7488927855696158, iteration: 354447
loss: 1.0016367435455322,grad_norm: 0.7545698594013621, iteration: 354448
loss: 1.0244390964508057,grad_norm: 0.7771440127192406, iteration: 354449
loss: 1.0030710697174072,grad_norm: 0.7485205195625966, iteration: 354450
loss: 1.000112533569336,grad_norm: 0.9366816914009722, iteration: 354451
loss: 1.020353078842163,grad_norm: 0.8405606860954814, iteration: 354452
loss: 1.005919337272644,grad_norm: 0.9999990611590435, iteration: 354453
loss: 0.9742744565010071,grad_norm: 0.8970505000033666, iteration: 354454
loss: 1.0033329725265503,grad_norm: 0.9230027322013333, iteration: 354455
loss: 0.9954384565353394,grad_norm: 0.9088167026439288, iteration: 354456
loss: 0.9833439588546753,grad_norm: 0.827001221186438, iteration: 354457
loss: 1.0058633089065552,grad_norm: 0.9999998993822072, iteration: 354458
loss: 0.9913578629493713,grad_norm: 0.8478732983012807, iteration: 354459
loss: 1.0006002187728882,grad_norm: 0.8648096235956478, iteration: 354460
loss: 0.9931951761245728,grad_norm: 0.9038189653268234, iteration: 354461
loss: 0.9998132586479187,grad_norm: 0.8832275354380954, iteration: 354462
loss: 1.0128004550933838,grad_norm: 0.9526456914837681, iteration: 354463
loss: 0.9813081622123718,grad_norm: 0.716115319847902, iteration: 354464
loss: 1.043347954750061,grad_norm: 0.7522584986351432, iteration: 354465
loss: 1.019856333732605,grad_norm: 0.7466256980540412, iteration: 354466
loss: 1.001872181892395,grad_norm: 0.7198628864641355, iteration: 354467
loss: 0.9762926697731018,grad_norm: 0.9375386344874799, iteration: 354468
loss: 1.0035496950149536,grad_norm: 0.8028898240760672, iteration: 354469
loss: 1.029687762260437,grad_norm: 0.7717600220824192, iteration: 354470
loss: 1.0722222328186035,grad_norm: 0.9999990506597464, iteration: 354471
loss: 1.0420289039611816,grad_norm: 0.8905620973832095, iteration: 354472
loss: 0.9720039367675781,grad_norm: 0.7760621194923274, iteration: 354473
loss: 0.975799560546875,grad_norm: 0.9043880714030585, iteration: 354474
loss: 0.9998733997344971,grad_norm: 0.8260567708155396, iteration: 354475
loss: 0.9970080256462097,grad_norm: 0.8480241750642367, iteration: 354476
loss: 1.0387650728225708,grad_norm: 0.8412140363727, iteration: 354477
loss: 1.010627031326294,grad_norm: 0.7603277754772854, iteration: 354478
loss: 1.0527148246765137,grad_norm: 0.9999994768286322, iteration: 354479
loss: 1.0082610845565796,grad_norm: 0.9283619888929364, iteration: 354480
loss: 0.9901068210601807,grad_norm: 0.7241417977916429, iteration: 354481
loss: 1.0017831325531006,grad_norm: 0.7338424057489343, iteration: 354482
loss: 1.023690938949585,grad_norm: 0.9999992861638509, iteration: 354483
loss: 1.0112618207931519,grad_norm: 0.7435014428161335, iteration: 354484
loss: 0.9861540198326111,grad_norm: 0.7006000599421365, iteration: 354485
loss: 1.001035451889038,grad_norm: 0.8202581186310728, iteration: 354486
loss: 1.020365595817566,grad_norm: 0.8620809811417505, iteration: 354487
loss: 0.9865066409111023,grad_norm: 0.8224128542391498, iteration: 354488
loss: 1.0181355476379395,grad_norm: 0.8523781895699962, iteration: 354489
loss: 0.9940479397773743,grad_norm: 0.8331797830201412, iteration: 354490
loss: 0.9651467800140381,grad_norm: 0.8509142411012725, iteration: 354491
loss: 1.0136789083480835,grad_norm: 0.8668654735035692, iteration: 354492
loss: 0.9734842777252197,grad_norm: 0.8182751957947304, iteration: 354493
loss: 0.9633302688598633,grad_norm: 0.8372386042961936, iteration: 354494
loss: 0.9809826612472534,grad_norm: 0.7693979247739193, iteration: 354495
loss: 1.0311020612716675,grad_norm: 0.8379016956601582, iteration: 354496
loss: 0.9920185208320618,grad_norm: 0.9795239574313613, iteration: 354497
loss: 0.9574445486068726,grad_norm: 0.9999993637134703, iteration: 354498
loss: 1.0884873867034912,grad_norm: 0.9999993495048445, iteration: 354499
loss: 1.0059551000595093,grad_norm: 0.7762276268521493, iteration: 354500
loss: 0.9983210563659668,grad_norm: 0.8998316803849382, iteration: 354501
loss: 0.976723849773407,grad_norm: 0.9909326825639956, iteration: 354502
loss: 0.9917776584625244,grad_norm: 0.9329382185297265, iteration: 354503
loss: 0.9468262791633606,grad_norm: 0.7244785361433265, iteration: 354504
loss: 0.995280385017395,grad_norm: 0.864515550702384, iteration: 354505
loss: 1.103645920753479,grad_norm: 0.9999998603195761, iteration: 354506
loss: 1.0243756771087646,grad_norm: 0.770939215086935, iteration: 354507
loss: 1.0131391286849976,grad_norm: 0.999999976288768, iteration: 354508
loss: 1.047484040260315,grad_norm: 0.9999991758456028, iteration: 354509
loss: 1.017448902130127,grad_norm: 0.7973662956586729, iteration: 354510
loss: 1.0075275897979736,grad_norm: 0.8938664554130556, iteration: 354511
loss: 1.0587913990020752,grad_norm: 0.8486220700203635, iteration: 354512
loss: 1.0068104267120361,grad_norm: 0.9999999554059493, iteration: 354513
loss: 1.0357917547225952,grad_norm: 0.8956373551415261, iteration: 354514
loss: 0.9847531914710999,grad_norm: 0.999999972296975, iteration: 354515
loss: 1.0051134824752808,grad_norm: 0.688771401189156, iteration: 354516
loss: 1.026638388633728,grad_norm: 0.8071262095977911, iteration: 354517
loss: 1.081404209136963,grad_norm: 0.9999999409008451, iteration: 354518
loss: 0.9655930995941162,grad_norm: 0.7312481178518585, iteration: 354519
loss: 0.9675765633583069,grad_norm: 0.7545865412474412, iteration: 354520
loss: 1.0110301971435547,grad_norm: 0.9999994738300579, iteration: 354521
loss: 1.0422017574310303,grad_norm: 0.9999991743745561, iteration: 354522
loss: 0.9784151911735535,grad_norm: 0.9023472730110405, iteration: 354523
loss: 1.0235778093338013,grad_norm: 0.8878113815020943, iteration: 354524
loss: 1.0230430364608765,grad_norm: 0.8185780200124828, iteration: 354525
loss: 1.0897520780563354,grad_norm: 0.999999316092491, iteration: 354526
loss: 0.9822580814361572,grad_norm: 0.9999999189232684, iteration: 354527
loss: 0.9890796542167664,grad_norm: 0.8076768415232428, iteration: 354528
loss: 1.0196826457977295,grad_norm: 0.9999996385987311, iteration: 354529
loss: 0.9592620134353638,grad_norm: 0.7178108593641626, iteration: 354530
loss: 0.993486225605011,grad_norm: 0.7775305300373929, iteration: 354531
loss: 0.9790602326393127,grad_norm: 0.7992018620541045, iteration: 354532
loss: 1.0059581995010376,grad_norm: 0.8279391991321459, iteration: 354533
loss: 1.0107622146606445,grad_norm: 0.7483409037923658, iteration: 354534
loss: 1.0092735290527344,grad_norm: 0.7647034569910816, iteration: 354535
loss: 1.0134758949279785,grad_norm: 0.9253706105476136, iteration: 354536
loss: 1.0829367637634277,grad_norm: 0.9900492730029745, iteration: 354537
loss: 0.9669471979141235,grad_norm: 0.8285027392514779, iteration: 354538
loss: 1.0219098329544067,grad_norm: 0.6651721281092413, iteration: 354539
loss: 0.9739382863044739,grad_norm: 0.8385372321002679, iteration: 354540
loss: 1.0312503576278687,grad_norm: 0.7002310911422384, iteration: 354541
loss: 1.0794041156768799,grad_norm: 0.9999998339209225, iteration: 354542
loss: 0.9882367253303528,grad_norm: 0.9074155566022185, iteration: 354543
loss: 1.0484012365341187,grad_norm: 0.8434487131576277, iteration: 354544
loss: 1.0162596702575684,grad_norm: 0.6888055035819819, iteration: 354545
loss: 0.9780657291412354,grad_norm: 0.8661342633684547, iteration: 354546
loss: 0.9993274807929993,grad_norm: 0.929427116284645, iteration: 354547
loss: 1.035935401916504,grad_norm: 0.9008949670828935, iteration: 354548
loss: 0.9571121335029602,grad_norm: 0.9066435304723084, iteration: 354549
loss: 1.0246937274932861,grad_norm: 0.9999997117544399, iteration: 354550
loss: 1.0573558807373047,grad_norm: 0.9999995869789315, iteration: 354551
loss: 1.0291922092437744,grad_norm: 0.8971757406132106, iteration: 354552
loss: 0.9898208379745483,grad_norm: 0.8397188230326625, iteration: 354553
loss: 1.0085538625717163,grad_norm: 0.6977031705205232, iteration: 354554
loss: 1.043565034866333,grad_norm: 0.7729199131191109, iteration: 354555
loss: 1.0202468633651733,grad_norm: 0.9999992028897775, iteration: 354556
loss: 1.0378873348236084,grad_norm: 0.9999996534278498, iteration: 354557
loss: 1.006972312927246,grad_norm: 0.880744306524024, iteration: 354558
loss: 0.9859526753425598,grad_norm: 0.7381443642012482, iteration: 354559
loss: 1.0739684104919434,grad_norm: 0.6177510768562264, iteration: 354560
loss: 0.9921082854270935,grad_norm: 0.7734890551154091, iteration: 354561
loss: 0.9740040898323059,grad_norm: 0.8169344618927861, iteration: 354562
loss: 1.0026216506958008,grad_norm: 0.9999991733549686, iteration: 354563
loss: 1.0060789585113525,grad_norm: 0.8603848637938866, iteration: 354564
loss: 0.9515502452850342,grad_norm: 0.826351931306076, iteration: 354565
loss: 0.9924867749214172,grad_norm: 0.9036072679999657, iteration: 354566
loss: 1.0185970067977905,grad_norm: 0.8006049825497498, iteration: 354567
loss: 1.0044363737106323,grad_norm: 0.8509847062120801, iteration: 354568
loss: 1.0350871086120605,grad_norm: 0.9428763261097369, iteration: 354569
loss: 1.020493507385254,grad_norm: 0.6881108383987046, iteration: 354570
loss: 1.0594937801361084,grad_norm: 1.0000000103389777, iteration: 354571
loss: 0.9574044942855835,grad_norm: 0.6862646564767811, iteration: 354572
loss: 1.0932679176330566,grad_norm: 0.9999994706382755, iteration: 354573
loss: 1.0146981477737427,grad_norm: 0.9597151653259774, iteration: 354574
loss: 0.9975637197494507,grad_norm: 0.9305229094978203, iteration: 354575
loss: 1.0585883855819702,grad_norm: 0.9256751373279343, iteration: 354576
loss: 1.0389972925186157,grad_norm: 0.8916804318174709, iteration: 354577
loss: 0.9884423613548279,grad_norm: 0.7146920108287261, iteration: 354578
loss: 1.0659414529800415,grad_norm: 0.8566983887889991, iteration: 354579
loss: 1.0184730291366577,grad_norm: 0.9143421265484026, iteration: 354580
loss: 1.0044156312942505,grad_norm: 0.8525582254846009, iteration: 354581
loss: 1.034520149230957,grad_norm: 0.8852577954372338, iteration: 354582
loss: 1.0075993537902832,grad_norm: 0.9986378542252217, iteration: 354583
loss: 1.0111275911331177,grad_norm: 0.7752026187760612, iteration: 354584
loss: 1.0006749629974365,grad_norm: 0.9160043152019952, iteration: 354585
loss: 0.998293936252594,grad_norm: 0.8559538570338823, iteration: 354586
loss: 0.9914237856864929,grad_norm: 0.8983435995072341, iteration: 354587
loss: 1.0815153121948242,grad_norm: 0.9999996815486603, iteration: 354588
loss: 0.9636470675468445,grad_norm: 0.7400532513954502, iteration: 354589
loss: 0.993295431137085,grad_norm: 0.8484821598909401, iteration: 354590
loss: 0.9632928967475891,grad_norm: 0.8662627433961408, iteration: 354591
loss: 1.0097944736480713,grad_norm: 0.8877698704799217, iteration: 354592
loss: 0.9625696539878845,grad_norm: 0.9217752550276798, iteration: 354593
loss: 1.0206795930862427,grad_norm: 0.7682806697074328, iteration: 354594
loss: 0.9902182221412659,grad_norm: 0.781510349026662, iteration: 354595
loss: 1.0250394344329834,grad_norm: 0.753856700546186, iteration: 354596
loss: 1.0170623064041138,grad_norm: 0.9196497353879431, iteration: 354597
loss: 1.0096434354782104,grad_norm: 0.8462953584833605, iteration: 354598
loss: 0.9844027757644653,grad_norm: 0.6925982273223348, iteration: 354599
loss: 1.005263328552246,grad_norm: 0.6195806720766907, iteration: 354600
loss: 1.0132067203521729,grad_norm: 0.9286768990990132, iteration: 354601
loss: 1.0049527883529663,grad_norm: 0.775748550458465, iteration: 354602
loss: 0.9811157584190369,grad_norm: 0.7283383671607896, iteration: 354603
loss: 0.9903879165649414,grad_norm: 0.9052801272173925, iteration: 354604
loss: 0.9618471264839172,grad_norm: 0.8690819137394237, iteration: 354605
loss: 1.0231633186340332,grad_norm: 0.7391334328636198, iteration: 354606
loss: 1.0162155628204346,grad_norm: 0.8439400292424133, iteration: 354607
loss: 0.9823393821716309,grad_norm: 0.8292322846542025, iteration: 354608
loss: 1.012858510017395,grad_norm: 0.713235756528451, iteration: 354609
loss: 0.9863253235816956,grad_norm: 0.7030688134664959, iteration: 354610
loss: 1.1013416051864624,grad_norm: 0.9999998211413544, iteration: 354611
loss: 1.0308468341827393,grad_norm: 0.5965439477020231, iteration: 354612
loss: 0.9912775754928589,grad_norm: 0.8053366885751175, iteration: 354613
loss: 1.0339381694793701,grad_norm: 0.7434744515909378, iteration: 354614
loss: 1.072049856185913,grad_norm: 0.9999991223107383, iteration: 354615
loss: 1.074121356010437,grad_norm: 0.9999991561729114, iteration: 354616
loss: 0.988469123840332,grad_norm: 0.774383506648153, iteration: 354617
loss: 0.9678476452827454,grad_norm: 0.7964122433618085, iteration: 354618
loss: 1.0368160009384155,grad_norm: 0.8804482307076728, iteration: 354619
loss: 0.9804850220680237,grad_norm: 0.9999991277285721, iteration: 354620
loss: 1.0022732019424438,grad_norm: 0.6844136456414511, iteration: 354621
loss: 1.0105541944503784,grad_norm: 0.7360747470643048, iteration: 354622
loss: 1.00324285030365,grad_norm: 0.791446765494147, iteration: 354623
loss: 1.0465583801269531,grad_norm: 0.9454447131838952, iteration: 354624
loss: 1.0072883367538452,grad_norm: 0.8782049366699568, iteration: 354625
loss: 1.0004398822784424,grad_norm: 0.8400897097584056, iteration: 354626
loss: 1.057262897491455,grad_norm: 0.9999993694113477, iteration: 354627
loss: 1.0058090686798096,grad_norm: 0.9999991411576262, iteration: 354628
loss: 0.9639507532119751,grad_norm: 0.8984726780312439, iteration: 354629
loss: 1.020249843597412,grad_norm: 0.9003313354649641, iteration: 354630
loss: 1.0172958374023438,grad_norm: 0.8931102343772874, iteration: 354631
loss: 1.014135718345642,grad_norm: 0.9999993082506106, iteration: 354632
loss: 0.9963712692260742,grad_norm: 0.9999993284460021, iteration: 354633
loss: 1.00542414188385,grad_norm: 0.9623520700963027, iteration: 354634
loss: 1.008499026298523,grad_norm: 0.9044303827766882, iteration: 354635
loss: 0.9999633431434631,grad_norm: 0.8262595513839172, iteration: 354636
loss: 0.9810807108879089,grad_norm: 0.681158624562164, iteration: 354637
loss: 0.9839037656784058,grad_norm: 0.9813039008413298, iteration: 354638
loss: 1.0551708936691284,grad_norm: 0.9999992785646659, iteration: 354639
loss: 1.0120054483413696,grad_norm: 0.8804610012396452, iteration: 354640
loss: 1.0018932819366455,grad_norm: 0.778223071500594, iteration: 354641
loss: 0.9832706451416016,grad_norm: 0.8543079902410584, iteration: 354642
loss: 1.0585331916809082,grad_norm: 0.9999993249148283, iteration: 354643
loss: 1.0288667678833008,grad_norm: 0.7800017334522322, iteration: 354644
loss: 0.9900590777397156,grad_norm: 0.9999991053313251, iteration: 354645
loss: 0.977745771408081,grad_norm: 0.8486906160586991, iteration: 354646
loss: 0.9498945474624634,grad_norm: 0.8121058068997662, iteration: 354647
loss: 0.9529557824134827,grad_norm: 0.7725344966608031, iteration: 354648
loss: 1.014133334159851,grad_norm: 0.9207952293499898, iteration: 354649
loss: 1.0278990268707275,grad_norm: 0.7464923147797643, iteration: 354650
loss: 1.01292884349823,grad_norm: 0.7548036672625857, iteration: 354651
loss: 1.0617650747299194,grad_norm: 0.8866310650667444, iteration: 354652
loss: 0.9884234666824341,grad_norm: 0.7592522140621514, iteration: 354653
loss: 1.0038775205612183,grad_norm: 0.8717820768589142, iteration: 354654
loss: 0.9780793786048889,grad_norm: 0.8709544893711398, iteration: 354655
loss: 0.9925505518913269,grad_norm: 0.7493939160084901, iteration: 354656
loss: 1.052303433418274,grad_norm: 0.999999115173597, iteration: 354657
loss: 1.0230507850646973,grad_norm: 0.8248407954248465, iteration: 354658
loss: 1.0221917629241943,grad_norm: 0.999999416524785, iteration: 354659
loss: 1.0121631622314453,grad_norm: 0.8395771063297758, iteration: 354660
loss: 0.9783002734184265,grad_norm: 0.8312906561140777, iteration: 354661
loss: 1.0124330520629883,grad_norm: 0.731319780604116, iteration: 354662
loss: 0.9646596312522888,grad_norm: 0.9474167433782105, iteration: 354663
loss: 1.006074070930481,grad_norm: 0.7852163627773611, iteration: 354664
loss: 1.0034915208816528,grad_norm: 0.999999078698303, iteration: 354665
loss: 0.967054545879364,grad_norm: 0.9930118622196316, iteration: 354666
loss: 0.9682970643043518,grad_norm: 0.958067446925751, iteration: 354667
loss: 0.9753298163414001,grad_norm: 0.8006405164745172, iteration: 354668
loss: 0.988175094127655,grad_norm: 0.9999997943838892, iteration: 354669
loss: 0.9489298462867737,grad_norm: 0.9999991458026773, iteration: 354670
loss: 0.9989925622940063,grad_norm: 0.7732013532020247, iteration: 354671
loss: 0.9985703229904175,grad_norm: 0.880450923859242, iteration: 354672
loss: 0.9879935383796692,grad_norm: 0.8964188483278972, iteration: 354673
loss: 0.991110622882843,grad_norm: 0.7942394398352031, iteration: 354674
loss: 0.997101366519928,grad_norm: 0.8443041205825416, iteration: 354675
loss: 0.998950719833374,grad_norm: 0.7341824033577165, iteration: 354676
loss: 1.0161000490188599,grad_norm: 0.7977000269071378, iteration: 354677
loss: 0.9742369651794434,grad_norm: 0.9740155640278426, iteration: 354678
loss: 1.143833041191101,grad_norm: 0.8180636882757519, iteration: 354679
loss: 0.9864438772201538,grad_norm: 0.7538494634739478, iteration: 354680
loss: 0.9807956218719482,grad_norm: 0.9244206421169988, iteration: 354681
loss: 0.9887989163398743,grad_norm: 0.9217872921054704, iteration: 354682
loss: 0.9579885601997375,grad_norm: 0.800827506432851, iteration: 354683
loss: 1.0038762092590332,grad_norm: 0.6209876061852885, iteration: 354684
loss: 0.9941975474357605,grad_norm: 0.8248205122893321, iteration: 354685
loss: 0.9939647316932678,grad_norm: 0.6731972151966193, iteration: 354686
loss: 1.0076372623443604,grad_norm: 0.8800443510763281, iteration: 354687
loss: 0.9798892736434937,grad_norm: 0.7872540373976408, iteration: 354688
loss: 1.0057886838912964,grad_norm: 0.9683625236135431, iteration: 354689
loss: 1.0042310953140259,grad_norm: 0.8010355231659194, iteration: 354690
loss: 0.9731388688087463,grad_norm: 0.7440965176032271, iteration: 354691
loss: 0.9861308932304382,grad_norm: 0.7619004191013842, iteration: 354692
loss: 1.0036512613296509,grad_norm: 0.7967101174470802, iteration: 354693
loss: 1.0017657279968262,grad_norm: 0.7598105937059343, iteration: 354694
loss: 0.9902030229568481,grad_norm: 0.8219945002420007, iteration: 354695
loss: 0.9960587024688721,grad_norm: 0.8210913974561824, iteration: 354696
loss: 0.9647645354270935,grad_norm: 0.8523664866183442, iteration: 354697
loss: 1.013713002204895,grad_norm: 0.8633180685571015, iteration: 354698
loss: 0.9965423345565796,grad_norm: 0.7360567875526423, iteration: 354699
loss: 0.9926276206970215,grad_norm: 0.8930415938182448, iteration: 354700
loss: 0.9547606110572815,grad_norm: 0.9516572005602969, iteration: 354701
loss: 1.0136245489120483,grad_norm: 0.9999990510813868, iteration: 354702
loss: 1.1618789434432983,grad_norm: 0.9999993280123152, iteration: 354703
loss: 1.0274724960327148,grad_norm: 0.7951790559347353, iteration: 354704
loss: 0.986672580242157,grad_norm: 0.8296803656041467, iteration: 354705
loss: 0.9722764492034912,grad_norm: 0.7121488401241198, iteration: 354706
loss: 1.0320491790771484,grad_norm: 0.8067236081627336, iteration: 354707
loss: 0.9873358607292175,grad_norm: 0.9792576394260747, iteration: 354708
loss: 1.000515341758728,grad_norm: 0.9473320624107406, iteration: 354709
loss: 1.0237538814544678,grad_norm: 0.6808385675366657, iteration: 354710
loss: 1.009444236755371,grad_norm: 0.7418892722529887, iteration: 354711
loss: 0.963407576084137,grad_norm: 0.6630340509824498, iteration: 354712
loss: 0.989006757736206,grad_norm: 0.9984406668614094, iteration: 354713
loss: 1.0140447616577148,grad_norm: 0.8428272325994693, iteration: 354714
loss: 0.9888315200805664,grad_norm: 0.9037937588622313, iteration: 354715
loss: 1.0008639097213745,grad_norm: 0.9443603291290802, iteration: 354716
loss: 0.9427710771560669,grad_norm: 0.7987406770366878, iteration: 354717
loss: 1.0048909187316895,grad_norm: 0.7397775613236306, iteration: 354718
loss: 1.021751046180725,grad_norm: 0.7776825181556168, iteration: 354719
loss: 1.0060603618621826,grad_norm: 0.9999991590339057, iteration: 354720
loss: 1.0054841041564941,grad_norm: 0.7332368854932141, iteration: 354721
loss: 1.0925170183181763,grad_norm: 0.9999995446286553, iteration: 354722
loss: 0.9660282731056213,grad_norm: 0.9999997791115152, iteration: 354723
loss: 0.989006519317627,grad_norm: 0.9191625579852465, iteration: 354724
loss: 0.9814717769622803,grad_norm: 0.732046556363014, iteration: 354725
loss: 0.9957265853881836,grad_norm: 0.836536902576826, iteration: 354726
loss: 0.9679619669914246,grad_norm: 0.8033332726297735, iteration: 354727
loss: 1.0138643980026245,grad_norm: 0.7062629169706244, iteration: 354728
loss: 1.015135645866394,grad_norm: 0.7259950877539494, iteration: 354729
loss: 1.087449073791504,grad_norm: 0.9999990485792832, iteration: 354730
loss: 0.9900732636451721,grad_norm: 0.7357042548852368, iteration: 354731
loss: 1.0012027025222778,grad_norm: 0.8355960991007336, iteration: 354732
loss: 0.9902386665344238,grad_norm: 0.8880153354927315, iteration: 354733
loss: 0.9950929880142212,grad_norm: 0.7887860038549352, iteration: 354734
loss: 0.9861093759536743,grad_norm: 0.8371520810178078, iteration: 354735
loss: 1.002761960029602,grad_norm: 0.8919510798299356, iteration: 354736
loss: 0.9807063341140747,grad_norm: 0.8844221274645296, iteration: 354737
loss: 0.9825605750083923,grad_norm: 0.8879255159266517, iteration: 354738
loss: 0.9784302711486816,grad_norm: 0.6251168129945716, iteration: 354739
loss: 1.0132007598876953,grad_norm: 0.8018471615388166, iteration: 354740
loss: 1.0239872932434082,grad_norm: 0.8013702194519818, iteration: 354741
loss: 1.0610735416412354,grad_norm: 0.9999999687584098, iteration: 354742
loss: 1.03016197681427,grad_norm: 0.9999990831526459, iteration: 354743
loss: 1.0241190195083618,grad_norm: 0.8839989674177994, iteration: 354744
loss: 1.0165387392044067,grad_norm: 0.8833051665291861, iteration: 354745
loss: 0.9937161207199097,grad_norm: 0.8194066940456339, iteration: 354746
loss: 0.9943001866340637,grad_norm: 0.8220492200263487, iteration: 354747
loss: 0.9917285442352295,grad_norm: 0.7842604170183275, iteration: 354748
loss: 0.9765058159828186,grad_norm: 0.9524523730967257, iteration: 354749
loss: 1.0386993885040283,grad_norm: 0.798837955312029, iteration: 354750
loss: 1.003896951675415,grad_norm: 0.8213229837066993, iteration: 354751
loss: 1.028196096420288,grad_norm: 0.8979976235677246, iteration: 354752
loss: 0.9885345697402954,grad_norm: 0.8456261114296373, iteration: 354753
loss: 1.000056505203247,grad_norm: 0.7568729980327695, iteration: 354754
loss: 0.9568736553192139,grad_norm: 0.7695517762228236, iteration: 354755
loss: 0.9619120955467224,grad_norm: 0.846538749448937, iteration: 354756
loss: 1.0158475637435913,grad_norm: 0.8318066252620642, iteration: 354757
loss: 0.9892536401748657,grad_norm: 0.9999990733386884, iteration: 354758
loss: 1.0100071430206299,grad_norm: 0.6926978047453751, iteration: 354759
loss: 0.9997193813323975,grad_norm: 0.7822817409624062, iteration: 354760
loss: 1.0301481485366821,grad_norm: 0.9999990810230625, iteration: 354761
loss: 0.9618077278137207,grad_norm: 0.8370405311238569, iteration: 354762
loss: 1.0054470300674438,grad_norm: 0.7687041131466013, iteration: 354763
loss: 1.0141234397888184,grad_norm: 0.7933369668885267, iteration: 354764
loss: 0.998336672782898,grad_norm: 0.7641639437106218, iteration: 354765
loss: 0.9994122385978699,grad_norm: 0.9872474016094663, iteration: 354766
loss: 1.0269650220870972,grad_norm: 0.8520000272930294, iteration: 354767
loss: 1.012285828590393,grad_norm: 0.8623582814186588, iteration: 354768
loss: 0.9666088223457336,grad_norm: 0.768053081202075, iteration: 354769
loss: 1.0055291652679443,grad_norm: 0.698171240933808, iteration: 354770
loss: 0.9957539439201355,grad_norm: 0.8204012946456827, iteration: 354771
loss: 0.9960842132568359,grad_norm: 0.8551513555482954, iteration: 354772
loss: 0.98829185962677,grad_norm: 0.8695178645739507, iteration: 354773
loss: 0.9800771474838257,grad_norm: 0.8474215237933876, iteration: 354774
loss: 0.9980981945991516,grad_norm: 0.7219880934185058, iteration: 354775
loss: 1.0264829397201538,grad_norm: 0.884073759529037, iteration: 354776
loss: 0.9701172113418579,grad_norm: 0.8810438915428649, iteration: 354777
loss: 1.0036550760269165,grad_norm: 0.8185836271233993, iteration: 354778
loss: 1.013744831085205,grad_norm: 0.8547523608049384, iteration: 354779
loss: 0.9803850054740906,grad_norm: 0.9999991182358033, iteration: 354780
loss: 1.0412613153457642,grad_norm: 0.6881619929667409, iteration: 354781
loss: 1.0110142230987549,grad_norm: 0.9999992515624104, iteration: 354782
loss: 0.9729915857315063,grad_norm: 0.7240956920740607, iteration: 354783
loss: 1.0711522102355957,grad_norm: 0.9996255174444825, iteration: 354784
loss: 0.9961631894111633,grad_norm: 0.7389702401256518, iteration: 354785
loss: 1.0894651412963867,grad_norm: 0.808549401967474, iteration: 354786
loss: 0.9827018976211548,grad_norm: 0.840917650161206, iteration: 354787
loss: 1.0189646482467651,grad_norm: 0.9999997727527081, iteration: 354788
loss: 0.9611453413963318,grad_norm: 0.7158366602314946, iteration: 354789
loss: 0.9593031406402588,grad_norm: 0.7714371266386318, iteration: 354790
loss: 1.0397076606750488,grad_norm: 0.7978255070598455, iteration: 354791
loss: 0.9916554093360901,grad_norm: 0.8239340011998408, iteration: 354792
loss: 1.079296350479126,grad_norm: 0.8411032695501564, iteration: 354793
loss: 1.0103155374526978,grad_norm: 0.7622937693940248, iteration: 354794
loss: 0.9942048788070679,grad_norm: 0.8585011176029348, iteration: 354795
loss: 0.9818235635757446,grad_norm: 0.9581426381857767, iteration: 354796
loss: 0.9763946533203125,grad_norm: 0.7181316796714818, iteration: 354797
loss: 0.9961304664611816,grad_norm: 0.9805337222780012, iteration: 354798
loss: 1.022807240486145,grad_norm: 0.777292294392798, iteration: 354799
loss: 0.9397855997085571,grad_norm: 0.7422202104333132, iteration: 354800
loss: 1.0121901035308838,grad_norm: 0.8569835535730591, iteration: 354801
loss: 0.9800917506217957,grad_norm: 0.8067696768037451, iteration: 354802
loss: 0.9745243787765503,grad_norm: 0.8883624646273236, iteration: 354803
loss: 1.016311764717102,grad_norm: 0.999999089105057, iteration: 354804
loss: 0.997767984867096,grad_norm: 0.9999989717714991, iteration: 354805
loss: 0.9745593070983887,grad_norm: 0.7205729741316134, iteration: 354806
loss: 1.0112589597702026,grad_norm: 0.8368718944331749, iteration: 354807
loss: 1.0124785900115967,grad_norm: 0.9999999032099252, iteration: 354808
loss: 0.9795213341712952,grad_norm: 0.6887924569268163, iteration: 354809
loss: 1.019403100013733,grad_norm: 0.936105347363559, iteration: 354810
loss: 0.9876399040222168,grad_norm: 0.7826720240384417, iteration: 354811
loss: 0.9839298129081726,grad_norm: 0.7647732716762441, iteration: 354812
loss: 1.0029523372650146,grad_norm: 0.9412516940683823, iteration: 354813
loss: 0.980701744556427,grad_norm: 0.8963929228672175, iteration: 354814
loss: 0.9816185235977173,grad_norm: 0.7706551349377926, iteration: 354815
loss: 0.9821034669876099,grad_norm: 0.8095271102863846, iteration: 354816
loss: 0.9859135150909424,grad_norm: 0.7640598988597086, iteration: 354817
loss: 0.9804136157035828,grad_norm: 0.9999990963353501, iteration: 354818
loss: 1.0502605438232422,grad_norm: 0.8221811419434725, iteration: 354819
loss: 1.0471456050872803,grad_norm: 0.9197930258411489, iteration: 354820
loss: 0.9953945875167847,grad_norm: 0.7025478389488171, iteration: 354821
loss: 1.0051450729370117,grad_norm: 0.9999997873204155, iteration: 354822
loss: 1.0106446743011475,grad_norm: 0.9999997083995549, iteration: 354823
loss: 1.0107859373092651,grad_norm: 0.69348890123716, iteration: 354824
loss: 0.9658380150794983,grad_norm: 0.727241781394443, iteration: 354825
loss: 0.9917318820953369,grad_norm: 0.9687686283797007, iteration: 354826
loss: 1.040438175201416,grad_norm: 0.9999993003619148, iteration: 354827
loss: 1.0156850814819336,grad_norm: 0.6354406646907702, iteration: 354828
loss: 1.0079456567764282,grad_norm: 0.9111187156896581, iteration: 354829
loss: 1.0147614479064941,grad_norm: 0.7993774811763001, iteration: 354830
loss: 0.9456465244293213,grad_norm: 0.8624135462957081, iteration: 354831
loss: 0.9841530919075012,grad_norm: 0.8680593998656962, iteration: 354832
loss: 1.017645001411438,grad_norm: 0.7966143864107167, iteration: 354833
loss: 1.0024338960647583,grad_norm: 0.8288226612542073, iteration: 354834
loss: 0.9943825602531433,grad_norm: 0.9419923299787233, iteration: 354835
loss: 1.0079357624053955,grad_norm: 0.9856030658198247, iteration: 354836
loss: 1.0115532875061035,grad_norm: 0.9999993028380705, iteration: 354837
loss: 1.1719799041748047,grad_norm: 0.9999998103412499, iteration: 354838
loss: 1.0359432697296143,grad_norm: 0.9999991371410507, iteration: 354839
loss: 0.9975382685661316,grad_norm: 0.9078288956507116, iteration: 354840
loss: 1.0019272565841675,grad_norm: 0.887777778492388, iteration: 354841
loss: 0.9439274072647095,grad_norm: 0.6835009490758213, iteration: 354842
loss: 0.976141095161438,grad_norm: 0.8957131926672057, iteration: 354843
loss: 1.0034910440444946,grad_norm: 0.8626518818067679, iteration: 354844
loss: 1.0182009935379028,grad_norm: 0.7470620259542374, iteration: 354845
loss: 0.9668565392494202,grad_norm: 0.8025141208277313, iteration: 354846
loss: 0.9753206968307495,grad_norm: 0.796523563660441, iteration: 354847
loss: 1.013716220855713,grad_norm: 0.9999991046731954, iteration: 354848
loss: 0.978792130947113,grad_norm: 0.8009400126845475, iteration: 354849
loss: 1.1254534721374512,grad_norm: 0.9999993601501171, iteration: 354850
loss: 0.9939262270927429,grad_norm: 0.8786269895942566, iteration: 354851
loss: 0.9802501797676086,grad_norm: 0.9282840321225337, iteration: 354852
loss: 0.9864612221717834,grad_norm: 0.9695016197392871, iteration: 354853
loss: 1.0022261142730713,grad_norm: 0.8076067755498644, iteration: 354854
loss: 0.9843932390213013,grad_norm: 0.9450924866621374, iteration: 354855
loss: 1.019061803817749,grad_norm: 0.7867599975480287, iteration: 354856
loss: 0.9822248220443726,grad_norm: 0.9999991562217906, iteration: 354857
loss: 0.9923856854438782,grad_norm: 0.793704790424748, iteration: 354858
loss: 1.0144891738891602,grad_norm: 0.83796667957527, iteration: 354859
loss: 0.9910869598388672,grad_norm: 0.8879999537566635, iteration: 354860
loss: 0.974198579788208,grad_norm: 0.8866723745881444, iteration: 354861
loss: 0.9944776296615601,grad_norm: 0.9999990499577792, iteration: 354862
loss: 0.9921448230743408,grad_norm: 0.7865486811195888, iteration: 354863
loss: 1.0232924222946167,grad_norm: 0.9150382055564059, iteration: 354864
loss: 1.0406534671783447,grad_norm: 0.9078340430761004, iteration: 354865
loss: 1.0168790817260742,grad_norm: 0.9999990875346171, iteration: 354866
loss: 0.9902740716934204,grad_norm: 0.7534392010994557, iteration: 354867
loss: 1.0002238750457764,grad_norm: 0.7778657723821679, iteration: 354868
loss: 0.9934495687484741,grad_norm: 0.774224753580034, iteration: 354869
loss: 0.9815181493759155,grad_norm: 0.8017658883793343, iteration: 354870
loss: 0.9947258234024048,grad_norm: 0.6359623411289925, iteration: 354871
loss: 0.960058331489563,grad_norm: 0.7885537565755496, iteration: 354872
loss: 1.099868893623352,grad_norm: 0.9999996153267594, iteration: 354873
loss: 0.9620906114578247,grad_norm: 0.805654744301315, iteration: 354874
loss: 0.9957151412963867,grad_norm: 0.8167441765646528, iteration: 354875
loss: 0.9796035885810852,grad_norm: 0.9064901004499326, iteration: 354876
loss: 1.024178385734558,grad_norm: 0.8350779883889607, iteration: 354877
loss: 0.9916458129882812,grad_norm: 0.8129429421029238, iteration: 354878
loss: 0.9543043971061707,grad_norm: 0.999998942971685, iteration: 354879
loss: 1.013449788093567,grad_norm: 0.7426321841913885, iteration: 354880
loss: 1.0207093954086304,grad_norm: 0.9073782504427993, iteration: 354881
loss: 0.9594153761863708,grad_norm: 0.9015951550053697, iteration: 354882
loss: 0.9495678544044495,grad_norm: 0.9999992502935305, iteration: 354883
loss: 1.0465590953826904,grad_norm: 0.6652512118522601, iteration: 354884
loss: 0.988752007484436,grad_norm: 0.9804957759708995, iteration: 354885
loss: 1.004679799079895,grad_norm: 0.8836228786003384, iteration: 354886
loss: 0.9870715737342834,grad_norm: 0.6502703655617954, iteration: 354887
loss: 1.0075551271438599,grad_norm: 0.7983523220157809, iteration: 354888
loss: 1.034120798110962,grad_norm: 0.8454156697726479, iteration: 354889
loss: 0.9881516098976135,grad_norm: 0.742941869803593, iteration: 354890
loss: 1.1128259897232056,grad_norm: 0.7219418903791924, iteration: 354891
loss: 1.0239121913909912,grad_norm: 0.8450303409159794, iteration: 354892
loss: 0.9944740533828735,grad_norm: 0.8429931232343475, iteration: 354893
loss: 0.9961237907409668,grad_norm: 0.8091638405531083, iteration: 354894
loss: 1.0103398561477661,grad_norm: 0.8722463884936219, iteration: 354895
loss: 1.0124698877334595,grad_norm: 0.918988530449438, iteration: 354896
loss: 0.9813039898872375,grad_norm: 0.7462765229865351, iteration: 354897
loss: 1.006029486656189,grad_norm: 0.8416550597187054, iteration: 354898
loss: 1.0323973894119263,grad_norm: 0.9999993284361043, iteration: 354899
loss: 1.0187584161758423,grad_norm: 0.7450187264919176, iteration: 354900
loss: 0.9870620965957642,grad_norm: 0.8420572616133254, iteration: 354901
loss: 1.0099265575408936,grad_norm: 0.7633904439985039, iteration: 354902
loss: 1.0728586912155151,grad_norm: 0.8744779486050731, iteration: 354903
loss: 1.0356833934783936,grad_norm: 0.8201342967643849, iteration: 354904
loss: 1.0641597509384155,grad_norm: 0.9999992478144112, iteration: 354905
loss: 0.9906060099601746,grad_norm: 0.86756021522073, iteration: 354906
loss: 1.0332695245742798,grad_norm: 0.854840277614411, iteration: 354907
loss: 0.9824907183647156,grad_norm: 0.9999994960201102, iteration: 354908
loss: 1.0120558738708496,grad_norm: 0.6968725203039546, iteration: 354909
loss: 0.9839588403701782,grad_norm: 0.8950551863564804, iteration: 354910
loss: 0.9914026260375977,grad_norm: 0.8148063796636792, iteration: 354911
loss: 1.0033838748931885,grad_norm: 0.8264156953587363, iteration: 354912
loss: 1.010056734085083,grad_norm: 0.886384130336626, iteration: 354913
loss: 1.0099538564682007,grad_norm: 0.7715255137598981, iteration: 354914
loss: 1.000894546508789,grad_norm: 0.8120597284226769, iteration: 354915
loss: 0.9795935153961182,grad_norm: 0.7794552740459526, iteration: 354916
loss: 1.0091183185577393,grad_norm: 0.9999991492484265, iteration: 354917
loss: 0.9850115180015564,grad_norm: 0.9292571548334834, iteration: 354918
loss: 0.981712818145752,grad_norm: 0.8519636267377433, iteration: 354919
loss: 1.201316475868225,grad_norm: 0.9999999268912495, iteration: 354920
loss: 1.0164101123809814,grad_norm: 0.8755379152355087, iteration: 354921
loss: 0.9726459980010986,grad_norm: 0.9763261462673042, iteration: 354922
loss: 0.9899300932884216,grad_norm: 0.7509662853211461, iteration: 354923
loss: 1.005957007408142,grad_norm: 0.8580876113199052, iteration: 354924
loss: 1.1580671072006226,grad_norm: 0.9999993482688334, iteration: 354925
loss: 0.9759562611579895,grad_norm: 0.773670871224105, iteration: 354926
loss: 1.044700264930725,grad_norm: 0.8463368357704987, iteration: 354927
loss: 1.0017880201339722,grad_norm: 0.8618940393043784, iteration: 354928
loss: 1.2046914100646973,grad_norm: 0.9999997975360703, iteration: 354929
loss: 0.9930847883224487,grad_norm: 0.8379788420553363, iteration: 354930
loss: 1.2572910785675049,grad_norm: 0.9999998787907718, iteration: 354931
loss: 1.4130035638809204,grad_norm: 0.9999991764818484, iteration: 354932
loss: 1.181797742843628,grad_norm: 0.9999993550273123, iteration: 354933
loss: 1.459208369255066,grad_norm: 0.9999995394711919, iteration: 354934
loss: 1.3575717210769653,grad_norm: 1.0000000781462783, iteration: 354935
loss: 0.9887921810150146,grad_norm: 0.7925201446747214, iteration: 354936
loss: 1.18937087059021,grad_norm: 0.9999997301012371, iteration: 354937
loss: 1.4436641931533813,grad_norm: 0.9999999577280011, iteration: 354938
loss: 1.6154563426971436,grad_norm: 1.0000000210961586, iteration: 354939
loss: 1.1181137561798096,grad_norm: 0.9999996675848053, iteration: 354940
loss: 1.1081972122192383,grad_norm: 0.9999995684418747, iteration: 354941
loss: 2.148059368133545,grad_norm: 0.9999999035440365, iteration: 354942
loss: 1.1722640991210938,grad_norm: 0.9999990754675461, iteration: 354943
loss: 1.1349849700927734,grad_norm: 0.999999822472402, iteration: 354944
loss: 0.974166214466095,grad_norm: 0.8333193947281111, iteration: 354945
loss: 1.0156842470169067,grad_norm: 0.9439507563449697, iteration: 354946
loss: 1.009326457977295,grad_norm: 0.9999997637041153, iteration: 354947
loss: 0.9842815399169922,grad_norm: 0.7450814972115343, iteration: 354948
loss: 1.0213910341262817,grad_norm: 0.9999999041768898, iteration: 354949
loss: 0.959202229976654,grad_norm: 0.8454060919431706, iteration: 354950
loss: 1.054597020149231,grad_norm: 0.9999991794641139, iteration: 354951
loss: 1.0069948434829712,grad_norm: 0.8628514138675053, iteration: 354952
loss: 1.0258442163467407,grad_norm: 0.916591836953357, iteration: 354953
loss: 0.994606077671051,grad_norm: 0.6924244372177443, iteration: 354954
loss: 1.015504240989685,grad_norm: 0.7371102472358022, iteration: 354955
loss: 1.01627516746521,grad_norm: 0.9999990584113664, iteration: 354956
loss: 0.9998598694801331,grad_norm: 0.9571536345802902, iteration: 354957
loss: 0.9930580854415894,grad_norm: 0.9872984738564529, iteration: 354958
loss: 1.1110068559646606,grad_norm: 0.9999997919942073, iteration: 354959
loss: 1.1779255867004395,grad_norm: 0.9999995816848429, iteration: 354960
loss: 1.0222522020339966,grad_norm: 0.8679589649066474, iteration: 354961
loss: 0.9848524928092957,grad_norm: 0.7425781567288935, iteration: 354962
loss: 1.0031229257583618,grad_norm: 0.9999992000640953, iteration: 354963
loss: 1.0191794633865356,grad_norm: 0.9999992121480907, iteration: 354964
loss: 1.0201717615127563,grad_norm: 0.7158020046527148, iteration: 354965
loss: 1.044629454612732,grad_norm: 0.7706907788857932, iteration: 354966
loss: 0.9780160188674927,grad_norm: 0.925830387681091, iteration: 354967
loss: 0.9942521452903748,grad_norm: 0.7671171485523854, iteration: 354968
loss: 0.9778171181678772,grad_norm: 0.8982019013456586, iteration: 354969
loss: 0.9698410034179688,grad_norm: 0.8279242761970582, iteration: 354970
loss: 0.9883928298950195,grad_norm: 0.8206068042378704, iteration: 354971
loss: 1.0029587745666504,grad_norm: 0.8252330401166481, iteration: 354972
loss: 1.0091125965118408,grad_norm: 0.799534329761439, iteration: 354973
loss: 1.0281717777252197,grad_norm: 0.7930055209174305, iteration: 354974
loss: 1.0098052024841309,grad_norm: 0.8350560390218824, iteration: 354975
loss: 1.049519419670105,grad_norm: 0.7520849879976655, iteration: 354976
loss: 1.0652316808700562,grad_norm: 0.9479347748270339, iteration: 354977
loss: 1.0192087888717651,grad_norm: 0.8532657492262181, iteration: 354978
loss: 1.1060484647750854,grad_norm: 0.9999998859605284, iteration: 354979
loss: 0.9433998465538025,grad_norm: 0.715188806111907, iteration: 354980
loss: 1.0165866613388062,grad_norm: 0.8666966974717674, iteration: 354981
loss: 0.9866663217544556,grad_norm: 0.6878235190343126, iteration: 354982
loss: 0.9627586603164673,grad_norm: 0.8446689038389662, iteration: 354983
loss: 1.0274386405944824,grad_norm: 0.9114465343567497, iteration: 354984
loss: 0.9587543606758118,grad_norm: 0.7945831425991157, iteration: 354985
loss: 1.0053670406341553,grad_norm: 0.9999989818088795, iteration: 354986
loss: 0.9970785975456238,grad_norm: 0.7433576872870201, iteration: 354987
loss: 1.0086933374404907,grad_norm: 0.9999999554219114, iteration: 354988
loss: 0.9806350469589233,grad_norm: 0.8414434097661004, iteration: 354989
loss: 1.0057967901229858,grad_norm: 0.7519596584019403, iteration: 354990
loss: 0.9891672134399414,grad_norm: 0.8131198761716935, iteration: 354991
loss: 0.9654578566551208,grad_norm: 0.7439835858959349, iteration: 354992
loss: 0.9976773858070374,grad_norm: 0.8459185649649567, iteration: 354993
loss: 1.0075628757476807,grad_norm: 0.8572697591799902, iteration: 354994
loss: 0.9958004951477051,grad_norm: 0.7356919654163429, iteration: 354995
loss: 1.0135356187820435,grad_norm: 0.8611276621369708, iteration: 354996
loss: 0.9863020777702332,grad_norm: 0.7784679103295784, iteration: 354997
loss: 0.974243700504303,grad_norm: 0.834174540011418, iteration: 354998
loss: 1.0055001974105835,grad_norm: 0.8719884050640812, iteration: 354999
loss: 0.9921126961708069,grad_norm: 0.999999159821983, iteration: 355000
loss: 1.015546441078186,grad_norm: 0.7963996005620612, iteration: 355001
loss: 0.9984872937202454,grad_norm: 0.7870277726709709, iteration: 355002
loss: 0.9722686409950256,grad_norm: 0.8745186017863555, iteration: 355003
loss: 1.0155951976776123,grad_norm: 0.8270807103288913, iteration: 355004
loss: 1.05131995677948,grad_norm: 0.874472685893361, iteration: 355005
loss: 1.0453875064849854,grad_norm: 0.9999992965308889, iteration: 355006
loss: 0.9937811493873596,grad_norm: 0.7730978027910772, iteration: 355007
loss: 1.0307852029800415,grad_norm: 0.7560270248605168, iteration: 355008
loss: 1.0008625984191895,grad_norm: 0.9999989752270239, iteration: 355009
loss: 0.9785115718841553,grad_norm: 0.9001784029719855, iteration: 355010
loss: 0.9945201873779297,grad_norm: 0.80571573487168, iteration: 355011
loss: 0.9858142137527466,grad_norm: 0.999999127790175, iteration: 355012
loss: 1.0154452323913574,grad_norm: 0.8688481205926543, iteration: 355013
loss: 1.0066015720367432,grad_norm: 0.8021983495146479, iteration: 355014
loss: 0.9785363078117371,grad_norm: 0.6404407980110978, iteration: 355015
loss: 0.9819515943527222,grad_norm: 0.9999989860643959, iteration: 355016
loss: 0.998928964138031,grad_norm: 0.999225001683125, iteration: 355017
loss: 1.0441858768463135,grad_norm: 0.7921032411518161, iteration: 355018
loss: 1.0467628240585327,grad_norm: 0.9999991753297326, iteration: 355019
loss: 0.9952166080474854,grad_norm: 0.7693551611812071, iteration: 355020
loss: 1.0405534505844116,grad_norm: 0.9999992443463398, iteration: 355021
loss: 1.0845551490783691,grad_norm: 0.9999990944415196, iteration: 355022
loss: 0.9747081398963928,grad_norm: 0.7622605584143807, iteration: 355023
loss: 1.0403203964233398,grad_norm: 0.9999995541105361, iteration: 355024
loss: 1.0522923469543457,grad_norm: 0.8347673013614797, iteration: 355025
loss: 1.0204466581344604,grad_norm: 0.9999990631785415, iteration: 355026
loss: 1.018396019935608,grad_norm: 0.8916225885411239, iteration: 355027
loss: 1.0051438808441162,grad_norm: 0.7043851090846288, iteration: 355028
loss: 1.0275599956512451,grad_norm: 0.9072037915721647, iteration: 355029
loss: 1.039893627166748,grad_norm: 0.7114482693026539, iteration: 355030
loss: 1.04879891872406,grad_norm: 0.9388801429688207, iteration: 355031
loss: 1.0373092889785767,grad_norm: 0.9999990304169366, iteration: 355032
loss: 0.9857582449913025,grad_norm: 0.7818687220482305, iteration: 355033
loss: 1.0030137300491333,grad_norm: 0.8657738588731563, iteration: 355034
loss: 1.0053178071975708,grad_norm: 0.8125077418704324, iteration: 355035
loss: 0.9828653335571289,grad_norm: 0.9333874050187357, iteration: 355036
loss: 1.0285035371780396,grad_norm: 0.7821850900770392, iteration: 355037
loss: 1.0008395910263062,grad_norm: 0.8848539496429855, iteration: 355038
loss: 0.9939208626747131,grad_norm: 0.7310251922137228, iteration: 355039
loss: 0.9752016067504883,grad_norm: 0.8486165286299715, iteration: 355040
loss: 0.9776903986930847,grad_norm: 0.835027535208235, iteration: 355041
loss: 0.9885597825050354,grad_norm: 0.7253410146082415, iteration: 355042
loss: 1.0122225284576416,grad_norm: 0.9999991095990938, iteration: 355043
loss: 0.9803630113601685,grad_norm: 0.749235551378008, iteration: 355044
loss: 0.9968993663787842,grad_norm: 0.9122802348614262, iteration: 355045
loss: 0.9591343998908997,grad_norm: 0.6494988708905808, iteration: 355046
loss: 0.9933710098266602,grad_norm: 0.7243462524408402, iteration: 355047
loss: 0.9747881293296814,grad_norm: 0.721697510630203, iteration: 355048
loss: 1.0077881813049316,grad_norm: 0.8211917500765964, iteration: 355049
loss: 0.9720090627670288,grad_norm: 0.7369274377733424, iteration: 355050
loss: 0.9782068729400635,grad_norm: 0.7041955117514822, iteration: 355051
loss: 1.0013909339904785,grad_norm: 0.9789081306849097, iteration: 355052
loss: 0.9868640899658203,grad_norm: 0.9999990014674016, iteration: 355053
loss: 0.9811007976531982,grad_norm: 0.8149230719347065, iteration: 355054
loss: 0.9780561923980713,grad_norm: 0.7728952925424004, iteration: 355055
loss: 0.9978007674217224,grad_norm: 0.772698376745551, iteration: 355056
loss: 0.9841960072517395,grad_norm: 0.8913859006656191, iteration: 355057
loss: 1.033493995666504,grad_norm: 0.8920221254471531, iteration: 355058
loss: 0.9843631386756897,grad_norm: 0.8540411307758438, iteration: 355059
loss: 1.1356397867202759,grad_norm: 0.7844108170029139, iteration: 355060
loss: 1.0206849575042725,grad_norm: 0.8331161723896581, iteration: 355061
loss: 0.9982966184616089,grad_norm: 0.9483735352219164, iteration: 355062
loss: 1.015258550643921,grad_norm: 0.999999731994047, iteration: 355063
loss: 0.9432216882705688,grad_norm: 0.7184744712068292, iteration: 355064
loss: 1.0130928754806519,grad_norm: 0.6617549557720876, iteration: 355065
loss: 0.9931353330612183,grad_norm: 0.7334644128629808, iteration: 355066
loss: 1.0111298561096191,grad_norm: 0.8703266791881032, iteration: 355067
loss: 0.9935572147369385,grad_norm: 0.8004720492633548, iteration: 355068
loss: 0.998927891254425,grad_norm: 0.9999995980695394, iteration: 355069
loss: 1.00320303440094,grad_norm: 0.7755976137066121, iteration: 355070
loss: 0.9913467168807983,grad_norm: 0.7119941993851963, iteration: 355071
loss: 0.9726954102516174,grad_norm: 0.8058341161194721, iteration: 355072
loss: 0.9985942244529724,grad_norm: 0.790751801362056, iteration: 355073
loss: 1.0058443546295166,grad_norm: 0.8068894244301824, iteration: 355074
loss: 0.9925954341888428,grad_norm: 0.9521260736761666, iteration: 355075
loss: 1.0020747184753418,grad_norm: 0.6139538484643782, iteration: 355076
loss: 1.0181981325149536,grad_norm: 0.9859667206382167, iteration: 355077
loss: 1.0339099168777466,grad_norm: 0.9999999162443493, iteration: 355078
loss: 1.0023739337921143,grad_norm: 0.8420249007016424, iteration: 355079
loss: 0.9713118076324463,grad_norm: 0.6872378364610885, iteration: 355080
loss: 0.9645012617111206,grad_norm: 0.9999991202435808, iteration: 355081
loss: 0.9690330028533936,grad_norm: 0.9350427593737192, iteration: 355082
loss: 1.0130850076675415,grad_norm: 0.7736817407096577, iteration: 355083
loss: 1.007935881614685,grad_norm: 0.8538948130971341, iteration: 355084
loss: 1.0239081382751465,grad_norm: 0.9999991346337197, iteration: 355085
loss: 0.9679940342903137,grad_norm: 0.7821053905152157, iteration: 355086
loss: 0.993728518486023,grad_norm: 0.8646070713007414, iteration: 355087
loss: 1.0334711074829102,grad_norm: 0.9999996914548964, iteration: 355088
loss: 1.004564881324768,grad_norm: 0.9019885903574347, iteration: 355089
loss: 1.024160385131836,grad_norm: 0.9329250329316645, iteration: 355090
loss: 0.9844920635223389,grad_norm: 0.7259372732442312, iteration: 355091
loss: 1.0047928094863892,grad_norm: 0.7260944462880143, iteration: 355092
loss: 1.004381537437439,grad_norm: 0.7693600532402373, iteration: 355093
loss: 1.2119014263153076,grad_norm: 0.9999998817186929, iteration: 355094
loss: 1.000171422958374,grad_norm: 0.9999997551910869, iteration: 355095
loss: 1.0386106967926025,grad_norm: 0.7450447121425599, iteration: 355096
loss: 0.9935088753700256,grad_norm: 0.9999999056182962, iteration: 355097
loss: 0.9937233328819275,grad_norm: 0.8326199772662921, iteration: 355098
loss: 0.9854229092597961,grad_norm: 0.7614962299443873, iteration: 355099
loss: 1.003594994544983,grad_norm: 0.8015408189053753, iteration: 355100
loss: 0.9985324740409851,grad_norm: 0.8447887622374578, iteration: 355101
loss: 0.9761697053909302,grad_norm: 0.7398340025632397, iteration: 355102
loss: 1.0162240266799927,grad_norm: 0.8828000803612652, iteration: 355103
loss: 1.0645309686660767,grad_norm: 0.9999994058143125, iteration: 355104
loss: 0.9981935620307922,grad_norm: 0.8008533723678841, iteration: 355105
loss: 1.0340008735656738,grad_norm: 0.9055524186797415, iteration: 355106
loss: 0.999492347240448,grad_norm: 0.7158460183482449, iteration: 355107
loss: 1.0458532571792603,grad_norm: 0.9999993053150223, iteration: 355108
loss: 1.112291693687439,grad_norm: 0.857255599172359, iteration: 355109
loss: 1.0074251890182495,grad_norm: 0.8258154614769994, iteration: 355110
loss: 1.0065314769744873,grad_norm: 0.9999997057962641, iteration: 355111
loss: 1.0436347723007202,grad_norm: 0.9999991711258425, iteration: 355112
loss: 1.0135442018508911,grad_norm: 0.831590754346894, iteration: 355113
loss: 1.0273874998092651,grad_norm: 0.8729826809769224, iteration: 355114
loss: 1.0198347568511963,grad_norm: 0.724733511044162, iteration: 355115
loss: 0.9854831695556641,grad_norm: 0.8453999239088049, iteration: 355116
loss: 1.0079201459884644,grad_norm: 0.9999994563001707, iteration: 355117
loss: 1.0217608213424683,grad_norm: 0.8279577092400944, iteration: 355118
loss: 0.9965871572494507,grad_norm: 0.9790512994792043, iteration: 355119
loss: 1.0270544290542603,grad_norm: 0.999999359713162, iteration: 355120
loss: 0.9759691953659058,grad_norm: 0.8772787895357211, iteration: 355121
loss: 0.9749031066894531,grad_norm: 0.68557620521631, iteration: 355122
loss: 1.0215518474578857,grad_norm: 0.8121809211176685, iteration: 355123
loss: 1.0416975021362305,grad_norm: 0.9999995139908574, iteration: 355124
loss: 1.00723397731781,grad_norm: 0.8659035840359908, iteration: 355125
loss: 0.9900259971618652,grad_norm: 0.8571314976732026, iteration: 355126
loss: 1.0219944715499878,grad_norm: 0.9138714401866892, iteration: 355127
loss: 0.9915927648544312,grad_norm: 0.8065622663307881, iteration: 355128
loss: 0.9821963310241699,grad_norm: 0.7487160523709944, iteration: 355129
loss: 1.0167183876037598,grad_norm: 0.7371920352504692, iteration: 355130
loss: 0.9554845094680786,grad_norm: 0.9797447034512075, iteration: 355131
loss: 1.0248723030090332,grad_norm: 0.7896901044024771, iteration: 355132
loss: 0.9725028872489929,grad_norm: 0.9294687005660439, iteration: 355133
loss: 1.010202169418335,grad_norm: 0.841686881408734, iteration: 355134
loss: 0.9791669249534607,grad_norm: 0.7989303509560629, iteration: 355135
loss: 1.0052299499511719,grad_norm: 0.8322701327401792, iteration: 355136
loss: 1.0114917755126953,grad_norm: 0.8633466052578103, iteration: 355137
loss: 1.0285148620605469,grad_norm: 0.8621742998584686, iteration: 355138
loss: 1.0400006771087646,grad_norm: 0.9999990760287994, iteration: 355139
loss: 1.0125774145126343,grad_norm: 0.9999990807869169, iteration: 355140
loss: 0.9890535473823547,grad_norm: 0.9433759785531788, iteration: 355141
loss: 0.9962536692619324,grad_norm: 0.9999991813557614, iteration: 355142
loss: 1.0685365200042725,grad_norm: 0.9999995459970802, iteration: 355143
loss: 0.9822854995727539,grad_norm: 0.8375710609371517, iteration: 355144
loss: 1.0004888772964478,grad_norm: 0.8576623182275825, iteration: 355145
loss: 0.991535484790802,grad_norm: 0.8368711617300064, iteration: 355146
loss: 1.0279020071029663,grad_norm: 0.9999990276863611, iteration: 355147
loss: 1.0597610473632812,grad_norm: 0.7810653524537565, iteration: 355148
loss: 1.0079094171524048,grad_norm: 0.8156600637434057, iteration: 355149
loss: 1.003255009651184,grad_norm: 0.7762845175597302, iteration: 355150
loss: 1.0165879726409912,grad_norm: 0.8204416232656732, iteration: 355151
loss: 0.984419584274292,grad_norm: 0.811720106849519, iteration: 355152
loss: 0.9696753025054932,grad_norm: 0.7402947406296503, iteration: 355153
loss: 1.005916714668274,grad_norm: 0.9364329575594748, iteration: 355154
loss: 0.9874687790870667,grad_norm: 0.685444895128155, iteration: 355155
loss: 1.0004111528396606,grad_norm: 0.7637323045669406, iteration: 355156
loss: 0.9969452619552612,grad_norm: 0.849146978428216, iteration: 355157
loss: 0.9755628108978271,grad_norm: 0.7687309070731692, iteration: 355158
loss: 1.0769906044006348,grad_norm: 0.9208552536201225, iteration: 355159
loss: 1.009839415550232,grad_norm: 0.774283690198737, iteration: 355160
loss: 1.0018401145935059,grad_norm: 0.7672926037017067, iteration: 355161
loss: 1.0259038209915161,grad_norm: 0.9862425839574325, iteration: 355162
loss: 0.9616222381591797,grad_norm: 0.8604277165473885, iteration: 355163
loss: 1.0562632083892822,grad_norm: 0.9999999023915158, iteration: 355164
loss: 1.0243116617202759,grad_norm: 0.9999995191103425, iteration: 355165
loss: 0.999658465385437,grad_norm: 0.8113942779888725, iteration: 355166
loss: 0.9956517219543457,grad_norm: 0.86274927417112, iteration: 355167
loss: 1.0061907768249512,grad_norm: 0.853873469446154, iteration: 355168
loss: 1.0044746398925781,grad_norm: 0.8657584064549277, iteration: 355169
loss: 1.0169694423675537,grad_norm: 0.9804842076433357, iteration: 355170
loss: 1.0179877281188965,grad_norm: 0.7196365192621944, iteration: 355171
loss: 0.9839529395103455,grad_norm: 0.9450935268930518, iteration: 355172
loss: 0.9934252500534058,grad_norm: 0.8144768606655068, iteration: 355173
loss: 1.011423110961914,grad_norm: 0.8621179330792846, iteration: 355174
loss: 0.9948934316635132,grad_norm: 0.7950891615601691, iteration: 355175
loss: 1.0100706815719604,grad_norm: 0.7804764663054906, iteration: 355176
loss: 0.9885696172714233,grad_norm: 0.9999997119059055, iteration: 355177
loss: 0.9600791931152344,grad_norm: 0.9295418995234536, iteration: 355178
loss: 1.015998125076294,grad_norm: 0.7959895508371543, iteration: 355179
loss: 1.0026017427444458,grad_norm: 0.746671210753624, iteration: 355180
loss: 1.0350641012191772,grad_norm: 0.8291602422448806, iteration: 355181
loss: 0.9941729307174683,grad_norm: 0.7788869576941526, iteration: 355182
loss: 1.002422571182251,grad_norm: 0.7876281351802894, iteration: 355183
loss: 0.9827550053596497,grad_norm: 0.8057751229049072, iteration: 355184
loss: 1.0182819366455078,grad_norm: 0.9234432233181772, iteration: 355185
loss: 1.022809624671936,grad_norm: 0.7622848075340335, iteration: 355186
loss: 1.0000475645065308,grad_norm: 0.718891132902299, iteration: 355187
loss: 1.0138827562332153,grad_norm: 0.6373072778078445, iteration: 355188
loss: 0.9962217211723328,grad_norm: 0.9999989517967076, iteration: 355189
loss: 0.9972622394561768,grad_norm: 0.8081027053809621, iteration: 355190
loss: 0.9772839546203613,grad_norm: 0.9221445587469514, iteration: 355191
loss: 0.9930506944656372,grad_norm: 0.9999994379088027, iteration: 355192
loss: 1.0238518714904785,grad_norm: 0.8716407067274975, iteration: 355193
loss: 1.0058408975601196,grad_norm: 0.95975407941476, iteration: 355194
loss: 0.9858364462852478,grad_norm: 0.7367458726074322, iteration: 355195
loss: 0.9963865876197815,grad_norm: 0.8541976491908118, iteration: 355196
loss: 0.9864970445632935,grad_norm: 0.8695761339082985, iteration: 355197
loss: 1.0033332109451294,grad_norm: 0.9999991859641185, iteration: 355198
loss: 0.9937618374824524,grad_norm: 0.7611794614042663, iteration: 355199
loss: 1.030252456665039,grad_norm: 0.8093352979562319, iteration: 355200
loss: 1.0114799737930298,grad_norm: 0.8565290335266018, iteration: 355201
loss: 1.0298597812652588,grad_norm: 0.7608724025102035, iteration: 355202
loss: 0.9994763731956482,grad_norm: 0.8684067987522511, iteration: 355203
loss: 1.0609041452407837,grad_norm: 0.9999991226527735, iteration: 355204
loss: 1.0088932514190674,grad_norm: 0.8203896026927222, iteration: 355205
loss: 1.0297964811325073,grad_norm: 0.9999990566136897, iteration: 355206
loss: 0.9930678606033325,grad_norm: 0.7572435524558311, iteration: 355207
loss: 1.0037481784820557,grad_norm: 0.8857537142677081, iteration: 355208
loss: 1.0046132802963257,grad_norm: 0.7462964948009079, iteration: 355209
loss: 1.0188298225402832,grad_norm: 0.9841883310518263, iteration: 355210
loss: 0.9924802184104919,grad_norm: 0.9226612739838363, iteration: 355211
loss: 1.0347384214401245,grad_norm: 0.919967447020207, iteration: 355212
loss: 0.997671902179718,grad_norm: 0.7583014636075819, iteration: 355213
loss: 0.9570298790931702,grad_norm: 0.6865690767959959, iteration: 355214
loss: 1.1729472875595093,grad_norm: 0.9999996324556369, iteration: 355215
loss: 0.9531643986701965,grad_norm: 0.6399512482090023, iteration: 355216
loss: 1.0271508693695068,grad_norm: 0.8293280036511742, iteration: 355217
loss: 1.0277258157730103,grad_norm: 0.9107042478453445, iteration: 355218
loss: 0.9904953837394714,grad_norm: 0.8481251944632853, iteration: 355219
loss: 1.009799838066101,grad_norm: 0.9999992013189466, iteration: 355220
loss: 1.0297785997390747,grad_norm: 0.783475374921553, iteration: 355221
loss: 0.9647830128669739,grad_norm: 0.7145091701055075, iteration: 355222
loss: 0.9777283668518066,grad_norm: 0.7231953433829339, iteration: 355223
loss: 0.9753389358520508,grad_norm: 0.8274100052970249, iteration: 355224
loss: 0.9632458686828613,grad_norm: 0.7956218330887083, iteration: 355225
loss: 0.982031524181366,grad_norm: 0.9999989771987216, iteration: 355226
loss: 1.0129857063293457,grad_norm: 0.7633609562662097, iteration: 355227
loss: 1.0091718435287476,grad_norm: 0.8105112887206161, iteration: 355228
loss: 1.005322813987732,grad_norm: 0.8639916788628235, iteration: 355229
loss: 0.9954951405525208,grad_norm: 0.8278637389888153, iteration: 355230
loss: 0.9987477660179138,grad_norm: 0.7646335443906277, iteration: 355231
loss: 1.002627968788147,grad_norm: 0.7433631795615617, iteration: 355232
loss: 1.0031377077102661,grad_norm: 0.8315260061976554, iteration: 355233
loss: 1.002387523651123,grad_norm: 0.9735404752224558, iteration: 355234
loss: 1.026893973350525,grad_norm: 0.7472457563959528, iteration: 355235
loss: 1.029730200767517,grad_norm: 0.7024112096874363, iteration: 355236
loss: 0.9994022846221924,grad_norm: 0.8265758289921427, iteration: 355237
loss: 1.0328956842422485,grad_norm: 0.9017971231100654, iteration: 355238
loss: 0.971123993396759,grad_norm: 0.8223536561962778, iteration: 355239
loss: 0.9402179718017578,grad_norm: 0.8506143936547117, iteration: 355240
loss: 0.9740645289421082,grad_norm: 0.8673493078893952, iteration: 355241
loss: 1.021549940109253,grad_norm: 0.9762153173331719, iteration: 355242
loss: 1.001642107963562,grad_norm: 0.9242492530098718, iteration: 355243
loss: 1.0241618156433105,grad_norm: 0.9849690936555175, iteration: 355244
loss: 1.0531713962554932,grad_norm: 0.9999999866427906, iteration: 355245
loss: 0.9764549732208252,grad_norm: 0.9999993480259016, iteration: 355246
loss: 1.037912368774414,grad_norm: 0.7380039525331739, iteration: 355247
loss: 1.0101395845413208,grad_norm: 0.681852661971887, iteration: 355248
loss: 1.0161360502243042,grad_norm: 0.8703817817220962, iteration: 355249
loss: 0.9635165333747864,grad_norm: 0.7430285694179849, iteration: 355250
loss: 0.996004581451416,grad_norm: 0.7324546087547043, iteration: 355251
loss: 1.0101995468139648,grad_norm: 0.8211664094523066, iteration: 355252
loss: 1.0172756910324097,grad_norm: 0.9787304821483389, iteration: 355253
loss: 1.0153453350067139,grad_norm: 0.7068490971500753, iteration: 355254
loss: 1.0132991075515747,grad_norm: 0.6529576864089478, iteration: 355255
loss: 0.9805225133895874,grad_norm: 0.723803553282737, iteration: 355256
loss: 1.0425776243209839,grad_norm: 0.8917425046284059, iteration: 355257
loss: 0.9990284442901611,grad_norm: 0.8808306220101622, iteration: 355258
loss: 1.008907437324524,grad_norm: 0.7874823208344119, iteration: 355259
loss: 0.9852645397186279,grad_norm: 0.7829427554740052, iteration: 355260
loss: 1.0095956325531006,grad_norm: 0.9065667791035629, iteration: 355261
loss: 0.9839879274368286,grad_norm: 0.7575406321213884, iteration: 355262
loss: 1.0133529901504517,grad_norm: 0.9999998473034903, iteration: 355263
loss: 0.9679439663887024,grad_norm: 0.7626321433524804, iteration: 355264
loss: 0.9802095890045166,grad_norm: 0.7004016637497322, iteration: 355265
loss: 0.9785682559013367,grad_norm: 0.9999992132401139, iteration: 355266
loss: 0.9899333715438843,grad_norm: 0.9999992785362528, iteration: 355267
loss: 1.0292428731918335,grad_norm: 0.8072262912352094, iteration: 355268
loss: 0.9658198356628418,grad_norm: 0.7211837305993956, iteration: 355269
loss: 0.9835320711135864,grad_norm: 0.8415137926954732, iteration: 355270
loss: 0.9973293542861938,grad_norm: 0.7595302410077776, iteration: 355271
loss: 0.9728736877441406,grad_norm: 0.7766110790182085, iteration: 355272
loss: 0.997846245765686,grad_norm: 0.9999997007840696, iteration: 355273
loss: 0.9708781242370605,grad_norm: 0.7582419075499119, iteration: 355274
loss: 0.9577088356018066,grad_norm: 0.8914432893190093, iteration: 355275
loss: 1.0023804903030396,grad_norm: 0.8137603651943163, iteration: 355276
loss: 0.9786517024040222,grad_norm: 0.8404677976304914, iteration: 355277
loss: 0.9548118710517883,grad_norm: 0.8791106791318135, iteration: 355278
loss: 1.0039538145065308,grad_norm: 0.7239439355840926, iteration: 355279
loss: 1.0613548755645752,grad_norm: 0.9074671278709266, iteration: 355280
loss: 1.0142419338226318,grad_norm: 0.8528469093347507, iteration: 355281
loss: 1.2165027856826782,grad_norm: 0.9999998629045782, iteration: 355282
loss: 1.0725717544555664,grad_norm: 0.9999989987397367, iteration: 355283
loss: 0.992737352848053,grad_norm: 0.9862251086096577, iteration: 355284
loss: 0.9861895442008972,grad_norm: 0.9999991091463085, iteration: 355285
loss: 1.003713846206665,grad_norm: 0.8639415703355592, iteration: 355286
loss: 1.0449323654174805,grad_norm: 0.9999994189451425, iteration: 355287
loss: 1.0642082691192627,grad_norm: 1.000000019751558, iteration: 355288
loss: 0.9528055191040039,grad_norm: 0.7652568991380876, iteration: 355289
loss: 1.0153043270111084,grad_norm: 0.8142564072580614, iteration: 355290
loss: 0.9889784455299377,grad_norm: 0.9999990678886457, iteration: 355291
loss: 0.9621487855911255,grad_norm: 0.9235544047331016, iteration: 355292
loss: 0.9707422852516174,grad_norm: 0.8865648912549864, iteration: 355293
loss: 0.9770942330360413,grad_norm: 0.7573595693364135, iteration: 355294
loss: 1.0238410234451294,grad_norm: 0.9999990903898659, iteration: 355295
loss: 1.004619836807251,grad_norm: 0.948451816627227, iteration: 355296
loss: 1.0126186609268188,grad_norm: 0.9350535169087647, iteration: 355297
loss: 0.9877866506576538,grad_norm: 0.6268444463994586, iteration: 355298
loss: 1.2650973796844482,grad_norm: 0.9999992001395033, iteration: 355299
loss: 0.9900724291801453,grad_norm: 0.8657387887552972, iteration: 355300
loss: 0.9892706871032715,grad_norm: 0.8695063148906278, iteration: 355301
loss: 1.003560185432434,grad_norm: 0.7419995963924387, iteration: 355302
loss: 1.010034441947937,grad_norm: 0.664462778056398, iteration: 355303
loss: 0.9911208152770996,grad_norm: 0.745043971429599, iteration: 355304
loss: 1.0180227756500244,grad_norm: 0.8099095516175568, iteration: 355305
loss: 0.9878154993057251,grad_norm: 0.9060384020391735, iteration: 355306
loss: 0.976231575012207,grad_norm: 0.7884920406291126, iteration: 355307
loss: 0.9938307404518127,grad_norm: 0.7724920728428099, iteration: 355308
loss: 0.9721715450286865,grad_norm: 0.7929820063337979, iteration: 355309
loss: 1.0106362104415894,grad_norm: 0.7494283938298613, iteration: 355310
loss: 1.101116418838501,grad_norm: 0.9999997030644531, iteration: 355311
loss: 1.0189834833145142,grad_norm: 0.831947021252092, iteration: 355312
loss: 1.1508491039276123,grad_norm: 1.000000003966392, iteration: 355313
loss: 1.0389827489852905,grad_norm: 0.8340626142957414, iteration: 355314
loss: 0.9823580980300903,grad_norm: 0.8064138145785509, iteration: 355315
loss: 1.0195256471633911,grad_norm: 0.7274946887780684, iteration: 355316
loss: 1.028259038925171,grad_norm: 0.9999993135833497, iteration: 355317
loss: 0.9812924861907959,grad_norm: 0.8276972117326242, iteration: 355318
loss: 0.9965884685516357,grad_norm: 0.8252679149915195, iteration: 355319
loss: 0.9724482893943787,grad_norm: 0.8870887090576876, iteration: 355320
loss: 1.023769736289978,grad_norm: 0.7490232894461714, iteration: 355321
loss: 0.9947584271430969,grad_norm: 0.9799986870220104, iteration: 355322
loss: 1.0055561065673828,grad_norm: 0.8340424661039061, iteration: 355323
loss: 0.9934728145599365,grad_norm: 0.780260091031365, iteration: 355324
loss: 0.9904045462608337,grad_norm: 0.8520469682448376, iteration: 355325
loss: 0.9980040192604065,grad_norm: 0.8587488865099782, iteration: 355326
loss: 0.972403347492218,grad_norm: 0.7784584680332161, iteration: 355327
loss: 1.023270606994629,grad_norm: 0.8975217908260364, iteration: 355328
loss: 1.0440970659255981,grad_norm: 0.9318082069433374, iteration: 355329
loss: 0.9883207082748413,grad_norm: 0.7272864816380705, iteration: 355330
loss: 0.9889125227928162,grad_norm: 0.9999991806251569, iteration: 355331
loss: 1.029494285583496,grad_norm: 0.9999993117485045, iteration: 355332
loss: 0.9503242373466492,grad_norm: 0.7934602186534474, iteration: 355333
loss: 0.9869153499603271,grad_norm: 0.9999989213388476, iteration: 355334
loss: 0.9742223620414734,grad_norm: 0.8827876768436292, iteration: 355335
loss: 0.9887313842773438,grad_norm: 0.8848892756791629, iteration: 355336
loss: 0.9726826548576355,grad_norm: 0.8203941467991762, iteration: 355337
loss: 0.9807347655296326,grad_norm: 0.888508058359154, iteration: 355338
loss: 1.0276310443878174,grad_norm: 0.7639464794470837, iteration: 355339
loss: 0.9915891885757446,grad_norm: 0.9420791638737215, iteration: 355340
loss: 0.9876959919929504,grad_norm: 0.9999997352955834, iteration: 355341
loss: 1.0363173484802246,grad_norm: 0.8205051290252522, iteration: 355342
loss: 1.0215959548950195,grad_norm: 0.9677134405974823, iteration: 355343
loss: 0.9608616828918457,grad_norm: 0.7575849631871204, iteration: 355344
loss: 1.0392380952835083,grad_norm: 0.7591523435154365, iteration: 355345
loss: 0.9797383546829224,grad_norm: 0.8472851765953134, iteration: 355346
loss: 0.9755203127861023,grad_norm: 0.7850643111072576, iteration: 355347
loss: 1.043605089187622,grad_norm: 0.849157012108796, iteration: 355348
loss: 0.9818990230560303,grad_norm: 0.9120519330259608, iteration: 355349
loss: 1.0038152933120728,grad_norm: 0.9344437376420708, iteration: 355350
loss: 1.0164543390274048,grad_norm: 0.7523049996015057, iteration: 355351
loss: 1.005540132522583,grad_norm: 0.869396596830576, iteration: 355352
loss: 0.9959608912467957,grad_norm: 0.999999946479222, iteration: 355353
loss: 1.0017729997634888,grad_norm: 0.8848763657012072, iteration: 355354
loss: 1.0252740383148193,grad_norm: 0.992888331807078, iteration: 355355
loss: 0.9991202354431152,grad_norm: 0.9464012614209226, iteration: 355356
loss: 1.0074156522750854,grad_norm: 0.8446069281570034, iteration: 355357
loss: 0.9812546372413635,grad_norm: 0.6783820880171466, iteration: 355358
loss: 0.9818611145019531,grad_norm: 0.7994247519522453, iteration: 355359
loss: 0.9990631937980652,grad_norm: 0.738789818152558, iteration: 355360
loss: 0.9955340623855591,grad_norm: 0.783650103553241, iteration: 355361
loss: 1.0414758920669556,grad_norm: 0.8723748641093663, iteration: 355362
loss: 0.9725398421287537,grad_norm: 0.7214365258953208, iteration: 355363
loss: 0.9676955938339233,grad_norm: 0.9427330230749862, iteration: 355364
loss: 1.0376951694488525,grad_norm: 0.9999998107671115, iteration: 355365
loss: 1.0425394773483276,grad_norm: 0.9999996342187988, iteration: 355366
loss: 1.0076476335525513,grad_norm: 0.9799494259049768, iteration: 355367
loss: 0.9558765292167664,grad_norm: 0.9294110014008854, iteration: 355368
loss: 1.018808126449585,grad_norm: 0.822355407510005, iteration: 355369
loss: 0.9832760691642761,grad_norm: 0.79780085587328, iteration: 355370
loss: 0.9967932105064392,grad_norm: 0.8460030220288475, iteration: 355371
loss: 0.9773446321487427,grad_norm: 0.8502001783774555, iteration: 355372
loss: 1.0623126029968262,grad_norm: 0.9900677996517819, iteration: 355373
loss: 1.0000754594802856,grad_norm: 0.8145158927323156, iteration: 355374
loss: 1.0268795490264893,grad_norm: 0.6759233369127715, iteration: 355375
loss: 1.0393550395965576,grad_norm: 0.9999998709832312, iteration: 355376
loss: 1.0045262575149536,grad_norm: 0.6248904055064031, iteration: 355377
loss: 1.00104558467865,grad_norm: 0.7210215816297612, iteration: 355378
loss: 0.9737042784690857,grad_norm: 0.7761925669958739, iteration: 355379
loss: 0.9651344418525696,grad_norm: 0.9999990692127406, iteration: 355380
loss: 0.9753297567367554,grad_norm: 0.8389227856365157, iteration: 355381
loss: 1.0093445777893066,grad_norm: 0.6318198975928814, iteration: 355382
loss: 1.0128527879714966,grad_norm: 0.9660167208184444, iteration: 355383
loss: 1.0101722478866577,grad_norm: 0.8085934700581642, iteration: 355384
loss: 0.9981359243392944,grad_norm: 0.8978447343177348, iteration: 355385
loss: 0.9882434606552124,grad_norm: 0.7655043208468911, iteration: 355386
loss: 0.9595737457275391,grad_norm: 0.7559435832321437, iteration: 355387
loss: 0.9716974496841431,grad_norm: 0.790681783988012, iteration: 355388
loss: 1.012795090675354,grad_norm: 0.816669653391965, iteration: 355389
loss: 0.9694768190383911,grad_norm: 0.9099086774353742, iteration: 355390
loss: 1.0243825912475586,grad_norm: 0.9999990437471024, iteration: 355391
loss: 0.9781339764595032,grad_norm: 0.852105284778104, iteration: 355392
loss: 0.9649783968925476,grad_norm: 0.8967094496015282, iteration: 355393
loss: 1.0059170722961426,grad_norm: 0.8485209739124965, iteration: 355394
loss: 0.9831037521362305,grad_norm: 0.825064995791476, iteration: 355395
loss: 1.0043784379959106,grad_norm: 0.9336220170020554, iteration: 355396
loss: 0.9632248282432556,grad_norm: 0.9441948435776721, iteration: 355397
loss: 1.0167711973190308,grad_norm: 0.7967136306302184, iteration: 355398
loss: 0.9671856760978699,grad_norm: 0.8832506449394936, iteration: 355399
loss: 1.0291262865066528,grad_norm: 0.9055418796436329, iteration: 355400
loss: 0.9864654541015625,grad_norm: 0.7977696374685896, iteration: 355401
loss: 1.05705988407135,grad_norm: 0.8446094867482621, iteration: 355402
loss: 1.0056724548339844,grad_norm: 0.817978369069033, iteration: 355403
loss: 0.9876641035079956,grad_norm: 0.8176250520061041, iteration: 355404
loss: 0.988392174243927,grad_norm: 0.7857296668404589, iteration: 355405
loss: 0.9915764331817627,grad_norm: 0.8165943172289972, iteration: 355406
loss: 0.9904894828796387,grad_norm: 0.8367362973847793, iteration: 355407
loss: 1.0418908596038818,grad_norm: 0.8564935527604076, iteration: 355408
loss: 1.08894681930542,grad_norm: 0.9999998790064331, iteration: 355409
loss: 0.987101674079895,grad_norm: 0.8243772037480217, iteration: 355410
loss: 1.055258870124817,grad_norm: 0.9999999783682949, iteration: 355411
loss: 1.0155727863311768,grad_norm: 0.84900389278739, iteration: 355412
loss: 0.9968271851539612,grad_norm: 0.8019105863063773, iteration: 355413
loss: 1.0165709257125854,grad_norm: 0.9999995257855019, iteration: 355414
loss: 1.0079354047775269,grad_norm: 0.9999994397637805, iteration: 355415
loss: 1.038393259048462,grad_norm: 0.8455894300953096, iteration: 355416
loss: 0.9925850629806519,grad_norm: 0.7426414575828776, iteration: 355417
loss: 1.0195021629333496,grad_norm: 0.8215295750298083, iteration: 355418
loss: 1.0231519937515259,grad_norm: 0.7231456996876319, iteration: 355419
loss: 0.9841517210006714,grad_norm: 0.7713263655827872, iteration: 355420
loss: 1.0130010843276978,grad_norm: 0.9552037113799853, iteration: 355421
loss: 1.0033570528030396,grad_norm: 0.7244941484022888, iteration: 355422
loss: 1.008375883102417,grad_norm: 0.8229093689600163, iteration: 355423
loss: 1.0276824235916138,grad_norm: 0.8907139016633765, iteration: 355424
loss: 0.9820500016212463,grad_norm: 0.7203647493735734, iteration: 355425
loss: 0.9901675581932068,grad_norm: 0.7958830072299484, iteration: 355426
loss: 1.056817650794983,grad_norm: 0.9999991421218865, iteration: 355427
loss: 0.9939104318618774,grad_norm: 0.7751810659827809, iteration: 355428
loss: 1.0073739290237427,grad_norm: 0.7515205608030013, iteration: 355429
loss: 1.0169141292572021,grad_norm: 0.6927641464333352, iteration: 355430
loss: 1.013353705406189,grad_norm: 0.7132712871744252, iteration: 355431
loss: 0.986957311630249,grad_norm: 0.9148068547748656, iteration: 355432
loss: 0.982317328453064,grad_norm: 0.7020317016308706, iteration: 355433
loss: 1.0260263681411743,grad_norm: 0.999999033032782, iteration: 355434
loss: 1.0270400047302246,grad_norm: 0.8087524355938697, iteration: 355435
loss: 1.0076911449432373,grad_norm: 0.9999990477651627, iteration: 355436
loss: 0.9979305863380432,grad_norm: 0.999999346518738, iteration: 355437
loss: 1.0801339149475098,grad_norm: 0.9999996342977745, iteration: 355438
loss: 1.0052821636199951,grad_norm: 0.9999991766897448, iteration: 355439
loss: 0.975604236125946,grad_norm: 0.999999090417344, iteration: 355440
loss: 1.0273996591567993,grad_norm: 0.9031769015052519, iteration: 355441
loss: 1.0016281604766846,grad_norm: 0.6668927758429085, iteration: 355442
loss: 0.9934062957763672,grad_norm: 0.8828630160820553, iteration: 355443
loss: 1.0046290159225464,grad_norm: 0.7190683423362706, iteration: 355444
loss: 1.0476757287979126,grad_norm: 0.9301464594554055, iteration: 355445
loss: 0.9713122248649597,grad_norm: 0.8153499336111019, iteration: 355446
loss: 0.9648830890655518,grad_norm: 0.8440165925570902, iteration: 355447
loss: 0.9818369150161743,grad_norm: 0.9432540735000757, iteration: 355448
loss: 0.989441454410553,grad_norm: 0.9999991200955628, iteration: 355449
loss: 0.9744507074356079,grad_norm: 0.8963615528795743, iteration: 355450
loss: 1.012147068977356,grad_norm: 0.8351226269691032, iteration: 355451
loss: 1.0464617013931274,grad_norm: 0.79688613339817, iteration: 355452
loss: 1.0122829675674438,grad_norm: 0.8415510845406097, iteration: 355453
loss: 1.023659586906433,grad_norm: 0.9609640342181757, iteration: 355454
loss: 1.0131316184997559,grad_norm: 0.7896406925395999, iteration: 355455
loss: 0.9787136316299438,grad_norm: 0.6765274809686659, iteration: 355456
loss: 1.0197139978408813,grad_norm: 0.747180560421719, iteration: 355457
loss: 0.9955567717552185,grad_norm: 0.8687855859631634, iteration: 355458
loss: 1.1835849285125732,grad_norm: 0.9999996984172166, iteration: 355459
loss: 0.9934935569763184,grad_norm: 0.8083579068684337, iteration: 355460
loss: 1.0179274082183838,grad_norm: 0.7784703478748904, iteration: 355461
loss: 1.0260857343673706,grad_norm: 0.9999999528250665, iteration: 355462
loss: 0.9777254462242126,grad_norm: 0.8955753486490097, iteration: 355463
loss: 0.9998480677604675,grad_norm: 0.9223242747664672, iteration: 355464
loss: 0.9667447209358215,grad_norm: 0.999999004034401, iteration: 355465
loss: 1.0129374265670776,grad_norm: 0.8589610318328545, iteration: 355466
loss: 1.0447688102722168,grad_norm: 0.7686470503229085, iteration: 355467
loss: 1.008147954940796,grad_norm: 0.7663208978432043, iteration: 355468
loss: 0.9951207637786865,grad_norm: 0.9166309302097686, iteration: 355469
loss: 1.0070106983184814,grad_norm: 0.8683213641595645, iteration: 355470
loss: 0.9727052450180054,grad_norm: 0.8741356982116512, iteration: 355471
loss: 1.0093097686767578,grad_norm: 0.9315358093245583, iteration: 355472
loss: 1.0066434144973755,grad_norm: 0.9999989997521869, iteration: 355473
loss: 1.0454612970352173,grad_norm: 0.7126940198777749, iteration: 355474
loss: 1.0102179050445557,grad_norm: 0.7555822039509463, iteration: 355475
loss: 0.965821385383606,grad_norm: 0.8445973890072382, iteration: 355476
loss: 0.9729636907577515,grad_norm: 0.9297237533322823, iteration: 355477
loss: 0.979135274887085,grad_norm: 0.8491559463974756, iteration: 355478
loss: 0.9797738790512085,grad_norm: 0.9999997916174427, iteration: 355479
loss: 0.9974697828292847,grad_norm: 0.7058365894381231, iteration: 355480
loss: 1.0039290189743042,grad_norm: 0.7838061152897474, iteration: 355481
loss: 1.0690844058990479,grad_norm: 0.9999996411764424, iteration: 355482
loss: 0.9963657259941101,grad_norm: 0.7381112004884092, iteration: 355483
loss: 0.9946052432060242,grad_norm: 0.8147527199710114, iteration: 355484
loss: 1.018161416053772,grad_norm: 0.7606205748642095, iteration: 355485
loss: 1.0177654027938843,grad_norm: 0.9186664829879602, iteration: 355486
loss: 1.0098415613174438,grad_norm: 0.8041094540559227, iteration: 355487
loss: 1.0023517608642578,grad_norm: 0.9045462355179642, iteration: 355488
loss: 0.9873285293579102,grad_norm: 0.7063247957471368, iteration: 355489
loss: 0.9831381440162659,grad_norm: 0.865666090454372, iteration: 355490
loss: 0.9981841444969177,grad_norm: 0.7829544598312387, iteration: 355491
loss: 0.9947022199630737,grad_norm: 0.8554254756853897, iteration: 355492
loss: 0.9897637367248535,grad_norm: 0.9282980952741998, iteration: 355493
loss: 1.020711064338684,grad_norm: 0.7769302715161147, iteration: 355494
loss: 1.0410511493682861,grad_norm: 0.999998961226094, iteration: 355495
loss: 1.0053002834320068,grad_norm: 0.8866903616111321, iteration: 355496
loss: 0.96991366147995,grad_norm: 0.9858871260557792, iteration: 355497
loss: 1.0118850469589233,grad_norm: 0.9854399843178429, iteration: 355498
loss: 1.0117586851119995,grad_norm: 0.7658433343413453, iteration: 355499
loss: 1.0291574001312256,grad_norm: 0.8366422910256212, iteration: 355500
loss: 0.9929987192153931,grad_norm: 0.8264426373861747, iteration: 355501
loss: 1.01362144947052,grad_norm: 0.7931630825524195, iteration: 355502
loss: 1.0023303031921387,grad_norm: 0.7220468364183618, iteration: 355503
loss: 0.9815176129341125,grad_norm: 0.97211891834738, iteration: 355504
loss: 1.020797610282898,grad_norm: 0.6607692375645148, iteration: 355505
loss: 0.97270268201828,grad_norm: 0.7554637892155334, iteration: 355506
loss: 0.9569531083106995,grad_norm: 0.7363062986322749, iteration: 355507
loss: 0.999679446220398,grad_norm: 0.9999989796855304, iteration: 355508
loss: 0.9931269288063049,grad_norm: 0.7853091040102843, iteration: 355509
loss: 1.0150288343429565,grad_norm: 0.6748407574141521, iteration: 355510
loss: 0.9583899974822998,grad_norm: 0.7467208703874331, iteration: 355511
loss: 0.9824479222297668,grad_norm: 0.6750540003898556, iteration: 355512
loss: 0.9960000514984131,grad_norm: 0.8456477004646771, iteration: 355513
loss: 0.9865012168884277,grad_norm: 0.7581938389226155, iteration: 355514
loss: 1.0887782573699951,grad_norm: 0.7792126489744244, iteration: 355515
loss: 0.9948886632919312,grad_norm: 0.7069567029623307, iteration: 355516
loss: 1.0157248973846436,grad_norm: 0.7506760487503555, iteration: 355517
loss: 0.9858903884887695,grad_norm: 0.8804644978701028, iteration: 355518
loss: 1.0088915824890137,grad_norm: 0.9438471597096196, iteration: 355519
loss: 0.9678876399993896,grad_norm: 0.8071877741595722, iteration: 355520
loss: 0.9864606261253357,grad_norm: 0.7908548526149392, iteration: 355521
loss: 1.0352277755737305,grad_norm: 0.8527993603131929, iteration: 355522
loss: 1.0152961015701294,grad_norm: 0.7459772549642101, iteration: 355523
loss: 0.9771613478660583,grad_norm: 0.9999992284352968, iteration: 355524
loss: 1.0204828977584839,grad_norm: 0.7829621248447484, iteration: 355525
loss: 0.9892498254776001,grad_norm: 0.7005418267099565, iteration: 355526
loss: 1.0005348920822144,grad_norm: 0.8579839667696784, iteration: 355527
loss: 1.0114043951034546,grad_norm: 0.8593859980814746, iteration: 355528
loss: 1.0305285453796387,grad_norm: 0.9713311026422454, iteration: 355529
loss: 0.9847072958946228,grad_norm: 0.8893818194002926, iteration: 355530
loss: 0.9673270583152771,grad_norm: 0.7036456013146273, iteration: 355531
loss: 0.9551095962524414,grad_norm: 0.8387209627903561, iteration: 355532
loss: 1.0822184085845947,grad_norm: 0.9013691473004058, iteration: 355533
loss: 0.9900179505348206,grad_norm: 0.7055577415079796, iteration: 355534
loss: 0.9644393920898438,grad_norm: 0.9999994722670572, iteration: 355535
loss: 0.9479053616523743,grad_norm: 0.7889300088387088, iteration: 355536
loss: 1.0268385410308838,grad_norm: 0.886132143917363, iteration: 355537
loss: 0.9693694710731506,grad_norm: 0.8196033591150554, iteration: 355538
loss: 1.0016720294952393,grad_norm: 0.691135361634034, iteration: 355539
loss: 0.9772231578826904,grad_norm: 0.7307121740875159, iteration: 355540
loss: 1.0477641820907593,grad_norm: 0.8634893142671569, iteration: 355541
loss: 1.0068399906158447,grad_norm: 0.6991721268410893, iteration: 355542
loss: 0.9810217022895813,grad_norm: 0.7225095910629167, iteration: 355543
loss: 0.9982427358627319,grad_norm: 0.999999155855901, iteration: 355544
loss: 0.9645350575447083,grad_norm: 0.8699017368747334, iteration: 355545
loss: 0.9992808699607849,grad_norm: 0.8265441459547097, iteration: 355546
loss: 0.9787105917930603,grad_norm: 0.9239988007471819, iteration: 355547
loss: 0.97783362865448,grad_norm: 0.9292374236027116, iteration: 355548
loss: 0.9792407751083374,grad_norm: 0.6833895058162827, iteration: 355549
loss: 0.9675306081771851,grad_norm: 0.7900702325470688, iteration: 355550
loss: 0.9536299109458923,grad_norm: 0.7607776642147936, iteration: 355551
loss: 0.9848142266273499,grad_norm: 0.6971418081105915, iteration: 355552
loss: 1.0041711330413818,grad_norm: 0.779404886139979, iteration: 355553
loss: 1.0225090980529785,grad_norm: 0.8050942053512721, iteration: 355554
loss: 1.0278489589691162,grad_norm: 0.8341408783588922, iteration: 355555
loss: 0.9960209131240845,grad_norm: 0.9640077867009992, iteration: 355556
loss: 0.9353458285331726,grad_norm: 0.9942125979130143, iteration: 355557
loss: 0.9983197450637817,grad_norm: 0.7481360223950299, iteration: 355558
loss: 1.0208553075790405,grad_norm: 0.9825405067789604, iteration: 355559
loss: 0.985718846321106,grad_norm: 0.7298872140174909, iteration: 355560
loss: 1.0012589693069458,grad_norm: 0.9999992189892526, iteration: 355561
loss: 1.0218206644058228,grad_norm: 0.7656851868110756, iteration: 355562
loss: 0.9601147174835205,grad_norm: 0.7331216565674363, iteration: 355563
loss: 1.02762770652771,grad_norm: 0.9999998773043556, iteration: 355564
loss: 1.0225977897644043,grad_norm: 0.9202371195264459, iteration: 355565
loss: 0.9931071996688843,grad_norm: 0.7581816303380754, iteration: 355566
loss: 0.9915140867233276,grad_norm: 0.8435322111033031, iteration: 355567
loss: 0.9755381345748901,grad_norm: 0.7818414200806594, iteration: 355568
loss: 1.019635558128357,grad_norm: 0.8945445640244369, iteration: 355569
loss: 1.0057381391525269,grad_norm: 0.8292600518595005, iteration: 355570
loss: 1.0202540159225464,grad_norm: 0.7440175506717671, iteration: 355571
loss: 1.0388426780700684,grad_norm: 0.9429724395874709, iteration: 355572
loss: 1.0099756717681885,grad_norm: 0.9143278950818239, iteration: 355573
loss: 0.9818440675735474,grad_norm: 0.9513520350062704, iteration: 355574
loss: 0.9722440838813782,grad_norm: 0.7661157721506124, iteration: 355575
loss: 0.9871265888214111,grad_norm: 0.8878196466302442, iteration: 355576
loss: 1.01292085647583,grad_norm: 0.7109613056504627, iteration: 355577
loss: 1.0113787651062012,grad_norm: 0.7881167907981627, iteration: 355578
loss: 0.9928799271583557,grad_norm: 0.9839429343449306, iteration: 355579
loss: 1.0193051099777222,grad_norm: 0.7910629655001287, iteration: 355580
loss: 0.9880003929138184,grad_norm: 0.8786158658499924, iteration: 355581
loss: 1.0129603147506714,grad_norm: 0.8718034520654, iteration: 355582
loss: 0.9961620569229126,grad_norm: 0.9611739817331034, iteration: 355583
loss: 1.0010234117507935,grad_norm: 0.8474295956171197, iteration: 355584
loss: 0.9801262021064758,grad_norm: 0.9980535556417599, iteration: 355585
loss: 0.9611813426017761,grad_norm: 0.8858578943694386, iteration: 355586
loss: 0.998187780380249,grad_norm: 0.8653992086623481, iteration: 355587
loss: 0.99372798204422,grad_norm: 0.79553843502442, iteration: 355588
loss: 0.9844011068344116,grad_norm: 0.7635668769028534, iteration: 355589
loss: 1.005843997001648,grad_norm: 0.8636478533862321, iteration: 355590
loss: 0.9800310730934143,grad_norm: 0.8594552738820843, iteration: 355591
loss: 0.9869256019592285,grad_norm: 0.7751934717801242, iteration: 355592
loss: 0.9591578245162964,grad_norm: 0.815974589782406, iteration: 355593
loss: 1.0120446681976318,grad_norm: 0.9015016721956887, iteration: 355594
loss: 0.9883731603622437,grad_norm: 0.7186840456509899, iteration: 355595
loss: 1.0164942741394043,grad_norm: 0.7011356831841872, iteration: 355596
loss: 1.0209473371505737,grad_norm: 0.7625860135733716, iteration: 355597
loss: 1.0506441593170166,grad_norm: 0.7913257082813422, iteration: 355598
loss: 1.0144442319869995,grad_norm: 0.7258430072015557, iteration: 355599
loss: 1.0297290086746216,grad_norm: 0.8785324029234668, iteration: 355600
loss: 1.0163558721542358,grad_norm: 0.9999998547022041, iteration: 355601
loss: 1.0045561790466309,grad_norm: 0.7703875384690341, iteration: 355602
loss: 1.002987027168274,grad_norm: 0.7301475406500189, iteration: 355603
loss: 0.9703311920166016,grad_norm: 0.9104643011255134, iteration: 355604
loss: 1.0170782804489136,grad_norm: 0.8461795841590427, iteration: 355605
loss: 1.038117527961731,grad_norm: 0.8888664156453188, iteration: 355606
loss: 1.0084006786346436,grad_norm: 0.9573639464545067, iteration: 355607
loss: 1.0677623748779297,grad_norm: 0.9765691861548572, iteration: 355608
loss: 1.0385009050369263,grad_norm: 0.8032482261961829, iteration: 355609
loss: 1.0099906921386719,grad_norm: 0.8093440376436015, iteration: 355610
loss: 1.0187426805496216,grad_norm: 0.9090301400167077, iteration: 355611
loss: 1.0099478960037231,grad_norm: 0.7277616479010592, iteration: 355612
loss: 1.00198495388031,grad_norm: 0.8024238218194644, iteration: 355613
loss: 1.009816288948059,grad_norm: 0.8056937973370388, iteration: 355614
loss: 1.0105830430984497,grad_norm: 0.790220578692105, iteration: 355615
loss: 0.9822710156440735,grad_norm: 0.9999995961530173, iteration: 355616
loss: 1.1556668281555176,grad_norm: 0.9098207077296281, iteration: 355617
loss: 0.9877794981002808,grad_norm: 0.6912030633794825, iteration: 355618
loss: 1.0174533128738403,grad_norm: 0.8130887877227427, iteration: 355619
loss: 0.9980351328849792,grad_norm: 0.782346929141, iteration: 355620
loss: 1.0200878381729126,grad_norm: 0.9147048492983542, iteration: 355621
loss: 1.0075068473815918,grad_norm: 0.8776371208217377, iteration: 355622
loss: 0.9941346645355225,grad_norm: 0.7982564678493743, iteration: 355623
loss: 0.9970602989196777,grad_norm: 0.7519284885718169, iteration: 355624
loss: 1.030153751373291,grad_norm: 0.9988806576720253, iteration: 355625
loss: 1.0128401517868042,grad_norm: 0.9221536726952576, iteration: 355626
loss: 0.9516928791999817,grad_norm: 0.9733752967822883, iteration: 355627
loss: 0.9893485307693481,grad_norm: 0.9028518747299071, iteration: 355628
loss: 0.957725465297699,grad_norm: 0.7613721898983585, iteration: 355629
loss: 0.9645766019821167,grad_norm: 0.9347684077287828, iteration: 355630
loss: 1.0048786401748657,grad_norm: 0.8108554697729806, iteration: 355631
loss: 1.0022374391555786,grad_norm: 0.9999996862264632, iteration: 355632
loss: 1.0027319192886353,grad_norm: 0.794748165354701, iteration: 355633
loss: 1.0464376211166382,grad_norm: 0.703129973542868, iteration: 355634
loss: 1.0173702239990234,grad_norm: 0.9634143136341466, iteration: 355635
loss: 0.9763807058334351,grad_norm: 0.851187458711027, iteration: 355636
loss: 1.0520942211151123,grad_norm: 0.9999991485189477, iteration: 355637
loss: 0.9989000558853149,grad_norm: 0.7990121185843022, iteration: 355638
loss: 1.0199558734893799,grad_norm: 0.8559530454966059, iteration: 355639
loss: 0.984233021736145,grad_norm: 0.7713033956017212, iteration: 355640
loss: 0.9818632006645203,grad_norm: 0.9666028831863114, iteration: 355641
loss: 1.0070569515228271,grad_norm: 0.7373304028103511, iteration: 355642
loss: 1.0059587955474854,grad_norm: 0.8635980915413007, iteration: 355643
loss: 1.004908800125122,grad_norm: 0.8277470804682476, iteration: 355644
loss: 1.0156325101852417,grad_norm: 0.7644384481959423, iteration: 355645
loss: 1.0169916152954102,grad_norm: 0.8399367383487892, iteration: 355646
loss: 0.9851472973823547,grad_norm: 0.8434610678810659, iteration: 355647
loss: 1.0025020837783813,grad_norm: 0.8004244444430543, iteration: 355648
loss: 1.0193428993225098,grad_norm: 0.8122030748144624, iteration: 355649
loss: 0.9872537851333618,grad_norm: 0.80890182325126, iteration: 355650
loss: 0.9738602638244629,grad_norm: 0.8187640983733041, iteration: 355651
loss: 1.0175806283950806,grad_norm: 0.7677505345207226, iteration: 355652
loss: 1.0304975509643555,grad_norm: 0.7778034875627375, iteration: 355653
loss: 0.957666277885437,grad_norm: 0.900283640971745, iteration: 355654
loss: 0.9816440343856812,grad_norm: 0.8117294390283039, iteration: 355655
loss: 1.0156855583190918,grad_norm: 0.9155891259282168, iteration: 355656
loss: 1.0072053670883179,grad_norm: 0.9999998782074889, iteration: 355657
loss: 0.971325159072876,grad_norm: 0.7905049069981427, iteration: 355658
loss: 1.0326831340789795,grad_norm: 0.982232031535291, iteration: 355659
loss: 1.0119285583496094,grad_norm: 0.7765527127356244, iteration: 355660
loss: 0.985378623008728,grad_norm: 0.9807234241918498, iteration: 355661
loss: 1.0133225917816162,grad_norm: 0.9111775789121008, iteration: 355662
loss: 1.0324846506118774,grad_norm: 0.7628797588505144, iteration: 355663
loss: 1.0300976037979126,grad_norm: 0.8944623819395098, iteration: 355664
loss: 0.9888850450515747,grad_norm: 0.8340912068207873, iteration: 355665
loss: 1.0293020009994507,grad_norm: 0.7434991710427201, iteration: 355666
loss: 1.0049691200256348,grad_norm: 0.7208026832151104, iteration: 355667
loss: 0.9619442820549011,grad_norm: 0.8910149020954508, iteration: 355668
loss: 1.0197068452835083,grad_norm: 0.9999991296987114, iteration: 355669
loss: 1.0053473711013794,grad_norm: 0.9063915157018815, iteration: 355670
loss: 1.0526856184005737,grad_norm: 0.999999290122467, iteration: 355671
loss: 0.9999428987503052,grad_norm: 0.999999596252815, iteration: 355672
loss: 1.0021106004714966,grad_norm: 0.8111873769215854, iteration: 355673
loss: 0.9852149486541748,grad_norm: 0.7267213517897926, iteration: 355674
loss: 1.0225447416305542,grad_norm: 0.732959561034579, iteration: 355675
loss: 0.9662151336669922,grad_norm: 0.7770150095925227, iteration: 355676
loss: 1.0086530447006226,grad_norm: 0.9004204310544188, iteration: 355677
loss: 1.0185706615447998,grad_norm: 0.8183588761853217, iteration: 355678
loss: 1.0491375923156738,grad_norm: 0.8124563882227108, iteration: 355679
loss: 1.004268765449524,grad_norm: 0.6133240243479431, iteration: 355680
loss: 0.9891725182533264,grad_norm: 0.8066141423263856, iteration: 355681
loss: 0.9455299973487854,grad_norm: 0.9348950185897961, iteration: 355682
loss: 1.0418548583984375,grad_norm: 0.8444493815352653, iteration: 355683
loss: 1.0039026737213135,grad_norm: 0.7642211956893875, iteration: 355684
loss: 0.9995030164718628,grad_norm: 0.7973969430593006, iteration: 355685
loss: 1.027967095375061,grad_norm: 0.7073339033956448, iteration: 355686
loss: 1.0043678283691406,grad_norm: 0.7605146761589695, iteration: 355687
loss: 1.030169129371643,grad_norm: 0.8024830164618011, iteration: 355688
loss: 1.0229581594467163,grad_norm: 0.7373606375075425, iteration: 355689
loss: 1.0502469539642334,grad_norm: 0.9999998590193977, iteration: 355690
loss: 0.987186074256897,grad_norm: 0.9379149911603788, iteration: 355691
loss: 1.070635437965393,grad_norm: 0.9999999187399706, iteration: 355692
loss: 1.003143072128296,grad_norm: 0.8778964307752924, iteration: 355693
loss: 1.0026432275772095,grad_norm: 0.9028889551259329, iteration: 355694
loss: 0.9955891966819763,grad_norm: 0.8049857971977018, iteration: 355695
loss: 1.024351716041565,grad_norm: 0.7190199819351056, iteration: 355696
loss: 1.013655662536621,grad_norm: 0.7424195849080591, iteration: 355697
loss: 1.0196503400802612,grad_norm: 0.999999229188724, iteration: 355698
loss: 1.0457712411880493,grad_norm: 0.9999996145391333, iteration: 355699
loss: 0.9956558346748352,grad_norm: 0.8450228444403103, iteration: 355700
loss: 1.0019598007202148,grad_norm: 0.9999991154750778, iteration: 355701
loss: 0.9883275032043457,grad_norm: 0.7640924275936485, iteration: 355702
loss: 1.027571439743042,grad_norm: 0.9999993223101425, iteration: 355703
loss: 1.0035706758499146,grad_norm: 0.9096457864163634, iteration: 355704
loss: 1.0012985467910767,grad_norm: 0.7861777165188316, iteration: 355705
loss: 1.0268985033035278,grad_norm: 0.868749718129615, iteration: 355706
loss: 0.9944549798965454,grad_norm: 0.8341222661035141, iteration: 355707
loss: 0.9889842867851257,grad_norm: 0.8169304161233008, iteration: 355708
loss: 1.0169626474380493,grad_norm: 0.7110809840957181, iteration: 355709
loss: 0.968146562576294,grad_norm: 0.7398786382699452, iteration: 355710
loss: 1.0274502038955688,grad_norm: 0.9999991738560355, iteration: 355711
loss: 1.0143678188323975,grad_norm: 0.814560159931165, iteration: 355712
loss: 0.9900453090667725,grad_norm: 0.8026000434455369, iteration: 355713
loss: 1.0298004150390625,grad_norm: 0.9999998868071196, iteration: 355714
loss: 1.0108133554458618,grad_norm: 0.9999991127215133, iteration: 355715
loss: 1.0348787307739258,grad_norm: 0.7662516777112024, iteration: 355716
loss: 0.9982097148895264,grad_norm: 0.7872922187015542, iteration: 355717
loss: 0.9593474268913269,grad_norm: 0.7395325670202563, iteration: 355718
loss: 1.0234389305114746,grad_norm: 0.7346419257836299, iteration: 355719
loss: 1.0349411964416504,grad_norm: 0.8208680304584735, iteration: 355720
loss: 1.0165733098983765,grad_norm: 0.8955576881907605, iteration: 355721
loss: 0.9847655892372131,grad_norm: 0.8494148223157754, iteration: 355722
loss: 0.9807532429695129,grad_norm: 0.8431618564030742, iteration: 355723
loss: 0.9825551509857178,grad_norm: 0.8829580835071626, iteration: 355724
loss: 0.9887343645095825,grad_norm: 0.7112798097954288, iteration: 355725
loss: 1.0312248468399048,grad_norm: 0.8635268067940878, iteration: 355726
loss: 1.0206257104873657,grad_norm: 0.9126347614486711, iteration: 355727
loss: 1.0067185163497925,grad_norm: 0.9999990846038411, iteration: 355728
loss: 0.9803690910339355,grad_norm: 0.6938233188963253, iteration: 355729
loss: 0.9801455736160278,grad_norm: 0.6862235459345427, iteration: 355730
loss: 1.0052756071090698,grad_norm: 0.6168496898750536, iteration: 355731
loss: 1.0081349611282349,grad_norm: 0.9573395890747393, iteration: 355732
loss: 1.1914533376693726,grad_norm: 0.999999233580299, iteration: 355733
loss: 0.9651982188224792,grad_norm: 0.8983955855373886, iteration: 355734
loss: 1.011714220046997,grad_norm: 0.8426908609760679, iteration: 355735
loss: 1.1008142232894897,grad_norm: 0.9999990537981203, iteration: 355736
loss: 1.0142639875411987,grad_norm: 0.8228245601682206, iteration: 355737
loss: 1.0008180141448975,grad_norm: 0.7962152611429543, iteration: 355738
loss: 1.0042619705200195,grad_norm: 0.8168629962811432, iteration: 355739
loss: 1.013522982597351,grad_norm: 0.7459707531865126, iteration: 355740
loss: 0.9711183905601501,grad_norm: 0.9311910638584003, iteration: 355741
loss: 0.9822128415107727,grad_norm: 0.842770364447023, iteration: 355742
loss: 1.0018442869186401,grad_norm: 0.7445246950367702, iteration: 355743
loss: 0.9814130663871765,grad_norm: 0.9220993198262194, iteration: 355744
loss: 1.021535873413086,grad_norm: 0.7714122871084301, iteration: 355745
loss: 1.0003113746643066,grad_norm: 0.8190255537604891, iteration: 355746
loss: 1.0430889129638672,grad_norm: 0.8033918909813766, iteration: 355747
loss: 1.0159125328063965,grad_norm: 0.8982701901923368, iteration: 355748
loss: 0.988861083984375,grad_norm: 0.8630676904380423, iteration: 355749
loss: 0.9709986448287964,grad_norm: 0.8599798965708636, iteration: 355750
loss: 1.028290033340454,grad_norm: 0.8109385382574998, iteration: 355751
loss: 1.0123016834259033,grad_norm: 0.9999992120945654, iteration: 355752
loss: 0.9890578985214233,grad_norm: 0.7437021288101775, iteration: 355753
loss: 1.0497342348098755,grad_norm: 0.71758792617151, iteration: 355754
loss: 0.980473518371582,grad_norm: 0.8025802228191601, iteration: 355755
loss: 1.012203335762024,grad_norm: 0.9999995759942685, iteration: 355756
loss: 0.9620254635810852,grad_norm: 0.7900648238503609, iteration: 355757
loss: 0.9586172103881836,grad_norm: 0.9884734261278103, iteration: 355758
loss: 1.0096733570098877,grad_norm: 0.7187372697853939, iteration: 355759
loss: 0.9668864607810974,grad_norm: 0.8106044994175559, iteration: 355760
loss: 0.9927063584327698,grad_norm: 0.9150961642252952, iteration: 355761
loss: 1.0056244134902954,grad_norm: 0.773002049161129, iteration: 355762
loss: 0.9722935557365417,grad_norm: 0.7751993720367105, iteration: 355763
loss: 0.9950137138366699,grad_norm: 0.8937371116874339, iteration: 355764
loss: 1.0191688537597656,grad_norm: 0.9999996735027402, iteration: 355765
loss: 0.9878023266792297,grad_norm: 0.7454258624323981, iteration: 355766
loss: 1.0222513675689697,grad_norm: 0.8194776668459737, iteration: 355767
loss: 1.0246055126190186,grad_norm: 0.8877204105579886, iteration: 355768
loss: 1.009901523590088,grad_norm: 0.9149037977424987, iteration: 355769
loss: 1.0301975011825562,grad_norm: 0.8203809765790645, iteration: 355770
loss: 1.0001260042190552,grad_norm: 0.8023918013922255, iteration: 355771
loss: 1.0084078311920166,grad_norm: 0.8152879547199403, iteration: 355772
loss: 1.015446662902832,grad_norm: 0.8244164385865119, iteration: 355773
loss: 1.0201243162155151,grad_norm: 0.7340276958201644, iteration: 355774
loss: 1.0451271533966064,grad_norm: 0.8594610922822778, iteration: 355775
loss: 1.0262393951416016,grad_norm: 0.7992259161734564, iteration: 355776
loss: 1.0003209114074707,grad_norm: 0.844865419594288, iteration: 355777
loss: 1.017867922782898,grad_norm: 0.8226839214897455, iteration: 355778
loss: 1.046317219734192,grad_norm: 0.9999994514377984, iteration: 355779
loss: 0.9846112728118896,grad_norm: 0.770959756504898, iteration: 355780
loss: 1.0234544277191162,grad_norm: 0.9241322331811656, iteration: 355781
loss: 0.9531171321868896,grad_norm: 0.6782871498321433, iteration: 355782
loss: 0.9768089056015015,grad_norm: 0.7808018999907836, iteration: 355783
loss: 0.9886734485626221,grad_norm: 0.956564627239872, iteration: 355784
loss: 1.0185351371765137,grad_norm: 0.7029773657713438, iteration: 355785
loss: 1.0038217306137085,grad_norm: 0.764553230215461, iteration: 355786
loss: 0.9702895879745483,grad_norm: 0.8836972031733953, iteration: 355787
loss: 1.0056872367858887,grad_norm: 0.815883945512671, iteration: 355788
loss: 1.0178180932998657,grad_norm: 0.8138156783174569, iteration: 355789
loss: 0.9892827868461609,grad_norm: 0.9999992507714, iteration: 355790
loss: 1.0351641178131104,grad_norm: 0.942672091344123, iteration: 355791
loss: 1.0067957639694214,grad_norm: 0.6923524956775858, iteration: 355792
loss: 0.9732662439346313,grad_norm: 0.8072950676343719, iteration: 355793
loss: 0.9765965342521667,grad_norm: 0.806036228597816, iteration: 355794
loss: 1.0855730772018433,grad_norm: 0.7066554305500602, iteration: 355795
loss: 0.9995607137680054,grad_norm: 0.8374611383750917, iteration: 355796
loss: 1.0123727321624756,grad_norm: 0.7637658141572485, iteration: 355797
loss: 1.0234698057174683,grad_norm: 0.9999989927310631, iteration: 355798
loss: 0.9963670372962952,grad_norm: 0.9243072676263363, iteration: 355799
loss: 0.9784969091415405,grad_norm: 0.9368259361794999, iteration: 355800
loss: 1.0135761499404907,grad_norm: 0.9170369197963918, iteration: 355801
loss: 0.9975336790084839,grad_norm: 0.7501193188422995, iteration: 355802
loss: 0.9936052560806274,grad_norm: 0.7864659364431299, iteration: 355803
loss: 0.9845768809318542,grad_norm: 0.7799458834624418, iteration: 355804
loss: 1.026373028755188,grad_norm: 0.999999039814911, iteration: 355805
loss: 1.003787636756897,grad_norm: 0.6810078979791481, iteration: 355806
loss: 0.9663341045379639,grad_norm: 0.8675405666545222, iteration: 355807
loss: 1.0179555416107178,grad_norm: 0.9392558418379239, iteration: 355808
loss: 0.9925169348716736,grad_norm: 0.8260414424049428, iteration: 355809
loss: 0.9591028690338135,grad_norm: 0.7216106964031569, iteration: 355810
loss: 0.9831942319869995,grad_norm: 0.8058157378346119, iteration: 355811
loss: 0.9612628221511841,grad_norm: 0.8678898746346062, iteration: 355812
loss: 1.0455363988876343,grad_norm: 0.9681712109475222, iteration: 355813
loss: 1.0454062223434448,grad_norm: 0.7732933498676156, iteration: 355814
loss: 1.0350631475448608,grad_norm: 0.7227967417459561, iteration: 355815
loss: 1.0515117645263672,grad_norm: 0.9999997170523791, iteration: 355816
loss: 0.970529317855835,grad_norm: 0.7814605410912059, iteration: 355817
loss: 0.9679524302482605,grad_norm: 0.9422363210592785, iteration: 355818
loss: 1.0164977312088013,grad_norm: 0.9831557441906529, iteration: 355819
loss: 1.0098321437835693,grad_norm: 0.949003607344101, iteration: 355820
loss: 0.9912661910057068,grad_norm: 0.8036527283721614, iteration: 355821
loss: 1.0157676935195923,grad_norm: 0.7889233165006572, iteration: 355822
loss: 1.0248137712478638,grad_norm: 0.7878235474936005, iteration: 355823
loss: 0.9846704006195068,grad_norm: 0.714084403271398, iteration: 355824
loss: 1.0347931385040283,grad_norm: 0.9999997920859426, iteration: 355825
loss: 1.0017703771591187,grad_norm: 0.8134456685351655, iteration: 355826
loss: 0.9589766263961792,grad_norm: 0.7320953237001414, iteration: 355827
loss: 1.0066673755645752,grad_norm: 0.9549515288342404, iteration: 355828
loss: 1.0166667699813843,grad_norm: 0.9999993302143427, iteration: 355829
loss: 0.9942207932472229,grad_norm: 0.8781243682011823, iteration: 355830
loss: 1.0230880975723267,grad_norm: 0.8290860534968877, iteration: 355831
loss: 1.0005594491958618,grad_norm: 0.9123463466485107, iteration: 355832
loss: 0.998823881149292,grad_norm: 0.9198134275436942, iteration: 355833
loss: 1.0624405145645142,grad_norm: 0.8599803808341829, iteration: 355834
loss: 1.010507583618164,grad_norm: 0.7805971865374939, iteration: 355835
loss: 1.0770442485809326,grad_norm: 0.9999993496173075, iteration: 355836
loss: 0.990546464920044,grad_norm: 0.7404806644068108, iteration: 355837
loss: 1.0605803728103638,grad_norm: 0.9822909990926431, iteration: 355838
loss: 1.0245170593261719,grad_norm: 0.6986300427558112, iteration: 355839
loss: 0.9900414347648621,grad_norm: 0.8232517479175988, iteration: 355840
loss: 0.9945483803749084,grad_norm: 0.808685457593635, iteration: 355841
loss: 1.0224168300628662,grad_norm: 0.8466828383179626, iteration: 355842
loss: 1.006407618522644,grad_norm: 0.7769005693666837, iteration: 355843
loss: 1.0003669261932373,grad_norm: 0.9999994017115356, iteration: 355844
loss: 1.016449213027954,grad_norm: 0.7110715714564311, iteration: 355845
loss: 0.9871541261672974,grad_norm: 0.690699813584486, iteration: 355846
loss: 0.9882537126541138,grad_norm: 0.8420052167170237, iteration: 355847
loss: 0.9644333720207214,grad_norm: 0.8068579001186177, iteration: 355848
loss: 1.0044877529144287,grad_norm: 0.8427335891816445, iteration: 355849
loss: 1.0036664009094238,grad_norm: 0.9709833653529664, iteration: 355850
loss: 0.9942246079444885,grad_norm: 0.9350044824088324, iteration: 355851
loss: 1.0012884140014648,grad_norm: 0.7503863221832284, iteration: 355852
loss: 1.020735502243042,grad_norm: 0.9999995630894802, iteration: 355853
loss: 0.9925544261932373,grad_norm: 0.8002153832102759, iteration: 355854
loss: 1.009345531463623,grad_norm: 0.707097861794796, iteration: 355855
loss: 1.0245896577835083,grad_norm: 0.8546937097496633, iteration: 355856
loss: 1.0050344467163086,grad_norm: 0.8226627441114794, iteration: 355857
loss: 1.004692554473877,grad_norm: 0.9999990778725407, iteration: 355858
loss: 0.9781357645988464,grad_norm: 0.8015012262804383, iteration: 355859
loss: 0.9866520166397095,grad_norm: 0.9999992763610861, iteration: 355860
loss: 0.9927381873130798,grad_norm: 0.9283382707537361, iteration: 355861
loss: 1.001598834991455,grad_norm: 0.7770534414923294, iteration: 355862
loss: 0.9679779410362244,grad_norm: 0.9575479340737986, iteration: 355863
loss: 0.9984797835350037,grad_norm: 0.9999991728752501, iteration: 355864
loss: 0.9648289084434509,grad_norm: 0.837385220852743, iteration: 355865
loss: 0.9823012351989746,grad_norm: 0.7982113694862025, iteration: 355866
loss: 1.0360138416290283,grad_norm: 0.8756827455809347, iteration: 355867
loss: 1.0743408203125,grad_norm: 0.9048221105867116, iteration: 355868
loss: 1.1253669261932373,grad_norm: 0.9999999563844407, iteration: 355869
loss: 0.9966616630554199,grad_norm: 0.7138337130793895, iteration: 355870
loss: 0.9958670735359192,grad_norm: 0.8001550159243618, iteration: 355871
loss: 0.9921073913574219,grad_norm: 0.9772285006336413, iteration: 355872
loss: 1.0837438106536865,grad_norm: 0.9712992118922258, iteration: 355873
loss: 0.9631436467170715,grad_norm: 0.774457337695585, iteration: 355874
loss: 0.9800881743431091,grad_norm: 0.8253591304509735, iteration: 355875
loss: 0.9814496040344238,grad_norm: 0.8636247485552759, iteration: 355876
loss: 0.9771004915237427,grad_norm: 0.9887917398282946, iteration: 355877
loss: 1.0036323070526123,grad_norm: 0.7430750905463581, iteration: 355878
loss: 1.0650779008865356,grad_norm: 0.9999994924985353, iteration: 355879
loss: 0.9970129132270813,grad_norm: 0.7491487701274611, iteration: 355880
loss: 1.0129046440124512,grad_norm: 0.8512383986986295, iteration: 355881
loss: 0.9835934042930603,grad_norm: 0.8177929319392102, iteration: 355882
loss: 1.0236209630966187,grad_norm: 0.8549335542424888, iteration: 355883
loss: 0.99619060754776,grad_norm: 0.6824782335225119, iteration: 355884
loss: 1.005346417427063,grad_norm: 0.999999205207503, iteration: 355885
loss: 1.0010212659835815,grad_norm: 0.707436662725168, iteration: 355886
loss: 1.0048929452896118,grad_norm: 0.7576295450476259, iteration: 355887
loss: 0.9447289705276489,grad_norm: 0.7619147587135185, iteration: 355888
loss: 0.9946631789207458,grad_norm: 0.8437838344842583, iteration: 355889
loss: 0.9539639353752136,grad_norm: 0.7366235472502787, iteration: 355890
loss: 0.984166145324707,grad_norm: 0.844319709284776, iteration: 355891
loss: 0.9760076999664307,grad_norm: 0.6217621675746053, iteration: 355892
loss: 1.0508030652999878,grad_norm: 0.9999995749457671, iteration: 355893
loss: 0.9690497517585754,grad_norm: 0.873962819423744, iteration: 355894
loss: 1.001475214958191,grad_norm: 0.7896237203127359, iteration: 355895
loss: 0.975980818271637,grad_norm: 0.8328873750580934, iteration: 355896
loss: 1.017016887664795,grad_norm: 0.7508001299104355, iteration: 355897
loss: 1.1378309726715088,grad_norm: 0.9999998868049563, iteration: 355898
loss: 1.1073083877563477,grad_norm: 0.9806374036271253, iteration: 355899
loss: 1.002321481704712,grad_norm: 0.7881937487838777, iteration: 355900
loss: 0.9642565846443176,grad_norm: 0.9961038525997759, iteration: 355901
loss: 0.997980535030365,grad_norm: 0.9770342705301208, iteration: 355902
loss: 1.0074423551559448,grad_norm: 0.999999282512255, iteration: 355903
loss: 0.9684024453163147,grad_norm: 0.9999990519930745, iteration: 355904
loss: 1.0039970874786377,grad_norm: 0.9433015808433509, iteration: 355905
loss: 1.0209633111953735,grad_norm: 0.744749790471714, iteration: 355906
loss: 1.0050106048583984,grad_norm: 0.8075776982028796, iteration: 355907
loss: 0.9953323602676392,grad_norm: 0.7721309340061736, iteration: 355908
loss: 1.017812967300415,grad_norm: 0.7652098712989729, iteration: 355909
loss: 0.9885652661323547,grad_norm: 0.7670189885383293, iteration: 355910
loss: 1.0265644788742065,grad_norm: 0.8594358182814514, iteration: 355911
loss: 0.978156328201294,grad_norm: 0.8186437329275943, iteration: 355912
loss: 1.009290337562561,grad_norm: 0.9976379194428098, iteration: 355913
loss: 1.013836145401001,grad_norm: 0.8417796197734267, iteration: 355914
loss: 1.0138134956359863,grad_norm: 0.9608080149473284, iteration: 355915
loss: 0.9878537058830261,grad_norm: 0.7612202702225952, iteration: 355916
loss: 1.0132886171340942,grad_norm: 0.8408652437855649, iteration: 355917
loss: 1.004326581954956,grad_norm: 0.9999993047838687, iteration: 355918
loss: 0.982961893081665,grad_norm: 0.8001259743774822, iteration: 355919
loss: 1.004961371421814,grad_norm: 0.8825183743793884, iteration: 355920
loss: 1.0185112953186035,grad_norm: 0.9999996464132697, iteration: 355921
loss: 0.9978035092353821,grad_norm: 0.8115771547645818, iteration: 355922
loss: 0.9940226674079895,grad_norm: 0.7390513139487479, iteration: 355923
loss: 0.9914718866348267,grad_norm: 0.776178385139135, iteration: 355924
loss: 1.011367678642273,grad_norm: 0.7614683108811434, iteration: 355925
loss: 0.9904296398162842,grad_norm: 0.8167384705444704, iteration: 355926
loss: 1.0372323989868164,grad_norm: 0.9178854830362269, iteration: 355927
loss: 0.9916890859603882,grad_norm: 0.736320973041455, iteration: 355928
loss: 1.0010700225830078,grad_norm: 0.721247128337425, iteration: 355929
loss: 0.9744674563407898,grad_norm: 0.9575222195767281, iteration: 355930
loss: 1.0752668380737305,grad_norm: 0.9999999477769452, iteration: 355931
loss: 1.0114948749542236,grad_norm: 0.84841674478707, iteration: 355932
loss: 1.0091922283172607,grad_norm: 0.7238560703238319, iteration: 355933
loss: 0.9762734174728394,grad_norm: 0.9323531850272836, iteration: 355934
loss: 0.9873471856117249,grad_norm: 0.773460596295429, iteration: 355935
loss: 1.042551875114441,grad_norm: 0.9999997711065641, iteration: 355936
loss: 1.0316979885101318,grad_norm: 0.9403158781625178, iteration: 355937
loss: 0.975673496723175,grad_norm: 0.9784323864538662, iteration: 355938
loss: 0.9834288954734802,grad_norm: 0.7896803348641398, iteration: 355939
loss: 0.9928552508354187,grad_norm: 0.8690964846010307, iteration: 355940
loss: 0.9906750917434692,grad_norm: 0.9999992964415497, iteration: 355941
loss: 0.9898526668548584,grad_norm: 0.7115494044544863, iteration: 355942
loss: 0.9795027375221252,grad_norm: 0.9057040456043672, iteration: 355943
loss: 1.0024747848510742,grad_norm: 0.9846438723451473, iteration: 355944
loss: 1.001185655593872,grad_norm: 0.7234336844894937, iteration: 355945
loss: 0.9755396842956543,grad_norm: 0.7300709069080322, iteration: 355946
loss: 1.0339393615722656,grad_norm: 0.8970693422698971, iteration: 355947
loss: 0.9840895533561707,grad_norm: 0.7678884113852525, iteration: 355948
loss: 0.9951984286308289,grad_norm: 0.8297543326832979, iteration: 355949
loss: 1.016282558441162,grad_norm: 0.9999992691743427, iteration: 355950
loss: 1.0061100721359253,grad_norm: 0.8208673355767279, iteration: 355951
loss: 0.9933202862739563,grad_norm: 0.844012279133211, iteration: 355952
loss: 1.0063396692276,grad_norm: 0.9636275382733739, iteration: 355953
loss: 0.99015873670578,grad_norm: 0.8419345969120337, iteration: 355954
loss: 1.0214004516601562,grad_norm: 0.9999995822971682, iteration: 355955
loss: 0.9991458058357239,grad_norm: 0.6509910202678215, iteration: 355956
loss: 0.9984734654426575,grad_norm: 0.8162960579027697, iteration: 355957
loss: 1.0128759145736694,grad_norm: 0.8576627877043094, iteration: 355958
loss: 0.9959226250648499,grad_norm: 0.7948565544595408, iteration: 355959
loss: 1.0037447214126587,grad_norm: 0.8703045658525835, iteration: 355960
loss: 0.984370768070221,grad_norm: 0.8097422227628688, iteration: 355961
loss: 1.000425100326538,grad_norm: 0.6748660762008405, iteration: 355962
loss: 1.0135716199874878,grad_norm: 0.8273700277975352, iteration: 355963
loss: 0.9973917007446289,grad_norm: 0.8246571271701, iteration: 355964
loss: 0.9959163069725037,grad_norm: 0.8041450920055939, iteration: 355965
loss: 0.9840118885040283,grad_norm: 0.8049427427283024, iteration: 355966
loss: 1.0155928134918213,grad_norm: 0.9217854564519634, iteration: 355967
loss: 1.044167399406433,grad_norm: 0.778781623587514, iteration: 355968
loss: 1.013321876525879,grad_norm: 0.9999992877140498, iteration: 355969
loss: 1.0100847482681274,grad_norm: 0.9035077040560179, iteration: 355970
loss: 1.0391708612442017,grad_norm: 0.7305416781840272, iteration: 355971
loss: 0.9802130460739136,grad_norm: 0.7320436425061968, iteration: 355972
loss: 1.0191177129745483,grad_norm: 0.9779745585831439, iteration: 355973
loss: 1.0035520792007446,grad_norm: 0.8214184672247309, iteration: 355974
loss: 1.0311247110366821,grad_norm: 0.8909633801358301, iteration: 355975
loss: 1.0104178190231323,grad_norm: 0.8221617787343014, iteration: 355976
loss: 0.9608851671218872,grad_norm: 0.8559913747751966, iteration: 355977
loss: 0.9852670431137085,grad_norm: 0.8677269685099237, iteration: 355978
loss: 0.9763773083686829,grad_norm: 0.7757548743353379, iteration: 355979
loss: 0.9955673813819885,grad_norm: 0.999999252006763, iteration: 355980
loss: 1.0020978450775146,grad_norm: 0.8148347054272354, iteration: 355981
loss: 0.9801766872406006,grad_norm: 0.7890877256363009, iteration: 355982
loss: 1.0191885232925415,grad_norm: 0.7650332237063411, iteration: 355983
loss: 1.0504086017608643,grad_norm: 0.9213730768483406, iteration: 355984
loss: 1.0580376386642456,grad_norm: 0.9999994766986289, iteration: 355985
loss: 1.0164731740951538,grad_norm: 0.7463111742932853, iteration: 355986
loss: 0.9920058250427246,grad_norm: 0.8904701438826139, iteration: 355987
loss: 1.0237412452697754,grad_norm: 0.7172808125859393, iteration: 355988
loss: 1.0901950597763062,grad_norm: 0.9999991940360289, iteration: 355989
loss: 1.0069960355758667,grad_norm: 0.8294792598641898, iteration: 355990
loss: 0.9984586238861084,grad_norm: 0.8872144787607369, iteration: 355991
loss: 1.0049830675125122,grad_norm: 0.8251006743392523, iteration: 355992
loss: 0.9666610956192017,grad_norm: 0.724734932801758, iteration: 355993
loss: 1.0364034175872803,grad_norm: 0.7866248465343821, iteration: 355994
loss: 1.010277509689331,grad_norm: 0.9402766652924532, iteration: 355995
loss: 0.9988200068473816,grad_norm: 0.843870169219926, iteration: 355996
loss: 1.0174158811569214,grad_norm: 0.9999991025406338, iteration: 355997
loss: 0.962294340133667,grad_norm: 0.8445071746098541, iteration: 355998
loss: 0.9906297922134399,grad_norm: 0.7575211822022203, iteration: 355999
loss: 1.0040385723114014,grad_norm: 0.8361606240179705, iteration: 356000
loss: 0.9842439889907837,grad_norm: 0.730276583064516, iteration: 356001
loss: 1.0163472890853882,grad_norm: 0.7483876688733909, iteration: 356002
loss: 1.0107616186141968,grad_norm: 0.9787990801915636, iteration: 356003
loss: 0.9902050495147705,grad_norm: 0.8276718523250115, iteration: 356004
loss: 0.931216835975647,grad_norm: 0.8147997006841918, iteration: 356005
loss: 1.00239896774292,grad_norm: 0.8342261249243408, iteration: 356006
loss: 1.0280121564865112,grad_norm: 0.7916727456346323, iteration: 356007
loss: 0.9966784715652466,grad_norm: 0.810017509093595, iteration: 356008
loss: 1.0218942165374756,grad_norm: 0.9452574364796803, iteration: 356009
loss: 0.9776334166526794,grad_norm: 0.8091836722976284, iteration: 356010
loss: 1.0121341943740845,grad_norm: 0.9999991990643968, iteration: 356011
loss: 0.9895046353340149,grad_norm: 0.8432733225709517, iteration: 356012
loss: 0.9992353320121765,grad_norm: 0.7169546343000679, iteration: 356013
loss: 1.0678733587265015,grad_norm: 0.9999997743993354, iteration: 356014
loss: 0.9682502150535583,grad_norm: 0.9999990812197955, iteration: 356015
loss: 1.0114543437957764,grad_norm: 0.7565101467233147, iteration: 356016
loss: 1.0139949321746826,grad_norm: 0.9999995209000339, iteration: 356017
loss: 1.0181822776794434,grad_norm: 0.6934370489528573, iteration: 356018
loss: 1.0229332447052002,grad_norm: 0.7623568963836816, iteration: 356019
loss: 1.0055712461471558,grad_norm: 0.7689306591296687, iteration: 356020
loss: 1.0182621479034424,grad_norm: 0.8934220224361595, iteration: 356021
loss: 1.019844889640808,grad_norm: 0.9999996651137764, iteration: 356022
loss: 1.0082182884216309,grad_norm: 0.7560005650047089, iteration: 356023
loss: 0.9883479475975037,grad_norm: 0.8017667000142743, iteration: 356024
loss: 0.9815150499343872,grad_norm: 0.8324350301158917, iteration: 356025
loss: 0.9501569867134094,grad_norm: 0.9999991724397205, iteration: 356026
loss: 1.0141565799713135,grad_norm: 0.7512234151057299, iteration: 356027
loss: 0.9551816582679749,grad_norm: 0.9442091131085282, iteration: 356028
loss: 0.9828463196754456,grad_norm: 0.7514397779258349, iteration: 356029
loss: 1.0598500967025757,grad_norm: 0.9999995996917229, iteration: 356030
loss: 0.9895572066307068,grad_norm: 0.9481466306745252, iteration: 356031
loss: 1.0053737163543701,grad_norm: 0.710754104330373, iteration: 356032
loss: 1.0023670196533203,grad_norm: 0.8698435910001576, iteration: 356033
loss: 0.9798474907875061,grad_norm: 0.7989942221036088, iteration: 356034
loss: 0.9821912050247192,grad_norm: 0.9999990697844814, iteration: 356035
loss: 1.0199111700057983,grad_norm: 0.8282289293089601, iteration: 356036
loss: 1.022153615951538,grad_norm: 0.8920099519652549, iteration: 356037
loss: 1.0254024267196655,grad_norm: 0.9036870619302463, iteration: 356038
loss: 1.015901803970337,grad_norm: 0.9151756022898727, iteration: 356039
loss: 1.0056663751602173,grad_norm: 0.893651341343301, iteration: 356040
loss: 0.9878118634223938,grad_norm: 0.857744810926126, iteration: 356041
loss: 0.9501572847366333,grad_norm: 0.9999991183977578, iteration: 356042
loss: 0.9918073415756226,grad_norm: 0.7271164531197079, iteration: 356043
loss: 0.9900423288345337,grad_norm: 0.8491375960310277, iteration: 356044
loss: 1.0010159015655518,grad_norm: 0.9287095582235816, iteration: 356045
loss: 0.9858574271202087,grad_norm: 0.8515135482166304, iteration: 356046
loss: 1.0041621923446655,grad_norm: 0.9167806982296873, iteration: 356047
loss: 1.0160131454467773,grad_norm: 0.8900510460757483, iteration: 356048
loss: 0.9933887124061584,grad_norm: 0.9363302106146848, iteration: 356049
loss: 1.0100728273391724,grad_norm: 0.9299647385980263, iteration: 356050
loss: 0.9897252321243286,grad_norm: 0.8530460361810946, iteration: 356051
loss: 1.033030390739441,grad_norm: 0.8551571044448031, iteration: 356052
loss: 1.0204440355300903,grad_norm: 0.7873071182746392, iteration: 356053
loss: 0.9733914732933044,grad_norm: 0.755397469405326, iteration: 356054
loss: 0.9830256700515747,grad_norm: 0.7652298932144989, iteration: 356055
loss: 1.013885736465454,grad_norm: 0.782031829492834, iteration: 356056
loss: 1.0073624849319458,grad_norm: 0.7437893516313753, iteration: 356057
loss: 1.0105093717575073,grad_norm: 0.7551067602339777, iteration: 356058
loss: 1.0451313257217407,grad_norm: 0.919243266741003, iteration: 356059
loss: 0.9948497414588928,grad_norm: 0.7038027653928812, iteration: 356060
loss: 1.0055108070373535,grad_norm: 0.9435731322470534, iteration: 356061
loss: 1.0363842248916626,grad_norm: 0.9999998033656465, iteration: 356062
loss: 0.9888759851455688,grad_norm: 0.9133051397092172, iteration: 356063
loss: 0.967775821685791,grad_norm: 0.9152090615407683, iteration: 356064
loss: 1.0200896263122559,grad_norm: 0.8598933881707949, iteration: 356065
loss: 1.0448139905929565,grad_norm: 0.9999992744668117, iteration: 356066
loss: 0.9863114356994629,grad_norm: 0.8274636582160295, iteration: 356067
loss: 1.0012747049331665,grad_norm: 0.9583150975780597, iteration: 356068
loss: 1.0130436420440674,grad_norm: 0.99999944413426, iteration: 356069
loss: 1.0005712509155273,grad_norm: 0.8678515132438628, iteration: 356070
loss: 0.9657258987426758,grad_norm: 0.8467514507292236, iteration: 356071
loss: 0.9862611889839172,grad_norm: 0.7236154792158322, iteration: 356072
loss: 0.9773048758506775,grad_norm: 0.9999991941420057, iteration: 356073
loss: 0.984472393989563,grad_norm: 0.7501984011453935, iteration: 356074
loss: 0.9744977951049805,grad_norm: 0.8589081405591622, iteration: 356075
loss: 0.9823731184005737,grad_norm: 0.843961175826372, iteration: 356076
loss: 0.9763914346694946,grad_norm: 0.7818896833603152, iteration: 356077
loss: 0.997156023979187,grad_norm: 0.8113705940636872, iteration: 356078
loss: 1.039293646812439,grad_norm: 0.9999997965936116, iteration: 356079
loss: 1.0327144861221313,grad_norm: 0.939796955848037, iteration: 356080
loss: 1.0048542022705078,grad_norm: 0.8964410298304298, iteration: 356081
loss: 0.9757800698280334,grad_norm: 0.7531388819976915, iteration: 356082
loss: 0.9844521284103394,grad_norm: 0.9999998020319624, iteration: 356083
loss: 1.0034852027893066,grad_norm: 0.7442081872078165, iteration: 356084
loss: 0.9881411194801331,grad_norm: 0.8260788213095048, iteration: 356085
loss: 1.0303280353546143,grad_norm: 0.7920732987409674, iteration: 356086
loss: 1.0163745880126953,grad_norm: 0.8673274243381126, iteration: 356087
loss: 0.9850335121154785,grad_norm: 0.9242663832772037, iteration: 356088
loss: 0.9930035471916199,grad_norm: 0.8439031418259362, iteration: 356089
loss: 0.9435763955116272,grad_norm: 0.732891187864336, iteration: 356090
loss: 1.0263166427612305,grad_norm: 0.7950751906570405, iteration: 356091
loss: 1.0097886323928833,grad_norm: 0.6920851674632646, iteration: 356092
loss: 1.0011895895004272,grad_norm: 0.9999991372299728, iteration: 356093
loss: 1.000147819519043,grad_norm: 0.8768528832061588, iteration: 356094
loss: 1.0065209865570068,grad_norm: 0.7548519976906488, iteration: 356095
loss: 1.0084187984466553,grad_norm: 0.7922880965637343, iteration: 356096
loss: 0.9487015008926392,grad_norm: 0.9037101558094761, iteration: 356097
loss: 0.9847463369369507,grad_norm: 0.6385197370586579, iteration: 356098
loss: 0.9885302782058716,grad_norm: 0.8014746563636647, iteration: 356099
loss: 0.9906728267669678,grad_norm: 0.9424251103448161, iteration: 356100
loss: 1.0053054094314575,grad_norm: 0.7397893870087872, iteration: 356101
loss: 1.0413833856582642,grad_norm: 0.9999991251505417, iteration: 356102
loss: 1.0040793418884277,grad_norm: 0.7539913963809612, iteration: 356103
loss: 0.9766315221786499,grad_norm: 0.8656825981622472, iteration: 356104
loss: 1.0583003759384155,grad_norm: 0.8857716089675758, iteration: 356105
loss: 1.0102068185806274,grad_norm: 0.9487874442262954, iteration: 356106
loss: 0.9692343473434448,grad_norm: 0.8781691248743003, iteration: 356107
loss: 1.033974289894104,grad_norm: 0.9999995069816939, iteration: 356108
loss: 0.9951720833778381,grad_norm: 0.7169204308244355, iteration: 356109
loss: 1.019586443901062,grad_norm: 0.8759716682860942, iteration: 356110
loss: 0.9752949476242065,grad_norm: 0.8459344976149479, iteration: 356111
loss: 1.0368971824645996,grad_norm: 0.96977225600534, iteration: 356112
loss: 0.9771350026130676,grad_norm: 0.9335225800616685, iteration: 356113
loss: 0.9810044169425964,grad_norm: 0.8869016870770351, iteration: 356114
loss: 0.9744175672531128,grad_norm: 0.6675083240255495, iteration: 356115
loss: 1.029266357421875,grad_norm: 0.7539087858107435, iteration: 356116
loss: 0.983887255191803,grad_norm: 0.9216371249197692, iteration: 356117
loss: 1.0036405324935913,grad_norm: 0.7809952444809737, iteration: 356118
loss: 1.001688838005066,grad_norm: 0.9451748496741244, iteration: 356119
loss: 0.9872040748596191,grad_norm: 0.8349958631107193, iteration: 356120
loss: 1.005469560623169,grad_norm: 0.7405612022750304, iteration: 356121
loss: 0.9955825805664062,grad_norm: 0.695003416545933, iteration: 356122
loss: 0.9802623987197876,grad_norm: 0.7481215341862096, iteration: 356123
loss: 0.9867554306983948,grad_norm: 0.985859663499471, iteration: 356124
loss: 1.088167667388916,grad_norm: 0.9999997460796282, iteration: 356125
loss: 0.9545472860336304,grad_norm: 0.7806557845145891, iteration: 356126
loss: 0.9967049956321716,grad_norm: 0.7870316509118207, iteration: 356127
loss: 1.024682641029358,grad_norm: 0.8724643601603306, iteration: 356128
loss: 0.9862611293792725,grad_norm: 0.8435904082182282, iteration: 356129
loss: 1.0035568475723267,grad_norm: 0.9653462597622904, iteration: 356130
loss: 0.9817993640899658,grad_norm: 0.8357369916897496, iteration: 356131
loss: 1.00326669216156,grad_norm: 0.8462939583098738, iteration: 356132
loss: 0.9724719524383545,grad_norm: 0.7143189272328466, iteration: 356133
loss: 0.9911972284317017,grad_norm: 0.7402580989063752, iteration: 356134
loss: 0.961226224899292,grad_norm: 0.8033989905219893, iteration: 356135
loss: 1.0237219333648682,grad_norm: 0.7915981996156257, iteration: 356136
loss: 1.031472086906433,grad_norm: 0.7554292234951461, iteration: 356137
loss: 1.016508936882019,grad_norm: 0.6873788179707288, iteration: 356138
loss: 1.0182750225067139,grad_norm: 0.9999992324755007, iteration: 356139
loss: 0.9859246015548706,grad_norm: 0.7015713706374468, iteration: 356140
loss: 1.0290952920913696,grad_norm: 0.9999995716580923, iteration: 356141
loss: 1.0176012516021729,grad_norm: 0.9999996904716217, iteration: 356142
loss: 1.0054856538772583,grad_norm: 0.941637897175234, iteration: 356143
loss: 1.0041495561599731,grad_norm: 0.9999993798545401, iteration: 356144
loss: 1.011357307434082,grad_norm: 0.9216749803921349, iteration: 356145
loss: 1.0005475282669067,grad_norm: 0.818509102863945, iteration: 356146
loss: 1.023521900177002,grad_norm: 0.878059501341425, iteration: 356147
loss: 0.991279125213623,grad_norm: 0.951999252034893, iteration: 356148
loss: 1.0226503610610962,grad_norm: 0.9999992296704183, iteration: 356149
loss: 1.0200601816177368,grad_norm: 0.7704046544271823, iteration: 356150
loss: 0.992266833782196,grad_norm: 0.7170948614866722, iteration: 356151
loss: 0.9875171780586243,grad_norm: 0.7599689073614287, iteration: 356152
loss: 0.984913170337677,grad_norm: 0.9999991779993085, iteration: 356153
loss: 1.011038064956665,grad_norm: 0.8233718121771366, iteration: 356154
loss: 0.9723166823387146,grad_norm: 0.7482591742750162, iteration: 356155
loss: 1.0050673484802246,grad_norm: 0.8222270902707249, iteration: 356156
loss: 0.9579360485076904,grad_norm: 0.8582393381764655, iteration: 356157
loss: 0.9891014099121094,grad_norm: 0.9970373958819786, iteration: 356158
loss: 0.9566641449928284,grad_norm: 0.7855600404614782, iteration: 356159
loss: 1.0197148323059082,grad_norm: 0.8407870821388075, iteration: 356160
loss: 1.034358263015747,grad_norm: 0.8402114581718662, iteration: 356161
loss: 0.9646148085594177,grad_norm: 0.9267278271420782, iteration: 356162
loss: 1.0175163745880127,grad_norm: 0.8695018442884774, iteration: 356163
loss: 1.0347949266433716,grad_norm: 0.9999991929478442, iteration: 356164
loss: 1.0355695486068726,grad_norm: 0.9162560716868423, iteration: 356165
loss: 0.9665802717208862,grad_norm: 0.7487785663921916, iteration: 356166
loss: 0.9474550485610962,grad_norm: 0.8296577106321512, iteration: 356167
loss: 0.9959772229194641,grad_norm: 0.7363511331790975, iteration: 356168
loss: 0.9982531070709229,grad_norm: 0.9999993608065086, iteration: 356169
loss: 1.0305852890014648,grad_norm: 0.9128891322420962, iteration: 356170
loss: 1.003218173980713,grad_norm: 0.8088805241671149, iteration: 356171
loss: 0.9938685297966003,grad_norm: 0.6951396580763461, iteration: 356172
loss: 1.0530651807785034,grad_norm: 0.9999993364553564, iteration: 356173
loss: 1.030366063117981,grad_norm: 0.7321591426112908, iteration: 356174
loss: 0.980651319026947,grad_norm: 0.9999992061258663, iteration: 356175
loss: 1.076581597328186,grad_norm: 0.8883966970369135, iteration: 356176
loss: 1.0287587642669678,grad_norm: 0.8483939955170984, iteration: 356177
loss: 1.008231282234192,grad_norm: 0.7905903615841584, iteration: 356178
loss: 0.9682847857475281,grad_norm: 0.8341367117599638, iteration: 356179
loss: 0.988917887210846,grad_norm: 0.8174497655806674, iteration: 356180
loss: 1.0120148658752441,grad_norm: 0.870583577882472, iteration: 356181
loss: 1.015321969985962,grad_norm: 0.9952980105835986, iteration: 356182
loss: 1.0171095132827759,grad_norm: 0.7781920414067893, iteration: 356183
loss: 1.0644466876983643,grad_norm: 0.9999999472649735, iteration: 356184
loss: 0.9450452923774719,grad_norm: 0.9028280586949952, iteration: 356185
loss: 0.9629777073860168,grad_norm: 0.8015596698229905, iteration: 356186
loss: 1.0329889059066772,grad_norm: 0.868617661619702, iteration: 356187
loss: 1.0149585008621216,grad_norm: 0.8053323620328033, iteration: 356188
loss: 0.9946385622024536,grad_norm: 0.8071007507351313, iteration: 356189
loss: 1.0127445459365845,grad_norm: 0.8466747357215691, iteration: 356190
loss: 0.9967140555381775,grad_norm: 0.9982649794665038, iteration: 356191
loss: 0.9946752190589905,grad_norm: 0.7965324798379844, iteration: 356192
loss: 1.0331918001174927,grad_norm: 0.9568703439971632, iteration: 356193
loss: 1.053133249282837,grad_norm: 0.9864667497917821, iteration: 356194
loss: 1.0097317695617676,grad_norm: 0.9725467572998976, iteration: 356195
loss: 1.059699296951294,grad_norm: 0.802089573355863, iteration: 356196
loss: 1.0613045692443848,grad_norm: 0.9999990827256245, iteration: 356197
loss: 1.0009793043136597,grad_norm: 0.9048586812770327, iteration: 356198
loss: 0.9999570250511169,grad_norm: 0.8536593202731607, iteration: 356199
loss: 0.9902869462966919,grad_norm: 0.9999991978668671, iteration: 356200
loss: 1.030860424041748,grad_norm: 0.9999989827085254, iteration: 356201
loss: 1.0582917928695679,grad_norm: 0.9999995406757404, iteration: 356202
loss: 0.9947475790977478,grad_norm: 0.8159918827866444, iteration: 356203
loss: 0.9897292256355286,grad_norm: 0.7981492871166906, iteration: 356204
loss: 1.0089359283447266,grad_norm: 0.7380550091600115, iteration: 356205
loss: 0.9874028563499451,grad_norm: 0.867386661590216, iteration: 356206
loss: 1.017677664756775,grad_norm: 0.999999274100001, iteration: 356207
loss: 1.011504054069519,grad_norm: 0.9201170586598183, iteration: 356208
loss: 1.0300625562667847,grad_norm: 0.8002599157676205, iteration: 356209
loss: 1.0432723760604858,grad_norm: 0.999999147332411, iteration: 356210
loss: 0.9920433759689331,grad_norm: 0.8107758299165527, iteration: 356211
loss: 1.0071823596954346,grad_norm: 0.7736099540545804, iteration: 356212
loss: 0.9866591691970825,grad_norm: 0.7762229237772509, iteration: 356213
loss: 1.0037003755569458,grad_norm: 0.8651307643130027, iteration: 356214
loss: 1.0046892166137695,grad_norm: 0.6978293092564452, iteration: 356215
loss: 1.0248886346817017,grad_norm: 0.695559742609729, iteration: 356216
loss: 0.9871313571929932,grad_norm: 0.8630923122600894, iteration: 356217
loss: 1.0031524896621704,grad_norm: 0.742145134304661, iteration: 356218
loss: 0.9987907409667969,grad_norm: 0.951511061039145, iteration: 356219
loss: 0.9831369519233704,grad_norm: 0.999999103670741, iteration: 356220
loss: 0.9828618168830872,grad_norm: 0.8095851347046418, iteration: 356221
loss: 1.006213665008545,grad_norm: 0.9225439116526757, iteration: 356222
loss: 1.0133461952209473,grad_norm: 0.9833001033935505, iteration: 356223
loss: 1.0243908166885376,grad_norm: 0.9999993931447982, iteration: 356224
loss: 1.0030603408813477,grad_norm: 0.7897937895857665, iteration: 356225
loss: 1.0135995149612427,grad_norm: 0.8338855364261353, iteration: 356226
loss: 0.9916114211082458,grad_norm: 0.8604206054164432, iteration: 356227
loss: 0.9953015446662903,grad_norm: 0.9348156757097524, iteration: 356228
loss: 1.0049713850021362,grad_norm: 0.8159181403568194, iteration: 356229
loss: 1.0145678520202637,grad_norm: 0.7431539172598755, iteration: 356230
loss: 1.0016168355941772,grad_norm: 0.6796547167751295, iteration: 356231
loss: 1.000717043876648,grad_norm: 0.9999996157582325, iteration: 356232
loss: 1.017286777496338,grad_norm: 0.6711603567332294, iteration: 356233
loss: 0.9682767987251282,grad_norm: 0.9943447790747704, iteration: 356234
loss: 0.9910016059875488,grad_norm: 0.7672929834324739, iteration: 356235
loss: 0.9699238538742065,grad_norm: 0.9625189554503537, iteration: 356236
loss: 0.9889300465583801,grad_norm: 0.8237323847028691, iteration: 356237
loss: 0.9658172726631165,grad_norm: 0.7946464931572591, iteration: 356238
loss: 1.0024570226669312,grad_norm: 0.9108201404421544, iteration: 356239
loss: 1.0067414045333862,grad_norm: 0.9999991024815184, iteration: 356240
loss: 1.0633580684661865,grad_norm: 0.9999990462096375, iteration: 356241
loss: 0.9926592707633972,grad_norm: 0.7022289141276736, iteration: 356242
loss: 0.9685125946998596,grad_norm: 0.8653852869987072, iteration: 356243
loss: 0.9978644251823425,grad_norm: 0.9999991262381189, iteration: 356244
loss: 1.013219952583313,grad_norm: 0.9999992746834258, iteration: 356245
loss: 0.9978529214859009,grad_norm: 0.894067406219447, iteration: 356246
loss: 1.0054417848587036,grad_norm: 0.8480647549191181, iteration: 356247
loss: 1.0095281600952148,grad_norm: 0.7722647371870018, iteration: 356248
loss: 0.9933086037635803,grad_norm: 0.8256178334426583, iteration: 356249
loss: 0.9730511903762817,grad_norm: 0.999999140897466, iteration: 356250
loss: 0.9612302184104919,grad_norm: 0.8951563765764927, iteration: 356251
loss: 1.0261600017547607,grad_norm: 0.9292904058106904, iteration: 356252
loss: 1.0264257192611694,grad_norm: 0.7365725461904987, iteration: 356253
loss: 1.0385849475860596,grad_norm: 0.8549695558575028, iteration: 356254
loss: 0.9730465412139893,grad_norm: 0.8142990580063316, iteration: 356255
loss: 0.9751958250999451,grad_norm: 0.7089697978680742, iteration: 356256
loss: 1.0054618120193481,grad_norm: 0.9455896225131238, iteration: 356257
loss: 0.9966720342636108,grad_norm: 0.8806998091013692, iteration: 356258
loss: 1.1243728399276733,grad_norm: 0.9999997780981711, iteration: 356259
loss: 0.9657628536224365,grad_norm: 0.9321324064406364, iteration: 356260
loss: 0.9990026950836182,grad_norm: 0.7270018027683692, iteration: 356261
loss: 0.9814318418502808,grad_norm: 0.8414307714006012, iteration: 356262
loss: 0.9846328496932983,grad_norm: 0.7914047925362828, iteration: 356263
loss: 0.9651872515678406,grad_norm: 0.7440653021381022, iteration: 356264
loss: 0.988997757434845,grad_norm: 0.8407889358990634, iteration: 356265
loss: 0.9768017530441284,grad_norm: 0.9999992695330375, iteration: 356266
loss: 0.9357876181602478,grad_norm: 0.7987936862322232, iteration: 356267
loss: 1.0447465181350708,grad_norm: 0.7819492348903172, iteration: 356268
loss: 1.0181517601013184,grad_norm: 0.8714467909091226, iteration: 356269
loss: 1.0137664079666138,grad_norm: 0.8085945622854518, iteration: 356270
loss: 1.0004957914352417,grad_norm: 0.7522980841720388, iteration: 356271
loss: 1.0253013372421265,grad_norm: 0.7798863121276585, iteration: 356272
loss: 0.973034143447876,grad_norm: 0.7717643486388506, iteration: 356273
loss: 0.9824190735816956,grad_norm: 0.9238727400878971, iteration: 356274
loss: 0.9887335300445557,grad_norm: 0.740909206598812, iteration: 356275
loss: 0.9389968514442444,grad_norm: 0.8962607435800413, iteration: 356276
loss: 1.0214943885803223,grad_norm: 0.6548485550949149, iteration: 356277
loss: 1.0051888227462769,grad_norm: 0.7907577261910408, iteration: 356278
loss: 1.0008171796798706,grad_norm: 0.8625932128437228, iteration: 356279
loss: 1.001132607460022,grad_norm: 0.9303820727653671, iteration: 356280
loss: 1.0058362483978271,grad_norm: 0.857828384933817, iteration: 356281
loss: 0.9739456176757812,grad_norm: 0.9999990300303148, iteration: 356282
loss: 0.9607701897621155,grad_norm: 0.8729904195101155, iteration: 356283
loss: 1.001729130744934,grad_norm: 0.8616072103601643, iteration: 356284
loss: 1.0114357471466064,grad_norm: 0.8903955631113756, iteration: 356285
loss: 0.9976557493209839,grad_norm: 0.6797331326897952, iteration: 356286
loss: 0.9968571662902832,grad_norm: 0.873473932940204, iteration: 356287
loss: 0.9868809580802917,grad_norm: 0.8738577682270545, iteration: 356288
loss: 0.9805390238761902,grad_norm: 0.6961954188828733, iteration: 356289
loss: 0.9982985854148865,grad_norm: 0.8707779282345611, iteration: 356290
loss: 0.970064103603363,grad_norm: 0.8346025076412729, iteration: 356291
loss: 0.9592617750167847,grad_norm: 0.835061974594584, iteration: 356292
loss: 1.0090371370315552,grad_norm: 0.6999642333187355, iteration: 356293
loss: 0.9868441224098206,grad_norm: 0.7987633840019926, iteration: 356294
loss: 0.9756546020507812,grad_norm: 0.8957886084132936, iteration: 356295
loss: 1.0059332847595215,grad_norm: 0.835053993038651, iteration: 356296
loss: 1.0416998863220215,grad_norm: 0.869738024728702, iteration: 356297
loss: 1.0202261209487915,grad_norm: 0.8798352856635178, iteration: 356298
loss: 1.0269817113876343,grad_norm: 0.7724893707972184, iteration: 356299
loss: 1.0277810096740723,grad_norm: 0.8287231968553453, iteration: 356300
loss: 0.9933320879936218,grad_norm: 0.643133963825292, iteration: 356301
loss: 1.0311846733093262,grad_norm: 0.7838268703761723, iteration: 356302
loss: 0.9886443614959717,grad_norm: 0.8895168593494267, iteration: 356303
loss: 1.005788803100586,grad_norm: 0.7697562924694615, iteration: 356304
loss: 0.9787188768386841,grad_norm: 0.7750686676447529, iteration: 356305
loss: 0.9787420034408569,grad_norm: 0.9343967754234471, iteration: 356306
loss: 0.9856088757514954,grad_norm: 0.7905326169755422, iteration: 356307
loss: 0.9881457090377808,grad_norm: 0.7817245485754574, iteration: 356308
loss: 0.999758243560791,grad_norm: 0.7304276011289308, iteration: 356309
loss: 0.9856337308883667,grad_norm: 0.7435105293378342, iteration: 356310
loss: 1.0910223722457886,grad_norm: 0.9999992913912926, iteration: 356311
loss: 0.9547523260116577,grad_norm: 0.820065172608772, iteration: 356312
loss: 0.9957534074783325,grad_norm: 0.8946895076197554, iteration: 356313
loss: 1.0056495666503906,grad_norm: 0.8204357562744973, iteration: 356314
loss: 1.009451150894165,grad_norm: 0.9032358275790668, iteration: 356315
loss: 0.9779697060585022,grad_norm: 0.9841238223620835, iteration: 356316
loss: 0.9773768782615662,grad_norm: 0.8979351988568552, iteration: 356317
loss: 0.9984592199325562,grad_norm: 0.9577951817267972, iteration: 356318
loss: 0.9937536716461182,grad_norm: 0.8016858739651119, iteration: 356319
loss: 1.012682318687439,grad_norm: 0.8202481232189769, iteration: 356320
loss: 1.0186692476272583,grad_norm: 0.9063397518419616, iteration: 356321
loss: 0.9736380577087402,grad_norm: 0.8095824714873512, iteration: 356322
loss: 1.013541340827942,grad_norm: 0.9593030567980468, iteration: 356323
loss: 1.0538755655288696,grad_norm: 0.999999376433933, iteration: 356324
loss: 0.9912988543510437,grad_norm: 0.9074738271452327, iteration: 356325
loss: 1.0231267213821411,grad_norm: 0.8826945321302678, iteration: 356326
loss: 0.9961727261543274,grad_norm: 0.8088150588203132, iteration: 356327
loss: 1.0105950832366943,grad_norm: 0.7602659061080328, iteration: 356328
loss: 1.011979579925537,grad_norm: 0.8707086711394899, iteration: 356329
loss: 0.9565367698669434,grad_norm: 0.7912746039517402, iteration: 356330
loss: 1.0177849531173706,grad_norm: 0.9999994970799827, iteration: 356331
loss: 1.025236964225769,grad_norm: 0.9999992024265236, iteration: 356332
loss: 1.0280206203460693,grad_norm: 0.8085160404352255, iteration: 356333
loss: 0.9983722567558289,grad_norm: 0.9801321621714004, iteration: 356334
loss: 1.040473461151123,grad_norm: 0.7805774601412921, iteration: 356335
loss: 1.0031121969223022,grad_norm: 0.761500665976767, iteration: 356336
loss: 0.9738379716873169,grad_norm: 0.7144236526964728, iteration: 356337
loss: 0.9984264969825745,grad_norm: 0.8499149589864047, iteration: 356338
loss: 1.0259299278259277,grad_norm: 0.8641268400727333, iteration: 356339
loss: 0.9910663366317749,grad_norm: 0.8081727215462997, iteration: 356340
loss: 1.0109727382659912,grad_norm: 0.8441595167268254, iteration: 356341
loss: 0.9893137216567993,grad_norm: 0.6441714638807741, iteration: 356342
loss: 0.9710796475410461,grad_norm: 0.8256420060190459, iteration: 356343
loss: 1.0193055868148804,grad_norm: 0.8110109332231351, iteration: 356344
loss: 1.0270177125930786,grad_norm: 0.8721898947207237, iteration: 356345
loss: 0.98395174741745,grad_norm: 0.76609238781462, iteration: 356346
loss: 1.0103689432144165,grad_norm: 0.8562514090109716, iteration: 356347
loss: 0.969635009765625,grad_norm: 0.7873375721732616, iteration: 356348
loss: 1.0307263135910034,grad_norm: 0.8528011465731014, iteration: 356349
loss: 1.020424246788025,grad_norm: 0.8909475350216063, iteration: 356350
loss: 1.025485873222351,grad_norm: 0.7015918408161581, iteration: 356351
loss: 0.9944261908531189,grad_norm: 0.8246185754133698, iteration: 356352
loss: 1.0381731986999512,grad_norm: 0.8208185130723568, iteration: 356353
loss: 0.9993466138839722,grad_norm: 0.6937110988601629, iteration: 356354
loss: 0.984306275844574,grad_norm: 0.8685056886185989, iteration: 356355
loss: 0.9810642600059509,grad_norm: 0.6833814132658119, iteration: 356356
loss: 1.037032961845398,grad_norm: 0.8008291363063084, iteration: 356357
loss: 1.0108914375305176,grad_norm: 0.8079848778374675, iteration: 356358
loss: 0.9940633177757263,grad_norm: 0.9289100398401895, iteration: 356359
loss: 0.9592127203941345,grad_norm: 0.8541427006488385, iteration: 356360
loss: 0.9801677465438843,grad_norm: 0.8473838312868812, iteration: 356361
loss: 0.9953352808952332,grad_norm: 0.8290232129187515, iteration: 356362
loss: 0.9937416911125183,grad_norm: 0.8845144153108063, iteration: 356363
loss: 0.9720202684402466,grad_norm: 0.9999990082193752, iteration: 356364
loss: 1.006641149520874,grad_norm: 0.723241540677217, iteration: 356365
loss: 0.982359766960144,grad_norm: 0.7057819087147953, iteration: 356366
loss: 0.9972873330116272,grad_norm: 0.8670327359997902, iteration: 356367
loss: 1.0331733226776123,grad_norm: 0.840530658649465, iteration: 356368
loss: 1.008571743965149,grad_norm: 0.8109425208491982, iteration: 356369
loss: 0.9879559278488159,grad_norm: 0.7220888866299269, iteration: 356370
loss: 0.9813227653503418,grad_norm: 0.7966804331418392, iteration: 356371
loss: 0.9907180070877075,grad_norm: 0.7898634010121587, iteration: 356372
loss: 1.0310765504837036,grad_norm: 0.833327537686859, iteration: 356373
loss: 0.9811049103736877,grad_norm: 0.736294112705545, iteration: 356374
loss: 1.0175416469573975,grad_norm: 0.9999992320043791, iteration: 356375
loss: 0.9977474808692932,grad_norm: 0.9059704758675239, iteration: 356376
loss: 1.0032017230987549,grad_norm: 0.7381428374552608, iteration: 356377
loss: 1.0129849910736084,grad_norm: 0.8618023403202523, iteration: 356378
loss: 1.0034111738204956,grad_norm: 0.9963704962879839, iteration: 356379
loss: 0.9903993010520935,grad_norm: 0.7707887323010635, iteration: 356380
loss: 1.015915870666504,grad_norm: 0.6544392931108081, iteration: 356381
loss: 1.0177595615386963,grad_norm: 0.8375831834995681, iteration: 356382
loss: 1.0323173999786377,grad_norm: 0.7916418043440969, iteration: 356383
loss: 1.0350295305252075,grad_norm: 0.9005751131577718, iteration: 356384
loss: 0.9556697607040405,grad_norm: 0.8217602512120666, iteration: 356385
loss: 0.9981734752655029,grad_norm: 0.8000892562416216, iteration: 356386
loss: 0.9999238848686218,grad_norm: 0.9999990881604282, iteration: 356387
loss: 0.9901042580604553,grad_norm: 0.8670493021243499, iteration: 356388
loss: 0.9756599068641663,grad_norm: 0.6862886985028713, iteration: 356389
loss: 0.9986505508422852,grad_norm: 0.8109485308706267, iteration: 356390
loss: 1.0098755359649658,grad_norm: 0.8581247821097948, iteration: 356391
loss: 1.0684486627578735,grad_norm: 0.7811220587098859, iteration: 356392
loss: 0.9850307703018188,grad_norm: 0.8047386334599934, iteration: 356393
loss: 1.0249536037445068,grad_norm: 0.7855803456167881, iteration: 356394
loss: 0.995449960231781,grad_norm: 0.9118983057235357, iteration: 356395
loss: 0.9897447824478149,grad_norm: 0.9390914581005712, iteration: 356396
loss: 1.0091851949691772,grad_norm: 0.965237505387793, iteration: 356397
loss: 1.0104711055755615,grad_norm: 0.7879588494072212, iteration: 356398
loss: 0.9503822922706604,grad_norm: 0.774817195692104, iteration: 356399
loss: 1.0081381797790527,grad_norm: 0.9999992400415357, iteration: 356400
loss: 0.9958298802375793,grad_norm: 0.7762446795898524, iteration: 356401
loss: 0.9744684100151062,grad_norm: 0.8835386747320809, iteration: 356402
loss: 1.021894931793213,grad_norm: 0.8440596646522828, iteration: 356403
loss: 1.0221664905548096,grad_norm: 0.8355849955703429, iteration: 356404
loss: 0.972734272480011,grad_norm: 0.7713445875479438, iteration: 356405
loss: 0.9973891377449036,grad_norm: 0.7060410820716212, iteration: 356406
loss: 0.9996980428695679,grad_norm: 0.945543353948234, iteration: 356407
loss: 0.9490765333175659,grad_norm: 0.7144932779667974, iteration: 356408
loss: 0.9783698916435242,grad_norm: 0.7098511237616938, iteration: 356409
loss: 0.973057210445404,grad_norm: 0.7856951896527298, iteration: 356410
loss: 0.9650387167930603,grad_norm: 0.9999999401312146, iteration: 356411
loss: 0.9909098148345947,grad_norm: 0.9206114622536639, iteration: 356412
loss: 1.0274596214294434,grad_norm: 0.89442744352044, iteration: 356413
loss: 1.0326389074325562,grad_norm: 0.7964797728039172, iteration: 356414
loss: 1.0300960540771484,grad_norm: 0.8432886918411725, iteration: 356415
loss: 1.0320985317230225,grad_norm: 0.8832197788880641, iteration: 356416
loss: 0.9850190877914429,grad_norm: 0.9999991154374608, iteration: 356417
loss: 0.9921811819076538,grad_norm: 0.7979004267223084, iteration: 356418
loss: 0.9949554204940796,grad_norm: 0.8657708254139443, iteration: 356419
loss: 0.9635477066040039,grad_norm: 0.8393710327237457, iteration: 356420
loss: 0.9952463507652283,grad_norm: 0.9433502197488481, iteration: 356421
loss: 1.0336620807647705,grad_norm: 0.7907506374141589, iteration: 356422
loss: 1.0225567817687988,grad_norm: 0.9999995033573883, iteration: 356423
loss: 0.9739657640457153,grad_norm: 0.790768522723918, iteration: 356424
loss: 1.0230026245117188,grad_norm: 0.760894110471461, iteration: 356425
loss: 0.9873828887939453,grad_norm: 0.772166870341072, iteration: 356426
loss: 1.0107386112213135,grad_norm: 0.7268737801021671, iteration: 356427
loss: 1.0025825500488281,grad_norm: 0.7438047956913968, iteration: 356428
loss: 0.9716290831565857,grad_norm: 0.9282120721440197, iteration: 356429
loss: 0.9825320839881897,grad_norm: 0.8105734552471091, iteration: 356430
loss: 1.0264396667480469,grad_norm: 0.9999999042927102, iteration: 356431
loss: 1.0752140283584595,grad_norm: 0.9999991852097677, iteration: 356432
loss: 0.999926745891571,grad_norm: 0.7706873843202078, iteration: 356433
loss: 0.9710736274719238,grad_norm: 0.7449477441963017, iteration: 356434
loss: 0.9511852264404297,grad_norm: 0.7615051889927225, iteration: 356435
loss: 1.0095267295837402,grad_norm: 0.792837574006289, iteration: 356436
loss: 0.9824656844139099,grad_norm: 0.850933630121574, iteration: 356437
loss: 1.0083212852478027,grad_norm: 0.9790642626721471, iteration: 356438
loss: 1.0318480730056763,grad_norm: 0.983773433712226, iteration: 356439
loss: 0.9990671277046204,grad_norm: 0.8176064273882507, iteration: 356440
loss: 1.0051463842391968,grad_norm: 0.757533616306523, iteration: 356441
loss: 1.0334410667419434,grad_norm: 0.7184108853660905, iteration: 356442
loss: 0.9905214309692383,grad_norm: 0.8698555240146134, iteration: 356443
loss: 1.0455516576766968,grad_norm: 0.9999991713875249, iteration: 356444
loss: 0.9914602041244507,grad_norm: 0.9038407942812757, iteration: 356445
loss: 1.0185902118682861,grad_norm: 0.7449972710176009, iteration: 356446
loss: 0.9926193952560425,grad_norm: 0.6636717156560267, iteration: 356447
loss: 1.0054559707641602,grad_norm: 0.9999992625402209, iteration: 356448
loss: 1.0451654195785522,grad_norm: 0.7881036322797528, iteration: 356449
loss: 0.9827331900596619,grad_norm: 0.9999992262480382, iteration: 356450
loss: 0.9908487796783447,grad_norm: 0.7846772181632313, iteration: 356451
loss: 0.9734293818473816,grad_norm: 0.7135052828496011, iteration: 356452
loss: 1.0474368333816528,grad_norm: 0.8026495158305479, iteration: 356453
loss: 1.0028517246246338,grad_norm: 0.8131283544903019, iteration: 356454
loss: 1.0121829509735107,grad_norm: 0.8333158034574224, iteration: 356455
loss: 1.0191959142684937,grad_norm: 0.8642930001504443, iteration: 356456
loss: 0.9651442766189575,grad_norm: 0.7045243388845841, iteration: 356457
loss: 0.9573320746421814,grad_norm: 0.9999992480486565, iteration: 356458
loss: 1.0005885362625122,grad_norm: 0.8841777456129866, iteration: 356459
loss: 1.0027623176574707,grad_norm: 0.7547188179986564, iteration: 356460
loss: 1.0034037828445435,grad_norm: 0.9590050443266024, iteration: 356461
loss: 0.9913902878761292,grad_norm: 0.8060464879428781, iteration: 356462
loss: 0.9857985377311707,grad_norm: 0.8754116358767152, iteration: 356463
loss: 1.0398763418197632,grad_norm: 0.7845036377480235, iteration: 356464
loss: 1.0200190544128418,grad_norm: 0.7494837603541475, iteration: 356465
loss: 0.9884899258613586,grad_norm: 0.981608908392317, iteration: 356466
loss: 1.008205771446228,grad_norm: 0.8313426676335285, iteration: 356467
loss: 0.9977611899375916,grad_norm: 0.8127658681064657, iteration: 356468
loss: 1.030132532119751,grad_norm: 0.861933415448543, iteration: 356469
loss: 1.0012283325195312,grad_norm: 0.8830121965250284, iteration: 356470
loss: 0.9821498990058899,grad_norm: 0.6663061206346689, iteration: 356471
loss: 1.0150338411331177,grad_norm: 0.9999998345394124, iteration: 356472
loss: 1.024916172027588,grad_norm: 0.9421520040194182, iteration: 356473
loss: 1.095567226409912,grad_norm: 0.9999998231871073, iteration: 356474
loss: 0.9941180348396301,grad_norm: 0.7541463543539847, iteration: 356475
loss: 1.0626115798950195,grad_norm: 0.999999950377125, iteration: 356476
loss: 1.0076756477355957,grad_norm: 0.7939116838677263, iteration: 356477
loss: 1.0150415897369385,grad_norm: 0.9258399194079807, iteration: 356478
loss: 1.0046826601028442,grad_norm: 0.7666769873607691, iteration: 356479
loss: 0.9708070158958435,grad_norm: 0.759367660156143, iteration: 356480
loss: 1.0416450500488281,grad_norm: 0.7876503582931027, iteration: 356481
loss: 0.9868150353431702,grad_norm: 0.7396762961973886, iteration: 356482
loss: 1.0013970136642456,grad_norm: 0.7553489678596654, iteration: 356483
loss: 1.0685323476791382,grad_norm: 0.9995003495653478, iteration: 356484
loss: 1.0009604692459106,grad_norm: 0.9999992351682156, iteration: 356485
loss: 0.9880051016807556,grad_norm: 0.7551251620191868, iteration: 356486
loss: 0.9948922991752625,grad_norm: 0.6979968992950734, iteration: 356487
loss: 0.9948306083679199,grad_norm: 0.9999993537033889, iteration: 356488
loss: 1.0485281944274902,grad_norm: 0.9999990519565572, iteration: 356489
loss: 0.9705418348312378,grad_norm: 0.9202140034089655, iteration: 356490
loss: 1.0225859880447388,grad_norm: 0.9500246931313924, iteration: 356491
loss: 0.9903210401535034,grad_norm: 0.7531140624291186, iteration: 356492
loss: 0.9630967974662781,grad_norm: 0.886104315871536, iteration: 356493
loss: 1.0746222734451294,grad_norm: 0.9999991382713193, iteration: 356494
loss: 1.0034804344177246,grad_norm: 0.7470893755372006, iteration: 356495
loss: 0.9756446480751038,grad_norm: 0.6706972575612202, iteration: 356496
loss: 0.9963716864585876,grad_norm: 0.783569939660504, iteration: 356497
loss: 0.9650295376777649,grad_norm: 0.962822877639496, iteration: 356498
loss: 1.0989269018173218,grad_norm: 0.9999996450696861, iteration: 356499
loss: 1.0183478593826294,grad_norm: 0.9070576142230774, iteration: 356500
loss: 1.0179487466812134,grad_norm: 0.9999991942587432, iteration: 356501
loss: 0.9951786994934082,grad_norm: 0.9835733154248177, iteration: 356502
loss: 1.0003947019577026,grad_norm: 0.8077268994706104, iteration: 356503
loss: 0.9980335831642151,grad_norm: 0.7453644277790227, iteration: 356504
loss: 0.9983845949172974,grad_norm: 0.7197322843728153, iteration: 356505
loss: 0.988402247428894,grad_norm: 0.8911459336211724, iteration: 356506
loss: 0.9780740141868591,grad_norm: 0.776180716303226, iteration: 356507
loss: 1.0488336086273193,grad_norm: 0.8443584627172463, iteration: 356508
loss: 0.9821904897689819,grad_norm: 0.8626560805253045, iteration: 356509
loss: 1.0403817892074585,grad_norm: 0.8510558247821913, iteration: 356510
loss: 1.0076600313186646,grad_norm: 0.9121195633088428, iteration: 356511
loss: 0.9832549095153809,grad_norm: 0.8639018860412604, iteration: 356512
loss: 0.9692943096160889,grad_norm: 0.7917199716495534, iteration: 356513
loss: 0.98649001121521,grad_norm: 0.9999991485660614, iteration: 356514
loss: 0.9935176968574524,grad_norm: 0.9824068979368483, iteration: 356515
loss: 0.9702833294868469,grad_norm: 0.8809953976026614, iteration: 356516
loss: 0.9793257117271423,grad_norm: 0.7747325201807186, iteration: 356517
loss: 1.0216658115386963,grad_norm: 0.7400251852563441, iteration: 356518
loss: 1.0490260124206543,grad_norm: 0.8853970625189849, iteration: 356519
loss: 1.0251007080078125,grad_norm: 0.933699224082105, iteration: 356520
loss: 0.992051362991333,grad_norm: 0.7967497157819852, iteration: 356521
loss: 1.019547700881958,grad_norm: 0.7474189909373486, iteration: 356522
loss: 1.0101838111877441,grad_norm: 0.8430473843954807, iteration: 356523
loss: 1.016303300857544,grad_norm: 0.8491046222089197, iteration: 356524
loss: 1.0452543497085571,grad_norm: 0.9999993438534587, iteration: 356525
loss: 1.0003321170806885,grad_norm: 0.8721976530513436, iteration: 356526
loss: 1.0069377422332764,grad_norm: 0.7358453153673389, iteration: 356527
loss: 0.9824281930923462,grad_norm: 0.756483625574543, iteration: 356528
loss: 0.9735319018363953,grad_norm: 0.6922354348608534, iteration: 356529
loss: 0.9966016411781311,grad_norm: 0.869519833183305, iteration: 356530
loss: 0.9905927777290344,grad_norm: 0.8323713264904851, iteration: 356531
loss: 1.0074414014816284,grad_norm: 0.681996417557922, iteration: 356532
loss: 0.9951640367507935,grad_norm: 0.830303881317443, iteration: 356533
loss: 1.0050389766693115,grad_norm: 0.7767748610688816, iteration: 356534
loss: 0.9769421219825745,grad_norm: 0.8326185391807441, iteration: 356535
loss: 0.998613715171814,grad_norm: 0.8283688839925282, iteration: 356536
loss: 1.0162477493286133,grad_norm: 0.999998980695595, iteration: 356537
loss: 1.043717384338379,grad_norm: 0.8114590823913419, iteration: 356538
loss: 1.0047374963760376,grad_norm: 0.8589231963441487, iteration: 356539
loss: 1.0058562755584717,grad_norm: 0.7714364237450114, iteration: 356540
loss: 0.978911817073822,grad_norm: 0.8413250175645126, iteration: 356541
loss: 0.9632710814476013,grad_norm: 0.8967338122038758, iteration: 356542
loss: 0.9777228832244873,grad_norm: 0.8402080758718744, iteration: 356543
loss: 0.9792206883430481,grad_norm: 0.9999992773855646, iteration: 356544
loss: 1.0078706741333008,grad_norm: 0.8640487976113741, iteration: 356545
loss: 0.9496651887893677,grad_norm: 0.81728349429283, iteration: 356546
loss: 0.9662739038467407,grad_norm: 0.9369670097463594, iteration: 356547
loss: 0.963742733001709,grad_norm: 0.7217914124474178, iteration: 356548
loss: 1.0165843963623047,grad_norm: 0.9999991865880359, iteration: 356549
loss: 0.9898432493209839,grad_norm: 0.8600877839768603, iteration: 356550
loss: 0.9855445027351379,grad_norm: 0.8307425649690926, iteration: 356551
loss: 0.9843977689743042,grad_norm: 0.9354497640545126, iteration: 356552
loss: 1.0181742906570435,grad_norm: 0.999999020872597, iteration: 356553
loss: 1.0378748178482056,grad_norm: 0.9999990271854341, iteration: 356554
loss: 0.9885013103485107,grad_norm: 0.7270281257155085, iteration: 356555
loss: 0.9955034255981445,grad_norm: 0.8436707726143041, iteration: 356556
loss: 0.999445915222168,grad_norm: 0.7435833427781895, iteration: 356557
loss: 1.02037513256073,grad_norm: 0.7644957942278471, iteration: 356558
loss: 0.9847733378410339,grad_norm: 0.9094043234936163, iteration: 356559
loss: 0.9871376156806946,grad_norm: 0.728965057857401, iteration: 356560
loss: 0.9856265783309937,grad_norm: 0.7334108926089092, iteration: 356561
loss: 0.9751918911933899,grad_norm: 0.7429185921322249, iteration: 356562
loss: 1.0046954154968262,grad_norm: 0.8599015585102907, iteration: 356563
loss: 0.9628946781158447,grad_norm: 0.8324888636364315, iteration: 356564
loss: 0.9826833605766296,grad_norm: 0.7820124891228419, iteration: 356565
loss: 1.0310879945755005,grad_norm: 0.8384570030642959, iteration: 356566
loss: 1.0323491096496582,grad_norm: 0.9076759105394585, iteration: 356567
loss: 1.036545753479004,grad_norm: 0.9999991799015332, iteration: 356568
loss: 1.0270401239395142,grad_norm: 0.9999993545982376, iteration: 356569
loss: 0.9560834169387817,grad_norm: 0.7819927939026805, iteration: 356570
loss: 1.0162638425827026,grad_norm: 0.7620568188435534, iteration: 356571
loss: 1.0156139135360718,grad_norm: 0.7557098787758855, iteration: 356572
loss: 1.0775508880615234,grad_norm: 0.8211080714363307, iteration: 356573
loss: 1.0269190073013306,grad_norm: 0.8728784439116726, iteration: 356574
loss: 1.0362701416015625,grad_norm: 0.9999990854235646, iteration: 356575
loss: 0.982570469379425,grad_norm: 0.8413690553414789, iteration: 356576
loss: 0.9913761615753174,grad_norm: 0.819427730805309, iteration: 356577
loss: 1.01984703540802,grad_norm: 0.6617063734006909, iteration: 356578
loss: 0.9634971618652344,grad_norm: 0.8983454838527528, iteration: 356579
loss: 1.0287506580352783,grad_norm: 0.7171862841208055, iteration: 356580
loss: 1.000661015510559,grad_norm: 0.8719905938836336, iteration: 356581
loss: 1.0149542093276978,grad_norm: 0.8360083060406801, iteration: 356582
loss: 0.9794624447822571,grad_norm: 0.9278599213518022, iteration: 356583
loss: 1.0300663709640503,grad_norm: 0.7901989218844682, iteration: 356584
loss: 1.0239845514297485,grad_norm: 0.9162885864234852, iteration: 356585
loss: 0.9822443127632141,grad_norm: 0.7981863969336376, iteration: 356586
loss: 0.9946422576904297,grad_norm: 0.9129076961360832, iteration: 356587
loss: 0.9462122321128845,grad_norm: 0.7935114403380579, iteration: 356588
loss: 1.0060014724731445,grad_norm: 0.8503514121863032, iteration: 356589
loss: 0.9944039583206177,grad_norm: 0.6994653900771707, iteration: 356590
loss: 1.0121581554412842,grad_norm: 0.8409948871625422, iteration: 356591
loss: 1.0485520362854004,grad_norm: 0.9999992287030087, iteration: 356592
loss: 0.9753936529159546,grad_norm: 0.8426441447446976, iteration: 356593
loss: 1.002211570739746,grad_norm: 0.9417787052100466, iteration: 356594
loss: 1.0043474435806274,grad_norm: 0.7460543756637704, iteration: 356595
loss: 1.0338751077651978,grad_norm: 0.9794572983856804, iteration: 356596
loss: 1.0357699394226074,grad_norm: 0.7982289263699399, iteration: 356597
loss: 0.985119640827179,grad_norm: 0.8628159976125214, iteration: 356598
loss: 1.136412262916565,grad_norm: 0.9999993448195507, iteration: 356599
loss: 1.0163042545318604,grad_norm: 0.6525569060023425, iteration: 356600
loss: 1.0089651346206665,grad_norm: 0.9999998486616225, iteration: 356601
loss: 0.9883443713188171,grad_norm: 0.999999088917228, iteration: 356602
loss: 1.0010038614273071,grad_norm: 0.8305630569755348, iteration: 356603
loss: 1.0151244401931763,grad_norm: 0.7826996608385532, iteration: 356604
loss: 0.973050594329834,grad_norm: 0.8205938233647261, iteration: 356605
loss: 1.0074615478515625,grad_norm: 0.7098451424671464, iteration: 356606
loss: 0.9729606509208679,grad_norm: 0.7761205207313754, iteration: 356607
loss: 0.9735274910926819,grad_norm: 0.8787208065554167, iteration: 356608
loss: 0.9992888569831848,grad_norm: 0.9238493783496465, iteration: 356609
loss: 1.0000108480453491,grad_norm: 0.8122378681511893, iteration: 356610
loss: 0.9912204146385193,grad_norm: 0.7071040811375825, iteration: 356611
loss: 1.031127691268921,grad_norm: 0.7278480856539309, iteration: 356612
loss: 0.9663844108581543,grad_norm: 0.7276869437312622, iteration: 356613
loss: 0.9777706861495972,grad_norm: 0.8231509203859178, iteration: 356614
loss: 1.0286219120025635,grad_norm: 0.9348117328919238, iteration: 356615
loss: 1.0112124681472778,grad_norm: 0.7398250640044041, iteration: 356616
loss: 1.0430119037628174,grad_norm: 0.8782618526779247, iteration: 356617
loss: 0.9928129315376282,grad_norm: 0.7761544658055889, iteration: 356618
loss: 0.9760206341743469,grad_norm: 0.7962733009839029, iteration: 356619
loss: 1.0144046545028687,grad_norm: 0.8929143681148133, iteration: 356620
loss: 0.9583945274353027,grad_norm: 0.9059172667378523, iteration: 356621
loss: 0.9930440783500671,grad_norm: 0.8480446107732472, iteration: 356622
loss: 1.0007127523422241,grad_norm: 0.787184212756662, iteration: 356623
loss: 1.011675477027893,grad_norm: 0.779735596223683, iteration: 356624
loss: 0.9803255796432495,grad_norm: 0.9643657900826549, iteration: 356625
loss: 1.0209035873413086,grad_norm: 0.7618200308242851, iteration: 356626
loss: 1.085255742073059,grad_norm: 0.9999996981653279, iteration: 356627
loss: 1.0312998294830322,grad_norm: 0.9343416745193089, iteration: 356628
loss: 1.0086474418640137,grad_norm: 0.8412349769266784, iteration: 356629
loss: 0.9859645962715149,grad_norm: 0.81491082983781, iteration: 356630
loss: 1.0552055835723877,grad_norm: 0.7445865580041524, iteration: 356631
loss: 0.9764306545257568,grad_norm: 0.8976714921192118, iteration: 356632
loss: 0.973646342754364,grad_norm: 0.9581510673212953, iteration: 356633
loss: 0.9811023473739624,grad_norm: 0.9760936043191842, iteration: 356634
loss: 1.038082242012024,grad_norm: 0.9984338230767531, iteration: 356635
loss: 1.0049484968185425,grad_norm: 0.9106435046110696, iteration: 356636
loss: 0.9959297180175781,grad_norm: 0.8058242499571513, iteration: 356637
loss: 0.9882868528366089,grad_norm: 0.7807388613807856, iteration: 356638
loss: 0.986320972442627,grad_norm: 0.9999992422841211, iteration: 356639
loss: 1.008345603942871,grad_norm: 0.9999990920473344, iteration: 356640
loss: 1.0081011056900024,grad_norm: 0.7447702574828232, iteration: 356641
loss: 0.979210376739502,grad_norm: 0.7628436975821534, iteration: 356642
loss: 1.059931993484497,grad_norm: 0.999999331189265, iteration: 356643
loss: 1.0250461101531982,grad_norm: 0.8211920807721297, iteration: 356644
loss: 0.9917886257171631,grad_norm: 0.8256380957725349, iteration: 356645
loss: 0.9909905791282654,grad_norm: 0.6739950248651925, iteration: 356646
loss: 0.991631031036377,grad_norm: 0.7300844802760176, iteration: 356647
loss: 0.9918220043182373,grad_norm: 0.9999995879102841, iteration: 356648
loss: 1.017691731452942,grad_norm: 0.7530049065060352, iteration: 356649
loss: 0.9968467950820923,grad_norm: 0.7992031273455877, iteration: 356650
loss: 0.992116391658783,grad_norm: 0.6618621939808459, iteration: 356651
loss: 0.9931861758232117,grad_norm: 0.8603531893424772, iteration: 356652
loss: 1.09719979763031,grad_norm: 0.9134598068662914, iteration: 356653
loss: 1.0279662609100342,grad_norm: 0.7583921169668695, iteration: 356654
loss: 1.001709222793579,grad_norm: 0.8644984689004944, iteration: 356655
loss: 0.9505748748779297,grad_norm: 0.844754910230738, iteration: 356656
loss: 1.0339529514312744,grad_norm: 0.9999992622666718, iteration: 356657
loss: 0.998759925365448,grad_norm: 0.8986773829108998, iteration: 356658
loss: 1.010843276977539,grad_norm: 0.8424866466274744, iteration: 356659
loss: 0.9976471066474915,grad_norm: 0.8046245726604256, iteration: 356660
loss: 1.0735371112823486,grad_norm: 0.93332425655058, iteration: 356661
loss: 1.0214568376541138,grad_norm: 0.7811606415145477, iteration: 356662
loss: 0.9996134638786316,grad_norm: 0.9670045299319364, iteration: 356663
loss: 1.0120280981063843,grad_norm: 0.6893998174813283, iteration: 356664
loss: 1.0318752527236938,grad_norm: 0.799929337069559, iteration: 356665
loss: 0.970604658126831,grad_norm: 0.7142081902268075, iteration: 356666
loss: 1.014041781425476,grad_norm: 0.9023154648162348, iteration: 356667
loss: 0.9844653606414795,grad_norm: 0.8553235415848602, iteration: 356668
loss: 1.026760458946228,grad_norm: 0.7897988981498849, iteration: 356669
loss: 1.0128785371780396,grad_norm: 0.736923742815773, iteration: 356670
loss: 0.9816311001777649,grad_norm: 0.8255633666046944, iteration: 356671
loss: 1.0030125379562378,grad_norm: 0.7249410089381091, iteration: 356672
loss: 1.0270084142684937,grad_norm: 0.6659758535778074, iteration: 356673
loss: 0.9797919988632202,grad_norm: 0.9418351732339574, iteration: 356674
loss: 1.0241347551345825,grad_norm: 0.7963151588693324, iteration: 356675
loss: 1.0643850564956665,grad_norm: 0.999999445944616, iteration: 356676
loss: 0.9864818453788757,grad_norm: 0.8431256875725729, iteration: 356677
loss: 1.0307421684265137,grad_norm: 0.8984406354932822, iteration: 356678
loss: 1.0221681594848633,grad_norm: 0.874901434598715, iteration: 356679
loss: 1.0047237873077393,grad_norm: 0.8770387476733295, iteration: 356680
loss: 1.0376330614089966,grad_norm: 0.9999997086474866, iteration: 356681
loss: 0.957058846950531,grad_norm: 0.7572409350954373, iteration: 356682
loss: 1.0401087999343872,grad_norm: 0.7712891144435831, iteration: 356683
loss: 1.061970591545105,grad_norm: 0.8266858894937983, iteration: 356684
loss: 1.0161982774734497,grad_norm: 0.7208874643486257, iteration: 356685
loss: 0.9826944470405579,grad_norm: 0.7554480561593745, iteration: 356686
loss: 1.005539894104004,grad_norm: 0.8010922590040682, iteration: 356687
loss: 1.0399258136749268,grad_norm: 0.9999998765538136, iteration: 356688
loss: 0.9772949814796448,grad_norm: 0.8084433341025002, iteration: 356689
loss: 0.9719347953796387,grad_norm: 0.830859819785181, iteration: 356690
loss: 0.9940444827079773,grad_norm: 0.6976346349299647, iteration: 356691
loss: 1.0031358003616333,grad_norm: 0.9999991768234258, iteration: 356692
loss: 0.9932311177253723,grad_norm: 0.6710272906634104, iteration: 356693
loss: 0.9932610392570496,grad_norm: 0.8775567017718475, iteration: 356694
loss: 1.0022796392440796,grad_norm: 0.9999999273901866, iteration: 356695
loss: 1.0389854907989502,grad_norm: 0.9999991947498647, iteration: 356696
loss: 1.0196903944015503,grad_norm: 0.7842010478287555, iteration: 356697
loss: 1.0014334917068481,grad_norm: 0.7736319758881508, iteration: 356698
loss: 1.0342105627059937,grad_norm: 0.8176187731086861, iteration: 356699
loss: 0.9822217226028442,grad_norm: 0.7668895692858534, iteration: 356700
loss: 0.9717221856117249,grad_norm: 0.6201523280319295, iteration: 356701
loss: 0.9730193614959717,grad_norm: 0.6727115372513535, iteration: 356702
loss: 0.9901567101478577,grad_norm: 0.6267552025557672, iteration: 356703
loss: 0.97419273853302,grad_norm: 0.8142416940353306, iteration: 356704
loss: 0.9659479260444641,grad_norm: 0.7546107671066208, iteration: 356705
loss: 0.9930045008659363,grad_norm: 0.7829753677246556, iteration: 356706
loss: 0.950370728969574,grad_norm: 0.6920362538109328, iteration: 356707
loss: 1.01157808303833,grad_norm: 0.7495054518768172, iteration: 356708
loss: 1.0153391361236572,grad_norm: 0.7552556885933043, iteration: 356709
loss: 1.0036544799804688,grad_norm: 0.9999997413045625, iteration: 356710
loss: 0.9863318204879761,grad_norm: 0.7633730186114696, iteration: 356711
loss: 1.0060163736343384,grad_norm: 0.8230680336793735, iteration: 356712
loss: 1.0115948915481567,grad_norm: 0.999999333640834, iteration: 356713
loss: 1.0052363872528076,grad_norm: 0.7619608858059228, iteration: 356714
loss: 0.997332751750946,grad_norm: 0.9687363588291678, iteration: 356715
loss: 1.0100650787353516,grad_norm: 0.7935024939135797, iteration: 356716
loss: 0.9784899353981018,grad_norm: 0.8014817029520448, iteration: 356717
loss: 1.013257622718811,grad_norm: 0.8060907928234242, iteration: 356718
loss: 1.017150640487671,grad_norm: 0.9248851879127755, iteration: 356719
loss: 0.9784387350082397,grad_norm: 0.8427374703292992, iteration: 356720
loss: 0.9780770540237427,grad_norm: 0.870045070406354, iteration: 356721
loss: 0.99674391746521,grad_norm: 0.6946681790936676, iteration: 356722
loss: 1.062896490097046,grad_norm: 0.9999993959443269, iteration: 356723
loss: 1.0047001838684082,grad_norm: 0.8751341747906652, iteration: 356724
loss: 0.9877769947052002,grad_norm: 0.772071877400931, iteration: 356725
loss: 0.9975989460945129,grad_norm: 0.7191546211664762, iteration: 356726
loss: 0.9588862061500549,grad_norm: 0.7555412878860891, iteration: 356727
loss: 1.0122629404067993,grad_norm: 0.8570691645863179, iteration: 356728
loss: 0.9938971996307373,grad_norm: 0.8294918627841188, iteration: 356729
loss: 1.0270074605941772,grad_norm: 0.8344927323914363, iteration: 356730
loss: 1.0082907676696777,grad_norm: 0.7765473089677261, iteration: 356731
loss: 1.001304030418396,grad_norm: 0.8631265554533637, iteration: 356732
loss: 0.9758840799331665,grad_norm: 0.9999989937729145, iteration: 356733
loss: 0.983794093132019,grad_norm: 0.8012710961096211, iteration: 356734
loss: 1.0033608675003052,grad_norm: 0.6222578451255533, iteration: 356735
loss: 1.0128252506256104,grad_norm: 0.7906328882908138, iteration: 356736
loss: 1.0007160902023315,grad_norm: 0.7972242101692534, iteration: 356737
loss: 0.996455729007721,grad_norm: 0.832371329919075, iteration: 356738
loss: 1.0414204597473145,grad_norm: 0.8392733863848252, iteration: 356739
loss: 1.0336182117462158,grad_norm: 0.8715028269646218, iteration: 356740
loss: 1.0214276313781738,grad_norm: 0.8157680605376939, iteration: 356741
loss: 1.0496001243591309,grad_norm: 0.9999996521834461, iteration: 356742
loss: 0.988193154335022,grad_norm: 0.7339657676045361, iteration: 356743
loss: 1.030571699142456,grad_norm: 0.9230333951959566, iteration: 356744
loss: 1.0093737840652466,grad_norm: 0.7622625682990523, iteration: 356745
loss: 0.9511197805404663,grad_norm: 0.838567045301236, iteration: 356746
loss: 0.9719398021697998,grad_norm: 0.9343683818870689, iteration: 356747
loss: 1.0206351280212402,grad_norm: 0.9999991792617539, iteration: 356748
loss: 0.9958911538124084,grad_norm: 0.9999990676192843, iteration: 356749
loss: 1.0646312236785889,grad_norm: 0.8347193844357076, iteration: 356750
loss: 0.9905966520309448,grad_norm: 0.9490610098056558, iteration: 356751
loss: 0.9909733533859253,grad_norm: 0.7258793681897138, iteration: 356752
loss: 1.0957882404327393,grad_norm: 0.9999993391774745, iteration: 356753
loss: 1.034231424331665,grad_norm: 0.8450083972021596, iteration: 356754
loss: 0.9688059687614441,grad_norm: 0.9511807235290063, iteration: 356755
loss: 0.9730026721954346,grad_norm: 0.850074069888477, iteration: 356756
loss: 1.0985291004180908,grad_norm: 0.7959690799120834, iteration: 356757
loss: 0.9676277041435242,grad_norm: 0.8244626128943817, iteration: 356758
loss: 1.0538281202316284,grad_norm: 0.999999704533841, iteration: 356759
loss: 0.9765128493309021,grad_norm: 0.8841797210508613, iteration: 356760
loss: 0.9965981841087341,grad_norm: 0.882582784789089, iteration: 356761
loss: 0.974501371383667,grad_norm: 0.7429977509422362, iteration: 356762
loss: 1.0073484182357788,grad_norm: 0.873945609023759, iteration: 356763
loss: 0.9945948123931885,grad_norm: 0.7735895134190365, iteration: 356764
loss: 0.9869082570075989,grad_norm: 0.7811413471083412, iteration: 356765
loss: 1.0545268058776855,grad_norm: 0.8784071355896734, iteration: 356766
loss: 1.0704466104507446,grad_norm: 0.8362636509501625, iteration: 356767
loss: 1.0846807956695557,grad_norm: 0.9961981658027067, iteration: 356768
loss: 1.0072124004364014,grad_norm: 0.8157953440667971, iteration: 356769
loss: 0.980460524559021,grad_norm: 0.8305235330421733, iteration: 356770
loss: 1.0134944915771484,grad_norm: 0.8375066809194605, iteration: 356771
loss: 1.0256059169769287,grad_norm: 0.8647200139040968, iteration: 356772
loss: 1.1477166414260864,grad_norm: 0.9999990895232327, iteration: 356773
loss: 0.9980342984199524,grad_norm: 0.7867820743233688, iteration: 356774
loss: 1.0427297353744507,grad_norm: 0.999999119053868, iteration: 356775
loss: 1.0305423736572266,grad_norm: 0.8343425012746665, iteration: 356776
loss: 0.9784969091415405,grad_norm: 0.7523130623567592, iteration: 356777
loss: 1.1547147035598755,grad_norm: 0.7916206601298521, iteration: 356778
loss: 1.0077853202819824,grad_norm: 0.8993492519989115, iteration: 356779
loss: 0.9734888672828674,grad_norm: 0.6870357056035126, iteration: 356780
loss: 0.9629068970680237,grad_norm: 0.9999994041310195, iteration: 356781
loss: 0.9853973388671875,grad_norm: 0.9999990535668306, iteration: 356782
loss: 1.019193172454834,grad_norm: 0.9999991124164759, iteration: 356783
loss: 0.9995825886726379,grad_norm: 0.8465915246786483, iteration: 356784
loss: 1.0077544450759888,grad_norm: 0.8911944979875119, iteration: 356785
loss: 1.0090991258621216,grad_norm: 0.8940366527112621, iteration: 356786
loss: 1.0818543434143066,grad_norm: 0.8269949159757789, iteration: 356787
loss: 0.9742357134819031,grad_norm: 0.8654423386234356, iteration: 356788
loss: 1.0333225727081299,grad_norm: 0.9972301694108048, iteration: 356789
loss: 1.0334159135818481,grad_norm: 0.7981547835489956, iteration: 356790
loss: 1.0255159139633179,grad_norm: 0.9975838322915682, iteration: 356791
loss: 0.9921486377716064,grad_norm: 0.9629613774583331, iteration: 356792
loss: 0.9844018220901489,grad_norm: 0.9999990241666767, iteration: 356793
loss: 1.0679315328598022,grad_norm: 0.9999991738572274, iteration: 356794
loss: 0.9875639081001282,grad_norm: 0.9999993356972695, iteration: 356795
loss: 1.0536381006240845,grad_norm: 0.8634958894840842, iteration: 356796
loss: 1.0241336822509766,grad_norm: 0.94360006487151, iteration: 356797
loss: 1.0061808824539185,grad_norm: 0.8791735070472876, iteration: 356798
loss: 0.9939106106758118,grad_norm: 0.894649204808705, iteration: 356799
loss: 1.0690613985061646,grad_norm: 0.9999993054436876, iteration: 356800
loss: 1.139622449874878,grad_norm: 0.9022982112416423, iteration: 356801
loss: 1.0416481494903564,grad_norm: 0.8860072573379216, iteration: 356802
loss: 1.1327341794967651,grad_norm: 0.8994227262303041, iteration: 356803
loss: 1.0910331010818481,grad_norm: 0.7639429368250918, iteration: 356804
loss: 1.1414631605148315,grad_norm: 0.9802050804521373, iteration: 356805
loss: 1.0690104961395264,grad_norm: 0.7878071412571928, iteration: 356806
loss: 1.0254946947097778,grad_norm: 0.8371215229517296, iteration: 356807
loss: 1.0450692176818848,grad_norm: 0.951799658961685, iteration: 356808
loss: 1.096744179725647,grad_norm: 0.990126064909976, iteration: 356809
loss: 1.001396656036377,grad_norm: 0.9091657715847462, iteration: 356810
loss: 1.0311719179153442,grad_norm: 0.8349912122149592, iteration: 356811
loss: 1.151878833770752,grad_norm: 0.9175094614963468, iteration: 356812
loss: 1.0656466484069824,grad_norm: 0.9431508351615828, iteration: 356813
loss: 1.0027700662612915,grad_norm: 0.9317405315873244, iteration: 356814
loss: 1.0200793743133545,grad_norm: 0.891457982387592, iteration: 356815
loss: 0.9677368402481079,grad_norm: 0.9191744552015163, iteration: 356816
loss: 1.003245234489441,grad_norm: 0.7757841164652175, iteration: 356817
loss: 0.9520825147628784,grad_norm: 0.8322314594897793, iteration: 356818
loss: 1.108534812927246,grad_norm: 0.9122235874525769, iteration: 356819
loss: 1.0917147397994995,grad_norm: 0.9999995572815105, iteration: 356820
loss: 0.9948415756225586,grad_norm: 0.7639530418483971, iteration: 356821
loss: 1.046958088874817,grad_norm: 0.8059778185711025, iteration: 356822
loss: 1.0052131414413452,grad_norm: 0.7876444511163088, iteration: 356823
loss: 1.1172127723693848,grad_norm: 0.9685832654576488, iteration: 356824
loss: 0.9974179267883301,grad_norm: 0.8360690992757278, iteration: 356825
loss: 0.9794647097587585,grad_norm: 0.8204847903976282, iteration: 356826
loss: 1.0100113153457642,grad_norm: 0.7549367405801133, iteration: 356827
loss: 1.035980463027954,grad_norm: 0.9999994764196266, iteration: 356828
loss: 0.9953054189682007,grad_norm: 0.8348649558276375, iteration: 356829
loss: 1.061730980873108,grad_norm: 0.7393323729523729, iteration: 356830
loss: 1.0989623069763184,grad_norm: 0.9999998397099414, iteration: 356831
loss: 1.0142674446105957,grad_norm: 0.7617170788043456, iteration: 356832
loss: 1.0731102228164673,grad_norm: 0.8272633483717342, iteration: 356833
loss: 1.0264264345169067,grad_norm: 0.945361993648397, iteration: 356834
loss: 1.0296428203582764,grad_norm: 0.8513587811922722, iteration: 356835
loss: 1.0284698009490967,grad_norm: 0.8657389714278503, iteration: 356836
loss: 1.0117721557617188,grad_norm: 0.8364227243999736, iteration: 356837
loss: 1.0067229270935059,grad_norm: 0.9676362896304825, iteration: 356838
loss: 0.982478141784668,grad_norm: 0.8134225616843811, iteration: 356839
loss: 1.0110604763031006,grad_norm: 0.9999990784670372, iteration: 356840
loss: 1.0115762948989868,grad_norm: 0.9610446423685499, iteration: 356841
loss: 1.0275002717971802,grad_norm: 0.717406897586179, iteration: 356842
loss: 1.0328524112701416,grad_norm: 0.8991971886685001, iteration: 356843
loss: 1.0139975547790527,grad_norm: 0.8417803867743489, iteration: 356844
loss: 1.0475469827651978,grad_norm: 0.8055822988324657, iteration: 356845
loss: 0.9817672967910767,grad_norm: 0.9184024128752959, iteration: 356846
loss: 0.9639679789543152,grad_norm: 0.8504472339252943, iteration: 356847
loss: 1.0417143106460571,grad_norm: 0.7646258622287321, iteration: 356848
loss: 1.0326625108718872,grad_norm: 0.80103290975765, iteration: 356849
loss: 1.0730606317520142,grad_norm: 0.9999993179024013, iteration: 356850
loss: 0.9716033935546875,grad_norm: 0.9481747248108224, iteration: 356851
loss: 0.9677253365516663,grad_norm: 0.9999991464784099, iteration: 356852
loss: 0.9968786835670471,grad_norm: 0.9965785487556142, iteration: 356853
loss: 1.0407967567443848,grad_norm: 0.8506045805491347, iteration: 356854
loss: 1.0383918285369873,grad_norm: 0.9049228893726934, iteration: 356855
loss: 0.9918915629386902,grad_norm: 0.755522996577372, iteration: 356856
loss: 0.9734529852867126,grad_norm: 0.933140148605487, iteration: 356857
loss: 0.9951152205467224,grad_norm: 0.7958169805518348, iteration: 356858
loss: 1.011476993560791,grad_norm: 0.785860462765356, iteration: 356859
loss: 0.9819432497024536,grad_norm: 0.7820127069452979, iteration: 356860
loss: 1.0246778726577759,grad_norm: 0.9999994421318605, iteration: 356861
loss: 1.0663847923278809,grad_norm: 0.9999992801616449, iteration: 356862
loss: 1.0550382137298584,grad_norm: 0.8560631621112957, iteration: 356863
loss: 0.9994670748710632,grad_norm: 0.9999992465538297, iteration: 356864
loss: 1.048959732055664,grad_norm: 0.7529370139453885, iteration: 356865
loss: 1.000657320022583,grad_norm: 0.9999991588467616, iteration: 356866
loss: 1.0209598541259766,grad_norm: 0.9706059338412442, iteration: 356867
loss: 1.0414713621139526,grad_norm: 0.7114600657437146, iteration: 356868
loss: 0.9977309107780457,grad_norm: 0.7893912429416673, iteration: 356869
loss: 1.022341251373291,grad_norm: 0.75730418795023, iteration: 356870
loss: 1.0144752264022827,grad_norm: 0.8940240605690558, iteration: 356871
loss: 1.0570732355117798,grad_norm: 0.9999999255759616, iteration: 356872
loss: 1.0611608028411865,grad_norm: 0.9260807942141572, iteration: 356873
loss: 1.0873078107833862,grad_norm: 0.9999993094144096, iteration: 356874
loss: 0.9998065829277039,grad_norm: 0.8284472229082817, iteration: 356875
loss: 1.0161792039871216,grad_norm: 0.8965524278678726, iteration: 356876
loss: 1.0928574800491333,grad_norm: 0.9999991283720469, iteration: 356877
loss: 1.1937131881713867,grad_norm: 0.9999995870036799, iteration: 356878
loss: 1.1052889823913574,grad_norm: 0.9999991795490695, iteration: 356879
loss: 1.006064534187317,grad_norm: 0.863581919478827, iteration: 356880
loss: 1.003890037536621,grad_norm: 0.8969234944864473, iteration: 356881
loss: 1.1534149646759033,grad_norm: 0.9999995224972018, iteration: 356882
loss: 0.9778695702552795,grad_norm: 0.7418382837811253, iteration: 356883
loss: 1.0148848295211792,grad_norm: 0.9096272376045658, iteration: 356884
loss: 0.9898134469985962,grad_norm: 0.7605482946245723, iteration: 356885
loss: 0.9995049238204956,grad_norm: 0.8169721189291339, iteration: 356886
loss: 1.0214205980300903,grad_norm: 0.8662916507654982, iteration: 356887
loss: 1.0764577388763428,grad_norm: 0.8780808765930886, iteration: 356888
loss: 1.0674389600753784,grad_norm: 0.999999669052945, iteration: 356889
loss: 0.9540396332740784,grad_norm: 0.9817988573368213, iteration: 356890
loss: 1.0041059255599976,grad_norm: 0.842038246448489, iteration: 356891
loss: 1.0883182287216187,grad_norm: 0.9999998980547892, iteration: 356892
loss: 1.027894377708435,grad_norm: 0.9425497619598207, iteration: 356893
loss: 1.0635780096054077,grad_norm: 0.8589730104543888, iteration: 356894
loss: 0.991402268409729,grad_norm: 0.800041019438785, iteration: 356895
loss: 0.9978396892547607,grad_norm: 0.7944832798684353, iteration: 356896
loss: 1.0131152868270874,grad_norm: 0.9999991146380705, iteration: 356897
loss: 1.0628547668457031,grad_norm: 0.7443383043744304, iteration: 356898
loss: 0.9868196249008179,grad_norm: 0.8148996325182662, iteration: 356899
loss: 0.9670846462249756,grad_norm: 0.8734969888107023, iteration: 356900
loss: 0.9864967465400696,grad_norm: 0.9999997714178958, iteration: 356901
loss: 0.9912683367729187,grad_norm: 0.9042893176164463, iteration: 356902
loss: 0.9820183515548706,grad_norm: 0.7290547943548162, iteration: 356903
loss: 1.0662329196929932,grad_norm: 0.8012683984408863, iteration: 356904
loss: 0.9891692399978638,grad_norm: 0.8818531778567846, iteration: 356905
loss: 1.0043307542800903,grad_norm: 0.8827260848910145, iteration: 356906
loss: 1.039260745048523,grad_norm: 0.9579123289010839, iteration: 356907
loss: 1.0174347162246704,grad_norm: 0.924028063321346, iteration: 356908
loss: 0.9961838722229004,grad_norm: 0.6877596024398002, iteration: 356909
loss: 1.3465169668197632,grad_norm: 0.9999994307763477, iteration: 356910
loss: 1.1005616188049316,grad_norm: 0.9999994996434493, iteration: 356911
loss: 1.0090128183364868,grad_norm: 0.9909335172296141, iteration: 356912
loss: 0.9955180883407593,grad_norm: 0.7084642410024123, iteration: 356913
loss: 1.044936180114746,grad_norm: 0.9339450979389652, iteration: 356914
loss: 0.9514400362968445,grad_norm: 0.9019464874535721, iteration: 356915
loss: 1.0190589427947998,grad_norm: 0.8472885252930276, iteration: 356916
loss: 1.0057436227798462,grad_norm: 0.8164930516887396, iteration: 356917
loss: 1.0058488845825195,grad_norm: 0.9065477274726391, iteration: 356918
loss: 1.0469671487808228,grad_norm: 0.8656412473372747, iteration: 356919
loss: 1.1246685981750488,grad_norm: 0.8413091194041894, iteration: 356920
loss: 0.9624807834625244,grad_norm: 0.8042390088430302, iteration: 356921
loss: 1.0145779848098755,grad_norm: 0.8271261182700036, iteration: 356922
loss: 1.0098178386688232,grad_norm: 0.9999998302395843, iteration: 356923
loss: 1.01093590259552,grad_norm: 0.9870795067257363, iteration: 356924
loss: 1.0403367280960083,grad_norm: 0.9999996036319061, iteration: 356925
loss: 1.0156599283218384,grad_norm: 0.9161240334999233, iteration: 356926
loss: 1.0058386325836182,grad_norm: 0.9095901504369568, iteration: 356927
loss: 1.0475938320159912,grad_norm: 0.8466029582609346, iteration: 356928
loss: 0.9897705316543579,grad_norm: 0.8932682883798936, iteration: 356929
loss: 0.9928258657455444,grad_norm: 0.8322791016237773, iteration: 356930
loss: 1.025108814239502,grad_norm: 0.8147743971131487, iteration: 356931
loss: 1.072715401649475,grad_norm: 0.786811102510177, iteration: 356932
loss: 1.0793954133987427,grad_norm: 0.856190558355644, iteration: 356933
loss: 1.0758472681045532,grad_norm: 0.9197097079307145, iteration: 356934
loss: 1.1111315488815308,grad_norm: 0.7724422266400862, iteration: 356935
loss: 1.0222219228744507,grad_norm: 0.8651131681704336, iteration: 356936
loss: 0.9662690162658691,grad_norm: 0.7372413293545098, iteration: 356937
loss: 1.0811922550201416,grad_norm: 0.9999992362737367, iteration: 356938
loss: 0.9935523867607117,grad_norm: 0.8766532958114587, iteration: 356939
loss: 0.9576499462127686,grad_norm: 0.8758241636356566, iteration: 356940
loss: 0.9807995557785034,grad_norm: 0.833306603920762, iteration: 356941
loss: 0.9882853031158447,grad_norm: 0.9999992640737846, iteration: 356942
loss: 1.0139373540878296,grad_norm: 0.765050528412574, iteration: 356943
loss: 1.0499212741851807,grad_norm: 0.9999991708318484, iteration: 356944
loss: 0.9368440508842468,grad_norm: 0.9227946651736629, iteration: 356945
loss: 1.0106388330459595,grad_norm: 0.9766691383217383, iteration: 356946
loss: 1.0026289224624634,grad_norm: 0.8090357824408574, iteration: 356947
loss: 1.0174506902694702,grad_norm: 0.7955670829619197, iteration: 356948
loss: 1.0004472732543945,grad_norm: 0.7685063903427666, iteration: 356949
loss: 1.0314795970916748,grad_norm: 0.7258744145262022, iteration: 356950
loss: 1.0958139896392822,grad_norm: 0.8054456228370828, iteration: 356951
loss: 1.081011414527893,grad_norm: 0.8397294204999715, iteration: 356952
loss: 1.045894980430603,grad_norm: 0.8190639904578535, iteration: 356953
loss: 1.0021874904632568,grad_norm: 0.7964068412008619, iteration: 356954
loss: 0.9783729910850525,grad_norm: 0.7800418275997623, iteration: 356955
loss: 0.9936181902885437,grad_norm: 0.9999991753847499, iteration: 356956
loss: 0.9970158338546753,grad_norm: 0.7038106995830978, iteration: 356957
loss: 1.1690421104431152,grad_norm: 0.9999996964634593, iteration: 356958
loss: 1.0607784986495972,grad_norm: 0.8091541568029563, iteration: 356959
loss: 1.0444103479385376,grad_norm: 0.8269354913062419, iteration: 356960
loss: 1.0884833335876465,grad_norm: 0.9999993349905947, iteration: 356961
loss: 1.015533685684204,grad_norm: 0.9463146085860549, iteration: 356962
loss: 1.0889825820922852,grad_norm: 0.9971029021294683, iteration: 356963
loss: 1.0424072742462158,grad_norm: 0.8899727627824395, iteration: 356964
loss: 1.0236804485321045,grad_norm: 0.8515866703880892, iteration: 356965
loss: 1.0700981616973877,grad_norm: 0.740702061481126, iteration: 356966
loss: 0.9930568337440491,grad_norm: 0.7731053023267144, iteration: 356967
loss: 1.0340776443481445,grad_norm: 0.9999999568207626, iteration: 356968
loss: 1.0629494190216064,grad_norm: 0.9311033640319883, iteration: 356969
loss: 1.0485798120498657,grad_norm: 0.9999989261212608, iteration: 356970
loss: 0.9841920733451843,grad_norm: 0.9999990678304849, iteration: 356971
loss: 1.0838727951049805,grad_norm: 0.7416045272149788, iteration: 356972
loss: 1.0034633874893188,grad_norm: 0.8341640793119632, iteration: 356973
loss: 1.0055930614471436,grad_norm: 0.9168685666179783, iteration: 356974
loss: 1.0143685340881348,grad_norm: 0.9856308242434114, iteration: 356975
loss: 1.0296289920806885,grad_norm: 0.7512378190523261, iteration: 356976
loss: 1.0391982793807983,grad_norm: 0.8841950424440576, iteration: 356977
loss: 1.0668716430664062,grad_norm: 0.775867212950422, iteration: 356978
loss: 1.062978744506836,grad_norm: 0.9298454631407826, iteration: 356979
loss: 1.0514841079711914,grad_norm: 0.9999992838842074, iteration: 356980
loss: 1.0151972770690918,grad_norm: 0.8885273835241754, iteration: 356981
loss: 1.1395761966705322,grad_norm: 0.9999991011447259, iteration: 356982
loss: 0.9973484873771667,grad_norm: 0.8332406263832247, iteration: 356983
loss: 1.2158706188201904,grad_norm: 0.9999990234709986, iteration: 356984
loss: 1.1321055889129639,grad_norm: 0.9999991519413846, iteration: 356985
loss: 0.9976881146430969,grad_norm: 0.9397576925467755, iteration: 356986
loss: 0.9786756038665771,grad_norm: 0.8086840113817778, iteration: 356987
loss: 1.015566110610962,grad_norm: 0.8582455336448566, iteration: 356988
loss: 1.248958945274353,grad_norm: 0.9883708549711412, iteration: 356989
loss: 1.0664701461791992,grad_norm: 0.9999991430345851, iteration: 356990
loss: 1.0172755718231201,grad_norm: 0.7923368975674204, iteration: 356991
loss: 1.027023434638977,grad_norm: 0.9462623524928013, iteration: 356992
loss: 0.9658390283584595,grad_norm: 0.8247197087117405, iteration: 356993
loss: 0.9781555533409119,grad_norm: 0.7493869731371755, iteration: 356994
loss: 0.9730563163757324,grad_norm: 0.9513221700349894, iteration: 356995
loss: 1.056403398513794,grad_norm: 0.9192271447091708, iteration: 356996
loss: 1.0345298051834106,grad_norm: 0.8224892742075066, iteration: 356997
loss: 1.0654339790344238,grad_norm: 0.9837599623435969, iteration: 356998
loss: 1.0850344896316528,grad_norm: 0.9999995510296699, iteration: 356999
loss: 0.99159175157547,grad_norm: 0.7449539617915824, iteration: 357000
loss: 0.9750877618789673,grad_norm: 0.7201429866610065, iteration: 357001
loss: 1.0472164154052734,grad_norm: 0.8366579686930219, iteration: 357002
loss: 0.9701018929481506,grad_norm: 0.8885340599049562, iteration: 357003
loss: 1.034264326095581,grad_norm: 0.8532742617318653, iteration: 357004
loss: 1.0203354358673096,grad_norm: 0.9999989335948035, iteration: 357005
loss: 0.9770925045013428,grad_norm: 0.8533180870773198, iteration: 357006
loss: 0.9996595978736877,grad_norm: 0.9999996846286776, iteration: 357007
loss: 0.9862928986549377,grad_norm: 0.8529826032099289, iteration: 357008
loss: 1.025526762008667,grad_norm: 0.919398863164892, iteration: 357009
loss: 1.0017114877700806,grad_norm: 0.999999194237708, iteration: 357010
loss: 1.0824240446090698,grad_norm: 0.9999995263864311, iteration: 357011
loss: 1.021187424659729,grad_norm: 0.9999993130438386, iteration: 357012
loss: 1.0137511491775513,grad_norm: 0.8272996800735298, iteration: 357013
loss: 1.0193889141082764,grad_norm: 0.9999998395148605, iteration: 357014
loss: 1.090890645980835,grad_norm: 0.9999994748017961, iteration: 357015
loss: 1.0203787088394165,grad_norm: 0.9314001588528044, iteration: 357016
loss: 1.0119361877441406,grad_norm: 0.7425672879811013, iteration: 357017
loss: 1.1388330459594727,grad_norm: 0.9999996107447671, iteration: 357018
loss: 1.1116960048675537,grad_norm: 0.9999996670496543, iteration: 357019
loss: 1.0354331731796265,grad_norm: 0.8561400075403874, iteration: 357020
loss: 1.008531093597412,grad_norm: 0.9999992465770858, iteration: 357021
loss: 0.9761574864387512,grad_norm: 0.8146391847395879, iteration: 357022
loss: 1.1395576000213623,grad_norm: 0.9999998074284885, iteration: 357023
loss: 0.951434314250946,grad_norm: 0.9468533515625729, iteration: 357024
loss: 1.0500622987747192,grad_norm: 0.860689436623242, iteration: 357025
loss: 0.9943684935569763,grad_norm: 0.8882822274261919, iteration: 357026
loss: 1.0014275312423706,grad_norm: 0.9389385201727671, iteration: 357027
loss: 0.9935863614082336,grad_norm: 0.8869576201677479, iteration: 357028
loss: 1.0503623485565186,grad_norm: 0.999999645683289, iteration: 357029
loss: 1.041214108467102,grad_norm: 0.9999996980274913, iteration: 357030
loss: 1.034347653388977,grad_norm: 0.7843615314371456, iteration: 357031
loss: 1.0108813047409058,grad_norm: 0.8489000599505684, iteration: 357032
loss: 1.0186717510223389,grad_norm: 0.9999996560912396, iteration: 357033
loss: 1.0402140617370605,grad_norm: 0.8242467526660379, iteration: 357034
loss: 1.0146212577819824,grad_norm: 0.9612775909158484, iteration: 357035
loss: 1.0279539823532104,grad_norm: 0.9711964748531516, iteration: 357036
loss: 1.0616319179534912,grad_norm: 0.9999990678759518, iteration: 357037
loss: 1.1584762334823608,grad_norm: 0.9149563149337157, iteration: 357038
loss: 0.994680643081665,grad_norm: 0.9999993403511255, iteration: 357039
loss: 0.9600306153297424,grad_norm: 0.9500154218110513, iteration: 357040
loss: 1.0178663730621338,grad_norm: 0.8460879487952199, iteration: 357041
loss: 0.980542004108429,grad_norm: 0.7445606970535756, iteration: 357042
loss: 1.0166431665420532,grad_norm: 0.8903454050705434, iteration: 357043
loss: 1.0895739793777466,grad_norm: 1.0000000179541828, iteration: 357044
loss: 1.0279624462127686,grad_norm: 0.9999992524699377, iteration: 357045
loss: 0.9945259690284729,grad_norm: 0.8967889870149062, iteration: 357046
loss: 1.0112755298614502,grad_norm: 0.733853393617777, iteration: 357047
loss: 1.0584979057312012,grad_norm: 0.7413199537690861, iteration: 357048
loss: 1.0022906064987183,grad_norm: 0.956718723232092, iteration: 357049
loss: 0.9865240454673767,grad_norm: 0.7844592778194597, iteration: 357050
loss: 0.9277386665344238,grad_norm: 0.8348680402549619, iteration: 357051
loss: 1.03246009349823,grad_norm: 0.748536775502573, iteration: 357052
loss: 0.9853318333625793,grad_norm: 0.7347621006734912, iteration: 357053
loss: 0.9952831864356995,grad_norm: 0.8922709071927573, iteration: 357054
loss: 1.1154729127883911,grad_norm: 0.9997677153815044, iteration: 357055
loss: 1.0127938985824585,grad_norm: 0.7958174030179752, iteration: 357056
loss: 1.166127324104309,grad_norm: 0.9999998528973485, iteration: 357057
loss: 1.035064458847046,grad_norm: 0.7686784898977437, iteration: 357058
loss: 0.9838477373123169,grad_norm: 0.7375985920495682, iteration: 357059
loss: 1.033831238746643,grad_norm: 0.8828597735822543, iteration: 357060
loss: 1.0401216745376587,grad_norm: 0.7934340133032048, iteration: 357061
loss: 0.9963371157646179,grad_norm: 0.7813644791784442, iteration: 357062
loss: 1.009641408920288,grad_norm: 0.8967771660370801, iteration: 357063
loss: 1.0800644159317017,grad_norm: 0.9999994831848849, iteration: 357064
loss: 1.0785002708435059,grad_norm: 0.9999991463605628, iteration: 357065
loss: 0.9925757646560669,grad_norm: 0.6611155405362518, iteration: 357066
loss: 1.052301287651062,grad_norm: 0.8633825203830092, iteration: 357067
loss: 1.0511469841003418,grad_norm: 0.9524764263153115, iteration: 357068
loss: 1.000494360923767,grad_norm: 0.9999993804923349, iteration: 357069
loss: 0.983353853225708,grad_norm: 0.8545686161045108, iteration: 357070
loss: 1.0654208660125732,grad_norm: 0.9999991748300187, iteration: 357071
loss: 1.0297448635101318,grad_norm: 0.9803912383569413, iteration: 357072
loss: 1.0040956735610962,grad_norm: 0.7605371914511595, iteration: 357073
loss: 1.0781229734420776,grad_norm: 0.9999992129559885, iteration: 357074
loss: 1.0280839204788208,grad_norm: 0.9999990762913049, iteration: 357075
loss: 1.0281713008880615,grad_norm: 0.9142002287530266, iteration: 357076
loss: 1.0368136167526245,grad_norm: 0.7817280654311513, iteration: 357077
loss: 0.9641984701156616,grad_norm: 0.8111353909844516, iteration: 357078
loss: 0.9799332618713379,grad_norm: 0.8331664285341008, iteration: 357079
loss: 1.0351940393447876,grad_norm: 0.8171463395284043, iteration: 357080
loss: 0.9867454767227173,grad_norm: 0.7713581564204481, iteration: 357081
loss: 0.9934799075126648,grad_norm: 0.7205504192347526, iteration: 357082
loss: 0.9932777881622314,grad_norm: 0.6488503862428306, iteration: 357083
loss: 1.0513933897018433,grad_norm: 0.8746811320951844, iteration: 357084
loss: 0.9797270894050598,grad_norm: 0.8384109081297373, iteration: 357085
loss: 1.0020852088928223,grad_norm: 0.9999995980548785, iteration: 357086
loss: 0.9750165343284607,grad_norm: 0.8621607925995293, iteration: 357087
loss: 1.0253345966339111,grad_norm: 0.8624891969861913, iteration: 357088
loss: 1.0937556028366089,grad_norm: 0.9999995529591653, iteration: 357089
loss: 1.021958589553833,grad_norm: 0.8579600520572274, iteration: 357090
loss: 1.0510480403900146,grad_norm: 0.8902784338623727, iteration: 357091
loss: 1.1062781810760498,grad_norm: 0.8005944046191467, iteration: 357092
loss: 1.0110394954681396,grad_norm: 0.7568498608047359, iteration: 357093
loss: 1.0593057870864868,grad_norm: 0.8519965577189942, iteration: 357094
loss: 0.9815791845321655,grad_norm: 0.9999990964473164, iteration: 357095
loss: 1.0399420261383057,grad_norm: 0.9557699603186484, iteration: 357096
loss: 1.037351131439209,grad_norm: 0.7570563586132463, iteration: 357097
loss: 1.0301631689071655,grad_norm: 0.8658911439463481, iteration: 357098
loss: 0.9853192567825317,grad_norm: 0.82620446202844, iteration: 357099
loss: 1.0101548433303833,grad_norm: 0.8624353010028677, iteration: 357100
loss: 1.035630226135254,grad_norm: 0.8148619678533482, iteration: 357101
loss: 1.0242581367492676,grad_norm: 0.9999991592631092, iteration: 357102
loss: 1.0308446884155273,grad_norm: 0.9999990928505971, iteration: 357103
loss: 1.0328558683395386,grad_norm: 0.7426771048711094, iteration: 357104
loss: 1.0663158893585205,grad_norm: 0.8290572691880578, iteration: 357105
loss: 0.9970034956932068,grad_norm: 0.9999990542284832, iteration: 357106
loss: 1.0400291681289673,grad_norm: 0.9076205877987601, iteration: 357107
loss: 1.0330840349197388,grad_norm: 0.9253241097380923, iteration: 357108
loss: 1.0158087015151978,grad_norm: 0.9999990764636588, iteration: 357109
loss: 1.0035239458084106,grad_norm: 0.9433334808787022, iteration: 357110
loss: 1.0014182329177856,grad_norm: 0.7680414910928647, iteration: 357111
loss: 1.020899772644043,grad_norm: 0.9594325140659649, iteration: 357112
loss: 0.9952302575111389,grad_norm: 0.8320192456182601, iteration: 357113
loss: 1.106898307800293,grad_norm: 0.9999990892615593, iteration: 357114
loss: 1.0579949617385864,grad_norm: 0.8688333054590749, iteration: 357115
loss: 1.0007669925689697,grad_norm: 0.82504564930654, iteration: 357116
loss: 0.9856821894645691,grad_norm: 0.8734393635276657, iteration: 357117
loss: 1.1131795644760132,grad_norm: 0.9999993635047268, iteration: 357118
loss: 1.0553510189056396,grad_norm: 0.999999214636202, iteration: 357119
loss: 0.992389976978302,grad_norm: 0.7890284578805984, iteration: 357120
loss: 0.9933329224586487,grad_norm: 0.9999990844350937, iteration: 357121
loss: 1.0205349922180176,grad_norm: 0.8481326793945063, iteration: 357122
loss: 1.098342776298523,grad_norm: 0.9909888383030386, iteration: 357123
loss: 0.9838208556175232,grad_norm: 0.9999992195234758, iteration: 357124
loss: 1.018554449081421,grad_norm: 0.995008435771432, iteration: 357125
loss: 1.0140197277069092,grad_norm: 0.9999991885039834, iteration: 357126
loss: 0.9894884824752808,grad_norm: 0.8545604334540834, iteration: 357127
loss: 1.058018684387207,grad_norm: 0.7765870074747456, iteration: 357128
loss: 1.1146827936172485,grad_norm: 0.9999991802390786, iteration: 357129
loss: 0.9718611836433411,grad_norm: 0.7281480094379628, iteration: 357130
loss: 1.0048733949661255,grad_norm: 0.9121139979142112, iteration: 357131
loss: 0.9667382836341858,grad_norm: 0.8180700554962945, iteration: 357132
loss: 1.028844952583313,grad_norm: 0.8246186906238733, iteration: 357133
loss: 1.0271943807601929,grad_norm: 0.7627343780451521, iteration: 357134
loss: 1.0609683990478516,grad_norm: 0.7622514958658592, iteration: 357135
loss: 1.067475438117981,grad_norm: 0.8582470063077774, iteration: 357136
loss: 1.0878140926361084,grad_norm: 0.9999997947041391, iteration: 357137
loss: 1.0512490272521973,grad_norm: 0.9999995918328766, iteration: 357138
loss: 1.0778285264968872,grad_norm: 0.8772498958589172, iteration: 357139
loss: 0.9909763336181641,grad_norm: 0.8223125449266224, iteration: 357140
loss: 0.9648619294166565,grad_norm: 0.8337793433585255, iteration: 357141
loss: 1.0255705118179321,grad_norm: 0.8566450641763932, iteration: 357142
loss: 0.9756550192832947,grad_norm: 0.7369439209522646, iteration: 357143
loss: 1.0120738744735718,grad_norm: 0.9779070076251919, iteration: 357144
loss: 1.0262715816497803,grad_norm: 0.6876833752267127, iteration: 357145
loss: 1.049780011177063,grad_norm: 0.9999994227114252, iteration: 357146
loss: 0.9731190204620361,grad_norm: 0.8498596859965845, iteration: 357147
loss: 0.9572093486785889,grad_norm: 0.8913015649013174, iteration: 357148
loss: 0.977448046207428,grad_norm: 0.8069015946022483, iteration: 357149
loss: 1.0168081521987915,grad_norm: 0.9426301984950358, iteration: 357150
loss: 1.080574631690979,grad_norm: 0.7978649367791637, iteration: 357151
loss: 1.0300177335739136,grad_norm: 0.9999994928207955, iteration: 357152
loss: 1.043483018875122,grad_norm: 0.9109737168241604, iteration: 357153
loss: 0.9756746888160706,grad_norm: 0.765320710858596, iteration: 357154
loss: 1.0079588890075684,grad_norm: 0.9632197449628278, iteration: 357155
loss: 1.0572491884231567,grad_norm: 0.9410417544985876, iteration: 357156
loss: 1.0569658279418945,grad_norm: 0.9578259251424307, iteration: 357157
loss: 0.9951931238174438,grad_norm: 0.8104110228441763, iteration: 357158
loss: 1.0783030986785889,grad_norm: 0.9999996802782726, iteration: 357159
loss: 1.0027775764465332,grad_norm: 0.775646330485097, iteration: 357160
loss: 1.0440922975540161,grad_norm: 0.8226640179164629, iteration: 357161
loss: 1.0373057126998901,grad_norm: 0.8137091360167436, iteration: 357162
loss: 1.010188341140747,grad_norm: 0.999999630319379, iteration: 357163
loss: 1.0845261812210083,grad_norm: 0.9968299509282708, iteration: 357164
loss: 1.1701935529708862,grad_norm: 0.9999993522525676, iteration: 357165
loss: 0.9815753102302551,grad_norm: 0.739918492188245, iteration: 357166
loss: 0.9940412640571594,grad_norm: 0.7220192536795823, iteration: 357167
loss: 1.049676775932312,grad_norm: 0.950664318845982, iteration: 357168
loss: 1.039644479751587,grad_norm: 0.9999993494411803, iteration: 357169
loss: 1.0280284881591797,grad_norm: 0.8499578921681273, iteration: 357170
loss: 0.9851727485656738,grad_norm: 0.9365518454743966, iteration: 357171
loss: 1.0850530862808228,grad_norm: 0.9828518215138156, iteration: 357172
loss: 1.00788152217865,grad_norm: 0.9759754920463058, iteration: 357173
loss: 1.127560019493103,grad_norm: 0.9999990944813435, iteration: 357174
loss: 0.99738609790802,grad_norm: 0.8416755187384647, iteration: 357175
loss: 0.9852535724639893,grad_norm: 0.9668198800113467, iteration: 357176
loss: 1.153821587562561,grad_norm: 0.9493392146226007, iteration: 357177
loss: 1.0610507726669312,grad_norm: 0.9999991673167643, iteration: 357178
loss: 1.0156910419464111,grad_norm: 0.9999998895662114, iteration: 357179
loss: 1.0199939012527466,grad_norm: 0.9999993397670871, iteration: 357180
loss: 1.0949053764343262,grad_norm: 0.9999991732780377, iteration: 357181
loss: 1.1430548429489136,grad_norm: 0.9999999196711093, iteration: 357182
loss: 0.9846794605255127,grad_norm: 0.7185765418616199, iteration: 357183
loss: 1.068970799446106,grad_norm: 0.9999996345422266, iteration: 357184
loss: 1.0353894233703613,grad_norm: 0.8435040344299664, iteration: 357185
loss: 1.0028325319290161,grad_norm: 0.7542723617954861, iteration: 357186
loss: 1.0119794607162476,grad_norm: 0.918493927033222, iteration: 357187
loss: 1.0137677192687988,grad_norm: 0.8238884501896923, iteration: 357188
loss: 1.0829401016235352,grad_norm: 0.9999992254449461, iteration: 357189
loss: 1.046561598777771,grad_norm: 0.8036654295347804, iteration: 357190
loss: 1.0653902292251587,grad_norm: 0.9999998002173196, iteration: 357191
loss: 1.0694726705551147,grad_norm: 0.7455764143163284, iteration: 357192
loss: 0.9803104996681213,grad_norm: 0.796218154338426, iteration: 357193
loss: 1.0725228786468506,grad_norm: 0.9999993525887462, iteration: 357194
loss: 1.088747501373291,grad_norm: 0.9999990029325486, iteration: 357195
loss: 1.0306966304779053,grad_norm: 0.8477052727939776, iteration: 357196
loss: 0.9994015097618103,grad_norm: 0.8688909765721662, iteration: 357197
loss: 0.9985458254814148,grad_norm: 0.6709514775026238, iteration: 357198
loss: 1.0219610929489136,grad_norm: 0.9999995581769093, iteration: 357199
loss: 0.9899275898933411,grad_norm: 0.8885078160493514, iteration: 357200
loss: 1.0160294771194458,grad_norm: 0.757557593799782, iteration: 357201
loss: 1.0490683317184448,grad_norm: 0.8278675619541769, iteration: 357202
loss: 1.064619779586792,grad_norm: 0.9999991816403077, iteration: 357203
loss: 1.085960865020752,grad_norm: 0.9999996926154273, iteration: 357204
loss: 1.0118805170059204,grad_norm: 0.7609008539465532, iteration: 357205
loss: 0.9819140434265137,grad_norm: 0.7955567953388638, iteration: 357206
loss: 1.1060521602630615,grad_norm: 0.9296211778818984, iteration: 357207
loss: 0.9897224307060242,grad_norm: 0.8881389172950584, iteration: 357208
loss: 1.0306421518325806,grad_norm: 0.8729428688945586, iteration: 357209
loss: 1.0839000940322876,grad_norm: 0.9999998775427923, iteration: 357210
loss: 1.005570411682129,grad_norm: 0.8583446422064688, iteration: 357211
loss: 1.0090174674987793,grad_norm: 0.9999993003508492, iteration: 357212
loss: 1.1330609321594238,grad_norm: 0.834126199440162, iteration: 357213
loss: 0.9874663949012756,grad_norm: 0.755202686184018, iteration: 357214
loss: 1.0028183460235596,grad_norm: 0.8803602467222589, iteration: 357215
loss: 1.033333659172058,grad_norm: 0.8656547249423657, iteration: 357216
loss: 1.124612808227539,grad_norm: 0.9999991973297, iteration: 357217
loss: 1.0635061264038086,grad_norm: 0.9999991207442804, iteration: 357218
loss: 1.0953125953674316,grad_norm: 0.9999991675843163, iteration: 357219
loss: 1.0689831972122192,grad_norm: 0.9999991086760758, iteration: 357220
loss: 1.0032765865325928,grad_norm: 0.885351247138871, iteration: 357221
loss: 0.9981757402420044,grad_norm: 0.999999299374847, iteration: 357222
loss: 1.0088677406311035,grad_norm: 0.8542206279870808, iteration: 357223
loss: 1.0300134420394897,grad_norm: 0.6930925313684118, iteration: 357224
loss: 1.1118266582489014,grad_norm: 0.9469807427022221, iteration: 357225
loss: 1.0358500480651855,grad_norm: 0.849272307817113, iteration: 357226
loss: 0.9886115789413452,grad_norm: 0.7569446130249271, iteration: 357227
loss: 1.029741644859314,grad_norm: 0.8752008295155106, iteration: 357228
loss: 0.9673454761505127,grad_norm: 0.789079157179517, iteration: 357229
loss: 1.0607504844665527,grad_norm: 0.9123458777312758, iteration: 357230
loss: 1.0741019248962402,grad_norm: 0.9999990316198718, iteration: 357231
loss: 1.0002501010894775,grad_norm: 0.6997059484322806, iteration: 357232
loss: 0.9753809571266174,grad_norm: 0.8164199400949108, iteration: 357233
loss: 1.0743650197982788,grad_norm: 0.999999950584775, iteration: 357234
loss: 1.006609320640564,grad_norm: 0.739672827857604, iteration: 357235
loss: 1.018042802810669,grad_norm: 0.9999990650425337, iteration: 357236
loss: 1.0100833177566528,grad_norm: 0.9584533371344439, iteration: 357237
loss: 1.003967523574829,grad_norm: 0.7345693219644681, iteration: 357238
loss: 1.112085223197937,grad_norm: 0.8495006902472411, iteration: 357239
loss: 1.0231223106384277,grad_norm: 0.9999991187051239, iteration: 357240
loss: 1.0035587549209595,grad_norm: 0.8654507362082073, iteration: 357241
loss: 1.118393063545227,grad_norm: 0.9159403937057933, iteration: 357242
loss: 1.0755516290664673,grad_norm: 0.9278228824566875, iteration: 357243
loss: 1.0293630361557007,grad_norm: 0.7215324243158678, iteration: 357244
loss: 1.1164543628692627,grad_norm: 0.9999991653198684, iteration: 357245
loss: 0.9914624094963074,grad_norm: 0.999999871179797, iteration: 357246
loss: 1.141964316368103,grad_norm: 0.9857339398136267, iteration: 357247
loss: 0.9863358736038208,grad_norm: 0.8081051907006421, iteration: 357248
loss: 1.0203560590744019,grad_norm: 0.7380409583620237, iteration: 357249
loss: 0.991565465927124,grad_norm: 0.7797336255836733, iteration: 357250
loss: 0.9583925008773804,grad_norm: 0.8346277877394901, iteration: 357251
loss: 1.0279455184936523,grad_norm: 0.9202855096628731, iteration: 357252
loss: 1.0258334875106812,grad_norm: 0.999999637646826, iteration: 357253
loss: 1.04267156124115,grad_norm: 0.9999994159344711, iteration: 357254
loss: 1.0333642959594727,grad_norm: 0.9112649830107568, iteration: 357255
loss: 0.9434012770652771,grad_norm: 0.8195532173242316, iteration: 357256
loss: 1.059478759765625,grad_norm: 0.9999993088755662, iteration: 357257
loss: 1.0830239057540894,grad_norm: 0.9999999129911717, iteration: 357258
loss: 0.9836453795433044,grad_norm: 1.000000028445508, iteration: 357259
loss: 0.9934778809547424,grad_norm: 0.999999050884355, iteration: 357260
loss: 1.0993651151657104,grad_norm: 0.9999994002470664, iteration: 357261
loss: 1.014678955078125,grad_norm: 0.9999996224653217, iteration: 357262
loss: 1.1065928936004639,grad_norm: 0.8435665050650181, iteration: 357263
loss: 1.0393661260604858,grad_norm: 0.9999995383272088, iteration: 357264
loss: 0.9989294409751892,grad_norm: 0.8030545652142397, iteration: 357265
loss: 0.9765088558197021,grad_norm: 0.9999993236387761, iteration: 357266
loss: 1.0131001472473145,grad_norm: 0.9999997321042422, iteration: 357267
loss: 1.0036649703979492,grad_norm: 0.9550122129459475, iteration: 357268
loss: 0.961297869682312,grad_norm: 0.9999991751837191, iteration: 357269
loss: 1.073927879333496,grad_norm: 0.9999993984618233, iteration: 357270
loss: 1.0173287391662598,grad_norm: 0.6847477504492094, iteration: 357271
loss: 1.020447015762329,grad_norm: 0.7701382505178157, iteration: 357272
loss: 1.0661470890045166,grad_norm: 0.9165838973297558, iteration: 357273
loss: 1.161761999130249,grad_norm: 0.999999847456668, iteration: 357274
loss: 1.0279841423034668,grad_norm: 0.9878734157036808, iteration: 357275
loss: 0.9831526875495911,grad_norm: 0.960772328922899, iteration: 357276
loss: 1.0585757493972778,grad_norm: 0.960942001467353, iteration: 357277
loss: 1.0691113471984863,grad_norm: 0.9999990359565868, iteration: 357278
loss: 1.036204218864441,grad_norm: 0.7385630000586028, iteration: 357279
loss: 1.0948913097381592,grad_norm: 0.826709938938368, iteration: 357280
loss: 1.0852553844451904,grad_norm: 0.9999993463088137, iteration: 357281
loss: 0.9989455938339233,grad_norm: 0.8146833199965345, iteration: 357282
loss: 1.0742344856262207,grad_norm: 0.9999993200807763, iteration: 357283
loss: 1.2419973611831665,grad_norm: 1.0000000082680387, iteration: 357284
loss: 1.0893036127090454,grad_norm: 0.9999990658498668, iteration: 357285
loss: 0.9892403483390808,grad_norm: 0.7463933170926158, iteration: 357286
loss: 1.2160507440567017,grad_norm: 0.9976418962008372, iteration: 357287
loss: 1.0333137512207031,grad_norm: 0.9999997944826391, iteration: 357288
loss: 1.0467544794082642,grad_norm: 0.9999993450196648, iteration: 357289
loss: 1.0197010040283203,grad_norm: 0.9999996146617489, iteration: 357290
loss: 1.0919301509857178,grad_norm: 0.9999993091032736, iteration: 357291
loss: 1.231583595275879,grad_norm: 0.9999998269533229, iteration: 357292
loss: 1.0853191614151,grad_norm: 0.9999998316890281, iteration: 357293
loss: 1.1893911361694336,grad_norm: 0.9999998389796898, iteration: 357294
loss: 1.1598975658416748,grad_norm: 0.9999997945434527, iteration: 357295
loss: 1.2402050495147705,grad_norm: 0.9999999018068912, iteration: 357296
loss: 1.585821270942688,grad_norm: 0.9999999170262942, iteration: 357297
loss: 1.2813076972961426,grad_norm: 0.9999993879259429, iteration: 357298
loss: 1.2754828929901123,grad_norm: 0.9999996570921058, iteration: 357299
loss: 1.3951777219772339,grad_norm: 0.9999998263571861, iteration: 357300
loss: 1.1162031888961792,grad_norm: 1.0000000439600263, iteration: 357301
loss: 1.284840703010559,grad_norm: 0.9999998600263285, iteration: 357302
loss: 1.0008825063705444,grad_norm: 0.9888470257816476, iteration: 357303
loss: 0.9448288679122925,grad_norm: 0.9775222472113856, iteration: 357304
loss: 1.100204586982727,grad_norm: 0.9999999324807467, iteration: 357305
loss: 0.9767728447914124,grad_norm: 0.9999998637029394, iteration: 357306
loss: 1.4005011320114136,grad_norm: 0.9999995003862794, iteration: 357307
loss: 1.0332684516906738,grad_norm: 0.9024086858231317, iteration: 357308
loss: 1.5313384532928467,grad_norm: 0.9999995819054555, iteration: 357309
loss: 1.0120699405670166,grad_norm: 0.9999991691365689, iteration: 357310
loss: 1.1565561294555664,grad_norm: 0.9999995482410216, iteration: 357311
loss: 1.197698950767517,grad_norm: 0.999999766440254, iteration: 357312
loss: 1.0629833936691284,grad_norm: 0.9999999254506255, iteration: 357313
loss: 1.0415667295455933,grad_norm: 0.9999990796851935, iteration: 357314
loss: 1.2521333694458008,grad_norm: 0.9999998341588223, iteration: 357315
loss: 1.078439474105835,grad_norm: 0.9999997676978114, iteration: 357316
loss: 1.0113885402679443,grad_norm: 0.9999992175426582, iteration: 357317
loss: 1.0351608991622925,grad_norm: 0.999999749667262, iteration: 357318
loss: 1.2439320087432861,grad_norm: 0.9999992056331439, iteration: 357319
loss: 1.1788761615753174,grad_norm: 0.9999994688836061, iteration: 357320
loss: 1.0468517541885376,grad_norm: 0.8656775035375682, iteration: 357321
loss: 1.2688020467758179,grad_norm: 0.9999998215074452, iteration: 357322
loss: 0.9864137172698975,grad_norm: 0.9921759551488372, iteration: 357323
loss: 1.0467556715011597,grad_norm: 0.9999991444050352, iteration: 357324
loss: 0.97663414478302,grad_norm: 0.7531771495084832, iteration: 357325
loss: 1.1161072254180908,grad_norm: 0.9999995399825232, iteration: 357326
loss: 1.0432583093643188,grad_norm: 0.7169601518326923, iteration: 357327
loss: 1.1763677597045898,grad_norm: 0.9999994923651743, iteration: 357328
loss: 1.0498939752578735,grad_norm: 0.88619282362462, iteration: 357329
loss: 0.9700010418891907,grad_norm: 0.8186847386349185, iteration: 357330
loss: 1.07603120803833,grad_norm: 0.7576114234172476, iteration: 357331
loss: 1.0964621305465698,grad_norm: 0.9999992973611673, iteration: 357332
loss: 0.9630581140518188,grad_norm: 0.9999990787569069, iteration: 357333
loss: 1.0604673624038696,grad_norm: 0.9999992721280097, iteration: 357334
loss: 1.1369552612304688,grad_norm: 0.9999991476, iteration: 357335
loss: 1.05257248878479,grad_norm: 0.9486564291097295, iteration: 357336
loss: 1.0487957000732422,grad_norm: 0.9949604621106446, iteration: 357337
loss: 1.1878515481948853,grad_norm: 0.9999995838898166, iteration: 357338
loss: 1.0796213150024414,grad_norm: 0.8508726924996879, iteration: 357339
loss: 1.060767412185669,grad_norm: 0.895362697145794, iteration: 357340
loss: 1.1210503578186035,grad_norm: 0.9999996735063905, iteration: 357341
loss: 1.1802043914794922,grad_norm: 0.9138569606781533, iteration: 357342
loss: 1.0340994596481323,grad_norm: 0.79480074659064, iteration: 357343
loss: 1.072657823562622,grad_norm: 0.9999992876724448, iteration: 357344
loss: 1.0170409679412842,grad_norm: 0.8028019790703854, iteration: 357345
loss: 0.9610669612884521,grad_norm: 0.9233794504190156, iteration: 357346
loss: 1.0058391094207764,grad_norm: 0.9999992419940454, iteration: 357347
loss: 1.0294487476348877,grad_norm: 0.7311438869642035, iteration: 357348
loss: 1.0289125442504883,grad_norm: 0.8829599868739764, iteration: 357349
loss: 1.0026355981826782,grad_norm: 0.791673221464234, iteration: 357350
loss: 1.0351712703704834,grad_norm: 0.8568514134292722, iteration: 357351
loss: 0.9694584012031555,grad_norm: 0.8684008690400118, iteration: 357352
loss: 1.00845205783844,grad_norm: 0.9103383429980186, iteration: 357353
loss: 0.9960265755653381,grad_norm: 0.714386608748695, iteration: 357354
loss: 1.214294195175171,grad_norm: 0.9999995666032508, iteration: 357355
loss: 0.9447397589683533,grad_norm: 0.7631640696340624, iteration: 357356
loss: 0.9597050547599792,grad_norm: 0.7226995285333827, iteration: 357357
loss: 1.0428329706192017,grad_norm: 0.9999990495558704, iteration: 357358
loss: 0.999608039855957,grad_norm: 0.7180525628055723, iteration: 357359
loss: 1.1294814348220825,grad_norm: 0.9999991243551462, iteration: 357360
loss: 1.0896683931350708,grad_norm: 0.9999993152539077, iteration: 357361
loss: 1.0895682573318481,grad_norm: 0.9999994114793332, iteration: 357362
loss: 1.0408915281295776,grad_norm: 0.9999992358693702, iteration: 357363
loss: 1.0056736469268799,grad_norm: 0.8516207764492917, iteration: 357364
loss: 1.128962516784668,grad_norm: 0.9988861365649807, iteration: 357365
loss: 1.044726014137268,grad_norm: 0.9999994264141951, iteration: 357366
loss: 1.1482094526290894,grad_norm: 0.9999991294872406, iteration: 357367
loss: 0.9409283995628357,grad_norm: 0.8607384176536064, iteration: 357368
loss: 1.1806048154830933,grad_norm: 0.9999992150047452, iteration: 357369
loss: 1.0598899126052856,grad_norm: 0.9999990524661734, iteration: 357370
loss: 1.245326280593872,grad_norm: 0.9999996746201898, iteration: 357371
loss: 1.1390072107315063,grad_norm: 0.9999998446957742, iteration: 357372
loss: 1.0283137559890747,grad_norm: 0.9999995848558392, iteration: 357373
loss: 1.004047155380249,grad_norm: 0.7751100056334063, iteration: 357374
loss: 0.9995293617248535,grad_norm: 0.9824811572890515, iteration: 357375
loss: 1.0325350761413574,grad_norm: 0.8737635292768549, iteration: 357376
loss: 1.1068954467773438,grad_norm: 0.9132502460111507, iteration: 357377
loss: 1.0665643215179443,grad_norm: 0.9999996479112471, iteration: 357378
loss: 1.0812796354293823,grad_norm: 0.9999996607328216, iteration: 357379
loss: 1.1302825212478638,grad_norm: 0.9999992348256376, iteration: 357380
loss: 1.0059354305267334,grad_norm: 0.9999991070022922, iteration: 357381
loss: 0.979310929775238,grad_norm: 0.999999304379046, iteration: 357382
loss: 1.0264095067977905,grad_norm: 0.9999990434624055, iteration: 357383
loss: 1.023094654083252,grad_norm: 0.999999588637335, iteration: 357384
loss: 0.9990788698196411,grad_norm: 0.9647217164356942, iteration: 357385
loss: 1.00978684425354,grad_norm: 0.999999841914984, iteration: 357386
loss: 1.0477588176727295,grad_norm: 0.8517993462517702, iteration: 357387
loss: 1.1507490873336792,grad_norm: 0.8279974365427232, iteration: 357388
loss: 1.0116544961929321,grad_norm: 0.99999991087022, iteration: 357389
loss: 1.0270484685897827,grad_norm: 0.999999832438987, iteration: 357390
loss: 1.0233607292175293,grad_norm: 0.999999672837127, iteration: 357391
loss: 1.0535115003585815,grad_norm: 0.7928987341971747, iteration: 357392
loss: 1.041517972946167,grad_norm: 0.9029299442658001, iteration: 357393
loss: 1.066278100013733,grad_norm: 0.9302284755598044, iteration: 357394
loss: 1.0515241622924805,grad_norm: 0.9075554832036383, iteration: 357395
loss: 1.0019338130950928,grad_norm: 0.8191845707355764, iteration: 357396
loss: 1.0044167041778564,grad_norm: 0.990302590174337, iteration: 357397
loss: 1.072729229927063,grad_norm: 0.9999991792196989, iteration: 357398
loss: 1.0553818941116333,grad_norm: 0.9999997232761819, iteration: 357399
loss: 1.0266262292861938,grad_norm: 0.8847471574083703, iteration: 357400
loss: 1.018586277961731,grad_norm: 0.7850423282286754, iteration: 357401
loss: 1.0406256914138794,grad_norm: 0.813952349614559, iteration: 357402
loss: 1.1040362119674683,grad_norm: 0.9999999065588489, iteration: 357403
loss: 0.9965343475341797,grad_norm: 0.8343655782485313, iteration: 357404
loss: 1.0322165489196777,grad_norm: 0.9999998373792816, iteration: 357405
loss: 1.0110576152801514,grad_norm: 0.9451289422459958, iteration: 357406
loss: 0.9598333835601807,grad_norm: 0.8171047379659832, iteration: 357407
loss: 1.0622919797897339,grad_norm: 0.9285624734776109, iteration: 357408
loss: 1.0298982858657837,grad_norm: 0.7214289951457916, iteration: 357409
loss: 1.029557466506958,grad_norm: 0.9614989091861931, iteration: 357410
loss: 1.0297104120254517,grad_norm: 0.729564324476734, iteration: 357411
loss: 1.0517743825912476,grad_norm: 0.999998985919027, iteration: 357412
loss: 0.9741561412811279,grad_norm: 0.9280762728112032, iteration: 357413
loss: 1.0610377788543701,grad_norm: 0.9394394055197156, iteration: 357414
loss: 1.1686280965805054,grad_norm: 0.9153881940442737, iteration: 357415
loss: 1.0386669635772705,grad_norm: 0.9422977119193405, iteration: 357416
loss: 1.0507230758666992,grad_norm: 0.9783754006790683, iteration: 357417
loss: 1.0615971088409424,grad_norm: 0.9999990058067149, iteration: 357418
loss: 1.0044457912445068,grad_norm: 0.8330736610069618, iteration: 357419
loss: 0.9846578240394592,grad_norm: 0.9999991874254192, iteration: 357420
loss: 1.0154850482940674,grad_norm: 0.8534316491406151, iteration: 357421
loss: 1.037970781326294,grad_norm: 0.9999990751710587, iteration: 357422
loss: 1.0295195579528809,grad_norm: 0.8384391526745205, iteration: 357423
loss: 1.0440634489059448,grad_norm: 0.8217706755637308, iteration: 357424
loss: 1.0731629133224487,grad_norm: 0.9154881463657772, iteration: 357425
loss: 0.9885556101799011,grad_norm: 0.741259697264349, iteration: 357426
loss: 0.9970151782035828,grad_norm: 0.9999998703217291, iteration: 357427
loss: 1.0974615812301636,grad_norm: 0.9999997370992134, iteration: 357428
loss: 1.039777159690857,grad_norm: 0.8638751156869807, iteration: 357429
loss: 1.0127151012420654,grad_norm: 0.9999991893484735, iteration: 357430
loss: 1.046667456626892,grad_norm: 0.999999915845503, iteration: 357431
loss: 1.0331214666366577,grad_norm: 0.9999990853802828, iteration: 357432
loss: 1.0268232822418213,grad_norm: 0.999999613316435, iteration: 357433
loss: 0.9947441816329956,grad_norm: 0.9999990272792527, iteration: 357434
loss: 0.9885092377662659,grad_norm: 0.9698532583912858, iteration: 357435
loss: 0.9999596476554871,grad_norm: 0.8194157256209293, iteration: 357436
loss: 1.0610653162002563,grad_norm: 0.7338882061350297, iteration: 357437
loss: 0.9741241931915283,grad_norm: 0.999999119265926, iteration: 357438
loss: 1.0057423114776611,grad_norm: 0.883571358937564, iteration: 357439
loss: 1.0780264139175415,grad_norm: 0.8798633309817777, iteration: 357440
loss: 1.0799527168273926,grad_norm: 0.999999148909371, iteration: 357441
loss: 1.0159329175949097,grad_norm: 0.8218144480291211, iteration: 357442
loss: 1.046578288078308,grad_norm: 0.8923310229334028, iteration: 357443
loss: 1.0025728940963745,grad_norm: 0.9999999540221549, iteration: 357444
loss: 1.0796769857406616,grad_norm: 0.9999991903674391, iteration: 357445
loss: 0.9791252017021179,grad_norm: 0.8798852908968476, iteration: 357446
loss: 1.0266644954681396,grad_norm: 0.9999990729401007, iteration: 357447
loss: 0.9836223721504211,grad_norm: 0.9999997520273765, iteration: 357448
loss: 1.017048716545105,grad_norm: 0.9999998600034482, iteration: 357449
loss: 1.0651799440383911,grad_norm: 0.9741353893029856, iteration: 357450
loss: 1.0000404119491577,grad_norm: 0.9166733026106545, iteration: 357451
loss: 1.003715991973877,grad_norm: 0.829898654809684, iteration: 357452
loss: 1.0386723279953003,grad_norm: 0.9205096445930163, iteration: 357453
loss: 0.9702737331390381,grad_norm: 0.7287520857808993, iteration: 357454
loss: 0.9804020524024963,grad_norm: 0.9999993387029913, iteration: 357455
loss: 1.016826868057251,grad_norm: 0.9999990645001519, iteration: 357456
loss: 0.9814443588256836,grad_norm: 0.7217571211631818, iteration: 357457
loss: 0.9984572529792786,grad_norm: 0.99999912693714, iteration: 357458
loss: 1.0454148054122925,grad_norm: 0.9670634877218045, iteration: 357459
loss: 0.985735297203064,grad_norm: 0.9999989461011338, iteration: 357460
loss: 0.9780341386795044,grad_norm: 0.6736196651928813, iteration: 357461
loss: 1.046061635017395,grad_norm: 0.9999990758500779, iteration: 357462
loss: 1.0314242839813232,grad_norm: 0.999999736899445, iteration: 357463
loss: 1.029127836227417,grad_norm: 0.8687452746698824, iteration: 357464
loss: 0.9871955513954163,grad_norm: 0.9999993905314344, iteration: 357465
loss: 1.0149952173233032,grad_norm: 0.999999311569892, iteration: 357466
loss: 1.0058153867721558,grad_norm: 0.9999998011594694, iteration: 357467
loss: 0.9589591026306152,grad_norm: 0.8723002558062395, iteration: 357468
loss: 0.9808226227760315,grad_norm: 0.883931277648241, iteration: 357469
loss: 1.0081099271774292,grad_norm: 0.6954755999186167, iteration: 357470
loss: 1.005702257156372,grad_norm: 0.8970090249074657, iteration: 357471
loss: 1.0253841876983643,grad_norm: 0.834256057911486, iteration: 357472
loss: 1.055375099182129,grad_norm: 0.9999995689237647, iteration: 357473
loss: 1.0511773824691772,grad_norm: 0.7899804517303375, iteration: 357474
loss: 0.9874569773674011,grad_norm: 0.9137419075645964, iteration: 357475
loss: 1.0116153955459595,grad_norm: 0.9999992944639922, iteration: 357476
loss: 1.0076768398284912,grad_norm: 0.8438804107268055, iteration: 357477
loss: 1.0219106674194336,grad_norm: 0.9748122590403183, iteration: 357478
loss: 0.9945976734161377,grad_norm: 0.895392929213923, iteration: 357479
loss: 1.013836145401001,grad_norm: 0.9085639201911369, iteration: 357480
loss: 0.9908796548843384,grad_norm: 0.737875375474476, iteration: 357481
loss: 0.976721465587616,grad_norm: 0.8142522635271358, iteration: 357482
loss: 1.0347634553909302,grad_norm: 0.9252902147859623, iteration: 357483
loss: 1.036115288734436,grad_norm: 0.9999997345457105, iteration: 357484
loss: 1.0468040704727173,grad_norm: 0.99999932516434, iteration: 357485
loss: 0.9931027293205261,grad_norm: 0.7674193057866252, iteration: 357486
loss: 0.9564835429191589,grad_norm: 0.8081678260112546, iteration: 357487
loss: 1.0221664905548096,grad_norm: 0.9999996033228877, iteration: 357488
loss: 0.9990995526313782,grad_norm: 0.9765769807178349, iteration: 357489
loss: 1.0768227577209473,grad_norm: 0.9622515968071592, iteration: 357490
loss: 0.9568129777908325,grad_norm: 0.8309134678205037, iteration: 357491
loss: 1.1694480180740356,grad_norm: 0.9999995412018241, iteration: 357492
loss: 1.0147297382354736,grad_norm: 0.8453861224724679, iteration: 357493
loss: 1.037215232849121,grad_norm: 0.8061450079507714, iteration: 357494
loss: 0.9711654782295227,grad_norm: 0.7599763634484463, iteration: 357495
loss: 0.9678839445114136,grad_norm: 0.9999992897685352, iteration: 357496
loss: 0.9763436913490295,grad_norm: 0.9995204133482927, iteration: 357497
loss: 0.9984590411186218,grad_norm: 0.8967050207299377, iteration: 357498
loss: 1.0637835264205933,grad_norm: 1.0000000299211858, iteration: 357499
loss: 1.0442678928375244,grad_norm: 0.9999995620861786, iteration: 357500
loss: 1.02106511592865,grad_norm: 0.878350518865329, iteration: 357501
loss: 0.994201123714447,grad_norm: 0.9999990693816798, iteration: 357502
loss: 0.964708149433136,grad_norm: 0.9490553176673843, iteration: 357503
loss: 0.9803566336631775,grad_norm: 0.7511132110723331, iteration: 357504
loss: 1.0317738056182861,grad_norm: 0.782605711606384, iteration: 357505
loss: 1.080663800239563,grad_norm: 0.8333365511904572, iteration: 357506
loss: 0.9509028792381287,grad_norm: 0.7793561181689859, iteration: 357507
loss: 0.9931655526161194,grad_norm: 0.7362830435617711, iteration: 357508
loss: 1.07399582862854,grad_norm: 0.9999999836520117, iteration: 357509
loss: 1.0062580108642578,grad_norm: 0.9999149341152646, iteration: 357510
loss: 1.0247825384140015,grad_norm: 0.8470833847736582, iteration: 357511
loss: 0.9904962182044983,grad_norm: 0.7598601452244613, iteration: 357512
loss: 1.0564948320388794,grad_norm: 0.9999995503679153, iteration: 357513
loss: 0.9727686047554016,grad_norm: 0.8644235317968432, iteration: 357514
loss: 0.987689197063446,grad_norm: 0.8179125484224009, iteration: 357515
loss: 0.9841052293777466,grad_norm: 0.812040485361073, iteration: 357516
loss: 1.0396883487701416,grad_norm: 0.9999996649786534, iteration: 357517
loss: 1.0409070253372192,grad_norm: 0.7683755109305096, iteration: 357518
loss: 1.06668221950531,grad_norm: 0.7011981248784873, iteration: 357519
loss: 0.9789608716964722,grad_norm: 0.8237452096542183, iteration: 357520
loss: 1.0074784755706787,grad_norm: 0.8187716835177887, iteration: 357521
loss: 1.0266779661178589,grad_norm: 0.8981002316921918, iteration: 357522
loss: 0.9669772386550903,grad_norm: 0.7703113457064092, iteration: 357523
loss: 0.9818533658981323,grad_norm: 0.8206706580812061, iteration: 357524
loss: 1.1507675647735596,grad_norm: 0.9999994218692091, iteration: 357525
loss: 1.0215171575546265,grad_norm: 0.785784344332232, iteration: 357526
loss: 0.9737211465835571,grad_norm: 0.7045746700531358, iteration: 357527
loss: 1.1445435285568237,grad_norm: 0.8812925457548659, iteration: 357528
loss: 1.0400859117507935,grad_norm: 0.9763001255516495, iteration: 357529
loss: 1.1739552021026611,grad_norm: 1.0000000403653015, iteration: 357530
loss: 1.1209176778793335,grad_norm: 0.9999994723205784, iteration: 357531
loss: 1.0290597677230835,grad_norm: 0.8263432530423298, iteration: 357532
loss: 1.0375498533248901,grad_norm: 0.98603316718085, iteration: 357533
loss: 1.0571231842041016,grad_norm: 0.7355070111006567, iteration: 357534
loss: 0.9902613162994385,grad_norm: 0.9199112873315948, iteration: 357535
loss: 1.0294913053512573,grad_norm: 0.999999291808396, iteration: 357536
loss: 1.0099151134490967,grad_norm: 0.7600970226263442, iteration: 357537
loss: 1.0435129404067993,grad_norm: 0.8595473800767673, iteration: 357538
loss: 0.998980700969696,grad_norm: 0.9999991386132481, iteration: 357539
loss: 1.0300791263580322,grad_norm: 0.999999603624107, iteration: 357540
loss: 1.0895674228668213,grad_norm: 0.7950644632441908, iteration: 357541
loss: 1.0168507099151611,grad_norm: 0.7164184608556908, iteration: 357542
loss: 1.003481149673462,grad_norm: 0.9268972815591876, iteration: 357543
loss: 1.0169103145599365,grad_norm: 0.9776667781878611, iteration: 357544
loss: 1.0287202596664429,grad_norm: 0.9999990726105172, iteration: 357545
loss: 0.9946042895317078,grad_norm: 0.9148125202923824, iteration: 357546
loss: 0.9902064204216003,grad_norm: 0.8076094498893482, iteration: 357547
loss: 1.0529028177261353,grad_norm: 0.9999994499651111, iteration: 357548
loss: 1.0920697450637817,grad_norm: 0.8091936552441212, iteration: 357549
loss: 0.9873751401901245,grad_norm: 0.94202102917284, iteration: 357550
loss: 0.9727450013160706,grad_norm: 0.7724056461179025, iteration: 357551
loss: 0.9934221506118774,grad_norm: 0.7436888626540609, iteration: 357552
loss: 0.99680495262146,grad_norm: 0.8382793857398378, iteration: 357553
loss: 1.011928677558899,grad_norm: 0.8753787432968932, iteration: 357554
loss: 0.9791409969329834,grad_norm: 0.6279774679197472, iteration: 357555
loss: 1.0100008249282837,grad_norm: 0.9273180722235739, iteration: 357556
loss: 1.0278682708740234,grad_norm: 0.861966207179843, iteration: 357557
loss: 0.9691845178604126,grad_norm: 0.8724990530995137, iteration: 357558
loss: 0.9714082479476929,grad_norm: 0.9200216890393745, iteration: 357559
loss: 1.0353600978851318,grad_norm: 0.7692988727282764, iteration: 357560
loss: 1.028726577758789,grad_norm: 0.9481794343340707, iteration: 357561
loss: 0.9927114248275757,grad_norm: 0.961903909776959, iteration: 357562
loss: 1.004403829574585,grad_norm: 0.8566641964472694, iteration: 357563
loss: 1.071394920349121,grad_norm: 0.8473278882204548, iteration: 357564
loss: 1.0763218402862549,grad_norm: 0.8442059549317924, iteration: 357565
loss: 0.9882418513298035,grad_norm: 0.7961791530913, iteration: 357566
loss: 0.993864119052887,grad_norm: 0.8617369228640936, iteration: 357567
loss: 1.022567629814148,grad_norm: 0.7452774889715275, iteration: 357568
loss: 1.0093841552734375,grad_norm: 0.7898162627135763, iteration: 357569
loss: 0.9889965653419495,grad_norm: 0.8051035941443817, iteration: 357570
loss: 0.9999427795410156,grad_norm: 0.9220809055329707, iteration: 357571
loss: 0.9895829558372498,grad_norm: 0.8422937616591086, iteration: 357572
loss: 1.0085898637771606,grad_norm: 0.7750939289786412, iteration: 357573
loss: 1.0116950273513794,grad_norm: 0.649086443039539, iteration: 357574
loss: 0.9914553165435791,grad_norm: 0.9665021734456534, iteration: 357575
loss: 0.9854671955108643,grad_norm: 0.823516891568791, iteration: 357576
loss: 0.9660868048667908,grad_norm: 0.941068499187377, iteration: 357577
loss: 0.978505551815033,grad_norm: 0.8516412332744459, iteration: 357578
loss: 1.0054627656936646,grad_norm: 0.9849629190485057, iteration: 357579
loss: 0.9904336929321289,grad_norm: 0.8767783824529101, iteration: 357580
loss: 1.0373172760009766,grad_norm: 0.9999990313932609, iteration: 357581
loss: 1.0080163478851318,grad_norm: 0.8288784815530112, iteration: 357582
loss: 0.9840038418769836,grad_norm: 0.8346624420448369, iteration: 357583
loss: 0.9917576909065247,grad_norm: 0.7163048721631624, iteration: 357584
loss: 0.9860180020332336,grad_norm: 0.7947565892237138, iteration: 357585
loss: 1.036160945892334,grad_norm: 0.8099183855140701, iteration: 357586
loss: 1.050915002822876,grad_norm: 0.7513461563370797, iteration: 357587
loss: 0.9615775346755981,grad_norm: 0.6928126858250753, iteration: 357588
loss: 0.9595001935958862,grad_norm: 0.9389682868356465, iteration: 357589
loss: 0.9355181455612183,grad_norm: 0.8256153864130303, iteration: 357590
loss: 0.9993994235992432,grad_norm: 0.9190897291624093, iteration: 357591
loss: 1.0217536687850952,grad_norm: 0.9999997204924247, iteration: 357592
loss: 1.0291447639465332,grad_norm: 0.9999993427677965, iteration: 357593
loss: 0.9755836725234985,grad_norm: 0.8334133617222536, iteration: 357594
loss: 0.9386768341064453,grad_norm: 0.918324258984244, iteration: 357595
loss: 0.9955921173095703,grad_norm: 0.9190036287990857, iteration: 357596
loss: 1.030643343925476,grad_norm: 0.9999990118349112, iteration: 357597
loss: 0.9975607991218567,grad_norm: 0.7850479640862189, iteration: 357598
loss: 0.9988987445831299,grad_norm: 0.8343008652785623, iteration: 357599
loss: 1.0630425214767456,grad_norm: 0.8134156146661893, iteration: 357600
loss: 1.0141258239746094,grad_norm: 0.8278732123087436, iteration: 357601
loss: 1.0600308179855347,grad_norm: 0.9999991479468443, iteration: 357602
loss: 1.0105490684509277,grad_norm: 0.8767688866983138, iteration: 357603
loss: 1.0067148208618164,grad_norm: 0.6775053688920866, iteration: 357604
loss: 1.0247972011566162,grad_norm: 0.9454415741036615, iteration: 357605
loss: 1.0232172012329102,grad_norm: 0.8183919055910687, iteration: 357606
loss: 1.0527466535568237,grad_norm: 0.8314474701872353, iteration: 357607
loss: 0.9454736709594727,grad_norm: 0.8112930573877806, iteration: 357608
loss: 1.0128635168075562,grad_norm: 0.7072496403886972, iteration: 357609
loss: 1.014669418334961,grad_norm: 0.8983681185433448, iteration: 357610
loss: 1.0222173929214478,grad_norm: 0.9245871240476358, iteration: 357611
loss: 0.9677233099937439,grad_norm: 0.766303893392438, iteration: 357612
loss: 1.0109232664108276,grad_norm: 0.8881657632054685, iteration: 357613
loss: 0.9796140789985657,grad_norm: 0.8713834116193635, iteration: 357614
loss: 1.0303752422332764,grad_norm: 0.5788931624923305, iteration: 357615
loss: 1.0269712209701538,grad_norm: 0.9999992590517838, iteration: 357616
loss: 1.0154047012329102,grad_norm: 0.826616998750513, iteration: 357617
loss: 0.9993784427642822,grad_norm: 0.8306061544681345, iteration: 357618
loss: 1.00416898727417,grad_norm: 0.7400216406275487, iteration: 357619
loss: 1.002404808998108,grad_norm: 0.9719596010308604, iteration: 357620
loss: 1.0007075071334839,grad_norm: 0.801215327349718, iteration: 357621
loss: 0.9868358969688416,grad_norm: 0.8714870111332067, iteration: 357622
loss: 1.0851714611053467,grad_norm: 0.9439042082336884, iteration: 357623
loss: 1.017379641532898,grad_norm: 0.8276278342169119, iteration: 357624
loss: 1.03388512134552,grad_norm: 0.8273150548496507, iteration: 357625
loss: 1.0584824085235596,grad_norm: 0.827411976647625, iteration: 357626
loss: 0.9997515082359314,grad_norm: 0.8127367921576006, iteration: 357627
loss: 1.0190904140472412,grad_norm: 0.9999992488786681, iteration: 357628
loss: 0.9856580495834351,grad_norm: 0.8377681260877572, iteration: 357629
loss: 1.0166679620742798,grad_norm: 0.8917154451721456, iteration: 357630
loss: 0.9932522773742676,grad_norm: 0.9677402234548538, iteration: 357631
loss: 0.9751931428909302,grad_norm: 0.7402698811873559, iteration: 357632
loss: 1.0049036741256714,grad_norm: 0.7146130667062298, iteration: 357633
loss: 1.04214608669281,grad_norm: 0.8946290205528405, iteration: 357634
loss: 1.0109663009643555,grad_norm: 0.8822220457650147, iteration: 357635
loss: 0.962463915348053,grad_norm: 0.7811323531244164, iteration: 357636
loss: 1.0242100954055786,grad_norm: 0.9119925137868777, iteration: 357637
loss: 1.016192078590393,grad_norm: 0.7709043072234982, iteration: 357638
loss: 1.0559860467910767,grad_norm: 0.9999990892712531, iteration: 357639
loss: 1.0579625368118286,grad_norm: 0.9212514016204608, iteration: 357640
loss: 1.0402560234069824,grad_norm: 0.8398132947808671, iteration: 357641
loss: 1.0191116333007812,grad_norm: 0.8034073929528611, iteration: 357642
loss: 1.0315275192260742,grad_norm: 0.7792516124754051, iteration: 357643
loss: 1.1442183256149292,grad_norm: 0.999999167920551, iteration: 357644
loss: 0.9658716320991516,grad_norm: 0.884948351192252, iteration: 357645
loss: 0.9869032502174377,grad_norm: 0.7820221567686564, iteration: 357646
loss: 1.0147749185562134,grad_norm: 0.7592606057604001, iteration: 357647
loss: 1.040044903755188,grad_norm: 0.8599382161453634, iteration: 357648
loss: 1.0142388343811035,grad_norm: 0.8284849108554347, iteration: 357649
loss: 0.9971110820770264,grad_norm: 0.9999992484328336, iteration: 357650
loss: 1.0272388458251953,grad_norm: 0.8315073000866375, iteration: 357651
loss: 1.0152437686920166,grad_norm: 0.9999998901254261, iteration: 357652
loss: 0.984631359577179,grad_norm: 0.7986379400317593, iteration: 357653
loss: 1.126127004623413,grad_norm: 0.8240812428683337, iteration: 357654
loss: 1.0268630981445312,grad_norm: 0.8684851284762231, iteration: 357655
loss: 0.9982708692550659,grad_norm: 0.9999990753675739, iteration: 357656
loss: 0.9877172112464905,grad_norm: 0.9422235067752911, iteration: 357657
loss: 0.9941881895065308,grad_norm: 0.9999992095491032, iteration: 357658
loss: 1.0326814651489258,grad_norm: 0.8967859170629142, iteration: 357659
loss: 1.0238616466522217,grad_norm: 0.8807067563017076, iteration: 357660
loss: 0.9786001443862915,grad_norm: 0.8331617716850169, iteration: 357661
loss: 1.0178804397583008,grad_norm: 0.8776241748505962, iteration: 357662
loss: 1.0652985572814941,grad_norm: 0.9999996755067337, iteration: 357663
loss: 1.0181792974472046,grad_norm: 0.7911654334625401, iteration: 357664
loss: 1.0229016542434692,grad_norm: 0.8848662729429031, iteration: 357665
loss: 0.9883522391319275,grad_norm: 0.7717815326021656, iteration: 357666
loss: 0.9839876294136047,grad_norm: 0.7995462471224268, iteration: 357667
loss: 1.022100567817688,grad_norm: 0.7184737360873841, iteration: 357668
loss: 0.9659202694892883,grad_norm: 0.8944158336404119, iteration: 357669
loss: 1.011505126953125,grad_norm: 0.7826699335011436, iteration: 357670
loss: 1.0246270895004272,grad_norm: 0.8128853945159812, iteration: 357671
loss: 1.110114574432373,grad_norm: 0.9999990336298147, iteration: 357672
loss: 0.9928857088088989,grad_norm: 0.8978564848976442, iteration: 357673
loss: 0.9789831638336182,grad_norm: 0.9175322243859894, iteration: 357674
loss: 1.0312141180038452,grad_norm: 0.8563904869048576, iteration: 357675
loss: 1.044179916381836,grad_norm: 0.739059645746704, iteration: 357676
loss: 0.9872591495513916,grad_norm: 0.7828143249646015, iteration: 357677
loss: 1.0374243259429932,grad_norm: 0.9999998247323185, iteration: 357678
loss: 1.0544166564941406,grad_norm: 0.7433975247035381, iteration: 357679
loss: 0.9978936910629272,grad_norm: 0.8640953220546133, iteration: 357680
loss: 1.0126689672470093,grad_norm: 0.9184826366825718, iteration: 357681
loss: 1.1240110397338867,grad_norm: 0.9999995354574533, iteration: 357682
loss: 1.0043847560882568,grad_norm: 0.7783351574544771, iteration: 357683
loss: 1.0179725885391235,grad_norm: 0.7651755881986922, iteration: 357684
loss: 0.997288703918457,grad_norm: 0.8605904810046456, iteration: 357685
loss: 0.9884375333786011,grad_norm: 0.9429684330330519, iteration: 357686
loss: 1.0294147729873657,grad_norm: 0.8762900150341817, iteration: 357687
loss: 1.0855845212936401,grad_norm: 0.8457033417194203, iteration: 357688
loss: 0.9853872060775757,grad_norm: 0.9323242365483343, iteration: 357689
loss: 1.0326627492904663,grad_norm: 0.8901321588688897, iteration: 357690
loss: 1.0403016805648804,grad_norm: 0.6770440900415947, iteration: 357691
loss: 1.011014699935913,grad_norm: 0.7723919128317897, iteration: 357692
loss: 1.0057497024536133,grad_norm: 0.9275330914331635, iteration: 357693
loss: 1.024403691291809,grad_norm: 0.8264293583719124, iteration: 357694
loss: 1.1357977390289307,grad_norm: 0.9999992110373186, iteration: 357695
loss: 1.010101318359375,grad_norm: 0.72380599530163, iteration: 357696
loss: 0.9878580570220947,grad_norm: 0.8268343310649413, iteration: 357697
loss: 1.1400192975997925,grad_norm: 0.9999992771756171, iteration: 357698
loss: 1.0253095626831055,grad_norm: 0.7584614577272664, iteration: 357699
loss: 1.0472776889801025,grad_norm: 0.8411289333390872, iteration: 357700
loss: 1.0030102729797363,grad_norm: 0.8776816497091209, iteration: 357701
loss: 1.0428032875061035,grad_norm: 0.9999989003219539, iteration: 357702
loss: 1.0070927143096924,grad_norm: 0.9999998503876646, iteration: 357703
loss: 1.1315562725067139,grad_norm: 0.9999992301536724, iteration: 357704
loss: 1.0870349407196045,grad_norm: 0.9999993509903281, iteration: 357705
loss: 1.015699863433838,grad_norm: 0.7561261139574575, iteration: 357706
loss: 1.0227032899856567,grad_norm: 0.8398039111317741, iteration: 357707
loss: 0.9600859880447388,grad_norm: 0.9162778084252341, iteration: 357708
loss: 1.0486871004104614,grad_norm: 0.8577427015108797, iteration: 357709
loss: 1.1410586833953857,grad_norm: 0.9999996473649845, iteration: 357710
loss: 0.9641565084457397,grad_norm: 0.7772067603964298, iteration: 357711
loss: 1.0065925121307373,grad_norm: 0.8432004137550257, iteration: 357712
loss: 1.16130793094635,grad_norm: 0.9999997156514022, iteration: 357713
loss: 0.9893134236335754,grad_norm: 0.7816025914622415, iteration: 357714
loss: 1.0480952262878418,grad_norm: 0.7978232388369663, iteration: 357715
loss: 0.9744846820831299,grad_norm: 0.7582274158471256, iteration: 357716
loss: 0.9724925756454468,grad_norm: 0.9999995189648684, iteration: 357717
loss: 1.0150867700576782,grad_norm: 0.7918811240619366, iteration: 357718
loss: 1.0255296230316162,grad_norm: 0.7661876368965909, iteration: 357719
loss: 1.089980125427246,grad_norm: 0.8182900127003461, iteration: 357720
loss: 1.01566743850708,grad_norm: 0.9999993486986847, iteration: 357721
loss: 1.0001329183578491,grad_norm: 0.6082098522755524, iteration: 357722
loss: 0.9705502390861511,grad_norm: 0.9999996647768504, iteration: 357723
loss: 0.9641503095626831,grad_norm: 0.8497467394287289, iteration: 357724
loss: 1.0236843824386597,grad_norm: 0.9215320461197536, iteration: 357725
loss: 1.059871792793274,grad_norm: 0.9999997369418296, iteration: 357726
loss: 0.995036244392395,grad_norm: 0.9451700214028802, iteration: 357727
loss: 1.0371845960617065,grad_norm: 0.9999992655758153, iteration: 357728
loss: 0.9993442893028259,grad_norm: 0.9999999149499946, iteration: 357729
loss: 1.0514830350875854,grad_norm: 0.9999989582251003, iteration: 357730
loss: 1.1503382921218872,grad_norm: 0.9999995667214555, iteration: 357731
loss: 1.02899968624115,grad_norm: 0.8037952434206674, iteration: 357732
loss: 1.0465327501296997,grad_norm: 0.9999996770677192, iteration: 357733
loss: 1.0821439027786255,grad_norm: 0.9999999297036031, iteration: 357734
loss: 0.9682225584983826,grad_norm: 0.9403025920023634, iteration: 357735
loss: 1.175903558731079,grad_norm: 0.9999997237749029, iteration: 357736
loss: 1.041856288909912,grad_norm: 0.9838906660655738, iteration: 357737
loss: 1.0191032886505127,grad_norm: 0.8652402136095508, iteration: 357738
loss: 1.2483960390090942,grad_norm: 0.999999857815598, iteration: 357739
loss: 0.9952899813652039,grad_norm: 0.7784235526112274, iteration: 357740
loss: 0.9987044334411621,grad_norm: 0.9009118529773339, iteration: 357741
loss: 1.100827932357788,grad_norm: 0.9999990934776191, iteration: 357742
loss: 1.1372591257095337,grad_norm: 0.9999994490987655, iteration: 357743
loss: 1.1267262697219849,grad_norm: 0.9999993234386604, iteration: 357744
loss: 1.1221675872802734,grad_norm: 0.9999999087185751, iteration: 357745
loss: 1.2643580436706543,grad_norm: 0.999999939635781, iteration: 357746
loss: 0.9935246706008911,grad_norm: 0.9316738428948877, iteration: 357747
loss: 1.1267194747924805,grad_norm: 0.9999997341615551, iteration: 357748
loss: 1.034906029701233,grad_norm: 0.9999996341088462, iteration: 357749
loss: 1.193847417831421,grad_norm: 0.9999997672284319, iteration: 357750
loss: 1.0169621706008911,grad_norm: 0.9232114665182237, iteration: 357751
loss: 1.0966224670410156,grad_norm: 0.99999910825405, iteration: 357752
loss: 1.0103121995925903,grad_norm: 0.9013845442120931, iteration: 357753
loss: 1.035891056060791,grad_norm: 0.9999997972158569, iteration: 357754
loss: 1.0755763053894043,grad_norm: 0.999999239302032, iteration: 357755
loss: 1.0385916233062744,grad_norm: 0.9999995399304392, iteration: 357756
loss: 1.0006500482559204,grad_norm: 0.8133790212883758, iteration: 357757
loss: 1.062929630279541,grad_norm: 0.838620256331502, iteration: 357758
loss: 1.0073586702346802,grad_norm: 0.8354508965993862, iteration: 357759
loss: 0.9714888334274292,grad_norm: 0.848919136832282, iteration: 357760
loss: 0.9678061008453369,grad_norm: 0.9999998413716448, iteration: 357761
loss: 1.2524362802505493,grad_norm: 0.9999998931398647, iteration: 357762
loss: 1.1282050609588623,grad_norm: 0.9999999582789623, iteration: 357763
loss: 1.1208584308624268,grad_norm: 0.9999993910669134, iteration: 357764
loss: 1.134101390838623,grad_norm: 0.9999997878205398, iteration: 357765
loss: 0.9933024048805237,grad_norm: 0.9342277527737515, iteration: 357766
loss: 1.0595391988754272,grad_norm: 0.9999998055373537, iteration: 357767
loss: 1.1086788177490234,grad_norm: 0.9999991240520296, iteration: 357768
loss: 1.0337274074554443,grad_norm: 0.707640482754839, iteration: 357769
loss: 0.9905238151550293,grad_norm: 0.8520602306545626, iteration: 357770
loss: 1.0159708261489868,grad_norm: 0.8029187645744561, iteration: 357771
loss: 0.9492482542991638,grad_norm: 0.999999077785036, iteration: 357772
loss: 1.0113041400909424,grad_norm: 0.8781485837977951, iteration: 357773
loss: 1.0516124963760376,grad_norm: 0.999999134154499, iteration: 357774
loss: 1.0343984365463257,grad_norm: 0.9999994370043878, iteration: 357775
loss: 1.0023400783538818,grad_norm: 0.9999991858283814, iteration: 357776
loss: 1.094376564025879,grad_norm: 0.9999998107563807, iteration: 357777
loss: 1.0165270566940308,grad_norm: 0.9999990576684665, iteration: 357778
loss: 1.0336172580718994,grad_norm: 0.9999991818636, iteration: 357779
loss: 0.9938997030258179,grad_norm: 0.8204796572205842, iteration: 357780
loss: 1.0115580558776855,grad_norm: 0.8525604791618395, iteration: 357781
loss: 1.0312734842300415,grad_norm: 0.7559525641587441, iteration: 357782
loss: 1.0086709260940552,grad_norm: 0.9999996105702339, iteration: 357783
loss: 1.027754783630371,grad_norm: 0.7294050799188609, iteration: 357784
loss: 0.9994559288024902,grad_norm: 0.9032822340367777, iteration: 357785
loss: 1.020305871963501,grad_norm: 0.6509906759078046, iteration: 357786
loss: 0.9690336585044861,grad_norm: 0.8897697015807444, iteration: 357787
loss: 1.0078133344650269,grad_norm: 0.9002106222899501, iteration: 357788
loss: 0.9924885034561157,grad_norm: 0.9999992609305798, iteration: 357789
loss: 0.9998852014541626,grad_norm: 0.7994914137266614, iteration: 357790
loss: 1.0198553800582886,grad_norm: 0.9999992038095846, iteration: 357791
loss: 0.9944478869438171,grad_norm: 0.6638829595762918, iteration: 357792
loss: 1.0436149835586548,grad_norm: 0.9999999487225024, iteration: 357793
loss: 1.1014716625213623,grad_norm: 0.9999990821345012, iteration: 357794
loss: 1.001876711845398,grad_norm: 0.8291013124013819, iteration: 357795
loss: 1.0827322006225586,grad_norm: 0.9999997345323715, iteration: 357796
loss: 1.0403944253921509,grad_norm: 0.9999994575330031, iteration: 357797
loss: 1.0260095596313477,grad_norm: 0.821488474754438, iteration: 357798
loss: 1.1268001794815063,grad_norm: 0.9999990347713599, iteration: 357799
loss: 1.0942647457122803,grad_norm: 0.999999022989736, iteration: 357800
loss: 1.0739825963974,grad_norm: 0.999999267800288, iteration: 357801
loss: 1.074981927871704,grad_norm: 0.8904835765443138, iteration: 357802
loss: 1.0587118864059448,grad_norm: 0.9999997019361289, iteration: 357803
loss: 1.1102246046066284,grad_norm: 0.9999997241687358, iteration: 357804
loss: 0.9979857802391052,grad_norm: 1.0000000042894064, iteration: 357805
loss: 1.0881093740463257,grad_norm: 0.9999991369269929, iteration: 357806
loss: 1.0695010423660278,grad_norm: 0.9459631479424028, iteration: 357807
loss: 1.0645991563796997,grad_norm: 0.9999992361342396, iteration: 357808
loss: 0.9923360347747803,grad_norm: 0.9999991850024508, iteration: 357809
loss: 1.2221062183380127,grad_norm: 0.9999993859611743, iteration: 357810
loss: 1.090470552444458,grad_norm: 0.9999993184066417, iteration: 357811
loss: 1.0274803638458252,grad_norm: 0.7486144226384476, iteration: 357812
loss: 1.0075128078460693,grad_norm: 0.8041677249248438, iteration: 357813
loss: 1.0822738409042358,grad_norm: 0.9999997795650897, iteration: 357814
loss: 1.002272129058838,grad_norm: 0.8444087445012922, iteration: 357815
loss: 0.9973616600036621,grad_norm: 0.991664505797039, iteration: 357816
loss: 1.1016649007797241,grad_norm: 0.9999990478389968, iteration: 357817
loss: 0.998192310333252,grad_norm: 0.7851893744554163, iteration: 357818
loss: 1.0870989561080933,grad_norm: 0.8700703429951446, iteration: 357819
loss: 1.038217306137085,grad_norm: 0.885419111016409, iteration: 357820
loss: 1.1609450578689575,grad_norm: 0.9999996176830883, iteration: 357821
loss: 1.0553438663482666,grad_norm: 0.9999995591538409, iteration: 357822
loss: 1.0170737504959106,grad_norm: 0.7815678035440567, iteration: 357823
loss: 1.00342857837677,grad_norm: 0.7954628485614614, iteration: 357824
loss: 0.99042147397995,grad_norm: 0.9999991344961846, iteration: 357825
loss: 1.0106525421142578,grad_norm: 0.986513929056869, iteration: 357826
loss: 1.052322268486023,grad_norm: 0.9999994649256261, iteration: 357827
loss: 1.1641288995742798,grad_norm: 0.9999999298095352, iteration: 357828
loss: 1.0035147666931152,grad_norm: 0.7585430451774485, iteration: 357829
loss: 1.0363914966583252,grad_norm: 0.8217908950741407, iteration: 357830
loss: 1.0127606391906738,grad_norm: 0.7817747166172755, iteration: 357831
loss: 1.0121337175369263,grad_norm: 0.9999993705232141, iteration: 357832
loss: 1.0069236755371094,grad_norm: 0.96750525813075, iteration: 357833
loss: 1.0211185216903687,grad_norm: 0.7709245117799404, iteration: 357834
loss: 1.0140286684036255,grad_norm: 0.9999994741392313, iteration: 357835
loss: 1.0143771171569824,grad_norm: 0.8257273891599163, iteration: 357836
loss: 1.0312122106552124,grad_norm: 0.8285504353936944, iteration: 357837
loss: 1.0024775266647339,grad_norm: 0.8642849785207103, iteration: 357838
loss: 1.020140290260315,grad_norm: 0.8004957347586985, iteration: 357839
loss: 1.0927221775054932,grad_norm: 0.9999990773078069, iteration: 357840
loss: 1.040446400642395,grad_norm: 0.7582879148715139, iteration: 357841
loss: 1.0545241832733154,grad_norm: 0.9999997980331703, iteration: 357842
loss: 1.0840798616409302,grad_norm: 0.9999991286913474, iteration: 357843
loss: 1.0145478248596191,grad_norm: 0.904682634761459, iteration: 357844
loss: 1.0694329738616943,grad_norm: 0.9999990955612037, iteration: 357845
loss: 1.0517089366912842,grad_norm: 0.8842141380393982, iteration: 357846
loss: 1.0420397520065308,grad_norm: 0.9770783055982879, iteration: 357847
loss: 1.0131475925445557,grad_norm: 0.8604592788675913, iteration: 357848
loss: 0.9885480999946594,grad_norm: 0.8025579386269995, iteration: 357849
loss: 1.0796089172363281,grad_norm: 0.9999990735370599, iteration: 357850
loss: 0.9962037801742554,grad_norm: 0.7288146544579258, iteration: 357851
loss: 1.0161556005477905,grad_norm: 0.8992961344826961, iteration: 357852
loss: 1.0207314491271973,grad_norm: 0.830618641756299, iteration: 357853
loss: 0.9988368153572083,grad_norm: 0.8727142646412839, iteration: 357854
loss: 1.0756877660751343,grad_norm: 0.9999998469030454, iteration: 357855
loss: 1.0206574201583862,grad_norm: 0.8987289204518527, iteration: 357856
loss: 1.0152711868286133,grad_norm: 0.8006017703905469, iteration: 357857
loss: 1.0186829566955566,grad_norm: 0.7466476683155744, iteration: 357858
loss: 1.0200837850570679,grad_norm: 0.9718811443979262, iteration: 357859
loss: 1.1740167140960693,grad_norm: 0.9999994281423557, iteration: 357860
loss: 1.141898274421692,grad_norm: 0.9999999877711704, iteration: 357861
loss: 0.9937390089035034,grad_norm: 0.9882758697024165, iteration: 357862
loss: 1.024540901184082,grad_norm: 0.822840193563758, iteration: 357863
loss: 1.0309133529663086,grad_norm: 0.866621404130071, iteration: 357864
loss: 1.0323823690414429,grad_norm: 0.9999990155615716, iteration: 357865
loss: 1.0176873207092285,grad_norm: 0.8047513984401892, iteration: 357866
loss: 1.0204986333847046,grad_norm: 0.8846616878536177, iteration: 357867
loss: 0.993831992149353,grad_norm: 0.8168648802074269, iteration: 357868
loss: 1.0631041526794434,grad_norm: 0.8098726119889594, iteration: 357869
loss: 1.061874508857727,grad_norm: 0.9999998507253863, iteration: 357870
loss: 0.9935398101806641,grad_norm: 0.9312713776060638, iteration: 357871
loss: 1.0288530588150024,grad_norm: 0.8255697365880909, iteration: 357872
loss: 1.0468727350234985,grad_norm: 0.9999992460506868, iteration: 357873
loss: 1.0447087287902832,grad_norm: 0.726221532538682, iteration: 357874
loss: 1.130260944366455,grad_norm: 0.9999998322448146, iteration: 357875
loss: 0.9623814225196838,grad_norm: 0.9491308103464766, iteration: 357876
loss: 0.9920722842216492,grad_norm: 0.8274578462460621, iteration: 357877
loss: 1.016033411026001,grad_norm: 0.8067173054610792, iteration: 357878
loss: 1.0210649967193604,grad_norm: 0.8828254751311555, iteration: 357879
loss: 1.0047857761383057,grad_norm: 0.7713801076833923, iteration: 357880
loss: 1.0284446477890015,grad_norm: 0.9999995177041683, iteration: 357881
loss: 1.0645469427108765,grad_norm: 0.9999995958053919, iteration: 357882
loss: 0.9796233177185059,grad_norm: 0.7263995563225882, iteration: 357883
loss: 1.1108224391937256,grad_norm: 0.7579994447574878, iteration: 357884
loss: 1.0419344902038574,grad_norm: 0.9007293154521632, iteration: 357885
loss: 1.069226861000061,grad_norm: 0.9191046445098071, iteration: 357886
loss: 0.9856359362602234,grad_norm: 0.7632728449618849, iteration: 357887
loss: 1.019490361213684,grad_norm: 0.728147748693643, iteration: 357888
loss: 0.99163818359375,grad_norm: 0.9999989945148878, iteration: 357889
loss: 1.0508215427398682,grad_norm: 0.9999992376156968, iteration: 357890
loss: 1.0184299945831299,grad_norm: 0.9398529785239015, iteration: 357891
loss: 1.0301837921142578,grad_norm: 0.9999999913731719, iteration: 357892
loss: 1.0263413190841675,grad_norm: 0.9013273225021637, iteration: 357893
loss: 0.9774888157844543,grad_norm: 0.7971298638589548, iteration: 357894
loss: 1.006165623664856,grad_norm: 0.8090335285664284, iteration: 357895
loss: 1.0061639547348022,grad_norm: 0.9999992002943262, iteration: 357896
loss: 1.026969313621521,grad_norm: 0.9451668823964413, iteration: 357897
loss: 1.0095460414886475,grad_norm: 0.9999991688540343, iteration: 357898
loss: 1.0276215076446533,grad_norm: 0.8003882050222352, iteration: 357899
loss: 1.01639986038208,grad_norm: 0.9999990475269392, iteration: 357900
loss: 1.03117036819458,grad_norm: 0.9560558345492312, iteration: 357901
loss: 0.952289879322052,grad_norm: 0.9177086200369208, iteration: 357902
loss: 1.06461763381958,grad_norm: 0.7560463540023078, iteration: 357903
loss: 1.0207573175430298,grad_norm: 0.8294276810619883, iteration: 357904
loss: 1.068068504333496,grad_norm: 0.9636492808591433, iteration: 357905
loss: 0.9968864321708679,grad_norm: 0.9068622830081566, iteration: 357906
loss: 1.0793414115905762,grad_norm: 0.9851970227717886, iteration: 357907
loss: 1.1354107856750488,grad_norm: 0.9999991837892187, iteration: 357908
loss: 1.061436414718628,grad_norm: 0.9999992408970391, iteration: 357909
loss: 1.0541092157363892,grad_norm: 0.9758840467933224, iteration: 357910
loss: 1.0576406717300415,grad_norm: 0.7907538829349845, iteration: 357911
loss: 0.9939828515052795,grad_norm: 0.9222673676488335, iteration: 357912
loss: 1.0531386137008667,grad_norm: 0.8581557698036814, iteration: 357913
loss: 1.008874535560608,grad_norm: 0.8643408672418607, iteration: 357914
loss: 1.0123662948608398,grad_norm: 0.8549654808690683, iteration: 357915
loss: 0.9947839379310608,grad_norm: 0.999999174652084, iteration: 357916
loss: 0.974248468875885,grad_norm: 0.8994379121899356, iteration: 357917
loss: 1.1147180795669556,grad_norm: 0.9344102192946513, iteration: 357918
loss: 1.0437347888946533,grad_norm: 0.9999994974862509, iteration: 357919
loss: 1.0326459407806396,grad_norm: 0.9794655661677799, iteration: 357920
loss: 0.9925863146781921,grad_norm: 0.6981685120058616, iteration: 357921
loss: 1.109668254852295,grad_norm: 0.9999999957011831, iteration: 357922
loss: 1.0495649576187134,grad_norm: 0.935666518670356, iteration: 357923
loss: 1.059722661972046,grad_norm: 0.9486180241829708, iteration: 357924
loss: 1.0641313791275024,grad_norm: 0.801822046041463, iteration: 357925
loss: 0.995824933052063,grad_norm: 0.8355874424161626, iteration: 357926
loss: 1.1003913879394531,grad_norm: 0.9999993060372008, iteration: 357927
loss: 0.9988642930984497,grad_norm: 0.7514735215429146, iteration: 357928
loss: 1.063514232635498,grad_norm: 0.9999991973561637, iteration: 357929
loss: 1.0457491874694824,grad_norm: 0.7989815206591594, iteration: 357930
loss: 1.007828712463379,grad_norm: 0.9625766029697753, iteration: 357931
loss: 0.9585967659950256,grad_norm: 0.820006968962731, iteration: 357932
loss: 0.9886974096298218,grad_norm: 0.7689047481642947, iteration: 357933
loss: 0.9984786510467529,grad_norm: 0.7956662890897229, iteration: 357934
loss: 1.0091203451156616,grad_norm: 0.7856928644665304, iteration: 357935
loss: 0.9976831078529358,grad_norm: 0.7519141861542328, iteration: 357936
loss: 0.9860000014305115,grad_norm: 0.7898278804741138, iteration: 357937
loss: 0.9773513674736023,grad_norm: 0.851778673153028, iteration: 357938
loss: 0.9979986548423767,grad_norm: 0.8831333649153127, iteration: 357939
loss: 1.001421332359314,grad_norm: 0.7890882024243481, iteration: 357940
loss: 1.0084501504898071,grad_norm: 0.9999995117548073, iteration: 357941
loss: 1.0311460494995117,grad_norm: 0.9999992958600736, iteration: 357942
loss: 1.2126747369766235,grad_norm: 0.9999996705699232, iteration: 357943
loss: 1.1005487442016602,grad_norm: 0.960281781160681, iteration: 357944
loss: 0.9850935339927673,grad_norm: 0.7988485815711521, iteration: 357945
loss: 1.020098328590393,grad_norm: 0.999999094968419, iteration: 357946
loss: 0.9875715374946594,grad_norm: 0.8677023831593941, iteration: 357947
loss: 0.9880252480506897,grad_norm: 0.9999993717687008, iteration: 357948
loss: 1.0527515411376953,grad_norm: 0.9999991194924974, iteration: 357949
loss: 1.0238451957702637,grad_norm: 0.8384315134874856, iteration: 357950
loss: 0.9780266880989075,grad_norm: 0.999999409067221, iteration: 357951
loss: 1.0093892812728882,grad_norm: 0.7097068135998262, iteration: 357952
loss: 1.0303999185562134,grad_norm: 0.7811755088100507, iteration: 357953
loss: 1.0182279348373413,grad_norm: 0.7585004321103183, iteration: 357954
loss: 1.0528805255889893,grad_norm: 0.9999991995887237, iteration: 357955
loss: 1.0413765907287598,grad_norm: 0.9999999267182977, iteration: 357956
loss: 1.0658029317855835,grad_norm: 1.0000000295213036, iteration: 357957
loss: 0.9963794350624084,grad_norm: 0.9999993351940372, iteration: 357958
loss: 1.0830798149108887,grad_norm: 0.9999996795506606, iteration: 357959
loss: 1.0209916830062866,grad_norm: 0.9999993888049974, iteration: 357960
loss: 0.996675431728363,grad_norm: 0.8481030323941258, iteration: 357961
loss: 0.9965757131576538,grad_norm: 0.9532939657449473, iteration: 357962
loss: 0.9946521520614624,grad_norm: 0.9999999836072359, iteration: 357963
loss: 1.06846022605896,grad_norm: 0.8582249460076835, iteration: 357964
loss: 1.0087782144546509,grad_norm: 0.8221550487320443, iteration: 357965
loss: 0.9873085618019104,grad_norm: 0.8804746370997613, iteration: 357966
loss: 0.9897001385688782,grad_norm: 0.820919928348152, iteration: 357967
loss: 1.0069297552108765,grad_norm: 0.730383718729722, iteration: 357968
loss: 0.9992680549621582,grad_norm: 0.8285346279873893, iteration: 357969
loss: 1.022581696510315,grad_norm: 0.8531397607575988, iteration: 357970
loss: 0.96732097864151,grad_norm: 0.9421546403327397, iteration: 357971
loss: 1.125203251838684,grad_norm: 0.9999997683991394, iteration: 357972
loss: 0.9965306520462036,grad_norm: 0.7603300758657345, iteration: 357973
loss: 1.055046558380127,grad_norm: 0.8049989605542298, iteration: 357974
loss: 0.9820199608802795,grad_norm: 0.8830569072908303, iteration: 357975
loss: 1.0042229890823364,grad_norm: 0.8297026977599741, iteration: 357976
loss: 1.0326687097549438,grad_norm: 0.7682122484063179, iteration: 357977
loss: 1.0078204870224,grad_norm: 0.7520697768596177, iteration: 357978
loss: 0.9909060597419739,grad_norm: 0.9999990560183202, iteration: 357979
loss: 1.0286321640014648,grad_norm: 0.999999122519295, iteration: 357980
loss: 1.017566204071045,grad_norm: 0.8668509887519171, iteration: 357981
loss: 0.9714088439941406,grad_norm: 0.898524067027182, iteration: 357982
loss: 1.0053749084472656,grad_norm: 0.9012382107219754, iteration: 357983
loss: 1.0179417133331299,grad_norm: 0.8544795866894428, iteration: 357984
loss: 1.0545204877853394,grad_norm: 0.9999992223371484, iteration: 357985
loss: 1.0946630239486694,grad_norm: 0.9999996813983618, iteration: 357986
loss: 1.1338014602661133,grad_norm: 0.9999994123705747, iteration: 357987
loss: 1.0489486455917358,grad_norm: 0.9999997450813192, iteration: 357988
loss: 1.0076878070831299,grad_norm: 0.8750409216013086, iteration: 357989
loss: 1.006605625152588,grad_norm: 0.7594980537188564, iteration: 357990
loss: 0.9875199794769287,grad_norm: 0.7069304192872994, iteration: 357991
loss: 1.0666084289550781,grad_norm: 0.9999994509873289, iteration: 357992
loss: 1.1469987630844116,grad_norm: 0.9999997609737317, iteration: 357993
loss: 1.021775722503662,grad_norm: 0.720932426453097, iteration: 357994
loss: 1.0017141103744507,grad_norm: 0.9999991013369962, iteration: 357995
loss: 1.0896375179290771,grad_norm: 0.9832708594484654, iteration: 357996
loss: 1.0212101936340332,grad_norm: 0.9999990811918351, iteration: 357997
loss: 1.011664628982544,grad_norm: 0.7686237423304254, iteration: 357998
loss: 1.0776422023773193,grad_norm: 0.9999993516651197, iteration: 357999
loss: 1.002116084098816,grad_norm: 0.999999802887002, iteration: 358000
loss: 0.9721823930740356,grad_norm: 0.7593426117959307, iteration: 358001
loss: 1.0627809762954712,grad_norm: 0.8520637482203727, iteration: 358002
loss: 1.0160831212997437,grad_norm: 0.7557259620137254, iteration: 358003
loss: 0.9838076829910278,grad_norm: 0.8055623063953083, iteration: 358004
loss: 1.0431909561157227,grad_norm: 0.9074429323054211, iteration: 358005
loss: 0.9736933708190918,grad_norm: 0.8231980867535499, iteration: 358006
loss: 1.031064748764038,grad_norm: 0.6678858209308032, iteration: 358007
loss: 1.0369535684585571,grad_norm: 0.9654692854763731, iteration: 358008
loss: 1.0279254913330078,grad_norm: 0.9999991102682488, iteration: 358009
loss: 1.1549760103225708,grad_norm: 0.9999998760001909, iteration: 358010
loss: 1.0035289525985718,grad_norm: 0.999999271221535, iteration: 358011
loss: 1.015491247177124,grad_norm: 0.8412524475333396, iteration: 358012
loss: 1.0160595178604126,grad_norm: 0.7921908849520677, iteration: 358013
loss: 1.000083088874817,grad_norm: 0.7466832290048053, iteration: 358014
loss: 1.0325987339019775,grad_norm: 0.9982898043468728, iteration: 358015
loss: 0.9620510339736938,grad_norm: 0.6645542779978446, iteration: 358016
loss: 0.9695327281951904,grad_norm: 0.7249220800897946, iteration: 358017
loss: 1.0147156715393066,grad_norm: 0.7991758023988286, iteration: 358018
loss: 0.9835770726203918,grad_norm: 0.644196870057823, iteration: 358019
loss: 1.0212247371673584,grad_norm: 0.8842285029481921, iteration: 358020
loss: 1.0018857717514038,grad_norm: 0.7269153462912514, iteration: 358021
loss: 0.9955491423606873,grad_norm: 0.8348974344560405, iteration: 358022
loss: 1.020012617111206,grad_norm: 0.9999997886949668, iteration: 358023
loss: 1.065201759338379,grad_norm: 0.7246380115652388, iteration: 358024
loss: 1.1223409175872803,grad_norm: 0.999999835179103, iteration: 358025
loss: 1.0032182931900024,grad_norm: 0.8629982291833812, iteration: 358026
loss: 0.9743536114692688,grad_norm: 0.8175757836703821, iteration: 358027
loss: 0.9883232712745667,grad_norm: 0.767494225825481, iteration: 358028
loss: 0.9828067421913147,grad_norm: 0.8758039470044641, iteration: 358029
loss: 1.0237454175949097,grad_norm: 0.7972262646432572, iteration: 358030
loss: 1.0201129913330078,grad_norm: 0.8996068684631704, iteration: 358031
loss: 1.0076154470443726,grad_norm: 0.9999999738636532, iteration: 358032
loss: 0.9924795031547546,grad_norm: 0.8742648563291288, iteration: 358033
loss: 1.0021615028381348,grad_norm: 0.8276651353770604, iteration: 358034
loss: 1.0659196376800537,grad_norm: 0.889122667707605, iteration: 358035
loss: 1.0019563436508179,grad_norm: 0.8813341630173341, iteration: 358036
loss: 1.0517631769180298,grad_norm: 0.8393230567930644, iteration: 358037
loss: 1.0273804664611816,grad_norm: 0.8546618669216839, iteration: 358038
loss: 0.9843661189079285,grad_norm: 0.9247159964599488, iteration: 358039
loss: 1.0394644737243652,grad_norm: 0.8422042858997308, iteration: 358040
loss: 1.004011869430542,grad_norm: 0.839785318476795, iteration: 358041
loss: 0.9913410544395447,grad_norm: 0.7951361274574198, iteration: 358042
loss: 1.0087134838104248,grad_norm: 0.9527276515893218, iteration: 358043
loss: 1.1156913042068481,grad_norm: 0.8534495060818322, iteration: 358044
loss: 0.9989324808120728,grad_norm: 0.7947454744525453, iteration: 358045
loss: 1.0284450054168701,grad_norm: 0.8941170949742065, iteration: 358046
loss: 1.011022686958313,grad_norm: 0.9918257140152333, iteration: 358047
loss: 0.9966641068458557,grad_norm: 0.7257782026706089, iteration: 358048
loss: 0.9997677206993103,grad_norm: 0.9233377053103696, iteration: 358049
loss: 1.0659836530685425,grad_norm: 0.8622129236643398, iteration: 358050
loss: 1.0231550931930542,grad_norm: 0.9153703525114582, iteration: 358051
loss: 1.0200995206832886,grad_norm: 0.8059922693732885, iteration: 358052
loss: 1.0335277318954468,grad_norm: 0.8693334166348093, iteration: 358053
loss: 1.0057463645935059,grad_norm: 0.7181970997588161, iteration: 358054
loss: 1.0452930927276611,grad_norm: 0.9577612222259074, iteration: 358055
loss: 0.9871177077293396,grad_norm: 0.8307247069722271, iteration: 358056
loss: 0.9808534979820251,grad_norm: 0.9999991801527214, iteration: 358057
loss: 1.0137768983840942,grad_norm: 0.7267936896028006, iteration: 358058
loss: 0.9716721177101135,grad_norm: 0.6971574080062499, iteration: 358059
loss: 1.021651029586792,grad_norm: 0.9999993529111324, iteration: 358060
loss: 0.9746736288070679,grad_norm: 0.827941692201183, iteration: 358061
loss: 0.9635738134384155,grad_norm: 0.7448122392765946, iteration: 358062
loss: 1.0350165367126465,grad_norm: 0.9999992571196216, iteration: 358063
loss: 1.0033750534057617,grad_norm: 0.8251325503833514, iteration: 358064
loss: 1.000293493270874,grad_norm: 0.8619769259824105, iteration: 358065
loss: 1.00627601146698,grad_norm: 0.82707460420088, iteration: 358066
loss: 1.0084381103515625,grad_norm: 0.8610254853214637, iteration: 358067
loss: 1.0190953016281128,grad_norm: 0.89195511333516, iteration: 358068
loss: 0.9648919701576233,grad_norm: 0.8069446195537266, iteration: 358069
loss: 1.0222713947296143,grad_norm: 0.8896257949375025, iteration: 358070
loss: 1.0471314191818237,grad_norm: 0.9999991996256278, iteration: 358071
loss: 0.9914172887802124,grad_norm: 0.8369240008871076, iteration: 358072
loss: 0.9811897873878479,grad_norm: 0.9999993159547422, iteration: 358073
loss: 0.9678765535354614,grad_norm: 0.7037107820429406, iteration: 358074
loss: 0.9913781881332397,grad_norm: 0.8391367884093002, iteration: 358075
loss: 1.0630550384521484,grad_norm: 0.9999994600497771, iteration: 358076
loss: 1.0138739347457886,grad_norm: 0.9053654668793253, iteration: 358077
loss: 1.03036630153656,grad_norm: 0.789400391667958, iteration: 358078
loss: 0.9749406576156616,grad_norm: 0.999999166220663, iteration: 358079
loss: 0.9814878702163696,grad_norm: 0.6989318596733871, iteration: 358080
loss: 1.0169756412506104,grad_norm: 0.9999991767397048, iteration: 358081
loss: 1.033401608467102,grad_norm: 0.8395524158268957, iteration: 358082
loss: 0.9776778221130371,grad_norm: 0.7753565165409392, iteration: 358083
loss: 0.9811423420906067,grad_norm: 0.8829398581655729, iteration: 358084
loss: 0.999678909778595,grad_norm: 0.9999994484285571, iteration: 358085
loss: 1.0263656377792358,grad_norm: 0.7604412279688556, iteration: 358086
loss: 0.9662185311317444,grad_norm: 0.999999212517594, iteration: 358087
loss: 1.0467511415481567,grad_norm: 0.9999993269032903, iteration: 358088
loss: 0.9679225087165833,grad_norm: 0.9288178913429868, iteration: 358089
loss: 0.9944764971733093,grad_norm: 0.6404513850226029, iteration: 358090
loss: 1.0046422481536865,grad_norm: 1.0000000288334145, iteration: 358091
loss: 1.0122270584106445,grad_norm: 0.9999993898862358, iteration: 358092
loss: 1.045089840888977,grad_norm: 0.9999991178160875, iteration: 358093
loss: 1.0115694999694824,grad_norm: 0.9999997804466954, iteration: 358094
loss: 0.9988470673561096,grad_norm: 0.7363642674233785, iteration: 358095
loss: 1.0086309909820557,grad_norm: 0.8187136290082304, iteration: 358096
loss: 1.0130536556243896,grad_norm: 0.8537493489857874, iteration: 358097
loss: 0.9944761395454407,grad_norm: 0.7761765938501131, iteration: 358098
loss: 1.045395016670227,grad_norm: 0.8063816393801277, iteration: 358099
loss: 1.0316015481948853,grad_norm: 0.9999994350647148, iteration: 358100
loss: 1.0356227159500122,grad_norm: 0.8899427453050102, iteration: 358101
loss: 0.9766453504562378,grad_norm: 0.9765023780979781, iteration: 358102
loss: 0.933790385723114,grad_norm: 0.7464537362868232, iteration: 358103
loss: 0.9912751913070679,grad_norm: 0.7597407150342759, iteration: 358104
loss: 0.9744673371315002,grad_norm: 0.8717201128543353, iteration: 358105
loss: 1.0034068822860718,grad_norm: 0.9467119102431544, iteration: 358106
loss: 0.9862534403800964,grad_norm: 0.5617914239893937, iteration: 358107
loss: 0.9952565431594849,grad_norm: 0.9092126124658901, iteration: 358108
loss: 0.974718451499939,grad_norm: 0.855097295076727, iteration: 358109
loss: 1.0555649995803833,grad_norm: 0.8939489835379362, iteration: 358110
loss: 0.9890117049217224,grad_norm: 0.8357402966253726, iteration: 358111
loss: 1.149042010307312,grad_norm: 0.9999992076338545, iteration: 358112
loss: 0.9782119393348694,grad_norm: 0.8182679895551703, iteration: 358113
loss: 0.9876273274421692,grad_norm: 0.8673701050726116, iteration: 358114
loss: 1.0489734411239624,grad_norm: 0.9262385843522711, iteration: 358115
loss: 0.996931791305542,grad_norm: 0.6607137002946202, iteration: 358116
loss: 1.0654960870742798,grad_norm: 0.8060959086435274, iteration: 358117
loss: 1.0084738731384277,grad_norm: 0.948221664852227, iteration: 358118
loss: 1.0048227310180664,grad_norm: 0.8866248755840552, iteration: 358119
loss: 1.206281304359436,grad_norm: 0.999999039118682, iteration: 358120
loss: 1.0236502885818481,grad_norm: 0.7983818445878564, iteration: 358121
loss: 1.0362048149108887,grad_norm: 0.9999996763921917, iteration: 358122
loss: 1.0026345252990723,grad_norm: 0.8715260569245317, iteration: 358123
loss: 1.0180381536483765,grad_norm: 0.7426305234545653, iteration: 358124
loss: 1.033827781677246,grad_norm: 0.8422488367178262, iteration: 358125
loss: 0.9885599613189697,grad_norm: 0.7916599271948872, iteration: 358126
loss: 1.0182745456695557,grad_norm: 0.96272911387555, iteration: 358127
loss: 1.0134201049804688,grad_norm: 0.8895583587181624, iteration: 358128
loss: 0.9866728186607361,grad_norm: 0.8377580927643719, iteration: 358129
loss: 1.0116420984268188,grad_norm: 0.759573938067834, iteration: 358130
loss: 1.0088191032409668,grad_norm: 0.790879282629261, iteration: 358131
loss: 1.0766539573669434,grad_norm: 0.9999990404756794, iteration: 358132
loss: 1.0407236814498901,grad_norm: 0.9116619976346775, iteration: 358133
loss: 0.9983823895454407,grad_norm: 0.7607197877572378, iteration: 358134
loss: 0.997374951839447,grad_norm: 0.8946984353489006, iteration: 358135
loss: 1.068166971206665,grad_norm: 0.9821589160520978, iteration: 358136
loss: 1.0152033567428589,grad_norm: 0.6929650028442774, iteration: 358137
loss: 1.0034316778182983,grad_norm: 0.8395667350619477, iteration: 358138
loss: 1.0703094005584717,grad_norm: 0.9999998555316718, iteration: 358139
loss: 1.0057001113891602,grad_norm: 0.7960565803631834, iteration: 358140
loss: 1.0802059173583984,grad_norm: 0.9563057467516313, iteration: 358141
loss: 1.0325711965560913,grad_norm: 0.9999994500696405, iteration: 358142
loss: 1.0795820951461792,grad_norm: 0.9999990856234824, iteration: 358143
loss: 1.0192503929138184,grad_norm: 0.9999990129260928, iteration: 358144
loss: 0.987121045589447,grad_norm: 0.8155511863075472, iteration: 358145
loss: 1.0297353267669678,grad_norm: 0.9999993955973643, iteration: 358146
loss: 1.0025888681411743,grad_norm: 0.7919293962340181, iteration: 358147
loss: 1.020874261856079,grad_norm: 0.9096272587493762, iteration: 358148
loss: 1.1194273233413696,grad_norm: 0.999999256207533, iteration: 358149
loss: 0.9965901970863342,grad_norm: 0.7956172360022652, iteration: 358150
loss: 0.9955711364746094,grad_norm: 0.9999998502247343, iteration: 358151
loss: 1.0666230916976929,grad_norm: 0.8038221069143116, iteration: 358152
loss: 0.9947526454925537,grad_norm: 0.693692353384276, iteration: 358153
loss: 1.071272611618042,grad_norm: 0.9999991539457509, iteration: 358154
loss: 0.9931337237358093,grad_norm: 0.7339365983918116, iteration: 358155
loss: 1.0434662103652954,grad_norm: 0.9405949847374171, iteration: 358156
loss: 1.0098237991333008,grad_norm: 0.8342644635520682, iteration: 358157
loss: 1.035875678062439,grad_norm: 0.999999440762229, iteration: 358158
loss: 0.9923210740089417,grad_norm: 0.7657979494936876, iteration: 358159
loss: 1.0181138515472412,grad_norm: 0.999999781908932, iteration: 358160
loss: 1.0153310298919678,grad_norm: 0.999999440300822, iteration: 358161
loss: 1.0767041444778442,grad_norm: 0.9999990394784473, iteration: 358162
loss: 1.210884928703308,grad_norm: 0.9999998160232721, iteration: 358163
loss: 1.0529752969741821,grad_norm: 0.7497890980364625, iteration: 358164
loss: 1.0645296573638916,grad_norm: 0.8990176302089111, iteration: 358165
loss: 1.0306613445281982,grad_norm: 0.7994148106443574, iteration: 358166
loss: 1.0183500051498413,grad_norm: 0.9088517033581792, iteration: 358167
loss: 0.9962764978408813,grad_norm: 0.7438419418576331, iteration: 358168
loss: 1.0791492462158203,grad_norm: 0.9999997481262444, iteration: 358169
loss: 1.0544800758361816,grad_norm: 1.0000000231313895, iteration: 358170
loss: 1.0143892765045166,grad_norm: 0.9275827090666993, iteration: 358171
loss: 1.0582151412963867,grad_norm: 0.7966237222764639, iteration: 358172
loss: 0.9842552542686462,grad_norm: 0.9330954335956043, iteration: 358173
loss: 1.0593881607055664,grad_norm: 0.9999999137029688, iteration: 358174
loss: 1.008989691734314,grad_norm: 0.8312697480586798, iteration: 358175
loss: 0.9851877689361572,grad_norm: 0.8729263066520662, iteration: 358176
loss: 0.9900060892105103,grad_norm: 0.8390945781383988, iteration: 358177
loss: 0.9952409863471985,grad_norm: 0.8031212223655865, iteration: 358178
loss: 1.034031867980957,grad_norm: 0.9999992201996432, iteration: 358179
loss: 1.091732382774353,grad_norm: 0.9999989766681661, iteration: 358180
loss: 1.0350985527038574,grad_norm: 0.9999992849595817, iteration: 358181
loss: 1.0788161754608154,grad_norm: 0.9999996656747664, iteration: 358182
loss: 1.0040876865386963,grad_norm: 0.701119293086917, iteration: 358183
loss: 0.9460850358009338,grad_norm: 0.7696207416025127, iteration: 358184
loss: 1.0110175609588623,grad_norm: 0.8070959649685687, iteration: 358185
loss: 1.0961196422576904,grad_norm: 0.9437055997169206, iteration: 358186
loss: 1.0124866962432861,grad_norm: 0.6416691115274692, iteration: 358187
loss: 0.9947223663330078,grad_norm: 0.8295126479003165, iteration: 358188
loss: 1.0880060195922852,grad_norm: 0.9999992616348706, iteration: 358189
loss: 1.0224875211715698,grad_norm: 0.9209208975418534, iteration: 358190
loss: 1.036400318145752,grad_norm: 1.000000057720415, iteration: 358191
loss: 0.9961755871772766,grad_norm: 0.8276365880766634, iteration: 358192
loss: 1.0120700597763062,grad_norm: 0.9999989945530204, iteration: 358193
loss: 1.179795503616333,grad_norm: 0.9999998914914657, iteration: 358194
loss: 1.0313866138458252,grad_norm: 0.7897462242690027, iteration: 358195
loss: 1.0304118394851685,grad_norm: 0.7581161488165302, iteration: 358196
loss: 1.0112059116363525,grad_norm: 0.7932798479108667, iteration: 358197
loss: 1.0132042169570923,grad_norm: 0.8246630178946875, iteration: 358198
loss: 1.0689232349395752,grad_norm: 0.88595971052171, iteration: 358199
loss: 1.1341912746429443,grad_norm: 0.9999999015822145, iteration: 358200
loss: 1.025827169418335,grad_norm: 0.9999989884255602, iteration: 358201
loss: 1.0377039909362793,grad_norm: 0.7455401356697956, iteration: 358202
loss: 1.0110918283462524,grad_norm: 0.7985660049888585, iteration: 358203
loss: 1.0583620071411133,grad_norm: 0.9476112559084744, iteration: 358204
loss: 1.0205347537994385,grad_norm: 0.9999994795092135, iteration: 358205
loss: 1.0269919633865356,grad_norm: 0.7781569324560319, iteration: 358206
loss: 0.9957255721092224,grad_norm: 0.898361639101008, iteration: 358207
loss: 1.0439523458480835,grad_norm: 0.999999341011576, iteration: 358208
loss: 1.290245771408081,grad_norm: 0.9999998357361511, iteration: 358209
loss: 1.0093494653701782,grad_norm: 0.8644067130842109, iteration: 358210
loss: 1.0479446649551392,grad_norm: 0.9999998600384026, iteration: 358211
loss: 1.2789664268493652,grad_norm: 0.9999998509132875, iteration: 358212
loss: 0.9928086996078491,grad_norm: 0.9999990678243125, iteration: 358213
loss: 1.020801067352295,grad_norm: 0.7137162552440389, iteration: 358214
loss: 0.9980822205543518,grad_norm: 0.9110437558163762, iteration: 358215
loss: 1.0497061014175415,grad_norm: 0.9511553334669732, iteration: 358216
loss: 1.0147866010665894,grad_norm: 0.9999990423901287, iteration: 358217
loss: 0.993006706237793,grad_norm: 0.8892929030862896, iteration: 358218
loss: 1.0085439682006836,grad_norm: 0.703563305821523, iteration: 358219
loss: 1.0960099697113037,grad_norm: 0.9999993351190214, iteration: 358220
loss: 1.1268779039382935,grad_norm: 0.9999998255285937, iteration: 358221
loss: 1.034205436706543,grad_norm: 0.8971468517072333, iteration: 358222
loss: 1.0842665433883667,grad_norm: 0.9999996257111162, iteration: 358223
loss: 1.008876919746399,grad_norm: 0.8761440045769326, iteration: 358224
loss: 1.060976505279541,grad_norm: 0.9407519718017593, iteration: 358225
loss: 1.139358401298523,grad_norm: 0.9999998486943127, iteration: 358226
loss: 1.026980996131897,grad_norm: 0.999999735497126, iteration: 358227
loss: 1.0387834310531616,grad_norm: 0.9999996119613913, iteration: 358228
loss: 1.0028084516525269,grad_norm: 0.8419352125557586, iteration: 358229
loss: 1.2283276319503784,grad_norm: 0.999999239101289, iteration: 358230
loss: 1.1262036561965942,grad_norm: 0.9999995684777209, iteration: 358231
loss: 0.9890143871307373,grad_norm: 0.8863694086483951, iteration: 358232
loss: 1.0240620374679565,grad_norm: 0.9999994275145753, iteration: 358233
loss: 1.0157772302627563,grad_norm: 0.9291863755147022, iteration: 358234
loss: 1.0137763023376465,grad_norm: 0.7318938902666927, iteration: 358235
loss: 1.1484947204589844,grad_norm: 0.9999993189716666, iteration: 358236
loss: 0.9953944087028503,grad_norm: 0.8775489136207552, iteration: 358237
loss: 1.0230997800827026,grad_norm: 0.9999992523277004, iteration: 358238
loss: 0.9922948479652405,grad_norm: 0.9090008039424441, iteration: 358239
loss: 1.014376163482666,grad_norm: 0.7923016403694146, iteration: 358240
loss: 1.0716478824615479,grad_norm: 0.9999998986384315, iteration: 358241
loss: 1.1619600057601929,grad_norm: 0.9999996524866244, iteration: 358242
loss: 1.0223687887191772,grad_norm: 0.999999018194152, iteration: 358243
loss: 1.0263639688491821,grad_norm: 0.9286683289003046, iteration: 358244
loss: 1.0499650239944458,grad_norm: 0.9999997403184779, iteration: 358245
loss: 1.0870686769485474,grad_norm: 0.9297385254019228, iteration: 358246
loss: 1.1444942951202393,grad_norm: 0.9999990175788535, iteration: 358247
loss: 1.2219980955123901,grad_norm: 0.9999996730829587, iteration: 358248
loss: 1.0380637645721436,grad_norm: 0.999999390076394, iteration: 358249
loss: 1.125583529472351,grad_norm: 0.9999997757302559, iteration: 358250
loss: 1.0440162420272827,grad_norm: 0.9999998428831516, iteration: 358251
loss: 1.013202428817749,grad_norm: 0.9999997541752502, iteration: 358252
loss: 1.0542888641357422,grad_norm: 0.9999998060753449, iteration: 358253
loss: 1.002500057220459,grad_norm: 0.790972844716503, iteration: 358254
loss: 0.9789266586303711,grad_norm: 0.9999990168437659, iteration: 358255
loss: 0.9834479689598083,grad_norm: 0.9529617056206453, iteration: 358256
loss: 0.9786776304244995,grad_norm: 0.7310485164391882, iteration: 358257
loss: 1.0857484340667725,grad_norm: 0.9999996121898639, iteration: 358258
loss: 1.012345552444458,grad_norm: 0.8602990849592845, iteration: 358259
loss: 1.0310218334197998,grad_norm: 0.9999996580107594, iteration: 358260
loss: 0.9871326088905334,grad_norm: 0.8496702556599771, iteration: 358261
loss: 0.9905647039413452,grad_norm: 0.9671815663274199, iteration: 358262
loss: 0.9856166839599609,grad_norm: 0.8797344912664152, iteration: 358263
loss: 1.060961365699768,grad_norm: 0.9999991629943257, iteration: 358264
loss: 1.0967284440994263,grad_norm: 0.9999999175521719, iteration: 358265
loss: 1.014549732208252,grad_norm: 0.651108459201639, iteration: 358266
loss: 0.952333390712738,grad_norm: 0.8897889783904817, iteration: 358267
loss: 1.0098117589950562,grad_norm: 0.841514924145364, iteration: 358268
loss: 0.985295295715332,grad_norm: 0.9999990712668758, iteration: 358269
loss: 0.9912852644920349,grad_norm: 0.8427914264396161, iteration: 358270
loss: 1.07076895236969,grad_norm: 0.999999535020664, iteration: 358271
loss: 1.0160636901855469,grad_norm: 0.7811466125101262, iteration: 358272
loss: 1.0491373538970947,grad_norm: 0.9999990382545603, iteration: 358273
loss: 0.9761708378791809,grad_norm: 0.9999999094215523, iteration: 358274
loss: 0.9895027279853821,grad_norm: 0.9419642209843353, iteration: 358275
loss: 1.0302330255508423,grad_norm: 0.9999991916883291, iteration: 358276
loss: 0.9839029312133789,grad_norm: 0.9741273887286326, iteration: 358277
loss: 1.0165094137191772,grad_norm: 0.9999993095422567, iteration: 358278
loss: 0.9907236695289612,grad_norm: 0.8347741447674676, iteration: 358279
loss: 0.9858134984970093,grad_norm: 0.8400274191805545, iteration: 358280
loss: 0.9963756799697876,grad_norm: 0.8700398630284057, iteration: 358281
loss: 0.9921059608459473,grad_norm: 0.9999999084242015, iteration: 358282
loss: 0.9777892827987671,grad_norm: 0.9999992422507875, iteration: 358283
loss: 1.0155662298202515,grad_norm: 0.9022633648282219, iteration: 358284
loss: 0.9765069484710693,grad_norm: 0.9999997203879329, iteration: 358285
loss: 0.988298237323761,grad_norm: 0.8154634641361368, iteration: 358286
loss: 1.0352435111999512,grad_norm: 0.9999999826478595, iteration: 358287
loss: 1.044845700263977,grad_norm: 0.9999990932362083, iteration: 358288
loss: 0.9809339642524719,grad_norm: 0.9303174028365986, iteration: 358289
loss: 1.0001529455184937,grad_norm: 0.8113940161726308, iteration: 358290
loss: 1.0002244710922241,grad_norm: 0.7368902413744713, iteration: 358291
loss: 1.00092613697052,grad_norm: 0.9028407282386995, iteration: 358292
loss: 1.0017163753509521,grad_norm: 0.8209144597271508, iteration: 358293
loss: 0.9302893280982971,grad_norm: 0.8871119641059254, iteration: 358294
loss: 0.9838249087333679,grad_norm: 0.7724071096410204, iteration: 358295
loss: 0.9577072858810425,grad_norm: 0.7763795864457957, iteration: 358296
loss: 0.9717289805412292,grad_norm: 0.8917791586126588, iteration: 358297
loss: 1.067372441291809,grad_norm: 0.9038127820176177, iteration: 358298
loss: 1.063119649887085,grad_norm: 0.9658475452910671, iteration: 358299
loss: 1.000991940498352,grad_norm: 0.6543719812931433, iteration: 358300
loss: 1.0050023794174194,grad_norm: 0.9999992169688594, iteration: 358301
loss: 0.9870344400405884,grad_norm: 0.8459670728077142, iteration: 358302
loss: 1.0273278951644897,grad_norm: 0.7187027395133856, iteration: 358303
loss: 0.9953089356422424,grad_norm: 0.7072802035057697, iteration: 358304
loss: 1.058388590812683,grad_norm: 0.9005402738872531, iteration: 358305
loss: 0.9888427257537842,grad_norm: 0.8607618688001899, iteration: 358306
loss: 1.0931414365768433,grad_norm: 0.9999992575085828, iteration: 358307
loss: 0.9709431529045105,grad_norm: 0.8922894823868095, iteration: 358308
loss: 1.007757544517517,grad_norm: 0.9394142517935895, iteration: 358309
loss: 1.0148965120315552,grad_norm: 0.9538414918213601, iteration: 358310
loss: 1.029040813446045,grad_norm: 0.7998115022318544, iteration: 358311
loss: 0.9794546961784363,grad_norm: 0.9999998247002, iteration: 358312
loss: 1.015834927558899,grad_norm: 0.7999417503162767, iteration: 358313
loss: 0.9968599081039429,grad_norm: 0.8926241181066303, iteration: 358314
loss: 1.0378092527389526,grad_norm: 1.0000000249854517, iteration: 358315
loss: 1.0200475454330444,grad_norm: 0.9999993840026425, iteration: 358316
loss: 0.9827631115913391,grad_norm: 0.639781219194211, iteration: 358317
loss: 0.9729857444763184,grad_norm: 0.7763292700519699, iteration: 358318
loss: 0.9965693950653076,grad_norm: 0.7303508829412448, iteration: 358319
loss: 0.989363431930542,grad_norm: 0.7533876999023825, iteration: 358320
loss: 1.007797360420227,grad_norm: 0.8294218466211382, iteration: 358321
loss: 0.9707552790641785,grad_norm: 0.9999990212783901, iteration: 358322
loss: 0.997494101524353,grad_norm: 0.8168336951270566, iteration: 358323
loss: 1.150299310684204,grad_norm: 0.9999995163663075, iteration: 358324
loss: 1.07953679561615,grad_norm: 0.8451101458357575, iteration: 358325
loss: 1.0092262029647827,grad_norm: 0.9999993510014435, iteration: 358326
loss: 1.0322368144989014,grad_norm: 0.9444841553912404, iteration: 358327
loss: 0.951298177242279,grad_norm: 0.7326344886020066, iteration: 358328
loss: 1.0850034952163696,grad_norm: 0.8668153857428808, iteration: 358329
loss: 0.971538782119751,grad_norm: 0.6747193567280259, iteration: 358330
loss: 1.0359007120132446,grad_norm: 0.8036836390069819, iteration: 358331
loss: 1.123057246208191,grad_norm: 0.999999257074474, iteration: 358332
loss: 0.9969143867492676,grad_norm: 0.9079118071528581, iteration: 358333
loss: 1.014014720916748,grad_norm: 0.7918763366875975, iteration: 358334
loss: 1.0027064085006714,grad_norm: 0.8679727401198399, iteration: 358335
loss: 1.2077727317810059,grad_norm: 0.9999997139274326, iteration: 358336
loss: 1.00298011302948,grad_norm: 0.8128190968936271, iteration: 358337
loss: 0.9889912605285645,grad_norm: 0.8027412732553125, iteration: 358338
loss: 1.0134484767913818,grad_norm: 0.7143309330391591, iteration: 358339
loss: 1.0135900974273682,grad_norm: 0.8576397410531675, iteration: 358340
loss: 0.9958410263061523,grad_norm: 0.6414160342633319, iteration: 358341
loss: 1.0125676393508911,grad_norm: 0.7437431820682288, iteration: 358342
loss: 1.0296156406402588,grad_norm: 0.8283997249380194, iteration: 358343
loss: 0.9899628758430481,grad_norm: 0.7863722410026693, iteration: 358344
loss: 0.9958237409591675,grad_norm: 0.9999994155262167, iteration: 358345
loss: 1.0280405282974243,grad_norm: 0.9350788890834232, iteration: 358346
loss: 1.0341287851333618,grad_norm: 0.8363499722238531, iteration: 358347
loss: 0.9856642484664917,grad_norm: 0.9431114607780983, iteration: 358348
loss: 0.9951026439666748,grad_norm: 0.9999993416686266, iteration: 358349
loss: 1.0367852449417114,grad_norm: 0.8698255567372731, iteration: 358350
loss: 1.0101137161254883,grad_norm: 0.9417133953476278, iteration: 358351
loss: 1.003167748451233,grad_norm: 0.8689485847565094, iteration: 358352
loss: 1.0754036903381348,grad_norm: 0.9999999928220012, iteration: 358353
loss: 0.9736799597740173,grad_norm: 0.9999989105871638, iteration: 358354
loss: 0.9777777194976807,grad_norm: 0.80199249641753, iteration: 358355
loss: 1.0174152851104736,grad_norm: 0.8998428857261301, iteration: 358356
loss: 0.9892874360084534,grad_norm: 0.8214072873849383, iteration: 358357
loss: 1.0028308629989624,grad_norm: 0.7910498419228472, iteration: 358358
loss: 0.9790949821472168,grad_norm: 0.8143372859965671, iteration: 358359
loss: 1.0155003070831299,grad_norm: 0.9676702508360541, iteration: 358360
loss: 1.0166399478912354,grad_norm: 0.8844448711810764, iteration: 358361
loss: 1.0030601024627686,grad_norm: 0.7988197687438381, iteration: 358362
loss: 0.9867675304412842,grad_norm: 0.9789805189199796, iteration: 358363
loss: 1.0282400846481323,grad_norm: 0.8896414539900388, iteration: 358364
loss: 0.9845495223999023,grad_norm: 0.852794631280773, iteration: 358365
loss: 0.9731225371360779,grad_norm: 0.7257095520768583, iteration: 358366
loss: 1.0221792459487915,grad_norm: 0.7790858976882742, iteration: 358367
loss: 1.0352811813354492,grad_norm: 0.9999996393048038, iteration: 358368
loss: 1.003793716430664,grad_norm: 0.9999994919152447, iteration: 358369
loss: 1.0780097246170044,grad_norm: 0.9568367950493061, iteration: 358370
loss: 0.9742799401283264,grad_norm: 0.7901499059597676, iteration: 358371
loss: 1.0528275966644287,grad_norm: 0.9043524338077835, iteration: 358372
loss: 0.9824725389480591,grad_norm: 0.6530876113100017, iteration: 358373
loss: 1.0117671489715576,grad_norm: 0.9999996908926907, iteration: 358374
loss: 1.0693538188934326,grad_norm: 0.8217024319699414, iteration: 358375
loss: 0.9761287569999695,grad_norm: 0.9999996053539028, iteration: 358376
loss: 0.9884254336357117,grad_norm: 0.9999991896122424, iteration: 358377
loss: 1.0663865804672241,grad_norm: 0.9999996747786422, iteration: 358378
loss: 1.0167571306228638,grad_norm: 0.8745898013812016, iteration: 358379
loss: 0.9978556036949158,grad_norm: 0.7408054522488124, iteration: 358380
loss: 1.0521329641342163,grad_norm: 0.953269824481099, iteration: 358381
loss: 0.9984533786773682,grad_norm: 0.9740535425581016, iteration: 358382
loss: 1.006981372833252,grad_norm: 0.7314441048048913, iteration: 358383
loss: 1.0206609964370728,grad_norm: 0.9999996590923266, iteration: 358384
loss: 0.9562468528747559,grad_norm: 0.7781610248600259, iteration: 358385
loss: 0.9998261332511902,grad_norm: 0.8267609103405397, iteration: 358386
loss: 0.9837971925735474,grad_norm: 0.7969037333837992, iteration: 358387
loss: 0.9839288592338562,grad_norm: 0.8808721622487221, iteration: 358388
loss: 0.9945684671401978,grad_norm: 0.999999741137298, iteration: 358389
loss: 1.020485520362854,grad_norm: 0.7675852170597575, iteration: 358390
loss: 1.0611035823822021,grad_norm: 0.9999998084421159, iteration: 358391
loss: 1.0003390312194824,grad_norm: 0.7021572817924268, iteration: 358392
loss: 1.01558256149292,grad_norm: 0.743054226313156, iteration: 358393
loss: 1.0266644954681396,grad_norm: 0.999999474314216, iteration: 358394
loss: 1.0195095539093018,grad_norm: 0.88041476631747, iteration: 358395
loss: 1.0040494203567505,grad_norm: 0.8219862663932996, iteration: 358396
loss: 1.0236424207687378,grad_norm: 0.9679726496162936, iteration: 358397
loss: 1.0558969974517822,grad_norm: 0.7498162962153594, iteration: 358398
loss: 1.0300095081329346,grad_norm: 0.9634744051015333, iteration: 358399
loss: 0.9847050905227661,grad_norm: 0.9999991900567248, iteration: 358400
loss: 1.07518470287323,grad_norm: 0.9999990308009957, iteration: 358401
loss: 1.04574716091156,grad_norm: 0.9999990962836769, iteration: 358402
loss: 1.0289133787155151,grad_norm: 0.9246737752505778, iteration: 358403
loss: 0.9816620349884033,grad_norm: 0.7751047651605657, iteration: 358404
loss: 1.0210224390029907,grad_norm: 0.7973320602981662, iteration: 358405
loss: 1.0056041479110718,grad_norm: 0.8940193155566563, iteration: 358406
loss: 0.9938033819198608,grad_norm: 0.8258807992075562, iteration: 358407
loss: 0.9621830582618713,grad_norm: 0.9347847472671073, iteration: 358408
loss: 1.0288022756576538,grad_norm: 0.8629356878637774, iteration: 358409
loss: 0.9843547940254211,grad_norm: 0.7089648992978651, iteration: 358410
loss: 0.9705175757408142,grad_norm: 0.809582342079029, iteration: 358411
loss: 1.0619994401931763,grad_norm: 0.9999995814555818, iteration: 358412
loss: 0.9810547828674316,grad_norm: 0.9999992167591722, iteration: 358413
loss: 0.9904558658599854,grad_norm: 0.9999991608791637, iteration: 358414
loss: 1.0150868892669678,grad_norm: 0.9789024797208108, iteration: 358415
loss: 0.9918925166130066,grad_norm: 0.7579057626256845, iteration: 358416
loss: 0.9982476234436035,grad_norm: 0.8302334007756591, iteration: 358417
loss: 0.9923555254936218,grad_norm: 0.7816162772478376, iteration: 358418
loss: 1.0054103136062622,grad_norm: 0.8366362532193897, iteration: 358419
loss: 1.0048071146011353,grad_norm: 0.8179846276857838, iteration: 358420
loss: 0.9803757667541504,grad_norm: 0.9076200949626778, iteration: 358421
loss: 1.0449854135513306,grad_norm: 0.9999996370605315, iteration: 358422
loss: 0.9683204293251038,grad_norm: 0.7725881194660449, iteration: 358423
loss: 1.0511505603790283,grad_norm: 0.7386498350554698, iteration: 358424
loss: 1.0801595449447632,grad_norm: 0.9999991663968723, iteration: 358425
loss: 0.9742804765701294,grad_norm: 0.7782739165664243, iteration: 358426
loss: 1.02959144115448,grad_norm: 0.9999992489583301, iteration: 358427
loss: 0.971582293510437,grad_norm: 0.9294050747541238, iteration: 358428
loss: 1.0127253532409668,grad_norm: 0.606768491008061, iteration: 358429
loss: 1.196393609046936,grad_norm: 0.9999998436219733, iteration: 358430
loss: 1.0271885395050049,grad_norm: 0.8699285691165236, iteration: 358431
loss: 1.0208157300949097,grad_norm: 0.8758849685731116, iteration: 358432
loss: 0.9969637393951416,grad_norm: 0.9473714483930193, iteration: 358433
loss: 0.9675598740577698,grad_norm: 0.8586359725871151, iteration: 358434
loss: 1.0214041471481323,grad_norm: 0.7683849432098946, iteration: 358435
loss: 1.0406732559204102,grad_norm: 0.8121182217262867, iteration: 358436
loss: 1.0213145017623901,grad_norm: 0.9999998338044286, iteration: 358437
loss: 1.0021065473556519,grad_norm: 0.8450066342247146, iteration: 358438
loss: 1.061772346496582,grad_norm: 0.7507226202450507, iteration: 358439
loss: 1.0329945087432861,grad_norm: 0.6992753242908959, iteration: 358440
loss: 0.9961788058280945,grad_norm: 0.7827810846386158, iteration: 358441
loss: 0.9989078044891357,grad_norm: 0.9999995082054671, iteration: 358442
loss: 1.0152167081832886,grad_norm: 0.7575392040766259, iteration: 358443
loss: 1.1107335090637207,grad_norm: 0.9999998181819901, iteration: 358444
loss: 0.9783336520195007,grad_norm: 0.9999996297155448, iteration: 358445
loss: 0.9871671795845032,grad_norm: 0.865916984558037, iteration: 358446
loss: 1.038488507270813,grad_norm: 0.8596120167462197, iteration: 358447
loss: 1.022220492362976,grad_norm: 0.7337074062385017, iteration: 358448
loss: 1.0442382097244263,grad_norm: 0.9999994532897476, iteration: 358449
loss: 0.9871741533279419,grad_norm: 0.8791362644532082, iteration: 358450
loss: 1.037429928779602,grad_norm: 0.9256911853144911, iteration: 358451
loss: 1.007619023323059,grad_norm: 0.9478750648363823, iteration: 358452
loss: 1.0284661054611206,grad_norm: 0.8840947527955512, iteration: 358453
loss: 0.9741231799125671,grad_norm: 0.670171041607353, iteration: 358454
loss: 1.0159987211227417,grad_norm: 0.7428427498145, iteration: 358455
loss: 1.0086201429367065,grad_norm: 0.9394544033909459, iteration: 358456
loss: 1.0131360292434692,grad_norm: 0.9999996536740213, iteration: 358457
loss: 0.9823970794677734,grad_norm: 0.9187266788307649, iteration: 358458
loss: 0.9953303337097168,grad_norm: 0.789781883398642, iteration: 358459
loss: 0.9735442399978638,grad_norm: 0.9402178971899119, iteration: 358460
loss: 0.9793559908866882,grad_norm: 0.6809364258070955, iteration: 358461
loss: 1.023048758506775,grad_norm: 0.7595110744621313, iteration: 358462
loss: 0.9950324892997742,grad_norm: 0.7792153737099876, iteration: 358463
loss: 1.038571834564209,grad_norm: 0.9999992181532922, iteration: 358464
loss: 0.9905279874801636,grad_norm: 0.8660284622001204, iteration: 358465
loss: 1.0020004510879517,grad_norm: 0.7632614263325798, iteration: 358466
loss: 1.1385512351989746,grad_norm: 0.9473925873180369, iteration: 358467
loss: 1.0061355829238892,grad_norm: 0.8707779524862822, iteration: 358468
loss: 1.0216947793960571,grad_norm: 0.9999992009058285, iteration: 358469
loss: 1.0076013803482056,grad_norm: 0.6912742024603826, iteration: 358470
loss: 0.9746464490890503,grad_norm: 0.7743436689700935, iteration: 358471
loss: 1.0033965110778809,grad_norm: 0.8515336023389846, iteration: 358472
loss: 0.9765776991844177,grad_norm: 0.9184750072026002, iteration: 358473
loss: 1.0030847787857056,grad_norm: 0.9427190662325587, iteration: 358474
loss: 1.0285769701004028,grad_norm: 0.746055983278669, iteration: 358475
loss: 0.9736643433570862,grad_norm: 0.8411086978215575, iteration: 358476
loss: 1.009105920791626,grad_norm: 0.9999996521602386, iteration: 358477
loss: 1.003197193145752,grad_norm: 0.9999994087721944, iteration: 358478
loss: 1.0501631498336792,grad_norm: 0.8193900015806219, iteration: 358479
loss: 0.9803436398506165,grad_norm: 0.7303927603970859, iteration: 358480
loss: 0.9844635128974915,grad_norm: 0.6982282335566913, iteration: 358481
loss: 1.0193558931350708,grad_norm: 0.9999994605243718, iteration: 358482
loss: 0.9576919674873352,grad_norm: 0.6303503254223274, iteration: 358483
loss: 0.9569931626319885,grad_norm: 0.8929198824114698, iteration: 358484
loss: 0.9970959424972534,grad_norm: 0.9974875481142402, iteration: 358485
loss: 1.074114203453064,grad_norm: 0.7564078854390497, iteration: 358486
loss: 1.013956904411316,grad_norm: 0.999999121862125, iteration: 358487
loss: 0.9717106223106384,grad_norm: 0.772655272204602, iteration: 358488
loss: 1.0539659261703491,grad_norm: 0.793282039538027, iteration: 358489
loss: 1.0165385007858276,grad_norm: 0.6411200623523209, iteration: 358490
loss: 1.0097872018814087,grad_norm: 0.7147560090188179, iteration: 358491
loss: 0.9917669892311096,grad_norm: 0.9999993166335645, iteration: 358492
loss: 1.0855977535247803,grad_norm: 0.999999455422144, iteration: 358493
loss: 1.2187308073043823,grad_norm: 0.9999997170611984, iteration: 358494
loss: 1.0136574506759644,grad_norm: 0.9539617683533813, iteration: 358495
loss: 1.0031174421310425,grad_norm: 0.8997033593444298, iteration: 358496
loss: 1.0361663103103638,grad_norm: 0.9999991759276577, iteration: 358497
loss: 0.951996386051178,grad_norm: 0.7611309910391871, iteration: 358498
loss: 0.9798102974891663,grad_norm: 0.833900816249431, iteration: 358499
loss: 1.0934263467788696,grad_norm: 0.9999999868085957, iteration: 358500
loss: 1.0337501764297485,grad_norm: 0.6985658942886991, iteration: 358501
loss: 1.0505352020263672,grad_norm: 0.7486708528524683, iteration: 358502
loss: 1.0949901342391968,grad_norm: 0.9999996875328286, iteration: 358503
loss: 1.026095986366272,grad_norm: 1.0000000568070482, iteration: 358504
loss: 0.9971598386764526,grad_norm: 0.9106940043147298, iteration: 358505
loss: 0.9947835803031921,grad_norm: 0.8546401064862408, iteration: 358506
loss: 0.9981486201286316,grad_norm: 0.8698460364996794, iteration: 358507
loss: 1.020229697227478,grad_norm: 0.7954312724157919, iteration: 358508
loss: 0.9868605136871338,grad_norm: 0.734682090632265, iteration: 358509
loss: 1.010271668434143,grad_norm: 0.9999991011230719, iteration: 358510
loss: 1.0063735246658325,grad_norm: 0.9999990826978514, iteration: 358511
loss: 0.980847954750061,grad_norm: 0.999999124649292, iteration: 358512
loss: 1.011673092842102,grad_norm: 0.7815752606437114, iteration: 358513
loss: 0.9936084747314453,grad_norm: 0.7245115650993582, iteration: 358514
loss: 1.0323580503463745,grad_norm: 0.9999991609952502, iteration: 358515
loss: 0.9899055361747742,grad_norm: 0.8492733474771914, iteration: 358516
loss: 0.9891160130500793,grad_norm: 0.8020140198342232, iteration: 358517
loss: 1.038357138633728,grad_norm: 0.9999990547478506, iteration: 358518
loss: 0.9452730417251587,grad_norm: 0.7740420334017353, iteration: 358519
loss: 1.116333246231079,grad_norm: 0.9999996250450256, iteration: 358520
loss: 1.0214673280715942,grad_norm: 0.9999990283251334, iteration: 358521
loss: 0.9740052819252014,grad_norm: 0.8293311957718179, iteration: 358522
loss: 1.1137679815292358,grad_norm: 0.999999746092418, iteration: 358523
loss: 0.9717356562614441,grad_norm: 0.9187982640871332, iteration: 358524
loss: 0.9936615228652954,grad_norm: 0.9993256887900545, iteration: 358525
loss: 1.0164402723312378,grad_norm: 0.9242561963702407, iteration: 358526
loss: 1.071146845817566,grad_norm: 0.9999994224929658, iteration: 358527
loss: 0.9736415147781372,grad_norm: 0.9321283927711697, iteration: 358528
loss: 1.0256943702697754,grad_norm: 0.8873346887508728, iteration: 358529
loss: 0.9939770698547363,grad_norm: 0.7518727209734476, iteration: 358530
loss: 1.0211374759674072,grad_norm: 0.8621966960209494, iteration: 358531
loss: 1.0092170238494873,grad_norm: 0.9999990174487312, iteration: 358532
loss: 0.981952965259552,grad_norm: 0.7786490027449241, iteration: 358533
loss: 0.9969874024391174,grad_norm: 0.7555815468877074, iteration: 358534
loss: 1.0075823068618774,grad_norm: 0.856837895507869, iteration: 358535
loss: 0.9955345988273621,grad_norm: 0.9765770061089724, iteration: 358536
loss: 1.0164258480072021,grad_norm: 0.715234826117364, iteration: 358537
loss: 1.1623156070709229,grad_norm: 0.9999995242129868, iteration: 358538
loss: 0.9570358395576477,grad_norm: 0.8232649196080276, iteration: 358539
loss: 0.9815373420715332,grad_norm: 0.8917142075082303, iteration: 358540
loss: 0.9830734133720398,grad_norm: 0.9999999378043802, iteration: 358541
loss: 1.0027458667755127,grad_norm: 0.9999990885517158, iteration: 358542
loss: 1.0056111812591553,grad_norm: 0.8598403732002584, iteration: 358543
loss: 1.0017062425613403,grad_norm: 0.7668296778541538, iteration: 358544
loss: 1.017741322517395,grad_norm: 0.9999990046311776, iteration: 358545
loss: 0.9999379515647888,grad_norm: 0.7518675589200987, iteration: 358546
loss: 0.9951945543289185,grad_norm: 0.7780447580213272, iteration: 358547
loss: 0.987033486366272,grad_norm: 0.9999996359906022, iteration: 358548
loss: 0.9905670881271362,grad_norm: 0.8222974539059512, iteration: 358549
loss: 1.0100945234298706,grad_norm: 0.8723549512097711, iteration: 358550
loss: 0.9651840329170227,grad_norm: 0.8405313612639165, iteration: 358551
loss: 1.045030117034912,grad_norm: 0.9999994113917394, iteration: 358552
loss: 0.9775559902191162,grad_norm: 0.7699016147087501, iteration: 358553
loss: 1.0121551752090454,grad_norm: 0.7102917437521437, iteration: 358554
loss: 1.0485525131225586,grad_norm: 0.9999998307547344, iteration: 358555
loss: 1.0576013326644897,grad_norm: 0.9824403688235773, iteration: 358556
loss: 1.0416617393493652,grad_norm: 0.8070419325826899, iteration: 358557
loss: 1.0138826370239258,grad_norm: 0.8154804020797396, iteration: 358558
loss: 1.0528972148895264,grad_norm: 0.8135204515861654, iteration: 358559
loss: 0.9852048754692078,grad_norm: 0.8791550178752967, iteration: 358560
loss: 0.9934813380241394,grad_norm: 0.9977203687031683, iteration: 358561
loss: 0.991645336151123,grad_norm: 0.809017138837556, iteration: 358562
loss: 1.0525621175765991,grad_norm: 0.9374920531005929, iteration: 358563
loss: 0.998123824596405,grad_norm: 0.9999990511042601, iteration: 358564
loss: 1.0014464855194092,grad_norm: 0.9999992568393619, iteration: 358565
loss: 1.034554123878479,grad_norm: 0.8593284218961186, iteration: 358566
loss: 1.0234605073928833,grad_norm: 0.772492649328418, iteration: 358567
loss: 1.017313838005066,grad_norm: 0.999999838982609, iteration: 358568
loss: 0.9657201170921326,grad_norm: 0.7064010156543777, iteration: 358569
loss: 0.99417644739151,grad_norm: 0.9008011007664021, iteration: 358570
loss: 0.9893549084663391,grad_norm: 0.8782149287074882, iteration: 358571
loss: 0.9918828010559082,grad_norm: 0.7762150724197137, iteration: 358572
loss: 1.00846266746521,grad_norm: 0.9999997876836602, iteration: 358573
loss: 1.0624064207077026,grad_norm: 0.9999995595981591, iteration: 358574
loss: 0.997754693031311,grad_norm: 0.8422595360748041, iteration: 358575
loss: 0.9919726252555847,grad_norm: 0.9348070365997707, iteration: 358576
loss: 0.9608662724494934,grad_norm: 0.9414805211572775, iteration: 358577
loss: 1.0442596673965454,grad_norm: 0.8137755720943671, iteration: 358578
loss: 0.9551894664764404,grad_norm: 0.8175968912639315, iteration: 358579
loss: 1.0019625425338745,grad_norm: 0.7757190780590406, iteration: 358580
loss: 1.0103996992111206,grad_norm: 0.8397647933306854, iteration: 358581
loss: 1.0156744718551636,grad_norm: 0.8602718387127353, iteration: 358582
loss: 1.0930784940719604,grad_norm: 0.7781360605426546, iteration: 358583
loss: 1.002528190612793,grad_norm: 0.8348930968555172, iteration: 358584
loss: 1.0380938053131104,grad_norm: 0.9999992069568469, iteration: 358585
loss: 1.3307744264602661,grad_norm: 0.9999992095433445, iteration: 358586
loss: 0.9850022792816162,grad_norm: 0.7712345770325655, iteration: 358587
loss: 0.9739370346069336,grad_norm: 0.7959073853968107, iteration: 358588
loss: 1.0243338346481323,grad_norm: 0.9032091397433688, iteration: 358589
loss: 0.9859237670898438,grad_norm: 0.8041099509977659, iteration: 358590
loss: 1.0236947536468506,grad_norm: 0.8791764190587025, iteration: 358591
loss: 1.0256959199905396,grad_norm: 0.7844847604773111, iteration: 358592
loss: 0.9898531436920166,grad_norm: 0.7023421768568777, iteration: 358593
loss: 1.017207145690918,grad_norm: 0.7374751708495684, iteration: 358594
loss: 1.0451315641403198,grad_norm: 0.7167540892720958, iteration: 358595
loss: 0.9588007926940918,grad_norm: 0.6959806549728418, iteration: 358596
loss: 0.9989143013954163,grad_norm: 0.8917975920681153, iteration: 358597
loss: 0.988884449005127,grad_norm: 0.6681482056109391, iteration: 358598
loss: 0.9832155108451843,grad_norm: 0.8765096227245258, iteration: 358599
loss: 1.017195463180542,grad_norm: 0.8310607492270796, iteration: 358600
loss: 0.9814685583114624,grad_norm: 0.8656499945018634, iteration: 358601
loss: 0.9860848784446716,grad_norm: 0.7460436376895165, iteration: 358602
loss: 1.0238646268844604,grad_norm: 0.9793703776883588, iteration: 358603
loss: 1.0189638137817383,grad_norm: 0.7950744068967189, iteration: 358604
loss: 0.9906348586082458,grad_norm: 0.8803107284059147, iteration: 358605
loss: 0.9838377833366394,grad_norm: 0.7221993728554481, iteration: 358606
loss: 1.0055088996887207,grad_norm: 0.7867823865317226, iteration: 358607
loss: 1.0070114135742188,grad_norm: 0.9636527284533721, iteration: 358608
loss: 0.9733321666717529,grad_norm: 0.845317046449485, iteration: 358609
loss: 0.9614678621292114,grad_norm: 0.7876884548711862, iteration: 358610
loss: 1.0596716403961182,grad_norm: 0.908295019170778, iteration: 358611
loss: 0.9754942059516907,grad_norm: 0.9052626756192825, iteration: 358612
loss: 1.000211238861084,grad_norm: 0.8859352480159826, iteration: 358613
loss: 1.0051071643829346,grad_norm: 0.6447381793226598, iteration: 358614
loss: 0.9931288957595825,grad_norm: 0.7785900723783423, iteration: 358615
loss: 1.0105923414230347,grad_norm: 0.8220981852683815, iteration: 358616
loss: 1.0203474760055542,grad_norm: 0.7725385425571849, iteration: 358617
loss: 1.0543832778930664,grad_norm: 0.9999990288916542, iteration: 358618
loss: 0.9570635557174683,grad_norm: 0.999999265107506, iteration: 358619
loss: 0.9989943504333496,grad_norm: 0.9081411476869629, iteration: 358620
loss: 1.0267277956008911,grad_norm: 0.8308801856564259, iteration: 358621
loss: 1.0205328464508057,grad_norm: 0.797758376683671, iteration: 358622
loss: 0.9910478591918945,grad_norm: 0.702677612747757, iteration: 358623
loss: 1.005906581878662,grad_norm: 0.7782099815500162, iteration: 358624
loss: 1.0879396200180054,grad_norm: 0.9999993367979865, iteration: 358625
loss: 0.9995532631874084,grad_norm: 0.7822677668065474, iteration: 358626
loss: 0.982231855392456,grad_norm: 0.8024187498358555, iteration: 358627
loss: 1.0129499435424805,grad_norm: 0.897204293425091, iteration: 358628
loss: 1.0062079429626465,grad_norm: 0.6795253678383505, iteration: 358629
loss: 0.9885395169258118,grad_norm: 0.8588933511090765, iteration: 358630
loss: 0.9743995070457458,grad_norm: 0.8772393752550562, iteration: 358631
loss: 1.051199197769165,grad_norm: 0.832212029999307, iteration: 358632
loss: 0.9840711951255798,grad_norm: 0.7538532755712115, iteration: 358633
loss: 0.9832291603088379,grad_norm: 0.9436929187757299, iteration: 358634
loss: 0.9728453159332275,grad_norm: 0.9553442981130856, iteration: 358635
loss: 0.9945279359817505,grad_norm: 0.8786230443567484, iteration: 358636
loss: 1.0013264417648315,grad_norm: 0.6457641593300688, iteration: 358637
loss: 1.0218852758407593,grad_norm: 0.8043347116359346, iteration: 358638
loss: 0.9919141530990601,grad_norm: 0.844647216993906, iteration: 358639
loss: 0.9506188631057739,grad_norm: 0.8673344354221485, iteration: 358640
loss: 1.0204869508743286,grad_norm: 0.7996035569558354, iteration: 358641
loss: 1.0040783882141113,grad_norm: 0.8138442677760922, iteration: 358642
loss: 0.9719048738479614,grad_norm: 0.7221326576636635, iteration: 358643
loss: 1.0038766860961914,grad_norm: 0.9999990987742743, iteration: 358644
loss: 0.983997642993927,grad_norm: 0.9569433546701358, iteration: 358645
loss: 1.0032950639724731,grad_norm: 0.7775906095657014, iteration: 358646
loss: 1.0290757417678833,grad_norm: 0.9999990953286193, iteration: 358647
loss: 0.9817824363708496,grad_norm: 0.8123008763717005, iteration: 358648
loss: 0.9550203084945679,grad_norm: 0.8598872944376167, iteration: 358649
loss: 0.9915154576301575,grad_norm: 0.7458803091528862, iteration: 358650
loss: 0.9715794324874878,grad_norm: 0.7962157598259446, iteration: 358651
loss: 0.9911340475082397,grad_norm: 0.7808206326298484, iteration: 358652
loss: 1.0129539966583252,grad_norm: 0.7856087775019431, iteration: 358653
loss: 1.010079026222229,grad_norm: 0.7263667772333645, iteration: 358654
loss: 1.0210363864898682,grad_norm: 0.6830866793523439, iteration: 358655
loss: 1.0784857273101807,grad_norm: 0.8440248100096115, iteration: 358656
loss: 1.0025421380996704,grad_norm: 0.6594966497341849, iteration: 358657
loss: 0.9508516192436218,grad_norm: 0.74571423275406, iteration: 358658
loss: 0.9942659735679626,grad_norm: 0.7576954731802035, iteration: 358659
loss: 1.0189086198806763,grad_norm: 0.999999544650618, iteration: 358660
loss: 0.9521757960319519,grad_norm: 0.8225678484170185, iteration: 358661
loss: 0.9906982183456421,grad_norm: 0.9307552096020015, iteration: 358662
loss: 1.0109268426895142,grad_norm: 0.8597499026260815, iteration: 358663
loss: 1.074208378791809,grad_norm: 0.9738425087462811, iteration: 358664
loss: 1.0131779909133911,grad_norm: 0.9999991106544015, iteration: 358665
loss: 0.9880822896957397,grad_norm: 0.7828771499834724, iteration: 358666
loss: 1.0352094173431396,grad_norm: 0.9999994192736962, iteration: 358667
loss: 1.063524603843689,grad_norm: 0.8985914635185187, iteration: 358668
loss: 1.0032216310501099,grad_norm: 0.8605972816869977, iteration: 358669
loss: 0.9860573410987854,grad_norm: 0.9248505934170239, iteration: 358670
loss: 0.9892169833183289,grad_norm: 0.7325914481298288, iteration: 358671
loss: 1.0041115283966064,grad_norm: 0.9164436292249358, iteration: 358672
loss: 0.9808354377746582,grad_norm: 0.6837007009536564, iteration: 358673
loss: 0.9869557619094849,grad_norm: 0.9089428523063944, iteration: 358674
loss: 1.0186995267868042,grad_norm: 0.7177760414509747, iteration: 358675
loss: 1.0021008253097534,grad_norm: 0.7742424054740453, iteration: 358676
loss: 0.982056736946106,grad_norm: 0.7564880255674132, iteration: 358677
loss: 1.0732957124710083,grad_norm: 0.9999991652347876, iteration: 358678
loss: 1.0293372869491577,grad_norm: 0.7952985787142223, iteration: 358679
loss: 0.9914100766181946,grad_norm: 0.8478712682235463, iteration: 358680
loss: 1.000922441482544,grad_norm: 0.8895981218588328, iteration: 358681
loss: 0.9934267997741699,grad_norm: 0.8136704368189878, iteration: 358682
loss: 0.9779725670814514,grad_norm: 0.9096823518539661, iteration: 358683
loss: 0.9695873856544495,grad_norm: 0.7246392079081279, iteration: 358684
loss: 1.0159250497817993,grad_norm: 0.9063475690582686, iteration: 358685
loss: 0.9879869818687439,grad_norm: 0.8022927736503239, iteration: 358686
loss: 0.9792156219482422,grad_norm: 0.7218715714605844, iteration: 358687
loss: 1.0160648822784424,grad_norm: 0.8202664477215834, iteration: 358688
loss: 0.9785279035568237,grad_norm: 0.7960513229274022, iteration: 358689
loss: 0.9475752115249634,grad_norm: 0.8792116193642521, iteration: 358690
loss: 0.9808134436607361,grad_norm: 0.8112274824314085, iteration: 358691
loss: 1.0155235528945923,grad_norm: 0.9999999001513654, iteration: 358692
loss: 1.0121002197265625,grad_norm: 0.8364499000106506, iteration: 358693
loss: 1.0515146255493164,grad_norm: 0.7146309637736079, iteration: 358694
loss: 0.991722583770752,grad_norm: 0.8336839277334623, iteration: 358695
loss: 0.9818946719169617,grad_norm: 0.8848742371703032, iteration: 358696
loss: 0.9761038422584534,grad_norm: 0.7859642801946974, iteration: 358697
loss: 1.0156856775283813,grad_norm: 0.7690941511729069, iteration: 358698
loss: 1.017062783241272,grad_norm: 0.7722502794929083, iteration: 358699
loss: 0.9954281449317932,grad_norm: 0.8814374425161959, iteration: 358700
loss: 1.0148245096206665,grad_norm: 0.7792654360880426, iteration: 358701
loss: 0.9945995807647705,grad_norm: 0.7639472607004448, iteration: 358702
loss: 0.9962962865829468,grad_norm: 0.7976240000812154, iteration: 358703
loss: 0.9699534177780151,grad_norm: 0.6926805779202377, iteration: 358704
loss: 1.012969970703125,grad_norm: 0.7276304045752541, iteration: 358705
loss: 0.983788013458252,grad_norm: 0.7300602623747381, iteration: 358706
loss: 1.011827826499939,grad_norm: 0.7707034059079562, iteration: 358707
loss: 0.9743226170539856,grad_norm: 0.874997451347009, iteration: 358708
loss: 1.0180827379226685,grad_norm: 0.7867937588879366, iteration: 358709
loss: 1.0349950790405273,grad_norm: 0.7961495533016841, iteration: 358710
loss: 0.9646592736244202,grad_norm: 0.9169831856821068, iteration: 358711
loss: 0.9387618899345398,grad_norm: 0.7796302527484356, iteration: 358712
loss: 0.9893835783004761,grad_norm: 0.7669056665742929, iteration: 358713
loss: 0.9822319149971008,grad_norm: 0.8962114319384055, iteration: 358714
loss: 1.030422329902649,grad_norm: 0.9083084985632511, iteration: 358715
loss: 1.0025063753128052,grad_norm: 0.7265520294973699, iteration: 358716
loss: 0.9896769523620605,grad_norm: 0.846222745291853, iteration: 358717
loss: 0.9823541045188904,grad_norm: 0.689115999659483, iteration: 358718
loss: 0.9827607274055481,grad_norm: 0.8236817878172663, iteration: 358719
loss: 1.096779704093933,grad_norm: 0.8427707994398852, iteration: 358720
loss: 1.002123236656189,grad_norm: 0.6784431813248908, iteration: 358721
loss: 1.0220959186553955,grad_norm: 0.724756802981934, iteration: 358722
loss: 0.9637293815612793,grad_norm: 0.9999991995331668, iteration: 358723
loss: 1.0967670679092407,grad_norm: 0.999999872721832, iteration: 358724
loss: 0.9927546977996826,grad_norm: 0.7471550983492651, iteration: 358725
loss: 0.9616814255714417,grad_norm: 0.7598916219894285, iteration: 358726
loss: 1.0098555088043213,grad_norm: 0.8382108860770275, iteration: 358727
loss: 1.0109317302703857,grad_norm: 0.7393762806058366, iteration: 358728
loss: 1.0105946063995361,grad_norm: 0.7200975885037771, iteration: 358729
loss: 0.989136278629303,grad_norm: 0.8314375773547447, iteration: 358730
loss: 1.0404603481292725,grad_norm: 0.7899633647299177, iteration: 358731
loss: 0.9756511449813843,grad_norm: 0.7596501910227826, iteration: 358732
loss: 1.0017263889312744,grad_norm: 0.7726705260288501, iteration: 358733
loss: 0.9831546545028687,grad_norm: 0.8748858384696282, iteration: 358734
loss: 0.9698225855827332,grad_norm: 0.7244428182009197, iteration: 358735
loss: 1.0206701755523682,grad_norm: 0.7985497368850429, iteration: 358736
loss: 0.9607127904891968,grad_norm: 0.8400819931044116, iteration: 358737
loss: 1.0253466367721558,grad_norm: 0.9000280660632932, iteration: 358738
loss: 1.0091817378997803,grad_norm: 0.609002880756244, iteration: 358739
loss: 1.0139509439468384,grad_norm: 0.9147918070532631, iteration: 358740
loss: 0.998784601688385,grad_norm: 0.8353420052025342, iteration: 358741
loss: 0.9767019748687744,grad_norm: 0.7087707290502315, iteration: 358742
loss: 0.9913262724876404,grad_norm: 0.8231886578561444, iteration: 358743
loss: 1.0646800994873047,grad_norm: 0.9313561834082541, iteration: 358744
loss: 0.9886517524719238,grad_norm: 0.7410136922029114, iteration: 358745
loss: 1.0251370668411255,grad_norm: 0.8231544338199098, iteration: 358746
loss: 1.0093597173690796,grad_norm: 0.7117322103330459, iteration: 358747
loss: 1.004710078239441,grad_norm: 0.8420472000862422, iteration: 358748
loss: 1.0239471197128296,grad_norm: 0.7420033112708054, iteration: 358749
loss: 1.0009422302246094,grad_norm: 0.9999990742049206, iteration: 358750
loss: 0.9869492650032043,grad_norm: 0.9999994758021173, iteration: 358751
loss: 1.0216829776763916,grad_norm: 0.9213526864034113, iteration: 358752
loss: 0.9980078339576721,grad_norm: 0.7516987439566496, iteration: 358753
loss: 1.0737191438674927,grad_norm: 0.81044048844682, iteration: 358754
loss: 1.014174222946167,grad_norm: 0.753321803968549, iteration: 358755
loss: 0.9992850422859192,grad_norm: 0.7889838936771685, iteration: 358756
loss: 1.0143963098526,grad_norm: 0.8933506318477522, iteration: 358757
loss: 1.00494384765625,grad_norm: 0.8754497787760016, iteration: 358758
loss: 1.0212639570236206,grad_norm: 0.8656323330561407, iteration: 358759
loss: 1.0144245624542236,grad_norm: 0.999999677894878, iteration: 358760
loss: 1.0146697759628296,grad_norm: 0.9744270983720679, iteration: 358761
loss: 0.989376425743103,grad_norm: 0.9999991647824122, iteration: 358762
loss: 1.0138404369354248,grad_norm: 0.7847335876212096, iteration: 358763
loss: 0.9860894083976746,grad_norm: 0.9999993330930531, iteration: 358764
loss: 0.9575786590576172,grad_norm: 0.7532717772517489, iteration: 358765
loss: 0.979391872882843,grad_norm: 0.858385408869739, iteration: 358766
loss: 1.0384793281555176,grad_norm: 0.6331439633357859, iteration: 358767
loss: 1.025130033493042,grad_norm: 0.8568605081321274, iteration: 358768
loss: 0.9949930310249329,grad_norm: 0.7774524648929898, iteration: 358769
loss: 0.9764320254325867,grad_norm: 0.7705107920807699, iteration: 358770
loss: 1.1287851333618164,grad_norm: 0.9999997413857034, iteration: 358771
loss: 1.018493890762329,grad_norm: 0.8208516099169839, iteration: 358772
loss: 1.0063337087631226,grad_norm: 0.8502915474601206, iteration: 358773
loss: 0.9970067143440247,grad_norm: 0.7775777947801776, iteration: 358774
loss: 0.9924602508544922,grad_norm: 0.9514962971397688, iteration: 358775
loss: 1.0173310041427612,grad_norm: 0.714248563150944, iteration: 358776
loss: 0.942781388759613,grad_norm: 0.7535877260641904, iteration: 358777
loss: 1.0086138248443604,grad_norm: 0.8444108074495575, iteration: 358778
loss: 1.0081238746643066,grad_norm: 0.747286235824948, iteration: 358779
loss: 0.9788480401039124,grad_norm: 0.9187604016743413, iteration: 358780
loss: 0.967484176158905,grad_norm: 0.9999991867465533, iteration: 358781
loss: 1.019721508026123,grad_norm: 0.7071163265967466, iteration: 358782
loss: 1.031313180923462,grad_norm: 0.8819182338315823, iteration: 358783
loss: 0.9782030582427979,grad_norm: 0.8933797930105655, iteration: 358784
loss: 0.983430802822113,grad_norm: 0.9999991402278008, iteration: 358785
loss: 0.9559319019317627,grad_norm: 0.8915959721456722, iteration: 358786
loss: 0.9995359778404236,grad_norm: 0.8125490271827379, iteration: 358787
loss: 1.0752583742141724,grad_norm: 0.9999992242032968, iteration: 358788
loss: 1.0299984216690063,grad_norm: 0.9241414909920617, iteration: 358789
loss: 0.9724594354629517,grad_norm: 0.8938112046896264, iteration: 358790
loss: 1.0343252420425415,grad_norm: 0.8089652238795169, iteration: 358791
loss: 0.9866969585418701,grad_norm: 0.6579976086744416, iteration: 358792
loss: 0.9833847880363464,grad_norm: 0.8290423051104128, iteration: 358793
loss: 0.9634639024734497,grad_norm: 0.8604375315417221, iteration: 358794
loss: 1.0114989280700684,grad_norm: 0.72984799948484, iteration: 358795
loss: 0.9973844289779663,grad_norm: 0.7552613293932781, iteration: 358796
loss: 0.9799507260322571,grad_norm: 0.9285556656189284, iteration: 358797
loss: 0.9975088834762573,grad_norm: 0.7814046792453688, iteration: 358798
loss: 1.0104674100875854,grad_norm: 0.8513343419178824, iteration: 358799
loss: 1.045210838317871,grad_norm: 0.8191841226094664, iteration: 358800
loss: 0.9935835599899292,grad_norm: 0.8386280154812568, iteration: 358801
loss: 1.0093560218811035,grad_norm: 0.8438572386646278, iteration: 358802
loss: 1.0330437421798706,grad_norm: 0.831402734971646, iteration: 358803
loss: 1.0364255905151367,grad_norm: 0.8113278733973682, iteration: 358804
loss: 0.988291323184967,grad_norm: 0.9999991086765627, iteration: 358805
loss: 0.9916174411773682,grad_norm: 0.9778931709695915, iteration: 358806
loss: 1.0063073635101318,grad_norm: 0.911035439852629, iteration: 358807
loss: 1.0183889865875244,grad_norm: 0.692029668117742, iteration: 358808
loss: 1.0160980224609375,grad_norm: 0.8992853994256642, iteration: 358809
loss: 1.0695549249649048,grad_norm: 0.9999994867920068, iteration: 358810
loss: 0.9948676228523254,grad_norm: 0.7763967049350265, iteration: 358811
loss: 1.0100661516189575,grad_norm: 0.7726232884475711, iteration: 358812
loss: 1.0275894403457642,grad_norm: 0.9999990247301017, iteration: 358813
loss: 0.9813199043273926,grad_norm: 0.9216713371833584, iteration: 358814
loss: 0.9591453671455383,grad_norm: 0.890509459088661, iteration: 358815
loss: 0.9812440872192383,grad_norm: 0.8071609680879572, iteration: 358816
loss: 0.9933667182922363,grad_norm: 0.9999989772881719, iteration: 358817
loss: 1.0466063022613525,grad_norm: 0.8291861952134628, iteration: 358818
loss: 0.9974706172943115,grad_norm: 0.8113849028382351, iteration: 358819
loss: 1.0512020587921143,grad_norm: 0.9999996357184563, iteration: 358820
loss: 1.0814803838729858,grad_norm: 0.9999990875861553, iteration: 358821
loss: 1.0069236755371094,grad_norm: 0.9999993821405292, iteration: 358822
loss: 1.004153847694397,grad_norm: 0.7835293680655796, iteration: 358823
loss: 1.018966794013977,grad_norm: 0.9999995912928765, iteration: 358824
loss: 0.9974183440208435,grad_norm: 0.8318746403048358, iteration: 358825
loss: 0.9775076508522034,grad_norm: 0.7258286925578793, iteration: 358826
loss: 1.04365873336792,grad_norm: 0.9702029414174512, iteration: 358827
loss: 0.9994667768478394,grad_norm: 0.7278253514767125, iteration: 358828
loss: 1.0253032445907593,grad_norm: 0.9999991523556326, iteration: 358829
loss: 1.0804959535598755,grad_norm: 0.9999994996665893, iteration: 358830
loss: 1.0228419303894043,grad_norm: 0.8815793767147366, iteration: 358831
loss: 1.0023672580718994,grad_norm: 0.7577783681720698, iteration: 358832
loss: 1.0126919746398926,grad_norm: 0.8112618105145274, iteration: 358833
loss: 1.0022592544555664,grad_norm: 0.8504776266676766, iteration: 358834
loss: 0.9781168103218079,grad_norm: 0.8636359483391682, iteration: 358835
loss: 0.9432451725006104,grad_norm: 0.9001981627858854, iteration: 358836
loss: 0.9903431534767151,grad_norm: 0.6812911699931583, iteration: 358837
loss: 1.0263704061508179,grad_norm: 0.7914742316887997, iteration: 358838
loss: 1.009008765220642,grad_norm: 0.7427999539329804, iteration: 358839
loss: 1.012676477432251,grad_norm: 0.6659526575951473, iteration: 358840
loss: 0.9950534701347351,grad_norm: 0.7741703566479613, iteration: 358841
loss: 0.9797466397285461,grad_norm: 0.705681539369555, iteration: 358842
loss: 0.9393315315246582,grad_norm: 0.7709984319804106, iteration: 358843
loss: 1.0178717374801636,grad_norm: 0.8196880187772204, iteration: 358844
loss: 1.0833325386047363,grad_norm: 0.9999991700712825, iteration: 358845
loss: 1.0508437156677246,grad_norm: 0.8720303626294269, iteration: 358846
loss: 1.016464114189148,grad_norm: 0.8453086488271846, iteration: 358847
loss: 1.0133870840072632,grad_norm: 0.9551253155670254, iteration: 358848
loss: 1.0503571033477783,grad_norm: 0.6732755492743835, iteration: 358849
loss: 1.028874397277832,grad_norm: 0.7581715989243534, iteration: 358850
loss: 0.9665827751159668,grad_norm: 0.7226321874416123, iteration: 358851
loss: 1.0057501792907715,grad_norm: 0.852822354947901, iteration: 358852
loss: 1.12167227268219,grad_norm: 0.9999996778388538, iteration: 358853
loss: 0.9758912920951843,grad_norm: 0.8224535071242065, iteration: 358854
loss: 1.0857523679733276,grad_norm: 0.7850994152525634, iteration: 358855
loss: 0.9928185939788818,grad_norm: 0.9999997274295013, iteration: 358856
loss: 0.9574786424636841,grad_norm: 0.7488742805504197, iteration: 358857
loss: 1.037104606628418,grad_norm: 0.7215266647552693, iteration: 358858
loss: 0.998969554901123,grad_norm: 0.9131252868799689, iteration: 358859
loss: 0.9590126872062683,grad_norm: 0.8615639722371412, iteration: 358860
loss: 1.01621413230896,grad_norm: 0.781047499482849, iteration: 358861
loss: 1.0491193532943726,grad_norm: 0.7453758812208926, iteration: 358862
loss: 1.052416443824768,grad_norm: 0.9999991933835746, iteration: 358863
loss: 0.9890612363815308,grad_norm: 0.6980767588660696, iteration: 358864
loss: 1.009590983390808,grad_norm: 0.7967040704450197, iteration: 358865
loss: 1.0199861526489258,grad_norm: 0.7559758503068474, iteration: 358866
loss: 1.0068076848983765,grad_norm: 0.7424459032170243, iteration: 358867
loss: 1.0218371152877808,grad_norm: 0.999999955274621, iteration: 358868
loss: 1.0375187397003174,grad_norm: 0.9697471196968479, iteration: 358869
loss: 1.0916305780410767,grad_norm: 0.9999994407314436, iteration: 358870
loss: 1.014487385749817,grad_norm: 0.6710635868933829, iteration: 358871
loss: 0.9938410520553589,grad_norm: 0.9216819450372155, iteration: 358872
loss: 1.00713050365448,grad_norm: 0.7676082804362554, iteration: 358873
loss: 0.9833009243011475,grad_norm: 0.7370622659678435, iteration: 358874
loss: 0.9867657423019409,grad_norm: 0.9418360361197116, iteration: 358875
loss: 1.0286743640899658,grad_norm: 0.7772747075247516, iteration: 358876
loss: 0.9771493673324585,grad_norm: 0.7946457344629235, iteration: 358877
loss: 0.9938443899154663,grad_norm: 0.5898186108409968, iteration: 358878
loss: 0.9894031286239624,grad_norm: 0.7468948739189136, iteration: 358879
loss: 0.9842020869255066,grad_norm: 0.7901991518682676, iteration: 358880
loss: 1.007101058959961,grad_norm: 0.9403250738982096, iteration: 358881
loss: 1.0213508605957031,grad_norm: 0.9999997815467908, iteration: 358882
loss: 0.9742763638496399,grad_norm: 0.7150736749380425, iteration: 358883
loss: 1.0005114078521729,grad_norm: 0.8738018216711497, iteration: 358884
loss: 1.0039278268814087,grad_norm: 0.7572639599159373, iteration: 358885
loss: 0.9996045231819153,grad_norm: 0.7952045413414474, iteration: 358886
loss: 0.9959114193916321,grad_norm: 0.7850326201079668, iteration: 358887
loss: 1.016268253326416,grad_norm: 0.9999994932342006, iteration: 358888
loss: 1.0196117162704468,grad_norm: 0.9999997503965496, iteration: 358889
loss: 1.012929916381836,grad_norm: 0.7409189085067329, iteration: 358890
loss: 0.9744933843612671,grad_norm: 0.7710874431759925, iteration: 358891
loss: 0.9934378266334534,grad_norm: 0.7451648769393501, iteration: 358892
loss: 0.9828502535820007,grad_norm: 0.799410088192325, iteration: 358893
loss: 1.065931797027588,grad_norm: 0.9999992262196767, iteration: 358894
loss: 0.9984528422355652,grad_norm: 0.7755017530306694, iteration: 358895
loss: 1.011958122253418,grad_norm: 0.7920156094766022, iteration: 358896
loss: 0.9814496636390686,grad_norm: 0.9735001367676035, iteration: 358897
loss: 0.9997566342353821,grad_norm: 0.7099723625676749, iteration: 358898
loss: 1.0430278778076172,grad_norm: 0.9999992488932429, iteration: 358899
loss: 1.0140442848205566,grad_norm: 0.707469709058723, iteration: 358900
loss: 1.0023117065429688,grad_norm: 0.847431960841054, iteration: 358901
loss: 0.9862490892410278,grad_norm: 0.7227093497542966, iteration: 358902
loss: 1.0401904582977295,grad_norm: 0.9999999357304544, iteration: 358903
loss: 1.0017541646957397,grad_norm: 0.9244739163932881, iteration: 358904
loss: 0.9878998398780823,grad_norm: 0.9204775103912998, iteration: 358905
loss: 1.0055781602859497,grad_norm: 0.8750203618773383, iteration: 358906
loss: 1.017209768295288,grad_norm: 0.999999249097766, iteration: 358907
loss: 0.9937810897827148,grad_norm: 0.6314846640426134, iteration: 358908
loss: 0.9618765711784363,grad_norm: 0.8682304496627575, iteration: 358909
loss: 1.0282044410705566,grad_norm: 0.8450220984703143, iteration: 358910
loss: 1.0107247829437256,grad_norm: 0.7889941533142171, iteration: 358911
loss: 0.952560305595398,grad_norm: 0.9352653588176955, iteration: 358912
loss: 1.1062114238739014,grad_norm: 0.99999974870101, iteration: 358913
loss: 1.0176887512207031,grad_norm: 0.8306316670554289, iteration: 358914
loss: 0.9669035077095032,grad_norm: 0.8220681646284649, iteration: 358915
loss: 0.982250988483429,grad_norm: 0.72697612141141, iteration: 358916
loss: 1.0088534355163574,grad_norm: 0.8443614088037759, iteration: 358917
loss: 0.9967830181121826,grad_norm: 0.9079184468795911, iteration: 358918
loss: 0.9757101535797119,grad_norm: 0.9999990080281099, iteration: 358919
loss: 0.9921166896820068,grad_norm: 0.7735580625838694, iteration: 358920
loss: 0.9980965852737427,grad_norm: 0.7963954718704792, iteration: 358921
loss: 0.9697834253311157,grad_norm: 0.9999991970560246, iteration: 358922
loss: 0.9952445030212402,grad_norm: 0.9415115528758852, iteration: 358923
loss: 1.054041862487793,grad_norm: 0.9358065381410816, iteration: 358924
loss: 1.0363271236419678,grad_norm: 0.7072455933243537, iteration: 358925
loss: 1.0000611543655396,grad_norm: 0.9808909361494509, iteration: 358926
loss: 0.9982179403305054,grad_norm: 0.7840757024611215, iteration: 358927
loss: 1.0101332664489746,grad_norm: 0.8042939877334191, iteration: 358928
loss: 0.9907860159873962,grad_norm: 0.6463035451801219, iteration: 358929
loss: 0.9760736227035522,grad_norm: 0.8334755972842285, iteration: 358930
loss: 0.9917593598365784,grad_norm: 0.9874453252919007, iteration: 358931
loss: 0.9982693195343018,grad_norm: 0.8000425551248944, iteration: 358932
loss: 1.025357961654663,grad_norm: 0.9118720325812542, iteration: 358933
loss: 1.0077167749404907,grad_norm: 0.8047730892075424, iteration: 358934
loss: 0.9994926452636719,grad_norm: 0.9754370308149641, iteration: 358935
loss: 1.0289850234985352,grad_norm: 0.756408151759879, iteration: 358936
loss: 0.9995308518409729,grad_norm: 0.7986708308765138, iteration: 358937
loss: 1.0180951356887817,grad_norm: 0.8415854046386476, iteration: 358938
loss: 1.001013994216919,grad_norm: 0.8945781449085028, iteration: 358939
loss: 0.9879035353660583,grad_norm: 0.7758848416133083, iteration: 358940
loss: 1.0047924518585205,grad_norm: 0.7922862006732984, iteration: 358941
loss: 1.0203864574432373,grad_norm: 0.8984521026548166, iteration: 358942
loss: 0.9914840459823608,grad_norm: 0.7708168683435762, iteration: 358943
loss: 0.9842696189880371,grad_norm: 0.7519596780479999, iteration: 358944
loss: 1.0203371047973633,grad_norm: 0.8392358113762316, iteration: 358945
loss: 0.9738319516181946,grad_norm: 0.7620971524412821, iteration: 358946
loss: 0.9609243869781494,grad_norm: 0.8593079106005884, iteration: 358947
loss: 1.1581292152404785,grad_norm: 0.9999992805370571, iteration: 358948
loss: 0.9848544597625732,grad_norm: 0.9999993169395383, iteration: 358949
loss: 0.9839012026786804,grad_norm: 0.82539716983746, iteration: 358950
loss: 1.0069478750228882,grad_norm: 0.6844482352717182, iteration: 358951
loss: 0.9799032807350159,grad_norm: 0.839292295300978, iteration: 358952
loss: 0.988410234451294,grad_norm: 0.8219023487177745, iteration: 358953
loss: 0.9769341945648193,grad_norm: 0.8425407179785096, iteration: 358954
loss: 1.0088328123092651,grad_norm: 0.7865922838517897, iteration: 358955
loss: 1.0048291683197021,grad_norm: 0.742737028261966, iteration: 358956
loss: 0.9959317445755005,grad_norm: 0.8180933564999805, iteration: 358957
loss: 1.0193893909454346,grad_norm: 0.6866037549748621, iteration: 358958
loss: 1.0013049840927124,grad_norm: 0.5822598177881111, iteration: 358959
loss: 1.0171767473220825,grad_norm: 0.7555928120870012, iteration: 358960
loss: 1.0234169960021973,grad_norm: 0.9999996208468114, iteration: 358961
loss: 1.021313190460205,grad_norm: 0.8590906788621002, iteration: 358962
loss: 1.0191187858581543,grad_norm: 0.9187773483535827, iteration: 358963
loss: 0.9963729977607727,grad_norm: 0.8734504583679703, iteration: 358964
loss: 1.0056880712509155,grad_norm: 0.8054638476378806, iteration: 358965
loss: 0.9702498912811279,grad_norm: 0.831505023523668, iteration: 358966
loss: 0.9935951828956604,grad_norm: 0.836825497863666, iteration: 358967
loss: 1.004733681678772,grad_norm: 0.8230446251485075, iteration: 358968
loss: 0.9615557789802551,grad_norm: 0.852314520933304, iteration: 358969
loss: 1.0234870910644531,grad_norm: 0.7412282467926719, iteration: 358970
loss: 1.0138689279556274,grad_norm: 0.828109886104927, iteration: 358971
loss: 1.0197042226791382,grad_norm: 0.791421565851901, iteration: 358972
loss: 1.0096665620803833,grad_norm: 0.7460502132806499, iteration: 358973
loss: 1.0077030658721924,grad_norm: 0.757042837474604, iteration: 358974
loss: 1.2164469957351685,grad_norm: 0.9999997316610604, iteration: 358975
loss: 1.0147866010665894,grad_norm: 0.7585724657036085, iteration: 358976
loss: 1.0050901174545288,grad_norm: 0.866753127842941, iteration: 358977
loss: 1.003345251083374,grad_norm: 0.9093196413680502, iteration: 358978
loss: 0.9691439270973206,grad_norm: 0.8469015574740191, iteration: 358979
loss: 0.9965449571609497,grad_norm: 0.665009192812136, iteration: 358980
loss: 0.9843422174453735,grad_norm: 0.7837108120850761, iteration: 358981
loss: 1.0262922048568726,grad_norm: 0.8781971967780683, iteration: 358982
loss: 1.030760645866394,grad_norm: 0.8665047438463757, iteration: 358983
loss: 0.9825056195259094,grad_norm: 0.7652177438541522, iteration: 358984
loss: 1.0389864444732666,grad_norm: 0.9999992285868986, iteration: 358985
loss: 0.9919555187225342,grad_norm: 0.9033759081961238, iteration: 358986
loss: 0.9657278060913086,grad_norm: 0.8945371356943209, iteration: 358987
loss: 0.9999736547470093,grad_norm: 0.8500598638500603, iteration: 358988
loss: 1.0109602212905884,grad_norm: 0.873257876618663, iteration: 358989
loss: 1.0126923322677612,grad_norm: 0.8183547709962763, iteration: 358990
loss: 0.9896227121353149,grad_norm: 0.9314405529624421, iteration: 358991
loss: 0.9799748659133911,grad_norm: 0.9231505545361018, iteration: 358992
loss: 0.9931674003601074,grad_norm: 0.6885675463461386, iteration: 358993
loss: 1.0494149923324585,grad_norm: 0.9999990749401458, iteration: 358994
loss: 0.9969911575317383,grad_norm: 0.8137768485593468, iteration: 358995
loss: 0.9744895100593567,grad_norm: 0.7667214592161935, iteration: 358996
loss: 0.9618323445320129,grad_norm: 0.8223989655244031, iteration: 358997
loss: 0.9968970417976379,grad_norm: 0.7963452608653583, iteration: 358998
loss: 0.9733849167823792,grad_norm: 0.9662847076146578, iteration: 358999
loss: 0.9761659502983093,grad_norm: 0.8114132361572611, iteration: 359000
loss: 0.9748791456222534,grad_norm: 0.9969247534945079, iteration: 359001
loss: 1.0191853046417236,grad_norm: 0.8741107198385247, iteration: 359002
loss: 0.9687723517417908,grad_norm: 0.9983002929106441, iteration: 359003
loss: 0.9917699694633484,grad_norm: 0.7747340596955082, iteration: 359004
loss: 0.9966383576393127,grad_norm: 0.8536074242756715, iteration: 359005
loss: 0.9729623198509216,grad_norm: 0.866096376047694, iteration: 359006
loss: 1.0469748973846436,grad_norm: 0.9999992214805017, iteration: 359007
loss: 0.9951973557472229,grad_norm: 0.8562751764487144, iteration: 359008
loss: 1.0131758451461792,grad_norm: 0.8065017072161411, iteration: 359009
loss: 0.9915914535522461,grad_norm: 0.7743674259892658, iteration: 359010
loss: 0.9905148148536682,grad_norm: 0.7357700151573695, iteration: 359011
loss: 1.0117743015289307,grad_norm: 0.899444288479101, iteration: 359012
loss: 0.9961974024772644,grad_norm: 0.9706414996823743, iteration: 359013
loss: 1.079351782798767,grad_norm: 0.8402887722396122, iteration: 359014
loss: 1.0196675062179565,grad_norm: 0.8566829085956327, iteration: 359015
loss: 0.9636110067367554,grad_norm: 0.8764576500050664, iteration: 359016
loss: 0.999179482460022,grad_norm: 0.8147286260170695, iteration: 359017
loss: 0.9971226453781128,grad_norm: 0.7381568988702586, iteration: 359018
loss: 1.0035508871078491,grad_norm: 0.7640127299703918, iteration: 359019
loss: 0.9828231334686279,grad_norm: 0.9999990171000204, iteration: 359020
loss: 0.9703115224838257,grad_norm: 0.7490781539455542, iteration: 359021
loss: 0.9978650212287903,grad_norm: 0.8440614842222288, iteration: 359022
loss: 0.9964073300361633,grad_norm: 0.7826738365617792, iteration: 359023
loss: 1.0236008167266846,grad_norm: 0.7581684881879139, iteration: 359024
loss: 0.9725791215896606,grad_norm: 0.800262566280367, iteration: 359025
loss: 0.9744333028793335,grad_norm: 0.7190378232281888, iteration: 359026
loss: 1.0029890537261963,grad_norm: 0.9103632279817611, iteration: 359027
loss: 1.0872811079025269,grad_norm: 1.0000000224998487, iteration: 359028
loss: 1.0212042331695557,grad_norm: 0.8826912561125965, iteration: 359029
loss: 1.0445586442947388,grad_norm: 0.9999993064553669, iteration: 359030
loss: 0.9802639484405518,grad_norm: 0.726242666082397, iteration: 359031
loss: 0.9722535610198975,grad_norm: 0.9999997215518177, iteration: 359032
loss: 1.0178653001785278,grad_norm: 0.7223700197544167, iteration: 359033
loss: 0.9800141453742981,grad_norm: 0.9999992176994892, iteration: 359034
loss: 0.9664931297302246,grad_norm: 0.8589748847602441, iteration: 359035
loss: 1.018856406211853,grad_norm: 0.7858635182453608, iteration: 359036
loss: 0.9828476905822754,grad_norm: 0.8701602639163039, iteration: 359037
loss: 0.9943811893463135,grad_norm: 0.9707863061178573, iteration: 359038
loss: 0.9981446266174316,grad_norm: 0.7089970988644636, iteration: 359039
loss: 0.9850253462791443,grad_norm: 0.9875937544569048, iteration: 359040
loss: 0.9957845211029053,grad_norm: 0.7649209660688326, iteration: 359041
loss: 1.0050315856933594,grad_norm: 0.8731080605142303, iteration: 359042
loss: 0.9775167107582092,grad_norm: 0.8133408185482588, iteration: 359043
loss: 1.0205943584442139,grad_norm: 0.7758959700527805, iteration: 359044
loss: 0.9868114590644836,grad_norm: 0.8526028799098542, iteration: 359045
loss: 0.9982094168663025,grad_norm: 0.9394824637805197, iteration: 359046
loss: 1.0154485702514648,grad_norm: 0.8419419677837935, iteration: 359047
loss: 1.0033080577850342,grad_norm: 0.9999995771415093, iteration: 359048
loss: 1.0263396501541138,grad_norm: 0.8515657366403255, iteration: 359049
loss: 1.0141533613204956,grad_norm: 0.7022317621850276, iteration: 359050
loss: 0.9787217378616333,grad_norm: 0.7299209598688963, iteration: 359051
loss: 1.0051428079605103,grad_norm: 0.9999993854672266, iteration: 359052
loss: 1.0084633827209473,grad_norm: 0.8077343895024846, iteration: 359053
loss: 0.9937558770179749,grad_norm: 0.7646876477886102, iteration: 359054
loss: 1.1035152673721313,grad_norm: 0.697035978950566, iteration: 359055
loss: 0.9903572797775269,grad_norm: 0.8010373260512806, iteration: 359056
loss: 0.9980394244194031,grad_norm: 0.7542152132100953, iteration: 359057
loss: 0.9759746789932251,grad_norm: 0.9506283068458161, iteration: 359058
loss: 1.018721580505371,grad_norm: 0.6371885632999417, iteration: 359059
loss: 0.9753021001815796,grad_norm: 0.9999999438981093, iteration: 359060
loss: 0.9861463904380798,grad_norm: 0.9548584017903948, iteration: 359061
loss: 1.0623188018798828,grad_norm: 0.9999999779925486, iteration: 359062
loss: 0.9865858554840088,grad_norm: 0.780527112290856, iteration: 359063
loss: 1.0135968923568726,grad_norm: 0.7204293527031727, iteration: 359064
loss: 0.9845542311668396,grad_norm: 0.8896197283738173, iteration: 359065
loss: 1.0190629959106445,grad_norm: 0.9999992453951069, iteration: 359066
loss: 1.0219178199768066,grad_norm: 0.9999990708697735, iteration: 359067
loss: 1.0204520225524902,grad_norm: 0.9999989702971843, iteration: 359068
loss: 0.9932833909988403,grad_norm: 0.6461110941935706, iteration: 359069
loss: 1.0008717775344849,grad_norm: 0.9119686121065063, iteration: 359070
loss: 0.9483727216720581,grad_norm: 0.8072031704411665, iteration: 359071
loss: 0.9608792662620544,grad_norm: 0.8365053695680736, iteration: 359072
loss: 1.0586243867874146,grad_norm: 0.9855662243833144, iteration: 359073
loss: 1.048231840133667,grad_norm: 1.0000000268310463, iteration: 359074
loss: 1.0132368803024292,grad_norm: 0.8165473819438038, iteration: 359075
loss: 0.9945725202560425,grad_norm: 0.8888030540002602, iteration: 359076
loss: 1.0155619382858276,grad_norm: 0.8998221710558578, iteration: 359077
loss: 1.008283019065857,grad_norm: 0.7969202603782056, iteration: 359078
loss: 1.009759783744812,grad_norm: 0.8802836138475882, iteration: 359079
loss: 1.0366209745407104,grad_norm: 0.999998943822913, iteration: 359080
loss: 1.002832293510437,grad_norm: 0.8007631459341115, iteration: 359081
loss: 1.0407509803771973,grad_norm: 0.9999993478245557, iteration: 359082
loss: 1.0114959478378296,grad_norm: 0.6869415819642165, iteration: 359083
loss: 0.9741016626358032,grad_norm: 0.7569548176758133, iteration: 359084
loss: 1.014938235282898,grad_norm: 0.8123936598283835, iteration: 359085
loss: 0.9974814057350159,grad_norm: 0.9999991344004024, iteration: 359086
loss: 1.012691855430603,grad_norm: 0.8566254205797954, iteration: 359087
loss: 0.9783096313476562,grad_norm: 0.7276877514227685, iteration: 359088
loss: 0.9880927801132202,grad_norm: 0.7839705595971961, iteration: 359089
loss: 1.030809998512268,grad_norm: 0.8221127541227293, iteration: 359090
loss: 0.9977733492851257,grad_norm: 0.7077927709397988, iteration: 359091
loss: 0.9959087371826172,grad_norm: 0.6184471608479623, iteration: 359092
loss: 0.9886151552200317,grad_norm: 0.7149455109488904, iteration: 359093
loss: 1.0480420589447021,grad_norm: 0.8650808316894467, iteration: 359094
loss: 0.9650599956512451,grad_norm: 0.8984452072143253, iteration: 359095
loss: 0.9754095077514648,grad_norm: 0.9437263671933996, iteration: 359096
loss: 1.0333943367004395,grad_norm: 0.7610405579767167, iteration: 359097
loss: 1.0153197050094604,grad_norm: 0.8110865447676899, iteration: 359098
loss: 0.9425780177116394,grad_norm: 0.9999993963599111, iteration: 359099
loss: 1.0537153482437134,grad_norm: 0.999999809996829, iteration: 359100
loss: 0.9976781010627747,grad_norm: 0.9362623847185617, iteration: 359101
loss: 1.030495047569275,grad_norm: 0.6892459848830301, iteration: 359102
loss: 0.9990279078483582,grad_norm: 0.8229582193795473, iteration: 359103
loss: 1.0118623971939087,grad_norm: 0.9781955324197203, iteration: 359104
loss: 0.998566210269928,grad_norm: 0.6830582780442492, iteration: 359105
loss: 1.0460362434387207,grad_norm: 0.9999993449713923, iteration: 359106
loss: 0.9673159718513489,grad_norm: 0.8884428894028691, iteration: 359107
loss: 0.9642336964607239,grad_norm: 0.7047484811042586, iteration: 359108
loss: 0.9875229597091675,grad_norm: 0.8121611507371548, iteration: 359109
loss: 1.0269192457199097,grad_norm: 0.9999991420347685, iteration: 359110
loss: 0.9825382232666016,grad_norm: 0.7384435277788992, iteration: 359111
loss: 0.9834192395210266,grad_norm: 0.7712238339661937, iteration: 359112
loss: 0.9830684661865234,grad_norm: 0.8392326942549623, iteration: 359113
loss: 1.122258186340332,grad_norm: 0.9999997335940238, iteration: 359114
loss: 1.0106942653656006,grad_norm: 0.999999798938655, iteration: 359115
loss: 0.9965955018997192,grad_norm: 0.7443279659742338, iteration: 359116
loss: 1.0074177980422974,grad_norm: 0.8782094357624433, iteration: 359117
loss: 0.9978595972061157,grad_norm: 0.8634104560725545, iteration: 359118
loss: 0.9508199691772461,grad_norm: 0.9948913549545574, iteration: 359119
loss: 0.9907252788543701,grad_norm: 0.8851327315738343, iteration: 359120
loss: 0.9951736330986023,grad_norm: 0.995244516117122, iteration: 359121
loss: 0.9776777625083923,grad_norm: 0.8773678242349418, iteration: 359122
loss: 0.9861220717430115,grad_norm: 0.7155003233079122, iteration: 359123
loss: 1.0195368528366089,grad_norm: 0.793879057926505, iteration: 359124
loss: 0.9918510913848877,grad_norm: 0.8541772399449538, iteration: 359125
loss: 1.0123686790466309,grad_norm: 0.9119681378282494, iteration: 359126
loss: 0.9482552409172058,grad_norm: 0.9999990880007386, iteration: 359127
loss: 0.9786890745162964,grad_norm: 0.8405457749271509, iteration: 359128
loss: 1.0361448526382446,grad_norm: 0.9999991752123126, iteration: 359129
loss: 1.0272821187973022,grad_norm: 0.849295255349568, iteration: 359130
loss: 1.0163930654525757,grad_norm: 0.7927518659069825, iteration: 359131
loss: 1.0262175798416138,grad_norm: 0.7284672023515846, iteration: 359132
loss: 0.975849449634552,grad_norm: 0.7189171876880186, iteration: 359133
loss: 1.0075883865356445,grad_norm: 0.8325001991436265, iteration: 359134
loss: 0.9873036742210388,grad_norm: 0.9994016027382687, iteration: 359135
loss: 1.0218806266784668,grad_norm: 0.7644894803433069, iteration: 359136
loss: 0.9519786238670349,grad_norm: 0.9531959580643621, iteration: 359137
loss: 1.0242706537246704,grad_norm: 0.9570533003263025, iteration: 359138
loss: 0.9925715923309326,grad_norm: 0.7751001524184717, iteration: 359139
loss: 1.0040215253829956,grad_norm: 0.8697979214290787, iteration: 359140
loss: 1.0040991306304932,grad_norm: 0.8548577789321395, iteration: 359141
loss: 1.096091389656067,grad_norm: 0.9999990197931136, iteration: 359142
loss: 1.018370509147644,grad_norm: 0.6959376760293536, iteration: 359143
loss: 0.9785031676292419,grad_norm: 0.7188137447223695, iteration: 359144
loss: 1.0165250301361084,grad_norm: 0.7172206626605486, iteration: 359145
loss: 0.9826123118400574,grad_norm: 0.9510600155044013, iteration: 359146
loss: 1.0024099349975586,grad_norm: 0.8275972386406331, iteration: 359147
loss: 0.9726317524909973,grad_norm: 0.7646330194010927, iteration: 359148
loss: 1.0232669115066528,grad_norm: 0.8013637734434758, iteration: 359149
loss: 1.0154483318328857,grad_norm: 0.6288206459206706, iteration: 359150
loss: 0.9986714124679565,grad_norm: 0.6743645034664785, iteration: 359151
loss: 1.1837133169174194,grad_norm: 0.9999990727434885, iteration: 359152
loss: 1.178078055381775,grad_norm: 0.9999991060259018, iteration: 359153
loss: 0.9956193566322327,grad_norm: 0.9083963074434952, iteration: 359154
loss: 0.9615825414657593,grad_norm: 0.768887592345777, iteration: 359155
loss: 1.0125592947006226,grad_norm: 0.9568847456026913, iteration: 359156
loss: 0.9958019256591797,grad_norm: 0.9238478578766, iteration: 359157
loss: 1.0079158544540405,grad_norm: 0.8046151817525758, iteration: 359158
loss: 0.9981855750083923,grad_norm: 0.7954398141397928, iteration: 359159
loss: 1.0279719829559326,grad_norm: 0.7642540946103386, iteration: 359160
loss: 0.9800588488578796,grad_norm: 0.6944235304729004, iteration: 359161
loss: 1.0127918720245361,grad_norm: 0.7523814449492688, iteration: 359162
loss: 0.9776743054389954,grad_norm: 0.9871512821231985, iteration: 359163
loss: 1.0185818672180176,grad_norm: 0.8622060232180206, iteration: 359164
loss: 1.0304917097091675,grad_norm: 0.7746837081788884, iteration: 359165
loss: 1.018763542175293,grad_norm: 0.8214721323539457, iteration: 359166
loss: 0.9787685871124268,grad_norm: 0.9999990405595064, iteration: 359167
loss: 0.9562095999717712,grad_norm: 0.7624299263100387, iteration: 359168
loss: 1.0079693794250488,grad_norm: 0.8285025026946587, iteration: 359169
loss: 1.0497815608978271,grad_norm: 0.8819416687096177, iteration: 359170
loss: 1.010884404182434,grad_norm: 0.8766669246126703, iteration: 359171
loss: 0.9984942078590393,grad_norm: 0.8898447302605692, iteration: 359172
loss: 1.0256026983261108,grad_norm: 0.9100357283634827, iteration: 359173
loss: 0.9705575704574585,grad_norm: 0.7953050767046641, iteration: 359174
loss: 0.9725167751312256,grad_norm: 0.8239242432020875, iteration: 359175
loss: 0.9957606792449951,grad_norm: 0.8194638465152729, iteration: 359176
loss: 0.9717767834663391,grad_norm: 0.7313182466392225, iteration: 359177
loss: 1.024559497833252,grad_norm: 0.7842931920905093, iteration: 359178
loss: 0.9863560795783997,grad_norm: 0.870028273938884, iteration: 359179
loss: 0.9772494435310364,grad_norm: 0.7999916600282135, iteration: 359180
loss: 0.9874410629272461,grad_norm: 0.9999991441668891, iteration: 359181
loss: 1.0159275531768799,grad_norm: 0.9999998622981406, iteration: 359182
loss: 1.0574018955230713,grad_norm: 0.8008648783444495, iteration: 359183
loss: 0.962989091873169,grad_norm: 0.8889066092571936, iteration: 359184
loss: 0.9650030732154846,grad_norm: 0.7083283308069608, iteration: 359185
loss: 1.0000020265579224,grad_norm: 0.6971295081517608, iteration: 359186
loss: 1.0156527757644653,grad_norm: 0.9999990787980907, iteration: 359187
loss: 0.997776985168457,grad_norm: 0.8633436252054072, iteration: 359188
loss: 1.0131232738494873,grad_norm: 0.7437654238433873, iteration: 359189
loss: 0.9934863448143005,grad_norm: 0.8788008850907555, iteration: 359190
loss: 0.9878515005111694,grad_norm: 0.8436934001666617, iteration: 359191
loss: 1.021592378616333,grad_norm: 0.8018766160429879, iteration: 359192
loss: 0.9972023963928223,grad_norm: 0.9570822733684952, iteration: 359193
loss: 1.0177353620529175,grad_norm: 0.8536271170938804, iteration: 359194
loss: 1.0009362697601318,grad_norm: 0.700567206558104, iteration: 359195
loss: 0.9803740978240967,grad_norm: 0.742179873537505, iteration: 359196
loss: 1.018305778503418,grad_norm: 0.8613622989548122, iteration: 359197
loss: 1.0482878684997559,grad_norm: 0.6134366997236808, iteration: 359198
loss: 1.0644258260726929,grad_norm: 0.8383039012636939, iteration: 359199
loss: 1.0292983055114746,grad_norm: 0.8590499807015972, iteration: 359200
loss: 0.9921908974647522,grad_norm: 0.7973999166015513, iteration: 359201
loss: 1.0536211729049683,grad_norm: 0.9999992444206903, iteration: 359202
loss: 0.9979712963104248,grad_norm: 0.7535344862535913, iteration: 359203
loss: 1.0032366514205933,grad_norm: 0.9999999234686888, iteration: 359204
loss: 1.0476114749908447,grad_norm: 0.7595562835413396, iteration: 359205
loss: 0.9922676086425781,grad_norm: 0.9956370601435255, iteration: 359206
loss: 0.9858604073524475,grad_norm: 0.845252779754314, iteration: 359207
loss: 0.9838286638259888,grad_norm: 0.8143296347076119, iteration: 359208
loss: 0.9715405106544495,grad_norm: 0.6345568473299104, iteration: 359209
loss: 0.9766618609428406,grad_norm: 0.7235929317076382, iteration: 359210
loss: 0.9588590264320374,grad_norm: 0.9549667898988938, iteration: 359211
loss: 1.0168861150741577,grad_norm: 0.8476724215173396, iteration: 359212
loss: 0.9673579335212708,grad_norm: 0.8851467369894566, iteration: 359213
loss: 0.9732910990715027,grad_norm: 0.8051977279800012, iteration: 359214
loss: 1.0210739374160767,grad_norm: 0.8371638575209956, iteration: 359215
loss: 0.9761015176773071,grad_norm: 0.8848801933386019, iteration: 359216
loss: 0.9775111675262451,grad_norm: 0.7615618238827453, iteration: 359217
loss: 0.979448139667511,grad_norm: 0.7312131505351629, iteration: 359218
loss: 0.9735094904899597,grad_norm: 0.7540490849764718, iteration: 359219
loss: 1.0083404779434204,grad_norm: 0.7897144149819277, iteration: 359220
loss: 1.0287257432937622,grad_norm: 0.8320369817658649, iteration: 359221
loss: 1.0104001760482788,grad_norm: 0.8219023016076685, iteration: 359222
loss: 0.973409116268158,grad_norm: 0.8044496164590567, iteration: 359223
loss: 0.998269259929657,grad_norm: 0.8157907652862387, iteration: 359224
loss: 1.004804253578186,grad_norm: 0.7425616374655191, iteration: 359225
loss: 1.0095515251159668,grad_norm: 0.8407426190164852, iteration: 359226
loss: 0.9745349884033203,grad_norm: 0.7318280094445898, iteration: 359227
loss: 0.9943504929542542,grad_norm: 0.7726498730606464, iteration: 359228
loss: 0.9940243363380432,grad_norm: 0.6929973714874218, iteration: 359229
loss: 1.1029212474822998,grad_norm: 0.9999992419075544, iteration: 359230
loss: 1.0225396156311035,grad_norm: 0.7442412634942369, iteration: 359231
loss: 0.997502863407135,grad_norm: 0.9999991555473677, iteration: 359232
loss: 0.9762564301490784,grad_norm: 0.7849835805714426, iteration: 359233
loss: 1.0470432043075562,grad_norm: 0.9999997922370479, iteration: 359234
loss: 1.0194905996322632,grad_norm: 0.7422942576576951, iteration: 359235
loss: 0.9711959958076477,grad_norm: 0.9155814531691354, iteration: 359236
loss: 0.9969810247421265,grad_norm: 0.8765296917889417, iteration: 359237
loss: 0.992611825466156,grad_norm: 0.8351754560465504, iteration: 359238
loss: 0.9927781224250793,grad_norm: 0.7465743615772397, iteration: 359239
loss: 1.0243273973464966,grad_norm: 0.7100889166003569, iteration: 359240
loss: 1.0085982084274292,grad_norm: 0.801188845054691, iteration: 359241
loss: 0.973152220249176,grad_norm: 0.7631619136837572, iteration: 359242
loss: 1.0391563177108765,grad_norm: 0.8307319544818842, iteration: 359243
loss: 1.0156363248825073,grad_norm: 0.8852737287734169, iteration: 359244
loss: 0.9627816677093506,grad_norm: 0.7504830424543492, iteration: 359245
loss: 0.9936568737030029,grad_norm: 0.8871151837714002, iteration: 359246
loss: 1.0477609634399414,grad_norm: 0.7564970945369467, iteration: 359247
loss: 0.9901567101478577,grad_norm: 0.8363448290231114, iteration: 359248
loss: 0.9947174191474915,grad_norm: 0.999999184919453, iteration: 359249
loss: 0.950818657875061,grad_norm: 0.8862343148968583, iteration: 359250
loss: 1.0463218688964844,grad_norm: 0.9438023782281361, iteration: 359251
loss: 0.969294548034668,grad_norm: 0.8527550074004735, iteration: 359252
loss: 1.1068956851959229,grad_norm: 0.8566273321265914, iteration: 359253
loss: 1.0007890462875366,grad_norm: 0.948998218352334, iteration: 359254
loss: 0.9815740585327148,grad_norm: 0.7402975879671637, iteration: 359255
loss: 0.9719763398170471,grad_norm: 0.8246711688701427, iteration: 359256
loss: 0.9936167001724243,grad_norm: 0.8852569552102129, iteration: 359257
loss: 1.0244755744934082,grad_norm: 0.7579371802445937, iteration: 359258
loss: 0.9936633706092834,grad_norm: 0.9999993584464223, iteration: 359259
loss: 1.0477598905563354,grad_norm: 0.7055024585647578, iteration: 359260
loss: 0.982187807559967,grad_norm: 0.7555854104741165, iteration: 359261
loss: 0.9968937635421753,grad_norm: 0.740598071535025, iteration: 359262
loss: 1.031509518623352,grad_norm: 0.9999990903368103, iteration: 359263
loss: 1.028674840927124,grad_norm: 0.919901079684082, iteration: 359264
loss: 1.0189608335494995,grad_norm: 0.6347680383392451, iteration: 359265
loss: 1.0051004886627197,grad_norm: 0.6944006212167185, iteration: 359266
loss: 1.005982756614685,grad_norm: 0.7378288234891538, iteration: 359267
loss: 1.0414918661117554,grad_norm: 0.7479586117092412, iteration: 359268
loss: 0.9637404084205627,grad_norm: 0.8403948024570714, iteration: 359269
loss: 1.2467745542526245,grad_norm: 0.9999991892685759, iteration: 359270
loss: 1.0193078517913818,grad_norm: 0.80921240794745, iteration: 359271
loss: 0.9896661639213562,grad_norm: 0.7146929471135551, iteration: 359272
loss: 1.0640079975128174,grad_norm: 0.9999990982146293, iteration: 359273
loss: 1.0427440404891968,grad_norm: 0.797397984691154, iteration: 359274
loss: 1.0118188858032227,grad_norm: 0.6774404068801857, iteration: 359275
loss: 1.0448431968688965,grad_norm: 1.0000000013399326, iteration: 359276
loss: 1.007035732269287,grad_norm: 0.999999751254804, iteration: 359277
loss: 0.9775374531745911,grad_norm: 0.8860608754548429, iteration: 359278
loss: 1.0243523120880127,grad_norm: 0.9999991695143317, iteration: 359279
loss: 1.0022320747375488,grad_norm: 0.9245111991419221, iteration: 359280
loss: 1.0227587223052979,grad_norm: 0.9999991806715781, iteration: 359281
loss: 1.0032415390014648,grad_norm: 0.9999993638513778, iteration: 359282
loss: 1.0042552947998047,grad_norm: 0.8626398512749263, iteration: 359283
loss: 0.9980992674827576,grad_norm: 0.811476374737736, iteration: 359284
loss: 1.0298902988433838,grad_norm: 0.8230408724351497, iteration: 359285
loss: 1.0007002353668213,grad_norm: 0.8801800452338182, iteration: 359286
loss: 1.0260677337646484,grad_norm: 0.7628205920106828, iteration: 359287
loss: 1.0338681936264038,grad_norm: 0.9525441355756628, iteration: 359288
loss: 0.9644404649734497,grad_norm: 0.8826659207872201, iteration: 359289
loss: 0.9940859079360962,grad_norm: 0.8315874891371831, iteration: 359290
loss: 1.0078946352005005,grad_norm: 0.8389755222962287, iteration: 359291
loss: 1.0252610445022583,grad_norm: 0.7388936497722041, iteration: 359292
loss: 0.9948294162750244,grad_norm: 0.8067539626467319, iteration: 359293
loss: 0.9987484812736511,grad_norm: 0.9999993871375681, iteration: 359294
loss: 1.0205085277557373,grad_norm: 0.9999999758787633, iteration: 359295
loss: 1.0313664674758911,grad_norm: 0.79257260826976, iteration: 359296
loss: 0.9978420734405518,grad_norm: 0.7826486518538835, iteration: 359297
loss: 0.9956043362617493,grad_norm: 0.9999990393911836, iteration: 359298
loss: 1.0246727466583252,grad_norm: 0.9272219834774976, iteration: 359299
loss: 0.9965196251869202,grad_norm: 0.7883796703520324, iteration: 359300
loss: 0.9603561758995056,grad_norm: 0.8124323088321874, iteration: 359301
loss: 1.043184757232666,grad_norm: 0.8712005775351677, iteration: 359302
loss: 1.0042424201965332,grad_norm: 0.7105958551395058, iteration: 359303
loss: 0.9989080429077148,grad_norm: 0.8520250895791438, iteration: 359304
loss: 0.9882693290710449,grad_norm: 0.8554970515838568, iteration: 359305
loss: 1.0091025829315186,grad_norm: 0.7948394253132883, iteration: 359306
loss: 1.001019835472107,grad_norm: 0.7195869209555341, iteration: 359307
loss: 0.9787333011627197,grad_norm: 0.8188101273748853, iteration: 359308
loss: 0.9832984805107117,grad_norm: 0.8650279575700612, iteration: 359309
loss: 0.9861479997634888,grad_norm: 0.7314686464510572, iteration: 359310
loss: 0.967918336391449,grad_norm: 0.6618121563759416, iteration: 359311
loss: 1.030592441558838,grad_norm: 0.8853213586260328, iteration: 359312
loss: 1.0506640672683716,grad_norm: 0.9999990611681572, iteration: 359313
loss: 0.9970231652259827,grad_norm: 0.6609412221947591, iteration: 359314
loss: 1.0083197355270386,grad_norm: 0.9356525523299704, iteration: 359315
loss: 1.0264257192611694,grad_norm: 0.8639141293446015, iteration: 359316
loss: 0.9952667951583862,grad_norm: 0.9999991638593243, iteration: 359317
loss: 1.0188868045806885,grad_norm: 0.7531201053511923, iteration: 359318
loss: 1.0409715175628662,grad_norm: 0.9999991738037568, iteration: 359319
loss: 1.0085170269012451,grad_norm: 0.8556210764175741, iteration: 359320
loss: 1.033086895942688,grad_norm: 0.854242379877122, iteration: 359321
loss: 1.003156304359436,grad_norm: 0.759289078357984, iteration: 359322
loss: 0.964042603969574,grad_norm: 0.6807635550513131, iteration: 359323
loss: 0.9931725263595581,grad_norm: 0.7877023644981551, iteration: 359324
loss: 0.9965388178825378,grad_norm: 0.8436759563713639, iteration: 359325
loss: 1.092900276184082,grad_norm: 0.9999998830740764, iteration: 359326
loss: 1.0166321992874146,grad_norm: 0.8619160884066052, iteration: 359327
loss: 1.0139268636703491,grad_norm: 0.7696658326585322, iteration: 359328
loss: 0.9779260754585266,grad_norm: 0.7437961685252649, iteration: 359329
loss: 1.009170651435852,grad_norm: 0.9010713727187011, iteration: 359330
loss: 1.0023425817489624,grad_norm: 0.7160405539997257, iteration: 359331
loss: 0.9917660355567932,grad_norm: 0.872057306864753, iteration: 359332
loss: 0.9802865386009216,grad_norm: 0.8181812158961639, iteration: 359333
loss: 1.0077046155929565,grad_norm: 0.7560908463604691, iteration: 359334
loss: 0.9904978275299072,grad_norm: 0.8111861149240045, iteration: 359335
loss: 0.9743980169296265,grad_norm: 0.7500747818767508, iteration: 359336
loss: 1.0064871311187744,grad_norm: 0.7675487822280536, iteration: 359337
loss: 1.0027116537094116,grad_norm: 0.9480857695564113, iteration: 359338
loss: 1.0312211513519287,grad_norm: 0.9132922349802914, iteration: 359339
loss: 0.9723960161209106,grad_norm: 0.7886338380164156, iteration: 359340
loss: 0.965457022190094,grad_norm: 0.9999991441797693, iteration: 359341
loss: 0.9906545281410217,grad_norm: 0.8626403681483777, iteration: 359342
loss: 1.0025585889816284,grad_norm: 0.9068086778038227, iteration: 359343
loss: 1.0432826280593872,grad_norm: 0.6884032394711452, iteration: 359344
loss: 0.9822580218315125,grad_norm: 0.9999991821233054, iteration: 359345
loss: 1.0111163854599,grad_norm: 0.9513101164418793, iteration: 359346
loss: 1.016156554222107,grad_norm: 1.0000000505235596, iteration: 359347
loss: 1.0035299062728882,grad_norm: 0.8686812860627161, iteration: 359348
loss: 0.9433791637420654,grad_norm: 0.9958489173584093, iteration: 359349
loss: 1.0445725917816162,grad_norm: 0.8019505087725142, iteration: 359350
loss: 1.0078529119491577,grad_norm: 0.7603398712101427, iteration: 359351
loss: 1.025579571723938,grad_norm: 0.8287151618765402, iteration: 359352
loss: 0.9635592699050903,grad_norm: 0.9053735606949681, iteration: 359353
loss: 0.9734770655632019,grad_norm: 0.8077574353816864, iteration: 359354
loss: 1.0205000638961792,grad_norm: 0.8391777970725528, iteration: 359355
loss: 1.0208289623260498,grad_norm: 0.8111545893569015, iteration: 359356
loss: 0.9968546032905579,grad_norm: 0.7167158728073223, iteration: 359357
loss: 1.0287514925003052,grad_norm: 0.7474917589267183, iteration: 359358
loss: 0.9833893775939941,grad_norm: 0.7295570471623254, iteration: 359359
loss: 1.0062638521194458,grad_norm: 0.861567301311533, iteration: 359360
loss: 1.0212799310684204,grad_norm: 0.8854148347137524, iteration: 359361
loss: 0.9859084486961365,grad_norm: 0.8011636326676398, iteration: 359362
loss: 0.9897204041481018,grad_norm: 0.6965137809495762, iteration: 359363
loss: 0.9993022680282593,grad_norm: 0.7077755505745261, iteration: 359364
loss: 1.021856665611267,grad_norm: 0.77642383098778, iteration: 359365
loss: 1.0208313465118408,grad_norm: 0.8165266602476149, iteration: 359366
loss: 0.9637846350669861,grad_norm: 0.9598301706538693, iteration: 359367
loss: 1.0042539834976196,grad_norm: 0.7475168271242061, iteration: 359368
loss: 1.0180256366729736,grad_norm: 0.716418602678016, iteration: 359369
loss: 1.0078068971633911,grad_norm: 0.8043366125159833, iteration: 359370
loss: 1.0134246349334717,grad_norm: 0.8731404181201186, iteration: 359371
loss: 0.974747896194458,grad_norm: 0.7992250150570621, iteration: 359372
loss: 0.9300473928451538,grad_norm: 0.8843746096904671, iteration: 359373
loss: 0.9970441460609436,grad_norm: 0.9388345144778634, iteration: 359374
loss: 0.9935588240623474,grad_norm: 0.6075618283197666, iteration: 359375
loss: 1.047446608543396,grad_norm: 0.936755483327656, iteration: 359376
loss: 1.0042518377304077,grad_norm: 0.7655649392174426, iteration: 359377
loss: 0.9862066507339478,grad_norm: 0.7816335630334932, iteration: 359378
loss: 0.9905309677124023,grad_norm: 0.7636592689004091, iteration: 359379
loss: 1.0102792978286743,grad_norm: 0.9271586192165987, iteration: 359380
loss: 1.0533303022384644,grad_norm: 0.8947804607264289, iteration: 359381
loss: 1.0458080768585205,grad_norm: 0.9999994411453899, iteration: 359382
loss: 1.0225399732589722,grad_norm: 0.7935603167802825, iteration: 359383
loss: 1.0298993587493896,grad_norm: 0.9283564081744133, iteration: 359384
loss: 1.0164002180099487,grad_norm: 0.8491053194524575, iteration: 359385
loss: 0.9950323700904846,grad_norm: 0.7044988295699626, iteration: 359386
loss: 0.960343599319458,grad_norm: 0.7914151053495916, iteration: 359387
loss: 1.0036927461624146,grad_norm: 0.6548095956070261, iteration: 359388
loss: 1.013884425163269,grad_norm: 0.7688603051129483, iteration: 359389
loss: 0.978653609752655,grad_norm: 0.9999990977064049, iteration: 359390
loss: 1.0024116039276123,grad_norm: 0.8250955071508854, iteration: 359391
loss: 1.0183463096618652,grad_norm: 0.7903381881809416, iteration: 359392
loss: 1.032591462135315,grad_norm: 0.7741489881492349, iteration: 359393
loss: 1.0150718688964844,grad_norm: 0.8164030696836626, iteration: 359394
loss: 0.9969601035118103,grad_norm: 0.8394368796731061, iteration: 359395
loss: 0.9764301180839539,grad_norm: 0.9059137790503876, iteration: 359396
loss: 1.0162017345428467,grad_norm: 0.8242192071199657, iteration: 359397
loss: 0.9896395802497864,grad_norm: 0.8136355470177928, iteration: 359398
loss: 0.9735684990882874,grad_norm: 0.6819352488049129, iteration: 359399
loss: 0.970908522605896,grad_norm: 0.7870067515980061, iteration: 359400
loss: 1.0195523500442505,grad_norm: 0.9999993788727809, iteration: 359401
loss: 0.9634190797805786,grad_norm: 0.8000666839545519, iteration: 359402
loss: 1.052008032798767,grad_norm: 0.9142123121653332, iteration: 359403
loss: 1.003350853919983,grad_norm: 0.7889453198116917, iteration: 359404
loss: 1.0103933811187744,grad_norm: 0.76031642959601, iteration: 359405
loss: 1.0221469402313232,grad_norm: 0.9367017731879332, iteration: 359406
loss: 0.9797889590263367,grad_norm: 0.890389058961381, iteration: 359407
loss: 0.9882355332374573,grad_norm: 0.793480242084069, iteration: 359408
loss: 0.9973828196525574,grad_norm: 0.7133926067800808, iteration: 359409
loss: 0.9856497049331665,grad_norm: 0.7307129615347723, iteration: 359410
loss: 0.9709905982017517,grad_norm: 0.8833745551541311, iteration: 359411
loss: 0.9793376922607422,grad_norm: 0.9970754516042472, iteration: 359412
loss: 1.0411754846572876,grad_norm: 0.8863793056643923, iteration: 359413
loss: 1.0058592557907104,grad_norm: 0.8274312015032886, iteration: 359414
loss: 1.0337345600128174,grad_norm: 0.999999130340732, iteration: 359415
loss: 1.013364315032959,grad_norm: 0.8903917929949379, iteration: 359416
loss: 1.0102529525756836,grad_norm: 0.8122309610585099, iteration: 359417
loss: 0.9780980944633484,grad_norm: 0.9999991835931737, iteration: 359418
loss: 1.0119261741638184,grad_norm: 0.8141745971461531, iteration: 359419
loss: 1.0220423936843872,grad_norm: 0.999999186252615, iteration: 359420
loss: 1.0014694929122925,grad_norm: 0.728341437444437, iteration: 359421
loss: 0.9601739645004272,grad_norm: 0.8723281628208094, iteration: 359422
loss: 1.0769771337509155,grad_norm: 0.9999990072337371, iteration: 359423
loss: 0.997675895690918,grad_norm: 0.6730431937406282, iteration: 359424
loss: 1.0262964963912964,grad_norm: 0.9999989773625998, iteration: 359425
loss: 0.9902048707008362,grad_norm: 0.7300666211132614, iteration: 359426
loss: 0.9509458541870117,grad_norm: 0.7907238630525858, iteration: 359427
loss: 0.9871979355812073,grad_norm: 0.8587883498553549, iteration: 359428
loss: 0.9693475961685181,grad_norm: 0.6871416193335746, iteration: 359429
loss: 0.9802026152610779,grad_norm: 0.8137476949878704, iteration: 359430
loss: 1.0361478328704834,grad_norm: 0.8013266705960683, iteration: 359431
loss: 1.0154225826263428,grad_norm: 0.6856863480421943, iteration: 359432
loss: 0.9566193222999573,grad_norm: 0.747623195193284, iteration: 359433
loss: 1.020193099975586,grad_norm: 0.9999992198655473, iteration: 359434
loss: 1.0205698013305664,grad_norm: 0.7546786651425743, iteration: 359435
loss: 0.9783727526664734,grad_norm: 0.7957917297624949, iteration: 359436
loss: 1.016447901725769,grad_norm: 0.8566487753939729, iteration: 359437
loss: 0.9713281393051147,grad_norm: 0.9489729632273943, iteration: 359438
loss: 1.0020995140075684,grad_norm: 0.896415318323862, iteration: 359439
loss: 1.0178115367889404,grad_norm: 0.7615357972944536, iteration: 359440
loss: 0.9555041193962097,grad_norm: 0.7553615110880911, iteration: 359441
loss: 1.017907977104187,grad_norm: 0.9999991582576546, iteration: 359442
loss: 0.9777721762657166,grad_norm: 0.9722755129177666, iteration: 359443
loss: 0.9985511302947998,grad_norm: 0.7417056255057474, iteration: 359444
loss: 0.9967581033706665,grad_norm: 0.6434520653829064, iteration: 359445
loss: 1.0039663314819336,grad_norm: 0.8101661562152519, iteration: 359446
loss: 0.9907737970352173,grad_norm: 0.819668640727834, iteration: 359447
loss: 0.9593895673751831,grad_norm: 0.9999992210709719, iteration: 359448
loss: 0.9457683563232422,grad_norm: 0.6992181453769891, iteration: 359449
loss: 1.0173225402832031,grad_norm: 0.8324707360464706, iteration: 359450
loss: 0.9985932111740112,grad_norm: 0.8551998126821152, iteration: 359451
loss: 0.9904651045799255,grad_norm: 0.9999989626933131, iteration: 359452
loss: 1.0325953960418701,grad_norm: 0.7475424857908474, iteration: 359453
loss: 0.9960249662399292,grad_norm: 0.8065481604018159, iteration: 359454
loss: 1.0068182945251465,grad_norm: 0.7015841307265012, iteration: 359455
loss: 0.9810652732849121,grad_norm: 0.8269426261643036, iteration: 359456
loss: 1.0021759271621704,grad_norm: 0.9482621897694741, iteration: 359457
loss: 0.9526689648628235,grad_norm: 0.8831696638622814, iteration: 359458
loss: 1.0053668022155762,grad_norm: 0.8577690711238434, iteration: 359459
loss: 0.9854478240013123,grad_norm: 0.7638313610747901, iteration: 359460
loss: 1.0496670007705688,grad_norm: 0.90295250192514, iteration: 359461
loss: 1.050775408744812,grad_norm: 0.7901128583831172, iteration: 359462
loss: 1.0177397727966309,grad_norm: 0.7517370659888246, iteration: 359463
loss: 1.0089033842086792,grad_norm: 0.7666788082674676, iteration: 359464
loss: 1.002616047859192,grad_norm: 0.7055216874884712, iteration: 359465
loss: 0.9747079014778137,grad_norm: 0.8740538026993386, iteration: 359466
loss: 0.9878417253494263,grad_norm: 0.7398596808950813, iteration: 359467
loss: 0.9811014533042908,grad_norm: 0.8201753026457669, iteration: 359468
loss: 1.0251517295837402,grad_norm: 0.682077111350745, iteration: 359469
loss: 0.9949309825897217,grad_norm: 0.8122615289193096, iteration: 359470
loss: 1.0678445100784302,grad_norm: 0.9408658487461568, iteration: 359471
loss: 0.9864098429679871,grad_norm: 0.9999996217398464, iteration: 359472
loss: 0.9803507328033447,grad_norm: 0.8125314387194984, iteration: 359473
loss: 1.0459784269332886,grad_norm: 0.7571704776415032, iteration: 359474
loss: 1.0026617050170898,grad_norm: 0.9999991567483877, iteration: 359475
loss: 0.9942096471786499,grad_norm: 0.7353196533584999, iteration: 359476
loss: 1.008518934249878,grad_norm: 0.9934045508604596, iteration: 359477
loss: 0.9905320405960083,grad_norm: 0.7724334616334705, iteration: 359478
loss: 0.9559262990951538,grad_norm: 0.7530329384268071, iteration: 359479
loss: 1.027886152267456,grad_norm: 0.9999998543525916, iteration: 359480
loss: 1.023289442062378,grad_norm: 0.7567730067566468, iteration: 359481
loss: 1.0067130327224731,grad_norm: 0.7387634566403124, iteration: 359482
loss: 0.9992080926895142,grad_norm: 0.7459744841331751, iteration: 359483
loss: 0.9871947169303894,grad_norm: 0.7539595021905835, iteration: 359484
loss: 0.9693199396133423,grad_norm: 0.811648086308507, iteration: 359485
loss: 0.9937196373939514,grad_norm: 0.75518390396769, iteration: 359486
loss: 1.0195993185043335,grad_norm: 0.9345279444502672, iteration: 359487
loss: 0.9879047870635986,grad_norm: 0.8583116149022326, iteration: 359488
loss: 0.9873080849647522,grad_norm: 0.684684354443671, iteration: 359489
loss: 1.0071223974227905,grad_norm: 0.8959205307093452, iteration: 359490
loss: 1.0086524486541748,grad_norm: 0.8163837489986217, iteration: 359491
loss: 0.9956364631652832,grad_norm: 0.9999996921117644, iteration: 359492
loss: 0.9804645776748657,grad_norm: 0.8832158304138282, iteration: 359493
loss: 1.0272125005722046,grad_norm: 0.8934633447376422, iteration: 359494
loss: 0.9687287211418152,grad_norm: 0.6905139003817138, iteration: 359495
loss: 1.0212998390197754,grad_norm: 0.8088083093406361, iteration: 359496
loss: 1.0137040615081787,grad_norm: 0.8024237582171061, iteration: 359497
loss: 0.9836086630821228,grad_norm: 0.8159136835471102, iteration: 359498
loss: 1.030695915222168,grad_norm: 0.6604421422449428, iteration: 359499
loss: 0.9979485273361206,grad_norm: 0.8564109615169977, iteration: 359500
loss: 0.9746319651603699,grad_norm: 0.8041649492701682, iteration: 359501
loss: 1.0130281448364258,grad_norm: 0.6818082907119908, iteration: 359502
loss: 1.0213686227798462,grad_norm: 0.7568960648040548, iteration: 359503
loss: 1.011222004890442,grad_norm: 0.7780736535364593, iteration: 359504
loss: 1.0002365112304688,grad_norm: 0.9999993106077499, iteration: 359505
loss: 1.0321528911590576,grad_norm: 0.78779355832727, iteration: 359506
loss: 0.9949016571044922,grad_norm: 0.8529489381281169, iteration: 359507
loss: 1.006629228591919,grad_norm: 0.9053098395518935, iteration: 359508
loss: 1.012786626815796,grad_norm: 0.8589846013465532, iteration: 359509
loss: 0.970384418964386,grad_norm: 0.8212068329962232, iteration: 359510
loss: 1.040799617767334,grad_norm: 0.9999995543282617, iteration: 359511
loss: 1.0005744695663452,grad_norm: 0.8166984252256112, iteration: 359512
loss: 1.0242892503738403,grad_norm: 0.9761678853029144, iteration: 359513
loss: 1.0149081945419312,grad_norm: 0.7041236741857276, iteration: 359514
loss: 0.9831218123435974,grad_norm: 0.7850019157677461, iteration: 359515
loss: 0.9749286770820618,grad_norm: 0.7944879980760716, iteration: 359516
loss: 1.0292984247207642,grad_norm: 0.8290063919220809, iteration: 359517
loss: 1.0312647819519043,grad_norm: 0.9999995833231158, iteration: 359518
loss: 0.9803134799003601,grad_norm: 0.9182511858509727, iteration: 359519
loss: 1.0259183645248413,grad_norm: 0.7938873728737024, iteration: 359520
loss: 0.9588356614112854,grad_norm: 0.8214409362056297, iteration: 359521
loss: 0.9817869663238525,grad_norm: 0.6865759449722504, iteration: 359522
loss: 0.9732410311698914,grad_norm: 0.661741705625014, iteration: 359523
loss: 1.0077574253082275,grad_norm: 0.7549599722866931, iteration: 359524
loss: 1.0405118465423584,grad_norm: 0.9999992749652606, iteration: 359525
loss: 0.9665203094482422,grad_norm: 0.7655228899047198, iteration: 359526
loss: 0.9978758692741394,grad_norm: 0.7128053698060552, iteration: 359527
loss: 1.000012755393982,grad_norm: 0.9220790040010932, iteration: 359528
loss: 0.986953854560852,grad_norm: 0.914121200949604, iteration: 359529
loss: 1.0015348196029663,grad_norm: 0.7058815389205207, iteration: 359530
loss: 0.9902875423431396,grad_norm: 0.9999994597292352, iteration: 359531
loss: 1.0008800029754639,grad_norm: 0.8988928754118686, iteration: 359532
loss: 1.0054255723953247,grad_norm: 0.827041104331119, iteration: 359533
loss: 1.0393613576889038,grad_norm: 0.9004110898901148, iteration: 359534
loss: 0.992703378200531,grad_norm: 0.80611483877091, iteration: 359535
loss: 1.0230454206466675,grad_norm: 0.6682155753935956, iteration: 359536
loss: 1.0139672756195068,grad_norm: 0.7609891004118917, iteration: 359537
loss: 1.026910424232483,grad_norm: 0.8017943546785729, iteration: 359538
loss: 0.9661572575569153,grad_norm: 0.8758122794425851, iteration: 359539
loss: 1.0059857368469238,grad_norm: 0.9038859226037654, iteration: 359540
loss: 0.9590282440185547,grad_norm: 0.8196502273884966, iteration: 359541
loss: 0.9731423258781433,grad_norm: 0.9999990557130134, iteration: 359542
loss: 0.9982196092605591,grad_norm: 0.9999990183624509, iteration: 359543
loss: 0.999237060546875,grad_norm: 0.7892957728690767, iteration: 359544
loss: 1.0015103816986084,grad_norm: 0.8759212266592107, iteration: 359545
loss: 1.0325417518615723,grad_norm: 0.8702721180609065, iteration: 359546
loss: 1.0159339904785156,grad_norm: 0.8330256919596604, iteration: 359547
loss: 1.020013689994812,grad_norm: 0.9999990633729079, iteration: 359548
loss: 1.0157448053359985,grad_norm: 0.688344268451956, iteration: 359549
loss: 0.9958015084266663,grad_norm: 0.9999991717685754, iteration: 359550
loss: 1.0021628141403198,grad_norm: 0.9620082738732129, iteration: 359551
loss: 1.0097118616104126,grad_norm: 0.929843971233529, iteration: 359552
loss: 0.9885404706001282,grad_norm: 0.8210193250942095, iteration: 359553
loss: 1.0089411735534668,grad_norm: 0.7901856860286535, iteration: 359554
loss: 0.97293621301651,grad_norm: 0.8386866559010299, iteration: 359555
loss: 0.9986480474472046,grad_norm: 0.7746642716811754, iteration: 359556
loss: 1.0037896633148193,grad_norm: 0.9253079137389801, iteration: 359557
loss: 1.0185084342956543,grad_norm: 0.8331172791894248, iteration: 359558
loss: 1.0121663808822632,grad_norm: 0.7440455267926307, iteration: 359559
loss: 1.0133018493652344,grad_norm: 0.81937678345123, iteration: 359560
loss: 0.9890021085739136,grad_norm: 0.6042653337838054, iteration: 359561
loss: 1.0189849138259888,grad_norm: 0.6547604977834659, iteration: 359562
loss: 0.9702430367469788,grad_norm: 0.9327663705820716, iteration: 359563
loss: 0.9732038378715515,grad_norm: 0.711917080986668, iteration: 359564
loss: 0.9968234300613403,grad_norm: 0.7633364335007737, iteration: 359565
loss: 0.9883536696434021,grad_norm: 0.7986638916959676, iteration: 359566
loss: 0.9893785715103149,grad_norm: 0.7487165279815646, iteration: 359567
loss: 1.0331001281738281,grad_norm: 0.9543097413940426, iteration: 359568
loss: 1.0238003730773926,grad_norm: 0.9184737600857287, iteration: 359569
loss: 1.0074673891067505,grad_norm: 0.6895082193471544, iteration: 359570
loss: 0.9741379022598267,grad_norm: 0.9170022724940345, iteration: 359571
loss: 1.0753058195114136,grad_norm: 0.8536405544664183, iteration: 359572
loss: 0.989876925945282,grad_norm: 0.8875194864415913, iteration: 359573
loss: 1.023400902748108,grad_norm: 0.7946686666187012, iteration: 359574
loss: 0.9956528544425964,grad_norm: 0.792103537358079, iteration: 359575
loss: 0.9872365593910217,grad_norm: 0.8356779039311643, iteration: 359576
loss: 0.9946937561035156,grad_norm: 0.7553405845933888, iteration: 359577
loss: 1.001541018486023,grad_norm: 0.8733584496624063, iteration: 359578
loss: 0.9718639254570007,grad_norm: 0.7574289245481063, iteration: 359579
loss: 1.091037392616272,grad_norm: 0.9999991961462774, iteration: 359580
loss: 1.0072510242462158,grad_norm: 0.7105997138664442, iteration: 359581
loss: 0.9874123930931091,grad_norm: 0.9870776950375126, iteration: 359582
loss: 0.996738612651825,grad_norm: 0.8178266154229135, iteration: 359583
loss: 0.994148313999176,grad_norm: 0.7129410945048099, iteration: 359584
loss: 0.9600905179977417,grad_norm: 0.9671183608586202, iteration: 359585
loss: 0.9770788550376892,grad_norm: 0.7243545386753633, iteration: 359586
loss: 1.028715968132019,grad_norm: 0.9999993192585511, iteration: 359587
loss: 1.026086449623108,grad_norm: 0.8910283338590297, iteration: 359588
loss: 0.9901320934295654,grad_norm: 0.7868223596154577, iteration: 359589
loss: 1.0138671398162842,grad_norm: 0.999998933271712, iteration: 359590
loss: 1.0182017087936401,grad_norm: 0.8117556444613154, iteration: 359591
loss: 0.9706152081489563,grad_norm: 0.7632095354002281, iteration: 359592
loss: 1.0191949605941772,grad_norm: 0.767259002466167, iteration: 359593
loss: 1.0110321044921875,grad_norm: 0.8072580567132611, iteration: 359594
loss: 1.0385278463363647,grad_norm: 0.9999993985535995, iteration: 359595
loss: 0.9751248359680176,grad_norm: 0.8725477636242561, iteration: 359596
loss: 1.0125908851623535,grad_norm: 0.8119811188170457, iteration: 359597
loss: 1.007648229598999,grad_norm: 0.850032885264489, iteration: 359598
loss: 1.0004637241363525,grad_norm: 0.9086639049643559, iteration: 359599
loss: 1.0101264715194702,grad_norm: 0.7637566400163283, iteration: 359600
loss: 1.0162663459777832,grad_norm: 0.8135293379061356, iteration: 359601
loss: 1.0075992345809937,grad_norm: 0.8337472065669876, iteration: 359602
loss: 1.0075995922088623,grad_norm: 0.7826043798370935, iteration: 359603
loss: 0.970593273639679,grad_norm: 0.9426893667532732, iteration: 359604
loss: 0.9937002658843994,grad_norm: 0.958676932388191, iteration: 359605
loss: 1.0170234441757202,grad_norm: 0.7251873658828555, iteration: 359606
loss: 0.9996381402015686,grad_norm: 0.9492544669372933, iteration: 359607
loss: 0.9926365613937378,grad_norm: 0.9999992570660419, iteration: 359608
loss: 1.0282700061798096,grad_norm: 0.9999993063005208, iteration: 359609
loss: 1.0200988054275513,grad_norm: 0.9999990754514136, iteration: 359610
loss: 1.035883903503418,grad_norm: 0.7546354046801241, iteration: 359611
loss: 0.999615490436554,grad_norm: 0.9153013571551619, iteration: 359612
loss: 1.0124926567077637,grad_norm: 0.7339676548472384, iteration: 359613
loss: 0.9930503368377686,grad_norm: 0.8267884353236528, iteration: 359614
loss: 1.0109754800796509,grad_norm: 0.8585392864533347, iteration: 359615
loss: 1.0005475282669067,grad_norm: 0.6641903163241377, iteration: 359616
loss: 0.9961888790130615,grad_norm: 0.7258628239262679, iteration: 359617
loss: 0.9858230948448181,grad_norm: 0.6630107230931129, iteration: 359618
loss: 0.9802833199501038,grad_norm: 0.6404110064501571, iteration: 359619
loss: 1.0078543424606323,grad_norm: 0.860538043255734, iteration: 359620
loss: 1.0162255764007568,grad_norm: 0.7256621597487394, iteration: 359621
loss: 1.004289984703064,grad_norm: 0.8060459348822389, iteration: 359622
loss: 0.9976634383201599,grad_norm: 0.939639211200987, iteration: 359623
loss: 0.9869107007980347,grad_norm: 0.8350042601539096, iteration: 359624
loss: 1.0468652248382568,grad_norm: 0.7875663725617761, iteration: 359625
loss: 1.0362435579299927,grad_norm: 0.8051043632854734, iteration: 359626
loss: 0.9953854084014893,grad_norm: 0.8080684250251484, iteration: 359627
loss: 1.0046133995056152,grad_norm: 0.8740867206214666, iteration: 359628
loss: 0.9989709258079529,grad_norm: 0.8583891565249544, iteration: 359629
loss: 1.0043883323669434,grad_norm: 0.8331954079691493, iteration: 359630
loss: 1.0372601747512817,grad_norm: 0.818925151650153, iteration: 359631
loss: 0.9991834163665771,grad_norm: 0.8045947513128446, iteration: 359632
loss: 0.9511678218841553,grad_norm: 0.8075566522508891, iteration: 359633
loss: 1.0081307888031006,grad_norm: 0.9999993610188718, iteration: 359634
loss: 1.0041685104370117,grad_norm: 0.81045837250697, iteration: 359635
loss: 1.0023739337921143,grad_norm: 0.6894005847173089, iteration: 359636
loss: 1.0302656888961792,grad_norm: 0.7368585974151816, iteration: 359637
loss: 1.0210717916488647,grad_norm: 0.9999990733533282, iteration: 359638
loss: 0.973197340965271,grad_norm: 0.9537525178703683, iteration: 359639
loss: 0.9828490614891052,grad_norm: 0.9999991879808683, iteration: 359640
loss: 1.0609654188156128,grad_norm: 0.7292936655828756, iteration: 359641
loss: 0.9679564237594604,grad_norm: 0.8004981428553644, iteration: 359642
loss: 1.0044740438461304,grad_norm: 0.7110378794418685, iteration: 359643
loss: 0.967341423034668,grad_norm: 0.8949746063953182, iteration: 359644
loss: 1.0040558576583862,grad_norm: 0.8879092887384942, iteration: 359645
loss: 1.0056695938110352,grad_norm: 0.7914230226390192, iteration: 359646
loss: 0.9963207244873047,grad_norm: 0.8360530940133919, iteration: 359647
loss: 1.1951102018356323,grad_norm: 0.9999991615423882, iteration: 359648
loss: 0.9980623722076416,grad_norm: 0.9018448214895807, iteration: 359649
loss: 0.9779417514801025,grad_norm: 0.7415201370371084, iteration: 359650
loss: 1.0138829946517944,grad_norm: 0.8865377257624144, iteration: 359651
loss: 0.9820418357849121,grad_norm: 0.898660549772339, iteration: 359652
loss: 1.0380038022994995,grad_norm: 0.7091750134432517, iteration: 359653
loss: 0.9769626259803772,grad_norm: 0.8387947896342656, iteration: 359654
loss: 1.1297036409378052,grad_norm: 0.885886845538607, iteration: 359655
loss: 0.9971410036087036,grad_norm: 0.9999992322637667, iteration: 359656
loss: 1.023735761642456,grad_norm: 0.9123680104374379, iteration: 359657
loss: 0.9918626546859741,grad_norm: 0.9999990453685417, iteration: 359658
loss: 0.9827839136123657,grad_norm: 0.8123197698139693, iteration: 359659
loss: 1.0052591562271118,grad_norm: 0.7516145507986175, iteration: 359660
loss: 0.9860737323760986,grad_norm: 0.9999993765699451, iteration: 359661
loss: 1.0010592937469482,grad_norm: 0.881493148189904, iteration: 359662
loss: 1.0302531719207764,grad_norm: 0.9242719442946765, iteration: 359663
loss: 0.979458212852478,grad_norm: 0.7641692919022782, iteration: 359664
loss: 0.9972289204597473,grad_norm: 0.6280067097844703, iteration: 359665
loss: 1.0536600351333618,grad_norm: 0.9196944274867972, iteration: 359666
loss: 0.9821990728378296,grad_norm: 0.8550207496831194, iteration: 359667
loss: 0.9843449592590332,grad_norm: 0.909940560110535, iteration: 359668
loss: 0.9934999346733093,grad_norm: 0.703959468227907, iteration: 359669
loss: 1.118328332901001,grad_norm: 0.9999998390065216, iteration: 359670
loss: 1.0211846828460693,grad_norm: 0.9454085058092244, iteration: 359671
loss: 0.9967883825302124,grad_norm: 0.7648391817142353, iteration: 359672
loss: 1.0331768989562988,grad_norm: 0.8793398755286054, iteration: 359673
loss: 0.9992637038230896,grad_norm: 0.937162545121357, iteration: 359674
loss: 0.9961104989051819,grad_norm: 0.9999991872604984, iteration: 359675
loss: 0.9889628887176514,grad_norm: 0.7153960290805134, iteration: 359676
loss: 0.9828892946243286,grad_norm: 0.9444436283311691, iteration: 359677
loss: 1.002001166343689,grad_norm: 0.9799041520161293, iteration: 359678
loss: 1.0710642337799072,grad_norm: 0.9999999118023202, iteration: 359679
loss: 1.041547179222107,grad_norm: 0.9999992661336294, iteration: 359680
loss: 0.9652791619300842,grad_norm: 0.94269705947672, iteration: 359681
loss: 1.0209276676177979,grad_norm: 0.9053079641666886, iteration: 359682
loss: 1.026036024093628,grad_norm: 0.8807690032576363, iteration: 359683
loss: 1.005442500114441,grad_norm: 0.9184452206346819, iteration: 359684
loss: 1.0010571479797363,grad_norm: 0.7444614634071134, iteration: 359685
loss: 0.9623005390167236,grad_norm: 0.7870745568463172, iteration: 359686
loss: 1.0021426677703857,grad_norm: 0.9188046182651989, iteration: 359687
loss: 0.9818156361579895,grad_norm: 0.8199833949685641, iteration: 359688
loss: 1.0362118482589722,grad_norm: 0.9509631363721573, iteration: 359689
loss: 0.9812430143356323,grad_norm: 0.7409952647797442, iteration: 359690
loss: 0.9816820621490479,grad_norm: 0.8306118904458296, iteration: 359691
loss: 1.0420039892196655,grad_norm: 0.9688072192624558, iteration: 359692
loss: 1.0014402866363525,grad_norm: 0.8716197853553661, iteration: 359693
loss: 0.9840615391731262,grad_norm: 0.8954417696576182, iteration: 359694
loss: 0.9805459380149841,grad_norm: 0.8440113258758813, iteration: 359695
loss: 0.9976479411125183,grad_norm: 0.727233177928816, iteration: 359696
loss: 0.9630674719810486,grad_norm: 0.8802749813246418, iteration: 359697
loss: 0.9363203048706055,grad_norm: 0.88179266751874, iteration: 359698
loss: 1.003995418548584,grad_norm: 0.7559280693362622, iteration: 359699
loss: 0.9923842549324036,grad_norm: 0.7289327855094165, iteration: 359700
loss: 0.9841920137405396,grad_norm: 0.9168797858969489, iteration: 359701
loss: 1.002922773361206,grad_norm: 0.8481338188686677, iteration: 359702
loss: 1.014780044555664,grad_norm: 0.8712338683102264, iteration: 359703
loss: 1.0505998134613037,grad_norm: 0.8765821113478315, iteration: 359704
loss: 1.0121564865112305,grad_norm: 0.7803908171814252, iteration: 359705
loss: 1.0163369178771973,grad_norm: 0.9279765550465632, iteration: 359706
loss: 0.994134247303009,grad_norm: 0.9915904571824885, iteration: 359707
loss: 1.0213640928268433,grad_norm: 0.7155482806796414, iteration: 359708
loss: 1.0037003755569458,grad_norm: 0.8775279849349714, iteration: 359709
loss: 1.024594783782959,grad_norm: 0.9026448004319297, iteration: 359710
loss: 0.9959457516670227,grad_norm: 0.7837464925887638, iteration: 359711
loss: 1.0190637111663818,grad_norm: 0.7570132923448095, iteration: 359712
loss: 1.0241999626159668,grad_norm: 0.6968360491349546, iteration: 359713
loss: 0.9928191304206848,grad_norm: 0.7814927884365211, iteration: 359714
loss: 0.995274007320404,grad_norm: 0.9629369502892466, iteration: 359715
loss: 0.9932803511619568,grad_norm: 0.9439401649798769, iteration: 359716
loss: 0.9961912631988525,grad_norm: 0.8032469127242982, iteration: 359717
loss: 0.9964833855628967,grad_norm: 0.876029978290511, iteration: 359718
loss: 0.9780212044715881,grad_norm: 0.7799825865029208, iteration: 359719
loss: 0.9837930798530579,grad_norm: 0.6166384823943521, iteration: 359720
loss: 0.9876254796981812,grad_norm: 0.9999991735613974, iteration: 359721
loss: 0.9633030891418457,grad_norm: 0.7149596283397073, iteration: 359722
loss: 1.0100265741348267,grad_norm: 0.8708462073525854, iteration: 359723
loss: 0.9783012866973877,grad_norm: 0.7060526519769238, iteration: 359724
loss: 0.9901109933853149,grad_norm: 0.7688243961398433, iteration: 359725
loss: 1.0166935920715332,grad_norm: 0.9712691709601243, iteration: 359726
loss: 1.0598692893981934,grad_norm: 0.8711675406341705, iteration: 359727
loss: 0.9949394464492798,grad_norm: 0.6918589841670069, iteration: 359728
loss: 1.00082266330719,grad_norm: 0.8265233779920126, iteration: 359729
loss: 1.013225793838501,grad_norm: 0.7773997596809604, iteration: 359730
loss: 1.0635443925857544,grad_norm: 0.9999999272156466, iteration: 359731
loss: 0.9617353081703186,grad_norm: 0.9355774750096312, iteration: 359732
loss: 0.9697431325912476,grad_norm: 0.8995887117296574, iteration: 359733
loss: 1.0043742656707764,grad_norm: 0.7938567604873646, iteration: 359734
loss: 0.960590124130249,grad_norm: 0.7412127418418489, iteration: 359735
loss: 1.004368782043457,grad_norm: 0.8354102149489164, iteration: 359736
loss: 1.0139819383621216,grad_norm: 0.9084951711759329, iteration: 359737
loss: 0.9680368900299072,grad_norm: 0.7849808978047417, iteration: 359738
loss: 0.9799146056175232,grad_norm: 0.7835930306372596, iteration: 359739
loss: 0.99956876039505,grad_norm: 0.5720299337875904, iteration: 359740
loss: 0.9969363808631897,grad_norm: 0.8144710208224111, iteration: 359741
loss: 1.0217528343200684,grad_norm: 0.778051116992581, iteration: 359742
loss: 1.0360910892486572,grad_norm: 0.8363972296467572, iteration: 359743
loss: 1.0290361642837524,grad_norm: 0.7390151385485387, iteration: 359744
loss: 0.9861752986907959,grad_norm: 0.958501221845113, iteration: 359745
loss: 0.9702104330062866,grad_norm: 0.8029836862750457, iteration: 359746
loss: 1.013535499572754,grad_norm: 0.8531152627476281, iteration: 359747
loss: 1.1047354936599731,grad_norm: 0.9999999131411587, iteration: 359748
loss: 0.9763398170471191,grad_norm: 0.9777121302396015, iteration: 359749
loss: 1.014570951461792,grad_norm: 0.9023085013790489, iteration: 359750
loss: 0.9759659767150879,grad_norm: 0.7156054637573919, iteration: 359751
loss: 0.997719943523407,grad_norm: 0.707107229936749, iteration: 359752
loss: 1.029248833656311,grad_norm: 0.7659520903323103, iteration: 359753
loss: 1.0146993398666382,grad_norm: 0.8374720879435305, iteration: 359754
loss: 1.082816481590271,grad_norm: 0.9826752763659614, iteration: 359755
loss: 0.9598349928855896,grad_norm: 0.7800059817703425, iteration: 359756
loss: 0.9893196225166321,grad_norm: 0.7420895102233375, iteration: 359757
loss: 1.0305861234664917,grad_norm: 0.912461363653565, iteration: 359758
loss: 0.9932870268821716,grad_norm: 0.9999995344077559, iteration: 359759
loss: 1.0921704769134521,grad_norm: 0.9999991992542234, iteration: 359760
loss: 0.9572895765304565,grad_norm: 0.8304410300062719, iteration: 359761
loss: 0.9963532090187073,grad_norm: 0.7621315247561561, iteration: 359762
loss: 0.9995400905609131,grad_norm: 0.7578099185803463, iteration: 359763
loss: 1.0165941715240479,grad_norm: 0.9999989606769042, iteration: 359764
loss: 0.9651408791542053,grad_norm: 0.8451322980663831, iteration: 359765
loss: 1.0035195350646973,grad_norm: 0.7783244188409789, iteration: 359766
loss: 0.9977735280990601,grad_norm: 0.9249244378034777, iteration: 359767
loss: 1.0093666315078735,grad_norm: 0.8862321532426607, iteration: 359768
loss: 1.0049477815628052,grad_norm: 0.8187274207042677, iteration: 359769
loss: 0.9760063290596008,grad_norm: 0.8258894202616146, iteration: 359770
loss: 1.0055862665176392,grad_norm: 0.6998277610137245, iteration: 359771
loss: 0.9630921483039856,grad_norm: 0.9999989551602652, iteration: 359772
loss: 1.0141509771347046,grad_norm: 0.9050044705978293, iteration: 359773
loss: 0.9892477989196777,grad_norm: 0.7807640584374218, iteration: 359774
loss: 0.972384512424469,grad_norm: 0.7075831079914086, iteration: 359775
loss: 0.9824954867362976,grad_norm: 0.9748161731887265, iteration: 359776
loss: 0.9947025179862976,grad_norm: 0.7018652626430314, iteration: 359777
loss: 0.9901281595230103,grad_norm: 0.7691821271938011, iteration: 359778
loss: 0.987715482711792,grad_norm: 0.7151188146486985, iteration: 359779
loss: 0.984413743019104,grad_norm: 0.7407326382041085, iteration: 359780
loss: 0.979084312915802,grad_norm: 0.7695285739485083, iteration: 359781
loss: 1.0048800706863403,grad_norm: 0.7246482847113239, iteration: 359782
loss: 0.979631245136261,grad_norm: 0.8233936007383965, iteration: 359783
loss: 1.00101900100708,grad_norm: 0.8149848917708655, iteration: 359784
loss: 1.0386316776275635,grad_norm: 0.6847496456771747, iteration: 359785
loss: 0.9821454286575317,grad_norm: 0.965316773217337, iteration: 359786
loss: 1.0800421237945557,grad_norm: 0.938340705330885, iteration: 359787
loss: 1.0153522491455078,grad_norm: 0.86298501646737, iteration: 359788
loss: 0.9938673377037048,grad_norm: 0.7732545227319593, iteration: 359789
loss: 1.0176528692245483,grad_norm: 0.8898917496244797, iteration: 359790
loss: 1.0220088958740234,grad_norm: 0.8821571590578952, iteration: 359791
loss: 1.2037060260772705,grad_norm: 0.9999996910156537, iteration: 359792
loss: 0.9972551465034485,grad_norm: 0.7392295313598423, iteration: 359793
loss: 1.0303243398666382,grad_norm: 0.8220420154845326, iteration: 359794
loss: 1.0405522584915161,grad_norm: 0.924771369371493, iteration: 359795
loss: 1.0481786727905273,grad_norm: 0.9999999162811829, iteration: 359796
loss: 0.9945333003997803,grad_norm: 0.7339477621170364, iteration: 359797
loss: 1.0487639904022217,grad_norm: 0.9589184240913916, iteration: 359798
loss: 1.0985749959945679,grad_norm: 0.8841634495452331, iteration: 359799
loss: 0.9683480858802795,grad_norm: 0.8172190046051948, iteration: 359800
loss: 1.02815842628479,grad_norm: 0.853382475801871, iteration: 359801
loss: 1.0046353340148926,grad_norm: 0.8547323086319425, iteration: 359802
loss: 1.0749287605285645,grad_norm: 0.9999990688903204, iteration: 359803
loss: 0.9882261157035828,grad_norm: 0.7645696044251048, iteration: 359804
loss: 1.022015929222107,grad_norm: 0.7701864949877941, iteration: 359805
loss: 1.1110002994537354,grad_norm: 0.999999525515036, iteration: 359806
loss: 1.0803428888320923,grad_norm: 1.0000000771757414, iteration: 359807
loss: 1.0211962461471558,grad_norm: 0.9999998253436322, iteration: 359808
loss: 1.0477765798568726,grad_norm: 0.818832466488104, iteration: 359809
loss: 1.0040669441223145,grad_norm: 0.853943051404398, iteration: 359810
loss: 0.9928109645843506,grad_norm: 0.9107925807586664, iteration: 359811
loss: 1.0306237936019897,grad_norm: 0.7504620918279354, iteration: 359812
loss: 0.9913283586502075,grad_norm: 0.7728485452813629, iteration: 359813
loss: 0.9526634216308594,grad_norm: 0.8664357010349247, iteration: 359814
loss: 0.9695412516593933,grad_norm: 0.8800035771804429, iteration: 359815
loss: 1.2562652826309204,grad_norm: 0.99999988505157, iteration: 359816
loss: 1.0089274644851685,grad_norm: 0.8553548175384647, iteration: 359817
loss: 0.9731438159942627,grad_norm: 0.7748157393781427, iteration: 359818
loss: 1.1436717510223389,grad_norm: 0.9999999257116805, iteration: 359819
loss: 0.9557108879089355,grad_norm: 0.9045295586881241, iteration: 359820
loss: 1.1386438608169556,grad_norm: 0.9999998714730246, iteration: 359821
loss: 0.997277021408081,grad_norm: 0.693321114392859, iteration: 359822
loss: 0.9756695628166199,grad_norm: 0.9468016372868244, iteration: 359823
loss: 1.031133770942688,grad_norm: 0.824351891826833, iteration: 359824
loss: 0.9872041940689087,grad_norm: 0.8235792966349789, iteration: 359825
loss: 1.0844398736953735,grad_norm: 0.9999993373013013, iteration: 359826
loss: 1.0052118301391602,grad_norm: 0.8445178568242511, iteration: 359827
loss: 1.0151875019073486,grad_norm: 0.7653688223034061, iteration: 359828
loss: 1.0859991312026978,grad_norm: 0.9371413362757415, iteration: 359829
loss: 1.0784164667129517,grad_norm: 0.9999999392906821, iteration: 359830
loss: 1.0660513639450073,grad_norm: 0.9999992106849899, iteration: 359831
loss: 0.9973459839820862,grad_norm: 0.9228256792072722, iteration: 359832
loss: 0.9819256067276001,grad_norm: 0.8103717871584315, iteration: 359833
loss: 1.0050981044769287,grad_norm: 0.6712140657411622, iteration: 359834
loss: 0.994769275188446,grad_norm: 0.8776005226525961, iteration: 359835
loss: 1.19435715675354,grad_norm: 0.9999998512906363, iteration: 359836
loss: 1.1379413604736328,grad_norm: 0.9999990606238297, iteration: 359837
loss: 1.1281121969223022,grad_norm: 0.9999993624633075, iteration: 359838
loss: 0.9846600294113159,grad_norm: 0.8812436291033758, iteration: 359839
loss: 0.9690001606941223,grad_norm: 0.8952405968819663, iteration: 359840
loss: 1.0315853357315063,grad_norm: 0.7730742982598042, iteration: 359841
loss: 1.0325895547866821,grad_norm: 0.9999991831921972, iteration: 359842
loss: 1.0040689706802368,grad_norm: 0.7813720307946931, iteration: 359843
loss: 1.0100632905960083,grad_norm: 0.6897573603063625, iteration: 359844
loss: 1.030861735343933,grad_norm: 0.8710977834548652, iteration: 359845
loss: 1.1009031534194946,grad_norm: 0.9999995206717348, iteration: 359846
loss: 1.0375237464904785,grad_norm: 0.9999995093813906, iteration: 359847
loss: 1.126623511314392,grad_norm: 0.9999993149345255, iteration: 359848
loss: 0.9875972867012024,grad_norm: 0.7669535413399117, iteration: 359849
loss: 1.0560203790664673,grad_norm: 0.9999991260378, iteration: 359850
loss: 0.9962079524993896,grad_norm: 0.9999994247237458, iteration: 359851
loss: 1.124932050704956,grad_norm: 0.9999990966213405, iteration: 359852
loss: 1.0004431009292603,grad_norm: 0.8785437951557666, iteration: 359853
loss: 1.0189014673233032,grad_norm: 0.7699341669599716, iteration: 359854
loss: 0.9917871356010437,grad_norm: 0.8145394446021104, iteration: 359855
loss: 1.3044917583465576,grad_norm: 0.9999998600955978, iteration: 359856
loss: 1.0018340349197388,grad_norm: 0.8206983054371725, iteration: 359857
loss: 1.0752155780792236,grad_norm: 0.749516843114535, iteration: 359858
loss: 1.0365426540374756,grad_norm: 0.999999252263922, iteration: 359859
loss: 1.1092146635055542,grad_norm: 0.9999998186484265, iteration: 359860
loss: 0.9810482859611511,grad_norm: 0.8356847404774846, iteration: 359861
loss: 1.2040725946426392,grad_norm: 0.9999994748526849, iteration: 359862
loss: 1.0286297798156738,grad_norm: 0.9654491680293525, iteration: 359863
loss: 1.1822941303253174,grad_norm: 1.000000098369344, iteration: 359864
loss: 1.0212337970733643,grad_norm: 0.9539547383060776, iteration: 359865
loss: 1.018474817276001,grad_norm: 0.8476674720754007, iteration: 359866
loss: 0.9871714115142822,grad_norm: 0.7243256911435314, iteration: 359867
loss: 1.0067437887191772,grad_norm: 0.9999992250615913, iteration: 359868
loss: 1.0542771816253662,grad_norm: 0.9999995685428805, iteration: 359869
loss: 0.9869911670684814,grad_norm: 0.7331366465535605, iteration: 359870
loss: 1.1410762071609497,grad_norm: 0.999999643287517, iteration: 359871
loss: 1.0956311225891113,grad_norm: 0.9861888717801874, iteration: 359872
loss: 1.0398318767547607,grad_norm: 0.7780785495252972, iteration: 359873
loss: 0.9914994239807129,grad_norm: 0.864406971800383, iteration: 359874
loss: 1.0116167068481445,grad_norm: 0.9788142734237135, iteration: 359875
loss: 1.004037857055664,grad_norm: 0.9999991860409853, iteration: 359876
loss: 1.023327350616455,grad_norm: 0.6852337798609863, iteration: 359877
loss: 1.1145226955413818,grad_norm: 0.9999993046296991, iteration: 359878
loss: 1.0007351636886597,grad_norm: 0.7888706338716935, iteration: 359879
loss: 0.9820860028266907,grad_norm: 0.7329703466510235, iteration: 359880
loss: 1.1390613317489624,grad_norm: 0.9718393042851821, iteration: 359881
loss: 1.015153169631958,grad_norm: 0.9287276840047524, iteration: 359882
loss: 1.037750244140625,grad_norm: 0.8202295460565052, iteration: 359883
loss: 1.051053762435913,grad_norm: 0.9999993070073554, iteration: 359884
loss: 1.0630369186401367,grad_norm: 0.9999998578420193, iteration: 359885
loss: 1.0389161109924316,grad_norm: 0.9999993769058968, iteration: 359886
loss: 0.9642250537872314,grad_norm: 0.6819097549901963, iteration: 359887
loss: 0.9743131995201111,grad_norm: 0.8805207178784169, iteration: 359888
loss: 0.96018385887146,grad_norm: 0.9999994132330904, iteration: 359889
loss: 1.0249804258346558,grad_norm: 0.6580319129609803, iteration: 359890
loss: 1.0229740142822266,grad_norm: 0.758124455236102, iteration: 359891
loss: 0.983478844165802,grad_norm: 0.9999992227362049, iteration: 359892
loss: 0.98777174949646,grad_norm: 0.7422606202454097, iteration: 359893
loss: 1.0051110982894897,grad_norm: 0.9608497140594604, iteration: 359894
loss: 1.0203826427459717,grad_norm: 0.747624033475672, iteration: 359895
loss: 1.030392050743103,grad_norm: 0.8537801254017706, iteration: 359896
loss: 1.0014156103134155,grad_norm: 0.8393464616056293, iteration: 359897
loss: 1.0652419328689575,grad_norm: 0.940995361484931, iteration: 359898
loss: 1.0820108652114868,grad_norm: 0.9999994655676562, iteration: 359899
loss: 1.0117995738983154,grad_norm: 0.8512520339101778, iteration: 359900
loss: 1.0303499698638916,grad_norm: 0.9999999022757097, iteration: 359901
loss: 0.9770191311836243,grad_norm: 0.8244071465968915, iteration: 359902
loss: 1.0415011644363403,grad_norm: 0.9999990410283077, iteration: 359903
loss: 0.995680570602417,grad_norm: 0.8106181560323874, iteration: 359904
loss: 0.9715524315834045,grad_norm: 0.7562188240855161, iteration: 359905
loss: 0.9801371693611145,grad_norm: 0.9407590036205821, iteration: 359906
loss: 1.039565086364746,grad_norm: 0.9999992371319352, iteration: 359907
loss: 1.0374189615249634,grad_norm: 0.999999278128701, iteration: 359908
loss: 0.9990284442901611,grad_norm: 0.9227719562162192, iteration: 359909
loss: 0.9617741703987122,grad_norm: 0.8184681579304169, iteration: 359910
loss: 0.9938137531280518,grad_norm: 0.8148120973599563, iteration: 359911
loss: 0.9842408299446106,grad_norm: 0.9999990948484447, iteration: 359912
loss: 1.0207221508026123,grad_norm: 0.8013446662798923, iteration: 359913
loss: 1.0039634704589844,grad_norm: 0.7702461689900365, iteration: 359914
loss: 1.0042911767959595,grad_norm: 0.8715109633435978, iteration: 359915
loss: 1.0051182508468628,grad_norm: 0.7629197931800168, iteration: 359916
loss: 0.9802411198616028,grad_norm: 0.9332141184602863, iteration: 359917
loss: 0.963235080242157,grad_norm: 0.9573889894951397, iteration: 359918
loss: 0.9578729867935181,grad_norm: 0.7093312421053595, iteration: 359919
loss: 0.9983326196670532,grad_norm: 0.7419695528978788, iteration: 359920
loss: 0.9877919554710388,grad_norm: 0.7350103811337249, iteration: 359921
loss: 0.9633761644363403,grad_norm: 0.7809488456445532, iteration: 359922
loss: 0.994879961013794,grad_norm: 0.7479108655239318, iteration: 359923
loss: 1.1866058111190796,grad_norm: 0.903367776163564, iteration: 359924
loss: 0.9867913126945496,grad_norm: 0.7241459779796598, iteration: 359925
loss: 1.0477524995803833,grad_norm: 0.8331746457538995, iteration: 359926
loss: 0.9972796440124512,grad_norm: 0.8569732317006891, iteration: 359927
loss: 0.9446509480476379,grad_norm: 0.9024902759871332, iteration: 359928
loss: 1.072372555732727,grad_norm: 0.9999992037426434, iteration: 359929
loss: 0.999955415725708,grad_norm: 0.8953109969697729, iteration: 359930
loss: 1.0032581090927124,grad_norm: 0.862166818744409, iteration: 359931
loss: 0.9913057684898376,grad_norm: 0.8909560884608069, iteration: 359932
loss: 1.0371040105819702,grad_norm: 0.9999992317324968, iteration: 359933
loss: 1.0215359926223755,grad_norm: 0.8180900466901435, iteration: 359934
loss: 1.0334107875823975,grad_norm: 0.9584768042602939, iteration: 359935
loss: 1.0118807554244995,grad_norm: 0.9415490113312713, iteration: 359936
loss: 1.0164556503295898,grad_norm: 0.7514874596131451, iteration: 359937
loss: 0.9791752696037292,grad_norm: 0.8183709716792127, iteration: 359938
loss: 1.0365113019943237,grad_norm: 0.7215808905484359, iteration: 359939
loss: 1.0183924436569214,grad_norm: 0.7590778532477614, iteration: 359940
loss: 0.9970394968986511,grad_norm: 0.8478647077989412, iteration: 359941
loss: 0.9693130254745483,grad_norm: 0.7359610785720149, iteration: 359942
loss: 1.0406341552734375,grad_norm: 0.9055101061765748, iteration: 359943
loss: 0.9717206358909607,grad_norm: 0.8874332340567391, iteration: 359944
loss: 1.0268827676773071,grad_norm: 0.999999539945317, iteration: 359945
loss: 1.005746841430664,grad_norm: 0.9999990691063698, iteration: 359946
loss: 1.0428929328918457,grad_norm: 0.9515013301384038, iteration: 359947
loss: 0.9856564998626709,grad_norm: 0.8138455077410128, iteration: 359948
loss: 0.9701955914497375,grad_norm: 0.7931453768766696, iteration: 359949
loss: 0.9737424254417419,grad_norm: 0.8799418642062569, iteration: 359950
loss: 0.9864164590835571,grad_norm: 0.8391962137985014, iteration: 359951
loss: 1.0076061487197876,grad_norm: 0.7792925403944092, iteration: 359952
loss: 1.010538101196289,grad_norm: 0.7701462352627297, iteration: 359953
loss: 1.0366220474243164,grad_norm: 0.9999990604973379, iteration: 359954
loss: 1.0023921728134155,grad_norm: 0.7518994366409585, iteration: 359955
loss: 0.9884397387504578,grad_norm: 0.9518029702351959, iteration: 359956
loss: 0.9920266270637512,grad_norm: 0.8173379657583051, iteration: 359957
loss: 1.0331519842147827,grad_norm: 0.9785206224813552, iteration: 359958
loss: 1.0049598217010498,grad_norm: 0.7477919676391296, iteration: 359959
loss: 1.0028634071350098,grad_norm: 0.9482294466968357, iteration: 359960
loss: 0.9947568774223328,grad_norm: 0.8624320027197161, iteration: 359961
loss: 0.9734219908714294,grad_norm: 0.9999989321112177, iteration: 359962
loss: 1.019845724105835,grad_norm: 0.999999239412906, iteration: 359963
loss: 0.9854024648666382,grad_norm: 0.6745627903823909, iteration: 359964
loss: 1.0191820859909058,grad_norm: 0.8256123957188806, iteration: 359965
loss: 0.984738290309906,grad_norm: 0.863634554796793, iteration: 359966
loss: 1.0342081785202026,grad_norm: 0.7287125928880739, iteration: 359967
loss: 0.9665091633796692,grad_norm: 0.7250634082544999, iteration: 359968
loss: 0.9912434816360474,grad_norm: 0.9670025000869963, iteration: 359969
loss: 1.0189907550811768,grad_norm: 0.741156369743159, iteration: 359970
loss: 1.0184653997421265,grad_norm: 0.7831069188454095, iteration: 359971
loss: 1.022611379623413,grad_norm: 0.9136598672305256, iteration: 359972
loss: 0.9705827832221985,grad_norm: 0.7410886294108323, iteration: 359973
loss: 1.018928050994873,grad_norm: 0.8887370022189552, iteration: 359974
loss: 0.983219563961029,grad_norm: 0.6542077102736092, iteration: 359975
loss: 1.019984245300293,grad_norm: 0.8572995627830483, iteration: 359976
loss: 1.017021656036377,grad_norm: 0.9999994340332088, iteration: 359977
loss: 1.0075684785842896,grad_norm: 0.7688912926646322, iteration: 359978
loss: 1.0404762029647827,grad_norm: 0.9526638862363761, iteration: 359979
loss: 0.9885327219963074,grad_norm: 0.8853954662761786, iteration: 359980
loss: 1.0239537954330444,grad_norm: 0.941291315297353, iteration: 359981
loss: 0.981706976890564,grad_norm: 0.8177036221927425, iteration: 359982
loss: 1.0251306295394897,grad_norm: 0.8032236020895293, iteration: 359983
loss: 0.9477401971817017,grad_norm: 0.8187197924152606, iteration: 359984
loss: 1.0515352487564087,grad_norm: 0.8510492043592621, iteration: 359985
loss: 0.965868353843689,grad_norm: 0.8267478852751815, iteration: 359986
loss: 0.9882616400718689,grad_norm: 0.7477910317622243, iteration: 359987
loss: 0.9930737018585205,grad_norm: 0.7056434296807553, iteration: 359988
loss: 0.9716036915779114,grad_norm: 0.9999997760998774, iteration: 359989
loss: 1.0268131494522095,grad_norm: 0.8521794675725022, iteration: 359990
loss: 0.992954432964325,grad_norm: 0.999999575766997, iteration: 359991
loss: 0.9847491383552551,grad_norm: 0.9999990349167407, iteration: 359992
loss: 1.0075488090515137,grad_norm: 0.7482411745391453, iteration: 359993
loss: 1.020406723022461,grad_norm: 0.748235669313333, iteration: 359994
loss: 1.0216546058654785,grad_norm: 0.8680738451832678, iteration: 359995
loss: 1.0054125785827637,grad_norm: 0.9799850408114616, iteration: 359996
loss: 1.0253925323486328,grad_norm: 0.7989692865687117, iteration: 359997
loss: 0.9825555682182312,grad_norm: 0.7592855552390845, iteration: 359998
loss: 1.083760380744934,grad_norm: 0.8523495368662699, iteration: 359999
loss: 1.018626093864441,grad_norm: 0.780649225396449, iteration: 360000
Evaluating at step 360000
{'val': 0.9948624465614557, 'test': 2.2625263308564696}
loss: 0.99980229139328,grad_norm: 0.8752807957418782, iteration: 360001
loss: 0.9556012153625488,grad_norm: 0.7027558092966593, iteration: 360002
loss: 0.9788694381713867,grad_norm: 0.8401697159198125, iteration: 360003
loss: 1.0205931663513184,grad_norm: 0.9999990358379922, iteration: 360004
loss: 0.9855028986930847,grad_norm: 0.9999991808540586, iteration: 360005
loss: 0.9980340003967285,grad_norm: 0.8317350536789901, iteration: 360006
loss: 0.9874286651611328,grad_norm: 0.9999990410671947, iteration: 360007
loss: 1.015580177307129,grad_norm: 0.7642993683104623, iteration: 360008
loss: 1.07121741771698,grad_norm: 0.9999992176179998, iteration: 360009
loss: 0.9650057554244995,grad_norm: 0.7694176934604204, iteration: 360010
loss: 1.0489296913146973,grad_norm: 0.9999990040787041, iteration: 360011
loss: 0.9786179661750793,grad_norm: 0.718406267069731, iteration: 360012
loss: 1.0699301958084106,grad_norm: 0.9999991873941423, iteration: 360013
loss: 0.9802374243736267,grad_norm: 0.8701303297196907, iteration: 360014
loss: 0.9881699681282043,grad_norm: 0.768077778362242, iteration: 360015
loss: 1.0085023641586304,grad_norm: 0.8299901336489514, iteration: 360016
loss: 0.9676575660705566,grad_norm: 0.7943314813845083, iteration: 360017
loss: 0.9892359972000122,grad_norm: 0.7141891561991335, iteration: 360018
loss: 1.0454879999160767,grad_norm: 0.9430159807054141, iteration: 360019
loss: 0.9670782685279846,grad_norm: 0.8207491859404049, iteration: 360020
loss: 0.9922197461128235,grad_norm: 0.7598081809360082, iteration: 360021
loss: 1.038040280342102,grad_norm: 0.7444413208084993, iteration: 360022
loss: 0.991041898727417,grad_norm: 0.7935226167806552, iteration: 360023
loss: 0.9754522442817688,grad_norm: 0.8615249889675559, iteration: 360024
loss: 1.0216009616851807,grad_norm: 0.8494739977403236, iteration: 360025
loss: 0.9932115077972412,grad_norm: 0.6909693320917701, iteration: 360026
loss: 1.0346829891204834,grad_norm: 0.9999994231693885, iteration: 360027
loss: 1.0168832540512085,grad_norm: 0.9828997327237079, iteration: 360028
loss: 0.999028205871582,grad_norm: 0.785427389733022, iteration: 360029
loss: 1.0413907766342163,grad_norm: 0.7158982923948625, iteration: 360030
loss: 0.97867751121521,grad_norm: 0.8352219064871603, iteration: 360031
loss: 0.9893905520439148,grad_norm: 0.7865327231024968, iteration: 360032
loss: 1.0197205543518066,grad_norm: 0.8327855629592134, iteration: 360033
loss: 1.0941787958145142,grad_norm: 0.8561443676746167, iteration: 360034
loss: 0.9896279573440552,grad_norm: 0.9999999006297211, iteration: 360035
loss: 1.0166751146316528,grad_norm: 0.7577348134002334, iteration: 360036
loss: 1.0025274753570557,grad_norm: 0.8178390415023883, iteration: 360037
loss: 1.0063390731811523,grad_norm: 0.9999992365857963, iteration: 360038
loss: 1.0651668310165405,grad_norm: 0.9999992067271929, iteration: 360039
loss: 0.975273847579956,grad_norm: 0.6902080806809997, iteration: 360040
loss: 1.0200291872024536,grad_norm: 0.9132512789430302, iteration: 360041
loss: 1.020979642868042,grad_norm: 0.6772336486098391, iteration: 360042
loss: 0.9932480454444885,grad_norm: 0.800123094154945, iteration: 360043
loss: 1.0316617488861084,grad_norm: 0.9999993561171115, iteration: 360044
loss: 1.0007485151290894,grad_norm: 0.7097779834837921, iteration: 360045
loss: 1.0578892230987549,grad_norm: 0.9127588665203524, iteration: 360046
loss: 1.0122872591018677,grad_norm: 0.7262325997187304, iteration: 360047
loss: 0.9899377226829529,grad_norm: 0.7580334316778744, iteration: 360048
loss: 0.9762193560600281,grad_norm: 0.76069498926356, iteration: 360049
loss: 0.9847338199615479,grad_norm: 0.7888145787079263, iteration: 360050
loss: 0.9690980911254883,grad_norm: 0.7638497294529487, iteration: 360051
loss: 1.0087436437606812,grad_norm: 0.8261604425402651, iteration: 360052
loss: 0.9808453917503357,grad_norm: 0.8801025409089851, iteration: 360053
loss: 0.9524554014205933,grad_norm: 0.9308352252896526, iteration: 360054
loss: 1.1075737476348877,grad_norm: 0.8248089824517831, iteration: 360055
loss: 1.0157899856567383,grad_norm: 0.7077240515177105, iteration: 360056
loss: 0.9626606106758118,grad_norm: 0.8222388937067483, iteration: 360057
loss: 0.9636535048484802,grad_norm: 0.7665144408980143, iteration: 360058
loss: 1.0114994049072266,grad_norm: 0.8994587493967052, iteration: 360059
loss: 0.9668347835540771,grad_norm: 0.9178753278913626, iteration: 360060
loss: 1.0007238388061523,grad_norm: 0.8696025865316251, iteration: 360061
loss: 0.9809700846672058,grad_norm: 0.9999996605196314, iteration: 360062
loss: 0.9641725420951843,grad_norm: 0.694435106338898, iteration: 360063
loss: 1.0043772459030151,grad_norm: 0.7561516632735064, iteration: 360064
loss: 1.0238416194915771,grad_norm: 0.6247768087556216, iteration: 360065
loss: 1.0743504762649536,grad_norm: 0.9999995081955436, iteration: 360066
loss: 0.9944713711738586,grad_norm: 0.8168478919976413, iteration: 360067
loss: 1.054983377456665,grad_norm: 0.8277637794112443, iteration: 360068
loss: 0.9883637428283691,grad_norm: 0.9000142653619296, iteration: 360069
loss: 1.0207291841506958,grad_norm: 0.999999114887218, iteration: 360070
loss: 0.9481574892997742,grad_norm: 0.8742460316082459, iteration: 360071
loss: 1.0204402208328247,grad_norm: 0.7679004230755523, iteration: 360072
loss: 1.0320483446121216,grad_norm: 0.8781399544098674, iteration: 360073
loss: 1.016678810119629,grad_norm: 0.8043050167872342, iteration: 360074
loss: 0.9854360818862915,grad_norm: 0.9356822063194956, iteration: 360075
loss: 0.9993602633476257,grad_norm: 0.9999990258662427, iteration: 360076
loss: 0.989290714263916,grad_norm: 0.7459443919817242, iteration: 360077
loss: 0.9946563839912415,grad_norm: 0.8118010339886113, iteration: 360078
loss: 0.9860597252845764,grad_norm: 0.87777888129425, iteration: 360079
loss: 0.9745323657989502,grad_norm: 0.94215326807896, iteration: 360080
loss: 0.9990814328193665,grad_norm: 0.7651221830943927, iteration: 360081
loss: 1.0512665510177612,grad_norm: 0.8468428535074148, iteration: 360082
loss: 1.0351959466934204,grad_norm: 0.9999991349567321, iteration: 360083
loss: 1.0775580406188965,grad_norm: 1.0000000014127448, iteration: 360084
loss: 1.0464009046554565,grad_norm: 0.8826823458513285, iteration: 360085
loss: 1.0032721757888794,grad_norm: 0.7799214330901459, iteration: 360086
loss: 0.9722928404808044,grad_norm: 0.9999992435547603, iteration: 360087
loss: 1.0127049684524536,grad_norm: 0.8917681515275311, iteration: 360088
loss: 0.996140718460083,grad_norm: 0.8210714883863117, iteration: 360089
loss: 1.0002634525299072,grad_norm: 0.8051161611416666, iteration: 360090
loss: 1.024188756942749,grad_norm: 0.7530170252846837, iteration: 360091
loss: 1.0176717042922974,grad_norm: 0.7353345994309153, iteration: 360092
loss: 1.0060408115386963,grad_norm: 0.8474701858173568, iteration: 360093
loss: 1.0478616952896118,grad_norm: 0.999999027345071, iteration: 360094
loss: 0.9719192981719971,grad_norm: 0.8550050936649837, iteration: 360095
loss: 1.0160937309265137,grad_norm: 0.999999569718606, iteration: 360096
loss: 1.026860237121582,grad_norm: 0.9999991646417903, iteration: 360097
loss: 1.0183948278427124,grad_norm: 0.9999992263332729, iteration: 360098
loss: 1.0123240947723389,grad_norm: 0.8008419250486564, iteration: 360099
loss: 0.9927733540534973,grad_norm: 0.7117972138174785, iteration: 360100
loss: 0.96905916929245,grad_norm: 0.8615263616028193, iteration: 360101
loss: 1.008397102355957,grad_norm: 0.8763601615659189, iteration: 360102
loss: 1.0571162700653076,grad_norm: 0.6975974101848913, iteration: 360103
loss: 0.9966930747032166,grad_norm: 0.8384848271083217, iteration: 360104
loss: 1.0019057989120483,grad_norm: 0.9999991282581452, iteration: 360105
loss: 0.9845362901687622,grad_norm: 0.8200797370995352, iteration: 360106
loss: 1.0057718753814697,grad_norm: 0.686466743096517, iteration: 360107
loss: 1.0278674364089966,grad_norm: 0.6996936054408716, iteration: 360108
loss: 1.0057052373886108,grad_norm: 0.8955263862708299, iteration: 360109
loss: 0.9631125330924988,grad_norm: 0.7159124616046523, iteration: 360110
loss: 1.0289793014526367,grad_norm: 0.7375739556207248, iteration: 360111
loss: 0.9783559441566467,grad_norm: 0.754559492403289, iteration: 360112
loss: 0.975142240524292,grad_norm: 0.8699516339806022, iteration: 360113
loss: 1.0299043655395508,grad_norm: 0.9399077353834535, iteration: 360114
loss: 1.0220012664794922,grad_norm: 0.8827062774913849, iteration: 360115
loss: 0.9520052075386047,grad_norm: 0.8117107913163806, iteration: 360116
loss: 1.0187914371490479,grad_norm: 0.7606599185100869, iteration: 360117
loss: 1.003280758857727,grad_norm: 0.8039658500898851, iteration: 360118
loss: 1.0287487506866455,grad_norm: 0.9058119433289286, iteration: 360119
loss: 1.091281533241272,grad_norm: 0.9811756073797835, iteration: 360120
loss: 1.0320323705673218,grad_norm: 0.8442130451053228, iteration: 360121
loss: 0.9900932312011719,grad_norm: 0.8671322711663872, iteration: 360122
loss: 0.975369393825531,grad_norm: 0.7590397650111465, iteration: 360123
loss: 1.0398494005203247,grad_norm: 0.7588885987142319, iteration: 360124
loss: 1.0446155071258545,grad_norm: 0.9508530577567841, iteration: 360125
loss: 1.0161579847335815,grad_norm: 0.9999995540483588, iteration: 360126
loss: 1.0115211009979248,grad_norm: 0.8065717937476254, iteration: 360127
loss: 1.0084969997406006,grad_norm: 0.8364747241802744, iteration: 360128
loss: 1.0153944492340088,grad_norm: 0.827916300940542, iteration: 360129
loss: 1.1184413433074951,grad_norm: 0.8084837077880064, iteration: 360130
loss: 0.9695166349411011,grad_norm: 0.901944981270508, iteration: 360131
loss: 0.9853492379188538,grad_norm: 0.8152964481767804, iteration: 360132
loss: 1.0359562635421753,grad_norm: 0.8801475421766523, iteration: 360133
loss: 0.9737419486045837,grad_norm: 0.8218043978109949, iteration: 360134
loss: 0.9983864426612854,grad_norm: 0.7748665371130806, iteration: 360135
loss: 0.9838901162147522,grad_norm: 0.7053066437378217, iteration: 360136
loss: 1.0029041767120361,grad_norm: 0.7447406349293778, iteration: 360137
loss: 1.0243604183197021,grad_norm: 0.7303315218804971, iteration: 360138
loss: 0.9758962988853455,grad_norm: 0.8052105804629409, iteration: 360139
loss: 1.006099820137024,grad_norm: 0.7451515119367647, iteration: 360140
loss: 1.0099223852157593,grad_norm: 0.9999991040656112, iteration: 360141
loss: 1.0480867624282837,grad_norm: 0.8089387195682636, iteration: 360142
loss: 0.9674999713897705,grad_norm: 0.9500274238706907, iteration: 360143
loss: 0.9918879866600037,grad_norm: 0.7551378813811708, iteration: 360144
loss: 1.0678400993347168,grad_norm: 0.999999805019695, iteration: 360145
loss: 1.1186628341674805,grad_norm: 0.9999990848439442, iteration: 360146
loss: 1.0368216037750244,grad_norm: 0.9999990583564675, iteration: 360147
loss: 0.9969823360443115,grad_norm: 0.8340919195803101, iteration: 360148
loss: 1.0088868141174316,grad_norm: 0.8662488073983762, iteration: 360149
loss: 0.9892745614051819,grad_norm: 0.9999993261042259, iteration: 360150
loss: 1.0225220918655396,grad_norm: 0.7238780534291182, iteration: 360151
loss: 1.010117530822754,grad_norm: 0.9331948745843504, iteration: 360152
loss: 1.0075184106826782,grad_norm: 0.7068276535567741, iteration: 360153
loss: 0.9963089823722839,grad_norm: 0.8267092666162711, iteration: 360154
loss: 1.002333402633667,grad_norm: 0.828830980339452, iteration: 360155
loss: 1.100904107093811,grad_norm: 0.999999268704919, iteration: 360156
loss: 0.984516441822052,grad_norm: 0.6430082198937908, iteration: 360157
loss: 0.9803720712661743,grad_norm: 0.7864386756609221, iteration: 360158
loss: 0.9975078701972961,grad_norm: 0.9786269809891188, iteration: 360159
loss: 1.052545189857483,grad_norm: 0.9999992554083721, iteration: 360160
loss: 0.9926985502243042,grad_norm: 0.8428123864300986, iteration: 360161
loss: 1.010016918182373,grad_norm: 0.8144801061045961, iteration: 360162
loss: 0.9993395209312439,grad_norm: 0.6871955612703385, iteration: 360163
loss: 0.9886485934257507,grad_norm: 0.6958007160692555, iteration: 360164
loss: 0.9990901947021484,grad_norm: 0.7847863490445117, iteration: 360165
loss: 1.029589056968689,grad_norm: 0.9076419591732661, iteration: 360166
loss: 0.9717968702316284,grad_norm: 0.9999989756555747, iteration: 360167
loss: 1.0251123905181885,grad_norm: 0.7761532384088913, iteration: 360168
loss: 1.000413417816162,grad_norm: 0.8154650058496062, iteration: 360169
loss: 1.0142167806625366,grad_norm: 0.9999999243845021, iteration: 360170
loss: 1.0117422342300415,grad_norm: 0.7850717200800013, iteration: 360171
loss: 0.9846230745315552,grad_norm: 0.7910145136744867, iteration: 360172
loss: 0.9568650722503662,grad_norm: 0.8356506244480916, iteration: 360173
loss: 1.0255178213119507,grad_norm: 0.9999993069434154, iteration: 360174
loss: 0.9957848191261292,grad_norm: 0.7755718572590129, iteration: 360175
loss: 1.0005853176116943,grad_norm: 0.7921366724945558, iteration: 360176
loss: 1.0342226028442383,grad_norm: 0.912230338966301, iteration: 360177
loss: 1.011592149734497,grad_norm: 0.9457691693896675, iteration: 360178
loss: 1.0261768102645874,grad_norm: 0.7600004764116749, iteration: 360179
loss: 0.9878594279289246,grad_norm: 0.9999995633142887, iteration: 360180
loss: 0.9798258543014526,grad_norm: 0.9999992622519082, iteration: 360181
loss: 1.0031981468200684,grad_norm: 0.724464524833723, iteration: 360182
loss: 1.006201148033142,grad_norm: 0.7566093728075864, iteration: 360183
loss: 0.9634192585945129,grad_norm: 0.8288140927196762, iteration: 360184
loss: 0.9629018306732178,grad_norm: 0.6913035764711338, iteration: 360185
loss: 0.9827661514282227,grad_norm: 0.8544297310829068, iteration: 360186
loss: 0.9835759401321411,grad_norm: 0.86503127199089, iteration: 360187
loss: 0.965168833732605,grad_norm: 0.8900125282970491, iteration: 360188
loss: 0.9987878203392029,grad_norm: 0.8445132695924116, iteration: 360189
loss: 0.9731274843215942,grad_norm: 0.8917430598253929, iteration: 360190
loss: 0.967937171459198,grad_norm: 0.8538196623111023, iteration: 360191
loss: 0.9887325763702393,grad_norm: 0.7215251939680932, iteration: 360192
loss: 0.9988551735877991,grad_norm: 0.8308813370747365, iteration: 360193
loss: 1.0024305582046509,grad_norm: 0.9999995713081083, iteration: 360194
loss: 0.9665820002555847,grad_norm: 0.74986025893536, iteration: 360195
loss: 1.0522406101226807,grad_norm: 0.9999994913033906, iteration: 360196
loss: 1.1389492750167847,grad_norm: 0.9999997194725108, iteration: 360197
loss: 0.9869130849838257,grad_norm: 0.7424668938003303, iteration: 360198
loss: 1.1000556945800781,grad_norm: 0.9999995966089049, iteration: 360199
loss: 0.9999414086341858,grad_norm: 0.9028793185165898, iteration: 360200
loss: 1.0338177680969238,grad_norm: 0.8235454243272813, iteration: 360201
loss: 0.9465868473052979,grad_norm: 0.8010132297872722, iteration: 360202
loss: 0.9977558255195618,grad_norm: 0.720025950202965, iteration: 360203
loss: 1.0376313924789429,grad_norm: 0.9999991221598548, iteration: 360204
loss: 1.1032118797302246,grad_norm: 0.8710266102212344, iteration: 360205
loss: 1.01651930809021,grad_norm: 0.887242297350953, iteration: 360206
loss: 1.011961817741394,grad_norm: 0.7553852432620449, iteration: 360207
loss: 0.9882612824440002,grad_norm: 0.8597283871837912, iteration: 360208
loss: 0.9962779879570007,grad_norm: 0.9999990482205633, iteration: 360209
loss: 1.0737384557724,grad_norm: 0.9999993877179653, iteration: 360210
loss: 1.0323708057403564,grad_norm: 0.9835556769133166, iteration: 360211
loss: 1.0021756887435913,grad_norm: 0.7550353005013543, iteration: 360212
loss: 1.0170701742172241,grad_norm: 0.815963461343904, iteration: 360213
loss: 1.0082998275756836,grad_norm: 0.8240444638299406, iteration: 360214
loss: 0.9883429408073425,grad_norm: 0.9999990109898699, iteration: 360215
loss: 0.9863555431365967,grad_norm: 0.9953327739875821, iteration: 360216
loss: 1.0371801853179932,grad_norm: 0.8666037267856986, iteration: 360217
loss: 1.0548115968704224,grad_norm: 0.818148421843032, iteration: 360218
loss: 1.0053801536560059,grad_norm: 0.7802071089220873, iteration: 360219
loss: 1.0026168823242188,grad_norm: 0.9590177753425033, iteration: 360220
loss: 0.9815255403518677,grad_norm: 0.8536924318880721, iteration: 360221
loss: 1.0174953937530518,grad_norm: 0.9143596557811052, iteration: 360222
loss: 1.0450431108474731,grad_norm: 0.9999998323054824, iteration: 360223
loss: 1.043397307395935,grad_norm: 0.8200492599099697, iteration: 360224
loss: 1.0034385919570923,grad_norm: 0.7818319146191135, iteration: 360225
loss: 1.036623239517212,grad_norm: 0.916190723223908, iteration: 360226
loss: 0.9663605093955994,grad_norm: 0.8595925622114966, iteration: 360227
loss: 1.131351351737976,grad_norm: 0.9999997527701738, iteration: 360228
loss: 1.0198975801467896,grad_norm: 0.7410326461232366, iteration: 360229
loss: 1.0463672876358032,grad_norm: 0.8722647025168128, iteration: 360230
loss: 1.0180047750473022,grad_norm: 0.833620838736302, iteration: 360231
loss: 0.9767117500305176,grad_norm: 0.999999167080181, iteration: 360232
loss: 1.0085636377334595,grad_norm: 0.9351752551469869, iteration: 360233
loss: 0.983546793460846,grad_norm: 0.9381095278524045, iteration: 360234
loss: 0.9978895783424377,grad_norm: 0.8089948648841084, iteration: 360235
loss: 0.9830397367477417,grad_norm: 0.9321408600837066, iteration: 360236
loss: 0.98477703332901,grad_norm: 0.7884725867684412, iteration: 360237
loss: 1.0374646186828613,grad_norm: 0.9999990808530652, iteration: 360238
loss: 1.0224015712738037,grad_norm: 0.8553925967821456, iteration: 360239
loss: 1.0028971433639526,grad_norm: 0.9999992840017033, iteration: 360240
loss: 1.0232046842575073,grad_norm: 0.7839657728198041, iteration: 360241
loss: 1.078956961631775,grad_norm: 0.999999294303165, iteration: 360242
loss: 0.9918019771575928,grad_norm: 0.7900323658879226, iteration: 360243
loss: 0.9954636693000793,grad_norm: 0.6488289774732873, iteration: 360244
loss: 0.980966329574585,grad_norm: 0.8593519402636289, iteration: 360245
loss: 1.0311124324798584,grad_norm: 0.8506403216113239, iteration: 360246
loss: 0.9598159193992615,grad_norm: 0.7988127038476888, iteration: 360247
loss: 0.9704760313034058,grad_norm: 0.728135260010554, iteration: 360248
loss: 1.0353232622146606,grad_norm: 0.9933843496255953, iteration: 360249
loss: 1.021551489830017,grad_norm: 0.91621038017036, iteration: 360250
loss: 1.011093258857727,grad_norm: 0.7589753330158472, iteration: 360251
loss: 1.0877580642700195,grad_norm: 0.8946595364791735, iteration: 360252
loss: 0.9959158301353455,grad_norm: 0.8031404263220581, iteration: 360253
loss: 0.9991235733032227,grad_norm: 0.8900230275704539, iteration: 360254
loss: 0.9996224045753479,grad_norm: 0.8211349281203574, iteration: 360255
loss: 1.0073246955871582,grad_norm: 0.6794480679010718, iteration: 360256
loss: 1.0332578420639038,grad_norm: 0.9510637525012067, iteration: 360257
loss: 1.007074236869812,grad_norm: 0.7600611570741065, iteration: 360258
loss: 1.0807862281799316,grad_norm: 0.9999990234271474, iteration: 360259
loss: 0.9837012887001038,grad_norm: 0.9794059431097708, iteration: 360260
loss: 0.9707871079444885,grad_norm: 0.8779451846213451, iteration: 360261
loss: 1.03294837474823,grad_norm: 0.8263000894949496, iteration: 360262
loss: 1.0162907838821411,grad_norm: 0.8219786596839135, iteration: 360263
loss: 1.0370959043502808,grad_norm: 0.6957691929959847, iteration: 360264
loss: 1.0190072059631348,grad_norm: 0.677720865531468, iteration: 360265
loss: 1.0307925939559937,grad_norm: 0.731012305365422, iteration: 360266
loss: 0.9994465708732605,grad_norm: 0.8552029913537552, iteration: 360267
loss: 0.9604921340942383,grad_norm: 0.7589139649949636, iteration: 360268
loss: 1.0126090049743652,grad_norm: 0.9999999868235542, iteration: 360269
loss: 1.024938702583313,grad_norm: 0.9999990922310736, iteration: 360270
loss: 1.090759038925171,grad_norm: 0.9999999089650403, iteration: 360271
loss: 0.9743506908416748,grad_norm: 0.7541257580365858, iteration: 360272
loss: 1.0356111526489258,grad_norm: 0.9298231244000624, iteration: 360273
loss: 1.0406140089035034,grad_norm: 0.7700515884955601, iteration: 360274
loss: 0.9978958368301392,grad_norm: 0.907261993111948, iteration: 360275
loss: 1.0586038827896118,grad_norm: 0.9283258894357186, iteration: 360276
loss: 1.0098278522491455,grad_norm: 0.6161871808113637, iteration: 360277
loss: 1.0890610218048096,grad_norm: 0.8515343063337557, iteration: 360278
loss: 1.0326707363128662,grad_norm: 0.8641603062622714, iteration: 360279
loss: 1.1054532527923584,grad_norm: 0.9999990908006754, iteration: 360280
loss: 1.0247384309768677,grad_norm: 0.8758824333629422, iteration: 360281
loss: 0.9921674728393555,grad_norm: 0.811460223505404, iteration: 360282
loss: 0.9542592167854309,grad_norm: 0.9999990434346101, iteration: 360283
loss: 1.1589114665985107,grad_norm: 0.9957578449651052, iteration: 360284
loss: 1.0225443840026855,grad_norm: 0.7579098143132577, iteration: 360285
loss: 1.0164239406585693,grad_norm: 0.8791834294761968, iteration: 360286
loss: 0.99460369348526,grad_norm: 0.7606121051781524, iteration: 360287
loss: 0.9799430966377258,grad_norm: 0.9999991882437879, iteration: 360288
loss: 0.9801827669143677,grad_norm: 0.99999942224481, iteration: 360289
loss: 1.0395338535308838,grad_norm: 0.8293929455836319, iteration: 360290
loss: 1.0692092180252075,grad_norm: 0.7585447773289441, iteration: 360291
loss: 0.9742323160171509,grad_norm: 0.8594098174294595, iteration: 360292
loss: 0.9830411076545715,grad_norm: 0.8106744000846686, iteration: 360293
loss: 0.9729731678962708,grad_norm: 0.680933788446798, iteration: 360294
loss: 1.002712368965149,grad_norm: 0.8922540440446317, iteration: 360295
loss: 1.0160624980926514,grad_norm: 0.7067793203743042, iteration: 360296
loss: 0.9700177907943726,grad_norm: 0.8045450630696164, iteration: 360297
loss: 0.9829346537590027,grad_norm: 0.9999990840717028, iteration: 360298
loss: 0.9705497622489929,grad_norm: 0.9999991435076708, iteration: 360299
loss: 1.0056259632110596,grad_norm: 0.7653932030737655, iteration: 360300
loss: 0.9785782098770142,grad_norm: 0.7821575890153549, iteration: 360301
loss: 0.9955016374588013,grad_norm: 0.7983045138033412, iteration: 360302
loss: 1.0043503046035767,grad_norm: 0.8371230478182944, iteration: 360303
loss: 1.0523730516433716,grad_norm: 0.999999770446048, iteration: 360304
loss: 0.9537471532821655,grad_norm: 0.8548320516124831, iteration: 360305
loss: 0.9739614725112915,grad_norm: 0.8290416745372043, iteration: 360306
loss: 1.0156962871551514,grad_norm: 0.7709929989561879, iteration: 360307
loss: 0.98785400390625,grad_norm: 0.7240237473941842, iteration: 360308
loss: 0.986423671245575,grad_norm: 0.9309728293322964, iteration: 360309
loss: 0.9936038255691528,grad_norm: 0.9999994250452813, iteration: 360310
loss: 0.9982402324676514,grad_norm: 0.7275771013926405, iteration: 360311
loss: 1.0069776773452759,grad_norm: 0.9578334188988612, iteration: 360312
loss: 1.0044766664505005,grad_norm: 0.7438296339613495, iteration: 360313
loss: 1.1455254554748535,grad_norm: 0.9999991803524976, iteration: 360314
loss: 1.0049115419387817,grad_norm: 0.8718639967105942, iteration: 360315
loss: 1.0038995742797852,grad_norm: 0.99999930033583, iteration: 360316
loss: 1.0079361200332642,grad_norm: 0.766122430865154, iteration: 360317
loss: 1.012863039970398,grad_norm: 0.7851378230068666, iteration: 360318
loss: 1.0106691122055054,grad_norm: 0.8943530588963899, iteration: 360319
loss: 1.0261362791061401,grad_norm: 0.999999289197433, iteration: 360320
loss: 1.0187727212905884,grad_norm: 0.9895685258486836, iteration: 360321
loss: 0.9645622968673706,grad_norm: 0.8217399815169287, iteration: 360322
loss: 0.9957839846611023,grad_norm: 0.6480049101675021, iteration: 360323
loss: 1.0487401485443115,grad_norm: 0.7283016656093976, iteration: 360324
loss: 0.9861637949943542,grad_norm: 0.7687876318186887, iteration: 360325
loss: 1.0111205577850342,grad_norm: 0.7308772995072421, iteration: 360326
loss: 0.9977148175239563,grad_norm: 0.6685118325342297, iteration: 360327
loss: 1.0188572406768799,grad_norm: 0.9999995103384502, iteration: 360328
loss: 0.9824919700622559,grad_norm: 0.6559689594396901, iteration: 360329
loss: 0.9623185992240906,grad_norm: 0.7427061112716479, iteration: 360330
loss: 1.0092434883117676,grad_norm: 0.8657235750415578, iteration: 360331
loss: 1.0259653329849243,grad_norm: 0.8217297354003242, iteration: 360332
loss: 1.0185455083847046,grad_norm: 0.770150235412476, iteration: 360333
loss: 1.002009630203247,grad_norm: 0.7361737106779859, iteration: 360334
loss: 0.9844889640808105,grad_norm: 0.7395700165512243, iteration: 360335
loss: 1.021116852760315,grad_norm: 0.9103704880057962, iteration: 360336
loss: 0.9875106811523438,grad_norm: 0.7867668081070133, iteration: 360337
loss: 1.0233651399612427,grad_norm: 0.8321646138990334, iteration: 360338
loss: 0.9908353686332703,grad_norm: 0.7642701561043703, iteration: 360339
loss: 0.9904351830482483,grad_norm: 0.8698654889684622, iteration: 360340
loss: 1.0231244564056396,grad_norm: 0.9191649738419748, iteration: 360341
loss: 1.0015923976898193,grad_norm: 0.9999990748528065, iteration: 360342
loss: 0.9310729503631592,grad_norm: 0.8560380975902004, iteration: 360343
loss: 1.005438208580017,grad_norm: 0.8669519758867998, iteration: 360344
loss: 1.0178923606872559,grad_norm: 0.8080730216924064, iteration: 360345
loss: 1.115071415901184,grad_norm: 0.9999990820068584, iteration: 360346
loss: 0.9900352358818054,grad_norm: 0.6362138833957304, iteration: 360347
loss: 0.9845817685127258,grad_norm: 0.7859794708269959, iteration: 360348
loss: 1.0067996978759766,grad_norm: 0.8470571720474935, iteration: 360349
loss: 1.0366525650024414,grad_norm: 0.8236228045788087, iteration: 360350
loss: 0.9602915048599243,grad_norm: 0.9222244838328635, iteration: 360351
loss: 0.9830305576324463,grad_norm: 0.9604472682452087, iteration: 360352
loss: 0.9647499322891235,grad_norm: 0.8347681442555924, iteration: 360353
loss: 0.9812487363815308,grad_norm: 0.8263175284498905, iteration: 360354
loss: 1.0151185989379883,grad_norm: 0.7130014459369689, iteration: 360355
loss: 1.078711748123169,grad_norm: 0.8725089797660411, iteration: 360356
loss: 1.0074760913848877,grad_norm: 0.8542685886746285, iteration: 360357
loss: 0.9900969862937927,grad_norm: 0.8525689774914496, iteration: 360358
loss: 1.0083491802215576,grad_norm: 0.6784134276492833, iteration: 360359
loss: 0.9860184192657471,grad_norm: 0.8942041037548788, iteration: 360360
loss: 1.007954478263855,grad_norm: 0.9999990763534843, iteration: 360361
loss: 1.086106300354004,grad_norm: 0.9305451665397357, iteration: 360362
loss: 0.9874545335769653,grad_norm: 0.8344561060907663, iteration: 360363
loss: 1.0667870044708252,grad_norm: 0.7746953953753104, iteration: 360364
loss: 1.0077260732650757,grad_norm: 0.7984368211835923, iteration: 360365
loss: 0.9828174114227295,grad_norm: 0.781124740968367, iteration: 360366
loss: 1.0368174314498901,grad_norm: 0.8754036114460257, iteration: 360367
loss: 1.0214667320251465,grad_norm: 0.8215632712518286, iteration: 360368
loss: 0.9805025458335876,grad_norm: 0.9448629481466836, iteration: 360369
loss: 0.9611353874206543,grad_norm: 0.8918764682426148, iteration: 360370
loss: 0.9958378672599792,grad_norm: 0.9004286099461702, iteration: 360371
loss: 1.0232884883880615,grad_norm: 0.7922794741093618, iteration: 360372
loss: 1.0186872482299805,grad_norm: 0.8162522727501669, iteration: 360373
loss: 0.9805893898010254,grad_norm: 0.8876534786534114, iteration: 360374
loss: 1.0057295560836792,grad_norm: 0.9999991901892903, iteration: 360375
loss: 0.9853264689445496,grad_norm: 0.9999992012530268, iteration: 360376
loss: 1.0161858797073364,grad_norm: 0.9046228881706763, iteration: 360377
loss: 0.988317608833313,grad_norm: 0.8868391156488007, iteration: 360378
loss: 0.9671666026115417,grad_norm: 0.762759013294617, iteration: 360379
loss: 0.9824976325035095,grad_norm: 0.8670135069555513, iteration: 360380
loss: 0.9987285733222961,grad_norm: 0.8705812861255562, iteration: 360381
loss: 1.0174394845962524,grad_norm: 0.9999990817479056, iteration: 360382
loss: 0.9842652678489685,grad_norm: 0.9132726342929152, iteration: 360383
loss: 1.0303252935409546,grad_norm: 0.999999670630738, iteration: 360384
loss: 0.9749385118484497,grad_norm: 0.7453330741122106, iteration: 360385
loss: 0.9735410809516907,grad_norm: 0.9147511459673953, iteration: 360386
loss: 0.989162266254425,grad_norm: 0.9179251027280229, iteration: 360387
loss: 1.031829595565796,grad_norm: 0.7250215239677598, iteration: 360388
loss: 0.9579258561134338,grad_norm: 0.8265564870492613, iteration: 360389
loss: 0.9964116811752319,grad_norm: 0.7355532580511404, iteration: 360390
loss: 1.1545816659927368,grad_norm: 0.9999999429991251, iteration: 360391
loss: 0.9893973469734192,grad_norm: 0.9654534293894629, iteration: 360392
loss: 1.0044540166854858,grad_norm: 0.9999995374873885, iteration: 360393
loss: 0.9900943636894226,grad_norm: 0.7220878891635573, iteration: 360394
loss: 0.9896721839904785,grad_norm: 0.853084661638065, iteration: 360395
loss: 1.017835021018982,grad_norm: 0.8786903694534774, iteration: 360396
loss: 1.0324572324752808,grad_norm: 0.8380415085516977, iteration: 360397
loss: 1.0336544513702393,grad_norm: 0.9999991541058526, iteration: 360398
loss: 0.9960857629776001,grad_norm: 0.9999989194764495, iteration: 360399
loss: 0.984937310218811,grad_norm: 0.7282473504383713, iteration: 360400
loss: 0.9889764785766602,grad_norm: 0.8387494074145763, iteration: 360401
loss: 0.9553693532943726,grad_norm: 0.8898051178003579, iteration: 360402
loss: 1.0223424434661865,grad_norm: 0.6543498850764552, iteration: 360403
loss: 0.9910829067230225,grad_norm: 0.9999999017575864, iteration: 360404
loss: 0.9776489734649658,grad_norm: 0.8135518563712657, iteration: 360405
loss: 0.9752324819564819,grad_norm: 0.7159512820141646, iteration: 360406
loss: 0.9990404844284058,grad_norm: 0.7545804832382539, iteration: 360407
loss: 0.9743450284004211,grad_norm: 0.8447696406084072, iteration: 360408
loss: 0.989422082901001,grad_norm: 0.7125108966352913, iteration: 360409
loss: 1.0023372173309326,grad_norm: 0.9999991424688935, iteration: 360410
loss: 0.9949744343757629,grad_norm: 0.8202505731187286, iteration: 360411
loss: 1.0143060684204102,grad_norm: 0.7607897894845611, iteration: 360412
loss: 1.0133843421936035,grad_norm: 0.8311573585763016, iteration: 360413
loss: 1.0021042823791504,grad_norm: 0.6819953139402848, iteration: 360414
loss: 1.0304806232452393,grad_norm: 0.9999990571943231, iteration: 360415
loss: 1.0019190311431885,grad_norm: 0.8931145216412654, iteration: 360416
loss: 1.041528344154358,grad_norm: 0.844634390260855, iteration: 360417
loss: 0.9626208543777466,grad_norm: 0.8137304732677135, iteration: 360418
loss: 0.985226571559906,grad_norm: 0.9999989644581604, iteration: 360419
loss: 1.012850046157837,grad_norm: 0.6426417780143815, iteration: 360420
loss: 1.006706953048706,grad_norm: 0.8285203345431373, iteration: 360421
loss: 1.0080349445343018,grad_norm: 0.651531464717901, iteration: 360422
loss: 1.0526528358459473,grad_norm: 0.9999999597135166, iteration: 360423
loss: 1.041170597076416,grad_norm: 0.9999996977448632, iteration: 360424
loss: 0.9665488600730896,grad_norm: 0.7415965732643537, iteration: 360425
loss: 0.9906014800071716,grad_norm: 0.99785271692175, iteration: 360426
loss: 1.0265806913375854,grad_norm: 0.8483228198657305, iteration: 360427
loss: 1.0154376029968262,grad_norm: 0.8786544017266681, iteration: 360428
loss: 1.11411452293396,grad_norm: 0.9999990574417447, iteration: 360429
loss: 0.9925256371498108,grad_norm: 0.6990212331977836, iteration: 360430
loss: 1.0171990394592285,grad_norm: 0.7311550708529517, iteration: 360431
loss: 1.0326664447784424,grad_norm: 0.9281886605424586, iteration: 360432
loss: 0.9947696924209595,grad_norm: 0.8939597940120888, iteration: 360433
loss: 0.9664209485054016,grad_norm: 0.9587336025347934, iteration: 360434
loss: 1.069283127784729,grad_norm: 0.9999991261547817, iteration: 360435
loss: 1.01144278049469,grad_norm: 0.8157053722008027, iteration: 360436
loss: 0.9563639163970947,grad_norm: 0.9684024383292177, iteration: 360437
loss: 0.9834253787994385,grad_norm: 0.7581480043364698, iteration: 360438
loss: 0.9989737868309021,grad_norm: 0.7099154496362942, iteration: 360439
loss: 0.9967929720878601,grad_norm: 0.9999998294205501, iteration: 360440
loss: 1.0031994581222534,grad_norm: 0.7470072505941262, iteration: 360441
loss: 1.0152714252471924,grad_norm: 0.8162753018257003, iteration: 360442
loss: 1.0222058296203613,grad_norm: 0.9999991526164811, iteration: 360443
loss: 0.9950721859931946,grad_norm: 0.7951279293325839, iteration: 360444
loss: 1.043227195739746,grad_norm: 0.7532358548562009, iteration: 360445
loss: 1.0197757482528687,grad_norm: 0.9219890414900871, iteration: 360446
loss: 1.0058891773223877,grad_norm: 0.7660962372469275, iteration: 360447
loss: 1.0121805667877197,grad_norm: 0.7787328280546452, iteration: 360448
loss: 1.0482217073440552,grad_norm: 0.9999999718683438, iteration: 360449
loss: 0.9888701438903809,grad_norm: 0.8496527793843237, iteration: 360450
loss: 1.0086960792541504,grad_norm: 0.9999998839238099, iteration: 360451
loss: 0.9938279986381531,grad_norm: 0.9556646874309555, iteration: 360452
loss: 0.993564784526825,grad_norm: 0.8854740944653398, iteration: 360453
loss: 0.9980611801147461,grad_norm: 0.68191672925266, iteration: 360454
loss: 1.0011849403381348,grad_norm: 0.9378145307205917, iteration: 360455
loss: 0.9441561102867126,grad_norm: 0.7384186580888669, iteration: 360456
loss: 1.0236066579818726,grad_norm: 0.8710920334237046, iteration: 360457
loss: 0.9863638877868652,grad_norm: 0.789779807404048, iteration: 360458
loss: 0.9966678023338318,grad_norm: 0.9999998941210936, iteration: 360459
loss: 0.9779402017593384,grad_norm: 0.8470424037953745, iteration: 360460
loss: 1.0732214450836182,grad_norm: 0.8785922441178465, iteration: 360461
loss: 1.0441685914993286,grad_norm: 0.9999996351949166, iteration: 360462
loss: 1.0375980138778687,grad_norm: 0.9944006486978844, iteration: 360463
loss: 0.9840958714485168,grad_norm: 0.7562166645146894, iteration: 360464
loss: 0.9924582242965698,grad_norm: 0.7620267553606985, iteration: 360465
loss: 1.015446662902832,grad_norm: 0.9462098526781797, iteration: 360466
loss: 1.0241583585739136,grad_norm: 0.8923934186533857, iteration: 360467
loss: 1.038903832435608,grad_norm: 0.9504305389604824, iteration: 360468
loss: 1.0279591083526611,grad_norm: 0.9999990250181882, iteration: 360469
loss: 1.0079154968261719,grad_norm: 0.8151669957196256, iteration: 360470
loss: 0.990896463394165,grad_norm: 0.8656491558707272, iteration: 360471
loss: 1.014043927192688,grad_norm: 0.9999994622018777, iteration: 360472
loss: 1.0336179733276367,grad_norm: 0.8289846052025291, iteration: 360473
loss: 1.012137532234192,grad_norm: 0.8602921691970834, iteration: 360474
loss: 0.9872137308120728,grad_norm: 0.7087900934105225, iteration: 360475
loss: 0.9967930316925049,grad_norm: 0.7126043340894896, iteration: 360476
loss: 1.0112617015838623,grad_norm: 0.7965773001300277, iteration: 360477
loss: 0.9979683756828308,grad_norm: 0.6796296609847308, iteration: 360478
loss: 0.998558521270752,grad_norm: 0.7491641095323741, iteration: 360479
loss: 0.9761012196540833,grad_norm: 0.7704044016522201, iteration: 360480
loss: 0.9869386553764343,grad_norm: 0.762149440731348, iteration: 360481
loss: 1.0107802152633667,grad_norm: 0.8300958428826714, iteration: 360482
loss: 1.0275018215179443,grad_norm: 0.8227839010962157, iteration: 360483
loss: 0.9526337385177612,grad_norm: 0.9999989748041065, iteration: 360484
loss: 1.0171787738800049,grad_norm: 0.8793463423910873, iteration: 360485
loss: 0.9703701138496399,grad_norm: 0.7803513076736424, iteration: 360486
loss: 0.9787986874580383,grad_norm: 0.9197598638940419, iteration: 360487
loss: 1.0350052118301392,grad_norm: 0.9999996593735352, iteration: 360488
loss: 1.0056546926498413,grad_norm: 0.7494774090987159, iteration: 360489
loss: 1.0113879442214966,grad_norm: 0.8699024805369222, iteration: 360490
loss: 1.0430543422698975,grad_norm: 0.9651517973684217, iteration: 360491
loss: 1.0299626588821411,grad_norm: 0.9420790592485139, iteration: 360492
loss: 1.00035560131073,grad_norm: 0.711177629106127, iteration: 360493
loss: 0.9934706687927246,grad_norm: 0.8179246190653043, iteration: 360494
loss: 1.015269160270691,grad_norm: 0.9081384983784658, iteration: 360495
loss: 1.0096700191497803,grad_norm: 0.8337595164680528, iteration: 360496
loss: 1.025779128074646,grad_norm: 0.9349935304728326, iteration: 360497
loss: 1.010514259338379,grad_norm: 0.9999991913725347, iteration: 360498
loss: 1.0218591690063477,grad_norm: 0.9999998985076628, iteration: 360499
loss: 0.9679336547851562,grad_norm: 0.7189537333716145, iteration: 360500
loss: 1.002200722694397,grad_norm: 0.9193204673173602, iteration: 360501
loss: 0.9838504195213318,grad_norm: 0.8424087706778107, iteration: 360502
loss: 0.9986795783042908,grad_norm: 0.8915530544180171, iteration: 360503
loss: 1.0100517272949219,grad_norm: 0.7367537274887912, iteration: 360504
loss: 0.9931338429450989,grad_norm: 0.661191364112028, iteration: 360505
loss: 1.0092480182647705,grad_norm: 0.7553014619579496, iteration: 360506
loss: 1.050429344177246,grad_norm: 0.9999991174821034, iteration: 360507
loss: 1.0053589344024658,grad_norm: 0.7669902114669975, iteration: 360508
loss: 0.9932180643081665,grad_norm: 0.9974217283983207, iteration: 360509
loss: 0.9870323538780212,grad_norm: 0.9457132758849123, iteration: 360510
loss: 0.9919642210006714,grad_norm: 0.7179406059603678, iteration: 360511
loss: 1.0837205648422241,grad_norm: 0.9999992423409872, iteration: 360512
loss: 1.0237362384796143,grad_norm: 0.7906465885981588, iteration: 360513
loss: 1.1113369464874268,grad_norm: 0.99999948757241, iteration: 360514
loss: 0.9897385239601135,grad_norm: 0.9533069559131012, iteration: 360515
loss: 0.990483283996582,grad_norm: 0.9999992038226672, iteration: 360516
loss: 0.9795230031013489,grad_norm: 0.8712851836453426, iteration: 360517
loss: 0.9656016826629639,grad_norm: 0.7870940229746837, iteration: 360518
loss: 1.0058118104934692,grad_norm: 0.7074362948699535, iteration: 360519
loss: 1.1166695356369019,grad_norm: 0.8749728660953333, iteration: 360520
loss: 1.1324803829193115,grad_norm: 0.9999991197761209, iteration: 360521
loss: 1.0759700536727905,grad_norm: 0.9999998915034093, iteration: 360522
loss: 1.0183476209640503,grad_norm: 0.9999990525211726, iteration: 360523
loss: 1.0204017162322998,grad_norm: 0.8919139837182142, iteration: 360524
loss: 0.9792415499687195,grad_norm: 0.7065606644952804, iteration: 360525
loss: 1.034849762916565,grad_norm: 0.999999350617389, iteration: 360526
loss: 0.9939402937889099,grad_norm: 0.8047104611797157, iteration: 360527
loss: 1.0202877521514893,grad_norm: 0.8910658000081751, iteration: 360528
loss: 1.0095453262329102,grad_norm: 0.6665450641579493, iteration: 360529
loss: 1.018481731414795,grad_norm: 0.715568368158697, iteration: 360530
loss: 0.9785432815551758,grad_norm: 0.8046122982921262, iteration: 360531
loss: 0.9935728907585144,grad_norm: 0.8775361737646196, iteration: 360532
loss: 1.0429937839508057,grad_norm: 0.9999998076166666, iteration: 360533
loss: 0.9695626497268677,grad_norm: 0.9622662924852371, iteration: 360534
loss: 1.0148030519485474,grad_norm: 0.6428669094489137, iteration: 360535
loss: 1.0127248764038086,grad_norm: 0.8207049464798868, iteration: 360536
loss: 1.0132405757904053,grad_norm: 0.9999998853661859, iteration: 360537
loss: 1.0070828199386597,grad_norm: 0.7396956633602356, iteration: 360538
loss: 1.036069631576538,grad_norm: 0.748636939645818, iteration: 360539
loss: 1.0313249826431274,grad_norm: 0.7751115822650984, iteration: 360540
loss: 1.0579496622085571,grad_norm: 0.7950066577380224, iteration: 360541
loss: 1.0381752252578735,grad_norm: 0.7999232453727886, iteration: 360542
loss: 0.9897773265838623,grad_norm: 0.8498711349288276, iteration: 360543
loss: 0.9880920052528381,grad_norm: 0.7702642239781672, iteration: 360544
loss: 1.0062426328659058,grad_norm: 0.8781745325195233, iteration: 360545
loss: 0.9864671230316162,grad_norm: 0.7661737346277346, iteration: 360546
loss: 1.0305213928222656,grad_norm: 0.9999995904226053, iteration: 360547
loss: 0.9664875268936157,grad_norm: 0.8836593280584847, iteration: 360548
loss: 1.0232462882995605,grad_norm: 0.7237744229739859, iteration: 360549
loss: 0.9924208521842957,grad_norm: 0.7032551332501268, iteration: 360550
loss: 0.9975112676620483,grad_norm: 0.8506108931543307, iteration: 360551
loss: 0.9585361480712891,grad_norm: 0.8536135869975598, iteration: 360552
loss: 1.009616732597351,grad_norm: 0.7551212070238791, iteration: 360553
loss: 0.9954244494438171,grad_norm: 0.8245225954859208, iteration: 360554
loss: 0.9784725308418274,grad_norm: 0.8253446685890581, iteration: 360555
loss: 0.9839588403701782,grad_norm: 0.7535248727394485, iteration: 360556
loss: 1.0654759407043457,grad_norm: 0.897348086088986, iteration: 360557
loss: 1.0039418935775757,grad_norm: 0.881442417916327, iteration: 360558
loss: 1.0037542581558228,grad_norm: 0.8259838098846014, iteration: 360559
loss: 1.0353959798812866,grad_norm: 0.9999995428445053, iteration: 360560
loss: 0.9772059917449951,grad_norm: 0.9316636731524172, iteration: 360561
loss: 1.0026286840438843,grad_norm: 0.9210557866642372, iteration: 360562
loss: 0.9575071334838867,grad_norm: 0.8062064510949931, iteration: 360563
loss: 0.9899987578392029,grad_norm: 0.7751400347939778, iteration: 360564
loss: 0.9793604016304016,grad_norm: 0.7903002034148443, iteration: 360565
loss: 0.9847173094749451,grad_norm: 0.9999992243489344, iteration: 360566
loss: 0.9648943543434143,grad_norm: 0.8203453016318584, iteration: 360567
loss: 1.0190163850784302,grad_norm: 0.8540501462327853, iteration: 360568
loss: 1.0065560340881348,grad_norm: 0.8553046805482312, iteration: 360569
loss: 0.9819404482841492,grad_norm: 0.9153128524555678, iteration: 360570
loss: 0.9965077638626099,grad_norm: 0.7947151247982303, iteration: 360571
loss: 1.0009503364562988,grad_norm: 0.9999990185074217, iteration: 360572
loss: 1.036081075668335,grad_norm: 0.8157404580780638, iteration: 360573
loss: 0.9686976075172424,grad_norm: 0.8905565547265513, iteration: 360574
loss: 0.9973087906837463,grad_norm: 0.8219285154553652, iteration: 360575
loss: 1.0315337181091309,grad_norm: 0.8327209196722009, iteration: 360576
loss: 1.0582408905029297,grad_norm: 0.9999995344619937, iteration: 360577
loss: 1.0020558834075928,grad_norm: 0.7463582092605429, iteration: 360578
loss: 0.9710622429847717,grad_norm: 0.9815680611047478, iteration: 360579
loss: 1.0250903367996216,grad_norm: 0.8887393149720305, iteration: 360580
loss: 1.0564476251602173,grad_norm: 0.7781259677677274, iteration: 360581
loss: 1.022122859954834,grad_norm: 0.8088205283806053, iteration: 360582
loss: 1.0406813621520996,grad_norm: 0.9999993447014073, iteration: 360583
loss: 1.0255650281906128,grad_norm: 0.6800769866405741, iteration: 360584
loss: 1.0065715312957764,grad_norm: 0.8439302343262284, iteration: 360585
loss: 0.975769579410553,grad_norm: 0.7733224211466588, iteration: 360586
loss: 1.002069115638733,grad_norm: 0.9999990959749966, iteration: 360587
loss: 1.0027015209197998,grad_norm: 0.6887588035427089, iteration: 360588
loss: 1.078260898590088,grad_norm: 0.9483386361399004, iteration: 360589
loss: 1.02945077419281,grad_norm: 0.835644543791971, iteration: 360590
loss: 1.0253785848617554,grad_norm: 0.8903467314770933, iteration: 360591
loss: 1.0167982578277588,grad_norm: 0.7760281365813608, iteration: 360592
loss: 1.0334205627441406,grad_norm: 0.8286020193441407, iteration: 360593
loss: 0.9858276844024658,grad_norm: 0.8061487653545188, iteration: 360594
loss: 1.0022125244140625,grad_norm: 0.785428221495598, iteration: 360595
loss: 0.9768815040588379,grad_norm: 0.7533917479162198, iteration: 360596
loss: 1.0142707824707031,grad_norm: 0.9999990734186067, iteration: 360597
loss: 0.9905557036399841,grad_norm: 0.7037754292190672, iteration: 360598
loss: 1.015224575996399,grad_norm: 0.8069736130007958, iteration: 360599
loss: 1.01877760887146,grad_norm: 0.8316750267204496, iteration: 360600
loss: 0.9747176766395569,grad_norm: 0.8897947120468784, iteration: 360601
loss: 1.0015017986297607,grad_norm: 0.8044292221777974, iteration: 360602
loss: 0.9726210832595825,grad_norm: 0.8248455380885616, iteration: 360603
loss: 1.0004421472549438,grad_norm: 0.696020627896219, iteration: 360604
loss: 1.0118017196655273,grad_norm: 0.8635349679593389, iteration: 360605
loss: 0.9916648268699646,grad_norm: 0.722921692877554, iteration: 360606
loss: 1.0364041328430176,grad_norm: 0.7510601565029114, iteration: 360607
loss: 0.9750882983207703,grad_norm: 0.8099904110524293, iteration: 360608
loss: 1.018004059791565,grad_norm: 0.9236801683219118, iteration: 360609
loss: 1.006820559501648,grad_norm: 0.6983430799929106, iteration: 360610
loss: 0.9926766157150269,grad_norm: 0.7734060064695624, iteration: 360611
loss: 1.0227769613265991,grad_norm: 0.8227906443296833, iteration: 360612
loss: 1.0471442937850952,grad_norm: 0.8289897360406016, iteration: 360613
loss: 0.9934152364730835,grad_norm: 0.7617149791790347, iteration: 360614
loss: 1.0071053504943848,grad_norm: 0.7541474823163794, iteration: 360615
loss: 1.0112500190734863,grad_norm: 0.7991097000045857, iteration: 360616
loss: 0.988135039806366,grad_norm: 0.7718892635249561, iteration: 360617
loss: 0.9998260736465454,grad_norm: 0.7632358023541607, iteration: 360618
loss: 1.0663114786148071,grad_norm: 0.9962940668718013, iteration: 360619
loss: 1.0105116367340088,grad_norm: 0.788094127537257, iteration: 360620
loss: 0.9939907193183899,grad_norm: 0.7477365289847354, iteration: 360621
loss: 1.0093923807144165,grad_norm: 0.7287530150331797, iteration: 360622
loss: 1.0187084674835205,grad_norm: 0.8154450735121037, iteration: 360623
loss: 0.9742218255996704,grad_norm: 0.8267975219436423, iteration: 360624
loss: 1.0067681074142456,grad_norm: 0.7613189309652085, iteration: 360625
loss: 1.0416158437728882,grad_norm: 0.999999211531436, iteration: 360626
loss: 1.0304681062698364,grad_norm: 0.6988924200505305, iteration: 360627
loss: 0.9832127094268799,grad_norm: 0.7020204503318959, iteration: 360628
loss: 0.9968550205230713,grad_norm: 0.8763748129633618, iteration: 360629
loss: 1.0004736185073853,grad_norm: 0.7775354196135409, iteration: 360630
loss: 1.0100305080413818,grad_norm: 0.7898917519628741, iteration: 360631
loss: 1.0283421277999878,grad_norm: 0.9999990973471342, iteration: 360632
loss: 0.9919461607933044,grad_norm: 0.7697529822622775, iteration: 360633
loss: 0.9855648279190063,grad_norm: 0.9999991020000417, iteration: 360634
loss: 1.0040396451950073,grad_norm: 0.8613959312784961, iteration: 360635
loss: 1.0258954763412476,grad_norm: 0.8529081329838897, iteration: 360636
loss: 1.0099624395370483,grad_norm: 0.6628849012047963, iteration: 360637
loss: 0.9964869618415833,grad_norm: 0.8238252159160204, iteration: 360638
loss: 1.0357478857040405,grad_norm: 0.9999991904404747, iteration: 360639
loss: 1.048934817314148,grad_norm: 0.8547311161997896, iteration: 360640
loss: 1.071433663368225,grad_norm: 0.9999998593937541, iteration: 360641
loss: 0.9668404459953308,grad_norm: 0.8351032219994667, iteration: 360642
loss: 1.0769504308700562,grad_norm: 0.9999993252869145, iteration: 360643
loss: 1.008256196975708,grad_norm: 0.9999990457290693, iteration: 360644
loss: 0.9663892984390259,grad_norm: 0.8185335838911799, iteration: 360645
loss: 0.9559898972511292,grad_norm: 0.7602570476857818, iteration: 360646
loss: 0.9759051203727722,grad_norm: 0.9999990438247619, iteration: 360647
loss: 1.0240353345870972,grad_norm: 0.9999995185797587, iteration: 360648
loss: 1.0518090724945068,grad_norm: 0.9999996521563536, iteration: 360649
loss: 0.941611111164093,grad_norm: 0.7299600674347543, iteration: 360650
loss: 1.0038179159164429,grad_norm: 0.7491678264885904, iteration: 360651
loss: 0.9815098643302917,grad_norm: 0.9999993221615072, iteration: 360652
loss: 0.9946725368499756,grad_norm: 0.8824113167504223, iteration: 360653
loss: 0.9586181640625,grad_norm: 0.7815770750508925, iteration: 360654
loss: 1.004377841949463,grad_norm: 0.7246195398409563, iteration: 360655
loss: 1.0131772756576538,grad_norm: 0.8904884701760498, iteration: 360656
loss: 0.9918773174285889,grad_norm: 0.8125876328710367, iteration: 360657
loss: 1.013279914855957,grad_norm: 0.7194816492326801, iteration: 360658
loss: 1.015174388885498,grad_norm: 0.978626859893121, iteration: 360659
loss: 1.010499119758606,grad_norm: 0.7882238795425315, iteration: 360660
loss: 1.036329746246338,grad_norm: 0.8857106297546808, iteration: 360661
loss: 0.9805229306221008,grad_norm: 0.9999993243040523, iteration: 360662
loss: 1.0289099216461182,grad_norm: 0.7644048826362376, iteration: 360663
loss: 1.0097720623016357,grad_norm: 0.8226061881879864, iteration: 360664
loss: 1.0150866508483887,grad_norm: 0.8480128342735207, iteration: 360665
loss: 0.9921160340309143,grad_norm: 0.7666787008480044, iteration: 360666
loss: 0.9980390071868896,grad_norm: 0.7115500370903549, iteration: 360667
loss: 1.0148228406906128,grad_norm: 0.7934056738616924, iteration: 360668
loss: 0.9758473634719849,grad_norm: 0.7486972297239098, iteration: 360669
loss: 0.9869682788848877,grad_norm: 0.9266963326503684, iteration: 360670
loss: 0.9757673144340515,grad_norm: 0.785479907730558, iteration: 360671
loss: 1.0348162651062012,grad_norm: 0.7616049507109213, iteration: 360672
loss: 1.0119562149047852,grad_norm: 0.768424926800994, iteration: 360673
loss: 1.0404140949249268,grad_norm: 0.944679127208494, iteration: 360674
loss: 0.9928233623504639,grad_norm: 0.716912786058991, iteration: 360675
loss: 1.0491911172866821,grad_norm: 0.8758598931643656, iteration: 360676
loss: 1.0151698589324951,grad_norm: 0.7942091230430348, iteration: 360677
loss: 0.9603563547134399,grad_norm: 0.7063402214106478, iteration: 360678
loss: 0.9574247598648071,grad_norm: 0.9441923887350014, iteration: 360679
loss: 0.9789331555366516,grad_norm: 0.8769769477044359, iteration: 360680
loss: 1.1319472789764404,grad_norm: 0.9960728619981423, iteration: 360681
loss: 1.0327917337417603,grad_norm: 0.8277192602374762, iteration: 360682
loss: 0.9740957021713257,grad_norm: 0.7650360273828155, iteration: 360683
loss: 0.9910160899162292,grad_norm: 0.7533054554445789, iteration: 360684
loss: 0.9996406435966492,grad_norm: 0.9514958319695568, iteration: 360685
loss: 0.9928959608078003,grad_norm: 0.9128832490009644, iteration: 360686
loss: 1.1041016578674316,grad_norm: 0.9999994254275082, iteration: 360687
loss: 1.0276139974594116,grad_norm: 0.999999302430681, iteration: 360688
loss: 1.0324606895446777,grad_norm: 0.8565115014041143, iteration: 360689
loss: 1.0179164409637451,grad_norm: 0.856460668840911, iteration: 360690
loss: 0.9835382103919983,grad_norm: 0.9710350170573534, iteration: 360691
loss: 0.970397412776947,grad_norm: 0.7916968291907395, iteration: 360692
loss: 1.0034904479980469,grad_norm: 0.9999991804772178, iteration: 360693
loss: 1.0290279388427734,grad_norm: 0.9206212376539394, iteration: 360694
loss: 1.0141255855560303,grad_norm: 0.7446019675079939, iteration: 360695
loss: 0.9953488707542419,grad_norm: 0.8880280273311917, iteration: 360696
loss: 0.9884040355682373,grad_norm: 0.8837044661439393, iteration: 360697
loss: 1.0107238292694092,grad_norm: 0.9163268501926543, iteration: 360698
loss: 1.0482569932937622,grad_norm: 0.999999119373469, iteration: 360699
loss: 0.9783298969268799,grad_norm: 0.9999990517899633, iteration: 360700
loss: 1.0014549493789673,grad_norm: 0.9220707588739668, iteration: 360701
loss: 1.0067964792251587,grad_norm: 0.7846264643355083, iteration: 360702
loss: 1.028106927871704,grad_norm: 0.6885936197493013, iteration: 360703
loss: 1.002441644668579,grad_norm: 0.6861762966857873, iteration: 360704
loss: 1.0242186784744263,grad_norm: 0.8844734325200105, iteration: 360705
loss: 1.0410468578338623,grad_norm: 0.823107069433355, iteration: 360706
loss: 1.0985790491104126,grad_norm: 0.9999990864514473, iteration: 360707
loss: 1.0179365873336792,grad_norm: 0.8136603375407947, iteration: 360708
loss: 1.0256903171539307,grad_norm: 0.917843431049751, iteration: 360709
loss: 0.9744709730148315,grad_norm: 0.5793700947500842, iteration: 360710
loss: 0.9945348501205444,grad_norm: 0.9999995152780168, iteration: 360711
loss: 1.1132621765136719,grad_norm: 0.9999999409488595, iteration: 360712
loss: 1.0656871795654297,grad_norm: 0.8357640351135407, iteration: 360713
loss: 1.001166820526123,grad_norm: 0.7594979444104807, iteration: 360714
loss: 1.0020920038223267,grad_norm: 0.7420347802492909, iteration: 360715
loss: 0.9828636646270752,grad_norm: 0.7874029814709076, iteration: 360716
loss: 0.9971715211868286,grad_norm: 0.8678576463142971, iteration: 360717
loss: 1.0293138027191162,grad_norm: 0.9999998384765371, iteration: 360718
loss: 1.0072154998779297,grad_norm: 0.7303841095960948, iteration: 360719
loss: 0.9713831543922424,grad_norm: 0.7411327401870694, iteration: 360720
loss: 1.0031059980392456,grad_norm: 0.9561583136519234, iteration: 360721
loss: 0.9957115054130554,grad_norm: 0.9134120831661209, iteration: 360722
loss: 1.077236533164978,grad_norm: 0.8912169247758707, iteration: 360723
loss: 1.0037388801574707,grad_norm: 0.7834379613607196, iteration: 360724
loss: 1.0000567436218262,grad_norm: 0.7920338229049395, iteration: 360725
loss: 0.9784075021743774,grad_norm: 0.8712795938948955, iteration: 360726
loss: 0.9879264235496521,grad_norm: 0.8181905712371434, iteration: 360727
loss: 1.0169402360916138,grad_norm: 0.8351242705545828, iteration: 360728
loss: 0.9907101392745972,grad_norm: 0.7465066682229381, iteration: 360729
loss: 0.9798269271850586,grad_norm: 0.9019204790143474, iteration: 360730
loss: 0.9867307543754578,grad_norm: 0.8697733275557066, iteration: 360731
loss: 1.0106662511825562,grad_norm: 0.8564117756797591, iteration: 360732
loss: 1.0135383605957031,grad_norm: 0.8430881645722756, iteration: 360733
loss: 1.010727047920227,grad_norm: 0.7216494857336215, iteration: 360734
loss: 1.0001581907272339,grad_norm: 0.9999992013497989, iteration: 360735
loss: 1.0004807710647583,grad_norm: 0.7589891443812521, iteration: 360736
loss: 1.020272970199585,grad_norm: 0.8776859136127897, iteration: 360737
loss: 1.0199904441833496,grad_norm: 0.8022374424669493, iteration: 360738
loss: 0.9993994235992432,grad_norm: 0.8522809719460596, iteration: 360739
loss: 0.9837104678153992,grad_norm: 0.8181482139564583, iteration: 360740
loss: 1.0296869277954102,grad_norm: 0.9999994577311189, iteration: 360741
loss: 1.0138264894485474,grad_norm: 0.8462834645858294, iteration: 360742
loss: 1.0268747806549072,grad_norm: 0.7423824131065156, iteration: 360743
loss: 1.0338923931121826,grad_norm: 0.9999994674648327, iteration: 360744
loss: 1.0927786827087402,grad_norm: 0.9999990591268338, iteration: 360745
loss: 0.9684178233146667,grad_norm: 0.9534738967205622, iteration: 360746
loss: 1.0004785060882568,grad_norm: 0.754851329682502, iteration: 360747
loss: 1.002944827079773,grad_norm: 0.7635320725353676, iteration: 360748
loss: 0.9974471926689148,grad_norm: 0.9434969617747418, iteration: 360749
loss: 1.0269618034362793,grad_norm: 0.7986827018629143, iteration: 360750
loss: 1.0216758251190186,grad_norm: 0.8265011453866211, iteration: 360751
loss: 0.982090950012207,grad_norm: 0.7480364197019289, iteration: 360752
loss: 0.9707896113395691,grad_norm: 0.8455371299569104, iteration: 360753
loss: 0.9966478943824768,grad_norm: 0.9999991125192503, iteration: 360754
loss: 1.0470335483551025,grad_norm: 0.9999990687258565, iteration: 360755
loss: 1.0049463510513306,grad_norm: 0.9999992545731756, iteration: 360756
loss: 1.0596898794174194,grad_norm: 0.9999996918316155, iteration: 360757
loss: 1.0051684379577637,grad_norm: 0.8453016916353228, iteration: 360758
loss: 1.0271937847137451,grad_norm: 0.7136143319860043, iteration: 360759
loss: 1.0644726753234863,grad_norm: 0.9999991992568805, iteration: 360760
loss: 1.0603922605514526,grad_norm: 0.9999992419737852, iteration: 360761
loss: 0.9790521264076233,grad_norm: 0.9173259293983936, iteration: 360762
loss: 0.9896458983421326,grad_norm: 0.7295981695035678, iteration: 360763
loss: 1.033347487449646,grad_norm: 0.7939247238306945, iteration: 360764
loss: 1.0177081823349,grad_norm: 0.6806548375494483, iteration: 360765
loss: 1.0003057718276978,grad_norm: 0.8224809182259121, iteration: 360766
loss: 1.0094237327575684,grad_norm: 0.9999996278970887, iteration: 360767
loss: 1.0206122398376465,grad_norm: 0.7810492848762214, iteration: 360768
loss: 1.0110732316970825,grad_norm: 0.7636782813541353, iteration: 360769
loss: 0.974878191947937,grad_norm: 0.6901342306576026, iteration: 360770
loss: 0.9823485612869263,grad_norm: 0.6768697016257179, iteration: 360771
loss: 1.0137923955917358,grad_norm: 0.7989764632604295, iteration: 360772
loss: 1.033659815788269,grad_norm: 0.8155880330486917, iteration: 360773
loss: 1.0556206703186035,grad_norm: 0.9999997619337528, iteration: 360774
loss: 1.0076128244400024,grad_norm: 0.9711319725447388, iteration: 360775
loss: 0.9971248507499695,grad_norm: 0.9099278952114015, iteration: 360776
loss: 0.9742463827133179,grad_norm: 0.9999999794049707, iteration: 360777
loss: 0.9927995204925537,grad_norm: 0.754374901458865, iteration: 360778
loss: 0.9970454573631287,grad_norm: 0.7432956030387196, iteration: 360779
loss: 0.9988625645637512,grad_norm: 0.8444739019838395, iteration: 360780
loss: 0.9990774989128113,grad_norm: 0.9543441001241297, iteration: 360781
loss: 0.9997522830963135,grad_norm: 0.7747258350938216, iteration: 360782
loss: 1.0079970359802246,grad_norm: 0.8166246819588556, iteration: 360783
loss: 1.000328779220581,grad_norm: 0.9999994880282498, iteration: 360784
loss: 0.9898577332496643,grad_norm: 0.8778817640686393, iteration: 360785
loss: 0.9578197598457336,grad_norm: 0.6439922568231895, iteration: 360786
loss: 1.0531902313232422,grad_norm: 0.6386978399855784, iteration: 360787
loss: 1.0028012990951538,grad_norm: 0.8774894978361059, iteration: 360788
loss: 1.019301176071167,grad_norm: 0.7812616635368352, iteration: 360789
loss: 1.086001992225647,grad_norm: 0.9999991471943448, iteration: 360790
loss: 1.0148543119430542,grad_norm: 0.8354439021282484, iteration: 360791
loss: 0.9946816563606262,grad_norm: 0.8347302326920303, iteration: 360792
loss: 1.0181909799575806,grad_norm: 0.7068069083837811, iteration: 360793
loss: 1.0021194219589233,grad_norm: 0.8626318454658519, iteration: 360794
loss: 1.0159775018692017,grad_norm: 0.8573079766480778, iteration: 360795
loss: 0.9769847989082336,grad_norm: 0.8505137609647649, iteration: 360796
loss: 1.013198971748352,grad_norm: 0.747049508213591, iteration: 360797
loss: 1.0540086030960083,grad_norm: 0.9999998798846893, iteration: 360798
loss: 1.0262209177017212,grad_norm: 0.8096669112128085, iteration: 360799
loss: 0.9946643710136414,grad_norm: 0.9068899407106545, iteration: 360800
loss: 1.010945200920105,grad_norm: 0.7436339520804082, iteration: 360801
loss: 0.9749578833580017,grad_norm: 0.8135103070260826, iteration: 360802
loss: 1.0472733974456787,grad_norm: 0.7823437104881396, iteration: 360803
loss: 1.1378315687179565,grad_norm: 0.9999996381428199, iteration: 360804
loss: 1.070088505744934,grad_norm: 0.9999991793802258, iteration: 360805
loss: 1.0194740295410156,grad_norm: 0.9999998396823004, iteration: 360806
loss: 0.9834362864494324,grad_norm: 0.7538121437443687, iteration: 360807
loss: 0.9799648523330688,grad_norm: 0.824742657442416, iteration: 360808
loss: 1.0062384605407715,grad_norm: 0.7712695678540982, iteration: 360809
loss: 0.9849539995193481,grad_norm: 0.7860713653134577, iteration: 360810
loss: 1.00883150100708,grad_norm: 0.779209888958959, iteration: 360811
loss: 0.9442231059074402,grad_norm: 0.8231995163219139, iteration: 360812
loss: 1.1198502779006958,grad_norm: 0.9999994393604371, iteration: 360813
loss: 0.9676384329795837,grad_norm: 0.9999998101616856, iteration: 360814
loss: 1.0115046501159668,grad_norm: 0.794940994685991, iteration: 360815
loss: 0.9909371733665466,grad_norm: 0.9509542816262851, iteration: 360816
loss: 0.9883604049682617,grad_norm: 0.882521869486821, iteration: 360817
loss: 1.0360709428787231,grad_norm: 0.9999991314194395, iteration: 360818
loss: 1.0288684368133545,grad_norm: 0.7903017595758943, iteration: 360819
loss: 0.9669840931892395,grad_norm: 0.7648367702258492, iteration: 360820
loss: 0.9526391625404358,grad_norm: 0.7190396220712059, iteration: 360821
loss: 0.9964275360107422,grad_norm: 0.999999125712918, iteration: 360822
loss: 1.0148807764053345,grad_norm: 0.999999133189338, iteration: 360823
loss: 1.0327941179275513,grad_norm: 0.8223640715946815, iteration: 360824
loss: 1.0285239219665527,grad_norm: 0.7745890736024582, iteration: 360825
loss: 1.2287546396255493,grad_norm: 0.9999990246054232, iteration: 360826
loss: 1.1182243824005127,grad_norm: 0.9999999049592462, iteration: 360827
loss: 0.9889369606971741,grad_norm: 0.8535863865669029, iteration: 360828
loss: 0.970639705657959,grad_norm: 0.8116212328256349, iteration: 360829
loss: 1.016822338104248,grad_norm: 0.9999993549199256, iteration: 360830
loss: 1.0608261823654175,grad_norm: 0.999999651134616, iteration: 360831
loss: 1.1320236921310425,grad_norm: 0.9999998302216474, iteration: 360832
loss: 1.0379266738891602,grad_norm: 1.000000008327572, iteration: 360833
loss: 0.980095624923706,grad_norm: 0.7026569347844781, iteration: 360834
loss: 1.1556565761566162,grad_norm: 0.9999994485143934, iteration: 360835
loss: 1.0073150396347046,grad_norm: 0.827735285812339, iteration: 360836
loss: 0.999549388885498,grad_norm: 0.7166848293834951, iteration: 360837
loss: 0.9917422533035278,grad_norm: 0.745510653109613, iteration: 360838
loss: 1.0042445659637451,grad_norm: 0.7480242044888543, iteration: 360839
loss: 1.0136104822158813,grad_norm: 0.8227125869274574, iteration: 360840
loss: 0.998982846736908,grad_norm: 0.8499115395809899, iteration: 360841
loss: 1.0198438167572021,grad_norm: 0.8049094731508011, iteration: 360842
loss: 1.0467255115509033,grad_norm: 0.9266581015766452, iteration: 360843
loss: 0.9777714610099792,grad_norm: 0.8277095316575783, iteration: 360844
loss: 1.0061875581741333,grad_norm: 0.9558099929549697, iteration: 360845
loss: 1.0237582921981812,grad_norm: 0.861541225422657, iteration: 360846
loss: 0.9823400378227234,grad_norm: 0.9859530157487991, iteration: 360847
loss: 1.0541504621505737,grad_norm: 0.9999990232413711, iteration: 360848
loss: 1.0238358974456787,grad_norm: 0.7736228353882154, iteration: 360849
loss: 1.0190305709838867,grad_norm: 0.7911796939979034, iteration: 360850
loss: 1.066563367843628,grad_norm: 0.9999996258481211, iteration: 360851
loss: 1.001064658164978,grad_norm: 0.7855700302126128, iteration: 360852
loss: 0.992306649684906,grad_norm: 0.7383016703715779, iteration: 360853
loss: 1.010076642036438,grad_norm: 0.8707131730290351, iteration: 360854
loss: 1.0322140455245972,grad_norm: 0.7881278322523694, iteration: 360855
loss: 0.9858644604682922,grad_norm: 0.8405078387981395, iteration: 360856
loss: 1.032406210899353,grad_norm: 0.8119188625185221, iteration: 360857
loss: 1.0797123908996582,grad_norm: 0.9999993632066304, iteration: 360858
loss: 1.0214641094207764,grad_norm: 0.8583664197228482, iteration: 360859
loss: 0.9673864841461182,grad_norm: 0.8921623471408302, iteration: 360860
loss: 0.9538536071777344,grad_norm: 0.8155313364801346, iteration: 360861
loss: 0.9780659079551697,grad_norm: 0.9016780404854386, iteration: 360862
loss: 1.0755449533462524,grad_norm: 0.8401100128549687, iteration: 360863
loss: 0.9848684668540955,grad_norm: 0.8464114429453012, iteration: 360864
loss: 0.9719677567481995,grad_norm: 0.8002778753298062, iteration: 360865
loss: 1.0356813669204712,grad_norm: 0.8827445100044125, iteration: 360866
loss: 1.0087597370147705,grad_norm: 0.9999997016928579, iteration: 360867
loss: 0.9606776237487793,grad_norm: 0.9363003645883187, iteration: 360868
loss: 1.036690592765808,grad_norm: 0.7900571809788587, iteration: 360869
loss: 1.004314661026001,grad_norm: 0.8618519472916784, iteration: 360870
loss: 1.030523657798767,grad_norm: 0.70056428477221, iteration: 360871
loss: 1.0338129997253418,grad_norm: 0.7867771877373007, iteration: 360872
loss: 0.9999971389770508,grad_norm: 0.9277586005177515, iteration: 360873
loss: 1.0007655620574951,grad_norm: 0.7080797821849492, iteration: 360874
loss: 1.0611473321914673,grad_norm: 0.8071413403071952, iteration: 360875
loss: 1.0023373365402222,grad_norm: 0.7361617676694495, iteration: 360876
loss: 1.0124597549438477,grad_norm: 0.9874308148823195, iteration: 360877
loss: 1.0230958461761475,grad_norm: 0.7846388151103288, iteration: 360878
loss: 0.9918464422225952,grad_norm: 0.8412895203897359, iteration: 360879
loss: 1.0602110624313354,grad_norm: 0.8229324019597516, iteration: 360880
loss: 1.2047325372695923,grad_norm: 0.9999997439846964, iteration: 360881
loss: 0.9924702644348145,grad_norm: 0.9456566097674747, iteration: 360882
loss: 1.021997332572937,grad_norm: 0.8450512644867154, iteration: 360883
loss: 0.9872856140136719,grad_norm: 0.753978057608374, iteration: 360884
loss: 1.0252701044082642,grad_norm: 0.9893319680242767, iteration: 360885
loss: 1.0106253623962402,grad_norm: 0.9211578927299223, iteration: 360886
loss: 0.9742798209190369,grad_norm: 0.6804197484288421, iteration: 360887
loss: 0.9888589382171631,grad_norm: 0.9048961793684137, iteration: 360888
loss: 0.9890971779823303,grad_norm: 0.7162165619463959, iteration: 360889
loss: 1.030164361000061,grad_norm: 0.9999991933217363, iteration: 360890
loss: 1.017188549041748,grad_norm: 0.8487586842609889, iteration: 360891
loss: 0.9952594637870789,grad_norm: 0.7853579385092824, iteration: 360892
loss: 0.9841175079345703,grad_norm: 0.7824269875108181, iteration: 360893
loss: 0.9935351014137268,grad_norm: 0.8464081982265401, iteration: 360894
loss: 1.0115315914154053,grad_norm: 0.8819340491650378, iteration: 360895
loss: 0.9940364360809326,grad_norm: 0.835173836470459, iteration: 360896
loss: 0.9851353764533997,grad_norm: 0.8044007093603103, iteration: 360897
loss: 1.0253446102142334,grad_norm: 0.9208190052676569, iteration: 360898
loss: 1.0044105052947998,grad_norm: 0.8371976091004096, iteration: 360899
loss: 0.9758811593055725,grad_norm: 0.7462687796222265, iteration: 360900
loss: 1.0315678119659424,grad_norm: 0.762058499286965, iteration: 360901
loss: 1.0074928998947144,grad_norm: 0.8003085694232004, iteration: 360902
loss: 1.0514633655548096,grad_norm: 0.9999999107770075, iteration: 360903
loss: 0.996437132358551,grad_norm: 0.7884557307397932, iteration: 360904
loss: 0.9787173867225647,grad_norm: 0.7407228980231183, iteration: 360905
loss: 1.0002690553665161,grad_norm: 0.7543548256580981, iteration: 360906
loss: 1.0284863710403442,grad_norm: 0.9867675334456109, iteration: 360907
loss: 1.0732667446136475,grad_norm: 0.9184441236324957, iteration: 360908
loss: 1.0095585584640503,grad_norm: 0.8310272264521247, iteration: 360909
loss: 0.9978654980659485,grad_norm: 0.9228172338366705, iteration: 360910
loss: 0.9944078922271729,grad_norm: 0.9802357743010666, iteration: 360911
loss: 1.012497067451477,grad_norm: 0.9099616983900294, iteration: 360912
loss: 1.0236128568649292,grad_norm: 0.9111263804526356, iteration: 360913
loss: 0.991024911403656,grad_norm: 0.7151299223219421, iteration: 360914
loss: 0.9792299866676331,grad_norm: 0.8213424135743443, iteration: 360915
loss: 0.9926225543022156,grad_norm: 0.8037009029774801, iteration: 360916
loss: 0.9806217551231384,grad_norm: 0.7859185990397956, iteration: 360917
loss: 0.9627184271812439,grad_norm: 0.815429640545202, iteration: 360918
loss: 1.0004469156265259,grad_norm: 0.987258342511128, iteration: 360919
loss: 0.9982041120529175,grad_norm: 0.7657845424514634, iteration: 360920
loss: 1.0400389432907104,grad_norm: 0.9284262154894934, iteration: 360921
loss: 1.055976390838623,grad_norm: 0.9999997502618534, iteration: 360922
loss: 1.2825506925582886,grad_norm: 0.9999994247912343, iteration: 360923
loss: 0.9742525219917297,grad_norm: 0.9416211864034435, iteration: 360924
loss: 0.984314501285553,grad_norm: 0.8711208959069928, iteration: 360925
loss: 1.0231105089187622,grad_norm: 0.8686047497033637, iteration: 360926
loss: 0.9752134680747986,grad_norm: 0.7552970219942665, iteration: 360927
loss: 0.9848738312721252,grad_norm: 0.8049218126762102, iteration: 360928
loss: 0.9954037666320801,grad_norm: 0.7773027738986775, iteration: 360929
loss: 1.0067102909088135,grad_norm: 0.781840370576019, iteration: 360930
loss: 1.008022427558899,grad_norm: 0.9926334101467, iteration: 360931
loss: 0.9912825226783752,grad_norm: 0.8350009339240805, iteration: 360932
loss: 1.006397008895874,grad_norm: 0.7696049354483488, iteration: 360933
loss: 1.0642322301864624,grad_norm: 0.7925606907774039, iteration: 360934
loss: 1.012319564819336,grad_norm: 0.8929263894053717, iteration: 360935
loss: 0.9977774620056152,grad_norm: 0.9249613058371288, iteration: 360936
loss: 1.0551143884658813,grad_norm: 0.9999994811350382, iteration: 360937
loss: 1.0435235500335693,grad_norm: 0.8810555500437186, iteration: 360938
loss: 1.035798192024231,grad_norm: 0.8876038416653841, iteration: 360939
loss: 0.9702061414718628,grad_norm: 0.8835683919427331, iteration: 360940
loss: 1.0226892232894897,grad_norm: 0.9999989956036367, iteration: 360941
loss: 0.9931653141975403,grad_norm: 0.9563382104402796, iteration: 360942
loss: 1.0137732028961182,grad_norm: 0.8731816248270826, iteration: 360943
loss: 1.0046237707138062,grad_norm: 0.7579987775045445, iteration: 360944
loss: 1.0193731784820557,grad_norm: 0.9999990779792205, iteration: 360945
loss: 0.9696779251098633,grad_norm: 0.7334597731845721, iteration: 360946
loss: 1.0184876918792725,grad_norm: 0.9063117402252877, iteration: 360947
loss: 0.9828761219978333,grad_norm: 0.8199728312562284, iteration: 360948
loss: 0.9577223658561707,grad_norm: 0.7677705933477992, iteration: 360949
loss: 1.0755503177642822,grad_norm: 0.9999995636564653, iteration: 360950
loss: 0.9690185785293579,grad_norm: 0.7907558220812414, iteration: 360951
loss: 1.0782570838928223,grad_norm: 0.9999994874424234, iteration: 360952
loss: 0.9934094548225403,grad_norm: 0.7689015409370371, iteration: 360953
loss: 0.9911251068115234,grad_norm: 0.8102320913403583, iteration: 360954
loss: 0.9897847175598145,grad_norm: 0.8832680091117925, iteration: 360955
loss: 0.9873570203781128,grad_norm: 0.8452762422540646, iteration: 360956
loss: 1.0218065977096558,grad_norm: 0.7367689631789673, iteration: 360957
loss: 0.9756180644035339,grad_norm: 0.8034678167218146, iteration: 360958
loss: 0.992830216884613,grad_norm: 0.9034103019421129, iteration: 360959
loss: 1.0422943830490112,grad_norm: 0.7177253135412064, iteration: 360960
loss: 0.986824095249176,grad_norm: 0.850051678975867, iteration: 360961
loss: 0.9872320294380188,grad_norm: 0.6646517996327588, iteration: 360962
loss: 0.9806464314460754,grad_norm: 0.9999988879877101, iteration: 360963
loss: 0.969847559928894,grad_norm: 0.7241462272675486, iteration: 360964
loss: 1.0817044973373413,grad_norm: 0.9999995467980886, iteration: 360965
loss: 1.0001651048660278,grad_norm: 0.8866484768240792, iteration: 360966
loss: 1.016741156578064,grad_norm: 0.9999999310901546, iteration: 360967
loss: 0.9876282811164856,grad_norm: 0.8517837541729414, iteration: 360968
loss: 1.016219973564148,grad_norm: 0.9999990442277378, iteration: 360969
loss: 0.9897510409355164,grad_norm: 0.7619771718913043, iteration: 360970
loss: 0.989512026309967,grad_norm: 0.8476518859460823, iteration: 360971
loss: 0.9915478825569153,grad_norm: 0.8180348531701795, iteration: 360972
loss: 0.996647298336029,grad_norm: 0.9999990538306172, iteration: 360973
loss: 1.012304425239563,grad_norm: 0.7599868546924713, iteration: 360974
loss: 1.0233045816421509,grad_norm: 0.8479432548388183, iteration: 360975
loss: 1.151518702507019,grad_norm: 0.8476383213051382, iteration: 360976
loss: 1.0107200145721436,grad_norm: 0.7635112725668879, iteration: 360977
loss: 0.9682327508926392,grad_norm: 0.8137660715120957, iteration: 360978
loss: 0.994531512260437,grad_norm: 0.743524827463629, iteration: 360979
loss: 0.9978326559066772,grad_norm: 0.8070957733171438, iteration: 360980
loss: 0.9758096933364868,grad_norm: 0.7832354892500997, iteration: 360981
loss: 1.0045355558395386,grad_norm: 0.8971225337684601, iteration: 360982
loss: 1.0185760259628296,grad_norm: 0.914625931923071, iteration: 360983
loss: 1.0879310369491577,grad_norm: 0.9999995076799227, iteration: 360984
loss: 1.0273513793945312,grad_norm: 0.9999991364934628, iteration: 360985
loss: 1.01947820186615,grad_norm: 0.9999992956258819, iteration: 360986
loss: 1.0196034908294678,grad_norm: 0.7321200606223848, iteration: 360987
loss: 0.9767700433731079,grad_norm: 0.7862647349412981, iteration: 360988
loss: 0.9970152974128723,grad_norm: 0.8146074289852848, iteration: 360989
loss: 1.0339548587799072,grad_norm: 0.9224145586124483, iteration: 360990
loss: 0.9497236609458923,grad_norm: 0.843642727578699, iteration: 360991
loss: 0.9843804240226746,grad_norm: 0.8778018808665841, iteration: 360992
loss: 1.0030912160873413,grad_norm: 0.7834158322518181, iteration: 360993
loss: 1.0113991498947144,grad_norm: 0.8426788237121137, iteration: 360994
loss: 1.0191301107406616,grad_norm: 0.8143508611471991, iteration: 360995
loss: 1.0621557235717773,grad_norm: 0.6978355205559809, iteration: 360996
loss: 0.9803775548934937,grad_norm: 0.7941888871903948, iteration: 360997
loss: 0.988555371761322,grad_norm: 0.8067288694496423, iteration: 360998
loss: 0.9944009780883789,grad_norm: 0.8322869045029185, iteration: 360999
loss: 0.9902502298355103,grad_norm: 0.8612077771221864, iteration: 361000
loss: 1.2190371751785278,grad_norm: 0.999999982640185, iteration: 361001
loss: 1.0013240575790405,grad_norm: 0.8266817750508625, iteration: 361002
loss: 0.9532443284988403,grad_norm: 0.98256955380668, iteration: 361003
loss: 1.0646135807037354,grad_norm: 0.8739468223514973, iteration: 361004
loss: 1.0079691410064697,grad_norm: 0.7804111192819838, iteration: 361005
loss: 0.965996265411377,grad_norm: 0.739753989209675, iteration: 361006
loss: 1.019396185874939,grad_norm: 0.9082510541830826, iteration: 361007
loss: 1.0108340978622437,grad_norm: 0.7498027040712939, iteration: 361008
loss: 0.9830251336097717,grad_norm: 0.9521581399179277, iteration: 361009
loss: 0.9952445030212402,grad_norm: 0.8142141122664666, iteration: 361010
loss: 1.0008095502853394,grad_norm: 0.9714691316331155, iteration: 361011
loss: 1.0264194011688232,grad_norm: 0.9999991933163331, iteration: 361012
loss: 1.0092532634735107,grad_norm: 0.8993994921321072, iteration: 361013
loss: 0.997052013874054,grad_norm: 0.7897175209102276, iteration: 361014
loss: 0.9838957190513611,grad_norm: 0.9190629318119914, iteration: 361015
loss: 0.9632734060287476,grad_norm: 0.8889450455641115, iteration: 361016
loss: 0.9891842007637024,grad_norm: 0.7687575121014509, iteration: 361017
loss: 1.0251820087432861,grad_norm: 0.999999870744038, iteration: 361018
loss: 1.0058280229568481,grad_norm: 0.8051905283754991, iteration: 361019
loss: 0.9764313101768494,grad_norm: 0.6873905167321744, iteration: 361020
loss: 1.0283145904541016,grad_norm: 0.7744546667979114, iteration: 361021
loss: 1.0082995891571045,grad_norm: 0.8477586367601956, iteration: 361022
loss: 1.3491019010543823,grad_norm: 1.000000107708336, iteration: 361023
loss: 0.9411284327507019,grad_norm: 0.7283553345691582, iteration: 361024
loss: 1.0131202936172485,grad_norm: 0.8666515162565337, iteration: 361025
loss: 0.9993938207626343,grad_norm: 0.8630072495697797, iteration: 361026
loss: 0.9924083352088928,grad_norm: 0.9999998997243107, iteration: 361027
loss: 0.9899815320968628,grad_norm: 0.7163282962417706, iteration: 361028
loss: 0.9987046122550964,grad_norm: 0.8816884021543703, iteration: 361029
loss: 1.0411994457244873,grad_norm: 0.9999997373520373, iteration: 361030
loss: 1.0044536590576172,grad_norm: 0.8847615156805968, iteration: 361031
loss: 1.0421299934387207,grad_norm: 0.7786585214997114, iteration: 361032
loss: 0.9651820063591003,grad_norm: 0.7702659963056223, iteration: 361033
loss: 1.015250563621521,grad_norm: 0.7389204107353332, iteration: 361034
loss: 1.005937933921814,grad_norm: 0.749361398938691, iteration: 361035
loss: 0.9646180272102356,grad_norm: 0.9010211614451712, iteration: 361036
loss: 1.0016037225723267,grad_norm: 0.8637854586835156, iteration: 361037
loss: 1.0389679670333862,grad_norm: 0.9999997578148747, iteration: 361038
loss: 1.054324984550476,grad_norm: 0.9999996236518741, iteration: 361039
loss: 0.9841915965080261,grad_norm: 0.8499224430451929, iteration: 361040
loss: 1.013288140296936,grad_norm: 0.8501420979247856, iteration: 361041
loss: 1.0275040864944458,grad_norm: 0.7577894886639187, iteration: 361042
loss: 0.9642032384872437,grad_norm: 0.8236600459239678, iteration: 361043
loss: 1.0125004053115845,grad_norm: 0.9999991774962654, iteration: 361044
loss: 0.993337094783783,grad_norm: 0.7672765002691965, iteration: 361045
loss: 0.982336163520813,grad_norm: 0.7776254292844887, iteration: 361046
loss: 1.0203920602798462,grad_norm: 0.9518519803952252, iteration: 361047
loss: 0.9793826341629028,grad_norm: 0.996140250176073, iteration: 361048
loss: 1.056061863899231,grad_norm: 0.7294295315357116, iteration: 361049
loss: 1.0729511976242065,grad_norm: 0.870911204359272, iteration: 361050
loss: 1.0006217956542969,grad_norm: 0.9251654409258386, iteration: 361051
loss: 1.0078145265579224,grad_norm: 0.777753317717737, iteration: 361052
loss: 1.0226716995239258,grad_norm: 0.8881223576154517, iteration: 361053
loss: 0.9571040868759155,grad_norm: 0.979253461744044, iteration: 361054
loss: 1.014737606048584,grad_norm: 0.8722704596007145, iteration: 361055
loss: 1.0124499797821045,grad_norm: 0.7252359690313538, iteration: 361056
loss: 1.0006933212280273,grad_norm: 0.8161547132324728, iteration: 361057
loss: 0.9600507616996765,grad_norm: 0.9191227799946626, iteration: 361058
loss: 0.9907932281494141,grad_norm: 0.9524671840502565, iteration: 361059
loss: 0.9542691111564636,grad_norm: 0.9560896787739641, iteration: 361060
loss: 1.0425941944122314,grad_norm: 0.9752702056381346, iteration: 361061
loss: 0.9801803827285767,grad_norm: 0.816364935488781, iteration: 361062
loss: 0.9725462794303894,grad_norm: 0.794476984644876, iteration: 361063
loss: 0.9962118864059448,grad_norm: 0.9451391193509909, iteration: 361064
loss: 0.9659565091133118,grad_norm: 0.8144300391316698, iteration: 361065
loss: 1.0268239974975586,grad_norm: 0.9999993791572868, iteration: 361066
loss: 1.0842499732971191,grad_norm: 0.999999364640474, iteration: 361067
loss: 1.0277979373931885,grad_norm: 0.9050108040681472, iteration: 361068
loss: 1.1006747484207153,grad_norm: 0.9999995348135481, iteration: 361069
loss: 1.0142996311187744,grad_norm: 0.8105177307168011, iteration: 361070
loss: 1.0086232423782349,grad_norm: 0.7086916978100833, iteration: 361071
loss: 1.0536152124404907,grad_norm: 0.7877442998049348, iteration: 361072
loss: 1.0472854375839233,grad_norm: 0.9033522008189121, iteration: 361073
loss: 1.0863797664642334,grad_norm: 0.9999998516884132, iteration: 361074
loss: 1.0890101194381714,grad_norm: 0.9999994198454821, iteration: 361075
loss: 0.989848792552948,grad_norm: 0.7145248290382179, iteration: 361076
loss: 1.0502066612243652,grad_norm: 0.9999991111699988, iteration: 361077
loss: 1.0267466306686401,grad_norm: 0.7808285608349083, iteration: 361078
loss: 1.2650457620620728,grad_norm: 0.9999996721616873, iteration: 361079
loss: 0.978120744228363,grad_norm: 0.8788308511217409, iteration: 361080
loss: 1.180578351020813,grad_norm: 0.9999998902095397, iteration: 361081
loss: 1.0011703968048096,grad_norm: 0.9066771749889689, iteration: 361082
loss: 0.9864261150360107,grad_norm: 0.8399967033803257, iteration: 361083
loss: 1.0692899227142334,grad_norm: 0.9999999725448974, iteration: 361084
loss: 1.0160719156265259,grad_norm: 0.8236123309288331, iteration: 361085
loss: 1.0771420001983643,grad_norm: 0.9999993834672753, iteration: 361086
loss: 1.0403265953063965,grad_norm: 0.9733929993551771, iteration: 361087
loss: 1.0225247144699097,grad_norm: 0.8999959206155704, iteration: 361088
loss: 1.053039789199829,grad_norm: 0.8953366772927439, iteration: 361089
loss: 1.0484981536865234,grad_norm: 0.9999995524219077, iteration: 361090
loss: 1.0491018295288086,grad_norm: 0.961214388103225, iteration: 361091
loss: 1.0424214601516724,grad_norm: 0.9999996429616068, iteration: 361092
loss: 1.1857659816741943,grad_norm: 0.9498715429813006, iteration: 361093
loss: 1.2016788721084595,grad_norm: 0.99999903775391, iteration: 361094
loss: 1.1179169416427612,grad_norm: 0.9999997493495933, iteration: 361095
loss: 0.9965389966964722,grad_norm: 0.7102937323483673, iteration: 361096
loss: 1.2833141088485718,grad_norm: 0.999999420097777, iteration: 361097
loss: 0.9848751425743103,grad_norm: 0.9099408751325597, iteration: 361098
loss: 1.0253801345825195,grad_norm: 0.993992528844315, iteration: 361099
loss: 1.2367876768112183,grad_norm: 0.9999999750658091, iteration: 361100
loss: 0.9929422736167908,grad_norm: 0.9160963506273089, iteration: 361101
loss: 1.0371867418289185,grad_norm: 0.9999990604175576, iteration: 361102
loss: 1.1219407320022583,grad_norm: 0.9999991602297481, iteration: 361103
loss: 1.0303184986114502,grad_norm: 0.7931440564168513, iteration: 361104
loss: 0.9996384978294373,grad_norm: 0.8019741403120371, iteration: 361105
loss: 1.0555261373519897,grad_norm: 0.9999997507533521, iteration: 361106
loss: 0.990891695022583,grad_norm: 0.9999990666335841, iteration: 361107
loss: 1.0091179609298706,grad_norm: 0.8276549427589904, iteration: 361108
loss: 1.1157063245773315,grad_norm: 0.9999999894926085, iteration: 361109
loss: 1.0306442975997925,grad_norm: 0.7742947964053909, iteration: 361110
loss: 1.0181535482406616,grad_norm: 0.9999999009460678, iteration: 361111
loss: 1.053499698638916,grad_norm: 0.9999999803281139, iteration: 361112
loss: 1.0301100015640259,grad_norm: 0.8210689175712188, iteration: 361113
loss: 1.0399458408355713,grad_norm: 0.9999992249196967, iteration: 361114
loss: 1.0017842054367065,grad_norm: 0.9999991866415031, iteration: 361115
loss: 1.0287667512893677,grad_norm: 0.773062470870777, iteration: 361116
loss: 1.0573376417160034,grad_norm: 0.9527411741268793, iteration: 361117
loss: 0.9971022605895996,grad_norm: 0.7991796611311611, iteration: 361118
loss: 0.9723476767539978,grad_norm: 0.9076016000850625, iteration: 361119
loss: 1.014523983001709,grad_norm: 0.7249265414365316, iteration: 361120
loss: 1.013938307762146,grad_norm: 0.9088107109030794, iteration: 361121
loss: 1.1396194696426392,grad_norm: 0.9999992567608469, iteration: 361122
loss: 0.9871888160705566,grad_norm: 0.772591928768402, iteration: 361123
loss: 1.0063210725784302,grad_norm: 0.7254295286507536, iteration: 361124
loss: 1.0259616374969482,grad_norm: 0.9999995746510983, iteration: 361125
loss: 1.0686033964157104,grad_norm: 0.9999993290177658, iteration: 361126
loss: 1.0651636123657227,grad_norm: 0.948976372407243, iteration: 361127
loss: 0.9922320246696472,grad_norm: 0.7489519734522938, iteration: 361128
loss: 1.1187770366668701,grad_norm: 1.0000000406117702, iteration: 361129
loss: 1.009372353553772,grad_norm: 0.7760364971523848, iteration: 361130
loss: 0.9766808748245239,grad_norm: 0.8962914437808822, iteration: 361131
loss: 1.0351520776748657,grad_norm: 0.9999991054716818, iteration: 361132
loss: 1.011090874671936,grad_norm: 0.9999990305217238, iteration: 361133
loss: 1.0519495010375977,grad_norm: 0.9999993616902721, iteration: 361134
loss: 1.0863704681396484,grad_norm: 0.8204313164165401, iteration: 361135
loss: 1.0312957763671875,grad_norm: 0.8521821521727516, iteration: 361136
loss: 1.0904124975204468,grad_norm: 0.8944571307552347, iteration: 361137
loss: 0.9934666156768799,grad_norm: 0.7161241981681606, iteration: 361138
loss: 0.9654222130775452,grad_norm: 0.9999993337560629, iteration: 361139
loss: 0.9696968197822571,grad_norm: 0.782666662997128, iteration: 361140
loss: 0.9806145429611206,grad_norm: 0.9460460580100076, iteration: 361141
loss: 1.008151650428772,grad_norm: 0.7723051182971624, iteration: 361142
loss: 0.9676300287246704,grad_norm: 0.8209715490042874, iteration: 361143
loss: 1.0499670505523682,grad_norm: 0.8167293653090671, iteration: 361144
loss: 1.0124119520187378,grad_norm: 0.7368586125403709, iteration: 361145
loss: 1.007051944732666,grad_norm: 0.9079709217262536, iteration: 361146
loss: 1.05197012424469,grad_norm: 0.9992981191552965, iteration: 361147
loss: 1.0333486795425415,grad_norm: 0.8095294930071042, iteration: 361148
loss: 0.9907584190368652,grad_norm: 0.8800104024719025, iteration: 361149
loss: 0.9267997145652771,grad_norm: 0.8385915860847418, iteration: 361150
loss: 1.0334227085113525,grad_norm: 0.8033027571418461, iteration: 361151
loss: 1.0065648555755615,grad_norm: 0.9999990467233093, iteration: 361152
loss: 0.9983500242233276,grad_norm: 0.804268531075591, iteration: 361153
loss: 1.0007693767547607,grad_norm: 0.6742185205663794, iteration: 361154
loss: 0.9828790426254272,grad_norm: 0.7025197222220337, iteration: 361155
loss: 1.003564476966858,grad_norm: 0.999999417009904, iteration: 361156
loss: 1.0185391902923584,grad_norm: 0.9554431114434156, iteration: 361157
loss: 0.9797630906105042,grad_norm: 0.6635056326243487, iteration: 361158
loss: 1.0097960233688354,grad_norm: 0.7919968727175284, iteration: 361159
loss: 1.0119760036468506,grad_norm: 0.9999998415224876, iteration: 361160
loss: 1.022751808166504,grad_norm: 0.7002417728936892, iteration: 361161
loss: 0.982074499130249,grad_norm: 0.834996432670504, iteration: 361162
loss: 1.0602179765701294,grad_norm: 0.9999991349277224, iteration: 361163
loss: 0.9669311046600342,grad_norm: 0.6909606116901233, iteration: 361164
loss: 0.9669486880302429,grad_norm: 0.8734160365954803, iteration: 361165
loss: 0.9915329813957214,grad_norm: 0.9678674647975213, iteration: 361166
loss: 1.015518307685852,grad_norm: 0.809936133474003, iteration: 361167
loss: 1.0571538209915161,grad_norm: 0.7311645202620658, iteration: 361168
loss: 0.9811798930168152,grad_norm: 0.9999992040054051, iteration: 361169
loss: 1.047275424003601,grad_norm: 0.9999996319280563, iteration: 361170
loss: 1.0091296434402466,grad_norm: 0.862800469319272, iteration: 361171
loss: 1.0943681001663208,grad_norm: 0.9999995292583757, iteration: 361172
loss: 1.0417038202285767,grad_norm: 0.9999994049176238, iteration: 361173
loss: 1.0453273057937622,grad_norm: 0.8212631732887573, iteration: 361174
loss: 1.0133934020996094,grad_norm: 0.9999990788616071, iteration: 361175
loss: 1.0000327825546265,grad_norm: 0.9220460090395175, iteration: 361176
loss: 1.0020390748977661,grad_norm: 0.8806062013690849, iteration: 361177
loss: 0.9954537153244019,grad_norm: 0.8103912420572134, iteration: 361178
loss: 1.0081238746643066,grad_norm: 0.8704829511261217, iteration: 361179
loss: 0.9785600304603577,grad_norm: 0.8504120035791511, iteration: 361180
loss: 1.1241816282272339,grad_norm: 0.9999992985117268, iteration: 361181
loss: 1.1474236249923706,grad_norm: 0.999999817327036, iteration: 361182
loss: 1.0144788026809692,grad_norm: 0.9999991972107934, iteration: 361183
loss: 0.9932886362075806,grad_norm: 0.8121136617705201, iteration: 361184
loss: 1.019187569618225,grad_norm: 0.9999991888296568, iteration: 361185
loss: 1.014206051826477,grad_norm: 0.7631717178455623, iteration: 361186
loss: 0.9898509979248047,grad_norm: 0.8359532554489878, iteration: 361187
loss: 0.9936625361442566,grad_norm: 0.8855360404264436, iteration: 361188
loss: 0.9758697152137756,grad_norm: 0.6783778821270341, iteration: 361189
loss: 1.0533939599990845,grad_norm: 0.953745735139919, iteration: 361190
loss: 0.9709557890892029,grad_norm: 0.9999991238465902, iteration: 361191
loss: 1.0161664485931396,grad_norm: 0.999999129800462, iteration: 361192
loss: 1.0027706623077393,grad_norm: 0.9999995263324911, iteration: 361193
loss: 1.0356757640838623,grad_norm: 0.6779054038328712, iteration: 361194
loss: 0.9848407506942749,grad_norm: 0.8150101436839969, iteration: 361195
loss: 0.976992130279541,grad_norm: 0.9188041607226897, iteration: 361196
loss: 1.019127368927002,grad_norm: 0.9999994170121522, iteration: 361197
loss: 1.0238910913467407,grad_norm: 0.8595102091316662, iteration: 361198
loss: 1.0601800680160522,grad_norm: 0.7201881681499833, iteration: 361199
loss: 1.0198171138763428,grad_norm: 0.9999994271439309, iteration: 361200
loss: 1.0318994522094727,grad_norm: 0.999999100462842, iteration: 361201
loss: 1.011536955833435,grad_norm: 0.9999990212608901, iteration: 361202
loss: 1.2908594608306885,grad_norm: 0.9999997738790252, iteration: 361203
loss: 0.9695553183555603,grad_norm: 0.8659543839426059, iteration: 361204
loss: 1.0061103105545044,grad_norm: 0.9599730740741155, iteration: 361205
loss: 0.9963871240615845,grad_norm: 0.8899541416448749, iteration: 361206
loss: 1.014678955078125,grad_norm: 0.8939540296466817, iteration: 361207
loss: 1.0175180435180664,grad_norm: 0.9133502366536407, iteration: 361208
loss: 0.9766129851341248,grad_norm: 0.9070692856032225, iteration: 361209
loss: 0.9981594085693359,grad_norm: 0.9405189609754617, iteration: 361210
loss: 0.9809892773628235,grad_norm: 0.8663260196057256, iteration: 361211
loss: 1.032135248184204,grad_norm: 0.9999997934589556, iteration: 361212
loss: 0.9722327589988708,grad_norm: 0.9999991128689139, iteration: 361213
loss: 1.050658106803894,grad_norm: 0.999999805963818, iteration: 361214
loss: 0.9892164468765259,grad_norm: 0.8810820350893035, iteration: 361215
loss: 0.9918692111968994,grad_norm: 0.778640601429828, iteration: 361216
loss: 0.9878466129302979,grad_norm: 0.8988242432437262, iteration: 361217
loss: 1.0153694152832031,grad_norm: 0.9999997252657193, iteration: 361218
loss: 1.0069868564605713,grad_norm: 0.8752603508107507, iteration: 361219
loss: 1.0292601585388184,grad_norm: 0.713527664021308, iteration: 361220
loss: 1.0463685989379883,grad_norm: 0.7000955822495879, iteration: 361221
loss: 1.0190786123275757,grad_norm: 0.794000918345032, iteration: 361222
loss: 1.018900990486145,grad_norm: 0.9376478123021927, iteration: 361223
loss: 1.0186837911605835,grad_norm: 0.7718864059416046, iteration: 361224
loss: 1.0023694038391113,grad_norm: 0.929624593309638, iteration: 361225
loss: 0.9612009525299072,grad_norm: 0.9999990368572109, iteration: 361226
loss: 0.9561734199523926,grad_norm: 0.7530346513027106, iteration: 361227
loss: 1.0029737949371338,grad_norm: 0.8276487593912509, iteration: 361228
loss: 1.0835261344909668,grad_norm: 0.9999993050987975, iteration: 361229
loss: 1.2337135076522827,grad_norm: 0.8538300169888962, iteration: 361230
loss: 1.0265971422195435,grad_norm: 0.9999996839988716, iteration: 361231
loss: 1.06672203540802,grad_norm: 0.9999996848531129, iteration: 361232
loss: 1.0057066679000854,grad_norm: 0.8678713669047351, iteration: 361233
loss: 1.0545401573181152,grad_norm: 0.9999997791243114, iteration: 361234
loss: 1.0264869928359985,grad_norm: 0.9999990109365897, iteration: 361235
loss: 1.0097285509109497,grad_norm: 0.7322988431093658, iteration: 361236
loss: 0.9889341592788696,grad_norm: 0.7698096731831572, iteration: 361237
loss: 0.9944270849227905,grad_norm: 0.9999992226810968, iteration: 361238
loss: 0.988701343536377,grad_norm: 0.9756540832249095, iteration: 361239
loss: 1.15571928024292,grad_norm: 0.999999836854482, iteration: 361240
loss: 1.0398266315460205,grad_norm: 0.9658530303917735, iteration: 361241
loss: 0.9876304268836975,grad_norm: 0.9999995428493093, iteration: 361242
loss: 1.0098142623901367,grad_norm: 0.9999992165375069, iteration: 361243
loss: 1.0136667490005493,grad_norm: 0.7879944938801242, iteration: 361244
loss: 1.0088499784469604,grad_norm: 0.9999994073983222, iteration: 361245
loss: 1.0123828649520874,grad_norm: 0.7673321820045786, iteration: 361246
loss: 0.9938937425613403,grad_norm: 0.7606878073313469, iteration: 361247
loss: 1.0511332750320435,grad_norm: 0.9999991869592509, iteration: 361248
loss: 1.1022933721542358,grad_norm: 0.9999993992469987, iteration: 361249
loss: 1.0267614126205444,grad_norm: 0.999999050121927, iteration: 361250
loss: 1.0078870058059692,grad_norm: 0.8018473748806441, iteration: 361251
loss: 1.009034276008606,grad_norm: 0.6719268437515262, iteration: 361252
loss: 1.0209592580795288,grad_norm: 0.99999900653107, iteration: 361253
loss: 1.0338997840881348,grad_norm: 0.9999992927028983, iteration: 361254
loss: 1.0469963550567627,grad_norm: 0.9999998900510186, iteration: 361255
loss: 1.0301880836486816,grad_norm: 0.9999992798699201, iteration: 361256
loss: 1.0007954835891724,grad_norm: 0.8839991112975241, iteration: 361257
loss: 1.0475659370422363,grad_norm: 0.9999991689034552, iteration: 361258
loss: 0.9851598143577576,grad_norm: 0.8232317049099954, iteration: 361259
loss: 1.036353349685669,grad_norm: 0.9999999675942418, iteration: 361260
loss: 1.0263056755065918,grad_norm: 0.9999993806750005, iteration: 361261
loss: 1.013755202293396,grad_norm: 0.8684458243881981, iteration: 361262
loss: 1.0150684118270874,grad_norm: 0.8448308051229473, iteration: 361263
loss: 1.0425224304199219,grad_norm: 0.8538720134900433, iteration: 361264
loss: 0.9776036739349365,grad_norm: 0.8438569971197875, iteration: 361265
loss: 1.005538821220398,grad_norm: 0.9956752714379014, iteration: 361266
loss: 0.9528756737709045,grad_norm: 0.7637734783941056, iteration: 361267
loss: 0.9843254685401917,grad_norm: 0.8898655269025655, iteration: 361268
loss: 1.14319908618927,grad_norm: 0.9999992403850567, iteration: 361269
loss: 1.0068116188049316,grad_norm: 0.9094489862259116, iteration: 361270
loss: 0.9569644927978516,grad_norm: 0.8279739188811833, iteration: 361271
loss: 0.9645076990127563,grad_norm: 0.8006243687474961, iteration: 361272
loss: 1.003461241722107,grad_norm: 0.7727727175958099, iteration: 361273
loss: 1.0378907918930054,grad_norm: 0.9999991044216245, iteration: 361274
loss: 0.9935473799705505,grad_norm: 0.839509670229585, iteration: 361275
loss: 0.977895975112915,grad_norm: 0.8816807210702845, iteration: 361276
loss: 1.047727108001709,grad_norm: 0.9999996448734901, iteration: 361277
loss: 1.0223722457885742,grad_norm: 0.9778128238619264, iteration: 361278
loss: 1.0077991485595703,grad_norm: 0.8687684543427602, iteration: 361279
loss: 1.0517185926437378,grad_norm: 0.7876265707267378, iteration: 361280
loss: 0.9771955609321594,grad_norm: 0.6872594704291813, iteration: 361281
loss: 1.1018314361572266,grad_norm: 0.9858981878580365, iteration: 361282
loss: 0.989512026309967,grad_norm: 0.8145522621214001, iteration: 361283
loss: 1.0093084573745728,grad_norm: 0.8344846483457558, iteration: 361284
loss: 0.9907646179199219,grad_norm: 0.9999994641027057, iteration: 361285
loss: 1.0116164684295654,grad_norm: 0.9999990392128614, iteration: 361286
loss: 0.968504011631012,grad_norm: 0.9999992794787752, iteration: 361287
loss: 1.0164203643798828,grad_norm: 0.9117663544452822, iteration: 361288
loss: 1.0215951204299927,grad_norm: 0.7756660046708185, iteration: 361289
loss: 1.0212949514389038,grad_norm: 0.923409784056732, iteration: 361290
loss: 1.0034223794937134,grad_norm: 0.9999990703872982, iteration: 361291
loss: 0.9838003516197205,grad_norm: 0.8219964575131136, iteration: 361292
loss: 0.9813122749328613,grad_norm: 0.999999259476639, iteration: 361293
loss: 1.032254934310913,grad_norm: 0.7753018008707361, iteration: 361294
loss: 0.9739397764205933,grad_norm: 0.9068224948021906, iteration: 361295
loss: 0.997230589389801,grad_norm: 0.9999997813180269, iteration: 361296
loss: 0.9743178486824036,grad_norm: 0.8080017694586841, iteration: 361297
loss: 1.0705296993255615,grad_norm: 0.9999992118614643, iteration: 361298
loss: 0.9866610169410706,grad_norm: 0.9831300065384065, iteration: 361299
loss: 1.1946635246276855,grad_norm: 0.9999993353438832, iteration: 361300
loss: 1.0005838871002197,grad_norm: 0.8622701506748144, iteration: 361301
loss: 1.1333791017532349,grad_norm: 0.9999997546563708, iteration: 361302
loss: 1.0102369785308838,grad_norm: 0.875983451924179, iteration: 361303
loss: 1.0177125930786133,grad_norm: 0.854698256190391, iteration: 361304
loss: 1.0053424835205078,grad_norm: 0.6723184305060738, iteration: 361305
loss: 1.1106724739074707,grad_norm: 0.9999999415730105, iteration: 361306
loss: 1.3156553506851196,grad_norm: 0.9999997607424771, iteration: 361307
loss: 0.9924264550209045,grad_norm: 0.9999992555176787, iteration: 361308
loss: 1.0982800722122192,grad_norm: 0.9999990012933082, iteration: 361309
loss: 1.0076829195022583,grad_norm: 0.8004540113649494, iteration: 361310
loss: 1.0059293508529663,grad_norm: 0.9216045432179655, iteration: 361311
loss: 1.0162873268127441,grad_norm: 0.9999997426495353, iteration: 361312
loss: 0.9978358745574951,grad_norm: 0.7141636696469731, iteration: 361313
loss: 1.0067074298858643,grad_norm: 0.7061433690757664, iteration: 361314
loss: 0.9882037043571472,grad_norm: 0.8811189452531739, iteration: 361315
loss: 1.0278339385986328,grad_norm: 0.6626592548045205, iteration: 361316
loss: 0.9919928908348083,grad_norm: 0.74956527522751, iteration: 361317
loss: 1.0290192365646362,grad_norm: 0.9999998535658297, iteration: 361318
loss: 0.9751061797142029,grad_norm: 0.8434522098121549, iteration: 361319
loss: 0.963958203792572,grad_norm: 0.8181323313855959, iteration: 361320
loss: 1.0348833799362183,grad_norm: 0.9062742430848877, iteration: 361321
loss: 1.0008788108825684,grad_norm: 0.8953650943448674, iteration: 361322
loss: 1.010867714881897,grad_norm: 0.9042564178298685, iteration: 361323
loss: 1.067183017730713,grad_norm: 1.000000020788914, iteration: 361324
loss: 1.1314505338668823,grad_norm: 0.9999997909215247, iteration: 361325
loss: 0.9935045838356018,grad_norm: 0.8938775388211165, iteration: 361326
loss: 1.120070219039917,grad_norm: 0.9999998238769502, iteration: 361327
loss: 0.9665170907974243,grad_norm: 0.724521964212591, iteration: 361328
loss: 1.0070292949676514,grad_norm: 0.8979368663380395, iteration: 361329
loss: 0.9792453050613403,grad_norm: 0.8582947684388872, iteration: 361330
loss: 1.0064233541488647,grad_norm: 0.8044160338023667, iteration: 361331
loss: 1.0029829740524292,grad_norm: 0.9999995045549679, iteration: 361332
loss: 1.0794298648834229,grad_norm: 0.9999992441788038, iteration: 361333
loss: 0.9928195476531982,grad_norm: 0.8484720005011526, iteration: 361334
loss: 1.1473195552825928,grad_norm: 0.9999998264332397, iteration: 361335
loss: 1.088615894317627,grad_norm: 0.9999999879747906, iteration: 361336
loss: 1.0274574756622314,grad_norm: 0.8362310184425463, iteration: 361337
loss: 0.9700847864151001,grad_norm: 0.7171918903169836, iteration: 361338
loss: 1.0087735652923584,grad_norm: 0.9999992855466605, iteration: 361339
loss: 0.9701433181762695,grad_norm: 0.7733519979363973, iteration: 361340
loss: 1.008422613143921,grad_norm: 0.9263870184410776, iteration: 361341
loss: 1.0955095291137695,grad_norm: 0.9999991656198101, iteration: 361342
loss: 1.0697468519210815,grad_norm: 0.9999996495343473, iteration: 361343
loss: 1.0006788969039917,grad_norm: 0.8372393679574428, iteration: 361344
loss: 0.9917294383049011,grad_norm: 0.762047415866564, iteration: 361345
loss: 1.0560109615325928,grad_norm: 0.99999983404514, iteration: 361346
loss: 0.9480871558189392,grad_norm: 0.819672756569416, iteration: 361347
loss: 0.9932998418807983,grad_norm: 0.7648496337980202, iteration: 361348
loss: 1.0035865306854248,grad_norm: 0.9999994623475376, iteration: 361349
loss: 1.010352611541748,grad_norm: 0.7974264430687136, iteration: 361350
loss: 1.2285364866256714,grad_norm: 0.9999998961358585, iteration: 361351
loss: 1.038347840309143,grad_norm: 0.829087197131712, iteration: 361352
loss: 1.0190272331237793,grad_norm: 0.795754489647803, iteration: 361353
loss: 0.956876277923584,grad_norm: 0.8389039031490653, iteration: 361354
loss: 1.0325978994369507,grad_norm: 0.7559652705991011, iteration: 361355
loss: 1.0700703859329224,grad_norm: 0.8996087457046923, iteration: 361356
loss: 0.9663860201835632,grad_norm: 0.7827843764909557, iteration: 361357
loss: 1.024593472480774,grad_norm: 0.999999398316338, iteration: 361358
loss: 0.9614387154579163,grad_norm: 0.8283505161169795, iteration: 361359
loss: 1.0256190299987793,grad_norm: 0.652227592052891, iteration: 361360
loss: 1.154186725616455,grad_norm: 0.9999993115908784, iteration: 361361
loss: 0.9645186066627502,grad_norm: 0.7314278207299925, iteration: 361362
loss: 0.9746565222740173,grad_norm: 0.8124093048770927, iteration: 361363
loss: 1.0389279127120972,grad_norm: 0.9999993723344323, iteration: 361364
loss: 1.0556448698043823,grad_norm: 0.81091026953941, iteration: 361365
loss: 1.0186576843261719,grad_norm: 0.8603073782659287, iteration: 361366
loss: 0.9999809265136719,grad_norm: 0.7653617847292762, iteration: 361367
loss: 1.0568392276763916,grad_norm: 0.9999994867659561, iteration: 361368
loss: 1.006731629371643,grad_norm: 0.8227678137271751, iteration: 361369
loss: 1.02537202835083,grad_norm: 0.7711256218006829, iteration: 361370
loss: 0.9731994867324829,grad_norm: 0.815658794536534, iteration: 361371
loss: 1.1326074600219727,grad_norm: 0.9999997716112333, iteration: 361372
loss: 1.0825408697128296,grad_norm: 0.9999998274867576, iteration: 361373
loss: 0.998500406742096,grad_norm: 0.7202598070075598, iteration: 361374
loss: 1.005123496055603,grad_norm: 0.8946526047325384, iteration: 361375
loss: 1.3313734531402588,grad_norm: 0.9999996039497978, iteration: 361376
loss: 1.12444269657135,grad_norm: 0.9999998362262763, iteration: 361377
loss: 1.150923252105713,grad_norm: 1.0000000137520144, iteration: 361378
loss: 0.9858661890029907,grad_norm: 0.7082932088428346, iteration: 361379
loss: 1.068101167678833,grad_norm: 0.9999992373243484, iteration: 361380
loss: 0.9950724840164185,grad_norm: 0.7820277803445427, iteration: 361381
loss: 0.9686386585235596,grad_norm: 0.8622849945580757, iteration: 361382
loss: 1.0302854776382446,grad_norm: 0.8487836388619512, iteration: 361383
loss: 1.0091376304626465,grad_norm: 0.9333669797049304, iteration: 361384
loss: 1.296110987663269,grad_norm: 0.9999994090692595, iteration: 361385
loss: 1.0962932109832764,grad_norm: 0.9999997892725583, iteration: 361386
loss: 1.0086349248886108,grad_norm: 0.8318498923286256, iteration: 361387
loss: 1.0241247415542603,grad_norm: 0.8227897468351372, iteration: 361388
loss: 0.9941103458404541,grad_norm: 0.7918444073311112, iteration: 361389
loss: 1.0494507551193237,grad_norm: 0.8786689877232551, iteration: 361390
loss: 1.0194709300994873,grad_norm: 0.999999080995784, iteration: 361391
loss: 1.011080265045166,grad_norm: 0.9256822853491127, iteration: 361392
loss: 0.9988836050033569,grad_norm: 0.718442333915996, iteration: 361393
loss: 1.0056512355804443,grad_norm: 0.8614686036257472, iteration: 361394
loss: 0.9998483061790466,grad_norm: 0.8508787779054306, iteration: 361395
loss: 0.9850510954856873,grad_norm: 0.8852457361977407, iteration: 361396
loss: 1.0054789781570435,grad_norm: 0.8174193223698238, iteration: 361397
loss: 0.9999561309814453,grad_norm: 0.9907723304488198, iteration: 361398
loss: 1.3978922367095947,grad_norm: 0.9999997986849112, iteration: 361399
loss: 0.9891276955604553,grad_norm: 0.8001563388306888, iteration: 361400
loss: 1.0164889097213745,grad_norm: 0.8175793866402065, iteration: 361401
loss: 0.9995713829994202,grad_norm: 0.6659699645941634, iteration: 361402
loss: 1.0820541381835938,grad_norm: 0.8936161904632145, iteration: 361403
loss: 1.0207245349884033,grad_norm: 0.815907660927109, iteration: 361404
loss: 0.9957864880561829,grad_norm: 0.7712155910432228, iteration: 361405
loss: 1.0309231281280518,grad_norm: 0.9999996697671558, iteration: 361406
loss: 1.0088764429092407,grad_norm: 0.7557558548593841, iteration: 361407
loss: 0.9824773669242859,grad_norm: 0.8799208814007058, iteration: 361408
loss: 1.0430163145065308,grad_norm: 0.8511237016057127, iteration: 361409
loss: 0.9825374484062195,grad_norm: 0.929188847536325, iteration: 361410
loss: 0.9853859543800354,grad_norm: 0.8594195078801493, iteration: 361411
loss: 0.9950487613677979,grad_norm: 0.8279437934615209, iteration: 361412
loss: 1.0289868116378784,grad_norm: 0.9999996207448519, iteration: 361413
loss: 0.9764973521232605,grad_norm: 0.898189853988014, iteration: 361414
loss: 1.0056554079055786,grad_norm: 0.7556667390687349, iteration: 361415
loss: 1.0143978595733643,grad_norm: 0.637125709637724, iteration: 361416
loss: 0.992662250995636,grad_norm: 0.7903258839739351, iteration: 361417
loss: 1.005782961845398,grad_norm: 0.8001811317154981, iteration: 361418
loss: 1.0316660404205322,grad_norm: 0.9999992712652896, iteration: 361419
loss: 1.0597312450408936,grad_norm: 0.9326035021445477, iteration: 361420
loss: 1.0028467178344727,grad_norm: 0.9851389696080408, iteration: 361421
loss: 0.977012038230896,grad_norm: 0.8783173414907083, iteration: 361422
loss: 0.975910484790802,grad_norm: 0.842068065644644, iteration: 361423
loss: 0.9747047424316406,grad_norm: 0.9999991938547486, iteration: 361424
loss: 0.9897372722625732,grad_norm: 0.8734436622318408, iteration: 361425
loss: 0.9802595973014832,grad_norm: 0.823933532295268, iteration: 361426
loss: 1.0314122438430786,grad_norm: 0.9999998139310572, iteration: 361427
loss: 1.004818081855774,grad_norm: 0.7995056836248084, iteration: 361428
loss: 1.0172072649002075,grad_norm: 0.8508065004464663, iteration: 361429
loss: 0.9973980784416199,grad_norm: 0.8979047911605579, iteration: 361430
loss: 0.9711474776268005,grad_norm: 0.7609395075908594, iteration: 361431
loss: 1.0107643604278564,grad_norm: 0.9999994068385885, iteration: 361432
loss: 1.0036205053329468,grad_norm: 0.9194394976196711, iteration: 361433
loss: 0.9867697954177856,grad_norm: 0.7588336130393841, iteration: 361434
loss: 1.0272139310836792,grad_norm: 0.7885659257612493, iteration: 361435
loss: 1.0016546249389648,grad_norm: 0.7418113105506726, iteration: 361436
loss: 1.0415290594100952,grad_norm: 0.8920336443110828, iteration: 361437
loss: 0.9805451035499573,grad_norm: 0.7920257885787665, iteration: 361438
loss: 1.0222290754318237,grad_norm: 0.9999999113471646, iteration: 361439
loss: 1.0553169250488281,grad_norm: 0.8496468112208267, iteration: 361440
loss: 0.9650569558143616,grad_norm: 0.8331143793738186, iteration: 361441
loss: 1.0076847076416016,grad_norm: 0.6813073739989937, iteration: 361442
loss: 1.022077202796936,grad_norm: 0.7752295423715432, iteration: 361443
loss: 1.015668272972107,grad_norm: 0.8357564013159287, iteration: 361444
loss: 1.0086839199066162,grad_norm: 0.8440877892640731, iteration: 361445
loss: 0.987677812576294,grad_norm: 0.6588989736873573, iteration: 361446
loss: 0.9547373056411743,grad_norm: 0.9999991105836911, iteration: 361447
loss: 0.9880239367485046,grad_norm: 0.7637906732609474, iteration: 361448
loss: 0.9773142337799072,grad_norm: 0.856979580340073, iteration: 361449
loss: 1.0123523473739624,grad_norm: 0.9999998722676947, iteration: 361450
loss: 1.0041794776916504,grad_norm: 0.8244942926135741, iteration: 361451
loss: 0.9813416004180908,grad_norm: 0.9783279845873764, iteration: 361452
loss: 0.9876539707183838,grad_norm: 0.7106840568375157, iteration: 361453
loss: 1.0562466382980347,grad_norm: 0.8612148691597548, iteration: 361454
loss: 1.0176953077316284,grad_norm: 0.7187978269294344, iteration: 361455
loss: 0.9884147047996521,grad_norm: 0.797198214269275, iteration: 361456
loss: 0.975677490234375,grad_norm: 0.9105469551790915, iteration: 361457
loss: 1.0716373920440674,grad_norm: 0.9999992444306204, iteration: 361458
loss: 0.9992697238922119,grad_norm: 0.8268171355516545, iteration: 361459
loss: 0.974556028842926,grad_norm: 0.6921275762411685, iteration: 361460
loss: 0.9996781349182129,grad_norm: 0.8105946039484112, iteration: 361461
loss: 0.9799334406852722,grad_norm: 0.9999992960622707, iteration: 361462
loss: 0.9930965900421143,grad_norm: 0.8345834043444077, iteration: 361463
loss: 0.9947511553764343,grad_norm: 0.9137933418524322, iteration: 361464
loss: 1.0143579244613647,grad_norm: 0.84004507016801, iteration: 361465
loss: 0.9766075611114502,grad_norm: 0.8093192667764576, iteration: 361466
loss: 0.9611191749572754,grad_norm: 0.8234134569452976, iteration: 361467
loss: 0.9809847474098206,grad_norm: 0.8877159859742907, iteration: 361468
loss: 0.9845905303955078,grad_norm: 0.8352928954459646, iteration: 361469
loss: 0.9642749428749084,grad_norm: 0.9999994750115664, iteration: 361470
loss: 1.0321983098983765,grad_norm: 0.999999367524705, iteration: 361471
loss: 0.986826479434967,grad_norm: 0.8596861654313062, iteration: 361472
loss: 1.0162765979766846,grad_norm: 0.7753643465686609, iteration: 361473
loss: 0.9503034949302673,grad_norm: 0.9929335164903321, iteration: 361474
loss: 0.9915830492973328,grad_norm: 0.8819521073122096, iteration: 361475
loss: 0.9921343326568604,grad_norm: 0.6898823945039423, iteration: 361476
loss: 1.0084669589996338,grad_norm: 0.9541889913537428, iteration: 361477
loss: 1.0033042430877686,grad_norm: 0.8275792529725466, iteration: 361478
loss: 0.9816732406616211,grad_norm: 0.7614292587262437, iteration: 361479
loss: 0.9993809461593628,grad_norm: 0.9827111311157399, iteration: 361480
loss: 0.9669273495674133,grad_norm: 0.8329545377684072, iteration: 361481
loss: 1.0182381868362427,grad_norm: 0.8529463582547084, iteration: 361482
loss: 1.0011050701141357,grad_norm: 0.7494349393194412, iteration: 361483
loss: 1.0004571676254272,grad_norm: 0.9999990106225864, iteration: 361484
loss: 1.0318489074707031,grad_norm: 0.9999990685250297, iteration: 361485
loss: 0.9965361952781677,grad_norm: 0.7176357417115269, iteration: 361486
loss: 0.9940121173858643,grad_norm: 0.7328026850115569, iteration: 361487
loss: 1.028732419013977,grad_norm: 0.9999995503509189, iteration: 361488
loss: 0.9911155104637146,grad_norm: 0.7449644082946019, iteration: 361489
loss: 0.9530296921730042,grad_norm: 0.8278982327058401, iteration: 361490
loss: 1.0139400959014893,grad_norm: 0.8663597889392068, iteration: 361491
loss: 0.9851025938987732,grad_norm: 0.8489312869109692, iteration: 361492
loss: 0.9919551014900208,grad_norm: 0.7957476740931412, iteration: 361493
loss: 0.9734846949577332,grad_norm: 0.8263677542769712, iteration: 361494
loss: 0.9697218537330627,grad_norm: 0.7042365050441525, iteration: 361495
loss: 1.037187099456787,grad_norm: 0.9999993749839634, iteration: 361496
loss: 0.9627879858016968,grad_norm: 0.8048841235515226, iteration: 361497
loss: 0.9820161461830139,grad_norm: 0.9192929935658359, iteration: 361498
loss: 1.0177981853485107,grad_norm: 0.9067381650288875, iteration: 361499
loss: 0.9499630331993103,grad_norm: 0.927128173273679, iteration: 361500
loss: 1.023917555809021,grad_norm: 0.9999992727952083, iteration: 361501
loss: 0.9826561212539673,grad_norm: 0.7878225312431051, iteration: 361502
loss: 1.0002162456512451,grad_norm: 0.8260596924087481, iteration: 361503
loss: 1.0518221855163574,grad_norm: 0.8439706826511769, iteration: 361504
loss: 0.9634981155395508,grad_norm: 0.999999098259314, iteration: 361505
loss: 1.0020617246627808,grad_norm: 0.7769584960285995, iteration: 361506
loss: 0.9889969825744629,grad_norm: 0.8677047815568067, iteration: 361507
loss: 0.9846069812774658,grad_norm: 0.6754927448852249, iteration: 361508
loss: 0.9878140687942505,grad_norm: 0.8363716769617069, iteration: 361509
loss: 1.0097777843475342,grad_norm: 0.7045847263726247, iteration: 361510
loss: 1.0121440887451172,grad_norm: 0.6871072205038807, iteration: 361511
loss: 0.9916693568229675,grad_norm: 0.7589483172621551, iteration: 361512
loss: 1.0377514362335205,grad_norm: 0.7671287050084454, iteration: 361513
loss: 0.9371716976165771,grad_norm: 0.7387037302593022, iteration: 361514
loss: 0.9889642000198364,grad_norm: 0.7242928338006523, iteration: 361515
loss: 1.0100293159484863,grad_norm: 0.837606039484667, iteration: 361516
loss: 0.9634615778923035,grad_norm: 0.9114323646703371, iteration: 361517
loss: 1.0214931964874268,grad_norm: 0.6798613812984251, iteration: 361518
loss: 1.1169052124023438,grad_norm: 0.9999992237886905, iteration: 361519
loss: 1.0088914632797241,grad_norm: 0.9965603455011354, iteration: 361520
loss: 0.9648050665855408,grad_norm: 0.7270176339479134, iteration: 361521
loss: 0.9939543008804321,grad_norm: 0.7271062594486319, iteration: 361522
loss: 1.0247865915298462,grad_norm: 0.999999040158912, iteration: 361523
loss: 1.042243480682373,grad_norm: 0.9999996598576782, iteration: 361524
loss: 0.96355801820755,grad_norm: 0.7883981052833804, iteration: 361525
loss: 1.0523039102554321,grad_norm: 0.7983875757238779, iteration: 361526
loss: 1.029253602027893,grad_norm: 0.9999990195274105, iteration: 361527
loss: 1.001834511756897,grad_norm: 0.8142176302301423, iteration: 361528
loss: 1.006589651107788,grad_norm: 0.8403495429309731, iteration: 361529
loss: 1.0027797222137451,grad_norm: 0.9526130629566371, iteration: 361530
loss: 1.0057686567306519,grad_norm: 0.9374417052892202, iteration: 361531
loss: 0.9945078492164612,grad_norm: 0.7961931435813444, iteration: 361532
loss: 1.0254080295562744,grad_norm: 0.7591344279586195, iteration: 361533
loss: 0.9926797151565552,grad_norm: 0.8549038489258014, iteration: 361534
loss: 0.9827564358711243,grad_norm: 0.9999991586875665, iteration: 361535
loss: 1.003890872001648,grad_norm: 0.7345153894744876, iteration: 361536
loss: 1.0330321788787842,grad_norm: 0.7729191742967818, iteration: 361537
loss: 1.017845630645752,grad_norm: 0.9422544844396868, iteration: 361538
loss: 0.9886232614517212,grad_norm: 0.8591000340220395, iteration: 361539
loss: 0.9984939694404602,grad_norm: 0.7914347150689568, iteration: 361540
loss: 1.0327837467193604,grad_norm: 0.9218547110123825, iteration: 361541
loss: 1.0926461219787598,grad_norm: 0.9999994363958411, iteration: 361542
loss: 0.9627582430839539,grad_norm: 0.7977274797709271, iteration: 361543
loss: 1.0386261940002441,grad_norm: 0.8038782529706112, iteration: 361544
loss: 0.9771074056625366,grad_norm: 0.8524779894406533, iteration: 361545
loss: 0.9834493398666382,grad_norm: 0.7594233628006305, iteration: 361546
loss: 1.023431420326233,grad_norm: 0.8952223275146363, iteration: 361547
loss: 1.0068755149841309,grad_norm: 0.8716046223347715, iteration: 361548
loss: 1.0093960762023926,grad_norm: 0.999999440830641, iteration: 361549
loss: 0.976146936416626,grad_norm: 0.8562085398069625, iteration: 361550
loss: 0.9833477139472961,grad_norm: 0.8606392203112573, iteration: 361551
loss: 1.1147148609161377,grad_norm: 0.999999436320164, iteration: 361552
loss: 1.0399415493011475,grad_norm: 0.9999991489241805, iteration: 361553
loss: 0.9682122468948364,grad_norm: 0.8495376580838383, iteration: 361554
loss: 1.0010676383972168,grad_norm: 0.729590593683592, iteration: 361555
loss: 0.9889995455741882,grad_norm: 0.8063895319656575, iteration: 361556
loss: 0.983032763004303,grad_norm: 0.9283801959961063, iteration: 361557
loss: 0.9665277004241943,grad_norm: 0.7676288281247646, iteration: 361558
loss: 0.9866472482681274,grad_norm: 0.7808365295631526, iteration: 361559
loss: 0.9734126329421997,grad_norm: 0.9993146757251373, iteration: 361560
loss: 0.9647930264472961,grad_norm: 0.9413696983297098, iteration: 361561
loss: 1.015350103378296,grad_norm: 0.7874395140846819, iteration: 361562
loss: 0.9692627191543579,grad_norm: 0.7381951924328112, iteration: 361563
loss: 1.0343953371047974,grad_norm: 0.8304238846927467, iteration: 361564
loss: 1.0097906589508057,grad_norm: 0.9999995114310899, iteration: 361565
loss: 1.0275688171386719,grad_norm: 0.8889461499037037, iteration: 361566
loss: 0.9952373504638672,grad_norm: 0.8816319265606143, iteration: 361567
loss: 0.9935131072998047,grad_norm: 0.9363949791389349, iteration: 361568
loss: 1.068243145942688,grad_norm: 0.9402158893770761, iteration: 361569
loss: 1.3019130229949951,grad_norm: 0.9999993647996283, iteration: 361570
loss: 0.9766735434532166,grad_norm: 0.6917973983111393, iteration: 361571
loss: 0.972281813621521,grad_norm: 0.8808024497853296, iteration: 361572
loss: 1.1673023700714111,grad_norm: 0.9018960895322062, iteration: 361573
loss: 1.0892194509506226,grad_norm: 0.7233293390649224, iteration: 361574
loss: 0.993119478225708,grad_norm: 0.7437262498253424, iteration: 361575
loss: 1.0141096115112305,grad_norm: 0.9999989769782508, iteration: 361576
loss: 0.9674786925315857,grad_norm: 0.7779226385706772, iteration: 361577
loss: 1.004167079925537,grad_norm: 0.8392332092153056, iteration: 361578
loss: 0.9878442883491516,grad_norm: 0.7920634163036959, iteration: 361579
loss: 1.0069243907928467,grad_norm: 0.8065543403841817, iteration: 361580
loss: 1.0001370906829834,grad_norm: 0.7013712030647484, iteration: 361581
loss: 0.978205144405365,grad_norm: 0.8365157926668313, iteration: 361582
loss: 0.9858722686767578,grad_norm: 0.7458021038936897, iteration: 361583
loss: 1.155021071434021,grad_norm: 0.9999997635818856, iteration: 361584
loss: 1.0476263761520386,grad_norm: 0.9999992137339773, iteration: 361585
loss: 1.0116405487060547,grad_norm: 0.8635287014103732, iteration: 361586
loss: 0.9964373707771301,grad_norm: 0.9839660283978212, iteration: 361587
loss: 0.9992278814315796,grad_norm: 0.899393577504617, iteration: 361588
loss: 1.0080845355987549,grad_norm: 0.7800114270639598, iteration: 361589
loss: 1.0266766548156738,grad_norm: 1.0000000682399046, iteration: 361590
loss: 1.1161545515060425,grad_norm: 0.9999998227706186, iteration: 361591
loss: 0.9847179055213928,grad_norm: 0.7264727346622711, iteration: 361592
loss: 1.021834135055542,grad_norm: 0.9999991166152199, iteration: 361593
loss: 0.9753443598747253,grad_norm: 0.8414289698559325, iteration: 361594
loss: 1.019492506980896,grad_norm: 0.7911575509651951, iteration: 361595
loss: 0.9946298599243164,grad_norm: 0.9999995578781279, iteration: 361596
loss: 0.9690594673156738,grad_norm: 0.8755725534968722, iteration: 361597
loss: 1.032356858253479,grad_norm: 0.8394908379858352, iteration: 361598
loss: 0.9928023219108582,grad_norm: 0.9149829069234279, iteration: 361599
loss: 1.072413682937622,grad_norm: 0.999999772111772, iteration: 361600
loss: 1.029793620109558,grad_norm: 0.974154346012585, iteration: 361601
loss: 1.0161113739013672,grad_norm: 0.8581267673187903, iteration: 361602
loss: 0.9676238894462585,grad_norm: 0.9268984944156407, iteration: 361603
loss: 0.9837926030158997,grad_norm: 0.8784823626484429, iteration: 361604
loss: 1.2378885746002197,grad_norm: 0.9999998927451329, iteration: 361605
loss: 0.9775213599205017,grad_norm: 0.8434411167546267, iteration: 361606
loss: 1.0302762985229492,grad_norm: 1.0000000116534915, iteration: 361607
loss: 1.0201592445373535,grad_norm: 0.983464953205531, iteration: 361608
loss: 0.9979534149169922,grad_norm: 0.8220102678197503, iteration: 361609
loss: 1.022452712059021,grad_norm: 0.8445112019331776, iteration: 361610
loss: 1.0016820430755615,grad_norm: 0.8617361900185828, iteration: 361611
loss: 1.062029480934143,grad_norm: 0.9999992546361524, iteration: 361612
loss: 0.9732798337936401,grad_norm: 0.675303978574177, iteration: 361613
loss: 1.0440361499786377,grad_norm: 0.9999990929425062, iteration: 361614
loss: 0.9962067604064941,grad_norm: 0.747869561647758, iteration: 361615
loss: 0.9977043271064758,grad_norm: 0.692071611651547, iteration: 361616
loss: 0.993929386138916,grad_norm: 0.8378186572274671, iteration: 361617
loss: 0.9878754615783691,grad_norm: 0.8602736226885432, iteration: 361618
loss: 1.056580662727356,grad_norm: 0.9999991429292213, iteration: 361619
loss: 1.0142436027526855,grad_norm: 0.9999991019288214, iteration: 361620
loss: 1.0410693883895874,grad_norm: 0.8293963918274897, iteration: 361621
loss: 0.9772718548774719,grad_norm: 0.791529807102106, iteration: 361622
loss: 0.9690216183662415,grad_norm: 0.9721535754147761, iteration: 361623
loss: 0.9859312772750854,grad_norm: 0.7521996144212484, iteration: 361624
loss: 0.9928487539291382,grad_norm: 0.8014373418510946, iteration: 361625
loss: 0.9981489777565002,grad_norm: 0.8620476043874514, iteration: 361626
loss: 0.9645046591758728,grad_norm: 0.8318937316926277, iteration: 361627
loss: 1.0080677270889282,grad_norm: 0.9850627921571576, iteration: 361628
loss: 0.9944419264793396,grad_norm: 0.7762840920985677, iteration: 361629
loss: 1.0113351345062256,grad_norm: 0.8684339255730663, iteration: 361630
loss: 1.00563645362854,grad_norm: 0.8349538695531278, iteration: 361631
loss: 0.9599853754043579,grad_norm: 0.8307844350682811, iteration: 361632
loss: 0.9713277220726013,grad_norm: 0.799627336414887, iteration: 361633
loss: 1.0164074897766113,grad_norm: 0.7157218686536044, iteration: 361634
loss: 1.012865662574768,grad_norm: 0.8706064760262898, iteration: 361635
loss: 1.0494107007980347,grad_norm: 0.9322743734607354, iteration: 361636
loss: 1.0021281242370605,grad_norm: 0.9655442964261564, iteration: 361637
loss: 1.0299915075302124,grad_norm: 0.8146281911774287, iteration: 361638
loss: 0.9972189664840698,grad_norm: 0.9999990516177849, iteration: 361639
loss: 1.0335229635238647,grad_norm: 0.9109712069513024, iteration: 361640
loss: 1.109324336051941,grad_norm: 0.8907076905898598, iteration: 361641
loss: 1.015552043914795,grad_norm: 0.9636044039624485, iteration: 361642
loss: 0.9860354065895081,grad_norm: 0.824257131891829, iteration: 361643
loss: 0.9945629239082336,grad_norm: 0.8088693999032526, iteration: 361644
loss: 1.0068929195404053,grad_norm: 0.8488931537860308, iteration: 361645
loss: 1.0003505945205688,grad_norm: 0.8165725517873781, iteration: 361646
loss: 1.0845434665679932,grad_norm: 0.8382093064739153, iteration: 361647
loss: 1.0486938953399658,grad_norm: 0.806180077558479, iteration: 361648
loss: 1.0223695039749146,grad_norm: 0.9727912833119462, iteration: 361649
loss: 1.0334454774856567,grad_norm: 0.999999185538771, iteration: 361650
loss: 0.9962626099586487,grad_norm: 0.9999997911752737, iteration: 361651
loss: 1.0064656734466553,grad_norm: 0.9999994891063152, iteration: 361652
loss: 1.1924004554748535,grad_norm: 0.9999997280302679, iteration: 361653
loss: 1.0332834720611572,grad_norm: 0.6910655304965421, iteration: 361654
loss: 0.9925752878189087,grad_norm: 0.7901296166688132, iteration: 361655
loss: 1.3497816324234009,grad_norm: 0.999999929716988, iteration: 361656
loss: 1.0023983716964722,grad_norm: 0.6711142356525894, iteration: 361657
loss: 1.01089346408844,grad_norm: 0.9202761138056109, iteration: 361658
loss: 0.9842272996902466,grad_norm: 0.999999252472524, iteration: 361659
loss: 0.9735926389694214,grad_norm: 0.8458986563580286, iteration: 361660
loss: 0.9601544141769409,grad_norm: 0.9999990686519588, iteration: 361661
loss: 1.0011227130889893,grad_norm: 0.9999995411226125, iteration: 361662
loss: 1.0384286642074585,grad_norm: 0.9270678907157816, iteration: 361663
loss: 1.0157995223999023,grad_norm: 0.7517422080714613, iteration: 361664
loss: 1.0151432752609253,grad_norm: 0.803801498739543, iteration: 361665
loss: 0.9916378855705261,grad_norm: 0.9999991878559743, iteration: 361666
loss: 1.0056897401809692,grad_norm: 0.8045863470838116, iteration: 361667
loss: 1.0050541162490845,grad_norm: 0.8442639634539159, iteration: 361668
loss: 1.0158442258834839,grad_norm: 0.9484838908522152, iteration: 361669
loss: 0.9677175283432007,grad_norm: 0.9262249150223728, iteration: 361670
loss: 0.999874472618103,grad_norm: 0.9644008816153814, iteration: 361671
loss: 1.0416089296340942,grad_norm: 0.9999992267782992, iteration: 361672
loss: 0.9884320497512817,grad_norm: 0.999999028645592, iteration: 361673
loss: 0.9741738438606262,grad_norm: 0.7475512134676874, iteration: 361674
loss: 0.9934402108192444,grad_norm: 0.740115997308495, iteration: 361675
loss: 1.0091407299041748,grad_norm: 0.8690154462201386, iteration: 361676
loss: 0.9810367226600647,grad_norm: 0.8398663998069172, iteration: 361677
loss: 1.0940061807632446,grad_norm: 0.9999999924636478, iteration: 361678
loss: 1.0107420682907104,grad_norm: 0.8646598146855653, iteration: 361679
loss: 1.0315526723861694,grad_norm: 0.7811298727972298, iteration: 361680
loss: 1.0985206365585327,grad_norm: 0.9999990644573504, iteration: 361681
loss: 0.991543173789978,grad_norm: 0.8737382358697519, iteration: 361682
loss: 0.9947497248649597,grad_norm: 0.6759434243672737, iteration: 361683
loss: 1.0301182270050049,grad_norm: 0.9130439607505657, iteration: 361684
loss: 1.0017057657241821,grad_norm: 0.7396474753934436, iteration: 361685
loss: 1.2140671014785767,grad_norm: 0.9999991270382483, iteration: 361686
loss: 0.9954525828361511,grad_norm: 0.7406474589752632, iteration: 361687
loss: 1.0055395364761353,grad_norm: 0.8164570493059319, iteration: 361688
loss: 1.0161116123199463,grad_norm: 0.8060970022525704, iteration: 361689
loss: 0.9710527062416077,grad_norm: 0.8332079471119255, iteration: 361690
loss: 1.0019503831863403,grad_norm: 0.9184671251657968, iteration: 361691
loss: 1.0902938842773438,grad_norm: 0.7781417116119462, iteration: 361692
loss: 0.9607916474342346,grad_norm: 0.845455195128132, iteration: 361693
loss: 1.0286864042282104,grad_norm: 0.7133787450547555, iteration: 361694
loss: 1.021497130393982,grad_norm: 0.7742417510292601, iteration: 361695
loss: 0.9968610405921936,grad_norm: 0.8552028072524067, iteration: 361696
loss: 1.0031650066375732,grad_norm: 0.7817097570256086, iteration: 361697
loss: 0.9905884861946106,grad_norm: 0.8355227281234591, iteration: 361698
loss: 1.0238101482391357,grad_norm: 0.7682672309913935, iteration: 361699
loss: 1.0037708282470703,grad_norm: 0.7856574923427626, iteration: 361700
loss: 1.0224908590316772,grad_norm: 0.7170740165989211, iteration: 361701
loss: 1.04671311378479,grad_norm: 0.7312002792521411, iteration: 361702
loss: 1.012209177017212,grad_norm: 0.9795172712732885, iteration: 361703
loss: 0.98817378282547,grad_norm: 0.8182567110237929, iteration: 361704
loss: 0.9894760847091675,grad_norm: 0.8622566378070685, iteration: 361705
loss: 1.0001420974731445,grad_norm: 0.9999995839094347, iteration: 361706
loss: 1.003676414489746,grad_norm: 0.7512482902452146, iteration: 361707
loss: 0.960538387298584,grad_norm: 0.7391444014593589, iteration: 361708
loss: 1.1410932540893555,grad_norm: 0.735540943936975, iteration: 361709
loss: 1.0068910121917725,grad_norm: 0.879177023770597, iteration: 361710
loss: 1.0372244119644165,grad_norm: 0.8698792243844753, iteration: 361711
loss: 1.0098748207092285,grad_norm: 0.8105273025559407, iteration: 361712
loss: 0.9837200045585632,grad_norm: 0.763763710306038, iteration: 361713
loss: 0.9884703159332275,grad_norm: 0.820906188842569, iteration: 361714
loss: 0.9912514090538025,grad_norm: 0.7826316910259496, iteration: 361715
loss: 1.0090622901916504,grad_norm: 0.7285431750037142, iteration: 361716
loss: 1.0064513683319092,grad_norm: 0.7460581326724466, iteration: 361717
loss: 1.0596470832824707,grad_norm: 0.9999991577237687, iteration: 361718
loss: 1.0065594911575317,grad_norm: 0.7517846820982823, iteration: 361719
loss: 0.9990592002868652,grad_norm: 0.9999990203531564, iteration: 361720
loss: 0.9664117693901062,grad_norm: 0.7730252752998421, iteration: 361721
loss: 1.0040045976638794,grad_norm: 0.6485384887798454, iteration: 361722
loss: 0.9671780467033386,grad_norm: 0.8066867383829456, iteration: 361723
loss: 0.9876700639724731,grad_norm: 0.7595016535164019, iteration: 361724
loss: 1.0105468034744263,grad_norm: 0.9999991510956476, iteration: 361725
loss: 1.0030330419540405,grad_norm: 0.8229571158495432, iteration: 361726
loss: 1.0209476947784424,grad_norm: 0.853529066890226, iteration: 361727
loss: 0.9726380109786987,grad_norm: 0.8397032417650386, iteration: 361728
loss: 1.000727891921997,grad_norm: 0.8124227825110678, iteration: 361729
loss: 0.9530542492866516,grad_norm: 0.8994481437909936, iteration: 361730
loss: 1.0406838655471802,grad_norm: 0.8067062191779217, iteration: 361731
loss: 0.9949893355369568,grad_norm: 0.6552283764268995, iteration: 361732
loss: 0.9946325421333313,grad_norm: 0.7239445864531308, iteration: 361733
loss: 1.0248005390167236,grad_norm: 0.824435363546023, iteration: 361734
loss: 0.9780259132385254,grad_norm: 0.7489998702318607, iteration: 361735
loss: 0.9831056594848633,grad_norm: 0.8084100531528767, iteration: 361736
loss: 1.04950749874115,grad_norm: 0.7899952383869224, iteration: 361737
loss: 0.9642454385757446,grad_norm: 0.914477707031949, iteration: 361738
loss: 1.0201740264892578,grad_norm: 0.784942091711362, iteration: 361739
loss: 1.0623456239700317,grad_norm: 0.9999994100016737, iteration: 361740
loss: 0.9836205244064331,grad_norm: 0.7944306414523047, iteration: 361741
loss: 0.9945157766342163,grad_norm: 0.8363691477441554, iteration: 361742
loss: 0.9677872657775879,grad_norm: 0.8670882668550427, iteration: 361743
loss: 0.9725680351257324,grad_norm: 0.7887735088042891, iteration: 361744
loss: 0.9880715012550354,grad_norm: 0.7993537280416331, iteration: 361745
loss: 1.066548466682434,grad_norm: 0.9999994911101422, iteration: 361746
loss: 1.0859299898147583,grad_norm: 0.9999997773932956, iteration: 361747
loss: 1.0347944498062134,grad_norm: 0.806545243416488, iteration: 361748
loss: 0.9617587327957153,grad_norm: 0.8977590578279224, iteration: 361749
loss: 1.025679588317871,grad_norm: 0.8331552869711716, iteration: 361750
loss: 0.9961193799972534,grad_norm: 0.7004398159368921, iteration: 361751
loss: 0.9841098785400391,grad_norm: 0.7049367340689994, iteration: 361752
loss: 0.9937917590141296,grad_norm: 0.9533178863438238, iteration: 361753
loss: 1.0024014711380005,grad_norm: 0.7356761110724607, iteration: 361754
loss: 1.0235408544540405,grad_norm: 0.9999991330017635, iteration: 361755
loss: 1.0845986604690552,grad_norm: 0.8476586658769766, iteration: 361756
loss: 1.0683355331420898,grad_norm: 0.9999994587896925, iteration: 361757
loss: 0.9968398809432983,grad_norm: 0.7320708524511265, iteration: 361758
loss: 1.0031237602233887,grad_norm: 0.9004531275420384, iteration: 361759
loss: 0.9937807321548462,grad_norm: 0.824533996323372, iteration: 361760
loss: 1.0199286937713623,grad_norm: 0.7096603133811309, iteration: 361761
loss: 1.0388435125350952,grad_norm: 0.9022281855631906, iteration: 361762
loss: 1.012580394744873,grad_norm: 0.8855479508972319, iteration: 361763
loss: 1.0477027893066406,grad_norm: 0.9999996955658536, iteration: 361764
loss: 1.0265722274780273,grad_norm: 0.9084466410737349, iteration: 361765
loss: 1.0296845436096191,grad_norm: 0.9999995669174182, iteration: 361766
loss: 0.9891431331634521,grad_norm: 0.828796477069457, iteration: 361767
loss: 1.0247858762741089,grad_norm: 0.8797091665959101, iteration: 361768
loss: 1.0108553171157837,grad_norm: 0.9139997778747903, iteration: 361769
loss: 0.9723619818687439,grad_norm: 0.787657401467599, iteration: 361770
loss: 0.9998871088027954,grad_norm: 0.9162756902515875, iteration: 361771
loss: 1.176767349243164,grad_norm: 0.9999998344334299, iteration: 361772
loss: 1.1384005546569824,grad_norm: 0.9999994941111477, iteration: 361773
loss: 1.027335286140442,grad_norm: 0.850783133163766, iteration: 361774
loss: 1.0223456621170044,grad_norm: 0.9120710335488146, iteration: 361775
loss: 1.0178210735321045,grad_norm: 0.8257957327013151, iteration: 361776
loss: 1.0278781652450562,grad_norm: 0.9283861916405379, iteration: 361777
loss: 0.98930823802948,grad_norm: 0.9999995808728236, iteration: 361778
loss: 1.0383789539337158,grad_norm: 0.9119287911983066, iteration: 361779
loss: 1.0023993253707886,grad_norm: 0.9673726037935906, iteration: 361780
loss: 1.0317295789718628,grad_norm: 0.893603818072969, iteration: 361781
loss: 1.004001498222351,grad_norm: 0.8035945133341625, iteration: 361782
loss: 1.0011357069015503,grad_norm: 0.8853840370925963, iteration: 361783
loss: 1.0356779098510742,grad_norm: 0.9999990700268312, iteration: 361784
loss: 1.0238546133041382,grad_norm: 0.8872952313127044, iteration: 361785
loss: 0.9894675016403198,grad_norm: 0.7598310717035123, iteration: 361786
loss: 1.0634533166885376,grad_norm: 0.9999993473282023, iteration: 361787
loss: 0.9698566794395447,grad_norm: 0.8957896586513161, iteration: 361788
loss: 1.0202351808547974,grad_norm: 0.9999999294684301, iteration: 361789
loss: 1.0116733312606812,grad_norm: 0.8792233200809858, iteration: 361790
loss: 1.034983515739441,grad_norm: 0.9999991769180173, iteration: 361791
loss: 1.0175198316574097,grad_norm: 0.7568326356706471, iteration: 361792
loss: 0.9838647246360779,grad_norm: 0.9999995297516218, iteration: 361793
loss: 1.0059090852737427,grad_norm: 0.8345092760353547, iteration: 361794
loss: 0.9801543354988098,grad_norm: 0.9401415963043556, iteration: 361795
loss: 1.009021282196045,grad_norm: 0.9031461894601021, iteration: 361796
loss: 1.019783854484558,grad_norm: 0.8137006662038838, iteration: 361797
loss: 0.9775865077972412,grad_norm: 0.7850818233884248, iteration: 361798
loss: 0.9827873110771179,grad_norm: 0.7334432589325127, iteration: 361799
loss: 0.9596622586250305,grad_norm: 0.9999991215729321, iteration: 361800
loss: 1.107340931892395,grad_norm: 0.9933262120528588, iteration: 361801
loss: 0.9687192440032959,grad_norm: 0.8024047418683192, iteration: 361802
loss: 1.0018668174743652,grad_norm: 0.8888563999153017, iteration: 361803
loss: 0.9706695079803467,grad_norm: 0.758298775403634, iteration: 361804
loss: 1.0626513957977295,grad_norm: 0.99999911540676, iteration: 361805
loss: 0.9719022512435913,grad_norm: 0.8177969441446149, iteration: 361806
loss: 1.0115892887115479,grad_norm: 0.9349492644446326, iteration: 361807
loss: 1.0087928771972656,grad_norm: 0.8473976099912357, iteration: 361808
loss: 0.9932097792625427,grad_norm: 0.6718380023801447, iteration: 361809
loss: 0.985334038734436,grad_norm: 0.8026525591853909, iteration: 361810
loss: 0.9741416573524475,grad_norm: 0.8486000682938887, iteration: 361811
loss: 0.9835075736045837,grad_norm: 0.8030024756001138, iteration: 361812
loss: 0.9910938739776611,grad_norm: 0.7979506233708907, iteration: 361813
loss: 0.9636915326118469,grad_norm: 0.9285877750736804, iteration: 361814
loss: 1.0000258684158325,grad_norm: 0.9006994714754998, iteration: 361815
loss: 0.9699950218200684,grad_norm: 0.8351338391191362, iteration: 361816
loss: 0.9958849549293518,grad_norm: 0.7125182246995158, iteration: 361817
loss: 1.00089430809021,grad_norm: 0.9420744182356463, iteration: 361818
loss: 0.971986711025238,grad_norm: 0.8258452696025925, iteration: 361819
loss: 1.0350685119628906,grad_norm: 0.931074520023147, iteration: 361820
loss: 0.980893611907959,grad_norm: 0.9882160415177074, iteration: 361821
loss: 1.1096888780593872,grad_norm: 0.9999993758746822, iteration: 361822
loss: 0.9548752903938293,grad_norm: 0.8854954528054945, iteration: 361823
loss: 1.016501784324646,grad_norm: 0.8945445006751436, iteration: 361824
loss: 1.0295391082763672,grad_norm: 0.9999992184656302, iteration: 361825
loss: 0.9932091236114502,grad_norm: 0.9701092259497511, iteration: 361826
loss: 1.0010440349578857,grad_norm: 0.841304846811465, iteration: 361827
loss: 1.3488898277282715,grad_norm: 0.999999540682758, iteration: 361828
loss: 0.9844043850898743,grad_norm: 0.9391330105614685, iteration: 361829
loss: 1.013405203819275,grad_norm: 0.8332029164595102, iteration: 361830
loss: 1.0338618755340576,grad_norm: 0.8491601727171958, iteration: 361831
loss: 1.041139006614685,grad_norm: 0.9550092380640322, iteration: 361832
loss: 1.0337998867034912,grad_norm: 0.7483688797037193, iteration: 361833
loss: 1.0344493389129639,grad_norm: 0.8120539539840872, iteration: 361834
loss: 0.9922757148742676,grad_norm: 0.9999994727624352, iteration: 361835
loss: 0.9848500490188599,grad_norm: 0.8737283913467652, iteration: 361836
loss: 1.0094579458236694,grad_norm: 0.7944552474847064, iteration: 361837
loss: 1.0857945680618286,grad_norm: 0.9999996382101762, iteration: 361838
loss: 0.9928774237632751,grad_norm: 0.9314275117222534, iteration: 361839
loss: 0.9779354333877563,grad_norm: 0.9628951368083186, iteration: 361840
loss: 1.0317760705947876,grad_norm: 0.8197328826291431, iteration: 361841
loss: 1.054786205291748,grad_norm: 0.9999992554323945, iteration: 361842
loss: 1.017387866973877,grad_norm: 0.7675700211068761, iteration: 361843
loss: 1.00129234790802,grad_norm: 0.7156940838310467, iteration: 361844
loss: 1.0822501182556152,grad_norm: 0.9545761040717768, iteration: 361845
loss: 1.1504411697387695,grad_norm: 0.9999998703111638, iteration: 361846
loss: 1.0018566846847534,grad_norm: 0.8744892165191677, iteration: 361847
loss: 0.9867871403694153,grad_norm: 0.8017705903090379, iteration: 361848
loss: 0.9891703128814697,grad_norm: 0.9558226668616622, iteration: 361849
loss: 1.0297924280166626,grad_norm: 0.7297250029311307, iteration: 361850
loss: 1.0064259767532349,grad_norm: 0.7806711577613022, iteration: 361851
loss: 1.0400898456573486,grad_norm: 0.9999991645780496, iteration: 361852
loss: 1.0079290866851807,grad_norm: 0.6807811068524064, iteration: 361853
loss: 1.1233091354370117,grad_norm: 0.8138270151701564, iteration: 361854
loss: 1.0143842697143555,grad_norm: 0.7492468858304616, iteration: 361855
loss: 0.9883846640586853,grad_norm: 0.8517963679193081, iteration: 361856
loss: 1.006679654121399,grad_norm: 0.8981053988213495, iteration: 361857
loss: 0.9848031997680664,grad_norm: 0.8737014233015833, iteration: 361858
loss: 1.0126768350601196,grad_norm: 0.7723948663121897, iteration: 361859
loss: 1.0111123323440552,grad_norm: 0.8033336952571613, iteration: 361860
loss: 0.979666531085968,grad_norm: 0.9999997411532258, iteration: 361861
loss: 1.0454941987991333,grad_norm: 0.7664111281949259, iteration: 361862
loss: 1.0038821697235107,grad_norm: 0.7985306454630593, iteration: 361863
loss: 0.9798648953437805,grad_norm: 0.7547153869956746, iteration: 361864
loss: 1.0036699771881104,grad_norm: 0.8013569174959977, iteration: 361865
loss: 1.1136481761932373,grad_norm: 0.9655990523282165, iteration: 361866
loss: 0.9984854459762573,grad_norm: 0.7885405000189794, iteration: 361867
loss: 0.9938529133796692,grad_norm: 0.7683367321494216, iteration: 361868
loss: 0.9903483390808105,grad_norm: 0.7122547852968547, iteration: 361869
loss: 1.0688804388046265,grad_norm: 0.7818072883604799, iteration: 361870
loss: 1.0309361219406128,grad_norm: 0.9443945923348314, iteration: 361871
loss: 1.00846266746521,grad_norm: 0.7062918498987514, iteration: 361872
loss: 1.0086219310760498,grad_norm: 0.8403651129497676, iteration: 361873
loss: 1.026978850364685,grad_norm: 0.7977362027153404, iteration: 361874
loss: 1.0273104906082153,grad_norm: 0.7708908840258873, iteration: 361875
loss: 0.9746775031089783,grad_norm: 0.7793532126189103, iteration: 361876
loss: 1.0153220891952515,grad_norm: 0.7581137093402369, iteration: 361877
loss: 0.9816386699676514,grad_norm: 0.8168478059046574, iteration: 361878
loss: 1.0035465955734253,grad_norm: 0.9281158004659393, iteration: 361879
loss: 0.9897187352180481,grad_norm: 0.9576033794145105, iteration: 361880
loss: 1.001675009727478,grad_norm: 0.802724166884544, iteration: 361881
loss: 1.0014411211013794,grad_norm: 0.73095272096927, iteration: 361882
loss: 1.0079668760299683,grad_norm: 0.9263480337647692, iteration: 361883
loss: 0.9971145987510681,grad_norm: 0.8435799897039709, iteration: 361884
loss: 1.034279227256775,grad_norm: 0.8005346392997087, iteration: 361885
loss: 1.0459246635437012,grad_norm: 0.9999991325541697, iteration: 361886
loss: 0.9900833368301392,grad_norm: 0.8057702413785438, iteration: 361887
loss: 0.9949227571487427,grad_norm: 0.8923491971130685, iteration: 361888
loss: 0.9879600405693054,grad_norm: 0.8984662282018365, iteration: 361889
loss: 0.9805600643157959,grad_norm: 0.7929543901755166, iteration: 361890
loss: 0.998794436454773,grad_norm: 0.7304045430707317, iteration: 361891
loss: 0.9679072499275208,grad_norm: 0.9999990871744304, iteration: 361892
loss: 0.996922492980957,grad_norm: 0.9066232261576445, iteration: 361893
loss: 0.9988203644752502,grad_norm: 0.9414648335506742, iteration: 361894
loss: 1.031063437461853,grad_norm: 0.9953434787908314, iteration: 361895
loss: 0.9680618047714233,grad_norm: 0.9039058365628487, iteration: 361896
loss: 0.986515462398529,grad_norm: 0.9343374161391457, iteration: 361897
loss: 1.0141390562057495,grad_norm: 0.999999178189062, iteration: 361898
loss: 1.0115760564804077,grad_norm: 0.8039031864728311, iteration: 361899
loss: 0.9887424111366272,grad_norm: 0.8040892267888862, iteration: 361900
loss: 0.99554842710495,grad_norm: 0.952393087617638, iteration: 361901
loss: 1.020668864250183,grad_norm: 0.7801042890175246, iteration: 361902
loss: 1.0005563497543335,grad_norm: 0.8644150294694097, iteration: 361903
loss: 1.0251193046569824,grad_norm: 0.7306600431378977, iteration: 361904
loss: 0.995688259601593,grad_norm: 0.9163627768924076, iteration: 361905
loss: 1.0015560388565063,grad_norm: 0.7253282930734127, iteration: 361906
loss: 0.9996833205223083,grad_norm: 0.702689235725206, iteration: 361907
loss: 1.0125699043273926,grad_norm: 0.9164156654513618, iteration: 361908
loss: 0.9696031212806702,grad_norm: 0.8095757831287466, iteration: 361909
loss: 1.0001707077026367,grad_norm: 0.7899328651385215, iteration: 361910
loss: 0.9894629716873169,grad_norm: 0.7885482042685311, iteration: 361911
loss: 1.0253269672393799,grad_norm: 0.7835653746344199, iteration: 361912
loss: 0.9624064564704895,grad_norm: 0.7254262034136681, iteration: 361913
loss: 0.9673446416854858,grad_norm: 0.895657035857264, iteration: 361914
loss: 1.02555251121521,grad_norm: 0.8214817988232267, iteration: 361915
loss: 1.0371261835098267,grad_norm: 0.8560214653493137, iteration: 361916
loss: 0.9954556226730347,grad_norm: 0.780326782513138, iteration: 361917
loss: 1.0304062366485596,grad_norm: 0.9999991462148907, iteration: 361918
loss: 1.0012643337249756,grad_norm: 0.783695883750575, iteration: 361919
loss: 1.0294468402862549,grad_norm: 0.7822353442580112, iteration: 361920
loss: 0.9997974634170532,grad_norm: 0.8072691542213051, iteration: 361921
loss: 1.0983186960220337,grad_norm: 0.9611468499055482, iteration: 361922
loss: 1.0043362379074097,grad_norm: 0.774518877093736, iteration: 361923
loss: 1.0413844585418701,grad_norm: 0.9311886482990571, iteration: 361924
loss: 0.9766750931739807,grad_norm: 0.8289364189013781, iteration: 361925
loss: 0.9965426921844482,grad_norm: 0.8235158767003266, iteration: 361926
loss: 1.0499658584594727,grad_norm: 0.7677289730888452, iteration: 361927
loss: 0.9964237213134766,grad_norm: 0.748873688179752, iteration: 361928
loss: 1.0227423906326294,grad_norm: 0.645141816099167, iteration: 361929
loss: 1.0028899908065796,grad_norm: 0.8812354933423275, iteration: 361930
loss: 1.0238583087921143,grad_norm: 0.9999992205320405, iteration: 361931
loss: 0.9822803139686584,grad_norm: 0.9214960285354683, iteration: 361932
loss: 0.9993191361427307,grad_norm: 0.8332951445475293, iteration: 361933
loss: 1.013150691986084,grad_norm: 0.8900336891626092, iteration: 361934
loss: 0.9987000226974487,grad_norm: 0.8388619800099654, iteration: 361935
loss: 0.9825842976570129,grad_norm: 0.999999129352906, iteration: 361936
loss: 1.0101096630096436,grad_norm: 0.6762814619853241, iteration: 361937
loss: 1.0136280059814453,grad_norm: 0.9999992507480809, iteration: 361938
loss: 1.023098111152649,grad_norm: 0.7925850454339483, iteration: 361939
loss: 1.0508794784545898,grad_norm: 0.9999990783624807, iteration: 361940
loss: 0.9983261823654175,grad_norm: 0.999999037172218, iteration: 361941
loss: 1.0150641202926636,grad_norm: 0.9613151687648103, iteration: 361942
loss: 0.9947261810302734,grad_norm: 0.7398304751836079, iteration: 361943
loss: 0.9771640300750732,grad_norm: 0.7065095196911798, iteration: 361944
loss: 0.9909896850585938,grad_norm: 0.6949919165565761, iteration: 361945
loss: 1.023736596107483,grad_norm: 0.9999994022452867, iteration: 361946
loss: 0.9845361113548279,grad_norm: 0.7543334066074373, iteration: 361947
loss: 1.0233947038650513,grad_norm: 0.7964975061519386, iteration: 361948
loss: 0.9375901222229004,grad_norm: 0.9033882068493431, iteration: 361949
loss: 1.0122233629226685,grad_norm: 0.8042428196947956, iteration: 361950
loss: 1.0142111778259277,grad_norm: 0.9915131399288345, iteration: 361951
loss: 0.9819701910018921,grad_norm: 0.7944626141756476, iteration: 361952
loss: 1.0107700824737549,grad_norm: 0.8713857102306418, iteration: 361953
loss: 0.9958783984184265,grad_norm: 0.7210577357050914, iteration: 361954
loss: 0.9530225992202759,grad_norm: 0.7177599249557425, iteration: 361955
loss: 0.9964226484298706,grad_norm: 0.9999998945107751, iteration: 361956
loss: 1.0225871801376343,grad_norm: 0.7495442242461252, iteration: 361957
loss: 1.0231964588165283,grad_norm: 0.8329450600033373, iteration: 361958
loss: 0.9848992824554443,grad_norm: 0.9999993453275607, iteration: 361959
loss: 1.0084478855133057,grad_norm: 0.6744950334622181, iteration: 361960
loss: 0.9631439447402954,grad_norm: 0.8540517934800353, iteration: 361961
loss: 1.0583535432815552,grad_norm: 0.8567674290338125, iteration: 361962
loss: 1.0252118110656738,grad_norm: 0.604924359200351, iteration: 361963
loss: 0.9914987683296204,grad_norm: 0.9617337182852685, iteration: 361964
loss: 1.0382661819458008,grad_norm: 0.9999997306337678, iteration: 361965
loss: 0.9833455681800842,grad_norm: 0.7816534712450353, iteration: 361966
loss: 1.0104748010635376,grad_norm: 0.8149511856301318, iteration: 361967
loss: 0.9780218601226807,grad_norm: 0.8658998512182752, iteration: 361968
loss: 1.027738094329834,grad_norm: 0.7999136739630448, iteration: 361969
loss: 0.9794825315475464,grad_norm: 0.999999375500336, iteration: 361970
loss: 0.987629234790802,grad_norm: 0.8629896210242847, iteration: 361971
loss: 0.9827989339828491,grad_norm: 0.8479209183348108, iteration: 361972
loss: 0.9818915128707886,grad_norm: 0.8670725362175004, iteration: 361973
loss: 1.0925923585891724,grad_norm: 0.852223178409657, iteration: 361974
loss: 1.0010058879852295,grad_norm: 0.8151965468991795, iteration: 361975
loss: 1.0118235349655151,grad_norm: 0.6658084780205866, iteration: 361976
loss: 0.9860298037528992,grad_norm: 0.7708789308292672, iteration: 361977
loss: 1.023512601852417,grad_norm: 0.8001174216686902, iteration: 361978
loss: 1.049741506576538,grad_norm: 0.9115186626363605, iteration: 361979
loss: 0.9824939370155334,grad_norm: 0.737828711482837, iteration: 361980
loss: 1.0591769218444824,grad_norm: 0.9999993396882262, iteration: 361981
loss: 0.9959462881088257,grad_norm: 0.7576131322576225, iteration: 361982
loss: 0.942897379398346,grad_norm: 0.8490762687178517, iteration: 361983
loss: 0.985684335231781,grad_norm: 0.9106324825485761, iteration: 361984
loss: 0.9993054866790771,grad_norm: 0.8667188478622032, iteration: 361985
loss: 0.9918437600135803,grad_norm: 0.8651562255419321, iteration: 361986
loss: 0.9995654225349426,grad_norm: 0.8281820088082497, iteration: 361987
loss: 1.0008436441421509,grad_norm: 0.8940895218732395, iteration: 361988
loss: 0.9955002069473267,grad_norm: 0.6756026661544727, iteration: 361989
loss: 1.0161211490631104,grad_norm: 0.9030156835813589, iteration: 361990
loss: 0.9985700845718384,grad_norm: 0.7422797009360906, iteration: 361991
loss: 0.9874017834663391,grad_norm: 0.762825838083579, iteration: 361992
loss: 1.1270238161087036,grad_norm: 1.000000057699033, iteration: 361993
loss: 1.0055301189422607,grad_norm: 0.6604132952419703, iteration: 361994
loss: 1.0332822799682617,grad_norm: 0.7602091176971119, iteration: 361995
loss: 0.9765378832817078,grad_norm: 0.736002276543573, iteration: 361996
loss: 0.9800657629966736,grad_norm: 0.8537869433399833, iteration: 361997
loss: 1.0211405754089355,grad_norm: 0.9999990497163029, iteration: 361998
loss: 0.9923203587532043,grad_norm: 0.8093417255595032, iteration: 361999
loss: 1.0110833644866943,grad_norm: 0.8303449325328637, iteration: 362000
loss: 1.0268663167953491,grad_norm: 0.9999991533221783, iteration: 362001
loss: 0.9745985865592957,grad_norm: 0.7719672635507929, iteration: 362002
loss: 1.0342925786972046,grad_norm: 0.7687687616839751, iteration: 362003
loss: 1.0279651880264282,grad_norm: 0.969480542024104, iteration: 362004
loss: 0.9833784699440002,grad_norm: 0.7581957713152575, iteration: 362005
loss: 1.008634328842163,grad_norm: 0.6587084381222855, iteration: 362006
loss: 0.9740811586380005,grad_norm: 0.8987642955601505, iteration: 362007
loss: 0.976202666759491,grad_norm: 0.824086450169847, iteration: 362008
loss: 1.0190526247024536,grad_norm: 0.7160638702616414, iteration: 362009
loss: 1.0334571599960327,grad_norm: 0.7531064881012943, iteration: 362010
loss: 0.9808802604675293,grad_norm: 0.831510016784932, iteration: 362011
loss: 0.9667263031005859,grad_norm: 0.9999991969671993, iteration: 362012
loss: 0.9779695272445679,grad_norm: 0.8329591470508358, iteration: 362013
loss: 0.9920709729194641,grad_norm: 0.99999915483646, iteration: 362014
loss: 1.0241788625717163,grad_norm: 0.7420357541284015, iteration: 362015
loss: 0.9913393259048462,grad_norm: 0.9999993625290226, iteration: 362016
loss: 1.0115963220596313,grad_norm: 0.8657990542245069, iteration: 362017
loss: 0.9995904564857483,grad_norm: 0.840898772025294, iteration: 362018
loss: 0.9990615844726562,grad_norm: 0.9832603775498866, iteration: 362019
loss: 1.0136765241622925,grad_norm: 0.9999989942626419, iteration: 362020
loss: 0.995288610458374,grad_norm: 0.8532546860857091, iteration: 362021
loss: 1.0542733669281006,grad_norm: 0.8801639896188477, iteration: 362022
loss: 1.0295907258987427,grad_norm: 0.7985122818933265, iteration: 362023
loss: 1.0190260410308838,grad_norm: 0.8107261964197559, iteration: 362024
loss: 1.0343469381332397,grad_norm: 0.8509822004097959, iteration: 362025
loss: 1.0848686695098877,grad_norm: 0.9999994120648776, iteration: 362026
loss: 1.0351485013961792,grad_norm: 0.8864968641536464, iteration: 362027
loss: 1.0830140113830566,grad_norm: 0.9999999567328487, iteration: 362028
loss: 1.010312795639038,grad_norm: 0.734313997467911, iteration: 362029
loss: 1.0044552087783813,grad_norm: 0.7235978371165309, iteration: 362030
loss: 1.0079083442687988,grad_norm: 0.8247622065376895, iteration: 362031
loss: 0.9546777009963989,grad_norm: 0.8366529880765612, iteration: 362032
loss: 1.00468111038208,grad_norm: 0.8240095425994957, iteration: 362033
loss: 1.0124671459197998,grad_norm: 0.8557238374214305, iteration: 362034
loss: 0.9634566307067871,grad_norm: 0.9999990422945998, iteration: 362035
loss: 0.9949284791946411,grad_norm: 0.6695208949617298, iteration: 362036
loss: 0.9965237975120544,grad_norm: 0.8611049051154461, iteration: 362037
loss: 0.9908996820449829,grad_norm: 0.7835161993418644, iteration: 362038
loss: 0.9711562991142273,grad_norm: 0.8258471466430768, iteration: 362039
loss: 1.0071460008621216,grad_norm: 0.8948791266601289, iteration: 362040
loss: 1.152072548866272,grad_norm: 0.9999991384101927, iteration: 362041
loss: 1.0245379209518433,grad_norm: 0.9052155006503343, iteration: 362042
loss: 0.9968405365943909,grad_norm: 0.8996309892727566, iteration: 362043
loss: 1.006917953491211,grad_norm: 0.7722474682193726, iteration: 362044
loss: 0.974810004234314,grad_norm: 0.7676208487349259, iteration: 362045
loss: 1.1026668548583984,grad_norm: 0.9999996810524642, iteration: 362046
loss: 1.0403531789779663,grad_norm: 0.9999991677272678, iteration: 362047
loss: 1.2240347862243652,grad_norm: 0.9999997100219677, iteration: 362048
loss: 0.949010968208313,grad_norm: 0.7455999001089026, iteration: 362049
loss: 0.9603644013404846,grad_norm: 0.9999990879834029, iteration: 362050
loss: 1.001102328300476,grad_norm: 0.8544722096684084, iteration: 362051
loss: 0.9957075715065002,grad_norm: 0.7847512595524454, iteration: 362052
loss: 1.027130365371704,grad_norm: 0.9591733112203878, iteration: 362053
loss: 1.0225646495819092,grad_norm: 0.7821902055700911, iteration: 362054
loss: 0.971918523311615,grad_norm: 0.8918449803551185, iteration: 362055
loss: 1.0291924476623535,grad_norm: 0.9198578129574877, iteration: 362056
loss: 0.9795186519622803,grad_norm: 0.8158505829955611, iteration: 362057
loss: 1.01796555519104,grad_norm: 0.8551442190028513, iteration: 362058
loss: 0.9795565605163574,grad_norm: 0.8057412371326689, iteration: 362059
loss: 1.015864610671997,grad_norm: 0.9999995065826339, iteration: 362060
loss: 1.0362200736999512,grad_norm: 0.7787858120416513, iteration: 362061
loss: 0.9870568513870239,grad_norm: 0.7006653825907102, iteration: 362062
loss: 1.037071943283081,grad_norm: 0.9428527355185764, iteration: 362063
loss: 1.0554581880569458,grad_norm: 0.9999990484678516, iteration: 362064
loss: 1.0360348224639893,grad_norm: 0.8688054685223279, iteration: 362065
loss: 0.9754826426506042,grad_norm: 0.9133296840830739, iteration: 362066
loss: 1.0253753662109375,grad_norm: 0.9322901376746008, iteration: 362067
loss: 1.3686814308166504,grad_norm: 0.9999999291925922, iteration: 362068
loss: 0.9829812049865723,grad_norm: 0.893781013561558, iteration: 362069
loss: 1.0256829261779785,grad_norm: 0.9999995358277062, iteration: 362070
loss: 1.0710514783859253,grad_norm: 0.876891790177115, iteration: 362071
loss: 1.0050318241119385,grad_norm: 0.7805840472297837, iteration: 362072
loss: 1.0059822797775269,grad_norm: 0.9292906972324659, iteration: 362073
loss: 0.9977297186851501,grad_norm: 0.7558846977615239, iteration: 362074
loss: 1.0655345916748047,grad_norm: 0.9999990839533142, iteration: 362075
loss: 1.1192773580551147,grad_norm: 0.9999999691641558, iteration: 362076
loss: 1.0964542627334595,grad_norm: 0.8533387041881715, iteration: 362077
loss: 0.9912412166595459,grad_norm: 0.8964467187967371, iteration: 362078
loss: 1.291562557220459,grad_norm: 0.9999999625363363, iteration: 362079
loss: 0.9926226139068604,grad_norm: 0.8997903084219337, iteration: 362080
loss: 1.015660047531128,grad_norm: 0.8123624275649487, iteration: 362081
loss: 1.0080257654190063,grad_norm: 0.9999990709717422, iteration: 362082
loss: 1.0792242288589478,grad_norm: 0.9932157610140646, iteration: 362083
loss: 1.0603338479995728,grad_norm: 0.9999994929138004, iteration: 362084
loss: 1.0206143856048584,grad_norm: 0.9530250771936555, iteration: 362085
loss: 1.0155538320541382,grad_norm: 0.876329593764344, iteration: 362086
loss: 1.0292052030563354,grad_norm: 0.9089583221300495, iteration: 362087
loss: 1.013774037361145,grad_norm: 0.9139078229084551, iteration: 362088
loss: 1.0003008842468262,grad_norm: 0.8086572621265974, iteration: 362089
loss: 0.9865550398826599,grad_norm: 0.8014798701181369, iteration: 362090
loss: 1.012843370437622,grad_norm: 0.7671109000729475, iteration: 362091
loss: 0.9915666580200195,grad_norm: 0.8405665969426933, iteration: 362092
loss: 0.9995242953300476,grad_norm: 0.7729045540706415, iteration: 362093
loss: 0.9920954704284668,grad_norm: 0.7975969008618267, iteration: 362094
loss: 0.994682252407074,grad_norm: 0.9076808973221011, iteration: 362095
loss: 1.018311858177185,grad_norm: 0.8640961445029451, iteration: 362096
loss: 1.0072933435440063,grad_norm: 0.9169620720447198, iteration: 362097
loss: 0.9700692296028137,grad_norm: 0.7374540898804897, iteration: 362098
loss: 1.0007750988006592,grad_norm: 0.8798626271670934, iteration: 362099
loss: 0.9791933298110962,grad_norm: 0.8817139601562006, iteration: 362100
loss: 1.033556580543518,grad_norm: 0.7937260700926895, iteration: 362101
loss: 0.9930015206336975,grad_norm: 0.7704174262432923, iteration: 362102
loss: 0.9977540969848633,grad_norm: 0.8493523438417616, iteration: 362103
loss: 1.019349455833435,grad_norm: 0.8115547888864881, iteration: 362104
loss: 1.0344297885894775,grad_norm: 0.7856728375343056, iteration: 362105
loss: 1.043119192123413,grad_norm: 0.9999994524077632, iteration: 362106
loss: 1.0857255458831787,grad_norm: 0.8596381797784007, iteration: 362107
loss: 1.0435290336608887,grad_norm: 0.7524299659797247, iteration: 362108
loss: 0.9592039585113525,grad_norm: 0.7993062048148555, iteration: 362109
loss: 1.0131436586380005,grad_norm: 0.7573028682503079, iteration: 362110
loss: 0.9607964158058167,grad_norm: 0.8331580066593952, iteration: 362111
loss: 1.0489563941955566,grad_norm: 0.7283125042236892, iteration: 362112
loss: 1.009579062461853,grad_norm: 0.9506991751900817, iteration: 362113
loss: 1.0195070505142212,grad_norm: 0.8264031435181868, iteration: 362114
loss: 0.9813535809516907,grad_norm: 0.8687191436943936, iteration: 362115
loss: 0.9739567637443542,grad_norm: 0.7981650877574741, iteration: 362116
loss: 1.0063114166259766,grad_norm: 0.6722502859988405, iteration: 362117
loss: 1.0879969596862793,grad_norm: 0.9999995294960026, iteration: 362118
loss: 1.0066089630126953,grad_norm: 0.9069424829631053, iteration: 362119
loss: 1.0042741298675537,grad_norm: 0.8133109232222362, iteration: 362120
loss: 1.0244572162628174,grad_norm: 0.9999991846691966, iteration: 362121
loss: 1.000440239906311,grad_norm: 0.7417881931412443, iteration: 362122
loss: 0.9985975623130798,grad_norm: 0.7625597626871293, iteration: 362123
loss: 1.0070148706436157,grad_norm: 0.9785780430739667, iteration: 362124
loss: 1.0682610273361206,grad_norm: 0.864430135526176, iteration: 362125
loss: 1.0062642097473145,grad_norm: 0.9999991219744183, iteration: 362126
loss: 0.9860078692436218,grad_norm: 0.8949263334272416, iteration: 362127
loss: 0.9982430934906006,grad_norm: 0.8488325265793647, iteration: 362128
loss: 1.0137461423873901,grad_norm: 0.7556496314827945, iteration: 362129
loss: 1.0238208770751953,grad_norm: 0.9999997683872013, iteration: 362130
loss: 1.0359607934951782,grad_norm: 0.8423487659299598, iteration: 362131
loss: 1.0362863540649414,grad_norm: 0.9054000156007647, iteration: 362132
loss: 1.0056898593902588,grad_norm: 0.8282353913239766, iteration: 362133
loss: 1.0006972551345825,grad_norm: 0.9999990312209405, iteration: 362134
loss: 0.9841771721839905,grad_norm: 0.8762801270444058, iteration: 362135
loss: 0.9976871013641357,grad_norm: 0.8306773978846218, iteration: 362136
loss: 1.0324313640594482,grad_norm: 0.999999670815275, iteration: 362137
loss: 1.0003485679626465,grad_norm: 0.8065818780776929, iteration: 362138
loss: 1.0157727003097534,grad_norm: 0.7782093108376764, iteration: 362139
loss: 0.9976192712783813,grad_norm: 0.866458771159344, iteration: 362140
loss: 1.3454210758209229,grad_norm: 0.9999994005670242, iteration: 362141
loss: 1.0599143505096436,grad_norm: 0.9999991589059254, iteration: 362142
loss: 1.048060417175293,grad_norm: 0.865464021075856, iteration: 362143
loss: 0.9907588362693787,grad_norm: 0.8827365526690216, iteration: 362144
loss: 0.9942886829376221,grad_norm: 0.8368912071108866, iteration: 362145
loss: 1.203261137008667,grad_norm: 0.9999998709665066, iteration: 362146
loss: 1.0060135126113892,grad_norm: 0.8748574395670191, iteration: 362147
loss: 1.0005799531936646,grad_norm: 0.9884297009243992, iteration: 362148
loss: 0.9991187453269958,grad_norm: 0.8076692962170036, iteration: 362149
loss: 0.9550940990447998,grad_norm: 0.8346742382315543, iteration: 362150
loss: 1.0056129693984985,grad_norm: 0.7412002773772309, iteration: 362151
loss: 1.0311392545700073,grad_norm: 0.8500460385519462, iteration: 362152
loss: 1.057564377784729,grad_norm: 0.9999993930543324, iteration: 362153
loss: 1.013479471206665,grad_norm: 0.8221338200797829, iteration: 362154
loss: 1.0139739513397217,grad_norm: 0.7788390217912178, iteration: 362155
loss: 0.948696494102478,grad_norm: 0.7433139908475047, iteration: 362156
loss: 1.017068862915039,grad_norm: 0.7743337235490483, iteration: 362157
loss: 1.1871706247329712,grad_norm: 0.9999998318185913, iteration: 362158
loss: 1.1223244667053223,grad_norm: 0.9999991870661735, iteration: 362159
loss: 1.086458683013916,grad_norm: 0.9999992205409673, iteration: 362160
loss: 0.9967194199562073,grad_norm: 0.9999991097989203, iteration: 362161
loss: 0.9852777719497681,grad_norm: 0.8267703599460068, iteration: 362162
loss: 1.2080950736999512,grad_norm: 0.9999995977914035, iteration: 362163
loss: 1.2517166137695312,grad_norm: 0.9999999583382113, iteration: 362164
loss: 1.0069315433502197,grad_norm: 0.8118239017305311, iteration: 362165
loss: 1.0318045616149902,grad_norm: 0.99999913242486, iteration: 362166
loss: 0.981905460357666,grad_norm: 0.7977667548340335, iteration: 362167
loss: 1.0269227027893066,grad_norm: 0.8891937515960676, iteration: 362168
loss: 1.0452828407287598,grad_norm: 0.9999999399722399, iteration: 362169
loss: 1.017770767211914,grad_norm: 0.7894636418254936, iteration: 362170
loss: 0.9874047636985779,grad_norm: 0.7851146276782927, iteration: 362171
loss: 0.9727122783660889,grad_norm: 0.9999990635391672, iteration: 362172
loss: 1.0069386959075928,grad_norm: 0.7369855792419857, iteration: 362173
loss: 0.963708221912384,grad_norm: 0.7159973942876638, iteration: 362174
loss: 1.0120748281478882,grad_norm: 0.8064410642852269, iteration: 362175
loss: 1.0408920049667358,grad_norm: 0.9467282355508131, iteration: 362176
loss: 0.9884625673294067,grad_norm: 0.7542599188655256, iteration: 362177
loss: 0.9733132123947144,grad_norm: 0.677047243578526, iteration: 362178
loss: 1.0149294137954712,grad_norm: 0.7020315985645436, iteration: 362179
loss: 1.0118573904037476,grad_norm: 0.9999998273194154, iteration: 362180
loss: 1.0022560358047485,grad_norm: 0.6449694750596942, iteration: 362181
loss: 1.013941764831543,grad_norm: 0.9999993426416451, iteration: 362182
loss: 1.0278364419937134,grad_norm: 0.7814821693772013, iteration: 362183
loss: 1.0352134704589844,grad_norm: 0.8518413135497083, iteration: 362184
loss: 1.0181044340133667,grad_norm: 0.8053893192409117, iteration: 362185
loss: 1.0728063583374023,grad_norm: 0.9999993070120277, iteration: 362186
loss: 1.0250788927078247,grad_norm: 0.9080372661823957, iteration: 362187
loss: 0.982646107673645,grad_norm: 0.8626622298086288, iteration: 362188
loss: 1.101396083831787,grad_norm: 1.0000000426735738, iteration: 362189
loss: 1.008347988128662,grad_norm: 0.9664116807939915, iteration: 362190
loss: 0.9947484135627747,grad_norm: 0.737875426641629, iteration: 362191
loss: 0.9797598719596863,grad_norm: 0.6999023276513285, iteration: 362192
loss: 0.9425488114356995,grad_norm: 0.8446355347100648, iteration: 362193
loss: 1.0024396181106567,grad_norm: 0.8691811450127072, iteration: 362194
loss: 0.9752902388572693,grad_norm: 0.9999991146092475, iteration: 362195
loss: 1.0223345756530762,grad_norm: 0.9999990491166827, iteration: 362196
loss: 0.9762701392173767,grad_norm: 0.9141577103552109, iteration: 362197
loss: 0.9856367707252502,grad_norm: 0.8623645437637528, iteration: 362198
loss: 0.994269073009491,grad_norm: 0.754214022312413, iteration: 362199
loss: 1.0087840557098389,grad_norm: 0.8604996151880023, iteration: 362200
loss: 1.0116666555404663,grad_norm: 0.7315069612190186, iteration: 362201
loss: 0.9906159043312073,grad_norm: 0.753505768792295, iteration: 362202
loss: 0.990689218044281,grad_norm: 0.7911858201819433, iteration: 362203
loss: 1.0272027254104614,grad_norm: 0.9346150399294908, iteration: 362204
loss: 1.0024255514144897,grad_norm: 0.9999997856475055, iteration: 362205
loss: 1.0237175226211548,grad_norm: 0.6982044253278306, iteration: 362206
loss: 1.01018226146698,grad_norm: 0.9999995539511081, iteration: 362207
loss: 1.0028774738311768,grad_norm: 0.8194375127125372, iteration: 362208
loss: 0.9442464709281921,grad_norm: 0.8056398276311407, iteration: 362209
loss: 1.006455659866333,grad_norm: 0.999999209717553, iteration: 362210
loss: 1.1102361679077148,grad_norm: 0.9999991639065926, iteration: 362211
loss: 0.9676188826560974,grad_norm: 0.7840572634204831, iteration: 362212
loss: 1.006269097328186,grad_norm: 0.9423177495727986, iteration: 362213
loss: 0.9744529128074646,grad_norm: 0.7511076917533523, iteration: 362214
loss: 0.9897643327713013,grad_norm: 0.8174585751823007, iteration: 362215
loss: 0.9710957407951355,grad_norm: 0.8591824361685506, iteration: 362216
loss: 1.012191891670227,grad_norm: 0.8539161804921906, iteration: 362217
loss: 0.966540515422821,grad_norm: 0.7644266858120624, iteration: 362218
loss: 1.0088670253753662,grad_norm: 0.7749683549316961, iteration: 362219
loss: 1.003557801246643,grad_norm: 0.9999999501697012, iteration: 362220
loss: 1.0051718950271606,grad_norm: 0.6935370745197484, iteration: 362221
loss: 1.1039204597473145,grad_norm: 0.9999999491306577, iteration: 362222
loss: 1.0177909135818481,grad_norm: 0.8126611785378187, iteration: 362223
loss: 1.00150728225708,grad_norm: 0.8461090864466817, iteration: 362224
loss: 1.0285499095916748,grad_norm: 0.6636189217176766, iteration: 362225
loss: 1.071999192237854,grad_norm: 0.9999993117196866, iteration: 362226
loss: 1.008638620376587,grad_norm: 0.9006005535970462, iteration: 362227
loss: 0.9649913907051086,grad_norm: 0.6690908286370998, iteration: 362228
loss: 0.9924175143241882,grad_norm: 0.7310850741938749, iteration: 362229
loss: 0.9928504824638367,grad_norm: 0.9281882661986817, iteration: 362230
loss: 1.0323593616485596,grad_norm: 0.832687524290058, iteration: 362231
loss: 1.0222342014312744,grad_norm: 0.6847993312776658, iteration: 362232
loss: 0.9921913743019104,grad_norm: 0.7811889962491371, iteration: 362233
loss: 0.9879661202430725,grad_norm: 0.6842835112530328, iteration: 362234
loss: 0.9888899922370911,grad_norm: 0.9999991345899654, iteration: 362235
loss: 0.9959471225738525,grad_norm: 0.6876616434334181, iteration: 362236
loss: 0.9950500130653381,grad_norm: 0.8297140197401841, iteration: 362237
loss: 1.0044286251068115,grad_norm: 0.8194188232289445, iteration: 362238
loss: 1.0004371404647827,grad_norm: 0.742680981229101, iteration: 362239
loss: 0.9826154112815857,grad_norm: 0.8554329778263884, iteration: 362240
loss: 1.033380150794983,grad_norm: 0.8091416274708869, iteration: 362241
loss: 0.9970707893371582,grad_norm: 0.9498273746245571, iteration: 362242
loss: 0.9735932350158691,grad_norm: 0.7929741897235477, iteration: 362243
loss: 0.9933643341064453,grad_norm: 0.7819889093568703, iteration: 362244
loss: 1.038684606552124,grad_norm: 0.9999991426260455, iteration: 362245
loss: 1.0508227348327637,grad_norm: 0.9999991927590688, iteration: 362246
loss: 1.0217686891555786,grad_norm: 0.750434103031238, iteration: 362247
loss: 1.039716124534607,grad_norm: 0.8860666546481191, iteration: 362248
loss: 1.009222388267517,grad_norm: 0.8848700043675796, iteration: 362249
loss: 1.0065608024597168,grad_norm: 0.9999995317104976, iteration: 362250
loss: 0.9625644683837891,grad_norm: 0.8489862906993, iteration: 362251
loss: 0.9859597682952881,grad_norm: 0.727803122649487, iteration: 362252
loss: 0.9724416732788086,grad_norm: 0.7092760764158738, iteration: 362253
loss: 1.050864815711975,grad_norm: 0.9908565089986103, iteration: 362254
loss: 1.0799461603164673,grad_norm: 0.9999992028897909, iteration: 362255
loss: 1.0330547094345093,grad_norm: 0.9999993440189263, iteration: 362256
loss: 0.9766674041748047,grad_norm: 0.7463066372114757, iteration: 362257
loss: 1.0081140995025635,grad_norm: 0.8048803541257328, iteration: 362258
loss: 0.9931650757789612,grad_norm: 0.7129921452931647, iteration: 362259
loss: 1.1176958084106445,grad_norm: 0.999999763863697, iteration: 362260
loss: 0.9816046357154846,grad_norm: 0.7687506379070613, iteration: 362261
loss: 1.004563570022583,grad_norm: 0.9528622589009867, iteration: 362262
loss: 0.9989941716194153,grad_norm: 0.8938039457330801, iteration: 362263
loss: 0.9944210648536682,grad_norm: 0.9353874521854175, iteration: 362264
loss: 0.9887260794639587,grad_norm: 0.6738198588508164, iteration: 362265
loss: 0.9920879602432251,grad_norm: 0.8708498961525959, iteration: 362266
loss: 0.9620082974433899,grad_norm: 0.9999992111651241, iteration: 362267
loss: 1.0164474248886108,grad_norm: 0.9701055311746485, iteration: 362268
loss: 1.0299376249313354,grad_norm: 0.9999992635587832, iteration: 362269
loss: 0.968970775604248,grad_norm: 0.8937539144995602, iteration: 362270
loss: 0.9860483407974243,grad_norm: 0.7607553048336329, iteration: 362271
loss: 1.0393227338790894,grad_norm: 0.7447705043765989, iteration: 362272
loss: 0.9912431240081787,grad_norm: 0.8627091793008803, iteration: 362273
loss: 1.0169978141784668,grad_norm: 0.7303345846132377, iteration: 362274
loss: 1.005359172821045,grad_norm: 0.9999998222998219, iteration: 362275
loss: 1.0342025756835938,grad_norm: 0.8000556856378074, iteration: 362276
loss: 1.0430808067321777,grad_norm: 0.7032372979061631, iteration: 362277
loss: 0.9984796643257141,grad_norm: 0.9999991963703444, iteration: 362278
loss: 1.011622428894043,grad_norm: 0.8005115421355837, iteration: 362279
loss: 1.006280779838562,grad_norm: 0.7345651956774373, iteration: 362280
loss: 0.9748585820198059,grad_norm: 0.8912222181723214, iteration: 362281
loss: 0.9954606890678406,grad_norm: 0.868307806062716, iteration: 362282
loss: 0.9979656338691711,grad_norm: 0.9273160233674599, iteration: 362283
loss: 0.9745970964431763,grad_norm: 0.7567685173354464, iteration: 362284
loss: 1.1229381561279297,grad_norm: 0.9999995916790823, iteration: 362285
loss: 1.008647084236145,grad_norm: 0.6680939005155874, iteration: 362286
loss: 1.0392844676971436,grad_norm: 0.7698532618291366, iteration: 362287
loss: 1.0126646757125854,grad_norm: 0.7604699185969042, iteration: 362288
loss: 1.0182174444198608,grad_norm: 0.7058351163604156, iteration: 362289
loss: 1.0538369417190552,grad_norm: 0.7959671123048305, iteration: 362290
loss: 1.0113800764083862,grad_norm: 0.7416410926894045, iteration: 362291
loss: 0.9995645880699158,grad_norm: 0.8595570893549205, iteration: 362292
loss: 0.9802451729774475,grad_norm: 0.999999264982252, iteration: 362293
loss: 1.0439704656600952,grad_norm: 0.9999991689277228, iteration: 362294
loss: 0.9830946922302246,grad_norm: 0.6958251825772529, iteration: 362295
loss: 1.0004535913467407,grad_norm: 0.9424405566201886, iteration: 362296
loss: 0.9676300287246704,grad_norm: 0.7940795244044679, iteration: 362297
loss: 1.0405476093292236,grad_norm: 0.9323792736631296, iteration: 362298
loss: 0.995281457901001,grad_norm: 0.8140643675876735, iteration: 362299
loss: 1.0130534172058105,grad_norm: 0.899889233129451, iteration: 362300
loss: 1.0159929990768433,grad_norm: 0.6891804399765113, iteration: 362301
loss: 0.9997764825820923,grad_norm: 0.7488803115545519, iteration: 362302
loss: 0.9826325178146362,grad_norm: 0.7694791975636962, iteration: 362303
loss: 1.0144964456558228,grad_norm: 0.8152061548852014, iteration: 362304
loss: 1.0220102071762085,grad_norm: 0.9999994736207509, iteration: 362305
loss: 1.1276637315750122,grad_norm: 0.9999991827417456, iteration: 362306
loss: 1.0048410892486572,grad_norm: 0.8120261738182097, iteration: 362307
loss: 0.9905195832252502,grad_norm: 0.7865613120739768, iteration: 362308
loss: 1.0564770698547363,grad_norm: 0.8337672230645929, iteration: 362309
loss: 0.9650404453277588,grad_norm: 0.700148864445025, iteration: 362310
loss: 1.0271471738815308,grad_norm: 0.8114299709862424, iteration: 362311
loss: 1.0294944047927856,grad_norm: 0.8085638531412965, iteration: 362312
loss: 0.9849409461021423,grad_norm: 0.7889329781446692, iteration: 362313
loss: 0.9935823678970337,grad_norm: 0.9999992643973017, iteration: 362314
loss: 1.0177218914031982,grad_norm: 0.880196513005558, iteration: 362315
loss: 1.0398627519607544,grad_norm: 0.8392739723747703, iteration: 362316
loss: 0.9684555530548096,grad_norm: 0.8752297730146692, iteration: 362317
loss: 1.0242773294448853,grad_norm: 0.7323618956032251, iteration: 362318
loss: 1.0325813293457031,grad_norm: 0.8727773789672978, iteration: 362319
loss: 0.9959070682525635,grad_norm: 0.6857944079424191, iteration: 362320
loss: 1.0428396463394165,grad_norm: 0.8540062768222362, iteration: 362321
loss: 1.026320457458496,grad_norm: 0.8274813710078059, iteration: 362322
loss: 0.9997517466545105,grad_norm: 0.8347055589275183, iteration: 362323
loss: 1.0132813453674316,grad_norm: 0.7675202881816474, iteration: 362324
loss: 0.9935109615325928,grad_norm: 0.7325530264717076, iteration: 362325
loss: 0.9800338745117188,grad_norm: 0.9999995013970315, iteration: 362326
loss: 1.0105706453323364,grad_norm: 0.7353821236257947, iteration: 362327
loss: 0.9760491847991943,grad_norm: 0.7249540389040864, iteration: 362328
loss: 0.9793239235877991,grad_norm: 0.8332158489492348, iteration: 362329
loss: 1.0357215404510498,grad_norm: 0.7109791167735092, iteration: 362330
loss: 1.0093224048614502,grad_norm: 0.822130394618221, iteration: 362331
loss: 1.0843491554260254,grad_norm: 0.9999995904652731, iteration: 362332
loss: 1.0050115585327148,grad_norm: 0.8138474367799221, iteration: 362333
loss: 1.0578786134719849,grad_norm: 0.760871263978858, iteration: 362334
loss: 1.0662832260131836,grad_norm: 0.9097991323714242, iteration: 362335
loss: 1.0214248895645142,grad_norm: 0.7661107384789185, iteration: 362336
loss: 1.0029605627059937,grad_norm: 0.7916560251515337, iteration: 362337
loss: 1.014797329902649,grad_norm: 0.8141861319329934, iteration: 362338
loss: 1.0634937286376953,grad_norm: 0.7644026175277108, iteration: 362339
loss: 1.010218858718872,grad_norm: 0.7354685410161979, iteration: 362340
loss: 0.9687101244926453,grad_norm: 0.7148688125811367, iteration: 362341
loss: 1.0291862487792969,grad_norm: 0.9999992624628855, iteration: 362342
loss: 0.9721564054489136,grad_norm: 0.7980206936945169, iteration: 362343
loss: 0.9741188287734985,grad_norm: 0.8369665134107364, iteration: 362344
loss: 1.0125325918197632,grad_norm: 0.7612646171624937, iteration: 362345
loss: 0.9817336201667786,grad_norm: 0.9999991772459605, iteration: 362346
loss: 1.00863516330719,grad_norm: 0.999999708209694, iteration: 362347
loss: 1.0323915481567383,grad_norm: 0.8547652360876762, iteration: 362348
loss: 1.0201505422592163,grad_norm: 0.8947402808325583, iteration: 362349
loss: 1.0097482204437256,grad_norm: 0.8285455073955574, iteration: 362350
loss: 0.9956567287445068,grad_norm: 0.9999992189624607, iteration: 362351
loss: 0.9780321717262268,grad_norm: 0.8631722902773724, iteration: 362352
loss: 1.075046420097351,grad_norm: 0.8309268580446173, iteration: 362353
loss: 0.9730117321014404,grad_norm: 0.8478408203561175, iteration: 362354
loss: 1.0251978635787964,grad_norm: 0.8536683066710424, iteration: 362355
loss: 0.9994691610336304,grad_norm: 0.7651174711555494, iteration: 362356
loss: 0.9800912737846375,grad_norm: 0.9573543957699406, iteration: 362357
loss: 1.0021506547927856,grad_norm: 0.9999992488700339, iteration: 362358
loss: 1.024768590927124,grad_norm: 0.8478335333792273, iteration: 362359
loss: 0.9835452437400818,grad_norm: 0.8040477208189811, iteration: 362360
loss: 1.0327388048171997,grad_norm: 0.8703608205195044, iteration: 362361
loss: 0.9808292984962463,grad_norm: 0.8862752861550842, iteration: 362362
loss: 1.0131129026412964,grad_norm: 0.8602073143312438, iteration: 362363
loss: 0.955085039138794,grad_norm: 0.8007513678158761, iteration: 362364
loss: 0.9627289175987244,grad_norm: 0.740139106093654, iteration: 362365
loss: 0.9805536270141602,grad_norm: 0.9999993008361124, iteration: 362366
loss: 0.974415123462677,grad_norm: 0.9019122005414373, iteration: 362367
loss: 1.0008946657180786,grad_norm: 0.8524273399306691, iteration: 362368
loss: 1.023323655128479,grad_norm: 0.924791989636544, iteration: 362369
loss: 1.0161114931106567,grad_norm: 0.9999991329636517, iteration: 362370
loss: 1.0422817468643188,grad_norm: 0.7519763118542155, iteration: 362371
loss: 1.013022541999817,grad_norm: 0.8110476073511236, iteration: 362372
loss: 0.9986916184425354,grad_norm: 0.9999994390680621, iteration: 362373
loss: 1.0709335803985596,grad_norm: 0.9999990773162218, iteration: 362374
loss: 1.0135483741760254,grad_norm: 0.9119041549202892, iteration: 362375
loss: 1.0242047309875488,grad_norm: 0.7182293336123879, iteration: 362376
loss: 1.023797869682312,grad_norm: 0.9238216132544558, iteration: 362377
loss: 0.9993979334831238,grad_norm: 0.8358936694288924, iteration: 362378
loss: 0.9529918432235718,grad_norm: 0.8729415935319036, iteration: 362379
loss: 1.0021138191223145,grad_norm: 0.7635244604598105, iteration: 362380
loss: 1.0158114433288574,grad_norm: 0.7896467927890911, iteration: 362381
loss: 1.0260926485061646,grad_norm: 0.8021453029356876, iteration: 362382
loss: 0.9907405376434326,grad_norm: 0.844142248099663, iteration: 362383
loss: 1.0749669075012207,grad_norm: 0.8694950548447533, iteration: 362384
loss: 1.0068950653076172,grad_norm: 0.7872163883997549, iteration: 362385
loss: 1.0024617910385132,grad_norm: 0.8834743745791067, iteration: 362386
loss: 1.0104820728302002,grad_norm: 0.8375998313985522, iteration: 362387
loss: 1.0183144807815552,grad_norm: 0.6449158799066954, iteration: 362388
loss: 0.967415452003479,grad_norm: 0.7187588235345885, iteration: 362389
loss: 0.9471543431282043,grad_norm: 0.7760226790004923, iteration: 362390
loss: 0.9705916047096252,grad_norm: 0.9999991422354074, iteration: 362391
loss: 0.9689304232597351,grad_norm: 0.9999991123289192, iteration: 362392
loss: 1.0080595016479492,grad_norm: 0.8075985660527731, iteration: 362393
loss: 1.0260541439056396,grad_norm: 0.999999863019514, iteration: 362394
loss: 1.0260326862335205,grad_norm: 0.8000704566105572, iteration: 362395
loss: 1.0746601819992065,grad_norm: 0.9999997868873758, iteration: 362396
loss: 0.9987379312515259,grad_norm: 0.7914723684905757, iteration: 362397
loss: 0.9933229684829712,grad_norm: 0.9999993291876935, iteration: 362398
loss: 0.9893848299980164,grad_norm: 0.7657222754833558, iteration: 362399
loss: 0.983275294303894,grad_norm: 0.9844700502157445, iteration: 362400
loss: 1.0210812091827393,grad_norm: 0.7449962961362075, iteration: 362401
loss: 0.9977909326553345,grad_norm: 0.8185473505094092, iteration: 362402
loss: 0.997144341468811,grad_norm: 0.894283850740998, iteration: 362403
loss: 0.9664637446403503,grad_norm: 0.7529928336168515, iteration: 362404
loss: 1.0083386898040771,grad_norm: 0.6405798400001196, iteration: 362405
loss: 0.9979109168052673,grad_norm: 0.8110449430105342, iteration: 362406
loss: 1.1196486949920654,grad_norm: 0.9999992979141076, iteration: 362407
loss: 0.9739965796470642,grad_norm: 0.708311499059224, iteration: 362408
loss: 0.9763578772544861,grad_norm: 0.7453476158867717, iteration: 362409
loss: 1.0019415616989136,grad_norm: 0.8578464854302925, iteration: 362410
loss: 0.9904499650001526,grad_norm: 0.7798481031581246, iteration: 362411
loss: 1.0383888483047485,grad_norm: 0.9179020225972155, iteration: 362412
loss: 1.003340244293213,grad_norm: 0.6841643755363422, iteration: 362413
loss: 1.0018004179000854,grad_norm: 0.8693703828163495, iteration: 362414
loss: 0.9830681085586548,grad_norm: 0.7420868099879602, iteration: 362415
loss: 1.0025177001953125,grad_norm: 0.9999990438140683, iteration: 362416
loss: 0.9808701872825623,grad_norm: 0.999999102232365, iteration: 362417
loss: 1.0014996528625488,grad_norm: 0.7639807201250451, iteration: 362418
loss: 0.9947957396507263,grad_norm: 0.7559505526980852, iteration: 362419
loss: 0.9806110262870789,grad_norm: 0.7188781740772037, iteration: 362420
loss: 0.9753134250640869,grad_norm: 0.9999996974523683, iteration: 362421
loss: 0.9977454543113708,grad_norm: 0.8091154147719866, iteration: 362422
loss: 1.013303279876709,grad_norm: 0.7819012118039158, iteration: 362423
loss: 1.012986660003662,grad_norm: 0.9999990430047011, iteration: 362424
loss: 0.9706308841705322,grad_norm: 0.8800482976178327, iteration: 362425
loss: 1.0043963193893433,grad_norm: 0.8570702369516295, iteration: 362426
loss: 0.9966576099395752,grad_norm: 0.7515409419677698, iteration: 362427
loss: 1.0580071210861206,grad_norm: 0.8975768002673448, iteration: 362428
loss: 0.9855466485023499,grad_norm: 0.7056195286869944, iteration: 362429
loss: 0.9895544648170471,grad_norm: 0.8438930828877735, iteration: 362430
loss: 0.9764948487281799,grad_norm: 0.8077721060036911, iteration: 362431
loss: 0.9781180024147034,grad_norm: 0.8718222548694101, iteration: 362432
loss: 1.0124547481536865,grad_norm: 0.8077951487119589, iteration: 362433
loss: 1.013858437538147,grad_norm: 0.8460187837858282, iteration: 362434
loss: 1.017979383468628,grad_norm: 0.8619486395141265, iteration: 362435
loss: 0.9945155382156372,grad_norm: 0.7804909054920846, iteration: 362436
loss: 0.9893555641174316,grad_norm: 0.7437330021223773, iteration: 362437
loss: 1.0391844511032104,grad_norm: 0.8421946726528052, iteration: 362438
loss: 1.017520546913147,grad_norm: 0.707042824872358, iteration: 362439
loss: 1.0137065649032593,grad_norm: 0.9725341528003857, iteration: 362440
loss: 0.9832039475440979,grad_norm: 0.7402451299664253, iteration: 362441
loss: 0.9742251038551331,grad_norm: 0.776931080645005, iteration: 362442
loss: 1.0020371675491333,grad_norm: 0.7538745638909058, iteration: 362443
loss: 1.0148587226867676,grad_norm: 0.7107217354972072, iteration: 362444
loss: 0.9779386520385742,grad_norm: 0.8321085094677958, iteration: 362445
loss: 0.9830101728439331,grad_norm: 0.999999099483324, iteration: 362446
loss: 1.0207765102386475,grad_norm: 0.9186714242508204, iteration: 362447
loss: 0.971864640712738,grad_norm: 0.9999990197053792, iteration: 362448
loss: 1.0195436477661133,grad_norm: 0.7277261403283097, iteration: 362449
loss: 1.0216057300567627,grad_norm: 0.8578346740717783, iteration: 362450
loss: 0.9751856327056885,grad_norm: 0.7242749818393118, iteration: 362451
loss: 0.9885119199752808,grad_norm: 0.8088196196656747, iteration: 362452
loss: 0.9845839142799377,grad_norm: 0.8904254361201585, iteration: 362453
loss: 0.991530179977417,grad_norm: 0.7489913301852152, iteration: 362454
loss: 1.0033820867538452,grad_norm: 0.8870653161850703, iteration: 362455
loss: 1.0069960355758667,grad_norm: 0.823605837914288, iteration: 362456
loss: 0.9946224689483643,grad_norm: 0.8436277818144425, iteration: 362457
loss: 1.0083059072494507,grad_norm: 0.8289483384284648, iteration: 362458
loss: 0.9853410720825195,grad_norm: 0.8969969569085685, iteration: 362459
loss: 1.018486499786377,grad_norm: 0.8130035747894618, iteration: 362460
loss: 1.0189530849456787,grad_norm: 0.8055883903129696, iteration: 362461
loss: 1.0547547340393066,grad_norm: 0.9014192012872139, iteration: 362462
loss: 1.0296190977096558,grad_norm: 0.8681208281584907, iteration: 362463
loss: 1.0216004848480225,grad_norm: 0.8077320043628152, iteration: 362464
loss: 0.9833497405052185,grad_norm: 0.7164919608474367, iteration: 362465
loss: 0.9947464466094971,grad_norm: 0.999999254720391, iteration: 362466
loss: 1.2018201351165771,grad_norm: 0.9999991285125289, iteration: 362467
loss: 1.0285334587097168,grad_norm: 0.7617845360633986, iteration: 362468
loss: 0.9807832837104797,grad_norm: 0.9026639917622449, iteration: 362469
loss: 1.015677571296692,grad_norm: 0.8107022604290679, iteration: 362470
loss: 1.0409003496170044,grad_norm: 0.9999999082802767, iteration: 362471
loss: 0.995225191116333,grad_norm: 0.749522770666027, iteration: 362472
loss: 0.9672673344612122,grad_norm: 0.8186783079246934, iteration: 362473
loss: 0.9720481038093567,grad_norm: 0.824616097227745, iteration: 362474
loss: 0.9765287637710571,grad_norm: 0.8401114034672974, iteration: 362475
loss: 0.9857086539268494,grad_norm: 0.9162764820547272, iteration: 362476
loss: 0.9858853816986084,grad_norm: 0.8337257013210079, iteration: 362477
loss: 0.9650666117668152,grad_norm: 0.9493449995587663, iteration: 362478
loss: 0.9400246143341064,grad_norm: 0.9360066644233965, iteration: 362479
loss: 0.9756013751029968,grad_norm: 0.7839085406783244, iteration: 362480
loss: 0.9991713166236877,grad_norm: 0.8851262421151097, iteration: 362481
loss: 0.9781882762908936,grad_norm: 0.7595517853979304, iteration: 362482
loss: 0.978879988193512,grad_norm: 0.7268131079984032, iteration: 362483
loss: 0.9830043911933899,grad_norm: 0.9127872057789302, iteration: 362484
loss: 0.9838607907295227,grad_norm: 0.7327092449280995, iteration: 362485
loss: 1.0053268671035767,grad_norm: 0.7415112347301596, iteration: 362486
loss: 0.9999418258666992,grad_norm: 0.9303330073731135, iteration: 362487
loss: 0.9624589085578918,grad_norm: 0.663591171425006, iteration: 362488
loss: 0.9732532501220703,grad_norm: 0.7395731608902948, iteration: 362489
loss: 1.004594326019287,grad_norm: 0.9999992250507669, iteration: 362490
loss: 1.0091333389282227,grad_norm: 0.8206762896820932, iteration: 362491
loss: 1.003401279449463,grad_norm: 0.7571787046484862, iteration: 362492
loss: 0.9991485476493835,grad_norm: 0.7017049360744958, iteration: 362493
loss: 1.0051887035369873,grad_norm: 0.9352383518495434, iteration: 362494
loss: 1.049206018447876,grad_norm: 0.9999991591849833, iteration: 362495
loss: 0.997406542301178,grad_norm: 0.882403742132873, iteration: 362496
loss: 0.9854252934455872,grad_norm: 0.8334832691129593, iteration: 362497
loss: 1.0230607986450195,grad_norm: 0.7396251457775209, iteration: 362498
loss: 1.0093071460723877,grad_norm: 0.8157139477601718, iteration: 362499
loss: 1.0344645977020264,grad_norm: 0.7436748611569414, iteration: 362500
loss: 1.0206886529922485,grad_norm: 0.8247242886102158, iteration: 362501
loss: 0.9448857307434082,grad_norm: 0.8951991057880746, iteration: 362502
loss: 0.9672130346298218,grad_norm: 0.8212655048864347, iteration: 362503
loss: 0.9809941053390503,grad_norm: 0.8246997055817062, iteration: 362504
loss: 1.0031017065048218,grad_norm: 0.8097498191196739, iteration: 362505
loss: 0.9846767783164978,grad_norm: 0.8024697090877076, iteration: 362506
loss: 1.0353583097457886,grad_norm: 0.9999998644109964, iteration: 362507
loss: 1.019516110420227,grad_norm: 0.8234733341937189, iteration: 362508
loss: 0.9652414917945862,grad_norm: 0.8335757430340045, iteration: 362509
loss: 0.962304413318634,grad_norm: 0.9999991658708995, iteration: 362510
loss: 1.0073161125183105,grad_norm: 0.8555475980592555, iteration: 362511
loss: 0.9996628165245056,grad_norm: 0.8107505470159493, iteration: 362512
loss: 1.0283395051956177,grad_norm: 0.6963264099783305, iteration: 362513
loss: 0.9666609764099121,grad_norm: 0.9554561196271705, iteration: 362514
loss: 0.9965669512748718,grad_norm: 0.7724038035134656, iteration: 362515
loss: 1.0421992540359497,grad_norm: 0.9029740106976388, iteration: 362516
loss: 1.0305300951004028,grad_norm: 0.7453723632903635, iteration: 362517
loss: 0.9637510776519775,grad_norm: 0.7505613448303007, iteration: 362518
loss: 1.014693021774292,grad_norm: 0.7426486330200066, iteration: 362519
loss: 1.007449984550476,grad_norm: 0.7342734111690123, iteration: 362520
loss: 1.0519964694976807,grad_norm: 0.919204115413763, iteration: 362521
loss: 1.0033406019210815,grad_norm: 0.824971875169148, iteration: 362522
loss: 1.0033061504364014,grad_norm: 0.8379888544893163, iteration: 362523
loss: 1.0220693349838257,grad_norm: 0.7611945524868662, iteration: 362524
loss: 0.9939581751823425,grad_norm: 0.988759099793192, iteration: 362525
loss: 0.997898280620575,grad_norm: 0.7985866750659942, iteration: 362526
loss: 1.0006122589111328,grad_norm: 0.6754327238698982, iteration: 362527
loss: 0.9886322021484375,grad_norm: 0.7271875556450101, iteration: 362528
loss: 1.0131374597549438,grad_norm: 0.7210338612484191, iteration: 362529
loss: 1.0135055780410767,grad_norm: 0.9999990389805994, iteration: 362530
loss: 1.065895676612854,grad_norm: 0.8822252100782901, iteration: 362531
loss: 0.985385000705719,grad_norm: 0.7698356660563956, iteration: 362532
loss: 1.000770092010498,grad_norm: 0.7886592720020122, iteration: 362533
loss: 0.9929105043411255,grad_norm: 0.9603768786318284, iteration: 362534
loss: 1.0200546979904175,grad_norm: 0.854626910562016, iteration: 362535
loss: 0.9854397177696228,grad_norm: 0.9561943454067299, iteration: 362536
loss: 0.9807063341140747,grad_norm: 0.7606814912943748, iteration: 362537
loss: 0.9932098388671875,grad_norm: 0.7232438775133316, iteration: 362538
loss: 1.0203304290771484,grad_norm: 0.9999992663555127, iteration: 362539
loss: 0.9779220819473267,grad_norm: 0.8203030662618913, iteration: 362540
loss: 1.0000078678131104,grad_norm: 0.6557591206596355, iteration: 362541
loss: 1.0339529514312744,grad_norm: 0.8283918027364717, iteration: 362542
loss: 1.0033663511276245,grad_norm: 0.7860561359482724, iteration: 362543
loss: 1.0198044776916504,grad_norm: 0.9514382832877071, iteration: 362544
loss: 0.9831159710884094,grad_norm: 0.9308652243794809, iteration: 362545
loss: 1.0829066038131714,grad_norm: 0.9585874152284601, iteration: 362546
loss: 0.98835688829422,grad_norm: 0.9000356530504312, iteration: 362547
loss: 0.997356653213501,grad_norm: 0.9504713080155367, iteration: 362548
loss: 0.9928545355796814,grad_norm: 0.7926515408372433, iteration: 362549
loss: 1.014025330543518,grad_norm: 0.9999997804598868, iteration: 362550
loss: 0.961636483669281,grad_norm: 0.8443412188269986, iteration: 362551
loss: 0.9705352187156677,grad_norm: 0.8415684101266852, iteration: 362552
loss: 1.0408127307891846,grad_norm: 0.6819503135115418, iteration: 362553
loss: 1.0823392868041992,grad_norm: 0.9999996678637143, iteration: 362554
loss: 1.033151388168335,grad_norm: 0.9717950328077857, iteration: 362555
loss: 0.998986005783081,grad_norm: 0.8270592689518261, iteration: 362556
loss: 0.9868716597557068,grad_norm: 0.9042517141082863, iteration: 362557
loss: 1.0141416788101196,grad_norm: 0.6928217966640959, iteration: 362558
loss: 1.0802204608917236,grad_norm: 0.9999999183153807, iteration: 362559
loss: 1.0124565362930298,grad_norm: 0.8110616478164729, iteration: 362560
loss: 1.0229297876358032,grad_norm: 0.9999991312640982, iteration: 362561
loss: 1.036279320716858,grad_norm: 0.8875444061253911, iteration: 362562
loss: 0.9728122353553772,grad_norm: 0.8647177353128123, iteration: 362563
loss: 1.0590589046478271,grad_norm: 0.8383287877763166, iteration: 362564
loss: 1.0586986541748047,grad_norm: 0.9999995178177642, iteration: 362565
loss: 1.0426667928695679,grad_norm: 0.921475554811764, iteration: 362566
loss: 0.9332218170166016,grad_norm: 0.8539854258376967, iteration: 362567
loss: 1.033774971961975,grad_norm: 0.8844153768225559, iteration: 362568
loss: 0.9532012939453125,grad_norm: 0.903591036799965, iteration: 362569
loss: 0.963527500629425,grad_norm: 0.9999992961317238, iteration: 362570
loss: 1.011928677558899,grad_norm: 0.7115085897145846, iteration: 362571
loss: 0.9903357625007629,grad_norm: 0.8015014053238205, iteration: 362572
loss: 1.0424286127090454,grad_norm: 0.9999992409178179, iteration: 362573
loss: 0.9689647555351257,grad_norm: 0.8389143120380822, iteration: 362574
loss: 1.0096287727355957,grad_norm: 0.8908089726502201, iteration: 362575
loss: 0.9951089024543762,grad_norm: 0.7880317302525315, iteration: 362576
loss: 0.9735284447669983,grad_norm: 0.999999147392899, iteration: 362577
loss: 0.9780791997909546,grad_norm: 0.8595171602757619, iteration: 362578
loss: 1.0197148323059082,grad_norm: 0.6784957287163095, iteration: 362579
loss: 1.001791000366211,grad_norm: 0.6978790479214187, iteration: 362580
loss: 0.982890784740448,grad_norm: 0.8387541158729178, iteration: 362581
loss: 0.9714062809944153,grad_norm: 0.7033078346947464, iteration: 362582
loss: 0.9804835915565491,grad_norm: 0.8548229901014142, iteration: 362583
loss: 0.9686384201049805,grad_norm: 0.6404803861242722, iteration: 362584
loss: 1.0044678449630737,grad_norm: 0.7079031312682154, iteration: 362585
loss: 1.0498992204666138,grad_norm: 0.9999995132291019, iteration: 362586
loss: 0.9967169165611267,grad_norm: 0.8763421494969479, iteration: 362587
loss: 1.0042595863342285,grad_norm: 0.8280835685517133, iteration: 362588
loss: 0.9902177453041077,grad_norm: 0.7830996877430824, iteration: 362589
loss: 1.0050005912780762,grad_norm: 0.9999993874301973, iteration: 362590
loss: 1.1198607683181763,grad_norm: 0.9999992066238419, iteration: 362591
loss: 0.9880189895629883,grad_norm: 0.9043139989093372, iteration: 362592
loss: 1.0108166933059692,grad_norm: 0.8853251831973991, iteration: 362593
loss: 1.0005403757095337,grad_norm: 0.7913189588928644, iteration: 362594
loss: 0.9582417011260986,grad_norm: 0.8729479428594605, iteration: 362595
loss: 1.0040686130523682,grad_norm: 0.9755164617182098, iteration: 362596
loss: 0.9587135910987854,grad_norm: 0.6408565741377251, iteration: 362597
loss: 1.0069400072097778,grad_norm: 0.9999993569424834, iteration: 362598
loss: 1.026533603668213,grad_norm: 0.7183463152306104, iteration: 362599
loss: 1.0094430446624756,grad_norm: 0.7795380686495396, iteration: 362600
loss: 0.9606295228004456,grad_norm: 0.7338035996272352, iteration: 362601
loss: 1.0836583375930786,grad_norm: 0.8604660313976884, iteration: 362602
loss: 1.0380996465682983,grad_norm: 0.7802499206153372, iteration: 362603
loss: 0.9896806478500366,grad_norm: 0.7959757101884174, iteration: 362604
loss: 0.9925041198730469,grad_norm: 0.8583722871734164, iteration: 362605
loss: 1.0672935247421265,grad_norm: 0.9398577863754255, iteration: 362606
loss: 0.971589982509613,grad_norm: 0.8280228426116596, iteration: 362607
loss: 1.0406253337860107,grad_norm: 0.7949581811297607, iteration: 362608
loss: 1.0116016864776611,grad_norm: 0.7613313156295757, iteration: 362609
loss: 0.9864163994789124,grad_norm: 0.8080803658210939, iteration: 362610
loss: 1.029098629951477,grad_norm: 0.7334376275601997, iteration: 362611
loss: 1.0366261005401611,grad_norm: 0.8942642152870617, iteration: 362612
loss: 1.0023436546325684,grad_norm: 0.8435158658165947, iteration: 362613
loss: 0.989423930644989,grad_norm: 0.971706569394462, iteration: 362614
loss: 0.9855526089668274,grad_norm: 0.9682331030752516, iteration: 362615
loss: 0.9750593900680542,grad_norm: 0.7522382935280744, iteration: 362616
loss: 0.98167884349823,grad_norm: 0.8249494248650617, iteration: 362617
loss: 0.993725061416626,grad_norm: 0.6873773763999134, iteration: 362618
loss: 1.1308213472366333,grad_norm: 0.9999996908957911, iteration: 362619
loss: 0.9998600482940674,grad_norm: 0.8411713015433303, iteration: 362620
loss: 1.0419114828109741,grad_norm: 0.9999995900516069, iteration: 362621
loss: 0.9731897711753845,grad_norm: 0.7894962741201046, iteration: 362622
loss: 1.0500636100769043,grad_norm: 0.8388983144138725, iteration: 362623
loss: 0.9610886573791504,grad_norm: 0.9159996021926378, iteration: 362624
loss: 1.0239769220352173,grad_norm: 0.7057013073117414, iteration: 362625
loss: 0.9502390027046204,grad_norm: 0.858260320035538, iteration: 362626
loss: 0.9964086413383484,grad_norm: 0.7553167677021247, iteration: 362627
loss: 1.1056193113327026,grad_norm: 0.8745602315486785, iteration: 362628
loss: 1.0018020868301392,grad_norm: 0.7610696112055345, iteration: 362629
loss: 0.9879473447799683,grad_norm: 0.7274833653867221, iteration: 362630
loss: 0.9979350566864014,grad_norm: 0.9491700545455422, iteration: 362631
loss: 1.0194849967956543,grad_norm: 0.7260882895352985, iteration: 362632
loss: 1.0006078481674194,grad_norm: 0.773109408460457, iteration: 362633
loss: 1.0150554180145264,grad_norm: 0.9999993598319573, iteration: 362634
loss: 1.0958783626556396,grad_norm: 0.999999118601379, iteration: 362635
loss: 0.9994228482246399,grad_norm: 0.6865946455729516, iteration: 362636
loss: 1.0327671766281128,grad_norm: 0.7858408833498066, iteration: 362637
loss: 0.9956504106521606,grad_norm: 0.6700461860816583, iteration: 362638
loss: 1.0076518058776855,grad_norm: 0.9999990296218729, iteration: 362639
loss: 0.9584853053092957,grad_norm: 0.6685819408948805, iteration: 362640
loss: 1.0193113088607788,grad_norm: 0.7705188831067933, iteration: 362641
loss: 0.9993485808372498,grad_norm: 0.6751768089613815, iteration: 362642
loss: 0.9737500548362732,grad_norm: 0.7309945764910836, iteration: 362643
loss: 0.9961321353912354,grad_norm: 0.7368730180841094, iteration: 362644
loss: 1.019531488418579,grad_norm: 0.8736130141579381, iteration: 362645
loss: 1.1048883199691772,grad_norm: 0.7497333297615566, iteration: 362646
loss: 1.0235278606414795,grad_norm: 0.7529150284486874, iteration: 362647
loss: 1.0220617055892944,grad_norm: 0.7525916093663367, iteration: 362648
loss: 0.963934600353241,grad_norm: 0.939514954695549, iteration: 362649
loss: 0.996619701385498,grad_norm: 0.8513241694415586, iteration: 362650
loss: 1.0406547784805298,grad_norm: 0.9999991666780071, iteration: 362651
loss: 1.0586862564086914,grad_norm: 0.9999990850457876, iteration: 362652
loss: 1.0374860763549805,grad_norm: 0.8996952865964245, iteration: 362653
loss: 0.9787299036979675,grad_norm: 0.7434872873865798, iteration: 362654
loss: 1.017410159111023,grad_norm: 0.7214503788591143, iteration: 362655
loss: 0.9771542549133301,grad_norm: 0.7575642345649184, iteration: 362656
loss: 0.9532867670059204,grad_norm: 0.7989203794797625, iteration: 362657
loss: 0.9836692214012146,grad_norm: 0.7326923062514898, iteration: 362658
loss: 1.0312813520431519,grad_norm: 0.8169139382199999, iteration: 362659
loss: 1.0150920152664185,grad_norm: 0.9283818897456492, iteration: 362660
loss: 1.014323353767395,grad_norm: 0.8653531787825149, iteration: 362661
loss: 1.001179814338684,grad_norm: 0.9999991673535749, iteration: 362662
loss: 0.9858415722846985,grad_norm: 0.9787022592822602, iteration: 362663
loss: 0.9796863198280334,grad_norm: 0.7217830693353324, iteration: 362664
loss: 0.979829728603363,grad_norm: 0.814915047930006, iteration: 362665
loss: 0.9829519391059875,grad_norm: 0.7882758018621343, iteration: 362666
loss: 1.0393486022949219,grad_norm: 0.9999991861620421, iteration: 362667
loss: 1.0202929973602295,grad_norm: 0.9999990285267899, iteration: 362668
loss: 0.994355320930481,grad_norm: 0.8207940686454962, iteration: 362669
loss: 1.0418248176574707,grad_norm: 0.7830107146368014, iteration: 362670
loss: 0.9857605695724487,grad_norm: 0.6902068284975716, iteration: 362671
loss: 0.9926774501800537,grad_norm: 0.7893781220328078, iteration: 362672
loss: 1.0260863304138184,grad_norm: 0.8220817247235968, iteration: 362673
loss: 1.0076961517333984,grad_norm: 0.9999991773100485, iteration: 362674
loss: 0.9760107398033142,grad_norm: 0.7960167459304266, iteration: 362675
loss: 0.9758700728416443,grad_norm: 0.8163726572074245, iteration: 362676
loss: 0.9955742955207825,grad_norm: 0.6913115828837034, iteration: 362677
loss: 1.0276468992233276,grad_norm: 0.811790136217268, iteration: 362678
loss: 1.0144445896148682,grad_norm: 0.9604733309245206, iteration: 362679
loss: 1.0323697328567505,grad_norm: 0.6921528865026866, iteration: 362680
loss: 1.049978256225586,grad_norm: 0.7932083424176004, iteration: 362681
loss: 0.9947986006736755,grad_norm: 0.9034549266642155, iteration: 362682
loss: 0.9656639099121094,grad_norm: 0.7503157473817124, iteration: 362683
loss: 0.9886482357978821,grad_norm: 0.7636022954586076, iteration: 362684
loss: 1.0317506790161133,grad_norm: 0.7107854961003789, iteration: 362685
loss: 1.0568856000900269,grad_norm: 0.8083585148761526, iteration: 362686
loss: 1.030889868736267,grad_norm: 0.7435673438177598, iteration: 362687
loss: 1.0144511461257935,grad_norm: 0.7546759248817348, iteration: 362688
loss: 0.9748513698577881,grad_norm: 0.7108646149874708, iteration: 362689
loss: 0.9588666558265686,grad_norm: 0.8434354187258934, iteration: 362690
loss: 1.0102603435516357,grad_norm: 0.8393784908012678, iteration: 362691
loss: 1.0284528732299805,grad_norm: 0.8280318873490807, iteration: 362692
loss: 1.0282000303268433,grad_norm: 0.7559460982802412, iteration: 362693
loss: 0.998342752456665,grad_norm: 0.9233639315014961, iteration: 362694
loss: 0.9825935363769531,grad_norm: 0.7631621031650028, iteration: 362695
loss: 1.0072070360183716,grad_norm: 0.8364969983421842, iteration: 362696
loss: 1.0049999952316284,grad_norm: 0.8385335132044631, iteration: 362697
loss: 1.0456712245941162,grad_norm: 0.7782069539879913, iteration: 362698
loss: 0.9880503416061401,grad_norm: 0.7279237922947713, iteration: 362699
loss: 0.9772701263427734,grad_norm: 0.8226989461365772, iteration: 362700
loss: 0.9742762446403503,grad_norm: 0.681558650688705, iteration: 362701
loss: 0.9858956933021545,grad_norm: 0.99999963156821, iteration: 362702
loss: 1.0245122909545898,grad_norm: 0.9300835256800452, iteration: 362703
loss: 1.0119609832763672,grad_norm: 0.7745242748625959, iteration: 362704
loss: 1.006902813911438,grad_norm: 0.7432965613746967, iteration: 362705
loss: 1.0018540620803833,grad_norm: 0.7659933566832794, iteration: 362706
loss: 1.0615904331207275,grad_norm: 0.8793318728092708, iteration: 362707
loss: 0.9947866201400757,grad_norm: 0.6541910851575059, iteration: 362708
loss: 0.960414707660675,grad_norm: 0.8434546921859094, iteration: 362709
loss: 0.9701644778251648,grad_norm: 0.6907864932111712, iteration: 362710
loss: 1.0339125394821167,grad_norm: 0.6612489252697794, iteration: 362711
loss: 0.9989068508148193,grad_norm: 0.8176518561979887, iteration: 362712
loss: 1.0058534145355225,grad_norm: 0.821698601348811, iteration: 362713
loss: 0.9942135214805603,grad_norm: 0.8477812982983912, iteration: 362714
loss: 0.9982130527496338,grad_norm: 0.8399288523722288, iteration: 362715
loss: 0.9990443587303162,grad_norm: 0.7984409037434654, iteration: 362716
loss: 0.986372172832489,grad_norm: 0.8287947977915445, iteration: 362717
loss: 0.9730876684188843,grad_norm: 0.7209947596014138, iteration: 362718
loss: 0.9876373410224915,grad_norm: 0.7364284734992306, iteration: 362719
loss: 0.9871661067008972,grad_norm: 0.9138331189237904, iteration: 362720
loss: 0.9955869913101196,grad_norm: 0.8201335176845788, iteration: 362721
loss: 0.9931833148002625,grad_norm: 0.8939104398868477, iteration: 362722
loss: 0.9879598021507263,grad_norm: 0.9251266636923073, iteration: 362723
loss: 1.0162475109100342,grad_norm: 0.8668793960447336, iteration: 362724
loss: 0.9764813780784607,grad_norm: 0.7884729452041598, iteration: 362725
loss: 1.0075105428695679,grad_norm: 0.7856849044071387, iteration: 362726
loss: 1.015761137008667,grad_norm: 0.7152789345303016, iteration: 362727
loss: 1.0855460166931152,grad_norm: 0.999999386699397, iteration: 362728
loss: 1.0007853507995605,grad_norm: 0.7976159666990613, iteration: 362729
loss: 0.9984512329101562,grad_norm: 0.8123021155521069, iteration: 362730
loss: 0.9981856346130371,grad_norm: 0.8471450696162806, iteration: 362731
loss: 1.027982234954834,grad_norm: 0.8668909971270004, iteration: 362732
loss: 0.9845224022865295,grad_norm: 0.7598632112000832, iteration: 362733
loss: 1.0140835046768188,grad_norm: 0.7314398861723521, iteration: 362734
loss: 1.0569400787353516,grad_norm: 0.9101824235112985, iteration: 362735
loss: 0.9866703748703003,grad_norm: 0.7776851397447239, iteration: 362736
loss: 1.0310410261154175,grad_norm: 0.8929238435862225, iteration: 362737
loss: 1.0101046562194824,grad_norm: 0.8021366097631879, iteration: 362738
loss: 0.9808074235916138,grad_norm: 0.7485882008759097, iteration: 362739
loss: 1.0143452882766724,grad_norm: 0.943323445304657, iteration: 362740
loss: 1.0179699659347534,grad_norm: 0.9999995122035418, iteration: 362741
loss: 0.9686267375946045,grad_norm: 0.7477847726086332, iteration: 362742
loss: 1.067305326461792,grad_norm: 0.9683389610905565, iteration: 362743
loss: 0.9917922616004944,grad_norm: 0.9838114279206925, iteration: 362744
loss: 0.9733191728591919,grad_norm: 0.95739699483719, iteration: 362745
loss: 1.0014885663986206,grad_norm: 0.6830132778651049, iteration: 362746
loss: 0.992835521697998,grad_norm: 0.9999998382766652, iteration: 362747
loss: 1.0035619735717773,grad_norm: 0.6955944056424483, iteration: 362748
loss: 0.974654495716095,grad_norm: 0.7614007793185552, iteration: 362749
loss: 0.9954366683959961,grad_norm: 0.695792066388258, iteration: 362750
loss: 1.0137895345687866,grad_norm: 0.8957273167329167, iteration: 362751
loss: 1.0256316661834717,grad_norm: 0.9020673106016465, iteration: 362752
loss: 0.9775390625,grad_norm: 0.9106594284246685, iteration: 362753
loss: 1.025072693824768,grad_norm: 0.8272160447348548, iteration: 362754
loss: 0.9991910457611084,grad_norm: 0.7441287042516084, iteration: 362755
loss: 0.9944097995758057,grad_norm: 0.9554131064001486, iteration: 362756
loss: 1.0327026844024658,grad_norm: 0.9999996566208764, iteration: 362757
loss: 1.027834415435791,grad_norm: 0.8518911455672903, iteration: 362758
loss: 0.9606291055679321,grad_norm: 0.7470550531603155, iteration: 362759
loss: 0.9770536422729492,grad_norm: 0.797175139009499, iteration: 362760
loss: 1.1078848838806152,grad_norm: 0.9999999941455328, iteration: 362761
loss: 1.0146675109863281,grad_norm: 0.7678902520062492, iteration: 362762
loss: 1.008434772491455,grad_norm: 0.776884362377806, iteration: 362763
loss: 1.0283983945846558,grad_norm: 0.854105263739064, iteration: 362764
loss: 0.9774008989334106,grad_norm: 0.8999663306044009, iteration: 362765
loss: 1.01932954788208,grad_norm: 0.8240796596916363, iteration: 362766
loss: 1.00369131565094,grad_norm: 0.7678523958997294, iteration: 362767
loss: 0.9373829364776611,grad_norm: 0.6862094124900566, iteration: 362768
loss: 1.0182651281356812,grad_norm: 0.8147345669013769, iteration: 362769
loss: 0.9940945506095886,grad_norm: 0.8935832187041548, iteration: 362770
loss: 0.992607593536377,grad_norm: 0.7894235462390832, iteration: 362771
loss: 0.9709873795509338,grad_norm: 0.7817951634536354, iteration: 362772
loss: 0.9698773622512817,grad_norm: 0.7950743192189742, iteration: 362773
loss: 1.0379388332366943,grad_norm: 0.9999995878566945, iteration: 362774
loss: 1.0143928527832031,grad_norm: 0.8123866258201893, iteration: 362775
loss: 1.0040531158447266,grad_norm: 0.99999894503955, iteration: 362776
loss: 1.0607213973999023,grad_norm: 0.8450180140585298, iteration: 362777
loss: 0.9971083998680115,grad_norm: 0.7048754430084195, iteration: 362778
loss: 0.9523757100105286,grad_norm: 0.775822305100689, iteration: 362779
loss: 0.998583197593689,grad_norm: 0.8099962268748266, iteration: 362780
loss: 1.0147143602371216,grad_norm: 0.8329357765752059, iteration: 362781
loss: 1.0187350511550903,grad_norm: 0.9380567038113538, iteration: 362782
loss: 0.9824084043502808,grad_norm: 0.9779135600105582, iteration: 362783
loss: 0.9809695482254028,grad_norm: 0.7782118944765618, iteration: 362784
loss: 1.039137363433838,grad_norm: 0.8628876388671732, iteration: 362785
loss: 1.0096559524536133,grad_norm: 0.9999991556398968, iteration: 362786
loss: 0.9818890690803528,grad_norm: 0.8827657014924134, iteration: 362787
loss: 0.9945563077926636,grad_norm: 0.8487400475123325, iteration: 362788
loss: 0.9917111396789551,grad_norm: 0.8062454977816578, iteration: 362789
loss: 1.030138373374939,grad_norm: 0.6670037326741668, iteration: 362790
loss: 1.002975344657898,grad_norm: 0.9555469981368651, iteration: 362791
loss: 0.9957990646362305,grad_norm: 0.8549232277811847, iteration: 362792
loss: 0.9671388864517212,grad_norm: 0.8315084761863311, iteration: 362793
loss: 0.9766225218772888,grad_norm: 0.703947809419862, iteration: 362794
loss: 0.9779338240623474,grad_norm: 0.7307120456780903, iteration: 362795
loss: 1.0337456464767456,grad_norm: 0.7822816845888055, iteration: 362796
loss: 0.9896665811538696,grad_norm: 0.7190627184291878, iteration: 362797
loss: 0.9664270877838135,grad_norm: 0.7534380943136886, iteration: 362798
loss: 1.042678952217102,grad_norm: 0.9999992543275767, iteration: 362799
loss: 0.9690585732460022,grad_norm: 0.7309268159500554, iteration: 362800
loss: 1.0405296087265015,grad_norm: 0.8008113076022155, iteration: 362801
loss: 0.974443793296814,grad_norm: 0.652391677420764, iteration: 362802
loss: 0.9949842691421509,grad_norm: 0.7892993502497141, iteration: 362803
loss: 0.9878898859024048,grad_norm: 0.9999998217682596, iteration: 362804
loss: 1.1070818901062012,grad_norm: 0.9999999163016287, iteration: 362805
loss: 0.993218719959259,grad_norm: 0.8916958528908335, iteration: 362806
loss: 0.976148247718811,grad_norm: 0.7518859930837207, iteration: 362807
loss: 1.022096037864685,grad_norm: 0.7717020107680534, iteration: 362808
loss: 1.0164848566055298,grad_norm: 0.8149211492346256, iteration: 362809
loss: 1.0305018424987793,grad_norm: 0.9540408164269069, iteration: 362810
loss: 1.0386667251586914,grad_norm: 0.7837736091814032, iteration: 362811
loss: 1.0102629661560059,grad_norm: 0.9999990744151909, iteration: 362812
loss: 1.024598240852356,grad_norm: 0.9999993291216068, iteration: 362813
loss: 1.0173847675323486,grad_norm: 0.9726933782369425, iteration: 362814
loss: 1.0144068002700806,grad_norm: 0.6752838758381049, iteration: 362815
loss: 1.0125083923339844,grad_norm: 0.8068634950831016, iteration: 362816
loss: 1.0386021137237549,grad_norm: 0.9999993336158071, iteration: 362817
loss: 1.0256744623184204,grad_norm: 0.7728510157452667, iteration: 362818
loss: 0.9750720262527466,grad_norm: 0.8344431926740224, iteration: 362819
loss: 1.006546139717102,grad_norm: 0.744595700489828, iteration: 362820
loss: 1.0215716361999512,grad_norm: 0.940695005461323, iteration: 362821
loss: 0.9651651382446289,grad_norm: 0.689966855966844, iteration: 362822
loss: 0.9840250015258789,grad_norm: 0.930292145612564, iteration: 362823
loss: 1.0213901996612549,grad_norm: 0.9375295375881467, iteration: 362824
loss: 1.0079352855682373,grad_norm: 0.9061463400245622, iteration: 362825
loss: 1.0116065740585327,grad_norm: 0.9999995304901215, iteration: 362826
loss: 1.0711594820022583,grad_norm: 0.999999521261878, iteration: 362827
loss: 0.9949093461036682,grad_norm: 0.9999998036573533, iteration: 362828
loss: 1.0053573846817017,grad_norm: 0.8283698022424905, iteration: 362829
loss: 0.9837338924407959,grad_norm: 0.8011866710521934, iteration: 362830
loss: 1.118314504623413,grad_norm: 0.9472893335508175, iteration: 362831
loss: 0.9902392029762268,grad_norm: 0.9690404123856469, iteration: 362832
loss: 0.9908968806266785,grad_norm: 0.8206389420297786, iteration: 362833
loss: 0.9704763889312744,grad_norm: 0.8211866208092485, iteration: 362834
loss: 0.974137008190155,grad_norm: 0.9874226321333046, iteration: 362835
loss: 1.0122264623641968,grad_norm: 0.837937826611408, iteration: 362836
loss: 0.9834308624267578,grad_norm: 0.7879945789157861, iteration: 362837
loss: 0.9733319878578186,grad_norm: 0.9999991442466702, iteration: 362838
loss: 0.9862674474716187,grad_norm: 0.855002088963702, iteration: 362839
loss: 1.0153114795684814,grad_norm: 0.7177848138478593, iteration: 362840
loss: 1.0343705415725708,grad_norm: 0.9802369158922395, iteration: 362841
loss: 0.9848713874816895,grad_norm: 0.7301792289535577, iteration: 362842
loss: 1.0682071447372437,grad_norm: 0.9199366141218318, iteration: 362843
loss: 1.0965185165405273,grad_norm: 0.9999991788293794, iteration: 362844
loss: 1.1309040784835815,grad_norm: 0.9999995072870057, iteration: 362845
loss: 1.023745059967041,grad_norm: 0.9764699781532246, iteration: 362846
loss: 0.9914121627807617,grad_norm: 0.7333399139141632, iteration: 362847
loss: 0.9921439290046692,grad_norm: 0.8902002786523825, iteration: 362848
loss: 0.991686999797821,grad_norm: 0.7384122831685536, iteration: 362849
loss: 1.0053884983062744,grad_norm: 0.8159277221597264, iteration: 362850
loss: 1.0064857006072998,grad_norm: 0.864077294254939, iteration: 362851
loss: 1.0205167531967163,grad_norm: 0.8538462909977995, iteration: 362852
loss: 1.0037027597427368,grad_norm: 0.9999998885684575, iteration: 362853
loss: 0.9840684533119202,grad_norm: 0.8879152293322718, iteration: 362854
loss: 1.066933274269104,grad_norm: 0.9999997863404013, iteration: 362855
loss: 1.0432182550430298,grad_norm: 0.7599959928976929, iteration: 362856
loss: 1.0222699642181396,grad_norm: 0.7433653134000434, iteration: 362857
loss: 1.0171144008636475,grad_norm: 0.7313169087526945, iteration: 362858
loss: 0.9961479902267456,grad_norm: 0.967285263521686, iteration: 362859
loss: 1.0717172622680664,grad_norm: 0.9999994638418316, iteration: 362860
loss: 0.9978523850440979,grad_norm: 0.6772141217452798, iteration: 362861
loss: 1.018456220626831,grad_norm: 0.8217430009291333, iteration: 362862
loss: 1.0167889595031738,grad_norm: 0.6773506741836932, iteration: 362863
loss: 0.9748977422714233,grad_norm: 0.7057572886210076, iteration: 362864
loss: 0.9803204536437988,grad_norm: 0.8030020452871164, iteration: 362865
loss: 0.9797086715698242,grad_norm: 0.9827503606446838, iteration: 362866
loss: 1.0397231578826904,grad_norm: 0.9999998580183742, iteration: 362867
loss: 0.9657694101333618,grad_norm: 0.8220185926536115, iteration: 362868
loss: 1.0546019077301025,grad_norm: 0.9999992499011588, iteration: 362869
loss: 1.0260494947433472,grad_norm: 0.9999990645813561, iteration: 362870
loss: 1.0877248048782349,grad_norm: 0.9999993886914597, iteration: 362871
loss: 1.0127887725830078,grad_norm: 0.9999996076037074, iteration: 362872
loss: 0.9806934595108032,grad_norm: 0.7784251131030159, iteration: 362873
loss: 1.0008639097213745,grad_norm: 0.8654848313198557, iteration: 362874
loss: 0.9971466064453125,grad_norm: 0.9669141508411447, iteration: 362875
loss: 1.0654199123382568,grad_norm: 0.9999993171749623, iteration: 362876
loss: 1.1131850481033325,grad_norm: 0.9999992544662312, iteration: 362877
loss: 1.0836917161941528,grad_norm: 0.9999995969072429, iteration: 362878
loss: 1.081668496131897,grad_norm: 0.9999991117004936, iteration: 362879
loss: 1.011471152305603,grad_norm: 0.9105334419405464, iteration: 362880
loss: 1.0062357187271118,grad_norm: 0.9999991368303025, iteration: 362881
loss: 1.0871788263320923,grad_norm: 0.9999995021742061, iteration: 362882
loss: 1.135335922241211,grad_norm: 1.0000000299003746, iteration: 362883
loss: 1.0025250911712646,grad_norm: 0.7039069547423283, iteration: 362884
loss: 1.019139289855957,grad_norm: 0.7229369389732636, iteration: 362885
loss: 1.0641562938690186,grad_norm: 0.8544169667615508, iteration: 362886
loss: 1.1402679681777954,grad_norm: 1.0000001096001543, iteration: 362887
loss: 0.9915339946746826,grad_norm: 0.9999991325114077, iteration: 362888
loss: 1.001094102859497,grad_norm: 0.9346696257225894, iteration: 362889
loss: 0.9947928786277771,grad_norm: 0.8184892561190142, iteration: 362890
loss: 0.9894415736198425,grad_norm: 0.7502783928668745, iteration: 362891
loss: 1.020450472831726,grad_norm: 0.8074063359521957, iteration: 362892
loss: 1.1356619596481323,grad_norm: 0.999999147568895, iteration: 362893
loss: 1.0742589235305786,grad_norm: 0.9999991702972177, iteration: 362894
loss: 1.0611318349838257,grad_norm: 0.7502583534009101, iteration: 362895
loss: 1.0195462703704834,grad_norm: 0.9999999324933733, iteration: 362896
loss: 1.0423343181610107,grad_norm: 0.9999990925749062, iteration: 362897
loss: 0.9981653690338135,grad_norm: 0.8709037117514352, iteration: 362898
loss: 1.0478612184524536,grad_norm: 0.9999993289771518, iteration: 362899
loss: 1.0026313066482544,grad_norm: 0.9999989897487608, iteration: 362900
loss: 0.9769212603569031,grad_norm: 0.8331305072048177, iteration: 362901
loss: 1.0231199264526367,grad_norm: 0.9920315672497395, iteration: 362902
loss: 1.032770037651062,grad_norm: 0.8972419242342723, iteration: 362903
loss: 0.9937725067138672,grad_norm: 0.8755529858471922, iteration: 362904
loss: 1.0338963270187378,grad_norm: 0.9999993051925985, iteration: 362905
loss: 0.9516245126724243,grad_norm: 0.9999990387842398, iteration: 362906
loss: 1.1080360412597656,grad_norm: 0.9999990104169507, iteration: 362907
loss: 1.03924560546875,grad_norm: 0.9029918643034814, iteration: 362908
loss: 1.0094151496887207,grad_norm: 0.8106646556434244, iteration: 362909
loss: 1.0735869407653809,grad_norm: 0.999999619972874, iteration: 362910
loss: 0.979616641998291,grad_norm: 0.9256871569995141, iteration: 362911
loss: 1.0356013774871826,grad_norm: 0.7590924061512269, iteration: 362912
loss: 1.1466352939605713,grad_norm: 0.9999995882059727, iteration: 362913
loss: 1.0960450172424316,grad_norm: 0.9999999483102133, iteration: 362914
loss: 1.0313125848770142,grad_norm: 0.9999997402602506, iteration: 362915
loss: 1.0126289129257202,grad_norm: 0.9999990858039022, iteration: 362916
loss: 0.9959860444068909,grad_norm: 0.7278518477803411, iteration: 362917
loss: 0.9952337741851807,grad_norm: 0.9999995860916108, iteration: 362918
loss: 1.0300195217132568,grad_norm: 0.9999992453976224, iteration: 362919
loss: 1.0247749090194702,grad_norm: 0.8079974404383029, iteration: 362920
loss: 1.0070006847381592,grad_norm: 0.8532658613474999, iteration: 362921
loss: 1.00631582736969,grad_norm: 0.9506979775628485, iteration: 362922
loss: 1.1103612184524536,grad_norm: 0.9999996350298974, iteration: 362923
loss: 1.0229244232177734,grad_norm: 0.7040684235052374, iteration: 362924
loss: 0.9901506304740906,grad_norm: 0.9999991043936349, iteration: 362925
loss: 1.0222885608673096,grad_norm: 0.9999995011721904, iteration: 362926
loss: 0.990349292755127,grad_norm: 0.7971979701951334, iteration: 362927
loss: 1.009626865386963,grad_norm: 0.9999990547561564, iteration: 362928
loss: 1.0151690244674683,grad_norm: 0.7367414192824828, iteration: 362929
loss: 1.0718821287155151,grad_norm: 0.9999992033545139, iteration: 362930
loss: 1.0471247434616089,grad_norm: 0.9999998052678705, iteration: 362931
loss: 1.1039248704910278,grad_norm: 0.9999997589918616, iteration: 362932
loss: 1.0026823282241821,grad_norm: 0.8528404732511365, iteration: 362933
loss: 1.0389708280563354,grad_norm: 0.8602122559197797, iteration: 362934
loss: 1.0860087871551514,grad_norm: 0.796675512894017, iteration: 362935
loss: 1.1642042398452759,grad_norm: 0.9255496961302184, iteration: 362936
loss: 1.0253630876541138,grad_norm: 0.8419446593636551, iteration: 362937
loss: 1.0311756134033203,grad_norm: 0.9410222158825637, iteration: 362938
loss: 1.0703208446502686,grad_norm: 0.9999991635485345, iteration: 362939
loss: 0.980195939540863,grad_norm: 0.8495637439093634, iteration: 362940
loss: 1.2906286716461182,grad_norm: 0.999999995448376, iteration: 362941
loss: 1.0052831172943115,grad_norm: 0.9999990712097795, iteration: 362942
loss: 1.1513690948486328,grad_norm: 0.9999997925360675, iteration: 362943
loss: 0.9924901127815247,grad_norm: 0.9334133506021749, iteration: 362944
loss: 0.9346950054168701,grad_norm: 0.9136141107899303, iteration: 362945
loss: 0.9977383017539978,grad_norm: 0.9335699242069861, iteration: 362946
loss: 1.019647479057312,grad_norm: 0.7237185088374659, iteration: 362947
loss: 1.0179988145828247,grad_norm: 0.7917449937750873, iteration: 362948
loss: 1.0301203727722168,grad_norm: 0.9999996696306312, iteration: 362949
loss: 1.0773471593856812,grad_norm: 0.9999993518785205, iteration: 362950
loss: 1.0210801362991333,grad_norm: 0.71093079800186, iteration: 362951
loss: 1.0580230951309204,grad_norm: 0.9999990956523214, iteration: 362952
loss: 1.0076630115509033,grad_norm: 0.8365793099000031, iteration: 362953
loss: 0.9965488910675049,grad_norm: 0.999999079061653, iteration: 362954
loss: 1.0589584112167358,grad_norm: 0.8211719257411424, iteration: 362955
loss: 1.0226044654846191,grad_norm: 0.8366163920338385, iteration: 362956
loss: 1.0155324935913086,grad_norm: 0.8473443416156552, iteration: 362957
loss: 0.9547525644302368,grad_norm: 0.8692472265460893, iteration: 362958
loss: 1.0759785175323486,grad_norm: 0.9999993265510947, iteration: 362959
loss: 1.0022494792938232,grad_norm: 0.7892004564558777, iteration: 362960
loss: 1.0477560758590698,grad_norm: 0.9999998365204122, iteration: 362961
loss: 1.012654185295105,grad_norm: 0.9999997375527971, iteration: 362962
loss: 1.0656236410140991,grad_norm: 0.925882521525166, iteration: 362963
loss: 1.1001498699188232,grad_norm: 0.8248103210468528, iteration: 362964
loss: 0.9818481206893921,grad_norm: 0.8475632532741219, iteration: 362965
loss: 0.9807401299476624,grad_norm: 0.8045027540765243, iteration: 362966
loss: 1.0177079439163208,grad_norm: 0.9287473967824744, iteration: 362967
loss: 1.032344102859497,grad_norm: 0.8712646593260231, iteration: 362968
loss: 0.9787854552268982,grad_norm: 0.9682401022370186, iteration: 362969
loss: 0.9936316609382629,grad_norm: 0.8822467940184382, iteration: 362970
loss: 1.0202521085739136,grad_norm: 0.8261464587228873, iteration: 362971
loss: 0.999390721321106,grad_norm: 0.7342870919769897, iteration: 362972
loss: 0.9845635890960693,grad_norm: 0.8610390887197354, iteration: 362973
loss: 1.0585514307022095,grad_norm: 0.9838159308009216, iteration: 362974
loss: 1.0290449857711792,grad_norm: 0.8767052768506809, iteration: 362975
loss: 1.0465282201766968,grad_norm: 0.8790845396646174, iteration: 362976
loss: 1.0238643884658813,grad_norm: 0.9999990854947052, iteration: 362977
loss: 0.9863130450248718,grad_norm: 0.7781190027947092, iteration: 362978
loss: 1.003036618232727,grad_norm: 0.6421735404395975, iteration: 362979
loss: 1.0106558799743652,grad_norm: 0.9532268686881807, iteration: 362980
loss: 0.9910641312599182,grad_norm: 0.7595691663026973, iteration: 362981
loss: 1.0097601413726807,grad_norm: 0.8968525705115827, iteration: 362982
loss: 1.0049374103546143,grad_norm: 0.920561441436065, iteration: 362983
loss: 1.0301482677459717,grad_norm: 0.858439865522969, iteration: 362984
loss: 1.0196267366409302,grad_norm: 0.873220397860809, iteration: 362985
loss: 1.0447328090667725,grad_norm: 0.9999997641525262, iteration: 362986
loss: 0.9729750752449036,grad_norm: 0.8012417127008651, iteration: 362987
loss: 1.0132951736450195,grad_norm: 0.9999990798028653, iteration: 362988
loss: 0.9476442933082581,grad_norm: 0.7473222562748065, iteration: 362989
loss: 0.9959864616394043,grad_norm: 0.8257721539269243, iteration: 362990
loss: 1.0040779113769531,grad_norm: 0.8261071077326326, iteration: 362991
loss: 1.0416654348373413,grad_norm: 0.8983042402573517, iteration: 362992
loss: 0.9984224438667297,grad_norm: 0.7969461764326984, iteration: 362993
loss: 0.9968193769454956,grad_norm: 0.8301746862059041, iteration: 362994
loss: 0.9962096214294434,grad_norm: 0.8854340762455944, iteration: 362995
loss: 0.9771953821182251,grad_norm: 0.7815364254992776, iteration: 362996
loss: 1.0597448348999023,grad_norm: 0.9999990954236359, iteration: 362997
loss: 1.0038586854934692,grad_norm: 0.99999896310238, iteration: 362998
loss: 1.0082151889801025,grad_norm: 0.7521394655091759, iteration: 362999
loss: 0.9919790625572205,grad_norm: 0.9617480135224168, iteration: 363000
loss: 0.9817842841148376,grad_norm: 0.9789447033380038, iteration: 363001
loss: 1.017261266708374,grad_norm: 0.7606846834166322, iteration: 363002
loss: 0.99137943983078,grad_norm: 0.7105391281670754, iteration: 363003
loss: 1.0120701789855957,grad_norm: 0.9999991766849112, iteration: 363004
loss: 1.0184451341629028,grad_norm: 0.9019740846192749, iteration: 363005
loss: 1.014480710029602,grad_norm: 0.8980981762848601, iteration: 363006
loss: 1.0136815309524536,grad_norm: 0.8273203025839072, iteration: 363007
loss: 0.9931561946868896,grad_norm: 0.8192170644057536, iteration: 363008
loss: 0.9993561506271362,grad_norm: 0.8493367956705327, iteration: 363009
loss: 1.007489800453186,grad_norm: 0.6282392708031416, iteration: 363010
loss: 1.0046318769454956,grad_norm: 0.8168749156718816, iteration: 363011
loss: 1.0322719812393188,grad_norm: 0.869590281063209, iteration: 363012
loss: 1.0368552207946777,grad_norm: 0.9999990219676803, iteration: 363013
loss: 1.0066368579864502,grad_norm: 0.8463055970140264, iteration: 363014
loss: 1.0276297330856323,grad_norm: 0.9083238562237739, iteration: 363015
loss: 1.0106303691864014,grad_norm: 0.9703734382518688, iteration: 363016
loss: 1.0383871793746948,grad_norm: 0.9999999399641069, iteration: 363017
loss: 0.9848741292953491,grad_norm: 0.8651368278054488, iteration: 363018
loss: 1.033828616142273,grad_norm: 0.9999992920589459, iteration: 363019
loss: 0.9769113659858704,grad_norm: 0.9622150240363105, iteration: 363020
loss: 0.9872231483459473,grad_norm: 0.7790739238693737, iteration: 363021
loss: 1.0975792407989502,grad_norm: 0.9999998047485973, iteration: 363022
loss: 0.973456621170044,grad_norm: 0.8422343452317457, iteration: 363023
loss: 1.02041494846344,grad_norm: 0.8006606520220592, iteration: 363024
loss: 1.0592162609100342,grad_norm: 0.9999995034724504, iteration: 363025
loss: 0.9834588766098022,grad_norm: 0.8460290263517722, iteration: 363026
loss: 1.0173238515853882,grad_norm: 0.9253443621515329, iteration: 363027
loss: 1.0227497816085815,grad_norm: 0.7803210756816247, iteration: 363028
loss: 1.0304614305496216,grad_norm: 0.7526465694974349, iteration: 363029
loss: 1.016090750694275,grad_norm: 0.8101651540557866, iteration: 363030
loss: 0.979012131690979,grad_norm: 0.7518641627840933, iteration: 363031
loss: 1.0009902715682983,grad_norm: 0.6529242794519624, iteration: 363032
loss: 1.000806212425232,grad_norm: 0.848357209483107, iteration: 363033
loss: 1.01304030418396,grad_norm: 0.7934353391736242, iteration: 363034
loss: 1.0124410390853882,grad_norm: 0.7428752878851541, iteration: 363035
loss: 1.012366533279419,grad_norm: 0.8077603928544469, iteration: 363036
loss: 0.9972087144851685,grad_norm: 0.8740672313317487, iteration: 363037
loss: 1.0276648998260498,grad_norm: 0.8280718127364598, iteration: 363038
loss: 0.9732894897460938,grad_norm: 0.9999996801418587, iteration: 363039
loss: 1.0839108228683472,grad_norm: 0.898617078578642, iteration: 363040
loss: 1.0386464595794678,grad_norm: 0.8731854008599153, iteration: 363041
loss: 1.1624034643173218,grad_norm: 0.9999991233058937, iteration: 363042
loss: 0.972383439540863,grad_norm: 0.6937678550458038, iteration: 363043
loss: 1.0192302465438843,grad_norm: 0.9702324674297453, iteration: 363044
loss: 0.961732029914856,grad_norm: 0.7885028970520859, iteration: 363045
loss: 0.9598122239112854,grad_norm: 0.8429711380814429, iteration: 363046
loss: 0.9546546936035156,grad_norm: 0.8543671319754746, iteration: 363047
loss: 0.9870470762252808,grad_norm: 0.7406797794580914, iteration: 363048
loss: 1.0498247146606445,grad_norm: 0.9999990833063889, iteration: 363049
loss: 1.026879072189331,grad_norm: 0.9142818542029382, iteration: 363050
loss: 1.0225483179092407,grad_norm: 0.9719093554473108, iteration: 363051
loss: 1.0058549642562866,grad_norm: 0.877200850318891, iteration: 363052
loss: 0.9745983481407166,grad_norm: 0.9075854574055261, iteration: 363053
loss: 0.9878002405166626,grad_norm: 0.9683127414630992, iteration: 363054
loss: 1.0039982795715332,grad_norm: 0.7971770605033165, iteration: 363055
loss: 0.9734550714492798,grad_norm: 0.7396704353185015, iteration: 363056
loss: 1.0155630111694336,grad_norm: 0.7986939497004947, iteration: 363057
loss: 0.9980512857437134,grad_norm: 0.9999995648130736, iteration: 363058
loss: 1.0462251901626587,grad_norm: 0.9035556612295276, iteration: 363059
loss: 1.028662085533142,grad_norm: 0.7132320641733728, iteration: 363060
loss: 1.0123343467712402,grad_norm: 0.8168665281783184, iteration: 363061
loss: 1.003508448600769,grad_norm: 0.7707874139427517, iteration: 363062
loss: 0.9963239431381226,grad_norm: 0.7881586141781082, iteration: 363063
loss: 0.9862494468688965,grad_norm: 0.8375799406216535, iteration: 363064
loss: 1.0091983079910278,grad_norm: 0.8943222449225184, iteration: 363065
loss: 0.9672977924346924,grad_norm: 0.7648011157773736, iteration: 363066
loss: 1.1739463806152344,grad_norm: 0.9999993277844635, iteration: 363067
loss: 1.0418856143951416,grad_norm: 0.7674134494675495, iteration: 363068
loss: 1.0297001600265503,grad_norm: 0.8742589861520396, iteration: 363069
loss: 1.0022789239883423,grad_norm: 0.7317413744255618, iteration: 363070
loss: 1.0085147619247437,grad_norm: 0.9283763890252544, iteration: 363071
loss: 0.9659063220024109,grad_norm: 0.8290313240082253, iteration: 363072
loss: 1.0142803192138672,grad_norm: 0.960528604247887, iteration: 363073
loss: 0.9889690279960632,grad_norm: 0.7695619881525546, iteration: 363074
loss: 1.0376315116882324,grad_norm: 0.8956314652078121, iteration: 363075
loss: 1.0097752809524536,grad_norm: 0.9355395329356369, iteration: 363076
loss: 1.0044394731521606,grad_norm: 0.7711181462141492, iteration: 363077
loss: 1.0072872638702393,grad_norm: 0.9284391523655147, iteration: 363078
loss: 1.0309327840805054,grad_norm: 0.7452788515391489, iteration: 363079
loss: 1.0209026336669922,grad_norm: 0.888601308725597, iteration: 363080
loss: 0.9837111830711365,grad_norm: 0.8550944052444921, iteration: 363081
loss: 0.959502637386322,grad_norm: 0.7104359312536161, iteration: 363082
loss: 1.006906509399414,grad_norm: 0.7000445094635167, iteration: 363083
loss: 1.0024981498718262,grad_norm: 0.9311715670835636, iteration: 363084
loss: 0.9661921262741089,grad_norm: 0.9033636270130706, iteration: 363085
loss: 0.9893299341201782,grad_norm: 0.9999991331319655, iteration: 363086
loss: 1.0216854810714722,grad_norm: 0.8314299785978368, iteration: 363087
loss: 1.0253773927688599,grad_norm: 0.6781443996417754, iteration: 363088
loss: 0.9658855199813843,grad_norm: 0.73983280508388, iteration: 363089
loss: 1.0250177383422852,grad_norm: 0.9755128718403803, iteration: 363090
loss: 0.9926439523696899,grad_norm: 0.8261151248264875, iteration: 363091
loss: 1.0070257186889648,grad_norm: 0.8168276900225525, iteration: 363092
loss: 0.9831889867782593,grad_norm: 0.7981648353688918, iteration: 363093
loss: 0.9639294743537903,grad_norm: 0.7710672792307517, iteration: 363094
loss: 0.9965875148773193,grad_norm: 0.926530507931846, iteration: 363095
loss: 0.9984734654426575,grad_norm: 0.7347335966804623, iteration: 363096
loss: 0.9973362684249878,grad_norm: 0.7891864406325398, iteration: 363097
loss: 1.0099689960479736,grad_norm: 0.754630928409047, iteration: 363098
loss: 0.9695644378662109,grad_norm: 0.6412973915402983, iteration: 363099
loss: 0.9857265949249268,grad_norm: 0.8203941875791892, iteration: 363100
loss: 0.9680382013320923,grad_norm: 0.8984217649767766, iteration: 363101
loss: 0.9871560335159302,grad_norm: 0.7996742170019934, iteration: 363102
loss: 1.001360535621643,grad_norm: 0.8814759113286085, iteration: 363103
loss: 0.9668501019477844,grad_norm: 0.9257428656824855, iteration: 363104
loss: 1.020529866218567,grad_norm: 0.9362733155617957, iteration: 363105
loss: 1.0276963710784912,grad_norm: 0.780828241451021, iteration: 363106
loss: 0.9824691414833069,grad_norm: 0.8823535694193413, iteration: 363107
loss: 0.9777189493179321,grad_norm: 0.7650801600309854, iteration: 363108
loss: 0.9927710890769958,grad_norm: 0.9999994186157158, iteration: 363109
loss: 1.0985803604125977,grad_norm: 1.0000000305225285, iteration: 363110
loss: 0.9788398146629333,grad_norm: 0.9999990985818404, iteration: 363111
loss: 1.0227521657943726,grad_norm: 0.8117986771494977, iteration: 363112
loss: 0.9875038266181946,grad_norm: 0.7174377525165183, iteration: 363113
loss: 0.9981787800788879,grad_norm: 0.8062162898176948, iteration: 363114
loss: 0.9604949951171875,grad_norm: 0.7343757722983384, iteration: 363115
loss: 0.9762298464775085,grad_norm: 0.8138993121494004, iteration: 363116
loss: 1.0184988975524902,grad_norm: 0.9026153895279683, iteration: 363117
loss: 0.9896631836891174,grad_norm: 0.8970358357296646, iteration: 363118
loss: 0.9779207110404968,grad_norm: 0.9999990858203271, iteration: 363119
loss: 1.015708565711975,grad_norm: 0.7963317217624747, iteration: 363120
loss: 0.9805139899253845,grad_norm: 0.7746685381258459, iteration: 363121
loss: 0.9826555252075195,grad_norm: 0.6410325613035159, iteration: 363122
loss: 0.9684800505638123,grad_norm: 0.8490380196925933, iteration: 363123
loss: 1.0227606296539307,grad_norm: 0.8490280572892529, iteration: 363124
loss: 0.9768897891044617,grad_norm: 0.8773054216148806, iteration: 363125
loss: 1.028186559677124,grad_norm: 0.8018216400083674, iteration: 363126
loss: 0.9962959885597229,grad_norm: 0.7827378814200608, iteration: 363127
loss: 1.0266584157943726,grad_norm: 0.9023530635193127, iteration: 363128
loss: 0.9962427616119385,grad_norm: 0.9999996853213029, iteration: 363129
loss: 1.0051065683364868,grad_norm: 0.9999990766780629, iteration: 363130
loss: 0.9946298003196716,grad_norm: 0.7459414413668882, iteration: 363131
loss: 0.9977354407310486,grad_norm: 0.9999996305694895, iteration: 363132
loss: 0.9654415845870972,grad_norm: 0.8414960964082604, iteration: 363133
loss: 0.9857423305511475,grad_norm: 0.9999992741124458, iteration: 363134
loss: 1.01095712184906,grad_norm: 0.7398577538823856, iteration: 363135
loss: 0.9783309102058411,grad_norm: 0.8146850771080685, iteration: 363136
loss: 0.9768875241279602,grad_norm: 0.7907620511329335, iteration: 363137
loss: 1.0062713623046875,grad_norm: 0.7580248323770763, iteration: 363138
loss: 1.046297550201416,grad_norm: 0.9653987253051535, iteration: 363139
loss: 0.9972229599952698,grad_norm: 0.7603018880509611, iteration: 363140
loss: 1.0161824226379395,grad_norm: 0.9519038076280333, iteration: 363141
loss: 0.9928349852561951,grad_norm: 0.765928265930963, iteration: 363142
loss: 1.008495569229126,grad_norm: 0.8393526019641531, iteration: 363143
loss: 0.9907898902893066,grad_norm: 0.8155712711994069, iteration: 363144
loss: 0.9897908568382263,grad_norm: 0.9480888733982278, iteration: 363145
loss: 1.0059672594070435,grad_norm: 0.8170408628156555, iteration: 363146
loss: 1.087868332862854,grad_norm: 0.9999991763718172, iteration: 363147
loss: 1.0541194677352905,grad_norm: 0.9823709335612045, iteration: 363148
loss: 0.9759053587913513,grad_norm: 0.7959857963896251, iteration: 363149
loss: 0.9861652851104736,grad_norm: 0.9959420994738523, iteration: 363150
loss: 0.985494077205658,grad_norm: 0.7881446076092337, iteration: 363151
loss: 1.1185625791549683,grad_norm: 0.8823616606851632, iteration: 363152
loss: 1.0102448463439941,grad_norm: 0.7817269547307257, iteration: 363153
loss: 0.9948140382766724,grad_norm: 0.8054911008978155, iteration: 363154
loss: 1.009095311164856,grad_norm: 0.9999992178380847, iteration: 363155
loss: 0.993850588798523,grad_norm: 0.7992739014930995, iteration: 363156
loss: 1.1210955381393433,grad_norm: 0.9999993924698666, iteration: 363157
loss: 0.963105320930481,grad_norm: 0.8783924878584064, iteration: 363158
loss: 1.002153992652893,grad_norm: 0.7219224989474573, iteration: 363159
loss: 0.9977767467498779,grad_norm: 0.9999993527700352, iteration: 363160
loss: 1.028969168663025,grad_norm: 0.7816294253098925, iteration: 363161
loss: 1.0028094053268433,grad_norm: 0.8443127954161133, iteration: 363162
loss: 0.9945402145385742,grad_norm: 0.8557444903688634, iteration: 363163
loss: 0.9626873731613159,grad_norm: 0.9190030706663712, iteration: 363164
loss: 0.9935657978057861,grad_norm: 0.9227349204819203, iteration: 363165
loss: 0.9694847464561462,grad_norm: 0.9999997955183405, iteration: 363166
loss: 1.0166338682174683,grad_norm: 0.9470297395689543, iteration: 363167
loss: 1.0066145658493042,grad_norm: 0.867826257652919, iteration: 363168
loss: 1.0459543466567993,grad_norm: 0.9154835630685801, iteration: 363169
loss: 1.0018854141235352,grad_norm: 0.7803185369428166, iteration: 363170
loss: 1.0460816621780396,grad_norm: 0.9999993181332997, iteration: 363171
loss: 1.0961350202560425,grad_norm: 0.9999995870069104, iteration: 363172
loss: 1.1105070114135742,grad_norm: 0.9999992470682351, iteration: 363173
loss: 1.035849690437317,grad_norm: 0.9085296214979826, iteration: 363174
loss: 0.9670971632003784,grad_norm: 0.7210303939584787, iteration: 363175
loss: 1.080257773399353,grad_norm: 0.791795535433653, iteration: 363176
loss: 1.00838303565979,grad_norm: 0.7390823141470133, iteration: 363177
loss: 1.1222566366195679,grad_norm: 0.9999991722227453, iteration: 363178
loss: 1.0332077741622925,grad_norm: 0.9999995525386929, iteration: 363179
loss: 1.0043021440505981,grad_norm: 0.8460248924489008, iteration: 363180
loss: 0.9977894425392151,grad_norm: 0.8141937233159604, iteration: 363181
loss: 1.0318231582641602,grad_norm: 0.8006440566922055, iteration: 363182
loss: 1.0231159925460815,grad_norm: 0.9999991082760227, iteration: 363183
loss: 1.1969958543777466,grad_norm: 1.0000000462619536, iteration: 363184
loss: 1.126997709274292,grad_norm: 0.9999997791362911, iteration: 363185
loss: 0.9848339557647705,grad_norm: 0.8949326895845067, iteration: 363186
loss: 0.9727365374565125,grad_norm: 0.8732884355423784, iteration: 363187
loss: 1.066825032234192,grad_norm: 0.9999999660352528, iteration: 363188
loss: 1.040968656539917,grad_norm: 0.7228064844666026, iteration: 363189
loss: 1.0148794651031494,grad_norm: 0.8037078002278367, iteration: 363190
loss: 0.9980839490890503,grad_norm: 0.7670376085877795, iteration: 363191
loss: 1.057742953300476,grad_norm: 0.7721668539700743, iteration: 363192
loss: 0.9945036768913269,grad_norm: 0.9960172439686452, iteration: 363193
loss: 1.0753118991851807,grad_norm: 0.9999992259164531, iteration: 363194
loss: 1.074893832206726,grad_norm: 0.9999991269223509, iteration: 363195
loss: 0.9908670783042908,grad_norm: 0.7445349048794047, iteration: 363196
loss: 0.9775340557098389,grad_norm: 0.9999993615631703, iteration: 363197
loss: 1.0211771726608276,grad_norm: 0.999999247827632, iteration: 363198
loss: 0.9612379670143127,grad_norm: 0.7421918190048962, iteration: 363199
loss: 1.008334994316101,grad_norm: 0.9339415956431253, iteration: 363200
loss: 0.9825069308280945,grad_norm: 0.8770183908430972, iteration: 363201
loss: 1.2444100379943848,grad_norm: 0.9757094506706807, iteration: 363202
loss: 1.0835403203964233,grad_norm: 0.999999333404908, iteration: 363203
loss: 0.9504236578941345,grad_norm: 0.9072964076261838, iteration: 363204
loss: 1.0289833545684814,grad_norm: 0.7925461723220448, iteration: 363205
loss: 0.9828129410743713,grad_norm: 0.8517376492016858, iteration: 363206
loss: 0.9823977947235107,grad_norm: 0.7676564910921156, iteration: 363207
loss: 0.9823449850082397,grad_norm: 0.8000513043744475, iteration: 363208
loss: 1.1810837984085083,grad_norm: 0.9999997840424656, iteration: 363209
loss: 1.0174158811569214,grad_norm: 0.9250555122273593, iteration: 363210
loss: 1.2244770526885986,grad_norm: 0.9999998732945462, iteration: 363211
loss: 1.0220001935958862,grad_norm: 0.920695410999128, iteration: 363212
loss: 1.0153114795684814,grad_norm: 0.7609823933498407, iteration: 363213
loss: 0.9928879141807556,grad_norm: 0.7698541701446755, iteration: 363214
loss: 1.0044209957122803,grad_norm: 0.9999998419037237, iteration: 363215
loss: 1.0120346546173096,grad_norm: 0.7429251941123184, iteration: 363216
loss: 1.0228488445281982,grad_norm: 0.9718997469246105, iteration: 363217
loss: 0.9926005601882935,grad_norm: 0.9247390245397225, iteration: 363218
loss: 1.055299162864685,grad_norm: 0.9999998165407163, iteration: 363219
loss: 1.0167185068130493,grad_norm: 0.984420086026596, iteration: 363220
loss: 0.9979574680328369,grad_norm: 0.7306490857559523, iteration: 363221
loss: 1.0997897386550903,grad_norm: 0.9999999113860547, iteration: 363222
loss: 1.001202940940857,grad_norm: 0.8119496469438858, iteration: 363223
loss: 1.0379142761230469,grad_norm: 0.9999996423954357, iteration: 363224
loss: 1.013866901397705,grad_norm: 0.9999997815938375, iteration: 363225
loss: 1.0300719738006592,grad_norm: 0.9999990773402017, iteration: 363226
loss: 0.9800849556922913,grad_norm: 0.747886593580294, iteration: 363227
loss: 0.9969291090965271,grad_norm: 0.999999191736746, iteration: 363228
loss: 1.003401756286621,grad_norm: 0.7927744145907781, iteration: 363229
loss: 1.0334408283233643,grad_norm: 0.9999992841996724, iteration: 363230
loss: 1.0078991651535034,grad_norm: 0.7581667680438293, iteration: 363231
loss: 1.0186794996261597,grad_norm: 0.9999998488593692, iteration: 363232
loss: 1.199479341506958,grad_norm: 0.9999998837890807, iteration: 363233
loss: 1.058797001838684,grad_norm: 0.6794319858695228, iteration: 363234
loss: 0.9931562542915344,grad_norm: 0.8126486452916469, iteration: 363235
loss: 1.0134382247924805,grad_norm: 0.7465481729217369, iteration: 363236
loss: 1.2265719175338745,grad_norm: 0.9999999747077478, iteration: 363237
loss: 1.0600978136062622,grad_norm: 0.8993627162012796, iteration: 363238
loss: 0.9877892732620239,grad_norm: 0.9999990836017215, iteration: 363239
loss: 0.9927436709403992,grad_norm: 0.8236876126688242, iteration: 363240
loss: 1.0074985027313232,grad_norm: 0.8177705560028478, iteration: 363241
loss: 1.027360439300537,grad_norm: 0.9542402643346527, iteration: 363242
loss: 0.9899860620498657,grad_norm: 0.8425285147045373, iteration: 363243
loss: 0.9719606041908264,grad_norm: 0.7951936587755646, iteration: 363244
loss: 0.982021152973175,grad_norm: 0.8617913031876127, iteration: 363245
loss: 1.0560615062713623,grad_norm: 0.9999998438583245, iteration: 363246
loss: 1.0385950803756714,grad_norm: 0.8134361270828893, iteration: 363247
loss: 1.126151442527771,grad_norm: 0.999999644192306, iteration: 363248
loss: 1.0113646984100342,grad_norm: 0.6832339114173316, iteration: 363249
loss: 0.9968520998954773,grad_norm: 0.6994369639933978, iteration: 363250
loss: 1.0930490493774414,grad_norm: 0.9999992895302855, iteration: 363251
loss: 1.0027995109558105,grad_norm: 0.7350136933465362, iteration: 363252
loss: 0.9827682375907898,grad_norm: 0.9158898717905659, iteration: 363253
loss: 1.0617114305496216,grad_norm: 0.9433401213520761, iteration: 363254
loss: 1.1024450063705444,grad_norm: 0.9212771286688995, iteration: 363255
loss: 0.9810183644294739,grad_norm: 0.8698768712040107, iteration: 363256
loss: 0.9967493414878845,grad_norm: 0.7964512126364746, iteration: 363257
loss: 0.9960261583328247,grad_norm: 0.6402719976410567, iteration: 363258
loss: 1.0738385915756226,grad_norm: 0.9999998364552933, iteration: 363259
loss: 1.0097533464431763,grad_norm: 0.8300235173164511, iteration: 363260
loss: 1.1135072708129883,grad_norm: 0.8590053162719429, iteration: 363261
loss: 0.9422986507415771,grad_norm: 0.7588290770475966, iteration: 363262
loss: 1.0521705150604248,grad_norm: 0.7515708928804283, iteration: 363263
loss: 1.0960489511489868,grad_norm: 0.8721543019284046, iteration: 363264
loss: 0.9804339408874512,grad_norm: 0.7886965726561083, iteration: 363265
loss: 0.9927806258201599,grad_norm: 0.8896145632466905, iteration: 363266
loss: 0.9719595909118652,grad_norm: 0.6830152573592955, iteration: 363267
loss: 1.0145825147628784,grad_norm: 0.8264954014956003, iteration: 363268
loss: 0.9716509580612183,grad_norm: 0.814941047957445, iteration: 363269
loss: 1.011069893836975,grad_norm: 0.7849869315615854, iteration: 363270
loss: 0.9952418804168701,grad_norm: 0.8530979223229261, iteration: 363271
loss: 0.9833002686500549,grad_norm: 0.8794993249539436, iteration: 363272
loss: 1.0453444719314575,grad_norm: 0.8189781502940915, iteration: 363273
loss: 1.1051745414733887,grad_norm: 0.9999995490586506, iteration: 363274
loss: 1.0196692943572998,grad_norm: 0.8288530620262706, iteration: 363275
loss: 0.9840616583824158,grad_norm: 0.9805695107227719, iteration: 363276
loss: 0.9913858771324158,grad_norm: 0.7590331826611856, iteration: 363277
loss: 1.0114655494689941,grad_norm: 0.841525911192813, iteration: 363278
loss: 1.05812668800354,grad_norm: 0.9999999334802001, iteration: 363279
loss: 0.9551801681518555,grad_norm: 0.8159088047771927, iteration: 363280
loss: 1.081346869468689,grad_norm: 0.9999994760890045, iteration: 363281
loss: 1.0374118089675903,grad_norm: 0.8179876912392321, iteration: 363282
loss: 1.0317797660827637,grad_norm: 0.7608646485486688, iteration: 363283
loss: 1.0387088060379028,grad_norm: 0.9999998620641555, iteration: 363284
loss: 1.098333716392517,grad_norm: 0.9999998418477851, iteration: 363285
loss: 1.0599403381347656,grad_norm: 0.8733187428341326, iteration: 363286
loss: 1.0477166175842285,grad_norm: 0.9999991164608812, iteration: 363287
loss: 1.1298489570617676,grad_norm: 0.9672592325566501, iteration: 363288
loss: 1.0270613431930542,grad_norm: 0.8474108315833098, iteration: 363289
loss: 1.0437029600143433,grad_norm: 0.9999993508769323, iteration: 363290
loss: 1.1733826398849487,grad_norm: 0.9999996290735264, iteration: 363291
loss: 1.1457253694534302,grad_norm: 0.8991607312197669, iteration: 363292
loss: 1.0881149768829346,grad_norm: 0.9896406279120548, iteration: 363293
loss: 1.018041729927063,grad_norm: 0.9234548285111991, iteration: 363294
loss: 0.9747187495231628,grad_norm: 0.7240592309822415, iteration: 363295
loss: 1.0896074771881104,grad_norm: 0.999999612726151, iteration: 363296
loss: 0.9886593818664551,grad_norm: 0.8838444935073644, iteration: 363297
loss: 1.0203909873962402,grad_norm: 0.9999992595987057, iteration: 363298
loss: 1.0423738956451416,grad_norm: 1.0000000158787699, iteration: 363299
loss: 0.9779606461524963,grad_norm: 0.927209748698261, iteration: 363300
loss: 1.017221450805664,grad_norm: 0.9652043439492047, iteration: 363301
loss: 1.0431582927703857,grad_norm: 0.8295619694407016, iteration: 363302
loss: 1.064249038696289,grad_norm: 0.7254148366481613, iteration: 363303
loss: 0.9938010573387146,grad_norm: 0.7706953900912181, iteration: 363304
loss: 0.9873021841049194,grad_norm: 0.8126099679660209, iteration: 363305
loss: 1.0431110858917236,grad_norm: 0.8326860369268468, iteration: 363306
loss: 1.1166399717330933,grad_norm: 0.9999991448785158, iteration: 363307
loss: 1.0527470111846924,grad_norm: 0.8224875415319228, iteration: 363308
loss: 1.016408085823059,grad_norm: 0.8740583775596248, iteration: 363309
loss: 0.9976158142089844,grad_norm: 0.9999989699133579, iteration: 363310
loss: 1.0629514455795288,grad_norm: 0.8985836001945756, iteration: 363311
loss: 1.0272029638290405,grad_norm: 0.8045832602394044, iteration: 363312
loss: 1.0786476135253906,grad_norm: 0.9999994738714403, iteration: 363313
loss: 0.9334598779678345,grad_norm: 0.7640067795440644, iteration: 363314
loss: 1.121567964553833,grad_norm: 0.9999996136515911, iteration: 363315
loss: 1.0487642288208008,grad_norm: 0.8085779891689663, iteration: 363316
loss: 1.0822333097457886,grad_norm: 0.9999990377050635, iteration: 363317
loss: 1.024584412574768,grad_norm: 0.8248461315834714, iteration: 363318
loss: 1.0321741104125977,grad_norm: 0.7909451647455317, iteration: 363319
loss: 1.126253604888916,grad_norm: 0.9999992493346326, iteration: 363320
loss: 1.0421017408370972,grad_norm: 0.9632811117427661, iteration: 363321
loss: 0.9735724925994873,grad_norm: 0.7964351366536385, iteration: 363322
loss: 1.0112415552139282,grad_norm: 0.9999992592332653, iteration: 363323
loss: 1.0315955877304077,grad_norm: 0.9999993857569162, iteration: 363324
loss: 1.009637713432312,grad_norm: 0.7192533669309422, iteration: 363325
loss: 1.024552583694458,grad_norm: 0.7366342653378025, iteration: 363326
loss: 0.9973125457763672,grad_norm: 0.8417314198248925, iteration: 363327
loss: 1.0207133293151855,grad_norm: 0.6799311088360691, iteration: 363328
loss: 1.027237892150879,grad_norm: 1.0000000210821158, iteration: 363329
loss: 1.020287036895752,grad_norm: 0.8726048261238923, iteration: 363330
loss: 0.9876039624214172,grad_norm: 0.8078029535379094, iteration: 363331
loss: 1.0006434917449951,grad_norm: 0.736765110557662, iteration: 363332
loss: 1.012010931968689,grad_norm: 0.930103753066934, iteration: 363333
loss: 0.9875587224960327,grad_norm: 0.7223005570579918, iteration: 363334
loss: 1.0064436197280884,grad_norm: 0.9999993248141612, iteration: 363335
loss: 1.0669074058532715,grad_norm: 0.7454504164048695, iteration: 363336
loss: 0.9977389574050903,grad_norm: 0.6632336846571657, iteration: 363337
loss: 0.9868277311325073,grad_norm: 0.9956924917963658, iteration: 363338
loss: 1.0597264766693115,grad_norm: 0.7700300173724012, iteration: 363339
loss: 1.0006412267684937,grad_norm: 0.9999998769921493, iteration: 363340
loss: 1.2257966995239258,grad_norm: 0.9999996382837983, iteration: 363341
loss: 1.3175714015960693,grad_norm: 1.00000000663257, iteration: 363342
loss: 1.0296826362609863,grad_norm: 0.8448181673778973, iteration: 363343
loss: 0.9844540953636169,grad_norm: 0.9486385552459239, iteration: 363344
loss: 1.008357048034668,grad_norm: 0.8248367801324512, iteration: 363345
loss: 1.1069786548614502,grad_norm: 0.9999993307287253, iteration: 363346
loss: 0.9771272540092468,grad_norm: 0.7668107039325954, iteration: 363347
loss: 1.10050368309021,grad_norm: 0.8267455896051984, iteration: 363348
loss: 1.0339471101760864,grad_norm: 0.9999992303573679, iteration: 363349
loss: 1.0192756652832031,grad_norm: 0.819624210605407, iteration: 363350
loss: 1.0096324682235718,grad_norm: 0.8354996380691311, iteration: 363351
loss: 1.1385234594345093,grad_norm: 0.9999990599380921, iteration: 363352
loss: 1.0509403944015503,grad_norm: 0.8910326223358151, iteration: 363353
loss: 0.9713425636291504,grad_norm: 0.9999996141380215, iteration: 363354
loss: 0.9912075996398926,grad_norm: 0.9087012891097546, iteration: 363355
loss: 1.1572102308273315,grad_norm: 0.9670358858266638, iteration: 363356
loss: 1.0060484409332275,grad_norm: 0.8141919572579516, iteration: 363357
loss: 1.0197935104370117,grad_norm: 0.8784223460875937, iteration: 363358
loss: 1.108742117881775,grad_norm: 0.9999991519030433, iteration: 363359
loss: 0.9961594343185425,grad_norm: 0.8009714691959421, iteration: 363360
loss: 0.9808984398841858,grad_norm: 0.999999915700786, iteration: 363361
loss: 1.0974575281143188,grad_norm: 0.9999991183560879, iteration: 363362
loss: 1.0531564950942993,grad_norm: 0.9999999426881573, iteration: 363363
loss: 1.033334732055664,grad_norm: 0.9999990503507492, iteration: 363364
loss: 0.9841784834861755,grad_norm: 0.9989117330737233, iteration: 363365
loss: 1.004426121711731,grad_norm: 0.9999989280724391, iteration: 363366
loss: 0.9747181534767151,grad_norm: 0.923376951082188, iteration: 363367
loss: 1.0239850282669067,grad_norm: 0.9425529647603497, iteration: 363368
loss: 1.0485860109329224,grad_norm: 0.9999990497749, iteration: 363369
loss: 1.0198255777359009,grad_norm: 0.8230479573285361, iteration: 363370
loss: 1.0177688598632812,grad_norm: 0.9356363936129531, iteration: 363371
loss: 0.974282443523407,grad_norm: 0.8188899716259136, iteration: 363372
loss: 1.015296220779419,grad_norm: 0.8323136758269728, iteration: 363373
loss: 1.0601805448532104,grad_norm: 0.9999990902742846, iteration: 363374
loss: 1.021809458732605,grad_norm: 0.9360559068549745, iteration: 363375
loss: 0.9839284420013428,grad_norm: 0.8873450770869739, iteration: 363376
loss: 1.0437121391296387,grad_norm: 0.9430305503674989, iteration: 363377
loss: 1.0808279514312744,grad_norm: 0.999999090775874, iteration: 363378
loss: 0.9592796564102173,grad_norm: 0.8231418742139748, iteration: 363379
loss: 1.0443426370620728,grad_norm: 0.9999999175642895, iteration: 363380
loss: 1.0019522905349731,grad_norm: 0.9999996210611813, iteration: 363381
loss: 1.0626577138900757,grad_norm: 0.9999996246933226, iteration: 363382
loss: 1.0231096744537354,grad_norm: 0.7777651792195718, iteration: 363383
loss: 0.9801254272460938,grad_norm: 0.8354225046357033, iteration: 363384
loss: 1.003393530845642,grad_norm: 0.7286156903558817, iteration: 363385
loss: 1.0792808532714844,grad_norm: 0.9999991862543073, iteration: 363386
loss: 0.9867219924926758,grad_norm: 0.898627588679627, iteration: 363387
loss: 0.9701149463653564,grad_norm: 0.9999996788705965, iteration: 363388
loss: 1.0814367532730103,grad_norm: 0.9999991427539656, iteration: 363389
loss: 0.9923664331436157,grad_norm: 0.9999994269721393, iteration: 363390
loss: 1.036205530166626,grad_norm: 0.9999990833060877, iteration: 363391
loss: 1.0559786558151245,grad_norm: 0.7158199081161157, iteration: 363392
loss: 1.0669822692871094,grad_norm: 0.9527710233866633, iteration: 363393
loss: 0.9719234108924866,grad_norm: 0.8818712115019341, iteration: 363394
loss: 0.990505039691925,grad_norm: 0.9999998896912417, iteration: 363395
loss: 0.9657613039016724,grad_norm: 0.7508620424335883, iteration: 363396
loss: 1.021718978881836,grad_norm: 0.9999993262501731, iteration: 363397
loss: 1.0001424551010132,grad_norm: 0.9732956240330687, iteration: 363398
loss: 1.0099754333496094,grad_norm: 0.9682490897230323, iteration: 363399
loss: 1.1015866994857788,grad_norm: 0.9999994209867414, iteration: 363400
loss: 1.0490999221801758,grad_norm: 0.9820877170106499, iteration: 363401
loss: 1.1590851545333862,grad_norm: 0.9999999334101776, iteration: 363402
loss: 1.0930615663528442,grad_norm: 0.999999669225209, iteration: 363403
loss: 0.9819604158401489,grad_norm: 0.7256385865778393, iteration: 363404
loss: 1.0147337913513184,grad_norm: 0.819228175002456, iteration: 363405
loss: 1.0080010890960693,grad_norm: 0.8899766250407974, iteration: 363406
loss: 1.0364593267440796,grad_norm: 0.9999990297890851, iteration: 363407
loss: 1.0245084762573242,grad_norm: 0.9999999097595896, iteration: 363408
loss: 0.9979676008224487,grad_norm: 0.9506447221400613, iteration: 363409
loss: 1.0774399042129517,grad_norm: 0.9521674415619874, iteration: 363410
loss: 1.0513278245925903,grad_norm: 0.9999991970295817, iteration: 363411
loss: 1.0244778394699097,grad_norm: 0.6848936339462479, iteration: 363412
loss: 1.0437854528427124,grad_norm: 0.9999998677909522, iteration: 363413
loss: 1.0062594413757324,grad_norm: 0.8051335635032472, iteration: 363414
loss: 1.0382342338562012,grad_norm: 0.999999355599747, iteration: 363415
loss: 0.9998931884765625,grad_norm: 0.7043680346772891, iteration: 363416
loss: 1.0410561561584473,grad_norm: 0.8688467424676811, iteration: 363417
loss: 0.9880021214485168,grad_norm: 0.9264989732977417, iteration: 363418
loss: 0.9881375432014465,grad_norm: 0.7919929507354088, iteration: 363419
loss: 1.0148035287857056,grad_norm: 0.7552863637893388, iteration: 363420
loss: 1.024942398071289,grad_norm: 0.999999577736374, iteration: 363421
loss: 1.100062370300293,grad_norm: 0.9999997556218615, iteration: 363422
loss: 1.0751553773880005,grad_norm: 0.9999998236678421, iteration: 363423
loss: 1.033427357673645,grad_norm: 0.7899094214214009, iteration: 363424
loss: 0.9717199206352234,grad_norm: 0.7999372540547771, iteration: 363425
loss: 1.0064557790756226,grad_norm: 0.7877200063391374, iteration: 363426
loss: 1.0178380012512207,grad_norm: 0.9366961792807702, iteration: 363427
loss: 0.991565465927124,grad_norm: 0.7535763795492652, iteration: 363428
loss: 1.0015647411346436,grad_norm: 0.9999996084939239, iteration: 363429
loss: 1.0093544721603394,grad_norm: 0.9999999753051244, iteration: 363430
loss: 1.0222811698913574,grad_norm: 0.9374094761341396, iteration: 363431
loss: 0.9975801110267639,grad_norm: 0.8505845951314976, iteration: 363432
loss: 1.0126234292984009,grad_norm: 0.7832724578677328, iteration: 363433
loss: 1.002789855003357,grad_norm: 0.5951324668007099, iteration: 363434
loss: 1.015371322631836,grad_norm: 0.6735027796375929, iteration: 363435
loss: 1.0359561443328857,grad_norm: 0.8942793610537375, iteration: 363436
loss: 1.0491093397140503,grad_norm: 0.744239373564592, iteration: 363437
loss: 0.976426899433136,grad_norm: 0.7950208314286353, iteration: 363438
loss: 1.0698469877243042,grad_norm: 0.96243269556239, iteration: 363439
loss: 1.1789156198501587,grad_norm: 0.9999993570467283, iteration: 363440
loss: 1.0172016620635986,grad_norm: 0.8365852019625686, iteration: 363441
loss: 0.9842877388000488,grad_norm: 0.6960638271297714, iteration: 363442
loss: 1.020591378211975,grad_norm: 0.6820684690444392, iteration: 363443
loss: 0.9907829761505127,grad_norm: 0.9999995516048625, iteration: 363444
loss: 1.032207727432251,grad_norm: 0.9999992567678596, iteration: 363445
loss: 0.9887154698371887,grad_norm: 0.7642068637470791, iteration: 363446
loss: 0.9660497307777405,grad_norm: 0.9596426185643031, iteration: 363447
loss: 1.0671627521514893,grad_norm: 0.8874546226779054, iteration: 363448
loss: 1.099047064781189,grad_norm: 0.9999991409343855, iteration: 363449
loss: 0.9857921004295349,grad_norm: 0.7513529986326978, iteration: 363450
loss: 1.1005250215530396,grad_norm: 0.9514856307793197, iteration: 363451
loss: 1.01471745967865,grad_norm: 0.8081580814438524, iteration: 363452
loss: 1.0506244897842407,grad_norm: 0.9999991809370014, iteration: 363453
loss: 0.9849944710731506,grad_norm: 0.8938655201786155, iteration: 363454
loss: 0.9646004438400269,grad_norm: 0.8006611603957459, iteration: 363455
loss: 0.9947527647018433,grad_norm: 0.6830895165065454, iteration: 363456
loss: 0.9909425973892212,grad_norm: 0.715501213174576, iteration: 363457
loss: 1.0061531066894531,grad_norm: 0.9655439608684381, iteration: 363458
loss: 1.1476532220840454,grad_norm: 0.9999992228370554, iteration: 363459
loss: 1.0381356477737427,grad_norm: 0.9999992280971328, iteration: 363460
loss: 1.0355960130691528,grad_norm: 0.9999996801276663, iteration: 363461
loss: 1.0394281148910522,grad_norm: 0.7975533664377171, iteration: 363462
loss: 0.9672250747680664,grad_norm: 0.8067314463709927, iteration: 363463
loss: 1.0148931741714478,grad_norm: 0.7455158611927111, iteration: 363464
loss: 0.9886729121208191,grad_norm: 0.7650319610533215, iteration: 363465
loss: 0.9749173521995544,grad_norm: 0.8301758250094806, iteration: 363466
loss: 1.0323927402496338,grad_norm: 0.9999993257332956, iteration: 363467
loss: 0.9886144399642944,grad_norm: 0.7144205331829034, iteration: 363468
loss: 0.9923856854438782,grad_norm: 0.7771837562355182, iteration: 363469
loss: 1.006654143333435,grad_norm: 0.8108325355556911, iteration: 363470
loss: 1.0240813493728638,grad_norm: 0.9999990347693954, iteration: 363471
loss: 1.0143226385116577,grad_norm: 0.8632023075489396, iteration: 363472
loss: 1.0752220153808594,grad_norm: 0.9999998443793292, iteration: 363473
loss: 1.0300546884536743,grad_norm: 0.8023483752323162, iteration: 363474
loss: 0.9694460034370422,grad_norm: 0.8725719469010537, iteration: 363475
loss: 1.0051921606063843,grad_norm: 0.8040295333164507, iteration: 363476
loss: 1.0789436101913452,grad_norm: 0.9999993043247787, iteration: 363477
loss: 0.9746918082237244,grad_norm: 0.6976991635237964, iteration: 363478
loss: 1.0197831392288208,grad_norm: 0.7998018863510565, iteration: 363479
loss: 0.9995461106300354,grad_norm: 0.7808317969682248, iteration: 363480
loss: 0.9581589102745056,grad_norm: 0.6877940502468682, iteration: 363481
loss: 1.024062991142273,grad_norm: 0.7648598334771377, iteration: 363482
loss: 0.9993261098861694,grad_norm: 0.8534243658359331, iteration: 363483
loss: 1.0175448656082153,grad_norm: 0.7851535372028603, iteration: 363484
loss: 1.025963306427002,grad_norm: 0.735697001027478, iteration: 363485
loss: 1.0540052652359009,grad_norm: 0.9624512080931126, iteration: 363486
loss: 0.9743360877037048,grad_norm: 0.8391010226471535, iteration: 363487
loss: 1.0326597690582275,grad_norm: 0.7398538055240184, iteration: 363488
loss: 1.024043321609497,grad_norm: 0.7563453350526224, iteration: 363489
loss: 1.06232750415802,grad_norm: 0.8686590328753451, iteration: 363490
loss: 1.013241171836853,grad_norm: 0.9999991310053683, iteration: 363491
loss: 1.0473002195358276,grad_norm: 0.9947123327382535, iteration: 363492
loss: 1.1892368793487549,grad_norm: 0.9999997093894347, iteration: 363493
loss: 0.9984508156776428,grad_norm: 0.9999991484052004, iteration: 363494
loss: 0.9742539525032043,grad_norm: 0.8825181925931856, iteration: 363495
loss: 1.0148937702178955,grad_norm: 0.8571219334091678, iteration: 363496
loss: 1.0217119455337524,grad_norm: 0.9999992296909996, iteration: 363497
loss: 1.1157654523849487,grad_norm: 1.0000000737779535, iteration: 363498
loss: 0.9746118187904358,grad_norm: 0.9992702927910727, iteration: 363499
loss: 0.9932031035423279,grad_norm: 0.7021282070353211, iteration: 363500
loss: 1.0156852006912231,grad_norm: 0.8093159979946071, iteration: 363501
loss: 1.0026909112930298,grad_norm: 0.8509881223927277, iteration: 363502
loss: 1.084562063217163,grad_norm: 0.9999997783419148, iteration: 363503
loss: 0.9969372749328613,grad_norm: 0.8408355410176002, iteration: 363504
loss: 1.1033766269683838,grad_norm: 0.9999991053226648, iteration: 363505
loss: 1.0313589572906494,grad_norm: 0.7409848043300968, iteration: 363506
loss: 1.0094349384307861,grad_norm: 0.9999989777444034, iteration: 363507
loss: 0.9778247475624084,grad_norm: 0.849110703984415, iteration: 363508
loss: 1.0235192775726318,grad_norm: 0.9999998469337158, iteration: 363509
loss: 1.0671210289001465,grad_norm: 0.7929021835312355, iteration: 363510
loss: 1.0156348943710327,grad_norm: 0.7540342694729599, iteration: 363511
loss: 1.050632119178772,grad_norm: 0.8805350807814708, iteration: 363512
loss: 1.0069007873535156,grad_norm: 0.8506117828806664, iteration: 363513
loss: 0.993441104888916,grad_norm: 0.8338924059337993, iteration: 363514
loss: 0.9600395560264587,grad_norm: 0.6994350111584146, iteration: 363515
loss: 1.0793899297714233,grad_norm: 0.9437919219165205, iteration: 363516
loss: 1.0018434524536133,grad_norm: 0.8812835963590894, iteration: 363517
loss: 1.0604333877563477,grad_norm: 0.8858426420651617, iteration: 363518
loss: 0.9620401859283447,grad_norm: 0.8207249066467226, iteration: 363519
loss: 1.0728384256362915,grad_norm: 0.9999996255374025, iteration: 363520
loss: 1.017939805984497,grad_norm: 0.687846231148066, iteration: 363521
loss: 1.0523622035980225,grad_norm: 0.9999996355123252, iteration: 363522
loss: 0.9837265014648438,grad_norm: 0.9999989810830924, iteration: 363523
loss: 1.0029348134994507,grad_norm: 0.6675216149341336, iteration: 363524
loss: 1.5123838186264038,grad_norm: 0.999999438109176, iteration: 363525
loss: 1.005147933959961,grad_norm: 0.9302804790716909, iteration: 363526
loss: 1.0030663013458252,grad_norm: 0.8909676516748667, iteration: 363527
loss: 1.2044929265975952,grad_norm: 0.9999996332928849, iteration: 363528
loss: 1.024622917175293,grad_norm: 0.9999998618425179, iteration: 363529
loss: 1.0411959886550903,grad_norm: 0.7881517892204666, iteration: 363530
loss: 0.9965982437133789,grad_norm: 0.7725498622098471, iteration: 363531
loss: 1.0264946222305298,grad_norm: 0.8246110658029573, iteration: 363532
loss: 0.9721947908401489,grad_norm: 0.862607410545938, iteration: 363533
loss: 1.0279905796051025,grad_norm: 0.793886252454427, iteration: 363534
loss: 1.0574061870574951,grad_norm: 0.9999998812875883, iteration: 363535
loss: 1.291451334953308,grad_norm: 1.0000000606952861, iteration: 363536
loss: 1.0696752071380615,grad_norm: 0.9999992891591338, iteration: 363537
loss: 1.003322958946228,grad_norm: 0.8308074952859992, iteration: 363538
loss: 1.516542911529541,grad_norm: 0.9999996858216657, iteration: 363539
loss: 1.2108087539672852,grad_norm: 0.9999992470073931, iteration: 363540
loss: 1.126704216003418,grad_norm: 0.9999998598154557, iteration: 363541
loss: 1.0355583429336548,grad_norm: 0.8077080437081664, iteration: 363542
loss: 1.0600254535675049,grad_norm: 0.9999999619385149, iteration: 363543
loss: 1.012220859527588,grad_norm: 0.9999996034210146, iteration: 363544
loss: 1.0430383682250977,grad_norm: 0.9999992036380547, iteration: 363545
loss: 0.9744013547897339,grad_norm: 0.7359039057461823, iteration: 363546
loss: 0.9942979216575623,grad_norm: 0.6968331726179035, iteration: 363547
loss: 1.0272830724716187,grad_norm: 0.801052531105485, iteration: 363548
loss: 1.0155225992202759,grad_norm: 0.9999992190841377, iteration: 363549
loss: 1.0070744752883911,grad_norm: 0.9081351618845918, iteration: 363550
loss: 1.2246370315551758,grad_norm: 0.9999992371414402, iteration: 363551
loss: 1.0036579370498657,grad_norm: 0.8758425839125384, iteration: 363552
loss: 0.9898254871368408,grad_norm: 0.9999990845797632, iteration: 363553
loss: 1.034741997718811,grad_norm: 0.8995471176616018, iteration: 363554
loss: 1.0116381645202637,grad_norm: 0.8776705587424286, iteration: 363555
loss: 1.0615121126174927,grad_norm: 0.8221323531672977, iteration: 363556
loss: 1.0721793174743652,grad_norm: 0.9999994792123564, iteration: 363557
loss: 1.007232904434204,grad_norm: 0.999999619819059, iteration: 363558
loss: 0.9555318355560303,grad_norm: 0.8244612034508498, iteration: 363559
loss: 1.0145901441574097,grad_norm: 0.9279081173146653, iteration: 363560
loss: 1.0441843271255493,grad_norm: 0.9999992644372577, iteration: 363561
loss: 1.0409550666809082,grad_norm: 0.7915489378334164, iteration: 363562
loss: 1.0705461502075195,grad_norm: 0.8734183315362581, iteration: 363563
loss: 1.0462746620178223,grad_norm: 0.932362987547873, iteration: 363564
loss: 1.0308963060379028,grad_norm: 0.9488609160593442, iteration: 363565
loss: 1.1157886981964111,grad_norm: 0.999999618180899, iteration: 363566
loss: 1.0129828453063965,grad_norm: 0.9999991350785793, iteration: 363567
loss: 0.9897462129592896,grad_norm: 0.9422556193564395, iteration: 363568
loss: 1.0146307945251465,grad_norm: 0.8075632347382525, iteration: 363569
loss: 1.0690984725952148,grad_norm: 0.9999999170968589, iteration: 363570
loss: 1.0405620336532593,grad_norm: 0.7912302283089235, iteration: 363571
loss: 1.0306081771850586,grad_norm: 0.882248471592853, iteration: 363572
loss: 1.0841563940048218,grad_norm: 0.9999992343360947, iteration: 363573
loss: 1.0708045959472656,grad_norm: 0.9999998708516598, iteration: 363574
loss: 1.0633636713027954,grad_norm: 0.8532132737666639, iteration: 363575
loss: 1.0335191488265991,grad_norm: 0.7695235823160034, iteration: 363576
loss: 1.099090337753296,grad_norm: 0.9999991124529578, iteration: 363577
loss: 1.0279115438461304,grad_norm: 0.8661207240539935, iteration: 363578
loss: 1.0355985164642334,grad_norm: 0.7315931515367853, iteration: 363579
loss: 1.220942735671997,grad_norm: 0.9724625763299903, iteration: 363580
loss: 1.01326584815979,grad_norm: 0.9999998727714495, iteration: 363581
loss: 1.0487456321716309,grad_norm: 0.999999411624214, iteration: 363582
loss: 1.084841251373291,grad_norm: 0.9999996326448172, iteration: 363583
loss: 1.1923424005508423,grad_norm: 0.9999997870877465, iteration: 363584
loss: 1.0451483726501465,grad_norm: 0.9999999718438506, iteration: 363585
loss: 1.0381113290786743,grad_norm: 0.9999999979909651, iteration: 363586
loss: 1.0311634540557861,grad_norm: 0.7588661429206532, iteration: 363587
loss: 1.0000914335250854,grad_norm: 0.8873468567610036, iteration: 363588
loss: 1.0425729751586914,grad_norm: 0.8581440972520816, iteration: 363589
loss: 1.2076722383499146,grad_norm: 0.9999993834504086, iteration: 363590
loss: 1.0468652248382568,grad_norm: 0.8114163694293756, iteration: 363591
loss: 1.0791133642196655,grad_norm: 0.9999998835953331, iteration: 363592
loss: 1.0701265335083008,grad_norm: 0.8690961354677916, iteration: 363593
loss: 1.1973795890808105,grad_norm: 0.999999150967669, iteration: 363594
loss: 1.0123791694641113,grad_norm: 0.8405998081561766, iteration: 363595
loss: 1.0375927686691284,grad_norm: 1.0000000116530363, iteration: 363596
loss: 1.2351831197738647,grad_norm: 0.9999998065735957, iteration: 363597
loss: 1.0650545358657837,grad_norm: 0.7871242473092801, iteration: 363598
loss: 1.0369573831558228,grad_norm: 0.7287539745405774, iteration: 363599
loss: 0.9932410717010498,grad_norm: 0.8069544956444221, iteration: 363600
loss: 0.972902238368988,grad_norm: 0.9999998607847475, iteration: 363601
loss: 1.055279016494751,grad_norm: 0.878514056142762, iteration: 363602
loss: 1.1720967292785645,grad_norm: 0.9999999145337372, iteration: 363603
loss: 1.037431001663208,grad_norm: 0.9999999320022599, iteration: 363604
loss: 1.0922558307647705,grad_norm: 0.8096261357353225, iteration: 363605
loss: 1.0568757057189941,grad_norm: 0.8846691937613913, iteration: 363606
loss: 1.0026450157165527,grad_norm: 0.8694970717057894, iteration: 363607
loss: 1.1626499891281128,grad_norm: 0.9999991748983715, iteration: 363608
loss: 1.0283783674240112,grad_norm: 0.7252394440841855, iteration: 363609
loss: 1.0312516689300537,grad_norm: 0.9999999687421428, iteration: 363610
loss: 1.0058019161224365,grad_norm: 0.9999990936657128, iteration: 363611
loss: 1.0543222427368164,grad_norm: 0.7684428374781261, iteration: 363612
loss: 1.0087026357650757,grad_norm: 0.7425094421448694, iteration: 363613
loss: 1.0241377353668213,grad_norm: 0.9999991987065946, iteration: 363614
loss: 1.0568040609359741,grad_norm: 0.877937846761143, iteration: 363615
loss: 0.9879253506660461,grad_norm: 0.9109449431789088, iteration: 363616
loss: 1.122090458869934,grad_norm: 0.9999999427237252, iteration: 363617
loss: 1.0074381828308105,grad_norm: 0.9999991914262857, iteration: 363618
loss: 1.0168317556381226,grad_norm: 0.7307285484132063, iteration: 363619
loss: 1.01839017868042,grad_norm: 0.863139753961738, iteration: 363620
loss: 0.9872097969055176,grad_norm: 0.8224627051967064, iteration: 363621
loss: 0.9847053289413452,grad_norm: 0.7936019945415118, iteration: 363622
loss: 1.0042026042938232,grad_norm: 0.8816446498032492, iteration: 363623
loss: 1.015761375427246,grad_norm: 0.8395590334397619, iteration: 363624
loss: 0.991581916809082,grad_norm: 0.835403655634608, iteration: 363625
loss: 1.0358972549438477,grad_norm: 0.999999767822035, iteration: 363626
loss: 1.0198253393173218,grad_norm: 0.8671045682468781, iteration: 363627
loss: 1.0194083452224731,grad_norm: 0.6904867467340314, iteration: 363628
loss: 1.0019465684890747,grad_norm: 0.8374084269876418, iteration: 363629
loss: 0.9834046959877014,grad_norm: 0.7645737199155327, iteration: 363630
loss: 0.979802668094635,grad_norm: 0.839026418700887, iteration: 363631
loss: 0.9826242327690125,grad_norm: 0.8059323886223182, iteration: 363632
loss: 1.00668466091156,grad_norm: 0.7943281395513615, iteration: 363633
loss: 0.9913897514343262,grad_norm: 0.9960470329531227, iteration: 363634
loss: 1.0141551494598389,grad_norm: 0.8214046568100387, iteration: 363635
loss: 0.9870437383651733,grad_norm: 0.9999993663805596, iteration: 363636
loss: 1.1061733961105347,grad_norm: 0.9999991221101896, iteration: 363637
loss: 1.0254528522491455,grad_norm: 0.9999992508871607, iteration: 363638
loss: 1.0274765491485596,grad_norm: 0.9999990435760423, iteration: 363639
loss: 0.972200334072113,grad_norm: 0.7670965531042161, iteration: 363640
loss: 0.9724961519241333,grad_norm: 0.8002073520267233, iteration: 363641
loss: 1.0291129350662231,grad_norm: 0.8403265328562077, iteration: 363642
loss: 1.0186156034469604,grad_norm: 0.8292451327464477, iteration: 363643
loss: 1.0021597146987915,grad_norm: 0.8896680632104201, iteration: 363644
loss: 1.053489327430725,grad_norm: 0.9247300865439954, iteration: 363645
loss: 0.9930664300918579,grad_norm: 0.9727899255343485, iteration: 363646
loss: 1.0485590696334839,grad_norm: 0.8680362931941852, iteration: 363647
loss: 0.9998985528945923,grad_norm: 0.8642765902920488, iteration: 363648
loss: 1.007170557975769,grad_norm: 0.8327933763899003, iteration: 363649
loss: 1.0055195093154907,grad_norm: 0.764679131829167, iteration: 363650
loss: 0.976779580116272,grad_norm: 0.6661886348317535, iteration: 363651
loss: 1.013765811920166,grad_norm: 0.9999998884338988, iteration: 363652
loss: 0.9768672585487366,grad_norm: 0.8359769638953737, iteration: 363653
loss: 1.000326156616211,grad_norm: 0.8375007030569421, iteration: 363654
loss: 1.0033563375473022,grad_norm: 0.8217245685229987, iteration: 363655
loss: 0.9831043481826782,grad_norm: 0.6915938810052558, iteration: 363656
loss: 1.0508642196655273,grad_norm: 0.9920518404659915, iteration: 363657
loss: 1.050052285194397,grad_norm: 0.9999990620859001, iteration: 363658
loss: 0.9726265072822571,grad_norm: 0.7730680820993634, iteration: 363659
loss: 0.9962016940116882,grad_norm: 0.7692679121227127, iteration: 363660
loss: 1.0030877590179443,grad_norm: 0.8318032297423645, iteration: 363661
loss: 1.040570855140686,grad_norm: 0.9999996665612232, iteration: 363662
loss: 1.0219013690948486,grad_norm: 0.8661169938108352, iteration: 363663
loss: 1.0705208778381348,grad_norm: 0.8996343506098975, iteration: 363664
loss: 1.0304838418960571,grad_norm: 0.7917310503273493, iteration: 363665
loss: 1.0481741428375244,grad_norm: 0.9999996921060742, iteration: 363666
loss: 1.0485913753509521,grad_norm: 0.9999991685112202, iteration: 363667
loss: 1.0098429918289185,grad_norm: 0.9999995765913092, iteration: 363668
loss: 1.0369471311569214,grad_norm: 0.9213835028365758, iteration: 363669
loss: 1.0460708141326904,grad_norm: 0.916300922385147, iteration: 363670
loss: 1.0251142978668213,grad_norm: 0.7550850979147555, iteration: 363671
loss: 0.9967982769012451,grad_norm: 0.9999990417593086, iteration: 363672
loss: 1.0196810960769653,grad_norm: 0.999999139117122, iteration: 363673
loss: 1.008963704109192,grad_norm: 0.9144594541559791, iteration: 363674
loss: 0.9854979515075684,grad_norm: 0.909180954462934, iteration: 363675
loss: 1.0351276397705078,grad_norm: 0.9999995616086175, iteration: 363676
loss: 0.9961748719215393,grad_norm: 0.8413938638184273, iteration: 363677
loss: 1.0380700826644897,grad_norm: 0.84868273508524, iteration: 363678
loss: 1.0055958032608032,grad_norm: 0.7838234958186259, iteration: 363679
loss: 0.9828276634216309,grad_norm: 0.9020259918038417, iteration: 363680
loss: 1.1495463848114014,grad_norm: 0.9999990751101793, iteration: 363681
loss: 1.0077099800109863,grad_norm: 0.7936876719025799, iteration: 363682
loss: 0.997675359249115,grad_norm: 0.9065798551342809, iteration: 363683
loss: 1.018386960029602,grad_norm: 0.8457257383298645, iteration: 363684
loss: 0.9922598600387573,grad_norm: 0.926149691930674, iteration: 363685
loss: 1.0347356796264648,grad_norm: 0.8716715835614658, iteration: 363686
loss: 0.9979803562164307,grad_norm: 0.7600319258465971, iteration: 363687
loss: 0.9965546131134033,grad_norm: 0.9999996900577948, iteration: 363688
loss: 0.9640754461288452,grad_norm: 0.7701781626880813, iteration: 363689
loss: 1.036331295967102,grad_norm: 0.937893595546312, iteration: 363690
loss: 0.9967729449272156,grad_norm: 0.8215182316905969, iteration: 363691
loss: 0.9788288474082947,grad_norm: 0.7625956507445303, iteration: 363692
loss: 0.9680542349815369,grad_norm: 0.8507720970569478, iteration: 363693
loss: 1.057072639465332,grad_norm: 0.9999995400860596, iteration: 363694
loss: 1.0039328336715698,grad_norm: 0.6483308232992684, iteration: 363695
loss: 1.0048255920410156,grad_norm: 0.8824132554994752, iteration: 363696
loss: 0.9775021076202393,grad_norm: 0.7703547607633923, iteration: 363697
loss: 0.9841926097869873,grad_norm: 0.8568613816416089, iteration: 363698
loss: 1.11834716796875,grad_norm: 0.9999992442869629, iteration: 363699
loss: 1.008805751800537,grad_norm: 0.8848989508402914, iteration: 363700
loss: 0.9686857461929321,grad_norm: 0.7981920508594799, iteration: 363701
loss: 1.0135747194290161,grad_norm: 0.7901709299549226, iteration: 363702
loss: 0.9719030857086182,grad_norm: 0.656682872392862, iteration: 363703
loss: 1.0199095010757446,grad_norm: 0.6619159867686442, iteration: 363704
loss: 0.957037091255188,grad_norm: 0.8073922411517729, iteration: 363705
loss: 0.9875839352607727,grad_norm: 0.7903261946964864, iteration: 363706
loss: 0.9983108639717102,grad_norm: 0.9259656866371581, iteration: 363707
loss: 0.9742081761360168,grad_norm: 0.7139760833225354, iteration: 363708
loss: 0.9898123145103455,grad_norm: 0.9373095352108323, iteration: 363709
loss: 1.0034986734390259,grad_norm: 0.8781488860176122, iteration: 363710
loss: 1.0251704454421997,grad_norm: 0.869508292980475, iteration: 363711
loss: 1.0030897855758667,grad_norm: 0.7748304001861854, iteration: 363712
loss: 0.9628726243972778,grad_norm: 0.999999193402551, iteration: 363713
loss: 1.0215834379196167,grad_norm: 0.7605372684359321, iteration: 363714
loss: 1.0221326351165771,grad_norm: 0.8616696871219639, iteration: 363715
loss: 0.9840049147605896,grad_norm: 0.7901983860129032, iteration: 363716
loss: 1.0021610260009766,grad_norm: 0.9999999365984527, iteration: 363717
loss: 0.9731250405311584,grad_norm: 0.7702788674395706, iteration: 363718
loss: 0.9937155842781067,grad_norm: 0.9999991469300205, iteration: 363719
loss: 1.027098298072815,grad_norm: 0.7425693914403484, iteration: 363720
loss: 0.9934330582618713,grad_norm: 0.702059223672084, iteration: 363721
loss: 1.019345760345459,grad_norm: 0.6942265549130957, iteration: 363722
loss: 1.037175178527832,grad_norm: 0.9822534149937188, iteration: 363723
loss: 1.0113431215286255,grad_norm: 0.8862561301721683, iteration: 363724
loss: 0.982753336429596,grad_norm: 0.7402668807414717, iteration: 363725
loss: 1.0248689651489258,grad_norm: 0.8501936868839648, iteration: 363726
loss: 1.0075684785842896,grad_norm: 0.837077291976725, iteration: 363727
loss: 1.0124996900558472,grad_norm: 0.752390914400277, iteration: 363728
loss: 1.002705454826355,grad_norm: 0.817439359775624, iteration: 363729
loss: 1.050215244293213,grad_norm: 0.9559534386713813, iteration: 363730
loss: 1.0094411373138428,grad_norm: 0.7987571945737405, iteration: 363731
loss: 0.9992789030075073,grad_norm: 0.9999993160722135, iteration: 363732
loss: 0.9617036581039429,grad_norm: 0.8935788940772547, iteration: 363733
loss: 0.9977750778198242,grad_norm: 0.7562599723504047, iteration: 363734
loss: 0.9764830470085144,grad_norm: 0.8874462584079539, iteration: 363735
loss: 1.012021541595459,grad_norm: 0.7613136808265054, iteration: 363736
loss: 1.0795435905456543,grad_norm: 0.9999999382908112, iteration: 363737
loss: 1.0050897598266602,grad_norm: 0.8573192117264563, iteration: 363738
loss: 0.9813207387924194,grad_norm: 0.9192684303065563, iteration: 363739
loss: 1.0371235609054565,grad_norm: 0.9999992514639557, iteration: 363740
loss: 0.9845197200775146,grad_norm: 0.7045191643846054, iteration: 363741
loss: 0.9865422248840332,grad_norm: 0.9999991612936445, iteration: 363742
loss: 1.0186715126037598,grad_norm: 0.900418789241867, iteration: 363743
loss: 1.0514822006225586,grad_norm: 0.9999995460063106, iteration: 363744
loss: 1.0256646871566772,grad_norm: 0.8497514429154172, iteration: 363745
loss: 0.9986249804496765,grad_norm: 0.8521646689536635, iteration: 363746
loss: 1.0131317377090454,grad_norm: 0.8254260849779608, iteration: 363747
loss: 1.0726743936538696,grad_norm: 0.9999991669264359, iteration: 363748
loss: 0.9699767827987671,grad_norm: 0.7335512581010252, iteration: 363749
loss: 1.0135902166366577,grad_norm: 0.9999995712329645, iteration: 363750
loss: 1.0121197700500488,grad_norm: 0.9999991754815475, iteration: 363751
loss: 0.9977408051490784,grad_norm: 0.7303593019635994, iteration: 363752
loss: 0.9898490309715271,grad_norm: 0.8496597107780901, iteration: 363753
loss: 1.0269238948822021,grad_norm: 0.6902006776794475, iteration: 363754
loss: 1.0341256856918335,grad_norm: 0.5876533815599384, iteration: 363755
loss: 0.9993510842323303,grad_norm: 0.7872170769385983, iteration: 363756
loss: 0.9816498756408691,grad_norm: 0.9999998643228982, iteration: 363757
loss: 1.0062122344970703,grad_norm: 0.9304518852613539, iteration: 363758
loss: 0.9880086183547974,grad_norm: 0.7853035447747531, iteration: 363759
loss: 1.0112698078155518,grad_norm: 0.9999990853821787, iteration: 363760
loss: 1.0190675258636475,grad_norm: 0.916638028685154, iteration: 363761
loss: 1.0811059474945068,grad_norm: 0.9999994608271224, iteration: 363762
loss: 0.9989266991615295,grad_norm: 0.7292386664139356, iteration: 363763
loss: 1.3452528715133667,grad_norm: 0.9999995823696416, iteration: 363764
loss: 1.0153003931045532,grad_norm: 0.8597858035788307, iteration: 363765
loss: 1.002686619758606,grad_norm: 0.8502724150908357, iteration: 363766
loss: 0.9934993982315063,grad_norm: 0.7115597447695242, iteration: 363767
loss: 1.0045933723449707,grad_norm: 0.7776253934408194, iteration: 363768
loss: 0.9879938960075378,grad_norm: 0.8628533826958396, iteration: 363769
loss: 1.0088245868682861,grad_norm: 0.8859094411230712, iteration: 363770
loss: 1.0306495428085327,grad_norm: 0.9638368764243521, iteration: 363771
loss: 0.981271505355835,grad_norm: 0.7954143605789883, iteration: 363772
loss: 1.0379276275634766,grad_norm: 0.9999990965628365, iteration: 363773
loss: 0.9766660332679749,grad_norm: 0.7962955486766007, iteration: 363774
loss: 0.9948546886444092,grad_norm: 0.769140062410325, iteration: 363775
loss: 1.008811116218567,grad_norm: 0.8508708780304657, iteration: 363776
loss: 1.2420411109924316,grad_norm: 0.9999995983539651, iteration: 363777
loss: 1.0609160661697388,grad_norm: 0.9999998859341362, iteration: 363778
loss: 1.0236353874206543,grad_norm: 0.634816819219288, iteration: 363779
loss: 0.9846093654632568,grad_norm: 0.9165148209332965, iteration: 363780
loss: 1.0064716339111328,grad_norm: 0.986068129183431, iteration: 363781
loss: 0.9983202219009399,grad_norm: 0.9329845524929077, iteration: 363782
loss: 1.0345414876937866,grad_norm: 0.8573586505325624, iteration: 363783
loss: 1.0927523374557495,grad_norm: 0.8126811797242476, iteration: 363784
loss: 0.9702057242393494,grad_norm: 0.781488571196721, iteration: 363785
loss: 1.0383166074752808,grad_norm: 0.6989825782387152, iteration: 363786
loss: 1.0223722457885742,grad_norm: 0.7923190772430477, iteration: 363787
loss: 1.024827480316162,grad_norm: 0.772235410367311, iteration: 363788
loss: 1.0130122900009155,grad_norm: 0.7870084671597601, iteration: 363789
loss: 1.0684326887130737,grad_norm: 0.9999993008303691, iteration: 363790
loss: 0.9711095094680786,grad_norm: 0.7821995530813246, iteration: 363791
loss: 0.9956555366516113,grad_norm: 0.9729685548142041, iteration: 363792
loss: 1.0032979249954224,grad_norm: 0.7760563970506165, iteration: 363793
loss: 1.0026274919509888,grad_norm: 0.7662108575134543, iteration: 363794
loss: 1.0696160793304443,grad_norm: 0.9999994434439927, iteration: 363795
loss: 0.964574933052063,grad_norm: 0.7895933029533653, iteration: 363796
loss: 1.0123331546783447,grad_norm: 0.7504783086696043, iteration: 363797
loss: 1.0090460777282715,grad_norm: 0.8539533536969469, iteration: 363798
loss: 0.9680235981941223,grad_norm: 0.8085270120839309, iteration: 363799
loss: 1.001914143562317,grad_norm: 0.9633073666417724, iteration: 363800
loss: 1.000125765800476,grad_norm: 0.8160346133960681, iteration: 363801
loss: 0.9947275519371033,grad_norm: 0.6831707052325067, iteration: 363802
loss: 1.0215035676956177,grad_norm: 0.959236928387301, iteration: 363803
loss: 1.0612643957138062,grad_norm: 0.9616158545135309, iteration: 363804
loss: 1.0160062313079834,grad_norm: 0.7927323161386939, iteration: 363805
loss: 1.02360200881958,grad_norm: 0.8151658608875952, iteration: 363806
loss: 1.0757046937942505,grad_norm: 0.9999990892760845, iteration: 363807
loss: 1.016301155090332,grad_norm: 0.8740726907352316, iteration: 363808
loss: 0.9871360659599304,grad_norm: 0.8956399078714247, iteration: 363809
loss: 1.0059747695922852,grad_norm: 0.7309446910755283, iteration: 363810
loss: 1.0706136226654053,grad_norm: 0.999999637504703, iteration: 363811
loss: 1.0150203704833984,grad_norm: 0.7912174089075249, iteration: 363812
loss: 1.0766417980194092,grad_norm: 0.8608596886588213, iteration: 363813
loss: 1.0023993253707886,grad_norm: 0.6753180417162242, iteration: 363814
loss: 1.087417721748352,grad_norm: 0.9999998111528048, iteration: 363815
loss: 1.003373622894287,grad_norm: 0.9999995859770161, iteration: 363816
loss: 1.0066264867782593,grad_norm: 0.9999991421306991, iteration: 363817
loss: 1.0307594537734985,grad_norm: 0.7383263405381385, iteration: 363818
loss: 0.9982637166976929,grad_norm: 0.8496133598558137, iteration: 363819
loss: 0.9604153037071228,grad_norm: 0.9629597638466427, iteration: 363820
loss: 0.9697343707084656,grad_norm: 0.7999103280223608, iteration: 363821
loss: 1.0281413793563843,grad_norm: 0.8659300405881999, iteration: 363822
loss: 0.9675613045692444,grad_norm: 0.8405333761961202, iteration: 363823
loss: 1.0070654153823853,grad_norm: 0.733244106581964, iteration: 363824
loss: 1.0334985256195068,grad_norm: 0.7602538366486621, iteration: 363825
loss: 0.9963776469230652,grad_norm: 0.7977543459669354, iteration: 363826
loss: 1.0241607427597046,grad_norm: 0.880702590714781, iteration: 363827
loss: 0.9809292554855347,grad_norm: 0.7969574510132486, iteration: 363828
loss: 0.9873173236846924,grad_norm: 0.8317869572245641, iteration: 363829
loss: 1.1346514225006104,grad_norm: 0.9999992786591434, iteration: 363830
loss: 0.9917863607406616,grad_norm: 0.8875923882021294, iteration: 363831
loss: 1.030739426612854,grad_norm: 0.8143642849485806, iteration: 363832
loss: 1.031385898590088,grad_norm: 0.937752780445793, iteration: 363833
loss: 0.9720836281776428,grad_norm: 0.8288511533039833, iteration: 363834
loss: 1.0700337886810303,grad_norm: 0.9999990696866483, iteration: 363835
loss: 1.0306569337844849,grad_norm: 0.7512134061509896, iteration: 363836
loss: 0.9822694063186646,grad_norm: 0.8518271484813779, iteration: 363837
loss: 1.0087040662765503,grad_norm: 0.7608775631918561, iteration: 363838
loss: 1.0517834424972534,grad_norm: 0.9999999239927886, iteration: 363839
loss: 1.1118597984313965,grad_norm: 0.8426588488200629, iteration: 363840
loss: 1.0576704740524292,grad_norm: 0.9819870374360637, iteration: 363841
loss: 0.9649704098701477,grad_norm: 0.824854546847711, iteration: 363842
loss: 1.0067322254180908,grad_norm: 0.7240612897021097, iteration: 363843
loss: 0.9509287476539612,grad_norm: 0.9881823048194605, iteration: 363844
loss: 1.0153493881225586,grad_norm: 0.8510960198165887, iteration: 363845
loss: 0.9828986525535583,grad_norm: 0.995123125028407, iteration: 363846
loss: 1.0031378269195557,grad_norm: 0.7750378437057572, iteration: 363847
loss: 0.9708523154258728,grad_norm: 0.9264574609006672, iteration: 363848
loss: 1.0010124444961548,grad_norm: 0.9999990404039912, iteration: 363849
loss: 1.0724719762802124,grad_norm: 0.9999995377153872, iteration: 363850
loss: 1.0146187543869019,grad_norm: 0.7483749716058894, iteration: 363851
loss: 0.9934030175209045,grad_norm: 0.8452619425751047, iteration: 363852
loss: 1.0154404640197754,grad_norm: 0.9999990434962913, iteration: 363853
loss: 0.9492303729057312,grad_norm: 0.7713348846253097, iteration: 363854
loss: 1.026764988899231,grad_norm: 0.8710839871371678, iteration: 363855
loss: 0.9884139895439148,grad_norm: 0.9999991661901054, iteration: 363856
loss: 1.034116268157959,grad_norm: 0.9999999599346738, iteration: 363857
loss: 1.0082091093063354,grad_norm: 0.7891615826077973, iteration: 363858
loss: 0.9959492683410645,grad_norm: 0.7224160058187633, iteration: 363859
loss: 0.9669960141181946,grad_norm: 0.7886494470113752, iteration: 363860
loss: 1.063214898109436,grad_norm: 0.9065280004776782, iteration: 363861
loss: 1.031065583229065,grad_norm: 0.9421253183669861, iteration: 363862
loss: 0.9630029201507568,grad_norm: 0.6914057779870141, iteration: 363863
loss: 0.9655940532684326,grad_norm: 0.7277401884030353, iteration: 363864
loss: 1.0068073272705078,grad_norm: 0.7667418198032849, iteration: 363865
loss: 1.0070160627365112,grad_norm: 0.7471572800269003, iteration: 363866
loss: 0.9952137470245361,grad_norm: 0.7357704566429069, iteration: 363867
loss: 1.0121039152145386,grad_norm: 0.7244877891798781, iteration: 363868
loss: 1.0325021743774414,grad_norm: 0.9999993317910336, iteration: 363869
loss: 1.0061805248260498,grad_norm: 0.9015805852710298, iteration: 363870
loss: 1.0262665748596191,grad_norm: 0.9187730384968454, iteration: 363871
loss: 0.9800850749015808,grad_norm: 0.999999748837738, iteration: 363872
loss: 0.986667811870575,grad_norm: 0.8058380965735964, iteration: 363873
loss: 1.027886152267456,grad_norm: 0.6950180956954088, iteration: 363874
loss: 0.9811511635780334,grad_norm: 0.8324365363794829, iteration: 363875
loss: 1.0460325479507446,grad_norm: 0.7157907231616093, iteration: 363876
loss: 1.0077719688415527,grad_norm: 0.7721202454473771, iteration: 363877
loss: 1.0221954584121704,grad_norm: 0.9999992401418596, iteration: 363878
loss: 1.0380507707595825,grad_norm: 0.8182046523761362, iteration: 363879
loss: 0.9826899170875549,grad_norm: 0.7921011281958975, iteration: 363880
loss: 0.981309711933136,grad_norm: 0.7303095894460524, iteration: 363881
loss: 1.0060293674468994,grad_norm: 0.8655667700719971, iteration: 363882
loss: 0.9929971098899841,grad_norm: 0.7419611338243399, iteration: 363883
loss: 0.9704288244247437,grad_norm: 0.7676523671549419, iteration: 363884
loss: 0.9910932779312134,grad_norm: 0.729349569378044, iteration: 363885
loss: 1.0111998319625854,grad_norm: 0.999639541647424, iteration: 363886
loss: 0.9944393634796143,grad_norm: 0.8102771506503751, iteration: 363887
loss: 1.0184234380722046,grad_norm: 0.8851345813492916, iteration: 363888
loss: 1.0095939636230469,grad_norm: 0.7267000314088669, iteration: 363889
loss: 0.9781734943389893,grad_norm: 0.8533701502489007, iteration: 363890
loss: 1.130130410194397,grad_norm: 0.9999997451821369, iteration: 363891
loss: 1.0250797271728516,grad_norm: 0.8440656474327394, iteration: 363892
loss: 1.0096454620361328,grad_norm: 0.6727785071492225, iteration: 363893
loss: 1.1254009008407593,grad_norm: 1.0000000213401552, iteration: 363894
loss: 0.9817137718200684,grad_norm: 0.8299655291911492, iteration: 363895
loss: 1.0216693878173828,grad_norm: 0.7873276861170072, iteration: 363896
loss: 0.9783357381820679,grad_norm: 0.9073349805898495, iteration: 363897
loss: 0.9937252402305603,grad_norm: 0.8096851894269466, iteration: 363898
loss: 0.9894860982894897,grad_norm: 0.9999991418785591, iteration: 363899
loss: 1.021863341331482,grad_norm: 0.9912627547020976, iteration: 363900
loss: 1.013853669166565,grad_norm: 0.6583456911862297, iteration: 363901
loss: 1.1539992094039917,grad_norm: 0.9999999986674236, iteration: 363902
loss: 1.0050665140151978,grad_norm: 0.6675510835774072, iteration: 363903
loss: 0.9940452575683594,grad_norm: 0.9546562689737453, iteration: 363904
loss: 0.9645673632621765,grad_norm: 0.8211101021863076, iteration: 363905
loss: 1.017311692237854,grad_norm: 0.7718558447434377, iteration: 363906
loss: 1.017687201499939,grad_norm: 0.9999997394209841, iteration: 363907
loss: 1.0569179058074951,grad_norm: 0.8346361411330362, iteration: 363908
loss: 1.0896480083465576,grad_norm: 0.8489486492486179, iteration: 363909
loss: 1.0827733278274536,grad_norm: 0.9898629770962417, iteration: 363910
loss: 1.0552479028701782,grad_norm: 0.9706031228966463, iteration: 363911
loss: 1.0067260265350342,grad_norm: 0.8745798602103175, iteration: 363912
loss: 1.0520484447479248,grad_norm: 0.7942445974825253, iteration: 363913
loss: 1.0037060976028442,grad_norm: 0.9045321169375877, iteration: 363914
loss: 1.0914431810379028,grad_norm: 0.8920879419507822, iteration: 363915
loss: 1.041565179824829,grad_norm: 0.8307902936511554, iteration: 363916
loss: 1.151909589767456,grad_norm: 0.9999991412921283, iteration: 363917
loss: 1.0114078521728516,grad_norm: 0.921786565203737, iteration: 363918
loss: 0.9909514784812927,grad_norm: 0.7972067191714032, iteration: 363919
loss: 1.2379225492477417,grad_norm: 0.9999994489674467, iteration: 363920
loss: 1.0263152122497559,grad_norm: 0.7812131226302435, iteration: 363921
loss: 1.0266568660736084,grad_norm: 0.9400312555944254, iteration: 363922
loss: 0.9555668830871582,grad_norm: 0.7196600455656171, iteration: 363923
loss: 0.9674936532974243,grad_norm: 0.5995143048897358, iteration: 363924
loss: 1.0377691984176636,grad_norm: 0.7299480851352611, iteration: 363925
loss: 1.003972053527832,grad_norm: 0.752499068923492, iteration: 363926
loss: 0.9568963050842285,grad_norm: 0.8161285943923555, iteration: 363927
loss: 0.9759441018104553,grad_norm: 0.7752463651210145, iteration: 363928
loss: 1.0158624649047852,grad_norm: 0.8853029316879483, iteration: 363929
loss: 1.1459358930587769,grad_norm: 0.9999994986786104, iteration: 363930
loss: 1.1231831312179565,grad_norm: 0.9201515271292549, iteration: 363931
loss: 1.0138180255889893,grad_norm: 0.8926400725436222, iteration: 363932
loss: 1.017834186553955,grad_norm: 0.8823933468399268, iteration: 363933
loss: 1.0187617540359497,grad_norm: 0.6577706537509624, iteration: 363934
loss: 1.0260673761367798,grad_norm: 0.8173461463170701, iteration: 363935
loss: 0.9950966238975525,grad_norm: 0.9114927186454953, iteration: 363936
loss: 1.007907748222351,grad_norm: 0.8049147579691013, iteration: 363937
loss: 1.0131527185440063,grad_norm: 0.7396617176739982, iteration: 363938
loss: 1.0010967254638672,grad_norm: 0.8622509450069925, iteration: 363939
loss: 1.0148135423660278,grad_norm: 0.9999991763843185, iteration: 363940
loss: 1.02109694480896,grad_norm: 0.9999998256164522, iteration: 363941
loss: 1.0127558708190918,grad_norm: 0.7683890875315286, iteration: 363942
loss: 1.0174567699432373,grad_norm: 0.9999999623383058, iteration: 363943
loss: 1.0328428745269775,grad_norm: 0.9999994719192072, iteration: 363944
loss: 1.0663464069366455,grad_norm: 0.8706285037394822, iteration: 363945
loss: 1.0040174722671509,grad_norm: 0.9282467177521646, iteration: 363946
loss: 1.0821670293807983,grad_norm: 0.999999342735714, iteration: 363947
loss: 1.0032068490982056,grad_norm: 0.7285436906871118, iteration: 363948
loss: 0.9850174784660339,grad_norm: 0.8992646227327015, iteration: 363949
loss: 1.007631540298462,grad_norm: 0.8615900280728538, iteration: 363950
loss: 0.9767438173294067,grad_norm: 0.8292531987889659, iteration: 363951
loss: 1.0078500509262085,grad_norm: 0.9301553974627016, iteration: 363952
loss: 0.9870981574058533,grad_norm: 0.7294520045591811, iteration: 363953
loss: 1.0264983177185059,grad_norm: 0.7744368302966639, iteration: 363954
loss: 0.9937765598297119,grad_norm: 0.6591153563296455, iteration: 363955
loss: 1.1122175455093384,grad_norm: 0.9999993233496286, iteration: 363956
loss: 1.1457573175430298,grad_norm: 0.9999998052362068, iteration: 363957
loss: 1.0238258838653564,grad_norm: 0.7691136604555572, iteration: 363958
loss: 0.9896819591522217,grad_norm: 0.9118475419405757, iteration: 363959
loss: 1.0911040306091309,grad_norm: 0.9999991365234123, iteration: 363960
loss: 1.02797269821167,grad_norm: 0.9243084274988909, iteration: 363961
loss: 0.97825026512146,grad_norm: 0.7805899520794775, iteration: 363962
loss: 1.004348874092102,grad_norm: 0.999999006382207, iteration: 363963
loss: 1.0230464935302734,grad_norm: 0.9999992660794388, iteration: 363964
loss: 1.1258697509765625,grad_norm: 0.9999991940889312, iteration: 363965
loss: 1.0085521936416626,grad_norm: 0.8815535939714042, iteration: 363966
loss: 1.0360440015792847,grad_norm: 0.8020851889424011, iteration: 363967
loss: 1.1016807556152344,grad_norm: 0.9999995040530576, iteration: 363968
loss: 1.0030688047409058,grad_norm: 0.6553524586986731, iteration: 363969
loss: 1.053347110748291,grad_norm: 0.9999998100358074, iteration: 363970
loss: 1.1335475444793701,grad_norm: 0.9885559836817279, iteration: 363971
loss: 1.127556562423706,grad_norm: 0.9307091994759714, iteration: 363972
loss: 1.0865424871444702,grad_norm: 0.866176782149675, iteration: 363973
loss: 0.9930422902107239,grad_norm: 0.7292860727766501, iteration: 363974
loss: 1.0326719284057617,grad_norm: 0.999999313699861, iteration: 363975
loss: 1.0776804685592651,grad_norm: 0.9999993077584085, iteration: 363976
loss: 1.0026613473892212,grad_norm: 0.8119674906433418, iteration: 363977
loss: 1.001595139503479,grad_norm: 0.6888638171326521, iteration: 363978
loss: 1.0068517923355103,grad_norm: 0.9999993309933705, iteration: 363979
loss: 1.0420359373092651,grad_norm: 0.9999992328287565, iteration: 363980
loss: 1.0924233198165894,grad_norm: 0.9999992658099601, iteration: 363981
loss: 1.002725601196289,grad_norm: 0.8813912494012431, iteration: 363982
loss: 1.0595237016677856,grad_norm: 0.7583448311807554, iteration: 363983
loss: 1.0061702728271484,grad_norm: 0.8401601266272608, iteration: 363984
loss: 1.0071407556533813,grad_norm: 0.9999990957299522, iteration: 363985
loss: 1.0473182201385498,grad_norm: 0.8680631641428036, iteration: 363986
loss: 0.9960689544677734,grad_norm: 0.8104059029604326, iteration: 363987
loss: 1.0116826295852661,grad_norm: 0.9999991068203248, iteration: 363988
loss: 1.0405305624008179,grad_norm: 0.8836093778004893, iteration: 363989
loss: 0.9950317740440369,grad_norm: 0.9999993032653273, iteration: 363990
loss: 1.0037189722061157,grad_norm: 0.6886972624969024, iteration: 363991
loss: 0.9720438122749329,grad_norm: 0.8490976969900105, iteration: 363992
loss: 0.9943436980247498,grad_norm: 0.8266510684650068, iteration: 363993
loss: 1.0168715715408325,grad_norm: 0.9115105695762882, iteration: 363994
loss: 1.0891484022140503,grad_norm: 0.9999996648442923, iteration: 363995
loss: 0.9911779761314392,grad_norm: 0.8613979254377448, iteration: 363996
loss: 0.9824972152709961,grad_norm: 0.9313241279162541, iteration: 363997
loss: 1.0290640592575073,grad_norm: 0.8940147884587539, iteration: 363998
loss: 1.067097544670105,grad_norm: 0.9999991925603174, iteration: 363999
loss: 1.0271451473236084,grad_norm: 0.999999017596526, iteration: 364000
loss: 1.018246054649353,grad_norm: 0.9999991662184575, iteration: 364001
loss: 1.033441185951233,grad_norm: 0.9999989785496955, iteration: 364002
loss: 0.9909821152687073,grad_norm: 0.8194314105679968, iteration: 364003
loss: 1.077021598815918,grad_norm: 1.000000027760998, iteration: 364004
loss: 1.0685280561447144,grad_norm: 0.9999991233712338, iteration: 364005
loss: 0.9745694994926453,grad_norm: 0.772871322904913, iteration: 364006
loss: 1.1003485918045044,grad_norm: 0.8596943515081052, iteration: 364007
loss: 0.9850975871086121,grad_norm: 0.9056504760919704, iteration: 364008
loss: 1.0528513193130493,grad_norm: 0.8061473449211694, iteration: 364009
loss: 1.0048145055770874,grad_norm: 0.8057669083073798, iteration: 364010
loss: 0.9880951642990112,grad_norm: 0.7904849276069132, iteration: 364011
loss: 1.0170763731002808,grad_norm: 0.9999991027098535, iteration: 364012
loss: 1.0146297216415405,grad_norm: 0.8658896807222703, iteration: 364013
loss: 0.9563414454460144,grad_norm: 0.8866448046390238, iteration: 364014
loss: 1.0191469192504883,grad_norm: 0.9999992611711815, iteration: 364015
loss: 1.035353660583496,grad_norm: 0.817430615564354, iteration: 364016
loss: 1.0532997846603394,grad_norm: 0.999999003697769, iteration: 364017
loss: 1.0010439157485962,grad_norm: 0.9999990127059801, iteration: 364018
loss: 1.0140892267227173,grad_norm: 0.9165634465900508, iteration: 364019
loss: 1.0545135736465454,grad_norm: 0.9999999017074466, iteration: 364020
loss: 1.0399419069290161,grad_norm: 0.818434469365627, iteration: 364021
loss: 0.9981330633163452,grad_norm: 0.7481093832219973, iteration: 364022
loss: 1.003455638885498,grad_norm: 0.9002663185290846, iteration: 364023
loss: 0.9800372123718262,grad_norm: 0.7607840095518577, iteration: 364024
loss: 0.9928842186927795,grad_norm: 0.8016193251109476, iteration: 364025
loss: 1.0274064540863037,grad_norm: 0.9999992036710024, iteration: 364026
loss: 0.9690725803375244,grad_norm: 0.9171895426627655, iteration: 364027
loss: 1.125084400177002,grad_norm: 0.8498153634633597, iteration: 364028
loss: 1.0340651273727417,grad_norm: 0.7583184951938992, iteration: 364029
loss: 1.0169304609298706,grad_norm: 0.8214337942146215, iteration: 364030
loss: 1.049697756767273,grad_norm: 0.8853194071529191, iteration: 364031
loss: 1.0225290060043335,grad_norm: 0.9999991844373077, iteration: 364032
loss: 0.983119547367096,grad_norm: 0.803718560728891, iteration: 364033
loss: 1.0369917154312134,grad_norm: 0.9999990076433763, iteration: 364034
loss: 1.0064865350723267,grad_norm: 0.8484683866221141, iteration: 364035
loss: 1.0325722694396973,grad_norm: 0.9999993857637741, iteration: 364036
loss: 1.1334035396575928,grad_norm: 0.9999996809474688, iteration: 364037
loss: 0.9914586544036865,grad_norm: 0.8218524419278488, iteration: 364038
loss: 1.0582494735717773,grad_norm: 0.8781445036357143, iteration: 364039
loss: 1.0548838376998901,grad_norm: 0.9999992165610088, iteration: 364040
loss: 1.0473991632461548,grad_norm: 0.714107881193845, iteration: 364041
loss: 1.0330116748809814,grad_norm: 0.8120568125655289, iteration: 364042
loss: 1.0135562419891357,grad_norm: 0.9999990720467755, iteration: 364043
loss: 0.9884901642799377,grad_norm: 0.771768739783499, iteration: 364044
loss: 0.9932471513748169,grad_norm: 0.8337756447096223, iteration: 364045
loss: 1.0322245359420776,grad_norm: 0.8432726547277621, iteration: 364046
loss: 1.002657175064087,grad_norm: 0.7696846250894441, iteration: 364047
loss: 1.0537813901901245,grad_norm: 0.9353244648706116, iteration: 364048
loss: 1.0997000932693481,grad_norm: 0.8698253818000005, iteration: 364049
loss: 0.9514948725700378,grad_norm: 0.8061075997215025, iteration: 364050
loss: 0.9956079721450806,grad_norm: 0.8589583744182375, iteration: 364051
loss: 1.0390359163284302,grad_norm: 0.9218567066868903, iteration: 364052
loss: 1.2063758373260498,grad_norm: 0.9999996878835017, iteration: 364053
loss: 0.9967016577720642,grad_norm: 0.9999991984114817, iteration: 364054
loss: 1.0495800971984863,grad_norm: 0.9999992330632473, iteration: 364055
loss: 0.9967489242553711,grad_norm: 0.8968450918063029, iteration: 364056
loss: 1.0291709899902344,grad_norm: 0.7975299402498112, iteration: 364057
loss: 1.0052238702774048,grad_norm: 0.9999996686275245, iteration: 364058
loss: 1.089188814163208,grad_norm: 0.9999994674759775, iteration: 364059
loss: 1.0406132936477661,grad_norm: 0.7634311959035894, iteration: 364060
loss: 1.0130282640457153,grad_norm: 0.9999992251905524, iteration: 364061
loss: 1.0347141027450562,grad_norm: 0.9372677482978073, iteration: 364062
loss: 0.9969858527183533,grad_norm: 0.8405326632121317, iteration: 364063
loss: 1.00556480884552,grad_norm: 0.9686625045473254, iteration: 364064
loss: 1.0775411128997803,grad_norm: 0.9400126845769301, iteration: 364065
loss: 0.9858298301696777,grad_norm: 0.8823747103288274, iteration: 364066
loss: 1.0073699951171875,grad_norm: 0.7694700767100813, iteration: 364067
loss: 0.9989160299301147,grad_norm: 0.779840476755957, iteration: 364068
loss: 1.0204466581344604,grad_norm: 0.9999990679901838, iteration: 364069
loss: 1.0356228351593018,grad_norm: 0.7507976543475117, iteration: 364070
loss: 1.0002646446228027,grad_norm: 0.9829380296166869, iteration: 364071
loss: 1.0357625484466553,grad_norm: 0.9999992606797733, iteration: 364072
loss: 0.9897587895393372,grad_norm: 0.9776018452374917, iteration: 364073
loss: 1.0229148864746094,grad_norm: 0.6822762271679534, iteration: 364074
loss: 1.0002437829971313,grad_norm: 0.856130253555286, iteration: 364075
loss: 1.0254933834075928,grad_norm: 0.940932413465629, iteration: 364076
loss: 0.9970276951789856,grad_norm: 0.76156528457377, iteration: 364077
loss: 0.987975001335144,grad_norm: 0.9999990612424262, iteration: 364078
loss: 1.0306397676467896,grad_norm: 0.7428429801789532, iteration: 364079
loss: 1.2188947200775146,grad_norm: 0.9999997906230452, iteration: 364080
loss: 1.081413984298706,grad_norm: 0.9602258671238345, iteration: 364081
loss: 1.0194791555404663,grad_norm: 0.7359817998588423, iteration: 364082
loss: 1.0641855001449585,grad_norm: 0.9999992663888194, iteration: 364083
loss: 1.0161362886428833,grad_norm: 0.814992086525757, iteration: 364084
loss: 1.0055601596832275,grad_norm: 0.6658486595682007, iteration: 364085
loss: 1.048649787902832,grad_norm: 0.9999991349024846, iteration: 364086
loss: 1.0064949989318848,grad_norm: 0.771417327197946, iteration: 364087
loss: 1.0298594236373901,grad_norm: 0.9135479751498473, iteration: 364088
loss: 1.0248242616653442,grad_norm: 0.9999999369826998, iteration: 364089
loss: 1.0292526483535767,grad_norm: 0.8836384931388657, iteration: 364090
loss: 1.0231637954711914,grad_norm: 0.9999989638844589, iteration: 364091
loss: 0.9637613296508789,grad_norm: 0.8956166421679338, iteration: 364092
loss: 1.1722160577774048,grad_norm: 0.9999997587168825, iteration: 364093
loss: 0.9820098876953125,grad_norm: 0.8963212933026654, iteration: 364094
loss: 1.2548894882202148,grad_norm: 0.9999996674891614, iteration: 364095
loss: 1.0191296339035034,grad_norm: 0.9999992425644584, iteration: 364096
loss: 1.1212536096572876,grad_norm: 0.873057302268087, iteration: 364097
loss: 0.9794156551361084,grad_norm: 0.9605664239162621, iteration: 364098
loss: 1.0319136381149292,grad_norm: 0.846089861554771, iteration: 364099
loss: 0.9799787402153015,grad_norm: 0.9999997536731827, iteration: 364100
loss: 1.0191624164581299,grad_norm: 0.7968116063805586, iteration: 364101
loss: 1.0323538780212402,grad_norm: 0.7458002791136821, iteration: 364102
loss: 1.0166386365890503,grad_norm: 0.9999991287922548, iteration: 364103
loss: 0.9844666123390198,grad_norm: 0.9999993313234049, iteration: 364104
loss: 1.019426941871643,grad_norm: 0.6942463571221064, iteration: 364105
loss: 1.071420431137085,grad_norm: 0.9159503059290338, iteration: 364106
loss: 1.022087812423706,grad_norm: 0.7422460970504092, iteration: 364107
loss: 1.0209721326828003,grad_norm: 0.9999999002069423, iteration: 364108
loss: 1.092487096786499,grad_norm: 0.9999993294101543, iteration: 364109
loss: 1.0236854553222656,grad_norm: 0.9999992836451977, iteration: 364110
loss: 1.0336017608642578,grad_norm: 0.9787841763655725, iteration: 364111
loss: 0.9885651469230652,grad_norm: 0.9180711319704161, iteration: 364112
loss: 1.0844042301177979,grad_norm: 0.9999995969731846, iteration: 364113
loss: 1.007369875907898,grad_norm: 0.9415640827185648, iteration: 364114
loss: 1.0281420946121216,grad_norm: 0.7817086748412004, iteration: 364115
loss: 0.999527633190155,grad_norm: 0.9999993511231698, iteration: 364116
loss: 1.1112252473831177,grad_norm: 0.8769312213848116, iteration: 364117
loss: 1.0022187232971191,grad_norm: 0.9999990872013157, iteration: 364118
loss: 1.0022034645080566,grad_norm: 0.9714617864163992, iteration: 364119
loss: 1.1293672323226929,grad_norm: 0.9999999554041401, iteration: 364120
loss: 1.167947769165039,grad_norm: 0.9999994182032501, iteration: 364121
loss: 1.02717924118042,grad_norm: 0.8719170922734957, iteration: 364122
loss: 1.0563591718673706,grad_norm: 0.9999993230318226, iteration: 364123
loss: 1.0258852243423462,grad_norm: 0.9999999036879609, iteration: 364124
loss: 1.0353951454162598,grad_norm: 0.9999992868714668, iteration: 364125
loss: 1.0405263900756836,grad_norm: 0.999999844697692, iteration: 364126
loss: 1.0494571924209595,grad_norm: 0.9395790254023683, iteration: 364127
loss: 1.0047500133514404,grad_norm: 0.9999995376760419, iteration: 364128
loss: 1.0877957344055176,grad_norm: 0.9999997513343661, iteration: 364129
loss: 1.0139598846435547,grad_norm: 0.9999990532127427, iteration: 364130
loss: 1.1340810060501099,grad_norm: 0.9999996044013141, iteration: 364131
loss: 1.0546131134033203,grad_norm: 0.8276688089807757, iteration: 364132
loss: 1.017939567565918,grad_norm: 0.9112148150733786, iteration: 364133
loss: 1.040594458580017,grad_norm: 0.7986266186844138, iteration: 364134
loss: 1.089316487312317,grad_norm: 0.8319449267255864, iteration: 364135
loss: 1.0475798845291138,grad_norm: 0.9999999639818681, iteration: 364136
loss: 1.0109148025512695,grad_norm: 0.8716749766878124, iteration: 364137
loss: 0.980914294719696,grad_norm: 0.6751040616975816, iteration: 364138
loss: 0.9887526035308838,grad_norm: 0.8427707884715829, iteration: 364139
loss: 1.010790467262268,grad_norm: 0.8206169868219735, iteration: 364140
loss: 1.0315325260162354,grad_norm: 0.9078670199854075, iteration: 364141
loss: 1.0154991149902344,grad_norm: 0.9999994417234195, iteration: 364142
loss: 1.026144027709961,grad_norm: 0.7472817075546372, iteration: 364143
loss: 1.0417509078979492,grad_norm: 0.9999991803598952, iteration: 364144
loss: 0.9996443390846252,grad_norm: 0.9491610994677676, iteration: 364145
loss: 1.0475003719329834,grad_norm: 0.8657792450625629, iteration: 364146
loss: 1.0049711465835571,grad_norm: 0.9545673214375102, iteration: 364147
loss: 1.070707082748413,grad_norm: 0.9999991224445481, iteration: 364148
loss: 1.0396443605422974,grad_norm: 0.8983273988011152, iteration: 364149
loss: 1.1292531490325928,grad_norm: 0.9999990463843721, iteration: 364150
loss: 0.9551389217376709,grad_norm: 0.8026849192831583, iteration: 364151
loss: 1.098829984664917,grad_norm: 0.9052926136601207, iteration: 364152
loss: 1.0494470596313477,grad_norm: 0.999999290660628, iteration: 364153
loss: 1.015458106994629,grad_norm: 0.8720032966721559, iteration: 364154
loss: 1.1201845407485962,grad_norm: 0.9999999911857345, iteration: 364155
loss: 1.125101089477539,grad_norm: 0.9999997978483438, iteration: 364156
loss: 1.0041900873184204,grad_norm: 0.9999991467251114, iteration: 364157
loss: 1.0273874998092651,grad_norm: 0.9077040091307707, iteration: 364158
loss: 1.135062336921692,grad_norm: 0.9999989683131342, iteration: 364159
loss: 1.0191267728805542,grad_norm: 0.8726452493047009, iteration: 364160
loss: 1.0838721990585327,grad_norm: 0.9999994435764417, iteration: 364161
loss: 1.0933713912963867,grad_norm: 0.8819715003750929, iteration: 364162
loss: 1.0506067276000977,grad_norm: 0.75233282467901, iteration: 364163
loss: 1.0758118629455566,grad_norm: 0.9999993375300846, iteration: 364164
loss: 1.0261141061782837,grad_norm: 0.7386613989911078, iteration: 364165
loss: 1.017555594444275,grad_norm: 0.7743902692068149, iteration: 364166
loss: 1.1312878131866455,grad_norm: 0.9999994046745914, iteration: 364167
loss: 1.0409290790557861,grad_norm: 0.9400664676488334, iteration: 364168
loss: 0.965374767780304,grad_norm: 0.999999953766322, iteration: 364169
loss: 1.0197609663009644,grad_norm: 0.9999994746152112, iteration: 364170
loss: 1.091877818107605,grad_norm: 0.9999998578512873, iteration: 364171
loss: 1.0258703231811523,grad_norm: 0.9999993977677825, iteration: 364172
loss: 1.0637097358703613,grad_norm: 0.7984957506854198, iteration: 364173
loss: 0.9886196851730347,grad_norm: 0.8657674763583875, iteration: 364174
loss: 1.1355477571487427,grad_norm: 0.9999996462849109, iteration: 364175
loss: 1.0027047395706177,grad_norm: 0.9999996632562405, iteration: 364176
loss: 0.9994068145751953,grad_norm: 0.861686151313889, iteration: 364177
loss: 1.0951471328735352,grad_norm: 0.9999998374148973, iteration: 364178
loss: 0.9994884729385376,grad_norm: 0.7926665376728038, iteration: 364179
loss: 1.0325487852096558,grad_norm: 0.8521057073556195, iteration: 364180
loss: 1.1072951555252075,grad_norm: 0.9999993404778162, iteration: 364181
loss: 1.1480300426483154,grad_norm: 0.9999999387114349, iteration: 364182
loss: 1.0197244882583618,grad_norm: 0.8080265147751283, iteration: 364183
loss: 0.9847201704978943,grad_norm: 0.9999999172796126, iteration: 364184
loss: 0.976742684841156,grad_norm: 0.7184476926906124, iteration: 364185
loss: 1.0693057775497437,grad_norm: 0.9999995856545735, iteration: 364186
loss: 0.9993996024131775,grad_norm: 0.9999989942579094, iteration: 364187
loss: 1.0808210372924805,grad_norm: 0.999999568893268, iteration: 364188
loss: 1.0644949674606323,grad_norm: 0.9999998790774459, iteration: 364189
loss: 1.1960245370864868,grad_norm: 1.000000028935644, iteration: 364190
loss: 0.979417085647583,grad_norm: 0.9999996468662848, iteration: 364191
loss: 1.0473322868347168,grad_norm: 0.8982554261209074, iteration: 364192
loss: 1.0244204998016357,grad_norm: 0.8491413016576994, iteration: 364193
loss: 1.080601692199707,grad_norm: 0.9227524865340523, iteration: 364194
loss: 1.0025129318237305,grad_norm: 0.999999810476025, iteration: 364195
loss: 1.0111098289489746,grad_norm: 0.9999990520876286, iteration: 364196
loss: 1.0742697715759277,grad_norm: 0.9999993251372115, iteration: 364197
loss: 0.9658748507499695,grad_norm: 0.9999990520249711, iteration: 364198
loss: 0.9928510785102844,grad_norm: 0.9832917270158524, iteration: 364199
loss: 0.96354740858078,grad_norm: 0.967998950990354, iteration: 364200
loss: 1.067873477935791,grad_norm: 0.9999991324015259, iteration: 364201
loss: 1.029300570487976,grad_norm: 0.9312671261957437, iteration: 364202
loss: 1.0637593269348145,grad_norm: 0.9999989962223098, iteration: 364203
loss: 1.0813374519348145,grad_norm: 0.7372844518915134, iteration: 364204
loss: 1.0343431234359741,grad_norm: 0.7684403700611727, iteration: 364205
loss: 1.0447192192077637,grad_norm: 0.9999995309531102, iteration: 364206
loss: 1.0862362384796143,grad_norm: 0.9999997318388295, iteration: 364207
loss: 0.9812567234039307,grad_norm: 0.9999994155640403, iteration: 364208
loss: 1.0303560495376587,grad_norm: 0.9999989886880525, iteration: 364209
loss: 1.0664386749267578,grad_norm: 0.9999998132215786, iteration: 364210
loss: 0.9480156898498535,grad_norm: 0.9999991431659049, iteration: 364211
loss: 0.9993196129798889,grad_norm: 0.886209815016923, iteration: 364212
loss: 1.1662228107452393,grad_norm: 0.9999996810945122, iteration: 364213
loss: 1.0888885259628296,grad_norm: 0.9999996073182805, iteration: 364214
loss: 1.0674830675125122,grad_norm: 0.9999999478527751, iteration: 364215
loss: 0.9997908473014832,grad_norm: 0.8081675741534989, iteration: 364216
loss: 1.1089996099472046,grad_norm: 0.9999995037463865, iteration: 364217
loss: 1.0355743169784546,grad_norm: 1.0000000805890068, iteration: 364218
loss: 1.0395101308822632,grad_norm: 0.9999995889963645, iteration: 364219
loss: 1.0286179780960083,grad_norm: 0.9497338044430864, iteration: 364220
loss: 1.0215260982513428,grad_norm: 0.8569182493065156, iteration: 364221
loss: 1.0511553287506104,grad_norm: 0.9186464820392708, iteration: 364222
loss: 1.0184613466262817,grad_norm: 0.9999998803373261, iteration: 364223
loss: 1.099826693534851,grad_norm: 0.9999993663105705, iteration: 364224
loss: 1.0668262243270874,grad_norm: 0.9022751864586674, iteration: 364225
loss: 1.1294474601745605,grad_norm: 0.9999992447384732, iteration: 364226
loss: 1.050466775894165,grad_norm: 0.9273922923615231, iteration: 364227
loss: 0.9688332676887512,grad_norm: 0.6852957615514113, iteration: 364228
loss: 1.026610255241394,grad_norm: 0.8092619034827682, iteration: 364229
loss: 1.0084699392318726,grad_norm: 0.9497159221974504, iteration: 364230
loss: 1.0369926691055298,grad_norm: 0.8826177270469667, iteration: 364231
loss: 1.0106391906738281,grad_norm: 0.7202117624870475, iteration: 364232
loss: 1.008591890335083,grad_norm: 0.9188935332857973, iteration: 364233
loss: 1.0066105127334595,grad_norm: 0.9331250383823214, iteration: 364234
loss: 1.0651731491088867,grad_norm: 0.8537229800614909, iteration: 364235
loss: 1.0381603240966797,grad_norm: 0.934625059576513, iteration: 364236
loss: 1.0439397096633911,grad_norm: 0.866244298300746, iteration: 364237
loss: 1.012717366218567,grad_norm: 0.9243982181081395, iteration: 364238
loss: 1.0000276565551758,grad_norm: 0.947260536220152, iteration: 364239
loss: 1.021896481513977,grad_norm: 0.8828730529998964, iteration: 364240
loss: 1.008604884147644,grad_norm: 0.9999991627732562, iteration: 364241
loss: 1.0087134838104248,grad_norm: 0.8869955929543368, iteration: 364242
loss: 1.0483269691467285,grad_norm: 0.9424096630192691, iteration: 364243
loss: 1.152465581893921,grad_norm: 0.9485230854246597, iteration: 364244
loss: 1.085459589958191,grad_norm: 0.8089591788284306, iteration: 364245
loss: 1.0395970344543457,grad_norm: 0.9006256256121099, iteration: 364246
loss: 1.069381594657898,grad_norm: 0.9033087519318763, iteration: 364247
loss: 1.0549681186676025,grad_norm: 0.9999999320424909, iteration: 364248
loss: 1.097839593887329,grad_norm: 0.9999998476697486, iteration: 364249
loss: 1.0286816358566284,grad_norm: 0.9999991949343914, iteration: 364250
loss: 1.0474371910095215,grad_norm: 0.7392799811037447, iteration: 364251
loss: 1.0708856582641602,grad_norm: 0.7241961831150486, iteration: 364252
loss: 1.1220018863677979,grad_norm: 0.9999990796159269, iteration: 364253
loss: 1.065898060798645,grad_norm: 0.8081001041446101, iteration: 364254
loss: 1.0612468719482422,grad_norm: 0.9999999803275617, iteration: 364255
loss: 0.9960229992866516,grad_norm: 0.9999997321669554, iteration: 364256
loss: 1.2116421461105347,grad_norm: 0.9999995272238986, iteration: 364257
loss: 0.9749635457992554,grad_norm: 0.8051367606028345, iteration: 364258
loss: 0.9585629105567932,grad_norm: 0.7506348471559102, iteration: 364259
loss: 0.9777081608772278,grad_norm: 0.7168184491017878, iteration: 364260
loss: 1.032580852508545,grad_norm: 0.8202771816807586, iteration: 364261
loss: 0.9837098121643066,grad_norm: 0.7790781492241445, iteration: 364262
loss: 1.1008715629577637,grad_norm: 0.7534940386330152, iteration: 364263
loss: 0.996974527835846,grad_norm: 0.841019689651769, iteration: 364264
loss: 1.0895613431930542,grad_norm: 0.8316764057441135, iteration: 364265
loss: 1.0566461086273193,grad_norm: 0.9999990571894137, iteration: 364266
loss: 0.9929035305976868,grad_norm: 0.9982299750221124, iteration: 364267
loss: 1.0487613677978516,grad_norm: 0.9999995741084304, iteration: 364268
loss: 1.2127704620361328,grad_norm: 0.9999998555700387, iteration: 364269
loss: 0.9812055826187134,grad_norm: 0.9263394300756315, iteration: 364270
loss: 0.9995651841163635,grad_norm: 0.9999989973964194, iteration: 364271
loss: 1.105064034461975,grad_norm: 0.9999997498334411, iteration: 364272
loss: 1.110093116760254,grad_norm: 0.9999998935124281, iteration: 364273
loss: 1.0096261501312256,grad_norm: 0.6694652721172838, iteration: 364274
loss: 1.0721951723098755,grad_norm: 0.999999730107635, iteration: 364275
loss: 0.9805245995521545,grad_norm: 0.8162569134622943, iteration: 364276
loss: 0.9884159564971924,grad_norm: 0.8890929579705111, iteration: 364277
loss: 0.9875783920288086,grad_norm: 0.7809028555267752, iteration: 364278
loss: 1.0737274885177612,grad_norm: 0.904664972019316, iteration: 364279
loss: 1.059747576713562,grad_norm: 0.7196864334058519, iteration: 364280
loss: 1.057241678237915,grad_norm: 0.799010624065603, iteration: 364281
loss: 1.0005327463150024,grad_norm: 0.7950164557601901, iteration: 364282
loss: 1.0480462312698364,grad_norm: 0.8239890165119789, iteration: 364283
loss: 1.1019949913024902,grad_norm: 0.9999998022977145, iteration: 364284
loss: 0.9766126275062561,grad_norm: 0.8722117315952346, iteration: 364285
loss: 1.088584542274475,grad_norm: 1.0000000237114393, iteration: 364286
loss: 1.0322014093399048,grad_norm: 0.8508296150480624, iteration: 364287
loss: 1.1256556510925293,grad_norm: 0.9362073963006516, iteration: 364288
loss: 1.1125433444976807,grad_norm: 0.999999103443827, iteration: 364289
loss: 1.1868655681610107,grad_norm: 0.9999999807820865, iteration: 364290
loss: 1.198880672454834,grad_norm: 0.9771936602179716, iteration: 364291
loss: 1.0558775663375854,grad_norm: 0.8971590879689532, iteration: 364292
loss: 1.0233227014541626,grad_norm: 0.9999998706914173, iteration: 364293
loss: 1.109093189239502,grad_norm: 0.8254656554591268, iteration: 364294
loss: 1.0456019639968872,grad_norm: 0.9999996197253215, iteration: 364295
loss: 1.003554105758667,grad_norm: 0.951603515503082, iteration: 364296
loss: 1.044050931930542,grad_norm: 0.9950296008966192, iteration: 364297
loss: 1.0662726163864136,grad_norm: 0.7966593386119635, iteration: 364298
loss: 1.0046353340148926,grad_norm: 0.8885924314647848, iteration: 364299
loss: 1.0522449016571045,grad_norm: 0.9999997507348496, iteration: 364300
loss: 0.9559545516967773,grad_norm: 0.8393495593264809, iteration: 364301
loss: 1.0647166967391968,grad_norm: 0.9999991596992218, iteration: 364302
loss: 1.0584001541137695,grad_norm: 0.9999996523472218, iteration: 364303
loss: 1.1184515953063965,grad_norm: 0.9999992676405353, iteration: 364304
loss: 1.018189549446106,grad_norm: 0.8283694187695959, iteration: 364305
loss: 1.1356961727142334,grad_norm: 0.9202372608712698, iteration: 364306
loss: 1.0039215087890625,grad_norm: 0.9180135258358462, iteration: 364307
loss: 1.0283108949661255,grad_norm: 0.8620821768574171, iteration: 364308
loss: 1.112305760383606,grad_norm: 0.9999991454376473, iteration: 364309
loss: 1.027585506439209,grad_norm: 0.8025585689357949, iteration: 364310
loss: 1.126112699508667,grad_norm: 0.9999994903222681, iteration: 364311
loss: 1.0174877643585205,grad_norm: 0.9126901858739026, iteration: 364312
loss: 1.0089987516403198,grad_norm: 0.9999991764033068, iteration: 364313
loss: 1.1014050245285034,grad_norm: 0.9677407822925281, iteration: 364314
loss: 0.9772577285766602,grad_norm: 0.6088611094070834, iteration: 364315
loss: 1.0456074476242065,grad_norm: 0.63881532529722, iteration: 364316
loss: 0.992643415927887,grad_norm: 0.6712524693878958, iteration: 364317
loss: 1.0687087774276733,grad_norm: 0.826045294118758, iteration: 364318
loss: 1.0427910089492798,grad_norm: 0.9999990430756608, iteration: 364319
loss: 1.0449912548065186,grad_norm: 0.8549773281158007, iteration: 364320
loss: 1.0808981657028198,grad_norm: 0.9999991207579442, iteration: 364321
loss: 1.0006638765335083,grad_norm: 0.7214051181544056, iteration: 364322
loss: 1.0442063808441162,grad_norm: 0.7822166075656579, iteration: 364323
loss: 1.0928242206573486,grad_norm: 0.9999997282412837, iteration: 364324
loss: 1.0372865200042725,grad_norm: 0.9999998809060798, iteration: 364325
loss: 1.0262761116027832,grad_norm: 0.9999995462561434, iteration: 364326
loss: 1.1385912895202637,grad_norm: 0.9999991300225051, iteration: 364327
loss: 0.9866918921470642,grad_norm: 0.9999992606083509, iteration: 364328
loss: 1.0150160789489746,grad_norm: 0.9999995179137515, iteration: 364329
loss: 0.9674946665763855,grad_norm: 0.8893203836949638, iteration: 364330
loss: 1.0317167043685913,grad_norm: 0.9999999654774844, iteration: 364331
loss: 1.065530776977539,grad_norm: 0.9999994505812533, iteration: 364332
loss: 1.0794107913970947,grad_norm: 0.9107374629764644, iteration: 364333
loss: 1.0801432132720947,grad_norm: 0.9825772623853861, iteration: 364334
loss: 1.0005642175674438,grad_norm: 0.8465477993962784, iteration: 364335
loss: 1.0332221984863281,grad_norm: 0.999999763589883, iteration: 364336
loss: 0.9905505180358887,grad_norm: 0.7469655830308, iteration: 364337
loss: 1.0737154483795166,grad_norm: 0.993354413943497, iteration: 364338
loss: 1.062153935432434,grad_norm: 0.9999999174856957, iteration: 364339
loss: 1.0274347066879272,grad_norm: 0.999999928296081, iteration: 364340
loss: 1.0253602266311646,grad_norm: 0.9284152886498516, iteration: 364341
loss: 1.0604063272476196,grad_norm: 0.9999995684821593, iteration: 364342
loss: 0.996376633644104,grad_norm: 0.9187654348691024, iteration: 364343
loss: 1.04185152053833,grad_norm: 0.9546039441726694, iteration: 364344
loss: 0.9808799028396606,grad_norm: 0.8324744452005236, iteration: 364345
loss: 1.0495045185089111,grad_norm: 0.866731422010746, iteration: 364346
loss: 1.0813426971435547,grad_norm: 0.9999991820544081, iteration: 364347
loss: 1.004016637802124,grad_norm: 0.8038583312615961, iteration: 364348
loss: 1.0156995058059692,grad_norm: 0.7955561553088816, iteration: 364349
loss: 1.017831802368164,grad_norm: 0.7984546549506943, iteration: 364350
loss: 1.0920592546463013,grad_norm: 0.963752625955791, iteration: 364351
loss: 1.036352515220642,grad_norm: 0.9971672645218745, iteration: 364352
loss: 0.9496757984161377,grad_norm: 0.913491907524086, iteration: 364353
loss: 1.0593267679214478,grad_norm: 0.8480641659477876, iteration: 364354
loss: 1.053587794303894,grad_norm: 0.8383624438140932, iteration: 364355
loss: 1.1096413135528564,grad_norm: 0.8103820405619401, iteration: 364356
loss: 1.0023103952407837,grad_norm: 0.9999995145605945, iteration: 364357
loss: 1.0297868251800537,grad_norm: 0.8675126881786357, iteration: 364358
loss: 1.0472066402435303,grad_norm: 0.999999347351868, iteration: 364359
loss: 1.0237840414047241,grad_norm: 0.9999994547026737, iteration: 364360
loss: 1.0544873476028442,grad_norm: 0.9999996240195456, iteration: 364361
loss: 1.0294909477233887,grad_norm: 0.7263450569006266, iteration: 364362
loss: 1.0060133934020996,grad_norm: 0.7124224621487723, iteration: 364363
loss: 1.0165293216705322,grad_norm: 0.9999990470101445, iteration: 364364
loss: 0.9916432499885559,grad_norm: 0.834515711696121, iteration: 364365
loss: 1.0277979373931885,grad_norm: 0.6586348319811858, iteration: 364366
loss: 1.0448720455169678,grad_norm: 0.9999997105160722, iteration: 364367
loss: 1.0477010011672974,grad_norm: 0.7918996639516139, iteration: 364368
loss: 0.957179844379425,grad_norm: 0.8335395091650689, iteration: 364369
loss: 0.9867245554924011,grad_norm: 0.8304351328837442, iteration: 364370
loss: 1.0273579359054565,grad_norm: 0.966142479827134, iteration: 364371
loss: 1.0423356294631958,grad_norm: 1.0000000166059098, iteration: 364372
loss: 1.0199780464172363,grad_norm: 0.9999999296972558, iteration: 364373
loss: 1.0472242832183838,grad_norm: 0.9407809134117502, iteration: 364374
loss: 1.0137912034988403,grad_norm: 0.9670846535890028, iteration: 364375
loss: 1.0677841901779175,grad_norm: 0.9373076824461151, iteration: 364376
loss: 1.0673754215240479,grad_norm: 0.6731157875811966, iteration: 364377
loss: 0.9957101941108704,grad_norm: 0.8186436194546328, iteration: 364378
loss: 1.0001901388168335,grad_norm: 0.7593071761770387, iteration: 364379
loss: 1.0412719249725342,grad_norm: 0.8186159437156554, iteration: 364380
loss: 1.0600929260253906,grad_norm: 0.9182337795419316, iteration: 364381
loss: 1.0432978868484497,grad_norm: 0.9999992262990184, iteration: 364382
loss: 1.045999526977539,grad_norm: 0.7319487656288601, iteration: 364383
loss: 0.9757729172706604,grad_norm: 0.8248941978052877, iteration: 364384
loss: 0.9769089818000793,grad_norm: 0.999999262945645, iteration: 364385
loss: 1.0028237104415894,grad_norm: 0.8265552208049701, iteration: 364386
loss: 0.9972224235534668,grad_norm: 0.8863574507007979, iteration: 364387
loss: 1.0573630332946777,grad_norm: 0.978877031287115, iteration: 364388
loss: 1.0233416557312012,grad_norm: 0.7478488253540847, iteration: 364389
loss: 1.0565546751022339,grad_norm: 0.811524586942994, iteration: 364390
loss: 1.0795379877090454,grad_norm: 0.7929777610331799, iteration: 364391
loss: 1.0069507360458374,grad_norm: 0.99999921828397, iteration: 364392
loss: 0.9541531205177307,grad_norm: 0.868986880889612, iteration: 364393
loss: 1.1253401041030884,grad_norm: 0.9999998698481188, iteration: 364394
loss: 1.0767326354980469,grad_norm: 0.9999991750392369, iteration: 364395
loss: 1.055051565170288,grad_norm: 0.8929422471239845, iteration: 364396
loss: 0.9753799438476562,grad_norm: 0.7665799285343845, iteration: 364397
loss: 1.0124211311340332,grad_norm: 0.9782395184394984, iteration: 364398
loss: 1.0134409666061401,grad_norm: 0.9402390713605798, iteration: 364399
loss: 1.0044399499893188,grad_norm: 0.8428659024396283, iteration: 364400
loss: 1.04073965549469,grad_norm: 0.6696270665497714, iteration: 364401
loss: 1.0476011037826538,grad_norm: 0.9999992854248647, iteration: 364402
loss: 1.0849342346191406,grad_norm: 0.9902724972357375, iteration: 364403
loss: 0.9428092837333679,grad_norm: 0.8528321339274008, iteration: 364404
loss: 0.9872180223464966,grad_norm: 0.8935437552950787, iteration: 364405
loss: 1.0214667320251465,grad_norm: 0.8370729315484785, iteration: 364406
loss: 1.0249121189117432,grad_norm: 0.8629896754156792, iteration: 364407
loss: 1.0117409229278564,grad_norm: 0.9999991147078019, iteration: 364408
loss: 1.0440603494644165,grad_norm: 0.6974226930048758, iteration: 364409
loss: 1.037087082862854,grad_norm: 0.9712915736254553, iteration: 364410
loss: 1.008687973022461,grad_norm: 0.7861477617364631, iteration: 364411
loss: 1.0073708295822144,grad_norm: 0.8842784077304374, iteration: 364412
loss: 1.001769781112671,grad_norm: 0.9999991344804594, iteration: 364413
loss: 1.0173059701919556,grad_norm: 0.7987355496214407, iteration: 364414
loss: 1.0024287700653076,grad_norm: 0.9999997911007467, iteration: 364415
loss: 0.9963464140892029,grad_norm: 0.9999997056602228, iteration: 364416
loss: 0.9855618476867676,grad_norm: 0.6662913477999032, iteration: 364417
loss: 1.0740245580673218,grad_norm: 0.8067193367017501, iteration: 364418
loss: 1.0215142965316772,grad_norm: 0.8011050389381859, iteration: 364419
loss: 1.0049892663955688,grad_norm: 0.8722540724015059, iteration: 364420
loss: 1.010857343673706,grad_norm: 0.9357040816084691, iteration: 364421
loss: 1.0247710943222046,grad_norm: 0.999999207039856, iteration: 364422
loss: 1.1644788980484009,grad_norm: 0.9836083088764944, iteration: 364423
loss: 1.011483073234558,grad_norm: 0.9995089154848569, iteration: 364424
loss: 0.9568378925323486,grad_norm: 0.7501917064631678, iteration: 364425
loss: 1.0245928764343262,grad_norm: 0.758842782045335, iteration: 364426
loss: 1.034077763557434,grad_norm: 0.7402223701760318, iteration: 364427
loss: 1.0243651866912842,grad_norm: 0.7738470159786833, iteration: 364428
loss: 1.003692865371704,grad_norm: 0.9999994521811482, iteration: 364429
loss: 0.997285008430481,grad_norm: 0.7793191740701446, iteration: 364430
loss: 0.9975475072860718,grad_norm: 0.8271204063369908, iteration: 364431
loss: 0.9838322997093201,grad_norm: 0.855262138336463, iteration: 364432
loss: 1.0402216911315918,grad_norm: 0.9999990980795239, iteration: 364433
loss: 0.9774040579795837,grad_norm: 0.7617842903013415, iteration: 364434
loss: 0.9489712119102478,grad_norm: 0.7999597244983165, iteration: 364435
loss: 1.111191749572754,grad_norm: 0.7436780208904641, iteration: 364436
loss: 0.9874499440193176,grad_norm: 0.9695549012986615, iteration: 364437
loss: 0.9961155652999878,grad_norm: 0.6154714264876748, iteration: 364438
loss: 1.0611755847930908,grad_norm: 0.9999990267634215, iteration: 364439
loss: 1.022442102432251,grad_norm: 0.9999999072087097, iteration: 364440
loss: 1.0779626369476318,grad_norm: 0.9999998556274731, iteration: 364441
loss: 1.0365771055221558,grad_norm: 0.999999991789735, iteration: 364442
loss: 1.0844866037368774,grad_norm: 0.9999998584936218, iteration: 364443
loss: 1.016631007194519,grad_norm: 0.9842531943211611, iteration: 364444
loss: 1.0680829286575317,grad_norm: 0.9999998328626244, iteration: 364445
loss: 1.0140382051467896,grad_norm: 0.8753970783299745, iteration: 364446
loss: 1.0146162509918213,grad_norm: 0.999999138779861, iteration: 364447
loss: 1.010410189628601,grad_norm: 0.7336717287912536, iteration: 364448
loss: 1.0914180278778076,grad_norm: 0.9999994387882541, iteration: 364449
loss: 0.983713686466217,grad_norm: 0.8749551653772759, iteration: 364450
loss: 0.9414302706718445,grad_norm: 0.8364288361045009, iteration: 364451
loss: 0.9744005799293518,grad_norm: 0.9999990557284779, iteration: 364452
loss: 1.1246616840362549,grad_norm: 0.9999998686102362, iteration: 364453
loss: 1.036600112915039,grad_norm: 0.965396043883905, iteration: 364454
loss: 0.9816008806228638,grad_norm: 0.7905244785156503, iteration: 364455
loss: 1.0299500226974487,grad_norm: 0.895764959601792, iteration: 364456
loss: 1.0363810062408447,grad_norm: 0.7855822751473759, iteration: 364457
loss: 1.027089238166809,grad_norm: 0.8250902769885006, iteration: 364458
loss: 1.0847824811935425,grad_norm: 0.9999995425524316, iteration: 364459
loss: 0.9945899844169617,grad_norm: 0.7279552356270012, iteration: 364460
loss: 0.9847686290740967,grad_norm: 0.859397677067445, iteration: 364461
loss: 0.9829976558685303,grad_norm: 0.8057434148153808, iteration: 364462
loss: 1.0144013166427612,grad_norm: 0.7949844206366599, iteration: 364463
loss: 1.0310693979263306,grad_norm: 0.9423977182721414, iteration: 364464
loss: 1.169334888458252,grad_norm: 0.9999995385802254, iteration: 364465
loss: 0.9908340573310852,grad_norm: 0.6992912055257281, iteration: 364466
loss: 0.9861169457435608,grad_norm: 0.8043648673389829, iteration: 364467
loss: 0.9646773934364319,grad_norm: 0.6995790760589062, iteration: 364468
loss: 0.9830390810966492,grad_norm: 0.8524526501018691, iteration: 364469
loss: 0.9947981238365173,grad_norm: 0.7821264026465324, iteration: 364470
loss: 0.9493489861488342,grad_norm: 0.7926644732612336, iteration: 364471
loss: 1.0045956373214722,grad_norm: 0.7969960406022505, iteration: 364472
loss: 0.9927058219909668,grad_norm: 0.7715493247692391, iteration: 364473
loss: 1.002261996269226,grad_norm: 0.7048650380057182, iteration: 364474
loss: 0.9983065724372864,grad_norm: 0.8778666513121606, iteration: 364475
loss: 1.0639936923980713,grad_norm: 0.8853975115690896, iteration: 364476
loss: 1.0318527221679688,grad_norm: 0.7993915290203276, iteration: 364477
loss: 1.0591928958892822,grad_norm: 0.7264704105869844, iteration: 364478
loss: 0.958070695400238,grad_norm: 0.9999999285378364, iteration: 364479
loss: 0.9770718216896057,grad_norm: 0.7070747554817157, iteration: 364480
loss: 0.9998674988746643,grad_norm: 0.6989888351787461, iteration: 364481
loss: 1.000757098197937,grad_norm: 0.7112045098135144, iteration: 364482
loss: 1.100305199623108,grad_norm: 0.9999997675249939, iteration: 364483
loss: 1.0446922779083252,grad_norm: 0.9356731009612554, iteration: 364484
loss: 1.0551550388336182,grad_norm: 0.8772705458207298, iteration: 364485
loss: 1.0341119766235352,grad_norm: 0.9999996822041504, iteration: 364486
loss: 0.9922201037406921,grad_norm: 0.8864514699937432, iteration: 364487
loss: 1.056533694267273,grad_norm: 0.7930025147738444, iteration: 364488
loss: 1.0932639837265015,grad_norm: 0.999999768263814, iteration: 364489
loss: 1.022876262664795,grad_norm: 0.9999996918368891, iteration: 364490
loss: 0.9944945573806763,grad_norm: 0.9034780707220145, iteration: 364491
loss: 1.2215744256973267,grad_norm: 0.9999991920567339, iteration: 364492
loss: 1.0215765237808228,grad_norm: 0.6784089617688571, iteration: 364493
loss: 1.028886318206787,grad_norm: 0.8307013912943777, iteration: 364494
loss: 1.046151041984558,grad_norm: 0.9595828515062647, iteration: 364495
loss: 0.9884416460990906,grad_norm: 0.7672331098582703, iteration: 364496
loss: 1.0227727890014648,grad_norm: 0.9999994766029837, iteration: 364497
loss: 1.0094934701919556,grad_norm: 0.9465141731784159, iteration: 364498
loss: 0.9492509961128235,grad_norm: 0.8579214193424769, iteration: 364499
loss: 0.969036340713501,grad_norm: 0.8769032509486254, iteration: 364500
loss: 1.0537015199661255,grad_norm: 0.7926487607017532, iteration: 364501
loss: 0.9949065446853638,grad_norm: 0.7495715582448589, iteration: 364502
loss: 1.016603708267212,grad_norm: 0.7958085720778093, iteration: 364503
loss: 0.963383674621582,grad_norm: 0.7692097622790635, iteration: 364504
loss: 1.005024790763855,grad_norm: 0.8244056750959015, iteration: 364505
loss: 1.0321263074874878,grad_norm: 0.856116004597028, iteration: 364506
loss: 0.9722403883934021,grad_norm: 0.8799456600006198, iteration: 364507
loss: 1.0244580507278442,grad_norm: 0.6671082728823718, iteration: 364508
loss: 1.084091067314148,grad_norm: 0.999999594793034, iteration: 364509
loss: 0.9532451033592224,grad_norm: 0.8483341787067121, iteration: 364510
loss: 1.1377545595169067,grad_norm: 0.9999998328481415, iteration: 364511
loss: 1.0143342018127441,grad_norm: 0.769058511216696, iteration: 364512
loss: 1.0005760192871094,grad_norm: 0.8878006411044006, iteration: 364513
loss: 1.0658915042877197,grad_norm: 0.7643790424632751, iteration: 364514
loss: 1.0049728155136108,grad_norm: 0.7818143436265782, iteration: 364515
loss: 1.0045630931854248,grad_norm: 0.9218168065002698, iteration: 364516
loss: 1.066988229751587,grad_norm: 0.9999990967875296, iteration: 364517
loss: 1.0376213788986206,grad_norm: 0.8249453020478198, iteration: 364518
loss: 1.0783543586730957,grad_norm: 0.9999990220850069, iteration: 364519
loss: 1.0235689878463745,grad_norm: 0.6955911596224995, iteration: 364520
loss: 0.9899091124534607,grad_norm: 0.7648161705508485, iteration: 364521
loss: 1.067024827003479,grad_norm: 0.9999990496303985, iteration: 364522
loss: 1.0613789558410645,grad_norm: 0.8255252158312006, iteration: 364523
loss: 1.0265802145004272,grad_norm: 0.8468812010318724, iteration: 364524
loss: 1.0078988075256348,grad_norm: 0.906917327853928, iteration: 364525
loss: 1.1048322916030884,grad_norm: 0.9849752960311634, iteration: 364526
loss: 0.9554046392440796,grad_norm: 0.7571692287234087, iteration: 364527
loss: 1.0176788568496704,grad_norm: 0.8207355403107586, iteration: 364528
loss: 1.0030525922775269,grad_norm: 0.8153428042200297, iteration: 364529
loss: 1.0875219106674194,grad_norm: 0.7997327834096979, iteration: 364530
loss: 1.0329053401947021,grad_norm: 0.7713420267114564, iteration: 364531
loss: 0.9973660707473755,grad_norm: 0.8345053728963695, iteration: 364532
loss: 1.0156266689300537,grad_norm: 0.8567023343457082, iteration: 364533
loss: 1.007903814315796,grad_norm: 0.9999998428535473, iteration: 364534
loss: 1.0804768800735474,grad_norm: 0.9999990445705426, iteration: 364535
loss: 1.1481012105941772,grad_norm: 0.9999999452168588, iteration: 364536
loss: 1.0017389059066772,grad_norm: 0.764622509011435, iteration: 364537
loss: 1.0348249673843384,grad_norm: 0.894087989628191, iteration: 364538
loss: 1.0084562301635742,grad_norm: 0.7875430186386144, iteration: 364539
loss: 0.9978690147399902,grad_norm: 0.8541571895928631, iteration: 364540
loss: 1.0068281888961792,grad_norm: 0.8242935330831209, iteration: 364541
loss: 1.040565013885498,grad_norm: 0.7611635240349726, iteration: 364542
loss: 1.0908311605453491,grad_norm: 0.8105485526059448, iteration: 364543
loss: 0.9990366101264954,grad_norm: 0.7784362356700778, iteration: 364544
loss: 1.046275019645691,grad_norm: 0.9999999681088934, iteration: 364545
loss: 1.0256441831588745,grad_norm: 0.9262147301003436, iteration: 364546
loss: 1.023818850517273,grad_norm: 0.9999999346950426, iteration: 364547
loss: 1.0679198503494263,grad_norm: 0.8955895932108151, iteration: 364548
loss: 0.9758284091949463,grad_norm: 0.8610714393338124, iteration: 364549
loss: 1.0371758937835693,grad_norm: 0.8939043620832712, iteration: 364550
loss: 1.0583205223083496,grad_norm: 0.9999994938973215, iteration: 364551
loss: 1.037255048751831,grad_norm: 0.7238909618677961, iteration: 364552
loss: 1.1428464651107788,grad_norm: 0.9838955777289694, iteration: 364553
loss: 1.1981980800628662,grad_norm: 0.9999996306882051, iteration: 364554
loss: 1.0901695489883423,grad_norm: 0.9999999723597597, iteration: 364555
loss: 0.9942020773887634,grad_norm: 0.8138231618834951, iteration: 364556
loss: 1.3177719116210938,grad_norm: 0.9999998450879273, iteration: 364557
loss: 1.0123625993728638,grad_norm: 0.7137368140494132, iteration: 364558
loss: 1.2624640464782715,grad_norm: 0.9999999348043707, iteration: 364559
loss: 1.0139113664627075,grad_norm: 0.8341557179045662, iteration: 364560
loss: 1.2346751689910889,grad_norm: 0.9999991786910487, iteration: 364561
loss: 1.0689048767089844,grad_norm: 0.7298365666652551, iteration: 364562
loss: 1.0048768520355225,grad_norm: 0.8245623880804152, iteration: 364563
loss: 1.2725077867507935,grad_norm: 0.9999998914040029, iteration: 364564
loss: 1.0244688987731934,grad_norm: 0.7562319946225528, iteration: 364565
loss: 1.1418863534927368,grad_norm: 1.0000000077254176, iteration: 364566
loss: 1.5375038385391235,grad_norm: 0.9999999581166035, iteration: 364567
loss: 1.1921982765197754,grad_norm: 0.774029396629545, iteration: 364568
loss: 1.0160338878631592,grad_norm: 0.8497060569723273, iteration: 364569
loss: 0.9866688847541809,grad_norm: 0.9999996038369923, iteration: 364570
loss: 1.0235958099365234,grad_norm: 0.6929247475844427, iteration: 364571
loss: 1.1297417879104614,grad_norm: 0.8957057006762797, iteration: 364572
loss: 1.232559323310852,grad_norm: 0.9999994066237373, iteration: 364573
loss: 1.1319183111190796,grad_norm: 0.9999999156653601, iteration: 364574
loss: 1.15584397315979,grad_norm: 0.9999997071655216, iteration: 364575
loss: 1.0875904560089111,grad_norm: 0.9999991096844116, iteration: 364576
loss: 1.0755293369293213,grad_norm: 0.9999997329605782, iteration: 364577
loss: 1.003562569618225,grad_norm: 0.9527457199081548, iteration: 364578
loss: 1.0015431642532349,grad_norm: 0.9999993746306834, iteration: 364579
loss: 1.107332706451416,grad_norm: 0.9999992393697704, iteration: 364580
loss: 1.153072714805603,grad_norm: 0.9999998828012208, iteration: 364581
loss: 1.2379529476165771,grad_norm: 0.9999997223965746, iteration: 364582
loss: 1.1005505323410034,grad_norm: 0.9999996555069649, iteration: 364583
loss: 1.2645589113235474,grad_norm: 0.9999999158546339, iteration: 364584
loss: 1.002179503440857,grad_norm: 0.8116545040763666, iteration: 364585
loss: 1.0973896980285645,grad_norm: 0.9999997350078214, iteration: 364586
loss: 1.316142201423645,grad_norm: 0.9999998626954595, iteration: 364587
loss: 1.0043511390686035,grad_norm: 0.9999992909178339, iteration: 364588
loss: 1.02613365650177,grad_norm: 0.7141793166036262, iteration: 364589
loss: 0.9831649661064148,grad_norm: 0.780335461631341, iteration: 364590
loss: 1.0351155996322632,grad_norm: 0.9999991315756982, iteration: 364591
loss: 1.2190420627593994,grad_norm: 0.9999998691867615, iteration: 364592
loss: 1.0289276838302612,grad_norm: 0.9258473629429379, iteration: 364593
loss: 1.1012455224990845,grad_norm: 0.9999994437481444, iteration: 364594
loss: 0.9793336391448975,grad_norm: 0.8153559708833892, iteration: 364595
loss: 1.0715609788894653,grad_norm: 0.9999996190903694, iteration: 364596
loss: 1.0619215965270996,grad_norm: 0.9999998721863117, iteration: 364597
loss: 0.9769073724746704,grad_norm: 0.9999991104189199, iteration: 364598
loss: 1.0228804349899292,grad_norm: 0.9999996052402522, iteration: 364599
loss: 0.970940351486206,grad_norm: 0.9521902033683365, iteration: 364600
loss: 1.2243072986602783,grad_norm: 0.9999995430693968, iteration: 364601
loss: 1.0151567459106445,grad_norm: 0.6588302158194991, iteration: 364602
loss: 1.1611957550048828,grad_norm: 0.9999996971516666, iteration: 364603
loss: 0.999650239944458,grad_norm: 1.000000014025763, iteration: 364604
loss: 1.3107664585113525,grad_norm: 0.9999998222980725, iteration: 364605
loss: 0.9910469651222229,grad_norm: 0.811156828084578, iteration: 364606
loss: 1.074305534362793,grad_norm: 0.9999990983802998, iteration: 364607
loss: 1.1545242071151733,grad_norm: 0.9999998198388246, iteration: 364608
loss: 0.9916075468063354,grad_norm: 0.725879141528945, iteration: 364609
loss: 1.0330417156219482,grad_norm: 0.7033062009947321, iteration: 364610
loss: 1.1770496368408203,grad_norm: 0.9999996348895546, iteration: 364611
loss: 1.0335882902145386,grad_norm: 0.9747454765325295, iteration: 364612
loss: 1.0010236501693726,grad_norm: 0.9999990976124536, iteration: 364613
loss: 1.0568910837173462,grad_norm: 0.9999994094018546, iteration: 364614
loss: 1.1452205181121826,grad_norm: 0.9999222661391548, iteration: 364615
loss: 1.3493249416351318,grad_norm: 0.9999993321316644, iteration: 364616
loss: 1.0901795625686646,grad_norm: 0.9999993265146871, iteration: 364617
loss: 1.047629952430725,grad_norm: 0.7924773015690085, iteration: 364618
loss: 1.002670168876648,grad_norm: 0.9999992925925383, iteration: 364619
loss: 0.9969437122344971,grad_norm: 0.8412757169681083, iteration: 364620
loss: 1.0196629762649536,grad_norm: 0.8945202829908697, iteration: 364621
loss: 1.0449298620224,grad_norm: 0.9999999232160104, iteration: 364622
loss: 0.9770272970199585,grad_norm: 0.9999991130752989, iteration: 364623
loss: 1.1663404703140259,grad_norm: 0.9999997063723574, iteration: 364624
loss: 1.1285794973373413,grad_norm: 0.9999997980230375, iteration: 364625
loss: 1.1789332628250122,grad_norm: 0.999999275901258, iteration: 364626
loss: 1.1035199165344238,grad_norm: 0.9999999036191916, iteration: 364627
loss: 1.0241748094558716,grad_norm: 0.9999994034110408, iteration: 364628
loss: 1.0250320434570312,grad_norm: 0.7438745098730427, iteration: 364629
loss: 1.1100797653198242,grad_norm: 0.9999997925932267, iteration: 364630
loss: 1.0221809148788452,grad_norm: 0.8863826035128668, iteration: 364631
loss: 1.0464800596237183,grad_norm: 0.9999992184286098, iteration: 364632
loss: 1.03145432472229,grad_norm: 0.9999998995232194, iteration: 364633
loss: 1.0481950044631958,grad_norm: 0.9999991351523888, iteration: 364634
loss: 1.1126351356506348,grad_norm: 0.9999990825899495, iteration: 364635
loss: 1.005105972290039,grad_norm: 0.7021796510875642, iteration: 364636
loss: 1.2053396701812744,grad_norm: 0.9999997671212524, iteration: 364637
loss: 1.0057252645492554,grad_norm: 0.9999992007816686, iteration: 364638
loss: 1.0425915718078613,grad_norm: 0.8469095907827567, iteration: 364639
loss: 0.9994924068450928,grad_norm: 0.7962652789037343, iteration: 364640
loss: 1.019545555114746,grad_norm: 0.774636727110543, iteration: 364641
loss: 1.16416597366333,grad_norm: 0.9999996950041686, iteration: 364642
loss: 1.0552449226379395,grad_norm: 0.9999997163111044, iteration: 364643
loss: 0.9925671815872192,grad_norm: 0.9174496917413029, iteration: 364644
loss: 1.0370571613311768,grad_norm: 0.8278026278067715, iteration: 364645
loss: 1.1227028369903564,grad_norm: 0.9999993282201937, iteration: 364646
loss: 1.0166845321655273,grad_norm: 0.8581740124633122, iteration: 364647
loss: 1.0196826457977295,grad_norm: 0.8396595965109511, iteration: 364648
loss: 0.9869532585144043,grad_norm: 0.8104287315564831, iteration: 364649
loss: 0.9982548356056213,grad_norm: 0.8579240351770304, iteration: 364650
loss: 1.0449334383010864,grad_norm: 0.9999998061429831, iteration: 364651
loss: 1.0412492752075195,grad_norm: 0.9999995240901592, iteration: 364652
loss: 1.013365387916565,grad_norm: 0.7380573787109221, iteration: 364653
loss: 1.048970103263855,grad_norm: 0.8661232767622697, iteration: 364654
loss: 0.9909139275550842,grad_norm: 0.842782999826413, iteration: 364655
loss: 0.9980055093765259,grad_norm: 0.7161259479792671, iteration: 364656
loss: 1.2218232154846191,grad_norm: 0.9999997682796204, iteration: 364657
loss: 1.00762140750885,grad_norm: 0.812227993289191, iteration: 364658
loss: 1.2276287078857422,grad_norm: 0.9999994552421169, iteration: 364659
loss: 1.2802975177764893,grad_norm: 0.9999993903658387, iteration: 364660
loss: 1.098879337310791,grad_norm: 0.9999996958545267, iteration: 364661
loss: 0.9912586808204651,grad_norm: 0.7184878127649961, iteration: 364662
loss: 1.141085147857666,grad_norm: 0.9999994633543551, iteration: 364663
loss: 1.0149139165878296,grad_norm: 0.9999996642800625, iteration: 364664
loss: 1.0184801816940308,grad_norm: 0.9999991280919651, iteration: 364665
loss: 1.1854652166366577,grad_norm: 0.9999997364748808, iteration: 364666
loss: 1.01236891746521,grad_norm: 0.9439295088597999, iteration: 364667
loss: 1.0487861633300781,grad_norm: 0.9999989726859424, iteration: 364668
loss: 1.1487958431243896,grad_norm: 0.9999997943908646, iteration: 364669
loss: 0.9918087720870972,grad_norm: 0.9861738878112619, iteration: 364670
loss: 1.0204683542251587,grad_norm: 0.9999990829730709, iteration: 364671
loss: 0.9542333483695984,grad_norm: 0.9464341548267631, iteration: 364672
loss: 1.1087371110916138,grad_norm: 0.9999994608394616, iteration: 364673
loss: 1.0446085929870605,grad_norm: 0.9881377393918828, iteration: 364674
loss: 0.9872734546661377,grad_norm: 0.6945094158708257, iteration: 364675
loss: 1.1197986602783203,grad_norm: 0.9999998156381014, iteration: 364676
loss: 1.1177501678466797,grad_norm: 0.9999999950501706, iteration: 364677
loss: 1.0233018398284912,grad_norm: 0.8056220053538902, iteration: 364678
loss: 1.0506445169448853,grad_norm: 0.9823525717544326, iteration: 364679
loss: 1.0145829916000366,grad_norm: 0.8589313449923102, iteration: 364680
loss: 1.1072741746902466,grad_norm: 0.9978149990598962, iteration: 364681
loss: 0.9900721311569214,grad_norm: 0.8293198088064813, iteration: 364682
loss: 1.0690243244171143,grad_norm: 0.9999992153556302, iteration: 364683
loss: 1.1444227695465088,grad_norm: 1.000000066035713, iteration: 364684
loss: 1.0296869277954102,grad_norm: 0.8249436161564478, iteration: 364685
loss: 0.9885097742080688,grad_norm: 0.9226119938691437, iteration: 364686
loss: 0.9902940988540649,grad_norm: 0.9283731798888593, iteration: 364687
loss: 1.1204190254211426,grad_norm: 0.9999996508487702, iteration: 364688
loss: 0.9750005602836609,grad_norm: 0.8698005125013124, iteration: 364689
loss: 1.0543913841247559,grad_norm: 0.9999994743233597, iteration: 364690
loss: 1.0993050336837769,grad_norm: 0.9999990774939351, iteration: 364691
loss: 0.971895694732666,grad_norm: 0.9999997912350853, iteration: 364692
loss: 1.1193084716796875,grad_norm: 0.9999995742898686, iteration: 364693
loss: 1.1864755153656006,grad_norm: 0.999999292415391, iteration: 364694
loss: 0.9805412888526917,grad_norm: 0.8634057466951881, iteration: 364695
loss: 1.1301686763763428,grad_norm: 0.999999139812277, iteration: 364696
loss: 1.0221329927444458,grad_norm: 0.9100207213344458, iteration: 364697
loss: 1.1324609518051147,grad_norm: 0.9999990607466052, iteration: 364698
loss: 1.0365360975265503,grad_norm: 0.9999992931979953, iteration: 364699
loss: 1.1141217947006226,grad_norm: 0.999999977823236, iteration: 364700
loss: 0.9951756000518799,grad_norm: 0.954943080132027, iteration: 364701
loss: 1.1036211252212524,grad_norm: 0.9999997618804313, iteration: 364702
loss: 1.0688544511795044,grad_norm: 0.9948447811663262, iteration: 364703
loss: 1.0488606691360474,grad_norm: 0.999999042560134, iteration: 364704
loss: 1.0678220987319946,grad_norm: 0.8838082885306746, iteration: 364705
loss: 1.0997393131256104,grad_norm: 0.8995764631623059, iteration: 364706
loss: 1.0200237035751343,grad_norm: 0.6922560819483655, iteration: 364707
loss: 1.0005565881729126,grad_norm: 0.8535859485947386, iteration: 364708
loss: 1.0625158548355103,grad_norm: 0.7888485007933029, iteration: 364709
loss: 1.1001057624816895,grad_norm: 0.9999993585828656, iteration: 364710
loss: 0.9865829348564148,grad_norm: 0.858420356732018, iteration: 364711
loss: 1.05224609375,grad_norm: 0.9999991872640719, iteration: 364712
loss: 0.983715295791626,grad_norm: 0.7975940163545687, iteration: 364713
loss: 1.0109336376190186,grad_norm: 0.9062402892194629, iteration: 364714
loss: 1.0561519861221313,grad_norm: 0.7185326752168375, iteration: 364715
loss: 1.0667593479156494,grad_norm: 0.9442532095652758, iteration: 364716
loss: 0.9981635212898254,grad_norm: 0.9043116478097158, iteration: 364717
loss: 0.9926301836967468,grad_norm: 0.7546390816728749, iteration: 364718
loss: 1.0190832614898682,grad_norm: 0.8969389820625566, iteration: 364719
loss: 0.9940261244773865,grad_norm: 0.7891701526732722, iteration: 364720
loss: 1.001986026763916,grad_norm: 0.9999998795114181, iteration: 364721
loss: 1.012367606163025,grad_norm: 0.8866747475490195, iteration: 364722
loss: 1.101892352104187,grad_norm: 0.9999999177933113, iteration: 364723
loss: 1.0407072305679321,grad_norm: 0.9967201860492219, iteration: 364724
loss: 1.0328845977783203,grad_norm: 0.8163111284733363, iteration: 364725
loss: 1.1117985248565674,grad_norm: 0.9999995770800075, iteration: 364726
loss: 1.0265482664108276,grad_norm: 0.7713798444406152, iteration: 364727
loss: 1.0054105520248413,grad_norm: 0.999999816668803, iteration: 364728
loss: 1.03083074092865,grad_norm: 0.9088834983973834, iteration: 364729
loss: 1.1034146547317505,grad_norm: 0.999999673766346, iteration: 364730
loss: 1.1350260972976685,grad_norm: 0.8892980693404621, iteration: 364731
loss: 1.0869495868682861,grad_norm: 0.8779121761899549, iteration: 364732
loss: 1.00849187374115,grad_norm: 0.8233709142908578, iteration: 364733
loss: 1.0300683975219727,grad_norm: 0.8435542538377907, iteration: 364734
loss: 1.0365103483200073,grad_norm: 0.9999992095829123, iteration: 364735
loss: 0.977104663848877,grad_norm: 0.8644288962362722, iteration: 364736
loss: 1.0363609790802002,grad_norm: 0.8847595462899519, iteration: 364737
loss: 1.11426842212677,grad_norm: 0.999999646021523, iteration: 364738
loss: 1.012378215789795,grad_norm: 0.7679209294472694, iteration: 364739
loss: 1.0125863552093506,grad_norm: 0.9999991805498474, iteration: 364740
loss: 1.1626547574996948,grad_norm: 0.9999994147265078, iteration: 364741
loss: 1.0380126237869263,grad_norm: 0.9999997687602987, iteration: 364742
loss: 1.1402682065963745,grad_norm: 0.9999992938030782, iteration: 364743
loss: 1.0420360565185547,grad_norm: 0.9199387190872548, iteration: 364744
loss: 1.1742327213287354,grad_norm: 0.9999990572354122, iteration: 364745
loss: 0.9936292767524719,grad_norm: 0.7913885181741306, iteration: 364746
loss: 1.1069022417068481,grad_norm: 0.9999995537291195, iteration: 364747
loss: 1.101994514465332,grad_norm: 0.9999993463082264, iteration: 364748
loss: 1.0399043560028076,grad_norm: 0.9929444127635251, iteration: 364749
loss: 1.1035152673721313,grad_norm: 0.9540378642691391, iteration: 364750
loss: 0.9787219762802124,grad_norm: 0.8343715460289135, iteration: 364751
loss: 1.0296629667282104,grad_norm: 0.7308564378225771, iteration: 364752
loss: 0.9939311146736145,grad_norm: 0.755519027373165, iteration: 364753
loss: 0.9584709405899048,grad_norm: 0.8007552866195974, iteration: 364754
loss: 1.0354117155075073,grad_norm: 0.9999990783848076, iteration: 364755
loss: 1.0246918201446533,grad_norm: 0.8347767868403579, iteration: 364756
loss: 1.247312307357788,grad_norm: 0.999999686842406, iteration: 364757
loss: 0.9880512356758118,grad_norm: 0.8035370649854058, iteration: 364758
loss: 0.9858748316764832,grad_norm: 0.7400042104196882, iteration: 364759
loss: 1.1706675291061401,grad_norm: 0.9536302661038497, iteration: 364760
loss: 1.0955235958099365,grad_norm: 0.9999991160064475, iteration: 364761
loss: 1.1870895624160767,grad_norm: 0.9999997889798578, iteration: 364762
loss: 1.036049485206604,grad_norm: 0.9999993493747953, iteration: 364763
loss: 1.0437005758285522,grad_norm: 0.789983461931591, iteration: 364764
loss: 1.1878678798675537,grad_norm: 0.9999996778093073, iteration: 364765
loss: 1.0944324731826782,grad_norm: 0.9999995063500228, iteration: 364766
loss: 1.0224908590316772,grad_norm: 0.9999996491163051, iteration: 364767
loss: 1.0210561752319336,grad_norm: 0.7710769465665822, iteration: 364768
loss: 0.9832412600517273,grad_norm: 0.8766177303984797, iteration: 364769
loss: 1.0769987106323242,grad_norm: 0.9999994968533961, iteration: 364770
loss: 1.0158863067626953,grad_norm: 1.000000030861364, iteration: 364771
loss: 1.0586284399032593,grad_norm: 0.9999993835284845, iteration: 364772
loss: 0.972301185131073,grad_norm: 0.7847892023772788, iteration: 364773
loss: 1.0336002111434937,grad_norm: 0.8980857084426106, iteration: 364774
loss: 1.0201866626739502,grad_norm: 0.8214432625829657, iteration: 364775
loss: 0.9578536152839661,grad_norm: 0.9568417572025544, iteration: 364776
loss: 1.0820715427398682,grad_norm: 0.9316138573361087, iteration: 364777
loss: 1.0114946365356445,grad_norm: 0.8167553693038777, iteration: 364778
loss: 1.0497736930847168,grad_norm: 0.9197593787808489, iteration: 364779
loss: 1.0806962251663208,grad_norm: 0.9999998522855742, iteration: 364780
loss: 1.0817464590072632,grad_norm: 0.9999999419954498, iteration: 364781
loss: 1.0311297178268433,grad_norm: 0.7896874260624691, iteration: 364782
loss: 1.051074743270874,grad_norm: 0.8358972610458875, iteration: 364783
loss: 1.0004723072052002,grad_norm: 0.5799983432099769, iteration: 364784
loss: 0.9810088872909546,grad_norm: 0.9999998594477585, iteration: 364785
loss: 1.0291776657104492,grad_norm: 0.7061363508295326, iteration: 364786
loss: 1.147678017616272,grad_norm: 0.930053766654819, iteration: 364787
loss: 0.9906522035598755,grad_norm: 0.7545913057871697, iteration: 364788
loss: 1.1182079315185547,grad_norm: 0.9999995436654358, iteration: 364789
loss: 1.0532041788101196,grad_norm: 0.9999994158920407, iteration: 364790
loss: 1.0889856815338135,grad_norm: 0.9999997837213667, iteration: 364791
loss: 1.2196602821350098,grad_norm: 0.9999999922500239, iteration: 364792
loss: 1.0308260917663574,grad_norm: 0.9444540099162987, iteration: 364793
loss: 1.0831845998764038,grad_norm: 0.9999994576178765, iteration: 364794
loss: 0.9804328680038452,grad_norm: 0.8424629363963908, iteration: 364795
loss: 0.9897043108940125,grad_norm: 0.9254217597588572, iteration: 364796
loss: 1.011204719543457,grad_norm: 0.9999999471182627, iteration: 364797
loss: 1.016668438911438,grad_norm: 0.8054199350879487, iteration: 364798
loss: 1.0068427324295044,grad_norm: 0.9999993155954784, iteration: 364799
loss: 0.9760180711746216,grad_norm: 0.9999992193649331, iteration: 364800
loss: 1.0268561840057373,grad_norm: 0.9999998303945837, iteration: 364801
loss: 0.9996992349624634,grad_norm: 0.9999991014230708, iteration: 364802
loss: 1.025145173072815,grad_norm: 0.8105914781931881, iteration: 364803
loss: 1.0080486536026,grad_norm: 0.813517672869416, iteration: 364804
loss: 1.2116817235946655,grad_norm: 0.9999999628806503, iteration: 364805
loss: 1.0472761392593384,grad_norm: 0.8428296458445106, iteration: 364806
loss: 0.9866947531700134,grad_norm: 0.5958934920050581, iteration: 364807
loss: 1.1077601909637451,grad_norm: 0.8342372276082085, iteration: 364808
loss: 1.0304045677185059,grad_norm: 0.9999994046812493, iteration: 364809
loss: 1.086816430091858,grad_norm: 0.9999995039386459, iteration: 364810
loss: 1.0145949125289917,grad_norm: 0.8546939330163617, iteration: 364811
loss: 1.0180121660232544,grad_norm: 0.9999998166768367, iteration: 364812
loss: 1.023858904838562,grad_norm: 0.9120192175755463, iteration: 364813
loss: 1.1901326179504395,grad_norm: 0.9999994864596197, iteration: 364814
loss: 1.1619161367416382,grad_norm: 0.9999998345850876, iteration: 364815
loss: 1.0772361755371094,grad_norm: 0.9999999193522184, iteration: 364816
loss: 1.073618769645691,grad_norm: 0.778303121101311, iteration: 364817
loss: 1.0248218774795532,grad_norm: 0.9982079531526187, iteration: 364818
loss: 1.060361623764038,grad_norm: 0.9999993357070819, iteration: 364819
loss: 1.0043379068374634,grad_norm: 0.8678659949772464, iteration: 364820
loss: 1.1050654649734497,grad_norm: 0.9999995836701855, iteration: 364821
loss: 1.0024082660675049,grad_norm: 0.9999996914002457, iteration: 364822
loss: 0.9995639324188232,grad_norm: 0.9999999242924001, iteration: 364823
loss: 1.0262091159820557,grad_norm: 0.8834939436905884, iteration: 364824
loss: 1.007610559463501,grad_norm: 0.9999989853740188, iteration: 364825
loss: 1.0444517135620117,grad_norm: 0.7235425784390644, iteration: 364826
loss: 1.0115940570831299,grad_norm: 0.8230464796077502, iteration: 364827
loss: 1.0295268297195435,grad_norm: 0.7687653798843223, iteration: 364828
loss: 1.0193219184875488,grad_norm: 0.9999991786777075, iteration: 364829
loss: 1.016977071762085,grad_norm: 0.736590371173768, iteration: 364830
loss: 0.9984717965126038,grad_norm: 0.7691675317262849, iteration: 364831
loss: 1.056551218032837,grad_norm: 0.8092156972877637, iteration: 364832
loss: 1.045807123184204,grad_norm: 0.9999995756996275, iteration: 364833
loss: 1.1209373474121094,grad_norm: 0.9999996681132157, iteration: 364834
loss: 1.017832636833191,grad_norm: 0.9999991390587479, iteration: 364835
loss: 0.997387170791626,grad_norm: 0.8798645696943906, iteration: 364836
loss: 1.0548673868179321,grad_norm: 0.9999999868464859, iteration: 364837
loss: 1.016674280166626,grad_norm: 0.6856394772458883, iteration: 364838
loss: 0.9799607396125793,grad_norm: 0.9999993599677212, iteration: 364839
loss: 1.0350044965744019,grad_norm: 0.9999998890337386, iteration: 364840
loss: 1.0612527132034302,grad_norm: 0.9999991217521937, iteration: 364841
loss: 1.0430933237075806,grad_norm: 0.9999999131590033, iteration: 364842
loss: 1.0693155527114868,grad_norm: 0.9999990588558939, iteration: 364843
loss: 1.005805253982544,grad_norm: 0.8762266912629244, iteration: 364844
loss: 1.0558223724365234,grad_norm: 0.9446409650296426, iteration: 364845
loss: 1.061452031135559,grad_norm: 0.9999994940390518, iteration: 364846
loss: 1.0314414501190186,grad_norm: 0.7198639194116955, iteration: 364847
loss: 1.089493751525879,grad_norm: 0.9999999177821545, iteration: 364848
loss: 1.0925787687301636,grad_norm: 0.9999998766182278, iteration: 364849
loss: 1.1061211824417114,grad_norm: 0.9999995219992666, iteration: 364850
loss: 1.0700856447219849,grad_norm: 0.9999993513245073, iteration: 364851
loss: 1.0460729598999023,grad_norm: 0.9999996830347068, iteration: 364852
loss: 1.0571991205215454,grad_norm: 0.710401985490238, iteration: 364853
loss: 1.131833553314209,grad_norm: 0.9999994892059086, iteration: 364854
loss: 1.0038399696350098,grad_norm: 0.7065906796854886, iteration: 364855
loss: 1.0251373052597046,grad_norm: 0.9407415962090512, iteration: 364856
loss: 1.005236029624939,grad_norm: 0.7074758445473757, iteration: 364857
loss: 0.998938798904419,grad_norm: 0.8899658308078623, iteration: 364858
loss: 1.0391314029693604,grad_norm: 0.9999993764089701, iteration: 364859
loss: 1.0366201400756836,grad_norm: 0.7571341275156986, iteration: 364860
loss: 0.9930267333984375,grad_norm: 0.6707461167858462, iteration: 364861
loss: 1.0562372207641602,grad_norm: 0.9839831044330467, iteration: 364862
loss: 1.0793687105178833,grad_norm: 0.9999992222148124, iteration: 364863
loss: 1.002587914466858,grad_norm: 0.9999997295584752, iteration: 364864
loss: 1.0475788116455078,grad_norm: 0.9999998178990468, iteration: 364865
loss: 1.0673359632492065,grad_norm: 0.9999994084759977, iteration: 364866
loss: 1.0697391033172607,grad_norm: 0.9999997283941362, iteration: 364867
loss: 0.9953474998474121,grad_norm: 0.9999990601125255, iteration: 364868
loss: 1.0157183408737183,grad_norm: 0.9999998035579699, iteration: 364869
loss: 1.0357508659362793,grad_norm: 0.9999994258078726, iteration: 364870
loss: 1.0290781259536743,grad_norm: 0.9999993055055562, iteration: 364871
loss: 0.9878604412078857,grad_norm: 0.9212494459631198, iteration: 364872
loss: 1.0321069955825806,grad_norm: 0.8541802108123044, iteration: 364873
loss: 1.1704046726226807,grad_norm: 0.9999999093448937, iteration: 364874
loss: 1.0653811693191528,grad_norm: 0.8062306099558983, iteration: 364875
loss: 1.0082640647888184,grad_norm: 0.9702025884294644, iteration: 364876
loss: 0.975020706653595,grad_norm: 0.7383894796298394, iteration: 364877
loss: 1.0082197189331055,grad_norm: 0.9999993177210124, iteration: 364878
loss: 1.0571545362472534,grad_norm: 0.9999998686449997, iteration: 364879
loss: 1.023780345916748,grad_norm: 0.9999997242672753, iteration: 364880
loss: 0.9983841180801392,grad_norm: 0.7902278030508453, iteration: 364881
loss: 1.1458919048309326,grad_norm: 0.9999996153058094, iteration: 364882
loss: 1.1027600765228271,grad_norm: 0.9999998944947496, iteration: 364883
loss: 1.0868059396743774,grad_norm: 1.0000000407207963, iteration: 364884
loss: 0.9743070006370544,grad_norm: 0.7778854829907195, iteration: 364885
loss: 1.3024218082427979,grad_norm: 0.9999998307991614, iteration: 364886
loss: 1.0576496124267578,grad_norm: 0.9999995804681849, iteration: 364887
loss: 1.0167566537857056,grad_norm: 0.7262778998494152, iteration: 364888
loss: 1.1276885271072388,grad_norm: 0.9999991548424189, iteration: 364889
loss: 0.9701888561248779,grad_norm: 0.8864913253201329, iteration: 364890
loss: 1.0682452917099,grad_norm: 0.9999991540495357, iteration: 364891
loss: 1.0379058122634888,grad_norm: 0.7307662481007516, iteration: 364892
loss: 1.0223881006240845,grad_norm: 0.9006996550268571, iteration: 364893
loss: 1.0181117057800293,grad_norm: 0.8333984889226976, iteration: 364894
loss: 1.0619385242462158,grad_norm: 0.9999996196241024, iteration: 364895
loss: 1.019319772720337,grad_norm: 0.665223186664189, iteration: 364896
loss: 0.9838863015174866,grad_norm: 0.6871208041141295, iteration: 364897
loss: 1.0358319282531738,grad_norm: 0.9999993906113633, iteration: 364898
loss: 1.022703766822815,grad_norm: 0.9015906028783761, iteration: 364899
loss: 1.0778018236160278,grad_norm: 0.9999990644593814, iteration: 364900
loss: 1.102768063545227,grad_norm: 0.9999999500351202, iteration: 364901
loss: 1.0256288051605225,grad_norm: 0.753390589847129, iteration: 364902
loss: 0.9952520132064819,grad_norm: 0.8264194228602939, iteration: 364903
loss: 1.0109858512878418,grad_norm: 0.9999992233394885, iteration: 364904
loss: 1.061532735824585,grad_norm: 0.9999999595126512, iteration: 364905
loss: 1.0391379594802856,grad_norm: 0.9999989981014883, iteration: 364906
loss: 1.025702953338623,grad_norm: 0.7456175247219654, iteration: 364907
loss: 1.0567628145217896,grad_norm: 0.741724436315334, iteration: 364908
loss: 1.0288774967193604,grad_norm: 0.8299985033346039, iteration: 364909
loss: 1.1086881160736084,grad_norm: 0.9999993333686412, iteration: 364910
loss: 1.0224634408950806,grad_norm: 0.8173924163738133, iteration: 364911
loss: 1.0267996788024902,grad_norm: 0.8533938763175112, iteration: 364912
loss: 0.9981180429458618,grad_norm: 0.870067259768237, iteration: 364913
loss: 1.0828063488006592,grad_norm: 0.999999549298833, iteration: 364914
loss: 1.1091835498809814,grad_norm: 0.9704725866881153, iteration: 364915
loss: 1.0807201862335205,grad_norm: 0.8328430574493104, iteration: 364916
loss: 1.0540227890014648,grad_norm: 0.8989294758204158, iteration: 364917
loss: 1.0369905233383179,grad_norm: 0.9801174336822567, iteration: 364918
loss: 1.1316190958023071,grad_norm: 0.9999998061901875, iteration: 364919
loss: 1.2474242448806763,grad_norm: 1.0000000628541894, iteration: 364920
loss: 1.0039364099502563,grad_norm: 0.7499332890380577, iteration: 364921
loss: 1.1358294486999512,grad_norm: 0.9999990701224303, iteration: 364922
loss: 1.1664156913757324,grad_norm: 0.9999998304682626, iteration: 364923
loss: 1.0480490922927856,grad_norm: 0.9999999662615974, iteration: 364924
loss: 1.0330971479415894,grad_norm: 0.9177594263928235, iteration: 364925
loss: 0.9848368167877197,grad_norm: 0.9999994991414639, iteration: 364926
loss: 1.0033856630325317,grad_norm: 0.9807712658497535, iteration: 364927
loss: 1.0909841060638428,grad_norm: 0.9197474124775432, iteration: 364928
loss: 1.2394392490386963,grad_norm: 0.9999998901826466, iteration: 364929
loss: 1.0867340564727783,grad_norm: 0.9999995002266833, iteration: 364930
loss: 1.0372309684753418,grad_norm: 0.9999998844485277, iteration: 364931
loss: 1.041968822479248,grad_norm: 0.8559261446708699, iteration: 364932
loss: 1.1283148527145386,grad_norm: 0.9999997938658574, iteration: 364933
loss: 1.0109469890594482,grad_norm: 0.9999993875682673, iteration: 364934
loss: 1.0245323181152344,grad_norm: 0.7597777230903295, iteration: 364935
loss: 1.0955257415771484,grad_norm: 0.9999992193255653, iteration: 364936
loss: 1.0885648727416992,grad_norm: 0.9999999796112179, iteration: 364937
loss: 1.002734661102295,grad_norm: 0.8427509040058704, iteration: 364938
loss: 1.0384196043014526,grad_norm: 0.772812211106454, iteration: 364939
loss: 1.0796314477920532,grad_norm: 0.8761906284835321, iteration: 364940
loss: 1.047729253768921,grad_norm: 0.8072844102673724, iteration: 364941
loss: 1.1364648342132568,grad_norm: 0.9999993036023537, iteration: 364942
loss: 0.9947130084037781,grad_norm: 0.9037570828084265, iteration: 364943
loss: 1.1475082635879517,grad_norm: 0.9999996958195403, iteration: 364944
loss: 1.197714924812317,grad_norm: 0.9999999751106471, iteration: 364945
loss: 0.9832313656806946,grad_norm: 0.7645943473903377, iteration: 364946
loss: 1.0967789888381958,grad_norm: 1.0000000592113374, iteration: 364947
loss: 1.0022910833358765,grad_norm: 0.7500456985310906, iteration: 364948
loss: 1.1062613725662231,grad_norm: 0.9999990465962095, iteration: 364949
loss: 1.1381944417953491,grad_norm: 0.9999998958836116, iteration: 364950
loss: 1.110819697380066,grad_norm: 0.814812749369238, iteration: 364951
loss: 0.9836166501045227,grad_norm: 0.9561340851960087, iteration: 364952
loss: 1.1565216779708862,grad_norm: 0.999999938133358, iteration: 364953
loss: 1.1599327325820923,grad_norm: 0.9999999275699241, iteration: 364954
loss: 1.08643639087677,grad_norm: 0.9999993615426934, iteration: 364955
loss: 1.1682696342468262,grad_norm: 0.9999999442909984, iteration: 364956
loss: 1.0655217170715332,grad_norm: 0.9999997222059739, iteration: 364957
loss: 1.040320873260498,grad_norm: 1.0000000643140188, iteration: 364958
loss: 1.180283546447754,grad_norm: 0.9999999482727294, iteration: 364959
loss: 1.2920420169830322,grad_norm: 0.9999998127606162, iteration: 364960
loss: 1.099224328994751,grad_norm: 0.9999997336149496, iteration: 364961
loss: 1.0381500720977783,grad_norm: 0.9999990572703914, iteration: 364962
loss: 1.186906099319458,grad_norm: 0.9999999412412662, iteration: 364963
loss: 1.0956531763076782,grad_norm: 0.912280645253189, iteration: 364964
loss: 1.086389183998108,grad_norm: 0.999999349850898, iteration: 364965
loss: 1.0196506977081299,grad_norm: 0.9999991191120459, iteration: 364966
loss: 1.124448299407959,grad_norm: 0.9999998093639213, iteration: 364967
loss: 1.0547609329223633,grad_norm: 0.9121282601664521, iteration: 364968
loss: 1.002854585647583,grad_norm: 0.8547827468226272, iteration: 364969
loss: 1.02492094039917,grad_norm: 0.9999993908386873, iteration: 364970
loss: 1.0180448293685913,grad_norm: 0.7500741662111972, iteration: 364971
loss: 1.0309799909591675,grad_norm: 0.967152282351879, iteration: 364972
loss: 1.059195637702942,grad_norm: 0.9999996451897921, iteration: 364973
loss: 1.044484257698059,grad_norm: 0.9999999571228358, iteration: 364974
loss: 0.9776613116264343,grad_norm: 0.6998092251366779, iteration: 364975
loss: 1.0710018873214722,grad_norm: 0.8362283178554879, iteration: 364976
loss: 1.1671476364135742,grad_norm: 0.9999997322353261, iteration: 364977
loss: 1.2284530401229858,grad_norm: 0.9999998964102451, iteration: 364978
loss: 1.0811047554016113,grad_norm: 0.9470166899306333, iteration: 364979
loss: 1.0728330612182617,grad_norm: 0.9999991736482002, iteration: 364980
loss: 1.058941125869751,grad_norm: 0.9999990374904202, iteration: 364981
loss: 1.0630680322647095,grad_norm: 0.8374317134019688, iteration: 364982
loss: 1.0892095565795898,grad_norm: 0.8044829543888954, iteration: 364983
loss: 1.2356761693954468,grad_norm: 0.9999999860393233, iteration: 364984
loss: 1.1455472707748413,grad_norm: 0.9999997501681337, iteration: 364985
loss: 1.1686139106750488,grad_norm: 0.9999993762881759, iteration: 364986
loss: 1.1671905517578125,grad_norm: 0.9999996685353995, iteration: 364987
loss: 1.0716933012008667,grad_norm: 0.9999998620529886, iteration: 364988
loss: 1.1187613010406494,grad_norm: 0.9999998192345412, iteration: 364989
loss: 1.0542731285095215,grad_norm: 0.8639071327146428, iteration: 364990
loss: 1.0483193397521973,grad_norm: 0.9999992342251629, iteration: 364991
loss: 1.0014911890029907,grad_norm: 0.9191594902728376, iteration: 364992
loss: 0.9878484010696411,grad_norm: 0.6720744616488887, iteration: 364993
loss: 1.020951509475708,grad_norm: 0.8111241410939148, iteration: 364994
loss: 0.9760996699333191,grad_norm: 0.8100220180869204, iteration: 364995
loss: 1.0303078889846802,grad_norm: 0.9999998790950134, iteration: 364996
loss: 1.0255929231643677,grad_norm: 0.9999998340214514, iteration: 364997
loss: 0.9287976622581482,grad_norm: 0.9999991096545447, iteration: 364998
loss: 0.9928234815597534,grad_norm: 0.9999998425482811, iteration: 364999
loss: 1.053348183631897,grad_norm: 0.8899961604504913, iteration: 365000
loss: 1.024477243423462,grad_norm: 0.8967260971616096, iteration: 365001
loss: 1.033986210823059,grad_norm: 0.9704467979716831, iteration: 365002
loss: 1.0146759748458862,grad_norm: 0.7618941851168117, iteration: 365003
loss: 1.0476603507995605,grad_norm: 0.9624052331188433, iteration: 365004
loss: 1.0410230159759521,grad_norm: 0.9999994239163705, iteration: 365005
loss: 1.0199791193008423,grad_norm: 0.9075855444251454, iteration: 365006
loss: 0.9971330761909485,grad_norm: 0.7854259868283012, iteration: 365007
loss: 1.0261045694351196,grad_norm: 0.7053045494297948, iteration: 365008
loss: 1.0018301010131836,grad_norm: 0.9999991165912179, iteration: 365009
loss: 0.9996083378791809,grad_norm: 0.9999998779784124, iteration: 365010
loss: 1.0119993686676025,grad_norm: 0.9999991566236921, iteration: 365011
loss: 1.017880916595459,grad_norm: 0.9999998977148274, iteration: 365012
loss: 1.0648589134216309,grad_norm: 0.9999991146183442, iteration: 365013
loss: 1.113213062286377,grad_norm: 0.950954958949937, iteration: 365014
loss: 1.0395621061325073,grad_norm: 0.9449779658590951, iteration: 365015
loss: 0.9893287420272827,grad_norm: 0.8912786947342136, iteration: 365016
loss: 1.0107887983322144,grad_norm: 0.9999991826665845, iteration: 365017
loss: 1.0704102516174316,grad_norm: 0.8311907503705176, iteration: 365018
loss: 1.0143758058547974,grad_norm: 0.9999990791503439, iteration: 365019
loss: 1.015088438987732,grad_norm: 0.892071384300273, iteration: 365020
loss: 1.0039193630218506,grad_norm: 0.6965863471761286, iteration: 365021
loss: 1.0258575677871704,grad_norm: 0.7699701092522154, iteration: 365022
loss: 1.0307210683822632,grad_norm: 0.899759161156668, iteration: 365023
loss: 0.987750768661499,grad_norm: 0.9408269942893609, iteration: 365024
loss: 1.0216913223266602,grad_norm: 0.9999994916841259, iteration: 365025
loss: 1.020279884338379,grad_norm: 0.855315164791629, iteration: 365026
loss: 1.054442048072815,grad_norm: 0.9999992128349182, iteration: 365027
loss: 1.006998896598816,grad_norm: 0.7095113022229138, iteration: 365028
loss: 0.9821679592132568,grad_norm: 0.9999998229354734, iteration: 365029
loss: 1.0457459688186646,grad_norm: 0.9999995251492984, iteration: 365030
loss: 1.0506075620651245,grad_norm: 0.9999998217992606, iteration: 365031
loss: 1.0911442041397095,grad_norm: 0.9999993674217453, iteration: 365032
loss: 1.015204906463623,grad_norm: 0.9999996187106486, iteration: 365033
loss: 0.9960642457008362,grad_norm: 0.9186235391371879, iteration: 365034
loss: 1.0206526517868042,grad_norm: 0.9999990307803438, iteration: 365035
loss: 1.0271912813186646,grad_norm: 0.7583554842323988, iteration: 365036
loss: 1.104833722114563,grad_norm: 0.999999816100168, iteration: 365037
loss: 1.0842602252960205,grad_norm: 0.9999993883853446, iteration: 365038
loss: 1.0150954723358154,grad_norm: 0.863020629367123, iteration: 365039
loss: 1.0003812313079834,grad_norm: 0.7516961320178728, iteration: 365040
loss: 1.0608711242675781,grad_norm: 0.9999996071070179, iteration: 365041
loss: 1.0051788091659546,grad_norm: 0.8481674230855358, iteration: 365042
loss: 1.0017720460891724,grad_norm: 0.9128751426790653, iteration: 365043
loss: 1.0173513889312744,grad_norm: 0.9915426751892642, iteration: 365044
loss: 1.0569030046463013,grad_norm: 0.9999992953733932, iteration: 365045
loss: 0.9988945722579956,grad_norm: 0.8700683305300192, iteration: 365046
loss: 0.9671064615249634,grad_norm: 0.7864923626158516, iteration: 365047
loss: 1.0569988489151,grad_norm: 0.9999996547205187, iteration: 365048
loss: 1.0794804096221924,grad_norm: 0.90270640518644, iteration: 365049
loss: 1.025390863418579,grad_norm: 0.82340368899014, iteration: 365050
loss: 1.0003657341003418,grad_norm: 0.9999997112277766, iteration: 365051
loss: 0.976882815361023,grad_norm: 0.9246506298713619, iteration: 365052
loss: 0.9553365111351013,grad_norm: 0.9999993640815426, iteration: 365053
loss: 1.0654438734054565,grad_norm: 0.9999991145184516, iteration: 365054
loss: 0.9937682747840881,grad_norm: 0.7915663888669565, iteration: 365055
loss: 1.0172252655029297,grad_norm: 0.7287493182889186, iteration: 365056
loss: 1.02585768699646,grad_norm: 0.9188821317361748, iteration: 365057
loss: 1.0603824853897095,grad_norm: 0.9999994664035408, iteration: 365058
loss: 1.1386438608169556,grad_norm: 0.9999991626452822, iteration: 365059
loss: 1.0491001605987549,grad_norm: 0.9999997549097296, iteration: 365060
loss: 1.0366668701171875,grad_norm: 0.8923641199676883, iteration: 365061
loss: 1.0203849077224731,grad_norm: 0.7970954977543688, iteration: 365062
loss: 1.011733055114746,grad_norm: 0.8901454224297236, iteration: 365063
loss: 1.0330252647399902,grad_norm: 0.999999264648012, iteration: 365064
loss: 1.0039758682250977,grad_norm: 0.8263991839716922, iteration: 365065
loss: 1.0675352811813354,grad_norm: 0.8512794631039081, iteration: 365066
loss: 1.0056416988372803,grad_norm: 0.8229255663606323, iteration: 365067
loss: 1.0573577880859375,grad_norm: 0.9999998197239445, iteration: 365068
loss: 0.9914422035217285,grad_norm: 0.8626322697836114, iteration: 365069
loss: 0.9481155276298523,grad_norm: 0.7841848836411908, iteration: 365070
loss: 1.0167049169540405,grad_norm: 0.7727700293470735, iteration: 365071
loss: 0.9953065514564514,grad_norm: 0.9999991558643596, iteration: 365072
loss: 1.0398057699203491,grad_norm: 0.8293292044031958, iteration: 365073
loss: 1.0153616666793823,grad_norm: 0.7930514274811191, iteration: 365074
loss: 1.0968807935714722,grad_norm: 0.8932174024046038, iteration: 365075
loss: 1.017026662826538,grad_norm: 0.7219474339368022, iteration: 365076
loss: 1.0322874784469604,grad_norm: 0.8368941209959644, iteration: 365077
loss: 1.0116384029388428,grad_norm: 0.9999992515001926, iteration: 365078
loss: 0.9855594038963318,grad_norm: 0.7680038480892154, iteration: 365079
loss: 1.0006496906280518,grad_norm: 0.9099310208874933, iteration: 365080
loss: 0.9996019005775452,grad_norm: 0.7836187794343618, iteration: 365081
loss: 1.0164357423782349,grad_norm: 0.9650970590541383, iteration: 365082
loss: 1.0478678941726685,grad_norm: 0.8506535577462649, iteration: 365083
loss: 1.0302903652191162,grad_norm: 0.9999990435695711, iteration: 365084
loss: 0.9845645427703857,grad_norm: 0.8269862769879536, iteration: 365085
loss: 1.0038400888442993,grad_norm: 0.8206704591820935, iteration: 365086
loss: 1.103594422340393,grad_norm: 0.9999991444782612, iteration: 365087
loss: 1.006621241569519,grad_norm: 0.8095504940530367, iteration: 365088
loss: 1.007304072380066,grad_norm: 0.812311086277575, iteration: 365089
loss: 1.0242269039154053,grad_norm: 0.9999996535060067, iteration: 365090
loss: 0.9765176773071289,grad_norm: 0.6920353329773625, iteration: 365091
loss: 0.998232901096344,grad_norm: 0.9999992257943376, iteration: 365092
loss: 1.091280460357666,grad_norm: 0.949290244246489, iteration: 365093
loss: 1.0801841020584106,grad_norm: 0.9999991991250456, iteration: 365094
loss: 1.1215016841888428,grad_norm: 0.9999990118377045, iteration: 365095
loss: 1.001222014427185,grad_norm: 0.9999998990204098, iteration: 365096
loss: 1.0212689638137817,grad_norm: 0.7762794790805331, iteration: 365097
loss: 1.0399749279022217,grad_norm: 0.8504461952866665, iteration: 365098
loss: 1.0113253593444824,grad_norm: 0.9133370145509142, iteration: 365099
loss: 0.9890938401222229,grad_norm: 0.7926096207640588, iteration: 365100
loss: 1.0499943494796753,grad_norm: 0.9290600962968423, iteration: 365101
loss: 1.0116368532180786,grad_norm: 0.9999991099162707, iteration: 365102
loss: 1.1297158002853394,grad_norm: 0.9999999230932524, iteration: 365103
loss: 0.9740443825721741,grad_norm: 0.9999994564229158, iteration: 365104
loss: 0.9967119693756104,grad_norm: 0.8340559979774581, iteration: 365105
loss: 1.0913900136947632,grad_norm: 0.9999998950570835, iteration: 365106
loss: 1.0086655616760254,grad_norm: 0.9999995412689626, iteration: 365107
loss: 1.045986533164978,grad_norm: 0.9999992155514321, iteration: 365108
loss: 1.0754852294921875,grad_norm: 0.9999991731990687, iteration: 365109
loss: 1.0204521417617798,grad_norm: 0.8344848254996142, iteration: 365110
loss: 1.0021591186523438,grad_norm: 0.723898321019669, iteration: 365111
loss: 1.026157021522522,grad_norm: 0.7053920818066101, iteration: 365112
loss: 1.0196635723114014,grad_norm: 0.8309434995372024, iteration: 365113
loss: 1.1542414426803589,grad_norm: 0.9999999486670363, iteration: 365114
loss: 1.0096046924591064,grad_norm: 0.8164504489560224, iteration: 365115
loss: 1.0331774950027466,grad_norm: 0.8737374716713079, iteration: 365116
loss: 1.0945303440093994,grad_norm: 0.8401489910991811, iteration: 365117
loss: 1.0680245161056519,grad_norm: 0.7191344680433698, iteration: 365118
loss: 0.942038357257843,grad_norm: 0.8420897433633592, iteration: 365119
loss: 0.9624621272087097,grad_norm: 0.8176488873443732, iteration: 365120
loss: 1.0689280033111572,grad_norm: 0.9999993983843583, iteration: 365121
loss: 1.0331785678863525,grad_norm: 0.8957813906728086, iteration: 365122
loss: 1.083593726158142,grad_norm: 0.999999061427557, iteration: 365123
loss: 1.07552969455719,grad_norm: 0.9999991815767929, iteration: 365124
loss: 0.9678822159767151,grad_norm: 0.8552786632299296, iteration: 365125
loss: 0.9877361059188843,grad_norm: 0.8946108842840723, iteration: 365126
loss: 1.0056613683700562,grad_norm: 0.7854774229697866, iteration: 365127
loss: 0.9806289076805115,grad_norm: 0.7960245840147807, iteration: 365128
loss: 0.9745888710021973,grad_norm: 0.8332538832834421, iteration: 365129
loss: 1.148250937461853,grad_norm: 0.9999996535055434, iteration: 365130
loss: 1.023363709449768,grad_norm: 0.8467744594113191, iteration: 365131
loss: 0.9992326498031616,grad_norm: 0.7658083554712016, iteration: 365132
loss: 0.9574047327041626,grad_norm: 0.8073332224241965, iteration: 365133
loss: 1.0186967849731445,grad_norm: 0.9999994803540779, iteration: 365134
loss: 0.9630416631698608,grad_norm: 0.7100756201804549, iteration: 365135
loss: 1.1647907495498657,grad_norm: 0.99999996781815, iteration: 365136
loss: 1.111029028892517,grad_norm: 0.9999994000493281, iteration: 365137
loss: 1.0616333484649658,grad_norm: 0.894665845689421, iteration: 365138
loss: 1.0260484218597412,grad_norm: 0.9999989803897462, iteration: 365139
loss: 1.042475938796997,grad_norm: 0.9999994590068939, iteration: 365140
loss: 0.970484733581543,grad_norm: 0.8523685950728543, iteration: 365141
loss: 1.0161947011947632,grad_norm: 0.9999997472572366, iteration: 365142
loss: 1.0545475482940674,grad_norm: 0.9999999856162632, iteration: 365143
loss: 0.9798797369003296,grad_norm: 0.7791709634330514, iteration: 365144
loss: 1.0164389610290527,grad_norm: 0.9999996659671054, iteration: 365145
loss: 1.0202808380126953,grad_norm: 0.7523999648285199, iteration: 365146
loss: 1.020037055015564,grad_norm: 0.9014083653292877, iteration: 365147
loss: 0.9701393246650696,grad_norm: 0.6812324619935243, iteration: 365148
loss: 0.9889156222343445,grad_norm: 0.8062048357139507, iteration: 365149
loss: 1.0067040920257568,grad_norm: 0.8188434921930021, iteration: 365150
loss: 1.0104563236236572,grad_norm: 0.799174192418433, iteration: 365151
loss: 1.0653403997421265,grad_norm: 0.9999995865423903, iteration: 365152
loss: 1.0403882265090942,grad_norm: 0.9131120258327128, iteration: 365153
loss: 1.0073761940002441,grad_norm: 0.8305211174289113, iteration: 365154
loss: 1.021234393119812,grad_norm: 0.9634810059065124, iteration: 365155
loss: 0.989138126373291,grad_norm: 0.7776751244096697, iteration: 365156
loss: 0.9839658737182617,grad_norm: 0.9999991554645798, iteration: 365157
loss: 1.012995719909668,grad_norm: 0.9924978934436963, iteration: 365158
loss: 1.1780054569244385,grad_norm: 0.8448157892009267, iteration: 365159
loss: 0.9959471225738525,grad_norm: 0.7627051632221842, iteration: 365160
loss: 1.0309290885925293,grad_norm: 0.7889398669344988, iteration: 365161
loss: 0.9798253774642944,grad_norm: 0.975288148538688, iteration: 365162
loss: 1.0512703657150269,grad_norm: 0.9999995268706138, iteration: 365163
loss: 1.0466817617416382,grad_norm: 0.8862793075228363, iteration: 365164
loss: 0.9589188694953918,grad_norm: 0.7041362983084476, iteration: 365165
loss: 1.0087730884552002,grad_norm: 0.6981074166844705, iteration: 365166
loss: 1.0376442670822144,grad_norm: 0.8205492659203544, iteration: 365167
loss: 1.0131691694259644,grad_norm: 0.99999953439405, iteration: 365168
loss: 0.954973578453064,grad_norm: 0.7950976001501425, iteration: 365169
loss: 1.065949559211731,grad_norm: 0.8440080211743244, iteration: 365170
loss: 1.0429548025131226,grad_norm: 0.9999991025815173, iteration: 365171
loss: 0.9799765944480896,grad_norm: 0.7048916308759652, iteration: 365172
loss: 1.0602630376815796,grad_norm: 0.9999998881509904, iteration: 365173
loss: 1.1697683334350586,grad_norm: 0.9999996375492642, iteration: 365174
loss: 0.9860398173332214,grad_norm: 0.8819068511871049, iteration: 365175
loss: 1.045543909072876,grad_norm: 0.8581785403787002, iteration: 365176
loss: 1.0738662481307983,grad_norm: 0.9970237358680241, iteration: 365177
loss: 1.0092648267745972,grad_norm: 0.8566402554339533, iteration: 365178
loss: 1.1449425220489502,grad_norm: 0.9626180511287856, iteration: 365179
loss: 1.0133652687072754,grad_norm: 0.8692279145525038, iteration: 365180
loss: 1.0213823318481445,grad_norm: 0.8626724985282128, iteration: 365181
loss: 0.997523844242096,grad_norm: 0.9999995947832847, iteration: 365182
loss: 1.0568262338638306,grad_norm: 0.9000473402513214, iteration: 365183
loss: 1.0396146774291992,grad_norm: 0.966001681212538, iteration: 365184
loss: 1.1149570941925049,grad_norm: 0.9999993379514597, iteration: 365185
loss: 1.0223723649978638,grad_norm: 0.9999991389046265, iteration: 365186
loss: 0.9870959520339966,grad_norm: 0.7950723232676992, iteration: 365187
loss: 1.0810004472732544,grad_norm: 0.9999992190671242, iteration: 365188
loss: 1.0948771238327026,grad_norm: 0.8323866363395233, iteration: 365189
loss: 1.0203825235366821,grad_norm: 0.8596949337325192, iteration: 365190
loss: 1.0719046592712402,grad_norm: 0.9627355681664708, iteration: 365191
loss: 0.9696775674819946,grad_norm: 0.9542672329985455, iteration: 365192
loss: 0.9995895624160767,grad_norm: 0.7519808323258659, iteration: 365193
loss: 0.972895085811615,grad_norm: 0.8957659046470171, iteration: 365194
loss: 1.0282833576202393,grad_norm: 0.9999994354601184, iteration: 365195
loss: 1.0332605838775635,grad_norm: 0.9999991282269237, iteration: 365196
loss: 0.9950575828552246,grad_norm: 0.8211931122227588, iteration: 365197
loss: 0.9793058633804321,grad_norm: 0.7676481472400715, iteration: 365198
loss: 1.0178946256637573,grad_norm: 0.9999992235707771, iteration: 365199
loss: 1.0297527313232422,grad_norm: 0.7146646618043826, iteration: 365200
loss: 1.0099494457244873,grad_norm: 0.8212837771373849, iteration: 365201
loss: 1.037846565246582,grad_norm: 0.9999998224788158, iteration: 365202
loss: 1.0144151449203491,grad_norm: 0.9999990895664282, iteration: 365203
loss: 1.0222848653793335,grad_norm: 0.7962019355440565, iteration: 365204
loss: 0.9718367457389832,grad_norm: 0.807458220496342, iteration: 365205
loss: 0.9973704218864441,grad_norm: 0.9999993230644221, iteration: 365206
loss: 1.010310173034668,grad_norm: 0.7664288536866255, iteration: 365207
loss: 0.9838765263557434,grad_norm: 0.7912999926485792, iteration: 365208
loss: 1.0278862714767456,grad_norm: 0.9601259373405144, iteration: 365209
loss: 1.034700870513916,grad_norm: 0.9999990034373022, iteration: 365210
loss: 0.991807222366333,grad_norm: 0.8763370781849991, iteration: 365211
loss: 0.9915967583656311,grad_norm: 0.8385731357442444, iteration: 365212
loss: 1.0676404237747192,grad_norm: 0.6940651470314466, iteration: 365213
loss: 0.9842313528060913,grad_norm: 0.9234725945489556, iteration: 365214
loss: 0.9875218868255615,grad_norm: 0.8579139795067238, iteration: 365215
loss: 1.0127818584442139,grad_norm: 0.7131661931572084, iteration: 365216
loss: 1.0547276735305786,grad_norm: 0.9999997754582576, iteration: 365217
loss: 1.1337436437606812,grad_norm: 0.9999991276857678, iteration: 365218
loss: 1.0905095338821411,grad_norm: 0.9999999283917483, iteration: 365219
loss: 1.1177492141723633,grad_norm: 0.9999992333019827, iteration: 365220
loss: 1.0719410181045532,grad_norm: 0.8274249544446503, iteration: 365221
loss: 0.9995887279510498,grad_norm: 0.766635597367982, iteration: 365222
loss: 1.0541129112243652,grad_norm: 0.9999994574307364, iteration: 365223
loss: 1.0733275413513184,grad_norm: 0.9999992346128509, iteration: 365224
loss: 1.0318273305892944,grad_norm: 0.8742940057793884, iteration: 365225
loss: 1.0177102088928223,grad_norm: 0.7896230314462622, iteration: 365226
loss: 1.0604137182235718,grad_norm: 0.9171625441181934, iteration: 365227
loss: 1.0528193712234497,grad_norm: 0.9291062323978652, iteration: 365228
loss: 1.0191650390625,grad_norm: 0.8349645456348447, iteration: 365229
loss: 1.0085011720657349,grad_norm: 0.9999998806512571, iteration: 365230
loss: 1.0215874910354614,grad_norm: 0.9767907909376651, iteration: 365231
loss: 1.1277453899383545,grad_norm: 0.9999992374473665, iteration: 365232
loss: 0.9986899495124817,grad_norm: 0.7980501425681447, iteration: 365233
loss: 0.9905216097831726,grad_norm: 0.8757590646925864, iteration: 365234
loss: 0.9878749847412109,grad_norm: 0.9999996776559656, iteration: 365235
loss: 1.0211211442947388,grad_norm: 0.9999997466408788, iteration: 365236
loss: 1.0006474256515503,grad_norm: 0.7770138912498633, iteration: 365237
loss: 1.042762041091919,grad_norm: 0.8468219022349979, iteration: 365238
loss: 0.9944545030593872,grad_norm: 0.9999997578398357, iteration: 365239
loss: 1.0371607542037964,grad_norm: 0.9999997037619495, iteration: 365240
loss: 1.0240615606307983,grad_norm: 0.7860357085020514, iteration: 365241
loss: 1.0515648126602173,grad_norm: 0.9999989920483611, iteration: 365242
loss: 1.0304293632507324,grad_norm: 0.939034943714196, iteration: 365243
loss: 1.0501742362976074,grad_norm: 0.9999998619050511, iteration: 365244
loss: 1.0522034168243408,grad_norm: 0.8528086879102988, iteration: 365245
loss: 0.94798344373703,grad_norm: 0.7410700652681378, iteration: 365246
loss: 1.0769071578979492,grad_norm: 0.9999995087841161, iteration: 365247
loss: 1.0203408002853394,grad_norm: 0.9999999069868948, iteration: 365248
loss: 1.0496760606765747,grad_norm: 0.8944643269003052, iteration: 365249
loss: 0.9986588954925537,grad_norm: 0.9999991243332288, iteration: 365250
loss: 1.0223413705825806,grad_norm: 0.8938256880235319, iteration: 365251
loss: 1.0193865299224854,grad_norm: 0.942478005241567, iteration: 365252
loss: 1.0035074949264526,grad_norm: 0.9769284500289828, iteration: 365253
loss: 0.9854307770729065,grad_norm: 0.8062248645898614, iteration: 365254
loss: 1.0417739152908325,grad_norm: 0.9999992220218479, iteration: 365255
loss: 0.9621452689170837,grad_norm: 0.7722256520703763, iteration: 365256
loss: 1.0035319328308105,grad_norm: 0.8493435778323141, iteration: 365257
loss: 1.0128580331802368,grad_norm: 0.7538353480162346, iteration: 365258
loss: 1.0031846761703491,grad_norm: 0.8713754865026341, iteration: 365259
loss: 1.0859808921813965,grad_norm: 0.9999993101457192, iteration: 365260
loss: 1.0245506763458252,grad_norm: 0.7460866537889477, iteration: 365261
loss: 0.9892855882644653,grad_norm: 0.7001449141441118, iteration: 365262
loss: 1.0266836881637573,grad_norm: 0.9258703295098119, iteration: 365263
loss: 0.9914082288742065,grad_norm: 0.7466920750662251, iteration: 365264
loss: 1.0057716369628906,grad_norm: 0.7120664128781435, iteration: 365265
loss: 0.9811534285545349,grad_norm: 0.9999991150430142, iteration: 365266
loss: 1.0259745121002197,grad_norm: 0.792289666725108, iteration: 365267
loss: 1.0548179149627686,grad_norm: 0.9999990341639681, iteration: 365268
loss: 1.0205388069152832,grad_norm: 0.9671457390080545, iteration: 365269
loss: 1.097017526626587,grad_norm: 0.9999997757491811, iteration: 365270
loss: 1.0503792762756348,grad_norm: 0.9999998149079572, iteration: 365271
loss: 0.9975919723510742,grad_norm: 0.7889793843771378, iteration: 365272
loss: 0.9804774522781372,grad_norm: 0.8123910181491674, iteration: 365273
loss: 1.058779001235962,grad_norm: 0.9196875634677436, iteration: 365274
loss: 1.1432271003723145,grad_norm: 0.9999991745579667, iteration: 365275
loss: 1.003867745399475,grad_norm: 0.9826272480323257, iteration: 365276
loss: 1.0025821924209595,grad_norm: 0.7546232680842125, iteration: 365277
loss: 0.997485876083374,grad_norm: 0.7542933225740953, iteration: 365278
loss: 0.9843875169754028,grad_norm: 0.9763686814354474, iteration: 365279
loss: 1.0118951797485352,grad_norm: 0.7201244940076711, iteration: 365280
loss: 0.961228609085083,grad_norm: 0.8313480425016477, iteration: 365281
loss: 1.0129427909851074,grad_norm: 0.8519916757582685, iteration: 365282
loss: 0.9710654020309448,grad_norm: 0.7578304234127973, iteration: 365283
loss: 0.9621191620826721,grad_norm: 0.9999999168003817, iteration: 365284
loss: 0.9585410952568054,grad_norm: 0.9999996059907509, iteration: 365285
loss: 1.066065788269043,grad_norm: 0.9999998268288025, iteration: 365286
loss: 1.0352727174758911,grad_norm: 0.8106976354256189, iteration: 365287
loss: 1.0251796245574951,grad_norm: 0.9074533416368632, iteration: 365288
loss: 0.9754928350448608,grad_norm: 0.8233846951132995, iteration: 365289
loss: 0.9911850690841675,grad_norm: 0.7199086663802818, iteration: 365290
loss: 1.039587140083313,grad_norm: 0.9580283662051226, iteration: 365291
loss: 0.9913052320480347,grad_norm: 0.8467814395912038, iteration: 365292
loss: 1.0627866983413696,grad_norm: 0.9999998969217183, iteration: 365293
loss: 1.0136523246765137,grad_norm: 0.6719247845299657, iteration: 365294
loss: 1.0602704286575317,grad_norm: 0.9999999402357921, iteration: 365295
loss: 1.0548346042633057,grad_norm: 0.8681552780619753, iteration: 365296
loss: 1.0124560594558716,grad_norm: 0.944808771812594, iteration: 365297
loss: 1.0071420669555664,grad_norm: 0.9999998652921672, iteration: 365298
loss: 1.015699028968811,grad_norm: 0.6331737621171318, iteration: 365299
loss: 1.0428210496902466,grad_norm: 0.9999998077990311, iteration: 365300
loss: 1.122766375541687,grad_norm: 0.9999993184450647, iteration: 365301
loss: 1.0178152322769165,grad_norm: 0.7534572794881936, iteration: 365302
loss: 1.021830677986145,grad_norm: 0.747309042093956, iteration: 365303
loss: 1.01145601272583,grad_norm: 0.8744310210582817, iteration: 365304
loss: 1.0887950658798218,grad_norm: 0.9999996058427125, iteration: 365305
loss: 0.992233395576477,grad_norm: 0.7859535957195954, iteration: 365306
loss: 1.0335001945495605,grad_norm: 0.9999992795163251, iteration: 365307
loss: 0.9981905817985535,grad_norm: 0.8223446169663644, iteration: 365308
loss: 1.0086464881896973,grad_norm: 0.9999993580483388, iteration: 365309
loss: 1.017561912536621,grad_norm: 0.8429178098646116, iteration: 365310
loss: 1.0712807178497314,grad_norm: 0.9999994628813728, iteration: 365311
loss: 1.0261260271072388,grad_norm: 0.9999991775939304, iteration: 365312
loss: 0.9861626029014587,grad_norm: 0.8049634963554257, iteration: 365313
loss: 0.9962406754493713,grad_norm: 0.8630787419678011, iteration: 365314
loss: 0.9893549680709839,grad_norm: 0.7443857044718482, iteration: 365315
loss: 1.0054751634597778,grad_norm: 0.853536620697533, iteration: 365316
loss: 1.0462076663970947,grad_norm: 0.7381655184632258, iteration: 365317
loss: 1.0175085067749023,grad_norm: 0.9084004871859362, iteration: 365318
loss: 1.0142865180969238,grad_norm: 0.8623581011230753, iteration: 365319
loss: 0.9906232357025146,grad_norm: 0.803365332620756, iteration: 365320
loss: 0.9956831932067871,grad_norm: 0.7638204152094397, iteration: 365321
loss: 1.0121690034866333,grad_norm: 0.8507058164341773, iteration: 365322
loss: 1.0278123617172241,grad_norm: 0.7359060517387889, iteration: 365323
loss: 0.9774463772773743,grad_norm: 0.7962527776628598, iteration: 365324
loss: 1.0189628601074219,grad_norm: 0.7632439472089783, iteration: 365325
loss: 1.0065168142318726,grad_norm: 0.8752919533902449, iteration: 365326
loss: 1.0264383554458618,grad_norm: 0.7428464973367707, iteration: 365327
loss: 1.0122219324111938,grad_norm: 0.9216105022959848, iteration: 365328
loss: 1.0243057012557983,grad_norm: 0.8536958483965245, iteration: 365329
loss: 1.036045789718628,grad_norm: 0.9999997843853199, iteration: 365330
loss: 1.0006541013717651,grad_norm: 0.9999991823345146, iteration: 365331
loss: 1.0541236400604248,grad_norm: 0.9615595828834025, iteration: 365332
loss: 0.9831303358078003,grad_norm: 0.875851702719102, iteration: 365333
loss: 1.0776357650756836,grad_norm: 0.795658515197847, iteration: 365334
loss: 0.9900596737861633,grad_norm: 0.8710433139484068, iteration: 365335
loss: 1.008219838142395,grad_norm: 0.8195385808416895, iteration: 365336
loss: 0.9822900891304016,grad_norm: 0.9999991342721591, iteration: 365337
loss: 1.0012562274932861,grad_norm: 0.6553962834384687, iteration: 365338
loss: 1.027783989906311,grad_norm: 0.9301552991153681, iteration: 365339
loss: 0.9830905795097351,grad_norm: 0.7097267759971835, iteration: 365340
loss: 0.9871552586555481,grad_norm: 0.7268385808528252, iteration: 365341
loss: 1.0397855043411255,grad_norm: 0.8485688589200446, iteration: 365342
loss: 1.0929718017578125,grad_norm: 0.9999999879150753, iteration: 365343
loss: 0.9572656154632568,grad_norm: 0.7043121639735225, iteration: 365344
loss: 0.9751028418540955,grad_norm: 0.9000280163494394, iteration: 365345
loss: 0.989271342754364,grad_norm: 0.7228995977943473, iteration: 365346
loss: 1.018702507019043,grad_norm: 0.9999992183988097, iteration: 365347
loss: 1.0439484119415283,grad_norm: 0.7258084951279343, iteration: 365348
loss: 1.0048328638076782,grad_norm: 0.9999990135480884, iteration: 365349
loss: 1.0379570722579956,grad_norm: 0.999999384018413, iteration: 365350
loss: 1.0056164264678955,grad_norm: 0.818995952632355, iteration: 365351
loss: 1.0918269157409668,grad_norm: 0.9999999940735207, iteration: 365352
loss: 1.078428030014038,grad_norm: 0.9999995577792904, iteration: 365353
loss: 0.9859845042228699,grad_norm: 0.9607259890908474, iteration: 365354
loss: 0.9902270436286926,grad_norm: 0.996694773888313, iteration: 365355
loss: 0.972865641117096,grad_norm: 0.8728794584647827, iteration: 365356
loss: 0.9945824146270752,grad_norm: 0.8940740973778412, iteration: 365357
loss: 0.9970692992210388,grad_norm: 0.7737945114314924, iteration: 365358
loss: 1.022863507270813,grad_norm: 0.9812611215569241, iteration: 365359
loss: 0.9739218354225159,grad_norm: 0.6997742434793722, iteration: 365360
loss: 1.0347777605056763,grad_norm: 0.8956495117508272, iteration: 365361
loss: 0.9794022440910339,grad_norm: 0.8923651673854438, iteration: 365362
loss: 0.9618663191795349,grad_norm: 0.9883756981675104, iteration: 365363
loss: 1.0079671144485474,grad_norm: 0.9999991487102097, iteration: 365364
loss: 1.0142896175384521,grad_norm: 0.8040507970087916, iteration: 365365
loss: 0.9817032217979431,grad_norm: 0.8797289357656204, iteration: 365366
loss: 1.0018508434295654,grad_norm: 0.7853208170549514, iteration: 365367
loss: 0.988720715045929,grad_norm: 0.9187050656377039, iteration: 365368
loss: 1.0240265130996704,grad_norm: 0.7763468641340859, iteration: 365369
loss: 0.9811379909515381,grad_norm: 0.9999998287491196, iteration: 365370
loss: 1.0261073112487793,grad_norm: 0.8470316592010075, iteration: 365371
loss: 1.0232614278793335,grad_norm: 0.7731679062946062, iteration: 365372
loss: 1.0050705671310425,grad_norm: 0.6975911941286989, iteration: 365373
loss: 0.9896888136863708,grad_norm: 0.7342569734450306, iteration: 365374
loss: 0.9548647999763489,grad_norm: 0.7565508408298337, iteration: 365375
loss: 1.0278518199920654,grad_norm: 0.9224252643755484, iteration: 365376
loss: 1.0339149236679077,grad_norm: 0.8039853523599927, iteration: 365377
loss: 1.1120057106018066,grad_norm: 0.9999997134438862, iteration: 365378
loss: 1.0217788219451904,grad_norm: 0.8235477232745523, iteration: 365379
loss: 1.0577068328857422,grad_norm: 0.838982715996225, iteration: 365380
loss: 1.0332212448120117,grad_norm: 0.752992359320995, iteration: 365381
loss: 0.9678488373756409,grad_norm: 0.8384304001149521, iteration: 365382
loss: 0.9523136019706726,grad_norm: 0.9999991492154809, iteration: 365383
loss: 0.9739258885383606,grad_norm: 0.816946429685648, iteration: 365384
loss: 0.9737347364425659,grad_norm: 0.8614660687410368, iteration: 365385
loss: 1.1149654388427734,grad_norm: 0.855248516748086, iteration: 365386
loss: 0.9960155487060547,grad_norm: 0.7374379519931167, iteration: 365387
loss: 1.0590232610702515,grad_norm: 0.7327832192456251, iteration: 365388
loss: 0.9790329933166504,grad_norm: 0.7577552558665515, iteration: 365389
loss: 0.9944370985031128,grad_norm: 0.8454251625439093, iteration: 365390
loss: 0.9869813919067383,grad_norm: 0.9161058430986693, iteration: 365391
loss: 0.9819482564926147,grad_norm: 0.8757940073273975, iteration: 365392
loss: 1.013534426689148,grad_norm: 0.7372428214949914, iteration: 365393
loss: 1.0536071062088013,grad_norm: 0.9999998845194196, iteration: 365394
loss: 0.9944819808006287,grad_norm: 0.7355379013647517, iteration: 365395
loss: 1.0316710472106934,grad_norm: 0.7619065910493743, iteration: 365396
loss: 0.9710822105407715,grad_norm: 0.8814053058508813, iteration: 365397
loss: 0.9773499965667725,grad_norm: 0.7832033432324068, iteration: 365398
loss: 1.026832938194275,grad_norm: 0.8996581799718887, iteration: 365399
loss: 0.9706255793571472,grad_norm: 0.9647528967653889, iteration: 365400
loss: 0.9738216996192932,grad_norm: 0.8593198836052215, iteration: 365401
loss: 1.0229729413986206,grad_norm: 0.7375469054376613, iteration: 365402
loss: 0.9764165878295898,grad_norm: 0.6609280403055517, iteration: 365403
loss: 1.007076621055603,grad_norm: 0.7134484000489708, iteration: 365404
loss: 1.028380274772644,grad_norm: 0.7573331746250324, iteration: 365405
loss: 0.9961444139480591,grad_norm: 0.9191999362979304, iteration: 365406
loss: 1.0234171152114868,grad_norm: 0.7577953757065993, iteration: 365407
loss: 0.9957436323165894,grad_norm: 0.9645906317825794, iteration: 365408
loss: 0.9740249514579773,grad_norm: 0.9999990728064455, iteration: 365409
loss: 1.117941975593567,grad_norm: 0.895411647404394, iteration: 365410
loss: 1.0350606441497803,grad_norm: 0.9999996560506228, iteration: 365411
loss: 1.0372234582901,grad_norm: 0.9999994827017233, iteration: 365412
loss: 0.9983795881271362,grad_norm: 0.7667071766584517, iteration: 365413
loss: 0.9993181824684143,grad_norm: 0.8040598294299391, iteration: 365414
loss: 1.0739548206329346,grad_norm: 0.9999992773410638, iteration: 365415
loss: 1.0206762552261353,grad_norm: 0.8139056479560204, iteration: 365416
loss: 1.0083794593811035,grad_norm: 0.9999996622430112, iteration: 365417
loss: 0.9893551468849182,grad_norm: 0.8606997623476156, iteration: 365418
loss: 0.9881552457809448,grad_norm: 0.8590124160335286, iteration: 365419
loss: 0.9766912460327148,grad_norm: 0.8060463256913883, iteration: 365420
loss: 1.0106232166290283,grad_norm: 0.8041720566134839, iteration: 365421
loss: 0.9945248365402222,grad_norm: 0.8245019997549405, iteration: 365422
loss: 0.9947219491004944,grad_norm: 0.9402691952562797, iteration: 365423
loss: 1.014883041381836,grad_norm: 0.7353814532464023, iteration: 365424
loss: 1.00240957736969,grad_norm: 0.8159937660107099, iteration: 365425
loss: 1.007333755493164,grad_norm: 0.8592164605981, iteration: 365426
loss: 1.0172476768493652,grad_norm: 0.7771460985962009, iteration: 365427
loss: 0.9830618500709534,grad_norm: 0.7772722141092442, iteration: 365428
loss: 1.0227068662643433,grad_norm: 0.9999996128108537, iteration: 365429
loss: 0.9978300333023071,grad_norm: 0.8075595932495948, iteration: 365430
loss: 1.0105613470077515,grad_norm: 0.8857380143335378, iteration: 365431
loss: 1.0344377756118774,grad_norm: 0.9642713039023663, iteration: 365432
loss: 0.9919804334640503,grad_norm: 0.8407487257208955, iteration: 365433
loss: 1.022086262702942,grad_norm: 0.6892851681971018, iteration: 365434
loss: 0.9671334028244019,grad_norm: 0.9063715346188941, iteration: 365435
loss: 0.9687724113464355,grad_norm: 0.7676907529873603, iteration: 365436
loss: 1.031997561454773,grad_norm: 0.7811433800415236, iteration: 365437
loss: 1.0392736196517944,grad_norm: 0.7904020516666642, iteration: 365438
loss: 0.979101836681366,grad_norm: 0.8157846160733994, iteration: 365439
loss: 0.9970619082450867,grad_norm: 0.910793662457246, iteration: 365440
loss: 0.9758707284927368,grad_norm: 0.8149753377301142, iteration: 365441
loss: 1.059714913368225,grad_norm: 0.9999991382691029, iteration: 365442
loss: 0.9721287488937378,grad_norm: 0.7689251560010087, iteration: 365443
loss: 1.0182665586471558,grad_norm: 0.7903311863818085, iteration: 365444
loss: 1.0057557821273804,grad_norm: 0.8780700307094192, iteration: 365445
loss: 0.9890755414962769,grad_norm: 0.8501702425332486, iteration: 365446
loss: 1.0085489749908447,grad_norm: 0.8358400130213177, iteration: 365447
loss: 1.0025651454925537,grad_norm: 0.7686602253805325, iteration: 365448
loss: 1.0726444721221924,grad_norm: 0.9999990133246575, iteration: 365449
loss: 0.9885284304618835,grad_norm: 0.901327750785275, iteration: 365450
loss: 0.9728636145591736,grad_norm: 0.6842365200031153, iteration: 365451
loss: 0.9923055171966553,grad_norm: 0.7991709227800845, iteration: 365452
loss: 1.0396596193313599,grad_norm: 0.999999478455736, iteration: 365453
loss: 1.0052051544189453,grad_norm: 1.000000053560493, iteration: 365454
loss: 1.0105397701263428,grad_norm: 0.7503420848924379, iteration: 365455
loss: 0.992490291595459,grad_norm: 0.9631682248944814, iteration: 365456
loss: 0.9611825346946716,grad_norm: 0.8381227214908198, iteration: 365457
loss: 0.9798815250396729,grad_norm: 0.9107587215358881, iteration: 365458
loss: 1.0243436098098755,grad_norm: 0.9101970736454376, iteration: 365459
loss: 1.013757348060608,grad_norm: 0.8561869327003118, iteration: 365460
loss: 0.9847511649131775,grad_norm: 0.7780851232565219, iteration: 365461
loss: 1.0090314149856567,grad_norm: 0.7519888243372379, iteration: 365462
loss: 1.021429419517517,grad_norm: 0.674631244003957, iteration: 365463
loss: 1.0137704610824585,grad_norm: 0.7786427694049138, iteration: 365464
loss: 0.9643093943595886,grad_norm: 0.8006531240348809, iteration: 365465
loss: 1.0366290807724,grad_norm: 0.8957534206652954, iteration: 365466
loss: 1.065989375114441,grad_norm: 0.9999990956758174, iteration: 365467
loss: 1.0173689126968384,grad_norm: 0.7958055695073193, iteration: 365468
loss: 1.0201687812805176,grad_norm: 0.9403861122221941, iteration: 365469
loss: 1.004160761833191,grad_norm: 0.999999007854369, iteration: 365470
loss: 1.016875147819519,grad_norm: 0.7176715581505081, iteration: 365471
loss: 0.9804837107658386,grad_norm: 0.8214454885128085, iteration: 365472
loss: 0.9921412467956543,grad_norm: 0.8583147758584604, iteration: 365473
loss: 0.9583680033683777,grad_norm: 0.9208226835315036, iteration: 365474
loss: 0.9586475491523743,grad_norm: 0.747762995102705, iteration: 365475
loss: 1.0160040855407715,grad_norm: 0.7642610186733138, iteration: 365476
loss: 1.0426589250564575,grad_norm: 0.9999996471695446, iteration: 365477
loss: 1.017907977104187,grad_norm: 0.9999991914030798, iteration: 365478
loss: 1.038612723350525,grad_norm: 0.7445086283199699, iteration: 365479
loss: 0.9743133783340454,grad_norm: 0.9007559329393319, iteration: 365480
loss: 1.0224900245666504,grad_norm: 0.7183572286386383, iteration: 365481
loss: 0.9967089891433716,grad_norm: 0.8832409997286891, iteration: 365482
loss: 0.9999074339866638,grad_norm: 0.6676749258050797, iteration: 365483
loss: 0.9992242455482483,grad_norm: 0.9999992371234729, iteration: 365484
loss: 0.9640864729881287,grad_norm: 0.7087892174739566, iteration: 365485
loss: 0.9893180727958679,grad_norm: 0.6970273210727935, iteration: 365486
loss: 0.9948068261146545,grad_norm: 0.7724134182379078, iteration: 365487
loss: 0.9882605075836182,grad_norm: 0.9281501151134016, iteration: 365488
loss: 1.0399783849716187,grad_norm: 0.8310396562367651, iteration: 365489
loss: 1.0165096521377563,grad_norm: 0.870201763534751, iteration: 365490
loss: 1.022979497909546,grad_norm: 0.7801989613783032, iteration: 365491
loss: 1.0696438550949097,grad_norm: 0.9941165276191106, iteration: 365492
loss: 0.9775860905647278,grad_norm: 0.8473184447953642, iteration: 365493
loss: 0.9744866490364075,grad_norm: 0.8569838203741721, iteration: 365494
loss: 0.9738831520080566,grad_norm: 0.7931679234077097, iteration: 365495
loss: 1.0183333158493042,grad_norm: 0.6923326021392349, iteration: 365496
loss: 1.0389846563339233,grad_norm: 0.8682461066586816, iteration: 365497
loss: 0.9955067038536072,grad_norm: 0.8124938469438139, iteration: 365498
loss: 1.0218676328659058,grad_norm: 0.9209235034795332, iteration: 365499
loss: 1.0100935697555542,grad_norm: 0.795736873767381, iteration: 365500
loss: 0.998680591583252,grad_norm: 0.8427225011253644, iteration: 365501
loss: 1.0019198656082153,grad_norm: 0.771312552932029, iteration: 365502
loss: 0.9993739724159241,grad_norm: 0.8263932236024164, iteration: 365503
loss: 1.007331371307373,grad_norm: 0.890078231254738, iteration: 365504
loss: 0.9486740827560425,grad_norm: 0.7897512781479887, iteration: 365505
loss: 1.0212204456329346,grad_norm: 0.8079940293716515, iteration: 365506
loss: 1.0099607706069946,grad_norm: 0.8754600176526436, iteration: 365507
loss: 1.0278918743133545,grad_norm: 0.8778263331003313, iteration: 365508
loss: 0.9944751262664795,grad_norm: 0.7630782166883276, iteration: 365509
loss: 0.9986511468887329,grad_norm: 0.7237837897992296, iteration: 365510
loss: 1.0869098901748657,grad_norm: 0.9999991034657276, iteration: 365511
loss: 1.0058338642120361,grad_norm: 0.9480928850612625, iteration: 365512
loss: 0.9604310989379883,grad_norm: 0.9126766108515099, iteration: 365513
loss: 0.9643658995628357,grad_norm: 0.884547208909804, iteration: 365514
loss: 0.9659926295280457,grad_norm: 0.7740407433067131, iteration: 365515
loss: 0.9905775189399719,grad_norm: 0.7595612707141572, iteration: 365516
loss: 0.9932097792625427,grad_norm: 0.8071600462097914, iteration: 365517
loss: 0.9528197050094604,grad_norm: 0.793390874886356, iteration: 365518
loss: 0.9871110916137695,grad_norm: 0.7851986000169402, iteration: 365519
loss: 0.984071671962738,grad_norm: 0.8571455168693579, iteration: 365520
loss: 0.9616990089416504,grad_norm: 0.9999991046048972, iteration: 365521
loss: 1.0550299882888794,grad_norm: 0.8296078921159418, iteration: 365522
loss: 0.9883418083190918,grad_norm: 0.9999989621387766, iteration: 365523
loss: 1.0312923192977905,grad_norm: 0.8121023135298745, iteration: 365524
loss: 1.003842830657959,grad_norm: 0.9013151727456121, iteration: 365525
loss: 0.9628381729125977,grad_norm: 0.9999991431665441, iteration: 365526
loss: 0.9618310928344727,grad_norm: 0.7604798422707721, iteration: 365527
loss: 1.0036929845809937,grad_norm: 0.9333554015253611, iteration: 365528
loss: 1.0264843702316284,grad_norm: 0.7103034966030664, iteration: 365529
loss: 0.9926154613494873,grad_norm: 0.9999993399472826, iteration: 365530
loss: 1.0342613458633423,grad_norm: 0.90811368673567, iteration: 365531
loss: 0.9964271783828735,grad_norm: 0.7833275597293526, iteration: 365532
loss: 1.0356780290603638,grad_norm: 0.6837752383549367, iteration: 365533
loss: 0.9940587282180786,grad_norm: 0.7799297288006314, iteration: 365534
loss: 1.065855622291565,grad_norm: 0.9999993111392473, iteration: 365535
loss: 1.0013800859451294,grad_norm: 0.9999995416899907, iteration: 365536
loss: 0.9944663047790527,grad_norm: 0.8564180259132077, iteration: 365537
loss: 1.0261590480804443,grad_norm: 0.6801182421177169, iteration: 365538
loss: 0.9821210503578186,grad_norm: 0.9951442438147331, iteration: 365539
loss: 0.9982584118843079,grad_norm: 0.9999991403717026, iteration: 365540
loss: 1.0128512382507324,grad_norm: 0.9080162417863292, iteration: 365541
loss: 1.001354455947876,grad_norm: 0.8620984124685475, iteration: 365542
loss: 0.9951321482658386,grad_norm: 0.8450234049462871, iteration: 365543
loss: 1.002174735069275,grad_norm: 0.7468805770011248, iteration: 365544
loss: 0.9859208464622498,grad_norm: 0.8341539220034341, iteration: 365545
loss: 1.0134093761444092,grad_norm: 0.7151742162904752, iteration: 365546
loss: 0.986207127571106,grad_norm: 0.7565649502894981, iteration: 365547
loss: 1.0055625438690186,grad_norm: 0.8932276546379535, iteration: 365548
loss: 0.9740861058235168,grad_norm: 0.9297856805282294, iteration: 365549
loss: 1.070988416671753,grad_norm: 0.999999385750981, iteration: 365550
loss: 0.9788191914558411,grad_norm: 0.7136466941189272, iteration: 365551
loss: 1.0800749063491821,grad_norm: 0.9999998286867976, iteration: 365552
loss: 1.0503284931182861,grad_norm: 0.8047905809941114, iteration: 365553
loss: 1.005890965461731,grad_norm: 0.6872783606264747, iteration: 365554
loss: 1.0151278972625732,grad_norm: 0.734525746903663, iteration: 365555
loss: 0.9906464219093323,grad_norm: 0.8104403674526205, iteration: 365556
loss: 0.9751203060150146,grad_norm: 0.6749192627321211, iteration: 365557
loss: 0.9809706807136536,grad_norm: 0.818382038668328, iteration: 365558
loss: 0.9817438125610352,grad_norm: 0.624225362120199, iteration: 365559
loss: 1.0041640996932983,grad_norm: 0.7401898309013215, iteration: 365560
loss: 1.0200135707855225,grad_norm: 0.8344220786115489, iteration: 365561
loss: 0.9867276549339294,grad_norm: 0.9999991502718665, iteration: 365562
loss: 0.9793909192085266,grad_norm: 0.8356168423985891, iteration: 365563
loss: 0.9784579873085022,grad_norm: 0.7347639807166955, iteration: 365564
loss: 0.9803633689880371,grad_norm: 0.7940903079965033, iteration: 365565
loss: 1.0299897193908691,grad_norm: 0.8604500840152578, iteration: 365566
loss: 0.9815435409545898,grad_norm: 0.7711497394009489, iteration: 365567
loss: 1.0294501781463623,grad_norm: 0.7598285746483366, iteration: 365568
loss: 0.9869024753570557,grad_norm: 0.7002113451846629, iteration: 365569
loss: 0.9937624335289001,grad_norm: 0.9112690618693962, iteration: 365570
loss: 1.01304030418396,grad_norm: 0.6462498513744596, iteration: 365571
loss: 1.0114370584487915,grad_norm: 0.7468890637189185, iteration: 365572
loss: 0.9739677309989929,grad_norm: 0.7627432336696252, iteration: 365573
loss: 0.9781181812286377,grad_norm: 0.8777469443742607, iteration: 365574
loss: 0.9823840856552124,grad_norm: 0.8280299417423783, iteration: 365575
loss: 1.0436931848526,grad_norm: 0.8247480298177019, iteration: 365576
loss: 0.9981953501701355,grad_norm: 0.8016827445892416, iteration: 365577
loss: 1.0347514152526855,grad_norm: 0.8298691721225926, iteration: 365578
loss: 0.9806811809539795,grad_norm: 0.7422814579553882, iteration: 365579
loss: 1.0160256624221802,grad_norm: 0.6593424081132726, iteration: 365580
loss: 0.9793700575828552,grad_norm: 0.6656933883958667, iteration: 365581
loss: 0.9722399115562439,grad_norm: 0.7451344585763985, iteration: 365582
loss: 0.9920291900634766,grad_norm: 0.8051317717520349, iteration: 365583
loss: 1.0068533420562744,grad_norm: 0.7173405079929271, iteration: 365584
loss: 1.0168288946151733,grad_norm: 0.861771654348501, iteration: 365585
loss: 1.0124046802520752,grad_norm: 0.732312262032502, iteration: 365586
loss: 0.9979081153869629,grad_norm: 0.7576292660110507, iteration: 365587
loss: 0.9977884888648987,grad_norm: 0.7522677360350443, iteration: 365588
loss: 0.9627516269683838,grad_norm: 0.8041849207750151, iteration: 365589
loss: 1.00790274143219,grad_norm: 0.9999997063705931, iteration: 365590
loss: 0.9904033541679382,grad_norm: 0.7678024697455729, iteration: 365591
loss: 1.011496901512146,grad_norm: 0.8183620924031747, iteration: 365592
loss: 1.0554072856903076,grad_norm: 0.9999995849346177, iteration: 365593
loss: 0.9980565905570984,grad_norm: 0.7594155394644038, iteration: 365594
loss: 0.9763445258140564,grad_norm: 0.8491744566646694, iteration: 365595
loss: 1.0062659978866577,grad_norm: 0.9999997280183052, iteration: 365596
loss: 0.9769690036773682,grad_norm: 0.9392348031073969, iteration: 365597
loss: 0.9904073476791382,grad_norm: 0.8072401631997966, iteration: 365598
loss: 1.0205891132354736,grad_norm: 0.7579512614036871, iteration: 365599
loss: 1.0213940143585205,grad_norm: 0.8463468818440668, iteration: 365600
loss: 1.0566786527633667,grad_norm: 0.9999996475029838, iteration: 365601
loss: 0.9872863292694092,grad_norm: 0.8383726561906019, iteration: 365602
loss: 1.0412675142288208,grad_norm: 0.7834098773317103, iteration: 365603
loss: 1.0011072158813477,grad_norm: 0.7658933161971566, iteration: 365604
loss: 1.0551646947860718,grad_norm: 0.9999990914878282, iteration: 365605
loss: 0.984902560710907,grad_norm: 0.9358369541722117, iteration: 365606
loss: 1.0337245464324951,grad_norm: 0.9164936402224695, iteration: 365607
loss: 1.007503867149353,grad_norm: 0.8767695490690247, iteration: 365608
loss: 0.9951245784759521,grad_norm: 0.8738414585251966, iteration: 365609
loss: 1.0151512622833252,grad_norm: 0.9999991177538051, iteration: 365610
loss: 1.2975428104400635,grad_norm: 0.9999990940985172, iteration: 365611
loss: 0.9968665242195129,grad_norm: 0.9442652703918369, iteration: 365612
loss: 0.990711510181427,grad_norm: 0.7448836528736131, iteration: 365613
loss: 1.015279769897461,grad_norm: 0.9480503631124247, iteration: 365614
loss: 1.0471584796905518,grad_norm: 0.9503756362239265, iteration: 365615
loss: 1.0300624370574951,grad_norm: 0.9999991060741962, iteration: 365616
loss: 1.0001208782196045,grad_norm: 0.9999990406217599, iteration: 365617
loss: 1.0273785591125488,grad_norm: 0.7973181189833737, iteration: 365618
loss: 0.9878994822502136,grad_norm: 0.8068532572117928, iteration: 365619
loss: 1.024485468864441,grad_norm: 0.657660501411276, iteration: 365620
loss: 0.994579017162323,grad_norm: 0.8705514229429118, iteration: 365621
loss: 1.03553307056427,grad_norm: 0.9999994010997069, iteration: 365622
loss: 1.0162992477416992,grad_norm: 0.7669669668078403, iteration: 365623
loss: 0.9704587459564209,grad_norm: 0.9999995213995151, iteration: 365624
loss: 1.014746069908142,grad_norm: 0.8399781473244989, iteration: 365625
loss: 0.9950181841850281,grad_norm: 0.6097674955404516, iteration: 365626
loss: 0.9997856616973877,grad_norm: 0.8395339758527631, iteration: 365627
loss: 1.0016673803329468,grad_norm: 0.875701269931767, iteration: 365628
loss: 1.0307117700576782,grad_norm: 0.7882999534179322, iteration: 365629
loss: 1.0040415525436401,grad_norm: 0.7083160287731348, iteration: 365630
loss: 0.9995891451835632,grad_norm: 0.7670696469284725, iteration: 365631
loss: 0.9935407638549805,grad_norm: 0.6251824696701623, iteration: 365632
loss: 1.0302637815475464,grad_norm: 0.9999995404956477, iteration: 365633
loss: 1.0433399677276611,grad_norm: 0.972216801751433, iteration: 365634
loss: 0.9704229235649109,grad_norm: 0.935490524338795, iteration: 365635
loss: 1.0023717880249023,grad_norm: 0.9039812848061235, iteration: 365636
loss: 1.0074204206466675,grad_norm: 0.9052256125631751, iteration: 365637
loss: 0.9675744771957397,grad_norm: 0.88027108828784, iteration: 365638
loss: 0.986230194568634,grad_norm: 0.6586310972720731, iteration: 365639
loss: 1.0623421669006348,grad_norm: 0.8135751750539045, iteration: 365640
loss: 0.9573150873184204,grad_norm: 0.7312081658861563, iteration: 365641
loss: 0.9792750477790833,grad_norm: 0.8197098933780324, iteration: 365642
loss: 1.0098751783370972,grad_norm: 0.8589879642219591, iteration: 365643
loss: 0.9882028698921204,grad_norm: 0.942724848462032, iteration: 365644
loss: 1.0223175287246704,grad_norm: 0.789944537886033, iteration: 365645
loss: 1.0102765560150146,grad_norm: 0.7825533259390657, iteration: 365646
loss: 0.9822282195091248,grad_norm: 0.8364827021454591, iteration: 365647
loss: 1.035396933555603,grad_norm: 0.9999993018954695, iteration: 365648
loss: 1.056099772453308,grad_norm: 0.7343823320107515, iteration: 365649
loss: 1.0182594060897827,grad_norm: 0.8659708956766933, iteration: 365650
loss: 0.9884439706802368,grad_norm: 0.6694530050635028, iteration: 365651
loss: 0.9684524536132812,grad_norm: 0.9628934409210012, iteration: 365652
loss: 1.041742205619812,grad_norm: 0.863482866771732, iteration: 365653
loss: 1.0095642805099487,grad_norm: 0.7575714101216304, iteration: 365654
loss: 1.035427451133728,grad_norm: 0.9999993868756373, iteration: 365655
loss: 1.0163973569869995,grad_norm: 0.9999991985338602, iteration: 365656
loss: 0.9869390726089478,grad_norm: 0.7441028300493593, iteration: 365657
loss: 1.0093878507614136,grad_norm: 0.8688809627434275, iteration: 365658
loss: 1.0109994411468506,grad_norm: 0.9212882981423381, iteration: 365659
loss: 1.0157109498977661,grad_norm: 0.9999991680466872, iteration: 365660
loss: 1.0568904876708984,grad_norm: 0.9572085955366998, iteration: 365661
loss: 1.01509428024292,grad_norm: 0.9999993736248133, iteration: 365662
loss: 0.9520017504692078,grad_norm: 0.9280974702586742, iteration: 365663
loss: 0.9825057983398438,grad_norm: 0.7052094010765421, iteration: 365664
loss: 0.943551242351532,grad_norm: 0.7863039827928017, iteration: 365665
loss: 1.0926326513290405,grad_norm: 0.999999319638348, iteration: 365666
loss: 1.0453991889953613,grad_norm: 0.9706872944783755, iteration: 365667
loss: 0.9804526567459106,grad_norm: 0.8327252816856917, iteration: 365668
loss: 1.04263436794281,grad_norm: 0.9999996398760259, iteration: 365669
loss: 1.0501779317855835,grad_norm: 0.8240471669632179, iteration: 365670
loss: 1.0053958892822266,grad_norm: 0.8570431621807555, iteration: 365671
loss: 0.9996567964553833,grad_norm: 0.8117359206918142, iteration: 365672
loss: 1.0502362251281738,grad_norm: 0.9020951788948524, iteration: 365673
loss: 1.0163084268569946,grad_norm: 0.8106163099439857, iteration: 365674
loss: 1.0190476179122925,grad_norm: 0.6833155474177487, iteration: 365675
loss: 1.0248726606369019,grad_norm: 0.9999992721270143, iteration: 365676
loss: 0.9711607694625854,grad_norm: 0.8543771726895348, iteration: 365677
loss: 1.0519016981124878,grad_norm: 0.8292935799840216, iteration: 365678
loss: 0.9940156936645508,grad_norm: 0.9071973301336006, iteration: 365679
loss: 1.1017951965332031,grad_norm: 0.9999993751170255, iteration: 365680
loss: 1.027010202407837,grad_norm: 0.802953697282381, iteration: 365681
loss: 1.0397517681121826,grad_norm: 0.8111309769945387, iteration: 365682
loss: 1.0055036544799805,grad_norm: 0.7447949465332774, iteration: 365683
loss: 1.0138758420944214,grad_norm: 0.6798422129589747, iteration: 365684
loss: 1.0698773860931396,grad_norm: 0.8030571865471337, iteration: 365685
loss: 0.9934113621711731,grad_norm: 0.7459706121263191, iteration: 365686
loss: 1.0185481309890747,grad_norm: 0.8865197110540828, iteration: 365687
loss: 1.024930715560913,grad_norm: 0.8676520603881124, iteration: 365688
loss: 0.9955353140830994,grad_norm: 0.7526821263938045, iteration: 365689
loss: 1.0176280736923218,grad_norm: 0.8910227396632682, iteration: 365690
loss: 0.9800099730491638,grad_norm: 0.7423123522805618, iteration: 365691
loss: 0.9992896318435669,grad_norm: 0.8895346781725126, iteration: 365692
loss: 0.9920336604118347,grad_norm: 0.7315509676492467, iteration: 365693
loss: 0.9977273941040039,grad_norm: 0.7826712011963358, iteration: 365694
loss: 1.04155433177948,grad_norm: 0.9999992104709174, iteration: 365695
loss: 0.97674959897995,grad_norm: 0.8859322453415854, iteration: 365696
loss: 1.0367717742919922,grad_norm: 0.999999190695442, iteration: 365697
loss: 0.9997519254684448,grad_norm: 0.675030177306424, iteration: 365698
loss: 1.0112996101379395,grad_norm: 0.9566620415565578, iteration: 365699
loss: 1.0564631223678589,grad_norm: 0.9999999525662285, iteration: 365700
loss: 0.995231032371521,grad_norm: 0.7395182397721816, iteration: 365701
loss: 1.0218709707260132,grad_norm: 0.8886388320945214, iteration: 365702
loss: 1.0022656917572021,grad_norm: 0.8831945962145135, iteration: 365703
loss: 0.9853886365890503,grad_norm: 0.924290226925634, iteration: 365704
loss: 0.9809543490409851,grad_norm: 0.8153477374974498, iteration: 365705
loss: 0.9812443852424622,grad_norm: 0.9999990207632292, iteration: 365706
loss: 1.0131696462631226,grad_norm: 0.8227913015734042, iteration: 365707
loss: 0.9545783400535583,grad_norm: 0.9234489722157625, iteration: 365708
loss: 1.0315120220184326,grad_norm: 0.8013238310639484, iteration: 365709
loss: 0.9883987903594971,grad_norm: 0.9999992947597959, iteration: 365710
loss: 1.0634257793426514,grad_norm: 0.9999999665774536, iteration: 365711
loss: 0.9760575890541077,grad_norm: 0.9999994124473536, iteration: 365712
loss: 0.9807802438735962,grad_norm: 0.8347457438685703, iteration: 365713
loss: 0.9764901995658875,grad_norm: 0.8943747925526516, iteration: 365714
loss: 1.0341209173202515,grad_norm: 0.9080844567407025, iteration: 365715
loss: 0.9824097752571106,grad_norm: 0.6971421195456394, iteration: 365716
loss: 0.9815073609352112,grad_norm: 0.8555783417873685, iteration: 365717
loss: 1.0323694944381714,grad_norm: 0.793563122676717, iteration: 365718
loss: 1.002716064453125,grad_norm: 0.8949087069365133, iteration: 365719
loss: 1.043178677558899,grad_norm: 0.9999992765144293, iteration: 365720
loss: 1.036591649055481,grad_norm: 0.999999896743321, iteration: 365721
loss: 0.995448112487793,grad_norm: 0.9999991282069915, iteration: 365722
loss: 0.9977131485939026,grad_norm: 0.8189637521253499, iteration: 365723
loss: 0.989696204662323,grad_norm: 0.8569125217359991, iteration: 365724
loss: 0.9784963130950928,grad_norm: 0.7220990222871991, iteration: 365725
loss: 0.9580251574516296,grad_norm: 0.9675355460644713, iteration: 365726
loss: 1.0194172859191895,grad_norm: 0.8747604716920833, iteration: 365727
loss: 0.9976872205734253,grad_norm: 0.7960622821538329, iteration: 365728
loss: 1.0130232572555542,grad_norm: 0.7875948325487419, iteration: 365729
loss: 0.9818024039268494,grad_norm: 0.9999996270705838, iteration: 365730
loss: 0.9908701181411743,grad_norm: 0.9999992255369784, iteration: 365731
loss: 1.0457216501235962,grad_norm: 0.8921455262302808, iteration: 365732
loss: 0.9954872727394104,grad_norm: 0.7351619701415003, iteration: 365733
loss: 0.9771096110343933,grad_norm: 0.9120941450165498, iteration: 365734
loss: 0.9862529635429382,grad_norm: 0.8605684253183283, iteration: 365735
loss: 1.0033317804336548,grad_norm: 0.9043745292253094, iteration: 365736
loss: 1.0273897647857666,grad_norm: 0.7216306982323376, iteration: 365737
loss: 1.0331172943115234,grad_norm: 0.9999996641316955, iteration: 365738
loss: 0.9723871946334839,grad_norm: 0.7196278436482734, iteration: 365739
loss: 0.9941825866699219,grad_norm: 0.9999991748794361, iteration: 365740
loss: 0.9633909463882446,grad_norm: 0.8502253986403638, iteration: 365741
loss: 1.0000759363174438,grad_norm: 0.7906106472120352, iteration: 365742
loss: 0.9825794696807861,grad_norm: 0.7593480729468721, iteration: 365743
loss: 1.0162054300308228,grad_norm: 0.9193640054567921, iteration: 365744
loss: 0.9519863128662109,grad_norm: 0.8083033417006454, iteration: 365745
loss: 0.9779378771781921,grad_norm: 0.653473095417854, iteration: 365746
loss: 0.9833753108978271,grad_norm: 0.999999788466049, iteration: 365747
loss: 1.0095715522766113,grad_norm: 0.7055105966624065, iteration: 365748
loss: 1.009008526802063,grad_norm: 0.852358037296089, iteration: 365749
loss: 0.9817384481430054,grad_norm: 0.8701989109866477, iteration: 365750
loss: 1.027246356010437,grad_norm: 0.9999991663660714, iteration: 365751
loss: 1.0390697717666626,grad_norm: 0.8553354899442625, iteration: 365752
loss: 1.0031681060791016,grad_norm: 0.6396163239296467, iteration: 365753
loss: 1.0397580862045288,grad_norm: 0.8140949231525112, iteration: 365754
loss: 1.0523515939712524,grad_norm: 0.8171813530751576, iteration: 365755
loss: 1.1159697771072388,grad_norm: 0.9999999531345096, iteration: 365756
loss: 1.0279964208602905,grad_norm: 0.9999990983379311, iteration: 365757
loss: 1.0106370449066162,grad_norm: 0.9878985821972524, iteration: 365758
loss: 1.0111385583877563,grad_norm: 0.7680226480088348, iteration: 365759
loss: 1.0332742929458618,grad_norm: 0.8033577665438356, iteration: 365760
loss: 0.9805444478988647,grad_norm: 0.7589606098374345, iteration: 365761
loss: 1.0794166326522827,grad_norm: 1.0000000400700708, iteration: 365762
loss: 0.9999067783355713,grad_norm: 0.9146951646537624, iteration: 365763
loss: 1.0165802240371704,grad_norm: 0.889675124887785, iteration: 365764
loss: 0.9886503219604492,grad_norm: 0.8053237485058943, iteration: 365765
loss: 0.9791763424873352,grad_norm: 0.7705656711169269, iteration: 365766
loss: 1.0035568475723267,grad_norm: 0.8501159062290495, iteration: 365767
loss: 1.0688114166259766,grad_norm: 0.9999997227675145, iteration: 365768
loss: 1.026628017425537,grad_norm: 0.8132630810897531, iteration: 365769
loss: 1.0030012130737305,grad_norm: 0.8956214188012488, iteration: 365770
loss: 1.071590781211853,grad_norm: 0.8985865832746692, iteration: 365771
loss: 0.98939049243927,grad_norm: 0.7095220471810103, iteration: 365772
loss: 0.9821706414222717,grad_norm: 0.9129221820040524, iteration: 365773
loss: 0.9826169013977051,grad_norm: 0.7096906931752409, iteration: 365774
loss: 0.9987502694129944,grad_norm: 0.99999912716698, iteration: 365775
loss: 0.9432957768440247,grad_norm: 0.5760026034985983, iteration: 365776
loss: 0.9715745449066162,grad_norm: 0.7169997014959372, iteration: 365777
loss: 1.014654517173767,grad_norm: 0.9999992382125532, iteration: 365778
loss: 0.9989878535270691,grad_norm: 0.8724864697848949, iteration: 365779
loss: 1.0292446613311768,grad_norm: 0.9780409188029977, iteration: 365780
loss: 0.9479244947433472,grad_norm: 0.8155879035905991, iteration: 365781
loss: 0.9951732158660889,grad_norm: 0.7897766085368416, iteration: 365782
loss: 0.9918572306632996,grad_norm: 0.9457144320143165, iteration: 365783
loss: 1.0476560592651367,grad_norm: 0.9999996102786274, iteration: 365784
loss: 0.9734969139099121,grad_norm: 0.6593031419723644, iteration: 365785
loss: 1.011382818222046,grad_norm: 0.9584807160444744, iteration: 365786
loss: 1.010668158531189,grad_norm: 0.9999998411592455, iteration: 365787
loss: 0.9773223996162415,grad_norm: 0.7237224019078359, iteration: 365788
loss: 0.993659257888794,grad_norm: 0.7198125773113807, iteration: 365789
loss: 1.0400476455688477,grad_norm: 0.8604313646072431, iteration: 365790
loss: 1.0142946243286133,grad_norm: 0.8855118021609097, iteration: 365791
loss: 1.0129541158676147,grad_norm: 0.7985747892614026, iteration: 365792
loss: 0.9994848370552063,grad_norm: 0.8848616597606653, iteration: 365793
loss: 1.0176619291305542,grad_norm: 0.7403011260975866, iteration: 365794
loss: 0.9931371212005615,grad_norm: 0.7815956622389738, iteration: 365795
loss: 0.9834697246551514,grad_norm: 0.7693863727414875, iteration: 365796
loss: 1.0044288635253906,grad_norm: 0.8044739095682921, iteration: 365797
loss: 0.9774565100669861,grad_norm: 0.7799949929465257, iteration: 365798
loss: 1.007588505744934,grad_norm: 0.765371018079208, iteration: 365799
loss: 0.9689769148826599,grad_norm: 0.7341428916957665, iteration: 365800
loss: 1.014418125152588,grad_norm: 0.6874622843149867, iteration: 365801
loss: 1.014650583267212,grad_norm: 0.9999992361995668, iteration: 365802
loss: 1.0030516386032104,grad_norm: 0.7391841430600786, iteration: 365803
loss: 0.9834128022193909,grad_norm: 0.678203242768818, iteration: 365804
loss: 1.0488883256912231,grad_norm: 0.9999992536139384, iteration: 365805
loss: 1.0037126541137695,grad_norm: 0.7385221167626262, iteration: 365806
loss: 1.0229088068008423,grad_norm: 0.9258956888704545, iteration: 365807
loss: 1.010741949081421,grad_norm: 0.9695971797344969, iteration: 365808
loss: 0.9993130564689636,grad_norm: 0.8973859003917418, iteration: 365809
loss: 1.0453697443008423,grad_norm: 0.796115983776268, iteration: 365810
loss: 1.0307564735412598,grad_norm: 0.9999998778443652, iteration: 365811
loss: 1.0078421831130981,grad_norm: 0.9999995819587606, iteration: 365812
loss: 1.0762627124786377,grad_norm: 0.9999997298374866, iteration: 365813
loss: 0.9632276296615601,grad_norm: 0.9999996357330942, iteration: 365814
loss: 1.0128861665725708,grad_norm: 0.8912309505325706, iteration: 365815
loss: 1.1692728996276855,grad_norm: 0.9452506896533471, iteration: 365816
loss: 1.0162849426269531,grad_norm: 0.8015604880119488, iteration: 365817
loss: 0.9859465956687927,grad_norm: 0.7568931450929274, iteration: 365818
loss: 1.0017393827438354,grad_norm: 0.99999923670782, iteration: 365819
loss: 1.0138221979141235,grad_norm: 0.7570547356374309, iteration: 365820
loss: 0.9807770252227783,grad_norm: 0.675248596442914, iteration: 365821
loss: 0.9878652691841125,grad_norm: 0.8148694093577552, iteration: 365822
loss: 0.9774214029312134,grad_norm: 0.6951873775459175, iteration: 365823
loss: 1.0087546110153198,grad_norm: 0.790396863730843, iteration: 365824
loss: 1.080552101135254,grad_norm: 0.9999998178557259, iteration: 365825
loss: 0.9886390566825867,grad_norm: 0.999999840983649, iteration: 365826
loss: 1.03477942943573,grad_norm: 0.9999991993979678, iteration: 365827
loss: 1.012257695198059,grad_norm: 0.9999998332721384, iteration: 365828
loss: 1.03166925907135,grad_norm: 0.8756476293185116, iteration: 365829
loss: 1.0434156656265259,grad_norm: 0.999999552731329, iteration: 365830
loss: 0.9776800274848938,grad_norm: 0.7414859144967669, iteration: 365831
loss: 0.9557508826255798,grad_norm: 0.6602278602915311, iteration: 365832
loss: 1.0315773487091064,grad_norm: 0.7629104246547797, iteration: 365833
loss: 1.1170742511749268,grad_norm: 0.8826438041771626, iteration: 365834
loss: 1.0470845699310303,grad_norm: 0.9999991262323237, iteration: 365835
loss: 0.9950881600379944,grad_norm: 0.8385836586463228, iteration: 365836
loss: 1.053321361541748,grad_norm: 0.9999991355406469, iteration: 365837
loss: 1.0123140811920166,grad_norm: 0.9999991707051169, iteration: 365838
loss: 1.0220445394515991,grad_norm: 0.8560785450284859, iteration: 365839
loss: 1.002347707748413,grad_norm: 0.8347607599387766, iteration: 365840
loss: 0.969828724861145,grad_norm: 0.8016446619496241, iteration: 365841
loss: 1.0114084482192993,grad_norm: 0.9999999249257495, iteration: 365842
loss: 0.9989712238311768,grad_norm: 0.9074701079907569, iteration: 365843
loss: 0.9994863271713257,grad_norm: 0.7479660736198682, iteration: 365844
loss: 0.9837772250175476,grad_norm: 0.9190448231934477, iteration: 365845
loss: 1.0137670040130615,grad_norm: 0.8692761570228548, iteration: 365846
loss: 1.0215188264846802,grad_norm: 0.99999963222663, iteration: 365847
loss: 0.975127100944519,grad_norm: 0.9999997571955856, iteration: 365848
loss: 1.031667947769165,grad_norm: 0.9999991193844464, iteration: 365849
loss: 0.9869480729103088,grad_norm: 0.9419433485599514, iteration: 365850
loss: 1.0390551090240479,grad_norm: 0.7051054200551949, iteration: 365851
loss: 0.9992850422859192,grad_norm: 0.7537020266340271, iteration: 365852
loss: 1.0259132385253906,grad_norm: 0.8120199438559075, iteration: 365853
loss: 1.030380368232727,grad_norm: 0.848325938744663, iteration: 365854
loss: 1.007932186126709,grad_norm: 0.8553133595019781, iteration: 365855
loss: 1.013812780380249,grad_norm: 0.8024054703467025, iteration: 365856
loss: 1.0555487871170044,grad_norm: 0.9999993834155889, iteration: 365857
loss: 0.9857614040374756,grad_norm: 0.9999996993121656, iteration: 365858
loss: 0.9932059645652771,grad_norm: 0.841160878802679, iteration: 365859
loss: 0.9815616011619568,grad_norm: 0.6709766496706683, iteration: 365860
loss: 1.0344575643539429,grad_norm: 0.914824963547008, iteration: 365861
loss: 1.062938928604126,grad_norm: 0.999999887080169, iteration: 365862
loss: 0.9712948799133301,grad_norm: 0.8170408102142237, iteration: 365863
loss: 1.0289616584777832,grad_norm: 0.8329102288626098, iteration: 365864
loss: 1.0112419128417969,grad_norm: 0.9496724161275466, iteration: 365865
loss: 0.9937604069709778,grad_norm: 0.9081966264419211, iteration: 365866
loss: 1.0244240760803223,grad_norm: 0.929469539946861, iteration: 365867
loss: 1.001787543296814,grad_norm: 0.9032831868090051, iteration: 365868
loss: 1.0161149501800537,grad_norm: 0.8292645011237327, iteration: 365869
loss: 0.9993770718574524,grad_norm: 0.7627739803206708, iteration: 365870
loss: 0.9462847113609314,grad_norm: 0.7647061187050285, iteration: 365871
loss: 0.9711220860481262,grad_norm: 0.8533709118530998, iteration: 365872
loss: 0.9901241660118103,grad_norm: 0.7613466796936577, iteration: 365873
loss: 1.0040448904037476,grad_norm: 0.9999992355551293, iteration: 365874
loss: 0.9978763461112976,grad_norm: 0.8344528496137682, iteration: 365875
loss: 0.9957631826400757,grad_norm: 0.7528083125934485, iteration: 365876
loss: 0.9844655990600586,grad_norm: 0.8264036231010777, iteration: 365877
loss: 0.9526388645172119,grad_norm: 0.8397506580762093, iteration: 365878
loss: 1.0371341705322266,grad_norm: 0.9999990860100028, iteration: 365879
loss: 0.9836801290512085,grad_norm: 0.7500737300470909, iteration: 365880
loss: 1.0110771656036377,grad_norm: 0.9999992262728472, iteration: 365881
loss: 0.979300320148468,grad_norm: 0.8086983167986468, iteration: 365882
loss: 0.9601348638534546,grad_norm: 0.7658339730148646, iteration: 365883
loss: 1.00419282913208,grad_norm: 0.8506546951805248, iteration: 365884
loss: 0.9747921228408813,grad_norm: 0.8081566576484587, iteration: 365885
loss: 1.072502613067627,grad_norm: 0.9999995629141687, iteration: 365886
loss: 1.050487995147705,grad_norm: 0.9818248512390726, iteration: 365887
loss: 1.027185082435608,grad_norm: 0.7027945946789951, iteration: 365888
loss: 0.9853176474571228,grad_norm: 0.7711902918593355, iteration: 365889
loss: 1.0028831958770752,grad_norm: 0.8630712843756221, iteration: 365890
loss: 1.017578125,grad_norm: 0.9999990922862372, iteration: 365891
loss: 0.9918573498725891,grad_norm: 0.738413391398338, iteration: 365892
loss: 1.0114909410476685,grad_norm: 0.8567145404635202, iteration: 365893
loss: 1.0328056812286377,grad_norm: 0.8720592194504163, iteration: 365894
loss: 0.9989620447158813,grad_norm: 0.816190178749534, iteration: 365895
loss: 1.0029635429382324,grad_norm: 0.6791067582354028, iteration: 365896
loss: 1.0340759754180908,grad_norm: 0.9999994387202501, iteration: 365897
loss: 1.0045645236968994,grad_norm: 0.8633604766886716, iteration: 365898
loss: 0.9989502429962158,grad_norm: 0.8614901771203111, iteration: 365899
loss: 1.0222305059432983,grad_norm: 0.7259545215538783, iteration: 365900
loss: 1.0141741037368774,grad_norm: 0.7627536942460592, iteration: 365901
loss: 0.9852620363235474,grad_norm: 0.8956086542821359, iteration: 365902
loss: 0.9902772903442383,grad_norm: 0.7952485702803377, iteration: 365903
loss: 0.9770958423614502,grad_norm: 0.7719389478977049, iteration: 365904
loss: 0.9694899916648865,grad_norm: 0.7700252823511178, iteration: 365905
loss: 1.0346593856811523,grad_norm: 0.9650135603044508, iteration: 365906
loss: 1.0204249620437622,grad_norm: 0.8089161130606287, iteration: 365907
loss: 0.9963933229446411,grad_norm: 0.7627353865675011, iteration: 365908
loss: 1.0000439882278442,grad_norm: 0.8128847568338462, iteration: 365909
loss: 1.0469163656234741,grad_norm: 0.8742932105996373, iteration: 365910
loss: 0.9974812269210815,grad_norm: 0.8880199772369219, iteration: 365911
loss: 0.9900205135345459,grad_norm: 0.7908235315111576, iteration: 365912
loss: 1.0005663633346558,grad_norm: 0.880572269476067, iteration: 365913
loss: 0.9658612012863159,grad_norm: 0.8538733716228207, iteration: 365914
loss: 1.019643783569336,grad_norm: 0.7228886143356384, iteration: 365915
loss: 1.012147307395935,grad_norm: 0.7147794992852556, iteration: 365916
loss: 1.0048519372940063,grad_norm: 0.840398658242835, iteration: 365917
loss: 0.9845094084739685,grad_norm: 0.7696522655024006, iteration: 365918
loss: 0.9869757890701294,grad_norm: 0.8851736391566818, iteration: 365919
loss: 1.0138262510299683,grad_norm: 0.8588753247126721, iteration: 365920
loss: 1.0389549732208252,grad_norm: 0.8580169096514475, iteration: 365921
loss: 0.9828433990478516,grad_norm: 0.906440732468404, iteration: 365922
loss: 0.975001335144043,grad_norm: 0.8959881382430185, iteration: 365923
loss: 0.9979746341705322,grad_norm: 0.8769297886413255, iteration: 365924
loss: 0.9900704622268677,grad_norm: 0.8125724042252699, iteration: 365925
loss: 0.962596595287323,grad_norm: 0.7150192544087822, iteration: 365926
loss: 1.027346134185791,grad_norm: 0.8524225271894752, iteration: 365927
loss: 1.0144548416137695,grad_norm: 0.8278534724891763, iteration: 365928
loss: 1.1450022459030151,grad_norm: 0.9275839340486015, iteration: 365929
loss: 1.0062220096588135,grad_norm: 0.8242923759570806, iteration: 365930
loss: 1.0067317485809326,grad_norm: 0.6960912343407104, iteration: 365931
loss: 0.981597900390625,grad_norm: 0.9287377675982014, iteration: 365932
loss: 1.0384360551834106,grad_norm: 0.9999997684248787, iteration: 365933
loss: 0.9843576550483704,grad_norm: 0.7071974605086652, iteration: 365934
loss: 1.0119659900665283,grad_norm: 0.9069701889431845, iteration: 365935
loss: 1.0010541677474976,grad_norm: 0.7711784917606922, iteration: 365936
loss: 0.9801484942436218,grad_norm: 0.9999993311562848, iteration: 365937
loss: 1.0139715671539307,grad_norm: 0.9072315021686882, iteration: 365938
loss: 0.9939297437667847,grad_norm: 0.7460118547795948, iteration: 365939
loss: 1.0619460344314575,grad_norm: 0.9999991362962795, iteration: 365940
loss: 0.9895241260528564,grad_norm: 0.7623446225204498, iteration: 365941
loss: 0.951162576675415,grad_norm: 0.7957415895786394, iteration: 365942
loss: 1.0252752304077148,grad_norm: 0.8814020144729526, iteration: 365943
loss: 0.9954786896705627,grad_norm: 0.809995451147843, iteration: 365944
loss: 1.0178745985031128,grad_norm: 0.9999997255497544, iteration: 365945
loss: 1.1067355871200562,grad_norm: 0.9999998759113992, iteration: 365946
loss: 1.0056573152542114,grad_norm: 0.9015508486394318, iteration: 365947
loss: 1.0464857816696167,grad_norm: 0.7905379172379355, iteration: 365948
loss: 1.0155727863311768,grad_norm: 0.9999999386641338, iteration: 365949
loss: 1.0098003149032593,grad_norm: 0.8470002304108735, iteration: 365950
loss: 0.9731281399726868,grad_norm: 0.9571759565120668, iteration: 365951
loss: 0.9915834665298462,grad_norm: 0.9061935906515175, iteration: 365952
loss: 0.9772290587425232,grad_norm: 0.8361763445165551, iteration: 365953
loss: 1.0245637893676758,grad_norm: 0.7310509537478395, iteration: 365954
loss: 0.9673967957496643,grad_norm: 0.9999993316859164, iteration: 365955
loss: 1.0196348428726196,grad_norm: 0.9023034304852924, iteration: 365956
loss: 1.0122387409210205,grad_norm: 0.9999991569249824, iteration: 365957
loss: 0.9724079966545105,grad_norm: 0.7385641624232419, iteration: 365958
loss: 1.18446683883667,grad_norm: 0.9999998152360292, iteration: 365959
loss: 1.0152068138122559,grad_norm: 0.8005379419273004, iteration: 365960
loss: 1.0143980979919434,grad_norm: 0.7442186265935123, iteration: 365961
loss: 1.0236369371414185,grad_norm: 0.9999994974284101, iteration: 365962
loss: 1.0419259071350098,grad_norm: 0.9757467565137529, iteration: 365963
loss: 0.9617970585823059,grad_norm: 0.8769925962389187, iteration: 365964
loss: 0.9947288632392883,grad_norm: 0.8561971874571191, iteration: 365965
loss: 0.9775089025497437,grad_norm: 0.9999991564784243, iteration: 365966
loss: 0.9918806552886963,grad_norm: 0.8010619715224648, iteration: 365967
loss: 1.024804949760437,grad_norm: 0.9337857092214848, iteration: 365968
loss: 1.0095653533935547,grad_norm: 0.9543767867221493, iteration: 365969
loss: 1.0683345794677734,grad_norm: 0.9527013940569318, iteration: 365970
loss: 1.0733349323272705,grad_norm: 0.9999991409795024, iteration: 365971
loss: 1.0318701267242432,grad_norm: 0.8993115528005471, iteration: 365972
loss: 1.008833885192871,grad_norm: 0.6671399625516783, iteration: 365973
loss: 0.9779713153839111,grad_norm: 0.906640561081852, iteration: 365974
loss: 0.9896607398986816,grad_norm: 0.8287051557639663, iteration: 365975
loss: 1.0557039976119995,grad_norm: 0.9830924395362224, iteration: 365976
loss: 0.986572265625,grad_norm: 0.9999997586880994, iteration: 365977
loss: 1.0033951997756958,grad_norm: 0.8757188500697468, iteration: 365978
loss: 1.0022227764129639,grad_norm: 0.8724494067309961, iteration: 365979
loss: 0.9895586371421814,grad_norm: 0.833479288931852, iteration: 365980
loss: 1.1454905271530151,grad_norm: 0.9999999559586835, iteration: 365981
loss: 1.0138376951217651,grad_norm: 0.6806315360691915, iteration: 365982
loss: 1.089029312133789,grad_norm: 0.9999991874148396, iteration: 365983
loss: 1.035023808479309,grad_norm: 0.9999998702184797, iteration: 365984
loss: 0.9913583993911743,grad_norm: 0.7963528483197737, iteration: 365985
loss: 0.9949254989624023,grad_norm: 0.7915267781631631, iteration: 365986
loss: 1.0425384044647217,grad_norm: 0.9999995700549515, iteration: 365987
loss: 0.998877227306366,grad_norm: 0.6821945819111247, iteration: 365988
loss: 1.052442193031311,grad_norm: 0.7999686099576617, iteration: 365989
loss: 0.9768244028091431,grad_norm: 0.8128274772032148, iteration: 365990
loss: 0.9866344928741455,grad_norm: 0.9999992412240039, iteration: 365991
loss: 0.9981575608253479,grad_norm: 0.8624588872045571, iteration: 365992
loss: 0.9645289778709412,grad_norm: 0.9999991944825136, iteration: 365993
loss: 1.1525920629501343,grad_norm: 0.9999994147331546, iteration: 365994
loss: 1.0198296308517456,grad_norm: 0.8503041213623397, iteration: 365995
loss: 1.013622522354126,grad_norm: 0.7680462486639466, iteration: 365996
loss: 0.9956051111221313,grad_norm: 0.8310944988309181, iteration: 365997
loss: 1.037765383720398,grad_norm: 0.8556798233796098, iteration: 365998
loss: 1.0477409362792969,grad_norm: 0.9999991040809492, iteration: 365999
loss: 0.9948705434799194,grad_norm: 0.8209659479323043, iteration: 366000
loss: 0.9501468539237976,grad_norm: 0.7749998105472016, iteration: 366001
loss: 0.948573112487793,grad_norm: 0.9999990165988615, iteration: 366002
loss: 1.0026254653930664,grad_norm: 0.8942141963272653, iteration: 366003
loss: 1.019362211227417,grad_norm: 0.9999997448581933, iteration: 366004
loss: 0.9895877242088318,grad_norm: 0.9999994622594643, iteration: 366005
loss: 1.03226900100708,grad_norm: 0.8241379972767944, iteration: 366006
loss: 1.032026767730713,grad_norm: 0.7957576247158609, iteration: 366007
loss: 0.9998749494552612,grad_norm: 0.8487235637015462, iteration: 366008
loss: 0.9823505282402039,grad_norm: 0.9847094751982344, iteration: 366009
loss: 1.0884844064712524,grad_norm: 0.9396400156744148, iteration: 366010
loss: 1.0301319360733032,grad_norm: 0.7226544384765456, iteration: 366011
loss: 1.022325038909912,grad_norm: 0.8757373107163784, iteration: 366012
loss: 1.051904320716858,grad_norm: 0.9999996104136581, iteration: 366013
loss: 1.0682306289672852,grad_norm: 0.9999991210238961, iteration: 366014
loss: 0.9821279048919678,grad_norm: 0.8339710861364764, iteration: 366015
loss: 1.0200152397155762,grad_norm: 0.8265995758796026, iteration: 366016
loss: 1.1424365043640137,grad_norm: 0.9999994057392319, iteration: 366017
loss: 1.0278202295303345,grad_norm: 0.8430661860375976, iteration: 366018
loss: 0.9956904053688049,grad_norm: 0.7659877778864201, iteration: 366019
loss: 1.0459954738616943,grad_norm: 0.8147763401401911, iteration: 366020
loss: 0.9925004839897156,grad_norm: 0.7906442805882976, iteration: 366021
loss: 1.0017671585083008,grad_norm: 0.852911910639156, iteration: 366022
loss: 0.9893709421157837,grad_norm: 0.8392644412135887, iteration: 366023
loss: 1.0406303405761719,grad_norm: 0.8636106412912148, iteration: 366024
loss: 1.0340951681137085,grad_norm: 0.9809958321989921, iteration: 366025
loss: 0.9995115995407104,grad_norm: 0.7494916204958973, iteration: 366026
loss: 1.0538744926452637,grad_norm: 0.994179414739538, iteration: 366027
loss: 1.0832823514938354,grad_norm: 0.9999998180154634, iteration: 366028
loss: 1.0224331617355347,grad_norm: 0.9500850646246817, iteration: 366029
loss: 1.118897795677185,grad_norm: 0.9999994870183092, iteration: 366030
loss: 1.0353355407714844,grad_norm: 0.9999991929011275, iteration: 366031
loss: 0.9771460890769958,grad_norm: 0.9999992122681197, iteration: 366032
loss: 1.031284213066101,grad_norm: 1.0000000202740917, iteration: 366033
loss: 1.0784000158309937,grad_norm: 0.9999997311854966, iteration: 366034
loss: 1.0267469882965088,grad_norm: 0.9999999039891063, iteration: 366035
loss: 0.9756476283073425,grad_norm: 0.8771731574504932, iteration: 366036
loss: 0.9752881526947021,grad_norm: 0.8680032525483354, iteration: 366037
loss: 1.0236096382141113,grad_norm: 0.9866371674165927, iteration: 366038
loss: 1.0155922174453735,grad_norm: 0.817624653270868, iteration: 366039
loss: 1.052748203277588,grad_norm: 0.9999996779016538, iteration: 366040
loss: 0.9750762581825256,grad_norm: 0.855383713741928, iteration: 366041
loss: 1.0686395168304443,grad_norm: 0.9084265136965082, iteration: 366042
loss: 0.9954016804695129,grad_norm: 0.7743545042915695, iteration: 366043
loss: 0.9827131032943726,grad_norm: 0.6899279695975292, iteration: 366044
loss: 1.0311599969863892,grad_norm: 0.686167076674263, iteration: 366045
loss: 0.9784102439880371,grad_norm: 0.7363173035718631, iteration: 366046
loss: 1.025265097618103,grad_norm: 0.9999992641514093, iteration: 366047
loss: 1.068865180015564,grad_norm: 0.9999992692934957, iteration: 366048
loss: 1.0068261623382568,grad_norm: 0.6660992789465077, iteration: 366049
loss: 1.1138317584991455,grad_norm: 0.9999991300483771, iteration: 366050
loss: 1.026898741722107,grad_norm: 0.7725985330439992, iteration: 366051
loss: 0.984932541847229,grad_norm: 0.7998328622529626, iteration: 366052
loss: 1.0190532207489014,grad_norm: 0.8851203157802553, iteration: 366053
loss: 1.1276310682296753,grad_norm: 0.9999997035659202, iteration: 366054
loss: 1.0045011043548584,grad_norm: 0.9999997762037487, iteration: 366055
loss: 1.0595513582229614,grad_norm: 0.9999997665636063, iteration: 366056
loss: 1.001155972480774,grad_norm: 0.8134483362112562, iteration: 366057
loss: 1.0648771524429321,grad_norm: 0.9999990597103086, iteration: 366058
loss: 1.0222972631454468,grad_norm: 0.8123046153583758, iteration: 366059
loss: 0.9936556220054626,grad_norm: 0.9006302783087371, iteration: 366060
loss: 1.0909022092819214,grad_norm: 0.9799558120865364, iteration: 366061
loss: 1.0270441770553589,grad_norm: 0.9999998690427764, iteration: 366062
loss: 0.9675461649894714,grad_norm: 0.8186324428537024, iteration: 366063
loss: 1.0753165483474731,grad_norm: 0.999999046208669, iteration: 366064
loss: 1.0241963863372803,grad_norm: 0.7481553615864373, iteration: 366065
loss: 1.0592918395996094,grad_norm: 0.9999997907804986, iteration: 366066
loss: 1.0844957828521729,grad_norm: 1.0000000188927234, iteration: 366067
loss: 1.0187093019485474,grad_norm: 0.9063627958964444, iteration: 366068
loss: 0.9654390215873718,grad_norm: 0.9129430434077813, iteration: 366069
loss: 1.0643740892410278,grad_norm: 0.8944591422627293, iteration: 366070
loss: 1.0981879234313965,grad_norm: 0.9999994545358946, iteration: 366071
loss: 0.9812751412391663,grad_norm: 0.9999991656566228, iteration: 366072
loss: 0.9980536103248596,grad_norm: 0.877416279244346, iteration: 366073
loss: 1.177141785621643,grad_norm: 0.9999995005181721, iteration: 366074
loss: 1.0501405000686646,grad_norm: 0.816136083915212, iteration: 366075
loss: 1.0055713653564453,grad_norm: 0.7171249447443017, iteration: 366076
loss: 1.0222703218460083,grad_norm: 0.9685304325484198, iteration: 366077
loss: 1.166498064994812,grad_norm: 0.9999997936782399, iteration: 366078
loss: 0.9935437440872192,grad_norm: 0.782372155576642, iteration: 366079
loss: 0.9722524285316467,grad_norm: 0.9999990236137113, iteration: 366080
loss: 1.009937047958374,grad_norm: 0.8423523230956904, iteration: 366081
loss: 0.97135329246521,grad_norm: 0.6855999656669896, iteration: 366082
loss: 1.0199517011642456,grad_norm: 0.9999998511351131, iteration: 366083
loss: 1.1639485359191895,grad_norm: 0.9999996797953467, iteration: 366084
loss: 0.9714975357055664,grad_norm: 0.8278212775862634, iteration: 366085
loss: 0.9702359437942505,grad_norm: 0.7945066044725331, iteration: 366086
loss: 1.000442624092102,grad_norm: 0.9560911409358994, iteration: 366087
loss: 0.9898933172225952,grad_norm: 0.7515582805573281, iteration: 366088
loss: 0.9973416328430176,grad_norm: 0.8801389773608075, iteration: 366089
loss: 1.0252717733383179,grad_norm: 0.7402538194190112, iteration: 366090
loss: 1.0563907623291016,grad_norm: 0.9999998112680563, iteration: 366091
loss: 1.0508111715316772,grad_norm: 0.9762096011233967, iteration: 366092
loss: 0.9965229034423828,grad_norm: 0.7453511504236932, iteration: 366093
loss: 0.9654752016067505,grad_norm: 0.9354339300651657, iteration: 366094
loss: 0.9907520413398743,grad_norm: 0.7728187621135476, iteration: 366095
loss: 0.9980332255363464,grad_norm: 0.9999995028727404, iteration: 366096
loss: 1.095950961112976,grad_norm: 0.9999999061007767, iteration: 366097
loss: 1.096218228340149,grad_norm: 0.9999996561817505, iteration: 366098
loss: 0.9998663067817688,grad_norm: 0.8049395540504055, iteration: 366099
loss: 1.0319170951843262,grad_norm: 0.9999999227273669, iteration: 366100
loss: 0.9676201343536377,grad_norm: 0.8778113040356061, iteration: 366101
loss: 0.9797505140304565,grad_norm: 0.834080577805275, iteration: 366102
loss: 1.0070842504501343,grad_norm: 0.8935410397987185, iteration: 366103
loss: 1.0322784185409546,grad_norm: 0.9738153795002146, iteration: 366104
loss: 1.026581048965454,grad_norm: 0.9999991791417183, iteration: 366105
loss: 1.0130246877670288,grad_norm: 0.9999995471266173, iteration: 366106
loss: 1.043813943862915,grad_norm: 0.9999992979820332, iteration: 366107
loss: 1.0527859926223755,grad_norm: 0.9999990685857051, iteration: 366108
loss: 1.0361533164978027,grad_norm: 0.9999991866439837, iteration: 366109
loss: 1.0094448328018188,grad_norm: 0.9518418009118119, iteration: 366110
loss: 1.0644097328186035,grad_norm: 0.9334199930095664, iteration: 366111
loss: 0.989963173866272,grad_norm: 0.9084630678652597, iteration: 366112
loss: 1.0274395942687988,grad_norm: 0.7196437364456689, iteration: 366113
loss: 1.0168561935424805,grad_norm: 0.7500670372591247, iteration: 366114
loss: 1.050362229347229,grad_norm: 0.99999919146824, iteration: 366115
loss: 0.9825766086578369,grad_norm: 0.8543460792514034, iteration: 366116
loss: 1.0109944343566895,grad_norm: 0.9999995909024788, iteration: 366117
loss: 1.0152219533920288,grad_norm: 0.7804918687605149, iteration: 366118
loss: 0.9898189306259155,grad_norm: 0.9999993725147391, iteration: 366119
loss: 1.0163769721984863,grad_norm: 0.7531711711520659, iteration: 366120
loss: 1.0365357398986816,grad_norm: 0.8624902587744671, iteration: 366121
loss: 1.1223187446594238,grad_norm: 0.8939446533555669, iteration: 366122
loss: 1.0577976703643799,grad_norm: 0.8160585700864528, iteration: 366123
loss: 1.018228530883789,grad_norm: 0.9613338068612883, iteration: 366124
loss: 1.0043591260910034,grad_norm: 0.7329402087869972, iteration: 366125
loss: 1.033501148223877,grad_norm: 0.9762050817682946, iteration: 366126
loss: 0.9909494519233704,grad_norm: 0.9999989868766798, iteration: 366127
loss: 1.0452041625976562,grad_norm: 0.755351313811187, iteration: 366128
loss: 0.9657036066055298,grad_norm: 0.809288722599295, iteration: 366129
loss: 1.0186935663223267,grad_norm: 0.7549668468978019, iteration: 366130
loss: 0.9810780882835388,grad_norm: 0.7822924515523041, iteration: 366131
loss: 1.008271336555481,grad_norm: 0.9324872276360667, iteration: 366132
loss: 0.9868670701980591,grad_norm: 0.7736407054016298, iteration: 366133
loss: 1.013007640838623,grad_norm: 0.7449630600765439, iteration: 366134
loss: 1.0227047204971313,grad_norm: 0.7404737357428205, iteration: 366135
loss: 1.0275206565856934,grad_norm: 0.8770600071630498, iteration: 366136
loss: 0.9592229127883911,grad_norm: 0.8886375454163878, iteration: 366137
loss: 1.031610369682312,grad_norm: 0.9235422575977748, iteration: 366138
loss: 1.0407401323318481,grad_norm: 0.9856756587009299, iteration: 366139
loss: 0.9849433302879333,grad_norm: 0.729508396174902, iteration: 366140
loss: 1.055290699005127,grad_norm: 0.6677464859304031, iteration: 366141
loss: 1.026416301727295,grad_norm: 0.7184763993862692, iteration: 366142
loss: 0.9761531352996826,grad_norm: 0.7943642066599603, iteration: 366143
loss: 1.029767394065857,grad_norm: 0.8098040128377394, iteration: 366144
loss: 1.0044960975646973,grad_norm: 0.7816226578643881, iteration: 366145
loss: 1.0102555751800537,grad_norm: 0.743045928656622, iteration: 366146
loss: 0.9959753751754761,grad_norm: 0.8736394255829308, iteration: 366147
loss: 1.090983271598816,grad_norm: 0.9999992905431605, iteration: 366148
loss: 0.9789128303527832,grad_norm: 0.7350466431229922, iteration: 366149
loss: 0.9977082014083862,grad_norm: 0.9683732377713111, iteration: 366150
loss: 0.9811549186706543,grad_norm: 0.878585729710462, iteration: 366151
loss: 1.018772840499878,grad_norm: 0.8230178263456388, iteration: 366152
loss: 0.9936773777008057,grad_norm: 0.8904398046408794, iteration: 366153
loss: 0.9856489300727844,grad_norm: 0.8026482589413615, iteration: 366154
loss: 1.039358139038086,grad_norm: 0.9999999546232287, iteration: 366155
loss: 1.0211457014083862,grad_norm: 0.7652452052518913, iteration: 366156
loss: 1.0000120401382446,grad_norm: 0.8666440782761254, iteration: 366157
loss: 1.1276862621307373,grad_norm: 0.9999991235942719, iteration: 366158
loss: 1.0610986948013306,grad_norm: 0.9999994454394479, iteration: 366159
loss: 1.0157526731491089,grad_norm: 0.8249470646254554, iteration: 366160
loss: 1.1041098833084106,grad_norm: 0.999999080778795, iteration: 366161
loss: 0.9928691983222961,grad_norm: 0.7805612539857445, iteration: 366162
loss: 1.0323807001113892,grad_norm: 0.9999990115765099, iteration: 366163
loss: 0.9943246245384216,grad_norm: 0.9392119861705217, iteration: 366164
loss: 1.0330945253372192,grad_norm: 0.8883530785738466, iteration: 366165
loss: 0.989339292049408,grad_norm: 0.8093731105593049, iteration: 366166
loss: 1.0004265308380127,grad_norm: 0.8176378842837944, iteration: 366167
loss: 0.9642531275749207,grad_norm: 0.7547586964111497, iteration: 366168
loss: 1.0092592239379883,grad_norm: 0.8816556384520495, iteration: 366169
loss: 1.0443243980407715,grad_norm: 0.74710566795773, iteration: 366170
loss: 1.0000280141830444,grad_norm: 0.8970904067471582, iteration: 366171
loss: 0.987878680229187,grad_norm: 0.8594210387620655, iteration: 366172
loss: 1.0269473791122437,grad_norm: 0.9999992794507272, iteration: 366173
loss: 1.0733623504638672,grad_norm: 0.9999996718671188, iteration: 366174
loss: 1.0158590078353882,grad_norm: 0.9883516128139824, iteration: 366175
loss: 1.0205409526824951,grad_norm: 0.7570871514720645, iteration: 366176
loss: 1.0071145296096802,grad_norm: 0.8819683866099315, iteration: 366177
loss: 1.0332688093185425,grad_norm: 0.8679584687016069, iteration: 366178
loss: 0.9852936267852783,grad_norm: 0.9009804329849872, iteration: 366179
loss: 1.0293430089950562,grad_norm: 0.9555312277219392, iteration: 366180
loss: 1.084288239479065,grad_norm: 0.9999992669166794, iteration: 366181
loss: 1.0044533014297485,grad_norm: 0.9999997005379256, iteration: 366182
loss: 1.1154046058654785,grad_norm: 0.8693660052638865, iteration: 366183
loss: 0.9791573286056519,grad_norm: 0.8061605806034087, iteration: 366184
loss: 1.0267126560211182,grad_norm: 0.8598900489433371, iteration: 366185
loss: 1.0208240747451782,grad_norm: 0.9999997083552321, iteration: 366186
loss: 1.008905291557312,grad_norm: 0.9128398798009102, iteration: 366187
loss: 0.9865306615829468,grad_norm: 0.9360421652532268, iteration: 366188
loss: 1.0575530529022217,grad_norm: 0.875876483361078, iteration: 366189
loss: 0.9657854437828064,grad_norm: 0.7691330876022393, iteration: 366190
loss: 0.9753362536430359,grad_norm: 0.7468887415697885, iteration: 366191
loss: 0.9889352917671204,grad_norm: 0.713864284444713, iteration: 366192
loss: 0.9678532481193542,grad_norm: 0.8840935630421738, iteration: 366193
loss: 1.0407942533493042,grad_norm: 0.9999991290577547, iteration: 366194
loss: 0.9600375890731812,grad_norm: 1.0000000458152702, iteration: 366195
loss: 1.059309482574463,grad_norm: 0.9360465672087676, iteration: 366196
loss: 1.0019097328186035,grad_norm: 0.9999996765850268, iteration: 366197
loss: 1.005196452140808,grad_norm: 0.7448463468235037, iteration: 366198
loss: 1.0278472900390625,grad_norm: 0.7259370509010792, iteration: 366199
loss: 1.0444694757461548,grad_norm: 0.9999999404881235, iteration: 366200
loss: 0.9636381268501282,grad_norm: 0.9272000542248672, iteration: 366201
loss: 0.9910810589790344,grad_norm: 0.9999994176279405, iteration: 366202
loss: 0.9975227117538452,grad_norm: 0.9546483481733902, iteration: 366203
loss: 1.0731210708618164,grad_norm: 0.999999122650742, iteration: 366204
loss: 0.9739564657211304,grad_norm: 0.9376350451563581, iteration: 366205
loss: 1.0525109767913818,grad_norm: 0.99999931135572, iteration: 366206
loss: 1.0506982803344727,grad_norm: 0.9999995378477374, iteration: 366207
loss: 1.0178292989730835,grad_norm: 0.7352408649713917, iteration: 366208
loss: 1.0096336603164673,grad_norm: 0.9102175475267212, iteration: 366209
loss: 1.0502982139587402,grad_norm: 0.9999995276083523, iteration: 366210
loss: 1.0311055183410645,grad_norm: 0.9147701637697453, iteration: 366211
loss: 1.0267407894134521,grad_norm: 0.8542270021980809, iteration: 366212
loss: 1.1204304695129395,grad_norm: 0.9999996488645803, iteration: 366213
loss: 1.1784871816635132,grad_norm: 0.9999999149331146, iteration: 366214
loss: 1.1720377206802368,grad_norm: 0.9999999466000039, iteration: 366215
loss: 0.9849528670310974,grad_norm: 0.9999992323552287, iteration: 366216
loss: 1.1095805168151855,grad_norm: 0.9999997868645228, iteration: 366217
loss: 1.0357494354248047,grad_norm: 0.7853149270567841, iteration: 366218
loss: 0.9933216571807861,grad_norm: 0.8690216658915606, iteration: 366219
loss: 0.9865686893463135,grad_norm: 0.9127195181367613, iteration: 366220
loss: 1.0265848636627197,grad_norm: 0.9999993106713594, iteration: 366221
loss: 1.0374170541763306,grad_norm: 0.8821087572858123, iteration: 366222
loss: 0.9839483499526978,grad_norm: 0.7896460273817243, iteration: 366223
loss: 0.9911267161369324,grad_norm: 0.8755928299061034, iteration: 366224
loss: 1.0244325399398804,grad_norm: 0.8565480783499051, iteration: 366225
loss: 1.0769338607788086,grad_norm: 0.9999990627358225, iteration: 366226
loss: 0.9910230040550232,grad_norm: 0.6692623978785515, iteration: 366227
loss: 1.0308964252471924,grad_norm: 0.9999994499163938, iteration: 366228
loss: 0.9748189449310303,grad_norm: 0.8707363386959633, iteration: 366229
loss: 0.9851349592208862,grad_norm: 0.749289786034492, iteration: 366230
loss: 1.019132375717163,grad_norm: 0.9999992869505372, iteration: 366231
loss: 1.0429291725158691,grad_norm: 0.9999991193051349, iteration: 366232
loss: 1.1010799407958984,grad_norm: 0.9425523285077729, iteration: 366233
loss: 0.9911471605300903,grad_norm: 0.7999636678514974, iteration: 366234
loss: 0.9772676825523376,grad_norm: 0.7186950339830918, iteration: 366235
loss: 1.0145193338394165,grad_norm: 0.9999993867230674, iteration: 366236
loss: 1.012657880783081,grad_norm: 0.6936133117136686, iteration: 366237
loss: 0.9843774437904358,grad_norm: 0.6706106215703972, iteration: 366238
loss: 1.0481305122375488,grad_norm: 0.7931712609838483, iteration: 366239
loss: 1.0150622129440308,grad_norm: 0.8018322780127475, iteration: 366240
loss: 1.1107332706451416,grad_norm: 0.9999991249924253, iteration: 366241
loss: 1.07277250289917,grad_norm: 0.8755932795047267, iteration: 366242
loss: 0.990851640701294,grad_norm: 0.7901131289928899, iteration: 366243
loss: 1.0925266742706299,grad_norm: 0.8244067135057594, iteration: 366244
loss: 1.0004727840423584,grad_norm: 0.6101784037866343, iteration: 366245
loss: 1.0070613622665405,grad_norm: 0.8607264427534995, iteration: 366246
loss: 1.0344128608703613,grad_norm: 0.8784216844514245, iteration: 366247
loss: 0.9995669722557068,grad_norm: 0.80267853133784, iteration: 366248
loss: 1.0537596940994263,grad_norm: 0.8468156021746166, iteration: 366249
loss: 0.9968910813331604,grad_norm: 0.6518024750277582, iteration: 366250
loss: 1.0526775121688843,grad_norm: 0.8451329898857901, iteration: 366251
loss: 0.9984488487243652,grad_norm: 0.832636014379988, iteration: 366252
loss: 1.032273769378662,grad_norm: 0.7860092694928728, iteration: 366253
loss: 0.9868285059928894,grad_norm: 0.8350504269508542, iteration: 366254
loss: 1.0342737436294556,grad_norm: 0.8143578690714897, iteration: 366255
loss: 0.9896466135978699,grad_norm: 0.8256068230164255, iteration: 366256
loss: 1.0669426918029785,grad_norm: 0.9999990422145681, iteration: 366257
loss: 0.9750553369522095,grad_norm: 0.8180858463697745, iteration: 366258
loss: 1.014930248260498,grad_norm: 0.8381258935700954, iteration: 366259
loss: 1.0064544677734375,grad_norm: 0.7016139533481348, iteration: 366260
loss: 0.9702455401420593,grad_norm: 0.7263493179433135, iteration: 366261
loss: 1.0108352899551392,grad_norm: 0.9029180684574815, iteration: 366262
loss: 0.9884836673736572,grad_norm: 0.8459728876564132, iteration: 366263
loss: 0.9913045763969421,grad_norm: 0.792191587887141, iteration: 366264
loss: 0.9723473787307739,grad_norm: 0.7316718432145294, iteration: 366265
loss: 1.0942975282669067,grad_norm: 0.9999998095520786, iteration: 366266
loss: 0.9881086945533752,grad_norm: 0.7095500847226942, iteration: 366267
loss: 1.0103561878204346,grad_norm: 0.8496735169346524, iteration: 366268
loss: 0.9985185265541077,grad_norm: 0.8388813989364052, iteration: 366269
loss: 0.9762668609619141,grad_norm: 0.9672745184933161, iteration: 366270
loss: 0.9997844696044922,grad_norm: 0.756164710721645, iteration: 366271
loss: 1.0817242860794067,grad_norm: 0.9999994983657576, iteration: 366272
loss: 0.9962764978408813,grad_norm: 0.8022432513876465, iteration: 366273
loss: 0.9864276051521301,grad_norm: 0.7882689501341043, iteration: 366274
loss: 0.9785284996032715,grad_norm: 0.7164260036615827, iteration: 366275
loss: 0.970672607421875,grad_norm: 0.8360953017989755, iteration: 366276
loss: 1.002943515777588,grad_norm: 0.7494430769346878, iteration: 366277
loss: 1.0191999673843384,grad_norm: 0.8524141397130433, iteration: 366278
loss: 1.0482203960418701,grad_norm: 0.7813349253455336, iteration: 366279
loss: 1.0183957815170288,grad_norm: 0.8143399621761245, iteration: 366280
loss: 1.0144908428192139,grad_norm: 0.8239429110632535, iteration: 366281
loss: 1.0259209871292114,grad_norm: 0.9116230216266576, iteration: 366282
loss: 1.0050538778305054,grad_norm: 0.7894861088192077, iteration: 366283
loss: 1.0273700952529907,grad_norm: 0.9999994267724841, iteration: 366284
loss: 1.0216975212097168,grad_norm: 0.8168541417677601, iteration: 366285
loss: 1.0143113136291504,grad_norm: 0.7928989422034302, iteration: 366286
loss: 0.9748949408531189,grad_norm: 0.9555545128425262, iteration: 366287
loss: 1.135170340538025,grad_norm: 0.8067589463964258, iteration: 366288
loss: 0.9894929528236389,grad_norm: 0.8239592441936423, iteration: 366289
loss: 1.035232424736023,grad_norm: 0.9999999465119805, iteration: 366290
loss: 0.9736989140510559,grad_norm: 0.858082599964035, iteration: 366291
loss: 1.006532073020935,grad_norm: 0.713301359721975, iteration: 366292
loss: 1.0105931758880615,grad_norm: 0.8766868175223778, iteration: 366293
loss: 1.0254693031311035,grad_norm: 0.8766768058113507, iteration: 366294
loss: 1.001231074333191,grad_norm: 0.9999995867201046, iteration: 366295
loss: 1.0351611375808716,grad_norm: 0.8293510843409666, iteration: 366296
loss: 1.0222264528274536,grad_norm: 0.8073196001733277, iteration: 366297
loss: 0.9948264360427856,grad_norm: 0.9999991516662124, iteration: 366298
loss: 1.0367341041564941,grad_norm: 0.9297923281402678, iteration: 366299
loss: 1.0101207494735718,grad_norm: 0.8745884284873576, iteration: 366300
loss: 0.9711365699768066,grad_norm: 0.7269097346058818, iteration: 366301
loss: 0.9907834529876709,grad_norm: 0.7953836623774204, iteration: 366302
loss: 0.9940354228019714,grad_norm: 0.8140752125606637, iteration: 366303
loss: 0.9864379167556763,grad_norm: 0.7691839493596779, iteration: 366304
loss: 1.022948145866394,grad_norm: 0.7743116957486256, iteration: 366305
loss: 1.0897011756896973,grad_norm: 0.9253816179713086, iteration: 366306
loss: 0.9710734486579895,grad_norm: 0.8155060549478534, iteration: 366307
loss: 1.0217218399047852,grad_norm: 0.9999995179043979, iteration: 366308
loss: 0.9980964660644531,grad_norm: 0.7609327815043448, iteration: 366309
loss: 0.9954324960708618,grad_norm: 0.6751715307171486, iteration: 366310
loss: 0.9822870492935181,grad_norm: 0.853498326726219, iteration: 366311
loss: 0.96156907081604,grad_norm: 0.886065830842658, iteration: 366312
loss: 1.0460916757583618,grad_norm: 0.9999991773034456, iteration: 366313
loss: 1.028923511505127,grad_norm: 0.8211272334727868, iteration: 366314
loss: 0.9487571120262146,grad_norm: 0.9460766922698077, iteration: 366315
loss: 1.000532627105713,grad_norm: 0.731445427857859, iteration: 366316
loss: 1.0117285251617432,grad_norm: 0.8832881791238546, iteration: 366317
loss: 1.0265674591064453,grad_norm: 0.8530237754894376, iteration: 366318
loss: 0.9693437218666077,grad_norm: 0.8178531312415115, iteration: 366319
loss: 0.9934748411178589,grad_norm: 0.711461947489818, iteration: 366320
loss: 1.005579948425293,grad_norm: 0.999999107777179, iteration: 366321
loss: 1.0088961124420166,grad_norm: 0.7183242876062863, iteration: 366322
loss: 1.0086228847503662,grad_norm: 0.7621519232760608, iteration: 366323
loss: 1.0247069597244263,grad_norm: 0.9999998457398435, iteration: 366324
loss: 1.0035263299942017,grad_norm: 0.9171256135084138, iteration: 366325
loss: 1.0015201568603516,grad_norm: 0.9659429466170983, iteration: 366326
loss: 1.069094181060791,grad_norm: 0.9999990472760589, iteration: 366327
loss: 1.00698721408844,grad_norm: 0.9385675223384803, iteration: 366328
loss: 1.0415176153182983,grad_norm: 0.9999992813011036, iteration: 366329
loss: 1.0011804103851318,grad_norm: 0.8017250533321101, iteration: 366330
loss: 1.0026265382766724,grad_norm: 0.6971090029039269, iteration: 366331
loss: 1.0889192819595337,grad_norm: 0.9999999431959794, iteration: 366332
loss: 1.0523024797439575,grad_norm: 0.9999991296412781, iteration: 366333
loss: 1.1911481618881226,grad_norm: 0.8554878533196926, iteration: 366334
loss: 1.0146924257278442,grad_norm: 0.7589130533483964, iteration: 366335
loss: 1.0563864707946777,grad_norm: 0.9999993348312272, iteration: 366336
loss: 0.9689064621925354,grad_norm: 0.7467957818747816, iteration: 366337
loss: 0.9651556611061096,grad_norm: 0.9384900693992119, iteration: 366338
loss: 1.076941728591919,grad_norm: 0.8946400443659053, iteration: 366339
loss: 1.0484622716903687,grad_norm: 0.7043691646620968, iteration: 366340
loss: 0.9688848853111267,grad_norm: 0.6384758042580846, iteration: 366341
loss: 0.986382246017456,grad_norm: 0.7742111914496973, iteration: 366342
loss: 1.0198825597763062,grad_norm: 0.7534211530511351, iteration: 366343
loss: 1.0112398862838745,grad_norm: 0.8939062019263609, iteration: 366344
loss: 1.0222539901733398,grad_norm: 0.7841754347671068, iteration: 366345
loss: 1.0101224184036255,grad_norm: 0.6807251236696281, iteration: 366346
loss: 1.004638910293579,grad_norm: 0.705351973524073, iteration: 366347
loss: 1.0359034538269043,grad_norm: 0.8572735988197321, iteration: 366348
loss: 0.9837003946304321,grad_norm: 0.7689697896001431, iteration: 366349
loss: 0.9795151352882385,grad_norm: 0.8337542856780072, iteration: 366350
loss: 1.0498963594436646,grad_norm: 0.9999990524533118, iteration: 366351
loss: 1.0074704885482788,grad_norm: 0.7370210290332742, iteration: 366352
loss: 1.0485267639160156,grad_norm: 0.9999992383334774, iteration: 366353
loss: 1.0005241632461548,grad_norm: 0.8590084107886421, iteration: 366354
loss: 1.0131723880767822,grad_norm: 0.826579876954202, iteration: 366355
loss: 1.019302248954773,grad_norm: 0.7371040837282995, iteration: 366356
loss: 0.967296302318573,grad_norm: 0.7689006490887083, iteration: 366357
loss: 0.96543949842453,grad_norm: 0.7398390437199543, iteration: 366358
loss: 1.098941445350647,grad_norm: 0.9753953177823728, iteration: 366359
loss: 0.965355396270752,grad_norm: 0.7760578801107593, iteration: 366360
loss: 1.0527005195617676,grad_norm: 0.9999995082355083, iteration: 366361
loss: 1.0923957824707031,grad_norm: 0.9999992326097491, iteration: 366362
loss: 1.1181119680404663,grad_norm: 0.9999993454010343, iteration: 366363
loss: 1.0201630592346191,grad_norm: 0.743874479024711, iteration: 366364
loss: 0.989769697189331,grad_norm: 0.6911417766595681, iteration: 366365
loss: 1.0623887777328491,grad_norm: 0.7871600562749334, iteration: 366366
loss: 0.9978733658790588,grad_norm: 0.9761180272610654, iteration: 366367
loss: 0.9971112012863159,grad_norm: 0.7242723808000482, iteration: 366368
loss: 0.9900020360946655,grad_norm: 0.8541092003835424, iteration: 366369
loss: 0.9909836649894714,grad_norm: 0.9909977220157044, iteration: 366370
loss: 1.0218384265899658,grad_norm: 0.7108999232022746, iteration: 366371
loss: 1.0059559345245361,grad_norm: 0.7981688870032935, iteration: 366372
loss: 1.021061658859253,grad_norm: 0.8439298059381686, iteration: 366373
loss: 1.036112904548645,grad_norm: 0.910188575819126, iteration: 366374
loss: 0.9788733124732971,grad_norm: 0.8898649757268766, iteration: 366375
loss: 1.0439653396606445,grad_norm: 0.9999995032075617, iteration: 366376
loss: 1.0053282976150513,grad_norm: 0.7294117413516601, iteration: 366377
loss: 1.0430039167404175,grad_norm: 0.9693718008704489, iteration: 366378
loss: 1.0911152362823486,grad_norm: 0.9999991750457863, iteration: 366379
loss: 1.0254849195480347,grad_norm: 0.8243955663133672, iteration: 366380
loss: 1.1141409873962402,grad_norm: 0.9999991045497763, iteration: 366381
loss: 1.036080241203308,grad_norm: 0.7839118257053058, iteration: 366382
loss: 1.0868721008300781,grad_norm: 0.9127722058743387, iteration: 366383
loss: 1.0407527685165405,grad_norm: 0.9115936703192149, iteration: 366384
loss: 1.0131765604019165,grad_norm: 0.8320605758603462, iteration: 366385
loss: 1.015273928642273,grad_norm: 0.9059220520528236, iteration: 366386
loss: 0.9945621490478516,grad_norm: 0.8820959386737721, iteration: 366387
loss: 1.0555435419082642,grad_norm: 0.889525027374548, iteration: 366388
loss: 0.9991008043289185,grad_norm: 0.8720400657378355, iteration: 366389
loss: 0.9651812314987183,grad_norm: 0.9444487530658536, iteration: 366390
loss: 1.0700796842575073,grad_norm: 0.9234043290901783, iteration: 366391
loss: 1.0200499296188354,grad_norm: 0.8018122625941185, iteration: 366392
loss: 0.9700914621353149,grad_norm: 0.9999993536991681, iteration: 366393
loss: 0.9995390772819519,grad_norm: 0.7837008729327967, iteration: 366394
loss: 0.9781299829483032,grad_norm: 0.7965931099796356, iteration: 366395
loss: 1.0219730138778687,grad_norm: 0.7966865983680015, iteration: 366396
loss: 1.00874924659729,grad_norm: 0.6633327511971115, iteration: 366397
loss: 1.0244743824005127,grad_norm: 0.8583234943341517, iteration: 366398
loss: 1.0174953937530518,grad_norm: 0.7617399628978202, iteration: 366399
loss: 1.0309773683547974,grad_norm: 0.7715426369218167, iteration: 366400
loss: 0.9772461652755737,grad_norm: 0.7639109869242935, iteration: 366401
loss: 0.9781341552734375,grad_norm: 0.8741307803224494, iteration: 366402
loss: 0.9682393074035645,grad_norm: 0.8985327933598828, iteration: 366403
loss: 0.9826506972312927,grad_norm: 0.8077626128017017, iteration: 366404
loss: 1.0930155515670776,grad_norm: 0.9999993239750694, iteration: 366405
loss: 1.028781533241272,grad_norm: 0.74821249861434, iteration: 366406
loss: 1.009494423866272,grad_norm: 0.8269315468792898, iteration: 366407
loss: 1.082740306854248,grad_norm: 0.9085963917429795, iteration: 366408
loss: 1.12748122215271,grad_norm: 0.8001196571035561, iteration: 366409
loss: 0.9944807291030884,grad_norm: 0.9512688284777733, iteration: 366410
loss: 1.0174546241760254,grad_norm: 0.8513607074371924, iteration: 366411
loss: 1.0334452390670776,grad_norm: 0.8342077738879787, iteration: 366412
loss: 0.9803839325904846,grad_norm: 0.8663763785136337, iteration: 366413
loss: 0.9778398871421814,grad_norm: 0.8465808322318497, iteration: 366414
loss: 1.0340694189071655,grad_norm: 0.8179985022754909, iteration: 366415
loss: 0.9857645630836487,grad_norm: 0.9019170899229215, iteration: 366416
loss: 0.977435827255249,grad_norm: 0.8563958813551681, iteration: 366417
loss: 1.0379846096038818,grad_norm: 0.9999994195321932, iteration: 366418
loss: 1.0233063697814941,grad_norm: 0.8909351343449036, iteration: 366419
loss: 0.988154411315918,grad_norm: 0.8071071062359182, iteration: 366420
loss: 1.0016933679580688,grad_norm: 0.7793183049733169, iteration: 366421
loss: 1.0014716386795044,grad_norm: 0.9148383434448245, iteration: 366422
loss: 1.0083552598953247,grad_norm: 0.8083214089189019, iteration: 366423
loss: 1.0191107988357544,grad_norm: 0.9999993684009432, iteration: 366424
loss: 1.017364740371704,grad_norm: 0.8636745576216587, iteration: 366425
loss: 1.0123146772384644,grad_norm: 0.8769613836757912, iteration: 366426
loss: 1.0010623931884766,grad_norm: 0.8000294562886007, iteration: 366427
loss: 0.959099292755127,grad_norm: 0.7692880149298473, iteration: 366428
loss: 1.0426057577133179,grad_norm: 0.9999993692686086, iteration: 366429
loss: 1.0042164325714111,grad_norm: 0.7815125734242739, iteration: 366430
loss: 0.9976723790168762,grad_norm: 0.7348607015992592, iteration: 366431
loss: 1.0275572538375854,grad_norm: 0.8579316264540584, iteration: 366432
loss: 1.0013145208358765,grad_norm: 0.9166492237560574, iteration: 366433
loss: 1.0378731489181519,grad_norm: 0.9999999678352115, iteration: 366434
loss: 0.9973568916320801,grad_norm: 0.9999992772190693, iteration: 366435
loss: 0.9966778755187988,grad_norm: 0.9441868181176498, iteration: 366436
loss: 0.9999003410339355,grad_norm: 0.8783539068922143, iteration: 366437
loss: 1.05259108543396,grad_norm: 0.9999992403196751, iteration: 366438
loss: 0.9684267044067383,grad_norm: 0.9999996898769546, iteration: 366439
loss: 0.9824637174606323,grad_norm: 0.7609226232474491, iteration: 366440
loss: 1.0554426908493042,grad_norm: 0.9999993426408351, iteration: 366441
loss: 0.94595867395401,grad_norm: 0.9999991052929077, iteration: 366442
loss: 1.0034079551696777,grad_norm: 0.9999997634637794, iteration: 366443
loss: 1.0322407484054565,grad_norm: 0.861819069374322, iteration: 366444
loss: 0.9905101656913757,grad_norm: 0.8571852668238057, iteration: 366445
loss: 1.0029209852218628,grad_norm: 0.7368597217762135, iteration: 366446
loss: 1.0653126239776611,grad_norm: 0.9187788158490557, iteration: 366447
loss: 1.0094748735427856,grad_norm: 0.7689112719355852, iteration: 366448
loss: 1.0210862159729004,grad_norm: 0.8151141776402436, iteration: 366449
loss: 0.9634488224983215,grad_norm: 0.7819112652442457, iteration: 366450
loss: 0.989226758480072,grad_norm: 0.7090189171991492, iteration: 366451
loss: 1.0256428718566895,grad_norm: 0.9999992224090087, iteration: 366452
loss: 1.0487899780273438,grad_norm: 0.896021293141107, iteration: 366453
loss: 0.9715402126312256,grad_norm: 0.9014313952568163, iteration: 366454
loss: 1.0101038217544556,grad_norm: 0.8369364425275846, iteration: 366455
loss: 1.0296698808670044,grad_norm: 0.8118933810163231, iteration: 366456
loss: 1.09633207321167,grad_norm: 0.9999990688326784, iteration: 366457
loss: 1.0011606216430664,grad_norm: 0.8709784926530613, iteration: 366458
loss: 1.010703682899475,grad_norm: 0.8067284533370365, iteration: 366459
loss: 1.0140916109085083,grad_norm: 0.8528936070040548, iteration: 366460
loss: 1.0250401496887207,grad_norm: 0.9999993780214905, iteration: 366461
loss: 1.0527983903884888,grad_norm: 0.9999997710180104, iteration: 366462
loss: 1.0387089252471924,grad_norm: 0.9865115090847475, iteration: 366463
loss: 1.024705410003662,grad_norm: 0.7188408043291106, iteration: 366464
loss: 1.0306434631347656,grad_norm: 0.9999991931544465, iteration: 366465
loss: 1.0307095050811768,grad_norm: 0.7977037994113525, iteration: 366466
loss: 1.0544264316558838,grad_norm: 0.9999998519040962, iteration: 366467
loss: 1.006273865699768,grad_norm: 0.720426401320075, iteration: 366468
loss: 1.0667883157730103,grad_norm: 0.9999999206753429, iteration: 366469
loss: 0.9733033180236816,grad_norm: 0.9469879680380573, iteration: 366470
loss: 1.0541504621505737,grad_norm: 0.8356418851229112, iteration: 366471
loss: 0.9978220462799072,grad_norm: 0.7802528466890437, iteration: 366472
loss: 1.036422848701477,grad_norm: 0.9999994139337982, iteration: 366473
loss: 0.9953752160072327,grad_norm: 0.8932791507582697, iteration: 366474
loss: 0.9901397228240967,grad_norm: 0.7970593170349434, iteration: 366475
loss: 0.9786825776100159,grad_norm: 0.9999990929479058, iteration: 366476
loss: 0.9916027188301086,grad_norm: 0.9999991421369887, iteration: 366477
loss: 1.0360629558563232,grad_norm: 0.6523159013587146, iteration: 366478
loss: 0.9694591164588928,grad_norm: 0.9999990572535913, iteration: 366479
loss: 1.0561727285385132,grad_norm: 0.9999992727545695, iteration: 366480
loss: 1.020789384841919,grad_norm: 0.9617589736737968, iteration: 366481
loss: 0.9953073859214783,grad_norm: 0.7791574261738138, iteration: 366482
loss: 1.0217864513397217,grad_norm: 0.8452182764906165, iteration: 366483
loss: 1.0419782400131226,grad_norm: 0.9999993487218398, iteration: 366484
loss: 1.0843636989593506,grad_norm: 0.7450520423565942, iteration: 366485
loss: 0.9973024129867554,grad_norm: 0.8152810392233043, iteration: 366486
loss: 0.970038890838623,grad_norm: 0.846453994913852, iteration: 366487
loss: 1.1048096418380737,grad_norm: 0.8920785574594077, iteration: 366488
loss: 1.0208454132080078,grad_norm: 0.7387942305323728, iteration: 366489
loss: 0.9940946698188782,grad_norm: 0.7805071104786, iteration: 366490
loss: 1.0030171871185303,grad_norm: 0.685487903822, iteration: 366491
loss: 1.0203725099563599,grad_norm: 0.9948167813630157, iteration: 366492
loss: 0.9861282706260681,grad_norm: 0.7275931966940778, iteration: 366493
loss: 0.9990758895874023,grad_norm: 0.7474220558630397, iteration: 366494
loss: 1.0112202167510986,grad_norm: 0.778702512542444, iteration: 366495
loss: 1.0145491361618042,grad_norm: 0.7922187361566543, iteration: 366496
loss: 1.0044224262237549,grad_norm: 0.8238415806543676, iteration: 366497
loss: 0.9730517864227295,grad_norm: 0.8521895343639382, iteration: 366498
loss: 1.0441304445266724,grad_norm: 0.954938913796831, iteration: 366499
loss: 0.9851528406143188,grad_norm: 0.7415313810451675, iteration: 366500
loss: 1.0291881561279297,grad_norm: 0.9999990880952508, iteration: 366501
loss: 1.0853972434997559,grad_norm: 0.999999664043976, iteration: 366502
loss: 0.9796825051307678,grad_norm: 0.7715325600080755, iteration: 366503
loss: 1.015156626701355,grad_norm: 0.7701509862421819, iteration: 366504
loss: 0.997474193572998,grad_norm: 0.7189482511541414, iteration: 366505
loss: 1.0399986505508423,grad_norm: 0.7081478477111142, iteration: 366506
loss: 0.9642935991287231,grad_norm: 0.7305244312514939, iteration: 366507
loss: 0.995639443397522,grad_norm: 0.8055629079379295, iteration: 366508
loss: 1.0358378887176514,grad_norm: 0.6965184899493433, iteration: 366509
loss: 1.0165210962295532,grad_norm: 0.750973662910247, iteration: 366510
loss: 1.0023221969604492,grad_norm: 0.7679769471273187, iteration: 366511
loss: 1.0064315795898438,grad_norm: 0.8736660026071846, iteration: 366512
loss: 0.9586099982261658,grad_norm: 0.857236948184736, iteration: 366513
loss: 1.0081212520599365,grad_norm: 0.7651947298027927, iteration: 366514
loss: 0.9853562712669373,grad_norm: 0.7392581900332644, iteration: 366515
loss: 1.024728536605835,grad_norm: 0.9267635692495482, iteration: 366516
loss: 0.9479922652244568,grad_norm: 0.7369835515283426, iteration: 366517
loss: 1.0713648796081543,grad_norm: 0.9999995215611407, iteration: 366518
loss: 1.0305203199386597,grad_norm: 0.9361891469413847, iteration: 366519
loss: 0.9904791116714478,grad_norm: 0.6887328495304182, iteration: 366520
loss: 1.034021258354187,grad_norm: 0.8386917504435828, iteration: 366521
loss: 1.0410388708114624,grad_norm: 0.9999991947535173, iteration: 366522
loss: 0.9970411658287048,grad_norm: 0.7250307323792511, iteration: 366523
loss: 1.0228806734085083,grad_norm: 0.8813536861224592, iteration: 366524
loss: 0.983750581741333,grad_norm: 0.8046533514120182, iteration: 366525
loss: 0.9463968873023987,grad_norm: 0.7543076184136114, iteration: 366526
loss: 0.9653066992759705,grad_norm: 0.8986125365947414, iteration: 366527
loss: 1.0340436697006226,grad_norm: 0.9786019980427036, iteration: 366528
loss: 0.9830482602119446,grad_norm: 0.7433232729042538, iteration: 366529
loss: 1.002585768699646,grad_norm: 0.80972853850286, iteration: 366530
loss: 1.0476710796356201,grad_norm: 0.8243879519527789, iteration: 366531
loss: 1.0095608234405518,grad_norm: 0.9999990699902366, iteration: 366532
loss: 0.9765191078186035,grad_norm: 0.7911970288836937, iteration: 366533
loss: 0.9985126256942749,grad_norm: 0.9999989186882715, iteration: 366534
loss: 1.0549819469451904,grad_norm: 0.9193181829611512, iteration: 366535
loss: 1.005095362663269,grad_norm: 0.9198381272490674, iteration: 366536
loss: 1.035002589225769,grad_norm: 0.8972703984835724, iteration: 366537
loss: 0.9747514128684998,grad_norm: 0.7315521713224936, iteration: 366538
loss: 0.9943147897720337,grad_norm: 0.9999990886744615, iteration: 366539
loss: 0.9762550592422485,grad_norm: 0.810942033809323, iteration: 366540
loss: 1.0176104307174683,grad_norm: 0.8816727683818617, iteration: 366541
loss: 1.03317129611969,grad_norm: 0.9999994038070443, iteration: 366542
loss: 0.9851606488227844,grad_norm: 0.918046946290228, iteration: 366543
loss: 1.004830002784729,grad_norm: 0.9999993030957235, iteration: 366544
loss: 1.1036279201507568,grad_norm: 0.925772330123664, iteration: 366545
loss: 1.0194988250732422,grad_norm: 0.6706026617927476, iteration: 366546
loss: 0.9912670850753784,grad_norm: 0.7764033587593453, iteration: 366547
loss: 1.0503294467926025,grad_norm: 0.9999990682335271, iteration: 366548
loss: 1.077810287475586,grad_norm: 0.8129323287894852, iteration: 366549
loss: 0.9748725295066833,grad_norm: 0.7735374737218343, iteration: 366550
loss: 1.0215989351272583,grad_norm: 0.8655947376750451, iteration: 366551
loss: 1.0418568849563599,grad_norm: 0.817372837595618, iteration: 366552
loss: 1.0021945238113403,grad_norm: 0.9926003818256057, iteration: 366553
loss: 1.0129388570785522,grad_norm: 0.9892600739334261, iteration: 366554
loss: 1.0234546661376953,grad_norm: 0.6864194873267344, iteration: 366555
loss: 1.0113236904144287,grad_norm: 0.9999991777313145, iteration: 366556
loss: 0.9721342325210571,grad_norm: 0.8155969371054514, iteration: 366557
loss: 0.9757328629493713,grad_norm: 0.9349285545887144, iteration: 366558
loss: 0.9459376335144043,grad_norm: 0.8034275355063832, iteration: 366559
loss: 0.9973440766334534,grad_norm: 0.9311783830585503, iteration: 366560
loss: 0.9743300080299377,grad_norm: 0.9191623850774302, iteration: 366561
loss: 1.0193499326705933,grad_norm: 0.7135789655398305, iteration: 366562
loss: 0.9831584095954895,grad_norm: 0.8971726762021689, iteration: 366563
loss: 1.0128742456436157,grad_norm: 0.999999496540206, iteration: 366564
loss: 0.9954633116722107,grad_norm: 0.999999232372477, iteration: 366565
loss: 0.9992101192474365,grad_norm: 0.705627931736059, iteration: 366566
loss: 1.1135207414627075,grad_norm: 0.9999999459415567, iteration: 366567
loss: 1.0408821105957031,grad_norm: 0.9220380250951685, iteration: 366568
loss: 1.0119051933288574,grad_norm: 0.7057065510078743, iteration: 366569
loss: 1.027693510055542,grad_norm: 0.806840404984746, iteration: 366570
loss: 0.9575690627098083,grad_norm: 0.9324658419789684, iteration: 366571
loss: 1.0231258869171143,grad_norm: 0.6509402920670789, iteration: 366572
loss: 0.988821804523468,grad_norm: 0.8664236486759523, iteration: 366573
loss: 1.059033751487732,grad_norm: 0.9092976814429043, iteration: 366574
loss: 0.9656679034233093,grad_norm: 0.8657969105868674, iteration: 366575
loss: 0.9965760707855225,grad_norm: 0.8155059353398032, iteration: 366576
loss: 0.9517260789871216,grad_norm: 0.9655667395222091, iteration: 366577
loss: 1.0069010257720947,grad_norm: 0.8373142622456338, iteration: 366578
loss: 1.0467268228530884,grad_norm: 0.8420983341566, iteration: 366579
loss: 1.0445603132247925,grad_norm: 0.7026143409433984, iteration: 366580
loss: 0.9783888459205627,grad_norm: 0.8715857660904845, iteration: 366581
loss: 1.004174828529358,grad_norm: 0.793530662949172, iteration: 366582
loss: 1.0521388053894043,grad_norm: 0.9341555073325932, iteration: 366583
loss: 1.0106983184814453,grad_norm: 0.7477470650831816, iteration: 366584
loss: 0.9667850732803345,grad_norm: 0.7472451765636912, iteration: 366585
loss: 1.0499563217163086,grad_norm: 0.7939865068374184, iteration: 366586
loss: 1.0007877349853516,grad_norm: 0.9999993882042466, iteration: 366587
loss: 1.0147969722747803,grad_norm: 0.9369276989939906, iteration: 366588
loss: 1.0020421743392944,grad_norm: 0.7602008237157503, iteration: 366589
loss: 1.037449598312378,grad_norm: 0.856536207677478, iteration: 366590
loss: 1.0045427083969116,grad_norm: 0.8100114541259169, iteration: 366591
loss: 0.9888967275619507,grad_norm: 0.9279395283897818, iteration: 366592
loss: 0.968251645565033,grad_norm: 0.8772217925378725, iteration: 366593
loss: 0.9797857999801636,grad_norm: 0.9689694856244848, iteration: 366594
loss: 0.9683237671852112,grad_norm: 0.9999992646149137, iteration: 366595
loss: 0.9515712261199951,grad_norm: 0.8663689658781438, iteration: 366596
loss: 1.0067731142044067,grad_norm: 0.7023369718005558, iteration: 366597
loss: 0.9939450621604919,grad_norm: 0.7476829587020871, iteration: 366598
loss: 0.9619672894477844,grad_norm: 0.7455183467035549, iteration: 366599
loss: 1.0385621786117554,grad_norm: 0.8696598086331355, iteration: 366600
loss: 1.0059330463409424,grad_norm: 0.9067661730280867, iteration: 366601
loss: 0.9688896536827087,grad_norm: 0.750873858202979, iteration: 366602
loss: 1.0069853067398071,grad_norm: 0.9999994480450838, iteration: 366603
loss: 0.9724102020263672,grad_norm: 0.9019806532018548, iteration: 366604
loss: 1.0188909769058228,grad_norm: 0.82297555590564, iteration: 366605
loss: 1.0108405351638794,grad_norm: 0.9999992118990378, iteration: 366606
loss: 0.9402884840965271,grad_norm: 0.8412008056087441, iteration: 366607
loss: 1.0584509372711182,grad_norm: 0.7337690687660632, iteration: 366608
loss: 1.0044385194778442,grad_norm: 0.796034342054026, iteration: 366609
loss: 1.0005784034729004,grad_norm: 0.7996312092826884, iteration: 366610
loss: 0.9521650075912476,grad_norm: 0.7993651873930879, iteration: 366611
loss: 1.0535491704940796,grad_norm: 0.7752372617686183, iteration: 366612
loss: 0.9903481006622314,grad_norm: 0.8521377077517743, iteration: 366613
loss: 1.0113329887390137,grad_norm: 0.9237639469520987, iteration: 366614
loss: 1.0455836057662964,grad_norm: 0.7960637000538436, iteration: 366615
loss: 1.0198228359222412,grad_norm: 0.9999993890582825, iteration: 366616
loss: 1.0264629125595093,grad_norm: 0.9999995658693002, iteration: 366617
loss: 0.9734558463096619,grad_norm: 0.7295929448196451, iteration: 366618
loss: 0.9867309331893921,grad_norm: 0.6885075126130614, iteration: 366619
loss: 0.992708146572113,grad_norm: 0.8357040585437548, iteration: 366620
loss: 0.9961805939674377,grad_norm: 0.7135147852252737, iteration: 366621
loss: 1.0091243982315063,grad_norm: 0.7833464922176818, iteration: 366622
loss: 1.019412636756897,grad_norm: 0.7507318861574188, iteration: 366623
loss: 1.0009690523147583,grad_norm: 0.9999991345830243, iteration: 366624
loss: 1.0021134614944458,grad_norm: 0.9999991036625465, iteration: 366625
loss: 0.9871812462806702,grad_norm: 0.7906524611286878, iteration: 366626
loss: 1.0132883787155151,grad_norm: 0.8141557242420512, iteration: 366627
loss: 1.0909240245819092,grad_norm: 0.921092048105907, iteration: 366628
loss: 1.0071542263031006,grad_norm: 0.7573676841859098, iteration: 366629
loss: 1.0268388986587524,grad_norm: 0.9235435169199645, iteration: 366630
loss: 0.9812977313995361,grad_norm: 0.7208680294577237, iteration: 366631
loss: 1.0119785070419312,grad_norm: 0.8650210211719548, iteration: 366632
loss: 1.1407310962677002,grad_norm: 0.999999512113622, iteration: 366633
loss: 1.0225496292114258,grad_norm: 0.7914178821939675, iteration: 366634
loss: 0.9863247871398926,grad_norm: 0.7902767987038504, iteration: 366635
loss: 1.0457335710525513,grad_norm: 0.9360069301400089, iteration: 366636
loss: 1.0096551179885864,grad_norm: 0.7747691765263466, iteration: 366637
loss: 1.0037503242492676,grad_norm: 0.7263020574598931, iteration: 366638
loss: 0.9884387850761414,grad_norm: 0.7744981344282654, iteration: 366639
loss: 1.0223629474639893,grad_norm: 0.7777914393594888, iteration: 366640
loss: 1.0109041929244995,grad_norm: 0.7811606597555509, iteration: 366641
loss: 1.0282999277114868,grad_norm: 0.6551758328827749, iteration: 366642
loss: 0.9539172649383545,grad_norm: 0.9613780929985059, iteration: 366643
loss: 1.033445954322815,grad_norm: 0.8311900591125967, iteration: 366644
loss: 1.0639749765396118,grad_norm: 0.9999989245528785, iteration: 366645
loss: 1.0233757495880127,grad_norm: 0.9999997090572983, iteration: 366646
loss: 1.0072723627090454,grad_norm: 0.7968511708120185, iteration: 366647
loss: 1.017544150352478,grad_norm: 0.8901794659053606, iteration: 366648
loss: 0.9739246368408203,grad_norm: 0.9999992162096574, iteration: 366649
loss: 0.9832056164741516,grad_norm: 0.8654517843835742, iteration: 366650
loss: 0.9766153693199158,grad_norm: 0.742737125131164, iteration: 366651
loss: 1.00960373878479,grad_norm: 0.9884623562574606, iteration: 366652
loss: 1.0592436790466309,grad_norm: 0.9929986397310467, iteration: 366653
loss: 0.986605703830719,grad_norm: 0.9737132353395686, iteration: 366654
loss: 0.9953852891921997,grad_norm: 0.7272589310279528, iteration: 366655
loss: 0.9958778619766235,grad_norm: 0.8180644294445927, iteration: 366656
loss: 0.9841312170028687,grad_norm: 0.815384432383855, iteration: 366657
loss: 1.0148701667785645,grad_norm: 0.859041248288739, iteration: 366658
loss: 0.9891636371612549,grad_norm: 0.9163131248478437, iteration: 366659
loss: 1.072988510131836,grad_norm: 0.8534091080681688, iteration: 366660
loss: 1.047064185142517,grad_norm: 0.8547470742160167, iteration: 366661
loss: 1.0281338691711426,grad_norm: 0.6616987189473683, iteration: 366662
loss: 0.9879509210586548,grad_norm: 0.9999992109754898, iteration: 366663
loss: 1.023708701133728,grad_norm: 0.8189897232056023, iteration: 366664
loss: 1.0731523036956787,grad_norm: 0.9594636018501626, iteration: 366665
loss: 1.0062462091445923,grad_norm: 0.8423998708759054, iteration: 366666
loss: 1.0273709297180176,grad_norm: 0.8397719416382063, iteration: 366667
loss: 0.9789417386054993,grad_norm: 0.7361955563855925, iteration: 366668
loss: 1.0214641094207764,grad_norm: 0.9786572057199554, iteration: 366669
loss: 0.9750769734382629,grad_norm: 0.8244400665901291, iteration: 366670
loss: 1.003852367401123,grad_norm: 0.9368310092629879, iteration: 366671
loss: 0.9902323484420776,grad_norm: 0.7519507292098276, iteration: 366672
loss: 1.0088201761245728,grad_norm: 0.6509672299294622, iteration: 366673
loss: 0.9696333408355713,grad_norm: 0.9801003509417989, iteration: 366674
loss: 1.0601325035095215,grad_norm: 0.9415625694285271, iteration: 366675
loss: 0.9954962730407715,grad_norm: 0.9337953195667316, iteration: 366676
loss: 1.0192049741744995,grad_norm: 0.7678516234474954, iteration: 366677
loss: 1.001542568206787,grad_norm: 0.730224709712315, iteration: 366678
loss: 0.9825454950332642,grad_norm: 0.7350882268369604, iteration: 366679
loss: 0.9596653580665588,grad_norm: 0.8225912709991525, iteration: 366680
loss: 1.0237795114517212,grad_norm: 0.9999997800261351, iteration: 366681
loss: 0.9742360711097717,grad_norm: 0.7573788086123289, iteration: 366682
loss: 1.0928523540496826,grad_norm: 0.9999994113236452, iteration: 366683
loss: 0.9895556569099426,grad_norm: 0.744379642124545, iteration: 366684
loss: 1.0191885232925415,grad_norm: 0.8463619802870062, iteration: 366685
loss: 0.9803731441497803,grad_norm: 0.8144932105551742, iteration: 366686
loss: 0.960418701171875,grad_norm: 0.8777532420423931, iteration: 366687
loss: 1.0316786766052246,grad_norm: 0.8933834721961229, iteration: 366688
loss: 1.021749496459961,grad_norm: 0.7803998120565386, iteration: 366689
loss: 1.0622133016586304,grad_norm: 0.9999999018897995, iteration: 366690
loss: 1.0510493516921997,grad_norm: 0.7760636250250633, iteration: 366691
loss: 1.000186562538147,grad_norm: 0.8514792674224904, iteration: 366692
loss: 1.020748496055603,grad_norm: 0.7030169253308329, iteration: 366693
loss: 0.9679467678070068,grad_norm: 0.8137506679260128, iteration: 366694
loss: 1.0070382356643677,grad_norm: 0.9999992913212571, iteration: 366695
loss: 1.039369821548462,grad_norm: 0.9031810386077997, iteration: 366696
loss: 1.0267517566680908,grad_norm: 0.999999284812871, iteration: 366697
loss: 0.9934541583061218,grad_norm: 0.7985027799688029, iteration: 366698
loss: 1.0313775539398193,grad_norm: 0.7180741875231546, iteration: 366699
loss: 0.9985331296920776,grad_norm: 0.7633663376140721, iteration: 366700
loss: 1.0157896280288696,grad_norm: 0.7400926130446741, iteration: 366701
loss: 1.0103392601013184,grad_norm: 0.7749856789442231, iteration: 366702
loss: 0.9926661252975464,grad_norm: 0.8501704210187832, iteration: 366703
loss: 1.0272737741470337,grad_norm: 0.999999568522477, iteration: 366704
loss: 1.0267300605773926,grad_norm: 0.9999999319321448, iteration: 366705
loss: 1.0796947479248047,grad_norm: 0.9999991194798239, iteration: 366706
loss: 1.024222731590271,grad_norm: 0.7277107073555918, iteration: 366707
loss: 1.0371036529541016,grad_norm: 0.8834792428343342, iteration: 366708
loss: 1.086159110069275,grad_norm: 0.9999991166847082, iteration: 366709
loss: 0.9958263635635376,grad_norm: 0.8706914072671879, iteration: 366710
loss: 0.9857597351074219,grad_norm: 0.7938071687703863, iteration: 366711
loss: 1.045682668685913,grad_norm: 0.9999999094611914, iteration: 366712
loss: 0.9771511554718018,grad_norm: 0.8957941561126647, iteration: 366713
loss: 0.9972928762435913,grad_norm: 0.6926942433618932, iteration: 366714
loss: 1.026751160621643,grad_norm: 0.7655969021209318, iteration: 366715
loss: 1.0166887044906616,grad_norm: 0.7532074732420733, iteration: 366716
loss: 0.9910272359848022,grad_norm: 0.7467779917146838, iteration: 366717
loss: 0.986820638179779,grad_norm: 0.9999991601598233, iteration: 366718
loss: 1.140607237815857,grad_norm: 0.9999999317952417, iteration: 366719
loss: 0.99433434009552,grad_norm: 0.7823102542280426, iteration: 366720
loss: 1.0307682752609253,grad_norm: 0.8086868291473541, iteration: 366721
loss: 1.0223913192749023,grad_norm: 0.7900527484288136, iteration: 366722
loss: 0.990744411945343,grad_norm: 0.9999099172053816, iteration: 366723
loss: 1.0141009092330933,grad_norm: 0.7655510316303786, iteration: 366724
loss: 0.9869135022163391,grad_norm: 0.9073501165871272, iteration: 366725
loss: 1.0478004217147827,grad_norm: 0.9969157257343306, iteration: 366726
loss: 1.0157155990600586,grad_norm: 0.9692874217236812, iteration: 366727
loss: 1.009840965270996,grad_norm: 0.667073852849595, iteration: 366728
loss: 1.01958429813385,grad_norm: 0.7912398778242404, iteration: 366729
loss: 0.9935546517372131,grad_norm: 0.8672827231028595, iteration: 366730
loss: 1.0134265422821045,grad_norm: 0.9999998250420583, iteration: 366731
loss: 1.133705973625183,grad_norm: 0.9999998844422776, iteration: 366732
loss: 0.9957472681999207,grad_norm: 0.7365289406222337, iteration: 366733
loss: 1.0285513401031494,grad_norm: 0.8699517781138721, iteration: 366734
loss: 0.9749258756637573,grad_norm: 0.9999994026387117, iteration: 366735
loss: 1.0612934827804565,grad_norm: 0.9597822117573915, iteration: 366736
loss: 0.997277021408081,grad_norm: 0.8585329667595644, iteration: 366737
loss: 1.0337663888931274,grad_norm: 0.9255420832380107, iteration: 366738
loss: 1.0011568069458008,grad_norm: 0.9728491789245067, iteration: 366739
loss: 1.0207933187484741,grad_norm: 0.7643471849934179, iteration: 366740
loss: 1.0064069032669067,grad_norm: 0.7199771711112073, iteration: 366741
loss: 0.9554758071899414,grad_norm: 0.9366792607113839, iteration: 366742
loss: 1.0317872762680054,grad_norm: 0.9999994618602124, iteration: 366743
loss: 1.0101146697998047,grad_norm: 0.6719709763210868, iteration: 366744
loss: 1.0116602182388306,grad_norm: 0.9117847515324949, iteration: 366745
loss: 0.9939355850219727,grad_norm: 0.7963161390482435, iteration: 366746
loss: 1.0698438882827759,grad_norm: 0.9999995008034613, iteration: 366747
loss: 0.9922302961349487,grad_norm: 0.9999991199285234, iteration: 366748
loss: 0.991162896156311,grad_norm: 0.9885908752273829, iteration: 366749
loss: 1.1795334815979004,grad_norm: 0.9999995496632748, iteration: 366750
loss: 1.0254647731781006,grad_norm: 0.7128581506455536, iteration: 366751
loss: 0.9866907000541687,grad_norm: 0.8065851181336134, iteration: 366752
loss: 0.9940617084503174,grad_norm: 0.8440682213368932, iteration: 366753
loss: 0.982530415058136,grad_norm: 0.99999964315883, iteration: 366754
loss: 1.0084017515182495,grad_norm: 0.8259894206290633, iteration: 366755
loss: 1.0155906677246094,grad_norm: 0.9965254830734426, iteration: 366756
loss: 1.025918960571289,grad_norm: 0.999197271323551, iteration: 366757
loss: 0.9881439208984375,grad_norm: 0.9089870785551991, iteration: 366758
loss: 0.9973708391189575,grad_norm: 0.7374014948213297, iteration: 366759
loss: 1.0169106721878052,grad_norm: 0.8371938777525235, iteration: 366760
loss: 1.0314772129058838,grad_norm: 0.9999996627742386, iteration: 366761
loss: 1.0053646564483643,grad_norm: 0.7533038835642639, iteration: 366762
loss: 1.014567255973816,grad_norm: 0.7670063357280659, iteration: 366763
loss: 1.1004300117492676,grad_norm: 0.9999990105433708, iteration: 366764
loss: 0.9932693243026733,grad_norm: 0.8787272956857045, iteration: 366765
loss: 1.061401128768921,grad_norm: 0.9510132474319899, iteration: 366766
loss: 0.9772393703460693,grad_norm: 0.8104283921235087, iteration: 366767
loss: 1.0106747150421143,grad_norm: 0.9371237656758178, iteration: 366768
loss: 0.9882149696350098,grad_norm: 0.8079725197684643, iteration: 366769
loss: 0.981499195098877,grad_norm: 0.7368439398345257, iteration: 366770
loss: 1.028765320777893,grad_norm: 0.9999993514075562, iteration: 366771
loss: 1.005629301071167,grad_norm: 0.9575609346385974, iteration: 366772
loss: 1.0174227952957153,grad_norm: 0.8499597614354912, iteration: 366773
loss: 1.0250744819641113,grad_norm: 0.7628581575286252, iteration: 366774
loss: 1.0112814903259277,grad_norm: 0.8424330092797739, iteration: 366775
loss: 1.0370309352874756,grad_norm: 0.6357650459139099, iteration: 366776
loss: 0.9915244579315186,grad_norm: 0.7641266348040351, iteration: 366777
loss: 1.0035443305969238,grad_norm: 0.9222036870167246, iteration: 366778
loss: 1.0678024291992188,grad_norm: 0.9112547898514742, iteration: 366779
loss: 1.0039585828781128,grad_norm: 0.6893676025155903, iteration: 366780
loss: 0.9893631339073181,grad_norm: 0.6722014737809044, iteration: 366781
loss: 1.0337337255477905,grad_norm: 0.7776201359644266, iteration: 366782
loss: 0.9990902543067932,grad_norm: 0.8396076815781945, iteration: 366783
loss: 1.026360273361206,grad_norm: 0.9853319080339761, iteration: 366784
loss: 1.0140771865844727,grad_norm: 0.7701562890477851, iteration: 366785
loss: 1.0383169651031494,grad_norm: 0.9999996739999054, iteration: 366786
loss: 1.0438281297683716,grad_norm: 0.8323177784311053, iteration: 366787
loss: 0.9524860382080078,grad_norm: 0.8155434553623111, iteration: 366788
loss: 0.9906067848205566,grad_norm: 0.9981271652137952, iteration: 366789
loss: 0.9692516326904297,grad_norm: 0.99999914045122, iteration: 366790
loss: 1.0342674255371094,grad_norm: 0.9320668930691853, iteration: 366791
loss: 1.0357160568237305,grad_norm: 0.9999999191103085, iteration: 366792
loss: 1.0027369260787964,grad_norm: 0.8117285183849795, iteration: 366793
loss: 1.0160616636276245,grad_norm: 0.8294115971402009, iteration: 366794
loss: 0.9729400873184204,grad_norm: 0.9999990683704578, iteration: 366795
loss: 0.9992876648902893,grad_norm: 0.8142155620823076, iteration: 366796
loss: 0.9904366135597229,grad_norm: 0.811868650059402, iteration: 366797
loss: 0.9969537854194641,grad_norm: 0.7487325702571024, iteration: 366798
loss: 0.9788164496421814,grad_norm: 0.7697414547394433, iteration: 366799
loss: 0.978887140750885,grad_norm: 0.791625359788254, iteration: 366800
loss: 0.9984796643257141,grad_norm: 0.7770975812922005, iteration: 366801
loss: 1.0349102020263672,grad_norm: 0.6939738355443533, iteration: 366802
loss: 1.0002937316894531,grad_norm: 0.8275992893054259, iteration: 366803
loss: 1.0619616508483887,grad_norm: 0.9999991358906825, iteration: 366804
loss: 1.0179378986358643,grad_norm: 0.9999993161956207, iteration: 366805
loss: 1.0481473207473755,grad_norm: 0.9999995141749389, iteration: 366806
loss: 0.9480835199356079,grad_norm: 0.8913559574941526, iteration: 366807
loss: 0.9826453328132629,grad_norm: 0.9999990656248866, iteration: 366808
loss: 1.0117977857589722,grad_norm: 0.9999997839163877, iteration: 366809
loss: 1.0004616975784302,grad_norm: 0.8137915416519816, iteration: 366810
loss: 1.0228198766708374,grad_norm: 0.889995166395127, iteration: 366811
loss: 1.0061066150665283,grad_norm: 0.9999994281409118, iteration: 366812
loss: 0.9927554130554199,grad_norm: 0.8010076019155769, iteration: 366813
loss: 0.9861164093017578,grad_norm: 0.9999991859715651, iteration: 366814
loss: 0.9880138039588928,grad_norm: 0.7953383208685915, iteration: 366815
loss: 0.9983493685722351,grad_norm: 0.878193620526568, iteration: 366816
loss: 0.9888480305671692,grad_norm: 0.8007818012867342, iteration: 366817
loss: 1.0104796886444092,grad_norm: 0.7730420462504122, iteration: 366818
loss: 1.0203125476837158,grad_norm: 0.8110084725495129, iteration: 366819
loss: 0.9879496097564697,grad_norm: 0.7624495747578158, iteration: 366820
loss: 0.999942421913147,grad_norm: 0.7069208339976946, iteration: 366821
loss: 0.9790365695953369,grad_norm: 0.8893092447899701, iteration: 366822
loss: 0.9851053357124329,grad_norm: 0.9516817498269691, iteration: 366823
loss: 0.9827291369438171,grad_norm: 0.7936883678409569, iteration: 366824
loss: 1.065269112586975,grad_norm: 0.6666955184532785, iteration: 366825
loss: 1.0028305053710938,grad_norm: 0.7298565138886803, iteration: 366826
loss: 0.954420804977417,grad_norm: 0.9615715045124281, iteration: 366827
loss: 1.0093880891799927,grad_norm: 0.7578412212612549, iteration: 366828
loss: 1.0132009983062744,grad_norm: 0.8391312486149638, iteration: 366829
loss: 1.0337868928909302,grad_norm: 0.7371500659196419, iteration: 366830
loss: 0.9953533411026001,grad_norm: 0.8883878525762168, iteration: 366831
loss: 0.9967062473297119,grad_norm: 0.9702772474882819, iteration: 366832
loss: 1.0001239776611328,grad_norm: 0.9999992769961774, iteration: 366833
loss: 1.0126893520355225,grad_norm: 0.9077936185228482, iteration: 366834
loss: 0.9821569323539734,grad_norm: 0.7709572148857907, iteration: 366835
loss: 1.0172690153121948,grad_norm: 0.7792145402032205, iteration: 366836
loss: 0.9982127547264099,grad_norm: 0.9939462749528841, iteration: 366837
loss: 0.9670149683952332,grad_norm: 0.7025868199289925, iteration: 366838
loss: 1.0227702856063843,grad_norm: 0.9045111569048486, iteration: 366839
loss: 1.000044822692871,grad_norm: 0.8304478998015364, iteration: 366840
loss: 1.0252788066864014,grad_norm: 0.9999992065358624, iteration: 366841
loss: 1.0291072130203247,grad_norm: 0.8525712629018849, iteration: 366842
loss: 1.0089446306228638,grad_norm: 0.8210727821698452, iteration: 366843
loss: 0.9821668863296509,grad_norm: 0.8096052275603954, iteration: 366844
loss: 1.0474833250045776,grad_norm: 0.8308715615484047, iteration: 366845
loss: 1.0112148523330688,grad_norm: 0.779198468547853, iteration: 366846
loss: 0.9775587320327759,grad_norm: 0.7409811952414431, iteration: 366847
loss: 0.9690557718276978,grad_norm: 0.7082372837001772, iteration: 366848
loss: 0.9757382273674011,grad_norm: 0.7538504780095421, iteration: 366849
loss: 0.9730939865112305,grad_norm: 0.8009537765522449, iteration: 366850
loss: 0.9713689684867859,grad_norm: 0.7991794290060225, iteration: 366851
loss: 0.9633739590644836,grad_norm: 0.8326086098317838, iteration: 366852
loss: 1.0285961627960205,grad_norm: 0.8055227412561287, iteration: 366853
loss: 0.9544886350631714,grad_norm: 0.8067109976985698, iteration: 366854
loss: 0.9634998440742493,grad_norm: 0.9157913893953592, iteration: 366855
loss: 0.9971325993537903,grad_norm: 0.872575153670763, iteration: 366856
loss: 0.9809665679931641,grad_norm: 0.8953994668053957, iteration: 366857
loss: 1.0140564441680908,grad_norm: 0.9651395977793271, iteration: 366858
loss: 0.9922675490379333,grad_norm: 0.7234984013866018, iteration: 366859
loss: 0.9729459881782532,grad_norm: 0.9463515770141037, iteration: 366860
loss: 1.003954529762268,grad_norm: 0.7678160834058144, iteration: 366861
loss: 1.0058151483535767,grad_norm: 0.9905855955211526, iteration: 366862
loss: 1.005346417427063,grad_norm: 0.6981284028176445, iteration: 366863
loss: 0.9546821117401123,grad_norm: 0.71642657475793, iteration: 366864
loss: 0.9845536351203918,grad_norm: 0.7297731185003892, iteration: 366865
loss: 1.0228396654129028,grad_norm: 0.8600401498750473, iteration: 366866
loss: 1.0081347227096558,grad_norm: 0.8736629493496789, iteration: 366867
loss: 0.9992801547050476,grad_norm: 0.8884712957802113, iteration: 366868
loss: 0.9634646773338318,grad_norm: 0.7599323704775671, iteration: 366869
loss: 0.9652515053749084,grad_norm: 0.8035828406779492, iteration: 366870
loss: 0.9966222047805786,grad_norm: 0.7400194431463006, iteration: 366871
loss: 0.996738076210022,grad_norm: 0.7357135319913021, iteration: 366872
loss: 1.0662389993667603,grad_norm: 0.9999995695596081, iteration: 366873
loss: 1.041078805923462,grad_norm: 0.6672200477170085, iteration: 366874
loss: 0.9647433161735535,grad_norm: 0.982137125015331, iteration: 366875
loss: 0.9985083341598511,grad_norm: 0.8679103919477746, iteration: 366876
loss: 1.0598289966583252,grad_norm: 0.9304755566033789, iteration: 366877
loss: 0.986558735370636,grad_norm: 0.7606707139235658, iteration: 366878
loss: 0.9903184771537781,grad_norm: 0.8332372154734506, iteration: 366879
loss: 1.0010554790496826,grad_norm: 0.7362317686404257, iteration: 366880
loss: 1.0087411403656006,grad_norm: 0.8240496759409255, iteration: 366881
loss: 0.9888240098953247,grad_norm: 0.9363201047571864, iteration: 366882
loss: 1.0216763019561768,grad_norm: 0.782468994943192, iteration: 366883
loss: 1.0595580339431763,grad_norm: 0.704620029844694, iteration: 366884
loss: 0.9608291387557983,grad_norm: 0.6539099005364353, iteration: 366885
loss: 1.0457168817520142,grad_norm: 0.7736911394775914, iteration: 366886
loss: 1.0403164625167847,grad_norm: 0.9999997019288676, iteration: 366887
loss: 0.9876260757446289,grad_norm: 0.9107605564779587, iteration: 366888
loss: 0.9628087878227234,grad_norm: 0.9223816092213036, iteration: 366889
loss: 0.972802996635437,grad_norm: 0.844997219474338, iteration: 366890
loss: 1.0539783239364624,grad_norm: 0.708800959840406, iteration: 366891
loss: 1.0055437088012695,grad_norm: 0.9999992151084103, iteration: 366892
loss: 1.006363034248352,grad_norm: 0.7829578385484104, iteration: 366893
loss: 1.0407088994979858,grad_norm: 0.9160862477998971, iteration: 366894
loss: 0.9732860922813416,grad_norm: 0.7087393720272684, iteration: 366895
loss: 1.0182949304580688,grad_norm: 0.9726245853991318, iteration: 366896
loss: 1.0221753120422363,grad_norm: 0.9999994496820862, iteration: 366897
loss: 1.0116257667541504,grad_norm: 0.7522913780894909, iteration: 366898
loss: 0.9879052042961121,grad_norm: 0.7286294198113693, iteration: 366899
loss: 1.0226988792419434,grad_norm: 0.8465180628183211, iteration: 366900
loss: 1.0028108358383179,grad_norm: 0.9552896630408146, iteration: 366901
loss: 0.9723795056343079,grad_norm: 0.8872865435818305, iteration: 366902
loss: 0.9903964996337891,grad_norm: 0.9902814315127509, iteration: 366903
loss: 1.0571603775024414,grad_norm: 0.8829760122556932, iteration: 366904
loss: 1.022106409072876,grad_norm: 0.6529261184177421, iteration: 366905
loss: 1.000898838043213,grad_norm: 0.9999990760742955, iteration: 366906
loss: 0.9956286549568176,grad_norm: 0.7949195514227098, iteration: 366907
loss: 0.958163321018219,grad_norm: 0.7028403752624398, iteration: 366908
loss: 0.9885904788970947,grad_norm: 0.7380569232987425, iteration: 366909
loss: 0.9942293763160706,grad_norm: 0.9704365608628219, iteration: 366910
loss: 0.9983288645744324,grad_norm: 0.7511348728378094, iteration: 366911
loss: 1.0042445659637451,grad_norm: 0.9829347657381229, iteration: 366912
loss: 0.9994418621063232,grad_norm: 0.7979001909116692, iteration: 366913
loss: 0.9815823435783386,grad_norm: 0.7454781854615959, iteration: 366914
loss: 1.0862929821014404,grad_norm: 0.9104906021780562, iteration: 366915
loss: 0.9826484322547913,grad_norm: 0.8884172503565749, iteration: 366916
loss: 0.9677353501319885,grad_norm: 0.7638993882850473, iteration: 366917
loss: 1.013859510421753,grad_norm: 0.7105827507402311, iteration: 366918
loss: 1.0129023790359497,grad_norm: 0.6995754347095109, iteration: 366919
loss: 1.0166046619415283,grad_norm: 0.8456713172394212, iteration: 366920
loss: 1.020636796951294,grad_norm: 0.8538749693184808, iteration: 366921
loss: 0.9811131954193115,grad_norm: 0.8018050105206823, iteration: 366922
loss: 1.0192610025405884,grad_norm: 0.7885797365749322, iteration: 366923
loss: 1.017714500427246,grad_norm: 0.9999990654806467, iteration: 366924
loss: 0.9926905632019043,grad_norm: 0.822954644663053, iteration: 366925
loss: 0.993618369102478,grad_norm: 0.9233614488416915, iteration: 366926
loss: 0.9839436411857605,grad_norm: 0.6996180245759519, iteration: 366927
loss: 1.0118820667266846,grad_norm: 0.8269859342226306, iteration: 366928
loss: 1.097255825996399,grad_norm: 0.9999990370299215, iteration: 366929
loss: 1.032882809638977,grad_norm: 0.7547524484929541, iteration: 366930
loss: 0.990026593208313,grad_norm: 0.807702592096601, iteration: 366931
loss: 0.9886417984962463,grad_norm: 0.8473057370348318, iteration: 366932
loss: 1.0784279108047485,grad_norm: 0.9999997256337421, iteration: 366933
loss: 0.9908499121665955,grad_norm: 0.8628576995476068, iteration: 366934
loss: 1.0251232385635376,grad_norm: 0.8088122561244917, iteration: 366935
loss: 1.00163733959198,grad_norm: 0.8332634077205798, iteration: 366936
loss: 0.9733279943466187,grad_norm: 0.7606132009330902, iteration: 366937
loss: 1.0029914379119873,grad_norm: 0.7795116083692422, iteration: 366938
loss: 0.967437744140625,grad_norm: 0.6492120278164919, iteration: 366939
loss: 1.057224988937378,grad_norm: 0.7709259823010584, iteration: 366940
loss: 1.0146677494049072,grad_norm: 0.7526676150092783, iteration: 366941
loss: 0.984185516834259,grad_norm: 0.7844735889335721, iteration: 366942
loss: 1.020923376083374,grad_norm: 0.8968888508609467, iteration: 366943
loss: 1.001951813697815,grad_norm: 0.7657884357139049, iteration: 366944
loss: 0.9613613486289978,grad_norm: 0.7151240055080763, iteration: 366945
loss: 0.9789532423019409,grad_norm: 0.9963371759211913, iteration: 366946
loss: 0.9865993857383728,grad_norm: 0.7618316525289988, iteration: 366947
loss: 0.999120831489563,grad_norm: 0.7125453411405123, iteration: 366948
loss: 0.9951961040496826,grad_norm: 0.844092941523745, iteration: 366949
loss: 0.98469477891922,grad_norm: 0.8190165647209199, iteration: 366950
loss: 0.9924271702766418,grad_norm: 0.8742759512793743, iteration: 366951
loss: 1.127557396888733,grad_norm: 0.9999992691062383, iteration: 366952
loss: 1.0054364204406738,grad_norm: 0.7175135232615492, iteration: 366953
loss: 0.9912038445472717,grad_norm: 0.8157901198501532, iteration: 366954
loss: 1.002865195274353,grad_norm: 0.8870817047258236, iteration: 366955
loss: 1.0336257219314575,grad_norm: 0.8117087041872614, iteration: 366956
loss: 1.0041558742523193,grad_norm: 0.6961993550514588, iteration: 366957
loss: 0.9849410057067871,grad_norm: 0.768484013900612, iteration: 366958
loss: 1.004529356956482,grad_norm: 0.7007848280905074, iteration: 366959
loss: 1.0247842073440552,grad_norm: 0.8343503091663557, iteration: 366960
loss: 1.0244228839874268,grad_norm: 0.6948166376489895, iteration: 366961
loss: 0.9624223709106445,grad_norm: 0.9586951151705022, iteration: 366962
loss: 1.1409692764282227,grad_norm: 0.9999992500676463, iteration: 366963
loss: 1.0018635988235474,grad_norm: 0.9102761055498058, iteration: 366964
loss: 0.9876763820648193,grad_norm: 0.6865324386766408, iteration: 366965
loss: 1.0102365016937256,grad_norm: 0.8564147151123602, iteration: 366966
loss: 0.9524298310279846,grad_norm: 0.8577256620153157, iteration: 366967
loss: 0.9939817190170288,grad_norm: 0.9084758641848079, iteration: 366968
loss: 0.987206220626831,grad_norm: 0.8512361277915699, iteration: 366969
loss: 1.038495421409607,grad_norm: 0.8509496720051579, iteration: 366970
loss: 1.0751609802246094,grad_norm: 0.9999989878949905, iteration: 366971
loss: 0.979520857334137,grad_norm: 0.7387654847682911, iteration: 366972
loss: 1.044725775718689,grad_norm: 0.9999991123564729, iteration: 366973
loss: 1.007156252861023,grad_norm: 0.7840303533161512, iteration: 366974
loss: 1.0038796663284302,grad_norm: 0.8705356124543635, iteration: 366975
loss: 1.0070395469665527,grad_norm: 0.9999996533177857, iteration: 366976
loss: 0.9920730590820312,grad_norm: 0.7803523069940586, iteration: 366977
loss: 1.0342236757278442,grad_norm: 0.8956606966620142, iteration: 366978
loss: 1.0026545524597168,grad_norm: 0.7350199937302608, iteration: 366979
loss: 1.0382977724075317,grad_norm: 0.9999998734363714, iteration: 366980
loss: 1.0454845428466797,grad_norm: 0.8295626173087995, iteration: 366981
loss: 0.9765806198120117,grad_norm: 0.6224195224100789, iteration: 366982
loss: 0.9750630855560303,grad_norm: 0.8013449906102279, iteration: 366983
loss: 1.0128566026687622,grad_norm: 0.966004697452606, iteration: 366984
loss: 0.9876889586448669,grad_norm: 0.8550946380091805, iteration: 366985
loss: 1.0067142248153687,grad_norm: 0.7505919091877681, iteration: 366986
loss: 0.9983985424041748,grad_norm: 0.7721070659603816, iteration: 366987
loss: 1.0034563541412354,grad_norm: 0.8235000500054074, iteration: 366988
loss: 1.0104879140853882,grad_norm: 0.7526330043553932, iteration: 366989
loss: 0.9808867573738098,grad_norm: 0.8818574384107023, iteration: 366990
loss: 1.0049176216125488,grad_norm: 0.9185369897475439, iteration: 366991
loss: 1.00757896900177,grad_norm: 0.9999991213975753, iteration: 366992
loss: 0.9807953834533691,grad_norm: 0.9999992260969385, iteration: 366993
loss: 1.0578140020370483,grad_norm: 0.8018634275565734, iteration: 366994
loss: 0.9779578447341919,grad_norm: 0.8175229115744232, iteration: 366995
loss: 0.993380069732666,grad_norm: 0.8488220898670208, iteration: 366996
loss: 1.005142331123352,grad_norm: 0.6374953442864817, iteration: 366997
loss: 1.0257694721221924,grad_norm: 0.7549212095040592, iteration: 366998
loss: 1.0177990198135376,grad_norm: 0.8139196378801966, iteration: 366999
loss: 0.9945299625396729,grad_norm: 0.7641510374228098, iteration: 367000
loss: 0.9808664321899414,grad_norm: 0.9436741382931725, iteration: 367001
loss: 1.0041894912719727,grad_norm: 0.8054869889996663, iteration: 367002
loss: 1.0349444150924683,grad_norm: 0.8733691383001798, iteration: 367003
loss: 1.020952820777893,grad_norm: 0.7962879581822657, iteration: 367004
loss: 1.0560669898986816,grad_norm: 0.9273197222396256, iteration: 367005
loss: 0.9908194541931152,grad_norm: 0.6877225553778382, iteration: 367006
loss: 1.003799319267273,grad_norm: 0.677503169200692, iteration: 367007
loss: 1.0004867315292358,grad_norm: 0.8831993958754596, iteration: 367008
loss: 1.0641894340515137,grad_norm: 0.7591188064630856, iteration: 367009
loss: 1.0140453577041626,grad_norm: 0.874945446975173, iteration: 367010
loss: 1.1387200355529785,grad_norm: 0.9999998449521552, iteration: 367011
loss: 1.004187822341919,grad_norm: 0.7799894249357335, iteration: 367012
loss: 1.0625563859939575,grad_norm: 0.8443066449230148, iteration: 367013
loss: 0.9902203679084778,grad_norm: 0.8014250266362701, iteration: 367014
loss: 1.0135794878005981,grad_norm: 0.9999998926536182, iteration: 367015
loss: 0.993222177028656,grad_norm: 0.8449033715952585, iteration: 367016
loss: 1.0092841386795044,grad_norm: 0.8851412609973973, iteration: 367017
loss: 0.9616724848747253,grad_norm: 0.6974357124286682, iteration: 367018
loss: 0.9967615604400635,grad_norm: 0.8171602192635454, iteration: 367019
loss: 1.0215426683425903,grad_norm: 0.9999993194277379, iteration: 367020
loss: 1.1403818130493164,grad_norm: 0.9999998441991858, iteration: 367021
loss: 1.0419814586639404,grad_norm: 0.9999996428949491, iteration: 367022
loss: 0.9928959012031555,grad_norm: 0.8336115400200517, iteration: 367023
loss: 1.0405387878417969,grad_norm: 0.8091451119645764, iteration: 367024
loss: 1.0021499395370483,grad_norm: 0.778074128135546, iteration: 367025
loss: 1.0149295330047607,grad_norm: 0.9999991833268679, iteration: 367026
loss: 0.9657516479492188,grad_norm: 0.849262182460705, iteration: 367027
loss: 1.018483281135559,grad_norm: 0.7587005221956055, iteration: 367028
loss: 0.9798458814620972,grad_norm: 0.8629318578343419, iteration: 367029
loss: 1.0206691026687622,grad_norm: 0.7823528109413759, iteration: 367030
loss: 1.0682382583618164,grad_norm: 0.9999995044697043, iteration: 367031
loss: 1.002583622932434,grad_norm: 0.7457965900365586, iteration: 367032
loss: 0.97921222448349,grad_norm: 0.9999991523140348, iteration: 367033
loss: 1.138031244277954,grad_norm: 0.8617378379087574, iteration: 367034
loss: 1.1854969263076782,grad_norm: 0.9999995541729035, iteration: 367035
loss: 1.0267179012298584,grad_norm: 0.8051868386842049, iteration: 367036
loss: 0.9985112547874451,grad_norm: 0.9999994516482932, iteration: 367037
loss: 1.0402182340621948,grad_norm: 0.8433861270345081, iteration: 367038
loss: 1.0404735803604126,grad_norm: 0.9240479837458285, iteration: 367039
loss: 1.0323951244354248,grad_norm: 0.9999989524910085, iteration: 367040
loss: 0.9918531775474548,grad_norm: 0.9999997076104453, iteration: 367041
loss: 1.0076260566711426,grad_norm: 0.9999993249643296, iteration: 367042
loss: 0.9832509160041809,grad_norm: 0.8025849164439136, iteration: 367043
loss: 1.0015079975128174,grad_norm: 0.8165424176033254, iteration: 367044
loss: 0.9934276938438416,grad_norm: 0.9999992057331195, iteration: 367045
loss: 1.022905707359314,grad_norm: 0.7675190151057963, iteration: 367046
loss: 1.0254987478256226,grad_norm: 0.8617648849053554, iteration: 367047
loss: 0.9740024209022522,grad_norm: 0.7060674728069787, iteration: 367048
loss: 1.068892002105713,grad_norm: 0.8904554488162215, iteration: 367049
loss: 0.9972319006919861,grad_norm: 0.7281810480128145, iteration: 367050
loss: 1.0797501802444458,grad_norm: 0.999999764417385, iteration: 367051
loss: 1.04921293258667,grad_norm: 0.9999992558427278, iteration: 367052
loss: 1.0547361373901367,grad_norm: 0.9999996323457953, iteration: 367053
loss: 0.9631003737449646,grad_norm: 0.7841694387467196, iteration: 367054
loss: 0.9761537909507751,grad_norm: 0.728733054636411, iteration: 367055
loss: 1.0192458629608154,grad_norm: 0.7808382968051036, iteration: 367056
loss: 1.0007288455963135,grad_norm: 0.9205017312336681, iteration: 367057
loss: 1.0503978729248047,grad_norm: 0.9999992124773921, iteration: 367058
loss: 1.00984525680542,grad_norm: 0.9999989325075704, iteration: 367059
loss: 0.9736871123313904,grad_norm: 0.7256679960505283, iteration: 367060
loss: 1.00101900100708,grad_norm: 0.7387835290482349, iteration: 367061
loss: 0.9927716851234436,grad_norm: 0.7796011951091625, iteration: 367062
loss: 1.0258122682571411,grad_norm: 0.9999997622227812, iteration: 367063
loss: 0.9823736548423767,grad_norm: 0.8111084831806534, iteration: 367064
loss: 1.0200287103652954,grad_norm: 0.8848529710110326, iteration: 367065
loss: 1.0423424243927002,grad_norm: 0.9999997969939588, iteration: 367066
loss: 0.9830746054649353,grad_norm: 0.7059730093876364, iteration: 367067
loss: 1.0175434350967407,grad_norm: 0.8744252438454078, iteration: 367068
loss: 1.0221292972564697,grad_norm: 0.8728793005365941, iteration: 367069
loss: 0.9605627655982971,grad_norm: 0.8052038569034112, iteration: 367070
loss: 0.9718717932701111,grad_norm: 0.7179551253943537, iteration: 367071
loss: 1.0481836795806885,grad_norm: 0.9999996910707605, iteration: 367072
loss: 0.9744463562965393,grad_norm: 0.8509874253384516, iteration: 367073
loss: 0.9968058466911316,grad_norm: 0.9999990731751861, iteration: 367074
loss: 1.008329153060913,grad_norm: 0.8259141558401373, iteration: 367075
loss: 1.008066177368164,grad_norm: 0.9999992456980729, iteration: 367076
loss: 1.00437593460083,grad_norm: 0.8591163112369505, iteration: 367077
loss: 0.9926298260688782,grad_norm: 0.8837267301999465, iteration: 367078
loss: 1.0505950450897217,grad_norm: 0.7514281900985573, iteration: 367079
loss: 0.9976992607116699,grad_norm: 0.7606341165482888, iteration: 367080
loss: 0.9999952912330627,grad_norm: 0.9105205931148654, iteration: 367081
loss: 0.9861866235733032,grad_norm: 0.7819039001239428, iteration: 367082
loss: 0.9842082858085632,grad_norm: 0.8628217358074565, iteration: 367083
loss: 1.0207163095474243,grad_norm: 0.6964025949767639, iteration: 367084
loss: 0.9971215128898621,grad_norm: 0.7345085145273964, iteration: 367085
loss: 0.9934415221214294,grad_norm: 0.9382956993219539, iteration: 367086
loss: 1.0157246589660645,grad_norm: 0.9295861094289392, iteration: 367087
loss: 1.0016980171203613,grad_norm: 0.8509982440730979, iteration: 367088
loss: 1.0221951007843018,grad_norm: 0.8111764106939408, iteration: 367089
loss: 0.9802985191345215,grad_norm: 0.7639473213190433, iteration: 367090
loss: 0.9892262816429138,grad_norm: 0.9999991892331973, iteration: 367091
loss: 0.9740149974822998,grad_norm: 0.7768714314193725, iteration: 367092
loss: 1.0252234935760498,grad_norm: 0.790930647681967, iteration: 367093
loss: 1.0330990552902222,grad_norm: 0.8287355101898288, iteration: 367094
loss: 0.9823548197746277,grad_norm: 0.9328138012080589, iteration: 367095
loss: 0.9873663783073425,grad_norm: 0.8122575814810745, iteration: 367096
loss: 0.9699341058731079,grad_norm: 0.8435736300449348, iteration: 367097
loss: 1.0213594436645508,grad_norm: 0.9742930697071438, iteration: 367098
loss: 0.9906132221221924,grad_norm: 0.8086750453002337, iteration: 367099
loss: 1.0217941999435425,grad_norm: 0.8371863016419303, iteration: 367100
loss: 1.0260448455810547,grad_norm: 0.9343087420010666, iteration: 367101
loss: 1.0065813064575195,grad_norm: 0.7383481909178028, iteration: 367102
loss: 1.0196880102157593,grad_norm: 0.9999996662906012, iteration: 367103
loss: 0.9964815974235535,grad_norm: 0.7809898688805872, iteration: 367104
loss: 1.004618763923645,grad_norm: 0.8028835091529034, iteration: 367105
loss: 1.0269973278045654,grad_norm: 0.926283571355397, iteration: 367106
loss: 0.9681046009063721,grad_norm: 0.7861627909193666, iteration: 367107
loss: 0.9832780361175537,grad_norm: 0.7960757748285577, iteration: 367108
loss: 1.0038495063781738,grad_norm: 0.7765061581484263, iteration: 367109
loss: 1.006899118423462,grad_norm: 0.9999991985130879, iteration: 367110
loss: 1.0119519233703613,grad_norm: 0.9048061952094408, iteration: 367111
loss: 0.9951077699661255,grad_norm: 0.9219226654658867, iteration: 367112
loss: 0.9898556470870972,grad_norm: 0.6878744010843401, iteration: 367113
loss: 0.9810085892677307,grad_norm: 0.8682172379431236, iteration: 367114
loss: 1.004244089126587,grad_norm: 0.81473835601279, iteration: 367115
loss: 1.0218578577041626,grad_norm: 0.9999991388738448, iteration: 367116
loss: 0.9915282726287842,grad_norm: 0.736777708324845, iteration: 367117
loss: 1.0227835178375244,grad_norm: 0.9145453800983018, iteration: 367118
loss: 0.9870012402534485,grad_norm: 0.9358453021259824, iteration: 367119
loss: 0.9910755157470703,grad_norm: 0.8903754885385798, iteration: 367120
loss: 1.0153613090515137,grad_norm: 0.8967398804457397, iteration: 367121
loss: 1.0129352807998657,grad_norm: 0.7997288404228742, iteration: 367122
loss: 1.0165817737579346,grad_norm: 0.9206923429488187, iteration: 367123
loss: 0.9907609224319458,grad_norm: 0.818562428630619, iteration: 367124
loss: 1.0218911170959473,grad_norm: 0.8071712554797074, iteration: 367125
loss: 0.9695547223091125,grad_norm: 0.7355851970153274, iteration: 367126
loss: 1.0054945945739746,grad_norm: 0.8274471342497687, iteration: 367127
loss: 1.0080167055130005,grad_norm: 0.8078501078439998, iteration: 367128
loss: 1.0471864938735962,grad_norm: 0.9264403350887361, iteration: 367129
loss: 0.957305908203125,grad_norm: 0.7387729755684814, iteration: 367130
loss: 1.0040035247802734,grad_norm: 0.9543060618582335, iteration: 367131
loss: 1.0371183156967163,grad_norm: 0.9999991854171482, iteration: 367132
loss: 0.9816184639930725,grad_norm: 0.6747430921133064, iteration: 367133
loss: 1.0288561582565308,grad_norm: 0.8917917663795296, iteration: 367134
loss: 0.9831119775772095,grad_norm: 0.7750777532873454, iteration: 367135
loss: 1.021989345550537,grad_norm: 0.9867255184452783, iteration: 367136
loss: 1.004797339439392,grad_norm: 0.8611853456083806, iteration: 367137
loss: 0.9777704477310181,grad_norm: 0.7347666925558752, iteration: 367138
loss: 1.0101584196090698,grad_norm: 0.7663460940260387, iteration: 367139
loss: 0.9583724737167358,grad_norm: 0.7684351392180596, iteration: 367140
loss: 1.0138798952102661,grad_norm: 0.8160318258002234, iteration: 367141
loss: 1.0212229490280151,grad_norm: 0.7443164930707846, iteration: 367142
loss: 1.0030134916305542,grad_norm: 0.7741288695229084, iteration: 367143
loss: 0.9518744349479675,grad_norm: 0.9142286714128516, iteration: 367144
loss: 1.013473629951477,grad_norm: 0.7283311960081297, iteration: 367145
loss: 0.9721322655677795,grad_norm: 0.8258229671568075, iteration: 367146
loss: 1.0068774223327637,grad_norm: 0.922622077837253, iteration: 367147
loss: 0.9941365122795105,grad_norm: 0.7818846904100885, iteration: 367148
loss: 0.9623591899871826,grad_norm: 0.8667475838137917, iteration: 367149
loss: 1.0628759860992432,grad_norm: 0.9999990836644477, iteration: 367150
loss: 1.0394006967544556,grad_norm: 0.9999992873517561, iteration: 367151
loss: 1.0009689331054688,grad_norm: 0.7088776804134603, iteration: 367152
loss: 1.0763322114944458,grad_norm: 0.9999998446344064, iteration: 367153
loss: 1.0229867696762085,grad_norm: 0.8196570304414014, iteration: 367154
loss: 1.0054514408111572,grad_norm: 0.7936950295606999, iteration: 367155
loss: 1.0207793712615967,grad_norm: 0.8324169822407397, iteration: 367156
loss: 0.9969518780708313,grad_norm: 0.8383693796406871, iteration: 367157
loss: 1.120341181755066,grad_norm: 0.9999999691319021, iteration: 367158
loss: 1.014835000038147,grad_norm: 0.999999116648772, iteration: 367159
loss: 0.9830399751663208,grad_norm: 0.9999998339132631, iteration: 367160
loss: 0.9814416170120239,grad_norm: 0.7179103158609105, iteration: 367161
loss: 1.0465105772018433,grad_norm: 0.9999993177708211, iteration: 367162
loss: 1.0193403959274292,grad_norm: 0.9277102655911323, iteration: 367163
loss: 1.0218536853790283,grad_norm: 0.8301600350218097, iteration: 367164
loss: 0.9849570989608765,grad_norm: 0.7984047157487761, iteration: 367165
loss: 0.9991965293884277,grad_norm: 0.7460588741391514, iteration: 367166
loss: 1.0203436613082886,grad_norm: 0.8354481156785648, iteration: 367167
loss: 0.9913584589958191,grad_norm: 0.7009001214338434, iteration: 367168
loss: 1.0047105550765991,grad_norm: 0.873363838437975, iteration: 367169
loss: 0.9753285050392151,grad_norm: 0.7017280339255858, iteration: 367170
loss: 0.9888147115707397,grad_norm: 0.9999990836145232, iteration: 367171
loss: 1.0178273916244507,grad_norm: 0.960607141413023, iteration: 367172
loss: 1.0608930587768555,grad_norm: 0.9698766606324914, iteration: 367173
loss: 1.0016274452209473,grad_norm: 0.6746611776331456, iteration: 367174
loss: 1.0117781162261963,grad_norm: 0.9304963984154123, iteration: 367175
loss: 1.0041698217391968,grad_norm: 0.8598627987821986, iteration: 367176
loss: 1.0236740112304688,grad_norm: 0.7726676834565467, iteration: 367177
loss: 1.0230355262756348,grad_norm: 0.7891662648075229, iteration: 367178
loss: 0.9639143943786621,grad_norm: 0.6851031920500081, iteration: 367179
loss: 1.0102012157440186,grad_norm: 0.7816149550563105, iteration: 367180
loss: 1.0061628818511963,grad_norm: 0.8665469861688693, iteration: 367181
loss: 1.028050184249878,grad_norm: 0.9152538893074945, iteration: 367182
loss: 0.9431342482566833,grad_norm: 0.9045283298265289, iteration: 367183
loss: 0.9862398505210876,grad_norm: 0.8307426163576824, iteration: 367184
loss: 1.012904405593872,grad_norm: 0.7408464893136268, iteration: 367185
loss: 1.023282766342163,grad_norm: 0.9999996856754836, iteration: 367186
loss: 0.9647645950317383,grad_norm: 0.8980822936654489, iteration: 367187
loss: 0.9920501708984375,grad_norm: 0.8683561285012832, iteration: 367188
loss: 1.0582855939865112,grad_norm: 0.9109953895211381, iteration: 367189
loss: 1.0185719728469849,grad_norm: 0.9498055931680492, iteration: 367190
loss: 1.007710576057434,grad_norm: 0.7785443757538502, iteration: 367191
loss: 0.9972378611564636,grad_norm: 0.8055026292427033, iteration: 367192
loss: 1.0244803428649902,grad_norm: 0.8469082079343949, iteration: 367193
loss: 0.986793041229248,grad_norm: 0.8392748142236273, iteration: 367194
loss: 0.9943819046020508,grad_norm: 0.882410845577105, iteration: 367195
loss: 0.9894760251045227,grad_norm: 0.7861264117693092, iteration: 367196
loss: 1.0473521947860718,grad_norm: 0.7852138631851145, iteration: 367197
loss: 0.9683774709701538,grad_norm: 0.97627507133742, iteration: 367198
loss: 1.0257370471954346,grad_norm: 0.8376988053671386, iteration: 367199
loss: 1.0379836559295654,grad_norm: 0.8309159812465802, iteration: 367200
loss: 0.9949080944061279,grad_norm: 0.731888813824563, iteration: 367201
loss: 0.9885619878768921,grad_norm: 0.8198885930852494, iteration: 367202
loss: 0.9958927035331726,grad_norm: 0.7133938389245668, iteration: 367203
loss: 1.0050268173217773,grad_norm: 0.8630006665066812, iteration: 367204
loss: 0.9933245778083801,grad_norm: 0.9999991585231796, iteration: 367205
loss: 0.9839058518409729,grad_norm: 0.7015107637184868, iteration: 367206
loss: 1.0135782957077026,grad_norm: 0.908080814831034, iteration: 367207
loss: 1.014900803565979,grad_norm: 0.7631061662059458, iteration: 367208
loss: 1.0096001625061035,grad_norm: 0.7540434615114379, iteration: 367209
loss: 0.9987437725067139,grad_norm: 0.860871016262622, iteration: 367210
loss: 1.0006170272827148,grad_norm: 0.915176145888132, iteration: 367211
loss: 0.9886387586593628,grad_norm: 0.8998465575112923, iteration: 367212
loss: 0.9674410820007324,grad_norm: 0.7756189978027267, iteration: 367213
loss: 0.9922555685043335,grad_norm: 0.6643286150905324, iteration: 367214
loss: 1.0492466688156128,grad_norm: 0.8349594608924934, iteration: 367215
loss: 0.9940886497497559,grad_norm: 0.7390305881338587, iteration: 367216
loss: 1.0326071977615356,grad_norm: 0.839182796942745, iteration: 367217
loss: 1.0095665454864502,grad_norm: 0.9583569229276915, iteration: 367218
loss: 1.0313915014266968,grad_norm: 0.7312452668958597, iteration: 367219
loss: 1.0001145601272583,grad_norm: 0.7750936940184359, iteration: 367220
loss: 1.027218222618103,grad_norm: 0.9266416011567314, iteration: 367221
loss: 0.9941639304161072,grad_norm: 0.8583556944938778, iteration: 367222
loss: 1.00538170337677,grad_norm: 0.7462055278713708, iteration: 367223
loss: 1.015514850616455,grad_norm: 0.8600623098719856, iteration: 367224
loss: 1.0284711122512817,grad_norm: 0.7592759901296974, iteration: 367225
loss: 1.0755664110183716,grad_norm: 0.9999997317278162, iteration: 367226
loss: 1.0478757619857788,grad_norm: 0.809739185946882, iteration: 367227
loss: 0.9866670370101929,grad_norm: 0.9999993740777061, iteration: 367228
loss: 1.0164868831634521,grad_norm: 0.6859404875170274, iteration: 367229
loss: 1.00409734249115,grad_norm: 0.6146464412123306, iteration: 367230
loss: 1.0050570964813232,grad_norm: 0.8321936775447403, iteration: 367231
loss: 1.0247247219085693,grad_norm: 0.8030469629825768, iteration: 367232
loss: 0.985504150390625,grad_norm: 0.9678056773161022, iteration: 367233
loss: 0.9629768133163452,grad_norm: 0.8229251191569773, iteration: 367234
loss: 0.982121467590332,grad_norm: 0.8649014687174972, iteration: 367235
loss: 0.9891809225082397,grad_norm: 0.790266526245605, iteration: 367236
loss: 1.051306962966919,grad_norm: 0.8059366764839263, iteration: 367237
loss: 0.9910239577293396,grad_norm: 0.7377666775047843, iteration: 367238
loss: 1.048823595046997,grad_norm: 0.9999996516689863, iteration: 367239
loss: 0.9949371218681335,grad_norm: 0.7174661595686007, iteration: 367240
loss: 1.0028855800628662,grad_norm: 0.8811695760092316, iteration: 367241
loss: 1.0481600761413574,grad_norm: 0.7473784821104343, iteration: 367242
loss: 0.9985857605934143,grad_norm: 0.7898212022142168, iteration: 367243
loss: 1.0169570446014404,grad_norm: 0.7183423220503861, iteration: 367244
loss: 1.0367064476013184,grad_norm: 0.9999995214521068, iteration: 367245
loss: 1.036338210105896,grad_norm: 0.9747101764458461, iteration: 367246
loss: 0.9952632784843445,grad_norm: 0.8434035069320186, iteration: 367247
loss: 1.0032031536102295,grad_norm: 0.9999998608051878, iteration: 367248
loss: 0.9693474769592285,grad_norm: 0.7330431187648706, iteration: 367249
loss: 1.026564359664917,grad_norm: 0.847709376183328, iteration: 367250
loss: 1.0338635444641113,grad_norm: 0.8354609678047139, iteration: 367251
loss: 1.0125293731689453,grad_norm: 0.870890032992573, iteration: 367252
loss: 0.9966134428977966,grad_norm: 0.8640285432711583, iteration: 367253
loss: 0.9563612937927246,grad_norm: 0.9082327153529425, iteration: 367254
loss: 1.0338988304138184,grad_norm: 0.814357885110731, iteration: 367255
loss: 0.9823587536811829,grad_norm: 0.944604424345363, iteration: 367256
loss: 1.016532063484192,grad_norm: 0.9603608684682735, iteration: 367257
loss: 1.0187615156173706,grad_norm: 0.9628830599632323, iteration: 367258
loss: 1.0242921113967896,grad_norm: 0.9999995871664752, iteration: 367259
loss: 0.9793422222137451,grad_norm: 0.8261673474003097, iteration: 367260
loss: 1.005941390991211,grad_norm: 0.8561077908217298, iteration: 367261
loss: 1.0347360372543335,grad_norm: 0.999999235821408, iteration: 367262
loss: 0.9964393377304077,grad_norm: 0.6951117243316691, iteration: 367263
loss: 1.0152060985565186,grad_norm: 0.8116060824884053, iteration: 367264
loss: 0.9945240020751953,grad_norm: 0.7612841446797183, iteration: 367265
loss: 0.9901715517044067,grad_norm: 0.817270695113861, iteration: 367266
loss: 0.9964285492897034,grad_norm: 0.8521617091839616, iteration: 367267
loss: 0.9572576880455017,grad_norm: 0.6535074238957114, iteration: 367268
loss: 0.9739065766334534,grad_norm: 0.8127964426976723, iteration: 367269
loss: 0.9928677082061768,grad_norm: 0.8572615398830257, iteration: 367270
loss: 1.0022672414779663,grad_norm: 0.6022238895787994, iteration: 367271
loss: 0.9767775535583496,grad_norm: 0.8965671339858208, iteration: 367272
loss: 1.031195878982544,grad_norm: 0.8661894148189838, iteration: 367273
loss: 0.9644933342933655,grad_norm: 0.935252408388342, iteration: 367274
loss: 1.0008293390274048,grad_norm: 0.9999995240774324, iteration: 367275
loss: 1.042658805847168,grad_norm: 0.9782576954344138, iteration: 367276
loss: 0.9944462180137634,grad_norm: 0.7969492911910313, iteration: 367277
loss: 1.0068374872207642,grad_norm: 0.7408737371147593, iteration: 367278
loss: 1.0468240976333618,grad_norm: 0.9386007166708793, iteration: 367279
loss: 1.0006672143936157,grad_norm: 0.7808169577163289, iteration: 367280
loss: 0.9718431234359741,grad_norm: 0.8390417168729827, iteration: 367281
loss: 0.9864595532417297,grad_norm: 0.7511200923432644, iteration: 367282
loss: 0.9591443538665771,grad_norm: 0.8399732314105887, iteration: 367283
loss: 0.9797729849815369,grad_norm: 0.764603325238831, iteration: 367284
loss: 0.9789084792137146,grad_norm: 0.928727690333859, iteration: 367285
loss: 0.9567559361457825,grad_norm: 0.755962605234394, iteration: 367286
loss: 0.9743556380271912,grad_norm: 0.8095469793064453, iteration: 367287
loss: 1.0006095170974731,grad_norm: 0.6360767496850711, iteration: 367288
loss: 0.988252580165863,grad_norm: 0.6847554380428577, iteration: 367289
loss: 0.9872579574584961,grad_norm: 0.7752930793105728, iteration: 367290
loss: 0.9699071049690247,grad_norm: 0.8048577344676706, iteration: 367291
loss: 1.0221658945083618,grad_norm: 0.7608936053119756, iteration: 367292
loss: 1.0973044633865356,grad_norm: 0.7851086338706474, iteration: 367293
loss: 1.016312837600708,grad_norm: 0.8630920996221897, iteration: 367294
loss: 0.9842357635498047,grad_norm: 0.6787991792141939, iteration: 367295
loss: 1.0117520093917847,grad_norm: 0.8924919834732631, iteration: 367296
loss: 1.0125868320465088,grad_norm: 0.8840586121187601, iteration: 367297
loss: 1.0139820575714111,grad_norm: 0.9999992078229784, iteration: 367298
loss: 0.9834816455841064,grad_norm: 0.9009752019466599, iteration: 367299
loss: 1.0100547075271606,grad_norm: 0.9999995789102377, iteration: 367300
loss: 1.0312345027923584,grad_norm: 0.7581223583179654, iteration: 367301
loss: 1.0699164867401123,grad_norm: 0.8308449454622087, iteration: 367302
loss: 1.00443696975708,grad_norm: 0.7795955061531317, iteration: 367303
loss: 0.9889306426048279,grad_norm: 0.7889820850018616, iteration: 367304
loss: 1.0268508195877075,grad_norm: 0.9999996553757596, iteration: 367305
loss: 1.0095597505569458,grad_norm: 0.7170751453494004, iteration: 367306
loss: 1.032577633857727,grad_norm: 1.0000000386202035, iteration: 367307
loss: 0.9992002844810486,grad_norm: 0.8238252045509659, iteration: 367308
loss: 1.0137715339660645,grad_norm: 0.7018669449929682, iteration: 367309
loss: 0.9861973524093628,grad_norm: 0.8060623905384174, iteration: 367310
loss: 0.9814208149909973,grad_norm: 0.8050358636617166, iteration: 367311
loss: 1.0192066431045532,grad_norm: 0.7029322194813783, iteration: 367312
loss: 1.0113919973373413,grad_norm: 0.9678380484267706, iteration: 367313
loss: 0.9705160856246948,grad_norm: 0.8238211978188386, iteration: 367314
loss: 1.0252652168273926,grad_norm: 0.6563925184582493, iteration: 367315
loss: 1.0026731491088867,grad_norm: 0.7849836544059399, iteration: 367316
loss: 0.9972116351127625,grad_norm: 0.751163436051417, iteration: 367317
loss: 1.0049060583114624,grad_norm: 0.8437451784879109, iteration: 367318
loss: 0.9674755930900574,grad_norm: 0.8422111845152439, iteration: 367319
loss: 0.991861879825592,grad_norm: 0.7885296557385064, iteration: 367320
loss: 0.9888584613800049,grad_norm: 0.9183238870193173, iteration: 367321
loss: 1.0267163515090942,grad_norm: 0.7718527151548077, iteration: 367322
loss: 1.0002092123031616,grad_norm: 0.7791605543864757, iteration: 367323
loss: 1.0118656158447266,grad_norm: 0.732869753570311, iteration: 367324
loss: 0.9584168195724487,grad_norm: 0.948974227349313, iteration: 367325
loss: 1.029995322227478,grad_norm: 0.9999991165815125, iteration: 367326
loss: 1.1000182628631592,grad_norm: 0.9999993459791864, iteration: 367327
loss: 1.001100778579712,grad_norm: 0.7087475475988498, iteration: 367328
loss: 1.0169808864593506,grad_norm: 0.844977888681931, iteration: 367329
loss: 1.0131584405899048,grad_norm: 0.8491899532782003, iteration: 367330
loss: 1.0138964653015137,grad_norm: 0.7052396773796855, iteration: 367331
loss: 1.0036197900772095,grad_norm: 0.9999995542921484, iteration: 367332
loss: 1.1304385662078857,grad_norm: 0.99098323051335, iteration: 367333
loss: 1.0017019510269165,grad_norm: 0.7589880869201463, iteration: 367334
loss: 1.013039469718933,grad_norm: 0.8251710985238457, iteration: 367335
loss: 1.010080099105835,grad_norm: 0.8489677087798567, iteration: 367336
loss: 0.9888692498207092,grad_norm: 0.8193940391781621, iteration: 367337
loss: 0.9941846132278442,grad_norm: 0.822450650676299, iteration: 367338
loss: 1.0384480953216553,grad_norm: 0.8091309598598551, iteration: 367339
loss: 1.0163121223449707,grad_norm: 0.8454157884654034, iteration: 367340
loss: 1.0166133642196655,grad_norm: 0.7043959874737901, iteration: 367341
loss: 0.971521258354187,grad_norm: 0.8904098394609149, iteration: 367342
loss: 0.9880573749542236,grad_norm: 0.7745311696004024, iteration: 367343
loss: 1.025292992591858,grad_norm: 0.7989043014688102, iteration: 367344
loss: 0.9458551406860352,grad_norm: 0.8395221234182796, iteration: 367345
loss: 0.9949619770050049,grad_norm: 0.6920913041073214, iteration: 367346
loss: 0.9945551753044128,grad_norm: 0.9999990356170984, iteration: 367347
loss: 1.0162450075149536,grad_norm: 0.7430347155787393, iteration: 367348
loss: 1.0351325273513794,grad_norm: 0.7113746192997641, iteration: 367349
loss: 0.9967353940010071,grad_norm: 0.817238964305956, iteration: 367350
loss: 0.9982807636260986,grad_norm: 0.7232499303903819, iteration: 367351
loss: 1.0698179006576538,grad_norm: 0.8638455185664912, iteration: 367352
loss: 0.9580852389335632,grad_norm: 0.8408091116967417, iteration: 367353
loss: 1.033045768737793,grad_norm: 0.9999990877396191, iteration: 367354
loss: 1.0308853387832642,grad_norm: 0.7832972961547463, iteration: 367355
loss: 1.002410650253296,grad_norm: 0.816317339702587, iteration: 367356
loss: 1.0164247751235962,grad_norm: 0.8508289111603085, iteration: 367357
loss: 0.9682641625404358,grad_norm: 0.6533335306167204, iteration: 367358
loss: 1.0178160667419434,grad_norm: 0.9999990400495075, iteration: 367359
loss: 1.019682765007019,grad_norm: 0.9880610009460181, iteration: 367360
loss: 0.9751970171928406,grad_norm: 0.9999988982484851, iteration: 367361
loss: 1.0154328346252441,grad_norm: 0.9476721288768775, iteration: 367362
loss: 1.0203964710235596,grad_norm: 0.907665334046398, iteration: 367363
loss: 1.0264036655426025,grad_norm: 0.7580783213538962, iteration: 367364
loss: 0.9987354874610901,grad_norm: 0.7522860457091123, iteration: 367365
loss: 1.0008282661437988,grad_norm: 0.9174553500388056, iteration: 367366
loss: 0.9935569763183594,grad_norm: 0.6912249646001919, iteration: 367367
loss: 0.9914637207984924,grad_norm: 0.7782608074896381, iteration: 367368
loss: 0.9593691825866699,grad_norm: 0.8341524440790141, iteration: 367369
loss: 0.9973127245903015,grad_norm: 0.9322456000545486, iteration: 367370
loss: 0.9984557628631592,grad_norm: 0.9999991295295524, iteration: 367371
loss: 1.0182610750198364,grad_norm: 0.886568603801829, iteration: 367372
loss: 0.9387942552566528,grad_norm: 0.7630426390395952, iteration: 367373
loss: 1.0032777786254883,grad_norm: 0.999999099049755, iteration: 367374
loss: 0.9702496528625488,grad_norm: 0.8961760672002358, iteration: 367375
loss: 1.0276007652282715,grad_norm: 0.7369744763744834, iteration: 367376
loss: 1.006862759590149,grad_norm: 0.8673869742062221, iteration: 367377
loss: 1.0216258764266968,grad_norm: 0.6739643394944007, iteration: 367378
loss: 1.012798547744751,grad_norm: 0.8237407982062063, iteration: 367379
loss: 0.9739047288894653,grad_norm: 0.9101759134446492, iteration: 367380
loss: 0.9765534400939941,grad_norm: 0.8436737339135512, iteration: 367381
loss: 0.9638904333114624,grad_norm: 0.667217536701095, iteration: 367382
loss: 1.0086642503738403,grad_norm: 0.8435558131032995, iteration: 367383
loss: 0.9842731952667236,grad_norm: 0.7540271509818124, iteration: 367384
loss: 0.9985976815223694,grad_norm: 0.9999999439454644, iteration: 367385
loss: 1.0132068395614624,grad_norm: 0.9999996732337866, iteration: 367386
loss: 0.9829652309417725,grad_norm: 0.8057101267026047, iteration: 367387
loss: 1.0053722858428955,grad_norm: 0.8010075612469575, iteration: 367388
loss: 0.9954847693443298,grad_norm: 0.9166709888611141, iteration: 367389
loss: 1.0088818073272705,grad_norm: 0.7505715541761606, iteration: 367390
loss: 0.9823200702667236,grad_norm: 0.7774205086530367, iteration: 367391
loss: 0.9747316241264343,grad_norm: 0.8264602717154566, iteration: 367392
loss: 1.0114538669586182,grad_norm: 0.9074330047479218, iteration: 367393
loss: 1.0343314409255981,grad_norm: 0.7398648372399426, iteration: 367394
loss: 1.0025994777679443,grad_norm: 0.7958120888308806, iteration: 367395
loss: 1.009882926940918,grad_norm: 0.7121171114145471, iteration: 367396
loss: 1.0152254104614258,grad_norm: 0.8352278823818979, iteration: 367397
loss: 0.9982478022575378,grad_norm: 0.7448304149763014, iteration: 367398
loss: 1.0181124210357666,grad_norm: 0.7826490467208691, iteration: 367399
loss: 1.0096358060836792,grad_norm: 0.7615763437447507, iteration: 367400
loss: 1.0548524856567383,grad_norm: 0.9646617866637038, iteration: 367401
loss: 1.0120313167572021,grad_norm: 0.9283831717716757, iteration: 367402
loss: 1.0014086961746216,grad_norm: 0.7306999803220079, iteration: 367403
loss: 1.0107285976409912,grad_norm: 0.9894264401539544, iteration: 367404
loss: 0.9912948608398438,grad_norm: 0.6852757147628213, iteration: 367405
loss: 0.9759226441383362,grad_norm: 0.7706726307794826, iteration: 367406
loss: 0.9796496033668518,grad_norm: 0.7029458133342922, iteration: 367407
loss: 0.9611522555351257,grad_norm: 0.8221788982049166, iteration: 367408
loss: 1.0088579654693604,grad_norm: 0.7725011839664807, iteration: 367409
loss: 1.0208741426467896,grad_norm: 0.9999990833239955, iteration: 367410
loss: 0.9807378649711609,grad_norm: 0.7844992127904603, iteration: 367411
loss: 1.0145843029022217,grad_norm: 0.9866935601823936, iteration: 367412
loss: 0.9542427659034729,grad_norm: 0.8114555794657411, iteration: 367413
loss: 0.9868987798690796,grad_norm: 0.7826542507095607, iteration: 367414
loss: 1.005851149559021,grad_norm: 0.6580085109112691, iteration: 367415
loss: 1.0098153352737427,grad_norm: 0.6947240784470399, iteration: 367416
loss: 0.9669857025146484,grad_norm: 0.8886997663398646, iteration: 367417
loss: 1.0059562921524048,grad_norm: 0.9004304056447349, iteration: 367418
loss: 1.045037031173706,grad_norm: 0.9440546663526768, iteration: 367419
loss: 1.058517336845398,grad_norm: 0.9999998130388147, iteration: 367420
loss: 1.0551114082336426,grad_norm: 0.9720061919033924, iteration: 367421
loss: 0.9963836669921875,grad_norm: 0.7057537581224134, iteration: 367422
loss: 1.0047420263290405,grad_norm: 0.8896672126910484, iteration: 367423
loss: 1.0310146808624268,grad_norm: 0.9754902187527074, iteration: 367424
loss: 1.0101757049560547,grad_norm: 0.9787223205269492, iteration: 367425
loss: 1.0330758094787598,grad_norm: 0.819685039290884, iteration: 367426
loss: 1.0118356943130493,grad_norm: 0.9999992535373091, iteration: 367427
loss: 0.9783947467803955,grad_norm: 0.8066200804384829, iteration: 367428
loss: 1.0174661874771118,grad_norm: 0.8381135626349037, iteration: 367429
loss: 1.0365681648254395,grad_norm: 0.877468092542062, iteration: 367430
loss: 1.0103304386138916,grad_norm: 0.9199556044924383, iteration: 367431
loss: 0.9812426567077637,grad_norm: 0.9086473956949818, iteration: 367432
loss: 1.0121639966964722,grad_norm: 0.9999996118082012, iteration: 367433
loss: 0.9923394322395325,grad_norm: 0.9726460020934893, iteration: 367434
loss: 1.0031386613845825,grad_norm: 0.7493357330389443, iteration: 367435
loss: 1.0066337585449219,grad_norm: 0.7673046430805548, iteration: 367436
loss: 1.04973304271698,grad_norm: 0.9725693198783154, iteration: 367437
loss: 0.9684456586837769,grad_norm: 0.8326590848175445, iteration: 367438
loss: 0.9971629977226257,grad_norm: 0.808477051905625, iteration: 367439
loss: 0.9937423467636108,grad_norm: 0.914741751454592, iteration: 367440
loss: 0.9996300339698792,grad_norm: 0.8278194814325549, iteration: 367441
loss: 1.0169873237609863,grad_norm: 0.8038621461150627, iteration: 367442
loss: 1.004746913909912,grad_norm: 0.7740707305727896, iteration: 367443
loss: 1.013443112373352,grad_norm: 0.7002660446549321, iteration: 367444
loss: 0.9748510718345642,grad_norm: 0.8744541371298462, iteration: 367445
loss: 1.0234464406967163,grad_norm: 0.8490424676288157, iteration: 367446
loss: 0.994837760925293,grad_norm: 0.875453283292654, iteration: 367447
loss: 1.0183264017105103,grad_norm: 0.928478074497672, iteration: 367448
loss: 1.0325101613998413,grad_norm: 0.9999997994359443, iteration: 367449
loss: 1.012284278869629,grad_norm: 0.7283752192011936, iteration: 367450
loss: 1.02614164352417,grad_norm: 0.9999991825545353, iteration: 367451
loss: 1.0315464735031128,grad_norm: 0.7957355278207284, iteration: 367452
loss: 0.9876828193664551,grad_norm: 0.8001252298930235, iteration: 367453
loss: 0.9497941732406616,grad_norm: 0.999999782135436, iteration: 367454
loss: 0.9979556798934937,grad_norm: 0.7528605953415907, iteration: 367455
loss: 1.0060153007507324,grad_norm: 0.6855592570433371, iteration: 367456
loss: 1.0351144075393677,grad_norm: 0.7897584205983276, iteration: 367457
loss: 0.9827327728271484,grad_norm: 0.8699730992905715, iteration: 367458
loss: 0.9461526274681091,grad_norm: 0.8229326572498334, iteration: 367459
loss: 1.0160548686981201,grad_norm: 0.9999992230391223, iteration: 367460
loss: 0.9885247945785522,grad_norm: 0.739052081087623, iteration: 367461
loss: 0.9888794422149658,grad_norm: 0.765309708593917, iteration: 367462
loss: 0.9939028024673462,grad_norm: 0.6902107615823816, iteration: 367463
loss: 1.0289943218231201,grad_norm: 0.6618294905363517, iteration: 367464
loss: 1.014244794845581,grad_norm: 0.6813026713045002, iteration: 367465
loss: 0.9992967844009399,grad_norm: 0.8410883265893466, iteration: 367466
loss: 1.050123929977417,grad_norm: 0.963855819322093, iteration: 367467
loss: 0.9759126305580139,grad_norm: 0.8217296303323752, iteration: 367468
loss: 1.0365828275680542,grad_norm: 0.7735543020388159, iteration: 367469
loss: 1.0536569356918335,grad_norm: 1.0000000457203848, iteration: 367470
loss: 0.9986225962638855,grad_norm: 0.9999997389190483, iteration: 367471
loss: 1.0482690334320068,grad_norm: 0.9999993861545916, iteration: 367472
loss: 1.015066385269165,grad_norm: 0.7661234647719624, iteration: 367473
loss: 0.9820448160171509,grad_norm: 0.7855273254865676, iteration: 367474
loss: 0.9749160408973694,grad_norm: 0.7447499166497235, iteration: 367475
loss: 1.015419363975525,grad_norm: 0.7151586996893844, iteration: 367476
loss: 0.9826178550720215,grad_norm: 0.8989937952924355, iteration: 367477
loss: 0.9959565997123718,grad_norm: 0.7647800966672574, iteration: 367478
loss: 1.0172278881072998,grad_norm: 0.7959417040588694, iteration: 367479
loss: 1.0188496112823486,grad_norm: 0.7703303007240611, iteration: 367480
loss: 1.0168040990829468,grad_norm: 0.8450635456586717, iteration: 367481
loss: 0.9824709892272949,grad_norm: 0.8003011675524898, iteration: 367482
loss: 0.9738855361938477,grad_norm: 0.8624771560725064, iteration: 367483
loss: 0.9664110541343689,grad_norm: 0.8668056117728657, iteration: 367484
loss: 0.9687195420265198,grad_norm: 0.8695307625755637, iteration: 367485
loss: 1.0034520626068115,grad_norm: 0.6729800266327113, iteration: 367486
loss: 1.0302706956863403,grad_norm: 0.7344035128951693, iteration: 367487
loss: 1.0180262327194214,grad_norm: 0.7106908293063277, iteration: 367488
loss: 0.9963579773902893,grad_norm: 0.7580104778962863, iteration: 367489
loss: 1.0334481000900269,grad_norm: 0.9939566555616468, iteration: 367490
loss: 1.0437170267105103,grad_norm: 0.8721062494398155, iteration: 367491
loss: 0.9915674328804016,grad_norm: 0.9999994671497094, iteration: 367492
loss: 0.9554386734962463,grad_norm: 1.0000000224084136, iteration: 367493
loss: 0.9990484714508057,grad_norm: 0.9740400148206656, iteration: 367494
loss: 0.9689861536026001,grad_norm: 0.9677046071238271, iteration: 367495
loss: 0.978839635848999,grad_norm: 0.7979669274988495, iteration: 367496
loss: 1.027912974357605,grad_norm: 0.8258620618249016, iteration: 367497
loss: 1.0442231893539429,grad_norm: 0.6536394546222688, iteration: 367498
loss: 0.9983362555503845,grad_norm: 0.8047785842283315, iteration: 367499
loss: 1.0008783340454102,grad_norm: 0.7810556791154849, iteration: 367500
loss: 1.001031756401062,grad_norm: 0.8131997604082466, iteration: 367501
loss: 1.0927914381027222,grad_norm: 0.7858317876497507, iteration: 367502
loss: 1.019180417060852,grad_norm: 0.9172892968559165, iteration: 367503
loss: 1.0883780717849731,grad_norm: 0.9993923309576024, iteration: 367504
loss: 0.9617441296577454,grad_norm: 0.9999990373217152, iteration: 367505
loss: 0.9631168246269226,grad_norm: 0.7083310282511243, iteration: 367506
loss: 0.9751437306404114,grad_norm: 0.9999997115223448, iteration: 367507
loss: 0.9810581803321838,grad_norm: 0.7076355712767004, iteration: 367508
loss: 0.9677605628967285,grad_norm: 0.9828065170905935, iteration: 367509
loss: 1.040770173072815,grad_norm: 0.8721135663212699, iteration: 367510
loss: 0.9883794188499451,grad_norm: 0.8516599645271128, iteration: 367511
loss: 0.9891319274902344,grad_norm: 0.884868275580051, iteration: 367512
loss: 1.0571357011795044,grad_norm: 0.9999999428632852, iteration: 367513
loss: 1.0958611965179443,grad_norm: 0.9999990496095218, iteration: 367514
loss: 1.0076112747192383,grad_norm: 0.7913969397708738, iteration: 367515
loss: 1.055127501487732,grad_norm: 0.8214522722976806, iteration: 367516
loss: 1.0272095203399658,grad_norm: 0.8021054608026968, iteration: 367517
loss: 1.003633737564087,grad_norm: 0.826965128989995, iteration: 367518
loss: 0.9557867646217346,grad_norm: 0.8926481174626324, iteration: 367519
loss: 1.0093058347702026,grad_norm: 0.959883538157159, iteration: 367520
loss: 1.0768928527832031,grad_norm: 0.9999989492348552, iteration: 367521
loss: 1.016645073890686,grad_norm: 0.7145697231731765, iteration: 367522
loss: 1.0149214267730713,grad_norm: 0.8721203278731132, iteration: 367523
loss: 1.0064986944198608,grad_norm: 0.9999993797955776, iteration: 367524
loss: 0.9842566847801208,grad_norm: 0.7858782470672009, iteration: 367525
loss: 0.9637857675552368,grad_norm: 0.803196493721112, iteration: 367526
loss: 1.0060251951217651,grad_norm: 0.9999995080770976, iteration: 367527
loss: 1.0030419826507568,grad_norm: 0.7343797954476511, iteration: 367528
loss: 1.0251295566558838,grad_norm: 0.7628373972661298, iteration: 367529
loss: 1.1258790493011475,grad_norm: 0.9999990545670598, iteration: 367530
loss: 1.020568609237671,grad_norm: 0.8147932828761453, iteration: 367531
loss: 1.0289313793182373,grad_norm: 0.9999991857660909, iteration: 367532
loss: 0.9975013136863708,grad_norm: 0.9999999114798322, iteration: 367533
loss: 1.038453221321106,grad_norm: 0.999999389555801, iteration: 367534
loss: 0.9768649339675903,grad_norm: 0.9999993428429591, iteration: 367535
loss: 0.9827041029930115,grad_norm: 0.8582480833368757, iteration: 367536
loss: 1.0144641399383545,grad_norm: 0.9999995190118512, iteration: 367537
loss: 1.0057071447372437,grad_norm: 0.9999990666786402, iteration: 367538
loss: 1.015799641609192,grad_norm: 0.8529492318894981, iteration: 367539
loss: 1.0223201513290405,grad_norm: 0.7950551141239175, iteration: 367540
loss: 0.9792861342430115,grad_norm: 0.9999992533341346, iteration: 367541
loss: 0.9714762568473816,grad_norm: 0.9999993407555645, iteration: 367542
loss: 1.0168322324752808,grad_norm: 0.821621086502365, iteration: 367543
loss: 1.03580904006958,grad_norm: 0.7989908650374906, iteration: 367544
loss: 0.9733350276947021,grad_norm: 0.9999997702253735, iteration: 367545
loss: 0.9697270393371582,grad_norm: 0.7228518253233164, iteration: 367546
loss: 1.0416333675384521,grad_norm: 0.8550227378993517, iteration: 367547
loss: 1.0113967657089233,grad_norm: 0.9667461289278882, iteration: 367548
loss: 0.9899083971977234,grad_norm: 0.8912989579130541, iteration: 367549
loss: 1.0718443393707275,grad_norm: 0.9999992513078924, iteration: 367550
loss: 1.006499171257019,grad_norm: 0.9989547609196896, iteration: 367551
loss: 1.028134822845459,grad_norm: 0.7549091066073436, iteration: 367552
loss: 1.039157748222351,grad_norm: 0.9050652104894622, iteration: 367553
loss: 0.9837172627449036,grad_norm: 0.631454670885695, iteration: 367554
loss: 1.0521365404129028,grad_norm: 0.9999989501450361, iteration: 367555
loss: 0.9907923936843872,grad_norm: 0.8223812014598946, iteration: 367556
loss: 0.9884049892425537,grad_norm: 0.8409905367166293, iteration: 367557
loss: 0.9882612824440002,grad_norm: 0.7761313459717382, iteration: 367558
loss: 1.0567787885665894,grad_norm: 1.0000000180405375, iteration: 367559
loss: 1.0046671628952026,grad_norm: 0.929859028360704, iteration: 367560
loss: 1.0019118785858154,grad_norm: 0.999999050434173, iteration: 367561
loss: 0.9850948452949524,grad_norm: 0.8329115159596204, iteration: 367562
loss: 0.989018976688385,grad_norm: 0.7956530198551682, iteration: 367563
loss: 0.9993133544921875,grad_norm: 0.8519911518121343, iteration: 367564
loss: 1.0141245126724243,grad_norm: 0.9003627595026146, iteration: 367565
loss: 0.9942467212677002,grad_norm: 0.9724511867035921, iteration: 367566
loss: 1.0152809619903564,grad_norm: 0.9035191097779541, iteration: 367567
loss: 1.0333521366119385,grad_norm: 0.7877235582446585, iteration: 367568
loss: 0.9647313356399536,grad_norm: 0.8767441906737249, iteration: 367569
loss: 1.0065977573394775,grad_norm: 0.9576475792491997, iteration: 367570
loss: 1.0078094005584717,grad_norm: 0.7969395078868615, iteration: 367571
loss: 1.009455680847168,grad_norm: 0.8041465935247781, iteration: 367572
loss: 1.022855281829834,grad_norm: 0.7916858716984764, iteration: 367573
loss: 1.0084010362625122,grad_norm: 0.8494895504893626, iteration: 367574
loss: 1.006636381149292,grad_norm: 0.8472773253739335, iteration: 367575
loss: 1.0329111814498901,grad_norm: 0.8826857574496411, iteration: 367576
loss: 0.9953251481056213,grad_norm: 0.7571504303798153, iteration: 367577
loss: 0.9967128038406372,grad_norm: 0.7456184204508558, iteration: 367578
loss: 1.074912428855896,grad_norm: 0.8419948246245198, iteration: 367579
loss: 1.0343589782714844,grad_norm: 0.8583986277192553, iteration: 367580
loss: 1.0521528720855713,grad_norm: 0.7514530794056391, iteration: 367581
loss: 1.037973165512085,grad_norm: 0.7660774941636654, iteration: 367582
loss: 0.983950674533844,grad_norm: 0.9511190754995132, iteration: 367583
loss: 1.0036324262619019,grad_norm: 0.7367062363424645, iteration: 367584
loss: 0.996260404586792,grad_norm: 0.729550982154981, iteration: 367585
loss: 1.0219374895095825,grad_norm: 0.7697004594317715, iteration: 367586
loss: 1.017072081565857,grad_norm: 0.8542674315503332, iteration: 367587
loss: 1.0204014778137207,grad_norm: 0.7794067886094947, iteration: 367588
loss: 0.9951730966567993,grad_norm: 0.6943639771838674, iteration: 367589
loss: 1.0054914951324463,grad_norm: 0.8044119147144072, iteration: 367590
loss: 1.033258080482483,grad_norm: 0.9499744073789486, iteration: 367591
loss: 1.0004901885986328,grad_norm: 0.7164320611224188, iteration: 367592
loss: 0.954403281211853,grad_norm: 0.7330932712363362, iteration: 367593
loss: 0.9942837953567505,grad_norm: 0.8849922132188895, iteration: 367594
loss: 1.0425151586532593,grad_norm: 0.9999992875110618, iteration: 367595
loss: 1.0364211797714233,grad_norm: 0.777762185852044, iteration: 367596
loss: 1.2543001174926758,grad_norm: 0.99999984971657, iteration: 367597
loss: 1.0037624835968018,grad_norm: 0.7271299185119953, iteration: 367598
loss: 0.9865697622299194,grad_norm: 0.7709689632602293, iteration: 367599
loss: 1.019625186920166,grad_norm: 0.7336929969001852, iteration: 367600
loss: 1.0027424097061157,grad_norm: 0.7833183087003402, iteration: 367601
loss: 1.0026438236236572,grad_norm: 0.8976013869658026, iteration: 367602
loss: 0.9885401129722595,grad_norm: 0.9947448462944105, iteration: 367603
loss: 1.058524489402771,grad_norm: 0.8125519611877687, iteration: 367604
loss: 0.9753939509391785,grad_norm: 0.9999991107214838, iteration: 367605
loss: 1.0213918685913086,grad_norm: 0.8142601133190556, iteration: 367606
loss: 1.0346616506576538,grad_norm: 0.7752766343934158, iteration: 367607
loss: 0.9788513779640198,grad_norm: 0.7152497622400199, iteration: 367608
loss: 0.9905675649642944,grad_norm: 0.7572348961977354, iteration: 367609
loss: 0.9436072707176208,grad_norm: 0.9999989769941832, iteration: 367610
loss: 1.0173816680908203,grad_norm: 0.7930599142074045, iteration: 367611
loss: 0.9879354238510132,grad_norm: 0.7687831357856948, iteration: 367612
loss: 1.0004013776779175,grad_norm: 0.8809959394374128, iteration: 367613
loss: 1.0480575561523438,grad_norm: 0.7802146589316479, iteration: 367614
loss: 0.9440014362335205,grad_norm: 0.7561203605377492, iteration: 367615
loss: 0.9875337481498718,grad_norm: 0.7813071740736857, iteration: 367616
loss: 1.068650484085083,grad_norm: 0.6982612500560015, iteration: 367617
loss: 1.025523066520691,grad_norm: 0.9999993510052144, iteration: 367618
loss: 1.0259474515914917,grad_norm: 0.7767935523753836, iteration: 367619
loss: 0.9786221385002136,grad_norm: 0.8196323942331777, iteration: 367620
loss: 0.973588764667511,grad_norm: 0.7916127370508318, iteration: 367621
loss: 1.0310479402542114,grad_norm: 0.9755719143903064, iteration: 367622
loss: 1.0019901990890503,grad_norm: 0.8200301877175928, iteration: 367623
loss: 1.028902292251587,grad_norm: 0.888107819604915, iteration: 367624
loss: 1.0743598937988281,grad_norm: 0.9999992474174532, iteration: 367625
loss: 0.9986112713813782,grad_norm: 0.8188171353481644, iteration: 367626
loss: 1.0766209363937378,grad_norm: 0.9999995158075845, iteration: 367627
loss: 0.9943214058876038,grad_norm: 0.965948623571658, iteration: 367628
loss: 1.0010322332382202,grad_norm: 0.9925052053225942, iteration: 367629
loss: 0.9823299646377563,grad_norm: 0.9999993036050638, iteration: 367630
loss: 0.951099693775177,grad_norm: 0.9625876067627481, iteration: 367631
loss: 1.0613601207733154,grad_norm: 0.7352717956453614, iteration: 367632
loss: 1.003037452697754,grad_norm: 0.9999990565298146, iteration: 367633
loss: 1.0439821481704712,grad_norm: 0.7821018876364547, iteration: 367634
loss: 1.0158976316452026,grad_norm: 0.6179803484494956, iteration: 367635
loss: 1.0155636072158813,grad_norm: 0.7510044550689593, iteration: 367636
loss: 1.00309157371521,grad_norm: 0.9999990989119854, iteration: 367637
loss: 1.0337262153625488,grad_norm: 0.7482130391621411, iteration: 367638
loss: 0.9404970407485962,grad_norm: 0.8344187639495295, iteration: 367639
loss: 1.013648271560669,grad_norm: 0.691313048533125, iteration: 367640
loss: 1.0386385917663574,grad_norm: 0.9999996465756598, iteration: 367641
loss: 1.0439509153366089,grad_norm: 0.7168791168331029, iteration: 367642
loss: 0.9720675945281982,grad_norm: 0.9999997371747625, iteration: 367643
loss: 1.0246167182922363,grad_norm: 0.9999996925792461, iteration: 367644
loss: 0.9979419708251953,grad_norm: 0.7557131095020542, iteration: 367645
loss: 1.1257245540618896,grad_norm: 0.9999993292839049, iteration: 367646
loss: 0.9596271514892578,grad_norm: 0.9008424393131438, iteration: 367647
loss: 1.0083811283111572,grad_norm: 0.9104823797247587, iteration: 367648
loss: 0.9990442395210266,grad_norm: 0.8701040352947197, iteration: 367649
loss: 0.9980372786521912,grad_norm: 0.9031575072475646, iteration: 367650
loss: 0.9861654043197632,grad_norm: 0.7702370700417778, iteration: 367651
loss: 0.9610004425048828,grad_norm: 0.7742751767900509, iteration: 367652
loss: 1.0204646587371826,grad_norm: 0.7869761342340507, iteration: 367653
loss: 1.033522129058838,grad_norm: 0.8289893408953013, iteration: 367654
loss: 0.9721912741661072,grad_norm: 0.8857869358976878, iteration: 367655
loss: 1.015804409980774,grad_norm: 0.8170499272933958, iteration: 367656
loss: 1.0040769577026367,grad_norm: 0.7726684820477644, iteration: 367657
loss: 0.981367826461792,grad_norm: 0.7698583234295969, iteration: 367658
loss: 0.9868731498718262,grad_norm: 0.8351545253976486, iteration: 367659
loss: 1.0180975198745728,grad_norm: 0.8778309066033242, iteration: 367660
loss: 1.0316094160079956,grad_norm: 0.8607303292115086, iteration: 367661
loss: 0.9897556304931641,grad_norm: 0.7600046983341449, iteration: 367662
loss: 0.9893960952758789,grad_norm: 0.826742653695878, iteration: 367663
loss: 1.0181984901428223,grad_norm: 0.8173551894551991, iteration: 367664
loss: 1.024004578590393,grad_norm: 0.9198395036947217, iteration: 367665
loss: 1.0153708457946777,grad_norm: 0.9999998849829196, iteration: 367666
loss: 0.9505382180213928,grad_norm: 0.6620977216238105, iteration: 367667
loss: 0.9898016452789307,grad_norm: 0.7686142638212526, iteration: 367668
loss: 0.9900625348091125,grad_norm: 0.7406059387091298, iteration: 367669
loss: 1.0090758800506592,grad_norm: 0.7625043251742081, iteration: 367670
loss: 0.9962369799613953,grad_norm: 0.8542004961687983, iteration: 367671
loss: 0.9712467789649963,grad_norm: 0.8609665715849874, iteration: 367672
loss: 0.97715824842453,grad_norm: 0.8695153651499407, iteration: 367673
loss: 0.9847814440727234,grad_norm: 0.7686282077434754, iteration: 367674
loss: 0.9844849705696106,grad_norm: 0.8021959839330662, iteration: 367675
loss: 1.0040998458862305,grad_norm: 0.7469465450769986, iteration: 367676
loss: 1.0011790990829468,grad_norm: 0.8248654737774674, iteration: 367677
loss: 1.0483126640319824,grad_norm: 0.999999923677701, iteration: 367678
loss: 0.9833354949951172,grad_norm: 0.7984069755858711, iteration: 367679
loss: 1.0084781646728516,grad_norm: 0.9313402021381153, iteration: 367680
loss: 1.0188192129135132,grad_norm: 0.9999990073825994, iteration: 367681
loss: 0.9436136484146118,grad_norm: 0.8251175016292192, iteration: 367682
loss: 1.0053561925888062,grad_norm: 0.7767554659622251, iteration: 367683
loss: 1.0304878950119019,grad_norm: 0.7841031577069182, iteration: 367684
loss: 1.002281665802002,grad_norm: 0.8167607294863073, iteration: 367685
loss: 1.0066945552825928,grad_norm: 0.6414907193039797, iteration: 367686
loss: 0.9871085286140442,grad_norm: 0.8030970941747073, iteration: 367687
loss: 1.0058945417404175,grad_norm: 0.9999990297224296, iteration: 367688
loss: 0.9592424631118774,grad_norm: 0.793673636864653, iteration: 367689
loss: 0.9995835423469543,grad_norm: 0.7143166031566145, iteration: 367690
loss: 0.9754109978675842,grad_norm: 0.9999991501826149, iteration: 367691
loss: 1.01618230342865,grad_norm: 0.7380361303802768, iteration: 367692
loss: 0.9719477891921997,grad_norm: 0.8300644229376345, iteration: 367693
loss: 0.9941465854644775,grad_norm: 0.8254845366887311, iteration: 367694
loss: 0.9705340266227722,grad_norm: 0.8196352094929622, iteration: 367695
loss: 1.0033239126205444,grad_norm: 0.9868772279258006, iteration: 367696
loss: 0.9857553243637085,grad_norm: 0.8029638692314539, iteration: 367697
loss: 1.0315128564834595,grad_norm: 0.8166213525788837, iteration: 367698
loss: 1.0225616693496704,grad_norm: 0.8538242579480982, iteration: 367699
loss: 1.0000015497207642,grad_norm: 0.9475512778636751, iteration: 367700
loss: 0.9949437379837036,grad_norm: 0.9999992342515518, iteration: 367701
loss: 0.9927945137023926,grad_norm: 0.7499107391606056, iteration: 367702
loss: 1.0906481742858887,grad_norm: 0.9999995474001513, iteration: 367703
loss: 1.0218443870544434,grad_norm: 0.801473961787125, iteration: 367704
loss: 0.9658942222595215,grad_norm: 0.7876590131165607, iteration: 367705
loss: 1.0023022890090942,grad_norm: 0.8600931739359408, iteration: 367706
loss: 1.0104426145553589,grad_norm: 0.7364082193562188, iteration: 367707
loss: 0.9913747310638428,grad_norm: 0.7822582405491674, iteration: 367708
loss: 1.004668951034546,grad_norm: 0.8168474940214259, iteration: 367709
loss: 0.9943432807922363,grad_norm: 0.828103956682712, iteration: 367710
loss: 1.0016276836395264,grad_norm: 0.8369124017434624, iteration: 367711
loss: 0.9893307685852051,grad_norm: 0.7115270649535881, iteration: 367712
loss: 0.9981210827827454,grad_norm: 0.9127080211442157, iteration: 367713
loss: 1.0056498050689697,grad_norm: 0.9327356175775132, iteration: 367714
loss: 1.0280699729919434,grad_norm: 0.9404800054842596, iteration: 367715
loss: 1.0046879053115845,grad_norm: 0.9999991909748822, iteration: 367716
loss: 1.011006236076355,grad_norm: 0.8907759144035845, iteration: 367717
loss: 0.9961342215538025,grad_norm: 0.7730776084006634, iteration: 367718
loss: 1.0270073413848877,grad_norm: 0.9287228515120999, iteration: 367719
loss: 0.9922643899917603,grad_norm: 0.8174838558160059, iteration: 367720
loss: 1.0064964294433594,grad_norm: 0.9725293312782176, iteration: 367721
loss: 1.0914407968521118,grad_norm: 0.7588043392406949, iteration: 367722
loss: 0.9935665130615234,grad_norm: 0.733875957435149, iteration: 367723
loss: 1.0020408630371094,grad_norm: 0.8068439894361337, iteration: 367724
loss: 1.0001846551895142,grad_norm: 0.8673162484571503, iteration: 367725
loss: 1.0195777416229248,grad_norm: 0.8439035997745045, iteration: 367726
loss: 0.9629238843917847,grad_norm: 0.8417675780014862, iteration: 367727
loss: 1.0915565490722656,grad_norm: 0.9999993670199422, iteration: 367728
loss: 1.0073083639144897,grad_norm: 0.788605425013586, iteration: 367729
loss: 1.0174448490142822,grad_norm: 0.7933758029318179, iteration: 367730
loss: 0.9815536141395569,grad_norm: 0.9999990146238429, iteration: 367731
loss: 0.9616425633430481,grad_norm: 0.7017010779533116, iteration: 367732
loss: 0.9837743639945984,grad_norm: 0.7942519769108523, iteration: 367733
loss: 0.9969396591186523,grad_norm: 0.7157481772939983, iteration: 367734
loss: 1.0234570503234863,grad_norm: 0.7890711546940137, iteration: 367735
loss: 1.0312196016311646,grad_norm: 0.8385039315301145, iteration: 367736
loss: 1.012332558631897,grad_norm: 0.8250043976712034, iteration: 367737
loss: 1.0046666860580444,grad_norm: 0.7720825434731183, iteration: 367738
loss: 0.9993048310279846,grad_norm: 0.9999991302707976, iteration: 367739
loss: 1.0266237258911133,grad_norm: 0.6956909972059043, iteration: 367740
loss: 1.0106357336044312,grad_norm: 0.8344745351865714, iteration: 367741
loss: 1.0135912895202637,grad_norm: 0.9120814995474561, iteration: 367742
loss: 0.9716363549232483,grad_norm: 0.8323043971864306, iteration: 367743
loss: 0.9583173394203186,grad_norm: 0.8418188018697466, iteration: 367744
loss: 1.0025217533111572,grad_norm: 0.9509816496501082, iteration: 367745
loss: 0.9924985766410828,grad_norm: 0.951545346359393, iteration: 367746
loss: 0.988530695438385,grad_norm: 0.6847390102885008, iteration: 367747
loss: 1.197142243385315,grad_norm: 0.9999996031289491, iteration: 367748
loss: 0.9756154417991638,grad_norm: 0.8528248659751919, iteration: 367749
loss: 1.0114303827285767,grad_norm: 0.7481615351947973, iteration: 367750
loss: 0.9774527549743652,grad_norm: 0.7425834216014123, iteration: 367751
loss: 0.9669056534767151,grad_norm: 0.8085674384193887, iteration: 367752
loss: 1.0030558109283447,grad_norm: 0.6556115775947521, iteration: 367753
loss: 1.0342843532562256,grad_norm: 0.8433356404946675, iteration: 367754
loss: 0.9659764766693115,grad_norm: 0.9360140025590008, iteration: 367755
loss: 1.0192101001739502,grad_norm: 0.8189135721033496, iteration: 367756
loss: 1.0007712841033936,grad_norm: 0.9108494338296503, iteration: 367757
loss: 1.0386182069778442,grad_norm: 0.8850530655306538, iteration: 367758
loss: 0.9785062670707703,grad_norm: 0.7787467141447859, iteration: 367759
loss: 1.0982120037078857,grad_norm: 0.9999992310710247, iteration: 367760
loss: 0.9820889234542847,grad_norm: 0.7379320825146672, iteration: 367761
loss: 1.0179773569107056,grad_norm: 0.8370886003556062, iteration: 367762
loss: 1.1565837860107422,grad_norm: 0.8711065280280361, iteration: 367763
loss: 1.0495210886001587,grad_norm: 0.9238231771419616, iteration: 367764
loss: 1.021180272102356,grad_norm: 0.9999990392776252, iteration: 367765
loss: 1.1092894077301025,grad_norm: 0.9999995916682073, iteration: 367766
loss: 0.9691866636276245,grad_norm: 0.9999991633169206, iteration: 367767
loss: 0.9853150844573975,grad_norm: 0.8837412416615584, iteration: 367768
loss: 0.9929460287094116,grad_norm: 0.7751318532054334, iteration: 367769
loss: 1.0927644968032837,grad_norm: 0.8567285987795374, iteration: 367770
loss: 1.0143791437149048,grad_norm: 0.923743401190935, iteration: 367771
loss: 0.964087963104248,grad_norm: 0.8473760649757347, iteration: 367772
loss: 1.0228803157806396,grad_norm: 0.9579010640756679, iteration: 367773
loss: 1.0243537425994873,grad_norm: 0.9942407687838998, iteration: 367774
loss: 1.2508922815322876,grad_norm: 0.99999994446916, iteration: 367775
loss: 1.0034880638122559,grad_norm: 0.7949939727722143, iteration: 367776
loss: 1.0038213729858398,grad_norm: 0.8203191164223633, iteration: 367777
loss: 1.0522758960723877,grad_norm: 0.9339283124240274, iteration: 367778
loss: 0.9536945223808289,grad_norm: 0.8367852609335373, iteration: 367779
loss: 1.0091547966003418,grad_norm: 0.713049936758764, iteration: 367780
loss: 0.9722706079483032,grad_norm: 0.7956988320211364, iteration: 367781
loss: 0.9763986468315125,grad_norm: 0.8220793779298237, iteration: 367782
loss: 1.0286717414855957,grad_norm: 0.737534362447616, iteration: 367783
loss: 0.9922143220901489,grad_norm: 0.8614457885999484, iteration: 367784
loss: 0.9633859992027283,grad_norm: 0.7581063792331076, iteration: 367785
loss: 0.972493052482605,grad_norm: 0.9265879623669828, iteration: 367786
loss: 0.990055501461029,grad_norm: 0.8656995075989732, iteration: 367787
loss: 0.990106463432312,grad_norm: 0.9908727787489905, iteration: 367788
loss: 1.031279444694519,grad_norm: 1.0000000087403254, iteration: 367789
loss: 0.964087724685669,grad_norm: 0.7631393830913784, iteration: 367790
loss: 0.9840203523635864,grad_norm: 0.999999080252337, iteration: 367791
loss: 1.0648765563964844,grad_norm: 0.8498114585104942, iteration: 367792
loss: 0.9843041896820068,grad_norm: 0.9999998331939209, iteration: 367793
loss: 1.0081841945648193,grad_norm: 0.7966311192561071, iteration: 367794
loss: 1.0052385330200195,grad_norm: 0.8375105392594406, iteration: 367795
loss: 0.9772490859031677,grad_norm: 0.7986178424505206, iteration: 367796
loss: 1.0314515829086304,grad_norm: 0.7917212767222693, iteration: 367797
loss: 0.9824626445770264,grad_norm: 0.7150269669037081, iteration: 367798
loss: 0.9953336715698242,grad_norm: 0.9999990695750607, iteration: 367799
loss: 1.0271258354187012,grad_norm: 0.7658862701252481, iteration: 367800
loss: 1.0201525688171387,grad_norm: 0.7802101026149856, iteration: 367801
loss: 0.9835020303726196,grad_norm: 0.8827486984022418, iteration: 367802
loss: 0.9730959534645081,grad_norm: 0.8238294105374683, iteration: 367803
loss: 1.0189846754074097,grad_norm: 0.7238988566784448, iteration: 367804
loss: 1.0435348749160767,grad_norm: 0.9999997353754161, iteration: 367805
loss: 1.0004630088806152,grad_norm: 0.8467981112478866, iteration: 367806
loss: 1.0287216901779175,grad_norm: 0.7279264445624544, iteration: 367807
loss: 1.0010011196136475,grad_norm: 0.8255483124913392, iteration: 367808
loss: 1.025983452796936,grad_norm: 0.8525789891955217, iteration: 367809
loss: 1.0161843299865723,grad_norm: 0.8520364099887993, iteration: 367810
loss: 1.0193884372711182,grad_norm: 0.9529594127097741, iteration: 367811
loss: 1.0144054889678955,grad_norm: 0.8097399596602705, iteration: 367812
loss: 1.0366941690444946,grad_norm: 0.8973488733042955, iteration: 367813
loss: 0.9905835390090942,grad_norm: 0.9295680612212044, iteration: 367814
loss: 1.0754977464675903,grad_norm: 0.8393725549483546, iteration: 367815
loss: 0.9912179708480835,grad_norm: 0.8539726288749547, iteration: 367816
loss: 1.0601332187652588,grad_norm: 0.9999991912390338, iteration: 367817
loss: 0.9952794909477234,grad_norm: 0.6866021990846903, iteration: 367818
loss: 1.0323964357376099,grad_norm: 0.8389306779018324, iteration: 367819
loss: 1.1080032587051392,grad_norm: 0.9999995004526372, iteration: 367820
loss: 1.0074450969696045,grad_norm: 0.8247288721090597, iteration: 367821
loss: 1.0023053884506226,grad_norm: 0.9250785153357481, iteration: 367822
loss: 1.0298357009887695,grad_norm: 0.6704189273925888, iteration: 367823
loss: 1.0315793752670288,grad_norm: 0.8051919385854511, iteration: 367824
loss: 0.9773105978965759,grad_norm: 0.8032825664880424, iteration: 367825
loss: 1.1264690160751343,grad_norm: 0.999999564811712, iteration: 367826
loss: 0.9749460816383362,grad_norm: 0.6105011447680223, iteration: 367827
loss: 1.0068140029907227,grad_norm: 0.7702707453312376, iteration: 367828
loss: 1.0598379373550415,grad_norm: 0.9999991490535247, iteration: 367829
loss: 1.0441454648971558,grad_norm: 0.8937435971453312, iteration: 367830
loss: 1.03316068649292,grad_norm: 0.999999446385049, iteration: 367831
loss: 1.0042327642440796,grad_norm: 0.6916051218236035, iteration: 367832
loss: 1.0502017736434937,grad_norm: 0.8163698221370199, iteration: 367833
loss: 0.9811294674873352,grad_norm: 0.9999991392998551, iteration: 367834
loss: 0.9913277626037598,grad_norm: 0.9540853037397664, iteration: 367835
loss: 0.9874098300933838,grad_norm: 0.8575947057654456, iteration: 367836
loss: 1.0365346670150757,grad_norm: 0.8768946840010077, iteration: 367837
loss: 0.9916526675224304,grad_norm: 0.8083320535290619, iteration: 367838
loss: 1.0355784893035889,grad_norm: 0.8881949650330876, iteration: 367839
loss: 1.0210998058319092,grad_norm: 0.9999997566509111, iteration: 367840
loss: 1.0167627334594727,grad_norm: 0.8830108140899747, iteration: 367841
loss: 0.9758692383766174,grad_norm: 0.6922220295699063, iteration: 367842
loss: 0.9629239439964294,grad_norm: 0.7840889131849592, iteration: 367843
loss: 1.0442510843276978,grad_norm: 0.9853547753380153, iteration: 367844
loss: 1.0462533235549927,grad_norm: 0.7540298772846171, iteration: 367845
loss: 1.00824773311615,grad_norm: 0.8880340241610827, iteration: 367846
loss: 0.9716818332672119,grad_norm: 0.9608770207517159, iteration: 367847
loss: 1.070900559425354,grad_norm: 0.9050419963874248, iteration: 367848
loss: 1.1433545351028442,grad_norm: 0.9999992836745786, iteration: 367849
loss: 0.9705645442008972,grad_norm: 0.769412655762062, iteration: 367850
loss: 0.9874054193496704,grad_norm: 0.829108198340167, iteration: 367851
loss: 0.9907362461090088,grad_norm: 0.8498355859460694, iteration: 367852
loss: 0.9818947911262512,grad_norm: 0.7787989743455632, iteration: 367853
loss: 0.982714056968689,grad_norm: 0.8542382291816566, iteration: 367854
loss: 1.0090328454971313,grad_norm: 0.9258313044853725, iteration: 367855
loss: 1.0481938123703003,grad_norm: 0.9999999023631527, iteration: 367856
loss: 1.0087441205978394,grad_norm: 0.8414011567968748, iteration: 367857
loss: 0.9843685626983643,grad_norm: 0.7656268962212593, iteration: 367858
loss: 1.0357686281204224,grad_norm: 0.7906327512436414, iteration: 367859
loss: 1.0081026554107666,grad_norm: 0.7190396329553047, iteration: 367860
loss: 1.00519859790802,grad_norm: 0.999999109926318, iteration: 367861
loss: 1.0150617361068726,grad_norm: 0.8194266789585107, iteration: 367862
loss: 0.9907538294792175,grad_norm: 0.8988509855441382, iteration: 367863
loss: 1.0187854766845703,grad_norm: 0.6797528688338721, iteration: 367864
loss: 0.9732058048248291,grad_norm: 0.7142605202918182, iteration: 367865
loss: 0.9894189834594727,grad_norm: 0.7679263532802202, iteration: 367866
loss: 0.992442786693573,grad_norm: 0.7234887493423919, iteration: 367867
loss: 0.9871007204055786,grad_norm: 0.8947916217718054, iteration: 367868
loss: 0.9776771664619446,grad_norm: 0.9017828973355444, iteration: 367869
loss: 1.018825888633728,grad_norm: 0.8118303652680676, iteration: 367870
loss: 0.9916313290596008,grad_norm: 0.8680352819082997, iteration: 367871
loss: 0.9623709917068481,grad_norm: 0.7172523597698983, iteration: 367872
loss: 0.9815171957015991,grad_norm: 0.7436265156930632, iteration: 367873
loss: 1.0740485191345215,grad_norm: 0.8555823660051298, iteration: 367874
loss: 1.0453704595565796,grad_norm: 0.8180737926254795, iteration: 367875
loss: 0.9674379825592041,grad_norm: 0.8594655048863425, iteration: 367876
loss: 0.9953356385231018,grad_norm: 0.8518655187910762, iteration: 367877
loss: 1.0955841541290283,grad_norm: 0.9565029974768156, iteration: 367878
loss: 0.985082745552063,grad_norm: 0.81896728633316, iteration: 367879
loss: 1.023909568786621,grad_norm: 0.8664051739873943, iteration: 367880
loss: 0.9698995351791382,grad_norm: 0.8534894793680031, iteration: 367881
loss: 0.9767918586730957,grad_norm: 0.7715888022830821, iteration: 367882
loss: 1.0824143886566162,grad_norm: 0.9999991349934512, iteration: 367883
loss: 1.0047142505645752,grad_norm: 0.9065467085341314, iteration: 367884
loss: 1.0256577730178833,grad_norm: 0.7428968631088262, iteration: 367885
loss: 1.072805643081665,grad_norm: 0.9999996601711105, iteration: 367886
loss: 0.9798863530158997,grad_norm: 0.7716553075240886, iteration: 367887
loss: 0.9632253050804138,grad_norm: 0.8619548064259449, iteration: 367888
loss: 0.95794278383255,grad_norm: 0.9999991829870284, iteration: 367889
loss: 1.0993924140930176,grad_norm: 0.9999991866037643, iteration: 367890
loss: 1.0153216123580933,grad_norm: 0.7407307057159562, iteration: 367891
loss: 0.9945666193962097,grad_norm: 0.7587860776105133, iteration: 367892
loss: 1.0242741107940674,grad_norm: 0.9999992473378498, iteration: 367893
loss: 0.9957366585731506,grad_norm: 0.8725925641407799, iteration: 367894
loss: 1.008595585823059,grad_norm: 0.99999913963274, iteration: 367895
loss: 0.981735348701477,grad_norm: 0.8203352092968571, iteration: 367896
loss: 0.9937700629234314,grad_norm: 0.772818854540936, iteration: 367897
loss: 1.0364553928375244,grad_norm: 0.999999302817082, iteration: 367898
loss: 0.9778844118118286,grad_norm: 0.9338161783344743, iteration: 367899
loss: 1.0408614873886108,grad_norm: 0.9999999947150785, iteration: 367900
loss: 1.1779999732971191,grad_norm: 0.9999997256674642, iteration: 367901
loss: 1.0558384656906128,grad_norm: 0.906410712258527, iteration: 367902
loss: 1.065192699432373,grad_norm: 0.9999996450983452, iteration: 367903
loss: 1.0172293186187744,grad_norm: 0.940972133744052, iteration: 367904
loss: 1.0736392736434937,grad_norm: 0.9999999301566469, iteration: 367905
loss: 0.9545300602912903,grad_norm: 0.9999994842443455, iteration: 367906
loss: 0.9476906657218933,grad_norm: 0.8888530920837436, iteration: 367907
loss: 0.9880919456481934,grad_norm: 0.7910370830084467, iteration: 367908
loss: 1.0054168701171875,grad_norm: 0.8303021836800483, iteration: 367909
loss: 0.9709771871566772,grad_norm: 0.7729891382321223, iteration: 367910
loss: 0.9678004384040833,grad_norm: 0.8109905504443797, iteration: 367911
loss: 1.0780081748962402,grad_norm: 0.9999991779954026, iteration: 367912
loss: 1.0328096151351929,grad_norm: 0.8757927614478205, iteration: 367913
loss: 0.9800342321395874,grad_norm: 0.7386139148595922, iteration: 367914
loss: 1.0263758897781372,grad_norm: 0.999999220937691, iteration: 367915
loss: 0.9845284819602966,grad_norm: 0.9999996962569779, iteration: 367916
loss: 1.026073694229126,grad_norm: 0.9287207855755552, iteration: 367917
loss: 1.0261554718017578,grad_norm: 0.7740733458906345, iteration: 367918
loss: 1.0302623510360718,grad_norm: 0.8442692261268393, iteration: 367919
loss: 1.1364299058914185,grad_norm: 0.9410422669674492, iteration: 367920
loss: 0.9821150898933411,grad_norm: 0.7762711558741419, iteration: 367921
loss: 1.0181738138198853,grad_norm: 0.9999989761918562, iteration: 367922
loss: 1.0031781196594238,grad_norm: 0.7754262667951025, iteration: 367923
loss: 1.0129504203796387,grad_norm: 0.9999998737199495, iteration: 367924
loss: 0.9959605932235718,grad_norm: 0.8128377511523127, iteration: 367925
loss: 1.0046930313110352,grad_norm: 0.9999998322296811, iteration: 367926
loss: 0.9801914691925049,grad_norm: 0.7231499423162508, iteration: 367927
loss: 0.9862145781517029,grad_norm: 0.7910188816541529, iteration: 367928
loss: 0.965307354927063,grad_norm: 0.6642131455220224, iteration: 367929
loss: 0.992440402507782,grad_norm: 0.9999997953641406, iteration: 367930
loss: 1.0019714832305908,grad_norm: 0.8226411722543412, iteration: 367931
loss: 1.0080790519714355,grad_norm: 0.9102235389951571, iteration: 367932
loss: 1.028294563293457,grad_norm: 0.9999990030940489, iteration: 367933
loss: 0.9612372517585754,grad_norm: 0.7535386631024209, iteration: 367934
loss: 1.0080665349960327,grad_norm: 0.9229374298639478, iteration: 367935
loss: 1.0249130725860596,grad_norm: 0.9053967389979193, iteration: 367936
loss: 1.0132322311401367,grad_norm: 0.9515954138952712, iteration: 367937
loss: 1.0101778507232666,grad_norm: 0.9999993084705671, iteration: 367938
loss: 1.0738623142242432,grad_norm: 0.9999997375826086, iteration: 367939
loss: 1.0198990106582642,grad_norm: 0.7987832578709855, iteration: 367940
loss: 0.974398136138916,grad_norm: 0.845577713544362, iteration: 367941
loss: 1.0381711721420288,grad_norm: 0.7938087323829013, iteration: 367942
loss: 1.0509275197982788,grad_norm: 0.8707863573930751, iteration: 367943
loss: 0.9578223824501038,grad_norm: 0.7928434937429286, iteration: 367944
loss: 0.987019956111908,grad_norm: 0.9130301030373185, iteration: 367945
loss: 0.9968743324279785,grad_norm: 0.7733261305217198, iteration: 367946
loss: 1.0503257513046265,grad_norm: 0.7366239383757744, iteration: 367947
loss: 1.0001825094223022,grad_norm: 0.8739904712235822, iteration: 367948
loss: 1.0168575048446655,grad_norm: 0.7836209612683168, iteration: 367949
loss: 0.9763461947441101,grad_norm: 0.8695776672387868, iteration: 367950
loss: 1.007431149482727,grad_norm: 0.9999992295831064, iteration: 367951
loss: 0.9871655702590942,grad_norm: 0.8936598036108194, iteration: 367952
loss: 1.0000282526016235,grad_norm: 0.7941940538133802, iteration: 367953
loss: 0.9720157384872437,grad_norm: 0.9999995350395269, iteration: 367954
loss: 1.0047423839569092,grad_norm: 0.9999995482751696, iteration: 367955
loss: 0.9960042238235474,grad_norm: 0.8160991262304109, iteration: 367956
loss: 1.0178242921829224,grad_norm: 0.8596565643739615, iteration: 367957
loss: 1.0623522996902466,grad_norm: 0.9999992690657882, iteration: 367958
loss: 1.0111243724822998,grad_norm: 0.6847806088876445, iteration: 367959
loss: 1.0297777652740479,grad_norm: 0.9757796193798108, iteration: 367960
loss: 0.9574546813964844,grad_norm: 0.7902505134008043, iteration: 367961
loss: 0.9951478838920593,grad_norm: 0.7698499487785478, iteration: 367962
loss: 1.0231963396072388,grad_norm: 0.9999991745147575, iteration: 367963
loss: 1.0252916812896729,grad_norm: 0.7917114448565337, iteration: 367964
loss: 1.0696537494659424,grad_norm: 0.9999995842422209, iteration: 367965
loss: 1.0591915845870972,grad_norm: 0.9288149457355119, iteration: 367966
loss: 1.0841480493545532,grad_norm: 0.9999994223453526, iteration: 367967
loss: 0.9932247400283813,grad_norm: 0.8265851492144176, iteration: 367968
loss: 1.0357136726379395,grad_norm: 0.7082370485557284, iteration: 367969
loss: 0.9835626482963562,grad_norm: 0.957595388668212, iteration: 367970
loss: 1.0036976337432861,grad_norm: 0.7369864953613133, iteration: 367971
loss: 1.0679056644439697,grad_norm: 0.9350680158925553, iteration: 367972
loss: 1.0390316247940063,grad_norm: 0.8477884797705162, iteration: 367973
loss: 1.0815417766571045,grad_norm: 0.9999991899703022, iteration: 367974
loss: 0.9638711214065552,grad_norm: 0.8745663008632512, iteration: 367975
loss: 0.9844486117362976,grad_norm: 0.7890243696517871, iteration: 367976
loss: 0.974376380443573,grad_norm: 0.9999999904410359, iteration: 367977
loss: 1.0640360116958618,grad_norm: 0.8343121631938909, iteration: 367978
loss: 0.9812875986099243,grad_norm: 0.9581717436799811, iteration: 367979
loss: 0.9970532655715942,grad_norm: 0.9549022690324979, iteration: 367980
loss: 1.0157197713851929,grad_norm: 0.7948497300717047, iteration: 367981
loss: 1.0093592405319214,grad_norm: 0.9999999030450026, iteration: 367982
loss: 0.995415449142456,grad_norm: 0.7640363164943149, iteration: 367983
loss: 0.9894428849220276,grad_norm: 0.7440232390713144, iteration: 367984
loss: 1.022359013557434,grad_norm: 0.999999314770191, iteration: 367985
loss: 1.0567529201507568,grad_norm: 0.999999808419445, iteration: 367986
loss: 0.9871348738670349,grad_norm: 0.9999994450533511, iteration: 367987
loss: 0.9832437634468079,grad_norm: 0.8242443420111618, iteration: 367988
loss: 1.0251545906066895,grad_norm: 0.9008228569542249, iteration: 367989
loss: 0.9929631948471069,grad_norm: 0.999999627113809, iteration: 367990
loss: 0.9834460616111755,grad_norm: 0.6939823158113347, iteration: 367991
loss: 0.9949160218238831,grad_norm: 0.8696446100868047, iteration: 367992
loss: 0.9849175214767456,grad_norm: 0.7505230544522118, iteration: 367993
loss: 1.0084787607192993,grad_norm: 0.8647874082515846, iteration: 367994
loss: 0.9606743454933167,grad_norm: 0.9999994080979933, iteration: 367995
loss: 1.021836757659912,grad_norm: 0.8240277633417495, iteration: 367996
loss: 0.9991592764854431,grad_norm: 0.7998136701744378, iteration: 367997
loss: 1.036743402481079,grad_norm: 0.9999992841001116, iteration: 367998
loss: 1.1357331275939941,grad_norm: 0.9302231306044252, iteration: 367999
loss: 1.0240323543548584,grad_norm: 0.9999991168959738, iteration: 368000
loss: 1.0622618198394775,grad_norm: 0.9999998455902138, iteration: 368001
loss: 0.9963907599449158,grad_norm: 0.9381418026931204, iteration: 368002
loss: 0.9837672710418701,grad_norm: 0.9999998261768493, iteration: 368003
loss: 1.036697268486023,grad_norm: 0.9650099615314093, iteration: 368004
loss: 0.9849401116371155,grad_norm: 0.9999995419708408, iteration: 368005
loss: 0.9906794428825378,grad_norm: 0.7922761815666467, iteration: 368006
loss: 1.0294677019119263,grad_norm: 0.894710448939944, iteration: 368007
loss: 0.9656158089637756,grad_norm: 0.7841039783040656, iteration: 368008
loss: 1.027085542678833,grad_norm: 0.9999992675351375, iteration: 368009
loss: 1.0816782712936401,grad_norm: 0.999999104278789, iteration: 368010
loss: 1.0693544149398804,grad_norm: 0.9999994283922933, iteration: 368011
loss: 1.0324097871780396,grad_norm: 0.999999506549865, iteration: 368012
loss: 0.9980667233467102,grad_norm: 0.9437961980297711, iteration: 368013
loss: 1.0271087884902954,grad_norm: 0.9999995443116383, iteration: 368014
loss: 1.0072466135025024,grad_norm: 0.6385282888371264, iteration: 368015
loss: 0.9941946864128113,grad_norm: 0.945517833233175, iteration: 368016
loss: 0.9572840929031372,grad_norm: 0.9999990270570384, iteration: 368017
loss: 0.9967484474182129,grad_norm: 0.8849941243455718, iteration: 368018
loss: 0.9885141253471375,grad_norm: 0.6722524975948229, iteration: 368019
loss: 1.0560789108276367,grad_norm: 0.9999994088851172, iteration: 368020
loss: 0.9796624183654785,grad_norm: 0.8498857649074955, iteration: 368021
loss: 1.0785162448883057,grad_norm: 1.0000000560072664, iteration: 368022
loss: 0.9997266530990601,grad_norm: 0.8684508270361874, iteration: 368023
loss: 1.0113167762756348,grad_norm: 0.8127258805949561, iteration: 368024
loss: 0.9865254163742065,grad_norm: 0.7540227909844104, iteration: 368025
loss: 1.0628511905670166,grad_norm: 0.9999993728928263, iteration: 368026
loss: 1.0849518775939941,grad_norm: 0.9999996885409373, iteration: 368027
loss: 1.0444331169128418,grad_norm: 1.000000111058684, iteration: 368028
loss: 1.0201436281204224,grad_norm: 0.7698918020369089, iteration: 368029
loss: 1.0153721570968628,grad_norm: 0.7211760562188432, iteration: 368030
loss: 0.9893858432769775,grad_norm: 0.9999996856652993, iteration: 368031
loss: 1.079547643661499,grad_norm: 0.9999997700079647, iteration: 368032
loss: 1.0253952741622925,grad_norm: 0.8715632735029956, iteration: 368033
loss: 0.9968644976615906,grad_norm: 0.7599155049175295, iteration: 368034
loss: 1.0324926376342773,grad_norm: 0.9296822235401001, iteration: 368035
loss: 1.0688848495483398,grad_norm: 0.8859866878068626, iteration: 368036
loss: 0.9740746021270752,grad_norm: 0.8070860717147901, iteration: 368037
loss: 1.0045781135559082,grad_norm: 0.9999997582908794, iteration: 368038
loss: 0.998294472694397,grad_norm: 0.9999998298359166, iteration: 368039
loss: 1.0024126768112183,grad_norm: 0.7674219963451907, iteration: 368040
loss: 0.9807220101356506,grad_norm: 0.9187258692041481, iteration: 368041
loss: 1.0286705493927002,grad_norm: 0.9999990820792364, iteration: 368042
loss: 1.1616100072860718,grad_norm: 0.9999998318507086, iteration: 368043
loss: 1.0602120161056519,grad_norm: 0.9319479547543674, iteration: 368044
loss: 1.109448790550232,grad_norm: 0.9999998631906756, iteration: 368045
loss: 0.9591422080993652,grad_norm: 0.9818088816018908, iteration: 368046
loss: 1.020090103149414,grad_norm: 0.8822958263218478, iteration: 368047
loss: 1.0603042840957642,grad_norm: 0.9999995055308782, iteration: 368048
loss: 1.0075056552886963,grad_norm: 0.9999991703730516, iteration: 368049
loss: 1.0386515855789185,grad_norm: 0.7124787122383246, iteration: 368050
loss: 1.0375804901123047,grad_norm: 1.000000019024259, iteration: 368051
loss: 1.1149656772613525,grad_norm: 0.9999996773813424, iteration: 368052
loss: 1.0043269395828247,grad_norm: 0.8992577689910561, iteration: 368053
loss: 1.0646488666534424,grad_norm: 0.9488501012927729, iteration: 368054
loss: 1.0166423320770264,grad_norm: 0.9999998336171917, iteration: 368055
loss: 1.1070241928100586,grad_norm: 0.9999999323285167, iteration: 368056
loss: 0.9571862816810608,grad_norm: 0.8126998622940393, iteration: 368057
loss: 1.0167081356048584,grad_norm: 0.7705548306337461, iteration: 368058
loss: 1.1385728120803833,grad_norm: 0.9542794754075313, iteration: 368059
loss: 1.09725821018219,grad_norm: 0.999999827058131, iteration: 368060
loss: 0.9888290166854858,grad_norm: 0.9874285256991822, iteration: 368061
loss: 1.0648616552352905,grad_norm: 0.8422715892328532, iteration: 368062
loss: 1.0598491430282593,grad_norm: 0.9999996591852164, iteration: 368063
loss: 1.0680783987045288,grad_norm: 0.8793436673054408, iteration: 368064
loss: 0.98542720079422,grad_norm: 0.9999997977681715, iteration: 368065
loss: 1.0490400791168213,grad_norm: 0.9999996328973398, iteration: 368066
loss: 0.9615771770477295,grad_norm: 0.83999465759019, iteration: 368067
loss: 1.0820204019546509,grad_norm: 0.9999999429541365, iteration: 368068
loss: 1.104162335395813,grad_norm: 0.9999997881501351, iteration: 368069
loss: 1.0252411365509033,grad_norm: 0.9579426578296574, iteration: 368070
loss: 0.9750137329101562,grad_norm: 0.9999994729338505, iteration: 368071
loss: 1.0852656364440918,grad_norm: 0.9999998365103255, iteration: 368072
loss: 1.0258530378341675,grad_norm: 0.8099552062431183, iteration: 368073
loss: 0.9981719851493835,grad_norm: 0.8439166865211498, iteration: 368074
loss: 1.0659958124160767,grad_norm: 0.999999088433099, iteration: 368075
loss: 1.016811728477478,grad_norm: 0.9176938643542869, iteration: 368076
loss: 0.9769178628921509,grad_norm: 0.839801683877593, iteration: 368077
loss: 0.9991775751113892,grad_norm: 0.9999992393648849, iteration: 368078
loss: 1.0360134840011597,grad_norm: 0.9756536782190169, iteration: 368079
loss: 0.9925845861434937,grad_norm: 0.8569988952016422, iteration: 368080
loss: 1.024702787399292,grad_norm: 0.9999998444732222, iteration: 368081
loss: 0.9968627691268921,grad_norm: 0.7181661672149964, iteration: 368082
loss: 1.0647313594818115,grad_norm: 0.9999997080294155, iteration: 368083
loss: 0.9999026656150818,grad_norm: 0.9999997061191654, iteration: 368084
loss: 1.1070127487182617,grad_norm: 0.9999994588382426, iteration: 368085
loss: 1.3316271305084229,grad_norm: 0.9999999388208463, iteration: 368086
loss: 1.0198079347610474,grad_norm: 0.9999994769184065, iteration: 368087
loss: 1.0729503631591797,grad_norm: 0.8600634312029527, iteration: 368088
loss: 1.0568770170211792,grad_norm: 0.9999998100058384, iteration: 368089
loss: 1.0326976776123047,grad_norm: 0.9999993335971332, iteration: 368090
loss: 0.9857851266860962,grad_norm: 0.9194189023428839, iteration: 368091
loss: 1.053765058517456,grad_norm: 0.9999992224494602, iteration: 368092
loss: 1.0226800441741943,grad_norm: 0.70331413406106, iteration: 368093
loss: 1.0894736051559448,grad_norm: 0.8033765066206188, iteration: 368094
loss: 1.0511566400527954,grad_norm: 0.9999996304524807, iteration: 368095
loss: 0.9931080937385559,grad_norm: 0.999999852749784, iteration: 368096
loss: 0.9989932775497437,grad_norm: 0.9999993437255509, iteration: 368097
loss: 1.2405619621276855,grad_norm: 0.9999998362894984, iteration: 368098
loss: 1.0134505033493042,grad_norm: 0.7495956497152488, iteration: 368099
loss: 0.9838075041770935,grad_norm: 0.8259092862436462, iteration: 368100
loss: 0.980446457862854,grad_norm: 0.9435407954297982, iteration: 368101
loss: 1.089965581893921,grad_norm: 1.000000032812801, iteration: 368102
loss: 1.1124783754348755,grad_norm: 0.9999995641318338, iteration: 368103
loss: 1.0366142988204956,grad_norm: 0.9999998778676061, iteration: 368104
loss: 0.9868632555007935,grad_norm: 0.999999342075402, iteration: 368105
loss: 1.0572775602340698,grad_norm: 0.9999999150706739, iteration: 368106
loss: 0.9817346334457397,grad_norm: 0.9999991269473566, iteration: 368107
loss: 1.0236364603042603,grad_norm: 0.7361452848957648, iteration: 368108
loss: 1.1600090265274048,grad_norm: 0.9999997198450457, iteration: 368109
loss: 1.0761468410491943,grad_norm: 0.9999992155211819, iteration: 368110
loss: 1.0048943758010864,grad_norm: 0.8933272347700723, iteration: 368111
loss: 0.9786204099655151,grad_norm: 0.8218786484840364, iteration: 368112
loss: 1.0615239143371582,grad_norm: 0.8482743034164686, iteration: 368113
loss: 1.0125209093093872,grad_norm: 0.9999994275284108, iteration: 368114
loss: 1.0385710000991821,grad_norm: 0.9999993155457627, iteration: 368115
loss: 1.0473015308380127,grad_norm: 0.9999995018474029, iteration: 368116
loss: 1.053750991821289,grad_norm: 0.9999996189555275, iteration: 368117
loss: 1.3237684965133667,grad_norm: 0.9999998980399042, iteration: 368118
loss: 1.1064003705978394,grad_norm: 0.9999996060601841, iteration: 368119
loss: 1.0065035820007324,grad_norm: 0.7933987125976433, iteration: 368120
loss: 0.9983957409858704,grad_norm: 0.9999990640355519, iteration: 368121
loss: 0.9829790592193604,grad_norm: 0.7818698857915487, iteration: 368122
loss: 1.0367130041122437,grad_norm: 0.9999995114260141, iteration: 368123
loss: 1.1097732782363892,grad_norm: 0.9999991669838878, iteration: 368124
loss: 1.1189026832580566,grad_norm: 0.9999991179846907, iteration: 368125
loss: 1.1169708967208862,grad_norm: 0.9999997642490945, iteration: 368126
loss: 1.082168459892273,grad_norm: 0.9999992844886454, iteration: 368127
loss: 1.0878757238388062,grad_norm: 0.9999999925217541, iteration: 368128
loss: 0.9851082563400269,grad_norm: 0.8803273410518336, iteration: 368129
loss: 1.0140548944473267,grad_norm: 0.9999996345604654, iteration: 368130
loss: 1.0355861186981201,grad_norm: 0.9682564868755384, iteration: 368131
loss: 1.1159019470214844,grad_norm: 0.999999773363635, iteration: 368132
loss: 0.9978138208389282,grad_norm: 0.8305831538106678, iteration: 368133
loss: 1.0600497722625732,grad_norm: 0.9999999519646312, iteration: 368134
loss: 1.0203858613967896,grad_norm: 0.7944574725560472, iteration: 368135
loss: 1.0295813083648682,grad_norm: 0.9999992225989365, iteration: 368136
loss: 1.009272813796997,grad_norm: 0.7713085676917727, iteration: 368137
loss: 1.0341542959213257,grad_norm: 0.9999990492566768, iteration: 368138
loss: 1.0179747343063354,grad_norm: 0.9999990951231618, iteration: 368139
loss: 1.1062564849853516,grad_norm: 0.9999998568754415, iteration: 368140
loss: 1.0877256393432617,grad_norm: 0.999999783218471, iteration: 368141
loss: 0.9798295497894287,grad_norm: 0.927884637574274, iteration: 368142
loss: 1.075770378112793,grad_norm: 0.9999994032331394, iteration: 368143
loss: 1.0725531578063965,grad_norm: 0.7627780526617743, iteration: 368144
loss: 0.9619077444076538,grad_norm: 0.9545812855749871, iteration: 368145
loss: 1.0103915929794312,grad_norm: 0.8430927777805529, iteration: 368146
loss: 0.9884095191955566,grad_norm: 0.8327393349505133, iteration: 368147
loss: 1.0008004903793335,grad_norm: 0.999999176827032, iteration: 368148
loss: 1.2816818952560425,grad_norm: 0.9999999603809882, iteration: 368149
loss: 1.0165202617645264,grad_norm: 0.9784251836519688, iteration: 368150
loss: 1.0968017578125,grad_norm: 0.999999862518599, iteration: 368151
loss: 1.0316592454910278,grad_norm: 0.7922454279518211, iteration: 368152
loss: 1.0294288396835327,grad_norm: 0.7859307827511595, iteration: 368153
loss: 1.066992163658142,grad_norm: 0.8163150890386633, iteration: 368154
loss: 1.1229922771453857,grad_norm: 0.9999990572169886, iteration: 368155
loss: 1.0323975086212158,grad_norm: 0.7766295335785899, iteration: 368156
loss: 0.9903422594070435,grad_norm: 0.8857331513758309, iteration: 368157
loss: 1.0776219367980957,grad_norm: 0.9999996682899619, iteration: 368158
loss: 1.0425198078155518,grad_norm: 0.8819878194598284, iteration: 368159
loss: 1.0022141933441162,grad_norm: 0.8306143777162123, iteration: 368160
loss: 1.1488780975341797,grad_norm: 0.8731813822985629, iteration: 368161
loss: 1.0461171865463257,grad_norm: 0.9999994896454713, iteration: 368162
loss: 1.0735753774642944,grad_norm: 0.9999997356311168, iteration: 368163
loss: 1.065877914428711,grad_norm: 0.9999990792288196, iteration: 368164
loss: 1.0251778364181519,grad_norm: 0.771815386952171, iteration: 368165
loss: 1.016770839691162,grad_norm: 0.760214719359905, iteration: 368166
loss: 1.077520728111267,grad_norm: 0.9598049866085014, iteration: 368167
loss: 0.9982860088348389,grad_norm: 0.8170114772087892, iteration: 368168
loss: 1.0407824516296387,grad_norm: 0.9035623784275891, iteration: 368169
loss: 1.0701322555541992,grad_norm: 0.8308390834270207, iteration: 368170
loss: 0.9911178350448608,grad_norm: 0.9999991947043352, iteration: 368171
loss: 1.1354303359985352,grad_norm: 0.9999997770431728, iteration: 368172
loss: 1.0891276597976685,grad_norm: 0.9999993669396776, iteration: 368173
loss: 1.0070527791976929,grad_norm: 0.8443484605407514, iteration: 368174
loss: 1.0038914680480957,grad_norm: 0.8596514801213023, iteration: 368175
loss: 1.0365666151046753,grad_norm: 0.9999996045339135, iteration: 368176
loss: 1.0720175504684448,grad_norm: 0.999999815482728, iteration: 368177
loss: 1.0683752298355103,grad_norm: 0.9999996681893141, iteration: 368178
loss: 0.9693289399147034,grad_norm: 0.8196898485966138, iteration: 368179
loss: 1.096470832824707,grad_norm: 0.8953598874343095, iteration: 368180
loss: 0.998522937297821,grad_norm: 0.8292665279693797, iteration: 368181
loss: 1.1679362058639526,grad_norm: 0.9999994985464133, iteration: 368182
loss: 1.0120363235473633,grad_norm: 0.9999993743246596, iteration: 368183
loss: 1.0377954244613647,grad_norm: 0.9999996357604979, iteration: 368184
loss: 1.2626850605010986,grad_norm: 0.999999688356033, iteration: 368185
loss: 1.1279598474502563,grad_norm: 0.9999992869079602, iteration: 368186
loss: 1.0118104219436646,grad_norm: 0.9004936546497927, iteration: 368187
loss: 1.080421805381775,grad_norm: 0.999999331179869, iteration: 368188
loss: 0.979763388633728,grad_norm: 0.6883004136864479, iteration: 368189
loss: 1.0270408391952515,grad_norm: 0.9668643628623511, iteration: 368190
loss: 1.0582114458084106,grad_norm: 0.9999995310746747, iteration: 368191
loss: 1.0097100734710693,grad_norm: 0.7738623452049803, iteration: 368192
loss: 1.0171633958816528,grad_norm: 0.999999231308227, iteration: 368193
loss: 0.9993996024131775,grad_norm: 0.7374281204749085, iteration: 368194
loss: 0.9938010573387146,grad_norm: 0.875087307754353, iteration: 368195
loss: 1.0161808729171753,grad_norm: 0.7714157295798818, iteration: 368196
loss: 1.0675053596496582,grad_norm: 0.8876720596580324, iteration: 368197
loss: 1.0521678924560547,grad_norm: 0.9999994043985745, iteration: 368198
loss: 1.0382229089736938,grad_norm: 0.7315568703635014, iteration: 368199
loss: 1.1195114850997925,grad_norm: 0.9999998716743639, iteration: 368200
loss: 1.0805913209915161,grad_norm: 0.9999998999353943, iteration: 368201
loss: 1.004622220993042,grad_norm: 0.9151072941168487, iteration: 368202
loss: 1.0984646081924438,grad_norm: 0.9999998589761917, iteration: 368203
loss: 1.1421622037887573,grad_norm: 0.9999998782534849, iteration: 368204
loss: 1.0051623582839966,grad_norm: 0.9880625464454598, iteration: 368205
loss: 1.011957049369812,grad_norm: 0.9999996101029691, iteration: 368206
loss: 1.0382721424102783,grad_norm: 0.9999998777964878, iteration: 368207
loss: 1.0625723600387573,grad_norm: 0.9999990141532183, iteration: 368208
loss: 1.0684736967086792,grad_norm: 0.9999995153444073, iteration: 368209
loss: 1.0203646421432495,grad_norm: 0.7943498882749312, iteration: 368210
loss: 1.0680811405181885,grad_norm: 0.9999993072985838, iteration: 368211
loss: 1.0524652004241943,grad_norm: 0.9999993705370355, iteration: 368212
loss: 1.0110152959823608,grad_norm: 0.6804525892733477, iteration: 368213
loss: 1.0107154846191406,grad_norm: 0.7294060416721158, iteration: 368214
loss: 1.1947863101959229,grad_norm: 0.9999998707556481, iteration: 368215
loss: 0.9762616157531738,grad_norm: 0.9877762036935103, iteration: 368216
loss: 1.004837155342102,grad_norm: 0.9999999450493652, iteration: 368217
loss: 1.0595901012420654,grad_norm: 0.9999996643430743, iteration: 368218
loss: 1.0453606843948364,grad_norm: 0.9999997533528863, iteration: 368219
loss: 1.006502389907837,grad_norm: 0.9999991744350927, iteration: 368220
loss: 1.0248467922210693,grad_norm: 0.9999994084782143, iteration: 368221
loss: 1.011548638343811,grad_norm: 0.9008111194570082, iteration: 368222
loss: 1.0773470401763916,grad_norm: 0.9999999266222186, iteration: 368223
loss: 0.9741597175598145,grad_norm: 0.9255651739594676, iteration: 368224
loss: 1.0100973844528198,grad_norm: 0.9292730638971839, iteration: 368225
loss: 1.008522391319275,grad_norm: 0.7248953933480675, iteration: 368226
loss: 1.019384741783142,grad_norm: 0.7210132907347295, iteration: 368227
loss: 1.0136464834213257,grad_norm: 0.7854559666020113, iteration: 368228
loss: 1.0189508199691772,grad_norm: 0.9259529782578191, iteration: 368229
loss: 0.9918360114097595,grad_norm: 0.8301659046664155, iteration: 368230
loss: 1.0020301342010498,grad_norm: 0.8274213172507868, iteration: 368231
loss: 1.0227651596069336,grad_norm: 0.999999232009921, iteration: 368232
loss: 0.9941742420196533,grad_norm: 0.8874505130064982, iteration: 368233
loss: 0.9908557534217834,grad_norm: 0.7913007144506644, iteration: 368234
loss: 0.9943360090255737,grad_norm: 0.7901282111239722, iteration: 368235
loss: 0.9400177597999573,grad_norm: 0.864526777184287, iteration: 368236
loss: 1.000298023223877,grad_norm: 0.9999991884665391, iteration: 368237
loss: 1.0932962894439697,grad_norm: 0.9999997088220206, iteration: 368238
loss: 1.0159962177276611,grad_norm: 0.69670170324758, iteration: 368239
loss: 1.0397703647613525,grad_norm: 0.9699810100128526, iteration: 368240
loss: 1.0188974142074585,grad_norm: 0.9298793655861332, iteration: 368241
loss: 0.9884509444236755,grad_norm: 0.8203376527555121, iteration: 368242
loss: 0.9821190237998962,grad_norm: 0.6603261194809624, iteration: 368243
loss: 1.030006766319275,grad_norm: 0.9695868170203382, iteration: 368244
loss: 1.007795810699463,grad_norm: 0.8746546993679923, iteration: 368245
loss: 0.9866765737533569,grad_norm: 0.9999991305909657, iteration: 368246
loss: 1.0725350379943848,grad_norm: 0.9999999135200488, iteration: 368247
loss: 1.0092029571533203,grad_norm: 0.9999998906497174, iteration: 368248
loss: 1.007788062095642,grad_norm: 0.9171709254721996, iteration: 368249
loss: 1.0160129070281982,grad_norm: 0.7911606102086539, iteration: 368250
loss: 1.0709998607635498,grad_norm: 0.9999990731773994, iteration: 368251
loss: 1.0196876525878906,grad_norm: 0.69328642056414, iteration: 368252
loss: 1.1415663957595825,grad_norm: 0.9999999861070814, iteration: 368253
loss: 1.0084004402160645,grad_norm: 0.8238788130155499, iteration: 368254
loss: 1.0089190006256104,grad_norm: 0.9115372273396419, iteration: 368255
loss: 1.0941134691238403,grad_norm: 0.9999999006862855, iteration: 368256
loss: 1.015932559967041,grad_norm: 0.9786683100466341, iteration: 368257
loss: 0.9702560901641846,grad_norm: 0.9999994908294954, iteration: 368258
loss: 1.0049042701721191,grad_norm: 0.776771807646603, iteration: 368259
loss: 1.02910578250885,grad_norm: 0.6648441958924187, iteration: 368260
loss: 1.1089534759521484,grad_norm: 0.8886666141153402, iteration: 368261
loss: 0.9937620759010315,grad_norm: 0.8279911978629226, iteration: 368262
loss: 0.9939615726470947,grad_norm: 0.7898826166856091, iteration: 368263
loss: 0.9825168251991272,grad_norm: 0.740665758994802, iteration: 368264
loss: 1.00492525100708,grad_norm: 0.9080272039705848, iteration: 368265
loss: 1.0274131298065186,grad_norm: 0.8148423128721017, iteration: 368266
loss: 1.0669678449630737,grad_norm: 0.9999994439729526, iteration: 368267
loss: 1.039844274520874,grad_norm: 0.9999998046040602, iteration: 368268
loss: 0.995972216129303,grad_norm: 0.7584953484185715, iteration: 368269
loss: 0.9915717840194702,grad_norm: 0.9999996387368153, iteration: 368270
loss: 1.0086079835891724,grad_norm: 0.9999990677275848, iteration: 368271
loss: 0.9620576500892639,grad_norm: 0.7482243313214646, iteration: 368272
loss: 1.0232336521148682,grad_norm: 0.8145784611315963, iteration: 368273
loss: 1.0274419784545898,grad_norm: 0.999999046532788, iteration: 368274
loss: 1.0371297597885132,grad_norm: 0.8703638024978175, iteration: 368275
loss: 1.0356943607330322,grad_norm: 0.8752504862382184, iteration: 368276
loss: 1.0844422578811646,grad_norm: 0.9999996809898148, iteration: 368277
loss: 1.062850832939148,grad_norm: 0.9555249429281595, iteration: 368278
loss: 1.0248920917510986,grad_norm: 0.9957760656743123, iteration: 368279
loss: 1.019083857536316,grad_norm: 0.8335216056593362, iteration: 368280
loss: 0.9824694395065308,grad_norm: 0.9999993717132175, iteration: 368281
loss: 1.0276732444763184,grad_norm: 0.9999996891152824, iteration: 368282
loss: 1.0239886045455933,grad_norm: 0.9455709013865782, iteration: 368283
loss: 1.0056447982788086,grad_norm: 0.7737941782172301, iteration: 368284
loss: 1.0553510189056396,grad_norm: 0.9999995858353975, iteration: 368285
loss: 1.076587438583374,grad_norm: 0.9999990775259519, iteration: 368286
loss: 1.0038461685180664,grad_norm: 0.8746450007197474, iteration: 368287
loss: 1.0232232809066772,grad_norm: 0.8494504965182029, iteration: 368288
loss: 0.9993975162506104,grad_norm: 0.9428877034428356, iteration: 368289
loss: 1.0032106637954712,grad_norm: 0.9999997791135832, iteration: 368290
loss: 0.9488728642463684,grad_norm: 0.9999991107242314, iteration: 368291
loss: 1.0211018323898315,grad_norm: 0.9550144045331862, iteration: 368292
loss: 1.0494710206985474,grad_norm: 0.9999992681174554, iteration: 368293
loss: 1.0538560152053833,grad_norm: 0.9999999199581706, iteration: 368294
loss: 1.0606482028961182,grad_norm: 0.9999995793386893, iteration: 368295
loss: 1.0093541145324707,grad_norm: 0.9889836490625531, iteration: 368296
loss: 1.2332762479782104,grad_norm: 0.9999994403570851, iteration: 368297
loss: 1.3468129634857178,grad_norm: 0.9999998003797079, iteration: 368298
loss: 1.0045535564422607,grad_norm: 0.7844422566461549, iteration: 368299
loss: 1.0937062501907349,grad_norm: 0.9999994772617173, iteration: 368300
loss: 0.9985848069190979,grad_norm: 0.8118144692961351, iteration: 368301
loss: 1.0216764211654663,grad_norm: 0.9999990395760628, iteration: 368302
loss: 1.002961277961731,grad_norm: 0.9999996404893248, iteration: 368303
loss: 1.1482621431350708,grad_norm: 0.917908861918696, iteration: 368304
loss: 1.0207830667495728,grad_norm: 0.8443454972086789, iteration: 368305
loss: 1.0187633037567139,grad_norm: 0.9999997186559001, iteration: 368306
loss: 1.0987735986709595,grad_norm: 0.9999990658543947, iteration: 368307
loss: 1.033649206161499,grad_norm: 0.9999991607102268, iteration: 368308
loss: 1.009841799736023,grad_norm: 0.999999391972513, iteration: 368309
loss: 1.0652798414230347,grad_norm: 0.9999993948551889, iteration: 368310
loss: 1.1204179525375366,grad_norm: 0.9999997193439375, iteration: 368311
loss: 1.22025728225708,grad_norm: 0.9999997422546397, iteration: 368312
loss: 1.0561083555221558,grad_norm: 0.999999911706661, iteration: 368313
loss: 1.0603052377700806,grad_norm: 0.9590803358251324, iteration: 368314
loss: 1.0420234203338623,grad_norm: 0.9144533530361405, iteration: 368315
loss: 1.0152915716171265,grad_norm: 0.9833589914472862, iteration: 368316
loss: 1.10225248336792,grad_norm: 0.99999983028418, iteration: 368317
loss: 1.058456301689148,grad_norm: 0.999999989306292, iteration: 368318
loss: 1.0535310506820679,grad_norm: 0.9155811721930688, iteration: 368319
loss: 1.0560845136642456,grad_norm: 0.9486872880260039, iteration: 368320
loss: 1.1592321395874023,grad_norm: 0.9999996118704922, iteration: 368321
loss: 0.9960412979125977,grad_norm: 0.9999997734102835, iteration: 368322
loss: 1.0048366785049438,grad_norm: 0.9999996232971772, iteration: 368323
loss: 1.0143933296203613,grad_norm: 0.866092769064456, iteration: 368324
loss: 0.9927886724472046,grad_norm: 0.9999999516995642, iteration: 368325
loss: 0.9600939154624939,grad_norm: 0.9623172141071576, iteration: 368326
loss: 1.034706950187683,grad_norm: 0.9999999717376369, iteration: 368327
loss: 1.039028286933899,grad_norm: 0.9999992977299385, iteration: 368328
loss: 1.034432053565979,grad_norm: 0.9999995652956428, iteration: 368329
loss: 1.0503827333450317,grad_norm: 0.9999992100637866, iteration: 368330
loss: 1.0610852241516113,grad_norm: 0.9990578221222345, iteration: 368331
loss: 1.0005970001220703,grad_norm: 0.9999994064017609, iteration: 368332
loss: 1.0022896528244019,grad_norm: 0.9999992533583678, iteration: 368333
loss: 1.0480093955993652,grad_norm: 1.0000000337849921, iteration: 368334
loss: 1.0315192937850952,grad_norm: 0.99999950041981, iteration: 368335
loss: 1.1291468143463135,grad_norm: 0.9999998645367952, iteration: 368336
loss: 1.0491092205047607,grad_norm: 0.9877432432503772, iteration: 368337
loss: 0.9910221099853516,grad_norm: 0.9492832884716246, iteration: 368338
loss: 1.049259066581726,grad_norm: 0.9999997692925194, iteration: 368339
loss: 1.021373987197876,grad_norm: 1.0000000624471013, iteration: 368340
loss: 1.1142596006393433,grad_norm: 0.9999999587745155, iteration: 368341
loss: 1.0857235193252563,grad_norm: 0.9999993794536433, iteration: 368342
loss: 1.0516510009765625,grad_norm: 0.9999997023657382, iteration: 368343
loss: 1.009453535079956,grad_norm: 0.8463056991239744, iteration: 368344
loss: 1.0342568159103394,grad_norm: 0.6862930469094474, iteration: 368345
loss: 1.0335919857025146,grad_norm: 0.9999994809497499, iteration: 368346
loss: 1.1219037771224976,grad_norm: 0.9999995593373768, iteration: 368347
loss: 1.1085036993026733,grad_norm: 1.0000000102746625, iteration: 368348
loss: 0.9791029095649719,grad_norm: 0.9999995768057306, iteration: 368349
loss: 0.9934718608856201,grad_norm: 0.9999990098239925, iteration: 368350
loss: 0.996178388595581,grad_norm: 0.8300692815376226, iteration: 368351
loss: 0.9915906190872192,grad_norm: 0.9057308783801441, iteration: 368352
loss: 1.0126398801803589,grad_norm: 0.999999591232503, iteration: 368353
loss: 1.0930441617965698,grad_norm: 0.9999995048203767, iteration: 368354
loss: 1.123884677886963,grad_norm: 0.9999998390664321, iteration: 368355
loss: 1.026755452156067,grad_norm: 0.9999997643607264, iteration: 368356
loss: 1.042279601097107,grad_norm: 0.9999999388692096, iteration: 368357
loss: 1.0688952207565308,grad_norm: 0.9999996232412516, iteration: 368358
loss: 0.9922364950180054,grad_norm: 0.9115030812656806, iteration: 368359
loss: 0.9947860240936279,grad_norm: 0.8670364861009822, iteration: 368360
loss: 1.0039970874786377,grad_norm: 0.8353588369305872, iteration: 368361
loss: 1.1662400960922241,grad_norm: 0.9999989883677671, iteration: 368362
loss: 0.9986451864242554,grad_norm: 0.9999997953672488, iteration: 368363
loss: 1.056105613708496,grad_norm: 0.9999997830092817, iteration: 368364
loss: 1.0213388204574585,grad_norm: 0.9999994382055309, iteration: 368365
loss: 0.9583587646484375,grad_norm: 0.8423705864648775, iteration: 368366
loss: 0.9989233016967773,grad_norm: 0.8487417357637395, iteration: 368367
loss: 0.9647824764251709,grad_norm: 0.8389096362637477, iteration: 368368
loss: 0.9980204105377197,grad_norm: 0.719950080545122, iteration: 368369
loss: 1.001933217048645,grad_norm: 0.9937851541412692, iteration: 368370
loss: 1.0058135986328125,grad_norm: 0.649793814505683, iteration: 368371
loss: 0.9729546904563904,grad_norm: 0.8514012035623932, iteration: 368372
loss: 1.1698240041732788,grad_norm: 0.9999993204279198, iteration: 368373
loss: 1.0210334062576294,grad_norm: 0.7674788038159606, iteration: 368374
loss: 1.0321319103240967,grad_norm: 0.9999991843454203, iteration: 368375
loss: 0.995624303817749,grad_norm: 0.9999991966087223, iteration: 368376
loss: 1.2692655324935913,grad_norm: 0.9999997123305054, iteration: 368377
loss: 0.9801022410392761,grad_norm: 0.8872887952338631, iteration: 368378
loss: 1.030593991279602,grad_norm: 0.9999991575545057, iteration: 368379
loss: 0.9679483771324158,grad_norm: 0.8503188066968081, iteration: 368380
loss: 1.0195204019546509,grad_norm: 0.7226615008986215, iteration: 368381
loss: 1.0087440013885498,grad_norm: 0.7397433382700019, iteration: 368382
loss: 1.037225365638733,grad_norm: 0.9357270050821258, iteration: 368383
loss: 0.980739176273346,grad_norm: 0.7883056399786555, iteration: 368384
loss: 1.0577713251113892,grad_norm: 0.8610832528120461, iteration: 368385
loss: 1.0199766159057617,grad_norm: 0.793746457377707, iteration: 368386
loss: 1.039387583732605,grad_norm: 0.9999994337094588, iteration: 368387
loss: 0.9866573214530945,grad_norm: 0.8112549820758901, iteration: 368388
loss: 1.0268937349319458,grad_norm: 0.9999993967041817, iteration: 368389
loss: 0.9988628029823303,grad_norm: 0.7736758146327976, iteration: 368390
loss: 1.0331298112869263,grad_norm: 0.7619749136435763, iteration: 368391
loss: 1.0033272504806519,grad_norm: 0.8151015374507792, iteration: 368392
loss: 1.0130987167358398,grad_norm: 0.9999995344634931, iteration: 368393
loss: 0.972193717956543,grad_norm: 0.9036371829553491, iteration: 368394
loss: 1.022011637687683,grad_norm: 0.8345257670609291, iteration: 368395
loss: 1.1193170547485352,grad_norm: 0.9999999204186105, iteration: 368396
loss: 1.0263899564743042,grad_norm: 0.9999997914297826, iteration: 368397
loss: 0.9823387861251831,grad_norm: 0.8736166599134351, iteration: 368398
loss: 0.9874972105026245,grad_norm: 0.6788990480195897, iteration: 368399
loss: 1.0458199977874756,grad_norm: 0.834641427974997, iteration: 368400
loss: 0.9838911890983582,grad_norm: 0.9999999715257011, iteration: 368401
loss: 0.988674521446228,grad_norm: 0.8551098279818907, iteration: 368402
loss: 1.4023641347885132,grad_norm: 0.999999949729108, iteration: 368403
loss: 1.1539108753204346,grad_norm: 1.000000037163586, iteration: 368404
loss: 1.0435471534729004,grad_norm: 0.9999991480673592, iteration: 368405
loss: 1.0740928649902344,grad_norm: 0.999999254301337, iteration: 368406
loss: 1.071960687637329,grad_norm: 0.9051550586559213, iteration: 368407
loss: 1.0135793685913086,grad_norm: 0.7929870291088247, iteration: 368408
loss: 1.0495200157165527,grad_norm: 0.9999993193040335, iteration: 368409
loss: 1.0378235578536987,grad_norm: 0.9999994147919387, iteration: 368410
loss: 1.0008621215820312,grad_norm: 0.9999993106114566, iteration: 368411
loss: 1.0172382593154907,grad_norm: 0.866806203512635, iteration: 368412
loss: 0.9774074554443359,grad_norm: 0.9999990351175294, iteration: 368413
loss: 1.0045853853225708,grad_norm: 0.9999997710799989, iteration: 368414
loss: 0.9759159684181213,grad_norm: 0.9265719198981262, iteration: 368415
loss: 0.9923918843269348,grad_norm: 0.6714206388622562, iteration: 368416
loss: 0.9541641473770142,grad_norm: 0.7780023076699975, iteration: 368417
loss: 0.9766196608543396,grad_norm: 0.880123469332804, iteration: 368418
loss: 1.009602427482605,grad_norm: 0.7999289782303247, iteration: 368419
loss: 1.0043165683746338,grad_norm: 0.9999990466187354, iteration: 368420
loss: 1.0077309608459473,grad_norm: 0.8496275920421258, iteration: 368421
loss: 1.0317195653915405,grad_norm: 0.9999991675322685, iteration: 368422
loss: 1.008314609527588,grad_norm: 0.835340223020181, iteration: 368423
loss: 1.0022130012512207,grad_norm: 0.9999996126630999, iteration: 368424
loss: 0.9803447723388672,grad_norm: 0.9999992860667309, iteration: 368425
loss: 1.0757492780685425,grad_norm: 0.9999990684748629, iteration: 368426
loss: 1.0024274587631226,grad_norm: 0.9366769188114632, iteration: 368427
loss: 0.9844343662261963,grad_norm: 0.9999991667634854, iteration: 368428
loss: 1.0052359104156494,grad_norm: 0.9999991908547667, iteration: 368429
loss: 1.0380232334136963,grad_norm: 0.7111420504847707, iteration: 368430
loss: 1.0052963495254517,grad_norm: 0.8424860274020612, iteration: 368431
loss: 0.9640078544616699,grad_norm: 0.925426131793126, iteration: 368432
loss: 1.063766360282898,grad_norm: 0.9762660688897604, iteration: 368433
loss: 1.0063055753707886,grad_norm: 0.8791875234055669, iteration: 368434
loss: 1.0122544765472412,grad_norm: 0.9182210958019086, iteration: 368435
loss: 1.0376428365707397,grad_norm: 0.9999989978107229, iteration: 368436
loss: 1.0069339275360107,grad_norm: 0.6641824504769376, iteration: 368437
loss: 1.0092132091522217,grad_norm: 0.8234586960493155, iteration: 368438
loss: 0.9939221739768982,grad_norm: 0.6971785784307623, iteration: 368439
loss: 1.1326258182525635,grad_norm: 0.9999997117846403, iteration: 368440
loss: 0.9772791266441345,grad_norm: 0.9439697135513045, iteration: 368441
loss: 0.9722656607627869,grad_norm: 0.9941799150275683, iteration: 368442
loss: 1.0037552118301392,grad_norm: 0.9935254291303431, iteration: 368443
loss: 0.9704508781433105,grad_norm: 0.9253034196076035, iteration: 368444
loss: 1.0512760877609253,grad_norm: 0.9999994274145996, iteration: 368445
loss: 0.9953709840774536,grad_norm: 0.7359666887855539, iteration: 368446
loss: 1.0932400226593018,grad_norm: 0.999999809917726, iteration: 368447
loss: 0.9861926436424255,grad_norm: 0.7916325670379861, iteration: 368448
loss: 1.0277122259140015,grad_norm: 0.7838744193224385, iteration: 368449
loss: 0.9697108268737793,grad_norm: 0.7875592606704269, iteration: 368450
loss: 1.0096391439437866,grad_norm: 0.7722867003546414, iteration: 368451
loss: 1.0557013750076294,grad_norm: 0.9999994155821204, iteration: 368452
loss: 1.0097850561141968,grad_norm: 0.9999993766735197, iteration: 368453
loss: 1.023501992225647,grad_norm: 0.8498045954624666, iteration: 368454
loss: 1.0676462650299072,grad_norm: 0.9799551951269194, iteration: 368455
loss: 1.004401445388794,grad_norm: 0.9999994075886511, iteration: 368456
loss: 1.1479359865188599,grad_norm: 0.9999993708033539, iteration: 368457
loss: 1.0014679431915283,grad_norm: 0.8964069781437992, iteration: 368458
loss: 1.0484760999679565,grad_norm: 0.9999998637773662, iteration: 368459
loss: 1.0831587314605713,grad_norm: 0.9999999096333388, iteration: 368460
loss: 1.0155621767044067,grad_norm: 0.999999814268023, iteration: 368461
loss: 1.0874295234680176,grad_norm: 0.9999991637733546, iteration: 368462
loss: 1.0337823629379272,grad_norm: 0.9999990102692787, iteration: 368463
loss: 1.057631254196167,grad_norm: 0.9999995291668908, iteration: 368464
loss: 0.9852628707885742,grad_norm: 0.812613211937383, iteration: 368465
loss: 0.9724828004837036,grad_norm: 0.9309165854853084, iteration: 368466
loss: 1.0552270412445068,grad_norm: 0.9999991713964872, iteration: 368467
loss: 1.0592443943023682,grad_norm: 0.9999997074576994, iteration: 368468
loss: 0.9985551834106445,grad_norm: 0.9999993763479101, iteration: 368469
loss: 1.157043218612671,grad_norm: 0.9705087337926087, iteration: 368470
loss: 1.1028183698654175,grad_norm: 0.9999993810455242, iteration: 368471
loss: 1.0605664253234863,grad_norm: 0.7945978029108798, iteration: 368472
loss: 0.9972257614135742,grad_norm: 0.8316084023643893, iteration: 368473
loss: 1.2464993000030518,grad_norm: 0.9999998584367631, iteration: 368474
loss: 1.0028764009475708,grad_norm: 0.7259792677830289, iteration: 368475
loss: 1.0141390562057495,grad_norm: 0.9999994895416536, iteration: 368476
loss: 1.06527578830719,grad_norm: 0.9999999442020809, iteration: 368477
loss: 1.078708291053772,grad_norm: 0.8967645918405115, iteration: 368478
loss: 1.0049432516098022,grad_norm: 0.9999991972783937, iteration: 368479
loss: 1.0220386981964111,grad_norm: 0.9999997165695215, iteration: 368480
loss: 0.9558475017547607,grad_norm: 0.9999998637575256, iteration: 368481
loss: 1.142762541770935,grad_norm: 0.9999999343879473, iteration: 368482
loss: 1.0345968008041382,grad_norm: 0.7637411419788828, iteration: 368483
loss: 1.0541660785675049,grad_norm: 0.7801689505816479, iteration: 368484
loss: 0.9909992218017578,grad_norm: 0.8548278093886049, iteration: 368485
loss: 1.0470471382141113,grad_norm: 0.9999996451830727, iteration: 368486
loss: 1.0440104007720947,grad_norm: 0.7655718855230299, iteration: 368487
loss: 0.995242178440094,grad_norm: 0.8600095003589449, iteration: 368488
loss: 0.9846091866493225,grad_norm: 0.9415417701553438, iteration: 368489
loss: 0.9752849340438843,grad_norm: 0.8002357575855694, iteration: 368490
loss: 1.0735634565353394,grad_norm: 0.9999993427905953, iteration: 368491
loss: 1.016458511352539,grad_norm: 0.7871572911746629, iteration: 368492
loss: 0.9925127029418945,grad_norm: 0.9999990800463178, iteration: 368493
loss: 1.011667013168335,grad_norm: 0.8351053101997343, iteration: 368494
loss: 1.0353671312332153,grad_norm: 0.979628460322423, iteration: 368495
loss: 0.9815552830696106,grad_norm: 0.999999603909339, iteration: 368496
loss: 0.9856824278831482,grad_norm: 0.7364874900639058, iteration: 368497
loss: 1.0159993171691895,grad_norm: 0.8106554600679586, iteration: 368498
loss: 1.1010185480117798,grad_norm: 0.7747653717740605, iteration: 368499
loss: 1.0167511701583862,grad_norm: 0.815169986008094, iteration: 368500
loss: 1.0120511054992676,grad_norm: 0.9083586774596314, iteration: 368501
loss: 0.9875957369804382,grad_norm: 0.9319126413507481, iteration: 368502
loss: 1.0106186866760254,grad_norm: 0.8301210718324828, iteration: 368503
loss: 1.0996832847595215,grad_norm: 0.9999994852232218, iteration: 368504
loss: 0.9863864779472351,grad_norm: 0.9204569490128118, iteration: 368505
loss: 0.9964590072631836,grad_norm: 0.9562513502585638, iteration: 368506
loss: 0.9979090690612793,grad_norm: 0.6747234370772842, iteration: 368507
loss: 1.0282379388809204,grad_norm: 0.999999577077507, iteration: 368508
loss: 1.1064071655273438,grad_norm: 0.9999991186827216, iteration: 368509
loss: 1.0239627361297607,grad_norm: 0.7427055689969941, iteration: 368510
loss: 1.0035037994384766,grad_norm: 0.9540346255748186, iteration: 368511
loss: 1.0241272449493408,grad_norm: 0.9999990436969035, iteration: 368512
loss: 0.9861065149307251,grad_norm: 0.7312418808211889, iteration: 368513
loss: 1.086037039756775,grad_norm: 0.9999995750830425, iteration: 368514
loss: 1.0458723306655884,grad_norm: 0.9999995668388976, iteration: 368515
loss: 1.074453353881836,grad_norm: 0.9999993903422876, iteration: 368516
loss: 0.9970210790634155,grad_norm: 0.9999995094325224, iteration: 368517
loss: 0.9564180374145508,grad_norm: 0.9096958671298281, iteration: 368518
loss: 1.2856477499008179,grad_norm: 0.9999998851198406, iteration: 368519
loss: 0.9691116809844971,grad_norm: 0.9629422676920558, iteration: 368520
loss: 0.9923901557922363,grad_norm: 0.7565251662740887, iteration: 368521
loss: 0.9906331896781921,grad_norm: 0.6442131874176412, iteration: 368522
loss: 1.136167049407959,grad_norm: 0.9999994369246018, iteration: 368523
loss: 1.0250753164291382,grad_norm: 0.8899517171330867, iteration: 368524
loss: 1.059504747390747,grad_norm: 0.9400839071038705, iteration: 368525
loss: 1.0159918069839478,grad_norm: 0.8251259889839844, iteration: 368526
loss: 1.1711245775222778,grad_norm: 0.9999999136052299, iteration: 368527
loss: 1.0221362113952637,grad_norm: 0.8484248151919358, iteration: 368528
loss: 1.1025543212890625,grad_norm: 0.9999991460913257, iteration: 368529
loss: 1.0287257432937622,grad_norm: 0.8335173531575507, iteration: 368530
loss: 1.0033806562423706,grad_norm: 0.9999991963255005, iteration: 368531
loss: 1.078113317489624,grad_norm: 0.9999999563149493, iteration: 368532
loss: 0.9650347828865051,grad_norm: 0.786227164844376, iteration: 368533
loss: 1.0461249351501465,grad_norm: 0.9999992692183888, iteration: 368534
loss: 1.057297945022583,grad_norm: 0.9876470118869253, iteration: 368535
loss: 0.9790424704551697,grad_norm: 0.9999989622392887, iteration: 368536
loss: 1.0045921802520752,grad_norm: 0.8207633143321361, iteration: 368537
loss: 1.0086432695388794,grad_norm: 0.755360569715845, iteration: 368538
loss: 1.0146979093551636,grad_norm: 0.9999994559825145, iteration: 368539
loss: 0.9881719350814819,grad_norm: 0.9999997890066914, iteration: 368540
loss: 0.9726645350456238,grad_norm: 0.7003975713929687, iteration: 368541
loss: 1.0311644077301025,grad_norm: 0.7543378584446974, iteration: 368542
loss: 1.0083975791931152,grad_norm: 0.9196586295582495, iteration: 368543
loss: 1.2676281929016113,grad_norm: 0.985562934181866, iteration: 368544
loss: 1.0626379251480103,grad_norm: 0.8435769332790118, iteration: 368545
loss: 0.9992637038230896,grad_norm: 0.7284619994097528, iteration: 368546
loss: 1.0034140348434448,grad_norm: 0.8257170884567597, iteration: 368547
loss: 1.1202253103256226,grad_norm: 0.9999989977278919, iteration: 368548
loss: 1.0197113752365112,grad_norm: 0.9999993779311308, iteration: 368549
loss: 1.0947438478469849,grad_norm: 0.9999993079357793, iteration: 368550
loss: 1.045799732208252,grad_norm: 0.9731218632696869, iteration: 368551
loss: 1.0276964902877808,grad_norm: 0.8357676978306425, iteration: 368552
loss: 1.0174736976623535,grad_norm: 0.9999999404772763, iteration: 368553
loss: 1.0149571895599365,grad_norm: 0.769345526395782, iteration: 368554
loss: 0.9971190690994263,grad_norm: 0.777332558407305, iteration: 368555
loss: 1.029089331626892,grad_norm: 0.999999691240044, iteration: 368556
loss: 0.9463700652122498,grad_norm: 0.8877860224423542, iteration: 368557
loss: 1.0405137538909912,grad_norm: 1.0000000878713424, iteration: 368558
loss: 1.0048798322677612,grad_norm: 0.811262389356905, iteration: 368559
loss: 1.0385793447494507,grad_norm: 0.8736827522818508, iteration: 368560
loss: 1.050392508506775,grad_norm: 0.9119939309936897, iteration: 368561
loss: 1.0664150714874268,grad_norm: 0.9999996614725339, iteration: 368562
loss: 1.00381338596344,grad_norm: 0.9999991176575074, iteration: 368563
loss: 0.9866570830345154,grad_norm: 0.7011057819707373, iteration: 368564
loss: 1.173574447631836,grad_norm: 0.9999998527866799, iteration: 368565
loss: 1.0998207330703735,grad_norm: 0.9999994156189208, iteration: 368566
loss: 1.0052257776260376,grad_norm: 0.9999997440208629, iteration: 368567
loss: 0.9664995670318604,grad_norm: 0.741569133655103, iteration: 368568
loss: 1.0155949592590332,grad_norm: 0.9999999463950572, iteration: 368569
loss: 1.0774438381195068,grad_norm: 0.9999993775372228, iteration: 368570
loss: 1.0159488916397095,grad_norm: 0.9999992096778813, iteration: 368571
loss: 0.9913445711135864,grad_norm: 0.9329680845076496, iteration: 368572
loss: 1.0391819477081299,grad_norm: 0.9084291461227061, iteration: 368573
loss: 1.0408201217651367,grad_norm: 0.9974081629096778, iteration: 368574
loss: 1.0316696166992188,grad_norm: 0.9999996063695121, iteration: 368575
loss: 1.0260182619094849,grad_norm: 0.8190966384303857, iteration: 368576
loss: 1.0132781267166138,grad_norm: 0.8913361460149096, iteration: 368577
loss: 1.0050315856933594,grad_norm: 0.8951999301500425, iteration: 368578
loss: 1.0177202224731445,grad_norm: 0.9999990100281865, iteration: 368579
loss: 1.0124588012695312,grad_norm: 0.7740897856065374, iteration: 368580
loss: 1.1142876148223877,grad_norm: 0.9999996543117055, iteration: 368581
loss: 1.0043463706970215,grad_norm: 0.7709181056894754, iteration: 368582
loss: 1.036389708518982,grad_norm: 0.9999994516840404, iteration: 368583
loss: 1.0250613689422607,grad_norm: 0.8001365442334577, iteration: 368584
loss: 1.0186445713043213,grad_norm: 0.9999990723911407, iteration: 368585
loss: 0.9757120013237,grad_norm: 0.8851898863281658, iteration: 368586
loss: 0.9450454711914062,grad_norm: 0.7563652322932368, iteration: 368587
loss: 1.0203766822814941,grad_norm: 0.9999995700727092, iteration: 368588
loss: 0.9783082604408264,grad_norm: 0.9999994179918141, iteration: 368589
loss: 0.9529743790626526,grad_norm: 0.8125253812314038, iteration: 368590
loss: 1.0243552923202515,grad_norm: 0.9507567890769643, iteration: 368591
loss: 1.0161936283111572,grad_norm: 0.7636535119197569, iteration: 368592
loss: 1.095744013786316,grad_norm: 0.9999990780983955, iteration: 368593
loss: 1.0145633220672607,grad_norm: 0.81071521965591, iteration: 368594
loss: 0.9797137975692749,grad_norm: 0.7053850922483498, iteration: 368595
loss: 0.9750234484672546,grad_norm: 0.8768788799003631, iteration: 368596
loss: 1.0515438318252563,grad_norm: 0.9999989976821627, iteration: 368597
loss: 0.976948618888855,grad_norm: 0.710078162387022, iteration: 368598
loss: 1.0443226099014282,grad_norm: 0.8476199987838652, iteration: 368599
loss: 0.9731689691543579,grad_norm: 0.9188363986524983, iteration: 368600
loss: 1.0343624353408813,grad_norm: 0.9999990342196662, iteration: 368601
loss: 0.9838322997093201,grad_norm: 0.8546964145139019, iteration: 368602
loss: 1.017868995666504,grad_norm: 0.9999991263642866, iteration: 368603
loss: 1.0923333168029785,grad_norm: 0.9999995137868445, iteration: 368604
loss: 1.0431389808654785,grad_norm: 0.9999989858044753, iteration: 368605
loss: 0.9780953526496887,grad_norm: 0.9333267643628355, iteration: 368606
loss: 1.0118211507797241,grad_norm: 0.8795763284953854, iteration: 368607
loss: 1.0130560398101807,grad_norm: 0.8768099761756222, iteration: 368608
loss: 1.0554052591323853,grad_norm: 0.9999995872459402, iteration: 368609
loss: 0.974885106086731,grad_norm: 0.8137396410533034, iteration: 368610
loss: 1.0381239652633667,grad_norm: 0.9999994136599607, iteration: 368611
loss: 0.9681727886199951,grad_norm: 0.8750744878229247, iteration: 368612
loss: 1.0023049116134644,grad_norm: 0.9999998879946206, iteration: 368613
loss: 0.9977410435676575,grad_norm: 0.7819000488029828, iteration: 368614
loss: 1.0130692720413208,grad_norm: 0.9155032204922434, iteration: 368615
loss: 1.0270549058914185,grad_norm: 0.8759637925914591, iteration: 368616
loss: 1.0376535654067993,grad_norm: 0.9999995732328892, iteration: 368617
loss: 1.0493338108062744,grad_norm: 0.7240538138603029, iteration: 368618
loss: 1.0067143440246582,grad_norm: 0.7505950550300973, iteration: 368619
loss: 0.9978501200675964,grad_norm: 0.7432646325603559, iteration: 368620
loss: 0.9821802377700806,grad_norm: 0.9486605957006471, iteration: 368621
loss: 0.9724897742271423,grad_norm: 0.8212456364808081, iteration: 368622
loss: 1.0496083498001099,grad_norm: 0.99999904297003, iteration: 368623
loss: 1.0394830703735352,grad_norm: 0.7120226189546018, iteration: 368624
loss: 1.0067501068115234,grad_norm: 0.9999994004273021, iteration: 368625
loss: 0.9641907811164856,grad_norm: 0.7603799117266432, iteration: 368626
loss: 0.9980529546737671,grad_norm: 0.6767711500896474, iteration: 368627
loss: 1.0198771953582764,grad_norm: 0.8520792538820661, iteration: 368628
loss: 1.0113492012023926,grad_norm: 0.8464895582919867, iteration: 368629
loss: 1.0232841968536377,grad_norm: 0.7509444693724537, iteration: 368630
loss: 1.0090528726577759,grad_norm: 0.7534384563795897, iteration: 368631
loss: 1.012013554573059,grad_norm: 0.7608893785153243, iteration: 368632
loss: 1.0221089124679565,grad_norm: 0.9999997171188735, iteration: 368633
loss: 1.0335593223571777,grad_norm: 0.9999993609036322, iteration: 368634
loss: 1.0716840028762817,grad_norm: 0.9999990546765088, iteration: 368635
loss: 0.992260217666626,grad_norm: 0.8235382436551358, iteration: 368636
loss: 1.003061056137085,grad_norm: 0.8008305310986585, iteration: 368637
loss: 0.9613627195358276,grad_norm: 0.8773917075703093, iteration: 368638
loss: 1.0071526765823364,grad_norm: 0.9999992337387456, iteration: 368639
loss: 1.0467348098754883,grad_norm: 0.8714722407693541, iteration: 368640
loss: 0.9903097748756409,grad_norm: 0.7365294166532038, iteration: 368641
loss: 0.9932342767715454,grad_norm: 0.8005547319585311, iteration: 368642
loss: 1.014357566833496,grad_norm: 0.999999931987971, iteration: 368643
loss: 1.0007363557815552,grad_norm: 0.8124884977487328, iteration: 368644
loss: 1.0606416463851929,grad_norm: 0.999999080318476, iteration: 368645
loss: 0.9952220916748047,grad_norm: 0.7816119931574244, iteration: 368646
loss: 1.0031489133834839,grad_norm: 0.7471463383903671, iteration: 368647
loss: 0.9832640290260315,grad_norm: 0.7599724972735685, iteration: 368648
loss: 0.9920826554298401,grad_norm: 0.7807249075791235, iteration: 368649
loss: 0.9895147681236267,grad_norm: 0.9826615904330344, iteration: 368650
loss: 1.0096107721328735,grad_norm: 0.914406459888266, iteration: 368651
loss: 0.9978265762329102,grad_norm: 0.7341666566043434, iteration: 368652
loss: 1.0471501350402832,grad_norm: 0.9999995125531868, iteration: 368653
loss: 1.0613253116607666,grad_norm: 0.9999993319093206, iteration: 368654
loss: 1.0327503681182861,grad_norm: 0.9999993850140746, iteration: 368655
loss: 1.035532832145691,grad_norm: 0.9999994299394263, iteration: 368656
loss: 0.9717101454734802,grad_norm: 0.7706909663087474, iteration: 368657
loss: 0.9967272281646729,grad_norm: 0.8918721194114173, iteration: 368658
loss: 1.0969821214675903,grad_norm: 0.9999996625037548, iteration: 368659
loss: 1.0257446765899658,grad_norm: 0.8996916699863406, iteration: 368660
loss: 1.074496865272522,grad_norm: 0.9999994790511181, iteration: 368661
loss: 1.0140531063079834,grad_norm: 0.643898068777631, iteration: 368662
loss: 1.0010706186294556,grad_norm: 0.7108657994384251, iteration: 368663
loss: 0.9788317084312439,grad_norm: 0.8026907872853071, iteration: 368664
loss: 1.0882039070129395,grad_norm: 0.9999999367026441, iteration: 368665
loss: 1.0125569105148315,grad_norm: 0.7932324489310261, iteration: 368666
loss: 0.9611923694610596,grad_norm: 0.9999993632073517, iteration: 368667
loss: 1.0207388401031494,grad_norm: 0.9999997213563229, iteration: 368668
loss: 0.9985791444778442,grad_norm: 0.7703963977989029, iteration: 368669
loss: 0.9717757701873779,grad_norm: 0.7919986066831444, iteration: 368670
loss: 1.0382028818130493,grad_norm: 0.9503837623423684, iteration: 368671
loss: 0.9738147258758545,grad_norm: 0.9202440691802041, iteration: 368672
loss: 1.014635443687439,grad_norm: 0.9999991757407345, iteration: 368673
loss: 1.1098084449768066,grad_norm: 0.862757556180634, iteration: 368674
loss: 0.9817303419113159,grad_norm: 0.8792011691488303, iteration: 368675
loss: 1.0515245199203491,grad_norm: 0.9999996601677753, iteration: 368676
loss: 1.0214842557907104,grad_norm: 0.7812035563104067, iteration: 368677
loss: 0.9869961142539978,grad_norm: 0.7585763860428831, iteration: 368678
loss: 1.0100117921829224,grad_norm: 0.7341312980517707, iteration: 368679
loss: 1.029509425163269,grad_norm: 0.9332115392648607, iteration: 368680
loss: 1.0703562498092651,grad_norm: 0.9999991651579621, iteration: 368681
loss: 1.016432523727417,grad_norm: 0.7891703905738111, iteration: 368682
loss: 0.9651627540588379,grad_norm: 0.9999992496900972, iteration: 368683
loss: 1.0289095640182495,grad_norm: 0.7959901954444325, iteration: 368684
loss: 1.0271012783050537,grad_norm: 0.9999991640989994, iteration: 368685
loss: 1.0330334901809692,grad_norm: 0.9999996968723471, iteration: 368686
loss: 0.9944999814033508,grad_norm: 0.7943119202330888, iteration: 368687
loss: 1.0030094385147095,grad_norm: 0.7959281722292844, iteration: 368688
loss: 1.0646744966506958,grad_norm: 0.8908066712490333, iteration: 368689
loss: 1.0037925243377686,grad_norm: 0.7937557318746407, iteration: 368690
loss: 1.0412663221359253,grad_norm: 0.9999993508960916, iteration: 368691
loss: 1.002539873123169,grad_norm: 0.9999991395689037, iteration: 368692
loss: 0.9876892566680908,grad_norm: 0.8815133898923939, iteration: 368693
loss: 1.0144901275634766,grad_norm: 0.9538274791367422, iteration: 368694
loss: 1.0218318700790405,grad_norm: 0.9999994378139436, iteration: 368695
loss: 0.9684320688247681,grad_norm: 0.8386038314696843, iteration: 368696
loss: 1.0850540399551392,grad_norm: 0.9999993175067928, iteration: 368697
loss: 0.9529008269309998,grad_norm: 0.8265778831169238, iteration: 368698
loss: 1.0085207223892212,grad_norm: 0.8450667653346975, iteration: 368699
loss: 1.047147274017334,grad_norm: 0.8968363129413874, iteration: 368700
loss: 1.1216626167297363,grad_norm: 0.9999990891226632, iteration: 368701
loss: 1.0114014148712158,grad_norm: 0.999999122891251, iteration: 368702
loss: 1.0352219343185425,grad_norm: 0.7390805223914068, iteration: 368703
loss: 1.0063939094543457,grad_norm: 0.9315289133298172, iteration: 368704
loss: 1.007306456565857,grad_norm: 0.99999931945457, iteration: 368705
loss: 0.9761722087860107,grad_norm: 0.7551900406336668, iteration: 368706
loss: 1.0377501249313354,grad_norm: 0.8867513030684804, iteration: 368707
loss: 1.038905382156372,grad_norm: 0.8237036728630559, iteration: 368708
loss: 1.0094987154006958,grad_norm: 0.920247045152382, iteration: 368709
loss: 0.9984477162361145,grad_norm: 0.7364931837010338, iteration: 368710
loss: 1.083876132965088,grad_norm: 0.9590328778361759, iteration: 368711
loss: 0.9808672666549683,grad_norm: 0.7533952456697793, iteration: 368712
loss: 0.9916996359825134,grad_norm: 0.8714777495751493, iteration: 368713
loss: 0.9966064095497131,grad_norm: 0.9605552386958469, iteration: 368714
loss: 1.0602566003799438,grad_norm: 0.7648641416528571, iteration: 368715
loss: 1.0176700353622437,grad_norm: 0.7981594251665143, iteration: 368716
loss: 1.003913402557373,grad_norm: 0.7041490135463325, iteration: 368717
loss: 1.0209966897964478,grad_norm: 0.8392659044473352, iteration: 368718
loss: 0.9827367663383484,grad_norm: 0.8515530440769635, iteration: 368719
loss: 0.9860272407531738,grad_norm: 0.8623410466438353, iteration: 368720
loss: 1.0125778913497925,grad_norm: 0.7480924428061764, iteration: 368721
loss: 0.9761321544647217,grad_norm: 0.7482451803340786, iteration: 368722
loss: 1.07085382938385,grad_norm: 0.7955915073734122, iteration: 368723
loss: 1.0356636047363281,grad_norm: 0.9999991549535387, iteration: 368724
loss: 0.9759998917579651,grad_norm: 0.8051807111259813, iteration: 368725
loss: 1.0101500749588013,grad_norm: 0.9314347427737405, iteration: 368726
loss: 0.9925286769866943,grad_norm: 0.7206143023959327, iteration: 368727
loss: 1.0217022895812988,grad_norm: 0.916376134273429, iteration: 368728
loss: 1.0150710344314575,grad_norm: 0.9014491072901585, iteration: 368729
loss: 1.0635923147201538,grad_norm: 0.9999991152396177, iteration: 368730
loss: 0.9630411863327026,grad_norm: 0.7160305934185388, iteration: 368731
loss: 0.9912832379341125,grad_norm: 0.6684019705177665, iteration: 368732
loss: 1.0470880270004272,grad_norm: 1.0000000177297106, iteration: 368733
loss: 1.0141136646270752,grad_norm: 0.9837887902409002, iteration: 368734
loss: 1.0313400030136108,grad_norm: 0.7628708134787638, iteration: 368735
loss: 0.9954898953437805,grad_norm: 0.8056306318072955, iteration: 368736
loss: 0.9741566777229309,grad_norm: 0.9889397824819443, iteration: 368737
loss: 1.2472220659255981,grad_norm: 0.9999991292859567, iteration: 368738
loss: 0.9821121692657471,grad_norm: 0.9019446923392841, iteration: 368739
loss: 0.9893328547477722,grad_norm: 0.8449086464220458, iteration: 368740
loss: 1.0192316770553589,grad_norm: 0.9130303762222515, iteration: 368741
loss: 0.9790095686912537,grad_norm: 0.7322751496478871, iteration: 368742
loss: 0.9794651865959167,grad_norm: 0.7184800523937128, iteration: 368743
loss: 1.0236941576004028,grad_norm: 0.8386235845918258, iteration: 368744
loss: 1.0162012577056885,grad_norm: 0.7022007094902233, iteration: 368745
loss: 0.9855250716209412,grad_norm: 0.9999993015122541, iteration: 368746
loss: 0.9958528280258179,grad_norm: 0.8962130452716539, iteration: 368747
loss: 0.9952471256256104,grad_norm: 0.854621543565159, iteration: 368748
loss: 0.9949842691421509,grad_norm: 0.9938973044964865, iteration: 368749
loss: 1.0008454322814941,grad_norm: 0.9105247609963817, iteration: 368750
loss: 1.0102251768112183,grad_norm: 0.8390249549345119, iteration: 368751
loss: 1.0116311311721802,grad_norm: 0.7361832023172116, iteration: 368752
loss: 1.080907940864563,grad_norm: 0.922987770897638, iteration: 368753
loss: 1.0310859680175781,grad_norm: 0.9912175192979841, iteration: 368754
loss: 0.9730110764503479,grad_norm: 0.8476717427946139, iteration: 368755
loss: 0.970485270023346,grad_norm: 0.849548595827733, iteration: 368756
loss: 1.0221589803695679,grad_norm: 0.9999991706687346, iteration: 368757
loss: 1.293299674987793,grad_norm: 0.9999997949785154, iteration: 368758
loss: 1.076888918876648,grad_norm: 0.9999991062902988, iteration: 368759
loss: 1.0206279754638672,grad_norm: 0.9513691426896095, iteration: 368760
loss: 0.9742944836616516,grad_norm: 0.891679924307305, iteration: 368761
loss: 0.9897680282592773,grad_norm: 0.8075504074432843, iteration: 368762
loss: 1.0643008947372437,grad_norm: 0.9999992348812382, iteration: 368763
loss: 1.0085657835006714,grad_norm: 0.9309362968074542, iteration: 368764
loss: 1.0478339195251465,grad_norm: 0.7086872553390884, iteration: 368765
loss: 0.9948265552520752,grad_norm: 0.8634575946812619, iteration: 368766
loss: 0.9929627180099487,grad_norm: 0.7305787163931137, iteration: 368767
loss: 0.93784499168396,grad_norm: 0.7718834445059122, iteration: 368768
loss: 0.9886263012886047,grad_norm: 0.9462475629559932, iteration: 368769
loss: 1.0567882061004639,grad_norm: 0.999999764002238, iteration: 368770
loss: 0.9848995208740234,grad_norm: 0.8227920653886541, iteration: 368771
loss: 1.0292508602142334,grad_norm: 0.9999993029402172, iteration: 368772
loss: 0.9973706603050232,grad_norm: 0.8952305814155338, iteration: 368773
loss: 1.0398246049880981,grad_norm: 0.7896934515046088, iteration: 368774
loss: 1.0008162260055542,grad_norm: 0.7890031544189321, iteration: 368775
loss: 1.041945457458496,grad_norm: 0.8820905740831471, iteration: 368776
loss: 0.9977718591690063,grad_norm: 0.8359633172332616, iteration: 368777
loss: 1.0619856119155884,grad_norm: 0.9999991889279415, iteration: 368778
loss: 0.9968565106391907,grad_norm: 0.7699529057527262, iteration: 368779
loss: 1.0106152296066284,grad_norm: 0.9999995587527747, iteration: 368780
loss: 1.0238523483276367,grad_norm: 0.7775051369417851, iteration: 368781
loss: 1.011816143989563,grad_norm: 0.9117260623043042, iteration: 368782
loss: 1.019371747970581,grad_norm: 0.7401078370131018, iteration: 368783
loss: 0.9453919529914856,grad_norm: 0.7752846893609858, iteration: 368784
loss: 1.0033053159713745,grad_norm: 0.9647822693817707, iteration: 368785
loss: 0.9606719017028809,grad_norm: 0.7975599698683107, iteration: 368786
loss: 0.9947673678398132,grad_norm: 0.7641005022141819, iteration: 368787
loss: 1.0127649307250977,grad_norm: 0.9473953707816389, iteration: 368788
loss: 1.0341253280639648,grad_norm: 0.9999999243025544, iteration: 368789
loss: 0.9781030416488647,grad_norm: 0.8527138954844453, iteration: 368790
loss: 1.0011534690856934,grad_norm: 0.7628867545142798, iteration: 368791
loss: 1.0141124725341797,grad_norm: 0.7665587761858041, iteration: 368792
loss: 1.0136940479278564,grad_norm: 0.8173156155414076, iteration: 368793
loss: 1.018676996231079,grad_norm: 0.7460310793082208, iteration: 368794
loss: 0.9898673892021179,grad_norm: 0.7783519343107128, iteration: 368795
loss: 0.9785192608833313,grad_norm: 0.9999992297856619, iteration: 368796
loss: 1.0185822248458862,grad_norm: 0.999999056808594, iteration: 368797
loss: 0.9858667850494385,grad_norm: 0.9225242639093415, iteration: 368798
loss: 1.0166879892349243,grad_norm: 0.7822389354671438, iteration: 368799
loss: 1.0316575765609741,grad_norm: 0.8146734867602089, iteration: 368800
loss: 0.9932898283004761,grad_norm: 0.9130124486158498, iteration: 368801
loss: 1.0681350231170654,grad_norm: 0.7362732763782738, iteration: 368802
loss: 1.028957724571228,grad_norm: 0.8828965574710794, iteration: 368803
loss: 1.0129741430282593,grad_norm: 0.9999991963454977, iteration: 368804
loss: 1.0033800601959229,grad_norm: 0.7061986584281579, iteration: 368805
loss: 1.004919171333313,grad_norm: 0.6811173922253825, iteration: 368806
loss: 1.0003184080123901,grad_norm: 0.8255028726111114, iteration: 368807
loss: 1.0558152198791504,grad_norm: 0.9999996273984126, iteration: 368808
loss: 1.0175200700759888,grad_norm: 0.8472745935478797, iteration: 368809
loss: 0.9999387860298157,grad_norm: 0.8821183074927583, iteration: 368810
loss: 0.9772414565086365,grad_norm: 0.6841664651486105, iteration: 368811
loss: 0.9979780316352844,grad_norm: 0.822365875499572, iteration: 368812
loss: 1.2040046453475952,grad_norm: 0.9999995261836062, iteration: 368813
loss: 1.029758334159851,grad_norm: 0.9999991068108594, iteration: 368814
loss: 1.0722757577896118,grad_norm: 0.8567171834280711, iteration: 368815
loss: 0.9709280729293823,grad_norm: 0.777421747916426, iteration: 368816
loss: 1.0381767749786377,grad_norm: 0.999999101128152, iteration: 368817
loss: 1.0293017625808716,grad_norm: 0.9079079133545012, iteration: 368818
loss: 0.9859570860862732,grad_norm: 0.7898523819171896, iteration: 368819
loss: 1.0017999410629272,grad_norm: 0.9261768142700936, iteration: 368820
loss: 0.9837924242019653,grad_norm: 0.8428732918657749, iteration: 368821
loss: 1.0069260597229004,grad_norm: 0.9999991612012028, iteration: 368822
loss: 1.055350661277771,grad_norm: 0.9999993428904871, iteration: 368823
loss: 1.0361790657043457,grad_norm: 0.9999999152919301, iteration: 368824
loss: 1.0402055978775024,grad_norm: 0.9999990710362525, iteration: 368825
loss: 1.0256129503250122,grad_norm: 0.9999991037080823, iteration: 368826
loss: 1.062199592590332,grad_norm: 0.9999994001028362, iteration: 368827
loss: 0.994131326675415,grad_norm: 0.8044855347902334, iteration: 368828
loss: 1.0069350004196167,grad_norm: 0.9999993641663568, iteration: 368829
loss: 1.0103156566619873,grad_norm: 0.9999992162430599, iteration: 368830
loss: 1.0101417303085327,grad_norm: 0.9999991178817473, iteration: 368831
loss: 1.0196393728256226,grad_norm: 0.9417401262610816, iteration: 368832
loss: 1.0118141174316406,grad_norm: 0.7881646358755034, iteration: 368833
loss: 0.9772282838821411,grad_norm: 0.9999990854803131, iteration: 368834
loss: 0.9720491766929626,grad_norm: 0.9999999363568448, iteration: 368835
loss: 0.9555457830429077,grad_norm: 0.9061890921266872, iteration: 368836
loss: 0.9907132387161255,grad_norm: 0.8166726198758767, iteration: 368837
loss: 0.9872010350227356,grad_norm: 0.8125742939133367, iteration: 368838
loss: 1.0084360837936401,grad_norm: 0.7944823791127582, iteration: 368839
loss: 1.0409042835235596,grad_norm: 0.9999992360056535, iteration: 368840
loss: 0.9963635206222534,grad_norm: 0.6895576230344587, iteration: 368841
loss: 0.9893684387207031,grad_norm: 0.9676892060714238, iteration: 368842
loss: 0.9893298745155334,grad_norm: 0.6796934409790022, iteration: 368843
loss: 1.0223768949508667,grad_norm: 0.8056865889658945, iteration: 368844
loss: 1.0113685131072998,grad_norm: 0.9685957969484291, iteration: 368845
loss: 1.0298497676849365,grad_norm: 0.7753684692272385, iteration: 368846
loss: 1.0383459329605103,grad_norm: 0.9999990511804392, iteration: 368847
loss: 1.0665888786315918,grad_norm: 0.9999997987774059, iteration: 368848
loss: 0.9926381707191467,grad_norm: 0.9760242150102844, iteration: 368849
loss: 1.076950192451477,grad_norm: 0.9999998363904835, iteration: 368850
loss: 1.0213946104049683,grad_norm: 0.8355152210111068, iteration: 368851
loss: 0.9859392046928406,grad_norm: 0.8031163332681883, iteration: 368852
loss: 1.158555269241333,grad_norm: 0.9999999129286155, iteration: 368853
loss: 0.993902325630188,grad_norm: 0.8367693403403458, iteration: 368854
loss: 1.0302722454071045,grad_norm: 0.8733676992801337, iteration: 368855
loss: 0.9884312748908997,grad_norm: 0.9999991861560068, iteration: 368856
loss: 1.0096644163131714,grad_norm: 0.7945042572335953, iteration: 368857
loss: 1.0319136381149292,grad_norm: 0.9999998776757282, iteration: 368858
loss: 1.0616793632507324,grad_norm: 1.0000000627468766, iteration: 368859
loss: 1.1701935529708862,grad_norm: 0.9999997721458042, iteration: 368860
loss: 1.0205137729644775,grad_norm: 0.8810100775964456, iteration: 368861
loss: 1.015332818031311,grad_norm: 0.9933277017957254, iteration: 368862
loss: 1.036950707435608,grad_norm: 0.9999990846140778, iteration: 368863
loss: 1.0992369651794434,grad_norm: 0.9999992797035935, iteration: 368864
loss: 0.9948475360870361,grad_norm: 0.9541432819457996, iteration: 368865
loss: 1.1750621795654297,grad_norm: 0.9999991533623506, iteration: 368866
loss: 1.010331392288208,grad_norm: 0.9999992094590899, iteration: 368867
loss: 1.098648190498352,grad_norm: 0.9999999020575847, iteration: 368868
loss: 1.093549370765686,grad_norm: 0.9690639242301358, iteration: 368869
loss: 1.0380576848983765,grad_norm: 0.9999995861974237, iteration: 368870
loss: 0.9724426865577698,grad_norm: 0.8081400101374401, iteration: 368871
loss: 1.0184197425842285,grad_norm: 0.7177764979424892, iteration: 368872
loss: 0.9734699130058289,grad_norm: 0.8970054137378717, iteration: 368873
loss: 1.1469931602478027,grad_norm: 0.9999996753852841, iteration: 368874
loss: 0.9882235527038574,grad_norm: 0.8759092727228351, iteration: 368875
loss: 1.0022337436676025,grad_norm: 0.8734171250576771, iteration: 368876
loss: 0.9831744432449341,grad_norm: 0.8943375588976753, iteration: 368877
loss: 1.0365655422210693,grad_norm: 0.9865973929494708, iteration: 368878
loss: 0.9541202187538147,grad_norm: 0.727583071699067, iteration: 368879
loss: 1.0419739484786987,grad_norm: 0.9999991978433316, iteration: 368880
loss: 1.069435715675354,grad_norm: 0.9999991044412722, iteration: 368881
loss: 1.086607813835144,grad_norm: 0.9004422575993306, iteration: 368882
loss: 0.9777980446815491,grad_norm: 0.7871900119962071, iteration: 368883
loss: 1.047165870666504,grad_norm: 0.9999998830242072, iteration: 368884
loss: 1.0015158653259277,grad_norm: 0.9446418088817763, iteration: 368885
loss: 0.9877121448516846,grad_norm: 0.7483388660916329, iteration: 368886
loss: 1.0352730751037598,grad_norm: 0.9999998735239651, iteration: 368887
loss: 1.0259227752685547,grad_norm: 0.9999991617258025, iteration: 368888
loss: 0.9749911427497864,grad_norm: 0.8504312740228773, iteration: 368889
loss: 0.9996391534805298,grad_norm: 0.8512598193328751, iteration: 368890
loss: 0.9767290949821472,grad_norm: 0.9999994752222595, iteration: 368891
loss: 0.9829401969909668,grad_norm: 0.9999991985731702, iteration: 368892
loss: 0.98027104139328,grad_norm: 0.8250744540065865, iteration: 368893
loss: 0.9865968823432922,grad_norm: 0.790368621420923, iteration: 368894
loss: 0.9706523418426514,grad_norm: 0.9999992975102604, iteration: 368895
loss: 1.0204037427902222,grad_norm: 0.7581271745811826, iteration: 368896
loss: 0.9915000796318054,grad_norm: 0.7191744042285738, iteration: 368897
loss: 1.003402590751648,grad_norm: 0.9999989361680054, iteration: 368898
loss: 0.9533883929252625,grad_norm: 0.8967344869083702, iteration: 368899
loss: 1.0450750589370728,grad_norm: 0.9999996326335684, iteration: 368900
loss: 1.0392699241638184,grad_norm: 0.8070057038761963, iteration: 368901
loss: 1.011340618133545,grad_norm: 0.7888675850428739, iteration: 368902
loss: 1.0174825191497803,grad_norm: 0.9999991556950684, iteration: 368903
loss: 1.003947377204895,grad_norm: 0.8612267855593216, iteration: 368904
loss: 1.0477157831192017,grad_norm: 0.9999994384328559, iteration: 368905
loss: 1.0025454759597778,grad_norm: 0.7838087916648336, iteration: 368906
loss: 0.9785674810409546,grad_norm: 0.7862348712203154, iteration: 368907
loss: 0.9890158176422119,grad_norm: 0.7848270193407005, iteration: 368908
loss: 1.059816837310791,grad_norm: 0.7900963364088379, iteration: 368909
loss: 1.045297384262085,grad_norm: 0.7279827457590698, iteration: 368910
loss: 1.061998724937439,grad_norm: 0.7868351066064496, iteration: 368911
loss: 0.9937564730644226,grad_norm: 0.9176352370691806, iteration: 368912
loss: 1.0076998472213745,grad_norm: 0.8037954124750457, iteration: 368913
loss: 0.9959315061569214,grad_norm: 0.7786920778336708, iteration: 368914
loss: 0.9903557896614075,grad_norm: 0.8645576519664808, iteration: 368915
loss: 0.9714729189872742,grad_norm: 0.999877911671742, iteration: 368916
loss: 0.9887121319770813,grad_norm: 0.9999993971509991, iteration: 368917
loss: 1.0195860862731934,grad_norm: 0.8096207436072905, iteration: 368918
loss: 1.0226644277572632,grad_norm: 0.8329357147756015, iteration: 368919
loss: 1.0072053670883179,grad_norm: 0.8379637161540197, iteration: 368920
loss: 1.093514084815979,grad_norm: 0.9999995006526494, iteration: 368921
loss: 1.0268524885177612,grad_norm: 0.8185300104433205, iteration: 368922
loss: 1.0398509502410889,grad_norm: 0.9999991520219895, iteration: 368923
loss: 1.051599383354187,grad_norm: 0.9999998590558908, iteration: 368924
loss: 1.0078213214874268,grad_norm: 0.9093511774895545, iteration: 368925
loss: 1.0102055072784424,grad_norm: 0.9999996230791367, iteration: 368926
loss: 0.9741402268409729,grad_norm: 0.7207659601586313, iteration: 368927
loss: 1.117582082748413,grad_norm: 0.9999994539037627, iteration: 368928
loss: 1.0218284130096436,grad_norm: 0.850542234675053, iteration: 368929
loss: 1.0276134014129639,grad_norm: 0.9455723174985949, iteration: 368930
loss: 0.957149863243103,grad_norm: 0.807643598728465, iteration: 368931
loss: 1.0317394733428955,grad_norm: 0.8009353465829832, iteration: 368932
loss: 0.9661375880241394,grad_norm: 0.8885176429044722, iteration: 368933
loss: 0.9951720237731934,grad_norm: 0.9698135143716619, iteration: 368934
loss: 0.9930233955383301,grad_norm: 0.9999990910262154, iteration: 368935
loss: 1.0078288316726685,grad_norm: 0.9704776272438174, iteration: 368936
loss: 0.9956172108650208,grad_norm: 0.8315753504165179, iteration: 368937
loss: 1.0250004529953003,grad_norm: 0.9999991558636059, iteration: 368938
loss: 1.0278276205062866,grad_norm: 0.9999991403009421, iteration: 368939
loss: 1.0059508085250854,grad_norm: 0.9377003859979433, iteration: 368940
loss: 0.9925079345703125,grad_norm: 0.999999014877591, iteration: 368941
loss: 0.9629262089729309,grad_norm: 0.7718598894562891, iteration: 368942
loss: 1.0303947925567627,grad_norm: 0.9999991704246729, iteration: 368943
loss: 1.0276777744293213,grad_norm: 0.7934710426029712, iteration: 368944
loss: 0.9857704639434814,grad_norm: 0.7168329563387931, iteration: 368945
loss: 1.0135600566864014,grad_norm: 0.7984893070321359, iteration: 368946
loss: 1.0082473754882812,grad_norm: 0.7823666148168293, iteration: 368947
loss: 0.940554141998291,grad_norm: 0.9223520917475164, iteration: 368948
loss: 1.0689159631729126,grad_norm: 0.9999999541947033, iteration: 368949
loss: 1.0967607498168945,grad_norm: 0.9999998023293694, iteration: 368950
loss: 1.0062452554702759,grad_norm: 0.7627698263796788, iteration: 368951
loss: 0.9946388006210327,grad_norm: 0.6577595725468967, iteration: 368952
loss: 0.997516393661499,grad_norm: 0.699736535881273, iteration: 368953
loss: 1.051517367362976,grad_norm: 0.9999998617256296, iteration: 368954
loss: 1.082277536392212,grad_norm: 0.8581738411894909, iteration: 368955
loss: 1.010414481163025,grad_norm: 0.9999991429551629, iteration: 368956
loss: 1.001482367515564,grad_norm: 0.7736184872377927, iteration: 368957
loss: 0.9889197945594788,grad_norm: 0.824131481431808, iteration: 368958
loss: 0.9916325211524963,grad_norm: 0.7828155270736544, iteration: 368959
loss: 1.077720284461975,grad_norm: 0.9999992597735224, iteration: 368960
loss: 1.0385243892669678,grad_norm: 0.736596853809376, iteration: 368961
loss: 1.0105640888214111,grad_norm: 0.999999117690934, iteration: 368962
loss: 1.059809684753418,grad_norm: 0.9999991634089961, iteration: 368963
loss: 1.1136040687561035,grad_norm: 0.9999995103018142, iteration: 368964
loss: 1.0072604417800903,grad_norm: 0.8980713786680847, iteration: 368965
loss: 1.0313310623168945,grad_norm: 0.8192341226553318, iteration: 368966
loss: 1.0208730697631836,grad_norm: 0.8300444785685797, iteration: 368967
loss: 0.9994707107543945,grad_norm: 0.7624839304906154, iteration: 368968
loss: 1.1783612966537476,grad_norm: 0.9999990914274937, iteration: 368969
loss: 1.0078996419906616,grad_norm: 0.696420653232939, iteration: 368970
loss: 1.0353134870529175,grad_norm: 0.9053039842003213, iteration: 368971
loss: 1.025040626525879,grad_norm: 0.9351798602589102, iteration: 368972
loss: 0.9686971306800842,grad_norm: 0.9245040033849746, iteration: 368973
loss: 1.0093727111816406,grad_norm: 0.7239072628800243, iteration: 368974
loss: 1.010892391204834,grad_norm: 0.820887289608695, iteration: 368975
loss: 1.0369088649749756,grad_norm: 0.815598456240022, iteration: 368976
loss: 1.0021440982818604,grad_norm: 0.81485436161375, iteration: 368977
loss: 0.9907562732696533,grad_norm: 0.7695998928234786, iteration: 368978
loss: 1.0580049753189087,grad_norm: 0.999999480614191, iteration: 368979
loss: 1.054282546043396,grad_norm: 0.9999991346399602, iteration: 368980
loss: 1.0018017292022705,grad_norm: 0.7725416644967944, iteration: 368981
loss: 0.9890220165252686,grad_norm: 0.7688468019121216, iteration: 368982
loss: 0.9771724939346313,grad_norm: 0.9999990564005309, iteration: 368983
loss: 1.004804253578186,grad_norm: 0.9999995557853939, iteration: 368984
loss: 1.2068846225738525,grad_norm: 0.9999992675144868, iteration: 368985
loss: 1.047297716140747,grad_norm: 0.9999999556310233, iteration: 368986
loss: 1.077749490737915,grad_norm: 0.9999998687908442, iteration: 368987
loss: 1.0508246421813965,grad_norm: 0.9999990655987074, iteration: 368988
loss: 1.0217946767807007,grad_norm: 0.7378403424993261, iteration: 368989
loss: 0.9985957145690918,grad_norm: 0.9200708163851046, iteration: 368990
loss: 1.0667694807052612,grad_norm: 0.9999991995716132, iteration: 368991
loss: 1.0126219987869263,grad_norm: 0.9414720907590767, iteration: 368992
loss: 0.9833829998970032,grad_norm: 0.6874829856826487, iteration: 368993
loss: 1.0102219581604004,grad_norm: 0.6909457440289537, iteration: 368994
loss: 1.0695970058441162,grad_norm: 0.7388521259435131, iteration: 368995
loss: 1.058696985244751,grad_norm: 0.999999812891323, iteration: 368996
loss: 1.1192914247512817,grad_norm: 0.9999995014448299, iteration: 368997
loss: 1.0032740831375122,grad_norm: 0.7828550379229303, iteration: 368998
loss: 1.061574935913086,grad_norm: 0.949587027507533, iteration: 368999
loss: 0.9985827207565308,grad_norm: 0.6681655302334607, iteration: 369000
loss: 1.0232328176498413,grad_norm: 0.9222893896514718, iteration: 369001
loss: 0.9752961993217468,grad_norm: 0.8458225932850552, iteration: 369002
loss: 1.089586615562439,grad_norm: 0.9999998490770708, iteration: 369003
loss: 1.1850876808166504,grad_norm: 0.9999999274998967, iteration: 369004
loss: 1.0323771238327026,grad_norm: 0.9556112199312892, iteration: 369005
loss: 1.009016513824463,grad_norm: 0.9999995250770316, iteration: 369006
loss: 1.0880645513534546,grad_norm: 0.9999992390326566, iteration: 369007
loss: 1.0240309238433838,grad_norm: 0.8879246602051455, iteration: 369008
loss: 1.0815643072128296,grad_norm: 0.9999998544470824, iteration: 369009
loss: 0.9886288046836853,grad_norm: 0.8783222768916117, iteration: 369010
loss: 1.0026271343231201,grad_norm: 0.7726132731125319, iteration: 369011
loss: 1.1326223611831665,grad_norm: 0.9999994974654657, iteration: 369012
loss: 1.066049575805664,grad_norm: 0.9999991014627788, iteration: 369013
loss: 1.0790618658065796,grad_norm: 0.9304450327221159, iteration: 369014
loss: 1.020037293434143,grad_norm: 0.9624142073354354, iteration: 369015
loss: 1.0725972652435303,grad_norm: 0.9999998676002341, iteration: 369016
loss: 0.9997767210006714,grad_norm: 0.7199068585865361, iteration: 369017
loss: 1.0972836017608643,grad_norm: 0.7759601387942184, iteration: 369018
loss: 1.0867427587509155,grad_norm: 0.999999538559472, iteration: 369019
loss: 1.0496494770050049,grad_norm: 0.7825246278080925, iteration: 369020
loss: 1.0111607313156128,grad_norm: 0.8579615722597468, iteration: 369021
loss: 1.0161817073822021,grad_norm: 0.7613898127161094, iteration: 369022
loss: 1.0225090980529785,grad_norm: 0.9999998894196749, iteration: 369023
loss: 1.012103796005249,grad_norm: 0.9493829014785451, iteration: 369024
loss: 1.1121692657470703,grad_norm: 0.9999995406306127, iteration: 369025
loss: 0.9510053396224976,grad_norm: 0.8466195943885656, iteration: 369026
loss: 1.039297342300415,grad_norm: 0.8699711491026817, iteration: 369027
loss: 0.9763341546058655,grad_norm: 0.6714559060950616, iteration: 369028
loss: 0.9973869919776917,grad_norm: 0.6442905328290542, iteration: 369029
loss: 1.0115917921066284,grad_norm: 0.7968080603349185, iteration: 369030
loss: 1.021445393562317,grad_norm: 0.9999990453288469, iteration: 369031
loss: 1.0278387069702148,grad_norm: 0.8476836055239173, iteration: 369032
loss: 0.982300877571106,grad_norm: 0.7796526651349781, iteration: 369033
loss: 1.0003201961517334,grad_norm: 0.8757605907033834, iteration: 369034
loss: 0.987786591053009,grad_norm: 0.8685319809241556, iteration: 369035
loss: 0.9598290324211121,grad_norm: 0.9999996555287277, iteration: 369036
loss: 0.9970700144767761,grad_norm: 0.8379449181934987, iteration: 369037
loss: 1.0006990432739258,grad_norm: 0.7603702586294484, iteration: 369038
loss: 1.0598703622817993,grad_norm: 0.9999999288604705, iteration: 369039
loss: 1.0120797157287598,grad_norm: 0.8055001633505596, iteration: 369040
loss: 1.0303452014923096,grad_norm: 0.8004880559007779, iteration: 369041
loss: 0.9989787340164185,grad_norm: 0.9116501705560558, iteration: 369042
loss: 1.0555142164230347,grad_norm: 0.9999990773392813, iteration: 369043
loss: 1.0137584209442139,grad_norm: 0.8867196903337331, iteration: 369044
loss: 1.033545732498169,grad_norm: 0.9999999137389393, iteration: 369045
loss: 1.0286381244659424,grad_norm: 0.8852992872854106, iteration: 369046
loss: 1.0473169088363647,grad_norm: 0.9999997542612119, iteration: 369047
loss: 0.969577968120575,grad_norm: 0.8653104313460527, iteration: 369048
loss: 1.106911540031433,grad_norm: 0.9999996362110645, iteration: 369049
loss: 1.0083391666412354,grad_norm: 0.999999184561666, iteration: 369050
loss: 1.0651516914367676,grad_norm: 0.959126829795611, iteration: 369051
loss: 1.0475945472717285,grad_norm: 0.6959063078666882, iteration: 369052
loss: 1.0356268882751465,grad_norm: 0.9999996312404533, iteration: 369053
loss: 1.0162755250930786,grad_norm: 0.7122679406615144, iteration: 369054
loss: 1.0884480476379395,grad_norm: 0.9999997035369549, iteration: 369055
loss: 1.1324421167373657,grad_norm: 0.9999996628083873, iteration: 369056
loss: 1.022157073020935,grad_norm: 0.7049931694542102, iteration: 369057
loss: 0.9849190711975098,grad_norm: 0.8172197108496146, iteration: 369058
loss: 1.024134874343872,grad_norm: 0.7753954863887502, iteration: 369059
loss: 1.0540263652801514,grad_norm: 0.9999996149658151, iteration: 369060
loss: 1.0105960369110107,grad_norm: 0.99999947185522, iteration: 369061
loss: 1.0292712450027466,grad_norm: 0.9999989349132307, iteration: 369062
loss: 1.0392204523086548,grad_norm: 0.9467283682995286, iteration: 369063
loss: 1.0144076347351074,grad_norm: 0.9999996714290863, iteration: 369064
loss: 0.9763115048408508,grad_norm: 0.860883414178264, iteration: 369065
loss: 1.0036810636520386,grad_norm: 0.9999990500548658, iteration: 369066
loss: 0.9471124410629272,grad_norm: 0.8715366410685821, iteration: 369067
loss: 1.0247082710266113,grad_norm: 0.862497314704971, iteration: 369068
loss: 1.0287635326385498,grad_norm: 0.9999998923965763, iteration: 369069
loss: 1.0566920042037964,grad_norm: 0.8666609326371297, iteration: 369070
loss: 1.2039891481399536,grad_norm: 0.999999509408235, iteration: 369071
loss: 1.0686345100402832,grad_norm: 0.9999998473167137, iteration: 369072
loss: 1.2027099132537842,grad_norm: 0.9999991131090226, iteration: 369073
loss: 1.0718201398849487,grad_norm: 0.8430417271722095, iteration: 369074
loss: 0.9794227480888367,grad_norm: 0.8315271085766028, iteration: 369075
loss: 1.0017751455307007,grad_norm: 0.7515471604968719, iteration: 369076
loss: 0.9777711629867554,grad_norm: 0.8018904804039061, iteration: 369077
loss: 1.0262802839279175,grad_norm: 0.8214950837691257, iteration: 369078
loss: 1.0139838457107544,grad_norm: 0.7931200525602513, iteration: 369079
loss: 0.9905003309249878,grad_norm: 0.9884249523329055, iteration: 369080
loss: 1.0173451900482178,grad_norm: 0.8402281711453449, iteration: 369081
loss: 1.0030103921890259,grad_norm: 0.759192917805404, iteration: 369082
loss: 1.1386815309524536,grad_norm: 0.9999994941205395, iteration: 369083
loss: 0.9770655035972595,grad_norm: 0.9995688873682322, iteration: 369084
loss: 1.0422552824020386,grad_norm: 0.9999995922628393, iteration: 369085
loss: 1.0117275714874268,grad_norm: 0.8468582375409272, iteration: 369086
loss: 1.0502110719680786,grad_norm: 0.853772170957671, iteration: 369087
loss: 1.0172327756881714,grad_norm: 0.9989027477479318, iteration: 369088
loss: 1.0302406549453735,grad_norm: 0.9999994570783853, iteration: 369089
loss: 1.072258710861206,grad_norm: 0.9999993795592308, iteration: 369090
loss: 1.0242136716842651,grad_norm: 0.7872995837392427, iteration: 369091
loss: 1.031243920326233,grad_norm: 0.952951179595917, iteration: 369092
loss: 0.9945963025093079,grad_norm: 0.9999990293872593, iteration: 369093
loss: 0.9776914715766907,grad_norm: 0.8776597462318586, iteration: 369094
loss: 1.2841755151748657,grad_norm: 0.9999993197970909, iteration: 369095
loss: 1.079298496246338,grad_norm: 0.9999999535635604, iteration: 369096
loss: 1.0930625200271606,grad_norm: 0.9999992615879637, iteration: 369097
loss: 1.0445119142532349,grad_norm: 0.9999998955550548, iteration: 369098
loss: 0.9996110796928406,grad_norm: 0.9999991588249357, iteration: 369099
loss: 1.0321998596191406,grad_norm: 0.991711865041187, iteration: 369100
loss: 1.010706901550293,grad_norm: 0.9999992483081526, iteration: 369101
loss: 1.0160462856292725,grad_norm: 0.8357773516205196, iteration: 369102
loss: 0.9680513143539429,grad_norm: 0.8418387254808942, iteration: 369103
loss: 0.9888831377029419,grad_norm: 0.7885052992421854, iteration: 369104
loss: 1.129068374633789,grad_norm: 0.9999997380574127, iteration: 369105
loss: 0.9906927943229675,grad_norm: 0.9999999898558634, iteration: 369106
loss: 1.012264609336853,grad_norm: 0.9274573724789217, iteration: 369107
loss: 1.0327216386795044,grad_norm: 0.9999999504687432, iteration: 369108
loss: 1.0166993141174316,grad_norm: 0.8271318547181336, iteration: 369109
loss: 0.9705326557159424,grad_norm: 0.7650089274022641, iteration: 369110
loss: 0.9397973418235779,grad_norm: 0.9999991523298637, iteration: 369111
loss: 1.0996208190917969,grad_norm: 0.9999996542179355, iteration: 369112
loss: 0.9582912921905518,grad_norm: 0.751591431622002, iteration: 369113
loss: 1.0129802227020264,grad_norm: 0.9999997227452276, iteration: 369114
loss: 0.9723905920982361,grad_norm: 0.9999993614656476, iteration: 369115
loss: 1.013421893119812,grad_norm: 0.8152755274338596, iteration: 369116
loss: 1.0084391832351685,grad_norm: 1.0000000368164503, iteration: 369117
loss: 0.9885566830635071,grad_norm: 0.8589635236031264, iteration: 369118
loss: 0.9932931065559387,grad_norm: 0.9451104275040322, iteration: 369119
loss: 1.0576527118682861,grad_norm: 0.9999994108243973, iteration: 369120
loss: 1.0436657667160034,grad_norm: 0.999999221900485, iteration: 369121
loss: 1.0186419486999512,grad_norm: 0.9999991407774184, iteration: 369122
loss: 0.9887838363647461,grad_norm: 0.750485440938219, iteration: 369123
loss: 1.0047671794891357,grad_norm: 0.8183601332699852, iteration: 369124
loss: 1.041382908821106,grad_norm: 0.8180972512083541, iteration: 369125
loss: 1.0227488279342651,grad_norm: 0.8586405571594011, iteration: 369126
loss: 1.0033483505249023,grad_norm: 0.6631769163584462, iteration: 369127
loss: 1.0346529483795166,grad_norm: 0.8834776677153209, iteration: 369128
loss: 1.0017414093017578,grad_norm: 0.8508590561556406, iteration: 369129
loss: 1.0302377939224243,grad_norm: 0.9999997919596498, iteration: 369130
loss: 1.0440757274627686,grad_norm: 0.9999997200410804, iteration: 369131
loss: 0.9925889372825623,grad_norm: 0.8410479795309841, iteration: 369132
loss: 1.0009938478469849,grad_norm: 0.8113461296689884, iteration: 369133
loss: 1.0358284711837769,grad_norm: 0.9999990887936873, iteration: 369134
loss: 1.0130873918533325,grad_norm: 0.9999992922786026, iteration: 369135
loss: 1.0150526762008667,grad_norm: 0.7580403862954513, iteration: 369136
loss: 1.0161584615707397,grad_norm: 0.7048974972606037, iteration: 369137
loss: 1.1848148107528687,grad_norm: 0.9999999057721488, iteration: 369138
loss: 0.9852120280265808,grad_norm: 0.9874808600336837, iteration: 369139
loss: 1.0636249780654907,grad_norm: 0.9065562734206712, iteration: 369140
loss: 0.979196310043335,grad_norm: 0.9999991506661033, iteration: 369141
loss: 1.002274513244629,grad_norm: 0.8109751616878077, iteration: 369142
loss: 1.011873722076416,grad_norm: 0.9138312960531655, iteration: 369143
loss: 1.020298957824707,grad_norm: 0.7428482672470823, iteration: 369144
loss: 1.064886212348938,grad_norm: 0.9999992742384333, iteration: 369145
loss: 1.0894277095794678,grad_norm: 0.9222350275869905, iteration: 369146
loss: 1.0038994550704956,grad_norm: 0.9735643476167855, iteration: 369147
loss: 0.9874387383460999,grad_norm: 0.9999990561935871, iteration: 369148
loss: 1.0275115966796875,grad_norm: 0.8397416113881693, iteration: 369149
loss: 1.0166398286819458,grad_norm: 0.7768197318218643, iteration: 369150
loss: 1.027665138244629,grad_norm: 0.8122457870944777, iteration: 369151
loss: 1.0078424215316772,grad_norm: 0.9474361780642194, iteration: 369152
loss: 1.0027198791503906,grad_norm: 0.9588273793697433, iteration: 369153
loss: 1.029304027557373,grad_norm: 0.9999992595679497, iteration: 369154
loss: 1.0105934143066406,grad_norm: 0.9235157148940151, iteration: 369155
loss: 1.1046340465545654,grad_norm: 0.999999159878111, iteration: 369156
loss: 1.022739291191101,grad_norm: 0.7684013631717908, iteration: 369157
loss: 1.129440426826477,grad_norm: 0.8873409281770506, iteration: 369158
loss: 0.9790826439857483,grad_norm: 0.8899199938572435, iteration: 369159
loss: 0.9944956302642822,grad_norm: 0.9338255820307396, iteration: 369160
loss: 0.9947875738143921,grad_norm: 0.9999993746755322, iteration: 369161
loss: 1.0330963134765625,grad_norm: 0.869432104878801, iteration: 369162
loss: 1.0500222444534302,grad_norm: 0.8843479163193213, iteration: 369163
loss: 0.9932506084442139,grad_norm: 0.7882754471657908, iteration: 369164
loss: 0.9931584000587463,grad_norm: 1.000000024344186, iteration: 369165
loss: 1.0210078954696655,grad_norm: 0.7414443595585267, iteration: 369166
loss: 1.1136665344238281,grad_norm: 0.9999991673620177, iteration: 369167
loss: 0.9721190929412842,grad_norm: 0.8857963197128838, iteration: 369168
loss: 1.0469101667404175,grad_norm: 0.999999721647654, iteration: 369169
loss: 1.2499034404754639,grad_norm: 0.9999998187037922, iteration: 369170
loss: 1.0244463682174683,grad_norm: 0.9999996308766738, iteration: 369171
loss: 1.0545846223831177,grad_norm: 0.9999993477947849, iteration: 369172
loss: 0.9764707684516907,grad_norm: 0.9999992285702191, iteration: 369173
loss: 1.0235131978988647,grad_norm: 0.9281970677302174, iteration: 369174
loss: 1.0066598653793335,grad_norm: 0.8970589021894645, iteration: 369175
loss: 1.104078769683838,grad_norm: 0.9999993344784396, iteration: 369176
loss: 0.9837192893028259,grad_norm: 0.6797271534384034, iteration: 369177
loss: 1.107469081878662,grad_norm: 0.9999991775970354, iteration: 369178
loss: 0.9855501055717468,grad_norm: 0.9247976044199367, iteration: 369179
loss: 1.006334900856018,grad_norm: 0.8533537095521434, iteration: 369180
loss: 0.9988209009170532,grad_norm: 0.6755451220433262, iteration: 369181
loss: 1.1248369216918945,grad_norm: 0.9999991498962305, iteration: 369182
loss: 1.1137834787368774,grad_norm: 0.9999996753293993, iteration: 369183
loss: 1.0086532831192017,grad_norm: 0.933844439471934, iteration: 369184
loss: 1.0274451971054077,grad_norm: 0.8454035728198636, iteration: 369185
loss: 1.1260597705841064,grad_norm: 0.999999610367134, iteration: 369186
loss: 1.1234855651855469,grad_norm: 0.9999994206293787, iteration: 369187
loss: 1.0211963653564453,grad_norm: 0.9090080643851873, iteration: 369188
loss: 1.2750245332717896,grad_norm: 0.9999996262078936, iteration: 369189
loss: 1.1336793899536133,grad_norm: 0.9999998804363709, iteration: 369190
loss: 1.0983844995498657,grad_norm: 0.9999990450908713, iteration: 369191
loss: 0.9893046617507935,grad_norm: 0.9141858822911422, iteration: 369192
loss: 1.0778741836547852,grad_norm: 0.9999990109018648, iteration: 369193
loss: 1.1143300533294678,grad_norm: 0.9999993903448968, iteration: 369194
loss: 1.0607548952102661,grad_norm: 0.9722742308052148, iteration: 369195
loss: 1.0228185653686523,grad_norm: 0.9370052747478188, iteration: 369196
loss: 1.0550099611282349,grad_norm: 0.9999992636323287, iteration: 369197
loss: 1.170530080795288,grad_norm: 0.9427709367132965, iteration: 369198
loss: 0.9965460896492004,grad_norm: 0.8128704194045067, iteration: 369199
loss: 0.9972365498542786,grad_norm: 0.9424745170848946, iteration: 369200
loss: 0.972081184387207,grad_norm: 0.8301337795555953, iteration: 369201
loss: 1.1519792079925537,grad_norm: 0.9999999774398379, iteration: 369202
loss: 1.0107940435409546,grad_norm: 0.9999994258144178, iteration: 369203
loss: 1.108238935470581,grad_norm: 0.999999795250691, iteration: 369204
loss: 0.9959109425544739,grad_norm: 0.8961939219997042, iteration: 369205
loss: 0.9839320778846741,grad_norm: 0.6788067964769232, iteration: 369206
loss: 1.0026699304580688,grad_norm: 0.8964117611220863, iteration: 369207
loss: 0.9390660524368286,grad_norm: 0.7834699765865651, iteration: 369208
loss: 0.9991320967674255,grad_norm: 0.9999999073047652, iteration: 369209
loss: 0.9932502508163452,grad_norm: 0.8970969529455406, iteration: 369210
loss: 0.970476508140564,grad_norm: 0.7843701048504641, iteration: 369211
loss: 1.0231667757034302,grad_norm: 0.9999997514171778, iteration: 369212
loss: 1.0042519569396973,grad_norm: 0.7641364341268759, iteration: 369213
loss: 1.0112732648849487,grad_norm: 0.9418737944397073, iteration: 369214
loss: 1.000531792640686,grad_norm: 0.7065592204911718, iteration: 369215
loss: 1.0043102502822876,grad_norm: 0.8878858287045188, iteration: 369216
loss: 1.1187033653259277,grad_norm: 0.9999997297980262, iteration: 369217
loss: 0.9784658551216125,grad_norm: 0.8111495188029656, iteration: 369218
loss: 1.0124489068984985,grad_norm: 0.9999991280834468, iteration: 369219
loss: 1.0378419160842896,grad_norm: 0.8029087958473712, iteration: 369220
loss: 1.05119788646698,grad_norm: 0.9999997941191225, iteration: 369221
loss: 1.0215981006622314,grad_norm: 0.99999923449612, iteration: 369222
loss: 1.0379509925842285,grad_norm: 0.8592630394600785, iteration: 369223
loss: 1.0662057399749756,grad_norm: 0.9999999562358707, iteration: 369224
loss: 1.1851450204849243,grad_norm: 0.9999998834764345, iteration: 369225
loss: 1.0372358560562134,grad_norm: 0.8718247585059813, iteration: 369226
loss: 1.043960690498352,grad_norm: 0.8055857258341301, iteration: 369227
loss: 1.0461097955703735,grad_norm: 0.9999997073286158, iteration: 369228
loss: 1.0611294507980347,grad_norm: 0.9999994269753362, iteration: 369229
loss: 1.0304187536239624,grad_norm: 0.9999994295068518, iteration: 369230
loss: 0.997654914855957,grad_norm: 0.9999991619019097, iteration: 369231
loss: 1.0337141752243042,grad_norm: 0.7975244650627861, iteration: 369232
loss: 1.018872857093811,grad_norm: 0.8904353479995942, iteration: 369233
loss: 0.997247576713562,grad_norm: 0.874376158507224, iteration: 369234
loss: 1.0246161222457886,grad_norm: 0.7928002106120332, iteration: 369235
loss: 1.0485591888427734,grad_norm: 0.9999997447314988, iteration: 369236
loss: 0.9995824694633484,grad_norm: 0.7775682580120458, iteration: 369237
loss: 1.011989712715149,grad_norm: 0.9999990300338717, iteration: 369238
loss: 1.0244947671890259,grad_norm: 0.9999990890518717, iteration: 369239
loss: 0.9923760294914246,grad_norm: 0.8605602251990649, iteration: 369240
loss: 0.9828881621360779,grad_norm: 0.7661953311894845, iteration: 369241
loss: 1.0126726627349854,grad_norm: 0.7995070446510644, iteration: 369242
loss: 1.057411789894104,grad_norm: 0.9999996232455245, iteration: 369243
loss: 1.007100224494934,grad_norm: 0.8649519499282013, iteration: 369244
loss: 0.9984521269798279,grad_norm: 0.7338652188483558, iteration: 369245
loss: 0.9899057745933533,grad_norm: 0.8097056820586941, iteration: 369246
loss: 1.0674327611923218,grad_norm: 0.9999994137814663, iteration: 369247
loss: 1.0224250555038452,grad_norm: 0.790005389824303, iteration: 369248
loss: 1.086173415184021,grad_norm: 0.832735205723071, iteration: 369249
loss: 0.9961548447608948,grad_norm: 0.9999990324676735, iteration: 369250
loss: 1.009146809577942,grad_norm: 0.8814638547497851, iteration: 369251
loss: 0.9891314506530762,grad_norm: 0.7040456118283749, iteration: 369252
loss: 1.0217236280441284,grad_norm: 0.9354440349901735, iteration: 369253
loss: 1.0106947422027588,grad_norm: 0.6479292596962446, iteration: 369254
loss: 1.0287154912948608,grad_norm: 0.9999991007654023, iteration: 369255
loss: 1.017427682876587,grad_norm: 0.8412323169998434, iteration: 369256
loss: 0.9922171831130981,grad_norm: 0.7815626747851344, iteration: 369257
loss: 0.9844214916229248,grad_norm: 0.8056007586142778, iteration: 369258
loss: 0.9794963598251343,grad_norm: 0.8282168329178871, iteration: 369259
loss: 1.019092082977295,grad_norm: 0.6863297498953931, iteration: 369260
loss: 1.1011356115341187,grad_norm: 1.0000000009939238, iteration: 369261
loss: 0.9845964908599854,grad_norm: 0.9011889284495254, iteration: 369262
loss: 1.0074949264526367,grad_norm: 0.8655372963510765, iteration: 369263
loss: 0.9999527335166931,grad_norm: 0.8567374558398979, iteration: 369264
loss: 0.9775922894477844,grad_norm: 0.6427752846007824, iteration: 369265
loss: 0.9947118163108826,grad_norm: 0.9281807205809149, iteration: 369266
loss: 1.015010952949524,grad_norm: 0.9158794546313175, iteration: 369267
loss: 0.9737699627876282,grad_norm: 0.8876218899449875, iteration: 369268
loss: 1.0082734823226929,grad_norm: 0.8417991381935976, iteration: 369269
loss: 1.0035301446914673,grad_norm: 0.8786697610930253, iteration: 369270
loss: 1.004146933555603,grad_norm: 0.7447976503534931, iteration: 369271
loss: 1.0274467468261719,grad_norm: 0.7212724284064828, iteration: 369272
loss: 0.9607008099555969,grad_norm: 0.6067963332931059, iteration: 369273
loss: 0.9947503805160522,grad_norm: 0.8407517377802638, iteration: 369274
loss: 0.998375415802002,grad_norm: 0.744624122302304, iteration: 369275
loss: 1.064651370048523,grad_norm: 0.9999997321824298, iteration: 369276
loss: 1.0231258869171143,grad_norm: 0.7834417014901334, iteration: 369277
loss: 1.0409011840820312,grad_norm: 0.775641481483697, iteration: 369278
loss: 0.9944995641708374,grad_norm: 0.769134893244719, iteration: 369279
loss: 1.0434284210205078,grad_norm: 0.9460613882962696, iteration: 369280
loss: 1.0111461877822876,grad_norm: 0.9732433707041396, iteration: 369281
loss: 1.075348138809204,grad_norm: 0.9999999583961428, iteration: 369282
loss: 1.0606670379638672,grad_norm: 0.8814618348389087, iteration: 369283
loss: 0.9702332615852356,grad_norm: 0.7556350169237004, iteration: 369284
loss: 0.9834032654762268,grad_norm: 0.9999995764668955, iteration: 369285
loss: 1.0288119316101074,grad_norm: 0.847221160158898, iteration: 369286
loss: 0.9959975481033325,grad_norm: 0.8154461823313672, iteration: 369287
loss: 1.0112051963806152,grad_norm: 0.6973170329962884, iteration: 369288
loss: 1.0124154090881348,grad_norm: 0.7713040932182595, iteration: 369289
loss: 1.0267101526260376,grad_norm: 0.7773798399866935, iteration: 369290
loss: 1.0326849222183228,grad_norm: 0.9297800920563292, iteration: 369291
loss: 1.0021913051605225,grad_norm: 0.7060200602025862, iteration: 369292
loss: 0.9555506706237793,grad_norm: 0.7460727129223396, iteration: 369293
loss: 1.0969560146331787,grad_norm: 0.9999999187448546, iteration: 369294
loss: 1.0317598581314087,grad_norm: 0.9105012184299259, iteration: 369295
loss: 1.0251120328903198,grad_norm: 0.7922656942075267, iteration: 369296
loss: 1.0973349809646606,grad_norm: 0.9999998881114226, iteration: 369297
loss: 0.9905568361282349,grad_norm: 0.6873565712175295, iteration: 369298
loss: 1.0274291038513184,grad_norm: 0.9999990292216572, iteration: 369299
loss: 1.026745319366455,grad_norm: 0.9999994143443843, iteration: 369300
loss: 1.0016355514526367,grad_norm: 0.8206126257526999, iteration: 369301
loss: 1.0244967937469482,grad_norm: 0.937759557070497, iteration: 369302
loss: 1.0450247526168823,grad_norm: 0.9999996142208409, iteration: 369303
loss: 1.0263292789459229,grad_norm: 0.8565883666841215, iteration: 369304
loss: 1.0070384740829468,grad_norm: 0.8090148695607511, iteration: 369305
loss: 1.0099854469299316,grad_norm: 0.7335565140001065, iteration: 369306
loss: 1.0156174898147583,grad_norm: 0.782552226389507, iteration: 369307
loss: 1.0105953216552734,grad_norm: 0.8135882895598728, iteration: 369308
loss: 0.9906564950942993,grad_norm: 0.6566727714547397, iteration: 369309
loss: 0.994530439376831,grad_norm: 0.8537337988690051, iteration: 369310
loss: 1.0575807094573975,grad_norm: 0.9405982793752857, iteration: 369311
loss: 0.9980919361114502,grad_norm: 0.8831687202902222, iteration: 369312
loss: 1.1508336067199707,grad_norm: 0.8876541451716718, iteration: 369313
loss: 0.9883719682693481,grad_norm: 0.8213880583794254, iteration: 369314
loss: 1.1187993288040161,grad_norm: 0.999999756260237, iteration: 369315
loss: 0.9430411458015442,grad_norm: 0.7959386524940261, iteration: 369316
loss: 1.008247971534729,grad_norm: 0.9676570036814146, iteration: 369317
loss: 1.0758196115493774,grad_norm: 0.9999991254542954, iteration: 369318
loss: 0.9798810482025146,grad_norm: 0.7448430256879986, iteration: 369319
loss: 1.0199799537658691,grad_norm: 0.9999996255796486, iteration: 369320
loss: 0.994574785232544,grad_norm: 0.7695880583539674, iteration: 369321
loss: 1.000389814376831,grad_norm: 0.7962274479013844, iteration: 369322
loss: 0.9897153377532959,grad_norm: 0.7425813023457942, iteration: 369323
loss: 1.0081671476364136,grad_norm: 0.7385291272831707, iteration: 369324
loss: 1.0277986526489258,grad_norm: 0.8027997967525987, iteration: 369325
loss: 0.999315619468689,grad_norm: 0.6408409819898713, iteration: 369326
loss: 1.099092960357666,grad_norm: 0.7254185800354433, iteration: 369327
loss: 0.9727762341499329,grad_norm: 0.9145415013156322, iteration: 369328
loss: 1.0036009550094604,grad_norm: 0.9999994438054108, iteration: 369329
loss: 0.9991884827613831,grad_norm: 0.7644831259466319, iteration: 369330
loss: 0.9880399703979492,grad_norm: 0.8234118371290569, iteration: 369331
loss: 1.0574721097946167,grad_norm: 0.9999998073827714, iteration: 369332
loss: 0.9712720513343811,grad_norm: 0.8631549340107662, iteration: 369333
loss: 1.0510398149490356,grad_norm: 0.9999994746034503, iteration: 369334
loss: 1.014664888381958,grad_norm: 0.7918939256320601, iteration: 369335
loss: 0.9774060249328613,grad_norm: 0.7737994358688146, iteration: 369336
loss: 1.037163257598877,grad_norm: 0.9937176960284647, iteration: 369337
loss: 0.9785999059677124,grad_norm: 0.999999053347749, iteration: 369338
loss: 0.9910550117492676,grad_norm: 0.8261511038186095, iteration: 369339
loss: 0.991072952747345,grad_norm: 0.8249185980812892, iteration: 369340
loss: 1.0008805990219116,grad_norm: 0.8967446330138128, iteration: 369341
loss: 0.9915105700492859,grad_norm: 0.7555091973056963, iteration: 369342
loss: 0.9414079785346985,grad_norm: 0.8787637574132944, iteration: 369343
loss: 1.0366312265396118,grad_norm: 0.9999997456476745, iteration: 369344
loss: 1.0337746143341064,grad_norm: 0.8264761538598577, iteration: 369345
loss: 1.1093345880508423,grad_norm: 0.9730075328123956, iteration: 369346
loss: 1.0035942792892456,grad_norm: 0.7939737100748664, iteration: 369347
loss: 0.9972139596939087,grad_norm: 0.6721857780297532, iteration: 369348
loss: 0.9974619746208191,grad_norm: 0.7781697858809598, iteration: 369349
loss: 1.001633882522583,grad_norm: 0.9616892495819299, iteration: 369350
loss: 1.0598169565200806,grad_norm: 0.9008479617794461, iteration: 369351
loss: 1.0179733037948608,grad_norm: 0.9999991945588312, iteration: 369352
loss: 1.0223296880722046,grad_norm: 0.7360258068276023, iteration: 369353
loss: 1.0267353057861328,grad_norm: 0.8212834302559271, iteration: 369354
loss: 0.9734113812446594,grad_norm: 0.7563575317026815, iteration: 369355
loss: 0.9861292839050293,grad_norm: 0.9999989813866589, iteration: 369356
loss: 1.0427674055099487,grad_norm: 0.8264042830284104, iteration: 369357
loss: 1.0216223001480103,grad_norm: 0.7893854348585576, iteration: 369358
loss: 1.0201270580291748,grad_norm: 0.6970323812366903, iteration: 369359
loss: 1.00669264793396,grad_norm: 0.6680138762510647, iteration: 369360
loss: 1.0139251947402954,grad_norm: 0.8319343534158576, iteration: 369361
loss: 0.9970390200614929,grad_norm: 0.746391729138833, iteration: 369362
loss: 0.9846809506416321,grad_norm: 0.8495688437541571, iteration: 369363
loss: 0.9765400290489197,grad_norm: 0.9458756983304167, iteration: 369364
loss: 0.993690550327301,grad_norm: 0.9999993287832413, iteration: 369365
loss: 0.9715859293937683,grad_norm: 0.9107973831401226, iteration: 369366
loss: 0.9919652342796326,grad_norm: 0.8405557532979927, iteration: 369367
loss: 0.9870525002479553,grad_norm: 0.88540230468052, iteration: 369368
loss: 1.0334947109222412,grad_norm: 0.9999994598788281, iteration: 369369
loss: 0.9448877573013306,grad_norm: 0.7319865010968406, iteration: 369370
loss: 0.958331823348999,grad_norm: 0.8385687652656969, iteration: 369371
loss: 1.0931860208511353,grad_norm: 0.9512348597626133, iteration: 369372
loss: 0.9818676114082336,grad_norm: 0.7693928178900103, iteration: 369373
loss: 1.011583685874939,grad_norm: 0.9129567561031364, iteration: 369374
loss: 1.0706989765167236,grad_norm: 0.8987101897401849, iteration: 369375
loss: 1.0197895765304565,grad_norm: 0.8170540725908672, iteration: 369376
loss: 1.028540849685669,grad_norm: 0.7726575320327715, iteration: 369377
loss: 1.0013397932052612,grad_norm: 0.7609474949838316, iteration: 369378
loss: 1.0007785558700562,grad_norm: 0.7788384932003243, iteration: 369379
loss: 1.0243570804595947,grad_norm: 0.8232655838179405, iteration: 369380
loss: 0.9930312037467957,grad_norm: 0.8737707729630753, iteration: 369381
loss: 1.02567458152771,grad_norm: 0.8498685579614813, iteration: 369382
loss: 1.0300045013427734,grad_norm: 0.8973127371008057, iteration: 369383
loss: 0.9658986330032349,grad_norm: 0.7906138366894568, iteration: 369384
loss: 1.189012050628662,grad_norm: 0.9999999247448507, iteration: 369385
loss: 1.1632471084594727,grad_norm: 0.9999995187717935, iteration: 369386
loss: 1.0143649578094482,grad_norm: 0.7511328440097703, iteration: 369387
loss: 1.0182512998580933,grad_norm: 0.82760159505946, iteration: 369388
loss: 0.9935531616210938,grad_norm: 0.7255479056871935, iteration: 369389
loss: 0.9983774423599243,grad_norm: 0.8107275893865658, iteration: 369390
loss: 1.002194881439209,grad_norm: 0.8952972664723478, iteration: 369391
loss: 1.0082015991210938,grad_norm: 0.825136786023012, iteration: 369392
loss: 1.0200732946395874,grad_norm: 0.7759961331027372, iteration: 369393
loss: 1.0219967365264893,grad_norm: 0.8148757738615069, iteration: 369394
loss: 0.9830160140991211,grad_norm: 0.9999991347132231, iteration: 369395
loss: 1.0046952962875366,grad_norm: 0.798995064652686, iteration: 369396
loss: 1.0315595865249634,grad_norm: 0.8275260691047711, iteration: 369397
loss: 0.9821797013282776,grad_norm: 0.92233674317769, iteration: 369398
loss: 0.9880636930465698,grad_norm: 0.8859339972449272, iteration: 369399
loss: 1.1464672088623047,grad_norm: 0.9999999321157249, iteration: 369400
loss: 1.0410655736923218,grad_norm: 0.854949981404852, iteration: 369401
loss: 0.9910920858383179,grad_norm: 0.7737173890329615, iteration: 369402
loss: 1.0001786947250366,grad_norm: 0.7852423226799263, iteration: 369403
loss: 0.9545484781265259,grad_norm: 0.8104704500665886, iteration: 369404
loss: 1.0243399143218994,grad_norm: 0.7496854937483489, iteration: 369405
loss: 0.992606520652771,grad_norm: 0.7084887947661204, iteration: 369406
loss: 1.0046576261520386,grad_norm: 0.7571613710252537, iteration: 369407
loss: 0.9918735027313232,grad_norm: 0.7397179429109475, iteration: 369408
loss: 1.0341849327087402,grad_norm: 0.9999989867262619, iteration: 369409
loss: 0.9879016876220703,grad_norm: 0.8027589309631475, iteration: 369410
loss: 1.0042288303375244,grad_norm: 0.8089877011894034, iteration: 369411
loss: 0.9981278777122498,grad_norm: 0.9999997240949765, iteration: 369412
loss: 1.0076792240142822,grad_norm: 0.9999991131460608, iteration: 369413
loss: 1.0042725801467896,grad_norm: 0.8620369651163328, iteration: 369414
loss: 1.0157579183578491,grad_norm: 0.7999951303050189, iteration: 369415
loss: 0.9661582708358765,grad_norm: 0.7833844067521889, iteration: 369416
loss: 0.9769157767295837,grad_norm: 0.6258748246901361, iteration: 369417
loss: 0.9946833252906799,grad_norm: 0.9999993127563032, iteration: 369418
loss: 1.022691249847412,grad_norm: 0.7469626523287458, iteration: 369419
loss: 1.0152255296707153,grad_norm: 0.8369706545264872, iteration: 369420
loss: 1.0582118034362793,grad_norm: 0.9999998949327037, iteration: 369421
loss: 0.9865856766700745,grad_norm: 0.7927463736433948, iteration: 369422
loss: 0.9606279730796814,grad_norm: 0.942455843339688, iteration: 369423
loss: 1.1769728660583496,grad_norm: 0.9999999794082295, iteration: 369424
loss: 1.088150143623352,grad_norm: 0.9999991674428609, iteration: 369425
loss: 0.9572759866714478,grad_norm: 0.7561139566566704, iteration: 369426
loss: 0.9864055514335632,grad_norm: 0.8726026040330149, iteration: 369427
loss: 1.0040055513381958,grad_norm: 0.822779140088492, iteration: 369428
loss: 1.0199800729751587,grad_norm: 0.9018224615322874, iteration: 369429
loss: 0.9798517823219299,grad_norm: 0.8090108682879379, iteration: 369430
loss: 0.991709291934967,grad_norm: 0.8085413143733883, iteration: 369431
loss: 0.9921337366104126,grad_norm: 0.8659551001444981, iteration: 369432
loss: 0.9792036414146423,grad_norm: 0.999999204466007, iteration: 369433
loss: 1.0108442306518555,grad_norm: 0.9999997347177958, iteration: 369434
loss: 1.0390939712524414,grad_norm: 0.9999999499216256, iteration: 369435
loss: 1.0127846002578735,grad_norm: 0.9999992541540199, iteration: 369436
loss: 0.9758849740028381,grad_norm: 0.8991793061253679, iteration: 369437
loss: 1.0333970785140991,grad_norm: 0.9999994015906428, iteration: 369438
loss: 1.0039269924163818,grad_norm: 0.9999996926577167, iteration: 369439
loss: 0.9814488291740417,grad_norm: 0.789745280297824, iteration: 369440
loss: 1.0282224416732788,grad_norm: 0.9999992370939852, iteration: 369441
loss: 0.9945518970489502,grad_norm: 0.8158362095263609, iteration: 369442
loss: 0.9697220325469971,grad_norm: 0.8614993141700763, iteration: 369443
loss: 1.0277130603790283,grad_norm: 0.9331087477351154, iteration: 369444
loss: 1.0001283884048462,grad_norm: 0.9999989703412268, iteration: 369445
loss: 1.0221015214920044,grad_norm: 0.7708892910831501, iteration: 369446
loss: 0.9849012494087219,grad_norm: 0.8072120500511489, iteration: 369447
loss: 1.0335346460342407,grad_norm: 0.6993708377953712, iteration: 369448
loss: 1.0129644870758057,grad_norm: 0.7661996562855092, iteration: 369449
loss: 1.009866714477539,grad_norm: 0.9059522248838973, iteration: 369450
loss: 0.9710363745689392,grad_norm: 0.7749712157481864, iteration: 369451
loss: 0.97960364818573,grad_norm: 0.999999401054437, iteration: 369452
loss: 1.0158964395523071,grad_norm: 0.8717458287643729, iteration: 369453
loss: 0.9144877195358276,grad_norm: 0.8313675876061174, iteration: 369454
loss: 0.9966368675231934,grad_norm: 0.7011909136129146, iteration: 369455
loss: 1.0097805261611938,grad_norm: 0.999998962381111, iteration: 369456
loss: 0.9968905448913574,grad_norm: 0.7597239099347091, iteration: 369457
loss: 1.0514696836471558,grad_norm: 0.8866922268834293, iteration: 369458
loss: 1.0020911693572998,grad_norm: 0.8820011696746901, iteration: 369459
loss: 1.0697484016418457,grad_norm: 0.834899725172375, iteration: 369460
loss: 1.0478296279907227,grad_norm: 0.8480167225250446, iteration: 369461
loss: 1.0234357118606567,grad_norm: 0.783051279745474, iteration: 369462
loss: 1.004686951637268,grad_norm: 0.9999993768086398, iteration: 369463
loss: 1.0016108751296997,grad_norm: 0.77619439805731, iteration: 369464
loss: 1.0061143636703491,grad_norm: 0.8318646411743057, iteration: 369465
loss: 1.0618407726287842,grad_norm: 0.8042418687462635, iteration: 369466
loss: 1.1154143810272217,grad_norm: 0.9025582316812513, iteration: 369467
loss: 0.998760461807251,grad_norm: 0.7181093742629217, iteration: 369468
loss: 0.9767780303955078,grad_norm: 0.8668154824872165, iteration: 369469
loss: 0.9795552492141724,grad_norm: 0.9650538262452385, iteration: 369470
loss: 0.9839048385620117,grad_norm: 0.9999989607779988, iteration: 369471
loss: 1.0062463283538818,grad_norm: 0.9138084295883511, iteration: 369472
loss: 1.038456678390503,grad_norm: 0.8716475128244615, iteration: 369473
loss: 1.0262588262557983,grad_norm: 1.0000000978908363, iteration: 369474
loss: 0.9772484302520752,grad_norm: 0.7550628909048304, iteration: 369475
loss: 0.9754784107208252,grad_norm: 0.7153926047872813, iteration: 369476
loss: 0.9637314677238464,grad_norm: 0.955602130048538, iteration: 369477
loss: 1.0889252424240112,grad_norm: 0.9999998348617021, iteration: 369478
loss: 1.0168009996414185,grad_norm: 0.8526713366746216, iteration: 369479
loss: 1.0289255380630493,grad_norm: 0.7963432011912184, iteration: 369480
loss: 0.9780340194702148,grad_norm: 0.9857809930978962, iteration: 369481
loss: 1.0305968523025513,grad_norm: 0.9305546150253866, iteration: 369482
loss: 0.9986070394515991,grad_norm: 0.8698767220306012, iteration: 369483
loss: 1.002272129058838,grad_norm: 0.8416882665849952, iteration: 369484
loss: 0.9528118371963501,grad_norm: 0.8282015002477912, iteration: 369485
loss: 1.006919026374817,grad_norm: 0.7481345409053611, iteration: 369486
loss: 0.9886097311973572,grad_norm: 0.6801862290461215, iteration: 369487
loss: 1.026149868965149,grad_norm: 0.8805696656959536, iteration: 369488
loss: 0.9616389274597168,grad_norm: 0.7399764604677476, iteration: 369489
loss: 1.0083539485931396,grad_norm: 0.7269992877418648, iteration: 369490
loss: 1.0219013690948486,grad_norm: 0.9090789571884834, iteration: 369491
loss: 0.9839583039283752,grad_norm: 0.6923348283394105, iteration: 369492
loss: 1.023863434791565,grad_norm: 0.9371707566595117, iteration: 369493
loss: 1.0762377977371216,grad_norm: 0.9999994581261594, iteration: 369494
loss: 0.9543737173080444,grad_norm: 0.9011211238293334, iteration: 369495
loss: 0.9882434606552124,grad_norm: 0.6440290310724249, iteration: 369496
loss: 1.0091617107391357,grad_norm: 0.9861322749859812, iteration: 369497
loss: 1.0144530534744263,grad_norm: 0.8362970567990761, iteration: 369498
loss: 1.0302746295928955,grad_norm: 0.7779971472952263, iteration: 369499
loss: 1.0177547931671143,grad_norm: 0.6858272955159087, iteration: 369500
loss: 1.0088578462600708,grad_norm: 0.7616688690401148, iteration: 369501
loss: 1.0327246189117432,grad_norm: 0.7728583788703499, iteration: 369502
loss: 1.0197153091430664,grad_norm: 0.6476323844303384, iteration: 369503
loss: 1.0277434587478638,grad_norm: 0.9494232763105007, iteration: 369504
loss: 0.9851117730140686,grad_norm: 0.792707507353139, iteration: 369505
loss: 1.0310041904449463,grad_norm: 0.999999270947457, iteration: 369506
loss: 1.2224786281585693,grad_norm: 0.9999993541343221, iteration: 369507
loss: 1.0137577056884766,grad_norm: 0.9417797921884531, iteration: 369508
loss: 1.0298855304718018,grad_norm: 0.8849420436851173, iteration: 369509
loss: 1.0046921968460083,grad_norm: 0.8330658313918995, iteration: 369510
loss: 0.9881236553192139,grad_norm: 0.8600672201778329, iteration: 369511
loss: 0.9923956990242004,grad_norm: 0.8048378144132831, iteration: 369512
loss: 1.0421351194381714,grad_norm: 0.9999993856875653, iteration: 369513
loss: 0.9829153418540955,grad_norm: 0.9990699682225869, iteration: 369514
loss: 1.0054632425308228,grad_norm: 0.9363561677200246, iteration: 369515
loss: 1.063759684562683,grad_norm: 0.8951494770569636, iteration: 369516
loss: 0.9933583736419678,grad_norm: 0.7318401743302261, iteration: 369517
loss: 1.0353273153305054,grad_norm: 0.999999192688526, iteration: 369518
loss: 0.9534193873405457,grad_norm: 0.8007785870794295, iteration: 369519
loss: 1.0101675987243652,grad_norm: 0.7905637395544356, iteration: 369520
loss: 1.0216578245162964,grad_norm: 0.8446251728227385, iteration: 369521
loss: 1.0158369541168213,grad_norm: 0.998992946278132, iteration: 369522
loss: 0.9927566647529602,grad_norm: 0.891281940353707, iteration: 369523
loss: 1.0715681314468384,grad_norm: 0.999999093249535, iteration: 369524
loss: 0.9841209053993225,grad_norm: 0.6963100641089398, iteration: 369525
loss: 1.0006403923034668,grad_norm: 0.8114034356027698, iteration: 369526
loss: 1.0881526470184326,grad_norm: 0.9999999551896622, iteration: 369527
loss: 0.988152027130127,grad_norm: 0.8247060615122208, iteration: 369528
loss: 1.0085515975952148,grad_norm: 0.970907630334436, iteration: 369529
loss: 0.9750509262084961,grad_norm: 0.7131649510928055, iteration: 369530
loss: 1.028772234916687,grad_norm: 0.9690584169122185, iteration: 369531
loss: 1.0063246488571167,grad_norm: 0.8276979975681619, iteration: 369532
loss: 0.9839686751365662,grad_norm: 0.9999991147652073, iteration: 369533
loss: 0.9925023317337036,grad_norm: 0.7491356832961148, iteration: 369534
loss: 1.005143404006958,grad_norm: 0.7578550306223238, iteration: 369535
loss: 1.041033387184143,grad_norm: 0.9265522681523695, iteration: 369536
loss: 0.9872931241989136,grad_norm: 0.8156378433317639, iteration: 369537
loss: 1.0418487787246704,grad_norm: 0.8250520805234995, iteration: 369538
loss: 0.9818456172943115,grad_norm: 0.9999992718919803, iteration: 369539
loss: 1.0322593450546265,grad_norm: 0.987368118155524, iteration: 369540
loss: 1.0482945442199707,grad_norm: 0.9999994458321861, iteration: 369541
loss: 1.0644201040267944,grad_norm: 0.7273714179745294, iteration: 369542
loss: 0.9763838052749634,grad_norm: 0.7870167550513185, iteration: 369543
loss: 0.9968870282173157,grad_norm: 0.8699767487512461, iteration: 369544
loss: 1.0677422285079956,grad_norm: 0.7951009124777965, iteration: 369545
loss: 1.0251842737197876,grad_norm: 0.9999990709067109, iteration: 369546
loss: 1.045505404472351,grad_norm: 0.9999993363842349, iteration: 369547
loss: 0.9616422653198242,grad_norm: 0.7933295824878703, iteration: 369548
loss: 0.991187572479248,grad_norm: 0.7449046072995248, iteration: 369549
loss: 0.992196261882782,grad_norm: 0.7673718570721422, iteration: 369550
loss: 0.9566555023193359,grad_norm: 0.8624929415612391, iteration: 369551
loss: 1.0019166469573975,grad_norm: 0.8066546183595481, iteration: 369552
loss: 0.9580429792404175,grad_norm: 0.9999990934936352, iteration: 369553
loss: 1.006049394607544,grad_norm: 0.9999998954564974, iteration: 369554
loss: 1.0040476322174072,grad_norm: 0.9239902846912162, iteration: 369555
loss: 1.0007494688034058,grad_norm: 0.6599909514715758, iteration: 369556
loss: 0.9796227216720581,grad_norm: 0.9023162742944977, iteration: 369557
loss: 1.050809383392334,grad_norm: 0.9999998675724532, iteration: 369558
loss: 0.9355600476264954,grad_norm: 0.8709750688848117, iteration: 369559
loss: 1.0045539140701294,grad_norm: 0.6996433068116761, iteration: 369560
loss: 1.0255788564682007,grad_norm: 0.7594024274390908, iteration: 369561
loss: 1.0059187412261963,grad_norm: 0.9999991518300906, iteration: 369562
loss: 1.0294132232666016,grad_norm: 0.844664169387414, iteration: 369563
loss: 1.063436508178711,grad_norm: 0.9999994164416038, iteration: 369564
loss: 1.0166369676589966,grad_norm: 0.9999991064354762, iteration: 369565
loss: 1.0584185123443604,grad_norm: 0.8297296729404524, iteration: 369566
loss: 0.9980500340461731,grad_norm: 0.7713094186631599, iteration: 369567
loss: 1.004563331604004,grad_norm: 0.7894931526410172, iteration: 369568
loss: 1.03293776512146,grad_norm: 0.7823277464575419, iteration: 369569
loss: 1.0192474126815796,grad_norm: 0.9309709632932834, iteration: 369570
loss: 0.9677420854568481,grad_norm: 0.9999992967152601, iteration: 369571
loss: 0.9831035733222961,grad_norm: 0.8578533165637476, iteration: 369572
loss: 1.0146523714065552,grad_norm: 0.8351244074659863, iteration: 369573
loss: 0.9906629920005798,grad_norm: 0.798871107697693, iteration: 369574
loss: 1.0955442190170288,grad_norm: 0.9999992162688404, iteration: 369575
loss: 1.038874864578247,grad_norm: 0.9999992388256909, iteration: 369576
loss: 1.0050657987594604,grad_norm: 0.8328654159240717, iteration: 369577
loss: 0.9753345251083374,grad_norm: 0.7712715461086892, iteration: 369578
loss: 0.9967990517616272,grad_norm: 0.8251065736240983, iteration: 369579
loss: 1.0107650756835938,grad_norm: 0.9999993666005856, iteration: 369580
loss: 1.290578842163086,grad_norm: 1.0000000153694109, iteration: 369581
loss: 1.0415949821472168,grad_norm: 0.7722034744045433, iteration: 369582
loss: 1.034403920173645,grad_norm: 0.9999996424378166, iteration: 369583
loss: 0.9737741351127625,grad_norm: 0.908228794315008, iteration: 369584
loss: 0.9679297804832458,grad_norm: 0.858589236411601, iteration: 369585
loss: 0.9808784127235413,grad_norm: 0.7681923943597643, iteration: 369586
loss: 1.0451951026916504,grad_norm: 0.9999993908917895, iteration: 369587
loss: 0.9817867875099182,grad_norm: 0.8416669229655269, iteration: 369588
loss: 1.0493661165237427,grad_norm: 0.9999997603924472, iteration: 369589
loss: 1.0021265745162964,grad_norm: 0.7800456881546148, iteration: 369590
loss: 1.0084892511367798,grad_norm: 0.909843067489782, iteration: 369591
loss: 1.0241057872772217,grad_norm: 0.9998086121803079, iteration: 369592
loss: 1.0170072317123413,grad_norm: 0.9479046285210323, iteration: 369593
loss: 0.9694738388061523,grad_norm: 0.7415441346598499, iteration: 369594
loss: 1.0784739255905151,grad_norm: 0.9999992341626942, iteration: 369595
loss: 0.996696412563324,grad_norm: 0.8840682142905243, iteration: 369596
loss: 0.9847339987754822,grad_norm: 0.8207762139709479, iteration: 369597
loss: 0.9971269965171814,grad_norm: 0.726265325745792, iteration: 369598
loss: 0.9923251867294312,grad_norm: 0.6495447163153502, iteration: 369599
loss: 0.994539737701416,grad_norm: 0.9966068860827622, iteration: 369600
loss: 1.0469976663589478,grad_norm: 0.9999991445650527, iteration: 369601
loss: 0.9992310404777527,grad_norm: 0.8668384940984469, iteration: 369602
loss: 0.9561578035354614,grad_norm: 0.7286950442958691, iteration: 369603
loss: 1.0454801321029663,grad_norm: 0.9368067063036813, iteration: 369604
loss: 1.008235216140747,grad_norm: 0.9335539736329819, iteration: 369605
loss: 1.1236642599105835,grad_norm: 0.7866762575587624, iteration: 369606
loss: 1.0315814018249512,grad_norm: 0.8407093627752472, iteration: 369607
loss: 0.9815239310264587,grad_norm: 0.9407174652068552, iteration: 369608
loss: 0.9930894374847412,grad_norm: 0.9515692049825148, iteration: 369609
loss: 1.024425983428955,grad_norm: 0.7492611830484003, iteration: 369610
loss: 1.0329033136367798,grad_norm: 0.7457504928310654, iteration: 369611
loss: 0.9808630347251892,grad_norm: 0.8187422293095198, iteration: 369612
loss: 1.0420631170272827,grad_norm: 0.8020118511230305, iteration: 369613
loss: 0.9809192419052124,grad_norm: 0.788984088895138, iteration: 369614
loss: 1.0195118188858032,grad_norm: 0.6583603903428706, iteration: 369615
loss: 0.9893373847007751,grad_norm: 0.9999995331202144, iteration: 369616
loss: 0.9800240397453308,grad_norm: 0.9999995779851221, iteration: 369617
loss: 1.030216097831726,grad_norm: 0.8062318199925254, iteration: 369618
loss: 0.9695447087287903,grad_norm: 0.8744658634134908, iteration: 369619
loss: 1.0461901426315308,grad_norm: 0.9999993598023219, iteration: 369620
loss: 0.9791586399078369,grad_norm: 0.9999997787837087, iteration: 369621
loss: 0.9973435997962952,grad_norm: 0.9071103254452862, iteration: 369622
loss: 0.9951246380805969,grad_norm: 0.8238219162212053, iteration: 369623
loss: 1.0118846893310547,grad_norm: 0.7866956106632592, iteration: 369624
loss: 1.0081464052200317,grad_norm: 0.7223652541959827, iteration: 369625
loss: 1.0027074813842773,grad_norm: 0.8902152421396621, iteration: 369626
loss: 0.9848496317863464,grad_norm: 0.9999991316210349, iteration: 369627
loss: 1.0058177709579468,grad_norm: 0.8075410294821181, iteration: 369628
loss: 0.9975243806838989,grad_norm: 0.9317910741735578, iteration: 369629
loss: 0.9971624612808228,grad_norm: 0.6924676694518977, iteration: 369630
loss: 1.0696502923965454,grad_norm: 0.7438989548125918, iteration: 369631
loss: 0.9794861674308777,grad_norm: 0.999999924912852, iteration: 369632
loss: 0.9822334051132202,grad_norm: 0.790594791052451, iteration: 369633
loss: 1.0263937711715698,grad_norm: 0.79625588160872, iteration: 369634
loss: 1.0625423192977905,grad_norm: 0.9315397352669376, iteration: 369635
loss: 1.0017871856689453,grad_norm: 0.7201401743513731, iteration: 369636
loss: 1.083032250404358,grad_norm: 0.9999999866479304, iteration: 369637
loss: 0.9727033972740173,grad_norm: 0.9999998939005703, iteration: 369638
loss: 0.9863656163215637,grad_norm: 0.7150579744834797, iteration: 369639
loss: 1.029829740524292,grad_norm: 0.9999995878718836, iteration: 369640
loss: 1.0111151933670044,grad_norm: 0.9999990830622009, iteration: 369641
loss: 0.9904659390449524,grad_norm: 0.8087903277219942, iteration: 369642
loss: 1.020599603652954,grad_norm: 0.7803431325984107, iteration: 369643
loss: 1.0067874193191528,grad_norm: 0.8357625240264616, iteration: 369644
loss: 1.013146162033081,grad_norm: 0.9374974649938289, iteration: 369645
loss: 0.993550181388855,grad_norm: 0.7692719021996046, iteration: 369646
loss: 0.9789235591888428,grad_norm: 0.8758134181070678, iteration: 369647
loss: 1.0516505241394043,grad_norm: 0.9999996715469749, iteration: 369648
loss: 1.0441139936447144,grad_norm: 0.9999990611629741, iteration: 369649
loss: 1.0014513731002808,grad_norm: 0.818947763290954, iteration: 369650
loss: 0.9834386706352234,grad_norm: 0.7553075365217145, iteration: 369651
loss: 1.0198721885681152,grad_norm: 0.7647797033837426, iteration: 369652
loss: 0.9894213080406189,grad_norm: 0.7502008041387433, iteration: 369653
loss: 0.9759924411773682,grad_norm: 0.9489765087415516, iteration: 369654
loss: 1.0184991359710693,grad_norm: 0.6908954980984511, iteration: 369655
loss: 0.9948044419288635,grad_norm: 0.7332630741338179, iteration: 369656
loss: 1.0599846839904785,grad_norm: 0.9999995008337107, iteration: 369657
loss: 1.022315263748169,grad_norm: 0.7576615820540505, iteration: 369658
loss: 1.0480966567993164,grad_norm: 0.9164636432802524, iteration: 369659
loss: 0.9853129982948303,grad_norm: 0.6453740770524801, iteration: 369660
loss: 1.009331464767456,grad_norm: 0.8809728658188364, iteration: 369661
loss: 1.1549009084701538,grad_norm: 0.9999999794738099, iteration: 369662
loss: 1.0254391431808472,grad_norm: 0.9999990755417443, iteration: 369663
loss: 0.9784224629402161,grad_norm: 0.7920375541282924, iteration: 369664
loss: 1.011946678161621,grad_norm: 0.8626479447832387, iteration: 369665
loss: 1.0107409954071045,grad_norm: 0.9331343358525813, iteration: 369666
loss: 1.0241845846176147,grad_norm: 0.7985093052396609, iteration: 369667
loss: 1.021383285522461,grad_norm: 0.9999997008088497, iteration: 369668
loss: 1.0659101009368896,grad_norm: 0.9999993326920541, iteration: 369669
loss: 1.0256444215774536,grad_norm: 0.7875117241796081, iteration: 369670
loss: 0.9986888766288757,grad_norm: 0.7885861265094863, iteration: 369671
loss: 1.0224529504776,grad_norm: 0.7616384156460008, iteration: 369672
loss: 0.9972963333129883,grad_norm: 0.8093146707819131, iteration: 369673
loss: 1.0396078824996948,grad_norm: 0.8756175566014445, iteration: 369674
loss: 1.0034282207489014,grad_norm: 0.8325447747409392, iteration: 369675
loss: 0.9815802574157715,grad_norm: 0.8123698349829647, iteration: 369676
loss: 1.014086365699768,grad_norm: 0.7637508886245892, iteration: 369677
loss: 1.0061677694320679,grad_norm: 0.9999997547127952, iteration: 369678
loss: 1.0335856676101685,grad_norm: 0.8375753048888857, iteration: 369679
loss: 1.3381026983261108,grad_norm: 0.9999998936177242, iteration: 369680
loss: 0.9716756343841553,grad_norm: 0.9957776313567112, iteration: 369681
loss: 1.0119223594665527,grad_norm: 0.6587043495876382, iteration: 369682
loss: 1.013654351234436,grad_norm: 0.9534576993049596, iteration: 369683
loss: 1.1680846214294434,grad_norm: 0.8920393128032357, iteration: 369684
loss: 0.9704819321632385,grad_norm: 0.835021870783166, iteration: 369685
loss: 1.0012086629867554,grad_norm: 0.6570526697477369, iteration: 369686
loss: 1.077648639678955,grad_norm: 0.9999998883821575, iteration: 369687
loss: 0.990024209022522,grad_norm: 0.743870099230181, iteration: 369688
loss: 1.0187084674835205,grad_norm: 0.8244153255473511, iteration: 369689
loss: 0.9637769460678101,grad_norm: 0.7491621378940789, iteration: 369690
loss: 1.1163939237594604,grad_norm: 0.9999992497124538, iteration: 369691
loss: 0.9830256104469299,grad_norm: 0.7728025761703776, iteration: 369692
loss: 1.049458384513855,grad_norm: 0.9208726216091637, iteration: 369693
loss: 1.0656702518463135,grad_norm: 0.9999999437721803, iteration: 369694
loss: 0.9938869476318359,grad_norm: 0.759306107045098, iteration: 369695
loss: 0.9585205912590027,grad_norm: 0.8982344152796069, iteration: 369696
loss: 0.9558342099189758,grad_norm: 0.842350164519649, iteration: 369697
loss: 0.9785584211349487,grad_norm: 0.8538398223419038, iteration: 369698
loss: 0.979070782661438,grad_norm: 0.8848818299328376, iteration: 369699
loss: 1.0289033651351929,grad_norm: 0.9999994084140276, iteration: 369700
loss: 0.9983572363853455,grad_norm: 0.9999997053540431, iteration: 369701
loss: 1.0600565671920776,grad_norm: 0.9197182904204794, iteration: 369702
loss: 1.0529170036315918,grad_norm: 0.9999999970204974, iteration: 369703
loss: 1.0717860460281372,grad_norm: 0.9999996539775152, iteration: 369704
loss: 1.0335193872451782,grad_norm: 0.813263074294127, iteration: 369705
loss: 0.9746144413948059,grad_norm: 0.7913791607920208, iteration: 369706
loss: 1.0396525859832764,grad_norm: 0.8853703074963966, iteration: 369707
loss: 0.9873852729797363,grad_norm: 0.9999997342784097, iteration: 369708
loss: 0.9633615016937256,grad_norm: 0.8031285172830461, iteration: 369709
loss: 0.9622800946235657,grad_norm: 0.8423315092137369, iteration: 369710
loss: 1.2155358791351318,grad_norm: 0.9310616715955163, iteration: 369711
loss: 1.0173938274383545,grad_norm: 0.822711983055261, iteration: 369712
loss: 0.997758150100708,grad_norm: 0.7410758664352302, iteration: 369713
loss: 0.9911832809448242,grad_norm: 0.9169537832438884, iteration: 369714
loss: 0.9913819432258606,grad_norm: 0.819871982368914, iteration: 369715
loss: 1.0490132570266724,grad_norm: 0.9999994642840805, iteration: 369716
loss: 0.9797561764717102,grad_norm: 0.8148227125704506, iteration: 369717
loss: 1.002598762512207,grad_norm: 0.8426694877170775, iteration: 369718
loss: 0.9426851868629456,grad_norm: 0.9999990302204073, iteration: 369719
loss: 1.016303300857544,grad_norm: 0.7737266361015664, iteration: 369720
loss: 1.00324547290802,grad_norm: 0.8768124068928561, iteration: 369721
loss: 1.0659747123718262,grad_norm: 0.9999998810048225, iteration: 369722
loss: 1.069271445274353,grad_norm: 0.753728938128871, iteration: 369723
loss: 0.9713035821914673,grad_norm: 0.9695540488440844, iteration: 369724
loss: 1.0103389024734497,grad_norm: 0.8580388909296419, iteration: 369725
loss: 0.9789631962776184,grad_norm: 0.8864674635054819, iteration: 369726
loss: 1.0386950969696045,grad_norm: 0.8103917002497245, iteration: 369727
loss: 1.0038989782333374,grad_norm: 0.8560804130356688, iteration: 369728
loss: 1.0041152238845825,grad_norm: 0.925768793300089, iteration: 369729
loss: 0.96053147315979,grad_norm: 0.9104280741660845, iteration: 369730
loss: 1.052035927772522,grad_norm: 0.9999992749575759, iteration: 369731
loss: 0.9642495512962341,grad_norm: 0.7247069542524854, iteration: 369732
loss: 1.014446496963501,grad_norm: 0.7085807098830286, iteration: 369733
loss: 0.9756425023078918,grad_norm: 0.670478871794699, iteration: 369734
loss: 1.0689764022827148,grad_norm: 0.9999997436351423, iteration: 369735
loss: 0.9943864941596985,grad_norm: 0.7147855522727965, iteration: 369736
loss: 1.0065479278564453,grad_norm: 0.7393672563270322, iteration: 369737
loss: 0.9593275785446167,grad_norm: 0.8701216950810204, iteration: 369738
loss: 0.9935645461082458,grad_norm: 0.8159305224772947, iteration: 369739
loss: 1.0441968441009521,grad_norm: 0.9999990878738556, iteration: 369740
loss: 1.0168359279632568,grad_norm: 0.7904668407609365, iteration: 369741
loss: 1.0092501640319824,grad_norm: 0.9419687618066718, iteration: 369742
loss: 1.0668067932128906,grad_norm: 0.9999991356046386, iteration: 369743
loss: 1.0107728242874146,grad_norm: 0.9371247451059299, iteration: 369744
loss: 1.0968612432479858,grad_norm: 0.9999993560787831, iteration: 369745
loss: 1.0122801065444946,grad_norm: 0.7678399606956617, iteration: 369746
loss: 1.0032012462615967,grad_norm: 0.8781398642487624, iteration: 369747
loss: 0.9820950031280518,grad_norm: 0.799603677383084, iteration: 369748
loss: 1.043967843055725,grad_norm: 0.6978296157635544, iteration: 369749
loss: 1.0286353826522827,grad_norm: 0.7528168452780628, iteration: 369750
loss: 1.026670217514038,grad_norm: 0.8666292162207543, iteration: 369751
loss: 0.9715697169303894,grad_norm: 0.985972230821216, iteration: 369752
loss: 0.9600236415863037,grad_norm: 0.85477333049318, iteration: 369753
loss: 1.0642505884170532,grad_norm: 0.912132956795109, iteration: 369754
loss: 1.0265027284622192,grad_norm: 0.9999998074237484, iteration: 369755
loss: 1.0135271549224854,grad_norm: 0.8755052054969514, iteration: 369756
loss: 0.9551620483398438,grad_norm: 0.8234839822768385, iteration: 369757
loss: 1.0471644401550293,grad_norm: 0.999999900631267, iteration: 369758
loss: 1.0965014696121216,grad_norm: 0.9189695502525015, iteration: 369759
loss: 1.1109044551849365,grad_norm: 0.8754627544024665, iteration: 369760
loss: 0.9617248773574829,grad_norm: 0.7687537346027569, iteration: 369761
loss: 0.9948787093162537,grad_norm: 0.7742947128465645, iteration: 369762
loss: 0.9648873805999756,grad_norm: 0.8296916558140968, iteration: 369763
loss: 1.0241312980651855,grad_norm: 0.9030545710069687, iteration: 369764
loss: 1.0017770528793335,grad_norm: 0.97357887379855, iteration: 369765
loss: 0.9907907247543335,grad_norm: 0.829634356380161, iteration: 369766
loss: 0.9789546132087708,grad_norm: 0.8149437293301898, iteration: 369767
loss: 0.9714772701263428,grad_norm: 0.7707778004151181, iteration: 369768
loss: 1.0008565187454224,grad_norm: 0.8068490118956353, iteration: 369769
loss: 1.0276700258255005,grad_norm: 0.8705728528449279, iteration: 369770
loss: 1.0050402879714966,grad_norm: 0.7524550386537382, iteration: 369771
loss: 0.990505039691925,grad_norm: 0.745282007163592, iteration: 369772
loss: 0.9588609337806702,grad_norm: 0.9035815975405782, iteration: 369773
loss: 0.9941646456718445,grad_norm: 0.7893554903318557, iteration: 369774
loss: 0.9998344779014587,grad_norm: 0.9051019494964123, iteration: 369775
loss: 0.9870441555976868,grad_norm: 0.8302748326500337, iteration: 369776
loss: 1.1031478643417358,grad_norm: 0.9447606995052155, iteration: 369777
loss: 0.9745903611183167,grad_norm: 0.9999999513575883, iteration: 369778
loss: 0.9989820718765259,grad_norm: 0.875147787850742, iteration: 369779
loss: 0.984478771686554,grad_norm: 0.7776860099815465, iteration: 369780
loss: 1.06838858127594,grad_norm: 1.0000000217380505, iteration: 369781
loss: 0.9963962435722351,grad_norm: 0.9999992383494248, iteration: 369782
loss: 0.9879136681556702,grad_norm: 0.7254525068481453, iteration: 369783
loss: 1.0208297967910767,grad_norm: 0.9457060469221902, iteration: 369784
loss: 0.9711988568305969,grad_norm: 0.9999992531965824, iteration: 369785
loss: 1.0483933687210083,grad_norm: 0.8671254887282583, iteration: 369786
loss: 1.0393399000167847,grad_norm: 0.9999990465628291, iteration: 369787
loss: 0.9921503663063049,grad_norm: 0.8218027672534022, iteration: 369788
loss: 1.060112714767456,grad_norm: 0.718138050006945, iteration: 369789
loss: 1.0314329862594604,grad_norm: 0.6258879543056525, iteration: 369790
loss: 1.0215705633163452,grad_norm: 0.9999992946187527, iteration: 369791
loss: 1.0091161727905273,grad_norm: 0.9600478659402331, iteration: 369792
loss: 1.029672384262085,grad_norm: 0.9060982028317577, iteration: 369793
loss: 0.99214768409729,grad_norm: 0.9999997485914904, iteration: 369794
loss: 0.998357892036438,grad_norm: 0.8733721430422744, iteration: 369795
loss: 1.0244656801223755,grad_norm: 0.8112210619620084, iteration: 369796
loss: 1.0718437433242798,grad_norm: 0.8313131521988534, iteration: 369797
loss: 0.9747705459594727,grad_norm: 0.9999998544608678, iteration: 369798
loss: 1.0416609048843384,grad_norm: 0.8674568119665297, iteration: 369799
loss: 0.9990957379341125,grad_norm: 0.8309821516851655, iteration: 369800
loss: 0.9882019758224487,grad_norm: 0.9317412924629104, iteration: 369801
loss: 1.0400912761688232,grad_norm: 0.9999992799057977, iteration: 369802
loss: 1.0100657939910889,grad_norm: 0.99999997904635, iteration: 369803
loss: 0.9792158007621765,grad_norm: 0.8391320283996255, iteration: 369804
loss: 0.9815123081207275,grad_norm: 0.9896003141828994, iteration: 369805
loss: 1.001883864402771,grad_norm: 0.7976939846354439, iteration: 369806
loss: 1.002843976020813,grad_norm: 0.8584815827216176, iteration: 369807
loss: 1.019997000694275,grad_norm: 0.9999991201841388, iteration: 369808
loss: 1.1269729137420654,grad_norm: 0.8694200405237177, iteration: 369809
loss: 0.9957994818687439,grad_norm: 1.0000000294272515, iteration: 369810
loss: 1.0485777854919434,grad_norm: 0.7651037738453756, iteration: 369811
loss: 1.0028128623962402,grad_norm: 0.8966636814358764, iteration: 369812
loss: 1.0148848295211792,grad_norm: 0.8153177277018638, iteration: 369813
loss: 1.0261807441711426,grad_norm: 0.7639170887267236, iteration: 369814
loss: 0.9729745984077454,grad_norm: 0.8602659846410021, iteration: 369815
loss: 1.005170464515686,grad_norm: 0.8167946354737852, iteration: 369816
loss: 1.0712459087371826,grad_norm: 0.9778516399501661, iteration: 369817
loss: 1.0055729150772095,grad_norm: 0.8661162805903893, iteration: 369818
loss: 1.0253584384918213,grad_norm: 0.68961771231403, iteration: 369819
loss: 0.9916066527366638,grad_norm: 0.9999991147418819, iteration: 369820
loss: 1.0365859270095825,grad_norm: 0.9999992128880941, iteration: 369821
loss: 1.0392035245895386,grad_norm: 0.9999991877041193, iteration: 369822
loss: 1.1018272638320923,grad_norm: 0.7395025547533172, iteration: 369823
loss: 0.9964600205421448,grad_norm: 0.9999996500729452, iteration: 369824
loss: 1.0187984704971313,grad_norm: 0.8992559680896967, iteration: 369825
loss: 1.013903260231018,grad_norm: 0.765399352345585, iteration: 369826
loss: 0.9889881610870361,grad_norm: 0.9159086415401355, iteration: 369827
loss: 1.1283849477767944,grad_norm: 0.9999996356097289, iteration: 369828
loss: 1.0754280090332031,grad_norm: 0.9999992029458893, iteration: 369829
loss: 1.0650019645690918,grad_norm: 0.817134845787383, iteration: 369830
loss: 0.9608070254325867,grad_norm: 0.7755986004327652, iteration: 369831
loss: 1.0073474645614624,grad_norm: 0.5567203957061615, iteration: 369832
loss: 1.0234850645065308,grad_norm: 0.8126007265569865, iteration: 369833
loss: 1.0052865743637085,grad_norm: 0.7883929448611239, iteration: 369834
loss: 0.9917336106300354,grad_norm: 0.761324353105532, iteration: 369835
loss: 1.0180708169937134,grad_norm: 0.8410506395050302, iteration: 369836
loss: 1.0120645761489868,grad_norm: 0.6986104940693237, iteration: 369837
loss: 0.999568521976471,grad_norm: 0.7907289352948168, iteration: 369838
loss: 0.9516041874885559,grad_norm: 0.7660092942703736, iteration: 369839
loss: 1.060072422027588,grad_norm: 0.999999229241024, iteration: 369840
loss: 1.0034761428833008,grad_norm: 0.9999998395355105, iteration: 369841
loss: 0.9591558575630188,grad_norm: 0.770800519020725, iteration: 369842
loss: 1.056232213973999,grad_norm: 0.746053518179373, iteration: 369843
loss: 0.9904569387435913,grad_norm: 0.8286171200280733, iteration: 369844
loss: 1.0124080181121826,grad_norm: 0.9999990775904589, iteration: 369845
loss: 1.0148500204086304,grad_norm: 0.7688649018257876, iteration: 369846
loss: 1.0105853080749512,grad_norm: 0.7795560885006051, iteration: 369847
loss: 0.9588969349861145,grad_norm: 0.7726423189243093, iteration: 369848
loss: 1.043573260307312,grad_norm: 0.8089735722083903, iteration: 369849
loss: 1.0128759145736694,grad_norm: 0.9700676866742537, iteration: 369850
loss: 1.0259838104248047,grad_norm: 0.8352680072486827, iteration: 369851
loss: 0.9661728143692017,grad_norm: 0.7681152164429986, iteration: 369852
loss: 1.0335572957992554,grad_norm: 0.7520029557922334, iteration: 369853
loss: 0.9925571084022522,grad_norm: 0.99999925575244, iteration: 369854
loss: 1.0028616189956665,grad_norm: 0.787147104931728, iteration: 369855
loss: 1.020965337753296,grad_norm: 0.9999995213023373, iteration: 369856
loss: 0.9989956021308899,grad_norm: 0.7243190052759122, iteration: 369857
loss: 1.0075420141220093,grad_norm: 0.8332436416885713, iteration: 369858
loss: 1.0166875123977661,grad_norm: 0.742554539188595, iteration: 369859
loss: 1.0213623046875,grad_norm: 0.9999990653728932, iteration: 369860
loss: 1.0026503801345825,grad_norm: 0.7541342045435308, iteration: 369861
loss: 0.9883477091789246,grad_norm: 0.8507324508604543, iteration: 369862
loss: 1.021546483039856,grad_norm: 0.9150121124102377, iteration: 369863
loss: 1.019881010055542,grad_norm: 0.7065308310970639, iteration: 369864
loss: 1.0187078714370728,grad_norm: 0.9999993216256938, iteration: 369865
loss: 1.0037685632705688,grad_norm: 0.7109094851867592, iteration: 369866
loss: 1.0165293216705322,grad_norm: 0.7549112279879633, iteration: 369867
loss: 1.1495311260223389,grad_norm: 0.999999951999726, iteration: 369868
loss: 0.9965332746505737,grad_norm: 0.9999999187523589, iteration: 369869
loss: 0.9995886087417603,grad_norm: 0.9737729425914358, iteration: 369870
loss: 0.9940129518508911,grad_norm: 0.637845908552487, iteration: 369871
loss: 1.0209702253341675,grad_norm: 0.878985926529762, iteration: 369872
loss: 0.9734345078468323,grad_norm: 0.9999991112799259, iteration: 369873
loss: 1.1217002868652344,grad_norm: 0.999999454495382, iteration: 369874
loss: 1.0221052169799805,grad_norm: 0.7872396692465564, iteration: 369875
loss: 1.0073249340057373,grad_norm: 0.6554178308305783, iteration: 369876
loss: 0.9863385558128357,grad_norm: 0.9370955346881065, iteration: 369877
loss: 1.051104187965393,grad_norm: 0.9999991058615785, iteration: 369878
loss: 0.9986817836761475,grad_norm: 0.9785059414128873, iteration: 369879
loss: 1.0141981840133667,grad_norm: 0.7880364942431636, iteration: 369880
loss: 1.117782711982727,grad_norm: 0.9999992001067751, iteration: 369881
loss: 1.037251591682434,grad_norm: 0.7889721452409758, iteration: 369882
loss: 1.0750327110290527,grad_norm: 0.999999257054292, iteration: 369883
loss: 0.9822208285331726,grad_norm: 0.9999994050897766, iteration: 369884
loss: 0.9829708933830261,grad_norm: 0.7822052375598254, iteration: 369885
loss: 1.0046907663345337,grad_norm: 0.7414915623690952, iteration: 369886
loss: 0.9866568446159363,grad_norm: 0.999999165186993, iteration: 369887
loss: 0.997643232345581,grad_norm: 0.7366662484665748, iteration: 369888
loss: 0.996837317943573,grad_norm: 0.7852620525937813, iteration: 369889
loss: 1.0031044483184814,grad_norm: 0.7461186381708913, iteration: 369890
loss: 1.0106209516525269,grad_norm: 0.792050914400591, iteration: 369891
loss: 1.0214542150497437,grad_norm: 0.8179222209483269, iteration: 369892
loss: 0.992769181728363,grad_norm: 0.8596535783252047, iteration: 369893
loss: 1.0097062587738037,grad_norm: 0.8838837674317892, iteration: 369894
loss: 0.9944786429405212,grad_norm: 0.7197844134682139, iteration: 369895
loss: 0.9711849689483643,grad_norm: 0.8384139855277538, iteration: 369896
loss: 1.027122974395752,grad_norm: 0.8777004618597877, iteration: 369897
loss: 1.0128653049468994,grad_norm: 0.7799225944450516, iteration: 369898
loss: 1.0119343996047974,grad_norm: 0.8814994491103996, iteration: 369899
loss: 0.987389326095581,grad_norm: 0.738103179792619, iteration: 369900
loss: 0.9962819814682007,grad_norm: 0.7824590071612617, iteration: 369901
loss: 1.022422432899475,grad_norm: 0.8566440445178537, iteration: 369902
loss: 1.0897778272628784,grad_norm: 0.9999992118414636, iteration: 369903
loss: 1.0098427534103394,grad_norm: 0.8531902916276571, iteration: 369904
loss: 1.0351899862289429,grad_norm: 0.9999993585278822, iteration: 369905
loss: 1.0000853538513184,grad_norm: 0.7713690219597003, iteration: 369906
loss: 1.000115990638733,grad_norm: 0.7262179134688862, iteration: 369907
loss: 0.9938943982124329,grad_norm: 0.6951209874217746, iteration: 369908
loss: 1.0301761627197266,grad_norm: 0.9999995637692648, iteration: 369909
loss: 1.0679142475128174,grad_norm: 0.9999996954920182, iteration: 369910
loss: 1.0157415866851807,grad_norm: 0.8364231392009189, iteration: 369911
loss: 1.0311936140060425,grad_norm: 0.7622477523058276, iteration: 369912
loss: 0.9954531788825989,grad_norm: 0.8408919847413161, iteration: 369913
loss: 0.9605882167816162,grad_norm: 0.8812922056674279, iteration: 369914
loss: 1.0251718759536743,grad_norm: 0.9758737707066047, iteration: 369915
loss: 1.0811288356781006,grad_norm: 0.8969806469401311, iteration: 369916
loss: 1.0183603763580322,grad_norm: 0.9999990080328539, iteration: 369917
loss: 1.0080416202545166,grad_norm: 0.801658655666613, iteration: 369918
loss: 1.0790843963623047,grad_norm: 0.7641120655158117, iteration: 369919
loss: 1.009626030921936,grad_norm: 0.7847394802518263, iteration: 369920
loss: 0.9973657727241516,grad_norm: 0.8148700534836002, iteration: 369921
loss: 0.9910190105438232,grad_norm: 0.8294169533839911, iteration: 369922
loss: 0.986484706401825,grad_norm: 0.9259201156995917, iteration: 369923
loss: 1.0575441122055054,grad_norm: 1.0000000415662107, iteration: 369924
loss: 1.0419703722000122,grad_norm: 0.9999990978986975, iteration: 369925
loss: 1.0013841390609741,grad_norm: 0.9326450111939656, iteration: 369926
loss: 1.0728188753128052,grad_norm: 0.8499005662387499, iteration: 369927
loss: 1.0009921789169312,grad_norm: 0.9408713669063293, iteration: 369928
loss: 1.0136643648147583,grad_norm: 0.7765003020069056, iteration: 369929
loss: 1.0364004373550415,grad_norm: 0.9999992615805884, iteration: 369930
loss: 0.9663845300674438,grad_norm: 0.7815709176563306, iteration: 369931
loss: 0.9776172041893005,grad_norm: 0.9200180235144038, iteration: 369932
loss: 1.0359896421432495,grad_norm: 0.9999990949597555, iteration: 369933
loss: 1.0305877923965454,grad_norm: 0.985914311929153, iteration: 369934
loss: 0.9742920398712158,grad_norm: 0.8632898656331317, iteration: 369935
loss: 1.006367564201355,grad_norm: 0.8878131670993444, iteration: 369936
loss: 1.027390956878662,grad_norm: 0.8276858969351396, iteration: 369937
loss: 1.0077521800994873,grad_norm: 0.7970619728985899, iteration: 369938
loss: 1.0163397789001465,grad_norm: 0.7224803464899588, iteration: 369939
loss: 0.9851416349411011,grad_norm: 0.7727366522308626, iteration: 369940
loss: 0.9865479469299316,grad_norm: 0.71442434952747, iteration: 369941
loss: 0.9786558151245117,grad_norm: 0.7893534365379579, iteration: 369942
loss: 0.9941613674163818,grad_norm: 0.7280561993238741, iteration: 369943
loss: 1.0165077447891235,grad_norm: 0.9999994655553975, iteration: 369944
loss: 0.9570342898368835,grad_norm: 0.8986117131436193, iteration: 369945
loss: 1.0151022672653198,grad_norm: 0.8143566324454232, iteration: 369946
loss: 0.9994960427284241,grad_norm: 0.879465403814977, iteration: 369947
loss: 0.9749869108200073,grad_norm: 0.9473673637839943, iteration: 369948
loss: 1.4739081859588623,grad_norm: 0.9999999454324339, iteration: 369949
loss: 1.0195326805114746,grad_norm: 0.997486643795688, iteration: 369950
loss: 0.9908850193023682,grad_norm: 0.9391540790629251, iteration: 369951
loss: 0.9768890738487244,grad_norm: 0.8454507604545698, iteration: 369952
loss: 0.9720829725265503,grad_norm: 0.6890841298570067, iteration: 369953
loss: 1.0038598775863647,grad_norm: 0.8411597948940567, iteration: 369954
loss: 0.9971932172775269,grad_norm: 0.6666141420996899, iteration: 369955
loss: 0.9592975974082947,grad_norm: 0.8583909177261543, iteration: 369956
loss: 1.152807593345642,grad_norm: 0.9999998190479903, iteration: 369957
loss: 1.0345348119735718,grad_norm: 0.730308415628188, iteration: 369958
loss: 1.0298117399215698,grad_norm: 0.9781236745927558, iteration: 369959
loss: 0.9946994185447693,grad_norm: 0.7725365599201313, iteration: 369960
loss: 1.1392902135849,grad_norm: 0.9999995591859946, iteration: 369961
loss: 0.9733842015266418,grad_norm: 0.8668011411659435, iteration: 369962
loss: 0.9824195504188538,grad_norm: 0.7639159138220878, iteration: 369963
loss: 0.9751495718955994,grad_norm: 0.7396286204938876, iteration: 369964
loss: 0.9721183180809021,grad_norm: 0.8902910915230066, iteration: 369965
loss: 1.0294783115386963,grad_norm: 0.9999991530674079, iteration: 369966
loss: 1.035882592201233,grad_norm: 0.9999992508972609, iteration: 369967
loss: 1.0600217580795288,grad_norm: 0.999999200011266, iteration: 369968
loss: 1.0314570665359497,grad_norm: 0.999999256233742, iteration: 369969
loss: 1.049726963043213,grad_norm: 0.7916227413946306, iteration: 369970
loss: 1.0002323389053345,grad_norm: 0.6795121023713044, iteration: 369971
loss: 0.9868314862251282,grad_norm: 0.9999992511968512, iteration: 369972
loss: 1.013687014579773,grad_norm: 0.671855940544837, iteration: 369973
loss: 1.0532563924789429,grad_norm: 0.9472716587680388, iteration: 369974
loss: 1.031826376914978,grad_norm: 0.7617265431142337, iteration: 369975
loss: 1.0312868356704712,grad_norm: 0.7640684636890818, iteration: 369976
loss: 1.036960482597351,grad_norm: 0.9999999783062941, iteration: 369977
loss: 0.9839242100715637,grad_norm: 0.7700508159635256, iteration: 369978
loss: 1.0336140394210815,grad_norm: 0.9035794392037222, iteration: 369979
loss: 1.0270053148269653,grad_norm: 0.7163839075781073, iteration: 369980
loss: 1.0109535455703735,grad_norm: 0.8399220341758513, iteration: 369981
loss: 0.9922063946723938,grad_norm: 0.8809575016245581, iteration: 369982
loss: 1.007067084312439,grad_norm: 0.8598442032805464, iteration: 369983
loss: 0.994063675403595,grad_norm: 0.8090566312357445, iteration: 369984
loss: 1.0079363584518433,grad_norm: 0.7973019810931107, iteration: 369985
loss: 1.0208631753921509,grad_norm: 0.7212629061196422, iteration: 369986
loss: 1.007896065711975,grad_norm: 0.9999993955928622, iteration: 369987
loss: 1.0190188884735107,grad_norm: 0.9999990386745783, iteration: 369988
loss: 1.0045571327209473,grad_norm: 0.8403726449537204, iteration: 369989
loss: 1.0319007635116577,grad_norm: 0.9999991618904444, iteration: 369990
loss: 0.9965728521347046,grad_norm: 0.7308145274447363, iteration: 369991
loss: 0.9683090448379517,grad_norm: 0.8949633063742604, iteration: 369992
loss: 1.047914743423462,grad_norm: 0.9999995156277531, iteration: 369993
loss: 0.9690360426902771,grad_norm: 0.7117309103506427, iteration: 369994
loss: 0.9864163398742676,grad_norm: 0.8043116985204141, iteration: 369995
loss: 0.9829291701316833,grad_norm: 0.9999991603761847, iteration: 369996
loss: 0.9709727168083191,grad_norm: 0.7993450987098847, iteration: 369997
loss: 0.9999442100524902,grad_norm: 0.9334597596777539, iteration: 369998
loss: 0.9748080372810364,grad_norm: 0.848636283945869, iteration: 369999
loss: 1.0009064674377441,grad_norm: 0.8718609676261602, iteration: 370000
Evaluating at step 370000
{'val': 0.9961965177208185, 'test': 1.8979419382235494}
loss: 1.0333935022354126,grad_norm: 0.8792091520904289, iteration: 370001
loss: 1.0628314018249512,grad_norm: 0.9999998034652341, iteration: 370002
loss: 1.0148074626922607,grad_norm: 0.7681288220792546, iteration: 370003
loss: 1.0090768337249756,grad_norm: 0.8534776432001554, iteration: 370004
loss: 1.0045493841171265,grad_norm: 0.7068573906735193, iteration: 370005
loss: 1.030194640159607,grad_norm: 0.8838463176131927, iteration: 370006
loss: 1.0720796585083008,grad_norm: 0.999999331860485, iteration: 370007
loss: 1.0183900594711304,grad_norm: 0.7918513813856368, iteration: 370008
loss: 1.0046889781951904,grad_norm: 0.9763887695107214, iteration: 370009
loss: 1.008131980895996,grad_norm: 0.8450880459970517, iteration: 370010
loss: 1.0235720872879028,grad_norm: 0.6611986988324968, iteration: 370011
loss: 1.0111045837402344,grad_norm: 0.7714599171588206, iteration: 370012
loss: 0.987531840801239,grad_norm: 0.891270772939794, iteration: 370013
loss: 1.0214264392852783,grad_norm: 0.9309195763327955, iteration: 370014
loss: 1.0421615839004517,grad_norm: 0.874287331474283, iteration: 370015
loss: 0.9939444065093994,grad_norm: 0.9392658151965513, iteration: 370016
loss: 1.018197774887085,grad_norm: 0.7725886844533741, iteration: 370017
loss: 0.9877073764801025,grad_norm: 0.6258704785960195, iteration: 370018
loss: 1.0166889429092407,grad_norm: 0.9632286720883458, iteration: 370019
loss: 1.0272972583770752,grad_norm: 0.6785413499393624, iteration: 370020
loss: 1.0013798475265503,grad_norm: 0.7410788695008244, iteration: 370021
loss: 1.021347999572754,grad_norm: 0.8954545710170287, iteration: 370022
loss: 1.0033721923828125,grad_norm: 0.8178105066312906, iteration: 370023
loss: 0.9455914497375488,grad_norm: 0.8671552694873695, iteration: 370024
loss: 1.1066110134124756,grad_norm: 0.9999991637494264, iteration: 370025
loss: 1.0109548568725586,grad_norm: 0.8157855639920633, iteration: 370026
loss: 0.9719228744506836,grad_norm: 0.7166360686901336, iteration: 370027
loss: 0.9857938289642334,grad_norm: 0.7975993406370626, iteration: 370028
loss: 0.9590914845466614,grad_norm: 0.7731738049413742, iteration: 370029
loss: 0.996529221534729,grad_norm: 0.768089556258328, iteration: 370030
loss: 1.0130215883255005,grad_norm: 0.8693713182773112, iteration: 370031
loss: 0.9860926270484924,grad_norm: 0.7319495266368847, iteration: 370032
loss: 0.9958688616752625,grad_norm: 0.7412399563175831, iteration: 370033
loss: 1.1071922779083252,grad_norm: 0.9999998456141211, iteration: 370034
loss: 0.9802270531654358,grad_norm: 0.8897021498586702, iteration: 370035
loss: 0.9884384274482727,grad_norm: 0.8348140514004928, iteration: 370036
loss: 0.9894605875015259,grad_norm: 0.7998150422637894, iteration: 370037
loss: 1.0085244178771973,grad_norm: 0.8672616519476991, iteration: 370038
loss: 0.9944221377372742,grad_norm: 0.9999989286504092, iteration: 370039
loss: 1.0205157995224,grad_norm: 0.7574229880525162, iteration: 370040
loss: 0.9739867448806763,grad_norm: 0.6798348763066987, iteration: 370041
loss: 0.993851363658905,grad_norm: 0.700390844810248, iteration: 370042
loss: 1.0327297449111938,grad_norm: 0.8746410740742061, iteration: 370043
loss: 1.007967233657837,grad_norm: 0.8977788166186156, iteration: 370044
loss: 1.0332255363464355,grad_norm: 0.8638551367076016, iteration: 370045
loss: 0.9489311575889587,grad_norm: 0.8357597294439981, iteration: 370046
loss: 1.0024648904800415,grad_norm: 0.6830362879636108, iteration: 370047
loss: 1.0213122367858887,grad_norm: 0.8866194997113074, iteration: 370048
loss: 1.012831687927246,grad_norm: 0.733386794544525, iteration: 370049
loss: 0.96614670753479,grad_norm: 0.7880238530811415, iteration: 370050
loss: 1.0003458261489868,grad_norm: 0.8379232794004661, iteration: 370051
loss: 0.9865809679031372,grad_norm: 0.806351281219484, iteration: 370052
loss: 1.0537809133529663,grad_norm: 0.8516837187036866, iteration: 370053
loss: 0.987975537776947,grad_norm: 0.9013156622288288, iteration: 370054
loss: 0.9800503849983215,grad_norm: 0.9999990757380368, iteration: 370055
loss: 1.0195190906524658,grad_norm: 0.8247808116951719, iteration: 370056
loss: 0.9820087552070618,grad_norm: 0.7560796794672169, iteration: 370057
loss: 0.9938334822654724,grad_norm: 0.7635446303054026, iteration: 370058
loss: 1.034256935119629,grad_norm: 0.9999994916799458, iteration: 370059
loss: 0.9754495024681091,grad_norm: 0.8472136646396549, iteration: 370060
loss: 1.0173475742340088,grad_norm: 0.7746545488704164, iteration: 370061
loss: 1.00652277469635,grad_norm: 0.8080484743292745, iteration: 370062
loss: 1.0256801843643188,grad_norm: 0.709454139977804, iteration: 370063
loss: 0.9414249658584595,grad_norm: 0.9067355190240936, iteration: 370064
loss: 0.9788616895675659,grad_norm: 0.746192071560791, iteration: 370065
loss: 1.016006350517273,grad_norm: 0.9047733227501152, iteration: 370066
loss: 1.0091947317123413,grad_norm: 0.8352691664746559, iteration: 370067
loss: 1.013946771621704,grad_norm: 0.9002187000102955, iteration: 370068
loss: 1.0194789171218872,grad_norm: 0.7551409356112252, iteration: 370069
loss: 1.148467779159546,grad_norm: 0.9999998843866886, iteration: 370070
loss: 1.0067131519317627,grad_norm: 0.8712608132215914, iteration: 370071
loss: 1.0055023431777954,grad_norm: 0.8361983067682499, iteration: 370072
loss: 0.9911019206047058,grad_norm: 0.8474201789898067, iteration: 370073
loss: 0.9788448810577393,grad_norm: 0.7309368278370125, iteration: 370074
loss: 0.9942450523376465,grad_norm: 0.7530647639702245, iteration: 370075
loss: 1.014281988143921,grad_norm: 0.8351972257283, iteration: 370076
loss: 1.0659711360931396,grad_norm: 0.9999989773137753, iteration: 370077
loss: 0.997089147567749,grad_norm: 0.7858074227446344, iteration: 370078
loss: 0.9959968328475952,grad_norm: 0.8278402474695254, iteration: 370079
loss: 0.9701200127601624,grad_norm: 0.8053552259115184, iteration: 370080
loss: 0.9848929047584534,grad_norm: 0.7803643771460357, iteration: 370081
loss: 0.9929289221763611,grad_norm: 0.7871678439936153, iteration: 370082
loss: 0.9748992919921875,grad_norm: 0.6857946437453984, iteration: 370083
loss: 0.9918302297592163,grad_norm: 0.7534620290737044, iteration: 370084
loss: 1.0017390251159668,grad_norm: 0.8691246519934765, iteration: 370085
loss: 1.0063238143920898,grad_norm: 0.8112952770547767, iteration: 370086
loss: 1.0037059783935547,grad_norm: 0.7765126730575388, iteration: 370087
loss: 0.9816251397132874,grad_norm: 0.8687468122184806, iteration: 370088
loss: 1.0367039442062378,grad_norm: 0.9999999083818143, iteration: 370089
loss: 0.9815672039985657,grad_norm: 0.8503734880090846, iteration: 370090
loss: 0.9907233715057373,grad_norm: 0.9103150614968877, iteration: 370091
loss: 0.9436957240104675,grad_norm: 0.8784725795632942, iteration: 370092
loss: 1.0126584768295288,grad_norm: 0.7599069248322141, iteration: 370093
loss: 1.012986183166504,grad_norm: 0.789522359237447, iteration: 370094
loss: 0.9764049649238586,grad_norm: 0.7353063254623984, iteration: 370095
loss: 0.9827143549919128,grad_norm: 0.9914953695863674, iteration: 370096
loss: 0.9919102787971497,grad_norm: 0.9254787659902616, iteration: 370097
loss: 0.9874863624572754,grad_norm: 0.7972697816845262, iteration: 370098
loss: 1.0299280881881714,grad_norm: 0.8088621647082603, iteration: 370099
loss: 1.071955680847168,grad_norm: 0.9172077293256459, iteration: 370100
loss: 1.016581416130066,grad_norm: 0.9286652952188788, iteration: 370101
loss: 1.0191380977630615,grad_norm: 0.6425711805271805, iteration: 370102
loss: 1.0058344602584839,grad_norm: 0.841205073075417, iteration: 370103
loss: 1.0007067918777466,grad_norm: 0.9999993524645878, iteration: 370104
loss: 0.9628984332084656,grad_norm: 0.8891691951250446, iteration: 370105
loss: 0.9925314784049988,grad_norm: 0.8557740359404832, iteration: 370106
loss: 1.0312037467956543,grad_norm: 0.8165244575930459, iteration: 370107
loss: 1.0417909622192383,grad_norm: 0.7967842855214977, iteration: 370108
loss: 0.9962290525436401,grad_norm: 0.9412081034249625, iteration: 370109
loss: 1.003920078277588,grad_norm: 0.7691901709683524, iteration: 370110
loss: 1.1178536415100098,grad_norm: 0.9999999436539718, iteration: 370111
loss: 1.0137813091278076,grad_norm: 0.8117687857734868, iteration: 370112
loss: 0.9924409985542297,grad_norm: 0.8816043631925219, iteration: 370113
loss: 0.9997372031211853,grad_norm: 0.9463808426634125, iteration: 370114
loss: 1.0139952898025513,grad_norm: 0.9999999017865422, iteration: 370115
loss: 0.9984586238861084,grad_norm: 0.7133447404517932, iteration: 370116
loss: 1.0111509561538696,grad_norm: 0.9999994373856194, iteration: 370117
loss: 1.0218168497085571,grad_norm: 0.7083819854992085, iteration: 370118
loss: 1.0534436702728271,grad_norm: 0.7915868484966619, iteration: 370119
loss: 0.9965050220489502,grad_norm: 0.6670749735025777, iteration: 370120
loss: 0.9721189737319946,grad_norm: 0.8493731027848868, iteration: 370121
loss: 0.9934824705123901,grad_norm: 0.8606370758301082, iteration: 370122
loss: 1.0114682912826538,grad_norm: 0.92189066358802, iteration: 370123
loss: 0.9842966794967651,grad_norm: 0.8049569382106576, iteration: 370124
loss: 1.010469913482666,grad_norm: 0.8038637464072615, iteration: 370125
loss: 1.0271319150924683,grad_norm: 0.7786511312742209, iteration: 370126
loss: 1.014639973640442,grad_norm: 0.7615519660312953, iteration: 370127
loss: 1.0128945112228394,grad_norm: 0.8098096862543877, iteration: 370128
loss: 0.9882696270942688,grad_norm: 0.913749250961476, iteration: 370129
loss: 1.0121397972106934,grad_norm: 0.7524770745375182, iteration: 370130
loss: 1.0501283407211304,grad_norm: 0.7668471039805169, iteration: 370131
loss: 1.0007188320159912,grad_norm: 0.7508694671252318, iteration: 370132
loss: 1.1016634702682495,grad_norm: 0.889202143573745, iteration: 370133
loss: 0.9696438312530518,grad_norm: 0.7445915089876946, iteration: 370134
loss: 0.9957588315010071,grad_norm: 0.7295416987701222, iteration: 370135
loss: 0.9829434752464294,grad_norm: 0.9445267987164793, iteration: 370136
loss: 0.9888073801994324,grad_norm: 0.8060285649111957, iteration: 370137
loss: 0.962321400642395,grad_norm: 0.8487890772472145, iteration: 370138
loss: 1.0046411752700806,grad_norm: 0.8891395894873494, iteration: 370139
loss: 0.9738996624946594,grad_norm: 0.7549489780288245, iteration: 370140
loss: 1.0013295412063599,grad_norm: 0.8050668563619826, iteration: 370141
loss: 1.0166897773742676,grad_norm: 0.6533161341493491, iteration: 370142
loss: 0.9730535745620728,grad_norm: 0.7329899563193608, iteration: 370143
loss: 0.9819314479827881,grad_norm: 0.7583370704411129, iteration: 370144
loss: 1.002547264099121,grad_norm: 0.8207532999530815, iteration: 370145
loss: 0.9598730206489563,grad_norm: 0.6901395970176903, iteration: 370146
loss: 0.9792873859405518,grad_norm: 0.8606055950548277, iteration: 370147
loss: 1.0045291185379028,grad_norm: 0.8303487554941607, iteration: 370148
loss: 0.9964967370033264,grad_norm: 0.7194959952615735, iteration: 370149
loss: 1.0070793628692627,grad_norm: 0.8206057891307881, iteration: 370150
loss: 0.9672299027442932,grad_norm: 0.6872332911027956, iteration: 370151
loss: 1.0662072896957397,grad_norm: 0.9503389886092017, iteration: 370152
loss: 0.9821224212646484,grad_norm: 0.8187727427475188, iteration: 370153
loss: 1.0075827836990356,grad_norm: 0.9999998765391293, iteration: 370154
loss: 1.0203065872192383,grad_norm: 0.7885959742048996, iteration: 370155
loss: 1.0110081434249878,grad_norm: 0.702193490036117, iteration: 370156
loss: 1.0041694641113281,grad_norm: 0.9455523304380562, iteration: 370157
loss: 0.9950557947158813,grad_norm: 0.82650193512123, iteration: 370158
loss: 0.9981695413589478,grad_norm: 0.9999991170410438, iteration: 370159
loss: 1.0261021852493286,grad_norm: 0.9999991319145586, iteration: 370160
loss: 0.973162055015564,grad_norm: 0.9012188864126413, iteration: 370161
loss: 0.987468421459198,grad_norm: 0.8781964132927966, iteration: 370162
loss: 0.9887842535972595,grad_norm: 0.6262655168422971, iteration: 370163
loss: 0.9990440011024475,grad_norm: 0.835208969001285, iteration: 370164
loss: 1.0021435022354126,grad_norm: 0.6799055191270587, iteration: 370165
loss: 1.0086722373962402,grad_norm: 0.9999994781164123, iteration: 370166
loss: 0.9967629909515381,grad_norm: 0.8581533148611435, iteration: 370167
loss: 1.0159393548965454,grad_norm: 0.9681403321104628, iteration: 370168
loss: 1.0089428424835205,grad_norm: 0.9999990766161089, iteration: 370169
loss: 0.994686484336853,grad_norm: 0.8760641941319002, iteration: 370170
loss: 1.0093491077423096,grad_norm: 0.8749787710690234, iteration: 370171
loss: 1.0023918151855469,grad_norm: 0.8606129772856955, iteration: 370172
loss: 0.9614565968513489,grad_norm: 0.7852343401545251, iteration: 370173
loss: 1.005196452140808,grad_norm: 0.8116863502245191, iteration: 370174
loss: 0.9954365491867065,grad_norm: 0.6273390609143986, iteration: 370175
loss: 1.0049669742584229,grad_norm: 0.9999999401979098, iteration: 370176
loss: 0.9769564270973206,grad_norm: 0.7922028300551006, iteration: 370177
loss: 1.0312178134918213,grad_norm: 0.8324846712835661, iteration: 370178
loss: 1.0198323726654053,grad_norm: 0.9999999891032418, iteration: 370179
loss: 0.9864606857299805,grad_norm: 0.7968778767504434, iteration: 370180
loss: 0.981657862663269,grad_norm: 0.999999207504164, iteration: 370181
loss: 1.0639660358428955,grad_norm: 0.8377126813784347, iteration: 370182
loss: 0.9911755323410034,grad_norm: 0.7501259208271689, iteration: 370183
loss: 1.0216422080993652,grad_norm: 0.8641259453683963, iteration: 370184
loss: 1.0193418264389038,grad_norm: 0.8664015259781058, iteration: 370185
loss: 1.0363640785217285,grad_norm: 0.8844431283971295, iteration: 370186
loss: 0.9968075156211853,grad_norm: 0.8824702351559163, iteration: 370187
loss: 1.0130606889724731,grad_norm: 0.9130900659212714, iteration: 370188
loss: 1.0100605487823486,grad_norm: 0.8233314278235264, iteration: 370189
loss: 1.004050374031067,grad_norm: 0.999999254365609, iteration: 370190
loss: 1.0054876804351807,grad_norm: 0.9737047328264661, iteration: 370191
loss: 1.0137715339660645,grad_norm: 0.9999990599148055, iteration: 370192
loss: 1.0673404932022095,grad_norm: 0.9999991561515753, iteration: 370193
loss: 0.9693995118141174,grad_norm: 0.8221087097731392, iteration: 370194
loss: 1.0114452838897705,grad_norm: 0.7593041787386439, iteration: 370195
loss: 1.0117292404174805,grad_norm: 0.762136097287918, iteration: 370196
loss: 0.9869054555892944,grad_norm: 0.9999990912158239, iteration: 370197
loss: 1.0053870677947998,grad_norm: 0.8222909018158467, iteration: 370198
loss: 1.0017937421798706,grad_norm: 0.8109693640484246, iteration: 370199
loss: 0.9635361433029175,grad_norm: 0.6872041790979831, iteration: 370200
loss: 1.0372101068496704,grad_norm: 0.6782317780116628, iteration: 370201
loss: 0.997621476650238,grad_norm: 0.6487575492733398, iteration: 370202
loss: 1.00901460647583,grad_norm: 0.7599718193721093, iteration: 370203
loss: 0.9869925379753113,grad_norm: 0.6919704868159973, iteration: 370204
loss: 1.0111812353134155,grad_norm: 0.8824319504990162, iteration: 370205
loss: 0.9863560199737549,grad_norm: 0.6654579947245703, iteration: 370206
loss: 0.9953582286834717,grad_norm: 0.8375010767419697, iteration: 370207
loss: 1.0043483972549438,grad_norm: 0.9999992966095292, iteration: 370208
loss: 1.0099296569824219,grad_norm: 0.9935025206623747, iteration: 370209
loss: 0.9976472854614258,grad_norm: 0.8000182089014132, iteration: 370210
loss: 0.9816298484802246,grad_norm: 0.9999993357315058, iteration: 370211
loss: 1.0255053043365479,grad_norm: 0.768646046106593, iteration: 370212
loss: 0.9900471568107605,grad_norm: 0.8981277233908918, iteration: 370213
loss: 1.0078660249710083,grad_norm: 0.9102883859647707, iteration: 370214
loss: 1.0051348209381104,grad_norm: 0.6651605404601174, iteration: 370215
loss: 1.0010461807250977,grad_norm: 0.8803150251093974, iteration: 370216
loss: 1.0152512788772583,grad_norm: 0.70372577363806, iteration: 370217
loss: 0.9859934449195862,grad_norm: 0.8953711531926634, iteration: 370218
loss: 0.9977062344551086,grad_norm: 0.8557091189773967, iteration: 370219
loss: 1.0188148021697998,grad_norm: 0.8246345696839524, iteration: 370220
loss: 0.9997104406356812,grad_norm: 0.9999996695293948, iteration: 370221
loss: 0.9720560908317566,grad_norm: 0.7230847428092527, iteration: 370222
loss: 0.9922913908958435,grad_norm: 0.7056078348437314, iteration: 370223
loss: 1.032249093055725,grad_norm: 0.7856417208418844, iteration: 370224
loss: 0.9975081086158752,grad_norm: 0.7777640218221464, iteration: 370225
loss: 0.9903134703636169,grad_norm: 0.8729107551670832, iteration: 370226
loss: 0.9933222532272339,grad_norm: 0.9689120070284817, iteration: 370227
loss: 0.9928549528121948,grad_norm: 0.7469822441461832, iteration: 370228
loss: 1.1040138006210327,grad_norm: 0.9032805596308394, iteration: 370229
loss: 1.0015008449554443,grad_norm: 0.9863001477816651, iteration: 370230
loss: 1.026308298110962,grad_norm: 0.8451960592394782, iteration: 370231
loss: 0.992253303527832,grad_norm: 0.7926177597469864, iteration: 370232
loss: 0.9928710460662842,grad_norm: 0.7524040899766505, iteration: 370233
loss: 1.0358223915100098,grad_norm: 0.8390147975212376, iteration: 370234
loss: 1.0211962461471558,grad_norm: 0.9999990253487707, iteration: 370235
loss: 1.0601917505264282,grad_norm: 0.9999998970543836, iteration: 370236
loss: 0.9853308200836182,grad_norm: 0.9999992151451447, iteration: 370237
loss: 1.00883948802948,grad_norm: 0.693362969109733, iteration: 370238
loss: 1.001362919807434,grad_norm: 0.7996546036319698, iteration: 370239
loss: 0.9939237833023071,grad_norm: 0.998202938904345, iteration: 370240
loss: 1.0019023418426514,grad_norm: 0.9523117879858368, iteration: 370241
loss: 0.9966371059417725,grad_norm: 0.6857094824917692, iteration: 370242
loss: 1.0019186735153198,grad_norm: 0.999999546053326, iteration: 370243
loss: 0.9510494470596313,grad_norm: 0.7645408746078486, iteration: 370244
loss: 0.9949790239334106,grad_norm: 0.9165267388294979, iteration: 370245
loss: 1.0115240812301636,grad_norm: 0.9999989685469958, iteration: 370246
loss: 1.0041767358779907,grad_norm: 0.8153270029282809, iteration: 370247
loss: 0.9874705076217651,grad_norm: 0.7842151807390229, iteration: 370248
loss: 1.0029665231704712,grad_norm: 0.9823547398483371, iteration: 370249
loss: 0.9908634424209595,grad_norm: 0.7542476014031417, iteration: 370250
loss: 1.047243595123291,grad_norm: 0.9999990162298187, iteration: 370251
loss: 0.9842404127120972,grad_norm: 0.7790352836319318, iteration: 370252
loss: 1.008426308631897,grad_norm: 0.7570507735252184, iteration: 370253
loss: 1.055936336517334,grad_norm: 0.9040447339998939, iteration: 370254
loss: 0.9970216155052185,grad_norm: 0.8689387427251716, iteration: 370255
loss: 1.0101639032363892,grad_norm: 0.8074309491863241, iteration: 370256
loss: 1.1916940212249756,grad_norm: 0.9999995593917228, iteration: 370257
loss: 1.04849374294281,grad_norm: 0.839014284276259, iteration: 370258
loss: 0.9850865006446838,grad_norm: 0.7053110577222387, iteration: 370259
loss: 0.9984668493270874,grad_norm: 0.8288307493634421, iteration: 370260
loss: 1.000110387802124,grad_norm: 0.9435697974124206, iteration: 370261
loss: 1.0185574293136597,grad_norm: 0.8614007506346061, iteration: 370262
loss: 0.9796287417411804,grad_norm: 0.8178431346038868, iteration: 370263
loss: 1.0606417655944824,grad_norm: 0.9999999839578322, iteration: 370264
loss: 0.9621546864509583,grad_norm: 0.8376285918670225, iteration: 370265
loss: 1.0087528228759766,grad_norm: 0.8489514843418857, iteration: 370266
loss: 1.0394985675811768,grad_norm: 0.9999992558906997, iteration: 370267
loss: 0.9963854551315308,grad_norm: 0.8702696641096698, iteration: 370268
loss: 1.0260310173034668,grad_norm: 0.9181368094336624, iteration: 370269
loss: 0.9661746025085449,grad_norm: 0.7907905632702803, iteration: 370270
loss: 1.0123637914657593,grad_norm: 0.7052087215886087, iteration: 370271
loss: 1.0013301372528076,grad_norm: 0.7593103764413865, iteration: 370272
loss: 0.9883874654769897,grad_norm: 0.6927101842040332, iteration: 370273
loss: 0.9564369916915894,grad_norm: 0.6864758267370801, iteration: 370274
loss: 1.0838241577148438,grad_norm: 0.9722950679345177, iteration: 370275
loss: 1.0220491886138916,grad_norm: 0.9999997319141432, iteration: 370276
loss: 0.9986011385917664,grad_norm: 0.9999996637879497, iteration: 370277
loss: 0.9907369613647461,grad_norm: 0.7887338469660439, iteration: 370278
loss: 1.0205060243606567,grad_norm: 0.9952023533426524, iteration: 370279
loss: 0.9885648488998413,grad_norm: 0.8888488534453154, iteration: 370280
loss: 1.0065646171569824,grad_norm: 0.7726464337705156, iteration: 370281
loss: 1.1568118333816528,grad_norm: 0.999999227435616, iteration: 370282
loss: 0.951141357421875,grad_norm: 0.7904073776306062, iteration: 370283
loss: 1.0397305488586426,grad_norm: 0.8752905360132246, iteration: 370284
loss: 0.9958198666572571,grad_norm: 0.9999990750194758, iteration: 370285
loss: 0.9904358386993408,grad_norm: 0.9344912413094364, iteration: 370286
loss: 0.958655059337616,grad_norm: 0.9189997790714528, iteration: 370287
loss: 1.0142091512680054,grad_norm: 0.8336489124217452, iteration: 370288
loss: 0.998867928981781,grad_norm: 0.8302981351803231, iteration: 370289
loss: 1.0460352897644043,grad_norm: 0.7701058530793411, iteration: 370290
loss: 1.0243275165557861,grad_norm: 0.9369265424736386, iteration: 370291
loss: 0.9729870557785034,grad_norm: 0.7621934224132579, iteration: 370292
loss: 0.9800141453742981,grad_norm: 0.9031048266046613, iteration: 370293
loss: 0.9726356863975525,grad_norm: 0.7245464775562649, iteration: 370294
loss: 0.9606253504753113,grad_norm: 0.7693513141242821, iteration: 370295
loss: 0.9993711113929749,grad_norm: 0.6354162179997994, iteration: 370296
loss: 1.0168741941452026,grad_norm: 0.8641703731226981, iteration: 370297
loss: 0.9944725036621094,grad_norm: 0.8668871833112449, iteration: 370298
loss: 0.9868903160095215,grad_norm: 0.8659616298059106, iteration: 370299
loss: 1.0093003511428833,grad_norm: 0.8060527360748532, iteration: 370300
loss: 0.9842803478240967,grad_norm: 0.8777468481730004, iteration: 370301
loss: 0.9794004559516907,grad_norm: 0.7989857468653188, iteration: 370302
loss: 1.0004092454910278,grad_norm: 0.8626466322112927, iteration: 370303
loss: 1.013633131980896,grad_norm: 0.9561088129149228, iteration: 370304
loss: 1.0527803897857666,grad_norm: 0.8311354371898995, iteration: 370305
loss: 0.9817962646484375,grad_norm: 0.9964808202303431, iteration: 370306
loss: 0.9669461250305176,grad_norm: 0.7799692770018425, iteration: 370307
loss: 0.9732521772384644,grad_norm: 0.8619921151391882, iteration: 370308
loss: 1.050002098083496,grad_norm: 0.8304281919271005, iteration: 370309
loss: 1.0148499011993408,grad_norm: 0.7481951808034902, iteration: 370310
loss: 1.0453500747680664,grad_norm: 0.9999993076446526, iteration: 370311
loss: 1.0056397914886475,grad_norm: 0.8183789723653212, iteration: 370312
loss: 1.0079294443130493,grad_norm: 0.8917463887960919, iteration: 370313
loss: 1.1450048685073853,grad_norm: 0.9999999769795245, iteration: 370314
loss: 1.0331602096557617,grad_norm: 0.9499091632961546, iteration: 370315
loss: 0.9796149730682373,grad_norm: 0.9999994295234514, iteration: 370316
loss: 0.9726603627204895,grad_norm: 0.7630653868010814, iteration: 370317
loss: 1.020106315612793,grad_norm: 0.8688020036291333, iteration: 370318
loss: 0.9660990834236145,grad_norm: 0.8926574769395612, iteration: 370319
loss: 1.0410456657409668,grad_norm: 0.9489059096028198, iteration: 370320
loss: 0.9978593587875366,grad_norm: 0.6394577037588206, iteration: 370321
loss: 0.9569615721702576,grad_norm: 0.704276605656702, iteration: 370322
loss: 1.0408270359039307,grad_norm: 0.8587012612923534, iteration: 370323
loss: 1.0008342266082764,grad_norm: 0.8861030219963978, iteration: 370324
loss: 0.9856712818145752,grad_norm: 0.7703744566463445, iteration: 370325
loss: 1.02293860912323,grad_norm: 0.8592444447295704, iteration: 370326
loss: 1.0079014301300049,grad_norm: 0.8896504115799042, iteration: 370327
loss: 0.9789482951164246,grad_norm: 0.9272334302899016, iteration: 370328
loss: 1.003659963607788,grad_norm: 0.6868827449826574, iteration: 370329
loss: 1.1811729669570923,grad_norm: 0.9999997646560425, iteration: 370330
loss: 1.0111943483352661,grad_norm: 0.9703420009168632, iteration: 370331
loss: 0.9738542437553406,grad_norm: 0.8472820763100349, iteration: 370332
loss: 0.9844651818275452,grad_norm: 0.91822599623834, iteration: 370333
loss: 0.9894659519195557,grad_norm: 0.65887640560344, iteration: 370334
loss: 0.9923989772796631,grad_norm: 0.9193777252209069, iteration: 370335
loss: 0.9906604886054993,grad_norm: 0.9916983194933261, iteration: 370336
loss: 0.9612647294998169,grad_norm: 0.8535665224456691, iteration: 370337
loss: 1.038193702697754,grad_norm: 0.9999998418301875, iteration: 370338
loss: 0.9929659962654114,grad_norm: 0.8963557079512168, iteration: 370339
loss: 0.9896805882453918,grad_norm: 0.9088746294672241, iteration: 370340
loss: 0.9924409985542297,grad_norm: 0.903913693782236, iteration: 370341
loss: 1.0146201848983765,grad_norm: 0.968569069110321, iteration: 370342
loss: 0.9949119687080383,grad_norm: 0.867515937731566, iteration: 370343
loss: 0.9978389739990234,grad_norm: 0.7753598136318306, iteration: 370344
loss: 1.0267592668533325,grad_norm: 0.7968154311665301, iteration: 370345
loss: 1.0217028856277466,grad_norm: 0.6881579925482868, iteration: 370346
loss: 0.9673899412155151,grad_norm: 0.9805788017059337, iteration: 370347
loss: 0.9943255186080933,grad_norm: 0.8292575468963097, iteration: 370348
loss: 0.9629040956497192,grad_norm: 0.769343756759094, iteration: 370349
loss: 1.0130854845046997,grad_norm: 0.7905953057100698, iteration: 370350
loss: 1.0064194202423096,grad_norm: 0.9999991153256891, iteration: 370351
loss: 0.956732988357544,grad_norm: 0.7724625436978458, iteration: 370352
loss: 0.9795355200767517,grad_norm: 0.9999990602473652, iteration: 370353
loss: 1.032538890838623,grad_norm: 0.8264397673607429, iteration: 370354
loss: 1.0077627897262573,grad_norm: 0.8484817418750148, iteration: 370355
loss: 1.0341957807540894,grad_norm: 0.8763105848163073, iteration: 370356
loss: 0.9938543438911438,grad_norm: 0.7686125643219291, iteration: 370357
loss: 1.0428318977355957,grad_norm: 0.6825207292050604, iteration: 370358
loss: 0.9830519556999207,grad_norm: 0.8921331885619742, iteration: 370359
loss: 0.9952870011329651,grad_norm: 0.8419896416865915, iteration: 370360
loss: 1.0027780532836914,grad_norm: 0.7932298522451446, iteration: 370361
loss: 0.9774995446205139,grad_norm: 0.7619142891373452, iteration: 370362
loss: 1.0377941131591797,grad_norm: 0.9999998955295323, iteration: 370363
loss: 1.02126145362854,grad_norm: 0.7681892310343942, iteration: 370364
loss: 1.0185805559158325,grad_norm: 0.9690241978851077, iteration: 370365
loss: 1.01291823387146,grad_norm: 0.7815542855110267, iteration: 370366
loss: 0.971161425113678,grad_norm: 0.8422947490304392, iteration: 370367
loss: 0.9656963348388672,grad_norm: 0.7919920462776069, iteration: 370368
loss: 1.0014612674713135,grad_norm: 0.7223333599831094, iteration: 370369
loss: 0.9840324521064758,grad_norm: 0.7989679391419515, iteration: 370370
loss: 0.9657933712005615,grad_norm: 0.695947406924619, iteration: 370371
loss: 1.0137369632720947,grad_norm: 0.9999990355930652, iteration: 370372
loss: 0.991844117641449,grad_norm: 0.7492323391998031, iteration: 370373
loss: 1.0039409399032593,grad_norm: 0.7530696506600901, iteration: 370374
loss: 1.0026212930679321,grad_norm: 0.7571465287755185, iteration: 370375
loss: 1.132760763168335,grad_norm: 0.9999997784781727, iteration: 370376
loss: 1.0075315237045288,grad_norm: 0.7410856990555148, iteration: 370377
loss: 0.9939171075820923,grad_norm: 0.8343814902257876, iteration: 370378
loss: 1.1579093933105469,grad_norm: 0.9999996898168264, iteration: 370379
loss: 0.9931191205978394,grad_norm: 0.891779535576012, iteration: 370380
loss: 1.003235936164856,grad_norm: 0.8648289390385645, iteration: 370381
loss: 1.0124197006225586,grad_norm: 0.9999992762168707, iteration: 370382
loss: 1.0061966180801392,grad_norm: 0.6344816305952307, iteration: 370383
loss: 0.9849174618721008,grad_norm: 0.8182490404754087, iteration: 370384
loss: 1.040407657623291,grad_norm: 0.9999994783466124, iteration: 370385
loss: 1.0065876245498657,grad_norm: 0.799930008231873, iteration: 370386
loss: 1.0053606033325195,grad_norm: 0.7994251810402315, iteration: 370387
loss: 1.00739324092865,grad_norm: 0.7487733977333083, iteration: 370388
loss: 1.0278667211532593,grad_norm: 0.9999997776593509, iteration: 370389
loss: 1.0085698366165161,grad_norm: 0.7621462768949905, iteration: 370390
loss: 0.9789572954177856,grad_norm: 0.7289706681350858, iteration: 370391
loss: 0.9999415874481201,grad_norm: 0.7419472144599519, iteration: 370392
loss: 1.0422673225402832,grad_norm: 0.8125951487173361, iteration: 370393
loss: 1.0209771394729614,grad_norm: 0.8014096189085204, iteration: 370394
loss: 0.9789247512817383,grad_norm: 0.7996649031280496, iteration: 370395
loss: 1.0097386837005615,grad_norm: 0.64802070216211, iteration: 370396
loss: 1.0174850225448608,grad_norm: 0.7939215006071244, iteration: 370397
loss: 0.9983686208724976,grad_norm: 0.7550525855270906, iteration: 370398
loss: 1.0251758098602295,grad_norm: 0.9999990216651763, iteration: 370399
loss: 1.0093072652816772,grad_norm: 0.775501715926903, iteration: 370400
loss: 0.9764304757118225,grad_norm: 0.679745575802677, iteration: 370401
loss: 0.9759070873260498,grad_norm: 0.8439975291782968, iteration: 370402
loss: 1.0052478313446045,grad_norm: 0.7986400082108663, iteration: 370403
loss: 1.0092085599899292,grad_norm: 0.7776310854685485, iteration: 370404
loss: 0.9918359518051147,grad_norm: 0.7719826685456729, iteration: 370405
loss: 1.0400631427764893,grad_norm: 0.8476661871771018, iteration: 370406
loss: 0.9994242787361145,grad_norm: 0.8566155712171817, iteration: 370407
loss: 1.0082545280456543,grad_norm: 0.8088973351803834, iteration: 370408
loss: 0.9798430800437927,grad_norm: 0.7747651846999872, iteration: 370409
loss: 1.0389877557754517,grad_norm: 0.9999991690747365, iteration: 370410
loss: 0.9685169458389282,grad_norm: 0.8648155622175716, iteration: 370411
loss: 1.0027508735656738,grad_norm: 0.6611790277709476, iteration: 370412
loss: 0.9790275692939758,grad_norm: 0.7821683977754028, iteration: 370413
loss: 1.0292119979858398,grad_norm: 0.9278258973850411, iteration: 370414
loss: 1.0479211807250977,grad_norm: 0.7679546873860682, iteration: 370415
loss: 1.004143476486206,grad_norm: 0.7189263058726083, iteration: 370416
loss: 1.0002585649490356,grad_norm: 0.8735789315712582, iteration: 370417
loss: 1.0137059688568115,grad_norm: 0.8482905321318313, iteration: 370418
loss: 0.9674690365791321,grad_norm: 0.9133452240007103, iteration: 370419
loss: 0.9824346303939819,grad_norm: 0.7593218715395986, iteration: 370420
loss: 1.104762077331543,grad_norm: 0.9999994663974281, iteration: 370421
loss: 1.0026733875274658,grad_norm: 0.9646271658325636, iteration: 370422
loss: 1.1088955402374268,grad_norm: 0.9999992841800399, iteration: 370423
loss: 1.0112934112548828,grad_norm: 0.8535777256248614, iteration: 370424
loss: 0.9769821763038635,grad_norm: 0.6949932137041105, iteration: 370425
loss: 0.9792162775993347,grad_norm: 0.7788646312500163, iteration: 370426
loss: 0.9952500462532043,grad_norm: 0.967916701678261, iteration: 370427
loss: 0.9640109539031982,grad_norm: 0.9063986601043514, iteration: 370428
loss: 0.9977559447288513,grad_norm: 0.8793949253600094, iteration: 370429
loss: 1.0114558935165405,grad_norm: 0.6634225011884539, iteration: 370430
loss: 0.9652215838432312,grad_norm: 0.7461907883631937, iteration: 370431
loss: 1.0393096208572388,grad_norm: 0.9288333306650218, iteration: 370432
loss: 0.988356351852417,grad_norm: 0.741386286583002, iteration: 370433
loss: 0.9707112908363342,grad_norm: 0.643854962789402, iteration: 370434
loss: 0.9957787394523621,grad_norm: 0.7316684015760254, iteration: 370435
loss: 0.9733153581619263,grad_norm: 0.7913215595263068, iteration: 370436
loss: 0.9789309501647949,grad_norm: 0.9757353856084116, iteration: 370437
loss: 0.9638758897781372,grad_norm: 0.8604423818307567, iteration: 370438
loss: 0.98353111743927,grad_norm: 0.6626992413817633, iteration: 370439
loss: 1.091744065284729,grad_norm: 0.9120453215989185, iteration: 370440
loss: 0.9869890213012695,grad_norm: 0.753072306320363, iteration: 370441
loss: 1.029029369354248,grad_norm: 0.8969540028431449, iteration: 370442
loss: 1.0013881921768188,grad_norm: 0.9317514374395727, iteration: 370443
loss: 1.0127280950546265,grad_norm: 0.9077362948336021, iteration: 370444
loss: 0.9852578639984131,grad_norm: 0.9332218788649626, iteration: 370445
loss: 1.06943941116333,grad_norm: 0.9999999418389275, iteration: 370446
loss: 1.000043511390686,grad_norm: 0.9999992933523433, iteration: 370447
loss: 1.0000063180923462,grad_norm: 0.8719770239022337, iteration: 370448
loss: 0.9880085587501526,grad_norm: 0.8797537891898296, iteration: 370449
loss: 1.0037003755569458,grad_norm: 0.7004955688889821, iteration: 370450
loss: 0.9970434904098511,grad_norm: 0.9321882831548726, iteration: 370451
loss: 1.0123233795166016,grad_norm: 0.7279657177822162, iteration: 370452
loss: 1.0575193166732788,grad_norm: 0.8659260207230206, iteration: 370453
loss: 0.9885247349739075,grad_norm: 0.7720815339415555, iteration: 370454
loss: 1.0151842832565308,grad_norm: 0.7773375607826958, iteration: 370455
loss: 1.0052425861358643,grad_norm: 0.8926652813725292, iteration: 370456
loss: 0.9950847625732422,grad_norm: 0.6415051431587186, iteration: 370457
loss: 1.0142067670822144,grad_norm: 0.7800647500864953, iteration: 370458
loss: 1.0069684982299805,grad_norm: 0.7817739305157929, iteration: 370459
loss: 0.9946886897087097,grad_norm: 0.9350472895442562, iteration: 370460
loss: 1.016813039779663,grad_norm: 0.7419117827137738, iteration: 370461
loss: 0.9963079690933228,grad_norm: 0.8304880570972448, iteration: 370462
loss: 1.0245041847229004,grad_norm: 0.8039352124135057, iteration: 370463
loss: 0.9865826964378357,grad_norm: 0.6107367801595487, iteration: 370464
loss: 1.0124372243881226,grad_norm: 0.7112071213052128, iteration: 370465
loss: 1.016405701637268,grad_norm: 0.7608903198263202, iteration: 370466
loss: 0.9978727102279663,grad_norm: 0.7750828485557206, iteration: 370467
loss: 1.000677227973938,grad_norm: 0.785740363357111, iteration: 370468
loss: 1.039939522743225,grad_norm: 0.9999991020507263, iteration: 370469
loss: 0.9942552447319031,grad_norm: 0.836159792268293, iteration: 370470
loss: 0.9982218742370605,grad_norm: 0.7010054665718002, iteration: 370471
loss: 1.0029423236846924,grad_norm: 0.7889673188722521, iteration: 370472
loss: 1.0061746835708618,grad_norm: 0.946346756993568, iteration: 370473
loss: 1.072952151298523,grad_norm: 0.9999991690710284, iteration: 370474
loss: 1.048546314239502,grad_norm: 0.8469850453497462, iteration: 370475
loss: 1.0281176567077637,grad_norm: 0.999999181887714, iteration: 370476
loss: 0.9733180999755859,grad_norm: 0.8188269302080361, iteration: 370477
loss: 1.0354351997375488,grad_norm: 0.9999999501302478, iteration: 370478
loss: 1.0212123394012451,grad_norm: 0.8783840512292826, iteration: 370479
loss: 1.036196231842041,grad_norm: 0.9512744848257662, iteration: 370480
loss: 0.9751860499382019,grad_norm: 0.9999997782342634, iteration: 370481
loss: 0.969109296798706,grad_norm: 0.8771003349015424, iteration: 370482
loss: 1.0077255964279175,grad_norm: 0.8317229944092495, iteration: 370483
loss: 0.9968089461326599,grad_norm: 0.7302727432554075, iteration: 370484
loss: 0.9631490111351013,grad_norm: 0.8003415655949377, iteration: 370485
loss: 1.0099422931671143,grad_norm: 0.7471610743632349, iteration: 370486
loss: 1.0274754762649536,grad_norm: 0.7147688255551988, iteration: 370487
loss: 0.999341607093811,grad_norm: 0.8153590302445299, iteration: 370488
loss: 0.9750995635986328,grad_norm: 0.7829475045891725, iteration: 370489
loss: 1.0422929525375366,grad_norm: 0.9999997060346199, iteration: 370490
loss: 0.9826870560646057,grad_norm: 0.8687294095368845, iteration: 370491
loss: 0.9833536744117737,grad_norm: 0.9376655780407102, iteration: 370492
loss: 1.0452576875686646,grad_norm: 0.9999993567359906, iteration: 370493
loss: 1.022810935974121,grad_norm: 0.8991992607157915, iteration: 370494
loss: 1.0369783639907837,grad_norm: 0.9999992778229433, iteration: 370495
loss: 0.9951085448265076,grad_norm: 0.926974317834634, iteration: 370496
loss: 1.0154478549957275,grad_norm: 0.6658032597013899, iteration: 370497
loss: 0.9880322217941284,grad_norm: 0.7987547083634567, iteration: 370498
loss: 1.0140106678009033,grad_norm: 0.8625534829625714, iteration: 370499
loss: 1.0362114906311035,grad_norm: 0.999999126380041, iteration: 370500
loss: 1.0058006048202515,grad_norm: 0.8927834995594395, iteration: 370501
loss: 1.005967140197754,grad_norm: 0.6683361204056298, iteration: 370502
loss: 1.0728877782821655,grad_norm: 0.9999992767954753, iteration: 370503
loss: 1.089539647102356,grad_norm: 0.9999998614910509, iteration: 370504
loss: 1.0454375743865967,grad_norm: 0.9999997670850678, iteration: 370505
loss: 1.0375776290893555,grad_norm: 0.6861841657083295, iteration: 370506
loss: 0.9312400817871094,grad_norm: 0.9253245981141107, iteration: 370507
loss: 0.9599570631980896,grad_norm: 0.8789678920664876, iteration: 370508
loss: 0.9659767150878906,grad_norm: 0.7921083074020411, iteration: 370509
loss: 1.038561463356018,grad_norm: 0.9999991641958381, iteration: 370510
loss: 1.0132957696914673,grad_norm: 0.9180383444333103, iteration: 370511
loss: 0.9720733761787415,grad_norm: 0.919697242843269, iteration: 370512
loss: 1.0214065313339233,grad_norm: 0.959249571980537, iteration: 370513
loss: 1.063707709312439,grad_norm: 0.8021065325044738, iteration: 370514
loss: 1.0215122699737549,grad_norm: 0.9999993087864222, iteration: 370515
loss: 1.0161570310592651,grad_norm: 0.7399052800341055, iteration: 370516
loss: 0.9890701174736023,grad_norm: 0.7958883789652353, iteration: 370517
loss: 1.057450771331787,grad_norm: 0.9999997927305729, iteration: 370518
loss: 0.9794998168945312,grad_norm: 0.7498950954434129, iteration: 370519
loss: 1.0928642749786377,grad_norm: 0.7813187885876745, iteration: 370520
loss: 0.9681750535964966,grad_norm: 0.9052161847147541, iteration: 370521
loss: 1.0163798332214355,grad_norm: 0.8413129619482406, iteration: 370522
loss: 1.0246288776397705,grad_norm: 0.9523255504443175, iteration: 370523
loss: 1.0076102018356323,grad_norm: 0.8691185805348874, iteration: 370524
loss: 0.9842921495437622,grad_norm: 0.8525895777769786, iteration: 370525
loss: 1.0332001447677612,grad_norm: 0.999999755841466, iteration: 370526
loss: 0.9929409027099609,grad_norm: 0.9148019977753497, iteration: 370527
loss: 0.9905356168746948,grad_norm: 0.9040143486632073, iteration: 370528
loss: 1.0357915163040161,grad_norm: 0.9424855297460047, iteration: 370529
loss: 0.977900505065918,grad_norm: 0.9411539104257169, iteration: 370530
loss: 1.0242385864257812,grad_norm: 0.7625077567084966, iteration: 370531
loss: 0.990808367729187,grad_norm: 0.7525529077524914, iteration: 370532
loss: 0.9912075996398926,grad_norm: 0.6315334581729555, iteration: 370533
loss: 0.9923616051673889,grad_norm: 0.9999995671450992, iteration: 370534
loss: 1.065146565437317,grad_norm: 0.9999998334512029, iteration: 370535
loss: 1.0015493631362915,grad_norm: 0.99999910595937, iteration: 370536
loss: 1.0376509428024292,grad_norm: 0.999999720553911, iteration: 370537
loss: 1.0138068199157715,grad_norm: 0.7658447626397888, iteration: 370538
loss: 0.9557044506072998,grad_norm: 0.8295758740442787, iteration: 370539
loss: 1.014876127243042,grad_norm: 0.9999998194964184, iteration: 370540
loss: 0.981774628162384,grad_norm: 0.8288520825631346, iteration: 370541
loss: 1.0136213302612305,grad_norm: 0.737759522703232, iteration: 370542
loss: 1.0816470384597778,grad_norm: 0.9999998860136122, iteration: 370543
loss: 1.0453518629074097,grad_norm: 0.7828160510430452, iteration: 370544
loss: 1.0143057107925415,grad_norm: 0.9999996908481615, iteration: 370545
loss: 1.0151339769363403,grad_norm: 0.9999992946818479, iteration: 370546
loss: 1.0134061574935913,grad_norm: 0.9999990661040696, iteration: 370547
loss: 0.9873453378677368,grad_norm: 0.7043523451985807, iteration: 370548
loss: 0.9746739864349365,grad_norm: 0.775423630759761, iteration: 370549
loss: 0.9999687075614929,grad_norm: 0.999999845408326, iteration: 370550
loss: 1.0197235345840454,grad_norm: 0.9360593935310321, iteration: 370551
loss: 0.9959820508956909,grad_norm: 0.7450679550810093, iteration: 370552
loss: 1.0268892049789429,grad_norm: 0.9868128182513862, iteration: 370553
loss: 1.0336748361587524,grad_norm: 0.9162636424116674, iteration: 370554
loss: 0.9774216413497925,grad_norm: 0.8869764756699801, iteration: 370555
loss: 1.0266906023025513,grad_norm: 0.8024331850930433, iteration: 370556
loss: 0.9975539445877075,grad_norm: 0.8626955721236237, iteration: 370557
loss: 0.9770559072494507,grad_norm: 0.7995547663344139, iteration: 370558
loss: 1.0382753610610962,grad_norm: 0.9590292445441176, iteration: 370559
loss: 1.0370533466339111,grad_norm: 0.8561285607998588, iteration: 370560
loss: 1.0682951211929321,grad_norm: 0.8700112557914238, iteration: 370561
loss: 1.0190260410308838,grad_norm: 0.8810801233202487, iteration: 370562
loss: 1.1164597272872925,grad_norm: 0.9999997443022557, iteration: 370563
loss: 0.9964926838874817,grad_norm: 0.786935654419521, iteration: 370564
loss: 0.9836298227310181,grad_norm: 0.7635618103984589, iteration: 370565
loss: 1.019290804862976,grad_norm: 0.9999995344405676, iteration: 370566
loss: 0.987633228302002,grad_norm: 0.7724144117366952, iteration: 370567
loss: 0.968384325504303,grad_norm: 0.9999990790260879, iteration: 370568
loss: 1.0115139484405518,grad_norm: 0.8952004918557815, iteration: 370569
loss: 1.0035382509231567,grad_norm: 0.999999235426202, iteration: 370570
loss: 1.0140630006790161,grad_norm: 0.7229616723326481, iteration: 370571
loss: 0.986291766166687,grad_norm: 0.999999162102188, iteration: 370572
loss: 1.0262150764465332,grad_norm: 0.7987676218625241, iteration: 370573
loss: 0.9666520953178406,grad_norm: 0.9180840614876044, iteration: 370574
loss: 0.9524729251861572,grad_norm: 0.8071793262950901, iteration: 370575
loss: 0.9946058392524719,grad_norm: 0.9999996688898467, iteration: 370576
loss: 1.0571742057800293,grad_norm: 0.9106960086038464, iteration: 370577
loss: 1.0017967224121094,grad_norm: 0.7527384439216701, iteration: 370578
loss: 0.9895815849304199,grad_norm: 0.6873706941510292, iteration: 370579
loss: 1.0262166261672974,grad_norm: 0.9999993399719134, iteration: 370580
loss: 1.03653085231781,grad_norm: 0.9942781230421746, iteration: 370581
loss: 1.0039269924163818,grad_norm: 0.8429589753798489, iteration: 370582
loss: 0.997347891330719,grad_norm: 0.7655393666229023, iteration: 370583
loss: 1.0045992136001587,grad_norm: 0.713486919150593, iteration: 370584
loss: 0.9708017706871033,grad_norm: 0.729141502758575, iteration: 370585
loss: 1.007095217704773,grad_norm: 0.7904982782086332, iteration: 370586
loss: 0.9886389374732971,grad_norm: 0.8420282241183235, iteration: 370587
loss: 0.9792051911354065,grad_norm: 0.9241592066689747, iteration: 370588
loss: 0.9674140810966492,grad_norm: 0.9008914814643827, iteration: 370589
loss: 1.008386492729187,grad_norm: 0.8577725985517257, iteration: 370590
loss: 0.9818649888038635,grad_norm: 0.8816627536265769, iteration: 370591
loss: 0.9866982698440552,grad_norm: 0.7583510700873651, iteration: 370592
loss: 1.016981601715088,grad_norm: 0.7731581980039008, iteration: 370593
loss: 0.9460893273353577,grad_norm: 0.7629562020926355, iteration: 370594
loss: 1.0810710191726685,grad_norm: 0.9999998372321387, iteration: 370595
loss: 1.0069905519485474,grad_norm: 0.7807682161055836, iteration: 370596
loss: 1.058823823928833,grad_norm: 0.9999992725000723, iteration: 370597
loss: 1.0270469188690186,grad_norm: 0.7502411969455195, iteration: 370598
loss: 1.011980652809143,grad_norm: 0.7467606945449282, iteration: 370599
loss: 0.9605121612548828,grad_norm: 0.8295764460202656, iteration: 370600
loss: 1.0046932697296143,grad_norm: 0.8222149660982117, iteration: 370601
loss: 0.9958898425102234,grad_norm: 0.8867481485755957, iteration: 370602
loss: 1.0153876543045044,grad_norm: 0.900422467552067, iteration: 370603
loss: 1.0112602710723877,grad_norm: 0.9999989980570856, iteration: 370604
loss: 1.0169894695281982,grad_norm: 0.8945505577701983, iteration: 370605
loss: 0.9951781630516052,grad_norm: 0.8219318512912354, iteration: 370606
loss: 0.9935302734375,grad_norm: 0.7630030165722157, iteration: 370607
loss: 0.9818606376647949,grad_norm: 0.7540707136177912, iteration: 370608
loss: 1.09712553024292,grad_norm: 0.9999990454210071, iteration: 370609
loss: 1.113700270652771,grad_norm: 0.9999995488452512, iteration: 370610
loss: 0.98714679479599,grad_norm: 0.6934529792537795, iteration: 370611
loss: 0.9967442750930786,grad_norm: 0.8510892757529688, iteration: 370612
loss: 0.9914557933807373,grad_norm: 0.7359195026958101, iteration: 370613
loss: 0.9983782768249512,grad_norm: 0.8383272240770432, iteration: 370614
loss: 0.9998971819877625,grad_norm: 0.9262339808885065, iteration: 370615
loss: 1.013043999671936,grad_norm: 0.823283447382659, iteration: 370616
loss: 1.002217173576355,grad_norm: 0.7218020583145861, iteration: 370617
loss: 1.004418969154358,grad_norm: 0.9448318703012235, iteration: 370618
loss: 1.0264666080474854,grad_norm: 0.8320921980470964, iteration: 370619
loss: 1.0042479038238525,grad_norm: 0.8990189079853086, iteration: 370620
loss: 1.0132025480270386,grad_norm: 0.718380754096068, iteration: 370621
loss: 1.034212350845337,grad_norm: 0.6749955253135395, iteration: 370622
loss: 0.9771462082862854,grad_norm: 0.8311473941888745, iteration: 370623
loss: 0.9733951687812805,grad_norm: 0.7530880465530136, iteration: 370624
loss: 1.0434179306030273,grad_norm: 0.7241715876033779, iteration: 370625
loss: 1.015807032585144,grad_norm: 0.7774507260230038, iteration: 370626
loss: 0.9603642821311951,grad_norm: 0.7848308409571788, iteration: 370627
loss: 0.982087254524231,grad_norm: 0.7459871647702058, iteration: 370628
loss: 0.9786046743392944,grad_norm: 0.7540731435883629, iteration: 370629
loss: 0.976611852645874,grad_norm: 0.999999408679512, iteration: 370630
loss: 1.0130767822265625,grad_norm: 0.7665074311354728, iteration: 370631
loss: 1.0084456205368042,grad_norm: 0.7338111432809394, iteration: 370632
loss: 0.9855907559394836,grad_norm: 0.8411498686768673, iteration: 370633
loss: 1.0170797109603882,grad_norm: 0.9120872799470919, iteration: 370634
loss: 1.0074807405471802,grad_norm: 0.7750299755163168, iteration: 370635
loss: 0.9509390592575073,grad_norm: 0.8039788456428337, iteration: 370636
loss: 1.0236530303955078,grad_norm: 0.7204284663810939, iteration: 370637
loss: 0.9773983359336853,grad_norm: 0.8954460466912839, iteration: 370638
loss: 1.0055421590805054,grad_norm: 0.8529395292113815, iteration: 370639
loss: 0.9810307025909424,grad_norm: 0.8958703073811937, iteration: 370640
loss: 0.9495334625244141,grad_norm: 0.7835601461350159, iteration: 370641
loss: 0.9933087229728699,grad_norm: 0.7707448011841662, iteration: 370642
loss: 1.0565999746322632,grad_norm: 0.9999990523846766, iteration: 370643
loss: 0.9856838583946228,grad_norm: 0.8112674604817232, iteration: 370644
loss: 1.0142438411712646,grad_norm: 0.9402144592854221, iteration: 370645
loss: 0.9895662069320679,grad_norm: 0.708660252789895, iteration: 370646
loss: 1.0140384435653687,grad_norm: 0.8039882962136745, iteration: 370647
loss: 0.980033278465271,grad_norm: 0.776231527373586, iteration: 370648
loss: 0.9993911385536194,grad_norm: 0.7836389679982726, iteration: 370649
loss: 0.9879545569419861,grad_norm: 0.9833099612134951, iteration: 370650
loss: 1.0253820419311523,grad_norm: 0.8840940345917195, iteration: 370651
loss: 0.9361169338226318,grad_norm: 0.7982947153147039, iteration: 370652
loss: 1.0229287147521973,grad_norm: 0.77468263549556, iteration: 370653
loss: 1.0299439430236816,grad_norm: 0.774000675682702, iteration: 370654
loss: 1.0347990989685059,grad_norm: 0.7407155316639239, iteration: 370655
loss: 0.9859642386436462,grad_norm: 0.7473836664507336, iteration: 370656
loss: 0.9934988021850586,grad_norm: 0.753917460131147, iteration: 370657
loss: 1.019407868385315,grad_norm: 0.836697496993536, iteration: 370658
loss: 1.0071163177490234,grad_norm: 0.6994292294413413, iteration: 370659
loss: 0.9741790890693665,grad_norm: 0.8357149219573182, iteration: 370660
loss: 1.0155045986175537,grad_norm: 0.8857771452849978, iteration: 370661
loss: 1.0256283283233643,grad_norm: 0.889383816467537, iteration: 370662
loss: 1.0300726890563965,grad_norm: 0.7885467661668052, iteration: 370663
loss: 1.0041273832321167,grad_norm: 0.8148031397596784, iteration: 370664
loss: 1.0010138750076294,grad_norm: 0.8132980450652793, iteration: 370665
loss: 1.0353753566741943,grad_norm: 0.7990344128479051, iteration: 370666
loss: 1.0132617950439453,grad_norm: 0.9040190130073421, iteration: 370667
loss: 0.9963073134422302,grad_norm: 0.6926044515258193, iteration: 370668
loss: 0.981515645980835,grad_norm: 0.9009293217583685, iteration: 370669
loss: 0.9474392533302307,grad_norm: 0.884019220602692, iteration: 370670
loss: 1.0131434202194214,grad_norm: 0.9257320379587859, iteration: 370671
loss: 1.00693941116333,grad_norm: 0.7913286866952406, iteration: 370672
loss: 0.9945700764656067,grad_norm: 0.9921567171679814, iteration: 370673
loss: 1.04621160030365,grad_norm: 0.9192092159096265, iteration: 370674
loss: 0.9905300736427307,grad_norm: 0.8589065553754383, iteration: 370675
loss: 1.0217604637145996,grad_norm: 0.9999999977250683, iteration: 370676
loss: 1.011427402496338,grad_norm: 0.9999997523578446, iteration: 370677
loss: 1.019940972328186,grad_norm: 0.7937191099051937, iteration: 370678
loss: 1.0219846963882446,grad_norm: 0.999999878801064, iteration: 370679
loss: 0.9920189380645752,grad_norm: 0.7402577486430202, iteration: 370680
loss: 1.0035151243209839,grad_norm: 0.9158692833708433, iteration: 370681
loss: 0.9912187457084656,grad_norm: 0.7369814582409153, iteration: 370682
loss: 0.9979442954063416,grad_norm: 0.9567892852732692, iteration: 370683
loss: 1.0166895389556885,grad_norm: 0.9515565212115744, iteration: 370684
loss: 1.0205552577972412,grad_norm: 0.7302722374504615, iteration: 370685
loss: 1.0123955011367798,grad_norm: 0.8058700699854874, iteration: 370686
loss: 1.0121113061904907,grad_norm: 0.9390623013735819, iteration: 370687
loss: 1.0267177820205688,grad_norm: 0.8029490851097922, iteration: 370688
loss: 1.0087937116622925,grad_norm: 0.7786611925402164, iteration: 370689
loss: 1.0051497220993042,grad_norm: 0.7783783844915803, iteration: 370690
loss: 1.1017237901687622,grad_norm: 0.9999990591636285, iteration: 370691
loss: 1.0205540657043457,grad_norm: 0.733465187219159, iteration: 370692
loss: 0.9864866733551025,grad_norm: 0.7378218476251865, iteration: 370693
loss: 1.000434160232544,grad_norm: 0.9094044965875517, iteration: 370694
loss: 1.0085604190826416,grad_norm: 0.7887117804395022, iteration: 370695
loss: 0.9796737432479858,grad_norm: 0.8310112601941708, iteration: 370696
loss: 1.0044292211532593,grad_norm: 0.9931556407191305, iteration: 370697
loss: 1.0168601274490356,grad_norm: 0.8347205387434814, iteration: 370698
loss: 0.9938138723373413,grad_norm: 0.7551766343308701, iteration: 370699
loss: 0.9965370893478394,grad_norm: 0.8054662043620102, iteration: 370700
loss: 1.01856529712677,grad_norm: 0.9344967402374805, iteration: 370701
loss: 0.9796797037124634,grad_norm: 0.8442096568199979, iteration: 370702
loss: 1.01932692527771,grad_norm: 0.8337477035078035, iteration: 370703
loss: 1.04934561252594,grad_norm: 0.7097114760172891, iteration: 370704
loss: 0.9865595102310181,grad_norm: 0.927458025978869, iteration: 370705
loss: 0.996518075466156,grad_norm: 0.8245130657360746, iteration: 370706
loss: 0.9891930222511292,grad_norm: 0.6821877977609493, iteration: 370707
loss: 0.9921122193336487,grad_norm: 0.9999990328746202, iteration: 370708
loss: 0.9631515145301819,grad_norm: 0.7934535970117005, iteration: 370709
loss: 0.9747492671012878,grad_norm: 0.8400505783058081, iteration: 370710
loss: 0.9660295844078064,grad_norm: 0.6709556316507762, iteration: 370711
loss: 0.9943188428878784,grad_norm: 0.843249871912581, iteration: 370712
loss: 0.9994653463363647,grad_norm: 0.999998902237158, iteration: 370713
loss: 0.9797306060791016,grad_norm: 0.7538066053501702, iteration: 370714
loss: 0.9949454069137573,grad_norm: 0.9999991535292296, iteration: 370715
loss: 0.9705554246902466,grad_norm: 0.9999991289813943, iteration: 370716
loss: 1.0229063034057617,grad_norm: 1.0000000221417444, iteration: 370717
loss: 1.1023272275924683,grad_norm: 0.9999993310047122, iteration: 370718
loss: 0.9883336424827576,grad_norm: 0.9819183815328895, iteration: 370719
loss: 1.004231333732605,grad_norm: 0.8038361143573302, iteration: 370720
loss: 1.0044809579849243,grad_norm: 0.7684917073108029, iteration: 370721
loss: 1.0436824560165405,grad_norm: 0.8053570647909165, iteration: 370722
loss: 1.0121580362319946,grad_norm: 0.9999992619038105, iteration: 370723
loss: 1.0079054832458496,grad_norm: 0.800641299371893, iteration: 370724
loss: 1.0089246034622192,grad_norm: 0.9214225057185043, iteration: 370725
loss: 0.9607522487640381,grad_norm: 0.8253864700579168, iteration: 370726
loss: 1.0383788347244263,grad_norm: 0.8013217789628434, iteration: 370727
loss: 1.0159398317337036,grad_norm: 0.6456671818627506, iteration: 370728
loss: 0.9874595999717712,grad_norm: 0.999999005672227, iteration: 370729
loss: 0.9952223300933838,grad_norm: 0.8100682486179255, iteration: 370730
loss: 1.0437926054000854,grad_norm: 0.9999992725977658, iteration: 370731
loss: 1.0309637784957886,grad_norm: 0.8255015104769353, iteration: 370732
loss: 1.001417636871338,grad_norm: 0.9999990917840422, iteration: 370733
loss: 1.1569443941116333,grad_norm: 0.999999359967894, iteration: 370734
loss: 1.0555344820022583,grad_norm: 0.8295890870117706, iteration: 370735
loss: 0.9465957880020142,grad_norm: 0.7251086092136095, iteration: 370736
loss: 1.00397527217865,grad_norm: 0.7312016970845852, iteration: 370737
loss: 1.2744234800338745,grad_norm: 0.9999995449058061, iteration: 370738
loss: 0.9844027757644653,grad_norm: 0.9213150590369522, iteration: 370739
loss: 1.1143244504928589,grad_norm: 0.8224037988082777, iteration: 370740
loss: 0.9962809085845947,grad_norm: 0.9663533813093209, iteration: 370741
loss: 0.9949503540992737,grad_norm: 0.8741475759071909, iteration: 370742
loss: 1.0127660036087036,grad_norm: 0.810728128472225, iteration: 370743
loss: 1.0001213550567627,grad_norm: 0.8075999683486035, iteration: 370744
loss: 0.9946461319923401,grad_norm: 0.7940459187990629, iteration: 370745
loss: 1.0054404735565186,grad_norm: 0.8561926422502795, iteration: 370746
loss: 1.0272557735443115,grad_norm: 0.9999999912620509, iteration: 370747
loss: 0.9957332015037537,grad_norm: 0.8921851653419586, iteration: 370748
loss: 0.9933404326438904,grad_norm: 0.9999991576998727, iteration: 370749
loss: 1.114271879196167,grad_norm: 0.9999990272568655, iteration: 370750
loss: 0.9918599724769592,grad_norm: 0.9999997009840165, iteration: 370751
loss: 1.0111249685287476,grad_norm: 0.7858165553869385, iteration: 370752
loss: 0.9946589469909668,grad_norm: 0.8103550885548113, iteration: 370753
loss: 0.9946298003196716,grad_norm: 0.8761779893906153, iteration: 370754
loss: 1.0032415390014648,grad_norm: 0.8107270501013957, iteration: 370755
loss: 1.0018539428710938,grad_norm: 0.7863071659998043, iteration: 370756
loss: 1.1165568828582764,grad_norm: 0.9999997066303192, iteration: 370757
loss: 0.9385643005371094,grad_norm: 0.9999992755075364, iteration: 370758
loss: 0.9770933389663696,grad_norm: 0.6661429266891905, iteration: 370759
loss: 1.0215321779251099,grad_norm: 0.8370312754725974, iteration: 370760
loss: 0.9851781129837036,grad_norm: 0.8352221471767138, iteration: 370761
loss: 0.9795567393302917,grad_norm: 0.8763419961153502, iteration: 370762
loss: 1.015623688697815,grad_norm: 0.9999990420765333, iteration: 370763
loss: 1.0091876983642578,grad_norm: 0.9006768704975926, iteration: 370764
loss: 1.020262360572815,grad_norm: 0.9999998791776639, iteration: 370765
loss: 1.0240167379379272,grad_norm: 0.9035667032812658, iteration: 370766
loss: 1.009285807609558,grad_norm: 0.7527724801722768, iteration: 370767
loss: 1.0762898921966553,grad_norm: 1.0000000008138343, iteration: 370768
loss: 0.9879847764968872,grad_norm: 0.7992081731962722, iteration: 370769
loss: 0.9597095251083374,grad_norm: 0.719259979252806, iteration: 370770
loss: 0.9985416531562805,grad_norm: 0.8468015433274204, iteration: 370771
loss: 1.0078984498977661,grad_norm: 0.9487115081619641, iteration: 370772
loss: 1.0341100692749023,grad_norm: 0.9999995511505764, iteration: 370773
loss: 0.9973670244216919,grad_norm: 0.8762821654257453, iteration: 370774
loss: 1.1909663677215576,grad_norm: 0.9999999406006826, iteration: 370775
loss: 1.0537599325180054,grad_norm: 0.999999268274357, iteration: 370776
loss: 1.005746841430664,grad_norm: 0.9999996154223602, iteration: 370777
loss: 1.0059092044830322,grad_norm: 0.8999778156386921, iteration: 370778
loss: 1.1104849576950073,grad_norm: 0.9999990019608187, iteration: 370779
loss: 1.0118571519851685,grad_norm: 0.8939659567893788, iteration: 370780
loss: 1.0512551069259644,grad_norm: 0.999999427090793, iteration: 370781
loss: 1.0048445463180542,grad_norm: 0.9999990489775556, iteration: 370782
loss: 1.0023927688598633,grad_norm: 0.7357753777285715, iteration: 370783
loss: 1.0057307481765747,grad_norm: 0.960759133608254, iteration: 370784
loss: 1.0384855270385742,grad_norm: 0.9999994795562411, iteration: 370785
loss: 0.9965830445289612,grad_norm: 0.8433259995418948, iteration: 370786
loss: 0.9844599366188049,grad_norm: 0.857054901552865, iteration: 370787
loss: 0.9676589965820312,grad_norm: 0.8584242912863568, iteration: 370788
loss: 1.001068353652954,grad_norm: 0.7958812433402283, iteration: 370789
loss: 1.0653330087661743,grad_norm: 0.9277961184036881, iteration: 370790
loss: 1.0131802558898926,grad_norm: 0.8943103744729437, iteration: 370791
loss: 1.0147123336791992,grad_norm: 0.7666478663025771, iteration: 370792
loss: 1.068918228149414,grad_norm: 0.8428324627653226, iteration: 370793
loss: 1.0321738719940186,grad_norm: 0.9999992836059599, iteration: 370794
loss: 1.0021165609359741,grad_norm: 0.8711237828105273, iteration: 370795
loss: 0.9867566823959351,grad_norm: 0.9072211287386893, iteration: 370796
loss: 0.9866136908531189,grad_norm: 0.662102241928212, iteration: 370797
loss: 1.0351471900939941,grad_norm: 0.9999999713948899, iteration: 370798
loss: 1.0001170635223389,grad_norm: 0.8228882445528025, iteration: 370799
loss: 1.0919849872589111,grad_norm: 0.9999997614425336, iteration: 370800
loss: 1.0139650106430054,grad_norm: 0.7986760562342617, iteration: 370801
loss: 0.9757209420204163,grad_norm: 0.7438214821064115, iteration: 370802
loss: 0.9888554215431213,grad_norm: 0.8866974414716153, iteration: 370803
loss: 1.0247749090194702,grad_norm: 0.7303290698851224, iteration: 370804
loss: 0.9870274066925049,grad_norm: 0.8227248027876929, iteration: 370805
loss: 1.0080689191818237,grad_norm: 0.8679170289893026, iteration: 370806
loss: 0.9776327610015869,grad_norm: 0.7876149715766294, iteration: 370807
loss: 1.0031425952911377,grad_norm: 0.7350046876554679, iteration: 370808
loss: 1.0167207717895508,grad_norm: 0.8537168090429053, iteration: 370809
loss: 1.0499906539916992,grad_norm: 0.9999998580813938, iteration: 370810
loss: 1.0241761207580566,grad_norm: 0.8234711549947594, iteration: 370811
loss: 0.9763870239257812,grad_norm: 0.8692180748386055, iteration: 370812
loss: 1.006986141204834,grad_norm: 0.9999996956982363, iteration: 370813
loss: 1.018455147743225,grad_norm: 0.7829342848247035, iteration: 370814
loss: 1.0086110830307007,grad_norm: 0.9999996123447239, iteration: 370815
loss: 1.0233418941497803,grad_norm: 0.9612040809367108, iteration: 370816
loss: 0.9785836935043335,grad_norm: 0.7736993194884487, iteration: 370817
loss: 0.9940398931503296,grad_norm: 0.8457727319758762, iteration: 370818
loss: 1.0254299640655518,grad_norm: 0.8697320534317403, iteration: 370819
loss: 0.9918134808540344,grad_norm: 0.8521096308526854, iteration: 370820
loss: 0.9957258105278015,grad_norm: 0.8406954727896517, iteration: 370821
loss: 0.9738019704818726,grad_norm: 0.9999991329415917, iteration: 370822
loss: 0.9881898760795593,grad_norm: 0.7310277979430695, iteration: 370823
loss: 1.0217474699020386,grad_norm: 0.6459616798910747, iteration: 370824
loss: 1.0447275638580322,grad_norm: 0.9289886967261628, iteration: 370825
loss: 1.0189859867095947,grad_norm: 0.9403966391938638, iteration: 370826
loss: 1.087717056274414,grad_norm: 0.8117895911922896, iteration: 370827
loss: 1.0903000831604004,grad_norm: 0.9999992953754012, iteration: 370828
loss: 0.9534189701080322,grad_norm: 0.7081153005858415, iteration: 370829
loss: 1.0526114702224731,grad_norm: 0.9764781818010404, iteration: 370830
loss: 1.0165938138961792,grad_norm: 0.999999210308698, iteration: 370831
loss: 1.027064561843872,grad_norm: 0.9999992017795688, iteration: 370832
loss: 1.003395676612854,grad_norm: 0.830717832884397, iteration: 370833
loss: 1.029074788093567,grad_norm: 0.8312235565611555, iteration: 370834
loss: 1.0410016775131226,grad_norm: 0.8636849217564261, iteration: 370835
loss: 0.9991313219070435,grad_norm: 0.8049844223667357, iteration: 370836
loss: 1.0145914554595947,grad_norm: 0.9276534417019893, iteration: 370837
loss: 0.9792566895484924,grad_norm: 0.7544855544928821, iteration: 370838
loss: 0.9817389249801636,grad_norm: 0.9426519316024072, iteration: 370839
loss: 1.003957748413086,grad_norm: 0.8167014198196156, iteration: 370840
loss: 0.9909451007843018,grad_norm: 0.913016405352297, iteration: 370841
loss: 1.0142110586166382,grad_norm: 0.7615031551557476, iteration: 370842
loss: 1.009026050567627,grad_norm: 0.6767605875442839, iteration: 370843
loss: 1.0023049116134644,grad_norm: 0.9999991220732118, iteration: 370844
loss: 1.006888747215271,grad_norm: 0.7638912293775935, iteration: 370845
loss: 1.0715166330337524,grad_norm: 0.7945623690744361, iteration: 370846
loss: 1.0347672700881958,grad_norm: 0.8720812733154805, iteration: 370847
loss: 1.0488687753677368,grad_norm: 0.9520221039114034, iteration: 370848
loss: 1.0084457397460938,grad_norm: 0.8613242768954205, iteration: 370849
loss: 1.0844672918319702,grad_norm: 0.999999596125113, iteration: 370850
loss: 0.9797325730323792,grad_norm: 0.6774702857053909, iteration: 370851
loss: 1.0309444665908813,grad_norm: 0.8092103502777689, iteration: 370852
loss: 1.000207781791687,grad_norm: 0.7212652741171907, iteration: 370853
loss: 1.1193174123764038,grad_norm: 0.8398369347777198, iteration: 370854
loss: 1.081192135810852,grad_norm: 0.9999991225842967, iteration: 370855
loss: 0.9880056381225586,grad_norm: 0.7384345890845934, iteration: 370856
loss: 0.9833905696868896,grad_norm: 0.8393732665510538, iteration: 370857
loss: 1.0032480955123901,grad_norm: 0.9953370046343936, iteration: 370858
loss: 0.9684895873069763,grad_norm: 0.8036185619044105, iteration: 370859
loss: 1.0179907083511353,grad_norm: 0.7961750276570758, iteration: 370860
loss: 0.9625172019004822,grad_norm: 0.8094564845339947, iteration: 370861
loss: 0.9607843160629272,grad_norm: 0.9099391540510983, iteration: 370862
loss: 0.9746132493019104,grad_norm: 0.9886097926951958, iteration: 370863
loss: 1.0671082735061646,grad_norm: 0.9999998458343278, iteration: 370864
loss: 1.0021294355392456,grad_norm: 0.6340308533719836, iteration: 370865
loss: 0.9852264523506165,grad_norm: 0.7205084867634325, iteration: 370866
loss: 0.9860525131225586,grad_norm: 0.9999990155501596, iteration: 370867
loss: 0.9904942512512207,grad_norm: 0.7675902448916448, iteration: 370868
loss: 1.0253461599349976,grad_norm: 0.7266397593430054, iteration: 370869
loss: 0.9970198273658752,grad_norm: 0.8409920953620642, iteration: 370870
loss: 1.053530216217041,grad_norm: 0.9999997919005785, iteration: 370871
loss: 1.0161961317062378,grad_norm: 0.9751310830188343, iteration: 370872
loss: 1.0238605737686157,grad_norm: 0.7384756367609832, iteration: 370873
loss: 0.9725804328918457,grad_norm: 0.8939051434130509, iteration: 370874
loss: 1.015727162361145,grad_norm: 0.8239439797494287, iteration: 370875
loss: 1.0344222784042358,grad_norm: 0.8429326223145113, iteration: 370876
loss: 0.9817640781402588,grad_norm: 0.8005653081656866, iteration: 370877
loss: 0.9848861694335938,grad_norm: 0.7615316482525015, iteration: 370878
loss: 1.045969009399414,grad_norm: 0.9488562282946873, iteration: 370879
loss: 1.089482307434082,grad_norm: 0.9999996311062377, iteration: 370880
loss: 0.9891117811203003,grad_norm: 0.7692572614788403, iteration: 370881
loss: 1.0220409631729126,grad_norm: 0.8571604269051744, iteration: 370882
loss: 0.9768016934394836,grad_norm: 0.8066284081916556, iteration: 370883
loss: 0.9923774003982544,grad_norm: 0.7195117032801941, iteration: 370884
loss: 1.0223222970962524,grad_norm: 0.7777708293840325, iteration: 370885
loss: 1.0249011516571045,grad_norm: 0.7617142270580848, iteration: 370886
loss: 1.0012907981872559,grad_norm: 0.9999991560614563, iteration: 370887
loss: 1.0177805423736572,grad_norm: 0.6527221854966677, iteration: 370888
loss: 1.0115978717803955,grad_norm: 0.7784823806614044, iteration: 370889
loss: 1.0279539823532104,grad_norm: 0.8063477784764934, iteration: 370890
loss: 1.035134196281433,grad_norm: 0.8997315533020132, iteration: 370891
loss: 0.9987478852272034,grad_norm: 0.838412849416633, iteration: 370892
loss: 1.0290347337722778,grad_norm: 0.6957696730748619, iteration: 370893
loss: 1.0110105276107788,grad_norm: 0.883618857429023, iteration: 370894
loss: 1.0221582651138306,grad_norm: 0.8956783486699728, iteration: 370895
loss: 1.0299063920974731,grad_norm: 0.8804851667272725, iteration: 370896
loss: 1.020325779914856,grad_norm: 0.7782073318461681, iteration: 370897
loss: 1.0879576206207275,grad_norm: 0.7186543063185085, iteration: 370898
loss: 0.9779760837554932,grad_norm: 0.7872934096696993, iteration: 370899
loss: 1.0095610618591309,grad_norm: 0.8706227835780884, iteration: 370900
loss: 1.0341675281524658,grad_norm: 0.8521114840500146, iteration: 370901
loss: 0.9875378012657166,grad_norm: 0.9999991745663673, iteration: 370902
loss: 0.9846856594085693,grad_norm: 0.7530191005746066, iteration: 370903
loss: 0.9962252378463745,grad_norm: 0.8091220925552234, iteration: 370904
loss: 1.0118573904037476,grad_norm: 0.999999047012267, iteration: 370905
loss: 1.0000462532043457,grad_norm: 0.9999993454028588, iteration: 370906
loss: 0.9906303286552429,grad_norm: 0.7488323006397964, iteration: 370907
loss: 0.9873215556144714,grad_norm: 0.8804235891450946, iteration: 370908
loss: 0.9671496748924255,grad_norm: 0.72742688937837, iteration: 370909
loss: 1.0138766765594482,grad_norm: 0.7037714523128535, iteration: 370910
loss: 1.008288860321045,grad_norm: 0.9999991939187125, iteration: 370911
loss: 1.000484824180603,grad_norm: 0.7789133863765713, iteration: 370912
loss: 1.015580415725708,grad_norm: 0.8176711520607634, iteration: 370913
loss: 0.990994393825531,grad_norm: 0.7443186194687629, iteration: 370914
loss: 1.0165369510650635,grad_norm: 0.7018527735057029, iteration: 370915
loss: 1.123421311378479,grad_norm: 0.9999997721520582, iteration: 370916
loss: 1.0089269876480103,grad_norm: 0.850582406745098, iteration: 370917
loss: 0.9860571026802063,grad_norm: 0.7842458194728833, iteration: 370918
loss: 1.0731695890426636,grad_norm: 0.7887485229973704, iteration: 370919
loss: 0.993787407875061,grad_norm: 0.6948797355661228, iteration: 370920
loss: 0.9841506481170654,grad_norm: 0.8939575606907606, iteration: 370921
loss: 1.0066983699798584,grad_norm: 0.9920712568881374, iteration: 370922
loss: 0.9900317192077637,grad_norm: 0.8376524245581157, iteration: 370923
loss: 1.0721051692962646,grad_norm: 0.7877795278617595, iteration: 370924
loss: 0.9859403967857361,grad_norm: 0.869122410595423, iteration: 370925
loss: 0.9857001304626465,grad_norm: 0.9999995911424147, iteration: 370926
loss: 0.9604201912879944,grad_norm: 0.7776872657164706, iteration: 370927
loss: 0.9797453284263611,grad_norm: 0.9999999869782344, iteration: 370928
loss: 1.1192331314086914,grad_norm: 0.9999992381617833, iteration: 370929
loss: 1.015641212463379,grad_norm: 0.9999990397707281, iteration: 370930
loss: 1.0827161073684692,grad_norm: 0.9999991338192992, iteration: 370931
loss: 1.0028387308120728,grad_norm: 0.6975514711770634, iteration: 370932
loss: 1.0687953233718872,grad_norm: 0.9999993170556385, iteration: 370933
loss: 1.0040498971939087,grad_norm: 0.9370109576077302, iteration: 370934
loss: 1.004402995109558,grad_norm: 0.783556454055347, iteration: 370935
loss: 0.9671439528465271,grad_norm: 0.792724897879153, iteration: 370936
loss: 1.003010630607605,grad_norm: 0.8904541755618411, iteration: 370937
loss: 1.0018019676208496,grad_norm: 0.9709452432796807, iteration: 370938
loss: 1.0012364387512207,grad_norm: 0.7059274143568459, iteration: 370939
loss: 1.0137943029403687,grad_norm: 0.7308236363314822, iteration: 370940
loss: 0.9572205543518066,grad_norm: 0.8093654810767269, iteration: 370941
loss: 1.0114405155181885,grad_norm: 0.7183861842566661, iteration: 370942
loss: 1.0102299451828003,grad_norm: 0.9999995478678695, iteration: 370943
loss: 1.028111457824707,grad_norm: 0.8482988917780179, iteration: 370944
loss: 0.9924501180648804,grad_norm: 0.7474066459287192, iteration: 370945
loss: 0.9984592795372009,grad_norm: 0.8129309223379692, iteration: 370946
loss: 0.9798762202262878,grad_norm: 0.6789804567579015, iteration: 370947
loss: 1.0071072578430176,grad_norm: 0.6690138170616451, iteration: 370948
loss: 0.9665435552597046,grad_norm: 0.794926162707047, iteration: 370949
loss: 0.9836633205413818,grad_norm: 0.999999389933816, iteration: 370950
loss: 0.9999127388000488,grad_norm: 0.7951539315183382, iteration: 370951
loss: 0.9916231036186218,grad_norm: 0.904754214466101, iteration: 370952
loss: 1.039691686630249,grad_norm: 0.8615771045630889, iteration: 370953
loss: 1.0289634466171265,grad_norm: 0.9223825567298971, iteration: 370954
loss: 1.0566762685775757,grad_norm: 0.9999992948041235, iteration: 370955
loss: 1.0115563869476318,grad_norm: 0.9999991272271963, iteration: 370956
loss: 1.015292763710022,grad_norm: 0.7952837327857885, iteration: 370957
loss: 0.9884176254272461,grad_norm: 0.7687993376667329, iteration: 370958
loss: 0.9911884665489197,grad_norm: 0.8864754865464435, iteration: 370959
loss: 0.9950047731399536,grad_norm: 0.9999990776300604, iteration: 370960
loss: 1.0060222148895264,grad_norm: 0.7800909444084102, iteration: 370961
loss: 0.9747695922851562,grad_norm: 0.8414513877720894, iteration: 370962
loss: 1.0674526691436768,grad_norm: 0.7840257344254331, iteration: 370963
loss: 1.0257844924926758,grad_norm: 0.8000159970525711, iteration: 370964
loss: 0.99224853515625,grad_norm: 0.8689966025520706, iteration: 370965
loss: 0.9872141480445862,grad_norm: 0.8475890178032272, iteration: 370966
loss: 1.0447494983673096,grad_norm: 0.8143085939288711, iteration: 370967
loss: 1.002509355545044,grad_norm: 0.7877058177963869, iteration: 370968
loss: 1.0112855434417725,grad_norm: 0.9999991497759296, iteration: 370969
loss: 1.0006145238876343,grad_norm: 0.8210625753976886, iteration: 370970
loss: 0.9584446549415588,grad_norm: 0.8165637823324283, iteration: 370971
loss: 1.0012565851211548,grad_norm: 0.9293132933712402, iteration: 370972
loss: 0.9564099311828613,grad_norm: 0.99999912944126, iteration: 370973
loss: 0.9823358654975891,grad_norm: 0.7881787847218367, iteration: 370974
loss: 0.9563367366790771,grad_norm: 0.7421820209518517, iteration: 370975
loss: 0.9921624660491943,grad_norm: 0.7715609556709331, iteration: 370976
loss: 1.0282503366470337,grad_norm: 0.999999189044662, iteration: 370977
loss: 1.0007184743881226,grad_norm: 0.9999990922766232, iteration: 370978
loss: 1.0013794898986816,grad_norm: 0.7361455154555417, iteration: 370979
loss: 0.991662323474884,grad_norm: 0.9821643696385576, iteration: 370980
loss: 1.0245184898376465,grad_norm: 0.9195159565138814, iteration: 370981
loss: 0.9966204762458801,grad_norm: 0.7943278851194231, iteration: 370982
loss: 1.0275448560714722,grad_norm: 0.6206217266713429, iteration: 370983
loss: 1.0002784729003906,grad_norm: 0.7621319807589443, iteration: 370984
loss: 1.0298881530761719,grad_norm: 0.850014112801828, iteration: 370985
loss: 0.9985559582710266,grad_norm: 0.6988625912286592, iteration: 370986
loss: 1.0355018377304077,grad_norm: 0.8169103656806631, iteration: 370987
loss: 1.0305949449539185,grad_norm: 0.9999990354388554, iteration: 370988
loss: 1.0010643005371094,grad_norm: 0.9999996382385911, iteration: 370989
loss: 1.1734927892684937,grad_norm: 0.9999996919231086, iteration: 370990
loss: 1.0011541843414307,grad_norm: 0.9431565185282111, iteration: 370991
loss: 1.0049688816070557,grad_norm: 0.9864467755462779, iteration: 370992
loss: 1.0074763298034668,grad_norm: 0.737864077871655, iteration: 370993
loss: 0.9784095883369446,grad_norm: 0.887263323807251, iteration: 370994
loss: 1.0607601404190063,grad_norm: 0.8384506475451026, iteration: 370995
loss: 1.0048298835754395,grad_norm: 0.8625491797342415, iteration: 370996
loss: 1.016361951828003,grad_norm: 0.7525031171871632, iteration: 370997
loss: 1.0014773607254028,grad_norm: 0.979920736726492, iteration: 370998
loss: 1.0185587406158447,grad_norm: 0.7299238747996435, iteration: 370999
loss: 1.123154878616333,grad_norm: 0.9999998476430925, iteration: 371000
loss: 0.9901242852210999,grad_norm: 0.7703117581093583, iteration: 371001
loss: 1.0377728939056396,grad_norm: 0.693359953054609, iteration: 371002
loss: 1.0047357082366943,grad_norm: 0.8213018957149006, iteration: 371003
loss: 1.0006779432296753,grad_norm: 0.8211229623892408, iteration: 371004
loss: 1.0057259798049927,grad_norm: 0.8315563374268929, iteration: 371005
loss: 1.0267457962036133,grad_norm: 0.8183516130696858, iteration: 371006
loss: 0.995673418045044,grad_norm: 0.7679182894933836, iteration: 371007
loss: 1.0784770250320435,grad_norm: 0.9999998616575532, iteration: 371008
loss: 0.9567686915397644,grad_norm: 0.8008777352808931, iteration: 371009
loss: 0.9858523011207581,grad_norm: 0.7906132175705877, iteration: 371010
loss: 0.9835914969444275,grad_norm: 0.838005373980174, iteration: 371011
loss: 0.984789252281189,grad_norm: 0.8717734304060155, iteration: 371012
loss: 1.0047235488891602,grad_norm: 0.7384968554698622, iteration: 371013
loss: 1.0412936210632324,grad_norm: 0.735422909779587, iteration: 371014
loss: 1.0077273845672607,grad_norm: 0.7387519009103716, iteration: 371015
loss: 0.9285670518875122,grad_norm: 0.8764583541859992, iteration: 371016
loss: 1.1676238775253296,grad_norm: 0.99999920471927, iteration: 371017
loss: 0.967585563659668,grad_norm: 0.69318948576206, iteration: 371018
loss: 0.9992482662200928,grad_norm: 0.8775654783562753, iteration: 371019
loss: 1.0084235668182373,grad_norm: 0.9046973162713579, iteration: 371020
loss: 1.0076924562454224,grad_norm: 0.9536343946807371, iteration: 371021
loss: 1.0036062002182007,grad_norm: 0.8508590381643022, iteration: 371022
loss: 1.0224030017852783,grad_norm: 0.9103672190945443, iteration: 371023
loss: 1.0727006196975708,grad_norm: 0.9502362996407773, iteration: 371024
loss: 0.9732620716094971,grad_norm: 0.942071869870279, iteration: 371025
loss: 1.025143027305603,grad_norm: 0.771336818727238, iteration: 371026
loss: 1.0004128217697144,grad_norm: 0.9713967781266107, iteration: 371027
loss: 1.005325436592102,grad_norm: 0.7924096068851187, iteration: 371028
loss: 0.9798344969749451,grad_norm: 0.8284905115486899, iteration: 371029
loss: 0.9971373081207275,grad_norm: 0.9999989670436495, iteration: 371030
loss: 1.0231199264526367,grad_norm: 0.9999998670435628, iteration: 371031
loss: 1.0250262022018433,grad_norm: 0.9180829527629293, iteration: 371032
loss: 0.9707039594650269,grad_norm: 0.7645327601627173, iteration: 371033
loss: 1.007216215133667,grad_norm: 0.7531243443937038, iteration: 371034
loss: 1.0001580715179443,grad_norm: 0.7793077885483202, iteration: 371035
loss: 1.0095374584197998,grad_norm: 0.7349630703790004, iteration: 371036
loss: 1.0086147785186768,grad_norm: 0.7950652939978158, iteration: 371037
loss: 1.0383949279785156,grad_norm: 0.6636533161299472, iteration: 371038
loss: 1.0130672454833984,grad_norm: 0.8711307422352136, iteration: 371039
loss: 0.9726282358169556,grad_norm: 0.6256480090540517, iteration: 371040
loss: 0.9990112781524658,grad_norm: 0.8647360792704446, iteration: 371041
loss: 0.9510146379470825,grad_norm: 0.766518512298618, iteration: 371042
loss: 1.015938639640808,grad_norm: 0.6845848397493147, iteration: 371043
loss: 0.9811596870422363,grad_norm: 0.7221834073934047, iteration: 371044
loss: 0.9334639310836792,grad_norm: 0.8730771842978645, iteration: 371045
loss: 0.9900591373443604,grad_norm: 0.9999990863482409, iteration: 371046
loss: 1.0031230449676514,grad_norm: 0.753149456227976, iteration: 371047
loss: 0.967552900314331,grad_norm: 0.8003396049537023, iteration: 371048
loss: 0.9849408268928528,grad_norm: 0.8212305667933031, iteration: 371049
loss: 0.9867696762084961,grad_norm: 0.8983014942451792, iteration: 371050
loss: 0.9929381608963013,grad_norm: 0.8999352770714865, iteration: 371051
loss: 0.997687578201294,grad_norm: 0.8593102280860532, iteration: 371052
loss: 1.0012767314910889,grad_norm: 0.9458404729288635, iteration: 371053
loss: 0.9827923774719238,grad_norm: 0.9147748100182028, iteration: 371054
loss: 0.9665492177009583,grad_norm: 0.738051753459757, iteration: 371055
loss: 0.9953593015670776,grad_norm: 0.8891082667885527, iteration: 371056
loss: 0.9734275341033936,grad_norm: 0.9999991542929384, iteration: 371057
loss: 1.0279425382614136,grad_norm: 0.9999993791073339, iteration: 371058
loss: 0.9785865545272827,grad_norm: 0.6629758284315678, iteration: 371059
loss: 0.9690459966659546,grad_norm: 0.722996678570662, iteration: 371060
loss: 1.0559256076812744,grad_norm: 0.9515408144322718, iteration: 371061
loss: 0.9830926060676575,grad_norm: 0.9222239565198778, iteration: 371062
loss: 1.0302256345748901,grad_norm: 0.8102867489596991, iteration: 371063
loss: 0.9833875894546509,grad_norm: 0.9715428135177321, iteration: 371064
loss: 1.0530120134353638,grad_norm: 0.8387361375902975, iteration: 371065
loss: 1.0018144845962524,grad_norm: 0.8803930118270653, iteration: 371066
loss: 0.9774394035339355,grad_norm: 0.806912382639122, iteration: 371067
loss: 1.018455147743225,grad_norm: 0.9508661512156991, iteration: 371068
loss: 1.0155965089797974,grad_norm: 0.9999990805818866, iteration: 371069
loss: 1.0129656791687012,grad_norm: 0.734659499950962, iteration: 371070
loss: 1.0468519926071167,grad_norm: 0.9272877684386654, iteration: 371071
loss: 1.0002254247665405,grad_norm: 0.874326494463794, iteration: 371072
loss: 0.9777735471725464,grad_norm: 0.7806307121117512, iteration: 371073
loss: 1.02483069896698,grad_norm: 0.9665480443444248, iteration: 371074
loss: 1.0108983516693115,grad_norm: 0.887058561431147, iteration: 371075
loss: 0.9773481488227844,grad_norm: 0.7344169987938405, iteration: 371076
loss: 1.0493240356445312,grad_norm: 0.9999994613216631, iteration: 371077
loss: 1.014996886253357,grad_norm: 0.8183398238059478, iteration: 371078
loss: 1.0458831787109375,grad_norm: 1.0000000657210877, iteration: 371079
loss: 0.9751297235488892,grad_norm: 0.7511981030072867, iteration: 371080
loss: 1.0002145767211914,grad_norm: 0.7298132008133548, iteration: 371081
loss: 1.0271300077438354,grad_norm: 0.9384219330626857, iteration: 371082
loss: 0.9990716576576233,grad_norm: 0.9034944761480543, iteration: 371083
loss: 1.0829553604125977,grad_norm: 0.9999991402084408, iteration: 371084
loss: 0.9645036458969116,grad_norm: 0.8316490579238534, iteration: 371085
loss: 1.0289850234985352,grad_norm: 0.8206426787525372, iteration: 371086
loss: 1.006196141242981,grad_norm: 0.6485206069724684, iteration: 371087
loss: 1.0145174264907837,grad_norm: 0.8460219709834997, iteration: 371088
loss: 0.9915973544120789,grad_norm: 0.8854039484753062, iteration: 371089
loss: 0.9893701076507568,grad_norm: 0.7550057733031343, iteration: 371090
loss: 0.9998362064361572,grad_norm: 0.8874384860360284, iteration: 371091
loss: 1.0091229677200317,grad_norm: 0.8262106209588623, iteration: 371092
loss: 1.0143115520477295,grad_norm: 0.9999993077590508, iteration: 371093
loss: 1.0048308372497559,grad_norm: 0.7993584682953898, iteration: 371094
loss: 0.9830654263496399,grad_norm: 0.7490080961959587, iteration: 371095
loss: 0.9647626280784607,grad_norm: 0.7769439367900229, iteration: 371096
loss: 1.0232596397399902,grad_norm: 0.6021200235612996, iteration: 371097
loss: 1.0009149312973022,grad_norm: 0.6941040150629036, iteration: 371098
loss: 0.9981088042259216,grad_norm: 0.8075703301910092, iteration: 371099
loss: 1.0173851251602173,grad_norm: 0.814991847731921, iteration: 371100
loss: 0.9872214794158936,grad_norm: 0.8698085351217693, iteration: 371101
loss: 0.9928023815155029,grad_norm: 0.8237125901769723, iteration: 371102
loss: 0.9730106592178345,grad_norm: 0.8939066776339525, iteration: 371103
loss: 1.1051533222198486,grad_norm: 0.9999997730913522, iteration: 371104
loss: 1.0096794366836548,grad_norm: 0.9819855405335036, iteration: 371105
loss: 0.984153687953949,grad_norm: 0.7429895204055609, iteration: 371106
loss: 0.9872519373893738,grad_norm: 0.9770205265120699, iteration: 371107
loss: 0.9975364208221436,grad_norm: 0.9999994603565283, iteration: 371108
loss: 1.0062505006790161,grad_norm: 0.7126702983721487, iteration: 371109
loss: 1.0022279024124146,grad_norm: 0.8511649082639112, iteration: 371110
loss: 1.0393946170806885,grad_norm: 0.9999991939451427, iteration: 371111
loss: 0.9906319975852966,grad_norm: 0.9281793680127325, iteration: 371112
loss: 1.3471505641937256,grad_norm: 0.9999995272660428, iteration: 371113
loss: 1.0141328573226929,grad_norm: 0.8484538450880025, iteration: 371114
loss: 0.9851228594779968,grad_norm: 0.8875863722143006, iteration: 371115
loss: 1.061744213104248,grad_norm: 0.9999990045926122, iteration: 371116
loss: 1.0145944356918335,grad_norm: 0.8672481779197486, iteration: 371117
loss: 0.9935194253921509,grad_norm: 0.6288373964851328, iteration: 371118
loss: 1.0613257884979248,grad_norm: 0.7617100926240288, iteration: 371119
loss: 0.99383145570755,grad_norm: 0.9041195189549748, iteration: 371120
loss: 1.096818447113037,grad_norm: 0.9999994592855148, iteration: 371121
loss: 1.028782606124878,grad_norm: 0.706400015019854, iteration: 371122
loss: 0.9973023533821106,grad_norm: 0.9999999559545893, iteration: 371123
loss: 1.0224113464355469,grad_norm: 0.8140314072239945, iteration: 371124
loss: 0.9940012693405151,grad_norm: 0.7658724258116906, iteration: 371125
loss: 1.0173200368881226,grad_norm: 0.7975875120786604, iteration: 371126
loss: 0.982887864112854,grad_norm: 0.8732565025719323, iteration: 371127
loss: 0.9621468186378479,grad_norm: 0.7823970809602325, iteration: 371128
loss: 0.999943733215332,grad_norm: 0.7115698003701258, iteration: 371129
loss: 1.0265655517578125,grad_norm: 0.685716548982109, iteration: 371130
loss: 1.022526741027832,grad_norm: 0.7775583020338711, iteration: 371131
loss: 0.9980728626251221,grad_norm: 0.8008394456506019, iteration: 371132
loss: 1.0156018733978271,grad_norm: 0.9999993863682223, iteration: 371133
loss: 0.9917852878570557,grad_norm: 0.7257094180236725, iteration: 371134
loss: 0.9878414869308472,grad_norm: 0.9999997495408791, iteration: 371135
loss: 1.0783823728561401,grad_norm: 0.999999228103999, iteration: 371136
loss: 0.9809459447860718,grad_norm: 0.8390558609380875, iteration: 371137
loss: 1.0128287076950073,grad_norm: 0.8227532367286813, iteration: 371138
loss: 0.981574535369873,grad_norm: 0.7459024812426439, iteration: 371139
loss: 1.0218768119812012,grad_norm: 0.9536436883455621, iteration: 371140
loss: 0.9993970394134521,grad_norm: 0.7319513530478111, iteration: 371141
loss: 0.9929014444351196,grad_norm: 0.7934540446621628, iteration: 371142
loss: 0.9890726208686829,grad_norm: 0.7201924989372009, iteration: 371143
loss: 0.9822295308113098,grad_norm: 0.9332236379121018, iteration: 371144
loss: 1.0130122900009155,grad_norm: 0.6649740093165875, iteration: 371145
loss: 0.9757136106491089,grad_norm: 0.7378162966651554, iteration: 371146
loss: 1.03203547000885,grad_norm: 0.9999992314368636, iteration: 371147
loss: 0.9877639412879944,grad_norm: 0.776414344451669, iteration: 371148
loss: 1.0082991123199463,grad_norm: 0.9999995987836259, iteration: 371149
loss: 0.9672171473503113,grad_norm: 0.8227537941831291, iteration: 371150
loss: 1.0018556118011475,grad_norm: 0.8445050928416737, iteration: 371151
loss: 1.008806824684143,grad_norm: 0.7125340219986326, iteration: 371152
loss: 1.019051194190979,grad_norm: 0.9999991119077243, iteration: 371153
loss: 1.006317138671875,grad_norm: 0.9186769111530818, iteration: 371154
loss: 1.0126250982284546,grad_norm: 0.7636309831182101, iteration: 371155
loss: 0.9889103174209595,grad_norm: 0.9999989993503712, iteration: 371156
loss: 0.9578559994697571,grad_norm: 0.9999990180844821, iteration: 371157
loss: 1.0007075071334839,grad_norm: 0.8037370240673711, iteration: 371158
loss: 1.0417309999465942,grad_norm: 0.92761057996423, iteration: 371159
loss: 0.9997307062149048,grad_norm: 0.8544002587638077, iteration: 371160
loss: 0.9510570168495178,grad_norm: 0.8071300799127845, iteration: 371161
loss: 0.9659088253974915,grad_norm: 0.8052107148799021, iteration: 371162
loss: 0.9994873404502869,grad_norm: 0.8571992605871258, iteration: 371163
loss: 1.0452868938446045,grad_norm: 0.7509624174495247, iteration: 371164
loss: 0.9976192116737366,grad_norm: 0.7354350001318352, iteration: 371165
loss: 1.0713996887207031,grad_norm: 0.9999998110832208, iteration: 371166
loss: 1.0155302286148071,grad_norm: 0.6719190290684984, iteration: 371167
loss: 0.9778944849967957,grad_norm: 0.8068607207284638, iteration: 371168
loss: 1.0005098581314087,grad_norm: 0.7358881990812961, iteration: 371169
loss: 1.0125031471252441,grad_norm: 0.8126986498254487, iteration: 371170
loss: 0.9676933288574219,grad_norm: 0.7282810583884392, iteration: 371171
loss: 1.006784439086914,grad_norm: 0.8737951508763744, iteration: 371172
loss: 0.9914052486419678,grad_norm: 0.8250803778377029, iteration: 371173
loss: 1.0036622285842896,grad_norm: 0.7505552199492603, iteration: 371174
loss: 0.9700247049331665,grad_norm: 0.726863471059889, iteration: 371175
loss: 1.0216630697250366,grad_norm: 0.9513041851176127, iteration: 371176
loss: 0.9695351719856262,grad_norm: 0.8254918920620471, iteration: 371177
loss: 1.0375276803970337,grad_norm: 1.0000000483803735, iteration: 371178
loss: 1.006565809249878,grad_norm: 0.8410830530974058, iteration: 371179
loss: 1.0309737920761108,grad_norm: 0.7972875985475233, iteration: 371180
loss: 1.0716017484664917,grad_norm: 0.7704366268657653, iteration: 371181
loss: 1.0074275732040405,grad_norm: 0.7529394027436965, iteration: 371182
loss: 1.0023632049560547,grad_norm: 0.7432930381980116, iteration: 371183
loss: 1.0170631408691406,grad_norm: 0.7158050537065647, iteration: 371184
loss: 0.9871806502342224,grad_norm: 0.9999994323655115, iteration: 371185
loss: 0.9886411428451538,grad_norm: 0.9973319572655798, iteration: 371186
loss: 1.008424162864685,grad_norm: 0.7842343414256088, iteration: 371187
loss: 0.9714034795761108,grad_norm: 0.8259412352167516, iteration: 371188
loss: 0.9644845724105835,grad_norm: 0.771989254820448, iteration: 371189
loss: 1.0328831672668457,grad_norm: 0.8937282693504884, iteration: 371190
loss: 1.0100423097610474,grad_norm: 0.6699143251182695, iteration: 371191
loss: 0.9813517928123474,grad_norm: 0.835446977603703, iteration: 371192
loss: 1.0642956495285034,grad_norm: 0.9999994516205197, iteration: 371193
loss: 0.99029141664505,grad_norm: 0.8029123990010248, iteration: 371194
loss: 1.1007874011993408,grad_norm: 0.999999615629756, iteration: 371195
loss: 0.975744903087616,grad_norm: 0.6600115008056965, iteration: 371196
loss: 1.0443869829177856,grad_norm: 0.8051223669859572, iteration: 371197
loss: 0.9723532199859619,grad_norm: 0.6710131070835915, iteration: 371198
loss: 1.0207451581954956,grad_norm: 0.8822338828901746, iteration: 371199
loss: 1.008876919746399,grad_norm: 0.8891825241169015, iteration: 371200
loss: 0.9847070574760437,grad_norm: 0.7962743324638688, iteration: 371201
loss: 0.9959525465965271,grad_norm: 0.999999314028168, iteration: 371202
loss: 1.0258787870407104,grad_norm: 0.999999277658023, iteration: 371203
loss: 1.008488655090332,grad_norm: 0.7724993662528419, iteration: 371204
loss: 1.0219305753707886,grad_norm: 0.9915483374856334, iteration: 371205
loss: 1.0164673328399658,grad_norm: 0.6845044086970258, iteration: 371206
loss: 1.0370771884918213,grad_norm: 0.9999993074570427, iteration: 371207
loss: 1.0058945417404175,grad_norm: 0.840963792430237, iteration: 371208
loss: 0.9895715713500977,grad_norm: 0.9999997207580589, iteration: 371209
loss: 1.1364011764526367,grad_norm: 0.8443828829761016, iteration: 371210
loss: 0.9489163756370544,grad_norm: 0.9467806089210469, iteration: 371211
loss: 0.9955528378486633,grad_norm: 0.9353302297209198, iteration: 371212
loss: 1.0060862302780151,grad_norm: 0.8471802220587857, iteration: 371213
loss: 0.9998422861099243,grad_norm: 0.9999995564329472, iteration: 371214
loss: 0.9643846750259399,grad_norm: 0.7625144451159293, iteration: 371215
loss: 0.9695848226547241,grad_norm: 0.7217048936319059, iteration: 371216
loss: 0.9630855321884155,grad_norm: 0.8428175778027901, iteration: 371217
loss: 1.043198585510254,grad_norm: 0.8440595648309827, iteration: 371218
loss: 0.9915820360183716,grad_norm: 0.7766588134558694, iteration: 371219
loss: 1.0245027542114258,grad_norm: 0.999999515888298, iteration: 371220
loss: 1.0127638578414917,grad_norm: 0.7026638636869738, iteration: 371221
loss: 0.9635554552078247,grad_norm: 0.9999993823191461, iteration: 371222
loss: 0.9950904846191406,grad_norm: 0.7431938537216969, iteration: 371223
loss: 1.052685260772705,grad_norm: 0.7988404474432824, iteration: 371224
loss: 0.9681130647659302,grad_norm: 0.9999991845646774, iteration: 371225
loss: 0.9993070960044861,grad_norm: 0.7596196680590005, iteration: 371226
loss: 0.9591533541679382,grad_norm: 0.7300830082128825, iteration: 371227
loss: 1.0119082927703857,grad_norm: 0.8271895116737257, iteration: 371228
loss: 0.9983823895454407,grad_norm: 0.8300084740067423, iteration: 371229
loss: 0.9868045449256897,grad_norm: 0.7980783016554706, iteration: 371230
loss: 0.986754298210144,grad_norm: 0.7405328154760941, iteration: 371231
loss: 0.9436096549034119,grad_norm: 0.8481113934508933, iteration: 371232
loss: 0.9514043927192688,grad_norm: 0.7625397526496195, iteration: 371233
loss: 0.9759454131126404,grad_norm: 0.86516957731151, iteration: 371234
loss: 0.9837823510169983,grad_norm: 0.9999991135832161, iteration: 371235
loss: 0.995581865310669,grad_norm: 0.8804145335615696, iteration: 371236
loss: 1.0089462995529175,grad_norm: 0.7992097590407434, iteration: 371237
loss: 0.9720906615257263,grad_norm: 0.968546830553772, iteration: 371238
loss: 0.9896066188812256,grad_norm: 0.7448771876021667, iteration: 371239
loss: 0.9967231154441833,grad_norm: 0.832268067645851, iteration: 371240
loss: 1.1647670269012451,grad_norm: 0.9875593595601733, iteration: 371241
loss: 1.033821940422058,grad_norm: 0.8356886372570622, iteration: 371242
loss: 1.0272247791290283,grad_norm: 0.8443682251992993, iteration: 371243
loss: 0.9635119438171387,grad_norm: 0.7251898671603251, iteration: 371244
loss: 1.0077522993087769,grad_norm: 0.9239641131812543, iteration: 371245
loss: 0.9959760904312134,grad_norm: 0.8262443428485226, iteration: 371246
loss: 0.9712477922439575,grad_norm: 0.8739869603589919, iteration: 371247
loss: 1.0620437860488892,grad_norm: 0.750289470382302, iteration: 371248
loss: 0.950372040271759,grad_norm: 0.7994025938236844, iteration: 371249
loss: 0.9966580271720886,grad_norm: 0.6914151383602589, iteration: 371250
loss: 1.0231919288635254,grad_norm: 0.82426967554517, iteration: 371251
loss: 1.0265978574752808,grad_norm: 0.8254284809080634, iteration: 371252
loss: 1.1613341569900513,grad_norm: 0.9999997869232063, iteration: 371253
loss: 1.038659930229187,grad_norm: 0.7535474696404476, iteration: 371254
loss: 0.9951780438423157,grad_norm: 0.9999991438817671, iteration: 371255
loss: 0.9997107982635498,grad_norm: 0.7891501630629654, iteration: 371256
loss: 0.9559871554374695,grad_norm: 0.7545557295761429, iteration: 371257
loss: 1.0142855644226074,grad_norm: 0.7751576940157533, iteration: 371258
loss: 1.0002069473266602,grad_norm: 0.8581491001602632, iteration: 371259
loss: 1.1103969812393188,grad_norm: 0.9166012263315586, iteration: 371260
loss: 1.0532785654067993,grad_norm: 0.9999991474561863, iteration: 371261
loss: 0.9984223246574402,grad_norm: 0.8254135459648021, iteration: 371262
loss: 1.0107678174972534,grad_norm: 0.7733266147678409, iteration: 371263
loss: 0.9614146947860718,grad_norm: 0.8409901077339917, iteration: 371264
loss: 1.0390442609786987,grad_norm: 0.9999992087010229, iteration: 371265
loss: 1.029303789138794,grad_norm: 0.8027019658640271, iteration: 371266
loss: 1.000722885131836,grad_norm: 0.8442212754765129, iteration: 371267
loss: 1.0630089044570923,grad_norm: 0.937811108094021, iteration: 371268
loss: 1.0022811889648438,grad_norm: 0.7426658939881599, iteration: 371269
loss: 1.0179904699325562,grad_norm: 0.8743456488177992, iteration: 371270
loss: 0.9979349374771118,grad_norm: 0.7800943034610908, iteration: 371271
loss: 1.0091612339019775,grad_norm: 0.7360712272686785, iteration: 371272
loss: 0.9861489534378052,grad_norm: 0.8688850620285677, iteration: 371273
loss: 1.0362629890441895,grad_norm: 0.8779296829817519, iteration: 371274
loss: 1.0283713340759277,grad_norm: 0.9999990661125334, iteration: 371275
loss: 1.015848159790039,grad_norm: 0.9999993396655722, iteration: 371276
loss: 1.0292209386825562,grad_norm: 0.9999992182885498, iteration: 371277
loss: 1.0533097982406616,grad_norm: 0.8343788902352918, iteration: 371278
loss: 0.9988897442817688,grad_norm: 0.7354498388806228, iteration: 371279
loss: 1.010125994682312,grad_norm: 0.9999990817756859, iteration: 371280
loss: 1.022196888923645,grad_norm: 0.9345185344947354, iteration: 371281
loss: 1.0097265243530273,grad_norm: 0.7998489276124365, iteration: 371282
loss: 1.0015549659729004,grad_norm: 0.8697563526304096, iteration: 371283
loss: 0.9954801201820374,grad_norm: 0.7058583560231949, iteration: 371284
loss: 1.0044466257095337,grad_norm: 0.8714078698781578, iteration: 371285
loss: 0.9954794049263,grad_norm: 0.7976587048481838, iteration: 371286
loss: 1.0077482461929321,grad_norm: 0.7418238360534726, iteration: 371287
loss: 0.989567756652832,grad_norm: 0.7372583443498338, iteration: 371288
loss: 0.9960173964500427,grad_norm: 0.793499334501671, iteration: 371289
loss: 0.9348604679107666,grad_norm: 0.8729058986546732, iteration: 371290
loss: 0.9843340516090393,grad_norm: 0.9999994836746431, iteration: 371291
loss: 1.0055170059204102,grad_norm: 0.7619254566843567, iteration: 371292
loss: 1.0252922773361206,grad_norm: 0.9238186447375453, iteration: 371293
loss: 0.9972336888313293,grad_norm: 0.830181210411832, iteration: 371294
loss: 0.9869381785392761,grad_norm: 0.8962393769658026, iteration: 371295
loss: 0.9843409657478333,grad_norm: 0.6716977328714602, iteration: 371296
loss: 0.9939314723014832,grad_norm: 0.8671886434276219, iteration: 371297
loss: 1.1083003282546997,grad_norm: 0.9816418818186934, iteration: 371298
loss: 1.032217025756836,grad_norm: 0.8037807109542258, iteration: 371299
loss: 0.9914208650588989,grad_norm: 0.9077943482830957, iteration: 371300
loss: 1.0429351329803467,grad_norm: 0.9999990770958479, iteration: 371301
loss: 1.0373181104660034,grad_norm: 0.7981782146090621, iteration: 371302
loss: 1.0117511749267578,grad_norm: 0.8939939379515668, iteration: 371303
loss: 0.9844424724578857,grad_norm: 0.9999992614634907, iteration: 371304
loss: 1.0067998170852661,grad_norm: 0.8107621231868132, iteration: 371305
loss: 0.9845966696739197,grad_norm: 0.7530398780319317, iteration: 371306
loss: 0.9918934106826782,grad_norm: 0.7144918510278019, iteration: 371307
loss: 1.0053631067276,grad_norm: 0.9999992709463355, iteration: 371308
loss: 1.0195467472076416,grad_norm: 0.9999993123073938, iteration: 371309
loss: 0.9918450117111206,grad_norm: 0.8873915005594615, iteration: 371310
loss: 0.9926881790161133,grad_norm: 0.7782156705974542, iteration: 371311
loss: 1.0142751932144165,grad_norm: 0.9999991140409618, iteration: 371312
loss: 1.0189412832260132,grad_norm: 0.7990380657337065, iteration: 371313
loss: 0.9863126277923584,grad_norm: 0.8143945643928199, iteration: 371314
loss: 1.0095881223678589,grad_norm: 0.7334302015092099, iteration: 371315
loss: 1.0783041715621948,grad_norm: 0.9999993092498244, iteration: 371316
loss: 1.0075347423553467,grad_norm: 0.7255294395099591, iteration: 371317
loss: 0.9887149930000305,grad_norm: 0.7248127686111658, iteration: 371318
loss: 1.019136905670166,grad_norm: 0.7169502354197061, iteration: 371319
loss: 1.0105232000350952,grad_norm: 0.9999992535675848, iteration: 371320
loss: 0.993270218372345,grad_norm: 0.9022475223688086, iteration: 371321
loss: 1.0046546459197998,grad_norm: 0.9999989393541562, iteration: 371322
loss: 1.0123789310455322,grad_norm: 0.7898033368265384, iteration: 371323
loss: 1.0228759050369263,grad_norm: 0.8622406275112119, iteration: 371324
loss: 1.008609652519226,grad_norm: 0.9999990000533089, iteration: 371325
loss: 1.0173450708389282,grad_norm: 0.9999990418291637, iteration: 371326
loss: 1.0132871866226196,grad_norm: 0.755982287502337, iteration: 371327
loss: 1.007808804512024,grad_norm: 0.7179418506751407, iteration: 371328
loss: 0.9603146314620972,grad_norm: 0.7494786764618037, iteration: 371329
loss: 1.148072361946106,grad_norm: 0.9999991426584449, iteration: 371330
loss: 0.9997227191925049,grad_norm: 0.9247757621383896, iteration: 371331
loss: 0.9617385864257812,grad_norm: 0.8459235128682645, iteration: 371332
loss: 0.9999328255653381,grad_norm: 0.8910441317479858, iteration: 371333
loss: 0.9891775846481323,grad_norm: 0.8201314263149226, iteration: 371334
loss: 1.016451120376587,grad_norm: 0.7691011441863702, iteration: 371335
loss: 1.014702320098877,grad_norm: 0.9999992020979276, iteration: 371336
loss: 1.1024706363677979,grad_norm: 0.9999991816058644, iteration: 371337
loss: 1.030380368232727,grad_norm: 0.9999990803256601, iteration: 371338
loss: 0.9985916018486023,grad_norm: 0.7461880441614347, iteration: 371339
loss: 0.9647356271743774,grad_norm: 0.9999995128408589, iteration: 371340
loss: 1.0170806646347046,grad_norm: 0.9247709829220027, iteration: 371341
loss: 1.0154458284378052,grad_norm: 0.8057485174787087, iteration: 371342
loss: 1.0174864530563354,grad_norm: 0.9999991830770945, iteration: 371343
loss: 1.0445774793624878,grad_norm: 0.999999690647108, iteration: 371344
loss: 1.104006052017212,grad_norm: 0.9999998586757735, iteration: 371345
loss: 1.0186405181884766,grad_norm: 0.9382958434184746, iteration: 371346
loss: 1.0365715026855469,grad_norm: 0.9999990727517829, iteration: 371347
loss: 0.9961715340614319,grad_norm: 0.7479513129710404, iteration: 371348
loss: 1.0215785503387451,grad_norm: 0.8671168765230247, iteration: 371349
loss: 1.064766526222229,grad_norm: 0.9797768278521166, iteration: 371350
loss: 1.0430808067321777,grad_norm: 0.7675532230762274, iteration: 371351
loss: 1.0060192346572876,grad_norm: 0.7403130449790017, iteration: 371352
loss: 0.9921762347221375,grad_norm: 0.6551053378078421, iteration: 371353
loss: 0.9524041414260864,grad_norm: 0.8348020566854333, iteration: 371354
loss: 1.133462905883789,grad_norm: 0.9810290501029976, iteration: 371355
loss: 0.9654805660247803,grad_norm: 0.9017603265920775, iteration: 371356
loss: 1.0951650142669678,grad_norm: 0.856672399438878, iteration: 371357
loss: 1.0643826723098755,grad_norm: 0.9999997033629784, iteration: 371358
loss: 1.0354079008102417,grad_norm: 0.9435455237337007, iteration: 371359
loss: 0.987625241279602,grad_norm: 0.9154044469116464, iteration: 371360
loss: 1.083196759223938,grad_norm: 0.9999999434743806, iteration: 371361
loss: 0.973642110824585,grad_norm: 0.8168344055055266, iteration: 371362
loss: 1.0162988901138306,grad_norm: 0.8518730001078447, iteration: 371363
loss: 0.9987086653709412,grad_norm: 0.9073084847936992, iteration: 371364
loss: 0.9781932830810547,grad_norm: 0.7736735579282664, iteration: 371365
loss: 1.0754303932189941,grad_norm: 0.999999800274228, iteration: 371366
loss: 0.944251537322998,grad_norm: 0.7824034398855618, iteration: 371367
loss: 1.022876501083374,grad_norm: 0.9999993755264477, iteration: 371368
loss: 1.014433741569519,grad_norm: 0.813875139761759, iteration: 371369
loss: 0.9817450046539307,grad_norm: 0.7496162082111614, iteration: 371370
loss: 1.0155383348464966,grad_norm: 0.8081492344695362, iteration: 371371
loss: 1.0026450157165527,grad_norm: 0.8302613429700862, iteration: 371372
loss: 0.9926405549049377,grad_norm: 0.819457895199197, iteration: 371373
loss: 1.0110201835632324,grad_norm: 0.8013038038136852, iteration: 371374
loss: 0.990837574005127,grad_norm: 0.7239440218246622, iteration: 371375
loss: 0.9597517251968384,grad_norm: 0.9999991212925902, iteration: 371376
loss: 0.9800880551338196,grad_norm: 0.9999995382257764, iteration: 371377
loss: 0.9931370615959167,grad_norm: 0.8864681987298161, iteration: 371378
loss: 1.005944848060608,grad_norm: 0.7411994715093855, iteration: 371379
loss: 0.9580276012420654,grad_norm: 0.8685171438650912, iteration: 371380
loss: 0.9914730191230774,grad_norm: 0.9999998287618528, iteration: 371381
loss: 1.027469277381897,grad_norm: 0.893777487951866, iteration: 371382
loss: 0.9635667204856873,grad_norm: 0.7132515985529032, iteration: 371383
loss: 1.0535329580307007,grad_norm: 0.9999990850990654, iteration: 371384
loss: 0.9649759531021118,grad_norm: 0.8443384009977737, iteration: 371385
loss: 1.0019041299819946,grad_norm: 0.9999996800132508, iteration: 371386
loss: 0.9805518984794617,grad_norm: 0.9999990028076452, iteration: 371387
loss: 1.0098423957824707,grad_norm: 0.7264114873601302, iteration: 371388
loss: 1.0166584253311157,grad_norm: 0.7821220680816641, iteration: 371389
loss: 1.0060874223709106,grad_norm: 0.8190257238547458, iteration: 371390
loss: 1.0009348392486572,grad_norm: 0.8665600589498732, iteration: 371391
loss: 0.9881817102432251,grad_norm: 0.7138702363271123, iteration: 371392
loss: 1.0021109580993652,grad_norm: 0.7135306308012276, iteration: 371393
loss: 1.0061012506484985,grad_norm: 0.8408084038758742, iteration: 371394
loss: 1.0634498596191406,grad_norm: 0.7250912047386461, iteration: 371395
loss: 1.0297821760177612,grad_norm: 0.9999990194423423, iteration: 371396
loss: 0.9977068901062012,grad_norm: 0.9583729365350269, iteration: 371397
loss: 1.0021634101867676,grad_norm: 0.7652470862088536, iteration: 371398
loss: 0.9722784161567688,grad_norm: 0.7065458044181402, iteration: 371399
loss: 1.0350162982940674,grad_norm: 0.7844931139587445, iteration: 371400
loss: 1.0297894477844238,grad_norm: 0.8014659034529876, iteration: 371401
loss: 0.9735828042030334,grad_norm: 0.9032948632285046, iteration: 371402
loss: 1.022355079650879,grad_norm: 0.9145802381352026, iteration: 371403
loss: 1.0105069875717163,grad_norm: 0.8820890248016903, iteration: 371404
loss: 1.012285590171814,grad_norm: 0.8919546918122545, iteration: 371405
loss: 1.017028570175171,grad_norm: 0.9999990147493535, iteration: 371406
loss: 1.0377790927886963,grad_norm: 0.8174913409928913, iteration: 371407
loss: 1.0642133951187134,grad_norm: 0.7294545541881567, iteration: 371408
loss: 1.0012487173080444,grad_norm: 0.9999995213352542, iteration: 371409
loss: 0.9977172613143921,grad_norm: 0.7837912284349866, iteration: 371410
loss: 1.0286871194839478,grad_norm: 0.705197619850377, iteration: 371411
loss: 1.0348904132843018,grad_norm: 0.7590143815504875, iteration: 371412
loss: 1.0321543216705322,grad_norm: 0.7895073058936529, iteration: 371413
loss: 0.9751654863357544,grad_norm: 0.7340410642876429, iteration: 371414
loss: 1.0093615055084229,grad_norm: 0.8293042965718977, iteration: 371415
loss: 1.0497052669525146,grad_norm: 0.8117573499677864, iteration: 371416
loss: 0.9928315281867981,grad_norm: 0.999998976765871, iteration: 371417
loss: 1.0201793909072876,grad_norm: 0.9091164851332482, iteration: 371418
loss: 1.016050100326538,grad_norm: 0.9999991022678735, iteration: 371419
loss: 0.9954835772514343,grad_norm: 0.9999992662571904, iteration: 371420
loss: 1.040712833404541,grad_norm: 0.999999363675066, iteration: 371421
loss: 1.0042102336883545,grad_norm: 0.8941201168155583, iteration: 371422
loss: 0.9860087633132935,grad_norm: 0.9136945175777238, iteration: 371423
loss: 0.9825927019119263,grad_norm: 0.854000898686967, iteration: 371424
loss: 0.955070436000824,grad_norm: 0.7012371163489008, iteration: 371425
loss: 1.0029550790786743,grad_norm: 0.8077592079910798, iteration: 371426
loss: 1.020918607711792,grad_norm: 0.8279181792445107, iteration: 371427
loss: 1.073695421218872,grad_norm: 0.9999992669205561, iteration: 371428
loss: 1.0109076499938965,grad_norm: 0.9663895096421091, iteration: 371429
loss: 0.9522987604141235,grad_norm: 0.9271117854848252, iteration: 371430
loss: 0.9873467087745667,grad_norm: 0.8053099691708318, iteration: 371431
loss: 0.9971462488174438,grad_norm: 0.8104047993713939, iteration: 371432
loss: 0.9685845971107483,grad_norm: 0.9999990885392086, iteration: 371433
loss: 1.0036824941635132,grad_norm: 0.9999998386586916, iteration: 371434
loss: 1.0207947492599487,grad_norm: 0.8411262037681123, iteration: 371435
loss: 0.9968885779380798,grad_norm: 0.9999989403160804, iteration: 371436
loss: 0.9829439520835876,grad_norm: 0.8210434050161698, iteration: 371437
loss: 1.0306396484375,grad_norm: 0.6236050691368968, iteration: 371438
loss: 0.9933915138244629,grad_norm: 0.742787608355008, iteration: 371439
loss: 1.0129079818725586,grad_norm: 0.8754463203846649, iteration: 371440
loss: 0.9976359605789185,grad_norm: 0.8105456602480577, iteration: 371441
loss: 1.015274167060852,grad_norm: 0.8423012493879951, iteration: 371442
loss: 1.017408013343811,grad_norm: 0.7734085441387527, iteration: 371443
loss: 1.0655878782272339,grad_norm: 0.9603974839593413, iteration: 371444
loss: 1.0361440181732178,grad_norm: 0.9857921519221491, iteration: 371445
loss: 1.0053969621658325,grad_norm: 0.981370224253855, iteration: 371446
loss: 1.0171715021133423,grad_norm: 0.9999992561336286, iteration: 371447
loss: 0.9934329986572266,grad_norm: 0.7270169113549074, iteration: 371448
loss: 1.0276238918304443,grad_norm: 0.8332662751099649, iteration: 371449
loss: 1.019404649734497,grad_norm: 0.948113147846764, iteration: 371450
loss: 1.0392452478408813,grad_norm: 0.8853093541933225, iteration: 371451
loss: 1.027695894241333,grad_norm: 0.7747352382373804, iteration: 371452
loss: 1.0713318586349487,grad_norm: 0.9999997414423102, iteration: 371453
loss: 1.030015230178833,grad_norm: 0.99738670495644, iteration: 371454
loss: 1.030754566192627,grad_norm: 0.8024754331661156, iteration: 371455
loss: 1.0518083572387695,grad_norm: 0.8253142242402856, iteration: 371456
loss: 0.9887071847915649,grad_norm: 0.7399739165298578, iteration: 371457
loss: 1.0276081562042236,grad_norm: 0.8126528385553959, iteration: 371458
loss: 1.0162241458892822,grad_norm: 0.7935774339872949, iteration: 371459
loss: 0.9786414504051208,grad_norm: 0.945620176832683, iteration: 371460
loss: 1.0767289400100708,grad_norm: 0.9999992636190536, iteration: 371461
loss: 1.0037870407104492,grad_norm: 0.7498298958650996, iteration: 371462
loss: 1.0417585372924805,grad_norm: 0.7784589844898732, iteration: 371463
loss: 0.9836001992225647,grad_norm: 0.8004729723385068, iteration: 371464
loss: 1.020479440689087,grad_norm: 0.8263723254389559, iteration: 371465
loss: 1.0577133893966675,grad_norm: 0.9999998412607518, iteration: 371466
loss: 1.0539333820343018,grad_norm: 0.9999993929856721, iteration: 371467
loss: 0.9981955289840698,grad_norm: 0.7764901401613338, iteration: 371468
loss: 0.9769054651260376,grad_norm: 0.8698480269012309, iteration: 371469
loss: 1.0198696851730347,grad_norm: 0.6620746403699831, iteration: 371470
loss: 1.0263618230819702,grad_norm: 0.9999991560453144, iteration: 371471
loss: 0.9909387826919556,grad_norm: 0.7879102106331711, iteration: 371472
loss: 0.9844808578491211,grad_norm: 0.8601705357083442, iteration: 371473
loss: 0.9744320511817932,grad_norm: 0.9999990848802597, iteration: 371474
loss: 0.9903771281242371,grad_norm: 0.7939434949873051, iteration: 371475
loss: 1.0076154470443726,grad_norm: 0.7941903438310127, iteration: 371476
loss: 1.040224313735962,grad_norm: 0.6779771958678887, iteration: 371477
loss: 1.0074892044067383,grad_norm: 0.8757310596098562, iteration: 371478
loss: 1.0547467470169067,grad_norm: 0.9520060138294797, iteration: 371479
loss: 1.0217803716659546,grad_norm: 0.7846647403530015, iteration: 371480
loss: 1.0018763542175293,grad_norm: 0.854316885132846, iteration: 371481
loss: 1.056877613067627,grad_norm: 0.8636050867686457, iteration: 371482
loss: 1.0132737159729004,grad_norm: 0.76236895987064, iteration: 371483
loss: 0.9645682573318481,grad_norm: 0.7194521016615011, iteration: 371484
loss: 1.019095778465271,grad_norm: 0.762578739152344, iteration: 371485
loss: 1.0029159784317017,grad_norm: 0.7370952197483024, iteration: 371486
loss: 0.9766397476196289,grad_norm: 0.8032878546020675, iteration: 371487
loss: 1.109439492225647,grad_norm: 0.9999994665758164, iteration: 371488
loss: 1.0023479461669922,grad_norm: 0.9294526024583707, iteration: 371489
loss: 1.000427484512329,grad_norm: 0.8155033511679894, iteration: 371490
loss: 1.0056796073913574,grad_norm: 0.6369883208404205, iteration: 371491
loss: 1.4566665887832642,grad_norm: 0.9999998041702534, iteration: 371492
loss: 1.0321812629699707,grad_norm: 0.8267435880681571, iteration: 371493
loss: 1.0253084897994995,grad_norm: 0.9059473446128516, iteration: 371494
loss: 1.0069050788879395,grad_norm: 0.9999995716194018, iteration: 371495
loss: 1.041417121887207,grad_norm: 0.7590568333533223, iteration: 371496
loss: 1.0418834686279297,grad_norm: 0.9999993354221847, iteration: 371497
loss: 1.017491102218628,grad_norm: 0.99999915265633, iteration: 371498
loss: 0.992563784122467,grad_norm: 0.8137854694083371, iteration: 371499
loss: 1.0127036571502686,grad_norm: 0.8671878805903402, iteration: 371500
loss: 1.0604543685913086,grad_norm: 0.9999993854750003, iteration: 371501
loss: 0.9967005252838135,grad_norm: 0.7851077802115546, iteration: 371502
loss: 0.9981375336647034,grad_norm: 0.9512903997554393, iteration: 371503
loss: 1.0006757974624634,grad_norm: 0.9413308357776061, iteration: 371504
loss: 1.01932692527771,grad_norm: 0.8059780148403061, iteration: 371505
loss: 1.006502628326416,grad_norm: 0.8470187949009502, iteration: 371506
loss: 1.0055493116378784,grad_norm: 0.9999995517373703, iteration: 371507
loss: 0.9986530542373657,grad_norm: 0.7120890087663608, iteration: 371508
loss: 0.9755577445030212,grad_norm: 0.8701107295259131, iteration: 371509
loss: 1.0318301916122437,grad_norm: 0.783512189727975, iteration: 371510
loss: 1.0124696493148804,grad_norm: 0.8383785376992984, iteration: 371511
loss: 0.9895868301391602,grad_norm: 0.7008043345191373, iteration: 371512
loss: 0.9784078001976013,grad_norm: 0.9062633987485762, iteration: 371513
loss: 0.9959425330162048,grad_norm: 0.9158006085176069, iteration: 371514
loss: 0.9733209609985352,grad_norm: 0.9733099393893851, iteration: 371515
loss: 1.1145882606506348,grad_norm: 0.9999997228872818, iteration: 371516
loss: 0.9817603230476379,grad_norm: 0.8197271052619014, iteration: 371517
loss: 1.0007379055023193,grad_norm: 0.6372913501257257, iteration: 371518
loss: 0.9674479961395264,grad_norm: 0.878416259212461, iteration: 371519
loss: 0.9717828631401062,grad_norm: 0.8130672614767148, iteration: 371520
loss: 1.036003828048706,grad_norm: 0.8391496419845241, iteration: 371521
loss: 1.0168178081512451,grad_norm: 0.9227064493881426, iteration: 371522
loss: 0.9518146514892578,grad_norm: 0.848680063443974, iteration: 371523
loss: 0.995292067527771,grad_norm: 0.7319653726828949, iteration: 371524
loss: 0.9941257238388062,grad_norm: 0.7838005197495521, iteration: 371525
loss: 1.0431078672409058,grad_norm: 0.818894373847649, iteration: 371526
loss: 1.0942779779434204,grad_norm: 0.8094422823847287, iteration: 371527
loss: 0.9788123965263367,grad_norm: 0.6982200650075401, iteration: 371528
loss: 1.0054115056991577,grad_norm: 0.673360741429037, iteration: 371529
loss: 1.0072458982467651,grad_norm: 0.9999992872552662, iteration: 371530
loss: 0.9936766624450684,grad_norm: 0.779076861867102, iteration: 371531
loss: 0.9746009707450867,grad_norm: 0.7580927581833776, iteration: 371532
loss: 1.0173085927963257,grad_norm: 0.999999290655777, iteration: 371533
loss: 0.9984946846961975,grad_norm: 0.6820407278730567, iteration: 371534
loss: 1.0396331548690796,grad_norm: 0.7998615637217621, iteration: 371535
loss: 0.9933863282203674,grad_norm: 0.7341438689690722, iteration: 371536
loss: 1.0360091924667358,grad_norm: 0.8837557615546228, iteration: 371537
loss: 1.048425555229187,grad_norm: 0.8475874841747123, iteration: 371538
loss: 0.9703121185302734,grad_norm: 0.8636095404845446, iteration: 371539
loss: 1.0157599449157715,grad_norm: 0.8486974705318903, iteration: 371540
loss: 0.979428231716156,grad_norm: 0.7979432673313456, iteration: 371541
loss: 1.0459961891174316,grad_norm: 0.8668679362004781, iteration: 371542
loss: 1.0131806135177612,grad_norm: 0.9999999422473684, iteration: 371543
loss: 0.9782633781433105,grad_norm: 0.9335443303496436, iteration: 371544
loss: 1.0131016969680786,grad_norm: 0.624708510813524, iteration: 371545
loss: 0.9985114932060242,grad_norm: 0.9249129147325422, iteration: 371546
loss: 0.9927266836166382,grad_norm: 0.8917409680642271, iteration: 371547
loss: 0.9681335091590881,grad_norm: 0.821048997975821, iteration: 371548
loss: 1.0211598873138428,grad_norm: 0.9051193099098241, iteration: 371549
loss: 1.0382198095321655,grad_norm: 0.9999996720481945, iteration: 371550
loss: 1.0250738859176636,grad_norm: 0.8749497601249944, iteration: 371551
loss: 1.041555404663086,grad_norm: 0.9999991127441668, iteration: 371552
loss: 1.0245782136917114,grad_norm: 0.7558721057453, iteration: 371553
loss: 1.0606999397277832,grad_norm: 0.879183369344355, iteration: 371554
loss: 1.0342854261398315,grad_norm: 0.7323564652296561, iteration: 371555
loss: 1.0271403789520264,grad_norm: 0.7600366147265253, iteration: 371556
loss: 1.0124725103378296,grad_norm: 0.7951076491523011, iteration: 371557
loss: 0.9744377732276917,grad_norm: 0.7612571903750853, iteration: 371558
loss: 1.012532114982605,grad_norm: 0.8212162533945192, iteration: 371559
loss: 0.9887848496437073,grad_norm: 0.7940540758897313, iteration: 371560
loss: 1.015051007270813,grad_norm: 0.667906296320825, iteration: 371561
loss: 1.0644704103469849,grad_norm: 0.9999990327451614, iteration: 371562
loss: 0.9987891912460327,grad_norm: 0.9999990045739783, iteration: 371563
loss: 1.0310441255569458,grad_norm: 0.7427822286166491, iteration: 371564
loss: 1.0181583166122437,grad_norm: 0.7206582481589654, iteration: 371565
loss: 1.0133472681045532,grad_norm: 0.7449027896031704, iteration: 371566
loss: 1.0505602359771729,grad_norm: 0.9999996300741442, iteration: 371567
loss: 1.0068399906158447,grad_norm: 0.7462328325739704, iteration: 371568
loss: 1.0111310482025146,grad_norm: 0.8718881616609242, iteration: 371569
loss: 0.9856096506118774,grad_norm: 0.784523665571699, iteration: 371570
loss: 1.00960111618042,grad_norm: 0.7484137445955064, iteration: 371571
loss: 0.963555634021759,grad_norm: 0.8355923651646942, iteration: 371572
loss: 1.0136158466339111,grad_norm: 0.9054289281115065, iteration: 371573
loss: 1.0314276218414307,grad_norm: 0.6425427460277054, iteration: 371574
loss: 0.9773852229118347,grad_norm: 0.9311626402841646, iteration: 371575
loss: 1.052968144416809,grad_norm: 0.9999990634662592, iteration: 371576
loss: 1.0929734706878662,grad_norm: 0.9999995265475453, iteration: 371577
loss: 1.0345211029052734,grad_norm: 0.8922230607817351, iteration: 371578
loss: 0.9736641645431519,grad_norm: 0.8456307617932296, iteration: 371579
loss: 1.0400704145431519,grad_norm: 0.9999996031477647, iteration: 371580
loss: 1.040219783782959,grad_norm: 0.9275921349867099, iteration: 371581
loss: 1.039612054824829,grad_norm: 0.9999996151479893, iteration: 371582
loss: 1.0189005136489868,grad_norm: 0.871012378082601, iteration: 371583
loss: 1.0328271389007568,grad_norm: 0.9939888933770032, iteration: 371584
loss: 0.9935107827186584,grad_norm: 0.7368817423294638, iteration: 371585
loss: 1.0095503330230713,grad_norm: 0.7968346290298781, iteration: 371586
loss: 0.9726098775863647,grad_norm: 0.9999995335049349, iteration: 371587
loss: 1.035878300666809,grad_norm: 0.8639119739012535, iteration: 371588
loss: 1.1054528951644897,grad_norm: 0.9999991182757566, iteration: 371589
loss: 1.038674235343933,grad_norm: 0.7906493748021648, iteration: 371590
loss: 1.0129212141036987,grad_norm: 0.7239729400899452, iteration: 371591
loss: 1.050911784172058,grad_norm: 0.8806545807527407, iteration: 371592
loss: 1.0131652355194092,grad_norm: 0.9062280996731689, iteration: 371593
loss: 1.0061315298080444,grad_norm: 0.8111184934671664, iteration: 371594
loss: 1.0427793264389038,grad_norm: 0.7581775718406139, iteration: 371595
loss: 1.001578688621521,grad_norm: 0.7889893329912654, iteration: 371596
loss: 1.0085151195526123,grad_norm: 0.8850056721537936, iteration: 371597
loss: 0.9590669274330139,grad_norm: 0.7617946912145138, iteration: 371598
loss: 0.98150634765625,grad_norm: 0.9999990880995706, iteration: 371599
loss: 0.981840193271637,grad_norm: 0.9999991254019643, iteration: 371600
loss: 0.9830154180526733,grad_norm: 0.7751802467264086, iteration: 371601
loss: 1.0058127641677856,grad_norm: 0.83114946780632, iteration: 371602
loss: 0.9936388731002808,grad_norm: 0.9890915014408417, iteration: 371603
loss: 1.059820294380188,grad_norm: 0.8887399988760043, iteration: 371604
loss: 0.9918780326843262,grad_norm: 0.7444353645681167, iteration: 371605
loss: 0.9960294961929321,grad_norm: 0.8893286081114878, iteration: 371606
loss: 1.0619641542434692,grad_norm: 0.9999999088803412, iteration: 371607
loss: 0.9784339070320129,grad_norm: 0.7071834063948774, iteration: 371608
loss: 1.0073521137237549,grad_norm: 0.8673838578852916, iteration: 371609
loss: 1.0010432004928589,grad_norm: 0.769889819744911, iteration: 371610
loss: 0.9852601289749146,grad_norm: 0.8707546637755448, iteration: 371611
loss: 0.992741584777832,grad_norm: 0.8822887379373456, iteration: 371612
loss: 0.9874076247215271,grad_norm: 0.9189627985657327, iteration: 371613
loss: 0.9665946960449219,grad_norm: 0.8120541280852384, iteration: 371614
loss: 1.0419127941131592,grad_norm: 0.9999993189191342, iteration: 371615
loss: 1.041080117225647,grad_norm: 0.85371908150697, iteration: 371616
loss: 1.1133909225463867,grad_norm: 0.9999992956442985, iteration: 371617
loss: 1.016703486442566,grad_norm: 0.9026102491825335, iteration: 371618
loss: 1.0155783891677856,grad_norm: 0.7963052833477238, iteration: 371619
loss: 1.0049247741699219,grad_norm: 0.8758034114379019, iteration: 371620
loss: 0.9929473996162415,grad_norm: 0.8913503289422408, iteration: 371621
loss: 0.998458981513977,grad_norm: 0.931065035875947, iteration: 371622
loss: 0.9692906737327576,grad_norm: 0.8018763491108909, iteration: 371623
loss: 1.0689016580581665,grad_norm: 0.9999991072575611, iteration: 371624
loss: 0.9766056537628174,grad_norm: 0.906946646730462, iteration: 371625
loss: 1.0175838470458984,grad_norm: 0.7850206034171924, iteration: 371626
loss: 1.0152952671051025,grad_norm: 0.8370873942362812, iteration: 371627
loss: 1.0272438526153564,grad_norm: 0.9999991498338279, iteration: 371628
loss: 1.231624722480774,grad_norm: 0.999999869764118, iteration: 371629
loss: 0.9996669292449951,grad_norm: 0.8780531064748255, iteration: 371630
loss: 1.059098482131958,grad_norm: 0.9999994550442934, iteration: 371631
loss: 1.0551592111587524,grad_norm: 0.7426892493707622, iteration: 371632
loss: 1.0498974323272705,grad_norm: 0.9999990729122598, iteration: 371633
loss: 0.9801292419433594,grad_norm: 0.9478469567025375, iteration: 371634
loss: 1.010487675666809,grad_norm: 0.6903540404204047, iteration: 371635
loss: 1.0026150941848755,grad_norm: 0.7783682001289604, iteration: 371636
loss: 1.022178053855896,grad_norm: 0.9999990308632328, iteration: 371637
loss: 1.0037697553634644,grad_norm: 0.9666276042816804, iteration: 371638
loss: 1.0006539821624756,grad_norm: 0.9499938617552501, iteration: 371639
loss: 1.0936449766159058,grad_norm: 0.9774839258512819, iteration: 371640
loss: 0.961601734161377,grad_norm: 0.9170538185983612, iteration: 371641
loss: 1.0119885206222534,grad_norm: 0.7855691355156941, iteration: 371642
loss: 0.9709868431091309,grad_norm: 0.8214382047245288, iteration: 371643
loss: 1.0226608514785767,grad_norm: 0.8869028327142177, iteration: 371644
loss: 1.0329571962356567,grad_norm: 0.8318705406233021, iteration: 371645
loss: 0.9959343671798706,grad_norm: 0.999999688245329, iteration: 371646
loss: 0.9913708567619324,grad_norm: 0.838862052900919, iteration: 371647
loss: 1.0460829734802246,grad_norm: 0.7683393110182435, iteration: 371648
loss: 1.024479866027832,grad_norm: 0.7472217342477044, iteration: 371649
loss: 1.0459766387939453,grad_norm: 0.9999990428066529, iteration: 371650
loss: 1.013176679611206,grad_norm: 0.7795400415409708, iteration: 371651
loss: 1.0469439029693604,grad_norm: 0.9999991812443361, iteration: 371652
loss: 1.0318163633346558,grad_norm: 0.9246001368703836, iteration: 371653
loss: 1.0000642538070679,grad_norm: 0.9999991530518688, iteration: 371654
loss: 0.9987722039222717,grad_norm: 0.7625988669799567, iteration: 371655
loss: 0.9707576632499695,grad_norm: 0.8086901752425778, iteration: 371656
loss: 1.0100387334823608,grad_norm: 0.8248121013788176, iteration: 371657
loss: 1.0208913087844849,grad_norm: 0.8219443308032643, iteration: 371658
loss: 1.0565440654754639,grad_norm: 0.7867577223153794, iteration: 371659
loss: 1.0019114017486572,grad_norm: 0.9999991548697156, iteration: 371660
loss: 1.0090303421020508,grad_norm: 0.8700239246464646, iteration: 371661
loss: 1.0835217237472534,grad_norm: 0.9999995890407919, iteration: 371662
loss: 1.0075287818908691,grad_norm: 0.7687256351422432, iteration: 371663
loss: 1.0200238227844238,grad_norm: 0.9283029538564302, iteration: 371664
loss: 1.0302435159683228,grad_norm: 0.7278532234867922, iteration: 371665
loss: 1.2187577486038208,grad_norm: 0.9999991198032424, iteration: 371666
loss: 1.003356695175171,grad_norm: 0.7090014125322203, iteration: 371667
loss: 1.0233628749847412,grad_norm: 0.9029793878361868, iteration: 371668
loss: 0.9692814350128174,grad_norm: 0.7941986451321001, iteration: 371669
loss: 1.0011568069458008,grad_norm: 0.8631214581438348, iteration: 371670
loss: 1.0066624879837036,grad_norm: 0.792084942356244, iteration: 371671
loss: 0.9905073046684265,grad_norm: 0.9533683151578874, iteration: 371672
loss: 1.0085208415985107,grad_norm: 0.7352131002840996, iteration: 371673
loss: 1.1027638912200928,grad_norm: 0.9999999748493117, iteration: 371674
loss: 0.974754810333252,grad_norm: 0.8993531558718711, iteration: 371675
loss: 1.052443265914917,grad_norm: 0.9999991394704122, iteration: 371676
loss: 1.0229805707931519,grad_norm: 0.9664379187172023, iteration: 371677
loss: 1.0194061994552612,grad_norm: 0.7763273226808756, iteration: 371678
loss: 0.969967782497406,grad_norm: 0.7508564140437548, iteration: 371679
loss: 1.0210617780685425,grad_norm: 0.7610187677899013, iteration: 371680
loss: 1.0055623054504395,grad_norm: 0.8695127940010522, iteration: 371681
loss: 0.9660833477973938,grad_norm: 0.7229541107642861, iteration: 371682
loss: 1.003846287727356,grad_norm: 0.7468636243997667, iteration: 371683
loss: 1.0258619785308838,grad_norm: 0.8399225938496551, iteration: 371684
loss: 1.0020164251327515,grad_norm: 0.9755029961867967, iteration: 371685
loss: 1.026721715927124,grad_norm: 0.7724081837849214, iteration: 371686
loss: 0.9769080877304077,grad_norm: 0.9999990751990029, iteration: 371687
loss: 0.9939719438552856,grad_norm: 0.9999998388600018, iteration: 371688
loss: 0.989349365234375,grad_norm: 0.7852777389564155, iteration: 371689
loss: 1.0253781080245972,grad_norm: 0.99999909895171, iteration: 371690
loss: 1.0716862678527832,grad_norm: 0.9999997238231937, iteration: 371691
loss: 0.9940459728240967,grad_norm: 0.8322316566864741, iteration: 371692
loss: 1.0340290069580078,grad_norm: 0.8136166163264623, iteration: 371693
loss: 0.9758497476577759,grad_norm: 0.9287624842394057, iteration: 371694
loss: 1.0359666347503662,grad_norm: 0.9999995806165144, iteration: 371695
loss: 1.0807280540466309,grad_norm: 0.9999992146053037, iteration: 371696
loss: 1.0436033010482788,grad_norm: 0.7518506951124267, iteration: 371697
loss: 0.9902479648590088,grad_norm: 0.7687240897701073, iteration: 371698
loss: 1.0019937753677368,grad_norm: 0.7043054264724804, iteration: 371699
loss: 1.0241810083389282,grad_norm: 0.8109114414701347, iteration: 371700
loss: 1.001768946647644,grad_norm: 0.807184952621955, iteration: 371701
loss: 0.9799045324325562,grad_norm: 0.8105290367209422, iteration: 371702
loss: 0.989890456199646,grad_norm: 0.8367526744519003, iteration: 371703
loss: 1.02565336227417,grad_norm: 0.9146006777024406, iteration: 371704
loss: 1.0514298677444458,grad_norm: 0.8649502446212012, iteration: 371705
loss: 1.0607709884643555,grad_norm: 0.9999989955631751, iteration: 371706
loss: 1.0180870294570923,grad_norm: 0.8862507334806093, iteration: 371707
loss: 0.9957107305526733,grad_norm: 0.7942649209783434, iteration: 371708
loss: 1.0146675109863281,grad_norm: 0.7112855703973257, iteration: 371709
loss: 1.208074688911438,grad_norm: 0.9999989710645846, iteration: 371710
loss: 0.9967113137245178,grad_norm: 0.9328919456728328, iteration: 371711
loss: 1.0188874006271362,grad_norm: 0.9999998190766806, iteration: 371712
loss: 1.0004854202270508,grad_norm: 0.8867914008365438, iteration: 371713
loss: 1.0601239204406738,grad_norm: 0.9999991047065458, iteration: 371714
loss: 0.9817742109298706,grad_norm: 0.7179156662250423, iteration: 371715
loss: 1.040346622467041,grad_norm: 0.8732829014797925, iteration: 371716
loss: 1.0751539468765259,grad_norm: 0.9999992642765647, iteration: 371717
loss: 1.180188775062561,grad_norm: 0.9999990955281851, iteration: 371718
loss: 1.0141558647155762,grad_norm: 0.7462222329623474, iteration: 371719
loss: 1.0547994375228882,grad_norm: 0.6628290951160406, iteration: 371720
loss: 1.0329828262329102,grad_norm: 0.7771874009393042, iteration: 371721
loss: 1.02675461769104,grad_norm: 0.8674159898567966, iteration: 371722
loss: 0.9762641787528992,grad_norm: 0.7972977667064838, iteration: 371723
loss: 1.0161306858062744,grad_norm: 0.9999994996106562, iteration: 371724
loss: 1.0920741558074951,grad_norm: 0.9999996604171345, iteration: 371725
loss: 0.9552409052848816,grad_norm: 0.9007724913218526, iteration: 371726
loss: 1.0172512531280518,grad_norm: 0.7548658102614677, iteration: 371727
loss: 1.0198729038238525,grad_norm: 0.9999991220094723, iteration: 371728
loss: 1.0597538948059082,grad_norm: 0.8128222856941301, iteration: 371729
loss: 1.0105730295181274,grad_norm: 0.9999997846426147, iteration: 371730
loss: 1.1043719053268433,grad_norm: 0.958091075568546, iteration: 371731
loss: 1.081094741821289,grad_norm: 0.7741448990567198, iteration: 371732
loss: 0.9906392097473145,grad_norm: 0.7437126959456326, iteration: 371733
loss: 0.9741060733795166,grad_norm: 0.6923105980454389, iteration: 371734
loss: 0.9669519066810608,grad_norm: 0.846983739312236, iteration: 371735
loss: 0.9874563217163086,grad_norm: 0.9175540267400611, iteration: 371736
loss: 0.9846013784408569,grad_norm: 0.966485962172998, iteration: 371737
loss: 1.0170989036560059,grad_norm: 0.871484264469865, iteration: 371738
loss: 1.0181254148483276,grad_norm: 0.8033434780273103, iteration: 371739
loss: 1.0757461786270142,grad_norm: 0.7557597218877539, iteration: 371740
loss: 1.0437668561935425,grad_norm: 0.9999990023365952, iteration: 371741
loss: 1.0395591259002686,grad_norm: 0.9999993524954858, iteration: 371742
loss: 0.9737818241119385,grad_norm: 0.7898672393369011, iteration: 371743
loss: 1.021979808807373,grad_norm: 0.798383823183492, iteration: 371744
loss: 1.007796287536621,grad_norm: 0.9999995951050336, iteration: 371745
loss: 1.0359340906143188,grad_norm: 0.7759674244832281, iteration: 371746
loss: 1.0256965160369873,grad_norm: 0.9999997866017253, iteration: 371747
loss: 1.0009301900863647,grad_norm: 0.9131056923428124, iteration: 371748
loss: 1.0048125982284546,grad_norm: 0.7178512985252925, iteration: 371749
loss: 1.0119625329971313,grad_norm: 0.9999995075081474, iteration: 371750
loss: 1.034415364265442,grad_norm: 0.7109551690328099, iteration: 371751
loss: 0.9778235554695129,grad_norm: 0.8106993379823807, iteration: 371752
loss: 1.0673036575317383,grad_norm: 0.8440453037552667, iteration: 371753
loss: 1.035597562789917,grad_norm: 0.9413505375609468, iteration: 371754
loss: 1.007430911064148,grad_norm: 0.9999991186878154, iteration: 371755
loss: 0.950082004070282,grad_norm: 0.9290597824883471, iteration: 371756
loss: 0.9873878359794617,grad_norm: 0.9554688078924104, iteration: 371757
loss: 1.041719913482666,grad_norm: 0.999999792447219, iteration: 371758
loss: 1.027108073234558,grad_norm: 0.9491887120695219, iteration: 371759
loss: 0.9960597157478333,grad_norm: 0.7210906788878024, iteration: 371760
loss: 0.9948743581771851,grad_norm: 0.9395172552547616, iteration: 371761
loss: 0.9751967787742615,grad_norm: 0.9512123393548767, iteration: 371762
loss: 1.05745267868042,grad_norm: 0.9252988747301515, iteration: 371763
loss: 1.1118614673614502,grad_norm: 0.7678122207102065, iteration: 371764
loss: 1.0201135873794556,grad_norm: 0.9999993557460258, iteration: 371765
loss: 1.0130139589309692,grad_norm: 0.7992208717743051, iteration: 371766
loss: 0.9883294701576233,grad_norm: 0.8449449942719562, iteration: 371767
loss: 0.9603602886199951,grad_norm: 0.8536346467806101, iteration: 371768
loss: 1.0364466905593872,grad_norm: 0.7755572061464752, iteration: 371769
loss: 1.0143696069717407,grad_norm: 0.7715895654601808, iteration: 371770
loss: 0.9955049157142639,grad_norm: 0.7894770526417361, iteration: 371771
loss: 0.9731758832931519,grad_norm: 0.9124804862139944, iteration: 371772
loss: 0.9801567792892456,grad_norm: 0.8351592416973466, iteration: 371773
loss: 1.0087001323699951,grad_norm: 0.7703393971435811, iteration: 371774
loss: 1.0479135513305664,grad_norm: 0.9367334105918027, iteration: 371775
loss: 1.0139663219451904,grad_norm: 0.9999995644731723, iteration: 371776
loss: 0.9852707386016846,grad_norm: 0.6928443819024676, iteration: 371777
loss: 1.0088387727737427,grad_norm: 0.7769665726643163, iteration: 371778
loss: 1.000859260559082,grad_norm: 0.99999968610062, iteration: 371779
loss: 1.0021969079971313,grad_norm: 0.8754406148089148, iteration: 371780
loss: 1.015034794807434,grad_norm: 0.8670086266501306, iteration: 371781
loss: 0.9745532870292664,grad_norm: 0.9269146762417914, iteration: 371782
loss: 1.035818099975586,grad_norm: 0.8449760213002137, iteration: 371783
loss: 1.0279841423034668,grad_norm: 0.7144388642766947, iteration: 371784
loss: 1.0221308469772339,grad_norm: 0.9545877843146555, iteration: 371785
loss: 0.9896180033683777,grad_norm: 0.7485055785024044, iteration: 371786
loss: 1.036634087562561,grad_norm: 0.9999990017089517, iteration: 371787
loss: 1.00841224193573,grad_norm: 0.8131338207376205, iteration: 371788
loss: 1.0301560163497925,grad_norm: 0.7256143181405024, iteration: 371789
loss: 1.0779953002929688,grad_norm: 0.7869234773641594, iteration: 371790
loss: 1.011045217514038,grad_norm: 0.7997626911176751, iteration: 371791
loss: 0.9950269460678101,grad_norm: 0.9367066934362671, iteration: 371792
loss: 1.0546289682388306,grad_norm: 0.9449286049136562, iteration: 371793
loss: 1.0181621313095093,grad_norm: 0.9999994145064225, iteration: 371794
loss: 1.0372428894042969,grad_norm: 0.7274555556166604, iteration: 371795
loss: 1.0101945400238037,grad_norm: 0.8336567087872871, iteration: 371796
loss: 1.0127533674240112,grad_norm: 0.7416155543226111, iteration: 371797
loss: 0.9979952573776245,grad_norm: 0.8955437078600118, iteration: 371798
loss: 1.0430015325546265,grad_norm: 0.9999993282854316, iteration: 371799
loss: 0.9967902898788452,grad_norm: 0.8199056308013865, iteration: 371800
loss: 0.9859061241149902,grad_norm: 0.8954029816317802, iteration: 371801
loss: 0.9994400143623352,grad_norm: 0.6754050440277736, iteration: 371802
loss: 1.0907710790634155,grad_norm: 0.999999213593552, iteration: 371803
loss: 1.0198287963867188,grad_norm: 0.9999999550690813, iteration: 371804
loss: 0.9914381504058838,grad_norm: 0.7739314634685316, iteration: 371805
loss: 0.9944007992744446,grad_norm: 0.721805511826977, iteration: 371806
loss: 0.9827203750610352,grad_norm: 0.7613309733602006, iteration: 371807
loss: 0.9954774975776672,grad_norm: 0.9999996265848509, iteration: 371808
loss: 1.0390046834945679,grad_norm: 0.9999992078282506, iteration: 371809
loss: 1.0666810274124146,grad_norm: 0.9046665612370475, iteration: 371810
loss: 1.0109285116195679,grad_norm: 0.9012997027681826, iteration: 371811
loss: 0.9602113962173462,grad_norm: 0.8874252460928307, iteration: 371812
loss: 0.9629195332527161,grad_norm: 0.9999993361532199, iteration: 371813
loss: 1.0141819715499878,grad_norm: 0.7151185159161141, iteration: 371814
loss: 0.9730510115623474,grad_norm: 0.664123720754235, iteration: 371815
loss: 1.0278619527816772,grad_norm: 0.8442688947251952, iteration: 371816
loss: 1.0400866270065308,grad_norm: 0.9999994132638832, iteration: 371817
loss: 1.0109001398086548,grad_norm: 0.7738554986299228, iteration: 371818
loss: 0.9980089068412781,grad_norm: 0.9999992027818805, iteration: 371819
loss: 1.0409986972808838,grad_norm: 0.9999990979448619, iteration: 371820
loss: 0.9669210314750671,grad_norm: 0.7981642096827667, iteration: 371821
loss: 1.0155524015426636,grad_norm: 0.9999992216454697, iteration: 371822
loss: 1.0316660404205322,grad_norm: 0.9999999292361466, iteration: 371823
loss: 1.0083867311477661,grad_norm: 0.9999990928327347, iteration: 371824
loss: 1.0272533893585205,grad_norm: 0.8119668148892495, iteration: 371825
loss: 1.054321050643921,grad_norm: 0.8110980147255442, iteration: 371826
loss: 0.9920725226402283,grad_norm: 0.9999990196748156, iteration: 371827
loss: 0.9540212154388428,grad_norm: 0.7739083628114174, iteration: 371828
loss: 1.000455379486084,grad_norm: 0.8974592209929012, iteration: 371829
loss: 1.0207637548446655,grad_norm: 0.8204880153379178, iteration: 371830
loss: 1.1943174600601196,grad_norm: 0.9999998175773299, iteration: 371831
loss: 1.0309057235717773,grad_norm: 0.7923092758423281, iteration: 371832
loss: 1.0674688816070557,grad_norm: 0.9999992192943048, iteration: 371833
loss: 1.0227676630020142,grad_norm: 0.8505203699894176, iteration: 371834
loss: 1.0401489734649658,grad_norm: 0.9741000138403016, iteration: 371835
loss: 0.9631398916244507,grad_norm: 0.9576454450224406, iteration: 371836
loss: 0.9746893048286438,grad_norm: 0.8084343972253756, iteration: 371837
loss: 1.0423665046691895,grad_norm: 0.9999990135106714, iteration: 371838
loss: 1.0101593732833862,grad_norm: 0.8475151345152495, iteration: 371839
loss: 1.0079435110092163,grad_norm: 0.8172593835913468, iteration: 371840
loss: 1.0212446451187134,grad_norm: 0.974804828089269, iteration: 371841
loss: 1.0017826557159424,grad_norm: 0.9191732064078557, iteration: 371842
loss: 1.0686653852462769,grad_norm: 0.9999994613107162, iteration: 371843
loss: 1.0023794174194336,grad_norm: 0.6607131822362708, iteration: 371844
loss: 0.9780164957046509,grad_norm: 0.9999993534258039, iteration: 371845
loss: 1.029274344444275,grad_norm: 0.8015220659760625, iteration: 371846
loss: 0.9705312252044678,grad_norm: 0.740307569184959, iteration: 371847
loss: 0.9797569513320923,grad_norm: 0.9063968850740848, iteration: 371848
loss: 0.9940343499183655,grad_norm: 0.853518296576799, iteration: 371849
loss: 1.056323766708374,grad_norm: 0.8707429765437562, iteration: 371850
loss: 0.9604375958442688,grad_norm: 0.7208095943479935, iteration: 371851
loss: 0.9813625812530518,grad_norm: 0.7112947903159935, iteration: 371852
loss: 1.0693029165267944,grad_norm: 0.9999998151151893, iteration: 371853
loss: 0.9801559448242188,grad_norm: 0.7970534936838786, iteration: 371854
loss: 1.0231738090515137,grad_norm: 0.7516322653434866, iteration: 371855
loss: 0.9540321826934814,grad_norm: 0.853589648499068, iteration: 371856
loss: 0.9696577191352844,grad_norm: 0.8628555529970476, iteration: 371857
loss: 1.02717924118042,grad_norm: 0.8091750054438497, iteration: 371858
loss: 1.0525507926940918,grad_norm: 0.999999436561135, iteration: 371859
loss: 0.9723207354545593,grad_norm: 0.7560138518188827, iteration: 371860
loss: 1.063562035560608,grad_norm: 0.9474947380695026, iteration: 371861
loss: 0.9844983816146851,grad_norm: 0.6548651425640034, iteration: 371862
loss: 1.0585696697235107,grad_norm: 0.9999996592849545, iteration: 371863
loss: 1.0184704065322876,grad_norm: 0.9999990837444392, iteration: 371864
loss: 1.0168343782424927,grad_norm: 1.000000133695048, iteration: 371865
loss: 1.0459623336791992,grad_norm: 0.7391787423634316, iteration: 371866
loss: 1.0154602527618408,grad_norm: 0.999999208423792, iteration: 371867
loss: 1.003049373626709,grad_norm: 0.7719893874557265, iteration: 371868
loss: 1.0333415269851685,grad_norm: 0.8225666318061725, iteration: 371869
loss: 0.980377197265625,grad_norm: 0.8108535611517554, iteration: 371870
loss: 1.0304768085479736,grad_norm: 0.9459695626560648, iteration: 371871
loss: 1.0092341899871826,grad_norm: 0.8712115722625894, iteration: 371872
loss: 0.9503486156463623,grad_norm: 0.7804502220780067, iteration: 371873
loss: 1.0307526588439941,grad_norm: 0.7800578215819732, iteration: 371874
loss: 0.9878523349761963,grad_norm: 0.8618747646275231, iteration: 371875
loss: 0.9810863733291626,grad_norm: 0.9999990762203843, iteration: 371876
loss: 0.9834123849868774,grad_norm: 0.7330311304937701, iteration: 371877
loss: 1.0284632444381714,grad_norm: 0.7257092816592328, iteration: 371878
loss: 1.0020921230316162,grad_norm: 0.7314765425995947, iteration: 371879
loss: 0.9913244247436523,grad_norm: 0.7832503648277266, iteration: 371880
loss: 1.0411617755889893,grad_norm: 0.7716663741062139, iteration: 371881
loss: 1.0431175231933594,grad_norm: 0.965928438397236, iteration: 371882
loss: 1.0001968145370483,grad_norm: 0.879097243032232, iteration: 371883
loss: 0.9931830763816833,grad_norm: 0.7901417897628775, iteration: 371884
loss: 1.040261149406433,grad_norm: 0.836508582440533, iteration: 371885
loss: 1.044026255607605,grad_norm: 1.000000113906053, iteration: 371886
loss: 1.0082298517227173,grad_norm: 0.9999991789297618, iteration: 371887
loss: 1.023592472076416,grad_norm: 0.8874159414327166, iteration: 371888
loss: 1.022792935371399,grad_norm: 0.7984591700184042, iteration: 371889
loss: 0.9953060150146484,grad_norm: 0.6297781283066521, iteration: 371890
loss: 1.0100942850112915,grad_norm: 0.8647933031908235, iteration: 371891
loss: 0.9963581562042236,grad_norm: 0.7473914353233109, iteration: 371892
loss: 1.029646873474121,grad_norm: 0.9381934846708165, iteration: 371893
loss: 0.9707919359207153,grad_norm: 0.9999990545695436, iteration: 371894
loss: 0.9701586365699768,grad_norm: 0.9074788056925657, iteration: 371895
loss: 1.0413200855255127,grad_norm: 0.8303374104980453, iteration: 371896
loss: 0.9649624228477478,grad_norm: 0.7863997227820547, iteration: 371897
loss: 1.0068004131317139,grad_norm: 0.8806245987820354, iteration: 371898
loss: 1.036278486251831,grad_norm: 0.9999993340988115, iteration: 371899
loss: 0.9734983444213867,grad_norm: 0.8368857684672739, iteration: 371900
loss: 1.0131049156188965,grad_norm: 0.9999996452425947, iteration: 371901
loss: 1.019607663154602,grad_norm: 0.9999993545856952, iteration: 371902
loss: 0.9619344472885132,grad_norm: 0.7773496681446095, iteration: 371903
loss: 1.0741164684295654,grad_norm: 0.9251892058409986, iteration: 371904
loss: 1.0136867761611938,grad_norm: 0.7503842032630434, iteration: 371905
loss: 1.0070890188217163,grad_norm: 0.7553716728087717, iteration: 371906
loss: 1.0003689527511597,grad_norm: 0.7414952053879187, iteration: 371907
loss: 1.0539250373840332,grad_norm: 0.9999998759632086, iteration: 371908
loss: 1.0001837015151978,grad_norm: 0.9999999444858635, iteration: 371909
loss: 1.157647967338562,grad_norm: 0.8375590987131111, iteration: 371910
loss: 1.0308575630187988,grad_norm: 0.7935964693873844, iteration: 371911
loss: 0.9849115610122681,grad_norm: 0.9887753875434007, iteration: 371912
loss: 0.9984706044197083,grad_norm: 0.9587367043880554, iteration: 371913
loss: 1.0402964353561401,grad_norm: 0.9999997776542847, iteration: 371914
loss: 0.999815046787262,grad_norm: 0.8752148557753042, iteration: 371915
loss: 0.9798740148544312,grad_norm: 0.7926999760628742, iteration: 371916
loss: 0.9666383862495422,grad_norm: 0.9603228100011825, iteration: 371917
loss: 0.9534138441085815,grad_norm: 0.8765729164405419, iteration: 371918
loss: 1.0129987001419067,grad_norm: 0.9868404122262762, iteration: 371919
loss: 1.0028059482574463,grad_norm: 0.9999994226106366, iteration: 371920
loss: 1.0226706266403198,grad_norm: 0.9112513456378178, iteration: 371921
loss: 1.052732229232788,grad_norm: 0.8489985588424773, iteration: 371922
loss: 0.9937049746513367,grad_norm: 0.7860951693548617, iteration: 371923
loss: 1.104734182357788,grad_norm: 0.9999994168329365, iteration: 371924
loss: 0.969976007938385,grad_norm: 0.8539306983346583, iteration: 371925
loss: 0.9757599234580994,grad_norm: 0.7621819044099934, iteration: 371926
loss: 0.9848621487617493,grad_norm: 0.7325947684939548, iteration: 371927
loss: 0.9885213375091553,grad_norm: 0.9599914880439708, iteration: 371928
loss: 1.0202330350875854,grad_norm: 0.9999995312613299, iteration: 371929
loss: 1.027656078338623,grad_norm: 0.7778471782190864, iteration: 371930
loss: 0.9756856560707092,grad_norm: 0.7556931948806674, iteration: 371931
loss: 1.0085842609405518,grad_norm: 0.7428582457409789, iteration: 371932
loss: 0.9930291771888733,grad_norm: 0.7863116978624467, iteration: 371933
loss: 1.0127193927764893,grad_norm: 0.9999991962546848, iteration: 371934
loss: 0.9923223257064819,grad_norm: 0.6588225206916771, iteration: 371935
loss: 1.0095396041870117,grad_norm: 0.9481306315847438, iteration: 371936
loss: 1.0416500568389893,grad_norm: 0.9250526907084257, iteration: 371937
loss: 0.9691064357757568,grad_norm: 0.8409380563062591, iteration: 371938
loss: 0.9821445941925049,grad_norm: 0.7711987670525712, iteration: 371939
loss: 0.9836500287055969,grad_norm: 0.7719881677295427, iteration: 371940
loss: 0.9698906540870667,grad_norm: 0.8556331453331667, iteration: 371941
loss: 0.9522811770439148,grad_norm: 0.9737164159086652, iteration: 371942
loss: 0.9929332733154297,grad_norm: 0.749074260202373, iteration: 371943
loss: 1.0735585689544678,grad_norm: 0.6932626260440183, iteration: 371944
loss: 0.9604894518852234,grad_norm: 0.8320586479082686, iteration: 371945
loss: 1.0201443433761597,grad_norm: 0.9999995202358203, iteration: 371946
loss: 0.9910709261894226,grad_norm: 0.9519863300915495, iteration: 371947
loss: 0.9972068071365356,grad_norm: 0.7836828168503808, iteration: 371948
loss: 1.0205039978027344,grad_norm: 0.7563129931102098, iteration: 371949
loss: 0.9725865721702576,grad_norm: 0.7356280166108575, iteration: 371950
loss: 0.9853686094284058,grad_norm: 0.802321081277799, iteration: 371951
loss: 0.9960189461708069,grad_norm: 0.9999990982668274, iteration: 371952
loss: 1.0539718866348267,grad_norm: 0.7817535967299131, iteration: 371953
loss: 0.9874044060707092,grad_norm: 0.9214960820053749, iteration: 371954
loss: 1.0039265155792236,grad_norm: 0.8124956683433603, iteration: 371955
loss: 0.9795653223991394,grad_norm: 0.8088903621250209, iteration: 371956
loss: 1.032541036605835,grad_norm: 0.8413755139564685, iteration: 371957
loss: 0.9475023746490479,grad_norm: 0.8469422982575, iteration: 371958
loss: 1.0095672607421875,grad_norm: 0.7207960009607123, iteration: 371959
loss: 1.015907883644104,grad_norm: 0.9999992837155394, iteration: 371960
loss: 1.0065230131149292,grad_norm: 0.8707392410975032, iteration: 371961
loss: 0.9816707968711853,grad_norm: 0.8448013376105913, iteration: 371962
loss: 1.0110855102539062,grad_norm: 0.8655017230462583, iteration: 371963
loss: 0.9669629335403442,grad_norm: 0.8029113477571289, iteration: 371964
loss: 1.0288312435150146,grad_norm: 0.754893201014848, iteration: 371965
loss: 1.0338579416275024,grad_norm: 0.9212896185202613, iteration: 371966
loss: 1.0431921482086182,grad_norm: 0.7024650166810144, iteration: 371967
loss: 0.9793428778648376,grad_norm: 0.8884761503035815, iteration: 371968
loss: 1.0343822240829468,grad_norm: 0.9999990732265411, iteration: 371969
loss: 0.9703471660614014,grad_norm: 0.9999992341016843, iteration: 371970
loss: 1.006845474243164,grad_norm: 0.798291432563828, iteration: 371971
loss: 1.0175684690475464,grad_norm: 0.8301268350950671, iteration: 371972
loss: 1.031654953956604,grad_norm: 0.919720399533046, iteration: 371973
loss: 1.0040642023086548,grad_norm: 0.8909831341020513, iteration: 371974
loss: 0.9949542284011841,grad_norm: 0.863497930127096, iteration: 371975
loss: 1.0391254425048828,grad_norm: 0.9999991596998595, iteration: 371976
loss: 1.024141550064087,grad_norm: 0.780043212523536, iteration: 371977
loss: 1.0703210830688477,grad_norm: 0.9999996431536694, iteration: 371978
loss: 0.9715552926063538,grad_norm: 0.8189406540422451, iteration: 371979
loss: 0.9918174743652344,grad_norm: 0.7285014532176078, iteration: 371980
loss: 0.9729442000389099,grad_norm: 0.8768567932068715, iteration: 371981
loss: 0.9910118579864502,grad_norm: 0.9377539856317028, iteration: 371982
loss: 1.009047031402588,grad_norm: 0.8569541758366007, iteration: 371983
loss: 0.9797336459159851,grad_norm: 0.9999992139199646, iteration: 371984
loss: 0.954400897026062,grad_norm: 0.8719604656661366, iteration: 371985
loss: 1.0317350625991821,grad_norm: 0.856760211068169, iteration: 371986
loss: 0.9807433485984802,grad_norm: 0.8191569102501436, iteration: 371987
loss: 0.992484450340271,grad_norm: 0.7765884984179355, iteration: 371988
loss: 1.0075246095657349,grad_norm: 0.9999990402668558, iteration: 371989
loss: 0.9468479156494141,grad_norm: 0.7746495926633628, iteration: 371990
loss: 0.9976198673248291,grad_norm: 0.7614221821451045, iteration: 371991
loss: 0.9786769151687622,grad_norm: 0.6950429054564131, iteration: 371992
loss: 0.9972460269927979,grad_norm: 0.7157563771959141, iteration: 371993
loss: 0.9794562458992004,grad_norm: 0.8332652127912319, iteration: 371994
loss: 0.9874525666236877,grad_norm: 0.7378982632524581, iteration: 371995
loss: 0.9659947156906128,grad_norm: 0.8070362077285917, iteration: 371996
loss: 1.0155364274978638,grad_norm: 0.9999991665970849, iteration: 371997
loss: 1.004380226135254,grad_norm: 0.8331111770717798, iteration: 371998
loss: 1.016505479812622,grad_norm: 0.7086390434017168, iteration: 371999
loss: 1.0011416673660278,grad_norm: 0.8409005867421593, iteration: 372000
loss: 1.0081524848937988,grad_norm: 0.9418781414719104, iteration: 372001
loss: 1.0277788639068604,grad_norm: 0.9999991537095885, iteration: 372002
loss: 0.956176221370697,grad_norm: 0.8742739493341785, iteration: 372003
loss: 1.002884864807129,grad_norm: 0.7142349119541969, iteration: 372004
loss: 1.0036133527755737,grad_norm: 0.8130637454555636, iteration: 372005
loss: 0.9977768063545227,grad_norm: 0.9999991006754221, iteration: 372006
loss: 0.9683379530906677,grad_norm: 0.9046011157410879, iteration: 372007
loss: 1.0104655027389526,grad_norm: 0.7566640058193939, iteration: 372008
loss: 0.9679775238037109,grad_norm: 0.7843860000329652, iteration: 372009
loss: 1.0126304626464844,grad_norm: 0.8647079149304967, iteration: 372010
loss: 0.9984757900238037,grad_norm: 0.7567477400577869, iteration: 372011
loss: 0.9739676117897034,grad_norm: 0.8309630055636276, iteration: 372012
loss: 0.9944000244140625,grad_norm: 0.8763583566869796, iteration: 372013
loss: 1.0393970012664795,grad_norm: 0.8943040301232148, iteration: 372014
loss: 0.9706761240959167,grad_norm: 0.7756275670702448, iteration: 372015
loss: 0.9852975606918335,grad_norm: 0.8140661817424291, iteration: 372016
loss: 0.9818679094314575,grad_norm: 0.9154333174735366, iteration: 372017
loss: 1.0352410078048706,grad_norm: 0.926933997587151, iteration: 372018
loss: 1.0076395273208618,grad_norm: 0.9999997771840352, iteration: 372019
loss: 1.0066968202590942,grad_norm: 0.7974706211962539, iteration: 372020
loss: 1.0343965291976929,grad_norm: 0.6455975327313349, iteration: 372021
loss: 1.0725677013397217,grad_norm: 0.776398077019334, iteration: 372022
loss: 1.00288987159729,grad_norm: 0.7998635490209014, iteration: 372023
loss: 0.9867480397224426,grad_norm: 0.7921485423725864, iteration: 372024
loss: 0.9652553796768188,grad_norm: 0.9999991343522084, iteration: 372025
loss: 0.9810243844985962,grad_norm: 0.9370879920440353, iteration: 372026
loss: 1.0438002347946167,grad_norm: 0.9999994406819367, iteration: 372027
loss: 0.9540067911148071,grad_norm: 0.8396353225194076, iteration: 372028
loss: 1.027721643447876,grad_norm: 0.8978631680020672, iteration: 372029
loss: 1.0138638019561768,grad_norm: 0.7725761234748142, iteration: 372030
loss: 1.0050935745239258,grad_norm: 0.9999991195992468, iteration: 372031
loss: 0.999811053276062,grad_norm: 0.8451497061849894, iteration: 372032
loss: 0.9560742378234863,grad_norm: 0.8651944519191582, iteration: 372033
loss: 0.9904239773750305,grad_norm: 0.9065241978717619, iteration: 372034
loss: 0.9826939702033997,grad_norm: 0.9579246858070329, iteration: 372035
loss: 1.0227380990982056,grad_norm: 0.7257120960558059, iteration: 372036
loss: 0.975654125213623,grad_norm: 0.8255120846668854, iteration: 372037
loss: 0.981136679649353,grad_norm: 0.8872700089566783, iteration: 372038
loss: 1.0819956064224243,grad_norm: 0.8252181970267667, iteration: 372039
loss: 0.9866406321525574,grad_norm: 0.7653982766023574, iteration: 372040
loss: 1.0154837369918823,grad_norm: 0.7064885864356079, iteration: 372041
loss: 1.0387980937957764,grad_norm: 0.9999990535468245, iteration: 372042
loss: 0.9712076187133789,grad_norm: 0.7530941953654194, iteration: 372043
loss: 0.9982829689979553,grad_norm: 0.6961275018319767, iteration: 372044
loss: 0.9950597286224365,grad_norm: 0.8462759883181702, iteration: 372045
loss: 1.0216678380966187,grad_norm: 0.8131842896633018, iteration: 372046
loss: 0.9712709784507751,grad_norm: 0.7870316019825465, iteration: 372047
loss: 1.0199666023254395,grad_norm: 0.9999990065695016, iteration: 372048
loss: 0.9745173454284668,grad_norm: 0.9999992940707463, iteration: 372049
loss: 1.060353398323059,grad_norm: 0.9999992557459906, iteration: 372050
loss: 1.0458052158355713,grad_norm: 0.9611595158075106, iteration: 372051
loss: 1.0491218566894531,grad_norm: 1.0000000456963931, iteration: 372052
loss: 0.9918596148490906,grad_norm: 0.9678936776157678, iteration: 372053
loss: 1.0473660230636597,grad_norm: 0.990514199464033, iteration: 372054
loss: 0.951532244682312,grad_norm: 0.9890814939864527, iteration: 372055
loss: 0.9972103834152222,grad_norm: 0.7987897058093004, iteration: 372056
loss: 0.9663323163986206,grad_norm: 0.7677799174290177, iteration: 372057
loss: 1.0525745153427124,grad_norm: 0.8293480712572833, iteration: 372058
loss: 0.9909512400627136,grad_norm: 0.714058860978383, iteration: 372059
loss: 1.0210026502609253,grad_norm: 0.7642066957838632, iteration: 372060
loss: 0.9729776382446289,grad_norm: 0.8086213539689814, iteration: 372061
loss: 0.999919593334198,grad_norm: 0.9725952542585646, iteration: 372062
loss: 0.9913579821586609,grad_norm: 0.7955712025865902, iteration: 372063
loss: 0.9880245327949524,grad_norm: 0.8868830162204496, iteration: 372064
loss: 0.9685132503509521,grad_norm: 0.9911896355093579, iteration: 372065
loss: 1.0398861169815063,grad_norm: 0.8010065675129633, iteration: 372066
loss: 0.9863059520721436,grad_norm: 0.9999994400884485, iteration: 372067
loss: 0.9898838996887207,grad_norm: 0.8766497356014633, iteration: 372068
loss: 1.0333452224731445,grad_norm: 0.9999991194642965, iteration: 372069
loss: 1.0011485815048218,grad_norm: 0.8879725226871327, iteration: 372070
loss: 0.9824571013450623,grad_norm: 0.9999993848972425, iteration: 372071
loss: 0.9933479428291321,grad_norm: 0.6682567898213019, iteration: 372072
loss: 1.0236378908157349,grad_norm: 0.9334439482332956, iteration: 372073
loss: 1.0104906558990479,grad_norm: 0.7613300626567999, iteration: 372074
loss: 0.9897034168243408,grad_norm: 0.9999991049971824, iteration: 372075
loss: 0.9874653816223145,grad_norm: 0.7975437768593978, iteration: 372076
loss: 1.0130304098129272,grad_norm: 0.9253267032592768, iteration: 372077
loss: 1.0346016883850098,grad_norm: 0.8844006104187369, iteration: 372078
loss: 1.0100046396255493,grad_norm: 0.8248056238486894, iteration: 372079
loss: 1.0106396675109863,grad_norm: 0.9026122472457742, iteration: 372080
loss: 0.9825090765953064,grad_norm: 0.9941620374346112, iteration: 372081
loss: 0.9302092790603638,grad_norm: 0.6860429998361723, iteration: 372082
loss: 0.9837354421615601,grad_norm: 0.7621428781440434, iteration: 372083
loss: 1.0382646322250366,grad_norm: 0.9158754767226922, iteration: 372084
loss: 1.022256851196289,grad_norm: 0.7315381278973899, iteration: 372085
loss: 0.9971659183502197,grad_norm: 0.764394341152586, iteration: 372086
loss: 1.0092500448226929,grad_norm: 0.9999993187753969, iteration: 372087
loss: 1.021517276763916,grad_norm: 0.7372196008402587, iteration: 372088
loss: 1.0121228694915771,grad_norm: 0.7332061968091651, iteration: 372089
loss: 1.0117557048797607,grad_norm: 0.7263965759440475, iteration: 372090
loss: 1.0076091289520264,grad_norm: 0.80888725297375, iteration: 372091
loss: 1.0061991214752197,grad_norm: 0.844416693526135, iteration: 372092
loss: 0.984498143196106,grad_norm: 0.7842259531770497, iteration: 372093
loss: 1.0605547428131104,grad_norm: 0.9999993261266987, iteration: 372094
loss: 1.0172268152236938,grad_norm: 0.7741561644676883, iteration: 372095
loss: 1.0210480690002441,grad_norm: 0.6967250704669116, iteration: 372096
loss: 0.9670608043670654,grad_norm: 0.8139626191212266, iteration: 372097
loss: 1.011674165725708,grad_norm: 0.6868950956391223, iteration: 372098
loss: 0.9878777861595154,grad_norm: 0.7767392309829096, iteration: 372099
loss: 1.047123908996582,grad_norm: 0.9420869125767095, iteration: 372100
loss: 1.0097600221633911,grad_norm: 0.9999995653516317, iteration: 372101
loss: 0.9826607704162598,grad_norm: 0.6581312016407069, iteration: 372102
loss: 0.9705110788345337,grad_norm: 0.7509961240568698, iteration: 372103
loss: 0.9660107493400574,grad_norm: 0.6965626457691514, iteration: 372104
loss: 1.0208786725997925,grad_norm: 0.7298427015852981, iteration: 372105
loss: 1.045693278312683,grad_norm: 0.8046093772495354, iteration: 372106
loss: 1.0290727615356445,grad_norm: 0.9272132002147342, iteration: 372107
loss: 0.9887979030609131,grad_norm: 0.8232042546924861, iteration: 372108
loss: 0.9960373640060425,grad_norm: 0.9999990204622108, iteration: 372109
loss: 1.0568311214447021,grad_norm: 0.9999995195685853, iteration: 372110
loss: 1.0433399677276611,grad_norm: 0.8360807992332825, iteration: 372111
loss: 1.0672626495361328,grad_norm: 0.8615254852449702, iteration: 372112
loss: 1.0188976526260376,grad_norm: 0.8008452203499795, iteration: 372113
loss: 1.0206270217895508,grad_norm: 0.8317374651619049, iteration: 372114
loss: 1.0326857566833496,grad_norm: 0.9281658288262322, iteration: 372115
loss: 0.9853585958480835,grad_norm: 0.7980329558700545, iteration: 372116
loss: 1.0138282775878906,grad_norm: 0.9999996943427704, iteration: 372117
loss: 1.0126346349716187,grad_norm: 0.8842970789906553, iteration: 372118
loss: 0.988106906414032,grad_norm: 0.9999995042967597, iteration: 372119
loss: 0.9570443630218506,grad_norm: 0.7621664890091089, iteration: 372120
loss: 0.9814295768737793,grad_norm: 0.7922680699207024, iteration: 372121
loss: 1.0034674406051636,grad_norm: 0.707909260212235, iteration: 372122
loss: 0.9835171103477478,grad_norm: 0.8019976259086602, iteration: 372123
loss: 0.9929918050765991,grad_norm: 0.8258265398108879, iteration: 372124
loss: 0.9889140129089355,grad_norm: 0.6415565534724724, iteration: 372125
loss: 0.9894090294837952,grad_norm: 0.8008047932779485, iteration: 372126
loss: 1.0064085721969604,grad_norm: 0.8285391608417023, iteration: 372127
loss: 1.0036048889160156,grad_norm: 0.9999989803648847, iteration: 372128
loss: 1.0348109006881714,grad_norm: 0.883708980066404, iteration: 372129
loss: 0.9730581045150757,grad_norm: 0.7987782968194396, iteration: 372130
loss: 0.9808037281036377,grad_norm: 0.7416315746198259, iteration: 372131
loss: 1.0112299919128418,grad_norm: 0.8488764999524995, iteration: 372132
loss: 0.9707083106040955,grad_norm: 0.8894642263494011, iteration: 372133
loss: 1.0081593990325928,grad_norm: 0.9172803327976167, iteration: 372134
loss: 1.0160444974899292,grad_norm: 0.8052725288649406, iteration: 372135
loss: 1.0040477514266968,grad_norm: 0.7086256931886402, iteration: 372136
loss: 0.9955584406852722,grad_norm: 0.8299433148740698, iteration: 372137
loss: 0.9826342463493347,grad_norm: 0.897298689776248, iteration: 372138
loss: 1.006020188331604,grad_norm: 0.9999994722189595, iteration: 372139
loss: 1.012144684791565,grad_norm: 0.9999990190780639, iteration: 372140
loss: 0.954092264175415,grad_norm: 0.7397944372307327, iteration: 372141
loss: 1.039483904838562,grad_norm: 0.704707989853336, iteration: 372142
loss: 0.9796621799468994,grad_norm: 0.9585366285231262, iteration: 372143
loss: 0.9827278256416321,grad_norm: 0.8970907471570085, iteration: 372144
loss: 1.1245688199996948,grad_norm: 0.9999996649823129, iteration: 372145
loss: 1.0484150648117065,grad_norm: 0.9356998405164869, iteration: 372146
loss: 1.0112124681472778,grad_norm: 0.8848253928007755, iteration: 372147
loss: 0.9869460463523865,grad_norm: 0.9368119469582862, iteration: 372148
loss: 0.9877952337265015,grad_norm: 0.8942739511759423, iteration: 372149
loss: 1.0008083581924438,grad_norm: 0.9303995160064864, iteration: 372150
loss: 1.013621211051941,grad_norm: 0.9383853191077421, iteration: 372151
loss: 1.0200190544128418,grad_norm: 0.7922809112831272, iteration: 372152
loss: 1.009308934211731,grad_norm: 0.7336612044904639, iteration: 372153
loss: 1.013282299041748,grad_norm: 0.9999997305923369, iteration: 372154
loss: 0.9943753480911255,grad_norm: 0.9420721595017932, iteration: 372155
loss: 0.9766994118690491,grad_norm: 0.80412884917761, iteration: 372156
loss: 0.9933005571365356,grad_norm: 0.8576549842413943, iteration: 372157
loss: 1.0134615898132324,grad_norm: 0.8732606913509139, iteration: 372158
loss: 1.0296305418014526,grad_norm: 0.7552553461780619, iteration: 372159
loss: 1.0087007284164429,grad_norm: 0.7688109790546008, iteration: 372160
loss: 1.0618692636489868,grad_norm: 0.8492731293896171, iteration: 372161
loss: 1.0115643739700317,grad_norm: 0.8680446048694961, iteration: 372162
loss: 1.0042215585708618,grad_norm: 0.757103756621936, iteration: 372163
loss: 0.9992178678512573,grad_norm: 0.9412724256168844, iteration: 372164
loss: 1.0755304098129272,grad_norm: 0.9999996504443329, iteration: 372165
loss: 1.0155739784240723,grad_norm: 0.9999996794886549, iteration: 372166
loss: 1.0036870241165161,grad_norm: 0.7622443867799804, iteration: 372167
loss: 1.0051298141479492,grad_norm: 0.9244941803074883, iteration: 372168
loss: 0.9873142838478088,grad_norm: 0.9999989976014164, iteration: 372169
loss: 1.0388699769973755,grad_norm: 0.938105389648229, iteration: 372170
loss: 0.9722824692726135,grad_norm: 0.8073422593067557, iteration: 372171
loss: 0.9871302843093872,grad_norm: 0.7593584492049953, iteration: 372172
loss: 0.986231803894043,grad_norm: 0.8359164028855838, iteration: 372173
loss: 1.0189993381500244,grad_norm: 0.7398356965511298, iteration: 372174
loss: 0.9808180928230286,grad_norm: 0.9754821045144643, iteration: 372175
loss: 1.0456792116165161,grad_norm: 0.5939282883563652, iteration: 372176
loss: 1.0239813327789307,grad_norm: 0.7136104821683343, iteration: 372177
loss: 0.9736810326576233,grad_norm: 0.9438449348922362, iteration: 372178
loss: 0.9911438226699829,grad_norm: 0.7919492018959925, iteration: 372179
loss: 0.9849955439567566,grad_norm: 0.8450963750263651, iteration: 372180
loss: 1.1291875839233398,grad_norm: 0.9999999892655813, iteration: 372181
loss: 0.985709011554718,grad_norm: 0.8522436288445578, iteration: 372182
loss: 1.0167348384857178,grad_norm: 0.7190424022319781, iteration: 372183
loss: 0.9965318441390991,grad_norm: 0.8666685853621005, iteration: 372184
loss: 1.0017322301864624,grad_norm: 0.8210282765128442, iteration: 372185
loss: 0.9733113050460815,grad_norm: 0.7819409146504342, iteration: 372186
loss: 1.0324888229370117,grad_norm: 0.9999992050826149, iteration: 372187
loss: 0.9930741786956787,grad_norm: 0.7697639621376923, iteration: 372188
loss: 1.1088815927505493,grad_norm: 0.9999997603366585, iteration: 372189
loss: 0.9570040106773376,grad_norm: 0.779995718657378, iteration: 372190
loss: 1.0361878871917725,grad_norm: 0.8442219740671278, iteration: 372191
loss: 1.031900405883789,grad_norm: 0.8263856254364492, iteration: 372192
loss: 0.9742459058761597,grad_norm: 0.8397208070875123, iteration: 372193
loss: 1.0037094354629517,grad_norm: 0.902422276049287, iteration: 372194
loss: 0.9854795932769775,grad_norm: 0.803853555968103, iteration: 372195
loss: 0.9685985445976257,grad_norm: 0.8391971420318587, iteration: 372196
loss: 1.0509696006774902,grad_norm: 0.8334064611730704, iteration: 372197
loss: 1.0268293619155884,grad_norm: 0.7709588152663694, iteration: 372198
loss: 1.0052456855773926,grad_norm: 0.8628055433024336, iteration: 372199
loss: 1.026259183883667,grad_norm: 0.684487359343823, iteration: 372200
loss: 0.9949910044670105,grad_norm: 0.7421589964204538, iteration: 372201
loss: 0.9820263981819153,grad_norm: 0.8273909540446357, iteration: 372202
loss: 1.0185426473617554,grad_norm: 0.8041296894933034, iteration: 372203
loss: 1.0940297842025757,grad_norm: 0.9999994898518192, iteration: 372204
loss: 0.9822344779968262,grad_norm: 0.8053193434580423, iteration: 372205
loss: 0.9971176385879517,grad_norm: 0.7599183669823294, iteration: 372206
loss: 0.980712890625,grad_norm: 0.8436643633326496, iteration: 372207
loss: 0.9857851266860962,grad_norm: 0.7495970253551726, iteration: 372208
loss: 0.9544375538825989,grad_norm: 0.7498818923945436, iteration: 372209
loss: 0.9665577411651611,grad_norm: 0.985329570184177, iteration: 372210
loss: 0.9746372699737549,grad_norm: 0.8862859899206853, iteration: 372211
loss: 1.0286048650741577,grad_norm: 0.8722227627184489, iteration: 372212
loss: 1.0974197387695312,grad_norm: 0.9039065179767558, iteration: 372213
loss: 1.0027462244033813,grad_norm: 0.7889237792503613, iteration: 372214
loss: 1.0032594203948975,grad_norm: 0.8379942124988194, iteration: 372215
loss: 1.0305057764053345,grad_norm: 0.7043906584483354, iteration: 372216
loss: 1.102403998374939,grad_norm: 0.8339974957570927, iteration: 372217
loss: 0.9972807168960571,grad_norm: 0.739427643094858, iteration: 372218
loss: 0.9778509736061096,grad_norm: 0.8459712905309396, iteration: 372219
loss: 1.0070730447769165,grad_norm: 0.8807922640222857, iteration: 372220
loss: 1.0184400081634521,grad_norm: 0.8953044519542286, iteration: 372221
loss: 1.0070172548294067,grad_norm: 0.8104856213349616, iteration: 372222
loss: 1.0110245943069458,grad_norm: 0.8498010693035513, iteration: 372223
loss: 0.985660195350647,grad_norm: 0.8014877186406282, iteration: 372224
loss: 1.0445634126663208,grad_norm: 0.9627874406396112, iteration: 372225
loss: 1.036023736000061,grad_norm: 0.9999996981646181, iteration: 372226
loss: 1.0648841857910156,grad_norm: 0.9999992068919038, iteration: 372227
loss: 0.982710599899292,grad_norm: 0.7112597181086213, iteration: 372228
loss: 1.0351157188415527,grad_norm: 0.9373259065157483, iteration: 372229
loss: 1.0188655853271484,grad_norm: 0.9999991824082083, iteration: 372230
loss: 0.9899942278862,grad_norm: 0.8168377594995304, iteration: 372231
loss: 1.0320056676864624,grad_norm: 0.9999992532503897, iteration: 372232
loss: 1.0755740404129028,grad_norm: 0.766710588398966, iteration: 372233
loss: 1.0092477798461914,grad_norm: 0.7536794049246082, iteration: 372234
loss: 1.1012628078460693,grad_norm: 0.99999929495768, iteration: 372235
loss: 0.9775586724281311,grad_norm: 0.8392739707174021, iteration: 372236
loss: 1.0003966093063354,grad_norm: 0.8308257523899589, iteration: 372237
loss: 0.9934415817260742,grad_norm: 0.9280769652214665, iteration: 372238
loss: 0.9761459827423096,grad_norm: 0.7275546283638906, iteration: 372239
loss: 1.0851013660430908,grad_norm: 0.999999927432265, iteration: 372240
loss: 1.0272045135498047,grad_norm: 0.9291332433824262, iteration: 372241
loss: 1.0229350328445435,grad_norm: 0.976499910739838, iteration: 372242
loss: 1.0354417562484741,grad_norm: 0.8457333501625912, iteration: 372243
loss: 1.0184662342071533,grad_norm: 0.9999990605193403, iteration: 372244
loss: 1.0529216527938843,grad_norm: 0.9999995486442428, iteration: 372245
loss: 1.0773991346359253,grad_norm: 0.9044423987420815, iteration: 372246
loss: 1.0704010725021362,grad_norm: 0.9999998480993546, iteration: 372247
loss: 1.0205740928649902,grad_norm: 0.8634802927819438, iteration: 372248
loss: 1.0304714441299438,grad_norm: 0.9999994620010282, iteration: 372249
loss: 1.0210744142532349,grad_norm: 0.9548087852050299, iteration: 372250
loss: 1.083349585533142,grad_norm: 0.7663172909468973, iteration: 372251
loss: 1.0325162410736084,grad_norm: 0.9999990544241166, iteration: 372252
loss: 0.9970279335975647,grad_norm: 0.8917277127384656, iteration: 372253
loss: 1.0318982601165771,grad_norm: 0.8180706327274511, iteration: 372254
loss: 1.0098261833190918,grad_norm: 0.7006093421386371, iteration: 372255
loss: 1.0293277502059937,grad_norm: 0.8555926721686088, iteration: 372256
loss: 0.9781347513198853,grad_norm: 0.8457887795668001, iteration: 372257
loss: 1.0655553340911865,grad_norm: 0.912296792070549, iteration: 372258
loss: 1.0501254796981812,grad_norm: 0.9999994458882063, iteration: 372259
loss: 1.0537853240966797,grad_norm: 0.9999998672248769, iteration: 372260
loss: 1.0175491571426392,grad_norm: 0.95115804698573, iteration: 372261
loss: 1.0737248659133911,grad_norm: 0.810922487949684, iteration: 372262
loss: 1.0542960166931152,grad_norm: 0.8871148896703439, iteration: 372263
loss: 1.0222251415252686,grad_norm: 0.7398166604424369, iteration: 372264
loss: 0.989859938621521,grad_norm: 0.7737805009314597, iteration: 372265
loss: 1.0700472593307495,grad_norm: 0.9999990886776196, iteration: 372266
loss: 0.9662386775016785,grad_norm: 0.8523936058663133, iteration: 372267
loss: 1.0103569030761719,grad_norm: 0.7423762249370428, iteration: 372268
loss: 1.031741738319397,grad_norm: 1.0000000419837658, iteration: 372269
loss: 1.0574604272842407,grad_norm: 0.9999992956959383, iteration: 372270
loss: 1.006495475769043,grad_norm: 0.8819544960221684, iteration: 372271
loss: 1.0380091667175293,grad_norm: 0.8358663139007758, iteration: 372272
loss: 1.0242098569869995,grad_norm: 0.8734071679604496, iteration: 372273
loss: 1.0216459035873413,grad_norm: 0.7504228976372811, iteration: 372274
loss: 1.0588843822479248,grad_norm: 0.9999991286096362, iteration: 372275
loss: 1.0382193326950073,grad_norm: 0.8923598509108213, iteration: 372276
loss: 1.0042299032211304,grad_norm: 0.9999990005651408, iteration: 372277
loss: 1.0541646480560303,grad_norm: 0.8462697773540108, iteration: 372278
loss: 1.014526605606079,grad_norm: 0.7045313041867552, iteration: 372279
loss: 1.0131314992904663,grad_norm: 0.8242012707231074, iteration: 372280
loss: 0.9742480516433716,grad_norm: 0.845740794335123, iteration: 372281
loss: 1.0150502920150757,grad_norm: 0.8046282534070343, iteration: 372282
loss: 1.0034624338150024,grad_norm: 0.8275548240209725, iteration: 372283
loss: 1.0030077695846558,grad_norm: 0.8579279839202268, iteration: 372284
loss: 1.008802056312561,grad_norm: 0.8858579307218319, iteration: 372285
loss: 1.0180644989013672,grad_norm: 0.7762128616825208, iteration: 372286
loss: 1.0760136842727661,grad_norm: 0.999999784831199, iteration: 372287
loss: 1.0035746097564697,grad_norm: 0.7385567934830432, iteration: 372288
loss: 0.9989510178565979,grad_norm: 0.7278607346035173, iteration: 372289
loss: 0.9904751181602478,grad_norm: 0.8389785072047817, iteration: 372290
loss: 0.9830726981163025,grad_norm: 0.6179677008159679, iteration: 372291
loss: 1.0101230144500732,grad_norm: 0.7964945373065638, iteration: 372292
loss: 1.0106364488601685,grad_norm: 0.8282660751499965, iteration: 372293
loss: 0.9880868196487427,grad_norm: 0.7192998079235977, iteration: 372294
loss: 1.0241745710372925,grad_norm: 0.8375900400982632, iteration: 372295
loss: 0.991057276725769,grad_norm: 0.8679290682771754, iteration: 372296
loss: 1.006707787513733,grad_norm: 0.8993615425267827, iteration: 372297
loss: 0.9972035884857178,grad_norm: 0.7536859759996135, iteration: 372298
loss: 1.026108980178833,grad_norm: 0.7101277709029791, iteration: 372299
loss: 1.0076336860656738,grad_norm: 0.9273301788103935, iteration: 372300
loss: 1.0092122554779053,grad_norm: 0.8024314886416215, iteration: 372301
loss: 1.006392478942871,grad_norm: 0.8111722616195831, iteration: 372302
loss: 1.0284593105316162,grad_norm: 0.9013466886435632, iteration: 372303
loss: 1.0320680141448975,grad_norm: 0.8432914470666701, iteration: 372304
loss: 0.9815842509269714,grad_norm: 0.8445780260334311, iteration: 372305
loss: 1.008732795715332,grad_norm: 0.9005914774279054, iteration: 372306
loss: 1.0289583206176758,grad_norm: 0.7733950432152924, iteration: 372307
loss: 1.0432085990905762,grad_norm: 0.9144239130747686, iteration: 372308
loss: 0.9928485155105591,grad_norm: 0.9006229240911443, iteration: 372309
loss: 1.0197193622589111,grad_norm: 0.6276630700231005, iteration: 372310
loss: 1.0255614519119263,grad_norm: 0.763422589094451, iteration: 372311
loss: 0.9706651568412781,grad_norm: 0.9548489984745605, iteration: 372312
loss: 1.0065650939941406,grad_norm: 0.8503574618090846, iteration: 372313
loss: 0.9786564111709595,grad_norm: 0.7754658126095332, iteration: 372314
loss: 1.1471768617630005,grad_norm: 0.9999996151595364, iteration: 372315
loss: 0.99651700258255,grad_norm: 0.9645944682423885, iteration: 372316
loss: 1.0263381004333496,grad_norm: 0.9999990168202941, iteration: 372317
loss: 0.9947164058685303,grad_norm: 0.7402236308870804, iteration: 372318
loss: 1.0265159606933594,grad_norm: 0.8573980781737568, iteration: 372319
loss: 1.0188888311386108,grad_norm: 0.9999998348118442, iteration: 372320
loss: 1.0180675983428955,grad_norm: 0.8670501671239277, iteration: 372321
loss: 1.081253170967102,grad_norm: 0.8656408709422607, iteration: 372322
loss: 0.9619127511978149,grad_norm: 0.999999067354048, iteration: 372323
loss: 0.9990382194519043,grad_norm: 0.8684758663222024, iteration: 372324
loss: 0.9959559440612793,grad_norm: 0.789737056377191, iteration: 372325
loss: 1.0312577486038208,grad_norm: 0.7322709100387058, iteration: 372326
loss: 0.9995659589767456,grad_norm: 0.9414479913631036, iteration: 372327
loss: 0.9916980862617493,grad_norm: 0.8279978569768508, iteration: 372328
loss: 1.00312077999115,grad_norm: 0.8709625618707887, iteration: 372329
loss: 1.0523234605789185,grad_norm: 0.7291715896496201, iteration: 372330
loss: 0.990415096282959,grad_norm: 0.6895230684939057, iteration: 372331
loss: 0.9911895990371704,grad_norm: 0.8607592335222721, iteration: 372332
loss: 0.9848909974098206,grad_norm: 0.6785129818196078, iteration: 372333
loss: 1.034708023071289,grad_norm: 0.9999991132561821, iteration: 372334
loss: 1.0083882808685303,grad_norm: 0.7385917708233931, iteration: 372335
loss: 1.0695509910583496,grad_norm: 0.7854936582489537, iteration: 372336
loss: 1.022262454032898,grad_norm: 0.9999996513167503, iteration: 372337
loss: 1.0331987142562866,grad_norm: 0.8575441152690246, iteration: 372338
loss: 1.0516530275344849,grad_norm: 0.9999993029512089, iteration: 372339
loss: 0.9947362542152405,grad_norm: 0.8702033590690598, iteration: 372340
loss: 0.9988444447517395,grad_norm: 0.9999991680956022, iteration: 372341
loss: 0.9742153286933899,grad_norm: 0.7755747845465325, iteration: 372342
loss: 1.0056653022766113,grad_norm: 0.7224662988206857, iteration: 372343
loss: 0.9877520799636841,grad_norm: 0.6972022849581311, iteration: 372344
loss: 0.997170627117157,grad_norm: 0.9999990928974831, iteration: 372345
loss: 0.9966444969177246,grad_norm: 0.9999991039957826, iteration: 372346
loss: 1.00875723361969,grad_norm: 0.8055356308481117, iteration: 372347
loss: 0.9802636504173279,grad_norm: 0.898315322278328, iteration: 372348
loss: 1.0124292373657227,grad_norm: 0.9531324075096065, iteration: 372349
loss: 0.9631696939468384,grad_norm: 0.7142886878543496, iteration: 372350
loss: 1.0042668581008911,grad_norm: 0.8555994924826952, iteration: 372351
loss: 1.0107612609863281,grad_norm: 0.7600613116854114, iteration: 372352
loss: 1.0075457096099854,grad_norm: 0.7268063824563502, iteration: 372353
loss: 0.9640495777130127,grad_norm: 0.6773673262498645, iteration: 372354
loss: 0.9946720600128174,grad_norm: 0.8555115916994833, iteration: 372355
loss: 1.0232102870941162,grad_norm: 0.864613058040559, iteration: 372356
loss: 0.9916209578514099,grad_norm: 0.7889776708319969, iteration: 372357
loss: 1.0007895231246948,grad_norm: 0.7169134613691543, iteration: 372358
loss: 0.9918471574783325,grad_norm: 0.6865373604098053, iteration: 372359
loss: 1.00729501247406,grad_norm: 0.6865205117570365, iteration: 372360
loss: 0.9866679906845093,grad_norm: 0.7191185860433525, iteration: 372361
loss: 0.9831255674362183,grad_norm: 0.9999991611197789, iteration: 372362
loss: 0.9680883884429932,grad_norm: 0.8889918971501571, iteration: 372363
loss: 0.9433378577232361,grad_norm: 0.8649035966206383, iteration: 372364
loss: 1.0102134943008423,grad_norm: 0.9345545762261074, iteration: 372365
loss: 0.9833009243011475,grad_norm: 0.7284983446655721, iteration: 372366
loss: 0.9881747364997864,grad_norm: 0.9999992293315484, iteration: 372367
loss: 1.0193926095962524,grad_norm: 0.9999990848469927, iteration: 372368
loss: 0.9809987545013428,grad_norm: 0.6512449661773236, iteration: 372369
loss: 1.0020376443862915,grad_norm: 0.8504338715126243, iteration: 372370
loss: 0.9823458194732666,grad_norm: 0.6757888076079958, iteration: 372371
loss: 0.9973699450492859,grad_norm: 0.946130920032796, iteration: 372372
loss: 0.9809379577636719,grad_norm: 0.9660631293388041, iteration: 372373
loss: 0.9869099855422974,grad_norm: 0.999999273644952, iteration: 372374
loss: 1.0233772993087769,grad_norm: 0.911701826818296, iteration: 372375
loss: 0.9925062656402588,grad_norm: 0.9999992617909457, iteration: 372376
loss: 0.9953742027282715,grad_norm: 0.7837644570145992, iteration: 372377
loss: 0.997139573097229,grad_norm: 0.880309471131921, iteration: 372378
loss: 1.0094618797302246,grad_norm: 0.7783416126922711, iteration: 372379
loss: 1.0166289806365967,grad_norm: 0.8557818133463981, iteration: 372380
loss: 0.9674563407897949,grad_norm: 0.967565298842753, iteration: 372381
loss: 0.9653543829917908,grad_norm: 0.799175288491496, iteration: 372382
loss: 1.024933934211731,grad_norm: 0.952343904768011, iteration: 372383
loss: 0.9778040051460266,grad_norm: 0.8685348449899349, iteration: 372384
loss: 1.0105708837509155,grad_norm: 0.7132109779890596, iteration: 372385
loss: 0.9814401268959045,grad_norm: 0.8323490031976062, iteration: 372386
loss: 0.9971849918365479,grad_norm: 0.7945253538906879, iteration: 372387
loss: 1.0195292234420776,grad_norm: 0.7940670358436394, iteration: 372388
loss: 0.9902381300926208,grad_norm: 0.8296857187465432, iteration: 372389
loss: 1.0047800540924072,grad_norm: 0.774877061655954, iteration: 372390
loss: 1.0271772146224976,grad_norm: 0.8014922139195677, iteration: 372391
loss: 1.0690208673477173,grad_norm: 0.9999992358150406, iteration: 372392
loss: 0.9851551651954651,grad_norm: 0.9358580067823086, iteration: 372393
loss: 1.072427749633789,grad_norm: 0.9999998860153235, iteration: 372394
loss: 0.9796752333641052,grad_norm: 0.7420020599878968, iteration: 372395
loss: 0.9667950868606567,grad_norm: 0.7603594707745579, iteration: 372396
loss: 1.0204331874847412,grad_norm: 0.7553785336708984, iteration: 372397
loss: 1.003537654876709,grad_norm: 0.699660401619167, iteration: 372398
loss: 1.0430355072021484,grad_norm: 0.8798244743938322, iteration: 372399
loss: 1.0333590507507324,grad_norm: 0.7518153193439758, iteration: 372400
loss: 0.9858617782592773,grad_norm: 0.8413903224428187, iteration: 372401
loss: 1.0247894525527954,grad_norm: 0.8093694870842353, iteration: 372402
loss: 0.9795933365821838,grad_norm: 0.9999997477225866, iteration: 372403
loss: 0.9924162030220032,grad_norm: 0.9045095534446783, iteration: 372404
loss: 0.9964520335197449,grad_norm: 0.9999996504709979, iteration: 372405
loss: 1.0395747423171997,grad_norm: 0.7630705030088744, iteration: 372406
loss: 0.9948453307151794,grad_norm: 0.8022097788252848, iteration: 372407
loss: 0.9664819240570068,grad_norm: 0.6488807822558794, iteration: 372408
loss: 0.9781637787818909,grad_norm: 0.838186484052852, iteration: 372409
loss: 1.001179814338684,grad_norm: 0.9999996091441395, iteration: 372410
loss: 0.9569130539894104,grad_norm: 0.8755748356944676, iteration: 372411
loss: 1.039386510848999,grad_norm: 0.8103357052371991, iteration: 372412
loss: 1.018576979637146,grad_norm: 0.8252309871577096, iteration: 372413
loss: 0.9976466298103333,grad_norm: 0.7583584924707102, iteration: 372414
loss: 1.1055370569229126,grad_norm: 0.8757364728622573, iteration: 372415
loss: 1.010931134223938,grad_norm: 0.9999991904127429, iteration: 372416
loss: 1.0163507461547852,grad_norm: 0.7813894398793372, iteration: 372417
loss: 1.0011396408081055,grad_norm: 0.7810273249482862, iteration: 372418
loss: 1.0274375677108765,grad_norm: 0.8672594089141683, iteration: 372419
loss: 0.9823769927024841,grad_norm: 0.9057214727438283, iteration: 372420
loss: 0.9809115529060364,grad_norm: 0.8242933076409418, iteration: 372421
loss: 0.9904164671897888,grad_norm: 0.9999992362499351, iteration: 372422
loss: 0.9923415184020996,grad_norm: 0.7194680520469866, iteration: 372423
loss: 0.9732069373130798,grad_norm: 0.8609520369620085, iteration: 372424
loss: 1.001668095588684,grad_norm: 0.7194700267996208, iteration: 372425
loss: 0.9897645115852356,grad_norm: 0.9999994388649104, iteration: 372426
loss: 1.024917483329773,grad_norm: 0.9235200186615019, iteration: 372427
loss: 0.9951732158660889,grad_norm: 0.8877867713005159, iteration: 372428
loss: 0.9861141443252563,grad_norm: 0.9484177862697225, iteration: 372429
loss: 1.0007160902023315,grad_norm: 0.7652730305199223, iteration: 372430
loss: 1.0391629934310913,grad_norm: 0.9999991232206238, iteration: 372431
loss: 0.999080240726471,grad_norm: 0.7744167108615782, iteration: 372432
loss: 0.9768950343132019,grad_norm: 0.839056700357449, iteration: 372433
loss: 1.014937162399292,grad_norm: 0.8803268426417037, iteration: 372434
loss: 1.0067411661148071,grad_norm: 0.9051264458563488, iteration: 372435
loss: 0.9612360000610352,grad_norm: 0.9999992051215474, iteration: 372436
loss: 0.9926345944404602,grad_norm: 0.8959476088014559, iteration: 372437
loss: 1.047411322593689,grad_norm: 0.9999992629020146, iteration: 372438
loss: 0.9849055409431458,grad_norm: 0.7777379099451324, iteration: 372439
loss: 0.9711209535598755,grad_norm: 0.7917008038568707, iteration: 372440
loss: 1.0122140645980835,grad_norm: 0.7888530929066332, iteration: 372441
loss: 1.0244227647781372,grad_norm: 0.9999994889545356, iteration: 372442
loss: 0.9768082499504089,grad_norm: 0.9999989060733949, iteration: 372443
loss: 0.962028980255127,grad_norm: 0.8534371780443828, iteration: 372444
loss: 0.9963809847831726,grad_norm: 0.8496840498167496, iteration: 372445
loss: 0.9798224568367004,grad_norm: 0.9915148179446851, iteration: 372446
loss: 1.0547508001327515,grad_norm: 0.902010956049214, iteration: 372447
loss: 0.9792603850364685,grad_norm: 0.9474482233371846, iteration: 372448
loss: 0.9730972051620483,grad_norm: 0.9726375861817131, iteration: 372449
loss: 1.046916127204895,grad_norm: 0.7062364073085104, iteration: 372450
loss: 0.9792989492416382,grad_norm: 0.9071050037615734, iteration: 372451
loss: 1.0002444982528687,grad_norm: 0.8437673399101142, iteration: 372452
loss: 0.9744020700454712,grad_norm: 0.9713666778822834, iteration: 372453
loss: 0.9872322082519531,grad_norm: 0.9056280382824047, iteration: 372454
loss: 0.9891446828842163,grad_norm: 0.7388107827790816, iteration: 372455
loss: 0.9952059984207153,grad_norm: 0.915825991374987, iteration: 372456
loss: 1.0222911834716797,grad_norm: 0.9742886707102998, iteration: 372457
loss: 0.9873290657997131,grad_norm: 0.6740565265465813, iteration: 372458
loss: 1.0070196390151978,grad_norm: 0.7723986380401151, iteration: 372459
loss: 0.9823911190032959,grad_norm: 0.7680561480649761, iteration: 372460
loss: 0.9969501495361328,grad_norm: 0.9999993027913204, iteration: 372461
loss: 1.033153772354126,grad_norm: 0.7315757689942373, iteration: 372462
loss: 0.996853232383728,grad_norm: 0.9547804510765187, iteration: 372463
loss: 1.041161060333252,grad_norm: 0.999999100049527, iteration: 372464
loss: 0.9889894723892212,grad_norm: 0.8978020679063914, iteration: 372465
loss: 1.0035232305526733,grad_norm: 0.8484238425396965, iteration: 372466
loss: 1.0134707689285278,grad_norm: 0.7470984572737541, iteration: 372467
loss: 1.001042127609253,grad_norm: 0.7791952071369252, iteration: 372468
loss: 0.993695080280304,grad_norm: 0.7772646914186437, iteration: 372469
loss: 0.9591253995895386,grad_norm: 0.8103171135197695, iteration: 372470
loss: 1.013007402420044,grad_norm: 0.8368263643951033, iteration: 372471
loss: 1.2026492357254028,grad_norm: 0.9999989958755608, iteration: 372472
loss: 0.9787269830703735,grad_norm: 0.7837468506124374, iteration: 372473
loss: 1.0525888204574585,grad_norm: 0.9479545296327239, iteration: 372474
loss: 1.0172799825668335,grad_norm: 0.9999996599676175, iteration: 372475
loss: 1.0246857404708862,grad_norm: 0.6928221717126276, iteration: 372476
loss: 0.9788632392883301,grad_norm: 0.9999991280545489, iteration: 372477
loss: 0.9873815774917603,grad_norm: 0.8400794647291395, iteration: 372478
loss: 1.0128114223480225,grad_norm: 0.7495058854218822, iteration: 372479
loss: 0.9917361736297607,grad_norm: 0.7415832174565632, iteration: 372480
loss: 0.9834635257720947,grad_norm: 0.9936363908351311, iteration: 372481
loss: 0.9821705222129822,grad_norm: 0.8289134532706285, iteration: 372482
loss: 1.0801020860671997,grad_norm: 0.7828927291314309, iteration: 372483
loss: 1.0244899988174438,grad_norm: 0.9999995424647813, iteration: 372484
loss: 0.9726497530937195,grad_norm: 0.8231969255976775, iteration: 372485
loss: 0.9513265490531921,grad_norm: 0.8260336965281901, iteration: 372486
loss: 0.9808799624443054,grad_norm: 0.7037813245518902, iteration: 372487
loss: 0.9489266872406006,grad_norm: 0.8325963537353283, iteration: 372488
loss: 0.942345142364502,grad_norm: 0.9999992146188825, iteration: 372489
loss: 0.9905150532722473,grad_norm: 0.8425306009663908, iteration: 372490
loss: 0.98147052526474,grad_norm: 0.868807119665802, iteration: 372491
loss: 1.0053653717041016,grad_norm: 0.6900570223372384, iteration: 372492
loss: 0.9983025789260864,grad_norm: 0.9756758851593335, iteration: 372493
loss: 1.0068670511245728,grad_norm: 0.9999991500466807, iteration: 372494
loss: 0.9903780221939087,grad_norm: 0.841674819225889, iteration: 372495
loss: 0.9787294268608093,grad_norm: 0.9404634617452948, iteration: 372496
loss: 0.9996001720428467,grad_norm: 0.8392801043739563, iteration: 372497
loss: 0.996359646320343,grad_norm: 0.8175016238692671, iteration: 372498
loss: 1.0559569597244263,grad_norm: 0.9378772869982328, iteration: 372499
loss: 0.9811620116233826,grad_norm: 0.7856582697095744, iteration: 372500
loss: 0.9804462790489197,grad_norm: 0.8630783079867739, iteration: 372501
loss: 0.9826158285140991,grad_norm: 0.8758420491742573, iteration: 372502
loss: 1.0608681440353394,grad_norm: 0.9999991892986828, iteration: 372503
loss: 1.0230835676193237,grad_norm: 0.8223528110899886, iteration: 372504
loss: 0.9804012179374695,grad_norm: 0.8926344191711613, iteration: 372505
loss: 1.012905478477478,grad_norm: 0.8561393310968488, iteration: 372506
loss: 1.0031168460845947,grad_norm: 0.8134521943534389, iteration: 372507
loss: 0.9710449576377869,grad_norm: 0.9999990762672908, iteration: 372508
loss: 0.9860672950744629,grad_norm: 0.8105553603146299, iteration: 372509
loss: 0.9965280890464783,grad_norm: 0.9999999090156153, iteration: 372510
loss: 1.0510869026184082,grad_norm: 0.9999995124297573, iteration: 372511
loss: 1.0081247091293335,grad_norm: 0.9838723023522983, iteration: 372512
loss: 1.0177593231201172,grad_norm: 0.8525861196091185, iteration: 372513
loss: 1.0143566131591797,grad_norm: 0.7401128722902186, iteration: 372514
loss: 1.060800313949585,grad_norm: 0.8736212629617716, iteration: 372515
loss: 0.9911213517189026,grad_norm: 0.6713150538241813, iteration: 372516
loss: 0.9811196327209473,grad_norm: 0.811246716241892, iteration: 372517
loss: 1.0185338258743286,grad_norm: 0.9424958023302903, iteration: 372518
loss: 1.0023553371429443,grad_norm: 0.726630400579414, iteration: 372519
loss: 1.0315440893173218,grad_norm: 0.999999142503206, iteration: 372520
loss: 0.9854115843772888,grad_norm: 0.7613510365940266, iteration: 372521
loss: 0.9805481433868408,grad_norm: 0.8687133625751632, iteration: 372522
loss: 1.0137605667114258,grad_norm: 0.6910282525518262, iteration: 372523
loss: 1.0703977346420288,grad_norm: 0.7319666378270514, iteration: 372524
loss: 0.9704577326774597,grad_norm: 0.8262701029117626, iteration: 372525
loss: 0.9843389987945557,grad_norm: 0.8284244528539991, iteration: 372526
loss: 1.0347620248794556,grad_norm: 0.9324201549150405, iteration: 372527
loss: 0.9881285429000854,grad_norm: 0.945774451446216, iteration: 372528
loss: 1.0311670303344727,grad_norm: 0.9438298931532327, iteration: 372529
loss: 1.0115277767181396,grad_norm: 0.8940118236273319, iteration: 372530
loss: 1.025397539138794,grad_norm: 0.9753658717788243, iteration: 372531
loss: 1.010438084602356,grad_norm: 0.7320764397778612, iteration: 372532
loss: 1.0072271823883057,grad_norm: 0.8524397367305656, iteration: 372533
loss: 1.0057655572891235,grad_norm: 0.7564703529699348, iteration: 372534
loss: 1.0211814641952515,grad_norm: 0.9999996215955664, iteration: 372535
loss: 1.0454843044281006,grad_norm: 0.9064973901621669, iteration: 372536
loss: 0.9918403029441833,grad_norm: 0.9999993450210068, iteration: 372537
loss: 0.9905475974082947,grad_norm: 0.7688247747818985, iteration: 372538
loss: 1.011386752128601,grad_norm: 0.7581068664396106, iteration: 372539
loss: 0.986832320690155,grad_norm: 0.695289267823993, iteration: 372540
loss: 1.0173029899597168,grad_norm: 0.9842287152276922, iteration: 372541
loss: 1.0298186540603638,grad_norm: 0.7788221591374553, iteration: 372542
loss: 1.0334354639053345,grad_norm: 0.6882641624143513, iteration: 372543
loss: 0.9998167753219604,grad_norm: 0.9716022565959205, iteration: 372544
loss: 1.03472900390625,grad_norm: 0.8327584044869762, iteration: 372545
loss: 0.9386531710624695,grad_norm: 0.7534286988263444, iteration: 372546
loss: 1.1317310333251953,grad_norm: 0.9899964129907398, iteration: 372547
loss: 0.9875673055648804,grad_norm: 0.7974743725560479, iteration: 372548
loss: 1.005794644355774,grad_norm: 0.7945155948811716, iteration: 372549
loss: 0.984453022480011,grad_norm: 0.6756702185662098, iteration: 372550
loss: 1.0399775505065918,grad_norm: 0.787857317628339, iteration: 372551
loss: 0.9897751212120056,grad_norm: 0.7344806595875223, iteration: 372552
loss: 0.985883891582489,grad_norm: 0.649764970507763, iteration: 372553
loss: 1.0703117847442627,grad_norm: 0.9148571252789679, iteration: 372554
loss: 1.018519401550293,grad_norm: 0.7787740527297108, iteration: 372555
loss: 0.9890176057815552,grad_norm: 0.7379436959780599, iteration: 372556
loss: 1.0360071659088135,grad_norm: 0.754750883974351, iteration: 372557
loss: 0.971555233001709,grad_norm: 0.8027443834253317, iteration: 372558
loss: 1.0331931114196777,grad_norm: 0.9632878512878912, iteration: 372559
loss: 1.0154247283935547,grad_norm: 0.8582336043616784, iteration: 372560
loss: 1.0425565242767334,grad_norm: 0.7051941962032289, iteration: 372561
loss: 1.0153616666793823,grad_norm: 0.7425143255235201, iteration: 372562
loss: 1.0007096529006958,grad_norm: 0.8129467148785172, iteration: 372563
loss: 1.0108917951583862,grad_norm: 0.6869305495591812, iteration: 372564
loss: 1.0082257986068726,grad_norm: 0.7532662356072427, iteration: 372565
loss: 0.9914838671684265,grad_norm: 0.7021982062458005, iteration: 372566
loss: 1.0134434700012207,grad_norm: 0.8837163621771147, iteration: 372567
loss: 0.9948935508728027,grad_norm: 0.7210922980825784, iteration: 372568
loss: 1.021655797958374,grad_norm: 0.8980265204596577, iteration: 372569
loss: 1.0097674131393433,grad_norm: 0.6674311212456023, iteration: 372570
loss: 0.995068371295929,grad_norm: 0.843791423692009, iteration: 372571
loss: 1.0181207656860352,grad_norm: 0.9999990755478438, iteration: 372572
loss: 1.1008487939834595,grad_norm: 0.8489289455601021, iteration: 372573
loss: 0.9966623187065125,grad_norm: 0.8090892820899708, iteration: 372574
loss: 0.9625646471977234,grad_norm: 0.8006999198676505, iteration: 372575
loss: 0.9707713723182678,grad_norm: 0.9148684617998664, iteration: 372576
loss: 0.9800640940666199,grad_norm: 0.8202296433228735, iteration: 372577
loss: 1.046582579612732,grad_norm: 0.8473375167897934, iteration: 372578
loss: 0.9962716102600098,grad_norm: 0.8324639929636276, iteration: 372579
loss: 1.0197168588638306,grad_norm: 0.9999998650811667, iteration: 372580
loss: 0.9933590888977051,grad_norm: 0.9999991573953357, iteration: 372581
loss: 0.9973195791244507,grad_norm: 0.830483024109105, iteration: 372582
loss: 0.9951437711715698,grad_norm: 0.8340375718012915, iteration: 372583
loss: 1.005184292793274,grad_norm: 0.8678653485972607, iteration: 372584
loss: 1.0091267824172974,grad_norm: 0.8319227493990402, iteration: 372585
loss: 1.0002803802490234,grad_norm: 0.76541131324746, iteration: 372586
loss: 1.043707251548767,grad_norm: 0.8235455576919137, iteration: 372587
loss: 1.0347070693969727,grad_norm: 0.9999989931288725, iteration: 372588
loss: 0.9932277202606201,grad_norm: 0.8682850803350319, iteration: 372589
loss: 0.9944312572479248,grad_norm: 0.8316453228686722, iteration: 372590
loss: 1.0273483991622925,grad_norm: 0.8384400122170172, iteration: 372591
loss: 1.0201513767242432,grad_norm: 0.7581366683649778, iteration: 372592
loss: 0.9849351048469543,grad_norm: 0.7340124734384121, iteration: 372593
loss: 1.024391770362854,grad_norm: 0.7071419290549826, iteration: 372594
loss: 0.9884675741195679,grad_norm: 0.9999999068417702, iteration: 372595
loss: 0.9664072394371033,grad_norm: 0.7413190069317271, iteration: 372596
loss: 1.0001213550567627,grad_norm: 0.9999992857558004, iteration: 372597
loss: 0.9957820177078247,grad_norm: 0.8181862675816102, iteration: 372598
loss: 0.9980071187019348,grad_norm: 0.7494532516898739, iteration: 372599
loss: 1.0162279605865479,grad_norm: 0.7712666788944639, iteration: 372600
loss: 0.9894318580627441,grad_norm: 0.7193759951874825, iteration: 372601
loss: 0.9988656640052795,grad_norm: 0.9252484058672426, iteration: 372602
loss: 0.9734677076339722,grad_norm: 1.0000001821792255, iteration: 372603
loss: 0.9882504940032959,grad_norm: 0.9584915981850541, iteration: 372604
loss: 0.9954171180725098,grad_norm: 0.6969783668795966, iteration: 372605
loss: 0.9729458093643188,grad_norm: 0.7433813198867888, iteration: 372606
loss: 0.9989532828330994,grad_norm: 0.7346451825955045, iteration: 372607
loss: 0.9718455076217651,grad_norm: 0.8045413137753625, iteration: 372608
loss: 0.9820504188537598,grad_norm: 0.809942330043473, iteration: 372609
loss: 1.004465937614441,grad_norm: 0.6039000332476361, iteration: 372610
loss: 1.0304917097091675,grad_norm: 0.8274250747319551, iteration: 372611
loss: 0.9870850443840027,grad_norm: 0.759883175224926, iteration: 372612
loss: 1.0085737705230713,grad_norm: 0.9867486018767424, iteration: 372613
loss: 0.9889059662818909,grad_norm: 0.8563179096595503, iteration: 372614
loss: 0.9641027450561523,grad_norm: 0.7549897077874822, iteration: 372615
loss: 1.0090245008468628,grad_norm: 0.7796815823186108, iteration: 372616
loss: 0.9929397702217102,grad_norm: 0.7735849096736767, iteration: 372617
loss: 1.0090097188949585,grad_norm: 0.8995684489905542, iteration: 372618
loss: 0.9872844219207764,grad_norm: 0.7296493309296982, iteration: 372619
loss: 0.962665319442749,grad_norm: 0.8438211532705892, iteration: 372620
loss: 1.0046801567077637,grad_norm: 0.9751701737170965, iteration: 372621
loss: 1.003166675567627,grad_norm: 0.8084475875195993, iteration: 372622
loss: 1.0188428163528442,grad_norm: 0.6900841833397682, iteration: 372623
loss: 1.052139401435852,grad_norm: 0.9209818340049292, iteration: 372624
loss: 1.0071159601211548,grad_norm: 0.7811511636929177, iteration: 372625
loss: 0.9874227046966553,grad_norm: 0.7658413726226505, iteration: 372626
loss: 0.9868072271347046,grad_norm: 0.7581234200689843, iteration: 372627
loss: 1.0244828462600708,grad_norm: 0.9999991187178617, iteration: 372628
loss: 1.0633922815322876,grad_norm: 0.901437818732245, iteration: 372629
loss: 0.9921513199806213,grad_norm: 0.8956489633089779, iteration: 372630
loss: 1.043380856513977,grad_norm: 0.8238584317868983, iteration: 372631
loss: 1.0092687606811523,grad_norm: 0.8420579167866648, iteration: 372632
loss: 0.9933993816375732,grad_norm: 0.8994219318092173, iteration: 372633
loss: 1.0398881435394287,grad_norm: 0.8948097385277215, iteration: 372634
loss: 1.0223934650421143,grad_norm: 0.9194147330925253, iteration: 372635
loss: 1.0093482732772827,grad_norm: 0.798237935335237, iteration: 372636
loss: 1.0097662210464478,grad_norm: 0.737245116491694, iteration: 372637
loss: 1.0024065971374512,grad_norm: 0.9514947742076806, iteration: 372638
loss: 0.9961438179016113,grad_norm: 0.8245345447527335, iteration: 372639
loss: 1.0549743175506592,grad_norm: 0.8337566468787534, iteration: 372640
loss: 1.0133646726608276,grad_norm: 0.9198143994905241, iteration: 372641
loss: 1.0182636976242065,grad_norm: 0.848453088035032, iteration: 372642
loss: 1.020249605178833,grad_norm: 0.8553356471201052, iteration: 372643
loss: 0.992137610912323,grad_norm: 0.9999990806148747, iteration: 372644
loss: 1.0052036046981812,grad_norm: 0.8251726019153736, iteration: 372645
loss: 1.0074145793914795,grad_norm: 0.9447578675342174, iteration: 372646
loss: 0.9803860187530518,grad_norm: 0.7477697198317791, iteration: 372647
loss: 0.9753283858299255,grad_norm: 0.7219476790188174, iteration: 372648
loss: 0.9913483262062073,grad_norm: 0.6922799515542503, iteration: 372649
loss: 1.0243825912475586,grad_norm: 0.8147849036219332, iteration: 372650
loss: 0.9855285286903381,grad_norm: 0.8480103488779753, iteration: 372651
loss: 1.0019499063491821,grad_norm: 0.7590559465188746, iteration: 372652
loss: 1.0001423358917236,grad_norm: 0.8287052775724244, iteration: 372653
loss: 1.0255180597305298,grad_norm: 0.8218516790621374, iteration: 372654
loss: 1.0028964281082153,grad_norm: 0.8495612306209658, iteration: 372655
loss: 0.9822141528129578,grad_norm: 0.7136206908195668, iteration: 372656
loss: 0.9877843856811523,grad_norm: 0.7630084228897107, iteration: 372657
loss: 1.0259567499160767,grad_norm: 0.8006070302687475, iteration: 372658
loss: 1.0508317947387695,grad_norm: 0.9999996240733425, iteration: 372659
loss: 0.9832726120948792,grad_norm: 0.7188922935757548, iteration: 372660
loss: 0.9870070815086365,grad_norm: 0.7505633670027084, iteration: 372661
loss: 1.0091748237609863,grad_norm: 0.7185087893577663, iteration: 372662
loss: 1.0103256702423096,grad_norm: 0.7828835632705257, iteration: 372663
loss: 1.0036462545394897,grad_norm: 0.7694789180799203, iteration: 372664
loss: 1.0507240295410156,grad_norm: 0.8270166962095655, iteration: 372665
loss: 1.036911129951477,grad_norm: 0.9407504060752131, iteration: 372666
loss: 0.9435251951217651,grad_norm: 0.7102400026296322, iteration: 372667
loss: 0.9693204164505005,grad_norm: 0.8540438545542222, iteration: 372668
loss: 0.9953650832176208,grad_norm: 0.7199528426999864, iteration: 372669
loss: 1.0103363990783691,grad_norm: 0.9821891147317259, iteration: 372670
loss: 0.9887511730194092,grad_norm: 0.8306642249534112, iteration: 372671
loss: 0.9646780490875244,grad_norm: 0.7737747084424106, iteration: 372672
loss: 0.9866803288459778,grad_norm: 0.9999990287248218, iteration: 372673
loss: 0.9680551290512085,grad_norm: 0.8070725789175491, iteration: 372674
loss: 1.0660008192062378,grad_norm: 0.9999994146880646, iteration: 372675
loss: 1.004355549812317,grad_norm: 0.8780067264800643, iteration: 372676
loss: 0.9950355291366577,grad_norm: 0.9010611336756136, iteration: 372677
loss: 1.05451238155365,grad_norm: 0.9999991326971059, iteration: 372678
loss: 1.028306007385254,grad_norm: 0.7696607266489788, iteration: 372679
loss: 1.011307716369629,grad_norm: 0.847333068856794, iteration: 372680
loss: 1.0208213329315186,grad_norm: 0.8824103537555303, iteration: 372681
loss: 1.0052170753479004,grad_norm: 0.9999996096953233, iteration: 372682
loss: 1.032734990119934,grad_norm: 0.931044434533736, iteration: 372683
loss: 1.0391484498977661,grad_norm: 0.8409676352707721, iteration: 372684
loss: 1.016825556755066,grad_norm: 0.949989124718272, iteration: 372685
loss: 1.008746862411499,grad_norm: 0.7989947216351003, iteration: 372686
loss: 1.0249147415161133,grad_norm: 0.8554683761353217, iteration: 372687
loss: 1.0577353239059448,grad_norm: 0.9999994890407103, iteration: 372688
loss: 1.063746452331543,grad_norm: 0.9486805857782282, iteration: 372689
loss: 0.9589229822158813,grad_norm: 0.8708702510863934, iteration: 372690
loss: 0.9770686626434326,grad_norm: 0.8451394947309856, iteration: 372691
loss: 0.9761306643486023,grad_norm: 0.8577537365022284, iteration: 372692
loss: 0.9906241297721863,grad_norm: 0.707334704092565, iteration: 372693
loss: 1.0038553476333618,grad_norm: 0.8805617242089739, iteration: 372694
loss: 0.9700163006782532,grad_norm: 0.9999995126511162, iteration: 372695
loss: 1.0593215227127075,grad_norm: 0.9999990563226943, iteration: 372696
loss: 0.9972089529037476,grad_norm: 0.9999993721575338, iteration: 372697
loss: 1.0157830715179443,grad_norm: 0.7623489311510778, iteration: 372698
loss: 1.0224617719650269,grad_norm: 0.7134773834796837, iteration: 372699
loss: 0.9731383323669434,grad_norm: 0.8064330208773447, iteration: 372700
loss: 1.0288662910461426,grad_norm: 0.9999992156764445, iteration: 372701
loss: 1.0265229940414429,grad_norm: 0.9241841709616913, iteration: 372702
loss: 0.9556557536125183,grad_norm: 0.7133873198588319, iteration: 372703
loss: 0.9659284353256226,grad_norm: 0.8351789047673089, iteration: 372704
loss: 1.0140891075134277,grad_norm: 0.8255773833002726, iteration: 372705
loss: 1.0139926671981812,grad_norm: 0.7357338367029479, iteration: 372706
loss: 0.9930786490440369,grad_norm: 0.9293215724057761, iteration: 372707
loss: 1.017716884613037,grad_norm: 0.999998979333364, iteration: 372708
loss: 0.979863703250885,grad_norm: 0.9999993479724595, iteration: 372709
loss: 1.0194392204284668,grad_norm: 0.6956000197611533, iteration: 372710
loss: 0.9897516369819641,grad_norm: 0.7425722257387111, iteration: 372711
loss: 1.0018867254257202,grad_norm: 0.8380967474920521, iteration: 372712
loss: 1.0523134469985962,grad_norm: 0.8356013452638962, iteration: 372713
loss: 1.0216025114059448,grad_norm: 0.7540998166031749, iteration: 372714
loss: 0.9974189400672913,grad_norm: 0.8771278524165599, iteration: 372715
loss: 0.987498939037323,grad_norm: 0.9297402419216074, iteration: 372716
loss: 1.0037310123443604,grad_norm: 0.7929270947989138, iteration: 372717
loss: 0.9925007224082947,grad_norm: 0.7563493967083866, iteration: 372718
loss: 1.0286880731582642,grad_norm: 0.8116646625127, iteration: 372719
loss: 1.029322624206543,grad_norm: 0.9296881228212515, iteration: 372720
loss: 1.0152312517166138,grad_norm: 0.9999993087147541, iteration: 372721
loss: 1.1450209617614746,grad_norm: 0.999999986125684, iteration: 372722
loss: 1.00670325756073,grad_norm: 0.8606295167966921, iteration: 372723
loss: 0.9981456398963928,grad_norm: 0.8308894961347326, iteration: 372724
loss: 0.9823721051216125,grad_norm: 0.9858858742928917, iteration: 372725
loss: 1.1429749727249146,grad_norm: 0.9999995187478825, iteration: 372726
loss: 0.9615076780319214,grad_norm: 0.7914050730654284, iteration: 372727
loss: 0.99066561460495,grad_norm: 0.7929024239688858, iteration: 372728
loss: 0.9949078559875488,grad_norm: 0.8217623938352415, iteration: 372729
loss: 1.0010700225830078,grad_norm: 0.8574403249280392, iteration: 372730
loss: 0.9815020561218262,grad_norm: 0.7255158006233741, iteration: 372731
loss: 0.9828280806541443,grad_norm: 0.871333945774242, iteration: 372732
loss: 0.9724928736686707,grad_norm: 0.999999024959583, iteration: 372733
loss: 0.9627693295478821,grad_norm: 0.7143692692901459, iteration: 372734
loss: 1.0601645708084106,grad_norm: 0.999999100659173, iteration: 372735
loss: 1.034237027168274,grad_norm: 0.9447589745013544, iteration: 372736
loss: 0.9619666934013367,grad_norm: 0.7800108685380274, iteration: 372737
loss: 1.0094826221466064,grad_norm: 0.8686867725022792, iteration: 372738
loss: 1.067342758178711,grad_norm: 0.9999997971030476, iteration: 372739
loss: 1.0623406171798706,grad_norm: 0.9766453885860563, iteration: 372740
loss: 0.9628613591194153,grad_norm: 0.9999997096406821, iteration: 372741
loss: 0.9692739248275757,grad_norm: 0.8665564219445471, iteration: 372742
loss: 1.013556718826294,grad_norm: 0.7705353214939841, iteration: 372743
loss: 1.071215033531189,grad_norm: 0.7352128463534382, iteration: 372744
loss: 1.0751415491104126,grad_norm: 1.0000000839035632, iteration: 372745
loss: 0.9892339110374451,grad_norm: 1.0000000011073475, iteration: 372746
loss: 0.9995530843734741,grad_norm: 0.7410217616565961, iteration: 372747
loss: 0.9825794100761414,grad_norm: 0.7844799000557791, iteration: 372748
loss: 0.9647823572158813,grad_norm: 0.8225024373885436, iteration: 372749
loss: 1.037796139717102,grad_norm: 0.7413501916341827, iteration: 372750
loss: 0.9815215468406677,grad_norm: 0.8059661813548545, iteration: 372751
loss: 0.9671551585197449,grad_norm: 0.9401797407432273, iteration: 372752
loss: 0.9670612812042236,grad_norm: 0.9347527207199824, iteration: 372753
loss: 1.0263444185256958,grad_norm: 0.9423657325848868, iteration: 372754
loss: 1.0346258878707886,grad_norm: 0.675633587234367, iteration: 372755
loss: 0.9742058515548706,grad_norm: 0.7887813763682698, iteration: 372756
loss: 0.9916771054267883,grad_norm: 0.774267697469954, iteration: 372757
loss: 1.0167795419692993,grad_norm: 0.9106357230087586, iteration: 372758
loss: 1.0135564804077148,grad_norm: 0.9999997683235313, iteration: 372759
loss: 0.9735928177833557,grad_norm: 0.8163836315172288, iteration: 372760
loss: 0.9969441294670105,grad_norm: 0.8028727160904405, iteration: 372761
loss: 1.0037908554077148,grad_norm: 0.8730172676693905, iteration: 372762
loss: 0.9732194542884827,grad_norm: 0.8979442537633116, iteration: 372763
loss: 1.0055592060089111,grad_norm: 0.6929090282286177, iteration: 372764
loss: 1.0332307815551758,grad_norm: 0.9999992299602375, iteration: 372765
loss: 0.9690864682197571,grad_norm: 0.8023843723665489, iteration: 372766
loss: 1.0083837509155273,grad_norm: 0.8850132980368585, iteration: 372767
loss: 1.0069189071655273,grad_norm: 0.7674170955117161, iteration: 372768
loss: 1.0017738342285156,grad_norm: 0.7305402419372599, iteration: 372769
loss: 1.0236355066299438,grad_norm: 0.751822652557367, iteration: 372770
loss: 0.9993538856506348,grad_norm: 0.8272605744526114, iteration: 372771
loss: 0.9894932508468628,grad_norm: 0.8225944460752985, iteration: 372772
loss: 1.0138158798217773,grad_norm: 0.8587860894279299, iteration: 372773
loss: 1.0062860250473022,grad_norm: 0.9999990783019004, iteration: 372774
loss: 1.0751711130142212,grad_norm: 0.8626636268140188, iteration: 372775
loss: 1.01673424243927,grad_norm: 0.8087145626442861, iteration: 372776
loss: 0.9656367897987366,grad_norm: 0.874615667688978, iteration: 372777
loss: 1.000813364982605,grad_norm: 0.9999991002055699, iteration: 372778
loss: 1.003098726272583,grad_norm: 0.9982543485786769, iteration: 372779
loss: 0.9840187430381775,grad_norm: 0.7742055758433252, iteration: 372780
loss: 1.045890212059021,grad_norm: 0.9999992285708481, iteration: 372781
loss: 1.0103155374526978,grad_norm: 0.8619018785770873, iteration: 372782
loss: 0.9700835943222046,grad_norm: 0.9999999744638335, iteration: 372783
loss: 1.0225430727005005,grad_norm: 0.7396715406564486, iteration: 372784
loss: 0.9703812003135681,grad_norm: 0.8413233642898137, iteration: 372785
loss: 0.9858481287956238,grad_norm: 0.860326607964603, iteration: 372786
loss: 1.0090323686599731,grad_norm: 0.7271771702979819, iteration: 372787
loss: 0.947319507598877,grad_norm: 0.7104578108444177, iteration: 372788
loss: 1.0143178701400757,grad_norm: 0.9217747221776766, iteration: 372789
loss: 1.005828619003296,grad_norm: 0.8306410739910192, iteration: 372790
loss: 0.9781036376953125,grad_norm: 0.8441098939054035, iteration: 372791
loss: 1.0120347738265991,grad_norm: 0.6693550917489571, iteration: 372792
loss: 0.9653581976890564,grad_norm: 0.9521757504674164, iteration: 372793
loss: 1.055381178855896,grad_norm: 0.9999990989795667, iteration: 372794
loss: 0.9951967000961304,grad_norm: 0.9999992281417821, iteration: 372795
loss: 0.9579484462738037,grad_norm: 0.6646827339781013, iteration: 372796
loss: 1.0165430307388306,grad_norm: 0.8061881893223193, iteration: 372797
loss: 1.0293138027191162,grad_norm: 0.9299525329591647, iteration: 372798
loss: 1.0491437911987305,grad_norm: 0.999999443154737, iteration: 372799
loss: 1.0115265846252441,grad_norm: 0.8923740038062009, iteration: 372800
loss: 1.0561264753341675,grad_norm: 0.9999998487341727, iteration: 372801
loss: 0.9975433349609375,grad_norm: 0.8398690844201504, iteration: 372802
loss: 1.0231313705444336,grad_norm: 0.8995684175360988, iteration: 372803
loss: 0.986419677734375,grad_norm: 0.9999991143572533, iteration: 372804
loss: 1.0047132968902588,grad_norm: 0.8025190556716432, iteration: 372805
loss: 0.9947022795677185,grad_norm: 0.8409844489327809, iteration: 372806
loss: 1.021220326423645,grad_norm: 0.9999993327371491, iteration: 372807
loss: 0.9784209132194519,grad_norm: 0.8175756414192371, iteration: 372808
loss: 0.9983969926834106,grad_norm: 0.8344817448958709, iteration: 372809
loss: 0.9668232202529907,grad_norm: 0.8518172750019566, iteration: 372810
loss: 1.0054330825805664,grad_norm: 0.8399611081137309, iteration: 372811
loss: 0.998195469379425,grad_norm: 0.8458244650198898, iteration: 372812
loss: 1.033075213432312,grad_norm: 0.8509057269051069, iteration: 372813
loss: 1.00480055809021,grad_norm: 0.7251355842176153, iteration: 372814
loss: 0.9939462542533875,grad_norm: 0.8493261185451919, iteration: 372815
loss: 1.0080273151397705,grad_norm: 0.8113509341826464, iteration: 372816
loss: 0.9974351525306702,grad_norm: 0.8202324644189827, iteration: 372817
loss: 1.022206425666809,grad_norm: 0.9999999545962032, iteration: 372818
loss: 1.0305426120758057,grad_norm: 0.9999991212782977, iteration: 372819
loss: 0.978824257850647,grad_norm: 0.9999989910863474, iteration: 372820
loss: 1.0087602138519287,grad_norm: 0.8014352442206493, iteration: 372821
loss: 1.2015055418014526,grad_norm: 0.9999996153075936, iteration: 372822
loss: 0.9721853733062744,grad_norm: 0.8673784890919352, iteration: 372823
loss: 1.0479679107666016,grad_norm: 0.7769618338786385, iteration: 372824
loss: 1.0308598279953003,grad_norm: 0.8144468352140187, iteration: 372825
loss: 0.964045524597168,grad_norm: 0.7707814121853587, iteration: 372826
loss: 0.9825348258018494,grad_norm: 0.7105089620741418, iteration: 372827
loss: 0.9971839189529419,grad_norm: 0.7766143249768425, iteration: 372828
loss: 1.010775089263916,grad_norm: 0.9999991224767631, iteration: 372829
loss: 0.9829701781272888,grad_norm: 0.7731200387115568, iteration: 372830
loss: 0.9891834259033203,grad_norm: 0.9804525105762629, iteration: 372831
loss: 1.0199832916259766,grad_norm: 0.7417613592870796, iteration: 372832
loss: 0.9950756430625916,grad_norm: 0.8870923662845298, iteration: 372833
loss: 1.0105006694793701,grad_norm: 0.8496048966711938, iteration: 372834
loss: 1.010050892829895,grad_norm: 0.7367958746089419, iteration: 372835
loss: 1.0035910606384277,grad_norm: 0.7114225056218161, iteration: 372836
loss: 1.0076051950454712,grad_norm: 0.9372186594735888, iteration: 372837
loss: 0.9877324104309082,grad_norm: 0.7104452307889314, iteration: 372838
loss: 0.983985960483551,grad_norm: 0.9503129245725822, iteration: 372839
loss: 0.9900387525558472,grad_norm: 0.8670366405055375, iteration: 372840
loss: 0.9696907997131348,grad_norm: 0.747491151489933, iteration: 372841
loss: 0.9790852665901184,grad_norm: 0.7394642756796392, iteration: 372842
loss: 1.0229802131652832,grad_norm: 0.947133851426197, iteration: 372843
loss: 1.0385229587554932,grad_norm: 0.9999990257246302, iteration: 372844
loss: 1.00492262840271,grad_norm: 0.672649951522592, iteration: 372845
loss: 0.9887535572052002,grad_norm: 0.7237529063922404, iteration: 372846
loss: 0.9675888419151306,grad_norm: 0.8173890412268006, iteration: 372847
loss: 1.0120688676834106,grad_norm: 0.8900224626953276, iteration: 372848
loss: 0.9943045377731323,grad_norm: 0.7451595421222514, iteration: 372849
loss: 0.9855572581291199,grad_norm: 0.8369239850191197, iteration: 372850
loss: 1.0129320621490479,grad_norm: 0.7550832940297623, iteration: 372851
loss: 1.0016586780548096,grad_norm: 0.9999992852567061, iteration: 372852
loss: 0.9778687953948975,grad_norm: 0.6764972773420737, iteration: 372853
loss: 1.0141681432724,grad_norm: 0.7402237494977203, iteration: 372854
loss: 0.9669629335403442,grad_norm: 0.8079585963722972, iteration: 372855
loss: 1.0179574489593506,grad_norm: 0.9999991899501038, iteration: 372856
loss: 0.9765253663063049,grad_norm: 0.8781310573604916, iteration: 372857
loss: 0.9948095679283142,grad_norm: 0.9999991153969746, iteration: 372858
loss: 1.1490213871002197,grad_norm: 0.9999996541393975, iteration: 372859
loss: 1.0342034101486206,grad_norm: 0.7434990764529718, iteration: 372860
loss: 1.027394413948059,grad_norm: 0.8421913722006936, iteration: 372861
loss: 1.0375025272369385,grad_norm: 0.8698076911580811, iteration: 372862
loss: 1.0386552810668945,grad_norm: 0.7282579607100027, iteration: 372863
loss: 1.006628394126892,grad_norm: 0.6176796098064928, iteration: 372864
loss: 1.015838623046875,grad_norm: 0.7722397324689665, iteration: 372865
loss: 0.9983028173446655,grad_norm: 0.8071992452741144, iteration: 372866
loss: 1.0096031427383423,grad_norm: 0.7866123617337091, iteration: 372867
loss: 0.9780020117759705,grad_norm: 0.7855276108883138, iteration: 372868
loss: 1.009592056274414,grad_norm: 0.8137346776497051, iteration: 372869
loss: 0.9866840839385986,grad_norm: 0.9724348000819738, iteration: 372870
loss: 1.0157779455184937,grad_norm: 0.7610565951079623, iteration: 372871
loss: 1.0087213516235352,grad_norm: 0.7517230909334026, iteration: 372872
loss: 1.0069588422775269,grad_norm: 0.6936793168414762, iteration: 372873
loss: 1.0258456468582153,grad_norm: 0.8306061812295942, iteration: 372874
loss: 0.98737633228302,grad_norm: 0.8673258830391356, iteration: 372875
loss: 0.9755352735519409,grad_norm: 0.9241250009052651, iteration: 372876
loss: 0.9864352941513062,grad_norm: 0.8849911177805626, iteration: 372877
loss: 0.9653244018554688,grad_norm: 0.9004686102587518, iteration: 372878
loss: 0.9745135307312012,grad_norm: 0.8596447129469612, iteration: 372879
loss: 0.9938281774520874,grad_norm: 0.9999990654911921, iteration: 372880
loss: 0.9897410273551941,grad_norm: 0.9999994968448881, iteration: 372881
loss: 0.9649596810340881,grad_norm: 0.8609769697187026, iteration: 372882
loss: 0.9911672472953796,grad_norm: 0.9945785398736521, iteration: 372883
loss: 1.0324877500534058,grad_norm: 0.8530027642029955, iteration: 372884
loss: 1.0204427242279053,grad_norm: 0.9690872401519918, iteration: 372885
loss: 1.0198806524276733,grad_norm: 0.8734414870899747, iteration: 372886
loss: 0.9802960157394409,grad_norm: 0.8655167686154678, iteration: 372887
loss: 1.0169204473495483,grad_norm: 0.999999441579277, iteration: 372888
loss: 0.9956437945365906,grad_norm: 0.7562990336180906, iteration: 372889
loss: 0.9872534871101379,grad_norm: 0.7827092435368836, iteration: 372890
loss: 0.973305881023407,grad_norm: 0.8966537276670606, iteration: 372891
loss: 0.9994741678237915,grad_norm: 0.8928090592102194, iteration: 372892
loss: 0.9916025400161743,grad_norm: 0.7655140153620841, iteration: 372893
loss: 0.9905797243118286,grad_norm: 0.6337565546961571, iteration: 372894
loss: 1.0131721496582031,grad_norm: 0.8070493047477054, iteration: 372895
loss: 1.028140664100647,grad_norm: 0.7241256353378368, iteration: 372896
loss: 1.006603717803955,grad_norm: 0.7779565275650164, iteration: 372897
loss: 1.0363552570343018,grad_norm: 0.9832165800362135, iteration: 372898
loss: 1.0339219570159912,grad_norm: 0.7352529776294009, iteration: 372899
loss: 0.9949837327003479,grad_norm: 0.7297287026539008, iteration: 372900
loss: 0.9752690196037292,grad_norm: 0.7407686478202944, iteration: 372901
loss: 1.0064997673034668,grad_norm: 0.6972886385194038, iteration: 372902
loss: 1.0007494688034058,grad_norm: 0.7273945386065424, iteration: 372903
loss: 1.010063648223877,grad_norm: 0.8324494481687499, iteration: 372904
loss: 1.0284415483474731,grad_norm: 0.8735718320466995, iteration: 372905
loss: 1.0009486675262451,grad_norm: 0.7795318150771423, iteration: 372906
loss: 0.9971091747283936,grad_norm: 0.7423007290864421, iteration: 372907
loss: 0.9975258111953735,grad_norm: 0.6441939275910938, iteration: 372908
loss: 0.9785863161087036,grad_norm: 0.8275191621907455, iteration: 372909
loss: 1.0039962530136108,grad_norm: 0.7729261885866768, iteration: 372910
loss: 1.0069164037704468,grad_norm: 0.9999990245485253, iteration: 372911
loss: 1.0560805797576904,grad_norm: 0.9999994789401037, iteration: 372912
loss: 1.0111552476882935,grad_norm: 0.9100330814915356, iteration: 372913
loss: 0.9981554746627808,grad_norm: 0.8465767981664731, iteration: 372914
loss: 0.9983475208282471,grad_norm: 0.9999990248410107, iteration: 372915
loss: 1.017002820968628,grad_norm: 0.7947996980831198, iteration: 372916
loss: 1.1311564445495605,grad_norm: 0.9999997694864373, iteration: 372917
loss: 0.9769754409790039,grad_norm: 0.934246494650628, iteration: 372918
loss: 1.0697447061538696,grad_norm: 0.9999999031364295, iteration: 372919
loss: 1.0495351552963257,grad_norm: 0.9999991294716314, iteration: 372920
loss: 1.0082879066467285,grad_norm: 0.7747705490393001, iteration: 372921
loss: 1.0628907680511475,grad_norm: 0.9999997478635181, iteration: 372922
loss: 1.0115195512771606,grad_norm: 0.9999991533544886, iteration: 372923
loss: 0.9970633387565613,grad_norm: 0.7263538762230966, iteration: 372924
loss: 0.9867091178894043,grad_norm: 0.6687715438995796, iteration: 372925
loss: 1.0077818632125854,grad_norm: 0.9594609376409831, iteration: 372926
loss: 1.0067495107650757,grad_norm: 0.6975055120058069, iteration: 372927
loss: 0.9977537989616394,grad_norm: 0.8386648880038696, iteration: 372928
loss: 0.9762747883796692,grad_norm: 0.7595629832536178, iteration: 372929
loss: 0.9898914694786072,grad_norm: 0.8045933863449993, iteration: 372930
loss: 0.9887375831604004,grad_norm: 0.7934833210618262, iteration: 372931
loss: 1.0268127918243408,grad_norm: 0.9999991424877891, iteration: 372932
loss: 1.0009008646011353,grad_norm: 0.8169766789894948, iteration: 372933
loss: 0.9871296882629395,grad_norm: 0.8161971926609168, iteration: 372934
loss: 0.9950847029685974,grad_norm: 0.812248798510943, iteration: 372935
loss: 0.9915757775306702,grad_norm: 0.8296928414856442, iteration: 372936
loss: 1.059135913848877,grad_norm: 0.8874503168433081, iteration: 372937
loss: 1.0090761184692383,grad_norm: 0.8137847448724053, iteration: 372938
loss: 1.0015983581542969,grad_norm: 0.9420588597044868, iteration: 372939
loss: 1.0056368112564087,grad_norm: 0.7731810559380495, iteration: 372940
loss: 1.102685809135437,grad_norm: 0.8316171616315485, iteration: 372941
loss: 0.9882246255874634,grad_norm: 0.7279155638014561, iteration: 372942
loss: 1.1040090322494507,grad_norm: 0.9999997786518564, iteration: 372943
loss: 1.0451973676681519,grad_norm: 0.9999995806241633, iteration: 372944
loss: 0.990270733833313,grad_norm: 0.8757624697393263, iteration: 372945
loss: 1.0092008113861084,grad_norm: 0.9915697081056601, iteration: 372946
loss: 1.002551794052124,grad_norm: 0.6874763008524293, iteration: 372947
loss: 1.0242764949798584,grad_norm: 0.9999989031556408, iteration: 372948
loss: 1.0098077058792114,grad_norm: 0.8391950726558427, iteration: 372949
loss: 1.0202276706695557,grad_norm: 0.9523875961216711, iteration: 372950
loss: 1.031677007675171,grad_norm: 0.9115449373600284, iteration: 372951
loss: 0.9645253419876099,grad_norm: 0.8322664544510411, iteration: 372952
loss: 0.9976959228515625,grad_norm: 0.7961033568054701, iteration: 372953
loss: 1.0814285278320312,grad_norm: 0.9999995323274707, iteration: 372954
loss: 0.9980292320251465,grad_norm: 0.9042965896381279, iteration: 372955
loss: 0.9808405041694641,grad_norm: 0.7811608041109946, iteration: 372956
loss: 0.9939219951629639,grad_norm: 0.6553717111022824, iteration: 372957
loss: 0.9962930679321289,grad_norm: 0.9999994530799646, iteration: 372958
loss: 1.0296827554702759,grad_norm: 0.9719311527463363, iteration: 372959
loss: 1.0472174882888794,grad_norm: 0.9999997264766513, iteration: 372960
loss: 1.040059208869934,grad_norm: 0.8495015009412614, iteration: 372961
loss: 1.0028001070022583,grad_norm: 0.7990081996977145, iteration: 372962
loss: 1.0964922904968262,grad_norm: 0.9011384200681875, iteration: 372963
loss: 1.001030683517456,grad_norm: 0.9001280305409128, iteration: 372964
loss: 0.98944091796875,grad_norm: 0.7532561095795538, iteration: 372965
loss: 1.0499895811080933,grad_norm: 0.9999991223033379, iteration: 372966
loss: 1.003666877746582,grad_norm: 0.6799793523938961, iteration: 372967
loss: 1.0643004179000854,grad_norm: 0.7843237061227893, iteration: 372968
loss: 0.9618820548057556,grad_norm: 0.9161506438896503, iteration: 372969
loss: 1.0054917335510254,grad_norm: 0.9999993059609377, iteration: 372970
loss: 1.018121600151062,grad_norm: 0.7376376519598136, iteration: 372971
loss: 0.9923657774925232,grad_norm: 0.7304636105500029, iteration: 372972
loss: 0.9901517033576965,grad_norm: 0.8133609974985697, iteration: 372973
loss: 0.9944252371788025,grad_norm: 0.8427684447879561, iteration: 372974
loss: 0.9765962958335876,grad_norm: 0.7954967795743897, iteration: 372975
loss: 1.01259446144104,grad_norm: 0.8173749858569476, iteration: 372976
loss: 1.0754367113113403,grad_norm: 0.9999993674042833, iteration: 372977
loss: 0.994274914264679,grad_norm: 0.7073733232924495, iteration: 372978
loss: 1.0091317892074585,grad_norm: 0.9999994157078672, iteration: 372979
loss: 1.0194789171218872,grad_norm: 0.8880345420343095, iteration: 372980
loss: 0.9947893023490906,grad_norm: 0.9848835099927766, iteration: 372981
loss: 0.9754227995872498,grad_norm: 0.8383832258923853, iteration: 372982
loss: 0.9863948225975037,grad_norm: 0.7389787153207467, iteration: 372983
loss: 1.0645954608917236,grad_norm: 0.9999995337561239, iteration: 372984
loss: 0.9841943383216858,grad_norm: 0.6996964067918825, iteration: 372985
loss: 1.065419316291809,grad_norm: 0.9999993634091378, iteration: 372986
loss: 1.0152535438537598,grad_norm: 0.9195674459190187, iteration: 372987
loss: 0.9737439751625061,grad_norm: 0.768201687266865, iteration: 372988
loss: 1.016485571861267,grad_norm: 0.7465232888627068, iteration: 372989
loss: 0.9945192933082581,grad_norm: 0.7325791712817137, iteration: 372990
loss: 1.1460978984832764,grad_norm: 0.999999782625617, iteration: 372991
loss: 1.054140329360962,grad_norm: 0.7535699805598919, iteration: 372992
loss: 1.010852575302124,grad_norm: 1.000000110528728, iteration: 372993
loss: 1.0073506832122803,grad_norm: 0.6752814819646257, iteration: 372994
loss: 0.9861806035041809,grad_norm: 0.8858075354052595, iteration: 372995
loss: 1.0062837600708008,grad_norm: 0.9439502421134304, iteration: 372996
loss: 0.9549142122268677,grad_norm: 0.911021524328729, iteration: 372997
loss: 1.0118062496185303,grad_norm: 0.9762176772636191, iteration: 372998
loss: 1.0352174043655396,grad_norm: 0.8086152619219233, iteration: 372999
loss: 1.0008976459503174,grad_norm: 0.705194234702032, iteration: 373000
loss: 0.9930705428123474,grad_norm: 0.7431923446020793, iteration: 373001
loss: 1.0144904851913452,grad_norm: 0.7198170476758992, iteration: 373002
loss: 0.9983388781547546,grad_norm: 0.8044727167825891, iteration: 373003
loss: 1.027831792831421,grad_norm: 0.9999994466932249, iteration: 373004
loss: 0.9707207083702087,grad_norm: 0.6508619846818522, iteration: 373005
loss: 0.9907370209693909,grad_norm: 0.8485184853119405, iteration: 373006
loss: 1.0390986204147339,grad_norm: 0.8342602097577947, iteration: 373007
loss: 0.990666925907135,grad_norm: 0.7071195123229641, iteration: 373008
loss: 1.0203115940093994,grad_norm: 0.9999995694351449, iteration: 373009
loss: 0.993560254573822,grad_norm: 0.6888307207502122, iteration: 373010
loss: 1.007575511932373,grad_norm: 0.9999995075780781, iteration: 373011
loss: 1.066264033317566,grad_norm: 0.8764116101129436, iteration: 373012
loss: 0.9859206676483154,grad_norm: 0.7989529517265004, iteration: 373013
loss: 0.9954485297203064,grad_norm: 0.7974877434905184, iteration: 373014
loss: 0.9655907154083252,grad_norm: 0.7056382823153936, iteration: 373015
loss: 0.9878633618354797,grad_norm: 0.7439043052121627, iteration: 373016
loss: 1.015540361404419,grad_norm: 0.8321243609946473, iteration: 373017
loss: 0.9936464428901672,grad_norm: 0.7678126051763575, iteration: 373018
loss: 1.0373262166976929,grad_norm: 0.8088426211256252, iteration: 373019
loss: 1.029165267944336,grad_norm: 0.8703061927925072, iteration: 373020
loss: 1.0149832963943481,grad_norm: 0.7121213247613608, iteration: 373021
loss: 0.9791301488876343,grad_norm: 0.9999990231691903, iteration: 373022
loss: 0.9809192419052124,grad_norm: 0.9640086008274072, iteration: 373023
loss: 1.014255404472351,grad_norm: 0.9999991415070173, iteration: 373024
loss: 0.9782343506813049,grad_norm: 0.6656758270806129, iteration: 373025
loss: 1.021018147468567,grad_norm: 0.9307018851459399, iteration: 373026
loss: 1.0484192371368408,grad_norm: 0.9999994241917737, iteration: 373027
loss: 0.952115535736084,grad_norm: 0.9934853096111425, iteration: 373028
loss: 0.9787739515304565,grad_norm: 0.7867995251360191, iteration: 373029
loss: 0.9986912608146667,grad_norm: 0.758661068260477, iteration: 373030
loss: 0.9631567001342773,grad_norm: 0.9999990535125485, iteration: 373031
loss: 1.0210464000701904,grad_norm: 0.932023087857953, iteration: 373032
loss: 1.047820806503296,grad_norm: 0.8908113815522124, iteration: 373033
loss: 0.9840300679206848,grad_norm: 0.712678432931335, iteration: 373034
loss: 1.0089515447616577,grad_norm: 0.9747608421211621, iteration: 373035
loss: 0.9953269958496094,grad_norm: 0.748915805639884, iteration: 373036
loss: 1.011629343032837,grad_norm: 0.896448295261693, iteration: 373037
loss: 0.98292076587677,grad_norm: 0.693406012278859, iteration: 373038
loss: 0.9627358913421631,grad_norm: 0.9681491788216909, iteration: 373039
loss: 1.0207552909851074,grad_norm: 0.9999998722175917, iteration: 373040
loss: 1.0329947471618652,grad_norm: 0.621756837980723, iteration: 373041
loss: 1.0652292966842651,grad_norm: 0.9999991694462623, iteration: 373042
loss: 0.9922916889190674,grad_norm: 0.8597914849450997, iteration: 373043
loss: 1.0092705488204956,grad_norm: 0.8121614896190082, iteration: 373044
loss: 0.9668216109275818,grad_norm: 0.9059363462336352, iteration: 373045
loss: 0.9864382147789001,grad_norm: 0.6586825650584756, iteration: 373046
loss: 0.9766233563423157,grad_norm: 0.8448087969999034, iteration: 373047
loss: 0.984408974647522,grad_norm: 0.9023678411704648, iteration: 373048
loss: 0.9960420727729797,grad_norm: 0.7661785592925183, iteration: 373049
loss: 1.0268899202346802,grad_norm: 0.7484766848341895, iteration: 373050
loss: 1.0102198123931885,grad_norm: 0.6629695130821793, iteration: 373051
loss: 1.0514681339263916,grad_norm: 0.9999994266972161, iteration: 373052
loss: 1.030348300933838,grad_norm: 0.9102434221030578, iteration: 373053
loss: 1.019507884979248,grad_norm: 0.7567941599829141, iteration: 373054
loss: 0.9669248461723328,grad_norm: 0.5837974789457547, iteration: 373055
loss: 1.0115394592285156,grad_norm: 0.8175016708742836, iteration: 373056
loss: 1.022714376449585,grad_norm: 0.8345443720149162, iteration: 373057
loss: 1.0050297975540161,grad_norm: 0.9999992658788174, iteration: 373058
loss: 1.024605393409729,grad_norm: 0.999999079800564, iteration: 373059
loss: 1.0651483535766602,grad_norm: 0.8192971065143766, iteration: 373060
loss: 1.0499577522277832,grad_norm: 0.9184099452194828, iteration: 373061
loss: 1.022126317024231,grad_norm: 0.7912808581965852, iteration: 373062
loss: 0.9583849310874939,grad_norm: 0.8674923178060531, iteration: 373063
loss: 0.9828829169273376,grad_norm: 0.9999990800272592, iteration: 373064
loss: 1.0502420663833618,grad_norm: 0.9999990201880564, iteration: 373065
loss: 0.9651138186454773,grad_norm: 0.8345328181015297, iteration: 373066
loss: 1.035571575164795,grad_norm: 0.9999990230414113, iteration: 373067
loss: 1.014094591140747,grad_norm: 0.8459854171254517, iteration: 373068
loss: 0.97850501537323,grad_norm: 0.9015553816535441, iteration: 373069
loss: 0.9801586270332336,grad_norm: 0.7941190426439404, iteration: 373070
loss: 0.9818269610404968,grad_norm: 0.8083756485986902, iteration: 373071
loss: 0.9678842425346375,grad_norm: 0.8151218816670045, iteration: 373072
loss: 0.9837605357170105,grad_norm: 0.9866458925396793, iteration: 373073
loss: 1.0248136520385742,grad_norm: 0.772630946993452, iteration: 373074
loss: 0.9748599529266357,grad_norm: 0.7214283757899362, iteration: 373075
loss: 0.9814513325691223,grad_norm: 0.785814174112143, iteration: 373076
loss: 0.9673035144805908,grad_norm: 0.9272591773422967, iteration: 373077
loss: 0.9901013374328613,grad_norm: 0.8330544757642125, iteration: 373078
loss: 0.9999794363975525,grad_norm: 0.7922482359022736, iteration: 373079
loss: 1.0142632722854614,grad_norm: 0.7547392146779484, iteration: 373080
loss: 0.985987663269043,grad_norm: 0.732002426581316, iteration: 373081
loss: 1.1411712169647217,grad_norm: 0.7713853159920656, iteration: 373082
loss: 0.9954671263694763,grad_norm: 1.0000000928487618, iteration: 373083
loss: 1.0183353424072266,grad_norm: 0.8853800448375626, iteration: 373084
loss: 0.9833807349205017,grad_norm: 0.7486587104252562, iteration: 373085
loss: 0.9859514236450195,grad_norm: 0.7454286534657215, iteration: 373086
loss: 1.068831443786621,grad_norm: 0.9999991215543246, iteration: 373087
loss: 0.9911275506019592,grad_norm: 0.7043840810542036, iteration: 373088
loss: 0.986006498336792,grad_norm: 0.8181060107342047, iteration: 373089
loss: 0.9721336364746094,grad_norm: 0.9999991191887073, iteration: 373090
loss: 1.0263499021530151,grad_norm: 0.7041666442454412, iteration: 373091
loss: 0.9870477914810181,grad_norm: 0.7443372059013601, iteration: 373092
loss: 1.0117555856704712,grad_norm: 0.7707488000307278, iteration: 373093
loss: 1.048890233039856,grad_norm: 0.8060762974178368, iteration: 373094
loss: 0.9800177812576294,grad_norm: 0.9999992728224745, iteration: 373095
loss: 1.005186676979065,grad_norm: 0.7165274224011219, iteration: 373096
loss: 0.9895340204238892,grad_norm: 0.779317448429213, iteration: 373097
loss: 0.9752199053764343,grad_norm: 0.8362224871785829, iteration: 373098
loss: 0.9755312204360962,grad_norm: 0.7462725316423867, iteration: 373099
loss: 0.9879515171051025,grad_norm: 0.7621172597795584, iteration: 373100
loss: 0.9795823097229004,grad_norm: 0.802593866960821, iteration: 373101
loss: 1.0221772193908691,grad_norm: 0.7238562919682261, iteration: 373102
loss: 0.9805070757865906,grad_norm: 0.7228976049565945, iteration: 373103
loss: 0.9579083919525146,grad_norm: 0.6985649259856487, iteration: 373104
loss: 1.057421326637268,grad_norm: 0.7804796217962781, iteration: 373105
loss: 1.113066554069519,grad_norm: 0.9999994344385806, iteration: 373106
loss: 1.0003923177719116,grad_norm: 0.9173513728077757, iteration: 373107
loss: 0.9977249503135681,grad_norm: 0.8187886293154509, iteration: 373108
loss: 1.041674017906189,grad_norm: 0.9999990406898238, iteration: 373109
loss: 1.0188015699386597,grad_norm: 0.9060700803888406, iteration: 373110
loss: 0.9968053102493286,grad_norm: 0.7313784572809042, iteration: 373111
loss: 0.9631167054176331,grad_norm: 0.9999996599102389, iteration: 373112
loss: 0.9987850785255432,grad_norm: 0.7719891705151692, iteration: 373113
loss: 0.9617036581039429,grad_norm: 0.7274866269704661, iteration: 373114
loss: 1.0181915760040283,grad_norm: 0.789161679456946, iteration: 373115
loss: 1.0193352699279785,grad_norm: 0.9999996896535877, iteration: 373116
loss: 1.0061780214309692,grad_norm: 0.6922070858564741, iteration: 373117
loss: 0.9859140515327454,grad_norm: 0.7400674245630059, iteration: 373118
loss: 1.0292943716049194,grad_norm: 0.756553327190303, iteration: 373119
loss: 0.9629490375518799,grad_norm: 0.8253205636274048, iteration: 373120
loss: 0.9690109491348267,grad_norm: 0.8939866144105073, iteration: 373121
loss: 0.9853212833404541,grad_norm: 0.7796968244728028, iteration: 373122
loss: 1.1063103675842285,grad_norm: 0.9999997115263672, iteration: 373123
loss: 0.9996875524520874,grad_norm: 0.908478934007643, iteration: 373124
loss: 1.0182666778564453,grad_norm: 0.7546792600796264, iteration: 373125
loss: 1.0324972867965698,grad_norm: 0.838538429327536, iteration: 373126
loss: 1.0047061443328857,grad_norm: 0.8775411842206069, iteration: 373127
loss: 0.9362279176712036,grad_norm: 0.7129274410697678, iteration: 373128
loss: 1.0207152366638184,grad_norm: 0.8807848453376306, iteration: 373129
loss: 1.0582712888717651,grad_norm: 0.9999992505159278, iteration: 373130
loss: 1.0684887170791626,grad_norm: 0.9999993078492758, iteration: 373131
loss: 0.9932679533958435,grad_norm: 0.7396009353045778, iteration: 373132
loss: 1.068951964378357,grad_norm: 0.9999999203363179, iteration: 373133
loss: 0.9806452393531799,grad_norm: 0.7414554263331006, iteration: 373134
loss: 1.0225796699523926,grad_norm: 0.9418106031088229, iteration: 373135
loss: 1.001183271408081,grad_norm: 0.700015343547599, iteration: 373136
loss: 1.0000479221343994,grad_norm: 0.7635016875540093, iteration: 373137
loss: 1.1649887561798096,grad_norm: 0.9999995674371573, iteration: 373138
loss: 1.0919016599655151,grad_norm: 0.9999995873142109, iteration: 373139
loss: 1.1043224334716797,grad_norm: 0.8230812770840558, iteration: 373140
loss: 1.0285471677780151,grad_norm: 0.8977752573001357, iteration: 373141
loss: 1.0190895795822144,grad_norm: 0.806977706972529, iteration: 373142
loss: 0.9961633086204529,grad_norm: 0.6643603350868381, iteration: 373143
loss: 1.1136643886566162,grad_norm: 0.9999999576739843, iteration: 373144
loss: 0.9755370616912842,grad_norm: 0.7526441302239194, iteration: 373145
loss: 1.1339614391326904,grad_norm: 0.9999992960595887, iteration: 373146
loss: 0.9990796446800232,grad_norm: 0.9015354157982352, iteration: 373147
loss: 0.9678001999855042,grad_norm: 0.7672262724975955, iteration: 373148
loss: 1.0385549068450928,grad_norm: 0.9999991148737881, iteration: 373149
loss: 0.96401447057724,grad_norm: 0.7977574762500512, iteration: 373150
loss: 1.1940208673477173,grad_norm: 0.9999999489348012, iteration: 373151
loss: 1.0302425622940063,grad_norm: 0.9868893757156879, iteration: 373152
loss: 1.0129225254058838,grad_norm: 0.7132254768015017, iteration: 373153
loss: 0.9850017428398132,grad_norm: 0.8162216256724921, iteration: 373154
loss: 1.0143815279006958,grad_norm: 0.9999991629678276, iteration: 373155
loss: 1.0649558305740356,grad_norm: 0.9999995176228086, iteration: 373156
loss: 0.9766154885292053,grad_norm: 0.7143703690994058, iteration: 373157
loss: 0.9577563405036926,grad_norm: 0.9474478712168203, iteration: 373158
loss: 1.0212206840515137,grad_norm: 0.790340810065642, iteration: 373159
loss: 1.0057318210601807,grad_norm: 0.7807118393193774, iteration: 373160
loss: 1.0051569938659668,grad_norm: 0.8622526696878473, iteration: 373161
loss: 1.044100046157837,grad_norm: 0.7961242553701279, iteration: 373162
loss: 0.9826042056083679,grad_norm: 0.8212325727078083, iteration: 373163
loss: 1.0762463808059692,grad_norm: 0.9999995445928458, iteration: 373164
loss: 1.0526033639907837,grad_norm: 0.9999996681539629, iteration: 373165
loss: 1.0078067779541016,grad_norm: 0.6906500781581719, iteration: 373166
loss: 0.9803423881530762,grad_norm: 0.8544095541383445, iteration: 373167
loss: 1.040618658065796,grad_norm: 0.7497806505894002, iteration: 373168
loss: 1.0089548826217651,grad_norm: 0.7863373765791994, iteration: 373169
loss: 1.0024274587631226,grad_norm: 0.7965756855164697, iteration: 373170
loss: 1.0286661386489868,grad_norm: 0.890016701722204, iteration: 373171
loss: 1.0394762754440308,grad_norm: 0.7646603617040508, iteration: 373172
loss: 0.9902973175048828,grad_norm: 0.9211761289977349, iteration: 373173
loss: 1.064367651939392,grad_norm: 0.9999996493279908, iteration: 373174
loss: 1.0174001455307007,grad_norm: 0.7978094369065958, iteration: 373175
loss: 1.0844447612762451,grad_norm: 0.9999992589053999, iteration: 373176
loss: 0.9713733196258545,grad_norm: 0.9968809133718836, iteration: 373177
loss: 1.0590826272964478,grad_norm: 0.9999994598020853, iteration: 373178
loss: 1.0082213878631592,grad_norm: 0.7372165435708016, iteration: 373179
loss: 1.009350061416626,grad_norm: 0.6621764701942843, iteration: 373180
loss: 0.9990545511245728,grad_norm: 0.7534191414677497, iteration: 373181
loss: 1.0004175901412964,grad_norm: 0.7957458619093167, iteration: 373182
loss: 0.9753772616386414,grad_norm: 0.8767883915705511, iteration: 373183
loss: 0.9594575762748718,grad_norm: 0.9216609677166377, iteration: 373184
loss: 0.9829720854759216,grad_norm: 0.9930426068269317, iteration: 373185
loss: 1.026050329208374,grad_norm: 0.8029291569773802, iteration: 373186
loss: 1.0037001371383667,grad_norm: 0.9903575406693652, iteration: 373187
loss: 1.0020378828048706,grad_norm: 0.9999998767706857, iteration: 373188
loss: 0.9735373258590698,grad_norm: 0.7694359760568931, iteration: 373189
loss: 1.039829969406128,grad_norm: 0.8105791081742246, iteration: 373190
loss: 0.9870488047599792,grad_norm: 0.8923021125331214, iteration: 373191
loss: 0.9979046583175659,grad_norm: 0.8821830073469921, iteration: 373192
loss: 1.000324010848999,grad_norm: 0.8161497510702683, iteration: 373193
loss: 1.0185595750808716,grad_norm: 0.6711432845340155, iteration: 373194
loss: 0.9894036054611206,grad_norm: 0.8850659424838249, iteration: 373195
loss: 1.0017120838165283,grad_norm: 0.917200620843163, iteration: 373196
loss: 0.994364321231842,grad_norm: 0.8663308543186973, iteration: 373197
loss: 1.0210130214691162,grad_norm: 0.7831068042755341, iteration: 373198
loss: 1.0057264566421509,grad_norm: 0.7647569603397723, iteration: 373199
loss: 1.067360281944275,grad_norm: 0.9999991635873341, iteration: 373200
loss: 0.9751670360565186,grad_norm: 0.8577122789100866, iteration: 373201
loss: 0.9848827123641968,grad_norm: 0.789349218165923, iteration: 373202
loss: 1.0017093420028687,grad_norm: 0.9981059613218709, iteration: 373203
loss: 0.9801670908927917,grad_norm: 0.6476319151680385, iteration: 373204
loss: 1.0812166929244995,grad_norm: 0.9999996315761456, iteration: 373205
loss: 1.0308589935302734,grad_norm: 0.6915279611524875, iteration: 373206
loss: 0.9630833268165588,grad_norm: 0.8228927158236125, iteration: 373207
loss: 1.0232897996902466,grad_norm: 0.7639388951352155, iteration: 373208
loss: 1.0144226551055908,grad_norm: 0.7430369581321008, iteration: 373209
loss: 1.0566315650939941,grad_norm: 0.8604230494293271, iteration: 373210
loss: 0.9789996147155762,grad_norm: 0.984278523126085, iteration: 373211
loss: 0.9898971915245056,grad_norm: 0.99999906484578, iteration: 373212
loss: 1.002358317375183,grad_norm: 0.7234294001124253, iteration: 373213
loss: 1.014868140220642,grad_norm: 0.8006877674012512, iteration: 373214
loss: 0.9952316284179688,grad_norm: 0.8039048406630293, iteration: 373215
loss: 1.0002120733261108,grad_norm: 0.8409339336908213, iteration: 373216
loss: 1.000152587890625,grad_norm: 0.7720237489136867, iteration: 373217
loss: 1.014644742012024,grad_norm: 0.8263456033501761, iteration: 373218
loss: 1.0278174877166748,grad_norm: 0.7863714883993828, iteration: 373219
loss: 1.0027093887329102,grad_norm: 0.7345762243077237, iteration: 373220
loss: 1.0530836582183838,grad_norm: 0.9999998334412252, iteration: 373221
loss: 1.0018842220306396,grad_norm: 0.9474364779756775, iteration: 373222
loss: 1.043048620223999,grad_norm: 0.9999993903903128, iteration: 373223
loss: 1.001822590827942,grad_norm: 0.915535510963177, iteration: 373224
loss: 1.0810259580612183,grad_norm: 0.9999990661088397, iteration: 373225
loss: 0.9737801551818848,grad_norm: 0.7963976365179111, iteration: 373226
loss: 0.9671666622161865,grad_norm: 0.8414036650976543, iteration: 373227
loss: 1.0315392017364502,grad_norm: 0.8284965673168309, iteration: 373228
loss: 0.9934478402137756,grad_norm: 0.8706360194843389, iteration: 373229
loss: 1.0053831338882446,grad_norm: 0.7162742902104645, iteration: 373230
loss: 1.0434517860412598,grad_norm: 0.8604103132041481, iteration: 373231
loss: 1.0181218385696411,grad_norm: 0.8861751190022364, iteration: 373232
loss: 1.0087523460388184,grad_norm: 0.8598041093715904, iteration: 373233
loss: 1.054870843887329,grad_norm: 0.8834580599856798, iteration: 373234
loss: 0.9820418357849121,grad_norm: 0.9561757001395623, iteration: 373235
loss: 0.9845747947692871,grad_norm: 0.7029131089058696, iteration: 373236
loss: 0.9775437712669373,grad_norm: 0.8977231816990427, iteration: 373237
loss: 1.1739675998687744,grad_norm: 0.9999991339260418, iteration: 373238
loss: 0.9825575947761536,grad_norm: 0.7339733867635809, iteration: 373239
loss: 1.0214616060256958,grad_norm: 0.8767323798225702, iteration: 373240
loss: 0.9903771281242371,grad_norm: 0.8969447394309933, iteration: 373241
loss: 0.9890316724777222,grad_norm: 0.895960163746298, iteration: 373242
loss: 1.0048145055770874,grad_norm: 0.8121765169025144, iteration: 373243
loss: 1.0222735404968262,grad_norm: 0.8677234819104863, iteration: 373244
loss: 1.0307272672653198,grad_norm: 0.774758823064796, iteration: 373245
loss: 0.9677912592887878,grad_norm: 0.959099166990159, iteration: 373246
loss: 1.0279642343521118,grad_norm: 0.7320356743877204, iteration: 373247
loss: 0.9886000156402588,grad_norm: 0.7544401430556805, iteration: 373248
loss: 0.9997096657752991,grad_norm: 0.814778882792567, iteration: 373249
loss: 1.0305685997009277,grad_norm: 0.8244879190018661, iteration: 373250
loss: 1.0107203722000122,grad_norm: 0.6213477247905463, iteration: 373251
loss: 1.0239417552947998,grad_norm: 0.8758634089839499, iteration: 373252
loss: 1.011093258857727,grad_norm: 0.99999941311524, iteration: 373253
loss: 1.0184855461120605,grad_norm: 0.9103084570926361, iteration: 373254
loss: 0.9485085606575012,grad_norm: 0.7504861086986422, iteration: 373255
loss: 1.0140316486358643,grad_norm: 0.999999624235461, iteration: 373256
loss: 0.9930386543273926,grad_norm: 0.9131244407470914, iteration: 373257
loss: 1.0988407135009766,grad_norm: 0.9999998089217972, iteration: 373258
loss: 0.9946303367614746,grad_norm: 0.8325027180543056, iteration: 373259
loss: 0.9822564721107483,grad_norm: 0.8918099447310996, iteration: 373260
loss: 1.0106003284454346,grad_norm: 0.7003977608482532, iteration: 373261
loss: 1.0278470516204834,grad_norm: 0.7575682012997095, iteration: 373262
loss: 1.0148707628250122,grad_norm: 0.9999992535970313, iteration: 373263
loss: 1.0338279008865356,grad_norm: 0.93613862872225, iteration: 373264
loss: 1.0737158060073853,grad_norm: 0.813312876237451, iteration: 373265
loss: 1.0189915895462036,grad_norm: 0.7318928686627186, iteration: 373266
loss: 0.9866436719894409,grad_norm: 0.774781941517858, iteration: 373267
loss: 1.0094783306121826,grad_norm: 0.811249819466589, iteration: 373268
loss: 0.9987408518791199,grad_norm: 0.7435654681417599, iteration: 373269
loss: 0.9617170691490173,grad_norm: 0.8224681892351282, iteration: 373270
loss: 0.9891943335533142,grad_norm: 0.7079389328837892, iteration: 373271
loss: 1.094467043876648,grad_norm: 0.9999993238890779, iteration: 373272
loss: 0.9939965605735779,grad_norm: 0.8967570031429217, iteration: 373273
loss: 0.9836211204528809,grad_norm: 0.7091061785591816, iteration: 373274
loss: 1.0019588470458984,grad_norm: 0.9999992943028998, iteration: 373275
loss: 0.9989076852798462,grad_norm: 0.794569848563805, iteration: 373276
loss: 1.0131787061691284,grad_norm: 0.8953724368835619, iteration: 373277
loss: 1.042742371559143,grad_norm: 0.7882120837300848, iteration: 373278
loss: 1.011712908744812,grad_norm: 0.999999038625113, iteration: 373279
loss: 1.032599687576294,grad_norm: 0.7946391735070776, iteration: 373280
loss: 0.9965261220932007,grad_norm: 0.7545095439279814, iteration: 373281
loss: 1.0189580917358398,grad_norm: 0.693911773860323, iteration: 373282
loss: 1.001728892326355,grad_norm: 0.7608100442220097, iteration: 373283
loss: 1.0876926183700562,grad_norm: 0.7191669844631485, iteration: 373284
loss: 0.9638275504112244,grad_norm: 0.8958623894945306, iteration: 373285
loss: 0.9774315357208252,grad_norm: 0.7451535818118367, iteration: 373286
loss: 1.0430837869644165,grad_norm: 0.8100437162553775, iteration: 373287
loss: 0.9795352220535278,grad_norm: 0.7794296053323264, iteration: 373288
loss: 0.9734904170036316,grad_norm: 0.7819505895664529, iteration: 373289
loss: 1.0218477249145508,grad_norm: 0.9492055217356824, iteration: 373290
loss: 1.0427353382110596,grad_norm: 0.9999998556856904, iteration: 373291
loss: 1.0079883337020874,grad_norm: 0.7422591088401184, iteration: 373292
loss: 1.0129386186599731,grad_norm: 0.7495876098887774, iteration: 373293
loss: 1.0331599712371826,grad_norm: 0.9070800870779205, iteration: 373294
loss: 0.9864224791526794,grad_norm: 0.789472157955468, iteration: 373295
loss: 0.9705163836479187,grad_norm: 0.7646485772886784, iteration: 373296
loss: 0.9691126346588135,grad_norm: 0.739485536570564, iteration: 373297
loss: 0.9750954508781433,grad_norm: 0.8232246745300847, iteration: 373298
loss: 1.0112154483795166,grad_norm: 0.7447222494971254, iteration: 373299
loss: 1.0255604982376099,grad_norm: 0.6404722328918664, iteration: 373300
loss: 1.0120792388916016,grad_norm: 0.7441730567271932, iteration: 373301
loss: 1.0007319450378418,grad_norm: 0.8340676465697336, iteration: 373302
loss: 0.9749529361724854,grad_norm: 0.7345736375407914, iteration: 373303
loss: 1.0289289951324463,grad_norm: 0.8765020238703938, iteration: 373304
loss: 0.9921472668647766,grad_norm: 0.6507853778210889, iteration: 373305
loss: 1.0099916458129883,grad_norm: 0.8003703476096411, iteration: 373306
loss: 1.0006638765335083,grad_norm: 0.729245339648086, iteration: 373307
loss: 0.9839766621589661,grad_norm: 0.8571183489662162, iteration: 373308
loss: 1.0256178379058838,grad_norm: 0.9241616590299456, iteration: 373309
loss: 1.0095326900482178,grad_norm: 0.7122330563933135, iteration: 373310
loss: 1.1239604949951172,grad_norm: 0.9999999210778587, iteration: 373311
loss: 0.9872187376022339,grad_norm: 0.9999990446397321, iteration: 373312
loss: 1.0275890827178955,grad_norm: 0.8336210245795918, iteration: 373313
loss: 1.012391448020935,grad_norm: 0.827117991814228, iteration: 373314
loss: 1.002323031425476,grad_norm: 0.9761996530322662, iteration: 373315
loss: 1.0375608205795288,grad_norm: 0.9999995697811495, iteration: 373316
loss: 1.034423828125,grad_norm: 0.999999592245133, iteration: 373317
loss: 1.0251260995864868,grad_norm: 0.7571765888213988, iteration: 373318
loss: 1.0520083904266357,grad_norm: 0.9809455128903454, iteration: 373319
loss: 0.9840431809425354,grad_norm: 0.7706145764235829, iteration: 373320
loss: 1.0175970792770386,grad_norm: 0.999998999781194, iteration: 373321
loss: 1.0305681228637695,grad_norm: 0.7980888700196634, iteration: 373322
loss: 0.9725245237350464,grad_norm: 0.8464924281277953, iteration: 373323
loss: 1.0113643407821655,grad_norm: 0.6133296628639229, iteration: 373324
loss: 1.0188597440719604,grad_norm: 0.8284628163473318, iteration: 373325
loss: 1.010506510734558,grad_norm: 0.7222536393719524, iteration: 373326
loss: 1.0006792545318604,grad_norm: 0.8606860834405268, iteration: 373327
loss: 1.1154447793960571,grad_norm: 0.9999989806675694, iteration: 373328
loss: 0.9631717801094055,grad_norm: 0.8182796676184156, iteration: 373329
loss: 1.0050617456436157,grad_norm: 0.9999995970115193, iteration: 373330
loss: 1.0361504554748535,grad_norm: 0.9999991178828294, iteration: 373331
loss: 1.021376132965088,grad_norm: 0.7842665337011819, iteration: 373332
loss: 0.979278564453125,grad_norm: 0.7250882131454888, iteration: 373333
loss: 1.101397156715393,grad_norm: 0.9999993079894909, iteration: 373334
loss: 0.9959718585014343,grad_norm: 0.8183582371016698, iteration: 373335
loss: 0.9948399066925049,grad_norm: 0.8318047947673226, iteration: 373336
loss: 0.990113377571106,grad_norm: 0.802900635819555, iteration: 373337
loss: 1.0467609167099,grad_norm: 0.7516988376474971, iteration: 373338
loss: 1.040388822555542,grad_norm: 0.7805768490521221, iteration: 373339
loss: 0.9708730578422546,grad_norm: 0.8352954502082317, iteration: 373340
loss: 1.0002329349517822,grad_norm: 0.8018780061021691, iteration: 373341
loss: 1.014242172241211,grad_norm: 0.9999999823024278, iteration: 373342
loss: 0.964957058429718,grad_norm: 0.7806273018589298, iteration: 373343
loss: 1.015304684638977,grad_norm: 0.7453315501621809, iteration: 373344
loss: 1.0433604717254639,grad_norm: 0.9120709791915047, iteration: 373345
loss: 1.0373774766921997,grad_norm: 0.9999990918516897, iteration: 373346
loss: 1.0024237632751465,grad_norm: 0.8128523422912315, iteration: 373347
loss: 0.9717596769332886,grad_norm: 0.9999994833991979, iteration: 373348
loss: 1.0056477785110474,grad_norm: 0.8006974149343008, iteration: 373349
loss: 1.0064599514007568,grad_norm: 0.9119838519727306, iteration: 373350
loss: 1.0778288841247559,grad_norm: 0.9999993860541568, iteration: 373351
loss: 0.9961341023445129,grad_norm: 0.8830802311004939, iteration: 373352
loss: 0.9934908747673035,grad_norm: 0.9485544774605178, iteration: 373353
loss: 0.9834386110305786,grad_norm: 0.8151885869564205, iteration: 373354
loss: 0.9991231560707092,grad_norm: 0.7812277708096439, iteration: 373355
loss: 1.0097488164901733,grad_norm: 0.7728241450702767, iteration: 373356
loss: 0.9867291450500488,grad_norm: 0.920996295819993, iteration: 373357
loss: 0.9941430687904358,grad_norm: 0.9082661403392291, iteration: 373358
loss: 1.0269170999526978,grad_norm: 0.913525847873926, iteration: 373359
loss: 1.0068585872650146,grad_norm: 0.6617480841582737, iteration: 373360
loss: 0.989776074886322,grad_norm: 0.8174414243316426, iteration: 373361
loss: 1.031994342803955,grad_norm: 0.9999998209608764, iteration: 373362
loss: 0.9908527135848999,grad_norm: 0.7992935187767356, iteration: 373363
loss: 0.9834470748901367,grad_norm: 0.8479270402749794, iteration: 373364
loss: 0.9811617136001587,grad_norm: 0.6991838733516268, iteration: 373365
loss: 1.0050139427185059,grad_norm: 0.6615430856283927, iteration: 373366
loss: 1.0401923656463623,grad_norm: 0.9999991947605027, iteration: 373367
loss: 1.0135117769241333,grad_norm: 0.8573556741972379, iteration: 373368
loss: 0.9806991815567017,grad_norm: 0.8664848614810159, iteration: 373369
loss: 0.9973002076148987,grad_norm: 0.7401196189943352, iteration: 373370
loss: 1.0164577960968018,grad_norm: 0.8242213266106542, iteration: 373371
loss: 1.0244433879852295,grad_norm: 0.8445966131237617, iteration: 373372
loss: 1.0432841777801514,grad_norm: 0.9999997550776242, iteration: 373373
loss: 0.9794877767562866,grad_norm: 0.8449976266953368, iteration: 373374
loss: 1.0365670919418335,grad_norm: 0.8526892836295111, iteration: 373375
loss: 1.0229933261871338,grad_norm: 0.7391020187314084, iteration: 373376
loss: 1.0371623039245605,grad_norm: 0.7251281272757382, iteration: 373377
loss: 1.0128637552261353,grad_norm: 0.9086253889366042, iteration: 373378
loss: 1.0027823448181152,grad_norm: 0.7752356156908197, iteration: 373379
loss: 0.9906793832778931,grad_norm: 0.6761011678634049, iteration: 373380
loss: 1.0124095678329468,grad_norm: 0.8446387098296012, iteration: 373381
loss: 1.014254093170166,grad_norm: 0.650738169789564, iteration: 373382
loss: 1.0955685377120972,grad_norm: 0.9999995823694982, iteration: 373383
loss: 1.0040256977081299,grad_norm: 0.8921873406977968, iteration: 373384
loss: 0.9886003732681274,grad_norm: 0.8280232874153731, iteration: 373385
loss: 0.9788960814476013,grad_norm: 0.6792916963561374, iteration: 373386
loss: 1.0342535972595215,grad_norm: 0.9702199977690305, iteration: 373387
loss: 1.0152517557144165,grad_norm: 0.7511232917923517, iteration: 373388
loss: 0.9587039947509766,grad_norm: 0.8523025456257161, iteration: 373389
loss: 1.0335850715637207,grad_norm: 0.9999991300939872, iteration: 373390
loss: 0.9707304835319519,grad_norm: 0.9999994869414706, iteration: 373391
loss: 1.013332724571228,grad_norm: 0.6970752720957303, iteration: 373392
loss: 0.9902058839797974,grad_norm: 0.8179631495044345, iteration: 373393
loss: 1.0002270936965942,grad_norm: 0.790022991935652, iteration: 373394
loss: 1.004922866821289,grad_norm: 0.8649148028765332, iteration: 373395
loss: 1.0190173387527466,grad_norm: 0.7923770845856642, iteration: 373396
loss: 0.984323263168335,grad_norm: 0.7483947915098808, iteration: 373397
loss: 1.0041338205337524,grad_norm: 0.9999991095335987, iteration: 373398
loss: 1.0281347036361694,grad_norm: 0.7178830652433136, iteration: 373399
loss: 1.0008920431137085,grad_norm: 0.8990481072483556, iteration: 373400
loss: 1.002264142036438,grad_norm: 0.8810829285681043, iteration: 373401
loss: 0.9717750549316406,grad_norm: 0.7780120787060518, iteration: 373402
loss: 0.9926279187202454,grad_norm: 0.8843281195871238, iteration: 373403
loss: 0.9755245447158813,grad_norm: 0.7375566333693007, iteration: 373404
loss: 0.9758001565933228,grad_norm: 0.8644599025131481, iteration: 373405
loss: 0.9952532052993774,grad_norm: 0.9226216972608994, iteration: 373406
loss: 1.095313310623169,grad_norm: 0.9055148166868198, iteration: 373407
loss: 0.9651511907577515,grad_norm: 0.8031307589293557, iteration: 373408
loss: 0.994361162185669,grad_norm: 0.6707764851788277, iteration: 373409
loss: 1.0763578414916992,grad_norm: 0.9999996026865102, iteration: 373410
loss: 0.9947018623352051,grad_norm: 0.7334158930434066, iteration: 373411
loss: 1.0082309246063232,grad_norm: 0.8633565800711447, iteration: 373412
loss: 0.9971908926963806,grad_norm: 0.8166095211908483, iteration: 373413
loss: 1.0118659734725952,grad_norm: 0.8449814578328064, iteration: 373414
loss: 0.94078528881073,grad_norm: 0.8444512236273053, iteration: 373415
loss: 1.0025471448898315,grad_norm: 0.7708873937225619, iteration: 373416
loss: 0.9879502654075623,grad_norm: 0.800824627532692, iteration: 373417
loss: 1.0370581150054932,grad_norm: 0.8644762687844154, iteration: 373418
loss: 1.0039567947387695,grad_norm: 0.7784520927207292, iteration: 373419
loss: 1.0018330812454224,grad_norm: 0.6970670629805027, iteration: 373420
loss: 1.0331053733825684,grad_norm: 0.7999187376245973, iteration: 373421
loss: 0.9877087473869324,grad_norm: 0.9149823175071895, iteration: 373422
loss: 0.9669182896614075,grad_norm: 0.6981981470046407, iteration: 373423
loss: 1.0107518434524536,grad_norm: 0.7048160191479088, iteration: 373424
loss: 0.9899146556854248,grad_norm: 0.9517111372701487, iteration: 373425
loss: 0.9963386058807373,grad_norm: 0.7534042798327899, iteration: 373426
loss: 1.0773943662643433,grad_norm: 0.8416124277508031, iteration: 373427
loss: 0.9835362434387207,grad_norm: 0.7769092762272644, iteration: 373428
loss: 1.0071889162063599,grad_norm: 0.8586389507428445, iteration: 373429
loss: 1.1321262121200562,grad_norm: 0.9212706775550973, iteration: 373430
loss: 1.0158480405807495,grad_norm: 0.9007827906737204, iteration: 373431
loss: 0.9923405647277832,grad_norm: 0.9999997555585968, iteration: 373432
loss: 0.944961428642273,grad_norm: 0.8357792227237515, iteration: 373433
loss: 1.015225887298584,grad_norm: 0.8745566294644178, iteration: 373434
loss: 1.0864081382751465,grad_norm: 0.9999997647179119, iteration: 373435
loss: 1.0417968034744263,grad_norm: 0.832097053856558, iteration: 373436
loss: 1.013340711593628,grad_norm: 0.9999991100644221, iteration: 373437
loss: 0.999188244342804,grad_norm: 0.7397130814417899, iteration: 373438
loss: 1.0215706825256348,grad_norm: 0.6979181845073341, iteration: 373439
loss: 1.0134286880493164,grad_norm: 0.6320649892652979, iteration: 373440
loss: 1.0751712322235107,grad_norm: 0.7544471537908041, iteration: 373441
loss: 0.968808114528656,grad_norm: 0.7107423827073246, iteration: 373442
loss: 1.0011043548583984,grad_norm: 0.8308193844341368, iteration: 373443
loss: 0.9912887811660767,grad_norm: 0.8316806318414919, iteration: 373444
loss: 1.0215458869934082,grad_norm: 0.8823312949406068, iteration: 373445
loss: 1.0144047737121582,grad_norm: 0.9618986460176311, iteration: 373446
loss: 0.9898313879966736,grad_norm: 0.7284709563567874, iteration: 373447
loss: 0.9942906498908997,grad_norm: 0.9999993136196892, iteration: 373448
loss: 0.9990509748458862,grad_norm: 0.8231357639728349, iteration: 373449
loss: 0.9762142300605774,grad_norm: 0.8704698294222236, iteration: 373450
loss: 0.9792048335075378,grad_norm: 0.7492497317119365, iteration: 373451
loss: 1.0301311016082764,grad_norm: 0.7396736123264575, iteration: 373452
loss: 1.0082217454910278,grad_norm: 0.8277500272914512, iteration: 373453
loss: 1.0269169807434082,grad_norm: 0.7747831394107428, iteration: 373454
loss: 0.9883298873901367,grad_norm: 0.6139928962520921, iteration: 373455
loss: 1.0534467697143555,grad_norm: 0.8452252805474052, iteration: 373456
loss: 0.9894595742225647,grad_norm: 0.8019035730279799, iteration: 373457
loss: 0.9801335334777832,grad_norm: 0.7682486908776933, iteration: 373458
loss: 1.0065428018569946,grad_norm: 0.7141820401706682, iteration: 373459
loss: 1.0085724592208862,grad_norm: 0.9999993231066969, iteration: 373460
loss: 1.0172098875045776,grad_norm: 0.7840680875610347, iteration: 373461
loss: 0.9914129376411438,grad_norm: 0.904241679169471, iteration: 373462
loss: 1.0017406940460205,grad_norm: 0.7835880832934212, iteration: 373463
loss: 0.9772466421127319,grad_norm: 0.6869846880431685, iteration: 373464
loss: 0.974075973033905,grad_norm: 0.9999993132850886, iteration: 373465
loss: 0.9864799380302429,grad_norm: 0.7552528529603905, iteration: 373466
loss: 1.0045113563537598,grad_norm: 0.9056069105500432, iteration: 373467
loss: 1.0102654695510864,grad_norm: 0.9999994726350806, iteration: 373468
loss: 1.003603458404541,grad_norm: 0.9999992284775056, iteration: 373469
loss: 1.0058197975158691,grad_norm: 0.878894161312419, iteration: 373470
loss: 1.0003074407577515,grad_norm: 0.8927359383114164, iteration: 373471
loss: 1.0274971723556519,grad_norm: 0.7808138785390127, iteration: 373472
loss: 0.9945780634880066,grad_norm: 0.8188812474102951, iteration: 373473
loss: 0.9983163475990295,grad_norm: 0.7704547793390008, iteration: 373474
loss: 1.0114789009094238,grad_norm: 0.7388676396492576, iteration: 373475
loss: 0.9807109236717224,grad_norm: 0.7481404583542233, iteration: 373476
loss: 1.0057811737060547,grad_norm: 0.9999994757253818, iteration: 373477
loss: 1.0649844408035278,grad_norm: 0.9999997237073347, iteration: 373478
loss: 0.9608586430549622,grad_norm: 0.7699760167989701, iteration: 373479
loss: 0.9872501492500305,grad_norm: 0.8440705719971907, iteration: 373480
loss: 1.0090378522872925,grad_norm: 0.887687117517327, iteration: 373481
loss: 0.985191822052002,grad_norm: 0.8249368972869702, iteration: 373482
loss: 1.0042208433151245,grad_norm: 0.7826208174975281, iteration: 373483
loss: 0.9452800750732422,grad_norm: 0.7768293026028718, iteration: 373484
loss: 1.0160938501358032,grad_norm: 0.9473359051054474, iteration: 373485
loss: 1.02363121509552,grad_norm: 0.937743346025922, iteration: 373486
loss: 1.0362694263458252,grad_norm: 0.712355275654149, iteration: 373487
loss: 0.9894362688064575,grad_norm: 0.9999995715818121, iteration: 373488
loss: 1.000662088394165,grad_norm: 0.7093307285833113, iteration: 373489
loss: 1.0209908485412598,grad_norm: 0.9213400795677437, iteration: 373490
loss: 0.9863204956054688,grad_norm: 0.7956662018886158, iteration: 373491
loss: 1.0550276041030884,grad_norm: 0.9999991497055435, iteration: 373492
loss: 0.9833361506462097,grad_norm: 0.943710749289568, iteration: 373493
loss: 1.013558030128479,grad_norm: 0.9999991176991526, iteration: 373494
loss: 0.9900369644165039,grad_norm: 0.874018850252099, iteration: 373495
loss: 0.9731321334838867,grad_norm: 0.7781988444278096, iteration: 373496
loss: 1.014277696609497,grad_norm: 0.7884368158067867, iteration: 373497
loss: 1.0788356065750122,grad_norm: 0.9999993867893927, iteration: 373498
loss: 1.0025804042816162,grad_norm: 0.6910038436385495, iteration: 373499
loss: 0.9989530444145203,grad_norm: 0.7656348280895355, iteration: 373500
loss: 0.9985836744308472,grad_norm: 0.82108496678008, iteration: 373501
loss: 1.0213656425476074,grad_norm: 1.0000000571636283, iteration: 373502
loss: 0.9873999357223511,grad_norm: 0.8491525311347563, iteration: 373503
loss: 0.9857970476150513,grad_norm: 0.8871376903522622, iteration: 373504
loss: 1.052811861038208,grad_norm: 0.8556557941496725, iteration: 373505
loss: 0.9965363144874573,grad_norm: 0.7714008002425662, iteration: 373506
loss: 0.9440121054649353,grad_norm: 0.6644132215146525, iteration: 373507
loss: 1.0298216342926025,grad_norm: 0.9362017290930799, iteration: 373508
loss: 1.2286856174468994,grad_norm: 0.9999993401966593, iteration: 373509
loss: 0.9685648679733276,grad_norm: 0.7909104439888816, iteration: 373510
loss: 0.9804801940917969,grad_norm: 0.7594048615836726, iteration: 373511
loss: 1.0118439197540283,grad_norm: 0.8908583526144463, iteration: 373512
loss: 1.0382250547409058,grad_norm: 0.9142961900376518, iteration: 373513
loss: 1.0433580875396729,grad_norm: 0.9035150645907406, iteration: 373514
loss: 0.9876702427864075,grad_norm: 0.6955216950527796, iteration: 373515
loss: 0.9520514607429504,grad_norm: 0.8012325368738525, iteration: 373516
loss: 1.0302516222000122,grad_norm: 0.8921413729931156, iteration: 373517
loss: 0.9673588871955872,grad_norm: 0.8273018159313967, iteration: 373518
loss: 1.0082552433013916,grad_norm: 0.6266897968902582, iteration: 373519
loss: 1.0036801099777222,grad_norm: 0.9999991082959949, iteration: 373520
loss: 0.9879648089408875,grad_norm: 0.9451222784180986, iteration: 373521
loss: 1.016929030418396,grad_norm: 0.7955043583054936, iteration: 373522
loss: 1.0086357593536377,grad_norm: 0.8700110198734193, iteration: 373523
loss: 0.988264799118042,grad_norm: 0.685704576236845, iteration: 373524
loss: 1.0154792070388794,grad_norm: 0.6283873961733281, iteration: 373525
loss: 1.0067565441131592,grad_norm: 0.9644327227487408, iteration: 373526
loss: 1.022650122642517,grad_norm: 0.7188324307532201, iteration: 373527
loss: 0.9715791344642639,grad_norm: 0.8166596138518529, iteration: 373528
loss: 1.0165568590164185,grad_norm: 0.8453209818471454, iteration: 373529
loss: 1.0119918584823608,grad_norm: 0.9999995032108188, iteration: 373530
loss: 0.9787617921829224,grad_norm: 0.8560649633110712, iteration: 373531
loss: 0.9709427356719971,grad_norm: 0.7400357160886537, iteration: 373532
loss: 0.9739364385604858,grad_norm: 0.999999236311932, iteration: 373533
loss: 0.984089195728302,grad_norm: 0.7276648883036115, iteration: 373534
loss: 1.00966215133667,grad_norm: 0.8100211202639113, iteration: 373535
loss: 1.0003094673156738,grad_norm: 0.9553859086712082, iteration: 373536
loss: 0.997974693775177,grad_norm: 0.892957125635262, iteration: 373537
loss: 0.9911450743675232,grad_norm: 0.9999992715088741, iteration: 373538
loss: 1.0363801717758179,grad_norm: 0.8408983358834937, iteration: 373539
loss: 0.9544641375541687,grad_norm: 0.8472478670904863, iteration: 373540
loss: 1.0174773931503296,grad_norm: 0.7921547956319562, iteration: 373541
loss: 0.9869487285614014,grad_norm: 0.9709665606245053, iteration: 373542
loss: 0.997755765914917,grad_norm: 0.8815873005601109, iteration: 373543
loss: 0.9921925067901611,grad_norm: 0.9999997535991638, iteration: 373544
loss: 0.99772709608078,grad_norm: 0.9033585055611167, iteration: 373545
loss: 0.9663842916488647,grad_norm: 0.7445816018693387, iteration: 373546
loss: 1.0026694536209106,grad_norm: 0.7143976013542338, iteration: 373547
loss: 1.0092390775680542,grad_norm: 0.7116572551172453, iteration: 373548
loss: 0.9772190451622009,grad_norm: 0.870003089715608, iteration: 373549
loss: 1.077196478843689,grad_norm: 0.9999992853891237, iteration: 373550
loss: 1.0073256492614746,grad_norm: 0.9578594122480636, iteration: 373551
loss: 1.009415626525879,grad_norm: 0.7980319756205978, iteration: 373552
loss: 0.9639928340911865,grad_norm: 0.7996896632157915, iteration: 373553
loss: 0.9981362223625183,grad_norm: 0.8301884467413323, iteration: 373554
loss: 0.9924418926239014,grad_norm: 0.9999991207302241, iteration: 373555
loss: 1.0258179903030396,grad_norm: 0.8580447995794397, iteration: 373556
loss: 1.0281845331192017,grad_norm: 0.9999997928622416, iteration: 373557
loss: 1.0062713623046875,grad_norm: 0.8433442542770188, iteration: 373558
loss: 1.0000656843185425,grad_norm: 0.9737745851921353, iteration: 373559
loss: 1.0176763534545898,grad_norm: 0.9999996779745025, iteration: 373560
loss: 1.0286662578582764,grad_norm: 0.9999996656147856, iteration: 373561
loss: 0.9956158399581909,grad_norm: 0.9550923095584323, iteration: 373562
loss: 0.9947682023048401,grad_norm: 0.8545735795690028, iteration: 373563
loss: 1.0179861783981323,grad_norm: 0.8856622232726853, iteration: 373564
loss: 0.9693270325660706,grad_norm: 0.9999990221458955, iteration: 373565
loss: 1.0221809148788452,grad_norm: 0.9999994568823984, iteration: 373566
loss: 1.0650473833084106,grad_norm: 0.790352080320055, iteration: 373567
loss: 1.0264931917190552,grad_norm: 0.7694714719652405, iteration: 373568
loss: 1.015246033668518,grad_norm: 0.9999994869769612, iteration: 373569
loss: 0.9817118048667908,grad_norm: 0.717149053220278, iteration: 373570
loss: 1.0300168991088867,grad_norm: 0.8394068386574398, iteration: 373571
loss: 1.0151227712631226,grad_norm: 0.9999992560268084, iteration: 373572
loss: 0.9841874837875366,grad_norm: 0.9217427603512258, iteration: 373573
loss: 1.032774806022644,grad_norm: 0.8305381130992284, iteration: 373574
loss: 1.0106511116027832,grad_norm: 0.6793125326297168, iteration: 373575
loss: 1.0221997499465942,grad_norm: 0.7934651272874538, iteration: 373576
loss: 0.9897534251213074,grad_norm: 0.7797838566145997, iteration: 373577
loss: 1.0790259838104248,grad_norm: 0.976817016553225, iteration: 373578
loss: 1.0077797174453735,grad_norm: 0.9999990987929451, iteration: 373579
loss: 1.0145922899246216,grad_norm: 0.9109252804665253, iteration: 373580
loss: 0.9789661765098572,grad_norm: 0.753214215492478, iteration: 373581
loss: 0.9564801454544067,grad_norm: 0.999999090994335, iteration: 373582
loss: 0.9613639712333679,grad_norm: 0.9999992958100007, iteration: 373583
loss: 0.9718145132064819,grad_norm: 0.7561286039711955, iteration: 373584
loss: 0.9984132051467896,grad_norm: 0.7908944072835237, iteration: 373585
loss: 1.0060381889343262,grad_norm: 0.9999992369785389, iteration: 373586
loss: 0.9695718884468079,grad_norm: 0.8831635524762701, iteration: 373587
loss: 0.9765936732292175,grad_norm: 0.9342515823522904, iteration: 373588
loss: 1.0069713592529297,grad_norm: 0.7723810071725269, iteration: 373589
loss: 1.0058107376098633,grad_norm: 0.9426493665068362, iteration: 373590
loss: 0.9647903442382812,grad_norm: 0.8559369589428457, iteration: 373591
loss: 1.0351847410202026,grad_norm: 0.7660874026574898, iteration: 373592
loss: 0.9839678406715393,grad_norm: 0.999999815555735, iteration: 373593
loss: 0.9925622940063477,grad_norm: 0.7412108552046822, iteration: 373594
loss: 1.006832242012024,grad_norm: 0.9277611240754207, iteration: 373595
loss: 0.9639963507652283,grad_norm: 0.7812912841292494, iteration: 373596
loss: 0.9917331337928772,grad_norm: 0.6356193243225576, iteration: 373597
loss: 0.9786561131477356,grad_norm: 0.8200918231580036, iteration: 373598
loss: 0.9873207211494446,grad_norm: 0.8335896278714421, iteration: 373599
loss: 1.083569884300232,grad_norm: 0.9999992299449709, iteration: 373600
loss: 0.9799458980560303,grad_norm: 0.7898313828806522, iteration: 373601
loss: 0.9882120490074158,grad_norm: 0.8028440387164036, iteration: 373602
loss: 1.0071511268615723,grad_norm: 0.8082816385458519, iteration: 373603
loss: 1.0182409286499023,grad_norm: 0.7561797455530869, iteration: 373604
loss: 1.0377410650253296,grad_norm: 0.947316749017904, iteration: 373605
loss: 1.010141134262085,grad_norm: 0.7521056289639677, iteration: 373606
loss: 0.9761479496955872,grad_norm: 0.7983127066855916, iteration: 373607
loss: 0.9968923330307007,grad_norm: 0.8314383744934116, iteration: 373608
loss: 1.0264687538146973,grad_norm: 0.9361892023235218, iteration: 373609
loss: 1.0000003576278687,grad_norm: 0.8358319634373151, iteration: 373610
loss: 1.0139977931976318,grad_norm: 0.9649672207552143, iteration: 373611
loss: 0.9996582269668579,grad_norm: 0.9999992856158728, iteration: 373612
loss: 1.00473952293396,grad_norm: 0.7803550748506423, iteration: 373613
loss: 1.009819507598877,grad_norm: 0.7772736264395524, iteration: 373614
loss: 0.9822494387626648,grad_norm: 0.925370680506678, iteration: 373615
loss: 1.0302680730819702,grad_norm: 0.8410027578545369, iteration: 373616
loss: 1.0067871809005737,grad_norm: 0.7308564029953045, iteration: 373617
loss: 0.9566923379898071,grad_norm: 0.7356940548759602, iteration: 373618
loss: 1.0249700546264648,grad_norm: 0.9999990154855798, iteration: 373619
loss: 1.0251675844192505,grad_norm: 0.9999990905807409, iteration: 373620
loss: 1.0340750217437744,grad_norm: 0.8178234098492285, iteration: 373621
loss: 1.194470763206482,grad_norm: 0.9999995998221882, iteration: 373622
loss: 1.0365748405456543,grad_norm: 0.9341014353374578, iteration: 373623
loss: 0.9803394079208374,grad_norm: 0.7216752742919217, iteration: 373624
loss: 0.9765809178352356,grad_norm: 0.9999996399843962, iteration: 373625
loss: 1.01446533203125,grad_norm: 0.6749943642398779, iteration: 373626
loss: 1.007859706878662,grad_norm: 0.7566359516348707, iteration: 373627
loss: 1.0007244348526,grad_norm: 0.9768767581156275, iteration: 373628
loss: 1.0169076919555664,grad_norm: 0.8489737387305182, iteration: 373629
loss: 1.0080645084381104,grad_norm: 0.7494923524175057, iteration: 373630
loss: 0.9602715969085693,grad_norm: 0.8495536731901225, iteration: 373631
loss: 0.9815770387649536,grad_norm: 0.7936228541552047, iteration: 373632
loss: 1.0239763259887695,grad_norm: 0.8814757276687119, iteration: 373633
loss: 1.0003070831298828,grad_norm: 0.6816550370872315, iteration: 373634
loss: 1.0200705528259277,grad_norm: 0.8492013244436154, iteration: 373635
loss: 0.988034725189209,grad_norm: 0.7992230932130235, iteration: 373636
loss: 0.9590649008750916,grad_norm: 0.8345292897308817, iteration: 373637
loss: 1.0269197225570679,grad_norm: 0.7363741783374086, iteration: 373638
loss: 0.9733948111534119,grad_norm: 0.9999998566282404, iteration: 373639
loss: 0.9855426549911499,grad_norm: 0.7018594507523277, iteration: 373640
loss: 0.9542059302330017,grad_norm: 0.796550603719114, iteration: 373641
loss: 1.0035042762756348,grad_norm: 0.7406047537289571, iteration: 373642
loss: 0.9965839385986328,grad_norm: 0.6582879799000575, iteration: 373643
loss: 0.9518486261367798,grad_norm: 0.783666017496194, iteration: 373644
loss: 0.9787184596061707,grad_norm: 0.7253461299559385, iteration: 373645
loss: 1.0305030345916748,grad_norm: 0.9999995882091198, iteration: 373646
loss: 1.007979154586792,grad_norm: 0.9006189870646698, iteration: 373647
loss: 0.9690383076667786,grad_norm: 0.7805353427697117, iteration: 373648
loss: 1.015624761581421,grad_norm: 0.9167922773351549, iteration: 373649
loss: 1.1763279438018799,grad_norm: 0.9999992496293911, iteration: 373650
loss: 1.0187402963638306,grad_norm: 0.9085184292887531, iteration: 373651
loss: 0.9990110397338867,grad_norm: 0.8039293362123943, iteration: 373652
loss: 1.0345178842544556,grad_norm: 0.9999999189477333, iteration: 373653
loss: 1.0230048894882202,grad_norm: 0.8945523250418481, iteration: 373654
loss: 0.9863959550857544,grad_norm: 0.974024689146505, iteration: 373655
loss: 1.097373604774475,grad_norm: 0.9999991128946102, iteration: 373656
loss: 1.0733202695846558,grad_norm: 0.8250369524479629, iteration: 373657
loss: 1.0293997526168823,grad_norm: 0.9999998437892167, iteration: 373658
loss: 1.031424641609192,grad_norm: 0.9524758675523278, iteration: 373659
loss: 1.2234089374542236,grad_norm: 0.9999998036425917, iteration: 373660
loss: 1.0027931928634644,grad_norm: 0.9309231563661127, iteration: 373661
loss: 1.0409655570983887,grad_norm: 0.9999991613816296, iteration: 373662
loss: 1.0762465000152588,grad_norm: 0.9999997107518765, iteration: 373663
loss: 1.0896257162094116,grad_norm: 0.9999993941852408, iteration: 373664
loss: 1.0434741973876953,grad_norm: 0.9999995490972012, iteration: 373665
loss: 1.0135607719421387,grad_norm: 0.8586447474485706, iteration: 373666
loss: 0.9951179623603821,grad_norm: 0.9999998915893202, iteration: 373667
loss: 1.1437824964523315,grad_norm: 0.9999995266965322, iteration: 373668
loss: 1.064943790435791,grad_norm: 0.9999994045421359, iteration: 373669
loss: 1.1206082105636597,grad_norm: 0.9999998987048799, iteration: 373670
loss: 1.0040416717529297,grad_norm: 0.9999991138959778, iteration: 373671
loss: 0.9986253976821899,grad_norm: 0.7179528337181844, iteration: 373672
loss: 1.1468756198883057,grad_norm: 0.9999998173868417, iteration: 373673
loss: 1.025368571281433,grad_norm: 0.9999996892932141, iteration: 373674
loss: 1.0200097560882568,grad_norm: 0.9018688104746346, iteration: 373675
loss: 0.9959033727645874,grad_norm: 0.9999991852038156, iteration: 373676
loss: 1.1357667446136475,grad_norm: 0.9999998775134065, iteration: 373677
loss: 1.0662883520126343,grad_norm: 0.9999997749395684, iteration: 373678
loss: 1.0306931734085083,grad_norm: 0.9999994517806919, iteration: 373679
loss: 0.9992906451225281,grad_norm: 0.8230671233260988, iteration: 373680
loss: 1.0498723983764648,grad_norm: 0.9559518989828146, iteration: 373681
loss: 1.0639644861221313,grad_norm: 0.9999995558812945, iteration: 373682
loss: 1.04850435256958,grad_norm: 0.9999997590528772, iteration: 373683
loss: 1.079087495803833,grad_norm: 0.9999997936160715, iteration: 373684
loss: 1.2612113952636719,grad_norm: 0.9999996340861584, iteration: 373685
loss: 1.2228649854660034,grad_norm: 0.9999997016368436, iteration: 373686
loss: 0.9917589426040649,grad_norm: 0.7041408322906317, iteration: 373687
loss: 1.0605578422546387,grad_norm: 0.9706166975606664, iteration: 373688
loss: 1.0876859426498413,grad_norm: 0.9999996868234413, iteration: 373689
loss: 1.077107310295105,grad_norm: 0.9999995182407023, iteration: 373690
loss: 1.0322904586791992,grad_norm: 0.9999996863556194, iteration: 373691
loss: 1.1437928676605225,grad_norm: 0.9999998217271111, iteration: 373692
loss: 1.1303483247756958,grad_norm: 0.9999996705241379, iteration: 373693
loss: 1.1274442672729492,grad_norm: 0.9999997493878697, iteration: 373694
loss: 1.194635033607483,grad_norm: 0.9999997690712159, iteration: 373695
loss: 1.1004079580307007,grad_norm: 0.9999995003935462, iteration: 373696
loss: 1.1302404403686523,grad_norm: 0.999999919013321, iteration: 373697
loss: 1.031655192375183,grad_norm: 0.9999994006208976, iteration: 373698
loss: 0.9752573370933533,grad_norm: 0.6949219681877917, iteration: 373699
loss: 1.0803543329238892,grad_norm: 0.9999993156699327, iteration: 373700
loss: 1.0452817678451538,grad_norm: 0.9999999927969242, iteration: 373701
loss: 0.9806342124938965,grad_norm: 0.8414856801412522, iteration: 373702
loss: 1.0319607257843018,grad_norm: 0.9999999359111507, iteration: 373703
loss: 0.9955761432647705,grad_norm: 0.9813471844257793, iteration: 373704
loss: 1.071171522140503,grad_norm: 0.8942204081063351, iteration: 373705
loss: 1.0147348642349243,grad_norm: 0.8054296541302269, iteration: 373706
loss: 1.0087896585464478,grad_norm: 0.9999992082462418, iteration: 373707
loss: 1.030525803565979,grad_norm: 0.743550031514274, iteration: 373708
loss: 0.9928036332130432,grad_norm: 0.8473106045928802, iteration: 373709
loss: 0.9867660403251648,grad_norm: 0.9287686683571247, iteration: 373710
loss: 1.092003345489502,grad_norm: 0.99999930954539, iteration: 373711
loss: 0.980812132358551,grad_norm: 0.7996283118644527, iteration: 373712
loss: 1.0462783575057983,grad_norm: 0.8485425735318165, iteration: 373713
loss: 1.0029293298721313,grad_norm: 0.924444124700901, iteration: 373714
loss: 0.9761680960655212,grad_norm: 0.9999998795600654, iteration: 373715
loss: 0.9655328392982483,grad_norm: 0.8111318958176376, iteration: 373716
loss: 0.962140679359436,grad_norm: 0.8451711391320793, iteration: 373717
loss: 1.0024625062942505,grad_norm: 0.8263581373906983, iteration: 373718
loss: 1.0019919872283936,grad_norm: 0.8223638465138525, iteration: 373719
loss: 1.003651738166809,grad_norm: 0.9067840977541289, iteration: 373720
loss: 1.0381457805633545,grad_norm: 0.9999989706587685, iteration: 373721
loss: 1.022233486175537,grad_norm: 0.7733896830438343, iteration: 373722
loss: 1.0040860176086426,grad_norm: 0.8588364388376912, iteration: 373723
loss: 1.0502948760986328,grad_norm: 0.9999996228873491, iteration: 373724
loss: 1.0368422269821167,grad_norm: 0.9999997187135391, iteration: 373725
loss: 1.035346508026123,grad_norm: 0.8141797801530082, iteration: 373726
loss: 1.1559542417526245,grad_norm: 0.9999997654559105, iteration: 373727
loss: 1.076971411705017,grad_norm: 0.9999991338557448, iteration: 373728
loss: 0.9986691474914551,grad_norm: 0.9235284927399959, iteration: 373729
loss: 1.0092607736587524,grad_norm: 0.5981416849249741, iteration: 373730
loss: 0.9766921401023865,grad_norm: 0.83606763699233, iteration: 373731
loss: 1.008505940437317,grad_norm: 0.7960471714857147, iteration: 373732
loss: 1.0166476964950562,grad_norm: 0.8363346720092621, iteration: 373733
loss: 0.9688718914985657,grad_norm: 0.999999897207717, iteration: 373734
loss: 1.0823218822479248,grad_norm: 0.9408964458723749, iteration: 373735
loss: 1.0281035900115967,grad_norm: 0.8579466260334824, iteration: 373736
loss: 1.0117436647415161,grad_norm: 0.7655658158271084, iteration: 373737
loss: 1.054547905921936,grad_norm: 0.956145701620082, iteration: 373738
loss: 0.9923761487007141,grad_norm: 0.80697076141499, iteration: 373739
loss: 1.066704273223877,grad_norm: 0.9999991460622184, iteration: 373740
loss: 0.9949653744697571,grad_norm: 0.9713224578002294, iteration: 373741
loss: 1.041224718093872,grad_norm: 0.8738178841230132, iteration: 373742
loss: 1.0159839391708374,grad_norm: 0.7825197358105604, iteration: 373743
loss: 1.113263487815857,grad_norm: 0.8206721532263773, iteration: 373744
loss: 1.0529803037643433,grad_norm: 0.9999997713973598, iteration: 373745
loss: 1.006758213043213,grad_norm: 0.9999989663288114, iteration: 373746
loss: 0.9921770095825195,grad_norm: 0.9999990964951315, iteration: 373747
loss: 0.9905467629432678,grad_norm: 0.936344271594697, iteration: 373748
loss: 1.0287792682647705,grad_norm: 0.9360518598321657, iteration: 373749
loss: 1.0318145751953125,grad_norm: 0.8375756925934696, iteration: 373750
loss: 1.030572772026062,grad_norm: 0.9159470776280062, iteration: 373751
loss: 1.0004547834396362,grad_norm: 0.9999991111063625, iteration: 373752
loss: 0.9926321506500244,grad_norm: 0.9999999036525513, iteration: 373753
loss: 1.0362147092819214,grad_norm: 0.8893223200359258, iteration: 373754
loss: 1.0541659593582153,grad_norm: 0.9999998726382637, iteration: 373755
loss: 0.9860628247261047,grad_norm: 0.8545151631046591, iteration: 373756
loss: 1.019188404083252,grad_norm: 0.7441474912510367, iteration: 373757
loss: 1.0321624279022217,grad_norm: 0.9999999441542889, iteration: 373758
loss: 1.023119330406189,grad_norm: 0.9430104738794464, iteration: 373759
loss: 1.1603929996490479,grad_norm: 1.0000000182610358, iteration: 373760
loss: 1.117011308670044,grad_norm: 0.9999992883194646, iteration: 373761
loss: 1.145907998085022,grad_norm: 1.0000000215883464, iteration: 373762
loss: 0.9916959404945374,grad_norm: 0.8852961840205712, iteration: 373763
loss: 0.9832665324211121,grad_norm: 0.7444250300761825, iteration: 373764
loss: 1.0175702571868896,grad_norm: 0.8750866939706454, iteration: 373765
loss: 1.1172457933425903,grad_norm: 0.9953319838635982, iteration: 373766
loss: 0.9916394352912903,grad_norm: 0.7728113438535269, iteration: 373767
loss: 1.0775314569473267,grad_norm: 0.9999989888990048, iteration: 373768
loss: 1.0055279731750488,grad_norm: 0.9999999002067729, iteration: 373769
loss: 1.0346450805664062,grad_norm: 0.8256295070328551, iteration: 373770
loss: 1.0974204540252686,grad_norm: 0.9082131514620688, iteration: 373771
loss: 0.9937048554420471,grad_norm: 0.8823848003818561, iteration: 373772
loss: 1.3103926181793213,grad_norm: 0.9999998767698932, iteration: 373773
loss: 1.0146700143814087,grad_norm: 0.9743549754222244, iteration: 373774
loss: 1.0026922225952148,grad_norm: 0.7585191824149233, iteration: 373775
loss: 1.617497205734253,grad_norm: 0.9999993856444542, iteration: 373776
loss: 1.0216580629348755,grad_norm: 0.9110474800472834, iteration: 373777
loss: 1.1340601444244385,grad_norm: 0.9999997981906362, iteration: 373778
loss: 0.9913539886474609,grad_norm: 0.8332982979391717, iteration: 373779
loss: 1.0265816450119019,grad_norm: 0.8792336897358507, iteration: 373780
loss: 0.9885082244873047,grad_norm: 0.8229637045961357, iteration: 373781
loss: 1.0699279308319092,grad_norm: 0.8848368835843436, iteration: 373782
loss: 1.2404104471206665,grad_norm: 0.9999992216226481, iteration: 373783
loss: 0.9846376776695251,grad_norm: 0.9999991405643284, iteration: 373784
loss: 1.0344786643981934,grad_norm: 0.7996920825385739, iteration: 373785
loss: 0.9983509182929993,grad_norm: 0.9999994724257134, iteration: 373786
loss: 1.0544017553329468,grad_norm: 0.9999990717627606, iteration: 373787
loss: 0.986370861530304,grad_norm: 0.7991294941880696, iteration: 373788
loss: 0.9719239473342896,grad_norm: 0.8416585219752254, iteration: 373789
loss: 0.9722797870635986,grad_norm: 0.7313073117584581, iteration: 373790
loss: 1.0527093410491943,grad_norm: 0.9999999607145105, iteration: 373791
loss: 1.1729669570922852,grad_norm: 0.9999999768059301, iteration: 373792
loss: 1.0306520462036133,grad_norm: 0.7979627761918413, iteration: 373793
loss: 1.0329614877700806,grad_norm: 0.9999990339877647, iteration: 373794
loss: 1.0277478694915771,grad_norm: 0.8200101815054889, iteration: 373795
loss: 1.002140760421753,grad_norm: 0.7471478049363817, iteration: 373796
loss: 1.124292254447937,grad_norm: 0.9999991138861101, iteration: 373797
loss: 1.2097090482711792,grad_norm: 0.999999123847743, iteration: 373798
loss: 0.988084077835083,grad_norm: 0.8953462411891062, iteration: 373799
loss: 1.0117378234863281,grad_norm: 0.927552864486049, iteration: 373800
loss: 1.0095975399017334,grad_norm: 0.8190100326571789, iteration: 373801
loss: 1.0171431303024292,grad_norm: 0.9999992968043162, iteration: 373802
loss: 1.056679606437683,grad_norm: 0.9999993581267325, iteration: 373803
loss: 1.0027945041656494,grad_norm: 0.8623925750122897, iteration: 373804
loss: 1.090552568435669,grad_norm: 0.9999993269146517, iteration: 373805
loss: 0.9881420135498047,grad_norm: 0.7655136320058787, iteration: 373806
loss: 0.981377124786377,grad_norm: 0.9818224952499551, iteration: 373807
loss: 0.9662373065948486,grad_norm: 0.8494393044483822, iteration: 373808
loss: 1.0176788568496704,grad_norm: 0.8873781935109415, iteration: 373809
loss: 1.0303683280944824,grad_norm: 0.7371912000900464, iteration: 373810
loss: 1.1597230434417725,grad_norm: 0.9999998105058792, iteration: 373811
loss: 1.1195687055587769,grad_norm: 0.9999998619126638, iteration: 373812
loss: 1.03384268283844,grad_norm: 0.8405894244468355, iteration: 373813
loss: 1.0139364004135132,grad_norm: 0.8420230145598526, iteration: 373814
loss: 1.0226988792419434,grad_norm: 0.8178607138991116, iteration: 373815
loss: 0.980417013168335,grad_norm: 0.7186520980755383, iteration: 373816
loss: 1.0672688484191895,grad_norm: 0.9999989893873397, iteration: 373817
loss: 1.0508522987365723,grad_norm: 0.9999991342077579, iteration: 373818
loss: 1.0124101638793945,grad_norm: 0.8258181270820344, iteration: 373819
loss: 0.9808360934257507,grad_norm: 0.7743328178162535, iteration: 373820
loss: 1.0987190008163452,grad_norm: 0.9999993998604615, iteration: 373821
loss: 1.1370092630386353,grad_norm: 0.9419711072496075, iteration: 373822
loss: 1.1345961093902588,grad_norm: 0.9999989787320926, iteration: 373823
loss: 0.9712632298469543,grad_norm: 0.8163142856388566, iteration: 373824
loss: 1.0247009992599487,grad_norm: 0.977898874469325, iteration: 373825
loss: 0.9816051125526428,grad_norm: 0.9740269893974538, iteration: 373826
loss: 1.131932258605957,grad_norm: 0.9999998848539894, iteration: 373827
loss: 1.0367348194122314,grad_norm: 0.9494893675905368, iteration: 373828
loss: 0.9949848055839539,grad_norm: 0.8302113478378094, iteration: 373829
loss: 0.9842744469642639,grad_norm: 0.7254275519926753, iteration: 373830
loss: 0.9685971736907959,grad_norm: 0.6753758419630276, iteration: 373831
loss: 1.0257800817489624,grad_norm: 0.9999991244142112, iteration: 373832
loss: 0.9820955395698547,grad_norm: 0.6511935578892999, iteration: 373833
loss: 1.026992678642273,grad_norm: 0.9923305129000821, iteration: 373834
loss: 0.9682847857475281,grad_norm: 0.7766647368870931, iteration: 373835
loss: 1.0000605583190918,grad_norm: 0.8324287243522748, iteration: 373836
loss: 1.0240569114685059,grad_norm: 0.8574936354528695, iteration: 373837
loss: 0.9832684397697449,grad_norm: 0.7772930640253868, iteration: 373838
loss: 0.980128288269043,grad_norm: 0.9466013118770326, iteration: 373839
loss: 0.9854606986045837,grad_norm: 0.8342060171220868, iteration: 373840
loss: 0.9989761114120483,grad_norm: 0.7219097637843536, iteration: 373841
loss: 1.0714985132217407,grad_norm: 0.9999996608429513, iteration: 373842
loss: 0.9983470439910889,grad_norm: 0.8820218774796109, iteration: 373843
loss: 1.0096198320388794,grad_norm: 0.7744067395018078, iteration: 373844
loss: 1.022497534751892,grad_norm: 0.8502536869909934, iteration: 373845
loss: 1.0733591318130493,grad_norm: 0.999999133280492, iteration: 373846
loss: 1.0035121440887451,grad_norm: 0.7557353686957017, iteration: 373847
loss: 0.9916332364082336,grad_norm: 0.9688529749037958, iteration: 373848
loss: 0.9994715452194214,grad_norm: 0.9000699507369797, iteration: 373849
loss: 0.9916024208068848,grad_norm: 0.7580146689183609, iteration: 373850
loss: 1.1217575073242188,grad_norm: 0.9999995047700447, iteration: 373851
loss: 0.9871088266372681,grad_norm: 0.8673118300150725, iteration: 373852
loss: 1.0735399723052979,grad_norm: 0.7135441225887635, iteration: 373853
loss: 1.007655143737793,grad_norm: 0.7899830107073844, iteration: 373854
loss: 1.0109997987747192,grad_norm: 0.9999989287895056, iteration: 373855
loss: 1.0155186653137207,grad_norm: 0.751397696190371, iteration: 373856
loss: 1.0990259647369385,grad_norm: 0.9999992183721965, iteration: 373857
loss: 0.9786003232002258,grad_norm: 0.6748440891721383, iteration: 373858
loss: 0.985899806022644,grad_norm: 0.9999993245965721, iteration: 373859
loss: 0.9800143837928772,grad_norm: 0.9999994246219037, iteration: 373860
loss: 1.0312553644180298,grad_norm: 0.814711359352834, iteration: 373861
loss: 0.9666777849197388,grad_norm: 0.8557462186867314, iteration: 373862
loss: 1.1080864667892456,grad_norm: 0.9999992403697444, iteration: 373863
loss: 0.9838595390319824,grad_norm: 0.8662002630959336, iteration: 373864
loss: 0.974107027053833,grad_norm: 0.7541508231353291, iteration: 373865
loss: 1.001049518585205,grad_norm: 0.8366200871058956, iteration: 373866
loss: 0.989784836769104,grad_norm: 0.7670557336597705, iteration: 373867
loss: 1.081459641456604,grad_norm: 0.999999103883253, iteration: 373868
loss: 1.0024410486221313,grad_norm: 0.999999887754633, iteration: 373869
loss: 0.9878992438316345,grad_norm: 0.8336020602450968, iteration: 373870
loss: 0.9876154065132141,grad_norm: 0.9999992685824687, iteration: 373871
loss: 1.0763583183288574,grad_norm: 0.9999996913186704, iteration: 373872
loss: 1.037214994430542,grad_norm: 0.9544999426972816, iteration: 373873
loss: 1.0350723266601562,grad_norm: 0.966191807900724, iteration: 373874
loss: 1.0341171026229858,grad_norm: 0.9434912253961657, iteration: 373875
loss: 1.058881163597107,grad_norm: 0.9999998029706133, iteration: 373876
loss: 0.9825931787490845,grad_norm: 0.7593602176906524, iteration: 373877
loss: 1.0357145071029663,grad_norm: 0.9999999716143327, iteration: 373878
loss: 0.9832473993301392,grad_norm: 0.9254670928068216, iteration: 373879
loss: 1.0860093832015991,grad_norm: 0.9999994309787467, iteration: 373880
loss: 1.1525403261184692,grad_norm: 0.9999995077390474, iteration: 373881
loss: 1.0571541786193848,grad_norm: 0.9075521433940736, iteration: 373882
loss: 1.1503363847732544,grad_norm: 0.9999992369997871, iteration: 373883
loss: 1.0535119771957397,grad_norm: 0.9999998665552544, iteration: 373884
loss: 0.9694200754165649,grad_norm: 0.9355044555215766, iteration: 373885
loss: 0.9846429824829102,grad_norm: 0.9348620851451903, iteration: 373886
loss: 1.0499178171157837,grad_norm: 0.9999994500693429, iteration: 373887
loss: 1.1825745105743408,grad_norm: 0.9999994883024631, iteration: 373888
loss: 1.0362225770950317,grad_norm: 0.999999599896208, iteration: 373889
loss: 0.9919057488441467,grad_norm: 0.7540897661959964, iteration: 373890
loss: 0.9696275591850281,grad_norm: 0.7495761602957958, iteration: 373891
loss: 1.0586737394332886,grad_norm: 0.9999996799552697, iteration: 373892
loss: 1.0056437253952026,grad_norm: 0.8987587545654463, iteration: 373893
loss: 1.019371509552002,grad_norm: 0.7529645567644793, iteration: 373894
loss: 0.9850323796272278,grad_norm: 0.7717237442272867, iteration: 373895
loss: 0.9702700972557068,grad_norm: 0.9135181511717482, iteration: 373896
loss: 1.0195190906524658,grad_norm: 0.8089812806541887, iteration: 373897
loss: 1.057007074356079,grad_norm: 0.9999994756828459, iteration: 373898
loss: 0.9913406372070312,grad_norm: 0.7254783866141864, iteration: 373899
loss: 1.1296172142028809,grad_norm: 0.9999998530148975, iteration: 373900
loss: 0.9499247074127197,grad_norm: 0.7231268282913346, iteration: 373901
loss: 1.0043034553527832,grad_norm: 0.715027501623418, iteration: 373902
loss: 1.1789613962173462,grad_norm: 0.999999613805565, iteration: 373903
loss: 1.0474469661712646,grad_norm: 0.999999537291268, iteration: 373904
loss: 1.0260382890701294,grad_norm: 0.9999993889923151, iteration: 373905
loss: 0.9793227910995483,grad_norm: 0.9999995139237036, iteration: 373906
loss: 0.9879619479179382,grad_norm: 0.8640710266606667, iteration: 373907
loss: 1.0064831972122192,grad_norm: 0.9999990607915019, iteration: 373908
loss: 1.0875829458236694,grad_norm: 0.9671389343381398, iteration: 373909
loss: 1.0177218914031982,grad_norm: 0.9999994745873191, iteration: 373910
loss: 1.0010346174240112,grad_norm: 0.8117784870781153, iteration: 373911
loss: 0.9883127808570862,grad_norm: 0.947821175158917, iteration: 373912
loss: 0.9828400611877441,grad_norm: 0.9410186073485247, iteration: 373913
loss: 1.0296629667282104,grad_norm: 0.9999990693451106, iteration: 373914
loss: 1.0806394815444946,grad_norm: 0.9999992203396633, iteration: 373915
loss: 1.0486036539077759,grad_norm: 0.9999992000910766, iteration: 373916
loss: 1.0245881080627441,grad_norm: 0.7763299692141341, iteration: 373917
loss: 1.1002591848373413,grad_norm: 0.9999993083875771, iteration: 373918
loss: 1.107589840888977,grad_norm: 0.9999993460758306, iteration: 373919
loss: 1.0881876945495605,grad_norm: 0.9999996015879214, iteration: 373920
loss: 0.9888949394226074,grad_norm: 0.8897879965661409, iteration: 373921
loss: 1.0020015239715576,grad_norm: 0.798578422397184, iteration: 373922
loss: 1.0766912698745728,grad_norm: 0.999999518354905, iteration: 373923
loss: 1.0894545316696167,grad_norm: 0.9999991770484856, iteration: 373924
loss: 1.1320009231567383,grad_norm: 0.9999997937397314, iteration: 373925
loss: 1.0011470317840576,grad_norm: 0.8915809502958738, iteration: 373926
loss: 1.0265535116195679,grad_norm: 0.8358308386447453, iteration: 373927
loss: 1.1183886528015137,grad_norm: 0.9999991970684105, iteration: 373928
loss: 1.0144743919372559,grad_norm: 0.698959942040897, iteration: 373929
loss: 1.0118054151535034,grad_norm: 0.6825707807130744, iteration: 373930
loss: 1.0078811645507812,grad_norm: 0.8519109107324648, iteration: 373931
loss: 1.1074333190917969,grad_norm: 0.9999997471924296, iteration: 373932
loss: 1.2064419984817505,grad_norm: 0.9999998550253999, iteration: 373933
loss: 0.9729923009872437,grad_norm: 0.9999994860684513, iteration: 373934
loss: 1.0378382205963135,grad_norm: 0.8619478251414214, iteration: 373935
loss: 1.0053986310958862,grad_norm: 0.8908361254421429, iteration: 373936
loss: 0.9977751970291138,grad_norm: 0.7538860682275058, iteration: 373937
loss: 1.0839442014694214,grad_norm: 0.9999990774100951, iteration: 373938
loss: 0.9607206583023071,grad_norm: 0.8115404190558526, iteration: 373939
loss: 1.038161277770996,grad_norm: 0.9999996653504902, iteration: 373940
loss: 1.0167171955108643,grad_norm: 0.8014232094475435, iteration: 373941
loss: 1.0201222896575928,grad_norm: 0.999999520382594, iteration: 373942
loss: 1.043534278869629,grad_norm: 0.9999991949286811, iteration: 373943
loss: 1.0069072246551514,grad_norm: 0.7579823944687898, iteration: 373944
loss: 1.0668121576309204,grad_norm: 0.9999997487290846, iteration: 373945
loss: 0.9921756982803345,grad_norm: 0.8139175694804212, iteration: 373946
loss: 1.0493770837783813,grad_norm: 0.9999998548340484, iteration: 373947
loss: 0.9833718538284302,grad_norm: 0.9278798980811385, iteration: 373948
loss: 1.0778179168701172,grad_norm: 0.9999990684455696, iteration: 373949
loss: 1.1103119850158691,grad_norm: 0.9999996516574727, iteration: 373950
loss: 1.0177990198135376,grad_norm: 0.8158254543386584, iteration: 373951
loss: 0.9842787981033325,grad_norm: 0.9157785381454027, iteration: 373952
loss: 1.057139277458191,grad_norm: 0.9999996385093735, iteration: 373953
loss: 1.0517208576202393,grad_norm: 0.9999993830500027, iteration: 373954
loss: 1.0815738439559937,grad_norm: 0.9999991300323356, iteration: 373955
loss: 1.2168171405792236,grad_norm: 0.9999993191195355, iteration: 373956
loss: 1.0432580709457397,grad_norm: 0.9999993688111041, iteration: 373957
loss: 1.1922075748443604,grad_norm: 0.9999999177842156, iteration: 373958
loss: 0.9947577714920044,grad_norm: 0.7890220154026559, iteration: 373959
loss: 0.9843631982803345,grad_norm: 0.8448320161067957, iteration: 373960
loss: 1.0335580110549927,grad_norm: 1.000000052186655, iteration: 373961
loss: 0.95244300365448,grad_norm: 0.7156157188426487, iteration: 373962
loss: 1.0950778722763062,grad_norm: 0.9999999031652298, iteration: 373963
loss: 1.096131443977356,grad_norm: 0.9999991603660972, iteration: 373964
loss: 0.9996787309646606,grad_norm: 0.9420165534544819, iteration: 373965
loss: 1.030964970588684,grad_norm: 0.9999995842695061, iteration: 373966
loss: 0.9686615467071533,grad_norm: 0.8114890975834269, iteration: 373967
loss: 0.9994677305221558,grad_norm: 0.9999999310487652, iteration: 373968
loss: 1.152963399887085,grad_norm: 0.8738255982182114, iteration: 373969
loss: 1.303044080734253,grad_norm: 0.9999996482198416, iteration: 373970
loss: 1.1078953742980957,grad_norm: 0.9999998074627355, iteration: 373971
loss: 1.2645879983901978,grad_norm: 0.9999997058884988, iteration: 373972
loss: 0.9997166395187378,grad_norm: 0.7140004929374749, iteration: 373973
loss: 1.0446429252624512,grad_norm: 0.9999991550856457, iteration: 373974
loss: 1.09140944480896,grad_norm: 1.0000000323038796, iteration: 373975
loss: 1.022942066192627,grad_norm: 0.9999991707441928, iteration: 373976
loss: 1.1300784349441528,grad_norm: 0.9999992129744483, iteration: 373977
loss: 1.0151311159133911,grad_norm: 0.8333865214590345, iteration: 373978
loss: 0.9832992553710938,grad_norm: 0.8672925737385434, iteration: 373979
loss: 1.0022237300872803,grad_norm: 0.7743974289055519, iteration: 373980
loss: 0.9911420941352844,grad_norm: 0.8468179578350935, iteration: 373981
loss: 1.0670042037963867,grad_norm: 0.9999991955967533, iteration: 373982
loss: 1.0561745166778564,grad_norm: 0.9127212093458769, iteration: 373983
loss: 0.9854856133460999,grad_norm: 0.6473040712610654, iteration: 373984
loss: 0.9933439493179321,grad_norm: 0.9999990617518795, iteration: 373985
loss: 0.9881429076194763,grad_norm: 0.7797093874527282, iteration: 373986
loss: 1.0195411443710327,grad_norm: 0.9999991761183563, iteration: 373987
loss: 0.986864447593689,grad_norm: 0.8034954847515594, iteration: 373988
loss: 1.038564920425415,grad_norm: 0.9999993479601461, iteration: 373989
loss: 1.0477741956710815,grad_norm: 0.7609230502940633, iteration: 373990
loss: 1.0977354049682617,grad_norm: 0.9999993895959505, iteration: 373991
loss: 1.0226292610168457,grad_norm: 0.9814941966395915, iteration: 373992
loss: 1.0187464952468872,grad_norm: 0.9987275062704487, iteration: 373993
loss: 0.996246337890625,grad_norm: 0.8286458863215106, iteration: 373994
loss: 1.0477899312973022,grad_norm: 0.9999999427724464, iteration: 373995
loss: 1.0250639915466309,grad_norm: 0.9999990346703977, iteration: 373996
loss: 1.038340449333191,grad_norm: 0.8294495478721587, iteration: 373997
loss: 0.988707959651947,grad_norm: 0.8735206526373013, iteration: 373998
loss: 1.005163311958313,grad_norm: 0.7805497726669137, iteration: 373999
loss: 0.9751338958740234,grad_norm: 0.7268022904041171, iteration: 374000
loss: 1.0519354343414307,grad_norm: 0.8976147376402548, iteration: 374001
loss: 1.0174403190612793,grad_norm: 0.9350921236590587, iteration: 374002
loss: 1.0073761940002441,grad_norm: 0.8361629617199903, iteration: 374003
loss: 1.0099241733551025,grad_norm: 0.7963800700502991, iteration: 374004
loss: 0.9925999641418457,grad_norm: 0.7592359127573114, iteration: 374005
loss: 1.0620698928833008,grad_norm: 0.9999999257883929, iteration: 374006
loss: 0.9751800298690796,grad_norm: 0.999999284019976, iteration: 374007
loss: 1.046315312385559,grad_norm: 0.9999996122971053, iteration: 374008
loss: 0.999646008014679,grad_norm: 0.8388089833524528, iteration: 374009
loss: 1.0273876190185547,grad_norm: 0.7333567092891086, iteration: 374010
loss: 1.090815544128418,grad_norm: 0.7009188990951698, iteration: 374011
loss: 1.0032404661178589,grad_norm: 0.8669682549888785, iteration: 374012
loss: 1.0115467309951782,grad_norm: 0.841426925937746, iteration: 374013
loss: 1.0481244325637817,grad_norm: 0.7873054111050082, iteration: 374014
loss: 1.0324487686157227,grad_norm: 0.9999994636610088, iteration: 374015
loss: 1.0403831005096436,grad_norm: 0.7630748978804909, iteration: 374016
loss: 0.989414632320404,grad_norm: 0.9999991371350911, iteration: 374017
loss: 1.0170601606369019,grad_norm: 0.9180224185887665, iteration: 374018
loss: 0.9865164756774902,grad_norm: 1.0000000432593779, iteration: 374019
loss: 1.01055109500885,grad_norm: 0.8253892135519945, iteration: 374020
loss: 1.070985198020935,grad_norm: 0.9999990848002579, iteration: 374021
loss: 1.076403260231018,grad_norm: 0.999999631465813, iteration: 374022
loss: 1.0106698274612427,grad_norm: 0.8245382942757041, iteration: 374023
loss: 1.0148379802703857,grad_norm: 0.7042491446744807, iteration: 374024
loss: 1.2343569993972778,grad_norm: 0.9999998025981496, iteration: 374025
loss: 1.136958360671997,grad_norm: 0.9999998250313771, iteration: 374026
loss: 1.1184074878692627,grad_norm: 0.9999998841085532, iteration: 374027
loss: 1.0070738792419434,grad_norm: 0.8427702536410363, iteration: 374028
loss: 1.2710963487625122,grad_norm: 0.9999993806564976, iteration: 374029
loss: 1.0247238874435425,grad_norm: 0.8794884940536253, iteration: 374030
loss: 1.0218539237976074,grad_norm: 0.9514407907992909, iteration: 374031
loss: 1.0048441886901855,grad_norm: 0.7308984399689612, iteration: 374032
loss: 0.9786232709884644,grad_norm: 0.9061669274997586, iteration: 374033
loss: 0.9988179802894592,grad_norm: 0.9787116158033259, iteration: 374034
loss: 1.0472627878189087,grad_norm: 0.9999998034543222, iteration: 374035
loss: 0.9501093029975891,grad_norm: 0.8533367000793796, iteration: 374036
loss: 1.2180074453353882,grad_norm: 0.9999999488014204, iteration: 374037
loss: 0.9486366510391235,grad_norm: 0.7523321248024659, iteration: 374038
loss: 1.012221336364746,grad_norm: 0.8494669906649501, iteration: 374039
loss: 0.9987786412239075,grad_norm: 0.9296144185964472, iteration: 374040
loss: 0.9894901514053345,grad_norm: 0.9296034110563925, iteration: 374041
loss: 1.127540946006775,grad_norm: 0.9999991291595265, iteration: 374042
loss: 1.0100092887878418,grad_norm: 0.7771103550814039, iteration: 374043
loss: 1.0045779943466187,grad_norm: 0.81196134464534, iteration: 374044
loss: 1.009101390838623,grad_norm: 0.7229566688064645, iteration: 374045
loss: 1.0255120992660522,grad_norm: 0.9999993138025124, iteration: 374046
loss: 0.9655748009681702,grad_norm: 0.7825551093713505, iteration: 374047
loss: 1.1099680662155151,grad_norm: 0.9999992023456369, iteration: 374048
loss: 1.1547977924346924,grad_norm: 0.9528317755333473, iteration: 374049
loss: 1.157679557800293,grad_norm: 0.9999993748406006, iteration: 374050
loss: 1.0362261533737183,grad_norm: 0.824017193431072, iteration: 374051
loss: 1.1182509660720825,grad_norm: 0.9999997960497427, iteration: 374052
loss: 1.026511788368225,grad_norm: 0.9999993594106424, iteration: 374053
loss: 1.0142024755477905,grad_norm: 0.8300837958961965, iteration: 374054
loss: 0.9735813736915588,grad_norm: 0.8322936991399041, iteration: 374055
loss: 1.0093145370483398,grad_norm: 0.751670490451793, iteration: 374056
loss: 1.066228985786438,grad_norm: 0.8634623785925114, iteration: 374057
loss: 1.013367772102356,grad_norm: 0.6770230650148906, iteration: 374058
loss: 1.0581480264663696,grad_norm: 0.9648643018857339, iteration: 374059
loss: 1.0053527355194092,grad_norm: 0.7331754457545813, iteration: 374060
loss: 0.976876437664032,grad_norm: 0.7238207229250129, iteration: 374061
loss: 0.9748373031616211,grad_norm: 0.7833656251557285, iteration: 374062
loss: 1.0239646434783936,grad_norm: 0.7856749504026969, iteration: 374063
loss: 0.9990111589431763,grad_norm: 0.9250490126985244, iteration: 374064
loss: 0.9951797723770142,grad_norm: 0.9999994368553515, iteration: 374065
loss: 1.0176929235458374,grad_norm: 0.7352580951639526, iteration: 374066
loss: 1.0078911781311035,grad_norm: 0.7609479644182525, iteration: 374067
loss: 0.9969480633735657,grad_norm: 0.791250300692042, iteration: 374068
loss: 0.9968355298042297,grad_norm: 0.775093342782384, iteration: 374069
loss: 1.0091036558151245,grad_norm: 0.8666698332433774, iteration: 374070
loss: 0.9990431666374207,grad_norm: 0.9893733219034389, iteration: 374071
loss: 1.0139249563217163,grad_norm: 0.784495703683356, iteration: 374072
loss: 1.0023554563522339,grad_norm: 0.7930172083158389, iteration: 374073
loss: 1.2511370182037354,grad_norm: 0.8795154293236787, iteration: 374074
loss: 1.0308799743652344,grad_norm: 0.8701035065631425, iteration: 374075
loss: 1.0946193933486938,grad_norm: 0.874526746894079, iteration: 374076
loss: 0.983547031879425,grad_norm: 0.7767199006903229, iteration: 374077
loss: 1.009254813194275,grad_norm: 0.8027343376191155, iteration: 374078
loss: 0.9446019530296326,grad_norm: 0.7474901037151356, iteration: 374079
loss: 0.9832093119621277,grad_norm: 0.771348935217188, iteration: 374080
loss: 1.0410820245742798,grad_norm: 0.9999991294366353, iteration: 374081
loss: 1.0960216522216797,grad_norm: 0.9999997766990824, iteration: 374082
loss: 1.0063241720199585,grad_norm: 0.8030593913733864, iteration: 374083
loss: 1.0306525230407715,grad_norm: 0.761026180480634, iteration: 374084
loss: 1.025866150856018,grad_norm: 0.7233661840091931, iteration: 374085
loss: 1.0388046503067017,grad_norm: 0.737632095226999, iteration: 374086
loss: 0.9785900712013245,grad_norm: 0.826726874787723, iteration: 374087
loss: 1.018229365348816,grad_norm: 0.8078250701192317, iteration: 374088
loss: 0.9773747324943542,grad_norm: 0.9359942071177838, iteration: 374089
loss: 0.9751161336898804,grad_norm: 0.767881557702525, iteration: 374090
loss: 1.0187394618988037,grad_norm: 0.9560793571607481, iteration: 374091
loss: 1.023025631904602,grad_norm: 0.9374389835864166, iteration: 374092
loss: 1.0038615465164185,grad_norm: 0.9999992457709586, iteration: 374093
loss: 0.9961123466491699,grad_norm: 0.8995414364625149, iteration: 374094
loss: 1.001261591911316,grad_norm: 0.9908215471588602, iteration: 374095
loss: 1.0123127698898315,grad_norm: 0.7755559056557647, iteration: 374096
loss: 1.010859727859497,grad_norm: 0.7305875803674209, iteration: 374097
loss: 0.9583762288093567,grad_norm: 0.7571261726579395, iteration: 374098
loss: 0.989604115486145,grad_norm: 0.9553030112030781, iteration: 374099
loss: 0.9913637042045593,grad_norm: 0.7839814439451132, iteration: 374100
loss: 1.0664381980895996,grad_norm: 0.9999995051141299, iteration: 374101
loss: 1.0090080499649048,grad_norm: 0.7248931888615423, iteration: 374102
loss: 0.990539014339447,grad_norm: 0.820778556372535, iteration: 374103
loss: 1.0113147497177124,grad_norm: 0.7449439770085697, iteration: 374104
loss: 1.0653656721115112,grad_norm: 0.999999182248586, iteration: 374105
loss: 0.996527910232544,grad_norm: 0.8954371560194133, iteration: 374106
loss: 0.9765686988830566,grad_norm: 0.8200833922010572, iteration: 374107
loss: 1.024511456489563,grad_norm: 0.9999991132426266, iteration: 374108
loss: 0.9956942796707153,grad_norm: 0.6951100546132181, iteration: 374109
loss: 1.0223857164382935,grad_norm: 0.9121788182994522, iteration: 374110
loss: 0.9713812470436096,grad_norm: 0.8727984699314398, iteration: 374111
loss: 0.9873813390731812,grad_norm: 0.7900818256774432, iteration: 374112
loss: 1.0019837617874146,grad_norm: 0.7431632467638519, iteration: 374113
loss: 1.0108723640441895,grad_norm: 0.7171605117788277, iteration: 374114
loss: 0.9986496567726135,grad_norm: 0.8837544910746113, iteration: 374115
loss: 1.0073167085647583,grad_norm: 0.8793827422754436, iteration: 374116
loss: 0.9589018821716309,grad_norm: 0.7111190582004235, iteration: 374117
loss: 0.9748861789703369,grad_norm: 0.86825210243972, iteration: 374118
loss: 0.9318932294845581,grad_norm: 0.8338563858521698, iteration: 374119
loss: 1.0093461275100708,grad_norm: 0.808282217044753, iteration: 374120
loss: 0.9992139935493469,grad_norm: 0.8253371248600373, iteration: 374121
loss: 1.0417455434799194,grad_norm: 0.7693649531807686, iteration: 374122
loss: 1.0144431591033936,grad_norm: 0.7993524781549018, iteration: 374123
loss: 0.9549590945243835,grad_norm: 0.9999991051396706, iteration: 374124
loss: 0.9804956912994385,grad_norm: 0.6884894317311189, iteration: 374125
loss: 1.055862545967102,grad_norm: 0.8402881576202783, iteration: 374126
loss: 0.9876907467842102,grad_norm: 0.8288901792373349, iteration: 374127
loss: 1.0640217065811157,grad_norm: 0.8083341626404535, iteration: 374128
loss: 1.0187522172927856,grad_norm: 0.9999997340480671, iteration: 374129
loss: 1.0731987953186035,grad_norm: 0.7919689614519805, iteration: 374130
loss: 0.97769695520401,grad_norm: 0.8592200055859626, iteration: 374131
loss: 1.0908424854278564,grad_norm: 0.891652075109396, iteration: 374132
loss: 1.0191617012023926,grad_norm: 0.8844045069054276, iteration: 374133
loss: 1.0644886493682861,grad_norm: 0.9999990245064886, iteration: 374134
loss: 1.0603370666503906,grad_norm: 0.9999995664608604, iteration: 374135
loss: 1.120789647102356,grad_norm: 0.9999991283004425, iteration: 374136
loss: 1.0376032590866089,grad_norm: 0.8637780248466876, iteration: 374137
loss: 0.9601244926452637,grad_norm: 0.7374455846869392, iteration: 374138
loss: 1.0814167261123657,grad_norm: 0.9999992587182701, iteration: 374139
loss: 0.9708263874053955,grad_norm: 0.8774032684186969, iteration: 374140
loss: 1.0043121576309204,grad_norm: 0.9999998361496777, iteration: 374141
loss: 1.0790057182312012,grad_norm: 0.9999998321812624, iteration: 374142
loss: 1.144974708557129,grad_norm: 0.9999996707203688, iteration: 374143
loss: 1.0278611183166504,grad_norm: 0.999999746581772, iteration: 374144
loss: 1.0079063177108765,grad_norm: 0.705505011169309, iteration: 374145
loss: 0.9826890826225281,grad_norm: 0.905896601641317, iteration: 374146
loss: 0.9980089664459229,grad_norm: 0.7313788427218744, iteration: 374147
loss: 0.9986954927444458,grad_norm: 0.9903055268147133, iteration: 374148
loss: 1.0953865051269531,grad_norm: 0.9174422246307815, iteration: 374149
loss: 1.0324082374572754,grad_norm: 0.7154181362071943, iteration: 374150
loss: 0.9690269231796265,grad_norm: 0.9999992309165031, iteration: 374151
loss: 1.0149571895599365,grad_norm: 0.9999992642652362, iteration: 374152
loss: 1.0291683673858643,grad_norm: 0.999999860807665, iteration: 374153
loss: 1.015392541885376,grad_norm: 0.7423986378771166, iteration: 374154
loss: 0.9673155546188354,grad_norm: 0.8646908821744371, iteration: 374155
loss: 1.0450103282928467,grad_norm: 0.8798720615831506, iteration: 374156
loss: 1.032704472541809,grad_norm: 0.8252313436710247, iteration: 374157
loss: 1.0553451776504517,grad_norm: 0.7062608725371574, iteration: 374158
loss: 1.067541241645813,grad_norm: 0.9140628943249148, iteration: 374159
loss: 0.9738537669181824,grad_norm: 0.9888568298554002, iteration: 374160
loss: 1.0064986944198608,grad_norm: 0.9999992206392008, iteration: 374161
loss: 0.9941962957382202,grad_norm: 0.85119189029506, iteration: 374162
loss: 1.001412272453308,grad_norm: 0.9999998822030738, iteration: 374163
loss: 1.0615460872650146,grad_norm: 0.9360009492378968, iteration: 374164
loss: 1.0083945989608765,grad_norm: 0.7796196591998036, iteration: 374165
loss: 1.0124927759170532,grad_norm: 0.8150969419213767, iteration: 374166
loss: 1.0239465236663818,grad_norm: 0.8414236369412189, iteration: 374167
loss: 1.09743070602417,grad_norm: 0.9999994170191593, iteration: 374168
loss: 1.0214303731918335,grad_norm: 0.8381925067663276, iteration: 374169
loss: 1.0208745002746582,grad_norm: 0.8364030684978969, iteration: 374170
loss: 0.9575963020324707,grad_norm: 0.849743845059366, iteration: 374171
loss: 1.0362553596496582,grad_norm: 0.9334765588286602, iteration: 374172
loss: 1.0184886455535889,grad_norm: 0.9765980079931353, iteration: 374173
loss: 1.0093729496002197,grad_norm: 0.846472657615565, iteration: 374174
loss: 1.0147299766540527,grad_norm: 0.6823358315237482, iteration: 374175
loss: 0.9835211634635925,grad_norm: 0.6891609212325085, iteration: 374176
loss: 0.9636432528495789,grad_norm: 0.6837551538475782, iteration: 374177
loss: 0.9916596412658691,grad_norm: 0.7885802450875887, iteration: 374178
loss: 1.1228159666061401,grad_norm: 0.9999993992415616, iteration: 374179
loss: 1.0508170127868652,grad_norm: 0.9999999382942691, iteration: 374180
loss: 0.9903771281242371,grad_norm: 0.9512097698337582, iteration: 374181
loss: 0.9593164920806885,grad_norm: 0.7600568758933104, iteration: 374182
loss: 1.0364415645599365,grad_norm: 0.8163466307340984, iteration: 374183
loss: 1.0146193504333496,grad_norm: 0.8673758725816646, iteration: 374184
loss: 0.98758465051651,grad_norm: 0.9999991247643613, iteration: 374185
loss: 1.0715813636779785,grad_norm: 0.828967520637048, iteration: 374186
loss: 1.0479485988616943,grad_norm: 0.999999957691946, iteration: 374187
loss: 1.057152509689331,grad_norm: 0.9999997294018695, iteration: 374188
loss: 1.0247926712036133,grad_norm: 0.9999996159708845, iteration: 374189
loss: 0.9834452867507935,grad_norm: 0.8468823127165884, iteration: 374190
loss: 1.002745270729065,grad_norm: 0.9627077577462964, iteration: 374191
loss: 0.9843350052833557,grad_norm: 0.9999992021873106, iteration: 374192
loss: 1.0003936290740967,grad_norm: 0.9999990414916581, iteration: 374193
loss: 0.996607780456543,grad_norm: 0.8791991207276285, iteration: 374194
loss: 1.0027897357940674,grad_norm: 0.7361434810311172, iteration: 374195
loss: 1.0989843606948853,grad_norm: 0.9999992569257941, iteration: 374196
loss: 1.0388970375061035,grad_norm: 0.9999997288133737, iteration: 374197
loss: 0.9926835298538208,grad_norm: 0.9273275667110323, iteration: 374198
loss: 1.024588704109192,grad_norm: 0.8495528645415129, iteration: 374199
loss: 0.9862025380134583,grad_norm: 0.8459325206532898, iteration: 374200
loss: 1.0235792398452759,grad_norm: 0.7461676929718671, iteration: 374201
loss: 1.03402578830719,grad_norm: 0.8538272058868024, iteration: 374202
loss: 0.9901843667030334,grad_norm: 0.8281876983601885, iteration: 374203
loss: 1.0773773193359375,grad_norm: 0.999999062354266, iteration: 374204
loss: 1.021114468574524,grad_norm: 0.8148376826447874, iteration: 374205
loss: 0.9966545701026917,grad_norm: 0.8095880109502628, iteration: 374206
loss: 0.998073160648346,grad_norm: 0.7740294710879408, iteration: 374207
loss: 1.0756112337112427,grad_norm: 0.9999996652788798, iteration: 374208
loss: 1.0215529203414917,grad_norm: 0.9415792068785233, iteration: 374209
loss: 0.9839651584625244,grad_norm: 0.6371275752172544, iteration: 374210
loss: 0.9890800714492798,grad_norm: 0.9987931562754747, iteration: 374211
loss: 1.0022873878479004,grad_norm: 0.7673833720128179, iteration: 374212
loss: 1.0232185125350952,grad_norm: 0.7646884800517061, iteration: 374213
loss: 0.9895852208137512,grad_norm: 0.8236152823297068, iteration: 374214
loss: 0.9823795557022095,grad_norm: 0.9872698490894468, iteration: 374215
loss: 0.9910992383956909,grad_norm: 0.7934751694880872, iteration: 374216
loss: 1.017310619354248,grad_norm: 0.9431650841334042, iteration: 374217
loss: 0.9970858693122864,grad_norm: 0.827649041495123, iteration: 374218
loss: 1.0039129257202148,grad_norm: 0.9999991083952032, iteration: 374219
loss: 1.0007543563842773,grad_norm: 0.7910571516614352, iteration: 374220
loss: 1.015430212020874,grad_norm: 0.9999991968613947, iteration: 374221
loss: 1.0306578874588013,grad_norm: 0.8067047038625167, iteration: 374222
loss: 1.0120607614517212,grad_norm: 0.8421767542499148, iteration: 374223
loss: 1.0277819633483887,grad_norm: 0.7426350689152849, iteration: 374224
loss: 1.0003716945648193,grad_norm: 0.7739699851587591, iteration: 374225
loss: 1.0223159790039062,grad_norm: 0.9999990620665349, iteration: 374226
loss: 0.9673101902008057,grad_norm: 0.8088216762697614, iteration: 374227
loss: 1.0030310153961182,grad_norm: 0.9999997891843024, iteration: 374228
loss: 1.0019289255142212,grad_norm: 0.868145218323129, iteration: 374229
loss: 0.9891594052314758,grad_norm: 0.6829196678583506, iteration: 374230
loss: 1.0081831216812134,grad_norm: 0.8946973087980379, iteration: 374231
loss: 1.0201655626296997,grad_norm: 0.7790743025039716, iteration: 374232
loss: 1.0040576457977295,grad_norm: 0.8119789779368518, iteration: 374233
loss: 0.9764789938926697,grad_norm: 0.8517101620429548, iteration: 374234
loss: 1.02606201171875,grad_norm: 0.9999989783707264, iteration: 374235
loss: 1.0020039081573486,grad_norm: 0.7436939235583588, iteration: 374236
loss: 0.994286060333252,grad_norm: 0.8116708410928003, iteration: 374237
loss: 0.9984980821609497,grad_norm: 0.8652281990766579, iteration: 374238
loss: 0.9987956881523132,grad_norm: 0.7335613130169707, iteration: 374239
loss: 0.9691332578659058,grad_norm: 0.8943752503536357, iteration: 374240
loss: 1.0755605697631836,grad_norm: 0.7624012820770016, iteration: 374241
loss: 0.9984519481658936,grad_norm: 0.7881332678075753, iteration: 374242
loss: 1.0398327112197876,grad_norm: 0.9999994178726139, iteration: 374243
loss: 1.0242300033569336,grad_norm: 0.9999998437264046, iteration: 374244
loss: 1.0236999988555908,grad_norm: 0.9999990930355862, iteration: 374245
loss: 0.9651452302932739,grad_norm: 0.9463481344929051, iteration: 374246
loss: 0.9962365031242371,grad_norm: 0.8512161871887459, iteration: 374247
loss: 0.9892723560333252,grad_norm: 0.7181605083545699, iteration: 374248
loss: 0.9946854114532471,grad_norm: 0.7748113450936659, iteration: 374249
loss: 1.0072999000549316,grad_norm: 0.8363615725179616, iteration: 374250
loss: 1.0026636123657227,grad_norm: 0.9247096060806047, iteration: 374251
loss: 0.9803242087364197,grad_norm: 0.8096315780294211, iteration: 374252
loss: 1.008546233177185,grad_norm: 0.8017722552574185, iteration: 374253
loss: 0.983808159828186,grad_norm: 0.8369857007269903, iteration: 374254
loss: 0.980549693107605,grad_norm: 0.7745701323203527, iteration: 374255
loss: 0.9631012082099915,grad_norm: 0.7753581099725939, iteration: 374256
loss: 0.9689667820930481,grad_norm: 0.7985462484269436, iteration: 374257
loss: 0.9720274209976196,grad_norm: 0.8940261697100046, iteration: 374258
loss: 1.0655523538589478,grad_norm: 0.7061189712637014, iteration: 374259
loss: 1.0163830518722534,grad_norm: 0.999999184077243, iteration: 374260
loss: 0.9972587823867798,grad_norm: 0.9999996890124375, iteration: 374261
loss: 0.9947202801704407,grad_norm: 0.7200530457610753, iteration: 374262
loss: 1.1191304922103882,grad_norm: 0.9618955728726485, iteration: 374263
loss: 0.9751525521278381,grad_norm: 0.7526279607377762, iteration: 374264
loss: 0.9756261110305786,grad_norm: 0.7238006356824981, iteration: 374265
loss: 1.040661096572876,grad_norm: 0.8899144305198563, iteration: 374266
loss: 1.0226424932479858,grad_norm: 0.9999992468902746, iteration: 374267
loss: 1.031612753868103,grad_norm: 0.7814523352788408, iteration: 374268
loss: 1.0281976461410522,grad_norm: 0.8290210172308322, iteration: 374269
loss: 1.0047138929367065,grad_norm: 0.8286111570848315, iteration: 374270
loss: 1.0078130960464478,grad_norm: 0.8264361817151322, iteration: 374271
loss: 1.0141915082931519,grad_norm: 0.8072967202793527, iteration: 374272
loss: 0.9990715384483337,grad_norm: 0.6788663947043168, iteration: 374273
loss: 1.0418120622634888,grad_norm: 0.7062527962595262, iteration: 374274
loss: 0.9960353970527649,grad_norm: 0.8449615508434918, iteration: 374275
loss: 1.006415843963623,grad_norm: 0.8818481185759984, iteration: 374276
loss: 0.9972398281097412,grad_norm: 0.9999992561182391, iteration: 374277
loss: 1.0322915315628052,grad_norm: 0.9035274788784405, iteration: 374278
loss: 0.9843705296516418,grad_norm: 0.8717084899616957, iteration: 374279
loss: 0.9822065830230713,grad_norm: 0.7709357573014862, iteration: 374280
loss: 1.0224744081497192,grad_norm: 0.635716594310431, iteration: 374281
loss: 0.9692544937133789,grad_norm: 0.7625014946682785, iteration: 374282
loss: 0.9870657324790955,grad_norm: 0.8967937923408105, iteration: 374283
loss: 0.9739258289337158,grad_norm: 0.768235629303704, iteration: 374284
loss: 0.9816705584526062,grad_norm: 0.8919825893646827, iteration: 374285
loss: 0.9877791404724121,grad_norm: 0.8402845604542593, iteration: 374286
loss: 1.0088896751403809,grad_norm: 0.8621987067275353, iteration: 374287
loss: 1.1083807945251465,grad_norm: 0.9999990932477988, iteration: 374288
loss: 1.023171305656433,grad_norm: 0.9999992239466915, iteration: 374289
loss: 1.0221953392028809,grad_norm: 0.883638736278385, iteration: 374290
loss: 0.9905973076820374,grad_norm: 0.8059233783844996, iteration: 374291
loss: 1.0009795427322388,grad_norm: 0.8269457117072466, iteration: 374292
loss: 1.0063121318817139,grad_norm: 0.9999990790225991, iteration: 374293
loss: 1.015877604484558,grad_norm: 0.8601274501787317, iteration: 374294
loss: 0.9769296646118164,grad_norm: 0.8365906200116625, iteration: 374295
loss: 0.9858288764953613,grad_norm: 0.6417729778228315, iteration: 374296
loss: 1.0077450275421143,grad_norm: 0.6335666835767689, iteration: 374297
loss: 0.9930639863014221,grad_norm: 0.9304848254628927, iteration: 374298
loss: 1.0311170816421509,grad_norm: 0.8081603582664025, iteration: 374299
loss: 1.0110524892807007,grad_norm: 0.9999990679451555, iteration: 374300
loss: 0.9868078827857971,grad_norm: 0.6188718356438808, iteration: 374301
loss: 0.9795289039611816,grad_norm: 0.8265010267796253, iteration: 374302
loss: 0.9854485392570496,grad_norm: 0.8911404913084735, iteration: 374303
loss: 1.0433183908462524,grad_norm: 1.0000000870789587, iteration: 374304
loss: 0.9971311092376709,grad_norm: 0.7027650896187352, iteration: 374305
loss: 0.959324300289154,grad_norm: 0.8359378964721954, iteration: 374306
loss: 1.0141326189041138,grad_norm: 0.8193213424465694, iteration: 374307
loss: 0.9809610247612,grad_norm: 0.785584895283825, iteration: 374308
loss: 1.0125080347061157,grad_norm: 0.7652089271379703, iteration: 374309
loss: 1.0025347471237183,grad_norm: 0.7632607866430272, iteration: 374310
loss: 1.015138030052185,grad_norm: 0.9294558166356173, iteration: 374311
loss: 0.9692584276199341,grad_norm: 0.8421259668342925, iteration: 374312
loss: 1.0222575664520264,grad_norm: 0.8400496919238042, iteration: 374313
loss: 0.9967427849769592,grad_norm: 0.8218049517740003, iteration: 374314
loss: 0.9659828543663025,grad_norm: 0.7229829213700713, iteration: 374315
loss: 1.0528289079666138,grad_norm: 0.8104495719773808, iteration: 374316
loss: 1.0417064428329468,grad_norm: 0.8046521376323873, iteration: 374317
loss: 1.1671992540359497,grad_norm: 0.9999992701561125, iteration: 374318
loss: 1.029097318649292,grad_norm: 0.8724169176324179, iteration: 374319
loss: 1.0444763898849487,grad_norm: 0.9379759910127136, iteration: 374320
loss: 0.9952483177185059,grad_norm: 0.7852861889138215, iteration: 374321
loss: 1.028833270072937,grad_norm: 0.7777806773827662, iteration: 374322
loss: 1.0264192819595337,grad_norm: 0.8864707713329003, iteration: 374323
loss: 0.9802209734916687,grad_norm: 0.8328266317196105, iteration: 374324
loss: 0.9845125079154968,grad_norm: 0.7510575476666839, iteration: 374325
loss: 0.9894593954086304,grad_norm: 0.8724176079579707, iteration: 374326
loss: 0.9723460078239441,grad_norm: 0.9999990696768185, iteration: 374327
loss: 0.9968662858009338,grad_norm: 0.7147413446683428, iteration: 374328
loss: 1.0282710790634155,grad_norm: 0.999999341878315, iteration: 374329
loss: 0.9762105941772461,grad_norm: 0.8039250277019908, iteration: 374330
loss: 0.9868165850639343,grad_norm: 0.7123788595144298, iteration: 374331
loss: 0.9832602143287659,grad_norm: 0.7394958481817959, iteration: 374332
loss: 1.0091667175292969,grad_norm: 0.7104143166715684, iteration: 374333
loss: 0.9923827648162842,grad_norm: 0.7530139067737142, iteration: 374334
loss: 0.9736337065696716,grad_norm: 0.7628641312825424, iteration: 374335
loss: 1.0155236721038818,grad_norm: 0.8184019054101053, iteration: 374336
loss: 1.037330985069275,grad_norm: 0.8565342533942565, iteration: 374337
loss: 1.014768123626709,grad_norm: 0.8902427297330484, iteration: 374338
loss: 0.979309618473053,grad_norm: 0.7744688455441948, iteration: 374339
loss: 0.978173553943634,grad_norm: 0.7384425711097767, iteration: 374340
loss: 1.0312713384628296,grad_norm: 0.8461928139777806, iteration: 374341
loss: 1.0228911638259888,grad_norm: 0.7973553112170029, iteration: 374342
loss: 1.0681698322296143,grad_norm: 0.8504797763746687, iteration: 374343
loss: 0.9565072655677795,grad_norm: 0.8628040558331643, iteration: 374344
loss: 1.036864161491394,grad_norm: 0.8486004100509781, iteration: 374345
loss: 1.001535415649414,grad_norm: 0.9999994597398905, iteration: 374346
loss: 0.9624438285827637,grad_norm: 0.8320251088143251, iteration: 374347
loss: 1.015624761581421,grad_norm: 0.8872210876685118, iteration: 374348
loss: 0.9407421350479126,grad_norm: 0.8137659814274287, iteration: 374349
loss: 0.9935160875320435,grad_norm: 0.9999991140403277, iteration: 374350
loss: 1.0851174592971802,grad_norm: 0.9866071379366564, iteration: 374351
loss: 0.9785178899765015,grad_norm: 0.9999991122012838, iteration: 374352
loss: 1.003349781036377,grad_norm: 0.7633922983215702, iteration: 374353
loss: 1.0150172710418701,grad_norm: 0.7989152804662346, iteration: 374354
loss: 0.9845630526542664,grad_norm: 0.7925903450991972, iteration: 374355
loss: 1.0279892683029175,grad_norm: 0.7420033851192532, iteration: 374356
loss: 0.9912658929824829,grad_norm: 0.7561590267655632, iteration: 374357
loss: 0.9697222709655762,grad_norm: 0.768160405088371, iteration: 374358
loss: 0.9969483613967896,grad_norm: 0.9704369398174341, iteration: 374359
loss: 0.988837480545044,grad_norm: 0.8236099706314451, iteration: 374360
loss: 0.993745744228363,grad_norm: 0.911433207607082, iteration: 374361
loss: 0.986629843711853,grad_norm: 0.9999998601678773, iteration: 374362
loss: 1.0389430522918701,grad_norm: 0.8988341156798406, iteration: 374363
loss: 1.0009236335754395,grad_norm: 0.9999998635703311, iteration: 374364
loss: 1.0112955570220947,grad_norm: 0.9161847346663776, iteration: 374365
loss: 0.9578322172164917,grad_norm: 0.7684811646711095, iteration: 374366
loss: 1.0003154277801514,grad_norm: 0.7652280686649205, iteration: 374367
loss: 0.9885203838348389,grad_norm: 0.7773251199477588, iteration: 374368
loss: 1.0143083333969116,grad_norm: 0.7339690146141786, iteration: 374369
loss: 0.995424747467041,grad_norm: 0.7679870137312895, iteration: 374370
loss: 1.003995656967163,grad_norm: 0.8038488973292419, iteration: 374371
loss: 0.9794376492500305,grad_norm: 0.9700682068128643, iteration: 374372
loss: 1.0327353477478027,grad_norm: 0.76464571207611, iteration: 374373
loss: 1.0284980535507202,grad_norm: 0.931168221002066, iteration: 374374
loss: 1.01897132396698,grad_norm: 0.917977468835273, iteration: 374375
loss: 1.0172860622406006,grad_norm: 0.9999991881288648, iteration: 374376
loss: 0.98370361328125,grad_norm: 0.8679826380089704, iteration: 374377
loss: 1.0250009298324585,grad_norm: 0.8337388844212694, iteration: 374378
loss: 0.9800235033035278,grad_norm: 0.7905819402358439, iteration: 374379
loss: 1.0640476942062378,grad_norm: 0.8940411961873425, iteration: 374380
loss: 1.0470625162124634,grad_norm: 0.7130308074937007, iteration: 374381
loss: 0.9757495522499084,grad_norm: 0.6777237709793958, iteration: 374382
loss: 1.0038446187973022,grad_norm: 0.8662525776725591, iteration: 374383
loss: 1.0494946241378784,grad_norm: 0.9470285458740696, iteration: 374384
loss: 1.000344157218933,grad_norm: 0.7096792866203782, iteration: 374385
loss: 0.9851484298706055,grad_norm: 0.6827625955675425, iteration: 374386
loss: 1.0074931383132935,grad_norm: 0.9677624912543423, iteration: 374387
loss: 1.1748743057250977,grad_norm: 0.9026909743661469, iteration: 374388
loss: 0.9762406945228577,grad_norm: 0.7521138550848895, iteration: 374389
loss: 1.0015695095062256,grad_norm: 0.862248574632317, iteration: 374390
loss: 1.029597282409668,grad_norm: 0.9999999740699107, iteration: 374391
loss: 1.0116848945617676,grad_norm: 0.9999998625568195, iteration: 374392
loss: 1.0756587982177734,grad_norm: 0.999999137419832, iteration: 374393
loss: 1.0177072286605835,grad_norm: 0.8712057477596574, iteration: 374394
loss: 0.9977267384529114,grad_norm: 0.8107333487479529, iteration: 374395
loss: 1.0348832607269287,grad_norm: 0.8916291207637549, iteration: 374396
loss: 0.9868439435958862,grad_norm: 0.7631848840880086, iteration: 374397
loss: 1.0618579387664795,grad_norm: 0.9999992670022105, iteration: 374398
loss: 0.9815440773963928,grad_norm: 0.6717572845603688, iteration: 374399
loss: 1.0417976379394531,grad_norm: 0.9453877358784141, iteration: 374400
loss: 1.0520834922790527,grad_norm: 0.8782220617546571, iteration: 374401
loss: 1.035258173942566,grad_norm: 0.9999997956108408, iteration: 374402
loss: 1.0074596405029297,grad_norm: 0.8615891820621059, iteration: 374403
loss: 1.1002237796783447,grad_norm: 0.9999999033057426, iteration: 374404
loss: 1.0177985429763794,grad_norm: 0.9099489111791313, iteration: 374405
loss: 1.0016313791275024,grad_norm: 0.9257243586644371, iteration: 374406
loss: 0.9810548424720764,grad_norm: 0.8593086229172774, iteration: 374407
loss: 1.021338701248169,grad_norm: 0.8239869226569184, iteration: 374408
loss: 1.0140544176101685,grad_norm: 0.7132787847426542, iteration: 374409
loss: 1.0074865818023682,grad_norm: 0.8104684077612156, iteration: 374410
loss: 0.9794321060180664,grad_norm: 0.6531430735265318, iteration: 374411
loss: 1.0097206830978394,grad_norm: 0.7916385106910974, iteration: 374412
loss: 1.0068023204803467,grad_norm: 0.9999990797303359, iteration: 374413
loss: 0.9743582010269165,grad_norm: 0.8181225271388813, iteration: 374414
loss: 1.0171501636505127,grad_norm: 0.9999994587810604, iteration: 374415
loss: 1.063457727432251,grad_norm: 0.9263559409053235, iteration: 374416
loss: 1.0490368604660034,grad_norm: 0.9999996718347642, iteration: 374417
loss: 1.001776099205017,grad_norm: 0.7922872333759885, iteration: 374418
loss: 0.9845147728919983,grad_norm: 0.7217881496249218, iteration: 374419
loss: 1.0566136837005615,grad_norm: 0.7379617186750298, iteration: 374420
loss: 1.0205135345458984,grad_norm: 0.7857094563994842, iteration: 374421
loss: 0.9865562319755554,grad_norm: 0.7537343595411985, iteration: 374422
loss: 1.0799944400787354,grad_norm: 0.9999991673076614, iteration: 374423
loss: 1.0065938234329224,grad_norm: 0.9359347840562465, iteration: 374424
loss: 1.045022964477539,grad_norm: 0.9182594444579973, iteration: 374425
loss: 1.1543062925338745,grad_norm: 0.9999992644566708, iteration: 374426
loss: 1.0219037532806396,grad_norm: 0.7892681398198017, iteration: 374427
loss: 1.0256788730621338,grad_norm: 0.7847790774699944, iteration: 374428
loss: 1.0084909200668335,grad_norm: 0.7720996491997665, iteration: 374429
loss: 1.073177456855774,grad_norm: 0.999999818249663, iteration: 374430
loss: 1.03654146194458,grad_norm: 0.999999462468643, iteration: 374431
loss: 1.027376651763916,grad_norm: 0.9357792519944113, iteration: 374432
loss: 0.9640653729438782,grad_norm: 0.7819700132694765, iteration: 374433
loss: 1.0621088743209839,grad_norm: 0.7415438927894273, iteration: 374434
loss: 0.975491464138031,grad_norm: 0.8566511103612716, iteration: 374435
loss: 1.0607075691223145,grad_norm: 0.9999991304064558, iteration: 374436
loss: 1.2032406330108643,grad_norm: 0.9999994514690829, iteration: 374437
loss: 1.0892667770385742,grad_norm: 0.9999999050332621, iteration: 374438
loss: 1.0662860870361328,grad_norm: 0.6962624636171261, iteration: 374439
loss: 1.0972217321395874,grad_norm: 0.9999998331826693, iteration: 374440
loss: 1.0470014810562134,grad_norm: 0.9999999544245363, iteration: 374441
loss: 1.019158959388733,grad_norm: 0.7552046341198909, iteration: 374442
loss: 0.988848865032196,grad_norm: 0.7846797904114003, iteration: 374443
loss: 1.0279687643051147,grad_norm: 0.8232584129388427, iteration: 374444
loss: 0.9540535807609558,grad_norm: 0.6866150315836622, iteration: 374445
loss: 1.1681262254714966,grad_norm: 0.999999992470241, iteration: 374446
loss: 1.0696827173233032,grad_norm: 0.8737344419288408, iteration: 374447
loss: 1.0624421834945679,grad_norm: 0.9841258496175448, iteration: 374448
loss: 1.0309120416641235,grad_norm: 0.9714759092072244, iteration: 374449
loss: 0.9745824933052063,grad_norm: 0.7580771678518606, iteration: 374450
loss: 1.0247762203216553,grad_norm: 0.9713087083830887, iteration: 374451
loss: 1.0109326839447021,grad_norm: 0.6947332038757423, iteration: 374452
loss: 1.0608363151550293,grad_norm: 0.9999991147517737, iteration: 374453
loss: 1.0211180448532104,grad_norm: 0.8399968111421329, iteration: 374454
loss: 1.0105037689208984,grad_norm: 0.9999998665230184, iteration: 374455
loss: 1.1214934587478638,grad_norm: 0.9555456439799235, iteration: 374456
loss: 1.0037333965301514,grad_norm: 0.9999990294616106, iteration: 374457
loss: 1.0029689073562622,grad_norm: 0.6875388534207066, iteration: 374458
loss: 1.0131051540374756,grad_norm: 0.9720982761050851, iteration: 374459
loss: 0.9969774484634399,grad_norm: 0.7615737431310614, iteration: 374460
loss: 1.0010133981704712,grad_norm: 0.9655214279029181, iteration: 374461
loss: 1.2568551301956177,grad_norm: 0.9999997441456034, iteration: 374462
loss: 0.9753120541572571,grad_norm: 0.7198034775918943, iteration: 374463
loss: 1.0039901733398438,grad_norm: 0.7867062740663846, iteration: 374464
loss: 0.9794764518737793,grad_norm: 0.7621686225697357, iteration: 374465
loss: 0.9792547821998596,grad_norm: 0.7712114041717554, iteration: 374466
loss: 0.9922053813934326,grad_norm: 0.6654628625146396, iteration: 374467
loss: 1.047230839729309,grad_norm: 0.9165735158040228, iteration: 374468
loss: 0.9547042846679688,grad_norm: 0.808083182777968, iteration: 374469
loss: 1.023212194442749,grad_norm: 0.7170790100211544, iteration: 374470
loss: 1.0072263479232788,grad_norm: 0.7744417322262579, iteration: 374471
loss: 1.0508431196212769,grad_norm: 0.8333460237871076, iteration: 374472
loss: 1.0110565423965454,grad_norm: 0.7803793128861236, iteration: 374473
loss: 1.0562304258346558,grad_norm: 0.9999995195168334, iteration: 374474
loss: 1.0214853286743164,grad_norm: 0.8532199161535259, iteration: 374475
loss: 1.0266728401184082,grad_norm: 0.6816416485995772, iteration: 374476
loss: 0.9815120697021484,grad_norm: 0.7694986810273754, iteration: 374477
loss: 1.01154625415802,grad_norm: 0.8463685950049196, iteration: 374478
loss: 0.9780979752540588,grad_norm: 0.9372331473368091, iteration: 374479
loss: 1.0575834512710571,grad_norm: 0.8102896401515843, iteration: 374480
loss: 1.1005945205688477,grad_norm: 0.7681669426219458, iteration: 374481
loss: 1.007802128791809,grad_norm: 0.8431436413858171, iteration: 374482
loss: 1.0405765771865845,grad_norm: 0.8736989934944, iteration: 374483
loss: 1.011447787284851,grad_norm: 0.8134815630132782, iteration: 374484
loss: 1.0059990882873535,grad_norm: 0.7832924070552862, iteration: 374485
loss: 1.0078831911087036,grad_norm: 0.8437883830297592, iteration: 374486
loss: 1.0413743257522583,grad_norm: 0.9999999662035215, iteration: 374487
loss: 1.014422059059143,grad_norm: 0.87577704201165, iteration: 374488
loss: 1.0429831743240356,grad_norm: 0.914174836327494, iteration: 374489
loss: 1.0099859237670898,grad_norm: 0.9999999108249648, iteration: 374490
loss: 0.9773567914962769,grad_norm: 0.9999993598354884, iteration: 374491
loss: 0.9776212573051453,grad_norm: 0.7985963358643534, iteration: 374492
loss: 1.0329456329345703,grad_norm: 0.9999992810351827, iteration: 374493
loss: 0.9730145335197449,grad_norm: 0.840777259720103, iteration: 374494
loss: 0.9960221648216248,grad_norm: 0.7600182587944887, iteration: 374495
loss: 1.0246655941009521,grad_norm: 0.9002964238615675, iteration: 374496
loss: 1.020290493965149,grad_norm: 0.9999999607060059, iteration: 374497
loss: 0.9874075055122375,grad_norm: 0.8071498916968786, iteration: 374498
loss: 1.0095009803771973,grad_norm: 0.9999991421459418, iteration: 374499
loss: 0.9921769499778748,grad_norm: 0.7585163207924629, iteration: 374500
loss: 0.9906259775161743,grad_norm: 0.684804374396673, iteration: 374501
loss: 1.0829426050186157,grad_norm: 0.9999998275828872, iteration: 374502
loss: 0.990288257598877,grad_norm: 0.8733794547839487, iteration: 374503
loss: 0.9863226413726807,grad_norm: 0.8691134299387893, iteration: 374504
loss: 1.0173437595367432,grad_norm: 0.7784954214792938, iteration: 374505
loss: 0.9932212233543396,grad_norm: 0.7852908300606071, iteration: 374506
loss: 0.962020993232727,grad_norm: 0.7398251503689108, iteration: 374507
loss: 0.9846692681312561,grad_norm: 0.7656944312486529, iteration: 374508
loss: 1.0229734182357788,grad_norm: 0.6402765955108689, iteration: 374509
loss: 1.0265860557556152,grad_norm: 0.6815995450320597, iteration: 374510
loss: 0.9992970824241638,grad_norm: 0.7511563555811035, iteration: 374511
loss: 1.032941222190857,grad_norm: 0.9790872250730261, iteration: 374512
loss: 0.9859552383422852,grad_norm: 0.999999638326007, iteration: 374513
loss: 1.0269709825515747,grad_norm: 0.9999998860918607, iteration: 374514
loss: 0.9676181077957153,grad_norm: 0.7957888598983136, iteration: 374515
loss: 1.0260281562805176,grad_norm: 0.6490216895405364, iteration: 374516
loss: 0.9738196134567261,grad_norm: 0.8966834655029403, iteration: 374517
loss: 1.0025728940963745,grad_norm: 0.8158701443078639, iteration: 374518
loss: 0.9948012232780457,grad_norm: 0.7518964637866791, iteration: 374519
loss: 1.0052411556243896,grad_norm: 0.8619418976292129, iteration: 374520
loss: 1.007474660873413,grad_norm: 0.837077547416342, iteration: 374521
loss: 0.9844300150871277,grad_norm: 0.8189617622852761, iteration: 374522
loss: 0.9881970882415771,grad_norm: 0.7471643367666875, iteration: 374523
loss: 1.072880744934082,grad_norm: 0.9999997152850901, iteration: 374524
loss: 0.9866903424263,grad_norm: 0.7185777943469752, iteration: 374525
loss: 0.986411452293396,grad_norm: 0.7314183281360644, iteration: 374526
loss: 1.0074464082717896,grad_norm: 0.781177658693887, iteration: 374527
loss: 0.9809483289718628,grad_norm: 0.9556318565118573, iteration: 374528
loss: 1.0414748191833496,grad_norm: 0.8308708769741874, iteration: 374529
loss: 0.9695796370506287,grad_norm: 0.7660895985983813, iteration: 374530
loss: 0.9767245054244995,grad_norm: 0.6877438182065633, iteration: 374531
loss: 1.001752495765686,grad_norm: 0.9999998816478748, iteration: 374532
loss: 1.034555435180664,grad_norm: 0.9999990132041201, iteration: 374533
loss: 1.0065250396728516,grad_norm: 0.7624292895319312, iteration: 374534
loss: 0.9829146862030029,grad_norm: 0.831708178553346, iteration: 374535
loss: 0.969208836555481,grad_norm: 0.7108030354753102, iteration: 374536
loss: 1.0229626893997192,grad_norm: 0.8117843580063255, iteration: 374537
loss: 0.9622122049331665,grad_norm: 0.7634054547925456, iteration: 374538
loss: 0.9633585810661316,grad_norm: 0.7742600030100504, iteration: 374539
loss: 1.0199437141418457,grad_norm: 0.9900169182453304, iteration: 374540
loss: 0.9700454473495483,grad_norm: 0.7051851730899776, iteration: 374541
loss: 0.9893179535865784,grad_norm: 0.8437837183285035, iteration: 374542
loss: 1.0102837085723877,grad_norm: 0.8732998065699569, iteration: 374543
loss: 1.0436606407165527,grad_norm: 0.8193828767619125, iteration: 374544
loss: 1.0540562868118286,grad_norm: 0.6497293362493921, iteration: 374545
loss: 1.101914882659912,grad_norm: 0.9542971913326969, iteration: 374546
loss: 1.0000337362289429,grad_norm: 0.7023006364283194, iteration: 374547
loss: 0.9989579916000366,grad_norm: 0.7307975138029383, iteration: 374548
loss: 0.9800049066543579,grad_norm: 0.9999994900031038, iteration: 374549
loss: 0.9959821105003357,grad_norm: 0.7722938439467726, iteration: 374550
loss: 1.0052564144134521,grad_norm: 0.8517656091260384, iteration: 374551
loss: 0.9787529110908508,grad_norm: 0.7404065012060884, iteration: 374552
loss: 0.9991747736930847,grad_norm: 0.7130867033360558, iteration: 374553
loss: 1.0152779817581177,grad_norm: 0.6720162602505217, iteration: 374554
loss: 0.9634682536125183,grad_norm: 0.8242441571242114, iteration: 374555
loss: 0.9969004988670349,grad_norm: 0.9999995008500028, iteration: 374556
loss: 0.9975982904434204,grad_norm: 0.8087616469109271, iteration: 374557
loss: 1.098578929901123,grad_norm: 0.7772806182963597, iteration: 374558
loss: 1.0210450887680054,grad_norm: 0.8114454176207576, iteration: 374559
loss: 0.994769811630249,grad_norm: 0.7580920905693299, iteration: 374560
loss: 1.0394032001495361,grad_norm: 1.0000000151301154, iteration: 374561
loss: 1.001176118850708,grad_norm: 0.7865152523761416, iteration: 374562
loss: 1.0298799276351929,grad_norm: 0.9999999204602934, iteration: 374563
loss: 1.014560580253601,grad_norm: 0.8108846781413375, iteration: 374564
loss: 1.0032397508621216,grad_norm: 0.9159818725006327, iteration: 374565
loss: 0.9828683137893677,grad_norm: 0.7956435866349185, iteration: 374566
loss: 0.9928922653198242,grad_norm: 0.7879296802839085, iteration: 374567
loss: 0.9676015973091125,grad_norm: 0.8035164071848205, iteration: 374568
loss: 0.9947511553764343,grad_norm: 0.7724389350632346, iteration: 374569
loss: 0.9930868148803711,grad_norm: 0.9887107788163028, iteration: 374570
loss: 1.0097812414169312,grad_norm: 0.856962360024718, iteration: 374571
loss: 0.9874264001846313,grad_norm: 0.9999999791053558, iteration: 374572
loss: 0.9934636354446411,grad_norm: 0.8846278196559492, iteration: 374573
loss: 0.983432412147522,grad_norm: 0.8413030331829033, iteration: 374574
loss: 1.046832799911499,grad_norm: 0.8260689622449277, iteration: 374575
loss: 1.0073654651641846,grad_norm: 0.8032320608911263, iteration: 374576
loss: 1.0012884140014648,grad_norm: 0.6468586611293263, iteration: 374577
loss: 1.008306622505188,grad_norm: 0.6932371081566235, iteration: 374578
loss: 1.017742395401001,grad_norm: 0.9999997823033576, iteration: 374579
loss: 0.9966631531715393,grad_norm: 0.7513873792108624, iteration: 374580
loss: 1.0002225637435913,grad_norm: 0.9639156653483211, iteration: 374581
loss: 0.9904303550720215,grad_norm: 0.7303189279906495, iteration: 374582
loss: 0.9977017045021057,grad_norm: 0.7422418906545365, iteration: 374583
loss: 1.0325276851654053,grad_norm: 0.7480409197161058, iteration: 374584
loss: 1.003364086151123,grad_norm: 0.7029939466786086, iteration: 374585
loss: 0.9928193092346191,grad_norm: 0.850263344131773, iteration: 374586
loss: 1.0165245532989502,grad_norm: 0.812217748145689, iteration: 374587
loss: 0.9855188727378845,grad_norm: 0.72944189954192, iteration: 374588
loss: 1.0754281282424927,grad_norm: 0.999999761533517, iteration: 374589
loss: 1.0001826286315918,grad_norm: 0.9999998526643901, iteration: 374590
loss: 1.0094696283340454,grad_norm: 0.7627905587267299, iteration: 374591
loss: 1.1586145162582397,grad_norm: 0.9999998504353796, iteration: 374592
loss: 0.9893401861190796,grad_norm: 0.8650202467145512, iteration: 374593
loss: 1.0073376893997192,grad_norm: 0.7868465749969464, iteration: 374594
loss: 1.006874442100525,grad_norm: 0.8201607586112538, iteration: 374595
loss: 1.0401015281677246,grad_norm: 0.7780624362782106, iteration: 374596
loss: 0.9789848327636719,grad_norm: 0.8990742101148085, iteration: 374597
loss: 1.0042929649353027,grad_norm: 0.9999994412649358, iteration: 374598
loss: 1.028403639793396,grad_norm: 0.999999728440683, iteration: 374599
loss: 0.9817640781402588,grad_norm: 0.7748550087200704, iteration: 374600
loss: 0.9831858277320862,grad_norm: 0.7336827547298029, iteration: 374601
loss: 0.9397677779197693,grad_norm: 0.668384358413248, iteration: 374602
loss: 0.973434567451477,grad_norm: 0.9999992051515754, iteration: 374603
loss: 1.1294161081314087,grad_norm: 0.7970251656485987, iteration: 374604
loss: 0.9980521202087402,grad_norm: 0.8043854832864398, iteration: 374605
loss: 0.99757981300354,grad_norm: 0.7879685130124496, iteration: 374606
loss: 0.9802034497261047,grad_norm: 0.6937945095535141, iteration: 374607
loss: 0.9530972838401794,grad_norm: 0.7964761921185888, iteration: 374608
loss: 1.0081790685653687,grad_norm: 0.8744551536444849, iteration: 374609
loss: 1.0385535955429077,grad_norm: 0.8840711822588311, iteration: 374610
loss: 0.9487195014953613,grad_norm: 0.8266295307356111, iteration: 374611
loss: 0.9975594282150269,grad_norm: 0.859480799502699, iteration: 374612
loss: 0.965509295463562,grad_norm: 0.7537264789619832, iteration: 374613
loss: 0.9743484258651733,grad_norm: 0.70601470159399, iteration: 374614
loss: 1.0407826900482178,grad_norm: 0.9999990091427345, iteration: 374615
loss: 1.0216470956802368,grad_norm: 0.8669444372388327, iteration: 374616
loss: 1.0385961532592773,grad_norm: 0.7590319520467345, iteration: 374617
loss: 1.0355021953582764,grad_norm: 0.8682784638489358, iteration: 374618
loss: 1.0269415378570557,grad_norm: 0.7443247589684848, iteration: 374619
loss: 1.0047502517700195,grad_norm: 0.9999995447026391, iteration: 374620
loss: 1.0344831943511963,grad_norm: 0.9999997583494852, iteration: 374621
loss: 1.06144380569458,grad_norm: 0.9999997775729019, iteration: 374622
loss: 1.0163841247558594,grad_norm: 0.8973982170356567, iteration: 374623
loss: 1.0146552324295044,grad_norm: 0.8737771372713092, iteration: 374624
loss: 0.9709083437919617,grad_norm: 0.9630828387007954, iteration: 374625
loss: 1.0078636407852173,grad_norm: 0.999999170293361, iteration: 374626
loss: 1.015112280845642,grad_norm: 0.9089907249364608, iteration: 374627
loss: 0.9713191986083984,grad_norm: 0.7498510399668945, iteration: 374628
loss: 1.0098037719726562,grad_norm: 0.8630457526373874, iteration: 374629
loss: 1.0127596855163574,grad_norm: 0.9999991220672078, iteration: 374630
loss: 1.0331815481185913,grad_norm: 0.7725259301191352, iteration: 374631
loss: 0.9963275790214539,grad_norm: 0.8916454793573957, iteration: 374632
loss: 1.0283540487289429,grad_norm: 0.6497332188349767, iteration: 374633
loss: 1.002476692199707,grad_norm: 0.9188988039417393, iteration: 374634
loss: 1.0571194887161255,grad_norm: 0.9999998148098663, iteration: 374635
loss: 0.9982370734214783,grad_norm: 0.8701352544273763, iteration: 374636
loss: 0.9964532852172852,grad_norm: 0.750797260377457, iteration: 374637
loss: 0.9972452521324158,grad_norm: 0.7166326125120294, iteration: 374638
loss: 0.9781662225723267,grad_norm: 0.7857707536418531, iteration: 374639
loss: 1.0234711170196533,grad_norm: 0.8099411388856596, iteration: 374640
loss: 1.027843952178955,grad_norm: 0.715427413298973, iteration: 374641
loss: 1.0154237747192383,grad_norm: 0.9162054913264588, iteration: 374642
loss: 0.9726623892784119,grad_norm: 0.785820625167224, iteration: 374643
loss: 1.0055466890335083,grad_norm: 0.9123304012334639, iteration: 374644
loss: 0.9586493968963623,grad_norm: 0.871856671861176, iteration: 374645
loss: 1.0189812183380127,grad_norm: 0.7599079199761238, iteration: 374646
loss: 1.0127273797988892,grad_norm: 0.8672115699766076, iteration: 374647
loss: 1.0155850648880005,grad_norm: 0.8073916582234829, iteration: 374648
loss: 1.00071382522583,grad_norm: 0.9691357911481345, iteration: 374649
loss: 1.0358035564422607,grad_norm: 0.9999992051571536, iteration: 374650
loss: 1.0020911693572998,grad_norm: 0.718793507499853, iteration: 374651
loss: 0.9758304357528687,grad_norm: 0.8722442201867656, iteration: 374652
loss: 1.0095947980880737,grad_norm: 0.7012640298413746, iteration: 374653
loss: 0.9931907057762146,grad_norm: 0.7214473901955685, iteration: 374654
loss: 1.002834677696228,grad_norm: 0.8580771063500282, iteration: 374655
loss: 1.0110918283462524,grad_norm: 0.7534191665195897, iteration: 374656
loss: 1.0153839588165283,grad_norm: 0.7020539167673528, iteration: 374657
loss: 1.0289607048034668,grad_norm: 0.8320333525979096, iteration: 374658
loss: 1.018102765083313,grad_norm: 0.713777043626864, iteration: 374659
loss: 0.9976211190223694,grad_norm: 0.8202262737287025, iteration: 374660
loss: 0.9877898693084717,grad_norm: 0.8901325040806299, iteration: 374661
loss: 0.9800974726676941,grad_norm: 0.8681795467662714, iteration: 374662
loss: 1.033194899559021,grad_norm: 0.8718608095487927, iteration: 374663
loss: 0.9877192974090576,grad_norm: 0.7562306189287734, iteration: 374664
loss: 0.9978975653648376,grad_norm: 0.851784860109702, iteration: 374665
loss: 0.9807482957839966,grad_norm: 0.6883972554578055, iteration: 374666
loss: 1.0172892808914185,grad_norm: 0.7553589793591897, iteration: 374667
loss: 0.984070360660553,grad_norm: 0.781963732704235, iteration: 374668
loss: 0.9711401462554932,grad_norm: 0.7002226659922282, iteration: 374669
loss: 1.029923439025879,grad_norm: 0.9999991586611853, iteration: 374670
loss: 1.027586579322815,grad_norm: 0.9488077402121222, iteration: 374671
loss: 1.0179035663604736,grad_norm: 0.8954184402556167, iteration: 374672
loss: 0.9794168472290039,grad_norm: 0.833604759187576, iteration: 374673
loss: 1.0125274658203125,grad_norm: 0.7900826624107581, iteration: 374674
loss: 1.0109403133392334,grad_norm: 0.9224837898766033, iteration: 374675
loss: 0.9780085682868958,grad_norm: 0.8286917118068992, iteration: 374676
loss: 1.0003036260604858,grad_norm: 0.7964725584725318, iteration: 374677
loss: 0.9848161935806274,grad_norm: 0.8240227624800482, iteration: 374678
loss: 1.0330170392990112,grad_norm: 0.8072114961099254, iteration: 374679
loss: 1.0425866842269897,grad_norm: 0.9999990496112053, iteration: 374680
loss: 1.005590796470642,grad_norm: 0.8408019916234734, iteration: 374681
loss: 1.0035933256149292,grad_norm: 0.7895224265643503, iteration: 374682
loss: 0.9727041125297546,grad_norm: 0.7915562613048719, iteration: 374683
loss: 1.0499557256698608,grad_norm: 0.8477737277810384, iteration: 374684
loss: 1.0325149297714233,grad_norm: 0.898555608209142, iteration: 374685
loss: 0.993472695350647,grad_norm: 0.9999991732584236, iteration: 374686
loss: 1.0251001119613647,grad_norm: 0.8655640795345221, iteration: 374687
loss: 0.9939756393432617,grad_norm: 0.7114552298686245, iteration: 374688
loss: 1.0270627737045288,grad_norm: 0.8675097063529863, iteration: 374689
loss: 0.9842320680618286,grad_norm: 0.8393392004280299, iteration: 374690
loss: 0.9952322840690613,grad_norm: 0.8583164254183278, iteration: 374691
loss: 1.0515295267105103,grad_norm: 0.7767266081940167, iteration: 374692
loss: 1.0836427211761475,grad_norm: 0.999999810597839, iteration: 374693
loss: 1.0120658874511719,grad_norm: 0.7238877868826012, iteration: 374694
loss: 1.022596836090088,grad_norm: 0.9999996065478954, iteration: 374695
loss: 0.9630774855613708,grad_norm: 0.7808514275545954, iteration: 374696
loss: 0.9566600918769836,grad_norm: 0.8425668440966039, iteration: 374697
loss: 0.9943974018096924,grad_norm: 0.9999990789896569, iteration: 374698
loss: 1.0035847425460815,grad_norm: 0.7413826657176039, iteration: 374699
loss: 0.9935996532440186,grad_norm: 0.6640203527594717, iteration: 374700
loss: 1.0042225122451782,grad_norm: 0.9999995431077608, iteration: 374701
loss: 0.9998141527175903,grad_norm: 0.7960447385313182, iteration: 374702
loss: 1.0431407690048218,grad_norm: 0.8490899319357403, iteration: 374703
loss: 0.9932937622070312,grad_norm: 0.7205855937780046, iteration: 374704
loss: 0.9632644653320312,grad_norm: 0.8247525777505643, iteration: 374705
loss: 1.0486805438995361,grad_norm: 0.8761204144765091, iteration: 374706
loss: 0.9946732521057129,grad_norm: 0.6888768743801095, iteration: 374707
loss: 1.0244494676589966,grad_norm: 0.7141245692627453, iteration: 374708
loss: 0.970058262348175,grad_norm: 0.8588421021902981, iteration: 374709
loss: 1.016205906867981,grad_norm: 0.6481577161763865, iteration: 374710
loss: 1.0055090188980103,grad_norm: 0.8231715766683977, iteration: 374711
loss: 1.0146284103393555,grad_norm: 0.9088914040669065, iteration: 374712
loss: 0.9852864742279053,grad_norm: 0.7277645964525304, iteration: 374713
loss: 1.0032206773757935,grad_norm: 0.7464309315386292, iteration: 374714
loss: 0.9950105547904968,grad_norm: 0.8568033506589797, iteration: 374715
loss: 1.026899814605713,grad_norm: 0.758622766288489, iteration: 374716
loss: 0.9783632159233093,grad_norm: 0.8809942719941366, iteration: 374717
loss: 0.9667032361030579,grad_norm: 0.7988686757219803, iteration: 374718
loss: 1.0656864643096924,grad_norm: 0.8031084241953935, iteration: 374719
loss: 0.9884355664253235,grad_norm: 0.7625848545939127, iteration: 374720
loss: 1.0992037057876587,grad_norm: 0.8700994965971733, iteration: 374721
loss: 0.9788240194320679,grad_norm: 0.7212509935213993, iteration: 374722
loss: 1.2012137174606323,grad_norm: 0.9999994866960257, iteration: 374723
loss: 0.9902855753898621,grad_norm: 0.8253422490934336, iteration: 374724
loss: 0.9824002981185913,grad_norm: 0.7911983674512888, iteration: 374725
loss: 0.9621654152870178,grad_norm: 0.9369461082529369, iteration: 374726
loss: 1.0076956748962402,grad_norm: 0.8213392292258156, iteration: 374727
loss: 1.0405476093292236,grad_norm: 0.81933304912707, iteration: 374728
loss: 0.9832379817962646,grad_norm: 0.6791965050415029, iteration: 374729
loss: 1.0383039712905884,grad_norm: 0.8981069064342275, iteration: 374730
loss: 0.9842442274093628,grad_norm: 0.8168268298617508, iteration: 374731
loss: 0.9642947912216187,grad_norm: 0.9999993048285177, iteration: 374732
loss: 1.0216139554977417,grad_norm: 0.7511050496169609, iteration: 374733
loss: 0.9826858043670654,grad_norm: 0.8838022699956088, iteration: 374734
loss: 1.012251615524292,grad_norm: 0.6913381284875127, iteration: 374735
loss: 0.9777014851570129,grad_norm: 0.8434170469693792, iteration: 374736
loss: 1.0521069765090942,grad_norm: 0.9506330063300839, iteration: 374737
loss: 0.9880479574203491,grad_norm: 0.8250660274912636, iteration: 374738
loss: 0.9942619204521179,grad_norm: 0.798849345998332, iteration: 374739
loss: 1.0017516613006592,grad_norm: 0.8116945340150019, iteration: 374740
loss: 0.9825404286384583,grad_norm: 0.7413182540555777, iteration: 374741
loss: 1.0201596021652222,grad_norm: 0.7532032909874962, iteration: 374742
loss: 1.0368446111679077,grad_norm: 0.8924113491691065, iteration: 374743
loss: 0.9995899200439453,grad_norm: 0.7655023781307251, iteration: 374744
loss: 1.0202322006225586,grad_norm: 0.775660069339401, iteration: 374745
loss: 1.019992709159851,grad_norm: 0.9112091402773166, iteration: 374746
loss: 1.0330477952957153,grad_norm: 0.9999992378350178, iteration: 374747
loss: 1.1258734464645386,grad_norm: 0.9999993284798306, iteration: 374748
loss: 0.9979691505432129,grad_norm: 0.8701027996617764, iteration: 374749
loss: 1.06118905544281,grad_norm: 0.9999989959661315, iteration: 374750
loss: 1.0152952671051025,grad_norm: 0.9266875275199825, iteration: 374751
loss: 1.047842264175415,grad_norm: 0.999999808584063, iteration: 374752
loss: 1.0381940603256226,grad_norm: 0.911147440090486, iteration: 374753
loss: 1.015175223350525,grad_norm: 0.8613631420951611, iteration: 374754
loss: 0.9852162599563599,grad_norm: 0.6853459863679116, iteration: 374755
loss: 0.9974200129508972,grad_norm: 0.8906140796460134, iteration: 374756
loss: 1.0258209705352783,grad_norm: 0.9338261041623602, iteration: 374757
loss: 1.0022767782211304,grad_norm: 0.747718694977001, iteration: 374758
loss: 0.9832518696784973,grad_norm: 0.8177941489025853, iteration: 374759
loss: 1.0927578210830688,grad_norm: 0.9999999041280444, iteration: 374760
loss: 1.0335725545883179,grad_norm: 0.9999998113594437, iteration: 374761
loss: 1.005734920501709,grad_norm: 0.6210186326139376, iteration: 374762
loss: 1.0370705127716064,grad_norm: 0.8910510467739122, iteration: 374763
loss: 1.0183398723602295,grad_norm: 0.730824449756211, iteration: 374764
loss: 1.0102593898773193,grad_norm: 0.9999990792786568, iteration: 374765
loss: 1.1773459911346436,grad_norm: 0.9999992543626305, iteration: 374766
loss: 1.0741811990737915,grad_norm: 0.8698422091831269, iteration: 374767
loss: 0.9896236658096313,grad_norm: 0.9798553055857387, iteration: 374768
loss: 1.0080496072769165,grad_norm: 0.7251003898158779, iteration: 374769
loss: 1.0477913618087769,grad_norm: 0.9389457478219504, iteration: 374770
loss: 0.9960675239562988,grad_norm: 0.8138597662269472, iteration: 374771
loss: 1.0005549192428589,grad_norm: 0.8366750293115477, iteration: 374772
loss: 0.9761314988136292,grad_norm: 0.8540277826805209, iteration: 374773
loss: 0.9920101761817932,grad_norm: 0.8898893567790287, iteration: 374774
loss: 1.0340049266815186,grad_norm: 0.809896733707575, iteration: 374775
loss: 1.0575015544891357,grad_norm: 0.8888061659017125, iteration: 374776
loss: 1.0282899141311646,grad_norm: 0.8536413581899837, iteration: 374777
loss: 0.9702926278114319,grad_norm: 0.7469407875265831, iteration: 374778
loss: 0.9911491870880127,grad_norm: 0.9090804373356784, iteration: 374779
loss: 1.0709244012832642,grad_norm: 0.9179173244337266, iteration: 374780
loss: 0.9870556592941284,grad_norm: 0.8338107058891319, iteration: 374781
loss: 1.037319302558899,grad_norm: 0.7397694343889276, iteration: 374782
loss: 0.993344247341156,grad_norm: 0.7160738883266498, iteration: 374783
loss: 0.9826164841651917,grad_norm: 0.8627954668029857, iteration: 374784
loss: 1.0893946886062622,grad_norm: 0.936976834405291, iteration: 374785
loss: 0.9473860859870911,grad_norm: 0.8761830161034018, iteration: 374786
loss: 1.0081584453582764,grad_norm: 0.7646777184195402, iteration: 374787
loss: 1.0360767841339111,grad_norm: 0.7469277650606441, iteration: 374788
loss: 1.0320364236831665,grad_norm: 0.9999991340548666, iteration: 374789
loss: 0.9990953803062439,grad_norm: 0.7386304577977229, iteration: 374790
loss: 1.0148415565490723,grad_norm: 0.8890970157228932, iteration: 374791
loss: 1.0899677276611328,grad_norm: 0.9999999536484102, iteration: 374792
loss: 1.0159456729888916,grad_norm: 0.9999990057483563, iteration: 374793
loss: 1.0074940919876099,grad_norm: 0.7863961009133213, iteration: 374794
loss: 0.9695050120353699,grad_norm: 0.7926613576357433, iteration: 374795
loss: 0.9757530093193054,grad_norm: 0.9233433833204946, iteration: 374796
loss: 1.012415885925293,grad_norm: 0.9525170918192952, iteration: 374797
loss: 1.083331823348999,grad_norm: 0.9843889080754872, iteration: 374798
loss: 0.9892343878746033,grad_norm: 0.9163972374970231, iteration: 374799
loss: 0.9650301337242126,grad_norm: 0.8606172565829222, iteration: 374800
loss: 0.9639076590538025,grad_norm: 0.9999991684023992, iteration: 374801
loss: 0.9950973391532898,grad_norm: 0.9142287455122019, iteration: 374802
loss: 0.9969713091850281,grad_norm: 0.9728366694040169, iteration: 374803
loss: 1.0095362663269043,grad_norm: 0.7787447854178913, iteration: 374804
loss: 1.0551249980926514,grad_norm: 0.9999991684760652, iteration: 374805
loss: 0.9943441152572632,grad_norm: 0.79462934974636, iteration: 374806
loss: 0.985493004322052,grad_norm: 0.9994256874280963, iteration: 374807
loss: 1.2698726654052734,grad_norm: 0.9999992524022719, iteration: 374808
loss: 0.9959561824798584,grad_norm: 0.7577248932773514, iteration: 374809
loss: 1.0550847053527832,grad_norm: 0.999998984152904, iteration: 374810
loss: 0.9577364325523376,grad_norm: 0.7656561352661388, iteration: 374811
loss: 0.9735943675041199,grad_norm: 1.0000000279773396, iteration: 374812
loss: 0.9882856011390686,grad_norm: 0.7672176028861561, iteration: 374813
loss: 0.964554488658905,grad_norm: 0.8146728840540244, iteration: 374814
loss: 1.0117532014846802,grad_norm: 0.9999995426857534, iteration: 374815
loss: 1.0413211584091187,grad_norm: 0.8280254638683644, iteration: 374816
loss: 0.9961211085319519,grad_norm: 0.9999991254706696, iteration: 374817
loss: 1.0313693284988403,grad_norm: 0.9287159258989913, iteration: 374818
loss: 0.9962407350540161,grad_norm: 0.8271151033009264, iteration: 374819
loss: 0.9910521507263184,grad_norm: 0.929918491270261, iteration: 374820
loss: 0.9815945029258728,grad_norm: 0.9244646993190688, iteration: 374821
loss: 1.0006920099258423,grad_norm: 0.8821027300061701, iteration: 374822
loss: 0.9998183250427246,grad_norm: 0.9999992972312973, iteration: 374823
loss: 0.9848109483718872,grad_norm: 0.8872151936996148, iteration: 374824
loss: 1.010555624961853,grad_norm: 0.7907139005619419, iteration: 374825
loss: 1.0109059810638428,grad_norm: 0.5838380315987832, iteration: 374826
loss: 1.0166524648666382,grad_norm: 0.7158233490751216, iteration: 374827
loss: 0.9826548099517822,grad_norm: 0.7079773423706088, iteration: 374828
loss: 0.979686439037323,grad_norm: 0.9999991743527478, iteration: 374829
loss: 1.0495123863220215,grad_norm: 0.999999427295468, iteration: 374830
loss: 1.012673258781433,grad_norm: 0.7589132938964607, iteration: 374831
loss: 0.984880805015564,grad_norm: 0.8164101589697873, iteration: 374832
loss: 1.0853296518325806,grad_norm: 0.9999991380128704, iteration: 374833
loss: 0.9841665029525757,grad_norm: 0.871946079649319, iteration: 374834
loss: 1.002056360244751,grad_norm: 0.7277616475737231, iteration: 374835
loss: 1.0113924741744995,grad_norm: 0.7299180391216264, iteration: 374836
loss: 1.0568104982376099,grad_norm: 0.9999998179336977, iteration: 374837
loss: 1.0210562944412231,grad_norm: 0.829467814441436, iteration: 374838
loss: 1.0629172325134277,grad_norm: 0.7382582415500585, iteration: 374839
loss: 1.0191147327423096,grad_norm: 0.9212436560975964, iteration: 374840
loss: 1.031772255897522,grad_norm: 0.8697932406068575, iteration: 374841
loss: 1.030720829963684,grad_norm: 0.804028275497878, iteration: 374842
loss: 1.0478088855743408,grad_norm: 0.8528700834716554, iteration: 374843
loss: 0.9878101944923401,grad_norm: 0.8384565785281163, iteration: 374844
loss: 0.9892657995223999,grad_norm: 0.9898912496991173, iteration: 374845
loss: 1.0762228965759277,grad_norm: 0.9525894969459183, iteration: 374846
loss: 1.0216851234436035,grad_norm: 1.0000000806990357, iteration: 374847
loss: 0.9696032404899597,grad_norm: 0.7992772710970824, iteration: 374848
loss: 1.0758318901062012,grad_norm: 0.9999999873053752, iteration: 374849
loss: 1.0164021253585815,grad_norm: 0.9547128712987484, iteration: 374850
loss: 1.1894400119781494,grad_norm: 0.9999990449626557, iteration: 374851
loss: 0.9679347276687622,grad_norm: 0.75935032717337, iteration: 374852
loss: 0.9962178468704224,grad_norm: 0.7347574013137105, iteration: 374853
loss: 1.0000576972961426,grad_norm: 0.9999993616052856, iteration: 374854
loss: 1.0691032409667969,grad_norm: 0.9999990438550552, iteration: 374855
loss: 1.0046731233596802,grad_norm: 0.7348985704870589, iteration: 374856
loss: 1.0172299146652222,grad_norm: 0.9999992179556633, iteration: 374857
loss: 1.0493673086166382,grad_norm: 0.999999440720843, iteration: 374858
loss: 1.0072753429412842,grad_norm: 0.9999991363678937, iteration: 374859
loss: 0.9681656956672668,grad_norm: 0.8854390794899034, iteration: 374860
loss: 1.0351439714431763,grad_norm: 0.7905689242272824, iteration: 374861
loss: 1.0755847692489624,grad_norm: 1.00000001230517, iteration: 374862
loss: 1.0138452053070068,grad_norm: 0.844773012682902, iteration: 374863
loss: 1.048653244972229,grad_norm: 0.8013635975471092, iteration: 374864
loss: 1.003275990486145,grad_norm: 0.853400194804698, iteration: 374865
loss: 1.0189661979675293,grad_norm: 0.6997313192871688, iteration: 374866
loss: 1.0136516094207764,grad_norm: 0.9999996256922409, iteration: 374867
loss: 1.021363615989685,grad_norm: 0.9999995698013654, iteration: 374868
loss: 1.0281797647476196,grad_norm: 0.7446352355343869, iteration: 374869
loss: 0.9998781085014343,grad_norm: 0.8325970676103928, iteration: 374870
loss: 0.976222574710846,grad_norm: 0.7895735928771712, iteration: 374871
loss: 1.0426102876663208,grad_norm: 0.8398280295451692, iteration: 374872
loss: 0.9890142679214478,grad_norm: 0.9074081382940155, iteration: 374873
loss: 1.0268213748931885,grad_norm: 0.9999998661548812, iteration: 374874
loss: 0.9927142858505249,grad_norm: 0.8130117354473844, iteration: 374875
loss: 1.0744762420654297,grad_norm: 0.9999992034516662, iteration: 374876
loss: 1.008756160736084,grad_norm: 0.7151966091894314, iteration: 374877
loss: 1.009042739868164,grad_norm: 0.8093698966135728, iteration: 374878
loss: 1.0302777290344238,grad_norm: 0.8266453646837211, iteration: 374879
loss: 0.9626502394676208,grad_norm: 0.9999991286714439, iteration: 374880
loss: 1.0779399871826172,grad_norm: 0.999999798027175, iteration: 374881
loss: 1.0009520053863525,grad_norm: 0.7174194827012793, iteration: 374882
loss: 0.9785211682319641,grad_norm: 0.8353093666448778, iteration: 374883
loss: 1.175438404083252,grad_norm: 0.9999992472235526, iteration: 374884
loss: 1.0410212278366089,grad_norm: 0.774596668373534, iteration: 374885
loss: 1.0864812135696411,grad_norm: 0.9169777849317041, iteration: 374886
loss: 0.9619266986846924,grad_norm: 0.7777606546746737, iteration: 374887
loss: 0.9917921423912048,grad_norm: 0.9999991446559556, iteration: 374888
loss: 1.0156733989715576,grad_norm: 0.9228723354582478, iteration: 374889
loss: 0.9679468274116516,grad_norm: 0.7673794054272284, iteration: 374890
loss: 0.9872449040412903,grad_norm: 0.7386179814062475, iteration: 374891
loss: 0.9993678331375122,grad_norm: 0.6818361279016041, iteration: 374892
loss: 1.0193217992782593,grad_norm: 0.8664001127369397, iteration: 374893
loss: 1.0092629194259644,grad_norm: 0.8260237937052333, iteration: 374894
loss: 0.9954686164855957,grad_norm: 0.723473499827594, iteration: 374895
loss: 1.0053812265396118,grad_norm: 0.8450458608039253, iteration: 374896
loss: 0.9860514402389526,grad_norm: 0.9730925366027866, iteration: 374897
loss: 0.9427968263626099,grad_norm: 0.9999990184244617, iteration: 374898
loss: 1.037471055984497,grad_norm: 0.6585086075254849, iteration: 374899
loss: 0.9830999374389648,grad_norm: 0.8328319202209818, iteration: 374900
loss: 0.9914083480834961,grad_norm: 0.6887012422980164, iteration: 374901
loss: 1.0236603021621704,grad_norm: 0.9446921255419207, iteration: 374902
loss: 1.0209711790084839,grad_norm: 0.9999995900569517, iteration: 374903
loss: 0.983654797077179,grad_norm: 0.8273654213146059, iteration: 374904
loss: 0.9750989675521851,grad_norm: 0.8812040294795559, iteration: 374905
loss: 1.444382905960083,grad_norm: 0.9999996892156545, iteration: 374906
loss: 0.9930728673934937,grad_norm: 0.7322807484039865, iteration: 374907
loss: 1.035434365272522,grad_norm: 0.9509541019970801, iteration: 374908
loss: 1.010748267173767,grad_norm: 0.8429399180291612, iteration: 374909
loss: 1.0095558166503906,grad_norm: 0.9999994128531458, iteration: 374910
loss: 0.9793020486831665,grad_norm: 0.7041985619162592, iteration: 374911
loss: 0.9925000667572021,grad_norm: 0.6723627323731348, iteration: 374912
loss: 1.0327463150024414,grad_norm: 0.8430301303665411, iteration: 374913
loss: 1.040197730064392,grad_norm: 0.7513752672757339, iteration: 374914
loss: 1.0247429609298706,grad_norm: 0.9999990724911321, iteration: 374915
loss: 0.9904517531394958,grad_norm: 0.7658105253804575, iteration: 374916
loss: 1.0178998708724976,grad_norm: 0.8635845611822671, iteration: 374917
loss: 1.019865870475769,grad_norm: 0.7809183175412547, iteration: 374918
loss: 1.159466028213501,grad_norm: 0.999999915788662, iteration: 374919
loss: 0.978803813457489,grad_norm: 0.8308079148355942, iteration: 374920
loss: 1.0231248140335083,grad_norm: 0.8195488284119453, iteration: 374921
loss: 0.9949293732643127,grad_norm: 0.9087133227991825, iteration: 374922
loss: 0.9758520126342773,grad_norm: 0.7803892827582719, iteration: 374923
loss: 1.0009608268737793,grad_norm: 0.9894957102888524, iteration: 374924
loss: 1.0241742134094238,grad_norm: 0.7428950990599003, iteration: 374925
loss: 1.0078293085098267,grad_norm: 0.8602614434560163, iteration: 374926
loss: 1.0262857675552368,grad_norm: 0.8968753090228849, iteration: 374927
loss: 0.9787458777427673,grad_norm: 0.8462933323902782, iteration: 374928
loss: 0.9946606755256653,grad_norm: 0.8683668592145786, iteration: 374929
loss: 0.9889163374900818,grad_norm: 0.7924211706784522, iteration: 374930
loss: 1.0044810771942139,grad_norm: 0.6662411894946159, iteration: 374931
loss: 0.98600834608078,grad_norm: 0.7398147897748308, iteration: 374932
loss: 1.0157546997070312,grad_norm: 0.8901419235826007, iteration: 374933
loss: 0.9791381359100342,grad_norm: 0.9999990817878351, iteration: 374934
loss: 1.0017094612121582,grad_norm: 0.9230489417736852, iteration: 374935
loss: 1.0029624700546265,grad_norm: 0.8108991134882685, iteration: 374936
loss: 1.015804648399353,grad_norm: 0.713169219902927, iteration: 374937
loss: 0.9851750135421753,grad_norm: 0.8061120980469503, iteration: 374938
loss: 1.0129917860031128,grad_norm: 0.9999993107660288, iteration: 374939
loss: 1.020848035812378,grad_norm: 0.6939892774231763, iteration: 374940
loss: 1.0202733278274536,grad_norm: 0.8285829137907684, iteration: 374941
loss: 0.9825834631919861,grad_norm: 0.7790077316701463, iteration: 374942
loss: 0.9997599124908447,grad_norm: 0.7813101453039297, iteration: 374943
loss: 1.069251537322998,grad_norm: 0.9589385004685538, iteration: 374944
loss: 0.9586109519004822,grad_norm: 0.7679958865581664, iteration: 374945
loss: 1.065109372138977,grad_norm: 0.9528348947977523, iteration: 374946
loss: 1.0159151554107666,grad_norm: 0.7735684731435284, iteration: 374947
loss: 1.0196410417556763,grad_norm: 0.9071589524938618, iteration: 374948
loss: 0.9821874499320984,grad_norm: 0.7947992974822368, iteration: 374949
loss: 0.9849111437797546,grad_norm: 0.6125450136288976, iteration: 374950
loss: 1.0379784107208252,grad_norm: 0.8565447067437976, iteration: 374951
loss: 1.0062326192855835,grad_norm: 0.9019363942226726, iteration: 374952
loss: 0.980806827545166,grad_norm: 0.9999991383301621, iteration: 374953
loss: 0.9820470213890076,grad_norm: 0.7130916753674378, iteration: 374954
loss: 0.9976332783699036,grad_norm: 0.7447971866611247, iteration: 374955
loss: 1.0242877006530762,grad_norm: 0.8105737595372217, iteration: 374956
loss: 1.0031379461288452,grad_norm: 0.7368239635433843, iteration: 374957
loss: 0.9941778779029846,grad_norm: 0.7615768937629123, iteration: 374958
loss: 1.0195488929748535,grad_norm: 0.9694556366106727, iteration: 374959
loss: 1.0105232000350952,grad_norm: 0.69330773137053, iteration: 374960
loss: 1.0026880502700806,grad_norm: 0.8110614737666402, iteration: 374961
loss: 0.9374836683273315,grad_norm: 0.7796480429834531, iteration: 374962
loss: 1.028260350227356,grad_norm: 0.8262052585048888, iteration: 374963
loss: 1.0284157991409302,grad_norm: 0.7907264672438379, iteration: 374964
loss: 0.9988469481468201,grad_norm: 0.8421877195881773, iteration: 374965
loss: 1.0137158632278442,grad_norm: 0.6509758973094378, iteration: 374966
loss: 1.0264524221420288,grad_norm: 0.9400527365731385, iteration: 374967
loss: 1.0336867570877075,grad_norm: 0.8000710299554977, iteration: 374968
loss: 0.9923390746116638,grad_norm: 0.817112696215227, iteration: 374969
loss: 0.9978044033050537,grad_norm: 0.6096152056541669, iteration: 374970
loss: 0.9940043687820435,grad_norm: 0.7917804084445909, iteration: 374971
loss: 0.9897003173828125,grad_norm: 0.7509256538361946, iteration: 374972
loss: 0.9934174418449402,grad_norm: 0.9999992538146548, iteration: 374973
loss: 1.0183391571044922,grad_norm: 0.9999992709258725, iteration: 374974
loss: 0.9870127439498901,grad_norm: 0.999999714061888, iteration: 374975
loss: 1.0296051502227783,grad_norm: 0.9944052156674317, iteration: 374976
loss: 1.0052025318145752,grad_norm: 0.9999999009263809, iteration: 374977
loss: 1.0114305019378662,grad_norm: 0.6763480372963488, iteration: 374978
loss: 1.0116112232208252,grad_norm: 0.8303979017791735, iteration: 374979
loss: 0.9697916507720947,grad_norm: 0.8009229220293127, iteration: 374980
loss: 0.9824457764625549,grad_norm: 0.7564085710540261, iteration: 374981
loss: 0.9837523698806763,grad_norm: 0.9819683560881312, iteration: 374982
loss: 0.9924790263175964,grad_norm: 0.7490210753936487, iteration: 374983
loss: 1.0131574869155884,grad_norm: 0.9813076404764884, iteration: 374984
loss: 1.0519824028015137,grad_norm: 0.9714079409021464, iteration: 374985
loss: 1.0628145933151245,grad_norm: 0.999999885884531, iteration: 374986
loss: 1.0053218603134155,grad_norm: 0.9999990856099149, iteration: 374987
loss: 1.0027813911437988,grad_norm: 0.9999991119264634, iteration: 374988
loss: 1.0079352855682373,grad_norm: 0.8323680602697536, iteration: 374989
loss: 0.9754543304443359,grad_norm: 0.8683002655034422, iteration: 374990
loss: 1.016074538230896,grad_norm: 0.9999994374664449, iteration: 374991
loss: 1.0102206468582153,grad_norm: 0.7072861381045858, iteration: 374992
loss: 1.0213795900344849,grad_norm: 0.9532956312627597, iteration: 374993
loss: 1.0580817461013794,grad_norm: 0.999999692782278, iteration: 374994
loss: 0.9674853086471558,grad_norm: 0.7386031849882675, iteration: 374995
loss: 0.9683453440666199,grad_norm: 0.8549337456619315, iteration: 374996
loss: 1.0251996517181396,grad_norm: 0.7955412723044182, iteration: 374997
loss: 1.0348505973815918,grad_norm: 0.99999923886363, iteration: 374998
loss: 1.0067280530929565,grad_norm: 0.7129850641732692, iteration: 374999
loss: 1.0808173418045044,grad_norm: 0.8074185567076146, iteration: 375000
loss: 1.011961579322815,grad_norm: 0.828638029525166, iteration: 375001
loss: 0.976021945476532,grad_norm: 0.6841771304499924, iteration: 375002
loss: 0.9782638549804688,grad_norm: 0.9723175646690018, iteration: 375003
loss: 0.9945951104164124,grad_norm: 0.677340791190797, iteration: 375004
loss: 1.0008635520935059,grad_norm: 0.8211487715905316, iteration: 375005
loss: 1.0126780271530151,grad_norm: 0.9999998406872014, iteration: 375006
loss: 1.0027998685836792,grad_norm: 0.9314101130124752, iteration: 375007
loss: 0.9836564660072327,grad_norm: 0.7613831291371956, iteration: 375008
loss: 0.9692130088806152,grad_norm: 0.7809106406269007, iteration: 375009
loss: 0.9962195158004761,grad_norm: 0.9361932542834034, iteration: 375010
loss: 0.974721372127533,grad_norm: 0.838945542960838, iteration: 375011
loss: 1.0571694374084473,grad_norm: 0.6805342884134392, iteration: 375012
loss: 0.998023271560669,grad_norm: 0.7330446158421029, iteration: 375013
loss: 0.9671337008476257,grad_norm: 0.7244028557072663, iteration: 375014
loss: 1.011541485786438,grad_norm: 1.0000000625646897, iteration: 375015
loss: 0.981669008731842,grad_norm: 0.8029676914732015, iteration: 375016
loss: 1.0032317638397217,grad_norm: 0.9249792279684343, iteration: 375017
loss: 1.0161020755767822,grad_norm: 0.7772861369781348, iteration: 375018
loss: 1.0173773765563965,grad_norm: 0.9999991579452333, iteration: 375019
loss: 1.0077732801437378,grad_norm: 0.9160698622741611, iteration: 375020
loss: 0.9959580302238464,grad_norm: 0.7816814224757019, iteration: 375021
loss: 0.995097279548645,grad_norm: 0.7600656767701183, iteration: 375022
loss: 0.9879559874534607,grad_norm: 0.7259837104639378, iteration: 375023
loss: 1.0201866626739502,grad_norm: 0.7700740571227748, iteration: 375024
loss: 0.9977222084999084,grad_norm: 0.8691831736596922, iteration: 375025
loss: 0.9785941243171692,grad_norm: 0.7409060967491282, iteration: 375026
loss: 0.9734134078025818,grad_norm: 0.7463672679174124, iteration: 375027
loss: 0.9919012188911438,grad_norm: 0.7274784361039046, iteration: 375028
loss: 1.0384119749069214,grad_norm: 0.9999993682936446, iteration: 375029
loss: 0.9944502711296082,grad_norm: 0.7669821689869744, iteration: 375030
loss: 1.004018783569336,grad_norm: 0.7783539248336949, iteration: 375031
loss: 1.0766700506210327,grad_norm: 0.891634069423593, iteration: 375032
loss: 1.0599766969680786,grad_norm: 0.8782747630131272, iteration: 375033
loss: 1.0324186086654663,grad_norm: 0.8246357296392688, iteration: 375034
loss: 1.0112998485565186,grad_norm: 0.8632922959104561, iteration: 375035
loss: 0.9885252118110657,grad_norm: 0.7694798681816617, iteration: 375036
loss: 0.996148943901062,grad_norm: 0.7956016299130952, iteration: 375037
loss: 0.9926241636276245,grad_norm: 0.759494166046566, iteration: 375038
loss: 1.0164763927459717,grad_norm: 0.7835120723482544, iteration: 375039
loss: 0.9801783561706543,grad_norm: 0.9057012719450889, iteration: 375040
loss: 1.0055432319641113,grad_norm: 0.9999992065763942, iteration: 375041
loss: 0.9940839409828186,grad_norm: 0.7794236654987287, iteration: 375042
loss: 0.9832988977432251,grad_norm: 0.8096008103574575, iteration: 375043
loss: 1.1147106885910034,grad_norm: 0.9999995364819757, iteration: 375044
loss: 1.0053058862686157,grad_norm: 0.8732723935852835, iteration: 375045
loss: 1.0213943719863892,grad_norm: 0.7901231184413974, iteration: 375046
loss: 1.0075384378433228,grad_norm: 0.8548874118490497, iteration: 375047
loss: 1.029307246208191,grad_norm: 0.8161470792522657, iteration: 375048
loss: 0.9656153917312622,grad_norm: 0.7528491060416503, iteration: 375049
loss: 1.044133186340332,grad_norm: 0.8323292767687475, iteration: 375050
loss: 1.004687786102295,grad_norm: 0.9999994198633219, iteration: 375051
loss: 1.046039342880249,grad_norm: 0.9999993239593546, iteration: 375052
loss: 0.9882534742355347,grad_norm: 0.7811844539720721, iteration: 375053
loss: 1.0064940452575684,grad_norm: 0.7888804136578468, iteration: 375054
loss: 0.9862761497497559,grad_norm: 0.8061813038673241, iteration: 375055
loss: 0.9599686861038208,grad_norm: 0.7436931655574497, iteration: 375056
loss: 1.0193554162979126,grad_norm: 0.9032132811705479, iteration: 375057
loss: 0.9793550372123718,grad_norm: 0.8853253959291061, iteration: 375058
loss: 0.975031852722168,grad_norm: 0.9409903545992685, iteration: 375059
loss: 1.008566975593567,grad_norm: 0.6973673984222726, iteration: 375060
loss: 0.981730580329895,grad_norm: 0.7808252758635268, iteration: 375061
loss: 0.9987563490867615,grad_norm: 0.8070419732139519, iteration: 375062
loss: 0.9533652663230896,grad_norm: 0.7143828806240311, iteration: 375063
loss: 1.0123783349990845,grad_norm: 0.8553336622504195, iteration: 375064
loss: 0.9936466813087463,grad_norm: 0.829106201473847, iteration: 375065
loss: 0.9852955341339111,grad_norm: 0.8383679187426563, iteration: 375066
loss: 0.951174795627594,grad_norm: 0.8765153877770278, iteration: 375067
loss: 0.9787240028381348,grad_norm: 0.8467325358032299, iteration: 375068
loss: 1.021435260772705,grad_norm: 0.8404552117615771, iteration: 375069
loss: 1.032477617263794,grad_norm: 0.9999996476806006, iteration: 375070
loss: 1.0852429866790771,grad_norm: 0.8776771853027117, iteration: 375071
loss: 1.0202940702438354,grad_norm: 0.854972796138315, iteration: 375072
loss: 1.0015416145324707,grad_norm: 0.7645221525266361, iteration: 375073
loss: 0.9678399562835693,grad_norm: 0.9474620717567177, iteration: 375074
loss: 0.9776449203491211,grad_norm: 0.8007971004530008, iteration: 375075
loss: 1.0075827836990356,grad_norm: 0.999999101015691, iteration: 375076
loss: 0.98691326379776,grad_norm: 0.9173410047529016, iteration: 375077
loss: 0.9760794639587402,grad_norm: 0.7114234410989876, iteration: 375078
loss: 0.9911843538284302,grad_norm: 0.847200803138325, iteration: 375079
loss: 0.9988749623298645,grad_norm: 0.820331912481823, iteration: 375080
loss: 0.9995169639587402,grad_norm: 0.7334773573219967, iteration: 375081
loss: 1.0112911462783813,grad_norm: 0.7575124355989532, iteration: 375082
loss: 1.0059939622879028,grad_norm: 0.8848882822578575, iteration: 375083
loss: 0.9843053817749023,grad_norm: 0.7330588063157253, iteration: 375084
loss: 0.9756585955619812,grad_norm: 0.7716624230977841, iteration: 375085
loss: 0.9808919429779053,grad_norm: 0.797296765485591, iteration: 375086
loss: 0.9883676767349243,grad_norm: 0.9064507908359701, iteration: 375087
loss: 0.9908615946769714,grad_norm: 0.982352203110922, iteration: 375088
loss: 0.9962061047554016,grad_norm: 0.9999994055705753, iteration: 375089
loss: 0.9729963541030884,grad_norm: 0.7981202451032215, iteration: 375090
loss: 0.9868196845054626,grad_norm: 0.9011271537598832, iteration: 375091
loss: 1.0186892747879028,grad_norm: 0.7661015795542394, iteration: 375092
loss: 1.0003622770309448,grad_norm: 0.7621284974985842, iteration: 375093
loss: 1.0265856981277466,grad_norm: 0.9107389048351645, iteration: 375094
loss: 1.0154169797897339,grad_norm: 0.9819777664313968, iteration: 375095
loss: 0.964597761631012,grad_norm: 0.7118545116889904, iteration: 375096
loss: 1.027114748954773,grad_norm: 0.9721777876875034, iteration: 375097
loss: 1.0075900554656982,grad_norm: 0.9337573480051652, iteration: 375098
loss: 1.1093978881835938,grad_norm: 0.938163192911858, iteration: 375099
loss: 0.9680423140525818,grad_norm: 0.9450634286929978, iteration: 375100
loss: 1.0336962938308716,grad_norm: 0.8250193604873112, iteration: 375101
loss: 1.0065584182739258,grad_norm: 0.926548134791277, iteration: 375102
loss: 1.0010446310043335,grad_norm: 0.7269876680535414, iteration: 375103
loss: 1.0046627521514893,grad_norm: 0.6799977457009686, iteration: 375104
loss: 1.0097182989120483,grad_norm: 0.9846252749453799, iteration: 375105
loss: 1.03532075881958,grad_norm: 0.8868528373869001, iteration: 375106
loss: 1.0029499530792236,grad_norm: 0.8128728723976558, iteration: 375107
loss: 1.0131797790527344,grad_norm: 0.8282493723180073, iteration: 375108
loss: 1.051576018333435,grad_norm: 0.8809091434665542, iteration: 375109
loss: 1.1187365055084229,grad_norm: 0.9999998805343278, iteration: 375110
loss: 1.004874348640442,grad_norm: 0.6228161648659887, iteration: 375111
loss: 1.01749849319458,grad_norm: 0.9999991186901133, iteration: 375112
loss: 1.026075005531311,grad_norm: 0.7435019444907406, iteration: 375113
loss: 1.0024466514587402,grad_norm: 0.8095870815801751, iteration: 375114
loss: 1.0193175077438354,grad_norm: 0.9999996542372318, iteration: 375115
loss: 1.0207841396331787,grad_norm: 0.7418058747973247, iteration: 375116
loss: 1.0023185014724731,grad_norm: 0.7259115304437648, iteration: 375117
loss: 1.0129441022872925,grad_norm: 0.663165989985836, iteration: 375118
loss: 1.0350472927093506,grad_norm: 0.9999992763867597, iteration: 375119
loss: 1.020398736000061,grad_norm: 0.8559104837844532, iteration: 375120
loss: 1.0008389949798584,grad_norm: 0.7236602736511571, iteration: 375121
loss: 1.013639211654663,grad_norm: 0.9128260527665052, iteration: 375122
loss: 1.015487551689148,grad_norm: 0.9999991257846729, iteration: 375123
loss: 0.9971835613250732,grad_norm: 0.6957309148445042, iteration: 375124
loss: 1.01846182346344,grad_norm: 0.7879025594115453, iteration: 375125
loss: 0.9997159838676453,grad_norm: 0.8388435494508544, iteration: 375126
loss: 0.9972913265228271,grad_norm: 0.8181514256810819, iteration: 375127
loss: 0.9643831849098206,grad_norm: 0.7981078551321952, iteration: 375128
loss: 1.0599886178970337,grad_norm: 0.9999995494329342, iteration: 375129
loss: 1.0068447589874268,grad_norm: 0.9638761949521423, iteration: 375130
loss: 1.010420322418213,grad_norm: 0.7101492442207866, iteration: 375131
loss: 1.0287350416183472,grad_norm: 0.9142417215690192, iteration: 375132
loss: 0.9908300042152405,grad_norm: 0.8341349332131254, iteration: 375133
loss: 0.9879603385925293,grad_norm: 0.7549012399280605, iteration: 375134
loss: 1.01439368724823,grad_norm: 0.8985183963796494, iteration: 375135
loss: 1.00851309299469,grad_norm: 0.9999998794768384, iteration: 375136
loss: 0.9913344383239746,grad_norm: 0.8573799532870482, iteration: 375137
loss: 1.0086363554000854,grad_norm: 0.7736544364612451, iteration: 375138
loss: 0.9873350858688354,grad_norm: 0.7137174200142962, iteration: 375139
loss: 1.0040868520736694,grad_norm: 0.8610737074612387, iteration: 375140
loss: 1.0360110998153687,grad_norm: 0.8088375190748897, iteration: 375141
loss: 1.0003432035446167,grad_norm: 0.7624971267044081, iteration: 375142
loss: 0.9789555072784424,grad_norm: 0.8532157291869112, iteration: 375143
loss: 0.9982247352600098,grad_norm: 0.8204813728131891, iteration: 375144
loss: 0.9712032675743103,grad_norm: 0.7677484075528245, iteration: 375145
loss: 0.989980936050415,grad_norm: 0.794242823133635, iteration: 375146
loss: 0.9819444417953491,grad_norm: 0.7898530531633975, iteration: 375147
loss: 1.00043523311615,grad_norm: 0.70863897495432, iteration: 375148
loss: 1.0820523500442505,grad_norm: 0.7152629182891231, iteration: 375149
loss: 0.9538094997406006,grad_norm: 0.9518648410594669, iteration: 375150
loss: 1.0143764019012451,grad_norm: 0.7077817201812585, iteration: 375151
loss: 0.9734076857566833,grad_norm: 0.9102357192949666, iteration: 375152
loss: 1.0246646404266357,grad_norm: 0.8184330861383411, iteration: 375153
loss: 1.0335317850112915,grad_norm: 0.7257196424506605, iteration: 375154
loss: 1.1107546091079712,grad_norm: 0.9330180509131778, iteration: 375155
loss: 1.0206061601638794,grad_norm: 0.7204087915129003, iteration: 375156
loss: 1.0167956352233887,grad_norm: 0.8611281889363381, iteration: 375157
loss: 1.0008416175842285,grad_norm: 0.8829027715615676, iteration: 375158
loss: 1.0202666521072388,grad_norm: 0.9778347948972295, iteration: 375159
loss: 1.0090795755386353,grad_norm: 0.9305069625638339, iteration: 375160
loss: 0.9849872589111328,grad_norm: 0.6370884527959801, iteration: 375161
loss: 1.0217654705047607,grad_norm: 0.9096995666664197, iteration: 375162
loss: 1.002882719039917,grad_norm: 0.7566251607441231, iteration: 375163
loss: 0.9834150671958923,grad_norm: 0.6195175839896937, iteration: 375164
loss: 0.9736096858978271,grad_norm: 0.7238533510380745, iteration: 375165
loss: 0.9773770570755005,grad_norm: 0.9999993610564454, iteration: 375166
loss: 0.9831070899963379,grad_norm: 0.7720472723690364, iteration: 375167
loss: 0.995004415512085,grad_norm: 0.9999997381073866, iteration: 375168
loss: 0.9834316372871399,grad_norm: 0.8852485508583926, iteration: 375169
loss: 0.9958298206329346,grad_norm: 0.9392308275294399, iteration: 375170
loss: 0.9925128221511841,grad_norm: 0.9999997092902367, iteration: 375171
loss: 1.0322731733322144,grad_norm: 0.8002465639469142, iteration: 375172
loss: 1.0191056728363037,grad_norm: 0.8208534477630136, iteration: 375173
loss: 0.987382173538208,grad_norm: 0.7916120793884405, iteration: 375174
loss: 1.0378973484039307,grad_norm: 0.7335486752143338, iteration: 375175
loss: 1.0801199674606323,grad_norm: 0.7557843461006767, iteration: 375176
loss: 1.0222610235214233,grad_norm: 0.8222700386059107, iteration: 375177
loss: 1.050912618637085,grad_norm: 0.9999997336862073, iteration: 375178
loss: 0.9765543937683105,grad_norm: 0.9108986823636187, iteration: 375179
loss: 0.9846915602684021,grad_norm: 0.7716720208640743, iteration: 375180
loss: 1.0176926851272583,grad_norm: 0.6746548154908429, iteration: 375181
loss: 1.0114858150482178,grad_norm: 0.8405505318696491, iteration: 375182
loss: 0.9788289070129395,grad_norm: 0.8324120881992684, iteration: 375183
loss: 1.004157543182373,grad_norm: 0.6825781475576604, iteration: 375184
loss: 1.0220166444778442,grad_norm: 0.8059947322756454, iteration: 375185
loss: 1.0155446529388428,grad_norm: 0.8152141706178202, iteration: 375186
loss: 1.029007911682129,grad_norm: 0.8485691168797289, iteration: 375187
loss: 1.0074975490570068,grad_norm: 0.7304835553923062, iteration: 375188
loss: 1.0171934366226196,grad_norm: 0.9389791622070107, iteration: 375189
loss: 1.0272589921951294,grad_norm: 0.8215942250863348, iteration: 375190
loss: 1.010479211807251,grad_norm: 0.9127950579120273, iteration: 375191
loss: 1.004578709602356,grad_norm: 0.8550409588474498, iteration: 375192
loss: 0.9977515339851379,grad_norm: 0.7802138886112274, iteration: 375193
loss: 0.9689609408378601,grad_norm: 0.9788809334135412, iteration: 375194
loss: 0.9569255113601685,grad_norm: 0.8252209788664319, iteration: 375195
loss: 1.0343968868255615,grad_norm: 0.9999992829165606, iteration: 375196
loss: 1.1331120729446411,grad_norm: 0.9999998822845766, iteration: 375197
loss: 1.003967046737671,grad_norm: 0.792938783921897, iteration: 375198
loss: 1.0401029586791992,grad_norm: 0.7889020834308023, iteration: 375199
loss: 1.0098634958267212,grad_norm: 0.788485186168954, iteration: 375200
loss: 1.0152006149291992,grad_norm: 0.6308140875313641, iteration: 375201
loss: 1.0525175333023071,grad_norm: 0.7972174478784869, iteration: 375202
loss: 1.020755648612976,grad_norm: 0.9081664373369581, iteration: 375203
loss: 0.9968006014823914,grad_norm: 0.8055870658598405, iteration: 375204
loss: 1.0094271898269653,grad_norm: 0.8501321209054744, iteration: 375205
loss: 1.0179685354232788,grad_norm: 0.707539850656434, iteration: 375206
loss: 0.9819675087928772,grad_norm: 0.7204685967230479, iteration: 375207
loss: 0.9878069758415222,grad_norm: 0.7505576345209557, iteration: 375208
loss: 0.9926990270614624,grad_norm: 0.6550739777357567, iteration: 375209
loss: 1.0109226703643799,grad_norm: 0.9645895810494407, iteration: 375210
loss: 1.0047593116760254,grad_norm: 0.7456688505267546, iteration: 375211
loss: 0.9795364141464233,grad_norm: 0.7689602612181166, iteration: 375212
loss: 0.9689809083938599,grad_norm: 0.8558594211429553, iteration: 375213
loss: 1.0326205492019653,grad_norm: 0.9999990676089989, iteration: 375214
loss: 1.009289026260376,grad_norm: 0.8182204162936245, iteration: 375215
loss: 1.0271962881088257,grad_norm: 0.8912607344261402, iteration: 375216
loss: 1.0186265707015991,grad_norm: 0.9999991282388329, iteration: 375217
loss: 0.9607536792755127,grad_norm: 0.832739398758459, iteration: 375218
loss: 1.0301910638809204,grad_norm: 0.8739009013256632, iteration: 375219
loss: 1.0084435939788818,grad_norm: 0.6766265078370924, iteration: 375220
loss: 0.9332771301269531,grad_norm: 0.8004220277832222, iteration: 375221
loss: 1.0089596509933472,grad_norm: 0.711238577220627, iteration: 375222
loss: 0.9860029816627502,grad_norm: 0.9416345468784931, iteration: 375223
loss: 0.9817794561386108,grad_norm: 0.9151054462048284, iteration: 375224
loss: 0.9789499640464783,grad_norm: 0.740000400601117, iteration: 375225
loss: 1.0303828716278076,grad_norm: 0.7126522378270318, iteration: 375226
loss: 1.017445683479309,grad_norm: 0.8452326811196672, iteration: 375227
loss: 1.0168297290802002,grad_norm: 0.7700026861234421, iteration: 375228
loss: 0.9743446707725525,grad_norm: 0.6707051721014405, iteration: 375229
loss: 1.024596929550171,grad_norm: 0.8734195706359049, iteration: 375230
loss: 0.9852625727653503,grad_norm: 0.8249571536535772, iteration: 375231
loss: 0.9706356525421143,grad_norm: 0.7811494931955588, iteration: 375232
loss: 0.9941309690475464,grad_norm: 0.7365099016894305, iteration: 375233
loss: 0.9797092080116272,grad_norm: 0.9671262570423952, iteration: 375234
loss: 1.0265107154846191,grad_norm: 0.8224541496538512, iteration: 375235
loss: 1.0116957426071167,grad_norm: 0.8065219246545589, iteration: 375236
loss: 1.0086405277252197,grad_norm: 0.8531718556262428, iteration: 375237
loss: 0.9824758768081665,grad_norm: 0.9999992323757836, iteration: 375238
loss: 1.0515395402908325,grad_norm: 0.8614102915536352, iteration: 375239
loss: 1.0053174495697021,grad_norm: 0.8546499527281228, iteration: 375240
loss: 1.0031946897506714,grad_norm: 0.9999990472339568, iteration: 375241
loss: 1.139818787574768,grad_norm: 0.891729462582798, iteration: 375242
loss: 1.0251222848892212,grad_norm: 0.999999683203222, iteration: 375243
loss: 0.990937352180481,grad_norm: 0.7714741974664886, iteration: 375244
loss: 0.9794419407844543,grad_norm: 0.7326273986668143, iteration: 375245
loss: 0.9871120452880859,grad_norm: 0.9999996285727822, iteration: 375246
loss: 0.9718320965766907,grad_norm: 0.6745903497299447, iteration: 375247
loss: 1.0979163646697998,grad_norm: 0.9999991602064355, iteration: 375248
loss: 1.0442008972167969,grad_norm: 0.8527593908656124, iteration: 375249
loss: 0.9839709997177124,grad_norm: 0.7246761385396866, iteration: 375250
loss: 1.0081487894058228,grad_norm: 0.8780854549510797, iteration: 375251
loss: 1.0013519525527954,grad_norm: 0.924253616437651, iteration: 375252
loss: 1.015596628189087,grad_norm: 0.9999993387139217, iteration: 375253
loss: 0.984498143196106,grad_norm: 0.8080185410845496, iteration: 375254
loss: 0.9855329394340515,grad_norm: 0.8356669856734564, iteration: 375255
loss: 0.9927695989608765,grad_norm: 0.8480919456167748, iteration: 375256
loss: 1.0040695667266846,grad_norm: 0.7546918230825838, iteration: 375257
loss: 0.9932140707969666,grad_norm: 0.8309659646387254, iteration: 375258
loss: 0.9823264479637146,grad_norm: 0.8461722901825112, iteration: 375259
loss: 0.9626169800758362,grad_norm: 0.7890617684075203, iteration: 375260
loss: 1.01365327835083,grad_norm: 0.9123369359279188, iteration: 375261
loss: 1.0067108869552612,grad_norm: 0.772919125532945, iteration: 375262
loss: 0.9827407598495483,grad_norm: 0.8935244493507991, iteration: 375263
loss: 0.9623517990112305,grad_norm: 0.6678147603734076, iteration: 375264
loss: 1.0149282217025757,grad_norm: 0.7879909549259915, iteration: 375265
loss: 1.004848837852478,grad_norm: 0.6675521093274752, iteration: 375266
loss: 1.209786057472229,grad_norm: 0.9999995221872225, iteration: 375267
loss: 0.9991979002952576,grad_norm: 0.8400834863014129, iteration: 375268
loss: 1.004892110824585,grad_norm: 0.93427259095098, iteration: 375269
loss: 1.010887861251831,grad_norm: 0.7735725950460718, iteration: 375270
loss: 1.0424542427062988,grad_norm: 0.8388190622414096, iteration: 375271
loss: 0.995588481426239,grad_norm: 0.8835936165424099, iteration: 375272
loss: 0.9908152222633362,grad_norm: 0.7385787415833791, iteration: 375273
loss: 1.040183186531067,grad_norm: 0.6486899361708056, iteration: 375274
loss: 1.002431035041809,grad_norm: 0.9999990501330989, iteration: 375275
loss: 0.9742697477340698,grad_norm: 0.7558681823074254, iteration: 375276
loss: 1.019320011138916,grad_norm: 0.7668376894940123, iteration: 375277
loss: 0.9937697052955627,grad_norm: 0.9474691697679073, iteration: 375278
loss: 0.9926289916038513,grad_norm: 0.9999996006890289, iteration: 375279
loss: 0.9933528304100037,grad_norm: 0.8760069765194838, iteration: 375280
loss: 1.002030849456787,grad_norm: 0.7278292384464785, iteration: 375281
loss: 1.019349217414856,grad_norm: 0.999999976199391, iteration: 375282
loss: 0.9914260506629944,grad_norm: 0.6266258179370768, iteration: 375283
loss: 0.9986242651939392,grad_norm: 0.8411563998154119, iteration: 375284
loss: 1.0138992071151733,grad_norm: 0.9999225741383913, iteration: 375285
loss: 1.0368956327438354,grad_norm: 0.7911161290211461, iteration: 375286
loss: 0.9793010354042053,grad_norm: 0.8653206407693752, iteration: 375287
loss: 1.0666230916976929,grad_norm: 1.0000000533657776, iteration: 375288
loss: 1.1176356077194214,grad_norm: 0.9999998280978907, iteration: 375289
loss: 0.9867479801177979,grad_norm: 0.8143966183024673, iteration: 375290
loss: 1.0147960186004639,grad_norm: 0.7308762257317029, iteration: 375291
loss: 1.0251156091690063,grad_norm: 0.8332769157731302, iteration: 375292
loss: 1.023069977760315,grad_norm: 0.9999990794684819, iteration: 375293
loss: 1.0091757774353027,grad_norm: 0.7531203504296659, iteration: 375294
loss: 1.0214232206344604,grad_norm: 0.7233087545891195, iteration: 375295
loss: 1.0238784551620483,grad_norm: 0.7434700966908339, iteration: 375296
loss: 0.9857116341590881,grad_norm: 0.8657938003654656, iteration: 375297
loss: 1.0081613063812256,grad_norm: 0.7015288612941364, iteration: 375298
loss: 1.036199688911438,grad_norm: 0.9665562501888239, iteration: 375299
loss: 0.952580451965332,grad_norm: 0.8920182730202997, iteration: 375300
loss: 1.0255770683288574,grad_norm: 0.9132421288678382, iteration: 375301
loss: 0.9917635917663574,grad_norm: 0.8597496056937122, iteration: 375302
loss: 1.008074402809143,grad_norm: 0.7102253022970001, iteration: 375303
loss: 0.9840924143791199,grad_norm: 0.9999994581474907, iteration: 375304
loss: 1.0211364030838013,grad_norm: 0.9912992015322148, iteration: 375305
loss: 0.9878318905830383,grad_norm: 0.7808585936198486, iteration: 375306
loss: 0.9943149089813232,grad_norm: 0.8014381851893984, iteration: 375307
loss: 0.9659265875816345,grad_norm: 0.9999990178739095, iteration: 375308
loss: 0.961947500705719,grad_norm: 0.8057273312769143, iteration: 375309
loss: 1.0394598245620728,grad_norm: 0.8454943028059597, iteration: 375310
loss: 1.0389976501464844,grad_norm: 0.8624874127749015, iteration: 375311
loss: 0.9535638093948364,grad_norm: 0.807384557897054, iteration: 375312
loss: 0.9832477569580078,grad_norm: 0.7243604406382834, iteration: 375313
loss: 0.980731189250946,grad_norm: 0.7448352304391288, iteration: 375314
loss: 1.0289379358291626,grad_norm: 0.803750951756566, iteration: 375315
loss: 0.98506760597229,grad_norm: 0.791700168336323, iteration: 375316
loss: 1.0687668323516846,grad_norm: 0.940304584418793, iteration: 375317
loss: 1.016052484512329,grad_norm: 0.9293202327936511, iteration: 375318
loss: 0.984488308429718,grad_norm: 0.8680233603212278, iteration: 375319
loss: 1.0275570154190063,grad_norm: 0.9999997799084294, iteration: 375320
loss: 1.0091520547866821,grad_norm: 0.7624764426100703, iteration: 375321
loss: 0.974672794342041,grad_norm: 0.6680624158195324, iteration: 375322
loss: 0.9894214272499084,grad_norm: 0.8222174503250623, iteration: 375323
loss: 1.0005425214767456,grad_norm: 0.756162605036677, iteration: 375324
loss: 1.0271004438400269,grad_norm: 0.8773302371285936, iteration: 375325
loss: 1.0301486253738403,grad_norm: 0.9999990210488698, iteration: 375326
loss: 1.0094366073608398,grad_norm: 0.7502670416263583, iteration: 375327
loss: 1.0987474918365479,grad_norm: 0.9534549968575537, iteration: 375328
loss: 0.9967890381813049,grad_norm: 0.9999995375938019, iteration: 375329
loss: 1.0007030963897705,grad_norm: 0.9416602443480953, iteration: 375330
loss: 1.0093492269515991,grad_norm: 0.9651997897138108, iteration: 375331
loss: 0.997934877872467,grad_norm: 0.6950189549128298, iteration: 375332
loss: 1.0393697023391724,grad_norm: 0.7170012666515754, iteration: 375333
loss: 0.9910796880722046,grad_norm: 0.7215034982598398, iteration: 375334
loss: 1.0053211450576782,grad_norm: 0.999999127904839, iteration: 375335
loss: 0.9986674189567566,grad_norm: 0.8896881167085905, iteration: 375336
loss: 1.0528013706207275,grad_norm: 0.741003671167256, iteration: 375337
loss: 1.0111680030822754,grad_norm: 0.8092438790204939, iteration: 375338
loss: 1.027245283126831,grad_norm: 0.722942515116119, iteration: 375339
loss: 0.9838352799415588,grad_norm: 0.7429108657451972, iteration: 375340
loss: 0.992143452167511,grad_norm: 0.7012692918842921, iteration: 375341
loss: 1.0010242462158203,grad_norm: 0.8407743422332733, iteration: 375342
loss: 0.9953231811523438,grad_norm: 0.8537185136807001, iteration: 375343
loss: 1.0657107830047607,grad_norm: 0.9999991467479227, iteration: 375344
loss: 1.0217907428741455,grad_norm: 0.9933319145262433, iteration: 375345
loss: 1.0183022022247314,grad_norm: 0.7500018608680744, iteration: 375346
loss: 1.0125439167022705,grad_norm: 0.8013143745593669, iteration: 375347
loss: 1.018794059753418,grad_norm: 0.8003068867612537, iteration: 375348
loss: 1.0481173992156982,grad_norm: 0.9999991687822017, iteration: 375349
loss: 1.0201563835144043,grad_norm: 0.7321124044172976, iteration: 375350
loss: 1.037652850151062,grad_norm: 0.9999992896487595, iteration: 375351
loss: 0.971696674823761,grad_norm: 0.9999989772439986, iteration: 375352
loss: 1.0168262720108032,grad_norm: 0.7736840358343133, iteration: 375353
loss: 1.0055477619171143,grad_norm: 0.735783936884606, iteration: 375354
loss: 1.0079584121704102,grad_norm: 0.7073954178520464, iteration: 375355
loss: 1.0079063177108765,grad_norm: 0.7900328670389377, iteration: 375356
loss: 0.961710512638092,grad_norm: 0.7403029839017332, iteration: 375357
loss: 1.0411114692687988,grad_norm: 0.7353020358673016, iteration: 375358
loss: 1.0386475324630737,grad_norm: 0.9999991646913191, iteration: 375359
loss: 0.9849410057067871,grad_norm: 0.7125671569407883, iteration: 375360
loss: 1.010359287261963,grad_norm: 0.882964281052401, iteration: 375361
loss: 0.9961832165718079,grad_norm: 0.9735270675340958, iteration: 375362
loss: 0.9844940304756165,grad_norm: 0.7628285809803076, iteration: 375363
loss: 0.9290358424186707,grad_norm: 0.8840397654538219, iteration: 375364
loss: 0.961122989654541,grad_norm: 0.8539862544849798, iteration: 375365
loss: 1.0154271125793457,grad_norm: 0.6571071556208816, iteration: 375366
loss: 1.0226470232009888,grad_norm: 0.8869870294380907, iteration: 375367
loss: 1.0309321880340576,grad_norm: 0.9532857353688262, iteration: 375368
loss: 1.010168194770813,grad_norm: 0.8172140897001935, iteration: 375369
loss: 1.0140435695648193,grad_norm: 0.7806489479541256, iteration: 375370
loss: 0.9909422993659973,grad_norm: 0.6854749943707472, iteration: 375371
loss: 1.0024701356887817,grad_norm: 0.8533356555471113, iteration: 375372
loss: 1.0056524276733398,grad_norm: 0.790141524750087, iteration: 375373
loss: 1.0525195598602295,grad_norm: 0.8107724850834451, iteration: 375374
loss: 0.9800001978874207,grad_norm: 0.9875774971934859, iteration: 375375
loss: 1.0261516571044922,grad_norm: 0.8355621040500069, iteration: 375376
loss: 1.0225452184677124,grad_norm: 0.8874396293273602, iteration: 375377
loss: 1.0707231760025024,grad_norm: 0.9999996619490426, iteration: 375378
loss: 1.0461692810058594,grad_norm: 0.9057805686010528, iteration: 375379
loss: 0.9934839010238647,grad_norm: 0.7341269927253122, iteration: 375380
loss: 1.0017402172088623,grad_norm: 0.9188315704842778, iteration: 375381
loss: 1.010999321937561,grad_norm: 0.608442560610526, iteration: 375382
loss: 0.9996805787086487,grad_norm: 0.8226936888870736, iteration: 375383
loss: 1.0307905673980713,grad_norm: 0.9999994933742105, iteration: 375384
loss: 1.0813661813735962,grad_norm: 0.7257104357467931, iteration: 375385
loss: 0.9917170405387878,grad_norm: 0.7844427708063709, iteration: 375386
loss: 1.0014948844909668,grad_norm: 0.7954681875553857, iteration: 375387
loss: 1.0438936948776245,grad_norm: 0.9282505205713817, iteration: 375388
loss: 0.9960638284683228,grad_norm: 0.9375999439358036, iteration: 375389
loss: 0.9961712956428528,grad_norm: 0.7689748648823134, iteration: 375390
loss: 1.0327662229537964,grad_norm: 0.9632844635268334, iteration: 375391
loss: 0.9774757027626038,grad_norm: 0.7190689418395535, iteration: 375392
loss: 1.0314823389053345,grad_norm: 0.9999992181111373, iteration: 375393
loss: 1.0074928998947144,grad_norm: 0.9355110551565922, iteration: 375394
loss: 0.9442251920700073,grad_norm: 0.7239595051005131, iteration: 375395
loss: 0.9987826347351074,grad_norm: 0.7106323416282202, iteration: 375396
loss: 1.0550484657287598,grad_norm: 0.9999998627061037, iteration: 375397
loss: 1.0017696619033813,grad_norm: 0.8031707654003337, iteration: 375398
loss: 1.007519006729126,grad_norm: 0.7757143707214172, iteration: 375399
loss: 1.047979712486267,grad_norm: 0.6989710578156048, iteration: 375400
loss: 1.12636137008667,grad_norm: 0.9999991149104633, iteration: 375401
loss: 1.0021775960922241,grad_norm: 0.8825486842600812, iteration: 375402
loss: 1.0206937789916992,grad_norm: 0.9655254232534881, iteration: 375403
loss: 0.9900615215301514,grad_norm: 0.7626757990488978, iteration: 375404
loss: 0.9628541469573975,grad_norm: 0.7522689362767992, iteration: 375405
loss: 1.0345646142959595,grad_norm: 0.999999156431927, iteration: 375406
loss: 0.980049192905426,grad_norm: 0.7181159315431493, iteration: 375407
loss: 0.960672914981842,grad_norm: 0.9999990305492311, iteration: 375408
loss: 1.0244760513305664,grad_norm: 0.8159016812675166, iteration: 375409
loss: 0.9510366916656494,grad_norm: 0.727946943386081, iteration: 375410
loss: 1.1421178579330444,grad_norm: 0.8415684632886216, iteration: 375411
loss: 1.0272380113601685,grad_norm: 0.6522422859610693, iteration: 375412
loss: 1.001627802848816,grad_norm: 0.7341219984427186, iteration: 375413
loss: 0.9725919365882874,grad_norm: 0.8965288734952102, iteration: 375414
loss: 1.0087254047393799,grad_norm: 0.8030482592015248, iteration: 375415
loss: 0.977412760257721,grad_norm: 0.8805619566806694, iteration: 375416
loss: 0.9953466653823853,grad_norm: 0.8198077303607848, iteration: 375417
loss: 1.0212305784225464,grad_norm: 0.9243748737185291, iteration: 375418
loss: 1.0022169351577759,grad_norm: 0.675704913616835, iteration: 375419
loss: 1.0247503519058228,grad_norm: 0.9999991637703641, iteration: 375420
loss: 1.0649549961090088,grad_norm: 0.9999994521958171, iteration: 375421
loss: 0.9941645860671997,grad_norm: 0.7589403640497853, iteration: 375422
loss: 0.9936761856079102,grad_norm: 0.7817372584783336, iteration: 375423
loss: 1.0255540609359741,grad_norm: 0.7004015616206081, iteration: 375424
loss: 1.0464946031570435,grad_norm: 0.9690402930773979, iteration: 375425
loss: 1.010558009147644,grad_norm: 0.724043045543927, iteration: 375426
loss: 1.0111571550369263,grad_norm: 0.8395195011226204, iteration: 375427
loss: 1.0265109539031982,grad_norm: 0.8252771635993902, iteration: 375428
loss: 0.9971489310264587,grad_norm: 0.6208475136181038, iteration: 375429
loss: 0.9793907403945923,grad_norm: 0.8326181226203021, iteration: 375430
loss: 0.9859216809272766,grad_norm: 0.7202314760933485, iteration: 375431
loss: 1.0523192882537842,grad_norm: 0.8226562177313473, iteration: 375432
loss: 1.011013150215149,grad_norm: 0.8033532381702012, iteration: 375433
loss: 1.0129673480987549,grad_norm: 0.8403659023816726, iteration: 375434
loss: 1.0522150993347168,grad_norm: 0.8023864654790872, iteration: 375435
loss: 1.0192416906356812,grad_norm: 0.6534804278924252, iteration: 375436
loss: 0.9904965162277222,grad_norm: 0.9255913243267254, iteration: 375437
loss: 1.0085211992263794,grad_norm: 0.9999993376765149, iteration: 375438
loss: 1.0578196048736572,grad_norm: 0.9071207298007381, iteration: 375439
loss: 1.0195447206497192,grad_norm: 0.8893269860454764, iteration: 375440
loss: 1.0798060894012451,grad_norm: 0.9999994439544099, iteration: 375441
loss: 1.011375904083252,grad_norm: 0.8916887046890253, iteration: 375442
loss: 0.9854928851127625,grad_norm: 0.9999998054760831, iteration: 375443
loss: 1.0172165632247925,grad_norm: 0.7763146666530982, iteration: 375444
loss: 0.9910068511962891,grad_norm: 0.7638872648681458, iteration: 375445
loss: 1.01211416721344,grad_norm: 0.9556078771030656, iteration: 375446
loss: 1.0176639556884766,grad_norm: 0.8900738418437923, iteration: 375447
loss: 1.006596326828003,grad_norm: 0.8798359337056955, iteration: 375448
loss: 0.9981380701065063,grad_norm: 0.7743114551784734, iteration: 375449
loss: 1.0154297351837158,grad_norm: 0.8776746135253422, iteration: 375450
loss: 1.0495872497558594,grad_norm: 0.8412397525130679, iteration: 375451
loss: 0.9716095924377441,grad_norm: 0.6588590294167129, iteration: 375452
loss: 1.0150882005691528,grad_norm: 0.6752669409451629, iteration: 375453
loss: 0.9806854128837585,grad_norm: 0.9999990612425562, iteration: 375454
loss: 1.0335204601287842,grad_norm: 0.8705307207094366, iteration: 375455
loss: 1.0161851644515991,grad_norm: 0.7831514045163738, iteration: 375456
loss: 0.9995322823524475,grad_norm: 0.9367200883910202, iteration: 375457
loss: 1.0164021253585815,grad_norm: 0.800977709904964, iteration: 375458
loss: 1.009778380393982,grad_norm: 0.8182936443324891, iteration: 375459
loss: 1.0094417333602905,grad_norm: 0.7329475695204839, iteration: 375460
loss: 1.014838695526123,grad_norm: 0.9999992869618766, iteration: 375461
loss: 1.0330451726913452,grad_norm: 0.9798238353520333, iteration: 375462
loss: 1.049896001815796,grad_norm: 0.7475381695221182, iteration: 375463
loss: 0.9839886426925659,grad_norm: 0.8751785254562252, iteration: 375464
loss: 0.9784970283508301,grad_norm: 0.7445536055954152, iteration: 375465
loss: 1.1127865314483643,grad_norm: 0.9999993512113515, iteration: 375466
loss: 0.9811958074569702,grad_norm: 0.8094120205224031, iteration: 375467
loss: 0.985085129737854,grad_norm: 0.6644789647771464, iteration: 375468
loss: 1.0305113792419434,grad_norm: 0.999999861608382, iteration: 375469
loss: 1.0936694145202637,grad_norm: 0.9999992958654398, iteration: 375470
loss: 1.0359714031219482,grad_norm: 0.9999998882124715, iteration: 375471
loss: 1.007766604423523,grad_norm: 0.8029550202968555, iteration: 375472
loss: 0.9641386866569519,grad_norm: 0.7468684009095179, iteration: 375473
loss: 1.0110708475112915,grad_norm: 0.8782445294392008, iteration: 375474
loss: 1.0037420988082886,grad_norm: 0.736199485196923, iteration: 375475
loss: 1.0966284275054932,grad_norm: 0.999999875551106, iteration: 375476
loss: 1.251766562461853,grad_norm: 0.9999998632042065, iteration: 375477
loss: 0.9873038530349731,grad_norm: 0.9999992746139937, iteration: 375478
loss: 1.0331660509109497,grad_norm: 0.951305033597924, iteration: 375479
loss: 1.0210448503494263,grad_norm: 0.9999998859018788, iteration: 375480
loss: 1.0546551942825317,grad_norm: 0.9999991965161282, iteration: 375481
loss: 1.0274778604507446,grad_norm: 0.9999991910493858, iteration: 375482
loss: 0.9635900855064392,grad_norm: 0.7766663805498457, iteration: 375483
loss: 1.0097644329071045,grad_norm: 0.7889225340714698, iteration: 375484
loss: 1.0013395547866821,grad_norm: 0.8659586607517435, iteration: 375485
loss: 0.9692743420600891,grad_norm: 0.7698392569437209, iteration: 375486
loss: 0.9698951244354248,grad_norm: 0.7973416337012571, iteration: 375487
loss: 1.0004572868347168,grad_norm: 0.9999990613637665, iteration: 375488
loss: 1.0237419605255127,grad_norm: 0.8262889066310297, iteration: 375489
loss: 1.001961350440979,grad_norm: 0.7474081664249488, iteration: 375490
loss: 0.9764960408210754,grad_norm: 0.7153868200820458, iteration: 375491
loss: 0.9740729331970215,grad_norm: 0.7454967335101225, iteration: 375492
loss: 0.9792261719703674,grad_norm: 0.7437084042154217, iteration: 375493
loss: 1.0314615964889526,grad_norm: 0.7315647443452131, iteration: 375494
loss: 1.0213435888290405,grad_norm: 0.8373424098517404, iteration: 375495
loss: 0.9753281474113464,grad_norm: 0.7039800746186844, iteration: 375496
loss: 1.012296438217163,grad_norm: 0.7619369573052847, iteration: 375497
loss: 1.0131337642669678,grad_norm: 0.758902220286809, iteration: 375498
loss: 0.9781341552734375,grad_norm: 0.7252056918149158, iteration: 375499
loss: 0.9890498518943787,grad_norm: 0.7396812266008069, iteration: 375500
loss: 1.004678726196289,grad_norm: 0.8895171301627993, iteration: 375501
loss: 1.013090968132019,grad_norm: 0.8223905662890857, iteration: 375502
loss: 1.0294458866119385,grad_norm: 0.7744680064366173, iteration: 375503
loss: 1.0116910934448242,grad_norm: 0.7068142660012665, iteration: 375504
loss: 0.9962074160575867,grad_norm: 0.711518120622311, iteration: 375505
loss: 1.0046641826629639,grad_norm: 0.7216376903110161, iteration: 375506
loss: 0.9931525588035583,grad_norm: 0.8647294676976075, iteration: 375507
loss: 1.0013689994812012,grad_norm: 0.6559536949058373, iteration: 375508
loss: 1.0206139087677002,grad_norm: 0.8550716689527955, iteration: 375509
loss: 0.9876513481140137,grad_norm: 0.9999991691920365, iteration: 375510
loss: 1.0090065002441406,grad_norm: 0.9192415410495869, iteration: 375511
loss: 1.004432201385498,grad_norm: 0.7478572994892858, iteration: 375512
loss: 1.0678284168243408,grad_norm: 0.8984962370966633, iteration: 375513
loss: 0.9943084716796875,grad_norm: 0.8777845498513478, iteration: 375514
loss: 1.00947904586792,grad_norm: 0.8270592682266584, iteration: 375515
loss: 0.9753595590591431,grad_norm: 0.9602025405084481, iteration: 375516
loss: 1.005273461341858,grad_norm: 0.7629670357412421, iteration: 375517
loss: 0.999198317527771,grad_norm: 0.9149296837432781, iteration: 375518
loss: 0.9928990602493286,grad_norm: 0.8242331587185867, iteration: 375519
loss: 0.9703558683395386,grad_norm: 0.9999992883356803, iteration: 375520
loss: 1.0595732927322388,grad_norm: 0.9999999337536486, iteration: 375521
loss: 0.9830097556114197,grad_norm: 0.9999991039714091, iteration: 375522
loss: 1.0235408544540405,grad_norm: 0.7984122511152663, iteration: 375523
loss: 1.0046133995056152,grad_norm: 0.8821942886237141, iteration: 375524
loss: 1.0025279521942139,grad_norm: 0.7162980122426676, iteration: 375525
loss: 0.9937600493431091,grad_norm: 0.7408363673541201, iteration: 375526
loss: 1.0077614784240723,grad_norm: 0.798119788204808, iteration: 375527
loss: 1.0333516597747803,grad_norm: 0.6567906010321042, iteration: 375528
loss: 0.993398129940033,grad_norm: 0.866692771481668, iteration: 375529
loss: 0.9555866718292236,grad_norm: 0.9454890942618541, iteration: 375530
loss: 0.9697935581207275,grad_norm: 0.9133532703089315, iteration: 375531
loss: 0.9761650562286377,grad_norm: 0.9948411963536227, iteration: 375532
loss: 0.9835895299911499,grad_norm: 0.8616507224123798, iteration: 375533
loss: 1.0201936960220337,grad_norm: 0.8325892232654194, iteration: 375534
loss: 1.047338843345642,grad_norm: 0.999999228544469, iteration: 375535
loss: 0.9835472702980042,grad_norm: 0.741149293864572, iteration: 375536
loss: 0.9809094667434692,grad_norm: 0.8015318773716552, iteration: 375537
loss: 1.0033602714538574,grad_norm: 0.9999997173591458, iteration: 375538
loss: 1.0045703649520874,grad_norm: 0.685445911216575, iteration: 375539
loss: 1.0108511447906494,grad_norm: 0.8281646612355045, iteration: 375540
loss: 1.120556354522705,grad_norm: 1.0000000152159807, iteration: 375541
loss: 1.0094749927520752,grad_norm: 0.7085066818771758, iteration: 375542
loss: 0.9925862550735474,grad_norm: 0.7513709426134526, iteration: 375543
loss: 0.9903948903083801,grad_norm: 0.9999991352583194, iteration: 375544
loss: 0.9721502661705017,grad_norm: 0.8671302925818047, iteration: 375545
loss: 0.9905059933662415,grad_norm: 0.7257376339931876, iteration: 375546
loss: 0.9988462924957275,grad_norm: 0.7585856353968758, iteration: 375547
loss: 1.1150699853897095,grad_norm: 0.8847798420332379, iteration: 375548
loss: 0.9779801368713379,grad_norm: 0.8184752932815252, iteration: 375549
loss: 1.0007522106170654,grad_norm: 0.9999991938779116, iteration: 375550
loss: 1.0168366432189941,grad_norm: 0.7551780260787662, iteration: 375551
loss: 0.9975390434265137,grad_norm: 0.8602220473945293, iteration: 375552
loss: 1.0183426141738892,grad_norm: 0.7457391419670816, iteration: 375553
loss: 0.9813202619552612,grad_norm: 0.692637927190294, iteration: 375554
loss: 1.2554830312728882,grad_norm: 0.9999992637503872, iteration: 375555
loss: 1.026503324508667,grad_norm: 0.9999998036544989, iteration: 375556
loss: 0.9719722270965576,grad_norm: 0.8759995193233646, iteration: 375557
loss: 1.06753671169281,grad_norm: 0.7762363105088255, iteration: 375558
loss: 0.9890787601470947,grad_norm: 0.725440670559373, iteration: 375559
loss: 1.0195032358169556,grad_norm: 0.8414720589172275, iteration: 375560
loss: 1.0125831365585327,grad_norm: 0.7987036219275693, iteration: 375561
loss: 1.0164058208465576,grad_norm: 0.7876426675312597, iteration: 375562
loss: 0.9840676784515381,grad_norm: 0.7168875031941678, iteration: 375563
loss: 0.9823018908500671,grad_norm: 0.7457298415233214, iteration: 375564
loss: 0.9848175048828125,grad_norm: 0.798511168307725, iteration: 375565
loss: 0.9999569654464722,grad_norm: 0.772126742835013, iteration: 375566
loss: 1.0444135665893555,grad_norm: 0.9999996332791592, iteration: 375567
loss: 1.0175187587738037,grad_norm: 0.68891066683714, iteration: 375568
loss: 1.0016385316848755,grad_norm: 0.8334928955946215, iteration: 375569
loss: 1.0118392705917358,grad_norm: 0.9300787452562183, iteration: 375570
loss: 1.0344960689544678,grad_norm: 0.9999997182829001, iteration: 375571
loss: 1.0066689252853394,grad_norm: 0.6821239887017316, iteration: 375572
loss: 0.9970158338546753,grad_norm: 0.7821398336470915, iteration: 375573
loss: 1.004031777381897,grad_norm: 0.9794055300791397, iteration: 375574
loss: 1.0197466611862183,grad_norm: 0.7595575103740753, iteration: 375575
loss: 0.9688862562179565,grad_norm: 0.7914786393951622, iteration: 375576
loss: 0.9630947709083557,grad_norm: 0.7565500428063459, iteration: 375577
loss: 1.0338517427444458,grad_norm: 0.7641241018277032, iteration: 375578
loss: 1.038112759590149,grad_norm: 0.757178338087733, iteration: 375579
loss: 1.0130295753479004,grad_norm: 0.8193300160435213, iteration: 375580
loss: 0.9719985723495483,grad_norm: 0.6939828798377687, iteration: 375581
loss: 0.9497138261795044,grad_norm: 0.6842911577115327, iteration: 375582
loss: 1.0084439516067505,grad_norm: 0.8428621518856069, iteration: 375583
loss: 1.0194453001022339,grad_norm: 0.7788168380950201, iteration: 375584
loss: 0.9531683921813965,grad_norm: 0.9032772345920642, iteration: 375585
loss: 0.9426246285438538,grad_norm: 0.7866401950476711, iteration: 375586
loss: 1.0229703187942505,grad_norm: 0.857143738724277, iteration: 375587
loss: 1.0325700044631958,grad_norm: 0.9999998422910418, iteration: 375588
loss: 0.956990122795105,grad_norm: 0.7061444189367785, iteration: 375589
loss: 1.006007432937622,grad_norm: 0.9651790562130736, iteration: 375590
loss: 0.9590752720832825,grad_norm: 0.9454714636498387, iteration: 375591
loss: 1.0052857398986816,grad_norm: 0.9017851668429469, iteration: 375592
loss: 1.0180354118347168,grad_norm: 0.8483593345532339, iteration: 375593
loss: 1.0287878513336182,grad_norm: 0.7957170970219185, iteration: 375594
loss: 0.9597277045249939,grad_norm: 0.8209244662884899, iteration: 375595
loss: 0.9805842041969299,grad_norm: 0.8066958828731116, iteration: 375596
loss: 1.0146737098693848,grad_norm: 0.7484860820666204, iteration: 375597
loss: 0.9729786515235901,grad_norm: 0.8536953292803146, iteration: 375598
loss: 1.0797889232635498,grad_norm: 0.9999992589830954, iteration: 375599
loss: 0.9774548411369324,grad_norm: 0.9122415804553281, iteration: 375600
loss: 1.0318825244903564,grad_norm: 0.9588196071290791, iteration: 375601
loss: 0.9500325322151184,grad_norm: 0.7064620774175792, iteration: 375602
loss: 0.9537081718444824,grad_norm: 0.8852763184069423, iteration: 375603
loss: 1.005220651626587,grad_norm: 0.8524422865372923, iteration: 375604
loss: 1.0093234777450562,grad_norm: 0.8438753711999543, iteration: 375605
loss: 0.980954647064209,grad_norm: 0.783375221545083, iteration: 375606
loss: 1.0208604335784912,grad_norm: 0.7790432367075234, iteration: 375607
loss: 1.0183707475662231,grad_norm: 0.834677838741692, iteration: 375608
loss: 0.9955622553825378,grad_norm: 0.7834468415710201, iteration: 375609
loss: 0.9851819276809692,grad_norm: 0.6480812607156688, iteration: 375610
loss: 1.0037224292755127,grad_norm: 0.8554406518322232, iteration: 375611
loss: 1.0219312906265259,grad_norm: 0.7544492801596989, iteration: 375612
loss: 0.9883744716644287,grad_norm: 0.7346200672949645, iteration: 375613
loss: 0.9672897458076477,grad_norm: 0.7903488860983016, iteration: 375614
loss: 0.9910987019538879,grad_norm: 0.75426537796035, iteration: 375615
loss: 0.9774934649467468,grad_norm: 0.8942929514266859, iteration: 375616
loss: 1.007570743560791,grad_norm: 0.7880532659724817, iteration: 375617
loss: 0.9894623756408691,grad_norm: 0.9828909812761957, iteration: 375618
loss: 0.985882580280304,grad_norm: 0.6947861391946089, iteration: 375619
loss: 1.0009186267852783,grad_norm: 0.854009689396441, iteration: 375620
loss: 0.9758148193359375,grad_norm: 0.8923347639099287, iteration: 375621
loss: 0.9925215840339661,grad_norm: 0.7909252993953183, iteration: 375622
loss: 1.0135928392410278,grad_norm: 0.7336850127803742, iteration: 375623
loss: 0.9806320071220398,grad_norm: 0.8211535463879118, iteration: 375624
loss: 1.011928915977478,grad_norm: 0.7884188501105414, iteration: 375625
loss: 1.0319530963897705,grad_norm: 0.8509675145435885, iteration: 375626
loss: 0.9967833757400513,grad_norm: 0.7718618071301951, iteration: 375627
loss: 0.9937264919281006,grad_norm: 0.8007456911992897, iteration: 375628
loss: 0.9691636562347412,grad_norm: 0.7073635367857282, iteration: 375629
loss: 1.0091582536697388,grad_norm: 0.725951885516873, iteration: 375630
loss: 0.9941110610961914,grad_norm: 0.8492823371567201, iteration: 375631
loss: 1.0056567192077637,grad_norm: 0.7054501241745463, iteration: 375632
loss: 1.0300755500793457,grad_norm: 0.8029040183218631, iteration: 375633
loss: 0.9961075782775879,grad_norm: 0.9543833088161828, iteration: 375634
loss: 1.0160964727401733,grad_norm: 0.7796016906228544, iteration: 375635
loss: 1.017106533050537,grad_norm: 0.7257375782360131, iteration: 375636
loss: 0.9735273718833923,grad_norm: 0.7782613385272976, iteration: 375637
loss: 0.9864274859428406,grad_norm: 0.7345527585726386, iteration: 375638
loss: 0.9993937015533447,grad_norm: 0.7827918813338376, iteration: 375639
loss: 1.0416256189346313,grad_norm: 0.8112432198479818, iteration: 375640
loss: 1.0099945068359375,grad_norm: 0.828392538795798, iteration: 375641
loss: 1.0000656843185425,grad_norm: 0.8069855963976855, iteration: 375642
loss: 0.9506686925888062,grad_norm: 0.801004401730681, iteration: 375643
loss: 1.0104948282241821,grad_norm: 0.7164511735933605, iteration: 375644
loss: 0.9954937696456909,grad_norm: 0.9552293935815508, iteration: 375645
loss: 1.0355169773101807,grad_norm: 0.8304427895947044, iteration: 375646
loss: 0.9809888601303101,grad_norm: 0.7459267616516251, iteration: 375647
loss: 0.9992892146110535,grad_norm: 0.7844337354156935, iteration: 375648
loss: 1.0221896171569824,grad_norm: 0.7352508344429217, iteration: 375649
loss: 0.9510058164596558,grad_norm: 0.814780126349091, iteration: 375650
loss: 0.9720363020896912,grad_norm: 0.8987020788529859, iteration: 375651
loss: 0.9963865876197815,grad_norm: 0.6865238188535638, iteration: 375652
loss: 0.9876531362533569,grad_norm: 0.7989922685769983, iteration: 375653
loss: 1.093306303024292,grad_norm: 0.9999994278923542, iteration: 375654
loss: 1.0047134160995483,grad_norm: 0.9999998873905493, iteration: 375655
loss: 1.0871846675872803,grad_norm: 0.8447507339282818, iteration: 375656
loss: 1.051512360572815,grad_norm: 0.8636297933513681, iteration: 375657
loss: 1.002331256866455,grad_norm: 0.6770623460099098, iteration: 375658
loss: 0.996027410030365,grad_norm: 0.7388444482134346, iteration: 375659
loss: 1.0857986211776733,grad_norm: 0.9999999777946632, iteration: 375660
loss: 0.9837807416915894,grad_norm: 0.8592403629970138, iteration: 375661
loss: 1.0212854146957397,grad_norm: 0.6768882369049559, iteration: 375662
loss: 0.993362545967102,grad_norm: 0.9999992688774333, iteration: 375663
loss: 1.0161423683166504,grad_norm: 0.8964711369644731, iteration: 375664
loss: 1.0494163036346436,grad_norm: 0.9999992012347875, iteration: 375665
loss: 1.0780292749404907,grad_norm: 0.9510864009213739, iteration: 375666
loss: 1.0224615335464478,grad_norm: 0.9999998258367458, iteration: 375667
loss: 0.9421775341033936,grad_norm: 0.8314224121525272, iteration: 375668
loss: 1.0157451629638672,grad_norm: 0.7102634989307668, iteration: 375669
loss: 0.9946643710136414,grad_norm: 0.9740265058030024, iteration: 375670
loss: 0.9952018857002258,grad_norm: 0.8298590774632321, iteration: 375671
loss: 0.9913068413734436,grad_norm: 0.9352172447172006, iteration: 375672
loss: 1.0228928327560425,grad_norm: 0.9341133470007972, iteration: 375673
loss: 0.997007429599762,grad_norm: 0.8757568111582323, iteration: 375674
loss: 0.9920298457145691,grad_norm: 0.9315744009216494, iteration: 375675
loss: 0.975918710231781,grad_norm: 0.7798417484355746, iteration: 375676
loss: 1.0073379278182983,grad_norm: 0.761984028892481, iteration: 375677
loss: 1.071202039718628,grad_norm: 0.999999002472743, iteration: 375678
loss: 0.9954540729522705,grad_norm: 0.7507703407461519, iteration: 375679
loss: 1.01333749294281,grad_norm: 0.7345571729139898, iteration: 375680
loss: 0.995570719242096,grad_norm: 0.8146780519734115, iteration: 375681
loss: 1.0250998735427856,grad_norm: 0.9999995798979093, iteration: 375682
loss: 0.968869686126709,grad_norm: 0.9357800622522707, iteration: 375683
loss: 1.0136061906814575,grad_norm: 0.7820891644970255, iteration: 375684
loss: 1.0022183656692505,grad_norm: 0.786146755150081, iteration: 375685
loss: 0.9829705357551575,grad_norm: 0.6804813543521192, iteration: 375686
loss: 1.0054844617843628,grad_norm: 0.7130678466147384, iteration: 375687
loss: 1.0209060907363892,grad_norm: 0.682619459068536, iteration: 375688
loss: 0.9833257794380188,grad_norm: 0.7106432664730205, iteration: 375689
loss: 1.0192409753799438,grad_norm: 0.8209938571749341, iteration: 375690
loss: 1.0167165994644165,grad_norm: 0.9999991894909527, iteration: 375691
loss: 0.9739909768104553,grad_norm: 0.809644559554743, iteration: 375692
loss: 1.0096728801727295,grad_norm: 0.7580723224288316, iteration: 375693
loss: 0.9983847737312317,grad_norm: 0.7901702555253936, iteration: 375694
loss: 0.9682497978210449,grad_norm: 0.7524901775358505, iteration: 375695
loss: 0.9964465498924255,grad_norm: 0.9151602532306955, iteration: 375696
loss: 1.0080028772354126,grad_norm: 0.9999998687903257, iteration: 375697
loss: 1.0054340362548828,grad_norm: 0.7219414231249631, iteration: 375698
loss: 0.975115180015564,grad_norm: 0.9999991613341379, iteration: 375699
loss: 0.9930737018585205,grad_norm: 0.7146758082407747, iteration: 375700
loss: 0.9690372347831726,grad_norm: 0.9090084814311464, iteration: 375701
loss: 1.0488399267196655,grad_norm: 0.9999991482011558, iteration: 375702
loss: 0.9864148497581482,grad_norm: 0.7155764852190168, iteration: 375703
loss: 0.9929619431495667,grad_norm: 0.7104770528154355, iteration: 375704
loss: 1.0176844596862793,grad_norm: 0.819817063043863, iteration: 375705
loss: 1.042451024055481,grad_norm: 0.9999999005873303, iteration: 375706
loss: 0.9889695644378662,grad_norm: 0.8353862888484891, iteration: 375707
loss: 1.0204609632492065,grad_norm: 0.7535081129000035, iteration: 375708
loss: 0.976082444190979,grad_norm: 0.7915476550028641, iteration: 375709
loss: 0.995474636554718,grad_norm: 0.7672483500277743, iteration: 375710
loss: 1.011242389678955,grad_norm: 0.9999992008329853, iteration: 375711
loss: 0.984900712966919,grad_norm: 0.8119814833370248, iteration: 375712
loss: 1.099000096321106,grad_norm: 0.7795956694254281, iteration: 375713
loss: 0.9944902658462524,grad_norm: 0.772850914098415, iteration: 375714
loss: 1.0213652849197388,grad_norm: 0.9999992285542997, iteration: 375715
loss: 1.025550127029419,grad_norm: 0.9999995956814751, iteration: 375716
loss: 1.0508031845092773,grad_norm: 0.7438995456770423, iteration: 375717
loss: 1.0186463594436646,grad_norm: 0.7588442056810678, iteration: 375718
loss: 0.9659388065338135,grad_norm: 0.7887691773506345, iteration: 375719
loss: 0.97984379529953,grad_norm: 0.7655211151052914, iteration: 375720
loss: 1.0104811191558838,grad_norm: 0.7264701405883233, iteration: 375721
loss: 1.0213592052459717,grad_norm: 0.7194884399374952, iteration: 375722
loss: 1.0309439897537231,grad_norm: 0.7553819982435731, iteration: 375723
loss: 0.9896330237388611,grad_norm: 0.7983812621398355, iteration: 375724
loss: 0.99172443151474,grad_norm: 0.9348236553886634, iteration: 375725
loss: 1.007150650024414,grad_norm: 0.7341055002528469, iteration: 375726
loss: 0.9999593496322632,grad_norm: 0.8090898300374006, iteration: 375727
loss: 0.9638954997062683,grad_norm: 0.8050408901786653, iteration: 375728
loss: 1.0015783309936523,grad_norm: 0.8169326474757359, iteration: 375729
loss: 1.0820611715316772,grad_norm: 0.7685058141754366, iteration: 375730
loss: 1.014233112335205,grad_norm: 0.8339735967950213, iteration: 375731
loss: 1.0053952932357788,grad_norm: 0.8192425211407266, iteration: 375732
loss: 0.9749789237976074,grad_norm: 0.854378259235885, iteration: 375733
loss: 0.9730058908462524,grad_norm: 0.9849574692031303, iteration: 375734
loss: 0.9631440043449402,grad_norm: 0.999998945610522, iteration: 375735
loss: 1.0148425102233887,grad_norm: 0.8294518054097779, iteration: 375736
loss: 0.9828004240989685,grad_norm: 0.6564730908964231, iteration: 375737
loss: 1.0219452381134033,grad_norm: 0.591867608287697, iteration: 375738
loss: 1.028444528579712,grad_norm: 0.7667154029633642, iteration: 375739
loss: 0.9888355135917664,grad_norm: 0.8010227528596982, iteration: 375740
loss: 0.9960112571716309,grad_norm: 0.6774959227009791, iteration: 375741
loss: 1.012532114982605,grad_norm: 0.7302237871849508, iteration: 375742
loss: 1.056067943572998,grad_norm: 0.7940263152073151, iteration: 375743
loss: 0.9737533926963806,grad_norm: 0.7504285895559122, iteration: 375744
loss: 0.9447757601737976,grad_norm: 0.7731249055359402, iteration: 375745
loss: 1.0316866636276245,grad_norm: 0.7140565345525152, iteration: 375746
loss: 1.0286593437194824,grad_norm: 0.8769335662887183, iteration: 375747
loss: 0.97734135389328,grad_norm: 0.9089108729532538, iteration: 375748
loss: 1.0143667459487915,grad_norm: 0.9310388418638544, iteration: 375749
loss: 1.0261448621749878,grad_norm: 0.8819681661498412, iteration: 375750
loss: 0.9789463877677917,grad_norm: 0.7575969505989283, iteration: 375751
loss: 1.0247057676315308,grad_norm: 0.7179591432316139, iteration: 375752
loss: 1.0589110851287842,grad_norm: 0.9999994833670861, iteration: 375753
loss: 1.0211288928985596,grad_norm: 0.8042975290914205, iteration: 375754
loss: 1.000772476196289,grad_norm: 0.9012492748559907, iteration: 375755
loss: 0.9546583294868469,grad_norm: 0.771086191054199, iteration: 375756
loss: 0.9877992272377014,grad_norm: 0.7808002990882462, iteration: 375757
loss: 1.0242230892181396,grad_norm: 0.9003700512320254, iteration: 375758
loss: 1.012773036956787,grad_norm: 0.9533690028778224, iteration: 375759
loss: 0.9659507274627686,grad_norm: 0.8629809295861083, iteration: 375760
loss: 1.017327904701233,grad_norm: 0.7559569420684648, iteration: 375761
loss: 0.98470139503479,grad_norm: 0.8948121106694003, iteration: 375762
loss: 1.0238679647445679,grad_norm: 0.7839195919202274, iteration: 375763
loss: 1.06377112865448,grad_norm: 0.8096341045275189, iteration: 375764
loss: 1.0148661136627197,grad_norm: 0.8442078511384795, iteration: 375765
loss: 1.0334471464157104,grad_norm: 0.8096533148680094, iteration: 375766
loss: 1.0695147514343262,grad_norm: 0.999999163438552, iteration: 375767
loss: 1.020862340927124,grad_norm: 0.8547950108274502, iteration: 375768
loss: 1.0523513555526733,grad_norm: 0.7462244339023743, iteration: 375769
loss: 0.9815544486045837,grad_norm: 0.8137748453271545, iteration: 375770
loss: 1.015235424041748,grad_norm: 0.7380528819571099, iteration: 375771
loss: 0.9688175320625305,grad_norm: 0.8042134561081647, iteration: 375772
loss: 1.022152066230774,grad_norm: 0.8654653901639772, iteration: 375773
loss: 1.0138379335403442,grad_norm: 0.9438205834113229, iteration: 375774
loss: 0.9924792647361755,grad_norm: 0.9260004241225455, iteration: 375775
loss: 0.9628036022186279,grad_norm: 0.7740475751103378, iteration: 375776
loss: 1.0336769819259644,grad_norm: 0.90426238350849, iteration: 375777
loss: 0.9965804219245911,grad_norm: 0.786446919567108, iteration: 375778
loss: 0.9943408966064453,grad_norm: 0.8593505574170196, iteration: 375779
loss: 0.9928938150405884,grad_norm: 0.8845498516403358, iteration: 375780
loss: 1.0174452066421509,grad_norm: 0.9918734692102686, iteration: 375781
loss: 1.013779878616333,grad_norm: 0.9999997076414741, iteration: 375782
loss: 1.0101858377456665,grad_norm: 0.7641443805389517, iteration: 375783
loss: 1.0299513339996338,grad_norm: 0.9570749593401753, iteration: 375784
loss: 1.0134953260421753,grad_norm: 0.8333411694508559, iteration: 375785
loss: 0.9897555708885193,grad_norm: 0.733230159768199, iteration: 375786
loss: 0.9784082174301147,grad_norm: 0.6775475566393163, iteration: 375787
loss: 0.9847992062568665,grad_norm: 0.8330416644625487, iteration: 375788
loss: 1.0500705242156982,grad_norm: 0.9999994657582015, iteration: 375789
loss: 1.0254569053649902,grad_norm: 0.9999996342604752, iteration: 375790
loss: 0.9387144446372986,grad_norm: 0.9140860134898254, iteration: 375791
loss: 1.0658668279647827,grad_norm: 0.9999998118600368, iteration: 375792
loss: 0.9768588542938232,grad_norm: 0.7007458759492636, iteration: 375793
loss: 1.0012260675430298,grad_norm: 0.8515437766399037, iteration: 375794
loss: 1.0264308452606201,grad_norm: 0.8598472173780163, iteration: 375795
loss: 0.990048348903656,grad_norm: 0.9058608390459916, iteration: 375796
loss: 0.9914499521255493,grad_norm: 0.6864773598767493, iteration: 375797
loss: 1.004805088043213,grad_norm: 0.7827248971926748, iteration: 375798
loss: 1.0016984939575195,grad_norm: 0.9879863909886275, iteration: 375799
loss: 0.9785941243171692,grad_norm: 0.8476467528502296, iteration: 375800
loss: 0.9969906210899353,grad_norm: 0.755138862336436, iteration: 375801
loss: 0.9785089492797852,grad_norm: 0.7786169772241956, iteration: 375802
loss: 0.9844472408294678,grad_norm: 0.8606841514700445, iteration: 375803
loss: 1.0316593647003174,grad_norm: 0.7941345057724016, iteration: 375804
loss: 0.9926701784133911,grad_norm: 0.7158348799065354, iteration: 375805
loss: 0.9887426495552063,grad_norm: 0.9094596929400544, iteration: 375806
loss: 0.9742857813835144,grad_norm: 0.9999996991781896, iteration: 375807
loss: 0.9970600605010986,grad_norm: 0.8196559259126908, iteration: 375808
loss: 1.01676344871521,grad_norm: 0.8889072023637123, iteration: 375809
loss: 1.0235204696655273,grad_norm: 0.7501333137047489, iteration: 375810
loss: 1.0003917217254639,grad_norm: 0.9929892073550118, iteration: 375811
loss: 1.0501166582107544,grad_norm: 0.9999993751103259, iteration: 375812
loss: 0.9746577143669128,grad_norm: 0.7829019715349517, iteration: 375813
loss: 1.010384202003479,grad_norm: 0.8131658836339432, iteration: 375814
loss: 0.9852421283721924,grad_norm: 0.8018872861471374, iteration: 375815
loss: 0.998816728591919,grad_norm: 0.7329844869414047, iteration: 375816
loss: 0.9969462156295776,grad_norm: 0.9860389309789334, iteration: 375817
loss: 1.0338704586029053,grad_norm: 0.8780186168119836, iteration: 375818
loss: 0.9977999329566956,grad_norm: 0.6472584427306686, iteration: 375819
loss: 1.006185531616211,grad_norm: 0.6703086694283521, iteration: 375820
loss: 1.0005629062652588,grad_norm: 0.7878163738591009, iteration: 375821
loss: 1.045548677444458,grad_norm: 0.7191920372071084, iteration: 375822
loss: 1.0184818506240845,grad_norm: 0.8053947933520531, iteration: 375823
loss: 1.0266380310058594,grad_norm: 0.908228426450589, iteration: 375824
loss: 0.9875612854957581,grad_norm: 0.8445281330032114, iteration: 375825
loss: 1.0087547302246094,grad_norm: 0.846303875295838, iteration: 375826
loss: 0.9696382880210876,grad_norm: 0.8046992922710747, iteration: 375827
loss: 1.0645365715026855,grad_norm: 0.9999996163446064, iteration: 375828
loss: 1.0997000932693481,grad_norm: 0.8780674439579531, iteration: 375829
loss: 0.9880357384681702,grad_norm: 0.9999988663054064, iteration: 375830
loss: 1.0404667854309082,grad_norm: 0.978784292528622, iteration: 375831
loss: 0.9886372089385986,grad_norm: 0.787246382882353, iteration: 375832
loss: 0.9983195066452026,grad_norm: 0.684451032347632, iteration: 375833
loss: 1.0789520740509033,grad_norm: 0.9212852545615838, iteration: 375834
loss: 0.989680290222168,grad_norm: 0.8118258765063338, iteration: 375835
loss: 0.9865369200706482,grad_norm: 0.971433509057689, iteration: 375836
loss: 1.0384175777435303,grad_norm: 0.7407977572371225, iteration: 375837
loss: 0.9725391268730164,grad_norm: 0.7540591362983525, iteration: 375838
loss: 0.9600332379341125,grad_norm: 0.812724532470999, iteration: 375839
loss: 0.9954997897148132,grad_norm: 0.6968118047460051, iteration: 375840
loss: 0.9960341453552246,grad_norm: 0.8287092368830044, iteration: 375841
loss: 1.0296109914779663,grad_norm: 0.8142191942147659, iteration: 375842
loss: 0.9686816930770874,grad_norm: 0.7666983310765519, iteration: 375843
loss: 1.0011138916015625,grad_norm: 0.999999810503256, iteration: 375844
loss: 1.013741135597229,grad_norm: 0.9999996469477599, iteration: 375845
loss: 1.0175832509994507,grad_norm: 0.9999990155312418, iteration: 375846
loss: 0.972332239151001,grad_norm: 0.962728821860737, iteration: 375847
loss: 1.0095667839050293,grad_norm: 0.9999991839349515, iteration: 375848
loss: 0.9731425642967224,grad_norm: 0.8612199577913674, iteration: 375849
loss: 0.9835255742073059,grad_norm: 0.8297326651802929, iteration: 375850
loss: 0.9974137544631958,grad_norm: 0.8099221405435042, iteration: 375851
loss: 1.0051106214523315,grad_norm: 0.8330321425543751, iteration: 375852
loss: 0.9773189425468445,grad_norm: 0.9062123083151867, iteration: 375853
loss: 1.0085904598236084,grad_norm: 0.751818075581356, iteration: 375854
loss: 0.9963274598121643,grad_norm: 0.7741714555949403, iteration: 375855
loss: 0.9845408797264099,grad_norm: 0.8303570076220352, iteration: 375856
loss: 1.019659161567688,grad_norm: 0.7805997519009034, iteration: 375857
loss: 1.0284268856048584,grad_norm: 0.8511081924080934, iteration: 375858
loss: 1.0153396129608154,grad_norm: 0.9999991332715544, iteration: 375859
loss: 0.9941295981407166,grad_norm: 0.8012479960384947, iteration: 375860
loss: 0.9922555088996887,grad_norm: 0.9999991007856089, iteration: 375861
loss: 1.0017756223678589,grad_norm: 0.8031591560477731, iteration: 375862
loss: 1.0124143362045288,grad_norm: 0.9286566159007608, iteration: 375863
loss: 1.0151734352111816,grad_norm: 0.7983713541704919, iteration: 375864
loss: 1.0244145393371582,grad_norm: 0.7540773917033232, iteration: 375865
loss: 1.1121748685836792,grad_norm: 0.8386180983744673, iteration: 375866
loss: 1.0214319229125977,grad_norm: 0.9260286995554657, iteration: 375867
loss: 0.9852184057235718,grad_norm: 0.999999452767793, iteration: 375868
loss: 0.9759592413902283,grad_norm: 0.7098859875327914, iteration: 375869
loss: 1.069114327430725,grad_norm: 0.8378770140340407, iteration: 375870
loss: 1.010227918624878,grad_norm: 0.691187872004304, iteration: 375871
loss: 0.9960930347442627,grad_norm: 0.8208479434064222, iteration: 375872
loss: 1.0122132301330566,grad_norm: 0.8330325274222729, iteration: 375873
loss: 0.9677898287773132,grad_norm: 0.6884016435078453, iteration: 375874
loss: 0.9781751036643982,grad_norm: 0.7835739655946432, iteration: 375875
loss: 0.9974985122680664,grad_norm: 0.678834279060161, iteration: 375876
loss: 1.020099401473999,grad_norm: 0.7856456510049893, iteration: 375877
loss: 0.9801859259605408,grad_norm: 0.9999991112194324, iteration: 375878
loss: 0.9963708519935608,grad_norm: 0.7200013255546822, iteration: 375879
loss: 0.9996467232704163,grad_norm: 0.8617925500083651, iteration: 375880
loss: 0.9975354671478271,grad_norm: 0.8504682564007817, iteration: 375881
loss: 1.049737572669983,grad_norm: 0.9306183782257216, iteration: 375882
loss: 1.0001335144042969,grad_norm: 0.7324670444433697, iteration: 375883
loss: 0.9980365037918091,grad_norm: 0.7641296677956189, iteration: 375884
loss: 1.0010744333267212,grad_norm: 0.999999181302767, iteration: 375885
loss: 0.9672325253486633,grad_norm: 0.9033001200538612, iteration: 375886
loss: 1.0069453716278076,grad_norm: 0.6792369109793968, iteration: 375887
loss: 0.9894189238548279,grad_norm: 0.7704483471029773, iteration: 375888
loss: 1.0171418190002441,grad_norm: 0.7534801318735651, iteration: 375889
loss: 0.9676045179367065,grad_norm: 0.7134166553479803, iteration: 375890
loss: 1.0230391025543213,grad_norm: 0.9492367598382873, iteration: 375891
loss: 0.9982442855834961,grad_norm: 0.8577816306808427, iteration: 375892
loss: 1.0076268911361694,grad_norm: 0.9063070172635594, iteration: 375893
loss: 1.0467368364334106,grad_norm: 0.9999995838697704, iteration: 375894
loss: 0.9853047728538513,grad_norm: 0.7641384872705997, iteration: 375895
loss: 1.0186145305633545,grad_norm: 0.8610081833697305, iteration: 375896
loss: 0.9539731740951538,grad_norm: 0.818731492358463, iteration: 375897
loss: 1.0176331996917725,grad_norm: 0.7813415843836784, iteration: 375898
loss: 0.9846193194389343,grad_norm: 0.7418825399827026, iteration: 375899
loss: 1.005024790763855,grad_norm: 0.7810236202870559, iteration: 375900
loss: 1.0184694528579712,grad_norm: 0.9539118385479053, iteration: 375901
loss: 0.9980658292770386,grad_norm: 0.951788785757404, iteration: 375902
loss: 1.0010603666305542,grad_norm: 0.8436056238537439, iteration: 375903
loss: 0.984335720539093,grad_norm: 0.803675664541585, iteration: 375904
loss: 0.97596675157547,grad_norm: 0.8385911737902803, iteration: 375905
loss: 1.087381362915039,grad_norm: 0.9999998940271783, iteration: 375906
loss: 1.0093973875045776,grad_norm: 0.9436624600344985, iteration: 375907
loss: 1.0298737287521362,grad_norm: 0.7386262184347987, iteration: 375908
loss: 1.002834439277649,grad_norm: 0.7197043586679329, iteration: 375909
loss: 0.9659411311149597,grad_norm: 0.6809231612705852, iteration: 375910
loss: 1.038772702217102,grad_norm: 0.8880178127849082, iteration: 375911
loss: 0.9714187383651733,grad_norm: 0.7830644984691253, iteration: 375912
loss: 1.0159962177276611,grad_norm: 0.735412338448786, iteration: 375913
loss: 0.98658686876297,grad_norm: 0.9072001167590484, iteration: 375914
loss: 1.0854626893997192,grad_norm: 0.93988906279261, iteration: 375915
loss: 0.9976099729537964,grad_norm: 0.9002316545350709, iteration: 375916
loss: 0.9834222197532654,grad_norm: 0.6783292599729872, iteration: 375917
loss: 1.0078517198562622,grad_norm: 0.8390397926987427, iteration: 375918
loss: 1.0378953218460083,grad_norm: 0.7258769264279691, iteration: 375919
loss: 1.022495150566101,grad_norm: 0.7547633788568182, iteration: 375920
loss: 0.9935367703437805,grad_norm: 0.7555057593457758, iteration: 375921
loss: 1.0877646207809448,grad_norm: 0.8273316654892351, iteration: 375922
loss: 1.0033520460128784,grad_norm: 0.7197993051707684, iteration: 375923
loss: 1.0169204473495483,grad_norm: 0.8180797666487668, iteration: 375924
loss: 0.9762332439422607,grad_norm: 0.8192047991281578, iteration: 375925
loss: 1.0305055379867554,grad_norm: 0.8779408369164753, iteration: 375926
loss: 1.0005615949630737,grad_norm: 0.7606746313144305, iteration: 375927
loss: 0.989888608455658,grad_norm: 0.8466205172549186, iteration: 375928
loss: 1.0099546909332275,grad_norm: 0.9177390743899925, iteration: 375929
loss: 0.9538572430610657,grad_norm: 0.7294046590401896, iteration: 375930
loss: 0.9928311109542847,grad_norm: 0.9780127598014763, iteration: 375931
loss: 0.9690532684326172,grad_norm: 0.8339163331944689, iteration: 375932
loss: 1.1301053762435913,grad_norm: 0.9999996759871924, iteration: 375933
loss: 0.9802990555763245,grad_norm: 0.7862909564302973, iteration: 375934
loss: 0.993610680103302,grad_norm: 0.7120297668760651, iteration: 375935
loss: 0.9974870681762695,grad_norm: 0.8552842009643273, iteration: 375936
loss: 1.00014328956604,grad_norm: 0.6862792347228841, iteration: 375937
loss: 1.018851399421692,grad_norm: 0.7659960583323925, iteration: 375938
loss: 0.9782649874687195,grad_norm: 0.8611164452628185, iteration: 375939
loss: 1.0160149335861206,grad_norm: 0.889579738993699, iteration: 375940
loss: 1.0147379636764526,grad_norm: 0.7757505854543982, iteration: 375941
loss: 1.0104937553405762,grad_norm: 0.8569366301928459, iteration: 375942
loss: 1.015432357788086,grad_norm: 0.8548624984945934, iteration: 375943
loss: 0.999318540096283,grad_norm: 0.8268370868078485, iteration: 375944
loss: 1.0820175409317017,grad_norm: 0.9156441270617018, iteration: 375945
loss: 1.0258461236953735,grad_norm: 0.790089600429432, iteration: 375946
loss: 1.0142731666564941,grad_norm: 1.0000000003365945, iteration: 375947
loss: 0.9637767672538757,grad_norm: 0.9219277662392855, iteration: 375948
loss: 1.0471163988113403,grad_norm: 0.9999998830447956, iteration: 375949
loss: 1.0151755809783936,grad_norm: 0.6393099941389815, iteration: 375950
loss: 1.0005700588226318,grad_norm: 0.9588411559495317, iteration: 375951
loss: 1.0178319215774536,grad_norm: 0.8496498475571495, iteration: 375952
loss: 1.0104864835739136,grad_norm: 0.9999990264790515, iteration: 375953
loss: 0.9713515043258667,grad_norm: 0.7732838630145837, iteration: 375954
loss: 1.0008195638656616,grad_norm: 0.855250181931515, iteration: 375955
loss: 1.0155836343765259,grad_norm: 0.860020624777805, iteration: 375956
loss: 0.9654442071914673,grad_norm: 0.9450140652939393, iteration: 375957
loss: 1.0996168851852417,grad_norm: 0.9542329456035823, iteration: 375958
loss: 0.9590482115745544,grad_norm: 0.8622889618321502, iteration: 375959
loss: 1.0609205961227417,grad_norm: 0.9455450017384415, iteration: 375960
loss: 1.0184049606323242,grad_norm: 0.9362178415087646, iteration: 375961
loss: 1.0285261869430542,grad_norm: 0.9806384848176294, iteration: 375962
loss: 0.9976028800010681,grad_norm: 0.8981451590882406, iteration: 375963
loss: 0.9667195081710815,grad_norm: 0.7632525576933279, iteration: 375964
loss: 1.0166246891021729,grad_norm: 0.9643686771403115, iteration: 375965
loss: 0.9914712905883789,grad_norm: 0.5998137067539931, iteration: 375966
loss: 1.0177708864212036,grad_norm: 0.9999993898132578, iteration: 375967
loss: 0.9755650162696838,grad_norm: 0.8022105707495675, iteration: 375968
loss: 1.0144847631454468,grad_norm: 0.8058000622549957, iteration: 375969
loss: 0.987328290939331,grad_norm: 0.9193512111385591, iteration: 375970
loss: 0.9733419418334961,grad_norm: 0.9999994613991909, iteration: 375971
loss: 0.9766542315483093,grad_norm: 0.8232685847152613, iteration: 375972
loss: 0.985754668712616,grad_norm: 0.7709324692827251, iteration: 375973
loss: 1.023932695388794,grad_norm: 0.7844915782967541, iteration: 375974
loss: 0.9573791027069092,grad_norm: 0.9555995844884986, iteration: 375975
loss: 1.000808835029602,grad_norm: 0.8039021483578002, iteration: 375976
loss: 1.019754409790039,grad_norm: 0.7143495053856296, iteration: 375977
loss: 0.9878079295158386,grad_norm: 0.8911883502029548, iteration: 375978
loss: 0.9963200092315674,grad_norm: 0.9999990829358729, iteration: 375979
loss: 0.9784535765647888,grad_norm: 0.668769220692285, iteration: 375980
loss: 0.9915095567703247,grad_norm: 0.8245333698675102, iteration: 375981
loss: 0.9725461602210999,grad_norm: 0.7557181202529257, iteration: 375982
loss: 1.0086002349853516,grad_norm: 0.7475201284251887, iteration: 375983
loss: 1.0269343852996826,grad_norm: 0.9826749915689634, iteration: 375984
loss: 1.0103302001953125,grad_norm: 0.9423627927640248, iteration: 375985
loss: 0.9900059700012207,grad_norm: 0.8469734095468427, iteration: 375986
loss: 1.0177596807479858,grad_norm: 0.7293798930521345, iteration: 375987
loss: 1.0291285514831543,grad_norm: 0.7818569092740689, iteration: 375988
loss: 1.0834286212921143,grad_norm: 0.9772763723800078, iteration: 375989
loss: 1.0291426181793213,grad_norm: 0.9999989618439157, iteration: 375990
loss: 0.9983776807785034,grad_norm: 0.5995382806362461, iteration: 375991
loss: 1.140917420387268,grad_norm: 0.9999989891070651, iteration: 375992
loss: 0.9583256244659424,grad_norm: 0.9429675917458163, iteration: 375993
loss: 0.9636168479919434,grad_norm: 0.6994689865019783, iteration: 375994
loss: 1.0243525505065918,grad_norm: 0.7521248192784655, iteration: 375995
loss: 1.0206432342529297,grad_norm: 0.7336290490306584, iteration: 375996
loss: 1.0078822374343872,grad_norm: 0.758899696068332, iteration: 375997
loss: 1.0305324792861938,grad_norm: 0.9999991550916937, iteration: 375998
loss: 0.9731868505477905,grad_norm: 0.6598439064786608, iteration: 375999
loss: 0.9816318154335022,grad_norm: 0.8101399863760228, iteration: 376000
loss: 0.9926232695579529,grad_norm: 0.7828538345919502, iteration: 376001
loss: 1.0510671138763428,grad_norm: 0.8523392535770485, iteration: 376002
loss: 0.997704029083252,grad_norm: 0.7573861108854256, iteration: 376003
loss: 0.9872798919677734,grad_norm: 0.6207480169064935, iteration: 376004
loss: 1.0049352645874023,grad_norm: 0.868919393663571, iteration: 376005
loss: 0.9816429018974304,grad_norm: 0.725647774195614, iteration: 376006
loss: 0.9707338809967041,grad_norm: 0.7256854897956809, iteration: 376007
loss: 0.9804012179374695,grad_norm: 0.8072031847284479, iteration: 376008
loss: 0.9773054718971252,grad_norm: 0.8741335233437391, iteration: 376009
loss: 0.9598177075386047,grad_norm: 0.6989218528081416, iteration: 376010
loss: 1.0247740745544434,grad_norm: 0.7811705888605989, iteration: 376011
loss: 1.0006698369979858,grad_norm: 0.9999992385014537, iteration: 376012
loss: 1.0723092555999756,grad_norm: 0.9999993313507103, iteration: 376013
loss: 0.9898285269737244,grad_norm: 0.8730987092567031, iteration: 376014
loss: 0.9959816932678223,grad_norm: 0.8134398309929628, iteration: 376015
loss: 0.988254189491272,grad_norm: 0.9166835118917148, iteration: 376016
loss: 1.0196638107299805,grad_norm: 0.7460017170311635, iteration: 376017
loss: 1.0267252922058105,grad_norm: 0.8428769104216337, iteration: 376018
loss: 0.9981128573417664,grad_norm: 0.7135309114955299, iteration: 376019
loss: 1.0043985843658447,grad_norm: 0.8192990220057716, iteration: 376020
loss: 0.9992519021034241,grad_norm: 0.9829411777916504, iteration: 376021
loss: 0.9790393114089966,grad_norm: 0.8418614004687214, iteration: 376022
loss: 0.9916650652885437,grad_norm: 0.6943906519389368, iteration: 376023
loss: 1.0141102075576782,grad_norm: 0.760866579714388, iteration: 376024
loss: 1.0284497737884521,grad_norm: 0.9999996630343816, iteration: 376025
loss: 1.0307258367538452,grad_norm: 0.9255356349575147, iteration: 376026
loss: 1.023012638092041,grad_norm: 0.8456601788410555, iteration: 376027
loss: 0.9774301052093506,grad_norm: 0.7297882034134278, iteration: 376028
loss: 0.963355541229248,grad_norm: 0.8052426433113818, iteration: 376029
loss: 1.026456356048584,grad_norm: 0.7150183330971365, iteration: 376030
loss: 1.0332083702087402,grad_norm: 0.7703856946573358, iteration: 376031
loss: 1.0072969198226929,grad_norm: 0.7276321757492861, iteration: 376032
loss: 0.9908125400543213,grad_norm: 0.7057438861254672, iteration: 376033
loss: 1.008062720298767,grad_norm: 0.6524963085469364, iteration: 376034
loss: 1.0181883573532104,grad_norm: 0.8123208718026231, iteration: 376035
loss: 0.9487729668617249,grad_norm: 0.7860681026256278, iteration: 376036
loss: 1.0039318799972534,grad_norm: 0.7199504820929564, iteration: 376037
loss: 1.0131889581680298,grad_norm: 0.85294995298074, iteration: 376038
loss: 1.0116307735443115,grad_norm: 0.844930439018716, iteration: 376039
loss: 1.008052945137024,grad_norm: 0.8547042099650679, iteration: 376040
loss: 0.9906893372535706,grad_norm: 0.9416387556917548, iteration: 376041
loss: 1.002514362335205,grad_norm: 0.7431749915572362, iteration: 376042
loss: 0.959362804889679,grad_norm: 0.7861733410772234, iteration: 376043
loss: 0.9992713928222656,grad_norm: 0.834857072235513, iteration: 376044
loss: 1.0575530529022217,grad_norm: 0.9999990511271873, iteration: 376045
loss: 1.0132590532302856,grad_norm: 0.7580177610523982, iteration: 376046
loss: 1.0001765489578247,grad_norm: 0.7928043895308373, iteration: 376047
loss: 1.0195555686950684,grad_norm: 0.9730699697170387, iteration: 376048
loss: 0.9958072304725647,grad_norm: 0.7667311632091516, iteration: 376049
loss: 1.0256304740905762,grad_norm: 0.8184972862969409, iteration: 376050
loss: 0.9925528764724731,grad_norm: 0.7812898060593825, iteration: 376051
loss: 0.9934262633323669,grad_norm: 0.7492688041701819, iteration: 376052
loss: 0.9484440088272095,grad_norm: 0.7164324216737001, iteration: 376053
loss: 1.0202627182006836,grad_norm: 0.8348944973390809, iteration: 376054
loss: 1.0247020721435547,grad_norm: 0.8900884501302876, iteration: 376055
loss: 0.9843930006027222,grad_norm: 0.8639385612212989, iteration: 376056
loss: 0.9684700965881348,grad_norm: 0.9999989916535539, iteration: 376057
loss: 1.005352258682251,grad_norm: 0.87638434427878, iteration: 376058
loss: 1.0209263563156128,grad_norm: 0.8909294385528391, iteration: 376059
loss: 0.9947327971458435,grad_norm: 0.8600203164686642, iteration: 376060
loss: 1.0187855958938599,grad_norm: 0.7390109847478112, iteration: 376061
loss: 1.1784456968307495,grad_norm: 0.9999996728574546, iteration: 376062
loss: 0.9566581845283508,grad_norm: 0.7583401724599637, iteration: 376063
loss: 1.0054669380187988,grad_norm: 0.8912454754203193, iteration: 376064
loss: 0.9932446479797363,grad_norm: 0.7997319425490891, iteration: 376065
loss: 0.9897283911705017,grad_norm: 0.8170840651711208, iteration: 376066
loss: 1.052333950996399,grad_norm: 0.6730486187172089, iteration: 376067
loss: 1.0379340648651123,grad_norm: 0.8307976764205759, iteration: 376068
loss: 0.9719662666320801,grad_norm: 0.6837265023819726, iteration: 376069
loss: 0.9688991904258728,grad_norm: 0.8658816280362096, iteration: 376070
loss: 1.0091274976730347,grad_norm: 0.8049117970250682, iteration: 376071
loss: 1.0121612548828125,grad_norm: 0.8812572182328138, iteration: 376072
loss: 1.022355079650879,grad_norm: 0.6944628616560978, iteration: 376073
loss: 1.010628581047058,grad_norm: 0.665794174761242, iteration: 376074
loss: 0.9936133027076721,grad_norm: 0.9999999367355312, iteration: 376075
loss: 0.9964165687561035,grad_norm: 0.6728269304563609, iteration: 376076
loss: 1.0448919534683228,grad_norm: 0.7261815628722307, iteration: 376077
loss: 1.037140965461731,grad_norm: 0.799932703799477, iteration: 376078
loss: 1.0191065073013306,grad_norm: 0.9999994121806302, iteration: 376079
loss: 0.9824588894844055,grad_norm: 0.628897916140741, iteration: 376080
loss: 0.9740301966667175,grad_norm: 0.8285714653671826, iteration: 376081
loss: 1.0081658363342285,grad_norm: 0.8447258345331876, iteration: 376082
loss: 0.9915534853935242,grad_norm: 0.7156181560505624, iteration: 376083
loss: 1.0048199892044067,grad_norm: 0.8221866416317944, iteration: 376084
loss: 0.9982237815856934,grad_norm: 0.7846332827753771, iteration: 376085
loss: 1.0228694677352905,grad_norm: 0.7314926099877083, iteration: 376086
loss: 0.9966573119163513,grad_norm: 0.8604522347163507, iteration: 376087
loss: 1.196130394935608,grad_norm: 0.9999994764259155, iteration: 376088
loss: 1.0345615148544312,grad_norm: 0.8907999416847892, iteration: 376089
loss: 1.0255953073501587,grad_norm: 0.807066336375776, iteration: 376090
loss: 0.9755456447601318,grad_norm: 0.9999994889678687, iteration: 376091
loss: 1.00698983669281,grad_norm: 0.7745524087276139, iteration: 376092
loss: 1.0262000560760498,grad_norm: 0.9999995331518433, iteration: 376093
loss: 1.0109354257583618,grad_norm: 0.8750883237586675, iteration: 376094
loss: 1.043030023574829,grad_norm: 0.7794385932530367, iteration: 376095
loss: 1.0036559104919434,grad_norm: 0.8771813318907163, iteration: 376096
loss: 1.004686951637268,grad_norm: 0.7289961848460371, iteration: 376097
loss: 1.0589570999145508,grad_norm: 0.7643627208865769, iteration: 376098
loss: 0.9946328401565552,grad_norm: 0.7269963765743666, iteration: 376099
loss: 1.0006505250930786,grad_norm: 0.8763237852350455, iteration: 376100
loss: 1.0222587585449219,grad_norm: 0.7736525426352057, iteration: 376101
loss: 0.9913069009780884,grad_norm: 0.8441806407863667, iteration: 376102
loss: 1.040504813194275,grad_norm: 0.739835531193343, iteration: 376103
loss: 0.9863287210464478,grad_norm: 0.9999990743388725, iteration: 376104
loss: 1.0197213888168335,grad_norm: 0.9999997282766186, iteration: 376105
loss: 1.0456701517105103,grad_norm: 0.9999991003559038, iteration: 376106
loss: 0.9827448725700378,grad_norm: 0.7686115900160799, iteration: 376107
loss: 1.0466499328613281,grad_norm: 0.7733190723188165, iteration: 376108
loss: 0.9954109787940979,grad_norm: 0.7186828023382416, iteration: 376109
loss: 0.9846643209457397,grad_norm: 0.756168627219634, iteration: 376110
loss: 0.9999654293060303,grad_norm: 0.9999992866155247, iteration: 376111
loss: 0.9789121150970459,grad_norm: 0.8516050815711628, iteration: 376112
loss: 1.001293659210205,grad_norm: 0.9030401970230107, iteration: 376113
loss: 1.0135769844055176,grad_norm: 0.9999992065431051, iteration: 376114
loss: 0.9953935146331787,grad_norm: 0.8264150726603601, iteration: 376115
loss: 1.0000721216201782,grad_norm: 0.7249850488805369, iteration: 376116
loss: 0.9959599375724792,grad_norm: 0.9602367478872733, iteration: 376117
loss: 1.089683175086975,grad_norm: 0.9999990613460729, iteration: 376118
loss: 0.9984232783317566,grad_norm: 0.7835365569157806, iteration: 376119
loss: 1.0097625255584717,grad_norm: 0.8759841145886365, iteration: 376120
loss: 1.233682632446289,grad_norm: 0.854423692602348, iteration: 376121
loss: 0.9883471131324768,grad_norm: 0.84479501380439, iteration: 376122
loss: 1.0267443656921387,grad_norm: 0.9999999158679558, iteration: 376123
loss: 0.9575024843215942,grad_norm: 0.8285300861524615, iteration: 376124
loss: 1.0143541097640991,grad_norm: 0.7980682880731823, iteration: 376125
loss: 1.0392696857452393,grad_norm: 0.9999991599567044, iteration: 376126
loss: 1.0560214519500732,grad_norm: 0.9209115867984922, iteration: 376127
loss: 1.0223889350891113,grad_norm: 0.7236545079816423, iteration: 376128
loss: 1.075848937034607,grad_norm: 0.985137826491709, iteration: 376129
loss: 1.0600510835647583,grad_norm: 0.999999927228347, iteration: 376130
loss: 1.037623643875122,grad_norm: 0.9022764941876568, iteration: 376131
loss: 0.9882391095161438,grad_norm: 0.7985186496091613, iteration: 376132
loss: 1.021616816520691,grad_norm: 0.7343482260723646, iteration: 376133
loss: 1.0230450630187988,grad_norm: 0.8542842087503789, iteration: 376134
loss: 1.0418413877487183,grad_norm: 0.7922450825486904, iteration: 376135
loss: 1.0553115606307983,grad_norm: 0.9999991578381703, iteration: 376136
loss: 0.9970259666442871,grad_norm: 0.8818641608797739, iteration: 376137
loss: 1.0492554903030396,grad_norm: 0.9999998622772286, iteration: 376138
loss: 0.9590181708335876,grad_norm: 0.7475947369786671, iteration: 376139
loss: 0.9657478332519531,grad_norm: 0.7400310092500685, iteration: 376140
loss: 1.0273586511611938,grad_norm: 0.9107587460718942, iteration: 376141
loss: 1.0208673477172852,grad_norm: 0.8274167249405635, iteration: 376142
loss: 0.9812380075454712,grad_norm: 0.6803210271535031, iteration: 376143
loss: 0.9512478709220886,grad_norm: 0.8204190908462441, iteration: 376144
loss: 0.9729921817779541,grad_norm: 0.8513766299019682, iteration: 376145
loss: 0.9993428587913513,grad_norm: 0.8178054423573514, iteration: 376146
loss: 1.0003368854522705,grad_norm: 0.6997919267622547, iteration: 376147
loss: 0.9896095395088196,grad_norm: 0.7537916639431331, iteration: 376148
loss: 0.9881488084793091,grad_norm: 0.9999991990005573, iteration: 376149
loss: 1.034096121788025,grad_norm: 0.791720102267421, iteration: 376150
loss: 0.9902241230010986,grad_norm: 0.8964811074571044, iteration: 376151
loss: 1.0066518783569336,grad_norm: 0.9746726953356629, iteration: 376152
loss: 0.9821841716766357,grad_norm: 0.714530818743712, iteration: 376153
loss: 1.063891053199768,grad_norm: 0.6678460357592694, iteration: 376154
loss: 1.0323423147201538,grad_norm: 0.7559056912946699, iteration: 376155
loss: 0.9857211112976074,grad_norm: 0.8272662684787814, iteration: 376156
loss: 0.9940902590751648,grad_norm: 0.8750048134314293, iteration: 376157
loss: 0.9859291315078735,grad_norm: 0.7639109038807813, iteration: 376158
loss: 0.9460108876228333,grad_norm: 0.9672263111959355, iteration: 376159
loss: 1.0377053022384644,grad_norm: 0.8912444133004367, iteration: 376160
loss: 0.9934020042419434,grad_norm: 0.7511089222931748, iteration: 376161
loss: 0.9491938352584839,grad_norm: 0.780972722271966, iteration: 376162
loss: 0.9929161071777344,grad_norm: 0.8265253302147774, iteration: 376163
loss: 0.9990516901016235,grad_norm: 0.86813862931817, iteration: 376164
loss: 1.0061911344528198,grad_norm: 0.8428972726452154, iteration: 376165
loss: 1.0510034561157227,grad_norm: 0.8057087913025313, iteration: 376166
loss: 1.0315686464309692,grad_norm: 0.9999992613342343, iteration: 376167
loss: 1.013148307800293,grad_norm: 0.7153376582903941, iteration: 376168
loss: 0.9770806431770325,grad_norm: 0.9999995499811074, iteration: 376169
loss: 1.0356301069259644,grad_norm: 0.9999990848805728, iteration: 376170
loss: 1.0182737112045288,grad_norm: 0.8882181562903743, iteration: 376171
loss: 0.956253707408905,grad_norm: 0.8830826377601355, iteration: 376172
loss: 0.99250727891922,grad_norm: 0.7662772162167677, iteration: 376173
loss: 0.984796941280365,grad_norm: 0.9999992319281347, iteration: 376174
loss: 1.0508944988250732,grad_norm: 0.9999999478128372, iteration: 376175
loss: 0.9783593416213989,grad_norm: 0.6687956531255846, iteration: 376176
loss: 0.9893936514854431,grad_norm: 0.8253007317579749, iteration: 376177
loss: 1.0332821607589722,grad_norm: 0.7244590357645082, iteration: 376178
loss: 1.032148838043213,grad_norm: 0.9999996632382877, iteration: 376179
loss: 1.0073997974395752,grad_norm: 0.8177671003699309, iteration: 376180
loss: 0.9777525067329407,grad_norm: 0.765075450813163, iteration: 376181
loss: 1.0317416191101074,grad_norm: 0.8256768439533669, iteration: 376182
loss: 1.0112019777297974,grad_norm: 0.7643111173671288, iteration: 376183
loss: 1.0005697011947632,grad_norm: 0.6874795018216601, iteration: 376184
loss: 1.0127816200256348,grad_norm: 0.9999991411523952, iteration: 376185
loss: 0.9806917309761047,grad_norm: 0.6559462723894606, iteration: 376186
loss: 1.049551248550415,grad_norm: 0.9999991783096541, iteration: 376187
loss: 1.0014779567718506,grad_norm: 0.7961060126837505, iteration: 376188
loss: 1.0330018997192383,grad_norm: 0.9031885412190959, iteration: 376189
loss: 1.0179921388626099,grad_norm: 0.9999990227113562, iteration: 376190
loss: 1.0593596696853638,grad_norm: 0.9336045216483615, iteration: 376191
loss: 1.0251579284667969,grad_norm: 0.9999992100915123, iteration: 376192
loss: 1.0156382322311401,grad_norm: 0.7073814339684291, iteration: 376193
loss: 1.0352696180343628,grad_norm: 0.9437829968874969, iteration: 376194
loss: 0.9870373606681824,grad_norm: 0.7811677271740631, iteration: 376195
loss: 0.9802509546279907,grad_norm: 0.7882612843670523, iteration: 376196
loss: 0.9786847829818726,grad_norm: 0.662026009903413, iteration: 376197
loss: 1.0098459720611572,grad_norm: 0.7208205790438127, iteration: 376198
loss: 0.9753825068473816,grad_norm: 0.8845340284577685, iteration: 376199
loss: 0.9986552000045776,grad_norm: 0.7200108024988787, iteration: 376200
loss: 1.013491153717041,grad_norm: 0.8613936963671145, iteration: 376201
loss: 1.010034203529358,grad_norm: 0.7900215467960969, iteration: 376202
loss: 1.0383436679840088,grad_norm: 0.9999990798470758, iteration: 376203
loss: 0.9737919569015503,grad_norm: 0.7887694278442381, iteration: 376204
loss: 1.015946626663208,grad_norm: 0.701874564147537, iteration: 376205
loss: 1.0430861711502075,grad_norm: 0.9999997624160487, iteration: 376206
loss: 0.9847851991653442,grad_norm: 0.8595506209775454, iteration: 376207
loss: 0.9820886254310608,grad_norm: 0.7962180914808457, iteration: 376208
loss: 1.0059577226638794,grad_norm: 0.792227018878406, iteration: 376209
loss: 1.0055594444274902,grad_norm: 0.9999993281699154, iteration: 376210
loss: 0.9614839553833008,grad_norm: 0.8122568985311516, iteration: 376211
loss: 0.9887648224830627,grad_norm: 0.6940243327332585, iteration: 376212
loss: 1.0124930143356323,grad_norm: 0.6092660041915992, iteration: 376213
loss: 1.0143027305603027,grad_norm: 0.887389403957375, iteration: 376214
loss: 0.9836994409561157,grad_norm: 0.7434147796195602, iteration: 376215
loss: 0.945327639579773,grad_norm: 0.8826828463242897, iteration: 376216
loss: 0.9418049454689026,grad_norm: 0.8436707192247092, iteration: 376217
loss: 0.9721673130989075,grad_norm: 0.7701196964112373, iteration: 376218
loss: 0.994945228099823,grad_norm: 0.7717776144510509, iteration: 376219
loss: 1.0059552192687988,grad_norm: 0.7428708209910764, iteration: 376220
loss: 0.9799719452857971,grad_norm: 0.7766165432404689, iteration: 376221
loss: 0.9791353344917297,grad_norm: 0.7799315256076799, iteration: 376222
loss: 1.0018378496170044,grad_norm: 0.9999992530199066, iteration: 376223
loss: 1.0405347347259521,grad_norm: 0.9999997508492314, iteration: 376224
loss: 1.0365147590637207,grad_norm: 0.9999991807642782, iteration: 376225
loss: 1.0286120176315308,grad_norm: 0.719094386996441, iteration: 376226
loss: 1.0213613510131836,grad_norm: 0.9999990785062627, iteration: 376227
loss: 0.9809314012527466,grad_norm: 0.740031000646407, iteration: 376228
loss: 1.0488908290863037,grad_norm: 0.9999992050033378, iteration: 376229
loss: 0.9721955060958862,grad_norm: 0.8497090119524627, iteration: 376230
loss: 0.9857134222984314,grad_norm: 0.7595233360890906, iteration: 376231
loss: 1.1059733629226685,grad_norm: 0.7698688860535227, iteration: 376232
loss: 0.979364275932312,grad_norm: 0.9441524115005125, iteration: 376233
loss: 0.9883340001106262,grad_norm: 0.7776011009600239, iteration: 376234
loss: 0.9829939007759094,grad_norm: 0.794844186050309, iteration: 376235
loss: 0.9415649175643921,grad_norm: 0.9172088482044799, iteration: 376236
loss: 0.9923303127288818,grad_norm: 0.8748730904736518, iteration: 376237
loss: 0.9830409288406372,grad_norm: 0.8617237599124201, iteration: 376238
loss: 0.970751166343689,grad_norm: 0.7792133488703666, iteration: 376239
loss: 0.9760946035385132,grad_norm: 0.9046318367805142, iteration: 376240
loss: 1.0103570222854614,grad_norm: 0.7627600225203531, iteration: 376241
loss: 1.0312273502349854,grad_norm: 0.999999679438601, iteration: 376242
loss: 1.1817982196807861,grad_norm: 0.999999262827106, iteration: 376243
loss: 0.9924758076667786,grad_norm: 0.7688635284185841, iteration: 376244
loss: 1.044736385345459,grad_norm: 0.7081133110084589, iteration: 376245
loss: 1.021838665008545,grad_norm: 0.8429199994708031, iteration: 376246
loss: 0.968876302242279,grad_norm: 0.7597847282820399, iteration: 376247
loss: 1.0192358493804932,grad_norm: 0.9999994810222441, iteration: 376248
loss: 0.9628949761390686,grad_norm: 0.8150466754088186, iteration: 376249
loss: 0.9847727417945862,grad_norm: 0.7419584824642612, iteration: 376250
loss: 1.0133153200149536,grad_norm: 0.9146805955199648, iteration: 376251
loss: 1.0509631633758545,grad_norm: 0.9999994622903916, iteration: 376252
loss: 0.9875282049179077,grad_norm: 0.9802786985653603, iteration: 376253
loss: 0.987445592880249,grad_norm: 0.801635747336393, iteration: 376254
loss: 1.0127981901168823,grad_norm: 0.7394380930860293, iteration: 376255
loss: 0.9708225727081299,grad_norm: 0.7816337025207347, iteration: 376256
loss: 1.026899814605713,grad_norm: 0.7071685156162364, iteration: 376257
loss: 0.9607807397842407,grad_norm: 0.9999991773158072, iteration: 376258
loss: 1.013043999671936,grad_norm: 0.9999993520627967, iteration: 376259
loss: 0.9811859130859375,grad_norm: 0.7361366361334574, iteration: 376260
loss: 0.9953581690788269,grad_norm: 0.803871331181646, iteration: 376261
loss: 1.0072227716445923,grad_norm: 0.8929161057826787, iteration: 376262
loss: 1.0154435634613037,grad_norm: 0.7745106702779393, iteration: 376263
loss: 1.0548256635665894,grad_norm: 0.9999994922356794, iteration: 376264
loss: 0.9915209412574768,grad_norm: 0.9099257875988527, iteration: 376265
loss: 1.0103572607040405,grad_norm: 0.8185508979804201, iteration: 376266
loss: 1.0378929376602173,grad_norm: 0.8364145584927754, iteration: 376267
loss: 1.0340205430984497,grad_norm: 0.9999994366178941, iteration: 376268
loss: 1.0298317670822144,grad_norm: 0.7114798292490756, iteration: 376269
loss: 0.935139536857605,grad_norm: 0.7453263981036506, iteration: 376270
loss: 0.9942185282707214,grad_norm: 0.8774416634619919, iteration: 376271
loss: 1.0342395305633545,grad_norm: 0.7763325120137547, iteration: 376272
loss: 0.997759222984314,grad_norm: 0.9214696436940012, iteration: 376273
loss: 1.0056188106536865,grad_norm: 0.8635688321203361, iteration: 376274
loss: 0.9936787486076355,grad_norm: 0.9021007754772754, iteration: 376275
loss: 0.9816516637802124,grad_norm: 0.7480873023626675, iteration: 376276
loss: 1.0006526708602905,grad_norm: 0.7330288655686329, iteration: 376277
loss: 1.0347411632537842,grad_norm: 0.8848456675373039, iteration: 376278
loss: 1.034792184829712,grad_norm: 0.9762497357718317, iteration: 376279
loss: 0.9659119248390198,grad_norm: 0.8445086934029922, iteration: 376280
loss: 1.006125569343567,grad_norm: 0.755311033093194, iteration: 376281
loss: 0.9927456378936768,grad_norm: 0.8151390477541945, iteration: 376282
loss: 0.9886559247970581,grad_norm: 0.7712221346766736, iteration: 376283
loss: 1.0096403360366821,grad_norm: 0.8311213623883167, iteration: 376284
loss: 1.0247172117233276,grad_norm: 0.9153110047059065, iteration: 376285
loss: 1.01994788646698,grad_norm: 0.7094897898076227, iteration: 376286
loss: 0.9835817217826843,grad_norm: 0.8985555279842863, iteration: 376287
loss: 1.028403878211975,grad_norm: 0.8385760045567302, iteration: 376288
loss: 0.9633134603500366,grad_norm: 0.8337201836653864, iteration: 376289
loss: 1.001211166381836,grad_norm: 0.9746674709865363, iteration: 376290
loss: 0.9778733849525452,grad_norm: 0.7882995141059393, iteration: 376291
loss: 1.0136682987213135,grad_norm: 0.9999999001183367, iteration: 376292
loss: 0.9648904800415039,grad_norm: 0.8258923987893925, iteration: 376293
loss: 1.01151704788208,grad_norm: 0.6818208697039494, iteration: 376294
loss: 1.0062276124954224,grad_norm: 0.9999995212005749, iteration: 376295
loss: 0.9973657727241516,grad_norm: 0.7040337186047162, iteration: 376296
loss: 0.9680077433586121,grad_norm: 0.7575822819882414, iteration: 376297
loss: 0.996308445930481,grad_norm: 0.8391136444174871, iteration: 376298
loss: 0.9979400634765625,grad_norm: 0.8215074211023841, iteration: 376299
loss: 1.0260355472564697,grad_norm: 0.8090030653095441, iteration: 376300
loss: 0.9996697306632996,grad_norm: 0.7751348332654749, iteration: 376301
loss: 0.9965161085128784,grad_norm: 0.8392155110341465, iteration: 376302
loss: 1.0186647176742554,grad_norm: 0.8628864788557182, iteration: 376303
loss: 1.0128179788589478,grad_norm: 0.8049240388360436, iteration: 376304
loss: 0.9772797226905823,grad_norm: 0.7019812790933203, iteration: 376305
loss: 0.9777010083198547,grad_norm: 0.7707337441366493, iteration: 376306
loss: 1.0338884592056274,grad_norm: 0.9999992687008815, iteration: 376307
loss: 0.9824231863021851,grad_norm: 0.9999989442347552, iteration: 376308
loss: 1.018051028251648,grad_norm: 0.7836989928190773, iteration: 376309
loss: 1.0431203842163086,grad_norm: 0.9999998795062612, iteration: 376310
loss: 1.0845495462417603,grad_norm: 0.9999992029035439, iteration: 376311
loss: 0.9744463562965393,grad_norm: 0.7473867713668085, iteration: 376312
loss: 1.0058132410049438,grad_norm: 0.9355556094561492, iteration: 376313
loss: 1.0318480730056763,grad_norm: 0.8901197156542131, iteration: 376314
loss: 1.0045325756072998,grad_norm: 0.7363493264175117, iteration: 376315
loss: 1.0019876956939697,grad_norm: 0.9647902045970673, iteration: 376316
loss: 0.997771143913269,grad_norm: 0.7064345508758831, iteration: 376317
loss: 1.0252957344055176,grad_norm: 0.7380470722630585, iteration: 376318
loss: 1.0731687545776367,grad_norm: 0.9999989956978026, iteration: 376319
loss: 0.9667309522628784,grad_norm: 0.5740462418856002, iteration: 376320
loss: 1.0114911794662476,grad_norm: 0.9611718132776347, iteration: 376321
loss: 1.0321826934814453,grad_norm: 0.9999991819161071, iteration: 376322
loss: 0.9853733777999878,grad_norm: 0.9103810592594164, iteration: 376323
loss: 1.0249038934707642,grad_norm: 0.9339787744240003, iteration: 376324
loss: 1.0026638507843018,grad_norm: 0.8490251253288452, iteration: 376325
loss: 1.000722885131836,grad_norm: 0.9424866796133902, iteration: 376326
loss: 0.9891617894172668,grad_norm: 0.7690072703426885, iteration: 376327
loss: 0.9906451106071472,grad_norm: 0.9999995514380503, iteration: 376328
loss: 0.9908626079559326,grad_norm: 0.9999990651603566, iteration: 376329
loss: 1.0171427726745605,grad_norm: 0.6927950278243266, iteration: 376330
loss: 1.035626769065857,grad_norm: 0.8170793078543424, iteration: 376331
loss: 0.9904583096504211,grad_norm: 0.8644254497940812, iteration: 376332
loss: 0.9733649492263794,grad_norm: 0.7321438212181385, iteration: 376333
loss: 1.0146559476852417,grad_norm: 0.999999100446466, iteration: 376334
loss: 0.96382075548172,grad_norm: 0.826406045550725, iteration: 376335
loss: 1.005667805671692,grad_norm: 0.7339933466018347, iteration: 376336
loss: 0.9945334792137146,grad_norm: 0.8753872459664314, iteration: 376337
loss: 0.983536422252655,grad_norm: 0.7744950368315363, iteration: 376338
loss: 0.9931179881095886,grad_norm: 0.7077445554247285, iteration: 376339
loss: 1.0079338550567627,grad_norm: 0.7349583256006924, iteration: 376340
loss: 0.9793967604637146,grad_norm: 0.9464313037586174, iteration: 376341
loss: 0.9996327757835388,grad_norm: 0.766164988525399, iteration: 376342
loss: 0.9904099106788635,grad_norm: 0.9280901658140243, iteration: 376343
loss: 1.0202457904815674,grad_norm: 0.8005915445340528, iteration: 376344
loss: 0.9957088828086853,grad_norm: 0.9026993103888518, iteration: 376345
loss: 0.972197413444519,grad_norm: 0.7559377242262527, iteration: 376346
loss: 0.9776037335395813,grad_norm: 0.9821859072079492, iteration: 376347
loss: 0.9943000674247742,grad_norm: 0.8429676758414374, iteration: 376348
loss: 1.0136821269989014,grad_norm: 0.9120383047524568, iteration: 376349
loss: 1.001084566116333,grad_norm: 0.6924533252525465, iteration: 376350
loss: 0.9733946919441223,grad_norm: 0.8773934975290015, iteration: 376351
loss: 0.9873285293579102,grad_norm: 0.7621048424212065, iteration: 376352
loss: 0.9924779534339905,grad_norm: 0.6858170623339654, iteration: 376353
loss: 1.0157129764556885,grad_norm: 0.8180983328867075, iteration: 376354
loss: 1.0057225227355957,grad_norm: 0.7006655504764235, iteration: 376355
loss: 1.0675065517425537,grad_norm: 0.7263979700749116, iteration: 376356
loss: 1.0040318965911865,grad_norm: 0.7832349450691186, iteration: 376357
loss: 1.0218846797943115,grad_norm: 0.7699123654218472, iteration: 376358
loss: 1.0377659797668457,grad_norm: 0.724855289222438, iteration: 376359
loss: 1.0092610120773315,grad_norm: 0.7162856129732222, iteration: 376360
loss: 0.9951258301734924,grad_norm: 0.9301985334533627, iteration: 376361
loss: 0.9867021441459656,grad_norm: 0.8380231015914686, iteration: 376362
loss: 0.9921479225158691,grad_norm: 0.8941219218545227, iteration: 376363
loss: 0.9992341995239258,grad_norm: 0.7853805875442004, iteration: 376364
loss: 0.9665139317512512,grad_norm: 0.8501848632730469, iteration: 376365
loss: 1.0012112855911255,grad_norm: 0.9999992118608148, iteration: 376366
loss: 0.9647346138954163,grad_norm: 0.7856056814874606, iteration: 376367
loss: 0.9740560054779053,grad_norm: 0.8193823332752166, iteration: 376368
loss: 1.0244905948638916,grad_norm: 0.7749096660879808, iteration: 376369
loss: 1.0436453819274902,grad_norm: 0.9154155959102386, iteration: 376370
loss: 1.0261943340301514,grad_norm: 0.9999995174332619, iteration: 376371
loss: 0.9487878680229187,grad_norm: 0.7934682615796477, iteration: 376372
loss: 0.991845428943634,grad_norm: 0.8698037212299304, iteration: 376373
loss: 0.9947242140769958,grad_norm: 0.7334230200270773, iteration: 376374
loss: 1.0226918458938599,grad_norm: 0.9999991103200886, iteration: 376375
loss: 1.006719708442688,grad_norm: 0.9999990635789531, iteration: 376376
loss: 0.9587904214859009,grad_norm: 0.7243311294883067, iteration: 376377
loss: 0.9789713621139526,grad_norm: 0.8905780976848786, iteration: 376378
loss: 0.9558095335960388,grad_norm: 0.7437490380802738, iteration: 376379
loss: 0.94241863489151,grad_norm: 0.8162103806659231, iteration: 376380
loss: 1.0176200866699219,grad_norm: 0.8763363277761105, iteration: 376381
loss: 0.9974817037582397,grad_norm: 0.9999990727721247, iteration: 376382
loss: 1.0320838689804077,grad_norm: 0.8270578312329869, iteration: 376383
loss: 1.0028362274169922,grad_norm: 0.7870089861465992, iteration: 376384
loss: 1.142061471939087,grad_norm: 0.9999997854033226, iteration: 376385
loss: 1.0172970294952393,grad_norm: 0.8808923302463488, iteration: 376386
loss: 0.960192859172821,grad_norm: 0.7230639108319871, iteration: 376387
loss: 0.989016056060791,grad_norm: 0.7806089167632378, iteration: 376388
loss: 0.9838653802871704,grad_norm: 0.7851833530008664, iteration: 376389
loss: 0.9948727488517761,grad_norm: 0.8369455896066396, iteration: 376390
loss: 1.0107694864273071,grad_norm: 0.8914160808306175, iteration: 376391
loss: 1.0083156824111938,grad_norm: 0.7949049475051966, iteration: 376392
loss: 0.9864330291748047,grad_norm: 0.5955349068337356, iteration: 376393
loss: 1.0286155939102173,grad_norm: 0.9173112012012749, iteration: 376394
loss: 1.0140032768249512,grad_norm: 0.8288736894625126, iteration: 376395
loss: 0.9869717359542847,grad_norm: 0.7818900077210449, iteration: 376396
loss: 1.03925359249115,grad_norm: 0.9999990581101397, iteration: 376397
loss: 1.0920531749725342,grad_norm: 0.959226259295502, iteration: 376398
loss: 1.0463775396347046,grad_norm: 0.8953446942846381, iteration: 376399
loss: 0.9885149002075195,grad_norm: 0.9999990901222016, iteration: 376400
loss: 1.00840425491333,grad_norm: 0.68862423350948, iteration: 376401
loss: 1.0042479038238525,grad_norm: 0.8394234119848604, iteration: 376402
loss: 0.9852998852729797,grad_norm: 0.871898699400769, iteration: 376403
loss: 1.0124484300613403,grad_norm: 0.6790037441984459, iteration: 376404
loss: 1.013809323310852,grad_norm: 0.6738820795639001, iteration: 376405
loss: 1.0450022220611572,grad_norm: 0.7846996688756155, iteration: 376406
loss: 1.0041816234588623,grad_norm: 0.7924262057074634, iteration: 376407
loss: 0.9859537482261658,grad_norm: 0.9999994392077004, iteration: 376408
loss: 1.0122748613357544,grad_norm: 0.9608970123492225, iteration: 376409
loss: 1.0101664066314697,grad_norm: 0.9416701135859933, iteration: 376410
loss: 1.0091477632522583,grad_norm: 0.8693451620269955, iteration: 376411
loss: 0.9977097511291504,grad_norm: 0.8267419224823906, iteration: 376412
loss: 0.9933395385742188,grad_norm: 0.7361532711435698, iteration: 376413
loss: 0.9768214225769043,grad_norm: 0.8583468933621786, iteration: 376414
loss: 0.9953462481498718,grad_norm: 0.7918086247975425, iteration: 376415
loss: 0.9864152073860168,grad_norm: 0.8992219407875306, iteration: 376416
loss: 1.0130740404129028,grad_norm: 0.7467980592562284, iteration: 376417
loss: 0.9615491032600403,grad_norm: 0.8358221601536112, iteration: 376418
loss: 1.123490571975708,grad_norm: 0.9999998441340368, iteration: 376419
loss: 1.0175622701644897,grad_norm: 0.8447314840819097, iteration: 376420
loss: 0.9892904162406921,grad_norm: 0.8906720170787806, iteration: 376421
loss: 1.0388352870941162,grad_norm: 0.6727006491802022, iteration: 376422
loss: 1.0408958196640015,grad_norm: 0.793360451716905, iteration: 376423
loss: 0.9785035848617554,grad_norm: 0.7111548733808419, iteration: 376424
loss: 0.9910026788711548,grad_norm: 0.7878379261204354, iteration: 376425
loss: 0.9643204212188721,grad_norm: 0.8047674985530499, iteration: 376426
loss: 0.9991726875305176,grad_norm: 0.7393329346430777, iteration: 376427
loss: 1.0307741165161133,grad_norm: 0.9595148177272744, iteration: 376428
loss: 1.0243746042251587,grad_norm: 0.8684695593826142, iteration: 376429
loss: 0.9690831899642944,grad_norm: 0.7088145530263485, iteration: 376430
loss: 1.0052762031555176,grad_norm: 0.8014969136555717, iteration: 376431
loss: 1.0646474361419678,grad_norm: 0.8389929312375962, iteration: 376432
loss: 0.9742936491966248,grad_norm: 0.874170513740251, iteration: 376433
loss: 0.9782869219779968,grad_norm: 0.8800457501403357, iteration: 376434
loss: 1.022699236869812,grad_norm: 0.8296244749248203, iteration: 376435
loss: 1.009159803390503,grad_norm: 0.9999999373852996, iteration: 376436
loss: 0.9630556106567383,grad_norm: 0.8154356006042145, iteration: 376437
loss: 0.974730908870697,grad_norm: 0.8126093896781064, iteration: 376438
loss: 0.9997609257698059,grad_norm: 0.8207850340638608, iteration: 376439
loss: 1.010880947113037,grad_norm: 0.967963002099307, iteration: 376440
loss: 1.0144459009170532,grad_norm: 0.8376484199481359, iteration: 376441
loss: 0.975008487701416,grad_norm: 0.8270096477738201, iteration: 376442
loss: 0.9616711139678955,grad_norm: 0.8208251390986164, iteration: 376443
loss: 0.9756284952163696,grad_norm: 0.9999991143263987, iteration: 376444
loss: 1.0467970371246338,grad_norm: 0.9999994844170276, iteration: 376445
loss: 0.9892550110816956,grad_norm: 0.8631648525333258, iteration: 376446
loss: 0.9985285401344299,grad_norm: 0.9189892516809884, iteration: 376447
loss: 1.0365464687347412,grad_norm: 0.9999989779386714, iteration: 376448
loss: 1.0708558559417725,grad_norm: 0.9999999405356366, iteration: 376449
loss: 1.011530876159668,grad_norm: 0.6619925468221505, iteration: 376450
loss: 1.0043209791183472,grad_norm: 0.8500415595510199, iteration: 376451
loss: 0.9916855692863464,grad_norm: 0.999999723852071, iteration: 376452
loss: 0.9778594970703125,grad_norm: 0.8747511116038823, iteration: 376453
loss: 1.0668764114379883,grad_norm: 0.9481429526735151, iteration: 376454
loss: 1.036558747291565,grad_norm: 0.8452260561975737, iteration: 376455
loss: 1.503416895866394,grad_norm: 0.9999998454888157, iteration: 376456
loss: 1.3285378217697144,grad_norm: 0.9999996813073737, iteration: 376457
loss: 0.9603551030158997,grad_norm: 0.7283265490593175, iteration: 376458
loss: 1.1725469827651978,grad_norm: 0.9999998182300015, iteration: 376459
loss: 1.0012134313583374,grad_norm: 0.8401371791669947, iteration: 376460
loss: 1.0066320896148682,grad_norm: 0.9252363624194131, iteration: 376461
loss: 1.0190341472625732,grad_norm: 0.999999845270814, iteration: 376462
loss: 1.3048350811004639,grad_norm: 0.999999864730491, iteration: 376463
loss: 1.0116641521453857,grad_norm: 0.7245509736216129, iteration: 376464
loss: 1.007657766342163,grad_norm: 0.7705275277658052, iteration: 376465
loss: 0.9597448110580444,grad_norm: 0.9890356028625495, iteration: 376466
loss: 1.0084502696990967,grad_norm: 0.9999991777017189, iteration: 376467
loss: 0.9984011650085449,grad_norm: 0.8094976983874042, iteration: 376468
loss: 0.975875973701477,grad_norm: 0.7981747283420738, iteration: 376469
loss: 1.0484539270401,grad_norm: 0.9999994684516355, iteration: 376470
loss: 1.0037561655044556,grad_norm: 1.0000000068302806, iteration: 376471
loss: 0.9741947054862976,grad_norm: 0.9543337023698544, iteration: 376472
loss: 1.007530927658081,grad_norm: 0.999999812712976, iteration: 376473
loss: 1.0249722003936768,grad_norm: 0.9999993612642609, iteration: 376474
loss: 0.9833058714866638,grad_norm: 0.7731875221252854, iteration: 376475
loss: 1.0110085010528564,grad_norm: 0.7516419141456913, iteration: 376476
loss: 1.0036810636520386,grad_norm: 0.9589331511535943, iteration: 376477
loss: 0.9803167581558228,grad_norm: 0.7668311716793308, iteration: 376478
loss: 0.9798942804336548,grad_norm: 0.9999999904663682, iteration: 376479
loss: 0.9945085048675537,grad_norm: 0.7324158273710523, iteration: 376480
loss: 1.0329444408416748,grad_norm: 0.7296034312475641, iteration: 376481
loss: 1.00124192237854,grad_norm: 0.9104261765552186, iteration: 376482
loss: 1.0075271129608154,grad_norm: 0.99999924504921, iteration: 376483
loss: 0.9804671406745911,grad_norm: 0.6863967068801714, iteration: 376484
loss: 1.0419055223464966,grad_norm: 0.8737798090934548, iteration: 376485
loss: 0.9985257983207703,grad_norm: 0.8066885921933836, iteration: 376486
loss: 1.0987621545791626,grad_norm: 0.9999997886895939, iteration: 376487
loss: 0.9995418787002563,grad_norm: 0.6889488693264423, iteration: 376488
loss: 1.0538837909698486,grad_norm: 0.8131048388983859, iteration: 376489
loss: 0.9847617745399475,grad_norm: 0.7528606842957245, iteration: 376490
loss: 0.967000424861908,grad_norm: 0.7095483810665656, iteration: 376491
loss: 1.0202056169509888,grad_norm: 0.8644787482769454, iteration: 376492
loss: 1.0115690231323242,grad_norm: 0.7742978716541314, iteration: 376493
loss: 1.0149505138397217,grad_norm: 0.7585133325225568, iteration: 376494
loss: 1.0107086896896362,grad_norm: 0.831020499260965, iteration: 376495
loss: 1.053696870803833,grad_norm: 1.0000001006642116, iteration: 376496
loss: 1.0365341901779175,grad_norm: 0.9999993334521204, iteration: 376497
loss: 0.9778417348861694,grad_norm: 0.8071557889299049, iteration: 376498
loss: 0.9659820795059204,grad_norm: 0.7463598743073497, iteration: 376499
loss: 0.9908949136734009,grad_norm: 0.7545068449257931, iteration: 376500
loss: 0.9924502372741699,grad_norm: 0.9046700632032079, iteration: 376501
loss: 0.9633354544639587,grad_norm: 0.7393278484403953, iteration: 376502
loss: 1.3786436319351196,grad_norm: 0.9999997299624215, iteration: 376503
loss: 0.9914582371711731,grad_norm: 0.7474641381767667, iteration: 376504
loss: 0.9715746641159058,grad_norm: 0.9999992307971142, iteration: 376505
loss: 0.9925571084022522,grad_norm: 0.8913035017675728, iteration: 376506
loss: 0.9802262783050537,grad_norm: 0.8281096542857276, iteration: 376507
loss: 1.0091204643249512,grad_norm: 0.7358749502343516, iteration: 376508
loss: 0.990014374256134,grad_norm: 0.7399929081347418, iteration: 376509
loss: 0.973399817943573,grad_norm: 0.9999992774865836, iteration: 376510
loss: 1.0408194065093994,grad_norm: 0.6951187514527549, iteration: 376511
loss: 1.0073715448379517,grad_norm: 0.7115698254315762, iteration: 376512
loss: 1.0165550708770752,grad_norm: 0.9966192895668842, iteration: 376513
loss: 0.9798864126205444,grad_norm: 0.8162405035702494, iteration: 376514
loss: 0.9625421166419983,grad_norm: 0.8995989473561495, iteration: 376515
loss: 1.015952229499817,grad_norm: 0.9209716114199825, iteration: 376516
loss: 0.9835813045501709,grad_norm: 0.9999994012587213, iteration: 376517
loss: 0.9854426383972168,grad_norm: 0.6932067165622328, iteration: 376518
loss: 1.0022125244140625,grad_norm: 0.7517418176669006, iteration: 376519
loss: 0.9878367185592651,grad_norm: 0.7757278192127546, iteration: 376520
loss: 0.9969441890716553,grad_norm: 0.999999175796867, iteration: 376521
loss: 1.0169236660003662,grad_norm: 0.7031389584514058, iteration: 376522
loss: 1.0204522609710693,grad_norm: 0.8566540699475302, iteration: 376523
loss: 1.0046018362045288,grad_norm: 0.9999992536775231, iteration: 376524
loss: 1.0200914144515991,grad_norm: 0.9372900391529138, iteration: 376525
loss: 0.9950218796730042,grad_norm: 0.7715600947226404, iteration: 376526
loss: 0.997707188129425,grad_norm: 0.9999995243507538, iteration: 376527
loss: 0.9970958232879639,grad_norm: 0.7232909094286497, iteration: 376528
loss: 0.9825944304466248,grad_norm: 0.6939823979443714, iteration: 376529
loss: 0.9704084396362305,grad_norm: 0.9999999490277683, iteration: 376530
loss: 1.0056506395339966,grad_norm: 0.7500471185853194, iteration: 376531
loss: 1.0169745683670044,grad_norm: 0.9999990730142555, iteration: 376532
loss: 0.9964209198951721,grad_norm: 0.8539893407445032, iteration: 376533
loss: 1.002076268196106,grad_norm: 0.9053805711550788, iteration: 376534
loss: 1.0006839036941528,grad_norm: 0.7029697091994046, iteration: 376535
loss: 1.0155519247055054,grad_norm: 0.8970946130678301, iteration: 376536
loss: 0.9967444539070129,grad_norm: 0.7908383362446212, iteration: 376537
loss: 1.0125130414962769,grad_norm: 0.9999999030067135, iteration: 376538
loss: 1.0408810377120972,grad_norm: 0.8576746148046837, iteration: 376539
loss: 0.9935988783836365,grad_norm: 0.8521935382883563, iteration: 376540
loss: 0.9994403123855591,grad_norm: 0.876438952558715, iteration: 376541
loss: 0.9792798161506653,grad_norm: 0.8569605626220137, iteration: 376542
loss: 1.0148836374282837,grad_norm: 0.8312833315330758, iteration: 376543
loss: 0.9573460221290588,grad_norm: 0.8468052634933183, iteration: 376544
loss: 1.0170378684997559,grad_norm: 0.6703231307500376, iteration: 376545
loss: 0.9635259509086609,grad_norm: 0.7871561042216568, iteration: 376546
loss: 1.0092440843582153,grad_norm: 0.9999996711698415, iteration: 376547
loss: 0.9646845459938049,grad_norm: 0.7763691683956224, iteration: 376548
loss: 0.9892284274101257,grad_norm: 0.7934620774475024, iteration: 376549
loss: 0.9566531777381897,grad_norm: 0.7830303026475022, iteration: 376550
loss: 1.0067875385284424,grad_norm: 0.9932782776407117, iteration: 376551
loss: 0.9663422703742981,grad_norm: 0.7518050383780284, iteration: 376552
loss: 0.9616667032241821,grad_norm: 0.8492471259836375, iteration: 376553
loss: 0.9990056157112122,grad_norm: 0.7480441371145131, iteration: 376554
loss: 0.9722053408622742,grad_norm: 0.7040477081713248, iteration: 376555
loss: 0.9616003036499023,grad_norm: 0.8523657578475182, iteration: 376556
loss: 1.0035754442214966,grad_norm: 0.7684433832101912, iteration: 376557
loss: 1.0452864170074463,grad_norm: 0.8204778326853113, iteration: 376558
loss: 1.0212668180465698,grad_norm: 0.794887877509372, iteration: 376559
loss: 1.0301625728607178,grad_norm: 0.8193059742495815, iteration: 376560
loss: 0.9947549104690552,grad_norm: 0.9636958879543238, iteration: 376561
loss: 0.9739818572998047,grad_norm: 0.7839690305633136, iteration: 376562
loss: 1.03182053565979,grad_norm: 0.8175959269783296, iteration: 376563
loss: 0.9980911016464233,grad_norm: 0.9874226765835651, iteration: 376564
loss: 0.9747185707092285,grad_norm: 0.7509333794361968, iteration: 376565
loss: 1.0087848901748657,grad_norm: 0.9906324633395492, iteration: 376566
loss: 0.9924384951591492,grad_norm: 0.8164672828887638, iteration: 376567
loss: 1.0269651412963867,grad_norm: 0.8292372803373146, iteration: 376568
loss: 1.0475318431854248,grad_norm: 0.8950232172496205, iteration: 376569
loss: 1.018614649772644,grad_norm: 0.8398302046346808, iteration: 376570
loss: 1.0564062595367432,grad_norm: 0.9999996635911824, iteration: 376571
loss: 0.9855194091796875,grad_norm: 0.7735153543391926, iteration: 376572
loss: 1.024922490119934,grad_norm: 0.7468336205840124, iteration: 376573
loss: 1.0130360126495361,grad_norm: 0.762297807629894, iteration: 376574
loss: 1.0078520774841309,grad_norm: 0.7964691631462149, iteration: 376575
loss: 1.0103604793548584,grad_norm: 0.7183612500111329, iteration: 376576
loss: 0.9867830276489258,grad_norm: 0.9171503236156946, iteration: 376577
loss: 1.0200361013412476,grad_norm: 0.6501577037960398, iteration: 376578
loss: 0.9976071715354919,grad_norm: 0.6706276246794929, iteration: 376579
loss: 0.9837486147880554,grad_norm: 0.7806806683682763, iteration: 376580
loss: 0.9932926893234253,grad_norm: 0.8673730407270496, iteration: 376581
loss: 1.0394519567489624,grad_norm: 0.7503877350609991, iteration: 376582
loss: 0.9950049519538879,grad_norm: 0.8920113962207465, iteration: 376583
loss: 0.9695674777030945,grad_norm: 0.9999991063635132, iteration: 376584
loss: 0.9930960536003113,grad_norm: 0.7429740598370881, iteration: 376585
loss: 0.9812359809875488,grad_norm: 0.8478616290653385, iteration: 376586
loss: 1.001660704612732,grad_norm: 0.9999991967849575, iteration: 376587
loss: 1.009568691253662,grad_norm: 0.9402362549500751, iteration: 376588
loss: 0.973135232925415,grad_norm: 0.753919812361264, iteration: 376589
loss: 0.9849178791046143,grad_norm: 0.6334741286040593, iteration: 376590
loss: 1.02572500705719,grad_norm: 0.9999993435042815, iteration: 376591
loss: 0.9598473906517029,grad_norm: 0.7625368570619375, iteration: 376592
loss: 0.9864133596420288,grad_norm: 0.6948705095524578, iteration: 376593
loss: 0.9665642380714417,grad_norm: 0.7663756634196152, iteration: 376594
loss: 0.9943851232528687,grad_norm: 0.8397411482638598, iteration: 376595
loss: 0.991832971572876,grad_norm: 0.9038629867369031, iteration: 376596
loss: 0.9767184257507324,grad_norm: 0.9523709447480371, iteration: 376597
loss: 1.005009412765503,grad_norm: 0.8570890439714288, iteration: 376598
loss: 1.0422077178955078,grad_norm: 0.8334031901222783, iteration: 376599
loss: 0.9937831163406372,grad_norm: 0.6644996847438203, iteration: 376600
loss: 0.995181143283844,grad_norm: 0.8658071043320248, iteration: 376601
loss: 0.9915270805358887,grad_norm: 0.7998364769498916, iteration: 376602
loss: 1.0248867273330688,grad_norm: 0.7847776107203519, iteration: 376603
loss: 1.0177793502807617,grad_norm: 0.8587911760981494, iteration: 376604
loss: 1.0317264795303345,grad_norm: 0.9999991243178565, iteration: 376605
loss: 1.015580415725708,grad_norm: 0.7995913680906854, iteration: 376606
loss: 1.026469111442566,grad_norm: 0.917718885591584, iteration: 376607
loss: 1.007125735282898,grad_norm: 0.8657651998628058, iteration: 376608
loss: 0.9449009895324707,grad_norm: 0.7876126668977034, iteration: 376609
loss: 1.0214743614196777,grad_norm: 0.9998831341866686, iteration: 376610
loss: 0.9838308095932007,grad_norm: 0.7285328127693069, iteration: 376611
loss: 1.0340341329574585,grad_norm: 0.9999991874396245, iteration: 376612
loss: 0.9874308705329895,grad_norm: 0.7028267278251418, iteration: 376613
loss: 0.9859864711761475,grad_norm: 0.786884775141837, iteration: 376614
loss: 1.0070626735687256,grad_norm: 0.8418970847073344, iteration: 376615
loss: 1.0218459367752075,grad_norm: 0.674640112613595, iteration: 376616
loss: 0.9776002764701843,grad_norm: 0.8080307684722972, iteration: 376617
loss: 1.0447417497634888,grad_norm: 0.9999993656342271, iteration: 376618
loss: 1.0079950094223022,grad_norm: 0.6899425663989431, iteration: 376619
loss: 1.0623364448547363,grad_norm: 0.7560328669266183, iteration: 376620
loss: 1.0180466175079346,grad_norm: 0.8020029811003377, iteration: 376621
loss: 1.0006173849105835,grad_norm: 0.6805710195760836, iteration: 376622
loss: 0.9927100539207458,grad_norm: 0.7022642366016877, iteration: 376623
loss: 1.0143139362335205,grad_norm: 0.8019784549527696, iteration: 376624
loss: 1.0433725118637085,grad_norm: 0.9033232892990471, iteration: 376625
loss: 1.002358078956604,grad_norm: 0.801025420745043, iteration: 376626
loss: 0.981195867061615,grad_norm: 0.9597139403716983, iteration: 376627
loss: 0.9922937154769897,grad_norm: 0.810974314254454, iteration: 376628
loss: 1.0177875757217407,grad_norm: 0.8120082514940408, iteration: 376629
loss: 0.9853121638298035,grad_norm: 0.7662408554936937, iteration: 376630
loss: 0.9813282489776611,grad_norm: 0.722464495488735, iteration: 376631
loss: 0.9981470704078674,grad_norm: 0.798568855964114, iteration: 376632
loss: 1.0303921699523926,grad_norm: 0.9999993713147541, iteration: 376633
loss: 0.9568601250648499,grad_norm: 0.7828270562502958, iteration: 376634
loss: 1.06398606300354,grad_norm: 0.89727151645021, iteration: 376635
loss: 0.9951845407485962,grad_norm: 0.858844253202376, iteration: 376636
loss: 0.9625040888786316,grad_norm: 0.7641432476897623, iteration: 376637
loss: 1.0103754997253418,grad_norm: 0.8098576256121601, iteration: 376638
loss: 1.0103561878204346,grad_norm: 0.8933965540587282, iteration: 376639
loss: 0.9879594445228577,grad_norm: 0.9471184992013917, iteration: 376640
loss: 0.9777944684028625,grad_norm: 0.9999990914916229, iteration: 376641
loss: 0.9888745546340942,grad_norm: 0.7700018911863483, iteration: 376642
loss: 1.000815987586975,grad_norm: 0.7776082523099712, iteration: 376643
loss: 0.9727980494499207,grad_norm: 0.8777881642987545, iteration: 376644
loss: 0.9931731820106506,grad_norm: 0.8772210630186065, iteration: 376645
loss: 0.9582741260528564,grad_norm: 0.8148720719668734, iteration: 376646
loss: 1.023858904838562,grad_norm: 0.9999991858500116, iteration: 376647
loss: 0.9833343625068665,grad_norm: 0.8134106132863633, iteration: 376648
loss: 1.0154272317886353,grad_norm: 0.8441756671858186, iteration: 376649
loss: 0.9824280738830566,grad_norm: 0.746760999539083, iteration: 376650
loss: 0.96559739112854,grad_norm: 0.9999999082829452, iteration: 376651
loss: 0.9808244705200195,grad_norm: 0.9323021427604566, iteration: 376652
loss: 0.9816288948059082,grad_norm: 0.7644044568382082, iteration: 376653
loss: 0.989704966545105,grad_norm: 0.8349115729492106, iteration: 376654
loss: 0.9798964262008667,grad_norm: 0.717418228520335, iteration: 376655
loss: 1.0151033401489258,grad_norm: 0.7079899840606434, iteration: 376656
loss: 0.9926064610481262,grad_norm: 0.905104689122283, iteration: 376657
loss: 0.9907084703445435,grad_norm: 0.6915801752666525, iteration: 376658
loss: 0.9805230498313904,grad_norm: 0.7625493175491833, iteration: 376659
loss: 1.020597219467163,grad_norm: 0.9027081369038279, iteration: 376660
loss: 1.00215482711792,grad_norm: 0.7672893903365102, iteration: 376661
loss: 0.9604460000991821,grad_norm: 0.8423952613072956, iteration: 376662
loss: 0.9918361902236938,grad_norm: 0.6122374129200897, iteration: 376663
loss: 1.0225335359573364,grad_norm: 0.8383073472630371, iteration: 376664
loss: 1.0111403465270996,grad_norm: 0.7559908124141437, iteration: 376665
loss: 1.018517017364502,grad_norm: 0.765186724776388, iteration: 376666
loss: 0.9720022678375244,grad_norm: 0.8980343836367255, iteration: 376667
loss: 0.9677200317382812,grad_norm: 0.9467098003356008, iteration: 376668
loss: 0.9879515767097473,grad_norm: 0.8674825022469642, iteration: 376669
loss: 0.9992524981498718,grad_norm: 0.8075099646307173, iteration: 376670
loss: 1.0023431777954102,grad_norm: 0.9162131659348104, iteration: 376671
loss: 1.0054608583450317,grad_norm: 0.6705447676824318, iteration: 376672
loss: 1.028765320777893,grad_norm: 0.9411514411211546, iteration: 376673
loss: 1.0122816562652588,grad_norm: 0.7152084924886806, iteration: 376674
loss: 1.0263614654541016,grad_norm: 0.9999990245564069, iteration: 376675
loss: 1.0018537044525146,grad_norm: 0.73143739271313, iteration: 376676
loss: 0.9994479417800903,grad_norm: 0.9333722451391928, iteration: 376677
loss: 1.0205947160720825,grad_norm: 0.7918406112536281, iteration: 376678
loss: 0.9712195992469788,grad_norm: 0.7648454772540464, iteration: 376679
loss: 1.0043649673461914,grad_norm: 0.834668987843433, iteration: 376680
loss: 0.9902237057685852,grad_norm: 0.7826321927281151, iteration: 376681
loss: 0.9964517951011658,grad_norm: 0.9126065389746209, iteration: 376682
loss: 0.9855004549026489,grad_norm: 0.7160275552751101, iteration: 376683
loss: 0.9905474781990051,grad_norm: 0.8386836680625864, iteration: 376684
loss: 0.9857431054115295,grad_norm: 0.8845374075484838, iteration: 376685
loss: 1.0151052474975586,grad_norm: 0.8204094857983624, iteration: 376686
loss: 1.0007295608520508,grad_norm: 0.8011430403594587, iteration: 376687
loss: 0.9730901718139648,grad_norm: 0.7473931955874349, iteration: 376688
loss: 1.0281792879104614,grad_norm: 0.8647127179131382, iteration: 376689
loss: 0.9857259392738342,grad_norm: 0.6784242261418528, iteration: 376690
loss: 1.0055468082427979,grad_norm: 0.999999000551933, iteration: 376691
loss: 1.0206046104431152,grad_norm: 0.9647407991985157, iteration: 376692
loss: 0.9654290080070496,grad_norm: 0.7173380902391167, iteration: 376693
loss: 0.9903566241264343,grad_norm: 0.8210391589563414, iteration: 376694
loss: 1.0507276058197021,grad_norm: 0.9999995204879596, iteration: 376695
loss: 1.0166960954666138,grad_norm: 0.6561844567602942, iteration: 376696
loss: 1.0201138257980347,grad_norm: 0.9999994342873972, iteration: 376697
loss: 1.0062566995620728,grad_norm: 0.6311976484521681, iteration: 376698
loss: 1.0260182619094849,grad_norm: 0.9398573065731756, iteration: 376699
loss: 1.0149489641189575,grad_norm: 0.9884820325549699, iteration: 376700
loss: 1.0034955739974976,grad_norm: 0.918086992927813, iteration: 376701
loss: 0.9949021339416504,grad_norm: 0.6836740151308737, iteration: 376702
loss: 1.0262492895126343,grad_norm: 0.8934657198831416, iteration: 376703
loss: 0.9799840450286865,grad_norm: 0.8896578214765835, iteration: 376704
loss: 0.9965720772743225,grad_norm: 0.8179013666697998, iteration: 376705
loss: 1.0199605226516724,grad_norm: 0.7883387721228452, iteration: 376706
loss: 0.9962646961212158,grad_norm: 0.9091055454896712, iteration: 376707
loss: 1.0460933446884155,grad_norm: 0.9830684809910092, iteration: 376708
loss: 1.0032634735107422,grad_norm: 0.8479756204300031, iteration: 376709
loss: 1.022411823272705,grad_norm: 0.7592330474651754, iteration: 376710
loss: 1.0053434371948242,grad_norm: 0.7881102877319419, iteration: 376711
loss: 1.030909776687622,grad_norm: 0.941558444033188, iteration: 376712
loss: 1.0137871503829956,grad_norm: 0.7866710994050974, iteration: 376713
loss: 1.0071138143539429,grad_norm: 0.6963287200942981, iteration: 376714
loss: 1.0146125555038452,grad_norm: 0.8656465639618616, iteration: 376715
loss: 0.9677438139915466,grad_norm: 0.7736072289644418, iteration: 376716
loss: 1.0207048654556274,grad_norm: 0.7231992723353172, iteration: 376717
loss: 1.0126173496246338,grad_norm: 0.8445260658885042, iteration: 376718
loss: 0.9968105554580688,grad_norm: 0.8453501924260236, iteration: 376719
loss: 1.005768060684204,grad_norm: 0.7194284614707416, iteration: 376720
loss: 0.9701606631278992,grad_norm: 0.8003203962799416, iteration: 376721
loss: 0.9975094795227051,grad_norm: 0.6894366941973202, iteration: 376722
loss: 0.9893948435783386,grad_norm: 0.6711177270281178, iteration: 376723
loss: 1.0018835067749023,grad_norm: 0.9999990570343813, iteration: 376724
loss: 1.0211533308029175,grad_norm: 0.7475781663009592, iteration: 376725
loss: 1.0222495794296265,grad_norm: 0.7423263331824441, iteration: 376726
loss: 0.9778543710708618,grad_norm: 0.8478134962739097, iteration: 376727
loss: 1.0079728364944458,grad_norm: 0.8093403881275415, iteration: 376728
loss: 0.9851915836334229,grad_norm: 0.9372696087949872, iteration: 376729
loss: 1.0098716020584106,grad_norm: 0.7779446894077386, iteration: 376730
loss: 1.0258898735046387,grad_norm: 0.8591839325252334, iteration: 376731
loss: 0.9769672155380249,grad_norm: 0.8785245547841504, iteration: 376732
loss: 0.9922990202903748,grad_norm: 0.7626457129431496, iteration: 376733
loss: 1.0274056196212769,grad_norm: 0.7939666968083272, iteration: 376734
loss: 0.953583836555481,grad_norm: 0.7852168321580714, iteration: 376735
loss: 1.0127511024475098,grad_norm: 0.9999996713139787, iteration: 376736
loss: 1.00141179561615,grad_norm: 0.719627938067755, iteration: 376737
loss: 0.9978686571121216,grad_norm: 0.8681521182736562, iteration: 376738
loss: 0.9902546405792236,grad_norm: 0.7805286117501898, iteration: 376739
loss: 1.0380642414093018,grad_norm: 0.6911025578835065, iteration: 376740
loss: 1.0554865598678589,grad_norm: 0.7281516916394395, iteration: 376741
loss: 0.9923194646835327,grad_norm: 0.6767201721882121, iteration: 376742
loss: 0.9858273267745972,grad_norm: 0.7739446025248797, iteration: 376743
loss: 1.0298514366149902,grad_norm: 0.808213983757837, iteration: 376744
loss: 0.9817867279052734,grad_norm: 0.8580305742206447, iteration: 376745
loss: 1.0032199621200562,grad_norm: 0.8605609101514143, iteration: 376746
loss: 0.9802465438842773,grad_norm: 0.9261268793674992, iteration: 376747
loss: 1.0330840349197388,grad_norm: 0.8174726205314857, iteration: 376748
loss: 0.9835707545280457,grad_norm: 0.7801214558708108, iteration: 376749
loss: 1.0173643827438354,grad_norm: 0.895422363764895, iteration: 376750
loss: 0.9816363453865051,grad_norm: 0.727261256378765, iteration: 376751
loss: 0.9657183885574341,grad_norm: 0.7317667165880605, iteration: 376752
loss: 1.0907173156738281,grad_norm: 0.9999992202846603, iteration: 376753
loss: 0.9931219220161438,grad_norm: 0.9285194376955282, iteration: 376754
loss: 0.9478921890258789,grad_norm: 0.7801904039033588, iteration: 376755
loss: 1.0016084909439087,grad_norm: 0.8996068205418761, iteration: 376756
loss: 1.0134966373443604,grad_norm: 0.8340381804153464, iteration: 376757
loss: 0.9867604970932007,grad_norm: 0.8007406878576168, iteration: 376758
loss: 0.9891766309738159,grad_norm: 0.9581546402877178, iteration: 376759
loss: 1.002753496170044,grad_norm: 0.8975564361816286, iteration: 376760
loss: 1.032422661781311,grad_norm: 0.9999989831987599, iteration: 376761
loss: 0.9502508044242859,grad_norm: 0.8301991584788394, iteration: 376762
loss: 1.0078885555267334,grad_norm: 0.9304304904600229, iteration: 376763
loss: 1.0342782735824585,grad_norm: 0.7783016158014254, iteration: 376764
loss: 1.0067510604858398,grad_norm: 0.815862998592274, iteration: 376765
loss: 1.0150068998336792,grad_norm: 0.9159831987267084, iteration: 376766
loss: 0.9892787933349609,grad_norm: 0.7487449724878023, iteration: 376767
loss: 0.9736344218254089,grad_norm: 0.8134627878584575, iteration: 376768
loss: 0.9837573766708374,grad_norm: 0.7653769632380533, iteration: 376769
loss: 0.9835887551307678,grad_norm: 0.770567161087395, iteration: 376770
loss: 0.9690342545509338,grad_norm: 0.9999992167104831, iteration: 376771
loss: 0.9951056241989136,grad_norm: 0.9027013347262698, iteration: 376772
loss: 1.010036826133728,grad_norm: 0.6768101470550787, iteration: 376773
loss: 1.032234787940979,grad_norm: 0.9999993301784297, iteration: 376774
loss: 1.0010584592819214,grad_norm: 0.7226535961669808, iteration: 376775
loss: 0.9557326436042786,grad_norm: 0.8882937787250823, iteration: 376776
loss: 0.9881899356842041,grad_norm: 0.8113357195303229, iteration: 376777
loss: 1.0051196813583374,grad_norm: 0.8861580524036442, iteration: 376778
loss: 1.0244580507278442,grad_norm: 0.8078472014488431, iteration: 376779
loss: 0.976731538772583,grad_norm: 0.7990199533291067, iteration: 376780
loss: 1.0178675651550293,grad_norm: 0.8412433527387059, iteration: 376781
loss: 1.0102620124816895,grad_norm: 0.6723483961778345, iteration: 376782
loss: 1.0286526679992676,grad_norm: 0.9100616917855631, iteration: 376783
loss: 0.9981025457382202,grad_norm: 0.8658594500299638, iteration: 376784
loss: 0.9869009256362915,grad_norm: 0.7488228091827608, iteration: 376785
loss: 1.004067063331604,grad_norm: 0.6524555530145457, iteration: 376786
loss: 0.9845449924468994,grad_norm: 0.92927541786125, iteration: 376787
loss: 0.9622655510902405,grad_norm: 0.8837323706889623, iteration: 376788
loss: 1.093044638633728,grad_norm: 0.9999991534454464, iteration: 376789
loss: 0.9710223078727722,grad_norm: 0.9999995106515345, iteration: 376790
loss: 1.0257784128189087,grad_norm: 0.7119303894483167, iteration: 376791
loss: 1.0114271640777588,grad_norm: 0.8520450965674968, iteration: 376792
loss: 1.015257477760315,grad_norm: 0.9999990741524868, iteration: 376793
loss: 0.9956973791122437,grad_norm: 0.7557618201600854, iteration: 376794
loss: 1.0171382427215576,grad_norm: 0.8347399560276695, iteration: 376795
loss: 1.000612497329712,grad_norm: 0.9999997060984142, iteration: 376796
loss: 0.9937418103218079,grad_norm: 0.7206917718392477, iteration: 376797
loss: 0.9916601777076721,grad_norm: 0.7633439379753185, iteration: 376798
loss: 1.037411093711853,grad_norm: 0.9671577628290194, iteration: 376799
loss: 0.9946008324623108,grad_norm: 0.7990256453710377, iteration: 376800
loss: 1.0070850849151611,grad_norm: 0.6391974800903808, iteration: 376801
loss: 1.0115118026733398,grad_norm: 0.757660770957497, iteration: 376802
loss: 0.9479224681854248,grad_norm: 0.854884176301183, iteration: 376803
loss: 1.0260701179504395,grad_norm: 0.9603997817902917, iteration: 376804
loss: 0.9876977205276489,grad_norm: 0.8050050465726537, iteration: 376805
loss: 1.0220048427581787,grad_norm: 0.8764440063028908, iteration: 376806
loss: 0.9834501147270203,grad_norm: 0.8923292660942437, iteration: 376807
loss: 0.9984853267669678,grad_norm: 0.7638053855964041, iteration: 376808
loss: 0.9640436172485352,grad_norm: 0.8149033263087003, iteration: 376809
loss: 0.98237144947052,grad_norm: 0.9999990132233517, iteration: 376810
loss: 0.9659553170204163,grad_norm: 0.8274998994943472, iteration: 376811
loss: 1.01674485206604,grad_norm: 0.8208036310178334, iteration: 376812
loss: 1.0356230735778809,grad_norm: 0.9254787481798362, iteration: 376813
loss: 0.9979013204574585,grad_norm: 0.702390751438907, iteration: 376814
loss: 1.0084986686706543,grad_norm: 0.7721832352627697, iteration: 376815
loss: 1.003511905670166,grad_norm: 0.8481551724486427, iteration: 376816
loss: 0.9474918842315674,grad_norm: 0.8608690112805234, iteration: 376817
loss: 1.0069414377212524,grad_norm: 0.9999990998683145, iteration: 376818
loss: 1.017143726348877,grad_norm: 0.7393703258867184, iteration: 376819
loss: 1.0248172283172607,grad_norm: 0.9437721134000926, iteration: 376820
loss: 1.0026413202285767,grad_norm: 0.9999998788871388, iteration: 376821
loss: 1.0120351314544678,grad_norm: 0.9999991087648773, iteration: 376822
loss: 1.001239538192749,grad_norm: 0.8929397207828919, iteration: 376823
loss: 1.021352767944336,grad_norm: 0.7944162177685752, iteration: 376824
loss: 0.9776785373687744,grad_norm: 0.8322786858377543, iteration: 376825
loss: 1.0242671966552734,grad_norm: 0.9999996317405299, iteration: 376826
loss: 1.0426702499389648,grad_norm: 0.8375822554075817, iteration: 376827
loss: 1.043961524963379,grad_norm: 0.9999999009666689, iteration: 376828
loss: 1.0486031770706177,grad_norm: 1.0000000121270483, iteration: 376829
loss: 0.959549069404602,grad_norm: 0.725714225561008, iteration: 376830
loss: 0.975737988948822,grad_norm: 0.8381487946174743, iteration: 376831
loss: 1.0004518032073975,grad_norm: 0.7677146600749225, iteration: 376832
loss: 1.0047687292099,grad_norm: 0.8835407619736911, iteration: 376833
loss: 1.007453203201294,grad_norm: 0.9405593884505569, iteration: 376834
loss: 0.9688597917556763,grad_norm: 0.9999996885573632, iteration: 376835
loss: 0.9789600968360901,grad_norm: 0.8504421426420296, iteration: 376836
loss: 1.0239531993865967,grad_norm: 0.7398891992694703, iteration: 376837
loss: 1.0128722190856934,grad_norm: 0.785828314035613, iteration: 376838
loss: 1.0155036449432373,grad_norm: 0.9999993780760513, iteration: 376839
loss: 1.0153766870498657,grad_norm: 0.7229983089547765, iteration: 376840
loss: 1.010043740272522,grad_norm: 0.7434477227661295, iteration: 376841
loss: 1.0329807996749878,grad_norm: 0.8089699930530249, iteration: 376842
loss: 1.0041279792785645,grad_norm: 0.9999990668531124, iteration: 376843
loss: 1.0243494510650635,grad_norm: 0.7815530779509897, iteration: 376844
loss: 1.0814305543899536,grad_norm: 0.9999992100740627, iteration: 376845
loss: 0.9932063221931458,grad_norm: 0.9875007539648204, iteration: 376846
loss: 1.0703763961791992,grad_norm: 0.9387250385949722, iteration: 376847
loss: 0.9835187196731567,grad_norm: 0.8865368170535396, iteration: 376848
loss: 0.9893022775650024,grad_norm: 0.980637659001207, iteration: 376849
loss: 1.014278769493103,grad_norm: 0.9822199987590989, iteration: 376850
loss: 0.9874146580696106,grad_norm: 0.8919687313622985, iteration: 376851
loss: 0.983710765838623,grad_norm: 0.8145296648880279, iteration: 376852
loss: 0.9707698822021484,grad_norm: 0.7663979379594577, iteration: 376853
loss: 0.9861400723457336,grad_norm: 0.9999999578076736, iteration: 376854
loss: 0.9551472067832947,grad_norm: 0.8769732740101368, iteration: 376855
loss: 1.0353662967681885,grad_norm: 0.8809436966680729, iteration: 376856
loss: 0.9819032549858093,grad_norm: 0.6873934784417545, iteration: 376857
loss: 0.9588337540626526,grad_norm: 0.8020020600797916, iteration: 376858
loss: 1.0012353658676147,grad_norm: 0.7331086067941404, iteration: 376859
loss: 0.9932506680488586,grad_norm: 0.6677970295394935, iteration: 376860
loss: 0.9804012775421143,grad_norm: 0.8446402434315663, iteration: 376861
loss: 1.0643765926361084,grad_norm: 0.999999727950178, iteration: 376862
loss: 1.0066665410995483,grad_norm: 0.8626552603910719, iteration: 376863
loss: 1.080176830291748,grad_norm: 0.9999998558190493, iteration: 376864
loss: 0.98785400390625,grad_norm: 0.691944261528267, iteration: 376865
loss: 1.0325942039489746,grad_norm: 0.7836360486024218, iteration: 376866
loss: 0.9768960475921631,grad_norm: 0.8305604061944831, iteration: 376867
loss: 1.048005223274231,grad_norm: 0.9115415772980745, iteration: 376868
loss: 0.9805415272712708,grad_norm: 0.763849736021117, iteration: 376869
loss: 1.0314079523086548,grad_norm: 0.7421888375992757, iteration: 376870
loss: 0.968630313873291,grad_norm: 0.7361434481714497, iteration: 376871
loss: 1.0081970691680908,grad_norm: 0.854790896801767, iteration: 376872
loss: 1.0337965488433838,grad_norm: 0.8329853889307308, iteration: 376873
loss: 1.0047367811203003,grad_norm: 0.773869895510941, iteration: 376874
loss: 0.9926748275756836,grad_norm: 0.9999989811539555, iteration: 376875
loss: 1.0291601419448853,grad_norm: 0.8950761156933984, iteration: 376876
loss: 1.028846025466919,grad_norm: 0.9183939098990184, iteration: 376877
loss: 1.0257477760314941,grad_norm: 0.7223650669033865, iteration: 376878
loss: 1.0155541896820068,grad_norm: 0.8585664142708777, iteration: 376879
loss: 0.9854082465171814,grad_norm: 0.9999989956767793, iteration: 376880
loss: 0.9879130721092224,grad_norm: 0.9105515994595663, iteration: 376881
loss: 1.0081419944763184,grad_norm: 0.8334598488839714, iteration: 376882
loss: 1.0226645469665527,grad_norm: 0.7917988908098902, iteration: 376883
loss: 1.0167444944381714,grad_norm: 0.9999991061185974, iteration: 376884
loss: 0.9981012344360352,grad_norm: 0.6462804336231369, iteration: 376885
loss: 1.010932207107544,grad_norm: 0.6597971957755422, iteration: 376886
loss: 1.018447995185852,grad_norm: 0.9661821106267323, iteration: 376887
loss: 0.9536818265914917,grad_norm: 0.8506277758804971, iteration: 376888
loss: 0.9998299479484558,grad_norm: 0.7391083664073337, iteration: 376889
loss: 1.0084292888641357,grad_norm: 0.6751180872164572, iteration: 376890
loss: 0.9972984790802002,grad_norm: 0.8360451962788376, iteration: 376891
loss: 1.00315260887146,grad_norm: 0.9150935955797284, iteration: 376892
loss: 1.001127004623413,grad_norm: 0.778400816420285, iteration: 376893
loss: 1.0208923816680908,grad_norm: 0.7361171028630065, iteration: 376894
loss: 1.0015805959701538,grad_norm: 0.6875470966708661, iteration: 376895
loss: 0.9702158570289612,grad_norm: 0.801039962289149, iteration: 376896
loss: 0.9520518183708191,grad_norm: 0.7819243661707189, iteration: 376897
loss: 0.9786141514778137,grad_norm: 0.7522525017174541, iteration: 376898
loss: 0.9761114120483398,grad_norm: 0.7261462686177246, iteration: 376899
loss: 0.9829733967781067,grad_norm: 0.6655713401720379, iteration: 376900
loss: 0.9923562407493591,grad_norm: 0.8282185123127762, iteration: 376901
loss: 0.9864208102226257,grad_norm: 0.8611924033077453, iteration: 376902
loss: 0.9933176040649414,grad_norm: 0.9214959122560001, iteration: 376903
loss: 1.0229908227920532,grad_norm: 0.7875978664308966, iteration: 376904
loss: 1.00904381275177,grad_norm: 0.7254841637884215, iteration: 376905
loss: 1.0019625425338745,grad_norm: 0.9254853590366611, iteration: 376906
loss: 1.0008385181427002,grad_norm: 0.622234517258888, iteration: 376907
loss: 1.0385680198669434,grad_norm: 0.6644844117239121, iteration: 376908
loss: 0.9856981635093689,grad_norm: 0.8908806527809779, iteration: 376909
loss: 0.997741162776947,grad_norm: 0.8277824470986043, iteration: 376910
loss: 1.0300627946853638,grad_norm: 0.9447710624607905, iteration: 376911
loss: 1.0054868459701538,grad_norm: 0.7423906670897112, iteration: 376912
loss: 0.9859512448310852,grad_norm: 0.7903211973651921, iteration: 376913
loss: 0.9861846566200256,grad_norm: 0.8680084792408155, iteration: 376914
loss: 0.9899229407310486,grad_norm: 0.9664205669945856, iteration: 376915
loss: 1.0249643325805664,grad_norm: 0.9373537057626729, iteration: 376916
loss: 0.99711012840271,grad_norm: 0.6929640699803604, iteration: 376917
loss: 0.9633172154426575,grad_norm: 0.7796475011898085, iteration: 376918
loss: 0.9875062704086304,grad_norm: 0.8107191103337061, iteration: 376919
loss: 0.994476854801178,grad_norm: 0.9999990993558028, iteration: 376920
loss: 1.0058302879333496,grad_norm: 0.7642179940587435, iteration: 376921
loss: 0.9795292615890503,grad_norm: 0.9513030291972537, iteration: 376922
loss: 0.9775599241256714,grad_norm: 0.9002221567797822, iteration: 376923
loss: 0.9980961680412292,grad_norm: 0.9597755297842352, iteration: 376924
loss: 0.9919761419296265,grad_norm: 0.6557240248329298, iteration: 376925
loss: 1.0090187788009644,grad_norm: 0.9144150166766848, iteration: 376926
loss: 1.0436173677444458,grad_norm: 0.8290110717161022, iteration: 376927
loss: 0.9937618970870972,grad_norm: 0.8576427292822385, iteration: 376928
loss: 1.005942463874817,grad_norm: 0.8204498988084773, iteration: 376929
loss: 1.0435125827789307,grad_norm: 0.7491871639376608, iteration: 376930
loss: 0.9838734865188599,grad_norm: 0.7424409044047698, iteration: 376931
loss: 1.0427814722061157,grad_norm: 0.9999997594317728, iteration: 376932
loss: 0.9635139107704163,grad_norm: 0.9613191331303663, iteration: 376933
loss: 1.0722720623016357,grad_norm: 0.8329890738794414, iteration: 376934
loss: 1.0132380723953247,grad_norm: 0.779240634784372, iteration: 376935
loss: 1.0010831356048584,grad_norm: 0.8235738186487757, iteration: 376936
loss: 1.0296589136123657,grad_norm: 0.7663057835388543, iteration: 376937
loss: 1.0092042684555054,grad_norm: 0.7922726635556838, iteration: 376938
loss: 0.9644216299057007,grad_norm: 0.7710235777282556, iteration: 376939
loss: 1.060584306716919,grad_norm: 0.9999991855760026, iteration: 376940
loss: 0.9683428406715393,grad_norm: 0.8243201581236655, iteration: 376941
loss: 1.0489435195922852,grad_norm: 0.823930290816972, iteration: 376942
loss: 0.9855444431304932,grad_norm: 0.7351135819412751, iteration: 376943
loss: 1.1039215326309204,grad_norm: 0.9999990252990864, iteration: 376944
loss: 0.9844115376472473,grad_norm: 0.9157028777888656, iteration: 376945
loss: 1.0092326402664185,grad_norm: 0.7066709914210713, iteration: 376946
loss: 0.9771242141723633,grad_norm: 0.7722470538839044, iteration: 376947
loss: 1.0194118022918701,grad_norm: 0.9918932468810603, iteration: 376948
loss: 0.9983383417129517,grad_norm: 0.796158480465784, iteration: 376949
loss: 1.010908842086792,grad_norm: 0.9488159115108727, iteration: 376950
loss: 0.9942000508308411,grad_norm: 0.8795457733823716, iteration: 376951
loss: 1.0112947225570679,grad_norm: 0.8107506900827367, iteration: 376952
loss: 1.0038636922836304,grad_norm: 0.860360815207031, iteration: 376953
loss: 0.9903546571731567,grad_norm: 0.7894027127096549, iteration: 376954
loss: 1.015595555305481,grad_norm: 0.8216766329452043, iteration: 376955
loss: 0.9634210467338562,grad_norm: 0.6710728141834853, iteration: 376956
loss: 1.0020215511322021,grad_norm: 0.8055909989290071, iteration: 376957
loss: 1.0130802392959595,grad_norm: 0.6781457989376586, iteration: 376958
loss: 0.9551963806152344,grad_norm: 0.6995663425772493, iteration: 376959
loss: 0.9789800047874451,grad_norm: 0.8808128223663837, iteration: 376960
loss: 1.005763292312622,grad_norm: 0.7464519829970202, iteration: 376961
loss: 0.9751823544502258,grad_norm: 0.9999991854843998, iteration: 376962
loss: 0.97607421875,grad_norm: 0.9515469007888406, iteration: 376963
loss: 0.9923407435417175,grad_norm: 0.7728821764105377, iteration: 376964
loss: 1.020261287689209,grad_norm: 0.7907783613219131, iteration: 376965
loss: 0.9945299029350281,grad_norm: 0.6887442131266006, iteration: 376966
loss: 1.0124834775924683,grad_norm: 0.6245383353577743, iteration: 376967
loss: 0.9895555973052979,grad_norm: 0.700089237113537, iteration: 376968
loss: 1.018567442893982,grad_norm: 0.7728924047337014, iteration: 376969
loss: 0.9790744781494141,grad_norm: 0.8165885953651278, iteration: 376970
loss: 1.0117567777633667,grad_norm: 0.7680372272318291, iteration: 376971
loss: 1.0127052068710327,grad_norm: 0.7402550187615584, iteration: 376972
loss: 1.0152373313903809,grad_norm: 0.8366631242415293, iteration: 376973
loss: 1.0298118591308594,grad_norm: 0.796355655780416, iteration: 376974
loss: 0.9884907603263855,grad_norm: 0.8470763876008814, iteration: 376975
loss: 1.0047444105148315,grad_norm: 0.7222729273076427, iteration: 376976
loss: 1.0000191926956177,grad_norm: 0.9361581209352026, iteration: 376977
loss: 1.0038188695907593,grad_norm: 0.8389256807653557, iteration: 376978
loss: 0.9964036345481873,grad_norm: 0.8153931090601021, iteration: 376979
loss: 0.9934538006782532,grad_norm: 0.7449081185204695, iteration: 376980
loss: 1.0181434154510498,grad_norm: 0.7540139098696801, iteration: 376981
loss: 1.0337196588516235,grad_norm: 0.9999999744163715, iteration: 376982
loss: 1.0461957454681396,grad_norm: 0.8161370167819468, iteration: 376983
loss: 1.001955270767212,grad_norm: 0.7295279485554982, iteration: 376984
loss: 1.0178357362747192,grad_norm: 0.7094737503962912, iteration: 376985
loss: 1.0050896406173706,grad_norm: 0.8827807054707377, iteration: 376986
loss: 1.0114619731903076,grad_norm: 0.9298277560832288, iteration: 376987
loss: 1.0093690156936646,grad_norm: 0.7259937753065243, iteration: 376988
loss: 1.0213202238082886,grad_norm: 0.9999999586784021, iteration: 376989
loss: 1.0256274938583374,grad_norm: 0.9029119852649069, iteration: 376990
loss: 0.9868648052215576,grad_norm: 0.9410029116392493, iteration: 376991
loss: 0.9935620427131653,grad_norm: 0.7103881301738552, iteration: 376992
loss: 0.9660946130752563,grad_norm: 0.6898369556109424, iteration: 376993
loss: 0.9562667012214661,grad_norm: 0.8776205256566597, iteration: 376994
loss: 0.9681034684181213,grad_norm: 0.8614504186044063, iteration: 376995
loss: 1.0161175727844238,grad_norm: 0.9228504648655451, iteration: 376996
loss: 1.0110546350479126,grad_norm: 0.7151340322657614, iteration: 376997
loss: 0.9790467023849487,grad_norm: 0.8875162631771626, iteration: 376998
loss: 1.0351375341415405,grad_norm: 0.8544140795935405, iteration: 376999
loss: 0.9914066195487976,grad_norm: 0.9999996106374488, iteration: 377000
loss: 0.9990755319595337,grad_norm: 0.9999998872995697, iteration: 377001
loss: 0.9729517102241516,grad_norm: 0.7978688358458895, iteration: 377002
loss: 1.035998821258545,grad_norm: 0.9999994877363585, iteration: 377003
loss: 1.0135483741760254,grad_norm: 0.7017749586019044, iteration: 377004
loss: 1.009626030921936,grad_norm: 0.7974459630966773, iteration: 377005
loss: 0.9988682270050049,grad_norm: 0.8281725135039214, iteration: 377006
loss: 0.9987577795982361,grad_norm: 0.9691266580333977, iteration: 377007
loss: 1.0096545219421387,grad_norm: 0.7788543334632133, iteration: 377008
loss: 1.0132943391799927,grad_norm: 0.7266793106674553, iteration: 377009
loss: 0.994267463684082,grad_norm: 0.6934644217839714, iteration: 377010
loss: 1.0044212341308594,grad_norm: 0.8162215018492607, iteration: 377011
loss: 1.0285766124725342,grad_norm: 0.8243568833655692, iteration: 377012
loss: 1.0840994119644165,grad_norm: 0.9999990670493389, iteration: 377013
loss: 1.0060598850250244,grad_norm: 0.8352947912499438, iteration: 377014
loss: 0.9869905114173889,grad_norm: 0.9669437437499111, iteration: 377015
loss: 1.0087815523147583,grad_norm: 0.7855867289260072, iteration: 377016
loss: 0.9422577023506165,grad_norm: 0.8828465682137056, iteration: 377017
loss: 1.0234768390655518,grad_norm: 0.8499593670803705, iteration: 377018
loss: 0.97443026304245,grad_norm: 0.838654674480051, iteration: 377019
loss: 0.9982863664627075,grad_norm: 0.9999998369687743, iteration: 377020
loss: 1.017523169517517,grad_norm: 0.6807279361699148, iteration: 377021
loss: 1.0139410495758057,grad_norm: 0.6792054240002944, iteration: 377022
loss: 0.9871191382408142,grad_norm: 0.7939558535202021, iteration: 377023
loss: 1.0224888324737549,grad_norm: 0.8318685770019189, iteration: 377024
loss: 1.144944190979004,grad_norm: 0.999999817925014, iteration: 377025
loss: 1.235916018486023,grad_norm: 0.999999944811987, iteration: 377026
loss: 0.9887285232543945,grad_norm: 0.774306651778494, iteration: 377027
loss: 0.9687528610229492,grad_norm: 0.7816299994330002, iteration: 377028
loss: 1.0186069011688232,grad_norm: 0.9388800800138803, iteration: 377029
loss: 1.0020428895950317,grad_norm: 0.6101232921864216, iteration: 377030
loss: 1.0157548189163208,grad_norm: 0.9203933540175195, iteration: 377031
loss: 0.978329598903656,grad_norm: 0.6907280514679139, iteration: 377032
loss: 1.0305951833724976,grad_norm: 0.918781597368054, iteration: 377033
loss: 1.0058356523513794,grad_norm: 0.7311709025100191, iteration: 377034
loss: 1.0534647703170776,grad_norm: 0.8122614438558305, iteration: 377035
loss: 0.9999423027038574,grad_norm: 0.9271566291328641, iteration: 377036
loss: 1.0143684148788452,grad_norm: 0.8033670756805035, iteration: 377037
loss: 0.9647265672683716,grad_norm: 0.8261770962012555, iteration: 377038
loss: 1.0155467987060547,grad_norm: 0.9999990378693812, iteration: 377039
loss: 1.0182744264602661,grad_norm: 0.7598060941985417, iteration: 377040
loss: 1.0217249393463135,grad_norm: 0.9916552459580877, iteration: 377041
loss: 0.9716498255729675,grad_norm: 0.879426581586858, iteration: 377042
loss: 1.0019242763519287,grad_norm: 0.840527936863846, iteration: 377043
loss: 0.9844605326652527,grad_norm: 0.8559388507009876, iteration: 377044
loss: 1.0014536380767822,grad_norm: 0.8259716380819739, iteration: 377045
loss: 0.986530601978302,grad_norm: 0.8194699223621802, iteration: 377046
loss: 0.9966233968734741,grad_norm: 0.7156803262739594, iteration: 377047
loss: 0.984646737575531,grad_norm: 0.7849346747973346, iteration: 377048
loss: 0.9971473813056946,grad_norm: 0.7861765551379655, iteration: 377049
loss: 1.0005459785461426,grad_norm: 0.9921533557381151, iteration: 377050
loss: 0.9882137775421143,grad_norm: 0.8472722143233734, iteration: 377051
loss: 0.9998462200164795,grad_norm: 0.948456744536896, iteration: 377052
loss: 0.9704828858375549,grad_norm: 0.8057482739917905, iteration: 377053
loss: 1.037172794342041,grad_norm: 0.8076201587140863, iteration: 377054
loss: 1.0161802768707275,grad_norm: 0.8309033343375047, iteration: 377055
loss: 0.9885691404342651,grad_norm: 0.7930537845076596, iteration: 377056
loss: 0.997017502784729,grad_norm: 0.7669978136350781, iteration: 377057
loss: 0.9669952988624573,grad_norm: 0.9999989821108295, iteration: 377058
loss: 1.0106308460235596,grad_norm: 0.8496227132802019, iteration: 377059
loss: 1.0200738906860352,grad_norm: 0.6361789599725239, iteration: 377060
loss: 1.0710102319717407,grad_norm: 0.9999995413314723, iteration: 377061
loss: 1.0069987773895264,grad_norm: 0.8725711322293457, iteration: 377062
loss: 0.9811417460441589,grad_norm: 0.7211390574457277, iteration: 377063
loss: 0.9626857042312622,grad_norm: 0.7836271855939563, iteration: 377064
loss: 0.9911909699440002,grad_norm: 0.7468738507326581, iteration: 377065
loss: 1.051117181777954,grad_norm: 0.820140973909749, iteration: 377066
loss: 1.000235676765442,grad_norm: 0.9132786055314184, iteration: 377067
loss: 1.0033115148544312,grad_norm: 0.799926170381555, iteration: 377068
loss: 1.0428330898284912,grad_norm: 0.8982808278279419, iteration: 377069
loss: 1.0390057563781738,grad_norm: 0.9349727773194371, iteration: 377070
loss: 0.9904783368110657,grad_norm: 0.8487472837945799, iteration: 377071
loss: 0.9848978519439697,grad_norm: 0.9241194791565496, iteration: 377072
loss: 0.9846481084823608,grad_norm: 0.7814821753277237, iteration: 377073
loss: 0.9859955906867981,grad_norm: 0.6493334398024654, iteration: 377074
loss: 1.0596414804458618,grad_norm: 0.9999998340172925, iteration: 377075
loss: 1.0000475645065308,grad_norm: 0.7375906850348924, iteration: 377076
loss: 0.9730319976806641,grad_norm: 0.8842272456336894, iteration: 377077
loss: 0.9747077822685242,grad_norm: 0.8758673499075312, iteration: 377078
loss: 0.9884025454521179,grad_norm: 0.7559710397693536, iteration: 377079
loss: 1.0131272077560425,grad_norm: 0.9999994151854389, iteration: 377080
loss: 1.0126804113388062,grad_norm: 0.8872514925976346, iteration: 377081
loss: 0.9718427658081055,grad_norm: 0.7359524470447218, iteration: 377082
loss: 0.9936838746070862,grad_norm: 0.8978145346359769, iteration: 377083
loss: 0.9924601912498474,grad_norm: 0.7733858209912379, iteration: 377084
loss: 1.063875675201416,grad_norm: 0.8705425790222447, iteration: 377085
loss: 0.9941774010658264,grad_norm: 0.8076820917748476, iteration: 377086
loss: 0.9242015480995178,grad_norm: 0.732026043538607, iteration: 377087
loss: 1.0132203102111816,grad_norm: 0.8111301134347696, iteration: 377088
loss: 0.9751620888710022,grad_norm: 0.8478683339317971, iteration: 377089
loss: 0.9742178916931152,grad_norm: 0.8074035198779004, iteration: 377090
loss: 1.024541974067688,grad_norm: 0.7340925864249706, iteration: 377091
loss: 0.9495570063591003,grad_norm: 0.8573911533155115, iteration: 377092
loss: 0.986373782157898,grad_norm: 0.8192558138522027, iteration: 377093
loss: 1.0249018669128418,grad_norm: 0.7062873361985114, iteration: 377094
loss: 1.0221612453460693,grad_norm: 0.8916943762218995, iteration: 377095
loss: 1.012880563735962,grad_norm: 0.7880701620128173, iteration: 377096
loss: 1.0217516422271729,grad_norm: 0.6428111374192325, iteration: 377097
loss: 1.026038646697998,grad_norm: 0.9314818324009794, iteration: 377098
loss: 1.00674307346344,grad_norm: 0.6635144075445484, iteration: 377099
loss: 1.0236634016036987,grad_norm: 0.7823226538001247, iteration: 377100
loss: 0.9724407196044922,grad_norm: 0.8035290996629214, iteration: 377101
loss: 1.0160328149795532,grad_norm: 0.7554976301663279, iteration: 377102
loss: 0.9774375557899475,grad_norm: 0.9182127538738972, iteration: 377103
loss: 0.9780391454696655,grad_norm: 0.9999989658013512, iteration: 377104
loss: 0.9828572273254395,grad_norm: 0.6562426917893844, iteration: 377105
loss: 1.0051958560943604,grad_norm: 0.8219475757352722, iteration: 377106
loss: 0.9961599111557007,grad_norm: 0.7968693684292069, iteration: 377107
loss: 1.0136526823043823,grad_norm: 0.7484817581470067, iteration: 377108
loss: 1.0440328121185303,grad_norm: 0.8282121235274328, iteration: 377109
loss: 0.9818684458732605,grad_norm: 0.8075256506855312, iteration: 377110
loss: 1.0437675714492798,grad_norm: 0.9999998497318835, iteration: 377111
loss: 0.9974335432052612,grad_norm: 0.7973685263313991, iteration: 377112
loss: 1.0317981243133545,grad_norm: 0.7701544676596209, iteration: 377113
loss: 1.0171935558319092,grad_norm: 0.7149071030512314, iteration: 377114
loss: 0.9938592314720154,grad_norm: 0.6155512558287274, iteration: 377115
loss: 0.9746685028076172,grad_norm: 0.8022084411648461, iteration: 377116
loss: 1.0104851722717285,grad_norm: 0.8958794233181336, iteration: 377117
loss: 0.9993252754211426,grad_norm: 0.7326149593363515, iteration: 377118
loss: 0.9944620728492737,grad_norm: 0.7165447871753993, iteration: 377119
loss: 0.9986972808837891,grad_norm: 0.7621472428484516, iteration: 377120
loss: 0.9797389507293701,grad_norm: 0.7993452433447383, iteration: 377121
loss: 1.0026875734329224,grad_norm: 0.8419742931962697, iteration: 377122
loss: 1.0295521020889282,grad_norm: 0.8753166210506896, iteration: 377123
loss: 0.9956042170524597,grad_norm: 0.7194616638368385, iteration: 377124
loss: 0.983040988445282,grad_norm: 0.7791561645175109, iteration: 377125
loss: 1.0089765787124634,grad_norm: 0.7517845952786935, iteration: 377126
loss: 1.0077168941497803,grad_norm: 0.7839990500315925, iteration: 377127
loss: 0.9939199090003967,grad_norm: 0.8331925573712021, iteration: 377128
loss: 0.9867472648620605,grad_norm: 0.7107689495103279, iteration: 377129
loss: 0.9507972002029419,grad_norm: 0.843514001556824, iteration: 377130
loss: 0.9945604205131531,grad_norm: 0.6706168586880439, iteration: 377131
loss: 1.0198980569839478,grad_norm: 0.9999990532967028, iteration: 377132
loss: 1.0061354637145996,grad_norm: 0.7156502072628407, iteration: 377133
loss: 1.001611590385437,grad_norm: 0.7304396510827783, iteration: 377134
loss: 0.9807082414627075,grad_norm: 0.9551794131535433, iteration: 377135
loss: 0.9852719902992249,grad_norm: 0.9103566241637033, iteration: 377136
loss: 0.999548614025116,grad_norm: 0.7682240771958426, iteration: 377137
loss: 0.9819387793540955,grad_norm: 0.802587502946287, iteration: 377138
loss: 1.0004892349243164,grad_norm: 0.8293373109853599, iteration: 377139
loss: 0.9803901314735413,grad_norm: 0.6894776051991929, iteration: 377140
loss: 1.0044163465499878,grad_norm: 0.8279525323395951, iteration: 377141
loss: 1.0385276079177856,grad_norm: 0.999999124664272, iteration: 377142
loss: 1.011483073234558,grad_norm: 0.7082855343784077, iteration: 377143
loss: 0.9863819479942322,grad_norm: 0.7158493969078644, iteration: 377144
loss: 1.0068483352661133,grad_norm: 0.7963737648633153, iteration: 377145
loss: 0.9888859987258911,grad_norm: 0.8139034884455276, iteration: 377146
loss: 1.0035713911056519,grad_norm: 0.7987350526324478, iteration: 377147
loss: 0.991971492767334,grad_norm: 0.8251120856624784, iteration: 377148
loss: 1.0073446035385132,grad_norm: 0.8430993645211406, iteration: 377149
loss: 0.9686598181724548,grad_norm: 0.7854314433956258, iteration: 377150
loss: 1.0513103008270264,grad_norm: 0.7731635697632643, iteration: 377151
loss: 1.0468649864196777,grad_norm: 0.7996365090482317, iteration: 377152
loss: 1.0086232423782349,grad_norm: 0.7744773696698867, iteration: 377153
loss: 0.9853352904319763,grad_norm: 0.8229793680652935, iteration: 377154
loss: 1.0091979503631592,grad_norm: 0.9999991692222593, iteration: 377155
loss: 1.0611709356307983,grad_norm: 0.9999993254640157, iteration: 377156
loss: 1.0262036323547363,grad_norm: 0.803131341210329, iteration: 377157
loss: 1.0077706575393677,grad_norm: 0.6824694670113345, iteration: 377158
loss: 1.01627779006958,grad_norm: 0.6846230499063162, iteration: 377159
loss: 1.0453964471817017,grad_norm: 0.9999991534784932, iteration: 377160
loss: 0.984226644039154,grad_norm: 0.9999991849593654, iteration: 377161
loss: 0.9971964955329895,grad_norm: 0.8096841371374659, iteration: 377162
loss: 0.9848905801773071,grad_norm: 0.7142666565226166, iteration: 377163
loss: 1.031435251235962,grad_norm: 0.8064749114907558, iteration: 377164
loss: 1.0157105922698975,grad_norm: 0.8730987208744769, iteration: 377165
loss: 1.004986047744751,grad_norm: 0.6678438384913252, iteration: 377166
loss: 0.9867709279060364,grad_norm: 0.6853867535729278, iteration: 377167
loss: 1.0072896480560303,grad_norm: 0.9139864708418549, iteration: 377168
loss: 0.9988604784011841,grad_norm: 0.8446956020385966, iteration: 377169
loss: 1.0018270015716553,grad_norm: 0.7396294841805771, iteration: 377170
loss: 0.9924407005310059,grad_norm: 0.8708997947880065, iteration: 377171
loss: 0.9931228756904602,grad_norm: 0.7906077460113781, iteration: 377172
loss: 0.9701797366142273,grad_norm: 0.7307731697378013, iteration: 377173
loss: 0.9949145317077637,grad_norm: 0.8453263928748774, iteration: 377174
loss: 0.9989034533500671,grad_norm: 0.9999990186370484, iteration: 377175
loss: 0.9950954914093018,grad_norm: 0.6857645587031324, iteration: 377176
loss: 1.0249234437942505,grad_norm: 0.9999994077206772, iteration: 377177
loss: 1.0400495529174805,grad_norm: 0.65649905368444, iteration: 377178
loss: 1.0343562364578247,grad_norm: 0.6863764935835817, iteration: 377179
loss: 0.988496720790863,grad_norm: 0.9671275492203217, iteration: 377180
loss: 1.0215727090835571,grad_norm: 0.7394836960182097, iteration: 377181
loss: 0.995485782623291,grad_norm: 0.9644123330170714, iteration: 377182
loss: 1.0080859661102295,grad_norm: 0.8151938412098854, iteration: 377183
loss: 1.0685553550720215,grad_norm: 0.9999998292243673, iteration: 377184
loss: 1.0382740497589111,grad_norm: 0.9999993135143178, iteration: 377185
loss: 0.9582806825637817,grad_norm: 0.7933286553012013, iteration: 377186
loss: 0.9942498803138733,grad_norm: 0.8033514870152141, iteration: 377187
loss: 1.0180537700653076,grad_norm: 0.9771809095832489, iteration: 377188
loss: 1.0038758516311646,grad_norm: 0.8273835845194442, iteration: 377189
loss: 0.9698394536972046,grad_norm: 0.7807496518393974, iteration: 377190
loss: 1.0497596263885498,grad_norm: 0.9999990987087486, iteration: 377191
loss: 0.9949783682823181,grad_norm: 0.9999991578504688, iteration: 377192
loss: 0.9820576310157776,grad_norm: 0.7701213706942764, iteration: 377193
loss: 1.0230172872543335,grad_norm: 0.8814405074649411, iteration: 377194
loss: 0.9818613529205322,grad_norm: 0.8552950513138934, iteration: 377195
loss: 0.9705392718315125,grad_norm: 0.9201201550214557, iteration: 377196
loss: 1.025325059890747,grad_norm: 0.9101530178850816, iteration: 377197
loss: 0.9837995171546936,grad_norm: 0.6667045085214663, iteration: 377198
loss: 0.9685441851615906,grad_norm: 0.8756823859116707, iteration: 377199
loss: 0.9722725749015808,grad_norm: 0.7888897908533082, iteration: 377200
loss: 0.9833929538726807,grad_norm: 0.7567377243191529, iteration: 377201
loss: 1.0832048654556274,grad_norm: 0.9999991490054028, iteration: 377202
loss: 0.9850024580955505,grad_norm: 0.800641847273365, iteration: 377203
loss: 0.973339855670929,grad_norm: 0.752860169206419, iteration: 377204
loss: 1.0132243633270264,grad_norm: 0.9753910860833221, iteration: 377205
loss: 1.0017606019973755,grad_norm: 0.7631654530940255, iteration: 377206
loss: 0.9805301427841187,grad_norm: 0.7718184635537999, iteration: 377207
loss: 0.9837795495986938,grad_norm: 0.7880179617607745, iteration: 377208
loss: 0.9875816702842712,grad_norm: 0.999999314308162, iteration: 377209
loss: 0.997696578502655,grad_norm: 0.8043179001897387, iteration: 377210
loss: 1.0295177698135376,grad_norm: 0.9877449487208921, iteration: 377211
loss: 0.9785789251327515,grad_norm: 0.7584309248525586, iteration: 377212
loss: 1.0480456352233887,grad_norm: 0.7463366682919995, iteration: 377213
loss: 1.035778284072876,grad_norm: 0.8575263223553183, iteration: 377214
loss: 0.9975578188896179,grad_norm: 0.7352362221655262, iteration: 377215
loss: 1.0331287384033203,grad_norm: 0.8702892698877627, iteration: 377216
loss: 1.0143625736236572,grad_norm: 0.8544869161000367, iteration: 377217
loss: 0.9923702478408813,grad_norm: 0.7173679578199254, iteration: 377218
loss: 1.0110058784484863,grad_norm: 0.7091126942112701, iteration: 377219
loss: 0.9822642207145691,grad_norm: 0.8924784021753639, iteration: 377220
loss: 0.9863433837890625,grad_norm: 0.7874750931207041, iteration: 377221
loss: 1.0380603075027466,grad_norm: 0.8006158123576486, iteration: 377222
loss: 0.9976430535316467,grad_norm: 0.7059424055280304, iteration: 377223
loss: 1.0344719886779785,grad_norm: 0.7994865460410222, iteration: 377224
loss: 0.998661458492279,grad_norm: 0.9358327029137136, iteration: 377225
loss: 0.9694758653640747,grad_norm: 0.7019816817970917, iteration: 377226
loss: 0.9934975504875183,grad_norm: 0.8640675007362827, iteration: 377227
loss: 0.9690953493118286,grad_norm: 0.6819262247781915, iteration: 377228
loss: 0.9839322566986084,grad_norm: 0.6377739711781127, iteration: 377229
loss: 1.0150938034057617,grad_norm: 0.7613692052424569, iteration: 377230
loss: 0.9881850481033325,grad_norm: 0.7675964931257493, iteration: 377231
loss: 1.0341874361038208,grad_norm: 0.7850628079545998, iteration: 377232
loss: 0.9898932576179504,grad_norm: 0.8243057496684982, iteration: 377233
loss: 1.0412687063217163,grad_norm: 0.9999990272285638, iteration: 377234
loss: 1.0095304250717163,grad_norm: 0.9822502773443055, iteration: 377235
loss: 1.0568654537200928,grad_norm: 0.7447085028771554, iteration: 377236
loss: 1.0574827194213867,grad_norm: 0.8204265502499513, iteration: 377237
loss: 0.9749601483345032,grad_norm: 0.860964319385708, iteration: 377238
loss: 1.0220023393630981,grad_norm: 0.8175047674673056, iteration: 377239
loss: 0.9950459599494934,grad_norm: 0.7045560176084285, iteration: 377240
loss: 1.0042139291763306,grad_norm: 0.7660561905085849, iteration: 377241
loss: 1.0029411315917969,grad_norm: 0.9099332258875283, iteration: 377242
loss: 1.0220071077346802,grad_norm: 0.70217463277677, iteration: 377243
loss: 1.0426526069641113,grad_norm: 0.7759135775629465, iteration: 377244
loss: 0.9902119636535645,grad_norm: 0.9767706747859118, iteration: 377245
loss: 0.9913825988769531,grad_norm: 0.8012689818236474, iteration: 377246
loss: 0.9809929132461548,grad_norm: 0.6981150467233828, iteration: 377247
loss: 0.9902795553207397,grad_norm: 0.7912317647134468, iteration: 377248
loss: 0.9795784950256348,grad_norm: 0.8116816210146458, iteration: 377249
loss: 1.0173429250717163,grad_norm: 0.8643658113367391, iteration: 377250
loss: 0.9710423350334167,grad_norm: 0.637866492081868, iteration: 377251
loss: 0.9926055669784546,grad_norm: 0.7965206626624182, iteration: 377252
loss: 1.0054391622543335,grad_norm: 0.8593524342315632, iteration: 377253
loss: 1.0196354389190674,grad_norm: 0.7372230440561011, iteration: 377254
loss: 0.9712286591529846,grad_norm: 0.6451226981168073, iteration: 377255
loss: 0.9665514230728149,grad_norm: 0.8528135524032844, iteration: 377256
loss: 1.0206153392791748,grad_norm: 0.6718089592310441, iteration: 377257
loss: 1.0346457958221436,grad_norm: 0.8880613460994227, iteration: 377258
loss: 0.9778118133544922,grad_norm: 0.8017992195770721, iteration: 377259
loss: 0.9906894564628601,grad_norm: 0.8638758866933268, iteration: 377260
loss: 0.9956547617912292,grad_norm: 0.987247427406184, iteration: 377261
loss: 0.9865949749946594,grad_norm: 0.7700303090473107, iteration: 377262
loss: 0.9876221418380737,grad_norm: 0.9999992842496181, iteration: 377263
loss: 1.010542869567871,grad_norm: 0.7903307097731292, iteration: 377264
loss: 0.9711305499076843,grad_norm: 0.7267643038717375, iteration: 377265
loss: 1.0194973945617676,grad_norm: 0.783019124775236, iteration: 377266
loss: 0.9552339911460876,grad_norm: 0.9999991091599822, iteration: 377267
loss: 0.9819597005844116,grad_norm: 0.7377421267585662, iteration: 377268
loss: 0.9921637773513794,grad_norm: 0.8458754915977127, iteration: 377269
loss: 1.0144882202148438,grad_norm: 0.8579497133562615, iteration: 377270
loss: 0.9929962754249573,grad_norm: 0.7664342390063783, iteration: 377271
loss: 0.9956828951835632,grad_norm: 0.7116731168223734, iteration: 377272
loss: 1.0130419731140137,grad_norm: 0.8245984062135163, iteration: 377273
loss: 1.027622103691101,grad_norm: 0.881785532753192, iteration: 377274
loss: 1.0029984712600708,grad_norm: 0.7766182196379658, iteration: 377275
loss: 1.0073784589767456,grad_norm: 0.8412494131373919, iteration: 377276
loss: 1.0110139846801758,grad_norm: 0.7796824659612355, iteration: 377277
loss: 1.0348310470581055,grad_norm: 0.9999994866404592, iteration: 377278
loss: 0.9858892560005188,grad_norm: 0.8440987061319951, iteration: 377279
loss: 1.0109549760818481,grad_norm: 0.7592384283799091, iteration: 377280
loss: 1.0505210161209106,grad_norm: 0.9999997119130698, iteration: 377281
loss: 1.0135056972503662,grad_norm: 0.6872769031039709, iteration: 377282
loss: 1.0425138473510742,grad_norm: 0.9999991577875937, iteration: 377283
loss: 1.0088027715682983,grad_norm: 0.781684279905243, iteration: 377284
loss: 0.9885930418968201,grad_norm: 0.6219444754207202, iteration: 377285
loss: 0.9738302826881409,grad_norm: 0.7551400633086676, iteration: 377286
loss: 0.9917730689048767,grad_norm: 0.7546053305455382, iteration: 377287
loss: 1.0091137886047363,grad_norm: 0.7620583625911559, iteration: 377288
loss: 1.004732608795166,grad_norm: 0.9297518842103637, iteration: 377289
loss: 0.9878710508346558,grad_norm: 0.7154683035359704, iteration: 377290
loss: 1.0010335445404053,grad_norm: 0.7966052586716152, iteration: 377291
loss: 1.0187499523162842,grad_norm: 0.7654960345250476, iteration: 377292
loss: 0.9973096251487732,grad_norm: 0.8312339571232912, iteration: 377293
loss: 0.9782477617263794,grad_norm: 0.8076783395762803, iteration: 377294
loss: 0.9740690588951111,grad_norm: 0.868322177800638, iteration: 377295
loss: 0.9805916547775269,grad_norm: 0.791318425086527, iteration: 377296
loss: 1.086303949356079,grad_norm: 0.9426235661899625, iteration: 377297
loss: 0.988798201084137,grad_norm: 0.7721341058987857, iteration: 377298
loss: 1.00332772731781,grad_norm: 0.6963930129247528, iteration: 377299
loss: 1.0052602291107178,grad_norm: 0.8400755813681965, iteration: 377300
loss: 1.0650655031204224,grad_norm: 0.8109314258239746, iteration: 377301
loss: 0.9968602657318115,grad_norm: 0.7205714337763174, iteration: 377302
loss: 0.9755560755729675,grad_norm: 0.83772439713494, iteration: 377303
loss: 0.9994378685951233,grad_norm: 0.974699772132943, iteration: 377304
loss: 1.0285989046096802,grad_norm: 0.835158994763507, iteration: 377305
loss: 1.0923140048980713,grad_norm: 0.9999991213567927, iteration: 377306
loss: 0.9521022439002991,grad_norm: 0.7670098866520032, iteration: 377307
loss: 0.9873082637786865,grad_norm: 0.8341285258506094, iteration: 377308
loss: 1.0164979696273804,grad_norm: 0.9842919780315718, iteration: 377309
loss: 1.0336729288101196,grad_norm: 0.8036072874107588, iteration: 377310
loss: 1.0046188831329346,grad_norm: 0.9326472009217827, iteration: 377311
loss: 0.9591503739356995,grad_norm: 0.9751107663510532, iteration: 377312
loss: 1.019703984260559,grad_norm: 0.8250042969433492, iteration: 377313
loss: 0.992688775062561,grad_norm: 0.8436541010507363, iteration: 377314
loss: 1.0134717226028442,grad_norm: 0.80837135143994, iteration: 377315
loss: 1.0705904960632324,grad_norm: 0.8996823711464911, iteration: 377316
loss: 0.9686025381088257,grad_norm: 0.7222082652368071, iteration: 377317
loss: 1.0114980936050415,grad_norm: 0.7203570155565536, iteration: 377318
loss: 0.9784061312675476,grad_norm: 0.8506226979986153, iteration: 377319
loss: 0.9958381652832031,grad_norm: 0.7002347764909203, iteration: 377320
loss: 1.0376414060592651,grad_norm: 0.9999996680651927, iteration: 377321
loss: 1.0215421915054321,grad_norm: 0.7347395906792807, iteration: 377322
loss: 1.003940224647522,grad_norm: 0.8053611309647899, iteration: 377323
loss: 0.977068305015564,grad_norm: 0.8403008425148669, iteration: 377324
loss: 0.9774294495582581,grad_norm: 0.7329340828040298, iteration: 377325
loss: 1.0003749132156372,grad_norm: 0.7575817822312607, iteration: 377326
loss: 0.9632085561752319,grad_norm: 0.7568360046855797, iteration: 377327
loss: 0.959135115146637,grad_norm: 0.9999992154081457, iteration: 377328
loss: 1.020369291305542,grad_norm: 0.7353043187377228, iteration: 377329
loss: 1.0297619104385376,grad_norm: 0.9999990585892449, iteration: 377330
loss: 1.0092799663543701,grad_norm: 0.7485785134057653, iteration: 377331
loss: 0.9907100796699524,grad_norm: 0.8141774715302573, iteration: 377332
loss: 1.0019478797912598,grad_norm: 0.7378759375584638, iteration: 377333
loss: 0.9846727848052979,grad_norm: 0.7263359615048721, iteration: 377334
loss: 0.983265221118927,grad_norm: 0.9127639041458183, iteration: 377335
loss: 1.072797179222107,grad_norm: 0.9627898093755737, iteration: 377336
loss: 1.0711474418640137,grad_norm: 0.9999993604884444, iteration: 377337
loss: 1.0192201137542725,grad_norm: 0.6900832417930838, iteration: 377338
loss: 1.0630804300308228,grad_norm: 0.9954973560197801, iteration: 377339
loss: 1.0172944068908691,grad_norm: 0.7629587371941651, iteration: 377340
loss: 1.0655018091201782,grad_norm: 0.7923036379786703, iteration: 377341
loss: 1.0074931383132935,grad_norm: 0.7476501654625368, iteration: 377342
loss: 1.0097850561141968,grad_norm: 0.738587692081602, iteration: 377343
loss: 1.0358195304870605,grad_norm: 0.7982576858961041, iteration: 377344
loss: 1.0364397764205933,grad_norm: 0.9999998450532441, iteration: 377345
loss: 0.9920955300331116,grad_norm: 0.7574770203360054, iteration: 377346
loss: 1.0705429315567017,grad_norm: 0.9886531022023657, iteration: 377347
loss: 0.9701693654060364,grad_norm: 0.9559716305222787, iteration: 377348
loss: 1.0282045602798462,grad_norm: 0.6456712028986376, iteration: 377349
loss: 1.007204294204712,grad_norm: 0.6323612714514221, iteration: 377350
loss: 1.0034475326538086,grad_norm: 0.719909011433453, iteration: 377351
loss: 1.000911831855774,grad_norm: 0.8420218125773666, iteration: 377352
loss: 0.971519410610199,grad_norm: 0.8772430864105097, iteration: 377353
loss: 1.022215723991394,grad_norm: 0.7981555841053058, iteration: 377354
loss: 1.0042600631713867,grad_norm: 0.8108316379964327, iteration: 377355
loss: 1.0170445442199707,grad_norm: 0.7309717922821162, iteration: 377356
loss: 0.9739636778831482,grad_norm: 0.8082761509245923, iteration: 377357
loss: 0.9444955587387085,grad_norm: 0.6882514866758104, iteration: 377358
loss: 1.0464715957641602,grad_norm: 0.8361233833080414, iteration: 377359
loss: 0.9922661781311035,grad_norm: 0.854575607685124, iteration: 377360
loss: 1.0080125331878662,grad_norm: 0.804101961407651, iteration: 377361
loss: 0.9984606504440308,grad_norm: 0.6396642415264264, iteration: 377362
loss: 1.0113937854766846,grad_norm: 0.7290101553067674, iteration: 377363
loss: 0.9952359199523926,grad_norm: 0.7846966961061118, iteration: 377364
loss: 0.9982689619064331,grad_norm: 0.7656640659676007, iteration: 377365
loss: 1.0471763610839844,grad_norm: 0.8482242470262326, iteration: 377366
loss: 0.9930462837219238,grad_norm: 0.7387936125252446, iteration: 377367
loss: 0.9875361323356628,grad_norm: 0.8940539613033941, iteration: 377368
loss: 1.0033249855041504,grad_norm: 0.6795336676534344, iteration: 377369
loss: 1.0592635869979858,grad_norm: 0.9999998786698086, iteration: 377370
loss: 1.0048524141311646,grad_norm: 0.9999991490250513, iteration: 377371
loss: 0.9649086594581604,grad_norm: 0.714820050308365, iteration: 377372
loss: 0.9873456358909607,grad_norm: 0.679430969811342, iteration: 377373
loss: 0.9784615635871887,grad_norm: 0.9999989966415863, iteration: 377374
loss: 1.0208796262741089,grad_norm: 0.9999998925078704, iteration: 377375
loss: 0.9664214253425598,grad_norm: 0.7369578362856832, iteration: 377376
loss: 1.000586748123169,grad_norm: 0.9053409887251826, iteration: 377377
loss: 0.9658690094947815,grad_norm: 0.766561975049177, iteration: 377378
loss: 0.9895918965339661,grad_norm: 0.7842917068937649, iteration: 377379
loss: 0.9823930263519287,grad_norm: 0.8542623383646949, iteration: 377380
loss: 0.9993413686752319,grad_norm: 0.7813417094435307, iteration: 377381
loss: 1.0076581239700317,grad_norm: 0.7749522106981129, iteration: 377382
loss: 0.9508483409881592,grad_norm: 0.7359833984192243, iteration: 377383
loss: 0.970702588558197,grad_norm: 0.999999003813752, iteration: 377384
loss: 1.0015320777893066,grad_norm: 0.7383732789098993, iteration: 377385
loss: 1.006751537322998,grad_norm: 0.766456796489591, iteration: 377386
loss: 1.011415958404541,grad_norm: 0.999999086448511, iteration: 377387
loss: 1.0218536853790283,grad_norm: 0.8389340464930685, iteration: 377388
loss: 0.9942750334739685,grad_norm: 0.658312830636154, iteration: 377389
loss: 1.0280832052230835,grad_norm: 0.747647547027366, iteration: 377390
loss: 1.0234726667404175,grad_norm: 0.8223290697487189, iteration: 377391
loss: 0.9946965575218201,grad_norm: 0.8502335370936431, iteration: 377392
loss: 0.9886569976806641,grad_norm: 0.7624311967676235, iteration: 377393
loss: 1.0273221731185913,grad_norm: 0.7724253610582096, iteration: 377394
loss: 1.0060311555862427,grad_norm: 0.9999989764636206, iteration: 377395
loss: 0.9765422344207764,grad_norm: 0.8265580882359486, iteration: 377396
loss: 1.0327143669128418,grad_norm: 0.9999992933824492, iteration: 377397
loss: 0.9974589347839355,grad_norm: 0.7813109378053876, iteration: 377398
loss: 1.008298397064209,grad_norm: 0.7131565808816354, iteration: 377399
loss: 1.016993761062622,grad_norm: 0.9999997633393836, iteration: 377400
loss: 1.006131649017334,grad_norm: 0.9999991090440736, iteration: 377401
loss: 0.9958176612854004,grad_norm: 0.9213822587656391, iteration: 377402
loss: 0.9767446517944336,grad_norm: 0.9585485627198251, iteration: 377403
loss: 0.971004843711853,grad_norm: 0.733655371313964, iteration: 377404
loss: 0.9810014367103577,grad_norm: 0.9138938177910092, iteration: 377405
loss: 0.9799497723579407,grad_norm: 0.9999992310827732, iteration: 377406
loss: 1.0353909730911255,grad_norm: 0.9999994180288558, iteration: 377407
loss: 1.0061651468276978,grad_norm: 0.7343932689619289, iteration: 377408
loss: 0.9621387124061584,grad_norm: 0.8113153554740367, iteration: 377409
loss: 0.9842329025268555,grad_norm: 0.8621749155051511, iteration: 377410
loss: 0.970276415348053,grad_norm: 0.7141577862146076, iteration: 377411
loss: 1.00892174243927,grad_norm: 0.8641007916439882, iteration: 377412
loss: 0.9755398035049438,grad_norm: 0.8133897971873576, iteration: 377413
loss: 0.982638418674469,grad_norm: 0.7203513209292289, iteration: 377414
loss: 0.9690098762512207,grad_norm: 0.8149711352890048, iteration: 377415
loss: 1.0024213790893555,grad_norm: 0.9369081501446773, iteration: 377416
loss: 1.0380524396896362,grad_norm: 0.8811161322495284, iteration: 377417
loss: 1.012315273284912,grad_norm: 0.8337804630751153, iteration: 377418
loss: 0.9961518049240112,grad_norm: 0.6333160533664101, iteration: 377419
loss: 0.9902662634849548,grad_norm: 0.7154531815914089, iteration: 377420
loss: 0.9999032616615295,grad_norm: 0.8442757556639777, iteration: 377421
loss: 0.9778476357460022,grad_norm: 0.7544242556181735, iteration: 377422
loss: 0.9900299310684204,grad_norm: 0.8337835067794601, iteration: 377423
loss: 0.9832510352134705,grad_norm: 0.7203481724362731, iteration: 377424
loss: 0.9886702299118042,grad_norm: 0.8470937876047657, iteration: 377425
loss: 0.9802387356758118,grad_norm: 0.7414508554450259, iteration: 377426
loss: 1.0002639293670654,grad_norm: 0.8054183848102714, iteration: 377427
loss: 1.0622187852859497,grad_norm: 0.9999993230933043, iteration: 377428
loss: 1.0086719989776611,grad_norm: 0.6927455561822972, iteration: 377429
loss: 1.0012588500976562,grad_norm: 0.9113021042196168, iteration: 377430
loss: 0.9831718802452087,grad_norm: 0.9999992338150553, iteration: 377431
loss: 0.9816603064537048,grad_norm: 0.7755550133658906, iteration: 377432
loss: 0.9962174892425537,grad_norm: 0.8053479656332142, iteration: 377433
loss: 1.075976014137268,grad_norm: 0.8576461862839971, iteration: 377434
loss: 1.0036662817001343,grad_norm: 0.9798508945022089, iteration: 377435
loss: 1.018721580505371,grad_norm: 0.8053512417382921, iteration: 377436
loss: 0.9790471792221069,grad_norm: 0.8249287874547104, iteration: 377437
loss: 0.9885039925575256,grad_norm: 0.8249417730703903, iteration: 377438
loss: 0.981674313545227,grad_norm: 0.9450672377852217, iteration: 377439
loss: 1.0012505054473877,grad_norm: 0.7546423952999548, iteration: 377440
loss: 0.9666677117347717,grad_norm: 0.9999995626546481, iteration: 377441
loss: 1.0561676025390625,grad_norm: 0.8120608908818048, iteration: 377442
loss: 1.0025993585586548,grad_norm: 0.7068777989562111, iteration: 377443
loss: 0.9946873188018799,grad_norm: 0.9999992686787817, iteration: 377444
loss: 0.9868423938751221,grad_norm: 0.6929672024396581, iteration: 377445
loss: 1.0041043758392334,grad_norm: 0.8259047345552345, iteration: 377446
loss: 0.9776817560195923,grad_norm: 0.8247657709262084, iteration: 377447
loss: 1.0005478858947754,grad_norm: 0.7421769233647206, iteration: 377448
loss: 1.0292558670043945,grad_norm: 0.8222145331122637, iteration: 377449
loss: 1.0321214199066162,grad_norm: 0.8204174112061967, iteration: 377450
loss: 1.0025335550308228,grad_norm: 0.8954953859183419, iteration: 377451
loss: 0.9631974697113037,grad_norm: 0.8825962919541328, iteration: 377452
loss: 0.9656222462654114,grad_norm: 0.7058079494102825, iteration: 377453
loss: 0.962542712688446,grad_norm: 0.7380292496361683, iteration: 377454
loss: 0.985539972782135,grad_norm: 0.6751736326909799, iteration: 377455
loss: 1.0681886672973633,grad_norm: 0.8129418066798506, iteration: 377456
loss: 0.974621593952179,grad_norm: 0.6798944775621169, iteration: 377457
loss: 0.996957004070282,grad_norm: 0.8753387193142175, iteration: 377458
loss: 1.010550856590271,grad_norm: 0.7458873434389032, iteration: 377459
loss: 1.0119895935058594,grad_norm: 0.8101967842802887, iteration: 377460
loss: 0.9676997065544128,grad_norm: 0.8557749678676352, iteration: 377461
loss: 0.994360625743866,grad_norm: 0.7520334410822719, iteration: 377462
loss: 0.9939648509025574,grad_norm: 0.8555221826355822, iteration: 377463
loss: 1.0489420890808105,grad_norm: 0.8400195504732467, iteration: 377464
loss: 1.0017468929290771,grad_norm: 0.8354806031358473, iteration: 377465
loss: 1.0224597454071045,grad_norm: 0.7073159741892058, iteration: 377466
loss: 1.0139211416244507,grad_norm: 0.792842727543193, iteration: 377467
loss: 0.9800207614898682,grad_norm: 0.8257765367728584, iteration: 377468
loss: 0.995326042175293,grad_norm: 0.7456688789820309, iteration: 377469
loss: 1.009478211402893,grad_norm: 0.9743153970035785, iteration: 377470
loss: 0.980536699295044,grad_norm: 0.9190847266178029, iteration: 377471
loss: 0.9890443682670593,grad_norm: 0.6823729097008081, iteration: 377472
loss: 0.9912167191505432,grad_norm: 0.9035094631207296, iteration: 377473
loss: 1.0096616744995117,grad_norm: 0.8927134698588218, iteration: 377474
loss: 1.0326770544052124,grad_norm: 0.8016839086473972, iteration: 377475
loss: 1.0120528936386108,grad_norm: 0.9999996540313864, iteration: 377476
loss: 0.9851506948471069,grad_norm: 0.8380387342789066, iteration: 377477
loss: 0.9860425591468811,grad_norm: 0.6983269390894155, iteration: 377478
loss: 0.9803324341773987,grad_norm: 0.8008219275331092, iteration: 377479
loss: 0.984078586101532,grad_norm: 0.8097095873822311, iteration: 377480
loss: 1.0133976936340332,grad_norm: 0.878601992646324, iteration: 377481
loss: 1.0126264095306396,grad_norm: 0.8627022866524, iteration: 377482
loss: 0.9930506348609924,grad_norm: 0.9999994929799856, iteration: 377483
loss: 0.9978404641151428,grad_norm: 0.7713916273858842, iteration: 377484
loss: 1.0149097442626953,grad_norm: 0.7496239880738472, iteration: 377485
loss: 1.0224703550338745,grad_norm: 0.7641746425056788, iteration: 377486
loss: 1.0013960599899292,grad_norm: 0.6449218925631621, iteration: 377487
loss: 0.9708771109580994,grad_norm: 0.7567503440845715, iteration: 377488
loss: 1.0163253545761108,grad_norm: 0.7449078613027137, iteration: 377489
loss: 1.047562837600708,grad_norm: 0.9999998012959807, iteration: 377490
loss: 1.0100038051605225,grad_norm: 0.835998706032349, iteration: 377491
loss: 1.0217584371566772,grad_norm: 0.9001329222924244, iteration: 377492
loss: 1.0008875131607056,grad_norm: 0.7631782943549807, iteration: 377493
loss: 1.002633810043335,grad_norm: 0.8875041518107656, iteration: 377494
loss: 1.0969098806381226,grad_norm: 0.9999992943204686, iteration: 377495
loss: 0.9924840927124023,grad_norm: 0.7707042986137594, iteration: 377496
loss: 0.9943752288818359,grad_norm: 0.8413357635289138, iteration: 377497
loss: 1.0401350259780884,grad_norm: 0.9711677175418841, iteration: 377498
loss: 0.9978171586990356,grad_norm: 0.780842496113594, iteration: 377499
loss: 1.019338607788086,grad_norm: 0.8392382636761836, iteration: 377500
loss: 0.963286817073822,grad_norm: 0.9959152284641238, iteration: 377501
loss: 1.0027281045913696,grad_norm: 0.8621526937602219, iteration: 377502
loss: 1.0005559921264648,grad_norm: 0.7822455171110091, iteration: 377503
loss: 0.9571518898010254,grad_norm: 0.8638950511084375, iteration: 377504
loss: 1.008269190788269,grad_norm: 0.9010692046907737, iteration: 377505
loss: 1.013763189315796,grad_norm: 0.9999990783231416, iteration: 377506
loss: 1.0625985860824585,grad_norm: 0.8531507711216556, iteration: 377507
loss: 0.9957969188690186,grad_norm: 0.7574847082176823, iteration: 377508
loss: 1.0028867721557617,grad_norm: 0.7415399398117476, iteration: 377509
loss: 1.055463433265686,grad_norm: 0.9077293859099058, iteration: 377510
loss: 0.9648718237876892,grad_norm: 0.8055179972861778, iteration: 377511
loss: 0.9908924102783203,grad_norm: 0.8154600350013343, iteration: 377512
loss: 0.9923587441444397,grad_norm: 0.7951045901386047, iteration: 377513
loss: 1.0184434652328491,grad_norm: 0.8655221029800173, iteration: 377514
loss: 0.9886805415153503,grad_norm: 0.7971622041585675, iteration: 377515
loss: 1.015104055404663,grad_norm: 0.814354581699146, iteration: 377516
loss: 0.9876188635826111,grad_norm: 0.9999990930050291, iteration: 377517
loss: 0.9595445990562439,grad_norm: 0.7937612712653723, iteration: 377518
loss: 1.0228973627090454,grad_norm: 0.8813772210907055, iteration: 377519
loss: 0.9859791994094849,grad_norm: 0.7739340781447431, iteration: 377520
loss: 0.9901046752929688,grad_norm: 0.7135768817052062, iteration: 377521
loss: 0.9999881982803345,grad_norm: 0.7864164290589831, iteration: 377522
loss: 0.9743638038635254,grad_norm: 0.850549789030548, iteration: 377523
loss: 0.9968445897102356,grad_norm: 0.7484129059463069, iteration: 377524
loss: 0.9970940351486206,grad_norm: 0.7126697476048263, iteration: 377525
loss: 1.0495500564575195,grad_norm: 0.896100492589146, iteration: 377526
loss: 1.0002809762954712,grad_norm: 0.6816104752025206, iteration: 377527
loss: 1.0423262119293213,grad_norm: 0.9999995653005067, iteration: 377528
loss: 1.2045131921768188,grad_norm: 0.9999995681454295, iteration: 377529
loss: 0.979928195476532,grad_norm: 0.9248639849788725, iteration: 377530
loss: 1.009009599685669,grad_norm: 0.8518820052725604, iteration: 377531
loss: 0.9994947910308838,grad_norm: 0.8008494230373215, iteration: 377532
loss: 0.9676600694656372,grad_norm: 0.7591215458451257, iteration: 377533
loss: 0.9562864303588867,grad_norm: 0.8884783427960586, iteration: 377534
loss: 0.9582663178443909,grad_norm: 0.7857774474089749, iteration: 377535
loss: 1.017209768295288,grad_norm: 0.7780684323940036, iteration: 377536
loss: 0.986427366733551,grad_norm: 0.8500659302558611, iteration: 377537
loss: 1.0042237043380737,grad_norm: 0.9506241253055482, iteration: 377538
loss: 1.0124205350875854,grad_norm: 0.6958795653465306, iteration: 377539
loss: 1.030094861984253,grad_norm: 0.9999998348013058, iteration: 377540
loss: 0.9849348068237305,grad_norm: 0.874422871003209, iteration: 377541
loss: 1.0071643590927124,grad_norm: 0.793716946312919, iteration: 377542
loss: 1.011572003364563,grad_norm: 0.7673481995285623, iteration: 377543
loss: 0.9866431951522827,grad_norm: 0.9196889959531982, iteration: 377544
loss: 0.9856652617454529,grad_norm: 0.8450620003332817, iteration: 377545
loss: 0.9839828014373779,grad_norm: 0.689526350698484, iteration: 377546
loss: 0.9630803465843201,grad_norm: 0.8206938316286279, iteration: 377547
loss: 1.0295331478118896,grad_norm: 0.999999708162538, iteration: 377548
loss: 0.9951638579368591,grad_norm: 0.9012653105481869, iteration: 377549
loss: 0.9801270365715027,grad_norm: 0.7730464958595269, iteration: 377550
loss: 0.9856923818588257,grad_norm: 0.7890829369016585, iteration: 377551
loss: 1.0136343240737915,grad_norm: 0.6939500147174466, iteration: 377552
loss: 1.0428344011306763,grad_norm: 0.9999995665311912, iteration: 377553
loss: 0.9843906164169312,grad_norm: 0.8064394578098892, iteration: 377554
loss: 0.9604723453521729,grad_norm: 0.9187364101690437, iteration: 377555
loss: 0.9962939023971558,grad_norm: 0.9999992339434747, iteration: 377556
loss: 0.9933269023895264,grad_norm: 0.9337917990827621, iteration: 377557
loss: 0.9660659432411194,grad_norm: 0.9999995280003329, iteration: 377558
loss: 1.0559765100479126,grad_norm: 0.999999854596207, iteration: 377559
loss: 1.0131365060806274,grad_norm: 0.99999948613112, iteration: 377560
loss: 1.007059097290039,grad_norm: 0.7963083596403974, iteration: 377561
loss: 0.9844523668289185,grad_norm: 0.9999989691022031, iteration: 377562
loss: 1.035247802734375,grad_norm: 0.7036296149532608, iteration: 377563
loss: 0.9972525835037231,grad_norm: 0.773333397108203, iteration: 377564
loss: 0.9810081720352173,grad_norm: 0.7218253554063699, iteration: 377565
loss: 1.0914140939712524,grad_norm: 0.9999997021777527, iteration: 377566
loss: 0.9851066470146179,grad_norm: 0.7801188006625921, iteration: 377567
loss: 1.0128461122512817,grad_norm: 0.9999993962206869, iteration: 377568
loss: 1.0031535625457764,grad_norm: 0.7868419915211531, iteration: 377569
loss: 0.980720043182373,grad_norm: 0.7912272685818552, iteration: 377570
loss: 1.0062905550003052,grad_norm: 0.8501336440077036, iteration: 377571
loss: 1.018285870552063,grad_norm: 0.7181871444515021, iteration: 377572
loss: 0.9782031774520874,grad_norm: 0.7941590698904807, iteration: 377573
loss: 0.978502094745636,grad_norm: 0.7261484830317162, iteration: 377574
loss: 1.0185819864273071,grad_norm: 0.9017299216116081, iteration: 377575
loss: 0.9885225296020508,grad_norm: 0.7092190780537866, iteration: 377576
loss: 0.9897481799125671,grad_norm: 0.8825075192057099, iteration: 377577
loss: 0.9800487756729126,grad_norm: 0.7921753152818245, iteration: 377578
loss: 0.9926607608795166,grad_norm: 0.9999990041342478, iteration: 377579
loss: 1.038455843925476,grad_norm: 0.9016531167437419, iteration: 377580
loss: 0.9743173718452454,grad_norm: 0.7842880934848127, iteration: 377581
loss: 1.0009195804595947,grad_norm: 0.8165983769281024, iteration: 377582
loss: 1.0186429023742676,grad_norm: 0.9894035421412009, iteration: 377583
loss: 1.012663722038269,grad_norm: 0.9999991063999185, iteration: 377584
loss: 1.0050370693206787,grad_norm: 0.8347045279684819, iteration: 377585
loss: 1.014107584953308,grad_norm: 0.8077120181106857, iteration: 377586
loss: 0.992537260055542,grad_norm: 0.9669399305733016, iteration: 377587
loss: 1.007622241973877,grad_norm: 0.8965267389198073, iteration: 377588
loss: 0.9941770434379578,grad_norm: 0.7285076049708702, iteration: 377589
loss: 1.0139222145080566,grad_norm: 0.8981879533906808, iteration: 377590
loss: 0.9621607065200806,grad_norm: 0.774969555109846, iteration: 377591
loss: 0.9734846353530884,grad_norm: 0.7218657800013315, iteration: 377592
loss: 1.003031611442566,grad_norm: 0.7538936057129375, iteration: 377593
loss: 0.9849188923835754,grad_norm: 0.8177423670433557, iteration: 377594
loss: 1.0463954210281372,grad_norm: 0.9999992630892173, iteration: 377595
loss: 0.9891520142555237,grad_norm: 0.8164587011867096, iteration: 377596
loss: 1.0146492719650269,grad_norm: 0.7270413101576153, iteration: 377597
loss: 0.985688328742981,grad_norm: 0.9999997171577102, iteration: 377598
loss: 1.019858479499817,grad_norm: 0.999999419430892, iteration: 377599
loss: 1.0032802820205688,grad_norm: 0.7710078445613354, iteration: 377600
loss: 1.0676233768463135,grad_norm: 0.9999999324411631, iteration: 377601
loss: 1.0136739015579224,grad_norm: 0.8286909001890632, iteration: 377602
loss: 0.9811962842941284,grad_norm: 0.6837209112447324, iteration: 377603
loss: 1.0244109630584717,grad_norm: 0.8218603634401925, iteration: 377604
loss: 0.9900027513504028,grad_norm: 0.9078811673191602, iteration: 377605
loss: 0.9854031801223755,grad_norm: 0.7825778789574285, iteration: 377606
loss: 1.0064753293991089,grad_norm: 0.9929435064087672, iteration: 377607
loss: 0.9672163128852844,grad_norm: 0.8407952712025432, iteration: 377608
loss: 0.9823688864707947,grad_norm: 0.7457039231232059, iteration: 377609
loss: 0.9986777305603027,grad_norm: 0.9580332207849094, iteration: 377610
loss: 1.0063954591751099,grad_norm: 0.6794181823083908, iteration: 377611
loss: 1.0074886083602905,grad_norm: 0.8400872385989743, iteration: 377612
loss: 0.9910101890563965,grad_norm: 0.7823116819631606, iteration: 377613
loss: 0.9807246923446655,grad_norm: 0.7640014726615594, iteration: 377614
loss: 0.9757739901542664,grad_norm: 0.7864838869044031, iteration: 377615
loss: 1.001001000404358,grad_norm: 0.8602010971278663, iteration: 377616
loss: 0.9709445238113403,grad_norm: 0.7937921362699916, iteration: 377617
loss: 0.9847564697265625,grad_norm: 0.7580848133432162, iteration: 377618
loss: 0.9887070059776306,grad_norm: 0.8071348209419938, iteration: 377619
loss: 1.009635090827942,grad_norm: 0.8089528771870014, iteration: 377620
loss: 1.0128051042556763,grad_norm: 0.9657873268815031, iteration: 377621
loss: 1.0052459239959717,grad_norm: 0.9455632644986206, iteration: 377622
loss: 1.0329967737197876,grad_norm: 0.8938538736459988, iteration: 377623
loss: 1.0237112045288086,grad_norm: 0.859773628370841, iteration: 377624
loss: 1.0275031328201294,grad_norm: 0.799249410086159, iteration: 377625
loss: 1.0398179292678833,grad_norm: 0.9999998009201924, iteration: 377626
loss: 0.9964012503623962,grad_norm: 0.9999996217039145, iteration: 377627
loss: 0.9984562397003174,grad_norm: 0.8600555570330832, iteration: 377628
loss: 0.9907795786857605,grad_norm: 0.7353229238725619, iteration: 377629
loss: 1.0400099754333496,grad_norm: 0.9211491156384286, iteration: 377630
loss: 1.000288963317871,grad_norm: 0.7897386637254797, iteration: 377631
loss: 0.9921544194221497,grad_norm: 0.8858675849437992, iteration: 377632
loss: 1.0071622133255005,grad_norm: 0.7403016500265944, iteration: 377633
loss: 1.008189082145691,grad_norm: 0.8185216510447411, iteration: 377634
loss: 1.0430445671081543,grad_norm: 0.999999222443807, iteration: 377635
loss: 0.9735172390937805,grad_norm: 0.8383503107208805, iteration: 377636
loss: 0.984083354473114,grad_norm: 0.7701400151187131, iteration: 377637
loss: 0.9939272403717041,grad_norm: 0.8300326845719976, iteration: 377638
loss: 1.031321406364441,grad_norm: 0.9402339293216061, iteration: 377639
loss: 1.0058269500732422,grad_norm: 0.7440214152850739, iteration: 377640
loss: 1.029837727546692,grad_norm: 0.9999995639004299, iteration: 377641
loss: 0.9746036529541016,grad_norm: 0.8143825552724876, iteration: 377642
loss: 0.9983195662498474,grad_norm: 0.870593588245711, iteration: 377643
loss: 1.0353044271469116,grad_norm: 0.8893742096589259, iteration: 377644
loss: 0.9574371576309204,grad_norm: 0.7092752208742982, iteration: 377645
loss: 1.0626932382583618,grad_norm: 0.9238803291492758, iteration: 377646
loss: 1.0028184652328491,grad_norm: 0.8357604984813017, iteration: 377647
loss: 1.0101429224014282,grad_norm: 0.7070913406043987, iteration: 377648
loss: 0.998648464679718,grad_norm: 0.8117503586320708, iteration: 377649
loss: 0.9943663477897644,grad_norm: 0.7084900667213777, iteration: 377650
loss: 0.9903731346130371,grad_norm: 0.8804183610571439, iteration: 377651
loss: 0.9837595820426941,grad_norm: 0.7346138179686276, iteration: 377652
loss: 0.9822630286216736,grad_norm: 0.7615130010931892, iteration: 377653
loss: 1.0036758184432983,grad_norm: 0.9788488941580495, iteration: 377654
loss: 0.9667510390281677,grad_norm: 0.8027863931957624, iteration: 377655
loss: 0.9948202967643738,grad_norm: 0.6928043126923057, iteration: 377656
loss: 0.984673023223877,grad_norm: 0.8354748937955672, iteration: 377657
loss: 1.0183378458023071,grad_norm: 0.9999999192811584, iteration: 377658
loss: 0.9786665439605713,grad_norm: 0.6809687171959302, iteration: 377659
loss: 0.9911729693412781,grad_norm: 0.6310394033313224, iteration: 377660
loss: 1.0018868446350098,grad_norm: 0.8739474348713151, iteration: 377661
loss: 0.9643816947937012,grad_norm: 0.9999990360157788, iteration: 377662
loss: 1.0276927947998047,grad_norm: 0.949877487074823, iteration: 377663
loss: 0.9951702356338501,grad_norm: 0.9980490063237142, iteration: 377664
loss: 1.0613142251968384,grad_norm: 0.9725747322541289, iteration: 377665
loss: 1.0110782384872437,grad_norm: 0.7830714422725024, iteration: 377666
loss: 0.9456354379653931,grad_norm: 0.7351466619039191, iteration: 377667
loss: 1.078177809715271,grad_norm: 0.9999992179906313, iteration: 377668
loss: 1.0326557159423828,grad_norm: 0.701266597411654, iteration: 377669
loss: 0.9665548205375671,grad_norm: 0.8802180451994295, iteration: 377670
loss: 1.0178847312927246,grad_norm: 0.9335851006688268, iteration: 377671
loss: 1.0046027898788452,grad_norm: 0.9135157884822867, iteration: 377672
loss: 0.9952815771102905,grad_norm: 0.7723042140006893, iteration: 377673
loss: 0.9881827235221863,grad_norm: 0.8335606675049417, iteration: 377674
loss: 0.9763352274894714,grad_norm: 0.7980542976917105, iteration: 377675
loss: 1.0335272550582886,grad_norm: 0.920464476210074, iteration: 377676
loss: 1.005515694618225,grad_norm: 0.8676679860763672, iteration: 377677
loss: 1.022910475730896,grad_norm: 0.7830543362900902, iteration: 377678
loss: 1.0186675786972046,grad_norm: 0.8365331978347884, iteration: 377679
loss: 1.091822862625122,grad_norm: 0.9997950356527086, iteration: 377680
loss: 1.025154709815979,grad_norm: 0.8800950823254988, iteration: 377681
loss: 0.9914653897285461,grad_norm: 0.7915692993699388, iteration: 377682
loss: 0.9616021513938904,grad_norm: 0.634702603949077, iteration: 377683
loss: 1.0447547435760498,grad_norm: 0.9999994899691177, iteration: 377684
loss: 0.9859652519226074,grad_norm: 0.9255172673630043, iteration: 377685
loss: 0.9858710765838623,grad_norm: 0.6980031608698727, iteration: 377686
loss: 0.9905314445495605,grad_norm: 0.7657680591960785, iteration: 377687
loss: 1.0578100681304932,grad_norm: 0.821389989721344, iteration: 377688
loss: 1.055902361869812,grad_norm: 0.7721072333844965, iteration: 377689
loss: 1.102216124534607,grad_norm: 0.8846534331714474, iteration: 377690
loss: 1.005513310432434,grad_norm: 0.8370735531711706, iteration: 377691
loss: 1.0169320106506348,grad_norm: 0.8936991168317375, iteration: 377692
loss: 0.9859294295310974,grad_norm: 0.7947822702536665, iteration: 377693
loss: 0.9882314205169678,grad_norm: 0.9999998907271076, iteration: 377694
loss: 1.0109087228775024,grad_norm: 0.9072860032917104, iteration: 377695
loss: 0.999550461769104,grad_norm: 0.9999990908960235, iteration: 377696
loss: 0.9771461486816406,grad_norm: 0.9254190371246912, iteration: 377697
loss: 1.0967793464660645,grad_norm: 0.9999991420424242, iteration: 377698
loss: 0.991970956325531,grad_norm: 0.9999993030871687, iteration: 377699
loss: 1.0603731870651245,grad_norm: 0.9602173724593848, iteration: 377700
loss: 1.071269154548645,grad_norm: 0.9999998863967814, iteration: 377701
loss: 1.1747313737869263,grad_norm: 0.9999991872660023, iteration: 377702
loss: 1.0647213459014893,grad_norm: 0.9121148588899476, iteration: 377703
loss: 1.0290175676345825,grad_norm: 0.8519476371052792, iteration: 377704
loss: 0.982277512550354,grad_norm: 0.9999995248903125, iteration: 377705
loss: 0.9899077415466309,grad_norm: 0.9122145419509963, iteration: 377706
loss: 1.1854336261749268,grad_norm: 0.9999995344798555, iteration: 377707
loss: 1.0199922323226929,grad_norm: 0.8190220135089165, iteration: 377708
loss: 1.042722463607788,grad_norm: 0.9999995822243297, iteration: 377709
loss: 1.082612156867981,grad_norm: 0.999999427521882, iteration: 377710
loss: 1.0335019826889038,grad_norm: 0.7588148670152688, iteration: 377711
loss: 1.021185278892517,grad_norm: 0.9999991509004729, iteration: 377712
loss: 1.0787440538406372,grad_norm: 0.9999996287972281, iteration: 377713
loss: 1.1915477514266968,grad_norm: 0.9999994677198627, iteration: 377714
loss: 1.2542006969451904,grad_norm: 0.999999793538184, iteration: 377715
loss: 1.0482239723205566,grad_norm: 0.9999999481666335, iteration: 377716
loss: 1.0272842645645142,grad_norm: 0.9330064580405264, iteration: 377717
loss: 1.0736210346221924,grad_norm: 0.9999991811315405, iteration: 377718
loss: 1.0545986890792847,grad_norm: 0.9999994787610118, iteration: 377719
loss: 1.0327178239822388,grad_norm: 0.9999993848176192, iteration: 377720
loss: 1.0618284940719604,grad_norm: 0.8316769003006713, iteration: 377721
loss: 1.0658037662506104,grad_norm: 0.999999776959309, iteration: 377722
loss: 1.0226449966430664,grad_norm: 0.9413067366666679, iteration: 377723
loss: 1.0957367420196533,grad_norm: 0.9999998122583463, iteration: 377724
loss: 0.9915306568145752,grad_norm: 0.8577959204173923, iteration: 377725
loss: 1.002668023109436,grad_norm: 0.9999997523891943, iteration: 377726
loss: 1.008532166481018,grad_norm: 0.8625393435113238, iteration: 377727
loss: 1.2597336769104004,grad_norm: 0.9999998243882297, iteration: 377728
loss: 1.0479209423065186,grad_norm: 0.9999994592394228, iteration: 377729
loss: 0.9886934161186218,grad_norm: 0.8192409718302598, iteration: 377730
loss: 1.0433415174484253,grad_norm: 0.9999990951660218, iteration: 377731
loss: 1.0114805698394775,grad_norm: 0.93669550356227, iteration: 377732
loss: 1.0672858953475952,grad_norm: 1.0000000452973825, iteration: 377733
loss: 1.0191705226898193,grad_norm: 0.9620153653270807, iteration: 377734
loss: 1.0448999404907227,grad_norm: 0.9999993492804701, iteration: 377735
loss: 1.0521395206451416,grad_norm: 0.999999804810691, iteration: 377736
loss: 1.031410813331604,grad_norm: 0.8770348208701995, iteration: 377737
loss: 1.0281882286071777,grad_norm: 0.9999992803075101, iteration: 377738
loss: 1.0322394371032715,grad_norm: 0.75128684248757, iteration: 377739
loss: 1.020087718963623,grad_norm: 0.9508695299213216, iteration: 377740
loss: 0.9999202489852905,grad_norm: 0.8030213104262551, iteration: 377741
loss: 1.0011603832244873,grad_norm: 0.7247247695978353, iteration: 377742
loss: 1.0160101652145386,grad_norm: 0.872872788829147, iteration: 377743
loss: 1.0111629962921143,grad_norm: 0.6818120816240848, iteration: 377744
loss: 1.0710595846176147,grad_norm: 0.9999998093631196, iteration: 377745
loss: 1.0081590414047241,grad_norm: 0.702543157236194, iteration: 377746
loss: 1.0320740938186646,grad_norm: 0.9711456445696981, iteration: 377747
loss: 1.0438579320907593,grad_norm: 0.8210120830238367, iteration: 377748
loss: 1.0328364372253418,grad_norm: 0.8736140177387289, iteration: 377749
loss: 0.9534510374069214,grad_norm: 0.7134123754347858, iteration: 377750
loss: 0.9721439480781555,grad_norm: 0.9018767778595158, iteration: 377751
loss: 0.9938886761665344,grad_norm: 0.7791861145592808, iteration: 377752
loss: 0.9751265048980713,grad_norm: 0.7268217554448624, iteration: 377753
loss: 1.0559107065200806,grad_norm: 0.7496280431425195, iteration: 377754
loss: 1.0176961421966553,grad_norm: 0.8728901034574278, iteration: 377755
loss: 0.9868777990341187,grad_norm: 0.8603666541408788, iteration: 377756
loss: 1.037623405456543,grad_norm: 0.8429060352761228, iteration: 377757
loss: 1.0070013999938965,grad_norm: 0.8794561337818199, iteration: 377758
loss: 1.0579290390014648,grad_norm: 0.9999992203977609, iteration: 377759
loss: 0.998647153377533,grad_norm: 0.7688049088643196, iteration: 377760
loss: 1.0033353567123413,grad_norm: 0.8152532729560314, iteration: 377761
loss: 0.9766680002212524,grad_norm: 0.8747758801335683, iteration: 377762
loss: 0.9855784177780151,grad_norm: 0.8924622548834168, iteration: 377763
loss: 1.0073883533477783,grad_norm: 0.7718300617997224, iteration: 377764
loss: 0.9818136096000671,grad_norm: 0.853303366749318, iteration: 377765
loss: 0.9933714866638184,grad_norm: 0.8150059058376394, iteration: 377766
loss: 0.9924052357673645,grad_norm: 0.8384129559821536, iteration: 377767
loss: 1.0183719396591187,grad_norm: 0.9999990034858748, iteration: 377768
loss: 1.110814094543457,grad_norm: 0.9999992483173497, iteration: 377769
loss: 1.007875919342041,grad_norm: 0.9999991879908325, iteration: 377770
loss: 1.2460042238235474,grad_norm: 0.99999992399377, iteration: 377771
loss: 1.0011662244796753,grad_norm: 0.7040133844055889, iteration: 377772
loss: 1.1128052473068237,grad_norm: 0.9999998890881328, iteration: 377773
loss: 0.9766213297843933,grad_norm: 0.7099715043508037, iteration: 377774
loss: 1.027374267578125,grad_norm: 0.8836747371007843, iteration: 377775
loss: 1.006089687347412,grad_norm: 0.7310526407356402, iteration: 377776
loss: 1.0972316265106201,grad_norm: 0.8088174616048683, iteration: 377777
loss: 1.0199472904205322,grad_norm: 0.6760719046785009, iteration: 377778
loss: 1.0225574970245361,grad_norm: 0.8442500517173761, iteration: 377779
loss: 1.0185766220092773,grad_norm: 0.7925874574417661, iteration: 377780
loss: 1.065321922302246,grad_norm: 0.7808012333014612, iteration: 377781
loss: 1.013998031616211,grad_norm: 0.9685025189967524, iteration: 377782
loss: 1.0464632511138916,grad_norm: 0.7992870505592763, iteration: 377783
loss: 0.9653708338737488,grad_norm: 0.9298060999184696, iteration: 377784
loss: 0.9872057437896729,grad_norm: 0.9999992828899184, iteration: 377785
loss: 0.975762665271759,grad_norm: 0.9742038546390581, iteration: 377786
loss: 1.0926905870437622,grad_norm: 0.9999998141070738, iteration: 377787
loss: 1.00926673412323,grad_norm: 0.8421354222661349, iteration: 377788
loss: 1.0177925825119019,grad_norm: 0.8090817843814165, iteration: 377789
loss: 1.0051215887069702,grad_norm: 0.7900272183015407, iteration: 377790
loss: 1.0724352598190308,grad_norm: 0.84077799738779, iteration: 377791
loss: 0.9870816469192505,grad_norm: 0.8659311478133724, iteration: 377792
loss: 1.0258485078811646,grad_norm: 0.9999993243119574, iteration: 377793
loss: 0.9844878315925598,grad_norm: 0.8457102378960353, iteration: 377794
loss: 0.9956153035163879,grad_norm: 0.8023370528988469, iteration: 377795
loss: 1.1154887676239014,grad_norm: 0.9999991294860653, iteration: 377796
loss: 0.9999585747718811,grad_norm: 0.7278213801652358, iteration: 377797
loss: 0.9585095643997192,grad_norm: 0.9140210550304525, iteration: 377798
loss: 0.9947654008865356,grad_norm: 0.8504973569400518, iteration: 377799
loss: 1.018479347229004,grad_norm: 0.8453864610036719, iteration: 377800
loss: 0.9917080998420715,grad_norm: 0.9760927558730356, iteration: 377801
loss: 0.9786854982376099,grad_norm: 0.9003778663552571, iteration: 377802
loss: 1.0077375173568726,grad_norm: 0.8280440645993125, iteration: 377803
loss: 0.9914899468421936,grad_norm: 0.6849361667968165, iteration: 377804
loss: 1.0277607440948486,grad_norm: 0.8624855690323449, iteration: 377805
loss: 1.0372967720031738,grad_norm: 0.999999182031851, iteration: 377806
loss: 1.0807536840438843,grad_norm: 0.8337895588353228, iteration: 377807
loss: 1.044539213180542,grad_norm: 0.861465597116869, iteration: 377808
loss: 1.0057734251022339,grad_norm: 0.8264511614359301, iteration: 377809
loss: 1.023846983909607,grad_norm: 0.9999993421341974, iteration: 377810
loss: 0.9953352212905884,grad_norm: 0.8197095181254355, iteration: 377811
loss: 0.9962379336357117,grad_norm: 0.9999991026374296, iteration: 377812
loss: 1.0189647674560547,grad_norm: 0.7650184819045995, iteration: 377813
loss: 0.9654861688613892,grad_norm: 0.7878525239756686, iteration: 377814
loss: 1.017263412475586,grad_norm: 0.9999996649183728, iteration: 377815
loss: 1.1466716527938843,grad_norm: 0.9999999220649874, iteration: 377816
loss: 0.9921241402626038,grad_norm: 0.8753684674311458, iteration: 377817
loss: 1.0304926633834839,grad_norm: 0.8902566613628642, iteration: 377818
loss: 1.0122708082199097,grad_norm: 0.8648064750574225, iteration: 377819
loss: 0.9985211491584778,grad_norm: 0.710021402431078, iteration: 377820
loss: 0.9838751554489136,grad_norm: 0.6808911072996473, iteration: 377821
loss: 1.0298670530319214,grad_norm: 0.9310753704420347, iteration: 377822
loss: 1.0595895051956177,grad_norm: 0.9261783643614927, iteration: 377823
loss: 1.0576304197311401,grad_norm: 0.751454248143281, iteration: 377824
loss: 0.9942132830619812,grad_norm: 0.631924695086153, iteration: 377825
loss: 1.061797022819519,grad_norm: 0.999999866537018, iteration: 377826
loss: 1.017823576927185,grad_norm: 0.8051542465653012, iteration: 377827
loss: 1.0336607694625854,grad_norm: 0.9999990783124616, iteration: 377828
loss: 1.008342981338501,grad_norm: 0.9999996085517964, iteration: 377829
loss: 1.0119081735610962,grad_norm: 0.9999994470102381, iteration: 377830
loss: 0.9956884980201721,grad_norm: 0.7702935979957641, iteration: 377831
loss: 1.0149450302124023,grad_norm: 0.9999996843913566, iteration: 377832
loss: 1.0671470165252686,grad_norm: 0.9131066694646333, iteration: 377833
loss: 1.0610076189041138,grad_norm: 0.9999992082496916, iteration: 377834
loss: 1.0019158124923706,grad_norm: 0.9999992790642614, iteration: 377835
loss: 0.9493528008460999,grad_norm: 0.7539803788537023, iteration: 377836
loss: 1.097371220588684,grad_norm: 0.9999999290401534, iteration: 377837
loss: 0.9730492830276489,grad_norm: 0.8253318422086511, iteration: 377838
loss: 1.0124002695083618,grad_norm: 0.9999999265041682, iteration: 377839
loss: 1.0146231651306152,grad_norm: 0.8491534166974322, iteration: 377840
loss: 1.1621443033218384,grad_norm: 0.9999997223869067, iteration: 377841
loss: 1.0121761560440063,grad_norm: 0.9999996920242403, iteration: 377842
loss: 1.1830252408981323,grad_norm: 0.9999992140497711, iteration: 377843
loss: 1.0258795022964478,grad_norm: 0.8662536716022082, iteration: 377844
loss: 1.1496578454971313,grad_norm: 0.9999995292444526, iteration: 377845
loss: 1.1857595443725586,grad_norm: 0.9999998886688763, iteration: 377846
loss: 1.0304964780807495,grad_norm: 0.8954869520873792, iteration: 377847
loss: 0.9897512197494507,grad_norm: 0.6909219351162483, iteration: 377848
loss: 0.9966452717781067,grad_norm: 0.7352677544034306, iteration: 377849
loss: 1.0636769533157349,grad_norm: 0.9999995778966165, iteration: 377850
loss: 1.0270735025405884,grad_norm: 0.9490070194946919, iteration: 377851
loss: 0.9847103953361511,grad_norm: 0.9684580920230428, iteration: 377852
loss: 1.0332916975021362,grad_norm: 0.9999998134318038, iteration: 377853
loss: 1.2366399765014648,grad_norm: 0.9999994075054275, iteration: 377854
loss: 1.030175805091858,grad_norm: 0.9999994492350321, iteration: 377855
loss: 1.0132477283477783,grad_norm: 0.9870646813678553, iteration: 377856
loss: 1.0653640031814575,grad_norm: 0.9999997890576665, iteration: 377857
loss: 0.984831690788269,grad_norm: 0.8630940975759238, iteration: 377858
loss: 1.003798007965088,grad_norm: 0.9999991534410042, iteration: 377859
loss: 0.9971334338188171,grad_norm: 0.7722697895141619, iteration: 377860
loss: 1.0133135318756104,grad_norm: 0.9999992280353522, iteration: 377861
loss: 1.1073027849197388,grad_norm: 0.9999994307638272, iteration: 377862
loss: 0.9943673014640808,grad_norm: 0.7888736134936263, iteration: 377863
loss: 1.0135804414749146,grad_norm: 0.8443729397394628, iteration: 377864
loss: 0.9991086721420288,grad_norm: 0.9999996598752264, iteration: 377865
loss: 1.0369271039962769,grad_norm: 0.7999388575510421, iteration: 377866
loss: 0.9761698842048645,grad_norm: 0.8024543880868581, iteration: 377867
loss: 1.050098180770874,grad_norm: 0.9999993949741053, iteration: 377868
loss: 1.1397364139556885,grad_norm: 0.999999754103928, iteration: 377869
loss: 0.9970712661743164,grad_norm: 0.7786415158978393, iteration: 377870
loss: 0.9825023412704468,grad_norm: 0.7803178065014302, iteration: 377871
loss: 1.025688886642456,grad_norm: 0.8051701929877629, iteration: 377872
loss: 1.0258384943008423,grad_norm: 0.8613738806906377, iteration: 377873
loss: 1.0455108880996704,grad_norm: 0.7415720789956524, iteration: 377874
loss: 0.9584317207336426,grad_norm: 0.7282421620580388, iteration: 377875
loss: 1.2460429668426514,grad_norm: 0.9999997748736564, iteration: 377876
loss: 1.0032583475112915,grad_norm: 0.9999990763926404, iteration: 377877
loss: 0.9941100478172302,grad_norm: 0.938766147079447, iteration: 377878
loss: 1.1998460292816162,grad_norm: 0.9999999782505795, iteration: 377879
loss: 0.9960200190544128,grad_norm: 0.6933772974621435, iteration: 377880
loss: 1.0658928155899048,grad_norm: 0.8352995465202372, iteration: 377881
loss: 1.0048315525054932,grad_norm: 0.8704105592444963, iteration: 377882
loss: 1.0219628810882568,grad_norm: 0.7370421021050697, iteration: 377883
loss: 0.9981802105903625,grad_norm: 0.8678587092637743, iteration: 377884
loss: 1.1060419082641602,grad_norm: 0.9936703881298286, iteration: 377885
loss: 1.2882180213928223,grad_norm: 0.9999992845174475, iteration: 377886
loss: 1.005956768989563,grad_norm: 0.7069482958771792, iteration: 377887
loss: 0.990928053855896,grad_norm: 0.8948024876621259, iteration: 377888
loss: 1.0638188123703003,grad_norm: 0.9999992820645682, iteration: 377889
loss: 1.163515329360962,grad_norm: 0.9999997231930475, iteration: 377890
loss: 1.077481985092163,grad_norm: 0.9999997928805092, iteration: 377891
loss: 0.9912907481193542,grad_norm: 0.9501253293758642, iteration: 377892
loss: 0.9851289987564087,grad_norm: 0.8311074130380799, iteration: 377893
loss: 1.1034791469573975,grad_norm: 0.8896977046509158, iteration: 377894
loss: 0.9846091270446777,grad_norm: 0.8346008499173698, iteration: 377895
loss: 1.000004529953003,grad_norm: 0.7424851050378278, iteration: 377896
loss: 0.9704621434211731,grad_norm: 0.7864178881667864, iteration: 377897
loss: 0.9978875517845154,grad_norm: 0.9328236193432817, iteration: 377898
loss: 1.1165409088134766,grad_norm: 1.0000000051556392, iteration: 377899
loss: 1.1319987773895264,grad_norm: 0.999999136160514, iteration: 377900
loss: 1.154505968093872,grad_norm: 0.9999991177406835, iteration: 377901
loss: 0.9805617332458496,grad_norm: 0.8305427035775679, iteration: 377902
loss: 1.0932912826538086,grad_norm: 0.8269750456975055, iteration: 377903
loss: 0.9958488345146179,grad_norm: 0.999999456288964, iteration: 377904
loss: 0.9842087626457214,grad_norm: 0.9573408148933101, iteration: 377905
loss: 1.029331088066101,grad_norm: 0.999999187797789, iteration: 377906
loss: 1.0074416399002075,grad_norm: 0.9999990138513742, iteration: 377907
loss: 1.0407322645187378,grad_norm: 0.9999999761970035, iteration: 377908
loss: 1.145521640777588,grad_norm: 0.999999496895442, iteration: 377909
loss: 1.0039067268371582,grad_norm: 0.8231855448018198, iteration: 377910
loss: 1.1072782278060913,grad_norm: 0.9756452382258591, iteration: 377911
loss: 1.1090888977050781,grad_norm: 0.9999999627948043, iteration: 377912
loss: 0.9853383302688599,grad_norm: 0.6881369924697366, iteration: 377913
loss: 1.0407471656799316,grad_norm: 0.8488890032118571, iteration: 377914
loss: 1.1340223550796509,grad_norm: 0.9999994199599406, iteration: 377915
loss: 0.9929444193840027,grad_norm: 0.7063447173010464, iteration: 377916
loss: 1.111362099647522,grad_norm: 0.8400608243642624, iteration: 377917
loss: 1.0081496238708496,grad_norm: 0.9999992964183978, iteration: 377918
loss: 0.998304009437561,grad_norm: 0.8566861161060605, iteration: 377919
loss: 1.151088833808899,grad_norm: 0.9999993606055481, iteration: 377920
loss: 0.9946441054344177,grad_norm: 0.8044010470290534, iteration: 377921
loss: 0.9505874514579773,grad_norm: 0.7762681764693627, iteration: 377922
loss: 1.0330455303192139,grad_norm: 0.9999991699004649, iteration: 377923
loss: 1.1406677961349487,grad_norm: 0.8113181664284486, iteration: 377924
loss: 0.9945844411849976,grad_norm: 0.9472282828591103, iteration: 377925
loss: 1.007421612739563,grad_norm: 0.9226827032314907, iteration: 377926
loss: 1.0406630039215088,grad_norm: 0.9570861226721461, iteration: 377927
loss: 1.0559239387512207,grad_norm: 0.9999994997466884, iteration: 377928
loss: 0.9994179606437683,grad_norm: 0.755239915196259, iteration: 377929
loss: 1.0273802280426025,grad_norm: 0.8770490910964434, iteration: 377930
loss: 0.996394157409668,grad_norm: 0.8469712053339647, iteration: 377931
loss: 1.0305434465408325,grad_norm: 0.7872620694650208, iteration: 377932
loss: 1.050592303276062,grad_norm: 0.859481634318124, iteration: 377933
loss: 1.1566091775894165,grad_norm: 0.999999079000501, iteration: 377934
loss: 0.9973340630531311,grad_norm: 0.9999999266931838, iteration: 377935
loss: 1.0061421394348145,grad_norm: 0.8093127554474087, iteration: 377936
loss: 0.9652890563011169,grad_norm: 0.9428314580984702, iteration: 377937
loss: 1.1042400598526,grad_norm: 0.9742740210570803, iteration: 377938
loss: 1.0109786987304688,grad_norm: 0.809159325765297, iteration: 377939
loss: 1.087477684020996,grad_norm: 0.9999993584142123, iteration: 377940
loss: 0.9985693097114563,grad_norm: 0.7943583715256591, iteration: 377941
loss: 1.11417555809021,grad_norm: 0.9130760543856732, iteration: 377942
loss: 1.2871631383895874,grad_norm: 0.9999996439992711, iteration: 377943
loss: 1.0008002519607544,grad_norm: 0.781853653801736, iteration: 377944
loss: 1.050768494606018,grad_norm: 0.9999997067600316, iteration: 377945
loss: 1.0220128297805786,grad_norm: 0.8152257445542086, iteration: 377946
loss: 1.0516242980957031,grad_norm: 0.9999994365935378, iteration: 377947
loss: 1.0052316188812256,grad_norm: 0.7197993282982842, iteration: 377948
loss: 1.019311547279358,grad_norm: 0.7363708012860302, iteration: 377949
loss: 1.082287311553955,grad_norm: 0.8484530376845384, iteration: 377950
loss: 1.0264276266098022,grad_norm: 0.7770308191267269, iteration: 377951
loss: 1.105601191520691,grad_norm: 0.9999999488694237, iteration: 377952
loss: 0.9897275567054749,grad_norm: 0.9946921351001031, iteration: 377953
loss: 1.0097025632858276,grad_norm: 0.7715036985218718, iteration: 377954
loss: 0.9952289462089539,grad_norm: 0.7099283231151241, iteration: 377955
loss: 0.9993669390678406,grad_norm: 0.8288768975448434, iteration: 377956
loss: 1.036815881729126,grad_norm: 0.8918448565212668, iteration: 377957
loss: 1.036909818649292,grad_norm: 0.7472691167375193, iteration: 377958
loss: 0.9763314723968506,grad_norm: 0.8011885438440651, iteration: 377959
loss: 1.0190142393112183,grad_norm: 0.9288687002179178, iteration: 377960
loss: 0.991865336894989,grad_norm: 0.745173172961731, iteration: 377961
loss: 1.0008777379989624,grad_norm: 0.6756272178205078, iteration: 377962
loss: 1.0216854810714722,grad_norm: 0.8867011492368435, iteration: 377963
loss: 1.1077370643615723,grad_norm: 0.890286536095272, iteration: 377964
loss: 1.0768687725067139,grad_norm: 0.8989335055449775, iteration: 377965
loss: 1.0238512754440308,grad_norm: 0.9999991827417862, iteration: 377966
loss: 1.1168513298034668,grad_norm: 0.9999996174492086, iteration: 377967
loss: 1.0367933511734009,grad_norm: 0.9999990586216686, iteration: 377968
loss: 1.0756803750991821,grad_norm: 0.9999993759850879, iteration: 377969
loss: 0.9927317500114441,grad_norm: 0.8063414868069754, iteration: 377970
loss: 0.9916893839836121,grad_norm: 0.9999995833910318, iteration: 377971
loss: 0.9930602312088013,grad_norm: 0.9312386001226907, iteration: 377972
loss: 0.9949108362197876,grad_norm: 0.852701253074046, iteration: 377973
loss: 1.1041607856750488,grad_norm: 0.9999994410892289, iteration: 377974
loss: 1.0109591484069824,grad_norm: 0.9999990692426628, iteration: 377975
loss: 1.0965529680252075,grad_norm: 0.9034117854064081, iteration: 377976
loss: 1.0161386728286743,grad_norm: 0.8364381600666309, iteration: 377977
loss: 0.9940105676651001,grad_norm: 0.770095539307986, iteration: 377978
loss: 1.0454753637313843,grad_norm: 0.881146032009841, iteration: 377979
loss: 1.047196626663208,grad_norm: 0.999999507445707, iteration: 377980
loss: 1.0211738348007202,grad_norm: 0.9999993116289858, iteration: 377981
loss: 1.098698616027832,grad_norm: 0.999999178264342, iteration: 377982
loss: 0.987428605556488,grad_norm: 0.7077665500998146, iteration: 377983
loss: 0.9914397597312927,grad_norm: 0.8290615446841298, iteration: 377984
loss: 1.1169570684432983,grad_norm: 0.9999992167696758, iteration: 377985
loss: 0.9909093379974365,grad_norm: 0.727874564869132, iteration: 377986
loss: 1.008981704711914,grad_norm: 0.9999991003041457, iteration: 377987
loss: 1.029996395111084,grad_norm: 0.8496676190851441, iteration: 377988
loss: 1.0497316122055054,grad_norm: 0.9999998473400722, iteration: 377989
loss: 1.0114569664001465,grad_norm: 0.9266054598669219, iteration: 377990
loss: 1.039028525352478,grad_norm: 0.9618190591916653, iteration: 377991
loss: 1.041832685470581,grad_norm: 0.7243460041029, iteration: 377992
loss: 1.0014431476593018,grad_norm: 0.8207849057000833, iteration: 377993
loss: 1.0183994770050049,grad_norm: 0.8994368165436576, iteration: 377994
loss: 0.9649115204811096,grad_norm: 0.9441136910789898, iteration: 377995
loss: 0.9741405248641968,grad_norm: 0.821086744330778, iteration: 377996
loss: 1.1075152158737183,grad_norm: 0.9999992775361406, iteration: 377997
loss: 0.9903308153152466,grad_norm: 0.7733633073429578, iteration: 377998
loss: 0.9882587790489197,grad_norm: 0.8220897874908223, iteration: 377999
loss: 0.9808690547943115,grad_norm: 0.6957212408572208, iteration: 378000
loss: 1.1273380517959595,grad_norm: 0.9999997742256137, iteration: 378001
loss: 1.017370581626892,grad_norm: 0.7556109226069163, iteration: 378002
loss: 0.9967405796051025,grad_norm: 0.999999037416578, iteration: 378003
loss: 1.1920514106750488,grad_norm: 0.9999995481912061, iteration: 378004
loss: 1.047051191329956,grad_norm: 0.9999991326122328, iteration: 378005
loss: 1.0079755783081055,grad_norm: 0.6662109072640259, iteration: 378006
loss: 1.1787832975387573,grad_norm: 0.9999995283645312, iteration: 378007
loss: 1.002446174621582,grad_norm: 0.9999992151843211, iteration: 378008
loss: 1.053015947341919,grad_norm: 0.7560884383919867, iteration: 378009
loss: 0.9804512858390808,grad_norm: 0.8216756541883254, iteration: 378010
loss: 1.018386960029602,grad_norm: 0.9999992647798578, iteration: 378011
loss: 1.0201607942581177,grad_norm: 0.9999991642825788, iteration: 378012
loss: 1.0192159414291382,grad_norm: 0.736948821634479, iteration: 378013
loss: 1.0165356397628784,grad_norm: 0.6843480863576767, iteration: 378014
loss: 1.0779194831848145,grad_norm: 0.9999996208012687, iteration: 378015
loss: 0.9740424752235413,grad_norm: 0.7354569599107732, iteration: 378016
loss: 1.0792819261550903,grad_norm: 0.8429049257152803, iteration: 378017
loss: 1.012776255607605,grad_norm: 0.7211000622257712, iteration: 378018
loss: 1.003705382347107,grad_norm: 0.7950913218904955, iteration: 378019
loss: 1.1662623882293701,grad_norm: 0.999999328438833, iteration: 378020
loss: 0.9781845808029175,grad_norm: 0.7533668060598223, iteration: 378021
loss: 1.059757947921753,grad_norm: 0.9999999837646949, iteration: 378022
loss: 1.0100935697555542,grad_norm: 0.7761271792393111, iteration: 378023
loss: 1.0466794967651367,grad_norm: 0.9999994224437694, iteration: 378024
loss: 1.0064888000488281,grad_norm: 0.8210183259016546, iteration: 378025
loss: 1.0063077211380005,grad_norm: 0.9627619018296697, iteration: 378026
loss: 0.9772255420684814,grad_norm: 0.9999991163607419, iteration: 378027
loss: 1.0174685716629028,grad_norm: 0.8589526847699097, iteration: 378028
loss: 1.0518977642059326,grad_norm: 0.9284351678463657, iteration: 378029
loss: 1.0282403230667114,grad_norm: 0.749327184210117, iteration: 378030
loss: 1.153512954711914,grad_norm: 0.9999994071449273, iteration: 378031
loss: 0.9474066495895386,grad_norm: 0.7396202317753078, iteration: 378032
loss: 1.0151875019073486,grad_norm: 0.8015540779069199, iteration: 378033
loss: 0.9433966279029846,grad_norm: 0.9358565980962134, iteration: 378034
loss: 1.0417907238006592,grad_norm: 0.7735767452590011, iteration: 378035
loss: 0.9725367426872253,grad_norm: 0.9229429409958496, iteration: 378036
loss: 1.0237733125686646,grad_norm: 0.9999992583178017, iteration: 378037
loss: 0.9863126277923584,grad_norm: 0.6682187877203475, iteration: 378038
loss: 1.0379087924957275,grad_norm: 0.7464491359715042, iteration: 378039
loss: 0.9900544285774231,grad_norm: 0.9999993973105099, iteration: 378040
loss: 1.0256294012069702,grad_norm: 0.6371550451502034, iteration: 378041
loss: 0.9896330833435059,grad_norm: 0.999999969874744, iteration: 378042
loss: 1.0200574398040771,grad_norm: 0.9027090097492133, iteration: 378043
loss: 1.0200910568237305,grad_norm: 0.8397371742210928, iteration: 378044
loss: 1.0101938247680664,grad_norm: 0.7703252765031059, iteration: 378045
loss: 1.0086678266525269,grad_norm: 0.8908522385865937, iteration: 378046
loss: 0.9849948287010193,grad_norm: 0.8124089823747891, iteration: 378047
loss: 1.0142356157302856,grad_norm: 0.9999991797994663, iteration: 378048
loss: 1.0239263772964478,grad_norm: 0.9178958623546469, iteration: 378049
loss: 1.0030680894851685,grad_norm: 0.9999991329922839, iteration: 378050
loss: 1.0644416809082031,grad_norm: 0.9449154728607683, iteration: 378051
loss: 1.0509456396102905,grad_norm: 0.9999995451680117, iteration: 378052
loss: 1.0012092590332031,grad_norm: 0.9999992861846105, iteration: 378053
loss: 1.002943515777588,grad_norm: 0.9999996535969711, iteration: 378054
loss: 0.9947991371154785,grad_norm: 0.8277561044393682, iteration: 378055
loss: 0.9972309470176697,grad_norm: 0.7648130155790743, iteration: 378056
loss: 0.9762045741081238,grad_norm: 0.8048502137376654, iteration: 378057
loss: 0.9849518537521362,grad_norm: 0.999999145359803, iteration: 378058
loss: 1.0190863609313965,grad_norm: 0.9999996238331433, iteration: 378059
loss: 0.9842727184295654,grad_norm: 0.8438770763604837, iteration: 378060
loss: 1.0999451875686646,grad_norm: 0.9538264473127749, iteration: 378061
loss: 1.0459842681884766,grad_norm: 0.9999991195662699, iteration: 378062
loss: 1.0626635551452637,grad_norm: 0.9999998200630315, iteration: 378063
loss: 1.0221987962722778,grad_norm: 0.8540472904952586, iteration: 378064
loss: 1.029949426651001,grad_norm: 0.787797681772692, iteration: 378065
loss: 1.0185754299163818,grad_norm: 0.8193370157041477, iteration: 378066
loss: 0.9814901947975159,grad_norm: 0.8387126671733499, iteration: 378067
loss: 1.0267821550369263,grad_norm: 0.7758141758591834, iteration: 378068
loss: 1.0346702337265015,grad_norm: 0.8427637030365577, iteration: 378069
loss: 1.067378282546997,grad_norm: 0.9621141702401146, iteration: 378070
loss: 1.0180373191833496,grad_norm: 0.9999992332851168, iteration: 378071
loss: 0.9814146161079407,grad_norm: 0.9212077038810564, iteration: 378072
loss: 0.9970216155052185,grad_norm: 0.7714105418146644, iteration: 378073
loss: 1.0136460065841675,grad_norm: 0.9354058205313598, iteration: 378074
loss: 1.150734782218933,grad_norm: 0.9999999250133669, iteration: 378075
loss: 1.0149025917053223,grad_norm: 0.9187154255395912, iteration: 378076
loss: 0.9946430325508118,grad_norm: 0.7360157337424114, iteration: 378077
loss: 1.0011245012283325,grad_norm: 0.9089215249596055, iteration: 378078
loss: 0.9930665493011475,grad_norm: 0.9999990520818837, iteration: 378079
loss: 0.9978955388069153,grad_norm: 0.7602268802293862, iteration: 378080
loss: 1.0164377689361572,grad_norm: 0.9999994258713626, iteration: 378081
loss: 1.156610369682312,grad_norm: 0.9999995947324622, iteration: 378082
loss: 1.0623234510421753,grad_norm: 0.9999998649484764, iteration: 378083
loss: 1.1083635091781616,grad_norm: 1.000000013706266, iteration: 378084
loss: 0.9839310050010681,grad_norm: 0.9975209843595978, iteration: 378085
loss: 0.9913807511329651,grad_norm: 0.8172536913302133, iteration: 378086
loss: 0.9628683924674988,grad_norm: 0.761537251347222, iteration: 378087
loss: 0.9773193597793579,grad_norm: 0.7022681127329757, iteration: 378088
loss: 0.947841227054596,grad_norm: 0.8204726160237006, iteration: 378089
loss: 1.0924557447433472,grad_norm: 0.999999969360733, iteration: 378090
loss: 1.0019261837005615,grad_norm: 0.9999994523317425, iteration: 378091
loss: 1.0110937356948853,grad_norm: 0.8781615186357137, iteration: 378092
loss: 1.0846556425094604,grad_norm: 0.7934267589140281, iteration: 378093
loss: 1.0380569696426392,grad_norm: 0.9851186192362164, iteration: 378094
loss: 0.9942456483840942,grad_norm: 0.8018222494165143, iteration: 378095
loss: 1.0259915590286255,grad_norm: 0.7907208934391123, iteration: 378096
loss: 1.0004853010177612,grad_norm: 0.8250179810938121, iteration: 378097
loss: 1.012377381324768,grad_norm: 0.822658824147827, iteration: 378098
loss: 0.9797990322113037,grad_norm: 0.8605520810171561, iteration: 378099
loss: 0.9719451665878296,grad_norm: 0.6856151110681763, iteration: 378100
loss: 1.0233275890350342,grad_norm: 0.9999996479093349, iteration: 378101
loss: 0.9962138533592224,grad_norm: 0.9999995384317367, iteration: 378102
loss: 0.9815325140953064,grad_norm: 0.7048713243888458, iteration: 378103
loss: 1.0780317783355713,grad_norm: 0.864589141793314, iteration: 378104
loss: 0.9975804686546326,grad_norm: 0.9999995938792154, iteration: 378105
loss: 1.0122267007827759,grad_norm: 0.9999992097347015, iteration: 378106
loss: 1.0860376358032227,grad_norm: 0.9999998178674729, iteration: 378107
loss: 1.0216078758239746,grad_norm: 0.9999999661279312, iteration: 378108
loss: 1.0084666013717651,grad_norm: 0.9999993130887833, iteration: 378109
loss: 1.0146042108535767,grad_norm: 0.9999995019159437, iteration: 378110
loss: 0.9570823311805725,grad_norm: 0.8013331804624256, iteration: 378111
loss: 0.9713466167449951,grad_norm: 0.892591087337054, iteration: 378112
loss: 1.0108973979949951,grad_norm: 0.7989489648283851, iteration: 378113
loss: 1.1268141269683838,grad_norm: 0.9220190916745196, iteration: 378114
loss: 0.960374116897583,grad_norm: 0.8299102226066885, iteration: 378115
loss: 1.0444395542144775,grad_norm: 0.7967429620019185, iteration: 378116
loss: 1.0245013236999512,grad_norm: 0.7821845025445704, iteration: 378117
loss: 0.9913061261177063,grad_norm: 0.9999995527851961, iteration: 378118
loss: 1.023887276649475,grad_norm: 0.742652558692019, iteration: 378119
loss: 0.97503262758255,grad_norm: 0.9724891365758619, iteration: 378120
loss: 1.0314260721206665,grad_norm: 0.8516013401280421, iteration: 378121
loss: 0.9731836318969727,grad_norm: 0.8537891736277964, iteration: 378122
loss: 1.039634346961975,grad_norm: 0.7479407377564865, iteration: 378123
loss: 0.9775406718254089,grad_norm: 0.9566818315498564, iteration: 378124
loss: 0.9862317442893982,grad_norm: 0.9999991873916134, iteration: 378125
loss: 0.9952403903007507,grad_norm: 0.8319750223469711, iteration: 378126
loss: 1.0105605125427246,grad_norm: 0.7045010609516276, iteration: 378127
loss: 0.991745114326477,grad_norm: 0.7034529732616895, iteration: 378128
loss: 1.3210517168045044,grad_norm: 0.9999992076352067, iteration: 378129
loss: 0.9825721979141235,grad_norm: 0.9005945170878028, iteration: 378130
loss: 0.9951500296592712,grad_norm: 0.794979942850899, iteration: 378131
loss: 0.9352119565010071,grad_norm: 0.8838302353352362, iteration: 378132
loss: 0.9994450807571411,grad_norm: 0.8161541339416053, iteration: 378133
loss: 0.9913032650947571,grad_norm: 0.8125840710009967, iteration: 378134
loss: 1.0359855890274048,grad_norm: 0.7454957196432926, iteration: 378135
loss: 0.9876561760902405,grad_norm: 0.8058243286055528, iteration: 378136
loss: 1.0030696392059326,grad_norm: 0.8679503431468379, iteration: 378137
loss: 1.0155611038208008,grad_norm: 0.8841523799124169, iteration: 378138
loss: 0.9851853847503662,grad_norm: 0.9437750013163614, iteration: 378139
loss: 1.01937997341156,grad_norm: 0.9999991024458904, iteration: 378140
loss: 0.9466992616653442,grad_norm: 0.748818469106022, iteration: 378141
loss: 1.006865382194519,grad_norm: 0.6877111970089722, iteration: 378142
loss: 1.0436474084854126,grad_norm: 0.8078666895967149, iteration: 378143
loss: 0.9754540324211121,grad_norm: 0.9999999250176314, iteration: 378144
loss: 1.1391661167144775,grad_norm: 1.0000000029153122, iteration: 378145
loss: 0.9689843058586121,grad_norm: 0.8048203941909244, iteration: 378146
loss: 0.9864069223403931,grad_norm: 0.9999990243133771, iteration: 378147
loss: 1.0765066146850586,grad_norm: 0.999999875522031, iteration: 378148
loss: 1.0482914447784424,grad_norm: 0.7338499350814577, iteration: 378149
loss: 1.049741506576538,grad_norm: 0.7749984109344417, iteration: 378150
loss: 1.046997308731079,grad_norm: 0.9999992944759886, iteration: 378151
loss: 1.0211989879608154,grad_norm: 0.9999991646503336, iteration: 378152
loss: 0.98234623670578,grad_norm: 0.787375845041938, iteration: 378153
loss: 1.0139281749725342,grad_norm: 0.8038850899271529, iteration: 378154
loss: 1.0176719427108765,grad_norm: 0.8246266933297185, iteration: 378155
loss: 1.0168213844299316,grad_norm: 0.8304396863656727, iteration: 378156
loss: 0.9581037759780884,grad_norm: 0.8437348922388977, iteration: 378157
loss: 1.0280805826187134,grad_norm: 0.9999993037279259, iteration: 378158
loss: 1.0134851932525635,grad_norm: 0.8261809261603393, iteration: 378159
loss: 0.9736922979354858,grad_norm: 0.951404943893853, iteration: 378160
loss: 1.042030930519104,grad_norm: 0.8579074189001599, iteration: 378161
loss: 0.9962239265441895,grad_norm: 0.8644612963933593, iteration: 378162
loss: 1.0169005393981934,grad_norm: 0.9999997457794549, iteration: 378163
loss: 0.979479193687439,grad_norm: 0.6535650355879798, iteration: 378164
loss: 1.0737553834915161,grad_norm: 0.9999996188887644, iteration: 378165
loss: 1.0486758947372437,grad_norm: 0.9999990526968544, iteration: 378166
loss: 1.4008516073226929,grad_norm: 0.9999995937391046, iteration: 378167
loss: 1.0577839612960815,grad_norm: 0.9967995833990104, iteration: 378168
loss: 0.982031524181366,grad_norm: 0.9999994662688232, iteration: 378169
loss: 0.9975723028182983,grad_norm: 0.8719911936039607, iteration: 378170
loss: 1.001721978187561,grad_norm: 0.8387208605366798, iteration: 378171
loss: 1.0270442962646484,grad_norm: 0.8622665455662336, iteration: 378172
loss: 1.0022413730621338,grad_norm: 0.9999990858745561, iteration: 378173
loss: 0.977677583694458,grad_norm: 0.903930910372289, iteration: 378174
loss: 1.0401577949523926,grad_norm: 0.7676847470587848, iteration: 378175
loss: 0.9908666014671326,grad_norm: 0.8389123501986955, iteration: 378176
loss: 1.0275622606277466,grad_norm: 0.9019318442295859, iteration: 378177
loss: 0.9763258099555969,grad_norm: 0.9639020673602396, iteration: 378178
loss: 0.996261715888977,grad_norm: 0.7794375987377414, iteration: 378179
loss: 0.9829521775245667,grad_norm: 0.7589444806651453, iteration: 378180
loss: 1.009798288345337,grad_norm: 0.7308802994282987, iteration: 378181
loss: 1.0151454210281372,grad_norm: 0.9999998601510961, iteration: 378182
loss: 0.9742542505264282,grad_norm: 0.8381085258740983, iteration: 378183
loss: 1.0361542701721191,grad_norm: 0.999999429058327, iteration: 378184
loss: 0.9925389885902405,grad_norm: 0.861032810807184, iteration: 378185
loss: 1.065379023551941,grad_norm: 0.9381912043155184, iteration: 378186
loss: 0.9601770043373108,grad_norm: 0.9343089809029728, iteration: 378187
loss: 0.9579370617866516,grad_norm: 0.7637279644245258, iteration: 378188
loss: 0.9644736051559448,grad_norm: 0.8493087053143591, iteration: 378189
loss: 1.0459660291671753,grad_norm: 0.999999682213869, iteration: 378190
loss: 1.0603234767913818,grad_norm: 0.9999993129043214, iteration: 378191
loss: 1.0300558805465698,grad_norm: 0.8495447877273913, iteration: 378192
loss: 1.0271936655044556,grad_norm: 0.7006465502118958, iteration: 378193
loss: 1.0054433345794678,grad_norm: 0.7717539293961562, iteration: 378194
loss: 1.0017874240875244,grad_norm: 0.7448871267244083, iteration: 378195
loss: 0.9861031174659729,grad_norm: 0.79250773968934, iteration: 378196
loss: 0.9911901354789734,grad_norm: 0.7260402823808444, iteration: 378197
loss: 0.983161985874176,grad_norm: 0.6808582567494715, iteration: 378198
loss: 1.04509699344635,grad_norm: 0.9999996392177896, iteration: 378199
loss: 0.96921706199646,grad_norm: 0.8574405543898308, iteration: 378200
loss: 1.0328575372695923,grad_norm: 0.88315162739098, iteration: 378201
loss: 0.9879506230354309,grad_norm: 0.7736414182039794, iteration: 378202
loss: 1.0263081789016724,grad_norm: 0.7688214351153552, iteration: 378203
loss: 0.9498810172080994,grad_norm: 0.8408836526113977, iteration: 378204
loss: 1.0180387496948242,grad_norm: 0.6906703482819513, iteration: 378205
loss: 1.0207473039627075,grad_norm: 0.9999991541154526, iteration: 378206
loss: 1.021064043045044,grad_norm: 0.9084147363488597, iteration: 378207
loss: 0.9890956282615662,grad_norm: 0.6913383712520229, iteration: 378208
loss: 1.027337670326233,grad_norm: 0.8814339759078421, iteration: 378209
loss: 1.0170494318008423,grad_norm: 0.8773361073218391, iteration: 378210
loss: 1.0350615978240967,grad_norm: 0.9001712503044867, iteration: 378211
loss: 1.0104708671569824,grad_norm: 0.9999996573691828, iteration: 378212
loss: 1.025672435760498,grad_norm: 0.8785398235291376, iteration: 378213
loss: 0.9710946083068848,grad_norm: 0.6956866194771893, iteration: 378214
loss: 0.9924378991127014,grad_norm: 0.8016091309846535, iteration: 378215
loss: 1.0155137777328491,grad_norm: 0.8695665964239395, iteration: 378216
loss: 1.036515474319458,grad_norm: 0.8760090398304536, iteration: 378217
loss: 1.2828224897384644,grad_norm: 0.9999993694787619, iteration: 378218
loss: 0.9985610842704773,grad_norm: 0.7858432192098833, iteration: 378219
loss: 0.9552082419395447,grad_norm: 0.9999992549523797, iteration: 378220
loss: 1.0092469453811646,grad_norm: 0.9999995222737449, iteration: 378221
loss: 1.0062861442565918,grad_norm: 0.999999377728851, iteration: 378222
loss: 0.9921389818191528,grad_norm: 0.7527129333005789, iteration: 378223
loss: 1.038827657699585,grad_norm: 0.9999992845248232, iteration: 378224
loss: 1.0222578048706055,grad_norm: 0.7586063588177783, iteration: 378225
loss: 0.9837867617607117,grad_norm: 0.7478383348917544, iteration: 378226
loss: 0.9997304677963257,grad_norm: 0.958829119967024, iteration: 378227
loss: 0.9826754927635193,grad_norm: 0.9999990782382199, iteration: 378228
loss: 1.0110658407211304,grad_norm: 0.7610155139954387, iteration: 378229
loss: 0.9842837452888489,grad_norm: 0.9463109327746292, iteration: 378230
loss: 1.042836308479309,grad_norm: 0.9126197314846395, iteration: 378231
loss: 1.3406012058258057,grad_norm: 0.9999996629402664, iteration: 378232
loss: 1.0275321006774902,grad_norm: 0.9406930070990569, iteration: 378233
loss: 0.9987918138504028,grad_norm: 0.9999997886928388, iteration: 378234
loss: 0.9782372117042542,grad_norm: 0.6233832731859226, iteration: 378235
loss: 1.122295618057251,grad_norm: 0.999999293746984, iteration: 378236
loss: 1.0165534019470215,grad_norm: 0.7961917253049768, iteration: 378237
loss: 0.9989902973175049,grad_norm: 0.7409723790662681, iteration: 378238
loss: 1.0097757577896118,grad_norm: 0.8653996232257309, iteration: 378239
loss: 1.0370808839797974,grad_norm: 0.9999991977722071, iteration: 378240
loss: 0.9868572950363159,grad_norm: 0.8639093934367514, iteration: 378241
loss: 1.0392663478851318,grad_norm: 0.9659658747077827, iteration: 378242
loss: 1.0404843091964722,grad_norm: 0.9999997866789023, iteration: 378243
loss: 0.9937539100646973,grad_norm: 0.9354474313452605, iteration: 378244
loss: 0.9959797263145447,grad_norm: 0.7494065936294605, iteration: 378245
loss: 1.1750473976135254,grad_norm: 0.9999996168551644, iteration: 378246
loss: 0.9808027744293213,grad_norm: 0.7609970894237243, iteration: 378247
loss: 0.9935424327850342,grad_norm: 0.9626455838114891, iteration: 378248
loss: 0.9718380570411682,grad_norm: 0.9084250586320465, iteration: 378249
loss: 1.0757741928100586,grad_norm: 0.9999997729007264, iteration: 378250
loss: 0.9984527826309204,grad_norm: 0.7057349197293372, iteration: 378251
loss: 1.0391271114349365,grad_norm: 0.6978935866117495, iteration: 378252
loss: 1.0217562913894653,grad_norm: 0.7999095538742069, iteration: 378253
loss: 1.007566213607788,grad_norm: 0.8082018071840348, iteration: 378254
loss: 1.0191099643707275,grad_norm: 0.7463987674520957, iteration: 378255
loss: 1.0035078525543213,grad_norm: 0.7499091248786542, iteration: 378256
loss: 1.0167982578277588,grad_norm: 0.9999993211476346, iteration: 378257
loss: 1.0108977556228638,grad_norm: 0.9381854525782185, iteration: 378258
loss: 0.980616569519043,grad_norm: 0.9808168760020473, iteration: 378259
loss: 1.0228503942489624,grad_norm: 0.9069666739497819, iteration: 378260
loss: 0.9748296737670898,grad_norm: 0.9999998001156353, iteration: 378261
loss: 1.0570217370986938,grad_norm: 0.7147792597810851, iteration: 378262
loss: 1.0048799514770508,grad_norm: 0.9999993743427199, iteration: 378263
loss: 1.0287131071090698,grad_norm: 0.9863675165147526, iteration: 378264
loss: 1.003657579421997,grad_norm: 0.8295120317386122, iteration: 378265
loss: 0.9908586740493774,grad_norm: 0.9046976893991417, iteration: 378266
loss: 0.9410181045532227,grad_norm: 0.9201914269002013, iteration: 378267
loss: 1.0058726072311401,grad_norm: 0.7627540499315449, iteration: 378268
loss: 0.9449398517608643,grad_norm: 0.7696883080010987, iteration: 378269
loss: 1.029555320739746,grad_norm: 0.9999990338977227, iteration: 378270
loss: 0.9727020263671875,grad_norm: 0.9893388734850176, iteration: 378271
loss: 0.9928337931632996,grad_norm: 0.8328233290908047, iteration: 378272
loss: 1.0017585754394531,grad_norm: 0.7996824330817016, iteration: 378273
loss: 0.9947344064712524,grad_norm: 0.8328370138887903, iteration: 378274
loss: 0.9767491817474365,grad_norm: 0.8141515838593676, iteration: 378275
loss: 1.0501490831375122,grad_norm: 0.8210141266903969, iteration: 378276
loss: 0.973426342010498,grad_norm: 0.662110759849342, iteration: 378277
loss: 1.0035033226013184,grad_norm: 0.8738805283211714, iteration: 378278
loss: 1.0179283618927002,grad_norm: 0.9465548828146746, iteration: 378279
loss: 0.9733343124389648,grad_norm: 0.8454377556061052, iteration: 378280
loss: 1.0495167970657349,grad_norm: 0.9226972293241572, iteration: 378281
loss: 0.9905521273612976,grad_norm: 0.7927513183786705, iteration: 378282
loss: 1.032875895500183,grad_norm: 0.7623760800473888, iteration: 378283
loss: 0.9891355037689209,grad_norm: 0.7924151133632241, iteration: 378284
loss: 0.9828351140022278,grad_norm: 0.7437441853472297, iteration: 378285
loss: 1.010646104812622,grad_norm: 0.709178088033984, iteration: 378286
loss: 0.989232063293457,grad_norm: 0.9121142974686592, iteration: 378287
loss: 1.0080925226211548,grad_norm: 0.8545631960941276, iteration: 378288
loss: 1.0090426206588745,grad_norm: 0.8840386116771826, iteration: 378289
loss: 1.2176343202590942,grad_norm: 0.9999997355511513, iteration: 378290
loss: 0.9637002348899841,grad_norm: 0.9547365370075066, iteration: 378291
loss: 1.020914077758789,grad_norm: 0.8899352680340058, iteration: 378292
loss: 0.9997780919075012,grad_norm: 0.840991495565951, iteration: 378293
loss: 1.00154447555542,grad_norm: 0.9563375873062854, iteration: 378294
loss: 1.01947021484375,grad_norm: 0.9999995210811026, iteration: 378295
loss: 0.9940964579582214,grad_norm: 0.7815543130168033, iteration: 378296
loss: 1.006510853767395,grad_norm: 0.7585125629433138, iteration: 378297
loss: 0.9949683547019958,grad_norm: 0.6796078301389534, iteration: 378298
loss: 0.9868131875991821,grad_norm: 0.8055386973521661, iteration: 378299
loss: 1.0268951654434204,grad_norm: 0.8351939204980415, iteration: 378300
loss: 1.021968126296997,grad_norm: 0.880343353703382, iteration: 378301
loss: 1.1682950258255005,grad_norm: 0.9999997541947019, iteration: 378302
loss: 1.029778003692627,grad_norm: 0.7430787850648236, iteration: 378303
loss: 0.9852222204208374,grad_norm: 0.7464807259731605, iteration: 378304
loss: 1.0223023891448975,grad_norm: 0.886274358170735, iteration: 378305
loss: 0.980255663394928,grad_norm: 0.8720241920785139, iteration: 378306
loss: 1.0736397504806519,grad_norm: 0.9999997662564805, iteration: 378307
loss: 1.009721279144287,grad_norm: 0.9999998711955425, iteration: 378308
loss: 1.040993094444275,grad_norm: 0.9999991792708006, iteration: 378309
loss: 0.9954947233200073,grad_norm: 0.8925944261978731, iteration: 378310
loss: 1.011683464050293,grad_norm: 0.8000826910554634, iteration: 378311
loss: 1.0108277797698975,grad_norm: 0.8008001901032972, iteration: 378312
loss: 0.9921493530273438,grad_norm: 0.8212162129375193, iteration: 378313
loss: 0.996823787689209,grad_norm: 0.6996878054395674, iteration: 378314
loss: 1.0293214321136475,grad_norm: 0.9999990748689076, iteration: 378315
loss: 1.0086508989334106,grad_norm: 0.9999990889792061, iteration: 378316
loss: 1.0510183572769165,grad_norm: 0.8608298864991233, iteration: 378317
loss: 0.9797508120536804,grad_norm: 0.9314766036756916, iteration: 378318
loss: 0.9779263734817505,grad_norm: 0.9023989344649579, iteration: 378319
loss: 1.0215363502502441,grad_norm: 0.8951256217870329, iteration: 378320
loss: 1.0304161310195923,grad_norm: 0.9999998485569006, iteration: 378321
loss: 0.9882307052612305,grad_norm: 0.7676807625582248, iteration: 378322
loss: 1.0703319311141968,grad_norm: 0.8169777652580907, iteration: 378323
loss: 0.9560964703559875,grad_norm: 0.886419737941691, iteration: 378324
loss: 1.0183781385421753,grad_norm: 0.8327262715243902, iteration: 378325
loss: 0.9960242509841919,grad_norm: 0.8075528608896155, iteration: 378326
loss: 0.9850610494613647,grad_norm: 0.7006567458896285, iteration: 378327
loss: 0.9838511943817139,grad_norm: 0.7758640545984816, iteration: 378328
loss: 1.0141783952713013,grad_norm: 0.9139769972749149, iteration: 378329
loss: 1.020476222038269,grad_norm: 0.9999993653424349, iteration: 378330
loss: 1.001128077507019,grad_norm: 0.8218406644143037, iteration: 378331
loss: 1.019583821296692,grad_norm: 0.761027041645655, iteration: 378332
loss: 1.0395361185073853,grad_norm: 0.9999991655740836, iteration: 378333
loss: 0.9852319955825806,grad_norm: 0.9999989716432626, iteration: 378334
loss: 1.0802600383758545,grad_norm: 0.9999999686502863, iteration: 378335
loss: 1.3172307014465332,grad_norm: 0.9999999072497208, iteration: 378336
loss: 1.0200270414352417,grad_norm: 0.6748961666963956, iteration: 378337
loss: 0.9533666968345642,grad_norm: 0.7619661753089415, iteration: 378338
loss: 1.0190656185150146,grad_norm: 0.9999995389497294, iteration: 378339
loss: 1.1317883729934692,grad_norm: 0.9999990459583544, iteration: 378340
loss: 1.088564395904541,grad_norm: 0.9999998044693155, iteration: 378341
loss: 0.9563525915145874,grad_norm: 0.7056026009123549, iteration: 378342
loss: 0.9764002561569214,grad_norm: 0.9999998246702021, iteration: 378343
loss: 0.9738509654998779,grad_norm: 0.8512758905098724, iteration: 378344
loss: 0.9896757006645203,grad_norm: 0.8336025906679397, iteration: 378345
loss: 1.0022190809249878,grad_norm: 0.9043196548229331, iteration: 378346
loss: 1.0180474519729614,grad_norm: 0.9999995077249658, iteration: 378347
loss: 1.061269760131836,grad_norm: 0.8980383386325194, iteration: 378348
loss: 1.0271251201629639,grad_norm: 0.76419794869378, iteration: 378349
loss: 0.9970868229866028,grad_norm: 0.7491977659348388, iteration: 378350
loss: 0.9992714524269104,grad_norm: 0.6990953897531202, iteration: 378351
loss: 0.9929879903793335,grad_norm: 0.7838205552954561, iteration: 378352
loss: 1.0089948177337646,grad_norm: 0.8121752360134752, iteration: 378353
loss: 1.0716115236282349,grad_norm: 0.9286713510181211, iteration: 378354
loss: 1.0142486095428467,grad_norm: 0.8237791843600332, iteration: 378355
loss: 1.029033899307251,grad_norm: 0.7830854252719757, iteration: 378356
loss: 1.0115642547607422,grad_norm: 0.7479143916348311, iteration: 378357
loss: 1.0142550468444824,grad_norm: 0.9999989390108569, iteration: 378358
loss: 1.0059207677841187,grad_norm: 0.8082359305550516, iteration: 378359
loss: 1.0060778856277466,grad_norm: 0.7574166975110311, iteration: 378360
loss: 1.0144402980804443,grad_norm: 0.8834282404930701, iteration: 378361
loss: 1.0022380352020264,grad_norm: 0.9720666727667097, iteration: 378362
loss: 0.9900569319725037,grad_norm: 0.8061251251181275, iteration: 378363
loss: 1.0881521701812744,grad_norm: 0.8068318555298762, iteration: 378364
loss: 0.9801971316337585,grad_norm: 0.8893377402292673, iteration: 378365
loss: 0.9790552854537964,grad_norm: 0.8312914579234454, iteration: 378366
loss: 0.9840880036354065,grad_norm: 0.8096200648428141, iteration: 378367
loss: 1.0598459243774414,grad_norm: 0.6167842472963786, iteration: 378368
loss: 0.9941105842590332,grad_norm: 0.7230227209857639, iteration: 378369
loss: 0.9999291896820068,grad_norm: 0.9892022433456918, iteration: 378370
loss: 1.0332963466644287,grad_norm: 0.8528879249357808, iteration: 378371
loss: 0.9774817228317261,grad_norm: 0.712188334742654, iteration: 378372
loss: 1.031352162361145,grad_norm: 0.7197075405082463, iteration: 378373
loss: 1.022643804550171,grad_norm: 0.9999993262589587, iteration: 378374
loss: 1.020797848701477,grad_norm: 0.811791297485414, iteration: 378375
loss: 1.0198917388916016,grad_norm: 0.9999992059754585, iteration: 378376
loss: 1.0326907634735107,grad_norm: 0.8134691982236841, iteration: 378377
loss: 0.96812504529953,grad_norm: 0.7765683199603579, iteration: 378378
loss: 1.289579153060913,grad_norm: 0.999999655898102, iteration: 378379
loss: 1.1014198064804077,grad_norm: 0.8148439551136212, iteration: 378380
loss: 1.1647868156433105,grad_norm: 0.7085832763098511, iteration: 378381
loss: 1.0187243223190308,grad_norm: 0.8217817799200574, iteration: 378382
loss: 0.9939256310462952,grad_norm: 0.862808892518146, iteration: 378383
loss: 1.12691330909729,grad_norm: 0.8574378040955839, iteration: 378384
loss: 0.980445146560669,grad_norm: 0.7669875433756, iteration: 378385
loss: 1.0108305215835571,grad_norm: 0.8546881626300098, iteration: 378386
loss: 1.000352382659912,grad_norm: 0.8951509329201991, iteration: 378387
loss: 1.0005449056625366,grad_norm: 0.7664744005344889, iteration: 378388
loss: 1.053379774093628,grad_norm: 0.9999999107728759, iteration: 378389
loss: 0.9643036127090454,grad_norm: 0.8603155820789321, iteration: 378390
loss: 0.9880854487419128,grad_norm: 0.7655885753277071, iteration: 378391
loss: 1.020885705947876,grad_norm: 0.8443480476103122, iteration: 378392
loss: 1.0823372602462769,grad_norm: 0.9999993251975792, iteration: 378393
loss: 0.982840359210968,grad_norm: 0.9085062949136699, iteration: 378394
loss: 0.9806928634643555,grad_norm: 0.9401016297885335, iteration: 378395
loss: 1.0036386251449585,grad_norm: 0.6483664106228988, iteration: 378396
loss: 1.0787638425827026,grad_norm: 0.999999842697059, iteration: 378397
loss: 1.0264253616333008,grad_norm: 0.813105076328151, iteration: 378398
loss: 1.0153223276138306,grad_norm: 0.8783541043913594, iteration: 378399
loss: 0.9737005233764648,grad_norm: 0.8688212860087389, iteration: 378400
loss: 1.0224891901016235,grad_norm: 0.8949466623540066, iteration: 378401
loss: 1.0098977088928223,grad_norm: 0.9201179593053566, iteration: 378402
loss: 1.306754231452942,grad_norm: 0.9999995705502649, iteration: 378403
loss: 1.0216546058654785,grad_norm: 0.7546358022073552, iteration: 378404
loss: 1.122344732284546,grad_norm: 0.9999998605492316, iteration: 378405
loss: 1.038680076599121,grad_norm: 0.809286824495644, iteration: 378406
loss: 0.9864128828048706,grad_norm: 0.7247265747593828, iteration: 378407
loss: 0.9915668368339539,grad_norm: 0.7376507785571574, iteration: 378408
loss: 1.0292863845825195,grad_norm: 0.7609858435370677, iteration: 378409
loss: 1.026279091835022,grad_norm: 0.7276721325123251, iteration: 378410
loss: 1.0543066263198853,grad_norm: 0.9459175657645489, iteration: 378411
loss: 1.0041619539260864,grad_norm: 0.8006240078960604, iteration: 378412
loss: 0.9971907734870911,grad_norm: 0.6935986685188221, iteration: 378413
loss: 1.0136853456497192,grad_norm: 0.8621840246665152, iteration: 378414
loss: 1.020765781402588,grad_norm: 0.9999999893186196, iteration: 378415
loss: 0.9709441661834717,grad_norm: 0.8252451629171687, iteration: 378416
loss: 0.9988065958023071,grad_norm: 0.7848204848608685, iteration: 378417
loss: 1.2018944025039673,grad_norm: 1.0000000622601541, iteration: 378418
loss: 1.0885343551635742,grad_norm: 0.9999997341172984, iteration: 378419
loss: 1.1834734678268433,grad_norm: 0.9999991599905952, iteration: 378420
loss: 1.1418002843856812,grad_norm: 0.999999988900856, iteration: 378421
loss: 0.9801446199417114,grad_norm: 0.8262476073975947, iteration: 378422
loss: 1.0144667625427246,grad_norm: 0.8278035475043287, iteration: 378423
loss: 0.9946677088737488,grad_norm: 0.9999996092399881, iteration: 378424
loss: 1.0272225141525269,grad_norm: 0.7962183825448766, iteration: 378425
loss: 1.07607102394104,grad_norm: 0.9807368290832017, iteration: 378426
loss: 0.9887203574180603,grad_norm: 0.8013859163408622, iteration: 378427
loss: 1.215693712234497,grad_norm: 0.9999991149216473, iteration: 378428
loss: 1.0197826623916626,grad_norm: 0.9999990898660219, iteration: 378429
loss: 1.0368080139160156,grad_norm: 0.9999991289955433, iteration: 378430
loss: 1.1321676969528198,grad_norm: 0.9999991930897946, iteration: 378431
loss: 0.990300714969635,grad_norm: 0.9442955063093175, iteration: 378432
loss: 1.031479001045227,grad_norm: 0.8182042172301033, iteration: 378433
loss: 1.0066115856170654,grad_norm: 0.7067960807396574, iteration: 378434
loss: 1.0069531202316284,grad_norm: 0.8402054164410688, iteration: 378435
loss: 1.3659876585006714,grad_norm: 0.9999995443418342, iteration: 378436
loss: 0.9917006492614746,grad_norm: 0.7448478676231322, iteration: 378437
loss: 1.055790662765503,grad_norm: 0.8007003313785108, iteration: 378438
loss: 0.9911726117134094,grad_norm: 0.842631440916208, iteration: 378439
loss: 1.2008532285690308,grad_norm: 0.9999991611495981, iteration: 378440
loss: 1.2882258892059326,grad_norm: 0.99999949705962, iteration: 378441
loss: 0.9768613576889038,grad_norm: 0.7334849949804638, iteration: 378442
loss: 1.008407473564148,grad_norm: 0.9296375016480024, iteration: 378443
loss: 0.9625057578086853,grad_norm: 0.909192224958623, iteration: 378444
loss: 0.9952973127365112,grad_norm: 0.7229755184582088, iteration: 378445
loss: 0.988731324672699,grad_norm: 0.8210845396849377, iteration: 378446
loss: 1.025206446647644,grad_norm: 0.7561356632844043, iteration: 378447
loss: 1.0771843194961548,grad_norm: 0.8636882999919816, iteration: 378448
loss: 0.9773484468460083,grad_norm: 0.8678551316826241, iteration: 378449
loss: 1.0166256427764893,grad_norm: 0.9582762086434764, iteration: 378450
loss: 1.0757728815078735,grad_norm: 0.7913067646925692, iteration: 378451
loss: 1.003641963005066,grad_norm: 0.9273140593915769, iteration: 378452
loss: 0.9699404239654541,grad_norm: 0.9373453245492764, iteration: 378453
loss: 1.046053409576416,grad_norm: 0.9999991079955014, iteration: 378454
loss: 1.0382065773010254,grad_norm: 0.8852936789653124, iteration: 378455
loss: 1.026921033859253,grad_norm: 0.899753658359533, iteration: 378456
loss: 1.1129937171936035,grad_norm: 0.9430245992272528, iteration: 378457
loss: 1.0573785305023193,grad_norm: 0.9999992629146389, iteration: 378458
loss: 1.0532761812210083,grad_norm: 0.8985650006985978, iteration: 378459
loss: 0.9895827174186707,grad_norm: 0.7567561005943426, iteration: 378460
loss: 1.2005990743637085,grad_norm: 0.9999991280844224, iteration: 378461
loss: 0.9991821646690369,grad_norm: 0.999999273762662, iteration: 378462
loss: 1.1834276914596558,grad_norm: 0.9999996768251037, iteration: 378463
loss: 1.0072311162948608,grad_norm: 0.8595745650267228, iteration: 378464
loss: 1.014437198638916,grad_norm: 0.8065211896316322, iteration: 378465
loss: 1.031982421875,grad_norm: 0.7869081683423077, iteration: 378466
loss: 1.0108566284179688,grad_norm: 0.7111837876563845, iteration: 378467
loss: 0.9717915654182434,grad_norm: 0.9990713685352086, iteration: 378468
loss: 1.0026817321777344,grad_norm: 0.9318088331852351, iteration: 378469
loss: 1.0369893312454224,grad_norm: 1.0000000371305187, iteration: 378470
loss: 1.0165404081344604,grad_norm: 0.9999991976425733, iteration: 378471
loss: 1.0361485481262207,grad_norm: 0.8101075642750799, iteration: 378472
loss: 1.0241925716400146,grad_norm: 0.8335634830133273, iteration: 378473
loss: 1.1612249612808228,grad_norm: 0.9399324567273897, iteration: 378474
loss: 1.064738392829895,grad_norm: 0.8328058283053517, iteration: 378475
loss: 1.0189813375473022,grad_norm: 0.9999991095607386, iteration: 378476
loss: 1.0314172506332397,grad_norm: 0.7661183037105707, iteration: 378477
loss: 1.0225143432617188,grad_norm: 0.9999996357059139, iteration: 378478
loss: 1.057573914527893,grad_norm: 0.9899638483265145, iteration: 378479
loss: 1.0064287185668945,grad_norm: 0.8255405741378051, iteration: 378480
loss: 1.0041539669036865,grad_norm: 0.7736624408917313, iteration: 378481
loss: 1.097291111946106,grad_norm: 0.9999995611211867, iteration: 378482
loss: 1.0419633388519287,grad_norm: 0.9999991461695075, iteration: 378483
loss: 0.9551315307617188,grad_norm: 0.9999991697368852, iteration: 378484
loss: 0.9902700781822205,grad_norm: 0.8147264095146262, iteration: 378485
loss: 0.9800706505775452,grad_norm: 0.8501049106026293, iteration: 378486
loss: 1.1063164472579956,grad_norm: 0.7964844843757605, iteration: 378487
loss: 1.0555930137634277,grad_norm: 0.9999994392576402, iteration: 378488
loss: 1.0169905424118042,grad_norm: 0.7414627198785188, iteration: 378489
loss: 0.9882265329360962,grad_norm: 0.858245074456992, iteration: 378490
loss: 1.0346267223358154,grad_norm: 0.9999998840762871, iteration: 378491
loss: 1.1151199340820312,grad_norm: 0.9999992756629505, iteration: 378492
loss: 0.9678727984428406,grad_norm: 0.9208447974727381, iteration: 378493
loss: 1.0166035890579224,grad_norm: 0.7897829422049498, iteration: 378494
loss: 1.029413104057312,grad_norm: 0.7450498814426461, iteration: 378495
loss: 1.026411533355713,grad_norm: 0.8641870368924253, iteration: 378496
loss: 1.054431676864624,grad_norm: 0.9999992823361586, iteration: 378497
loss: 1.0191017389297485,grad_norm: 0.9999996264899643, iteration: 378498
loss: 1.0304707288742065,grad_norm: 0.8249889499796711, iteration: 378499
loss: 0.9866005182266235,grad_norm: 0.7666261432714913, iteration: 378500
loss: 1.0042271614074707,grad_norm: 0.9999993617016947, iteration: 378501
loss: 1.0215798616409302,grad_norm: 0.7437403085245373, iteration: 378502
loss: 1.0018213987350464,grad_norm: 0.9999992268226897, iteration: 378503
loss: 0.9776671528816223,grad_norm: 0.8162815324020701, iteration: 378504
loss: 1.0034343004226685,grad_norm: 0.74489492689858, iteration: 378505
loss: 1.0492148399353027,grad_norm: 0.6862091544817864, iteration: 378506
loss: 1.0089010000228882,grad_norm: 0.8453741638757417, iteration: 378507
loss: 1.065207839012146,grad_norm: 0.9999992553107415, iteration: 378508
loss: 1.001091480255127,grad_norm: 0.9977237789233792, iteration: 378509
loss: 0.987823486328125,grad_norm: 0.8369770972284014, iteration: 378510
loss: 1.001226782798767,grad_norm: 0.8479412205739579, iteration: 378511
loss: 1.0063341856002808,grad_norm: 0.7709971848875496, iteration: 378512
loss: 1.00597083568573,grad_norm: 0.9845610825978747, iteration: 378513
loss: 1.0084582567214966,grad_norm: 0.7525928243299461, iteration: 378514
loss: 1.0774086713790894,grad_norm: 0.9999991589027092, iteration: 378515
loss: 0.9692025184631348,grad_norm: 0.6908889039764515, iteration: 378516
loss: 1.025133490562439,grad_norm: 0.7752265690026497, iteration: 378517
loss: 1.0359965562820435,grad_norm: 0.7811060118232582, iteration: 378518
loss: 0.9574745893478394,grad_norm: 0.8642350805479581, iteration: 378519
loss: 1.0398175716400146,grad_norm: 0.9999992371662618, iteration: 378520
loss: 0.9975851774215698,grad_norm: 0.724713779190293, iteration: 378521
loss: 0.9839714765548706,grad_norm: 0.9864929974491194, iteration: 378522
loss: 1.0284745693206787,grad_norm: 0.8696423567016961, iteration: 378523
loss: 1.0004185438156128,grad_norm: 0.9999995657658417, iteration: 378524
loss: 1.0118616819381714,grad_norm: 0.7650119890915488, iteration: 378525
loss: 1.0812526941299438,grad_norm: 0.9999990930660004, iteration: 378526
loss: 0.9779000282287598,grad_norm: 0.7206319131005051, iteration: 378527
loss: 1.0547187328338623,grad_norm: 0.9945819519570203, iteration: 378528
loss: 0.9974207282066345,grad_norm: 0.6990184572698351, iteration: 378529
loss: 0.9756529331207275,grad_norm: 0.8033283498844449, iteration: 378530
loss: 1.0081089735031128,grad_norm: 0.9999998127945795, iteration: 378531
loss: 1.145600438117981,grad_norm: 0.9999997626905134, iteration: 378532
loss: 1.0043575763702393,grad_norm: 0.8207532912666949, iteration: 378533
loss: 1.0199187994003296,grad_norm: 0.6795226322687203, iteration: 378534
loss: 1.0603265762329102,grad_norm: 0.9999994482804176, iteration: 378535
loss: 1.041022539138794,grad_norm: 0.8867063953051235, iteration: 378536
loss: 1.0166691541671753,grad_norm: 0.9807804639910491, iteration: 378537
loss: 0.9756555557250977,grad_norm: 0.7203852803872731, iteration: 378538
loss: 1.0167814493179321,grad_norm: 0.9999999373791344, iteration: 378539
loss: 0.9808818101882935,grad_norm: 0.761440951706426, iteration: 378540
loss: 1.0014586448669434,grad_norm: 0.8369818668951204, iteration: 378541
loss: 0.9675593972206116,grad_norm: 0.9042066862600768, iteration: 378542
loss: 0.9720373153686523,grad_norm: 0.7436797043323666, iteration: 378543
loss: 1.0065860748291016,grad_norm: 0.7863147701436721, iteration: 378544
loss: 0.9968315958976746,grad_norm: 0.730750037601682, iteration: 378545
loss: 1.0113986730575562,grad_norm: 0.8836939029003638, iteration: 378546
loss: 0.9864016771316528,grad_norm: 0.813966639499796, iteration: 378547
loss: 0.9842045307159424,grad_norm: 0.7832260100511322, iteration: 378548
loss: 0.9829563498497009,grad_norm: 0.9999991446042307, iteration: 378549
loss: 1.0298669338226318,grad_norm: 0.7301345575150658, iteration: 378550
loss: 1.023052453994751,grad_norm: 0.9999992197187895, iteration: 378551
loss: 1.0105444192886353,grad_norm: 0.9609054253367755, iteration: 378552
loss: 0.99261474609375,grad_norm: 0.7769759294758535, iteration: 378553
loss: 1.0092123746871948,grad_norm: 0.9247298250203955, iteration: 378554
loss: 1.0377421379089355,grad_norm: 0.99999936204787, iteration: 378555
loss: 1.0398058891296387,grad_norm: 0.9999991453244028, iteration: 378556
loss: 1.0008213520050049,grad_norm: 0.7554469723762461, iteration: 378557
loss: 1.335253357887268,grad_norm: 0.9999999208186979, iteration: 378558
loss: 0.9951821565628052,grad_norm: 0.9999991866329091, iteration: 378559
loss: 1.0357342958450317,grad_norm: 0.9289012017779917, iteration: 378560
loss: 0.9865125417709351,grad_norm: 0.7689752813635654, iteration: 378561
loss: 0.9923388957977295,grad_norm: 0.8606161408130047, iteration: 378562
loss: 0.9970606565475464,grad_norm: 0.7651962621374674, iteration: 378563
loss: 1.0222216844558716,grad_norm: 0.8488633090055, iteration: 378564
loss: 0.962058961391449,grad_norm: 0.7562398985690131, iteration: 378565
loss: 0.9750081300735474,grad_norm: 0.6876277673177209, iteration: 378566
loss: 0.9614618420600891,grad_norm: 0.8350474470152369, iteration: 378567
loss: 0.9612782597541809,grad_norm: 0.7685285954802739, iteration: 378568
loss: 1.0337010622024536,grad_norm: 0.9999995566660449, iteration: 378569
loss: 0.9761108756065369,grad_norm: 0.8686295234265825, iteration: 378570
loss: 1.0246696472167969,grad_norm: 0.6975954706152452, iteration: 378571
loss: 1.0653743743896484,grad_norm: 0.7454356124902114, iteration: 378572
loss: 0.9949408173561096,grad_norm: 0.9999990819440235, iteration: 378573
loss: 1.0073686838150024,grad_norm: 0.752659789836082, iteration: 378574
loss: 1.0146729946136475,grad_norm: 0.7431883056975493, iteration: 378575
loss: 1.0323050022125244,grad_norm: 0.9999994227006347, iteration: 378576
loss: 1.0191668272018433,grad_norm: 0.7631035597963287, iteration: 378577
loss: 0.9929111003875732,grad_norm: 0.9140267727227676, iteration: 378578
loss: 0.9835221171379089,grad_norm: 0.7431981400625288, iteration: 378579
loss: 1.0129953622817993,grad_norm: 0.7897447608066588, iteration: 378580
loss: 0.9853479862213135,grad_norm: 0.9108712260680272, iteration: 378581
loss: 1.0426548719406128,grad_norm: 0.9999997103846782, iteration: 378582
loss: 0.959881067276001,grad_norm: 0.8519351358657437, iteration: 378583
loss: 1.0025935173034668,grad_norm: 0.7962604049779272, iteration: 378584
loss: 1.0009700059890747,grad_norm: 0.7639608509510585, iteration: 378585
loss: 1.0158551931381226,grad_norm: 0.8590110407931099, iteration: 378586
loss: 1.0259370803833008,grad_norm: 0.8398908697594103, iteration: 378587
loss: 1.0169590711593628,grad_norm: 0.7906749643360057, iteration: 378588
loss: 0.9774210453033447,grad_norm: 0.7982284146185066, iteration: 378589
loss: 1.012779951095581,grad_norm: 0.9754577256699679, iteration: 378590
loss: 1.0193402767181396,grad_norm: 0.999999620844326, iteration: 378591
loss: 0.9987272024154663,grad_norm: 0.8144341355965761, iteration: 378592
loss: 1.0131036043167114,grad_norm: 0.9450353909498449, iteration: 378593
loss: 1.017116665840149,grad_norm: 0.6753378348684891, iteration: 378594
loss: 0.9863367080688477,grad_norm: 0.7791388328620873, iteration: 378595
loss: 1.094973087310791,grad_norm: 0.9999992615099343, iteration: 378596
loss: 0.9918245077133179,grad_norm: 0.7945623021882172, iteration: 378597
loss: 0.9996192455291748,grad_norm: 0.9551192224460154, iteration: 378598
loss: 1.0342179536819458,grad_norm: 0.9936096464880386, iteration: 378599
loss: 0.9984655976295471,grad_norm: 0.7637983949959137, iteration: 378600
loss: 0.9981868863105774,grad_norm: 0.8147711747835601, iteration: 378601
loss: 0.9770603179931641,grad_norm: 0.7532021296573143, iteration: 378602
loss: 1.0000802278518677,grad_norm: 0.999999003052064, iteration: 378603
loss: 1.0516924858093262,grad_norm: 0.7618067937769234, iteration: 378604
loss: 1.0043208599090576,grad_norm: 0.8163091231769998, iteration: 378605
loss: 0.9963509440422058,grad_norm: 0.7869152880796954, iteration: 378606
loss: 1.0511783361434937,grad_norm: 0.9576978193593432, iteration: 378607
loss: 0.9916839003562927,grad_norm: 0.869462480497491, iteration: 378608
loss: 1.0636348724365234,grad_norm: 0.7677327495059124, iteration: 378609
loss: 0.9911102056503296,grad_norm: 0.7191262889525031, iteration: 378610
loss: 1.0266969203948975,grad_norm: 0.9999992618838522, iteration: 378611
loss: 1.045669674873352,grad_norm: 0.9999991463907872, iteration: 378612
loss: 1.0185825824737549,grad_norm: 0.7732148295360582, iteration: 378613
loss: 0.9770558476448059,grad_norm: 0.8805288866763119, iteration: 378614
loss: 1.0457603931427002,grad_norm: 0.7562680335116715, iteration: 378615
loss: 1.0030511617660522,grad_norm: 0.9999990205699684, iteration: 378616
loss: 0.9964290857315063,grad_norm: 0.6637239290290717, iteration: 378617
loss: 0.9719789028167725,grad_norm: 0.8356856593160359, iteration: 378618
loss: 1.018073320388794,grad_norm: 0.9999990938018088, iteration: 378619
loss: 0.9978927373886108,grad_norm: 0.833869520825409, iteration: 378620
loss: 1.0327683687210083,grad_norm: 0.8609440365197857, iteration: 378621
loss: 0.9790107607841492,grad_norm: 0.8459788520214754, iteration: 378622
loss: 0.9895939230918884,grad_norm: 0.8515539437317662, iteration: 378623
loss: 0.9778119325637817,grad_norm: 0.8830975814493058, iteration: 378624
loss: 1.026971459388733,grad_norm: 0.9999995548868764, iteration: 378625
loss: 1.002583622932434,grad_norm: 0.8583184099843454, iteration: 378626
loss: 1.0738626718521118,grad_norm: 0.9491633648128966, iteration: 378627
loss: 0.9974197745323181,grad_norm: 0.7121469837964174, iteration: 378628
loss: 1.0278888940811157,grad_norm: 0.7988836555290422, iteration: 378629
loss: 0.9964531660079956,grad_norm: 0.8441783974257304, iteration: 378630
loss: 1.028631329536438,grad_norm: 0.9999999375043139, iteration: 378631
loss: 0.9959959387779236,grad_norm: 0.9595787928269236, iteration: 378632
loss: 1.0000978708267212,grad_norm: 0.8612285649903786, iteration: 378633
loss: 1.0153592824935913,grad_norm: 0.7980631215299394, iteration: 378634
loss: 0.9979082345962524,grad_norm: 0.836457168428067, iteration: 378635
loss: 0.980312705039978,grad_norm: 0.8479852095922267, iteration: 378636
loss: 0.9754655957221985,grad_norm: 0.8897893884034518, iteration: 378637
loss: 1.0239461660385132,grad_norm: 0.8854517223447863, iteration: 378638
loss: 1.0030516386032104,grad_norm: 0.8794289465267112, iteration: 378639
loss: 1.0038546323776245,grad_norm: 0.7542052498970301, iteration: 378640
loss: 0.9942818284034729,grad_norm: 0.91887914816764, iteration: 378641
loss: 1.0149739980697632,grad_norm: 0.6614534674044543, iteration: 378642
loss: 1.0139405727386475,grad_norm: 0.9999990150887522, iteration: 378643
loss: 1.024195909500122,grad_norm: 0.7322348353956185, iteration: 378644
loss: 0.9983792901039124,grad_norm: 0.9603869844961834, iteration: 378645
loss: 1.021415114402771,grad_norm: 0.9932701402900754, iteration: 378646
loss: 0.9795039296150208,grad_norm: 0.9028681401963777, iteration: 378647
loss: 0.9903290271759033,grad_norm: 0.7805087490419776, iteration: 378648
loss: 1.012481927871704,grad_norm: 0.8614732377436282, iteration: 378649
loss: 0.977858304977417,grad_norm: 0.7004970241170075, iteration: 378650
loss: 1.012017011642456,grad_norm: 0.8930081630566844, iteration: 378651
loss: 1.0205447673797607,grad_norm: 0.7467738350488796, iteration: 378652
loss: 1.0605826377868652,grad_norm: 0.733080465492727, iteration: 378653
loss: 1.0765981674194336,grad_norm: 0.7412328168345504, iteration: 378654
loss: 1.0285817384719849,grad_norm: 0.8209562396661613, iteration: 378655
loss: 1.0954760313034058,grad_norm: 0.9999991947010567, iteration: 378656
loss: 0.9854665994644165,grad_norm: 0.804156071680404, iteration: 378657
loss: 0.986251950263977,grad_norm: 0.8798679699451508, iteration: 378658
loss: 0.9883219003677368,grad_norm: 0.999999003097922, iteration: 378659
loss: 1.013430118560791,grad_norm: 0.9740750817217374, iteration: 378660
loss: 1.001947045326233,grad_norm: 0.772317400547531, iteration: 378661
loss: 1.003653645515442,grad_norm: 0.8436241102143978, iteration: 378662
loss: 0.9852723479270935,grad_norm: 0.7556107104455546, iteration: 378663
loss: 1.0096583366394043,grad_norm: 0.811826792925462, iteration: 378664
loss: 0.986947238445282,grad_norm: 0.7169183217104397, iteration: 378665
loss: 0.9470582008361816,grad_norm: 0.8525716274022908, iteration: 378666
loss: 0.990797221660614,grad_norm: 0.7798414909562829, iteration: 378667
loss: 0.994996964931488,grad_norm: 0.7938711937254861, iteration: 378668
loss: 1.0225764513015747,grad_norm: 0.8386570512253964, iteration: 378669
loss: 1.0091960430145264,grad_norm: 0.8018988898755908, iteration: 378670
loss: 1.0046063661575317,grad_norm: 0.8255355185425727, iteration: 378671
loss: 1.0027291774749756,grad_norm: 0.9999999071056791, iteration: 378672
loss: 0.9813379645347595,grad_norm: 0.7109831406526501, iteration: 378673
loss: 1.003684401512146,grad_norm: 0.99999947300932, iteration: 378674
loss: 0.9993179440498352,grad_norm: 0.8022425905715646, iteration: 378675
loss: 1.0283383131027222,grad_norm: 0.901328592618006, iteration: 378676
loss: 1.0201343297958374,grad_norm: 0.9999999772694427, iteration: 378677
loss: 0.9808933138847351,grad_norm: 0.7103966413070786, iteration: 378678
loss: 1.2647743225097656,grad_norm: 0.9999993956940029, iteration: 378679
loss: 1.0190560817718506,grad_norm: 0.7567224692725827, iteration: 378680
loss: 1.0100306272506714,grad_norm: 0.8538046690994758, iteration: 378681
loss: 1.2453088760375977,grad_norm: 0.999999119027256, iteration: 378682
loss: 0.9869446754455566,grad_norm: 0.7835471544735422, iteration: 378683
loss: 1.0170632600784302,grad_norm: 0.7761778321947191, iteration: 378684
loss: 0.9917856454849243,grad_norm: 0.5781350800503124, iteration: 378685
loss: 1.0301363468170166,grad_norm: 0.9000070902759906, iteration: 378686
loss: 1.0004899501800537,grad_norm: 0.8489563010168113, iteration: 378687
loss: 0.976710855960846,grad_norm: 0.8102026768765256, iteration: 378688
loss: 0.9615017771720886,grad_norm: 0.8594597478946672, iteration: 378689
loss: 0.9872702956199646,grad_norm: 0.8837933725484437, iteration: 378690
loss: 1.0100343227386475,grad_norm: 0.9999991749053813, iteration: 378691
loss: 0.9984641671180725,grad_norm: 0.768675426730086, iteration: 378692
loss: 1.042941927909851,grad_norm: 0.6907603159117027, iteration: 378693
loss: 1.0572090148925781,grad_norm: 0.9999993580707804, iteration: 378694
loss: 0.960766613483429,grad_norm: 0.6154502153793586, iteration: 378695
loss: 0.9877713322639465,grad_norm: 0.8265141682239078, iteration: 378696
loss: 1.0391587018966675,grad_norm: 0.9668583148928303, iteration: 378697
loss: 0.9925450682640076,grad_norm: 0.7423964858000186, iteration: 378698
loss: 0.976336658000946,grad_norm: 0.829270636023924, iteration: 378699
loss: 0.9765889644622803,grad_norm: 0.8083078670508775, iteration: 378700
loss: 0.9881681799888611,grad_norm: 0.7969786956762954, iteration: 378701
loss: 1.0000983476638794,grad_norm: 0.6882366456979503, iteration: 378702
loss: 0.9489933252334595,grad_norm: 0.8088652888504404, iteration: 378703
loss: 0.970651388168335,grad_norm: 0.8712360336995225, iteration: 378704
loss: 1.0254772901535034,grad_norm: 0.7144378053954631, iteration: 378705
loss: 0.9833734631538391,grad_norm: 0.932069493544866, iteration: 378706
loss: 1.0014963150024414,grad_norm: 0.999999345358684, iteration: 378707
loss: 0.9830781817436218,grad_norm: 0.8112915438999522, iteration: 378708
loss: 1.0483568906784058,grad_norm: 0.9999993092738106, iteration: 378709
loss: 0.9771862030029297,grad_norm: 0.9048302874107738, iteration: 378710
loss: 1.0280039310455322,grad_norm: 0.8022613460913985, iteration: 378711
loss: 0.9990977644920349,grad_norm: 0.7417759550557035, iteration: 378712
loss: 1.0280178785324097,grad_norm: 0.8926776210702729, iteration: 378713
loss: 0.991215169429779,grad_norm: 0.8747738696644562, iteration: 378714
loss: 0.9942713975906372,grad_norm: 0.7212262588377182, iteration: 378715
loss: 0.9494485855102539,grad_norm: 0.7574792386824154, iteration: 378716
loss: 0.9780080914497375,grad_norm: 0.7600900809161827, iteration: 378717
loss: 0.9815041422843933,grad_norm: 0.7996967602677686, iteration: 378718
loss: 1.0634392499923706,grad_norm: 0.9999996507361927, iteration: 378719
loss: 1.0341256856918335,grad_norm: 0.9589545692347408, iteration: 378720
loss: 1.0246238708496094,grad_norm: 0.8471920026322046, iteration: 378721
loss: 1.0102070569992065,grad_norm: 0.8756643949659427, iteration: 378722
loss: 1.0720596313476562,grad_norm: 0.9999991172286915, iteration: 378723
loss: 1.0074657201766968,grad_norm: 0.7307984066261972, iteration: 378724
loss: 1.0035210847854614,grad_norm: 0.9511662556595875, iteration: 378725
loss: 1.3117607831954956,grad_norm: 0.9999996627861858, iteration: 378726
loss: 0.9783030152320862,grad_norm: 0.8913372171064932, iteration: 378727
loss: 0.9946085810661316,grad_norm: 0.9538770054985932, iteration: 378728
loss: 0.9705755710601807,grad_norm: 0.7759545842902418, iteration: 378729
loss: 0.9723647236824036,grad_norm: 0.8239420310455954, iteration: 378730
loss: 1.0489087104797363,grad_norm: 0.7965619930524012, iteration: 378731
loss: 0.985776960849762,grad_norm: 0.7354127243227392, iteration: 378732
loss: 1.0382750034332275,grad_norm: 0.9999997430425142, iteration: 378733
loss: 0.9860966205596924,grad_norm: 0.8016587127146537, iteration: 378734
loss: 1.035759687423706,grad_norm: 0.9999996176226235, iteration: 378735
loss: 0.9754271507263184,grad_norm: 0.8265198755198186, iteration: 378736
loss: 1.0138193368911743,grad_norm: 0.9120379876512373, iteration: 378737
loss: 0.985688328742981,grad_norm: 0.912591905189127, iteration: 378738
loss: 0.9774778485298157,grad_norm: 0.646525568320613, iteration: 378739
loss: 0.9835801720619202,grad_norm: 0.7564458536385062, iteration: 378740
loss: 1.0181944370269775,grad_norm: 0.9999992576052184, iteration: 378741
loss: 0.9954833984375,grad_norm: 0.8175518876278564, iteration: 378742
loss: 1.0104883909225464,grad_norm: 0.836950098758207, iteration: 378743
loss: 1.01677668094635,grad_norm: 0.9841967690739988, iteration: 378744
loss: 0.9963273406028748,grad_norm: 0.8313027803804872, iteration: 378745
loss: 1.0511969327926636,grad_norm: 0.7282442300303859, iteration: 378746
loss: 1.1377493143081665,grad_norm: 0.9999994087755208, iteration: 378747
loss: 1.0002152919769287,grad_norm: 0.7554898082607129, iteration: 378748
loss: 0.9812193512916565,grad_norm: 0.999999533895126, iteration: 378749
loss: 0.9844943881034851,grad_norm: 0.9044963423273872, iteration: 378750
loss: 0.992878794670105,grad_norm: 0.8478159742240976, iteration: 378751
loss: 1.010776400566101,grad_norm: 0.8198890160646155, iteration: 378752
loss: 1.1060458421707153,grad_norm: 0.948995221144367, iteration: 378753
loss: 1.0109111070632935,grad_norm: 0.7345864702442783, iteration: 378754
loss: 1.0185850858688354,grad_norm: 0.7640580585084598, iteration: 378755
loss: 1.007102608680725,grad_norm: 0.8922948413943649, iteration: 378756
loss: 1.008810043334961,grad_norm: 0.8053175109683474, iteration: 378757
loss: 0.9696696996688843,grad_norm: 0.8779064546364188, iteration: 378758
loss: 1.0125398635864258,grad_norm: 0.999999797400041, iteration: 378759
loss: 1.024250864982605,grad_norm: 0.99999910175608, iteration: 378760
loss: 1.0009137392044067,grad_norm: 0.7364045902336368, iteration: 378761
loss: 0.9853259325027466,grad_norm: 0.7823409458406805, iteration: 378762
loss: 0.9923596978187561,grad_norm: 0.9999995471667216, iteration: 378763
loss: 0.99632728099823,grad_norm: 0.8733306951776606, iteration: 378764
loss: 0.9644967317581177,grad_norm: 0.7261520247308597, iteration: 378765
loss: 0.969897449016571,grad_norm: 0.8606568268963498, iteration: 378766
loss: 0.9876396059989929,grad_norm: 0.7854685637258827, iteration: 378767
loss: 0.9938616156578064,grad_norm: 0.6743536457387345, iteration: 378768
loss: 1.0086750984191895,grad_norm: 0.7503991659456084, iteration: 378769
loss: 0.983180820941925,grad_norm: 0.8628834696853649, iteration: 378770
loss: 0.994592010974884,grad_norm: 0.8056467381870145, iteration: 378771
loss: 1.0421438217163086,grad_norm: 0.8005212017334294, iteration: 378772
loss: 0.969697892665863,grad_norm: 0.725652318124858, iteration: 378773
loss: 1.0319559574127197,grad_norm: 0.7831769696973653, iteration: 378774
loss: 1.010303020477295,grad_norm: 0.7038472248714979, iteration: 378775
loss: 0.9854929447174072,grad_norm: 0.9280356791911454, iteration: 378776
loss: 0.9979279637336731,grad_norm: 0.7166400693257411, iteration: 378777
loss: 1.196829080581665,grad_norm: 0.9999997399426188, iteration: 378778
loss: 0.9804732203483582,grad_norm: 0.7536728345165381, iteration: 378779
loss: 1.0022335052490234,grad_norm: 0.6895317355704141, iteration: 378780
loss: 1.0434868335723877,grad_norm: 0.8227757442362647, iteration: 378781
loss: 1.034011960029602,grad_norm: 0.9999999723669518, iteration: 378782
loss: 1.0258275270462036,grad_norm: 0.8395828941426053, iteration: 378783
loss: 0.9536922574043274,grad_norm: 0.8385537032584481, iteration: 378784
loss: 0.9907791018486023,grad_norm: 0.7538087290250414, iteration: 378785
loss: 0.9633386731147766,grad_norm: 0.7909464499408958, iteration: 378786
loss: 1.0133781433105469,grad_norm: 0.770517032144898, iteration: 378787
loss: 0.9732556939125061,grad_norm: 0.7505894715996767, iteration: 378788
loss: 1.0731418132781982,grad_norm: 0.9403435627182, iteration: 378789
loss: 1.0526450872421265,grad_norm: 0.9999993690423766, iteration: 378790
loss: 0.9799803495407104,grad_norm: 0.6832449408406239, iteration: 378791
loss: 1.0133905410766602,grad_norm: 0.7867455696258228, iteration: 378792
loss: 1.100954294204712,grad_norm: 0.8922085858373108, iteration: 378793
loss: 0.9637593030929565,grad_norm: 0.7830720208641381, iteration: 378794
loss: 1.0103154182434082,grad_norm: 0.8852106635520057, iteration: 378795
loss: 0.9922735095024109,grad_norm: 0.9176975383523882, iteration: 378796
loss: 0.9629135727882385,grad_norm: 0.6359533636092728, iteration: 378797
loss: 0.988960325717926,grad_norm: 0.7185779799838982, iteration: 378798
loss: 1.0168195962905884,grad_norm: 0.8710394337028098, iteration: 378799
loss: 1.0167778730392456,grad_norm: 0.8437218602308659, iteration: 378800
loss: 0.9581642150878906,grad_norm: 0.6861926370364887, iteration: 378801
loss: 1.0064256191253662,grad_norm: 0.8820333194889998, iteration: 378802
loss: 0.9609777331352234,grad_norm: 0.7946074377832358, iteration: 378803
loss: 1.002539038658142,grad_norm: 0.9999992072150697, iteration: 378804
loss: 1.041634440422058,grad_norm: 0.9999995814687744, iteration: 378805
loss: 1.0049678087234497,grad_norm: 0.833673872706581, iteration: 378806
loss: 0.967749834060669,grad_norm: 0.6914439033289627, iteration: 378807
loss: 0.9756148457527161,grad_norm: 0.6925353777628029, iteration: 378808
loss: 0.9672915935516357,grad_norm: 0.7920740348696361, iteration: 378809
loss: 0.9980764985084534,grad_norm: 0.9999991079245802, iteration: 378810
loss: 0.9897551536560059,grad_norm: 0.7905596514343678, iteration: 378811
loss: 0.9843395352363586,grad_norm: 0.6640894760733207, iteration: 378812
loss: 1.1371361017227173,grad_norm: 0.9999997930211963, iteration: 378813
loss: 1.0965464115142822,grad_norm: 0.9999993413872438, iteration: 378814
loss: 1.0062578916549683,grad_norm: 0.630309361217204, iteration: 378815
loss: 1.0152958631515503,grad_norm: 0.9999990218774041, iteration: 378816
loss: 1.0375571250915527,grad_norm: 0.7867769277906362, iteration: 378817
loss: 1.0123875141143799,grad_norm: 0.7301886337420315, iteration: 378818
loss: 0.991676390171051,grad_norm: 0.7779159493257588, iteration: 378819
loss: 1.0398095846176147,grad_norm: 0.8963132546176851, iteration: 378820
loss: 1.1000739336013794,grad_norm: 0.9999996033042534, iteration: 378821
loss: 1.0027210712432861,grad_norm: 0.8316913054462561, iteration: 378822
loss: 1.0334937572479248,grad_norm: 0.7930273841881971, iteration: 378823
loss: 1.0226850509643555,grad_norm: 0.7149475085584115, iteration: 378824
loss: 1.0025551319122314,grad_norm: 0.9999994327853665, iteration: 378825
loss: 0.9942736625671387,grad_norm: 0.9558216119646478, iteration: 378826
loss: 0.9848470091819763,grad_norm: 0.8094327826502246, iteration: 378827
loss: 1.010255217552185,grad_norm: 0.7210607375892789, iteration: 378828
loss: 1.0243011713027954,grad_norm: 0.9271983959368999, iteration: 378829
loss: 0.9980493187904358,grad_norm: 0.5795894671323817, iteration: 378830
loss: 0.9876474142074585,grad_norm: 0.7242257618652829, iteration: 378831
loss: 1.0222761631011963,grad_norm: 0.9638313893718471, iteration: 378832
loss: 1.0013664960861206,grad_norm: 0.9086812526911447, iteration: 378833
loss: 1.0411570072174072,grad_norm: 0.8577955446368427, iteration: 378834
loss: 1.0322884321212769,grad_norm: 0.8870964398897531, iteration: 378835
loss: 1.0497941970825195,grad_norm: 0.9999999378474491, iteration: 378836
loss: 0.9839698076248169,grad_norm: 0.7657346288099317, iteration: 378837
loss: 0.9694008827209473,grad_norm: 0.7939777041924945, iteration: 378838
loss: 0.9998603463172913,grad_norm: 0.6582163524615058, iteration: 378839
loss: 0.992821991443634,grad_norm: 0.8649415362184111, iteration: 378840
loss: 0.94722580909729,grad_norm: 0.8641088304508956, iteration: 378841
loss: 0.9629870057106018,grad_norm: 0.6580528457178176, iteration: 378842
loss: 0.9424729943275452,grad_norm: 0.7828377903868214, iteration: 378843
loss: 0.9591346383094788,grad_norm: 0.8381861676564532, iteration: 378844
loss: 1.0215166807174683,grad_norm: 0.9999994676268064, iteration: 378845
loss: 0.9896424412727356,grad_norm: 0.9999992943837772, iteration: 378846
loss: 0.9915826916694641,grad_norm: 0.7482026517392448, iteration: 378847
loss: 0.9741951823234558,grad_norm: 0.8994537636620293, iteration: 378848
loss: 1.103270411491394,grad_norm: 0.9999991128911615, iteration: 378849
loss: 1.0103297233581543,grad_norm: 0.7043717624979504, iteration: 378850
loss: 1.0328364372253418,grad_norm: 0.9135352779910432, iteration: 378851
loss: 0.9425345063209534,grad_norm: 0.7618428163470281, iteration: 378852
loss: 0.9788808822631836,grad_norm: 0.824408048497609, iteration: 378853
loss: 1.0284504890441895,grad_norm: 0.749942585509564, iteration: 378854
loss: 1.072515845298767,grad_norm: 0.9999996759201031, iteration: 378855
loss: 0.9600123167037964,grad_norm: 0.8248320429814905, iteration: 378856
loss: 0.9763007760047913,grad_norm: 0.8641612586098498, iteration: 378857
loss: 1.0285046100616455,grad_norm: 0.999999149934271, iteration: 378858
loss: 1.0422917604446411,grad_norm: 0.8912384857551456, iteration: 378859
loss: 0.9951357245445251,grad_norm: 0.9000578722468929, iteration: 378860
loss: 1.0122146606445312,grad_norm: 0.8729346153936538, iteration: 378861
loss: 1.0002070665359497,grad_norm: 0.6471489479963201, iteration: 378862
loss: 0.9682241082191467,grad_norm: 0.7586670386867373, iteration: 378863
loss: 0.9996476769447327,grad_norm: 0.6973810126884992, iteration: 378864
loss: 0.9730079770088196,grad_norm: 0.9999990624955889, iteration: 378865
loss: 1.0722187757492065,grad_norm: 0.9999997662861487, iteration: 378866
loss: 1.0415494441986084,grad_norm: 0.8204275529723907, iteration: 378867
loss: 1.1366816759109497,grad_norm: 0.9999994093805089, iteration: 378868
loss: 0.9670996069908142,grad_norm: 0.7175669833971392, iteration: 378869
loss: 0.9700656533241272,grad_norm: 0.8204279938260017, iteration: 378870
loss: 0.9642758965492249,grad_norm: 0.8452449573196014, iteration: 378871
loss: 1.1070562601089478,grad_norm: 0.8186769965900929, iteration: 378872
loss: 1.0092542171478271,grad_norm: 0.8053781578126872, iteration: 378873
loss: 1.0019170045852661,grad_norm: 0.7565581754518856, iteration: 378874
loss: 1.0123002529144287,grad_norm: 0.7821648101986629, iteration: 378875
loss: 1.05819571018219,grad_norm: 0.7826999094347734, iteration: 378876
loss: 1.0421956777572632,grad_norm: 0.7048523174815701, iteration: 378877
loss: 1.046802043914795,grad_norm: 0.9999990998481696, iteration: 378878
loss: 0.9905842542648315,grad_norm: 0.6917515387682853, iteration: 378879
loss: 1.0882140398025513,grad_norm: 0.9679502054067214, iteration: 378880
loss: 0.9927430748939514,grad_norm: 0.8251795327706596, iteration: 378881
loss: 1.0049430131912231,grad_norm: 0.8466250905111774, iteration: 378882
loss: 0.9981707334518433,grad_norm: 0.9999992047711085, iteration: 378883
loss: 1.0046796798706055,grad_norm: 0.8248392670730443, iteration: 378884
loss: 1.0736925601959229,grad_norm: 0.9999990438112822, iteration: 378885
loss: 1.030838131904602,grad_norm: 0.9072693897584289, iteration: 378886
loss: 1.065466284751892,grad_norm: 0.8779680765629903, iteration: 378887
loss: 1.055659532546997,grad_norm: 0.9999990263989016, iteration: 378888
loss: 1.0056992769241333,grad_norm: 0.9280976549268418, iteration: 378889
loss: 0.9831452369689941,grad_norm: 0.8035111693791432, iteration: 378890
loss: 1.046633005142212,grad_norm: 0.8412242849932814, iteration: 378891
loss: 0.9713681936264038,grad_norm: 0.7249935637986983, iteration: 378892
loss: 1.0087924003601074,grad_norm: 0.9704695456075398, iteration: 378893
loss: 1.0016988515853882,grad_norm: 0.6635266361534495, iteration: 378894
loss: 1.0420000553131104,grad_norm: 0.9999996479235423, iteration: 378895
loss: 0.9826028943061829,grad_norm: 0.763215652857039, iteration: 378896
loss: 0.9912791848182678,grad_norm: 0.8546059622455491, iteration: 378897
loss: 1.0008211135864258,grad_norm: 0.8053666526614379, iteration: 378898
loss: 0.9865437150001526,grad_norm: 0.7342287458045174, iteration: 378899
loss: 1.0695366859436035,grad_norm: 0.9498947421311794, iteration: 378900
loss: 1.0830674171447754,grad_norm: 0.6701493753016307, iteration: 378901
loss: 0.9892125725746155,grad_norm: 0.8023281466878825, iteration: 378902
loss: 0.9978810548782349,grad_norm: 0.714913577455714, iteration: 378903
loss: 0.9799508452415466,grad_norm: 0.6578672907087405, iteration: 378904
loss: 1.008014440536499,grad_norm: 0.6558354783538661, iteration: 378905
loss: 0.9793577194213867,grad_norm: 0.9157465195225238, iteration: 378906
loss: 1.0499615669250488,grad_norm: 0.750491097749887, iteration: 378907
loss: 0.9745151400566101,grad_norm: 0.6525450398513792, iteration: 378908
loss: 1.0851672887802124,grad_norm: 0.9999994855783151, iteration: 378909
loss: 0.982319176197052,grad_norm: 0.7869844109591159, iteration: 378910
loss: 0.9960556626319885,grad_norm: 0.7931374062108881, iteration: 378911
loss: 0.9776806831359863,grad_norm: 0.813546964279274, iteration: 378912
loss: 0.9647715091705322,grad_norm: 0.7205386905793889, iteration: 378913
loss: 1.042413353919983,grad_norm: 0.9999992482070033, iteration: 378914
loss: 1.0093568563461304,grad_norm: 0.9070449147574802, iteration: 378915
loss: 1.0059272050857544,grad_norm: 0.928708870104863, iteration: 378916
loss: 1.0026161670684814,grad_norm: 0.810154094928128, iteration: 378917
loss: 1.0210707187652588,grad_norm: 0.8623084509137494, iteration: 378918
loss: 1.0017277002334595,grad_norm: 0.9999992646320442, iteration: 378919
loss: 0.9818889498710632,grad_norm: 0.7863840447184642, iteration: 378920
loss: 0.9798265099525452,grad_norm: 0.8711339821421261, iteration: 378921
loss: 0.9535089731216431,grad_norm: 0.8629044702758126, iteration: 378922
loss: 0.9816628098487854,grad_norm: 0.7258559354180742, iteration: 378923
loss: 1.029757022857666,grad_norm: 0.9999995513768029, iteration: 378924
loss: 0.9527575969696045,grad_norm: 0.7366488237012018, iteration: 378925
loss: 1.0091297626495361,grad_norm: 0.9999994103765546, iteration: 378926
loss: 0.9742056131362915,grad_norm: 0.9999990960608309, iteration: 378927
loss: 1.0062346458435059,grad_norm: 0.8232955080438474, iteration: 378928
loss: 1.1588648557662964,grad_norm: 0.9999991808512061, iteration: 378929
loss: 1.0150502920150757,grad_norm: 0.9999993706544263, iteration: 378930
loss: 1.0098450183868408,grad_norm: 0.8267308531143206, iteration: 378931
loss: 1.01266610622406,grad_norm: 0.7247172863192499, iteration: 378932
loss: 0.9816052317619324,grad_norm: 0.8184206745793862, iteration: 378933
loss: 0.9896263480186462,grad_norm: 0.732365928334753, iteration: 378934
loss: 0.9998936653137207,grad_norm: 0.8489037308131234, iteration: 378935
loss: 1.018311858177185,grad_norm: 0.8489322569206831, iteration: 378936
loss: 1.0236561298370361,grad_norm: 0.9999992654031601, iteration: 378937
loss: 1.0106927156448364,grad_norm: 0.7831948553407723, iteration: 378938
loss: 0.9655532240867615,grad_norm: 0.9342156951463798, iteration: 378939
loss: 1.040774941444397,grad_norm: 0.9999992289932199, iteration: 378940
loss: 1.0044440031051636,grad_norm: 0.7859520623799608, iteration: 378941
loss: 0.9807056188583374,grad_norm: 0.8372043451754989, iteration: 378942
loss: 0.9969990253448486,grad_norm: 0.980236110818585, iteration: 378943
loss: 0.9877822995185852,grad_norm: 0.7914606422932412, iteration: 378944
loss: 0.9607724547386169,grad_norm: 0.6741588507435057, iteration: 378945
loss: 0.9982893466949463,grad_norm: 0.8446542400176986, iteration: 378946
loss: 1.0669853687286377,grad_norm: 0.9999995287359495, iteration: 378947
loss: 1.0257768630981445,grad_norm: 0.9999996340315043, iteration: 378948
loss: 1.0050338506698608,grad_norm: 0.8746098083197441, iteration: 378949
loss: 0.955707848072052,grad_norm: 0.7470510509785512, iteration: 378950
loss: 1.0660027265548706,grad_norm: 0.9999992383258335, iteration: 378951
loss: 0.9793043732643127,grad_norm: 0.7316974774231626, iteration: 378952
loss: 1.1293669939041138,grad_norm: 0.9999999346049188, iteration: 378953
loss: 0.9536933302879333,grad_norm: 0.7399808211331729, iteration: 378954
loss: 1.0623146295547485,grad_norm: 0.9789044839056957, iteration: 378955
loss: 0.9977090358734131,grad_norm: 0.7481910234155574, iteration: 378956
loss: 1.042352318763733,grad_norm: 0.747587053387122, iteration: 378957
loss: 1.0116546154022217,grad_norm: 0.6981696945358781, iteration: 378958
loss: 1.0277045965194702,grad_norm: 0.9999991157237286, iteration: 378959
loss: 1.0268800258636475,grad_norm: 0.869163659036122, iteration: 378960
loss: 1.1360830068588257,grad_norm: 0.9999999357521093, iteration: 378961
loss: 1.00904381275177,grad_norm: 0.9090761920313334, iteration: 378962
loss: 1.0246620178222656,grad_norm: 0.8586996680515432, iteration: 378963
loss: 1.0616592168807983,grad_norm: 0.955505215710053, iteration: 378964
loss: 1.0038679838180542,grad_norm: 0.8097627956970347, iteration: 378965
loss: 1.0504493713378906,grad_norm: 0.9098618641922132, iteration: 378966
loss: 1.0031260251998901,grad_norm: 0.8694786785032944, iteration: 378967
loss: 0.9955892562866211,grad_norm: 0.7801676127176788, iteration: 378968
loss: 1.019868016242981,grad_norm: 0.8862858376780126, iteration: 378969
loss: 0.9499049186706543,grad_norm: 0.7314023754604378, iteration: 378970
loss: 1.0132488012313843,grad_norm: 0.7927809572558712, iteration: 378971
loss: 0.9981946349143982,grad_norm: 0.7716141378358938, iteration: 378972
loss: 0.9793258309364319,grad_norm: 0.6776456164926131, iteration: 378973
loss: 1.0621435642242432,grad_norm: 0.9999996342547853, iteration: 378974
loss: 1.0054786205291748,grad_norm: 0.8019655557226084, iteration: 378975
loss: 1.0052006244659424,grad_norm: 0.7686908511800816, iteration: 378976
loss: 1.028537631034851,grad_norm: 0.7243365665605696, iteration: 378977
loss: 1.0203697681427002,grad_norm: 0.7536119443519244, iteration: 378978
loss: 0.9881002902984619,grad_norm: 0.7820432086049237, iteration: 378979
loss: 1.0281105041503906,grad_norm: 0.9999996337472652, iteration: 378980
loss: 0.9975225329399109,grad_norm: 0.721269397531491, iteration: 378981
loss: 1.015831708908081,grad_norm: 0.8583386598139929, iteration: 378982
loss: 0.9958718419075012,grad_norm: 0.9436594109937526, iteration: 378983
loss: 1.0281267166137695,grad_norm: 0.6678831424488766, iteration: 378984
loss: 1.0018330812454224,grad_norm: 0.9653498151721716, iteration: 378985
loss: 1.0158921480178833,grad_norm: 0.9366575523195049, iteration: 378986
loss: 1.0022709369659424,grad_norm: 0.8110893438411307, iteration: 378987
loss: 0.9894858002662659,grad_norm: 0.9149232257906661, iteration: 378988
loss: 1.068390130996704,grad_norm: 0.8130601913655242, iteration: 378989
loss: 1.1560642719268799,grad_norm: 0.9999991542480864, iteration: 378990
loss: 1.028546929359436,grad_norm: 0.9280797170799263, iteration: 378991
loss: 1.044012427330017,grad_norm: 0.9999992914517991, iteration: 378992
loss: 1.0310912132263184,grad_norm: 0.999999534776831, iteration: 378993
loss: 0.9820462465286255,grad_norm: 0.8439694853818203, iteration: 378994
loss: 1.0620871782302856,grad_norm: 0.8485762552760465, iteration: 378995
loss: 0.9918169379234314,grad_norm: 0.9655224105176258, iteration: 378996
loss: 1.0515100955963135,grad_norm: 0.9999991236116749, iteration: 378997
loss: 1.0525987148284912,grad_norm: 0.9999990907573556, iteration: 378998
loss: 0.9906972050666809,grad_norm: 0.7246427050885331, iteration: 378999
loss: 1.0094141960144043,grad_norm: 0.7623024678705659, iteration: 379000
loss: 1.0232034921646118,grad_norm: 0.9154579649720884, iteration: 379001
loss: 1.0066547393798828,grad_norm: 0.8747193113295014, iteration: 379002
loss: 0.9770587682723999,grad_norm: 0.788992280256294, iteration: 379003
loss: 0.9673620462417603,grad_norm: 0.7808169381316769, iteration: 379004
loss: 1.0382510423660278,grad_norm: 0.9999994186869678, iteration: 379005
loss: 0.9870836138725281,grad_norm: 0.7820956633865215, iteration: 379006
loss: 1.1189707517623901,grad_norm: 0.9999999744957873, iteration: 379007
loss: 1.0924098491668701,grad_norm: 0.9999990709076239, iteration: 379008
loss: 0.99149090051651,grad_norm: 0.8972055297834076, iteration: 379009
loss: 1.0080833435058594,grad_norm: 0.6860249219505488, iteration: 379010
loss: 1.0114434957504272,grad_norm: 0.9075095856354419, iteration: 379011
loss: 1.0393595695495605,grad_norm: 0.8251184028130413, iteration: 379012
loss: 0.9944725632667542,grad_norm: 0.8881738774496368, iteration: 379013
loss: 1.0158511400222778,grad_norm: 0.745142276618215, iteration: 379014
loss: 1.0027414560317993,grad_norm: 0.8001454386598662, iteration: 379015
loss: 1.0067930221557617,grad_norm: 0.866196357866348, iteration: 379016
loss: 0.9906246066093445,grad_norm: 0.710241034207485, iteration: 379017
loss: 1.0846432447433472,grad_norm: 0.98807400503945, iteration: 379018
loss: 1.0278452634811401,grad_norm: 0.9738559500983374, iteration: 379019
loss: 1.0419738292694092,grad_norm: 0.8858152701947588, iteration: 379020
loss: 0.9963582158088684,grad_norm: 0.9999994125278487, iteration: 379021
loss: 0.9693796634674072,grad_norm: 0.9791081210590475, iteration: 379022
loss: 0.9848901629447937,grad_norm: 0.8945691587435793, iteration: 379023
loss: 1.078624963760376,grad_norm: 0.9999997570131633, iteration: 379024
loss: 0.990507185459137,grad_norm: 0.9092058807041712, iteration: 379025
loss: 0.9955437183380127,grad_norm: 0.7552415561102276, iteration: 379026
loss: 0.9953565001487732,grad_norm: 0.8871173845711333, iteration: 379027
loss: 1.0022531747817993,grad_norm: 0.8439368558821335, iteration: 379028
loss: 0.9799696207046509,grad_norm: 0.7869130355015238, iteration: 379029
loss: 1.045426368713379,grad_norm: 0.7230517091803348, iteration: 379030
loss: 0.9954721927642822,grad_norm: 0.6907862058816822, iteration: 379031
loss: 1.0241624116897583,grad_norm: 0.7263411327488845, iteration: 379032
loss: 0.9610479474067688,grad_norm: 0.7068086901978236, iteration: 379033
loss: 0.9980843663215637,grad_norm: 0.7804744661563425, iteration: 379034
loss: 1.0043681859970093,grad_norm: 0.8857995111596233, iteration: 379035
loss: 0.9776285290718079,grad_norm: 0.7643360732998522, iteration: 379036
loss: 1.0177035331726074,grad_norm: 0.9999991831271544, iteration: 379037
loss: 1.0667310953140259,grad_norm: 0.9999996067461194, iteration: 379038
loss: 1.0731086730957031,grad_norm: 0.9999992775957962, iteration: 379039
loss: 0.984441876411438,grad_norm: 0.8449742596871627, iteration: 379040
loss: 1.0407260656356812,grad_norm: 0.9999993909610857, iteration: 379041
loss: 1.0042474269866943,grad_norm: 0.999999272604246, iteration: 379042
loss: 1.065138578414917,grad_norm: 0.8817628294767679, iteration: 379043
loss: 0.99592125415802,grad_norm: 0.7911832791677205, iteration: 379044
loss: 0.9941372275352478,grad_norm: 0.8078179429143324, iteration: 379045
loss: 0.9904734492301941,grad_norm: 0.9457188309456215, iteration: 379046
loss: 1.010852575302124,grad_norm: 0.7828495646688383, iteration: 379047
loss: 1.0078883171081543,grad_norm: 0.999999358383194, iteration: 379048
loss: 1.0686628818511963,grad_norm: 0.9742060171425291, iteration: 379049
loss: 1.1701383590698242,grad_norm: 0.9999997997053116, iteration: 379050
loss: 1.0307905673980713,grad_norm: 0.9999994911822353, iteration: 379051
loss: 0.9745836853981018,grad_norm: 0.7918168099909276, iteration: 379052
loss: 1.073185682296753,grad_norm: 0.9999989295037495, iteration: 379053
loss: 0.9924407601356506,grad_norm: 0.7082933509305636, iteration: 379054
loss: 0.9920332431793213,grad_norm: 0.8355642020950108, iteration: 379055
loss: 1.0230307579040527,grad_norm: 0.9513429950129438, iteration: 379056
loss: 0.9610675573348999,grad_norm: 0.8889231819118774, iteration: 379057
loss: 1.0766903162002563,grad_norm: 0.9999990727360247, iteration: 379058
loss: 1.0457674264907837,grad_norm: 0.9999991217827202, iteration: 379059
loss: 0.9929659366607666,grad_norm: 0.7775871667466981, iteration: 379060
loss: 0.9808105230331421,grad_norm: 0.7840266868321707, iteration: 379061
loss: 1.0116840600967407,grad_norm: 0.9005516371049487, iteration: 379062
loss: 1.0279616117477417,grad_norm: 0.9999991127868766, iteration: 379063
loss: 1.0122277736663818,grad_norm: 0.7599023544735811, iteration: 379064
loss: 1.0050604343414307,grad_norm: 0.8170136828946768, iteration: 379065
loss: 1.011911392211914,grad_norm: 0.9999991564336987, iteration: 379066
loss: 1.0133322477340698,grad_norm: 0.9700347660845821, iteration: 379067
loss: 1.0244864225387573,grad_norm: 0.9999994967087136, iteration: 379068
loss: 1.0438036918640137,grad_norm: 0.783059353531667, iteration: 379069
loss: 1.0295051336288452,grad_norm: 0.9999999500197451, iteration: 379070
loss: 0.9899777173995972,grad_norm: 0.9999990600046652, iteration: 379071
loss: 1.0250216722488403,grad_norm: 0.7793240026128468, iteration: 379072
loss: 1.0136973857879639,grad_norm: 0.8738261314104763, iteration: 379073
loss: 1.003458023071289,grad_norm: 0.9080416928082904, iteration: 379074
loss: 1.1070749759674072,grad_norm: 0.8939040077859511, iteration: 379075
loss: 0.9666649699211121,grad_norm: 0.7209177970408908, iteration: 379076
loss: 1.0207682847976685,grad_norm: 0.8661207195256498, iteration: 379077
loss: 1.0324311256408691,grad_norm: 0.9999998966746914, iteration: 379078
loss: 1.0090259313583374,grad_norm: 0.7624346811261781, iteration: 379079
loss: 1.0021084547042847,grad_norm: 0.7866941312306492, iteration: 379080
loss: 0.9970189929008484,grad_norm: 0.8336128214023248, iteration: 379081
loss: 1.0683053731918335,grad_norm: 0.9999994773544691, iteration: 379082
loss: 1.037319302558899,grad_norm: 0.8484915034903752, iteration: 379083
loss: 1.1574194431304932,grad_norm: 0.9999992939316298, iteration: 379084
loss: 1.0187842845916748,grad_norm: 0.6972415496566298, iteration: 379085
loss: 1.0087591409683228,grad_norm: 0.8774083889946829, iteration: 379086
loss: 1.0481832027435303,grad_norm: 0.9999994287547883, iteration: 379087
loss: 1.0110116004943848,grad_norm: 0.7935726600597434, iteration: 379088
loss: 1.0014395713806152,grad_norm: 0.7875586169200554, iteration: 379089
loss: 0.996766209602356,grad_norm: 0.8187615016085521, iteration: 379090
loss: 0.9833619594573975,grad_norm: 0.9999991097417325, iteration: 379091
loss: 1.0376907587051392,grad_norm: 0.8324308992310638, iteration: 379092
loss: 0.9882509112358093,grad_norm: 0.9999991780285039, iteration: 379093
loss: 0.9954747557640076,grad_norm: 0.7022567938008669, iteration: 379094
loss: 0.9861116409301758,grad_norm: 0.9369342401843339, iteration: 379095
loss: 0.9901136755943298,grad_norm: 0.9042450183962993, iteration: 379096
loss: 1.0422183275222778,grad_norm: 0.9999992459544669, iteration: 379097
loss: 0.9918339848518372,grad_norm: 0.9219802382697208, iteration: 379098
loss: 1.0537768602371216,grad_norm: 0.9999992441549063, iteration: 379099
loss: 0.9809110760688782,grad_norm: 0.9838013989471952, iteration: 379100
loss: 1.069523572921753,grad_norm: 0.9999994623992872, iteration: 379101
loss: 1.0748202800750732,grad_norm: 0.7616876348150319, iteration: 379102
loss: 0.994476854801178,grad_norm: 0.7705193511262436, iteration: 379103
loss: 1.0967997312545776,grad_norm: 0.8420823724324523, iteration: 379104
loss: 0.9989356398582458,grad_norm: 0.7899407408011271, iteration: 379105
loss: 1.0158625841140747,grad_norm: 0.7373539982228825, iteration: 379106
loss: 1.0143349170684814,grad_norm: 0.8201792645640699, iteration: 379107
loss: 1.0266931056976318,grad_norm: 0.9999999017081834, iteration: 379108
loss: 0.9701771140098572,grad_norm: 0.716226692633088, iteration: 379109
loss: 1.000312328338623,grad_norm: 0.7983174007312857, iteration: 379110
loss: 0.9765332341194153,grad_norm: 0.7800651032547917, iteration: 379111
loss: 1.0121502876281738,grad_norm: 0.7113032998603241, iteration: 379112
loss: 1.0222711563110352,grad_norm: 0.7199240495338316, iteration: 379113
loss: 1.055673360824585,grad_norm: 0.9999993524531292, iteration: 379114
loss: 0.9722644090652466,grad_norm: 0.7574548256600326, iteration: 379115
loss: 1.0819108486175537,grad_norm: 0.9999993998126925, iteration: 379116
loss: 0.9574632048606873,grad_norm: 0.7670574943161667, iteration: 379117
loss: 0.9976614713668823,grad_norm: 0.7653064080636879, iteration: 379118
loss: 0.9865282773971558,grad_norm: 0.8172648874384437, iteration: 379119
loss: 1.0030385255813599,grad_norm: 0.7373534045318847, iteration: 379120
loss: 0.9586283564567566,grad_norm: 0.8767011347458464, iteration: 379121
loss: 1.0084532499313354,grad_norm: 0.8656739949287208, iteration: 379122
loss: 0.9650102853775024,grad_norm: 0.7362520147368976, iteration: 379123
loss: 1.0348811149597168,grad_norm: 0.671953003368488, iteration: 379124
loss: 0.9767559170722961,grad_norm: 0.9999989294618008, iteration: 379125
loss: 1.0212148427963257,grad_norm: 0.9999999177162655, iteration: 379126
loss: 1.009199857711792,grad_norm: 0.9999992765618725, iteration: 379127
loss: 0.9935211539268494,grad_norm: 0.6982006432731959, iteration: 379128
loss: 0.9912881851196289,grad_norm: 0.8144384412453025, iteration: 379129
loss: 0.9856160283088684,grad_norm: 0.8471204814635603, iteration: 379130
loss: 1.0007789134979248,grad_norm: 0.7947963353233005, iteration: 379131
loss: 1.0363569259643555,grad_norm: 0.9125778075240986, iteration: 379132
loss: 1.0166056156158447,grad_norm: 0.9999997027457729, iteration: 379133
loss: 1.001343846321106,grad_norm: 0.929955084977361, iteration: 379134
loss: 1.002193570137024,grad_norm: 0.9999991382470024, iteration: 379135
loss: 0.9727552533149719,grad_norm: 0.676579083673203, iteration: 379136
loss: 0.9947768449783325,grad_norm: 0.8578541572863604, iteration: 379137
loss: 1.0368316173553467,grad_norm: 0.9999995833280785, iteration: 379138
loss: 1.0036736726760864,grad_norm: 0.8201908231640792, iteration: 379139
loss: 1.0154500007629395,grad_norm: 0.8064608191435371, iteration: 379140
loss: 1.0122106075286865,grad_norm: 0.852557714188864, iteration: 379141
loss: 1.0151642560958862,grad_norm: 0.8863744378041427, iteration: 379142
loss: 0.9971626996994019,grad_norm: 0.8105533786729466, iteration: 379143
loss: 1.0005322694778442,grad_norm: 0.8109323905364759, iteration: 379144
loss: 1.009679913520813,grad_norm: 0.999998929764425, iteration: 379145
loss: 1.011446237564087,grad_norm: 0.8111868623825769, iteration: 379146
loss: 1.033882737159729,grad_norm: 0.9999998526048021, iteration: 379147
loss: 0.9886487722396851,grad_norm: 0.6585601516931465, iteration: 379148
loss: 1.0848991870880127,grad_norm: 0.9080559517023449, iteration: 379149
loss: 0.980252742767334,grad_norm: 0.7731040908974597, iteration: 379150
loss: 1.0065741539001465,grad_norm: 0.9860553407984379, iteration: 379151
loss: 1.0554301738739014,grad_norm: 0.6514191078527435, iteration: 379152
loss: 0.9833020567893982,grad_norm: 0.8291373968645239, iteration: 379153
loss: 1.0190845727920532,grad_norm: 0.9999992962509725, iteration: 379154
loss: 1.0144644975662231,grad_norm: 0.7260747195850299, iteration: 379155
loss: 0.9873120784759521,grad_norm: 0.6736955068877051, iteration: 379156
loss: 0.9712665677070618,grad_norm: 0.8868082661224437, iteration: 379157
loss: 1.0011845827102661,grad_norm: 0.7900394153010165, iteration: 379158
loss: 1.040848970413208,grad_norm: 0.9999991993773302, iteration: 379159
loss: 1.0661931037902832,grad_norm: 0.9999992415312386, iteration: 379160
loss: 1.0207524299621582,grad_norm: 0.8355650194524725, iteration: 379161
loss: 1.006043553352356,grad_norm: 0.8010546918872032, iteration: 379162
loss: 1.0150136947631836,grad_norm: 0.9999999028906358, iteration: 379163
loss: 1.033909797668457,grad_norm: 0.8992717813452837, iteration: 379164
loss: 1.0783534049987793,grad_norm: 0.9999999753887716, iteration: 379165
loss: 0.9855203032493591,grad_norm: 0.6730189774849431, iteration: 379166
loss: 1.0362484455108643,grad_norm: 0.9999990710625067, iteration: 379167
loss: 1.0057939291000366,grad_norm: 0.7297930686800105, iteration: 379168
loss: 0.9957457184791565,grad_norm: 0.6185773489348249, iteration: 379169
loss: 1.016464114189148,grad_norm: 0.7042527312626925, iteration: 379170
loss: 0.9921213984489441,grad_norm: 0.8845601974922823, iteration: 379171
loss: 0.964229166507721,grad_norm: 0.7519336569096672, iteration: 379172
loss: 0.9784075617790222,grad_norm: 0.7236431193769317, iteration: 379173
loss: 0.9939825534820557,grad_norm: 0.6976758341140773, iteration: 379174
loss: 0.9927016496658325,grad_norm: 0.7567745150599193, iteration: 379175
loss: 1.0514932870864868,grad_norm: 0.9999993902392295, iteration: 379176
loss: 1.0056623220443726,grad_norm: 0.8485309542813717, iteration: 379177
loss: 0.9936372637748718,grad_norm: 0.6699847029505084, iteration: 379178
loss: 1.0001869201660156,grad_norm: 0.7413600646991572, iteration: 379179
loss: 1.0259060859680176,grad_norm: 0.6909835411573377, iteration: 379180
loss: 0.9797942042350769,grad_norm: 0.9993261783548288, iteration: 379181
loss: 1.0219833850860596,grad_norm: 0.8032498928644356, iteration: 379182
loss: 0.9996387362480164,grad_norm: 0.8464160941876345, iteration: 379183
loss: 0.9957979917526245,grad_norm: 0.7116378934765424, iteration: 379184
loss: 0.983140230178833,grad_norm: 0.7884502071710497, iteration: 379185
loss: 1.0222830772399902,grad_norm: 0.936740222306784, iteration: 379186
loss: 1.0197759866714478,grad_norm: 0.7201412155779648, iteration: 379187
loss: 1.0190553665161133,grad_norm: 0.9054481001935942, iteration: 379188
loss: 0.9668338894844055,grad_norm: 0.7788317584810537, iteration: 379189
loss: 0.9682847261428833,grad_norm: 0.9355190091415297, iteration: 379190
loss: 1.0070576667785645,grad_norm: 0.9336911494147859, iteration: 379191
loss: 1.0092698335647583,grad_norm: 0.7011891867435073, iteration: 379192
loss: 0.9667913913726807,grad_norm: 0.8925105308757946, iteration: 379193
loss: 1.0655066967010498,grad_norm: 0.9634360325475368, iteration: 379194
loss: 1.0254194736480713,grad_norm: 0.6989728897912688, iteration: 379195
loss: 0.9896501898765564,grad_norm: 0.9999993997451323, iteration: 379196
loss: 1.0313196182250977,grad_norm: 0.8612617686915357, iteration: 379197
loss: 0.9948624968528748,grad_norm: 0.9242647598718521, iteration: 379198
loss: 1.1052463054656982,grad_norm: 0.6605937064017994, iteration: 379199
loss: 1.0147334337234497,grad_norm: 0.9999991739141457, iteration: 379200
loss: 1.013833999633789,grad_norm: 0.9999993770142968, iteration: 379201
loss: 0.9655267000198364,grad_norm: 0.7591577821058536, iteration: 379202
loss: 1.0261151790618896,grad_norm: 0.8423479220377449, iteration: 379203
loss: 1.0257874727249146,grad_norm: 0.8445668234565409, iteration: 379204
loss: 0.9969277381896973,grad_norm: 0.6540885590182149, iteration: 379205
loss: 0.9931363463401794,grad_norm: 0.7782831123549363, iteration: 379206
loss: 1.015916109085083,grad_norm: 0.8763287527434127, iteration: 379207
loss: 1.017996907234192,grad_norm: 0.9078746435207881, iteration: 379208
loss: 1.0039175748825073,grad_norm: 0.927280923403094, iteration: 379209
loss: 1.0015785694122314,grad_norm: 0.7589134295335949, iteration: 379210
loss: 1.0878503322601318,grad_norm: 0.9999990495846683, iteration: 379211
loss: 1.0905578136444092,grad_norm: 0.964505884245246, iteration: 379212
loss: 1.079803705215454,grad_norm: 0.9999992592045738, iteration: 379213
loss: 1.0051262378692627,grad_norm: 0.8010678190812303, iteration: 379214
loss: 0.9824484586715698,grad_norm: 0.7773732195832845, iteration: 379215
loss: 1.0129181146621704,grad_norm: 0.7549937026405099, iteration: 379216
loss: 0.9944180846214294,grad_norm: 0.7763871299727159, iteration: 379217
loss: 1.0034514665603638,grad_norm: 0.7212471193591217, iteration: 379218
loss: 0.9928630590438843,grad_norm: 0.7556553836114478, iteration: 379219
loss: 1.0309510231018066,grad_norm: 0.9316830846704427, iteration: 379220
loss: 1.005521535873413,grad_norm: 0.7255782302363205, iteration: 379221
loss: 0.9719943404197693,grad_norm: 0.7507946074637426, iteration: 379222
loss: 0.9818085432052612,grad_norm: 0.742042497807675, iteration: 379223
loss: 1.0836597681045532,grad_norm: 0.9999996878157993, iteration: 379224
loss: 1.0465319156646729,grad_norm: 0.8256638205171865, iteration: 379225
loss: 1.0226870775222778,grad_norm: 0.9999991989438458, iteration: 379226
loss: 1.0053794384002686,grad_norm: 0.8112472148986419, iteration: 379227
loss: 1.0256725549697876,grad_norm: 0.9999991418524428, iteration: 379228
loss: 1.0700769424438477,grad_norm: 0.9999992306387324, iteration: 379229
loss: 1.026871681213379,grad_norm: 0.7912042754556007, iteration: 379230
loss: 1.0059888362884521,grad_norm: 0.7707364052349629, iteration: 379231
loss: 1.0313769578933716,grad_norm: 0.7731012659521687, iteration: 379232
loss: 0.9687055349349976,grad_norm: 0.9999995194172507, iteration: 379233
loss: 0.9774335622787476,grad_norm: 0.7462103890935353, iteration: 379234
loss: 0.9951005578041077,grad_norm: 0.809898611522819, iteration: 379235
loss: 1.0378161668777466,grad_norm: 0.9999999117942215, iteration: 379236
loss: 1.0402617454528809,grad_norm: 0.7489122601745216, iteration: 379237
loss: 0.9740692973136902,grad_norm: 0.9682054065520814, iteration: 379238
loss: 1.021873116493225,grad_norm: 0.7695404084004064, iteration: 379239
loss: 0.9844000339508057,grad_norm: 0.9999990660476651, iteration: 379240
loss: 1.0183873176574707,grad_norm: 0.7847779813089617, iteration: 379241
loss: 1.1302787065505981,grad_norm: 0.9999996304520843, iteration: 379242
loss: 1.0881465673446655,grad_norm: 0.8081469938335206, iteration: 379243
loss: 1.0089843273162842,grad_norm: 0.9842350341618373, iteration: 379244
loss: 1.0600284337997437,grad_norm: 0.9893080563660887, iteration: 379245
loss: 1.0158966779708862,grad_norm: 0.8114207521453575, iteration: 379246
loss: 0.9959204196929932,grad_norm: 0.8693897907198901, iteration: 379247
loss: 0.9718780517578125,grad_norm: 0.6282076482538659, iteration: 379248
loss: 1.0007143020629883,grad_norm: 0.9999991823900103, iteration: 379249
loss: 0.9732459187507629,grad_norm: 0.7328995225287734, iteration: 379250
loss: 1.0155487060546875,grad_norm: 0.7607880045713088, iteration: 379251
loss: 0.9655165672302246,grad_norm: 0.852144377020806, iteration: 379252
loss: 0.9821770191192627,grad_norm: 0.8636308037209172, iteration: 379253
loss: 0.9780162572860718,grad_norm: 0.8046680297465307, iteration: 379254
loss: 1.041609764099121,grad_norm: 0.7536817588000176, iteration: 379255
loss: 1.0259913206100464,grad_norm: 0.9999992343523302, iteration: 379256
loss: 0.9714749455451965,grad_norm: 0.8394597269836392, iteration: 379257
loss: 1.016459584236145,grad_norm: 0.7540288124706157, iteration: 379258
loss: 0.9975140690803528,grad_norm: 0.8001300777056762, iteration: 379259
loss: 0.9806545972824097,grad_norm: 0.8373985410189273, iteration: 379260
loss: 0.9999476671218872,grad_norm: 0.9999994837592691, iteration: 379261
loss: 0.9923032522201538,grad_norm: 0.6437332280523593, iteration: 379262
loss: 1.0111877918243408,grad_norm: 0.9999993121861904, iteration: 379263
loss: 0.9355307817459106,grad_norm: 0.8678628139767619, iteration: 379264
loss: 1.029135823249817,grad_norm: 0.9999997040262477, iteration: 379265
loss: 0.9934938549995422,grad_norm: 0.709848460293076, iteration: 379266
loss: 1.0302696228027344,grad_norm: 0.949912435221976, iteration: 379267
loss: 1.0450786352157593,grad_norm: 0.999999362027536, iteration: 379268
loss: 1.0332388877868652,grad_norm: 0.7481543996747638, iteration: 379269
loss: 0.9898821115493774,grad_norm: 0.9999990474977041, iteration: 379270
loss: 1.176053524017334,grad_norm: 0.9999996363760764, iteration: 379271
loss: 1.0062406063079834,grad_norm: 0.8005720943219834, iteration: 379272
loss: 1.1905843019485474,grad_norm: 0.9999995460212666, iteration: 379273
loss: 0.9946719408035278,grad_norm: 0.7745062442177658, iteration: 379274
loss: 1.019885540008545,grad_norm: 0.8366784794630122, iteration: 379275
loss: 1.0928558111190796,grad_norm: 0.8064246662992879, iteration: 379276
loss: 0.9777641296386719,grad_norm: 0.8801228714478427, iteration: 379277
loss: 1.0032753944396973,grad_norm: 0.8351349507112502, iteration: 379278
loss: 1.0428701639175415,grad_norm: 0.9999993104151805, iteration: 379279
loss: 0.9913594722747803,grad_norm: 0.7161388903143133, iteration: 379280
loss: 1.0050227642059326,grad_norm: 0.9211581107410357, iteration: 379281
loss: 1.0285104513168335,grad_norm: 0.9999991630307091, iteration: 379282
loss: 1.3825019598007202,grad_norm: 0.9999996153134287, iteration: 379283
loss: 1.0148403644561768,grad_norm: 0.864206758999766, iteration: 379284
loss: 1.0071172714233398,grad_norm: 0.6519980682450325, iteration: 379285
loss: 1.0065031051635742,grad_norm: 0.9536690271806861, iteration: 379286
loss: 1.0037128925323486,grad_norm: 0.7105989651485021, iteration: 379287
loss: 1.0053846836090088,grad_norm: 0.8368687534063034, iteration: 379288
loss: 0.9869017004966736,grad_norm: 0.765800582412411, iteration: 379289
loss: 0.9974061846733093,grad_norm: 0.7650352172963516, iteration: 379290
loss: 1.0295851230621338,grad_norm: 0.7442693186883071, iteration: 379291
loss: 1.0024393796920776,grad_norm: 0.696549300103978, iteration: 379292
loss: 1.0584126710891724,grad_norm: 0.8808599527118759, iteration: 379293
loss: 0.9848202466964722,grad_norm: 0.9379419266764087, iteration: 379294
loss: 1.0622684955596924,grad_norm: 0.9999991620734181, iteration: 379295
loss: 0.9745045900344849,grad_norm: 0.8339241157819465, iteration: 379296
loss: 1.0209248065948486,grad_norm: 0.80629288211058, iteration: 379297
loss: 1.028252363204956,grad_norm: 0.9999994171338882, iteration: 379298
loss: 0.9934952259063721,grad_norm: 0.8145503358051184, iteration: 379299
loss: 0.9948346018791199,grad_norm: 0.7176910721376282, iteration: 379300
loss: 1.0477432012557983,grad_norm: 0.9999997630341928, iteration: 379301
loss: 1.040925145149231,grad_norm: 0.7903807900147359, iteration: 379302
loss: 1.0314888954162598,grad_norm: 0.9999994500987477, iteration: 379303
loss: 1.0750170946121216,grad_norm: 0.9467726699326194, iteration: 379304
loss: 1.0671756267547607,grad_norm: 0.9999993839685795, iteration: 379305
loss: 0.9566609263420105,grad_norm: 0.7682765130321935, iteration: 379306
loss: 1.018511176109314,grad_norm: 0.9999995954721836, iteration: 379307
loss: 1.006366491317749,grad_norm: 0.7396835055430272, iteration: 379308
loss: 1.0950264930725098,grad_norm: 0.827817140317897, iteration: 379309
loss: 0.9797454476356506,grad_norm: 0.78158871744853, iteration: 379310
loss: 0.9402441382408142,grad_norm: 0.8420819897783182, iteration: 379311
loss: 1.0075143575668335,grad_norm: 0.8234833634452259, iteration: 379312
loss: 0.9825857877731323,grad_norm: 0.8590779389769886, iteration: 379313
loss: 0.9890369176864624,grad_norm: 0.7172877421776755, iteration: 379314
loss: 0.9734200239181519,grad_norm: 0.7784439950339384, iteration: 379315
loss: 0.9977512955665588,grad_norm: 0.7612577185009597, iteration: 379316
loss: 0.960375964641571,grad_norm: 0.751454213063143, iteration: 379317
loss: 1.013940691947937,grad_norm: 0.8538468828365253, iteration: 379318
loss: 0.9866743087768555,grad_norm: 0.9500105222966645, iteration: 379319
loss: 0.9854161739349365,grad_norm: 0.7913922114934625, iteration: 379320
loss: 1.0322086811065674,grad_norm: 0.9999995284849208, iteration: 379321
loss: 1.0193172693252563,grad_norm: 0.6653594929447548, iteration: 379322
loss: 1.0835723876953125,grad_norm: 0.852248185134839, iteration: 379323
loss: 0.985432505607605,grad_norm: 0.756039809807475, iteration: 379324
loss: 1.1765776872634888,grad_norm: 0.9999991736871874, iteration: 379325
loss: 1.1947851181030273,grad_norm: 0.9227419893191252, iteration: 379326
loss: 1.0071171522140503,grad_norm: 0.8860699397622537, iteration: 379327
loss: 1.0056915283203125,grad_norm: 0.6889388717793719, iteration: 379328
loss: 1.0736701488494873,grad_norm: 0.9999994398156555, iteration: 379329
loss: 1.1172738075256348,grad_norm: 0.9999991245857001, iteration: 379330
loss: 1.3193567991256714,grad_norm: 0.9999993896290722, iteration: 379331
loss: 1.0409892797470093,grad_norm: 0.7358411033058885, iteration: 379332
loss: 1.328198790550232,grad_norm: 0.9999994148617326, iteration: 379333
loss: 0.9726940393447876,grad_norm: 0.7035477674791525, iteration: 379334
loss: 1.0572452545166016,grad_norm: 0.9999994088728437, iteration: 379335
loss: 1.0226572751998901,grad_norm: 0.8751299417991676, iteration: 379336
loss: 0.9990319013595581,grad_norm: 0.9999991467929272, iteration: 379337
loss: 1.0524965524673462,grad_norm: 0.9999995120681053, iteration: 379338
loss: 1.0351864099502563,grad_norm: 0.839125413399021, iteration: 379339
loss: 1.3667292594909668,grad_norm: 0.9999994631455811, iteration: 379340
loss: 1.1012862920761108,grad_norm: 0.9999997336316004, iteration: 379341
loss: 0.9744873046875,grad_norm: 0.8011757209810529, iteration: 379342
loss: 1.016005516052246,grad_norm: 0.7470703000632679, iteration: 379343
loss: 1.1841747760772705,grad_norm: 0.9999999861923616, iteration: 379344
loss: 1.003605842590332,grad_norm: 0.830724286170997, iteration: 379345
loss: 1.1434568166732788,grad_norm: 0.9999992609631417, iteration: 379346
loss: 0.9871863722801208,grad_norm: 0.7964096913987734, iteration: 379347
loss: 1.0401908159255981,grad_norm: 0.6975039866448796, iteration: 379348
loss: 1.034570574760437,grad_norm: 0.9999999782753838, iteration: 379349
loss: 1.0044550895690918,grad_norm: 0.7642235574779941, iteration: 379350
loss: 1.0421231985092163,grad_norm: 0.7759144597466601, iteration: 379351
loss: 0.9815875291824341,grad_norm: 0.9999996820728332, iteration: 379352
loss: 1.0843477249145508,grad_norm: 0.9999991502354961, iteration: 379353
loss: 1.151914119720459,grad_norm: 0.9999994577520025, iteration: 379354
loss: 1.031633973121643,grad_norm: 0.8872417594504923, iteration: 379355
loss: 0.9876084923744202,grad_norm: 0.6868064500890017, iteration: 379356
loss: 0.9815495014190674,grad_norm: 0.8863773677469055, iteration: 379357
loss: 0.9757393002510071,grad_norm: 0.8507047510437766, iteration: 379358
loss: 0.9979040026664734,grad_norm: 0.942983331711822, iteration: 379359
loss: 1.112722396850586,grad_norm: 0.999999815305192, iteration: 379360
loss: 1.0201210975646973,grad_norm: 0.7441797978750246, iteration: 379361
loss: 1.0668578147888184,grad_norm: 0.8728396903288145, iteration: 379362
loss: 1.0289627313613892,grad_norm: 0.9999989826310195, iteration: 379363
loss: 1.0112254619598389,grad_norm: 0.9999990315619944, iteration: 379364
loss: 1.3257811069488525,grad_norm: 0.9999999505960337, iteration: 379365
loss: 0.9792336821556091,grad_norm: 0.8068357663212403, iteration: 379366
loss: 1.0349266529083252,grad_norm: 0.8726581660381478, iteration: 379367
loss: 1.0005735158920288,grad_norm: 0.787193364627009, iteration: 379368
loss: 1.0500799417495728,grad_norm: 0.999999305094879, iteration: 379369
loss: 1.117565393447876,grad_norm: 0.9999995157883705, iteration: 379370
loss: 0.9886369109153748,grad_norm: 0.9999993603120227, iteration: 379371
loss: 0.9985807538032532,grad_norm: 0.9999998345875709, iteration: 379372
loss: 0.9860587120056152,grad_norm: 0.7824243912103552, iteration: 379373
loss: 1.2193337678909302,grad_norm: 0.999999757893543, iteration: 379374
loss: 1.004041075706482,grad_norm: 0.9398387581256369, iteration: 379375
loss: 1.238892674446106,grad_norm: 1.0000000198611325, iteration: 379376
loss: 1.0038576126098633,grad_norm: 0.7321086735004397, iteration: 379377
loss: 0.9906938672065735,grad_norm: 0.8191821859406496, iteration: 379378
loss: 0.9954196214675903,grad_norm: 0.864594179782964, iteration: 379379
loss: 0.9953853487968445,grad_norm: 0.7202830644811214, iteration: 379380
loss: 1.0208715200424194,grad_norm: 0.8373887806053765, iteration: 379381
loss: 0.9791569113731384,grad_norm: 0.9999993372815613, iteration: 379382
loss: 0.9546987414360046,grad_norm: 0.7720267234761534, iteration: 379383
loss: 0.9481682777404785,grad_norm: 0.8744553930873626, iteration: 379384
loss: 1.0910429954528809,grad_norm: 0.9999996545749856, iteration: 379385
loss: 1.0333484411239624,grad_norm: 0.7514167491734317, iteration: 379386
loss: 1.0210355520248413,grad_norm: 0.7775368859569479, iteration: 379387
loss: 1.1309428215026855,grad_norm: 0.9661599941395245, iteration: 379388
loss: 1.0091341733932495,grad_norm: 0.6836157395424677, iteration: 379389
loss: 0.977954626083374,grad_norm: 0.9999999497319143, iteration: 379390
loss: 1.0025064945220947,grad_norm: 0.7101192405489805, iteration: 379391
loss: 1.0224788188934326,grad_norm: 0.876389376926011, iteration: 379392
loss: 1.0342636108398438,grad_norm: 0.8934926176094095, iteration: 379393
loss: 1.0037134885787964,grad_norm: 0.9999993040699386, iteration: 379394
loss: 1.1179875135421753,grad_norm: 0.9999995964143601, iteration: 379395
loss: 0.9755691289901733,grad_norm: 0.8143843368657143, iteration: 379396
loss: 1.0061005353927612,grad_norm: 0.812437689780175, iteration: 379397
loss: 1.0544016361236572,grad_norm: 0.9999990934867586, iteration: 379398
loss: 0.9770627617835999,grad_norm: 0.8459037628542778, iteration: 379399
loss: 0.9976564645767212,grad_norm: 0.9999991921042618, iteration: 379400
loss: 1.0316369533538818,grad_norm: 0.7976458069454717, iteration: 379401
loss: 1.1999034881591797,grad_norm: 0.9999999016585102, iteration: 379402
loss: 1.068715214729309,grad_norm: 0.9999999719004672, iteration: 379403
loss: 1.0213791131973267,grad_norm: 0.8465145717921314, iteration: 379404
loss: 1.0557219982147217,grad_norm: 0.8127180600486179, iteration: 379405
loss: 1.000486969947815,grad_norm: 0.812168856932344, iteration: 379406
loss: 1.0118123292922974,grad_norm: 0.8315594879955381, iteration: 379407
loss: 1.0026445388793945,grad_norm: 0.8494123195179598, iteration: 379408
loss: 1.0583269596099854,grad_norm: 0.9687598616515152, iteration: 379409
loss: 1.1229890584945679,grad_norm: 0.9110348037901145, iteration: 379410
loss: 1.0566949844360352,grad_norm: 0.9999996007410951, iteration: 379411
loss: 1.0091121196746826,grad_norm: 0.7526563100171771, iteration: 379412
loss: 0.9927430152893066,grad_norm: 0.7432109569946639, iteration: 379413
loss: 1.0271344184875488,grad_norm: 0.845000707514506, iteration: 379414
loss: 1.0126675367355347,grad_norm: 0.7766955902108861, iteration: 379415
loss: 1.0040526390075684,grad_norm: 0.8627594654323306, iteration: 379416
loss: 1.0185189247131348,grad_norm: 0.987745867748879, iteration: 379417
loss: 0.9558843374252319,grad_norm: 0.8608363722487706, iteration: 379418
loss: 1.0271148681640625,grad_norm: 0.7796214393588643, iteration: 379419
loss: 1.0238837003707886,grad_norm: 0.9999993373327817, iteration: 379420
loss: 0.9588573575019836,grad_norm: 0.7588379922209685, iteration: 379421
loss: 1.0394848585128784,grad_norm: 0.7387451478626054, iteration: 379422
loss: 1.0047022104263306,grad_norm: 0.7579634279324502, iteration: 379423
loss: 1.002851963043213,grad_norm: 0.9999997431451548, iteration: 379424
loss: 0.9879480600357056,grad_norm: 0.9999998408484194, iteration: 379425
loss: 1.0853514671325684,grad_norm: 0.9706315803490033, iteration: 379426
loss: 1.069926381111145,grad_norm: 0.9999999352315487, iteration: 379427
loss: 1.0043658018112183,grad_norm: 0.7374634590360644, iteration: 379428
loss: 1.0070587396621704,grad_norm: 0.7912155130201451, iteration: 379429
loss: 1.1017674207687378,grad_norm: 0.9999991145685911, iteration: 379430
loss: 0.9588816165924072,grad_norm: 0.7888667909853165, iteration: 379431
loss: 1.0527915954589844,grad_norm: 0.9999999101380471, iteration: 379432
loss: 1.021134853363037,grad_norm: 0.7306492742006014, iteration: 379433
loss: 1.015284776687622,grad_norm: 0.9265921771864408, iteration: 379434
loss: 1.0266733169555664,grad_norm: 0.9999993923930082, iteration: 379435
loss: 1.062658429145813,grad_norm: 0.9999990581170408, iteration: 379436
loss: 0.9885955452919006,grad_norm: 0.7861294218744004, iteration: 379437
loss: 0.9736753702163696,grad_norm: 0.9957688781284539, iteration: 379438
loss: 0.977214515209198,grad_norm: 0.9430371430005099, iteration: 379439
loss: 1.0024877786636353,grad_norm: 0.7383031139965163, iteration: 379440
loss: 0.9885424375534058,grad_norm: 0.8993736477343018, iteration: 379441
loss: 0.9954820871353149,grad_norm: 0.6978377501047381, iteration: 379442
loss: 0.9844161868095398,grad_norm: 0.7528020370242968, iteration: 379443
loss: 0.9795364737510681,grad_norm: 0.7418579049422172, iteration: 379444
loss: 1.0702171325683594,grad_norm: 0.9999994617549458, iteration: 379445
loss: 1.0017579793930054,grad_norm: 0.9999991215640953, iteration: 379446
loss: 0.986454427242279,grad_norm: 0.9950462711058494, iteration: 379447
loss: 0.9950591921806335,grad_norm: 0.764153530398404, iteration: 379448
loss: 0.9847749471664429,grad_norm: 0.9999991255630205, iteration: 379449
loss: 0.9684743881225586,grad_norm: 0.7644087210328638, iteration: 379450
loss: 1.0165311098098755,grad_norm: 0.9999995611206571, iteration: 379451
loss: 1.0161762237548828,grad_norm: 0.8318925284165936, iteration: 379452
loss: 0.9743835926055908,grad_norm: 0.8285842393947972, iteration: 379453
loss: 1.1583969593048096,grad_norm: 0.9999991375884085, iteration: 379454
loss: 0.95376056432724,grad_norm: 0.8622109298407737, iteration: 379455
loss: 1.0262932777404785,grad_norm: 0.8149094420449514, iteration: 379456
loss: 1.0042688846588135,grad_norm: 0.8121243312060871, iteration: 379457
loss: 1.2247262001037598,grad_norm: 1.0000000044969208, iteration: 379458
loss: 1.0020332336425781,grad_norm: 0.7744392884868125, iteration: 379459
loss: 1.0868364572525024,grad_norm: 0.9999998924201903, iteration: 379460
loss: 0.9983943700790405,grad_norm: 0.9324745257177451, iteration: 379461
loss: 1.05635666847229,grad_norm: 0.6904465486001113, iteration: 379462
loss: 0.9471098184585571,grad_norm: 0.7017337416390632, iteration: 379463
loss: 1.1473606824874878,grad_norm: 0.9999999324676427, iteration: 379464
loss: 1.0094901323318481,grad_norm: 0.999999106418953, iteration: 379465
loss: 1.0204726457595825,grad_norm: 0.6960775005996721, iteration: 379466
loss: 0.9723111391067505,grad_norm: 0.9999996799536173, iteration: 379467
loss: 1.0375648736953735,grad_norm: 0.9999998555306312, iteration: 379468
loss: 1.0290215015411377,grad_norm: 0.9587487863947969, iteration: 379469
loss: 1.0241141319274902,grad_norm: 0.8056702133650918, iteration: 379470
loss: 1.0121926069259644,grad_norm: 0.8645276890644357, iteration: 379471
loss: 0.986856997013092,grad_norm: 0.7331601326303537, iteration: 379472
loss: 0.9950363039970398,grad_norm: 0.7876407182226919, iteration: 379473
loss: 1.0340110063552856,grad_norm: 0.9225180526082403, iteration: 379474
loss: 0.9983676671981812,grad_norm: 0.7184723880908374, iteration: 379475
loss: 0.9639701247215271,grad_norm: 0.8165241089798987, iteration: 379476
loss: 0.9768764972686768,grad_norm: 0.760086870863903, iteration: 379477
loss: 1.027804970741272,grad_norm: 0.9276501438624248, iteration: 379478
loss: 1.0259894132614136,grad_norm: 0.8273808529579757, iteration: 379479
loss: 1.0158071517944336,grad_norm: 0.703829356307433, iteration: 379480
loss: 1.0238327980041504,grad_norm: 0.8085472294760401, iteration: 379481
loss: 1.010482668876648,grad_norm: 0.6984071003792751, iteration: 379482
loss: 1.0720754861831665,grad_norm: 0.9999999878195531, iteration: 379483
loss: 0.9735420346260071,grad_norm: 0.6838316421767002, iteration: 379484
loss: 0.9816606640815735,grad_norm: 0.9999995200308379, iteration: 379485
loss: 1.0466110706329346,grad_norm: 0.8202204464369284, iteration: 379486
loss: 1.0175166130065918,grad_norm: 0.7189030038241941, iteration: 379487
loss: 1.017142653465271,grad_norm: 0.7120572655592978, iteration: 379488
loss: 1.0358766317367554,grad_norm: 0.9999999547539383, iteration: 379489
loss: 1.0063165426254272,grad_norm: 0.9035796641735835, iteration: 379490
loss: 1.019958734512329,grad_norm: 0.7766447983401983, iteration: 379491
loss: 1.0193997621536255,grad_norm: 0.9999994264540991, iteration: 379492
loss: 1.0192480087280273,grad_norm: 0.9999996343909069, iteration: 379493
loss: 0.9741232991218567,grad_norm: 0.7286369389646274, iteration: 379494
loss: 1.0020897388458252,grad_norm: 0.8239918845985376, iteration: 379495
loss: 0.9546158313751221,grad_norm: 0.8779148335558974, iteration: 379496
loss: 1.0592906475067139,grad_norm: 0.9999995205016083, iteration: 379497
loss: 1.0146225690841675,grad_norm: 0.9999993737639999, iteration: 379498
loss: 0.9945751428604126,grad_norm: 0.6903843155695937, iteration: 379499
loss: 0.9634172320365906,grad_norm: 0.7512647304824885, iteration: 379500
loss: 1.1068350076675415,grad_norm: 0.9999993344399297, iteration: 379501
loss: 0.9801619648933411,grad_norm: 0.7179158218043972, iteration: 379502
loss: 0.9806551933288574,grad_norm: 0.9999994123434358, iteration: 379503
loss: 0.9855071902275085,grad_norm: 0.8801877664726917, iteration: 379504
loss: 0.9858014583587646,grad_norm: 0.8699827766726881, iteration: 379505
loss: 1.0353870391845703,grad_norm: 0.9999993541112719, iteration: 379506
loss: 1.0535670518875122,grad_norm: 0.879781944127388, iteration: 379507
loss: 0.996429443359375,grad_norm: 0.8711109354865076, iteration: 379508
loss: 0.9918046593666077,grad_norm: 0.6373775197237439, iteration: 379509
loss: 1.0071687698364258,grad_norm: 0.6281111917077287, iteration: 379510
loss: 1.0256481170654297,grad_norm: 0.8051067192846796, iteration: 379511
loss: 1.0779869556427002,grad_norm: 0.9999993104385594, iteration: 379512
loss: 1.0946508646011353,grad_norm: 0.9999996580755666, iteration: 379513
loss: 1.1374591588974,grad_norm: 0.9999992665702224, iteration: 379514
loss: 1.0537266731262207,grad_norm: 0.9999997116511085, iteration: 379515
loss: 0.9599300622940063,grad_norm: 0.771497539728582, iteration: 379516
loss: 1.0280259847640991,grad_norm: 0.9999990637847381, iteration: 379517
loss: 0.9999240040779114,grad_norm: 0.7063229791298964, iteration: 379518
loss: 1.1672192811965942,grad_norm: 0.999999335638502, iteration: 379519
loss: 1.030187726020813,grad_norm: 0.9999999644894468, iteration: 379520
loss: 1.0634714365005493,grad_norm: 0.9999999237053244, iteration: 379521
loss: 1.0048246383666992,grad_norm: 0.8122766377170263, iteration: 379522
loss: 1.064583420753479,grad_norm: 0.9999996435768427, iteration: 379523
loss: 1.0337092876434326,grad_norm: 0.9999991679054591, iteration: 379524
loss: 1.0133672952651978,grad_norm: 0.7266904106404117, iteration: 379525
loss: 1.0130610466003418,grad_norm: 0.9999998830479093, iteration: 379526
loss: 0.9883632659912109,grad_norm: 0.8807691290992126, iteration: 379527
loss: 1.0259712934494019,grad_norm: 0.8488515904216948, iteration: 379528
loss: 0.991902768611908,grad_norm: 0.8861887834005475, iteration: 379529
loss: 1.0336741209030151,grad_norm: 0.7337924253464571, iteration: 379530
loss: 1.1407415866851807,grad_norm: 0.9999996088487532, iteration: 379531
loss: 1.0509047508239746,grad_norm: 0.9999999602169435, iteration: 379532
loss: 1.0328927040100098,grad_norm: 0.7762215345507316, iteration: 379533
loss: 1.0171452760696411,grad_norm: 0.866661442715097, iteration: 379534
loss: 1.0057626962661743,grad_norm: 0.8992389278055264, iteration: 379535
loss: 1.0591410398483276,grad_norm: 0.9351473705369564, iteration: 379536
loss: 1.163312315940857,grad_norm: 0.9999996608966444, iteration: 379537
loss: 0.9509307742118835,grad_norm: 0.7613154720172788, iteration: 379538
loss: 1.0038859844207764,grad_norm: 0.8889568744634758, iteration: 379539
loss: 0.9990405440330505,grad_norm: 0.8077367645592238, iteration: 379540
loss: 1.0439167022705078,grad_norm: 0.9999992733343008, iteration: 379541
loss: 1.0226566791534424,grad_norm: 0.9999990465034266, iteration: 379542
loss: 1.1734254360198975,grad_norm: 0.7770881089136052, iteration: 379543
loss: 0.9812124967575073,grad_norm: 0.8747488735344124, iteration: 379544
loss: 1.0177761316299438,grad_norm: 0.8795796674255445, iteration: 379545
loss: 1.0283085107803345,grad_norm: 0.7577739836308715, iteration: 379546
loss: 1.0701154470443726,grad_norm: 0.9922260115884973, iteration: 379547
loss: 0.9601125121116638,grad_norm: 0.7467892154963257, iteration: 379548
loss: 0.9874100685119629,grad_norm: 0.7534107212586395, iteration: 379549
loss: 0.9991609454154968,grad_norm: 0.9714138010439165, iteration: 379550
loss: 0.9557392001152039,grad_norm: 0.8561853602767945, iteration: 379551
loss: 1.0021641254425049,grad_norm: 0.790281344936065, iteration: 379552
loss: 1.0162891149520874,grad_norm: 0.7383506245049529, iteration: 379553
loss: 1.0498533248901367,grad_norm: 0.9999990219785428, iteration: 379554
loss: 1.014726996421814,grad_norm: 0.822589940012267, iteration: 379555
loss: 1.0196782350540161,grad_norm: 0.7555773583096334, iteration: 379556
loss: 1.0150789022445679,grad_norm: 0.9999999776412715, iteration: 379557
loss: 1.0387685298919678,grad_norm: 0.9999998554748363, iteration: 379558
loss: 1.0098187923431396,grad_norm: 0.9111649950318225, iteration: 379559
loss: 0.9839071035385132,grad_norm: 0.8303729226442118, iteration: 379560
loss: 1.0150048732757568,grad_norm: 0.9095974283832087, iteration: 379561
loss: 1.0444947481155396,grad_norm: 0.8024665731178362, iteration: 379562
loss: 0.980520486831665,grad_norm: 0.7124343788365318, iteration: 379563
loss: 0.9465550184249878,grad_norm: 0.8559255259990152, iteration: 379564
loss: 0.9889069199562073,grad_norm: 0.7740617479861687, iteration: 379565
loss: 1.01985502243042,grad_norm: 0.7571812201807838, iteration: 379566
loss: 1.0374009609222412,grad_norm: 0.8496504708697766, iteration: 379567
loss: 1.0158982276916504,grad_norm: 0.9019460746343103, iteration: 379568
loss: 1.0139673948287964,grad_norm: 0.6812588007111262, iteration: 379569
loss: 1.1206892728805542,grad_norm: 0.9205093102708019, iteration: 379570
loss: 1.094477891921997,grad_norm: 0.9999998739713023, iteration: 379571
loss: 1.0802528858184814,grad_norm: 0.9999994132340816, iteration: 379572
loss: 0.9964249134063721,grad_norm: 0.7726137807421962, iteration: 379573
loss: 0.981903076171875,grad_norm: 0.7969924088164285, iteration: 379574
loss: 1.0199182033538818,grad_norm: 0.8214173212840474, iteration: 379575
loss: 1.020101547241211,grad_norm: 0.9477104403802241, iteration: 379576
loss: 1.0305850505828857,grad_norm: 0.999999505789654, iteration: 379577
loss: 0.9975855946540833,grad_norm: 0.7872521445100834, iteration: 379578
loss: 1.014647364616394,grad_norm: 0.7542908277557293, iteration: 379579
loss: 1.3102797269821167,grad_norm: 0.9999998264806105, iteration: 379580
loss: 1.0091490745544434,grad_norm: 0.7896977664110947, iteration: 379581
loss: 1.0413588285446167,grad_norm: 0.9163944430552333, iteration: 379582
loss: 1.0003360509872437,grad_norm: 0.7549861769027599, iteration: 379583
loss: 1.0701100826263428,grad_norm: 0.7849755043940558, iteration: 379584
loss: 1.075598120689392,grad_norm: 0.9999996243003451, iteration: 379585
loss: 0.9869046211242676,grad_norm: 0.8448428001260305, iteration: 379586
loss: 1.0640549659729004,grad_norm: 0.8241661107599676, iteration: 379587
loss: 1.1025657653808594,grad_norm: 0.9999989796563712, iteration: 379588
loss: 1.0555917024612427,grad_norm: 0.8138853108403336, iteration: 379589
loss: 1.065679907798767,grad_norm: 0.999999871051121, iteration: 379590
loss: 1.0220847129821777,grad_norm: 0.9999999597011289, iteration: 379591
loss: 1.002808928489685,grad_norm: 0.7429710571563402, iteration: 379592
loss: 1.01827871799469,grad_norm: 0.9999997474312257, iteration: 379593
loss: 1.0103739500045776,grad_norm: 0.8209277555965943, iteration: 379594
loss: 1.020724892616272,grad_norm: 0.7045758494440806, iteration: 379595
loss: 1.080854892730713,grad_norm: 0.9999998071611403, iteration: 379596
loss: 0.9924259185791016,grad_norm: 0.9129353254786303, iteration: 379597
loss: 1.123357892036438,grad_norm: 0.9999990156861569, iteration: 379598
loss: 1.1549617052078247,grad_norm: 0.9999999379500053, iteration: 379599
loss: 1.0012320280075073,grad_norm: 0.8300571499940035, iteration: 379600
loss: 0.9844580888748169,grad_norm: 0.7828584731217586, iteration: 379601
loss: 1.0856679677963257,grad_norm: 0.9999994722421723, iteration: 379602
loss: 1.077432632446289,grad_norm: 0.9999998332653222, iteration: 379603
loss: 1.025980830192566,grad_norm: 0.999999790700153, iteration: 379604
loss: 1.0466701984405518,grad_norm: 0.9999994366029142, iteration: 379605
loss: 1.0689620971679688,grad_norm: 0.9999991774069857, iteration: 379606
loss: 1.0349549055099487,grad_norm: 0.7912091429737442, iteration: 379607
loss: 1.001293420791626,grad_norm: 0.8144066952756317, iteration: 379608
loss: 1.0213255882263184,grad_norm: 0.9999999196224665, iteration: 379609
loss: 0.9948335886001587,grad_norm: 0.9999995185156658, iteration: 379610
loss: 1.0677059888839722,grad_norm: 0.9999991891334403, iteration: 379611
loss: 0.9686056971549988,grad_norm: 0.7239747936694443, iteration: 379612
loss: 0.9947537183761597,grad_norm: 0.9180237660141981, iteration: 379613
loss: 1.0542007684707642,grad_norm: 0.8434595258952696, iteration: 379614
loss: 1.0332084894180298,grad_norm: 0.9999991261963604, iteration: 379615
loss: 1.0678515434265137,grad_norm: 0.7774786867586971, iteration: 379616
loss: 0.9920068979263306,grad_norm: 0.9628188137219981, iteration: 379617
loss: 1.1243219375610352,grad_norm: 0.9999993095106419, iteration: 379618
loss: 1.0415358543395996,grad_norm: 0.8207913820847663, iteration: 379619
loss: 1.0008867979049683,grad_norm: 0.9159791061304532, iteration: 379620
loss: 1.124963641166687,grad_norm: 0.9589630841935006, iteration: 379621
loss: 0.9700993895530701,grad_norm: 0.8184846670039944, iteration: 379622
loss: 1.1683145761489868,grad_norm: 0.9999992329485418, iteration: 379623
loss: 1.1591253280639648,grad_norm: 0.9999995045719616, iteration: 379624
loss: 1.1028327941894531,grad_norm: 0.9999999380989417, iteration: 379625
loss: 1.0128244161605835,grad_norm: 0.8581714348539091, iteration: 379626
loss: 1.0547016859054565,grad_norm: 0.856228617165163, iteration: 379627
loss: 1.0133862495422363,grad_norm: 0.9042271455257581, iteration: 379628
loss: 1.0118173360824585,grad_norm: 0.873435525714371, iteration: 379629
loss: 1.420861005783081,grad_norm: 0.9999997380732497, iteration: 379630
loss: 1.0175416469573975,grad_norm: 0.8293862551103844, iteration: 379631
loss: 1.0791800022125244,grad_norm: 0.9989995130656844, iteration: 379632
loss: 1.1776067018508911,grad_norm: 0.9833767007689738, iteration: 379633
loss: 1.271144986152649,grad_norm: 0.9999997898076615, iteration: 379634
loss: 0.9722734689712524,grad_norm: 0.8364216608373266, iteration: 379635
loss: 1.0044573545455933,grad_norm: 0.7899604152443978, iteration: 379636
loss: 1.011108636856079,grad_norm: 0.8807986972247287, iteration: 379637
loss: 0.9736970067024231,grad_norm: 0.7979454509548286, iteration: 379638
loss: 1.0182127952575684,grad_norm: 0.9849939128272829, iteration: 379639
loss: 1.165980339050293,grad_norm: 1.0000000528290454, iteration: 379640
loss: 1.0216026306152344,grad_norm: 0.9999992689102322, iteration: 379641
loss: 1.0290158987045288,grad_norm: 0.7787146365535531, iteration: 379642
loss: 1.0970580577850342,grad_norm: 0.9999999511064038, iteration: 379643
loss: 1.0631396770477295,grad_norm: 0.851502476099399, iteration: 379644
loss: 0.9902815222740173,grad_norm: 0.8026122864274698, iteration: 379645
loss: 1.0896409749984741,grad_norm: 0.8883856237074119, iteration: 379646
loss: 1.0183049440383911,grad_norm: 0.9999998369681838, iteration: 379647
loss: 1.0111531019210815,grad_norm: 0.9198291602227626, iteration: 379648
loss: 1.0029911994934082,grad_norm: 0.9497642468272407, iteration: 379649
loss: 1.070156455039978,grad_norm: 0.9831880404504846, iteration: 379650
loss: 1.1496894359588623,grad_norm: 0.9999999995649583, iteration: 379651
loss: 1.1140176057815552,grad_norm: 0.9999991644456498, iteration: 379652
loss: 1.0685316324234009,grad_norm: 0.9999998526337769, iteration: 379653
loss: 1.0366681814193726,grad_norm: 0.8793729883157219, iteration: 379654
loss: 1.0452871322631836,grad_norm: 0.9999994413032888, iteration: 379655
loss: 0.995364248752594,grad_norm: 0.81983587786731, iteration: 379656
loss: 0.9860895872116089,grad_norm: 0.7516855641003708, iteration: 379657
loss: 1.061405062675476,grad_norm: 0.9462752507912044, iteration: 379658
loss: 0.9972251057624817,grad_norm: 0.6309855871651875, iteration: 379659
loss: 1.0096395015716553,grad_norm: 0.8070630128482318, iteration: 379660
loss: 0.9813401699066162,grad_norm: 0.8099613385063626, iteration: 379661
loss: 0.9829207062721252,grad_norm: 0.7079363235456511, iteration: 379662
loss: 1.031319260597229,grad_norm: 0.8849973067446779, iteration: 379663
loss: 1.0005180835723877,grad_norm: 0.8514449443276815, iteration: 379664
loss: 1.0371193885803223,grad_norm: 0.8096603441297414, iteration: 379665
loss: 0.9978522658348083,grad_norm: 0.9999997828066507, iteration: 379666
loss: 1.0597705841064453,grad_norm: 0.8206436456209346, iteration: 379667
loss: 1.0184431076049805,grad_norm: 0.9999990791078949, iteration: 379668
loss: 0.9744362831115723,grad_norm: 0.9440682149501283, iteration: 379669
loss: 1.0328236818313599,grad_norm: 0.803743746477285, iteration: 379670
loss: 1.036769986152649,grad_norm: 0.9999998642680004, iteration: 379671
loss: 0.9970985054969788,grad_norm: 0.7748843166165987, iteration: 379672
loss: 1.0014771223068237,grad_norm: 0.9396489835726697, iteration: 379673
loss: 1.0148307085037231,grad_norm: 0.7872763952910755, iteration: 379674
loss: 1.012500286102295,grad_norm: 0.9999995110374482, iteration: 379675
loss: 0.9841475486755371,grad_norm: 0.7455385496482817, iteration: 379676
loss: 1.000183343887329,grad_norm: 0.8335366501222514, iteration: 379677
loss: 1.0336527824401855,grad_norm: 0.803076571601093, iteration: 379678
loss: 1.165178656578064,grad_norm: 0.9999997007095693, iteration: 379679
loss: 1.0406705141067505,grad_norm: 0.6858973544867337, iteration: 379680
loss: 1.2176166772842407,grad_norm: 0.9999994550154252, iteration: 379681
loss: 1.030523657798767,grad_norm: 0.8713934804726838, iteration: 379682
loss: 0.9523781538009644,grad_norm: 0.7201250575787916, iteration: 379683
loss: 1.0420256853103638,grad_norm: 0.9999992863333389, iteration: 379684
loss: 1.0481864213943481,grad_norm: 0.9999991135016643, iteration: 379685
loss: 1.095508098602295,grad_norm: 0.8195790628317077, iteration: 379686
loss: 1.0438472032546997,grad_norm: 0.8881811555870673, iteration: 379687
loss: 0.9602857232093811,grad_norm: 0.7708888198591997, iteration: 379688
loss: 0.998435914516449,grad_norm: 0.7547010951254, iteration: 379689
loss: 1.016359806060791,grad_norm: 0.99999905405605, iteration: 379690
loss: 0.976031482219696,grad_norm: 0.7974447412708372, iteration: 379691
loss: 0.9874699115753174,grad_norm: 0.698108858250632, iteration: 379692
loss: 1.083603024482727,grad_norm: 0.994608580054837, iteration: 379693
loss: 0.9483801126480103,grad_norm: 0.6589222673545901, iteration: 379694
loss: 1.154619574546814,grad_norm: 0.9999998860809264, iteration: 379695
loss: 1.0915311574935913,grad_norm: 0.7917438501511088, iteration: 379696
loss: 1.0943937301635742,grad_norm: 0.9999991160872963, iteration: 379697
loss: 1.0656968355178833,grad_norm: 0.9999994916832796, iteration: 379698
loss: 1.0609279870986938,grad_norm: 0.8893911990087088, iteration: 379699
loss: 0.9877514839172363,grad_norm: 0.8484902737217118, iteration: 379700
loss: 0.9961045980453491,grad_norm: 0.8060384327834157, iteration: 379701
loss: 1.0534881353378296,grad_norm: 0.9999993878501735, iteration: 379702
loss: 0.992301881313324,grad_norm: 0.8112653465811327, iteration: 379703
loss: 1.0435611009597778,grad_norm: 0.985258363690118, iteration: 379704
loss: 1.3032456636428833,grad_norm: 0.9999993894566501, iteration: 379705
loss: 1.077871561050415,grad_norm: 0.9999996810502547, iteration: 379706
loss: 0.9667763113975525,grad_norm: 0.8098067758832254, iteration: 379707
loss: 1.0129414796829224,grad_norm: 0.7079970999657205, iteration: 379708
loss: 1.0207961797714233,grad_norm: 0.9999998011294036, iteration: 379709
loss: 1.015958547592163,grad_norm: 0.9749731780112812, iteration: 379710
loss: 1.033954381942749,grad_norm: 0.9999997807675967, iteration: 379711
loss: 1.0848139524459839,grad_norm: 0.8214364554900553, iteration: 379712
loss: 1.0145691633224487,grad_norm: 0.7921809309075268, iteration: 379713
loss: 0.9912793040275574,grad_norm: 0.8499760791186844, iteration: 379714
loss: 1.1011072397232056,grad_norm: 0.9999996434232233, iteration: 379715
loss: 0.9521306753158569,grad_norm: 0.9522919644427389, iteration: 379716
loss: 1.0166598558425903,grad_norm: 0.9032626622227621, iteration: 379717
loss: 1.0315512418746948,grad_norm: 0.9999996953552227, iteration: 379718
loss: 1.0428874492645264,grad_norm: 0.7248159086122599, iteration: 379719
loss: 1.0906941890716553,grad_norm: 0.9999995571103685, iteration: 379720
loss: 1.0633233785629272,grad_norm: 0.8588845126387317, iteration: 379721
loss: 1.0999253988265991,grad_norm: 0.9999992079194442, iteration: 379722
loss: 1.0150502920150757,grad_norm: 0.7492619126112583, iteration: 379723
loss: 0.9872358441352844,grad_norm: 0.6822429525401025, iteration: 379724
loss: 1.0862377882003784,grad_norm: 0.9999994566648108, iteration: 379725
loss: 1.0093327760696411,grad_norm: 0.9262456920988286, iteration: 379726
loss: 1.048882007598877,grad_norm: 0.9999996982442794, iteration: 379727
loss: 1.5069537162780762,grad_norm: 0.9999999020848793, iteration: 379728
loss: 1.1205480098724365,grad_norm: 1.0000000522692936, iteration: 379729
loss: 1.0578497648239136,grad_norm: 0.805462807352282, iteration: 379730
loss: 1.2313164472579956,grad_norm: 0.9999997255657053, iteration: 379731
loss: 0.9910001754760742,grad_norm: 0.7826560714708668, iteration: 379732
loss: 0.9891247153282166,grad_norm: 0.9999996617327553, iteration: 379733
loss: 1.0595619678497314,grad_norm: 0.6975591484534527, iteration: 379734
loss: 1.0527276992797852,grad_norm: 0.8276628341520768, iteration: 379735
loss: 1.0138094425201416,grad_norm: 0.9999999987185516, iteration: 379736
loss: 1.031851053237915,grad_norm: 0.9999995591391868, iteration: 379737
loss: 1.0346472263336182,grad_norm: 0.9999998098336081, iteration: 379738
loss: 1.02203369140625,grad_norm: 0.9009517442668264, iteration: 379739
loss: 1.1472885608673096,grad_norm: 0.9056347582753508, iteration: 379740
loss: 1.1050900220870972,grad_norm: 0.9999997209646247, iteration: 379741
loss: 1.0407618284225464,grad_norm: 0.9999997047240088, iteration: 379742
loss: 1.0218088626861572,grad_norm: 0.8189292513866974, iteration: 379743
loss: 1.0758419036865234,grad_norm: 0.999999663046597, iteration: 379744
loss: 1.0858957767486572,grad_norm: 0.9998642451222937, iteration: 379745
loss: 1.038568377494812,grad_norm: 0.9999993747343459, iteration: 379746
loss: 0.9693581461906433,grad_norm: 0.7202561722108582, iteration: 379747
loss: 1.2193212509155273,grad_norm: 0.9999993961806046, iteration: 379748
loss: 1.0318681001663208,grad_norm: 0.9999990481342348, iteration: 379749
loss: 1.0722583532333374,grad_norm: 0.9999996688465087, iteration: 379750
loss: 1.0067577362060547,grad_norm: 0.9956976363933141, iteration: 379751
loss: 1.0355663299560547,grad_norm: 0.9999991198005206, iteration: 379752
loss: 1.0008835792541504,grad_norm: 0.9939447743078583, iteration: 379753
loss: 1.0802382230758667,grad_norm: 0.7809968326945503, iteration: 379754
loss: 1.0469448566436768,grad_norm: 0.9999994156832515, iteration: 379755
loss: 1.057479739189148,grad_norm: 0.8337085690024025, iteration: 379756
loss: 1.0436286926269531,grad_norm: 0.9736479363083634, iteration: 379757
loss: 1.0054614543914795,grad_norm: 0.9876681837887742, iteration: 379758
loss: 1.0523035526275635,grad_norm: 0.9020875007852718, iteration: 379759
loss: 1.1426724195480347,grad_norm: 0.9999994674433274, iteration: 379760
loss: 1.3497371673583984,grad_norm: 0.9999997451434735, iteration: 379761
loss: 0.9516854882240295,grad_norm: 0.9999991697062183, iteration: 379762
loss: 1.006038784980774,grad_norm: 0.9534535329462904, iteration: 379763
loss: 1.07255220413208,grad_norm: 0.9999991638883843, iteration: 379764
loss: 1.0455302000045776,grad_norm: 0.9999997865110646, iteration: 379765
loss: 0.9447098970413208,grad_norm: 0.8215385376846811, iteration: 379766
loss: 1.0088605880737305,grad_norm: 0.9999991990322016, iteration: 379767
loss: 1.0620983839035034,grad_norm: 0.7851143135842754, iteration: 379768
loss: 1.2756562232971191,grad_norm: 0.9999999990081876, iteration: 379769
loss: 0.975094199180603,grad_norm: 0.8227905040204049, iteration: 379770
loss: 0.9833723902702332,grad_norm: 0.7802652078637237, iteration: 379771
loss: 0.9865999817848206,grad_norm: 0.779868484818921, iteration: 379772
loss: 1.0541695356369019,grad_norm: 0.9999997334778057, iteration: 379773
loss: 1.0124138593673706,grad_norm: 0.9110938263889711, iteration: 379774
loss: 1.059409499168396,grad_norm: 0.8009638215472852, iteration: 379775
loss: 1.1168804168701172,grad_norm: 0.9999999316714243, iteration: 379776
loss: 1.0703612565994263,grad_norm: 0.9999991782640354, iteration: 379777
loss: 1.2267050743103027,grad_norm: 0.9999993071815177, iteration: 379778
loss: 1.0643645524978638,grad_norm: 0.9999992303260555, iteration: 379779
loss: 0.9967455267906189,grad_norm: 0.8400900077272471, iteration: 379780
loss: 0.9988127946853638,grad_norm: 0.9999997217682275, iteration: 379781
loss: 1.0742119550704956,grad_norm: 0.9999991012276019, iteration: 379782
loss: 1.0956025123596191,grad_norm: 0.9999990561488726, iteration: 379783
loss: 1.015974521636963,grad_norm: 0.767518563697067, iteration: 379784
loss: 1.1023461818695068,grad_norm: 0.999999918015722, iteration: 379785
loss: 1.0851327180862427,grad_norm: 0.9999998755842748, iteration: 379786
loss: 0.9766798615455627,grad_norm: 0.87494085946087, iteration: 379787
loss: 0.9860607385635376,grad_norm: 0.8659709789962912, iteration: 379788
loss: 1.0734435319900513,grad_norm: 0.9999994384845068, iteration: 379789
loss: 1.1512624025344849,grad_norm: 0.9999991800706142, iteration: 379790
loss: 0.9757336974143982,grad_norm: 0.7970252188921635, iteration: 379791
loss: 1.2951582670211792,grad_norm: 0.9999999242477718, iteration: 379792
loss: 1.0228092670440674,grad_norm: 0.9999997258192121, iteration: 379793
loss: 0.9982661008834839,grad_norm: 0.9999992841958009, iteration: 379794
loss: 1.0826925039291382,grad_norm: 0.9067853115492175, iteration: 379795
loss: 0.9775746464729309,grad_norm: 0.9672013993834491, iteration: 379796
loss: 1.0979779958724976,grad_norm: 0.9999991384772542, iteration: 379797
loss: 1.5080560445785522,grad_norm: 0.9999999190470976, iteration: 379798
loss: 1.0119199752807617,grad_norm: 0.8963416754900841, iteration: 379799
loss: 1.0122840404510498,grad_norm: 0.9999997728051097, iteration: 379800
loss: 1.21934175491333,grad_norm: 0.9999990205657918, iteration: 379801
loss: 1.0835055112838745,grad_norm: 0.9802228612228001, iteration: 379802
loss: 1.0267512798309326,grad_norm: 0.9999998497156205, iteration: 379803
loss: 0.9963763952255249,grad_norm: 0.812693315221178, iteration: 379804
loss: 1.0025601387023926,grad_norm: 0.7889268304111255, iteration: 379805
loss: 1.2126961946487427,grad_norm: 0.99999990864232, iteration: 379806
loss: 1.1797491312026978,grad_norm: 0.9999995669183518, iteration: 379807
loss: 1.2492039203643799,grad_norm: 0.9999995573924708, iteration: 379808
loss: 1.1320136785507202,grad_norm: 0.9999990865163225, iteration: 379809
loss: 0.962188184261322,grad_norm: 0.7782110641543859, iteration: 379810
loss: 1.0825501680374146,grad_norm: 0.813256247913223, iteration: 379811
loss: 1.0077450275421143,grad_norm: 0.9999995679862984, iteration: 379812
loss: 0.998370349407196,grad_norm: 0.8650038909416783, iteration: 379813
loss: 1.0079032182693481,grad_norm: 0.8374581513950199, iteration: 379814
loss: 1.0125539302825928,grad_norm: 0.7837633024743216, iteration: 379815
loss: 0.9989429116249084,grad_norm: 0.9957651533400298, iteration: 379816
loss: 1.1043195724487305,grad_norm: 0.9999998340210569, iteration: 379817
loss: 1.2839311361312866,grad_norm: 0.9625838031382091, iteration: 379818
loss: 0.9797312021255493,grad_norm: 0.9518697568349681, iteration: 379819
loss: 1.0458037853240967,grad_norm: 0.9999992139281514, iteration: 379820
loss: 1.0184080600738525,grad_norm: 0.7495067446367868, iteration: 379821
loss: 1.1563652753829956,grad_norm: 0.9999998428436694, iteration: 379822
loss: 1.0789865255355835,grad_norm: 0.8048983071096308, iteration: 379823
loss: 1.0035840272903442,grad_norm: 0.9999993375588169, iteration: 379824
loss: 0.9782477617263794,grad_norm: 0.8534520189449438, iteration: 379825
loss: 1.1521191596984863,grad_norm: 0.9999998107855789, iteration: 379826
loss: 1.0951128005981445,grad_norm: 0.9999992762464776, iteration: 379827
loss: 1.10832941532135,grad_norm: 0.9999990914640389, iteration: 379828
loss: 1.0048270225524902,grad_norm: 0.9999996779522451, iteration: 379829
loss: 1.068810224533081,grad_norm: 0.7657569435932853, iteration: 379830
loss: 1.0227941274642944,grad_norm: 0.8597180240422978, iteration: 379831
loss: 1.150191307067871,grad_norm: 0.9999996962527676, iteration: 379832
loss: 0.9757025241851807,grad_norm: 0.8138409771582105, iteration: 379833
loss: 0.9731029272079468,grad_norm: 0.853953633502046, iteration: 379834
loss: 1.0337045192718506,grad_norm: 0.999999186719489, iteration: 379835
loss: 1.0046168565750122,grad_norm: 0.8858186554171955, iteration: 379836
loss: 1.018538475036621,grad_norm: 0.7664286299529234, iteration: 379837
loss: 1.057964563369751,grad_norm: 0.9999995238304763, iteration: 379838
loss: 1.1545664072036743,grad_norm: 0.9999990614959803, iteration: 379839
loss: 0.971202552318573,grad_norm: 0.6785565301646748, iteration: 379840
loss: 1.0603610277175903,grad_norm: 0.8026864637491757, iteration: 379841
loss: 1.0418792963027954,grad_norm: 0.9999992944056283, iteration: 379842
loss: 1.1061843633651733,grad_norm: 0.9999998649134857, iteration: 379843
loss: 1.0772507190704346,grad_norm: 0.99999936257494, iteration: 379844
loss: 0.9759429693222046,grad_norm: 0.7115745889196758, iteration: 379845
loss: 1.0796546936035156,grad_norm: 0.9999989878383578, iteration: 379846
loss: 0.9951856136322021,grad_norm: 0.9999994270882021, iteration: 379847
loss: 1.012635350227356,grad_norm: 0.9591895456229814, iteration: 379848
loss: 1.020039439201355,grad_norm: 0.9999991621164455, iteration: 379849
loss: 0.978125274181366,grad_norm: 0.8179221586647549, iteration: 379850
loss: 1.0393781661987305,grad_norm: 0.8126056945256708, iteration: 379851
loss: 1.0598210096359253,grad_norm: 0.9999993627075012, iteration: 379852
loss: 0.9910607933998108,grad_norm: 0.9229149598100982, iteration: 379853
loss: 1.1106253862380981,grad_norm: 0.9999994025158228, iteration: 379854
loss: 1.0864394903182983,grad_norm: 0.9999993791682938, iteration: 379855
loss: 1.0154083967208862,grad_norm: 0.9999992998019636, iteration: 379856
loss: 1.047959804534912,grad_norm: 0.7368877553695108, iteration: 379857
loss: 1.0272186994552612,grad_norm: 0.8376113789173392, iteration: 379858
loss: 1.0230027437210083,grad_norm: 0.6675380477130883, iteration: 379859
loss: 0.995225727558136,grad_norm: 0.7417594405933207, iteration: 379860
loss: 0.971250593662262,grad_norm: 0.8943379360828604, iteration: 379861
loss: 1.0384235382080078,grad_norm: 0.801373823548874, iteration: 379862
loss: 1.10269296169281,grad_norm: 0.9550182092012245, iteration: 379863
loss: 1.0266929864883423,grad_norm: 0.8506707745296562, iteration: 379864
loss: 1.044966459274292,grad_norm: 0.8032786757504793, iteration: 379865
loss: 1.0265412330627441,grad_norm: 0.9999992681179171, iteration: 379866
loss: 1.0374231338500977,grad_norm: 0.8961167220977249, iteration: 379867
loss: 0.972283661365509,grad_norm: 0.7242075810753151, iteration: 379868
loss: 0.991192638874054,grad_norm: 0.9999991784193859, iteration: 379869
loss: 1.0289568901062012,grad_norm: 0.9455272568939439, iteration: 379870
loss: 1.0081948041915894,grad_norm: 0.7048113725611246, iteration: 379871
loss: 1.041927456855774,grad_norm: 0.8184824140679553, iteration: 379872
loss: 1.0613927841186523,grad_norm: 0.9510793464161278, iteration: 379873
loss: 1.0911084413528442,grad_norm: 0.9999992634928127, iteration: 379874
loss: 1.0008636713027954,grad_norm: 0.9999995549246251, iteration: 379875
loss: 1.0141364336013794,grad_norm: 0.9532806417127343, iteration: 379876
loss: 1.2078521251678467,grad_norm: 0.9999993352021364, iteration: 379877
loss: 1.0248031616210938,grad_norm: 0.8463083606021229, iteration: 379878
loss: 1.0139710903167725,grad_norm: 0.9999999907766581, iteration: 379879
loss: 1.105833649635315,grad_norm: 0.9999998979644573, iteration: 379880
loss: 1.17396080493927,grad_norm: 0.9999999313881869, iteration: 379881
loss: 0.9966118335723877,grad_norm: 0.7276085021902182, iteration: 379882
loss: 1.0402687788009644,grad_norm: 0.9999998554708104, iteration: 379883
loss: 1.026479721069336,grad_norm: 0.8028915280490961, iteration: 379884
loss: 1.068116545677185,grad_norm: 0.9999997287132071, iteration: 379885
loss: 0.9860082268714905,grad_norm: 0.7806566712669586, iteration: 379886
loss: 1.0996016263961792,grad_norm: 0.9999999295192953, iteration: 379887
loss: 1.001973271369934,grad_norm: 0.8212048381080491, iteration: 379888
loss: 1.1444308757781982,grad_norm: 0.9999996206514751, iteration: 379889
loss: 1.1011369228363037,grad_norm: 0.9999997966395816, iteration: 379890
loss: 1.0509787797927856,grad_norm: 0.9577359170119141, iteration: 379891
loss: 1.039116382598877,grad_norm: 0.9109181216654896, iteration: 379892
loss: 1.0312871932983398,grad_norm: 0.9999993386015847, iteration: 379893
loss: 1.0377854108810425,grad_norm: 0.788016886385534, iteration: 379894
loss: 1.088794469833374,grad_norm: 0.9999997848731067, iteration: 379895
loss: 0.9680845737457275,grad_norm: 0.7720739840142876, iteration: 379896
loss: 1.0195010900497437,grad_norm: 0.8440529597417564, iteration: 379897
loss: 1.0303277969360352,grad_norm: 0.6778365890381012, iteration: 379898
loss: 0.9923263192176819,grad_norm: 0.8383078016706672, iteration: 379899
loss: 1.0236278772354126,grad_norm: 0.9999991706361356, iteration: 379900
loss: 1.0829355716705322,grad_norm: 0.9999990076889509, iteration: 379901
loss: 1.0223443508148193,grad_norm: 0.7487049468995599, iteration: 379902
loss: 0.9931617379188538,grad_norm: 0.902531459410524, iteration: 379903
loss: 0.9812609553337097,grad_norm: 0.9999996211744726, iteration: 379904
loss: 1.0753016471862793,grad_norm: 0.999999198899479, iteration: 379905
loss: 0.9914656281471252,grad_norm: 0.8634770295018231, iteration: 379906
loss: 1.0237306356430054,grad_norm: 0.8700135686350822, iteration: 379907
loss: 0.985941469669342,grad_norm: 0.750475917695011, iteration: 379908
loss: 0.9919095039367676,grad_norm: 0.9999991533948155, iteration: 379909
loss: 0.9871903657913208,grad_norm: 0.9020880235120253, iteration: 379910
loss: 1.05829918384552,grad_norm: 0.861906003268926, iteration: 379911
loss: 0.9605292081832886,grad_norm: 0.7981969321987928, iteration: 379912
loss: 1.0358505249023438,grad_norm: 0.7289558444707115, iteration: 379913
loss: 1.0686219930648804,grad_norm: 1.0000000171125896, iteration: 379914
loss: 0.9910418391227722,grad_norm: 0.9685505147907671, iteration: 379915
loss: 0.9652755260467529,grad_norm: 0.7178078751450344, iteration: 379916
loss: 1.1152293682098389,grad_norm: 0.9014812261864157, iteration: 379917
loss: 0.9976410865783691,grad_norm: 0.8451744625253741, iteration: 379918
loss: 0.9786827564239502,grad_norm: 0.8392343175735043, iteration: 379919
loss: 1.0019347667694092,grad_norm: 0.9999992647131634, iteration: 379920
loss: 1.0139012336730957,grad_norm: 0.9999990666283816, iteration: 379921
loss: 1.0097774267196655,grad_norm: 0.9999992654590181, iteration: 379922
loss: 1.03690767288208,grad_norm: 0.9999992976671263, iteration: 379923
loss: 1.1072078943252563,grad_norm: 0.9999995203937323, iteration: 379924
loss: 1.2138721942901611,grad_norm: 0.9999998668023509, iteration: 379925
loss: 1.0005744695663452,grad_norm: 0.8017922355943661, iteration: 379926
loss: 0.9723748564720154,grad_norm: 0.8101152408538767, iteration: 379927
loss: 1.0067994594573975,grad_norm: 0.7853243628955243, iteration: 379928
loss: 1.0042699575424194,grad_norm: 0.9001233402577131, iteration: 379929
loss: 1.0430148839950562,grad_norm: 0.8945111160818601, iteration: 379930
loss: 0.9735513925552368,grad_norm: 0.8350716957484027, iteration: 379931
loss: 1.058356523513794,grad_norm: 0.9076811853504638, iteration: 379932
loss: 0.9807971715927124,grad_norm: 0.8484989524194976, iteration: 379933
loss: 1.0186434984207153,grad_norm: 0.9165942865721437, iteration: 379934
loss: 1.0664913654327393,grad_norm: 0.9999990637214606, iteration: 379935
loss: 1.0847492218017578,grad_norm: 0.8165394651888926, iteration: 379936
loss: 1.0031899213790894,grad_norm: 0.7970889684679915, iteration: 379937
loss: 1.029083251953125,grad_norm: 0.9398765796004155, iteration: 379938
loss: 0.9679475426673889,grad_norm: 0.8818778685627228, iteration: 379939
loss: 1.0338364839553833,grad_norm: 0.7931244993261238, iteration: 379940
loss: 1.00018310546875,grad_norm: 0.8014764645705611, iteration: 379941
loss: 1.0366300344467163,grad_norm: 0.7910678316535303, iteration: 379942
loss: 0.9902896881103516,grad_norm: 0.93166145147669, iteration: 379943
loss: 1.1426390409469604,grad_norm: 0.9999996489675212, iteration: 379944
loss: 1.4004693031311035,grad_norm: 0.9999997540351078, iteration: 379945
loss: 1.0463488101959229,grad_norm: 0.9049564313692824, iteration: 379946
loss: 1.127604603767395,grad_norm: 0.9999998478743838, iteration: 379947
loss: 1.1234791278839111,grad_norm: 0.9999994587227854, iteration: 379948
loss: 0.9788092970848083,grad_norm: 0.7585740646789333, iteration: 379949
loss: 1.0666007995605469,grad_norm: 0.7728429244463939, iteration: 379950
loss: 1.0447708368301392,grad_norm: 0.7868793489530777, iteration: 379951
loss: 1.0405303239822388,grad_norm: 0.8914575846798986, iteration: 379952
loss: 1.054579734802246,grad_norm: 0.9999990772210222, iteration: 379953
loss: 1.034226894378662,grad_norm: 0.9999992657818594, iteration: 379954
loss: 1.1638706922531128,grad_norm: 0.9999998520797437, iteration: 379955
loss: 1.042220115661621,grad_norm: 0.8663442040546244, iteration: 379956
loss: 1.0122205018997192,grad_norm: 0.7938933963353183, iteration: 379957
loss: 0.9670497179031372,grad_norm: 0.8330148065602918, iteration: 379958
loss: 0.9883037805557251,grad_norm: 0.9514765752271643, iteration: 379959
loss: 1.028823971748352,grad_norm: 0.9999998956989582, iteration: 379960
loss: 1.0283817052841187,grad_norm: 0.9540388320522346, iteration: 379961
loss: 1.256569743156433,grad_norm: 0.9999993555166873, iteration: 379962
loss: 1.0812667608261108,grad_norm: 0.9999997581548954, iteration: 379963
loss: 1.0573344230651855,grad_norm: 0.9728777386741709, iteration: 379964
loss: 1.0130852460861206,grad_norm: 0.7546744843309091, iteration: 379965
loss: 0.9993452429771423,grad_norm: 0.716968828494509, iteration: 379966
loss: 0.9974222183227539,grad_norm: 0.6650985958555781, iteration: 379967
loss: 0.9886276721954346,grad_norm: 0.7019210603027108, iteration: 379968
loss: 1.0721052885055542,grad_norm: 0.9999996641859689, iteration: 379969
loss: 1.2806369066238403,grad_norm: 0.9999997603994026, iteration: 379970
loss: 1.032579779624939,grad_norm: 0.8304521945924029, iteration: 379971
loss: 1.0022532939910889,grad_norm: 0.8744531226717892, iteration: 379972
loss: 1.0667439699172974,grad_norm: 0.999999778102317, iteration: 379973
loss: 1.0129334926605225,grad_norm: 0.8327243493328422, iteration: 379974
loss: 0.997815728187561,grad_norm: 0.9999994912191968, iteration: 379975
loss: 1.0174909830093384,grad_norm: 0.7466457833396495, iteration: 379976
loss: 1.0146379470825195,grad_norm: 0.9999991600468829, iteration: 379977
loss: 1.05430006980896,grad_norm: 0.8729129747140328, iteration: 379978
loss: 0.9973275661468506,grad_norm: 0.8172082619994041, iteration: 379979
loss: 0.9583978652954102,grad_norm: 0.7963793014209706, iteration: 379980
loss: 1.0060992240905762,grad_norm: 0.8411098089946275, iteration: 379981
loss: 1.0043666362762451,grad_norm: 0.9999992805707829, iteration: 379982
loss: 1.0713882446289062,grad_norm: 0.8534726897841787, iteration: 379983
loss: 1.0016552209854126,grad_norm: 0.8795779935965743, iteration: 379984
loss: 1.040867567062378,grad_norm: 0.8114388249816759, iteration: 379985
loss: 1.0827070474624634,grad_norm: 0.9999994735859763, iteration: 379986
loss: 1.0304045677185059,grad_norm: 0.8323770810965525, iteration: 379987
loss: 0.999073326587677,grad_norm: 0.8342619667864218, iteration: 379988
loss: 0.979168713092804,grad_norm: 0.8327634303389795, iteration: 379989
loss: 1.010589838027954,grad_norm: 0.9999993735834651, iteration: 379990
loss: 0.9977543354034424,grad_norm: 0.9716650044204423, iteration: 379991
loss: 1.0370765924453735,grad_norm: 0.7413382381709936, iteration: 379992
loss: 1.0119030475616455,grad_norm: 0.8107076270469219, iteration: 379993
loss: 1.057500958442688,grad_norm: 0.9999996348955476, iteration: 379994
loss: 1.06955087184906,grad_norm: 0.9765733775236176, iteration: 379995
loss: 1.0180584192276,grad_norm: 0.935982091621443, iteration: 379996
loss: 1.0049041509628296,grad_norm: 0.9999995626184608, iteration: 379997
loss: 1.023934006690979,grad_norm: 0.9999994871086214, iteration: 379998
loss: 1.0387109518051147,grad_norm: 0.9999998055908972, iteration: 379999
loss: 0.9968849420547485,grad_norm: 0.7140071352950994, iteration: 380000
Evaluating at step 380000
{'val': 1.016446776688099, 'test': 1.9592914196629874}
loss: 0.985372006893158,grad_norm: 0.6607343294682565, iteration: 380001
loss: 0.9803137183189392,grad_norm: 0.81672316784637, iteration: 380002
loss: 1.0189727544784546,grad_norm: 0.9528852965712225, iteration: 380003
loss: 1.0103923082351685,grad_norm: 0.8725311853917979, iteration: 380004
loss: 0.9799416661262512,grad_norm: 0.8457206606604615, iteration: 380005
loss: 0.9631481766700745,grad_norm: 0.8245066107023066, iteration: 380006
loss: 1.0202291011810303,grad_norm: 0.9102708379313378, iteration: 380007
loss: 0.9914553761482239,grad_norm: 0.7699280496247898, iteration: 380008
loss: 1.0041199922561646,grad_norm: 0.7382705829229516, iteration: 380009
loss: 1.005213975906372,grad_norm: 0.8315854929979407, iteration: 380010
loss: 0.9953645467758179,grad_norm: 0.7819301672969108, iteration: 380011
loss: 0.9948164224624634,grad_norm: 0.7629352086203073, iteration: 380012
loss: 1.020521879196167,grad_norm: 0.908356802040872, iteration: 380013
loss: 1.0337491035461426,grad_norm: 0.9999992744777437, iteration: 380014
loss: 0.9707729816436768,grad_norm: 0.8235620402808089, iteration: 380015
loss: 0.951626718044281,grad_norm: 0.8126270188235093, iteration: 380016
loss: 1.0183286666870117,grad_norm: 0.8968888596107011, iteration: 380017
loss: 1.0152246952056885,grad_norm: 0.9298476660246027, iteration: 380018
loss: 1.0188053846359253,grad_norm: 0.8685438173273023, iteration: 380019
loss: 1.0092439651489258,grad_norm: 0.6383461673354797, iteration: 380020
loss: 0.9953190088272095,grad_norm: 0.8015015609347692, iteration: 380021
loss: 1.0039128065109253,grad_norm: 0.8367718832291061, iteration: 380022
loss: 0.9765304327011108,grad_norm: 0.8445826998056186, iteration: 380023
loss: 1.30803644657135,grad_norm: 0.9999991067770999, iteration: 380024
loss: 1.016811490058899,grad_norm: 0.9999992148721372, iteration: 380025
loss: 1.0151463747024536,grad_norm: 0.8740241967937707, iteration: 380026
loss: 0.999449610710144,grad_norm: 0.9999994191780343, iteration: 380027
loss: 1.0576306581497192,grad_norm: 0.9999992457494025, iteration: 380028
loss: 1.006779432296753,grad_norm: 0.6912173678009551, iteration: 380029
loss: 1.0046504735946655,grad_norm: 0.7614232971453898, iteration: 380030
loss: 0.9996454119682312,grad_norm: 0.8142579647425989, iteration: 380031
loss: 1.0967395305633545,grad_norm: 0.8663979012540841, iteration: 380032
loss: 0.9851319193840027,grad_norm: 0.8012954655358392, iteration: 380033
loss: 1.098991870880127,grad_norm: 0.9999992959238588, iteration: 380034
loss: 1.0340274572372437,grad_norm: 0.999999230687026, iteration: 380035
loss: 0.9691463708877563,grad_norm: 0.8028068073119122, iteration: 380036
loss: 0.9902583360671997,grad_norm: 0.7677469773104514, iteration: 380037
loss: 1.0103232860565186,grad_norm: 0.7272692094526635, iteration: 380038
loss: 1.0078524351119995,grad_norm: 0.877706219470019, iteration: 380039
loss: 0.996526837348938,grad_norm: 0.843012024069564, iteration: 380040
loss: 1.018664836883545,grad_norm: 0.9999992035092176, iteration: 380041
loss: 1.0836068391799927,grad_norm: 0.9999996852564194, iteration: 380042
loss: 1.174714207649231,grad_norm: 0.99999943388411, iteration: 380043
loss: 0.9681573510169983,grad_norm: 0.835867258743153, iteration: 380044
loss: 0.9869517683982849,grad_norm: 0.8626626617978118, iteration: 380045
loss: 1.013328194618225,grad_norm: 0.9999991873542935, iteration: 380046
loss: 1.1295256614685059,grad_norm: 0.9999992767589443, iteration: 380047
loss: 1.0359702110290527,grad_norm: 0.999999367799023, iteration: 380048
loss: 1.0268332958221436,grad_norm: 0.9999995773540339, iteration: 380049
loss: 1.0148651599884033,grad_norm: 0.9999996058582946, iteration: 380050
loss: 1.0410008430480957,grad_norm: 0.7636492334381545, iteration: 380051
loss: 0.9640138745307922,grad_norm: 0.9387624466998936, iteration: 380052
loss: 1.1237553358078003,grad_norm: 0.9999992090148594, iteration: 380053
loss: 1.0176573991775513,grad_norm: 0.7000125306635167, iteration: 380054
loss: 0.9831085801124573,grad_norm: 0.8086922325691218, iteration: 380055
loss: 1.015225887298584,grad_norm: 0.759532301457908, iteration: 380056
loss: 1.3377171754837036,grad_norm: 0.999999297413898, iteration: 380057
loss: 1.0057538747787476,grad_norm: 0.9146149513488554, iteration: 380058
loss: 1.0320035219192505,grad_norm: 0.7609309707421369, iteration: 380059
loss: 0.9899206161499023,grad_norm: 0.7997617509979895, iteration: 380060
loss: 1.0891504287719727,grad_norm: 0.9999993268454613, iteration: 380061
loss: 1.0256330966949463,grad_norm: 0.9999990994133106, iteration: 380062
loss: 0.9951549172401428,grad_norm: 0.8491213475844848, iteration: 380063
loss: 0.9975209832191467,grad_norm: 0.8608516722032157, iteration: 380064
loss: 1.0411015748977661,grad_norm: 0.7869466273255059, iteration: 380065
loss: 0.9965792298316956,grad_norm: 0.8118178957228052, iteration: 380066
loss: 1.0135326385498047,grad_norm: 0.9999992618357066, iteration: 380067
loss: 0.9760147333145142,grad_norm: 0.9988436259901866, iteration: 380068
loss: 0.9654432535171509,grad_norm: 0.9999991122111335, iteration: 380069
loss: 1.0164872407913208,grad_norm: 0.9254601992235977, iteration: 380070
loss: 0.9869282245635986,grad_norm: 0.7724249828600936, iteration: 380071
loss: 1.0874873399734497,grad_norm: 0.9999995174053603, iteration: 380072
loss: 1.0065196752548218,grad_norm: 0.9999992904579024, iteration: 380073
loss: 1.019047498703003,grad_norm: 0.8940408057410778, iteration: 380074
loss: 1.0688464641571045,grad_norm: 0.9177296924596898, iteration: 380075
loss: 1.0276858806610107,grad_norm: 0.9442790689822654, iteration: 380076
loss: 0.9877219796180725,grad_norm: 0.9999991940526208, iteration: 380077
loss: 1.009560465812683,grad_norm: 0.9999990430636152, iteration: 380078
loss: 0.9927014112472534,grad_norm: 0.9999991609719912, iteration: 380079
loss: 1.0389450788497925,grad_norm: 0.8064933265206142, iteration: 380080
loss: 0.9507104158401489,grad_norm: 0.8205314372566024, iteration: 380081
loss: 0.9827244877815247,grad_norm: 0.7690618315986135, iteration: 380082
loss: 1.0173553228378296,grad_norm: 0.9999994382350711, iteration: 380083
loss: 0.9825114607810974,grad_norm: 0.8518188465083685, iteration: 380084
loss: 1.0661265850067139,grad_norm: 1.0000000002960951, iteration: 380085
loss: 1.0038243532180786,grad_norm: 0.6693926049996143, iteration: 380086
loss: 1.0296835899353027,grad_norm: 0.8440486261439079, iteration: 380087
loss: 1.0038745403289795,grad_norm: 0.9144859308050872, iteration: 380088
loss: 1.0124642848968506,grad_norm: 0.8254019656792201, iteration: 380089
loss: 1.0181727409362793,grad_norm: 0.999999227888396, iteration: 380090
loss: 0.9922800064086914,grad_norm: 0.7954962023997303, iteration: 380091
loss: 0.9960753917694092,grad_norm: 0.7284077610086214, iteration: 380092
loss: 1.0107611417770386,grad_norm: 0.8078463259019172, iteration: 380093
loss: 1.0275593996047974,grad_norm: 0.999999044856228, iteration: 380094
loss: 1.0597550868988037,grad_norm: 0.9999996251425011, iteration: 380095
loss: 1.0681655406951904,grad_norm: 0.8803093740254943, iteration: 380096
loss: 1.1452587842941284,grad_norm: 0.9999997426108298, iteration: 380097
loss: 1.0396660566329956,grad_norm: 0.9999995105946582, iteration: 380098
loss: 1.0776454210281372,grad_norm: 0.999999095167832, iteration: 380099
loss: 1.0443801879882812,grad_norm: 0.999999630365697, iteration: 380100
loss: 0.9889412522315979,grad_norm: 0.8776409175394555, iteration: 380101
loss: 1.0186573266983032,grad_norm: 0.8268377851613836, iteration: 380102
loss: 1.022190809249878,grad_norm: 0.8679846971232319, iteration: 380103
loss: 0.9975194334983826,grad_norm: 0.7716625154051885, iteration: 380104
loss: 0.996142566204071,grad_norm: 0.5892177507114092, iteration: 380105
loss: 0.9968080520629883,grad_norm: 0.8697717770350796, iteration: 380106
loss: 1.0191093683242798,grad_norm: 0.8529151866264346, iteration: 380107
loss: 1.0526498556137085,grad_norm: 0.9993402847115174, iteration: 380108
loss: 0.9907272458076477,grad_norm: 0.8723765043564106, iteration: 380109
loss: 0.964302659034729,grad_norm: 0.6974929745155432, iteration: 380110
loss: 1.0022145509719849,grad_norm: 0.8624049372187377, iteration: 380111
loss: 0.9976715445518494,grad_norm: 0.7950245303162616, iteration: 380112
loss: 1.0202045440673828,grad_norm: 0.806019781291312, iteration: 380113
loss: 1.0066704750061035,grad_norm: 0.9999991890605473, iteration: 380114
loss: 0.9903219938278198,grad_norm: 0.9999992568082391, iteration: 380115
loss: 1.0044459104537964,grad_norm: 0.7721275706666585, iteration: 380116
loss: 1.0019062757492065,grad_norm: 0.7884239114715592, iteration: 380117
loss: 1.0478790998458862,grad_norm: 0.8075425490467779, iteration: 380118
loss: 0.9547210931777954,grad_norm: 0.8057279063539136, iteration: 380119
loss: 0.993349015712738,grad_norm: 0.999999089021225, iteration: 380120
loss: 1.0712910890579224,grad_norm: 0.9999990724333985, iteration: 380121
loss: 0.9974163174629211,grad_norm: 0.6753813977975284, iteration: 380122
loss: 1.0183091163635254,grad_norm: 0.9999995722915263, iteration: 380123
loss: 0.9916767477989197,grad_norm: 0.8093146733766572, iteration: 380124
loss: 0.9735007286071777,grad_norm: 0.8514485080742671, iteration: 380125
loss: 1.0054491758346558,grad_norm: 0.7251006204759314, iteration: 380126
loss: 0.9853304624557495,grad_norm: 0.7190838502854237, iteration: 380127
loss: 1.0284093618392944,grad_norm: 0.9999991217694926, iteration: 380128
loss: 0.9767242670059204,grad_norm: 0.9522758656996093, iteration: 380129
loss: 1.0218093395233154,grad_norm: 0.8700260306301849, iteration: 380130
loss: 0.9630687236785889,grad_norm: 0.7917214959540461, iteration: 380131
loss: 1.0280721187591553,grad_norm: 0.7323376144337541, iteration: 380132
loss: 0.9938035607337952,grad_norm: 0.9034555285066013, iteration: 380133
loss: 0.9861537218093872,grad_norm: 0.8164993899406443, iteration: 380134
loss: 0.998646080493927,grad_norm: 0.7600493674367343, iteration: 380135
loss: 0.9842509031295776,grad_norm: 0.9999997439146435, iteration: 380136
loss: 1.0103076696395874,grad_norm: 0.795814129356154, iteration: 380137
loss: 1.0834357738494873,grad_norm: 0.8197781728157747, iteration: 380138
loss: 0.9859301447868347,grad_norm: 0.9611500225809642, iteration: 380139
loss: 1.0138378143310547,grad_norm: 0.7952869682732521, iteration: 380140
loss: 1.0430703163146973,grad_norm: 0.8842023211689198, iteration: 380141
loss: 1.0243051052093506,grad_norm: 0.815061967026402, iteration: 380142
loss: 1.2152739763259888,grad_norm: 0.8808003900038488, iteration: 380143
loss: 1.0380759239196777,grad_norm: 0.9999992346933194, iteration: 380144
loss: 0.9904719591140747,grad_norm: 0.7709331972863787, iteration: 380145
loss: 0.9845632910728455,grad_norm: 0.9999992491816277, iteration: 380146
loss: 0.9930436611175537,grad_norm: 0.8825656751972647, iteration: 380147
loss: 1.0364773273468018,grad_norm: 0.9999996232779168, iteration: 380148
loss: 0.9896637201309204,grad_norm: 0.8175777302748463, iteration: 380149
loss: 0.9769220948219299,grad_norm: 0.7570029143101412, iteration: 380150
loss: 0.9684429168701172,grad_norm: 0.711297221672318, iteration: 380151
loss: 1.1380870342254639,grad_norm: 0.9999999785468859, iteration: 380152
loss: 1.043397307395935,grad_norm: 0.8054183200538727, iteration: 380153
loss: 1.021937370300293,grad_norm: 0.842732279766194, iteration: 380154
loss: 0.9986491203308105,grad_norm: 0.8542707075717724, iteration: 380155
loss: 1.052169680595398,grad_norm: 0.999999602504495, iteration: 380156
loss: 1.0082999467849731,grad_norm: 0.9239680386792525, iteration: 380157
loss: 0.9968047738075256,grad_norm: 0.6960977044167281, iteration: 380158
loss: 1.0716753005981445,grad_norm: 0.9999994138107106, iteration: 380159
loss: 1.0443503856658936,grad_norm: 0.9184617596745344, iteration: 380160
loss: 0.9859934449195862,grad_norm: 0.7397913812263822, iteration: 380161
loss: 1.017952561378479,grad_norm: 0.7401427469226376, iteration: 380162
loss: 1.0124404430389404,grad_norm: 0.7932609997157218, iteration: 380163
loss: 1.0023692846298218,grad_norm: 0.7423851593623838, iteration: 380164
loss: 1.0090620517730713,grad_norm: 0.8414877483724307, iteration: 380165
loss: 1.0499812364578247,grad_norm: 0.9293645950579712, iteration: 380166
loss: 1.0505163669586182,grad_norm: 0.9083903366039691, iteration: 380167
loss: 0.976446270942688,grad_norm: 0.6834422823059919, iteration: 380168
loss: 0.9757299423217773,grad_norm: 0.8480940783323537, iteration: 380169
loss: 1.0499223470687866,grad_norm: 0.9999990710276555, iteration: 380170
loss: 0.9480133056640625,grad_norm: 0.8754516343759705, iteration: 380171
loss: 0.9631555676460266,grad_norm: 0.8223401724900755, iteration: 380172
loss: 1.0592529773712158,grad_norm: 0.8281798048945873, iteration: 380173
loss: 1.0280152559280396,grad_norm: 0.7684944679493191, iteration: 380174
loss: 1.0154881477355957,grad_norm: 0.8124621231390937, iteration: 380175
loss: 0.9842494130134583,grad_norm: 0.999999058605039, iteration: 380176
loss: 0.962212860584259,grad_norm: 0.8298961378851597, iteration: 380177
loss: 0.9609301686286926,grad_norm: 0.6929079113570671, iteration: 380178
loss: 0.9913671612739563,grad_norm: 0.7485582201107615, iteration: 380179
loss: 0.9893550276756287,grad_norm: 0.7793870835679527, iteration: 380180
loss: 1.0513030290603638,grad_norm: 0.8468450438837246, iteration: 380181
loss: 1.01089608669281,grad_norm: 0.9999991707222087, iteration: 380182
loss: 0.9991846680641174,grad_norm: 0.9999993358277518, iteration: 380183
loss: 1.0020992755889893,grad_norm: 0.8494165688236898, iteration: 380184
loss: 1.0454216003417969,grad_norm: 0.8565007243747432, iteration: 380185
loss: 0.9756222367286682,grad_norm: 0.8908744873514615, iteration: 380186
loss: 0.9848664402961731,grad_norm: 0.9999992150135386, iteration: 380187
loss: 0.9719268083572388,grad_norm: 0.666163200364423, iteration: 380188
loss: 0.9588599801063538,grad_norm: 0.7379395494511886, iteration: 380189
loss: 1.0393997430801392,grad_norm: 0.7262267691014301, iteration: 380190
loss: 0.9994268417358398,grad_norm: 0.7863593719992266, iteration: 380191
loss: 1.0118168592453003,grad_norm: 0.816069288282376, iteration: 380192
loss: 0.978533923625946,grad_norm: 0.7624695190232601, iteration: 380193
loss: 1.0625991821289062,grad_norm: 0.9999997797230527, iteration: 380194
loss: 0.9901695847511292,grad_norm: 0.7556385476279298, iteration: 380195
loss: 1.054229497909546,grad_norm: 0.9999994114457019, iteration: 380196
loss: 1.0092096328735352,grad_norm: 0.967221451431844, iteration: 380197
loss: 0.9944443106651306,grad_norm: 0.8592635305737117, iteration: 380198
loss: 1.0208858251571655,grad_norm: 0.7286907065143391, iteration: 380199
loss: 0.9916806221008301,grad_norm: 0.8073853208788441, iteration: 380200
loss: 1.0333950519561768,grad_norm: 0.7217897935504559, iteration: 380201
loss: 0.9847385883331299,grad_norm: 0.7069023796537706, iteration: 380202
loss: 1.004897952079773,grad_norm: 0.9487841755681984, iteration: 380203
loss: 1.0226038694381714,grad_norm: 0.7253358994236877, iteration: 380204
loss: 1.0343655347824097,grad_norm: 0.7345084785179896, iteration: 380205
loss: 0.9778783917427063,grad_norm: 0.8792579023891509, iteration: 380206
loss: 0.9831311702728271,grad_norm: 0.9999990077018768, iteration: 380207
loss: 1.0191620588302612,grad_norm: 0.7052094269557639, iteration: 380208
loss: 1.0355987548828125,grad_norm: 0.9766104906734464, iteration: 380209
loss: 1.0859627723693848,grad_norm: 0.9999994069531795, iteration: 380210
loss: 0.9942317605018616,grad_norm: 0.9761335605606699, iteration: 380211
loss: 0.9896708130836487,grad_norm: 0.7798946688166278, iteration: 380212
loss: 1.0316224098205566,grad_norm: 0.7393500249787675, iteration: 380213
loss: 0.9950923919677734,grad_norm: 0.7425630423911411, iteration: 380214
loss: 1.0403008460998535,grad_norm: 0.9999991327699628, iteration: 380215
loss: 1.0384278297424316,grad_norm: 0.8775935239661167, iteration: 380216
loss: 1.048529028892517,grad_norm: 0.867835954245638, iteration: 380217
loss: 0.9956094026565552,grad_norm: 0.7508571006283584, iteration: 380218
loss: 1.0767580270767212,grad_norm: 0.9999998319626945, iteration: 380219
loss: 1.01034414768219,grad_norm: 0.7983046148031421, iteration: 380220
loss: 0.9982997179031372,grad_norm: 0.9400912565853365, iteration: 380221
loss: 1.0298190116882324,grad_norm: 0.8754808227776932, iteration: 380222
loss: 0.9938066601753235,grad_norm: 0.8943509346134891, iteration: 380223
loss: 1.095268726348877,grad_norm: 0.9999998230536028, iteration: 380224
loss: 1.011991024017334,grad_norm: 0.835395456494791, iteration: 380225
loss: 1.0031179189682007,grad_norm: 0.7448985884776244, iteration: 380226
loss: 0.9843810200691223,grad_norm: 0.7323237598940189, iteration: 380227
loss: 0.981951892375946,grad_norm: 0.9018589906586647, iteration: 380228
loss: 1.0134904384613037,grad_norm: 0.9999993751342847, iteration: 380229
loss: 0.9813675284385681,grad_norm: 0.7907378257598066, iteration: 380230
loss: 0.9873865246772766,grad_norm: 0.7468246188526527, iteration: 380231
loss: 0.9630962610244751,grad_norm: 0.8688038417084579, iteration: 380232
loss: 1.033582329750061,grad_norm: 0.7348522519024807, iteration: 380233
loss: 1.0025492906570435,grad_norm: 0.7999440451004618, iteration: 380234
loss: 1.0156358480453491,grad_norm: 0.9999991698151032, iteration: 380235
loss: 0.9733348488807678,grad_norm: 0.8154633084237152, iteration: 380236
loss: 1.0316035747528076,grad_norm: 0.9999993699741555, iteration: 380237
loss: 0.9876168370246887,grad_norm: 0.6995839887278452, iteration: 380238
loss: 1.012319803237915,grad_norm: 0.7003575856573717, iteration: 380239
loss: 1.0260257720947266,grad_norm: 0.819889789750938, iteration: 380240
loss: 1.0271310806274414,grad_norm: 0.7473274460676018, iteration: 380241
loss: 1.0207977294921875,grad_norm: 0.9999990256904535, iteration: 380242
loss: 0.9984368681907654,grad_norm: 0.7643250278432256, iteration: 380243
loss: 0.994195282459259,grad_norm: 0.9999993225123264, iteration: 380244
loss: 0.9955193400382996,grad_norm: 0.6606533987060311, iteration: 380245
loss: 1.0084222555160522,grad_norm: 0.7053935324371449, iteration: 380246
loss: 1.0399609804153442,grad_norm: 0.9999999418331369, iteration: 380247
loss: 0.9874091148376465,grad_norm: 0.999999214599484, iteration: 380248
loss: 0.976564884185791,grad_norm: 0.8186540665927232, iteration: 380249
loss: 0.9772189855575562,grad_norm: 0.8066735125937129, iteration: 380250
loss: 0.9967710971832275,grad_norm: 0.7420742157002569, iteration: 380251
loss: 1.0006403923034668,grad_norm: 0.7638017005732269, iteration: 380252
loss: 1.0112190246582031,grad_norm: 0.94210962029645, iteration: 380253
loss: 1.0206611156463623,grad_norm: 0.7065230636455558, iteration: 380254
loss: 1.0514812469482422,grad_norm: 0.9318952525517185, iteration: 380255
loss: 0.9937880635261536,grad_norm: 0.9120912510137009, iteration: 380256
loss: 0.962364137172699,grad_norm: 0.7593799360997109, iteration: 380257
loss: 1.0398268699645996,grad_norm: 0.8752869764498025, iteration: 380258
loss: 0.9862714409828186,grad_norm: 0.7958571373818831, iteration: 380259
loss: 1.0084747076034546,grad_norm: 0.7276657800050385, iteration: 380260
loss: 1.0149424076080322,grad_norm: 0.932808617033335, iteration: 380261
loss: 0.9616025686264038,grad_norm: 0.7943876406719311, iteration: 380262
loss: 0.98036789894104,grad_norm: 0.8819667396208943, iteration: 380263
loss: 1.0406008958816528,grad_norm: 0.783967986485949, iteration: 380264
loss: 1.0197479724884033,grad_norm: 0.7687925776337564, iteration: 380265
loss: 1.0605944395065308,grad_norm: 0.9334599613984358, iteration: 380266
loss: 1.0228017568588257,grad_norm: 0.7345241705375241, iteration: 380267
loss: 0.9954894781112671,grad_norm: 0.9999992054264313, iteration: 380268
loss: 1.1009761095046997,grad_norm: 0.9999990275782759, iteration: 380269
loss: 1.0120552778244019,grad_norm: 0.999999202797436, iteration: 380270
loss: 0.9630959033966064,grad_norm: 0.7327740053418142, iteration: 380271
loss: 0.9881757497787476,grad_norm: 0.8199475826930275, iteration: 380272
loss: 1.0236142873764038,grad_norm: 0.9999991660031673, iteration: 380273
loss: 1.1211369037628174,grad_norm: 0.8648161147377136, iteration: 380274
loss: 1.0080417394638062,grad_norm: 0.7965214137538121, iteration: 380275
loss: 1.0095065832138062,grad_norm: 0.8273835083740919, iteration: 380276
loss: 1.009783387184143,grad_norm: 0.9993921959154182, iteration: 380277
loss: 0.9706488847732544,grad_norm: 0.7887302594755644, iteration: 380278
loss: 0.9836965203285217,grad_norm: 0.926742120378409, iteration: 380279
loss: 1.0129516124725342,grad_norm: 0.7293270440003605, iteration: 380280
loss: 1.02360999584198,grad_norm: 0.999999073496219, iteration: 380281
loss: 1.0308504104614258,grad_norm: 0.7889563789658094, iteration: 380282
loss: 1.0786041021347046,grad_norm: 0.8345609860802803, iteration: 380283
loss: 0.988446831703186,grad_norm: 0.7269176843519158, iteration: 380284
loss: 1.004041075706482,grad_norm: 0.7804595972024575, iteration: 380285
loss: 1.035054326057434,grad_norm: 0.8697041592877486, iteration: 380286
loss: 1.00497305393219,grad_norm: 0.9999999041898426, iteration: 380287
loss: 1.0496519804000854,grad_norm: 0.9999990157718026, iteration: 380288
loss: 1.088403582572937,grad_norm: 0.9999999264069663, iteration: 380289
loss: 0.9623014330863953,grad_norm: 0.8255342368211837, iteration: 380290
loss: 0.9987320303916931,grad_norm: 0.8545067371051032, iteration: 380291
loss: 0.9616379141807556,grad_norm: 0.7167279600092844, iteration: 380292
loss: 1.1035841703414917,grad_norm: 0.9477380601963842, iteration: 380293
loss: 0.973335325717926,grad_norm: 0.9999993713191001, iteration: 380294
loss: 1.0040926933288574,grad_norm: 0.8609930605852195, iteration: 380295
loss: 1.01661217212677,grad_norm: 0.9999999573330349, iteration: 380296
loss: 1.0657331943511963,grad_norm: 0.9999998392265436, iteration: 380297
loss: 0.9963133335113525,grad_norm: 0.9188382807984606, iteration: 380298
loss: 0.9927961230278015,grad_norm: 0.7736958725397343, iteration: 380299
loss: 1.0236399173736572,grad_norm: 0.8978768007286012, iteration: 380300
loss: 1.008593201637268,grad_norm: 0.9999998126083864, iteration: 380301
loss: 0.9894639849662781,grad_norm: 0.7333223803218701, iteration: 380302
loss: 1.028355598449707,grad_norm: 0.9833386556884972, iteration: 380303
loss: 0.9893777966499329,grad_norm: 0.7483721501194355, iteration: 380304
loss: 1.0853787660598755,grad_norm: 0.8351822881877073, iteration: 380305
loss: 1.005069375038147,grad_norm: 0.6947526278111156, iteration: 380306
loss: 1.0279234647750854,grad_norm: 0.9999995198697161, iteration: 380307
loss: 1.0646429061889648,grad_norm: 0.9383124026903723, iteration: 380308
loss: 1.0072522163391113,grad_norm: 0.8100634090069342, iteration: 380309
loss: 0.9644472002983093,grad_norm: 0.8456013429268027, iteration: 380310
loss: 0.9564979076385498,grad_norm: 0.7674003375867376, iteration: 380311
loss: 1.0120292901992798,grad_norm: 0.9954013031630711, iteration: 380312
loss: 1.034274935722351,grad_norm: 0.7784235822492124, iteration: 380313
loss: 0.9685972332954407,grad_norm: 0.7503656751753205, iteration: 380314
loss: 1.0696243047714233,grad_norm: 0.9999990875556118, iteration: 380315
loss: 1.0327894687652588,grad_norm: 0.7688171880165516, iteration: 380316
loss: 0.99534672498703,grad_norm: 0.7766260980166917, iteration: 380317
loss: 1.0490018129348755,grad_norm: 0.9999997465491178, iteration: 380318
loss: 0.970180869102478,grad_norm: 0.8253736893912578, iteration: 380319
loss: 0.9860031604766846,grad_norm: 0.8287940792767136, iteration: 380320
loss: 1.0684077739715576,grad_norm: 0.9999995983622214, iteration: 380321
loss: 1.0640000104904175,grad_norm: 0.7823265639978865, iteration: 380322
loss: 1.1160666942596436,grad_norm: 0.9999993943838436, iteration: 380323
loss: 1.0617839097976685,grad_norm: 0.8002114129726925, iteration: 380324
loss: 1.024318814277649,grad_norm: 0.8189855451196396, iteration: 380325
loss: 0.9948141574859619,grad_norm: 0.8149067567111491, iteration: 380326
loss: 0.9910616874694824,grad_norm: 0.7452194488638122, iteration: 380327
loss: 1.0330463647842407,grad_norm: 0.8425885690579002, iteration: 380328
loss: 1.0445860624313354,grad_norm: 0.9208914901494694, iteration: 380329
loss: 1.007006287574768,grad_norm: 0.7039845466790313, iteration: 380330
loss: 1.0273334980010986,grad_norm: 0.9999998817066418, iteration: 380331
loss: 0.988003134727478,grad_norm: 0.8197904675689174, iteration: 380332
loss: 0.9962161779403687,grad_norm: 0.8184652306828278, iteration: 380333
loss: 0.9569872617721558,grad_norm: 0.8080586933060022, iteration: 380334
loss: 1.0719761848449707,grad_norm: 0.9999999896135647, iteration: 380335
loss: 0.9927400946617126,grad_norm: 0.6964106670123738, iteration: 380336
loss: 1.052802324295044,grad_norm: 0.9784194841371215, iteration: 380337
loss: 0.9979869723320007,grad_norm: 0.7156028275229537, iteration: 380338
loss: 1.0099272727966309,grad_norm: 0.8163756965982041, iteration: 380339
loss: 0.9987989664077759,grad_norm: 0.8899449181401188, iteration: 380340
loss: 0.9448173642158508,grad_norm: 0.821538715923988, iteration: 380341
loss: 1.0177079439163208,grad_norm: 0.7509715199047615, iteration: 380342
loss: 0.9756631851196289,grad_norm: 0.862568652038706, iteration: 380343
loss: 1.007844090461731,grad_norm: 0.8083633292412568, iteration: 380344
loss: 1.002268671989441,grad_norm: 0.8829607669854858, iteration: 380345
loss: 1.0020201206207275,grad_norm: 0.9626373313618078, iteration: 380346
loss: 1.0086252689361572,grad_norm: 0.80869353920713, iteration: 380347
loss: 1.0317567586898804,grad_norm: 0.8449053294480051, iteration: 380348
loss: 1.05006742477417,grad_norm: 0.7932556332902393, iteration: 380349
loss: 0.9904580116271973,grad_norm: 0.7204664800901651, iteration: 380350
loss: 0.992591381072998,grad_norm: 0.8172691311983227, iteration: 380351
loss: 0.97362220287323,grad_norm: 0.8173544112259605, iteration: 380352
loss: 0.9947043061256409,grad_norm: 0.9999995830796014, iteration: 380353
loss: 0.9725350737571716,grad_norm: 0.8674924650310817, iteration: 380354
loss: 0.9941652417182922,grad_norm: 0.9385516144703031, iteration: 380355
loss: 1.0145978927612305,grad_norm: 0.7909565614264343, iteration: 380356
loss: 1.043919324874878,grad_norm: 0.7604463624950613, iteration: 380357
loss: 1.0122343301773071,grad_norm: 0.6718398463478513, iteration: 380358
loss: 0.9864165186882019,grad_norm: 0.8308810233493287, iteration: 380359
loss: 1.0017991065979004,grad_norm: 0.8953359155308456, iteration: 380360
loss: 0.9885825514793396,grad_norm: 0.846952011182547, iteration: 380361
loss: 0.9947916865348816,grad_norm: 0.7885162142412326, iteration: 380362
loss: 0.9965224862098694,grad_norm: 0.8254095564143231, iteration: 380363
loss: 0.9936909675598145,grad_norm: 0.8518706445927423, iteration: 380364
loss: 0.9829654097557068,grad_norm: 0.7623491518264642, iteration: 380365
loss: 1.0515162944793701,grad_norm: 0.9999994514450055, iteration: 380366
loss: 1.2342571020126343,grad_norm: 0.9999995363954499, iteration: 380367
loss: 1.006995677947998,grad_norm: 0.6801907485444914, iteration: 380368
loss: 1.0246015787124634,grad_norm: 0.7736222370398513, iteration: 380369
loss: 1.0295438766479492,grad_norm: 0.9999990833999418, iteration: 380370
loss: 0.9260739088058472,grad_norm: 0.8537659393721758, iteration: 380371
loss: 0.9867215156555176,grad_norm: 0.7629774526399656, iteration: 380372
loss: 1.0074533224105835,grad_norm: 0.8497643515498045, iteration: 380373
loss: 1.0436822175979614,grad_norm: 0.9999991478591744, iteration: 380374
loss: 1.0244848728179932,grad_norm: 0.8101348728226729, iteration: 380375
loss: 0.973758339881897,grad_norm: 0.82492094212476, iteration: 380376
loss: 1.023708462715149,grad_norm: 0.8494070063926005, iteration: 380377
loss: 1.001618504524231,grad_norm: 0.9652177546618042, iteration: 380378
loss: 0.970948338508606,grad_norm: 0.9192913549943887, iteration: 380379
loss: 1.0103726387023926,grad_norm: 0.7459953923525577, iteration: 380380
loss: 1.0847159624099731,grad_norm: 0.9999995324039606, iteration: 380381
loss: 1.0097168684005737,grad_norm: 0.9434900401224501, iteration: 380382
loss: 1.0063732862472534,grad_norm: 0.9999996260772994, iteration: 380383
loss: 1.0767369270324707,grad_norm: 0.9999995605978427, iteration: 380384
loss: 0.9604942202568054,grad_norm: 0.9943505482460779, iteration: 380385
loss: 0.9835843443870544,grad_norm: 0.945577565904388, iteration: 380386
loss: 1.0298209190368652,grad_norm: 0.8013187998323821, iteration: 380387
loss: 0.9912921190261841,grad_norm: 0.9999995757016457, iteration: 380388
loss: 0.9740716814994812,grad_norm: 0.7673295699055545, iteration: 380389
loss: 0.9944947957992554,grad_norm: 0.7725902568688771, iteration: 380390
loss: 1.0021721124649048,grad_norm: 0.8944596113352066, iteration: 380391
loss: 1.018988013267517,grad_norm: 0.8588418820726904, iteration: 380392
loss: 0.9879558086395264,grad_norm: 0.7500701021506543, iteration: 380393
loss: 1.0293606519699097,grad_norm: 0.999999964928486, iteration: 380394
loss: 0.970291793346405,grad_norm: 0.7857812443936036, iteration: 380395
loss: 1.0153769254684448,grad_norm: 0.7785032003009588, iteration: 380396
loss: 1.006840467453003,grad_norm: 0.8366933970992583, iteration: 380397
loss: 1.0058258771896362,grad_norm: 0.6784182584273972, iteration: 380398
loss: 1.0012017488479614,grad_norm: 0.7533357612629159, iteration: 380399
loss: 1.0122300386428833,grad_norm: 0.9999992824955228, iteration: 380400
loss: 1.0699071884155273,grad_norm: 1.0000000529720017, iteration: 380401
loss: 0.9745293855667114,grad_norm: 0.927444708369096, iteration: 380402
loss: 0.98387610912323,grad_norm: 0.7195740021253647, iteration: 380403
loss: 1.042805790901184,grad_norm: 0.7919102055053316, iteration: 380404
loss: 1.0552644729614258,grad_norm: 0.9999991875552535, iteration: 380405
loss: 0.9877826571464539,grad_norm: 0.8697006659982701, iteration: 380406
loss: 1.0492134094238281,grad_norm: 0.7913604377006943, iteration: 380407
loss: 1.0329889059066772,grad_norm: 0.7285829201765914, iteration: 380408
loss: 1.001832127571106,grad_norm: 0.6082767905883084, iteration: 380409
loss: 0.9801122546195984,grad_norm: 0.7253880608549267, iteration: 380410
loss: 1.0400866270065308,grad_norm: 0.8675711969728253, iteration: 380411
loss: 0.9832643270492554,grad_norm: 0.7460380205105378, iteration: 380412
loss: 0.9807931780815125,grad_norm: 0.7674524850687087, iteration: 380413
loss: 1.002792477607727,grad_norm: 0.7444816534977158, iteration: 380414
loss: 1.05177640914917,grad_norm: 0.9999998066418139, iteration: 380415
loss: 1.0718227624893188,grad_norm: 0.9889806559966412, iteration: 380416
loss: 0.9998077750205994,grad_norm: 0.8524300349360188, iteration: 380417
loss: 0.9997981190681458,grad_norm: 0.8135367094594862, iteration: 380418
loss: 0.9966500401496887,grad_norm: 0.6709780406876941, iteration: 380419
loss: 0.9783698916435242,grad_norm: 0.8101195643887937, iteration: 380420
loss: 1.0155853033065796,grad_norm: 0.8173276570490009, iteration: 380421
loss: 1.003697156906128,grad_norm: 0.6996731942804552, iteration: 380422
loss: 1.0213334560394287,grad_norm: 0.7928876288823751, iteration: 380423
loss: 1.0446677207946777,grad_norm: 0.9999992347078993, iteration: 380424
loss: 1.0337786674499512,grad_norm: 0.8576364555474913, iteration: 380425
loss: 1.0408941507339478,grad_norm: 0.9999998558797317, iteration: 380426
loss: 1.0606187582015991,grad_norm: 0.7341881206166041, iteration: 380427
loss: 1.122327208518982,grad_norm: 0.9999998224297043, iteration: 380428
loss: 1.0073422193527222,grad_norm: 0.9999990457979143, iteration: 380429
loss: 0.9762591123580933,grad_norm: 0.6892410654193639, iteration: 380430
loss: 0.9804481863975525,grad_norm: 0.9554724707329189, iteration: 380431
loss: 1.3343509435653687,grad_norm: 0.9483866628692382, iteration: 380432
loss: 0.9859881401062012,grad_norm: 0.9999995369528966, iteration: 380433
loss: 1.0033271312713623,grad_norm: 0.9999999832228488, iteration: 380434
loss: 1.0225889682769775,grad_norm: 0.8087197493335594, iteration: 380435
loss: 1.0303765535354614,grad_norm: 0.8090562337685979, iteration: 380436
loss: 1.1560800075531006,grad_norm: 0.9999990998734714, iteration: 380437
loss: 0.994247555732727,grad_norm: 0.7794919302421857, iteration: 380438
loss: 1.1408861875534058,grad_norm: 0.999999532419477, iteration: 380439
loss: 1.0010956525802612,grad_norm: 0.9227708093896181, iteration: 380440
loss: 1.0576891899108887,grad_norm: 0.9658123818773515, iteration: 380441
loss: 0.9902587532997131,grad_norm: 0.7710292433264696, iteration: 380442
loss: 0.9856575131416321,grad_norm: 0.8823730368202226, iteration: 380443
loss: 1.0208390951156616,grad_norm: 0.7996537831454996, iteration: 380444
loss: 1.1288131475448608,grad_norm: 0.9999991757044893, iteration: 380445
loss: 0.9818282723426819,grad_norm: 0.8603524970878138, iteration: 380446
loss: 0.9895864725112915,grad_norm: 0.767427137107118, iteration: 380447
loss: 1.0596835613250732,grad_norm: 0.9013231164052354, iteration: 380448
loss: 0.9673908948898315,grad_norm: 0.8887527769224237, iteration: 380449
loss: 1.0153158903121948,grad_norm: 0.7012138659348688, iteration: 380450
loss: 1.2738529443740845,grad_norm: 0.9999995013887588, iteration: 380451
loss: 1.0566521883010864,grad_norm: 0.7634070179409397, iteration: 380452
loss: 1.013641119003296,grad_norm: 0.9999989700606359, iteration: 380453
loss: 1.0129903554916382,grad_norm: 0.7361141088547991, iteration: 380454
loss: 0.9904741048812866,grad_norm: 0.7005198760282667, iteration: 380455
loss: 0.9786204099655151,grad_norm: 0.7905223975134313, iteration: 380456
loss: 1.0155619382858276,grad_norm: 0.7982560645193353, iteration: 380457
loss: 1.0223629474639893,grad_norm: 0.9999998778274017, iteration: 380458
loss: 1.0288615226745605,grad_norm: 0.8279888161148908, iteration: 380459
loss: 0.9856584668159485,grad_norm: 0.6777582387005474, iteration: 380460
loss: 1.0128732919692993,grad_norm: 0.8059019852773467, iteration: 380461
loss: 0.9554834365844727,grad_norm: 0.8709286839649301, iteration: 380462
loss: 1.0054742097854614,grad_norm: 0.8870768270424948, iteration: 380463
loss: 1.015501618385315,grad_norm: 0.8572820307156676, iteration: 380464
loss: 0.9957460165023804,grad_norm: 0.9733385371160278, iteration: 380465
loss: 1.0163819789886475,grad_norm: 0.7524919361487464, iteration: 380466
loss: 0.9976943731307983,grad_norm: 0.8028777062595827, iteration: 380467
loss: 1.0158910751342773,grad_norm: 0.7192040739038982, iteration: 380468
loss: 1.0155656337738037,grad_norm: 0.8538960633587599, iteration: 380469
loss: 1.0263323783874512,grad_norm: 0.6989403791690987, iteration: 380470
loss: 1.0228813886642456,grad_norm: 0.8311874380972335, iteration: 380471
loss: 1.2312642335891724,grad_norm: 0.9999999394325898, iteration: 380472
loss: 1.0139952898025513,grad_norm: 0.7468437352662757, iteration: 380473
loss: 1.223334789276123,grad_norm: 0.9999997642131476, iteration: 380474
loss: 1.018486738204956,grad_norm: 0.5946943623564578, iteration: 380475
loss: 1.0247300863265991,grad_norm: 0.878490061745288, iteration: 380476
loss: 1.0759729146957397,grad_norm: 0.9999993641715503, iteration: 380477
loss: 1.041010856628418,grad_norm: 0.7382473681714067, iteration: 380478
loss: 1.0591121912002563,grad_norm: 0.9999998799361813, iteration: 380479
loss: 0.9850242733955383,grad_norm: 0.6830390794911404, iteration: 380480
loss: 1.0506302118301392,grad_norm: 0.734547266553289, iteration: 380481
loss: 0.9953711628913879,grad_norm: 0.8986011602314602, iteration: 380482
loss: 0.9910505414009094,grad_norm: 0.9478558928383144, iteration: 380483
loss: 1.2343965768814087,grad_norm: 0.9999997146289686, iteration: 380484
loss: 1.1445282697677612,grad_norm: 0.9999992034783343, iteration: 380485
loss: 1.0165587663650513,grad_norm: 0.7888684926021082, iteration: 380486
loss: 1.024752140045166,grad_norm: 0.7511479812453293, iteration: 380487
loss: 1.0348376035690308,grad_norm: 0.9999992025545895, iteration: 380488
loss: 1.0251778364181519,grad_norm: 0.8930162893180642, iteration: 380489
loss: 1.02566659450531,grad_norm: 0.9999994925937346, iteration: 380490
loss: 0.9926683306694031,grad_norm: 0.8540049100399826, iteration: 380491
loss: 1.0803649425506592,grad_norm: 0.9680367632042759, iteration: 380492
loss: 1.0013456344604492,grad_norm: 0.7719813591976807, iteration: 380493
loss: 1.0731695890426636,grad_norm: 0.9861070011880076, iteration: 380494
loss: 1.2565456628799438,grad_norm: 0.9999991330476317, iteration: 380495
loss: 1.3010386228561401,grad_norm: 0.9999999440268335, iteration: 380496
loss: 1.2257208824157715,grad_norm: 0.9999992676518127, iteration: 380497
loss: 0.9902135133743286,grad_norm: 0.7238710037989582, iteration: 380498
loss: 0.9599429965019226,grad_norm: 0.7570529614641157, iteration: 380499
loss: 1.0272648334503174,grad_norm: 0.9999993403442695, iteration: 380500
loss: 1.0692875385284424,grad_norm: 0.684352882217212, iteration: 380501
loss: 0.9883668422698975,grad_norm: 0.7308940599705118, iteration: 380502
loss: 0.9706175923347473,grad_norm: 0.7125615054447119, iteration: 380503
loss: 0.9700356721878052,grad_norm: 0.9999992351180155, iteration: 380504
loss: 0.9832814335823059,grad_norm: 0.7696550628845804, iteration: 380505
loss: 0.9975033402442932,grad_norm: 0.8189394727214948, iteration: 380506
loss: 0.9724458456039429,grad_norm: 0.9999995441635385, iteration: 380507
loss: 1.0959893465042114,grad_norm: 0.9867088905859582, iteration: 380508
loss: 1.0054214000701904,grad_norm: 0.6758933398265361, iteration: 380509
loss: 0.9849498867988586,grad_norm: 0.907863848396966, iteration: 380510
loss: 0.9809256196022034,grad_norm: 0.868730505881302, iteration: 380511
loss: 1.0251344442367554,grad_norm: 0.8825953505532108, iteration: 380512
loss: 1.0134894847869873,grad_norm: 0.8113968806193103, iteration: 380513
loss: 1.0494390726089478,grad_norm: 0.9999996151617411, iteration: 380514
loss: 1.013532280921936,grad_norm: 0.7933736158561684, iteration: 380515
loss: 1.0274955034255981,grad_norm: 0.8455061614857762, iteration: 380516
loss: 0.9918532371520996,grad_norm: 0.6658607155604036, iteration: 380517
loss: 1.0791107416152954,grad_norm: 0.9999998093940485, iteration: 380518
loss: 0.987064003944397,grad_norm: 0.857253489237563, iteration: 380519
loss: 1.0103607177734375,grad_norm: 0.8375510010727845, iteration: 380520
loss: 1.0470638275146484,grad_norm: 0.79742943108552, iteration: 380521
loss: 1.0128436088562012,grad_norm: 0.7835935480439572, iteration: 380522
loss: 0.9980833530426025,grad_norm: 0.8081980634703052, iteration: 380523
loss: 0.9631862044334412,grad_norm: 0.7989421232699057, iteration: 380524
loss: 1.2686694860458374,grad_norm: 0.9999994027253638, iteration: 380525
loss: 1.0662097930908203,grad_norm: 0.9999991775199418, iteration: 380526
loss: 1.0027663707733154,grad_norm: 0.7634881935992339, iteration: 380527
loss: 1.1581405401229858,grad_norm: 0.999999534853358, iteration: 380528
loss: 1.0274053812026978,grad_norm: 0.9704679705706737, iteration: 380529
loss: 1.062789797782898,grad_norm: 0.999999645387658, iteration: 380530
loss: 0.9891690015792847,grad_norm: 0.7235251261154079, iteration: 380531
loss: 0.9955697059631348,grad_norm: 0.7782436809258712, iteration: 380532
loss: 1.0418155193328857,grad_norm: 0.8305260717896922, iteration: 380533
loss: 1.001914620399475,grad_norm: 0.9999997093334093, iteration: 380534
loss: 0.990796685218811,grad_norm: 0.7339169440457702, iteration: 380535
loss: 0.9966701865196228,grad_norm: 0.7573397684061587, iteration: 380536
loss: 1.305469274520874,grad_norm: 0.9999996522261948, iteration: 380537
loss: 1.0128945112228394,grad_norm: 0.8163775418278681, iteration: 380538
loss: 1.027527928352356,grad_norm: 0.7866105812739719, iteration: 380539
loss: 1.053467035293579,grad_norm: 0.8876603565997496, iteration: 380540
loss: 1.0091264247894287,grad_norm: 0.99999956717904, iteration: 380541
loss: 1.0026887655258179,grad_norm: 0.8153728940640566, iteration: 380542
loss: 1.0152274370193481,grad_norm: 0.6898823767961797, iteration: 380543
loss: 0.9691715836524963,grad_norm: 0.999999105992212, iteration: 380544
loss: 1.0173425674438477,grad_norm: 0.7651532052850696, iteration: 380545
loss: 1.0992037057876587,grad_norm: 0.8796791857824743, iteration: 380546
loss: 1.6413654088974,grad_norm: 1.0000001238598875, iteration: 380547
loss: 1.032716989517212,grad_norm: 0.8303309438507346, iteration: 380548
loss: 1.0318217277526855,grad_norm: 0.8408862355373531, iteration: 380549
loss: 1.056730031967163,grad_norm: 0.7932554815864402, iteration: 380550
loss: 1.1366801261901855,grad_norm: 0.9999993554017167, iteration: 380551
loss: 0.9850783348083496,grad_norm: 0.9999999360907035, iteration: 380552
loss: 1.1779364347457886,grad_norm: 0.9999997597537014, iteration: 380553
loss: 1.0321390628814697,grad_norm: 0.9999996988410327, iteration: 380554
loss: 1.0322049856185913,grad_norm: 0.8243569318158913, iteration: 380555
loss: 1.0105700492858887,grad_norm: 0.7121209969685456, iteration: 380556
loss: 1.00895094871521,grad_norm: 0.6826694956087502, iteration: 380557
loss: 1.0092494487762451,grad_norm: 0.7569104193604386, iteration: 380558
loss: 0.969866156578064,grad_norm: 0.8464776963318557, iteration: 380559
loss: 0.9861844778060913,grad_norm: 0.8605357167136802, iteration: 380560
loss: 1.0305097103118896,grad_norm: 0.8293189130531167, iteration: 380561
loss: 1.0579768419265747,grad_norm: 0.9999992880372809, iteration: 380562
loss: 1.062881350517273,grad_norm: 0.8585083429362562, iteration: 380563
loss: 1.0059598684310913,grad_norm: 0.8324167693733778, iteration: 380564
loss: 1.0197856426239014,grad_norm: 0.8321027954571046, iteration: 380565
loss: 1.0513114929199219,grad_norm: 0.9999991727954352, iteration: 380566
loss: 0.997483491897583,grad_norm: 0.8383672057674733, iteration: 380567
loss: 0.9864478707313538,grad_norm: 0.7445511087076943, iteration: 380568
loss: 1.033278226852417,grad_norm: 0.6621293493360546, iteration: 380569
loss: 1.011403203010559,grad_norm: 0.7775914003222044, iteration: 380570
loss: 0.9766386151313782,grad_norm: 0.7127422962207836, iteration: 380571
loss: 0.9748706817626953,grad_norm: 0.6806884433166813, iteration: 380572
loss: 0.9909065961837769,grad_norm: 0.8415851599081979, iteration: 380573
loss: 1.0280643701553345,grad_norm: 0.8768337897857076, iteration: 380574
loss: 1.0690925121307373,grad_norm: 0.8071917393363434, iteration: 380575
loss: 0.992110550403595,grad_norm: 0.8739822462454748, iteration: 380576
loss: 1.0288398265838623,grad_norm: 0.9653374354258332, iteration: 380577
loss: 1.0472174882888794,grad_norm: 0.9999990843667265, iteration: 380578
loss: 0.9882096648216248,grad_norm: 0.9999991865663637, iteration: 380579
loss: 1.0118703842163086,grad_norm: 0.7514502959680944, iteration: 380580
loss: 1.0109971761703491,grad_norm: 0.8118881727551888, iteration: 380581
loss: 1.0370495319366455,grad_norm: 0.8164495117400761, iteration: 380582
loss: 1.0122170448303223,grad_norm: 0.6297318694436779, iteration: 380583
loss: 1.0430220365524292,grad_norm: 0.9120894500891591, iteration: 380584
loss: 1.016662359237671,grad_norm: 0.9000312756456516, iteration: 380585
loss: 0.9823644161224365,grad_norm: 0.7587913319802184, iteration: 380586
loss: 1.0794354677200317,grad_norm: 0.9999995891732993, iteration: 380587
loss: 0.9959084987640381,grad_norm: 0.997419621391781, iteration: 380588
loss: 0.9852639436721802,grad_norm: 0.8858121384129458, iteration: 380589
loss: 1.0179836750030518,grad_norm: 0.8599025874846079, iteration: 380590
loss: 1.0427464246749878,grad_norm: 0.8321565873635375, iteration: 380591
loss: 0.990810751914978,grad_norm: 0.8545481734646371, iteration: 380592
loss: 1.008089542388916,grad_norm: 0.771984712807082, iteration: 380593
loss: 1.004206657409668,grad_norm: 0.7387880619157243, iteration: 380594
loss: 1.3039436340332031,grad_norm: 0.999999513506711, iteration: 380595
loss: 1.096007227897644,grad_norm: 0.9999992747589532, iteration: 380596
loss: 0.9716707468032837,grad_norm: 0.7929531903244823, iteration: 380597
loss: 0.97211754322052,grad_norm: 0.6914787929194977, iteration: 380598
loss: 1.1033390760421753,grad_norm: 0.9999999184522145, iteration: 380599
loss: 0.9826381206512451,grad_norm: 0.7927552328262, iteration: 380600
loss: 1.108608365058899,grad_norm: 0.99999960872899, iteration: 380601
loss: 0.9931954741477966,grad_norm: 0.8350498101412057, iteration: 380602
loss: 1.0296170711517334,grad_norm: 0.8640044644073197, iteration: 380603
loss: 1.0768364667892456,grad_norm: 0.9999994152674673, iteration: 380604
loss: 1.0110206604003906,grad_norm: 0.7250745188694188, iteration: 380605
loss: 1.0804980993270874,grad_norm: 0.9999990960363611, iteration: 380606
loss: 1.0236427783966064,grad_norm: 0.9999997808531761, iteration: 380607
loss: 0.9946904182434082,grad_norm: 0.7647322191195997, iteration: 380608
loss: 0.9544978141784668,grad_norm: 0.7399881801461492, iteration: 380609
loss: 1.2369595766067505,grad_norm: 0.9999997568547965, iteration: 380610
loss: 0.9809359908103943,grad_norm: 0.747946586853559, iteration: 380611
loss: 0.9542841911315918,grad_norm: 0.8089129586571432, iteration: 380612
loss: 1.0044652223587036,grad_norm: 0.8223391298131795, iteration: 380613
loss: 1.1841647624969482,grad_norm: 0.9999999924632132, iteration: 380614
loss: 1.002288579940796,grad_norm: 0.7548144351857261, iteration: 380615
loss: 0.9977073669433594,grad_norm: 0.8701170639249973, iteration: 380616
loss: 1.002506136894226,grad_norm: 0.8317669774476535, iteration: 380617
loss: 1.132973313331604,grad_norm: 0.9999993051572278, iteration: 380618
loss: 1.005657434463501,grad_norm: 0.7908016049818848, iteration: 380619
loss: 1.0896260738372803,grad_norm: 0.9999996509803386, iteration: 380620
loss: 1.0300822257995605,grad_norm: 0.7820891735818861, iteration: 380621
loss: 1.021911859512329,grad_norm: 0.8573082723922565, iteration: 380622
loss: 1.0576943159103394,grad_norm: 0.9999994177552277, iteration: 380623
loss: 1.0482887029647827,grad_norm: 0.9999996907061101, iteration: 380624
loss: 0.9718520641326904,grad_norm: 0.7311250369274627, iteration: 380625
loss: 1.0306568145751953,grad_norm: 0.9902817588858359, iteration: 380626
loss: 0.9846090078353882,grad_norm: 0.8725318864298103, iteration: 380627
loss: 1.1132855415344238,grad_norm: 0.9999994878783126, iteration: 380628
loss: 1.0929335355758667,grad_norm: 0.9999991375969409, iteration: 380629
loss: 1.027740240097046,grad_norm: 0.938627601918408, iteration: 380630
loss: 0.9946309328079224,grad_norm: 0.7721974080949615, iteration: 380631
loss: 1.0751763582229614,grad_norm: 0.9956048834448561, iteration: 380632
loss: 1.0480409860610962,grad_norm: 0.9999995251570664, iteration: 380633
loss: 1.0190398693084717,grad_norm: 0.9128758005387722, iteration: 380634
loss: 1.0038032531738281,grad_norm: 0.8170848864321744, iteration: 380635
loss: 1.2179055213928223,grad_norm: 0.9999998679376626, iteration: 380636
loss: 1.0047014951705933,grad_norm: 0.7747683730087944, iteration: 380637
loss: 1.0829699039459229,grad_norm: 0.9199303734257799, iteration: 380638
loss: 1.0531162023544312,grad_norm: 0.8814563241975586, iteration: 380639
loss: 0.9839141368865967,grad_norm: 0.8267863325006335, iteration: 380640
loss: 0.995167076587677,grad_norm: 0.7371214781372041, iteration: 380641
loss: 1.1973676681518555,grad_norm: 0.9999999780627504, iteration: 380642
loss: 1.0699384212493896,grad_norm: 0.9999993615378563, iteration: 380643
loss: 0.9895167350769043,grad_norm: 0.7139310282268203, iteration: 380644
loss: 1.008158564567566,grad_norm: 0.9999999694084472, iteration: 380645
loss: 1.1184110641479492,grad_norm: 0.999999882839126, iteration: 380646
loss: 1.0094832181930542,grad_norm: 0.8488928219345262, iteration: 380647
loss: 1.0107343196868896,grad_norm: 0.9999998671688081, iteration: 380648
loss: 1.0053908824920654,grad_norm: 0.764890002101948, iteration: 380649
loss: 1.0561882257461548,grad_norm: 0.730779805275584, iteration: 380650
loss: 0.987734317779541,grad_norm: 0.8016361953840898, iteration: 380651
loss: 1.0146836042404175,grad_norm: 0.7146534749719097, iteration: 380652
loss: 1.0073397159576416,grad_norm: 0.854218360628744, iteration: 380653
loss: 0.9927582144737244,grad_norm: 0.7826919911320084, iteration: 380654
loss: 1.0314277410507202,grad_norm: 0.7098377632915658, iteration: 380655
loss: 0.9806966185569763,grad_norm: 0.9999990135714907, iteration: 380656
loss: 1.0588254928588867,grad_norm: 0.8944342023793233, iteration: 380657
loss: 1.0169246196746826,grad_norm: 0.7758126392340843, iteration: 380658
loss: 1.032477617263794,grad_norm: 0.9999994608863235, iteration: 380659
loss: 1.2565237283706665,grad_norm: 0.99999910984703, iteration: 380660
loss: 1.0622191429138184,grad_norm: 0.9999997932022489, iteration: 380661
loss: 1.030922532081604,grad_norm: 0.8036511642243606, iteration: 380662
loss: 1.0071289539337158,grad_norm: 0.9202748270988086, iteration: 380663
loss: 1.0649598836898804,grad_norm: 0.9999998145404373, iteration: 380664
loss: 0.9783506989479065,grad_norm: 0.6908859683834565, iteration: 380665
loss: 1.0184108018875122,grad_norm: 0.8812407013322824, iteration: 380666
loss: 1.0186970233917236,grad_norm: 0.9999993451456431, iteration: 380667
loss: 0.9521530270576477,grad_norm: 0.7100370828696442, iteration: 380668
loss: 1.0096888542175293,grad_norm: 0.6576531444997904, iteration: 380669
loss: 1.0090588331222534,grad_norm: 0.8249450836887557, iteration: 380670
loss: 1.0627353191375732,grad_norm: 0.9999992228484234, iteration: 380671
loss: 1.0289552211761475,grad_norm: 0.7408467853395739, iteration: 380672
loss: 1.0137302875518799,grad_norm: 0.7929431988665638, iteration: 380673
loss: 0.9879903197288513,grad_norm: 0.8638916189652669, iteration: 380674
loss: 1.148193359375,grad_norm: 0.9999994245457616, iteration: 380675
loss: 0.9680323600769043,grad_norm: 0.9999991164184611, iteration: 380676
loss: 1.0000889301300049,grad_norm: 0.7634572782144812, iteration: 380677
loss: 1.065706729888916,grad_norm: 0.8373458699460642, iteration: 380678
loss: 1.078076720237732,grad_norm: 0.9999997910614499, iteration: 380679
loss: 1.0633113384246826,grad_norm: 0.9999991798094043, iteration: 380680
loss: 1.0195109844207764,grad_norm: 0.8576058897614091, iteration: 380681
loss: 1.0033297538757324,grad_norm: 0.7875292828158214, iteration: 380682
loss: 1.1044092178344727,grad_norm: 0.9999990757824984, iteration: 380683
loss: 0.9814942479133606,grad_norm: 0.9029599328842556, iteration: 380684
loss: 1.0197532176971436,grad_norm: 0.8417600106681562, iteration: 380685
loss: 0.9994316101074219,grad_norm: 0.8086380328921852, iteration: 380686
loss: 0.9942471385002136,grad_norm: 0.8413316173850744, iteration: 380687
loss: 0.9611309766769409,grad_norm: 0.8512146567770443, iteration: 380688
loss: 0.9981158971786499,grad_norm: 0.8126596679579807, iteration: 380689
loss: 1.018405795097351,grad_norm: 0.8259458273353065, iteration: 380690
loss: 1.0130616426467896,grad_norm: 0.7987019573034635, iteration: 380691
loss: 0.9988846182823181,grad_norm: 0.7698488165130973, iteration: 380692
loss: 1.032400131225586,grad_norm: 0.8484590298157682, iteration: 380693
loss: 1.0142850875854492,grad_norm: 0.9574184130371479, iteration: 380694
loss: 1.0012133121490479,grad_norm: 0.8794659190492489, iteration: 380695
loss: 1.075364589691162,grad_norm: 0.9999997496284657, iteration: 380696
loss: 1.0060844421386719,grad_norm: 1.0000000603821653, iteration: 380697
loss: 1.1149733066558838,grad_norm: 0.9999994887173929, iteration: 380698
loss: 0.9846675395965576,grad_norm: 0.7941165089744641, iteration: 380699
loss: 1.0544912815093994,grad_norm: 0.7412504224414738, iteration: 380700
loss: 0.9966878890991211,grad_norm: 0.7205328533900474, iteration: 380701
loss: 1.010183572769165,grad_norm: 0.9959913294270215, iteration: 380702
loss: 1.022057294845581,grad_norm: 0.9999998580211275, iteration: 380703
loss: 1.0054851770401,grad_norm: 0.6816505435953867, iteration: 380704
loss: 1.0040134191513062,grad_norm: 0.7208096905886183, iteration: 380705
loss: 0.9823611378669739,grad_norm: 0.9166682924153938, iteration: 380706
loss: 0.9672634601593018,grad_norm: 0.7038626124052075, iteration: 380707
loss: 1.151785135269165,grad_norm: 0.9291177674452964, iteration: 380708
loss: 0.9702586531639099,grad_norm: 0.9999995571674689, iteration: 380709
loss: 0.9618234038352966,grad_norm: 0.8194495876251214, iteration: 380710
loss: 0.9876875877380371,grad_norm: 0.7968325294188631, iteration: 380711
loss: 1.0083404779434204,grad_norm: 0.8267757356149686, iteration: 380712
loss: 1.0039399862289429,grad_norm: 0.7611790276967582, iteration: 380713
loss: 1.0465359687805176,grad_norm: 0.9999999243303856, iteration: 380714
loss: 1.0276743173599243,grad_norm: 0.8719193339557173, iteration: 380715
loss: 0.9952799677848816,grad_norm: 0.9439441275488087, iteration: 380716
loss: 1.0679255723953247,grad_norm: 0.9999996621642598, iteration: 380717
loss: 0.9677146673202515,grad_norm: 0.7659745815303806, iteration: 380718
loss: 1.003430724143982,grad_norm: 0.647603031167302, iteration: 380719
loss: 0.9818388223648071,grad_norm: 0.6717879703354169, iteration: 380720
loss: 0.9797791838645935,grad_norm: 0.7405200234671494, iteration: 380721
loss: 1.0089291334152222,grad_norm: 0.7880318103659787, iteration: 380722
loss: 1.0775964260101318,grad_norm: 0.9999991264830302, iteration: 380723
loss: 1.0071719884872437,grad_norm: 0.7716288695397149, iteration: 380724
loss: 1.0166808366775513,grad_norm: 0.8112280756685539, iteration: 380725
loss: 0.9958438277244568,grad_norm: 0.9999989973938006, iteration: 380726
loss: 0.9927185773849487,grad_norm: 0.6319720210716939, iteration: 380727
loss: 0.9545369744300842,grad_norm: 0.7611333099568507, iteration: 380728
loss: 0.9510430097579956,grad_norm: 0.7219641702060178, iteration: 380729
loss: 0.9993493556976318,grad_norm: 0.9581271454311212, iteration: 380730
loss: 1.0030243396759033,grad_norm: 0.7368696512055988, iteration: 380731
loss: 1.0699018239974976,grad_norm: 0.997003027300641, iteration: 380732
loss: 0.9772724509239197,grad_norm: 0.9319018325900182, iteration: 380733
loss: 1.0002877712249756,grad_norm: 0.6606562784869523, iteration: 380734
loss: 0.9714971780776978,grad_norm: 0.9999993800254593, iteration: 380735
loss: 1.1823155879974365,grad_norm: 1.000000013840795, iteration: 380736
loss: 1.042892575263977,grad_norm: 0.7617145780286835, iteration: 380737
loss: 1.0008801221847534,grad_norm: 0.8948470403288775, iteration: 380738
loss: 0.9985430836677551,grad_norm: 0.7440908747418337, iteration: 380739
loss: 0.9933438897132874,grad_norm: 0.7872430209726904, iteration: 380740
loss: 0.9814501404762268,grad_norm: 0.798936913082246, iteration: 380741
loss: 0.9859480857849121,grad_norm: 0.8334504227980585, iteration: 380742
loss: 1.0369155406951904,grad_norm: 0.9999999878439161, iteration: 380743
loss: 1.0107272863388062,grad_norm: 0.99360009737724, iteration: 380744
loss: 1.0208944082260132,grad_norm: 0.7650352913970043, iteration: 380745
loss: 0.9596987366676331,grad_norm: 0.7783985701604424, iteration: 380746
loss: 1.0031486749649048,grad_norm: 0.9999999392665695, iteration: 380747
loss: 1.0152784585952759,grad_norm: 0.7114143380031378, iteration: 380748
loss: 1.0386732816696167,grad_norm: 0.9999990746530395, iteration: 380749
loss: 0.9756518602371216,grad_norm: 0.8049305725254711, iteration: 380750
loss: 1.035143256187439,grad_norm: 0.7100368613991489, iteration: 380751
loss: 1.0510104894638062,grad_norm: 0.7459316665546687, iteration: 380752
loss: 1.0034512281417847,grad_norm: 0.9027897081554734, iteration: 380753
loss: 1.0725451707839966,grad_norm: 0.9999994654390384, iteration: 380754
loss: 1.1465749740600586,grad_norm: 0.9999995632445324, iteration: 380755
loss: 0.9876986145973206,grad_norm: 0.8273204598219019, iteration: 380756
loss: 0.9984626770019531,grad_norm: 0.9999993161550597, iteration: 380757
loss: 1.0960181951522827,grad_norm: 0.9999995180739455, iteration: 380758
loss: 1.065636396408081,grad_norm: 0.9999992810724717, iteration: 380759
loss: 1.0046873092651367,grad_norm: 0.9248576708240186, iteration: 380760
loss: 0.9765239953994751,grad_norm: 0.5951825392635323, iteration: 380761
loss: 1.0008295774459839,grad_norm: 0.8568427623448118, iteration: 380762
loss: 1.0590696334838867,grad_norm: 0.7363885358018817, iteration: 380763
loss: 1.0634385347366333,grad_norm: 0.9999992706856917, iteration: 380764
loss: 1.0084080696105957,grad_norm: 0.8041456822919738, iteration: 380765
loss: 1.0665873289108276,grad_norm: 0.9213623393230979, iteration: 380766
loss: 0.9690554738044739,grad_norm: 0.6839410496787762, iteration: 380767
loss: 1.0015802383422852,grad_norm: 0.9999992493700951, iteration: 380768
loss: 1.0440765619277954,grad_norm: 0.8705676886148981, iteration: 380769
loss: 0.9424235224723816,grad_norm: 0.7130857093662034, iteration: 380770
loss: 1.056219220161438,grad_norm: 0.700354134085003, iteration: 380771
loss: 1.0020016431808472,grad_norm: 0.822593641350385, iteration: 380772
loss: 0.9927842020988464,grad_norm: 0.8197728011745472, iteration: 380773
loss: 1.0296518802642822,grad_norm: 0.9572733516105496, iteration: 380774
loss: 1.0066807270050049,grad_norm: 0.9999998140817763, iteration: 380775
loss: 1.0288829803466797,grad_norm: 0.9999991591069611, iteration: 380776
loss: 1.1104583740234375,grad_norm: 0.9999992089783681, iteration: 380777
loss: 1.007738471031189,grad_norm: 0.9295859000376488, iteration: 380778
loss: 0.9639691114425659,grad_norm: 0.7276636972263862, iteration: 380779
loss: 1.0319993495941162,grad_norm: 0.7722848927003827, iteration: 380780
loss: 1.029795527458191,grad_norm: 0.7447924924742393, iteration: 380781
loss: 0.9969707727432251,grad_norm: 0.7805770654745066, iteration: 380782
loss: 1.0315829515457153,grad_norm: 0.906511422812237, iteration: 380783
loss: 1.0325672626495361,grad_norm: 0.7310643635024849, iteration: 380784
loss: 0.9991024732589722,grad_norm: 0.7276218280047052, iteration: 380785
loss: 0.9912003874778748,grad_norm: 0.7204694663339236, iteration: 380786
loss: 1.1081488132476807,grad_norm: 0.9999990819100166, iteration: 380787
loss: 1.0272905826568604,grad_norm: 0.8573257141496128, iteration: 380788
loss: 1.0269701480865479,grad_norm: 0.9999997490831692, iteration: 380789
loss: 1.0301380157470703,grad_norm: 0.6937790935333114, iteration: 380790
loss: 0.9875339269638062,grad_norm: 0.9581528163880126, iteration: 380791
loss: 1.0023443698883057,grad_norm: 0.9999997790443964, iteration: 380792
loss: 1.0160428285598755,grad_norm: 0.9011055938752093, iteration: 380793
loss: 1.00309157371521,grad_norm: 0.7482436356855763, iteration: 380794
loss: 1.0385565757751465,grad_norm: 0.7359659238750188, iteration: 380795
loss: 1.0132863521575928,grad_norm: 0.7681196810324574, iteration: 380796
loss: 0.9773042798042297,grad_norm: 0.7756377606531585, iteration: 380797
loss: 1.022939682006836,grad_norm: 0.7701912802114367, iteration: 380798
loss: 0.9986633658409119,grad_norm: 0.7030861460237414, iteration: 380799
loss: 0.946732223033905,grad_norm: 0.6338707156162507, iteration: 380800
loss: 0.9961895942687988,grad_norm: 0.8734818380041616, iteration: 380801
loss: 1.0013636350631714,grad_norm: 0.9999993164601257, iteration: 380802
loss: 1.0866568088531494,grad_norm: 1.000000052987201, iteration: 380803
loss: 1.0216690301895142,grad_norm: 0.8076152816168831, iteration: 380804
loss: 0.9715898036956787,grad_norm: 0.8031049803646558, iteration: 380805
loss: 1.0323939323425293,grad_norm: 0.9999998277609156, iteration: 380806
loss: 1.0738691091537476,grad_norm: 0.7632011235190844, iteration: 380807
loss: 0.9982128143310547,grad_norm: 0.8692792663875543, iteration: 380808
loss: 1.0967035293579102,grad_norm: 0.999999559641186, iteration: 380809
loss: 0.9956867694854736,grad_norm: 1.0000000069831323, iteration: 380810
loss: 1.175499677658081,grad_norm: 0.9999995214105653, iteration: 380811
loss: 1.0341545343399048,grad_norm: 0.7266233601579807, iteration: 380812
loss: 1.0175260305404663,grad_norm: 0.7709485825689824, iteration: 380813
loss: 1.1774263381958008,grad_norm: 0.9999991276592816, iteration: 380814
loss: 1.0153071880340576,grad_norm: 0.6882871863569846, iteration: 380815
loss: 1.0404434204101562,grad_norm: 0.9999992619060621, iteration: 380816
loss: 1.014670491218567,grad_norm: 0.7521683732669958, iteration: 380817
loss: 1.0063966512680054,grad_norm: 0.9999998434897007, iteration: 380818
loss: 0.9861482381820679,grad_norm: 0.9854140418481809, iteration: 380819
loss: 1.0551151037216187,grad_norm: 0.8671774524222313, iteration: 380820
loss: 1.0208444595336914,grad_norm: 0.8799765473546092, iteration: 380821
loss: 1.0326215028762817,grad_norm: 0.7750777930345463, iteration: 380822
loss: 1.0079280138015747,grad_norm: 0.80385542390973, iteration: 380823
loss: 1.0856207609176636,grad_norm: 0.9999991028647391, iteration: 380824
loss: 0.9946264624595642,grad_norm: 0.8732120041218032, iteration: 380825
loss: 1.0554161071777344,grad_norm: 0.7780231215793917, iteration: 380826
loss: 1.039365530014038,grad_norm: 0.7685980080479937, iteration: 380827
loss: 1.0298874378204346,grad_norm: 0.8062596117380291, iteration: 380828
loss: 0.9761036038398743,grad_norm: 0.7185864922898525, iteration: 380829
loss: 1.0137907266616821,grad_norm: 0.6690974174955157, iteration: 380830
loss: 0.9833399653434753,grad_norm: 0.8948978892745734, iteration: 380831
loss: 0.9739856123924255,grad_norm: 0.759102674354557, iteration: 380832
loss: 1.0098974704742432,grad_norm: 0.8031547444719104, iteration: 380833
loss: 0.9697008728981018,grad_norm: 0.7203542609393063, iteration: 380834
loss: 1.1769206523895264,grad_norm: 0.9999996520606694, iteration: 380835
loss: 0.9947879910469055,grad_norm: 0.7711823327020123, iteration: 380836
loss: 1.0339019298553467,grad_norm: 0.7451367229930036, iteration: 380837
loss: 1.0846229791641235,grad_norm: 0.999999051982643, iteration: 380838
loss: 0.9943428635597229,grad_norm: 0.8775256189542794, iteration: 380839
loss: 1.1670514345169067,grad_norm: 0.9999998929550024, iteration: 380840
loss: 1.0082066059112549,grad_norm: 0.8402541895651786, iteration: 380841
loss: 0.9834938049316406,grad_norm: 0.9282297806741644, iteration: 380842
loss: 1.0009088516235352,grad_norm: 0.6871159539064127, iteration: 380843
loss: 1.045021891593933,grad_norm: 0.7939556739918057, iteration: 380844
loss: 0.9965797066688538,grad_norm: 0.7957558464911852, iteration: 380845
loss: 1.042776346206665,grad_norm: 0.7163701844826109, iteration: 380846
loss: 1.0100908279418945,grad_norm: 0.752752087118548, iteration: 380847
loss: 0.9762624502182007,grad_norm: 0.8595596785778871, iteration: 380848
loss: 1.0975605249404907,grad_norm: 0.999999928133712, iteration: 380849
loss: 1.0085225105285645,grad_norm: 0.7466557041117273, iteration: 380850
loss: 1.040050983428955,grad_norm: 0.7600071527424073, iteration: 380851
loss: 0.9674707055091858,grad_norm: 0.9626484323639913, iteration: 380852
loss: 0.9976832866668701,grad_norm: 0.8056111143132748, iteration: 380853
loss: 1.0091991424560547,grad_norm: 0.6622779951105553, iteration: 380854
loss: 1.0193461179733276,grad_norm: 0.9645841866263454, iteration: 380855
loss: 0.9879032373428345,grad_norm: 0.7010642578734237, iteration: 380856
loss: 1.004307508468628,grad_norm: 0.6387017421594935, iteration: 380857
loss: 1.001186490058899,grad_norm: 0.8212777263551784, iteration: 380858
loss: 1.0111474990844727,grad_norm: 0.7461163625727408, iteration: 380859
loss: 0.9864834547042847,grad_norm: 0.6378408723284126, iteration: 380860
loss: 1.0465906858444214,grad_norm: 0.9999991875647518, iteration: 380861
loss: 1.0603687763214111,grad_norm: 0.8619542121918481, iteration: 380862
loss: 1.0130927562713623,grad_norm: 0.7832765779870909, iteration: 380863
loss: 1.0151255130767822,grad_norm: 0.7598097425533575, iteration: 380864
loss: 0.9958235025405884,grad_norm: 0.8044890089931042, iteration: 380865
loss: 1.0166733264923096,grad_norm: 0.9875834013846516, iteration: 380866
loss: 1.0093885660171509,grad_norm: 0.8425081588301827, iteration: 380867
loss: 1.2045543193817139,grad_norm: 0.999999471126882, iteration: 380868
loss: 1.147003412246704,grad_norm: 0.9999997263963616, iteration: 380869
loss: 1.029208779335022,grad_norm: 0.969188805068028, iteration: 380870
loss: 1.0027087926864624,grad_norm: 0.7352309673511053, iteration: 380871
loss: 1.0148000717163086,grad_norm: 0.9999997694492293, iteration: 380872
loss: 0.986513614654541,grad_norm: 0.7458371424513508, iteration: 380873
loss: 0.9641268849372864,grad_norm: 0.766981347795319, iteration: 380874
loss: 1.167165994644165,grad_norm: 0.9999997339248956, iteration: 380875
loss: 1.0307949781417847,grad_norm: 0.8967770270627442, iteration: 380876
loss: 1.0223788022994995,grad_norm: 0.8776053275744882, iteration: 380877
loss: 1.011207103729248,grad_norm: 0.8308966828575696, iteration: 380878
loss: 1.0061894655227661,grad_norm: 0.7568067155025584, iteration: 380879
loss: 1.1594383716583252,grad_norm: 0.9999998900154247, iteration: 380880
loss: 1.0376676321029663,grad_norm: 0.7574037498519592, iteration: 380881
loss: 1.096376657485962,grad_norm: 0.9999993784393431, iteration: 380882
loss: 1.0199053287506104,grad_norm: 0.790327178337068, iteration: 380883
loss: 1.017735481262207,grad_norm: 0.9087293574195275, iteration: 380884
loss: 1.059261679649353,grad_norm: 0.8281644710927735, iteration: 380885
loss: 1.1082836389541626,grad_norm: 0.9122700184941688, iteration: 380886
loss: 1.062851071357727,grad_norm: 0.9999996368781752, iteration: 380887
loss: 1.0151077508926392,grad_norm: 0.7221767005652242, iteration: 380888
loss: 1.1002753973007202,grad_norm: 0.9999991406771022, iteration: 380889
loss: 0.9662513136863708,grad_norm: 0.8416081410475418, iteration: 380890
loss: 1.0107933282852173,grad_norm: 0.8204768440887623, iteration: 380891
loss: 0.9797695875167847,grad_norm: 0.8130248576016812, iteration: 380892
loss: 1.0161336660385132,grad_norm: 0.7829162456341874, iteration: 380893
loss: 1.0279124975204468,grad_norm: 0.9999998388576964, iteration: 380894
loss: 0.9890580177307129,grad_norm: 0.8599586532007113, iteration: 380895
loss: 1.0093133449554443,grad_norm: 0.9999999729568013, iteration: 380896
loss: 0.9885354042053223,grad_norm: 0.7381948948245454, iteration: 380897
loss: 1.012583613395691,grad_norm: 0.8245761110718592, iteration: 380898
loss: 1.069578766822815,grad_norm: 0.9999997749915674, iteration: 380899
loss: 1.0088088512420654,grad_norm: 0.740860492033755, iteration: 380900
loss: 1.004563808441162,grad_norm: 0.7569241048241965, iteration: 380901
loss: 0.9746841788291931,grad_norm: 0.768294970066414, iteration: 380902
loss: 1.1439385414123535,grad_norm: 0.9999999212460369, iteration: 380903
loss: 1.0516464710235596,grad_norm: 0.9999994940514809, iteration: 380904
loss: 0.9780094027519226,grad_norm: 0.7454039442057873, iteration: 380905
loss: 1.107513666152954,grad_norm: 0.9999995321734225, iteration: 380906
loss: 0.9757892489433289,grad_norm: 0.9999995900462101, iteration: 380907
loss: 1.0628249645233154,grad_norm: 0.9999994241962813, iteration: 380908
loss: 1.0195293426513672,grad_norm: 0.7794583945342975, iteration: 380909
loss: 0.9789294004440308,grad_norm: 0.8242408919552107, iteration: 380910
loss: 0.9910053014755249,grad_norm: 0.7348451530224083, iteration: 380911
loss: 1.0129929780960083,grad_norm: 0.8785232363606468, iteration: 380912
loss: 1.0169179439544678,grad_norm: 0.8823592778953144, iteration: 380913
loss: 1.0528796911239624,grad_norm: 0.9302823533572409, iteration: 380914
loss: 1.099836826324463,grad_norm: 0.8938556377434178, iteration: 380915
loss: 0.9824946522712708,grad_norm: 0.7847958807787118, iteration: 380916
loss: 1.058344841003418,grad_norm: 0.9999995248913102, iteration: 380917
loss: 0.9973013997077942,grad_norm: 0.9999992309637277, iteration: 380918
loss: 1.0190349817276,grad_norm: 0.9999990767447702, iteration: 380919
loss: 1.010587215423584,grad_norm: 0.812372799920956, iteration: 380920
loss: 1.027462363243103,grad_norm: 0.9885418917425096, iteration: 380921
loss: 1.0090490579605103,grad_norm: 0.8091755992954626, iteration: 380922
loss: 1.0417275428771973,grad_norm: 0.9390957376803086, iteration: 380923
loss: 0.9937145709991455,grad_norm: 0.9999995752953083, iteration: 380924
loss: 1.1309698820114136,grad_norm: 0.9999990977686182, iteration: 380925
loss: 1.159085750579834,grad_norm: 0.9999998398784667, iteration: 380926
loss: 0.9860262274742126,grad_norm: 0.8113126766785679, iteration: 380927
loss: 1.010157585144043,grad_norm: 0.7345850658082133, iteration: 380928
loss: 0.9831916689872742,grad_norm: 0.7801774255933185, iteration: 380929
loss: 1.0400851964950562,grad_norm: 0.7714094708397194, iteration: 380930
loss: 1.0110310316085815,grad_norm: 0.8001563412791111, iteration: 380931
loss: 0.9740766286849976,grad_norm: 0.73487886671019, iteration: 380932
loss: 1.343112587928772,grad_norm: 1.0000000349633122, iteration: 380933
loss: 1.0342010259628296,grad_norm: 0.9960732376951509, iteration: 380934
loss: 0.9708097577095032,grad_norm: 0.7162567723527288, iteration: 380935
loss: 1.011433482170105,grad_norm: 0.747588492358755, iteration: 380936
loss: 0.9898073077201843,grad_norm: 0.9999990512887874, iteration: 380937
loss: 0.9984982013702393,grad_norm: 0.8698276859380661, iteration: 380938
loss: 0.9714632630348206,grad_norm: 0.9999991648512754, iteration: 380939
loss: 1.0491234064102173,grad_norm: 0.9999993480594134, iteration: 380940
loss: 0.9996628165245056,grad_norm: 0.7536161376331248, iteration: 380941
loss: 0.9518030881881714,grad_norm: 0.7187501288507462, iteration: 380942
loss: 1.0132554769515991,grad_norm: 0.9354747159537395, iteration: 380943
loss: 1.0013786554336548,grad_norm: 0.9637290501705245, iteration: 380944
loss: 0.9651153683662415,grad_norm: 0.9142677741294583, iteration: 380945
loss: 1.0270061492919922,grad_norm: 0.7965358146612764, iteration: 380946
loss: 1.0403093099594116,grad_norm: 0.9999991688136272, iteration: 380947
loss: 0.9808078408241272,grad_norm: 0.8603869480683325, iteration: 380948
loss: 1.0900540351867676,grad_norm: 0.9999998573829834, iteration: 380949
loss: 1.0507444143295288,grad_norm: 0.9999992155339092, iteration: 380950
loss: 1.0471503734588623,grad_norm: 0.999999226357387, iteration: 380951
loss: 1.014646291732788,grad_norm: 0.9021878572678889, iteration: 380952
loss: 0.9861850738525391,grad_norm: 0.7617806771374733, iteration: 380953
loss: 0.9988099336624146,grad_norm: 0.9999993510231143, iteration: 380954
loss: 0.9630081653594971,grad_norm: 0.8069325258972057, iteration: 380955
loss: 1.044485330581665,grad_norm: 0.7731841134048069, iteration: 380956
loss: 0.960818886756897,grad_norm: 0.8352534564721708, iteration: 380957
loss: 0.9562638998031616,grad_norm: 0.7516974936115, iteration: 380958
loss: 1.0058239698410034,grad_norm: 0.8495304384638493, iteration: 380959
loss: 0.9642049074172974,grad_norm: 0.9999991982978533, iteration: 380960
loss: 1.382387638092041,grad_norm: 0.9999995190450924, iteration: 380961
loss: 1.0217220783233643,grad_norm: 1.0000000329363588, iteration: 380962
loss: 1.0382380485534668,grad_norm: 0.6455708616189817, iteration: 380963
loss: 1.03948175907135,grad_norm: 0.9999994584658504, iteration: 380964
loss: 1.0185673236846924,grad_norm: 0.910508714598639, iteration: 380965
loss: 0.9556322693824768,grad_norm: 0.7968055869019626, iteration: 380966
loss: 1.2271020412445068,grad_norm: 0.9999998604949341, iteration: 380967
loss: 1.0386933088302612,grad_norm: 0.6854355516381808, iteration: 380968
loss: 1.0203700065612793,grad_norm: 0.7787197613631178, iteration: 380969
loss: 1.0058635473251343,grad_norm: 0.9999996493570632, iteration: 380970
loss: 0.9658876061439514,grad_norm: 0.7787672890176105, iteration: 380971
loss: 1.0600292682647705,grad_norm: 0.7391886735700514, iteration: 380972
loss: 0.9943616986274719,grad_norm: 0.9999998923034702, iteration: 380973
loss: 1.0008387565612793,grad_norm: 0.7361921317067583, iteration: 380974
loss: 0.982090413570404,grad_norm: 0.8265019859178537, iteration: 380975
loss: 1.0524518489837646,grad_norm: 0.9536339241162581, iteration: 380976
loss: 1.1245797872543335,grad_norm: 0.9477127797425245, iteration: 380977
loss: 1.016769528388977,grad_norm: 0.8271230092976021, iteration: 380978
loss: 1.0365062952041626,grad_norm: 0.7891729032630497, iteration: 380979
loss: 0.9838986396789551,grad_norm: 0.8275291784005715, iteration: 380980
loss: 1.1769213676452637,grad_norm: 0.9999997771672012, iteration: 380981
loss: 0.9741342663764954,grad_norm: 0.8171588561557264, iteration: 380982
loss: 1.087762475013733,grad_norm: 0.9999998849999335, iteration: 380983
loss: 1.0110206604003906,grad_norm: 0.9999997922644075, iteration: 380984
loss: 1.0430364608764648,grad_norm: 0.7508203508533473, iteration: 380985
loss: 1.036333680152893,grad_norm: 0.8235532568202019, iteration: 380986
loss: 1.0259112119674683,grad_norm: 0.999999305680884, iteration: 380987
loss: 1.0095914602279663,grad_norm: 0.7272404935226112, iteration: 380988
loss: 1.298098087310791,grad_norm: 0.9999999754341797, iteration: 380989
loss: 1.0178489685058594,grad_norm: 0.6992366580096665, iteration: 380990
loss: 1.0617152452468872,grad_norm: 0.9999992385010281, iteration: 380991
loss: 0.981711745262146,grad_norm: 0.8249436046030462, iteration: 380992
loss: 0.9873867630958557,grad_norm: 0.7319526996309158, iteration: 380993
loss: 0.9868806600570679,grad_norm: 0.8478711133042837, iteration: 380994
loss: 0.9876119494438171,grad_norm: 0.8649678837179707, iteration: 380995
loss: 1.038205862045288,grad_norm: 0.999999757894232, iteration: 380996
loss: 1.0752602815628052,grad_norm: 0.7837542506374668, iteration: 380997
loss: 0.9880040884017944,grad_norm: 0.7930060004627133, iteration: 380998
loss: 1.2550606727600098,grad_norm: 0.9999996410294423, iteration: 380999
loss: 0.9852172136306763,grad_norm: 0.7096453530564332, iteration: 381000
loss: 1.5448342561721802,grad_norm: 0.999999443168303, iteration: 381001
loss: 1.0109082460403442,grad_norm: 0.6956454077795822, iteration: 381002
loss: 1.3795690536499023,grad_norm: 0.9999996468494307, iteration: 381003
loss: 1.0263346433639526,grad_norm: 0.8300346314163816, iteration: 381004
loss: 1.0065494775772095,grad_norm: 0.8493701768354358, iteration: 381005
loss: 1.039276123046875,grad_norm: 0.9999997197834454, iteration: 381006
loss: 1.0192533731460571,grad_norm: 0.7608100581367628, iteration: 381007
loss: 1.025000810623169,grad_norm: 0.9316172321380466, iteration: 381008
loss: 1.0050135850906372,grad_norm: 0.7996700860064302, iteration: 381009
loss: 1.0268899202346802,grad_norm: 0.8420052446996563, iteration: 381010
loss: 0.9803844690322876,grad_norm: 0.7344039515819435, iteration: 381011
loss: 0.9924709796905518,grad_norm: 0.757552252659454, iteration: 381012
loss: 1.0542815923690796,grad_norm: 0.8642706620620298, iteration: 381013
loss: 1.043900489807129,grad_norm: 0.940670232552492, iteration: 381014
loss: 0.9845150113105774,grad_norm: 0.9601362276636886, iteration: 381015
loss: 1.045809030532837,grad_norm: 0.8262822958937618, iteration: 381016
loss: 1.0792388916015625,grad_norm: 0.8829838812698828, iteration: 381017
loss: 0.9886777400970459,grad_norm: 0.681397614369556, iteration: 381018
loss: 1.0733202695846558,grad_norm: 0.9999996623333769, iteration: 381019
loss: 1.1420077085494995,grad_norm: 0.9999998078740651, iteration: 381020
loss: 1.2658755779266357,grad_norm: 0.9999999743698271, iteration: 381021
loss: 0.9671186804771423,grad_norm: 0.9999993991613838, iteration: 381022
loss: 1.0171761512756348,grad_norm: 0.8935545504071474, iteration: 381023
loss: 1.4321964979171753,grad_norm: 0.9999992625066317, iteration: 381024
loss: 1.0355775356292725,grad_norm: 0.6893687222033685, iteration: 381025
loss: 1.0981556177139282,grad_norm: 0.999999792722919, iteration: 381026
loss: 1.094631552696228,grad_norm: 0.8626245163633012, iteration: 381027
loss: 1.0073933601379395,grad_norm: 0.7374320343789907, iteration: 381028
loss: 1.0237599611282349,grad_norm: 0.6388557861305569, iteration: 381029
loss: 1.0640946626663208,grad_norm: 0.9999990038907423, iteration: 381030
loss: 0.9893752336502075,grad_norm: 0.7865219679523582, iteration: 381031
loss: 1.12001371383667,grad_norm: 1.0000000455866975, iteration: 381032
loss: 0.979476809501648,grad_norm: 0.967000868693902, iteration: 381033
loss: 1.0166071653366089,grad_norm: 0.8514632012567178, iteration: 381034
loss: 0.9741194844245911,grad_norm: 0.8715673745738883, iteration: 381035
loss: 1.0384472608566284,grad_norm: 0.9999994599905075, iteration: 381036
loss: 1.0285650491714478,grad_norm: 0.8378671931161424, iteration: 381037
loss: 1.0954585075378418,grad_norm: 0.9999995214663715, iteration: 381038
loss: 0.996867299079895,grad_norm: 0.7356887157512222, iteration: 381039
loss: 1.0191231966018677,grad_norm: 0.8938644555789428, iteration: 381040
loss: 1.0752191543579102,grad_norm: 0.9338519034000645, iteration: 381041
loss: 1.0512620210647583,grad_norm: 0.6979090278826625, iteration: 381042
loss: 1.0259120464324951,grad_norm: 0.9999997334181637, iteration: 381043
loss: 1.251029372215271,grad_norm: 0.9999997277083714, iteration: 381044
loss: 1.1556620597839355,grad_norm: 0.9999998118148515, iteration: 381045
loss: 1.0581355094909668,grad_norm: 0.9999998074664049, iteration: 381046
loss: 1.1597932577133179,grad_norm: 0.8237237466901712, iteration: 381047
loss: 1.1004407405853271,grad_norm: 0.9999992664903777, iteration: 381048
loss: 1.0362452268600464,grad_norm: 0.7732762104438268, iteration: 381049
loss: 1.340360164642334,grad_norm: 0.999999343041966, iteration: 381050
loss: 1.0339688062667847,grad_norm: 0.8552905927768094, iteration: 381051
loss: 1.0470284223556519,grad_norm: 0.9999998980202208, iteration: 381052
loss: 0.9885343909263611,grad_norm: 0.8238136542849621, iteration: 381053
loss: 1.0219359397888184,grad_norm: 0.999999381605614, iteration: 381054
loss: 1.0771732330322266,grad_norm: 0.9999995517226627, iteration: 381055
loss: 0.9797236323356628,grad_norm: 0.7715831074092758, iteration: 381056
loss: 1.111656665802002,grad_norm: 0.9999994416343047, iteration: 381057
loss: 1.0158334970474243,grad_norm: 0.770205466816287, iteration: 381058
loss: 0.988012969493866,grad_norm: 0.8187495178132311, iteration: 381059
loss: 1.0113438367843628,grad_norm: 0.8057222337929816, iteration: 381060
loss: 1.0108399391174316,grad_norm: 0.7746184007270012, iteration: 381061
loss: 0.9640603065490723,grad_norm: 0.9735597806893901, iteration: 381062
loss: 1.0094661712646484,grad_norm: 0.6996294328275967, iteration: 381063
loss: 1.0026320219039917,grad_norm: 0.7156861221376368, iteration: 381064
loss: 1.0103665590286255,grad_norm: 0.9999993394522213, iteration: 381065
loss: 1.0141584873199463,grad_norm: 0.8114056104467183, iteration: 381066
loss: 1.0069928169250488,grad_norm: 0.9999991847938591, iteration: 381067
loss: 0.9658226370811462,grad_norm: 0.7916870155953025, iteration: 381068
loss: 1.0096755027770996,grad_norm: 0.8325016440969978, iteration: 381069
loss: 1.0075992345809937,grad_norm: 0.8567958804324493, iteration: 381070
loss: 0.9536293745040894,grad_norm: 0.8722527102059447, iteration: 381071
loss: 1.0867329835891724,grad_norm: 0.9999996296663911, iteration: 381072
loss: 1.0189521312713623,grad_norm: 0.8539315016527466, iteration: 381073
loss: 1.0039235353469849,grad_norm: 0.6612712890252264, iteration: 381074
loss: 1.049172043800354,grad_norm: 0.9849575164338336, iteration: 381075
loss: 1.036124587059021,grad_norm: 0.8891249512879681, iteration: 381076
loss: 0.9764934778213501,grad_norm: 0.7784299338192631, iteration: 381077
loss: 1.0569981336593628,grad_norm: 0.9999992190631759, iteration: 381078
loss: 1.0189507007598877,grad_norm: 0.8532510357570318, iteration: 381079
loss: 1.0218254327774048,grad_norm: 0.8375790078858192, iteration: 381080
loss: 1.017770767211914,grad_norm: 0.9999991055431313, iteration: 381081
loss: 0.9926360845565796,grad_norm: 0.8610249323319159, iteration: 381082
loss: 1.000912070274353,grad_norm: 0.7186818199975682, iteration: 381083
loss: 1.0366452932357788,grad_norm: 0.7115957408100912, iteration: 381084
loss: 1.1410775184631348,grad_norm: 0.8120760147081233, iteration: 381085
loss: 1.0121458768844604,grad_norm: 0.7434630366088039, iteration: 381086
loss: 1.0206208229064941,grad_norm: 0.8300522366629289, iteration: 381087
loss: 1.1062650680541992,grad_norm: 0.9999999607515873, iteration: 381088
loss: 1.0475144386291504,grad_norm: 0.9947388440330762, iteration: 381089
loss: 1.021782398223877,grad_norm: 0.7599458456256115, iteration: 381090
loss: 0.9914718270301819,grad_norm: 0.9999995575925241, iteration: 381091
loss: 1.0472257137298584,grad_norm: 0.723513254527056, iteration: 381092
loss: 0.9807757139205933,grad_norm: 0.7073006213713379, iteration: 381093
loss: 0.9818628430366516,grad_norm: 0.9591787762891689, iteration: 381094
loss: 1.0879392623901367,grad_norm: 0.8985406801545879, iteration: 381095
loss: 1.0038537979125977,grad_norm: 0.7379554897996186, iteration: 381096
loss: 0.9833064675331116,grad_norm: 0.7697787224474673, iteration: 381097
loss: 0.9906550049781799,grad_norm: 0.7106944097882313, iteration: 381098
loss: 1.0013809204101562,grad_norm: 0.7719925044183985, iteration: 381099
loss: 1.0424103736877441,grad_norm: 0.9999990079561926, iteration: 381100
loss: 1.0254712104797363,grad_norm: 0.9013372523867955, iteration: 381101
loss: 1.0177193880081177,grad_norm: 0.6888043188292672, iteration: 381102
loss: 0.9987949132919312,grad_norm: 0.907271161211248, iteration: 381103
loss: 0.9917539954185486,grad_norm: 0.747439280146467, iteration: 381104
loss: 0.9884046316146851,grad_norm: 0.686083523704015, iteration: 381105
loss: 0.9983079433441162,grad_norm: 0.8476251354490827, iteration: 381106
loss: 0.9740982055664062,grad_norm: 0.8198450837105417, iteration: 381107
loss: 1.0325905084609985,grad_norm: 0.9632217655024701, iteration: 381108
loss: 1.1782407760620117,grad_norm: 0.8249929875863923, iteration: 381109
loss: 1.0234321355819702,grad_norm: 0.7774711471274649, iteration: 381110
loss: 1.047049641609192,grad_norm: 0.8801020016471536, iteration: 381111
loss: 1.062681794166565,grad_norm: 0.999999822261704, iteration: 381112
loss: 1.047497272491455,grad_norm: 0.9335653456977454, iteration: 381113
loss: 1.0032786130905151,grad_norm: 0.834335423776079, iteration: 381114
loss: 1.0637872219085693,grad_norm: 0.6831534923141828, iteration: 381115
loss: 0.9868912696838379,grad_norm: 0.6942136087207872, iteration: 381116
loss: 0.9856662750244141,grad_norm: 0.7761968577307842, iteration: 381117
loss: 0.9854080677032471,grad_norm: 0.866954287444566, iteration: 381118
loss: 1.0113483667373657,grad_norm: 0.9999991768147256, iteration: 381119
loss: 1.022497534751892,grad_norm: 0.8135509547724104, iteration: 381120
loss: 1.0176167488098145,grad_norm: 0.8008818395209494, iteration: 381121
loss: 1.0231945514678955,grad_norm: 0.9845607220804162, iteration: 381122
loss: 1.0858983993530273,grad_norm: 0.8166766840219123, iteration: 381123
loss: 1.0524808168411255,grad_norm: 0.8500011904475285, iteration: 381124
loss: 0.9829102158546448,grad_norm: 0.9024538326004856, iteration: 381125
loss: 1.1093826293945312,grad_norm: 0.9680250704479663, iteration: 381126
loss: 1.0016734600067139,grad_norm: 0.8567129560724663, iteration: 381127
loss: 1.062234878540039,grad_norm: 0.9999996798592721, iteration: 381128
loss: 1.0190421342849731,grad_norm: 0.795603645376049, iteration: 381129
loss: 1.0498126745224,grad_norm: 0.9999993065068444, iteration: 381130
loss: 1.0017309188842773,grad_norm: 0.7122392718903652, iteration: 381131
loss: 1.031729817390442,grad_norm: 0.9064588091044631, iteration: 381132
loss: 1.1130101680755615,grad_norm: 0.999999102569988, iteration: 381133
loss: 0.9634004831314087,grad_norm: 0.8192497808181526, iteration: 381134
loss: 1.0517600774765015,grad_norm: 0.9833414616521508, iteration: 381135
loss: 0.98549485206604,grad_norm: 0.8651168429623829, iteration: 381136
loss: 0.9500941038131714,grad_norm: 0.8109647502365963, iteration: 381137
loss: 0.9959952235221863,grad_norm: 0.7793596710182289, iteration: 381138
loss: 0.960816502571106,grad_norm: 0.9999992138023543, iteration: 381139
loss: 0.9700378775596619,grad_norm: 0.8676324269459409, iteration: 381140
loss: 1.0428133010864258,grad_norm: 0.8327707413405233, iteration: 381141
loss: 0.9892064332962036,grad_norm: 0.7021121701359702, iteration: 381142
loss: 0.9913009405136108,grad_norm: 0.7568418924022168, iteration: 381143
loss: 0.9595925211906433,grad_norm: 0.7965596772264242, iteration: 381144
loss: 1.1892967224121094,grad_norm: 0.9999999452305247, iteration: 381145
loss: 1.0574378967285156,grad_norm: 0.7164571441071137, iteration: 381146
loss: 1.026723027229309,grad_norm: 0.7801235295222434, iteration: 381147
loss: 1.0064903497695923,grad_norm: 0.8086566257772307, iteration: 381148
loss: 1.0200612545013428,grad_norm: 0.6873800222267897, iteration: 381149
loss: 1.0438487529754639,grad_norm: 0.7423037182120049, iteration: 381150
loss: 0.970086932182312,grad_norm: 0.9584555125991, iteration: 381151
loss: 1.006817102432251,grad_norm: 0.8858759191490935, iteration: 381152
loss: 0.9966338872909546,grad_norm: 1.0000000456991138, iteration: 381153
loss: 1.078284502029419,grad_norm: 0.9999997172918106, iteration: 381154
loss: 1.0056958198547363,grad_norm: 0.848963261421572, iteration: 381155
loss: 0.9881532192230225,grad_norm: 0.8911991082274796, iteration: 381156
loss: 0.9884825944900513,grad_norm: 0.7355453717021034, iteration: 381157
loss: 1.052297592163086,grad_norm: 0.9138452500824706, iteration: 381158
loss: 1.0103793144226074,grad_norm: 0.8415896018442023, iteration: 381159
loss: 1.0960595607757568,grad_norm: 0.8971362996918384, iteration: 381160
loss: 1.0401558876037598,grad_norm: 0.9999997823652832, iteration: 381161
loss: 1.0151325464248657,grad_norm: 0.7063960502808465, iteration: 381162
loss: 1.0278809070587158,grad_norm: 0.7145067433378492, iteration: 381163
loss: 1.0177701711654663,grad_norm: 0.8004089076948647, iteration: 381164
loss: 1.012976050376892,grad_norm: 0.9240248994452667, iteration: 381165
loss: 1.0018924474716187,grad_norm: 0.9999997741078082, iteration: 381166
loss: 1.0434247255325317,grad_norm: 0.9744787131270916, iteration: 381167
loss: 1.033279538154602,grad_norm: 0.7749546716493849, iteration: 381168
loss: 0.9997659921646118,grad_norm: 0.6944055456502664, iteration: 381169
loss: 0.9853115677833557,grad_norm: 0.8145363359833195, iteration: 381170
loss: 1.000837802886963,grad_norm: 0.7645179571132287, iteration: 381171
loss: 1.0139676332473755,grad_norm: 0.6624152500881197, iteration: 381172
loss: 1.0355857610702515,grad_norm: 0.8549827614564882, iteration: 381173
loss: 0.9813641905784607,grad_norm: 0.7734404237934408, iteration: 381174
loss: 0.977012574672699,grad_norm: 0.7083196524271372, iteration: 381175
loss: 1.0026353597640991,grad_norm: 0.8736959254692434, iteration: 381176
loss: 1.0169085264205933,grad_norm: 0.8437406201426046, iteration: 381177
loss: 1.0409291982650757,grad_norm: 0.9999990748819404, iteration: 381178
loss: 1.0274996757507324,grad_norm: 0.919563560658222, iteration: 381179
loss: 1.0653188228607178,grad_norm: 0.9999992057842438, iteration: 381180
loss: 0.9877261519432068,grad_norm: 0.7475773065645748, iteration: 381181
loss: 0.9898337721824646,grad_norm: 0.6452621656592482, iteration: 381182
loss: 0.9986134767532349,grad_norm: 0.8771515344316057, iteration: 381183
loss: 1.0589267015457153,grad_norm: 0.9999992680062976, iteration: 381184
loss: 1.0660916566848755,grad_norm: 0.8590771665554917, iteration: 381185
loss: 1.0073634386062622,grad_norm: 0.8965451432129367, iteration: 381186
loss: 0.9932601451873779,grad_norm: 0.9999993675170176, iteration: 381187
loss: 1.003208875656128,grad_norm: 0.9999993659300572, iteration: 381188
loss: 0.9973995685577393,grad_norm: 0.8415414900721747, iteration: 381189
loss: 0.9850366115570068,grad_norm: 0.7308327322247656, iteration: 381190
loss: 1.0200905799865723,grad_norm: 0.8009578346369901, iteration: 381191
loss: 1.0160282850265503,grad_norm: 0.7949572930141892, iteration: 381192
loss: 1.0079665184020996,grad_norm: 0.9847823921920402, iteration: 381193
loss: 1.0125494003295898,grad_norm: 0.8937188294992522, iteration: 381194
loss: 1.0510591268539429,grad_norm: 0.9246926084606562, iteration: 381195
loss: 0.9967782497406006,grad_norm: 0.7586032669563968, iteration: 381196
loss: 1.1379636526107788,grad_norm: 0.800055171403319, iteration: 381197
loss: 0.9773693084716797,grad_norm: 0.6584638164006311, iteration: 381198
loss: 1.0972784757614136,grad_norm: 0.9050846288388362, iteration: 381199
loss: 1.0818936824798584,grad_norm: 0.9303257779129643, iteration: 381200
loss: 0.9804155230522156,grad_norm: 0.7014720528673603, iteration: 381201
loss: 0.988703727722168,grad_norm: 0.7591485355692923, iteration: 381202
loss: 1.0849391222000122,grad_norm: 0.9999997743687009, iteration: 381203
loss: 1.0396965742111206,grad_norm: 0.9999991191070718, iteration: 381204
loss: 1.0306508541107178,grad_norm: 0.7552673327649954, iteration: 381205
loss: 1.0101888179779053,grad_norm: 0.8957087759139708, iteration: 381206
loss: 1.0047610998153687,grad_norm: 0.9032347951653694, iteration: 381207
loss: 0.970212459564209,grad_norm: 0.9999997332267897, iteration: 381208
loss: 1.0143622159957886,grad_norm: 0.842876500534489, iteration: 381209
loss: 1.000251293182373,grad_norm: 0.8830393517725859, iteration: 381210
loss: 1.1477644443511963,grad_norm: 0.8279685670363001, iteration: 381211
loss: 1.0358891487121582,grad_norm: 0.7634555792543004, iteration: 381212
loss: 0.9898056983947754,grad_norm: 0.9999999451830256, iteration: 381213
loss: 0.9772670865058899,grad_norm: 0.9999994924542468, iteration: 381214
loss: 0.9963080286979675,grad_norm: 0.7440227569820717, iteration: 381215
loss: 0.9513809680938721,grad_norm: 0.7226105527716917, iteration: 381216
loss: 1.2599921226501465,grad_norm: 0.9999997997082315, iteration: 381217
loss: 1.0079057216644287,grad_norm: 0.6835899099742234, iteration: 381218
loss: 1.0144256353378296,grad_norm: 0.7694468375235775, iteration: 381219
loss: 1.0124138593673706,grad_norm: 0.9560474949814097, iteration: 381220
loss: 0.9958215355873108,grad_norm: 0.7254485898098575, iteration: 381221
loss: 0.9864281415939331,grad_norm: 0.8279217922530212, iteration: 381222
loss: 0.9816880226135254,grad_norm: 0.7691419881777312, iteration: 381223
loss: 1.1591761112213135,grad_norm: 0.999999077556797, iteration: 381224
loss: 1.0096005201339722,grad_norm: 0.739819477589227, iteration: 381225
loss: 0.964121401309967,grad_norm: 0.7498327420400115, iteration: 381226
loss: 0.9842239618301392,grad_norm: 0.8550494022055182, iteration: 381227
loss: 1.15534508228302,grad_norm: 0.926634651922166, iteration: 381228
loss: 1.0818880796432495,grad_norm: 0.9371503145147672, iteration: 381229
loss: 0.9611296653747559,grad_norm: 0.695058923967077, iteration: 381230
loss: 1.016241431236267,grad_norm: 0.745894820765981, iteration: 381231
loss: 1.0964075326919556,grad_norm: 0.9731351065674999, iteration: 381232
loss: 0.9744512438774109,grad_norm: 0.8002124551465789, iteration: 381233
loss: 1.0075230598449707,grad_norm: 0.7627556750798916, iteration: 381234
loss: 1.0318262577056885,grad_norm: 0.8820308967717826, iteration: 381235
loss: 1.0285147428512573,grad_norm: 0.9999993516144069, iteration: 381236
loss: 0.9947181940078735,grad_norm: 0.8107290410219892, iteration: 381237
loss: 0.9839898943901062,grad_norm: 0.7726627831234598, iteration: 381238
loss: 0.9872663617134094,grad_norm: 0.7117880233382009, iteration: 381239
loss: 0.9835568070411682,grad_norm: 0.8669848169384177, iteration: 381240
loss: 0.970048189163208,grad_norm: 0.8669596895122, iteration: 381241
loss: 1.013990044593811,grad_norm: 0.9850186802215825, iteration: 381242
loss: 1.053083062171936,grad_norm: 0.8872970926559484, iteration: 381243
loss: 1.017795205116272,grad_norm: 0.740081664496943, iteration: 381244
loss: 0.9970934987068176,grad_norm: 0.8025947767667625, iteration: 381245
loss: 0.9735047221183777,grad_norm: 0.7000301538592065, iteration: 381246
loss: 1.0112735033035278,grad_norm: 0.7815105284391365, iteration: 381247
loss: 1.0224881172180176,grad_norm: 0.6646854427341672, iteration: 381248
loss: 1.0236276388168335,grad_norm: 0.8548409800009891, iteration: 381249
loss: 0.9713947772979736,grad_norm: 0.9999991591292203, iteration: 381250
loss: 0.9623442888259888,grad_norm: 0.8159761906391493, iteration: 381251
loss: 1.0223851203918457,grad_norm: 0.7378647628825686, iteration: 381252
loss: 1.0124750137329102,grad_norm: 0.9999997653350691, iteration: 381253
loss: 0.969725489616394,grad_norm: 0.8834553236284326, iteration: 381254
loss: 0.9968734979629517,grad_norm: 0.9919879151398134, iteration: 381255
loss: 1.0033553838729858,grad_norm: 0.8467494873479694, iteration: 381256
loss: 0.9754852652549744,grad_norm: 0.7266743894944735, iteration: 381257
loss: 0.9834194779396057,grad_norm: 0.7567758706092457, iteration: 381258
loss: 0.9805570244789124,grad_norm: 0.7286234867663037, iteration: 381259
loss: 0.9900560975074768,grad_norm: 0.8822080716329108, iteration: 381260
loss: 1.0120333433151245,grad_norm: 0.8005717245280938, iteration: 381261
loss: 0.9639934301376343,grad_norm: 0.6092967870721676, iteration: 381262
loss: 1.1800388097763062,grad_norm: 0.9999992057466959, iteration: 381263
loss: 0.9943935871124268,grad_norm: 0.74632793272427, iteration: 381264
loss: 1.007454752922058,grad_norm: 0.9214361325065195, iteration: 381265
loss: 1.0669118165969849,grad_norm: 0.9999999793419407, iteration: 381266
loss: 1.0476456880569458,grad_norm: 0.8104973767032191, iteration: 381267
loss: 1.0278198719024658,grad_norm: 0.8171964643583535, iteration: 381268
loss: 1.0763906240463257,grad_norm: 0.999999183286593, iteration: 381269
loss: 0.9716101884841919,grad_norm: 0.7222317530533632, iteration: 381270
loss: 0.9497187733650208,grad_norm: 0.9999994496107267, iteration: 381271
loss: 0.997088611125946,grad_norm: 0.8371877077646512, iteration: 381272
loss: 1.0241986513137817,grad_norm: 0.9189085792469085, iteration: 381273
loss: 0.9963195323944092,grad_norm: 0.8093753239201175, iteration: 381274
loss: 0.9618144631385803,grad_norm: 0.7618795106598334, iteration: 381275
loss: 1.038144826889038,grad_norm: 0.860259061933788, iteration: 381276
loss: 1.0063318014144897,grad_norm: 0.7062052790979356, iteration: 381277
loss: 1.0078020095825195,grad_norm: 0.7467562989636283, iteration: 381278
loss: 0.9927494525909424,grad_norm: 0.7471033992661017, iteration: 381279
loss: 1.0401581525802612,grad_norm: 0.9999998990566951, iteration: 381280
loss: 0.9770590662956238,grad_norm: 0.9999999938237208, iteration: 381281
loss: 0.985844612121582,grad_norm: 0.7776007399754542, iteration: 381282
loss: 1.0155606269836426,grad_norm: 0.9061228609954655, iteration: 381283
loss: 1.0135911703109741,grad_norm: 0.7942383778872526, iteration: 381284
loss: 0.979866623878479,grad_norm: 0.8547353667109755, iteration: 381285
loss: 1.0229963064193726,grad_norm: 0.8067980100021472, iteration: 381286
loss: 1.022456407546997,grad_norm: 0.8172535715104715, iteration: 381287
loss: 1.0294795036315918,grad_norm: 0.9999991033216905, iteration: 381288
loss: 0.9692259430885315,grad_norm: 0.7107992422675267, iteration: 381289
loss: 0.9751558899879456,grad_norm: 0.8215655818344656, iteration: 381290
loss: 1.0132039785385132,grad_norm: 0.8390435067687876, iteration: 381291
loss: 0.9981452226638794,grad_norm: 0.7941972339693429, iteration: 381292
loss: 0.9962406158447266,grad_norm: 0.8226915187776005, iteration: 381293
loss: 0.9641289114952087,grad_norm: 0.6918670709008937, iteration: 381294
loss: 0.9820802211761475,grad_norm: 0.8516991143216964, iteration: 381295
loss: 1.0366601943969727,grad_norm: 0.9999996722586288, iteration: 381296
loss: 1.0070288181304932,grad_norm: 0.7253291693384734, iteration: 381297
loss: 0.995930552482605,grad_norm: 0.9999993733263874, iteration: 381298
loss: 0.9809499382972717,grad_norm: 0.7377463530696659, iteration: 381299
loss: 1.0046982765197754,grad_norm: 0.8227367721823806, iteration: 381300
loss: 1.0373733043670654,grad_norm: 0.7175719431619757, iteration: 381301
loss: 1.0370358228683472,grad_norm: 0.8416394096889017, iteration: 381302
loss: 0.9799962639808655,grad_norm: 0.7395919813765298, iteration: 381303
loss: 0.9810786843299866,grad_norm: 0.821460081381221, iteration: 381304
loss: 1.0142216682434082,grad_norm: 0.8901433737571739, iteration: 381305
loss: 0.9790767431259155,grad_norm: 0.7484249652426838, iteration: 381306
loss: 0.9885775446891785,grad_norm: 0.7727311200211038, iteration: 381307
loss: 0.9585267305374146,grad_norm: 0.9659560069082146, iteration: 381308
loss: 1.0284470319747925,grad_norm: 0.8204244149913913, iteration: 381309
loss: 0.9225595593452454,grad_norm: 0.9999993525383115, iteration: 381310
loss: 0.9954864978790283,grad_norm: 0.9248399584757893, iteration: 381311
loss: 1.0189061164855957,grad_norm: 0.9999992104163656, iteration: 381312
loss: 0.999293327331543,grad_norm: 0.8835744568071602, iteration: 381313
loss: 1.007944941520691,grad_norm: 0.7976983457783001, iteration: 381314
loss: 1.122469425201416,grad_norm: 0.9999993925924536, iteration: 381315
loss: 1.0281189680099487,grad_norm: 0.8774611802486214, iteration: 381316
loss: 1.0313289165496826,grad_norm: 0.8131076981343387, iteration: 381317
loss: 1.0129166841506958,grad_norm: 0.6842016842513916, iteration: 381318
loss: 0.9983567595481873,grad_norm: 0.9999990300759547, iteration: 381319
loss: 0.9952607154846191,grad_norm: 0.8424251053787714, iteration: 381320
loss: 0.978272557258606,grad_norm: 0.7980980770736069, iteration: 381321
loss: 0.9977648854255676,grad_norm: 0.8119392053271104, iteration: 381322
loss: 0.9828429818153381,grad_norm: 0.8112898444737198, iteration: 381323
loss: 1.0088433027267456,grad_norm: 0.8583146605719567, iteration: 381324
loss: 0.9745689034461975,grad_norm: 0.7580111782520179, iteration: 381325
loss: 0.9935041069984436,grad_norm: 0.7574260861988324, iteration: 381326
loss: 1.0321162939071655,grad_norm: 0.778427352074222, iteration: 381327
loss: 1.007917046546936,grad_norm: 0.7147659646054993, iteration: 381328
loss: 0.9804607629776001,grad_norm: 0.8792810647716187, iteration: 381329
loss: 0.9710696935653687,grad_norm: 0.7940353956197429, iteration: 381330
loss: 1.0450811386108398,grad_norm: 0.7529493841794295, iteration: 381331
loss: 0.9760679602622986,grad_norm: 0.8733155850852684, iteration: 381332
loss: 0.9986142516136169,grad_norm: 0.8083501159570442, iteration: 381333
loss: 1.0066033601760864,grad_norm: 0.6092607432492337, iteration: 381334
loss: 0.9850097894668579,grad_norm: 0.6269903324098752, iteration: 381335
loss: 0.9844008088111877,grad_norm: 0.8394236540272071, iteration: 381336
loss: 1.0243035554885864,grad_norm: 0.9563567246770821, iteration: 381337
loss: 1.0105249881744385,grad_norm: 0.9999999611750056, iteration: 381338
loss: 1.0354496240615845,grad_norm: 0.7447387923959735, iteration: 381339
loss: 1.0197997093200684,grad_norm: 0.8771721505961595, iteration: 381340
loss: 0.993288516998291,grad_norm: 0.6222455631409615, iteration: 381341
loss: 0.992739200592041,grad_norm: 0.9999997142360821, iteration: 381342
loss: 1.001160979270935,grad_norm: 0.8210873168949643, iteration: 381343
loss: 1.0317381620407104,grad_norm: 0.7533611337566379, iteration: 381344
loss: 1.0797308683395386,grad_norm: 0.9999990561821932, iteration: 381345
loss: 0.9704532027244568,grad_norm: 0.859577620871925, iteration: 381346
loss: 1.0128082036972046,grad_norm: 1.0000000473910862, iteration: 381347
loss: 1.0407272577285767,grad_norm: 0.7302299966159578, iteration: 381348
loss: 0.9294809699058533,grad_norm: 0.8037899688665978, iteration: 381349
loss: 1.0245503187179565,grad_norm: 0.8077057416669481, iteration: 381350
loss: 0.9904709458351135,grad_norm: 0.8833950572549844, iteration: 381351
loss: 0.9910260438919067,grad_norm: 0.7724347804520018, iteration: 381352
loss: 1.0315639972686768,grad_norm: 0.821161286846743, iteration: 381353
loss: 1.04329252243042,grad_norm: 0.7929590133218116, iteration: 381354
loss: 1.0124692916870117,grad_norm: 0.7571872103421544, iteration: 381355
loss: 0.980824887752533,grad_norm: 0.9999989757879025, iteration: 381356
loss: 0.9519187211990356,grad_norm: 0.7666208996591519, iteration: 381357
loss: 1.0145785808563232,grad_norm: 0.6545684694032311, iteration: 381358
loss: 0.9947132468223572,grad_norm: 0.7532633417464839, iteration: 381359
loss: 1.010926365852356,grad_norm: 0.9999995101054584, iteration: 381360
loss: 0.9827645421028137,grad_norm: 0.9999992186556854, iteration: 381361
loss: 0.9956483840942383,grad_norm: 0.8281976295415497, iteration: 381362
loss: 1.026293158531189,grad_norm: 0.8193261779022992, iteration: 381363
loss: 0.9836039543151855,grad_norm: 0.8929416159284369, iteration: 381364
loss: 1.0159558057785034,grad_norm: 0.8884878930240032, iteration: 381365
loss: 1.024140477180481,grad_norm: 0.7241833802618795, iteration: 381366
loss: 1.0215041637420654,grad_norm: 0.7951742784287301, iteration: 381367
loss: 1.0431185960769653,grad_norm: 0.9999995967544911, iteration: 381368
loss: 0.9997396469116211,grad_norm: 0.9625466099194943, iteration: 381369
loss: 0.9816595315933228,grad_norm: 0.7717113420046274, iteration: 381370
loss: 1.029529333114624,grad_norm: 0.8465206344170131, iteration: 381371
loss: 1.0514819622039795,grad_norm: 0.9999999804891212, iteration: 381372
loss: 1.028417706489563,grad_norm: 0.7631803874089473, iteration: 381373
loss: 0.9866309762001038,grad_norm: 0.7605498146573031, iteration: 381374
loss: 1.0169198513031006,grad_norm: 0.761957976211027, iteration: 381375
loss: 0.9756573438644409,grad_norm: 0.7601825834147574, iteration: 381376
loss: 0.9785739779472351,grad_norm: 0.7591452758060876, iteration: 381377
loss: 0.9482679963111877,grad_norm: 0.9244093947615365, iteration: 381378
loss: 1.0206894874572754,grad_norm: 0.770916471452935, iteration: 381379
loss: 1.0147507190704346,grad_norm: 0.8388237973663449, iteration: 381380
loss: 1.008713722229004,grad_norm: 0.9979246954256642, iteration: 381381
loss: 0.9975550174713135,grad_norm: 0.7786537284363485, iteration: 381382
loss: 0.9838409423828125,grad_norm: 0.711628959272827, iteration: 381383
loss: 1.0528773069381714,grad_norm: 0.9783971467427625, iteration: 381384
loss: 1.007362961769104,grad_norm: 0.9551603940311103, iteration: 381385
loss: 0.9796269536018372,grad_norm: 0.7834694995331628, iteration: 381386
loss: 1.0854723453521729,grad_norm: 0.9999996187656912, iteration: 381387
loss: 1.0305112600326538,grad_norm: 0.844504453522819, iteration: 381388
loss: 0.9765776991844177,grad_norm: 0.7131781832675375, iteration: 381389
loss: 0.9803325533866882,grad_norm: 0.6744730103002736, iteration: 381390
loss: 1.04323148727417,grad_norm: 0.7199340296622737, iteration: 381391
loss: 0.9747459292411804,grad_norm: 0.8780245429059834, iteration: 381392
loss: 0.9717285633087158,grad_norm: 0.7323238042913536, iteration: 381393
loss: 0.9708119630813599,grad_norm: 0.840075511123644, iteration: 381394
loss: 0.9896852374076843,grad_norm: 0.8159497686678405, iteration: 381395
loss: 1.1190471649169922,grad_norm: 0.9999996559974834, iteration: 381396
loss: 0.9630998373031616,grad_norm: 0.6660564791695178, iteration: 381397
loss: 1.0130293369293213,grad_norm: 0.8230548265629805, iteration: 381398
loss: 0.971596360206604,grad_norm: 0.9293762022833935, iteration: 381399
loss: 1.0116053819656372,grad_norm: 0.6453847938070982, iteration: 381400
loss: 1.006108045578003,grad_norm: 0.8409478907902177, iteration: 381401
loss: 1.135333776473999,grad_norm: 0.8812110542862188, iteration: 381402
loss: 0.9681323766708374,grad_norm: 0.7302775656761957, iteration: 381403
loss: 0.966509222984314,grad_norm: 0.9154399893640679, iteration: 381404
loss: 0.9732939600944519,grad_norm: 0.7527227304779932, iteration: 381405
loss: 0.9925978183746338,grad_norm: 0.8431368597222888, iteration: 381406
loss: 1.0087884664535522,grad_norm: 0.6412967354855136, iteration: 381407
loss: 0.9470982551574707,grad_norm: 0.75147374297132, iteration: 381408
loss: 0.986865222454071,grad_norm: 0.8517469342928266, iteration: 381409
loss: 0.9787722229957581,grad_norm: 0.6598369775680429, iteration: 381410
loss: 1.0339683294296265,grad_norm: 0.9999999726231774, iteration: 381411
loss: 0.9554716348648071,grad_norm: 0.7165156742001615, iteration: 381412
loss: 0.9834855198860168,grad_norm: 0.888763920067194, iteration: 381413
loss: 1.0113734006881714,grad_norm: 0.8811590196080228, iteration: 381414
loss: 0.9917190670967102,grad_norm: 0.8704919149123912, iteration: 381415
loss: 1.0146619081497192,grad_norm: 0.7218792586549219, iteration: 381416
loss: 1.0040202140808105,grad_norm: 0.808783587493018, iteration: 381417
loss: 1.0818309783935547,grad_norm: 0.9999998976419381, iteration: 381418
loss: 0.970606803894043,grad_norm: 0.7613701517048201, iteration: 381419
loss: 0.9898817539215088,grad_norm: 0.6794467918709555, iteration: 381420
loss: 1.0044037103652954,grad_norm: 0.6248293281139788, iteration: 381421
loss: 0.9707041382789612,grad_norm: 0.8911518413987135, iteration: 381422
loss: 0.9954483509063721,grad_norm: 0.8912146514142525, iteration: 381423
loss: 1.0123722553253174,grad_norm: 0.7551451267444768, iteration: 381424
loss: 1.04537034034729,grad_norm: 0.9999995644559656, iteration: 381425
loss: 1.017227053642273,grad_norm: 0.8141677330469193, iteration: 381426
loss: 1.0243501663208008,grad_norm: 0.815004471483523, iteration: 381427
loss: 1.004724383354187,grad_norm: 0.8290773214096985, iteration: 381428
loss: 1.0000351667404175,grad_norm: 0.8903112123687166, iteration: 381429
loss: 1.0323677062988281,grad_norm: 0.999999064048089, iteration: 381430
loss: 1.0119967460632324,grad_norm: 0.7786494805491089, iteration: 381431
loss: 0.9813231825828552,grad_norm: 0.6825478939767599, iteration: 381432
loss: 0.9981591701507568,grad_norm: 0.9095835486861641, iteration: 381433
loss: 0.9980009198188782,grad_norm: 0.764652445666412, iteration: 381434
loss: 0.983487069606781,grad_norm: 0.8937349082670833, iteration: 381435
loss: 1.0062756538391113,grad_norm: 0.7247891836372453, iteration: 381436
loss: 1.0085511207580566,grad_norm: 0.7159191110124096, iteration: 381437
loss: 1.0168901681900024,grad_norm: 0.7796190447434244, iteration: 381438
loss: 1.0368329286575317,grad_norm: 0.9999989630763444, iteration: 381439
loss: 1.0015519857406616,grad_norm: 0.8368992828748374, iteration: 381440
loss: 0.992860734462738,grad_norm: 0.882006240596032, iteration: 381441
loss: 1.0373425483703613,grad_norm: 0.8030788368609038, iteration: 381442
loss: 1.0013647079467773,grad_norm: 0.749625469774488, iteration: 381443
loss: 0.9869736433029175,grad_norm: 0.7040165153793483, iteration: 381444
loss: 1.1161537170410156,grad_norm: 0.9785707277168042, iteration: 381445
loss: 1.0544157028198242,grad_norm: 0.999999073643542, iteration: 381446
loss: 0.9788134098052979,grad_norm: 0.7787650314801644, iteration: 381447
loss: 0.9782782196998596,grad_norm: 0.7514969554502254, iteration: 381448
loss: 1.0006952285766602,grad_norm: 0.8111246599309879, iteration: 381449
loss: 1.0048754215240479,grad_norm: 0.8862029460085398, iteration: 381450
loss: 1.0386962890625,grad_norm: 0.7599816166791549, iteration: 381451
loss: 1.0366078615188599,grad_norm: 0.8265848869793073, iteration: 381452
loss: 0.9831991195678711,grad_norm: 0.7698325348463818, iteration: 381453
loss: 1.0142251253128052,grad_norm: 0.9139627277419605, iteration: 381454
loss: 1.024316668510437,grad_norm: 0.999999340140735, iteration: 381455
loss: 0.999334990978241,grad_norm: 0.7191064540815972, iteration: 381456
loss: 1.0021874904632568,grad_norm: 0.8814003134829569, iteration: 381457
loss: 0.9676761031150818,grad_norm: 0.8050763001268315, iteration: 381458
loss: 1.0229233503341675,grad_norm: 0.7246434179984836, iteration: 381459
loss: 1.0156439542770386,grad_norm: 0.8199603702409418, iteration: 381460
loss: 0.9878867268562317,grad_norm: 0.659462229379699, iteration: 381461
loss: 0.9573456645011902,grad_norm: 0.7150189794216304, iteration: 381462
loss: 0.9684380888938904,grad_norm: 0.9006851077486826, iteration: 381463
loss: 0.9965489506721497,grad_norm: 0.7799923276012136, iteration: 381464
loss: 0.9958146214485168,grad_norm: 0.8313574619778823, iteration: 381465
loss: 1.0512434244155884,grad_norm: 0.999999671720377, iteration: 381466
loss: 0.953342616558075,grad_norm: 0.7965383008519207, iteration: 381467
loss: 1.0176489353179932,grad_norm: 0.7646245734224945, iteration: 381468
loss: 0.9979414939880371,grad_norm: 0.7887974530765398, iteration: 381469
loss: 0.9717903733253479,grad_norm: 0.7707935666167098, iteration: 381470
loss: 0.9926087856292725,grad_norm: 0.7192161871792605, iteration: 381471
loss: 1.0503274202346802,grad_norm: 0.776126885768118, iteration: 381472
loss: 1.0089579820632935,grad_norm: 0.7293725980832995, iteration: 381473
loss: 0.9847066402435303,grad_norm: 0.7579397555258672, iteration: 381474
loss: 1.0126469135284424,grad_norm: 0.7746727575195089, iteration: 381475
loss: 1.0271886587142944,grad_norm: 0.6862557630682387, iteration: 381476
loss: 1.0344210863113403,grad_norm: 0.8424992843518138, iteration: 381477
loss: 0.9994193315505981,grad_norm: 0.6792388597096836, iteration: 381478
loss: 0.9808914065361023,grad_norm: 0.8656082075245011, iteration: 381479
loss: 1.0107132196426392,grad_norm: 0.666126098835424, iteration: 381480
loss: 1.0311346054077148,grad_norm: 0.7057387357228406, iteration: 381481
loss: 1.023134708404541,grad_norm: 0.6968660174422031, iteration: 381482
loss: 1.0206881761550903,grad_norm: 0.8435546911743604, iteration: 381483
loss: 1.0253715515136719,grad_norm: 0.9999997874329956, iteration: 381484
loss: 1.0818212032318115,grad_norm: 0.9940219802680912, iteration: 381485
loss: 1.0226633548736572,grad_norm: 0.7283097323952117, iteration: 381486
loss: 0.9902539849281311,grad_norm: 0.7867438459306401, iteration: 381487
loss: 1.0006102323532104,grad_norm: 0.8344793275142756, iteration: 381488
loss: 0.9633137583732605,grad_norm: 0.7997588700732456, iteration: 381489
loss: 0.979836642742157,grad_norm: 0.7751537935073771, iteration: 381490
loss: 0.9989631772041321,grad_norm: 0.8348066879345799, iteration: 381491
loss: 1.0409190654754639,grad_norm: 0.7661553076864765, iteration: 381492
loss: 0.9766108393669128,grad_norm: 0.7461863062226632, iteration: 381493
loss: 1.0634127855300903,grad_norm: 0.8178551041590625, iteration: 381494
loss: 0.9968695044517517,grad_norm: 0.7893290690789015, iteration: 381495
loss: 0.982639729976654,grad_norm: 0.7488619968933458, iteration: 381496
loss: 0.9647327065467834,grad_norm: 0.7043890779356934, iteration: 381497
loss: 1.0358375310897827,grad_norm: 0.9999991762779152, iteration: 381498
loss: 0.9744319319725037,grad_norm: 0.8584558307223091, iteration: 381499
loss: 1.1443015336990356,grad_norm: 0.9999991185683824, iteration: 381500
loss: 0.9894298315048218,grad_norm: 0.7540961900311167, iteration: 381501
loss: 0.9678353071212769,grad_norm: 0.8339589750208907, iteration: 381502
loss: 0.9753062129020691,grad_norm: 0.7337838556578196, iteration: 381503
loss: 0.9914306402206421,grad_norm: 0.8713242878577462, iteration: 381504
loss: 0.9867103695869446,grad_norm: 0.6599540235471009, iteration: 381505
loss: 1.0474241971969604,grad_norm: 0.9999997925511699, iteration: 381506
loss: 0.9827824831008911,grad_norm: 0.9823296475368476, iteration: 381507
loss: 1.0690741539001465,grad_norm: 0.7264724134113345, iteration: 381508
loss: 0.9621648788452148,grad_norm: 0.8255095933185301, iteration: 381509
loss: 1.020243763923645,grad_norm: 0.7448447164462447, iteration: 381510
loss: 0.9837278723716736,grad_norm: 0.7849023565273722, iteration: 381511
loss: 0.9807685613632202,grad_norm: 0.9999990787044659, iteration: 381512
loss: 0.9683583378791809,grad_norm: 0.8323652688988227, iteration: 381513
loss: 0.9800594449043274,grad_norm: 0.7575554354935468, iteration: 381514
loss: 1.047795295715332,grad_norm: 0.7584885056551812, iteration: 381515
loss: 1.007977843284607,grad_norm: 0.7683666358118266, iteration: 381516
loss: 1.0034526586532593,grad_norm: 0.8445619704665321, iteration: 381517
loss: 1.0112833976745605,grad_norm: 0.6867064333813035, iteration: 381518
loss: 1.000314712524414,grad_norm: 0.7251920199046625, iteration: 381519
loss: 1.1769236326217651,grad_norm: 0.9999999525703521, iteration: 381520
loss: 1.076791524887085,grad_norm: 0.9999998851555792, iteration: 381521
loss: 1.0312587022781372,grad_norm: 0.7600779427494777, iteration: 381522
loss: 1.035306692123413,grad_norm: 0.8151304952916995, iteration: 381523
loss: 1.0121173858642578,grad_norm: 0.767646920311221, iteration: 381524
loss: 1.0473893880844116,grad_norm: 0.7019734212974532, iteration: 381525
loss: 0.9236897230148315,grad_norm: 0.7588878376864604, iteration: 381526
loss: 1.0713073015213013,grad_norm: 0.7832260192653742, iteration: 381527
loss: 1.0168964862823486,grad_norm: 0.7952372647675412, iteration: 381528
loss: 1.0308680534362793,grad_norm: 0.9999992115150749, iteration: 381529
loss: 0.9896394610404968,grad_norm: 0.7145743437258399, iteration: 381530
loss: 1.0659838914871216,grad_norm: 0.7852309844388413, iteration: 381531
loss: 1.0319770574569702,grad_norm: 0.773937627256184, iteration: 381532
loss: 0.9998489022254944,grad_norm: 0.7957898278540242, iteration: 381533
loss: 1.0236058235168457,grad_norm: 0.8998451843056575, iteration: 381534
loss: 1.0117108821868896,grad_norm: 0.9999998127522958, iteration: 381535
loss: 1.0439667701721191,grad_norm: 0.9999991541686536, iteration: 381536
loss: 1.148231029510498,grad_norm: 0.9999994637315202, iteration: 381537
loss: 1.0067908763885498,grad_norm: 0.6589939916687966, iteration: 381538
loss: 1.0115387439727783,grad_norm: 0.8194160165435314, iteration: 381539
loss: 0.9877075552940369,grad_norm: 0.9463750761107904, iteration: 381540
loss: 1.05276620388031,grad_norm: 0.9999991877824563, iteration: 381541
loss: 0.9834108948707581,grad_norm: 0.8063862206443113, iteration: 381542
loss: 1.0189601182937622,grad_norm: 0.7122659301528036, iteration: 381543
loss: 0.9868301153182983,grad_norm: 0.9014191761954857, iteration: 381544
loss: 1.228864073753357,grad_norm: 0.9999993756145635, iteration: 381545
loss: 1.0231750011444092,grad_norm: 0.8432109674025919, iteration: 381546
loss: 1.1211018562316895,grad_norm: 0.9999993826263721, iteration: 381547
loss: 0.9886690378189087,grad_norm: 0.7546817110162954, iteration: 381548
loss: 1.0984618663787842,grad_norm: 0.9999994676918065, iteration: 381549
loss: 1.0185439586639404,grad_norm: 0.999999119198425, iteration: 381550
loss: 0.9672968983650208,grad_norm: 0.693025625583656, iteration: 381551
loss: 0.9837015271186829,grad_norm: 0.8823896679504242, iteration: 381552
loss: 1.0010523796081543,grad_norm: 0.7528470815685441, iteration: 381553
loss: 1.0219324827194214,grad_norm: 0.8655897479216698, iteration: 381554
loss: 1.0532264709472656,grad_norm: 0.9999991792482043, iteration: 381555
loss: 1.0114067792892456,grad_norm: 0.7413233436696252, iteration: 381556
loss: 1.0549156665802002,grad_norm: 0.9999995337439072, iteration: 381557
loss: 0.9482343792915344,grad_norm: 0.7078517348757365, iteration: 381558
loss: 1.0045188665390015,grad_norm: 0.8883662562707391, iteration: 381559
loss: 0.9664863348007202,grad_norm: 0.9050939767286417, iteration: 381560
loss: 1.0414034128189087,grad_norm: 0.9999992566524968, iteration: 381561
loss: 1.0493378639221191,grad_norm: 0.8199749514637605, iteration: 381562
loss: 1.0491443872451782,grad_norm: 0.9999992977537251, iteration: 381563
loss: 1.1689428091049194,grad_norm: 0.9999995817750926, iteration: 381564
loss: 0.9870319366455078,grad_norm: 0.8046533409040642, iteration: 381565
loss: 0.9923250079154968,grad_norm: 0.6331024952003962, iteration: 381566
loss: 1.0408934354782104,grad_norm: 0.9080649367680046, iteration: 381567
loss: 1.070412516593933,grad_norm: 0.9999997997749615, iteration: 381568
loss: 1.0119575262069702,grad_norm: 0.7953630817462398, iteration: 381569
loss: 0.9626433253288269,grad_norm: 0.6513117283443846, iteration: 381570
loss: 0.9791686534881592,grad_norm: 0.7926441245410059, iteration: 381571
loss: 1.004746437072754,grad_norm: 0.7632735716635041, iteration: 381572
loss: 1.00877845287323,grad_norm: 0.7981393888518111, iteration: 381573
loss: 0.9812673330307007,grad_norm: 0.7793152186150676, iteration: 381574
loss: 1.0099431276321411,grad_norm: 0.7887633680144452, iteration: 381575
loss: 1.027721881866455,grad_norm: 0.6520974029230178, iteration: 381576
loss: 1.0956357717514038,grad_norm: 0.9999994009867259, iteration: 381577
loss: 0.9917622804641724,grad_norm: 0.7128914098913552, iteration: 381578
loss: 1.0037025213241577,grad_norm: 0.7637306766550225, iteration: 381579
loss: 1.0210576057434082,grad_norm: 0.7073750361436215, iteration: 381580
loss: 1.0361626148223877,grad_norm: 0.8295784422221613, iteration: 381581
loss: 1.090015172958374,grad_norm: 0.9731615778981839, iteration: 381582
loss: 1.0067522525787354,grad_norm: 0.7859178825264558, iteration: 381583
loss: 1.0170999765396118,grad_norm: 0.7988658463768915, iteration: 381584
loss: 1.0958033800125122,grad_norm: 0.9999990369070063, iteration: 381585
loss: 0.9834582209587097,grad_norm: 0.6530320414503747, iteration: 381586
loss: 0.9961602687835693,grad_norm: 0.9841315352213605, iteration: 381587
loss: 0.994289755821228,grad_norm: 0.8061940629511181, iteration: 381588
loss: 1.011204481124878,grad_norm: 0.8343491949660373, iteration: 381589
loss: 0.9472655057907104,grad_norm: 0.7507353821293727, iteration: 381590
loss: 1.0204222202301025,grad_norm: 0.9115478380558877, iteration: 381591
loss: 0.9982739686965942,grad_norm: 0.7010360113878104, iteration: 381592
loss: 1.0170979499816895,grad_norm: 0.7986261511110936, iteration: 381593
loss: 1.015517234802246,grad_norm: 0.9999996437360777, iteration: 381594
loss: 1.0157610177993774,grad_norm: 0.8065165557864624, iteration: 381595
loss: 0.9908837080001831,grad_norm: 0.8514805532741588, iteration: 381596
loss: 1.061944842338562,grad_norm: 0.9999997581786247, iteration: 381597
loss: 1.056214451789856,grad_norm: 0.9999992850897906, iteration: 381598
loss: 0.9829450249671936,grad_norm: 0.8645868253082791, iteration: 381599
loss: 1.0010781288146973,grad_norm: 0.7835312267875393, iteration: 381600
loss: 1.0090360641479492,grad_norm: 0.6578634773723043, iteration: 381601
loss: 1.0056703090667725,grad_norm: 0.7845797660066484, iteration: 381602
loss: 0.9909175038337708,grad_norm: 0.7860337929351439, iteration: 381603
loss: 1.0120030641555786,grad_norm: 0.7691125724453318, iteration: 381604
loss: 1.0392206907272339,grad_norm: 0.7397219558563306, iteration: 381605
loss: 1.0162458419799805,grad_norm: 0.7963603346855906, iteration: 381606
loss: 1.0039621591567993,grad_norm: 0.7387884280408578, iteration: 381607
loss: 1.0103387832641602,grad_norm: 0.6956358096134119, iteration: 381608
loss: 1.0749911069869995,grad_norm: 0.999999736241855, iteration: 381609
loss: 1.0128494501113892,grad_norm: 0.6860177750613445, iteration: 381610
loss: 1.1171842813491821,grad_norm: 0.9460669954134773, iteration: 381611
loss: 1.0161231756210327,grad_norm: 0.872821080846819, iteration: 381612
loss: 1.0804446935653687,grad_norm: 0.9999990519189034, iteration: 381613
loss: 0.9969344735145569,grad_norm: 0.8233728613067777, iteration: 381614
loss: 1.0029443502426147,grad_norm: 0.8457649979426146, iteration: 381615
loss: 1.0128470659255981,grad_norm: 0.815649040593699, iteration: 381616
loss: 1.0083523988723755,grad_norm: 0.9999999598544309, iteration: 381617
loss: 0.9950947761535645,grad_norm: 0.6963746597413925, iteration: 381618
loss: 1.1251083612442017,grad_norm: 0.8726324169765456, iteration: 381619
loss: 0.9783368110656738,grad_norm: 0.873547829179011, iteration: 381620
loss: 1.0125802755355835,grad_norm: 0.673165126038975, iteration: 381621
loss: 0.9475524425506592,grad_norm: 0.8768575643560972, iteration: 381622
loss: 1.0293177366256714,grad_norm: 0.9999996905791949, iteration: 381623
loss: 1.0441243648529053,grad_norm: 0.93888074872492, iteration: 381624
loss: 1.1822789907455444,grad_norm: 0.9999991797196283, iteration: 381625
loss: 1.0157839059829712,grad_norm: 0.9999993808343399, iteration: 381626
loss: 1.052108645439148,grad_norm: 0.9999992501840803, iteration: 381627
loss: 1.1990551948547363,grad_norm: 0.9999990266340008, iteration: 381628
loss: 1.1122525930404663,grad_norm: 0.9999991221971516, iteration: 381629
loss: 1.0106651782989502,grad_norm: 0.8613806141006292, iteration: 381630
loss: 1.007474660873413,grad_norm: 0.8021720939396713, iteration: 381631
loss: 0.991928219795227,grad_norm: 0.8856999588551362, iteration: 381632
loss: 0.9744923710823059,grad_norm: 0.8109173430209059, iteration: 381633
loss: 0.9847801327705383,grad_norm: 0.7549769080221838, iteration: 381634
loss: 0.9812607765197754,grad_norm: 0.8716064088866474, iteration: 381635
loss: 1.027544617652893,grad_norm: 0.9364134719637585, iteration: 381636
loss: 1.0091441869735718,grad_norm: 0.770420463444735, iteration: 381637
loss: 1.0009868144989014,grad_norm: 0.6019044069410369, iteration: 381638
loss: 0.9744942784309387,grad_norm: 0.7862348728792775, iteration: 381639
loss: 1.1078473329544067,grad_norm: 1.0000000577361723, iteration: 381640
loss: 1.013174295425415,grad_norm: 0.6946977307142725, iteration: 381641
loss: 1.023081660270691,grad_norm: 0.8056669456579335, iteration: 381642
loss: 0.9807955026626587,grad_norm: 0.8081372716672154, iteration: 381643
loss: 1.0300554037094116,grad_norm: 0.9999994431169736, iteration: 381644
loss: 1.0083662271499634,grad_norm: 0.8042575795879163, iteration: 381645
loss: 0.9864157438278198,grad_norm: 0.8851454709949242, iteration: 381646
loss: 1.0516120195388794,grad_norm: 0.7335404164985777, iteration: 381647
loss: 1.0299588441848755,grad_norm: 0.8650233881818379, iteration: 381648
loss: 0.9878833293914795,grad_norm: 0.7014797051787028, iteration: 381649
loss: 1.014976143836975,grad_norm: 0.797491128065185, iteration: 381650
loss: 1.0156993865966797,grad_norm: 0.8850301928352512, iteration: 381651
loss: 1.0751583576202393,grad_norm: 0.9999994056352769, iteration: 381652
loss: 0.9831094145774841,grad_norm: 0.7763503280939138, iteration: 381653
loss: 1.110103726387024,grad_norm: 0.8442280503787419, iteration: 381654
loss: 1.0404882431030273,grad_norm: 0.9999998763232241, iteration: 381655
loss: 1.0555540323257446,grad_norm: 0.9999994730155548, iteration: 381656
loss: 1.0103245973587036,grad_norm: 0.7144665310462306, iteration: 381657
loss: 0.9879999160766602,grad_norm: 0.7523294979981388, iteration: 381658
loss: 1.0015254020690918,grad_norm: 0.7815123759765679, iteration: 381659
loss: 1.0887796878814697,grad_norm: 0.9999994218969827, iteration: 381660
loss: 1.0088633298873901,grad_norm: 0.912693457669252, iteration: 381661
loss: 1.0283154249191284,grad_norm: 0.8562851164786092, iteration: 381662
loss: 1.0098177194595337,grad_norm: 0.9051468887140152, iteration: 381663
loss: 1.013149380683899,grad_norm: 0.73067396713716, iteration: 381664
loss: 1.0090042352676392,grad_norm: 0.9128180793309485, iteration: 381665
loss: 1.0064167976379395,grad_norm: 0.9999990615051266, iteration: 381666
loss: 1.0202046632766724,grad_norm: 0.9999990770523864, iteration: 381667
loss: 1.0175882577896118,grad_norm: 0.7984281945680605, iteration: 381668
loss: 0.993961751461029,grad_norm: 0.88171363283805, iteration: 381669
loss: 1.0119825601577759,grad_norm: 0.7915767645335068, iteration: 381670
loss: 1.0193359851837158,grad_norm: 0.9999990344002773, iteration: 381671
loss: 0.9743717908859253,grad_norm: 0.8534965006612152, iteration: 381672
loss: 1.0227469205856323,grad_norm: 0.9999991091641786, iteration: 381673
loss: 0.9837665557861328,grad_norm: 0.8375065511172073, iteration: 381674
loss: 0.9793099761009216,grad_norm: 0.8132510445157651, iteration: 381675
loss: 0.99068683385849,grad_norm: 0.9025457021627276, iteration: 381676
loss: 1.0648025274276733,grad_norm: 0.9999993478206489, iteration: 381677
loss: 1.0312117338180542,grad_norm: 0.8638616641512618, iteration: 381678
loss: 1.0134077072143555,grad_norm: 0.6877959882043436, iteration: 381679
loss: 0.982415497303009,grad_norm: 0.9745052803258624, iteration: 381680
loss: 0.9909272789955139,grad_norm: 0.8024847892737134, iteration: 381681
loss: 0.9578522443771362,grad_norm: 0.9124703743765982, iteration: 381682
loss: 0.9476765990257263,grad_norm: 0.7230512256676854, iteration: 381683
loss: 0.9656422138214111,grad_norm: 0.721969534966657, iteration: 381684
loss: 1.0247105360031128,grad_norm: 0.7986553091682377, iteration: 381685
loss: 0.9693319201469421,grad_norm: 0.8009728895528879, iteration: 381686
loss: 1.023340106010437,grad_norm: 0.9091933962889428, iteration: 381687
loss: 1.170323133468628,grad_norm: 0.9999999995694231, iteration: 381688
loss: 0.9963343739509583,grad_norm: 0.783944396046316, iteration: 381689
loss: 1.0435527563095093,grad_norm: 0.9022560551943493, iteration: 381690
loss: 0.9922381043434143,grad_norm: 0.9554330692307621, iteration: 381691
loss: 1.0322108268737793,grad_norm: 0.9999990669136272, iteration: 381692
loss: 1.0381207466125488,grad_norm: 0.9330028949446014, iteration: 381693
loss: 1.033766508102417,grad_norm: 0.8589095801020604, iteration: 381694
loss: 0.9489856958389282,grad_norm: 0.7294500639370065, iteration: 381695
loss: 1.0476462841033936,grad_norm: 0.8588399006349902, iteration: 381696
loss: 1.0896588563919067,grad_norm: 0.9999990880403865, iteration: 381697
loss: 1.01519775390625,grad_norm: 0.9999991074732788, iteration: 381698
loss: 1.0903422832489014,grad_norm: 0.8156539214854435, iteration: 381699
loss: 0.9850583076477051,grad_norm: 0.9999996556643711, iteration: 381700
loss: 1.0349719524383545,grad_norm: 0.6899987051053799, iteration: 381701
loss: 1.05448317527771,grad_norm: 0.9999998077425166, iteration: 381702
loss: 1.0136867761611938,grad_norm: 0.811000777999075, iteration: 381703
loss: 0.9828382134437561,grad_norm: 0.8630633972666409, iteration: 381704
loss: 1.001492977142334,grad_norm: 0.7080924969654382, iteration: 381705
loss: 1.0408564805984497,grad_norm: 0.7375555991118485, iteration: 381706
loss: 1.0502526760101318,grad_norm: 0.999999652701115, iteration: 381707
loss: 0.9937397837638855,grad_norm: 0.980113237980923, iteration: 381708
loss: 1.0222625732421875,grad_norm: 0.7689200455598695, iteration: 381709
loss: 0.9980913996696472,grad_norm: 0.680962878895648, iteration: 381710
loss: 1.0248950719833374,grad_norm: 0.8061493171229005, iteration: 381711
loss: 1.030977487564087,grad_norm: 0.7544565286271157, iteration: 381712
loss: 1.0367789268493652,grad_norm: 0.9999996093613579, iteration: 381713
loss: 1.0038130283355713,grad_norm: 0.9999996732889695, iteration: 381714
loss: 0.9521769285202026,grad_norm: 0.8339949360528899, iteration: 381715
loss: 1.0893735885620117,grad_norm: 0.9610329877164626, iteration: 381716
loss: 1.0207957029342651,grad_norm: 0.9031874011764031, iteration: 381717
loss: 1.0171414613723755,grad_norm: 0.73174605102932, iteration: 381718
loss: 1.0655434131622314,grad_norm: 0.8064589285447944, iteration: 381719
loss: 0.9921598434448242,grad_norm: 0.819643834063198, iteration: 381720
loss: 1.0329437255859375,grad_norm: 0.6978725482381232, iteration: 381721
loss: 0.9937839508056641,grad_norm: 0.8184481484720084, iteration: 381722
loss: 0.9820892810821533,grad_norm: 0.8463033775205886, iteration: 381723
loss: 0.9628420472145081,grad_norm: 0.7899395322826022, iteration: 381724
loss: 0.9982755780220032,grad_norm: 0.6630979550451442, iteration: 381725
loss: 0.9862111210823059,grad_norm: 0.7234020921928721, iteration: 381726
loss: 1.0109262466430664,grad_norm: 0.7869606139026627, iteration: 381727
loss: 1.0182996988296509,grad_norm: 0.9818962350466764, iteration: 381728
loss: 1.005702257156372,grad_norm: 0.7143893869535894, iteration: 381729
loss: 1.0076733827590942,grad_norm: 0.7949803006819393, iteration: 381730
loss: 1.038871169090271,grad_norm: 0.7317103892111406, iteration: 381731
loss: 1.0261962413787842,grad_norm: 0.8286279628928204, iteration: 381732
loss: 0.9905095100402832,grad_norm: 0.792049683022684, iteration: 381733
loss: 0.9764832854270935,grad_norm: 0.7050656437122649, iteration: 381734
loss: 0.9945809841156006,grad_norm: 0.9999994080310832, iteration: 381735
loss: 1.1369709968566895,grad_norm: 0.999506489816122, iteration: 381736
loss: 1.15724778175354,grad_norm: 0.999999824190362, iteration: 381737
loss: 1.0046440362930298,grad_norm: 0.7896751997133282, iteration: 381738
loss: 1.0106933116912842,grad_norm: 0.9193093661264998, iteration: 381739
loss: 1.0058296918869019,grad_norm: 0.8726509574016863, iteration: 381740
loss: 1.0237079858779907,grad_norm: 0.699994878493646, iteration: 381741
loss: 1.0003784894943237,grad_norm: 0.7233511359820414, iteration: 381742
loss: 1.0006961822509766,grad_norm: 0.866885456985558, iteration: 381743
loss: 1.003571629524231,grad_norm: 0.9126562858078353, iteration: 381744
loss: 1.1155890226364136,grad_norm: 0.999999444356583, iteration: 381745
loss: 1.1267549991607666,grad_norm: 0.999999930982562, iteration: 381746
loss: 0.9707046151161194,grad_norm: 0.8300911496209623, iteration: 381747
loss: 1.075433373451233,grad_norm: 0.9999996412145995, iteration: 381748
loss: 1.0050742626190186,grad_norm: 0.7918868890601953, iteration: 381749
loss: 1.100693702697754,grad_norm: 0.9999996093107643, iteration: 381750
loss: 1.0303133726119995,grad_norm: 0.9868841802332445, iteration: 381751
loss: 1.0196480751037598,grad_norm: 0.6753157239284897, iteration: 381752
loss: 1.1266014575958252,grad_norm: 0.9999999447368567, iteration: 381753
loss: 1.0001568794250488,grad_norm: 0.7564086600438633, iteration: 381754
loss: 1.0469037294387817,grad_norm: 0.7435196587277637, iteration: 381755
loss: 1.0249210596084595,grad_norm: 0.9052814173633086, iteration: 381756
loss: 1.0535268783569336,grad_norm: 0.9999994372490458, iteration: 381757
loss: 1.040022611618042,grad_norm: 0.9758347950512923, iteration: 381758
loss: 0.9850900173187256,grad_norm: 0.7525572574513302, iteration: 381759
loss: 1.040355920791626,grad_norm: 0.9999999418017707, iteration: 381760
loss: 1.0515446662902832,grad_norm: 0.9999991968631816, iteration: 381761
loss: 1.1076531410217285,grad_norm: 0.999999493534769, iteration: 381762
loss: 1.1220622062683105,grad_norm: 0.9999998734229019, iteration: 381763
loss: 1.0274550914764404,grad_norm: 0.7167826695832548, iteration: 381764
loss: 1.136755108833313,grad_norm: 1.0000000088321135, iteration: 381765
loss: 0.985711932182312,grad_norm: 0.9915692517427918, iteration: 381766
loss: 0.9786840677261353,grad_norm: 0.8210576631193168, iteration: 381767
loss: 1.0386321544647217,grad_norm: 0.8000704934527831, iteration: 381768
loss: 0.9842553734779358,grad_norm: 0.9719936452551565, iteration: 381769
loss: 1.0162067413330078,grad_norm: 0.7887261781998471, iteration: 381770
loss: 1.0266528129577637,grad_norm: 0.999999819292072, iteration: 381771
loss: 1.0416991710662842,grad_norm: 0.8209424513862779, iteration: 381772
loss: 0.9962568283081055,grad_norm: 0.7356620857137016, iteration: 381773
loss: 1.0179247856140137,grad_norm: 0.874230088309147, iteration: 381774
loss: 1.0005642175674438,grad_norm: 0.744067452205713, iteration: 381775
loss: 1.1498247385025024,grad_norm: 0.9999994307849558, iteration: 381776
loss: 1.014145851135254,grad_norm: 0.8762306243031966, iteration: 381777
loss: 0.992673397064209,grad_norm: 0.7753831420247276, iteration: 381778
loss: 0.9969576001167297,grad_norm: 0.6350344342861521, iteration: 381779
loss: 0.9775518178939819,grad_norm: 0.9018927031494541, iteration: 381780
loss: 1.0048854351043701,grad_norm: 0.999999309541817, iteration: 381781
loss: 1.074718952178955,grad_norm: 0.7703120851592985, iteration: 381782
loss: 1.013710379600525,grad_norm: 0.9947459493947748, iteration: 381783
loss: 1.0320583581924438,grad_norm: 0.9999994704181072, iteration: 381784
loss: 0.9847346544265747,grad_norm: 0.999999914820819, iteration: 381785
loss: 1.057169795036316,grad_norm: 1.000000001533827, iteration: 381786
loss: 1.0000834465026855,grad_norm: 0.7560643536952847, iteration: 381787
loss: 1.0067040920257568,grad_norm: 0.7366412371065397, iteration: 381788
loss: 1.0295264720916748,grad_norm: 0.9012822437663578, iteration: 381789
loss: 1.0488898754119873,grad_norm: 0.9999993591919651, iteration: 381790
loss: 1.0056592226028442,grad_norm: 0.9974733952928622, iteration: 381791
loss: 1.0022903680801392,grad_norm: 0.9522431937225513, iteration: 381792
loss: 1.0620367527008057,grad_norm: 0.8052775757026076, iteration: 381793
loss: 0.9944453835487366,grad_norm: 0.7541986126669268, iteration: 381794
loss: 0.9466384649276733,grad_norm: 0.8194300369087992, iteration: 381795
loss: 1.0457384586334229,grad_norm: 0.9999992248423692, iteration: 381796
loss: 1.0406728982925415,grad_norm: 0.6725563805711645, iteration: 381797
loss: 1.0853859186172485,grad_norm: 0.7895868204954316, iteration: 381798
loss: 1.058931827545166,grad_norm: 0.7932224121903192, iteration: 381799
loss: 1.0065613985061646,grad_norm: 0.7749128550356981, iteration: 381800
loss: 1.000058650970459,grad_norm: 0.9558950342748511, iteration: 381801
loss: 0.9916517734527588,grad_norm: 0.9999992585273862, iteration: 381802
loss: 1.0370705127716064,grad_norm: 0.9999997359781433, iteration: 381803
loss: 1.0078376531600952,grad_norm: 0.6705332463432147, iteration: 381804
loss: 1.0554265975952148,grad_norm: 0.8184406089261989, iteration: 381805
loss: 1.0264103412628174,grad_norm: 0.9999993930188927, iteration: 381806
loss: 0.9824377298355103,grad_norm: 0.7577802889457348, iteration: 381807
loss: 1.0248287916183472,grad_norm: 0.9999990200604588, iteration: 381808
loss: 0.9670827388763428,grad_norm: 0.9733660797007864, iteration: 381809
loss: 1.015124797821045,grad_norm: 0.9999993391218968, iteration: 381810
loss: 1.1610790491104126,grad_norm: 0.9999994083194415, iteration: 381811
loss: 0.9612230062484741,grad_norm: 0.7891489342417038, iteration: 381812
loss: 0.9882152080535889,grad_norm: 0.7980593543223918, iteration: 381813
loss: 1.0452584028244019,grad_norm: 0.8959670854626756, iteration: 381814
loss: 1.037253499031067,grad_norm: 0.9675604890482576, iteration: 381815
loss: 1.0072803497314453,grad_norm: 0.9999991441688286, iteration: 381816
loss: 1.0492571592330933,grad_norm: 0.9999991627549552, iteration: 381817
loss: 0.9969271421432495,grad_norm: 1.0000000336758301, iteration: 381818
loss: 0.9664081931114197,grad_norm: 0.9259544533414469, iteration: 381819
loss: 1.0096663236618042,grad_norm: 0.9239400975221304, iteration: 381820
loss: 1.0134726762771606,grad_norm: 0.8160316354099414, iteration: 381821
loss: 1.023239016532898,grad_norm: 0.8985581143796183, iteration: 381822
loss: 1.0606424808502197,grad_norm: 0.9999997490757339, iteration: 381823
loss: 0.9914263486862183,grad_norm: 0.9539374514853072, iteration: 381824
loss: 1.027120590209961,grad_norm: 0.7703347755874943, iteration: 381825
loss: 0.9786697030067444,grad_norm: 0.8521382380065715, iteration: 381826
loss: 1.1251943111419678,grad_norm: 0.9999992251780222, iteration: 381827
loss: 1.1698018312454224,grad_norm: 0.9999999545120012, iteration: 381828
loss: 0.9945418238639832,grad_norm: 0.7812982828657649, iteration: 381829
loss: 1.0162361860275269,grad_norm: 0.9579494341784786, iteration: 381830
loss: 1.0315990447998047,grad_norm: 0.8952311653604764, iteration: 381831
loss: 1.0365285873413086,grad_norm: 0.9999994763889968, iteration: 381832
loss: 1.0186384916305542,grad_norm: 0.9999997670902182, iteration: 381833
loss: 1.007155179977417,grad_norm: 0.8427907382805007, iteration: 381834
loss: 1.0303574800491333,grad_norm: 0.8058451667626358, iteration: 381835
loss: 0.9817428588867188,grad_norm: 0.7597749229883388, iteration: 381836
loss: 0.9821200370788574,grad_norm: 0.9999989039974041, iteration: 381837
loss: 1.4693008661270142,grad_norm: 0.999999912151359, iteration: 381838
loss: 1.060078740119934,grad_norm: 0.9275238748971403, iteration: 381839
loss: 1.0404853820800781,grad_norm: 0.8569652366241922, iteration: 381840
loss: 1.0883210897445679,grad_norm: 0.9999998452514235, iteration: 381841
loss: 0.9867085814476013,grad_norm: 0.7481476211067372, iteration: 381842
loss: 1.07673978805542,grad_norm: 0.9999990039231945, iteration: 381843
loss: 0.9819812178611755,grad_norm: 0.9020592710187397, iteration: 381844
loss: 1.0324432849884033,grad_norm: 0.9999991052516066, iteration: 381845
loss: 0.9539209604263306,grad_norm: 0.8178301495555692, iteration: 381846
loss: 0.9727687239646912,grad_norm: 0.8332038260237468, iteration: 381847
loss: 1.0057474374771118,grad_norm: 0.7930183168277637, iteration: 381848
loss: 1.0329041481018066,grad_norm: 0.9051799684050517, iteration: 381849
loss: 0.9789222478866577,grad_norm: 0.9306785474752074, iteration: 381850
loss: 0.9673950672149658,grad_norm: 0.9999998206785579, iteration: 381851
loss: 1.067121148109436,grad_norm: 0.9916289069695117, iteration: 381852
loss: 1.0165815353393555,grad_norm: 0.9999993731736955, iteration: 381853
loss: 1.0047600269317627,grad_norm: 0.7902079712128119, iteration: 381854
loss: 1.0498677492141724,grad_norm: 0.8640784223290691, iteration: 381855
loss: 0.9952561855316162,grad_norm: 0.9442876066156598, iteration: 381856
loss: 1.0735950469970703,grad_norm: 0.9999994683622686, iteration: 381857
loss: 1.0195894241333008,grad_norm: 0.8448320353334987, iteration: 381858
loss: 1.071743369102478,grad_norm: 0.9999998381881617, iteration: 381859
loss: 1.03481125831604,grad_norm: 0.8795062117388679, iteration: 381860
loss: 0.958674967288971,grad_norm: 0.7825504443697088, iteration: 381861
loss: 0.9893121123313904,grad_norm: 0.6990993532329198, iteration: 381862
loss: 0.9734352827072144,grad_norm: 0.9999990308854269, iteration: 381863
loss: 1.0526466369628906,grad_norm: 0.9251809139457474, iteration: 381864
loss: 1.0647169351577759,grad_norm: 0.9999993702465915, iteration: 381865
loss: 1.0033305883407593,grad_norm: 0.9999990749899573, iteration: 381866
loss: 0.9985562562942505,grad_norm: 0.7425123767483626, iteration: 381867
loss: 0.9815376996994019,grad_norm: 0.7201152865773194, iteration: 381868
loss: 1.0330349206924438,grad_norm: 0.790856032277515, iteration: 381869
loss: 0.9866308569908142,grad_norm: 0.6877862663362038, iteration: 381870
loss: 1.0041272640228271,grad_norm: 0.8256250252028808, iteration: 381871
loss: 1.0294910669326782,grad_norm: 0.9057433578361638, iteration: 381872
loss: 1.0098339319229126,grad_norm: 0.8319518294535165, iteration: 381873
loss: 1.0064613819122314,grad_norm: 0.8521606057339256, iteration: 381874
loss: 1.0197792053222656,grad_norm: 0.9113728211920384, iteration: 381875
loss: 1.045073390007019,grad_norm: 0.99999969843196, iteration: 381876
loss: 0.9590216875076294,grad_norm: 0.7699036355691884, iteration: 381877
loss: 0.994175136089325,grad_norm: 0.8030386016228978, iteration: 381878
loss: 0.9643439054489136,grad_norm: 0.750209824524476, iteration: 381879
loss: 1.000994324684143,grad_norm: 0.7756935363168869, iteration: 381880
loss: 1.0228617191314697,grad_norm: 0.8443997345003805, iteration: 381881
loss: 1.0009795427322388,grad_norm: 0.8176715648541548, iteration: 381882
loss: 0.9886749386787415,grad_norm: 0.7215281218465941, iteration: 381883
loss: 1.1313157081604004,grad_norm: 0.9999991637300082, iteration: 381884
loss: 0.9940099120140076,grad_norm: 0.9933302605698878, iteration: 381885
loss: 1.0506592988967896,grad_norm: 0.9896807552726801, iteration: 381886
loss: 1.165879487991333,grad_norm: 0.9999998427385496, iteration: 381887
loss: 1.010353922843933,grad_norm: 0.999999427703055, iteration: 381888
loss: 1.011556625366211,grad_norm: 0.8002946356235651, iteration: 381889
loss: 1.0072243213653564,grad_norm: 0.8117191761125935, iteration: 381890
loss: 1.018171787261963,grad_norm: 0.9999989820697666, iteration: 381891
loss: 1.0870424509048462,grad_norm: 0.9999996802002804, iteration: 381892
loss: 1.1097056865692139,grad_norm: 0.9999994087874196, iteration: 381893
loss: 0.9987562298774719,grad_norm: 0.8128523148012604, iteration: 381894
loss: 0.9975359439849854,grad_norm: 0.6988808592251673, iteration: 381895
loss: 0.9942302703857422,grad_norm: 0.7426877018382229, iteration: 381896
loss: 1.0060899257659912,grad_norm: 0.8105655254520011, iteration: 381897
loss: 1.0518485307693481,grad_norm: 0.8478157988629306, iteration: 381898
loss: 1.0236585140228271,grad_norm: 0.6365685270624226, iteration: 381899
loss: 1.0409148931503296,grad_norm: 0.8387170444516049, iteration: 381900
loss: 1.001573085784912,grad_norm: 0.9267106036415077, iteration: 381901
loss: 0.9903841614723206,grad_norm: 0.8050386702962588, iteration: 381902
loss: 1.0099809169769287,grad_norm: 0.8999853660672761, iteration: 381903
loss: 0.9910489320755005,grad_norm: 0.651773415996953, iteration: 381904
loss: 1.0204095840454102,grad_norm: 0.9999991120223733, iteration: 381905
loss: 1.0028469562530518,grad_norm: 0.8417376821312283, iteration: 381906
loss: 1.043979287147522,grad_norm: 0.719205800907949, iteration: 381907
loss: 1.0084470510482788,grad_norm: 0.7519517975709138, iteration: 381908
loss: 1.0435086488723755,grad_norm: 0.9999996553532574, iteration: 381909
loss: 0.9882953763008118,grad_norm: 0.9999997279724571, iteration: 381910
loss: 1.0527315139770508,grad_norm: 0.9418106560068494, iteration: 381911
loss: 1.020504117012024,grad_norm: 0.9721483814714138, iteration: 381912
loss: 1.0239169597625732,grad_norm: 0.9267209696480175, iteration: 381913
loss: 1.0083670616149902,grad_norm: 0.7640194256431089, iteration: 381914
loss: 1.2074377536773682,grad_norm: 0.9999995946743699, iteration: 381915
loss: 1.0191644430160522,grad_norm: 0.999999140587708, iteration: 381916
loss: 1.0194560289382935,grad_norm: 0.9999998629915177, iteration: 381917
loss: 1.0146801471710205,grad_norm: 0.7616129404216361, iteration: 381918
loss: 1.1147233247756958,grad_norm: 0.9999993623298087, iteration: 381919
loss: 1.0004618167877197,grad_norm: 0.9999997183595277, iteration: 381920
loss: 0.9918968081474304,grad_norm: 0.7334434638478656, iteration: 381921
loss: 0.9554043412208557,grad_norm: 0.8276373694664222, iteration: 381922
loss: 0.994410514831543,grad_norm: 0.8375010350555893, iteration: 381923
loss: 1.0859456062316895,grad_norm: 0.9864592939631256, iteration: 381924
loss: 1.029490351676941,grad_norm: 0.9999998653729653, iteration: 381925
loss: 1.0779949426651,grad_norm: 0.9999997419470423, iteration: 381926
loss: 0.9716475605964661,grad_norm: 0.9999992782844098, iteration: 381927
loss: 1.0564956665039062,grad_norm: 0.999999355540948, iteration: 381928
loss: 0.9822247624397278,grad_norm: 0.7323238556325372, iteration: 381929
loss: 0.9994120001792908,grad_norm: 0.8785727858766275, iteration: 381930
loss: 1.0324440002441406,grad_norm: 0.9999993006043404, iteration: 381931
loss: 1.0209428071975708,grad_norm: 0.9999999238024706, iteration: 381932
loss: 1.0912708044052124,grad_norm: 0.9999994461678089, iteration: 381933
loss: 0.9973122477531433,grad_norm: 0.9350783698011967, iteration: 381934
loss: 1.004404067993164,grad_norm: 0.8652783488178113, iteration: 381935
loss: 1.2332991361618042,grad_norm: 0.9999999084163304, iteration: 381936
loss: 1.0550845861434937,grad_norm: 0.9999995500280134, iteration: 381937
loss: 1.1102467775344849,grad_norm: 0.8912276191730495, iteration: 381938
loss: 0.9915387630462646,grad_norm: 0.8395755568077814, iteration: 381939
loss: 1.001365303993225,grad_norm: 0.7487593060665989, iteration: 381940
loss: 1.1214649677276611,grad_norm: 0.999999687582486, iteration: 381941
loss: 1.000785231590271,grad_norm: 0.9559270689895732, iteration: 381942
loss: 1.0176609754562378,grad_norm: 0.7152532025705314, iteration: 381943
loss: 0.9990290403366089,grad_norm: 0.8697629610141919, iteration: 381944
loss: 1.087052583694458,grad_norm: 0.7114560826003397, iteration: 381945
loss: 0.9492267370223999,grad_norm: 0.8071402204053012, iteration: 381946
loss: 1.0887761116027832,grad_norm: 0.9999996960159876, iteration: 381947
loss: 1.1113476753234863,grad_norm: 0.9836006223776577, iteration: 381948
loss: 0.9466750621795654,grad_norm: 0.7799661290733755, iteration: 381949
loss: 1.0423996448516846,grad_norm: 0.9999992824319551, iteration: 381950
loss: 1.0358190536499023,grad_norm: 0.703000758167128, iteration: 381951
loss: 0.9594813585281372,grad_norm: 0.8280593237480327, iteration: 381952
loss: 1.0138741731643677,grad_norm: 0.791403272628165, iteration: 381953
loss: 1.0453033447265625,grad_norm: 0.8415608448807192, iteration: 381954
loss: 1.1713839769363403,grad_norm: 0.9999999445546169, iteration: 381955
loss: 1.0142279863357544,grad_norm: 0.7501272244923917, iteration: 381956
loss: 1.0677855014801025,grad_norm: 0.9468683029147344, iteration: 381957
loss: 1.0033231973648071,grad_norm: 0.972046538017281, iteration: 381958
loss: 1.0068418979644775,grad_norm: 0.9979288636938503, iteration: 381959
loss: 1.0043776035308838,grad_norm: 0.9999992363818169, iteration: 381960
loss: 1.0427407026290894,grad_norm: 0.9776609349868611, iteration: 381961
loss: 0.9839988350868225,grad_norm: 0.7655336636691432, iteration: 381962
loss: 1.0630905628204346,grad_norm: 0.771977986183127, iteration: 381963
loss: 1.015874981880188,grad_norm: 0.7914487794044333, iteration: 381964
loss: 1.1035515069961548,grad_norm: 0.9999995808046658, iteration: 381965
loss: 0.9867580533027649,grad_norm: 0.7765248196024682, iteration: 381966
loss: 1.004151463508606,grad_norm: 0.782632111951813, iteration: 381967
loss: 0.9761298894882202,grad_norm: 0.7139298418372694, iteration: 381968
loss: 0.9712510108947754,grad_norm: 0.8420742076802515, iteration: 381969
loss: 1.0465718507766724,grad_norm: 0.8719184404833787, iteration: 381970
loss: 1.009647011756897,grad_norm: 0.999999357183132, iteration: 381971
loss: 1.0079679489135742,grad_norm: 0.8717078374061653, iteration: 381972
loss: 1.0203776359558105,grad_norm: 0.8655754131136696, iteration: 381973
loss: 0.9757668375968933,grad_norm: 0.9999998182900773, iteration: 381974
loss: 1.0096306800842285,grad_norm: 0.8933326777211008, iteration: 381975
loss: 1.037219762802124,grad_norm: 0.9999998308764148, iteration: 381976
loss: 1.22022545337677,grad_norm: 0.9999999194197047, iteration: 381977
loss: 1.030698299407959,grad_norm: 0.7184026949631788, iteration: 381978
loss: 0.9843863844871521,grad_norm: 0.6889584037222776, iteration: 381979
loss: 1.0396369695663452,grad_norm: 0.7674790739902114, iteration: 381980
loss: 0.9852818250656128,grad_norm: 0.8413087959266919, iteration: 381981
loss: 0.9973049163818359,grad_norm: 0.7516025621253779, iteration: 381982
loss: 1.0670169591903687,grad_norm: 0.8235739081444441, iteration: 381983
loss: 1.0422921180725098,grad_norm: 0.9800680463341424, iteration: 381984
loss: 1.0313401222229004,grad_norm: 0.7912652983281805, iteration: 381985
loss: 0.9970192313194275,grad_norm: 0.7255914539148445, iteration: 381986
loss: 0.9998356103897095,grad_norm: 0.9999993222476284, iteration: 381987
loss: 1.0646734237670898,grad_norm: 0.999999252135949, iteration: 381988
loss: 1.0237942934036255,grad_norm: 0.8578955892108899, iteration: 381989
loss: 1.021255373954773,grad_norm: 0.8195464970107754, iteration: 381990
loss: 1.0548224449157715,grad_norm: 0.9999991798063411, iteration: 381991
loss: 0.9682782292366028,grad_norm: 0.8750730818647109, iteration: 381992
loss: 1.0156301259994507,grad_norm: 1.0000000286564628, iteration: 381993
loss: 1.003971815109253,grad_norm: 0.8241170919447232, iteration: 381994
loss: 1.0725277662277222,grad_norm: 0.7808763336997291, iteration: 381995
loss: 1.0000488758087158,grad_norm: 0.9999996988586378, iteration: 381996
loss: 1.0217444896697998,grad_norm: 0.7206798027952865, iteration: 381997
loss: 1.0035054683685303,grad_norm: 0.7585519450131336, iteration: 381998
loss: 0.9966906309127808,grad_norm: 0.9999994345877391, iteration: 381999
loss: 1.178106427192688,grad_norm: 0.9999998212998084, iteration: 382000
loss: 0.9868430495262146,grad_norm: 0.6975944913296644, iteration: 382001
loss: 1.0082519054412842,grad_norm: 0.7333919946353501, iteration: 382002
loss: 0.9975070357322693,grad_norm: 0.6748313669164427, iteration: 382003
loss: 0.9967588186264038,grad_norm: 0.7446287100073169, iteration: 382004
loss: 0.9564185738563538,grad_norm: 0.8442590271370144, iteration: 382005
loss: 1.010384440422058,grad_norm: 0.7295305774143778, iteration: 382006
loss: 0.9616247415542603,grad_norm: 0.6894388888091282, iteration: 382007
loss: 1.0567735433578491,grad_norm: 0.6310007764594797, iteration: 382008
loss: 1.0166003704071045,grad_norm: 0.9999997067577306, iteration: 382009
loss: 1.053210973739624,grad_norm: 0.8769399476334874, iteration: 382010
loss: 1.0282272100448608,grad_norm: 0.9782677032741203, iteration: 382011
loss: 1.0610363483428955,grad_norm: 0.999999731058597, iteration: 382012
loss: 1.0156837701797485,grad_norm: 0.679422875907479, iteration: 382013
loss: 0.9940354228019714,grad_norm: 0.8182572701908382, iteration: 382014
loss: 0.9740437269210815,grad_norm: 0.9217369775581863, iteration: 382015
loss: 1.0348761081695557,grad_norm: 0.8597703469989486, iteration: 382016
loss: 1.07146155834198,grad_norm: 0.9106335761565554, iteration: 382017
loss: 0.9844743609428406,grad_norm: 0.7761695178775448, iteration: 382018
loss: 0.9493878483772278,grad_norm: 0.7956739185170101, iteration: 382019
loss: 1.0175232887268066,grad_norm: 0.9542569629451745, iteration: 382020
loss: 0.988380491733551,grad_norm: 0.9987824382851076, iteration: 382021
loss: 0.9895450472831726,grad_norm: 0.7751331204610857, iteration: 382022
loss: 0.9871402382850647,grad_norm: 0.78575144876288, iteration: 382023
loss: 1.0175458192825317,grad_norm: 0.9436767967944296, iteration: 382024
loss: 0.9979223608970642,grad_norm: 0.8572823498826434, iteration: 382025
loss: 0.9891595244407654,grad_norm: 0.9999990944435028, iteration: 382026
loss: 1.0489720106124878,grad_norm: 0.7904872654279191, iteration: 382027
loss: 0.9976449012756348,grad_norm: 0.7471009766952436, iteration: 382028
loss: 1.0414594411849976,grad_norm: 0.9251278435036372, iteration: 382029
loss: 1.0062785148620605,grad_norm: 0.9999991531385295, iteration: 382030
loss: 0.9793525338172913,grad_norm: 0.6381532452380474, iteration: 382031
loss: 0.9748342037200928,grad_norm: 0.7027034605811475, iteration: 382032
loss: 0.9993662238121033,grad_norm: 0.7146987574972535, iteration: 382033
loss: 1.073927879333496,grad_norm: 0.9999994792520229, iteration: 382034
loss: 0.9952046275138855,grad_norm: 0.7471815865923457, iteration: 382035
loss: 1.0235857963562012,grad_norm: 0.8616453090668615, iteration: 382036
loss: 0.9703497886657715,grad_norm: 0.7970742492733136, iteration: 382037
loss: 0.9809619784355164,grad_norm: 0.7374880581428624, iteration: 382038
loss: 0.9425650238990784,grad_norm: 0.7393898550265763, iteration: 382039
loss: 1.0183571577072144,grad_norm: 0.9278208068547155, iteration: 382040
loss: 1.0121866464614868,grad_norm: 0.803627473599752, iteration: 382041
loss: 0.9532514810562134,grad_norm: 0.6684542282665481, iteration: 382042
loss: 1.029787540435791,grad_norm: 0.9346035805407226, iteration: 382043
loss: 1.013130784034729,grad_norm: 0.9005858079709187, iteration: 382044
loss: 0.9880419969558716,grad_norm: 0.9575981682803506, iteration: 382045
loss: 0.9913586378097534,grad_norm: 0.8273116782485612, iteration: 382046
loss: 1.0154194831848145,grad_norm: 0.9442586469436849, iteration: 382047
loss: 0.9813641905784607,grad_norm: 0.759736902692849, iteration: 382048
loss: 1.000433087348938,grad_norm: 0.9999998403125091, iteration: 382049
loss: 0.9985603094100952,grad_norm: 0.8628330059272823, iteration: 382050
loss: 0.9727340340614319,grad_norm: 0.7557658876985128, iteration: 382051
loss: 1.0763176679611206,grad_norm: 0.7970986929640173, iteration: 382052
loss: 1.0203994512557983,grad_norm: 0.8600503460076709, iteration: 382053
loss: 1.0696136951446533,grad_norm: 0.9999997750451088, iteration: 382054
loss: 0.9700400233268738,grad_norm: 0.7639871402107976, iteration: 382055
loss: 1.0765186548233032,grad_norm: 0.7663867734469998, iteration: 382056
loss: 0.9824703931808472,grad_norm: 0.9966031087808479, iteration: 382057
loss: 1.005393385887146,grad_norm: 0.8072957809420296, iteration: 382058
loss: 0.9927564859390259,grad_norm: 0.7488635099486529, iteration: 382059
loss: 1.0285331010818481,grad_norm: 0.9999992434570376, iteration: 382060
loss: 0.9854942560195923,grad_norm: 0.9999991265124589, iteration: 382061
loss: 1.0108721256256104,grad_norm: 0.8406561715143543, iteration: 382062
loss: 1.0127646923065186,grad_norm: 0.9076175396082566, iteration: 382063
loss: 0.9844873547554016,grad_norm: 0.7216931641592405, iteration: 382064
loss: 1.0347552299499512,grad_norm: 0.7927934285398293, iteration: 382065
loss: 0.9665898084640503,grad_norm: 0.9999994206708894, iteration: 382066
loss: 0.9950771927833557,grad_norm: 0.851107085877938, iteration: 382067
loss: 0.9968301057815552,grad_norm: 0.7410093810268307, iteration: 382068
loss: 0.995672345161438,grad_norm: 0.6974497609999671, iteration: 382069
loss: 0.9951677918434143,grad_norm: 0.8151548624771533, iteration: 382070
loss: 1.00600004196167,grad_norm: 0.999999093776914, iteration: 382071
loss: 0.9641072154045105,grad_norm: 0.7817569179971486, iteration: 382072
loss: 0.9708516597747803,grad_norm: 0.8923056433181804, iteration: 382073
loss: 1.0420464277267456,grad_norm: 0.9999995314680143, iteration: 382074
loss: 1.0040640830993652,grad_norm: 0.7800072919973616, iteration: 382075
loss: 0.9815682768821716,grad_norm: 0.7862947022924276, iteration: 382076
loss: 1.0352635383605957,grad_norm: 0.7938347595441183, iteration: 382077
loss: 0.9951002597808838,grad_norm: 0.8016567053219608, iteration: 382078
loss: 0.9779040217399597,grad_norm: 0.7543225458713471, iteration: 382079
loss: 1.099543809890747,grad_norm: 0.9999994690110076, iteration: 382080
loss: 1.0076203346252441,grad_norm: 0.8083709952927636, iteration: 382081
loss: 1.046867847442627,grad_norm: 0.9999996257444022, iteration: 382082
loss: 1.0061932802200317,grad_norm: 0.8761205881933768, iteration: 382083
loss: 1.0168261528015137,grad_norm: 0.9708178238503722, iteration: 382084
loss: 1.0367811918258667,grad_norm: 0.7689017535349674, iteration: 382085
loss: 1.019464135169983,grad_norm: 0.9999995667605496, iteration: 382086
loss: 1.0811318159103394,grad_norm: 0.9999992688600727, iteration: 382087
loss: 1.0505000352859497,grad_norm: 0.8413879520155215, iteration: 382088
loss: 0.9827389121055603,grad_norm: 0.7566360175504961, iteration: 382089
loss: 1.1083844900131226,grad_norm: 0.9999996472234866, iteration: 382090
loss: 0.9708259701728821,grad_norm: 0.725270971343547, iteration: 382091
loss: 1.0135657787322998,grad_norm: 0.8954537730592751, iteration: 382092
loss: 0.9613344073295593,grad_norm: 0.7768055920534902, iteration: 382093
loss: 1.0775951147079468,grad_norm: 0.6899111938839597, iteration: 382094
loss: 1.0481206178665161,grad_norm: 0.7502821465911895, iteration: 382095
loss: 1.0610905885696411,grad_norm: 0.9999997109615716, iteration: 382096
loss: 0.9865575432777405,grad_norm: 0.9999992046701052, iteration: 382097
loss: 1.008155107498169,grad_norm: 0.7819730877952541, iteration: 382098
loss: 1.0003868341445923,grad_norm: 0.6849557912380888, iteration: 382099
loss: 1.0551717281341553,grad_norm: 0.7170639303696429, iteration: 382100
loss: 0.9727829098701477,grad_norm: 0.8698103854629087, iteration: 382101
loss: 1.0544625520706177,grad_norm: 0.8222788587484741, iteration: 382102
loss: 0.9664076566696167,grad_norm: 0.7462728414412001, iteration: 382103
loss: 1.019759178161621,grad_norm: 0.8466377030180974, iteration: 382104
loss: 0.9730736017227173,grad_norm: 0.777113355551328, iteration: 382105
loss: 0.9821411967277527,grad_norm: 0.8176431461759311, iteration: 382106
loss: 0.9972389340400696,grad_norm: 0.9999998947435419, iteration: 382107
loss: 1.0194122791290283,grad_norm: 0.8900487900880745, iteration: 382108
loss: 1.126854658126831,grad_norm: 0.9999991110752928, iteration: 382109
loss: 1.0827442407608032,grad_norm: 0.8874429965694428, iteration: 382110
loss: 1.0326380729675293,grad_norm: 0.9271092313412617, iteration: 382111
loss: 1.0154730081558228,grad_norm: 0.9400480700856231, iteration: 382112
loss: 1.002938151359558,grad_norm: 0.9805486389771465, iteration: 382113
loss: 0.9821503758430481,grad_norm: 0.9999990541002127, iteration: 382114
loss: 1.0141208171844482,grad_norm: 0.8179960899833388, iteration: 382115
loss: 1.0040044784545898,grad_norm: 0.8011057075706429, iteration: 382116
loss: 0.9965063333511353,grad_norm: 0.8542286930254257, iteration: 382117
loss: 0.9952347874641418,grad_norm: 0.9348261345875687, iteration: 382118
loss: 1.0060986280441284,grad_norm: 0.8219027310122571, iteration: 382119
loss: 1.0318913459777832,grad_norm: 0.9526963671224083, iteration: 382120
loss: 1.1387567520141602,grad_norm: 0.9999991300945257, iteration: 382121
loss: 1.0362204313278198,grad_norm: 0.9999998604131768, iteration: 382122
loss: 0.9738311767578125,grad_norm: 0.723983268970303, iteration: 382123
loss: 0.9805358648300171,grad_norm: 0.8753671063638405, iteration: 382124
loss: 0.9878560304641724,grad_norm: 0.89422783725882, iteration: 382125
loss: 0.956231415271759,grad_norm: 0.7605473769047814, iteration: 382126
loss: 1.0068615674972534,grad_norm: 0.83715873086339, iteration: 382127
loss: 1.0555652379989624,grad_norm: 0.9999993195214305, iteration: 382128
loss: 1.0070661306381226,grad_norm: 0.7427583924137896, iteration: 382129
loss: 0.9858697056770325,grad_norm: 0.85546331618527, iteration: 382130
loss: 1.0315598249435425,grad_norm: 0.7684606557286748, iteration: 382131
loss: 0.9484607577323914,grad_norm: 0.8375632031579425, iteration: 382132
loss: 0.9993644952774048,grad_norm: 0.7682167117092092, iteration: 382133
loss: 0.982313334941864,grad_norm: 0.8272279726046313, iteration: 382134
loss: 1.0016236305236816,grad_norm: 0.7717194738806835, iteration: 382135
loss: 1.0192950963974,grad_norm: 0.880887147332414, iteration: 382136
loss: 0.9996280670166016,grad_norm: 0.8291828580461107, iteration: 382137
loss: 1.0301493406295776,grad_norm: 0.8426823630039054, iteration: 382138
loss: 1.0135973691940308,grad_norm: 0.7748592680698938, iteration: 382139
loss: 0.9970988035202026,grad_norm: 0.7415418684941298, iteration: 382140
loss: 1.0861910581588745,grad_norm: 0.8782603093302808, iteration: 382141
loss: 1.0240588188171387,grad_norm: 0.9999992859796505, iteration: 382142
loss: 1.0051034688949585,grad_norm: 0.7764937980401049, iteration: 382143
loss: 1.0083646774291992,grad_norm: 0.865806511166512, iteration: 382144
loss: 1.0294559001922607,grad_norm: 0.7330953937486376, iteration: 382145
loss: 1.0332249402999878,grad_norm: 0.8403425113424978, iteration: 382146
loss: 0.9762322902679443,grad_norm: 0.735195309163054, iteration: 382147
loss: 1.0107587575912476,grad_norm: 0.6424713926418685, iteration: 382148
loss: 1.0176602602005005,grad_norm: 0.8815883897124693, iteration: 382149
loss: 1.0105518102645874,grad_norm: 0.8177707389550354, iteration: 382150
loss: 0.9899382591247559,grad_norm: 0.6133110371469668, iteration: 382151
loss: 1.0388884544372559,grad_norm: 0.7130980868693215, iteration: 382152
loss: 1.000724196434021,grad_norm: 0.733228403134089, iteration: 382153
loss: 0.9914885759353638,grad_norm: 0.7585391669935242, iteration: 382154
loss: 0.9912464618682861,grad_norm: 0.7614125457378194, iteration: 382155
loss: 0.9746596217155457,grad_norm: 0.72101778777163, iteration: 382156
loss: 1.049656867980957,grad_norm: 0.90606511379105, iteration: 382157
loss: 1.0025919675827026,grad_norm: 0.7228092929478821, iteration: 382158
loss: 1.043662428855896,grad_norm: 0.8805654203538295, iteration: 382159
loss: 0.9791249632835388,grad_norm: 0.8176230702429208, iteration: 382160
loss: 1.0094314813613892,grad_norm: 0.7644212941756733, iteration: 382161
loss: 0.9854042530059814,grad_norm: 0.7391969726372203, iteration: 382162
loss: 0.9461170434951782,grad_norm: 0.6953633197527581, iteration: 382163
loss: 1.0069425106048584,grad_norm: 0.9689436773355359, iteration: 382164
loss: 1.115952730178833,grad_norm: 0.9999990741910246, iteration: 382165
loss: 0.9753058552742004,grad_norm: 0.6587696380519814, iteration: 382166
loss: 1.0312467813491821,grad_norm: 0.9999996084216817, iteration: 382167
loss: 0.9669215083122253,grad_norm: 0.7882144919880907, iteration: 382168
loss: 0.9977390170097351,grad_norm: 0.870265666212592, iteration: 382169
loss: 0.9990808367729187,grad_norm: 0.7440546485162911, iteration: 382170
loss: 1.0233097076416016,grad_norm: 0.6883107078156473, iteration: 382171
loss: 1.0165687799453735,grad_norm: 0.79336080339484, iteration: 382172
loss: 1.086476445198059,grad_norm: 0.8335352157139231, iteration: 382173
loss: 0.9741673469543457,grad_norm: 0.8142984463817322, iteration: 382174
loss: 0.9921877384185791,grad_norm: 0.7659939725343532, iteration: 382175
loss: 1.0990257263183594,grad_norm: 0.7453950816904821, iteration: 382176
loss: 0.9909956455230713,grad_norm: 0.929382084113539, iteration: 382177
loss: 1.0218170881271362,grad_norm: 0.7539151873642617, iteration: 382178
loss: 1.0113844871520996,grad_norm: 0.8743304182443504, iteration: 382179
loss: 1.034435749053955,grad_norm: 0.9999992802506807, iteration: 382180
loss: 1.0365029573440552,grad_norm: 0.9999994181483557, iteration: 382181
loss: 0.973778486251831,grad_norm: 0.8972235840883165, iteration: 382182
loss: 0.9964319467544556,grad_norm: 0.7329315027770107, iteration: 382183
loss: 1.0106507539749146,grad_norm: 0.777005415246966, iteration: 382184
loss: 1.0453084707260132,grad_norm: 0.8833315750226032, iteration: 382185
loss: 0.971622109413147,grad_norm: 0.866990688346151, iteration: 382186
loss: 1.0072011947631836,grad_norm: 0.999999858570756, iteration: 382187
loss: 1.0041273832321167,grad_norm: 0.8497720125510742, iteration: 382188
loss: 1.060750126838684,grad_norm: 0.9999992360215813, iteration: 382189
loss: 1.0047078132629395,grad_norm: 0.7404583865974983, iteration: 382190
loss: 1.0416816473007202,grad_norm: 0.7434949970341601, iteration: 382191
loss: 1.0433316230773926,grad_norm: 0.7849137611377236, iteration: 382192
loss: 1.0058411359786987,grad_norm: 0.6465415078535173, iteration: 382193
loss: 1.0053296089172363,grad_norm: 0.7551557242768836, iteration: 382194
loss: 0.9675741791725159,grad_norm: 0.6931520268448439, iteration: 382195
loss: 1.1084728240966797,grad_norm: 0.9999993593023471, iteration: 382196
loss: 0.9762697815895081,grad_norm: 0.7630803437048022, iteration: 382197
loss: 1.037583827972412,grad_norm: 0.9530456345510409, iteration: 382198
loss: 1.0990376472473145,grad_norm: 0.9999992210563932, iteration: 382199
loss: 0.9790005683898926,grad_norm: 0.6947608775181809, iteration: 382200
loss: 0.9856424331665039,grad_norm: 0.9782400992616199, iteration: 382201
loss: 1.0084763765335083,grad_norm: 0.7985228549628992, iteration: 382202
loss: 1.0407872200012207,grad_norm: 0.7655969187207157, iteration: 382203
loss: 0.995180606842041,grad_norm: 0.7904138233781081, iteration: 382204
loss: 1.0334078073501587,grad_norm: 0.9724426734272554, iteration: 382205
loss: 1.0419002771377563,grad_norm: 0.7666998648863331, iteration: 382206
loss: 1.0398207902908325,grad_norm: 0.9999994180947043, iteration: 382207
loss: 0.9836958050727844,grad_norm: 0.8718197919184238, iteration: 382208
loss: 1.0708163976669312,grad_norm: 0.999999184870929, iteration: 382209
loss: 0.9665278792381287,grad_norm: 0.8902183115205199, iteration: 382210
loss: 0.9907131791114807,grad_norm: 0.8824556771805953, iteration: 382211
loss: 1.0233094692230225,grad_norm: 0.9999999600011337, iteration: 382212
loss: 1.0240614414215088,grad_norm: 0.999999152276915, iteration: 382213
loss: 1.0301796197891235,grad_norm: 0.9999999730499881, iteration: 382214
loss: 1.260585904121399,grad_norm: 0.9999997788520985, iteration: 382215
loss: 1.0183414220809937,grad_norm: 0.853528423293504, iteration: 382216
loss: 1.0121448040008545,grad_norm: 0.7892337567128219, iteration: 382217
loss: 1.0094330310821533,grad_norm: 0.6571615212154617, iteration: 382218
loss: 0.9666818380355835,grad_norm: 0.6929266243798505, iteration: 382219
loss: 1.011439323425293,grad_norm: 0.7005300201467126, iteration: 382220
loss: 1.0020095109939575,grad_norm: 0.8074734333678281, iteration: 382221
loss: 0.9593408107757568,grad_norm: 0.7809774504236443, iteration: 382222
loss: 0.9924761056900024,grad_norm: 0.7527012796855816, iteration: 382223
loss: 0.9775928854942322,grad_norm: 0.6771351377411556, iteration: 382224
loss: 1.0065418481826782,grad_norm: 0.7501899703514233, iteration: 382225
loss: 0.9888970255851746,grad_norm: 0.8898555179722906, iteration: 382226
loss: 0.9892910718917847,grad_norm: 0.7798414309409277, iteration: 382227
loss: 0.9782603979110718,grad_norm: 0.8206038668863674, iteration: 382228
loss: 1.074193000793457,grad_norm: 0.9999997814498301, iteration: 382229
loss: 1.0269274711608887,grad_norm: 0.6837867392793487, iteration: 382230
loss: 0.9784008264541626,grad_norm: 0.7954248855055193, iteration: 382231
loss: 0.9994445443153381,grad_norm: 0.8104365021025611, iteration: 382232
loss: 1.013069748878479,grad_norm: 0.7804576090387492, iteration: 382233
loss: 1.0031427145004272,grad_norm: 0.7498816223842845, iteration: 382234
loss: 0.9583255648612976,grad_norm: 0.9669097081228599, iteration: 382235
loss: 1.006493330001831,grad_norm: 0.8440841405291047, iteration: 382236
loss: 1.0402860641479492,grad_norm: 0.9999999977283784, iteration: 382237
loss: 0.97990483045578,grad_norm: 0.824705499183451, iteration: 382238
loss: 1.0191987752914429,grad_norm: 0.7608403024179929, iteration: 382239
loss: 1.0137728452682495,grad_norm: 0.7548112007352001, iteration: 382240
loss: 1.0228867530822754,grad_norm: 0.7466976054567326, iteration: 382241
loss: 1.0047340393066406,grad_norm: 0.8719702892910934, iteration: 382242
loss: 1.055059790611267,grad_norm: 0.9999996927089564, iteration: 382243
loss: 1.032214641571045,grad_norm: 0.7061090725136424, iteration: 382244
loss: 1.0008132457733154,grad_norm: 0.8164628888919166, iteration: 382245
loss: 0.9994231462478638,grad_norm: 0.8987780546537225, iteration: 382246
loss: 1.0191038846969604,grad_norm: 0.830744261384507, iteration: 382247
loss: 0.9838367104530334,grad_norm: 0.852606306002148, iteration: 382248
loss: 0.999725878238678,grad_norm: 0.965559941738212, iteration: 382249
loss: 1.034930944442749,grad_norm: 0.9999991272114185, iteration: 382250
loss: 1.0123196840286255,grad_norm: 0.742907964732773, iteration: 382251
loss: 1.0288079977035522,grad_norm: 0.9718618065771482, iteration: 382252
loss: 0.9769891500473022,grad_norm: 0.8930105922896462, iteration: 382253
loss: 0.97842937707901,grad_norm: 0.8291007055314451, iteration: 382254
loss: 0.99017733335495,grad_norm: 0.9999996592320399, iteration: 382255
loss: 1.0649396181106567,grad_norm: 0.7837340713100229, iteration: 382256
loss: 0.9912887811660767,grad_norm: 0.8501964319505595, iteration: 382257
loss: 1.046423316001892,grad_norm: 0.8207494277625541, iteration: 382258
loss: 0.9990183115005493,grad_norm: 0.8863570633314395, iteration: 382259
loss: 1.0989291667938232,grad_norm: 0.9999994195688763, iteration: 382260
loss: 1.0189491510391235,grad_norm: 0.8557207202435501, iteration: 382261
loss: 1.0263382196426392,grad_norm: 0.7783103784424652, iteration: 382262
loss: 0.9853185415267944,grad_norm: 0.7969625794556123, iteration: 382263
loss: 1.0593198537826538,grad_norm: 0.8934767903835666, iteration: 382264
loss: 0.9663013219833374,grad_norm: 0.6191456829511184, iteration: 382265
loss: 1.0098098516464233,grad_norm: 0.8251341291709998, iteration: 382266
loss: 1.025816559791565,grad_norm: 0.9074193040583012, iteration: 382267
loss: 1.1162797212600708,grad_norm: 0.9999991206004576, iteration: 382268
loss: 1.0181370973587036,grad_norm: 0.9502105822829269, iteration: 382269
loss: 1.0191820859909058,grad_norm: 0.7789589831307243, iteration: 382270
loss: 1.0214699506759644,grad_norm: 0.7679180166469844, iteration: 382271
loss: 0.9980296492576599,grad_norm: 0.9266973473594701, iteration: 382272
loss: 0.9654089212417603,grad_norm: 0.7315653855840597, iteration: 382273
loss: 1.011645793914795,grad_norm: 0.9665128859303841, iteration: 382274
loss: 1.0467342138290405,grad_norm: 0.9377943004950687, iteration: 382275
loss: 1.0148584842681885,grad_norm: 0.9303004952064079, iteration: 382276
loss: 0.9983628988265991,grad_norm: 0.8631876285520668, iteration: 382277
loss: 0.9943134188652039,grad_norm: 0.7380249422704311, iteration: 382278
loss: 0.9845205545425415,grad_norm: 0.8413371816724325, iteration: 382279
loss: 0.99837726354599,grad_norm: 0.7522572548478393, iteration: 382280
loss: 0.9978559613227844,grad_norm: 0.902347464770952, iteration: 382281
loss: 0.9581431746482849,grad_norm: 0.7734710752278914, iteration: 382282
loss: 1.0177894830703735,grad_norm: 0.6678795176797722, iteration: 382283
loss: 0.9884912967681885,grad_norm: 0.8074496623860106, iteration: 382284
loss: 1.0008999109268188,grad_norm: 0.7434669563180636, iteration: 382285
loss: 1.0067853927612305,grad_norm: 0.7912555850855619, iteration: 382286
loss: 1.0078846216201782,grad_norm: 0.7607880251013888, iteration: 382287
loss: 0.9694797992706299,grad_norm: 0.7441627014206451, iteration: 382288
loss: 0.9739392399787903,grad_norm: 0.8516910599147793, iteration: 382289
loss: 1.0274145603179932,grad_norm: 0.7073554715962255, iteration: 382290
loss: 0.9632630944252014,grad_norm: 0.9185388757408441, iteration: 382291
loss: 1.0386472940444946,grad_norm: 0.9999991956797917, iteration: 382292
loss: 0.9992098212242126,grad_norm: 0.7899857074987776, iteration: 382293
loss: 0.9991658926010132,grad_norm: 0.8286575640793307, iteration: 382294
loss: 1.0064680576324463,grad_norm: 0.999999496327558, iteration: 382295
loss: 0.9954988956451416,grad_norm: 0.7737233295944808, iteration: 382296
loss: 1.0012586116790771,grad_norm: 0.7779847065385592, iteration: 382297
loss: 1.0320559740066528,grad_norm: 0.8602717243029593, iteration: 382298
loss: 0.9704862833023071,grad_norm: 0.8023841729373822, iteration: 382299
loss: 1.0268999338150024,grad_norm: 0.658166828245427, iteration: 382300
loss: 1.0012516975402832,grad_norm: 0.7288034591593233, iteration: 382301
loss: 0.9868146181106567,grad_norm: 0.7892396859951732, iteration: 382302
loss: 1.018298864364624,grad_norm: 0.8278894126731847, iteration: 382303
loss: 0.9790821671485901,grad_norm: 0.8017194259406911, iteration: 382304
loss: 1.0250115394592285,grad_norm: 0.9999992081634377, iteration: 382305
loss: 0.9941180348396301,grad_norm: 0.8473750414599786, iteration: 382306
loss: 0.9946507215499878,grad_norm: 0.770217212955943, iteration: 382307
loss: 1.006313443183899,grad_norm: 0.9999998982398539, iteration: 382308
loss: 1.013891577720642,grad_norm: 0.8616719347061398, iteration: 382309
loss: 1.0144058465957642,grad_norm: 0.9569164077968619, iteration: 382310
loss: 1.0069026947021484,grad_norm: 0.9999997078322543, iteration: 382311
loss: 1.012589693069458,grad_norm: 0.8758481789043498, iteration: 382312
loss: 1.0555318593978882,grad_norm: 0.9999995964856386, iteration: 382313
loss: 0.9570385217666626,grad_norm: 0.7999710174916402, iteration: 382314
loss: 1.016613245010376,grad_norm: 0.9466003041364857, iteration: 382315
loss: 0.9706330895423889,grad_norm: 0.8945891611781717, iteration: 382316
loss: 0.9715520143508911,grad_norm: 0.7442730292226966, iteration: 382317
loss: 1.0118207931518555,grad_norm: 0.8173006537849271, iteration: 382318
loss: 0.9559083580970764,grad_norm: 0.7715054152794583, iteration: 382319
loss: 1.0428937673568726,grad_norm: 0.9999992531235345, iteration: 382320
loss: 1.013932704925537,grad_norm: 0.9100954038783949, iteration: 382321
loss: 1.0081470012664795,grad_norm: 0.7509872970854619, iteration: 382322
loss: 1.0165091753005981,grad_norm: 0.9999990871564104, iteration: 382323
loss: 1.0183496475219727,grad_norm: 0.9088846383392112, iteration: 382324
loss: 0.9791951179504395,grad_norm: 0.707748934142799, iteration: 382325
loss: 0.9996982216835022,grad_norm: 0.7391826541791457, iteration: 382326
loss: 1.0263748168945312,grad_norm: 0.6450663302917842, iteration: 382327
loss: 1.0233485698699951,grad_norm: 0.9415709759566663, iteration: 382328
loss: 0.9800155162811279,grad_norm: 0.8796757091533604, iteration: 382329
loss: 1.021798014640808,grad_norm: 0.9485425719093072, iteration: 382330
loss: 0.9686552286148071,grad_norm: 0.7481827017342235, iteration: 382331
loss: 1.0196822881698608,grad_norm: 0.9999993542694539, iteration: 382332
loss: 0.9810187220573425,grad_norm: 0.8226150253305518, iteration: 382333
loss: 1.0546634197235107,grad_norm: 1.000000034945627, iteration: 382334
loss: 1.0343011617660522,grad_norm: 0.7523319698852716, iteration: 382335
loss: 1.0305447578430176,grad_norm: 0.9283483396027907, iteration: 382336
loss: 1.0320303440093994,grad_norm: 0.9999992509727278, iteration: 382337
loss: 0.9968599081039429,grad_norm: 0.8314860373381266, iteration: 382338
loss: 1.0160094499588013,grad_norm: 0.8040939002644708, iteration: 382339
loss: 1.0058622360229492,grad_norm: 0.6582766023346673, iteration: 382340
loss: 1.0049917697906494,grad_norm: 0.8413646044877396, iteration: 382341
loss: 0.9883947372436523,grad_norm: 0.6879134107453663, iteration: 382342
loss: 0.9844999313354492,grad_norm: 0.7160795933385399, iteration: 382343
loss: 1.0559641122817993,grad_norm: 0.8969822415288348, iteration: 382344
loss: 1.027938961982727,grad_norm: 0.9999990738060999, iteration: 382345
loss: 0.9879196286201477,grad_norm: 0.7387299365885857, iteration: 382346
loss: 1.027044653892517,grad_norm: 0.9375309893116712, iteration: 382347
loss: 1.0116322040557861,grad_norm: 0.9171715611301439, iteration: 382348
loss: 1.0186811685562134,grad_norm: 0.819493038724263, iteration: 382349
loss: 0.9842250943183899,grad_norm: 0.7991975915391794, iteration: 382350
loss: 1.0591070652008057,grad_norm: 0.8248028488577165, iteration: 382351
loss: 1.0191773176193237,grad_norm: 0.9999997209161646, iteration: 382352
loss: 0.9823750853538513,grad_norm: 0.9431010308535664, iteration: 382353
loss: 1.003578782081604,grad_norm: 0.7207065363558017, iteration: 382354
loss: 1.0234438180923462,grad_norm: 0.9359174977674675, iteration: 382355
loss: 1.0215332508087158,grad_norm: 0.8522920504366972, iteration: 382356
loss: 1.0359301567077637,grad_norm: 0.7068477802619753, iteration: 382357
loss: 1.0229700803756714,grad_norm: 0.8482609419612777, iteration: 382358
loss: 1.0324405431747437,grad_norm: 0.6856799956536521, iteration: 382359
loss: 1.0165810585021973,grad_norm: 0.7746905102598626, iteration: 382360
loss: 0.9969857335090637,grad_norm: 0.9823154693822003, iteration: 382361
loss: 0.9804286360740662,grad_norm: 0.7555934394126698, iteration: 382362
loss: 0.9944117665290833,grad_norm: 0.7994442912558323, iteration: 382363
loss: 1.0225800275802612,grad_norm: 0.6586374772669038, iteration: 382364
loss: 1.009716510772705,grad_norm: 0.7977312041000553, iteration: 382365
loss: 0.9558415412902832,grad_norm: 0.7959134980584626, iteration: 382366
loss: 1.0185763835906982,grad_norm: 0.7539237918304668, iteration: 382367
loss: 1.0437546968460083,grad_norm: 0.8474068014872723, iteration: 382368
loss: 1.0014595985412598,grad_norm: 0.7370262161071652, iteration: 382369
loss: 1.0705931186676025,grad_norm: 0.9574029386852304, iteration: 382370
loss: 1.0100327730178833,grad_norm: 0.7418932330960292, iteration: 382371
loss: 1.0403236150741577,grad_norm: 0.8413840799574822, iteration: 382372
loss: 1.0194224119186401,grad_norm: 0.8220009714645511, iteration: 382373
loss: 1.0312542915344238,grad_norm: 0.7121490271210329, iteration: 382374
loss: 0.9742506146430969,grad_norm: 0.8341250020994521, iteration: 382375
loss: 0.9797487258911133,grad_norm: 0.71150231617908, iteration: 382376
loss: 1.0323224067687988,grad_norm: 0.9999999162098026, iteration: 382377
loss: 0.978224515914917,grad_norm: 0.7071649608928465, iteration: 382378
loss: 1.0077048540115356,grad_norm: 0.6578101692036158, iteration: 382379
loss: 1.0456054210662842,grad_norm: 0.7905034755803071, iteration: 382380
loss: 1.0078026056289673,grad_norm: 0.7319774125797726, iteration: 382381
loss: 0.9960843324661255,grad_norm: 0.9398558655424528, iteration: 382382
loss: 0.9889488220214844,grad_norm: 0.7801210066320708, iteration: 382383
loss: 1.0379868745803833,grad_norm: 0.7317659427245288, iteration: 382384
loss: 1.013945460319519,grad_norm: 0.7321848815475982, iteration: 382385
loss: 0.9869312644004822,grad_norm: 0.9026996449824495, iteration: 382386
loss: 0.966502845287323,grad_norm: 0.9999990688212484, iteration: 382387
loss: 1.0443514585494995,grad_norm: 0.999999437364608, iteration: 382388
loss: 1.0929261445999146,grad_norm: 0.9999995054054882, iteration: 382389
loss: 0.9739835858345032,grad_norm: 0.7479686624114428, iteration: 382390
loss: 0.9904486536979675,grad_norm: 0.7652951441716399, iteration: 382391
loss: 0.9915744662284851,grad_norm: 0.9999995029377114, iteration: 382392
loss: 0.9612948894500732,grad_norm: 0.8727034266175641, iteration: 382393
loss: 1.0075407028198242,grad_norm: 0.775032970705838, iteration: 382394
loss: 1.033139944076538,grad_norm: 0.7798567450938862, iteration: 382395
loss: 0.9875364303588867,grad_norm: 0.7947658381721373, iteration: 382396
loss: 0.9990846514701843,grad_norm: 0.7628341758390609, iteration: 382397
loss: 1.0417920351028442,grad_norm: 0.9999991141462875, iteration: 382398
loss: 1.0457605123519897,grad_norm: 0.9815400442901091, iteration: 382399
loss: 1.0228846073150635,grad_norm: 0.841550450764282, iteration: 382400
loss: 0.9875385165214539,grad_norm: 0.8244515278888517, iteration: 382401
loss: 0.9930718541145325,grad_norm: 0.9999993596339413, iteration: 382402
loss: 1.0652880668640137,grad_norm: 0.9999990282761698, iteration: 382403
loss: 0.9851877689361572,grad_norm: 0.8412485415505355, iteration: 382404
loss: 1.0073426961898804,grad_norm: 0.8071069104378807, iteration: 382405
loss: 1.0241460800170898,grad_norm: 0.7580876901781516, iteration: 382406
loss: 1.0041450262069702,grad_norm: 0.8082354671702059, iteration: 382407
loss: 1.046713948249817,grad_norm: 0.7159790757465354, iteration: 382408
loss: 1.0039364099502563,grad_norm: 0.9027319470916301, iteration: 382409
loss: 1.0071320533752441,grad_norm: 0.7268723193990665, iteration: 382410
loss: 0.9950600862503052,grad_norm: 0.7094756197642483, iteration: 382411
loss: 1.1175408363342285,grad_norm: 0.9999993817717684, iteration: 382412
loss: 1.1135387420654297,grad_norm: 0.9999995678018474, iteration: 382413
loss: 1.0323777198791504,grad_norm: 0.7904730485879786, iteration: 382414
loss: 0.9762617945671082,grad_norm: 0.7502501301398725, iteration: 382415
loss: 1.0940132141113281,grad_norm: 0.9728868158390073, iteration: 382416
loss: 0.9622155427932739,grad_norm: 0.7845116288976935, iteration: 382417
loss: 0.9951620697975159,grad_norm: 0.7063091544075534, iteration: 382418
loss: 1.0825480222702026,grad_norm: 0.9999994131595316, iteration: 382419
loss: 1.031947374343872,grad_norm: 0.8772314512508306, iteration: 382420
loss: 0.9686886668205261,grad_norm: 0.7506144866103819, iteration: 382421
loss: 0.993904173374176,grad_norm: 0.7081600316661609, iteration: 382422
loss: 1.015285849571228,grad_norm: 0.8888655873319277, iteration: 382423
loss: 0.968347430229187,grad_norm: 0.7490341065961077, iteration: 382424
loss: 1.0515048503875732,grad_norm: 0.8268240068132018, iteration: 382425
loss: 1.0186636447906494,grad_norm: 0.7046711474629419, iteration: 382426
loss: 1.0352131128311157,grad_norm: 0.9999996927467956, iteration: 382427
loss: 1.0010735988616943,grad_norm: 0.6905888808030615, iteration: 382428
loss: 0.9989115595817566,grad_norm: 0.9999990044737922, iteration: 382429
loss: 0.9681723117828369,grad_norm: 0.813394463897101, iteration: 382430
loss: 1.0374952554702759,grad_norm: 0.8108245106993012, iteration: 382431
loss: 0.9779283404350281,grad_norm: 0.795021698406424, iteration: 382432
loss: 0.9384286999702454,grad_norm: 0.7630803877320879, iteration: 382433
loss: 0.992285430431366,grad_norm: 0.7125483384566103, iteration: 382434
loss: 0.9705238342285156,grad_norm: 0.9726198380676784, iteration: 382435
loss: 0.9989814162254333,grad_norm: 0.8064734006667585, iteration: 382436
loss: 0.9763444066047668,grad_norm: 0.7040559651471983, iteration: 382437
loss: 0.9870653748512268,grad_norm: 0.7672235933849282, iteration: 382438
loss: 0.9718005061149597,grad_norm: 0.7369821930127596, iteration: 382439
loss: 0.9923738837242126,grad_norm: 0.7212244330316353, iteration: 382440
loss: 1.020106315612793,grad_norm: 0.99999897796471, iteration: 382441
loss: 1.0541348457336426,grad_norm: 0.802917245722332, iteration: 382442
loss: 1.0244442224502563,grad_norm: 0.7007280252469117, iteration: 382443
loss: 0.9942789077758789,grad_norm: 0.8807136757776651, iteration: 382444
loss: 1.0052366256713867,grad_norm: 0.7566976360210628, iteration: 382445
loss: 0.991595447063446,grad_norm: 0.6679785910542797, iteration: 382446
loss: 0.9844764471054077,grad_norm: 0.6199088598761904, iteration: 382447
loss: 1.0326275825500488,grad_norm: 0.9067157419912986, iteration: 382448
loss: 0.9806883335113525,grad_norm: 0.7270419529366998, iteration: 382449
loss: 1.0905600786209106,grad_norm: 0.7832321951109876, iteration: 382450
loss: 0.9844344854354858,grad_norm: 0.7917076762849601, iteration: 382451
loss: 1.0450164079666138,grad_norm: 0.9999991773335348, iteration: 382452
loss: 0.9945775270462036,grad_norm: 0.9999990995782331, iteration: 382453
loss: 1.0974204540252686,grad_norm: 0.999999594923944, iteration: 382454
loss: 0.9794219136238098,grad_norm: 0.813371367987722, iteration: 382455
loss: 0.9933896660804749,grad_norm: 0.9717952083949914, iteration: 382456
loss: 1.0436429977416992,grad_norm: 0.8594617206531368, iteration: 382457
loss: 1.0915968418121338,grad_norm: 0.8371501716669585, iteration: 382458
loss: 1.0138028860092163,grad_norm: 0.8125954190238457, iteration: 382459
loss: 1.025804877281189,grad_norm: 0.8538870448134215, iteration: 382460
loss: 1.0283774137496948,grad_norm: 0.9923961689124485, iteration: 382461
loss: 1.0082662105560303,grad_norm: 0.7739170177448031, iteration: 382462
loss: 1.0564484596252441,grad_norm: 0.9999992241813558, iteration: 382463
loss: 1.003733515739441,grad_norm: 0.7359564826478643, iteration: 382464
loss: 0.9850867986679077,grad_norm: 0.7169244704656914, iteration: 382465
loss: 1.0986342430114746,grad_norm: 0.9999990213415336, iteration: 382466
loss: 0.980836033821106,grad_norm: 0.7865828780251298, iteration: 382467
loss: 0.990356981754303,grad_norm: 0.7014467926423543, iteration: 382468
loss: 1.008034586906433,grad_norm: 0.6857552493842829, iteration: 382469
loss: 0.9872477054595947,grad_norm: 0.9230269181711456, iteration: 382470
loss: 1.0105379819869995,grad_norm: 0.7622974135115951, iteration: 382471
loss: 0.9542808532714844,grad_norm: 0.7342525082348906, iteration: 382472
loss: 0.978542685508728,grad_norm: 0.5950605929484131, iteration: 382473
loss: 1.120556116104126,grad_norm: 0.9999996352226446, iteration: 382474
loss: 1.0175296068191528,grad_norm: 0.8478711972669278, iteration: 382475
loss: 1.0039688348770142,grad_norm: 0.6516763352470873, iteration: 382476
loss: 0.9914924502372742,grad_norm: 0.7525740108429456, iteration: 382477
loss: 0.9703048467636108,grad_norm: 0.9999991464471282, iteration: 382478
loss: 1.0331504344940186,grad_norm: 0.7928786685479527, iteration: 382479
loss: 1.0169051885604858,grad_norm: 0.8895381272162461, iteration: 382480
loss: 0.992845892906189,grad_norm: 0.7791963631311454, iteration: 382481
loss: 1.0308691263198853,grad_norm: 0.9999989323352657, iteration: 382482
loss: 0.9906520247459412,grad_norm: 0.7623604371353828, iteration: 382483
loss: 0.9793018102645874,grad_norm: 0.7588233373019996, iteration: 382484
loss: 0.9743402600288391,grad_norm: 0.9055720539694659, iteration: 382485
loss: 0.9827600121498108,grad_norm: 0.8073094477399431, iteration: 382486
loss: 1.0919854640960693,grad_norm: 0.9999994081971977, iteration: 382487
loss: 1.0101162195205688,grad_norm: 0.7899669074481719, iteration: 382488
loss: 1.0123604536056519,grad_norm: 0.880602556027045, iteration: 382489
loss: 1.004974126815796,grad_norm: 0.9016821181980975, iteration: 382490
loss: 1.0953370332717896,grad_norm: 0.9935157965631869, iteration: 382491
loss: 1.052030086517334,grad_norm: 0.7945474044245684, iteration: 382492
loss: 1.0217483043670654,grad_norm: 0.7647356775761458, iteration: 382493
loss: 1.0227808952331543,grad_norm: 0.7952504426487899, iteration: 382494
loss: 1.0206637382507324,grad_norm: 0.6808958650768915, iteration: 382495
loss: 0.975929856300354,grad_norm: 0.7158323864048725, iteration: 382496
loss: 0.961823046207428,grad_norm: 0.7822474426700748, iteration: 382497
loss: 1.0102585554122925,grad_norm: 0.7755652751505898, iteration: 382498
loss: 1.0149437189102173,grad_norm: 0.8640441698955258, iteration: 382499
loss: 1.0237157344818115,grad_norm: 0.8307929878805816, iteration: 382500
loss: 0.9963769912719727,grad_norm: 0.9999994377047564, iteration: 382501
loss: 0.9842763543128967,grad_norm: 0.8243101537455543, iteration: 382502
loss: 0.9894999265670776,grad_norm: 0.7968453961580854, iteration: 382503
loss: 0.9866906404495239,grad_norm: 0.7133278730109732, iteration: 382504
loss: 0.9983848333358765,grad_norm: 0.8347191482284162, iteration: 382505
loss: 0.9687377214431763,grad_norm: 0.7405965144634131, iteration: 382506
loss: 1.1002464294433594,grad_norm: 0.9999991700436027, iteration: 382507
loss: 1.0263142585754395,grad_norm: 0.7687992029710498, iteration: 382508
loss: 0.9929042458534241,grad_norm: 0.7916212460358995, iteration: 382509
loss: 1.0605288743972778,grad_norm: 0.9721153634798068, iteration: 382510
loss: 0.9541378021240234,grad_norm: 0.7699213103961118, iteration: 382511
loss: 1.0212714672088623,grad_norm: 0.8410249344656625, iteration: 382512
loss: 0.9670166373252869,grad_norm: 0.8174130763214635, iteration: 382513
loss: 0.9735502600669861,grad_norm: 0.8283848610483131, iteration: 382514
loss: 0.9973776936531067,grad_norm: 0.7628076283469853, iteration: 382515
loss: 0.9796481728553772,grad_norm: 0.9429206237959835, iteration: 382516
loss: 0.9709409475326538,grad_norm: 0.8693818325003736, iteration: 382517
loss: 1.0393754243850708,grad_norm: 0.9999992102001518, iteration: 382518
loss: 0.9952941536903381,grad_norm: 0.7939945973468718, iteration: 382519
loss: 0.9722192287445068,grad_norm: 0.7826689142604012, iteration: 382520
loss: 1.048193097114563,grad_norm: 0.9999991088552418, iteration: 382521
loss: 0.9525059461593628,grad_norm: 0.7286105957276662, iteration: 382522
loss: 1.0028401613235474,grad_norm: 0.9999991964881686, iteration: 382523
loss: 0.9655694961547852,grad_norm: 0.8553794048514164, iteration: 382524
loss: 0.9944698810577393,grad_norm: 0.7961457387107133, iteration: 382525
loss: 1.12575101852417,grad_norm: 0.9376168079558423, iteration: 382526
loss: 1.0228897333145142,grad_norm: 0.8621396822647035, iteration: 382527
loss: 0.9638870358467102,grad_norm: 0.7749010524156408, iteration: 382528
loss: 1.0581973791122437,grad_norm: 0.9999997426679829, iteration: 382529
loss: 0.9949877262115479,grad_norm: 0.752980430818421, iteration: 382530
loss: 0.9844732284545898,grad_norm: 0.8206075116534126, iteration: 382531
loss: 0.9927644729614258,grad_norm: 0.850879142136154, iteration: 382532
loss: 0.9882767200469971,grad_norm: 0.8503862044185334, iteration: 382533
loss: 1.0322659015655518,grad_norm: 0.9999999180888512, iteration: 382534
loss: 1.0010395050048828,grad_norm: 0.7054766926811337, iteration: 382535
loss: 1.0410653352737427,grad_norm: 0.8574370221847568, iteration: 382536
loss: 0.984138548374176,grad_norm: 0.828609662542688, iteration: 382537
loss: 1.0090864896774292,grad_norm: 0.9357538318056604, iteration: 382538
loss: 1.0114878416061401,grad_norm: 0.8651145566335531, iteration: 382539
loss: 0.9652421474456787,grad_norm: 0.7919051409727141, iteration: 382540
loss: 0.9428916573524475,grad_norm: 0.7779498570230051, iteration: 382541
loss: 0.9657106995582581,grad_norm: 0.837132331678194, iteration: 382542
loss: 0.9911046028137207,grad_norm: 0.8661621924589556, iteration: 382543
loss: 0.9878508448600769,grad_norm: 0.763365524858183, iteration: 382544
loss: 0.9758791923522949,grad_norm: 0.7620473963518435, iteration: 382545
loss: 0.9774999022483826,grad_norm: 0.7376484048911375, iteration: 382546
loss: 1.01119065284729,grad_norm: 0.8242465219998988, iteration: 382547
loss: 1.0006046295166016,grad_norm: 0.9999999100408729, iteration: 382548
loss: 0.9998492002487183,grad_norm: 0.9185217523829728, iteration: 382549
loss: 0.998593270778656,grad_norm: 0.7055359807286642, iteration: 382550
loss: 1.0090603828430176,grad_norm: 0.7204316705913169, iteration: 382551
loss: 0.9794324636459351,grad_norm: 0.8100193732082671, iteration: 382552
loss: 1.0088979005813599,grad_norm: 0.800096864345694, iteration: 382553
loss: 1.10367751121521,grad_norm: 0.999999509837517, iteration: 382554
loss: 1.081202507019043,grad_norm: 0.8760548724518126, iteration: 382555
loss: 0.9993908405303955,grad_norm: 0.742566548372352, iteration: 382556
loss: 1.0656819343566895,grad_norm: 0.9999994658092808, iteration: 382557
loss: 1.0352522134780884,grad_norm: 0.8232699607552019, iteration: 382558
loss: 0.9668214917182922,grad_norm: 0.7611371420590751, iteration: 382559
loss: 1.030872106552124,grad_norm: 0.9999994245574335, iteration: 382560
loss: 1.0898911952972412,grad_norm: 0.7629805159918925, iteration: 382561
loss: 1.01815664768219,grad_norm: 0.9999995642153764, iteration: 382562
loss: 1.0016289949417114,grad_norm: 0.7205873146029278, iteration: 382563
loss: 0.991187334060669,grad_norm: 0.99999920522531, iteration: 382564
loss: 1.0538491010665894,grad_norm: 0.8316774555476902, iteration: 382565
loss: 0.9559963345527649,grad_norm: 0.7346557039933878, iteration: 382566
loss: 0.9952657222747803,grad_norm: 0.7278362489224273, iteration: 382567
loss: 0.9962287545204163,grad_norm: 0.7800028303622184, iteration: 382568
loss: 1.0828056335449219,grad_norm: 0.9653375795068956, iteration: 382569
loss: 0.9503030180931091,grad_norm: 0.9357502667162364, iteration: 382570
loss: 1.1172854900360107,grad_norm: 0.9999992850094901, iteration: 382571
loss: 1.0107005834579468,grad_norm: 0.8598605286046045, iteration: 382572
loss: 0.9706771969795227,grad_norm: 0.8556913830674281, iteration: 382573
loss: 1.0128735303878784,grad_norm: 1.000000064570116, iteration: 382574
loss: 0.9676328897476196,grad_norm: 0.7193624770057502, iteration: 382575
loss: 1.0436240434646606,grad_norm: 0.8576975105333408, iteration: 382576
loss: 1.2071586847305298,grad_norm: 0.9999998957587939, iteration: 382577
loss: 0.961359441280365,grad_norm: 0.9427004110272016, iteration: 382578
loss: 1.0009799003601074,grad_norm: 0.869989335553138, iteration: 382579
loss: 1.0680471658706665,grad_norm: 0.9248440927762938, iteration: 382580
loss: 0.9899064898490906,grad_norm: 0.6972675458044943, iteration: 382581
loss: 1.0306748151779175,grad_norm: 0.9999991015183946, iteration: 382582
loss: 0.9735936522483826,grad_norm: 0.7457557111701785, iteration: 382583
loss: 0.9952831268310547,grad_norm: 0.8578537735052163, iteration: 382584
loss: 1.001771092414856,grad_norm: 0.7707334625820432, iteration: 382585
loss: 0.9609763026237488,grad_norm: 0.6935973803120603, iteration: 382586
loss: 1.0018961429595947,grad_norm: 0.8983571569787167, iteration: 382587
loss: 1.00385582447052,grad_norm: 0.9004297810921018, iteration: 382588
loss: 1.0491777658462524,grad_norm: 0.9999998538597062, iteration: 382589
loss: 1.0637242794036865,grad_norm: 0.829831504163399, iteration: 382590
loss: 0.999823272228241,grad_norm: 0.7394036706694649, iteration: 382591
loss: 1.047992467880249,grad_norm: 0.9999999760345638, iteration: 382592
loss: 1.0482606887817383,grad_norm: 0.9999997984670689, iteration: 382593
loss: 1.0780068635940552,grad_norm: 0.870825541255562, iteration: 382594
loss: 1.0118485689163208,grad_norm: 0.6862497752142043, iteration: 382595
loss: 1.0191593170166016,grad_norm: 0.7348379280460343, iteration: 382596
loss: 1.0471000671386719,grad_norm: 0.9999989832707636, iteration: 382597
loss: 1.029271125793457,grad_norm: 0.8003407217521894, iteration: 382598
loss: 0.9900951385498047,grad_norm: 0.9909589807337941, iteration: 382599
loss: 1.0235178470611572,grad_norm: 0.9999990817362927, iteration: 382600
loss: 0.9791492819786072,grad_norm: 0.9123776187176191, iteration: 382601
loss: 0.962370753288269,grad_norm: 0.7304935743663851, iteration: 382602
loss: 0.980493426322937,grad_norm: 0.7206763756641476, iteration: 382603
loss: 0.9792036414146423,grad_norm: 0.8203128984404846, iteration: 382604
loss: 1.0704847574234009,grad_norm: 0.9999996047584615, iteration: 382605
loss: 1.0094934701919556,grad_norm: 0.8435065840602374, iteration: 382606
loss: 1.018194556236267,grad_norm: 0.9999999668963013, iteration: 382607
loss: 1.0189844369888306,grad_norm: 0.7188637227018606, iteration: 382608
loss: 1.0306345224380493,grad_norm: 0.8278659039022248, iteration: 382609
loss: 1.0356330871582031,grad_norm: 0.8189232102164771, iteration: 382610
loss: 1.041551113128662,grad_norm: 0.8551666865288916, iteration: 382611
loss: 1.0147525072097778,grad_norm: 0.7150752076436415, iteration: 382612
loss: 1.0345349311828613,grad_norm: 0.8182943768748931, iteration: 382613
loss: 1.0598344802856445,grad_norm: 0.7418790941700503, iteration: 382614
loss: 1.0071436166763306,grad_norm: 0.9999997545940921, iteration: 382615
loss: 1.052322268486023,grad_norm: 0.8692660029334617, iteration: 382616
loss: 1.0042102336883545,grad_norm: 0.7902916022567258, iteration: 382617
loss: 0.9692850708961487,grad_norm: 0.7022374817251188, iteration: 382618
loss: 0.9631359577178955,grad_norm: 0.6805466552721462, iteration: 382619
loss: 1.0800774097442627,grad_norm: 0.9999995839139308, iteration: 382620
loss: 1.0069931745529175,grad_norm: 0.7944568341662082, iteration: 382621
loss: 1.0191329717636108,grad_norm: 0.9091223747020767, iteration: 382622
loss: 1.0136802196502686,grad_norm: 0.7646373854345356, iteration: 382623
loss: 0.994073748588562,grad_norm: 0.7504400414974167, iteration: 382624
loss: 1.0102698802947998,grad_norm: 0.9999990950449358, iteration: 382625
loss: 0.9489057064056396,grad_norm: 0.824672787034307, iteration: 382626
loss: 1.1517020463943481,grad_norm: 0.8410155775232222, iteration: 382627
loss: 0.9967807531356812,grad_norm: 0.8263199632297012, iteration: 382628
loss: 1.0512627363204956,grad_norm: 0.7599814223135211, iteration: 382629
loss: 1.0238041877746582,grad_norm: 0.999999510249073, iteration: 382630
loss: 1.010206699371338,grad_norm: 0.8401386849499851, iteration: 382631
loss: 1.0400394201278687,grad_norm: 0.999999368489707, iteration: 382632
loss: 0.9922662973403931,grad_norm: 0.8997105088551306, iteration: 382633
loss: 1.0134776830673218,grad_norm: 0.8139189204440097, iteration: 382634
loss: 1.0032305717468262,grad_norm: 0.6924418724968384, iteration: 382635
loss: 1.0944252014160156,grad_norm: 0.9999999895310778, iteration: 382636
loss: 1.0727788209915161,grad_norm: 0.9999993320778551, iteration: 382637
loss: 0.9626092314720154,grad_norm: 0.7858876003891377, iteration: 382638
loss: 1.0635160207748413,grad_norm: 0.6875622331933111, iteration: 382639
loss: 1.0567514896392822,grad_norm: 0.9999997646550343, iteration: 382640
loss: 1.0872306823730469,grad_norm: 0.8934107669646351, iteration: 382641
loss: 1.0454413890838623,grad_norm: 0.8119815086570388, iteration: 382642
loss: 1.009296178817749,grad_norm: 0.9999992992911564, iteration: 382643
loss: 1.0925962924957275,grad_norm: 0.9999990375363578, iteration: 382644
loss: 1.1392290592193604,grad_norm: 0.7827324072468564, iteration: 382645
loss: 1.2257397174835205,grad_norm: 0.9999993308107753, iteration: 382646
loss: 1.153855323791504,grad_norm: 0.9999999198882371, iteration: 382647
loss: 1.1503204107284546,grad_norm: 1.0000000344291693, iteration: 382648
loss: 1.0902246236801147,grad_norm: 0.9999991572235013, iteration: 382649
loss: 1.1547001600265503,grad_norm: 0.999999126542453, iteration: 382650
loss: 0.9743728637695312,grad_norm: 0.7794004911939705, iteration: 382651
loss: 1.1190893650054932,grad_norm: 0.9999993915002326, iteration: 382652
loss: 1.3091832399368286,grad_norm: 0.9999998591068611, iteration: 382653
loss: 1.0379996299743652,grad_norm: 0.8704545077385765, iteration: 382654
loss: 1.0826488733291626,grad_norm: 0.9999990851984029, iteration: 382655
loss: 0.9991745948791504,grad_norm: 0.9220408404794455, iteration: 382656
loss: 1.1199370622634888,grad_norm: 0.9999998353919539, iteration: 382657
loss: 1.0180531740188599,grad_norm: 0.9999989726285087, iteration: 382658
loss: 1.1727323532104492,grad_norm: 0.9999991264023614, iteration: 382659
loss: 0.9927660822868347,grad_norm: 0.9999991312917758, iteration: 382660
loss: 1.0821561813354492,grad_norm: 0.999999260420159, iteration: 382661
loss: 1.0599005222320557,grad_norm: 0.9999997884607372, iteration: 382662
loss: 1.0640977621078491,grad_norm: 0.7928544190280312, iteration: 382663
loss: 0.9827415943145752,grad_norm: 0.8323493399541916, iteration: 382664
loss: 1.149237871170044,grad_norm: 0.9999993044151531, iteration: 382665
loss: 1.0734338760375977,grad_norm: 0.9999991751962505, iteration: 382666
loss: 1.0732096433639526,grad_norm: 0.9999992301372335, iteration: 382667
loss: 1.0941191911697388,grad_norm: 0.9999991087410369, iteration: 382668
loss: 0.982774555683136,grad_norm: 0.6720374423739295, iteration: 382669
loss: 0.9952631592750549,grad_norm: 0.999999661972373, iteration: 382670
loss: 1.1812939643859863,grad_norm: 0.9999999977249969, iteration: 382671
loss: 1.007291555404663,grad_norm: 0.8854837863347331, iteration: 382672
loss: 1.0355627536773682,grad_norm: 0.8486620764500787, iteration: 382673
loss: 0.98996502161026,grad_norm: 0.9312382726679053, iteration: 382674
loss: 1.0195308923721313,grad_norm: 0.8409401370546031, iteration: 382675
loss: 1.006209373474121,grad_norm: 0.8113064472545656, iteration: 382676
loss: 0.9913203716278076,grad_norm: 0.8545268556997396, iteration: 382677
loss: 1.0318063497543335,grad_norm: 0.7676713765997641, iteration: 382678
loss: 1.0256099700927734,grad_norm: 0.8563595723310884, iteration: 382679
loss: 1.0962735414505005,grad_norm: 0.9999992956553443, iteration: 382680
loss: 1.14911687374115,grad_norm: 0.9999995836713057, iteration: 382681
loss: 1.116489052772522,grad_norm: 0.999999172663028, iteration: 382682
loss: 1.0403364896774292,grad_norm: 0.8596400575558306, iteration: 382683
loss: 1.036027193069458,grad_norm: 0.8431782414359599, iteration: 382684
loss: 1.0605829954147339,grad_norm: 0.9999996000954946, iteration: 382685
loss: 1.0720970630645752,grad_norm: 0.9426938458189225, iteration: 382686
loss: 1.0310474634170532,grad_norm: 0.7863558755224332, iteration: 382687
loss: 0.9729498028755188,grad_norm: 0.8831455409331537, iteration: 382688
loss: 1.2025984525680542,grad_norm: 0.9999994315767501, iteration: 382689
loss: 1.0835862159729004,grad_norm: 0.9999993271528358, iteration: 382690
loss: 1.0470398664474487,grad_norm: 0.9999999637192704, iteration: 382691
loss: 1.081128478050232,grad_norm: 0.9999997347521652, iteration: 382692
loss: 1.0136269330978394,grad_norm: 0.9999991616510866, iteration: 382693
loss: 1.1126564741134644,grad_norm: 0.9999992790569405, iteration: 382694
loss: 1.1838812828063965,grad_norm: 0.9999992790036869, iteration: 382695
loss: 1.0355032682418823,grad_norm: 0.8231010409685425, iteration: 382696
loss: 0.9996187090873718,grad_norm: 0.9146003591696992, iteration: 382697
loss: 1.0537519454956055,grad_norm: 0.9093796874976289, iteration: 382698
loss: 1.1047425270080566,grad_norm: 0.9999997240729273, iteration: 382699
loss: 1.246185302734375,grad_norm: 0.9999994817552114, iteration: 382700
loss: 1.0844426155090332,grad_norm: 0.9999995916592739, iteration: 382701
loss: 1.146823763847351,grad_norm: 0.9999996043120569, iteration: 382702
loss: 1.0972589254379272,grad_norm: 0.9999999083094515, iteration: 382703
loss: 1.0136691331863403,grad_norm: 0.7735434219664715, iteration: 382704
loss: 1.036522626876831,grad_norm: 0.999999247223834, iteration: 382705
loss: 1.1799240112304688,grad_norm: 0.9999998933945597, iteration: 382706
loss: 1.0942529439926147,grad_norm: 0.9999995841998562, iteration: 382707
loss: 1.1041264533996582,grad_norm: 0.9222662700942529, iteration: 382708
loss: 0.9967668056488037,grad_norm: 0.6550644847646315, iteration: 382709
loss: 1.0537383556365967,grad_norm: 0.999999374364429, iteration: 382710
loss: 1.1367720365524292,grad_norm: 0.999999305960737, iteration: 382711
loss: 1.0500922203063965,grad_norm: 0.7683417888638653, iteration: 382712
loss: 1.1023895740509033,grad_norm: 0.9999992701956191, iteration: 382713
loss: 0.9578898549079895,grad_norm: 0.8448920952920103, iteration: 382714
loss: 1.0118380784988403,grad_norm: 0.8849713044920494, iteration: 382715
loss: 1.0850036144256592,grad_norm: 0.7446073985248799, iteration: 382716
loss: 1.1663340330123901,grad_norm: 0.9999997545290845, iteration: 382717
loss: 1.1578476428985596,grad_norm: 0.9999995700251669, iteration: 382718
loss: 1.040497899055481,grad_norm: 0.9999991291648467, iteration: 382719
loss: 1.0315113067626953,grad_norm: 0.9266023641178481, iteration: 382720
loss: 1.1737560033798218,grad_norm: 0.9999994005738633, iteration: 382721
loss: 1.0899035930633545,grad_norm: 0.9999993128485352, iteration: 382722
loss: 1.0323153734207153,grad_norm: 0.9999992923797093, iteration: 382723
loss: 1.009543538093567,grad_norm: 0.9521602056030849, iteration: 382724
loss: 1.0769413709640503,grad_norm: 0.9648409650634397, iteration: 382725
loss: 0.9857214689254761,grad_norm: 0.9999998858589243, iteration: 382726
loss: 1.0008492469787598,grad_norm: 0.9999991327157592, iteration: 382727
loss: 1.0707672834396362,grad_norm: 0.9999992420219468, iteration: 382728
loss: 1.0045032501220703,grad_norm: 0.6980625804845426, iteration: 382729
loss: 1.0488908290863037,grad_norm: 0.9999995113301047, iteration: 382730
loss: 1.2456849813461304,grad_norm: 0.999999800867637, iteration: 382731
loss: 0.9931553602218628,grad_norm: 0.9999992579704079, iteration: 382732
loss: 1.0083520412445068,grad_norm: 0.9857054019197492, iteration: 382733
loss: 1.0035104751586914,grad_norm: 0.9999994726700638, iteration: 382734
loss: 1.0846248865127563,grad_norm: 0.9471067738876798, iteration: 382735
loss: 0.9942823052406311,grad_norm: 0.7887068765585349, iteration: 382736
loss: 1.0872955322265625,grad_norm: 0.9999994438362966, iteration: 382737
loss: 0.9732704758644104,grad_norm: 0.815633514923384, iteration: 382738
loss: 1.1594651937484741,grad_norm: 0.9999995591441847, iteration: 382739
loss: 1.3471914529800415,grad_norm: 0.9999999365711946, iteration: 382740
loss: 1.1512298583984375,grad_norm: 0.9999997281769721, iteration: 382741
loss: 1.253893494606018,grad_norm: 1.0000000521957344, iteration: 382742
loss: 1.0079083442687988,grad_norm: 0.7970112179862465, iteration: 382743
loss: 1.1864863634109497,grad_norm: 0.9999999122819055, iteration: 382744
loss: 1.1541277170181274,grad_norm: 0.9999996840214078, iteration: 382745
loss: 0.9998995065689087,grad_norm: 0.7102694311302348, iteration: 382746
loss: 1.1814252138137817,grad_norm: 0.9999993447597375, iteration: 382747
loss: 1.1920098066329956,grad_norm: 0.9999996771340134, iteration: 382748
loss: 1.183430790901184,grad_norm: 0.9999994229509223, iteration: 382749
loss: 1.0232750177383423,grad_norm: 0.9999999121114559, iteration: 382750
loss: 1.2368208169937134,grad_norm: 0.999999416693406, iteration: 382751
loss: 1.3116724491119385,grad_norm: 0.9999994069362611, iteration: 382752
loss: 1.0615967512130737,grad_norm: 0.9999997738543113, iteration: 382753
loss: 1.0544533729553223,grad_norm: 0.7907879149629128, iteration: 382754
loss: 1.064149260520935,grad_norm: 0.9999999659820052, iteration: 382755
loss: 1.1560707092285156,grad_norm: 0.9999995713666459, iteration: 382756
loss: 1.368293285369873,grad_norm: 1.0000000259039017, iteration: 382757
loss: 1.0564988851547241,grad_norm: 0.7646001934700778, iteration: 382758
loss: 1.46985924243927,grad_norm: 0.9999995630040727, iteration: 382759
loss: 1.1702659130096436,grad_norm: 0.9999995177200397, iteration: 382760
loss: 1.0931358337402344,grad_norm: 0.9999993208481421, iteration: 382761
loss: 1.1754354238510132,grad_norm: 0.9999991043687352, iteration: 382762
loss: 1.0241138935089111,grad_norm: 0.7829637195908247, iteration: 382763
loss: 1.0784798860549927,grad_norm: 0.7936339267033795, iteration: 382764
loss: 1.1244370937347412,grad_norm: 0.9999998628682104, iteration: 382765
loss: 1.0451593399047852,grad_norm: 0.9999666101034562, iteration: 382766
loss: 1.0666500329971313,grad_norm: 0.8133718527855573, iteration: 382767
loss: 1.1361380815505981,grad_norm: 0.8870991481938144, iteration: 382768
loss: 1.0912818908691406,grad_norm: 0.9999990296176529, iteration: 382769
loss: 1.0476967096328735,grad_norm: 0.9999998204095021, iteration: 382770
loss: 1.170576810836792,grad_norm: 0.9999993914460343, iteration: 382771
loss: 1.089989423751831,grad_norm: 0.9999990164224098, iteration: 382772
loss: 1.0256015062332153,grad_norm: 0.8403616356378288, iteration: 382773
loss: 1.0325443744659424,grad_norm: 0.7650964240847479, iteration: 382774
loss: 1.0188082456588745,grad_norm: 0.8509503437697301, iteration: 382775
loss: 1.0502110719680786,grad_norm: 0.8452043584219051, iteration: 382776
loss: 1.1885690689086914,grad_norm: 0.9999997339111247, iteration: 382777
loss: 1.0683510303497314,grad_norm: 0.9999990699463671, iteration: 382778
loss: 1.0490970611572266,grad_norm: 0.9999992077962324, iteration: 382779
loss: 1.102190375328064,grad_norm: 0.9999999043168227, iteration: 382780
loss: 1.021064281463623,grad_norm: 0.9999991613891738, iteration: 382781
loss: 1.0602799654006958,grad_norm: 0.9999999185602207, iteration: 382782
loss: 1.0196876525878906,grad_norm: 0.9999991077001267, iteration: 382783
loss: 1.1392608880996704,grad_norm: 0.9999996587841984, iteration: 382784
loss: 1.1283570528030396,grad_norm: 0.9999990813675281, iteration: 382785
loss: 0.9734993577003479,grad_norm: 0.9514248525483012, iteration: 382786
loss: 1.122752070426941,grad_norm: 0.9999992206155589, iteration: 382787
loss: 1.1459587812423706,grad_norm: 0.9568827899869279, iteration: 382788
loss: 0.9933884739875793,grad_norm: 0.8264748979893223, iteration: 382789
loss: 1.0281577110290527,grad_norm: 0.8348948254758243, iteration: 382790
loss: 1.0309199094772339,grad_norm: 0.8037333881201111, iteration: 382791
loss: 0.9851017594337463,grad_norm: 0.902716488039809, iteration: 382792
loss: 1.0312641859054565,grad_norm: 0.9293003618122349, iteration: 382793
loss: 1.0209660530090332,grad_norm: 0.9999997525860083, iteration: 382794
loss: 1.1579571962356567,grad_norm: 0.9999997267783617, iteration: 382795
loss: 1.0072053670883179,grad_norm: 0.9999991559072079, iteration: 382796
loss: 1.0288246870040894,grad_norm: 0.8160139544208574, iteration: 382797
loss: 1.040856122970581,grad_norm: 0.9738718957980583, iteration: 382798
loss: 1.0536686182022095,grad_norm: 0.9999997481000747, iteration: 382799
loss: 1.1136316061019897,grad_norm: 0.9999996781921379, iteration: 382800
loss: 1.0139163732528687,grad_norm: 0.8506891730177263, iteration: 382801
loss: 1.1454524993896484,grad_norm: 0.9999999597091221, iteration: 382802
loss: 1.1080394983291626,grad_norm: 0.9999991372959154, iteration: 382803
loss: 1.0090290307998657,grad_norm: 0.9007469662470423, iteration: 382804
loss: 1.0596452951431274,grad_norm: 0.9999990486984326, iteration: 382805
loss: 0.9741045236587524,grad_norm: 0.9634835198537263, iteration: 382806
loss: 1.0172799825668335,grad_norm: 0.7436251089951659, iteration: 382807
loss: 1.1041955947875977,grad_norm: 0.9999998216786876, iteration: 382808
loss: 1.0868468284606934,grad_norm: 0.9999995147785006, iteration: 382809
loss: 1.004096269607544,grad_norm: 0.7726852183390005, iteration: 382810
loss: 1.041443943977356,grad_norm: 0.9999991288268123, iteration: 382811
loss: 1.113534927368164,grad_norm: 0.9999997481645129, iteration: 382812
loss: 1.0169342756271362,grad_norm: 0.8021962299638778, iteration: 382813
loss: 1.0429378747940063,grad_norm: 0.829505543129078, iteration: 382814
loss: 1.037716031074524,grad_norm: 0.7604187547547798, iteration: 382815
loss: 1.0067623853683472,grad_norm: 0.9999990870241507, iteration: 382816
loss: 0.9910529851913452,grad_norm: 0.831307451706546, iteration: 382817
loss: 1.0213695764541626,grad_norm: 0.750051986874829, iteration: 382818
loss: 1.019682765007019,grad_norm: 0.9999994745561038, iteration: 382819
loss: 0.9849595427513123,grad_norm: 0.9082781814796781, iteration: 382820
loss: 1.0643781423568726,grad_norm: 1.0000000236504032, iteration: 382821
loss: 1.0042585134506226,grad_norm: 0.8692368930179908, iteration: 382822
loss: 1.001857042312622,grad_norm: 0.9999995876212637, iteration: 382823
loss: 1.0510380268096924,grad_norm: 0.7511148229929057, iteration: 382824
loss: 1.0504200458526611,grad_norm: 0.9999998230302792, iteration: 382825
loss: 0.9977206587791443,grad_norm: 0.7716436549622981, iteration: 382826
loss: 1.0216689109802246,grad_norm: 0.8713278048568739, iteration: 382827
loss: 1.0744386911392212,grad_norm: 0.999999114370548, iteration: 382828
loss: 1.0126971006393433,grad_norm: 0.9999997416852446, iteration: 382829
loss: 1.0597811937332153,grad_norm: 0.9999993524487193, iteration: 382830
loss: 0.9634993672370911,grad_norm: 0.9074348079903489, iteration: 382831
loss: 0.9999153017997742,grad_norm: 0.7199735799953613, iteration: 382832
loss: 0.9899754524230957,grad_norm: 0.9142841396368181, iteration: 382833
loss: 1.0070823431015015,grad_norm: 0.7648399570876409, iteration: 382834
loss: 1.0446107387542725,grad_norm: 0.9999991019791662, iteration: 382835
loss: 1.0592994689941406,grad_norm: 0.9118083025573559, iteration: 382836
loss: 1.0518262386322021,grad_norm: 0.8785031997670354, iteration: 382837
loss: 1.0097630023956299,grad_norm: 0.9999990570010712, iteration: 382838
loss: 0.9923299551010132,grad_norm: 0.7357300453280152, iteration: 382839
loss: 1.0847257375717163,grad_norm: 0.9431328884614629, iteration: 382840
loss: 1.1195690631866455,grad_norm: 0.9999990983998732, iteration: 382841
loss: 1.0224803686141968,grad_norm: 0.999999308285297, iteration: 382842
loss: 1.0222336053848267,grad_norm: 1.0000000018706414, iteration: 382843
loss: 0.9994887709617615,grad_norm: 0.7635628576299052, iteration: 382844
loss: 1.0029449462890625,grad_norm: 0.7671083167569506, iteration: 382845
loss: 0.9722701907157898,grad_norm: 0.8623746646302407, iteration: 382846
loss: 1.0251944065093994,grad_norm: 0.9999996887359359, iteration: 382847
loss: 1.0084574222564697,grad_norm: 0.687877500965752, iteration: 382848
loss: 1.0079680681228638,grad_norm: 0.6106985485139524, iteration: 382849
loss: 0.9853771328926086,grad_norm: 0.9245362469134868, iteration: 382850
loss: 0.976043701171875,grad_norm: 0.7202854801118155, iteration: 382851
loss: 0.950753927230835,grad_norm: 0.7822299290619574, iteration: 382852
loss: 1.0237257480621338,grad_norm: 0.811148659259718, iteration: 382853
loss: 0.9355039596557617,grad_norm: 0.7398768763457633, iteration: 382854
loss: 1.0116792917251587,grad_norm: 0.9243481284868746, iteration: 382855
loss: 1.0045561790466309,grad_norm: 0.9999996589936557, iteration: 382856
loss: 1.0699530839920044,grad_norm: 0.7059235596597712, iteration: 382857
loss: 1.0654945373535156,grad_norm: 0.9999997190503216, iteration: 382858
loss: 0.9990948438644409,grad_norm: 0.9999996756606883, iteration: 382859
loss: 0.9787994623184204,grad_norm: 0.7260176096290583, iteration: 382860
loss: 0.9725497364997864,grad_norm: 0.99999922836154, iteration: 382861
loss: 0.9931545257568359,grad_norm: 0.8719339187237166, iteration: 382862
loss: 1.011150598526001,grad_norm: 0.8630945250966794, iteration: 382863
loss: 1.0217312574386597,grad_norm: 0.9999998574225653, iteration: 382864
loss: 0.9876224398612976,grad_norm: 0.636312392613554, iteration: 382865
loss: 1.0254241228103638,grad_norm: 0.9999991241805306, iteration: 382866
loss: 1.0573407411575317,grad_norm: 0.7574008535362976, iteration: 382867
loss: 0.9778594374656677,grad_norm: 0.7925284777244445, iteration: 382868
loss: 0.9974802732467651,grad_norm: 0.9322749835952525, iteration: 382869
loss: 1.0282257795333862,grad_norm: 0.8707813284795393, iteration: 382870
loss: 1.0016977787017822,grad_norm: 0.7924231221773849, iteration: 382871
loss: 0.9866083860397339,grad_norm: 0.9999991633558415, iteration: 382872
loss: 1.0145072937011719,grad_norm: 0.8177722210544659, iteration: 382873
loss: 1.0210809707641602,grad_norm: 0.7458536322824698, iteration: 382874
loss: 1.0067869424819946,grad_norm: 0.9609865098516459, iteration: 382875
loss: 1.016748070716858,grad_norm: 0.7160776040993843, iteration: 382876
loss: 1.0391288995742798,grad_norm: 0.9999999862175954, iteration: 382877
loss: 1.0232455730438232,grad_norm: 0.7444340789418825, iteration: 382878
loss: 0.9730318188667297,grad_norm: 0.763335940987383, iteration: 382879
loss: 1.0207068920135498,grad_norm: 0.6917208203379052, iteration: 382880
loss: 1.0144683122634888,grad_norm: 0.7886418324555476, iteration: 382881
loss: 0.9571472406387329,grad_norm: 0.7744811461729711, iteration: 382882
loss: 1.0211206674575806,grad_norm: 0.7897385917990222, iteration: 382883
loss: 0.9969636797904968,grad_norm: 0.7751055319891564, iteration: 382884
loss: 1.0894595384597778,grad_norm: 0.9999998005228802, iteration: 382885
loss: 0.9783554673194885,grad_norm: 0.7148544272516627, iteration: 382886
loss: 1.0496312379837036,grad_norm: 0.9999992920378652, iteration: 382887
loss: 1.0397299528121948,grad_norm: 0.9047595907564131, iteration: 382888
loss: 1.2280139923095703,grad_norm: 0.9999992012097062, iteration: 382889
loss: 1.0600467920303345,grad_norm: 0.9999993017430974, iteration: 382890
loss: 0.971922755241394,grad_norm: 0.8842542172306582, iteration: 382891
loss: 1.097124457359314,grad_norm: 0.9999997377004629, iteration: 382892
loss: 1.0675461292266846,grad_norm: 0.9999991317894854, iteration: 382893
loss: 1.0214407444000244,grad_norm: 0.9999995261127352, iteration: 382894
loss: 0.9897987842559814,grad_norm: 0.79751884018042, iteration: 382895
loss: 1.043514370918274,grad_norm: 0.9999989700073124, iteration: 382896
loss: 0.978934645652771,grad_norm: 0.7919798859047612, iteration: 382897
loss: 1.0123530626296997,grad_norm: 0.905283621150015, iteration: 382898
loss: 1.0702570676803589,grad_norm: 0.999999405084038, iteration: 382899
loss: 0.9830325841903687,grad_norm: 0.8221114050201183, iteration: 382900
loss: 0.9953098297119141,grad_norm: 0.6790986458249698, iteration: 382901
loss: 1.0126616954803467,grad_norm: 0.7514365169432938, iteration: 382902
loss: 0.9903374910354614,grad_norm: 0.827187638416156, iteration: 382903
loss: 0.990983247756958,grad_norm: 0.7264809176161021, iteration: 382904
loss: 1.0382367372512817,grad_norm: 0.990397735954775, iteration: 382905
loss: 1.0135219097137451,grad_norm: 0.6638523443341365, iteration: 382906
loss: 1.0890111923217773,grad_norm: 0.9999996278634566, iteration: 382907
loss: 1.0252312421798706,grad_norm: 0.9486815593813519, iteration: 382908
loss: 1.0032743215560913,grad_norm: 0.7856182340068331, iteration: 382909
loss: 1.0323535203933716,grad_norm: 0.77920245235859, iteration: 382910
loss: 1.0415059328079224,grad_norm: 0.831072845566179, iteration: 382911
loss: 1.0361946821212769,grad_norm: 0.999999846511388, iteration: 382912
loss: 1.038878083229065,grad_norm: 1.000000097787388, iteration: 382913
loss: 1.2006492614746094,grad_norm: 0.9999998564223632, iteration: 382914
loss: 0.9984956979751587,grad_norm: 0.9999998606519641, iteration: 382915
loss: 0.991597592830658,grad_norm: 0.7432872884930589, iteration: 382916
loss: 1.0168614387512207,grad_norm: 0.797769890709702, iteration: 382917
loss: 0.9922091364860535,grad_norm: 0.7289136892017414, iteration: 382918
loss: 1.0371007919311523,grad_norm: 0.9999991154695512, iteration: 382919
loss: 1.0645626783370972,grad_norm: 0.8806867247363117, iteration: 382920
loss: 1.0690417289733887,grad_norm: 0.9999991813856361, iteration: 382921
loss: 1.0516936779022217,grad_norm: 0.9999994026923971, iteration: 382922
loss: 1.0350010395050049,grad_norm: 0.9545741326721826, iteration: 382923
loss: 1.0314922332763672,grad_norm: 0.9210549944124697, iteration: 382924
loss: 1.126904010772705,grad_norm: 0.9999997484027864, iteration: 382925
loss: 0.980268120765686,grad_norm: 0.6907934264634191, iteration: 382926
loss: 1.0798320770263672,grad_norm: 0.99999982606866, iteration: 382927
loss: 1.0431383848190308,grad_norm: 0.9999997168235296, iteration: 382928
loss: 1.0288004875183105,grad_norm: 0.9999996294842137, iteration: 382929
loss: 1.036711573600769,grad_norm: 0.9999991206407238, iteration: 382930
loss: 1.0979541540145874,grad_norm: 0.999999833518345, iteration: 382931
loss: 1.0161925554275513,grad_norm: 0.7747780893081222, iteration: 382932
loss: 0.966808021068573,grad_norm: 0.8347462143786021, iteration: 382933
loss: 1.0114784240722656,grad_norm: 0.9999993575098461, iteration: 382934
loss: 1.0168869495391846,grad_norm: 0.9999991949548687, iteration: 382935
loss: 1.0103514194488525,grad_norm: 0.7220439081118002, iteration: 382936
loss: 1.0102382898330688,grad_norm: 0.6754865400999837, iteration: 382937
loss: 1.27525794506073,grad_norm: 0.9999997766603751, iteration: 382938
loss: 1.0245851278305054,grad_norm: 0.7643198439656135, iteration: 382939
loss: 0.9904325604438782,grad_norm: 0.999999098418705, iteration: 382940
loss: 0.9977023601531982,grad_norm: 0.8160694541742365, iteration: 382941
loss: 1.012739896774292,grad_norm: 0.9213919266767403, iteration: 382942
loss: 1.0904196500778198,grad_norm: 0.9999995720061469, iteration: 382943
loss: 1.0197350978851318,grad_norm: 0.9999997093793616, iteration: 382944
loss: 1.0389997959136963,grad_norm: 0.9999991766755641, iteration: 382945
loss: 1.0245020389556885,grad_norm: 0.9021795034803366, iteration: 382946
loss: 1.0722565650939941,grad_norm: 0.9192624332726302, iteration: 382947
loss: 1.3552964925765991,grad_norm: 0.9999997442123323, iteration: 382948
loss: 1.0005463361740112,grad_norm: 0.8925354149181198, iteration: 382949
loss: 1.1887015104293823,grad_norm: 0.9999998131754441, iteration: 382950
loss: 1.1301491260528564,grad_norm: 0.8724334489459363, iteration: 382951
loss: 1.3264271020889282,grad_norm: 0.9999995622485273, iteration: 382952
loss: 1.1051781177520752,grad_norm: 0.9999998684717124, iteration: 382953
loss: 1.0467149019241333,grad_norm: 0.9065661783522142, iteration: 382954
loss: 0.9840176701545715,grad_norm: 0.8934275697949015, iteration: 382955
loss: 1.0610778331756592,grad_norm: 0.8553352832394724, iteration: 382956
loss: 1.013785719871521,grad_norm: 0.7343449554534471, iteration: 382957
loss: 0.995820164680481,grad_norm: 0.7368883689830739, iteration: 382958
loss: 0.9984318614006042,grad_norm: 0.9256856864474802, iteration: 382959
loss: 0.9788677096366882,grad_norm: 0.8524537490875699, iteration: 382960
loss: 0.9753268361091614,grad_norm: 0.7968934245144947, iteration: 382961
loss: 1.0948697328567505,grad_norm: 0.9999998921185321, iteration: 382962
loss: 1.0010550022125244,grad_norm: 0.655100016241933, iteration: 382963
loss: 1.0846887826919556,grad_norm: 0.9999999032138945, iteration: 382964
loss: 1.0274689197540283,grad_norm: 0.7936457752574516, iteration: 382965
loss: 1.0117107629776,grad_norm: 0.6319921572718766, iteration: 382966
loss: 0.9736732244491577,grad_norm: 0.7626106520530996, iteration: 382967
loss: 1.0114141702651978,grad_norm: 0.8380207532336568, iteration: 382968
loss: 1.0177583694458008,grad_norm: 0.9999990923381522, iteration: 382969
loss: 0.9962794780731201,grad_norm: 0.7143011831097784, iteration: 382970
loss: 1.0137823820114136,grad_norm: 0.7161666773784475, iteration: 382971
loss: 1.0151398181915283,grad_norm: 0.6919988394112746, iteration: 382972
loss: 0.9741582274436951,grad_norm: 0.9999991456215211, iteration: 382973
loss: 1.038895845413208,grad_norm: 0.9794773799399539, iteration: 382974
loss: 1.0334513187408447,grad_norm: 0.9909775597317863, iteration: 382975
loss: 1.0255568027496338,grad_norm: 0.9999992193080571, iteration: 382976
loss: 1.0028122663497925,grad_norm: 0.9210796046604696, iteration: 382977
loss: 0.9956890344619751,grad_norm: 0.7534247395100954, iteration: 382978
loss: 1.0914254188537598,grad_norm: 0.9999999618013277, iteration: 382979
loss: 0.9765537977218628,grad_norm: 0.8542992715141012, iteration: 382980
loss: 1.0462300777435303,grad_norm: 0.8957157362383186, iteration: 382981
loss: 0.999446451663971,grad_norm: 0.7779498733850969, iteration: 382982
loss: 1.020521879196167,grad_norm: 0.798238127310837, iteration: 382983
loss: 1.0287033319473267,grad_norm: 0.9976787635267098, iteration: 382984
loss: 1.0301238298416138,grad_norm: 0.9999990771943303, iteration: 382985
loss: 1.0026757717132568,grad_norm: 0.9999999004298412, iteration: 382986
loss: 1.0082545280456543,grad_norm: 0.7055999947134978, iteration: 382987
loss: 0.9809696674346924,grad_norm: 0.8262887446365799, iteration: 382988
loss: 1.004677414894104,grad_norm: 0.7174827252702461, iteration: 382989
loss: 1.0171904563903809,grad_norm: 0.77946538128157, iteration: 382990
loss: 1.0174005031585693,grad_norm: 0.6495424447643077, iteration: 382991
loss: 1.017106056213379,grad_norm: 0.7678270032972033, iteration: 382992
loss: 0.990115225315094,grad_norm: 0.6447072381707686, iteration: 382993
loss: 1.0495703220367432,grad_norm: 0.9999998403048708, iteration: 382994
loss: 1.1628674268722534,grad_norm: 0.9999996108754536, iteration: 382995
loss: 1.090265154838562,grad_norm: 0.9999999819633506, iteration: 382996
loss: 0.9726240634918213,grad_norm: 0.8565522958181532, iteration: 382997
loss: 0.9884071946144104,grad_norm: 0.7430008095818271, iteration: 382998
loss: 1.0141816139221191,grad_norm: 0.8753210167026596, iteration: 382999
loss: 1.0594300031661987,grad_norm: 0.7871657456265483, iteration: 383000
loss: 1.063544511795044,grad_norm: 0.8952366293339621, iteration: 383001
loss: 1.2244888544082642,grad_norm: 0.9999998363327766, iteration: 383002
loss: 1.0653935670852661,grad_norm: 0.9947830041407232, iteration: 383003
loss: 1.0370482206344604,grad_norm: 0.9718876100260881, iteration: 383004
loss: 0.9953643083572388,grad_norm: 0.9578220079606755, iteration: 383005
loss: 1.0179859399795532,grad_norm: 0.8403087332931735, iteration: 383006
loss: 1.0504475831985474,grad_norm: 0.99999961965098, iteration: 383007
loss: 1.013771414756775,grad_norm: 0.9999994388041119, iteration: 383008
loss: 1.1861436367034912,grad_norm: 0.9999992118754583, iteration: 383009
loss: 0.9938700199127197,grad_norm: 0.9999998500992553, iteration: 383010
loss: 0.9795263409614563,grad_norm: 0.9999997757254797, iteration: 383011
loss: 0.9677029252052307,grad_norm: 0.8754477345667289, iteration: 383012
loss: 0.9810967445373535,grad_norm: 0.9999992276874979, iteration: 383013
loss: 0.9760386943817139,grad_norm: 0.8264634731236442, iteration: 383014
loss: 1.011154055595398,grad_norm: 0.7784167957124711, iteration: 383015
loss: 1.0654743909835815,grad_norm: 0.9568929774933117, iteration: 383016
loss: 0.9999125599861145,grad_norm: 0.9616957997849862, iteration: 383017
loss: 1.012969970703125,grad_norm: 0.7492552760941444, iteration: 383018
loss: 0.9760319590568542,grad_norm: 0.7506954783789267, iteration: 383019
loss: 1.0070656538009644,grad_norm: 0.8931190008915249, iteration: 383020
loss: 1.0564141273498535,grad_norm: 0.9619432397823837, iteration: 383021
loss: 1.00811767578125,grad_norm: 0.8649773968593614, iteration: 383022
loss: 1.003356695175171,grad_norm: 0.7864771634402478, iteration: 383023
loss: 1.0288074016571045,grad_norm: 0.9999992804396047, iteration: 383024
loss: 1.0853002071380615,grad_norm: 0.9999997928949161, iteration: 383025
loss: 1.013309359550476,grad_norm: 0.9999994818578172, iteration: 383026
loss: 0.9915820360183716,grad_norm: 0.8086245753766266, iteration: 383027
loss: 0.9813792109489441,grad_norm: 0.7026128699257158, iteration: 383028
loss: 1.058700442314148,grad_norm: 0.8300930004379359, iteration: 383029
loss: 1.007324457168579,grad_norm: 0.7717154612282586, iteration: 383030
loss: 0.9916893243789673,grad_norm: 0.9999993070968756, iteration: 383031
loss: 0.992304801940918,grad_norm: 0.7407012183827494, iteration: 383032
loss: 1.0003163814544678,grad_norm: 0.802002770470716, iteration: 383033
loss: 1.0104281902313232,grad_norm: 0.8887415394609997, iteration: 383034
loss: 0.9778634309768677,grad_norm: 0.7244337143149252, iteration: 383035
loss: 0.9686206579208374,grad_norm: 0.6974108122850321, iteration: 383036
loss: 1.0033968687057495,grad_norm: 0.8728210350632636, iteration: 383037
loss: 0.9916256070137024,grad_norm: 0.9121075341003791, iteration: 383038
loss: 1.0475256443023682,grad_norm: 0.9999995197595444, iteration: 383039
loss: 0.9487422704696655,grad_norm: 0.8238237484307465, iteration: 383040
loss: 0.9991156458854675,grad_norm: 0.7343953145765959, iteration: 383041
loss: 1.0153638124465942,grad_norm: 0.7689735998561515, iteration: 383042
loss: 1.0076313018798828,grad_norm: 0.9999990193511288, iteration: 383043
loss: 0.9735420942306519,grad_norm: 0.8750640886978196, iteration: 383044
loss: 1.0197519063949585,grad_norm: 0.791368902233025, iteration: 383045
loss: 0.9630573987960815,grad_norm: 0.9999991934218063, iteration: 383046
loss: 1.0150582790374756,grad_norm: 0.7442751899446362, iteration: 383047
loss: 1.073628544807434,grad_norm: 0.9999998523048441, iteration: 383048
loss: 1.0239942073822021,grad_norm: 0.8848918747513809, iteration: 383049
loss: 0.9782931208610535,grad_norm: 0.9587274669298724, iteration: 383050
loss: 0.9662945866584778,grad_norm: 0.8262260958473469, iteration: 383051
loss: 1.1211236715316772,grad_norm: 0.8196944074141607, iteration: 383052
loss: 1.0482887029647827,grad_norm: 0.7562133321693856, iteration: 383053
loss: 1.0142930746078491,grad_norm: 0.9999992582355329, iteration: 383054
loss: 0.9996306896209717,grad_norm: 0.6492580866159917, iteration: 383055
loss: 1.0857044458389282,grad_norm: 0.9838346366820836, iteration: 383056
loss: 1.0174113512039185,grad_norm: 0.9999995411870243, iteration: 383057
loss: 0.9948150515556335,grad_norm: 0.9999996782091055, iteration: 383058
loss: 0.9930136203765869,grad_norm: 0.7599127283677035, iteration: 383059
loss: 1.0591516494750977,grad_norm: 0.9999997729993289, iteration: 383060
loss: 1.0255533456802368,grad_norm: 0.8235218099847846, iteration: 383061
loss: 1.0568947792053223,grad_norm: 0.999999899404827, iteration: 383062
loss: 1.1126213073730469,grad_norm: 0.9999994061740133, iteration: 383063
loss: 1.030761957168579,grad_norm: 0.9329407162354604, iteration: 383064
loss: 1.0055952072143555,grad_norm: 0.7841909305456984, iteration: 383065
loss: 1.0513092279434204,grad_norm: 0.999999469866093, iteration: 383066
loss: 1.0031543970108032,grad_norm: 0.7885866333903384, iteration: 383067
loss: 1.035421371459961,grad_norm: 0.9999990713350688, iteration: 383068
loss: 1.0024386644363403,grad_norm: 0.9999998716148173, iteration: 383069
loss: 1.26425302028656,grad_norm: 0.9999997066845371, iteration: 383070
loss: 1.032905101776123,grad_norm: 0.8090052476971864, iteration: 383071
loss: 0.9516674280166626,grad_norm: 0.7874705334186319, iteration: 383072
loss: 1.0891035795211792,grad_norm: 0.9964559078312586, iteration: 383073
loss: 1.0186429023742676,grad_norm: 0.8305841535054909, iteration: 383074
loss: 1.033287525177002,grad_norm: 0.7270727482566411, iteration: 383075
loss: 1.0316532850265503,grad_norm: 0.7967667644023922, iteration: 383076
loss: 1.0705541372299194,grad_norm: 0.9999997345131743, iteration: 383077
loss: 1.0043694972991943,grad_norm: 0.999999158884997, iteration: 383078
loss: 1.0411912202835083,grad_norm: 0.9999995534293854, iteration: 383079
loss: 1.0122199058532715,grad_norm: 0.7952256118473562, iteration: 383080
loss: 0.9935451745986938,grad_norm: 0.8714619620610176, iteration: 383081
loss: 1.0102736949920654,grad_norm: 0.9736702734379818, iteration: 383082
loss: 0.9946673512458801,grad_norm: 0.8617501046339848, iteration: 383083
loss: 0.9853439331054688,grad_norm: 0.9072228349954858, iteration: 383084
loss: 0.9880152344703674,grad_norm: 0.8269670622122036, iteration: 383085
loss: 0.9918118715286255,grad_norm: 0.6778385598823264, iteration: 383086
loss: 0.9878958463668823,grad_norm: 0.8095977260017932, iteration: 383087
loss: 0.9806015491485596,grad_norm: 0.9999993293064177, iteration: 383088
loss: 1.0211944580078125,grad_norm: 0.9999990876075818, iteration: 383089
loss: 1.001891851425171,grad_norm: 0.8456171603290704, iteration: 383090
loss: 1.0627952814102173,grad_norm: 0.8321866103558155, iteration: 383091
loss: 0.9870708584785461,grad_norm: 0.992987017201957, iteration: 383092
loss: 1.0374524593353271,grad_norm: 0.7561102750777543, iteration: 383093
loss: 1.08004891872406,grad_norm: 0.8194600269971452, iteration: 383094
loss: 1.0037413835525513,grad_norm: 0.7598837818643257, iteration: 383095
loss: 0.9983408451080322,grad_norm: 0.6973614825813457, iteration: 383096
loss: 1.1443417072296143,grad_norm: 0.9999998001868236, iteration: 383097
loss: 1.0912461280822754,grad_norm: 0.9542493559679726, iteration: 383098
loss: 0.9666846394538879,grad_norm: 0.8386141089424434, iteration: 383099
loss: 1.007521629333496,grad_norm: 0.9320377639338612, iteration: 383100
loss: 1.0248738527297974,grad_norm: 0.9287444570136411, iteration: 383101
loss: 0.9896780848503113,grad_norm: 0.9281013792186503, iteration: 383102
loss: 1.059324026107788,grad_norm: 0.9999999401812993, iteration: 383103
loss: 1.0326800346374512,grad_norm: 0.7744240694140493, iteration: 383104
loss: 0.9614498019218445,grad_norm: 0.8288618502476554, iteration: 383105
loss: 1.030100703239441,grad_norm: 0.8955540218168083, iteration: 383106
loss: 1.0509554147720337,grad_norm: 0.9999997203749773, iteration: 383107
loss: 0.9854015707969666,grad_norm: 0.8152753720482745, iteration: 383108
loss: 1.0394448041915894,grad_norm: 0.9999995740644102, iteration: 383109
loss: 1.277588129043579,grad_norm: 0.9999997124891714, iteration: 383110
loss: 1.058822512626648,grad_norm: 0.9999991681673508, iteration: 383111
loss: 1.0483194589614868,grad_norm: 0.9570009125498941, iteration: 383112
loss: 0.9818733930587769,grad_norm: 0.804809076724364, iteration: 383113
loss: 1.0498136281967163,grad_norm: 0.9999991046447128, iteration: 383114
loss: 1.0984617471694946,grad_norm: 0.7486100078785667, iteration: 383115
loss: 1.0660593509674072,grad_norm: 0.99999966661178, iteration: 383116
loss: 1.0193023681640625,grad_norm: 0.7832465030521001, iteration: 383117
loss: 1.0265135765075684,grad_norm: 0.8577911658293407, iteration: 383118
loss: 1.0024840831756592,grad_norm: 0.7288894685538873, iteration: 383119
loss: 1.0147467851638794,grad_norm: 0.8132008065059043, iteration: 383120
loss: 0.9905246496200562,grad_norm: 0.8193805211386888, iteration: 383121
loss: 1.0132043361663818,grad_norm: 0.9160837876022617, iteration: 383122
loss: 1.0943678617477417,grad_norm: 0.9999992224165323, iteration: 383123
loss: 1.0231993198394775,grad_norm: 0.9999991644281664, iteration: 383124
loss: 1.0323584079742432,grad_norm: 0.7211923882938667, iteration: 383125
loss: 1.018968105316162,grad_norm: 0.9999996121948529, iteration: 383126
loss: 1.0250498056411743,grad_norm: 0.731653195553294, iteration: 383127
loss: 1.104958176612854,grad_norm: 0.9918702478261644, iteration: 383128
loss: 1.0763744115829468,grad_norm: 0.99999926780797, iteration: 383129
loss: 0.9816386699676514,grad_norm: 0.9556013427098466, iteration: 383130
loss: 0.9844950437545776,grad_norm: 0.7983379158414412, iteration: 383131
loss: 1.014259696006775,grad_norm: 0.9265002046735085, iteration: 383132
loss: 1.0397828817367554,grad_norm: 0.7014411416200004, iteration: 383133
loss: 1.01042902469635,grad_norm: 0.8209442275973982, iteration: 383134
loss: 1.254512071609497,grad_norm: 0.9999997331367031, iteration: 383135
loss: 1.0285178422927856,grad_norm: 0.7632365645416422, iteration: 383136
loss: 1.0064594745635986,grad_norm: 0.8510936818046855, iteration: 383137
loss: 1.0085067749023438,grad_norm: 0.7895838707459438, iteration: 383138
loss: 1.0091403722763062,grad_norm: 0.6842739421847847, iteration: 383139
loss: 1.2052468061447144,grad_norm: 0.9999998109528101, iteration: 383140
loss: 1.0059599876403809,grad_norm: 0.8321911103789977, iteration: 383141
loss: 1.0103622674942017,grad_norm: 0.8304698695301421, iteration: 383142
loss: 0.9974184036254883,grad_norm: 0.8335033217054858, iteration: 383143
loss: 1.0097466707229614,grad_norm: 0.6786617354317277, iteration: 383144
loss: 1.0238579511642456,grad_norm: 0.9999991621458709, iteration: 383145
loss: 1.0710009336471558,grad_norm: 0.9999995995984934, iteration: 383146
loss: 0.9857664108276367,grad_norm: 0.7437000387677503, iteration: 383147
loss: 1.111047387123108,grad_norm: 0.858913751803334, iteration: 383148
loss: 0.992409884929657,grad_norm: 0.8014863049989902, iteration: 383149
loss: 0.9638212323188782,grad_norm: 0.8655045559875152, iteration: 383150
loss: 1.025436282157898,grad_norm: 0.7155454763397728, iteration: 383151
loss: 0.9936507344245911,grad_norm: 0.9642800104815383, iteration: 383152
loss: 1.0237419605255127,grad_norm: 0.999999280094038, iteration: 383153
loss: 1.0233933925628662,grad_norm: 0.8171713699326164, iteration: 383154
loss: 1.1589219570159912,grad_norm: 0.9999999422107486, iteration: 383155
loss: 0.9945157170295715,grad_norm: 0.8663952947660505, iteration: 383156
loss: 1.0216989517211914,grad_norm: 0.9999998597847872, iteration: 383157
loss: 1.004892349243164,grad_norm: 0.999999461721386, iteration: 383158
loss: 1.0559971332550049,grad_norm: 0.7719881323204957, iteration: 383159
loss: 0.9800026416778564,grad_norm: 0.8530483283628859, iteration: 383160
loss: 1.0073884725570679,grad_norm: 0.8416236279978985, iteration: 383161
loss: 1.0252254009246826,grad_norm: 0.9999996634816638, iteration: 383162
loss: 1.0175939798355103,grad_norm: 0.6287989985751, iteration: 383163
loss: 1.0168027877807617,grad_norm: 0.6945681092392098, iteration: 383164
loss: 1.003624439239502,grad_norm: 0.8659066677586575, iteration: 383165
loss: 1.1387932300567627,grad_norm: 0.999999105351254, iteration: 383166
loss: 1.14464271068573,grad_norm: 0.999999664367483, iteration: 383167
loss: 1.0528528690338135,grad_norm: 0.9999990303039168, iteration: 383168
loss: 1.0207295417785645,grad_norm: 0.9999999550689064, iteration: 383169
loss: 0.9939670562744141,grad_norm: 0.7284730128480538, iteration: 383170
loss: 1.1148825883865356,grad_norm: 0.9999990833584008, iteration: 383171
loss: 1.0141119956970215,grad_norm: 0.8547204166862292, iteration: 383172
loss: 1.0091782808303833,grad_norm: 0.9362999182944379, iteration: 383173
loss: 0.9656153321266174,grad_norm: 0.9999995498958896, iteration: 383174
loss: 0.9944983720779419,grad_norm: 0.8424588171557763, iteration: 383175
loss: 0.9941398501396179,grad_norm: 0.682522349524344, iteration: 383176
loss: 1.0472692251205444,grad_norm: 0.7583279915200052, iteration: 383177
loss: 1.0233180522918701,grad_norm: 0.9769260939266123, iteration: 383178
loss: 1.0053462982177734,grad_norm: 0.8182160563957841, iteration: 383179
loss: 1.0580828189849854,grad_norm: 0.9999990826431802, iteration: 383180
loss: 1.2088823318481445,grad_norm: 0.9999999704796191, iteration: 383181
loss: 1.0050625801086426,grad_norm: 0.8924586078534069, iteration: 383182
loss: 1.0679612159729004,grad_norm: 0.9999991247695732, iteration: 383183
loss: 1.0690604448318481,grad_norm: 0.9999993598367263, iteration: 383184
loss: 1.0208511352539062,grad_norm: 0.7833973771281756, iteration: 383185
loss: 1.0030146837234497,grad_norm: 0.7253857079618883, iteration: 383186
loss: 1.0370830297470093,grad_norm: 0.9540642118868229, iteration: 383187
loss: 1.0572624206542969,grad_norm: 0.91154864525072, iteration: 383188
loss: 0.996641218662262,grad_norm: 0.9192458032011442, iteration: 383189
loss: 1.0752625465393066,grad_norm: 0.9999989838725455, iteration: 383190
loss: 1.0114381313323975,grad_norm: 0.9999990503616605, iteration: 383191
loss: 1.0158575773239136,grad_norm: 0.999999586579479, iteration: 383192
loss: 1.050204873085022,grad_norm: 0.9308217990459775, iteration: 383193
loss: 1.08664870262146,grad_norm: 0.9999992700477155, iteration: 383194
loss: 1.0123594999313354,grad_norm: 0.7683020384313637, iteration: 383195
loss: 1.0270134210586548,grad_norm: 0.9999991470642453, iteration: 383196
loss: 1.0234746932983398,grad_norm: 0.9999997331137854, iteration: 383197
loss: 1.0030094385147095,grad_norm: 0.7378704939798227, iteration: 383198
loss: 1.0324304103851318,grad_norm: 0.7404073254126203, iteration: 383199
loss: 0.9896283745765686,grad_norm: 0.8355616714893943, iteration: 383200
loss: 1.0373073816299438,grad_norm: 0.8578794554516207, iteration: 383201
loss: 1.0738688707351685,grad_norm: 0.9999996420757288, iteration: 383202
loss: 1.1176280975341797,grad_norm: 0.9999993603317281, iteration: 383203
loss: 1.0260882377624512,grad_norm: 0.9113257977515283, iteration: 383204
loss: 1.0122077465057373,grad_norm: 0.7400250048936652, iteration: 383205
loss: 1.212985873222351,grad_norm: 0.9999994506285247, iteration: 383206
loss: 1.0040806531906128,grad_norm: 0.9999996825790398, iteration: 383207
loss: 1.0549743175506592,grad_norm: 0.9583178978579003, iteration: 383208
loss: 1.054160475730896,grad_norm: 0.6301807673809827, iteration: 383209
loss: 0.9682453274726868,grad_norm: 0.8439283322924498, iteration: 383210
loss: 1.0455509424209595,grad_norm: 0.7694979785307265, iteration: 383211
loss: 1.07720148563385,grad_norm: 0.8069049706697032, iteration: 383212
loss: 1.041922926902771,grad_norm: 0.889009556849423, iteration: 383213
loss: 1.0209001302719116,grad_norm: 0.7464985369832975, iteration: 383214
loss: 1.2918351888656616,grad_norm: 0.9999994525814464, iteration: 383215
loss: 1.0167863368988037,grad_norm: 0.8790100623047037, iteration: 383216
loss: 1.1009502410888672,grad_norm: 0.9999999317816917, iteration: 383217
loss: 1.0707297325134277,grad_norm: 0.9999998731621512, iteration: 383218
loss: 0.9646946787834167,grad_norm: 0.7705375125752383, iteration: 383219
loss: 1.0652533769607544,grad_norm: 0.6988533121137199, iteration: 383220
loss: 1.0338374376296997,grad_norm: 0.8503582396227974, iteration: 383221
loss: 1.0141587257385254,grad_norm: 0.7591222015912729, iteration: 383222
loss: 0.9882006645202637,grad_norm: 0.9999993946745339, iteration: 383223
loss: 1.0285290479660034,grad_norm: 0.8582723381775698, iteration: 383224
loss: 1.0381451845169067,grad_norm: 0.9901314028115509, iteration: 383225
loss: 1.0077122449874878,grad_norm: 0.8169677707122631, iteration: 383226
loss: 1.1238259077072144,grad_norm: 1.0000000266343245, iteration: 383227
loss: 0.9912099838256836,grad_norm: 0.8547487551019705, iteration: 383228
loss: 1.010435700416565,grad_norm: 0.9367792115636034, iteration: 383229
loss: 1.0139580965042114,grad_norm: 0.8114774104277136, iteration: 383230
loss: 1.0034458637237549,grad_norm: 0.9999993930905633, iteration: 383231
loss: 1.0066347122192383,grad_norm: 0.8950794552220628, iteration: 383232
loss: 1.0474777221679688,grad_norm: 0.6770237532779315, iteration: 383233
loss: 1.1021729707717896,grad_norm: 0.7721294819875272, iteration: 383234
loss: 1.309617280960083,grad_norm: 0.9999994686783991, iteration: 383235
loss: 0.9650139808654785,grad_norm: 0.8410134223203242, iteration: 383236
loss: 1.0665944814682007,grad_norm: 0.9999992827722752, iteration: 383237
loss: 0.9752872586250305,grad_norm: 0.8207395665791283, iteration: 383238
loss: 0.9376062750816345,grad_norm: 0.7672165440946372, iteration: 383239
loss: 1.0144912004470825,grad_norm: 0.8285606726903098, iteration: 383240
loss: 1.194780945777893,grad_norm: 0.9999992067759057, iteration: 383241
loss: 0.9736273884773254,grad_norm: 0.6755104127834783, iteration: 383242
loss: 1.0650780200958252,grad_norm: 0.9069139011482859, iteration: 383243
loss: 1.0746530294418335,grad_norm: 0.9999994936890882, iteration: 383244
loss: 1.0021110773086548,grad_norm: 0.8884610660708115, iteration: 383245
loss: 1.0027767419815063,grad_norm: 0.8856167265467275, iteration: 383246
loss: 1.0081822872161865,grad_norm: 0.9575825210841034, iteration: 383247
loss: 0.980064332485199,grad_norm: 0.7224813047713109, iteration: 383248
loss: 1.0503350496292114,grad_norm: 0.8220028124814384, iteration: 383249
loss: 1.198189377784729,grad_norm: 0.9999997178040932, iteration: 383250
loss: 1.0367313623428345,grad_norm: 0.9999990717149189, iteration: 383251
loss: 0.9938362836837769,grad_norm: 0.8895447916847777, iteration: 383252
loss: 1.0081392526626587,grad_norm: 0.8124414008961289, iteration: 383253
loss: 1.0022755861282349,grad_norm: 0.786341409041175, iteration: 383254
loss: 0.9888270497322083,grad_norm: 0.9999991122728902, iteration: 383255
loss: 1.0794427394866943,grad_norm: 0.9999999341008456, iteration: 383256
loss: 0.9580315351486206,grad_norm: 0.9094892333673947, iteration: 383257
loss: 1.022809386253357,grad_norm: 0.9754342938158589, iteration: 383258
loss: 1.0534063577651978,grad_norm: 0.9999990454632821, iteration: 383259
loss: 1.0371075868606567,grad_norm: 0.9999991047752657, iteration: 383260
loss: 1.0166679620742798,grad_norm: 0.6892839676136716, iteration: 383261
loss: 1.0136823654174805,grad_norm: 0.911778450527358, iteration: 383262
loss: 1.1621623039245605,grad_norm: 0.9999993113312386, iteration: 383263
loss: 0.9636178016662598,grad_norm: 0.9999995545685934, iteration: 383264
loss: 1.07905912399292,grad_norm: 0.9999992297236282, iteration: 383265
loss: 1.0063756704330444,grad_norm: 0.7091788108172805, iteration: 383266
loss: 1.0142279863357544,grad_norm: 0.9495726272003918, iteration: 383267
loss: 0.9659215211868286,grad_norm: 0.8574619395223232, iteration: 383268
loss: 1.057584285736084,grad_norm: 0.9999998058310569, iteration: 383269
loss: 0.9898213744163513,grad_norm: 0.9012886957697764, iteration: 383270
loss: 1.0336257219314575,grad_norm: 0.9999996011613663, iteration: 383271
loss: 1.2542901039123535,grad_norm: 0.9999994296179495, iteration: 383272
loss: 1.0172293186187744,grad_norm: 0.7636048981150186, iteration: 383273
loss: 1.0727322101593018,grad_norm: 0.9999993269085922, iteration: 383274
loss: 1.0680738687515259,grad_norm: 0.9999997322196903, iteration: 383275
loss: 1.085770845413208,grad_norm: 0.9177003815050726, iteration: 383276
loss: 1.0346498489379883,grad_norm: 0.9999990477793953, iteration: 383277
loss: 1.1014548540115356,grad_norm: 0.9999992751734555, iteration: 383278
loss: 1.1117576360702515,grad_norm: 0.9999998035272749, iteration: 383279
loss: 1.0397506952285767,grad_norm: 0.8822399935583092, iteration: 383280
loss: 0.9601758122444153,grad_norm: 0.7747409969735326, iteration: 383281
loss: 1.0462381839752197,grad_norm: 0.9139639762143507, iteration: 383282
loss: 1.0970942974090576,grad_norm: 0.8987542214646828, iteration: 383283
loss: 1.0653325319290161,grad_norm: 0.9999993388738411, iteration: 383284
loss: 1.0492523908615112,grad_norm: 0.9999994303379449, iteration: 383285
loss: 0.9949267506599426,grad_norm: 0.7672065890101294, iteration: 383286
loss: 1.0339716672897339,grad_norm: 0.7365312250358298, iteration: 383287
loss: 0.983982503414154,grad_norm: 0.606563828399427, iteration: 383288
loss: 1.0190612077713013,grad_norm: 0.9815088797645435, iteration: 383289
loss: 1.0525802373886108,grad_norm: 0.9999992545540322, iteration: 383290
loss: 1.1026935577392578,grad_norm: 0.8329035946961317, iteration: 383291
loss: 1.0468605756759644,grad_norm: 0.7923204978733744, iteration: 383292
loss: 1.0028972625732422,grad_norm: 0.9999992478898441, iteration: 383293
loss: 1.0181039571762085,grad_norm: 0.785423700985901, iteration: 383294
loss: 1.0353447198867798,grad_norm: 0.8855903997247, iteration: 383295
loss: 1.0160714387893677,grad_norm: 0.8186987539106354, iteration: 383296
loss: 0.9569334983825684,grad_norm: 0.7666783703322608, iteration: 383297
loss: 1.0790202617645264,grad_norm: 0.8049334416808238, iteration: 383298
loss: 0.9750467538833618,grad_norm: 0.8259948367373784, iteration: 383299
loss: 1.0623911619186401,grad_norm: 0.9999990715758427, iteration: 383300
loss: 0.9988557696342468,grad_norm: 0.7933600799380515, iteration: 383301
loss: 0.93902987241745,grad_norm: 0.7759298553437757, iteration: 383302
loss: 1.1813896894454956,grad_norm: 0.9999994701216286, iteration: 383303
loss: 1.0412187576293945,grad_norm: 0.9999996968981685, iteration: 383304
loss: 1.0619977712631226,grad_norm: 0.9999992940641298, iteration: 383305
loss: 1.0647042989730835,grad_norm: 0.7578909444089577, iteration: 383306
loss: 1.00053870677948,grad_norm: 0.9999997149784067, iteration: 383307
loss: 1.0027729272842407,grad_norm: 0.8996247767381724, iteration: 383308
loss: 1.0142697095870972,grad_norm: 0.7930494860221375, iteration: 383309
loss: 1.0068660974502563,grad_norm: 0.9478776722873429, iteration: 383310
loss: 0.9902825951576233,grad_norm: 0.8731246068549552, iteration: 383311
loss: 1.009568452835083,grad_norm: 0.856297781551998, iteration: 383312
loss: 0.9553120732307434,grad_norm: 0.8372654690688561, iteration: 383313
loss: 0.9854266047477722,grad_norm: 0.6756489998213281, iteration: 383314
loss: 1.0158416032791138,grad_norm: 0.7217180010274582, iteration: 383315
loss: 1.0485368967056274,grad_norm: 0.9999992395807307, iteration: 383316
loss: 1.1001876592636108,grad_norm: 0.6980082005212082, iteration: 383317
loss: 0.9924069046974182,grad_norm: 0.7523699399632334, iteration: 383318
loss: 1.0660151243209839,grad_norm: 0.8700440305823094, iteration: 383319
loss: 0.9834496378898621,grad_norm: 0.8976585328545845, iteration: 383320
loss: 0.9924777150154114,grad_norm: 0.7878520241490814, iteration: 383321
loss: 1.0004338026046753,grad_norm: 0.999999270682716, iteration: 383322
loss: 1.0139168500900269,grad_norm: 0.7836883647750509, iteration: 383323
loss: 1.0158764123916626,grad_norm: 0.8505010060957562, iteration: 383324
loss: 1.0062168836593628,grad_norm: 0.9999991282037142, iteration: 383325
loss: 1.102668046951294,grad_norm: 0.9999995256008006, iteration: 383326
loss: 1.0284874439239502,grad_norm: 0.8341718285570209, iteration: 383327
loss: 1.0782583951950073,grad_norm: 0.8422453637918611, iteration: 383328
loss: 1.0012080669403076,grad_norm: 0.74752507670414, iteration: 383329
loss: 1.051679015159607,grad_norm: 0.9999998497021384, iteration: 383330
loss: 1.054996371269226,grad_norm: 0.946427805433736, iteration: 383331
loss: 0.9933652877807617,grad_norm: 0.8149741612730521, iteration: 383332
loss: 1.0142641067504883,grad_norm: 0.8766410955733137, iteration: 383333
loss: 1.0343708992004395,grad_norm: 0.9999993364240924, iteration: 383334
loss: 1.0817126035690308,grad_norm: 0.9999998512734383, iteration: 383335
loss: 1.0813343524932861,grad_norm: 0.8178421633029953, iteration: 383336
loss: 0.9739338755607605,grad_norm: 0.7129347216861169, iteration: 383337
loss: 0.9796389937400818,grad_norm: 0.879988943605501, iteration: 383338
loss: 0.96885746717453,grad_norm: 0.8457275836352911, iteration: 383339
loss: 1.012260913848877,grad_norm: 0.6808599262795275, iteration: 383340
loss: 1.1472145318984985,grad_norm: 0.999999323206378, iteration: 383341
loss: 1.0145188570022583,grad_norm: 0.9999998012606977, iteration: 383342
loss: 1.030237078666687,grad_norm: 0.7016310657232312, iteration: 383343
loss: 0.980678379535675,grad_norm: 0.7104946842179454, iteration: 383344
loss: 0.9950438737869263,grad_norm: 0.727068653133589, iteration: 383345
loss: 0.9735986590385437,grad_norm: 0.7240028549261472, iteration: 383346
loss: 0.9765684008598328,grad_norm: 0.7045771482502174, iteration: 383347
loss: 1.1609774827957153,grad_norm: 0.9999997048464021, iteration: 383348
loss: 1.0113215446472168,grad_norm: 0.7809111424626726, iteration: 383349
loss: 1.123500108718872,grad_norm: 0.9999999087699416, iteration: 383350
loss: 1.0224443674087524,grad_norm: 0.9999995010179996, iteration: 383351
loss: 1.1676322221755981,grad_norm: 0.9068323263918849, iteration: 383352
loss: 1.0203468799591064,grad_norm: 0.8249616354433971, iteration: 383353
loss: 0.9833464622497559,grad_norm: 0.9999991849266903, iteration: 383354
loss: 1.0558547973632812,grad_norm: 0.999999192486454, iteration: 383355
loss: 1.0916231870651245,grad_norm: 0.999999685229014, iteration: 383356
loss: 0.9668571352958679,grad_norm: 0.9408103790900129, iteration: 383357
loss: 1.0093941688537598,grad_norm: 0.6508767067563869, iteration: 383358
loss: 0.9885743856430054,grad_norm: 0.7925137251390645, iteration: 383359
loss: 1.0636249780654907,grad_norm: 0.8721309757720146, iteration: 383360
loss: 1.0137780904769897,grad_norm: 0.9325728445031587, iteration: 383361
loss: 0.9793833494186401,grad_norm: 0.7089192103038721, iteration: 383362
loss: 1.0827049016952515,grad_norm: 0.9999995063946359, iteration: 383363
loss: 1.0421706438064575,grad_norm: 0.9999990634886029, iteration: 383364
loss: 1.0251784324645996,grad_norm: 0.8198992644507876, iteration: 383365
loss: 1.0112230777740479,grad_norm: 0.9287660845757267, iteration: 383366
loss: 1.012029767036438,grad_norm: 0.8017717659876785, iteration: 383367
loss: 1.0943752527236938,grad_norm: 0.9459331892656803, iteration: 383368
loss: 1.0060756206512451,grad_norm: 0.9003842963813787, iteration: 383369
loss: 1.2768115997314453,grad_norm: 0.9999997695834463, iteration: 383370
loss: 0.9903824329376221,grad_norm: 0.959039086858247, iteration: 383371
loss: 1.4437910318374634,grad_norm: 0.9999994071847215, iteration: 383372
loss: 1.1102240085601807,grad_norm: 0.9999998069445226, iteration: 383373
loss: 1.1563875675201416,grad_norm: 0.9999990088033132, iteration: 383374
loss: 1.0357332229614258,grad_norm: 0.7734902837252119, iteration: 383375
loss: 1.0487033128738403,grad_norm: 0.8558894054250034, iteration: 383376
loss: 1.0235836505889893,grad_norm: 0.8959383466242584, iteration: 383377
loss: 1.1382710933685303,grad_norm: 0.9999997833983111, iteration: 383378
loss: 1.4006505012512207,grad_norm: 0.9999999445453887, iteration: 383379
loss: 1.1226247549057007,grad_norm: 0.9999991628691066, iteration: 383380
loss: 1.0010777711868286,grad_norm: 0.9999992832806998, iteration: 383381
loss: 1.0417072772979736,grad_norm: 0.8613137674520429, iteration: 383382
loss: 1.0571603775024414,grad_norm: 0.9999999328277792, iteration: 383383
loss: 1.1417064666748047,grad_norm: 0.999999579886646, iteration: 383384
loss: 1.01768159866333,grad_norm: 0.9999998560118962, iteration: 383385
loss: 1.0324872732162476,grad_norm: 0.9999990407274015, iteration: 383386
loss: 1.1024010181427002,grad_norm: 0.999999215493863, iteration: 383387
loss: 1.0356996059417725,grad_norm: 0.9999997807134309, iteration: 383388
loss: 1.0228239297866821,grad_norm: 0.9999992406522523, iteration: 383389
loss: 1.0982681512832642,grad_norm: 0.8967014719073879, iteration: 383390
loss: 1.028186321258545,grad_norm: 0.99999936418481, iteration: 383391
loss: 1.0734455585479736,grad_norm: 0.9999992664302751, iteration: 383392
loss: 1.0822325944900513,grad_norm: 0.9999996109647583, iteration: 383393
loss: 1.0578213930130005,grad_norm: 0.9999994093977942, iteration: 383394
loss: 0.9849441647529602,grad_norm: 0.7103060567123048, iteration: 383395
loss: 1.0650197267532349,grad_norm: 0.9999991655452553, iteration: 383396
loss: 1.0894278287887573,grad_norm: 0.9472969253786826, iteration: 383397
loss: 1.0505073070526123,grad_norm: 0.8561422432630579, iteration: 383398
loss: 0.9797965288162231,grad_norm: 0.7467327621699237, iteration: 383399
loss: 1.0150848627090454,grad_norm: 0.6971840024953601, iteration: 383400
loss: 1.0066334009170532,grad_norm: 0.9999992294750717, iteration: 383401
loss: 1.0725973844528198,grad_norm: 0.9653160636296871, iteration: 383402
loss: 1.1098759174346924,grad_norm: 0.9632039979547268, iteration: 383403
loss: 1.0190727710723877,grad_norm: 0.8270774818651888, iteration: 383404
loss: 1.0172507762908936,grad_norm: 0.9999990008799584, iteration: 383405
loss: 1.3015403747558594,grad_norm: 0.999999701216786, iteration: 383406
loss: 1.0678389072418213,grad_norm: 0.788358311004554, iteration: 383407
loss: 1.0340708494186401,grad_norm: 0.8151504548278139, iteration: 383408
loss: 0.9828334450721741,grad_norm: 0.8815551947184948, iteration: 383409
loss: 0.9637119770050049,grad_norm: 0.9605250516091921, iteration: 383410
loss: 1.005299687385559,grad_norm: 1.000000013985689, iteration: 383411
loss: 1.008069634437561,grad_norm: 0.9999990454659397, iteration: 383412
loss: 1.0228863954544067,grad_norm: 0.9999992772469817, iteration: 383413
loss: 0.9958261251449585,grad_norm: 0.9882876628078863, iteration: 383414
loss: 1.0245048999786377,grad_norm: 0.99999930946838, iteration: 383415
loss: 1.1229596138000488,grad_norm: 0.9999998799664352, iteration: 383416
loss: 1.0558664798736572,grad_norm: 0.8195607884924647, iteration: 383417
loss: 1.0383403301239014,grad_norm: 0.8207295358355468, iteration: 383418
loss: 1.3429244756698608,grad_norm: 0.9999991791026321, iteration: 383419
loss: 1.0840299129486084,grad_norm: 0.999999794653707, iteration: 383420
loss: 1.1818523406982422,grad_norm: 0.9999993170678342, iteration: 383421
loss: 0.9963829517364502,grad_norm: 0.8276852483894706, iteration: 383422
loss: 0.9905918836593628,grad_norm: 0.8927265039216171, iteration: 383423
loss: 1.1609793901443481,grad_norm: 0.999999536955654, iteration: 383424
loss: 1.0195521116256714,grad_norm: 0.8347838217097517, iteration: 383425
loss: 1.020339012145996,grad_norm: 0.9999990820610527, iteration: 383426
loss: 1.0656930208206177,grad_norm: 0.9356912764302466, iteration: 383427
loss: 1.0407978296279907,grad_norm: 0.9999991569791107, iteration: 383428
loss: 0.9984059929847717,grad_norm: 0.9999992895557221, iteration: 383429
loss: 1.4158012866973877,grad_norm: 1.0000001096511646, iteration: 383430
loss: 0.99073326587677,grad_norm: 0.7547361999215184, iteration: 383431
loss: 1.0665206909179688,grad_norm: 0.8422751404255222, iteration: 383432
loss: 0.9871976971626282,grad_norm: 0.7856196901232858, iteration: 383433
loss: 1.1033344268798828,grad_norm: 0.9999990522830692, iteration: 383434
loss: 0.984015166759491,grad_norm: 0.7360230316444659, iteration: 383435
loss: 0.9995827674865723,grad_norm: 0.7538603509689825, iteration: 383436
loss: 1.0600568056106567,grad_norm: 0.9999991573142492, iteration: 383437
loss: 1.0241146087646484,grad_norm: 0.7873256016977656, iteration: 383438
loss: 1.095888614654541,grad_norm: 0.9999992353597564, iteration: 383439
loss: 1.0671249628067017,grad_norm: 0.7392307321786108, iteration: 383440
loss: 0.9766885042190552,grad_norm: 0.9999991299342277, iteration: 383441
loss: 1.0704461336135864,grad_norm: 0.999999799368214, iteration: 383442
loss: 1.0517704486846924,grad_norm: 0.9268177578030099, iteration: 383443
loss: 1.049614667892456,grad_norm: 0.9999995563121495, iteration: 383444
loss: 1.0446168184280396,grad_norm: 0.9004963354314202, iteration: 383445
loss: 1.0225416421890259,grad_norm: 0.8182505780225694, iteration: 383446
loss: 1.0097017288208008,grad_norm: 0.7734135932081664, iteration: 383447
loss: 1.1185708045959473,grad_norm: 0.9999994390144152, iteration: 383448
loss: 0.9757512807846069,grad_norm: 0.7884098326594672, iteration: 383449
loss: 1.0275399684906006,grad_norm: 0.9999995798314852, iteration: 383450
loss: 1.0159294605255127,grad_norm: 0.8329603121836605, iteration: 383451
loss: 1.052922010421753,grad_norm: 0.9936296317187225, iteration: 383452
loss: 1.0321241617202759,grad_norm: 0.9024191349245925, iteration: 383453
loss: 0.9983799457550049,grad_norm: 0.7187538279042398, iteration: 383454
loss: 1.1359692811965942,grad_norm: 0.9999994853684525, iteration: 383455
loss: 1.0274139642715454,grad_norm: 0.9223433956507934, iteration: 383456
loss: 1.0657596588134766,grad_norm: 0.7581027774409065, iteration: 383457
loss: 0.9572680592536926,grad_norm: 0.7867496488703809, iteration: 383458
loss: 1.0541887283325195,grad_norm: 0.865324940048274, iteration: 383459
loss: 1.1288145780563354,grad_norm: 0.999999477288584, iteration: 383460
loss: 1.0273023843765259,grad_norm: 0.6850050981903154, iteration: 383461
loss: 1.189429759979248,grad_norm: 0.9999993541581392, iteration: 383462
loss: 0.9922057390213013,grad_norm: 0.865690059968345, iteration: 383463
loss: 1.0012614727020264,grad_norm: 0.8825618769152134, iteration: 383464
loss: 1.0069022178649902,grad_norm: 0.999999629578386, iteration: 383465
loss: 1.1025018692016602,grad_norm: 0.9999997219554627, iteration: 383466
loss: 1.0005934238433838,grad_norm: 0.772304178598567, iteration: 383467
loss: 0.9776917695999146,grad_norm: 0.7497197313716387, iteration: 383468
loss: 1.1014916896820068,grad_norm: 0.999999779784135, iteration: 383469
loss: 1.0673344135284424,grad_norm: 0.9999997329103253, iteration: 383470
loss: 1.0490652322769165,grad_norm: 0.999999442614525, iteration: 383471
loss: 0.9989501237869263,grad_norm: 0.7428412515315691, iteration: 383472
loss: 1.1304057836532593,grad_norm: 0.9999998604184092, iteration: 383473
loss: 1.0969219207763672,grad_norm: 0.9999998092881767, iteration: 383474
loss: 1.0417619943618774,grad_norm: 0.7376359087679487, iteration: 383475
loss: 1.0325608253479004,grad_norm: 0.7490759166922784, iteration: 383476
loss: 1.0139892101287842,grad_norm: 0.9999993285386902, iteration: 383477
loss: 1.1236774921417236,grad_norm: 0.999999923459825, iteration: 383478
loss: 1.0514217615127563,grad_norm: 0.9999999841890083, iteration: 383479
loss: 0.9860532283782959,grad_norm: 0.7686557913505953, iteration: 383480
loss: 1.0480859279632568,grad_norm: 0.9999995415929341, iteration: 383481
loss: 0.9763745069503784,grad_norm: 0.7837239652405431, iteration: 383482
loss: 0.9786565899848938,grad_norm: 0.7569190791759343, iteration: 383483
loss: 1.0770442485809326,grad_norm: 0.9999995381294577, iteration: 383484
loss: 1.0009516477584839,grad_norm: 0.7996581784161829, iteration: 383485
loss: 1.0036370754241943,grad_norm: 0.7779500761825141, iteration: 383486
loss: 1.0013118982315063,grad_norm: 0.883327690779622, iteration: 383487
loss: 1.0268330574035645,grad_norm: 0.9999995270464694, iteration: 383488
loss: 0.9754314422607422,grad_norm: 0.8763355847339764, iteration: 383489
loss: 1.0149728059768677,grad_norm: 0.8052186963375251, iteration: 383490
loss: 1.071121335029602,grad_norm: 0.8037005880768177, iteration: 383491
loss: 1.1215240955352783,grad_norm: 0.9999997570057364, iteration: 383492
loss: 1.007927417755127,grad_norm: 0.9999991650119476, iteration: 383493
loss: 0.9825525283813477,grad_norm: 0.7746774492686851, iteration: 383494
loss: 0.9732694625854492,grad_norm: 0.7534447167664804, iteration: 383495
loss: 0.9879024028778076,grad_norm: 0.8190312297637743, iteration: 383496
loss: 1.0279031991958618,grad_norm: 0.9999996816781893, iteration: 383497
loss: 1.0629034042358398,grad_norm: 0.9999998798776348, iteration: 383498
loss: 1.0232346057891846,grad_norm: 0.9999991875113275, iteration: 383499
loss: 1.0941423177719116,grad_norm: 0.9999996744525996, iteration: 383500
loss: 1.008873701095581,grad_norm: 0.9141758320717797, iteration: 383501
loss: 1.0271601676940918,grad_norm: 0.8183179060676083, iteration: 383502
loss: 0.9988469481468201,grad_norm: 0.8371238045824854, iteration: 383503
loss: 1.0106799602508545,grad_norm: 0.8796525446244913, iteration: 383504
loss: 1.0012491941452026,grad_norm: 0.9999990254960571, iteration: 383505
loss: 1.0848889350891113,grad_norm: 0.9999991554853236, iteration: 383506
loss: 0.9892871975898743,grad_norm: 0.9999998650416947, iteration: 383507
loss: 1.013992190361023,grad_norm: 0.6705450210437849, iteration: 383508
loss: 1.0614323616027832,grad_norm: 0.9755508909411759, iteration: 383509
loss: 1.0726534128189087,grad_norm: 0.9999995888216808, iteration: 383510
loss: 0.9928085803985596,grad_norm: 0.9999996463171459, iteration: 383511
loss: 1.0158101320266724,grad_norm: 0.7369440219097155, iteration: 383512
loss: 1.2832893133163452,grad_norm: 0.999999872146962, iteration: 383513
loss: 1.0138347148895264,grad_norm: 0.7447410055595686, iteration: 383514
loss: 1.071764349937439,grad_norm: 0.9999999966368375, iteration: 383515
loss: 1.033308744430542,grad_norm: 0.9999991315403617, iteration: 383516
loss: 1.077373743057251,grad_norm: 0.9999989973894238, iteration: 383517
loss: 1.0546998977661133,grad_norm: 0.9134435634179244, iteration: 383518
loss: 1.0431827306747437,grad_norm: 0.9465544228094288, iteration: 383519
loss: 1.05510413646698,grad_norm: 0.9999992079837601, iteration: 383520
loss: 1.0303994417190552,grad_norm: 0.7936724960458489, iteration: 383521
loss: 1.100875973701477,grad_norm: 0.8556186424710657, iteration: 383522
loss: 1.00819730758667,grad_norm: 0.7774921949592231, iteration: 383523
loss: 0.974936842918396,grad_norm: 0.8837340349721096, iteration: 383524
loss: 1.2524278163909912,grad_norm: 0.99999918607291, iteration: 383525
loss: 0.9940895438194275,grad_norm: 0.7706526843722739, iteration: 383526
loss: 1.0765067338943481,grad_norm: 0.9999998430410648, iteration: 383527
loss: 1.007851243019104,grad_norm: 0.9999999605950464, iteration: 383528
loss: 1.1535894870758057,grad_norm: 0.9999998124488361, iteration: 383529
loss: 1.0646625757217407,grad_norm: 0.9999990333889957, iteration: 383530
loss: 1.0948735475540161,grad_norm: 0.9999992604068056, iteration: 383531
loss: 1.0141006708145142,grad_norm: 0.8825735459269421, iteration: 383532
loss: 0.9976608157157898,grad_norm: 0.9999991909811873, iteration: 383533
loss: 1.13148033618927,grad_norm: 0.9999993080653177, iteration: 383534
loss: 1.0355702638626099,grad_norm: 0.9999999043969704, iteration: 383535
loss: 0.9838488698005676,grad_norm: 0.8013969411972676, iteration: 383536
loss: 1.0782668590545654,grad_norm: 0.9999993271111713, iteration: 383537
loss: 1.0111165046691895,grad_norm: 0.9999992973761069, iteration: 383538
loss: 0.9948220252990723,grad_norm: 0.7056132721105525, iteration: 383539
loss: 1.0540659427642822,grad_norm: 0.9999996158118163, iteration: 383540
loss: 1.009828805923462,grad_norm: 0.9999996819546126, iteration: 383541
loss: 0.9861465692520142,grad_norm: 0.9999995658754667, iteration: 383542
loss: 1.024449348449707,grad_norm: 0.7373173494921325, iteration: 383543
loss: 1.0503441095352173,grad_norm: 0.9999994390201432, iteration: 383544
loss: 1.0115509033203125,grad_norm: 0.932113538227448, iteration: 383545
loss: 1.1033744812011719,grad_norm: 0.8363093053710308, iteration: 383546
loss: 0.9840301275253296,grad_norm: 0.7920179005589798, iteration: 383547
loss: 1.0092486143112183,grad_norm: 0.9041505938422982, iteration: 383548
loss: 1.0237151384353638,grad_norm: 0.7507629094814627, iteration: 383549
loss: 0.9984986782073975,grad_norm: 0.8732097242965638, iteration: 383550
loss: 1.022220492362976,grad_norm: 0.9999994509958903, iteration: 383551
loss: 1.060901403427124,grad_norm: 0.9999991775571353, iteration: 383552
loss: 1.0085673332214355,grad_norm: 0.9999991531456602, iteration: 383553
loss: 0.9851195216178894,grad_norm: 0.5942970298198913, iteration: 383554
loss: 1.0745545625686646,grad_norm: 0.9228957331727109, iteration: 383555
loss: 1.0104268789291382,grad_norm: 0.9999995360293777, iteration: 383556
loss: 0.9825857281684875,grad_norm: 0.8496200591361065, iteration: 383557
loss: 1.0353615283966064,grad_norm: 0.7230627791959595, iteration: 383558
loss: 1.0407259464263916,grad_norm: 0.9999991200612157, iteration: 383559
loss: 0.9921506643295288,grad_norm: 0.9462030016420946, iteration: 383560
loss: 0.9688811302185059,grad_norm: 0.9999994721072888, iteration: 383561
loss: 1.0021051168441772,grad_norm: 0.8284842390170587, iteration: 383562
loss: 0.9839597940444946,grad_norm: 0.9713870946161365, iteration: 383563
loss: 1.2275495529174805,grad_norm: 0.9999998988041148, iteration: 383564
loss: 1.017430067062378,grad_norm: 0.8207742900392826, iteration: 383565
loss: 1.0356626510620117,grad_norm: 0.7668760606854561, iteration: 383566
loss: 1.1977684497833252,grad_norm: 0.999999431853859, iteration: 383567
loss: 1.0930280685424805,grad_norm: 0.9999990815887342, iteration: 383568
loss: 1.012813687324524,grad_norm: 0.9395702339921643, iteration: 383569
loss: 1.034430980682373,grad_norm: 0.7842877837610885, iteration: 383570
loss: 1.0381484031677246,grad_norm: 0.7828922539122075, iteration: 383571
loss: 1.0191192626953125,grad_norm: 0.7649725288687255, iteration: 383572
loss: 1.122335433959961,grad_norm: 0.9020401542529223, iteration: 383573
loss: 1.042203426361084,grad_norm: 0.999999810120932, iteration: 383574
loss: 1.0046337842941284,grad_norm: 0.9122609553748835, iteration: 383575
loss: 1.1927152872085571,grad_norm: 0.9999999873742252, iteration: 383576
loss: 1.005707025527954,grad_norm: 1.0000000286393087, iteration: 383577
loss: 1.0057306289672852,grad_norm: 0.8719462916951277, iteration: 383578
loss: 1.0685415267944336,grad_norm: 0.9999998199474864, iteration: 383579
loss: 1.0197919607162476,grad_norm: 0.8209017319518267, iteration: 383580
loss: 0.9901291728019714,grad_norm: 0.783349880427693, iteration: 383581
loss: 0.9689478874206543,grad_norm: 0.7958281389192252, iteration: 383582
loss: 1.0013024806976318,grad_norm: 0.9999992517130503, iteration: 383583
loss: 1.0616992712020874,grad_norm: 0.7802169834283379, iteration: 383584
loss: 0.9867449998855591,grad_norm: 0.7425417186021276, iteration: 383585
loss: 1.009913682937622,grad_norm: 0.8242910654906995, iteration: 383586
loss: 1.0595338344573975,grad_norm: 0.8888790674890311, iteration: 383587
loss: 1.0481672286987305,grad_norm: 0.9999998767018556, iteration: 383588
loss: 1.0175235271453857,grad_norm: 0.7859222616860506, iteration: 383589
loss: 1.0165040493011475,grad_norm: 0.6743682845182865, iteration: 383590
loss: 1.0494904518127441,grad_norm: 0.8465855507013094, iteration: 383591
loss: 1.0140094757080078,grad_norm: 0.7635866165876656, iteration: 383592
loss: 1.0409923791885376,grad_norm: 0.9999991558274637, iteration: 383593
loss: 0.9702782034873962,grad_norm: 0.9999998981404372, iteration: 383594
loss: 1.0515403747558594,grad_norm: 0.9999994493059716, iteration: 383595
loss: 1.0244842767715454,grad_norm: 0.7300061570193517, iteration: 383596
loss: 1.0136874914169312,grad_norm: 0.9521424835835279, iteration: 383597
loss: 1.0253313779830933,grad_norm: 0.9999998769736869, iteration: 383598
loss: 1.0943963527679443,grad_norm: 0.9999999058597573, iteration: 383599
loss: 1.0778865814208984,grad_norm: 0.9999992368498003, iteration: 383600
loss: 1.0693974494934082,grad_norm: 0.9012227786151522, iteration: 383601
loss: 1.0049155950546265,grad_norm: 0.7705290177432663, iteration: 383602
loss: 1.0148509740829468,grad_norm: 0.9999993298789602, iteration: 383603
loss: 1.0128077268600464,grad_norm: 0.9999996288585971, iteration: 383604
loss: 1.021615743637085,grad_norm: 0.6910119821325456, iteration: 383605
loss: 1.090977668762207,grad_norm: 0.8136061267212507, iteration: 383606
loss: 1.0482202768325806,grad_norm: 0.999999304679494, iteration: 383607
loss: 1.0584033727645874,grad_norm: 0.779189093256311, iteration: 383608
loss: 0.986832320690155,grad_norm: 0.8423075821583657, iteration: 383609
loss: 0.9723219275474548,grad_norm: 0.9999994107028979, iteration: 383610
loss: 1.0888099670410156,grad_norm: 0.999999615088031, iteration: 383611
loss: 0.9922686815261841,grad_norm: 0.9346261834023663, iteration: 383612
loss: 1.0705751180648804,grad_norm: 0.9999996632226839, iteration: 383613
loss: 0.9963412284851074,grad_norm: 0.8713857796814568, iteration: 383614
loss: 0.9894595146179199,grad_norm: 0.6568963442433042, iteration: 383615
loss: 1.0297130346298218,grad_norm: 0.8109077326064451, iteration: 383616
loss: 0.96941739320755,grad_norm: 0.7734341911149727, iteration: 383617
loss: 0.9930096864700317,grad_norm: 0.807873030539884, iteration: 383618
loss: 1.052682876586914,grad_norm: 0.9999997123068517, iteration: 383619
loss: 1.028231143951416,grad_norm: 0.9999997748232096, iteration: 383620
loss: 1.0010550022125244,grad_norm: 0.8687257810433483, iteration: 383621
loss: 1.0494403839111328,grad_norm: 0.9999993502892088, iteration: 383622
loss: 1.0030975341796875,grad_norm: 0.6884518093595865, iteration: 383623
loss: 1.2856460809707642,grad_norm: 0.9999999024014796, iteration: 383624
loss: 1.03507399559021,grad_norm: 0.790676715088596, iteration: 383625
loss: 1.0439101457595825,grad_norm: 0.861542588694977, iteration: 383626
loss: 0.9882274866104126,grad_norm: 0.9999999537496562, iteration: 383627
loss: 1.0128039121627808,grad_norm: 0.7548638998140834, iteration: 383628
loss: 1.035293459892273,grad_norm: 0.9999991991974814, iteration: 383629
loss: 0.9670966267585754,grad_norm: 0.7746029126138092, iteration: 383630
loss: 1.0447361469268799,grad_norm: 0.9402865819741136, iteration: 383631
loss: 1.0206300020217896,grad_norm: 0.743678584087252, iteration: 383632
loss: 1.0149223804473877,grad_norm: 0.9999992660938304, iteration: 383633
loss: 0.9991258382797241,grad_norm: 0.8413243128754139, iteration: 383634
loss: 1.0491071939468384,grad_norm: 0.9999990178923165, iteration: 383635
loss: 1.0404915809631348,grad_norm: 0.9223262937562409, iteration: 383636
loss: 1.1506974697113037,grad_norm: 0.9999990968490574, iteration: 383637
loss: 1.0172127485275269,grad_norm: 0.8024103149414292, iteration: 383638
loss: 1.0225987434387207,grad_norm: 0.9257458790379542, iteration: 383639
loss: 1.032058596611023,grad_norm: 0.8529236984837247, iteration: 383640
loss: 1.07119619846344,grad_norm: 0.9999994338935438, iteration: 383641
loss: 1.0154081583023071,grad_norm: 0.8345268610635114, iteration: 383642
loss: 1.0083860158920288,grad_norm: 0.9999999498580031, iteration: 383643
loss: 1.0742316246032715,grad_norm: 0.9999992408517446, iteration: 383644
loss: 1.0086579322814941,grad_norm: 0.7649526119557303, iteration: 383645
loss: 1.0685703754425049,grad_norm: 0.999999842214077, iteration: 383646
loss: 0.9975742101669312,grad_norm: 0.8388040341662186, iteration: 383647
loss: 0.9829078912734985,grad_norm: 0.8591541732263582, iteration: 383648
loss: 1.0435359477996826,grad_norm: 0.9999997733284673, iteration: 383649
loss: 1.0135419368743896,grad_norm: 0.6470469381234988, iteration: 383650
loss: 1.0026326179504395,grad_norm: 0.9372530599446424, iteration: 383651
loss: 1.0021960735321045,grad_norm: 0.8750938193205949, iteration: 383652
loss: 1.010608196258545,grad_norm: 0.7539017931100339, iteration: 383653
loss: 1.0216195583343506,grad_norm: 0.9621244753233587, iteration: 383654
loss: 1.0261887311935425,grad_norm: 0.7730581273970779, iteration: 383655
loss: 1.0153491497039795,grad_norm: 0.7249063479720975, iteration: 383656
loss: 1.0176255702972412,grad_norm: 0.8507283451126085, iteration: 383657
loss: 1.0578393936157227,grad_norm: 0.999999597074601, iteration: 383658
loss: 1.0087069272994995,grad_norm: 0.8185843502518312, iteration: 383659
loss: 0.9913247227668762,grad_norm: 0.9832368255823156, iteration: 383660
loss: 1.0095857381820679,grad_norm: 0.9999991216179646, iteration: 383661
loss: 1.093271255493164,grad_norm: 0.9999992479944663, iteration: 383662
loss: 1.0190614461898804,grad_norm: 0.9999998477646869, iteration: 383663
loss: 1.0044234991073608,grad_norm: 0.8765273229618188, iteration: 383664
loss: 1.0171364545822144,grad_norm: 0.7538506772545908, iteration: 383665
loss: 1.0160880088806152,grad_norm: 0.7015276444822169, iteration: 383666
loss: 1.0290027856826782,grad_norm: 0.9272372517464129, iteration: 383667
loss: 1.0079190731048584,grad_norm: 0.7493565370114434, iteration: 383668
loss: 0.9858265519142151,grad_norm: 0.9999990925371623, iteration: 383669
loss: 1.024415135383606,grad_norm: 0.9350121329944027, iteration: 383670
loss: 1.0101606845855713,grad_norm: 0.8675106969277296, iteration: 383671
loss: 0.9972022771835327,grad_norm: 0.8774145275465016, iteration: 383672
loss: 0.9984870553016663,grad_norm: 0.8301547971436346, iteration: 383673
loss: 1.1145175695419312,grad_norm: 0.9999991676867523, iteration: 383674
loss: 1.0133718252182007,grad_norm: 0.8669755421150211, iteration: 383675
loss: 0.9689367413520813,grad_norm: 0.8662770205874967, iteration: 383676
loss: 1.105794906616211,grad_norm: 0.9999997251288127, iteration: 383677
loss: 1.1650751829147339,grad_norm: 0.9999998358089764, iteration: 383678
loss: 0.9819228649139404,grad_norm: 0.999999122820508, iteration: 383679
loss: 1.0039232969284058,grad_norm: 0.8341776355596281, iteration: 383680
loss: 0.9999673366546631,grad_norm: 0.8655052440009307, iteration: 383681
loss: 0.9999234080314636,grad_norm: 0.8483611317031586, iteration: 383682
loss: 0.9622594714164734,grad_norm: 0.9999991319990403, iteration: 383683
loss: 1.0971012115478516,grad_norm: 0.9999994640751545, iteration: 383684
loss: 1.025393009185791,grad_norm: 0.9999993711563346, iteration: 383685
loss: 0.9630942344665527,grad_norm: 0.9999993477147768, iteration: 383686
loss: 0.9912235736846924,grad_norm: 0.6861391803951036, iteration: 383687
loss: 0.9929397106170654,grad_norm: 0.7674237576618628, iteration: 383688
loss: 1.0414217710494995,grad_norm: 0.9999995204106424, iteration: 383689
loss: 0.9786093235015869,grad_norm: 0.7742182164244954, iteration: 383690
loss: 1.0321600437164307,grad_norm: 0.9999999171073453, iteration: 383691
loss: 0.991357147693634,grad_norm: 0.7821193591307664, iteration: 383692
loss: 1.1504515409469604,grad_norm: 1.0000000076355335, iteration: 383693
loss: 1.0189270973205566,grad_norm: 0.7734038493352734, iteration: 383694
loss: 0.9641854763031006,grad_norm: 0.809769688941522, iteration: 383695
loss: 1.0110983848571777,grad_norm: 0.7674300484255898, iteration: 383696
loss: 0.9846025705337524,grad_norm: 0.7278437412129035, iteration: 383697
loss: 1.003261685371399,grad_norm: 0.8940267916040338, iteration: 383698
loss: 1.0148155689239502,grad_norm: 0.751854330420054, iteration: 383699
loss: 1.1053450107574463,grad_norm: 0.9999998493015212, iteration: 383700
loss: 1.2819546461105347,grad_norm: 0.9999991436617748, iteration: 383701
loss: 0.9876411557197571,grad_norm: 0.8767832853893341, iteration: 383702
loss: 0.9771726727485657,grad_norm: 0.766048272874085, iteration: 383703
loss: 0.9891006350517273,grad_norm: 0.8958446562160839, iteration: 383704
loss: 1.0096073150634766,grad_norm: 0.999999194381159, iteration: 383705
loss: 1.1599912643432617,grad_norm: 0.9999992560940572, iteration: 383706
loss: 1.3610011339187622,grad_norm: 0.999999831677704, iteration: 383707
loss: 1.0110936164855957,grad_norm: 0.7985636681217441, iteration: 383708
loss: 1.0459012985229492,grad_norm: 0.9999998258557351, iteration: 383709
loss: 1.0461121797561646,grad_norm: 0.9999991521015683, iteration: 383710
loss: 1.0894759893417358,grad_norm: 0.9999993956468659, iteration: 383711
loss: 1.037745475769043,grad_norm: 0.999999062045535, iteration: 383712
loss: 1.0510404109954834,grad_norm: 0.971788399863963, iteration: 383713
loss: 1.0415431261062622,grad_norm: 0.917351537352044, iteration: 383714
loss: 1.0666279792785645,grad_norm: 0.9999994113184838, iteration: 383715
loss: 0.9844608306884766,grad_norm: 0.9999991364634471, iteration: 383716
loss: 1.0100682973861694,grad_norm: 0.758197484880489, iteration: 383717
loss: 0.9857786297798157,grad_norm: 0.7815845805656322, iteration: 383718
loss: 1.0015989542007446,grad_norm: 0.7572519112110698, iteration: 383719
loss: 1.028923511505127,grad_norm: 0.7696186142120743, iteration: 383720
loss: 1.0694433450698853,grad_norm: 0.9999997692982736, iteration: 383721
loss: 1.0395857095718384,grad_norm: 0.8096030303343876, iteration: 383722
loss: 0.9724143743515015,grad_norm: 0.6969664031332355, iteration: 383723
loss: 1.0009455680847168,grad_norm: 1.000000018271662, iteration: 383724
loss: 1.0856049060821533,grad_norm: 0.8045424579493528, iteration: 383725
loss: 1.045706868171692,grad_norm: 0.8441654388380793, iteration: 383726
loss: 1.0141741037368774,grad_norm: 0.7783315165737155, iteration: 383727
loss: 0.9899358749389648,grad_norm: 0.8779799631852379, iteration: 383728
loss: 1.0463589429855347,grad_norm: 0.9999990335384907, iteration: 383729
loss: 1.0842140913009644,grad_norm: 0.999999774988076, iteration: 383730
loss: 0.9558073878288269,grad_norm: 0.6974850801898881, iteration: 383731
loss: 1.024104118347168,grad_norm: 0.8217559776043745, iteration: 383732
loss: 0.9916561245918274,grad_norm: 0.8743234992051976, iteration: 383733
loss: 0.9903352856636047,grad_norm: 0.999999703217097, iteration: 383734
loss: 1.034945011138916,grad_norm: 0.8901715265457513, iteration: 383735
loss: 1.0385798215866089,grad_norm: 0.8298599072191384, iteration: 383736
loss: 1.1142929792404175,grad_norm: 0.9999994459302389, iteration: 383737
loss: 0.9811227321624756,grad_norm: 0.9999996271848135, iteration: 383738
loss: 0.9961374402046204,grad_norm: 0.7733982638644754, iteration: 383739
loss: 1.0386850833892822,grad_norm: 0.8589764604704924, iteration: 383740
loss: 1.023011565208435,grad_norm: 0.7411288298604365, iteration: 383741
loss: 1.0010169744491577,grad_norm: 0.8765648639728759, iteration: 383742
loss: 1.0267058610916138,grad_norm: 0.9999993277009326, iteration: 383743
loss: 1.0079264640808105,grad_norm: 0.9999994275952063, iteration: 383744
loss: 1.0801275968551636,grad_norm: 0.9999994441023546, iteration: 383745
loss: 1.0046297311782837,grad_norm: 0.8160581250261754, iteration: 383746
loss: 1.0561329126358032,grad_norm: 0.6919839514650372, iteration: 383747
loss: 0.9827618598937988,grad_norm: 0.8315098331904455, iteration: 383748
loss: 1.1218465566635132,grad_norm: 0.9999998533923283, iteration: 383749
loss: 1.0454754829406738,grad_norm: 1.0000000002211669, iteration: 383750
loss: 1.0820679664611816,grad_norm: 0.7882453276266133, iteration: 383751
loss: 1.07476007938385,grad_norm: 0.999999358675213, iteration: 383752
loss: 1.030603289604187,grad_norm: 0.9999991471731186, iteration: 383753
loss: 1.013959527015686,grad_norm: 0.6807250825228774, iteration: 383754
loss: 1.0114303827285767,grad_norm: 0.9158192665856646, iteration: 383755
loss: 1.0205451250076294,grad_norm: 0.930026164870641, iteration: 383756
loss: 1.009568452835083,grad_norm: 0.7508551961924501, iteration: 383757
loss: 0.9962426424026489,grad_norm: 0.8622032229082067, iteration: 383758
loss: 1.033805012702942,grad_norm: 0.9999997157004848, iteration: 383759
loss: 1.0308563709259033,grad_norm: 0.9999990933537877, iteration: 383760
loss: 0.9687602519989014,grad_norm: 0.824366596519725, iteration: 383761
loss: 1.1576199531555176,grad_norm: 0.9999994637058591, iteration: 383762
loss: 1.0499520301818848,grad_norm: 0.9999994748740905, iteration: 383763
loss: 1.01880943775177,grad_norm: 0.7154290043297489, iteration: 383764
loss: 1.0219244956970215,grad_norm: 0.7240292480973767, iteration: 383765
loss: 1.0928534269332886,grad_norm: 0.7466768176160908, iteration: 383766
loss: 1.0858454704284668,grad_norm: 0.9999993602914564, iteration: 383767
loss: 1.048508882522583,grad_norm: 0.9999998197355047, iteration: 383768
loss: 0.9944266676902771,grad_norm: 0.8910203019664363, iteration: 383769
loss: 1.0224343538284302,grad_norm: 0.8927132395825863, iteration: 383770
loss: 1.1035054922103882,grad_norm: 0.9999998620908199, iteration: 383771
loss: 0.9772648215293884,grad_norm: 0.8646091617506519, iteration: 383772
loss: 0.9691944718360901,grad_norm: 0.7675879308616269, iteration: 383773
loss: 1.0083990097045898,grad_norm: 0.9999997580278257, iteration: 383774
loss: 1.0031423568725586,grad_norm: 0.8842680861396097, iteration: 383775
loss: 1.0591779947280884,grad_norm: 0.9999990123668357, iteration: 383776
loss: 1.0188679695129395,grad_norm: 0.9605097659489726, iteration: 383777
loss: 1.105844497680664,grad_norm: 0.8626739618751565, iteration: 383778
loss: 0.991158127784729,grad_norm: 0.6308771728091128, iteration: 383779
loss: 1.1082262992858887,grad_norm: 0.9999996259484799, iteration: 383780
loss: 0.9919295310974121,grad_norm: 0.9999999032987766, iteration: 383781
loss: 1.1234407424926758,grad_norm: 0.9999993524145522, iteration: 383782
loss: 1.0393030643463135,grad_norm: 0.770980128137794, iteration: 383783
loss: 1.0738451480865479,grad_norm: 0.7249700322806415, iteration: 383784
loss: 1.1120223999023438,grad_norm: 0.9999990906693954, iteration: 383785
loss: 1.0139566659927368,grad_norm: 0.7484527954918717, iteration: 383786
loss: 1.0987176895141602,grad_norm: 0.929233871557679, iteration: 383787
loss: 0.9986398220062256,grad_norm: 0.9999991764455992, iteration: 383788
loss: 1.020206332206726,grad_norm: 0.9999993137298872, iteration: 383789
loss: 1.0444897413253784,grad_norm: 0.9999991238335989, iteration: 383790
loss: 1.0187021493911743,grad_norm: 0.9999995870536644, iteration: 383791
loss: 1.0977773666381836,grad_norm: 0.9999994046162091, iteration: 383792
loss: 1.0925174951553345,grad_norm: 0.9999991814966184, iteration: 383793
loss: 1.0583772659301758,grad_norm: 0.9999998038837463, iteration: 383794
loss: 1.1176711320877075,grad_norm: 0.9999998353222831, iteration: 383795
loss: 1.003366470336914,grad_norm: 0.7599716500290545, iteration: 383796
loss: 1.0346958637237549,grad_norm: 0.9999996411120969, iteration: 383797
loss: 1.0462489128112793,grad_norm: 0.9999992113714592, iteration: 383798
loss: 1.013440728187561,grad_norm: 0.9999995431804153, iteration: 383799
loss: 1.0711220502853394,grad_norm: 0.9999997219364156, iteration: 383800
loss: 0.9475261569023132,grad_norm: 0.9106074881504028, iteration: 383801
loss: 1.0270817279815674,grad_norm: 0.9999997618409598, iteration: 383802
loss: 1.0040313005447388,grad_norm: 0.9418925491351668, iteration: 383803
loss: 1.0295525789260864,grad_norm: 0.7910640311314372, iteration: 383804
loss: 0.9620996713638306,grad_norm: 0.7978800124249157, iteration: 383805
loss: 1.0859200954437256,grad_norm: 0.9999998111135407, iteration: 383806
loss: 1.0713694095611572,grad_norm: 0.985971676477567, iteration: 383807
loss: 1.039048194885254,grad_norm: 0.9999998462708697, iteration: 383808
loss: 1.024605393409729,grad_norm: 0.999999776692143, iteration: 383809
loss: 0.9840540289878845,grad_norm: 0.8996520315307356, iteration: 383810
loss: 1.0037670135498047,grad_norm: 0.9724431645822775, iteration: 383811
loss: 0.9797833561897278,grad_norm: 0.7417116171603945, iteration: 383812
loss: 1.0094388723373413,grad_norm: 0.7532242292614778, iteration: 383813
loss: 1.0879409313201904,grad_norm: 0.8871895747883863, iteration: 383814
loss: 1.0423128604888916,grad_norm: 0.999999617779043, iteration: 383815
loss: 0.9935628175735474,grad_norm: 0.937877228565468, iteration: 383816
loss: 0.980237603187561,grad_norm: 0.7986515966971595, iteration: 383817
loss: 0.9731833338737488,grad_norm: 0.8301070424660697, iteration: 383818
loss: 1.0342886447906494,grad_norm: 0.7826735437364574, iteration: 383819
loss: 1.032555341720581,grad_norm: 0.8358246213739173, iteration: 383820
loss: 1.0091114044189453,grad_norm: 0.8342074241326226, iteration: 383821
loss: 1.0334008932113647,grad_norm: 0.9999991198012371, iteration: 383822
loss: 1.0477765798568726,grad_norm: 0.9738908021160315, iteration: 383823
loss: 1.0573574304580688,grad_norm: 0.9999990255610558, iteration: 383824
loss: 0.9812702536582947,grad_norm: 0.920103809046837, iteration: 383825
loss: 1.029942274093628,grad_norm: 0.962309345260509, iteration: 383826
loss: 1.1236408948898315,grad_norm: 0.8441245588670372, iteration: 383827
loss: 1.0267446041107178,grad_norm: 0.9094955972371362, iteration: 383828
loss: 0.9621530175209045,grad_norm: 0.7934468761899118, iteration: 383829
loss: 1.0383827686309814,grad_norm: 0.9999993234852258, iteration: 383830
loss: 0.9743911027908325,grad_norm: 0.8848681844807631, iteration: 383831
loss: 1.0366015434265137,grad_norm: 0.8569221560922726, iteration: 383832
loss: 0.9848089814186096,grad_norm: 0.7344162787921428, iteration: 383833
loss: 1.0046069622039795,grad_norm: 0.823314758088044, iteration: 383834
loss: 1.0238916873931885,grad_norm: 0.8064913553789239, iteration: 383835
loss: 1.0086302757263184,grad_norm: 0.8821039059290142, iteration: 383836
loss: 1.0023518800735474,grad_norm: 0.9999990017642929, iteration: 383837
loss: 0.9878664016723633,grad_norm: 0.7330735194354352, iteration: 383838
loss: 1.033588171005249,grad_norm: 0.739799396640863, iteration: 383839
loss: 1.0117357969284058,grad_norm: 0.999999074754712, iteration: 383840
loss: 1.0426139831542969,grad_norm: 0.6760719011564018, iteration: 383841
loss: 0.96549391746521,grad_norm: 0.8220774765546409, iteration: 383842
loss: 1.0623677968978882,grad_norm: 0.9999990624148251, iteration: 383843
loss: 1.0124711990356445,grad_norm: 0.8548047705084685, iteration: 383844
loss: 1.026269555091858,grad_norm: 0.8703662949029954, iteration: 383845
loss: 1.1149688959121704,grad_norm: 0.9999998795809857, iteration: 383846
loss: 1.0811022520065308,grad_norm: 0.8574346699927173, iteration: 383847
loss: 1.0268718004226685,grad_norm: 0.9999993484714257, iteration: 383848
loss: 1.0273617506027222,grad_norm: 0.9999999516332699, iteration: 383849
loss: 1.0249966382980347,grad_norm: 0.9999994134024491, iteration: 383850
loss: 1.0255177021026611,grad_norm: 0.9481476042136878, iteration: 383851
loss: 1.0436545610427856,grad_norm: 0.9999994987460346, iteration: 383852
loss: 1.06300687789917,grad_norm: 0.9999995500679595, iteration: 383853
loss: 1.180591344833374,grad_norm: 0.999999900840116, iteration: 383854
loss: 1.172284483909607,grad_norm: 0.9999998661906108, iteration: 383855
loss: 0.9946321845054626,grad_norm: 0.9582506615071128, iteration: 383856
loss: 1.06261146068573,grad_norm: 0.8555248265326306, iteration: 383857
loss: 1.0035029649734497,grad_norm: 0.9999990823445103, iteration: 383858
loss: 0.990216851234436,grad_norm: 0.7031006995539592, iteration: 383859
loss: 0.9638127684593201,grad_norm: 0.7590777164542065, iteration: 383860
loss: 1.0040740966796875,grad_norm: 0.9999990351431074, iteration: 383861
loss: 1.002007007598877,grad_norm: 0.9918408924117074, iteration: 383862
loss: 0.981339693069458,grad_norm: 0.7644115013027135, iteration: 383863
loss: 1.0516070127487183,grad_norm: 0.9999992925082969, iteration: 383864
loss: 1.0114864110946655,grad_norm: 0.7075381624268833, iteration: 383865
loss: 1.022699236869812,grad_norm: 0.8526711455247105, iteration: 383866
loss: 1.0067387819290161,grad_norm: 0.8507504999321645, iteration: 383867
loss: 1.039527416229248,grad_norm: 0.8283094476197082, iteration: 383868
loss: 1.0231170654296875,grad_norm: 0.9999992028849308, iteration: 383869
loss: 0.9859721660614014,grad_norm: 0.8078149984953509, iteration: 383870
loss: 1.0083914995193481,grad_norm: 0.8829726927137609, iteration: 383871
loss: 1.0530049800872803,grad_norm: 0.9999990109101299, iteration: 383872
loss: 0.9948906898498535,grad_norm: 0.7805124448379026, iteration: 383873
loss: 1.0179810523986816,grad_norm: 0.999999034384371, iteration: 383874
loss: 1.0684857368469238,grad_norm: 0.9999996984778975, iteration: 383875
loss: 1.0046218633651733,grad_norm: 0.7589623178797537, iteration: 383876
loss: 1.0269237756729126,grad_norm: 0.9999990458797625, iteration: 383877
loss: 1.0043038129806519,grad_norm: 0.9999992470809829, iteration: 383878
loss: 1.071354866027832,grad_norm: 0.9999991250943494, iteration: 383879
loss: 0.9947922825813293,grad_norm: 0.8060236899326112, iteration: 383880
loss: 1.0586705207824707,grad_norm: 0.9434068500688594, iteration: 383881
loss: 0.9957192540168762,grad_norm: 0.9224691634316979, iteration: 383882
loss: 0.9893320202827454,grad_norm: 0.7372499879356852, iteration: 383883
loss: 1.0090575218200684,grad_norm: 0.8411351715682184, iteration: 383884
loss: 0.9910770058631897,grad_norm: 0.9135643537419275, iteration: 383885
loss: 1.0647684335708618,grad_norm: 0.9445940424136648, iteration: 383886
loss: 1.0205118656158447,grad_norm: 0.7276465898297297, iteration: 383887
loss: 1.0339967012405396,grad_norm: 0.84477036685897, iteration: 383888
loss: 1.0534303188323975,grad_norm: 0.7449496832708843, iteration: 383889
loss: 0.9544063210487366,grad_norm: 0.7234559279885784, iteration: 383890
loss: 1.0179738998413086,grad_norm: 0.8317358102642264, iteration: 383891
loss: 1.0160046815872192,grad_norm: 0.6767915127252595, iteration: 383892
loss: 1.0196622610092163,grad_norm: 0.9625968068005331, iteration: 383893
loss: 1.071577548980713,grad_norm: 0.9999995764655429, iteration: 383894
loss: 1.055384635925293,grad_norm: 0.8478077355249216, iteration: 383895
loss: 1.023911952972412,grad_norm: 0.7411559490076448, iteration: 383896
loss: 0.9500077962875366,grad_norm: 0.7373218186119572, iteration: 383897
loss: 1.018343210220337,grad_norm: 0.8231383474937658, iteration: 383898
loss: 1.020448088645935,grad_norm: 0.9892621387563212, iteration: 383899
loss: 1.0226131677627563,grad_norm: 0.8229889410783382, iteration: 383900
loss: 1.0145090818405151,grad_norm: 0.690588156542662, iteration: 383901
loss: 1.0629616975784302,grad_norm: 0.8656409768249578, iteration: 383902
loss: 1.00121009349823,grad_norm: 0.8583347320661545, iteration: 383903
loss: 1.00255286693573,grad_norm: 0.9999994950932821, iteration: 383904
loss: 1.0194787979125977,grad_norm: 0.999999091565241, iteration: 383905
loss: 1.0400395393371582,grad_norm: 0.790769885367371, iteration: 383906
loss: 1.224401593208313,grad_norm: 0.9999994983521276, iteration: 383907
loss: 1.0401968955993652,grad_norm: 0.999999118974156, iteration: 383908
loss: 1.0225920677185059,grad_norm: 0.9943002731347895, iteration: 383909
loss: 1.0019400119781494,grad_norm: 0.9389553260412227, iteration: 383910
loss: 1.0145002603530884,grad_norm: 0.6276812114999886, iteration: 383911
loss: 1.0439828634262085,grad_norm: 0.8443757114184514, iteration: 383912
loss: 1.014420986175537,grad_norm: 0.9999992507602329, iteration: 383913
loss: 0.9831628203392029,grad_norm: 0.7256226963497371, iteration: 383914
loss: 0.9824087023735046,grad_norm: 0.7599540958562612, iteration: 383915
loss: 1.0775072574615479,grad_norm: 0.9999991722361276, iteration: 383916
loss: 1.0313700437545776,grad_norm: 0.8647089284029345, iteration: 383917
loss: 1.036632776260376,grad_norm: 0.9999993627680414, iteration: 383918
loss: 1.0857415199279785,grad_norm: 0.6658937614240097, iteration: 383919
loss: 0.9882742762565613,grad_norm: 0.7891847093998214, iteration: 383920
loss: 1.039528489112854,grad_norm: 0.8036080123673757, iteration: 383921
loss: 0.986274242401123,grad_norm: 0.8782640279672251, iteration: 383922
loss: 0.9779849052429199,grad_norm: 0.7800432745831678, iteration: 383923
loss: 1.0068795680999756,grad_norm: 0.7363443661630468, iteration: 383924
loss: 0.9923115372657776,grad_norm: 0.8525285689857455, iteration: 383925
loss: 1.0744982957839966,grad_norm: 0.9999995032016414, iteration: 383926
loss: 1.0081232786178589,grad_norm: 0.7193080890230925, iteration: 383927
loss: 1.044048547744751,grad_norm: 1.0000000070979154, iteration: 383928
loss: 1.0466969013214111,grad_norm: 0.9999999103567621, iteration: 383929
loss: 1.021794319152832,grad_norm: 0.9999996081426938, iteration: 383930
loss: 1.2344441413879395,grad_norm: 0.999999645254935, iteration: 383931
loss: 1.058722734451294,grad_norm: 0.8097388432920152, iteration: 383932
loss: 1.0724352598190308,grad_norm: 0.999999220382488, iteration: 383933
loss: 0.9953306317329407,grad_norm: 0.8994889055124695, iteration: 383934
loss: 0.9584444761276245,grad_norm: 0.7358830794500145, iteration: 383935
loss: 1.024545431137085,grad_norm: 0.873755933885464, iteration: 383936
loss: 0.9964854717254639,grad_norm: 0.8301454329095777, iteration: 383937
loss: 0.9826420545578003,grad_norm: 0.7668125111778138, iteration: 383938
loss: 0.9755551218986511,grad_norm: 0.8124602749303367, iteration: 383939
loss: 1.0827187299728394,grad_norm: 0.9999996800291365, iteration: 383940
loss: 0.9651833176612854,grad_norm: 0.7987455093271748, iteration: 383941
loss: 0.9925127029418945,grad_norm: 0.9999997345276082, iteration: 383942
loss: 0.9987112283706665,grad_norm: 0.8036138373571498, iteration: 383943
loss: 0.9694443345069885,grad_norm: 0.7546740036782059, iteration: 383944
loss: 1.0521312952041626,grad_norm: 0.9999995368849235, iteration: 383945
loss: 1.0164575576782227,grad_norm: 0.8886121411527689, iteration: 383946
loss: 1.0466642379760742,grad_norm: 0.9997368292641082, iteration: 383947
loss: 1.0938373804092407,grad_norm: 0.999999323728637, iteration: 383948
loss: 1.0016148090362549,grad_norm: 0.8385614826575859, iteration: 383949
loss: 1.0046156644821167,grad_norm: 0.999999384103377, iteration: 383950
loss: 1.0378713607788086,grad_norm: 0.9999998739838235, iteration: 383951
loss: 1.0919448137283325,grad_norm: 0.9999999090720564, iteration: 383952
loss: 1.0351842641830444,grad_norm: 0.8651965624025137, iteration: 383953
loss: 1.023189902305603,grad_norm: 0.9255254462866647, iteration: 383954
loss: 0.9866291284561157,grad_norm: 0.7568008234753907, iteration: 383955
loss: 1.0113693475723267,grad_norm: 0.8237815257882006, iteration: 383956
loss: 1.0525034666061401,grad_norm: 0.9999996242062508, iteration: 383957
loss: 0.9659309983253479,grad_norm: 0.9485909145024095, iteration: 383958
loss: 0.9543404579162598,grad_norm: 0.7841468626373352, iteration: 383959
loss: 1.0926387310028076,grad_norm: 0.9999991195267732, iteration: 383960
loss: 0.9811134934425354,grad_norm: 0.705267522795249, iteration: 383961
loss: 1.033137559890747,grad_norm: 0.9999993606078836, iteration: 383962
loss: 0.9847740530967712,grad_norm: 0.7863932284531561, iteration: 383963
loss: 1.0341368913650513,grad_norm: 0.9999998768853903, iteration: 383964
loss: 1.033453345298767,grad_norm: 0.9999996474192432, iteration: 383965
loss: 1.0484100580215454,grad_norm: 0.999999558520538, iteration: 383966
loss: 1.0159194469451904,grad_norm: 0.9999994255813669, iteration: 383967
loss: 1.0285210609436035,grad_norm: 0.8164415112920735, iteration: 383968
loss: 1.0455387830734253,grad_norm: 0.9252056786721313, iteration: 383969
loss: 1.024825930595398,grad_norm: 0.9083251813379988, iteration: 383970
loss: 1.004996657371521,grad_norm: 0.9999997436652068, iteration: 383971
loss: 1.0985095500946045,grad_norm: 0.9999998852630637, iteration: 383972
loss: 1.1826800107955933,grad_norm: 0.999999856136665, iteration: 383973
loss: 1.0964808464050293,grad_norm: 0.9999996611583656, iteration: 383974
loss: 1.0102912187576294,grad_norm: 0.7389975251832726, iteration: 383975
loss: 1.0011119842529297,grad_norm: 0.9751934984791252, iteration: 383976
loss: 1.0570265054702759,grad_norm: 0.796433056550664, iteration: 383977
loss: 0.9906059503555298,grad_norm: 0.9999990593033736, iteration: 383978
loss: 0.9934089183807373,grad_norm: 0.8421329435107453, iteration: 383979
loss: 1.024600625038147,grad_norm: 0.9264662614462705, iteration: 383980
loss: 1.0413628816604614,grad_norm: 0.9055780469164733, iteration: 383981
loss: 0.9763389229774475,grad_norm: 0.8443785401048816, iteration: 383982
loss: 0.9905546307563782,grad_norm: 0.8076288023989412, iteration: 383983
loss: 0.9872458577156067,grad_norm: 0.8338884603399367, iteration: 383984
loss: 1.0221863985061646,grad_norm: 0.7568673933568965, iteration: 383985
loss: 1.0138317346572876,grad_norm: 0.9268214455310464, iteration: 383986
loss: 1.1471898555755615,grad_norm: 0.9999998612802978, iteration: 383987
loss: 1.003138542175293,grad_norm: 0.7654630095744425, iteration: 383988
loss: 1.0144288539886475,grad_norm: 0.6731112877703329, iteration: 383989
loss: 0.9991300106048584,grad_norm: 0.671058097869603, iteration: 383990
loss: 0.9982014894485474,grad_norm: 0.9999996173305873, iteration: 383991
loss: 0.9741647839546204,grad_norm: 0.8159033762925446, iteration: 383992
loss: 1.01677405834198,grad_norm: 0.9135614257271412, iteration: 383993
loss: 0.9942576885223389,grad_norm: 0.8099012912658095, iteration: 383994
loss: 0.9520310163497925,grad_norm: 0.9021033916940976, iteration: 383995
loss: 0.9899797439575195,grad_norm: 0.9269669086738004, iteration: 383996
loss: 1.0533554553985596,grad_norm: 0.7267702433351799, iteration: 383997
loss: 1.0533132553100586,grad_norm: 0.9999997734636736, iteration: 383998
loss: 1.1349875926971436,grad_norm: 0.9357155539479742, iteration: 383999
loss: 1.0609577894210815,grad_norm: 0.9999998009096837, iteration: 384000
loss: 0.9648738503456116,grad_norm: 0.7291397084712483, iteration: 384001
loss: 0.9948866963386536,grad_norm: 0.7366421412401147, iteration: 384002
loss: 0.9638076424598694,grad_norm: 0.8596158084196635, iteration: 384003
loss: 0.9898587465286255,grad_norm: 0.7822940695735598, iteration: 384004
loss: 1.0127384662628174,grad_norm: 0.8728635416998005, iteration: 384005
loss: 1.0116490125656128,grad_norm: 0.8555218018335455, iteration: 384006
loss: 1.0050156116485596,grad_norm: 0.6854355359668158, iteration: 384007
loss: 1.0574636459350586,grad_norm: 0.9999994483513115, iteration: 384008
loss: 1.0012751817703247,grad_norm: 0.7684822491682222, iteration: 384009
loss: 0.9996855854988098,grad_norm: 0.735254079722038, iteration: 384010
loss: 1.0037963390350342,grad_norm: 0.9999999272376832, iteration: 384011
loss: 0.9943299889564514,grad_norm: 0.9102788570469764, iteration: 384012
loss: 1.2247148752212524,grad_norm: 0.9999997385914502, iteration: 384013
loss: 0.9936339855194092,grad_norm: 0.8553653096405934, iteration: 384014
loss: 0.9832518100738525,grad_norm: 0.8209454134410431, iteration: 384015
loss: 0.9714125990867615,grad_norm: 0.7460261761847602, iteration: 384016
loss: 0.9847634434700012,grad_norm: 0.8307446087705921, iteration: 384017
loss: 1.0657652616500854,grad_norm: 0.9999999616281164, iteration: 384018
loss: 1.0252701044082642,grad_norm: 0.6691054076533939, iteration: 384019
loss: 1.0118417739868164,grad_norm: 0.7740904492886715, iteration: 384020
loss: 1.024062991142273,grad_norm: 0.6896745132562525, iteration: 384021
loss: 1.0613795518875122,grad_norm: 0.999999860033374, iteration: 384022
loss: 1.0488489866256714,grad_norm: 0.7574892865732655, iteration: 384023
loss: 0.9807319045066833,grad_norm: 0.8288165978250311, iteration: 384024
loss: 0.9805387258529663,grad_norm: 0.7769874864368139, iteration: 384025
loss: 1.008993148803711,grad_norm: 0.686952115826486, iteration: 384026
loss: 1.0107848644256592,grad_norm: 0.9999995751911015, iteration: 384027
loss: 1.0244492292404175,grad_norm: 0.7493838453117962, iteration: 384028
loss: 1.1158156394958496,grad_norm: 0.9999992054250687, iteration: 384029
loss: 1.0127028226852417,grad_norm: 0.9999989608426092, iteration: 384030
loss: 0.9619738459587097,grad_norm: 0.9999998641099295, iteration: 384031
loss: 1.0343354940414429,grad_norm: 0.9999995646006636, iteration: 384032
loss: 1.1094027757644653,grad_norm: 0.9999992135951411, iteration: 384033
loss: 1.0029395818710327,grad_norm: 0.6625025556003501, iteration: 384034
loss: 1.027508020401001,grad_norm: 0.9999996965350599, iteration: 384035
loss: 1.053185224533081,grad_norm: 0.892356666647808, iteration: 384036
loss: 1.0174707174301147,grad_norm: 0.7428980512941562, iteration: 384037
loss: 1.008942723274231,grad_norm: 0.7508184118694808, iteration: 384038
loss: 1.002130150794983,grad_norm: 0.8474338206801333, iteration: 384039
loss: 0.9439751505851746,grad_norm: 0.7672201792096288, iteration: 384040
loss: 1.0342988967895508,grad_norm: 0.9999992452012774, iteration: 384041
loss: 1.0007964372634888,grad_norm: 0.8076203907932251, iteration: 384042
loss: 0.9505059123039246,grad_norm: 0.8824588043314405, iteration: 384043
loss: 1.0418161153793335,grad_norm: 0.8762731555116101, iteration: 384044
loss: 0.9977485537528992,grad_norm: 0.6944939925953904, iteration: 384045
loss: 1.0183210372924805,grad_norm: 0.9565497458721212, iteration: 384046
loss: 1.0274527072906494,grad_norm: 0.6859958125572718, iteration: 384047
loss: 1.0063971281051636,grad_norm: 0.7193285046566202, iteration: 384048
loss: 1.007621169090271,grad_norm: 0.7902983886254366, iteration: 384049
loss: 1.0086084604263306,grad_norm: 0.9963163971033802, iteration: 384050
loss: 1.0077570676803589,grad_norm: 0.7900412589379457, iteration: 384051
loss: 0.993586003780365,grad_norm: 0.7037645209167606, iteration: 384052
loss: 0.9641056060791016,grad_norm: 0.7702102863741164, iteration: 384053
loss: 1.011637568473816,grad_norm: 0.8496609498587535, iteration: 384054
loss: 0.9957727193832397,grad_norm: 0.8114390496261303, iteration: 384055
loss: 0.9776511788368225,grad_norm: 0.7854490497109705, iteration: 384056
loss: 0.9785081744194031,grad_norm: 0.8542733782399135, iteration: 384057
loss: 0.974822461605072,grad_norm: 0.7373055171072155, iteration: 384058
loss: 1.0014007091522217,grad_norm: 0.9205348057983609, iteration: 384059
loss: 1.0066275596618652,grad_norm: 0.6851787166047607, iteration: 384060
loss: 1.0561535358428955,grad_norm: 0.9999993975125182, iteration: 384061
loss: 0.9994351863861084,grad_norm: 0.925756920363088, iteration: 384062
loss: 0.981173038482666,grad_norm: 0.7801832005591032, iteration: 384063
loss: 1.0071488618850708,grad_norm: 0.9999992205077083, iteration: 384064
loss: 1.0003662109375,grad_norm: 0.6712296274887534, iteration: 384065
loss: 0.9959452152252197,grad_norm: 0.7377337191686582, iteration: 384066
loss: 0.9779412746429443,grad_norm: 0.9040399439385038, iteration: 384067
loss: 0.9985915422439575,grad_norm: 0.8372537714400363, iteration: 384068
loss: 1.0231074094772339,grad_norm: 0.7485142092605205, iteration: 384069
loss: 1.0431137084960938,grad_norm: 0.8048698233717559, iteration: 384070
loss: 0.9660406112670898,grad_norm: 0.7051849599003094, iteration: 384071
loss: 0.9502582550048828,grad_norm: 0.838684032632097, iteration: 384072
loss: 1.0201928615570068,grad_norm: 0.843061275215284, iteration: 384073
loss: 1.0186781883239746,grad_norm: 0.8350765301055654, iteration: 384074
loss: 1.008675456047058,grad_norm: 0.8138144712641399, iteration: 384075
loss: 1.112473726272583,grad_norm: 0.9999990913132607, iteration: 384076
loss: 1.1290827989578247,grad_norm: 0.9999997289194917, iteration: 384077
loss: 0.9820172786712646,grad_norm: 0.7782259859318447, iteration: 384078
loss: 0.9904881715774536,grad_norm: 0.8977835119775409, iteration: 384079
loss: 0.9964848160743713,grad_norm: 0.9999990439451046, iteration: 384080
loss: 0.9794996380805969,grad_norm: 0.9999991693174395, iteration: 384081
loss: 0.9995473027229309,grad_norm: 0.9278087626364488, iteration: 384082
loss: 1.0166114568710327,grad_norm: 0.9547383952509367, iteration: 384083
loss: 1.0013216733932495,grad_norm: 0.9443308722056654, iteration: 384084
loss: 0.984106719493866,grad_norm: 0.9145622196097941, iteration: 384085
loss: 0.9766719937324524,grad_norm: 0.7841648307630364, iteration: 384086
loss: 0.9883947968482971,grad_norm: 0.7383089139599472, iteration: 384087
loss: 1.03755784034729,grad_norm: 0.9999989267210185, iteration: 384088
loss: 1.018756628036499,grad_norm: 0.6417586273792999, iteration: 384089
loss: 1.021911859512329,grad_norm: 0.8736781039920618, iteration: 384090
loss: 0.992855429649353,grad_norm: 0.9211512369393564, iteration: 384091
loss: 1.0010334253311157,grad_norm: 0.809868137797847, iteration: 384092
loss: 1.0095700025558472,grad_norm: 0.9197746477598576, iteration: 384093
loss: 0.9562641382217407,grad_norm: 0.8854260961648447, iteration: 384094
loss: 1.0150929689407349,grad_norm: 0.889703350197508, iteration: 384095
loss: 1.03192937374115,grad_norm: 0.6832573166822754, iteration: 384096
loss: 1.0182477235794067,grad_norm: 0.9999997198267775, iteration: 384097
loss: 0.968590259552002,grad_norm: 0.9088090885153526, iteration: 384098
loss: 0.9527494311332703,grad_norm: 0.7674583841324136, iteration: 384099
loss: 1.0223536491394043,grad_norm: 0.9999996712991661, iteration: 384100
loss: 0.9882945418357849,grad_norm: 0.7677541243981656, iteration: 384101
loss: 1.0302835702896118,grad_norm: 0.9999995034844423, iteration: 384102
loss: 0.9986077547073364,grad_norm: 0.7758632952816067, iteration: 384103
loss: 1.0210195779800415,grad_norm: 0.8505378858346349, iteration: 384104
loss: 0.9701284766197205,grad_norm: 0.8781058585352035, iteration: 384105
loss: 1.0180026292800903,grad_norm: 0.8499231232537062, iteration: 384106
loss: 1.031516432762146,grad_norm: 0.8312625485448517, iteration: 384107
loss: 1.018730878829956,grad_norm: 0.9999995504131167, iteration: 384108
loss: 0.9873915910720825,grad_norm: 0.7119565379139234, iteration: 384109
loss: 1.0642937421798706,grad_norm: 0.999999755163215, iteration: 384110
loss: 1.0248667001724243,grad_norm: 0.867282840519521, iteration: 384111
loss: 1.0608642101287842,grad_norm: 0.6464710784699602, iteration: 384112
loss: 1.033765196800232,grad_norm: 0.9721607074271672, iteration: 384113
loss: 0.9775385856628418,grad_norm: 0.7730632816825697, iteration: 384114
loss: 0.9959756135940552,grad_norm: 0.9531665768598676, iteration: 384115
loss: 1.0267854928970337,grad_norm: 0.7675088115902573, iteration: 384116
loss: 0.9702100157737732,grad_norm: 0.9432320810369418, iteration: 384117
loss: 0.9685874581336975,grad_norm: 0.8259712682610812, iteration: 384118
loss: 1.0102955102920532,grad_norm: 0.8344851833267551, iteration: 384119
loss: 0.9754335284233093,grad_norm: 0.9690922216896957, iteration: 384120
loss: 1.0210988521575928,grad_norm: 0.9999993788634124, iteration: 384121
loss: 1.025665521621704,grad_norm: 0.8204366152812629, iteration: 384122
loss: 0.9970852136611938,grad_norm: 0.7442831126987439, iteration: 384123
loss: 1.1261643171310425,grad_norm: 0.9999997971300881, iteration: 384124
loss: 1.0611836910247803,grad_norm: 0.9999997216102704, iteration: 384125
loss: 1.054032325744629,grad_norm: 0.999999840466115, iteration: 384126
loss: 1.0132750272750854,grad_norm: 0.9999994677655502, iteration: 384127
loss: 0.9956453442573547,grad_norm: 0.9999990727459769, iteration: 384128
loss: 0.9955732822418213,grad_norm: 0.8196384926943008, iteration: 384129
loss: 0.976046085357666,grad_norm: 0.9999994914455486, iteration: 384130
loss: 1.052319884300232,grad_norm: 0.9594556784729996, iteration: 384131
loss: 0.9967185854911804,grad_norm: 0.7466577072218326, iteration: 384132
loss: 1.070313572883606,grad_norm: 0.8082396419011921, iteration: 384133
loss: 0.9958102107048035,grad_norm: 0.8516834289517289, iteration: 384134
loss: 0.9723585247993469,grad_norm: 0.7330020930846423, iteration: 384135
loss: 0.965880811214447,grad_norm: 0.8610109719529875, iteration: 384136
loss: 0.9886383414268494,grad_norm: 0.7137737404953918, iteration: 384137
loss: 1.0290427207946777,grad_norm: 0.9999996589211424, iteration: 384138
loss: 1.0536072254180908,grad_norm: 0.9999997419341317, iteration: 384139
loss: 1.0132825374603271,grad_norm: 0.7241607196667441, iteration: 384140
loss: 1.0245367288589478,grad_norm: 0.87797308041137, iteration: 384141
loss: 0.9795677065849304,grad_norm: 0.999999393830065, iteration: 384142
loss: 1.0023223161697388,grad_norm: 0.7781267334707129, iteration: 384143
loss: 0.946278989315033,grad_norm: 0.7690769428391303, iteration: 384144
loss: 0.9844570755958557,grad_norm: 0.910808563466879, iteration: 384145
loss: 0.9724821448326111,grad_norm: 0.7802347809076271, iteration: 384146
loss: 0.999443531036377,grad_norm: 0.89469526239602, iteration: 384147
loss: 1.0399580001831055,grad_norm: 0.8326308728717411, iteration: 384148
loss: 0.9606409668922424,grad_norm: 0.8797327363387603, iteration: 384149
loss: 1.0631601810455322,grad_norm: 0.9999992197852453, iteration: 384150
loss: 1.0212790966033936,grad_norm: 0.7672768056851057, iteration: 384151
loss: 0.9573789834976196,grad_norm: 0.8095015562234914, iteration: 384152
loss: 1.0706636905670166,grad_norm: 0.9013577821684763, iteration: 384153
loss: 1.0140503644943237,grad_norm: 0.7707047383724854, iteration: 384154
loss: 0.9969844222068787,grad_norm: 0.608223352988334, iteration: 384155
loss: 1.015601634979248,grad_norm: 0.7999563832958072, iteration: 384156
loss: 1.017299771308899,grad_norm: 0.7335907282844175, iteration: 384157
loss: 1.0146766901016235,grad_norm: 0.7955277088808762, iteration: 384158
loss: 1.0168375968933105,grad_norm: 0.7666707357321961, iteration: 384159
loss: 1.0577927827835083,grad_norm: 0.9999992967117208, iteration: 384160
loss: 1.0049504041671753,grad_norm: 0.7463137929016825, iteration: 384161
loss: 1.012808084487915,grad_norm: 0.6851506345578738, iteration: 384162
loss: 1.134725570678711,grad_norm: 0.9432536968423321, iteration: 384163
loss: 1.0053223371505737,grad_norm: 0.7318098408568942, iteration: 384164
loss: 0.9852181077003479,grad_norm: 0.828348837754191, iteration: 384165
loss: 1.0002912282943726,grad_norm: 0.8619537228656629, iteration: 384166
loss: 0.9806503653526306,grad_norm: 0.8514470561639227, iteration: 384167
loss: 1.0139158964157104,grad_norm: 0.782490370981387, iteration: 384168
loss: 0.9850354194641113,grad_norm: 0.8781298345775383, iteration: 384169
loss: 1.0446208715438843,grad_norm: 0.8108744238001934, iteration: 384170
loss: 1.0326930284500122,grad_norm: 0.999999257819991, iteration: 384171
loss: 1.032922387123108,grad_norm: 0.8145502552731728, iteration: 384172
loss: 1.0467616319656372,grad_norm: 0.9999992666873573, iteration: 384173
loss: 0.9979148507118225,grad_norm: 0.8307412917020318, iteration: 384174
loss: 0.9936901926994324,grad_norm: 0.8186340228376264, iteration: 384175
loss: 1.0737769603729248,grad_norm: 0.9999991348175162, iteration: 384176
loss: 0.9803863167762756,grad_norm: 0.7229489313074875, iteration: 384177
loss: 0.9784688353538513,grad_norm: 0.9432796976378687, iteration: 384178
loss: 1.0688430070877075,grad_norm: 0.762511121839385, iteration: 384179
loss: 0.9671227335929871,grad_norm: 0.7360583143591138, iteration: 384180
loss: 0.9711424708366394,grad_norm: 0.8501259417177663, iteration: 384181
loss: 0.9964897036552429,grad_norm: 0.8104638228321469, iteration: 384182
loss: 1.0263535976409912,grad_norm: 0.9643700423867062, iteration: 384183
loss: 0.9824033379554749,grad_norm: 0.7864434248795259, iteration: 384184
loss: 0.9753884673118591,grad_norm: 0.8499971628748987, iteration: 384185
loss: 1.0140275955200195,grad_norm: 0.8895787251582639, iteration: 384186
loss: 1.0425056219100952,grad_norm: 0.6673008167423965, iteration: 384187
loss: 0.989306628704071,grad_norm: 0.7190359697802068, iteration: 384188
loss: 0.9595063328742981,grad_norm: 0.8755431910077409, iteration: 384189
loss: 1.0224311351776123,grad_norm: 0.8998301879885027, iteration: 384190
loss: 0.9778732657432556,grad_norm: 0.78852162795673, iteration: 384191
loss: 1.0391236543655396,grad_norm: 0.8166817463899163, iteration: 384192
loss: 1.0063871145248413,grad_norm: 0.6759034933228313, iteration: 384193
loss: 0.983234167098999,grad_norm: 0.9999993084930131, iteration: 384194
loss: 1.0569193363189697,grad_norm: 0.999999915379395, iteration: 384195
loss: 1.0035005807876587,grad_norm: 0.9999990124800455, iteration: 384196
loss: 1.1234358549118042,grad_norm: 0.9999996194763673, iteration: 384197
loss: 1.1733063459396362,grad_norm: 0.9999996304416345, iteration: 384198
loss: 1.0063199996948242,grad_norm: 0.6972784644390184, iteration: 384199
loss: 0.9945194721221924,grad_norm: 0.9550149277273953, iteration: 384200
loss: 1.0062899589538574,grad_norm: 0.9157826131879891, iteration: 384201
loss: 1.0159080028533936,grad_norm: 0.9999997068366531, iteration: 384202
loss: 0.9701316356658936,grad_norm: 0.8873707125351319, iteration: 384203
loss: 1.0542707443237305,grad_norm: 0.9999993433061214, iteration: 384204
loss: 1.0484613180160522,grad_norm: 0.7716386163354582, iteration: 384205
loss: 1.005173921585083,grad_norm: 0.7550545916659582, iteration: 384206
loss: 1.0173356533050537,grad_norm: 0.8023582544761747, iteration: 384207
loss: 0.9853416085243225,grad_norm: 0.7308053550181443, iteration: 384208
loss: 0.982577919960022,grad_norm: 0.6433517418560892, iteration: 384209
loss: 1.020401120185852,grad_norm: 0.7122466103614412, iteration: 384210
loss: 1.0071436166763306,grad_norm: 0.7984207264172727, iteration: 384211
loss: 1.1402722597122192,grad_norm: 0.9999996569881215, iteration: 384212
loss: 1.107269287109375,grad_norm: 0.9999991942039211, iteration: 384213
loss: 1.0496582984924316,grad_norm: 0.9999990603496284, iteration: 384214
loss: 1.0009922981262207,grad_norm: 0.7445123051240414, iteration: 384215
loss: 1.012113332748413,grad_norm: 0.9560705319503072, iteration: 384216
loss: 0.9921543002128601,grad_norm: 0.7310533227777382, iteration: 384217
loss: 1.0066453218460083,grad_norm: 0.7360375459141701, iteration: 384218
loss: 1.0444954633712769,grad_norm: 0.9999994002742572, iteration: 384219
loss: 0.9736951589584351,grad_norm: 0.6998338254192802, iteration: 384220
loss: 1.0270053148269653,grad_norm: 0.9999995935200798, iteration: 384221
loss: 1.2442768812179565,grad_norm: 0.9999999751217172, iteration: 384222
loss: 1.001388669013977,grad_norm: 0.8307984056075968, iteration: 384223
loss: 1.0160273313522339,grad_norm: 0.8374217957524808, iteration: 384224
loss: 1.0856742858886719,grad_norm: 0.8800273552021752, iteration: 384225
loss: 1.0021841526031494,grad_norm: 0.8412630240874832, iteration: 384226
loss: 1.004134178161621,grad_norm: 0.9316127264026424, iteration: 384227
loss: 1.0162385702133179,grad_norm: 0.8831782327967754, iteration: 384228
loss: 0.9698860049247742,grad_norm: 0.9999997183807638, iteration: 384229
loss: 1.03035569190979,grad_norm: 0.7647968824173148, iteration: 384230
loss: 0.9464682340621948,grad_norm: 0.835697958297084, iteration: 384231
loss: 1.001370906829834,grad_norm: 0.9236630043017843, iteration: 384232
loss: 0.9784933924674988,grad_norm: 0.6950430292098508, iteration: 384233
loss: 1.0359046459197998,grad_norm: 0.7620820959999691, iteration: 384234
loss: 1.0032129287719727,grad_norm: 0.7280486148923462, iteration: 384235
loss: 0.9795103073120117,grad_norm: 0.7941613962012299, iteration: 384236
loss: 1.090591549873352,grad_norm: 0.9999990467669505, iteration: 384237
loss: 0.9976968765258789,grad_norm: 0.966142366635653, iteration: 384238
loss: 0.9789100289344788,grad_norm: 0.8870206992900488, iteration: 384239
loss: 0.9819247126579285,grad_norm: 0.7626718187299512, iteration: 384240
loss: 0.9913681149482727,grad_norm: 0.9574787785879798, iteration: 384241
loss: 0.9783056974411011,grad_norm: 0.7375038203277278, iteration: 384242
loss: 0.9857587814331055,grad_norm: 0.7603520299444948, iteration: 384243
loss: 0.9465749263763428,grad_norm: 0.6190419179735475, iteration: 384244
loss: 1.0293081998825073,grad_norm: 0.9999994978716958, iteration: 384245
loss: 1.0192439556121826,grad_norm: 0.999999374990244, iteration: 384246
loss: 1.0121285915374756,grad_norm: 0.9999990657208279, iteration: 384247
loss: 0.9838635921478271,grad_norm: 0.7057507675947238, iteration: 384248
loss: 0.9770516753196716,grad_norm: 0.7192647005964863, iteration: 384249
loss: 0.9754309058189392,grad_norm: 0.796716632210029, iteration: 384250
loss: 1.034224033355713,grad_norm: 0.6986738047532605, iteration: 384251
loss: 0.9906248450279236,grad_norm: 0.7736439204300453, iteration: 384252
loss: 1.0163352489471436,grad_norm: 0.8342065536765626, iteration: 384253
loss: 0.9905781149864197,grad_norm: 0.8614827558640117, iteration: 384254
loss: 1.0183253288269043,grad_norm: 0.7960546658666778, iteration: 384255
loss: 0.984696090221405,grad_norm: 0.8099417321350401, iteration: 384256
loss: 0.9789207577705383,grad_norm: 0.9385899904147117, iteration: 384257
loss: 1.010098934173584,grad_norm: 0.88923396618401, iteration: 384258
loss: 0.9593513011932373,grad_norm: 0.7951451903039862, iteration: 384259
loss: 0.9934674501419067,grad_norm: 0.9999992087826013, iteration: 384260
loss: 0.9724333882331848,grad_norm: 0.7823896404020466, iteration: 384261
loss: 1.0022975206375122,grad_norm: 0.8949552999624284, iteration: 384262
loss: 1.000261902809143,grad_norm: 0.7692937251723042, iteration: 384263
loss: 0.985306441783905,grad_norm: 0.8374340166994121, iteration: 384264
loss: 1.0299971103668213,grad_norm: 0.999999884181705, iteration: 384265
loss: 1.0498449802398682,grad_norm: 0.8136701145990021, iteration: 384266
loss: 1.017423391342163,grad_norm: 0.8055649569190774, iteration: 384267
loss: 1.0066030025482178,grad_norm: 0.6544281605207232, iteration: 384268
loss: 1.0433448553085327,grad_norm: 0.7211032976214697, iteration: 384269
loss: 0.9829602241516113,grad_norm: 0.9721674142861537, iteration: 384270
loss: 1.0211673974990845,grad_norm: 0.8676411871690078, iteration: 384271
loss: 0.9702479243278503,grad_norm: 0.8371376675013438, iteration: 384272
loss: 1.0766499042510986,grad_norm: 0.98915885354253, iteration: 384273
loss: 0.975487470626831,grad_norm: 0.8584281514202979, iteration: 384274
loss: 0.9875791668891907,grad_norm: 0.6930583887513452, iteration: 384275
loss: 1.0125564336776733,grad_norm: 0.8004873629424337, iteration: 384276
loss: 1.0313706398010254,grad_norm: 0.8729727270671042, iteration: 384277
loss: 1.0218883752822876,grad_norm: 0.8282116890768251, iteration: 384278
loss: 0.9472571015357971,grad_norm: 0.7859055209613816, iteration: 384279
loss: 0.9958569407463074,grad_norm: 0.999999227281302, iteration: 384280
loss: 0.9983462691307068,grad_norm: 0.9406416946729758, iteration: 384281
loss: 1.0462108850479126,grad_norm: 0.9999992023396771, iteration: 384282
loss: 1.0742101669311523,grad_norm: 0.999999103294441, iteration: 384283
loss: 1.0443426370620728,grad_norm: 0.8239621671136171, iteration: 384284
loss: 1.0189368724822998,grad_norm: 0.6463246790655738, iteration: 384285
loss: 1.0883843898773193,grad_norm: 0.8393243184453467, iteration: 384286
loss: 1.0122274160385132,grad_norm: 0.7757960018065476, iteration: 384287
loss: 0.9642925262451172,grad_norm: 0.8301086326489405, iteration: 384288
loss: 0.9718590378761292,grad_norm: 0.7310758317069752, iteration: 384289
loss: 1.0237202644348145,grad_norm: 0.7705932011461227, iteration: 384290
loss: 0.9549161195755005,grad_norm: 0.77026098626737, iteration: 384291
loss: 0.9960803985595703,grad_norm: 0.8807553238042272, iteration: 384292
loss: 1.025338888168335,grad_norm: 0.9232723634135016, iteration: 384293
loss: 0.9954120516777039,grad_norm: 0.9909967061697745, iteration: 384294
loss: 1.0093203783035278,grad_norm: 0.7453275249627326, iteration: 384295
loss: 1.0262078046798706,grad_norm: 0.9999992860620559, iteration: 384296
loss: 1.0103819370269775,grad_norm: 0.7426493815502799, iteration: 384297
loss: 1.004685640335083,grad_norm: 0.9253034875425421, iteration: 384298
loss: 0.9910603761672974,grad_norm: 0.7431386930854617, iteration: 384299
loss: 1.0007216930389404,grad_norm: 0.9836574694888478, iteration: 384300
loss: 1.0453182458877563,grad_norm: 0.9425076714496667, iteration: 384301
loss: 1.0249868631362915,grad_norm: 0.9999992456444534, iteration: 384302
loss: 0.9722175598144531,grad_norm: 0.7199184303813019, iteration: 384303
loss: 1.030182123184204,grad_norm: 0.7889120626656366, iteration: 384304
loss: 0.9963950514793396,grad_norm: 0.9701054563373224, iteration: 384305
loss: 1.010266900062561,grad_norm: 0.629919522367001, iteration: 384306
loss: 1.0419131517410278,grad_norm: 0.8729929408544908, iteration: 384307
loss: 1.0261743068695068,grad_norm: 0.7503458127290239, iteration: 384308
loss: 0.9945212006568909,grad_norm: 0.9028344278417924, iteration: 384309
loss: 1.0190402269363403,grad_norm: 0.741655342013739, iteration: 384310
loss: 1.008887767791748,grad_norm: 0.7782149354952362, iteration: 384311
loss: 1.0070618391036987,grad_norm: 0.7582333614037534, iteration: 384312
loss: 1.0244853496551514,grad_norm: 0.999998971314704, iteration: 384313
loss: 0.990868091583252,grad_norm: 0.8729083547614155, iteration: 384314
loss: 0.999817430973053,grad_norm: 0.7902949026877176, iteration: 384315
loss: 0.9837933778762817,grad_norm: 0.9999996433726617, iteration: 384316
loss: 1.0280704498291016,grad_norm: 0.999999507821572, iteration: 384317
loss: 1.04520845413208,grad_norm: 0.9999997092057149, iteration: 384318
loss: 0.9939358830451965,grad_norm: 0.8824812091375929, iteration: 384319
loss: 1.0399681329727173,grad_norm: 0.9999990656613471, iteration: 384320
loss: 0.9622865915298462,grad_norm: 0.7964657838371254, iteration: 384321
loss: 1.0135005712509155,grad_norm: 0.810095066120111, iteration: 384322
loss: 1.0352269411087036,grad_norm: 0.7644524957582434, iteration: 384323
loss: 1.0239765644073486,grad_norm: 0.8056375119293895, iteration: 384324
loss: 0.970264732837677,grad_norm: 0.9039403065503474, iteration: 384325
loss: 0.9740481376647949,grad_norm: 0.7344340155005316, iteration: 384326
loss: 1.009788155555725,grad_norm: 0.9999990271214662, iteration: 384327
loss: 1.0638285875320435,grad_norm: 0.9999991763409337, iteration: 384328
loss: 1.0265167951583862,grad_norm: 0.8113355237444343, iteration: 384329
loss: 0.9957758784294128,grad_norm: 0.741819687519007, iteration: 384330
loss: 1.0046989917755127,grad_norm: 0.9999999048650026, iteration: 384331
loss: 1.003282904624939,grad_norm: 0.9999999050136785, iteration: 384332
loss: 1.0162513256072998,grad_norm: 0.8208251760572373, iteration: 384333
loss: 1.0166226625442505,grad_norm: 0.779555708830279, iteration: 384334
loss: 0.9921435117721558,grad_norm: 0.7871657450432636, iteration: 384335
loss: 1.0166071653366089,grad_norm: 0.9900158508292259, iteration: 384336
loss: 1.0128337144851685,grad_norm: 0.9524983675275274, iteration: 384337
loss: 0.9983789324760437,grad_norm: 0.8090842094236271, iteration: 384338
loss: 0.9960340261459351,grad_norm: 0.822591562198439, iteration: 384339
loss: 0.9847659468650818,grad_norm: 0.7643986060437431, iteration: 384340
loss: 1.0414097309112549,grad_norm: 0.9999991035287029, iteration: 384341
loss: 1.0033382177352905,grad_norm: 0.8944953498698877, iteration: 384342
loss: 0.9895267486572266,grad_norm: 1.0000000265073554, iteration: 384343
loss: 1.005326271057129,grad_norm: 0.6853986095377814, iteration: 384344
loss: 0.971798300743103,grad_norm: 0.8517817708050861, iteration: 384345
loss: 0.9809055328369141,grad_norm: 0.8940411131150545, iteration: 384346
loss: 0.9783035516738892,grad_norm: 0.8243242016058917, iteration: 384347
loss: 1.0098872184753418,grad_norm: 0.9999990970494756, iteration: 384348
loss: 1.028294324874878,grad_norm: 0.9780486252667965, iteration: 384349
loss: 1.0352451801300049,grad_norm: 0.9999993922455905, iteration: 384350
loss: 0.9993634819984436,grad_norm: 0.8039347023642934, iteration: 384351
loss: 1.0724989175796509,grad_norm: 0.8616677584450341, iteration: 384352
loss: 1.0127760171890259,grad_norm: 0.8787166157150601, iteration: 384353
loss: 1.0144017934799194,grad_norm: 0.7952684893988841, iteration: 384354
loss: 1.0077556371688843,grad_norm: 0.7710448236327598, iteration: 384355
loss: 0.9599164724349976,grad_norm: 0.8176771332697659, iteration: 384356
loss: 1.022638201713562,grad_norm: 0.708784599709985, iteration: 384357
loss: 1.0558083057403564,grad_norm: 0.7390112889742136, iteration: 384358
loss: 1.0015637874603271,grad_norm: 0.9776321058067207, iteration: 384359
loss: 0.9888827800750732,grad_norm: 0.9999993962194941, iteration: 384360
loss: 1.0344432592391968,grad_norm: 0.7456956262925665, iteration: 384361
loss: 1.0327738523483276,grad_norm: 0.9999991245296624, iteration: 384362
loss: 1.0614821910858154,grad_norm: 0.9999996403517605, iteration: 384363
loss: 1.1040271520614624,grad_norm: 0.9999995940252907, iteration: 384364
loss: 1.074751853942871,grad_norm: 0.7739774012325, iteration: 384365
loss: 0.9864712953567505,grad_norm: 0.9999990135201526, iteration: 384366
loss: 0.9986581206321716,grad_norm: 0.8476131163056173, iteration: 384367
loss: 0.9999851584434509,grad_norm: 0.7612554626795633, iteration: 384368
loss: 0.9912929534912109,grad_norm: 0.9891452365280506, iteration: 384369
loss: 1.0110702514648438,grad_norm: 0.862701689508602, iteration: 384370
loss: 0.9915836453437805,grad_norm: 0.8425314025159952, iteration: 384371
loss: 1.006262183189392,grad_norm: 0.8388033211078857, iteration: 384372
loss: 1.0149286985397339,grad_norm: 0.999999032204223, iteration: 384373
loss: 0.9721264243125916,grad_norm: 0.9156465091107102, iteration: 384374
loss: 1.0385360717773438,grad_norm: 0.7390467570611209, iteration: 384375
loss: 1.019180178642273,grad_norm: 0.8399258913182395, iteration: 384376
loss: 1.0259755849838257,grad_norm: 0.9999992551539533, iteration: 384377
loss: 1.0491259098052979,grad_norm: 0.7865837810853947, iteration: 384378
loss: 0.9776566624641418,grad_norm: 0.9999990774303349, iteration: 384379
loss: 1.0235837697982788,grad_norm: 0.8891797235050453, iteration: 384380
loss: 1.0217068195343018,grad_norm: 0.9999992445386341, iteration: 384381
loss: 0.9948531985282898,grad_norm: 0.8583203269197525, iteration: 384382
loss: 1.025912880897522,grad_norm: 0.8150481728809214, iteration: 384383
loss: 1.0035181045532227,grad_norm: 0.8089143892378408, iteration: 384384
loss: 0.9450879693031311,grad_norm: 0.7818218809274915, iteration: 384385
loss: 1.0709606409072876,grad_norm: 0.9999997731895001, iteration: 384386
loss: 0.9939604997634888,grad_norm: 0.766391149004792, iteration: 384387
loss: 0.9532084465026855,grad_norm: 0.7742749034778984, iteration: 384388
loss: 1.1122976541519165,grad_norm: 0.936404715340719, iteration: 384389
loss: 0.9923089146614075,grad_norm: 0.9999991116921512, iteration: 384390
loss: 1.0049614906311035,grad_norm: 0.7776559714580945, iteration: 384391
loss: 1.0657192468643188,grad_norm: 0.9999998731272735, iteration: 384392
loss: 1.0785012245178223,grad_norm: 0.9999999057867609, iteration: 384393
loss: 1.0277732610702515,grad_norm: 0.760031941666387, iteration: 384394
loss: 0.9850354194641113,grad_norm: 0.8492262625360564, iteration: 384395
loss: 1.019911289215088,grad_norm: 0.7667273833897974, iteration: 384396
loss: 0.9651826024055481,grad_norm: 0.8995281219850012, iteration: 384397
loss: 1.0274113416671753,grad_norm: 0.8521075924232746, iteration: 384398
loss: 0.9936122894287109,grad_norm: 0.9999992257828229, iteration: 384399
loss: 1.0590859651565552,grad_norm: 0.9300304196790661, iteration: 384400
loss: 1.0017777681350708,grad_norm: 0.8256000482447138, iteration: 384401
loss: 0.9920472502708435,grad_norm: 0.7957801014329362, iteration: 384402
loss: 1.0029354095458984,grad_norm: 0.7039375569951649, iteration: 384403
loss: 0.9738041162490845,grad_norm: 0.833163822388718, iteration: 384404
loss: 0.972800612449646,grad_norm: 0.9999996622330453, iteration: 384405
loss: 0.9923660159111023,grad_norm: 0.9999997664507652, iteration: 384406
loss: 1.0094059705734253,grad_norm: 0.6607814995320418, iteration: 384407
loss: 1.0766981840133667,grad_norm: 0.9999998629723073, iteration: 384408
loss: 0.9854063391685486,grad_norm: 0.9999997867146886, iteration: 384409
loss: 0.9638233780860901,grad_norm: 0.7188856813637563, iteration: 384410
loss: 1.0199954509735107,grad_norm: 0.9999990691471555, iteration: 384411
loss: 0.9960353374481201,grad_norm: 0.8801436233336639, iteration: 384412
loss: 1.0403045415878296,grad_norm: 0.8152896588372962, iteration: 384413
loss: 1.0557446479797363,grad_norm: 0.999999523958473, iteration: 384414
loss: 1.0459171533584595,grad_norm: 0.9999995086537441, iteration: 384415
loss: 0.989777684211731,grad_norm: 0.7474493044559777, iteration: 384416
loss: 1.0291361808776855,grad_norm: 0.7979908340972174, iteration: 384417
loss: 1.0434504747390747,grad_norm: 0.8983877432078405, iteration: 384418
loss: 1.0012317895889282,grad_norm: 0.7178365921967036, iteration: 384419
loss: 0.986208438873291,grad_norm: 0.819329903007455, iteration: 384420
loss: 1.0076500177383423,grad_norm: 0.6946004233141999, iteration: 384421
loss: 0.9838697910308838,grad_norm: 0.7172240776242534, iteration: 384422
loss: 0.9890235066413879,grad_norm: 0.7803720333063175, iteration: 384423
loss: 0.9935883283615112,grad_norm: 0.885740552562185, iteration: 384424
loss: 0.9960004687309265,grad_norm: 0.9601309213781365, iteration: 384425
loss: 0.9802606105804443,grad_norm: 0.8836005651531507, iteration: 384426
loss: 0.9729474782943726,grad_norm: 0.7809760464299614, iteration: 384427
loss: 0.9635311365127563,grad_norm: 0.8469495372508598, iteration: 384428
loss: 1.0060008764266968,grad_norm: 0.8486887535399756, iteration: 384429
loss: 1.024854302406311,grad_norm: 0.760351545839981, iteration: 384430
loss: 0.9766818881034851,grad_norm: 0.8597714929498701, iteration: 384431
loss: 1.0031005144119263,grad_norm: 0.6345078554104445, iteration: 384432
loss: 1.0457292795181274,grad_norm: 0.8884657694373085, iteration: 384433
loss: 1.0290939807891846,grad_norm: 0.7564685848660738, iteration: 384434
loss: 0.9906841516494751,grad_norm: 0.7334205148567284, iteration: 384435
loss: 1.138842225074768,grad_norm: 0.9616764204912311, iteration: 384436
loss: 1.0268030166625977,grad_norm: 0.6266803334621452, iteration: 384437
loss: 1.015081763267517,grad_norm: 0.814531356262827, iteration: 384438
loss: 0.9846790432929993,grad_norm: 0.692461337154319, iteration: 384439
loss: 0.9848260283470154,grad_norm: 0.8192718248445233, iteration: 384440
loss: 1.0253856182098389,grad_norm: 0.8191406280205299, iteration: 384441
loss: 0.9904636144638062,grad_norm: 0.9783725657614277, iteration: 384442
loss: 0.9891035556793213,grad_norm: 0.7579160099353512, iteration: 384443
loss: 1.0490504503250122,grad_norm: 0.9999990726602408, iteration: 384444
loss: 0.9986999034881592,grad_norm: 0.788124811490623, iteration: 384445
loss: 1.0258541107177734,grad_norm: 0.9059292520242733, iteration: 384446
loss: 1.0058640241622925,grad_norm: 0.7470314261819186, iteration: 384447
loss: 0.9855114817619324,grad_norm: 0.770986833862455, iteration: 384448
loss: 1.0097362995147705,grad_norm: 0.9154692574335208, iteration: 384449
loss: 1.0151715278625488,grad_norm: 0.7988145574259203, iteration: 384450
loss: 0.9758353233337402,grad_norm: 0.7285892765630503, iteration: 384451
loss: 0.9948763251304626,grad_norm: 0.7641750279024337, iteration: 384452
loss: 1.0557072162628174,grad_norm: 0.9999990373925988, iteration: 384453
loss: 1.0387625694274902,grad_norm: 0.8817688084862593, iteration: 384454
loss: 1.144768476486206,grad_norm: 0.9999991303116097, iteration: 384455
loss: 1.01163911819458,grad_norm: 0.8179435255363636, iteration: 384456
loss: 1.0379953384399414,grad_norm: 0.999999730236824, iteration: 384457
loss: 1.0229294300079346,grad_norm: 0.8326088447549497, iteration: 384458
loss: 1.0944527387619019,grad_norm: 0.8982807313409285, iteration: 384459
loss: 0.9952576160430908,grad_norm: 0.6974579886193375, iteration: 384460
loss: 1.0456362962722778,grad_norm: 0.9999999173992485, iteration: 384461
loss: 0.9980878233909607,grad_norm: 0.7969577062962864, iteration: 384462
loss: 0.9577916264533997,grad_norm: 0.7281960462032204, iteration: 384463
loss: 1.025146722793579,grad_norm: 0.8470144955564629, iteration: 384464
loss: 1.058323621749878,grad_norm: 0.849220094620861, iteration: 384465
loss: 1.0416738986968994,grad_norm: 0.9108726734659212, iteration: 384466
loss: 1.0296967029571533,grad_norm: 0.7165309835409744, iteration: 384467
loss: 1.0398191213607788,grad_norm: 0.9999999002146375, iteration: 384468
loss: 1.0363727807998657,grad_norm: 0.7322362799844832, iteration: 384469
loss: 1.0115817785263062,grad_norm: 0.704883892817588, iteration: 384470
loss: 1.0342580080032349,grad_norm: 0.999999536750223, iteration: 384471
loss: 0.9983537793159485,grad_norm: 0.7598172908168012, iteration: 384472
loss: 0.9977501630783081,grad_norm: 0.8237262084993918, iteration: 384473
loss: 0.9935128092765808,grad_norm: 0.8500170485857561, iteration: 384474
loss: 0.990086555480957,grad_norm: 0.7518747330001161, iteration: 384475
loss: 1.0035902261734009,grad_norm: 0.7290228601191588, iteration: 384476
loss: 1.0162009000778198,grad_norm: 0.8574050980413656, iteration: 384477
loss: 0.9855832457542419,grad_norm: 0.8799683584297482, iteration: 384478
loss: 1.018652081489563,grad_norm: 0.7308872954178562, iteration: 384479
loss: 0.9877080917358398,grad_norm: 0.6912205732452942, iteration: 384480
loss: 1.0291211605072021,grad_norm: 0.7875758269706064, iteration: 384481
loss: 1.0160462856292725,grad_norm: 0.9758948296018343, iteration: 384482
loss: 1.0068925619125366,grad_norm: 0.8648293809030544, iteration: 384483
loss: 0.9598402380943298,grad_norm: 0.7871224774020283, iteration: 384484
loss: 0.9925926327705383,grad_norm: 0.9446040704783601, iteration: 384485
loss: 1.0241514444351196,grad_norm: 0.7304346108664774, iteration: 384486
loss: 1.0408231019973755,grad_norm: 0.8244150621888251, iteration: 384487
loss: 1.050842046737671,grad_norm: 0.6901617169714319, iteration: 384488
loss: 1.026279330253601,grad_norm: 0.8363381090717478, iteration: 384489
loss: 1.0322048664093018,grad_norm: 0.8368266418272345, iteration: 384490
loss: 0.9908671379089355,grad_norm: 0.7558214680060339, iteration: 384491
loss: 0.9606688022613525,grad_norm: 0.6425569911421236, iteration: 384492
loss: 1.0227375030517578,grad_norm: 0.9039860521942977, iteration: 384493
loss: 1.0508872270584106,grad_norm: 1.0000000714624986, iteration: 384494
loss: 1.053928256034851,grad_norm: 0.8972902436098328, iteration: 384495
loss: 1.1041796207427979,grad_norm: 0.9999993019030343, iteration: 384496
loss: 1.0758538246154785,grad_norm: 0.9999990012563393, iteration: 384497
loss: 1.0005605220794678,grad_norm: 0.8357277751786594, iteration: 384498
loss: 0.9944421052932739,grad_norm: 0.7874268143562202, iteration: 384499
loss: 1.0102444887161255,grad_norm: 0.8630289431069675, iteration: 384500
loss: 0.9502810835838318,grad_norm: 0.879842929407788, iteration: 384501
loss: 0.9908061027526855,grad_norm: 0.9943633558442269, iteration: 384502
loss: 1.0150971412658691,grad_norm: 0.6267407195465383, iteration: 384503
loss: 0.9967426657676697,grad_norm: 0.9999991071798499, iteration: 384504
loss: 0.989406406879425,grad_norm: 0.7167329247494729, iteration: 384505
loss: 0.9949132800102234,grad_norm: 0.8279918110472227, iteration: 384506
loss: 0.9968454241752625,grad_norm: 0.7530978325213677, iteration: 384507
loss: 1.086650013923645,grad_norm: 0.8797814561078431, iteration: 384508
loss: 0.9898690581321716,grad_norm: 0.7544083984873544, iteration: 384509
loss: 1.0830024480819702,grad_norm: 0.9999993200048095, iteration: 384510
loss: 0.9951159954071045,grad_norm: 0.9999990914174561, iteration: 384511
loss: 1.0169651508331299,grad_norm: 0.7610368571438274, iteration: 384512
loss: 0.9949473142623901,grad_norm: 0.8120111535850489, iteration: 384513
loss: 1.015916109085083,grad_norm: 0.9999990915053719, iteration: 384514
loss: 0.9813772439956665,grad_norm: 0.8659160161346112, iteration: 384515
loss: 0.9968985915184021,grad_norm: 0.7656053372900485, iteration: 384516
loss: 0.9735180139541626,grad_norm: 0.9999990934196522, iteration: 384517
loss: 1.0701676607131958,grad_norm: 0.9999995757462704, iteration: 384518
loss: 0.9885575175285339,grad_norm: 0.7283991955832823, iteration: 384519
loss: 0.9950359463691711,grad_norm: 0.769309528022445, iteration: 384520
loss: 1.0320932865142822,grad_norm: 0.9999990910093137, iteration: 384521
loss: 1.0045580863952637,grad_norm: 0.9999994153886949, iteration: 384522
loss: 0.9960960745811462,grad_norm: 0.932336091949082, iteration: 384523
loss: 0.9669784903526306,grad_norm: 0.8619413037749608, iteration: 384524
loss: 1.0089610815048218,grad_norm: 0.8225252554057071, iteration: 384525
loss: 0.9710838794708252,grad_norm: 0.9126700302638526, iteration: 384526
loss: 0.9826661944389343,grad_norm: 0.7442209817411212, iteration: 384527
loss: 1.0036981105804443,grad_norm: 0.6674353988976743, iteration: 384528
loss: 1.0113024711608887,grad_norm: 0.8689226744072949, iteration: 384529
loss: 0.9971723556518555,grad_norm: 0.7667855109966414, iteration: 384530
loss: 1.0297436714172363,grad_norm: 0.9530709508895783, iteration: 384531
loss: 1.0010020732879639,grad_norm: 0.7595979129752731, iteration: 384532
loss: 1.0118653774261475,grad_norm: 0.7462575457722112, iteration: 384533
loss: 1.050963044166565,grad_norm: 0.7203683737980957, iteration: 384534
loss: 0.9708161354064941,grad_norm: 0.8667301825296664, iteration: 384535
loss: 1.0155766010284424,grad_norm: 0.9174993408617526, iteration: 384536
loss: 0.9931584000587463,grad_norm: 0.7900470912467209, iteration: 384537
loss: 1.0032813549041748,grad_norm: 0.8445815707572754, iteration: 384538
loss: 1.047881841659546,grad_norm: 0.9999993115480323, iteration: 384539
loss: 1.0154982805252075,grad_norm: 0.8133708020442313, iteration: 384540
loss: 0.9963207840919495,grad_norm: 0.7780076328301905, iteration: 384541
loss: 1.0562710762023926,grad_norm: 0.778797739591747, iteration: 384542
loss: 0.9689239263534546,grad_norm: 0.8723794367161151, iteration: 384543
loss: 1.0048737525939941,grad_norm: 0.6840395357643568, iteration: 384544
loss: 1.0919277667999268,grad_norm: 0.7890986882443253, iteration: 384545
loss: 1.0966185331344604,grad_norm: 0.7357608489337404, iteration: 384546
loss: 1.0119856595993042,grad_norm: 0.7273822204533038, iteration: 384547
loss: 1.0338951349258423,grad_norm: 0.9999992652947137, iteration: 384548
loss: 0.9936953186988831,grad_norm: 0.7536731197235177, iteration: 384549
loss: 1.0466701984405518,grad_norm: 0.9999990906347844, iteration: 384550
loss: 1.0022214651107788,grad_norm: 0.8517870239952233, iteration: 384551
loss: 0.9754631519317627,grad_norm: 0.9999991418082386, iteration: 384552
loss: 1.0197817087173462,grad_norm: 0.9999994180859653, iteration: 384553
loss: 0.9858278632164001,grad_norm: 0.7416633270669986, iteration: 384554
loss: 1.0445665121078491,grad_norm: 1.0000000144704233, iteration: 384555
loss: 0.9440443515777588,grad_norm: 0.8643183386617522, iteration: 384556
loss: 1.0506901741027832,grad_norm: 0.737860965254715, iteration: 384557
loss: 0.9996474981307983,grad_norm: 0.7486306641482324, iteration: 384558
loss: 1.0168893337249756,grad_norm: 0.8626857026402976, iteration: 384559
loss: 0.967128574848175,grad_norm: 0.845015087852677, iteration: 384560
loss: 1.0215110778808594,grad_norm: 0.8314764929770924, iteration: 384561
loss: 0.9961239099502563,grad_norm: 0.8665279619277836, iteration: 384562
loss: 1.0175529718399048,grad_norm: 0.7036599686184968, iteration: 384563
loss: 1.052739143371582,grad_norm: 0.9337922841969684, iteration: 384564
loss: 1.0028947591781616,grad_norm: 0.7658232950777055, iteration: 384565
loss: 1.0836118459701538,grad_norm: 0.9999990301346383, iteration: 384566
loss: 0.974032461643219,grad_norm: 0.926249214657959, iteration: 384567
loss: 0.9759833216667175,grad_norm: 0.6797864895291028, iteration: 384568
loss: 0.9734182953834534,grad_norm: 0.8185863708762929, iteration: 384569
loss: 1.0231298208236694,grad_norm: 0.989079942624094, iteration: 384570
loss: 1.059946060180664,grad_norm: 0.9805748645581843, iteration: 384571
loss: 1.0025620460510254,grad_norm: 0.656493907479752, iteration: 384572
loss: 1.0279864072799683,grad_norm: 0.8501821245995035, iteration: 384573
loss: 0.9614036083221436,grad_norm: 0.963698316863137, iteration: 384574
loss: 1.0202616453170776,grad_norm: 0.8588271335194149, iteration: 384575
loss: 1.0015523433685303,grad_norm: 0.8216999215840407, iteration: 384576
loss: 0.9940950870513916,grad_norm: 0.7399001295396369, iteration: 384577
loss: 0.9919536709785461,grad_norm: 0.8946984758665772, iteration: 384578
loss: 1.0018033981323242,grad_norm: 0.9328626959136701, iteration: 384579
loss: 0.9851366877555847,grad_norm: 0.8545059636934448, iteration: 384580
loss: 0.9861587285995483,grad_norm: 0.8459334123657174, iteration: 384581
loss: 0.9862208962440491,grad_norm: 0.7893723489799341, iteration: 384582
loss: 1.0182498693466187,grad_norm: 0.9999997118406355, iteration: 384583
loss: 0.9829663634300232,grad_norm: 0.7987367648895701, iteration: 384584
loss: 0.9919785261154175,grad_norm: 0.8472579351949632, iteration: 384585
loss: 1.010740041732788,grad_norm: 0.8154902640704895, iteration: 384586
loss: 0.9826265573501587,grad_norm: 0.9999992202811565, iteration: 384587
loss: 0.9949742555618286,grad_norm: 0.6771991625175362, iteration: 384588
loss: 0.9805480241775513,grad_norm: 0.8609691673011173, iteration: 384589
loss: 0.9871930480003357,grad_norm: 0.8080572955021321, iteration: 384590
loss: 0.9704378247261047,grad_norm: 0.845828372553535, iteration: 384591
loss: 0.96871018409729,grad_norm: 0.7342340728031852, iteration: 384592
loss: 0.994343638420105,grad_norm: 0.7043610394904085, iteration: 384593
loss: 1.0546150207519531,grad_norm: 0.8658676520159325, iteration: 384594
loss: 1.0190000534057617,grad_norm: 0.7535920439076371, iteration: 384595
loss: 1.0375702381134033,grad_norm: 0.7311527137027382, iteration: 384596
loss: 0.9792713522911072,grad_norm: 0.9999998612525041, iteration: 384597
loss: 1.1166050434112549,grad_norm: 0.9999992669947444, iteration: 384598
loss: 1.0279443264007568,grad_norm: 0.740182082577179, iteration: 384599
loss: 1.0893275737762451,grad_norm: 0.8279133355893541, iteration: 384600
loss: 1.0196738243103027,grad_norm: 0.9999997556750121, iteration: 384601
loss: 0.9847144484519958,grad_norm: 0.8609738880352458, iteration: 384602
loss: 0.9803998470306396,grad_norm: 0.7724587608477842, iteration: 384603
loss: 0.9750427603721619,grad_norm: 0.7963490345350455, iteration: 384604
loss: 1.0103164911270142,grad_norm: 0.8176453813925159, iteration: 384605
loss: 1.014838457107544,grad_norm: 0.6717735581418501, iteration: 384606
loss: 0.9838587641716003,grad_norm: 0.9220560778789325, iteration: 384607
loss: 0.9918302297592163,grad_norm: 0.9134646215437002, iteration: 384608
loss: 0.9982622861862183,grad_norm: 0.9999991693553607, iteration: 384609
loss: 1.0921504497528076,grad_norm: 0.9999990552594545, iteration: 384610
loss: 0.9487193822860718,grad_norm: 0.6437666808779359, iteration: 384611
loss: 1.035309076309204,grad_norm: 0.8886831118408713, iteration: 384612
loss: 1.1105177402496338,grad_norm: 0.9999995048292076, iteration: 384613
loss: 1.0049132108688354,grad_norm: 0.9999999180716395, iteration: 384614
loss: 0.9874154925346375,grad_norm: 0.7163106028882096, iteration: 384615
loss: 0.999620795249939,grad_norm: 0.7685655896792192, iteration: 384616
loss: 1.0391634702682495,grad_norm: 0.7203794615339186, iteration: 384617
loss: 1.0102956295013428,grad_norm: 0.8064149061933579, iteration: 384618
loss: 0.9567317366600037,grad_norm: 0.9986538297040182, iteration: 384619
loss: 1.0208426713943481,grad_norm: 0.8648351086850217, iteration: 384620
loss: 1.0216169357299805,grad_norm: 0.904404560640212, iteration: 384621
loss: 1.0175087451934814,grad_norm: 0.9065567981853169, iteration: 384622
loss: 1.0031782388687134,grad_norm: 0.7868750560076054, iteration: 384623
loss: 0.9876556396484375,grad_norm: 0.8381264934866423, iteration: 384624
loss: 0.9659155607223511,grad_norm: 0.8196801193477423, iteration: 384625
loss: 0.9808467030525208,grad_norm: 0.7781893015124705, iteration: 384626
loss: 0.9809184074401855,grad_norm: 0.7834942769853385, iteration: 384627
loss: 1.0268465280532837,grad_norm: 0.8950363694028434, iteration: 384628
loss: 0.994918167591095,grad_norm: 0.7485993446846876, iteration: 384629
loss: 0.9995595812797546,grad_norm: 0.7106630170288062, iteration: 384630
loss: 0.9750703573226929,grad_norm: 0.9999993576353979, iteration: 384631
loss: 0.9772510528564453,grad_norm: 0.9999995640844017, iteration: 384632
loss: 1.0032978057861328,grad_norm: 0.763663389863884, iteration: 384633
loss: 1.031680941581726,grad_norm: 0.8489948525957077, iteration: 384634
loss: 1.0209413766860962,grad_norm: 0.9999990356910278, iteration: 384635
loss: 1.004166603088379,grad_norm: 0.8905694153383299, iteration: 384636
loss: 1.0194203853607178,grad_norm: 0.9095591721639358, iteration: 384637
loss: 1.018757700920105,grad_norm: 0.7780808372120351, iteration: 384638
loss: 1.0131537914276123,grad_norm: 0.978889372426884, iteration: 384639
loss: 1.000291347503662,grad_norm: 0.9999994918523449, iteration: 384640
loss: 0.9881788492202759,grad_norm: 0.9999989977041827, iteration: 384641
loss: 1.0111421346664429,grad_norm: 0.9999997680914403, iteration: 384642
loss: 0.9886348843574524,grad_norm: 0.753429393592682, iteration: 384643
loss: 1.0372456312179565,grad_norm: 0.8874264372268302, iteration: 384644
loss: 1.0003665685653687,grad_norm: 0.6596944138803619, iteration: 384645
loss: 1.0091122388839722,grad_norm: 0.6362189493013661, iteration: 384646
loss: 0.9860674142837524,grad_norm: 0.7728597612399551, iteration: 384647
loss: 1.0679571628570557,grad_norm: 0.9999995254206662, iteration: 384648
loss: 1.008834958076477,grad_norm: 0.9999990513262837, iteration: 384649
loss: 1.002722144126892,grad_norm: 0.9999992376523702, iteration: 384650
loss: 0.9525591731071472,grad_norm: 0.7722067779386089, iteration: 384651
loss: 1.0065851211547852,grad_norm: 0.6963584977711264, iteration: 384652
loss: 1.0099126100540161,grad_norm: 0.7239015954254285, iteration: 384653
loss: 1.0272302627563477,grad_norm: 0.7943832953804683, iteration: 384654
loss: 1.016278624534607,grad_norm: 0.920649315458041, iteration: 384655
loss: 0.9930221438407898,grad_norm: 0.7138155462226636, iteration: 384656
loss: 0.9744166135787964,grad_norm: 0.7019861356021452, iteration: 384657
loss: 1.027608036994934,grad_norm: 0.9824463919922545, iteration: 384658
loss: 1.0358493328094482,grad_norm: 0.7887663330663695, iteration: 384659
loss: 1.0284528732299805,grad_norm: 0.8424062548551455, iteration: 384660
loss: 1.00128173828125,grad_norm: 0.8427629306666623, iteration: 384661
loss: 0.9745653867721558,grad_norm: 0.9999995926333177, iteration: 384662
loss: 0.9834330081939697,grad_norm: 0.9999991652064902, iteration: 384663
loss: 1.0090333223342896,grad_norm: 0.7732782596194195, iteration: 384664
loss: 1.0129001140594482,grad_norm: 0.855876374803013, iteration: 384665
loss: 1.0026664733886719,grad_norm: 0.9411390816162085, iteration: 384666
loss: 0.9786348938941956,grad_norm: 0.9999989471162002, iteration: 384667
loss: 1.019149899482727,grad_norm: 0.7429708505881636, iteration: 384668
loss: 1.017959475517273,grad_norm: 0.723518360052828, iteration: 384669
loss: 1.0058038234710693,grad_norm: 0.9370502217289316, iteration: 384670
loss: 0.9722332954406738,grad_norm: 0.7381923050281448, iteration: 384671
loss: 1.0014787912368774,grad_norm: 0.9999991009943957, iteration: 384672
loss: 1.0164822340011597,grad_norm: 0.6706469206402879, iteration: 384673
loss: 1.0591330528259277,grad_norm: 0.8503638274392723, iteration: 384674
loss: 1.0089952945709229,grad_norm: 0.8286722318265987, iteration: 384675
loss: 1.0028046369552612,grad_norm: 0.7715454436452672, iteration: 384676
loss: 1.0038809776306152,grad_norm: 0.9999992565316708, iteration: 384677
loss: 1.0291035175323486,grad_norm: 0.8539749274063108, iteration: 384678
loss: 1.0241607427597046,grad_norm: 0.9999990414247945, iteration: 384679
loss: 1.058478593826294,grad_norm: 0.999999545227094, iteration: 384680
loss: 0.9713749289512634,grad_norm: 0.7380336346496782, iteration: 384681
loss: 1.0097044706344604,grad_norm: 0.8747112645685187, iteration: 384682
loss: 0.9965894818305969,grad_norm: 0.8378667303020993, iteration: 384683
loss: 0.9545725584030151,grad_norm: 0.8384096863112507, iteration: 384684
loss: 1.0836387872695923,grad_norm: 0.9999991642091253, iteration: 384685
loss: 1.016880989074707,grad_norm: 0.8307320189295108, iteration: 384686
loss: 0.9975824952125549,grad_norm: 0.9035986174285489, iteration: 384687
loss: 1.0021589994430542,grad_norm: 0.9999999746408513, iteration: 384688
loss: 1.0331676006317139,grad_norm: 0.8137904516345559, iteration: 384689
loss: 1.0378185510635376,grad_norm: 0.8254458763346461, iteration: 384690
loss: 1.0295188426971436,grad_norm: 0.8958824331246449, iteration: 384691
loss: 1.0320895910263062,grad_norm: 0.7122169752377864, iteration: 384692
loss: 0.960574746131897,grad_norm: 0.7801921020786748, iteration: 384693
loss: 0.9895287156105042,grad_norm: 0.8257619022825997, iteration: 384694
loss: 1.0026969909667969,grad_norm: 0.7841456586422834, iteration: 384695
loss: 1.074857473373413,grad_norm: 0.8136881778082236, iteration: 384696
loss: 0.9825002551078796,grad_norm: 0.7356112544548935, iteration: 384697
loss: 1.0200872421264648,grad_norm: 0.9999999690082525, iteration: 384698
loss: 0.9967077374458313,grad_norm: 0.9999995156371747, iteration: 384699
loss: 0.9907378554344177,grad_norm: 0.9332686933632497, iteration: 384700
loss: 1.0046584606170654,grad_norm: 0.9131096823951054, iteration: 384701
loss: 0.9881089329719543,grad_norm: 0.8999150736031609, iteration: 384702
loss: 0.9777666926383972,grad_norm: 0.8625169121104151, iteration: 384703
loss: 0.9742533564567566,grad_norm: 0.7399433561987869, iteration: 384704
loss: 1.0397847890853882,grad_norm: 0.9667579876976056, iteration: 384705
loss: 1.032180905342102,grad_norm: 0.8105082434585437, iteration: 384706
loss: 1.0116232633590698,grad_norm: 0.9999993078209108, iteration: 384707
loss: 1.0384620428085327,grad_norm: 0.8256676813731274, iteration: 384708
loss: 0.9643477201461792,grad_norm: 0.6852438548361187, iteration: 384709
loss: 1.0388782024383545,grad_norm: 0.6871763920640591, iteration: 384710
loss: 1.0065577030181885,grad_norm: 0.7633704688268982, iteration: 384711
loss: 0.9857901930809021,grad_norm: 0.9999998295435013, iteration: 384712
loss: 1.040414571762085,grad_norm: 0.9999991221326661, iteration: 384713
loss: 0.9889520406723022,grad_norm: 0.8461458294148856, iteration: 384714
loss: 1.0383851528167725,grad_norm: 0.9999993330336521, iteration: 384715
loss: 0.9970494508743286,grad_norm: 0.9524356583785476, iteration: 384716
loss: 0.9729812145233154,grad_norm: 0.9999998383816039, iteration: 384717
loss: 1.0244777202606201,grad_norm: 0.6474460756446516, iteration: 384718
loss: 0.9687305092811584,grad_norm: 0.7489027948853203, iteration: 384719
loss: 1.0038632154464722,grad_norm: 0.8441316690551347, iteration: 384720
loss: 1.0635724067687988,grad_norm: 0.8511710377762183, iteration: 384721
loss: 1.020736813545227,grad_norm: 0.8050212772982411, iteration: 384722
loss: 0.9938594698905945,grad_norm: 0.8425711088859038, iteration: 384723
loss: 1.0188844203948975,grad_norm: 0.999999700814276, iteration: 384724
loss: 1.009611964225769,grad_norm: 0.6592681092542296, iteration: 384725
loss: 1.0033912658691406,grad_norm: 0.6094456475945864, iteration: 384726
loss: 1.0281469821929932,grad_norm: 0.9946641168718591, iteration: 384727
loss: 0.9909935593605042,grad_norm: 0.775560814615356, iteration: 384728
loss: 1.0378446578979492,grad_norm: 0.636755226513581, iteration: 384729
loss: 1.1820083856582642,grad_norm: 0.9999996035072909, iteration: 384730
loss: 1.0038211345672607,grad_norm: 0.6583948309669642, iteration: 384731
loss: 1.003613829612732,grad_norm: 0.8415625596635509, iteration: 384732
loss: 1.0245667695999146,grad_norm: 0.8040857192867011, iteration: 384733
loss: 1.0078037977218628,grad_norm: 0.8325186156540165, iteration: 384734
loss: 0.9829394817352295,grad_norm: 0.7165418073619974, iteration: 384735
loss: 0.9729193449020386,grad_norm: 0.7589869312091171, iteration: 384736
loss: 1.019363284111023,grad_norm: 0.7377629119308357, iteration: 384737
loss: 0.9648801684379578,grad_norm: 0.7796638567395934, iteration: 384738
loss: 1.0081818103790283,grad_norm: 0.725206173103052, iteration: 384739
loss: 1.0150450468063354,grad_norm: 0.8804246483402037, iteration: 384740
loss: 1.0051374435424805,grad_norm: 0.9285272701368126, iteration: 384741
loss: 1.0236684083938599,grad_norm: 0.9999993679867619, iteration: 384742
loss: 1.0892040729522705,grad_norm: 0.9999994702907737, iteration: 384743
loss: 1.0264713764190674,grad_norm: 0.7596634787082297, iteration: 384744
loss: 1.005312442779541,grad_norm: 0.7007559918046926, iteration: 384745
loss: 0.990559995174408,grad_norm: 0.6472660052487249, iteration: 384746
loss: 1.0712451934814453,grad_norm: 0.9999993233032757, iteration: 384747
loss: 1.02292001247406,grad_norm: 1.0000000011047323, iteration: 384748
loss: 1.074541449546814,grad_norm: 0.9999991369058346, iteration: 384749
loss: 0.9954655766487122,grad_norm: 0.7123321403216555, iteration: 384750
loss: 1.0384305715560913,grad_norm: 0.9999994511591931, iteration: 384751
loss: 1.0901405811309814,grad_norm: 0.9432110462365566, iteration: 384752
loss: 1.0464588403701782,grad_norm: 0.8768279911274282, iteration: 384753
loss: 1.055901050567627,grad_norm: 0.8077247319671569, iteration: 384754
loss: 0.9878042340278625,grad_norm: 0.7752871051517743, iteration: 384755
loss: 1.0049245357513428,grad_norm: 0.8730111379152465, iteration: 384756
loss: 1.0227361917495728,grad_norm: 0.878139126001023, iteration: 384757
loss: 1.0227669477462769,grad_norm: 0.7162872531537464, iteration: 384758
loss: 1.016618013381958,grad_norm: 0.7721911127501957, iteration: 384759
loss: 1.0533840656280518,grad_norm: 0.8010047323847491, iteration: 384760
loss: 1.0100970268249512,grad_norm: 0.657831637438086, iteration: 384761
loss: 1.0619966983795166,grad_norm: 0.9999991369935823, iteration: 384762
loss: 0.9695522785186768,grad_norm: 0.6966328819529274, iteration: 384763
loss: 1.045731544494629,grad_norm: 0.901257850951824, iteration: 384764
loss: 0.9903325438499451,grad_norm: 0.999999492284959, iteration: 384765
loss: 1.0137550830841064,grad_norm: 0.7111501454904079, iteration: 384766
loss: 1.014158010482788,grad_norm: 0.8971160428984527, iteration: 384767
loss: 0.9982245564460754,grad_norm: 0.7357535720437597, iteration: 384768
loss: 1.0449397563934326,grad_norm: 0.8527337777691667, iteration: 384769
loss: 0.9780397415161133,grad_norm: 0.7389756673769341, iteration: 384770
loss: 0.991612434387207,grad_norm: 0.7278979309171846, iteration: 384771
loss: 1.0535439252853394,grad_norm: 0.999999119940807, iteration: 384772
loss: 0.9838338494300842,grad_norm: 0.8479152453468579, iteration: 384773
loss: 1.1074111461639404,grad_norm: 0.9999993332449006, iteration: 384774
loss: 1.027178168296814,grad_norm: 0.7666582497269064, iteration: 384775
loss: 1.0345425605773926,grad_norm: 0.8301256356978091, iteration: 384776
loss: 1.0042572021484375,grad_norm: 0.9999993204259832, iteration: 384777
loss: 1.0337655544281006,grad_norm: 0.8700738802502491, iteration: 384778
loss: 1.0079575777053833,grad_norm: 0.7112707993842079, iteration: 384779
loss: 0.9831832051277161,grad_norm: 0.8540576047753091, iteration: 384780
loss: 1.0042320489883423,grad_norm: 0.6331495118274036, iteration: 384781
loss: 1.1254851818084717,grad_norm: 0.9999997437146185, iteration: 384782
loss: 1.0130574703216553,grad_norm: 0.6353997458452079, iteration: 384783
loss: 1.0920631885528564,grad_norm: 0.9999999617058081, iteration: 384784
loss: 1.0209929943084717,grad_norm: 0.6767809683628595, iteration: 384785
loss: 0.9858064651489258,grad_norm: 0.7809066726049707, iteration: 384786
loss: 0.9654274582862854,grad_norm: 0.7091220698426718, iteration: 384787
loss: 1.0044623613357544,grad_norm: 0.7584840107721593, iteration: 384788
loss: 0.9794370532035828,grad_norm: 0.7638996236399344, iteration: 384789
loss: 1.0277602672576904,grad_norm: 0.8678714140669408, iteration: 384790
loss: 1.0016283988952637,grad_norm: 0.9565133162720857, iteration: 384791
loss: 1.0066115856170654,grad_norm: 0.7603355103736174, iteration: 384792
loss: 0.9930330514907837,grad_norm: 0.9999989236616484, iteration: 384793
loss: 1.0170494318008423,grad_norm: 0.9999992830548007, iteration: 384794
loss: 1.0179553031921387,grad_norm: 0.8328873286089007, iteration: 384795
loss: 1.097182273864746,grad_norm: 0.841524939296796, iteration: 384796
loss: 1.0161454677581787,grad_norm: 0.7370636717781978, iteration: 384797
loss: 1.0149949789047241,grad_norm: 0.8533958861628018, iteration: 384798
loss: 1.0544317960739136,grad_norm: 0.7042215247549912, iteration: 384799
loss: 1.0190727710723877,grad_norm: 0.8760642201273994, iteration: 384800
loss: 1.046816349029541,grad_norm: 0.9999990933863897, iteration: 384801
loss: 0.978646457195282,grad_norm: 0.9999990804546177, iteration: 384802
loss: 0.9525545835494995,grad_norm: 0.853437009261263, iteration: 384803
loss: 1.034758448600769,grad_norm: 0.999999128465388, iteration: 384804
loss: 0.9655445218086243,grad_norm: 0.9210655346747691, iteration: 384805
loss: 0.99012690782547,grad_norm: 0.8545118392139386, iteration: 384806
loss: 1.0291990041732788,grad_norm: 0.9652277803503015, iteration: 384807
loss: 1.022238850593567,grad_norm: 0.9040660024209858, iteration: 384808
loss: 1.0147992372512817,grad_norm: 0.823501608571912, iteration: 384809
loss: 0.972790539264679,grad_norm: 0.6501596742617752, iteration: 384810
loss: 1.0767531394958496,grad_norm: 0.8391031861752567, iteration: 384811
loss: 1.0105210542678833,grad_norm: 0.725588465386397, iteration: 384812
loss: 1.0233287811279297,grad_norm: 0.7710753614700313, iteration: 384813
loss: 1.086423635482788,grad_norm: 0.9999993811613636, iteration: 384814
loss: 1.0106630325317383,grad_norm: 0.7958230240055486, iteration: 384815
loss: 0.9960765838623047,grad_norm: 0.9625053664555336, iteration: 384816
loss: 0.995330274105072,grad_norm: 0.8470644049470611, iteration: 384817
loss: 1.0007551908493042,grad_norm: 0.6906876731169318, iteration: 384818
loss: 1.0449563264846802,grad_norm: 0.9999993865330654, iteration: 384819
loss: 1.0254944562911987,grad_norm: 0.8823195451277684, iteration: 384820
loss: 0.9860518574714661,grad_norm: 0.7624497647095633, iteration: 384821
loss: 1.0808180570602417,grad_norm: 0.9999990863430663, iteration: 384822
loss: 0.9926353096961975,grad_norm: 0.7679234562535284, iteration: 384823
loss: 1.0008158683776855,grad_norm: 0.7186162970059944, iteration: 384824
loss: 1.006445050239563,grad_norm: 0.7563861039654582, iteration: 384825
loss: 1.0145939588546753,grad_norm: 0.787192123490765, iteration: 384826
loss: 1.0965774059295654,grad_norm: 0.8169000447184714, iteration: 384827
loss: 0.9958296418190002,grad_norm: 0.9999996634212834, iteration: 384828
loss: 1.0045210123062134,grad_norm: 0.7320300658683282, iteration: 384829
loss: 1.0218100547790527,grad_norm: 0.8272498506415734, iteration: 384830
loss: 0.9811099171638489,grad_norm: 0.6792287566800741, iteration: 384831
loss: 1.0249388217926025,grad_norm: 0.7851165908581631, iteration: 384832
loss: 1.020692229270935,grad_norm: 0.7187791574853525, iteration: 384833
loss: 1.0312626361846924,grad_norm: 0.7058491108014299, iteration: 384834
loss: 1.0061436891555786,grad_norm: 0.9039279907864753, iteration: 384835
loss: 0.9998703002929688,grad_norm: 0.930817788098451, iteration: 384836
loss: 1.0125988721847534,grad_norm: 0.9999999456590026, iteration: 384837
loss: 1.0561579465866089,grad_norm: 0.9999992380864186, iteration: 384838
loss: 1.0319472551345825,grad_norm: 0.8356697254595831, iteration: 384839
loss: 0.997283399105072,grad_norm: 0.8302796969005359, iteration: 384840
loss: 1.020554780960083,grad_norm: 0.9999994569564837, iteration: 384841
loss: 1.004716396331787,grad_norm: 0.8522485586916846, iteration: 384842
loss: 0.9882133603096008,grad_norm: 0.9999991751484097, iteration: 384843
loss: 0.9949873089790344,grad_norm: 0.7571519670034583, iteration: 384844
loss: 0.977689802646637,grad_norm: 0.8857002108934556, iteration: 384845
loss: 0.9675918817520142,grad_norm: 0.8839747590235937, iteration: 384846
loss: 1.016890048980713,grad_norm: 0.9838227065053275, iteration: 384847
loss: 1.0336426496505737,grad_norm: 0.7719972498524827, iteration: 384848
loss: 0.9920002222061157,grad_norm: 0.7328842810735783, iteration: 384849
loss: 0.9588883519172668,grad_norm: 0.7627632965210112, iteration: 384850
loss: 1.0227532386779785,grad_norm: 0.9999998522933657, iteration: 384851
loss: 1.0218580961227417,grad_norm: 0.7629699688434962, iteration: 384852
loss: 1.0233101844787598,grad_norm: 0.9999991616054034, iteration: 384853
loss: 0.9992741346359253,grad_norm: 0.7762268212598982, iteration: 384854
loss: 0.9824200868606567,grad_norm: 0.9485702229423318, iteration: 384855
loss: 1.0167818069458008,grad_norm: 0.7418468771987112, iteration: 384856
loss: 1.0175926685333252,grad_norm: 1.0000000034249672, iteration: 384857
loss: 1.0396277904510498,grad_norm: 0.8426751610661318, iteration: 384858
loss: 0.9880093336105347,grad_norm: 0.7236171330323996, iteration: 384859
loss: 0.9928575158119202,grad_norm: 0.8573378556269797, iteration: 384860
loss: 1.0530846118927002,grad_norm: 0.999999508593856, iteration: 384861
loss: 0.9945910573005676,grad_norm: 0.8381828847984762, iteration: 384862
loss: 0.9910218715667725,grad_norm: 0.7729532167169414, iteration: 384863
loss: 0.9871002435684204,grad_norm: 0.7815176775630995, iteration: 384864
loss: 1.0072098970413208,grad_norm: 0.7774929583463074, iteration: 384865
loss: 0.9837549328804016,grad_norm: 0.8205700100680416, iteration: 384866
loss: 0.9964353442192078,grad_norm: 0.9999991223803971, iteration: 384867
loss: 1.0473037958145142,grad_norm: 0.760072118584838, iteration: 384868
loss: 1.0159912109375,grad_norm: 0.8433800752361686, iteration: 384869
loss: 0.9859920740127563,grad_norm: 0.7900453808749343, iteration: 384870
loss: 0.9921163320541382,grad_norm: 0.8032491365912673, iteration: 384871
loss: 1.025094747543335,grad_norm: 0.7379169232377568, iteration: 384872
loss: 0.9890016317367554,grad_norm: 0.9999995906669396, iteration: 384873
loss: 0.9766882061958313,grad_norm: 0.742457234244016, iteration: 384874
loss: 0.9918054342269897,grad_norm: 0.9999993472811245, iteration: 384875
loss: 1.0100178718566895,grad_norm: 0.6485558145336903, iteration: 384876
loss: 0.9953930974006653,grad_norm: 0.834834231960158, iteration: 384877
loss: 0.9911500215530396,grad_norm: 0.778137213539403, iteration: 384878
loss: 1.0461599826812744,grad_norm: 0.8688252557600009, iteration: 384879
loss: 0.9987319707870483,grad_norm: 0.7156874449280898, iteration: 384880
loss: 1.016964316368103,grad_norm: 0.7970470616476591, iteration: 384881
loss: 1.0353280305862427,grad_norm: 0.8544067454869871, iteration: 384882
loss: 0.9834244847297668,grad_norm: 0.9999993642213014, iteration: 384883
loss: 0.9801630973815918,grad_norm: 0.8005548348043856, iteration: 384884
loss: 1.0068738460540771,grad_norm: 0.7867306322677128, iteration: 384885
loss: 1.015195608139038,grad_norm: 0.843020789720823, iteration: 384886
loss: 0.9888428449630737,grad_norm: 0.7852583657396104, iteration: 384887
loss: 0.9888968467712402,grad_norm: 0.8386889964429343, iteration: 384888
loss: 0.9898034930229187,grad_norm: 0.8259140272407964, iteration: 384889
loss: 1.016252875328064,grad_norm: 0.8445483591684996, iteration: 384890
loss: 1.1330804824829102,grad_norm: 0.7335225121364722, iteration: 384891
loss: 1.0505588054656982,grad_norm: 0.8328938735005276, iteration: 384892
loss: 0.9919936060905457,grad_norm: 0.7895184981728878, iteration: 384893
loss: 0.9719944596290588,grad_norm: 0.8171609271716084, iteration: 384894
loss: 1.043927550315857,grad_norm: 0.6676501984548934, iteration: 384895
loss: 0.9959239363670349,grad_norm: 0.9999993100008268, iteration: 384896
loss: 0.9659257531166077,grad_norm: 0.660651353680247, iteration: 384897
loss: 1.038864016532898,grad_norm: 0.6851543566990329, iteration: 384898
loss: 0.969115674495697,grad_norm: 0.8808128977099822, iteration: 384899
loss: 0.9788546562194824,grad_norm: 0.8200754791018507, iteration: 384900
loss: 0.9945847988128662,grad_norm: 0.8462496130437285, iteration: 384901
loss: 1.0172021389007568,grad_norm: 0.9999992945085835, iteration: 384902
loss: 1.0048853158950806,grad_norm: 0.6825562079056978, iteration: 384903
loss: 0.9986562132835388,grad_norm: 0.7639245352819266, iteration: 384904
loss: 1.1237996816635132,grad_norm: 0.9999993609177004, iteration: 384905
loss: 1.0039352178573608,grad_norm: 0.9128546212085416, iteration: 384906
loss: 1.0805776119232178,grad_norm: 0.9999993554788524, iteration: 384907
loss: 0.9801644086837769,grad_norm: 0.9173402471030313, iteration: 384908
loss: 1.0553663969039917,grad_norm: 0.929596184360426, iteration: 384909
loss: 1.0058600902557373,grad_norm: 0.9551917215281526, iteration: 384910
loss: 0.9600390791893005,grad_norm: 0.9143830786347631, iteration: 384911
loss: 1.0115959644317627,grad_norm: 0.7587159030595316, iteration: 384912
loss: 1.0641655921936035,grad_norm: 0.999999701403387, iteration: 384913
loss: 0.977354109287262,grad_norm: 0.8188058118098412, iteration: 384914
loss: 0.9718340635299683,grad_norm: 0.7552205231782605, iteration: 384915
loss: 0.994450032711029,grad_norm: 0.8113119926918033, iteration: 384916
loss: 1.0049408674240112,grad_norm: 0.6598796425346812, iteration: 384917
loss: 1.034326195716858,grad_norm: 0.9910737328658988, iteration: 384918
loss: 1.0960426330566406,grad_norm: 0.9999999459511749, iteration: 384919
loss: 1.0167372226715088,grad_norm: 0.9999992357557119, iteration: 384920
loss: 1.0514551401138306,grad_norm: 0.8755258535293118, iteration: 384921
loss: 0.9814367890357971,grad_norm: 0.8484657985929364, iteration: 384922
loss: 1.0051534175872803,grad_norm: 0.682916566769779, iteration: 384923
loss: 1.080824613571167,grad_norm: 0.9947682610478078, iteration: 384924
loss: 1.0613632202148438,grad_norm: 0.9999996373519592, iteration: 384925
loss: 1.0691386461257935,grad_norm: 0.999999328254248, iteration: 384926
loss: 1.026137351989746,grad_norm: 0.9579707884476842, iteration: 384927
loss: 1.0206122398376465,grad_norm: 0.8136749847632161, iteration: 384928
loss: 1.0083675384521484,grad_norm: 0.7383241579748101, iteration: 384929
loss: 0.9860382676124573,grad_norm: 0.846415207357026, iteration: 384930
loss: 0.9912450313568115,grad_norm: 0.7403476149399458, iteration: 384931
loss: 0.9996947646141052,grad_norm: 0.770524633517272, iteration: 384932
loss: 1.056225061416626,grad_norm: 0.9999991292561727, iteration: 384933
loss: 0.9860886335372925,grad_norm: 0.8140003161753484, iteration: 384934
loss: 1.0331312417984009,grad_norm: 0.8189267109921098, iteration: 384935
loss: 1.034554362297058,grad_norm: 0.6974482250021998, iteration: 384936
loss: 1.0838929414749146,grad_norm: 0.9999991817166544, iteration: 384937
loss: 1.0012587308883667,grad_norm: 0.9999992172601929, iteration: 384938
loss: 1.0321632623672485,grad_norm: 0.9803044654245922, iteration: 384939
loss: 1.0881942510604858,grad_norm: 0.9207223672500231, iteration: 384940
loss: 0.9847848415374756,grad_norm: 0.7340784316941321, iteration: 384941
loss: 1.036257028579712,grad_norm: 0.8433630636023461, iteration: 384942
loss: 1.023407220840454,grad_norm: 0.9618947882426503, iteration: 384943
loss: 0.9779989123344421,grad_norm: 0.9999993190287605, iteration: 384944
loss: 0.9861345291137695,grad_norm: 0.7079925443852852, iteration: 384945
loss: 1.006097435951233,grad_norm: 0.9222461955733344, iteration: 384946
loss: 0.9798530340194702,grad_norm: 0.9088413283653969, iteration: 384947
loss: 1.018641471862793,grad_norm: 0.7876283608391683, iteration: 384948
loss: 0.9848808646202087,grad_norm: 0.7094158739341264, iteration: 384949
loss: 0.9632555246353149,grad_norm: 0.7899475325461773, iteration: 384950
loss: 1.053146481513977,grad_norm: 0.9999998009734173, iteration: 384951
loss: 1.241845726966858,grad_norm: 0.9999994268834314, iteration: 384952
loss: 1.000143051147461,grad_norm: 0.9999999231575595, iteration: 384953
loss: 1.0699232816696167,grad_norm: 0.9999999037717199, iteration: 384954
loss: 1.0028125047683716,grad_norm: 0.7932346952162344, iteration: 384955
loss: 0.9961113333702087,grad_norm: 0.9805430661440983, iteration: 384956
loss: 1.0130515098571777,grad_norm: 1.0000000108350025, iteration: 384957
loss: 1.0038609504699707,grad_norm: 0.8134888578607532, iteration: 384958
loss: 0.9774696230888367,grad_norm: 0.8457899891877397, iteration: 384959
loss: 1.0653005838394165,grad_norm: 0.9999991769749537, iteration: 384960
loss: 0.9747573733329773,grad_norm: 0.808128196476024, iteration: 384961
loss: 1.0106853246688843,grad_norm: 0.7115360159906186, iteration: 384962
loss: 1.0508205890655518,grad_norm: 0.9999991094805876, iteration: 384963
loss: 1.0073717832565308,grad_norm: 0.7236681711693519, iteration: 384964
loss: 1.0259917974472046,grad_norm: 0.7884150429124672, iteration: 384965
loss: 0.9703094959259033,grad_norm: 0.9481415927303437, iteration: 384966
loss: 0.9941487908363342,grad_norm: 0.9999998812342163, iteration: 384967
loss: 1.010922908782959,grad_norm: 0.9999996386617583, iteration: 384968
loss: 0.9947901368141174,grad_norm: 0.9805793133188293, iteration: 384969
loss: 1.0187820196151733,grad_norm: 0.9334187316244883, iteration: 384970
loss: 0.9812273979187012,grad_norm: 0.999999768201262, iteration: 384971
loss: 1.0492523908615112,grad_norm: 0.9407267850981209, iteration: 384972
loss: 0.9907552003860474,grad_norm: 0.8127809325591607, iteration: 384973
loss: 0.971524178981781,grad_norm: 0.9016898593156443, iteration: 384974
loss: 0.9893238544464111,grad_norm: 0.7119989905234636, iteration: 384975
loss: 0.965106725692749,grad_norm: 0.7966211141358466, iteration: 384976
loss: 0.9943726658821106,grad_norm: 0.8244077064656721, iteration: 384977
loss: 0.9777247309684753,grad_norm: 0.8893452623839454, iteration: 384978
loss: 0.9967705607414246,grad_norm: 0.6950000006348233, iteration: 384979
loss: 1.0040007829666138,grad_norm: 0.7311434133863238, iteration: 384980
loss: 1.0844712257385254,grad_norm: 0.8118621735562017, iteration: 384981
loss: 0.9814329147338867,grad_norm: 0.8039070849755966, iteration: 384982
loss: 1.0443098545074463,grad_norm: 0.7594982491063439, iteration: 384983
loss: 1.0058871507644653,grad_norm: 0.9999992102469458, iteration: 384984
loss: 0.9843795299530029,grad_norm: 0.8268840589322528, iteration: 384985
loss: 0.9993688464164734,grad_norm: 0.9999991834080529, iteration: 384986
loss: 0.9752148985862732,grad_norm: 0.8085247103736274, iteration: 384987
loss: 1.0301119089126587,grad_norm: 0.8587756527107573, iteration: 384988
loss: 0.983758807182312,grad_norm: 0.9085916645178707, iteration: 384989
loss: 0.9954038262367249,grad_norm: 0.7761281271224026, iteration: 384990
loss: 1.1155023574829102,grad_norm: 0.9999998699933774, iteration: 384991
loss: 0.9866724014282227,grad_norm: 0.9999998029403608, iteration: 384992
loss: 0.9647009372711182,grad_norm: 0.8375157996144507, iteration: 384993
loss: 0.9891231060028076,grad_norm: 0.8010402616472606, iteration: 384994
loss: 1.02312433719635,grad_norm: 0.9277242384060677, iteration: 384995
loss: 1.0325223207473755,grad_norm: 0.8207593653067917, iteration: 384996
loss: 1.00173819065094,grad_norm: 0.7423848945712946, iteration: 384997
loss: 1.0157238245010376,grad_norm: 0.8132716517487961, iteration: 384998
loss: 0.9728209376335144,grad_norm: 0.787218739784811, iteration: 384999
loss: 1.0106446743011475,grad_norm: 0.9999991002289439, iteration: 385000
loss: 0.9581189751625061,grad_norm: 0.8121675331153122, iteration: 385001
loss: 1.0199713706970215,grad_norm: 0.7518469603762198, iteration: 385002
loss: 1.0296082496643066,grad_norm: 0.9999990860101147, iteration: 385003
loss: 1.0087741613388062,grad_norm: 0.962289715959024, iteration: 385004
loss: 0.9675557017326355,grad_norm: 0.9999992731414794, iteration: 385005
loss: 0.947370171546936,grad_norm: 0.8197648352547068, iteration: 385006
loss: 1.0332348346710205,grad_norm: 0.8888966261579966, iteration: 385007
loss: 0.9874598979949951,grad_norm: 0.7449202420892007, iteration: 385008
loss: 0.9649860858917236,grad_norm: 0.8014127229385533, iteration: 385009
loss: 0.9955069422721863,grad_norm: 0.7480081095241544, iteration: 385010
loss: 1.1392849683761597,grad_norm: 0.9999997149368485, iteration: 385011
loss: 1.1286983489990234,grad_norm: 0.9999995580672776, iteration: 385012
loss: 1.0139119625091553,grad_norm: 0.9999990470987823, iteration: 385013
loss: 0.9925835132598877,grad_norm: 0.8439365036615634, iteration: 385014
loss: 0.9943472743034363,grad_norm: 0.7170601502373541, iteration: 385015
loss: 0.9946792721748352,grad_norm: 0.8086841606378382, iteration: 385016
loss: 0.9826008677482605,grad_norm: 0.7027874210024121, iteration: 385017
loss: 1.1101237535476685,grad_norm: 0.9999989847850443, iteration: 385018
loss: 0.9788187742233276,grad_norm: 0.9877291210853442, iteration: 385019
loss: 1.0029406547546387,grad_norm: 0.9366275182396631, iteration: 385020
loss: 1.0490654706954956,grad_norm: 0.7710108855792163, iteration: 385021
loss: 0.9735159277915955,grad_norm: 0.8912624915768577, iteration: 385022
loss: 1.0091724395751953,grad_norm: 0.7613965556972909, iteration: 385023
loss: 1.01885187625885,grad_norm: 0.9999991540991663, iteration: 385024
loss: 0.9852963089942932,grad_norm: 0.9999992730509729, iteration: 385025
loss: 1.0183725357055664,grad_norm: 0.9999990802854295, iteration: 385026
loss: 0.9933897256851196,grad_norm: 0.9999992006687788, iteration: 385027
loss: 1.3623789548873901,grad_norm: 0.9999998450838018, iteration: 385028
loss: 1.081115961074829,grad_norm: 0.832041536150065, iteration: 385029
loss: 0.9896783828735352,grad_norm: 0.8899846355780094, iteration: 385030
loss: 1.0237576961517334,grad_norm: 0.8313789961674662, iteration: 385031
loss: 1.1215746402740479,grad_norm: 0.9999999151800586, iteration: 385032
loss: 1.028852105140686,grad_norm: 0.8729374034909381, iteration: 385033
loss: 1.0162811279296875,grad_norm: 0.8828756209204562, iteration: 385034
loss: 1.0550243854522705,grad_norm: 0.9999990263707398, iteration: 385035
loss: 1.0783214569091797,grad_norm: 0.7751122403410295, iteration: 385036
loss: 1.0622109174728394,grad_norm: 0.9999995193206985, iteration: 385037
loss: 1.0038812160491943,grad_norm: 0.8136148972909566, iteration: 385038
loss: 0.9796637892723083,grad_norm: 0.818194187786216, iteration: 385039
loss: 1.0198783874511719,grad_norm: 0.7718209376338471, iteration: 385040
loss: 1.041001558303833,grad_norm: 0.7806823195950535, iteration: 385041
loss: 1.0285096168518066,grad_norm: 0.9999990997197958, iteration: 385042
loss: 1.0307198762893677,grad_norm: 0.8580503325785943, iteration: 385043
loss: 1.0392776727676392,grad_norm: 0.977230623354953, iteration: 385044
loss: 0.9551045894622803,grad_norm: 0.9480206619189853, iteration: 385045
loss: 1.0135668516159058,grad_norm: 0.7348432969703728, iteration: 385046
loss: 1.0447672605514526,grad_norm: 0.8430039190643973, iteration: 385047
loss: 1.0127601623535156,grad_norm: 0.7098988999499845, iteration: 385048
loss: 1.0487563610076904,grad_norm: 0.8696736505391207, iteration: 385049
loss: 0.9980455636978149,grad_norm: 0.7224961761836967, iteration: 385050
loss: 0.9961377382278442,grad_norm: 0.7860434279816318, iteration: 385051
loss: 1.001975417137146,grad_norm: 0.9706781610309511, iteration: 385052
loss: 1.076978087425232,grad_norm: 0.9248298240482898, iteration: 385053
loss: 1.014497995376587,grad_norm: 0.7741331821744185, iteration: 385054
loss: 0.9996736645698547,grad_norm: 0.7586698147977905, iteration: 385055
loss: 1.007320761680603,grad_norm: 0.9473275895676487, iteration: 385056
loss: 1.0695728063583374,grad_norm: 0.9999995993181793, iteration: 385057
loss: 0.9909294843673706,grad_norm: 0.821146801994085, iteration: 385058
loss: 1.0018951892852783,grad_norm: 0.7454560305613732, iteration: 385059
loss: 1.0397086143493652,grad_norm: 0.9723712779231047, iteration: 385060
loss: 0.9870250225067139,grad_norm: 0.7506534378344669, iteration: 385061
loss: 0.9948942065238953,grad_norm: 0.781437107536645, iteration: 385062
loss: 0.9581704139709473,grad_norm: 0.7268582907896519, iteration: 385063
loss: 1.0143306255340576,grad_norm: 0.7592174668031753, iteration: 385064
loss: 1.029389500617981,grad_norm: 0.9999999917582052, iteration: 385065
loss: 1.0501898527145386,grad_norm: 0.9999994088281029, iteration: 385066
loss: 1.0766218900680542,grad_norm: 0.9999991948090754, iteration: 385067
loss: 1.0214799642562866,grad_norm: 0.9999992979035766, iteration: 385068
loss: 1.0192115306854248,grad_norm: 0.8654005595146673, iteration: 385069
loss: 0.9839704036712646,grad_norm: 0.763008565811999, iteration: 385070
loss: 0.9921518564224243,grad_norm: 0.8874896304137533, iteration: 385071
loss: 1.0199521780014038,grad_norm: 0.9585295331141351, iteration: 385072
loss: 1.0038650035858154,grad_norm: 0.9999989710412597, iteration: 385073
loss: 1.08025324344635,grad_norm: 0.999999124997016, iteration: 385074
loss: 0.9982423782348633,grad_norm: 0.7509900757829442, iteration: 385075
loss: 0.9877200126647949,grad_norm: 0.8361500042857839, iteration: 385076
loss: 0.9600900411605835,grad_norm: 0.75535474841387, iteration: 385077
loss: 1.001206636428833,grad_norm: 0.8494687619880197, iteration: 385078
loss: 1.0900732278823853,grad_norm: 0.8785235954760627, iteration: 385079
loss: 0.9498182535171509,grad_norm: 0.7525382253174706, iteration: 385080
loss: 1.0092651844024658,grad_norm: 0.8323234697014887, iteration: 385081
loss: 1.0907413959503174,grad_norm: 0.9999992590758806, iteration: 385082
loss: 1.040287971496582,grad_norm: 0.9999991840071286, iteration: 385083
loss: 1.0210264921188354,grad_norm: 0.883854486457375, iteration: 385084
loss: 1.0957574844360352,grad_norm: 0.9999991622296247, iteration: 385085
loss: 1.0309315919876099,grad_norm: 0.7882265651150585, iteration: 385086
loss: 1.0993735790252686,grad_norm: 0.9999990559454163, iteration: 385087
loss: 1.0459963083267212,grad_norm: 0.9122013687192352, iteration: 385088
loss: 1.0341607332229614,grad_norm: 0.9265681592271707, iteration: 385089
loss: 0.9647327065467834,grad_norm: 0.872385867852351, iteration: 385090
loss: 1.0174307823181152,grad_norm: 0.8243706355504987, iteration: 385091
loss: 1.0432848930358887,grad_norm: 0.9489534502696921, iteration: 385092
loss: 0.9995110630989075,grad_norm: 0.819714767964742, iteration: 385093
loss: 1.052236557006836,grad_norm: 0.99999897821375, iteration: 385094
loss: 1.065100908279419,grad_norm: 0.9149315476666371, iteration: 385095
loss: 1.0323408842086792,grad_norm: 0.9999994747180101, iteration: 385096
loss: 1.00123131275177,grad_norm: 0.8005508571267725, iteration: 385097
loss: 1.016788125038147,grad_norm: 0.7733999160173186, iteration: 385098
loss: 1.0101912021636963,grad_norm: 0.7417398965974629, iteration: 385099
loss: 1.0136626958847046,grad_norm: 0.9027644290702117, iteration: 385100
loss: 1.0578068494796753,grad_norm: 0.8810200246139485, iteration: 385101
loss: 0.9917213916778564,grad_norm: 0.8983523362467145, iteration: 385102
loss: 1.0313831567764282,grad_norm: 0.9999998773440445, iteration: 385103
loss: 0.9809070825576782,grad_norm: 0.6629052911699326, iteration: 385104
loss: 1.0393832921981812,grad_norm: 0.9999990902735103, iteration: 385105
loss: 1.0876520872116089,grad_norm: 0.7703853566831849, iteration: 385106
loss: 1.0267260074615479,grad_norm: 0.9999993195848225, iteration: 385107
loss: 1.0830000638961792,grad_norm: 0.9999992691824915, iteration: 385108
loss: 1.2541508674621582,grad_norm: 0.999999911027182, iteration: 385109
loss: 1.1928328275680542,grad_norm: 0.9999999375096355, iteration: 385110
loss: 0.9720607995986938,grad_norm: 0.9086822192779879, iteration: 385111
loss: 1.0859495401382446,grad_norm: 0.9999998328343888, iteration: 385112
loss: 1.02095627784729,grad_norm: 0.9502941299385237, iteration: 385113
loss: 1.0185688734054565,grad_norm: 0.9098603596397679, iteration: 385114
loss: 1.2271673679351807,grad_norm: 0.9999995992016346, iteration: 385115
loss: 0.962011456489563,grad_norm: 0.7086435091364424, iteration: 385116
loss: 1.043328046798706,grad_norm: 0.999999523350899, iteration: 385117
loss: 0.9981048703193665,grad_norm: 0.6978272869781939, iteration: 385118
loss: 1.2569222450256348,grad_norm: 0.9999993201426992, iteration: 385119
loss: 0.9774819612503052,grad_norm: 0.9540136731135627, iteration: 385120
loss: 0.972248375415802,grad_norm: 0.9302657266368072, iteration: 385121
loss: 1.095595359802246,grad_norm: 0.9999999216031685, iteration: 385122
loss: 1.0311734676361084,grad_norm: 0.8860263498247841, iteration: 385123
loss: 1.013512134552002,grad_norm: 0.7786538580176253, iteration: 385124
loss: 1.059943675994873,grad_norm: 1.000000035993477, iteration: 385125
loss: 1.0875102281570435,grad_norm: 0.9999994904879199, iteration: 385126
loss: 1.0979318618774414,grad_norm: 0.9999999331143476, iteration: 385127
loss: 1.076941967010498,grad_norm: 0.9999999916939889, iteration: 385128
loss: 1.0754132270812988,grad_norm: 0.7544333925738006, iteration: 385129
loss: 0.9850578904151917,grad_norm: 0.7550119238151954, iteration: 385130
loss: 1.0600976943969727,grad_norm: 0.9999996806297049, iteration: 385131
loss: 1.0376720428466797,grad_norm: 0.8716338017736168, iteration: 385132
loss: 1.005026936531067,grad_norm: 0.717279816774331, iteration: 385133
loss: 1.0037562847137451,grad_norm: 0.8968834720905466, iteration: 385134
loss: 0.9951136708259583,grad_norm: 0.8088080407950018, iteration: 385135
loss: 1.0655467510223389,grad_norm: 0.999999655798848, iteration: 385136
loss: 0.9566131234169006,grad_norm: 0.7821368923442877, iteration: 385137
loss: 1.006771206855774,grad_norm: 0.7436855090279707, iteration: 385138
loss: 1.0615874528884888,grad_norm: 0.999998996037766, iteration: 385139
loss: 1.021378755569458,grad_norm: 0.8121124364053698, iteration: 385140
loss: 1.0560215711593628,grad_norm: 0.9999994678537208, iteration: 385141
loss: 1.0165719985961914,grad_norm: 0.8962084861867167, iteration: 385142
loss: 1.032118797302246,grad_norm: 0.6898401177935253, iteration: 385143
loss: 1.0288792848587036,grad_norm: 0.9999992462499698, iteration: 385144
loss: 1.0514410734176636,grad_norm: 0.9999994803430653, iteration: 385145
loss: 1.049855351448059,grad_norm: 0.9210882255944878, iteration: 385146
loss: 1.0389878749847412,grad_norm: 1.0000000193104805, iteration: 385147
loss: 1.0228959321975708,grad_norm: 0.8290630002717146, iteration: 385148
loss: 1.0057718753814697,grad_norm: 0.7240332917011537, iteration: 385149
loss: 0.9950402975082397,grad_norm: 0.9999992653513321, iteration: 385150
loss: 1.0747439861297607,grad_norm: 0.9999990801582254, iteration: 385151
loss: 0.9836038947105408,grad_norm: 0.7153498858562702, iteration: 385152
loss: 0.9905441999435425,grad_norm: 0.9328464757437481, iteration: 385153
loss: 1.012993335723877,grad_norm: 0.7675467251795555, iteration: 385154
loss: 1.0031685829162598,grad_norm: 0.7445166553249368, iteration: 385155
loss: 0.9914976358413696,grad_norm: 0.8330516366328855, iteration: 385156
loss: 1.007472038269043,grad_norm: 0.9999990985094703, iteration: 385157
loss: 1.03424870967865,grad_norm: 0.8267235924097037, iteration: 385158
loss: 0.9835652112960815,grad_norm: 0.7827469772257636, iteration: 385159
loss: 1.0130038261413574,grad_norm: 0.8276641895178684, iteration: 385160
loss: 0.9685378670692444,grad_norm: 0.8152491406075999, iteration: 385161
loss: 1.0486407279968262,grad_norm: 0.8185512821937927, iteration: 385162
loss: 1.014050841331482,grad_norm: 0.8576229112561828, iteration: 385163
loss: 0.9962409138679504,grad_norm: 0.9016717205875344, iteration: 385164
loss: 0.9898954629898071,grad_norm: 0.9999990862378626, iteration: 385165
loss: 1.0690860748291016,grad_norm: 0.9999997401090267, iteration: 385166
loss: 1.0514291524887085,grad_norm: 0.9999993552120093, iteration: 385167
loss: 1.0093045234680176,grad_norm: 0.7264406725449613, iteration: 385168
loss: 1.0434366464614868,grad_norm: 0.9999993701285735, iteration: 385169
loss: 1.0146138668060303,grad_norm: 0.9265263579906615, iteration: 385170
loss: 1.019813060760498,grad_norm: 0.6390084961684414, iteration: 385171
loss: 0.9938564300537109,grad_norm: 0.8770373089032003, iteration: 385172
loss: 0.986737847328186,grad_norm: 0.827158038506315, iteration: 385173
loss: 1.0280044078826904,grad_norm: 0.9999997621194631, iteration: 385174
loss: 1.023014783859253,grad_norm: 0.7360378525087474, iteration: 385175
loss: 1.0218476057052612,grad_norm: 0.999999009404401, iteration: 385176
loss: 1.015523910522461,grad_norm: 0.89765852274066, iteration: 385177
loss: 1.024256944656372,grad_norm: 0.8940478790786359, iteration: 385178
loss: 0.9960563778877258,grad_norm: 0.7727693496132052, iteration: 385179
loss: 0.9941560626029968,grad_norm: 0.7922122428930004, iteration: 385180
loss: 1.103651762008667,grad_norm: 0.7995268930607229, iteration: 385181
loss: 1.043796420097351,grad_norm: 0.8270436856842627, iteration: 385182
loss: 0.9881647825241089,grad_norm: 0.8121433253783877, iteration: 385183
loss: 1.0256967544555664,grad_norm: 0.8749922973223208, iteration: 385184
loss: 1.0500630140304565,grad_norm: 0.9999990705356945, iteration: 385185
loss: 1.0298818349838257,grad_norm: 0.9999991352910681, iteration: 385186
loss: 0.989392101764679,grad_norm: 0.9175570066087786, iteration: 385187
loss: 1.009790062904358,grad_norm: 0.815945041102586, iteration: 385188
loss: 1.0783265829086304,grad_norm: 0.9999994193244255, iteration: 385189
loss: 1.103075385093689,grad_norm: 0.999999229475044, iteration: 385190
loss: 0.9613040089607239,grad_norm: 0.8855953771617032, iteration: 385191
loss: 1.0166000127792358,grad_norm: 0.747742640515925, iteration: 385192
loss: 0.9966480135917664,grad_norm: 0.7510342079769063, iteration: 385193
loss: 0.9671832323074341,grad_norm: 0.9999991391701525, iteration: 385194
loss: 1.0225493907928467,grad_norm: 0.7944790543104233, iteration: 385195
loss: 0.9986429214477539,grad_norm: 0.7755396750911911, iteration: 385196
loss: 0.9676544666290283,grad_norm: 0.8326505199946509, iteration: 385197
loss: 1.045729637145996,grad_norm: 0.999999599345195, iteration: 385198
loss: 0.9564653038978577,grad_norm: 0.8519023433518219, iteration: 385199
loss: 1.0721311569213867,grad_norm: 0.999999902070941, iteration: 385200
loss: 1.0023844242095947,grad_norm: 0.7778179837032333, iteration: 385201
loss: 0.9797636270523071,grad_norm: 0.9999993256609244, iteration: 385202
loss: 1.0158053636550903,grad_norm: 0.7379844161482768, iteration: 385203
loss: 1.0179948806762695,grad_norm: 0.8448259280040311, iteration: 385204
loss: 1.0281246900558472,grad_norm: 0.8418700820780571, iteration: 385205
loss: 1.0116243362426758,grad_norm: 0.999999056507202, iteration: 385206
loss: 1.153978705406189,grad_norm: 0.9999999005808923, iteration: 385207
loss: 1.0272239446640015,grad_norm: 0.9999991474433461, iteration: 385208
loss: 1.0298703908920288,grad_norm: 0.93061285463998, iteration: 385209
loss: 1.004271388053894,grad_norm: 0.697219109819531, iteration: 385210
loss: 1.0095895528793335,grad_norm: 0.7546893768017002, iteration: 385211
loss: 1.022731065750122,grad_norm: 0.8184244770414375, iteration: 385212
loss: 1.0641523599624634,grad_norm: 0.803515725788429, iteration: 385213
loss: 1.0258392095565796,grad_norm: 0.8417870746361363, iteration: 385214
loss: 0.9896275401115417,grad_norm: 0.912240923158485, iteration: 385215
loss: 0.9855982661247253,grad_norm: 0.7863495560003907, iteration: 385216
loss: 1.03031587600708,grad_norm: 0.9999991586657998, iteration: 385217
loss: 0.9910641312599182,grad_norm: 0.8125277081850036, iteration: 385218
loss: 1.080979585647583,grad_norm: 0.9187388602463477, iteration: 385219
loss: 0.9958940148353577,grad_norm: 0.999999937294388, iteration: 385220
loss: 1.1322190761566162,grad_norm: 0.9999999520240914, iteration: 385221
loss: 0.9977032542228699,grad_norm: 0.8139428547258585, iteration: 385222
loss: 0.9694161415100098,grad_norm: 0.8123003690141022, iteration: 385223
loss: 1.001881718635559,grad_norm: 0.9999996819054006, iteration: 385224
loss: 0.9683461785316467,grad_norm: 0.7173197463873769, iteration: 385225
loss: 1.0672632455825806,grad_norm: 0.8661413204871081, iteration: 385226
loss: 1.0216264724731445,grad_norm: 0.7995811638664373, iteration: 385227
loss: 1.065883755683899,grad_norm: 0.9999996235595499, iteration: 385228
loss: 1.0172388553619385,grad_norm: 0.8267124804211093, iteration: 385229
loss: 0.9813419580459595,grad_norm: 0.7111431106738147, iteration: 385230
loss: 0.9593636393547058,grad_norm: 0.7204046221714204, iteration: 385231
loss: 1.0371448993682861,grad_norm: 0.8463192467272391, iteration: 385232
loss: 0.9895530939102173,grad_norm: 0.9184038107904888, iteration: 385233
loss: 1.016074538230896,grad_norm: 0.9304251686975002, iteration: 385234
loss: 1.0034390687942505,grad_norm: 0.8416479178182711, iteration: 385235
loss: 1.0514867305755615,grad_norm: 0.9999999301920283, iteration: 385236
loss: 1.165524959564209,grad_norm: 0.9999998229726899, iteration: 385237
loss: 0.9857777953147888,grad_norm: 0.792972226602372, iteration: 385238
loss: 0.9932926297187805,grad_norm: 0.9316099544566926, iteration: 385239
loss: 1.066268801689148,grad_norm: 0.874838738312677, iteration: 385240
loss: 0.9938405752182007,grad_norm: 0.7856630112632373, iteration: 385241
loss: 1.0525753498077393,grad_norm: 0.7575354764507155, iteration: 385242
loss: 1.0166733264923096,grad_norm: 0.8041056064538326, iteration: 385243
loss: 0.9495525360107422,grad_norm: 0.8073178936504024, iteration: 385244
loss: 1.020700454711914,grad_norm: 0.9965210328306215, iteration: 385245
loss: 1.005794882774353,grad_norm: 0.7827558877601463, iteration: 385246
loss: 1.0043892860412598,grad_norm: 0.7438497273903462, iteration: 385247
loss: 0.9725640416145325,grad_norm: 0.9473033441139602, iteration: 385248
loss: 1.2475664615631104,grad_norm: 0.9999991620999475, iteration: 385249
loss: 1.0679821968078613,grad_norm: 0.9112168124594208, iteration: 385250
loss: 1.1914018392562866,grad_norm: 0.9999998942195291, iteration: 385251
loss: 1.0589430332183838,grad_norm: 0.9999997433873299, iteration: 385252
loss: 1.0034154653549194,grad_norm: 0.9644450612636128, iteration: 385253
loss: 1.0205656290054321,grad_norm: 0.9179481136193092, iteration: 385254
loss: 1.0480886697769165,grad_norm: 0.9999990719027646, iteration: 385255
loss: 1.1232373714447021,grad_norm: 0.9999992358798976, iteration: 385256
loss: 1.1470119953155518,grad_norm: 0.9999997379511724, iteration: 385257
loss: 1.023797631263733,grad_norm: 0.99999947439937, iteration: 385258
loss: 0.9850178360939026,grad_norm: 0.8810987981691918, iteration: 385259
loss: 1.053918719291687,grad_norm: 0.8858917661453, iteration: 385260
loss: 0.9901580810546875,grad_norm: 0.7711041995736804, iteration: 385261
loss: 1.0046660900115967,grad_norm: 0.9722734587350746, iteration: 385262
loss: 1.1249651908874512,grad_norm: 0.9999998131349315, iteration: 385263
loss: 1.006624698638916,grad_norm: 0.9076209676314706, iteration: 385264
loss: 1.011612892150879,grad_norm: 0.7919330945202926, iteration: 385265
loss: 1.0690746307373047,grad_norm: 0.9999990647981393, iteration: 385266
loss: 0.9639930129051208,grad_norm: 0.8359421905968055, iteration: 385267
loss: 0.9930038452148438,grad_norm: 0.9999999445558795, iteration: 385268
loss: 1.0659098625183105,grad_norm: 0.999999191017046, iteration: 385269
loss: 0.9938608407974243,grad_norm: 0.9285775464288553, iteration: 385270
loss: 0.957636296749115,grad_norm: 0.7972351085580384, iteration: 385271
loss: 1.029269814491272,grad_norm: 0.9014768411834913, iteration: 385272
loss: 0.9896020293235779,grad_norm: 0.8275089535197495, iteration: 385273
loss: 1.0215641260147095,grad_norm: 0.7759452143996152, iteration: 385274
loss: 1.0120625495910645,grad_norm: 0.9999994157144199, iteration: 385275
loss: 1.015235185623169,grad_norm: 0.6909792167913557, iteration: 385276
loss: 0.9895969033241272,grad_norm: 0.7618954064982695, iteration: 385277
loss: 1.1218866109848022,grad_norm: 0.9999993807358742, iteration: 385278
loss: 1.040818452835083,grad_norm: 0.9999997372958414, iteration: 385279
loss: 1.0182267427444458,grad_norm: 0.999999430590776, iteration: 385280
loss: 1.0401980876922607,grad_norm: 0.7725605886874716, iteration: 385281
loss: 1.0022755861282349,grad_norm: 0.7685628140398945, iteration: 385282
loss: 0.992926836013794,grad_norm: 0.9999992748579932, iteration: 385283
loss: 1.023953914642334,grad_norm: 0.7760667160250899, iteration: 385284
loss: 0.9737814664840698,grad_norm: 0.8165752918199107, iteration: 385285
loss: 1.062659502029419,grad_norm: 0.7769246255427954, iteration: 385286
loss: 1.012229561805725,grad_norm: 0.7491822265741624, iteration: 385287
loss: 1.0795016288757324,grad_norm: 0.8201778201664818, iteration: 385288
loss: 1.0232856273651123,grad_norm: 0.9999990237903813, iteration: 385289
loss: 0.9932805895805359,grad_norm: 0.9999990925981126, iteration: 385290
loss: 1.0126869678497314,grad_norm: 0.999999204476996, iteration: 385291
loss: 1.0313022136688232,grad_norm: 0.9142675794109942, iteration: 385292
loss: 0.9899216890335083,grad_norm: 0.8168114080463866, iteration: 385293
loss: 0.9562445282936096,grad_norm: 0.7401545574034527, iteration: 385294
loss: 0.9980539083480835,grad_norm: 0.7532215409936084, iteration: 385295
loss: 1.0911072492599487,grad_norm: 0.9999995546110978, iteration: 385296
loss: 0.9762082099914551,grad_norm: 0.7276564770559237, iteration: 385297
loss: 0.9940158724784851,grad_norm: 0.7630256608658579, iteration: 385298
loss: 1.100308895111084,grad_norm: 0.999474500916923, iteration: 385299
loss: 1.011204481124878,grad_norm: 0.7533248522130681, iteration: 385300
loss: 1.0310076475143433,grad_norm: 0.9547349121816308, iteration: 385301
loss: 0.9893345832824707,grad_norm: 0.7756444092978082, iteration: 385302
loss: 1.0488883256912231,grad_norm: 0.8902688689667417, iteration: 385303
loss: 1.0194140672683716,grad_norm: 0.7855080205652087, iteration: 385304
loss: 0.9276625514030457,grad_norm: 0.8613158652790481, iteration: 385305
loss: 0.997160017490387,grad_norm: 0.7458299084043708, iteration: 385306
loss: 1.0205925703048706,grad_norm: 0.8248305270048871, iteration: 385307
loss: 0.9610517621040344,grad_norm: 0.723970004989743, iteration: 385308
loss: 0.9901993870735168,grad_norm: 0.8792186646720896, iteration: 385309
loss: 1.1752580404281616,grad_norm: 0.9999998723040813, iteration: 385310
loss: 1.039278507232666,grad_norm: 0.8002657268342132, iteration: 385311
loss: 1.0208568572998047,grad_norm: 0.9999995525791898, iteration: 385312
loss: 0.9940558075904846,grad_norm: 0.7644486031062554, iteration: 385313
loss: 1.124272108078003,grad_norm: 0.9999998679061556, iteration: 385314
loss: 1.0719438791275024,grad_norm: 0.9999995661234284, iteration: 385315
loss: 0.983019232749939,grad_norm: 0.8364176574272514, iteration: 385316
loss: 1.0139442682266235,grad_norm: 0.7844989688915435, iteration: 385317
loss: 0.9885626435279846,grad_norm: 0.8916187695606134, iteration: 385318
loss: 1.0077543258666992,grad_norm: 0.6622019524698118, iteration: 385319
loss: 0.9817985892295837,grad_norm: 0.721437338506746, iteration: 385320
loss: 0.990702748298645,grad_norm: 0.9999998908868876, iteration: 385321
loss: 0.9925813674926758,grad_norm: 0.9346916839614998, iteration: 385322
loss: 1.024793267250061,grad_norm: 0.8634304601392782, iteration: 385323
loss: 0.9898116588592529,grad_norm: 0.831877849919869, iteration: 385324
loss: 1.0163921117782593,grad_norm: 0.7047887044696406, iteration: 385325
loss: 1.0142443180084229,grad_norm: 0.9999992737931923, iteration: 385326
loss: 0.9888489246368408,grad_norm: 0.9999992027090369, iteration: 385327
loss: 0.9801551103591919,grad_norm: 0.9999995366976915, iteration: 385328
loss: 0.9902591705322266,grad_norm: 0.921224480760221, iteration: 385329
loss: 1.0819271802902222,grad_norm: 0.9999997338768127, iteration: 385330
loss: 1.0238604545593262,grad_norm: 0.8077461960835486, iteration: 385331
loss: 1.0136922597885132,grad_norm: 0.9418866315277141, iteration: 385332
loss: 0.9606321454048157,grad_norm: 0.7155558342300126, iteration: 385333
loss: 0.9857078790664673,grad_norm: 0.7880894006726185, iteration: 385334
loss: 1.016396164894104,grad_norm: 0.9647228517430037, iteration: 385335
loss: 0.9988827705383301,grad_norm: 0.8229811134633483, iteration: 385336
loss: 1.1877108812332153,grad_norm: 0.9999995181946987, iteration: 385337
loss: 0.998397171497345,grad_norm: 0.7897777068828904, iteration: 385338
loss: 1.0280712842941284,grad_norm: 0.8027939582130631, iteration: 385339
loss: 0.9466866850852966,grad_norm: 0.764313915149603, iteration: 385340
loss: 1.1255158185958862,grad_norm: 0.999999700709749, iteration: 385341
loss: 0.9679281711578369,grad_norm: 0.9999991085800664, iteration: 385342
loss: 1.0346792936325073,grad_norm: 0.818941728083138, iteration: 385343
loss: 1.0161205530166626,grad_norm: 0.7269371504637435, iteration: 385344
loss: 1.0262680053710938,grad_norm: 0.9536531467558461, iteration: 385345
loss: 1.020283818244934,grad_norm: 0.7282504158145101, iteration: 385346
loss: 1.0631377696990967,grad_norm: 0.8144576730731926, iteration: 385347
loss: 1.0000330209732056,grad_norm: 0.7535230612235029, iteration: 385348
loss: 1.0123764276504517,grad_norm: 1.0000000090024008, iteration: 385349
loss: 1.005388617515564,grad_norm: 0.8316755764916439, iteration: 385350
loss: 0.9917380213737488,grad_norm: 0.9999990090141293, iteration: 385351
loss: 1.0463335514068604,grad_norm: 0.9999991534497541, iteration: 385352
loss: 0.9878035187721252,grad_norm: 0.7456045227061959, iteration: 385353
loss: 1.0652340650558472,grad_norm: 0.8633102100193508, iteration: 385354
loss: 1.0507652759552002,grad_norm: 0.9346321746019042, iteration: 385355
loss: 1.0022871494293213,grad_norm: 0.9999999856679779, iteration: 385356
loss: 1.0078661441802979,grad_norm: 0.8359111281754499, iteration: 385357
loss: 1.012041449546814,grad_norm: 0.9999992503492192, iteration: 385358
loss: 0.9738171100616455,grad_norm: 0.7541683677353196, iteration: 385359
loss: 1.0306318998336792,grad_norm: 0.9999995266201227, iteration: 385360
loss: 1.252318024635315,grad_norm: 0.9999998398815982, iteration: 385361
loss: 0.9908762574195862,grad_norm: 0.6765058097620252, iteration: 385362
loss: 1.0061465501785278,grad_norm: 0.7892541652307136, iteration: 385363
loss: 1.0545376539230347,grad_norm: 0.9999997259297331, iteration: 385364
loss: 1.0052170753479004,grad_norm: 0.7108734061215305, iteration: 385365
loss: 1.060807228088379,grad_norm: 0.9999991872417427, iteration: 385366
loss: 1.0007286071777344,grad_norm: 0.8060310715602489, iteration: 385367
loss: 0.9927420020103455,grad_norm: 0.7633150077357372, iteration: 385368
loss: 1.008631944656372,grad_norm: 0.9150357996964331, iteration: 385369
loss: 1.0134148597717285,grad_norm: 0.9999993541020126, iteration: 385370
loss: 1.0868110656738281,grad_norm: 0.9999996115545113, iteration: 385371
loss: 1.0121288299560547,grad_norm: 0.8627993427145633, iteration: 385372
loss: 1.0071336030960083,grad_norm: 0.7107670531673443, iteration: 385373
loss: 1.008142113685608,grad_norm: 0.9594526408118098, iteration: 385374
loss: 0.9921240210533142,grad_norm: 0.6619893055479686, iteration: 385375
loss: 1.1204625368118286,grad_norm: 0.9999997435625158, iteration: 385376
loss: 0.9865982532501221,grad_norm: 0.7050251831017909, iteration: 385377
loss: 0.994842529296875,grad_norm: 0.9134604353749196, iteration: 385378
loss: 1.127571702003479,grad_norm: 0.9999998480898538, iteration: 385379
loss: 0.9830181002616882,grad_norm: 0.9999997232404011, iteration: 385380
loss: 1.00730299949646,grad_norm: 0.8968790205953678, iteration: 385381
loss: 1.1282896995544434,grad_norm: 0.999999753244306, iteration: 385382
loss: 1.3466304540634155,grad_norm: 0.9999996091183593, iteration: 385383
loss: 1.384954571723938,grad_norm: 0.9999995798113022, iteration: 385384
loss: 1.5708520412445068,grad_norm: 0.9999997460291828, iteration: 385385
loss: 1.3420493602752686,grad_norm: 0.9999997938232456, iteration: 385386
loss: 1.492812156677246,grad_norm: 0.9999999797987098, iteration: 385387
loss: 1.102185845375061,grad_norm: 0.9999994044365647, iteration: 385388
loss: 1.07564115524292,grad_norm: 0.7869578307302568, iteration: 385389
loss: 1.0499436855316162,grad_norm: 0.7509501562627459, iteration: 385390
loss: 0.9844195246696472,grad_norm: 0.9646477744622743, iteration: 385391
loss: 1.3278703689575195,grad_norm: 0.999999848806436, iteration: 385392
loss: 1.1684201955795288,grad_norm: 0.9999996927223195, iteration: 385393
loss: 1.3838526010513306,grad_norm: 0.9999997075841249, iteration: 385394
loss: 0.9723942875862122,grad_norm: 0.7567258562735759, iteration: 385395
loss: 1.140931248664856,grad_norm: 0.999999395137425, iteration: 385396
loss: 1.063462495803833,grad_norm: 0.801277988030693, iteration: 385397
loss: 1.069720983505249,grad_norm: 0.8650868367124647, iteration: 385398
loss: 1.0233372449874878,grad_norm: 0.9999995539680618, iteration: 385399
loss: 1.001971960067749,grad_norm: 0.8239025326683325, iteration: 385400
loss: 1.2504255771636963,grad_norm: 0.9832307090440556, iteration: 385401
loss: 1.0837602615356445,grad_norm: 0.9999992377987379, iteration: 385402
loss: 1.015217900276184,grad_norm: 0.9231415150646067, iteration: 385403
loss: 1.0007835626602173,grad_norm: 0.8413540851403682, iteration: 385404
loss: 1.1045721769332886,grad_norm: 0.99999986732834, iteration: 385405
loss: 0.9818001389503479,grad_norm: 0.6917371654205582, iteration: 385406
loss: 1.037140130996704,grad_norm: 0.9999998168923019, iteration: 385407
loss: 1.0320748090744019,grad_norm: 0.7706510154503431, iteration: 385408
loss: 1.0190540552139282,grad_norm: 0.8801291393715728, iteration: 385409
loss: 1.0715245008468628,grad_norm: 0.8994852079831113, iteration: 385410
loss: 0.9896143674850464,grad_norm: 0.7615826463681078, iteration: 385411
loss: 1.0626462697982788,grad_norm: 0.9359662653571328, iteration: 385412
loss: 0.9750406742095947,grad_norm: 0.6593702846186696, iteration: 385413
loss: 1.0301226377487183,grad_norm: 0.9999998738435188, iteration: 385414
loss: 0.9984859228134155,grad_norm: 0.9999991817243975, iteration: 385415
loss: 1.0926450490951538,grad_norm: 0.9999998424901162, iteration: 385416
loss: 1.1334939002990723,grad_norm: 0.9999998191499656, iteration: 385417
loss: 0.9917213916778564,grad_norm: 0.7721488705479164, iteration: 385418
loss: 1.1087056398391724,grad_norm: 0.8413028411870628, iteration: 385419
loss: 1.0702508687973022,grad_norm: 0.9999992770333994, iteration: 385420
loss: 0.9674767255783081,grad_norm: 0.7300827467137567, iteration: 385421
loss: 0.9703278541564941,grad_norm: 0.7294167202046241, iteration: 385422
loss: 1.02091646194458,grad_norm: 0.8610116095776552, iteration: 385423
loss: 1.0045210123062134,grad_norm: 0.702615648802398, iteration: 385424
loss: 1.0910061597824097,grad_norm: 0.9999992908614466, iteration: 385425
loss: 0.9862573742866516,grad_norm: 0.7011737054608208, iteration: 385426
loss: 1.0478119850158691,grad_norm: 0.9999996162333031, iteration: 385427
loss: 1.017987608909607,grad_norm: 0.9999991105125954, iteration: 385428
loss: 0.9555429816246033,grad_norm: 0.7773144462665457, iteration: 385429
loss: 0.9754330515861511,grad_norm: 0.7385678137434148, iteration: 385430
loss: 1.0492122173309326,grad_norm: 0.9999998923252841, iteration: 385431
loss: 0.9965327978134155,grad_norm: 0.9999995253642386, iteration: 385432
loss: 0.9921560287475586,grad_norm: 0.8221977826753336, iteration: 385433
loss: 0.9951821565628052,grad_norm: 0.618680728105729, iteration: 385434
loss: 0.9917293190956116,grad_norm: 0.7015110488559482, iteration: 385435
loss: 1.0530922412872314,grad_norm: 0.9999994486374032, iteration: 385436
loss: 0.9848929643630981,grad_norm: 0.8891089889486546, iteration: 385437
loss: 0.9807597994804382,grad_norm: 0.951930653059254, iteration: 385438
loss: 1.0608285665512085,grad_norm: 0.9999990430732838, iteration: 385439
loss: 0.9685672521591187,grad_norm: 0.8602709697692301, iteration: 385440
loss: 0.996318519115448,grad_norm: 0.75316898217847, iteration: 385441
loss: 1.0057677030563354,grad_norm: 0.6985530332659491, iteration: 385442
loss: 1.0415852069854736,grad_norm: 0.8968753532960791, iteration: 385443
loss: 1.128725528717041,grad_norm: 0.9999991019917448, iteration: 385444
loss: 0.9956808686256409,grad_norm: 0.7119337176612222, iteration: 385445
loss: 0.9705536365509033,grad_norm: 0.6873790536716456, iteration: 385446
loss: 0.9917675256729126,grad_norm: 0.7937701891878107, iteration: 385447
loss: 0.9957504272460938,grad_norm: 0.7764710316895421, iteration: 385448
loss: 0.9809472560882568,grad_norm: 0.69055085906188, iteration: 385449
loss: 1.021999716758728,grad_norm: 0.7876505711361665, iteration: 385450
loss: 1.0086570978164673,grad_norm: 0.6791887859533697, iteration: 385451
loss: 0.9765130877494812,grad_norm: 0.6421277906287192, iteration: 385452
loss: 1.0056538581848145,grad_norm: 0.9611641364667541, iteration: 385453
loss: 1.0114322900772095,grad_norm: 0.9141705421551792, iteration: 385454
loss: 1.0090560913085938,grad_norm: 0.9999990418352812, iteration: 385455
loss: 1.0764585733413696,grad_norm: 0.9999995383604775, iteration: 385456
loss: 1.0289162397384644,grad_norm: 0.7094780323629801, iteration: 385457
loss: 0.9938017725944519,grad_norm: 0.8162360503812758, iteration: 385458
loss: 0.9800832271575928,grad_norm: 0.8917551306229674, iteration: 385459
loss: 1.0032941102981567,grad_norm: 0.6897118062000078, iteration: 385460
loss: 0.9566173553466797,grad_norm: 0.660667701638476, iteration: 385461
loss: 1.0065443515777588,grad_norm: 0.8166368668390874, iteration: 385462
loss: 1.0046147108078003,grad_norm: 0.6720604586286166, iteration: 385463
loss: 1.0412745475769043,grad_norm: 0.9999998277314328, iteration: 385464
loss: 0.9879845380783081,grad_norm: 0.9999991435936979, iteration: 385465
loss: 1.0460667610168457,grad_norm: 0.9999991781620378, iteration: 385466
loss: 1.10842764377594,grad_norm: 0.9999991668677131, iteration: 385467
loss: 0.964695394039154,grad_norm: 0.9702815203092314, iteration: 385468
loss: 0.9997524619102478,grad_norm: 0.7080548487175252, iteration: 385469
loss: 1.1322649717330933,grad_norm: 0.9091551223557549, iteration: 385470
loss: 1.0273150205612183,grad_norm: 0.7392688371766697, iteration: 385471
loss: 1.0148935317993164,grad_norm: 0.792801795136867, iteration: 385472
loss: 0.9846223592758179,grad_norm: 0.7417208910404564, iteration: 385473
loss: 1.0152336359024048,grad_norm: 0.7393538399837379, iteration: 385474
loss: 1.043348789215088,grad_norm: 0.9999991079650714, iteration: 385475
loss: 1.0032851696014404,grad_norm: 0.8098387120196117, iteration: 385476
loss: 1.0354689359664917,grad_norm: 0.9999991167360943, iteration: 385477
loss: 0.9932560920715332,grad_norm: 0.7940845875653678, iteration: 385478
loss: 1.0103049278259277,grad_norm: 0.8123283150113649, iteration: 385479
loss: 0.9942566156387329,grad_norm: 0.7548347302265442, iteration: 385480
loss: 1.0544685125350952,grad_norm: 0.894528115637777, iteration: 385481
loss: 1.0614213943481445,grad_norm: 0.9999992900174672, iteration: 385482
loss: 0.9733632802963257,grad_norm: 0.9203709942008345, iteration: 385483
loss: 1.0709915161132812,grad_norm: 0.9999991199602523, iteration: 385484
loss: 1.021890640258789,grad_norm: 0.7081977425803015, iteration: 385485
loss: 1.0064654350280762,grad_norm: 0.9999995672703536, iteration: 385486
loss: 1.089046597480774,grad_norm: 0.9999996666793559, iteration: 385487
loss: 1.0111647844314575,grad_norm: 0.7532460919280055, iteration: 385488
loss: 0.9489684104919434,grad_norm: 0.7859754342585235, iteration: 385489
loss: 0.97801673412323,grad_norm: 0.7755192289808285, iteration: 385490
loss: 1.023231029510498,grad_norm: 0.7300066823311754, iteration: 385491
loss: 0.9905260801315308,grad_norm: 0.7843364200562971, iteration: 385492
loss: 0.9961923956871033,grad_norm: 0.9999996085174888, iteration: 385493
loss: 1.0745277404785156,grad_norm: 0.8134601586025523, iteration: 385494
loss: 1.029150366783142,grad_norm: 0.9069494978288263, iteration: 385495
loss: 1.0669760704040527,grad_norm: 0.8509638901876415, iteration: 385496
loss: 1.0144109725952148,grad_norm: 0.7872823877273097, iteration: 385497
loss: 1.007092833518982,grad_norm: 0.99999988009605, iteration: 385498
loss: 0.9916001558303833,grad_norm: 0.7771671278726509, iteration: 385499
loss: 0.9435427784919739,grad_norm: 0.9999991784375875, iteration: 385500
loss: 0.9792509078979492,grad_norm: 0.8151217595279078, iteration: 385501
loss: 1.0907725095748901,grad_norm: 0.9999996942257089, iteration: 385502
loss: 0.9986820220947266,grad_norm: 0.9966752777134429, iteration: 385503
loss: 1.0233283042907715,grad_norm: 0.7918209385220469, iteration: 385504
loss: 0.9988197684288025,grad_norm: 0.8211771647668491, iteration: 385505
loss: 1.0196969509124756,grad_norm: 0.8367717414548006, iteration: 385506
loss: 1.018433690071106,grad_norm: 0.9803785777856995, iteration: 385507
loss: 1.0445616245269775,grad_norm: 0.9999996174403109, iteration: 385508
loss: 1.0020380020141602,grad_norm: 0.8762593759938694, iteration: 385509
loss: 0.9939947128295898,grad_norm: 0.7610214501441783, iteration: 385510
loss: 1.0603034496307373,grad_norm: 0.999999615737508, iteration: 385511
loss: 1.0523738861083984,grad_norm: 0.9999991259736697, iteration: 385512
loss: 1.0040740966796875,grad_norm: 0.6803488355939351, iteration: 385513
loss: 0.9865533113479614,grad_norm: 0.7604326378284751, iteration: 385514
loss: 1.0245981216430664,grad_norm: 0.7338840375592178, iteration: 385515
loss: 1.0794157981872559,grad_norm: 0.9500193573980248, iteration: 385516
loss: 0.9924177527427673,grad_norm: 0.923003331703211, iteration: 385517
loss: 0.9552034139633179,grad_norm: 0.8143789666523648, iteration: 385518
loss: 1.046897053718567,grad_norm: 0.9999999430356076, iteration: 385519
loss: 0.9933739304542542,grad_norm: 0.9999992282713964, iteration: 385520
loss: 1.0860320329666138,grad_norm: 0.9999999300825342, iteration: 385521
loss: 0.9990225434303284,grad_norm: 0.8902207696378729, iteration: 385522
loss: 1.0582424402236938,grad_norm: 0.9999992305876207, iteration: 385523
loss: 1.0194307565689087,grad_norm: 0.9999990590670443, iteration: 385524
loss: 0.9613181948661804,grad_norm: 0.8166080344383729, iteration: 385525
loss: 0.9668213725090027,grad_norm: 0.6007360574480155, iteration: 385526
loss: 1.0044602155685425,grad_norm: 0.8191700708029885, iteration: 385527
loss: 0.9691336750984192,grad_norm: 0.7335959742580476, iteration: 385528
loss: 1.0365148782730103,grad_norm: 0.9200497094334293, iteration: 385529
loss: 1.0406521558761597,grad_norm: 1.0000000603418873, iteration: 385530
loss: 1.0351085662841797,grad_norm: 0.9999998296646604, iteration: 385531
loss: 0.9734658002853394,grad_norm: 0.9825809926740637, iteration: 385532
loss: 1.0161402225494385,grad_norm: 0.8509136032695264, iteration: 385533
loss: 1.1149535179138184,grad_norm: 0.969594423928615, iteration: 385534
loss: 1.0374979972839355,grad_norm: 0.773340549563489, iteration: 385535
loss: 0.9554795026779175,grad_norm: 0.9999989923601253, iteration: 385536
loss: 1.0207626819610596,grad_norm: 0.8729414665398331, iteration: 385537
loss: 1.0377088785171509,grad_norm: 0.9999993851009326, iteration: 385538
loss: 1.0089675188064575,grad_norm: 0.9538847564785198, iteration: 385539
loss: 1.005171298980713,grad_norm: 0.9999993837120039, iteration: 385540
loss: 0.9803828001022339,grad_norm: 0.8255480080198828, iteration: 385541
loss: 1.0362837314605713,grad_norm: 0.8158552041696757, iteration: 385542
loss: 1.0370895862579346,grad_norm: 0.7037164660306251, iteration: 385543
loss: 1.043389081954956,grad_norm: 0.9719764311377036, iteration: 385544
loss: 0.9926316738128662,grad_norm: 0.849478313318061, iteration: 385545
loss: 1.105007290840149,grad_norm: 0.9999991895335502, iteration: 385546
loss: 1.0105154514312744,grad_norm: 0.8661532822822932, iteration: 385547
loss: 1.0400503873825073,grad_norm: 0.776292847055385, iteration: 385548
loss: 1.0315048694610596,grad_norm: 0.8162201152830849, iteration: 385549
loss: 1.0514525175094604,grad_norm: 0.8623059964124752, iteration: 385550
loss: 1.0098068714141846,grad_norm: 0.6780859155073622, iteration: 385551
loss: 0.9443021416664124,grad_norm: 0.7513173525920169, iteration: 385552
loss: 1.0436768531799316,grad_norm: 0.7091553082929984, iteration: 385553
loss: 0.9915834069252014,grad_norm: 0.7032381286901132, iteration: 385554
loss: 1.0097123384475708,grad_norm: 0.9999996996235279, iteration: 385555
loss: 1.0205131769180298,grad_norm: 0.8077977978549218, iteration: 385556
loss: 1.0395524501800537,grad_norm: 0.7735927102919298, iteration: 385557
loss: 1.0101709365844727,grad_norm: 0.8650083437595613, iteration: 385558
loss: 1.0102746486663818,grad_norm: 0.9227660013763241, iteration: 385559
loss: 0.9571425318717957,grad_norm: 0.7502379532792096, iteration: 385560
loss: 1.0552955865859985,grad_norm: 0.9446353981129376, iteration: 385561
loss: 1.0114496946334839,grad_norm: 0.7743065417826308, iteration: 385562
loss: 1.0015630722045898,grad_norm: 0.7612850002777362, iteration: 385563
loss: 1.005785346031189,grad_norm: 0.8647325323234364, iteration: 385564
loss: 0.9956319332122803,grad_norm: 0.8328806542965471, iteration: 385565
loss: 0.995079517364502,grad_norm: 0.9999990948191658, iteration: 385566
loss: 0.9783003330230713,grad_norm: 0.797267099970859, iteration: 385567
loss: 0.9979553818702698,grad_norm: 0.7808755601736018, iteration: 385568
loss: 0.9577306509017944,grad_norm: 0.8762672219771466, iteration: 385569
loss: 1.027250051498413,grad_norm: 0.8367175733113391, iteration: 385570
loss: 1.0289875268936157,grad_norm: 0.8972431117488275, iteration: 385571
loss: 1.0234870910644531,grad_norm: 0.9565288138662903, iteration: 385572
loss: 0.9853147268295288,grad_norm: 0.7266009982559043, iteration: 385573
loss: 0.9705211520195007,grad_norm: 0.8526263764803215, iteration: 385574
loss: 1.0254703760147095,grad_norm: 0.9999998490696705, iteration: 385575
loss: 0.972449541091919,grad_norm: 0.9419238204290696, iteration: 385576
loss: 1.013606071472168,grad_norm: 0.8625351165039579, iteration: 385577
loss: 0.977542519569397,grad_norm: 0.9999990580606003, iteration: 385578
loss: 1.0245048999786377,grad_norm: 0.9999990007287984, iteration: 385579
loss: 0.9871732592582703,grad_norm: 0.6238148808240077, iteration: 385580
loss: 0.9968885779380798,grad_norm: 0.999999287852912, iteration: 385581
loss: 1.0430448055267334,grad_norm: 0.6203217381247231, iteration: 385582
loss: 1.0580145120620728,grad_norm: 0.8955559301637525, iteration: 385583
loss: 1.025835394859314,grad_norm: 0.9999990172967173, iteration: 385584
loss: 0.943899393081665,grad_norm: 0.9999997943921733, iteration: 385585
loss: 1.0183249711990356,grad_norm: 0.7356129483990826, iteration: 385586
loss: 1.020340085029602,grad_norm: 0.7668232966285367, iteration: 385587
loss: 1.1017316579818726,grad_norm: 0.999999935107703, iteration: 385588
loss: 1.0841004848480225,grad_norm: 0.9999996449047961, iteration: 385589
loss: 1.0272071361541748,grad_norm: 0.6780259775212087, iteration: 385590
loss: 1.0146448612213135,grad_norm: 0.6987803780149948, iteration: 385591
loss: 1.0330463647842407,grad_norm: 0.9452151232537539, iteration: 385592
loss: 1.184627890586853,grad_norm: 0.9686524161038716, iteration: 385593
loss: 1.0144845247268677,grad_norm: 0.8881691315038049, iteration: 385594
loss: 1.0019092559814453,grad_norm: 0.7292461891379164, iteration: 385595
loss: 1.0520985126495361,grad_norm: 0.975903184581337, iteration: 385596
loss: 1.0019245147705078,grad_norm: 0.9999989874941908, iteration: 385597
loss: 1.0031651258468628,grad_norm: 0.733274373248546, iteration: 385598
loss: 0.9962867498397827,grad_norm: 0.9999996588977648, iteration: 385599
loss: 1.064609169960022,grad_norm: 0.9999992190921067, iteration: 385600
loss: 1.0738648176193237,grad_norm: 0.9999996984542139, iteration: 385601
loss: 1.0809506177902222,grad_norm: 0.9999993101936021, iteration: 385602
loss: 0.9797651767730713,grad_norm: 0.7159266728688991, iteration: 385603
loss: 0.9984474182128906,grad_norm: 0.7263452546217265, iteration: 385604
loss: 1.0473194122314453,grad_norm: 0.9999995159557061, iteration: 385605
loss: 1.0191892385482788,grad_norm: 0.957276258174615, iteration: 385606
loss: 0.9884986281394958,grad_norm: 0.8482635375398693, iteration: 385607
loss: 0.9947490692138672,grad_norm: 0.9999991933245979, iteration: 385608
loss: 0.9827163815498352,grad_norm: 0.880544139610265, iteration: 385609
loss: 0.9964659810066223,grad_norm: 0.7443308987170696, iteration: 385610
loss: 0.9632647633552551,grad_norm: 0.8399268750040185, iteration: 385611
loss: 0.9747390151023865,grad_norm: 0.8518233028516107, iteration: 385612
loss: 0.9933680295944214,grad_norm: 0.7984014554612073, iteration: 385613
loss: 0.9865228533744812,grad_norm: 0.9999994211015936, iteration: 385614
loss: 0.9984691143035889,grad_norm: 0.7722093673200359, iteration: 385615
loss: 1.0307962894439697,grad_norm: 0.702393444474197, iteration: 385616
loss: 0.9900648593902588,grad_norm: 0.686325502249115, iteration: 385617
loss: 1.0313719511032104,grad_norm: 0.7541332045363315, iteration: 385618
loss: 0.9932980537414551,grad_norm: 0.999999119833875, iteration: 385619
loss: 1.0016580820083618,grad_norm: 0.9999994517381064, iteration: 385620
loss: 1.0233467817306519,grad_norm: 0.7123898048817978, iteration: 385621
loss: 1.0316067934036255,grad_norm: 0.999999303941756, iteration: 385622
loss: 1.0038909912109375,grad_norm: 0.7849301374273938, iteration: 385623
loss: 0.9999858736991882,grad_norm: 0.8976574383831354, iteration: 385624
loss: 1.031766653060913,grad_norm: 0.9999992479870641, iteration: 385625
loss: 0.9991695880889893,grad_norm: 0.9999993831382527, iteration: 385626
loss: 1.0672036409378052,grad_norm: 0.7347428525022424, iteration: 385627
loss: 1.0187411308288574,grad_norm: 0.783629044014089, iteration: 385628
loss: 1.0165995359420776,grad_norm: 0.9999992642105248, iteration: 385629
loss: 1.0885964632034302,grad_norm: 0.6844581328080347, iteration: 385630
loss: 0.9782731533050537,grad_norm: 0.9127511174254347, iteration: 385631
loss: 1.029070496559143,grad_norm: 0.8351802654543097, iteration: 385632
loss: 1.1898008584976196,grad_norm: 0.9999999155233834, iteration: 385633
loss: 1.0025105476379395,grad_norm: 0.8460276448196802, iteration: 385634
loss: 0.9686890244483948,grad_norm: 0.6890488790905474, iteration: 385635
loss: 0.9844838380813599,grad_norm: 0.7742917346965483, iteration: 385636
loss: 1.0008509159088135,grad_norm: 0.7131508760789544, iteration: 385637
loss: 0.9834147691726685,grad_norm: 0.7614225919666059, iteration: 385638
loss: 1.0104897022247314,grad_norm: 0.8504640861954588, iteration: 385639
loss: 1.027335524559021,grad_norm: 0.9182166963968407, iteration: 385640
loss: 1.0127511024475098,grad_norm: 0.6727193572402347, iteration: 385641
loss: 1.0703178644180298,grad_norm: 0.9999990884602199, iteration: 385642
loss: 1.18998122215271,grad_norm: 0.9999996183748089, iteration: 385643
loss: 0.9881969690322876,grad_norm: 0.9196383817873696, iteration: 385644
loss: 1.050981044769287,grad_norm: 0.8117824364523919, iteration: 385645
loss: 1.0013577938079834,grad_norm: 0.8859132782008527, iteration: 385646
loss: 0.978930652141571,grad_norm: 0.8403815233846146, iteration: 385647
loss: 0.9829961657524109,grad_norm: 0.9544108321838652, iteration: 385648
loss: 1.0000596046447754,grad_norm: 0.7265434439433553, iteration: 385649
loss: 1.0021519660949707,grad_norm: 0.7586373544903586, iteration: 385650
loss: 1.0491604804992676,grad_norm: 0.9910959270246432, iteration: 385651
loss: 1.0353431701660156,grad_norm: 0.891820842121806, iteration: 385652
loss: 1.0050463676452637,grad_norm: 0.7794307714224161, iteration: 385653
loss: 1.0167300701141357,grad_norm: 0.7756626060476566, iteration: 385654
loss: 0.9987895488739014,grad_norm: 0.9920947169793932, iteration: 385655
loss: 1.029126763343811,grad_norm: 0.957703028423657, iteration: 385656
loss: 1.1142027378082275,grad_norm: 0.8189891361140923, iteration: 385657
loss: 1.0065789222717285,grad_norm: 0.7635824934766585, iteration: 385658
loss: 0.9981438517570496,grad_norm: 0.7454101108944109, iteration: 385659
loss: 1.0003955364227295,grad_norm: 0.9098695727522774, iteration: 385660
loss: 0.9916679263114929,grad_norm: 0.7453415970680191, iteration: 385661
loss: 1.034778118133545,grad_norm: 0.9999991370260634, iteration: 385662
loss: 0.9801559448242188,grad_norm: 0.7492071358958418, iteration: 385663
loss: 0.9999874234199524,grad_norm: 0.7718880978153798, iteration: 385664
loss: 1.066789984703064,grad_norm: 0.90038321881531, iteration: 385665
loss: 0.9780327081680298,grad_norm: 0.7247477988473099, iteration: 385666
loss: 1.0322564840316772,grad_norm: 0.7971952356328462, iteration: 385667
loss: 1.0307605266571045,grad_norm: 0.7592527345805796, iteration: 385668
loss: 1.0205107927322388,grad_norm: 0.9999993663726212, iteration: 385669
loss: 1.02178955078125,grad_norm: 0.9999998695488688, iteration: 385670
loss: 1.0788055658340454,grad_norm: 0.7916478229212766, iteration: 385671
loss: 1.0455102920532227,grad_norm: 0.8605114076212474, iteration: 385672
loss: 0.9937594532966614,grad_norm: 0.7654712338719226, iteration: 385673
loss: 0.9948213696479797,grad_norm: 0.7886487194282115, iteration: 385674
loss: 1.0671063661575317,grad_norm: 0.9143125707577293, iteration: 385675
loss: 0.9836642146110535,grad_norm: 0.6479829846016696, iteration: 385676
loss: 1.0010876655578613,grad_norm: 0.7550672791840523, iteration: 385677
loss: 0.9703323841094971,grad_norm: 0.751685029103124, iteration: 385678
loss: 1.0086150169372559,grad_norm: 0.7746300113026385, iteration: 385679
loss: 1.0151134729385376,grad_norm: 1.000000042885255, iteration: 385680
loss: 0.9893155694007874,grad_norm: 0.8440234191766912, iteration: 385681
loss: 1.038053274154663,grad_norm: 0.7995754826596188, iteration: 385682
loss: 0.9989154934883118,grad_norm: 0.744405712242927, iteration: 385683
loss: 0.9869617223739624,grad_norm: 0.7442749120088187, iteration: 385684
loss: 0.983411431312561,grad_norm: 0.7597317244990048, iteration: 385685
loss: 1.0201702117919922,grad_norm: 0.8634139974012562, iteration: 385686
loss: 0.9789054989814758,grad_norm: 0.9999991348776651, iteration: 385687
loss: 0.9939897060394287,grad_norm: 0.6909097287108141, iteration: 385688
loss: 1.1002821922302246,grad_norm: 0.8765305931756671, iteration: 385689
loss: 0.9803884625434875,grad_norm: 0.9456427788586466, iteration: 385690
loss: 1.021239161491394,grad_norm: 0.7795054748913527, iteration: 385691
loss: 0.9901143312454224,grad_norm: 0.7075097913575329, iteration: 385692
loss: 1.053048014640808,grad_norm: 0.9999998846938118, iteration: 385693
loss: 0.993476927280426,grad_norm: 0.8752765735707589, iteration: 385694
loss: 0.9900932908058167,grad_norm: 0.8245024491154295, iteration: 385695
loss: 1.0128949880599976,grad_norm: 0.7110985443701259, iteration: 385696
loss: 1.0137213468551636,grad_norm: 0.9580872154227117, iteration: 385697
loss: 0.9963312745094299,grad_norm: 0.9873752605834778, iteration: 385698
loss: 0.992433488368988,grad_norm: 0.7543269679094634, iteration: 385699
loss: 0.992917001247406,grad_norm: 0.7051772233822805, iteration: 385700
loss: 1.0052509307861328,grad_norm: 0.9999990758878828, iteration: 385701
loss: 1.0042519569396973,grad_norm: 0.7351732481862926, iteration: 385702
loss: 1.024246335029602,grad_norm: 0.6616800346422487, iteration: 385703
loss: 1.0696736574172974,grad_norm: 0.9999991417497814, iteration: 385704
loss: 0.9957979917526245,grad_norm: 0.8872849720297327, iteration: 385705
loss: 0.9666001200675964,grad_norm: 0.9425203289625499, iteration: 385706
loss: 0.9847328066825867,grad_norm: 0.8333511479713094, iteration: 385707
loss: 1.0183664560317993,grad_norm: 0.9999992610115865, iteration: 385708
loss: 1.0050832033157349,grad_norm: 0.8911422998861899, iteration: 385709
loss: 1.015448808670044,grad_norm: 0.9999991468756071, iteration: 385710
loss: 1.0128624439239502,grad_norm: 0.8417517937884128, iteration: 385711
loss: 1.0138791799545288,grad_norm: 0.9350620105751857, iteration: 385712
loss: 0.9833696484565735,grad_norm: 0.9226455357603944, iteration: 385713
loss: 0.9714319705963135,grad_norm: 0.7843653920712544, iteration: 385714
loss: 0.997825562953949,grad_norm: 0.7177453850808128, iteration: 385715
loss: 0.9991108775138855,grad_norm: 0.8500384178813941, iteration: 385716
loss: 0.9706753492355347,grad_norm: 0.7828553902867821, iteration: 385717
loss: 0.992174506187439,grad_norm: 0.7722249624209995, iteration: 385718
loss: 0.9929616451263428,grad_norm: 0.7980853703156607, iteration: 385719
loss: 1.0687227249145508,grad_norm: 0.9999990636337405, iteration: 385720
loss: 1.065545678138733,grad_norm: 0.9999996930163875, iteration: 385721
loss: 0.9962544441223145,grad_norm: 0.7525592357256823, iteration: 385722
loss: 1.0414785146713257,grad_norm: 0.9999991096840114, iteration: 385723
loss: 0.9920949339866638,grad_norm: 0.8973819484985517, iteration: 385724
loss: 1.0183730125427246,grad_norm: 0.7041918303110527, iteration: 385725
loss: 1.0146985054016113,grad_norm: 0.9999993570681557, iteration: 385726
loss: 0.9759851098060608,grad_norm: 0.7373160562155399, iteration: 385727
loss: 1.0432004928588867,grad_norm: 0.9999993847159251, iteration: 385728
loss: 0.9979934096336365,grad_norm: 0.7801324496882232, iteration: 385729
loss: 1.0230069160461426,grad_norm: 0.8975374109414361, iteration: 385730
loss: 1.0036903619766235,grad_norm: 0.8234490445201028, iteration: 385731
loss: 1.0318323373794556,grad_norm: 0.8083432952343748, iteration: 385732
loss: 1.0293821096420288,grad_norm: 0.9999992349739001, iteration: 385733
loss: 0.9788016080856323,grad_norm: 0.7794445103319555, iteration: 385734
loss: 1.0009944438934326,grad_norm: 0.885037686865626, iteration: 385735
loss: 0.9894658327102661,grad_norm: 0.7230930911277977, iteration: 385736
loss: 0.987955629825592,grad_norm: 0.7517952246057127, iteration: 385737
loss: 1.0116139650344849,grad_norm: 0.7409237543243433, iteration: 385738
loss: 1.010693073272705,grad_norm: 0.84153942085719, iteration: 385739
loss: 1.0096038579940796,grad_norm: 0.9999998235022584, iteration: 385740
loss: 0.979522705078125,grad_norm: 0.809980668087844, iteration: 385741
loss: 0.9699522256851196,grad_norm: 0.7127369115071435, iteration: 385742
loss: 1.0771857500076294,grad_norm: 0.9999992679990969, iteration: 385743
loss: 0.9688538908958435,grad_norm: 0.7400963069841402, iteration: 385744
loss: 1.0295939445495605,grad_norm: 0.8258753327243908, iteration: 385745
loss: 0.9735355973243713,grad_norm: 0.7522769581685269, iteration: 385746
loss: 0.9891543984413147,grad_norm: 0.7535886930051641, iteration: 385747
loss: 1.0128310918807983,grad_norm: 0.9635839814529462, iteration: 385748
loss: 1.0197701454162598,grad_norm: 0.8159333324419719, iteration: 385749
loss: 1.015710473060608,grad_norm: 0.8500636453140193, iteration: 385750
loss: 0.9858603477478027,grad_norm: 0.7665369136927629, iteration: 385751
loss: 1.0169609785079956,grad_norm: 0.9485257159037814, iteration: 385752
loss: 0.9923865795135498,grad_norm: 0.6206122437801903, iteration: 385753
loss: 1.0660544633865356,grad_norm: 0.9999997012653608, iteration: 385754
loss: 1.026352047920227,grad_norm: 0.9999992199697371, iteration: 385755
loss: 1.0463043451309204,grad_norm: 0.9999997293997219, iteration: 385756
loss: 0.9620233178138733,grad_norm: 0.7119592191845008, iteration: 385757
loss: 1.023447871208191,grad_norm: 0.8728309199997024, iteration: 385758
loss: 1.0026122331619263,grad_norm: 0.7485049004026432, iteration: 385759
loss: 1.016132116317749,grad_norm: 0.7572459112857401, iteration: 385760
loss: 1.046301007270813,grad_norm: 0.8583714153592179, iteration: 385761
loss: 0.9545931816101074,grad_norm: 0.6376230537836608, iteration: 385762
loss: 1.0014896392822266,grad_norm: 0.8482544379157714, iteration: 385763
loss: 0.9993493556976318,grad_norm: 0.8028737379210734, iteration: 385764
loss: 0.9826726317405701,grad_norm: 0.9314266463230165, iteration: 385765
loss: 0.9711155295372009,grad_norm: 0.80150959367481, iteration: 385766
loss: 0.9694563746452332,grad_norm: 0.7917507894432145, iteration: 385767
loss: 0.9927012324333191,grad_norm: 0.9999991502145047, iteration: 385768
loss: 0.973222017288208,grad_norm: 0.6731732494486027, iteration: 385769
loss: 1.0449944734573364,grad_norm: 0.9269526725196685, iteration: 385770
loss: 1.0102298259735107,grad_norm: 0.9999996150017371, iteration: 385771
loss: 0.9735550284385681,grad_norm: 0.8186048996993007, iteration: 385772
loss: 1.0740172863006592,grad_norm: 0.9999993574860568, iteration: 385773
loss: 1.0165841579437256,grad_norm: 0.999999878081259, iteration: 385774
loss: 1.02348792552948,grad_norm: 0.9999994424274898, iteration: 385775
loss: 1.0166634321212769,grad_norm: 0.8652860025269934, iteration: 385776
loss: 1.018088459968567,grad_norm: 0.8954138853182704, iteration: 385777
loss: 1.0148388147354126,grad_norm: 0.8698027303452105, iteration: 385778
loss: 0.9916383624076843,grad_norm: 0.80079585957948, iteration: 385779
loss: 0.9960244297981262,grad_norm: 0.9999990774354087, iteration: 385780
loss: 0.996605634689331,grad_norm: 0.9999991256914691, iteration: 385781
loss: 1.0586143732070923,grad_norm: 0.9999990315724185, iteration: 385782
loss: 0.9971498250961304,grad_norm: 0.8796608884800011, iteration: 385783
loss: 1.0212258100509644,grad_norm: 0.8054104630243814, iteration: 385784
loss: 1.0164028406143188,grad_norm: 0.9272378708861874, iteration: 385785
loss: 1.0229474306106567,grad_norm: 0.9999992636485923, iteration: 385786
loss: 0.9771293997764587,grad_norm: 0.7359608311192603, iteration: 385787
loss: 0.9806036949157715,grad_norm: 0.9999992768813764, iteration: 385788
loss: 0.9748259782791138,grad_norm: 0.6443794921041515, iteration: 385789
loss: 1.0402500629425049,grad_norm: 0.8656928945591214, iteration: 385790
loss: 1.0055660009384155,grad_norm: 0.9999993980249596, iteration: 385791
loss: 1.0603837966918945,grad_norm: 0.9098506379796764, iteration: 385792
loss: 1.0306954383850098,grad_norm: 0.7586987988607353, iteration: 385793
loss: 1.0172953605651855,grad_norm: 0.866972804851831, iteration: 385794
loss: 1.027419090270996,grad_norm: 0.9999993352994204, iteration: 385795
loss: 1.0417782068252563,grad_norm: 0.8800822376071739, iteration: 385796
loss: 1.0006705522537231,grad_norm: 0.8269613855794625, iteration: 385797
loss: 1.0951097011566162,grad_norm: 0.9999992771745299, iteration: 385798
loss: 1.0178027153015137,grad_norm: 0.9999990721587121, iteration: 385799
loss: 1.0123440027236938,grad_norm: 0.8515627338974342, iteration: 385800
loss: 0.9753814935684204,grad_norm: 0.7313466691999819, iteration: 385801
loss: 1.0192303657531738,grad_norm: 0.9999998644331191, iteration: 385802
loss: 1.011128306388855,grad_norm: 0.9999991921550573, iteration: 385803
loss: 1.004403829574585,grad_norm: 0.8643038778716134, iteration: 385804
loss: 0.9525983333587646,grad_norm: 0.7121404813387929, iteration: 385805
loss: 0.9573218822479248,grad_norm: 0.9511694497620404, iteration: 385806
loss: 1.0019526481628418,grad_norm: 0.8017714499065258, iteration: 385807
loss: 0.9645406007766724,grad_norm: 0.9999992710710072, iteration: 385808
loss: 1.023726463317871,grad_norm: 0.719039330419369, iteration: 385809
loss: 0.9616747498512268,grad_norm: 0.8482503942587143, iteration: 385810
loss: 1.0500261783599854,grad_norm: 0.7599915360442099, iteration: 385811
loss: 0.9972900748252869,grad_norm: 0.742993311776252, iteration: 385812
loss: 0.9836034774780273,grad_norm: 0.9132996987470922, iteration: 385813
loss: 1.0284247398376465,grad_norm: 0.9163352108962637, iteration: 385814
loss: 0.9942910075187683,grad_norm: 0.6131140524011189, iteration: 385815
loss: 1.0334111452102661,grad_norm: 0.6601900298212263, iteration: 385816
loss: 0.9703842997550964,grad_norm: 0.8562136602599222, iteration: 385817
loss: 1.130550742149353,grad_norm: 1.000000029859834, iteration: 385818
loss: 1.0267717838287354,grad_norm: 0.780029400776026, iteration: 385819
loss: 1.0475773811340332,grad_norm: 0.9999994997967567, iteration: 385820
loss: 0.9878478050231934,grad_norm: 0.8596555568118183, iteration: 385821
loss: 1.1142226457595825,grad_norm: 0.9999999740861302, iteration: 385822
loss: 1.0035927295684814,grad_norm: 0.9999991797704447, iteration: 385823
loss: 0.9489664435386658,grad_norm: 0.8365828362714683, iteration: 385824
loss: 0.9874988794326782,grad_norm: 0.7103844488305233, iteration: 385825
loss: 1.0090835094451904,grad_norm: 0.9374297083984454, iteration: 385826
loss: 1.0214685201644897,grad_norm: 0.7973842413379819, iteration: 385827
loss: 0.9697601199150085,grad_norm: 0.7333953037725366, iteration: 385828
loss: 0.9552388191223145,grad_norm: 0.8160781572495807, iteration: 385829
loss: 1.0520871877670288,grad_norm: 0.99999965793164, iteration: 385830
loss: 1.0730385780334473,grad_norm: 0.9999993129737567, iteration: 385831
loss: 1.0238666534423828,grad_norm: 0.9999998622928709, iteration: 385832
loss: 1.0340136289596558,grad_norm: 0.8524586065025979, iteration: 385833
loss: 1.009709119796753,grad_norm: 0.9999990426621049, iteration: 385834
loss: 0.9991030097007751,grad_norm: 0.8485950260139398, iteration: 385835
loss: 1.1069847345352173,grad_norm: 0.9999997414058878, iteration: 385836
loss: 1.0093016624450684,grad_norm: 0.9999990172689809, iteration: 385837
loss: 1.0796129703521729,grad_norm: 0.9999991930007204, iteration: 385838
loss: 0.9998239874839783,grad_norm: 0.6740173845828492, iteration: 385839
loss: 1.0232765674591064,grad_norm: 0.9232322278267695, iteration: 385840
loss: 1.01547110080719,grad_norm: 0.8337500665217203, iteration: 385841
loss: 1.0205988883972168,grad_norm: 0.681948976943273, iteration: 385842
loss: 0.9946048855781555,grad_norm: 0.8007652305564364, iteration: 385843
loss: 0.9971631169319153,grad_norm: 0.7490912456460176, iteration: 385844
loss: 1.0292543172836304,grad_norm: 0.9225233085062301, iteration: 385845
loss: 1.021478533744812,grad_norm: 0.9999999220167182, iteration: 385846
loss: 1.005499005317688,grad_norm: 0.8976702892029469, iteration: 385847
loss: 1.0499666929244995,grad_norm: 0.9999991804168663, iteration: 385848
loss: 1.0029767751693726,grad_norm: 0.83768527882164, iteration: 385849
loss: 0.9914536476135254,grad_norm: 0.9228134677780842, iteration: 385850
loss: 1.0975534915924072,grad_norm: 1.0000001279888417, iteration: 385851
loss: 0.9877433776855469,grad_norm: 0.9999991494517427, iteration: 385852
loss: 0.9951748251914978,grad_norm: 0.9057626074635091, iteration: 385853
loss: 1.0176979303359985,grad_norm: 0.9999990478356239, iteration: 385854
loss: 1.0861736536026,grad_norm: 0.9999998771178004, iteration: 385855
loss: 1.1054075956344604,grad_norm: 0.8144480019335801, iteration: 385856
loss: 1.019674301147461,grad_norm: 0.7385318972496988, iteration: 385857
loss: 1.008195161819458,grad_norm: 0.8722788853478107, iteration: 385858
loss: 1.0062072277069092,grad_norm: 0.7798949274873801, iteration: 385859
loss: 1.0058012008666992,grad_norm: 0.9999996340441746, iteration: 385860
loss: 1.004043459892273,grad_norm: 0.9999998748061347, iteration: 385861
loss: 1.0469285249710083,grad_norm: 0.9999999989560965, iteration: 385862
loss: 1.0460526943206787,grad_norm: 0.7819549327151812, iteration: 385863
loss: 0.964388906955719,grad_norm: 0.6270659007134903, iteration: 385864
loss: 0.9862096905708313,grad_norm: 0.5768519499438309, iteration: 385865
loss: 0.9916197061538696,grad_norm: 0.7759637649518424, iteration: 385866
loss: 1.079874038696289,grad_norm: 0.7772268870523585, iteration: 385867
loss: 1.191019892692566,grad_norm: 0.9999990374818786, iteration: 385868
loss: 1.000656247138977,grad_norm: 0.9947300680685196, iteration: 385869
loss: 1.0284550189971924,grad_norm: 0.7541370514943576, iteration: 385870
loss: 0.9962124228477478,grad_norm: 0.9999991828018725, iteration: 385871
loss: 0.9865550994873047,grad_norm: 0.7772566495899431, iteration: 385872
loss: 0.9975911974906921,grad_norm: 0.6736052355549027, iteration: 385873
loss: 0.9907190203666687,grad_norm: 0.7081134489566024, iteration: 385874
loss: 1.0034798383712769,grad_norm: 0.8997462597993496, iteration: 385875
loss: 1.1173734664916992,grad_norm: 0.9999996180836402, iteration: 385876
loss: 1.04581880569458,grad_norm: 0.7333449764439414, iteration: 385877
loss: 1.0325074195861816,grad_norm: 0.7318417451688176, iteration: 385878
loss: 1.0483708381652832,grad_norm: 0.7428276778255836, iteration: 385879
loss: 1.0003286600112915,grad_norm: 0.9096515967431265, iteration: 385880
loss: 0.9823998808860779,grad_norm: 0.9552098090466749, iteration: 385881
loss: 0.9951032996177673,grad_norm: 0.8812507124839044, iteration: 385882
loss: 0.9852604866027832,grad_norm: 0.9999991668785703, iteration: 385883
loss: 1.0139542818069458,grad_norm: 0.7522595083939037, iteration: 385884
loss: 1.0428600311279297,grad_norm: 0.768650647928575, iteration: 385885
loss: 0.9924031496047974,grad_norm: 0.800678656500686, iteration: 385886
loss: 0.9770557880401611,grad_norm: 0.720631370321292, iteration: 385887
loss: 1.1562330722808838,grad_norm: 0.9999994063635445, iteration: 385888
loss: 0.9966602921485901,grad_norm: 0.9124268282246326, iteration: 385889
loss: 0.9627450704574585,grad_norm: 0.8452210200579916, iteration: 385890
loss: 1.0043511390686035,grad_norm: 0.7637784483505649, iteration: 385891
loss: 0.9790530204772949,grad_norm: 0.6724917076500803, iteration: 385892
loss: 1.01278555393219,grad_norm: 0.8016895945095135, iteration: 385893
loss: 1.0103751420974731,grad_norm: 0.7440404960674765, iteration: 385894
loss: 1.0219461917877197,grad_norm: 0.9744000803983077, iteration: 385895
loss: 1.0122827291488647,grad_norm: 0.8406385119353775, iteration: 385896
loss: 1.12931489944458,grad_norm: 0.809201722570964, iteration: 385897
loss: 0.9712819457054138,grad_norm: 0.7824069850577958, iteration: 385898
loss: 1.0257152318954468,grad_norm: 0.787663140465276, iteration: 385899
loss: 1.0122343301773071,grad_norm: 0.894448685402877, iteration: 385900
loss: 1.0561338663101196,grad_norm: 0.9762730346122651, iteration: 385901
loss: 1.0032458305358887,grad_norm: 0.9256891104229845, iteration: 385902
loss: 1.0054296255111694,grad_norm: 0.9999990154475166, iteration: 385903
loss: 0.9445577263832092,grad_norm: 0.7748379998142335, iteration: 385904
loss: 0.9878546595573425,grad_norm: 0.7091522876746493, iteration: 385905
loss: 1.0147191286087036,grad_norm: 0.7377986441777133, iteration: 385906
loss: 1.0109171867370605,grad_norm: 0.6810891930564162, iteration: 385907
loss: 1.0080493688583374,grad_norm: 0.6881843112734592, iteration: 385908
loss: 0.9987562298774719,grad_norm: 0.7825469221578553, iteration: 385909
loss: 1.014150857925415,grad_norm: 0.8452805241442873, iteration: 385910
loss: 1.0177714824676514,grad_norm: 0.9999994485975018, iteration: 385911
loss: 1.0125352144241333,grad_norm: 0.7400121980476261, iteration: 385912
loss: 1.0025367736816406,grad_norm: 0.9131103805573941, iteration: 385913
loss: 1.107682704925537,grad_norm: 0.9999999494105231, iteration: 385914
loss: 1.0164624452590942,grad_norm: 0.680251991564398, iteration: 385915
loss: 1.0007045269012451,grad_norm: 0.8397307958014437, iteration: 385916
loss: 1.0129833221435547,grad_norm: 0.8139677491655808, iteration: 385917
loss: 1.0110347270965576,grad_norm: 0.725515882479643, iteration: 385918
loss: 0.9881891012191772,grad_norm: 0.7435855645590731, iteration: 385919
loss: 0.9873180985450745,grad_norm: 0.993469772403045, iteration: 385920
loss: 0.9500414729118347,grad_norm: 0.7863886251960684, iteration: 385921
loss: 1.075003743171692,grad_norm: 0.9999996529702055, iteration: 385922
loss: 1.0570298433303833,grad_norm: 0.9999994978466472, iteration: 385923
loss: 1.0160998106002808,grad_norm: 0.651293886218742, iteration: 385924
loss: 0.9622048735618591,grad_norm: 0.701681264254866, iteration: 385925
loss: 0.9829018712043762,grad_norm: 0.9466279177156917, iteration: 385926
loss: 0.9822365045547485,grad_norm: 0.7990536967474114, iteration: 385927
loss: 1.0088731050491333,grad_norm: 0.8897878678173964, iteration: 385928
loss: 0.998980700969696,grad_norm: 0.7493315154960875, iteration: 385929
loss: 0.9606924653053284,grad_norm: 0.817369460656944, iteration: 385930
loss: 1.0358479022979736,grad_norm: 0.8022312571703332, iteration: 385931
loss: 1.0202151536941528,grad_norm: 0.7263230390080372, iteration: 385932
loss: 0.9824636578559875,grad_norm: 0.7075493301641101, iteration: 385933
loss: 1.0001641511917114,grad_norm: 0.8821507603080061, iteration: 385934
loss: 0.9990326166152954,grad_norm: 0.8297311030147181, iteration: 385935
loss: 1.0092564821243286,grad_norm: 0.7099389917972224, iteration: 385936
loss: 1.1105163097381592,grad_norm: 0.9999997480611144, iteration: 385937
loss: 1.0295443534851074,grad_norm: 0.8448643693303091, iteration: 385938
loss: 0.9972026944160461,grad_norm: 0.8429150229644079, iteration: 385939
loss: 1.0383350849151611,grad_norm: 0.8522746400349939, iteration: 385940
loss: 0.9880995750427246,grad_norm: 0.6761710882356499, iteration: 385941
loss: 0.9622473120689392,grad_norm: 0.7368946977380284, iteration: 385942
loss: 1.1818355321884155,grad_norm: 0.9999991389243271, iteration: 385943
loss: 1.1387643814086914,grad_norm: 0.9999998368240807, iteration: 385944
loss: 1.0011001825332642,grad_norm: 0.9999991974402677, iteration: 385945
loss: 1.0561256408691406,grad_norm: 0.7866763261172313, iteration: 385946
loss: 1.0421496629714966,grad_norm: 0.9999991604963998, iteration: 385947
loss: 1.0214471817016602,grad_norm: 0.7127118225662797, iteration: 385948
loss: 0.9533205032348633,grad_norm: 0.9999999740262115, iteration: 385949
loss: 0.9958199262619019,grad_norm: 0.7813497070962621, iteration: 385950
loss: 0.9862342476844788,grad_norm: 0.732976070988852, iteration: 385951
loss: 0.9961044192314148,grad_norm: 0.7557658484568118, iteration: 385952
loss: 1.0006687641143799,grad_norm: 0.7091643206074602, iteration: 385953
loss: 1.0131703615188599,grad_norm: 0.9999991133612934, iteration: 385954
loss: 0.9996185302734375,grad_norm: 0.6904061218395923, iteration: 385955
loss: 0.9937244057655334,grad_norm: 0.9405905300364448, iteration: 385956
loss: 0.9942880868911743,grad_norm: 0.7485754805313821, iteration: 385957
loss: 1.0142275094985962,grad_norm: 0.9999999139353416, iteration: 385958
loss: 1.247971773147583,grad_norm: 0.9999992842009968, iteration: 385959
loss: 1.0445623397827148,grad_norm: 0.9999998768056051, iteration: 385960
loss: 0.9952970147132874,grad_norm: 0.9492811112481672, iteration: 385961
loss: 0.9850770831108093,grad_norm: 0.7328264840156496, iteration: 385962
loss: 0.9721839427947998,grad_norm: 0.6911273877603662, iteration: 385963
loss: 1.003260850906372,grad_norm: 0.639829155973241, iteration: 385964
loss: 0.9740793704986572,grad_norm: 0.6551253121991248, iteration: 385965
loss: 1.0082753896713257,grad_norm: 0.7245801843289679, iteration: 385966
loss: 0.9955715537071228,grad_norm: 0.713673758970952, iteration: 385967
loss: 1.0743496417999268,grad_norm: 0.99999988019034, iteration: 385968
loss: 0.9652585983276367,grad_norm: 0.7548227934426947, iteration: 385969
loss: 1.0014309883117676,grad_norm: 0.75249817912542, iteration: 385970
loss: 0.9793949723243713,grad_norm: 0.9402445893272536, iteration: 385971
loss: 0.9912000894546509,grad_norm: 0.8110925590683152, iteration: 385972
loss: 1.0329530239105225,grad_norm: 0.9999990572713053, iteration: 385973
loss: 1.0325490236282349,grad_norm: 0.7608887760873214, iteration: 385974
loss: 1.001463770866394,grad_norm: 0.7338943879122072, iteration: 385975
loss: 1.0100772380828857,grad_norm: 0.6797481295660514, iteration: 385976
loss: 1.0138589143753052,grad_norm: 0.6758472892874264, iteration: 385977
loss: 1.0433257818222046,grad_norm: 0.7793732613250928, iteration: 385978
loss: 1.009738564491272,grad_norm: 0.7478064192547537, iteration: 385979
loss: 1.0157358646392822,grad_norm: 0.741843889554299, iteration: 385980
loss: 1.0213512182235718,grad_norm: 0.7178107151598948, iteration: 385981
loss: 1.085950493812561,grad_norm: 0.9999991200632409, iteration: 385982
loss: 1.002856731414795,grad_norm: 0.725899015384952, iteration: 385983
loss: 1.0053997039794922,grad_norm: 0.8699118521659119, iteration: 385984
loss: 1.0281953811645508,grad_norm: 0.884774753540049, iteration: 385985
loss: 1.0137723684310913,grad_norm: 0.7485870575428324, iteration: 385986
loss: 0.9789744019508362,grad_norm: 0.9999992274460151, iteration: 385987
loss: 1.0066457986831665,grad_norm: 0.849848521046536, iteration: 385988
loss: 1.00416898727417,grad_norm: 0.914867061840902, iteration: 385989
loss: 1.0230203866958618,grad_norm: 0.7536818596347398, iteration: 385990
loss: 0.9809518456459045,grad_norm: 0.9999991280243651, iteration: 385991
loss: 1.1345733404159546,grad_norm: 0.8327617068733424, iteration: 385992
loss: 0.981863260269165,grad_norm: 0.735264973983098, iteration: 385993
loss: 0.9635900855064392,grad_norm: 0.8074313168396374, iteration: 385994
loss: 0.9948018193244934,grad_norm: 0.7547532121153072, iteration: 385995
loss: 0.9839446544647217,grad_norm: 0.8464269301794907, iteration: 385996
loss: 0.9784144759178162,grad_norm: 0.7706156627751211, iteration: 385997
loss: 1.0002015829086304,grad_norm: 0.9294503917592526, iteration: 385998
loss: 1.0121047496795654,grad_norm: 0.9999999363345488, iteration: 385999
loss: 1.012115240097046,grad_norm: 0.9478931472687104, iteration: 386000
loss: 1.022890329360962,grad_norm: 0.8099781931491884, iteration: 386001
loss: 1.0129365921020508,grad_norm: 0.6922026986964988, iteration: 386002
loss: 0.9673628211021423,grad_norm: 0.7344649291630526, iteration: 386003
loss: 1.0464228391647339,grad_norm: 0.8854279825967591, iteration: 386004
loss: 1.0184788703918457,grad_norm: 0.8285401205875862, iteration: 386005
loss: 1.0000882148742676,grad_norm: 0.6212059196045465, iteration: 386006
loss: 1.1045067310333252,grad_norm: 0.9999999070553487, iteration: 386007
loss: 1.0850635766983032,grad_norm: 0.999999909182369, iteration: 386008
loss: 1.0325727462768555,grad_norm: 0.7541520827279048, iteration: 386009
loss: 1.0172423124313354,grad_norm: 0.534046593110657, iteration: 386010
loss: 1.018979787826538,grad_norm: 0.9999994410421595, iteration: 386011
loss: 1.0126255750656128,grad_norm: 0.7341998182975417, iteration: 386012
loss: 1.0168378353118896,grad_norm: 0.7539555132279995, iteration: 386013
loss: 1.0293246507644653,grad_norm: 0.8862776300448344, iteration: 386014
loss: 1.0218884944915771,grad_norm: 0.6328161267571346, iteration: 386015
loss: 0.9892340898513794,grad_norm: 0.6376169047260773, iteration: 386016
loss: 1.0380381345748901,grad_norm: 0.9999998298088794, iteration: 386017
loss: 0.9655309915542603,grad_norm: 0.7462724838000554, iteration: 386018
loss: 0.9373008012771606,grad_norm: 0.8681997706736516, iteration: 386019
loss: 0.9996878504753113,grad_norm: 0.7573786301704181, iteration: 386020
loss: 1.0488754510879517,grad_norm: 0.7951175778979981, iteration: 386021
loss: 1.0398019552230835,grad_norm: 0.8862264833750748, iteration: 386022
loss: 1.028073787689209,grad_norm: 0.9999998468676782, iteration: 386023
loss: 0.9921078085899353,grad_norm: 0.7125983292919716, iteration: 386024
loss: 0.985722005367279,grad_norm: 0.7876721759537261, iteration: 386025
loss: 0.9673577547073364,grad_norm: 0.8156630048843102, iteration: 386026
loss: 0.9712991118431091,grad_norm: 0.8651593031081358, iteration: 386027
loss: 0.9934254288673401,grad_norm: 0.9999992369468893, iteration: 386028
loss: 1.0166738033294678,grad_norm: 0.8368361806732204, iteration: 386029
loss: 0.9922755360603333,grad_norm: 0.8688130062543403, iteration: 386030
loss: 1.1727931499481201,grad_norm: 0.9999999680002807, iteration: 386031
loss: 1.0178236961364746,grad_norm: 0.9999990592496119, iteration: 386032
loss: 1.0984106063842773,grad_norm: 0.9999992832686536, iteration: 386033
loss: 1.0035885572433472,grad_norm: 0.767671520713466, iteration: 386034
loss: 1.0048192739486694,grad_norm: 0.8746224718175044, iteration: 386035
loss: 0.9820199012756348,grad_norm: 0.8275384168827855, iteration: 386036
loss: 0.9682033061981201,grad_norm: 0.9117121493779988, iteration: 386037
loss: 0.9807916283607483,grad_norm: 0.9999991580516823, iteration: 386038
loss: 1.0023508071899414,grad_norm: 0.7878149196210097, iteration: 386039
loss: 0.9778552651405334,grad_norm: 0.9118686141600839, iteration: 386040
loss: 1.0050688982009888,grad_norm: 0.8184718817779613, iteration: 386041
loss: 1.0507162809371948,grad_norm: 0.999999304495319, iteration: 386042
loss: 0.9748570322990417,grad_norm: 0.9999992138055502, iteration: 386043
loss: 1.1257168054580688,grad_norm: 0.9999997450944421, iteration: 386044
loss: 1.007601261138916,grad_norm: 0.801338535987061, iteration: 386045
loss: 0.9870170950889587,grad_norm: 0.6796115617465747, iteration: 386046
loss: 0.9224278330802917,grad_norm: 0.8238885322847469, iteration: 386047
loss: 1.0194755792617798,grad_norm: 0.7075972322831139, iteration: 386048
loss: 0.9732766151428223,grad_norm: 0.85272300918293, iteration: 386049
loss: 1.0190573930740356,grad_norm: 0.7781615929296913, iteration: 386050
loss: 1.0814906358718872,grad_norm: 0.9999989450837441, iteration: 386051
loss: 0.9937734603881836,grad_norm: 0.6498126784668523, iteration: 386052
loss: 1.0055444240570068,grad_norm: 0.9035115616661906, iteration: 386053
loss: 0.9835067987442017,grad_norm: 0.7713315898098417, iteration: 386054
loss: 1.0229897499084473,grad_norm: 0.7164643236197099, iteration: 386055
loss: 1.0977474451065063,grad_norm: 0.9999996541195996, iteration: 386056
loss: 1.0489654541015625,grad_norm: 0.9481752930255104, iteration: 386057
loss: 0.9860135912895203,grad_norm: 0.8338352751416399, iteration: 386058
loss: 1.0256770849227905,grad_norm: 0.9910147500687283, iteration: 386059
loss: 0.9840282201766968,grad_norm: 0.8653033235804928, iteration: 386060
loss: 0.9899507761001587,grad_norm: 0.7458077115327735, iteration: 386061
loss: 1.031423568725586,grad_norm: 0.6837732598207107, iteration: 386062
loss: 0.9810002446174622,grad_norm: 0.781621638554194, iteration: 386063
loss: 1.0079827308654785,grad_norm: 0.8470591094574372, iteration: 386064
loss: 0.9798398613929749,grad_norm: 0.8890208317489904, iteration: 386065
loss: 0.9590370655059814,grad_norm: 0.7576348057606078, iteration: 386066
loss: 1.0125998258590698,grad_norm: 0.7750990560408095, iteration: 386067
loss: 1.0230175256729126,grad_norm: 0.8065404661434408, iteration: 386068
loss: 1.0095511674880981,grad_norm: 0.7739591008929363, iteration: 386069
loss: 1.036211609840393,grad_norm: 0.7844636453150783, iteration: 386070
loss: 1.0381804704666138,grad_norm: 0.8601246858332914, iteration: 386071
loss: 0.9776298403739929,grad_norm: 0.9999994654840233, iteration: 386072
loss: 0.9856911301612854,grad_norm: 0.9333192573428963, iteration: 386073
loss: 1.0100022554397583,grad_norm: 0.7199762124217458, iteration: 386074
loss: 0.9761844873428345,grad_norm: 0.7108313890690434, iteration: 386075
loss: 1.027436375617981,grad_norm: 0.7745532031698693, iteration: 386076
loss: 0.9760014414787292,grad_norm: 0.8184348354106058, iteration: 386077
loss: 0.9989138245582581,grad_norm: 0.7354826489165026, iteration: 386078
loss: 0.9844205975532532,grad_norm: 0.7970545763870579, iteration: 386079
loss: 1.0436053276062012,grad_norm: 0.9999992422946519, iteration: 386080
loss: 1.0139013528823853,grad_norm: 0.9999992735439548, iteration: 386081
loss: 0.9956300854682922,grad_norm: 0.9170070183878613, iteration: 386082
loss: 0.9991364479064941,grad_norm: 0.795346674255355, iteration: 386083
loss: 0.9936378002166748,grad_norm: 0.746020696656484, iteration: 386084
loss: 1.0321838855743408,grad_norm: 0.9999991661052792, iteration: 386085
loss: 0.9784461259841919,grad_norm: 0.9418233057575142, iteration: 386086
loss: 0.9484861493110657,grad_norm: 0.8298433692085563, iteration: 386087
loss: 0.9928268790245056,grad_norm: 0.7145352686774654, iteration: 386088
loss: 1.035369873046875,grad_norm: 0.7821426642947864, iteration: 386089
loss: 1.0057984590530396,grad_norm: 0.8164623662865033, iteration: 386090
loss: 0.9983350038528442,grad_norm: 0.7856579560753532, iteration: 386091
loss: 0.9901651740074158,grad_norm: 0.8661186968143688, iteration: 386092
loss: 1.0133192539215088,grad_norm: 0.9999992753215022, iteration: 386093
loss: 1.1384518146514893,grad_norm: 0.9999993271169771, iteration: 386094
loss: 1.0063637495040894,grad_norm: 0.9999993484516891, iteration: 386095
loss: 0.992565929889679,grad_norm: 0.7486868263975746, iteration: 386096
loss: 0.9886505007743835,grad_norm: 0.8392232679871907, iteration: 386097
loss: 1.0854225158691406,grad_norm: 0.8618817008327287, iteration: 386098
loss: 0.9827482104301453,grad_norm: 0.7260073104296786, iteration: 386099
loss: 1.0188320875167847,grad_norm: 0.7724673754773261, iteration: 386100
loss: 1.0162930488586426,grad_norm: 0.72321979437834, iteration: 386101
loss: 0.9939472675323486,grad_norm: 0.9009315838607499, iteration: 386102
loss: 1.0129064321517944,grad_norm: 0.7781049164189409, iteration: 386103
loss: 0.9876318573951721,grad_norm: 0.7224640512382688, iteration: 386104
loss: 0.9921352863311768,grad_norm: 0.9181523718956335, iteration: 386105
loss: 1.0271527767181396,grad_norm: 0.979130271192761, iteration: 386106
loss: 0.9619693756103516,grad_norm: 0.8069577113527554, iteration: 386107
loss: 1.0203899145126343,grad_norm: 0.6876426524402097, iteration: 386108
loss: 1.02389395236969,grad_norm: 0.9999991433544909, iteration: 386109
loss: 0.9897050857543945,grad_norm: 0.6925042218182811, iteration: 386110
loss: 1.0275650024414062,grad_norm: 0.8680866481831803, iteration: 386111
loss: 0.9727441072463989,grad_norm: 0.7899721635351675, iteration: 386112
loss: 1.0152842998504639,grad_norm: 0.7792586544727731, iteration: 386113
loss: 1.0911349058151245,grad_norm: 0.9999991440684514, iteration: 386114
loss: 0.9912879467010498,grad_norm: 0.7037815937864131, iteration: 386115
loss: 0.9901441335678101,grad_norm: 0.9755705940027865, iteration: 386116
loss: 0.9871506690979004,grad_norm: 0.734493491773364, iteration: 386117
loss: 0.9947464466094971,grad_norm: 0.9999994796689836, iteration: 386118
loss: 1.005697250366211,grad_norm: 0.9278443044635301, iteration: 386119
loss: 0.992318332195282,grad_norm: 0.820139963273234, iteration: 386120
loss: 0.980335533618927,grad_norm: 0.9408510925596556, iteration: 386121
loss: 1.0182819366455078,grad_norm: 0.8097815702089025, iteration: 386122
loss: 0.9812015295028687,grad_norm: 0.7098490872327414, iteration: 386123
loss: 1.0402780771255493,grad_norm: 0.9018795610405084, iteration: 386124
loss: 1.0044764280319214,grad_norm: 0.744812759109252, iteration: 386125
loss: 0.9859253168106079,grad_norm: 0.9999997125696213, iteration: 386126
loss: 1.0212244987487793,grad_norm: 0.7918199757828708, iteration: 386127
loss: 1.001082181930542,grad_norm: 0.7729572143903666, iteration: 386128
loss: 1.0611354112625122,grad_norm: 0.9999993758896589, iteration: 386129
loss: 1.0347546339035034,grad_norm: 0.7966560618847597, iteration: 386130
loss: 1.0092427730560303,grad_norm: 0.8454466889331814, iteration: 386131
loss: 1.0096244812011719,grad_norm: 0.8272611562232891, iteration: 386132
loss: 0.9827157855033875,grad_norm: 0.9999989813039014, iteration: 386133
loss: 0.958221435546875,grad_norm: 0.7859156529099153, iteration: 386134
loss: 1.0207284688949585,grad_norm: 0.8056190284011002, iteration: 386135
loss: 1.0141639709472656,grad_norm: 0.9999994525543887, iteration: 386136
loss: 0.9737661480903625,grad_norm: 0.7344754897196146, iteration: 386137
loss: 1.009879469871521,grad_norm: 0.9999993835552003, iteration: 386138
loss: 1.0854525566101074,grad_norm: 0.9999991615525357, iteration: 386139
loss: 0.9843019843101501,grad_norm: 0.7895971078517062, iteration: 386140
loss: 1.0068316459655762,grad_norm: 0.6336264354650797, iteration: 386141
loss: 1.0219229459762573,grad_norm: 0.868554398752162, iteration: 386142
loss: 1.0051753520965576,grad_norm: 0.8013711362969268, iteration: 386143
loss: 1.0131621360778809,grad_norm: 0.999999506824777, iteration: 386144
loss: 0.9809905886650085,grad_norm: 0.8730495971848561, iteration: 386145
loss: 0.9867144823074341,grad_norm: 0.9003784680477761, iteration: 386146
loss: 1.03377103805542,grad_norm: 0.7777917101328351, iteration: 386147
loss: 1.0222762823104858,grad_norm: 0.8786724860675504, iteration: 386148
loss: 1.0335801839828491,grad_norm: 0.9861001952101315, iteration: 386149
loss: 0.9594305157661438,grad_norm: 0.8325900519706495, iteration: 386150
loss: 0.9867088198661804,grad_norm: 0.6917936805743896, iteration: 386151
loss: 0.9790058732032776,grad_norm: 0.9999995320494759, iteration: 386152
loss: 0.9935202598571777,grad_norm: 0.6593170165986163, iteration: 386153
loss: 0.9770603179931641,grad_norm: 0.6663601362933859, iteration: 386154
loss: 0.9935499429702759,grad_norm: 0.7557291190376504, iteration: 386155
loss: 0.9783965349197388,grad_norm: 0.8607314377934989, iteration: 386156
loss: 1.0178112983703613,grad_norm: 0.8837539538791459, iteration: 386157
loss: 1.0073405504226685,grad_norm: 0.686640539602669, iteration: 386158
loss: 0.9867945313453674,grad_norm: 0.7650072864491011, iteration: 386159
loss: 1.0405856370925903,grad_norm: 1.0000000373352793, iteration: 386160
loss: 0.9955597519874573,grad_norm: 0.8971513995094432, iteration: 386161
loss: 0.9585087895393372,grad_norm: 0.8011291957019315, iteration: 386162
loss: 1.0014880895614624,grad_norm: 0.8104013269147171, iteration: 386163
loss: 1.0193289518356323,grad_norm: 0.795354837643771, iteration: 386164
loss: 0.9736085534095764,grad_norm: 0.7455360791838374, iteration: 386165
loss: 1.0502222776412964,grad_norm: 0.7894492291760312, iteration: 386166
loss: 1.0078495740890503,grad_norm: 0.8179525440200208, iteration: 386167
loss: 1.0529773235321045,grad_norm: 0.9999992894813104, iteration: 386168
loss: 1.0226978063583374,grad_norm: 0.7768356487060211, iteration: 386169
loss: 1.0488625764846802,grad_norm: 0.8127915942925069, iteration: 386170
loss: 1.0143241882324219,grad_norm: 0.7118793753566696, iteration: 386171
loss: 1.0311436653137207,grad_norm: 0.9571365767705943, iteration: 386172
loss: 0.9751870632171631,grad_norm: 0.8474725359365582, iteration: 386173
loss: 1.0078941583633423,grad_norm: 0.8811384320784702, iteration: 386174
loss: 1.0108078718185425,grad_norm: 0.8722165028435358, iteration: 386175
loss: 0.9705690741539001,grad_norm: 0.796151488424452, iteration: 386176
loss: 1.0425045490264893,grad_norm: 0.7963615347738426, iteration: 386177
loss: 1.0609240531921387,grad_norm: 0.9999992732145739, iteration: 386178
loss: 1.0829684734344482,grad_norm: 1.0000000447461634, iteration: 386179
loss: 0.992963969707489,grad_norm: 0.7443599247402404, iteration: 386180
loss: 1.0446974039077759,grad_norm: 0.9512509758614932, iteration: 386181
loss: 0.9867091178894043,grad_norm: 0.9999991469906586, iteration: 386182
loss: 0.9691125750541687,grad_norm: 0.703904177925302, iteration: 386183
loss: 0.9795372486114502,grad_norm: 0.8130451868996876, iteration: 386184
loss: 1.0000665187835693,grad_norm: 0.8009644236896881, iteration: 386185
loss: 1.0027093887329102,grad_norm: 0.9999997862085781, iteration: 386186
loss: 1.0387948751449585,grad_norm: 0.7024214396330543, iteration: 386187
loss: 1.0333638191223145,grad_norm: 0.8316031309594798, iteration: 386188
loss: 0.9892888069152832,grad_norm: 0.9999993302969707, iteration: 386189
loss: 1.012129545211792,grad_norm: 0.7918840753539557, iteration: 386190
loss: 0.9671617150306702,grad_norm: 0.7635991830752112, iteration: 386191
loss: 1.070821762084961,grad_norm: 0.9999995030378581, iteration: 386192
loss: 1.0683393478393555,grad_norm: 0.9999992947892897, iteration: 386193
loss: 1.0561010837554932,grad_norm: 0.7829310097226776, iteration: 386194
loss: 0.9671322107315063,grad_norm: 0.7949319187789914, iteration: 386195
loss: 0.9793402552604675,grad_norm: 0.8901998822637117, iteration: 386196
loss: 1.034741759300232,grad_norm: 0.6797596394408025, iteration: 386197
loss: 0.9979693293571472,grad_norm: 0.7508562961226678, iteration: 386198
loss: 1.2321363687515259,grad_norm: 0.9999998401746446, iteration: 386199
loss: 0.9802855849266052,grad_norm: 0.8293758944498657, iteration: 386200
loss: 0.978782057762146,grad_norm: 0.7564027253295683, iteration: 386201
loss: 1.013360857963562,grad_norm: 0.7948186688581367, iteration: 386202
loss: 1.0532305240631104,grad_norm: 0.7276482880767596, iteration: 386203
loss: 1.006194829940796,grad_norm: 0.9999990702597229, iteration: 386204
loss: 0.9961559772491455,grad_norm: 0.6958662307655515, iteration: 386205
loss: 0.9800319075584412,grad_norm: 0.9271533090190969, iteration: 386206
loss: 0.9438074231147766,grad_norm: 0.999999261046988, iteration: 386207
loss: 1.0002727508544922,grad_norm: 0.8247336250645749, iteration: 386208
loss: 1.0076453685760498,grad_norm: 0.8357436524376518, iteration: 386209
loss: 0.976320207118988,grad_norm: 0.999999453843951, iteration: 386210
loss: 1.0102150440216064,grad_norm: 0.961995792767693, iteration: 386211
loss: 0.9985417127609253,grad_norm: 0.8377702815262609, iteration: 386212
loss: 1.0426452159881592,grad_norm: 0.8408208195600401, iteration: 386213
loss: 1.0151370763778687,grad_norm: 0.8252494399824177, iteration: 386214
loss: 0.9980640411376953,grad_norm: 0.9999999292706976, iteration: 386215
loss: 0.9648981690406799,grad_norm: 0.8138345969192389, iteration: 386216
loss: 1.0257824659347534,grad_norm: 0.8272159699076869, iteration: 386217
loss: 1.039378046989441,grad_norm: 0.8461046825075594, iteration: 386218
loss: 1.1089051961898804,grad_norm: 0.9999999974672912, iteration: 386219
loss: 0.9531823992729187,grad_norm: 0.7569894199074454, iteration: 386220
loss: 0.9880712628364563,grad_norm: 0.9338646785180196, iteration: 386221
loss: 1.0144611597061157,grad_norm: 0.8431969100918573, iteration: 386222
loss: 0.9990735054016113,grad_norm: 0.6852040363176874, iteration: 386223
loss: 1.1114931106567383,grad_norm: 0.9999991391122665, iteration: 386224
loss: 0.9996954798698425,grad_norm: 0.7218601796734102, iteration: 386225
loss: 1.056058645248413,grad_norm: 0.9999991426565081, iteration: 386226
loss: 1.0066063404083252,grad_norm: 0.8050948270545407, iteration: 386227
loss: 1.0159258842468262,grad_norm: 0.9999991450574595, iteration: 386228
loss: 1.0228271484375,grad_norm: 0.8774299653841251, iteration: 386229
loss: 0.9969670176506042,grad_norm: 0.7421224887663151, iteration: 386230
loss: 1.0064469575881958,grad_norm: 0.9952945065963901, iteration: 386231
loss: 0.993949294090271,grad_norm: 0.7195980522502029, iteration: 386232
loss: 1.0067414045333862,grad_norm: 0.8681609569455058, iteration: 386233
loss: 1.0118361711502075,grad_norm: 0.999999243370097, iteration: 386234
loss: 1.0245002508163452,grad_norm: 0.8123588209993673, iteration: 386235
loss: 1.0083507299423218,grad_norm: 0.7439109587497077, iteration: 386236
loss: 1.0409260988235474,grad_norm: 0.7941121390777472, iteration: 386237
loss: 1.0059466361999512,grad_norm: 0.8229365864625409, iteration: 386238
loss: 1.0412124395370483,grad_norm: 0.999999799291732, iteration: 386239
loss: 0.9581290483474731,grad_norm: 0.7983193196178732, iteration: 386240
loss: 0.9834882616996765,grad_norm: 0.8236126155253615, iteration: 386241
loss: 1.0320513248443604,grad_norm: 0.8711131632508633, iteration: 386242
loss: 1.053975224494934,grad_norm: 0.9999992120387978, iteration: 386243
loss: 1.019201397895813,grad_norm: 0.9903560932717168, iteration: 386244
loss: 1.0285861492156982,grad_norm: 0.999999389618083, iteration: 386245
loss: 1.0159776210784912,grad_norm: 0.7402018888155467, iteration: 386246
loss: 1.0075913667678833,grad_norm: 0.7734595171867917, iteration: 386247
loss: 1.029120922088623,grad_norm: 0.9390080280214903, iteration: 386248
loss: 0.9816020727157593,grad_norm: 0.9999992653440979, iteration: 386249
loss: 1.001554250717163,grad_norm: 0.8571517547509419, iteration: 386250
loss: 0.9710952043533325,grad_norm: 0.7986266771499406, iteration: 386251
loss: 1.0435527563095093,grad_norm: 0.8495873151996898, iteration: 386252
loss: 0.969385027885437,grad_norm: 0.7431315995227437, iteration: 386253
loss: 1.021417260169983,grad_norm: 0.9999993891157919, iteration: 386254
loss: 1.039025068283081,grad_norm: 0.7589907562233016, iteration: 386255
loss: 1.025761365890503,grad_norm: 0.999999526880676, iteration: 386256
loss: 0.9889056086540222,grad_norm: 0.8183606357905923, iteration: 386257
loss: 1.0832358598709106,grad_norm: 0.9999997728300936, iteration: 386258
loss: 1.0180096626281738,grad_norm: 0.8128905551214877, iteration: 386259
loss: 0.9787976741790771,grad_norm: 0.6565912987300422, iteration: 386260
loss: 0.988869845867157,grad_norm: 0.7840245223985862, iteration: 386261
loss: 0.9888976216316223,grad_norm: 0.7805377031623402, iteration: 386262
loss: 1.0069042444229126,grad_norm: 0.8208638129791276, iteration: 386263
loss: 1.0192428827285767,grad_norm: 0.8647427137181255, iteration: 386264
loss: 0.982312798500061,grad_norm: 0.8122959837053548, iteration: 386265
loss: 0.9875304698944092,grad_norm: 0.6873274295717731, iteration: 386266
loss: 0.9627063274383545,grad_norm: 0.8338680671172974, iteration: 386267
loss: 0.9370967745780945,grad_norm: 0.9265173659929095, iteration: 386268
loss: 0.9990466833114624,grad_norm: 0.9402564697244676, iteration: 386269
loss: 1.0146235227584839,grad_norm: 0.8109625985508867, iteration: 386270
loss: 1.052986979484558,grad_norm: 0.9999991591285659, iteration: 386271
loss: 1.0032702684402466,grad_norm: 0.7685341699775714, iteration: 386272
loss: 0.961795449256897,grad_norm: 0.7162404744985017, iteration: 386273
loss: 1.033841848373413,grad_norm: 0.6402785118624063, iteration: 386274
loss: 0.9971074461936951,grad_norm: 0.7459123127839511, iteration: 386275
loss: 0.9704603552818298,grad_norm: 0.7151206768710371, iteration: 386276
loss: 1.0216342210769653,grad_norm: 0.9999992311291067, iteration: 386277
loss: 1.022757887840271,grad_norm: 0.801956280068852, iteration: 386278
loss: 1.012500524520874,grad_norm: 0.8162287085618525, iteration: 386279
loss: 1.0188539028167725,grad_norm: 0.9999990933489085, iteration: 386280
loss: 0.9796249866485596,grad_norm: 0.7703062907706789, iteration: 386281
loss: 1.0066405534744263,grad_norm: 0.9678127717416007, iteration: 386282
loss: 1.0377886295318604,grad_norm: 0.6648431409107358, iteration: 386283
loss: 0.986066997051239,grad_norm: 0.791426114470078, iteration: 386284
loss: 0.9983306527137756,grad_norm: 0.7062908607342551, iteration: 386285
loss: 1.0071134567260742,grad_norm: 0.7445563284872125, iteration: 386286
loss: 0.9980775713920593,grad_norm: 0.8963561146868096, iteration: 386287
loss: 0.9806435704231262,grad_norm: 0.8736017174063203, iteration: 386288
loss: 0.9876906871795654,grad_norm: 0.9999999180070547, iteration: 386289
loss: 0.9668488502502441,grad_norm: 0.7882291895928328, iteration: 386290
loss: 1.008751392364502,grad_norm: 0.6837303881878403, iteration: 386291
loss: 0.9589952826499939,grad_norm: 0.718966219120558, iteration: 386292
loss: 0.9897390007972717,grad_norm: 0.6676558766967512, iteration: 386293
loss: 1.0160695314407349,grad_norm: 0.9999996839258327, iteration: 386294
loss: 0.9812605977058411,grad_norm: 0.9328560849377453, iteration: 386295
loss: 1.0053468942642212,grad_norm: 0.7376370532191504, iteration: 386296
loss: 1.0052281618118286,grad_norm: 0.7531517479056156, iteration: 386297
loss: 1.0366504192352295,grad_norm: 0.99999938501802, iteration: 386298
loss: 1.0420352220535278,grad_norm: 0.7618487323176407, iteration: 386299
loss: 0.9790610671043396,grad_norm: 0.6816784635894606, iteration: 386300
loss: 0.9751566052436829,grad_norm: 0.8175909792786077, iteration: 386301
loss: 0.9648212194442749,grad_norm: 0.7740342361273902, iteration: 386302
loss: 0.9677149057388306,grad_norm: 0.8654914810800954, iteration: 386303
loss: 0.9729970097541809,grad_norm: 0.7181230679185772, iteration: 386304
loss: 0.9846204519271851,grad_norm: 0.7422935848088983, iteration: 386305
loss: 1.0071700811386108,grad_norm: 0.6788920279611234, iteration: 386306
loss: 1.0297950506210327,grad_norm: 0.9999990222017863, iteration: 386307
loss: 0.9999669194221497,grad_norm: 0.9999990952537162, iteration: 386308
loss: 1.0146592855453491,grad_norm: 0.6930323830077714, iteration: 386309
loss: 0.9717859625816345,grad_norm: 0.7876642033293048, iteration: 386310
loss: 0.9823125004768372,grad_norm: 0.7629451632522448, iteration: 386311
loss: 0.9687114357948303,grad_norm: 0.7188215554384334, iteration: 386312
loss: 1.0188181400299072,grad_norm: 0.9999997867913923, iteration: 386313
loss: 1.0575047731399536,grad_norm: 0.9978477177446149, iteration: 386314
loss: 0.9607254862785339,grad_norm: 0.813692838814901, iteration: 386315
loss: 1.0336183309555054,grad_norm: 0.9999997329468846, iteration: 386316
loss: 1.0291229486465454,grad_norm: 0.9146450880301065, iteration: 386317
loss: 1.017642617225647,grad_norm: 0.9999997914606197, iteration: 386318
loss: 1.0018588304519653,grad_norm: 0.6750825586352873, iteration: 386319
loss: 0.9888344407081604,grad_norm: 0.8691462353553207, iteration: 386320
loss: 0.9866811633110046,grad_norm: 0.8061817800591483, iteration: 386321
loss: 0.9950051307678223,grad_norm: 0.749698368712673, iteration: 386322
loss: 1.001799464225769,grad_norm: 0.8982898447106538, iteration: 386323
loss: 0.9845506548881531,grad_norm: 0.7402619800085186, iteration: 386324
loss: 1.003129482269287,grad_norm: 0.7244816016787632, iteration: 386325
loss: 0.9552294015884399,grad_norm: 0.8882630337982114, iteration: 386326
loss: 1.004118800163269,grad_norm: 0.6984457140073478, iteration: 386327
loss: 0.9990097880363464,grad_norm: 0.9016121275116155, iteration: 386328
loss: 1.0001894235610962,grad_norm: 0.9999991065035233, iteration: 386329
loss: 1.033394455909729,grad_norm: 0.9095678390747981, iteration: 386330
loss: 0.9889982342720032,grad_norm: 0.8072975848669471, iteration: 386331
loss: 1.0155770778656006,grad_norm: 0.8021072709070036, iteration: 386332
loss: 1.0372568368911743,grad_norm: 0.7443085824051858, iteration: 386333
loss: 0.9945337176322937,grad_norm: 0.8889698586833281, iteration: 386334
loss: 1.0544636249542236,grad_norm: 0.9999992208506158, iteration: 386335
loss: 1.0064990520477295,grad_norm: 0.9999999737239634, iteration: 386336
loss: 0.9963299632072449,grad_norm: 0.908830441275732, iteration: 386337
loss: 0.98329097032547,grad_norm: 0.9999997822097938, iteration: 386338
loss: 1.0074877738952637,grad_norm: 0.7573295014413148, iteration: 386339
loss: 0.9867889881134033,grad_norm: 0.9566360725157601, iteration: 386340
loss: 1.0483628511428833,grad_norm: 0.9999990679360139, iteration: 386341
loss: 1.002924919128418,grad_norm: 0.6988058300126719, iteration: 386342
loss: 1.0034013986587524,grad_norm: 0.921950535230679, iteration: 386343
loss: 1.0751768350601196,grad_norm: 0.7139680962526611, iteration: 386344
loss: 0.992937445640564,grad_norm: 0.8149392060941084, iteration: 386345
loss: 0.9813157916069031,grad_norm: 0.685622104369338, iteration: 386346
loss: 0.9518736004829407,grad_norm: 0.8978927239169645, iteration: 386347
loss: 0.9691455960273743,grad_norm: 0.8417494482207599, iteration: 386348
loss: 0.993838906288147,grad_norm: 0.7559323751645297, iteration: 386349
loss: 1.0412400960922241,grad_norm: 0.9636354529332473, iteration: 386350
loss: 1.0494481325149536,grad_norm: 0.840623477928844, iteration: 386351
loss: 0.977301836013794,grad_norm: 0.5894923419543532, iteration: 386352
loss: 1.0115727186203003,grad_norm: 0.8142351039190439, iteration: 386353
loss: 0.9659562110900879,grad_norm: 0.7298320696073385, iteration: 386354
loss: 1.2378382682800293,grad_norm: 1.000000017549125, iteration: 386355
loss: 0.9886875748634338,grad_norm: 0.7534324141959715, iteration: 386356
loss: 1.0043951272964478,grad_norm: 0.7828187169024388, iteration: 386357
loss: 1.1067545413970947,grad_norm: 0.9999994876672885, iteration: 386358
loss: 1.0366359949111938,grad_norm: 0.8736420136405544, iteration: 386359
loss: 0.9926939010620117,grad_norm: 0.9591991103837372, iteration: 386360
loss: 1.036393165588379,grad_norm: 0.9999997901744548, iteration: 386361
loss: 1.0029633045196533,grad_norm: 0.6795042652414505, iteration: 386362
loss: 1.0111340284347534,grad_norm: 0.6738653304329149, iteration: 386363
loss: 0.9945701360702515,grad_norm: 0.8453709924906914, iteration: 386364
loss: 1.1787869930267334,grad_norm: 0.9999996107449988, iteration: 386365
loss: 1.009108304977417,grad_norm: 0.9999998291179456, iteration: 386366
loss: 1.0135835409164429,grad_norm: 0.9554758701181431, iteration: 386367
loss: 1.0174401998519897,grad_norm: 0.7468124222991216, iteration: 386368
loss: 1.0131279230117798,grad_norm: 0.7331042669438834, iteration: 386369
loss: 1.0286654233932495,grad_norm: 0.9999991156153369, iteration: 386370
loss: 1.0172064304351807,grad_norm: 0.9883295625517415, iteration: 386371
loss: 1.0304651260375977,grad_norm: 0.8439904717328286, iteration: 386372
loss: 1.0385901927947998,grad_norm: 0.8647721930049324, iteration: 386373
loss: 1.0510594844818115,grad_norm: 0.999999877187445, iteration: 386374
loss: 1.0158932209014893,grad_norm: 0.855259766781289, iteration: 386375
loss: 1.0058144330978394,grad_norm: 0.6837460750156406, iteration: 386376
loss: 0.9910494685173035,grad_norm: 0.7317869338432289, iteration: 386377
loss: 1.0256236791610718,grad_norm: 0.9457417389051406, iteration: 386378
loss: 0.9853560924530029,grad_norm: 0.9999992090119956, iteration: 386379
loss: 1.0455749034881592,grad_norm: 0.9999992798428754, iteration: 386380
loss: 1.020909070968628,grad_norm: 0.8261633013509199, iteration: 386381
loss: 0.9808341264724731,grad_norm: 0.6693153936212458, iteration: 386382
loss: 1.0023558139801025,grad_norm: 0.7803979606468917, iteration: 386383
loss: 1.0539007186889648,grad_norm: 0.9048708492260047, iteration: 386384
loss: 1.021470069885254,grad_norm: 0.6875483645742794, iteration: 386385
loss: 1.0392757654190063,grad_norm: 0.8212453805528475, iteration: 386386
loss: 1.1114295721054077,grad_norm: 0.9999996524958367, iteration: 386387
loss: 1.1256705522537231,grad_norm: 0.9999992248721076, iteration: 386388
loss: 1.2282437086105347,grad_norm: 0.9999994275013102, iteration: 386389
loss: 0.9893149137496948,grad_norm: 0.6648457731537873, iteration: 386390
loss: 1.1749471426010132,grad_norm: 0.9999992714427031, iteration: 386391
loss: 0.9993610978126526,grad_norm: 0.7195134829947903, iteration: 386392
loss: 1.0497556924819946,grad_norm: 0.7825542394574976, iteration: 386393
loss: 1.0013686418533325,grad_norm: 0.835436232201532, iteration: 386394
loss: 0.9660580158233643,grad_norm: 0.6727108053228077, iteration: 386395
loss: 1.1140950918197632,grad_norm: 0.8840920075304014, iteration: 386396
loss: 1.075124740600586,grad_norm: 0.8156820662699357, iteration: 386397
loss: 1.0181933641433716,grad_norm: 0.869694035447833, iteration: 386398
loss: 1.0189012289047241,grad_norm: 0.7859721732469613, iteration: 386399
loss: 0.9991607666015625,grad_norm: 0.7539247391289547, iteration: 386400
loss: 1.012529969215393,grad_norm: 0.871579106530142, iteration: 386401
loss: 1.0863133668899536,grad_norm: 0.9999991144975972, iteration: 386402
loss: 1.1275382041931152,grad_norm: 0.999999608394998, iteration: 386403
loss: 1.1783312559127808,grad_norm: 0.9999991600413205, iteration: 386404
loss: 1.0203208923339844,grad_norm: 0.7714223650584044, iteration: 386405
loss: 1.0103563070297241,grad_norm: 0.9999992653991233, iteration: 386406
loss: 1.0125089883804321,grad_norm: 0.8264297798826339, iteration: 386407
loss: 1.0322909355163574,grad_norm: 0.9999995823287301, iteration: 386408
loss: 1.0192753076553345,grad_norm: 0.9999995338211979, iteration: 386409
loss: 1.0206676721572876,grad_norm: 0.99999986870045, iteration: 386410
loss: 1.0549105405807495,grad_norm: 0.9999999015008301, iteration: 386411
loss: 1.0995393991470337,grad_norm: 0.9999998959302573, iteration: 386412
loss: 1.0148956775665283,grad_norm: 0.975116368289008, iteration: 386413
loss: 1.107711911201477,grad_norm: 0.9999992216084099, iteration: 386414
loss: 1.1140849590301514,grad_norm: 0.8976426992591978, iteration: 386415
loss: 1.008634328842163,grad_norm: 0.9180080001751965, iteration: 386416
loss: 1.0148351192474365,grad_norm: 0.9605382371459332, iteration: 386417
loss: 1.0023757219314575,grad_norm: 0.7294101569095296, iteration: 386418
loss: 1.008400321006775,grad_norm: 0.8689854108901913, iteration: 386419
loss: 1.0497174263000488,grad_norm: 0.8865818431986694, iteration: 386420
loss: 1.4201593399047852,grad_norm: 0.9999997149543874, iteration: 386421
loss: 1.008011817932129,grad_norm: 0.9999990684698162, iteration: 386422
loss: 1.1330899000167847,grad_norm: 0.9999991860393844, iteration: 386423
loss: 1.0984059572219849,grad_norm: 0.9999997943228589, iteration: 386424
loss: 1.0868602991104126,grad_norm: 0.7116174186879584, iteration: 386425
loss: 1.1469565629959106,grad_norm: 0.9999994842300134, iteration: 386426
loss: 1.0065298080444336,grad_norm: 0.9999999067297426, iteration: 386427
loss: 1.0533124208450317,grad_norm: 0.847315233203608, iteration: 386428
loss: 1.188410758972168,grad_norm: 0.9999991302032532, iteration: 386429
loss: 1.056759238243103,grad_norm: 0.9301770830183899, iteration: 386430
loss: 1.010974407196045,grad_norm: 0.8431340654029433, iteration: 386431
loss: 1.1485322713851929,grad_norm: 1.0000000469746066, iteration: 386432
loss: 1.1277928352355957,grad_norm: 0.874452388240613, iteration: 386433
loss: 1.0054495334625244,grad_norm: 0.9999992244759626, iteration: 386434
loss: 1.0603572130203247,grad_norm: 0.948069400970558, iteration: 386435
loss: 1.0230625867843628,grad_norm: 0.9999991781745378, iteration: 386436
loss: 1.0791720151901245,grad_norm: 0.9999992856479867, iteration: 386437
loss: 1.0531599521636963,grad_norm: 0.9873575208076295, iteration: 386438
loss: 1.0839059352874756,grad_norm: 1.0000000289486544, iteration: 386439
loss: 1.1073248386383057,grad_norm: 0.9999995597541331, iteration: 386440
loss: 1.2343508005142212,grad_norm: 0.9999995499110269, iteration: 386441
loss: 1.0367108583450317,grad_norm: 0.9999993606530923, iteration: 386442
loss: 1.0209726095199585,grad_norm: 0.7532562706901832, iteration: 386443
loss: 1.0337361097335815,grad_norm: 0.7765771745785174, iteration: 386444
loss: 0.9889930486679077,grad_norm: 0.8302108133658153, iteration: 386445
loss: 1.1446171998977661,grad_norm: 0.9999992884213987, iteration: 386446
loss: 1.0235035419464111,grad_norm: 0.907971271775738, iteration: 386447
loss: 1.0594969987869263,grad_norm: 0.9999992036959424, iteration: 386448
loss: 1.0687029361724854,grad_norm: 0.933932935073353, iteration: 386449
loss: 1.046175479888916,grad_norm: 0.9999993448580484, iteration: 386450
loss: 1.022836685180664,grad_norm: 0.9999991227150502, iteration: 386451
loss: 1.0189220905303955,grad_norm: 0.8339980834416365, iteration: 386452
loss: 1.0167657136917114,grad_norm: 0.8194607433169083, iteration: 386453
loss: 1.0809904336929321,grad_norm: 0.9438643485618851, iteration: 386454
loss: 1.0743588209152222,grad_norm: 0.9999995650705009, iteration: 386455
loss: 1.022321343421936,grad_norm: 0.8575277192205654, iteration: 386456
loss: 1.0259958505630493,grad_norm: 0.999999878988187, iteration: 386457
loss: 0.9885933995246887,grad_norm: 0.8375009978576091, iteration: 386458
loss: 1.1439611911773682,grad_norm: 0.9999998667173761, iteration: 386459
loss: 1.0307461023330688,grad_norm: 0.8032740172749693, iteration: 386460
loss: 1.0717500448226929,grad_norm: 0.9999989817328173, iteration: 386461
loss: 1.1135640144348145,grad_norm: 0.999999306924243, iteration: 386462
loss: 0.9986414313316345,grad_norm: 0.670703780156694, iteration: 386463
loss: 1.1493359804153442,grad_norm: 0.9999997658834104, iteration: 386464
loss: 1.0070412158966064,grad_norm: 0.9999993752250603, iteration: 386465
loss: 0.9949142336845398,grad_norm: 0.9999992511277715, iteration: 386466
loss: 1.294874668121338,grad_norm: 0.9999998835311068, iteration: 386467
loss: 0.9670411944389343,grad_norm: 0.8582388653354668, iteration: 386468
loss: 1.0624969005584717,grad_norm: 0.891587929449406, iteration: 386469
loss: 1.0723329782485962,grad_norm: 1.0000000009559173, iteration: 386470
loss: 1.0153800249099731,grad_norm: 0.9999996409821807, iteration: 386471
loss: 1.2334413528442383,grad_norm: 0.9999995836423566, iteration: 386472
loss: 1.0661449432373047,grad_norm: 0.9999994790661705, iteration: 386473
loss: 1.0139179229736328,grad_norm: 0.9999995651479499, iteration: 386474
loss: 1.0167351961135864,grad_norm: 0.9999996004012756, iteration: 386475
loss: 0.9992718696594238,grad_norm: 0.7486265586389075, iteration: 386476
loss: 1.0461920499801636,grad_norm: 0.8461164097033962, iteration: 386477
loss: 1.1338406801223755,grad_norm: 0.9999992479081211, iteration: 386478
loss: 1.0582503080368042,grad_norm: 0.9999993853067606, iteration: 386479
loss: 1.0275992155075073,grad_norm: 0.9999991704218316, iteration: 386480
loss: 1.0542542934417725,grad_norm: 0.8462400155937808, iteration: 386481
loss: 1.1950029134750366,grad_norm: 0.9999994915911259, iteration: 386482
loss: 1.03831946849823,grad_norm: 0.8774747684523657, iteration: 386483
loss: 1.102860689163208,grad_norm: 0.9999998507934725, iteration: 386484
loss: 1.0883883237838745,grad_norm: 0.999999272757389, iteration: 386485
loss: 1.1339774131774902,grad_norm: 0.9999994372528412, iteration: 386486
loss: 1.0144739151000977,grad_norm: 0.9313255142315884, iteration: 386487
loss: 0.9949460625648499,grad_norm: 0.9314612534106567, iteration: 386488
loss: 1.1247979402542114,grad_norm: 0.999999639633416, iteration: 386489
loss: 1.0050075054168701,grad_norm: 0.9599279868841611, iteration: 386490
loss: 0.9741197228431702,grad_norm: 0.9139567946870589, iteration: 386491
loss: 1.0122365951538086,grad_norm: 0.7117677770621417, iteration: 386492
loss: 0.9999625086784363,grad_norm: 0.8216136312593938, iteration: 386493
loss: 0.9763879179954529,grad_norm: 0.8598240719014242, iteration: 386494
loss: 1.0160908699035645,grad_norm: 0.9999998044947491, iteration: 386495
loss: 1.183620810508728,grad_norm: 0.9999996983555165, iteration: 386496
loss: 1.1840236186981201,grad_norm: 0.9999992352897616, iteration: 386497
loss: 1.0381689071655273,grad_norm: 0.9749457164068144, iteration: 386498
loss: 0.9993716478347778,grad_norm: 0.8684162856207569, iteration: 386499
loss: 1.0909756422042847,grad_norm: 0.999999415273933, iteration: 386500
loss: 1.076001524925232,grad_norm: 0.9999991980624526, iteration: 386501
loss: 0.9796149730682373,grad_norm: 0.7102223353159103, iteration: 386502
loss: 0.9816609621047974,grad_norm: 0.7476212772781803, iteration: 386503
loss: 1.1308366060256958,grad_norm: 0.9999993215912532, iteration: 386504
loss: 1.041244626045227,grad_norm: 0.9999992434597932, iteration: 386505
loss: 1.0335159301757812,grad_norm: 0.8366152596991612, iteration: 386506
loss: 0.9993278384208679,grad_norm: 0.6862505653181853, iteration: 386507
loss: 1.052138090133667,grad_norm: 0.744137058835778, iteration: 386508
loss: 1.0596529245376587,grad_norm: 0.8951804396121811, iteration: 386509
loss: 1.0346205234527588,grad_norm: 0.9541675711313403, iteration: 386510
loss: 0.9582562446594238,grad_norm: 0.8093311522080076, iteration: 386511
loss: 0.9858407974243164,grad_norm: 0.8968643977066112, iteration: 386512
loss: 1.0123485326766968,grad_norm: 0.7184595783780012, iteration: 386513
loss: 1.0824216604232788,grad_norm: 0.9999999979368835, iteration: 386514
loss: 0.9880287647247314,grad_norm: 0.8078280017983213, iteration: 386515
loss: 1.040855884552002,grad_norm: 0.9999999952618241, iteration: 386516
loss: 1.0174858570098877,grad_norm: 0.6349790884795642, iteration: 386517
loss: 1.1123510599136353,grad_norm: 0.9999997709006307, iteration: 386518
loss: 1.0098652839660645,grad_norm: 0.7972579918123244, iteration: 386519
loss: 1.0669628381729126,grad_norm: 0.999999692656595, iteration: 386520
loss: 0.9812536239624023,grad_norm: 0.7233954505372996, iteration: 386521
loss: 0.9768991470336914,grad_norm: 0.684087487768232, iteration: 386522
loss: 1.038644552230835,grad_norm: 0.7060738696513429, iteration: 386523
loss: 1.0315014123916626,grad_norm: 0.8538008373685002, iteration: 386524
loss: 0.9947268962860107,grad_norm: 0.8329884548701098, iteration: 386525
loss: 1.0143520832061768,grad_norm: 0.9999993764100212, iteration: 386526
loss: 0.9768912196159363,grad_norm: 0.999999288371591, iteration: 386527
loss: 1.0023661851882935,grad_norm: 0.7019879089296435, iteration: 386528
loss: 1.020687222480774,grad_norm: 0.7518079708356776, iteration: 386529
loss: 1.0191229581832886,grad_norm: 0.7530975029275786, iteration: 386530
loss: 1.0202529430389404,grad_norm: 0.8829313696066388, iteration: 386531
loss: 0.970281720161438,grad_norm: 0.7147221797790431, iteration: 386532
loss: 0.9964872598648071,grad_norm: 0.9175778417808529, iteration: 386533
loss: 1.0631457567214966,grad_norm: 0.9177478425005359, iteration: 386534
loss: 1.0356550216674805,grad_norm: 0.9999995863213772, iteration: 386535
loss: 0.958918035030365,grad_norm: 0.835930494692695, iteration: 386536
loss: 1.0033715963363647,grad_norm: 0.9999991039704275, iteration: 386537
loss: 0.9902241826057434,grad_norm: 0.8153469217407093, iteration: 386538
loss: 0.9868580102920532,grad_norm: 0.9999992310372046, iteration: 386539
loss: 0.9868408441543579,grad_norm: 0.7335746640083116, iteration: 386540
loss: 0.9706231355667114,grad_norm: 0.7883864030164214, iteration: 386541
loss: 0.9940367341041565,grad_norm: 0.9037188037204164, iteration: 386542
loss: 1.078015685081482,grad_norm: 0.9999997948658119, iteration: 386543
loss: 1.0092496871948242,grad_norm: 0.9999991342568323, iteration: 386544
loss: 0.9945329427719116,grad_norm: 0.9999995406648117, iteration: 386545
loss: 1.070218563079834,grad_norm: 0.9999992867719449, iteration: 386546
loss: 1.021270990371704,grad_norm: 0.9999994198211853, iteration: 386547
loss: 1.0317065715789795,grad_norm: 0.7824284750028576, iteration: 386548
loss: 1.0122159719467163,grad_norm: 0.7592020034694251, iteration: 386549
loss: 0.9709896445274353,grad_norm: 0.9199538969610869, iteration: 386550
loss: 0.9916029572486877,grad_norm: 0.6830756684762376, iteration: 386551
loss: 1.003866195678711,grad_norm: 0.8370422147758142, iteration: 386552
loss: 1.0542503595352173,grad_norm: 0.898640662155061, iteration: 386553
loss: 0.9939330220222473,grad_norm: 0.999999252095576, iteration: 386554
loss: 1.103757381439209,grad_norm: 0.7591212944419898, iteration: 386555
loss: 0.9746021628379822,grad_norm: 0.7992733334409058, iteration: 386556
loss: 0.9764289855957031,grad_norm: 0.8060270835347603, iteration: 386557
loss: 1.0539765357971191,grad_norm: 0.7573999369384165, iteration: 386558
loss: 0.9724105000495911,grad_norm: 1.0000000135922804, iteration: 386559
loss: 0.9700387120246887,grad_norm: 0.7652747260112108, iteration: 386560
loss: 1.057987928390503,grad_norm: 0.9999997414023147, iteration: 386561
loss: 1.0188182592391968,grad_norm: 0.7502633892390582, iteration: 386562
loss: 1.02047860622406,grad_norm: 0.9690315948657127, iteration: 386563
loss: 0.9883655905723572,grad_norm: 0.9999994701820366, iteration: 386564
loss: 1.0581512451171875,grad_norm: 0.9999991947939088, iteration: 386565
loss: 0.9477220177650452,grad_norm: 0.7579106341700191, iteration: 386566
loss: 1.023403286933899,grad_norm: 0.9240295929405082, iteration: 386567
loss: 1.02244234085083,grad_norm: 0.6878704540939004, iteration: 386568
loss: 1.0434081554412842,grad_norm: 0.849064970800453, iteration: 386569
loss: 1.1163337230682373,grad_norm: 0.9999998427235557, iteration: 386570
loss: 1.0020325183868408,grad_norm: 0.8163286966281846, iteration: 386571
loss: 0.9817284345626831,grad_norm: 0.8138263879221865, iteration: 386572
loss: 1.033618688583374,grad_norm: 0.9564596370434514, iteration: 386573
loss: 1.029894232749939,grad_norm: 0.9999995474484755, iteration: 386574
loss: 1.0177505016326904,grad_norm: 0.9999999645694487, iteration: 386575
loss: 1.0018091201782227,grad_norm: 0.7548710308569899, iteration: 386576
loss: 1.051669955253601,grad_norm: 0.8607881998255658, iteration: 386577
loss: 0.9844089150428772,grad_norm: 0.6997987387424071, iteration: 386578
loss: 1.0055588483810425,grad_norm: 0.8413511281933538, iteration: 386579
loss: 0.979168713092804,grad_norm: 0.685029395286145, iteration: 386580
loss: 0.992110013961792,grad_norm: 0.7413213417832081, iteration: 386581
loss: 1.0273520946502686,grad_norm: 0.9999995132033962, iteration: 386582
loss: 1.159373164176941,grad_norm: 0.9999999252238092, iteration: 386583
loss: 0.9950813055038452,grad_norm: 0.7627813291350392, iteration: 386584
loss: 1.009808897972107,grad_norm: 0.7322089451236022, iteration: 386585
loss: 1.0708179473876953,grad_norm: 0.953952005262866, iteration: 386586
loss: 1.026068091392517,grad_norm: 0.9999995972695568, iteration: 386587
loss: 0.9830423593521118,grad_norm: 0.8083186723642639, iteration: 386588
loss: 1.0241230726242065,grad_norm: 0.9999996670710735, iteration: 386589
loss: 0.9928264617919922,grad_norm: 0.7966370410346587, iteration: 386590
loss: 0.9926601052284241,grad_norm: 0.999999612457857, iteration: 386591
loss: 1.0149635076522827,grad_norm: 0.8563477404024306, iteration: 386592
loss: 1.0428459644317627,grad_norm: 0.8653543819136084, iteration: 386593
loss: 0.999859094619751,grad_norm: 0.7528005844793606, iteration: 386594
loss: 0.9918385148048401,grad_norm: 0.7737605209990723, iteration: 386595
loss: 0.9965211749076843,grad_norm: 0.9085444938224545, iteration: 386596
loss: 1.0347274541854858,grad_norm: 0.7568444786449847, iteration: 386597
loss: 1.0693135261535645,grad_norm: 0.9999992363625564, iteration: 386598
loss: 1.0396478176116943,grad_norm: 0.9906886527422849, iteration: 386599
loss: 0.9767216444015503,grad_norm: 0.7663028355636055, iteration: 386600
loss: 0.9986764192581177,grad_norm: 0.653144666340927, iteration: 386601
loss: 1.0516928434371948,grad_norm: 0.8423850467362897, iteration: 386602
loss: 1.0442135334014893,grad_norm: 0.9999997667811489, iteration: 386603
loss: 0.9919308423995972,grad_norm: 0.7520436675942244, iteration: 386604
loss: 1.0221308469772339,grad_norm: 0.9999991142527997, iteration: 386605
loss: 1.023686408996582,grad_norm: 0.7076009410060053, iteration: 386606
loss: 1.0377694368362427,grad_norm: 0.794420263401405, iteration: 386607
loss: 1.080381155014038,grad_norm: 0.9999992614766589, iteration: 386608
loss: 1.0403798818588257,grad_norm: 0.9999999207928706, iteration: 386609
loss: 0.9689140915870667,grad_norm: 0.6908328905678469, iteration: 386610
loss: 1.0156066417694092,grad_norm: 0.9999995002955017, iteration: 386611
loss: 0.980194091796875,grad_norm: 0.8461821660935201, iteration: 386612
loss: 1.013203740119934,grad_norm: 0.9032710311482902, iteration: 386613
loss: 1.0637143850326538,grad_norm: 0.9999999009236135, iteration: 386614
loss: 1.0037990808486938,grad_norm: 0.9999989760749233, iteration: 386615
loss: 0.9765385985374451,grad_norm: 0.8094359004601037, iteration: 386616
loss: 1.009505271911621,grad_norm: 0.8058020736106218, iteration: 386617
loss: 1.0240461826324463,grad_norm: 0.6592361848607623, iteration: 386618
loss: 0.9485486745834351,grad_norm: 0.9999991540653189, iteration: 386619
loss: 1.00187087059021,grad_norm: 0.8245030431330318, iteration: 386620
loss: 0.9962504506111145,grad_norm: 0.999999036989266, iteration: 386621
loss: 1.0271683931350708,grad_norm: 0.7340724172703667, iteration: 386622
loss: 1.0195355415344238,grad_norm: 0.9228256280844022, iteration: 386623
loss: 1.045355200767517,grad_norm: 0.9999990586296216, iteration: 386624
loss: 0.9920365810394287,grad_norm: 0.7123237933939783, iteration: 386625
loss: 0.998382568359375,grad_norm: 0.8078626592014385, iteration: 386626
loss: 0.9826074838638306,grad_norm: 0.9666963396152486, iteration: 386627
loss: 1.053971767425537,grad_norm: 0.9999990961481052, iteration: 386628
loss: 1.056384801864624,grad_norm: 0.7993865010005079, iteration: 386629
loss: 1.0138152837753296,grad_norm: 0.8085227817088652, iteration: 386630
loss: 0.9788547158241272,grad_norm: 0.6861312938596286, iteration: 386631
loss: 0.9893284440040588,grad_norm: 0.862481216325654, iteration: 386632
loss: 0.9787545204162598,grad_norm: 0.8314362917931489, iteration: 386633
loss: 1.0130656957626343,grad_norm: 0.8183779360002679, iteration: 386634
loss: 0.9939199686050415,grad_norm: 0.7923682033245086, iteration: 386635
loss: 1.0077937841415405,grad_norm: 0.8512900942525676, iteration: 386636
loss: 1.0332494974136353,grad_norm: 0.9999996454146659, iteration: 386637
loss: 1.0748777389526367,grad_norm: 0.9678354565891509, iteration: 386638
loss: 1.007436990737915,grad_norm: 0.8156929204614695, iteration: 386639
loss: 1.0348557233810425,grad_norm: 0.8302881322151581, iteration: 386640
loss: 0.999485194683075,grad_norm: 0.999999813308466, iteration: 386641
loss: 1.0424950122833252,grad_norm: 0.9999998162793284, iteration: 386642
loss: 1.10465669631958,grad_norm: 0.9999996067665313, iteration: 386643
loss: 1.0195754766464233,grad_norm: 0.8772822827417256, iteration: 386644
loss: 1.0580390691757202,grad_norm: 0.9999991173877041, iteration: 386645
loss: 1.0413323640823364,grad_norm: 0.9679932112645095, iteration: 386646
loss: 1.032647967338562,grad_norm: 0.7006435008175441, iteration: 386647
loss: 0.9848486185073853,grad_norm: 0.8077631028880758, iteration: 386648
loss: 1.0377449989318848,grad_norm: 0.9999994535197468, iteration: 386649
loss: 1.066182017326355,grad_norm: 0.9999991336397838, iteration: 386650
loss: 1.033255934715271,grad_norm: 0.7397637250793392, iteration: 386651
loss: 0.9963808059692383,grad_norm: 0.9999998252626949, iteration: 386652
loss: 1.0868974924087524,grad_norm: 0.8932735351324304, iteration: 386653
loss: 1.0289193391799927,grad_norm: 0.7961016828115438, iteration: 386654
loss: 0.9842573404312134,grad_norm: 0.771640237584928, iteration: 386655
loss: 1.0099916458129883,grad_norm: 0.7476156630703221, iteration: 386656
loss: 1.0420500040054321,grad_norm: 0.820109703957073, iteration: 386657
loss: 0.9966903328895569,grad_norm: 0.9999990067342269, iteration: 386658
loss: 1.0830411911010742,grad_norm: 0.9999995003991489, iteration: 386659
loss: 1.0552183389663696,grad_norm: 0.8711129911272089, iteration: 386660
loss: 1.0127677917480469,grad_norm: 0.9999990944023791, iteration: 386661
loss: 0.9725492596626282,grad_norm: 0.7700874508371505, iteration: 386662
loss: 1.0209168195724487,grad_norm: 0.9999990443341716, iteration: 386663
loss: 1.046040654182434,grad_norm: 0.7619992541196509, iteration: 386664
loss: 1.0740911960601807,grad_norm: 0.9999998667662721, iteration: 386665
loss: 1.0223807096481323,grad_norm: 0.7565100275924314, iteration: 386666
loss: 0.9877722263336182,grad_norm: 0.8928335955014954, iteration: 386667
loss: 1.0146591663360596,grad_norm: 0.9999997017351968, iteration: 386668
loss: 0.9975579380989075,grad_norm: 0.7693611601787932, iteration: 386669
loss: 0.9911870360374451,grad_norm: 0.9065157197090574, iteration: 386670
loss: 1.0086034536361694,grad_norm: 0.9999991220749608, iteration: 386671
loss: 1.0762521028518677,grad_norm: 0.9999989767258453, iteration: 386672
loss: 0.9837278127670288,grad_norm: 0.8025549026348443, iteration: 386673
loss: 1.0215113162994385,grad_norm: 0.9999991682586482, iteration: 386674
loss: 1.0161954164505005,grad_norm: 0.7742928514840315, iteration: 386675
loss: 1.0277553796768188,grad_norm: 0.955560343601618, iteration: 386676
loss: 0.988132894039154,grad_norm: 0.7045868645672277, iteration: 386677
loss: 1.046315312385559,grad_norm: 0.7185641029563362, iteration: 386678
loss: 1.0285286903381348,grad_norm: 0.9999990883677696, iteration: 386679
loss: 1.0505422353744507,grad_norm: 0.7757047790412243, iteration: 386680
loss: 1.0488530397415161,grad_norm: 0.9999995107839375, iteration: 386681
loss: 1.0038820505142212,grad_norm: 0.9999992357999548, iteration: 386682
loss: 1.001690149307251,grad_norm: 0.9799511503667715, iteration: 386683
loss: 0.9856826066970825,grad_norm: 0.8016557304496367, iteration: 386684
loss: 1.1114652156829834,grad_norm: 0.9999996592623541, iteration: 386685
loss: 0.9942052960395813,grad_norm: 0.77608816159915, iteration: 386686
loss: 1.0696392059326172,grad_norm: 0.9999999158451256, iteration: 386687
loss: 1.0853873491287231,grad_norm: 0.9999998044814632, iteration: 386688
loss: 1.0603578090667725,grad_norm: 0.9999998483432657, iteration: 386689
loss: 1.0079916715621948,grad_norm: 0.7510854001050707, iteration: 386690
loss: 1.0219268798828125,grad_norm: 0.8601747335839426, iteration: 386691
loss: 1.035409688949585,grad_norm: 0.9999991712095816, iteration: 386692
loss: 1.0072553157806396,grad_norm: 0.7753784070169858, iteration: 386693
loss: 1.2101880311965942,grad_norm: 0.9999998379205779, iteration: 386694
loss: 1.0164486169815063,grad_norm: 0.9999991368214589, iteration: 386695
loss: 1.0333083868026733,grad_norm: 0.7655610842578369, iteration: 386696
loss: 1.0434751510620117,grad_norm: 0.9043464450814025, iteration: 386697
loss: 0.9901811480522156,grad_norm: 0.9999990677657843, iteration: 386698
loss: 1.0474355220794678,grad_norm: 0.7212885790073946, iteration: 386699
loss: 1.0514905452728271,grad_norm: 0.9999999335010532, iteration: 386700
loss: 0.9888405799865723,grad_norm: 0.8890915313956793, iteration: 386701
loss: 0.986545205116272,grad_norm: 0.752410185966223, iteration: 386702
loss: 1.0084834098815918,grad_norm: 0.9999991372598438, iteration: 386703
loss: 1.0680838823318481,grad_norm: 0.9650160272191867, iteration: 386704
loss: 1.0120609998703003,grad_norm: 0.9999992864800096, iteration: 386705
loss: 1.0051050186157227,grad_norm: 0.8566380981009074, iteration: 386706
loss: 1.0400489568710327,grad_norm: 0.8829164801873979, iteration: 386707
loss: 0.9521246552467346,grad_norm: 0.8933915250214995, iteration: 386708
loss: 1.0186667442321777,grad_norm: 0.7905111697177225, iteration: 386709
loss: 1.0159565210342407,grad_norm: 0.7904566159354931, iteration: 386710
loss: 1.0474603176116943,grad_norm: 0.8283115541241245, iteration: 386711
loss: 0.9810324311256409,grad_norm: 0.7464451736651931, iteration: 386712
loss: 1.012990951538086,grad_norm: 0.7074168962539611, iteration: 386713
loss: 1.0107126235961914,grad_norm: 0.7560853543845802, iteration: 386714
loss: 0.9692981839179993,grad_norm: 0.7177138557518181, iteration: 386715
loss: 1.025675654411316,grad_norm: 0.7445847483689763, iteration: 386716
loss: 0.9659605026245117,grad_norm: 0.9064465679267246, iteration: 386717
loss: 1.0024698972702026,grad_norm: 0.9670840100456609, iteration: 386718
loss: 0.9936225414276123,grad_norm: 0.889932760589501, iteration: 386719
loss: 0.9936421513557434,grad_norm: 0.9999996653767281, iteration: 386720
loss: 1.0023143291473389,grad_norm: 0.8051401555547859, iteration: 386721
loss: 1.0209797620773315,grad_norm: 0.999999025495787, iteration: 386722
loss: 0.9934996366500854,grad_norm: 0.8733706002568786, iteration: 386723
loss: 1.0309126377105713,grad_norm: 0.9563444507337479, iteration: 386724
loss: 0.9868872761726379,grad_norm: 0.9999995164205052, iteration: 386725
loss: 1.0279650688171387,grad_norm: 0.9406032647587658, iteration: 386726
loss: 0.9800073504447937,grad_norm: 0.8224430093415822, iteration: 386727
loss: 1.005711555480957,grad_norm: 0.6818957517388781, iteration: 386728
loss: 1.0569820404052734,grad_norm: 0.8912393348611426, iteration: 386729
loss: 0.9938846826553345,grad_norm: 0.723362106872573, iteration: 386730
loss: 1.0210803747177124,grad_norm: 0.9843037757872468, iteration: 386731
loss: 1.0435620546340942,grad_norm: 0.9999999574679533, iteration: 386732
loss: 1.0891773700714111,grad_norm: 0.9999991054522993, iteration: 386733
loss: 1.0003225803375244,grad_norm: 0.672392780496155, iteration: 386734
loss: 0.9806897044181824,grad_norm: 0.8476951984121063, iteration: 386735
loss: 1.1830543279647827,grad_norm: 0.9999994989620549, iteration: 386736
loss: 0.9692075252532959,grad_norm: 0.7910978912165678, iteration: 386737
loss: 0.9880746006965637,grad_norm: 0.9999999517979626, iteration: 386738
loss: 0.9923965930938721,grad_norm: 0.8173511224900878, iteration: 386739
loss: 1.0023771524429321,grad_norm: 0.7521788265131393, iteration: 386740
loss: 1.1681941747665405,grad_norm: 0.9999994845993663, iteration: 386741
loss: 1.1562305688858032,grad_norm: 0.9249365220166278, iteration: 386742
loss: 1.0369420051574707,grad_norm: 0.9999992405005365, iteration: 386743
loss: 1.0141854286193848,grad_norm: 0.9135285084521865, iteration: 386744
loss: 1.0322750806808472,grad_norm: 0.999999895085379, iteration: 386745
loss: 0.9556527733802795,grad_norm: 0.9999997251430071, iteration: 386746
loss: 0.9888975620269775,grad_norm: 0.6791699016627624, iteration: 386747
loss: 1.0477113723754883,grad_norm: 0.9999999578389048, iteration: 386748
loss: 1.0218106508255005,grad_norm: 0.8043203783163506, iteration: 386749
loss: 1.0158501863479614,grad_norm: 0.9868072289530718, iteration: 386750
loss: 0.9963279366493225,grad_norm: 0.8324299082352202, iteration: 386751
loss: 1.0239008665084839,grad_norm: 0.9999990900231585, iteration: 386752
loss: 0.9876148700714111,grad_norm: 0.7758907266171278, iteration: 386753
loss: 0.9932647943496704,grad_norm: 0.9999994747580432, iteration: 386754
loss: 1.0461844205856323,grad_norm: 0.9999998259581959, iteration: 386755
loss: 1.0188424587249756,grad_norm: 0.7761596950092117, iteration: 386756
loss: 1.0049551725387573,grad_norm: 0.9999990530188432, iteration: 386757
loss: 1.009630560874939,grad_norm: 0.9821646181673592, iteration: 386758
loss: 0.9667928814888,grad_norm: 0.8196046590401188, iteration: 386759
loss: 1.0818060636520386,grad_norm: 0.764735113671939, iteration: 386760
loss: 1.0512923002243042,grad_norm: 0.9561604544906623, iteration: 386761
loss: 1.0331037044525146,grad_norm: 0.9388881232518442, iteration: 386762
loss: 1.0622501373291016,grad_norm: 0.9991900947773514, iteration: 386763
loss: 1.0133178234100342,grad_norm: 0.9999992365607545, iteration: 386764
loss: 1.013076901435852,grad_norm: 0.77333523902882, iteration: 386765
loss: 1.0852417945861816,grad_norm: 0.999999912431906, iteration: 386766
loss: 1.0086181163787842,grad_norm: 0.7655496287270863, iteration: 386767
loss: 0.9718113541603088,grad_norm: 0.6856935033028477, iteration: 386768
loss: 1.0467056035995483,grad_norm: 0.9999992683099223, iteration: 386769
loss: 0.9567159414291382,grad_norm: 0.8079378790753449, iteration: 386770
loss: 1.0338162183761597,grad_norm: 1.0000000380418492, iteration: 386771
loss: 1.0034220218658447,grad_norm: 0.8347088002476017, iteration: 386772
loss: 1.0095728635787964,grad_norm: 0.9406620536695668, iteration: 386773
loss: 1.0474681854248047,grad_norm: 0.9999989732550568, iteration: 386774
loss: 1.0087087154388428,grad_norm: 0.6801965507087303, iteration: 386775
loss: 1.0234438180923462,grad_norm: 0.5912381667495609, iteration: 386776
loss: 1.0149667263031006,grad_norm: 0.7860099705328377, iteration: 386777
loss: 1.0111974477767944,grad_norm: 0.6964150603809709, iteration: 386778
loss: 1.0208591222763062,grad_norm: 0.9999991321452621, iteration: 386779
loss: 1.0310429334640503,grad_norm: 0.740711513272906, iteration: 386780
loss: 0.981596052646637,grad_norm: 0.7892774034710699, iteration: 386781
loss: 1.0058761835098267,grad_norm: 0.745568255099675, iteration: 386782
loss: 1.034023642539978,grad_norm: 0.8955482328047742, iteration: 386783
loss: 1.003675103187561,grad_norm: 0.878916452006161, iteration: 386784
loss: 1.08806574344635,grad_norm: 0.999999436735126, iteration: 386785
loss: 0.9783802628517151,grad_norm: 0.8636493560562307, iteration: 386786
loss: 1.058943510055542,grad_norm: 0.9999997276182958, iteration: 386787
loss: 0.9964248538017273,grad_norm: 0.888266365441147, iteration: 386788
loss: 1.008947730064392,grad_norm: 0.9999993154614688, iteration: 386789
loss: 0.9929584264755249,grad_norm: 0.8191014571996349, iteration: 386790
loss: 1.004775881767273,grad_norm: 0.8641296924223665, iteration: 386791
loss: 0.9990317821502686,grad_norm: 0.6268281488661032, iteration: 386792
loss: 1.0042849779129028,grad_norm: 0.999999456772251, iteration: 386793
loss: 1.0066461563110352,grad_norm: 0.7862718075387242, iteration: 386794
loss: 0.9466232061386108,grad_norm: 0.8864491008687315, iteration: 386795
loss: 0.9982113838195801,grad_norm: 0.7638881011854952, iteration: 386796
loss: 1.0756381750106812,grad_norm: 0.999999517123213, iteration: 386797
loss: 0.9904287457466125,grad_norm: 0.8010067709101679, iteration: 386798
loss: 1.0028311014175415,grad_norm: 0.7433177587449133, iteration: 386799
loss: 1.0033884048461914,grad_norm: 0.9999997381618667, iteration: 386800
loss: 1.011073112487793,grad_norm: 0.7920844069189301, iteration: 386801
loss: 1.0185396671295166,grad_norm: 0.8162994051987167, iteration: 386802
loss: 1.0438709259033203,grad_norm: 0.9170415280850225, iteration: 386803
loss: 1.0259114503860474,grad_norm: 0.7584417614179796, iteration: 386804
loss: 1.0222517251968384,grad_norm: 0.9999992201809268, iteration: 386805
loss: 0.9808293581008911,grad_norm: 0.8383996355475809, iteration: 386806
loss: 1.0822429656982422,grad_norm: 0.8841491295927091, iteration: 386807
loss: 1.0290635824203491,grad_norm: 0.8718688849044239, iteration: 386808
loss: 1.0087319612503052,grad_norm: 0.7250278569771698, iteration: 386809
loss: 0.9668858647346497,grad_norm: 0.8599955722982028, iteration: 386810
loss: 0.9901633858680725,grad_norm: 0.9999994718477145, iteration: 386811
loss: 1.0357969999313354,grad_norm: 0.7248903589187103, iteration: 386812
loss: 1.0137072801589966,grad_norm: 0.9999996427102105, iteration: 386813
loss: 1.0432811975479126,grad_norm: 0.7803935971464186, iteration: 386814
loss: 1.0036858320236206,grad_norm: 0.8399757413557124, iteration: 386815
loss: 1.0466700792312622,grad_norm: 0.9999990927055818, iteration: 386816
loss: 0.9823762774467468,grad_norm: 0.8204542181823125, iteration: 386817
loss: 1.0264312028884888,grad_norm: 0.8638913481333178, iteration: 386818
loss: 0.9850955009460449,grad_norm: 0.7582788954013131, iteration: 386819
loss: 1.004513144493103,grad_norm: 0.9999992180053572, iteration: 386820
loss: 1.0464097261428833,grad_norm: 0.9999994359884739, iteration: 386821
loss: 0.9675843119621277,grad_norm: 0.7958294986743834, iteration: 386822
loss: 1.1239639520645142,grad_norm: 1.0000000664549196, iteration: 386823
loss: 1.0597587823867798,grad_norm: 0.9189231276045947, iteration: 386824
loss: 1.0292234420776367,grad_norm: 0.99999991851236, iteration: 386825
loss: 0.9936383962631226,grad_norm: 0.7690298222770108, iteration: 386826
loss: 1.0002866983413696,grad_norm: 0.7650388461974834, iteration: 386827
loss: 0.9907517433166504,grad_norm: 0.8052264200164284, iteration: 386828
loss: 0.9836083054542542,grad_norm: 0.6963903132918491, iteration: 386829
loss: 1.0240720510482788,grad_norm: 0.808494592102599, iteration: 386830
loss: 0.9915784597396851,grad_norm: 0.7422636703996455, iteration: 386831
loss: 1.0080186128616333,grad_norm: 0.6651080711791097, iteration: 386832
loss: 0.9641214609146118,grad_norm: 0.8602826441337441, iteration: 386833
loss: 1.049248456954956,grad_norm: 0.8669016226017616, iteration: 386834
loss: 1.105062484741211,grad_norm: 0.9999993656145898, iteration: 386835
loss: 0.9988780617713928,grad_norm: 0.8567552641566437, iteration: 386836
loss: 1.0011813640594482,grad_norm: 0.856127395899895, iteration: 386837
loss: 0.9903842210769653,grad_norm: 0.7888179471249787, iteration: 386838
loss: 1.001646876335144,grad_norm: 0.7952238356563405, iteration: 386839
loss: 0.9372062683105469,grad_norm: 0.8589702309026156, iteration: 386840
loss: 1.0141271352767944,grad_norm: 0.8671052067181279, iteration: 386841
loss: 1.0058188438415527,grad_norm: 0.8955642315176466, iteration: 386842
loss: 1.0351784229278564,grad_norm: 0.8438418633628402, iteration: 386843
loss: 1.0022822618484497,grad_norm: 0.9999993295880603, iteration: 386844
loss: 0.9929715394973755,grad_norm: 0.6919235957949679, iteration: 386845
loss: 1.006122350692749,grad_norm: 0.7311494530555215, iteration: 386846
loss: 0.9860895276069641,grad_norm: 0.6850893774056116, iteration: 386847
loss: 1.1644906997680664,grad_norm: 0.9999991541294764, iteration: 386848
loss: 1.0415010452270508,grad_norm: 0.9999990794883911, iteration: 386849
loss: 1.0425293445587158,grad_norm: 0.9999999860716093, iteration: 386850
loss: 1.0127663612365723,grad_norm: 0.7552195142693419, iteration: 386851
loss: 1.1118786334991455,grad_norm: 0.7868200946981388, iteration: 386852
loss: 1.0884463787078857,grad_norm: 0.9999993520445226, iteration: 386853
loss: 1.009318232536316,grad_norm: 0.8519890336485599, iteration: 386854
loss: 1.0087436437606812,grad_norm: 0.823604437831503, iteration: 386855
loss: 1.0476475954055786,grad_norm: 0.8169115680865774, iteration: 386856
loss: 1.0180898904800415,grad_norm: 0.8176492034140086, iteration: 386857
loss: 1.054057002067566,grad_norm: 0.999999867325249, iteration: 386858
loss: 1.0140047073364258,grad_norm: 0.7227597628439554, iteration: 386859
loss: 1.0004910230636597,grad_norm: 0.8064041351492073, iteration: 386860
loss: 0.9552163481712341,grad_norm: 0.7550409550236202, iteration: 386861
loss: 1.0170574188232422,grad_norm: 0.6546097870628665, iteration: 386862
loss: 0.9995462894439697,grad_norm: 0.6732728830044035, iteration: 386863
loss: 0.9850294589996338,grad_norm: 0.8402311569781168, iteration: 386864
loss: 1.016903281211853,grad_norm: 0.8448997052348521, iteration: 386865
loss: 1.0217798948287964,grad_norm: 0.9999996126932269, iteration: 386866
loss: 1.0211020708084106,grad_norm: 0.9126296624814584, iteration: 386867
loss: 1.0167558193206787,grad_norm: 0.9999995002815543, iteration: 386868
loss: 1.039995551109314,grad_norm: 0.6813390090255391, iteration: 386869
loss: 0.9775490760803223,grad_norm: 0.9999994097253118, iteration: 386870
loss: 0.9943144917488098,grad_norm: 0.8406194714122377, iteration: 386871
loss: 1.010607123374939,grad_norm: 0.9999991181369816, iteration: 386872
loss: 0.9713273644447327,grad_norm: 0.7462224268178886, iteration: 386873
loss: 1.0496413707733154,grad_norm: 0.9999995218523453, iteration: 386874
loss: 1.0055482387542725,grad_norm: 0.8409308834581026, iteration: 386875
loss: 1.005409598350525,grad_norm: 0.6455189952094514, iteration: 386876
loss: 0.997797429561615,grad_norm: 0.8662488176610327, iteration: 386877
loss: 1.0706324577331543,grad_norm: 0.9999993824443549, iteration: 386878
loss: 1.178267478942871,grad_norm: 0.9999998105034289, iteration: 386879
loss: 0.9616871476173401,grad_norm: 0.9999989679370086, iteration: 386880
loss: 1.140712022781372,grad_norm: 0.9999999181040121, iteration: 386881
loss: 0.9750620722770691,grad_norm: 0.9619135468726024, iteration: 386882
loss: 1.0107734203338623,grad_norm: 0.920658327462707, iteration: 386883
loss: 1.0465939044952393,grad_norm: 0.999999985120982, iteration: 386884
loss: 1.1884675025939941,grad_norm: 0.999999340635882, iteration: 386885
loss: 1.0064761638641357,grad_norm: 0.7078002242181375, iteration: 386886
loss: 1.104913592338562,grad_norm: 0.9999994273570083, iteration: 386887
loss: 0.9845671653747559,grad_norm: 0.7787623551228419, iteration: 386888
loss: 1.1038212776184082,grad_norm: 0.9999999231079524, iteration: 386889
loss: 1.0285487174987793,grad_norm: 0.8603308658315947, iteration: 386890
loss: 1.0265275239944458,grad_norm: 0.6781110434124351, iteration: 386891
loss: 0.9932374358177185,grad_norm: 0.8555546326003366, iteration: 386892
loss: 1.1430007219314575,grad_norm: 0.9999993413088253, iteration: 386893
loss: 0.9669475555419922,grad_norm: 0.8806224167856895, iteration: 386894
loss: 1.0448002815246582,grad_norm: 0.8492001391995524, iteration: 386895
loss: 0.9933156371116638,grad_norm: 0.9999996123128874, iteration: 386896
loss: 0.9746736288070679,grad_norm: 0.8167621420149676, iteration: 386897
loss: 0.998416543006897,grad_norm: 0.9999994475033724, iteration: 386898
loss: 0.980303943157196,grad_norm: 0.7748345006257724, iteration: 386899
loss: 1.028485655784607,grad_norm: 0.8240948001545951, iteration: 386900
loss: 0.9997915625572205,grad_norm: 0.999999596779309, iteration: 386901
loss: 1.024287223815918,grad_norm: 0.9999990828839672, iteration: 386902
loss: 0.9883309006690979,grad_norm: 0.7715752444543711, iteration: 386903
loss: 1.0054535865783691,grad_norm: 0.9999996014250634, iteration: 386904
loss: 1.1869984865188599,grad_norm: 0.9999995251190126, iteration: 386905
loss: 0.9867072105407715,grad_norm: 0.9529482926984186, iteration: 386906
loss: 1.0131192207336426,grad_norm: 0.937502586781778, iteration: 386907
loss: 1.0163432359695435,grad_norm: 0.9999992993848624, iteration: 386908
loss: 1.0547056198120117,grad_norm: 0.735407721812453, iteration: 386909
loss: 1.006527066230774,grad_norm: 0.7822196808901083, iteration: 386910
loss: 0.9801239967346191,grad_norm: 0.7644721203948186, iteration: 386911
loss: 1.0018118619918823,grad_norm: 0.8068903329407983, iteration: 386912
loss: 1.0154461860656738,grad_norm: 0.9999991210022878, iteration: 386913
loss: 1.0310602188110352,grad_norm: 0.9693542179694161, iteration: 386914
loss: 1.0702576637268066,grad_norm: 0.8829903123585405, iteration: 386915
loss: 1.0319287776947021,grad_norm: 0.9173000443141474, iteration: 386916
loss: 1.0186703205108643,grad_norm: 0.9088582926169134, iteration: 386917
loss: 1.0588157176971436,grad_norm: 0.9999990651994145, iteration: 386918
loss: 0.9990740418434143,grad_norm: 0.7836850505312288, iteration: 386919
loss: 1.0152547359466553,grad_norm: 0.7906741827305146, iteration: 386920
loss: 0.9820457696914673,grad_norm: 0.7330032274204182, iteration: 386921
loss: 1.0075924396514893,grad_norm: 0.7501786269128462, iteration: 386922
loss: 1.005631446838379,grad_norm: 0.8435982961940511, iteration: 386923
loss: 1.0209630727767944,grad_norm: 0.9999990372852905, iteration: 386924
loss: 1.10990571975708,grad_norm: 0.999999777186311, iteration: 386925
loss: 0.9925470948219299,grad_norm: 0.8336481980165584, iteration: 386926
loss: 1.022755742073059,grad_norm: 0.9999991226737734, iteration: 386927
loss: 0.9689599275588989,grad_norm: 0.9018367744121168, iteration: 386928
loss: 1.0045974254608154,grad_norm: 0.6812650391632982, iteration: 386929
loss: 1.0586326122283936,grad_norm: 0.9999991407203215, iteration: 386930
loss: 1.0071946382522583,grad_norm: 0.7515629531880743, iteration: 386931
loss: 0.9959840774536133,grad_norm: 0.8400048872668932, iteration: 386932
loss: 1.0505248308181763,grad_norm: 0.9999998577056345, iteration: 386933
loss: 0.9878760576248169,grad_norm: 0.7269418308392055, iteration: 386934
loss: 1.0419416427612305,grad_norm: 0.9999992077461819, iteration: 386935
loss: 1.0388219356536865,grad_norm: 0.9999996540561813, iteration: 386936
loss: 1.0642625093460083,grad_norm: 0.7895113528738964, iteration: 386937
loss: 1.0208356380462646,grad_norm: 0.9647178418439406, iteration: 386938
loss: 1.1265181303024292,grad_norm: 0.9999993778272798, iteration: 386939
loss: 1.0685409307479858,grad_norm: 0.9999994048639677, iteration: 386940
loss: 0.9951996207237244,grad_norm: 0.660619274066572, iteration: 386941
loss: 0.9603211283683777,grad_norm: 0.7956858305010206, iteration: 386942
loss: 1.060433268547058,grad_norm: 0.9999995455001176, iteration: 386943
loss: 1.0449209213256836,grad_norm: 0.6532261401646684, iteration: 386944
loss: 1.0172061920166016,grad_norm: 0.7206975036248388, iteration: 386945
loss: 0.9883769750595093,grad_norm: 0.8236126386876917, iteration: 386946
loss: 1.0031803846359253,grad_norm: 0.7698838084383309, iteration: 386947
loss: 1.049308180809021,grad_norm: 0.69590435789552, iteration: 386948
loss: 0.9536445736885071,grad_norm: 0.7999113125583415, iteration: 386949
loss: 0.9801203608512878,grad_norm: 0.7310907885881811, iteration: 386950
loss: 1.0000308752059937,grad_norm: 0.8310961150396106, iteration: 386951
loss: 1.0049984455108643,grad_norm: 0.9999993105794196, iteration: 386952
loss: 1.0367612838745117,grad_norm: 0.8119534258087479, iteration: 386953
loss: 0.9927452802658081,grad_norm: 0.9247638409195118, iteration: 386954
loss: 1.0022412538528442,grad_norm: 0.9391187475344177, iteration: 386955
loss: 1.0109806060791016,grad_norm: 0.9999993240837807, iteration: 386956
loss: 1.0449219942092896,grad_norm: 0.9999991089558188, iteration: 386957
loss: 1.0193065404891968,grad_norm: 0.7857327003570181, iteration: 386958
loss: 0.998029351234436,grad_norm: 0.8357910010273283, iteration: 386959
loss: 1.0022817850112915,grad_norm: 0.8705473557780221, iteration: 386960
loss: 1.0793401002883911,grad_norm: 0.9452472428665422, iteration: 386961
loss: 1.0175210237503052,grad_norm: 0.8531557534836163, iteration: 386962
loss: 0.9883301854133606,grad_norm: 0.7460427830806374, iteration: 386963
loss: 0.9616143703460693,grad_norm: 0.7267971321938201, iteration: 386964
loss: 1.0221856832504272,grad_norm: 0.7521223295053959, iteration: 386965
loss: 0.979630172252655,grad_norm: 0.767228546861969, iteration: 386966
loss: 1.0060616731643677,grad_norm: 0.9999998215393959, iteration: 386967
loss: 1.0216377973556519,grad_norm: 0.9999990375095392, iteration: 386968
loss: 1.0491838455200195,grad_norm: 0.7219419186296016, iteration: 386969
loss: 0.959036648273468,grad_norm: 0.6683630773195701, iteration: 386970
loss: 1.0356944799423218,grad_norm: 0.7806663380196881, iteration: 386971
loss: 1.024100422859192,grad_norm: 0.8406477006419579, iteration: 386972
loss: 0.9963548183441162,grad_norm: 0.780595085092641, iteration: 386973
loss: 1.0205150842666626,grad_norm: 0.9825079445131157, iteration: 386974
loss: 0.9991401433944702,grad_norm: 0.9999994454650099, iteration: 386975
loss: 1.0218693017959595,grad_norm: 0.7535392011863065, iteration: 386976
loss: 1.079418659210205,grad_norm: 0.9999993797091292, iteration: 386977
loss: 0.9901430010795593,grad_norm: 0.8409604689091014, iteration: 386978
loss: 1.0446863174438477,grad_norm: 0.8814104957331131, iteration: 386979
loss: 1.082418441772461,grad_norm: 0.9999996318716639, iteration: 386980
loss: 1.000115990638733,grad_norm: 0.6579722352933985, iteration: 386981
loss: 0.9961057305335999,grad_norm: 0.693494949419669, iteration: 386982
loss: 0.9898402690887451,grad_norm: 0.6806054698370593, iteration: 386983
loss: 0.9754029512405396,grad_norm: 0.9324962530107195, iteration: 386984
loss: 0.9913480877876282,grad_norm: 0.7869920981404376, iteration: 386985
loss: 1.0764634609222412,grad_norm: 0.9999993833064221, iteration: 386986
loss: 1.0274193286895752,grad_norm: 0.7468572716990508, iteration: 386987
loss: 0.9728713035583496,grad_norm: 0.6771621679851267, iteration: 386988
loss: 1.024968147277832,grad_norm: 0.9900811250071598, iteration: 386989
loss: 1.0096803903579712,grad_norm: 0.9999990631966166, iteration: 386990
loss: 0.9930170178413391,grad_norm: 0.8868978838744969, iteration: 386991
loss: 1.021422028541565,grad_norm: 0.9999991723213458, iteration: 386992
loss: 0.9860154986381531,grad_norm: 0.8763891381450458, iteration: 386993
loss: 1.0120457410812378,grad_norm: 0.99999951518894, iteration: 386994
loss: 1.005967378616333,grad_norm: 0.7562104830425957, iteration: 386995
loss: 1.0831246376037598,grad_norm: 0.9999995757480934, iteration: 386996
loss: 1.0194449424743652,grad_norm: 0.9999998266396759, iteration: 386997
loss: 0.9803795218467712,grad_norm: 0.9448506958013303, iteration: 386998
loss: 1.0075404644012451,grad_norm: 0.9999990723967939, iteration: 386999
loss: 0.9656503796577454,grad_norm: 0.7067492183796759, iteration: 387000
loss: 1.0427178144454956,grad_norm: 0.8161020886353663, iteration: 387001
loss: 0.9984506964683533,grad_norm: 0.6986420159002874, iteration: 387002
loss: 0.9755547642707825,grad_norm: 0.8098619441796443, iteration: 387003
loss: 1.035183072090149,grad_norm: 0.9655421584600256, iteration: 387004
loss: 1.0084199905395508,grad_norm: 0.8254027971616437, iteration: 387005
loss: 0.9778541326522827,grad_norm: 0.9095427254178922, iteration: 387006
loss: 0.9794978499412537,grad_norm: 0.7406791922493833, iteration: 387007
loss: 1.0052123069763184,grad_norm: 0.6697583186422784, iteration: 387008
loss: 1.0020438432693481,grad_norm: 0.809844604109597, iteration: 387009
loss: 0.9975447654724121,grad_norm: 0.7438494810241507, iteration: 387010
loss: 0.9571505784988403,grad_norm: 0.7112833026840585, iteration: 387011
loss: 0.9912931323051453,grad_norm: 0.765108296358481, iteration: 387012
loss: 1.0149399042129517,grad_norm: 0.6788167938626706, iteration: 387013
loss: 1.0168756246566772,grad_norm: 0.7083724402601667, iteration: 387014
loss: 1.0268927812576294,grad_norm: 0.9884254080855236, iteration: 387015
loss: 1.0138368606567383,grad_norm: 0.7729690284005261, iteration: 387016
loss: 1.0035123825073242,grad_norm: 0.9999997474280009, iteration: 387017
loss: 0.980301558971405,grad_norm: 0.7251427545462397, iteration: 387018
loss: 0.9802921414375305,grad_norm: 0.7609756402912913, iteration: 387019
loss: 0.9729933142662048,grad_norm: 0.8132749490196599, iteration: 387020
loss: 0.9822772741317749,grad_norm: 0.7125725315281293, iteration: 387021
loss: 1.0422903299331665,grad_norm: 0.9999992909538955, iteration: 387022
loss: 1.0082529783248901,grad_norm: 0.9999991978047558, iteration: 387023
loss: 1.0073331594467163,grad_norm: 0.8011353194874391, iteration: 387024
loss: 0.9653943181037903,grad_norm: 0.7833103726982994, iteration: 387025
loss: 1.0177799463272095,grad_norm: 0.757423637177773, iteration: 387026
loss: 0.9776660203933716,grad_norm: 0.8932079602294868, iteration: 387027
loss: 1.00153648853302,grad_norm: 0.931316940086508, iteration: 387028
loss: 1.0218559503555298,grad_norm: 0.7762787070291626, iteration: 387029
loss: 0.9678515195846558,grad_norm: 0.9080956626970542, iteration: 387030
loss: 1.017682671546936,grad_norm: 0.9962022234179678, iteration: 387031
loss: 0.9968365430831909,grad_norm: 0.924582649149683, iteration: 387032
loss: 1.037410020828247,grad_norm: 0.8995716861075539, iteration: 387033
loss: 1.000174641609192,grad_norm: 0.8584046212709984, iteration: 387034
loss: 1.0096840858459473,grad_norm: 0.9999996540084208, iteration: 387035
loss: 1.0042133331298828,grad_norm: 0.9999992028291247, iteration: 387036
loss: 1.0685960054397583,grad_norm: 0.9999996978614049, iteration: 387037
loss: 1.0045427083969116,grad_norm: 0.7306002334490652, iteration: 387038
loss: 0.9976879358291626,grad_norm: 0.6791833365636119, iteration: 387039
loss: 1.0477447509765625,grad_norm: 0.999999190919274, iteration: 387040
loss: 0.9724075198173523,grad_norm: 0.7477590318047782, iteration: 387041
loss: 1.0113563537597656,grad_norm: 0.8684451892494812, iteration: 387042
loss: 1.0528531074523926,grad_norm: 0.779000137424145, iteration: 387043
loss: 0.994331955909729,grad_norm: 0.8383720169537441, iteration: 387044
loss: 1.0676428079605103,grad_norm: 0.8457472819058535, iteration: 387045
loss: 1.0500450134277344,grad_norm: 0.8792275003022813, iteration: 387046
loss: 1.0138145685195923,grad_norm: 0.9999996615888767, iteration: 387047
loss: 1.0190930366516113,grad_norm: 0.8611977951416602, iteration: 387048
loss: 0.9799110293388367,grad_norm: 0.6842592154326373, iteration: 387049
loss: 0.9920568466186523,grad_norm: 0.7846397353023354, iteration: 387050
loss: 0.9986289739608765,grad_norm: 0.9154075630770663, iteration: 387051
loss: 1.0439815521240234,grad_norm: 0.9989569902858609, iteration: 387052
loss: 1.0008965730667114,grad_norm: 0.9999992645898554, iteration: 387053
loss: 0.992482602596283,grad_norm: 0.698818805824453, iteration: 387054
loss: 0.9832730889320374,grad_norm: 0.7477052301866735, iteration: 387055
loss: 1.009305477142334,grad_norm: 0.9999991245997305, iteration: 387056
loss: 0.9984692931175232,grad_norm: 0.9999990778046565, iteration: 387057
loss: 0.9960206747055054,grad_norm: 0.6933074193019692, iteration: 387058
loss: 0.9983124732971191,grad_norm: 0.9004036704597793, iteration: 387059
loss: 0.9967586398124695,grad_norm: 0.7023913211427363, iteration: 387060
loss: 1.0062994956970215,grad_norm: 0.8381336117551402, iteration: 387061
loss: 1.1680388450622559,grad_norm: 0.9999999210698574, iteration: 387062
loss: 0.998840868473053,grad_norm: 0.6709470678772063, iteration: 387063
loss: 1.0564171075820923,grad_norm: 1.0000000147298458, iteration: 387064
loss: 1.0489715337753296,grad_norm: 0.7103620364450126, iteration: 387065
loss: 1.0083186626434326,grad_norm: 0.9474475325658853, iteration: 387066
loss: 0.9845302700996399,grad_norm: 0.9974349755586548, iteration: 387067
loss: 1.0284868478775024,grad_norm: 0.8274245964184488, iteration: 387068
loss: 1.0006537437438965,grad_norm: 0.818299909576867, iteration: 387069
loss: 1.0769479274749756,grad_norm: 0.9999998279717522, iteration: 387070
loss: 0.9762462377548218,grad_norm: 0.7677720284460895, iteration: 387071
loss: 0.9951049089431763,grad_norm: 0.9927688785523616, iteration: 387072
loss: 1.0497411489486694,grad_norm: 0.9999999328117638, iteration: 387073
loss: 1.0201927423477173,grad_norm: 0.627606239224072, iteration: 387074
loss: 0.9851681590080261,grad_norm: 0.6843925485307718, iteration: 387075
loss: 1.0900604724884033,grad_norm: 0.9999997020180801, iteration: 387076
loss: 1.0170376300811768,grad_norm: 0.914835319046217, iteration: 387077
loss: 0.9758562445640564,grad_norm: 0.999999203807937, iteration: 387078
loss: 1.016223669052124,grad_norm: 0.9999991819022447, iteration: 387079
loss: 1.074421763420105,grad_norm: 0.9999995594807862, iteration: 387080
loss: 0.9897103905677795,grad_norm: 0.9204429095273885, iteration: 387081
loss: 1.0212222337722778,grad_norm: 0.9714099455552127, iteration: 387082
loss: 1.0147427320480347,grad_norm: 0.9999991504130255, iteration: 387083
loss: 0.9775553941726685,grad_norm: 0.8076077655843698, iteration: 387084
loss: 1.0280967950820923,grad_norm: 0.7042421758153716, iteration: 387085
loss: 1.029263973236084,grad_norm: 0.9999992363174802, iteration: 387086
loss: 1.021493911743164,grad_norm: 0.9999991414503386, iteration: 387087
loss: 1.036848545074463,grad_norm: 0.9999994409792806, iteration: 387088
loss: 1.015285611152649,grad_norm: 0.8736526317347282, iteration: 387089
loss: 1.01529061794281,grad_norm: 0.999999965458478, iteration: 387090
loss: 1.009639024734497,grad_norm: 0.8317097070198791, iteration: 387091
loss: 1.268898367881775,grad_norm: 0.9999996734540023, iteration: 387092
loss: 1.0111829042434692,grad_norm: 0.7337031540326763, iteration: 387093
loss: 0.9844189286231995,grad_norm: 0.7370728526995357, iteration: 387094
loss: 1.0423797369003296,grad_norm: 0.9118083804387861, iteration: 387095
loss: 0.9849638938903809,grad_norm: 0.7525504527241262, iteration: 387096
loss: 0.9991210103034973,grad_norm: 0.7761010857860475, iteration: 387097
loss: 0.9946744441986084,grad_norm: 0.9999991180462661, iteration: 387098
loss: 0.9912769198417664,grad_norm: 0.7744830603499036, iteration: 387099
loss: 1.0171736478805542,grad_norm: 0.9999991003786344, iteration: 387100
loss: 1.0410242080688477,grad_norm: 0.8233582078349558, iteration: 387101
loss: 1.025183916091919,grad_norm: 0.7836129722629791, iteration: 387102
loss: 0.9605375528335571,grad_norm: 0.999999389785942, iteration: 387103
loss: 1.0128107070922852,grad_norm: 0.7499468855362381, iteration: 387104
loss: 0.9307265281677246,grad_norm: 0.8209855924069673, iteration: 387105
loss: 1.0132523775100708,grad_norm: 0.8348958276915096, iteration: 387106
loss: 0.9978652596473694,grad_norm: 0.7545020391168281, iteration: 387107
loss: 1.0945897102355957,grad_norm: 0.8993945623222988, iteration: 387108
loss: 1.0262880325317383,grad_norm: 0.7971810707815005, iteration: 387109
loss: 0.999862790107727,grad_norm: 0.7692206122663692, iteration: 387110
loss: 0.9873154759407043,grad_norm: 0.9314084203900375, iteration: 387111
loss: 1.021446943283081,grad_norm: 0.7975576586311699, iteration: 387112
loss: 0.9946389198303223,grad_norm: 0.8669019568336778, iteration: 387113
loss: 0.9926990866661072,grad_norm: 0.9999993264684945, iteration: 387114
loss: 1.0055466890335083,grad_norm: 0.7157803434162212, iteration: 387115
loss: 1.0258898735046387,grad_norm: 0.7589818913063244, iteration: 387116
loss: 1.0110605955123901,grad_norm: 0.7877814097307855, iteration: 387117
loss: 0.9719610214233398,grad_norm: 0.8990031254297632, iteration: 387118
loss: 0.9674319624900818,grad_norm: 0.70893383735762, iteration: 387119
loss: 1.0355626344680786,grad_norm: 0.9999992707404534, iteration: 387120
loss: 1.0018912553787231,grad_norm: 0.7695702747246973, iteration: 387121
loss: 1.0564981698989868,grad_norm: 0.9999993232068224, iteration: 387122
loss: 0.9958301186561584,grad_norm: 0.8967207028650094, iteration: 387123
loss: 0.9766979217529297,grad_norm: 0.9383384041672279, iteration: 387124
loss: 1.004721760749817,grad_norm: 0.7386582111349691, iteration: 387125
loss: 0.996820867061615,grad_norm: 0.8278134706414871, iteration: 387126
loss: 1.0125540494918823,grad_norm: 0.7445739934310899, iteration: 387127
loss: 0.9946317672729492,grad_norm: 0.7813014752049166, iteration: 387128
loss: 1.0189578533172607,grad_norm: 0.9875887330419407, iteration: 387129
loss: 1.0283066034317017,grad_norm: 0.9122709261751129, iteration: 387130
loss: 1.0041165351867676,grad_norm: 0.8189458929332436, iteration: 387131
loss: 1.003490924835205,grad_norm: 0.9999997248088658, iteration: 387132
loss: 0.9887874126434326,grad_norm: 0.8042364192231559, iteration: 387133
loss: 1.0166467428207397,grad_norm: 0.9171112854002136, iteration: 387134
loss: 1.0257046222686768,grad_norm: 0.6892225503634496, iteration: 387135
loss: 1.01774263381958,grad_norm: 0.999999514753781, iteration: 387136
loss: 1.1509422063827515,grad_norm: 0.9999996473756584, iteration: 387137
loss: 1.0358211994171143,grad_norm: 0.7865168873221253, iteration: 387138
loss: 1.0089225769042969,grad_norm: 0.8268768917771181, iteration: 387139
loss: 1.0294396877288818,grad_norm: 0.7297833676444492, iteration: 387140
loss: 1.0125072002410889,grad_norm: 0.8404932877940815, iteration: 387141
loss: 0.9629261493682861,grad_norm: 0.7588253362200204, iteration: 387142
loss: 0.9979615807533264,grad_norm: 0.9301123236160815, iteration: 387143
loss: 1.053154706954956,grad_norm: 0.7537751518533186, iteration: 387144
loss: 0.976402759552002,grad_norm: 0.9771256662859068, iteration: 387145
loss: 1.0238680839538574,grad_norm: 0.9148308634373845, iteration: 387146
loss: 0.9897192120552063,grad_norm: 0.7512028880054631, iteration: 387147
loss: 1.1101186275482178,grad_norm: 0.9999996921900117, iteration: 387148
loss: 1.0000156164169312,grad_norm: 0.8154406568024238, iteration: 387149
loss: 1.0316550731658936,grad_norm: 0.9999994432684249, iteration: 387150
loss: 1.0401549339294434,grad_norm: 0.999999574500472, iteration: 387151
loss: 0.9925138354301453,grad_norm: 0.9159368757144871, iteration: 387152
loss: 1.0062955617904663,grad_norm: 0.8435694841881554, iteration: 387153
loss: 0.9995391964912415,grad_norm: 0.7351993810367402, iteration: 387154
loss: 0.9880580902099609,grad_norm: 0.9999992963993648, iteration: 387155
loss: 1.065770149230957,grad_norm: 0.9999992297584087, iteration: 387156
loss: 1.0612119436264038,grad_norm: 0.9999991260221996, iteration: 387157
loss: 1.0093168020248413,grad_norm: 0.9999994949251876, iteration: 387158
loss: 0.9860290288925171,grad_norm: 0.7342436626693535, iteration: 387159
loss: 1.0173957347869873,grad_norm: 0.8261606821527548, iteration: 387160
loss: 1.019284725189209,grad_norm: 0.71357198709285, iteration: 387161
loss: 1.0063031911849976,grad_norm: 0.9320149978188145, iteration: 387162
loss: 0.988722026348114,grad_norm: 0.8421408287261215, iteration: 387163
loss: 1.0278855562210083,grad_norm: 0.9999996142364337, iteration: 387164
loss: 0.9876387715339661,grad_norm: 0.7430692494269263, iteration: 387165
loss: 1.0048643350601196,grad_norm: 0.7711478127164827, iteration: 387166
loss: 1.0023094415664673,grad_norm: 0.8274623163614764, iteration: 387167
loss: 0.9903640151023865,grad_norm: 0.751268715975412, iteration: 387168
loss: 0.974740207195282,grad_norm: 0.7909763107522749, iteration: 387169
loss: 0.9919564723968506,grad_norm: 0.88665309801942, iteration: 387170
loss: 1.028828501701355,grad_norm: 0.7369661680968105, iteration: 387171
loss: 1.0312550067901611,grad_norm: 0.7148985365143795, iteration: 387172
loss: 1.032368540763855,grad_norm: 0.756831673442004, iteration: 387173
loss: 1.032975435256958,grad_norm: 0.7290176802905978, iteration: 387174
loss: 0.9824034571647644,grad_norm: 0.7694339774707392, iteration: 387175
loss: 1.0055040121078491,grad_norm: 0.7612847916903093, iteration: 387176
loss: 1.0553982257843018,grad_norm: 0.880023626147258, iteration: 387177
loss: 0.9779481887817383,grad_norm: 0.7655042226634479, iteration: 387178
loss: 0.9592455625534058,grad_norm: 0.7812028274464573, iteration: 387179
loss: 1.024758219718933,grad_norm: 0.8243600115813094, iteration: 387180
loss: 1.0517698526382446,grad_norm: 0.8675741250454534, iteration: 387181
loss: 1.1170878410339355,grad_norm: 0.9386765434488539, iteration: 387182
loss: 1.0076532363891602,grad_norm: 0.9999994680952864, iteration: 387183
loss: 1.0204945802688599,grad_norm: 0.9999995408040495, iteration: 387184
loss: 1.034287929534912,grad_norm: 0.8456508040762515, iteration: 387185
loss: 1.0228906869888306,grad_norm: 0.7358572183144029, iteration: 387186
loss: 1.10964834690094,grad_norm: 0.9999994577448456, iteration: 387187
loss: 1.0951144695281982,grad_norm: 0.9999993515142691, iteration: 387188
loss: 1.0006358623504639,grad_norm: 0.9999998086630374, iteration: 387189
loss: 0.9703560471534729,grad_norm: 0.7037390309785372, iteration: 387190
loss: 0.9936996698379517,grad_norm: 0.9298866431775171, iteration: 387191
loss: 1.0217968225479126,grad_norm: 0.982545047333895, iteration: 387192
loss: 1.016774296760559,grad_norm: 0.7051677713175402, iteration: 387193
loss: 0.9950622916221619,grad_norm: 0.69424069174555, iteration: 387194
loss: 1.0424224138259888,grad_norm: 0.8847856443195813, iteration: 387195
loss: 1.0383310317993164,grad_norm: 0.9999999308289395, iteration: 387196
loss: 0.9738793969154358,grad_norm: 0.9999993646235088, iteration: 387197
loss: 0.9802698493003845,grad_norm: 0.8352927083015801, iteration: 387198
loss: 1.003071665763855,grad_norm: 0.7236780875386251, iteration: 387199
loss: 0.994549036026001,grad_norm: 0.8236976545089162, iteration: 387200
loss: 1.0209245681762695,grad_norm: 0.820442227120503, iteration: 387201
loss: 0.9979166984558105,grad_norm: 0.99999926651114, iteration: 387202
loss: 1.0979773998260498,grad_norm: 0.999999665702206, iteration: 387203
loss: 0.9945218563079834,grad_norm: 0.999999507110595, iteration: 387204
loss: 0.9776203632354736,grad_norm: 0.8182033051169718, iteration: 387205
loss: 0.9943950176239014,grad_norm: 0.999999120937783, iteration: 387206
loss: 0.9768031239509583,grad_norm: 0.9409862624420106, iteration: 387207
loss: 0.9930779337882996,grad_norm: 0.7968703958421348, iteration: 387208
loss: 1.0257608890533447,grad_norm: 0.9271850301461804, iteration: 387209
loss: 1.0049543380737305,grad_norm: 0.7286754679539561, iteration: 387210
loss: 1.0332765579223633,grad_norm: 0.9999996176384293, iteration: 387211
loss: 1.0086555480957031,grad_norm: 0.7169876159721662, iteration: 387212
loss: 0.9968811869621277,grad_norm: 0.9999991008840033, iteration: 387213
loss: 0.9840112328529358,grad_norm: 0.8847978703687451, iteration: 387214
loss: 1.225203037261963,grad_norm: 0.9999995924436437, iteration: 387215
loss: 1.0252069234848022,grad_norm: 0.7980982833961499, iteration: 387216
loss: 1.0016652345657349,grad_norm: 0.9999993158324375, iteration: 387217
loss: 0.9728214740753174,grad_norm: 0.9999998666346102, iteration: 387218
loss: 0.9912040829658508,grad_norm: 0.9999997853173556, iteration: 387219
loss: 0.9874172806739807,grad_norm: 0.767757090083427, iteration: 387220
loss: 1.0111616849899292,grad_norm: 0.8775598013005647, iteration: 387221
loss: 0.9998877644538879,grad_norm: 0.8554527120330987, iteration: 387222
loss: 0.9929163455963135,grad_norm: 0.665103751802243, iteration: 387223
loss: 0.9976115822792053,grad_norm: 0.8337985667932537, iteration: 387224
loss: 0.9725770354270935,grad_norm: 0.890571178269491, iteration: 387225
loss: 0.977173924446106,grad_norm: 0.7453126827731251, iteration: 387226
loss: 1.1144131422042847,grad_norm: 0.9999996160146811, iteration: 387227
loss: 1.006207823753357,grad_norm: 0.8093971644330386, iteration: 387228
loss: 0.9954986572265625,grad_norm: 0.8222163327211222, iteration: 387229
loss: 1.0086463689804077,grad_norm: 0.6992444486809918, iteration: 387230
loss: 1.0114951133728027,grad_norm: 0.8834262813208021, iteration: 387231
loss: 0.9944351315498352,grad_norm: 0.6149474078032696, iteration: 387232
loss: 1.062325119972229,grad_norm: 0.9623868995320989, iteration: 387233
loss: 0.9984411001205444,grad_norm: 0.9324712221382705, iteration: 387234
loss: 1.0226564407348633,grad_norm: 0.6998765585387626, iteration: 387235
loss: 1.0531885623931885,grad_norm: 0.999999138099776, iteration: 387236
loss: 1.0238959789276123,grad_norm: 0.9999991705545105, iteration: 387237
loss: 0.9754317402839661,grad_norm: 0.7178256670095916, iteration: 387238
loss: 1.0176475048065186,grad_norm: 0.8240809393699152, iteration: 387239
loss: 1.0005841255187988,grad_norm: 0.7331597541276007, iteration: 387240
loss: 0.9814680218696594,grad_norm: 0.8084047019478545, iteration: 387241
loss: 0.943980872631073,grad_norm: 0.999999546000176, iteration: 387242
loss: 0.9834993481636047,grad_norm: 0.8091473836403581, iteration: 387243
loss: 0.976070761680603,grad_norm: 0.9999996639017386, iteration: 387244
loss: 1.1278929710388184,grad_norm: 0.9999995326048501, iteration: 387245
loss: 1.0415046215057373,grad_norm: 0.9999990843694992, iteration: 387246
loss: 1.040786623954773,grad_norm: 0.8907616718098152, iteration: 387247
loss: 0.9469022750854492,grad_norm: 0.807288006674344, iteration: 387248
loss: 0.9792648553848267,grad_norm: 0.7056968869698429, iteration: 387249
loss: 0.996636688709259,grad_norm: 0.8010494349370093, iteration: 387250
loss: 0.9745460748672485,grad_norm: 0.9999997409989311, iteration: 387251
loss: 1.0091922283172607,grad_norm: 0.7653859825210048, iteration: 387252
loss: 0.9554029107093811,grad_norm: 0.8155084654313015, iteration: 387253
loss: 0.9878371953964233,grad_norm: 0.6368595021374422, iteration: 387254
loss: 0.9996744394302368,grad_norm: 0.7798788961069272, iteration: 387255
loss: 1.002168893814087,grad_norm: 0.9172847333248806, iteration: 387256
loss: 1.00136137008667,grad_norm: 0.7212953056926087, iteration: 387257
loss: 0.9841996431350708,grad_norm: 0.8337944979758357, iteration: 387258
loss: 0.9858717322349548,grad_norm: 0.7935085649804986, iteration: 387259
loss: 1.0475091934204102,grad_norm: 0.9999995489588299, iteration: 387260
loss: 1.0327889919281006,grad_norm: 0.9999996337244046, iteration: 387261
loss: 1.0641158819198608,grad_norm: 0.7781618292292857, iteration: 387262
loss: 1.0020604133605957,grad_norm: 0.8200921871882497, iteration: 387263
loss: 0.9818567037582397,grad_norm: 0.8056270418465882, iteration: 387264
loss: 1.0530940294265747,grad_norm: 0.8150170663169778, iteration: 387265
loss: 1.0088945627212524,grad_norm: 0.9270623576830453, iteration: 387266
loss: 0.9736618399620056,grad_norm: 0.7983299695577823, iteration: 387267
loss: 0.9986027479171753,grad_norm: 0.74592702117973, iteration: 387268
loss: 1.0512923002243042,grad_norm: 0.800735062112372, iteration: 387269
loss: 0.9616393446922302,grad_norm: 0.9255750080442134, iteration: 387270
loss: 0.9540293216705322,grad_norm: 0.7135260334381268, iteration: 387271
loss: 0.9684871435165405,grad_norm: 0.9897552920596919, iteration: 387272
loss: 1.0289759635925293,grad_norm: 0.8255718300263617, iteration: 387273
loss: 1.0770313739776611,grad_norm: 0.999999457523123, iteration: 387274
loss: 1.0056415796279907,grad_norm: 0.7120754707584668, iteration: 387275
loss: 1.0030266046524048,grad_norm: 0.9573485413180439, iteration: 387276
loss: 1.0246118307113647,grad_norm: 0.8158115353565506, iteration: 387277
loss: 0.9816949367523193,grad_norm: 0.8812367234759257, iteration: 387278
loss: 1.010095238685608,grad_norm: 0.9999999348067606, iteration: 387279
loss: 1.0270411968231201,grad_norm: 0.8621910746035268, iteration: 387280
loss: 0.998509407043457,grad_norm: 0.7884246868962304, iteration: 387281
loss: 1.0421137809753418,grad_norm: 0.7328309880992588, iteration: 387282
loss: 1.0107650756835938,grad_norm: 0.8750364468244008, iteration: 387283
loss: 1.032161831855774,grad_norm: 0.7355371631750056, iteration: 387284
loss: 1.0406267642974854,grad_norm: 0.999999185242219, iteration: 387285
loss: 0.998514711856842,grad_norm: 0.8851327572102689, iteration: 387286
loss: 0.9889364242553711,grad_norm: 0.8498727583529659, iteration: 387287
loss: 1.0166417360305786,grad_norm: 0.9377501704157118, iteration: 387288
loss: 1.0327543020248413,grad_norm: 0.8084334918702091, iteration: 387289
loss: 1.0292165279388428,grad_norm: 0.7025844662083404, iteration: 387290
loss: 1.0224930047988892,grad_norm: 0.7560013369641164, iteration: 387291
loss: 1.0121715068817139,grad_norm: 0.7959231936905069, iteration: 387292
loss: 0.9289922118186951,grad_norm: 0.8796704999022819, iteration: 387293
loss: 1.0119714736938477,grad_norm: 0.9184104602217534, iteration: 387294
loss: 1.0291086435317993,grad_norm: 0.81223894339022, iteration: 387295
loss: 1.0847671031951904,grad_norm: 0.8326842401456301, iteration: 387296
loss: 1.0228489637374878,grad_norm: 0.9999990574980575, iteration: 387297
loss: 1.0059990882873535,grad_norm: 0.9535896556669917, iteration: 387298
loss: 0.9645206928253174,grad_norm: 0.8518582237108134, iteration: 387299
loss: 1.0646562576293945,grad_norm: 0.999999649783358, iteration: 387300
loss: 1.066938042640686,grad_norm: 0.8294493399315536, iteration: 387301
loss: 0.9856882691383362,grad_norm: 0.6867591553734457, iteration: 387302
loss: 1.0178532600402832,grad_norm: 0.7442631076976732, iteration: 387303
loss: 0.9901014566421509,grad_norm: 0.7310554813690541, iteration: 387304
loss: 1.0038862228393555,grad_norm: 0.783911518329436, iteration: 387305
loss: 0.9833005666732788,grad_norm: 0.9061323256082078, iteration: 387306
loss: 1.0080028772354126,grad_norm: 0.8404236957249366, iteration: 387307
loss: 0.9536042213439941,grad_norm: 0.8144168676650902, iteration: 387308
loss: 0.9848896861076355,grad_norm: 0.7347998557736226, iteration: 387309
loss: 1.000616431236267,grad_norm: 0.7726357988846438, iteration: 387310
loss: 1.0198291540145874,grad_norm: 0.9332860456273592, iteration: 387311
loss: 0.9863452911376953,grad_norm: 0.7549498802454766, iteration: 387312
loss: 1.0383905172348022,grad_norm: 0.8517591951444978, iteration: 387313
loss: 0.9852243661880493,grad_norm: 0.9282202276913306, iteration: 387314
loss: 1.0266690254211426,grad_norm: 0.7837317686270546, iteration: 387315
loss: 1.0232954025268555,grad_norm: 0.9999991478769968, iteration: 387316
loss: 1.0123722553253174,grad_norm: 0.8912012397299128, iteration: 387317
loss: 1.0046167373657227,grad_norm: 0.9999994005692373, iteration: 387318
loss: 1.0967018604278564,grad_norm: 0.8609306959260493, iteration: 387319
loss: 1.0001171827316284,grad_norm: 0.7605294558524196, iteration: 387320
loss: 0.9844822287559509,grad_norm: 0.8634417945137381, iteration: 387321
loss: 1.014716386795044,grad_norm: 0.8253329869661016, iteration: 387322
loss: 0.9874055981636047,grad_norm: 0.8175150417563706, iteration: 387323
loss: 0.9902936220169067,grad_norm: 0.7674109966148795, iteration: 387324
loss: 1.0300756692886353,grad_norm: 0.8832612152799805, iteration: 387325
loss: 1.0308809280395508,grad_norm: 0.8538484391057762, iteration: 387326
loss: 1.0154836177825928,grad_norm: 0.6742453470247417, iteration: 387327
loss: 1.0518661737442017,grad_norm: 0.9999990882339361, iteration: 387328
loss: 1.0386056900024414,grad_norm: 0.7957972964948543, iteration: 387329
loss: 0.9920574426651001,grad_norm: 0.7186791215025121, iteration: 387330
loss: 0.9812301993370056,grad_norm: 0.7797811643979254, iteration: 387331
loss: 0.9731132984161377,grad_norm: 0.7984382682914997, iteration: 387332
loss: 1.0664269924163818,grad_norm: 0.8468260745183651, iteration: 387333
loss: 0.9425159692764282,grad_norm: 0.7495995300849042, iteration: 387334
loss: 1.0391732454299927,grad_norm: 0.999998919717363, iteration: 387335
loss: 1.0160731077194214,grad_norm: 0.7481185589425275, iteration: 387336
loss: 1.002724051475525,grad_norm: 0.9999996186284251, iteration: 387337
loss: 1.0295897722244263,grad_norm: 0.9999997369669407, iteration: 387338
loss: 1.0104421377182007,grad_norm: 0.9648126988548619, iteration: 387339
loss: 1.0311559438705444,grad_norm: 0.891292874312723, iteration: 387340
loss: 1.0068351030349731,grad_norm: 0.7010226950240558, iteration: 387341
loss: 1.1099175214767456,grad_norm: 0.8541861073956729, iteration: 387342
loss: 1.0466843843460083,grad_norm: 0.6334219536665946, iteration: 387343
loss: 1.0050082206726074,grad_norm: 0.76390134536893, iteration: 387344
loss: 0.9724181890487671,grad_norm: 0.6589183122276717, iteration: 387345
loss: 0.9902015328407288,grad_norm: 0.7454211235151597, iteration: 387346
loss: 1.0083889961242676,grad_norm: 0.8633833652369884, iteration: 387347
loss: 1.0226629972457886,grad_norm: 0.7643512922082716, iteration: 387348
loss: 1.0174015760421753,grad_norm: 0.7416631840128832, iteration: 387349
loss: 1.0023735761642456,grad_norm: 0.8892068786992461, iteration: 387350
loss: 0.970918595790863,grad_norm: 0.8320538508846461, iteration: 387351
loss: 1.1517986059188843,grad_norm: 0.9999997998597647, iteration: 387352
loss: 1.0715832710266113,grad_norm: 0.9999993585930174, iteration: 387353
loss: 1.0596562623977661,grad_norm: 0.9999992656306169, iteration: 387354
loss: 1.0134680271148682,grad_norm: 0.7152616536354115, iteration: 387355
loss: 0.9550905823707581,grad_norm: 0.8460626202679065, iteration: 387356
loss: 1.0574970245361328,grad_norm: 0.9999990100931615, iteration: 387357
loss: 0.9944363236427307,grad_norm: 0.6189359531057352, iteration: 387358
loss: 1.1420261859893799,grad_norm: 0.8788774371191582, iteration: 387359
loss: 1.0140022039413452,grad_norm: 0.817309205353411, iteration: 387360
loss: 0.9973690509796143,grad_norm: 0.7736651541429261, iteration: 387361
loss: 1.0471631288528442,grad_norm: 0.9999991616056683, iteration: 387362
loss: 0.9961758852005005,grad_norm: 0.8220119797064624, iteration: 387363
loss: 1.0299896001815796,grad_norm: 0.999999151334333, iteration: 387364
loss: 0.9940800666809082,grad_norm: 0.6731512502731679, iteration: 387365
loss: 1.247281551361084,grad_norm: 0.9999989581963845, iteration: 387366
loss: 1.0189958810806274,grad_norm: 0.8424673623753499, iteration: 387367
loss: 0.9874383807182312,grad_norm: 0.7179107085249439, iteration: 387368
loss: 1.0235066413879395,grad_norm: 0.6704719366009826, iteration: 387369
loss: 0.960699737071991,grad_norm: 0.8326090812464568, iteration: 387370
loss: 1.029910922050476,grad_norm: 0.821309632296694, iteration: 387371
loss: 1.0584872961044312,grad_norm: 0.7759653538351096, iteration: 387372
loss: 1.0013208389282227,grad_norm: 0.9228140796602701, iteration: 387373
loss: 0.9928713440895081,grad_norm: 0.7114119249185953, iteration: 387374
loss: 1.1743522882461548,grad_norm: 0.9999994519423583, iteration: 387375
loss: 1.0664232969284058,grad_norm: 0.9999996598342503, iteration: 387376
loss: 0.9584421515464783,grad_norm: 0.8063959095364868, iteration: 387377
loss: 1.0192557573318481,grad_norm: 0.7942737033598875, iteration: 387378
loss: 0.9866386651992798,grad_norm: 0.9412707673095827, iteration: 387379
loss: 1.0022850036621094,grad_norm: 0.7010047493804351, iteration: 387380
loss: 1.0713821649551392,grad_norm: 0.9999992267554206, iteration: 387381
loss: 1.0399580001831055,grad_norm: 0.9999993126452433, iteration: 387382
loss: 1.0107038021087646,grad_norm: 0.8945013470259162, iteration: 387383
loss: 1.0618313550949097,grad_norm: 0.7889048093032199, iteration: 387384
loss: 0.9987107515335083,grad_norm: 0.9999992182853525, iteration: 387385
loss: 1.0023804903030396,grad_norm: 0.8770308020513149, iteration: 387386
loss: 0.9974896311759949,grad_norm: 0.7851993904694219, iteration: 387387
loss: 0.9899027943611145,grad_norm: 0.8429426663184318, iteration: 387388
loss: 1.0444244146347046,grad_norm: 0.7199723368719533, iteration: 387389
loss: 0.9813920855522156,grad_norm: 0.8255956027447817, iteration: 387390
loss: 0.990459680557251,grad_norm: 0.7187741886695209, iteration: 387391
loss: 0.9568963050842285,grad_norm: 0.8672538539995356, iteration: 387392
loss: 0.9924324750900269,grad_norm: 0.8379635861027189, iteration: 387393
loss: 1.0259382724761963,grad_norm: 0.9999995564257069, iteration: 387394
loss: 0.995808482170105,grad_norm: 0.905095343935257, iteration: 387395
loss: 0.952303946018219,grad_norm: 0.8379715287061429, iteration: 387396
loss: 1.0412061214447021,grad_norm: 0.8833053729130437, iteration: 387397
loss: 0.9865790605545044,grad_norm: 0.7702659837669925, iteration: 387398
loss: 0.9764700531959534,grad_norm: 0.7845996618148477, iteration: 387399
loss: 1.0311810970306396,grad_norm: 0.9105178333562984, iteration: 387400
loss: 1.028242826461792,grad_norm: 0.9999990089578941, iteration: 387401
loss: 1.0537872314453125,grad_norm: 0.9999995232158824, iteration: 387402
loss: 1.225633144378662,grad_norm: 0.9697446830986539, iteration: 387403
loss: 1.0336400270462036,grad_norm: 0.6944434840238395, iteration: 387404
loss: 1.0169676542282104,grad_norm: 0.9999994004401613, iteration: 387405
loss: 1.016758680343628,grad_norm: 0.7802853730155893, iteration: 387406
loss: 1.0751426219940186,grad_norm: 0.999999052481898, iteration: 387407
loss: 1.0105177164077759,grad_norm: 0.8382748810444302, iteration: 387408
loss: 1.000217318534851,grad_norm: 0.8090758770873517, iteration: 387409
loss: 0.9808863997459412,grad_norm: 0.7859522394973092, iteration: 387410
loss: 1.1730828285217285,grad_norm: 0.9999995775174509, iteration: 387411
loss: 1.0144766569137573,grad_norm: 0.6754397521909087, iteration: 387412
loss: 0.988000214099884,grad_norm: 0.8414541112962589, iteration: 387413
loss: 0.9769632816314697,grad_norm: 0.8474640778911788, iteration: 387414
loss: 1.0481626987457275,grad_norm: 0.999999473732963, iteration: 387415
loss: 1.0814059972763062,grad_norm: 0.8734929278129216, iteration: 387416
loss: 1.1042256355285645,grad_norm: 0.9999998500598577, iteration: 387417
loss: 0.9718209505081177,grad_norm: 0.7736895869143562, iteration: 387418
loss: 0.9898582100868225,grad_norm: 0.6695213855245155, iteration: 387419
loss: 0.9939586520195007,grad_norm: 0.7635228322152027, iteration: 387420
loss: 1.0465619564056396,grad_norm: 0.9999990894646792, iteration: 387421
loss: 0.9960567355155945,grad_norm: 0.8966571147283168, iteration: 387422
loss: 1.107932209968567,grad_norm: 0.8339087813150088, iteration: 387423
loss: 0.9830424785614014,grad_norm: 0.9541241217469099, iteration: 387424
loss: 1.0095902681350708,grad_norm: 0.671958706624481, iteration: 387425
loss: 1.0252288579940796,grad_norm: 0.7374419774115907, iteration: 387426
loss: 0.9905957579612732,grad_norm: 0.9999991724691806, iteration: 387427
loss: 0.9628194570541382,grad_norm: 0.7579254591953031, iteration: 387428
loss: 1.1243302822113037,grad_norm: 0.9999999293835362, iteration: 387429
loss: 1.0484505891799927,grad_norm: 0.9999990951330949, iteration: 387430
loss: 1.0728789567947388,grad_norm: 0.9999994411833102, iteration: 387431
loss: 1.0201903581619263,grad_norm: 0.8969947016427976, iteration: 387432
loss: 1.0693304538726807,grad_norm: 0.8510433808206471, iteration: 387433
loss: 1.0233776569366455,grad_norm: 0.9999991073932004, iteration: 387434
loss: 0.9809046387672424,grad_norm: 0.7257577301622316, iteration: 387435
loss: 1.0050570964813232,grad_norm: 0.8202064292899011, iteration: 387436
loss: 0.9647006988525391,grad_norm: 0.7741262388916483, iteration: 387437
loss: 1.038446307182312,grad_norm: 0.922349377695985, iteration: 387438
loss: 1.0094860792160034,grad_norm: 0.7179398062088174, iteration: 387439
loss: 1.017852783203125,grad_norm: 0.8174881708058934, iteration: 387440
loss: 0.9844719767570496,grad_norm: 0.7900283786948039, iteration: 387441
loss: 1.05355966091156,grad_norm: 0.7317192857072216, iteration: 387442
loss: 0.9899733066558838,grad_norm: 0.6590321654560432, iteration: 387443
loss: 0.9978346228599548,grad_norm: 0.7724378600632054, iteration: 387444
loss: 1.0012896060943604,grad_norm: 0.8293829224347576, iteration: 387445
loss: 1.061084270477295,grad_norm: 0.9141563788542876, iteration: 387446
loss: 1.083819031715393,grad_norm: 0.9999996134612545, iteration: 387447
loss: 1.067566156387329,grad_norm: 0.9999990766265612, iteration: 387448
loss: 0.991839587688446,grad_norm: 0.9999998601081379, iteration: 387449
loss: 1.0417208671569824,grad_norm: 0.7559590656909586, iteration: 387450
loss: 0.9764761924743652,grad_norm: 0.9989529383791784, iteration: 387451
loss: 1.023681640625,grad_norm: 0.9777992979784528, iteration: 387452
loss: 1.0244324207305908,grad_norm: 0.8571900560613694, iteration: 387453
loss: 1.024425983428955,grad_norm: 0.8394856366692733, iteration: 387454
loss: 0.9736940860748291,grad_norm: 0.7864337269898344, iteration: 387455
loss: 0.9660463929176331,grad_norm: 0.7843527353148815, iteration: 387456
loss: 1.01166570186615,grad_norm: 0.8505861846912528, iteration: 387457
loss: 0.996621310710907,grad_norm: 0.7747537048170788, iteration: 387458
loss: 1.037219524383545,grad_norm: 0.9274930596578541, iteration: 387459
loss: 1.0903533697128296,grad_norm: 0.9999996212760017, iteration: 387460
loss: 1.00160813331604,grad_norm: 0.8708883010981536, iteration: 387461
loss: 1.0649696588516235,grad_norm: 0.8872960061183396, iteration: 387462
loss: 1.0997672080993652,grad_norm: 0.9999998047526761, iteration: 387463
loss: 0.9958161115646362,grad_norm: 0.8102910807732092, iteration: 387464
loss: 1.0060083866119385,grad_norm: 0.7777533381871182, iteration: 387465
loss: 0.9935469031333923,grad_norm: 0.7400675703914852, iteration: 387466
loss: 0.9612681865692139,grad_norm: 0.8804057534288496, iteration: 387467
loss: 1.0307738780975342,grad_norm: 0.999999926838701, iteration: 387468
loss: 1.012608528137207,grad_norm: 0.9999998510040141, iteration: 387469
loss: 1.0127496719360352,grad_norm: 0.7416780963072781, iteration: 387470
loss: 1.1173707246780396,grad_norm: 0.8361110728066296, iteration: 387471
loss: 0.9777927994728088,grad_norm: 0.6966832028192398, iteration: 387472
loss: 1.036534309387207,grad_norm: 0.6829028751324066, iteration: 387473
loss: 0.9911153316497803,grad_norm: 0.7333766015480221, iteration: 387474
loss: 1.017429232597351,grad_norm: 0.7156201088577603, iteration: 387475
loss: 1.008779525756836,grad_norm: 0.915923104334121, iteration: 387476
loss: 1.0081634521484375,grad_norm: 0.775952965716468, iteration: 387477
loss: 0.9964622855186462,grad_norm: 0.7790780606284308, iteration: 387478
loss: 0.9727990627288818,grad_norm: 0.7793514142806335, iteration: 387479
loss: 0.9794378280639648,grad_norm: 0.8539021309747534, iteration: 387480
loss: 0.9899744987487793,grad_norm: 0.9999991878713592, iteration: 387481
loss: 0.9545745849609375,grad_norm: 0.8698020641513771, iteration: 387482
loss: 0.9803257584571838,grad_norm: 0.7725872328517345, iteration: 387483
loss: 0.9820630550384521,grad_norm: 0.7963003095708221, iteration: 387484
loss: 0.9489527940750122,grad_norm: 0.6512170786463654, iteration: 387485
loss: 1.030058741569519,grad_norm: 0.9999997243428206, iteration: 387486
loss: 1.0305824279785156,grad_norm: 0.7411950414554747, iteration: 387487
loss: 1.0672223567962646,grad_norm: 0.9567441507522979, iteration: 387488
loss: 0.9844462275505066,grad_norm: 0.7549497181079345, iteration: 387489
loss: 1.0150113105773926,grad_norm: 0.8248159409277162, iteration: 387490
loss: 1.0448321104049683,grad_norm: 0.7750134621831851, iteration: 387491
loss: 1.0036956071853638,grad_norm: 0.754888190423935, iteration: 387492
loss: 0.9753136038780212,grad_norm: 0.8940450594359042, iteration: 387493
loss: 1.0149797201156616,grad_norm: 0.8667914733476724, iteration: 387494
loss: 0.9765744209289551,grad_norm: 0.6935297771439465, iteration: 387495
loss: 1.0077543258666992,grad_norm: 0.9004281788666598, iteration: 387496
loss: 1.0056358575820923,grad_norm: 0.8266390130528154, iteration: 387497
loss: 0.9723867774009705,grad_norm: 0.9999990298694776, iteration: 387498
loss: 0.9947199821472168,grad_norm: 0.7176477973701008, iteration: 387499
loss: 1.0792317390441895,grad_norm: 0.7932628443139341, iteration: 387500
loss: 1.006616473197937,grad_norm: 0.8953343359833273, iteration: 387501
loss: 1.0081663131713867,grad_norm: 0.9999990240626482, iteration: 387502
loss: 1.0352126359939575,grad_norm: 0.9999991283959138, iteration: 387503
loss: 1.0339304208755493,grad_norm: 0.8178772833993022, iteration: 387504
loss: 0.9799778461456299,grad_norm: 0.6668115504874884, iteration: 387505
loss: 0.989854097366333,grad_norm: 0.8039930286325216, iteration: 387506
loss: 0.9880794286727905,grad_norm: 0.6905562101760598, iteration: 387507
loss: 1.0380078554153442,grad_norm: 0.8238190118960442, iteration: 387508
loss: 0.996535062789917,grad_norm: 0.7033870426312989, iteration: 387509
loss: 0.9893549680709839,grad_norm: 0.8728464809594604, iteration: 387510
loss: 1.2734729051589966,grad_norm: 0.9999995649766544, iteration: 387511
loss: 1.0829483270645142,grad_norm: 0.9999991370962998, iteration: 387512
loss: 1.0230118036270142,grad_norm: 0.9399370848467284, iteration: 387513
loss: 1.0386079549789429,grad_norm: 0.999999152762483, iteration: 387514
loss: 0.980507493019104,grad_norm: 0.6583499740042799, iteration: 387515
loss: 0.9622604846954346,grad_norm: 0.9999990763434956, iteration: 387516
loss: 1.0301423072814941,grad_norm: 0.907634681292689, iteration: 387517
loss: 1.0095127820968628,grad_norm: 0.999999472215593, iteration: 387518
loss: 0.9737022519111633,grad_norm: 0.724559905408282, iteration: 387519
loss: 0.9655912518501282,grad_norm: 0.9386127830428082, iteration: 387520
loss: 0.9733949899673462,grad_norm: 0.7108909579436374, iteration: 387521
loss: 1.074318528175354,grad_norm: 0.7688667140658871, iteration: 387522
loss: 1.0301510095596313,grad_norm: 0.9091494729912678, iteration: 387523
loss: 0.9882185459136963,grad_norm: 0.9999991059996751, iteration: 387524
loss: 0.9429713487625122,grad_norm: 0.7554344510142248, iteration: 387525
loss: 0.97065669298172,grad_norm: 0.7904072306667886, iteration: 387526
loss: 0.9725738167762756,grad_norm: 0.9737173696090964, iteration: 387527
loss: 1.0142165422439575,grad_norm: 0.9600660898757957, iteration: 387528
loss: 1.0149341821670532,grad_norm: 0.6069047449709843, iteration: 387529
loss: 1.0137183666229248,grad_norm: 0.7600387545763596, iteration: 387530
loss: 0.9573704600334167,grad_norm: 0.8249066715345925, iteration: 387531
loss: 1.0256037712097168,grad_norm: 0.8795044318825073, iteration: 387532
loss: 1.021427869796753,grad_norm: 0.6493804615096574, iteration: 387533
loss: 1.0463825464248657,grad_norm: 0.9999999421954339, iteration: 387534
loss: 0.9773661494255066,grad_norm: 0.8617487280136749, iteration: 387535
loss: 0.9770612716674805,grad_norm: 0.7595391647010385, iteration: 387536
loss: 0.9798189997673035,grad_norm: 0.8503812532041389, iteration: 387537
loss: 0.985516369342804,grad_norm: 0.7204434391735067, iteration: 387538
loss: 1.0187045335769653,grad_norm: 0.7713165627075202, iteration: 387539
loss: 1.130691409111023,grad_norm: 0.9999997423010106, iteration: 387540
loss: 0.9922709465026855,grad_norm: 0.9999991561055012, iteration: 387541
loss: 1.1284574270248413,grad_norm: 0.9999996875975589, iteration: 387542
loss: 1.008560299873352,grad_norm: 0.643341662237521, iteration: 387543
loss: 0.993963897228241,grad_norm: 0.6794732760466681, iteration: 387544
loss: 1.0292736291885376,grad_norm: 0.904139153819084, iteration: 387545
loss: 1.0256913900375366,grad_norm: 0.9210740581224427, iteration: 387546
loss: 0.9871464967727661,grad_norm: 0.999999201085752, iteration: 387547
loss: 1.0035842657089233,grad_norm: 0.7899156918302763, iteration: 387548
loss: 1.054218053817749,grad_norm: 0.8119932587713389, iteration: 387549
loss: 0.9987537264823914,grad_norm: 0.9219727467011618, iteration: 387550
loss: 0.9851387739181519,grad_norm: 0.6417761890304433, iteration: 387551
loss: 0.950350284576416,grad_norm: 0.7656592692332453, iteration: 387552
loss: 0.995032787322998,grad_norm: 0.9476831178949034, iteration: 387553
loss: 0.9578835368156433,grad_norm: 0.9207698243767357, iteration: 387554
loss: 1.0596308708190918,grad_norm: 0.9999995229559535, iteration: 387555
loss: 1.0845328569412231,grad_norm: 0.9999996735315102, iteration: 387556
loss: 1.0755391120910645,grad_norm: 0.7010574303459456, iteration: 387557
loss: 0.9721300005912781,grad_norm: 0.9850504055823894, iteration: 387558
loss: 1.017888069152832,grad_norm: 0.9999995231710906, iteration: 387559
loss: 0.9673880934715271,grad_norm: 0.8215810555509979, iteration: 387560
loss: 1.0062311887741089,grad_norm: 0.9999994764240583, iteration: 387561
loss: 1.0932002067565918,grad_norm: 0.9999996304923351, iteration: 387562
loss: 0.9925791025161743,grad_norm: 0.8995502373688439, iteration: 387563
loss: 0.9898532032966614,grad_norm: 0.8351472471881038, iteration: 387564
loss: 1.0568772554397583,grad_norm: 0.7532389092909502, iteration: 387565
loss: 1.2100173234939575,grad_norm: 0.9999993010750767, iteration: 387566
loss: 0.9930183291435242,grad_norm: 0.9300238895994583, iteration: 387567
loss: 1.0105746984481812,grad_norm: 0.6988630199488585, iteration: 387568
loss: 0.9969344139099121,grad_norm: 0.8712017719242798, iteration: 387569
loss: 1.0330679416656494,grad_norm: 0.7130693187778786, iteration: 387570
loss: 1.0233126878738403,grad_norm: 0.7533499315340861, iteration: 387571
loss: 0.9970445036888123,grad_norm: 0.9999991322146153, iteration: 387572
loss: 1.1541049480438232,grad_norm: 0.9999998843252639, iteration: 387573
loss: 1.0556483268737793,grad_norm: 0.9999994752278037, iteration: 387574
loss: 0.997531533241272,grad_norm: 0.7278474858626192, iteration: 387575
loss: 1.0746468305587769,grad_norm: 0.8459431665254084, iteration: 387576
loss: 1.026260495185852,grad_norm: 0.999999194409227, iteration: 387577
loss: 0.990112841129303,grad_norm: 0.8072036905493373, iteration: 387578
loss: 1.0738637447357178,grad_norm: 0.9999996494240357, iteration: 387579
loss: 1.0233817100524902,grad_norm: 0.9999990002567213, iteration: 387580
loss: 0.9957486391067505,grad_norm: 0.8418057744339709, iteration: 387581
loss: 0.9900684356689453,grad_norm: 0.71881865386968, iteration: 387582
loss: 0.9964778423309326,grad_norm: 0.6398623461365468, iteration: 387583
loss: 1.0060160160064697,grad_norm: 0.9999990783169814, iteration: 387584
loss: 1.0184215307235718,grad_norm: 0.9301554793386635, iteration: 387585
loss: 0.9825916886329651,grad_norm: 0.8741058900507783, iteration: 387586
loss: 1.016052007675171,grad_norm: 0.770302878493433, iteration: 387587
loss: 1.0233395099639893,grad_norm: 0.9550598619588528, iteration: 387588
loss: 1.0440880060195923,grad_norm: 0.9999998523504856, iteration: 387589
loss: 0.973115861415863,grad_norm: 0.7873349932831223, iteration: 387590
loss: 1.0042290687561035,grad_norm: 0.8466674124585135, iteration: 387591
loss: 0.9748585820198059,grad_norm: 0.7522070022446099, iteration: 387592
loss: 1.1527295112609863,grad_norm: 0.9999999031195883, iteration: 387593
loss: 1.0467277765274048,grad_norm: 0.9004561014512854, iteration: 387594
loss: 1.0222798585891724,grad_norm: 0.9146020779593903, iteration: 387595
loss: 1.0261974334716797,grad_norm: 0.938129913750287, iteration: 387596
loss: 0.9817366003990173,grad_norm: 0.8933121954877449, iteration: 387597
loss: 0.9934895634651184,grad_norm: 0.7656119232341617, iteration: 387598
loss: 1.133118748664856,grad_norm: 0.999999019388211, iteration: 387599
loss: 0.9992934465408325,grad_norm: 0.8323848121547663, iteration: 387600
loss: 1.047285556793213,grad_norm: 0.9863706245758148, iteration: 387601
loss: 1.001410961151123,grad_norm: 0.9999992099818471, iteration: 387602
loss: 0.9957610964775085,grad_norm: 0.7580028384515372, iteration: 387603
loss: 0.9765219688415527,grad_norm: 0.7312281811795293, iteration: 387604
loss: 1.0544822216033936,grad_norm: 0.9517532069615781, iteration: 387605
loss: 1.0277647972106934,grad_norm: 0.6420517110374137, iteration: 387606
loss: 1.1024550199508667,grad_norm: 0.9999991952723147, iteration: 387607
loss: 0.9965744614601135,grad_norm: 0.8149844286103359, iteration: 387608
loss: 1.0400962829589844,grad_norm: 0.7110037258721739, iteration: 387609
loss: 0.998840868473053,grad_norm: 0.6359163031862012, iteration: 387610
loss: 1.0312652587890625,grad_norm: 0.9999994567374411, iteration: 387611
loss: 1.1732161045074463,grad_norm: 0.9999992030122299, iteration: 387612
loss: 1.0180537700653076,grad_norm: 0.9999998903689761, iteration: 387613
loss: 1.0195683240890503,grad_norm: 0.8292485874548291, iteration: 387614
loss: 1.063107967376709,grad_norm: 0.863600997090548, iteration: 387615
loss: 1.1310615539550781,grad_norm: 0.999999850894402, iteration: 387616
loss: 0.9686643481254578,grad_norm: 0.8412455181990247, iteration: 387617
loss: 1.0082553625106812,grad_norm: 0.7266816137810526, iteration: 387618
loss: 1.0967206954956055,grad_norm: 0.8526812967445165, iteration: 387619
loss: 0.9547256827354431,grad_norm: 0.7025651948590496, iteration: 387620
loss: 1.0182275772094727,grad_norm: 0.7292719634531392, iteration: 387621
loss: 1.0129003524780273,grad_norm: 0.6767758267444407, iteration: 387622
loss: 1.0198177099227905,grad_norm: 0.8074466769631038, iteration: 387623
loss: 1.08384108543396,grad_norm: 0.9999992151482038, iteration: 387624
loss: 0.9575105905532837,grad_norm: 0.8226174760255967, iteration: 387625
loss: 1.0304102897644043,grad_norm: 0.9999991200365689, iteration: 387626
loss: 1.0012403726577759,grad_norm: 0.7220576901592581, iteration: 387627
loss: 1.003606915473938,grad_norm: 0.9999992257330755, iteration: 387628
loss: 1.026637077331543,grad_norm: 0.9704886292889007, iteration: 387629
loss: 1.0744574069976807,grad_norm: 0.999999226754209, iteration: 387630
loss: 0.9807204008102417,grad_norm: 0.8828739787814637, iteration: 387631
loss: 0.977877140045166,grad_norm: 0.999999603634637, iteration: 387632
loss: 0.9904788136482239,grad_norm: 0.9858250215746952, iteration: 387633
loss: 1.0023785829544067,grad_norm: 0.6285848240910572, iteration: 387634
loss: 1.045185923576355,grad_norm: 0.9134967392716558, iteration: 387635
loss: 0.9961385130882263,grad_norm: 0.999999972162409, iteration: 387636
loss: 1.024629831314087,grad_norm: 0.9999998428513616, iteration: 387637
loss: 1.0640878677368164,grad_norm: 0.7238336047543145, iteration: 387638
loss: 1.0171818733215332,grad_norm: 0.81285827025193, iteration: 387639
loss: 1.018365740776062,grad_norm: 0.7861841148033883, iteration: 387640
loss: 1.0119762420654297,grad_norm: 0.7249874045868883, iteration: 387641
loss: 0.9854063391685486,grad_norm: 0.811018723220121, iteration: 387642
loss: 1.0211833715438843,grad_norm: 0.7126557155416099, iteration: 387643
loss: 0.9975349307060242,grad_norm: 0.7861427314596844, iteration: 387644
loss: 1.0247783660888672,grad_norm: 0.9872596347077014, iteration: 387645
loss: 0.9623176455497742,grad_norm: 0.7232705985961889, iteration: 387646
loss: 1.0337660312652588,grad_norm: 0.9999991208845015, iteration: 387647
loss: 1.0017794370651245,grad_norm: 0.8316979454350519, iteration: 387648
loss: 1.0039646625518799,grad_norm: 0.7522342902253346, iteration: 387649
loss: 0.9645203351974487,grad_norm: 0.8623892401575212, iteration: 387650
loss: 1.0043748617172241,grad_norm: 0.8413700822986915, iteration: 387651
loss: 1.0470545291900635,grad_norm: 0.8526935755748521, iteration: 387652
loss: 1.0086007118225098,grad_norm: 0.6877278489197628, iteration: 387653
loss: 0.9917987585067749,grad_norm: 0.8326259212501089, iteration: 387654
loss: 0.9974671602249146,grad_norm: 0.8465857911600292, iteration: 387655
loss: 1.0186690092086792,grad_norm: 1.0000000409733418, iteration: 387656
loss: 0.9899747967720032,grad_norm: 0.9881813576488211, iteration: 387657
loss: 1.021554708480835,grad_norm: 0.7934773426689692, iteration: 387658
loss: 1.004830002784729,grad_norm: 0.8807873974336831, iteration: 387659
loss: 1.0034009218215942,grad_norm: 0.7512861344372201, iteration: 387660
loss: 1.0772117376327515,grad_norm: 0.9999996607150677, iteration: 387661
loss: 0.9993715882301331,grad_norm: 0.8579252916212737, iteration: 387662
loss: 0.9443727135658264,grad_norm: 0.7546214557876518, iteration: 387663
loss: 1.0025126934051514,grad_norm: 0.9305349151102116, iteration: 387664
loss: 1.1025861501693726,grad_norm: 0.8122448463006353, iteration: 387665
loss: 0.9852139353752136,grad_norm: 0.7526270725226148, iteration: 387666
loss: 0.9783875942230225,grad_norm: 0.9999993644625156, iteration: 387667
loss: 1.0204999446868896,grad_norm: 0.8550710072508613, iteration: 387668
loss: 0.9959129691123962,grad_norm: 0.6885033479133362, iteration: 387669
loss: 1.0049251317977905,grad_norm: 0.74441842353711, iteration: 387670
loss: 1.007927656173706,grad_norm: 0.667674278362182, iteration: 387671
loss: 1.0312645435333252,grad_norm: 0.9609272946169017, iteration: 387672
loss: 0.9587306976318359,grad_norm: 0.841193878091541, iteration: 387673
loss: 0.9786436557769775,grad_norm: 0.734620331626769, iteration: 387674
loss: 0.9925158023834229,grad_norm: 0.8360432412297502, iteration: 387675
loss: 1.0025806427001953,grad_norm: 0.7207979377242762, iteration: 387676
loss: 0.9575626850128174,grad_norm: 0.7065616013974512, iteration: 387677
loss: 1.06407630443573,grad_norm: 0.9999998407003892, iteration: 387678
loss: 1.0116947889328003,grad_norm: 0.798325578665705, iteration: 387679
loss: 1.027122974395752,grad_norm: 0.9999999593026847, iteration: 387680
loss: 0.9550262093544006,grad_norm: 0.6919587041629521, iteration: 387681
loss: 0.9923020601272583,grad_norm: 0.6774905818280731, iteration: 387682
loss: 1.0038121938705444,grad_norm: 0.9893438148304136, iteration: 387683
loss: 0.9883928298950195,grad_norm: 0.7923405325582228, iteration: 387684
loss: 1.0115278959274292,grad_norm: 0.999999838014889, iteration: 387685
loss: 1.0120956897735596,grad_norm: 0.8218099993317933, iteration: 387686
loss: 1.003583550453186,grad_norm: 0.8100648805069357, iteration: 387687
loss: 1.0334060192108154,grad_norm: 0.7643607255289885, iteration: 387688
loss: 0.9969335794448853,grad_norm: 0.842412711557862, iteration: 387689
loss: 1.0883702039718628,grad_norm: 0.9999997346993387, iteration: 387690
loss: 1.0217342376708984,grad_norm: 0.793763177632173, iteration: 387691
loss: 0.9985012412071228,grad_norm: 0.8595914761013207, iteration: 387692
loss: 0.9926044940948486,grad_norm: 0.922021941836088, iteration: 387693
loss: 1.0925064086914062,grad_norm: 0.9999996617263966, iteration: 387694
loss: 0.9732368588447571,grad_norm: 0.8002051391005702, iteration: 387695
loss: 1.0766329765319824,grad_norm: 0.999999541799174, iteration: 387696
loss: 1.0648300647735596,grad_norm: 0.8119907687860349, iteration: 387697
loss: 1.1164761781692505,grad_norm: 0.9999994967404041, iteration: 387698
loss: 1.032894253730774,grad_norm: 0.9999991118837019, iteration: 387699
loss: 1.0807021856307983,grad_norm: 0.8441636109684753, iteration: 387700
loss: 1.0357016324996948,grad_norm: 0.8513958756819637, iteration: 387701
loss: 0.9938446283340454,grad_norm: 0.7768155695480315, iteration: 387702
loss: 0.950711727142334,grad_norm: 0.7202609430057823, iteration: 387703
loss: 1.0054844617843628,grad_norm: 0.646536428433202, iteration: 387704
loss: 1.0313993692398071,grad_norm: 0.9999996429028687, iteration: 387705
loss: 1.0456197261810303,grad_norm: 0.9999993560764021, iteration: 387706
loss: 1.0406821966171265,grad_norm: 0.9999993048122812, iteration: 387707
loss: 1.02603018283844,grad_norm: 0.9999991942443945, iteration: 387708
loss: 1.0919710397720337,grad_norm: 0.8609606599050769, iteration: 387709
loss: 1.006798267364502,grad_norm: 0.7313764945992097, iteration: 387710
loss: 1.0705136060714722,grad_norm: 0.9999992538674082, iteration: 387711
loss: 1.011350154876709,grad_norm: 0.9999991493940819, iteration: 387712
loss: 1.0035192966461182,grad_norm: 0.7486430718379385, iteration: 387713
loss: 0.9946432113647461,grad_norm: 0.8712076981517013, iteration: 387714
loss: 1.0148561000823975,grad_norm: 0.9999995511950182, iteration: 387715
loss: 1.0272663831710815,grad_norm: 0.7995784190815042, iteration: 387716
loss: 1.016196846961975,grad_norm: 0.7543085490697995, iteration: 387717
loss: 1.0409786701202393,grad_norm: 0.9999995325191169, iteration: 387718
loss: 1.0063389539718628,grad_norm: 0.9999991940493642, iteration: 387719
loss: 1.0286438465118408,grad_norm: 0.772782604486622, iteration: 387720
loss: 1.012276530265808,grad_norm: 0.7906937051106503, iteration: 387721
loss: 0.990569531917572,grad_norm: 0.9999991633487565, iteration: 387722
loss: 0.9955282807350159,grad_norm: 0.8940511131357897, iteration: 387723
loss: 1.026291847229004,grad_norm: 0.9999990771355938, iteration: 387724
loss: 1.0302685499191284,grad_norm: 0.7563386066363026, iteration: 387725
loss: 1.0001192092895508,grad_norm: 0.8768556134783115, iteration: 387726
loss: 1.047597885131836,grad_norm: 0.999999071732644, iteration: 387727
loss: 1.0436338186264038,grad_norm: 0.9999995048536603, iteration: 387728
loss: 1.0009456872940063,grad_norm: 0.8099892944109774, iteration: 387729
loss: 1.0553600788116455,grad_norm: 0.8114403814236033, iteration: 387730
loss: 1.0781328678131104,grad_norm: 0.9999995295836147, iteration: 387731
loss: 1.0170845985412598,grad_norm: 0.6926757901459761, iteration: 387732
loss: 1.0050803422927856,grad_norm: 0.903105882884915, iteration: 387733
loss: 1.0579217672348022,grad_norm: 0.8920481610007924, iteration: 387734
loss: 1.0149378776550293,grad_norm: 0.8369962209354864, iteration: 387735
loss: 1.023639440536499,grad_norm: 0.8662732859135165, iteration: 387736
loss: 1.0402835607528687,grad_norm: 0.9999991099596768, iteration: 387737
loss: 0.9658786654472351,grad_norm: 0.7228034763019674, iteration: 387738
loss: 1.093917965888977,grad_norm: 0.9999992166277261, iteration: 387739
loss: 0.9801421761512756,grad_norm: 0.6922553442325993, iteration: 387740
loss: 1.0049560070037842,grad_norm: 0.7657353581605963, iteration: 387741
loss: 0.9867115020751953,grad_norm: 0.9999999479687077, iteration: 387742
loss: 1.0534907579421997,grad_norm: 0.9999997705900905, iteration: 387743
loss: 0.9976168870925903,grad_norm: 0.999999060007256, iteration: 387744
loss: 1.0177839994430542,grad_norm: 0.8857182633272944, iteration: 387745
loss: 0.9844333529472351,grad_norm: 0.8729881736262548, iteration: 387746
loss: 0.9987912178039551,grad_norm: 0.8813279842117843, iteration: 387747
loss: 1.0256696939468384,grad_norm: 0.9999998869451439, iteration: 387748
loss: 1.0206719636917114,grad_norm: 0.9999990774172282, iteration: 387749
loss: 1.0398441553115845,grad_norm: 0.9999994872667567, iteration: 387750
loss: 1.0150017738342285,grad_norm: 0.7901490206628852, iteration: 387751
loss: 1.0275646448135376,grad_norm: 0.8128363026550564, iteration: 387752
loss: 1.0283231735229492,grad_norm: 0.9299856730634122, iteration: 387753
loss: 0.981471836566925,grad_norm: 0.845894847095058, iteration: 387754
loss: 0.9633849263191223,grad_norm: 0.9281504042395606, iteration: 387755
loss: 1.1004148721694946,grad_norm: 0.9560942096698976, iteration: 387756
loss: 0.9835510849952698,grad_norm: 0.7252712284624352, iteration: 387757
loss: 1.0387165546417236,grad_norm: 0.8363848089810043, iteration: 387758
loss: 1.0104374885559082,grad_norm: 0.9999998274985242, iteration: 387759
loss: 0.9960834383964539,grad_norm: 0.795512093845184, iteration: 387760
loss: 1.0692148208618164,grad_norm: 0.9999996818022923, iteration: 387761
loss: 0.9609021544456482,grad_norm: 0.7933017925671048, iteration: 387762
loss: 0.9868693947792053,grad_norm: 0.8375638757838337, iteration: 387763
loss: 1.0009572505950928,grad_norm: 0.8090501344838409, iteration: 387764
loss: 1.0652289390563965,grad_norm: 0.8201371854335844, iteration: 387765
loss: 1.069154977798462,grad_norm: 0.9999999523363524, iteration: 387766
loss: 0.9746764302253723,grad_norm: 0.8928957472710325, iteration: 387767
loss: 1.0556436777114868,grad_norm: 0.9350937948493562, iteration: 387768
loss: 1.0025211572647095,grad_norm: 0.8864620693687756, iteration: 387769
loss: 1.0669739246368408,grad_norm: 0.8677311322883119, iteration: 387770
loss: 1.0188621282577515,grad_norm: 0.8170188288835405, iteration: 387771
loss: 0.9948376417160034,grad_norm: 0.6924075362552929, iteration: 387772
loss: 1.0839054584503174,grad_norm: 0.8257465491620166, iteration: 387773
loss: 1.016663670539856,grad_norm: 0.9999991789012264, iteration: 387774
loss: 0.9748255014419556,grad_norm: 0.8040394753198378, iteration: 387775
loss: 0.9980207681655884,grad_norm: 0.9573834459240397, iteration: 387776
loss: 0.9749547243118286,grad_norm: 0.7049956855025654, iteration: 387777
loss: 1.074598789215088,grad_norm: 0.9999998798275563, iteration: 387778
loss: 0.9714495539665222,grad_norm: 0.8276487472941404, iteration: 387779
loss: 1.0106401443481445,grad_norm: 0.7695917407410837, iteration: 387780
loss: 1.1074591875076294,grad_norm: 0.9999996423898202, iteration: 387781
loss: 0.951079249382019,grad_norm: 0.9586940676060853, iteration: 387782
loss: 1.0311696529388428,grad_norm: 0.8847777531801977, iteration: 387783
loss: 0.993060827255249,grad_norm: 0.807598934152851, iteration: 387784
loss: 1.0118725299835205,grad_norm: 0.9999993538450809, iteration: 387785
loss: 1.0453922748565674,grad_norm: 0.999999315583014, iteration: 387786
loss: 0.9943321347236633,grad_norm: 0.8981043835671035, iteration: 387787
loss: 0.9671728014945984,grad_norm: 0.7105488909325636, iteration: 387788
loss: 1.0329489707946777,grad_norm: 0.9999993731633152, iteration: 387789
loss: 0.9718651175498962,grad_norm: 0.8497094951856136, iteration: 387790
loss: 1.019659161567688,grad_norm: 0.6354823882913266, iteration: 387791
loss: 1.0585730075836182,grad_norm: 0.7669190272605043, iteration: 387792
loss: 1.0263104438781738,grad_norm: 0.7528507586422408, iteration: 387793
loss: 1.0172523260116577,grad_norm: 0.7719362378684717, iteration: 387794
loss: 1.0010197162628174,grad_norm: 0.7260145957896348, iteration: 387795
loss: 1.0028022527694702,grad_norm: 0.7248616750066192, iteration: 387796
loss: 1.029503345489502,grad_norm: 0.9999992660386507, iteration: 387797
loss: 1.08767831325531,grad_norm: 0.8354152955473844, iteration: 387798
loss: 0.969900369644165,grad_norm: 0.7633919503282097, iteration: 387799
loss: 1.0342150926589966,grad_norm: 0.9999990783726235, iteration: 387800
loss: 1.013665795326233,grad_norm: 0.6880778743842896, iteration: 387801
loss: 0.9639678001403809,grad_norm: 0.9168467059065355, iteration: 387802
loss: 1.0102661848068237,grad_norm: 0.9999994178560097, iteration: 387803
loss: 0.9993780851364136,grad_norm: 0.9999996894278144, iteration: 387804
loss: 1.0429177284240723,grad_norm: 0.9999994132452942, iteration: 387805
loss: 1.0184311866760254,grad_norm: 0.6936777612115925, iteration: 387806
loss: 1.1331133842468262,grad_norm: 0.9999996580152252, iteration: 387807
loss: 1.0806286334991455,grad_norm: 0.9999993988929416, iteration: 387808
loss: 0.9966984987258911,grad_norm: 0.7454222480937526, iteration: 387809
loss: 1.0364232063293457,grad_norm: 0.9018439812111928, iteration: 387810
loss: 0.9878573417663574,grad_norm: 0.7717064439719133, iteration: 387811
loss: 0.9989902973175049,grad_norm: 0.9999990368161578, iteration: 387812
loss: 1.0079492330551147,grad_norm: 0.9999998038124277, iteration: 387813
loss: 1.037709355354309,grad_norm: 0.9999990722184767, iteration: 387814
loss: 0.9943310022354126,grad_norm: 0.826999391021234, iteration: 387815
loss: 1.0500901937484741,grad_norm: 0.9999991783213106, iteration: 387816
loss: 0.9692944288253784,grad_norm: 0.8897612490891192, iteration: 387817
loss: 1.03468918800354,grad_norm: 0.9948123362136486, iteration: 387818
loss: 0.9774944186210632,grad_norm: 0.9999997756928042, iteration: 387819
loss: 1.0301945209503174,grad_norm: 0.7657725841729428, iteration: 387820
loss: 1.0470246076583862,grad_norm: 0.9999998057285631, iteration: 387821
loss: 1.0002467632293701,grad_norm: 0.999999422679291, iteration: 387822
loss: 1.021833062171936,grad_norm: 0.8544566741914451, iteration: 387823
loss: 1.0568554401397705,grad_norm: 0.9999997651512509, iteration: 387824
loss: 0.9766365885734558,grad_norm: 0.9999995037846563, iteration: 387825
loss: 0.9888328313827515,grad_norm: 0.6891921107737745, iteration: 387826
loss: 0.9901593327522278,grad_norm: 0.8251505827885699, iteration: 387827
loss: 1.0028877258300781,grad_norm: 0.8172871907938357, iteration: 387828
loss: 1.0152500867843628,grad_norm: 0.8536911611925577, iteration: 387829
loss: 1.0155138969421387,grad_norm: 0.7171180106672501, iteration: 387830
loss: 1.0070900917053223,grad_norm: 0.7905101022631768, iteration: 387831
loss: 0.9888809323310852,grad_norm: 0.7274643318937621, iteration: 387832
loss: 0.9970031380653381,grad_norm: 0.6617486675744946, iteration: 387833
loss: 1.0662429332733154,grad_norm: 0.9999997550385278, iteration: 387834
loss: 1.0868827104568481,grad_norm: 0.841924315766266, iteration: 387835
loss: 1.0062710046768188,grad_norm: 0.9946988373164896, iteration: 387836
loss: 0.9815685153007507,grad_norm: 0.837550339893604, iteration: 387837
loss: 0.9790684580802917,grad_norm: 0.7397973630194657, iteration: 387838
loss: 1.0246241092681885,grad_norm: 0.9999993428572009, iteration: 387839
loss: 0.9594286680221558,grad_norm: 0.783727431597645, iteration: 387840
loss: 0.9954079985618591,grad_norm: 0.8881062466951043, iteration: 387841
loss: 1.0142139196395874,grad_norm: 0.9999992409303058, iteration: 387842
loss: 0.9871616959571838,grad_norm: 0.80550284668225, iteration: 387843
loss: 0.9713141322135925,grad_norm: 0.8240673021649206, iteration: 387844
loss: 1.0030860900878906,grad_norm: 0.999999768670726, iteration: 387845
loss: 0.9424338340759277,grad_norm: 0.7195329786447194, iteration: 387846
loss: 0.9746891856193542,grad_norm: 0.902970582161011, iteration: 387847
loss: 1.01865816116333,grad_norm: 0.7777260889423442, iteration: 387848
loss: 1.0002919435501099,grad_norm: 0.7171801855994544, iteration: 387849
loss: 0.9979584217071533,grad_norm: 0.7557720149032299, iteration: 387850
loss: 0.9892198443412781,grad_norm: 0.7582830272998982, iteration: 387851
loss: 0.9988672733306885,grad_norm: 0.8228678954443678, iteration: 387852
loss: 0.9925810694694519,grad_norm: 0.6009255668772825, iteration: 387853
loss: 0.9734369516372681,grad_norm: 0.9999998211973263, iteration: 387854
loss: 1.0822508335113525,grad_norm: 0.9999990288445763, iteration: 387855
loss: 1.1046245098114014,grad_norm: 0.9999992227831711, iteration: 387856
loss: 1.0599955320358276,grad_norm: 0.9999998171361227, iteration: 387857
loss: 0.9835785627365112,grad_norm: 0.6562173855222805, iteration: 387858
loss: 1.061661958694458,grad_norm: 0.7678267502435936, iteration: 387859
loss: 1.0313174724578857,grad_norm: 0.9999997996511902, iteration: 387860
loss: 1.055187702178955,grad_norm: 0.8584671335295596, iteration: 387861
loss: 1.0371407270431519,grad_norm: 0.999999729934856, iteration: 387862
loss: 1.0006264448165894,grad_norm: 0.7413293173407693, iteration: 387863
loss: 1.0956566333770752,grad_norm: 0.9999999821480159, iteration: 387864
loss: 0.9662011861801147,grad_norm: 0.7908322808091356, iteration: 387865
loss: 1.0059382915496826,grad_norm: 0.8319206978918017, iteration: 387866
loss: 1.034206509590149,grad_norm: 0.7542057817322609, iteration: 387867
loss: 1.0030126571655273,grad_norm: 0.9999997247140128, iteration: 387868
loss: 1.183546781539917,grad_norm: 0.9999992863809269, iteration: 387869
loss: 1.0193078517913818,grad_norm: 0.9999992601829458, iteration: 387870
loss: 1.0889112949371338,grad_norm: 0.919664924052444, iteration: 387871
loss: 1.0360015630722046,grad_norm: 0.907399240555907, iteration: 387872
loss: 1.0674896240234375,grad_norm: 0.7795427348157443, iteration: 387873
loss: 1.0276018381118774,grad_norm: 0.6799254668742585, iteration: 387874
loss: 1.010757565498352,grad_norm: 0.7761467439423612, iteration: 387875
loss: 0.9946271181106567,grad_norm: 0.7298183081357791, iteration: 387876
loss: 0.9930684566497803,grad_norm: 0.7656538575120392, iteration: 387877
loss: 0.9850453734397888,grad_norm: 0.9999993261372698, iteration: 387878
loss: 1.0234594345092773,grad_norm: 0.8156869504020705, iteration: 387879
loss: 0.9580457210540771,grad_norm: 0.8338504471536515, iteration: 387880
loss: 1.0622018575668335,grad_norm: 0.8692513061130059, iteration: 387881
loss: 1.0217854976654053,grad_norm: 0.8037852344770655, iteration: 387882
loss: 1.0586661100387573,grad_norm: 0.9999992276148824, iteration: 387883
loss: 1.0084455013275146,grad_norm: 0.8726555352925583, iteration: 387884
loss: 0.9499728679656982,grad_norm: 0.7748841950356973, iteration: 387885
loss: 1.0536396503448486,grad_norm: 0.90708641903211, iteration: 387886
loss: 0.9713016152381897,grad_norm: 0.6175791484479731, iteration: 387887
loss: 0.9562755823135376,grad_norm: 0.8146874974603522, iteration: 387888
loss: 1.0303243398666382,grad_norm: 0.9999995011353688, iteration: 387889
loss: 1.0071964263916016,grad_norm: 0.6912540514729926, iteration: 387890
loss: 1.00065279006958,grad_norm: 0.671713605330906, iteration: 387891
loss: 0.9872963428497314,grad_norm: 0.7697920141592255, iteration: 387892
loss: 1.0036532878875732,grad_norm: 0.7782767021486162, iteration: 387893
loss: 1.0041817426681519,grad_norm: 0.9999990236562905, iteration: 387894
loss: 1.0129523277282715,grad_norm: 0.9097886652915513, iteration: 387895
loss: 1.0641201734542847,grad_norm: 0.9999992685173977, iteration: 387896
loss: 1.0057029724121094,grad_norm: 0.8994097078473321, iteration: 387897
loss: 1.020883321762085,grad_norm: 0.9999994008275201, iteration: 387898
loss: 1.0916765928268433,grad_norm: 0.8190795451763254, iteration: 387899
loss: 0.9808341860771179,grad_norm: 0.7694305547434048, iteration: 387900
loss: 1.0290048122406006,grad_norm: 0.759554271893775, iteration: 387901
loss: 1.0701333284378052,grad_norm: 0.9999996288539791, iteration: 387902
loss: 1.0280544757843018,grad_norm: 0.7534070097790257, iteration: 387903
loss: 1.010341763496399,grad_norm: 0.8014837736482073, iteration: 387904
loss: 0.9956488609313965,grad_norm: 0.9125772474207544, iteration: 387905
loss: 0.9938562512397766,grad_norm: 0.7204836070106858, iteration: 387906
loss: 1.1550792455673218,grad_norm: 0.9999993676099542, iteration: 387907
loss: 0.9934024810791016,grad_norm: 0.9214566343108209, iteration: 387908
loss: 1.0603387355804443,grad_norm: 0.9999993027142942, iteration: 387909
loss: 1.010109305381775,grad_norm: 0.7203613671709413, iteration: 387910
loss: 1.076723337173462,grad_norm: 0.9999997995298815, iteration: 387911
loss: 1.0034008026123047,grad_norm: 0.7730354818207165, iteration: 387912
loss: 1.0332715511322021,grad_norm: 0.9244066328306595, iteration: 387913
loss: 0.98814857006073,grad_norm: 0.839330405305292, iteration: 387914
loss: 1.0036629438400269,grad_norm: 0.744099321109031, iteration: 387915
loss: 0.9794057607650757,grad_norm: 0.8544544580415999, iteration: 387916
loss: 0.9642859697341919,grad_norm: 0.7019239009672202, iteration: 387917
loss: 1.0025705099105835,grad_norm: 0.9999998447374949, iteration: 387918
loss: 1.0100653171539307,grad_norm: 0.8301415066722901, iteration: 387919
loss: 0.9473474621772766,grad_norm: 0.7710088405869138, iteration: 387920
loss: 1.0345009565353394,grad_norm: 0.8766974615936517, iteration: 387921
loss: 1.0018835067749023,grad_norm: 0.7440202907007009, iteration: 387922
loss: 1.0050148963928223,grad_norm: 0.7338833597519323, iteration: 387923
loss: 0.9845362901687622,grad_norm: 0.8321097667761009, iteration: 387924
loss: 1.0086702108383179,grad_norm: 0.8860658414389503, iteration: 387925
loss: 1.046681523323059,grad_norm: 0.9999992624441041, iteration: 387926
loss: 0.9671018719673157,grad_norm: 0.8928173496102079, iteration: 387927
loss: 1.017051100730896,grad_norm: 0.7579678609079951, iteration: 387928
loss: 1.0497647523880005,grad_norm: 0.6485928473848898, iteration: 387929
loss: 1.0274254083633423,grad_norm: 0.8052427941358744, iteration: 387930
loss: 1.0755635499954224,grad_norm: 0.9999997669266579, iteration: 387931
loss: 0.9562991857528687,grad_norm: 0.6647173731512537, iteration: 387932
loss: 1.0476279258728027,grad_norm: 0.8384135787643332, iteration: 387933
loss: 1.0267361402511597,grad_norm: 0.7127138697212491, iteration: 387934
loss: 0.9589220881462097,grad_norm: 0.680485057384074, iteration: 387935
loss: 1.023244023323059,grad_norm: 0.892696999875102, iteration: 387936
loss: 0.9918463826179504,grad_norm: 0.8000157203809427, iteration: 387937
loss: 0.9968643188476562,grad_norm: 0.8332601931402513, iteration: 387938
loss: 1.007508635520935,grad_norm: 0.7456305857131179, iteration: 387939
loss: 0.9564472436904907,grad_norm: 0.837802547239701, iteration: 387940
loss: 1.0099726915359497,grad_norm: 0.7738475341822734, iteration: 387941
loss: 0.9844212532043457,grad_norm: 0.8135182709786137, iteration: 387942
loss: 1.0402657985687256,grad_norm: 0.9450679496475427, iteration: 387943
loss: 1.0992563962936401,grad_norm: 0.9999998018317048, iteration: 387944
loss: 1.0825515985488892,grad_norm: 0.8020090491286807, iteration: 387945
loss: 1.0314891338348389,grad_norm: 0.9033074477156062, iteration: 387946
loss: 1.025124192237854,grad_norm: 0.9999997158971251, iteration: 387947
loss: 1.0046470165252686,grad_norm: 0.8582676925189943, iteration: 387948
loss: 1.0170772075653076,grad_norm: 0.7605389844064582, iteration: 387949
loss: 1.0114589929580688,grad_norm: 0.8460484210379494, iteration: 387950
loss: 1.0023967027664185,grad_norm: 0.7609417997644493, iteration: 387951
loss: 1.00956130027771,grad_norm: 0.9552873994055974, iteration: 387952
loss: 1.0047950744628906,grad_norm: 0.868066440768582, iteration: 387953
loss: 1.0332204103469849,grad_norm: 0.9999992813178976, iteration: 387954
loss: 1.0409698486328125,grad_norm: 0.9999994041059651, iteration: 387955
loss: 1.012062430381775,grad_norm: 0.704144908246441, iteration: 387956
loss: 0.9897508025169373,grad_norm: 0.9348735496730834, iteration: 387957
loss: 0.9914141297340393,grad_norm: 0.8050352655380212, iteration: 387958
loss: 1.1483062505722046,grad_norm: 0.9999999370456598, iteration: 387959
loss: 1.0360068082809448,grad_norm: 0.999999319432788, iteration: 387960
loss: 0.9930909276008606,grad_norm: 0.9999995108643411, iteration: 387961
loss: 1.000307559967041,grad_norm: 0.999890522887157, iteration: 387962
loss: 0.9279373288154602,grad_norm: 0.9616106307647259, iteration: 387963
loss: 0.986792802810669,grad_norm: 0.6935973008224686, iteration: 387964
loss: 1.1625899076461792,grad_norm: 0.898983693698611, iteration: 387965
loss: 1.0746121406555176,grad_norm: 0.9999992454787996, iteration: 387966
loss: 1.0365104675292969,grad_norm: 0.999999585678687, iteration: 387967
loss: 0.9718607068061829,grad_norm: 0.7983503902135335, iteration: 387968
loss: 0.9858814477920532,grad_norm: 0.7764357169425515, iteration: 387969
loss: 0.9871050715446472,grad_norm: 0.7004553247487766, iteration: 387970
loss: 0.9849482774734497,grad_norm: 0.9999999130804118, iteration: 387971
loss: 1.0652824640274048,grad_norm: 0.7205335496842904, iteration: 387972
loss: 1.0272397994995117,grad_norm: 0.8592094205916151, iteration: 387973
loss: 1.0560016632080078,grad_norm: 0.9286612180219497, iteration: 387974
loss: 0.9563460350036621,grad_norm: 0.9999993898336245, iteration: 387975
loss: 1.0062751770019531,grad_norm: 0.9999994106772095, iteration: 387976
loss: 0.9677723050117493,grad_norm: 0.8466216127358251, iteration: 387977
loss: 0.9956179857254028,grad_norm: 0.9999989353779718, iteration: 387978
loss: 1.0140490531921387,grad_norm: 0.9999994786629681, iteration: 387979
loss: 0.9887505173683167,grad_norm: 0.7955296647155357, iteration: 387980
loss: 0.9975166320800781,grad_norm: 0.7959445553080606, iteration: 387981
loss: 1.0213725566864014,grad_norm: 0.6481299471992688, iteration: 387982
loss: 1.0096898078918457,grad_norm: 0.9333097793088114, iteration: 387983
loss: 0.9691605567932129,grad_norm: 0.834758034844633, iteration: 387984
loss: 0.9835237264633179,grad_norm: 0.9190083245260534, iteration: 387985
loss: 1.0092412233352661,grad_norm: 0.8940140992600467, iteration: 387986
loss: 1.1005306243896484,grad_norm: 0.9999992833096807, iteration: 387987
loss: 1.0085219144821167,grad_norm: 0.7195870984655699, iteration: 387988
loss: 1.0461766719818115,grad_norm: 0.7310688455202211, iteration: 387989
loss: 1.0140618085861206,grad_norm: 0.9978879961126036, iteration: 387990
loss: 1.0004440546035767,grad_norm: 0.8936195333449978, iteration: 387991
loss: 0.9588044881820679,grad_norm: 0.7987370843771158, iteration: 387992
loss: 0.9941675066947937,grad_norm: 0.7947930214637914, iteration: 387993
loss: 0.9865473508834839,grad_norm: 0.8650361736428497, iteration: 387994
loss: 0.9914975762367249,grad_norm: 0.8515418958544259, iteration: 387995
loss: 1.0393171310424805,grad_norm: 0.9999993003121888, iteration: 387996
loss: 1.0064879655838013,grad_norm: 0.8745165602672942, iteration: 387997
loss: 1.0394606590270996,grad_norm: 0.9999990374287371, iteration: 387998
loss: 1.0329786539077759,grad_norm: 0.999999273289225, iteration: 387999
loss: 1.0560158491134644,grad_norm: 0.999999842227061, iteration: 388000
loss: 0.9825603365898132,grad_norm: 0.7351267915048131, iteration: 388001
loss: 1.0018926858901978,grad_norm: 0.999999030255113, iteration: 388002
loss: 0.9997047185897827,grad_norm: 0.6755373780098766, iteration: 388003
loss: 1.0154885053634644,grad_norm: 0.9999992132917034, iteration: 388004
loss: 0.9789101481437683,grad_norm: 0.7614445564364325, iteration: 388005
loss: 1.0009578466415405,grad_norm: 0.7773289215908061, iteration: 388006
loss: 0.9876284003257751,grad_norm: 0.850384859335924, iteration: 388007
loss: 0.9873209595680237,grad_norm: 0.9999994054860755, iteration: 388008
loss: 1.1329810619354248,grad_norm: 0.8099064066069136, iteration: 388009
loss: 0.9786877036094666,grad_norm: 0.7257419740581906, iteration: 388010
loss: 1.0234547853469849,grad_norm: 0.7933205467016916, iteration: 388011
loss: 0.9832214117050171,grad_norm: 0.8539510185016688, iteration: 388012
loss: 0.9906731843948364,grad_norm: 0.9319134449353377, iteration: 388013
loss: 1.0036622285842896,grad_norm: 0.8517826416249632, iteration: 388014
loss: 0.9984679222106934,grad_norm: 0.7737578239057832, iteration: 388015
loss: 1.0564881563186646,grad_norm: 0.9999992376764008, iteration: 388016
loss: 1.1426594257354736,grad_norm: 0.9999992136284208, iteration: 388017
loss: 1.0069001913070679,grad_norm: 0.9999991805080143, iteration: 388018
loss: 0.9751055836677551,grad_norm: 0.8582026260579952, iteration: 388019
loss: 1.035383939743042,grad_norm: 0.9999997453818132, iteration: 388020
loss: 1.0095014572143555,grad_norm: 0.9999992598426176, iteration: 388021
loss: 0.9882510304450989,grad_norm: 0.7809420446169092, iteration: 388022
loss: 0.9982849955558777,grad_norm: 0.9999994122891146, iteration: 388023
loss: 1.0191062688827515,grad_norm: 0.9999993299330414, iteration: 388024
loss: 0.9600892066955566,grad_norm: 0.7587436326906218, iteration: 388025
loss: 1.002015233039856,grad_norm: 0.7324281836257652, iteration: 388026
loss: 1.0267409086227417,grad_norm: 0.776330661851012, iteration: 388027
loss: 1.1192986965179443,grad_norm: 0.9451988834073777, iteration: 388028
loss: 1.049835205078125,grad_norm: 0.9999997215246941, iteration: 388029
loss: 1.0586010217666626,grad_norm: 0.9999992359858687, iteration: 388030
loss: 1.075920820236206,grad_norm: 0.9999992844499381, iteration: 388031
loss: 0.9729518294334412,grad_norm: 0.999999648626203, iteration: 388032
loss: 1.019243836402893,grad_norm: 0.832623999055054, iteration: 388033
loss: 1.0142931938171387,grad_norm: 0.9999992087868238, iteration: 388034
loss: 1.0072938203811646,grad_norm: 0.7277900839172178, iteration: 388035
loss: 1.0051591396331787,grad_norm: 0.959357951127101, iteration: 388036
loss: 1.0559967756271362,grad_norm: 0.9760963894033235, iteration: 388037
loss: 1.0279110670089722,grad_norm: 0.7845395426680628, iteration: 388038
loss: 1.0340723991394043,grad_norm: 0.7209507303084183, iteration: 388039
loss: 1.0270217657089233,grad_norm: 0.929426801295637, iteration: 388040
loss: 1.054341197013855,grad_norm: 0.9999999017916008, iteration: 388041
loss: 1.0175786018371582,grad_norm: 0.9999998179914215, iteration: 388042
loss: 1.0730749368667603,grad_norm: 0.9999998395692544, iteration: 388043
loss: 1.0832985639572144,grad_norm: 0.9999995875302685, iteration: 388044
loss: 1.0195051431655884,grad_norm: 0.9999997657872264, iteration: 388045
loss: 0.9901896119117737,grad_norm: 0.7716232580466139, iteration: 388046
loss: 0.9810367226600647,grad_norm: 0.8010099490757115, iteration: 388047
loss: 1.0216752290725708,grad_norm: 0.7851420718726518, iteration: 388048
loss: 0.982848048210144,grad_norm: 0.8248227211525346, iteration: 388049
loss: 1.0124703645706177,grad_norm: 0.9530967155630362, iteration: 388050
loss: 0.9843486547470093,grad_norm: 0.9999990623216302, iteration: 388051
loss: 0.9720321297645569,grad_norm: 0.7197968799697593, iteration: 388052
loss: 1.0700206756591797,grad_norm: 0.9999996452466254, iteration: 388053
loss: 0.9844052195549011,grad_norm: 0.8071068694916654, iteration: 388054
loss: 1.069153904914856,grad_norm: 0.9999997992173979, iteration: 388055
loss: 0.9936378002166748,grad_norm: 0.8421889934333515, iteration: 388056
loss: 1.0283445119857788,grad_norm: 0.8857319448827891, iteration: 388057
loss: 1.0365948677062988,grad_norm: 0.807774629344795, iteration: 388058
loss: 1.016318678855896,grad_norm: 0.8298147135131985, iteration: 388059
loss: 1.0247681140899658,grad_norm: 0.6913912584208337, iteration: 388060
loss: 1.0114710330963135,grad_norm: 0.9999994095377228, iteration: 388061
loss: 1.004854440689087,grad_norm: 0.727030410765371, iteration: 388062
loss: 1.0443671941757202,grad_norm: 0.9173033762157662, iteration: 388063
loss: 1.0089160203933716,grad_norm: 0.8738298005767932, iteration: 388064
loss: 0.9960995316505432,grad_norm: 0.9396936084509906, iteration: 388065
loss: 0.9707817435264587,grad_norm: 0.8133267332808192, iteration: 388066
loss: 0.9977458715438843,grad_norm: 0.7738393526233553, iteration: 388067
loss: 1.024937391281128,grad_norm: 0.8660786598608364, iteration: 388068
loss: 0.9499290585517883,grad_norm: 0.6302824994742997, iteration: 388069
loss: 0.9811673164367676,grad_norm: 0.7499644304473457, iteration: 388070
loss: 1.0026936531066895,grad_norm: 0.7327999915543822, iteration: 388071
loss: 0.9976773858070374,grad_norm: 0.9999992273408017, iteration: 388072
loss: 1.003517746925354,grad_norm: 0.8485033377614247, iteration: 388073
loss: 1.0211392641067505,grad_norm: 0.7549144199378638, iteration: 388074
loss: 1.0098797082901,grad_norm: 0.8030969925926508, iteration: 388075
loss: 0.967871904373169,grad_norm: 0.8017890604353187, iteration: 388076
loss: 1.008061170578003,grad_norm: 0.8859730189091624, iteration: 388077
loss: 0.9942132234573364,grad_norm: 0.9999996539580145, iteration: 388078
loss: 1.0574544668197632,grad_norm: 0.9999998598014307, iteration: 388079
loss: 0.9926562905311584,grad_norm: 0.9999993230914831, iteration: 388080
loss: 1.0384920835494995,grad_norm: 0.8821262929369375, iteration: 388081
loss: 1.0366911888122559,grad_norm: 0.8193446052188459, iteration: 388082
loss: 1.0160452127456665,grad_norm: 0.7982633234344415, iteration: 388083
loss: 0.985293447971344,grad_norm: 0.688807351294831, iteration: 388084
loss: 1.0654629468917847,grad_norm: 0.9001898738323608, iteration: 388085
loss: 1.0049597024917603,grad_norm: 0.9599066207821626, iteration: 388086
loss: 0.9877629280090332,grad_norm: 0.7947063690160509, iteration: 388087
loss: 1.0470669269561768,grad_norm: 0.8599431412600316, iteration: 388088
loss: 0.9847972393035889,grad_norm: 0.8108038035083801, iteration: 388089
loss: 1.0226540565490723,grad_norm: 0.9999998785729077, iteration: 388090
loss: 1.020545244216919,grad_norm: 0.9999992510076328, iteration: 388091
loss: 1.0328490734100342,grad_norm: 0.8448507927909168, iteration: 388092
loss: 1.0064555406570435,grad_norm: 0.9999998228897323, iteration: 388093
loss: 0.9867282509803772,grad_norm: 0.7299926344843396, iteration: 388094
loss: 0.9845417737960815,grad_norm: 0.9999999028340143, iteration: 388095
loss: 1.0862371921539307,grad_norm: 0.8346817789496832, iteration: 388096
loss: 1.0130292177200317,grad_norm: 0.723972865221359, iteration: 388097
loss: 1.0071964263916016,grad_norm: 0.7739514264492321, iteration: 388098
loss: 0.9818586707115173,grad_norm: 0.7756564785828108, iteration: 388099
loss: 1.1390331983566284,grad_norm: 0.9999992931226508, iteration: 388100
loss: 0.9890697598457336,grad_norm: 0.7689167128811748, iteration: 388101
loss: 1.0089064836502075,grad_norm: 0.8084377904024942, iteration: 388102
loss: 1.1088941097259521,grad_norm: 0.8039521890435266, iteration: 388103
loss: 1.0625786781311035,grad_norm: 0.8859674026308985, iteration: 388104
loss: 0.9857223033905029,grad_norm: 0.8282005601797144, iteration: 388105
loss: 1.0448685884475708,grad_norm: 0.7877810406552013, iteration: 388106
loss: 1.0825273990631104,grad_norm: 0.9999998671992797, iteration: 388107
loss: 1.0211316347122192,grad_norm: 0.8047696718417289, iteration: 388108
loss: 1.0190802812576294,grad_norm: 0.9999995817704832, iteration: 388109
loss: 0.9978585243225098,grad_norm: 0.8766608291440237, iteration: 388110
loss: 1.1124471426010132,grad_norm: 0.9999990962295638, iteration: 388111
loss: 1.0203917026519775,grad_norm: 0.6286841595696963, iteration: 388112
loss: 1.141671895980835,grad_norm: 0.999999241084156, iteration: 388113
loss: 1.0168131589889526,grad_norm: 0.8672291487964271, iteration: 388114
loss: 0.9816151261329651,grad_norm: 0.7083006938098942, iteration: 388115
loss: 1.10027015209198,grad_norm: 0.944211785972578, iteration: 388116
loss: 1.0789859294891357,grad_norm: 0.9999998466792784, iteration: 388117
loss: 1.0050592422485352,grad_norm: 0.7598077981040273, iteration: 388118
loss: 1.008898138999939,grad_norm: 0.9999990113531207, iteration: 388119
loss: 1.003067135810852,grad_norm: 0.8087728853864453, iteration: 388120
loss: 1.0402942895889282,grad_norm: 0.9999991791548232, iteration: 388121
loss: 0.9756396412849426,grad_norm: 0.847652271440386, iteration: 388122
loss: 1.0145721435546875,grad_norm: 0.9999996883509007, iteration: 388123
loss: 1.004074215888977,grad_norm: 0.7373905751854415, iteration: 388124
loss: 1.0267066955566406,grad_norm: 0.9999993730002918, iteration: 388125
loss: 1.1561399698257446,grad_norm: 0.9999994439203294, iteration: 388126
loss: 1.0375189781188965,grad_norm: 0.8086773992055233, iteration: 388127
loss: 0.9868248701095581,grad_norm: 0.8115038750544437, iteration: 388128
loss: 1.075736403465271,grad_norm: 0.9999997268920962, iteration: 388129
loss: 0.9728550910949707,grad_norm: 0.7707491940260718, iteration: 388130
loss: 1.0033090114593506,grad_norm: 0.947295125329662, iteration: 388131
loss: 0.9466882944107056,grad_norm: 0.6894264467424644, iteration: 388132
loss: 1.0183093547821045,grad_norm: 0.9999994068964155, iteration: 388133
loss: 1.0130927562713623,grad_norm: 0.7670502219762305, iteration: 388134
loss: 1.057070255279541,grad_norm: 0.9999998375232609, iteration: 388135
loss: 1.0276954174041748,grad_norm: 0.9979308062199201, iteration: 388136
loss: 1.0219111442565918,grad_norm: 0.9016013868100049, iteration: 388137
loss: 1.0262781381607056,grad_norm: 0.9382690606646141, iteration: 388138
loss: 1.0314998626708984,grad_norm: 0.9999998897207354, iteration: 388139
loss: 0.9869375824928284,grad_norm: 0.7709883091414186, iteration: 388140
loss: 1.010823369026184,grad_norm: 0.911475863659387, iteration: 388141
loss: 1.0201553106307983,grad_norm: 0.7323354077578177, iteration: 388142
loss: 1.0429610013961792,grad_norm: 0.9999991216202362, iteration: 388143
loss: 0.9959771633148193,grad_norm: 0.7578523507823173, iteration: 388144
loss: 1.0262726545333862,grad_norm: 0.9212418465872086, iteration: 388145
loss: 0.989254891872406,grad_norm: 0.867086575501642, iteration: 388146
loss: 1.0027190446853638,grad_norm: 0.9848937982880491, iteration: 388147
loss: 1.166654109954834,grad_norm: 0.9999997797644837, iteration: 388148
loss: 1.0213444232940674,grad_norm: 0.7725394482405116, iteration: 388149
loss: 0.9759002327919006,grad_norm: 0.6854077089039297, iteration: 388150
loss: 1.1367545127868652,grad_norm: 0.9999994022812001, iteration: 388151
loss: 1.0865834951400757,grad_norm: 0.815316812559362, iteration: 388152
loss: 1.0421593189239502,grad_norm: 0.9999999512807467, iteration: 388153
loss: 0.9711989760398865,grad_norm: 0.8248400091571295, iteration: 388154
loss: 0.9953824877738953,grad_norm: 0.787681272872688, iteration: 388155
loss: 1.0317779779434204,grad_norm: 0.9999999486905137, iteration: 388156
loss: 1.000836968421936,grad_norm: 0.866461271520042, iteration: 388157
loss: 1.0100764036178589,grad_norm: 0.760372737215563, iteration: 388158
loss: 0.9635884761810303,grad_norm: 0.8682913678382748, iteration: 388159
loss: 1.0601316690444946,grad_norm: 0.9999997668703378, iteration: 388160
loss: 0.9932988882064819,grad_norm: 0.9999990860623201, iteration: 388161
loss: 1.0132570266723633,grad_norm: 0.9999994827772138, iteration: 388162
loss: 1.048608422279358,grad_norm: 0.999999477093509, iteration: 388163
loss: 1.0280790328979492,grad_norm: 0.9999998509947347, iteration: 388164
loss: 1.123182773590088,grad_norm: 0.9999996021541907, iteration: 388165
loss: 0.9664942026138306,grad_norm: 0.7556829389553937, iteration: 388166
loss: 0.9862043261528015,grad_norm: 0.7225031739446012, iteration: 388167
loss: 0.9758228063583374,grad_norm: 0.9521728313490986, iteration: 388168
loss: 1.1705811023712158,grad_norm: 0.9999996549978994, iteration: 388169
loss: 0.9905001521110535,grad_norm: 0.8850127593990201, iteration: 388170
loss: 0.9794454574584961,grad_norm: 0.7744989959461217, iteration: 388171
loss: 1.0393807888031006,grad_norm: 0.795661850708164, iteration: 388172
loss: 1.0156184434890747,grad_norm: 0.9999990633019339, iteration: 388173
loss: 1.0081428289413452,grad_norm: 0.8355759609010788, iteration: 388174
loss: 1.0830978155136108,grad_norm: 0.999999655217833, iteration: 388175
loss: 1.0241292715072632,grad_norm: 0.8640494949821408, iteration: 388176
loss: 1.0670580863952637,grad_norm: 0.9999999290453891, iteration: 388177
loss: 1.017216444015503,grad_norm: 0.83531788576142, iteration: 388178
loss: 0.9591807723045349,grad_norm: 0.7757658906379481, iteration: 388179
loss: 1.0585308074951172,grad_norm: 0.9999994943324465, iteration: 388180
loss: 1.023169994354248,grad_norm: 0.6848180299340546, iteration: 388181
loss: 0.9914043545722961,grad_norm: 0.8055185178448403, iteration: 388182
loss: 1.0839965343475342,grad_norm: 0.8998409565235593, iteration: 388183
loss: 1.0008975267410278,grad_norm: 0.7278347972833239, iteration: 388184
loss: 0.9931878447532654,grad_norm: 0.6515848815715379, iteration: 388185
loss: 1.0157872438430786,grad_norm: 0.9579081869552897, iteration: 388186
loss: 1.0237075090408325,grad_norm: 0.7973689174963063, iteration: 388187
loss: 1.1152186393737793,grad_norm: 0.9999995546872452, iteration: 388188
loss: 1.0011184215545654,grad_norm: 0.7127639174672564, iteration: 388189
loss: 1.0195438861846924,grad_norm: 0.9999995449324146, iteration: 388190
loss: 0.9736336469650269,grad_norm: 0.7293707529352251, iteration: 388191
loss: 0.967494547367096,grad_norm: 0.8441490858996082, iteration: 388192
loss: 1.0216643810272217,grad_norm: 0.801150043050589, iteration: 388193
loss: 1.0127394199371338,grad_norm: 0.9666319988236605, iteration: 388194
loss: 1.0011645555496216,grad_norm: 0.7703889156851172, iteration: 388195
loss: 1.1336495876312256,grad_norm: 0.9751025769271487, iteration: 388196
loss: 0.9923964142799377,grad_norm: 0.8893535546800684, iteration: 388197
loss: 0.9982770681381226,grad_norm: 0.8538253953416098, iteration: 388198
loss: 1.0905499458312988,grad_norm: 0.9999996661112224, iteration: 388199
loss: 1.0057834386825562,grad_norm: 0.6843698332973196, iteration: 388200
loss: 1.0164663791656494,grad_norm: 0.8545282190253558, iteration: 388201
loss: 0.9842961430549622,grad_norm: 0.9593064584540197, iteration: 388202
loss: 0.9951521158218384,grad_norm: 0.8364925669387743, iteration: 388203
loss: 1.000096082687378,grad_norm: 0.6297117044113576, iteration: 388204
loss: 1.0489321947097778,grad_norm: 0.9999999314908395, iteration: 388205
loss: 1.055374026298523,grad_norm: 0.9999996012776097, iteration: 388206
loss: 1.0311450958251953,grad_norm: 0.8653102908477551, iteration: 388207
loss: 0.981400728225708,grad_norm: 0.8158299447477191, iteration: 388208
loss: 0.974047839641571,grad_norm: 0.7320553746673826, iteration: 388209
loss: 1.0299009084701538,grad_norm: 0.999999421590799, iteration: 388210
loss: 0.9961100220680237,grad_norm: 0.7386955773045081, iteration: 388211
loss: 0.9438793659210205,grad_norm: 0.6981156300570632, iteration: 388212
loss: 1.016756534576416,grad_norm: 0.8103390302436481, iteration: 388213
loss: 0.9937363862991333,grad_norm: 0.8274052493208897, iteration: 388214
loss: 0.9995625615119934,grad_norm: 0.7604329761086531, iteration: 388215
loss: 1.003151774406433,grad_norm: 0.6787385609811045, iteration: 388216
loss: 1.0365902185440063,grad_norm: 0.7411370275904836, iteration: 388217
loss: 0.9877131581306458,grad_norm: 0.9999992214827315, iteration: 388218
loss: 0.9715094566345215,grad_norm: 0.9753782238650541, iteration: 388219
loss: 1.0015974044799805,grad_norm: 0.968051384071526, iteration: 388220
loss: 1.1182359457015991,grad_norm: 0.999999329004547, iteration: 388221
loss: 0.9807724356651306,grad_norm: 0.9106963977102213, iteration: 388222
loss: 1.0012954473495483,grad_norm: 0.9999990209785183, iteration: 388223
loss: 1.0287779569625854,grad_norm: 0.8856607532283486, iteration: 388224
loss: 0.9868215322494507,grad_norm: 0.8821526983458488, iteration: 388225
loss: 0.9852885007858276,grad_norm: 0.7835910292157964, iteration: 388226
loss: 0.9668497443199158,grad_norm: 0.7783497840590828, iteration: 388227
loss: 1.0148459672927856,grad_norm: 0.8626652340790032, iteration: 388228
loss: 0.9906013011932373,grad_norm: 0.8080852448023, iteration: 388229
loss: 1.1061418056488037,grad_norm: 0.9999991759591225, iteration: 388230
loss: 1.0310524702072144,grad_norm: 0.9999991859326153, iteration: 388231
loss: 0.9926140904426575,grad_norm: 0.8532700642468372, iteration: 388232
loss: 1.0227532386779785,grad_norm: 0.8469856983363823, iteration: 388233
loss: 1.0545293092727661,grad_norm: 0.8775765651537034, iteration: 388234
loss: 0.9580702781677246,grad_norm: 0.8295216449640245, iteration: 388235
loss: 0.96840500831604,grad_norm: 0.8081112817765695, iteration: 388236
loss: 1.1028560400009155,grad_norm: 0.9999994363979313, iteration: 388237
loss: 1.0641350746154785,grad_norm: 0.9999992145072919, iteration: 388238
loss: 1.0117995738983154,grad_norm: 0.6722811786978615, iteration: 388239
loss: 0.9750401973724365,grad_norm: 0.853333993991517, iteration: 388240
loss: 0.9986405968666077,grad_norm: 0.7205939070068563, iteration: 388241
loss: 0.9848814010620117,grad_norm: 0.8485046286073458, iteration: 388242
loss: 0.983640193939209,grad_norm: 0.726436087747213, iteration: 388243
loss: 0.9988248944282532,grad_norm: 0.6909852181211958, iteration: 388244
loss: 1.1581751108169556,grad_norm: 0.9999999647137013, iteration: 388245
loss: 0.9975483417510986,grad_norm: 0.8636298046900256, iteration: 388246
loss: 1.0157191753387451,grad_norm: 0.7580943116631458, iteration: 388247
loss: 1.245701551437378,grad_norm: 0.9999999243655862, iteration: 388248
loss: 0.9999338388442993,grad_norm: 0.78662767424378, iteration: 388249
loss: 1.0259321928024292,grad_norm: 0.7338205399984689, iteration: 388250
loss: 1.0034782886505127,grad_norm: 0.8764295230231313, iteration: 388251
loss: 1.006697416305542,grad_norm: 0.9521927497854046, iteration: 388252
loss: 0.9957477450370789,grad_norm: 0.6860436917882485, iteration: 388253
loss: 1.0304731130599976,grad_norm: 0.942088799545963, iteration: 388254
loss: 1.020240306854248,grad_norm: 0.8998998539241816, iteration: 388255
loss: 1.091705322265625,grad_norm: 0.9999993905729273, iteration: 388256
loss: 0.9979027509689331,grad_norm: 0.6596862170137808, iteration: 388257
loss: 1.0066862106323242,grad_norm: 0.7828091994119708, iteration: 388258
loss: 1.0608725547790527,grad_norm: 0.7766159074039731, iteration: 388259
loss: 0.991350531578064,grad_norm: 0.7048330179199436, iteration: 388260
loss: 1.0467618703842163,grad_norm: 0.77990821662665, iteration: 388261
loss: 1.024795413017273,grad_norm: 0.7781354671589178, iteration: 388262
loss: 0.9945076704025269,grad_norm: 0.7521229098932446, iteration: 388263
loss: 0.9576285481452942,grad_norm: 0.9020198707770903, iteration: 388264
loss: 0.9923690557479858,grad_norm: 0.9999999072336069, iteration: 388265
loss: 1.026872158050537,grad_norm: 0.945526130699496, iteration: 388266
loss: 1.0266849994659424,grad_norm: 0.9681464113649103, iteration: 388267
loss: 1.0355873107910156,grad_norm: 0.9726288383940201, iteration: 388268
loss: 1.029734492301941,grad_norm: 0.9999995984131146, iteration: 388269
loss: 0.9687182903289795,grad_norm: 0.8731262382716013, iteration: 388270
loss: 0.989771842956543,grad_norm: 0.8133984521973038, iteration: 388271
loss: 0.9864268898963928,grad_norm: 0.6450251674352051, iteration: 388272
loss: 0.9882597327232361,grad_norm: 0.8299888176159697, iteration: 388273
loss: 1.0194233655929565,grad_norm: 0.9999991050501192, iteration: 388274
loss: 0.9926252365112305,grad_norm: 0.9362614560598697, iteration: 388275
loss: 1.0402882099151611,grad_norm: 0.845109908666363, iteration: 388276
loss: 0.9614272713661194,grad_norm: 0.718976061690209, iteration: 388277
loss: 1.0341464281082153,grad_norm: 0.9999999278606955, iteration: 388278
loss: 1.0770299434661865,grad_norm: 0.8860768027681918, iteration: 388279
loss: 0.9736047983169556,grad_norm: 0.7048681690869344, iteration: 388280
loss: 0.9622795581817627,grad_norm: 0.803812824233748, iteration: 388281
loss: 0.9396325945854187,grad_norm: 0.8432986342070637, iteration: 388282
loss: 1.0065921545028687,grad_norm: 0.7538098932251825, iteration: 388283
loss: 1.0339276790618896,grad_norm: 0.9999998588203903, iteration: 388284
loss: 1.070673942565918,grad_norm: 0.9999992364916808, iteration: 388285
loss: 1.0001312494277954,grad_norm: 0.8907609984226434, iteration: 388286
loss: 1.0225332975387573,grad_norm: 0.8960553290073136, iteration: 388287
loss: 1.12883722782135,grad_norm: 0.9524750129691493, iteration: 388288
loss: 1.0604135990142822,grad_norm: 0.99999923554108, iteration: 388289
loss: 1.0262049436569214,grad_norm: 0.9999995518647142, iteration: 388290
loss: 1.0489978790283203,grad_norm: 0.999999946450639, iteration: 388291
loss: 0.9965316653251648,grad_norm: 0.9999999983027843, iteration: 388292
loss: 1.0302228927612305,grad_norm: 0.9602860496948045, iteration: 388293
loss: 1.013430118560791,grad_norm: 1.0000000572549137, iteration: 388294
loss: 1.0520997047424316,grad_norm: 0.917668393622249, iteration: 388295
loss: 0.9824278950691223,grad_norm: 0.8377989993058385, iteration: 388296
loss: 1.00691556930542,grad_norm: 0.8521463467645777, iteration: 388297
loss: 1.1326472759246826,grad_norm: 0.80638720173641, iteration: 388298
loss: 0.9837160706520081,grad_norm: 0.8063538832093684, iteration: 388299
loss: 1.1081910133361816,grad_norm: 0.9999999275185782, iteration: 388300
loss: 1.0362448692321777,grad_norm: 0.8087872322883178, iteration: 388301
loss: 0.9877774715423584,grad_norm: 0.9999996304037785, iteration: 388302
loss: 0.9677074551582336,grad_norm: 0.8575594849060946, iteration: 388303
loss: 1.0319108963012695,grad_norm: 0.7455552859436924, iteration: 388304
loss: 1.1906542778015137,grad_norm: 0.999999283402533, iteration: 388305
loss: 1.0508760213851929,grad_norm: 0.86185724747197, iteration: 388306
loss: 1.071202039718628,grad_norm: 0.9999996521417831, iteration: 388307
loss: 0.9682113528251648,grad_norm: 0.7907997808233577, iteration: 388308
loss: 1.0850218534469604,grad_norm: 1.000000000566244, iteration: 388309
loss: 0.9807581901550293,grad_norm: 0.8527512964953389, iteration: 388310
loss: 1.0263465642929077,grad_norm: 0.9999991741845052, iteration: 388311
loss: 1.0184924602508545,grad_norm: 0.8596020642897889, iteration: 388312
loss: 1.0241940021514893,grad_norm: 0.9999993145288241, iteration: 388313
loss: 1.0088753700256348,grad_norm: 0.784896193135444, iteration: 388314
loss: 1.0097384452819824,grad_norm: 0.7928215780954744, iteration: 388315
loss: 1.0025346279144287,grad_norm: 0.7597705653034404, iteration: 388316
loss: 0.9953277111053467,grad_norm: 0.7976760041456588, iteration: 388317
loss: 1.0341135263442993,grad_norm: 0.9999998862401451, iteration: 388318
loss: 0.9969967007637024,grad_norm: 0.7910192017114671, iteration: 388319
loss: 1.0055367946624756,grad_norm: 0.7659256589196317, iteration: 388320
loss: 1.031389594078064,grad_norm: 0.7904384991181137, iteration: 388321
loss: 1.0001522302627563,grad_norm: 0.9999989409708369, iteration: 388322
loss: 1.0013976097106934,grad_norm: 0.7353281712699435, iteration: 388323
loss: 1.0263924598693848,grad_norm: 0.8561781922690612, iteration: 388324
loss: 1.0011463165283203,grad_norm: 0.7045942730505216, iteration: 388325
loss: 0.971335768699646,grad_norm: 0.7712163811417737, iteration: 388326
loss: 1.0139352083206177,grad_norm: 0.7505631203593666, iteration: 388327
loss: 1.0098789930343628,grad_norm: 0.8013485166037352, iteration: 388328
loss: 1.0775700807571411,grad_norm: 0.8540748009559238, iteration: 388329
loss: 1.11775541305542,grad_norm: 0.9999992584776468, iteration: 388330
loss: 1.0237149000167847,grad_norm: 0.9999993887577001, iteration: 388331
loss: 0.9822750091552734,grad_norm: 0.9941003582536422, iteration: 388332
loss: 0.9839823246002197,grad_norm: 0.7868446844975922, iteration: 388333
loss: 1.0338042974472046,grad_norm: 0.9999997295954464, iteration: 388334
loss: 1.0189557075500488,grad_norm: 0.8479203104440035, iteration: 388335
loss: 1.0374195575714111,grad_norm: 0.9999990979375548, iteration: 388336
loss: 0.9684692025184631,grad_norm: 0.9134684439501299, iteration: 388337
loss: 1.0182476043701172,grad_norm: 0.6804246210425969, iteration: 388338
loss: 0.9572601318359375,grad_norm: 0.7269725440388377, iteration: 388339
loss: 0.9899185299873352,grad_norm: 0.9218447605068083, iteration: 388340
loss: 0.9869499802589417,grad_norm: 0.7175775308747604, iteration: 388341
loss: 0.9698691368103027,grad_norm: 0.6661556643047795, iteration: 388342
loss: 1.0150864124298096,grad_norm: 0.8135956874999279, iteration: 388343
loss: 1.0206027030944824,grad_norm: 0.7348690831679843, iteration: 388344
loss: 0.9963575005531311,grad_norm: 0.8936217364150821, iteration: 388345
loss: 1.0853946208953857,grad_norm: 0.9999991837845856, iteration: 388346
loss: 1.0798228979110718,grad_norm: 0.8822182747395937, iteration: 388347
loss: 0.9607807397842407,grad_norm: 0.8575466457060537, iteration: 388348
loss: 1.015720248222351,grad_norm: 0.7791496538183967, iteration: 388349
loss: 1.0167357921600342,grad_norm: 0.8120495351452863, iteration: 388350
loss: 1.0729731321334839,grad_norm: 0.9999996358944975, iteration: 388351
loss: 1.0337998867034912,grad_norm: 0.9776179777909286, iteration: 388352
loss: 1.0043225288391113,grad_norm: 0.8469575432057863, iteration: 388353
loss: 1.0849348306655884,grad_norm: 0.9999995204340599, iteration: 388354
loss: 0.9926663637161255,grad_norm: 0.6089432073877633, iteration: 388355
loss: 1.001587986946106,grad_norm: 0.7443191925533859, iteration: 388356
loss: 1.0039007663726807,grad_norm: 0.6877223776663937, iteration: 388357
loss: 0.9751008749008179,grad_norm: 0.7306931507161316, iteration: 388358
loss: 0.9874395728111267,grad_norm: 0.760755398402766, iteration: 388359
loss: 0.9805212616920471,grad_norm: 0.916115053531021, iteration: 388360
loss: 1.0101648569107056,grad_norm: 0.7797721882385884, iteration: 388361
loss: 1.025777816772461,grad_norm: 0.6843782963923328, iteration: 388362
loss: 0.9725742340087891,grad_norm: 0.7880695016992808, iteration: 388363
loss: 1.0045766830444336,grad_norm: 0.9999991253693885, iteration: 388364
loss: 1.0520153045654297,grad_norm: 0.990486146607329, iteration: 388365
loss: 0.9963479042053223,grad_norm: 0.8696278513717612, iteration: 388366
loss: 1.0290244817733765,grad_norm: 0.7727821401436742, iteration: 388367
loss: 1.0256965160369873,grad_norm: 0.9082892099511252, iteration: 388368
loss: 1.005375623703003,grad_norm: 0.7470066310074652, iteration: 388369
loss: 0.9916502833366394,grad_norm: 0.7721538480676211, iteration: 388370
loss: 1.0176342725753784,grad_norm: 0.9999997379650284, iteration: 388371
loss: 1.0414658784866333,grad_norm: 0.7970667363777325, iteration: 388372
loss: 1.0060666799545288,grad_norm: 0.7384590689367906, iteration: 388373
loss: 1.0276604890823364,grad_norm: 0.99999957147383, iteration: 388374
loss: 0.9749700427055359,grad_norm: 0.7955117538155082, iteration: 388375
loss: 0.9879510402679443,grad_norm: 0.7883080564367211, iteration: 388376
loss: 0.9726681113243103,grad_norm: 0.9999998641082728, iteration: 388377
loss: 0.9964481592178345,grad_norm: 0.8126288398042651, iteration: 388378
loss: 0.98353511095047,grad_norm: 0.8297251703309788, iteration: 388379
loss: 1.0336883068084717,grad_norm: 0.9999993850342953, iteration: 388380
loss: 1.0270226001739502,grad_norm: 0.7972194868178956, iteration: 388381
loss: 1.0304999351501465,grad_norm: 0.8030632108471546, iteration: 388382
loss: 0.9925208687782288,grad_norm: 0.8110491704647002, iteration: 388383
loss: 1.0440142154693604,grad_norm: 0.9999998954178324, iteration: 388384
loss: 1.0185496807098389,grad_norm: 0.935318843531316, iteration: 388385
loss: 1.0199897289276123,grad_norm: 0.6878023860346701, iteration: 388386
loss: 1.041837215423584,grad_norm: 0.9837356330152105, iteration: 388387
loss: 1.0208771228790283,grad_norm: 0.8829298156808929, iteration: 388388
loss: 1.1093602180480957,grad_norm: 0.7623300092047608, iteration: 388389
loss: 1.0501594543457031,grad_norm: 0.9999992015736633, iteration: 388390
loss: 1.0140208005905151,grad_norm: 0.9999989771629727, iteration: 388391
loss: 1.0215604305267334,grad_norm: 0.9999993664491336, iteration: 388392
loss: 1.0016038417816162,grad_norm: 0.6482332746234251, iteration: 388393
loss: 0.996577799320221,grad_norm: 0.8544860510972169, iteration: 388394
loss: 0.9838683605194092,grad_norm: 0.7288841635756803, iteration: 388395
loss: 0.9861462712287903,grad_norm: 0.6277323006099467, iteration: 388396
loss: 1.0132359266281128,grad_norm: 0.7773437339742312, iteration: 388397
loss: 0.9855260252952576,grad_norm: 0.9157468514037096, iteration: 388398
loss: 0.9828759431838989,grad_norm: 0.7734671364936132, iteration: 388399
loss: 1.030505895614624,grad_norm: 0.8092096091215902, iteration: 388400
loss: 1.000162959098816,grad_norm: 0.8530534512313508, iteration: 388401
loss: 1.0445162057876587,grad_norm: 0.9999993021027547, iteration: 388402
loss: 0.9954214692115784,grad_norm: 0.7376652922429191, iteration: 388403
loss: 1.0081117153167725,grad_norm: 0.7002573655779739, iteration: 388404
loss: 1.0643571615219116,grad_norm: 0.9999997975658553, iteration: 388405
loss: 1.0437170267105103,grad_norm: 0.7855600372408028, iteration: 388406
loss: 1.0314801931381226,grad_norm: 0.999999682756378, iteration: 388407
loss: 1.0628284215927124,grad_norm: 0.9999998717351986, iteration: 388408
loss: 0.9502623081207275,grad_norm: 0.7083163219817357, iteration: 388409
loss: 1.0073343515396118,grad_norm: 0.8845171628991848, iteration: 388410
loss: 1.0057461261749268,grad_norm: 0.7508255315159793, iteration: 388411
loss: 1.0080053806304932,grad_norm: 0.7101492604672706, iteration: 388412
loss: 0.9914922714233398,grad_norm: 0.722279877714196, iteration: 388413
loss: 0.9731309413909912,grad_norm: 0.6824117742580192, iteration: 388414
loss: 1.0003968477249146,grad_norm: 0.9375674076309337, iteration: 388415
loss: 0.9741564393043518,grad_norm: 0.7580176439756495, iteration: 388416
loss: 0.9728896021842957,grad_norm: 0.8313678718486128, iteration: 388417
loss: 1.0413570404052734,grad_norm: 0.8875131290352561, iteration: 388418
loss: 0.9983649253845215,grad_norm: 0.9999992244986885, iteration: 388419
loss: 1.0099050998687744,grad_norm: 0.739428675560162, iteration: 388420
loss: 1.0221788883209229,grad_norm: 0.8663074646212954, iteration: 388421
loss: 1.0571599006652832,grad_norm: 0.9887650785270365, iteration: 388422
loss: 1.0100678205490112,grad_norm: 0.8924577355538151, iteration: 388423
loss: 1.0173759460449219,grad_norm: 0.7645142069320797, iteration: 388424
loss: 0.9925523400306702,grad_norm: 0.8369393876573034, iteration: 388425
loss: 1.0405759811401367,grad_norm: 0.8244380251643789, iteration: 388426
loss: 1.015213131904602,grad_norm: 0.9999996554914654, iteration: 388427
loss: 0.998944103717804,grad_norm: 0.8184930162373405, iteration: 388428
loss: 1.0539183616638184,grad_norm: 0.9999998950683092, iteration: 388429
loss: 1.0317696332931519,grad_norm: 0.7638254343800384, iteration: 388430
loss: 1.020142674446106,grad_norm: 0.7795390689881582, iteration: 388431
loss: 0.9685410857200623,grad_norm: 0.731814758627523, iteration: 388432
loss: 0.9989809989929199,grad_norm: 0.7022575386900256, iteration: 388433
loss: 1.0209730863571167,grad_norm: 0.8381624717053842, iteration: 388434
loss: 0.9731895327568054,grad_norm: 0.8370746248221472, iteration: 388435
loss: 0.9818057417869568,grad_norm: 0.7991010995006059, iteration: 388436
loss: 1.0091465711593628,grad_norm: 0.8051337751985983, iteration: 388437
loss: 0.9817183017730713,grad_norm: 0.8636732857711524, iteration: 388438
loss: 1.0153146982192993,grad_norm: 0.6768989025240382, iteration: 388439
loss: 1.0086268186569214,grad_norm: 0.7503366747493584, iteration: 388440
loss: 1.0509806871414185,grad_norm: 0.9729293469287178, iteration: 388441
loss: 0.9921301007270813,grad_norm: 0.608709361025296, iteration: 388442
loss: 1.0028889179229736,grad_norm: 0.9982314126858151, iteration: 388443
loss: 1.0025783777236938,grad_norm: 0.7087557165754322, iteration: 388444
loss: 1.0470731258392334,grad_norm: 0.9999991574956888, iteration: 388445
loss: 1.00448477268219,grad_norm: 0.97417842708696, iteration: 388446
loss: 1.0204890966415405,grad_norm: 0.9999994265276496, iteration: 388447
loss: 0.9674622416496277,grad_norm: 0.8603460610755684, iteration: 388448
loss: 0.9853057861328125,grad_norm: 0.8096146090932617, iteration: 388449
loss: 1.0557106733322144,grad_norm: 0.937158380035252, iteration: 388450
loss: 1.016938328742981,grad_norm: 0.64205228503949, iteration: 388451
loss: 0.9955730438232422,grad_norm: 0.9999990975969312, iteration: 388452
loss: 1.103772521018982,grad_norm: 0.9999998528654372, iteration: 388453
loss: 0.9889115691184998,grad_norm: 0.7258202406658048, iteration: 388454
loss: 0.9817858338356018,grad_norm: 0.8782608483743367, iteration: 388455
loss: 1.0615936517715454,grad_norm: 0.9999992149190434, iteration: 388456
loss: 1.0140851736068726,grad_norm: 0.7078986616629963, iteration: 388457
loss: 1.0356980562210083,grad_norm: 0.8368908955641429, iteration: 388458
loss: 1.0093613862991333,grad_norm: 0.7153782676269046, iteration: 388459
loss: 1.0361403226852417,grad_norm: 0.8544799138231894, iteration: 388460
loss: 0.9935682415962219,grad_norm: 0.7365333391147412, iteration: 388461
loss: 1.0722393989562988,grad_norm: 0.9999997303300118, iteration: 388462
loss: 0.9852612018585205,grad_norm: 0.9522172148882905, iteration: 388463
loss: 0.955460786819458,grad_norm: 0.781447457826217, iteration: 388464
loss: 1.012879729270935,grad_norm: 0.9999999538138626, iteration: 388465
loss: 1.0256683826446533,grad_norm: 0.8012591344418865, iteration: 388466
loss: 1.0776569843292236,grad_norm: 0.8400166386049428, iteration: 388467
loss: 1.0443025827407837,grad_norm: 0.9516741084238474, iteration: 388468
loss: 1.001827359199524,grad_norm: 0.9438480769654933, iteration: 388469
loss: 1.0770243406295776,grad_norm: 0.9999995457062283, iteration: 388470
loss: 0.9439674615859985,grad_norm: 0.928111212517067, iteration: 388471
loss: 0.9897139668464661,grad_norm: 0.8763483938684404, iteration: 388472
loss: 1.0099906921386719,grad_norm: 0.7285550163628908, iteration: 388473
loss: 0.9569063782691956,grad_norm: 0.8776065882903379, iteration: 388474
loss: 1.0368953943252563,grad_norm: 0.779541768692672, iteration: 388475
loss: 0.9968505501747131,grad_norm: 0.857640488354085, iteration: 388476
loss: 1.0180153846740723,grad_norm: 0.9999992661348802, iteration: 388477
loss: 1.0709468126296997,grad_norm: 0.9937818027965699, iteration: 388478
loss: 0.9586291313171387,grad_norm: 0.6688303577609814, iteration: 388479
loss: 0.9755488634109497,grad_norm: 0.818136755791828, iteration: 388480
loss: 1.0195350646972656,grad_norm: 0.7885595796171386, iteration: 388481
loss: 1.0498850345611572,grad_norm: 0.9999998412928718, iteration: 388482
loss: 0.97287917137146,grad_norm: 0.7203378513617229, iteration: 388483
loss: 1.003722906112671,grad_norm: 0.7890637891473982, iteration: 388484
loss: 0.9967539310455322,grad_norm: 0.7355094108856558, iteration: 388485
loss: 1.0680707693099976,grad_norm: 0.8624039034983926, iteration: 388486
loss: 0.9843109846115112,grad_norm: 0.681004925441465, iteration: 388487
loss: 1.0308817625045776,grad_norm: 0.7632269804853499, iteration: 388488
loss: 0.9881633520126343,grad_norm: 0.768743993411916, iteration: 388489
loss: 0.9873185157775879,grad_norm: 0.87642677795927, iteration: 388490
loss: 1.0071715116500854,grad_norm: 0.7102757171777937, iteration: 388491
loss: 1.0463931560516357,grad_norm: 0.8035493593367289, iteration: 388492
loss: 1.0091055631637573,grad_norm: 0.9048203467770728, iteration: 388493
loss: 1.0404976606369019,grad_norm: 0.7397248164337448, iteration: 388494
loss: 1.0002650022506714,grad_norm: 0.8264832978326504, iteration: 388495
loss: 0.986758291721344,grad_norm: 0.7167780508239042, iteration: 388496
loss: 1.0122038125991821,grad_norm: 0.9948171867791813, iteration: 388497
loss: 0.9987160563468933,grad_norm: 0.999999117235327, iteration: 388498
loss: 1.0559338331222534,grad_norm: 0.9999991758288301, iteration: 388499
loss: 1.0205988883972168,grad_norm: 0.8655058070568211, iteration: 388500
loss: 1.0007221698760986,grad_norm: 0.7236002178230333, iteration: 388501
loss: 1.0962053537368774,grad_norm: 0.9999997590284256, iteration: 388502
loss: 1.009281039237976,grad_norm: 0.7728936663067962, iteration: 388503
loss: 1.020889163017273,grad_norm: 0.863563004083734, iteration: 388504
loss: 1.019899845123291,grad_norm: 0.7845508437155703, iteration: 388505
loss: 1.0010318756103516,grad_norm: 0.8031044915031826, iteration: 388506
loss: 0.9788082242012024,grad_norm: 0.75747949041241, iteration: 388507
loss: 1.0395148992538452,grad_norm: 0.9999998263190828, iteration: 388508
loss: 1.0750541687011719,grad_norm: 0.9999999300217992, iteration: 388509
loss: 0.9982388615608215,grad_norm: 0.680108600324672, iteration: 388510
loss: 1.0392942428588867,grad_norm: 0.9999998460101709, iteration: 388511
loss: 0.9907670617103577,grad_norm: 0.7989932519188242, iteration: 388512
loss: 1.0029908418655396,grad_norm: 0.8842216873688631, iteration: 388513
loss: 1.049800992012024,grad_norm: 0.76490478252562, iteration: 388514
loss: 0.9684179425239563,grad_norm: 0.6730895279586839, iteration: 388515
loss: 1.0251370668411255,grad_norm: 0.999999361317921, iteration: 388516
loss: 1.0087294578552246,grad_norm: 0.8594660475411191, iteration: 388517
loss: 1.0237988233566284,grad_norm: 0.7858012173466551, iteration: 388518
loss: 1.025233268737793,grad_norm: 0.7659327387470686, iteration: 388519
loss: 1.007388710975647,grad_norm: 0.7321541716595982, iteration: 388520
loss: 1.0295302867889404,grad_norm: 0.7831860283883499, iteration: 388521
loss: 1.0355638265609741,grad_norm: 0.7509347231976892, iteration: 388522
loss: 1.0277987718582153,grad_norm: 0.9999990826453504, iteration: 388523
loss: 0.9897130131721497,grad_norm: 0.8195598908534945, iteration: 388524
loss: 0.9986963272094727,grad_norm: 0.6270197370837628, iteration: 388525
loss: 1.0139862298965454,grad_norm: 0.7670540619857091, iteration: 388526
loss: 1.000488042831421,grad_norm: 0.7632330720665487, iteration: 388527
loss: 0.978371262550354,grad_norm: 0.9999993348892432, iteration: 388528
loss: 1.028445839881897,grad_norm: 0.9391215903255953, iteration: 388529
loss: 1.050762414932251,grad_norm: 0.801749585804506, iteration: 388530
loss: 0.9973099231719971,grad_norm: 0.7978021591203114, iteration: 388531
loss: 1.0125410556793213,grad_norm: 0.7065154089468365, iteration: 388532
loss: 0.9768550992012024,grad_norm: 0.8188467400419925, iteration: 388533
loss: 1.0379791259765625,grad_norm: 0.7806003411301239, iteration: 388534
loss: 1.022416353225708,grad_norm: 0.8830525770788252, iteration: 388535
loss: 1.0439231395721436,grad_norm: 0.7099217689556735, iteration: 388536
loss: 1.016271710395813,grad_norm: 0.669086112158062, iteration: 388537
loss: 0.9833821654319763,grad_norm: 0.9999999305824888, iteration: 388538
loss: 1.040751338005066,grad_norm: 0.7458592739325368, iteration: 388539
loss: 1.0731911659240723,grad_norm: 0.9999998406948799, iteration: 388540
loss: 1.0491172075271606,grad_norm: 0.8979366024031649, iteration: 388541
loss: 1.0224696397781372,grad_norm: 0.7562574937688528, iteration: 388542
loss: 0.9820287823677063,grad_norm: 0.6723296731805364, iteration: 388543
loss: 0.9620721340179443,grad_norm: 0.8157743336362668, iteration: 388544
loss: 1.1382594108581543,grad_norm: 0.9999999458628924, iteration: 388545
loss: 0.9605269432067871,grad_norm: 0.9247170127968216, iteration: 388546
loss: 1.0016038417816162,grad_norm: 0.9937642169353338, iteration: 388547
loss: 1.0013642311096191,grad_norm: 0.8102488999912668, iteration: 388548
loss: 0.9981215596199036,grad_norm: 0.9444210652303641, iteration: 388549
loss: 1.0301674604415894,grad_norm: 0.6835566667746108, iteration: 388550
loss: 0.9913550615310669,grad_norm: 0.6848608268965933, iteration: 388551
loss: 1.051609992980957,grad_norm: 0.9999995289824355, iteration: 388552
loss: 1.0249563455581665,grad_norm: 0.6955994199939168, iteration: 388553
loss: 1.0065187215805054,grad_norm: 0.6270112514938769, iteration: 388554
loss: 1.0651812553405762,grad_norm: 0.9999992550823223, iteration: 388555
loss: 1.0073322057724,grad_norm: 0.9257941949250754, iteration: 388556
loss: 1.0156066417694092,grad_norm: 0.9405585350868925, iteration: 388557
loss: 1.0254870653152466,grad_norm: 0.7120691790602219, iteration: 388558
loss: 0.9692097306251526,grad_norm: 0.7098907795936212, iteration: 388559
loss: 1.0692408084869385,grad_norm: 0.9999994178199435, iteration: 388560
loss: 0.955845832824707,grad_norm: 0.8138060804539596, iteration: 388561
loss: 1.0432924032211304,grad_norm: 0.9197228846819707, iteration: 388562
loss: 1.045884370803833,grad_norm: 0.9999992415351534, iteration: 388563
loss: 0.9469394683837891,grad_norm: 0.8056685461601656, iteration: 388564
loss: 1.0465511083602905,grad_norm: 0.8925439050471932, iteration: 388565
loss: 1.0129389762878418,grad_norm: 0.7716697479064345, iteration: 388566
loss: 1.0184639692306519,grad_norm: 0.9999993558442574, iteration: 388567
loss: 0.9700620174407959,grad_norm: 0.8843470761076971, iteration: 388568
loss: 1.0225671529769897,grad_norm: 0.7762452730879064, iteration: 388569
loss: 0.9965711236000061,grad_norm: 0.7775395479203888, iteration: 388570
loss: 0.951091468334198,grad_norm: 0.742615708724056, iteration: 388571
loss: 1.0518187284469604,grad_norm: 0.9999996653745574, iteration: 388572
loss: 1.0573512315750122,grad_norm: 0.9337052277907766, iteration: 388573
loss: 0.9449547529220581,grad_norm: 0.8433063877214687, iteration: 388574
loss: 0.9723829627037048,grad_norm: 0.8623775932667921, iteration: 388575
loss: 0.9739260673522949,grad_norm: 0.8232353240561744, iteration: 388576
loss: 0.9745967984199524,grad_norm: 0.8120654593879968, iteration: 388577
loss: 1.2823950052261353,grad_norm: 0.9999996511327768, iteration: 388578
loss: 0.9884811043739319,grad_norm: 0.6814706711687945, iteration: 388579
loss: 1.005558967590332,grad_norm: 0.7720862168266404, iteration: 388580
loss: 0.9793861508369446,grad_norm: 0.9999995581426256, iteration: 388581
loss: 0.9872519969940186,grad_norm: 0.7343547395320414, iteration: 388582
loss: 1.0289160013198853,grad_norm: 0.7101160302218177, iteration: 388583
loss: 0.9838469624519348,grad_norm: 0.6799798331331813, iteration: 388584
loss: 1.0132192373275757,grad_norm: 0.7284446729559488, iteration: 388585
loss: 0.9871618151664734,grad_norm: 0.8145740577957578, iteration: 388586
loss: 1.022784948348999,grad_norm: 0.9999997684943444, iteration: 388587
loss: 0.9712320566177368,grad_norm: 0.9769553397231725, iteration: 388588
loss: 1.0058437585830688,grad_norm: 0.6997111869195765, iteration: 388589
loss: 1.0059187412261963,grad_norm: 0.9999991470229392, iteration: 388590
loss: 0.9773394465446472,grad_norm: 0.8040415686938419, iteration: 388591
loss: 1.0189855098724365,grad_norm: 0.8272249649026975, iteration: 388592
loss: 1.014304518699646,grad_norm: 0.9488292442775487, iteration: 388593
loss: 1.0358678102493286,grad_norm: 0.8709844917926279, iteration: 388594
loss: 0.9673178195953369,grad_norm: 0.7946958642985601, iteration: 388595
loss: 0.9965482354164124,grad_norm: 0.6870328983222099, iteration: 388596
loss: 1.0017656087875366,grad_norm: 0.8910135555347283, iteration: 388597
loss: 1.0226490497589111,grad_norm: 0.9999999282675242, iteration: 388598
loss: 1.1179890632629395,grad_norm: 1.0000000295624722, iteration: 388599
loss: 0.9706838726997375,grad_norm: 0.7585978116213093, iteration: 388600
loss: 1.019163727760315,grad_norm: 0.9999999163115252, iteration: 388601
loss: 1.0072604417800903,grad_norm: 0.7633118222745359, iteration: 388602
loss: 0.9963129162788391,grad_norm: 0.7283933466000232, iteration: 388603
loss: 1.145919919013977,grad_norm: 0.9999999495328957, iteration: 388604
loss: 0.9794767498970032,grad_norm: 0.6804521346162534, iteration: 388605
loss: 0.984236478805542,grad_norm: 0.7686097122347898, iteration: 388606
loss: 0.9827492833137512,grad_norm: 0.7746461807830415, iteration: 388607
loss: 0.9908654689788818,grad_norm: 0.7692635667632628, iteration: 388608
loss: 0.9888436198234558,grad_norm: 0.8374437503537259, iteration: 388609
loss: 0.9889918565750122,grad_norm: 0.7817446511755083, iteration: 388610
loss: 1.00431227684021,grad_norm: 0.66516005825445, iteration: 388611
loss: 0.9991728067398071,grad_norm: 0.705373638937834, iteration: 388612
loss: 1.0220264196395874,grad_norm: 0.8634016358547014, iteration: 388613
loss: 0.9780402779579163,grad_norm: 0.749167071913639, iteration: 388614
loss: 1.011845350265503,grad_norm: 0.868891246996941, iteration: 388615
loss: 1.030766487121582,grad_norm: 0.9114355035485504, iteration: 388616
loss: 0.9879915118217468,grad_norm: 0.7196920480046146, iteration: 388617
loss: 0.983856737613678,grad_norm: 0.9999993592809927, iteration: 388618
loss: 1.0322612524032593,grad_norm: 0.9999990028806683, iteration: 388619
loss: 1.0464011430740356,grad_norm: 0.999999209974692, iteration: 388620
loss: 1.0125151872634888,grad_norm: 0.7512500020293646, iteration: 388621
loss: 1.0534418821334839,grad_norm: 0.7582429338254503, iteration: 388622
loss: 0.9937213063240051,grad_norm: 0.9612859422803884, iteration: 388623
loss: 0.9963919520378113,grad_norm: 0.7478127834126489, iteration: 388624
loss: 1.0505411624908447,grad_norm: 0.9999999037213377, iteration: 388625
loss: 0.9790641665458679,grad_norm: 0.9455717769938781, iteration: 388626
loss: 0.9682304263114929,grad_norm: 0.8709317963241522, iteration: 388627
loss: 1.009048581123352,grad_norm: 0.725458565915248, iteration: 388628
loss: 1.032641887664795,grad_norm: 0.694271717060838, iteration: 388629
loss: 0.9685056805610657,grad_norm: 0.7876593790633424, iteration: 388630
loss: 0.9948829412460327,grad_norm: 0.8292280237151725, iteration: 388631
loss: 1.0165458917617798,grad_norm: 0.9999997281644929, iteration: 388632
loss: 0.9974921941757202,grad_norm: 0.7250308127889783, iteration: 388633
loss: 0.9965211153030396,grad_norm: 0.81255302402706, iteration: 388634
loss: 1.026137351989746,grad_norm: 0.7131958582405843, iteration: 388635
loss: 0.9869722127914429,grad_norm: 0.9421225121901133, iteration: 388636
loss: 0.987848162651062,grad_norm: 0.9999997599485067, iteration: 388637
loss: 0.967534601688385,grad_norm: 0.8316310040971681, iteration: 388638
loss: 1.0232183933258057,grad_norm: 0.8024423810453594, iteration: 388639
loss: 1.0300906896591187,grad_norm: 0.7995443308195168, iteration: 388640
loss: 1.144238829612732,grad_norm: 0.9281227018397313, iteration: 388641
loss: 0.9914006590843201,grad_norm: 0.7843675122478744, iteration: 388642
loss: 1.051815152168274,grad_norm: 0.9208678662309551, iteration: 388643
loss: 1.0863125324249268,grad_norm: 0.9999994142874171, iteration: 388644
loss: 1.037987470626831,grad_norm: 0.7219740168713796, iteration: 388645
loss: 1.003513216972351,grad_norm: 0.7244723954355972, iteration: 388646
loss: 1.0205914974212646,grad_norm: 0.7253208698799136, iteration: 388647
loss: 0.9910821914672852,grad_norm: 0.7269536235100882, iteration: 388648
loss: 1.0015708208084106,grad_norm: 0.8541159968694714, iteration: 388649
loss: 0.9985103011131287,grad_norm: 0.8199421079969346, iteration: 388650
loss: 1.1089328527450562,grad_norm: 0.9999991885174441, iteration: 388651
loss: 1.0671439170837402,grad_norm: 0.9999991769877963, iteration: 388652
loss: 1.0167535543441772,grad_norm: 0.7187170640832224, iteration: 388653
loss: 0.9752449989318848,grad_norm: 0.7449394288095053, iteration: 388654
loss: 1.04360032081604,grad_norm: 0.9999992024605774, iteration: 388655
loss: 0.9451463222503662,grad_norm: 0.6731265347821147, iteration: 388656
loss: 0.9777125716209412,grad_norm: 0.7452994105233922, iteration: 388657
loss: 1.0082850456237793,grad_norm: 0.9999991775879555, iteration: 388658
loss: 0.9817172288894653,grad_norm: 0.804694459748523, iteration: 388659
loss: 1.0709575414657593,grad_norm: 0.9999992504187588, iteration: 388660
loss: 1.0462291240692139,grad_norm: 0.7174019575721695, iteration: 388661
loss: 0.9989948868751526,grad_norm: 0.8043006335204566, iteration: 388662
loss: 1.005639672279358,grad_norm: 0.7626075513731434, iteration: 388663
loss: 0.9858850836753845,grad_norm: 0.727608764082557, iteration: 388664
loss: 0.9684595465660095,grad_norm: 0.8964242802220972, iteration: 388665
loss: 1.0331205129623413,grad_norm: 0.7916273557423716, iteration: 388666
loss: 0.996391236782074,grad_norm: 0.7151460820327682, iteration: 388667
loss: 1.026007890701294,grad_norm: 0.7183663963793898, iteration: 388668
loss: 0.9951522350311279,grad_norm: 0.893223223972774, iteration: 388669
loss: 1.0228064060211182,grad_norm: 0.8794052207903881, iteration: 388670
loss: 0.9863237738609314,grad_norm: 0.8742107340289057, iteration: 388671
loss: 0.9906957745552063,grad_norm: 0.7519914690419811, iteration: 388672
loss: 0.9711799025535583,grad_norm: 0.8070688660958203, iteration: 388673
loss: 1.00704026222229,grad_norm: 0.667556722499462, iteration: 388674
loss: 1.0965825319290161,grad_norm: 0.7455976303725782, iteration: 388675
loss: 1.004058837890625,grad_norm: 0.9084216088927984, iteration: 388676
loss: 1.0133436918258667,grad_norm: 0.7946604976126194, iteration: 388677
loss: 0.9965485334396362,grad_norm: 0.8276342482289054, iteration: 388678
loss: 1.0510127544403076,grad_norm: 0.8966947383594687, iteration: 388679
loss: 0.9808120727539062,grad_norm: 0.7402345396027562, iteration: 388680
loss: 1.036985158920288,grad_norm: 0.6943446326301719, iteration: 388681
loss: 1.0025700330734253,grad_norm: 0.7695103805370187, iteration: 388682
loss: 0.9810314178466797,grad_norm: 0.8813098041335943, iteration: 388683
loss: 0.9889867901802063,grad_norm: 0.7030405385273352, iteration: 388684
loss: 1.0227690935134888,grad_norm: 0.9999996626648839, iteration: 388685
loss: 1.0748732089996338,grad_norm: 0.9481086689811914, iteration: 388686
loss: 1.0112931728363037,grad_norm: 0.9719320334107832, iteration: 388687
loss: 1.0332413911819458,grad_norm: 0.999999354141531, iteration: 388688
loss: 0.9966807961463928,grad_norm: 0.693807575215004, iteration: 388689
loss: 1.05049467086792,grad_norm: 0.9999992627123298, iteration: 388690
loss: 1.0214964151382446,grad_norm: 0.9999990718311814, iteration: 388691
loss: 1.0388119220733643,grad_norm: 0.9698562592404993, iteration: 388692
loss: 0.9547540545463562,grad_norm: 0.752195356276652, iteration: 388693
loss: 1.0296584367752075,grad_norm: 0.8161389440817748, iteration: 388694
loss: 0.9698145389556885,grad_norm: 0.8312288745448134, iteration: 388695
loss: 1.0641398429870605,grad_norm: 0.9999991868491767, iteration: 388696
loss: 1.0175459384918213,grad_norm: 0.871724685801806, iteration: 388697
loss: 1.0226223468780518,grad_norm: 0.9999996551587276, iteration: 388698
loss: 1.0063858032226562,grad_norm: 0.6865516074268212, iteration: 388699
loss: 0.9662231802940369,grad_norm: 0.9340782547303977, iteration: 388700
loss: 1.000192403793335,grad_norm: 0.6351931890854633, iteration: 388701
loss: 1.0393983125686646,grad_norm: 0.8460688396705518, iteration: 388702
loss: 1.002895474433899,grad_norm: 0.8669249913073275, iteration: 388703
loss: 1.0068150758743286,grad_norm: 0.7507890163812553, iteration: 388704
loss: 1.0026382207870483,grad_norm: 0.999999118859451, iteration: 388705
loss: 1.0413203239440918,grad_norm: 0.7241114761535766, iteration: 388706
loss: 1.0593163967132568,grad_norm: 0.8079112525305069, iteration: 388707
loss: 1.248293161392212,grad_norm: 0.9999993424196856, iteration: 388708
loss: 1.0297741889953613,grad_norm: 0.9190463670595951, iteration: 388709
loss: 1.0093859434127808,grad_norm: 0.652077195569443, iteration: 388710
loss: 1.0499627590179443,grad_norm: 0.8070230147153399, iteration: 388711
loss: 0.9819251894950867,grad_norm: 0.9667910154145134, iteration: 388712
loss: 1.040826678276062,grad_norm: 0.7730227795126656, iteration: 388713
loss: 1.0447461605072021,grad_norm: 0.9999991149796256, iteration: 388714
loss: 1.027962565422058,grad_norm: 0.9999993737752969, iteration: 388715
loss: 0.9961419701576233,grad_norm: 0.7097268025879472, iteration: 388716
loss: 1.0211565494537354,grad_norm: 0.7331917758494978, iteration: 388717
loss: 1.0009106397628784,grad_norm: 0.7066679745184424, iteration: 388718
loss: 0.9581719040870667,grad_norm: 0.696492550730531, iteration: 388719
loss: 1.0658085346221924,grad_norm: 0.8308820283506857, iteration: 388720
loss: 1.0040867328643799,grad_norm: 0.6719483639562753, iteration: 388721
loss: 1.0107680559158325,grad_norm: 0.8100908025781497, iteration: 388722
loss: 1.0169677734375,grad_norm: 0.7430206649737188, iteration: 388723
loss: 1.0006341934204102,grad_norm: 0.8112723129005354, iteration: 388724
loss: 1.0141335725784302,grad_norm: 0.6786464018016732, iteration: 388725
loss: 0.9751380085945129,grad_norm: 0.8827125468716901, iteration: 388726
loss: 0.9929375052452087,grad_norm: 0.8605322356981286, iteration: 388727
loss: 1.0316063165664673,grad_norm: 0.7418670224049169, iteration: 388728
loss: 0.9528428912162781,grad_norm: 0.741912178214176, iteration: 388729
loss: 0.9856290817260742,grad_norm: 0.9217187692775197, iteration: 388730
loss: 1.0193169116973877,grad_norm: 0.8473403131661419, iteration: 388731
loss: 0.9893743991851807,grad_norm: 0.8250282015051256, iteration: 388732
loss: 1.0162655115127563,grad_norm: 0.8240561167259807, iteration: 388733
loss: 0.9934317469596863,grad_norm: 0.8161443804811035, iteration: 388734
loss: 0.9733611941337585,grad_norm: 0.7037637078384015, iteration: 388735
loss: 1.0038018226623535,grad_norm: 0.756427623000571, iteration: 388736
loss: 0.9747499823570251,grad_norm: 0.7550161411066982, iteration: 388737
loss: 1.0278407335281372,grad_norm: 0.8026613070774723, iteration: 388738
loss: 0.9884332418441772,grad_norm: 0.8245596959097912, iteration: 388739
loss: 0.9685813784599304,grad_norm: 0.883853650638001, iteration: 388740
loss: 1.0579261779785156,grad_norm: 0.8100144714895728, iteration: 388741
loss: 1.0045465230941772,grad_norm: 0.9526645170808988, iteration: 388742
loss: 1.0062123537063599,grad_norm: 0.8200056992355087, iteration: 388743
loss: 1.0273942947387695,grad_norm: 0.9492453212005251, iteration: 388744
loss: 0.9964029788970947,grad_norm: 0.7435360575407773, iteration: 388745
loss: 0.9869703054428101,grad_norm: 0.8109437337424975, iteration: 388746
loss: 1.0194675922393799,grad_norm: 0.7999357262541964, iteration: 388747
loss: 1.0099546909332275,grad_norm: 0.8371400527035218, iteration: 388748
loss: 1.0094488859176636,grad_norm: 0.9999998693453751, iteration: 388749
loss: 0.9967805743217468,grad_norm: 0.7675687277398535, iteration: 388750
loss: 0.9863205552101135,grad_norm: 0.7437073308229583, iteration: 388751
loss: 0.9903059601783752,grad_norm: 0.8276970072795096, iteration: 388752
loss: 1.0156464576721191,grad_norm: 0.9999997253614237, iteration: 388753
loss: 0.9853959679603577,grad_norm: 0.7358024806268119, iteration: 388754
loss: 1.0017414093017578,grad_norm: 0.8287016735750319, iteration: 388755
loss: 1.0141268968582153,grad_norm: 0.8005087486826818, iteration: 388756
loss: 0.9698920249938965,grad_norm: 0.732738854770715, iteration: 388757
loss: 0.9940321445465088,grad_norm: 0.723676282554115, iteration: 388758
loss: 1.0243054628372192,grad_norm: 0.7160529794932302, iteration: 388759
loss: 1.0145169496536255,grad_norm: 0.5804327564270988, iteration: 388760
loss: 0.9765557646751404,grad_norm: 0.9051893586328698, iteration: 388761
loss: 1.0291738510131836,grad_norm: 0.9497033200934767, iteration: 388762
loss: 0.9929928183555603,grad_norm: 0.8138745594407492, iteration: 388763
loss: 0.9868053197860718,grad_norm: 0.8378996001843529, iteration: 388764
loss: 1.0201385021209717,grad_norm: 0.8098069705912144, iteration: 388765
loss: 1.0284017324447632,grad_norm: 0.9999991789905232, iteration: 388766
loss: 0.9975699186325073,grad_norm: 0.6724936452225783, iteration: 388767
loss: 1.0108516216278076,grad_norm: 0.6614805185222541, iteration: 388768
loss: 1.014552354812622,grad_norm: 0.91515022622471, iteration: 388769
loss: 1.001367449760437,grad_norm: 0.7556279670678965, iteration: 388770
loss: 1.0053197145462036,grad_norm: 0.8477538787629291, iteration: 388771
loss: 1.0076645612716675,grad_norm: 0.9116976605250994, iteration: 388772
loss: 1.0456780195236206,grad_norm: 0.9999998617807022, iteration: 388773
loss: 1.015221357345581,grad_norm: 0.7001773585316482, iteration: 388774
loss: 1.1587202548980713,grad_norm: 0.999999293246228, iteration: 388775
loss: 1.0314496755599976,grad_norm: 0.9999998659777849, iteration: 388776
loss: 1.0010323524475098,grad_norm: 0.7833667589407256, iteration: 388777
loss: 0.9605867862701416,grad_norm: 0.9999991872955387, iteration: 388778
loss: 1.004947543144226,grad_norm: 0.999999868784095, iteration: 388779
loss: 1.0235739946365356,grad_norm: 0.8394598816397626, iteration: 388780
loss: 1.0231274366378784,grad_norm: 0.7440920216995143, iteration: 388781
loss: 1.0010014772415161,grad_norm: 0.677962078327997, iteration: 388782
loss: 1.0547654628753662,grad_norm: 0.9999989382485825, iteration: 388783
loss: 0.9849626421928406,grad_norm: 0.8503600347003241, iteration: 388784
loss: 1.016261339187622,grad_norm: 0.8266325942422618, iteration: 388785
loss: 0.982255756855011,grad_norm: 0.8618209637298325, iteration: 388786
loss: 0.9939727783203125,grad_norm: 0.7166491322674742, iteration: 388787
loss: 0.9822177290916443,grad_norm: 0.8073816370763492, iteration: 388788
loss: 0.9768546223640442,grad_norm: 0.8395863996200007, iteration: 388789
loss: 0.9849229454994202,grad_norm: 0.9093882214996583, iteration: 388790
loss: 0.975624680519104,grad_norm: 0.859764694570644, iteration: 388791
loss: 1.0014480352401733,grad_norm: 0.8630414613538802, iteration: 388792
loss: 0.9803867340087891,grad_norm: 0.7255012997700826, iteration: 388793
loss: 1.0139774084091187,grad_norm: 0.7655487060270736, iteration: 388794
loss: 0.9930113554000854,grad_norm: 0.761297238243878, iteration: 388795
loss: 1.0180662870407104,grad_norm: 0.8845581735616532, iteration: 388796
loss: 0.9841480851173401,grad_norm: 0.7482257813429353, iteration: 388797
loss: 0.9880638718605042,grad_norm: 0.6892951636141942, iteration: 388798
loss: 0.9597641825675964,grad_norm: 0.7284822423848444, iteration: 388799
loss: 1.0136208534240723,grad_norm: 0.8416849053049436, iteration: 388800
loss: 1.0016146898269653,grad_norm: 0.8507288777644035, iteration: 388801
loss: 0.996128261089325,grad_norm: 0.7862278524049188, iteration: 388802
loss: 1.0121822357177734,grad_norm: 0.9193917723529573, iteration: 388803
loss: 1.0035400390625,grad_norm: 0.9243755167100243, iteration: 388804
loss: 1.0434802770614624,grad_norm: 0.999999496733686, iteration: 388805
loss: 0.9834985136985779,grad_norm: 0.7237109236762237, iteration: 388806
loss: 0.9862373471260071,grad_norm: 0.634326837758296, iteration: 388807
loss: 1.0150010585784912,grad_norm: 0.8120982977142616, iteration: 388808
loss: 0.9989420771598816,grad_norm: 0.8158496733097454, iteration: 388809
loss: 1.009210467338562,grad_norm: 0.8162216628296356, iteration: 388810
loss: 1.0387336015701294,grad_norm: 0.9649028763894775, iteration: 388811
loss: 0.9825828671455383,grad_norm: 0.8226844659992251, iteration: 388812
loss: 0.9987474679946899,grad_norm: 0.7416732712319082, iteration: 388813
loss: 1.0057564973831177,grad_norm: 0.7593990445550156, iteration: 388814
loss: 0.9800308346748352,grad_norm: 0.7063976179882437, iteration: 388815
loss: 0.9819723963737488,grad_norm: 0.7110183637368974, iteration: 388816
loss: 1.0081449747085571,grad_norm: 0.7950592959100142, iteration: 388817
loss: 0.999895453453064,grad_norm: 0.8821079248302902, iteration: 388818
loss: 0.9731877446174622,grad_norm: 0.6854133093375564, iteration: 388819
loss: 1.0005756616592407,grad_norm: 0.7926281676011959, iteration: 388820
loss: 0.9911994338035583,grad_norm: 0.6930464931806589, iteration: 388821
loss: 1.0196733474731445,grad_norm: 0.9567002735441348, iteration: 388822
loss: 1.053357481956482,grad_norm: 0.8150382438477654, iteration: 388823
loss: 1.0174514055252075,grad_norm: 0.9999991141173604, iteration: 388824
loss: 0.978322446346283,grad_norm: 0.7745538555786888, iteration: 388825
loss: 1.0428321361541748,grad_norm: 0.7631358522280871, iteration: 388826
loss: 1.0986138582229614,grad_norm: 0.9999999206902832, iteration: 388827
loss: 1.0276929140090942,grad_norm: 0.8202462957377583, iteration: 388828
loss: 1.0063905715942383,grad_norm: 0.7422275757983778, iteration: 388829
loss: 0.9781672358512878,grad_norm: 0.6966479166019197, iteration: 388830
loss: 1.0149788856506348,grad_norm: 0.7800207299031969, iteration: 388831
loss: 0.9901397228240967,grad_norm: 0.7137216400877103, iteration: 388832
loss: 0.9856155514717102,grad_norm: 0.7151011734776263, iteration: 388833
loss: 1.0330336093902588,grad_norm: 0.7688416842712386, iteration: 388834
loss: 1.0396977663040161,grad_norm: 0.6582285639834582, iteration: 388835
loss: 1.0329644680023193,grad_norm: 0.7965222929616719, iteration: 388836
loss: 0.9829679727554321,grad_norm: 0.7688772575061384, iteration: 388837
loss: 0.9818572998046875,grad_norm: 0.7800544383947222, iteration: 388838
loss: 0.9862903356552124,grad_norm: 0.7153736222626691, iteration: 388839
loss: 1.0071663856506348,grad_norm: 0.8102530888854503, iteration: 388840
loss: 0.9587818384170532,grad_norm: 0.7094782831454386, iteration: 388841
loss: 0.9902505278587341,grad_norm: 0.6264149036323695, iteration: 388842
loss: 1.004082202911377,grad_norm: 0.7622501754524524, iteration: 388843
loss: 1.0043679475784302,grad_norm: 0.7424228491957934, iteration: 388844
loss: 1.0194835662841797,grad_norm: 0.8335423281977219, iteration: 388845
loss: 0.9568599462509155,grad_norm: 0.6656693777190088, iteration: 388846
loss: 1.032633662223816,grad_norm: 0.7128502187808735, iteration: 388847
loss: 0.9905148148536682,grad_norm: 0.5468156407910191, iteration: 388848
loss: 0.9993644952774048,grad_norm: 0.9144659707321481, iteration: 388849
loss: 1.0062962770462036,grad_norm: 0.9095609516523931, iteration: 388850
loss: 0.9881669878959656,grad_norm: 0.7594078514755833, iteration: 388851
loss: 0.9729700684547424,grad_norm: 0.9999994649947762, iteration: 388852
loss: 1.0047252178192139,grad_norm: 0.689619578478084, iteration: 388853
loss: 0.9994299411773682,grad_norm: 0.7485507133836251, iteration: 388854
loss: 1.020326852798462,grad_norm: 0.8613758866417134, iteration: 388855
loss: 0.9819121360778809,grad_norm: 0.8250412567613493, iteration: 388856
loss: 1.0247511863708496,grad_norm: 0.6772781421099792, iteration: 388857
loss: 1.0060150623321533,grad_norm: 0.7785106305638988, iteration: 388858
loss: 0.9744085073471069,grad_norm: 0.789512958501991, iteration: 388859
loss: 0.963301956653595,grad_norm: 0.8720071181780603, iteration: 388860
loss: 0.98313969373703,grad_norm: 0.7791881122697984, iteration: 388861
loss: 0.9676860570907593,grad_norm: 0.7584360490096591, iteration: 388862
loss: 0.9748594760894775,grad_norm: 0.8379192561638119, iteration: 388863
loss: 0.9812087416648865,grad_norm: 0.7892346136863677, iteration: 388864
loss: 1.0089102983474731,grad_norm: 0.8490720952187619, iteration: 388865
loss: 0.9789314866065979,grad_norm: 0.6770935883176794, iteration: 388866
loss: 0.9731133580207825,grad_norm: 0.7136568324002887, iteration: 388867
loss: 0.991364598274231,grad_norm: 0.6707330425744139, iteration: 388868
loss: 1.0044440031051636,grad_norm: 0.7188693521317624, iteration: 388869
loss: 0.9826355576515198,grad_norm: 0.795198257327845, iteration: 388870
loss: 1.0447845458984375,grad_norm: 0.9999990565489973, iteration: 388871
loss: 0.9646279215812683,grad_norm: 0.6673710811269167, iteration: 388872
loss: 0.9983765482902527,grad_norm: 0.9332519751353098, iteration: 388873
loss: 0.9651294946670532,grad_norm: 0.868871464302717, iteration: 388874
loss: 0.9996908903121948,grad_norm: 0.8049345667376754, iteration: 388875
loss: 0.9977004528045654,grad_norm: 0.7253463863639474, iteration: 388876
loss: 0.9740911722183228,grad_norm: 0.6974363017384261, iteration: 388877
loss: 0.997189998626709,grad_norm: 0.7216547027972019, iteration: 388878
loss: 1.0422179698944092,grad_norm: 0.7295419731484705, iteration: 388879
loss: 1.1039934158325195,grad_norm: 0.9999998847982186, iteration: 388880
loss: 1.0323677062988281,grad_norm: 0.7987737849896644, iteration: 388881
loss: 1.007412314414978,grad_norm: 0.7449847503123308, iteration: 388882
loss: 1.0533149242401123,grad_norm: 0.8566736677359462, iteration: 388883
loss: 0.9422944188117981,grad_norm: 0.9113625518726501, iteration: 388884
loss: 1.1880908012390137,grad_norm: 0.9999998916236836, iteration: 388885
loss: 0.9619193077087402,grad_norm: 0.6936962703708545, iteration: 388886
loss: 1.3952336311340332,grad_norm: 0.9999995225647682, iteration: 388887
loss: 0.9897199273109436,grad_norm: 0.7509651968260691, iteration: 388888
loss: 0.989393949508667,grad_norm: 0.8355027178676209, iteration: 388889
loss: 0.9661807417869568,grad_norm: 0.818032439816487, iteration: 388890
loss: 0.9825711250305176,grad_norm: 0.8414777404714909, iteration: 388891
loss: 1.0411088466644287,grad_norm: 0.990001357047903, iteration: 388892
loss: 1.0717300176620483,grad_norm: 0.9467814155900428, iteration: 388893
loss: 0.9986991882324219,grad_norm: 0.8932769522724375, iteration: 388894
loss: 0.9993873834609985,grad_norm: 0.9533088610524917, iteration: 388895
loss: 0.9762172698974609,grad_norm: 0.8015815287719285, iteration: 388896
loss: 1.0040539503097534,grad_norm: 0.7313523130118994, iteration: 388897
loss: 1.0186188220977783,grad_norm: 0.9339039092765113, iteration: 388898
loss: 1.0297749042510986,grad_norm: 0.8310734699881077, iteration: 388899
loss: 1.1209006309509277,grad_norm: 0.9999994116914832, iteration: 388900
loss: 1.0769952535629272,grad_norm: 0.9999994039444863, iteration: 388901
loss: 0.973690390586853,grad_norm: 0.8796396449996968, iteration: 388902
loss: 1.0024049282073975,grad_norm: 0.8192680722779958, iteration: 388903
loss: 0.9890580177307129,grad_norm: 0.8430700155665661, iteration: 388904
loss: 1.0463669300079346,grad_norm: 0.7847424108867755, iteration: 388905
loss: 1.0068241357803345,grad_norm: 0.8072133740580396, iteration: 388906
loss: 1.0950103998184204,grad_norm: 0.9999996787779674, iteration: 388907
loss: 1.0127993822097778,grad_norm: 0.7462083431753501, iteration: 388908
loss: 1.0466824769973755,grad_norm: 0.8143636810254375, iteration: 388909
loss: 0.9919872879981995,grad_norm: 0.861782100960199, iteration: 388910
loss: 1.0360387563705444,grad_norm: 0.7737913817675175, iteration: 388911
loss: 1.144309163093567,grad_norm: 0.9097505915044749, iteration: 388912
loss: 1.0015283823013306,grad_norm: 0.8247186116565094, iteration: 388913
loss: 0.9886136651039124,grad_norm: 0.8217391011432045, iteration: 388914
loss: 1.0278599262237549,grad_norm: 0.893000105815986, iteration: 388915
loss: 1.0825958251953125,grad_norm: 0.8572969625681621, iteration: 388916
loss: 1.0535510778427124,grad_norm: 0.7882679584851826, iteration: 388917
loss: 0.9966416954994202,grad_norm: 0.9181317361250787, iteration: 388918
loss: 0.9843471646308899,grad_norm: 0.8677326975523535, iteration: 388919
loss: 1.0214747190475464,grad_norm: 0.8103483343183479, iteration: 388920
loss: 0.9619767665863037,grad_norm: 0.7522255189217217, iteration: 388921
loss: 1.0324984788894653,grad_norm: 0.9244440594313602, iteration: 388922
loss: 1.0032120943069458,grad_norm: 0.6997720579411709, iteration: 388923
loss: 0.9859398603439331,grad_norm: 0.731143850350569, iteration: 388924
loss: 0.9731088280677795,grad_norm: 0.7029642357821736, iteration: 388925
loss: 1.000327706336975,grad_norm: 0.8212747395898322, iteration: 388926
loss: 0.970097005367279,grad_norm: 0.7038110684753656, iteration: 388927
loss: 0.9903624057769775,grad_norm: 0.7548935081681085, iteration: 388928
loss: 1.0228267908096313,grad_norm: 0.9999990969192001, iteration: 388929
loss: 1.0002272129058838,grad_norm: 0.9840201506095694, iteration: 388930
loss: 1.0334315299987793,grad_norm: 0.999999273310727, iteration: 388931
loss: 1.0066144466400146,grad_norm: 0.7521400654770406, iteration: 388932
loss: 1.0277018547058105,grad_norm: 0.8224626532776869, iteration: 388933
loss: 0.9705076217651367,grad_norm: 0.9999991559786906, iteration: 388934
loss: 0.958827555179596,grad_norm: 0.9717297470481348, iteration: 388935
loss: 1.0456011295318604,grad_norm: 0.9814260601977718, iteration: 388936
loss: 0.9906626343727112,grad_norm: 0.8112818504955362, iteration: 388937
loss: 0.9785051345825195,grad_norm: 0.8660904616086956, iteration: 388938
loss: 0.9801695346832275,grad_norm: 0.7464784003192769, iteration: 388939
loss: 1.011751651763916,grad_norm: 0.985052972200351, iteration: 388940
loss: 1.0002435445785522,grad_norm: 0.7097685722595662, iteration: 388941
loss: 0.9714630246162415,grad_norm: 0.8374267748549042, iteration: 388942
loss: 0.99051433801651,grad_norm: 0.7678741588514701, iteration: 388943
loss: 0.9639041423797607,grad_norm: 0.6716645348293755, iteration: 388944
loss: 0.9447829723358154,grad_norm: 0.7607484903001652, iteration: 388945
loss: 1.058592677116394,grad_norm: 0.9999998159373791, iteration: 388946
loss: 1.0210778713226318,grad_norm: 0.7421222924332428, iteration: 388947
loss: 1.122644305229187,grad_norm: 0.999999672270562, iteration: 388948
loss: 1.0048850774765015,grad_norm: 0.7331928737116592, iteration: 388949
loss: 1.0143357515335083,grad_norm: 0.880503978868447, iteration: 388950
loss: 0.9580074548721313,grad_norm: 0.786724680782286, iteration: 388951
loss: 1.022666573524475,grad_norm: 0.9407113196116027, iteration: 388952
loss: 1.0114516019821167,grad_norm: 0.7564113564705323, iteration: 388953
loss: 0.9866809844970703,grad_norm: 0.8511320664690374, iteration: 388954
loss: 1.0130255222320557,grad_norm: 0.6614415915403503, iteration: 388955
loss: 0.9806174635887146,grad_norm: 0.7529394937366981, iteration: 388956
loss: 1.007470726966858,grad_norm: 0.874365473418187, iteration: 388957
loss: 1.0445448160171509,grad_norm: 0.8424291208565411, iteration: 388958
loss: 0.981613039970398,grad_norm: 0.8226996310265905, iteration: 388959
loss: 1.007973551750183,grad_norm: 0.8986910394092106, iteration: 388960
loss: 1.0000382661819458,grad_norm: 0.7329969367683145, iteration: 388961
loss: 0.9691483974456787,grad_norm: 0.7383919452305001, iteration: 388962
loss: 0.97893226146698,grad_norm: 0.7864778956983616, iteration: 388963
loss: 1.0222818851470947,grad_norm: 0.8414876490949272, iteration: 388964
loss: 0.9917334914207458,grad_norm: 0.6428923138352015, iteration: 388965
loss: 0.9932208061218262,grad_norm: 0.8231278177079374, iteration: 388966
loss: 1.0035115480422974,grad_norm: 0.7197470375780424, iteration: 388967
loss: 0.9957705140113831,grad_norm: 0.655239040978319, iteration: 388968
loss: 0.9875653982162476,grad_norm: 0.9816448927012758, iteration: 388969
loss: 0.9664496779441833,grad_norm: 0.799605487471081, iteration: 388970
loss: 0.9779229760169983,grad_norm: 0.6804944236861056, iteration: 388971
loss: 1.0902996063232422,grad_norm: 0.8960485332440595, iteration: 388972
loss: 1.0375665426254272,grad_norm: 0.691465339994165, iteration: 388973
loss: 1.003804087638855,grad_norm: 0.6819298286849546, iteration: 388974
loss: 1.024504542350769,grad_norm: 0.8249601681302167, iteration: 388975
loss: 0.9851133227348328,grad_norm: 0.7560391976429692, iteration: 388976
loss: 1.0014533996582031,grad_norm: 0.791168245331669, iteration: 388977
loss: 1.0128953456878662,grad_norm: 0.9999990296457267, iteration: 388978
loss: 1.020743489265442,grad_norm: 0.8209624168944158, iteration: 388979
loss: 0.9889414310455322,grad_norm: 0.9494974075673274, iteration: 388980
loss: 1.042105793952942,grad_norm: 0.9999993448040627, iteration: 388981
loss: 1.0096131563186646,grad_norm: 0.7979325223679417, iteration: 388982
loss: 0.9952072501182556,grad_norm: 0.9999999639090845, iteration: 388983
loss: 1.0260002613067627,grad_norm: 0.9999997223966719, iteration: 388984
loss: 0.9876570105552673,grad_norm: 0.7977680744885924, iteration: 388985
loss: 1.034399151802063,grad_norm: 0.8103633852437842, iteration: 388986
loss: 0.9886438846588135,grad_norm: 0.8177338431194174, iteration: 388987
loss: 1.0050442218780518,grad_norm: 0.8089937034914106, iteration: 388988
loss: 1.0518325567245483,grad_norm: 0.999999260681549, iteration: 388989
loss: 0.948057234287262,grad_norm: 0.9532472825521375, iteration: 388990
loss: 1.0053980350494385,grad_norm: 0.8356324710935268, iteration: 388991
loss: 1.0169873237609863,grad_norm: 0.9999999306058759, iteration: 388992
loss: 0.9837607741355896,grad_norm: 0.9999993834973624, iteration: 388993
loss: 1.0263574123382568,grad_norm: 0.7441515313441583, iteration: 388994
loss: 1.0141102075576782,grad_norm: 0.9999993354269215, iteration: 388995
loss: 0.9949188828468323,grad_norm: 0.9999997368758606, iteration: 388996
loss: 1.0380498170852661,grad_norm: 0.9999998216077665, iteration: 388997
loss: 1.0020672082901,grad_norm: 0.6940590602070432, iteration: 388998
loss: 1.0008933544158936,grad_norm: 0.7217528622430336, iteration: 388999
loss: 1.0049763917922974,grad_norm: 0.6573200998038421, iteration: 389000
loss: 1.0203487873077393,grad_norm: 0.7579539456617455, iteration: 389001
loss: 1.0580507516860962,grad_norm: 0.7174829547358126, iteration: 389002
loss: 1.0077154636383057,grad_norm: 0.7680482236754842, iteration: 389003
loss: 1.0077892541885376,grad_norm: 0.7394520438718631, iteration: 389004
loss: 1.0226362943649292,grad_norm: 0.8611003752892954, iteration: 389005
loss: 0.9845815300941467,grad_norm: 0.8587661709571139, iteration: 389006
loss: 0.9778132438659668,grad_norm: 0.6689218580308768, iteration: 389007
loss: 0.9852434992790222,grad_norm: 0.7380995268360219, iteration: 389008
loss: 0.9958826899528503,grad_norm: 0.7225803703855257, iteration: 389009
loss: 0.9722587466239929,grad_norm: 0.7793790114541719, iteration: 389010
loss: 1.00182044506073,grad_norm: 0.9999989906774651, iteration: 389011
loss: 0.998322606086731,grad_norm: 0.9379307098636206, iteration: 389012
loss: 0.9778874516487122,grad_norm: 0.9999990161673924, iteration: 389013
loss: 0.9970642328262329,grad_norm: 0.9999999810653417, iteration: 389014
loss: 1.0492119789123535,grad_norm: 0.9999993576547395, iteration: 389015
loss: 0.9809855818748474,grad_norm: 0.7339573335271776, iteration: 389016
loss: 1.0240285396575928,grad_norm: 0.9049532445824047, iteration: 389017
loss: 1.0046581029891968,grad_norm: 0.9448933505162583, iteration: 389018
loss: 1.0565000772476196,grad_norm: 0.9999993945804713, iteration: 389019
loss: 0.9970798492431641,grad_norm: 0.9370240951609543, iteration: 389020
loss: 0.9605622291564941,grad_norm: 0.786920407364931, iteration: 389021
loss: 1.0242420434951782,grad_norm: 0.7726686067010068, iteration: 389022
loss: 0.9705058336257935,grad_norm: 0.7245545929829089, iteration: 389023
loss: 1.026422142982483,grad_norm: 0.9773990537010321, iteration: 389024
loss: 1.0130337476730347,grad_norm: 0.8995484931940914, iteration: 389025
loss: 0.9994893670082092,grad_norm: 0.769981586878311, iteration: 389026
loss: 1.035519003868103,grad_norm: 0.71099523233842, iteration: 389027
loss: 1.0643939971923828,grad_norm: 0.8825602879379824, iteration: 389028
loss: 1.0909221172332764,grad_norm: 0.7902505531150781, iteration: 389029
loss: 0.9553111791610718,grad_norm: 0.7542321007321385, iteration: 389030
loss: 0.9775090217590332,grad_norm: 0.8236151596403481, iteration: 389031
loss: 1.0803226232528687,grad_norm: 0.8595834551881004, iteration: 389032
loss: 0.981459379196167,grad_norm: 0.8184214604399704, iteration: 389033
loss: 0.94484943151474,grad_norm: 0.66295030825007, iteration: 389034
loss: 1.054616093635559,grad_norm: 0.9999996835553793, iteration: 389035
loss: 1.0639853477478027,grad_norm: 0.9999991492561245, iteration: 389036
loss: 0.9768756628036499,grad_norm: 0.696296768321846, iteration: 389037
loss: 1.0165534019470215,grad_norm: 0.8116882124340353, iteration: 389038
loss: 0.9990025758743286,grad_norm: 0.6578707116205428, iteration: 389039
loss: 1.0063767433166504,grad_norm: 0.9687287265415396, iteration: 389040
loss: 1.0776294469833374,grad_norm: 0.9999997871888214, iteration: 389041
loss: 0.9663148522377014,grad_norm: 0.6468203923740117, iteration: 389042
loss: 0.9974872469902039,grad_norm: 0.8727406911676606, iteration: 389043
loss: 1.041176199913025,grad_norm: 0.7348398154886693, iteration: 389044
loss: 1.04103684425354,grad_norm: 0.8334057057043418, iteration: 389045
loss: 1.0951951742172241,grad_norm: 0.9999989681821638, iteration: 389046
loss: 1.047753930091858,grad_norm: 0.9721040927113925, iteration: 389047
loss: 1.0269190073013306,grad_norm: 0.7996979336183343, iteration: 389048
loss: 1.0007340908050537,grad_norm: 0.8650271049284872, iteration: 389049
loss: 0.9924383163452148,grad_norm: 0.818810802512202, iteration: 389050
loss: 1.0060606002807617,grad_norm: 0.8045424978485654, iteration: 389051
loss: 0.9945119023323059,grad_norm: 0.7497008260874715, iteration: 389052
loss: 1.0003715753555298,grad_norm: 0.7601381426014056, iteration: 389053
loss: 1.0180306434631348,grad_norm: 0.9999992254869057, iteration: 389054
loss: 0.9912903308868408,grad_norm: 0.9999994503582172, iteration: 389055
loss: 0.9895362257957458,grad_norm: 0.7277611679658982, iteration: 389056
loss: 1.0356543064117432,grad_norm: 0.9334740858248093, iteration: 389057
loss: 0.9689974188804626,grad_norm: 0.769082928786583, iteration: 389058
loss: 0.9693150520324707,grad_norm: 0.7840759127412464, iteration: 389059
loss: 1.0041028261184692,grad_norm: 0.8977370290865307, iteration: 389060
loss: 1.1142922639846802,grad_norm: 0.9999995434352494, iteration: 389061
loss: 0.9794462323188782,grad_norm: 0.7220518461240433, iteration: 389062
loss: 1.009461522102356,grad_norm: 0.7787732616101386, iteration: 389063
loss: 1.0136064291000366,grad_norm: 0.9561915994085407, iteration: 389064
loss: 1.0690101385116577,grad_norm: 0.8852260341689546, iteration: 389065
loss: 0.962588369846344,grad_norm: 0.999998942855747, iteration: 389066
loss: 1.0473369359970093,grad_norm: 0.9999994990397449, iteration: 389067
loss: 1.0036561489105225,grad_norm: 0.663238296688158, iteration: 389068
loss: 0.993366539478302,grad_norm: 0.6917826624104273, iteration: 389069
loss: 0.979022741317749,grad_norm: 0.8293750217330285, iteration: 389070
loss: 1.0089685916900635,grad_norm: 0.8819497978498427, iteration: 389071
loss: 1.0023678541183472,grad_norm: 0.872578680111442, iteration: 389072
loss: 1.0067209005355835,grad_norm: 0.6718478956349668, iteration: 389073
loss: 1.054764986038208,grad_norm: 0.9999993239903434, iteration: 389074
loss: 0.9523971080780029,grad_norm: 0.8476038987040936, iteration: 389075
loss: 0.9777649641036987,grad_norm: 0.8723099576089585, iteration: 389076
loss: 0.9759267568588257,grad_norm: 0.9959981309598321, iteration: 389077
loss: 1.24723482131958,grad_norm: 1.0000000417745345, iteration: 389078
loss: 0.9323990345001221,grad_norm: 0.8337279221490147, iteration: 389079
loss: 0.9711276292800903,grad_norm: 0.6441685877323793, iteration: 389080
loss: 0.9839181303977966,grad_norm: 0.858191733377347, iteration: 389081
loss: 0.9681219458580017,grad_norm: 0.7599603584343825, iteration: 389082
loss: 0.960265576839447,grad_norm: 0.8195560039612697, iteration: 389083
loss: 1.0526877641677856,grad_norm: 0.9999996438311503, iteration: 389084
loss: 1.0469611883163452,grad_norm: 0.999999536818, iteration: 389085
loss: 1.0722016096115112,grad_norm: 0.999999513567254, iteration: 389086
loss: 1.0743722915649414,grad_norm: 0.7029845188437015, iteration: 389087
loss: 0.9901273250579834,grad_norm: 0.8208133023972968, iteration: 389088
loss: 1.0953999757766724,grad_norm: 0.9999992163404678, iteration: 389089
loss: 1.0891591310501099,grad_norm: 0.999999843943342, iteration: 389090
loss: 1.0368953943252563,grad_norm: 0.7727758760809449, iteration: 389091
loss: 0.9867240190505981,grad_norm: 0.9999992351070429, iteration: 389092
loss: 1.0093077421188354,grad_norm: 0.8167745807762707, iteration: 389093
loss: 1.0060267448425293,grad_norm: 0.7532382552257924, iteration: 389094
loss: 1.014302372932434,grad_norm: 0.9999991762802534, iteration: 389095
loss: 1.007042646408081,grad_norm: 0.8753951392892303, iteration: 389096
loss: 0.9924022555351257,grad_norm: 0.9001850946734636, iteration: 389097
loss: 1.0176961421966553,grad_norm: 0.6326878895645435, iteration: 389098
loss: 1.0153367519378662,grad_norm: 0.7658831045785653, iteration: 389099
loss: 0.9728199243545532,grad_norm: 0.7562931229776559, iteration: 389100
loss: 1.0152798891067505,grad_norm: 0.7241368799143306, iteration: 389101
loss: 0.9898810386657715,grad_norm: 0.966942528864041, iteration: 389102
loss: 1.0003331899642944,grad_norm: 0.9999990093790779, iteration: 389103
loss: 0.9977301955223083,grad_norm: 0.8098553280269513, iteration: 389104
loss: 1.0135445594787598,grad_norm: 0.7206112703629702, iteration: 389105
loss: 0.9902654886245728,grad_norm: 0.7463370709580907, iteration: 389106
loss: 1.0569641590118408,grad_norm: 0.9999991428431455, iteration: 389107
loss: 1.0102671384811401,grad_norm: 0.8081980318804597, iteration: 389108
loss: 1.036725401878357,grad_norm: 0.9999991379844446, iteration: 389109
loss: 1.015811562538147,grad_norm: 0.7516522522923371, iteration: 389110
loss: 1.002102255821228,grad_norm: 0.7745315275358972, iteration: 389111
loss: 1.0294402837753296,grad_norm: 0.999999180066975, iteration: 389112
loss: 1.0508276224136353,grad_norm: 0.9999992287986716, iteration: 389113
loss: 0.9618780612945557,grad_norm: 0.7464082051572717, iteration: 389114
loss: 0.9992658495903015,grad_norm: 0.7726735286262697, iteration: 389115
loss: 1.008725881576538,grad_norm: 0.6958957939344863, iteration: 389116
loss: 0.9804335236549377,grad_norm: 0.7732268430782635, iteration: 389117
loss: 1.0109922885894775,grad_norm: 0.7502006675974443, iteration: 389118
loss: 0.9943177103996277,grad_norm: 0.904026793806924, iteration: 389119
loss: 1.0161858797073364,grad_norm: 0.957836213325658, iteration: 389120
loss: 0.9891111254692078,grad_norm: 0.7917051498259439, iteration: 389121
loss: 1.0401482582092285,grad_norm: 0.999999778027032, iteration: 389122
loss: 0.9985731244087219,grad_norm: 0.725287401883627, iteration: 389123
loss: 1.0023256540298462,grad_norm: 0.811494281430321, iteration: 389124
loss: 1.021492600440979,grad_norm: 0.7549975938463828, iteration: 389125
loss: 1.0005269050598145,grad_norm: 0.7457734636492926, iteration: 389126
loss: 1.0007576942443848,grad_norm: 0.8472818776312302, iteration: 389127
loss: 0.9907087087631226,grad_norm: 0.9999996466551418, iteration: 389128
loss: 1.0143780708312988,grad_norm: 0.9999998911852738, iteration: 389129
loss: 1.0028473138809204,grad_norm: 0.7770123014927814, iteration: 389130
loss: 1.0171971321105957,grad_norm: 0.6882508532459992, iteration: 389131
loss: 1.0295346975326538,grad_norm: 0.8076620652790204, iteration: 389132
loss: 0.9900451898574829,grad_norm: 0.7945406583449163, iteration: 389133
loss: 1.176174283027649,grad_norm: 0.9999990601948765, iteration: 389134
loss: 1.01996910572052,grad_norm: 0.9614282440638442, iteration: 389135
loss: 1.0235673189163208,grad_norm: 0.7161469065559212, iteration: 389136
loss: 1.0202299356460571,grad_norm: 0.999999166576559, iteration: 389137
loss: 1.0648491382598877,grad_norm: 0.8437667068415081, iteration: 389138
loss: 1.0096964836120605,grad_norm: 0.9999992933868702, iteration: 389139
loss: 0.9841946959495544,grad_norm: 0.6841859080153226, iteration: 389140
loss: 1.0139126777648926,grad_norm: 0.766960628998652, iteration: 389141
loss: 1.0426307916641235,grad_norm: 0.6933197112056488, iteration: 389142
loss: 1.0271003246307373,grad_norm: 0.9257091876323775, iteration: 389143
loss: 1.0584627389907837,grad_norm: 0.7458525156762134, iteration: 389144
loss: 0.9562888741493225,grad_norm: 0.7692557691670732, iteration: 389145
loss: 1.02524995803833,grad_norm: 0.8532706274572368, iteration: 389146
loss: 0.9852434396743774,grad_norm: 0.8880831340147937, iteration: 389147
loss: 1.0055952072143555,grad_norm: 0.8314458837454807, iteration: 389148
loss: 1.029300570487976,grad_norm: 0.6060190683368863, iteration: 389149
loss: 1.003318190574646,grad_norm: 0.7270746699967652, iteration: 389150
loss: 1.0170713663101196,grad_norm: 0.9999991203819217, iteration: 389151
loss: 0.9991232752799988,grad_norm: 0.7062850823677118, iteration: 389152
loss: 0.9952454566955566,grad_norm: 0.7590658314847263, iteration: 389153
loss: 0.9726241230964661,grad_norm: 0.855456636599674, iteration: 389154
loss: 0.9817179441452026,grad_norm: 0.8047920126274355, iteration: 389155
loss: 1.212687611579895,grad_norm: 0.9999999161019975, iteration: 389156
loss: 0.9796084761619568,grad_norm: 0.6757327512813102, iteration: 389157
loss: 1.0010119676589966,grad_norm: 0.8162071998780389, iteration: 389158
loss: 0.9972097277641296,grad_norm: 0.999999849704954, iteration: 389159
loss: 0.9687975645065308,grad_norm: 0.7377052213088443, iteration: 389160
loss: 0.945556640625,grad_norm: 0.8827838433416472, iteration: 389161
loss: 1.0657384395599365,grad_norm: 0.9999996864449959, iteration: 389162
loss: 0.9449211359024048,grad_norm: 0.7585927087447542, iteration: 389163
loss: 1.0001425743103027,grad_norm: 0.8712313688008174, iteration: 389164
loss: 1.0049282312393188,grad_norm: 0.7094213005564671, iteration: 389165
loss: 0.9660529494285583,grad_norm: 0.7897364220103343, iteration: 389166
loss: 1.0896192789077759,grad_norm: 0.9999992337043315, iteration: 389167
loss: 1.1823205947875977,grad_norm: 0.9999998674764581, iteration: 389168
loss: 1.0129204988479614,grad_norm: 0.8868075505869747, iteration: 389169
loss: 0.999911367893219,grad_norm: 0.8135053136212078, iteration: 389170
loss: 0.9787734746932983,grad_norm: 0.8492962494051085, iteration: 389171
loss: 1.0023179054260254,grad_norm: 0.6761168715574583, iteration: 389172
loss: 0.9906453490257263,grad_norm: 0.744738658460588, iteration: 389173
loss: 1.0318686962127686,grad_norm: 0.7711838581546478, iteration: 389174
loss: 1.0413519144058228,grad_norm: 0.9999999560339582, iteration: 389175
loss: 1.1502182483673096,grad_norm: 0.9999998891197378, iteration: 389176
loss: 1.0612839460372925,grad_norm: 0.9999991253235538, iteration: 389177
loss: 1.015306830406189,grad_norm: 0.8170500226254421, iteration: 389178
loss: 1.1114386320114136,grad_norm: 1.0000000497110413, iteration: 389179
loss: 1.0572255849838257,grad_norm: 0.984750828389513, iteration: 389180
loss: 0.9754179120063782,grad_norm: 0.8461550288446511, iteration: 389181
loss: 1.0718449354171753,grad_norm: 0.9999999362086305, iteration: 389182
loss: 1.042034387588501,grad_norm: 0.9999998816295744, iteration: 389183
loss: 0.9966961741447449,grad_norm: 0.6896515346460745, iteration: 389184
loss: 0.9798458218574524,grad_norm: 0.9354677202260825, iteration: 389185
loss: 1.0287312269210815,grad_norm: 0.9999991579113193, iteration: 389186
loss: 1.0297298431396484,grad_norm: 0.9760051456844115, iteration: 389187
loss: 1.1002442836761475,grad_norm: 0.9999994355633741, iteration: 389188
loss: 1.0030579566955566,grad_norm: 0.6843138652840611, iteration: 389189
loss: 0.9947585463523865,grad_norm: 0.7656509642987129, iteration: 389190
loss: 0.999269962310791,grad_norm: 0.6934606129834846, iteration: 389191
loss: 1.0303494930267334,grad_norm: 0.9156646549698761, iteration: 389192
loss: 0.9874839186668396,grad_norm: 0.9383298578327292, iteration: 389193
loss: 1.0394084453582764,grad_norm: 0.9937418743235678, iteration: 389194
loss: 1.047896146774292,grad_norm: 0.9999995527828158, iteration: 389195
loss: 1.0100147724151611,grad_norm: 0.9013977148430716, iteration: 389196
loss: 0.9860422611236572,grad_norm: 0.7307508255872539, iteration: 389197
loss: 0.9718316793441772,grad_norm: 0.7196183544234513, iteration: 389198
loss: 1.000399112701416,grad_norm: 0.6684813513446152, iteration: 389199
loss: 0.9962807297706604,grad_norm: 0.8235616461785755, iteration: 389200
loss: 1.0019043684005737,grad_norm: 0.8383119183412691, iteration: 389201
loss: 1.0242316722869873,grad_norm: 0.9999998196495968, iteration: 389202
loss: 1.008604884147644,grad_norm: 0.9272572975588749, iteration: 389203
loss: 1.0156522989273071,grad_norm: 0.7773541935698586, iteration: 389204
loss: 0.9987507462501526,grad_norm: 0.999999320057367, iteration: 389205
loss: 0.9951515197753906,grad_norm: 0.8257653701654386, iteration: 389206
loss: 0.9941763877868652,grad_norm: 0.8009651744475902, iteration: 389207
loss: 1.032198429107666,grad_norm: 0.9999990258069658, iteration: 389208
loss: 1.020944595336914,grad_norm: 0.7167611582076505, iteration: 389209
loss: 0.9544568061828613,grad_norm: 0.8103966049525227, iteration: 389210
loss: 1.0825632810592651,grad_norm: 0.9999993205566864, iteration: 389211
loss: 1.004024624824524,grad_norm: 0.7499032673803544, iteration: 389212
loss: 1.0060728788375854,grad_norm: 0.9999996655397159, iteration: 389213
loss: 1.017956256866455,grad_norm: 0.9089915762531285, iteration: 389214
loss: 1.0328621864318848,grad_norm: 0.794486156316642, iteration: 389215
loss: 0.9914246201515198,grad_norm: 0.7007477121450353, iteration: 389216
loss: 1.0047837495803833,grad_norm: 0.6699980893208576, iteration: 389217
loss: 0.9687961935997009,grad_norm: 0.7762130474901086, iteration: 389218
loss: 1.007908582687378,grad_norm: 0.7778495459605831, iteration: 389219
loss: 1.0176401138305664,grad_norm: 0.7275461263512139, iteration: 389220
loss: 0.99518221616745,grad_norm: 0.690015933464289, iteration: 389221
loss: 0.9903603792190552,grad_norm: 0.7519742841008855, iteration: 389222
loss: 0.988537609577179,grad_norm: 0.9812979175517912, iteration: 389223
loss: 1.0146404504776,grad_norm: 0.7347080178173847, iteration: 389224
loss: 0.9810526371002197,grad_norm: 0.7431730033542108, iteration: 389225
loss: 0.9651331901550293,grad_norm: 0.8746288565279046, iteration: 389226
loss: 1.0313961505889893,grad_norm: 0.9196373548217228, iteration: 389227
loss: 1.075370192527771,grad_norm: 0.9999991347157611, iteration: 389228
loss: 0.9443104863166809,grad_norm: 0.7777163480635861, iteration: 389229
loss: 0.9979550242424011,grad_norm: 0.9226026319246952, iteration: 389230
loss: 1.0145224332809448,grad_norm: 0.9846438618970771, iteration: 389231
loss: 0.9735196232795715,grad_norm: 0.7553939721889354, iteration: 389232
loss: 1.0250188112258911,grad_norm: 0.9999991135867682, iteration: 389233
loss: 1.0177741050720215,grad_norm: 0.793840561798137, iteration: 389234
loss: 0.9822965264320374,grad_norm: 0.8959350316817727, iteration: 389235
loss: 0.9816321134567261,grad_norm: 0.9999991192321581, iteration: 389236
loss: 0.9896923303604126,grad_norm: 0.7370238262392724, iteration: 389237
loss: 1.0016021728515625,grad_norm: 0.7201857748628447, iteration: 389238
loss: 1.0560510158538818,grad_norm: 0.8799511642407261, iteration: 389239
loss: 0.992409348487854,grad_norm: 0.8260368555281212, iteration: 389240
loss: 1.1243624687194824,grad_norm: 0.9999997078283226, iteration: 389241
loss: 1.0415195226669312,grad_norm: 0.7670106932187869, iteration: 389242
loss: 1.0183948278427124,grad_norm: 0.6361223999138377, iteration: 389243
loss: 0.9878409504890442,grad_norm: 0.8477221297453355, iteration: 389244
loss: 0.9905551671981812,grad_norm: 0.8360639876470293, iteration: 389245
loss: 1.0351818799972534,grad_norm: 0.689816432276988, iteration: 389246
loss: 1.0075920820236206,grad_norm: 0.8275078994239164, iteration: 389247
loss: 1.0331588983535767,grad_norm: 0.770380474750994, iteration: 389248
loss: 1.0078614950180054,grad_norm: 0.8201895175119195, iteration: 389249
loss: 0.9873365759849548,grad_norm: 0.663349939093449, iteration: 389250
loss: 1.011066198348999,grad_norm: 0.811650123741755, iteration: 389251
loss: 1.297625184059143,grad_norm: 0.9999996972226722, iteration: 389252
loss: 1.0064293146133423,grad_norm: 0.818673712964, iteration: 389253
loss: 0.9950441718101501,grad_norm: 0.7096426861639278, iteration: 389254
loss: 1.0131268501281738,grad_norm: 0.8130173731948651, iteration: 389255
loss: 1.0351042747497559,grad_norm: 0.8797425273386026, iteration: 389256
loss: 1.0084202289581299,grad_norm: 0.8566299271440851, iteration: 389257
loss: 0.9799684882164001,grad_norm: 0.6629678337057011, iteration: 389258
loss: 1.0424655675888062,grad_norm: 0.7316623057227638, iteration: 389259
loss: 1.0024969577789307,grad_norm: 0.99999990642992, iteration: 389260
loss: 0.9442203044891357,grad_norm: 0.8393095839470314, iteration: 389261
loss: 1.1412290334701538,grad_norm: 0.999999113306871, iteration: 389262
loss: 1.2299102544784546,grad_norm: 0.9999995241299194, iteration: 389263
loss: 1.331201195716858,grad_norm: 0.9999994810898196, iteration: 389264
loss: 0.9915862083435059,grad_norm: 0.69216831763459, iteration: 389265
loss: 1.00832998752594,grad_norm: 0.973332885507161, iteration: 389266
loss: 0.9948457479476929,grad_norm: 0.7436443460962039, iteration: 389267
loss: 1.0746383666992188,grad_norm: 0.9691661898093588, iteration: 389268
loss: 0.9848396182060242,grad_norm: 0.7045321865918494, iteration: 389269
loss: 1.0110312700271606,grad_norm: 0.8397389266217647, iteration: 389270
loss: 1.0176995992660522,grad_norm: 1.0000000080456533, iteration: 389271
loss: 0.9885773658752441,grad_norm: 0.66755041257922, iteration: 389272
loss: 1.034738302230835,grad_norm: 0.9999993683473019, iteration: 389273
loss: 1.038691520690918,grad_norm: 0.8741422132175093, iteration: 389274
loss: 0.9659125208854675,grad_norm: 0.6823695345645425, iteration: 389275
loss: 1.0594241619110107,grad_norm: 0.9999997024304861, iteration: 389276
loss: 1.0087385177612305,grad_norm: 0.8348368103393964, iteration: 389277
loss: 0.9818828701972961,grad_norm: 0.6183938441987555, iteration: 389278
loss: 1.282204508781433,grad_norm: 0.9999994728662317, iteration: 389279
loss: 0.985981285572052,grad_norm: 0.9999990658703686, iteration: 389280
loss: 1.0247822999954224,grad_norm: 0.8564819165582866, iteration: 389281
loss: 1.0469006299972534,grad_norm: 0.7101895013586144, iteration: 389282
loss: 0.9871560335159302,grad_norm: 0.8724929455796657, iteration: 389283
loss: 0.9996715784072876,grad_norm: 0.8392082228017421, iteration: 389284
loss: 0.9810467958450317,grad_norm: 0.7667688896115341, iteration: 389285
loss: 1.0898500680923462,grad_norm: 0.8661471328848318, iteration: 389286
loss: 1.0005214214324951,grad_norm: 0.9934971848757593, iteration: 389287
loss: 1.022313117980957,grad_norm: 0.7582387783242703, iteration: 389288
loss: 1.017602801322937,grad_norm: 0.7961466957864302, iteration: 389289
loss: 0.9758896231651306,grad_norm: 0.7592723239683357, iteration: 389290
loss: 1.0070133209228516,grad_norm: 0.7182215115426414, iteration: 389291
loss: 1.0348784923553467,grad_norm: 0.999999667347956, iteration: 389292
loss: 1.0142040252685547,grad_norm: 0.6032907342003294, iteration: 389293
loss: 1.0168086290359497,grad_norm: 0.9999995768361417, iteration: 389294
loss: 0.9810456037521362,grad_norm: 0.9489041447305184, iteration: 389295
loss: 0.9925549626350403,grad_norm: 0.7523412563529042, iteration: 389296
loss: 0.9622624516487122,grad_norm: 0.8347694109863253, iteration: 389297
loss: 0.9905086159706116,grad_norm: 0.8645566289052107, iteration: 389298
loss: 0.9790953993797302,grad_norm: 0.7947890200793657, iteration: 389299
loss: 0.9720672369003296,grad_norm: 0.6071200269838585, iteration: 389300
loss: 0.9579369425773621,grad_norm: 0.8132051469998397, iteration: 389301
loss: 1.0429736375808716,grad_norm: 0.9999992622583411, iteration: 389302
loss: 1.0342681407928467,grad_norm: 0.8451836042207709, iteration: 389303
loss: 1.0182029008865356,grad_norm: 0.9591642856837559, iteration: 389304
loss: 1.043923020362854,grad_norm: 1.0000000537415192, iteration: 389305
loss: 0.9810562133789062,grad_norm: 0.8679219862546916, iteration: 389306
loss: 1.010084867477417,grad_norm: 0.9769920640390338, iteration: 389307
loss: 1.00520920753479,grad_norm: 0.9999993408031257, iteration: 389308
loss: 1.0623128414154053,grad_norm: 0.9999999411730999, iteration: 389309
loss: 1.0695098638534546,grad_norm: 0.9999992935480236, iteration: 389310
loss: 1.0295436382293701,grad_norm: 0.8139061673964944, iteration: 389311
loss: 0.992215096950531,grad_norm: 0.7935595393068048, iteration: 389312
loss: 0.9774673581123352,grad_norm: 0.7907383400429122, iteration: 389313
loss: 1.0228298902511597,grad_norm: 0.7688211905215345, iteration: 389314
loss: 1.0146307945251465,grad_norm: 0.6407866522397798, iteration: 389315
loss: 1.0108672380447388,grad_norm: 0.8191058596518351, iteration: 389316
loss: 0.9873638153076172,grad_norm: 0.7512880175690331, iteration: 389317
loss: 1.029007911682129,grad_norm: 0.7095576062167042, iteration: 389318
loss: 1.0054060220718384,grad_norm: 0.7854076796416669, iteration: 389319
loss: 1.026426911354065,grad_norm: 0.9999996227995863, iteration: 389320
loss: 0.9969783425331116,grad_norm: 0.7078901553837782, iteration: 389321
loss: 1.0142953395843506,grad_norm: 0.755094931960461, iteration: 389322
loss: 1.0642067193984985,grad_norm: 0.9999996397560322, iteration: 389323
loss: 0.9715004563331604,grad_norm: 0.7130032756586966, iteration: 389324
loss: 1.016196846961975,grad_norm: 0.7353805188600019, iteration: 389325
loss: 0.9991605877876282,grad_norm: 0.7325181175832407, iteration: 389326
loss: 1.0069767236709595,grad_norm: 0.8894261414861611, iteration: 389327
loss: 1.1054109334945679,grad_norm: 0.9999994040645025, iteration: 389328
loss: 0.9572208523750305,grad_norm: 0.7164500596898872, iteration: 389329
loss: 1.0093300342559814,grad_norm: 0.9999999124097717, iteration: 389330
loss: 1.0285578966140747,grad_norm: 0.9154594860827533, iteration: 389331
loss: 0.9816171526908875,grad_norm: 0.7199317246886128, iteration: 389332
loss: 1.043368935585022,grad_norm: 0.9999998311239051, iteration: 389333
loss: 1.0362606048583984,grad_norm: 0.7269488873601614, iteration: 389334
loss: 0.9552175402641296,grad_norm: 0.7510975684963779, iteration: 389335
loss: 1.1174739599227905,grad_norm: 0.9999995377457773, iteration: 389336
loss: 1.0062744617462158,grad_norm: 0.9999998976098664, iteration: 389337
loss: 0.9942511916160583,grad_norm: 0.9999993753834897, iteration: 389338
loss: 0.9692913293838501,grad_norm: 0.7610885350135475, iteration: 389339
loss: 1.0017056465148926,grad_norm: 0.7593038578758702, iteration: 389340
loss: 1.0071603059768677,grad_norm: 0.7506452950028205, iteration: 389341
loss: 0.9998171925544739,grad_norm: 0.7858718111682862, iteration: 389342
loss: 1.040358066558838,grad_norm: 0.9999997899100455, iteration: 389343
loss: 1.021957516670227,grad_norm: 0.9702371518111497, iteration: 389344
loss: 1.0189828872680664,grad_norm: 0.6872573107075338, iteration: 389345
loss: 0.9811866879463196,grad_norm: 0.7306002867469651, iteration: 389346
loss: 0.9943583607673645,grad_norm: 0.8301227309206133, iteration: 389347
loss: 1.0137237310409546,grad_norm: 0.740061607253376, iteration: 389348
loss: 0.9968332052230835,grad_norm: 0.7875564516405548, iteration: 389349
loss: 1.0100828409194946,grad_norm: 0.7927240229050829, iteration: 389350
loss: 1.018974781036377,grad_norm: 0.7197316984320083, iteration: 389351
loss: 0.983292818069458,grad_norm: 0.6911453614510026, iteration: 389352
loss: 1.088823676109314,grad_norm: 0.9999999272464128, iteration: 389353
loss: 0.9805184006690979,grad_norm: 0.8178551344878584, iteration: 389354
loss: 1.0145872831344604,grad_norm: 0.8649227788250372, iteration: 389355
loss: 0.98114413022995,grad_norm: 0.800288746431782, iteration: 389356
loss: 1.020574688911438,grad_norm: 0.9423485605456097, iteration: 389357
loss: 1.0611340999603271,grad_norm: 0.9999991346503567, iteration: 389358
loss: 0.9867088794708252,grad_norm: 0.7731516881277867, iteration: 389359
loss: 1.0337018966674805,grad_norm: 0.9545642865124297, iteration: 389360
loss: 1.0164841413497925,grad_norm: 0.9999995648895272, iteration: 389361
loss: 1.0080525875091553,grad_norm: 0.8178904307697272, iteration: 389362
loss: 0.9234417676925659,grad_norm: 0.7642231950457388, iteration: 389363
loss: 0.9678810834884644,grad_norm: 0.8449788745980888, iteration: 389364
loss: 0.9686644673347473,grad_norm: 0.7862923562202448, iteration: 389365
loss: 0.9519577026367188,grad_norm: 0.7065894056791485, iteration: 389366
loss: 1.018076777458191,grad_norm: 0.7950558507487381, iteration: 389367
loss: 1.0078620910644531,grad_norm: 0.9506755536660941, iteration: 389368
loss: 1.0076576471328735,grad_norm: 0.9999994932462697, iteration: 389369
loss: 0.9682916402816772,grad_norm: 0.9999990156596704, iteration: 389370
loss: 1.0092785358428955,grad_norm: 0.6806227743887147, iteration: 389371
loss: 0.9624503254890442,grad_norm: 0.9771926174830007, iteration: 389372
loss: 0.9388668537139893,grad_norm: 0.7823166029410192, iteration: 389373
loss: 0.9940352439880371,grad_norm: 0.7724373610912854, iteration: 389374
loss: 0.9746890068054199,grad_norm: 0.7129375356110871, iteration: 389375
loss: 0.9834536910057068,grad_norm: 0.7932412384303282, iteration: 389376
loss: 1.0091303586959839,grad_norm: 0.9999992765705314, iteration: 389377
loss: 0.9793125987052917,grad_norm: 0.9999997268610977, iteration: 389378
loss: 1.0022008419036865,grad_norm: 0.7451266003605588, iteration: 389379
loss: 0.994151771068573,grad_norm: 0.7471240762876566, iteration: 389380
loss: 1.0189107656478882,grad_norm: 0.7798149711227821, iteration: 389381
loss: 1.0222736597061157,grad_norm: 0.7872800360646266, iteration: 389382
loss: 0.977812647819519,grad_norm: 0.7105061651195715, iteration: 389383
loss: 0.9954752326011658,grad_norm: 0.9999997979701539, iteration: 389384
loss: 1.02489173412323,grad_norm: 0.8270016387694176, iteration: 389385
loss: 1.0002028942108154,grad_norm: 0.8619300632041816, iteration: 389386
loss: 1.0254040956497192,grad_norm: 0.8635143814510213, iteration: 389387
loss: 0.9576218724250793,grad_norm: 0.7420721576481101, iteration: 389388
loss: 1.0167934894561768,grad_norm: 0.6621747096220602, iteration: 389389
loss: 1.0775507688522339,grad_norm: 0.7886221580114477, iteration: 389390
loss: 0.9767411351203918,grad_norm: 0.7882555144336012, iteration: 389391
loss: 1.0384832620620728,grad_norm: 0.7721298982487281, iteration: 389392
loss: 0.9941874146461487,grad_norm: 0.7705779593879151, iteration: 389393
loss: 1.0232477188110352,grad_norm: 0.708396654650956, iteration: 389394
loss: 1.0370290279388428,grad_norm: 0.9999995964253726, iteration: 389395
loss: 0.9984157085418701,grad_norm: 0.8308613150702534, iteration: 389396
loss: 1.0342206954956055,grad_norm: 0.8467030395539212, iteration: 389397
loss: 1.0115970373153687,grad_norm: 0.6927429524997404, iteration: 389398
loss: 0.9710391759872437,grad_norm: 0.9999991376934906, iteration: 389399
loss: 0.9928733110427856,grad_norm: 0.7268099732533128, iteration: 389400
loss: 1.0189650058746338,grad_norm: 0.7464894519358264, iteration: 389401
loss: 1.0227488279342651,grad_norm: 0.8252808599149564, iteration: 389402
loss: 1.006840467453003,grad_norm: 0.9038883406485113, iteration: 389403
loss: 1.0185612440109253,grad_norm: 0.9072521306332518, iteration: 389404
loss: 1.0089187622070312,grad_norm: 0.8958416887971817, iteration: 389405
loss: 0.9947835803031921,grad_norm: 0.6797519019738391, iteration: 389406
loss: 0.9816521406173706,grad_norm: 0.6953970791990254, iteration: 389407
loss: 0.9914398789405823,grad_norm: 0.7531418714558119, iteration: 389408
loss: 0.9981683492660522,grad_norm: 0.8195412345839781, iteration: 389409
loss: 1.0059452056884766,grad_norm: 0.7944343777018421, iteration: 389410
loss: 1.0090422630310059,grad_norm: 0.9999991147915847, iteration: 389411
loss: 1.0159610509872437,grad_norm: 0.8440616677997077, iteration: 389412
loss: 0.9935920238494873,grad_norm: 0.8048127341415374, iteration: 389413
loss: 1.0058214664459229,grad_norm: 0.6766582494551414, iteration: 389414
loss: 0.9874832034111023,grad_norm: 0.6867249938031769, iteration: 389415
loss: 1.0356645584106445,grad_norm: 0.9999997360063371, iteration: 389416
loss: 1.008569598197937,grad_norm: 0.8310551556519958, iteration: 389417
loss: 1.0297791957855225,grad_norm: 0.9999997146413268, iteration: 389418
loss: 1.0188237428665161,grad_norm: 0.6964300703870878, iteration: 389419
loss: 0.9790148138999939,grad_norm: 0.7777336308774659, iteration: 389420
loss: 0.976680338382721,grad_norm: 0.7494038878478415, iteration: 389421
loss: 1.007261872291565,grad_norm: 0.7272847743891835, iteration: 389422
loss: 1.0210258960723877,grad_norm: 0.7601017585923489, iteration: 389423
loss: 0.9856764078140259,grad_norm: 0.7226299428516536, iteration: 389424
loss: 1.0085209608078003,grad_norm: 0.7499237795500622, iteration: 389425
loss: 1.0294541120529175,grad_norm: 0.6971233809241361, iteration: 389426
loss: 1.0279039144515991,grad_norm: 0.8817963098319939, iteration: 389427
loss: 0.9664274454116821,grad_norm: 0.7421684289271978, iteration: 389428
loss: 1.0163938999176025,grad_norm: 0.7078177619175184, iteration: 389429
loss: 0.9820849299430847,grad_norm: 0.7634943775605038, iteration: 389430
loss: 1.0438848733901978,grad_norm: 0.9999994146860876, iteration: 389431
loss: 1.0298233032226562,grad_norm: 0.7101985623820016, iteration: 389432
loss: 1.009146809577942,grad_norm: 0.6999765946513558, iteration: 389433
loss: 0.9783496856689453,grad_norm: 0.7301891713971979, iteration: 389434
loss: 0.9947783946990967,grad_norm: 0.8577985070127724, iteration: 389435
loss: 1.0122220516204834,grad_norm: 0.6904005878119684, iteration: 389436
loss: 0.967302680015564,grad_norm: 0.7255345212693031, iteration: 389437
loss: 0.9668774604797363,grad_norm: 0.7764801105621038, iteration: 389438
loss: 1.0012837648391724,grad_norm: 0.7637541803469263, iteration: 389439
loss: 1.0019603967666626,grad_norm: 0.7514141132317534, iteration: 389440
loss: 0.9885752201080322,grad_norm: 0.8167677130985926, iteration: 389441
loss: 1.0295462608337402,grad_norm: 0.8380405769462954, iteration: 389442
loss: 1.0056641101837158,grad_norm: 0.7851510563011886, iteration: 389443
loss: 0.9988585710525513,grad_norm: 0.7171196810432604, iteration: 389444
loss: 0.9902158379554749,grad_norm: 0.7463970655840912, iteration: 389445
loss: 1.0020246505737305,grad_norm: 0.8769245297005844, iteration: 389446
loss: 0.9876677989959717,grad_norm: 0.7372535678168195, iteration: 389447
loss: 0.9858632683753967,grad_norm: 0.798668720826848, iteration: 389448
loss: 1.0345089435577393,grad_norm: 0.8667492500216921, iteration: 389449
loss: 0.9714481830596924,grad_norm: 0.7763195398836855, iteration: 389450
loss: 0.9958927631378174,grad_norm: 0.7800835393949179, iteration: 389451
loss: 1.0005269050598145,grad_norm: 0.7949972168906309, iteration: 389452
loss: 1.0018833875656128,grad_norm: 0.8638976516190789, iteration: 389453
loss: 0.9722817540168762,grad_norm: 0.8172307810036883, iteration: 389454
loss: 1.0022971630096436,grad_norm: 0.6965539595882349, iteration: 389455
loss: 1.0061228275299072,grad_norm: 0.8150456016164803, iteration: 389456
loss: 0.9762202501296997,grad_norm: 0.8497021370069436, iteration: 389457
loss: 1.0282340049743652,grad_norm: 0.7706363644916454, iteration: 389458
loss: 1.0230046510696411,grad_norm: 0.7505797926666596, iteration: 389459
loss: 0.9743531346321106,grad_norm: 0.773622741340677, iteration: 389460
loss: 0.9550314545631409,grad_norm: 0.7543954728401531, iteration: 389461
loss: 1.0243737697601318,grad_norm: 0.6575641955432262, iteration: 389462
loss: 0.9832855463027954,grad_norm: 0.7896704812678985, iteration: 389463
loss: 1.0169563293457031,grad_norm: 0.999999086507255, iteration: 389464
loss: 0.9866153001785278,grad_norm: 0.7530027472367442, iteration: 389465
loss: 1.004118800163269,grad_norm: 0.8253758335977083, iteration: 389466
loss: 0.9671692252159119,grad_norm: 0.7472862059989734, iteration: 389467
loss: 0.991750955581665,grad_norm: 0.8177261308436428, iteration: 389468
loss: 1.0257351398468018,grad_norm: 0.7790656671157905, iteration: 389469
loss: 0.9888877868652344,grad_norm: 0.7318742628826683, iteration: 389470
loss: 1.0065704584121704,grad_norm: 0.8223469555614572, iteration: 389471
loss: 1.0146169662475586,grad_norm: 0.9999990876646716, iteration: 389472
loss: 0.9994664788246155,grad_norm: 0.7719346332060711, iteration: 389473
loss: 0.979778528213501,grad_norm: 0.6472859237749878, iteration: 389474
loss: 1.0061594247817993,grad_norm: 0.8630954476812157, iteration: 389475
loss: 1.0062334537506104,grad_norm: 0.9338664081269906, iteration: 389476
loss: 1.015205979347229,grad_norm: 0.9999990733007887, iteration: 389477
loss: 0.9978633522987366,grad_norm: 0.9999997414831336, iteration: 389478
loss: 1.0097123384475708,grad_norm: 0.6845502951237536, iteration: 389479
loss: 0.9830604791641235,grad_norm: 0.655993825753445, iteration: 389480
loss: 1.018030047416687,grad_norm: 0.6318449055148448, iteration: 389481
loss: 1.0096524953842163,grad_norm: 0.7144026562893924, iteration: 389482
loss: 1.046466588973999,grad_norm: 0.9999992741905452, iteration: 389483
loss: 1.0261754989624023,grad_norm: 0.818004909383309, iteration: 389484
loss: 1.0206985473632812,grad_norm: 0.8826606245082487, iteration: 389485
loss: 1.000695824623108,grad_norm: 0.6871100538424393, iteration: 389486
loss: 1.0085289478302002,grad_norm: 0.7605059516033528, iteration: 389487
loss: 0.9456490278244019,grad_norm: 0.9999991302858019, iteration: 389488
loss: 0.9931516647338867,grad_norm: 0.7932079500599857, iteration: 389489
loss: 0.9762567281723022,grad_norm: 0.9041581434944229, iteration: 389490
loss: 0.9794726967811584,grad_norm: 0.7347542747135861, iteration: 389491
loss: 0.970796525478363,grad_norm: 0.8773868424945782, iteration: 389492
loss: 0.9749559760093689,grad_norm: 0.7801600034443193, iteration: 389493
loss: 1.0245335102081299,grad_norm: 0.794652668980377, iteration: 389494
loss: 0.9984007477760315,grad_norm: 0.7631601816453331, iteration: 389495
loss: 1.0187352895736694,grad_norm: 0.719563982016883, iteration: 389496
loss: 1.0004701614379883,grad_norm: 0.8772591836240532, iteration: 389497
loss: 1.0410940647125244,grad_norm: 0.6900877932589102, iteration: 389498
loss: 1.0021073818206787,grad_norm: 0.8941410890452036, iteration: 389499
loss: 0.9966878890991211,grad_norm: 0.8238537148075714, iteration: 389500
loss: 0.987991988658905,grad_norm: 0.7333317901319155, iteration: 389501
loss: 1.0182830095291138,grad_norm: 0.907273014835633, iteration: 389502
loss: 0.9998881816864014,grad_norm: 0.780893469422943, iteration: 389503
loss: 0.9980981945991516,grad_norm: 0.8381632064141458, iteration: 389504
loss: 0.967646598815918,grad_norm: 0.8976659194326676, iteration: 389505
loss: 1.0120936632156372,grad_norm: 0.7321250959804196, iteration: 389506
loss: 1.020896315574646,grad_norm: 0.9999999119250597, iteration: 389507
loss: 0.9628300666809082,grad_norm: 0.776974015557791, iteration: 389508
loss: 0.9966833591461182,grad_norm: 0.8528846415552138, iteration: 389509
loss: 1.0236456394195557,grad_norm: 0.9341150799307587, iteration: 389510
loss: 0.9845142364501953,grad_norm: 0.9999991422617452, iteration: 389511
loss: 0.9790768027305603,grad_norm: 0.7637171157077006, iteration: 389512
loss: 1.0015639066696167,grad_norm: 0.7387221483555746, iteration: 389513
loss: 0.9766091704368591,grad_norm: 0.7412686866947078, iteration: 389514
loss: 0.9670885801315308,grad_norm: 0.9999991221847913, iteration: 389515
loss: 1.0020167827606201,grad_norm: 0.814591718749253, iteration: 389516
loss: 1.0174213647842407,grad_norm: 0.8346202319073868, iteration: 389517
loss: 0.9977940320968628,grad_norm: 0.8152379513352587, iteration: 389518
loss: 0.9786513447761536,grad_norm: 0.7918786700722497, iteration: 389519
loss: 0.9959464073181152,grad_norm: 0.9565959831494499, iteration: 389520
loss: 0.9948307871818542,grad_norm: 0.5570096439745641, iteration: 389521
loss: 0.995864748954773,grad_norm: 0.6982275863283708, iteration: 389522
loss: 0.9936644434928894,grad_norm: 0.7007004105248413, iteration: 389523
loss: 0.9762426018714905,grad_norm: 0.7738429639332921, iteration: 389524
loss: 1.0099278688430786,grad_norm: 0.6880584572765481, iteration: 389525
loss: 1.00621497631073,grad_norm: 0.7792765072552966, iteration: 389526
loss: 0.9993550181388855,grad_norm: 0.762035501053099, iteration: 389527
loss: 1.0122969150543213,grad_norm: 0.7838919836845344, iteration: 389528
loss: 0.9890451431274414,grad_norm: 0.7797558812532693, iteration: 389529
loss: 0.9943570494651794,grad_norm: 0.8930240945773454, iteration: 389530
loss: 1.0090073347091675,grad_norm: 0.8757168076446998, iteration: 389531
loss: 0.9904040098190308,grad_norm: 0.8069287521045261, iteration: 389532
loss: 1.014220952987671,grad_norm: 0.8586351785337015, iteration: 389533
loss: 0.9809799194335938,grad_norm: 0.6830092102734604, iteration: 389534
loss: 1.0209382772445679,grad_norm: 0.9999990465037669, iteration: 389535
loss: 0.9949232935905457,grad_norm: 0.8312428025120128, iteration: 389536
loss: 1.0063623189926147,grad_norm: 0.8505525220057963, iteration: 389537
loss: 1.023807406425476,grad_norm: 0.7581109797933848, iteration: 389538
loss: 0.9900326728820801,grad_norm: 0.8295156992959785, iteration: 389539
loss: 0.9845365285873413,grad_norm: 0.99999956869168, iteration: 389540
loss: 0.9695658087730408,grad_norm: 0.7621249057906784, iteration: 389541
loss: 1.0028926134109497,grad_norm: 0.6853251098816374, iteration: 389542
loss: 1.000684380531311,grad_norm: 0.7655571819419474, iteration: 389543
loss: 1.0098817348480225,grad_norm: 0.743869108866964, iteration: 389544
loss: 1.0070315599441528,grad_norm: 0.7456733304420363, iteration: 389545
loss: 1.0161713361740112,grad_norm: 0.8327151312134482, iteration: 389546
loss: 1.0145113468170166,grad_norm: 0.6531152420038612, iteration: 389547
loss: 0.9975853562355042,grad_norm: 0.7171907752066273, iteration: 389548
loss: 0.9549432396888733,grad_norm: 0.9216591092976282, iteration: 389549
loss: 0.9972912669181824,grad_norm: 0.7671141977100244, iteration: 389550
loss: 1.00029718875885,grad_norm: 0.9999997453832149, iteration: 389551
loss: 0.961335301399231,grad_norm: 0.8761833990884781, iteration: 389552
loss: 1.0043480396270752,grad_norm: 0.8879636819684938, iteration: 389553
loss: 1.0583158731460571,grad_norm: 0.9999994164120262, iteration: 389554
loss: 1.0084967613220215,grad_norm: 0.707497020172497, iteration: 389555
loss: 1.04121732711792,grad_norm: 0.7778634891454624, iteration: 389556
loss: 0.9792239665985107,grad_norm: 0.9130394266152679, iteration: 389557
loss: 1.0651384592056274,grad_norm: 0.761421774494997, iteration: 389558
loss: 1.0135447978973389,grad_norm: 0.7214647058205272, iteration: 389559
loss: 1.0081238746643066,grad_norm: 0.8434880692104313, iteration: 389560
loss: 1.0986865758895874,grad_norm: 0.9999991593527252, iteration: 389561
loss: 0.9339874386787415,grad_norm: 0.9120116306972351, iteration: 389562
loss: 0.976345419883728,grad_norm: 0.8552710152771877, iteration: 389563
loss: 1.007749319076538,grad_norm: 0.7593862116203469, iteration: 389564
loss: 1.0462690591812134,grad_norm: 0.840895459716117, iteration: 389565
loss: 1.0021828413009644,grad_norm: 0.8739957970938678, iteration: 389566
loss: 0.9662092328071594,grad_norm: 0.8130820135148686, iteration: 389567
loss: 0.9893971085548401,grad_norm: 0.7413250344366179, iteration: 389568
loss: 1.0111401081085205,grad_norm: 0.8261836466941559, iteration: 389569
loss: 0.9784110188484192,grad_norm: 0.7366482935305866, iteration: 389570
loss: 0.9801444411277771,grad_norm: 0.7375254446124783, iteration: 389571
loss: 1.0294244289398193,grad_norm: 0.8697710056984946, iteration: 389572
loss: 1.015952706336975,grad_norm: 0.999999713025131, iteration: 389573
loss: 0.9717175960540771,grad_norm: 0.6661764807410552, iteration: 389574
loss: 0.9998829364776611,grad_norm: 0.9999995372686559, iteration: 389575
loss: 1.0423330068588257,grad_norm: 0.999999608235715, iteration: 389576
loss: 1.0255389213562012,grad_norm: 0.8635671389571651, iteration: 389577
loss: 1.001242995262146,grad_norm: 0.9999994242309128, iteration: 389578
loss: 0.9765201807022095,grad_norm: 0.767052861197821, iteration: 389579
loss: 0.9726675748825073,grad_norm: 0.8083188966740332, iteration: 389580
loss: 0.9758497476577759,grad_norm: 0.6803361570804497, iteration: 389581
loss: 0.9919031262397766,grad_norm: 0.5841528214526407, iteration: 389582
loss: 1.005091905593872,grad_norm: 0.7336212021656018, iteration: 389583
loss: 0.980803906917572,grad_norm: 0.9014203078217821, iteration: 389584
loss: 0.9737239480018616,grad_norm: 0.7864312978852257, iteration: 389585
loss: 1.046670913696289,grad_norm: 0.9022394260657702, iteration: 389586
loss: 1.0129146575927734,grad_norm: 0.7537438107894417, iteration: 389587
loss: 1.0155727863311768,grad_norm: 0.9999990611723167, iteration: 389588
loss: 1.0140799283981323,grad_norm: 0.9047242442266636, iteration: 389589
loss: 1.003369688987732,grad_norm: 0.8517616895717781, iteration: 389590
loss: 0.9696240425109863,grad_norm: 0.8519209302192007, iteration: 389591
loss: 0.990147054195404,grad_norm: 0.813674444741062, iteration: 389592
loss: 1.0175868272781372,grad_norm: 0.8037839132940113, iteration: 389593
loss: 0.9985197186470032,grad_norm: 0.6495429634614501, iteration: 389594
loss: 0.9955695867538452,grad_norm: 0.933017581920495, iteration: 389595
loss: 1.016974925994873,grad_norm: 0.7782077790000684, iteration: 389596
loss: 1.0374127626419067,grad_norm: 0.684130874702984, iteration: 389597
loss: 0.9935103058815002,grad_norm: 0.7915976555218627, iteration: 389598
loss: 1.0531584024429321,grad_norm: 0.9999995661472216, iteration: 389599
loss: 1.0003242492675781,grad_norm: 0.8910224216348956, iteration: 389600
loss: 0.9917647838592529,grad_norm: 0.7837379701064507, iteration: 389601
loss: 1.0277681350708008,grad_norm: 0.7321052550242197, iteration: 389602
loss: 1.020354986190796,grad_norm: 0.7549615192048431, iteration: 389603
loss: 0.9794225096702576,grad_norm: 0.6194763977128221, iteration: 389604
loss: 1.0108968019485474,grad_norm: 0.834295114693942, iteration: 389605
loss: 1.0349736213684082,grad_norm: 0.999999528001912, iteration: 389606
loss: 1.001316785812378,grad_norm: 0.7300708352665396, iteration: 389607
loss: 0.9862990975379944,grad_norm: 0.9164566171059294, iteration: 389608
loss: 1.0115526914596558,grad_norm: 0.9999997692003944, iteration: 389609
loss: 1.0092445611953735,grad_norm: 0.9999997412128289, iteration: 389610
loss: 1.0053062438964844,grad_norm: 0.8441596059101656, iteration: 389611
loss: 0.9864895343780518,grad_norm: 0.8883523327038934, iteration: 389612
loss: 1.110072374343872,grad_norm: 0.8255226523631196, iteration: 389613
loss: 0.9959929585456848,grad_norm: 0.9709500930963223, iteration: 389614
loss: 1.0018473863601685,grad_norm: 0.7442691547816724, iteration: 389615
loss: 1.0102957487106323,grad_norm: 0.8903467160918469, iteration: 389616
loss: 0.9636942148208618,grad_norm: 0.6683110980760657, iteration: 389617
loss: 0.9889724850654602,grad_norm: 0.7676927138130444, iteration: 389618
loss: 1.048809289932251,grad_norm: 0.8875970589267381, iteration: 389619
loss: 1.0269283056259155,grad_norm: 0.6876885651772041, iteration: 389620
loss: 1.0141561031341553,grad_norm: 0.81500823231453, iteration: 389621
loss: 1.0255707502365112,grad_norm: 0.9192243832163948, iteration: 389622
loss: 1.009213924407959,grad_norm: 0.763857151990813, iteration: 389623
loss: 1.0084521770477295,grad_norm: 0.7516652558881468, iteration: 389624
loss: 1.0252364873886108,grad_norm: 0.9999992693259513, iteration: 389625
loss: 1.0152701139450073,grad_norm: 0.6837875310007677, iteration: 389626
loss: 1.0094194412231445,grad_norm: 0.80770000525426, iteration: 389627
loss: 0.9877555966377258,grad_norm: 0.7780732559735793, iteration: 389628
loss: 0.9758747816085815,grad_norm: 0.6252819101667363, iteration: 389629
loss: 1.0173583030700684,grad_norm: 0.7596519830921449, iteration: 389630
loss: 0.9768231511116028,grad_norm: 0.9155145367169476, iteration: 389631
loss: 1.0115317106246948,grad_norm: 0.7874903929867393, iteration: 389632
loss: 0.9991217851638794,grad_norm: 0.7178862474223292, iteration: 389633
loss: 1.0937284231185913,grad_norm: 0.9999996146078548, iteration: 389634
loss: 0.9981747269630432,grad_norm: 0.703570735832234, iteration: 389635
loss: 0.9795306324958801,grad_norm: 0.7334744022465599, iteration: 389636
loss: 0.979430079460144,grad_norm: 0.7553730624431968, iteration: 389637
loss: 1.0320699214935303,grad_norm: 0.6629614799901861, iteration: 389638
loss: 0.9907382130622864,grad_norm: 0.7045074557617118, iteration: 389639
loss: 0.9650805592536926,grad_norm: 0.7726952935317226, iteration: 389640
loss: 1.0254381895065308,grad_norm: 0.9094921241924004, iteration: 389641
loss: 0.9818981289863586,grad_norm: 0.7776371618231609, iteration: 389642
loss: 0.9622147083282471,grad_norm: 0.8053797533303194, iteration: 389643
loss: 1.0043904781341553,grad_norm: 0.9999992683622178, iteration: 389644
loss: 1.0226757526397705,grad_norm: 0.7884618138978909, iteration: 389645
loss: 0.9641404151916504,grad_norm: 0.7656499419495285, iteration: 389646
loss: 1.0041497945785522,grad_norm: 0.8240620422408127, iteration: 389647
loss: 0.9966069459915161,grad_norm: 0.7418452512098725, iteration: 389648
loss: 1.0199633836746216,grad_norm: 0.9999996435631989, iteration: 389649
loss: 1.018988847732544,grad_norm: 0.9999994548164434, iteration: 389650
loss: 1.006653904914856,grad_norm: 0.8347932248485185, iteration: 389651
loss: 1.0131715536117554,grad_norm: 0.7133222278369462, iteration: 389652
loss: 0.9872773885726929,grad_norm: 0.7458007060179808, iteration: 389653
loss: 0.9633480906486511,grad_norm: 0.7797873306323096, iteration: 389654
loss: 0.990917980670929,grad_norm: 0.6384625351927001, iteration: 389655
loss: 1.0212974548339844,grad_norm: 0.8150834051771976, iteration: 389656
loss: 1.0526121854782104,grad_norm: 0.9999996854787294, iteration: 389657
loss: 0.9686895608901978,grad_norm: 0.7821172826622431, iteration: 389658
loss: 1.013683557510376,grad_norm: 0.7142169472038835, iteration: 389659
loss: 1.194914698600769,grad_norm: 0.9999999298530895, iteration: 389660
loss: 0.9893285632133484,grad_norm: 0.7656191857430642, iteration: 389661
loss: 0.9929065704345703,grad_norm: 0.8239791351829655, iteration: 389662
loss: 0.9872302412986755,grad_norm: 0.7877152544952664, iteration: 389663
loss: 0.9930083751678467,grad_norm: 0.7180155386935856, iteration: 389664
loss: 1.0649291276931763,grad_norm: 0.783076713266305, iteration: 389665
loss: 1.0220963954925537,grad_norm: 0.8147832907682845, iteration: 389666
loss: 0.9899516701698303,grad_norm: 0.7523883022066897, iteration: 389667
loss: 0.9863344430923462,grad_norm: 0.7410882815928469, iteration: 389668
loss: 0.9852396845817566,grad_norm: 0.8347515509881865, iteration: 389669
loss: 1.0084792375564575,grad_norm: 0.6676456374233445, iteration: 389670
loss: 1.0781420469284058,grad_norm: 0.9999998470082316, iteration: 389671
loss: 1.0289298295974731,grad_norm: 0.6832660581068399, iteration: 389672
loss: 1.003780722618103,grad_norm: 0.8404082288661532, iteration: 389673
loss: 0.9912979602813721,grad_norm: 0.7666266739088307, iteration: 389674
loss: 1.0150645971298218,grad_norm: 0.6040462032592994, iteration: 389675
loss: 1.0165678262710571,grad_norm: 0.909244681643761, iteration: 389676
loss: 1.139783263206482,grad_norm: 0.999999950269157, iteration: 389677
loss: 1.017518162727356,grad_norm: 0.7835604018098218, iteration: 389678
loss: 1.0167087316513062,grad_norm: 0.6886744668577127, iteration: 389679
loss: 1.0021216869354248,grad_norm: 0.7094931818881591, iteration: 389680
loss: 0.969242513179779,grad_norm: 0.8472760943483397, iteration: 389681
loss: 1.0230023860931396,grad_norm: 0.7804959719045296, iteration: 389682
loss: 1.0214829444885254,grad_norm: 0.9446278657256734, iteration: 389683
loss: 0.9993301033973694,grad_norm: 0.8120392853377516, iteration: 389684
loss: 1.0669430494308472,grad_norm: 0.7869112392260715, iteration: 389685
loss: 0.9687963724136353,grad_norm: 0.6782033486677175, iteration: 389686
loss: 1.0106407403945923,grad_norm: 0.9999991489141915, iteration: 389687
loss: 0.9705591201782227,grad_norm: 0.9451740300166922, iteration: 389688
loss: 0.9690818786621094,grad_norm: 0.9114153315276494, iteration: 389689
loss: 0.9992373585700989,grad_norm: 0.7738578619487417, iteration: 389690
loss: 1.0051331520080566,grad_norm: 0.7596476563293617, iteration: 389691
loss: 0.9642100930213928,grad_norm: 0.7504895264295217, iteration: 389692
loss: 0.9605141282081604,grad_norm: 0.999999079912729, iteration: 389693
loss: 0.9925008416175842,grad_norm: 0.7686977798511259, iteration: 389694
loss: 0.9818399548530579,grad_norm: 0.653335237141397, iteration: 389695
loss: 0.9775447249412537,grad_norm: 0.7234037045681093, iteration: 389696
loss: 0.9726405739784241,grad_norm: 0.7118059231586439, iteration: 389697
loss: 1.0003315210342407,grad_norm: 0.7868492355391443, iteration: 389698
loss: 1.0139646530151367,grad_norm: 0.8056089622930312, iteration: 389699
loss: 0.997270941734314,grad_norm: 0.7753463798141182, iteration: 389700
loss: 1.0127639770507812,grad_norm: 0.9999997444626546, iteration: 389701
loss: 1.0117976665496826,grad_norm: 0.8327432901611446, iteration: 389702
loss: 1.0125732421875,grad_norm: 0.7325619478909379, iteration: 389703
loss: 1.0521414279937744,grad_norm: 0.894907218669484, iteration: 389704
loss: 1.0909366607666016,grad_norm: 0.8683168382350647, iteration: 389705
loss: 0.9878392815589905,grad_norm: 0.7829301006020134, iteration: 389706
loss: 1.0137684345245361,grad_norm: 0.9999993274188973, iteration: 389707
loss: 1.0195192098617554,grad_norm: 0.8705979837310885, iteration: 389708
loss: 0.9908303022384644,grad_norm: 0.6598421191464398, iteration: 389709
loss: 0.997357964515686,grad_norm: 0.7014214012272094, iteration: 389710
loss: 1.022311806678772,grad_norm: 0.7945207092686009, iteration: 389711
loss: 0.9932566285133362,grad_norm: 0.7821380091366668, iteration: 389712
loss: 0.9410434365272522,grad_norm: 0.8516793861277403, iteration: 389713
loss: 0.9709203839302063,grad_norm: 0.7849412761169718, iteration: 389714
loss: 0.9808462858200073,grad_norm: 0.8544704233519709, iteration: 389715
loss: 1.0034091472625732,grad_norm: 0.8948404904574194, iteration: 389716
loss: 0.9769933223724365,grad_norm: 0.7689949233719194, iteration: 389717
loss: 1.0009227991104126,grad_norm: 0.8558556737812478, iteration: 389718
loss: 0.9997164607048035,grad_norm: 0.7873326679621214, iteration: 389719
loss: 0.9459084272384644,grad_norm: 0.9999996746774313, iteration: 389720
loss: 1.0507155656814575,grad_norm: 0.8054022287945575, iteration: 389721
loss: 0.9887546300888062,grad_norm: 0.8378604657305785, iteration: 389722
loss: 1.0052282810211182,grad_norm: 0.8493020763062882, iteration: 389723
loss: 0.9778780341148376,grad_norm: 0.7237843850344413, iteration: 389724
loss: 1.0225346088409424,grad_norm: 0.9058409245626156, iteration: 389725
loss: 1.0675158500671387,grad_norm: 0.9999992716204841, iteration: 389726
loss: 0.995704710483551,grad_norm: 0.6019888825792219, iteration: 389727
loss: 1.027231216430664,grad_norm: 0.9105977027356229, iteration: 389728
loss: 0.9827038645744324,grad_norm: 0.7918863222858024, iteration: 389729
loss: 0.9986793398857117,grad_norm: 0.725462769603666, iteration: 389730
loss: 0.9697109460830688,grad_norm: 0.7989307257102249, iteration: 389731
loss: 1.0097694396972656,grad_norm: 0.7272684161271807, iteration: 389732
loss: 1.052445888519287,grad_norm: 0.9999992879482428, iteration: 389733
loss: 0.9862722158432007,grad_norm: 0.602105221081028, iteration: 389734
loss: 0.9865950345993042,grad_norm: 0.9288185663106495, iteration: 389735
loss: 1.0247477293014526,grad_norm: 0.7803702362967558, iteration: 389736
loss: 0.9842761754989624,grad_norm: 0.79940089553125, iteration: 389737
loss: 1.029088020324707,grad_norm: 0.9999990097513229, iteration: 389738
loss: 0.9961503744125366,grad_norm: 0.7069456529023951, iteration: 389739
loss: 0.9720556735992432,grad_norm: 0.8672174854369675, iteration: 389740
loss: 1.0577688217163086,grad_norm: 0.9999995265461937, iteration: 389741
loss: 1.0106703042984009,grad_norm: 0.8675544724426647, iteration: 389742
loss: 0.9688382148742676,grad_norm: 0.8609606818233733, iteration: 389743
loss: 1.0137776136398315,grad_norm: 0.9999997342325297, iteration: 389744
loss: 1.1412688493728638,grad_norm: 0.9999992220618454, iteration: 389745
loss: 1.0021207332611084,grad_norm: 0.9165309056519557, iteration: 389746
loss: 0.9885737895965576,grad_norm: 0.7387985779728664, iteration: 389747
loss: 1.014256477355957,grad_norm: 0.7616202936361098, iteration: 389748
loss: 1.0455694198608398,grad_norm: 0.9999997750740909, iteration: 389749
loss: 1.0325943231582642,grad_norm: 0.9222786020884705, iteration: 389750
loss: 0.9918695688247681,grad_norm: 0.70703796276146, iteration: 389751
loss: 0.9788112044334412,grad_norm: 0.529376185258305, iteration: 389752
loss: 0.9881943464279175,grad_norm: 0.6745495971268295, iteration: 389753
loss: 0.9703698754310608,grad_norm: 0.677916928343618, iteration: 389754
loss: 1.0001909732818604,grad_norm: 0.7164478722078044, iteration: 389755
loss: 1.0044193267822266,grad_norm: 0.7923280112648435, iteration: 389756
loss: 0.9910572171211243,grad_norm: 0.999999590391526, iteration: 389757
loss: 1.0093059539794922,grad_norm: 0.7466474233249749, iteration: 389758
loss: 0.9542827010154724,grad_norm: 0.8482315368232282, iteration: 389759
loss: 1.032474160194397,grad_norm: 0.8440007217341503, iteration: 389760
loss: 0.9871479868888855,grad_norm: 0.872425843024737, iteration: 389761
loss: 0.9930949211120605,grad_norm: 0.8235080499028857, iteration: 389762
loss: 0.9855676293373108,grad_norm: 0.9999995825809036, iteration: 389763
loss: 1.0328510999679565,grad_norm: 0.9999990380418678, iteration: 389764
loss: 1.0032678842544556,grad_norm: 0.9064024413849593, iteration: 389765
loss: 1.0011919736862183,grad_norm: 0.7778157215562131, iteration: 389766
loss: 0.9561871886253357,grad_norm: 0.7617403200487732, iteration: 389767
loss: 0.9771796464920044,grad_norm: 0.7323539004433702, iteration: 389768
loss: 1.0302067995071411,grad_norm: 0.7253477513917986, iteration: 389769
loss: 0.9950628876686096,grad_norm: 0.7829634773850006, iteration: 389770
loss: 0.9456859230995178,grad_norm: 0.8793701943690343, iteration: 389771
loss: 0.9986382126808167,grad_norm: 0.7148947884465738, iteration: 389772
loss: 0.9786888360977173,grad_norm: 0.8405355299184204, iteration: 389773
loss: 1.0120799541473389,grad_norm: 0.7757702157540995, iteration: 389774
loss: 1.0459660291671753,grad_norm: 0.6782673533768611, iteration: 389775
loss: 1.0070583820343018,grad_norm: 0.7848847868022241, iteration: 389776
loss: 0.9783561825752258,grad_norm: 0.9526148295076249, iteration: 389777
loss: 1.0008429288864136,grad_norm: 0.784992236339409, iteration: 389778
loss: 0.9455593228340149,grad_norm: 0.8262981861820836, iteration: 389779
loss: 0.9805151224136353,grad_norm: 0.8787799834512906, iteration: 389780
loss: 0.9674271941184998,grad_norm: 0.7764307664402167, iteration: 389781
loss: 1.0242269039154053,grad_norm: 0.9999998307815408, iteration: 389782
loss: 0.9544723033905029,grad_norm: 0.8625192946380132, iteration: 389783
loss: 1.0174362659454346,grad_norm: 0.7293860270861858, iteration: 389784
loss: 0.966684877872467,grad_norm: 0.8019950942846208, iteration: 389785
loss: 0.9724573493003845,grad_norm: 0.7129512675019286, iteration: 389786
loss: 0.9714230298995972,grad_norm: 0.8361492493851812, iteration: 389787
loss: 1.0190250873565674,grad_norm: 0.8000693263648317, iteration: 389788
loss: 0.9638234376907349,grad_norm: 0.7358796385828151, iteration: 389789
loss: 0.9930948615074158,grad_norm: 0.8913126894511049, iteration: 389790
loss: 0.9836105108261108,grad_norm: 0.8762023462187309, iteration: 389791
loss: 1.0283507108688354,grad_norm: 0.8945009525695933, iteration: 389792
loss: 0.9560941457748413,grad_norm: 0.6267160075365288, iteration: 389793
loss: 1.0272735357284546,grad_norm: 0.788346363822976, iteration: 389794
loss: 0.9826505780220032,grad_norm: 0.7504027789485211, iteration: 389795
loss: 0.9967246055603027,grad_norm: 0.9554530437699277, iteration: 389796
loss: 0.9722325205802917,grad_norm: 0.9326857621909289, iteration: 389797
loss: 0.988501787185669,grad_norm: 0.7229868556920335, iteration: 389798
loss: 0.9661412835121155,grad_norm: 0.7173731951329192, iteration: 389799
loss: 0.9649460911750793,grad_norm: 0.7825432794333133, iteration: 389800
loss: 0.9902631640434265,grad_norm: 0.7215610993396882, iteration: 389801
loss: 1.0115653276443481,grad_norm: 0.6945087558208554, iteration: 389802
loss: 1.0010619163513184,grad_norm: 0.8628249043461778, iteration: 389803
loss: 0.9717172384262085,grad_norm: 0.7588368649342658, iteration: 389804
loss: 1.0427788496017456,grad_norm: 0.8495488966700229, iteration: 389805
loss: 1.017664909362793,grad_norm: 0.7763363699597022, iteration: 389806
loss: 1.0761170387268066,grad_norm: 0.9195976768502542, iteration: 389807
loss: 0.9692901968955994,grad_norm: 0.6946776306065428, iteration: 389808
loss: 0.9920764565467834,grad_norm: 0.7991834880738182, iteration: 389809
loss: 0.9792560935020447,grad_norm: 0.9999990093360251, iteration: 389810
loss: 1.2259032726287842,grad_norm: 0.9999999224194616, iteration: 389811
loss: 0.9817971587181091,grad_norm: 0.999999242328595, iteration: 389812
loss: 1.0129778385162354,grad_norm: 0.7988298641524694, iteration: 389813
loss: 0.9827127456665039,grad_norm: 0.9999998005463229, iteration: 389814
loss: 0.9665019512176514,grad_norm: 0.7406854183877, iteration: 389815
loss: 1.0430257320404053,grad_norm: 0.7267189081050075, iteration: 389816
loss: 0.9942336082458496,grad_norm: 0.9129961229466748, iteration: 389817
loss: 0.9598655104637146,grad_norm: 0.7753813226088824, iteration: 389818
loss: 1.0120779275894165,grad_norm: 0.8618479927576221, iteration: 389819
loss: 0.9879672527313232,grad_norm: 0.7821700844323388, iteration: 389820
loss: 1.0050745010375977,grad_norm: 0.7117479595287135, iteration: 389821
loss: 1.0015243291854858,grad_norm: 0.7505798458946232, iteration: 389822
loss: 1.0103139877319336,grad_norm: 0.7985066369291756, iteration: 389823
loss: 1.0067392587661743,grad_norm: 0.7604755839135439, iteration: 389824
loss: 1.0030323266983032,grad_norm: 0.8473806753405332, iteration: 389825
loss: 1.090970516204834,grad_norm: 0.9999997731707352, iteration: 389826
loss: 1.046755313873291,grad_norm: 0.9999994636725842, iteration: 389827
loss: 0.9999828338623047,grad_norm: 0.8042357539319477, iteration: 389828
loss: 1.043375015258789,grad_norm: 0.8528132524705344, iteration: 389829
loss: 0.9974475502967834,grad_norm: 0.9133899091069019, iteration: 389830
loss: 1.0327361822128296,grad_norm: 0.9999996800702812, iteration: 389831
loss: 1.0002875328063965,grad_norm: 0.7323233937828685, iteration: 389832
loss: 0.9759124517440796,grad_norm: 0.7537699441310921, iteration: 389833
loss: 1.0163395404815674,grad_norm: 0.8708980399940534, iteration: 389834
loss: 1.040695071220398,grad_norm: 0.8359008776756347, iteration: 389835
loss: 0.970038652420044,grad_norm: 0.7992041061957054, iteration: 389836
loss: 0.9653251767158508,grad_norm: 0.626160211181909, iteration: 389837
loss: 1.0243690013885498,grad_norm: 0.7448624810455942, iteration: 389838
loss: 1.0737204551696777,grad_norm: 0.9999990193762791, iteration: 389839
loss: 1.0243617296218872,grad_norm: 0.6357079454303438, iteration: 389840
loss: 0.9758366346359253,grad_norm: 0.7153447704142286, iteration: 389841
loss: 0.9901955127716064,grad_norm: 0.7349083568043737, iteration: 389842
loss: 0.9987748265266418,grad_norm: 0.6643839146010526, iteration: 389843
loss: 1.0018384456634521,grad_norm: 0.9999991327144102, iteration: 389844
loss: 1.0027148723602295,grad_norm: 0.8092754166953777, iteration: 389845
loss: 1.0071529150009155,grad_norm: 0.9167010248379958, iteration: 389846
loss: 1.0252984762191772,grad_norm: 0.9999999807271757, iteration: 389847
loss: 0.9709091186523438,grad_norm: 0.9999992153130071, iteration: 389848
loss: 0.9838466048240662,grad_norm: 0.818940020582017, iteration: 389849
loss: 1.016626000404358,grad_norm: 0.726201621202882, iteration: 389850
loss: 1.0024681091308594,grad_norm: 0.7971726607355986, iteration: 389851
loss: 1.044089436531067,grad_norm: 0.8623665764097641, iteration: 389852
loss: 0.972204864025116,grad_norm: 0.649303506618289, iteration: 389853
loss: 1.1120281219482422,grad_norm: 0.9999998228006334, iteration: 389854
loss: 0.9490224719047546,grad_norm: 0.8866263738160218, iteration: 389855
loss: 1.0040513277053833,grad_norm: 0.754144132371456, iteration: 389856
loss: 1.1357669830322266,grad_norm: 0.8740708366853767, iteration: 389857
loss: 1.0244181156158447,grad_norm: 0.7373600743026287, iteration: 389858
loss: 1.1466318368911743,grad_norm: 0.8401622054515506, iteration: 389859
loss: 0.9740707278251648,grad_norm: 0.8155386477832336, iteration: 389860
loss: 1.095969557762146,grad_norm: 0.8254015481079249, iteration: 389861
loss: 1.0050920248031616,grad_norm: 0.8111314643851286, iteration: 389862
loss: 1.0259168148040771,grad_norm: 0.6555961710560738, iteration: 389863
loss: 1.0445157289505005,grad_norm: 0.9681778456601147, iteration: 389864
loss: 1.005775809288025,grad_norm: 0.880252930986721, iteration: 389865
loss: 1.0179898738861084,grad_norm: 0.9228911339376363, iteration: 389866
loss: 1.0180147886276245,grad_norm: 0.7567828365357796, iteration: 389867
loss: 1.071745753288269,grad_norm: 0.9999999461735836, iteration: 389868
loss: 1.0108078718185425,grad_norm: 0.599765327983831, iteration: 389869
loss: 0.9629307389259338,grad_norm: 0.8066239144528158, iteration: 389870
loss: 0.9764139652252197,grad_norm: 0.7786691805245728, iteration: 389871
loss: 1.071317195892334,grad_norm: 0.94092470829897, iteration: 389872
loss: 0.9783961772918701,grad_norm: 0.7411134956512936, iteration: 389873
loss: 0.9896625876426697,grad_norm: 0.8921921894505659, iteration: 389874
loss: 0.9740517139434814,grad_norm: 0.8538940360886058, iteration: 389875
loss: 0.9411787390708923,grad_norm: 0.7724687372085661, iteration: 389876
loss: 1.0375959873199463,grad_norm: 0.7168810345174149, iteration: 389877
loss: 0.9814484715461731,grad_norm: 0.7434579559889414, iteration: 389878
loss: 0.9868329763412476,grad_norm: 0.6549200107607094, iteration: 389879
loss: 1.0686882734298706,grad_norm: 0.8187371800780983, iteration: 389880
loss: 1.0154047012329102,grad_norm: 0.8987743253328048, iteration: 389881
loss: 1.0528297424316406,grad_norm: 0.8921075152051419, iteration: 389882
loss: 0.9596802592277527,grad_norm: 0.8262986656821337, iteration: 389883
loss: 0.9930415153503418,grad_norm: 0.751506153400179, iteration: 389884
loss: 0.9825687408447266,grad_norm: 0.7050849318288898, iteration: 389885
loss: 0.971893310546875,grad_norm: 0.633538513592922, iteration: 389886
loss: 0.9729446768760681,grad_norm: 0.7334871641219544, iteration: 389887
loss: 0.9701963663101196,grad_norm: 0.9327480598178494, iteration: 389888
loss: 0.9724982380867004,grad_norm: 0.5951612556917436, iteration: 389889
loss: 1.0710045099258423,grad_norm: 0.7814658978577792, iteration: 389890
loss: 0.997969388961792,grad_norm: 0.8049163631896622, iteration: 389891
loss: 1.0158863067626953,grad_norm: 0.9266993821022379, iteration: 389892
loss: 0.974354088306427,grad_norm: 0.9371477304307875, iteration: 389893
loss: 0.9833762049674988,grad_norm: 0.8831051970339662, iteration: 389894
loss: 0.9722266793251038,grad_norm: 0.8420047890384962, iteration: 389895
loss: 0.9658898115158081,grad_norm: 0.9279053236699584, iteration: 389896
loss: 1.0192866325378418,grad_norm: 0.900737562927447, iteration: 389897
loss: 1.0433261394500732,grad_norm: 0.9999991521463822, iteration: 389898
loss: 1.0065838098526,grad_norm: 0.6575850488040316, iteration: 389899
loss: 1.0051530599594116,grad_norm: 0.8494853046112697, iteration: 389900
loss: 1.0083301067352295,grad_norm: 0.7314418380818075, iteration: 389901
loss: 1.0154783725738525,grad_norm: 0.7778943130640799, iteration: 389902
loss: 1.0072866678237915,grad_norm: 0.9999994607612082, iteration: 389903
loss: 1.021426796913147,grad_norm: 0.9409630109294121, iteration: 389904
loss: 1.0368127822875977,grad_norm: 0.9999991965921683, iteration: 389905
loss: 1.0131902694702148,grad_norm: 0.9006080399708736, iteration: 389906
loss: 1.127882480621338,grad_norm: 0.929399902575288, iteration: 389907
loss: 0.9913190007209778,grad_norm: 0.806618199582584, iteration: 389908
loss: 1.0087459087371826,grad_norm: 0.7849218160132507, iteration: 389909
loss: 1.0101494789123535,grad_norm: 0.8321231006860654, iteration: 389910
loss: 1.0178790092468262,grad_norm: 0.9990586156427728, iteration: 389911
loss: 1.0130146741867065,grad_norm: 0.9999997509268582, iteration: 389912
loss: 0.9946988821029663,grad_norm: 0.8426755907926244, iteration: 389913
loss: 1.0126252174377441,grad_norm: 0.7554102852038055, iteration: 389914
loss: 1.0482571125030518,grad_norm: 0.999999713956815, iteration: 389915
loss: 1.0150765180587769,grad_norm: 0.9389143958632424, iteration: 389916
loss: 0.9628584980964661,grad_norm: 0.7242352068394747, iteration: 389917
loss: 1.015023112297058,grad_norm: 0.7738614614960426, iteration: 389918
loss: 1.024276852607727,grad_norm: 0.8205887211398404, iteration: 389919
loss: 0.966644287109375,grad_norm: 0.8018477310813857, iteration: 389920
loss: 0.9757413268089294,grad_norm: 0.8248902084125104, iteration: 389921
loss: 0.9752510190010071,grad_norm: 0.7386534958812885, iteration: 389922
loss: 1.030214786529541,grad_norm: 0.8200937179685028, iteration: 389923
loss: 0.9896037578582764,grad_norm: 0.8917598102784149, iteration: 389924
loss: 0.9834423065185547,grad_norm: 0.8695684458405345, iteration: 389925
loss: 0.9728416204452515,grad_norm: 0.8623819067956698, iteration: 389926
loss: 0.9868485331535339,grad_norm: 0.7008734146772148, iteration: 389927
loss: 1.0070350170135498,grad_norm: 0.7482032672655874, iteration: 389928
loss: 1.0003803968429565,grad_norm: 0.8070650281198288, iteration: 389929
loss: 0.9992620348930359,grad_norm: 0.7967792473151983, iteration: 389930
loss: 0.9918138384819031,grad_norm: 0.6298395057300624, iteration: 389931
loss: 1.0199096202850342,grad_norm: 0.8026373890983411, iteration: 389932
loss: 1.1120476722717285,grad_norm: 0.8061251866046092, iteration: 389933
loss: 1.0204224586486816,grad_norm: 0.7303391777390371, iteration: 389934
loss: 0.9946522116661072,grad_norm: 0.8277531921671734, iteration: 389935
loss: 1.0199265480041504,grad_norm: 0.7941677968914571, iteration: 389936
loss: 0.9908870458602905,grad_norm: 0.7171004275942234, iteration: 389937
loss: 0.9896835088729858,grad_norm: 0.7958551572544772, iteration: 389938
loss: 1.02536141872406,grad_norm: 0.928767762175814, iteration: 389939
loss: 0.981695830821991,grad_norm: 0.7311711689351926, iteration: 389940
loss: 1.0496162176132202,grad_norm: 0.9999998846690727, iteration: 389941
loss: 0.9631568789482117,grad_norm: 0.8062615991910954, iteration: 389942
loss: 1.0090742111206055,grad_norm: 0.9999991198534164, iteration: 389943
loss: 1.0695223808288574,grad_norm: 0.9691129410434595, iteration: 389944
loss: 1.0110520124435425,grad_norm: 0.7181632366211645, iteration: 389945
loss: 1.0032527446746826,grad_norm: 0.8947231984555535, iteration: 389946
loss: 0.951937198638916,grad_norm: 0.7476647528454042, iteration: 389947
loss: 1.1105625629425049,grad_norm: 0.6623055173554262, iteration: 389948
loss: 1.0173497200012207,grad_norm: 0.6804817124070187, iteration: 389949
loss: 0.9860039353370667,grad_norm: 0.9999989882319508, iteration: 389950
loss: 0.97776198387146,grad_norm: 0.834127127456286, iteration: 389951
loss: 1.015527606010437,grad_norm: 0.6941587433192691, iteration: 389952
loss: 0.9873103499412537,grad_norm: 0.7224496172610736, iteration: 389953
loss: 1.0099807977676392,grad_norm: 0.7268065979296233, iteration: 389954
loss: 1.0263208150863647,grad_norm: 0.7131260638538591, iteration: 389955
loss: 0.9566252827644348,grad_norm: 0.7720078356837478, iteration: 389956
loss: 0.9913694858551025,grad_norm: 0.8550854383141051, iteration: 389957
loss: 0.9907661080360413,grad_norm: 0.7351554545194003, iteration: 389958
loss: 0.9586729407310486,grad_norm: 0.8245837128503579, iteration: 389959
loss: 0.9962277412414551,grad_norm: 0.8017986565993936, iteration: 389960
loss: 1.0024316310882568,grad_norm: 0.7085767423904056, iteration: 389961
loss: 1.0176129341125488,grad_norm: 0.8210082091922436, iteration: 389962
loss: 1.044061303138733,grad_norm: 0.7649228536231375, iteration: 389963
loss: 1.0062429904937744,grad_norm: 0.8979999099629402, iteration: 389964
loss: 1.0950580835342407,grad_norm: 0.9716784669680631, iteration: 389965
loss: 0.9855291843414307,grad_norm: 0.7888059793572496, iteration: 389966
loss: 1.0442168712615967,grad_norm: 0.7774531778860958, iteration: 389967
loss: 0.9855250120162964,grad_norm: 0.68046635190402, iteration: 389968
loss: 1.0039341449737549,grad_norm: 0.8302468327819271, iteration: 389969
loss: 1.0479402542114258,grad_norm: 0.7571047424719313, iteration: 389970
loss: 0.9914445281028748,grad_norm: 0.7547835627357279, iteration: 389971
loss: 0.9669697284698486,grad_norm: 0.8286460400967501, iteration: 389972
loss: 0.9735344052314758,grad_norm: 0.8276430814093652, iteration: 389973
loss: 1.0145080089569092,grad_norm: 0.9298588376511489, iteration: 389974
loss: 1.047178864479065,grad_norm: 0.944366924068311, iteration: 389975
loss: 0.990665078163147,grad_norm: 0.9999990312643883, iteration: 389976
loss: 1.0084627866744995,grad_norm: 0.7700622533239221, iteration: 389977
loss: 0.9852964878082275,grad_norm: 0.6819811519126328, iteration: 389978
loss: 1.0050878524780273,grad_norm: 0.8962122209358416, iteration: 389979
loss: 1.0157133340835571,grad_norm: 0.8604700945248408, iteration: 389980
loss: 1.0154869556427002,grad_norm: 0.7717760795303966, iteration: 389981
loss: 0.9807698130607605,grad_norm: 0.8463861791436438, iteration: 389982
loss: 1.013981580734253,grad_norm: 0.7341620377667823, iteration: 389983
loss: 0.9585198163986206,grad_norm: 0.9818324872484483, iteration: 389984
loss: 0.9756336212158203,grad_norm: 0.999999126385309, iteration: 389985
loss: 0.9928930401802063,grad_norm: 0.7028482909224433, iteration: 389986
loss: 0.9797385334968567,grad_norm: 0.8505279568931261, iteration: 389987
loss: 0.9632925391197205,grad_norm: 0.6741190991048115, iteration: 389988
loss: 1.0478936433792114,grad_norm: 0.9407810707872564, iteration: 389989
loss: 1.017569899559021,grad_norm: 0.7221541839096491, iteration: 389990
loss: 0.9990323185920715,grad_norm: 0.7597184902236468, iteration: 389991
loss: 0.9955546855926514,grad_norm: 0.8934607770071091, iteration: 389992
loss: 1.0262552499771118,grad_norm: 0.939322990985498, iteration: 389993
loss: 1.0153321027755737,grad_norm: 0.9999998273061456, iteration: 389994
loss: 1.0256937742233276,grad_norm: 0.9999990633636314, iteration: 389995
loss: 1.0073039531707764,grad_norm: 0.7305468820910433, iteration: 389996
loss: 0.9526345133781433,grad_norm: 0.9339599330776798, iteration: 389997
loss: 0.9702387452125549,grad_norm: 0.8902226812480757, iteration: 389998
loss: 1.0134907960891724,grad_norm: 0.8128312186610993, iteration: 389999
loss: 1.0435905456542969,grad_norm: 0.815877163565857, iteration: 390000
Evaluating at step 390000
{'val': 0.994382455945015, 'test': 1.872380180480762}
loss: 1.0200074911117554,grad_norm: 0.9509462690525903, iteration: 390001
loss: 0.9968122839927673,grad_norm: 0.8722153722129242, iteration: 390002
loss: 1.0403746366500854,grad_norm: 0.9531481166649654, iteration: 390003
loss: 1.065664291381836,grad_norm: 0.9999991334248618, iteration: 390004
loss: 0.9752621650695801,grad_norm: 0.7551194337059081, iteration: 390005
loss: 0.9953230023384094,grad_norm: 0.9999991809123454, iteration: 390006
loss: 0.9850502610206604,grad_norm: 0.8936258739818606, iteration: 390007
loss: 0.9705642461776733,grad_norm: 0.9999995272195852, iteration: 390008
loss: 1.0437769889831543,grad_norm: 0.9390056987605511, iteration: 390009
loss: 0.9986148476600647,grad_norm: 0.8438836740513558, iteration: 390010
loss: 0.9984711408615112,grad_norm: 0.6525574878409253, iteration: 390011
loss: 0.9991569519042969,grad_norm: 0.7405908103810122, iteration: 390012
loss: 1.0915648937225342,grad_norm: 0.9962608370054138, iteration: 390013
loss: 1.0035655498504639,grad_norm: 0.8127385379002877, iteration: 390014
loss: 0.9687792658805847,grad_norm: 0.8140859234615238, iteration: 390015
loss: 0.9918948411941528,grad_norm: 0.6730794750066132, iteration: 390016
loss: 1.012250304222107,grad_norm: 0.8932526303734932, iteration: 390017
loss: 0.9894230961799622,grad_norm: 0.7638158317884108, iteration: 390018
loss: 1.0098075866699219,grad_norm: 0.8826721351773282, iteration: 390019
loss: 1.008769154548645,grad_norm: 0.8369042776454767, iteration: 390020
loss: 1.0916259288787842,grad_norm: 0.7782397083522349, iteration: 390021
loss: 0.9751536846160889,grad_norm: 0.7671646636551809, iteration: 390022
loss: 0.9808188676834106,grad_norm: 0.8416035262949251, iteration: 390023
loss: 0.9925951361656189,grad_norm: 0.7091079622144221, iteration: 390024
loss: 1.0204946994781494,grad_norm: 0.8748449331692558, iteration: 390025
loss: 1.0183485746383667,grad_norm: 0.7551803291217029, iteration: 390026
loss: 0.9720209240913391,grad_norm: 0.755605417557426, iteration: 390027
loss: 0.9902095198631287,grad_norm: 0.7166306678396406, iteration: 390028
loss: 0.9827266335487366,grad_norm: 0.7034877270064904, iteration: 390029
loss: 0.9887780547142029,grad_norm: 0.7463257770556406, iteration: 390030
loss: 1.0106704235076904,grad_norm: 0.6276744922512711, iteration: 390031
loss: 0.9733135104179382,grad_norm: 0.9999995461782047, iteration: 390032
loss: 1.009755253791809,grad_norm: 1.0000000405823033, iteration: 390033
loss: 0.9579771161079407,grad_norm: 0.7544229404608193, iteration: 390034
loss: 1.0091570615768433,grad_norm: 0.8033466763009571, iteration: 390035
loss: 1.0121848583221436,grad_norm: 0.7829187769194661, iteration: 390036
loss: 1.0000263452529907,grad_norm: 0.9367941486109066, iteration: 390037
loss: 0.9887068867683411,grad_norm: 0.8772623745334648, iteration: 390038
loss: 1.002394199371338,grad_norm: 0.9503287453393502, iteration: 390039
loss: 1.0102289915084839,grad_norm: 0.7824039245626382, iteration: 390040
loss: 0.9777137041091919,grad_norm: 0.7191871160993131, iteration: 390041
loss: 0.981748640537262,grad_norm: 0.7350705622570078, iteration: 390042
loss: 1.0122287273406982,grad_norm: 0.8019297643567743, iteration: 390043
loss: 0.9686695337295532,grad_norm: 0.8037568423909743, iteration: 390044
loss: 0.983528733253479,grad_norm: 0.8151836164684477, iteration: 390045
loss: 0.9793035984039307,grad_norm: 0.9409228031570145, iteration: 390046
loss: 1.0125973224639893,grad_norm: 0.6874751324705991, iteration: 390047
loss: 0.9867061972618103,grad_norm: 0.9695993585091656, iteration: 390048
loss: 1.0185071229934692,grad_norm: 0.7225512961212764, iteration: 390049
loss: 1.0581974983215332,grad_norm: 0.9999996779082895, iteration: 390050
loss: 1.0242953300476074,grad_norm: 0.8215631245327029, iteration: 390051
loss: 0.9915544390678406,grad_norm: 0.8975208056085853, iteration: 390052
loss: 1.0239086151123047,grad_norm: 0.7810273627220368, iteration: 390053
loss: 1.0097676515579224,grad_norm: 0.8810908288991436, iteration: 390054
loss: 1.005521297454834,grad_norm: 0.7365637346760927, iteration: 390055
loss: 0.9929970502853394,grad_norm: 0.9999996705878297, iteration: 390056
loss: 0.9667679667472839,grad_norm: 0.8993259511696869, iteration: 390057
loss: 1.049055576324463,grad_norm: 0.9999995700226273, iteration: 390058
loss: 0.9794965982437134,grad_norm: 0.9999991964707938, iteration: 390059
loss: 1.0329440832138062,grad_norm: 0.8296242326185004, iteration: 390060
loss: 0.9520906209945679,grad_norm: 0.912746165320831, iteration: 390061
loss: 1.0140306949615479,grad_norm: 0.7238605199410899, iteration: 390062
loss: 1.0116432905197144,grad_norm: 0.8193693894499432, iteration: 390063
loss: 1.0573318004608154,grad_norm: 0.780362451066955, iteration: 390064
loss: 1.0001838207244873,grad_norm: 0.7945090890270873, iteration: 390065
loss: 1.0707635879516602,grad_norm: 0.999999399710476, iteration: 390066
loss: 1.0147713422775269,grad_norm: 0.6817138939395067, iteration: 390067
loss: 0.9884583353996277,grad_norm: 0.7992414330960175, iteration: 390068
loss: 0.9942190647125244,grad_norm: 0.7538566908735186, iteration: 390069
loss: 1.0506315231323242,grad_norm: 0.7959270244942993, iteration: 390070
loss: 1.0640255212783813,grad_norm: 0.9999990673304257, iteration: 390071
loss: 0.9853662848472595,grad_norm: 0.948509358427062, iteration: 390072
loss: 1.0350897312164307,grad_norm: 0.8944067015864604, iteration: 390073
loss: 0.9948500990867615,grad_norm: 0.757587226064824, iteration: 390074
loss: 0.9874707460403442,grad_norm: 0.8259122077734475, iteration: 390075
loss: 1.0214366912841797,grad_norm: 0.9690082561941782, iteration: 390076
loss: 0.9967290163040161,grad_norm: 0.7214687679016181, iteration: 390077
loss: 1.0972697734832764,grad_norm: 0.9612451660199762, iteration: 390078
loss: 1.00087308883667,grad_norm: 0.965128207857009, iteration: 390079
loss: 0.9763398170471191,grad_norm: 0.8626621902031097, iteration: 390080
loss: 1.001210331916809,grad_norm: 0.5620993567941063, iteration: 390081
loss: 1.100947380065918,grad_norm: 0.9631940676924178, iteration: 390082
loss: 0.9966045022010803,grad_norm: 0.9128433141023924, iteration: 390083
loss: 0.9622954726219177,grad_norm: 0.6994098054488724, iteration: 390084
loss: 1.016829013824463,grad_norm: 0.6871900802557146, iteration: 390085
loss: 0.9723840951919556,grad_norm: 0.7347487060129928, iteration: 390086
loss: 1.0088857412338257,grad_norm: 0.7645481982458937, iteration: 390087
loss: 1.0107258558273315,grad_norm: 0.807604012964455, iteration: 390088
loss: 1.036645770072937,grad_norm: 0.9118188841897574, iteration: 390089
loss: 0.9844186305999756,grad_norm: 0.7598040667543712, iteration: 390090
loss: 1.019020676612854,grad_norm: 0.8092069713232821, iteration: 390091
loss: 0.9859809279441833,grad_norm: 0.7306570807164231, iteration: 390092
loss: 1.0511972904205322,grad_norm: 0.7594129973934931, iteration: 390093
loss: 1.0146851539611816,grad_norm: 0.9999996332103923, iteration: 390094
loss: 1.0708870887756348,grad_norm: 0.8238189418375443, iteration: 390095
loss: 1.0145940780639648,grad_norm: 0.8879096257145279, iteration: 390096
loss: 1.012994408607483,grad_norm: 0.7430945910737419, iteration: 390097
loss: 1.0123438835144043,grad_norm: 0.7312592687894226, iteration: 390098
loss: 1.012538194656372,grad_norm: 0.6921499216273602, iteration: 390099
loss: 1.0428582429885864,grad_norm: 0.9999997409989028, iteration: 390100
loss: 0.9882808327674866,grad_norm: 0.6274037179791294, iteration: 390101
loss: 0.9968439340591431,grad_norm: 0.6301555923479891, iteration: 390102
loss: 1.0274927616119385,grad_norm: 0.9101815483211699, iteration: 390103
loss: 1.0062202215194702,grad_norm: 0.674633492582268, iteration: 390104
loss: 0.977134644985199,grad_norm: 0.9655898023394185, iteration: 390105
loss: 0.9790441393852234,grad_norm: 0.9095544977310482, iteration: 390106
loss: 1.0286962985992432,grad_norm: 0.9127939707895288, iteration: 390107
loss: 0.9771228432655334,grad_norm: 0.7610146852967103, iteration: 390108
loss: 1.138749361038208,grad_norm: 0.9999990351091126, iteration: 390109
loss: 1.0228931903839111,grad_norm: 0.7239446793167063, iteration: 390110
loss: 0.9978170394897461,grad_norm: 0.6591727315610405, iteration: 390111
loss: 1.0539791584014893,grad_norm: 0.9999998697474382, iteration: 390112
loss: 0.9535331130027771,grad_norm: 0.687363231952698, iteration: 390113
loss: 1.0461326837539673,grad_norm: 0.886587329671175, iteration: 390114
loss: 0.99746173620224,grad_norm: 0.7121677246388508, iteration: 390115
loss: 1.0155889987945557,grad_norm: 0.7691316076512857, iteration: 390116
loss: 1.0125746726989746,grad_norm: 0.8841393523444827, iteration: 390117
loss: 1.0106867551803589,grad_norm: 0.8481921987651797, iteration: 390118
loss: 1.0401662588119507,grad_norm: 0.8629137137780368, iteration: 390119
loss: 1.0086817741394043,grad_norm: 0.766435405838415, iteration: 390120
loss: 0.9616031050682068,grad_norm: 0.6662009759555457, iteration: 390121
loss: 1.01492178440094,grad_norm: 0.964110014224243, iteration: 390122
loss: 1.0028629302978516,grad_norm: 0.756510115722248, iteration: 390123
loss: 1.0077576637268066,grad_norm: 0.7512392099702988, iteration: 390124
loss: 1.1212390661239624,grad_norm: 0.8295286850900627, iteration: 390125
loss: 1.0046281814575195,grad_norm: 0.7951991512839957, iteration: 390126
loss: 1.007158637046814,grad_norm: 0.7737850456474133, iteration: 390127
loss: 1.0253885984420776,grad_norm: 0.8086129801887713, iteration: 390128
loss: 0.9802528619766235,grad_norm: 0.8454571297888669, iteration: 390129
loss: 0.9851008057594299,grad_norm: 0.680924248629369, iteration: 390130
loss: 1.0128260850906372,grad_norm: 0.6948052856143823, iteration: 390131
loss: 1.0205248594284058,grad_norm: 0.6614409456868907, iteration: 390132
loss: 1.0273133516311646,grad_norm: 0.836171530879215, iteration: 390133
loss: 0.9944201707839966,grad_norm: 0.7727943914870948, iteration: 390134
loss: 1.054213523864746,grad_norm: 0.9999991494657899, iteration: 390135
loss: 1.000483751296997,grad_norm: 0.7295264776573392, iteration: 390136
loss: 1.001535177230835,grad_norm: 0.8259372402418134, iteration: 390137
loss: 0.9678647518157959,grad_norm: 0.8229070665702545, iteration: 390138
loss: 1.0311435461044312,grad_norm: 0.9259493790827937, iteration: 390139
loss: 1.1263792514801025,grad_norm: 0.9999990328478784, iteration: 390140
loss: 1.0397613048553467,grad_norm: 0.8231155141972347, iteration: 390141
loss: 1.102584958076477,grad_norm: 0.999999770840932, iteration: 390142
loss: 0.9912253022193909,grad_norm: 0.8047300447071771, iteration: 390143
loss: 1.0152419805526733,grad_norm: 0.81426523058335, iteration: 390144
loss: 1.008374810218811,grad_norm: 0.7307676322367057, iteration: 390145
loss: 0.9945746660232544,grad_norm: 0.7637239944249622, iteration: 390146
loss: 1.0066677331924438,grad_norm: 0.8082718145826028, iteration: 390147
loss: 0.9718953967094421,grad_norm: 0.803147751883317, iteration: 390148
loss: 0.9635114669799805,grad_norm: 0.8369944400303186, iteration: 390149
loss: 0.9888545870780945,grad_norm: 0.9134157266284613, iteration: 390150
loss: 1.0035649538040161,grad_norm: 0.8276159929487712, iteration: 390151
loss: 1.0242358446121216,grad_norm: 0.9201904206634848, iteration: 390152
loss: 0.9743893146514893,grad_norm: 0.8612703778075181, iteration: 390153
loss: 0.9825500249862671,grad_norm: 0.6418559466158976, iteration: 390154
loss: 0.9844523668289185,grad_norm: 0.769884754784818, iteration: 390155
loss: 0.938413679599762,grad_norm: 0.7096638781741741, iteration: 390156
loss: 1.0152066946029663,grad_norm: 0.7041733553154059, iteration: 390157
loss: 1.0046663284301758,grad_norm: 0.8852941032423224, iteration: 390158
loss: 0.9749953150749207,grad_norm: 0.8059979550461701, iteration: 390159
loss: 1.0250931978225708,grad_norm: 0.6051702328577854, iteration: 390160
loss: 1.0056097507476807,grad_norm: 0.6888276697098763, iteration: 390161
loss: 1.0196729898452759,grad_norm: 0.8912821244878423, iteration: 390162
loss: 0.9987491369247437,grad_norm: 0.8046062454069971, iteration: 390163
loss: 1.0871042013168335,grad_norm: 0.9999991289519363, iteration: 390164
loss: 0.9749784469604492,grad_norm: 0.8205252278714411, iteration: 390165
loss: 0.9912391901016235,grad_norm: 0.7039130052842073, iteration: 390166
loss: 1.0012398958206177,grad_norm: 0.8595730455542139, iteration: 390167
loss: 1.0793648958206177,grad_norm: 0.999999450861181, iteration: 390168
loss: 1.034574270248413,grad_norm: 0.7969513346612261, iteration: 390169
loss: 0.9969711303710938,grad_norm: 0.7704031144783506, iteration: 390170
loss: 1.0266776084899902,grad_norm: 0.7507178300854062, iteration: 390171
loss: 0.9872572422027588,grad_norm: 0.9999991447436243, iteration: 390172
loss: 0.9950053691864014,grad_norm: 0.6305261737579106, iteration: 390173
loss: 0.9794319868087769,grad_norm: 0.8735232538440597, iteration: 390174
loss: 0.9855967164039612,grad_norm: 0.6255858844586568, iteration: 390175
loss: 1.0143507719039917,grad_norm: 0.9999991786938033, iteration: 390176
loss: 1.0715513229370117,grad_norm: 0.9999997638010822, iteration: 390177
loss: 1.0047645568847656,grad_norm: 0.7859948493758506, iteration: 390178
loss: 0.9514010548591614,grad_norm: 0.7302368181173808, iteration: 390179
loss: 1.0025053024291992,grad_norm: 0.7117037456865618, iteration: 390180
loss: 0.9837753176689148,grad_norm: 0.8022514394686947, iteration: 390181
loss: 1.013056993484497,grad_norm: 0.6996243213327767, iteration: 390182
loss: 0.9782998561859131,grad_norm: 0.8716201523312586, iteration: 390183
loss: 1.0377001762390137,grad_norm: 0.6594952682974565, iteration: 390184
loss: 0.9836323857307434,grad_norm: 0.728347246234066, iteration: 390185
loss: 0.9882422685623169,grad_norm: 0.6953250871123192, iteration: 390186
loss: 0.9736961722373962,grad_norm: 0.8438475814126173, iteration: 390187
loss: 1.008316993713379,grad_norm: 0.8129101310047198, iteration: 390188
loss: 0.9624629616737366,grad_norm: 0.8797657711031135, iteration: 390189
loss: 0.9695510268211365,grad_norm: 0.8999896434944198, iteration: 390190
loss: 0.9681037068367004,grad_norm: 0.7362943806956798, iteration: 390191
loss: 0.9995298385620117,grad_norm: 0.8098235448758692, iteration: 390192
loss: 0.9826058745384216,grad_norm: 0.608953246092204, iteration: 390193
loss: 1.0209684371948242,grad_norm: 0.7857121689844954, iteration: 390194
loss: 1.0351223945617676,grad_norm: 0.9999995156352521, iteration: 390195
loss: 0.9511219263076782,grad_norm: 0.7823723940926275, iteration: 390196
loss: 1.018246054649353,grad_norm: 0.8120781541131434, iteration: 390197
loss: 1.041722059249878,grad_norm: 0.685846608262969, iteration: 390198
loss: 0.9906654357910156,grad_norm: 0.9999993950496885, iteration: 390199
loss: 0.9982783794403076,grad_norm: 0.9773084145528365, iteration: 390200
loss: 1.1401463747024536,grad_norm: 0.9999999588185541, iteration: 390201
loss: 1.0126912593841553,grad_norm: 0.9999991955223165, iteration: 390202
loss: 1.090325117111206,grad_norm: 0.999999869440195, iteration: 390203
loss: 0.9796432256698608,grad_norm: 0.803299094456652, iteration: 390204
loss: 0.9992244839668274,grad_norm: 0.7941823044592133, iteration: 390205
loss: 1.0016899108886719,grad_norm: 0.7806229319716569, iteration: 390206
loss: 1.0388600826263428,grad_norm: 0.7234336687006964, iteration: 390207
loss: 0.9652977585792542,grad_norm: 0.8511147607929771, iteration: 390208
loss: 0.996268630027771,grad_norm: 0.8433165391057056, iteration: 390209
loss: 1.0006436109542847,grad_norm: 0.9999991249341503, iteration: 390210
loss: 0.9828521609306335,grad_norm: 0.8526333746883322, iteration: 390211
loss: 1.0104600191116333,grad_norm: 0.7055975281798552, iteration: 390212
loss: 1.011497139930725,grad_norm: 0.8529187957385292, iteration: 390213
loss: 1.073185920715332,grad_norm: 0.9999996143673011, iteration: 390214
loss: 0.959877073764801,grad_norm: 0.7854778731845913, iteration: 390215
loss: 1.0044819116592407,grad_norm: 0.9999991752256151, iteration: 390216
loss: 0.9739686846733093,grad_norm: 0.718747382103644, iteration: 390217
loss: 1.035231113433838,grad_norm: 0.9999994181947697, iteration: 390218
loss: 1.0139272212982178,grad_norm: 0.7941231512693255, iteration: 390219
loss: 1.05295729637146,grad_norm: 0.7441915995635413, iteration: 390220
loss: 1.0172264575958252,grad_norm: 0.6360014169479998, iteration: 390221
loss: 0.9859398007392883,grad_norm: 0.7906845566425087, iteration: 390222
loss: 0.988044798374176,grad_norm: 0.6358675119244598, iteration: 390223
loss: 0.971934974193573,grad_norm: 0.7768393462466602, iteration: 390224
loss: 0.9647940993309021,grad_norm: 0.8204947820706406, iteration: 390225
loss: 1.0040669441223145,grad_norm: 0.8373835915229858, iteration: 390226
loss: 0.9965842962265015,grad_norm: 0.7413321139741189, iteration: 390227
loss: 0.9714580774307251,grad_norm: 0.999999313279059, iteration: 390228
loss: 0.9931897521018982,grad_norm: 0.9355163081827949, iteration: 390229
loss: 1.0339419841766357,grad_norm: 0.7465948258079406, iteration: 390230
loss: 0.9681684374809265,grad_norm: 0.7670790321712231, iteration: 390231
loss: 0.9507412314414978,grad_norm: 0.8254672497584689, iteration: 390232
loss: 0.955836832523346,grad_norm: 0.6775579915369865, iteration: 390233
loss: 1.0087705850601196,grad_norm: 0.7043090576500453, iteration: 390234
loss: 0.9932951331138611,grad_norm: 0.7707417501364391, iteration: 390235
loss: 1.0580823421478271,grad_norm: 0.7351930085870773, iteration: 390236
loss: 1.014026165008545,grad_norm: 0.9019644067527097, iteration: 390237
loss: 0.9903110265731812,grad_norm: 0.8353967130987122, iteration: 390238
loss: 0.9896305799484253,grad_norm: 0.8127623422914859, iteration: 390239
loss: 0.9849246740341187,grad_norm: 0.6895966906714223, iteration: 390240
loss: 1.0563420057296753,grad_norm: 0.750382464959532, iteration: 390241
loss: 1.0615681409835815,grad_norm: 0.9999989635790076, iteration: 390242
loss: 1.0566033124923706,grad_norm: 0.9999996546319638, iteration: 390243
loss: 0.996886134147644,grad_norm: 0.9699923681320991, iteration: 390244
loss: 1.0386793613433838,grad_norm: 0.9949420502778288, iteration: 390245
loss: 1.0057051181793213,grad_norm: 0.8116328712485937, iteration: 390246
loss: 1.0223876237869263,grad_norm: 0.7439747059540212, iteration: 390247
loss: 1.0019980669021606,grad_norm: 0.8203179816376684, iteration: 390248
loss: 0.9876216650009155,grad_norm: 0.9886835695009234, iteration: 390249
loss: 1.0002977848052979,grad_norm: 0.7148121540048291, iteration: 390250
loss: 0.9836758375167847,grad_norm: 0.6744779920113199, iteration: 390251
loss: 0.9860259890556335,grad_norm: 0.9505378894911412, iteration: 390252
loss: 1.060947299003601,grad_norm: 0.9999998885587302, iteration: 390253
loss: 1.0038650035858154,grad_norm: 0.7014186177439621, iteration: 390254
loss: 0.989677369594574,grad_norm: 0.819523259366904, iteration: 390255
loss: 0.9645581841468811,grad_norm: 0.8429362767540659, iteration: 390256
loss: 1.0020060539245605,grad_norm: 0.9999993723344502, iteration: 390257
loss: 1.0192890167236328,grad_norm: 0.867417528554679, iteration: 390258
loss: 0.9905176162719727,grad_norm: 0.8314487362358917, iteration: 390259
loss: 1.0028462409973145,grad_norm: 0.9094498763968583, iteration: 390260
loss: 1.0081067085266113,grad_norm: 0.7678740633644102, iteration: 390261
loss: 0.9765681624412537,grad_norm: 0.7032520808972106, iteration: 390262
loss: 1.0008177757263184,grad_norm: 0.7031766299087551, iteration: 390263
loss: 1.0214200019836426,grad_norm: 0.8543501681796244, iteration: 390264
loss: 1.0089460611343384,grad_norm: 0.9999990013273224, iteration: 390265
loss: 1.0203304290771484,grad_norm: 0.7639148390671889, iteration: 390266
loss: 0.9920545816421509,grad_norm: 0.7204063857464517, iteration: 390267
loss: 1.0101749897003174,grad_norm: 0.7548458314559319, iteration: 390268
loss: 1.0236495733261108,grad_norm: 0.8246013234070557, iteration: 390269
loss: 0.9567302465438843,grad_norm: 0.7360431846604105, iteration: 390270
loss: 1.002223014831543,grad_norm: 0.7422972856667185, iteration: 390271
loss: 0.9992868900299072,grad_norm: 0.8189531175854617, iteration: 390272
loss: 1.035251498222351,grad_norm: 0.7587389235540434, iteration: 390273
loss: 0.9742623567581177,grad_norm: 0.746273305205089, iteration: 390274
loss: 0.9768245816230774,grad_norm: 0.6762399262380765, iteration: 390275
loss: 1.0066132545471191,grad_norm: 0.7370525929961316, iteration: 390276
loss: 0.9933744668960571,grad_norm: 0.7214649055133138, iteration: 390277
loss: 1.0343507528305054,grad_norm: 0.7461662053281036, iteration: 390278
loss: 1.0362107753753662,grad_norm: 0.7651922929621102, iteration: 390279
loss: 0.9455959796905518,grad_norm: 0.7552952416353172, iteration: 390280
loss: 1.0105661153793335,grad_norm: 0.8458556946317665, iteration: 390281
loss: 0.9856290817260742,grad_norm: 0.7792967874423632, iteration: 390282
loss: 1.0668511390686035,grad_norm: 0.9999999849266371, iteration: 390283
loss: 1.0080821514129639,grad_norm: 0.8811914209135099, iteration: 390284
loss: 1.0274953842163086,grad_norm: 0.8983362868936745, iteration: 390285
loss: 0.9989885687828064,grad_norm: 0.7890662341922556, iteration: 390286
loss: 0.9407457113265991,grad_norm: 0.7878928458361878, iteration: 390287
loss: 0.984691321849823,grad_norm: 0.7654776955722506, iteration: 390288
loss: 0.9418964982032776,grad_norm: 0.7296364886801261, iteration: 390289
loss: 0.9819674491882324,grad_norm: 0.884098267352324, iteration: 390290
loss: 1.0139210224151611,grad_norm: 0.7219157496348272, iteration: 390291
loss: 0.9783834218978882,grad_norm: 0.7613271339240302, iteration: 390292
loss: 1.0454553365707397,grad_norm: 0.9773641223810899, iteration: 390293
loss: 0.9993896484375,grad_norm: 0.7949599094046811, iteration: 390294
loss: 1.022933006286621,grad_norm: 0.9048623561517924, iteration: 390295
loss: 1.0158021450042725,grad_norm: 0.999999665550253, iteration: 390296
loss: 0.9551528692245483,grad_norm: 0.8767153676252111, iteration: 390297
loss: 1.0236585140228271,grad_norm: 0.8098919094731657, iteration: 390298
loss: 1.0311009883880615,grad_norm: 0.8400051370008617, iteration: 390299
loss: 1.0052711963653564,grad_norm: 0.7880142811568837, iteration: 390300
loss: 1.0341880321502686,grad_norm: 0.701164383377382, iteration: 390301
loss: 0.9750745892524719,grad_norm: 0.7435116457981711, iteration: 390302
loss: 1.2443634271621704,grad_norm: 0.9999996549260961, iteration: 390303
loss: 0.9756870269775391,grad_norm: 0.9775175137848982, iteration: 390304
loss: 0.9959866404533386,grad_norm: 0.7856112649768183, iteration: 390305
loss: 1.0324950218200684,grad_norm: 0.6836436863923152, iteration: 390306
loss: 1.0250513553619385,grad_norm: 0.7734955125113674, iteration: 390307
loss: 1.0481840372085571,grad_norm: 0.7613534311616345, iteration: 390308
loss: 1.0079717636108398,grad_norm: 0.7523630352971526, iteration: 390309
loss: 1.0261633396148682,grad_norm: 0.7853627592128822, iteration: 390310
loss: 0.9659663438796997,grad_norm: 0.8312968834151514, iteration: 390311
loss: 1.0085678100585938,grad_norm: 0.6817412943578899, iteration: 390312
loss: 0.9398851990699768,grad_norm: 0.7093552474400598, iteration: 390313
loss: 1.0274103879928589,grad_norm: 0.795746161852264, iteration: 390314
loss: 1.0769871473312378,grad_norm: 0.999999791892925, iteration: 390315
loss: 1.0176901817321777,grad_norm: 0.9999993337308829, iteration: 390316
loss: 1.0432707071304321,grad_norm: 0.7682514879237644, iteration: 390317
loss: 0.9509530067443848,grad_norm: 0.7848017858640529, iteration: 390318
loss: 1.0083038806915283,grad_norm: 0.7276743161314723, iteration: 390319
loss: 1.0182881355285645,grad_norm: 0.9999993279144319, iteration: 390320
loss: 1.0107457637786865,grad_norm: 0.6839134703760594, iteration: 390321
loss: 0.998039186000824,grad_norm: 0.795776795468229, iteration: 390322
loss: 0.9822186827659607,grad_norm: 0.7128473071196122, iteration: 390323
loss: 0.9700947999954224,grad_norm: 0.8538575636212004, iteration: 390324
loss: 1.0159404277801514,grad_norm: 0.9076371954341003, iteration: 390325
loss: 1.0164823532104492,grad_norm: 0.9536507090043006, iteration: 390326
loss: 1.0079858303070068,grad_norm: 0.7818103907242975, iteration: 390327
loss: 0.9212110638618469,grad_norm: 0.71645793635186, iteration: 390328
loss: 1.0058995485305786,grad_norm: 0.8150551546725073, iteration: 390329
loss: 1.0477416515350342,grad_norm: 0.9304977577127858, iteration: 390330
loss: 0.9819362759590149,grad_norm: 0.9678744370429427, iteration: 390331
loss: 1.0501126050949097,grad_norm: 0.9113084519198882, iteration: 390332
loss: 0.9967764019966125,grad_norm: 0.8290563109797418, iteration: 390333
loss: 1.0142436027526855,grad_norm: 0.7669770561358369, iteration: 390334
loss: 0.9916601777076721,grad_norm: 0.7867261534539249, iteration: 390335
loss: 0.9940424561500549,grad_norm: 0.8735342815679026, iteration: 390336
loss: 0.9956018924713135,grad_norm: 0.9802185304118538, iteration: 390337
loss: 0.9986584782600403,grad_norm: 0.8812458502631554, iteration: 390338
loss: 0.9915914535522461,grad_norm: 0.743815198791959, iteration: 390339
loss: 0.9708073139190674,grad_norm: 0.6768976809152831, iteration: 390340
loss: 0.9464015960693359,grad_norm: 0.6932904209662293, iteration: 390341
loss: 0.9637250900268555,grad_norm: 0.7770039202687923, iteration: 390342
loss: 0.9729198813438416,grad_norm: 0.6721269322914643, iteration: 390343
loss: 0.9904542565345764,grad_norm: 0.8287980100736403, iteration: 390344
loss: 1.0383789539337158,grad_norm: 0.9396704043344931, iteration: 390345
loss: 0.9954671263694763,grad_norm: 0.802939237627768, iteration: 390346
loss: 0.9882218241691589,grad_norm: 0.9999990606677215, iteration: 390347
loss: 1.0319242477416992,grad_norm: 0.8441301037457043, iteration: 390348
loss: 0.9832499027252197,grad_norm: 0.8647147913324806, iteration: 390349
loss: 0.9940822720527649,grad_norm: 0.803221582993883, iteration: 390350
loss: 0.9875496029853821,grad_norm: 0.7610205877231825, iteration: 390351
loss: 0.9953113198280334,grad_norm: 0.7095925078376171, iteration: 390352
loss: 0.9845664501190186,grad_norm: 0.8764287223979904, iteration: 390353
loss: 0.9998818039894104,grad_norm: 0.7343910890869898, iteration: 390354
loss: 0.9961028695106506,grad_norm: 0.6664949342778966, iteration: 390355
loss: 1.0036920309066772,grad_norm: 0.7136488608959676, iteration: 390356
loss: 1.0241707563400269,grad_norm: 0.7069744728167873, iteration: 390357
loss: 0.9932729005813599,grad_norm: 0.7266111116689431, iteration: 390358
loss: 0.9891957640647888,grad_norm: 0.7707729706432287, iteration: 390359
loss: 1.0054659843444824,grad_norm: 0.7667468639225945, iteration: 390360
loss: 0.9917557239532471,grad_norm: 0.8850646042185691, iteration: 390361
loss: 1.019580364227295,grad_norm: 0.8288349262832867, iteration: 390362
loss: 0.9694329500198364,grad_norm: 0.7117131568434226, iteration: 390363
loss: 0.9997597336769104,grad_norm: 0.7154862895429924, iteration: 390364
loss: 1.0595611333847046,grad_norm: 0.9999994171109906, iteration: 390365
loss: 0.9959647059440613,grad_norm: 0.7820312871740964, iteration: 390366
loss: 1.0322496891021729,grad_norm: 0.9137805385518711, iteration: 390367
loss: 0.9958533048629761,grad_norm: 0.9400449114051288, iteration: 390368
loss: 1.031270980834961,grad_norm: 0.8331638377267429, iteration: 390369
loss: 1.0240594148635864,grad_norm: 0.7892126648682897, iteration: 390370
loss: 1.0395593643188477,grad_norm: 0.7719201517434456, iteration: 390371
loss: 0.9956036806106567,grad_norm: 0.7960033723370455, iteration: 390372
loss: 1.0173346996307373,grad_norm: 0.8460249602215888, iteration: 390373
loss: 0.9838780164718628,grad_norm: 0.7714556670781458, iteration: 390374
loss: 1.0040992498397827,grad_norm: 0.8134136859499419, iteration: 390375
loss: 0.9807273149490356,grad_norm: 0.680946803156579, iteration: 390376
loss: 0.9764088988304138,grad_norm: 0.7196427700131433, iteration: 390377
loss: 1.013742446899414,grad_norm: 0.7877503381718309, iteration: 390378
loss: 1.008265495300293,grad_norm: 0.8966477150346208, iteration: 390379
loss: 1.034724235534668,grad_norm: 0.830857429018409, iteration: 390380
loss: 0.9734333157539368,grad_norm: 0.8284288060360822, iteration: 390381
loss: 1.0106815099716187,grad_norm: 0.7416746228815299, iteration: 390382
loss: 1.023771047592163,grad_norm: 0.8791506713104833, iteration: 390383
loss: 1.0120964050292969,grad_norm: 0.9135286525862473, iteration: 390384
loss: 0.9695705771446228,grad_norm: 0.7480819707834956, iteration: 390385
loss: 0.9811826348304749,grad_norm: 0.6321050240340886, iteration: 390386
loss: 0.9767379760742188,grad_norm: 0.6916342174608406, iteration: 390387
loss: 1.0016525983810425,grad_norm: 0.733214243770702, iteration: 390388
loss: 0.9592482447624207,grad_norm: 0.8888750238360933, iteration: 390389
loss: 0.9930537939071655,grad_norm: 0.8049196938776839, iteration: 390390
loss: 0.9691410660743713,grad_norm: 0.7707184426501728, iteration: 390391
loss: 1.0205355882644653,grad_norm: 0.8542591815838397, iteration: 390392
loss: 0.9658684134483337,grad_norm: 0.7463851867869045, iteration: 390393
loss: 0.9966477155685425,grad_norm: 0.8343130124143713, iteration: 390394
loss: 0.9933931231498718,grad_norm: 0.7168956586060797, iteration: 390395
loss: 0.9830538034439087,grad_norm: 0.8042060442392404, iteration: 390396
loss: 0.9989495277404785,grad_norm: 0.7204765030286304, iteration: 390397
loss: 0.9979187846183777,grad_norm: 0.8534012335948703, iteration: 390398
loss: 1.0344862937927246,grad_norm: 0.9380003399483464, iteration: 390399
loss: 0.9793850183486938,grad_norm: 0.7833638173299684, iteration: 390400
loss: 0.9892698526382446,grad_norm: 0.7000394111624236, iteration: 390401
loss: 1.0145090818405151,grad_norm: 0.7159729577925503, iteration: 390402
loss: 1.0058274269104004,grad_norm: 0.6990139382743715, iteration: 390403
loss: 0.9782707691192627,grad_norm: 0.7664456285178777, iteration: 390404
loss: 0.9768674969673157,grad_norm: 0.8122208681916333, iteration: 390405
loss: 0.9818739295005798,grad_norm: 0.7134467193284731, iteration: 390406
loss: 1.0307003259658813,grad_norm: 0.8943509378365205, iteration: 390407
loss: 0.9872567057609558,grad_norm: 0.7175229318745713, iteration: 390408
loss: 0.9933393001556396,grad_norm: 0.8165776732031689, iteration: 390409
loss: 0.9943537712097168,grad_norm: 0.9999992597819073, iteration: 390410
loss: 0.9880462884902954,grad_norm: 0.8206160224559772, iteration: 390411
loss: 1.0095018148422241,grad_norm: 0.697553631261508, iteration: 390412
loss: 1.0009249448776245,grad_norm: 0.7423644563983421, iteration: 390413
loss: 1.003664493560791,grad_norm: 0.7639716783747434, iteration: 390414
loss: 1.046805500984192,grad_norm: 0.9950091338577989, iteration: 390415
loss: 1.0179475545883179,grad_norm: 0.9517674081244761, iteration: 390416
loss: 0.9648656845092773,grad_norm: 0.8848026149072116, iteration: 390417
loss: 1.0183796882629395,grad_norm: 0.7092989761667151, iteration: 390418
loss: 0.9731130003929138,grad_norm: 0.9642696756142588, iteration: 390419
loss: 0.9792752265930176,grad_norm: 0.701319124303425, iteration: 390420
loss: 1.0227936506271362,grad_norm: 0.7799671835502908, iteration: 390421
loss: 0.9977484345436096,grad_norm: 0.9972101491337937, iteration: 390422
loss: 1.010425090789795,grad_norm: 0.7551774054365122, iteration: 390423
loss: 0.9855510592460632,grad_norm: 0.7579046120786461, iteration: 390424
loss: 1.009726881980896,grad_norm: 0.7613959620299836, iteration: 390425
loss: 0.9691696166992188,grad_norm: 0.94825344738495, iteration: 390426
loss: 0.9925180673599243,grad_norm: 0.7660168816112406, iteration: 390427
loss: 0.9907362461090088,grad_norm: 0.7478650252175361, iteration: 390428
loss: 0.9967575669288635,grad_norm: 0.6389362248113625, iteration: 390429
loss: 0.9885509014129639,grad_norm: 0.7372612206970239, iteration: 390430
loss: 1.067549467086792,grad_norm: 0.9999992540330086, iteration: 390431
loss: 0.9872408509254456,grad_norm: 0.893789933862321, iteration: 390432
loss: 1.0272961854934692,grad_norm: 0.8340954215795806, iteration: 390433
loss: 1.0095248222351074,grad_norm: 0.7967521636924784, iteration: 390434
loss: 1.0020360946655273,grad_norm: 0.8811329817765128, iteration: 390435
loss: 1.0608241558074951,grad_norm: 0.9999996992036135, iteration: 390436
loss: 0.9611698389053345,grad_norm: 0.9864862535581462, iteration: 390437
loss: 1.0150645971298218,grad_norm: 0.9999989639436715, iteration: 390438
loss: 0.9745267629623413,grad_norm: 0.7175439866613319, iteration: 390439
loss: 0.954172670841217,grad_norm: 0.82576649678697, iteration: 390440
loss: 1.020553708076477,grad_norm: 0.88390234518414, iteration: 390441
loss: 1.0057830810546875,grad_norm: 0.9999990512495875, iteration: 390442
loss: 1.0338292121887207,grad_norm: 0.9999996080128684, iteration: 390443
loss: 1.0896505117416382,grad_norm: 0.9838575190643373, iteration: 390444
loss: 0.9745098948478699,grad_norm: 0.8663144598335593, iteration: 390445
loss: 0.9854332804679871,grad_norm: 0.7593817456180435, iteration: 390446
loss: 1.0265110731124878,grad_norm: 0.6872278326278404, iteration: 390447
loss: 0.983027994632721,grad_norm: 0.8110048268010668, iteration: 390448
loss: 1.0518877506256104,grad_norm: 0.9999995754092501, iteration: 390449
loss: 0.9609941244125366,grad_norm: 0.7630940820575615, iteration: 390450
loss: 1.0380263328552246,grad_norm: 0.8931078642274416, iteration: 390451
loss: 0.9863166213035583,grad_norm: 0.745249203980837, iteration: 390452
loss: 0.9984736442565918,grad_norm: 0.6750121922416151, iteration: 390453
loss: 1.0532910823822021,grad_norm: 0.9536175411118197, iteration: 390454
loss: 0.978354275226593,grad_norm: 0.9999991907458315, iteration: 390455
loss: 0.9722025990486145,grad_norm: 0.8496520656969767, iteration: 390456
loss: 1.0292876958847046,grad_norm: 0.8608696844447064, iteration: 390457
loss: 1.0084095001220703,grad_norm: 0.7860905930592916, iteration: 390458
loss: 0.9583678841590881,grad_norm: 0.9999991463095003, iteration: 390459
loss: 1.070909857749939,grad_norm: 0.79459095646356, iteration: 390460
loss: 1.0204195976257324,grad_norm: 0.6094739385604891, iteration: 390461
loss: 0.9759141802787781,grad_norm: 0.6807004416024345, iteration: 390462
loss: 0.995993435382843,grad_norm: 0.8185281880087993, iteration: 390463
loss: 1.1980687379837036,grad_norm: 0.9999993690202285, iteration: 390464
loss: 1.0792245864868164,grad_norm: 0.776368069310295, iteration: 390465
loss: 0.9717702269554138,grad_norm: 0.8236459548245585, iteration: 390466
loss: 0.9594370126724243,grad_norm: 0.9999990330833793, iteration: 390467
loss: 0.9624143242835999,grad_norm: 0.7933514939797874, iteration: 390468
loss: 0.9643268585205078,grad_norm: 0.9436564620195518, iteration: 390469
loss: 0.9856300950050354,grad_norm: 0.7528795932793625, iteration: 390470
loss: 1.033292293548584,grad_norm: 0.9999992006365778, iteration: 390471
loss: 1.0185472965240479,grad_norm: 0.8073078250481942, iteration: 390472
loss: 0.9983547925949097,grad_norm: 0.8335506634163279, iteration: 390473
loss: 1.0852100849151611,grad_norm: 0.9999998144422297, iteration: 390474
loss: 1.0027965307235718,grad_norm: 0.8261855920201973, iteration: 390475
loss: 0.9746890068054199,grad_norm: 0.8351403831391896, iteration: 390476
loss: 1.031046748161316,grad_norm: 0.84139503285634, iteration: 390477
loss: 0.9935262203216553,grad_norm: 0.8958662199909501, iteration: 390478
loss: 1.0038752555847168,grad_norm: 0.8785430130579674, iteration: 390479
loss: 0.9883512258529663,grad_norm: 0.7438176341063653, iteration: 390480
loss: 0.9662562608718872,grad_norm: 0.7650706041877877, iteration: 390481
loss: 0.9868857860565186,grad_norm: 0.9999998901551302, iteration: 390482
loss: 1.0932648181915283,grad_norm: 0.8068799122674584, iteration: 390483
loss: 0.9796071648597717,grad_norm: 0.7502722932917958, iteration: 390484
loss: 1.0122101306915283,grad_norm: 0.9376222744836846, iteration: 390485
loss: 0.9693588018417358,grad_norm: 0.6846589266376776, iteration: 390486
loss: 1.0241271257400513,grad_norm: 0.7900392987134484, iteration: 390487
loss: 0.9907233715057373,grad_norm: 0.9999992105344448, iteration: 390488
loss: 0.9958439469337463,grad_norm: 0.9996492553554038, iteration: 390489
loss: 1.0313018560409546,grad_norm: 0.66707545283851, iteration: 390490
loss: 0.9816102385520935,grad_norm: 0.7492175667251182, iteration: 390491
loss: 0.9760461449623108,grad_norm: 0.6427766671394526, iteration: 390492
loss: 0.9997435808181763,grad_norm: 0.8313823023093696, iteration: 390493
loss: 1.0125505924224854,grad_norm: 0.8848708731515028, iteration: 390494
loss: 0.9926854968070984,grad_norm: 0.7442469962308518, iteration: 390495
loss: 0.9762239456176758,grad_norm: 0.7782222630393981, iteration: 390496
loss: 0.9800862669944763,grad_norm: 0.8261152081907903, iteration: 390497
loss: 1.0550140142440796,grad_norm: 0.8414782410388927, iteration: 390498
loss: 1.0085313320159912,grad_norm: 0.8395614966021102, iteration: 390499
loss: 1.0063822269439697,grad_norm: 0.6784914339769516, iteration: 390500
loss: 0.9916582703590393,grad_norm: 0.7624527852349136, iteration: 390501
loss: 0.9944087266921997,grad_norm: 0.7943125534998148, iteration: 390502
loss: 0.9997689127922058,grad_norm: 0.7786412973868351, iteration: 390503
loss: 1.0132527351379395,grad_norm: 0.7564077064726605, iteration: 390504
loss: 0.99366295337677,grad_norm: 0.7365715255130678, iteration: 390505
loss: 0.9671007990837097,grad_norm: 0.705385353373963, iteration: 390506
loss: 0.9858661890029907,grad_norm: 0.8609753241767876, iteration: 390507
loss: 1.0288578271865845,grad_norm: 0.7556098754020787, iteration: 390508
loss: 0.9939664602279663,grad_norm: 0.8528388116465795, iteration: 390509
loss: 1.0377051830291748,grad_norm: 0.8922232145475238, iteration: 390510
loss: 1.0077786445617676,grad_norm: 0.8230057152381787, iteration: 390511
loss: 1.0007004737854004,grad_norm: 0.9999992838354933, iteration: 390512
loss: 0.9997377395629883,grad_norm: 0.8627945835802779, iteration: 390513
loss: 1.0445700883865356,grad_norm: 0.7419475103522585, iteration: 390514
loss: 1.0216628313064575,grad_norm: 0.9969667543666354, iteration: 390515
loss: 1.0332413911819458,grad_norm: 0.9999994833572493, iteration: 390516
loss: 0.9679543972015381,grad_norm: 0.6771475963613222, iteration: 390517
loss: 1.1152386665344238,grad_norm: 0.9999995322957845, iteration: 390518
loss: 0.9950732588768005,grad_norm: 0.7168876018192858, iteration: 390519
loss: 0.9600821733474731,grad_norm: 0.6993567006748656, iteration: 390520
loss: 1.0141429901123047,grad_norm: 0.8458643155827174, iteration: 390521
loss: 1.0874348878860474,grad_norm: 0.9474306510793616, iteration: 390522
loss: 0.9966102838516235,grad_norm: 0.8283177108396852, iteration: 390523
loss: 1.0326493978500366,grad_norm: 0.7179656241632033, iteration: 390524
loss: 0.9851946830749512,grad_norm: 0.6294833204674284, iteration: 390525
loss: 0.9979361891746521,grad_norm: 0.7441923373652031, iteration: 390526
loss: 0.9802635312080383,grad_norm: 0.7239586200430416, iteration: 390527
loss: 1.000138521194458,grad_norm: 0.7484159923073069, iteration: 390528
loss: 1.0295406579971313,grad_norm: 0.8478155007950111, iteration: 390529
loss: 1.0142908096313477,grad_norm: 0.9609285764374426, iteration: 390530
loss: 1.0137721300125122,grad_norm: 0.8774969004570164, iteration: 390531
loss: 0.993962287902832,grad_norm: 0.7563597048160273, iteration: 390532
loss: 0.9940575957298279,grad_norm: 0.9907152353775439, iteration: 390533
loss: 0.9819758534431458,grad_norm: 0.8901904120697173, iteration: 390534
loss: 0.9990428686141968,grad_norm: 0.8784501767629448, iteration: 390535
loss: 0.949030339717865,grad_norm: 0.7583679574529094, iteration: 390536
loss: 1.0192968845367432,grad_norm: 0.6784784962043515, iteration: 390537
loss: 1.0012328624725342,grad_norm: 0.8305556274226822, iteration: 390538
loss: 1.000120759010315,grad_norm: 0.8995832019091093, iteration: 390539
loss: 0.9990111589431763,grad_norm: 0.7130788930812171, iteration: 390540
loss: 1.0095360279083252,grad_norm: 0.7279902707388441, iteration: 390541
loss: 1.0212587118148804,grad_norm: 0.7301285351328113, iteration: 390542
loss: 0.9819220900535583,grad_norm: 0.750460951130101, iteration: 390543
loss: 0.9977066516876221,grad_norm: 0.9216687569856666, iteration: 390544
loss: 1.008776068687439,grad_norm: 0.6893910332399785, iteration: 390545
loss: 1.0169765949249268,grad_norm: 0.8179500980394825, iteration: 390546
loss: 1.0287775993347168,grad_norm: 0.7544272778042184, iteration: 390547
loss: 1.010110855102539,grad_norm: 0.7975311232425981, iteration: 390548
loss: 0.9757144451141357,grad_norm: 0.7996644743853574, iteration: 390549
loss: 1.011104941368103,grad_norm: 0.9527559545718736, iteration: 390550
loss: 1.0019906759262085,grad_norm: 0.7971644693858746, iteration: 390551
loss: 0.9911426901817322,grad_norm: 0.8444213229206062, iteration: 390552
loss: 1.0044755935668945,grad_norm: 0.7354382209553384, iteration: 390553
loss: 1.1618907451629639,grad_norm: 0.9609585958982712, iteration: 390554
loss: 1.0237163305282593,grad_norm: 0.7175040156918538, iteration: 390555
loss: 0.9706611037254333,grad_norm: 0.7004158712984668, iteration: 390556
loss: 1.025307297706604,grad_norm: 0.9999992701795805, iteration: 390557
loss: 1.0567296743392944,grad_norm: 0.9999997906986619, iteration: 390558
loss: 1.0480189323425293,grad_norm: 0.9999995728286573, iteration: 390559
loss: 0.9748183488845825,grad_norm: 0.8030451093569496, iteration: 390560
loss: 0.9998776316642761,grad_norm: 0.7197269828964897, iteration: 390561
loss: 1.0015143156051636,grad_norm: 0.7997600883612028, iteration: 390562
loss: 1.0138808488845825,grad_norm: 0.7760056787475539, iteration: 390563
loss: 0.9847908616065979,grad_norm: 0.6771295948485778, iteration: 390564
loss: 1.0609208345413208,grad_norm: 0.9999996918571261, iteration: 390565
loss: 0.9684137105941772,grad_norm: 0.7992930952849219, iteration: 390566
loss: 0.9976105093955994,grad_norm: 0.7143645835595643, iteration: 390567
loss: 0.9822385907173157,grad_norm: 0.9474076724616666, iteration: 390568
loss: 0.9882572889328003,grad_norm: 0.9999990845421904, iteration: 390569
loss: 0.9592450261116028,grad_norm: 0.822859741478277, iteration: 390570
loss: 1.006840705871582,grad_norm: 0.6225648539422224, iteration: 390571
loss: 0.9969475865364075,grad_norm: 0.8856589545685637, iteration: 390572
loss: 0.9945803284645081,grad_norm: 0.9655995861177922, iteration: 390573
loss: 1.0327378511428833,grad_norm: 0.8017434240497532, iteration: 390574
loss: 0.9934149980545044,grad_norm: 0.9212607740079659, iteration: 390575
loss: 1.0929759740829468,grad_norm: 0.9999999041025317, iteration: 390576
loss: 0.9916773438453674,grad_norm: 0.6553725778270023, iteration: 390577
loss: 1.002190113067627,grad_norm: 0.7323983225298105, iteration: 390578
loss: 0.9727880954742432,grad_norm: 0.7380006434930239, iteration: 390579
loss: 1.0612034797668457,grad_norm: 0.9999998351098981, iteration: 390580
loss: 1.0090250968933105,grad_norm: 0.8069981796166006, iteration: 390581
loss: 0.9971321225166321,grad_norm: 0.9599148178042726, iteration: 390582
loss: 1.0004342794418335,grad_norm: 0.785271050069537, iteration: 390583
loss: 1.1036664247512817,grad_norm: 0.7266017140000535, iteration: 390584
loss: 1.0251288414001465,grad_norm: 0.9999993216921982, iteration: 390585
loss: 0.9669042229652405,grad_norm: 0.7305888220973809, iteration: 390586
loss: 1.0039883852005005,grad_norm: 0.7144581794616663, iteration: 390587
loss: 0.9969791173934937,grad_norm: 0.7756607091514396, iteration: 390588
loss: 0.9841390252113342,grad_norm: 0.7828052964499568, iteration: 390589
loss: 0.9782435894012451,grad_norm: 0.709371479332475, iteration: 390590
loss: 0.9740080237388611,grad_norm: 0.7615577763108901, iteration: 390591
loss: 0.9930859804153442,grad_norm: 0.9999991962455314, iteration: 390592
loss: 0.9824506044387817,grad_norm: 0.9063482637841423, iteration: 390593
loss: 1.1769100427627563,grad_norm: 0.9999998233257419, iteration: 390594
loss: 0.9861926436424255,grad_norm: 0.8363911429104649, iteration: 390595
loss: 1.0314193964004517,grad_norm: 0.7313072207215713, iteration: 390596
loss: 1.0048059225082397,grad_norm: 0.7505199211572599, iteration: 390597
loss: 0.9619603157043457,grad_norm: 0.7233428793157682, iteration: 390598
loss: 1.0040150880813599,grad_norm: 0.9078089220561949, iteration: 390599
loss: 0.9943945407867432,grad_norm: 0.820522910750868, iteration: 390600
loss: 0.9934594631195068,grad_norm: 0.9999999241300968, iteration: 390601
loss: 1.0099220275878906,grad_norm: 0.999999745971087, iteration: 390602
loss: 0.9629677534103394,grad_norm: 0.8039658903648927, iteration: 390603
loss: 1.0112210512161255,grad_norm: 0.8223611944258437, iteration: 390604
loss: 1.0430794954299927,grad_norm: 0.8373222734541107, iteration: 390605
loss: 1.018912672996521,grad_norm: 0.7158684456210553, iteration: 390606
loss: 0.9794771075248718,grad_norm: 0.7520574214556157, iteration: 390607
loss: 1.00555419921875,grad_norm: 0.7529529121994736, iteration: 390608
loss: 0.9905906319618225,grad_norm: 0.7127761859536972, iteration: 390609
loss: 1.0249276161193848,grad_norm: 0.9337225191768906, iteration: 390610
loss: 0.968582808971405,grad_norm: 0.8619763702781768, iteration: 390611
loss: 1.0318669080734253,grad_norm: 0.8770060865447895, iteration: 390612
loss: 0.9795799851417542,grad_norm: 0.7102243792694427, iteration: 390613
loss: 0.9962191581726074,grad_norm: 0.9352060177559366, iteration: 390614
loss: 0.9919382333755493,grad_norm: 0.8311888547796673, iteration: 390615
loss: 1.043331503868103,grad_norm: 0.999999030050314, iteration: 390616
loss: 0.9873835444450378,grad_norm: 0.8458329294946723, iteration: 390617
loss: 1.028825044631958,grad_norm: 0.9999992704703343, iteration: 390618
loss: 1.0061700344085693,grad_norm: 0.7639686348327234, iteration: 390619
loss: 0.96926349401474,grad_norm: 0.7840117936521949, iteration: 390620
loss: 0.9934676885604858,grad_norm: 0.7880543281765111, iteration: 390621
loss: 0.9435266852378845,grad_norm: 0.7948900771055077, iteration: 390622
loss: 1.0063223838806152,grad_norm: 0.9113494948660845, iteration: 390623
loss: 0.9661898016929626,grad_norm: 0.767537163365338, iteration: 390624
loss: 0.9863524436950684,grad_norm: 0.8694393473523655, iteration: 390625
loss: 1.0249096155166626,grad_norm: 0.7915696883421294, iteration: 390626
loss: 0.9894310235977173,grad_norm: 0.9999993097097641, iteration: 390627
loss: 1.0240494012832642,grad_norm: 0.8820996207835369, iteration: 390628
loss: 0.9725509881973267,grad_norm: 0.7887050036903379, iteration: 390629
loss: 1.016963005065918,grad_norm: 0.7601741965284357, iteration: 390630
loss: 0.9833567142486572,grad_norm: 0.6097815152968571, iteration: 390631
loss: 0.9926843047142029,grad_norm: 0.732229079428722, iteration: 390632
loss: 0.9787312150001526,grad_norm: 0.8307691484911941, iteration: 390633
loss: 0.9830442070960999,grad_norm: 0.6478265843585241, iteration: 390634
loss: 1.0290125608444214,grad_norm: 0.7252859976050229, iteration: 390635
loss: 1.048635482788086,grad_norm: 0.9999995210795803, iteration: 390636
loss: 1.0081537961959839,grad_norm: 0.6883261354456577, iteration: 390637
loss: 0.9833085536956787,grad_norm: 0.837132861222516, iteration: 390638
loss: 1.0364218950271606,grad_norm: 0.8315547388787735, iteration: 390639
loss: 1.0024827718734741,grad_norm: 0.780275097512102, iteration: 390640
loss: 1.028694987297058,grad_norm: 0.7974676299114781, iteration: 390641
loss: 0.9972541332244873,grad_norm: 0.7979110379270349, iteration: 390642
loss: 0.9660709500312805,grad_norm: 0.7490661975494174, iteration: 390643
loss: 0.9963856935501099,grad_norm: 0.7534209538555521, iteration: 390644
loss: 0.9822781682014465,grad_norm: 0.8434698351911546, iteration: 390645
loss: 1.0268590450286865,grad_norm: 0.8037121797777854, iteration: 390646
loss: 1.00438392162323,grad_norm: 0.9862871611901098, iteration: 390647
loss: 0.9847031831741333,grad_norm: 0.7672410878068623, iteration: 390648
loss: 0.9616210460662842,grad_norm: 0.8057853786392549, iteration: 390649
loss: 0.9501011967658997,grad_norm: 0.8339212788299344, iteration: 390650
loss: 1.0317773818969727,grad_norm: 0.7287059915430698, iteration: 390651
loss: 0.9975701570510864,grad_norm: 0.9999998610461822, iteration: 390652
loss: 0.9617046117782593,grad_norm: 0.7617987800966345, iteration: 390653
loss: 0.9706366062164307,grad_norm: 0.887077963183216, iteration: 390654
loss: 1.0003387928009033,grad_norm: 0.9519715854603594, iteration: 390655
loss: 0.9792259335517883,grad_norm: 0.7068371956502639, iteration: 390656
loss: 1.036845088005066,grad_norm: 0.8055261255451024, iteration: 390657
loss: 0.9998387098312378,grad_norm: 0.9091780972831058, iteration: 390658
loss: 0.981904923915863,grad_norm: 0.7891403550457677, iteration: 390659
loss: 1.0607235431671143,grad_norm: 0.8901611234181386, iteration: 390660
loss: 1.0093077421188354,grad_norm: 0.999999373825471, iteration: 390661
loss: 1.00044846534729,grad_norm: 0.9999992000276555, iteration: 390662
loss: 0.9902928471565247,grad_norm: 0.6055637081940287, iteration: 390663
loss: 0.9849262237548828,grad_norm: 0.7768231143914732, iteration: 390664
loss: 1.0257668495178223,grad_norm: 0.7612345711544931, iteration: 390665
loss: 1.0375888347625732,grad_norm: 0.9999991657961412, iteration: 390666
loss: 1.0152864456176758,grad_norm: 0.9999995763217608, iteration: 390667
loss: 1.0294725894927979,grad_norm: 0.9999990338324506, iteration: 390668
loss: 0.9607340097427368,grad_norm: 0.941764471123732, iteration: 390669
loss: 0.9941306710243225,grad_norm: 0.82527490226837, iteration: 390670
loss: 1.0127341747283936,grad_norm: 0.9673816295855485, iteration: 390671
loss: 0.9609974026679993,grad_norm: 0.7076399833840055, iteration: 390672
loss: 0.9851135015487671,grad_norm: 0.7915701410239853, iteration: 390673
loss: 0.9867196679115295,grad_norm: 0.7136725425910878, iteration: 390674
loss: 1.0016661882400513,grad_norm: 0.9999991501669214, iteration: 390675
loss: 1.0315049886703491,grad_norm: 0.7514377523623628, iteration: 390676
loss: 0.9315995573997498,grad_norm: 0.7699659238366718, iteration: 390677
loss: 1.050990343093872,grad_norm: 0.9494884347700291, iteration: 390678
loss: 1.0366334915161133,grad_norm: 0.7076587882874411, iteration: 390679
loss: 0.9610951542854309,grad_norm: 0.8732834718995595, iteration: 390680
loss: 0.9730056524276733,grad_norm: 0.7475872528398863, iteration: 390681
loss: 1.0215034484863281,grad_norm: 0.741826123665961, iteration: 390682
loss: 0.9503463506698608,grad_norm: 0.8296876953195342, iteration: 390683
loss: 0.9960533380508423,grad_norm: 0.7630016067041653, iteration: 390684
loss: 0.9890337586402893,grad_norm: 0.9508131002298151, iteration: 390685
loss: 1.0164185762405396,grad_norm: 0.9737520882256328, iteration: 390686
loss: 0.9998854398727417,grad_norm: 0.784845292500289, iteration: 390687
loss: 0.9856414794921875,grad_norm: 0.796763076141537, iteration: 390688
loss: 1.0195709466934204,grad_norm: 0.6891919527504099, iteration: 390689
loss: 1.0136370658874512,grad_norm: 0.758360738224312, iteration: 390690
loss: 1.0279884338378906,grad_norm: 0.6665625409786478, iteration: 390691
loss: 1.0040853023529053,grad_norm: 0.9876354683166371, iteration: 390692
loss: 0.940771222114563,grad_norm: 0.8018163338632158, iteration: 390693
loss: 1.0023612976074219,grad_norm: 0.7508345023893307, iteration: 390694
loss: 0.9620718955993652,grad_norm: 0.6257127360629682, iteration: 390695
loss: 1.0392746925354004,grad_norm: 0.9999998674857834, iteration: 390696
loss: 1.0240740776062012,grad_norm: 0.9999992089372516, iteration: 390697
loss: 1.0227761268615723,grad_norm: 0.7291154535660331, iteration: 390698
loss: 0.9947349429130554,grad_norm: 0.6706618609883221, iteration: 390699
loss: 0.9758338332176208,grad_norm: 0.7084919498554244, iteration: 390700
loss: 0.9814149737358093,grad_norm: 0.6612377204840215, iteration: 390701
loss: 1.0100951194763184,grad_norm: 0.9999997873391252, iteration: 390702
loss: 0.9735236167907715,grad_norm: 0.8071325868042363, iteration: 390703
loss: 0.9744105339050293,grad_norm: 0.7160104597176954, iteration: 390704
loss: 1.0541614294052124,grad_norm: 0.9999991181319883, iteration: 390705
loss: 0.9597089886665344,grad_norm: 0.8132402115693548, iteration: 390706
loss: 0.9927928447723389,grad_norm: 0.895748792780681, iteration: 390707
loss: 0.9582713842391968,grad_norm: 0.8176634518866374, iteration: 390708
loss: 1.004571795463562,grad_norm: 0.7758496920021937, iteration: 390709
loss: 1.0160568952560425,grad_norm: 0.7681034330331741, iteration: 390710
loss: 0.9928917288780212,grad_norm: 0.8671647164959556, iteration: 390711
loss: 0.9978184700012207,grad_norm: 0.7418940118523394, iteration: 390712
loss: 1.0095317363739014,grad_norm: 0.9598592427551567, iteration: 390713
loss: 1.003677487373352,grad_norm: 0.7445216572725067, iteration: 390714
loss: 0.9963843822479248,grad_norm: 0.8089865540866477, iteration: 390715
loss: 1.0232161283493042,grad_norm: 0.9999991241985031, iteration: 390716
loss: 1.001755952835083,grad_norm: 0.7431580740695118, iteration: 390717
loss: 1.0494309663772583,grad_norm: 0.999999258632108, iteration: 390718
loss: 1.02241849899292,grad_norm: 0.8327906983240215, iteration: 390719
loss: 1.020632266998291,grad_norm: 0.999999294614431, iteration: 390720
loss: 1.0143131017684937,grad_norm: 0.7876913981026579, iteration: 390721
loss: 1.0211923122406006,grad_norm: 0.9999999242332153, iteration: 390722
loss: 1.0252565145492554,grad_norm: 0.9324985625381869, iteration: 390723
loss: 0.9456902146339417,grad_norm: 0.723514723859794, iteration: 390724
loss: 1.0050283670425415,grad_norm: 0.772802101438088, iteration: 390725
loss: 1.0513889789581299,grad_norm: 0.897516670677692, iteration: 390726
loss: 1.0197157859802246,grad_norm: 0.8562958479406776, iteration: 390727
loss: 1.0345810651779175,grad_norm: 0.8533230727208576, iteration: 390728
loss: 1.0260554552078247,grad_norm: 0.8710811936144792, iteration: 390729
loss: 1.0066157579421997,grad_norm: 0.8714705067849543, iteration: 390730
loss: 0.9355389475822449,grad_norm: 0.9248630216855663, iteration: 390731
loss: 1.0043879747390747,grad_norm: 0.8065189910531535, iteration: 390732
loss: 0.9935867190361023,grad_norm: 0.7643728398694909, iteration: 390733
loss: 0.9732735753059387,grad_norm: 0.9541587681162075, iteration: 390734
loss: 1.0058588981628418,grad_norm: 0.7158852601237644, iteration: 390735
loss: 1.0019985437393188,grad_norm: 0.8161793823406667, iteration: 390736
loss: 1.0010719299316406,grad_norm: 0.7592226699267512, iteration: 390737
loss: 1.0275304317474365,grad_norm: 0.9495731818500784, iteration: 390738
loss: 1.0274724960327148,grad_norm: 0.6844483585018684, iteration: 390739
loss: 0.9988349676132202,grad_norm: 0.9999994292881224, iteration: 390740
loss: 0.9879248142242432,grad_norm: 0.7852060968959497, iteration: 390741
loss: 1.0492349863052368,grad_norm: 0.999999548195703, iteration: 390742
loss: 1.0195205211639404,grad_norm: 0.9999993721722144, iteration: 390743
loss: 1.0549263954162598,grad_norm: 0.8040255972057377, iteration: 390744
loss: 1.2822911739349365,grad_norm: 0.9999998961425388, iteration: 390745
loss: 1.0062732696533203,grad_norm: 0.7388188807373114, iteration: 390746
loss: 0.9396045804023743,grad_norm: 0.9084552721623452, iteration: 390747
loss: 1.0418221950531006,grad_norm: 0.9999989989800304, iteration: 390748
loss: 1.0403779745101929,grad_norm: 0.8095328244604946, iteration: 390749
loss: 1.029573678970337,grad_norm: 0.6704931430265423, iteration: 390750
loss: 1.023153305053711,grad_norm: 0.9999995010475103, iteration: 390751
loss: 1.0000301599502563,grad_norm: 0.793588597558382, iteration: 390752
loss: 0.9888225793838501,grad_norm: 0.857082593577827, iteration: 390753
loss: 1.0417053699493408,grad_norm: 0.9999992591951424, iteration: 390754
loss: 0.9815856218338013,grad_norm: 0.999999343421887, iteration: 390755
loss: 0.9835529923439026,grad_norm: 0.7811516095205774, iteration: 390756
loss: 0.9658927321434021,grad_norm: 0.930353157940165, iteration: 390757
loss: 0.9630774855613708,grad_norm: 0.6675548420054601, iteration: 390758
loss: 1.027590036392212,grad_norm: 0.9117823765067803, iteration: 390759
loss: 1.0343363285064697,grad_norm: 0.7178288419203853, iteration: 390760
loss: 1.0051648616790771,grad_norm: 0.8785347105510714, iteration: 390761
loss: 0.9635436534881592,grad_norm: 0.9999996171916385, iteration: 390762
loss: 0.9657328128814697,grad_norm: 0.760063695138056, iteration: 390763
loss: 0.9984716773033142,grad_norm: 0.7982476147844534, iteration: 390764
loss: 0.9814613461494446,grad_norm: 0.8699601736716869, iteration: 390765
loss: 0.9703795909881592,grad_norm: 0.8045332475115539, iteration: 390766
loss: 1.018746018409729,grad_norm: 0.9999991322525443, iteration: 390767
loss: 0.9883193969726562,grad_norm: 0.9008724284698069, iteration: 390768
loss: 1.0633994340896606,grad_norm: 0.9999991772543474, iteration: 390769
loss: 1.0112911462783813,grad_norm: 0.676254325419943, iteration: 390770
loss: 0.9690277576446533,grad_norm: 0.8104140621917926, iteration: 390771
loss: 0.9791774153709412,grad_norm: 0.7440610554233326, iteration: 390772
loss: 1.03110671043396,grad_norm: 0.9999994975324438, iteration: 390773
loss: 0.9915321469306946,grad_norm: 0.7357692365403923, iteration: 390774
loss: 0.9863781929016113,grad_norm: 0.8365269368519735, iteration: 390775
loss: 1.029627799987793,grad_norm: 0.9999990200277679, iteration: 390776
loss: 1.0206258296966553,grad_norm: 0.8816551457352659, iteration: 390777
loss: 0.98932945728302,grad_norm: 0.8437115647378737, iteration: 390778
loss: 1.0039396286010742,grad_norm: 0.7679749370488862, iteration: 390779
loss: 1.0152866840362549,grad_norm: 0.7020722462582258, iteration: 390780
loss: 1.0063663721084595,grad_norm: 0.6734225195287443, iteration: 390781
loss: 0.9593000411987305,grad_norm: 0.7601517819768222, iteration: 390782
loss: 1.0771563053131104,grad_norm: 0.9881140726094796, iteration: 390783
loss: 1.0473897457122803,grad_norm: 0.7478130681122674, iteration: 390784
loss: 1.0310417413711548,grad_norm: 0.8487060519253605, iteration: 390785
loss: 0.9855640530586243,grad_norm: 0.8871763589801325, iteration: 390786
loss: 0.9938597679138184,grad_norm: 0.7651679453306713, iteration: 390787
loss: 1.0178000926971436,grad_norm: 0.7301285886560026, iteration: 390788
loss: 0.9754412174224854,grad_norm: 0.9999990831089225, iteration: 390789
loss: 1.0318067073822021,grad_norm: 0.7455217965587276, iteration: 390790
loss: 1.0505669116973877,grad_norm: 0.9999995164178521, iteration: 390791
loss: 1.0035656690597534,grad_norm: 0.7558654544173709, iteration: 390792
loss: 0.9639014005661011,grad_norm: 0.7504621440966, iteration: 390793
loss: 0.9607996344566345,grad_norm: 0.7485341343614379, iteration: 390794
loss: 1.0128823518753052,grad_norm: 0.7385427987767984, iteration: 390795
loss: 0.959602415561676,grad_norm: 0.7374485730250125, iteration: 390796
loss: 1.035641074180603,grad_norm: 0.9999994547193872, iteration: 390797
loss: 1.0162172317504883,grad_norm: 0.8746460859152242, iteration: 390798
loss: 1.0282973051071167,grad_norm: 0.9999994966505616, iteration: 390799
loss: 1.0194478034973145,grad_norm: 0.8730223511616902, iteration: 390800
loss: 1.0211793184280396,grad_norm: 0.7238223467378883, iteration: 390801
loss: 0.999411404132843,grad_norm: 0.8195445874647057, iteration: 390802
loss: 0.9819635152816772,grad_norm: 0.7549512149246862, iteration: 390803
loss: 1.0279834270477295,grad_norm: 0.8723477577118008, iteration: 390804
loss: 1.0101202726364136,grad_norm: 0.8868078999951526, iteration: 390805
loss: 1.008042812347412,grad_norm: 0.7470754035126682, iteration: 390806
loss: 1.009752631187439,grad_norm: 0.6637358117899398, iteration: 390807
loss: 0.9867833256721497,grad_norm: 0.6750658548191791, iteration: 390808
loss: 1.02916419506073,grad_norm: 0.8599554522857954, iteration: 390809
loss: 0.9797260165214539,grad_norm: 0.7373427525772756, iteration: 390810
loss: 0.9596572518348694,grad_norm: 0.8417822202980895, iteration: 390811
loss: 1.0032330751419067,grad_norm: 0.6398672789503719, iteration: 390812
loss: 0.9454526901245117,grad_norm: 0.7706966138329433, iteration: 390813
loss: 0.9607135653495789,grad_norm: 0.7700146483496042, iteration: 390814
loss: 1.018523097038269,grad_norm: 0.8221904282741929, iteration: 390815
loss: 0.9716445803642273,grad_norm: 0.7280896929584896, iteration: 390816
loss: 1.034035563468933,grad_norm: 0.7861882247848718, iteration: 390817
loss: 0.9741730690002441,grad_norm: 0.7388563404998394, iteration: 390818
loss: 0.9913576245307922,grad_norm: 0.7458424510044697, iteration: 390819
loss: 0.9955751299858093,grad_norm: 0.7531163257033732, iteration: 390820
loss: 1.0032457113265991,grad_norm: 0.8562682146538146, iteration: 390821
loss: 1.0384875535964966,grad_norm: 0.8450909224776878, iteration: 390822
loss: 1.0003347396850586,grad_norm: 0.7655991334132252, iteration: 390823
loss: 0.9834612607955933,grad_norm: 0.7876754937313964, iteration: 390824
loss: 1.033523678779602,grad_norm: 0.8847257981000425, iteration: 390825
loss: 1.0401207208633423,grad_norm: 0.7910598695747966, iteration: 390826
loss: 0.9852690696716309,grad_norm: 0.7854033265607008, iteration: 390827
loss: 1.0374791622161865,grad_norm: 0.8700109859805798, iteration: 390828
loss: 0.9961846470832825,grad_norm: 0.8272741654449218, iteration: 390829
loss: 0.9925503730773926,grad_norm: 0.9053601257581826, iteration: 390830
loss: 0.9767102003097534,grad_norm: 0.8257995431277102, iteration: 390831
loss: 0.9994046688079834,grad_norm: 0.7493034587999152, iteration: 390832
loss: 1.012474536895752,grad_norm: 0.8452940061291633, iteration: 390833
loss: 0.9724892377853394,grad_norm: 0.7332932736021434, iteration: 390834
loss: 0.998641312122345,grad_norm: 0.7543218329714567, iteration: 390835
loss: 1.0094070434570312,grad_norm: 0.816091482255973, iteration: 390836
loss: 1.0022934675216675,grad_norm: 0.8032708712146449, iteration: 390837
loss: 0.9976293444633484,grad_norm: 0.7886756768744365, iteration: 390838
loss: 0.9940598011016846,grad_norm: 0.732730515296593, iteration: 390839
loss: 1.0058202743530273,grad_norm: 0.833854585434783, iteration: 390840
loss: 1.0270226001739502,grad_norm: 0.8563482401003629, iteration: 390841
loss: 1.0444039106369019,grad_norm: 0.8587499534406052, iteration: 390842
loss: 1.051045298576355,grad_norm: 0.9999991400239108, iteration: 390843
loss: 0.9880551695823669,grad_norm: 0.7210219264735415, iteration: 390844
loss: 1.106977939605713,grad_norm: 0.8768909218740861, iteration: 390845
loss: 0.9705561995506287,grad_norm: 0.788277696353236, iteration: 390846
loss: 0.9798892736434937,grad_norm: 0.7220105747817075, iteration: 390847
loss: 1.02940034866333,grad_norm: 0.7349870487438283, iteration: 390848
loss: 0.9750901460647583,grad_norm: 0.9400185981993847, iteration: 390849
loss: 1.0309501886367798,grad_norm: 0.8439061468898938, iteration: 390850
loss: 1.017013430595398,grad_norm: 0.9999998582060996, iteration: 390851
loss: 0.9785361886024475,grad_norm: 0.9341550631733408, iteration: 390852
loss: 0.9786207675933838,grad_norm: 0.7374362228750531, iteration: 390853
loss: 0.9974676370620728,grad_norm: 0.9946399183944646, iteration: 390854
loss: 0.9915863275527954,grad_norm: 0.863100109770599, iteration: 390855
loss: 1.0064938068389893,grad_norm: 0.7916982828871757, iteration: 390856
loss: 1.0439062118530273,grad_norm: 0.797951708084613, iteration: 390857
loss: 1.0269874334335327,grad_norm: 0.8033587564233615, iteration: 390858
loss: 0.9913183450698853,grad_norm: 0.9999990728987841, iteration: 390859
loss: 1.0457465648651123,grad_norm: 0.7442496565079616, iteration: 390860
loss: 1.0295007228851318,grad_norm: 0.7899832180198071, iteration: 390861
loss: 1.0426486730575562,grad_norm: 0.999999727245673, iteration: 390862
loss: 1.0016837120056152,grad_norm: 0.6925529667654534, iteration: 390863
loss: 0.9509700536727905,grad_norm: 0.760223955146823, iteration: 390864
loss: 1.005541443824768,grad_norm: 0.6936221975564508, iteration: 390865
loss: 0.9855626821517944,grad_norm: 0.6951867486522989, iteration: 390866
loss: 1.0132482051849365,grad_norm: 0.8417880368393682, iteration: 390867
loss: 1.0201691389083862,grad_norm: 0.9999998654643005, iteration: 390868
loss: 1.0053651332855225,grad_norm: 0.7636983041234029, iteration: 390869
loss: 1.0015978813171387,grad_norm: 0.7002918819624373, iteration: 390870
loss: 1.0014225244522095,grad_norm: 0.7095252714027017, iteration: 390871
loss: 0.9878062605857849,grad_norm: 0.7695565357546905, iteration: 390872
loss: 0.999915599822998,grad_norm: 0.7842306161912992, iteration: 390873
loss: 1.0073695182800293,grad_norm: 0.726650026763414, iteration: 390874
loss: 0.9813116788864136,grad_norm: 0.6811670005690146, iteration: 390875
loss: 0.9871691465377808,grad_norm: 0.9042127575441707, iteration: 390876
loss: 0.9868685007095337,grad_norm: 0.7607783788421083, iteration: 390877
loss: 1.0227587223052979,grad_norm: 0.7746835225128502, iteration: 390878
loss: 0.9835202097892761,grad_norm: 0.9762115201338823, iteration: 390879
loss: 0.9397943019866943,grad_norm: 0.874408406789928, iteration: 390880
loss: 0.9719204902648926,grad_norm: 0.7485316700990495, iteration: 390881
loss: 0.9838478565216064,grad_norm: 0.7243760904074353, iteration: 390882
loss: 1.0243746042251587,grad_norm: 0.9999997498188768, iteration: 390883
loss: 1.0218968391418457,grad_norm: 0.9999992964713493, iteration: 390884
loss: 1.0319937467575073,grad_norm: 0.9521260552296559, iteration: 390885
loss: 0.9975560307502747,grad_norm: 0.6096931515846873, iteration: 390886
loss: 0.9942618608474731,grad_norm: 0.8753351179922176, iteration: 390887
loss: 0.9679840803146362,grad_norm: 0.8081391100779209, iteration: 390888
loss: 0.9852684140205383,grad_norm: 0.8642229408188472, iteration: 390889
loss: 0.9879916906356812,grad_norm: 0.7726579277489414, iteration: 390890
loss: 0.9789723753929138,grad_norm: 0.9192134354013329, iteration: 390891
loss: 1.0736668109893799,grad_norm: 0.9999991264900197, iteration: 390892
loss: 0.9788697957992554,grad_norm: 0.7676753391845077, iteration: 390893
loss: 1.0362160205841064,grad_norm: 0.9999996809922848, iteration: 390894
loss: 1.032537817955017,grad_norm: 0.9999997950346724, iteration: 390895
loss: 0.9820087552070618,grad_norm: 0.8415593413571779, iteration: 390896
loss: 0.9978165626525879,grad_norm: 0.7477446129427914, iteration: 390897
loss: 1.001684308052063,grad_norm: 0.6892619628535825, iteration: 390898
loss: 0.9871185421943665,grad_norm: 0.7936523016214603, iteration: 390899
loss: 1.0571725368499756,grad_norm: 0.7109453725542019, iteration: 390900
loss: 0.9967312812805176,grad_norm: 0.7073692911641204, iteration: 390901
loss: 1.0300695896148682,grad_norm: 0.9999991272776798, iteration: 390902
loss: 1.0064605474472046,grad_norm: 0.7405518892099425, iteration: 390903
loss: 0.9878680109977722,grad_norm: 0.8757755481500972, iteration: 390904
loss: 0.9959766268730164,grad_norm: 0.7480314021972644, iteration: 390905
loss: 1.0429532527923584,grad_norm: 0.7781148303343841, iteration: 390906
loss: 0.9640607237815857,grad_norm: 0.8014917592682888, iteration: 390907
loss: 1.0033552646636963,grad_norm: 0.9353373452371676, iteration: 390908
loss: 0.9914620518684387,grad_norm: 0.8183407755983426, iteration: 390909
loss: 0.9540769457817078,grad_norm: 0.7235800657690369, iteration: 390910
loss: 0.9722980260848999,grad_norm: 0.7997843806109719, iteration: 390911
loss: 1.0291502475738525,grad_norm: 0.8427573564638858, iteration: 390912
loss: 1.0136771202087402,grad_norm: 0.716957623145863, iteration: 390913
loss: 0.9961118102073669,grad_norm: 0.7379122174250032, iteration: 390914
loss: 0.9935401082038879,grad_norm: 0.9897629331766716, iteration: 390915
loss: 1.0320419073104858,grad_norm: 0.9746763153972151, iteration: 390916
loss: 1.0477875471115112,grad_norm: 0.9999989677044978, iteration: 390917
loss: 1.0222201347351074,grad_norm: 0.7500691352354709, iteration: 390918
loss: 0.9867391586303711,grad_norm: 0.7574362466179612, iteration: 390919
loss: 1.0816410779953003,grad_norm: 1.0000000115893837, iteration: 390920
loss: 0.9953330755233765,grad_norm: 0.9999997432984127, iteration: 390921
loss: 0.9813859462738037,grad_norm: 0.7575613015270148, iteration: 390922
loss: 0.9888505935668945,grad_norm: 0.9109947404265574, iteration: 390923
loss: 1.0083123445510864,grad_norm: 0.7314143013646021, iteration: 390924
loss: 1.0360549688339233,grad_norm: 0.7590855750835257, iteration: 390925
loss: 0.9996136426925659,grad_norm: 0.7989607060818648, iteration: 390926
loss: 1.0429414510726929,grad_norm: 0.7614849833046352, iteration: 390927
loss: 1.006232500076294,grad_norm: 0.6618465070323066, iteration: 390928
loss: 0.9681805968284607,grad_norm: 0.9141159096834454, iteration: 390929
loss: 0.9825387001037598,grad_norm: 0.7411630587775788, iteration: 390930
loss: 0.9857861995697021,grad_norm: 0.7838518445829064, iteration: 390931
loss: 0.9810487627983093,grad_norm: 0.9434878890015935, iteration: 390932
loss: 1.0084972381591797,grad_norm: 0.999999032385303, iteration: 390933
loss: 1.0433199405670166,grad_norm: 0.9999992619441537, iteration: 390934
loss: 0.9893865585327148,grad_norm: 0.7701883615159754, iteration: 390935
loss: 1.0270509719848633,grad_norm: 0.7812883909822521, iteration: 390936
loss: 1.0066606998443604,grad_norm: 0.7189915103460629, iteration: 390937
loss: 1.0445739030838013,grad_norm: 0.9999996005384122, iteration: 390938
loss: 0.979285478591919,grad_norm: 0.8579955849387795, iteration: 390939
loss: 1.00517737865448,grad_norm: 0.9999996971918012, iteration: 390940
loss: 0.9714235067367554,grad_norm: 0.7427022351539289, iteration: 390941
loss: 1.0199503898620605,grad_norm: 0.9137131452499547, iteration: 390942
loss: 0.9845128655433655,grad_norm: 0.8624476074922446, iteration: 390943
loss: 0.9835426807403564,grad_norm: 0.7066409927392282, iteration: 390944
loss: 0.969958484172821,grad_norm: 0.99999907632036, iteration: 390945
loss: 1.0400481224060059,grad_norm: 0.9414808794466828, iteration: 390946
loss: 1.0121318101882935,grad_norm: 0.9978807840483822, iteration: 390947
loss: 0.9938610792160034,grad_norm: 0.9721912976959431, iteration: 390948
loss: 0.9742890000343323,grad_norm: 0.7535235008959656, iteration: 390949
loss: 1.0517843961715698,grad_norm: 0.9713923595365327, iteration: 390950
loss: 0.9978054761886597,grad_norm: 0.7682080128163347, iteration: 390951
loss: 1.0444421768188477,grad_norm: 0.9999999535004074, iteration: 390952
loss: 0.9797144532203674,grad_norm: 0.7419498169825253, iteration: 390953
loss: 1.0077089071273804,grad_norm: 0.867071574388462, iteration: 390954
loss: 1.008026123046875,grad_norm: 0.8604853292245925, iteration: 390955
loss: 0.9858023524284363,grad_norm: 0.8156172310955238, iteration: 390956
loss: 0.9770615696907043,grad_norm: 0.749416923046252, iteration: 390957
loss: 0.9980996251106262,grad_norm: 0.788665326383971, iteration: 390958
loss: 1.0857411623001099,grad_norm: 0.9999999709109699, iteration: 390959
loss: 1.0335229635238647,grad_norm: 0.9999991430814574, iteration: 390960
loss: 0.9881084561347961,grad_norm: 0.8345669977659109, iteration: 390961
loss: 0.9851454496383667,grad_norm: 0.9999990523545736, iteration: 390962
loss: 1.000937819480896,grad_norm: 0.724056773101322, iteration: 390963
loss: 0.9611747860908508,grad_norm: 0.8690764353625, iteration: 390964
loss: 0.9892234802246094,grad_norm: 0.8003390252313941, iteration: 390965
loss: 1.0281950235366821,grad_norm: 0.8186771975359263, iteration: 390966
loss: 0.974148154258728,grad_norm: 0.7779813120099949, iteration: 390967
loss: 1.0088626146316528,grad_norm: 0.649811656749973, iteration: 390968
loss: 0.9930492043495178,grad_norm: 0.7031228083197198, iteration: 390969
loss: 1.0269607305526733,grad_norm: 0.8020530169838094, iteration: 390970
loss: 0.9841082096099854,grad_norm: 0.7979740758312119, iteration: 390971
loss: 0.9520196914672852,grad_norm: 0.7262373755706811, iteration: 390972
loss: 0.9991795420646667,grad_norm: 0.9412271998069116, iteration: 390973
loss: 0.9873160123825073,grad_norm: 0.9149442530438002, iteration: 390974
loss: 0.9888360500335693,grad_norm: 0.6817180960404977, iteration: 390975
loss: 0.9869056940078735,grad_norm: 0.759662518856029, iteration: 390976
loss: 1.0133529901504517,grad_norm: 0.9058781599331666, iteration: 390977
loss: 1.0005263090133667,grad_norm: 0.751239515962546, iteration: 390978
loss: 0.9782240390777588,grad_norm: 0.8608503968460258, iteration: 390979
loss: 1.0195945501327515,grad_norm: 0.7743973169033946, iteration: 390980
loss: 0.9777625203132629,grad_norm: 0.9636739307618747, iteration: 390981
loss: 1.0456910133361816,grad_norm: 0.9999996297851605, iteration: 390982
loss: 1.0260539054870605,grad_norm: 0.9999991886189442, iteration: 390983
loss: 1.0035845041275024,grad_norm: 0.7188973669780565, iteration: 390984
loss: 1.0048682689666748,grad_norm: 0.9365206810864027, iteration: 390985
loss: 0.9723917841911316,grad_norm: 0.9999991554508985, iteration: 390986
loss: 0.9853550791740417,grad_norm: 0.70779021195975, iteration: 390987
loss: 1.028720736503601,grad_norm: 0.7688082903933879, iteration: 390988
loss: 1.0253688097000122,grad_norm: 0.7255153919435511, iteration: 390989
loss: 1.0207161903381348,grad_norm: 0.8518877588911689, iteration: 390990
loss: 0.9602771997451782,grad_norm: 0.7248575061103625, iteration: 390991
loss: 1.00580632686615,grad_norm: 0.9999992378016199, iteration: 390992
loss: 0.995955228805542,grad_norm: 0.8367630980501518, iteration: 390993
loss: 0.9998490810394287,grad_norm: 0.7852044287647945, iteration: 390994
loss: 0.9724082350730896,grad_norm: 0.7724144612096534, iteration: 390995
loss: 1.0251609086990356,grad_norm: 0.9999991899520423, iteration: 390996
loss: 0.9875522255897522,grad_norm: 0.7625945349394522, iteration: 390997
loss: 1.0113410949707031,grad_norm: 0.6911337105599065, iteration: 390998
loss: 1.0059094429016113,grad_norm: 0.99999892977854, iteration: 390999
loss: 0.9881464242935181,grad_norm: 0.9999991699018886, iteration: 391000
loss: 0.9914700984954834,grad_norm: 0.8062780697982981, iteration: 391001
loss: 0.99202561378479,grad_norm: 0.9999991317079484, iteration: 391002
loss: 1.011572241783142,grad_norm: 0.8866984986803838, iteration: 391003
loss: 1.0288695096969604,grad_norm: 0.9999990858639044, iteration: 391004
loss: 1.010577917098999,grad_norm: 0.8663113179744851, iteration: 391005
loss: 1.006134271621704,grad_norm: 0.8550647925588338, iteration: 391006
loss: 1.2011306285858154,grad_norm: 0.9999999685153348, iteration: 391007
loss: 0.9968416690826416,grad_norm: 0.9999992780580284, iteration: 391008
loss: 0.9919040203094482,grad_norm: 0.7765060795826633, iteration: 391009
loss: 0.9787560701370239,grad_norm: 0.727023325448908, iteration: 391010
loss: 1.0110944509506226,grad_norm: 0.9194916120616293, iteration: 391011
loss: 1.0351604223251343,grad_norm: 0.9999994750053631, iteration: 391012
loss: 0.985420286655426,grad_norm: 0.843389883730051, iteration: 391013
loss: 1.057744026184082,grad_norm: 0.999999802185844, iteration: 391014
loss: 0.9644225835800171,grad_norm: 0.7591109707369055, iteration: 391015
loss: 1.152266263961792,grad_norm: 0.9999996212003287, iteration: 391016
loss: 0.9661203622817993,grad_norm: 0.7037207190205906, iteration: 391017
loss: 1.0189342498779297,grad_norm: 0.8367042937983384, iteration: 391018
loss: 0.9773818850517273,grad_norm: 0.7684304715983074, iteration: 391019
loss: 1.0028544664382935,grad_norm: 0.7863541466565541, iteration: 391020
loss: 0.9711659550666809,grad_norm: 0.7181282861621218, iteration: 391021
loss: 1.0413105487823486,grad_norm: 0.8707211814876143, iteration: 391022
loss: 1.053269624710083,grad_norm: 0.8753500826084176, iteration: 391023
loss: 0.9934611320495605,grad_norm: 0.9682435278483417, iteration: 391024
loss: 1.0325393676757812,grad_norm: 0.9999999759561577, iteration: 391025
loss: 1.0764907598495483,grad_norm: 0.9999994649347563, iteration: 391026
loss: 0.9911516308784485,grad_norm: 0.8156549652532809, iteration: 391027
loss: 0.9779199957847595,grad_norm: 0.6849408912939157, iteration: 391028
loss: 0.983891487121582,grad_norm: 0.6626822400724186, iteration: 391029
loss: 0.9855213165283203,grad_norm: 0.7163195127896066, iteration: 391030
loss: 0.9887662529945374,grad_norm: 0.6812412030750723, iteration: 391031
loss: 1.0719866752624512,grad_norm: 0.7478126432418045, iteration: 391032
loss: 0.9788604974746704,grad_norm: 0.8339240289410776, iteration: 391033
loss: 1.0114294290542603,grad_norm: 0.7664106893711989, iteration: 391034
loss: 1.0383186340332031,grad_norm: 0.7912668372075017, iteration: 391035
loss: 1.0346345901489258,grad_norm: 0.999999534623194, iteration: 391036
loss: 1.4790585041046143,grad_norm: 0.9999992814482223, iteration: 391037
loss: 0.9860906004905701,grad_norm: 0.820532800993236, iteration: 391038
loss: 0.9998593330383301,grad_norm: 0.7346962881879896, iteration: 391039
loss: 0.9762438535690308,grad_norm: 0.874748025608465, iteration: 391040
loss: 0.9881243109703064,grad_norm: 0.9663255452202353, iteration: 391041
loss: 0.9852029085159302,grad_norm: 0.7438823432790398, iteration: 391042
loss: 0.9962871074676514,grad_norm: 0.9999993770442566, iteration: 391043
loss: 1.011703610420227,grad_norm: 0.7685561060119324, iteration: 391044
loss: 1.0455931425094604,grad_norm: 0.8294789484537011, iteration: 391045
loss: 0.9622443914413452,grad_norm: 0.6924970361266183, iteration: 391046
loss: 1.0025079250335693,grad_norm: 0.9999999144148876, iteration: 391047
loss: 0.9821387529373169,grad_norm: 0.7695627404022726, iteration: 391048
loss: 1.040966510772705,grad_norm: 0.968476710981022, iteration: 391049
loss: 1.1964082717895508,grad_norm: 0.9999997780308248, iteration: 391050
loss: 1.0469048023223877,grad_norm: 0.9999999158538451, iteration: 391051
loss: 0.9969180822372437,grad_norm: 0.9999992476503188, iteration: 391052
loss: 1.0300562381744385,grad_norm: 0.7706021545784053, iteration: 391053
loss: 1.0057504177093506,grad_norm: 0.9999995573011808, iteration: 391054
loss: 1.1758397817611694,grad_norm: 0.9999997482956601, iteration: 391055
loss: 1.2397164106369019,grad_norm: 0.9999998577247612, iteration: 391056
loss: 1.0147079229354858,grad_norm: 0.8718481873933435, iteration: 391057
loss: 0.9961318969726562,grad_norm: 0.6857374901878025, iteration: 391058
loss: 0.9989421367645264,grad_norm: 0.9355480093946449, iteration: 391059
loss: 1.0156172513961792,grad_norm: 0.6895800981660469, iteration: 391060
loss: 1.0464298725128174,grad_norm: 1.000000028135476, iteration: 391061
loss: 1.0233803987503052,grad_norm: 0.789981871874892, iteration: 391062
loss: 1.0524494647979736,grad_norm: 0.8778449488404556, iteration: 391063
loss: 1.032309889793396,grad_norm: 0.8165938411871182, iteration: 391064
loss: 1.0650614500045776,grad_norm: 0.9999992721680919, iteration: 391065
loss: 0.9806057810783386,grad_norm: 0.7154662466682754, iteration: 391066
loss: 1.0405902862548828,grad_norm: 0.9999992958263062, iteration: 391067
loss: 1.0792574882507324,grad_norm: 0.9173425819370811, iteration: 391068
loss: 1.00835382938385,grad_norm: 0.8160766052671058, iteration: 391069
loss: 0.9675142765045166,grad_norm: 0.7149841460714635, iteration: 391070
loss: 1.0116814374923706,grad_norm: 0.9483984751248185, iteration: 391071
loss: 0.9893520474433899,grad_norm: 0.7817167562928825, iteration: 391072
loss: 1.048986792564392,grad_norm: 0.9999996885905945, iteration: 391073
loss: 1.0021069049835205,grad_norm: 0.9999997855294187, iteration: 391074
loss: 0.9694192409515381,grad_norm: 0.7761513382573779, iteration: 391075
loss: 1.1411739587783813,grad_norm: 0.9999996938888608, iteration: 391076
loss: 1.0146124362945557,grad_norm: 0.9999998183734691, iteration: 391077
loss: 0.9843705296516418,grad_norm: 0.8142852123400034, iteration: 391078
loss: 0.9815137982368469,grad_norm: 0.8008501352632464, iteration: 391079
loss: 1.0912383794784546,grad_norm: 0.9999996174107935, iteration: 391080
loss: 0.9844641089439392,grad_norm: 0.6896624978848233, iteration: 391081
loss: 1.020696759223938,grad_norm: 0.9999998621147078, iteration: 391082
loss: 0.9698203206062317,grad_norm: 0.7207299480357849, iteration: 391083
loss: 1.0308152437210083,grad_norm: 0.8184044774903322, iteration: 391084
loss: 0.949849009513855,grad_norm: 0.7140602095839791, iteration: 391085
loss: 0.9717387557029724,grad_norm: 0.694197238949044, iteration: 391086
loss: 1.0033092498779297,grad_norm: 0.7883085348700393, iteration: 391087
loss: 0.9707148671150208,grad_norm: 0.7830103171744579, iteration: 391088
loss: 1.0562900304794312,grad_norm: 0.9305376544440351, iteration: 391089
loss: 1.091626524925232,grad_norm: 0.914395271825145, iteration: 391090
loss: 0.9989843964576721,grad_norm: 0.6953653275699951, iteration: 391091
loss: 0.9833657145500183,grad_norm: 0.7923672750519528, iteration: 391092
loss: 1.0078375339508057,grad_norm: 0.6287069579692364, iteration: 391093
loss: 0.9819545745849609,grad_norm: 0.8435561783884575, iteration: 391094
loss: 1.2384339570999146,grad_norm: 0.9999998165722602, iteration: 391095
loss: 1.0479621887207031,grad_norm: 0.9068691318308532, iteration: 391096
loss: 1.015281319618225,grad_norm: 0.7194866970983402, iteration: 391097
loss: 0.9760408997535706,grad_norm: 0.7512170949718978, iteration: 391098
loss: 1.0688481330871582,grad_norm: 0.8725653332245115, iteration: 391099
loss: 1.0182722806930542,grad_norm: 0.814039473325468, iteration: 391100
loss: 1.060001254081726,grad_norm: 0.999999982432237, iteration: 391101
loss: 1.3242264986038208,grad_norm: 0.9999999590507758, iteration: 391102
loss: 1.0015590190887451,grad_norm: 0.7317793366579424, iteration: 391103
loss: 0.9668254852294922,grad_norm: 0.7903504061016796, iteration: 391104
loss: 0.9981406331062317,grad_norm: 0.7148985337705687, iteration: 391105
loss: 1.0275176763534546,grad_norm: 0.7513702623043662, iteration: 391106
loss: 1.0249965190887451,grad_norm: 0.792606676650022, iteration: 391107
loss: 0.9936373829841614,grad_norm: 0.70308238839231, iteration: 391108
loss: 0.9918412566184998,grad_norm: 0.750289404284002, iteration: 391109
loss: 1.170186161994934,grad_norm: 0.999999570694397, iteration: 391110
loss: 0.9843171834945679,grad_norm: 0.8494508567984397, iteration: 391111
loss: 1.0234118700027466,grad_norm: 0.934550032677801, iteration: 391112
loss: 0.9932810664176941,grad_norm: 0.7839361699018177, iteration: 391113
loss: 0.9742576479911804,grad_norm: 0.9079077784178184, iteration: 391114
loss: 1.0388880968093872,grad_norm: 0.8102542648434785, iteration: 391115
loss: 0.998854398727417,grad_norm: 0.9999995308882592, iteration: 391116
loss: 1.0061218738555908,grad_norm: 0.7486189315519858, iteration: 391117
loss: 0.9923786520957947,grad_norm: 0.8518296209496866, iteration: 391118
loss: 1.0402257442474365,grad_norm: 0.8404046905062852, iteration: 391119
loss: 1.0010026693344116,grad_norm: 0.9665913651148438, iteration: 391120
loss: 1.0135607719421387,grad_norm: 0.9005397118404709, iteration: 391121
loss: 1.0539458990097046,grad_norm: 0.8130791471739176, iteration: 391122
loss: 1.0342475175857544,grad_norm: 0.7639931869241501, iteration: 391123
loss: 1.0066832304000854,grad_norm: 0.8241131573651769, iteration: 391124
loss: 1.0202364921569824,grad_norm: 0.9625160466460182, iteration: 391125
loss: 1.0683355331420898,grad_norm: 0.9999998583197371, iteration: 391126
loss: 1.0267612934112549,grad_norm: 0.9999993711809344, iteration: 391127
loss: 1.0088905096054077,grad_norm: 0.9999996650591186, iteration: 391128
loss: 0.9683055281639099,grad_norm: 0.7785460651169674, iteration: 391129
loss: 0.9792144298553467,grad_norm: 0.7525009442793293, iteration: 391130
loss: 0.992485761642456,grad_norm: 0.8855106791687816, iteration: 391131
loss: 1.0194295644760132,grad_norm: 0.9999992303322086, iteration: 391132
loss: 0.9558457732200623,grad_norm: 0.7804631723942753, iteration: 391133
loss: 1.0705796480178833,grad_norm: 0.8603194017382956, iteration: 391134
loss: 1.0548248291015625,grad_norm: 0.963761518403813, iteration: 391135
loss: 1.055708885192871,grad_norm: 0.9999992640193239, iteration: 391136
loss: 1.0666935443878174,grad_norm: 0.9999998445001652, iteration: 391137
loss: 1.028709053993225,grad_norm: 0.8794917218410299, iteration: 391138
loss: 0.9664571285247803,grad_norm: 0.8407433766848259, iteration: 391139
loss: 0.9847785234451294,grad_norm: 0.8121589115749651, iteration: 391140
loss: 1.1639351844787598,grad_norm: 0.9443209130344895, iteration: 391141
loss: 1.03146493434906,grad_norm: 0.8732183585254208, iteration: 391142
loss: 0.9786582589149475,grad_norm: 0.8438519715111523, iteration: 391143
loss: 1.0834753513336182,grad_norm: 0.9555474442352981, iteration: 391144
loss: 1.0719964504241943,grad_norm: 0.7803762938841543, iteration: 391145
loss: 0.983448326587677,grad_norm: 0.8358905874891472, iteration: 391146
loss: 1.1942700147628784,grad_norm: 0.999999760760805, iteration: 391147
loss: 1.075378656387329,grad_norm: 0.9999991174143451, iteration: 391148
loss: 1.0206034183502197,grad_norm: 0.7136625706085375, iteration: 391149
loss: 1.061850666999817,grad_norm: 0.9999994241853736, iteration: 391150
loss: 1.0689789056777954,grad_norm: 0.9999991784162681, iteration: 391151
loss: 1.0661259889602661,grad_norm: 0.9251291415824621, iteration: 391152
loss: 1.0065157413482666,grad_norm: 0.6922468016725974, iteration: 391153
loss: 1.0504209995269775,grad_norm: 0.8259467883698127, iteration: 391154
loss: 1.049936294555664,grad_norm: 0.6886046496720295, iteration: 391155
loss: 1.001516580581665,grad_norm: 0.9999991239838562, iteration: 391156
loss: 1.0613614320755005,grad_norm: 0.9999990540021543, iteration: 391157
loss: 0.9887286424636841,grad_norm: 0.6632247220125054, iteration: 391158
loss: 0.9932628273963928,grad_norm: 0.7335757182807153, iteration: 391159
loss: 0.9974024295806885,grad_norm: 0.9283368200116832, iteration: 391160
loss: 1.023319959640503,grad_norm: 0.9614648019764821, iteration: 391161
loss: 0.9900056719779968,grad_norm: 0.7211180964454786, iteration: 391162
loss: 1.1754961013793945,grad_norm: 0.9999992496903382, iteration: 391163
loss: 1.0392143726348877,grad_norm: 0.9962565627202937, iteration: 391164
loss: 1.012285828590393,grad_norm: 0.8295622389614434, iteration: 391165
loss: 1.0180271863937378,grad_norm: 0.7346402614985382, iteration: 391166
loss: 1.0440115928649902,grad_norm: 0.9999993000776188, iteration: 391167
loss: 1.0009344816207886,grad_norm: 0.6943463468284686, iteration: 391168
loss: 1.0553381443023682,grad_norm: 0.7813042372955795, iteration: 391169
loss: 1.043023943901062,grad_norm: 0.6780473554805517, iteration: 391170
loss: 1.0013253688812256,grad_norm: 0.8286725807086696, iteration: 391171
loss: 1.012816309928894,grad_norm: 0.999999873975936, iteration: 391172
loss: 1.0937589406967163,grad_norm: 0.8468079328235428, iteration: 391173
loss: 0.9914917945861816,grad_norm: 0.8361620827622227, iteration: 391174
loss: 0.9666290283203125,grad_norm: 0.8683472942241818, iteration: 391175
loss: 0.9982863068580627,grad_norm: 0.9999990737800039, iteration: 391176
loss: 1.062214970588684,grad_norm: 0.9999997929423827, iteration: 391177
loss: 1.1442267894744873,grad_norm: 0.9999998786406369, iteration: 391178
loss: 1.0203261375427246,grad_norm: 0.8628906974081221, iteration: 391179
loss: 1.0133631229400635,grad_norm: 0.8269937763991141, iteration: 391180
loss: 1.0044991970062256,grad_norm: 0.6991977301783376, iteration: 391181
loss: 1.0978877544403076,grad_norm: 0.8863941297129435, iteration: 391182
loss: 1.0707008838653564,grad_norm: 0.9426346427989593, iteration: 391183
loss: 1.014951229095459,grad_norm: 0.9999991191911697, iteration: 391184
loss: 1.0169247388839722,grad_norm: 0.8857302542133658, iteration: 391185
loss: 1.016672968864441,grad_norm: 0.9201237319320797, iteration: 391186
loss: 1.1031113862991333,grad_norm: 1.0000000195042364, iteration: 391187
loss: 0.9838188290596008,grad_norm: 0.7560847639016466, iteration: 391188
loss: 1.0244418382644653,grad_norm: 0.98639642087893, iteration: 391189
loss: 1.0469133853912354,grad_norm: 0.8690984144436342, iteration: 391190
loss: 0.9954982995986938,grad_norm: 0.8140227217120565, iteration: 391191
loss: 1.0118927955627441,grad_norm: 0.7915318096985563, iteration: 391192
loss: 1.0194220542907715,grad_norm: 0.7104571255162199, iteration: 391193
loss: 1.020869493484497,grad_norm: 0.9999999189184772, iteration: 391194
loss: 0.9919857382774353,grad_norm: 0.8762688878024012, iteration: 391195
loss: 1.0561027526855469,grad_norm: 0.9999998479821492, iteration: 391196
loss: 1.0426360368728638,grad_norm: 0.8125547060806209, iteration: 391197
loss: 1.0305408239364624,grad_norm: 0.9999996544791813, iteration: 391198
loss: 1.0571439266204834,grad_norm: 0.9999997319901026, iteration: 391199
loss: 1.0179855823516846,grad_norm: 0.9999998331177073, iteration: 391200
loss: 1.023645281791687,grad_norm: 0.86088235792909, iteration: 391201
loss: 1.0025805234909058,grad_norm: 0.8080866616530538, iteration: 391202
loss: 0.9916533827781677,grad_norm: 0.7425016641859263, iteration: 391203
loss: 1.071707010269165,grad_norm: 0.9999995371837818, iteration: 391204
loss: 1.0052069425582886,grad_norm: 0.7811815248317557, iteration: 391205
loss: 1.0034215450286865,grad_norm: 0.900304557095092, iteration: 391206
loss: 1.1715794801712036,grad_norm: 0.9999998350988715, iteration: 391207
loss: 1.0104848146438599,grad_norm: 0.7806152804561296, iteration: 391208
loss: 0.9591920971870422,grad_norm: 0.8686314576544046, iteration: 391209
loss: 1.0388785600662231,grad_norm: 0.9999998116327525, iteration: 391210
loss: 0.9968370795249939,grad_norm: 0.9999999185508809, iteration: 391211
loss: 0.9966227412223816,grad_norm: 0.8356900745156312, iteration: 391212
loss: 0.9555678963661194,grad_norm: 0.6902762762297728, iteration: 391213
loss: 1.0069808959960938,grad_norm: 0.8498113956068084, iteration: 391214
loss: 1.0279587507247925,grad_norm: 0.9999991876432247, iteration: 391215
loss: 1.0050525665283203,grad_norm: 0.9404820700641912, iteration: 391216
loss: 1.0368553400039673,grad_norm: 0.6504601305973088, iteration: 391217
loss: 1.136061191558838,grad_norm: 0.999999552502321, iteration: 391218
loss: 0.9973821640014648,grad_norm: 0.8753276813530134, iteration: 391219
loss: 1.0038470029830933,grad_norm: 0.7480503371872729, iteration: 391220
loss: 1.0063951015472412,grad_norm: 0.6540649274591307, iteration: 391221
loss: 1.0121028423309326,grad_norm: 0.774195207996566, iteration: 391222
loss: 0.9747726917266846,grad_norm: 0.8175687230893648, iteration: 391223
loss: 1.0607255697250366,grad_norm: 1.0000000126013007, iteration: 391224
loss: 1.1053041219711304,grad_norm: 0.9999989424700814, iteration: 391225
loss: 1.0239481925964355,grad_norm: 0.7781683716765313, iteration: 391226
loss: 1.0481512546539307,grad_norm: 0.8952570314889772, iteration: 391227
loss: 1.0048576593399048,grad_norm: 0.9999998304385874, iteration: 391228
loss: 1.0420973300933838,grad_norm: 0.9999990028173756, iteration: 391229
loss: 1.0609798431396484,grad_norm: 0.8724935635165931, iteration: 391230
loss: 1.0561747550964355,grad_norm: 0.9999994170788741, iteration: 391231
loss: 1.0101203918457031,grad_norm: 0.9999990900102076, iteration: 391232
loss: 0.9825395941734314,grad_norm: 0.9999990570831232, iteration: 391233
loss: 1.0299484729766846,grad_norm: 0.999999186025332, iteration: 391234
loss: 1.03489089012146,grad_norm: 0.8572475592890065, iteration: 391235
loss: 0.9739223122596741,grad_norm: 0.8727283532138161, iteration: 391236
loss: 0.9935761094093323,grad_norm: 0.8328581243969616, iteration: 391237
loss: 1.0153980255126953,grad_norm: 0.742503495695718, iteration: 391238
loss: 1.0302733182907104,grad_norm: 0.9999997980144463, iteration: 391239
loss: 1.0020381212234497,grad_norm: 0.8922540836414393, iteration: 391240
loss: 1.0178617238998413,grad_norm: 0.999999520439495, iteration: 391241
loss: 1.1653708219528198,grad_norm: 0.9999993962260671, iteration: 391242
loss: 1.0059207677841187,grad_norm: 0.8047428483070763, iteration: 391243
loss: 0.9967033863067627,grad_norm: 0.6079386678841819, iteration: 391244
loss: 0.9886144995689392,grad_norm: 0.8673181995422015, iteration: 391245
loss: 0.9877733588218689,grad_norm: 0.8699611512188353, iteration: 391246
loss: 1.0507490634918213,grad_norm: 0.9999992102925533, iteration: 391247
loss: 1.0314127206802368,grad_norm: 0.9840147691546587, iteration: 391248
loss: 1.025587558746338,grad_norm: 0.8057343202389242, iteration: 391249
loss: 1.01386296749115,grad_norm: 0.7435065927290285, iteration: 391250
loss: 0.9663148522377014,grad_norm: 0.8636367355018081, iteration: 391251
loss: 1.0494338274002075,grad_norm: 0.7824209647107426, iteration: 391252
loss: 1.0671695470809937,grad_norm: 0.9999998662453129, iteration: 391253
loss: 0.9878225922584534,grad_norm: 0.7984023260836459, iteration: 391254
loss: 1.0305720567703247,grad_norm: 0.822360704269208, iteration: 391255
loss: 0.9835653901100159,grad_norm: 0.8127422641449688, iteration: 391256
loss: 1.048928141593933,grad_norm: 0.9999994253340282, iteration: 391257
loss: 1.0133413076400757,grad_norm: 0.7202198990490036, iteration: 391258
loss: 0.9962612390518188,grad_norm: 0.770430206241412, iteration: 391259
loss: 1.003358244895935,grad_norm: 0.7239297813823499, iteration: 391260
loss: 1.0543644428253174,grad_norm: 0.8269505247686026, iteration: 391261
loss: 1.082193374633789,grad_norm: 0.8974428570313818, iteration: 391262
loss: 1.1365026235580444,grad_norm: 0.9999996333611433, iteration: 391263
loss: 1.03409743309021,grad_norm: 0.8113191611182836, iteration: 391264
loss: 0.951979398727417,grad_norm: 0.8428752298978107, iteration: 391265
loss: 0.9886066913604736,grad_norm: 0.8941393696018819, iteration: 391266
loss: 1.0050259828567505,grad_norm: 0.8841732089838334, iteration: 391267
loss: 1.0396366119384766,grad_norm: 0.7282229357655317, iteration: 391268
loss: 0.9809046983718872,grad_norm: 0.8272388567392412, iteration: 391269
loss: 1.002010464668274,grad_norm: 0.7794559665517634, iteration: 391270
loss: 1.0282962322235107,grad_norm: 0.9999998473783398, iteration: 391271
loss: 1.0642808675765991,grad_norm: 0.7555403764868269, iteration: 391272
loss: 1.0152485370635986,grad_norm: 0.7430738983779333, iteration: 391273
loss: 1.0529541969299316,grad_norm: 0.9999994051940174, iteration: 391274
loss: 0.9950936436653137,grad_norm: 0.9999993842987406, iteration: 391275
loss: 1.1392313241958618,grad_norm: 1.000000023883453, iteration: 391276
loss: 0.9843575954437256,grad_norm: 0.8770444268357587, iteration: 391277
loss: 1.0563498735427856,grad_norm: 0.838924629573714, iteration: 391278
loss: 1.0607218742370605,grad_norm: 0.8242529533015387, iteration: 391279
loss: 0.9845393896102905,grad_norm: 0.9999990625002055, iteration: 391280
loss: 0.9747734069824219,grad_norm: 0.7354211514538592, iteration: 391281
loss: 0.9909500479698181,grad_norm: 0.6645929954231476, iteration: 391282
loss: 0.987356424331665,grad_norm: 0.7509053345983273, iteration: 391283
loss: 1.0826122760772705,grad_norm: 0.8758903953893679, iteration: 391284
loss: 0.9957258701324463,grad_norm: 0.7962110320362248, iteration: 391285
loss: 1.0214656591415405,grad_norm: 0.8899876904066666, iteration: 391286
loss: 0.9977905750274658,grad_norm: 0.9999998468679007, iteration: 391287
loss: 1.1326042413711548,grad_norm: 0.9999994758252834, iteration: 391288
loss: 0.9905204772949219,grad_norm: 0.761090667022368, iteration: 391289
loss: 1.0505385398864746,grad_norm: 0.999999349955655, iteration: 391290
loss: 0.9777317643165588,grad_norm: 0.779668934841264, iteration: 391291
loss: 1.0646016597747803,grad_norm: 0.9999991112105142, iteration: 391292
loss: 1.1108068227767944,grad_norm: 0.9999997459617568, iteration: 391293
loss: 1.0514657497406006,grad_norm: 0.9999990423826833, iteration: 391294
loss: 1.0704234838485718,grad_norm: 0.7554369689345377, iteration: 391295
loss: 1.1485785245895386,grad_norm: 0.9999996485006193, iteration: 391296
loss: 1.0398198366165161,grad_norm: 0.8483372025120187, iteration: 391297
loss: 1.060826301574707,grad_norm: 0.9999991509541775, iteration: 391298
loss: 1.022166132926941,grad_norm: 0.9999992899798593, iteration: 391299
loss: 1.1203571557998657,grad_norm: 0.9999995513039095, iteration: 391300
loss: 1.033090591430664,grad_norm: 0.9999998453347089, iteration: 391301
loss: 1.0244505405426025,grad_norm: 0.9119101160013089, iteration: 391302
loss: 1.0265792608261108,grad_norm: 0.9999997382644369, iteration: 391303
loss: 1.0025858879089355,grad_norm: 0.9485785218758417, iteration: 391304
loss: 1.0176281929016113,grad_norm: 0.6720655295037994, iteration: 391305
loss: 1.0244885683059692,grad_norm: 0.99999906101868, iteration: 391306
loss: 1.0130081176757812,grad_norm: 1.0000000210450024, iteration: 391307
loss: 1.070393443107605,grad_norm: 0.9999993493833021, iteration: 391308
loss: 0.9927435517311096,grad_norm: 0.9999999318694679, iteration: 391309
loss: 1.1412540674209595,grad_norm: 0.999999349933562, iteration: 391310
loss: 1.0291380882263184,grad_norm: 0.8292509550966896, iteration: 391311
loss: 1.018205165863037,grad_norm: 0.9999991995641022, iteration: 391312
loss: 1.0406700372695923,grad_norm: 0.9999990525098501, iteration: 391313
loss: 1.0187700986862183,grad_norm: 0.7358307370742307, iteration: 391314
loss: 1.0971558094024658,grad_norm: 0.9999993670660521, iteration: 391315
loss: 0.9967972040176392,grad_norm: 0.8119064648637893, iteration: 391316
loss: 1.1260801553726196,grad_norm: 0.9999994500651357, iteration: 391317
loss: 1.1367377042770386,grad_norm: 0.9999991602565531, iteration: 391318
loss: 1.0965501070022583,grad_norm: 0.9999998522816662, iteration: 391319
loss: 1.0416761636734009,grad_norm: 0.8527476817216518, iteration: 391320
loss: 1.0368338823318481,grad_norm: 0.9999998180647893, iteration: 391321
loss: 1.0078506469726562,grad_norm: 0.8157225591920425, iteration: 391322
loss: 1.0853787660598755,grad_norm: 0.9999998628316312, iteration: 391323
loss: 1.0280028581619263,grad_norm: 0.8824665948174129, iteration: 391324
loss: 1.064497947692871,grad_norm: 0.9999998078924062, iteration: 391325
loss: 1.0735881328582764,grad_norm: 0.9999991899790263, iteration: 391326
loss: 1.2354192733764648,grad_norm: 0.9999998741317452, iteration: 391327
loss: 1.0158220529556274,grad_norm: 0.8139607502862443, iteration: 391328
loss: 0.9932152032852173,grad_norm: 0.945299676378332, iteration: 391329
loss: 1.0427310466766357,grad_norm: 0.8925211855897973, iteration: 391330
loss: 0.9899432063102722,grad_norm: 0.779757531239185, iteration: 391331
loss: 1.0419594049453735,grad_norm: 0.9999998980576045, iteration: 391332
loss: 1.040525197982788,grad_norm: 0.9999992989240952, iteration: 391333
loss: 0.9720614552497864,grad_norm: 0.9999989984086555, iteration: 391334
loss: 1.0592889785766602,grad_norm: 0.9115222606596671, iteration: 391335
loss: 1.052206039428711,grad_norm: 0.9999996417480402, iteration: 391336
loss: 1.0334787368774414,grad_norm: 0.955734080782436, iteration: 391337
loss: 1.1266839504241943,grad_norm: 0.9999998986343015, iteration: 391338
loss: 0.9995241761207581,grad_norm: 0.8188450757840895, iteration: 391339
loss: 0.9929981231689453,grad_norm: 0.9999990904208989, iteration: 391340
loss: 0.9793991446495056,grad_norm: 0.9999992835650309, iteration: 391341
loss: 1.0086448192596436,grad_norm: 0.6394977261501565, iteration: 391342
loss: 1.0081654787063599,grad_norm: 0.6877276867200791, iteration: 391343
loss: 1.037065029144287,grad_norm: 0.8118666078367504, iteration: 391344
loss: 1.0050535202026367,grad_norm: 0.7393279013385169, iteration: 391345
loss: 1.0279943943023682,grad_norm: 0.9999997369353766, iteration: 391346
loss: 1.009612798690796,grad_norm: 0.7165018450397349, iteration: 391347
loss: 1.1216802597045898,grad_norm: 0.9999997048627903, iteration: 391348
loss: 1.0108722448349,grad_norm: 0.8590743909891467, iteration: 391349
loss: 1.008756160736084,grad_norm: 0.7226266646629516, iteration: 391350
loss: 1.0169516801834106,grad_norm: 0.8234230464210802, iteration: 391351
loss: 1.0004639625549316,grad_norm: 0.8745447219681638, iteration: 391352
loss: 0.9615200161933899,grad_norm: 0.7618852973430672, iteration: 391353
loss: 1.0168724060058594,grad_norm: 0.8711571774196438, iteration: 391354
loss: 1.1847809553146362,grad_norm: 0.9999993452563009, iteration: 391355
loss: 1.2296276092529297,grad_norm: 0.9999998164517109, iteration: 391356
loss: 0.9972196817398071,grad_norm: 0.9632445069301967, iteration: 391357
loss: 1.056749701499939,grad_norm: 0.83388326434628, iteration: 391358
loss: 0.9889129400253296,grad_norm: 0.9041957625243077, iteration: 391359
loss: 0.9845823645591736,grad_norm: 0.8282766989542517, iteration: 391360
loss: 1.0086781978607178,grad_norm: 0.7969392972343309, iteration: 391361
loss: 1.010403037071228,grad_norm: 0.8926290163180765, iteration: 391362
loss: 1.1294392347335815,grad_norm: 0.9999993366964007, iteration: 391363
loss: 0.9816307425498962,grad_norm: 0.7598338737280755, iteration: 391364
loss: 1.003908395767212,grad_norm: 0.8025062850540814, iteration: 391365
loss: 1.0142415761947632,grad_norm: 0.9999991856015287, iteration: 391366
loss: 1.0425713062286377,grad_norm: 0.9999991011332275, iteration: 391367
loss: 1.0333815813064575,grad_norm: 0.99999945505835, iteration: 391368
loss: 1.0029296875,grad_norm: 0.8823584930615801, iteration: 391369
loss: 1.04636812210083,grad_norm: 0.8676062053874781, iteration: 391370
loss: 0.9946141242980957,grad_norm: 0.9287271884326426, iteration: 391371
loss: 1.0413397550582886,grad_norm: 0.9999997248254776, iteration: 391372
loss: 0.9968435764312744,grad_norm: 0.6308061957357373, iteration: 391373
loss: 0.9993263483047485,grad_norm: 0.7800068098394536, iteration: 391374
loss: 1.0048775672912598,grad_norm: 0.9999992182802309, iteration: 391375
loss: 0.9893152713775635,grad_norm: 0.9999991855192801, iteration: 391376
loss: 1.061971664428711,grad_norm: 0.966021345132464, iteration: 391377
loss: 1.0434590578079224,grad_norm: 0.7986308930282945, iteration: 391378
loss: 0.9726772308349609,grad_norm: 0.918180628581934, iteration: 391379
loss: 1.0176976919174194,grad_norm: 0.7968015586710604, iteration: 391380
loss: 0.9991589784622192,grad_norm: 0.8318743660230343, iteration: 391381
loss: 1.0077617168426514,grad_norm: 0.9794733399040074, iteration: 391382
loss: 1.046953558921814,grad_norm: 0.8176748155924999, iteration: 391383
loss: 0.9982807040214539,grad_norm: 0.8160728610986789, iteration: 391384
loss: 1.0299569368362427,grad_norm: 0.6404236307186838, iteration: 391385
loss: 1.0244024991989136,grad_norm: 0.7653870605372681, iteration: 391386
loss: 1.01613450050354,grad_norm: 0.6932809743423264, iteration: 391387
loss: 1.013789176940918,grad_norm: 0.824648011024993, iteration: 391388
loss: 1.0262162685394287,grad_norm: 0.8322344063314215, iteration: 391389
loss: 1.112481713294983,grad_norm: 0.8870808273139646, iteration: 391390
loss: 1.0647892951965332,grad_norm: 0.9999996067868223, iteration: 391391
loss: 1.023004412651062,grad_norm: 0.9999990682251162, iteration: 391392
loss: 1.021677851676941,grad_norm: 0.9885563369436446, iteration: 391393
loss: 1.0248440504074097,grad_norm: 0.6840131489248292, iteration: 391394
loss: 0.9821919798851013,grad_norm: 0.9504674534877338, iteration: 391395
loss: 0.9918064475059509,grad_norm: 0.9999991320884974, iteration: 391396
loss: 1.0000234842300415,grad_norm: 0.9114886908479131, iteration: 391397
loss: 1.0137207508087158,grad_norm: 0.8133166505671504, iteration: 391398
loss: 1.0071271657943726,grad_norm: 0.6694898521464993, iteration: 391399
loss: 1.0096495151519775,grad_norm: 0.7181366319507708, iteration: 391400
loss: 1.0330380201339722,grad_norm: 0.9999991473435935, iteration: 391401
loss: 1.0255122184753418,grad_norm: 0.7484696424845843, iteration: 391402
loss: 0.9846999645233154,grad_norm: 0.9999999352486512, iteration: 391403
loss: 0.9614805579185486,grad_norm: 0.7463079581506138, iteration: 391404
loss: 0.9885468482971191,grad_norm: 0.9999996783187303, iteration: 391405
loss: 1.0042940378189087,grad_norm: 0.8371611923787743, iteration: 391406
loss: 1.0128859281539917,grad_norm: 0.9255078202507203, iteration: 391407
loss: 0.986712634563446,grad_norm: 0.9999996071368693, iteration: 391408
loss: 1.0190002918243408,grad_norm: 0.9999998791742277, iteration: 391409
loss: 1.054342269897461,grad_norm: 0.8872474150829983, iteration: 391410
loss: 1.023945927619934,grad_norm: 0.9215037024789386, iteration: 391411
loss: 1.0076254606246948,grad_norm: 0.7354701714439786, iteration: 391412
loss: 1.030165433883667,grad_norm: 0.7035338268939001, iteration: 391413
loss: 1.0678268671035767,grad_norm: 0.7540330113910749, iteration: 391414
loss: 1.0107218027114868,grad_norm: 0.740141551060165, iteration: 391415
loss: 1.1961171627044678,grad_norm: 0.9999996886873994, iteration: 391416
loss: 1.0139997005462646,grad_norm: 0.9999991841820016, iteration: 391417
loss: 1.0466448068618774,grad_norm: 0.9999999082840368, iteration: 391418
loss: 0.9836200475692749,grad_norm: 0.8381873680579351, iteration: 391419
loss: 1.033841609954834,grad_norm: 0.9999997029183604, iteration: 391420
loss: 0.9998565316200256,grad_norm: 0.7902791078201046, iteration: 391421
loss: 0.9767335653305054,grad_norm: 0.8857895847729879, iteration: 391422
loss: 1.0040395259857178,grad_norm: 0.9999990425656917, iteration: 391423
loss: 1.0015827417373657,grad_norm: 0.8854987493641209, iteration: 391424
loss: 1.009266972541809,grad_norm: 0.715237114446632, iteration: 391425
loss: 1.0153788328170776,grad_norm: 0.7572521457254532, iteration: 391426
loss: 1.0410568714141846,grad_norm: 0.9999997667298883, iteration: 391427
loss: 1.2047741413116455,grad_norm: 0.9999990696494915, iteration: 391428
loss: 1.017403244972229,grad_norm: 0.8420048920558791, iteration: 391429
loss: 0.9983641505241394,grad_norm: 0.9999990830126726, iteration: 391430
loss: 0.9872677326202393,grad_norm: 0.6731796803228592, iteration: 391431
loss: 0.9896091818809509,grad_norm: 0.8014220917352713, iteration: 391432
loss: 0.9700953960418701,grad_norm: 0.9034850811554849, iteration: 391433
loss: 1.0551689863204956,grad_norm: 0.9904794152330917, iteration: 391434
loss: 1.0664680004119873,grad_norm: 0.9999993281109859, iteration: 391435
loss: 1.0036559104919434,grad_norm: 0.7256485748498017, iteration: 391436
loss: 0.9965433478355408,grad_norm: 0.7641514145123209, iteration: 391437
loss: 0.986162543296814,grad_norm: 0.829259886653421, iteration: 391438
loss: 1.1159722805023193,grad_norm: 0.9349988284525622, iteration: 391439
loss: 1.0372211933135986,grad_norm: 0.7832955872390504, iteration: 391440
loss: 1.0009503364562988,grad_norm: 0.9999991624248031, iteration: 391441
loss: 0.9854128360748291,grad_norm: 0.8093368904984015, iteration: 391442
loss: 0.9916355609893799,grad_norm: 0.9549816385843021, iteration: 391443
loss: 1.0053266286849976,grad_norm: 0.7151214040956243, iteration: 391444
loss: 0.9727168679237366,grad_norm: 0.789122958102757, iteration: 391445
loss: 0.9843385219573975,grad_norm: 0.849623665022013, iteration: 391446
loss: 1.0151047706604004,grad_norm: 0.8182568892411359, iteration: 391447
loss: 1.0577090978622437,grad_norm: 0.9057990329201979, iteration: 391448
loss: 0.9919379353523254,grad_norm: 0.7859313239520709, iteration: 391449
loss: 1.0923279523849487,grad_norm: 0.7316366397131822, iteration: 391450
loss: 0.9757386445999146,grad_norm: 0.8504372522480944, iteration: 391451
loss: 1.001403570175171,grad_norm: 0.8376692651421814, iteration: 391452
loss: 0.9520012736320496,grad_norm: 0.7947745820388132, iteration: 391453
loss: 1.029281497001648,grad_norm: 0.952677707362688, iteration: 391454
loss: 0.9766689538955688,grad_norm: 0.7049185897863006, iteration: 391455
loss: 1.0044904947280884,grad_norm: 0.7962307381174333, iteration: 391456
loss: 0.9995556473731995,grad_norm: 0.8666525250908633, iteration: 391457
loss: 1.070228099822998,grad_norm: 0.880311805587965, iteration: 391458
loss: 1.1256881952285767,grad_norm: 0.9853723324825703, iteration: 391459
loss: 1.0124187469482422,grad_norm: 0.9925931178888332, iteration: 391460
loss: 1.0927753448486328,grad_norm: 0.7693591541107093, iteration: 391461
loss: 1.0273938179016113,grad_norm: 0.9999996409040219, iteration: 391462
loss: 1.0350449085235596,grad_norm: 0.8587562648964537, iteration: 391463
loss: 0.9992921352386475,grad_norm: 0.8915357843554564, iteration: 391464
loss: 1.0298534631729126,grad_norm: 0.9484039385439773, iteration: 391465
loss: 1.0169668197631836,grad_norm: 0.7934489750543288, iteration: 391466
loss: 1.0100098848342896,grad_norm: 0.771295849341064, iteration: 391467
loss: 1.0113420486450195,grad_norm: 0.7240085428061244, iteration: 391468
loss: 0.9913516640663147,grad_norm: 0.718567400008086, iteration: 391469
loss: 1.0647456645965576,grad_norm: 0.9999992457274395, iteration: 391470
loss: 1.0177326202392578,grad_norm: 0.9999999351337768, iteration: 391471
loss: 1.0019934177398682,grad_norm: 0.9999998782818099, iteration: 391472
loss: 0.9834169149398804,grad_norm: 0.671918296733345, iteration: 391473
loss: 1.0066955089569092,grad_norm: 0.9999992806744323, iteration: 391474
loss: 1.0142650604248047,grad_norm: 0.9661151260463785, iteration: 391475
loss: 0.9951273202896118,grad_norm: 0.9049306419973362, iteration: 391476
loss: 1.0156358480453491,grad_norm: 0.7332151325153992, iteration: 391477
loss: 0.9889919757843018,grad_norm: 0.7120149161726105, iteration: 391478
loss: 1.0132578611373901,grad_norm: 0.7275879058510578, iteration: 391479
loss: 0.9866958260536194,grad_norm: 0.7229980415220351, iteration: 391480
loss: 1.023469090461731,grad_norm: 0.8734246082673062, iteration: 391481
loss: 0.9977872967720032,grad_norm: 0.7437584658298583, iteration: 391482
loss: 0.9840140342712402,grad_norm: 0.7058079248616173, iteration: 391483
loss: 0.9981938004493713,grad_norm: 0.7640936288387875, iteration: 391484
loss: 0.998338520526886,grad_norm: 0.7946188723460482, iteration: 391485
loss: 0.9739328026771545,grad_norm: 0.9999990605205432, iteration: 391486
loss: 0.9916625618934631,grad_norm: 0.9999992352420634, iteration: 391487
loss: 0.9732308983802795,grad_norm: 0.7746353445974923, iteration: 391488
loss: 1.028384804725647,grad_norm: 0.8074140306870233, iteration: 391489
loss: 0.9987186789512634,grad_norm: 0.7500644727292892, iteration: 391490
loss: 1.0032989978790283,grad_norm: 0.8815668067180129, iteration: 391491
loss: 1.0231428146362305,grad_norm: 0.9999995054384178, iteration: 391492
loss: 0.9982302784919739,grad_norm: 0.7995515926144956, iteration: 391493
loss: 1.1654658317565918,grad_norm: 0.9999998066699902, iteration: 391494
loss: 1.0119823217391968,grad_norm: 0.9137811397543751, iteration: 391495
loss: 1.0280060768127441,grad_norm: 0.9999996955156832, iteration: 391496
loss: 1.0187448263168335,grad_norm: 0.9999999047657675, iteration: 391497
loss: 0.9884081482887268,grad_norm: 0.7183701665102991, iteration: 391498
loss: 0.9887975454330444,grad_norm: 0.8318343287249094, iteration: 391499
loss: 1.0454109907150269,grad_norm: 0.7711216911365812, iteration: 391500
loss: 0.9654383063316345,grad_norm: 0.8670363770286841, iteration: 391501
loss: 1.0447648763656616,grad_norm: 0.9999997430333589, iteration: 391502
loss: 1.0267584323883057,grad_norm: 0.6852061450426502, iteration: 391503
loss: 1.0241862535476685,grad_norm: 0.7386317420855135, iteration: 391504
loss: 1.0070838928222656,grad_norm: 0.6923470107558798, iteration: 391505
loss: 1.0232013463974,grad_norm: 0.820308173703823, iteration: 391506
loss: 0.9880723357200623,grad_norm: 0.7163562267493521, iteration: 391507
loss: 0.9854196310043335,grad_norm: 0.6587205450786834, iteration: 391508
loss: 0.9919549226760864,grad_norm: 0.7797723738276936, iteration: 391509
loss: 0.9953659772872925,grad_norm: 0.6603150434242717, iteration: 391510
loss: 0.9704462885856628,grad_norm: 0.7481269753339255, iteration: 391511
loss: 0.9690736532211304,grad_norm: 0.7271683394456565, iteration: 391512
loss: 1.007001519203186,grad_norm: 0.8365416504734458, iteration: 391513
loss: 1.011070966720581,grad_norm: 0.7110925558313055, iteration: 391514
loss: 1.0151050090789795,grad_norm: 0.7823588687665108, iteration: 391515
loss: 0.9998669624328613,grad_norm: 0.7823228736685626, iteration: 391516
loss: 1.0106782913208008,grad_norm: 0.7923385762624175, iteration: 391517
loss: 1.028666615486145,grad_norm: 0.7470617451880909, iteration: 391518
loss: 1.0241209268569946,grad_norm: 0.7269131757732511, iteration: 391519
loss: 1.010312795639038,grad_norm: 0.8997948345066754, iteration: 391520
loss: 0.9885831475257874,grad_norm: 0.7774594175951215, iteration: 391521
loss: 0.9886083006858826,grad_norm: 0.999999376647778, iteration: 391522
loss: 1.0133445262908936,grad_norm: 0.9656856922103123, iteration: 391523
loss: 0.9864163994789124,grad_norm: 0.8645180936163711, iteration: 391524
loss: 1.0203262567520142,grad_norm: 0.8804426859151411, iteration: 391525
loss: 1.0128567218780518,grad_norm: 0.7478955174237127, iteration: 391526
loss: 0.9764171242713928,grad_norm: 0.7772086086029154, iteration: 391527
loss: 1.024599552154541,grad_norm: 0.7582196029408106, iteration: 391528
loss: 1.0170496702194214,grad_norm: 0.880946644428744, iteration: 391529
loss: 1.0032614469528198,grad_norm: 0.9999990764959223, iteration: 391530
loss: 1.0091472864151,grad_norm: 0.9111857995781754, iteration: 391531
loss: 1.079454779624939,grad_norm: 0.7597768386321405, iteration: 391532
loss: 1.0200588703155518,grad_norm: 0.861550351194585, iteration: 391533
loss: 1.1652299165725708,grad_norm: 0.8347078780755166, iteration: 391534
loss: 0.989680826663971,grad_norm: 0.6726780694487997, iteration: 391535
loss: 0.9823344945907593,grad_norm: 0.6839172516787781, iteration: 391536
loss: 1.0153770446777344,grad_norm: 0.9999998570975491, iteration: 391537
loss: 1.0074063539505005,grad_norm: 0.9665688638341775, iteration: 391538
loss: 0.9953007698059082,grad_norm: 0.891600731214507, iteration: 391539
loss: 1.0852009057998657,grad_norm: 0.9999994518289171, iteration: 391540
loss: 0.9984965920448303,grad_norm: 0.7857829070747032, iteration: 391541
loss: 1.006385326385498,grad_norm: 0.7737169896986809, iteration: 391542
loss: 1.105329990386963,grad_norm: 0.9999998743624553, iteration: 391543
loss: 0.9956952929496765,grad_norm: 0.7071100144765808, iteration: 391544
loss: 1.0127880573272705,grad_norm: 0.7408748745148511, iteration: 391545
loss: 1.2170894145965576,grad_norm: 0.99999986972465, iteration: 391546
loss: 0.9740931987762451,grad_norm: 0.7573356187859422, iteration: 391547
loss: 0.9976245164871216,grad_norm: 0.7579113902986508, iteration: 391548
loss: 1.0253437757492065,grad_norm: 0.9999990737730691, iteration: 391549
loss: 0.9882363080978394,grad_norm: 0.8414814324956551, iteration: 391550
loss: 1.0212363004684448,grad_norm: 0.9999994343853148, iteration: 391551
loss: 1.0567585229873657,grad_norm: 0.8568853767601363, iteration: 391552
loss: 1.088243007659912,grad_norm: 0.9999997303057405, iteration: 391553
loss: 1.0171984434127808,grad_norm: 0.6996945057288392, iteration: 391554
loss: 1.0462983846664429,grad_norm: 0.7626588691702672, iteration: 391555
loss: 0.9953610897064209,grad_norm: 0.7146719024701677, iteration: 391556
loss: 0.9815238118171692,grad_norm: 0.7845899080266323, iteration: 391557
loss: 1.0183955430984497,grad_norm: 0.720161052484099, iteration: 391558
loss: 1.0149916410446167,grad_norm: 0.759131349648634, iteration: 391559
loss: 0.9869784712791443,grad_norm: 0.99999897520669, iteration: 391560
loss: 1.053438663482666,grad_norm: 0.8640442418585674, iteration: 391561
loss: 1.0318959951400757,grad_norm: 0.8713543645768987, iteration: 391562
loss: 1.0123952627182007,grad_norm: 0.726223228710364, iteration: 391563
loss: 0.992285430431366,grad_norm: 0.8313437033068, iteration: 391564
loss: 1.0439070463180542,grad_norm: 0.9999992977187737, iteration: 391565
loss: 1.0333483219146729,grad_norm: 0.9331469935171692, iteration: 391566
loss: 1.0354756116867065,grad_norm: 0.9999998971309381, iteration: 391567
loss: 1.0009253025054932,grad_norm: 0.6181870651914994, iteration: 391568
loss: 1.0282827615737915,grad_norm: 0.9527793651775053, iteration: 391569
loss: 1.0078020095825195,grad_norm: 0.8272945064426556, iteration: 391570
loss: 0.9926292300224304,grad_norm: 0.7848496885295169, iteration: 391571
loss: 1.0262726545333862,grad_norm: 0.8714150618357649, iteration: 391572
loss: 1.0999512672424316,grad_norm: 0.9999992889681528, iteration: 391573
loss: 0.9993720650672913,grad_norm: 0.9999992724520603, iteration: 391574
loss: 1.072989583015442,grad_norm: 0.9999991208080784, iteration: 391575
loss: 0.9907101392745972,grad_norm: 0.7825048191695845, iteration: 391576
loss: 1.015095829963684,grad_norm: 0.8047551214901042, iteration: 391577
loss: 1.0259960889816284,grad_norm: 0.7684036629290721, iteration: 391578
loss: 0.9902907609939575,grad_norm: 0.8619555929695322, iteration: 391579
loss: 1.0379348993301392,grad_norm: 0.6926038594069388, iteration: 391580
loss: 1.043190598487854,grad_norm: 0.7764294119637062, iteration: 391581
loss: 0.963861882686615,grad_norm: 0.692477714545424, iteration: 391582
loss: 1.061883807182312,grad_norm: 0.999999353410142, iteration: 391583
loss: 1.0424233675003052,grad_norm: 0.9999996836764521, iteration: 391584
loss: 1.0536617040634155,grad_norm: 0.7415536950722839, iteration: 391585
loss: 0.9754979014396667,grad_norm: 0.6160510785350087, iteration: 391586
loss: 0.9870628714561462,grad_norm: 0.7491544142043545, iteration: 391587
loss: 1.1307023763656616,grad_norm: 0.9999992758653855, iteration: 391588
loss: 0.9786161184310913,grad_norm: 0.7565122788630627, iteration: 391589
loss: 0.9950319528579712,grad_norm: 0.6565006515539412, iteration: 391590
loss: 1.0001397132873535,grad_norm: 0.8479553052963236, iteration: 391591
loss: 0.9771260023117065,grad_norm: 0.8347195941966672, iteration: 391592
loss: 0.9969059824943542,grad_norm: 0.7688153287720532, iteration: 391593
loss: 1.0268031358718872,grad_norm: 0.725667837544423, iteration: 391594
loss: 1.0334484577178955,grad_norm: 0.9999998375611321, iteration: 391595
loss: 0.9784783124923706,grad_norm: 0.8401135078912229, iteration: 391596
loss: 1.143905758857727,grad_norm: 1.0000000173553005, iteration: 391597
loss: 0.9760774970054626,grad_norm: 0.7622362283089565, iteration: 391598
loss: 0.9836779832839966,grad_norm: 0.8695487082584807, iteration: 391599
loss: 0.9849750995635986,grad_norm: 0.7951729083078737, iteration: 391600
loss: 1.0135846138000488,grad_norm: 0.8190588453749449, iteration: 391601
loss: 1.0276620388031006,grad_norm: 0.7192303851908017, iteration: 391602
loss: 0.9878605604171753,grad_norm: 0.9999989801549991, iteration: 391603
loss: 0.9995846152305603,grad_norm: 0.8417497324283943, iteration: 391604
loss: 0.9756031036376953,grad_norm: 0.8162450270624273, iteration: 391605
loss: 1.063630223274231,grad_norm: 0.9999994012925074, iteration: 391606
loss: 1.02239191532135,grad_norm: 0.976807151497454, iteration: 391607
loss: 1.0494334697723389,grad_norm: 0.9999990333587342, iteration: 391608
loss: 1.013078212738037,grad_norm: 0.6633045964070878, iteration: 391609
loss: 1.0178260803222656,grad_norm: 0.8525221474283092, iteration: 391610
loss: 0.9708031415939331,grad_norm: 0.8063408401538398, iteration: 391611
loss: 1.0112887620925903,grad_norm: 0.707371731875514, iteration: 391612
loss: 1.0147480964660645,grad_norm: 0.7345757265111464, iteration: 391613
loss: 0.9694552421569824,grad_norm: 0.8618879507854986, iteration: 391614
loss: 1.0172312259674072,grad_norm: 0.9734792934902925, iteration: 391615
loss: 0.9878352284431458,grad_norm: 0.7430852761421568, iteration: 391616
loss: 0.9944379329681396,grad_norm: 0.844635878863141, iteration: 391617
loss: 0.9963862895965576,grad_norm: 0.7088111051402459, iteration: 391618
loss: 0.9959635734558105,grad_norm: 0.8698590217061213, iteration: 391619
loss: 1.0107399225234985,grad_norm: 0.7444266986298607, iteration: 391620
loss: 1.0380140542984009,grad_norm: 0.6889391220186445, iteration: 391621
loss: 1.0771245956420898,grad_norm: 0.9999996145450412, iteration: 391622
loss: 1.0631521940231323,grad_norm: 0.8202711339032803, iteration: 391623
loss: 1.0123090744018555,grad_norm: 0.9999993254650299, iteration: 391624
loss: 1.0655444860458374,grad_norm: 0.801598850160337, iteration: 391625
loss: 0.9877589344978333,grad_norm: 0.8809428090246874, iteration: 391626
loss: 1.00715970993042,grad_norm: 0.8225053717728638, iteration: 391627
loss: 0.9799669981002808,grad_norm: 0.9999995234535956, iteration: 391628
loss: 1.033878207206726,grad_norm: 0.9999996496216338, iteration: 391629
loss: 0.9905057549476624,grad_norm: 0.8171919876276009, iteration: 391630
loss: 1.080062747001648,grad_norm: 0.9999998242471128, iteration: 391631
loss: 1.055862307548523,grad_norm: 0.850199606851932, iteration: 391632
loss: 0.9941860437393188,grad_norm: 0.7773605254966923, iteration: 391633
loss: 1.0628564357757568,grad_norm: 0.9999996297369972, iteration: 391634
loss: 1.0833449363708496,grad_norm: 0.7157490720156907, iteration: 391635
loss: 0.9709928631782532,grad_norm: 0.7254743333998634, iteration: 391636
loss: 0.9722661972045898,grad_norm: 0.8273615713236139, iteration: 391637
loss: 1.0580511093139648,grad_norm: 0.9999999493698016, iteration: 391638
loss: 1.0204561948776245,grad_norm: 0.9999997378642789, iteration: 391639
loss: 1.0712999105453491,grad_norm: 0.8877108115538781, iteration: 391640
loss: 1.0120810270309448,grad_norm: 0.914099095532644, iteration: 391641
loss: 1.1019923686981201,grad_norm: 0.9999993576187772, iteration: 391642
loss: 1.014203429222107,grad_norm: 0.8259535962299879, iteration: 391643
loss: 1.0074851512908936,grad_norm: 0.7066743711277459, iteration: 391644
loss: 0.9707109928131104,grad_norm: 0.7472425482217185, iteration: 391645
loss: 1.024665355682373,grad_norm: 0.7442910739851751, iteration: 391646
loss: 1.0243093967437744,grad_norm: 0.8184239826320481, iteration: 391647
loss: 0.9873473644256592,grad_norm: 0.7124539695472548, iteration: 391648
loss: 0.988722562789917,grad_norm: 0.7534367773175991, iteration: 391649
loss: 1.0609968900680542,grad_norm: 0.9999999683807771, iteration: 391650
loss: 0.9930712580680847,grad_norm: 0.9999995605379155, iteration: 391651
loss: 1.0274313688278198,grad_norm: 0.9999998037495551, iteration: 391652
loss: 0.9825952649116516,grad_norm: 0.7876716056444021, iteration: 391653
loss: 1.0071474313735962,grad_norm: 0.7021730731395358, iteration: 391654
loss: 1.0208901166915894,grad_norm: 0.8309653304233638, iteration: 391655
loss: 0.9824748039245605,grad_norm: 0.7191680266409989, iteration: 391656
loss: 0.9845414161682129,grad_norm: 0.7468366271229444, iteration: 391657
loss: 1.0683759450912476,grad_norm: 0.7384514208396394, iteration: 391658
loss: 1.0128637552261353,grad_norm: 0.8042859271972986, iteration: 391659
loss: 1.0489308834075928,grad_norm: 0.9999991145247948, iteration: 391660
loss: 0.9746027588844299,grad_norm: 0.8144280812653059, iteration: 391661
loss: 0.9909328818321228,grad_norm: 0.7399725550939984, iteration: 391662
loss: 0.9997124671936035,grad_norm: 0.7742285532665644, iteration: 391663
loss: 0.9897939562797546,grad_norm: 0.8846357559364934, iteration: 391664
loss: 0.9678842425346375,grad_norm: 0.8096267210575852, iteration: 391665
loss: 1.0056829452514648,grad_norm: 0.9999992634943674, iteration: 391666
loss: 1.0900276899337769,grad_norm: 0.7901633239111937, iteration: 391667
loss: 1.019256830215454,grad_norm: 0.7825652297852242, iteration: 391668
loss: 1.0372837781906128,grad_norm: 0.7858509686558863, iteration: 391669
loss: 0.9985986948013306,grad_norm: 0.8606309821220005, iteration: 391670
loss: 1.0354013442993164,grad_norm: 0.9999991207013538, iteration: 391671
loss: 0.997740626335144,grad_norm: 0.7425395112880575, iteration: 391672
loss: 1.0049513578414917,grad_norm: 0.999999730040048, iteration: 391673
loss: 1.0201419591903687,grad_norm: 0.8767566714437852, iteration: 391674
loss: 1.0162203311920166,grad_norm: 0.7831401486075807, iteration: 391675
loss: 0.9878284335136414,grad_norm: 0.6463895200875726, iteration: 391676
loss: 1.0219945907592773,grad_norm: 0.999999251148697, iteration: 391677
loss: 1.0408157110214233,grad_norm: 0.9999992845099186, iteration: 391678
loss: 1.0255590677261353,grad_norm: 0.8356389921143953, iteration: 391679
loss: 0.9829891920089722,grad_norm: 0.9999998857821659, iteration: 391680
loss: 1.0278013944625854,grad_norm: 0.999999181425551, iteration: 391681
loss: 1.0869508981704712,grad_norm: 0.9999992846588701, iteration: 391682
loss: 1.0126417875289917,grad_norm: 0.9999998121746466, iteration: 391683
loss: 0.9847863912582397,grad_norm: 0.7989926421969569, iteration: 391684
loss: 1.0250086784362793,grad_norm: 0.9517323269206466, iteration: 391685
loss: 1.0291755199432373,grad_norm: 0.9999994504870807, iteration: 391686
loss: 1.0147488117218018,grad_norm: 0.7396272337203109, iteration: 391687
loss: 1.005027174949646,grad_norm: 0.9590740300786961, iteration: 391688
loss: 1.0125136375427246,grad_norm: 0.6531630124499396, iteration: 391689
loss: 1.001298427581787,grad_norm: 0.8916325278268783, iteration: 391690
loss: 1.0040723085403442,grad_norm: 0.7949720523031969, iteration: 391691
loss: 1.0189534425735474,grad_norm: 0.9999992595784661, iteration: 391692
loss: 1.0232731103897095,grad_norm: 0.9999992762983698, iteration: 391693
loss: 1.0061588287353516,grad_norm: 0.8040022279246172, iteration: 391694
loss: 0.9936439394950867,grad_norm: 0.8863540027616132, iteration: 391695
loss: 0.9683679938316345,grad_norm: 0.7546700676446043, iteration: 391696
loss: 1.0203014612197876,grad_norm: 0.9999999638521192, iteration: 391697
loss: 0.993241012096405,grad_norm: 0.7071717781697955, iteration: 391698
loss: 1.042006492614746,grad_norm: 0.999999276222315, iteration: 391699
loss: 1.0417323112487793,grad_norm: 0.7900765676235532, iteration: 391700
loss: 0.9608244299888611,grad_norm: 0.9999994224339686, iteration: 391701
loss: 1.0252535343170166,grad_norm: 0.7560059077357537, iteration: 391702
loss: 1.0234897136688232,grad_norm: 0.761352078991701, iteration: 391703
loss: 1.164684772491455,grad_norm: 1.0000000467338255, iteration: 391704
loss: 0.9642213582992554,grad_norm: 0.716999350953667, iteration: 391705
loss: 1.0452558994293213,grad_norm: 0.8096133444373277, iteration: 391706
loss: 0.9900405406951904,grad_norm: 0.8340671543387195, iteration: 391707
loss: 1.002347707748413,grad_norm: 0.8624292387840076, iteration: 391708
loss: 1.0646312236785889,grad_norm: 0.9112742333018647, iteration: 391709
loss: 0.9791665077209473,grad_norm: 0.8157148271609705, iteration: 391710
loss: 1.0110270977020264,grad_norm: 0.663298314884327, iteration: 391711
loss: 1.0151749849319458,grad_norm: 0.7457368626626247, iteration: 391712
loss: 1.0372602939605713,grad_norm: 0.8285144961146226, iteration: 391713
loss: 1.0101178884506226,grad_norm: 0.8717838244158234, iteration: 391714
loss: 0.9984764456748962,grad_norm: 0.8555045043925619, iteration: 391715
loss: 0.9871653318405151,grad_norm: 0.824250755099672, iteration: 391716
loss: 1.0611909627914429,grad_norm: 0.9059541993062011, iteration: 391717
loss: 0.9767871499061584,grad_norm: 0.9119187355975948, iteration: 391718
loss: 0.9967603087425232,grad_norm: 0.9999994398851451, iteration: 391719
loss: 1.062604546546936,grad_norm: 0.8595878628347182, iteration: 391720
loss: 1.0918059349060059,grad_norm: 0.6835728099998925, iteration: 391721
loss: 1.00813889503479,grad_norm: 0.6466805420304796, iteration: 391722
loss: 1.0262045860290527,grad_norm: 0.802896232996552, iteration: 391723
loss: 1.1044167280197144,grad_norm: 0.7958726014463438, iteration: 391724
loss: 1.0316962003707886,grad_norm: 0.8631129952714633, iteration: 391725
loss: 1.017817497253418,grad_norm: 0.7968051954757983, iteration: 391726
loss: 1.0177521705627441,grad_norm: 0.8462247592115074, iteration: 391727
loss: 1.013883113861084,grad_norm: 0.7774620661106133, iteration: 391728
loss: 1.0064858198165894,grad_norm: 0.9207212974309117, iteration: 391729
loss: 0.9975507855415344,grad_norm: 0.9553187432036055, iteration: 391730
loss: 0.9937817454338074,grad_norm: 0.9705585194461385, iteration: 391731
loss: 0.9809261560440063,grad_norm: 0.736817463387142, iteration: 391732
loss: 0.9991611838340759,grad_norm: 0.9595278336517642, iteration: 391733
loss: 0.9893631935119629,grad_norm: 0.7905753075254727, iteration: 391734
loss: 1.0285792350769043,grad_norm: 0.7587772847498208, iteration: 391735
loss: 1.0086767673492432,grad_norm: 0.9999997294817387, iteration: 391736
loss: 1.0930429697036743,grad_norm: 0.9409991868045697, iteration: 391737
loss: 1.0250324010849,grad_norm: 0.7741078880337071, iteration: 391738
loss: 1.010543704032898,grad_norm: 0.6319494945237965, iteration: 391739
loss: 0.994626522064209,grad_norm: 0.6947633810407513, iteration: 391740
loss: 0.9960169792175293,grad_norm: 0.880006087567506, iteration: 391741
loss: 1.0352972745895386,grad_norm: 0.7942304597513581, iteration: 391742
loss: 1.0291082859039307,grad_norm: 0.747187588482566, iteration: 391743
loss: 1.0033153295516968,grad_norm: 0.7084896511247015, iteration: 391744
loss: 1.005824089050293,grad_norm: 0.7960061568343273, iteration: 391745
loss: 1.0546596050262451,grad_norm: 0.944550661973972, iteration: 391746
loss: 0.9742878675460815,grad_norm: 0.8899853230443604, iteration: 391747
loss: 1.0726735591888428,grad_norm: 0.9209178771735549, iteration: 391748
loss: 1.0104875564575195,grad_norm: 0.9978742674724855, iteration: 391749
loss: 0.9978384375572205,grad_norm: 0.9934518370035488, iteration: 391750
loss: 1.0036009550094604,grad_norm: 0.8239452116918049, iteration: 391751
loss: 0.9670312404632568,grad_norm: 0.8087296315828423, iteration: 391752
loss: 1.0104259252548218,grad_norm: 0.8014895025590115, iteration: 391753
loss: 1.0055344104766846,grad_norm: 0.9031598269752851, iteration: 391754
loss: 1.006272792816162,grad_norm: 0.8158663321046886, iteration: 391755
loss: 1.023901343345642,grad_norm: 0.999999717581257, iteration: 391756
loss: 0.9809204339981079,grad_norm: 0.8184417636736586, iteration: 391757
loss: 1.1033638715744019,grad_norm: 0.9999995427152353, iteration: 391758
loss: 1.0228674411773682,grad_norm: 0.9999996912166992, iteration: 391759
loss: 1.0387674570083618,grad_norm: 0.7225813233482103, iteration: 391760
loss: 1.0293172597885132,grad_norm: 0.9999993275558647, iteration: 391761
loss: 1.0255793333053589,grad_norm: 0.82614736251537, iteration: 391762
loss: 0.9892987608909607,grad_norm: 0.9999990110259689, iteration: 391763
loss: 0.9461351037025452,grad_norm: 0.9114582992933583, iteration: 391764
loss: 0.9797412157058716,grad_norm: 0.6964252254271448, iteration: 391765
loss: 1.0051180124282837,grad_norm: 0.9999991574870795, iteration: 391766
loss: 1.0662109851837158,grad_norm: 0.9999998906380324, iteration: 391767
loss: 1.050551176071167,grad_norm: 0.9999994522020667, iteration: 391768
loss: 1.0570268630981445,grad_norm: 0.8262723637542285, iteration: 391769
loss: 0.990799069404602,grad_norm: 0.9279734194853949, iteration: 391770
loss: 0.9998286962509155,grad_norm: 0.9045854650530254, iteration: 391771
loss: 1.0215048789978027,grad_norm: 0.8588083042968175, iteration: 391772
loss: 0.986527144908905,grad_norm: 0.891750618636219, iteration: 391773
loss: 0.9915075302124023,grad_norm: 0.7227764281443634, iteration: 391774
loss: 0.9559995532035828,grad_norm: 0.9999990809118497, iteration: 391775
loss: 0.9690719842910767,grad_norm: 0.9999991231283857, iteration: 391776
loss: 0.9903524518013,grad_norm: 0.6865606408665947, iteration: 391777
loss: 1.0290833711624146,grad_norm: 0.962062537296765, iteration: 391778
loss: 1.0381619930267334,grad_norm: 0.9999999601073484, iteration: 391779
loss: 0.9704177975654602,grad_norm: 0.8492042793503919, iteration: 391780
loss: 1.0389337539672852,grad_norm: 0.8018937427989133, iteration: 391781
loss: 0.982184112071991,grad_norm: 0.6986840912864444, iteration: 391782
loss: 1.0766757726669312,grad_norm: 0.9999992310452389, iteration: 391783
loss: 0.991790771484375,grad_norm: 0.7571609077182272, iteration: 391784
loss: 1.003889799118042,grad_norm: 0.8696631134980214, iteration: 391785
loss: 1.0357967615127563,grad_norm: 0.581296398592391, iteration: 391786
loss: 0.9783239364624023,grad_norm: 0.9331669258251042, iteration: 391787
loss: 1.0232800245285034,grad_norm: 0.9999991020461313, iteration: 391788
loss: 1.0521522760391235,grad_norm: 1.000000004388894, iteration: 391789
loss: 1.0905523300170898,grad_norm: 0.9999995984077901, iteration: 391790
loss: 0.9709196090698242,grad_norm: 0.999492751040903, iteration: 391791
loss: 1.0306624174118042,grad_norm: 0.9999994608056053, iteration: 391792
loss: 1.0220036506652832,grad_norm: 0.7784489361791629, iteration: 391793
loss: 1.0610872507095337,grad_norm: 0.9999991577507933, iteration: 391794
loss: 1.0544379949569702,grad_norm: 0.9999996855192009, iteration: 391795
loss: 0.9911639094352722,grad_norm: 0.9850369482978715, iteration: 391796
loss: 1.0110660791397095,grad_norm: 0.7994899120002991, iteration: 391797
loss: 1.0676156282424927,grad_norm: 1.000000050866427, iteration: 391798
loss: 1.0986655950546265,grad_norm: 0.9999996075422304, iteration: 391799
loss: 1.030921459197998,grad_norm: 0.8522314499771276, iteration: 391800
loss: 1.1096947193145752,grad_norm: 0.8500153861194069, iteration: 391801
loss: 1.0293500423431396,grad_norm: 0.9999999323086418, iteration: 391802
loss: 0.9848982691764832,grad_norm: 0.9999992131850113, iteration: 391803
loss: 1.02287757396698,grad_norm: 0.6664266173069208, iteration: 391804
loss: 0.9968229532241821,grad_norm: 0.999999681784479, iteration: 391805
loss: 1.0038743019104004,grad_norm: 0.7726172403754952, iteration: 391806
loss: 1.009342074394226,grad_norm: 0.8473270321949512, iteration: 391807
loss: 1.006118893623352,grad_norm: 0.8697352619829053, iteration: 391808
loss: 1.1565821170806885,grad_norm: 0.999999550118841, iteration: 391809
loss: 1.1408905982971191,grad_norm: 0.843369205047928, iteration: 391810
loss: 0.9936728477478027,grad_norm: 0.9999996918510495, iteration: 391811
loss: 1.0453613996505737,grad_norm: 0.999998961048496, iteration: 391812
loss: 1.039338231086731,grad_norm: 0.80623984846209, iteration: 391813
loss: 1.0016179084777832,grad_norm: 0.603820323264518, iteration: 391814
loss: 1.0403343439102173,grad_norm: 0.9999993960752217, iteration: 391815
loss: 0.984756588935852,grad_norm: 0.8569649706701933, iteration: 391816
loss: 1.0972198247909546,grad_norm: 0.9999994598721131, iteration: 391817
loss: 1.117094874382019,grad_norm: 0.9999995501190784, iteration: 391818
loss: 0.9670178294181824,grad_norm: 0.8379616946279688, iteration: 391819
loss: 1.0605648756027222,grad_norm: 0.9326262601513914, iteration: 391820
loss: 1.0056066513061523,grad_norm: 0.7575372769621056, iteration: 391821
loss: 0.9810658097267151,grad_norm: 0.8054325246916494, iteration: 391822
loss: 1.024253010749817,grad_norm: 0.9999992807717195, iteration: 391823
loss: 1.1064285039901733,grad_norm: 0.9999996140689937, iteration: 391824
loss: 1.0468593835830688,grad_norm: 0.9999997015989118, iteration: 391825
loss: 0.9708118438720703,grad_norm: 0.8548338902928674, iteration: 391826
loss: 1.0184494256973267,grad_norm: 0.6719567166948692, iteration: 391827
loss: 1.0090240240097046,grad_norm: 0.8670908592834564, iteration: 391828
loss: 1.0223995447158813,grad_norm: 0.7631061526243602, iteration: 391829
loss: 0.9739800691604614,grad_norm: 0.9999988592600233, iteration: 391830
loss: 1.0094164609909058,grad_norm: 0.7226855406764054, iteration: 391831
loss: 1.0172399282455444,grad_norm: 0.8420349915362426, iteration: 391832
loss: 0.9883571863174438,grad_norm: 0.706189109095261, iteration: 391833
loss: 1.0548659563064575,grad_norm: 0.9999990733067745, iteration: 391834
loss: 0.9972233176231384,grad_norm: 0.8630583929176647, iteration: 391835
loss: 1.212660312652588,grad_norm: 0.9999996143565513, iteration: 391836
loss: 0.9964495301246643,grad_norm: 0.7328461664844764, iteration: 391837
loss: 0.9595035314559937,grad_norm: 0.8593170784507321, iteration: 391838
loss: 1.000340461730957,grad_norm: 0.7167450624453405, iteration: 391839
loss: 1.114173173904419,grad_norm: 0.9999996181571331, iteration: 391840
loss: 1.083724856376648,grad_norm: 0.9999997514955666, iteration: 391841
loss: 1.1127320528030396,grad_norm: 0.9999991043627314, iteration: 391842
loss: 1.0087907314300537,grad_norm: 0.9098646624889084, iteration: 391843
loss: 1.0275193452835083,grad_norm: 0.714137386838049, iteration: 391844
loss: 0.9727150797843933,grad_norm: 0.9504487562707639, iteration: 391845
loss: 1.0126913785934448,grad_norm: 0.999999755184983, iteration: 391846
loss: 1.0131150484085083,grad_norm: 0.8317514064569401, iteration: 391847
loss: 1.0821231603622437,grad_norm: 0.9240792095755209, iteration: 391848
loss: 1.0941046476364136,grad_norm: 0.8665414150205609, iteration: 391849
loss: 1.01661217212677,grad_norm: 0.7043185666925927, iteration: 391850
loss: 1.0239416360855103,grad_norm: 0.9999992176109287, iteration: 391851
loss: 0.9779163002967834,grad_norm: 0.7837860264490966, iteration: 391852
loss: 0.9658289551734924,grad_norm: 0.8901161741573945, iteration: 391853
loss: 1.0028836727142334,grad_norm: 0.708251039981979, iteration: 391854
loss: 1.0081684589385986,grad_norm: 0.8580860916517181, iteration: 391855
loss: 0.9731616377830505,grad_norm: 0.7328266506669671, iteration: 391856
loss: 1.0475130081176758,grad_norm: 0.9999999048821374, iteration: 391857
loss: 1.03190016746521,grad_norm: 0.9205069701820204, iteration: 391858
loss: 1.0122950077056885,grad_norm: 0.5876580474792409, iteration: 391859
loss: 0.9859863519668579,grad_norm: 0.843144330403448, iteration: 391860
loss: 1.0226155519485474,grad_norm: 0.9999990701591119, iteration: 391861
loss: 0.9500603675842285,grad_norm: 0.7768599085623842, iteration: 391862
loss: 1.005122423171997,grad_norm: 0.705048364297929, iteration: 391863
loss: 1.0115035772323608,grad_norm: 0.9999993306977296, iteration: 391864
loss: 0.9840032458305359,grad_norm: 0.687202401181335, iteration: 391865
loss: 1.0203018188476562,grad_norm: 0.9999999000861587, iteration: 391866
loss: 1.0256710052490234,grad_norm: 0.6316431532407762, iteration: 391867
loss: 1.1260496377944946,grad_norm: 0.8006456512205798, iteration: 391868
loss: 1.0800966024398804,grad_norm: 0.8634319826312812, iteration: 391869
loss: 1.017963171005249,grad_norm: 0.7933190135137955, iteration: 391870
loss: 1.0351409912109375,grad_norm: 0.7555625876895581, iteration: 391871
loss: 1.0959526300430298,grad_norm: 0.8561119882516937, iteration: 391872
loss: 1.0773799419403076,grad_norm: 0.999999383211584, iteration: 391873
loss: 1.012656807899475,grad_norm: 0.7379801159914813, iteration: 391874
loss: 0.997792661190033,grad_norm: 0.9999991738510507, iteration: 391875
loss: 1.0056180953979492,grad_norm: 0.8147091326585459, iteration: 391876
loss: 1.1950286626815796,grad_norm: 0.9999999000857466, iteration: 391877
loss: 1.012128233909607,grad_norm: 0.9999990680909784, iteration: 391878
loss: 1.0818877220153809,grad_norm: 1.0000000062042191, iteration: 391879
loss: 1.0269008874893188,grad_norm: 0.858889428982301, iteration: 391880
loss: 0.981829822063446,grad_norm: 0.8448318357788303, iteration: 391881
loss: 1.019690990447998,grad_norm: 0.9999997371949482, iteration: 391882
loss: 1.0801399946212769,grad_norm: 0.8279968043153016, iteration: 391883
loss: 1.008660078048706,grad_norm: 0.8048820811673966, iteration: 391884
loss: 1.0906875133514404,grad_norm: 0.9999994522092346, iteration: 391885
loss: 1.0812419652938843,grad_norm: 0.9999994398097596, iteration: 391886
loss: 1.0150479078292847,grad_norm: 0.7414382164808095, iteration: 391887
loss: 1.0191961526870728,grad_norm: 0.7020995776581741, iteration: 391888
loss: 1.0544697046279907,grad_norm: 0.9999994252464164, iteration: 391889
loss: 1.0274993181228638,grad_norm: 0.9999992371604911, iteration: 391890
loss: 0.9853473901748657,grad_norm: 0.7713128019106943, iteration: 391891
loss: 1.0123374462127686,grad_norm: 0.9999993727873476, iteration: 391892
loss: 0.9815858602523804,grad_norm: 0.7525887029091928, iteration: 391893
loss: 1.0076518058776855,grad_norm: 0.9999993087349781, iteration: 391894
loss: 0.9973683953285217,grad_norm: 0.7642340940966784, iteration: 391895
loss: 1.0697047710418701,grad_norm: 0.9999994004983551, iteration: 391896
loss: 1.014318585395813,grad_norm: 0.6525849518033513, iteration: 391897
loss: 1.0662661790847778,grad_norm: 0.9999991110379248, iteration: 391898
loss: 1.0059378147125244,grad_norm: 0.7055919597646343, iteration: 391899
loss: 0.9673805832862854,grad_norm: 0.9744562127259129, iteration: 391900
loss: 0.9829584360122681,grad_norm: 0.9999996222308212, iteration: 391901
loss: 0.9545397162437439,grad_norm: 0.8421361049064252, iteration: 391902
loss: 0.963509738445282,grad_norm: 0.9999997869856746, iteration: 391903
loss: 0.984783411026001,grad_norm: 0.7833960083373755, iteration: 391904
loss: 0.9932201504707336,grad_norm: 0.7872012431437334, iteration: 391905
loss: 0.9757357835769653,grad_norm: 0.8670381359104303, iteration: 391906
loss: 1.0081006288528442,grad_norm: 0.9663530223056727, iteration: 391907
loss: 1.0258033275604248,grad_norm: 0.7475261303272136, iteration: 391908
loss: 1.0251320600509644,grad_norm: 0.8092060523379767, iteration: 391909
loss: 1.0262932777404785,grad_norm: 0.9736573026331767, iteration: 391910
loss: 1.005053997039795,grad_norm: 0.9301063161775713, iteration: 391911
loss: 1.0004128217697144,grad_norm: 0.9999998182496105, iteration: 391912
loss: 1.028026819229126,grad_norm: 0.8029703915427263, iteration: 391913
loss: 0.993215799331665,grad_norm: 0.7077754623925738, iteration: 391914
loss: 0.9905986189842224,grad_norm: 0.9999999922729101, iteration: 391915
loss: 1.0843186378479004,grad_norm: 0.9999991484132149, iteration: 391916
loss: 1.0349336862564087,grad_norm: 0.9999990627514704, iteration: 391917
loss: 1.0831472873687744,grad_norm: 0.9999994038206125, iteration: 391918
loss: 1.0000616312026978,grad_norm: 0.8022082777155557, iteration: 391919
loss: 0.9921576380729675,grad_norm: 0.811746083978704, iteration: 391920
loss: 1.0445255041122437,grad_norm: 0.9999992893860399, iteration: 391921
loss: 1.0156320333480835,grad_norm: 0.8053690259014855, iteration: 391922
loss: 1.0120247602462769,grad_norm: 0.8954109603147429, iteration: 391923
loss: 1.0182627439498901,grad_norm: 0.6987740758250016, iteration: 391924
loss: 1.0277845859527588,grad_norm: 0.999999213000439, iteration: 391925
loss: 1.0343959331512451,grad_norm: 0.9999992059486674, iteration: 391926
loss: 1.0092344284057617,grad_norm: 0.7267434890657851, iteration: 391927
loss: 1.001631259918213,grad_norm: 0.7640315408639458, iteration: 391928
loss: 1.0299506187438965,grad_norm: 0.8377871989549343, iteration: 391929
loss: 1.0116957426071167,grad_norm: 0.6810723590432398, iteration: 391930
loss: 1.0122921466827393,grad_norm: 0.7577040013027833, iteration: 391931
loss: 0.9973727464675903,grad_norm: 0.674820734121519, iteration: 391932
loss: 1.001753807067871,grad_norm: 0.7863038540754189, iteration: 391933
loss: 1.0259507894515991,grad_norm: 0.8048746964703324, iteration: 391934
loss: 1.0621298551559448,grad_norm: 0.9999996830553535, iteration: 391935
loss: 1.014771580696106,grad_norm: 0.8484891466018957, iteration: 391936
loss: 1.1587185859680176,grad_norm: 0.9999994846276294, iteration: 391937
loss: 0.9911993741989136,grad_norm: 0.7199500573705756, iteration: 391938
loss: 1.0018178224563599,grad_norm: 0.8364989779751905, iteration: 391939
loss: 0.9934825897216797,grad_norm: 0.8078208848341111, iteration: 391940
loss: 0.9945777058601379,grad_norm: 0.8789089360036655, iteration: 391941
loss: 1.072224736213684,grad_norm: 0.9999992439718253, iteration: 391942
loss: 0.983356773853302,grad_norm: 0.8513633221912036, iteration: 391943
loss: 1.0076415538787842,grad_norm: 0.692294855953523, iteration: 391944
loss: 1.0013068914413452,grad_norm: 0.8423842779104063, iteration: 391945
loss: 1.00932776927948,grad_norm: 0.8487142416381924, iteration: 391946
loss: 0.9962526559829712,grad_norm: 0.827846924115735, iteration: 391947
loss: 0.9876787066459656,grad_norm: 0.7345033248264868, iteration: 391948
loss: 1.0100014209747314,grad_norm: 0.7334534429749752, iteration: 391949
loss: 0.9853688478469849,grad_norm: 0.8682656837635045, iteration: 391950
loss: 0.9905575513839722,grad_norm: 0.7135956699433011, iteration: 391951
loss: 1.0336551666259766,grad_norm: 0.9999994709083949, iteration: 391952
loss: 1.002602458000183,grad_norm: 0.6964524911493494, iteration: 391953
loss: 1.017731785774231,grad_norm: 0.6895231763747621, iteration: 391954
loss: 1.0201956033706665,grad_norm: 0.9999990005760842, iteration: 391955
loss: 1.0057119131088257,grad_norm: 0.924570046017087, iteration: 391956
loss: 1.0228517055511475,grad_norm: 0.8441938583358588, iteration: 391957
loss: 1.0098669528961182,grad_norm: 0.8702990958316552, iteration: 391958
loss: 0.9710788130760193,grad_norm: 0.7621107998858975, iteration: 391959
loss: 1.0342026948928833,grad_norm: 0.7476586299412874, iteration: 391960
loss: 1.0131926536560059,grad_norm: 0.767734615239324, iteration: 391961
loss: 0.990794837474823,grad_norm: 0.7684206172669522, iteration: 391962
loss: 0.9850830435752869,grad_norm: 0.8556627262556851, iteration: 391963
loss: 1.0060468912124634,grad_norm: 0.7667300156877849, iteration: 391964
loss: 0.9938226938247681,grad_norm: 0.9999994484149912, iteration: 391965
loss: 0.9777422547340393,grad_norm: 0.7622370914757267, iteration: 391966
loss: 0.9705016613006592,grad_norm: 0.7588986162842787, iteration: 391967
loss: 1.0528522729873657,grad_norm: 0.9999992858677891, iteration: 391968
loss: 0.9849147796630859,grad_norm: 0.8435811342219401, iteration: 391969
loss: 1.0187855958938599,grad_norm: 0.8150467863676802, iteration: 391970
loss: 1.0137546062469482,grad_norm: 0.8291915952397949, iteration: 391971
loss: 1.021753191947937,grad_norm: 0.7950154258024159, iteration: 391972
loss: 0.9716731309890747,grad_norm: 0.8203904406465313, iteration: 391973
loss: 1.0212368965148926,grad_norm: 0.9432276269595681, iteration: 391974
loss: 1.0749621391296387,grad_norm: 0.9999997100369774, iteration: 391975
loss: 0.9496333003044128,grad_norm: 0.886382183314212, iteration: 391976
loss: 1.0013151168823242,grad_norm: 0.9999998104033742, iteration: 391977
loss: 1.0528111457824707,grad_norm: 0.9999991570115135, iteration: 391978
loss: 0.9960479736328125,grad_norm: 0.7375975799283502, iteration: 391979
loss: 0.9766413569450378,grad_norm: 0.7570146186551484, iteration: 391980
loss: 1.0123755931854248,grad_norm: 0.625566791130045, iteration: 391981
loss: 1.0191583633422852,grad_norm: 0.8415747004177805, iteration: 391982
loss: 0.9997841715812683,grad_norm: 0.759748605560281, iteration: 391983
loss: 0.9885191321372986,grad_norm: 0.7909144640784966, iteration: 391984
loss: 1.0924161672592163,grad_norm: 0.9999990498636923, iteration: 391985
loss: 0.9687387943267822,grad_norm: 0.9999990036536314, iteration: 391986
loss: 1.0099016427993774,grad_norm: 0.8922383322593701, iteration: 391987
loss: 0.9778550863265991,grad_norm: 0.8499620888467562, iteration: 391988
loss: 0.9808160662651062,grad_norm: 0.8929598959020213, iteration: 391989
loss: 0.9822056293487549,grad_norm: 0.7785999250224013, iteration: 391990
loss: 1.0173662900924683,grad_norm: 0.7085655441637093, iteration: 391991
loss: 0.9604145884513855,grad_norm: 0.7797954578841236, iteration: 391992
loss: 1.0393922328948975,grad_norm: 0.9570773762729108, iteration: 391993
loss: 0.9987956881523132,grad_norm: 0.8982464790493332, iteration: 391994
loss: 1.0169323682785034,grad_norm: 0.7219713140063253, iteration: 391995
loss: 0.9951267242431641,grad_norm: 0.8455231393765782, iteration: 391996
loss: 0.9967197179794312,grad_norm: 0.8683544521773039, iteration: 391997
loss: 1.0540897846221924,grad_norm: 0.9999998767410369, iteration: 391998
loss: 1.0806113481521606,grad_norm: 0.9999990693729301, iteration: 391999
loss: 1.0041223764419556,grad_norm: 0.8505856697163416, iteration: 392000
loss: 0.9758687615394592,grad_norm: 0.7927457006755497, iteration: 392001
loss: 0.9914358854293823,grad_norm: 0.6292595840220503, iteration: 392002
loss: 0.9945567846298218,grad_norm: 0.6192005458346028, iteration: 392003
loss: 0.9891311526298523,grad_norm: 0.918693665985558, iteration: 392004
loss: 1.0500551462173462,grad_norm: 0.653851203244676, iteration: 392005
loss: 1.0490648746490479,grad_norm: 0.9677877228346189, iteration: 392006
loss: 0.9915244579315186,grad_norm: 0.766287257647396, iteration: 392007
loss: 0.9508615732192993,grad_norm: 0.9105323848764618, iteration: 392008
loss: 1.0225926637649536,grad_norm: 0.831585571468613, iteration: 392009
loss: 0.9865064024925232,grad_norm: 0.8260877511685205, iteration: 392010
loss: 1.0147416591644287,grad_norm: 0.9999992001320035, iteration: 392011
loss: 1.0089421272277832,grad_norm: 0.9087374142609022, iteration: 392012
loss: 0.9880293011665344,grad_norm: 0.9570449272968676, iteration: 392013
loss: 0.9846611618995667,grad_norm: 0.8499580460525921, iteration: 392014
loss: 1.0212105512619019,grad_norm: 0.6961537885929696, iteration: 392015
loss: 1.0126270055770874,grad_norm: 0.9414890359472694, iteration: 392016
loss: 1.0356842279434204,grad_norm: 0.9999998951886506, iteration: 392017
loss: 0.9864712953567505,grad_norm: 0.7384793938663364, iteration: 392018
loss: 1.0077072381973267,grad_norm: 0.8803596911736324, iteration: 392019
loss: 1.0543837547302246,grad_norm: 0.9999996648414717, iteration: 392020
loss: 0.9356444478034973,grad_norm: 0.7343779351977776, iteration: 392021
loss: 0.9743056297302246,grad_norm: 0.7997736210115792, iteration: 392022
loss: 1.0004609823226929,grad_norm: 0.796323378077852, iteration: 392023
loss: 1.0021016597747803,grad_norm: 0.9565737235308134, iteration: 392024
loss: 1.0038375854492188,grad_norm: 0.818579046454016, iteration: 392025
loss: 0.9619566798210144,grad_norm: 0.6904647716864786, iteration: 392026
loss: 1.000031590461731,grad_norm: 0.7172172587432067, iteration: 392027
loss: 1.0173956155776978,grad_norm: 0.9999991845819085, iteration: 392028
loss: 1.0154807567596436,grad_norm: 0.6965881130634028, iteration: 392029
loss: 0.9917386770248413,grad_norm: 0.6242311570243961, iteration: 392030
loss: 1.0472919940948486,grad_norm: 0.836125120575043, iteration: 392031
loss: 1.0328000783920288,grad_norm: 0.8412518227835366, iteration: 392032
loss: 1.008244514465332,grad_norm: 0.9042501239793755, iteration: 392033
loss: 0.9854438304901123,grad_norm: 0.7950213312625048, iteration: 392034
loss: 0.9987195134162903,grad_norm: 0.8054678162035899, iteration: 392035
loss: 1.0235882997512817,grad_norm: 0.9275220792113927, iteration: 392036
loss: 0.9814721941947937,grad_norm: 0.8687072784564733, iteration: 392037
loss: 0.9908060431480408,grad_norm: 0.7399356403661229, iteration: 392038
loss: 1.1011091470718384,grad_norm: 0.9273911861416746, iteration: 392039
loss: 1.0083621740341187,grad_norm: 0.872785520776671, iteration: 392040
loss: 1.0171756744384766,grad_norm: 0.6239030479969632, iteration: 392041
loss: 1.009438157081604,grad_norm: 0.8112841614153117, iteration: 392042
loss: 0.9806941747665405,grad_norm: 0.8979054199875032, iteration: 392043
loss: 1.0313327312469482,grad_norm: 0.9999989929595936, iteration: 392044
loss: 0.9939714074134827,grad_norm: 0.7169308277816802, iteration: 392045
loss: 0.9978370070457458,grad_norm: 0.7451087870551198, iteration: 392046
loss: 1.01244056224823,grad_norm: 0.7672848057326409, iteration: 392047
loss: 0.9944174289703369,grad_norm: 0.7221457402056065, iteration: 392048
loss: 0.9713101387023926,grad_norm: 0.8298752271027372, iteration: 392049
loss: 1.0235346555709839,grad_norm: 0.857863499952991, iteration: 392050
loss: 1.0044389963150024,grad_norm: 0.7069373252884728, iteration: 392051
loss: 1.0177810192108154,grad_norm: 0.7946412239739706, iteration: 392052
loss: 1.090091347694397,grad_norm: 0.9999998306609337, iteration: 392053
loss: 0.9839026927947998,grad_norm: 0.8732969595711979, iteration: 392054
loss: 0.9912036657333374,grad_norm: 0.869509282235565, iteration: 392055
loss: 1.0093837976455688,grad_norm: 0.8123133235542107, iteration: 392056
loss: 0.9950982332229614,grad_norm: 0.6550113476142233, iteration: 392057
loss: 1.0250117778778076,grad_norm: 0.9540216630344132, iteration: 392058
loss: 1.036824107170105,grad_norm: 0.7707740012114028, iteration: 392059
loss: 0.9972488880157471,grad_norm: 0.8015247871935556, iteration: 392060
loss: 0.993619978427887,grad_norm: 0.6542339671313352, iteration: 392061
loss: 1.00309157371521,grad_norm: 0.710199831522822, iteration: 392062
loss: 1.1577956676483154,grad_norm: 0.9999995286605449, iteration: 392063
loss: 0.9925574660301208,grad_norm: 0.7110938877424803, iteration: 392064
loss: 0.9786707758903503,grad_norm: 0.9398303921446424, iteration: 392065
loss: 1.1433241367340088,grad_norm: 0.9614746958088969, iteration: 392066
loss: 1.0390660762786865,grad_norm: 0.8472956126472476, iteration: 392067
loss: 1.050866961479187,grad_norm: 0.8969243427089577, iteration: 392068
loss: 0.9943661689758301,grad_norm: 0.8060614737708842, iteration: 392069
loss: 1.0006074905395508,grad_norm: 0.8755447456219645, iteration: 392070
loss: 0.9853286147117615,grad_norm: 0.808879029254166, iteration: 392071
loss: 1.0052708387374878,grad_norm: 0.7109267015614882, iteration: 392072
loss: 1.0212607383728027,grad_norm: 0.9037648683255628, iteration: 392073
loss: 0.9678354263305664,grad_norm: 0.9332703427354565, iteration: 392074
loss: 0.9795752167701721,grad_norm: 0.9170011216309163, iteration: 392075
loss: 1.0059666633605957,grad_norm: 0.8800740888915657, iteration: 392076
loss: 0.9956085681915283,grad_norm: 0.9879372196645346, iteration: 392077
loss: 0.9729178547859192,grad_norm: 0.8162792023569203, iteration: 392078
loss: 0.9817540645599365,grad_norm: 0.859809074129745, iteration: 392079
loss: 1.0256060361862183,grad_norm: 0.8533040640984912, iteration: 392080
loss: 0.9920249581336975,grad_norm: 0.7783849810485348, iteration: 392081
loss: 0.9952337145805359,grad_norm: 0.9807875787767888, iteration: 392082
loss: 1.0024207830429077,grad_norm: 0.9999990914141682, iteration: 392083
loss: 1.0254639387130737,grad_norm: 0.7494936281335428, iteration: 392084
loss: 0.979394793510437,grad_norm: 0.6992469091688202, iteration: 392085
loss: 1.0308176279067993,grad_norm: 0.8744128711171595, iteration: 392086
loss: 1.0096646547317505,grad_norm: 0.8910168666590752, iteration: 392087
loss: 0.9873989820480347,grad_norm: 0.8374982276396261, iteration: 392088
loss: 0.995961606502533,grad_norm: 0.7381287416421566, iteration: 392089
loss: 1.0170354843139648,grad_norm: 0.7482790031896072, iteration: 392090
loss: 0.9870229363441467,grad_norm: 0.6938191038343918, iteration: 392091
loss: 0.9959379434585571,grad_norm: 0.7174552916366687, iteration: 392092
loss: 0.9999799728393555,grad_norm: 0.6202594892012553, iteration: 392093
loss: 0.9920004606246948,grad_norm: 0.8584691569914631, iteration: 392094
loss: 1.0111818313598633,grad_norm: 0.8598643741372882, iteration: 392095
loss: 0.9788926243782043,grad_norm: 0.9564929512214078, iteration: 392096
loss: 1.008759617805481,grad_norm: 0.9999992070919312, iteration: 392097
loss: 0.9870578050613403,grad_norm: 0.9167942629692805, iteration: 392098
loss: 0.9714120030403137,grad_norm: 0.8480230481513101, iteration: 392099
loss: 1.0488110780715942,grad_norm: 0.9085861664814276, iteration: 392100
loss: 1.015639305114746,grad_norm: 0.7573527148955991, iteration: 392101
loss: 1.0336768627166748,grad_norm: 0.8371576182511027, iteration: 392102
loss: 1.0126047134399414,grad_norm: 0.673311091193521, iteration: 392103
loss: 0.9816299676895142,grad_norm: 0.7920681836148414, iteration: 392104
loss: 1.0135241746902466,grad_norm: 0.8353026813531984, iteration: 392105
loss: 1.0288233757019043,grad_norm: 0.9948164379792918, iteration: 392106
loss: 0.9679949283599854,grad_norm: 0.7144587796792675, iteration: 392107
loss: 1.0213937759399414,grad_norm: 0.9999992071984912, iteration: 392108
loss: 1.006928563117981,grad_norm: 0.830969602190348, iteration: 392109
loss: 1.0336427688598633,grad_norm: 0.6623316091510627, iteration: 392110
loss: 1.0019055604934692,grad_norm: 0.9075074145475737, iteration: 392111
loss: 0.980840802192688,grad_norm: 0.7747320744202436, iteration: 392112
loss: 0.9818391799926758,grad_norm: 0.7277619441715732, iteration: 392113
loss: 0.9792736172676086,grad_norm: 0.9305737990108087, iteration: 392114
loss: 1.0357273817062378,grad_norm: 0.8820297756973344, iteration: 392115
loss: 0.9786707758903503,grad_norm: 0.6963343827585141, iteration: 392116
loss: 0.9978377819061279,grad_norm: 0.8240567074801252, iteration: 392117
loss: 1.0925283432006836,grad_norm: 0.9687153162963682, iteration: 392118
loss: 0.9957854151725769,grad_norm: 0.8102393409349491, iteration: 392119
loss: 0.9591931104660034,grad_norm: 0.6215238169356068, iteration: 392120
loss: 0.9508783221244812,grad_norm: 0.7055767704874432, iteration: 392121
loss: 0.9829277992248535,grad_norm: 0.8122119882641841, iteration: 392122
loss: 1.002668857574463,grad_norm: 0.7376426107883456, iteration: 392123
loss: 1.016808032989502,grad_norm: 0.8024757072708896, iteration: 392124
loss: 0.9944400787353516,grad_norm: 0.8332994932499734, iteration: 392125
loss: 1.000681757926941,grad_norm: 0.7763019673248024, iteration: 392126
loss: 0.9978846311569214,grad_norm: 0.8885655363148572, iteration: 392127
loss: 0.9772812128067017,grad_norm: 0.8255999197831507, iteration: 392128
loss: 1.0243598222732544,grad_norm: 0.7337112038557633, iteration: 392129
loss: 1.02464759349823,grad_norm: 0.7441141604896756, iteration: 392130
loss: 1.0147039890289307,grad_norm: 0.9999992379454041, iteration: 392131
loss: 1.005439043045044,grad_norm: 0.7682906045241997, iteration: 392132
loss: 1.0054340362548828,grad_norm: 0.7553130382513967, iteration: 392133
loss: 1.0132606029510498,grad_norm: 0.9999992552782944, iteration: 392134
loss: 0.9723837375640869,grad_norm: 0.6562279014920123, iteration: 392135
loss: 1.0099034309387207,grad_norm: 0.7357774386628316, iteration: 392136
loss: 1.002550721168518,grad_norm: 0.7558914563388854, iteration: 392137
loss: 0.9844460487365723,grad_norm: 0.7590815805846611, iteration: 392138
loss: 1.0176289081573486,grad_norm: 0.7464332978371204, iteration: 392139
loss: 1.008457899093628,grad_norm: 0.8937648311870743, iteration: 392140
loss: 0.9867370128631592,grad_norm: 0.8339342392976201, iteration: 392141
loss: 0.9900501370429993,grad_norm: 0.9999991923692202, iteration: 392142
loss: 0.9439501166343689,grad_norm: 0.8686031685496548, iteration: 392143
loss: 1.0198991298675537,grad_norm: 0.8158731792837376, iteration: 392144
loss: 1.003137230873108,grad_norm: 0.9999998111411473, iteration: 392145
loss: 0.9921574592590332,grad_norm: 0.7742697903158628, iteration: 392146
loss: 1.008536696434021,grad_norm: 0.9602525806123786, iteration: 392147
loss: 0.9942135810852051,grad_norm: 0.9999991076729475, iteration: 392148
loss: 0.9857221841812134,grad_norm: 0.6814202436112143, iteration: 392149
loss: 1.014599084854126,grad_norm: 0.7942053642571781, iteration: 392150
loss: 1.0996708869934082,grad_norm: 0.999999983303542, iteration: 392151
loss: 0.989253044128418,grad_norm: 0.8638251930939356, iteration: 392152
loss: 1.0289791822433472,grad_norm: 0.8761008276265169, iteration: 392153
loss: 1.0213693380355835,grad_norm: 0.9780936162500098, iteration: 392154
loss: 1.0153679847717285,grad_norm: 0.9999990810790422, iteration: 392155
loss: 1.0698868036270142,grad_norm: 0.9999998428606234, iteration: 392156
loss: 1.0136134624481201,grad_norm: 0.8086308345436877, iteration: 392157
loss: 0.9683197736740112,grad_norm: 0.8285167611269882, iteration: 392158
loss: 1.039976716041565,grad_norm: 0.9221018778772232, iteration: 392159
loss: 0.9743125438690186,grad_norm: 0.8319693101155106, iteration: 392160
loss: 1.0857654809951782,grad_norm: 0.7790217041213314, iteration: 392161
loss: 1.0157579183578491,grad_norm: 0.7101851652898337, iteration: 392162
loss: 1.056959629058838,grad_norm: 0.9999992974156873, iteration: 392163
loss: 0.9980882406234741,grad_norm: 0.9999990660931978, iteration: 392164
loss: 1.0485893487930298,grad_norm: 0.9999994386627261, iteration: 392165
loss: 1.0099682807922363,grad_norm: 0.6739827091167043, iteration: 392166
loss: 0.9685302972793579,grad_norm: 0.892091300379697, iteration: 392167
loss: 0.989892840385437,grad_norm: 0.7123576397741233, iteration: 392168
loss: 0.9947453737258911,grad_norm: 0.7906681911478498, iteration: 392169
loss: 1.1001249551773071,grad_norm: 0.9999993279545085, iteration: 392170
loss: 1.0172040462493896,grad_norm: 0.8799827676574509, iteration: 392171
loss: 0.9952507019042969,grad_norm: 0.757260165232661, iteration: 392172
loss: 1.0072767734527588,grad_norm: 0.963999143453619, iteration: 392173
loss: 1.0238749980926514,grad_norm: 0.9999991027032803, iteration: 392174
loss: 1.0382053852081299,grad_norm: 0.6980734750509934, iteration: 392175
loss: 0.9948824644088745,grad_norm: 0.7476656966471429, iteration: 392176
loss: 1.0256177186965942,grad_norm: 0.9582106881549037, iteration: 392177
loss: 1.0057785511016846,grad_norm: 0.8422910190573859, iteration: 392178
loss: 1.0374397039413452,grad_norm: 0.9999991234193092, iteration: 392179
loss: 1.03138267993927,grad_norm: 0.7105842528754549, iteration: 392180
loss: 0.9927254915237427,grad_norm: 0.7471604496437114, iteration: 392181
loss: 0.9733210802078247,grad_norm: 0.7177927231087075, iteration: 392182
loss: 1.0014922618865967,grad_norm: 0.864571770401629, iteration: 392183
loss: 1.0588023662567139,grad_norm: 0.999999270513352, iteration: 392184
loss: 1.0736486911773682,grad_norm: 0.9651885879730696, iteration: 392185
loss: 1.0259264707565308,grad_norm: 0.7632844254755355, iteration: 392186
loss: 0.9852774739265442,grad_norm: 0.8135723385139547, iteration: 392187
loss: 0.974800169467926,grad_norm: 0.6909641635660914, iteration: 392188
loss: 1.0265074968338013,grad_norm: 0.6936347240537746, iteration: 392189
loss: 1.020371437072754,grad_norm: 0.8050843721909832, iteration: 392190
loss: 1.0084130764007568,grad_norm: 0.8464883809702789, iteration: 392191
loss: 0.9971917271614075,grad_norm: 0.869350847690149, iteration: 392192
loss: 1.0332363843917847,grad_norm: 0.931772932589756, iteration: 392193
loss: 1.054349422454834,grad_norm: 0.9713249563705896, iteration: 392194
loss: 1.0074214935302734,grad_norm: 0.7059516891791296, iteration: 392195
loss: 0.9863492846488953,grad_norm: 0.9181166411620394, iteration: 392196
loss: 1.0480843782424927,grad_norm: 0.8273040573677533, iteration: 392197
loss: 0.9796294569969177,grad_norm: 0.8623310998944024, iteration: 392198
loss: 0.9913012981414795,grad_norm: 0.7225942194688694, iteration: 392199
loss: 1.003380537033081,grad_norm: 0.8139764840288706, iteration: 392200
loss: 1.0059208869934082,grad_norm: 0.8398841668353386, iteration: 392201
loss: 1.0108678340911865,grad_norm: 0.703251144580105, iteration: 392202
loss: 1.0126713514328003,grad_norm: 0.8256848210303049, iteration: 392203
loss: 1.0397746562957764,grad_norm: 0.9276184216613164, iteration: 392204
loss: 1.0310032367706299,grad_norm: 0.707025903564915, iteration: 392205
loss: 1.0314573049545288,grad_norm: 0.8289614004549444, iteration: 392206
loss: 0.9875165224075317,grad_norm: 0.7505020311511675, iteration: 392207
loss: 0.9852455854415894,grad_norm: 0.7379063938383277, iteration: 392208
loss: 1.007936954498291,grad_norm: 0.9672908795060694, iteration: 392209
loss: 1.0044997930526733,grad_norm: 0.9999991228221766, iteration: 392210
loss: 1.0045419931411743,grad_norm: 0.8151700750255626, iteration: 392211
loss: 1.0315215587615967,grad_norm: 0.9361732558610207, iteration: 392212
loss: 0.9848532676696777,grad_norm: 0.8542803987778944, iteration: 392213
loss: 1.0259628295898438,grad_norm: 0.9999993450282024, iteration: 392214
loss: 1.0908979177474976,grad_norm: 1.000000025006104, iteration: 392215
loss: 0.9736246466636658,grad_norm: 0.7738081608312001, iteration: 392216
loss: 0.9734324812889099,grad_norm: 0.8969979851698081, iteration: 392217
loss: 1.0267090797424316,grad_norm: 0.8996734717888131, iteration: 392218
loss: 1.0045807361602783,grad_norm: 0.9561179352864677, iteration: 392219
loss: 1.006293773651123,grad_norm: 0.759651583143632, iteration: 392220
loss: 1.0020769834518433,grad_norm: 0.7833261717915262, iteration: 392221
loss: 0.9997822642326355,grad_norm: 0.8577212932310063, iteration: 392222
loss: 1.007619857788086,grad_norm: 0.9999998556242016, iteration: 392223
loss: 1.0119389295578003,grad_norm: 0.6374532570198809, iteration: 392224
loss: 1.006278395652771,grad_norm: 0.7555105590233677, iteration: 392225
loss: 1.0050779581069946,grad_norm: 0.8615092204478055, iteration: 392226
loss: 1.0086932182312012,grad_norm: 0.8388452613686822, iteration: 392227
loss: 1.0381730794906616,grad_norm: 0.8318904338665383, iteration: 392228
loss: 1.016196608543396,grad_norm: 0.9999990820723137, iteration: 392229
loss: 1.0055046081542969,grad_norm: 0.9999994696540588, iteration: 392230
loss: 1.0095205307006836,grad_norm: 0.788811879919559, iteration: 392231
loss: 1.0228134393692017,grad_norm: 0.9217133945019406, iteration: 392232
loss: 1.0065486431121826,grad_norm: 0.6870596931821102, iteration: 392233
loss: 1.0491139888763428,grad_norm: 0.7284654961010619, iteration: 392234
loss: 0.9738154411315918,grad_norm: 0.5985459096931839, iteration: 392235
loss: 1.007936716079712,grad_norm: 0.7625677120877665, iteration: 392236
loss: 0.9912651777267456,grad_norm: 0.7217136526626563, iteration: 392237
loss: 1.0101330280303955,grad_norm: 0.818752334320069, iteration: 392238
loss: 0.9833202362060547,grad_norm: 0.8192854859248996, iteration: 392239
loss: 0.9704239964485168,grad_norm: 0.7418735924851112, iteration: 392240
loss: 0.9586873054504395,grad_norm: 0.9314247387261038, iteration: 392241
loss: 0.978793203830719,grad_norm: 0.7027639714074353, iteration: 392242
loss: 0.9984291195869446,grad_norm: 0.945128317174086, iteration: 392243
loss: 0.9847685098648071,grad_norm: 0.7298554058242521, iteration: 392244
loss: 0.9882979989051819,grad_norm: 0.9999998142410007, iteration: 392245
loss: 0.9955549240112305,grad_norm: 0.823673365512465, iteration: 392246
loss: 1.0046123266220093,grad_norm: 0.7677438747579695, iteration: 392247
loss: 1.0150666236877441,grad_norm: 0.8511498256024541, iteration: 392248
loss: 0.9765031933784485,grad_norm: 0.834818210007127, iteration: 392249
loss: 1.1277388334274292,grad_norm: 0.9334398076940686, iteration: 392250
loss: 0.9726825952529907,grad_norm: 0.9131004300302731, iteration: 392251
loss: 0.9857484698295593,grad_norm: 0.8988670069821805, iteration: 392252
loss: 1.0249959230422974,grad_norm: 0.9999995659874373, iteration: 392253
loss: 0.98178631067276,grad_norm: 0.9999999693749891, iteration: 392254
loss: 1.0058878660202026,grad_norm: 0.8189360319641044, iteration: 392255
loss: 1.1553274393081665,grad_norm: 0.8432311878016744, iteration: 392256
loss: 1.018417239189148,grad_norm: 0.7603437997453129, iteration: 392257
loss: 0.9982450008392334,grad_norm: 0.9675856599386747, iteration: 392258
loss: 0.9892788529396057,grad_norm: 0.8665423880968376, iteration: 392259
loss: 1.0390862226486206,grad_norm: 0.7241052285611494, iteration: 392260
loss: 0.9990943074226379,grad_norm: 0.9999990784234557, iteration: 392261
loss: 1.0171066522598267,grad_norm: 0.9999999403595325, iteration: 392262
loss: 1.0291401147842407,grad_norm: 0.7166914086551702, iteration: 392263
loss: 1.010127305984497,grad_norm: 0.7354532640651686, iteration: 392264
loss: 0.991770327091217,grad_norm: 0.9971403398104284, iteration: 392265
loss: 0.966532826423645,grad_norm: 0.6757784507394509, iteration: 392266
loss: 1.002793550491333,grad_norm: 0.7721817690937771, iteration: 392267
loss: 1.0420511960983276,grad_norm: 0.9999996183158429, iteration: 392268
loss: 1.0047236680984497,grad_norm: 0.768990679019526, iteration: 392269
loss: 1.0064564943313599,grad_norm: 0.7660898773078795, iteration: 392270
loss: 1.0161410570144653,grad_norm: 0.714128063403619, iteration: 392271
loss: 1.0394048690795898,grad_norm: 0.8881342097154813, iteration: 392272
loss: 0.9758320450782776,grad_norm: 0.7220576370722867, iteration: 392273
loss: 1.002212405204773,grad_norm: 0.7657190814747368, iteration: 392274
loss: 1.00655996799469,grad_norm: 0.8913836106861315, iteration: 392275
loss: 1.025396466255188,grad_norm: 0.9999991210824154, iteration: 392276
loss: 1.0028845071792603,grad_norm: 0.99999916189936, iteration: 392277
loss: 1.013414740562439,grad_norm: 0.71633919448387, iteration: 392278
loss: 0.9904270768165588,grad_norm: 0.7140355465707326, iteration: 392279
loss: 0.9685676693916321,grad_norm: 0.7684562489752029, iteration: 392280
loss: 1.059482455253601,grad_norm: 0.9999995778569747, iteration: 392281
loss: 0.9911983013153076,grad_norm: 0.9376201263740608, iteration: 392282
loss: 0.9998933672904968,grad_norm: 0.8576565607396969, iteration: 392283
loss: 0.9710201621055603,grad_norm: 0.9400646477240537, iteration: 392284
loss: 1.0106478929519653,grad_norm: 0.7884039681011997, iteration: 392285
loss: 1.0183182954788208,grad_norm: 0.9999992553479725, iteration: 392286
loss: 0.976582407951355,grad_norm: 0.9318373863732498, iteration: 392287
loss: 0.9677639007568359,grad_norm: 0.7105529209823774, iteration: 392288
loss: 1.030695915222168,grad_norm: 0.9856445048016174, iteration: 392289
loss: 1.1748113632202148,grad_norm: 0.9999997032056365, iteration: 392290
loss: 0.9931262731552124,grad_norm: 0.930661338922307, iteration: 392291
loss: 0.9770008325576782,grad_norm: 0.9999998957386399, iteration: 392292
loss: 1.008956789970398,grad_norm: 0.8478366670970443, iteration: 392293
loss: 1.0000463724136353,grad_norm: 0.777018298897883, iteration: 392294
loss: 0.9920927286148071,grad_norm: 0.8687426300712662, iteration: 392295
loss: 1.036365032196045,grad_norm: 0.8825150951319345, iteration: 392296
loss: 0.9461567401885986,grad_norm: 0.7755889277707702, iteration: 392297
loss: 0.9966279864311218,grad_norm: 0.8438961776545761, iteration: 392298
loss: 1.1062861680984497,grad_norm: 0.9392997634939382, iteration: 392299
loss: 1.0603705644607544,grad_norm: 0.9999998167865681, iteration: 392300
loss: 0.9915928840637207,grad_norm: 0.8180765646090801, iteration: 392301
loss: 1.0046616792678833,grad_norm: 0.9999997457111693, iteration: 392302
loss: 0.987304151058197,grad_norm: 0.7226884615289907, iteration: 392303
loss: 0.9994795918464661,grad_norm: 0.9251296744341081, iteration: 392304
loss: 1.0196815729141235,grad_norm: 0.8979662642709597, iteration: 392305
loss: 1.0401437282562256,grad_norm: 0.7059826751623764, iteration: 392306
loss: 0.9729072451591492,grad_norm: 0.7612375486621968, iteration: 392307
loss: 1.0230861902236938,grad_norm: 0.6866175400234871, iteration: 392308
loss: 1.0068185329437256,grad_norm: 0.8391191385536754, iteration: 392309
loss: 0.983819305896759,grad_norm: 0.853855653101599, iteration: 392310
loss: 0.9833067059516907,grad_norm: 0.7937975166799907, iteration: 392311
loss: 0.9746133685112,grad_norm: 0.8981292504795653, iteration: 392312
loss: 1.0509743690490723,grad_norm: 0.8619660298012171, iteration: 392313
loss: 0.9790278673171997,grad_norm: 0.7744466661745254, iteration: 392314
loss: 1.0231267213821411,grad_norm: 0.8993213639725998, iteration: 392315
loss: 1.094313383102417,grad_norm: 0.9999998093414073, iteration: 392316
loss: 0.9685436487197876,grad_norm: 0.8476435984293367, iteration: 392317
loss: 1.0262020826339722,grad_norm: 0.8539859752559997, iteration: 392318
loss: 0.9853496551513672,grad_norm: 0.8595086497906148, iteration: 392319
loss: 1.0049699544906616,grad_norm: 0.9642649761054917, iteration: 392320
loss: 1.0523498058319092,grad_norm: 0.9999991701370838, iteration: 392321
loss: 0.9878156781196594,grad_norm: 0.8120553458388574, iteration: 392322
loss: 1.0264790058135986,grad_norm: 0.8624863721187351, iteration: 392323
loss: 1.0441113710403442,grad_norm: 0.9999990413879825, iteration: 392324
loss: 0.9609963893890381,grad_norm: 0.7435514959958254, iteration: 392325
loss: 1.0274829864501953,grad_norm: 0.8633348434104275, iteration: 392326
loss: 1.0343143939971924,grad_norm: 0.6775188043192085, iteration: 392327
loss: 0.9886873364448547,grad_norm: 0.7933630540411951, iteration: 392328
loss: 0.9958842396736145,grad_norm: 0.7612031332460854, iteration: 392329
loss: 1.0775946378707886,grad_norm: 0.780548930110684, iteration: 392330
loss: 1.0511106252670288,grad_norm: 0.9999997450777316, iteration: 392331
loss: 1.0602325201034546,grad_norm: 0.9999999929236214, iteration: 392332
loss: 1.0145514011383057,grad_norm: 0.8163539511814271, iteration: 392333
loss: 0.9992806911468506,grad_norm: 0.7358561377103809, iteration: 392334
loss: 1.007432460784912,grad_norm: 0.8859872283875946, iteration: 392335
loss: 1.2371960878372192,grad_norm: 0.9999996441023825, iteration: 392336
loss: 1.0257680416107178,grad_norm: 0.999999754654554, iteration: 392337
loss: 0.9990726113319397,grad_norm: 0.9009157018636602, iteration: 392338
loss: 1.0015285015106201,grad_norm: 0.6635811842809639, iteration: 392339
loss: 1.0043597221374512,grad_norm: 0.9999991563738767, iteration: 392340
loss: 0.9936985373497009,grad_norm: 0.6875667703804613, iteration: 392341
loss: 0.9984535574913025,grad_norm: 0.8649759388056296, iteration: 392342
loss: 1.008076548576355,grad_norm: 0.8083646869184731, iteration: 392343
loss: 1.0872553586959839,grad_norm: 0.9999992078435088, iteration: 392344
loss: 1.0030416250228882,grad_norm: 0.8325826241214934, iteration: 392345
loss: 1.025117039680481,grad_norm: 0.9635333983008182, iteration: 392346
loss: 0.983477771282196,grad_norm: 0.7878815417107907, iteration: 392347
loss: 1.013885736465454,grad_norm: 0.9649143008750193, iteration: 392348
loss: 1.048612356185913,grad_norm: 0.9999996869578284, iteration: 392349
loss: 1.0350828170776367,grad_norm: 0.7574421575329183, iteration: 392350
loss: 1.0134440660476685,grad_norm: 0.999999719209827, iteration: 392351
loss: 0.9706916213035583,grad_norm: 0.9760976366049178, iteration: 392352
loss: 0.983959436416626,grad_norm: 0.7950463676537222, iteration: 392353
loss: 1.016684889793396,grad_norm: 0.7582407182467826, iteration: 392354
loss: 1.0483592748641968,grad_norm: 0.9143369022774227, iteration: 392355
loss: 0.989151656627655,grad_norm: 0.7283934313960969, iteration: 392356
loss: 1.0225545167922974,grad_norm: 0.9327492718603587, iteration: 392357
loss: 1.0336275100708008,grad_norm: 0.7565562497528008, iteration: 392358
loss: 1.00220787525177,grad_norm: 0.7602182331510957, iteration: 392359
loss: 1.0264990329742432,grad_norm: 0.773536294797007, iteration: 392360
loss: 0.9981821179389954,grad_norm: 0.8286222857716989, iteration: 392361
loss: 1.0209951400756836,grad_norm: 0.8808883495447626, iteration: 392362
loss: 1.0076876878738403,grad_norm: 0.7890864197366961, iteration: 392363
loss: 0.9774783849716187,grad_norm: 0.9999991708025006, iteration: 392364
loss: 0.9942004680633545,grad_norm: 0.8733811372395476, iteration: 392365
loss: 1.0706671476364136,grad_norm: 0.999999881821331, iteration: 392366
loss: 1.0543479919433594,grad_norm: 0.9999999487818201, iteration: 392367
loss: 0.9905912280082703,grad_norm: 0.7067257936182927, iteration: 392368
loss: 1.0148307085037231,grad_norm: 0.7794875542852067, iteration: 392369
loss: 0.9980971217155457,grad_norm: 0.7782931362363121, iteration: 392370
loss: 0.9809035062789917,grad_norm: 0.8853419359565355, iteration: 392371
loss: 0.9848264455795288,grad_norm: 0.9191739686425557, iteration: 392372
loss: 1.0187711715698242,grad_norm: 0.9999992344598725, iteration: 392373
loss: 1.0226013660430908,grad_norm: 0.8125062350754924, iteration: 392374
loss: 0.9802954792976379,grad_norm: 0.991097756275725, iteration: 392375
loss: 0.9276657700538635,grad_norm: 0.9267212752558441, iteration: 392376
loss: 1.0060838460922241,grad_norm: 0.6904436692862179, iteration: 392377
loss: 0.9747368097305298,grad_norm: 0.8812715742619479, iteration: 392378
loss: 1.0314480066299438,grad_norm: 0.7947442324654128, iteration: 392379
loss: 1.0507036447525024,grad_norm: 0.8565561809089554, iteration: 392380
loss: 0.9821383357048035,grad_norm: 0.7668483182624394, iteration: 392381
loss: 0.9849715828895569,grad_norm: 0.7224842800199125, iteration: 392382
loss: 1.032061219215393,grad_norm: 0.7825107232363893, iteration: 392383
loss: 0.9964742064476013,grad_norm: 0.7737275184229372, iteration: 392384
loss: 0.9793742895126343,grad_norm: 0.8344671775547673, iteration: 392385
loss: 1.0260928869247437,grad_norm: 0.8531815311488185, iteration: 392386
loss: 1.122743010520935,grad_norm: 0.9698544001652267, iteration: 392387
loss: 1.0120869874954224,grad_norm: 0.9999989193743879, iteration: 392388
loss: 0.9687833189964294,grad_norm: 0.836084979114273, iteration: 392389
loss: 1.0110247135162354,grad_norm: 0.7573370093457032, iteration: 392390
loss: 1.0126579999923706,grad_norm: 0.7034819508929353, iteration: 392391
loss: 1.1601279973983765,grad_norm: 0.999999837764281, iteration: 392392
loss: 1.035376787185669,grad_norm: 0.7564719287124582, iteration: 392393
loss: 1.0059263706207275,grad_norm: 0.8871909123444831, iteration: 392394
loss: 0.9863481521606445,grad_norm: 0.7990680172708452, iteration: 392395
loss: 0.9970337748527527,grad_norm: 0.8277727714717057, iteration: 392396
loss: 0.9934248924255371,grad_norm: 0.794077348041281, iteration: 392397
loss: 1.039327621459961,grad_norm: 0.9999999287431721, iteration: 392398
loss: 0.9638643264770508,grad_norm: 0.671365875810706, iteration: 392399
loss: 0.9753868579864502,grad_norm: 0.7124949985309308, iteration: 392400
loss: 1.0498276948928833,grad_norm: 0.9999993289649, iteration: 392401
loss: 0.9762671589851379,grad_norm: 0.999999510269557, iteration: 392402
loss: 1.015514850616455,grad_norm: 0.9999993878136105, iteration: 392403
loss: 1.03395676612854,grad_norm: 0.7772943288586907, iteration: 392404
loss: 0.9838886260986328,grad_norm: 0.7067240754419682, iteration: 392405
loss: 0.9907832741737366,grad_norm: 0.9999993005543889, iteration: 392406
loss: 1.0022473335266113,grad_norm: 0.9311251152823586, iteration: 392407
loss: 1.0334264039993286,grad_norm: 0.7769377643511485, iteration: 392408
loss: 0.9840067625045776,grad_norm: 0.7272142075294634, iteration: 392409
loss: 1.0167855024337769,grad_norm: 0.8558856136116993, iteration: 392410
loss: 1.1210577487945557,grad_norm: 0.7703725887941804, iteration: 392411
loss: 1.006009817123413,grad_norm: 0.7930872092629772, iteration: 392412
loss: 1.0694193840026855,grad_norm: 0.9091394023623696, iteration: 392413
loss: 1.065852165222168,grad_norm: 0.761742401650301, iteration: 392414
loss: 0.9818166494369507,grad_norm: 0.7395575258979489, iteration: 392415
loss: 0.9811748266220093,grad_norm: 0.7034942865820719, iteration: 392416
loss: 0.994425892829895,grad_norm: 0.6556991138907029, iteration: 392417
loss: 1.0212630033493042,grad_norm: 0.7528520843037435, iteration: 392418
loss: 1.0035089254379272,grad_norm: 0.8114193348291026, iteration: 392419
loss: 1.0269324779510498,grad_norm: 0.823640467885624, iteration: 392420
loss: 1.0108916759490967,grad_norm: 0.79200931028503, iteration: 392421
loss: 1.0097699165344238,grad_norm: 0.8838314303112911, iteration: 392422
loss: 0.9988292455673218,grad_norm: 0.7841225782766307, iteration: 392423
loss: 1.0506685972213745,grad_norm: 0.825955665393309, iteration: 392424
loss: 1.0765023231506348,grad_norm: 0.9999996462483605, iteration: 392425
loss: 1.026200294494629,grad_norm: 0.6538727975101946, iteration: 392426
loss: 1.0073325634002686,grad_norm: 0.7241526557219173, iteration: 392427
loss: 0.9775500297546387,grad_norm: 0.9999991299067903, iteration: 392428
loss: 1.0085808038711548,grad_norm: 0.9016210193438349, iteration: 392429
loss: 1.0347979068756104,grad_norm: 0.8208379939416726, iteration: 392430
loss: 1.0097392797470093,grad_norm: 0.6912551099191728, iteration: 392431
loss: 0.9926507472991943,grad_norm: 0.999999107056583, iteration: 392432
loss: 1.0578997135162354,grad_norm: 0.811647970961631, iteration: 392433
loss: 0.97430419921875,grad_norm: 0.8478485103795685, iteration: 392434
loss: 0.9748799800872803,grad_norm: 0.6703731368919098, iteration: 392435
loss: 1.0229636430740356,grad_norm: 0.6621264611618017, iteration: 392436
loss: 1.0197738409042358,grad_norm: 0.9999989953569, iteration: 392437
loss: 1.0317282676696777,grad_norm: 0.8299086597000026, iteration: 392438
loss: 0.9966728091239929,grad_norm: 0.6300103489933959, iteration: 392439
loss: 0.9882762432098389,grad_norm: 0.7191545559886657, iteration: 392440
loss: 0.9974302649497986,grad_norm: 0.8047674149846002, iteration: 392441
loss: 1.0190973281860352,grad_norm: 0.9549211756693837, iteration: 392442
loss: 1.026548147201538,grad_norm: 0.9999990767248883, iteration: 392443
loss: 1.0165965557098389,grad_norm: 0.8933485247701118, iteration: 392444
loss: 0.9549582600593567,grad_norm: 0.8358942915822041, iteration: 392445
loss: 1.0259499549865723,grad_norm: 0.7940477579698028, iteration: 392446
loss: 1.048469066619873,grad_norm: 0.9119049512935193, iteration: 392447
loss: 0.9656306505203247,grad_norm: 0.9999998995954261, iteration: 392448
loss: 1.011406660079956,grad_norm: 0.944056773703818, iteration: 392449
loss: 1.0211429595947266,grad_norm: 0.9999996288609678, iteration: 392450
loss: 0.964011549949646,grad_norm: 0.8256841821172719, iteration: 392451
loss: 0.9845029711723328,grad_norm: 0.6627155527046316, iteration: 392452
loss: 0.9873490929603577,grad_norm: 0.8204215306630886, iteration: 392453
loss: 1.0035150051116943,grad_norm: 0.8119450811586589, iteration: 392454
loss: 1.0317152738571167,grad_norm: 0.9999998920945588, iteration: 392455
loss: 1.009068489074707,grad_norm: 0.8211594229875439, iteration: 392456
loss: 1.0100085735321045,grad_norm: 0.7784804924276681, iteration: 392457
loss: 1.015974998474121,grad_norm: 0.8781362689743364, iteration: 392458
loss: 1.0253900289535522,grad_norm: 0.9999999211679349, iteration: 392459
loss: 1.0466655492782593,grad_norm: 0.9477002145583525, iteration: 392460
loss: 0.9891940951347351,grad_norm: 0.6256696156104975, iteration: 392461
loss: 1.0169862508773804,grad_norm: 0.8691516784931983, iteration: 392462
loss: 0.9910466074943542,grad_norm: 0.9999994537745158, iteration: 392463
loss: 1.0334786176681519,grad_norm: 0.8700827825032512, iteration: 392464
loss: 0.969738245010376,grad_norm: 0.7899161298002735, iteration: 392465
loss: 1.0765985250473022,grad_norm: 0.9999991318404727, iteration: 392466
loss: 1.033529281616211,grad_norm: 0.714720792376431, iteration: 392467
loss: 0.9994969964027405,grad_norm: 0.7501844434410426, iteration: 392468
loss: 1.0781066417694092,grad_norm: 0.9066992502912742, iteration: 392469
loss: 0.9480823874473572,grad_norm: 0.792117754233906, iteration: 392470
loss: 1.0443780422210693,grad_norm: 0.9999990039993815, iteration: 392471
loss: 1.0461865663528442,grad_norm: 0.8792293852406278, iteration: 392472
loss: 1.0024375915527344,grad_norm: 0.7900760178077152, iteration: 392473
loss: 0.9639366269111633,grad_norm: 0.9999991254171845, iteration: 392474
loss: 1.0419830083847046,grad_norm: 0.7308307227703934, iteration: 392475
loss: 0.9871911406517029,grad_norm: 0.6396073531468901, iteration: 392476
loss: 1.036450982093811,grad_norm: 0.9999997992850183, iteration: 392477
loss: 1.0123416185379028,grad_norm: 0.7362626377617749, iteration: 392478
loss: 1.0760999917984009,grad_norm: 0.9999990800871514, iteration: 392479
loss: 1.0176100730895996,grad_norm: 0.8672630386896801, iteration: 392480
loss: 0.9800949692726135,grad_norm: 0.8946799517835928, iteration: 392481
loss: 0.973996102809906,grad_norm: 0.7897184293299129, iteration: 392482
loss: 1.054817795753479,grad_norm: 0.9999997671059032, iteration: 392483
loss: 0.9761191010475159,grad_norm: 0.7720721316404275, iteration: 392484
loss: 0.9749032855033875,grad_norm: 0.8209521883292533, iteration: 392485
loss: 0.9892269372940063,grad_norm: 0.8330087810454895, iteration: 392486
loss: 0.9998747706413269,grad_norm: 0.6808404465805609, iteration: 392487
loss: 0.9991054534912109,grad_norm: 0.8075692286793091, iteration: 392488
loss: 1.0153393745422363,grad_norm: 0.9999995978403746, iteration: 392489
loss: 0.9989436268806458,grad_norm: 0.8779311807418262, iteration: 392490
loss: 1.014407992362976,grad_norm: 0.9668630869989503, iteration: 392491
loss: 1.0280505418777466,grad_norm: 0.9999996915781761, iteration: 392492
loss: 1.0236961841583252,grad_norm: 0.8608925709881696, iteration: 392493
loss: 1.02371346950531,grad_norm: 0.8327391348044534, iteration: 392494
loss: 1.008686900138855,grad_norm: 0.8255682309881164, iteration: 392495
loss: 0.9882563352584839,grad_norm: 0.6997063774697359, iteration: 392496
loss: 1.00542414188385,grad_norm: 0.9878902269100901, iteration: 392497
loss: 1.0265792608261108,grad_norm: 0.7441332189663176, iteration: 392498
loss: 1.0034016370773315,grad_norm: 0.7830678054373921, iteration: 392499
loss: 0.9941299557685852,grad_norm: 0.7380742758354569, iteration: 392500
loss: 0.9813488721847534,grad_norm: 0.7263201073067687, iteration: 392501
loss: 0.9625981450080872,grad_norm: 0.8098117567040601, iteration: 392502
loss: 1.0104773044586182,grad_norm: 0.684338501319812, iteration: 392503
loss: 1.00302255153656,grad_norm: 0.918577422008538, iteration: 392504
loss: 1.0239522457122803,grad_norm: 0.8119169135453329, iteration: 392505
loss: 0.9533662796020508,grad_norm: 0.7761535072143454, iteration: 392506
loss: 0.9911080598831177,grad_norm: 0.7454455634353306, iteration: 392507
loss: 1.0118608474731445,grad_norm: 0.8794235530095782, iteration: 392508
loss: 0.9949384331703186,grad_norm: 0.8747430454743853, iteration: 392509
loss: 0.998386800289154,grad_norm: 0.9999997956194999, iteration: 392510
loss: 1.0138375759124756,grad_norm: 0.9185313370012613, iteration: 392511
loss: 1.02366304397583,grad_norm: 0.7400458439997157, iteration: 392512
loss: 1.0524317026138306,grad_norm: 0.9451895714364466, iteration: 392513
loss: 1.0119446516036987,grad_norm: 0.7529604487059992, iteration: 392514
loss: 0.9792990684509277,grad_norm: 0.9999990762281719, iteration: 392515
loss: 0.996090292930603,grad_norm: 0.9999991275520019, iteration: 392516
loss: 0.9870162606239319,grad_norm: 0.7928003684690447, iteration: 392517
loss: 0.967962384223938,grad_norm: 0.6844131035424926, iteration: 392518
loss: 0.9817960262298584,grad_norm: 0.8170606302563101, iteration: 392519
loss: 1.000564694404602,grad_norm: 0.7561381397878709, iteration: 392520
loss: 0.963979959487915,grad_norm: 0.7742670706906298, iteration: 392521
loss: 0.9636654257774353,grad_norm: 0.8192422576130871, iteration: 392522
loss: 0.9801816940307617,grad_norm: 0.8429350070606351, iteration: 392523
loss: 0.960343062877655,grad_norm: 0.7957872112344825, iteration: 392524
loss: 0.9987730383872986,grad_norm: 0.7974467936809349, iteration: 392525
loss: 1.0040818452835083,grad_norm: 0.7114638628762366, iteration: 392526
loss: 1.0168863534927368,grad_norm: 0.9999995832716337, iteration: 392527
loss: 0.9973630905151367,grad_norm: 0.6475329473702933, iteration: 392528
loss: 0.9979499578475952,grad_norm: 0.7204964207238008, iteration: 392529
loss: 0.9941789507865906,grad_norm: 0.8074484837793198, iteration: 392530
loss: 1.0036789178848267,grad_norm: 0.8783248443554508, iteration: 392531
loss: 0.992767333984375,grad_norm: 0.746030995377664, iteration: 392532
loss: 1.0360652208328247,grad_norm: 0.6195939358040345, iteration: 392533
loss: 0.9926649332046509,grad_norm: 0.9496588429872833, iteration: 392534
loss: 0.979788601398468,grad_norm: 0.6783043786994952, iteration: 392535
loss: 1.0931370258331299,grad_norm: 0.9999992607663933, iteration: 392536
loss: 1.0025274753570557,grad_norm: 0.7652108384972609, iteration: 392537
loss: 0.9768734574317932,grad_norm: 0.8991840532287675, iteration: 392538
loss: 0.9969646334648132,grad_norm: 0.7937762045683653, iteration: 392539
loss: 0.9996493458747864,grad_norm: 0.7984857115187634, iteration: 392540
loss: 1.0022677183151245,grad_norm: 0.74745108675522, iteration: 392541
loss: 1.107950210571289,grad_norm: 0.9999999151268298, iteration: 392542
loss: 0.9975845813751221,grad_norm: 0.8752071960283132, iteration: 392543
loss: 1.001558542251587,grad_norm: 0.7730323602002795, iteration: 392544
loss: 0.9593489170074463,grad_norm: 0.7255751842124416, iteration: 392545
loss: 1.080756425857544,grad_norm: 0.9999999607577332, iteration: 392546
loss: 0.9548842310905457,grad_norm: 0.9174351237685614, iteration: 392547
loss: 1.0134507417678833,grad_norm: 0.8390710319892232, iteration: 392548
loss: 0.9768714308738708,grad_norm: 0.8981714516150504, iteration: 392549
loss: 1.1093806028366089,grad_norm: 0.9999992120431435, iteration: 392550
loss: 1.004824161529541,grad_norm: 0.7919439775440144, iteration: 392551
loss: 1.0225549936294556,grad_norm: 0.9999995374389572, iteration: 392552
loss: 0.9976279735565186,grad_norm: 0.6465600525593624, iteration: 392553
loss: 1.0443166494369507,grad_norm: 0.8474846333545812, iteration: 392554
loss: 0.9601176977157593,grad_norm: 0.7358736368664557, iteration: 392555
loss: 1.1373063325881958,grad_norm: 0.9999989810277733, iteration: 392556
loss: 0.9931226372718811,grad_norm: 0.9999993370203832, iteration: 392557
loss: 0.9973678588867188,grad_norm: 0.7606349582663976, iteration: 392558
loss: 1.0152337551116943,grad_norm: 0.9280146594810516, iteration: 392559
loss: 0.9772817492485046,grad_norm: 0.7810811181116445, iteration: 392560
loss: 0.9772264361381531,grad_norm: 0.7659953243161636, iteration: 392561
loss: 0.9670268893241882,grad_norm: 0.8631983366196145, iteration: 392562
loss: 0.9637600183486938,grad_norm: 0.7229893445623552, iteration: 392563
loss: 1.0491735935211182,grad_norm: 0.8934147690087751, iteration: 392564
loss: 0.9931781888008118,grad_norm: 0.8360299081804291, iteration: 392565
loss: 1.0658396482467651,grad_norm: 0.9999991527044406, iteration: 392566
loss: 0.9632518887519836,grad_norm: 0.8566516831922856, iteration: 392567
loss: 1.0405062437057495,grad_norm: 0.8540924988530664, iteration: 392568
loss: 1.0189753770828247,grad_norm: 0.919313174865552, iteration: 392569
loss: 1.119168758392334,grad_norm: 0.896963621696355, iteration: 392570
loss: 1.0190300941467285,grad_norm: 0.7701739143798131, iteration: 392571
loss: 0.9816750884056091,grad_norm: 0.8589239788742058, iteration: 392572
loss: 0.9901508092880249,grad_norm: 0.8765455529621095, iteration: 392573
loss: 0.9952333569526672,grad_norm: 0.6364660020564343, iteration: 392574
loss: 0.9974943399429321,grad_norm: 0.7667821647879097, iteration: 392575
loss: 0.9895225763320923,grad_norm: 0.985996231390846, iteration: 392576
loss: 1.0219054222106934,grad_norm: 0.7918621194430546, iteration: 392577
loss: 1.0279070138931274,grad_norm: 0.9999996798616816, iteration: 392578
loss: 0.9970447421073914,grad_norm: 0.702773405092324, iteration: 392579
loss: 1.0469423532485962,grad_norm: 0.7875583508262294, iteration: 392580
loss: 1.0225982666015625,grad_norm: 0.7479560811578292, iteration: 392581
loss: 1.029774785041809,grad_norm: 0.8903541644787021, iteration: 392582
loss: 0.9837599396705627,grad_norm: 0.8806346556886446, iteration: 392583
loss: 0.9889572262763977,grad_norm: 0.7805902747780368, iteration: 392584
loss: 0.965408980846405,grad_norm: 0.7810269177805337, iteration: 392585
loss: 1.1202744245529175,grad_norm: 0.9999996038774742, iteration: 392586
loss: 1.001538634300232,grad_norm: 0.7854444192601611, iteration: 392587
loss: 1.0463920831680298,grad_norm: 0.8054684227200457, iteration: 392588
loss: 0.9423894882202148,grad_norm: 0.7327075166614375, iteration: 392589
loss: 0.9880415201187134,grad_norm: 0.9999992497275881, iteration: 392590
loss: 0.9644882082939148,grad_norm: 0.8256958717770345, iteration: 392591
loss: 1.0484074354171753,grad_norm: 0.7994754448025047, iteration: 392592
loss: 0.9847667813301086,grad_norm: 0.7646781970501227, iteration: 392593
loss: 0.9951292872428894,grad_norm: 0.7825234964647781, iteration: 392594
loss: 0.9938737750053406,grad_norm: 0.9618623453226597, iteration: 392595
loss: 1.003835916519165,grad_norm: 0.6993075623275202, iteration: 392596
loss: 1.0134474039077759,grad_norm: 0.760802143885789, iteration: 392597
loss: 1.0003663301467896,grad_norm: 0.9564958433711254, iteration: 392598
loss: 0.9894042611122131,grad_norm: 0.8532942416326008, iteration: 392599
loss: 1.0531034469604492,grad_norm: 0.8280510986809614, iteration: 392600
loss: 1.0259318351745605,grad_norm: 0.8506227623080681, iteration: 392601
loss: 0.9883776307106018,grad_norm: 0.6873697347391291, iteration: 392602
loss: 1.0051544904708862,grad_norm: 0.9999998689515833, iteration: 392603
loss: 1.0183173418045044,grad_norm: 0.999999454434273, iteration: 392604
loss: 0.9955562949180603,grad_norm: 0.8804979415423886, iteration: 392605
loss: 0.9702304601669312,grad_norm: 0.843175189815387, iteration: 392606
loss: 0.9728866219520569,grad_norm: 0.823609767815698, iteration: 392607
loss: 0.9962872862815857,grad_norm: 0.8619413306750727, iteration: 392608
loss: 0.977735161781311,grad_norm: 0.7898952053483395, iteration: 392609
loss: 1.0452555418014526,grad_norm: 0.9999994297786958, iteration: 392610
loss: 1.0013328790664673,grad_norm: 0.8592838512972529, iteration: 392611
loss: 1.0495984554290771,grad_norm: 0.9999993979419933, iteration: 392612
loss: 1.0771358013153076,grad_norm: 0.9999994600966823, iteration: 392613
loss: 0.9793279767036438,grad_norm: 0.760232164185679, iteration: 392614
loss: 0.9746350049972534,grad_norm: 0.621718038382553, iteration: 392615
loss: 0.991366982460022,grad_norm: 0.7590606093095335, iteration: 392616
loss: 1.0281256437301636,grad_norm: 0.8794015154688543, iteration: 392617
loss: 0.9939355850219727,grad_norm: 0.7812610612517796, iteration: 392618
loss: 1.0429812669754028,grad_norm: 0.9831324669311685, iteration: 392619
loss: 1.0321991443634033,grad_norm: 0.8305811891629571, iteration: 392620
loss: 0.9905985593795776,grad_norm: 0.7772517663258609, iteration: 392621
loss: 0.9806262254714966,grad_norm: 0.7990966308093405, iteration: 392622
loss: 1.0052061080932617,grad_norm: 0.8918471009642629, iteration: 392623
loss: 0.9816661477088928,grad_norm: 0.6852987206869782, iteration: 392624
loss: 1.0284607410430908,grad_norm: 0.8854774652718976, iteration: 392625
loss: 1.132002353668213,grad_norm: 0.9999992546469422, iteration: 392626
loss: 1.0289335250854492,grad_norm: 0.8569168271025909, iteration: 392627
loss: 1.0330090522766113,grad_norm: 0.8402992097284817, iteration: 392628
loss: 1.0014671087265015,grad_norm: 0.8839800650414095, iteration: 392629
loss: 1.0977615118026733,grad_norm: 0.999999469861547, iteration: 392630
loss: 0.9915845990180969,grad_norm: 0.6688429483970018, iteration: 392631
loss: 1.004784107208252,grad_norm: 0.7824381158750221, iteration: 392632
loss: 1.046540379524231,grad_norm: 0.9999995544791627, iteration: 392633
loss: 0.9977837204933167,grad_norm: 0.8327685789718741, iteration: 392634
loss: 0.9644364714622498,grad_norm: 0.9999992772531984, iteration: 392635
loss: 0.9973247051239014,grad_norm: 0.9999999331722937, iteration: 392636
loss: 1.0395796298980713,grad_norm: 0.7348735291543171, iteration: 392637
loss: 0.9713765978813171,grad_norm: 0.8152420005508259, iteration: 392638
loss: 1.0428946018218994,grad_norm: 0.8277819711319342, iteration: 392639
loss: 1.009170651435852,grad_norm: 0.9999992584774933, iteration: 392640
loss: 1.0031852722167969,grad_norm: 0.8177055124934254, iteration: 392641
loss: 1.0287991762161255,grad_norm: 0.7358444645674754, iteration: 392642
loss: 0.9958942532539368,grad_norm: 0.8719375647047224, iteration: 392643
loss: 1.009440541267395,grad_norm: 0.683457116432414, iteration: 392644
loss: 0.9899016618728638,grad_norm: 0.8097587756926631, iteration: 392645
loss: 1.0130865573883057,grad_norm: 0.7746575432043236, iteration: 392646
loss: 0.9723832607269287,grad_norm: 0.7543484066234378, iteration: 392647
loss: 0.9849262237548828,grad_norm: 0.6959648338772583, iteration: 392648
loss: 0.9774272441864014,grad_norm: 0.8273659155066909, iteration: 392649
loss: 1.0292874574661255,grad_norm: 0.6763449530101245, iteration: 392650
loss: 1.0047551393508911,grad_norm: 0.999999334043213, iteration: 392651
loss: 0.9830839037895203,grad_norm: 0.898798021839194, iteration: 392652
loss: 0.982818603515625,grad_norm: 0.6772166798965241, iteration: 392653
loss: 0.9579903483390808,grad_norm: 0.8083606445256236, iteration: 392654
loss: 1.023209571838379,grad_norm: 0.8387687867900682, iteration: 392655
loss: 1.0681066513061523,grad_norm: 0.78788955429592, iteration: 392656
loss: 1.0195233821868896,grad_norm: 0.964616362672321, iteration: 392657
loss: 0.984870970249176,grad_norm: 0.698930602831488, iteration: 392658
loss: 1.00455641746521,grad_norm: 0.8560890891878125, iteration: 392659
loss: 1.0216537714004517,grad_norm: 0.7280202509941143, iteration: 392660
loss: 0.9594228267669678,grad_norm: 0.7540026396420603, iteration: 392661
loss: 0.9806165099143982,grad_norm: 0.8024876769481265, iteration: 392662
loss: 0.9707948565483093,grad_norm: 0.8343828861874264, iteration: 392663
loss: 1.0134133100509644,grad_norm: 0.8420229923717569, iteration: 392664
loss: 0.9864219427108765,grad_norm: 0.5872242965725263, iteration: 392665
loss: 0.9757860898971558,grad_norm: 0.7207926613585558, iteration: 392666
loss: 0.9721016883850098,grad_norm: 0.7583577571773474, iteration: 392667
loss: 0.9842534065246582,grad_norm: 0.7462877174407025, iteration: 392668
loss: 0.9968723654747009,grad_norm: 0.9999989885336263, iteration: 392669
loss: 0.9937933087348938,grad_norm: 0.7207214838934783, iteration: 392670
loss: 1.0931620597839355,grad_norm: 0.8491020661996947, iteration: 392671
loss: 1.0160483121871948,grad_norm: 0.9448264343957994, iteration: 392672
loss: 0.9721001982688904,grad_norm: 0.75248053379782, iteration: 392673
loss: 1.069656491279602,grad_norm: 0.9792337290796301, iteration: 392674
loss: 0.9942739605903625,grad_norm: 0.7806733552011966, iteration: 392675
loss: 1.0170387029647827,grad_norm: 0.8805247334239311, iteration: 392676
loss: 1.0068720579147339,grad_norm: 0.7480346746940431, iteration: 392677
loss: 0.9913195967674255,grad_norm: 0.7737048293282948, iteration: 392678
loss: 0.9878577589988708,grad_norm: 0.7583715367830131, iteration: 392679
loss: 1.0029906034469604,grad_norm: 0.8079736112319977, iteration: 392680
loss: 0.9802645444869995,grad_norm: 0.6737978225067611, iteration: 392681
loss: 0.9836003184318542,grad_norm: 0.8606044965652098, iteration: 392682
loss: 1.0286146402359009,grad_norm: 0.9237264145317494, iteration: 392683
loss: 1.0553638935089111,grad_norm: 0.9999994654569454, iteration: 392684
loss: 1.0146515369415283,grad_norm: 0.7281708071283899, iteration: 392685
loss: 0.9674981236457825,grad_norm: 0.7004772787650748, iteration: 392686
loss: 0.9779191613197327,grad_norm: 0.9450034801232837, iteration: 392687
loss: 0.9954536557197571,grad_norm: 0.7493447707901397, iteration: 392688
loss: 0.9927825927734375,grad_norm: 0.7388149295804068, iteration: 392689
loss: 1.0220108032226562,grad_norm: 0.9499573278046756, iteration: 392690
loss: 0.9865641593933105,grad_norm: 0.6990761610754892, iteration: 392691
loss: 0.9995943903923035,grad_norm: 0.7444958914629823, iteration: 392692
loss: 0.9949739575386047,grad_norm: 0.7717619448905774, iteration: 392693
loss: 1.0275862216949463,grad_norm: 0.9429217004249114, iteration: 392694
loss: 0.9866212010383606,grad_norm: 0.7911506599321646, iteration: 392695
loss: 0.9680156707763672,grad_norm: 0.7309670761264476, iteration: 392696
loss: 0.9957156777381897,grad_norm: 0.7274610444310264, iteration: 392697
loss: 1.0224061012268066,grad_norm: 0.9999991194674882, iteration: 392698
loss: 1.0097464323043823,grad_norm: 0.7165685484078927, iteration: 392699
loss: 0.9848407506942749,grad_norm: 0.7195348470125558, iteration: 392700
loss: 0.9794331789016724,grad_norm: 0.7921388035313143, iteration: 392701
loss: 1.0589148998260498,grad_norm: 0.9999994024671813, iteration: 392702
loss: 0.9808315634727478,grad_norm: 0.7194215263043234, iteration: 392703
loss: 0.962254524230957,grad_norm: 0.7223474115324656, iteration: 392704
loss: 1.0261486768722534,grad_norm: 0.8470427772914274, iteration: 392705
loss: 0.9869189858436584,grad_norm: 0.7766646572693736, iteration: 392706
loss: 1.0128974914550781,grad_norm: 0.7081616974218171, iteration: 392707
loss: 0.9918294548988342,grad_norm: 0.638820692230459, iteration: 392708
loss: 0.9953542351722717,grad_norm: 0.8389497892881963, iteration: 392709
loss: 1.0514219999313354,grad_norm: 0.9999991416739742, iteration: 392710
loss: 1.0160596370697021,grad_norm: 0.8430668929136748, iteration: 392711
loss: 0.9976726770401001,grad_norm: 0.903308750745743, iteration: 392712
loss: 1.001362681388855,grad_norm: 0.6344406331853427, iteration: 392713
loss: 1.0378745794296265,grad_norm: 0.8564430871843748, iteration: 392714
loss: 0.9941704273223877,grad_norm: 0.7684966398763147, iteration: 392715
loss: 0.9734036326408386,grad_norm: 0.8021067247386464, iteration: 392716
loss: 1.0159755945205688,grad_norm: 0.8312075332648037, iteration: 392717
loss: 1.0613774061203003,grad_norm: 0.8379314659905152, iteration: 392718
loss: 0.9778459072113037,grad_norm: 0.9304968021273102, iteration: 392719
loss: 1.038346529006958,grad_norm: 0.6928117538898704, iteration: 392720
loss: 1.0121173858642578,grad_norm: 0.9999992673390293, iteration: 392721
loss: 0.9769088625907898,grad_norm: 0.7706632998785612, iteration: 392722
loss: 1.0343272686004639,grad_norm: 0.9999992926197335, iteration: 392723
loss: 1.0077228546142578,grad_norm: 0.8418217954416086, iteration: 392724
loss: 1.0256948471069336,grad_norm: 0.9999991457757176, iteration: 392725
loss: 0.9718974828720093,grad_norm: 0.6156279629308402, iteration: 392726
loss: 0.9646266102790833,grad_norm: 0.7919405838566074, iteration: 392727
loss: 0.9834660887718201,grad_norm: 0.714408041113593, iteration: 392728
loss: 1.0103628635406494,grad_norm: 0.7251421953538895, iteration: 392729
loss: 1.0149765014648438,grad_norm: 0.8826714376555178, iteration: 392730
loss: 1.0332592725753784,grad_norm: 0.8527561200552216, iteration: 392731
loss: 1.000868797302246,grad_norm: 0.8513837438849413, iteration: 392732
loss: 1.0027379989624023,grad_norm: 0.9856532640750173, iteration: 392733
loss: 0.9768187403678894,grad_norm: 0.8133009346741835, iteration: 392734
loss: 0.9777712821960449,grad_norm: 0.7947245035130907, iteration: 392735
loss: 0.9739977121353149,grad_norm: 0.8133976031260646, iteration: 392736
loss: 1.106201410293579,grad_norm: 0.9999992879077493, iteration: 392737
loss: 0.9885084629058838,grad_norm: 0.799647398314564, iteration: 392738
loss: 1.0082722902297974,grad_norm: 0.7394484482459709, iteration: 392739
loss: 1.040940761566162,grad_norm: 0.8943718264844972, iteration: 392740
loss: 1.0179423093795776,grad_norm: 0.7978594135938661, iteration: 392741
loss: 0.9888453483581543,grad_norm: 0.8695304644062637, iteration: 392742
loss: 0.9855452179908752,grad_norm: 0.9155788729202617, iteration: 392743
loss: 0.9927313923835754,grad_norm: 0.8893640739171282, iteration: 392744
loss: 0.9929840564727783,grad_norm: 0.9999992080432374, iteration: 392745
loss: 1.0021724700927734,grad_norm: 0.8577999635430661, iteration: 392746
loss: 1.0145549774169922,grad_norm: 0.7788025993722878, iteration: 392747
loss: 1.0381652116775513,grad_norm: 0.9159506308339818, iteration: 392748
loss: 1.0110721588134766,grad_norm: 0.9999991012575716, iteration: 392749
loss: 1.024003505706787,grad_norm: 0.7772254963438874, iteration: 392750
loss: 1.0269443988800049,grad_norm: 0.7013833976629086, iteration: 392751
loss: 0.9664340019226074,grad_norm: 0.7592593958552786, iteration: 392752
loss: 1.0819423198699951,grad_norm: 0.9999996966725191, iteration: 392753
loss: 1.076413631439209,grad_norm: 0.9056944450671814, iteration: 392754
loss: 1.0836957693099976,grad_norm: 0.7731459501170278, iteration: 392755
loss: 0.9773191213607788,grad_norm: 0.6882186941794695, iteration: 392756
loss: 0.9693294167518616,grad_norm: 0.7463448647376705, iteration: 392757
loss: 1.0564429759979248,grad_norm: 0.7984668273233675, iteration: 392758
loss: 1.0045452117919922,grad_norm: 0.7353926349894883, iteration: 392759
loss: 1.0414974689483643,grad_norm: 0.9999990661354814, iteration: 392760
loss: 1.0357916355133057,grad_norm: 0.8295545280960624, iteration: 392761
loss: 1.0067270994186401,grad_norm: 0.684304744756268, iteration: 392762
loss: 1.006309986114502,grad_norm: 0.7907077259107876, iteration: 392763
loss: 1.0311100482940674,grad_norm: 0.7119499291787692, iteration: 392764
loss: 1.01165771484375,grad_norm: 0.7761739477872095, iteration: 392765
loss: 0.9989955425262451,grad_norm: 0.7970032159848677, iteration: 392766
loss: 1.0215412378311157,grad_norm: 0.8126024988118294, iteration: 392767
loss: 1.0280110836029053,grad_norm: 0.9862398829609832, iteration: 392768
loss: 0.9979369044303894,grad_norm: 0.8750391845914317, iteration: 392769
loss: 0.9924044013023376,grad_norm: 0.6571950425247303, iteration: 392770
loss: 1.0157976150512695,grad_norm: 0.9999992712582298, iteration: 392771
loss: 1.010383129119873,grad_norm: 0.7833441230749035, iteration: 392772
loss: 0.9968512058258057,grad_norm: 0.6828987764200016, iteration: 392773
loss: 0.9906678795814514,grad_norm: 0.8169753563561156, iteration: 392774
loss: 0.9460399150848389,grad_norm: 0.7069151792824044, iteration: 392775
loss: 1.0065118074417114,grad_norm: 0.8578121398722763, iteration: 392776
loss: 0.976468563079834,grad_norm: 0.8042534023419887, iteration: 392777
loss: 0.9906296133995056,grad_norm: 0.8940973826508998, iteration: 392778
loss: 1.0035134553909302,grad_norm: 0.7847062908358466, iteration: 392779
loss: 1.0040241479873657,grad_norm: 0.6701624329202174, iteration: 392780
loss: 0.9690689444541931,grad_norm: 0.8766658450778041, iteration: 392781
loss: 1.026984453201294,grad_norm: 0.806913273810492, iteration: 392782
loss: 1.0428051948547363,grad_norm: 0.7480743646246382, iteration: 392783
loss: 0.9605429172515869,grad_norm: 0.773589801955911, iteration: 392784
loss: 1.042075514793396,grad_norm: 0.7267010471929374, iteration: 392785
loss: 1.0044677257537842,grad_norm: 0.9999991518175364, iteration: 392786
loss: 1.0218740701675415,grad_norm: 0.8461639205554448, iteration: 392787
loss: 1.0246853828430176,grad_norm: 0.7429175527505295, iteration: 392788
loss: 0.9780300259590149,grad_norm: 0.9999989645313929, iteration: 392789
loss: 0.9925065636634827,grad_norm: 0.8566698078099252, iteration: 392790
loss: 0.9951852560043335,grad_norm: 0.8324809432902885, iteration: 392791
loss: 0.9684815406799316,grad_norm: 0.7695294169935734, iteration: 392792
loss: 0.9937553405761719,grad_norm: 0.8853350014884283, iteration: 392793
loss: 1.0060274600982666,grad_norm: 0.8552471808178082, iteration: 392794
loss: 0.9894496202468872,grad_norm: 0.7100199457116949, iteration: 392795
loss: 1.0260761976242065,grad_norm: 0.8017386676315895, iteration: 392796
loss: 0.9789716601371765,grad_norm: 0.7800507074641724, iteration: 392797
loss: 0.9797637462615967,grad_norm: 0.6952277426650305, iteration: 392798
loss: 0.9817456007003784,grad_norm: 0.7502259753891032, iteration: 392799
loss: 1.0593342781066895,grad_norm: 0.9999997363836797, iteration: 392800
loss: 0.9867348670959473,grad_norm: 0.6769998566782973, iteration: 392801
loss: 1.0126118659973145,grad_norm: 0.8569665195132808, iteration: 392802
loss: 1.0272674560546875,grad_norm: 0.7670793670606897, iteration: 392803
loss: 1.017391324043274,grad_norm: 0.6801505205299545, iteration: 392804
loss: 0.9685772061347961,grad_norm: 0.8718749927934134, iteration: 392805
loss: 1.0320078134536743,grad_norm: 0.6637016272837051, iteration: 392806
loss: 1.0352848768234253,grad_norm: 0.8596244047599453, iteration: 392807
loss: 0.992056667804718,grad_norm: 0.9999991928524411, iteration: 392808
loss: 1.0165053606033325,grad_norm: 0.7831861271452141, iteration: 392809
loss: 1.0623564720153809,grad_norm: 0.9982329346975608, iteration: 392810
loss: 1.0145436525344849,grad_norm: 0.7457526067674491, iteration: 392811
loss: 0.9671536684036255,grad_norm: 0.9999991022167443, iteration: 392812
loss: 0.9855541586875916,grad_norm: 0.7834257302562617, iteration: 392813
loss: 0.9921217560768127,grad_norm: 0.9451569125260219, iteration: 392814
loss: 1.0184301137924194,grad_norm: 0.9249706037257224, iteration: 392815
loss: 0.9952277541160583,grad_norm: 0.9999999502150277, iteration: 392816
loss: 0.9658911228179932,grad_norm: 0.786680472657374, iteration: 392817
loss: 1.0349200963974,grad_norm: 0.7898313892249238, iteration: 392818
loss: 0.9712963700294495,grad_norm: 0.7511464370657451, iteration: 392819
loss: 0.9934011697769165,grad_norm: 0.7978949365402369, iteration: 392820
loss: 0.9849135875701904,grad_norm: 0.72689311874805, iteration: 392821
loss: 0.9942232370376587,grad_norm: 0.7990675661492455, iteration: 392822
loss: 1.0469319820404053,grad_norm: 0.7886089489223738, iteration: 392823
loss: 0.9987539649009705,grad_norm: 0.8901946698429418, iteration: 392824
loss: 1.0043469667434692,grad_norm: 0.8986936795580788, iteration: 392825
loss: 0.9643048644065857,grad_norm: 0.9999990069580653, iteration: 392826
loss: 0.9904528260231018,grad_norm: 0.7991899936837429, iteration: 392827
loss: 1.0940016508102417,grad_norm: 0.7235003547693728, iteration: 392828
loss: 1.0307519435882568,grad_norm: 0.7071542577410986, iteration: 392829
loss: 0.9733191728591919,grad_norm: 0.7678258663658546, iteration: 392830
loss: 1.0963562726974487,grad_norm: 0.9999994212370236, iteration: 392831
loss: 1.00381600856781,grad_norm: 0.7998975229052481, iteration: 392832
loss: 1.014484167098999,grad_norm: 0.7640848128650496, iteration: 392833
loss: 1.0050066709518433,grad_norm: 0.7388418276996543, iteration: 392834
loss: 1.0168920755386353,grad_norm: 0.6913048709886068, iteration: 392835
loss: 1.044435739517212,grad_norm: 0.8058697032221598, iteration: 392836
loss: 0.9950977563858032,grad_norm: 0.8609615306704355, iteration: 392837
loss: 0.9702506065368652,grad_norm: 0.8892472493359338, iteration: 392838
loss: 1.0227231979370117,grad_norm: 0.6895061621913265, iteration: 392839
loss: 1.0075743198394775,grad_norm: 0.8766848839687992, iteration: 392840
loss: 1.00016188621521,grad_norm: 0.7478938085689165, iteration: 392841
loss: 1.006908893585205,grad_norm: 0.8770812329187385, iteration: 392842
loss: 0.9813038110733032,grad_norm: 0.7032912078266442, iteration: 392843
loss: 1.0199531316757202,grad_norm: 0.911915112122793, iteration: 392844
loss: 1.0352988243103027,grad_norm: 0.9249022307062346, iteration: 392845
loss: 0.9898965954780579,grad_norm: 0.8681777944376854, iteration: 392846
loss: 1.0766483545303345,grad_norm: 0.8023277214692468, iteration: 392847
loss: 0.99480140209198,grad_norm: 0.6276510881563183, iteration: 392848
loss: 1.0298410654067993,grad_norm: 0.8182875729845874, iteration: 392849
loss: 1.0409079790115356,grad_norm: 0.9999990734924276, iteration: 392850
loss: 1.0680201053619385,grad_norm: 0.9999990004125088, iteration: 392851
loss: 0.9586213827133179,grad_norm: 0.9063786023284274, iteration: 392852
loss: 0.9919922947883606,grad_norm: 0.9999992616107807, iteration: 392853
loss: 1.02321195602417,grad_norm: 0.8085731912904901, iteration: 392854
loss: 0.9663069248199463,grad_norm: 0.8882509100209892, iteration: 392855
loss: 1.066616415977478,grad_norm: 0.999999751423711, iteration: 392856
loss: 1.0021957159042358,grad_norm: 0.7300749101897961, iteration: 392857
loss: 1.0160270929336548,grad_norm: 0.8065894332001693, iteration: 392858
loss: 0.9768930077552795,grad_norm: 0.846126493254194, iteration: 392859
loss: 1.0268034934997559,grad_norm: 0.844879545916956, iteration: 392860
loss: 0.9922912120819092,grad_norm: 0.9223861168191366, iteration: 392861
loss: 1.005185604095459,grad_norm: 0.8872277812951315, iteration: 392862
loss: 1.0045312643051147,grad_norm: 0.9589229849550251, iteration: 392863
loss: 1.023676872253418,grad_norm: 0.7877791765967002, iteration: 392864
loss: 1.0028557777404785,grad_norm: 0.7683594188591376, iteration: 392865
loss: 0.976568877696991,grad_norm: 0.7605007814172419, iteration: 392866
loss: 0.9987832307815552,grad_norm: 0.8811442399144787, iteration: 392867
loss: 0.9688991904258728,grad_norm: 0.8501006460357601, iteration: 392868
loss: 1.0013378858566284,grad_norm: 0.7389121138351742, iteration: 392869
loss: 1.002846121788025,grad_norm: 0.8702984616256948, iteration: 392870
loss: 1.0171630382537842,grad_norm: 0.9999997387899519, iteration: 392871
loss: 1.0069512128829956,grad_norm: 0.7573521570047765, iteration: 392872
loss: 0.9963272213935852,grad_norm: 0.6911866030716322, iteration: 392873
loss: 1.005281925201416,grad_norm: 0.9999999147767892, iteration: 392874
loss: 1.0129717588424683,grad_norm: 0.930074947403422, iteration: 392875
loss: 1.0117748975753784,grad_norm: 0.7853389827596128, iteration: 392876
loss: 1.0037713050842285,grad_norm: 0.7356345665296126, iteration: 392877
loss: 0.9617619514465332,grad_norm: 0.8543936870543306, iteration: 392878
loss: 0.946894645690918,grad_norm: 0.7708750303671531, iteration: 392879
loss: 0.9840049147605896,grad_norm: 0.8978320116118012, iteration: 392880
loss: 0.9996160864830017,grad_norm: 0.8010328770442583, iteration: 392881
loss: 1.0286946296691895,grad_norm: 0.7473103418753696, iteration: 392882
loss: 1.014076828956604,grad_norm: 0.9999990809291502, iteration: 392883
loss: 1.0297101736068726,grad_norm: 0.8129597759560401, iteration: 392884
loss: 1.067062497138977,grad_norm: 0.9740901513375982, iteration: 392885
loss: 0.9856305718421936,grad_norm: 0.7906451407669929, iteration: 392886
loss: 1.0336353778839111,grad_norm: 0.6841377511060063, iteration: 392887
loss: 1.0535037517547607,grad_norm: 0.9999998487502121, iteration: 392888
loss: 1.0071367025375366,grad_norm: 0.840263540029307, iteration: 392889
loss: 1.0140459537506104,grad_norm: 0.7574238959806996, iteration: 392890
loss: 1.0530718564987183,grad_norm: 1.0000001235289295, iteration: 392891
loss: 1.0126944780349731,grad_norm: 0.9698859805861133, iteration: 392892
loss: 0.9773994088172913,grad_norm: 0.8011447429834317, iteration: 392893
loss: 0.963128924369812,grad_norm: 0.7732451763668665, iteration: 392894
loss: 1.014449119567871,grad_norm: 0.7300562690510103, iteration: 392895
loss: 1.0433303117752075,grad_norm: 0.7202892534224883, iteration: 392896
loss: 1.0512661933898926,grad_norm: 0.9999997906080077, iteration: 392897
loss: 1.0420252084732056,grad_norm: 0.826684825229383, iteration: 392898
loss: 1.0139161348342896,grad_norm: 0.9999989517926279, iteration: 392899
loss: 0.9930375814437866,grad_norm: 0.6746760761097091, iteration: 392900
loss: 1.0082608461380005,grad_norm: 0.9999992828619618, iteration: 392901
loss: 1.008835792541504,grad_norm: 0.7528806978345349, iteration: 392902
loss: 1.0090014934539795,grad_norm: 0.9156986972135194, iteration: 392903
loss: 0.9920095205307007,grad_norm: 0.7473621407008029, iteration: 392904
loss: 1.0107342004776,grad_norm: 0.9872915038894884, iteration: 392905
loss: 0.9802560210227966,grad_norm: 0.7370059185314136, iteration: 392906
loss: 1.0434772968292236,grad_norm: 0.9999994660848222, iteration: 392907
loss: 1.0312747955322266,grad_norm: 0.9999995712106061, iteration: 392908
loss: 0.9632731080055237,grad_norm: 0.7196919163517641, iteration: 392909
loss: 0.9817090034484863,grad_norm: 0.7193221560331974, iteration: 392910
loss: 1.0883358716964722,grad_norm: 0.9262004316952634, iteration: 392911
loss: 0.9678698778152466,grad_norm: 0.8781134842223078, iteration: 392912
loss: 1.010481834411621,grad_norm: 0.87966067967617, iteration: 392913
loss: 1.0367426872253418,grad_norm: 0.9999995637094444, iteration: 392914
loss: 0.9694488048553467,grad_norm: 0.7446778059949285, iteration: 392915
loss: 0.9952182769775391,grad_norm: 0.6828096739764096, iteration: 392916
loss: 1.0987299680709839,grad_norm: 0.999999242194202, iteration: 392917
loss: 0.9943529963493347,grad_norm: 0.8161203346926582, iteration: 392918
loss: 1.023276686668396,grad_norm: 0.7889009433168951, iteration: 392919
loss: 1.0192338228225708,grad_norm: 0.8922476510057901, iteration: 392920
loss: 0.9994449019432068,grad_norm: 0.9999993260801262, iteration: 392921
loss: 1.0062096118927002,grad_norm: 0.90483863506843, iteration: 392922
loss: 1.0026310682296753,grad_norm: 0.8719300967787432, iteration: 392923
loss: 0.9877166748046875,grad_norm: 0.8030412179502928, iteration: 392924
loss: 1.0649094581604004,grad_norm: 0.9999996891219265, iteration: 392925
loss: 1.0185407400131226,grad_norm: 0.7298186557390635, iteration: 392926
loss: 1.0126019716262817,grad_norm: 0.8305551585796935, iteration: 392927
loss: 1.0092791318893433,grad_norm: 0.8813265978380231, iteration: 392928
loss: 1.011910080909729,grad_norm: 0.9999992996674248, iteration: 392929
loss: 0.9871935844421387,grad_norm: 0.9999993597459761, iteration: 392930
loss: 1.013368010520935,grad_norm: 0.7224233382209873, iteration: 392931
loss: 0.9250422716140747,grad_norm: 0.9424609098337714, iteration: 392932
loss: 0.9847472310066223,grad_norm: 0.7022150169371832, iteration: 392933
loss: 1.0829166173934937,grad_norm: 0.845078704244825, iteration: 392934
loss: 0.9970141053199768,grad_norm: 0.9428836894064753, iteration: 392935
loss: 0.9941107034683228,grad_norm: 0.7942470352108351, iteration: 392936
loss: 0.9915081262588501,grad_norm: 0.8679481490042558, iteration: 392937
loss: 1.027227759361267,grad_norm: 0.897617099611945, iteration: 392938
loss: 0.9595401287078857,grad_norm: 0.6930408960350382, iteration: 392939
loss: 0.9698754549026489,grad_norm: 0.9890915173309718, iteration: 392940
loss: 0.9978227615356445,grad_norm: 0.7516217207454426, iteration: 392941
loss: 1.0170141458511353,grad_norm: 0.7556641105417398, iteration: 392942
loss: 1.0108568668365479,grad_norm: 0.9762430978104903, iteration: 392943
loss: 1.0170774459838867,grad_norm: 0.7064145796056311, iteration: 392944
loss: 0.9798975586891174,grad_norm: 0.8034466221900732, iteration: 392945
loss: 1.0161725282669067,grad_norm: 0.7516680099281694, iteration: 392946
loss: 1.0193564891815186,grad_norm: 0.7185696794222033, iteration: 392947
loss: 1.0204896926879883,grad_norm: 0.7245252707659209, iteration: 392948
loss: 0.9929930567741394,grad_norm: 0.9194610721874062, iteration: 392949
loss: 1.0306543111801147,grad_norm: 0.8052366088877866, iteration: 392950
loss: 1.0743904113769531,grad_norm: 0.9875477143388294, iteration: 392951
loss: 0.9904246926307678,grad_norm: 0.7961915032841005, iteration: 392952
loss: 1.0738948583602905,grad_norm: 0.9100701821824113, iteration: 392953
loss: 0.9932565093040466,grad_norm: 0.9922028580076713, iteration: 392954
loss: 1.002892017364502,grad_norm: 0.8232727470854925, iteration: 392955
loss: 1.0000449419021606,grad_norm: 0.7467657241527125, iteration: 392956
loss: 1.0034269094467163,grad_norm: 0.7626157938089576, iteration: 392957
loss: 0.9741120338439941,grad_norm: 0.7621218691464621, iteration: 392958
loss: 0.9856176376342773,grad_norm: 0.7987079326872789, iteration: 392959
loss: 1.214977741241455,grad_norm: 0.9999996489728826, iteration: 392960
loss: 1.0029351711273193,grad_norm: 0.8711611719583382, iteration: 392961
loss: 0.9822384715080261,grad_norm: 0.7939560331529536, iteration: 392962
loss: 0.9932496547698975,grad_norm: 0.7892611196950549, iteration: 392963
loss: 1.0872135162353516,grad_norm: 0.9610252047083102, iteration: 392964
loss: 1.0287307500839233,grad_norm: 0.9999993838074024, iteration: 392965
loss: 0.9778283834457397,grad_norm: 0.8823927238202319, iteration: 392966
loss: 0.9527910947799683,grad_norm: 0.7595816860469102, iteration: 392967
loss: 1.0048547983169556,grad_norm: 0.826298246160116, iteration: 392968
loss: 1.022567629814148,grad_norm: 0.844978663170066, iteration: 392969
loss: 0.9847666025161743,grad_norm: 0.7326596067656699, iteration: 392970
loss: 1.0033295154571533,grad_norm: 0.7975443615109518, iteration: 392971
loss: 1.001253604888916,grad_norm: 0.7385997835454763, iteration: 392972
loss: 1.0222231149673462,grad_norm: 0.9999996564061535, iteration: 392973
loss: 1.011650562286377,grad_norm: 0.7493940744584133, iteration: 392974
loss: 0.987168550491333,grad_norm: 0.6737869736245337, iteration: 392975
loss: 1.0109894275665283,grad_norm: 0.7366615246081887, iteration: 392976
loss: 0.9795215129852295,grad_norm: 0.9126850306895269, iteration: 392977
loss: 0.9772039651870728,grad_norm: 0.7151414611427989, iteration: 392978
loss: 1.2498779296875,grad_norm: 0.9999999616801016, iteration: 392979
loss: 1.075332522392273,grad_norm: 0.9569452910949793, iteration: 392980
loss: 1.0474188327789307,grad_norm: 0.9536839213299796, iteration: 392981
loss: 0.9971104860305786,grad_norm: 0.9759398108174262, iteration: 392982
loss: 1.0293924808502197,grad_norm: 0.7655130116555592, iteration: 392983
loss: 0.9664372801780701,grad_norm: 0.8079658864872916, iteration: 392984
loss: 0.9810487627983093,grad_norm: 0.8567630805423088, iteration: 392985
loss: 1.0353738069534302,grad_norm: 0.9999996642585086, iteration: 392986
loss: 1.011183261871338,grad_norm: 0.9133523510425366, iteration: 392987
loss: 0.9655519127845764,grad_norm: 0.6761803626572357, iteration: 392988
loss: 0.9815404415130615,grad_norm: 0.678639180600123, iteration: 392989
loss: 1.050568699836731,grad_norm: 0.9999994894875471, iteration: 392990
loss: 1.0045171976089478,grad_norm: 0.8061467436468793, iteration: 392991
loss: 1.0108124017715454,grad_norm: 0.8868696578214571, iteration: 392992
loss: 0.9809696078300476,grad_norm: 0.8626221132821768, iteration: 392993
loss: 1.002407431602478,grad_norm: 0.7326257724431116, iteration: 392994
loss: 1.0488003492355347,grad_norm: 0.8786802208112394, iteration: 392995
loss: 1.0098531246185303,grad_norm: 0.7083178861724013, iteration: 392996
loss: 1.0241081714630127,grad_norm: 0.8834199951229397, iteration: 392997
loss: 1.0345120429992676,grad_norm: 0.7389316555247596, iteration: 392998
loss: 0.996044397354126,grad_norm: 0.7380085983927935, iteration: 392999
loss: 0.9402704238891602,grad_norm: 0.769913620118069, iteration: 393000
loss: 1.03465735912323,grad_norm: 0.9557183706349004, iteration: 393001
loss: 0.9815019965171814,grad_norm: 0.7322616373027407, iteration: 393002
loss: 0.9851363301277161,grad_norm: 0.8672464658050757, iteration: 393003
loss: 1.0303475856781006,grad_norm: 0.7451284364755506, iteration: 393004
loss: 0.9966793656349182,grad_norm: 0.6665081631117592, iteration: 393005
loss: 0.9453686475753784,grad_norm: 0.7664939263301007, iteration: 393006
loss: 1.0368775129318237,grad_norm: 0.7636119099369479, iteration: 393007
loss: 1.023292899131775,grad_norm: 0.999999545966025, iteration: 393008
loss: 1.0625790357589722,grad_norm: 0.7472232255397376, iteration: 393009
loss: 1.0031818151474,grad_norm: 0.7864830684621805, iteration: 393010
loss: 0.9926174283027649,grad_norm: 0.9999991056564795, iteration: 393011
loss: 0.9727134108543396,grad_norm: 0.669517819471981, iteration: 393012
loss: 1.04855477809906,grad_norm: 0.8836174061518898, iteration: 393013
loss: 1.0022798776626587,grad_norm: 0.6384753253143028, iteration: 393014
loss: 1.0359777212142944,grad_norm: 0.9012830272362009, iteration: 393015
loss: 1.0152899026870728,grad_norm: 0.7237627448288012, iteration: 393016
loss: 1.2449790239334106,grad_norm: 0.99999973459541, iteration: 393017
loss: 0.9802670478820801,grad_norm: 0.730702138958732, iteration: 393018
loss: 0.95481276512146,grad_norm: 0.9999991472870097, iteration: 393019
loss: 1.0166856050491333,grad_norm: 0.7875286943658517, iteration: 393020
loss: 0.9993853569030762,grad_norm: 0.6631218019636471, iteration: 393021
loss: 0.9783512353897095,grad_norm: 0.7306044213822701, iteration: 393022
loss: 0.9724894165992737,grad_norm: 0.8935234833391181, iteration: 393023
loss: 1.0584347248077393,grad_norm: 0.8936457236439461, iteration: 393024
loss: 1.0250309705734253,grad_norm: 0.7864486458424486, iteration: 393025
loss: 1.0135053396224976,grad_norm: 0.7194308295840864, iteration: 393026
loss: 1.012381911277771,grad_norm: 0.8137618556021363, iteration: 393027
loss: 0.9858563542366028,grad_norm: 0.9638098074217906, iteration: 393028
loss: 0.9883253574371338,grad_norm: 0.7671972235077767, iteration: 393029
loss: 1.0269057750701904,grad_norm: 0.7402258907884613, iteration: 393030
loss: 0.9864941835403442,grad_norm: 0.8179373530812856, iteration: 393031
loss: 0.9890516400337219,grad_norm: 0.7786548444768226, iteration: 393032
loss: 1.0006181001663208,grad_norm: 0.8573558880154563, iteration: 393033
loss: 1.012160062789917,grad_norm: 0.7861163873531425, iteration: 393034
loss: 0.9968008399009705,grad_norm: 0.8254382210880961, iteration: 393035
loss: 0.9992827773094177,grad_norm: 0.9999991675407487, iteration: 393036
loss: 1.015625238418579,grad_norm: 0.8884498860909334, iteration: 393037
loss: 0.9607003331184387,grad_norm: 0.695556335874752, iteration: 393038
loss: 1.0128288269042969,grad_norm: 0.8193201891873975, iteration: 393039
loss: 1.0134775638580322,grad_norm: 0.8843092110652236, iteration: 393040
loss: 0.975145161151886,grad_norm: 0.7131079624495404, iteration: 393041
loss: 0.9732415676116943,grad_norm: 0.6753056352445611, iteration: 393042
loss: 0.9821307063102722,grad_norm: 0.8108917199428551, iteration: 393043
loss: 1.0260802507400513,grad_norm: 0.8930534827436208, iteration: 393044
loss: 1.0116312503814697,grad_norm: 0.9999992470563842, iteration: 393045
loss: 0.9752125144004822,grad_norm: 0.9999992114822384, iteration: 393046
loss: 1.0231564044952393,grad_norm: 0.7136427211307761, iteration: 393047
loss: 0.9593672752380371,grad_norm: 0.9140447420550086, iteration: 393048
loss: 1.0397124290466309,grad_norm: 0.7465132660471255, iteration: 393049
loss: 0.9935065507888794,grad_norm: 0.6882570636402278, iteration: 393050
loss: 1.0869776010513306,grad_norm: 0.784061924585724, iteration: 393051
loss: 1.028073787689209,grad_norm: 0.7532491806232279, iteration: 393052
loss: 0.9807450771331787,grad_norm: 0.7845510868490633, iteration: 393053
loss: 1.0064539909362793,grad_norm: 0.7752448965208547, iteration: 393054
loss: 0.9971078038215637,grad_norm: 0.888424986277973, iteration: 393055
loss: 1.000522494316101,grad_norm: 0.8263184611687023, iteration: 393056
loss: 1.0157842636108398,grad_norm: 0.8750644778014426, iteration: 393057
loss: 1.0056116580963135,grad_norm: 0.8758053397296576, iteration: 393058
loss: 1.020022988319397,grad_norm: 0.9999990576649641, iteration: 393059
loss: 1.0146889686584473,grad_norm: 0.6876651852592047, iteration: 393060
loss: 0.9915853142738342,grad_norm: 0.700041760839566, iteration: 393061
loss: 1.0006787776947021,grad_norm: 0.9079532199429575, iteration: 393062
loss: 1.0230059623718262,grad_norm: 0.9999996689029186, iteration: 393063
loss: 0.9811979532241821,grad_norm: 0.7228168630935738, iteration: 393064
loss: 0.9902761578559875,grad_norm: 0.924554257626015, iteration: 393065
loss: 0.9803786277770996,grad_norm: 0.7436414177185035, iteration: 393066
loss: 1.0452312231063843,grad_norm: 0.9999997778707969, iteration: 393067
loss: 1.0044959783554077,grad_norm: 0.8546950296415565, iteration: 393068
loss: 0.9769086241722107,grad_norm: 0.736145322917261, iteration: 393069
loss: 1.0335373878479004,grad_norm: 0.9999989918518163, iteration: 393070
loss: 1.006063461303711,grad_norm: 0.6958738894415476, iteration: 393071
loss: 0.9678670763969421,grad_norm: 0.9999991541471511, iteration: 393072
loss: 0.9886294603347778,grad_norm: 0.8178187131980331, iteration: 393073
loss: 0.9852038025856018,grad_norm: 0.8518041736329901, iteration: 393074
loss: 1.027662992477417,grad_norm: 0.714006602126814, iteration: 393075
loss: 0.994498610496521,grad_norm: 0.7458657044198338, iteration: 393076
loss: 0.9879531860351562,grad_norm: 0.6651174103907046, iteration: 393077
loss: 0.9847778081893921,grad_norm: 0.7434242228311377, iteration: 393078
loss: 0.9983727931976318,grad_norm: 0.7194785526843777, iteration: 393079
loss: 1.0085728168487549,grad_norm: 0.8036294223030573, iteration: 393080
loss: 0.97569739818573,grad_norm: 0.936380515634771, iteration: 393081
loss: 1.0731345415115356,grad_norm: 0.9999998992762524, iteration: 393082
loss: 0.9912564158439636,grad_norm: 0.8811287847264003, iteration: 393083
loss: 0.9850786328315735,grad_norm: 0.9510709082078496, iteration: 393084
loss: 1.031961441040039,grad_norm: 0.7392611470522639, iteration: 393085
loss: 1.0233814716339111,grad_norm: 1.0000000140773382, iteration: 393086
loss: 1.0967817306518555,grad_norm: 0.9999999656202582, iteration: 393087
loss: 0.9843475818634033,grad_norm: 0.9743191981092422, iteration: 393088
loss: 0.9731292724609375,grad_norm: 0.9999990272047802, iteration: 393089
loss: 0.9693559408187866,grad_norm: 0.6760109058976741, iteration: 393090
loss: 1.0050771236419678,grad_norm: 0.8508083801871861, iteration: 393091
loss: 0.9920722842216492,grad_norm: 0.7889339978514361, iteration: 393092
loss: 1.0173336267471313,grad_norm: 0.7855441068311629, iteration: 393093
loss: 1.0021475553512573,grad_norm: 0.7843999217522145, iteration: 393094
loss: 1.0017911195755005,grad_norm: 0.7492072181578809, iteration: 393095
loss: 1.0119324922561646,grad_norm: 0.844014282669215, iteration: 393096
loss: 1.020932674407959,grad_norm: 0.9223664796482695, iteration: 393097
loss: 1.0712238550186157,grad_norm: 0.9999998101032319, iteration: 393098
loss: 1.0148487091064453,grad_norm: 0.9680306245152217, iteration: 393099
loss: 0.9684109687805176,grad_norm: 0.8906784466968205, iteration: 393100
loss: 1.2785338163375854,grad_norm: 0.99999967175171, iteration: 393101
loss: 1.0046701431274414,grad_norm: 0.8659178798388985, iteration: 393102
loss: 1.0129305124282837,grad_norm: 0.7209009255801618, iteration: 393103
loss: 0.9650758504867554,grad_norm: 0.7532051853747629, iteration: 393104
loss: 1.0090007781982422,grad_norm: 0.7881590206322735, iteration: 393105
loss: 1.02028489112854,grad_norm: 0.9080970513058488, iteration: 393106
loss: 0.9637551307678223,grad_norm: 0.9754145875088452, iteration: 393107
loss: 1.0085047483444214,grad_norm: 0.6833233983751297, iteration: 393108
loss: 0.9799878597259521,grad_norm: 0.6461081659305246, iteration: 393109
loss: 1.0031814575195312,grad_norm: 0.72716855067813, iteration: 393110
loss: 1.0058715343475342,grad_norm: 0.9999992523854976, iteration: 393111
loss: 0.9969505667686462,grad_norm: 0.794250643378856, iteration: 393112
loss: 1.0118063688278198,grad_norm: 0.893961796230516, iteration: 393113
loss: 1.0190885066986084,grad_norm: 0.8833048673645508, iteration: 393114
loss: 0.9885165691375732,grad_norm: 0.9323183821680456, iteration: 393115
loss: 0.991540253162384,grad_norm: 0.7148797103513731, iteration: 393116
loss: 1.057185173034668,grad_norm: 0.9999997244004664, iteration: 393117
loss: 1.0231305360794067,grad_norm: 0.793380432247668, iteration: 393118
loss: 0.9911889433860779,grad_norm: 0.8377706985868911, iteration: 393119
loss: 1.0285972356796265,grad_norm: 0.8256426970254367, iteration: 393120
loss: 1.31252121925354,grad_norm: 0.9999997198365964, iteration: 393121
loss: 1.0120381116867065,grad_norm: 0.8218028891792688, iteration: 393122
loss: 1.000386118888855,grad_norm: 0.8916497807810848, iteration: 393123
loss: 0.9979419708251953,grad_norm: 0.9999991262268828, iteration: 393124
loss: 0.9813606142997742,grad_norm: 0.9612532712372382, iteration: 393125
loss: 0.9774112105369568,grad_norm: 0.8885042048844608, iteration: 393126
loss: 0.9829965829849243,grad_norm: 0.7721035371451868, iteration: 393127
loss: 1.0573910474777222,grad_norm: 0.9539982578423124, iteration: 393128
loss: 0.9550597071647644,grad_norm: 0.9072042811847655, iteration: 393129
loss: 0.9712182879447937,grad_norm: 0.829235582182391, iteration: 393130
loss: 0.9843180179595947,grad_norm: 0.8668167691379851, iteration: 393131
loss: 0.9629160165786743,grad_norm: 0.7236652073267393, iteration: 393132
loss: 0.9853103756904602,grad_norm: 0.8277232073646141, iteration: 393133
loss: 0.995810866355896,grad_norm: 0.7386114027792483, iteration: 393134
loss: 0.9887301325798035,grad_norm: 0.690199691500142, iteration: 393135
loss: 1.003699541091919,grad_norm: 0.8726660782425602, iteration: 393136
loss: 0.9750949740409851,grad_norm: 0.717375512223902, iteration: 393137
loss: 0.9814680218696594,grad_norm: 0.9052799220086569, iteration: 393138
loss: 1.0290073156356812,grad_norm: 0.9999998944680264, iteration: 393139
loss: 0.993901789188385,grad_norm: 0.8396186696828571, iteration: 393140
loss: 1.0030779838562012,grad_norm: 0.9201092050349143, iteration: 393141
loss: 1.0690014362335205,grad_norm: 0.8609890349316796, iteration: 393142
loss: 1.0133192539215088,grad_norm: 0.9999990583634659, iteration: 393143
loss: 1.0157748460769653,grad_norm: 0.9999995675406222, iteration: 393144
loss: 0.987637460231781,grad_norm: 0.7661515298989215, iteration: 393145
loss: 1.0315446853637695,grad_norm: 0.9262591955247629, iteration: 393146
loss: 1.0171828269958496,grad_norm: 0.8836327020684849, iteration: 393147
loss: 0.988167941570282,grad_norm: 0.8222744747103261, iteration: 393148
loss: 1.0680041313171387,grad_norm: 0.7352525698873731, iteration: 393149
loss: 1.0293904542922974,grad_norm: 0.8908606601522089, iteration: 393150
loss: 0.9806720614433289,grad_norm: 0.7336557562831605, iteration: 393151
loss: 0.9842227101325989,grad_norm: 0.6919011426750847, iteration: 393152
loss: 0.9876573085784912,grad_norm: 0.7432443983990359, iteration: 393153
loss: 1.2694514989852905,grad_norm: 0.9999998215261727, iteration: 393154
loss: 1.0120294094085693,grad_norm: 0.7957978087575789, iteration: 393155
loss: 1.0014550685882568,grad_norm: 0.737622794708721, iteration: 393156
loss: 1.0257172584533691,grad_norm: 0.9403905771717613, iteration: 393157
loss: 0.9744313955307007,grad_norm: 0.783441277058769, iteration: 393158
loss: 1.0183557271957397,grad_norm: 0.9999998649082497, iteration: 393159
loss: 1.0021183490753174,grad_norm: 0.7162743957612006, iteration: 393160
loss: 0.9882286787033081,grad_norm: 0.7761106369138151, iteration: 393161
loss: 0.972443699836731,grad_norm: 0.7479542112806905, iteration: 393162
loss: 0.9749541282653809,grad_norm: 0.7505717606851736, iteration: 393163
loss: 1.029966950416565,grad_norm: 0.9999991900282091, iteration: 393164
loss: 1.0086075067520142,grad_norm: 0.8084099847544245, iteration: 393165
loss: 1.0044115781784058,grad_norm: 0.757503699270197, iteration: 393166
loss: 1.0207242965698242,grad_norm: 0.9999995217481688, iteration: 393167
loss: 0.9930114150047302,grad_norm: 0.8185491018570616, iteration: 393168
loss: 1.0167893171310425,grad_norm: 0.7895721818089021, iteration: 393169
loss: 1.0132132768630981,grad_norm: 0.8169329203651042, iteration: 393170
loss: 1.00209379196167,grad_norm: 0.8638019761994346, iteration: 393171
loss: 1.0053743124008179,grad_norm: 0.9030995932650545, iteration: 393172
loss: 1.0244625806808472,grad_norm: 0.8338972462883082, iteration: 393173
loss: 1.1916064023971558,grad_norm: 0.9999995393534393, iteration: 393174
loss: 1.0150662660598755,grad_norm: 0.999999450065514, iteration: 393175
loss: 1.1624000072479248,grad_norm: 0.9999997411473507, iteration: 393176
loss: 0.9763400554656982,grad_norm: 0.8021912765310991, iteration: 393177
loss: 1.1052039861679077,grad_norm: 0.9999998830448473, iteration: 393178
loss: 1.264523983001709,grad_norm: 0.9999997685328891, iteration: 393179
loss: 1.0179619789123535,grad_norm: 0.8962564414347965, iteration: 393180
loss: 1.1212706565856934,grad_norm: 0.9999992019741142, iteration: 393181
loss: 0.9868454933166504,grad_norm: 0.7456068187012095, iteration: 393182
loss: 0.9744501709938049,grad_norm: 0.9665882235146199, iteration: 393183
loss: 1.0151395797729492,grad_norm: 0.7314281849465673, iteration: 393184
loss: 1.000138282775879,grad_norm: 0.6979670340255063, iteration: 393185
loss: 1.1659801006317139,grad_norm: 0.9999996869932937, iteration: 393186
loss: 1.0135776996612549,grad_norm: 0.737583003501293, iteration: 393187
loss: 1.1378086805343628,grad_norm: 0.9999995361064211, iteration: 393188
loss: 0.9794595837593079,grad_norm: 0.9999990319244626, iteration: 393189
loss: 1.001636028289795,grad_norm: 0.9999993371889028, iteration: 393190
loss: 1.1014509201049805,grad_norm: 0.9999996867573633, iteration: 393191
loss: 1.0789082050323486,grad_norm: 0.9999998604772742, iteration: 393192
loss: 1.0026509761810303,grad_norm: 0.7879604640269169, iteration: 393193
loss: 0.9843277931213379,grad_norm: 0.7524146286579825, iteration: 393194
loss: 0.9914222359657288,grad_norm: 0.8170915973517622, iteration: 393195
loss: 1.1420519351959229,grad_norm: 0.9999991504777069, iteration: 393196
loss: 1.1171319484710693,grad_norm: 0.9999999213888208, iteration: 393197
loss: 1.0220457315444946,grad_norm: 0.791227547834966, iteration: 393198
loss: 1.0528695583343506,grad_norm: 0.6967107288828245, iteration: 393199
loss: 0.9979521036148071,grad_norm: 0.792025122228558, iteration: 393200
loss: 1.003505825996399,grad_norm: 0.9999993062392549, iteration: 393201
loss: 0.9817448854446411,grad_norm: 0.744371538912604, iteration: 393202
loss: 1.0137306451797485,grad_norm: 0.999999044745474, iteration: 393203
loss: 0.9814925193786621,grad_norm: 0.7111451374073634, iteration: 393204
loss: 1.00847589969635,grad_norm: 0.865952729510482, iteration: 393205
loss: 1.030684232711792,grad_norm: 0.807993621498403, iteration: 393206
loss: 1.009279489517212,grad_norm: 0.7264938026222951, iteration: 393207
loss: 0.9787375330924988,grad_norm: 0.8431467306342655, iteration: 393208
loss: 0.9802820682525635,grad_norm: 0.7148887853816411, iteration: 393209
loss: 0.9739444851875305,grad_norm: 0.846246440320022, iteration: 393210
loss: 0.9630788564682007,grad_norm: 0.9384607235747154, iteration: 393211
loss: 0.959227979183197,grad_norm: 0.8919371948515029, iteration: 393212
loss: 1.0809576511383057,grad_norm: 0.920727517746861, iteration: 393213
loss: 1.3652253150939941,grad_norm: 0.9999996963311424, iteration: 393214
loss: 1.1420446634292603,grad_norm: 0.9999997663975739, iteration: 393215
loss: 0.9977429509162903,grad_norm: 0.7583834825859876, iteration: 393216
loss: 1.014244794845581,grad_norm: 0.8226167318991159, iteration: 393217
loss: 0.9944034814834595,grad_norm: 0.728408526499759, iteration: 393218
loss: 1.0022354125976562,grad_norm: 0.8160100574852741, iteration: 393219
loss: 1.0462875366210938,grad_norm: 0.9999994296475735, iteration: 393220
loss: 0.9903447031974792,grad_norm: 0.8434515793447901, iteration: 393221
loss: 0.9978609681129456,grad_norm: 0.7050730505680374, iteration: 393222
loss: 0.9770652651786804,grad_norm: 0.7477440452422355, iteration: 393223
loss: 1.0180273056030273,grad_norm: 0.8395856996256119, iteration: 393224
loss: 0.9620203971862793,grad_norm: 0.7825709666202413, iteration: 393225
loss: 0.9877163767814636,grad_norm: 0.9648867458061952, iteration: 393226
loss: 1.016569972038269,grad_norm: 0.7731316477784439, iteration: 393227
loss: 0.9992837905883789,grad_norm: 0.8716992707578277, iteration: 393228
loss: 0.9538347721099854,grad_norm: 0.999998999306442, iteration: 393229
loss: 1.1029504537582397,grad_norm: 0.9999997388146674, iteration: 393230
loss: 1.0235320329666138,grad_norm: 0.7713950214085321, iteration: 393231
loss: 0.9887499809265137,grad_norm: 0.834552832152951, iteration: 393232
loss: 0.9889704585075378,grad_norm: 0.761814347727211, iteration: 393233
loss: 0.9835444688796997,grad_norm: 0.8542214051792417, iteration: 393234
loss: 0.9424115419387817,grad_norm: 0.9911542568719242, iteration: 393235
loss: 0.9952706098556519,grad_norm: 0.7012179090655813, iteration: 393236
loss: 0.9986816644668579,grad_norm: 0.8076537425883237, iteration: 393237
loss: 1.0333914756774902,grad_norm: 0.9999998175113151, iteration: 393238
loss: 0.9931370615959167,grad_norm: 0.767618813868246, iteration: 393239
loss: 1.050296664237976,grad_norm: 0.9999993730034982, iteration: 393240
loss: 1.0083211660385132,grad_norm: 0.776061068586688, iteration: 393241
loss: 1.0006846189498901,grad_norm: 0.7892804763649166, iteration: 393242
loss: 1.0729600191116333,grad_norm: 0.7325010606371989, iteration: 393243
loss: 0.9916923642158508,grad_norm: 0.7303191926303425, iteration: 393244
loss: 0.9908085465431213,grad_norm: 0.7326233220708432, iteration: 393245
loss: 1.0064582824707031,grad_norm: 0.7263956906751745, iteration: 393246
loss: 0.9976658225059509,grad_norm: 0.9761980043471246, iteration: 393247
loss: 1.006002426147461,grad_norm: 0.8297201473888074, iteration: 393248
loss: 1.0383849143981934,grad_norm: 0.9999991402271268, iteration: 393249
loss: 1.0395511388778687,grad_norm: 0.7997881142183907, iteration: 393250
loss: 1.0313328504562378,grad_norm: 0.7979412771091238, iteration: 393251
loss: 1.039106845855713,grad_norm: 0.9999998485188639, iteration: 393252
loss: 0.9716446995735168,grad_norm: 0.8416370412266102, iteration: 393253
loss: 0.9975868463516235,grad_norm: 0.7249000515709817, iteration: 393254
loss: 1.0204991102218628,grad_norm: 0.9691376223342134, iteration: 393255
loss: 1.0304375886917114,grad_norm: 0.8174205639014885, iteration: 393256
loss: 1.0079436302185059,grad_norm: 0.8800022533388784, iteration: 393257
loss: 0.9746764898300171,grad_norm: 0.6596845287128216, iteration: 393258
loss: 0.9604009389877319,grad_norm: 0.6980894459690876, iteration: 393259
loss: 0.9680083990097046,grad_norm: 0.665766348339515, iteration: 393260
loss: 1.0115047693252563,grad_norm: 0.8778962912961527, iteration: 393261
loss: 1.019739031791687,grad_norm: 0.9742425886890584, iteration: 393262
loss: 1.028725028038025,grad_norm: 0.9999998838447598, iteration: 393263
loss: 0.9834703803062439,grad_norm: 0.7534967926786196, iteration: 393264
loss: 0.9910904169082642,grad_norm: 0.7291245863435285, iteration: 393265
loss: 0.9921977519989014,grad_norm: 0.8709652367910937, iteration: 393266
loss: 0.9732368588447571,grad_norm: 0.8600937655362128, iteration: 393267
loss: 1.0333844423294067,grad_norm: 0.6919464549549196, iteration: 393268
loss: 0.9938195943832397,grad_norm: 0.9999993437589003, iteration: 393269
loss: 1.0127733945846558,grad_norm: 0.7159328438526006, iteration: 393270
loss: 1.0248769521713257,grad_norm: 0.9444958847548947, iteration: 393271
loss: 0.985880970954895,grad_norm: 0.6520343691890498, iteration: 393272
loss: 0.9991500973701477,grad_norm: 1.00000002916756, iteration: 393273
loss: 1.1586707830429077,grad_norm: 0.9999992069906709, iteration: 393274
loss: 1.0235826969146729,grad_norm: 0.9999991071417896, iteration: 393275
loss: 1.0546174049377441,grad_norm: 0.9999991447951199, iteration: 393276
loss: 0.9886528253555298,grad_norm: 0.9962333208998931, iteration: 393277
loss: 1.046256422996521,grad_norm: 0.7926766637788234, iteration: 393278
loss: 1.043448805809021,grad_norm: 0.9999993368652182, iteration: 393279
loss: 0.9940432906150818,grad_norm: 0.999999671742002, iteration: 393280
loss: 1.0225063562393188,grad_norm: 0.6801849370108868, iteration: 393281
loss: 1.0077682733535767,grad_norm: 0.8777596973733304, iteration: 393282
loss: 0.9783356785774231,grad_norm: 0.8721434202040851, iteration: 393283
loss: 0.9791713356971741,grad_norm: 0.7250493916510024, iteration: 393284
loss: 0.9875202178955078,grad_norm: 0.7189620526972366, iteration: 393285
loss: 1.0092930793762207,grad_norm: 0.7942273595573104, iteration: 393286
loss: 0.9889295101165771,grad_norm: 0.9999992534356793, iteration: 393287
loss: 0.960428774356842,grad_norm: 0.6683241779279847, iteration: 393288
loss: 1.0369153022766113,grad_norm: 0.9999999053501525, iteration: 393289
loss: 1.037607192993164,grad_norm: 0.7663240094777014, iteration: 393290
loss: 1.0129348039627075,grad_norm: 0.7565065427015317, iteration: 393291
loss: 0.9575352668762207,grad_norm: 0.6701382074141159, iteration: 393292
loss: 0.9914340376853943,grad_norm: 0.999999159866844, iteration: 393293
loss: 0.9912335872650146,grad_norm: 0.77599044227027, iteration: 393294
loss: 1.010020136833191,grad_norm: 0.8986371876967563, iteration: 393295
loss: 0.9969345331192017,grad_norm: 0.7615111742115007, iteration: 393296
loss: 0.9791057705879211,grad_norm: 0.9999991380516392, iteration: 393297
loss: 1.0797879695892334,grad_norm: 0.7787504306368349, iteration: 393298
loss: 0.9798415303230286,grad_norm: 0.7521172938208133, iteration: 393299
loss: 1.001096248626709,grad_norm: 0.844647680805297, iteration: 393300
loss: 1.0009570121765137,grad_norm: 0.9999992404707656, iteration: 393301
loss: 0.9934611916542053,grad_norm: 0.860458958094335, iteration: 393302
loss: 1.0113035440444946,grad_norm: 0.9936186121246526, iteration: 393303
loss: 1.0453238487243652,grad_norm: 0.7825921079662582, iteration: 393304
loss: 0.9826383590698242,grad_norm: 0.6883362754154178, iteration: 393305
loss: 0.9668664932250977,grad_norm: 0.7449988704548316, iteration: 393306
loss: 1.03119695186615,grad_norm: 0.9999999334925125, iteration: 393307
loss: 0.9880268573760986,grad_norm: 0.7179815529653248, iteration: 393308
loss: 1.0213507413864136,grad_norm: 0.8129101666290441, iteration: 393309
loss: 1.0011247396469116,grad_norm: 0.7612998991957498, iteration: 393310
loss: 0.9974532127380371,grad_norm: 0.7770712298548232, iteration: 393311
loss: 1.1761361360549927,grad_norm: 0.9999996806122007, iteration: 393312
loss: 0.9775995016098022,grad_norm: 0.7575748828772942, iteration: 393313
loss: 1.0176384449005127,grad_norm: 0.9942532020417093, iteration: 393314
loss: 0.973943293094635,grad_norm: 0.7734286389058995, iteration: 393315
loss: 1.0077574253082275,grad_norm: 0.9999992140764228, iteration: 393316
loss: 1.0477198362350464,grad_norm: 0.9999990867214776, iteration: 393317
loss: 1.01811945438385,grad_norm: 0.9316776804771637, iteration: 393318
loss: 0.9918303489685059,grad_norm: 0.6918567380236286, iteration: 393319
loss: 1.050102710723877,grad_norm: 0.7506534080519717, iteration: 393320
loss: 0.9915367960929871,grad_norm: 0.7915105637025562, iteration: 393321
loss: 1.0017273426055908,grad_norm: 0.9999991047260456, iteration: 393322
loss: 1.0150086879730225,grad_norm: 0.7959651057880017, iteration: 393323
loss: 0.9791620969772339,grad_norm: 0.7431422973194943, iteration: 393324
loss: 1.1555445194244385,grad_norm: 0.9999994391503487, iteration: 393325
loss: 1.0249148607254028,grad_norm: 0.9999996281849544, iteration: 393326
loss: 0.9800916314125061,grad_norm: 0.7862377888923077, iteration: 393327
loss: 0.987659215927124,grad_norm: 0.9999990291335676, iteration: 393328
loss: 1.0300413370132446,grad_norm: 0.6754681969465527, iteration: 393329
loss: 1.0118540525436401,grad_norm: 0.8152097334409005, iteration: 393330
loss: 0.9919764399528503,grad_norm: 0.7665611557856776, iteration: 393331
loss: 1.0882312059402466,grad_norm: 0.999999131152426, iteration: 393332
loss: 1.0736156702041626,grad_norm: 0.9999998740756342, iteration: 393333
loss: 1.0387002229690552,grad_norm: 0.8402397904009148, iteration: 393334
loss: 0.9982219338417053,grad_norm: 0.7703703315613423, iteration: 393335
loss: 1.038625955581665,grad_norm: 0.9999997516235456, iteration: 393336
loss: 0.9750058054924011,grad_norm: 0.9199807308100618, iteration: 393337
loss: 1.0004463195800781,grad_norm: 0.7537440509928678, iteration: 393338
loss: 1.041783332824707,grad_norm: 0.9999991099724457, iteration: 393339
loss: 0.9801421761512756,grad_norm: 0.8447982463741078, iteration: 393340
loss: 1.0050994157791138,grad_norm: 0.9726716227594994, iteration: 393341
loss: 1.0038481950759888,grad_norm: 0.7441755219253533, iteration: 393342
loss: 1.0292413234710693,grad_norm: 0.7913684880421419, iteration: 393343
loss: 1.0956387519836426,grad_norm: 0.9999997028110081, iteration: 393344
loss: 1.0056108236312866,grad_norm: 0.9999993859084566, iteration: 393345
loss: 0.9487875699996948,grad_norm: 0.8187145538806975, iteration: 393346
loss: 1.00980544090271,grad_norm: 0.7341704470002844, iteration: 393347
loss: 1.018415093421936,grad_norm: 0.7777452732254776, iteration: 393348
loss: 0.9890896677970886,grad_norm: 0.8880118243538804, iteration: 393349
loss: 1.008534550666809,grad_norm: 0.7370718285509456, iteration: 393350
loss: 1.0138500928878784,grad_norm: 0.8224371837428343, iteration: 393351
loss: 1.0583094358444214,grad_norm: 0.9999994535015799, iteration: 393352
loss: 0.9816726446151733,grad_norm: 0.7059126472041151, iteration: 393353
loss: 1.025839924812317,grad_norm: 0.7959060519029524, iteration: 393354
loss: 1.0026806592941284,grad_norm: 0.6552825324067711, iteration: 393355
loss: 1.0673447847366333,grad_norm: 0.9999992314452921, iteration: 393356
loss: 1.0104269981384277,grad_norm: 0.8037920925348181, iteration: 393357
loss: 0.9847730398178101,grad_norm: 0.8672126473588868, iteration: 393358
loss: 1.0120071172714233,grad_norm: 0.8572913154245828, iteration: 393359
loss: 1.012491226196289,grad_norm: 0.7804558634745732, iteration: 393360
loss: 1.0137708187103271,grad_norm: 0.9144757471640378, iteration: 393361
loss: 1.0383515357971191,grad_norm: 0.8009826632366149, iteration: 393362
loss: 0.9674919843673706,grad_norm: 0.8294874645883126, iteration: 393363
loss: 0.9512784481048584,grad_norm: 0.8303111905669125, iteration: 393364
loss: 0.9998095035552979,grad_norm: 0.6881305455319691, iteration: 393365
loss: 0.9901597499847412,grad_norm: 0.9599127566182357, iteration: 393366
loss: 1.059952735900879,grad_norm: 0.9999991340819192, iteration: 393367
loss: 1.0127527713775635,grad_norm: 0.715638148541263, iteration: 393368
loss: 1.0326356887817383,grad_norm: 0.8610590444648697, iteration: 393369
loss: 1.0006827116012573,grad_norm: 0.9999990909028738, iteration: 393370
loss: 1.0226353406906128,grad_norm: 0.725393775753329, iteration: 393371
loss: 0.9875754117965698,grad_norm: 0.7929229433748464, iteration: 393372
loss: 0.9904525876045227,grad_norm: 0.8082097273535152, iteration: 393373
loss: 0.9902369379997253,grad_norm: 0.7109317223805544, iteration: 393374
loss: 1.0007338523864746,grad_norm: 0.9331519974855114, iteration: 393375
loss: 1.0261571407318115,grad_norm: 0.7920639254628642, iteration: 393376
loss: 0.9728793501853943,grad_norm: 0.8329637828695179, iteration: 393377
loss: 1.0264183282852173,grad_norm: 0.6388666989750732, iteration: 393378
loss: 0.9828535318374634,grad_norm: 0.9155432205913778, iteration: 393379
loss: 1.141640067100525,grad_norm: 0.9999998692175223, iteration: 393380
loss: 1.0083485841751099,grad_norm: 0.6308662332484624, iteration: 393381
loss: 0.9712574481964111,grad_norm: 0.729191146494084, iteration: 393382
loss: 0.9747127890586853,grad_norm: 0.7978393924291762, iteration: 393383
loss: 0.9775644540786743,grad_norm: 0.779793914455554, iteration: 393384
loss: 1.0108954906463623,grad_norm: 0.6952468078522823, iteration: 393385
loss: 0.9774243235588074,grad_norm: 0.7877846853761736, iteration: 393386
loss: 0.997933566570282,grad_norm: 0.9440164865411027, iteration: 393387
loss: 1.0154194831848145,grad_norm: 0.8607078037388872, iteration: 393388
loss: 1.0102736949920654,grad_norm: 0.7522065337227406, iteration: 393389
loss: 0.9954057931900024,grad_norm: 0.845531501900267, iteration: 393390
loss: 0.9887190461158752,grad_norm: 0.7753573868189647, iteration: 393391
loss: 0.9946978688240051,grad_norm: 0.7232145139108996, iteration: 393392
loss: 1.020894169807434,grad_norm: 0.6323118984812859, iteration: 393393
loss: 0.9613891243934631,grad_norm: 0.7228705047031281, iteration: 393394
loss: 0.9959445595741272,grad_norm: 0.6768008935441527, iteration: 393395
loss: 0.9771908521652222,grad_norm: 0.7880326574652377, iteration: 393396
loss: 0.9941968321800232,grad_norm: 0.733404846961951, iteration: 393397
loss: 0.9913687705993652,grad_norm: 0.770875511974149, iteration: 393398
loss: 0.9776337742805481,grad_norm: 0.8227081624035867, iteration: 393399
loss: 1.009009838104248,grad_norm: 0.6936393114304049, iteration: 393400
loss: 0.9980998635292053,grad_norm: 0.8225063948179379, iteration: 393401
loss: 1.0215898752212524,grad_norm: 0.9999995510653841, iteration: 393402
loss: 1.011666178703308,grad_norm: 0.8042510398046482, iteration: 393403
loss: 0.9934273362159729,grad_norm: 0.7799583621951599, iteration: 393404
loss: 1.0020525455474854,grad_norm: 0.9999991981222851, iteration: 393405
loss: 0.9935373663902283,grad_norm: 0.773741268924168, iteration: 393406
loss: 0.9664891362190247,grad_norm: 0.9999991926956786, iteration: 393407
loss: 0.9987617135047913,grad_norm: 0.802177990649185, iteration: 393408
loss: 0.9767076373100281,grad_norm: 0.7693577911734801, iteration: 393409
loss: 1.0511529445648193,grad_norm: 0.976008420651583, iteration: 393410
loss: 0.9746232032775879,grad_norm: 0.9041882517658025, iteration: 393411
loss: 1.0092164278030396,grad_norm: 0.9078622954102689, iteration: 393412
loss: 0.9667348861694336,grad_norm: 0.7441438309410334, iteration: 393413
loss: 1.0067602396011353,grad_norm: 0.778156534211452, iteration: 393414
loss: 0.9954286217689514,grad_norm: 0.8737749282598484, iteration: 393415
loss: 1.0626907348632812,grad_norm: 0.9999998732279386, iteration: 393416
loss: 0.9831592440605164,grad_norm: 0.9688258555859521, iteration: 393417
loss: 1.0087451934814453,grad_norm: 0.6496587435885421, iteration: 393418
loss: 0.9984391927719116,grad_norm: 0.9999990885231944, iteration: 393419
loss: 1.014562726020813,grad_norm: 0.7462315272716964, iteration: 393420
loss: 1.0127955675125122,grad_norm: 0.7407665926421225, iteration: 393421
loss: 1.0511071681976318,grad_norm: 0.6945671681499346, iteration: 393422
loss: 1.00831937789917,grad_norm: 0.779355903582331, iteration: 393423
loss: 1.0022315979003906,grad_norm: 0.819514687178388, iteration: 393424
loss: 0.9560050368309021,grad_norm: 0.6863145666089904, iteration: 393425
loss: 1.0074646472930908,grad_norm: 0.9999995259232591, iteration: 393426
loss: 0.9646776914596558,grad_norm: 0.7916611654274619, iteration: 393427
loss: 1.0081202983856201,grad_norm: 0.7920825971837604, iteration: 393428
loss: 1.017338514328003,grad_norm: 0.7836938439030985, iteration: 393429
loss: 0.9679691791534424,grad_norm: 0.8134105523578435, iteration: 393430
loss: 0.9487307071685791,grad_norm: 0.6876796663701098, iteration: 393431
loss: 0.9967974424362183,grad_norm: 0.8770681415964608, iteration: 393432
loss: 0.9924713969230652,grad_norm: 0.7715540595006506, iteration: 393433
loss: 1.0023996829986572,grad_norm: 0.9130285630305646, iteration: 393434
loss: 1.0277689695358276,grad_norm: 0.8219187501482167, iteration: 393435
loss: 0.9969720244407654,grad_norm: 0.8078832640350928, iteration: 393436
loss: 0.9964689612388611,grad_norm: 0.8319206737499231, iteration: 393437
loss: 0.9959747195243835,grad_norm: 0.8488817138144691, iteration: 393438
loss: 1.0048413276672363,grad_norm: 0.9999995285173542, iteration: 393439
loss: 1.027523159980774,grad_norm: 0.9999991960252558, iteration: 393440
loss: 1.0118099451065063,grad_norm: 0.7689966970619189, iteration: 393441
loss: 0.9755962491035461,grad_norm: 0.754518160838648, iteration: 393442
loss: 0.979792058467865,grad_norm: 0.8003673175080689, iteration: 393443
loss: 1.027952790260315,grad_norm: 0.9999992652510271, iteration: 393444
loss: 0.9935144782066345,grad_norm: 0.8011542689870436, iteration: 393445
loss: 1.0092039108276367,grad_norm: 0.9999999720004555, iteration: 393446
loss: 1.03520667552948,grad_norm: 0.7780918592161559, iteration: 393447
loss: 1.02488374710083,grad_norm: 0.9999995491104646, iteration: 393448
loss: 0.9827319979667664,grad_norm: 0.7649283085396353, iteration: 393449
loss: 1.0033667087554932,grad_norm: 0.8172424974138847, iteration: 393450
loss: 1.0222586393356323,grad_norm: 0.7664926491936597, iteration: 393451
loss: 0.9770883321762085,grad_norm: 0.8103491746368419, iteration: 393452
loss: 1.0207242965698242,grad_norm: 0.8273862670710439, iteration: 393453
loss: 0.9736266136169434,grad_norm: 0.6737306374802555, iteration: 393454
loss: 1.003923773765564,grad_norm: 0.7164888931732435, iteration: 393455
loss: 1.01810622215271,grad_norm: 0.9755308647995676, iteration: 393456
loss: 1.0043479204177856,grad_norm: 0.7629380537803317, iteration: 393457
loss: 0.9738100171089172,grad_norm: 0.8137590146541115, iteration: 393458
loss: 0.9779441356658936,grad_norm: 0.85254208852206, iteration: 393459
loss: 0.9908169507980347,grad_norm: 0.7528111977096489, iteration: 393460
loss: 0.9532812237739563,grad_norm: 0.9812945634689475, iteration: 393461
loss: 0.9945695400238037,grad_norm: 0.7485367009473605, iteration: 393462
loss: 0.9823804497718811,grad_norm: 0.8263669403006384, iteration: 393463
loss: 0.9916198253631592,grad_norm: 0.7594395439355734, iteration: 393464
loss: 0.9772146940231323,grad_norm: 0.6425684338314624, iteration: 393465
loss: 0.99636310338974,grad_norm: 0.7029642794693695, iteration: 393466
loss: 0.9942654967308044,grad_norm: 0.6784709881143378, iteration: 393467
loss: 0.9934176206588745,grad_norm: 0.7588976389244768, iteration: 393468
loss: 1.0114306211471558,grad_norm: 0.7522596464420529, iteration: 393469
loss: 0.9788808822631836,grad_norm: 0.8657751570082681, iteration: 393470
loss: 1.0274252891540527,grad_norm: 0.9049818422848206, iteration: 393471
loss: 0.9866095185279846,grad_norm: 0.9946563520348282, iteration: 393472
loss: 1.0015161037445068,grad_norm: 0.7835007181303091, iteration: 393473
loss: 0.9290582537651062,grad_norm: 0.9396913604512287, iteration: 393474
loss: 0.9806313514709473,grad_norm: 0.7283679903047532, iteration: 393475
loss: 0.9917500019073486,grad_norm: 0.6444351121504001, iteration: 393476
loss: 0.9840177893638611,grad_norm: 0.8561664707895382, iteration: 393477
loss: 0.9977249503135681,grad_norm: 0.8032183921463714, iteration: 393478
loss: 1.0134074687957764,grad_norm: 0.9999990862398679, iteration: 393479
loss: 1.044525384902954,grad_norm: 0.9999990779599062, iteration: 393480
loss: 0.9929267168045044,grad_norm: 0.7648555681811154, iteration: 393481
loss: 0.9777010083198547,grad_norm: 0.7235678937591953, iteration: 393482
loss: 1.0202631950378418,grad_norm: 0.7904933844033182, iteration: 393483
loss: 1.0205328464508057,grad_norm: 0.7693271612697122, iteration: 393484
loss: 0.9957841038703918,grad_norm: 0.7696422933474893, iteration: 393485
loss: 1.0529065132141113,grad_norm: 0.9999996737755268, iteration: 393486
loss: 1.104620337486267,grad_norm: 0.9999993615260373, iteration: 393487
loss: 1.0526154041290283,grad_norm: 0.9999996305974744, iteration: 393488
loss: 1.0446091890335083,grad_norm: 0.7654213354993116, iteration: 393489
loss: 1.0537899732589722,grad_norm: 0.7479568959391926, iteration: 393490
loss: 0.9951088428497314,grad_norm: 0.8198484964677021, iteration: 393491
loss: 1.0110836029052734,grad_norm: 0.7234370871678114, iteration: 393492
loss: 0.9841992855072021,grad_norm: 0.7589272757924026, iteration: 393493
loss: 0.9947596788406372,grad_norm: 0.9999991605727662, iteration: 393494
loss: 1.0517297983169556,grad_norm: 0.9999990689132412, iteration: 393495
loss: 1.1166937351226807,grad_norm: 0.999999662326139, iteration: 393496
loss: 1.0352776050567627,grad_norm: 0.8204126925271403, iteration: 393497
loss: 1.1657153367996216,grad_norm: 0.9999991010333807, iteration: 393498
loss: 0.9713862538337708,grad_norm: 0.7992214324939946, iteration: 393499
loss: 1.117874264717102,grad_norm: 0.9999993025911584, iteration: 393500
loss: 0.9865326881408691,grad_norm: 0.7990388921335053, iteration: 393501
loss: 0.9709189534187317,grad_norm: 0.9999991241454963, iteration: 393502
loss: 1.005997657775879,grad_norm: 0.8066898465398261, iteration: 393503
loss: 1.0040738582611084,grad_norm: 0.809697018743381, iteration: 393504
loss: 1.1430869102478027,grad_norm: 0.9999992747316607, iteration: 393505
loss: 0.9655030965805054,grad_norm: 0.8401126913932343, iteration: 393506
loss: 1.0439225435256958,grad_norm: 0.9999990819923794, iteration: 393507
loss: 1.05374276638031,grad_norm: 0.9999997417006125, iteration: 393508
loss: 1.0932660102844238,grad_norm: 0.9266231932014134, iteration: 393509
loss: 0.9972633123397827,grad_norm: 0.6544767902178288, iteration: 393510
loss: 1.030611276626587,grad_norm: 0.9999996899389872, iteration: 393511
loss: 0.9888885617256165,grad_norm: 0.8316246502771073, iteration: 393512
loss: 0.9924678802490234,grad_norm: 0.7323224267465274, iteration: 393513
loss: 1.0683907270431519,grad_norm: 0.9999995813823641, iteration: 393514
loss: 1.0408039093017578,grad_norm: 0.8818871185336987, iteration: 393515
loss: 1.0078206062316895,grad_norm: 0.9999995013930247, iteration: 393516
loss: 0.9979833960533142,grad_norm: 0.9999999902595744, iteration: 393517
loss: 0.9878931045532227,grad_norm: 0.8523246025287469, iteration: 393518
loss: 1.079586148262024,grad_norm: 0.9999990957560245, iteration: 393519
loss: 1.0145549774169922,grad_norm: 0.8258005692390223, iteration: 393520
loss: 0.9992907643318176,grad_norm: 0.9625709880005802, iteration: 393521
loss: 1.0164626836776733,grad_norm: 0.7639490116806935, iteration: 393522
loss: 1.0752946138381958,grad_norm: 0.7215246445484732, iteration: 393523
loss: 0.9756357669830322,grad_norm: 0.8221982941518395, iteration: 393524
loss: 1.0179574489593506,grad_norm: 0.8023175136119179, iteration: 393525
loss: 1.0362850427627563,grad_norm: 0.6289490362011628, iteration: 393526
loss: 1.1346101760864258,grad_norm: 0.8411282511417979, iteration: 393527
loss: 1.0879501104354858,grad_norm: 0.9999999519845195, iteration: 393528
loss: 1.1210274696350098,grad_norm: 0.999999363747782, iteration: 393529
loss: 0.9902718663215637,grad_norm: 0.8738810225833455, iteration: 393530
loss: 0.9900590777397156,grad_norm: 0.7551872888673472, iteration: 393531
loss: 1.1113120317459106,grad_norm: 0.9999997787282661, iteration: 393532
loss: 1.0785892009735107,grad_norm: 0.9999999875941195, iteration: 393533
loss: 1.0024011135101318,grad_norm: 0.9999990643499567, iteration: 393534
loss: 1.1459298133850098,grad_norm: 0.9999996478466822, iteration: 393535
loss: 1.0658385753631592,grad_norm: 0.9999989728285468, iteration: 393536
loss: 1.0658470392227173,grad_norm: 0.857291694958112, iteration: 393537
loss: 1.027417540550232,grad_norm: 0.8096180217806496, iteration: 393538
loss: 1.0202194452285767,grad_norm: 0.7662637100963401, iteration: 393539
loss: 1.141501545906067,grad_norm: 0.9999996576027184, iteration: 393540
loss: 1.0020326375961304,grad_norm: 0.8949060849377514, iteration: 393541
loss: 1.0307894945144653,grad_norm: 0.8975070058447607, iteration: 393542
loss: 1.0899512767791748,grad_norm: 0.9999990618283618, iteration: 393543
loss: 1.2953726053237915,grad_norm: 0.9999998577406345, iteration: 393544
loss: 0.9768959283828735,grad_norm: 0.8622562610862133, iteration: 393545
loss: 1.0151973962783813,grad_norm: 0.8833818528456597, iteration: 393546
loss: 1.0052088499069214,grad_norm: 0.8061180740615821, iteration: 393547
loss: 1.1746551990509033,grad_norm: 0.9999992942669395, iteration: 393548
loss: 1.1081784963607788,grad_norm: 0.9999993426211943, iteration: 393549
loss: 1.0488440990447998,grad_norm: 0.7195481446348657, iteration: 393550
loss: 1.1041107177734375,grad_norm: 1.0000000288383222, iteration: 393551
loss: 1.0167129039764404,grad_norm: 0.9949523336618712, iteration: 393552
loss: 1.0037097930908203,grad_norm: 0.69299267220974, iteration: 393553
loss: 1.0256527662277222,grad_norm: 0.9017188085465396, iteration: 393554
loss: 1.0492351055145264,grad_norm: 0.999999908043526, iteration: 393555
loss: 1.062251091003418,grad_norm: 0.9042666591739239, iteration: 393556
loss: 1.026383638381958,grad_norm: 0.9999994382204522, iteration: 393557
loss: 1.0253520011901855,grad_norm: 0.9999993177491223, iteration: 393558
loss: 1.0389580726623535,grad_norm: 0.9999995373626468, iteration: 393559
loss: 1.0028634071350098,grad_norm: 0.7462177829688641, iteration: 393560
loss: 0.9766446948051453,grad_norm: 0.8190328815531311, iteration: 393561
loss: 1.0756930112838745,grad_norm: 0.8165222014867739, iteration: 393562
loss: 1.0450012683868408,grad_norm: 0.9999992309069857, iteration: 393563
loss: 0.9649034738540649,grad_norm: 0.7095389068082849, iteration: 393564
loss: 1.1356189250946045,grad_norm: 0.9999998184448905, iteration: 393565
loss: 1.028784155845642,grad_norm: 0.9999996860956656, iteration: 393566
loss: 0.9778931736946106,grad_norm: 0.9316313537391547, iteration: 393567
loss: 1.1099047660827637,grad_norm: 0.8830670995037793, iteration: 393568
loss: 1.0028234720230103,grad_norm: 0.999999203134869, iteration: 393569
loss: 0.970425009727478,grad_norm: 0.746272899038005, iteration: 393570
loss: 1.070631504058838,grad_norm: 0.9999995991508068, iteration: 393571
loss: 0.9908528923988342,grad_norm: 0.9999995079504647, iteration: 393572
loss: 1.0743263959884644,grad_norm: 0.8984832997614705, iteration: 393573
loss: 0.9894886612892151,grad_norm: 0.9999993157244523, iteration: 393574
loss: 1.0248703956604004,grad_norm: 0.9999991901172011, iteration: 393575
loss: 1.0278000831604004,grad_norm: 0.8690058755060232, iteration: 393576
loss: 1.0572190284729004,grad_norm: 0.964061606033027, iteration: 393577
loss: 0.9994269013404846,grad_norm: 0.9237415211971902, iteration: 393578
loss: 1.1097320318222046,grad_norm: 1.0000000288419757, iteration: 393579
loss: 0.9846757054328918,grad_norm: 0.6709342371286542, iteration: 393580
loss: 0.9989323616027832,grad_norm: 0.6977649660561542, iteration: 393581
loss: 1.0127067565917969,grad_norm: 0.8317690664128797, iteration: 393582
loss: 1.0262168645858765,grad_norm: 0.809742457380212, iteration: 393583
loss: 0.9941430687904358,grad_norm: 0.8406695562195865, iteration: 393584
loss: 1.1002534627914429,grad_norm: 0.9999993402881395, iteration: 393585
loss: 0.9838520288467407,grad_norm: 0.6541968115450462, iteration: 393586
loss: 0.984954833984375,grad_norm: 0.9032797851695629, iteration: 393587
loss: 1.016136646270752,grad_norm: 0.7265254821742799, iteration: 393588
loss: 1.0236698389053345,grad_norm: 0.7156334618597049, iteration: 393589
loss: 1.0640102624893188,grad_norm: 0.9999999662690896, iteration: 393590
loss: 0.9786767363548279,grad_norm: 0.89471202353143, iteration: 393591
loss: 0.9907197952270508,grad_norm: 0.8302434820056922, iteration: 393592
loss: 1.001919150352478,grad_norm: 0.6786326872207699, iteration: 393593
loss: 1.0267257690429688,grad_norm: 0.9999994594949455, iteration: 393594
loss: 1.1120437383651733,grad_norm: 0.9999990613089297, iteration: 393595
loss: 1.1550240516662598,grad_norm: 0.9999995876920997, iteration: 393596
loss: 1.0210319757461548,grad_norm: 0.9999997329918336, iteration: 393597
loss: 1.0612250566482544,grad_norm: 0.8284476461663061, iteration: 393598
loss: 1.0031770467758179,grad_norm: 0.9999997707445881, iteration: 393599
loss: 1.0250557661056519,grad_norm: 0.8946438282503674, iteration: 393600
loss: 1.0751092433929443,grad_norm: 0.9999999649687485, iteration: 393601
loss: 1.0060442686080933,grad_norm: 0.7541448190224364, iteration: 393602
loss: 1.0303555727005005,grad_norm: 0.9999990421030801, iteration: 393603
loss: 0.9970076084136963,grad_norm: 0.8066441950084999, iteration: 393604
loss: 1.0046446323394775,grad_norm: 0.9999999723650866, iteration: 393605
loss: 1.0261808633804321,grad_norm: 0.9999991490770652, iteration: 393606
loss: 1.2489808797836304,grad_norm: 0.9999999332868157, iteration: 393607
loss: 0.9975427985191345,grad_norm: 0.7996899840548197, iteration: 393608
loss: 1.065276026725769,grad_norm: 0.9999995275161173, iteration: 393609
loss: 1.0721871852874756,grad_norm: 0.9999999338559361, iteration: 393610
loss: 1.0148167610168457,grad_norm: 0.9999992735297774, iteration: 393611
loss: 1.0101724863052368,grad_norm: 0.8623627772949929, iteration: 393612
loss: 1.0014251470565796,grad_norm: 0.901246623277378, iteration: 393613
loss: 1.0200588703155518,grad_norm: 0.9999989596070468, iteration: 393614
loss: 0.9949072599411011,grad_norm: 0.7992411629716064, iteration: 393615
loss: 0.9905075430870056,grad_norm: 0.9999997432093786, iteration: 393616
loss: 1.0746421813964844,grad_norm: 0.9999991779139494, iteration: 393617
loss: 1.0058009624481201,grad_norm: 0.7081812218954647, iteration: 393618
loss: 1.1385639905929565,grad_norm: 0.9999995187109514, iteration: 393619
loss: 1.0004836320877075,grad_norm: 0.9999991885975819, iteration: 393620
loss: 0.9920533895492554,grad_norm: 0.9316696116347082, iteration: 393621
loss: 0.9860880374908447,grad_norm: 0.8528860335302738, iteration: 393622
loss: 0.9718592762947083,grad_norm: 0.698243732916113, iteration: 393623
loss: 1.1957345008850098,grad_norm: 0.9999991772290706, iteration: 393624
loss: 0.9991401433944702,grad_norm: 0.8293238274638519, iteration: 393625
loss: 0.9942116737365723,grad_norm: 0.9999992115576292, iteration: 393626
loss: 1.0331933498382568,grad_norm: 0.9280288856035652, iteration: 393627
loss: 0.9820845723152161,grad_norm: 0.7002678589455292, iteration: 393628
loss: 0.9790932536125183,grad_norm: 0.6768626406520166, iteration: 393629
loss: 0.9948839545249939,grad_norm: 0.6850362430091238, iteration: 393630
loss: 1.0634325742721558,grad_norm: 0.9999999980344667, iteration: 393631
loss: 1.1050517559051514,grad_norm: 0.999999805448015, iteration: 393632
loss: 1.0159010887145996,grad_norm: 0.8345925238419183, iteration: 393633
loss: 0.9922736287117004,grad_norm: 0.8399396293103627, iteration: 393634
loss: 1.0029001235961914,grad_norm: 0.9649239260962483, iteration: 393635
loss: 1.0350275039672852,grad_norm: 0.9999997825658143, iteration: 393636
loss: 1.0087345838546753,grad_norm: 0.8543514248660085, iteration: 393637
loss: 1.0407366752624512,grad_norm: 0.8420612272469293, iteration: 393638
loss: 1.0087913274765015,grad_norm: 0.834791826618687, iteration: 393639
loss: 1.0239007472991943,grad_norm: 0.9343330054909715, iteration: 393640
loss: 1.1318974494934082,grad_norm: 0.9999999982894211, iteration: 393641
loss: 1.082000732421875,grad_norm: 0.99999901455204, iteration: 393642
loss: 1.0289411544799805,grad_norm: 0.9999992899601439, iteration: 393643
loss: 1.034928560256958,grad_norm: 0.9359974318162305, iteration: 393644
loss: 1.001132845878601,grad_norm: 0.6822451630713966, iteration: 393645
loss: 1.141680359840393,grad_norm: 0.8974165140550578, iteration: 393646
loss: 1.0138676166534424,grad_norm: 0.7600133075834725, iteration: 393647
loss: 0.9967475533485413,grad_norm: 0.9103630524933418, iteration: 393648
loss: 1.0000512599945068,grad_norm: 0.8598851414260448, iteration: 393649
loss: 1.046937346458435,grad_norm: 0.9999992534959034, iteration: 393650
loss: 0.9958850145339966,grad_norm: 0.9999990850710371, iteration: 393651
loss: 0.9853500127792358,grad_norm: 0.7374312497588352, iteration: 393652
loss: 1.011653184890747,grad_norm: 0.7871176247292287, iteration: 393653
loss: 1.0429394245147705,grad_norm: 0.9566919697965557, iteration: 393654
loss: 1.0027037858963013,grad_norm: 0.86396084654558, iteration: 393655
loss: 0.9776667356491089,grad_norm: 0.8739595552254585, iteration: 393656
loss: 1.026772141456604,grad_norm: 0.9999993660253815, iteration: 393657
loss: 1.0320433378219604,grad_norm: 0.7607219311958889, iteration: 393658
loss: 1.069498062133789,grad_norm: 0.7365549576512137, iteration: 393659
loss: 1.0430545806884766,grad_norm: 0.9999993511725713, iteration: 393660
loss: 1.112751841545105,grad_norm: 0.9999998500148481, iteration: 393661
loss: 1.001115322113037,grad_norm: 0.9131403308272109, iteration: 393662
loss: 1.0245689153671265,grad_norm: 0.8385828239432543, iteration: 393663
loss: 1.0346341133117676,grad_norm: 0.8604400773910309, iteration: 393664
loss: 1.0151281356811523,grad_norm: 0.7897968315468462, iteration: 393665
loss: 1.0634061098098755,grad_norm: 0.9999998198769549, iteration: 393666
loss: 1.0639290809631348,grad_norm: 0.9999993636158606, iteration: 393667
loss: 0.9759019613265991,grad_norm: 0.8573803855487535, iteration: 393668
loss: 1.0804944038391113,grad_norm: 0.9999998105049873, iteration: 393669
loss: 0.9718470573425293,grad_norm: 0.9218306666293576, iteration: 393670
loss: 1.1297847032546997,grad_norm: 0.9796594946260488, iteration: 393671
loss: 1.0386377573013306,grad_norm: 0.8354403816019139, iteration: 393672
loss: 1.0646519660949707,grad_norm: 0.9999999119666958, iteration: 393673
loss: 1.025874137878418,grad_norm: 0.6932282160078364, iteration: 393674
loss: 1.150042176246643,grad_norm: 0.9999998234723688, iteration: 393675
loss: 1.0519108772277832,grad_norm: 0.9999996706393087, iteration: 393676
loss: 1.038332462310791,grad_norm: 0.9999992483046221, iteration: 393677
loss: 1.0132477283477783,grad_norm: 0.917489981712956, iteration: 393678
loss: 1.0219597816467285,grad_norm: 0.8724115512868525, iteration: 393679
loss: 0.9984043836593628,grad_norm: 0.9999991583770962, iteration: 393680
loss: 1.1376125812530518,grad_norm: 0.9999997651921543, iteration: 393681
loss: 1.0775877237319946,grad_norm: 0.9999990478292146, iteration: 393682
loss: 1.0106546878814697,grad_norm: 0.7611820145000244, iteration: 393683
loss: 0.9857049584388733,grad_norm: 0.8188869913965211, iteration: 393684
loss: 0.9611970782279968,grad_norm: 0.7378000976050175, iteration: 393685
loss: 1.0260703563690186,grad_norm: 0.7090879379353475, iteration: 393686
loss: 1.0135838985443115,grad_norm: 0.9999998120064204, iteration: 393687
loss: 0.9906468987464905,grad_norm: 0.8076096915934201, iteration: 393688
loss: 1.042062759399414,grad_norm: 0.9999999426863837, iteration: 393689
loss: 1.012910008430481,grad_norm: 0.9999991028944241, iteration: 393690
loss: 1.0434819459915161,grad_norm: 0.9999991618967343, iteration: 393691
loss: 0.9881864190101624,grad_norm: 0.9999992053287939, iteration: 393692
loss: 1.0276806354522705,grad_norm: 0.8001682999117484, iteration: 393693
loss: 1.0148890018463135,grad_norm: 0.7890653988687025, iteration: 393694
loss: 1.0212290287017822,grad_norm: 0.9836792864919145, iteration: 393695
loss: 0.9896863102912903,grad_norm: 0.7574623059114093, iteration: 393696
loss: 1.0191866159439087,grad_norm: 0.8085570347595169, iteration: 393697
loss: 1.0174461603164673,grad_norm: 0.8143192697850903, iteration: 393698
loss: 0.9996950030326843,grad_norm: 0.9999991620620479, iteration: 393699
loss: 1.0681723356246948,grad_norm: 0.8371466799840636, iteration: 393700
loss: 1.0941832065582275,grad_norm: 0.9999998842080359, iteration: 393701
loss: 0.9817291498184204,grad_norm: 0.9999992028062448, iteration: 393702
loss: 0.9797514081001282,grad_norm: 0.6343015041182389, iteration: 393703
loss: 1.0366592407226562,grad_norm: 0.9999999210495966, iteration: 393704
loss: 0.9841179251670837,grad_norm: 0.9999990637306498, iteration: 393705
loss: 1.0420328378677368,grad_norm: 0.7576189175660454, iteration: 393706
loss: 0.9821318984031677,grad_norm: 0.8108166483554644, iteration: 393707
loss: 1.0739517211914062,grad_norm: 0.9999992873832739, iteration: 393708
loss: 1.0091352462768555,grad_norm: 0.9999995313926389, iteration: 393709
loss: 1.0164942741394043,grad_norm: 0.7523940702553042, iteration: 393710
loss: 1.037245512008667,grad_norm: 0.9053472176559206, iteration: 393711
loss: 1.0948033332824707,grad_norm: 0.879643751587682, iteration: 393712
loss: 1.10185706615448,grad_norm: 0.9696536951415028, iteration: 393713
loss: 1.1002174615859985,grad_norm: 0.8299707658071978, iteration: 393714
loss: 1.0173274278640747,grad_norm: 0.9999991149213117, iteration: 393715
loss: 1.1323723793029785,grad_norm: 0.9999999850085822, iteration: 393716
loss: 1.019223928451538,grad_norm: 0.9999995766027153, iteration: 393717
loss: 0.9882047176361084,grad_norm: 0.9999994507059099, iteration: 393718
loss: 1.133955478668213,grad_norm: 0.9999992194096577, iteration: 393719
loss: 0.9831936955451965,grad_norm: 0.9999991776444644, iteration: 393720
loss: 0.9642741084098816,grad_norm: 0.8166682660944844, iteration: 393721
loss: 1.118876576423645,grad_norm: 0.8875328810796329, iteration: 393722
loss: 1.0189521312713623,grad_norm: 0.9999994224563593, iteration: 393723
loss: 1.0522942543029785,grad_norm: 0.8593648639098231, iteration: 393724
loss: 0.9793682098388672,grad_norm: 0.9999996689086036, iteration: 393725
loss: 1.102158546447754,grad_norm: 0.9999997096611678, iteration: 393726
loss: 1.0413440465927124,grad_norm: 0.9336815321616954, iteration: 393727
loss: 1.232301115989685,grad_norm: 0.999999672101784, iteration: 393728
loss: 0.9657928347587585,grad_norm: 0.7078198005654858, iteration: 393729
loss: 1.0097264051437378,grad_norm: 0.7082043769995319, iteration: 393730
loss: 1.0077022314071655,grad_norm: 0.9744322884381305, iteration: 393731
loss: 1.1814358234405518,grad_norm: 0.9999994144203579, iteration: 393732
loss: 1.1704117059707642,grad_norm: 0.9999999347503004, iteration: 393733
loss: 1.1461923122406006,grad_norm: 0.9999996496130352, iteration: 393734
loss: 1.079596757888794,grad_norm: 0.9999999688556765, iteration: 393735
loss: 1.02119779586792,grad_norm: 0.9132370650878773, iteration: 393736
loss: 1.0097061395645142,grad_norm: 0.6999485231450601, iteration: 393737
loss: 1.1098048686981201,grad_norm: 0.9999997809963899, iteration: 393738
loss: 1.0098896026611328,grad_norm: 0.7751397220095504, iteration: 393739
loss: 1.2168992757797241,grad_norm: 0.9999997809144566, iteration: 393740
loss: 1.0699355602264404,grad_norm: 0.6709988431567186, iteration: 393741
loss: 1.260652780532837,grad_norm: 0.999999319375658, iteration: 393742
loss: 0.9965522885322571,grad_norm: 0.9999999584174549, iteration: 393743
loss: 1.2461531162261963,grad_norm: 0.9999993666756427, iteration: 393744
loss: 1.2146334648132324,grad_norm: 0.9999994777384703, iteration: 393745
loss: 1.0593074560165405,grad_norm: 0.9999999277643948, iteration: 393746
loss: 1.0056865215301514,grad_norm: 0.9999996202647062, iteration: 393747
loss: 1.0634177923202515,grad_norm: 0.7122142461124619, iteration: 393748
loss: 1.121941328048706,grad_norm: 0.9999996368555589, iteration: 393749
loss: 0.9880363941192627,grad_norm: 0.7485811360710672, iteration: 393750
loss: 1.0523929595947266,grad_norm: 0.9999994806527224, iteration: 393751
loss: 1.0370527505874634,grad_norm: 0.9999990264829146, iteration: 393752
loss: 0.9960097074508667,grad_norm: 0.9999996073609303, iteration: 393753
loss: 1.00933039188385,grad_norm: 0.9156905275935184, iteration: 393754
loss: 1.0942332744598389,grad_norm: 0.9999996765454003, iteration: 393755
loss: 1.0840024948120117,grad_norm: 0.999999234958394, iteration: 393756
loss: 1.043927550315857,grad_norm: 0.999999386516564, iteration: 393757
loss: 1.0169504880905151,grad_norm: 0.9999992058152027, iteration: 393758
loss: 1.0616587400436401,grad_norm: 0.9999994896377051, iteration: 393759
loss: 0.9680649042129517,grad_norm: 0.7610600590280497, iteration: 393760
loss: 1.0380983352661133,grad_norm: 0.7964765207247206, iteration: 393761
loss: 1.0259954929351807,grad_norm: 0.8242700974589711, iteration: 393762
loss: 1.0963033437728882,grad_norm: 0.9999995016024626, iteration: 393763
loss: 1.0667285919189453,grad_norm: 0.9999997870659898, iteration: 393764
loss: 1.1419718265533447,grad_norm: 0.9999991041337032, iteration: 393765
loss: 1.0100791454315186,grad_norm: 0.9077471797103834, iteration: 393766
loss: 1.0392194986343384,grad_norm: 0.7138928549319193, iteration: 393767
loss: 1.0266562700271606,grad_norm: 0.8122751942413107, iteration: 393768
loss: 1.0221768617630005,grad_norm: 0.7181201854806513, iteration: 393769
loss: 1.2258650064468384,grad_norm: 0.9999997044843864, iteration: 393770
loss: 1.1365618705749512,grad_norm: 0.999999705403695, iteration: 393771
loss: 1.1390113830566406,grad_norm: 0.9999998257776072, iteration: 393772
loss: 1.0595207214355469,grad_norm: 0.7054216689663672, iteration: 393773
loss: 1.1864620447158813,grad_norm: 0.999999724899308, iteration: 393774
loss: 1.1544502973556519,grad_norm: 0.9999996365529747, iteration: 393775
loss: 1.0931278467178345,grad_norm: 0.9999999088153602, iteration: 393776
loss: 0.9894043803215027,grad_norm: 0.8194305782629251, iteration: 393777
loss: 1.0670857429504395,grad_norm: 0.9999996495838277, iteration: 393778
loss: 1.035332441329956,grad_norm: 0.994646993886228, iteration: 393779
loss: 1.0391749143600464,grad_norm: 0.9464999403117674, iteration: 393780
loss: 1.0066992044448853,grad_norm: 0.9999990532902613, iteration: 393781
loss: 1.0395917892456055,grad_norm: 0.9898657252783465, iteration: 393782
loss: 1.0080112218856812,grad_norm: 0.8219876890538572, iteration: 393783
loss: 1.0194993019104004,grad_norm: 0.7574706473239932, iteration: 393784
loss: 1.0378024578094482,grad_norm: 0.9999993875847728, iteration: 393785
loss: 1.0277602672576904,grad_norm: 0.9999996420351899, iteration: 393786
loss: 0.9725792407989502,grad_norm: 0.8228999870352184, iteration: 393787
loss: 0.9984985589981079,grad_norm: 0.8261019611460761, iteration: 393788
loss: 1.211901068687439,grad_norm: 0.9999995259281036, iteration: 393789
loss: 0.9980581402778625,grad_norm: 0.8488590511395305, iteration: 393790
loss: 1.0903520584106445,grad_norm: 0.9999992282099129, iteration: 393791
loss: 1.002405047416687,grad_norm: 0.8545212774993894, iteration: 393792
loss: 1.0053086280822754,grad_norm: 0.9999992705287918, iteration: 393793
loss: 1.0647155046463013,grad_norm: 0.8602944836413422, iteration: 393794
loss: 0.9807881116867065,grad_norm: 0.8698459486166324, iteration: 393795
loss: 1.1434406042099,grad_norm: 0.9999998737440164, iteration: 393796
loss: 1.0067965984344482,grad_norm: 0.8396860956246948, iteration: 393797
loss: 1.0065367221832275,grad_norm: 0.9999992095320058, iteration: 393798
loss: 1.0324530601501465,grad_norm: 0.7134359047401546, iteration: 393799
loss: 1.0564717054367065,grad_norm: 0.9999999320517889, iteration: 393800
loss: 0.9802361130714417,grad_norm: 0.7567055155306351, iteration: 393801
loss: 1.0676908493041992,grad_norm: 0.9999990574172548, iteration: 393802
loss: 1.0399532318115234,grad_norm: 0.9999990441551322, iteration: 393803
loss: 0.9921636581420898,grad_norm: 0.5910060005771348, iteration: 393804
loss: 1.0351008176803589,grad_norm: 0.9999991430180744, iteration: 393805
loss: 1.0964072942733765,grad_norm: 0.9999998956176317, iteration: 393806
loss: 1.046868920326233,grad_norm: 0.999999111233492, iteration: 393807
loss: 1.0404996871948242,grad_norm: 0.891957792228849, iteration: 393808
loss: 0.9929306507110596,grad_norm: 0.9088527709865796, iteration: 393809
loss: 1.0418453216552734,grad_norm: 0.8024510282464112, iteration: 393810
loss: 1.0307890176773071,grad_norm: 0.9999992677389266, iteration: 393811
loss: 0.9944188594818115,grad_norm: 0.7455099826721022, iteration: 393812
loss: 1.0839518308639526,grad_norm: 0.9999996684511345, iteration: 393813
loss: 1.0399093627929688,grad_norm: 0.999999702872405, iteration: 393814
loss: 1.118993878364563,grad_norm: 0.999999722867262, iteration: 393815
loss: 1.0599781274795532,grad_norm: 0.9269296407784193, iteration: 393816
loss: 0.9989380836486816,grad_norm: 0.893838306250799, iteration: 393817
loss: 1.0290604829788208,grad_norm: 0.8073263954252765, iteration: 393818
loss: 1.0872646570205688,grad_norm: 0.9197616881283697, iteration: 393819
loss: 1.108883261680603,grad_norm: 0.9999990253231787, iteration: 393820
loss: 1.0116219520568848,grad_norm: 0.9999996917432226, iteration: 393821
loss: 1.147571086883545,grad_norm: 0.9220695925499987, iteration: 393822
loss: 0.9935652613639832,grad_norm: 0.7684223986514453, iteration: 393823
loss: 1.00156831741333,grad_norm: 0.7693031730449601, iteration: 393824
loss: 1.0359798669815063,grad_norm: 0.9206668926690796, iteration: 393825
loss: 0.9968951344490051,grad_norm: 0.9999999502003635, iteration: 393826
loss: 1.0065631866455078,grad_norm: 0.9301371319925147, iteration: 393827
loss: 1.0170228481292725,grad_norm: 0.8757115962822494, iteration: 393828
loss: 1.0522563457489014,grad_norm: 0.999999229234237, iteration: 393829
loss: 1.0306061506271362,grad_norm: 0.9999997571902548, iteration: 393830
loss: 0.9564666748046875,grad_norm: 0.7711921340502532, iteration: 393831
loss: 0.9685816168785095,grad_norm: 0.99999909132468, iteration: 393832
loss: 1.001171350479126,grad_norm: 0.9999992546713131, iteration: 393833
loss: 1.0539106130599976,grad_norm: 0.9999993498891976, iteration: 393834
loss: 1.0825408697128296,grad_norm: 0.9999990692177857, iteration: 393835
loss: 1.026660680770874,grad_norm: 0.9999999860987756, iteration: 393836
loss: 1.0467195510864258,grad_norm: 0.9999991717778149, iteration: 393837
loss: 1.0801893472671509,grad_norm: 0.9999997582350498, iteration: 393838
loss: 1.0620226860046387,grad_norm: 0.9999992176740665, iteration: 393839
loss: 1.0352920293807983,grad_norm: 0.8331037852063293, iteration: 393840
loss: 1.1721086502075195,grad_norm: 0.9999998399074532, iteration: 393841
loss: 1.212235450744629,grad_norm: 0.9999997049921769, iteration: 393842
loss: 1.0218554735183716,grad_norm: 0.9380389802661017, iteration: 393843
loss: 1.0094457864761353,grad_norm: 0.9137510626578288, iteration: 393844
loss: 1.0236023664474487,grad_norm: 0.9999990950907186, iteration: 393845
loss: 1.0146900415420532,grad_norm: 0.7449475134872434, iteration: 393846
loss: 1.0425381660461426,grad_norm: 0.9999994533911498, iteration: 393847
loss: 0.9938299655914307,grad_norm: 0.8758412641893455, iteration: 393848
loss: 1.0073031187057495,grad_norm: 0.7549079920677919, iteration: 393849
loss: 0.9901478886604309,grad_norm: 0.7978055105801666, iteration: 393850
loss: 1.1030967235565186,grad_norm: 0.9999997911251267, iteration: 393851
loss: 1.0409084558486938,grad_norm: 0.9999998532921207, iteration: 393852
loss: 0.9882314205169678,grad_norm: 0.7935480434540343, iteration: 393853
loss: 0.954579770565033,grad_norm: 0.9001875038543843, iteration: 393854
loss: 1.0008083581924438,grad_norm: 0.6900677308780779, iteration: 393855
loss: 1.0055681467056274,grad_norm: 0.768288529435988, iteration: 393856
loss: 1.0805031061172485,grad_norm: 0.9999996263805632, iteration: 393857
loss: 1.0198168754577637,grad_norm: 0.999999184644082, iteration: 393858
loss: 0.9914491772651672,grad_norm: 0.8426806413252776, iteration: 393859
loss: 1.1075186729431152,grad_norm: 1.0000000245536942, iteration: 393860
loss: 1.0467865467071533,grad_norm: 0.9345919386484178, iteration: 393861
loss: 0.996139645576477,grad_norm: 0.9999994201557736, iteration: 393862
loss: 1.1188985109329224,grad_norm: 0.9999998709120731, iteration: 393863
loss: 1.0840771198272705,grad_norm: 0.9999992604938028, iteration: 393864
loss: 1.0602697134017944,grad_norm: 0.9999993383419146, iteration: 393865
loss: 0.9729287624359131,grad_norm: 0.9567570801362953, iteration: 393866
loss: 1.0062496662139893,grad_norm: 0.83979930087084, iteration: 393867
loss: 1.0455070734024048,grad_norm: 0.9648032800156132, iteration: 393868
loss: 1.0186785459518433,grad_norm: 0.8631100525573957, iteration: 393869
loss: 1.001808762550354,grad_norm: 0.7709730820483816, iteration: 393870
loss: 1.091818928718567,grad_norm: 0.9999994426650236, iteration: 393871
loss: 1.0124272108078003,grad_norm: 0.7261841482861666, iteration: 393872
loss: 0.9898973703384399,grad_norm: 0.9999991877766821, iteration: 393873
loss: 1.0031728744506836,grad_norm: 0.8903114916420427, iteration: 393874
loss: 0.995540201663971,grad_norm: 0.9618812099491345, iteration: 393875
loss: 1.1784391403198242,grad_norm: 0.9999992582703193, iteration: 393876
loss: 1.0241838693618774,grad_norm: 0.8617244319107784, iteration: 393877
loss: 1.0469781160354614,grad_norm: 0.999999836650912, iteration: 393878
loss: 1.118850588798523,grad_norm: 0.9999999428087305, iteration: 393879
loss: 1.0422552824020386,grad_norm: 0.954713406410376, iteration: 393880
loss: 1.0646239519119263,grad_norm: 0.9999993712251097, iteration: 393881
loss: 1.1476342678070068,grad_norm: 0.9999990590819101, iteration: 393882
loss: 1.0562626123428345,grad_norm: 0.8637215950475027, iteration: 393883
loss: 1.075656533241272,grad_norm: 0.9999992294029448, iteration: 393884
loss: 1.161739706993103,grad_norm: 0.9999999006789245, iteration: 393885
loss: 1.0115020275115967,grad_norm: 0.8452620356707303, iteration: 393886
loss: 0.9955013990402222,grad_norm: 0.7909556785229097, iteration: 393887
loss: 1.0710879564285278,grad_norm: 0.9999993242946214, iteration: 393888
loss: 1.0163660049438477,grad_norm: 0.7569534393442575, iteration: 393889
loss: 0.9654039740562439,grad_norm: 0.9999994061570594, iteration: 393890
loss: 1.0800217390060425,grad_norm: 0.9999991184693253, iteration: 393891
loss: 1.0484358072280884,grad_norm: 0.9150967711178514, iteration: 393892
loss: 1.0119060277938843,grad_norm: 0.9999994721858367, iteration: 393893
loss: 1.1006853580474854,grad_norm: 0.9684731430653616, iteration: 393894
loss: 1.1166714429855347,grad_norm: 0.9999997479607262, iteration: 393895
loss: 1.024564266204834,grad_norm: 0.6918663203020669, iteration: 393896
loss: 1.0500394105911255,grad_norm: 0.999999782195351, iteration: 393897
loss: 1.0088869333267212,grad_norm: 0.7569267289953561, iteration: 393898
loss: 1.0717827081680298,grad_norm: 0.9999991923110069, iteration: 393899
loss: 1.018393635749817,grad_norm: 0.8985027024134264, iteration: 393900
loss: 1.0648291110992432,grad_norm: 0.9345629478462946, iteration: 393901
loss: 1.0042216777801514,grad_norm: 0.8976880371354102, iteration: 393902
loss: 0.9859117865562439,grad_norm: 0.7629261015204253, iteration: 393903
loss: 1.0445611476898193,grad_norm: 0.9418489286163929, iteration: 393904
loss: 1.079412817955017,grad_norm: 0.9754419630134459, iteration: 393905
loss: 1.2325414419174194,grad_norm: 0.9999995719898106, iteration: 393906
loss: 0.9489923119544983,grad_norm: 0.869233511813386, iteration: 393907
loss: 1.009179949760437,grad_norm: 0.744583320721263, iteration: 393908
loss: 1.0437852144241333,grad_norm: 0.701607164526849, iteration: 393909
loss: 1.0109740495681763,grad_norm: 0.7596413562592379, iteration: 393910
loss: 0.9913533926010132,grad_norm: 0.7324467927676499, iteration: 393911
loss: 0.9951371550559998,grad_norm: 0.7337148205157775, iteration: 393912
loss: 1.051694631576538,grad_norm: 0.9999990828370977, iteration: 393913
loss: 1.0968272686004639,grad_norm: 0.9999993196614794, iteration: 393914
loss: 1.0156325101852417,grad_norm: 0.9177298199794507, iteration: 393915
loss: 0.9703857898712158,grad_norm: 0.755132254443923, iteration: 393916
loss: 0.998841404914856,grad_norm: 0.7647681758874981, iteration: 393917
loss: 1.15530526638031,grad_norm: 0.9999995638826619, iteration: 393918
loss: 0.9631618857383728,grad_norm: 0.6840777016685219, iteration: 393919
loss: 1.0195568799972534,grad_norm: 0.6990373770354842, iteration: 393920
loss: 1.1734228134155273,grad_norm: 0.9999990269366194, iteration: 393921
loss: 1.096011996269226,grad_norm: 0.9999998398827477, iteration: 393922
loss: 1.0975004434585571,grad_norm: 0.9999990824588382, iteration: 393923
loss: 1.0612218379974365,grad_norm: 0.9999992678272496, iteration: 393924
loss: 0.9470513463020325,grad_norm: 0.9999993914766221, iteration: 393925
loss: 1.029510736465454,grad_norm: 0.9494643964158294, iteration: 393926
loss: 1.0607759952545166,grad_norm: 0.7149910305512991, iteration: 393927
loss: 1.1864817142486572,grad_norm: 0.9999999247616828, iteration: 393928
loss: 1.0307637453079224,grad_norm: 0.9155327468192836, iteration: 393929
loss: 1.0585986375808716,grad_norm: 0.9999999623396252, iteration: 393930
loss: 0.9788241386413574,grad_norm: 0.8764600092870803, iteration: 393931
loss: 1.0760287046432495,grad_norm: 0.9999995702248005, iteration: 393932
loss: 1.1220800876617432,grad_norm: 0.9999999686013442, iteration: 393933
loss: 1.0307304859161377,grad_norm: 0.7854655792338963, iteration: 393934
loss: 0.9474581480026245,grad_norm: 0.7707222325885051, iteration: 393935
loss: 1.0528355836868286,grad_norm: 0.9999993946551421, iteration: 393936
loss: 1.0847446918487549,grad_norm: 0.9999994928315751, iteration: 393937
loss: 1.0137298107147217,grad_norm: 0.728344749900331, iteration: 393938
loss: 1.014062762260437,grad_norm: 0.703897535826444, iteration: 393939
loss: 0.9860929250717163,grad_norm: 0.8760503247454553, iteration: 393940
loss: 1.2530673742294312,grad_norm: 0.9999999521610541, iteration: 393941
loss: 1.0102673768997192,grad_norm: 0.9999998530020651, iteration: 393942
loss: 0.975350558757782,grad_norm: 0.9999997186168514, iteration: 393943
loss: 1.022728443145752,grad_norm: 0.7645586826324267, iteration: 393944
loss: 1.0478407144546509,grad_norm: 0.9999992864049354, iteration: 393945
loss: 1.064091444015503,grad_norm: 0.9999990173899226, iteration: 393946
loss: 1.0069507360458374,grad_norm: 0.8157431601371978, iteration: 393947
loss: 1.0005450248718262,grad_norm: 0.8229389418350656, iteration: 393948
loss: 1.024207353591919,grad_norm: 0.8092707076037918, iteration: 393949
loss: 0.9993047118186951,grad_norm: 0.9999991996144741, iteration: 393950
loss: 1.028519868850708,grad_norm: 0.9999994013899448, iteration: 393951
loss: 0.9879553318023682,grad_norm: 0.9999990485439475, iteration: 393952
loss: 1.0758719444274902,grad_norm: 0.9791863436282013, iteration: 393953
loss: 1.0413943529129028,grad_norm: 0.7840618545743686, iteration: 393954
loss: 1.1084935665130615,grad_norm: 0.9999998995566146, iteration: 393955
loss: 0.9988289475440979,grad_norm: 0.9999997269322142, iteration: 393956
loss: 1.1450692415237427,grad_norm: 0.9999997749794051, iteration: 393957
loss: 1.060039758682251,grad_norm: 0.9808312502127698, iteration: 393958
loss: 1.2520158290863037,grad_norm: 0.9999990655428931, iteration: 393959
loss: 1.0003758668899536,grad_norm: 0.9999998980894438, iteration: 393960
loss: 1.1445399522781372,grad_norm: 0.9999995147110027, iteration: 393961
loss: 1.0479658842086792,grad_norm: 0.8745778506026451, iteration: 393962
loss: 1.0438860654830933,grad_norm: 0.9999991047365826, iteration: 393963
loss: 1.146381139755249,grad_norm: 0.9377599424228891, iteration: 393964
loss: 1.2218865156173706,grad_norm: 0.9999991507377135, iteration: 393965
loss: 0.9874935746192932,grad_norm: 0.9999993012156442, iteration: 393966
loss: 1.099032998085022,grad_norm: 0.8904680793127385, iteration: 393967
loss: 1.0414831638336182,grad_norm: 0.9999991221443921, iteration: 393968
loss: 1.2350451946258545,grad_norm: 0.9999999136052932, iteration: 393969
loss: 1.0380538702011108,grad_norm: 0.9999996678264218, iteration: 393970
loss: 1.1638635396957397,grad_norm: 0.9999992942070143, iteration: 393971
loss: 1.0353542566299438,grad_norm: 0.999999982185833, iteration: 393972
loss: 1.1184413433074951,grad_norm: 0.9999997682742925, iteration: 393973
loss: 0.9760224223136902,grad_norm: 0.8265231986709979, iteration: 393974
loss: 1.010451316833496,grad_norm: 0.7863131188819068, iteration: 393975
loss: 1.0611708164215088,grad_norm: 0.8551046470753942, iteration: 393976
loss: 1.0103306770324707,grad_norm: 0.9999995051901618, iteration: 393977
loss: 0.9881919026374817,grad_norm: 0.7117922187455341, iteration: 393978
loss: 1.1165997982025146,grad_norm: 0.9999998558264254, iteration: 393979
loss: 1.0729278326034546,grad_norm: 1.0000000057147835, iteration: 393980
loss: 1.140895128250122,grad_norm: 0.9999995851952189, iteration: 393981
loss: 1.054932713508606,grad_norm: 0.9999992931466086, iteration: 393982
loss: 1.0705528259277344,grad_norm: 0.9454676542266144, iteration: 393983
loss: 0.9892736077308655,grad_norm: 0.8597853629166138, iteration: 393984
loss: 1.0305533409118652,grad_norm: 0.9999997131906041, iteration: 393985
loss: 1.0500110387802124,grad_norm: 0.8774882477431352, iteration: 393986
loss: 1.0476397275924683,grad_norm: 0.9999990249259073, iteration: 393987
loss: 1.0282474756240845,grad_norm: 0.9999999916769112, iteration: 393988
loss: 1.0370395183563232,grad_norm: 0.9999991751556055, iteration: 393989
loss: 1.0497472286224365,grad_norm: 0.9999997222387735, iteration: 393990
loss: 1.030798316001892,grad_norm: 0.9999990144131641, iteration: 393991
loss: 1.0251343250274658,grad_norm: 0.8718332611134346, iteration: 393992
loss: 1.0487204790115356,grad_norm: 0.9999997478828545, iteration: 393993
loss: 1.0942182540893555,grad_norm: 0.9999997780415419, iteration: 393994
loss: 1.0984326601028442,grad_norm: 0.9999995440585918, iteration: 393995
loss: 1.157113790512085,grad_norm: 0.9999991265568831, iteration: 393996
loss: 1.0075047016143799,grad_norm: 0.9999990373222392, iteration: 393997
loss: 1.1192219257354736,grad_norm: 0.9999992493500187, iteration: 393998
loss: 1.0243052244186401,grad_norm: 0.8995086110130965, iteration: 393999
loss: 1.049970269203186,grad_norm: 0.9301415306547153, iteration: 394000
loss: 1.0436280965805054,grad_norm: 0.9999995076997289, iteration: 394001
loss: 1.009702205657959,grad_norm: 0.7933055080575058, iteration: 394002
loss: 0.9815908074378967,grad_norm: 0.8230849440221593, iteration: 394003
loss: 1.0264058113098145,grad_norm: 0.9999995642604352, iteration: 394004
loss: 1.0918385982513428,grad_norm: 0.9999993841512685, iteration: 394005
loss: 1.0428725481033325,grad_norm: 0.9092882643382716, iteration: 394006
loss: 1.3189945220947266,grad_norm: 0.9999998451407532, iteration: 394007
loss: 0.9708988070487976,grad_norm: 0.785311725118127, iteration: 394008
loss: 1.0459887981414795,grad_norm: 0.7913701617985058, iteration: 394009
loss: 0.9840124249458313,grad_norm: 0.9999999660222084, iteration: 394010
loss: 1.0128631591796875,grad_norm: 0.9662187769089473, iteration: 394011
loss: 0.9947648644447327,grad_norm: 0.9999999711416174, iteration: 394012
loss: 1.01356041431427,grad_norm: 0.7937405587953505, iteration: 394013
loss: 0.9933810234069824,grad_norm: 0.8294113280040525, iteration: 394014
loss: 1.0301063060760498,grad_norm: 0.9999993961909328, iteration: 394015
loss: 1.0116229057312012,grad_norm: 0.6794509754170208, iteration: 394016
loss: 0.9869336485862732,grad_norm: 0.989961219319742, iteration: 394017
loss: 0.989771842956543,grad_norm: 0.9999997903394215, iteration: 394018
loss: 1.1123480796813965,grad_norm: 1.0000000014245105, iteration: 394019
loss: 1.1304751634597778,grad_norm: 1.0000000404324114, iteration: 394020
loss: 1.033632516860962,grad_norm: 0.8337602455054951, iteration: 394021
loss: 0.9641222357749939,grad_norm: 0.8494819865080979, iteration: 394022
loss: 1.2157455682754517,grad_norm: 0.9999997161554662, iteration: 394023
loss: 1.0427155494689941,grad_norm: 1.000000008002393, iteration: 394024
loss: 1.0142149925231934,grad_norm: 0.7236757749817067, iteration: 394025
loss: 1.1601356267929077,grad_norm: 0.9999995281058802, iteration: 394026
loss: 1.0730681419372559,grad_norm: 0.9999996341599243, iteration: 394027
loss: 0.9856911897659302,grad_norm: 0.9999994164025805, iteration: 394028
loss: 1.0205769538879395,grad_norm: 0.9999995601958185, iteration: 394029
loss: 1.0224612951278687,grad_norm: 0.7291067632815499, iteration: 394030
loss: 1.0432114601135254,grad_norm: 0.9280316963285189, iteration: 394031
loss: 0.9956828355789185,grad_norm: 0.7629284054808569, iteration: 394032
loss: 1.091557264328003,grad_norm: 0.9490572105249714, iteration: 394033
loss: 1.0060272216796875,grad_norm: 0.9456956878274069, iteration: 394034
loss: 1.0196605920791626,grad_norm: 0.9999996229232433, iteration: 394035
loss: 1.0938729047775269,grad_norm: 0.9999997399409415, iteration: 394036
loss: 1.0782334804534912,grad_norm: 0.999999725773683, iteration: 394037
loss: 1.0731854438781738,grad_norm: 0.9999991607954952, iteration: 394038
loss: 1.0488924980163574,grad_norm: 0.9999990588998295, iteration: 394039
loss: 1.047406554222107,grad_norm: 0.9279374956394622, iteration: 394040
loss: 1.0192632675170898,grad_norm: 0.9999990725185827, iteration: 394041
loss: 1.0175087451934814,grad_norm: 0.999999077486072, iteration: 394042
loss: 1.1938014030456543,grad_norm: 0.9999997837220995, iteration: 394043
loss: 1.2815158367156982,grad_norm: 0.9999998851011449, iteration: 394044
loss: 0.9774226546287537,grad_norm: 0.7435842349980476, iteration: 394045
loss: 1.1571708917617798,grad_norm: 0.9999992454816976, iteration: 394046
loss: 1.0901756286621094,grad_norm: 0.9999999059159997, iteration: 394047
loss: 1.0362087488174438,grad_norm: 1.0000000206106532, iteration: 394048
loss: 1.0648698806762695,grad_norm: 0.9999996565875229, iteration: 394049
loss: 0.985241174697876,grad_norm: 0.7832729132032573, iteration: 394050
loss: 1.1472975015640259,grad_norm: 0.9999994125400825, iteration: 394051
loss: 1.0950480699539185,grad_norm: 0.9999998505899033, iteration: 394052
loss: 0.9857213497161865,grad_norm: 0.7937148339546238, iteration: 394053
loss: 0.9736880660057068,grad_norm: 0.8056671664761613, iteration: 394054
loss: 1.0115530490875244,grad_norm: 0.9999992121375474, iteration: 394055
loss: 1.0610911846160889,grad_norm: 0.9911396273266754, iteration: 394056
loss: 1.1678466796875,grad_norm: 0.9999993126896392, iteration: 394057
loss: 1.0408005714416504,grad_norm: 0.9999999224078344, iteration: 394058
loss: 1.00795316696167,grad_norm: 0.8412242269676945, iteration: 394059
loss: 1.0008143186569214,grad_norm: 0.9260158695301249, iteration: 394060
loss: 1.154787302017212,grad_norm: 0.9999997555390205, iteration: 394061
loss: 1.0263124704360962,grad_norm: 0.9999995167319987, iteration: 394062
loss: 1.0585116147994995,grad_norm: 0.9999994572170114, iteration: 394063
loss: 1.1707274913787842,grad_norm: 0.9999995086657089, iteration: 394064
loss: 1.0622719526290894,grad_norm: 0.9999999938671605, iteration: 394065
loss: 0.9951366782188416,grad_norm: 0.7435578530095455, iteration: 394066
loss: 1.0654375553131104,grad_norm: 0.9999999176346636, iteration: 394067
loss: 1.170579433441162,grad_norm: 0.9999996045565513, iteration: 394068
loss: 1.047996163368225,grad_norm: 0.9999999378460876, iteration: 394069
loss: 1.0077660083770752,grad_norm: 0.8308369349795417, iteration: 394070
loss: 0.9973175525665283,grad_norm: 0.7714519575266797, iteration: 394071
loss: 1.0679726600646973,grad_norm: 0.9447552590198156, iteration: 394072
loss: 1.0318968296051025,grad_norm: 0.7811209819940536, iteration: 394073
loss: 1.0807747840881348,grad_norm: 1.0000000377001903, iteration: 394074
loss: 1.1081607341766357,grad_norm: 0.999999928430166, iteration: 394075
loss: 0.9982096552848816,grad_norm: 0.8966902493976894, iteration: 394076
loss: 1.0414271354675293,grad_norm: 0.7234000870299294, iteration: 394077
loss: 1.035991907119751,grad_norm: 0.9999995325730145, iteration: 394078
loss: 1.126796841621399,grad_norm: 0.9999997203557505, iteration: 394079
loss: 1.1761631965637207,grad_norm: 0.9999997027759578, iteration: 394080
loss: 1.0034717321395874,grad_norm: 0.9999992856829292, iteration: 394081
loss: 1.1973066329956055,grad_norm: 0.9999997873394173, iteration: 394082
loss: 1.0831120014190674,grad_norm: 0.999999671922586, iteration: 394083
loss: 1.046995997428894,grad_norm: 0.9999993465097667, iteration: 394084
loss: 1.0850629806518555,grad_norm: 0.9999992597317359, iteration: 394085
loss: 1.011755108833313,grad_norm: 0.9381543680862605, iteration: 394086
loss: 1.0524022579193115,grad_norm: 0.9999994321764835, iteration: 394087
loss: 1.0080156326293945,grad_norm: 0.9999995646143802, iteration: 394088
loss: 0.9932288527488708,grad_norm: 0.90718121689975, iteration: 394089
loss: 1.0194799900054932,grad_norm: 0.8388127094300426, iteration: 394090
loss: 1.0243070125579834,grad_norm: 0.8645838397235727, iteration: 394091
loss: 1.0545116662979126,grad_norm: 0.8760231824156035, iteration: 394092
loss: 0.9892292022705078,grad_norm: 0.6718778460927753, iteration: 394093
loss: 1.0677133798599243,grad_norm: 0.9999993281894342, iteration: 394094
loss: 1.019773006439209,grad_norm: 0.8173126054767628, iteration: 394095
loss: 1.0106511116027832,grad_norm: 0.8326875114501531, iteration: 394096
loss: 1.0071810483932495,grad_norm: 0.869479923984304, iteration: 394097
loss: 1.0430880784988403,grad_norm: 1.0000000533271485, iteration: 394098
loss: 1.0544764995574951,grad_norm: 0.9999998777626491, iteration: 394099
loss: 1.0609554052352905,grad_norm: 1.0000000277622096, iteration: 394100
loss: 1.0168912410736084,grad_norm: 0.7610908054403888, iteration: 394101
loss: 0.9925293922424316,grad_norm: 0.8850764899033053, iteration: 394102
loss: 0.9754440188407898,grad_norm: 0.8385442677030706, iteration: 394103
loss: 1.0648218393325806,grad_norm: 0.9999997285374908, iteration: 394104
loss: 1.024883508682251,grad_norm: 0.9999992472880829, iteration: 394105
loss: 1.1875344514846802,grad_norm: 0.9999996570448609, iteration: 394106
loss: 0.9932783246040344,grad_norm: 0.9999992340451037, iteration: 394107
loss: 1.0404069423675537,grad_norm: 0.9867510467689654, iteration: 394108
loss: 1.0464451313018799,grad_norm: 0.9999995006892268, iteration: 394109
loss: 1.0377122163772583,grad_norm: 0.7203123769847025, iteration: 394110
loss: 1.0095760822296143,grad_norm: 0.7973865302787962, iteration: 394111
loss: 1.0291141271591187,grad_norm: 0.8693132373383934, iteration: 394112
loss: 1.1024380922317505,grad_norm: 0.9999998645987825, iteration: 394113
loss: 1.034557580947876,grad_norm: 0.9999999683031281, iteration: 394114
loss: 1.0090855360031128,grad_norm: 0.726867173648908, iteration: 394115
loss: 1.0416895151138306,grad_norm: 0.9999992521638358, iteration: 394116
loss: 1.1366735696792603,grad_norm: 0.9999996872102087, iteration: 394117
loss: 1.003814697265625,grad_norm: 0.7704404814799491, iteration: 394118
loss: 0.9552932977676392,grad_norm: 0.7222008256159861, iteration: 394119
loss: 0.9768335819244385,grad_norm: 0.833969057398596, iteration: 394120
loss: 1.0991301536560059,grad_norm: 0.9999997449108392, iteration: 394121
loss: 1.0469521284103394,grad_norm: 0.9999997180410509, iteration: 394122
loss: 1.041800618171692,grad_norm: 0.9999995601757083, iteration: 394123
loss: 1.0075353384017944,grad_norm: 0.7924013804633839, iteration: 394124
loss: 1.0442354679107666,grad_norm: 0.9999994168045303, iteration: 394125
loss: 1.1510034799575806,grad_norm: 0.9999997924919576, iteration: 394126
loss: 1.062577724456787,grad_norm: 0.9999991614525128, iteration: 394127
loss: 1.0263776779174805,grad_norm: 0.7782254336931058, iteration: 394128
loss: 1.1100187301635742,grad_norm: 0.9999991687984792, iteration: 394129
loss: 1.0292465686798096,grad_norm: 0.9999992834150652, iteration: 394130
loss: 0.9829432964324951,grad_norm: 0.7997270067564255, iteration: 394131
loss: 0.9904570579528809,grad_norm: 0.7691801907019064, iteration: 394132
loss: 1.1271425485610962,grad_norm: 0.9999994400790787, iteration: 394133
loss: 1.0115240812301636,grad_norm: 0.7314237321855775, iteration: 394134
loss: 1.0754966735839844,grad_norm: 0.9999998350656473, iteration: 394135
loss: 0.9782401323318481,grad_norm: 0.8630814927540295, iteration: 394136
loss: 1.0697814226150513,grad_norm: 0.9999994908001368, iteration: 394137
loss: 1.06979501247406,grad_norm: 0.9999994582336154, iteration: 394138
loss: 0.9833826422691345,grad_norm: 0.8658672651101044, iteration: 394139
loss: 0.9582967758178711,grad_norm: 0.999999872580856, iteration: 394140
loss: 1.356873869895935,grad_norm: 0.9999999848531422, iteration: 394141
loss: 1.1108061075210571,grad_norm: 0.9999994970736092, iteration: 394142
loss: 0.9809953570365906,grad_norm: 0.9999992535330329, iteration: 394143
loss: 0.9997091889381409,grad_norm: 0.8752600676596435, iteration: 394144
loss: 1.0084770917892456,grad_norm: 0.9999995685774634, iteration: 394145
loss: 1.007051944732666,grad_norm: 0.700051877137853, iteration: 394146
loss: 1.0487028360366821,grad_norm: 0.9999999026044374, iteration: 394147
loss: 1.0341845750808716,grad_norm: 0.9055280735566791, iteration: 394148
loss: 1.0611293315887451,grad_norm: 0.9379699158125996, iteration: 394149
loss: 1.0359008312225342,grad_norm: 0.9999993013717353, iteration: 394150
loss: 1.0409462451934814,grad_norm: 0.9999992388274285, iteration: 394151
loss: 1.0994442701339722,grad_norm: 0.9999994926491955, iteration: 394152
loss: 0.9624637365341187,grad_norm: 0.7859957559578186, iteration: 394153
loss: 1.0196499824523926,grad_norm: 0.8161352867143538, iteration: 394154
loss: 1.0681065320968628,grad_norm: 0.9999992425239185, iteration: 394155
loss: 1.104425072669983,grad_norm: 0.9999996863387192, iteration: 394156
loss: 1.0201722383499146,grad_norm: 0.8413305997910291, iteration: 394157
loss: 1.0269107818603516,grad_norm: 0.7409864367339587, iteration: 394158
loss: 0.9843577742576599,grad_norm: 0.7913560655439754, iteration: 394159
loss: 1.031775712966919,grad_norm: 0.9999995585486923, iteration: 394160
loss: 1.1191980838775635,grad_norm: 0.9999999175626298, iteration: 394161
loss: 1.0309056043624878,grad_norm: 0.9999997923804115, iteration: 394162
loss: 1.0468873977661133,grad_norm: 0.9999992948444864, iteration: 394163
loss: 1.2189457416534424,grad_norm: 0.999999549947624, iteration: 394164
loss: 1.0259175300598145,grad_norm: 0.880663973309008, iteration: 394165
loss: 1.0127651691436768,grad_norm: 0.7696799098825701, iteration: 394166
loss: 1.0112438201904297,grad_norm: 0.7860081968970994, iteration: 394167
loss: 1.0149339437484741,grad_norm: 0.8045776177854763, iteration: 394168
loss: 1.0004736185073853,grad_norm: 0.8435278453447153, iteration: 394169
loss: 0.9718648195266724,grad_norm: 0.960768016968076, iteration: 394170
loss: 1.0217255353927612,grad_norm: 0.9999993118329252, iteration: 394171
loss: 0.9970625638961792,grad_norm: 0.9756069369483256, iteration: 394172
loss: 1.0948132276535034,grad_norm: 0.9999999357693331, iteration: 394173
loss: 0.9886921644210815,grad_norm: 0.7498903981616514, iteration: 394174
loss: 1.0002332925796509,grad_norm: 0.977886889400903, iteration: 394175
loss: 1.060435175895691,grad_norm: 1.0000000134566713, iteration: 394176
loss: 1.1849478483200073,grad_norm: 0.9999999156076614, iteration: 394177
loss: 1.0530797243118286,grad_norm: 0.9633031950986684, iteration: 394178
loss: 0.9778409004211426,grad_norm: 0.8188021612505354, iteration: 394179
loss: 0.9932664632797241,grad_norm: 0.7055128858073076, iteration: 394180
loss: 0.9855130910873413,grad_norm: 0.8354764831891643, iteration: 394181
loss: 1.057018756866455,grad_norm: 0.9999995269223954, iteration: 394182
loss: 1.0702334642410278,grad_norm: 0.9999993718731777, iteration: 394183
loss: 1.0174719095230103,grad_norm: 0.709112499680533, iteration: 394184
loss: 0.9978749752044678,grad_norm: 0.9359242454354276, iteration: 394185
loss: 1.0703545808792114,grad_norm: 0.9999999556732374, iteration: 394186
loss: 1.064357876777649,grad_norm: 0.9999995550435132, iteration: 394187
loss: 1.0321440696716309,grad_norm: 0.9999991716489094, iteration: 394188
loss: 1.020526647567749,grad_norm: 0.8014116036986297, iteration: 394189
loss: 1.0693416595458984,grad_norm: 0.999999225241564, iteration: 394190
loss: 1.0538544654846191,grad_norm: 0.9999998168865708, iteration: 394191
loss: 1.0928937196731567,grad_norm: 0.8972906207135731, iteration: 394192
loss: 1.1015533208847046,grad_norm: 0.9175027028424422, iteration: 394193
loss: 1.078649878501892,grad_norm: 0.999999378084261, iteration: 394194
loss: 0.9926232099533081,grad_norm: 0.9535003638919747, iteration: 394195
loss: 1.062936782836914,grad_norm: 0.9999993463914594, iteration: 394196
loss: 0.992596447467804,grad_norm: 0.81962843821999, iteration: 394197
loss: 1.1514126062393188,grad_norm: 0.9999992384349615, iteration: 394198
loss: 1.0504816770553589,grad_norm: 0.9999992729986286, iteration: 394199
loss: 0.9999719262123108,grad_norm: 0.8233535733276403, iteration: 394200
loss: 1.0132731199264526,grad_norm: 0.7193689656408669, iteration: 394201
loss: 1.075610637664795,grad_norm: 0.9999996551598992, iteration: 394202
loss: 1.0374797582626343,grad_norm: 0.9999995245424174, iteration: 394203
loss: 0.9970032572746277,grad_norm: 0.8296363443755502, iteration: 394204
loss: 1.004841685295105,grad_norm: 0.9999990197454953, iteration: 394205
loss: 1.0864601135253906,grad_norm: 0.999999747040072, iteration: 394206
loss: 1.0374187231063843,grad_norm: 0.8403108421655469, iteration: 394207
loss: 1.0181856155395508,grad_norm: 0.9999998995740298, iteration: 394208
loss: 1.0854166746139526,grad_norm: 0.9135355907283434, iteration: 394209
loss: 1.0002349615097046,grad_norm: 0.9999997164846353, iteration: 394210
loss: 1.0159637928009033,grad_norm: 0.8470366435313097, iteration: 394211
loss: 1.2798802852630615,grad_norm: 1.0000000338264912, iteration: 394212
loss: 1.0843985080718994,grad_norm: 0.9999993018977728, iteration: 394213
loss: 0.9943236708641052,grad_norm: 0.8754350349151908, iteration: 394214
loss: 1.127655029296875,grad_norm: 0.9114631131530024, iteration: 394215
loss: 0.9834834337234497,grad_norm: 0.8595358291109068, iteration: 394216
loss: 1.0487757921218872,grad_norm: 0.9999996792051475, iteration: 394217
loss: 1.01997971534729,grad_norm: 0.9999999533782672, iteration: 394218
loss: 1.0244157314300537,grad_norm: 0.7495348094178177, iteration: 394219
loss: 1.023930311203003,grad_norm: 0.7931463590708936, iteration: 394220
loss: 1.0186530351638794,grad_norm: 0.9585079228892482, iteration: 394221
loss: 1.03999924659729,grad_norm: 0.9777471893239281, iteration: 394222
loss: 1.105307698249817,grad_norm: 0.9999998788524944, iteration: 394223
loss: 1.0343551635742188,grad_norm: 0.8863019072523145, iteration: 394224
loss: 0.9968739151954651,grad_norm: 0.7883054553621966, iteration: 394225
loss: 1.0740593671798706,grad_norm: 0.9999992697256856, iteration: 394226
loss: 1.056128978729248,grad_norm: 0.8398105471406737, iteration: 394227
loss: 0.992946982383728,grad_norm: 0.8526508495242494, iteration: 394228
loss: 1.0178898572921753,grad_norm: 0.6479322831061547, iteration: 394229
loss: 1.050278663635254,grad_norm: 0.9999996303401845, iteration: 394230
loss: 0.997810423374176,grad_norm: 0.8719474737201225, iteration: 394231
loss: 1.024375319480896,grad_norm: 0.9999997965323655, iteration: 394232
loss: 0.9876905679702759,grad_norm: 0.9999991335661036, iteration: 394233
loss: 1.0588710308074951,grad_norm: 0.9999998370707441, iteration: 394234
loss: 1.0168224573135376,grad_norm: 0.8113332841133025, iteration: 394235
loss: 1.0661941766738892,grad_norm: 0.9999999934430879, iteration: 394236
loss: 1.091661810874939,grad_norm: 0.9999997559573534, iteration: 394237
loss: 1.0034849643707275,grad_norm: 0.7824277507702649, iteration: 394238
loss: 1.0218884944915771,grad_norm: 0.8554290505739734, iteration: 394239
loss: 1.041083574295044,grad_norm: 0.9088058816766781, iteration: 394240
loss: 1.0076570510864258,grad_norm: 0.7377365054314771, iteration: 394241
loss: 0.9539010524749756,grad_norm: 1.0000000047542208, iteration: 394242
loss: 1.021785855293274,grad_norm: 0.999999064512765, iteration: 394243
loss: 1.1201374530792236,grad_norm: 0.9999996903985173, iteration: 394244
loss: 1.1362717151641846,grad_norm: 0.9999991693123852, iteration: 394245
loss: 1.0017188787460327,grad_norm: 0.9999994353527543, iteration: 394246
loss: 1.003949761390686,grad_norm: 0.7913079431952202, iteration: 394247
loss: 1.033389925956726,grad_norm: 0.9999999972619151, iteration: 394248
loss: 0.9934158325195312,grad_norm: 0.7243565279241605, iteration: 394249
loss: 1.0305817127227783,grad_norm: 0.7214472548307664, iteration: 394250
loss: 1.026258945465088,grad_norm: 0.8207244669622933, iteration: 394251
loss: 1.08855402469635,grad_norm: 0.9999990783924645, iteration: 394252
loss: 0.993511438369751,grad_norm: 0.8905313244966337, iteration: 394253
loss: 1.050806999206543,grad_norm: 0.7782313454108462, iteration: 394254
loss: 0.9972985982894897,grad_norm: 0.9999995510325962, iteration: 394255
loss: 1.0717226266860962,grad_norm: 0.9999997313147848, iteration: 394256
loss: 0.9869701862335205,grad_norm: 0.9273714249323233, iteration: 394257
loss: 1.0503969192504883,grad_norm: 0.9999998177880545, iteration: 394258
loss: 1.0324103832244873,grad_norm: 0.8952965597129575, iteration: 394259
loss: 1.1593278646469116,grad_norm: 0.9999999747528637, iteration: 394260
loss: 1.0858079195022583,grad_norm: 0.7008718185188267, iteration: 394261
loss: 1.0813847780227661,grad_norm: 0.9999991595908362, iteration: 394262
loss: 1.0122103691101074,grad_norm: 0.7583335118398725, iteration: 394263
loss: 0.9828153252601624,grad_norm: 0.8829350116645078, iteration: 394264
loss: 1.0591492652893066,grad_norm: 0.9999990817251028, iteration: 394265
loss: 1.0285038948059082,grad_norm: 0.7538414893398312, iteration: 394266
loss: 1.0207304954528809,grad_norm: 0.9999995433100654, iteration: 394267
loss: 0.99809730052948,grad_norm: 0.9816264997537787, iteration: 394268
loss: 1.0022354125976562,grad_norm: 0.9999991120712302, iteration: 394269
loss: 1.05600106716156,grad_norm: 0.999999416701351, iteration: 394270
loss: 1.0070905685424805,grad_norm: 0.9999991007063846, iteration: 394271
loss: 1.0278732776641846,grad_norm: 0.8458886000836391, iteration: 394272
loss: 1.0455633401870728,grad_norm: 0.7767637685520765, iteration: 394273
loss: 0.9806371331214905,grad_norm: 0.6407309285159506, iteration: 394274
loss: 1.0287938117980957,grad_norm: 0.8973951257017392, iteration: 394275
loss: 1.0312227010726929,grad_norm: 0.7923347596458241, iteration: 394276
loss: 1.0084733963012695,grad_norm: 0.8520217747886308, iteration: 394277
loss: 1.0399651527404785,grad_norm: 0.9999991852572959, iteration: 394278
loss: 1.0313740968704224,grad_norm: 0.9999995090166212, iteration: 394279
loss: 0.9853029847145081,grad_norm: 0.6483684324925735, iteration: 394280
loss: 0.9848369359970093,grad_norm: 0.6772952559458238, iteration: 394281
loss: 0.9597631692886353,grad_norm: 0.7284944617745645, iteration: 394282
loss: 0.9830201268196106,grad_norm: 0.7400904220582907, iteration: 394283
loss: 0.9857007265090942,grad_norm: 0.7009459891173104, iteration: 394284
loss: 0.9959328770637512,grad_norm: 0.9999990583932092, iteration: 394285
loss: 1.0483896732330322,grad_norm: 0.8062005674691344, iteration: 394286
loss: 1.0388455390930176,grad_norm: 0.8063404308412391, iteration: 394287
loss: 0.9822399616241455,grad_norm: 0.7910294473271725, iteration: 394288
loss: 1.0206656455993652,grad_norm: 0.999999225586339, iteration: 394289
loss: 0.9928846955299377,grad_norm: 0.6874532036495273, iteration: 394290
loss: 0.9909499287605286,grad_norm: 0.7459919721485547, iteration: 394291
loss: 0.9907883405685425,grad_norm: 0.8910538464966467, iteration: 394292
loss: 0.9792881011962891,grad_norm: 0.7350103494879745, iteration: 394293
loss: 0.9847605228424072,grad_norm: 0.72812870436552, iteration: 394294
loss: 1.0325233936309814,grad_norm: 0.9999993261951782, iteration: 394295
loss: 1.0749666690826416,grad_norm: 0.9999997192503509, iteration: 394296
loss: 0.9942173957824707,grad_norm: 0.6871435508772112, iteration: 394297
loss: 1.1578236818313599,grad_norm: 0.9999994355614271, iteration: 394298
loss: 1.0297085046768188,grad_norm: 0.8983041938251208, iteration: 394299
loss: 0.9871320724487305,grad_norm: 0.8173328073613905, iteration: 394300
loss: 0.9959030151367188,grad_norm: 0.7760242082171168, iteration: 394301
loss: 1.0481746196746826,grad_norm: 0.9999995646440492, iteration: 394302
loss: 1.0776244401931763,grad_norm: 0.7514185493881304, iteration: 394303
loss: 1.009974479675293,grad_norm: 0.9999994422112323, iteration: 394304
loss: 1.04341721534729,grad_norm: 0.7575916510836396, iteration: 394305
loss: 1.0163524150848389,grad_norm: 0.7801705609642403, iteration: 394306
loss: 1.0198850631713867,grad_norm: 0.8437463957400588, iteration: 394307
loss: 0.9694997072219849,grad_norm: 0.6703035558974608, iteration: 394308
loss: 0.9882647395133972,grad_norm: 0.9149786160464112, iteration: 394309
loss: 1.16018545627594,grad_norm: 0.880057781522061, iteration: 394310
loss: 0.9905118346214294,grad_norm: 0.7705306839976979, iteration: 394311
loss: 1.001466989517212,grad_norm: 0.7085470252907015, iteration: 394312
loss: 1.0886518955230713,grad_norm: 0.9999994942935511, iteration: 394313
loss: 1.000472068786621,grad_norm: 1.0000001023355254, iteration: 394314
loss: 1.0382697582244873,grad_norm: 0.8224649589145905, iteration: 394315
loss: 0.9798945188522339,grad_norm: 0.8897624126917533, iteration: 394316
loss: 1.08830726146698,grad_norm: 0.9999993760287191, iteration: 394317
loss: 1.006683349609375,grad_norm: 0.751732243897206, iteration: 394318
loss: 1.0119534730911255,grad_norm: 0.7792804044593444, iteration: 394319
loss: 0.9819230437278748,grad_norm: 0.7570456414616359, iteration: 394320
loss: 1.025641679763794,grad_norm: 0.9999992345857337, iteration: 394321
loss: 0.9942837953567505,grad_norm: 0.8718083259242909, iteration: 394322
loss: 1.0451984405517578,grad_norm: 0.9999994723202483, iteration: 394323
loss: 0.9935810565948486,grad_norm: 0.7417902705017866, iteration: 394324
loss: 0.9894928932189941,grad_norm: 0.8999799148286476, iteration: 394325
loss: 1.0652498006820679,grad_norm: 0.8227668945868633, iteration: 394326
loss: 1.026395559310913,grad_norm: 0.999999226578705, iteration: 394327
loss: 1.0174425840377808,grad_norm: 0.8183622541719083, iteration: 394328
loss: 1.0134094953536987,grad_norm: 0.8810738020978212, iteration: 394329
loss: 1.0263704061508179,grad_norm: 0.8002053915276283, iteration: 394330
loss: 0.9753377437591553,grad_norm: 0.6802177495850048, iteration: 394331
loss: 0.9777454137802124,grad_norm: 0.8442884693329845, iteration: 394332
loss: 0.9740151762962341,grad_norm: 0.8764554091216447, iteration: 394333
loss: 0.9733568429946899,grad_norm: 0.8124818405551733, iteration: 394334
loss: 1.058797001838684,grad_norm: 0.9999990518352586, iteration: 394335
loss: 1.0581556558609009,grad_norm: 0.91033277324835, iteration: 394336
loss: 0.998024046421051,grad_norm: 0.7864799486945158, iteration: 394337
loss: 1.021030068397522,grad_norm: 0.8792149857247262, iteration: 394338
loss: 1.1148159503936768,grad_norm: 0.9999996323878311, iteration: 394339
loss: 1.0071228742599487,grad_norm: 0.9999997506468122, iteration: 394340
loss: 1.0578923225402832,grad_norm: 0.8501275203657379, iteration: 394341
loss: 1.0146923065185547,grad_norm: 0.9999998758125938, iteration: 394342
loss: 0.9866494536399841,grad_norm: 0.8273727199976441, iteration: 394343
loss: 1.0249699354171753,grad_norm: 0.7654836663422838, iteration: 394344
loss: 1.0756977796554565,grad_norm: 0.9999998749060901, iteration: 394345
loss: 0.9657495617866516,grad_norm: 0.9999992042604429, iteration: 394346
loss: 1.0172110795974731,grad_norm: 0.9999991677187032, iteration: 394347
loss: 1.018937587738037,grad_norm: 0.9416761543487525, iteration: 394348
loss: 1.0106827020645142,grad_norm: 0.8174873615825586, iteration: 394349
loss: 1.1165101528167725,grad_norm: 1.0000000379766385, iteration: 394350
loss: 1.012151837348938,grad_norm: 0.9999997762905227, iteration: 394351
loss: 1.0079889297485352,grad_norm: 0.7929644293634484, iteration: 394352
loss: 0.9940975904464722,grad_norm: 0.9999994826650149, iteration: 394353
loss: 1.0499255657196045,grad_norm: 0.8144813208016851, iteration: 394354
loss: 0.9875289797782898,grad_norm: 0.816525224885638, iteration: 394355
loss: 0.9826099276542664,grad_norm: 0.8740813008043491, iteration: 394356
loss: 0.9966307878494263,grad_norm: 0.7703271199779388, iteration: 394357
loss: 1.020136833190918,grad_norm: 0.7665335244415272, iteration: 394358
loss: 1.0136680603027344,grad_norm: 0.9328311324261205, iteration: 394359
loss: 1.0477430820465088,grad_norm: 0.9999998968323646, iteration: 394360
loss: 1.1495870351791382,grad_norm: 0.9999995630872751, iteration: 394361
loss: 1.1092000007629395,grad_norm: 0.9999992225904714, iteration: 394362
loss: 1.0945481061935425,grad_norm: 0.9999994778175635, iteration: 394363
loss: 1.141021966934204,grad_norm: 0.9454252473314335, iteration: 394364
loss: 1.0313085317611694,grad_norm: 0.9999991337352963, iteration: 394365
loss: 1.0784155130386353,grad_norm: 0.9833696886143531, iteration: 394366
loss: 1.0003867149353027,grad_norm: 0.8288874831326887, iteration: 394367
loss: 1.0357897281646729,grad_norm: 0.9999998025482384, iteration: 394368
loss: 0.9671320915222168,grad_norm: 0.7408486289945606, iteration: 394369
loss: 1.0049818754196167,grad_norm: 0.719248596442628, iteration: 394370
loss: 1.0121549367904663,grad_norm: 0.7509922759780128, iteration: 394371
loss: 1.07102632522583,grad_norm: 0.9973580394402289, iteration: 394372
loss: 0.9929586052894592,grad_norm: 0.8666029277651288, iteration: 394373
loss: 1.0413610935211182,grad_norm: 0.9367801656551751, iteration: 394374
loss: 1.0479607582092285,grad_norm: 0.9999996334666034, iteration: 394375
loss: 1.070274829864502,grad_norm: 0.9999998624073311, iteration: 394376
loss: 1.1192426681518555,grad_norm: 0.999999729679706, iteration: 394377
loss: 1.0050303936004639,grad_norm: 0.6274631430661016, iteration: 394378
loss: 1.008570671081543,grad_norm: 0.8474362043973594, iteration: 394379
loss: 0.9577738642692566,grad_norm: 0.7240196506317244, iteration: 394380
loss: 1.1306569576263428,grad_norm: 0.9999993127903949, iteration: 394381
loss: 1.0798953771591187,grad_norm: 0.9999999396600789, iteration: 394382
loss: 1.0239667892456055,grad_norm: 0.8760981021222938, iteration: 394383
loss: 1.0968146324157715,grad_norm: 0.999999047739687, iteration: 394384
loss: 1.0567113161087036,grad_norm: 0.6813095306974977, iteration: 394385
loss: 1.0121361017227173,grad_norm: 0.6720065039370329, iteration: 394386
loss: 1.0627378225326538,grad_norm: 0.8502177319334313, iteration: 394387
loss: 1.0423762798309326,grad_norm: 0.9999993179220737, iteration: 394388
loss: 1.0390841960906982,grad_norm: 0.8562104068972902, iteration: 394389
loss: 1.0119829177856445,grad_norm: 0.7864226935727692, iteration: 394390
loss: 1.004622459411621,grad_norm: 0.8371565010096734, iteration: 394391
loss: 1.00710129737854,grad_norm: 0.9635181539333365, iteration: 394392
loss: 1.006705641746521,grad_norm: 0.6850213585375577, iteration: 394393
loss: 1.0451297760009766,grad_norm: 0.8539656746199622, iteration: 394394
loss: 1.0073529481887817,grad_norm: 0.7118265086109676, iteration: 394395
loss: 1.021834135055542,grad_norm: 0.9999991565288531, iteration: 394396
loss: 0.9781128168106079,grad_norm: 0.6934318607935027, iteration: 394397
loss: 1.0054740905761719,grad_norm: 0.6818507644409895, iteration: 394398
loss: 0.9856679439544678,grad_norm: 0.9999994456232393, iteration: 394399
loss: 1.0125248432159424,grad_norm: 0.7785848606536655, iteration: 394400
loss: 1.0490854978561401,grad_norm: 0.9999997513838222, iteration: 394401
loss: 0.9977307915687561,grad_norm: 0.7955323152537868, iteration: 394402
loss: 0.9561636447906494,grad_norm: 0.9376428694359896, iteration: 394403
loss: 0.9816890954971313,grad_norm: 0.8259406390101238, iteration: 394404
loss: 0.9624963998794556,grad_norm: 0.8680053228733592, iteration: 394405
loss: 0.9931391477584839,grad_norm: 0.8666622253511368, iteration: 394406
loss: 0.9575296640396118,grad_norm: 0.6840686147994828, iteration: 394407
loss: 0.9978459477424622,grad_norm: 0.7205144038890758, iteration: 394408
loss: 0.9963765740394592,grad_norm: 0.7733742052291313, iteration: 394409
loss: 0.9950926303863525,grad_norm: 0.7496956690977371, iteration: 394410
loss: 1.002695918083191,grad_norm: 0.7950919587834371, iteration: 394411
loss: 0.9839394688606262,grad_norm: 0.7860448581563213, iteration: 394412
loss: 1.0054060220718384,grad_norm: 0.8218735732367339, iteration: 394413
loss: 0.9824430346488953,grad_norm: 0.7177063414323924, iteration: 394414
loss: 1.0323550701141357,grad_norm: 0.8447131860837614, iteration: 394415
loss: 1.0051554441452026,grad_norm: 0.783070341253032, iteration: 394416
loss: 1.0274369716644287,grad_norm: 0.9455276045368467, iteration: 394417
loss: 0.9837219715118408,grad_norm: 0.9628334221824179, iteration: 394418
loss: 1.0059466361999512,grad_norm: 0.7035792810069746, iteration: 394419
loss: 1.061013102531433,grad_norm: 0.9999991061746334, iteration: 394420
loss: 1.0042154788970947,grad_norm: 0.7767524737889202, iteration: 394421
loss: 1.0665063858032227,grad_norm: 0.9999995420366846, iteration: 394422
loss: 1.0249756574630737,grad_norm: 0.9540047004659108, iteration: 394423
loss: 1.0188876390457153,grad_norm: 0.7052316169064451, iteration: 394424
loss: 0.992107093334198,grad_norm: 0.9999994692335622, iteration: 394425
loss: 0.9977814555168152,grad_norm: 0.999999598102425, iteration: 394426
loss: 1.011614203453064,grad_norm: 0.8155528460281932, iteration: 394427
loss: 1.0229252576828003,grad_norm: 0.7060638736389836, iteration: 394428
loss: 0.9948599934577942,grad_norm: 0.7597237119071886, iteration: 394429
loss: 1.0211677551269531,grad_norm: 0.8094508267589156, iteration: 394430
loss: 1.016309380531311,grad_norm: 0.7883525337653575, iteration: 394431
loss: 1.023299217224121,grad_norm: 0.8971226792760293, iteration: 394432
loss: 1.0866717100143433,grad_norm: 0.9020775434168737, iteration: 394433
loss: 1.0098505020141602,grad_norm: 0.7728991487393561, iteration: 394434
loss: 0.9751776456832886,grad_norm: 0.9291707746924741, iteration: 394435
loss: 1.02480149269104,grad_norm: 0.7676253858374481, iteration: 394436
loss: 1.0399614572525024,grad_norm: 0.9242704500509267, iteration: 394437
loss: 1.0301578044891357,grad_norm: 0.9999998931738047, iteration: 394438
loss: 1.0228692293167114,grad_norm: 0.639789469459974, iteration: 394439
loss: 1.0656001567840576,grad_norm: 1.0000000205622432, iteration: 394440
loss: 0.9731113314628601,grad_norm: 0.8066331302230465, iteration: 394441
loss: 0.9978298544883728,grad_norm: 0.7360722428474875, iteration: 394442
loss: 1.0395562648773193,grad_norm: 0.9576474999715766, iteration: 394443
loss: 0.9861297607421875,grad_norm: 0.8035830049926147, iteration: 394444
loss: 1.0023056268692017,grad_norm: 0.8214612554982422, iteration: 394445
loss: 0.9970239400863647,grad_norm: 0.6432861164163051, iteration: 394446
loss: 1.01529860496521,grad_norm: 0.7156896235878393, iteration: 394447
loss: 1.1243319511413574,grad_norm: 0.9999994590977366, iteration: 394448
loss: 0.9854333996772766,grad_norm: 0.8096005665500744, iteration: 394449
loss: 1.005639910697937,grad_norm: 0.868454255874834, iteration: 394450
loss: 1.003196120262146,grad_norm: 0.9999990601811412, iteration: 394451
loss: 1.0046578645706177,grad_norm: 0.9999993220618457, iteration: 394452
loss: 0.9688541889190674,grad_norm: 0.9999994199069736, iteration: 394453
loss: 0.9992994666099548,grad_norm: 0.8699155312731947, iteration: 394454
loss: 1.0274274349212646,grad_norm: 0.9325517616396386, iteration: 394455
loss: 1.0210283994674683,grad_norm: 0.8415838913291602, iteration: 394456
loss: 0.967534601688385,grad_norm: 0.7129834740846435, iteration: 394457
loss: 0.9851876497268677,grad_norm: 0.9999993064675545, iteration: 394458
loss: 1.0323792695999146,grad_norm: 0.8188728958898281, iteration: 394459
loss: 0.9822750091552734,grad_norm: 0.7315902451454359, iteration: 394460
loss: 1.099868893623352,grad_norm: 0.999999455058473, iteration: 394461
loss: 0.974450409412384,grad_norm: 0.8451915046001197, iteration: 394462
loss: 0.9940772652626038,grad_norm: 0.889549731677197, iteration: 394463
loss: 1.0018573999404907,grad_norm: 0.7301144403609808, iteration: 394464
loss: 0.9760900735855103,grad_norm: 0.9286567042330728, iteration: 394465
loss: 0.9804902672767639,grad_norm: 0.9308495729614489, iteration: 394466
loss: 0.9725099802017212,grad_norm: 0.9095197231033405, iteration: 394467
loss: 1.029238224029541,grad_norm: 0.9561038984755273, iteration: 394468
loss: 1.0477396249771118,grad_norm: 0.8629956185938688, iteration: 394469
loss: 1.0014103651046753,grad_norm: 0.9999992968328911, iteration: 394470
loss: 0.977319061756134,grad_norm: 0.7312351910294117, iteration: 394471
loss: 1.0067487955093384,grad_norm: 0.7460805719166524, iteration: 394472
loss: 1.0582619905471802,grad_norm: 0.8017306861303906, iteration: 394473
loss: 0.977716863155365,grad_norm: 0.8309080032111257, iteration: 394474
loss: 1.000551700592041,grad_norm: 0.7668899924698505, iteration: 394475
loss: 1.0058475732803345,grad_norm: 0.8028512167668808, iteration: 394476
loss: 0.9785501956939697,grad_norm: 0.9367612472970955, iteration: 394477
loss: 1.0074056386947632,grad_norm: 0.9350108231928702, iteration: 394478
loss: 0.9907562732696533,grad_norm: 0.9471055822535341, iteration: 394479
loss: 1.0633738040924072,grad_norm: 0.720515461253419, iteration: 394480
loss: 1.008644938468933,grad_norm: 0.6862383838187827, iteration: 394481
loss: 1.0104624032974243,grad_norm: 0.8322851132570409, iteration: 394482
loss: 0.9930906295776367,grad_norm: 0.7223295265984339, iteration: 394483
loss: 0.985860288143158,grad_norm: 0.7214631366705473, iteration: 394484
loss: 0.9640092253684998,grad_norm: 0.8956069480270245, iteration: 394485
loss: 0.9808282852172852,grad_norm: 0.8359778991144755, iteration: 394486
loss: 0.9179681539535522,grad_norm: 0.8380005370705326, iteration: 394487
loss: 1.026633381843567,grad_norm: 0.7215325984760498, iteration: 394488
loss: 1.0423134565353394,grad_norm: 0.652413907016111, iteration: 394489
loss: 0.9907428026199341,grad_norm: 0.8354655594177566, iteration: 394490
loss: 1.0825777053833008,grad_norm: 0.9999998412971904, iteration: 394491
loss: 0.9915673732757568,grad_norm: 0.9999995446384166, iteration: 394492
loss: 0.9587239027023315,grad_norm: 0.7919040526555372, iteration: 394493
loss: 0.9710336327552795,grad_norm: 0.7002271967068834, iteration: 394494
loss: 0.995405912399292,grad_norm: 0.7330137865666083, iteration: 394495
loss: 0.980648398399353,grad_norm: 0.8801975288584613, iteration: 394496
loss: 1.000455379486084,grad_norm: 0.8196462678797387, iteration: 394497
loss: 1.0426783561706543,grad_norm: 0.7217072648827467, iteration: 394498
loss: 1.0109747648239136,grad_norm: 0.8181759722412245, iteration: 394499
loss: 1.0247496366500854,grad_norm: 0.9999993920419353, iteration: 394500
loss: 1.0069867372512817,grad_norm: 0.829787110326557, iteration: 394501
loss: 0.9997677803039551,grad_norm: 0.9999990551119549, iteration: 394502
loss: 1.011283278465271,grad_norm: 0.9999993955308102, iteration: 394503
loss: 1.0212998390197754,grad_norm: 0.7922637572880312, iteration: 394504
loss: 0.9941434860229492,grad_norm: 0.9999999503376833, iteration: 394505
loss: 1.014440894126892,grad_norm: 0.8991399810930961, iteration: 394506
loss: 1.0041216611862183,grad_norm: 0.9998124970015967, iteration: 394507
loss: 0.9911982417106628,grad_norm: 0.8040204317468187, iteration: 394508
loss: 0.9733778834342957,grad_norm: 0.85072964502967, iteration: 394509
loss: 1.0367096662521362,grad_norm: 1.0000000191981797, iteration: 394510
loss: 1.0465466976165771,grad_norm: 0.8715541524826302, iteration: 394511
loss: 1.0060046911239624,grad_norm: 0.8439557926444755, iteration: 394512
loss: 0.9843760132789612,grad_norm: 0.8892442441188216, iteration: 394513
loss: 0.9802122712135315,grad_norm: 0.779883656714628, iteration: 394514
loss: 1.0162975788116455,grad_norm: 0.7348094724201636, iteration: 394515
loss: 1.0081403255462646,grad_norm: 0.8320235761204313, iteration: 394516
loss: 0.981436550617218,grad_norm: 0.8139345162901147, iteration: 394517
loss: 1.0028972625732422,grad_norm: 0.8909507463275307, iteration: 394518
loss: 1.0197612047195435,grad_norm: 0.8249557672261892, iteration: 394519
loss: 1.0173259973526,grad_norm: 0.9999994516040971, iteration: 394520
loss: 1.0172737836837769,grad_norm: 0.9999992401991468, iteration: 394521
loss: 0.9876536130905151,grad_norm: 1.0000000023827407, iteration: 394522
loss: 0.9775298237800598,grad_norm: 0.9662491745027213, iteration: 394523
loss: 1.0218247175216675,grad_norm: 0.7534029495289872, iteration: 394524
loss: 1.0554234981536865,grad_norm: 0.9664747301702881, iteration: 394525
loss: 1.0466595888137817,grad_norm: 0.7535547700821902, iteration: 394526
loss: 1.0102986097335815,grad_norm: 0.9999993874594502, iteration: 394527
loss: 1.0486854314804077,grad_norm: 0.999999540385638, iteration: 394528
loss: 1.0351972579956055,grad_norm: 0.9999993151294021, iteration: 394529
loss: 1.0196294784545898,grad_norm: 0.9555069180605714, iteration: 394530
loss: 1.038418173789978,grad_norm: 0.9999999013944718, iteration: 394531
loss: 1.0423129796981812,grad_norm: 0.9999992508593084, iteration: 394532
loss: 1.008696436882019,grad_norm: 0.744267648846094, iteration: 394533
loss: 1.011527419090271,grad_norm: 0.7750707558789037, iteration: 394534
loss: 0.9574934840202332,grad_norm: 0.898172394915824, iteration: 394535
loss: 1.0384317636489868,grad_norm: 0.9999998906343948, iteration: 394536
loss: 1.0232988595962524,grad_norm: 0.7077864743955736, iteration: 394537
loss: 1.0346921682357788,grad_norm: 0.9318601074918315, iteration: 394538
loss: 1.0082277059555054,grad_norm: 0.9105697382161816, iteration: 394539
loss: 1.0428004264831543,grad_norm: 0.8097708094398731, iteration: 394540
loss: 0.9991446137428284,grad_norm: 0.7737533615603092, iteration: 394541
loss: 1.015455961227417,grad_norm: 0.6721654998171227, iteration: 394542
loss: 1.0077648162841797,grad_norm: 0.6593547726863521, iteration: 394543
loss: 0.9753445982933044,grad_norm: 0.7885953989966233, iteration: 394544
loss: 1.0067017078399658,grad_norm: 0.953463183059185, iteration: 394545
loss: 0.9984346032142639,grad_norm: 0.8125675272496387, iteration: 394546
loss: 1.0025619268417358,grad_norm: 0.8040779027697486, iteration: 394547
loss: 0.9662555456161499,grad_norm: 0.7627902416083987, iteration: 394548
loss: 1.0105518102645874,grad_norm: 0.7902618481096007, iteration: 394549
loss: 1.030705451965332,grad_norm: 0.9999993197573392, iteration: 394550
loss: 1.0148807764053345,grad_norm: 0.7001262795331064, iteration: 394551
loss: 1.0105222463607788,grad_norm: 0.7968999832204667, iteration: 394552
loss: 1.0430784225463867,grad_norm: 0.9585401613439405, iteration: 394553
loss: 1.0106103420257568,grad_norm: 0.7021679266804982, iteration: 394554
loss: 1.005370020866394,grad_norm: 0.8080940960778019, iteration: 394555
loss: 1.0925734043121338,grad_norm: 0.9919892775021376, iteration: 394556
loss: 1.0517915487289429,grad_norm: 0.8891631807105923, iteration: 394557
loss: 1.0229167938232422,grad_norm: 0.7377089247037008, iteration: 394558
loss: 1.0026065111160278,grad_norm: 0.837438319714299, iteration: 394559
loss: 0.9757956266403198,grad_norm: 0.6842280524301981, iteration: 394560
loss: 0.9941417574882507,grad_norm: 0.6314983762381476, iteration: 394561
loss: 1.0192347764968872,grad_norm: 0.7452380954953575, iteration: 394562
loss: 0.9571334719657898,grad_norm: 0.7999515819649481, iteration: 394563
loss: 1.0162711143493652,grad_norm: 0.8303672412682864, iteration: 394564
loss: 1.0231554508209229,grad_norm: 0.9630351814469661, iteration: 394565
loss: 1.0100681781768799,grad_norm: 0.9572942215014147, iteration: 394566
loss: 1.0054858922958374,grad_norm: 0.6650694921825692, iteration: 394567
loss: 1.017401933670044,grad_norm: 0.9999992513820614, iteration: 394568
loss: 0.9780120849609375,grad_norm: 0.8432631916276919, iteration: 394569
loss: 1.0082379579544067,grad_norm: 0.8506110458240553, iteration: 394570
loss: 0.94227135181427,grad_norm: 0.8557800946294114, iteration: 394571
loss: 0.9670600891113281,grad_norm: 0.9999998410247832, iteration: 394572
loss: 1.0298151969909668,grad_norm: 0.9999992397272902, iteration: 394573
loss: 1.001112461090088,grad_norm: 0.7837065950971676, iteration: 394574
loss: 1.0039639472961426,grad_norm: 0.7995313103687246, iteration: 394575
loss: 1.047327995300293,grad_norm: 0.9999997725432869, iteration: 394576
loss: 1.0051766633987427,grad_norm: 0.6939173024775865, iteration: 394577
loss: 1.0168792009353638,grad_norm: 0.9999999467981007, iteration: 394578
loss: 1.0115309953689575,grad_norm: 0.7774217791055404, iteration: 394579
loss: 0.9778035879135132,grad_norm: 0.715653889087759, iteration: 394580
loss: 1.0115909576416016,grad_norm: 0.9999991756403025, iteration: 394581
loss: 1.0600301027297974,grad_norm: 0.999999463155259, iteration: 394582
loss: 1.0012528896331787,grad_norm: 0.7824522316753628, iteration: 394583
loss: 0.9756911993026733,grad_norm: 0.8297930458192777, iteration: 394584
loss: 0.9938175678253174,grad_norm: 0.772347128769526, iteration: 394585
loss: 1.0029593706130981,grad_norm: 0.7890605597743152, iteration: 394586
loss: 0.9969045519828796,grad_norm: 0.8175752509647901, iteration: 394587
loss: 0.9641692638397217,grad_norm: 0.8253298624320995, iteration: 394588
loss: 1.1272997856140137,grad_norm: 0.9999993132986983, iteration: 394589
loss: 1.012086033821106,grad_norm: 0.9360110738224922, iteration: 394590
loss: 1.042966365814209,grad_norm: 0.999999489539632, iteration: 394591
loss: 1.1341814994812012,grad_norm: 0.9999998715396862, iteration: 394592
loss: 1.0241754055023193,grad_norm: 0.9999997987658381, iteration: 394593
loss: 1.0422239303588867,grad_norm: 0.9999997249804925, iteration: 394594
loss: 0.9885329604148865,grad_norm: 0.7110424368091204, iteration: 394595
loss: 1.0342347621917725,grad_norm: 0.9999993857694209, iteration: 394596
loss: 1.004151701927185,grad_norm: 0.7451110611225293, iteration: 394597
loss: 1.0004843473434448,grad_norm: 0.7167854141822759, iteration: 394598
loss: 1.0017644166946411,grad_norm: 0.9999997476832255, iteration: 394599
loss: 0.984972357749939,grad_norm: 0.7502660166630143, iteration: 394600
loss: 0.9727239012718201,grad_norm: 0.8277441510018969, iteration: 394601
loss: 1.0039184093475342,grad_norm: 0.8708705792783384, iteration: 394602
loss: 1.0505608320236206,grad_norm: 1.0000000687695918, iteration: 394603
loss: 0.9974119663238525,grad_norm: 0.7908504402777625, iteration: 394604
loss: 1.0631918907165527,grad_norm: 0.9999990569822451, iteration: 394605
loss: 0.9758284091949463,grad_norm: 0.7338695803353192, iteration: 394606
loss: 0.9693288207054138,grad_norm: 0.9692781144754049, iteration: 394607
loss: 0.9997752904891968,grad_norm: 0.8197551682984551, iteration: 394608
loss: 1.0181602239608765,grad_norm: 0.9999993655130922, iteration: 394609
loss: 0.9849370121955872,grad_norm: 0.8731646821650799, iteration: 394610
loss: 1.0291054248809814,grad_norm: 0.7325401743826582, iteration: 394611
loss: 1.0398141145706177,grad_norm: 0.9333285286099037, iteration: 394612
loss: 1.1299662590026855,grad_norm: 0.9999994003308789, iteration: 394613
loss: 1.0177110433578491,grad_norm: 0.9999991835198238, iteration: 394614
loss: 1.007901906967163,grad_norm: 0.7966899535251142, iteration: 394615
loss: 1.0472896099090576,grad_norm: 0.9999998805151136, iteration: 394616
loss: 0.9424669146537781,grad_norm: 0.7525669512050179, iteration: 394617
loss: 1.0041583776474,grad_norm: 0.8407547490751399, iteration: 394618
loss: 0.9630309343338013,grad_norm: 0.8326482833037419, iteration: 394619
loss: 1.0008819103240967,grad_norm: 0.7562182811049324, iteration: 394620
loss: 0.998839259147644,grad_norm: 0.8726422497148307, iteration: 394621
loss: 1.0378541946411133,grad_norm: 0.8701323602428016, iteration: 394622
loss: 0.9839892387390137,grad_norm: 0.8166629616439284, iteration: 394623
loss: 0.9712255597114563,grad_norm: 0.7856773989927189, iteration: 394624
loss: 1.0080509185791016,grad_norm: 0.9999990811828353, iteration: 394625
loss: 1.0083644390106201,grad_norm: 0.9433654248638207, iteration: 394626
loss: 0.9811955690383911,grad_norm: 0.8411009753483781, iteration: 394627
loss: 0.9995085597038269,grad_norm: 0.7554679778597284, iteration: 394628
loss: 1.025352954864502,grad_norm: 0.8635105564581195, iteration: 394629
loss: 1.0027060508728027,grad_norm: 0.7289126108089768, iteration: 394630
loss: 1.0372673273086548,grad_norm: 0.8058332159193522, iteration: 394631
loss: 0.9975014925003052,grad_norm: 0.7001802462368288, iteration: 394632
loss: 1.016002893447876,grad_norm: 0.9999998681080108, iteration: 394633
loss: 1.0550689697265625,grad_norm: 0.8561051958300538, iteration: 394634
loss: 0.9913020133972168,grad_norm: 0.776036589037525, iteration: 394635
loss: 1.0526981353759766,grad_norm: 0.8149687665703459, iteration: 394636
loss: 1.0268117189407349,grad_norm: 0.9592401419077698, iteration: 394637
loss: 1.0355372428894043,grad_norm: 0.9999994563604884, iteration: 394638
loss: 0.9828673601150513,grad_norm: 0.9189751122247354, iteration: 394639
loss: 0.9717652797698975,grad_norm: 0.7550632126498461, iteration: 394640
loss: 0.9912742376327515,grad_norm: 0.8302358349023556, iteration: 394641
loss: 0.9885199666023254,grad_norm: 0.7938136855139456, iteration: 394642
loss: 0.9915722608566284,grad_norm: 0.7409691703453472, iteration: 394643
loss: 1.0007323026657104,grad_norm: 0.9207618683479146, iteration: 394644
loss: 1.0168203115463257,grad_norm: 0.8938386250810836, iteration: 394645
loss: 0.995979368686676,grad_norm: 0.6357100189464979, iteration: 394646
loss: 1.0889075994491577,grad_norm: 0.9842220628701104, iteration: 394647
loss: 1.0408090353012085,grad_norm: 0.9999996593811445, iteration: 394648
loss: 0.9943992495536804,grad_norm: 0.6991601094328082, iteration: 394649
loss: 1.0053505897521973,grad_norm: 0.7222172570502107, iteration: 394650
loss: 0.9960493445396423,grad_norm: 0.8707911277689149, iteration: 394651
loss: 1.0109095573425293,grad_norm: 0.9379551800038506, iteration: 394652
loss: 1.0033280849456787,grad_norm: 0.6937483619552467, iteration: 394653
loss: 1.0124825239181519,grad_norm: 0.9536448221148844, iteration: 394654
loss: 1.007380723953247,grad_norm: 0.806451051908025, iteration: 394655
loss: 0.9964451789855957,grad_norm: 0.8467260697629514, iteration: 394656
loss: 1.0038560628890991,grad_norm: 0.7847046374093511, iteration: 394657
loss: 0.9984056353569031,grad_norm: 0.7008155354730475, iteration: 394658
loss: 0.9710828065872192,grad_norm: 0.7993310723815948, iteration: 394659
loss: 1.012868881225586,grad_norm: 0.9145742034258718, iteration: 394660
loss: 1.0206992626190186,grad_norm: 0.7723320167635827, iteration: 394661
loss: 1.053197979927063,grad_norm: 0.7930863890170468, iteration: 394662
loss: 1.085375428199768,grad_norm: 0.7263730217613288, iteration: 394663
loss: 1.005963921546936,grad_norm: 0.7916982629226864, iteration: 394664
loss: 0.9817113876342773,grad_norm: 0.956172689151598, iteration: 394665
loss: 0.9965634942054749,grad_norm: 0.7031951421044138, iteration: 394666
loss: 1.0111687183380127,grad_norm: 0.9999991726873582, iteration: 394667
loss: 0.9462000727653503,grad_norm: 0.7225004117970022, iteration: 394668
loss: 1.1094202995300293,grad_norm: 0.9999998256219489, iteration: 394669
loss: 0.9819616675376892,grad_norm: 0.7675140919382734, iteration: 394670
loss: 1.0580432415008545,grad_norm: 0.8010707226544223, iteration: 394671
loss: 1.0300449132919312,grad_norm: 0.9999996681603436, iteration: 394672
loss: 1.000958800315857,grad_norm: 0.7914519915144415, iteration: 394673
loss: 1.0335158109664917,grad_norm: 0.7921159040742365, iteration: 394674
loss: 1.0181126594543457,grad_norm: 0.7088566650668411, iteration: 394675
loss: 0.9927650094032288,grad_norm: 0.8086866318790901, iteration: 394676
loss: 1.0169540643692017,grad_norm: 0.8665250491979476, iteration: 394677
loss: 1.0058820247650146,grad_norm: 0.9999991740025829, iteration: 394678
loss: 1.0135220289230347,grad_norm: 0.8896587345879998, iteration: 394679
loss: 0.9846705794334412,grad_norm: 0.9999990849068612, iteration: 394680
loss: 0.9604811668395996,grad_norm: 0.6483932759308467, iteration: 394681
loss: 1.0188950300216675,grad_norm: 0.811818212332927, iteration: 394682
loss: 1.001965880393982,grad_norm: 0.7697216570610157, iteration: 394683
loss: 0.9883598685264587,grad_norm: 0.7813217333937151, iteration: 394684
loss: 1.0435818433761597,grad_norm: 0.8715759626258522, iteration: 394685
loss: 0.9840227961540222,grad_norm: 0.9999992210908557, iteration: 394686
loss: 1.037787914276123,grad_norm: 0.7804532548244975, iteration: 394687
loss: 1.033422827720642,grad_norm: 0.99999940384943, iteration: 394688
loss: 0.9905507564544678,grad_norm: 0.7549057964271269, iteration: 394689
loss: 1.0011539459228516,grad_norm: 0.8191687055194462, iteration: 394690
loss: 0.9687054753303528,grad_norm: 0.8032933488371511, iteration: 394691
loss: 1.129961609840393,grad_norm: 0.9999991655763566, iteration: 394692
loss: 0.9875167608261108,grad_norm: 0.7828963555572614, iteration: 394693
loss: 0.9967461824417114,grad_norm: 0.6813135369544726, iteration: 394694
loss: 1.020738124847412,grad_norm: 0.7834371407450131, iteration: 394695
loss: 1.0387845039367676,grad_norm: 0.8889444147160941, iteration: 394696
loss: 1.0061836242675781,grad_norm: 0.9227825685227333, iteration: 394697
loss: 1.008591890335083,grad_norm: 0.7505790214236472, iteration: 394698
loss: 0.9755951762199402,grad_norm: 0.6874730087711515, iteration: 394699
loss: 0.9744128584861755,grad_norm: 0.6847576906638816, iteration: 394700
loss: 1.0001987218856812,grad_norm: 0.6886968575146146, iteration: 394701
loss: 0.9774481654167175,grad_norm: 0.9999991226178628, iteration: 394702
loss: 0.9837521910667419,grad_norm: 0.7608664737373065, iteration: 394703
loss: 0.9929777979850769,grad_norm: 0.9999995913716863, iteration: 394704
loss: 1.0149857997894287,grad_norm: 0.8022996036527773, iteration: 394705
loss: 1.001004934310913,grad_norm: 0.7216750992768769, iteration: 394706
loss: 0.9821446537971497,grad_norm: 0.83438734993856, iteration: 394707
loss: 1.1383172273635864,grad_norm: 0.9999992004782665, iteration: 394708
loss: 0.9798731207847595,grad_norm: 0.8132651039058839, iteration: 394709
loss: 0.9656466245651245,grad_norm: 0.9492218382816193, iteration: 394710
loss: 1.087494969367981,grad_norm: 0.9999997214489673, iteration: 394711
loss: 0.9999788403511047,grad_norm: 0.8085309043578951, iteration: 394712
loss: 0.9914231896400452,grad_norm: 0.6641511885425712, iteration: 394713
loss: 1.015535593032837,grad_norm: 0.7767243135624438, iteration: 394714
loss: 1.0057207345962524,grad_norm: 0.8374928482052332, iteration: 394715
loss: 1.0007083415985107,grad_norm: 0.9296769817995219, iteration: 394716
loss: 1.000662922859192,grad_norm: 0.7145216525042861, iteration: 394717
loss: 0.9527591466903687,grad_norm: 0.7959798661116655, iteration: 394718
loss: 0.98396897315979,grad_norm: 0.7923856131787762, iteration: 394719
loss: 0.9907321333885193,grad_norm: 0.9999998691502612, iteration: 394720
loss: 1.0571630001068115,grad_norm: 0.8986068795700881, iteration: 394721
loss: 1.0075753927230835,grad_norm: 0.999999123441562, iteration: 394722
loss: 1.007559895515442,grad_norm: 0.755464798454092, iteration: 394723
loss: 1.0125360488891602,grad_norm: 0.6989279878795982, iteration: 394724
loss: 1.0098938941955566,grad_norm: 0.8165890813833546, iteration: 394725
loss: 0.9905415177345276,grad_norm: 0.9284305175097829, iteration: 394726
loss: 0.9708663821220398,grad_norm: 0.8885414133310989, iteration: 394727
loss: 0.9949724674224854,grad_norm: 0.7436921072386103, iteration: 394728
loss: 0.9956212639808655,grad_norm: 0.6594636255473133, iteration: 394729
loss: 1.0219078063964844,grad_norm: 0.8377751929239287, iteration: 394730
loss: 1.0100845098495483,grad_norm: 0.8826314366842536, iteration: 394731
loss: 1.0150039196014404,grad_norm: 0.7739977203752816, iteration: 394732
loss: 1.0046964883804321,grad_norm: 0.7184168560092979, iteration: 394733
loss: 0.9921971559524536,grad_norm: 0.579072119607263, iteration: 394734
loss: 0.95032799243927,grad_norm: 0.7924563038526093, iteration: 394735
loss: 1.0164027214050293,grad_norm: 0.6837783115565834, iteration: 394736
loss: 0.9963296055793762,grad_norm: 0.9999994001205722, iteration: 394737
loss: 1.013868808746338,grad_norm: 0.7099385060165576, iteration: 394738
loss: 1.0239806175231934,grad_norm: 0.888697347350476, iteration: 394739
loss: 0.9361799359321594,grad_norm: 0.6783720843768938, iteration: 394740
loss: 1.005174994468689,grad_norm: 0.8674276815310629, iteration: 394741
loss: 0.9898927807807922,grad_norm: 0.7874243018995598, iteration: 394742
loss: 0.9873980283737183,grad_norm: 0.8820438031518618, iteration: 394743
loss: 1.0144914388656616,grad_norm: 0.7413923694508365, iteration: 394744
loss: 1.0353705883026123,grad_norm: 0.9999991884619611, iteration: 394745
loss: 1.0127731561660767,grad_norm: 0.764692815257742, iteration: 394746
loss: 0.9816188812255859,grad_norm: 0.7582075137635894, iteration: 394747
loss: 1.0143600702285767,grad_norm: 0.779760062519589, iteration: 394748
loss: 0.9787499904632568,grad_norm: 0.8970391568350942, iteration: 394749
loss: 1.1198958158493042,grad_norm: 0.9999999397958802, iteration: 394750
loss: 1.007826805114746,grad_norm: 0.9999997177473651, iteration: 394751
loss: 1.0357275009155273,grad_norm: 0.7288019839273846, iteration: 394752
loss: 1.02950119972229,grad_norm: 0.8772305140696345, iteration: 394753
loss: 0.9561887979507446,grad_norm: 0.7765975165989728, iteration: 394754
loss: 0.9815623164176941,grad_norm: 0.9869811992390162, iteration: 394755
loss: 1.030002474784851,grad_norm: 0.9999994785606783, iteration: 394756
loss: 0.9815963506698608,grad_norm: 0.8816774752888475, iteration: 394757
loss: 0.9747162461280823,grad_norm: 0.9999992607031153, iteration: 394758
loss: 1.0491517782211304,grad_norm: 0.9875436034131924, iteration: 394759
loss: 0.972914457321167,grad_norm: 0.7200306885882566, iteration: 394760
loss: 1.0487232208251953,grad_norm: 0.999999799076355, iteration: 394761
loss: 1.0406075716018677,grad_norm: 0.9517861320490987, iteration: 394762
loss: 0.984363853931427,grad_norm: 0.8157628684967236, iteration: 394763
loss: 1.0085463523864746,grad_norm: 0.683938619533386, iteration: 394764
loss: 1.0101895332336426,grad_norm: 0.8037359590265487, iteration: 394765
loss: 1.025920033454895,grad_norm: 0.8147491651886273, iteration: 394766
loss: 1.0014983415603638,grad_norm: 0.6739208474741585, iteration: 394767
loss: 1.009390115737915,grad_norm: 0.8152952547285566, iteration: 394768
loss: 0.9833436012268066,grad_norm: 0.7215342223969339, iteration: 394769
loss: 0.9989391565322876,grad_norm: 0.7511485824654754, iteration: 394770
loss: 0.9742447733879089,grad_norm: 0.9230784125842294, iteration: 394771
loss: 0.9956792593002319,grad_norm: 0.7898832466203902, iteration: 394772
loss: 0.9876968264579773,grad_norm: 0.7151733578081977, iteration: 394773
loss: 1.0193164348602295,grad_norm: 0.8519291326787747, iteration: 394774
loss: 0.9837698340415955,grad_norm: 0.9379884798609106, iteration: 394775
loss: 0.9639331102371216,grad_norm: 0.9131821570416447, iteration: 394776
loss: 0.9708295464515686,grad_norm: 0.6988743301585603, iteration: 394777
loss: 0.9990951418876648,grad_norm: 0.8489377291656198, iteration: 394778
loss: 0.9981662631034851,grad_norm: 0.7716709873320393, iteration: 394779
loss: 0.9962716102600098,grad_norm: 0.8802904243907137, iteration: 394780
loss: 1.0241233110427856,grad_norm: 0.7606375698601078, iteration: 394781
loss: 1.0491353273391724,grad_norm: 0.9999989950482455, iteration: 394782
loss: 0.99730384349823,grad_norm: 0.8010736502001379, iteration: 394783
loss: 0.9720056056976318,grad_norm: 0.7079400495190319, iteration: 394784
loss: 1.029955267906189,grad_norm: 0.7957998115171523, iteration: 394785
loss: 1.0043281316757202,grad_norm: 0.7531872809795558, iteration: 394786
loss: 1.024907112121582,grad_norm: 0.9207293380028299, iteration: 394787
loss: 1.0432422161102295,grad_norm: 0.7757995156235634, iteration: 394788
loss: 1.034104347229004,grad_norm: 0.999999630050835, iteration: 394789
loss: 0.9890291690826416,grad_norm: 0.7226707284246304, iteration: 394790
loss: 1.0035631656646729,grad_norm: 0.9999989593282796, iteration: 394791
loss: 0.9975126385688782,grad_norm: 0.8236572764238486, iteration: 394792
loss: 0.9931361675262451,grad_norm: 0.750836476363839, iteration: 394793
loss: 0.9769760370254517,grad_norm: 0.8110990815908452, iteration: 394794
loss: 0.9895617365837097,grad_norm: 0.7993910449770009, iteration: 394795
loss: 0.950722873210907,grad_norm: 0.7996098798238729, iteration: 394796
loss: 1.0062401294708252,grad_norm: 0.9799246915607717, iteration: 394797
loss: 1.0106250047683716,grad_norm: 0.9999992072419265, iteration: 394798
loss: 1.1056944131851196,grad_norm: 0.9999995065055172, iteration: 394799
loss: 1.0115110874176025,grad_norm: 0.73220879120799, iteration: 394800
loss: 0.9583818912506104,grad_norm: 0.7498495745124502, iteration: 394801
loss: 0.9751896858215332,grad_norm: 0.7076535727192275, iteration: 394802
loss: 0.9800865650177002,grad_norm: 0.8720472965820663, iteration: 394803
loss: 1.0032647848129272,grad_norm: 0.9999995735744085, iteration: 394804
loss: 0.9947277307510376,grad_norm: 0.7590550993534522, iteration: 394805
loss: 0.9787001013755798,grad_norm: 0.6671465241663006, iteration: 394806
loss: 0.944947361946106,grad_norm: 0.8493999037415865, iteration: 394807
loss: 1.0230796337127686,grad_norm: 0.9102742445374279, iteration: 394808
loss: 1.0105351209640503,grad_norm: 0.7984379692907606, iteration: 394809
loss: 0.9952332377433777,grad_norm: 0.690661950631317, iteration: 394810
loss: 0.9956299662590027,grad_norm: 0.8771558037497629, iteration: 394811
loss: 1.0353630781173706,grad_norm: 0.742657245270735, iteration: 394812
loss: 0.9955182671546936,grad_norm: 0.8732040293533934, iteration: 394813
loss: 1.0206514596939087,grad_norm: 0.7840043367752845, iteration: 394814
loss: 0.9728003144264221,grad_norm: 0.7699452861791883, iteration: 394815
loss: 0.9519651532173157,grad_norm: 0.7574289710260615, iteration: 394816
loss: 1.0470045804977417,grad_norm: 0.9999992539692489, iteration: 394817
loss: 0.9798387885093689,grad_norm: 0.7076098574382437, iteration: 394818
loss: 0.9978988170623779,grad_norm: 0.8667501632557615, iteration: 394819
loss: 0.9944119453430176,grad_norm: 0.982824186634101, iteration: 394820
loss: 1.0263375043869019,grad_norm: 0.7462210512553324, iteration: 394821
loss: 0.9669607877731323,grad_norm: 0.7243145100853988, iteration: 394822
loss: 0.996685266494751,grad_norm: 0.7723850562911113, iteration: 394823
loss: 1.01206636428833,grad_norm: 0.8867347315601318, iteration: 394824
loss: 0.9979211688041687,grad_norm: 0.7098098293631935, iteration: 394825
loss: 1.0245784521102905,grad_norm: 0.7600241552200035, iteration: 394826
loss: 1.0231457948684692,grad_norm: 0.6694916841938906, iteration: 394827
loss: 1.0570682287216187,grad_norm: 0.9999999589443173, iteration: 394828
loss: 0.9966633915901184,grad_norm: 0.8135781763385417, iteration: 394829
loss: 0.9989823698997498,grad_norm: 0.868445269429556, iteration: 394830
loss: 0.9676480889320374,grad_norm: 0.7716161339573311, iteration: 394831
loss: 1.0141499042510986,grad_norm: 0.7907296714136633, iteration: 394832
loss: 0.9736438393592834,grad_norm: 0.9691793298444168, iteration: 394833
loss: 1.0437062978744507,grad_norm: 0.7621567696658187, iteration: 394834
loss: 0.9984496831893921,grad_norm: 0.8051547471078497, iteration: 394835
loss: 0.9919172525405884,grad_norm: 0.6891171288274895, iteration: 394836
loss: 1.0041961669921875,grad_norm: 0.8107587432731428, iteration: 394837
loss: 0.983483076095581,grad_norm: 0.7189436208083699, iteration: 394838
loss: 0.9931027293205261,grad_norm: 0.7548995253245975, iteration: 394839
loss: 1.004997968673706,grad_norm: 0.6670080270204105, iteration: 394840
loss: 0.9855573773384094,grad_norm: 0.7496499676954892, iteration: 394841
loss: 1.0144044160842896,grad_norm: 0.7350063328486964, iteration: 394842
loss: 0.9956808686256409,grad_norm: 0.917001480119157, iteration: 394843
loss: 1.0367704629898071,grad_norm: 0.7947046698467493, iteration: 394844
loss: 1.0164968967437744,grad_norm: 0.8677228529034321, iteration: 394845
loss: 1.0282771587371826,grad_norm: 0.9999991850621488, iteration: 394846
loss: 0.9976479411125183,grad_norm: 0.8553938153169584, iteration: 394847
loss: 1.0442497730255127,grad_norm: 0.7714786629614832, iteration: 394848
loss: 0.9792963862419128,grad_norm: 0.8293601934843893, iteration: 394849
loss: 0.9712302088737488,grad_norm: 0.7685068234873217, iteration: 394850
loss: 1.0201269388198853,grad_norm: 0.99999937866987, iteration: 394851
loss: 1.0105602741241455,grad_norm: 0.9999991433199246, iteration: 394852
loss: 1.0808132886886597,grad_norm: 0.7605320369385496, iteration: 394853
loss: 1.0116859674453735,grad_norm: 0.7911002565749113, iteration: 394854
loss: 1.009120225906372,grad_norm: 0.9336622648821908, iteration: 394855
loss: 1.001062273979187,grad_norm: 0.8708947234992449, iteration: 394856
loss: 0.9849715828895569,grad_norm: 0.6816557892553706, iteration: 394857
loss: 1.052574634552002,grad_norm: 0.877885551386676, iteration: 394858
loss: 1.0059731006622314,grad_norm: 0.7143664342593231, iteration: 394859
loss: 1.0053071975708008,grad_norm: 0.9999990292973266, iteration: 394860
loss: 1.0508004426956177,grad_norm: 0.7524006485270606, iteration: 394861
loss: 1.0233855247497559,grad_norm: 0.9924709590309198, iteration: 394862
loss: 0.9856471419334412,grad_norm: 0.7028287834455568, iteration: 394863
loss: 1.0390934944152832,grad_norm: 0.7934064097238724, iteration: 394864
loss: 0.9903472065925598,grad_norm: 0.7704540388002433, iteration: 394865
loss: 1.0077461004257202,grad_norm: 0.6777807945335573, iteration: 394866
loss: 1.026404619216919,grad_norm: 0.913598043311885, iteration: 394867
loss: 1.0113950967788696,grad_norm: 0.7448355781853035, iteration: 394868
loss: 0.945827841758728,grad_norm: 0.848620724886905, iteration: 394869
loss: 1.0444016456604004,grad_norm: 0.8942800221155222, iteration: 394870
loss: 0.9976616501808167,grad_norm: 0.9799403722041532, iteration: 394871
loss: 1.0002094507217407,grad_norm: 0.7012620964969981, iteration: 394872
loss: 0.9710599184036255,grad_norm: 0.7803560065310278, iteration: 394873
loss: 1.015547752380371,grad_norm: 0.90545962642916, iteration: 394874
loss: 1.048183560371399,grad_norm: 0.8354975734693073, iteration: 394875
loss: 1.0010014772415161,grad_norm: 0.7175478979505743, iteration: 394876
loss: 1.0084903240203857,grad_norm: 0.9764842463017055, iteration: 394877
loss: 1.1325732469558716,grad_norm: 0.9999999250015807, iteration: 394878
loss: 0.9720038175582886,grad_norm: 0.9021578218688114, iteration: 394879
loss: 0.9915094375610352,grad_norm: 0.724925098629788, iteration: 394880
loss: 0.9986757040023804,grad_norm: 0.7759594988699916, iteration: 394881
loss: 1.0100189447402954,grad_norm: 0.7924029687547081, iteration: 394882
loss: 0.9693564176559448,grad_norm: 0.7376548325922425, iteration: 394883
loss: 0.9984019994735718,grad_norm: 0.8418889868111762, iteration: 394884
loss: 1.0015254020690918,grad_norm: 0.8089355081362868, iteration: 394885
loss: 0.9804887175559998,grad_norm: 0.792207627104475, iteration: 394886
loss: 1.0092130899429321,grad_norm: 0.699213388060756, iteration: 394887
loss: 1.0119746923446655,grad_norm: 0.7244723414530364, iteration: 394888
loss: 1.0209037065505981,grad_norm: 0.9999995254422253, iteration: 394889
loss: 0.965035617351532,grad_norm: 0.8429008104481606, iteration: 394890
loss: 1.0379548072814941,grad_norm: 0.6600237998341504, iteration: 394891
loss: 1.0430872440338135,grad_norm: 0.9314075708222842, iteration: 394892
loss: 0.9993167519569397,grad_norm: 0.6714113961060855, iteration: 394893
loss: 0.9950375556945801,grad_norm: 0.8128360699346231, iteration: 394894
loss: 0.9816738367080688,grad_norm: 0.7326705934382003, iteration: 394895
loss: 0.9754087924957275,grad_norm: 0.6795485039408197, iteration: 394896
loss: 0.9913161993026733,grad_norm: 0.8579976108271344, iteration: 394897
loss: 0.9852336645126343,grad_norm: 0.687756292807298, iteration: 394898
loss: 0.9825334548950195,grad_norm: 0.829998371003632, iteration: 394899
loss: 1.0010555982589722,grad_norm: 0.9999996287133026, iteration: 394900
loss: 1.01485013961792,grad_norm: 0.8436747172633534, iteration: 394901
loss: 0.9853284955024719,grad_norm: 0.7450127830404946, iteration: 394902
loss: 1.0158209800720215,grad_norm: 0.8166578357355218, iteration: 394903
loss: 1.0161341428756714,grad_norm: 0.870662682763035, iteration: 394904
loss: 1.015175223350525,grad_norm: 0.7501332635256269, iteration: 394905
loss: 0.9683265089988708,grad_norm: 0.6768237041337472, iteration: 394906
loss: 1.0495768785476685,grad_norm: 0.7916019830445994, iteration: 394907
loss: 1.014342188835144,grad_norm: 0.7266743619275375, iteration: 394908
loss: 1.049691081047058,grad_norm: 0.9524503815099555, iteration: 394909
loss: 0.9822133779525757,grad_norm: 0.7448648435460569, iteration: 394910
loss: 1.119777798652649,grad_norm: 0.9999999507850326, iteration: 394911
loss: 0.9862508177757263,grad_norm: 0.7018752127180834, iteration: 394912
loss: 0.9918278455734253,grad_norm: 0.7002200351820936, iteration: 394913
loss: 0.9901854395866394,grad_norm: 0.8228318020279995, iteration: 394914
loss: 0.9900786876678467,grad_norm: 0.9999993789439403, iteration: 394915
loss: 1.0372538566589355,grad_norm: 0.7664737278628532, iteration: 394916
loss: 0.9816517233848572,grad_norm: 0.7357235220608773, iteration: 394917
loss: 0.9592959880828857,grad_norm: 0.8677084875191549, iteration: 394918
loss: 1.0046783685684204,grad_norm: 0.9028064091851976, iteration: 394919
loss: 1.0327409505844116,grad_norm: 0.9694924678503433, iteration: 394920
loss: 1.0753426551818848,grad_norm: 0.9124955143066232, iteration: 394921
loss: 0.9966143369674683,grad_norm: 0.7636280371818193, iteration: 394922
loss: 1.0005064010620117,grad_norm: 0.9999992957444922, iteration: 394923
loss: 0.9996084570884705,grad_norm: 0.7384758247992925, iteration: 394924
loss: 0.9886841177940369,grad_norm: 0.7585221404860077, iteration: 394925
loss: 0.9498268365859985,grad_norm: 0.742194898023516, iteration: 394926
loss: 1.0059183835983276,grad_norm: 0.6340285454033887, iteration: 394927
loss: 0.9752365350723267,grad_norm: 0.7450230425737295, iteration: 394928
loss: 1.028720736503601,grad_norm: 0.8313776842672467, iteration: 394929
loss: 0.9963901042938232,grad_norm: 0.8302527497667257, iteration: 394930
loss: 1.0154691934585571,grad_norm: 0.8944629258949389, iteration: 394931
loss: 0.9945724606513977,grad_norm: 0.7673666657659332, iteration: 394932
loss: 0.9904956221580505,grad_norm: 0.7501861497231181, iteration: 394933
loss: 1.052682876586914,grad_norm: 0.999999257869974, iteration: 394934
loss: 0.9978656768798828,grad_norm: 0.8531351881005536, iteration: 394935
loss: 1.008770227432251,grad_norm: 0.9431137262032835, iteration: 394936
loss: 1.017449975013733,grad_norm: 0.7925057484891604, iteration: 394937
loss: 0.9940309524536133,grad_norm: 0.7713113339363417, iteration: 394938
loss: 1.0011811256408691,grad_norm: 0.9999993304032946, iteration: 394939
loss: 1.0213602781295776,grad_norm: 0.9341398623820164, iteration: 394940
loss: 0.9908238649368286,grad_norm: 0.6734531298228544, iteration: 394941
loss: 0.9730727076530457,grad_norm: 0.8574726492834284, iteration: 394942
loss: 0.976729691028595,grad_norm: 0.9156624544784981, iteration: 394943
loss: 0.9960143566131592,grad_norm: 0.753960959554025, iteration: 394944
loss: 0.9967108964920044,grad_norm: 0.8130603974888044, iteration: 394945
loss: 1.0306439399719238,grad_norm: 0.702468285685205, iteration: 394946
loss: 0.9839752912521362,grad_norm: 0.8406022964772432, iteration: 394947
loss: 0.9706366658210754,grad_norm: 0.796316890657978, iteration: 394948
loss: 1.0651557445526123,grad_norm: 0.9474527474820679, iteration: 394949
loss: 1.0450072288513184,grad_norm: 0.9999993043062643, iteration: 394950
loss: 1.0296626091003418,grad_norm: 0.9567816693060687, iteration: 394951
loss: 1.0146303176879883,grad_norm: 0.828551595011318, iteration: 394952
loss: 0.9896992444992065,grad_norm: 0.7344003726478416, iteration: 394953
loss: 0.975026547908783,grad_norm: 0.7988425146118567, iteration: 394954
loss: 1.0032631158828735,grad_norm: 0.8329394046889115, iteration: 394955
loss: 0.9988905787467957,grad_norm: 0.6670417493235645, iteration: 394956
loss: 0.978626012802124,grad_norm: 0.8305224830337125, iteration: 394957
loss: 1.016437292098999,grad_norm: 0.5854528650387666, iteration: 394958
loss: 1.00833261013031,grad_norm: 0.7090358354807368, iteration: 394959
loss: 1.0092602968215942,grad_norm: 0.9608736673773437, iteration: 394960
loss: 1.1135469675064087,grad_norm: 0.9999998752470921, iteration: 394961
loss: 1.0414267778396606,grad_norm: 0.9999999012383741, iteration: 394962
loss: 1.0307142734527588,grad_norm: 0.8577649446201894, iteration: 394963
loss: 1.0434049367904663,grad_norm: 0.8507765256166742, iteration: 394964
loss: 0.9778273701667786,grad_norm: 0.7075401153916924, iteration: 394965
loss: 0.9943450093269348,grad_norm: 0.7915543888663391, iteration: 394966
loss: 0.9866315126419067,grad_norm: 0.9347044728264018, iteration: 394967
loss: 1.0460296869277954,grad_norm: 0.8405545706530886, iteration: 394968
loss: 0.9626681208610535,grad_norm: 0.8806909827579681, iteration: 394969
loss: 0.9852808117866516,grad_norm: 0.6791349675373747, iteration: 394970
loss: 1.0058395862579346,grad_norm: 0.856281461489269, iteration: 394971
loss: 1.0370303392410278,grad_norm: 0.762290525831975, iteration: 394972
loss: 1.0011504888534546,grad_norm: 0.7554485220610091, iteration: 394973
loss: 1.0342354774475098,grad_norm: 0.9999996881514931, iteration: 394974
loss: 0.9962063431739807,grad_norm: 0.7623954947234716, iteration: 394975
loss: 0.9930416941642761,grad_norm: 0.7721700733518546, iteration: 394976
loss: 1.0138664245605469,grad_norm: 0.8615238479480212, iteration: 394977
loss: 1.0067278146743774,grad_norm: 0.6671791343506409, iteration: 394978
loss: 1.061936616897583,grad_norm: 0.9999995919192554, iteration: 394979
loss: 0.9713337421417236,grad_norm: 0.8822045570848611, iteration: 394980
loss: 1.0148924589157104,grad_norm: 0.9999990094940082, iteration: 394981
loss: 0.9832652807235718,grad_norm: 0.7803280866604787, iteration: 394982
loss: 0.992857813835144,grad_norm: 0.7462113283614824, iteration: 394983
loss: 0.9563384652137756,grad_norm: 0.7619449646202456, iteration: 394984
loss: 1.0139384269714355,grad_norm: 0.9375210094977352, iteration: 394985
loss: 1.0000300407409668,grad_norm: 0.7464194001444585, iteration: 394986
loss: 1.00596284866333,grad_norm: 0.9999998738409596, iteration: 394987
loss: 1.0870188474655151,grad_norm: 0.9999990103197991, iteration: 394988
loss: 1.0325146913528442,grad_norm: 0.7163466608421475, iteration: 394989
loss: 0.9910975694656372,grad_norm: 0.9999991001035362, iteration: 394990
loss: 0.9996448159217834,grad_norm: 0.7732064621250363, iteration: 394991
loss: 1.0019681453704834,grad_norm: 0.8442413458578368, iteration: 394992
loss: 0.9617841243743896,grad_norm: 0.803970796366035, iteration: 394993
loss: 0.9834940433502197,grad_norm: 0.6960247523241336, iteration: 394994
loss: 1.0785892009735107,grad_norm: 0.880362918746823, iteration: 394995
loss: 1.0030547380447388,grad_norm: 0.6970535615157141, iteration: 394996
loss: 0.9900299906730652,grad_norm: 0.5958733781510924, iteration: 394997
loss: 1.0096098184585571,grad_norm: 0.7493722544818687, iteration: 394998
loss: 1.0213823318481445,grad_norm: 0.8247064887374862, iteration: 394999
loss: 0.9912984371185303,grad_norm: 0.7256915121301168, iteration: 395000
loss: 0.9909802675247192,grad_norm: 0.8624121510289998, iteration: 395001
loss: 0.9869084358215332,grad_norm: 0.8034830476572602, iteration: 395002
loss: 0.991169273853302,grad_norm: 0.6506785051587415, iteration: 395003
loss: 1.086715579032898,grad_norm: 0.9756146958011225, iteration: 395004
loss: 0.9726709127426147,grad_norm: 0.7189126752350745, iteration: 395005
loss: 1.0296190977096558,grad_norm: 0.9199104723634276, iteration: 395006
loss: 1.0230638980865479,grad_norm: 0.9261090943715317, iteration: 395007
loss: 0.9901359677314758,grad_norm: 0.8096297642110971, iteration: 395008
loss: 1.0359935760498047,grad_norm: 0.7755927215149272, iteration: 395009
loss: 1.051910161972046,grad_norm: 0.7001909445896976, iteration: 395010
loss: 0.9840665459632874,grad_norm: 0.6793221278871316, iteration: 395011
loss: 0.9724907875061035,grad_norm: 0.989557177322191, iteration: 395012
loss: 1.041682243347168,grad_norm: 0.9999991046144032, iteration: 395013
loss: 0.9858726263046265,grad_norm: 0.7845659758529021, iteration: 395014
loss: 0.9924665689468384,grad_norm: 0.7582715738744968, iteration: 395015
loss: 1.0436460971832275,grad_norm: 0.7832177846601999, iteration: 395016
loss: 0.9857087135314941,grad_norm: 0.7862209308317484, iteration: 395017
loss: 0.9962558150291443,grad_norm: 0.9999990599246873, iteration: 395018
loss: 1.079465389251709,grad_norm: 0.9932426376653175, iteration: 395019
loss: 0.9956709146499634,grad_norm: 0.8728440728054793, iteration: 395020
loss: 1.0830987691879272,grad_norm: 0.7644653395821798, iteration: 395021
loss: 1.0083972215652466,grad_norm: 0.8178533546349799, iteration: 395022
loss: 1.0264121294021606,grad_norm: 0.8261217210452817, iteration: 395023
loss: 1.038398265838623,grad_norm: 0.7798401305835978, iteration: 395024
loss: 0.9635365009307861,grad_norm: 0.6737034268966688, iteration: 395025
loss: 1.0169209241867065,grad_norm: 0.8219160455401848, iteration: 395026
loss: 1.0147900581359863,grad_norm: 0.801036458698772, iteration: 395027
loss: 1.015976905822754,grad_norm: 0.7657532676724664, iteration: 395028
loss: 1.0109269618988037,grad_norm: 0.7861548699083851, iteration: 395029
loss: 1.0043001174926758,grad_norm: 0.7258457563226601, iteration: 395030
loss: 1.004652500152588,grad_norm: 0.6660339057911312, iteration: 395031
loss: 1.0113763809204102,grad_norm: 0.8034767508652247, iteration: 395032
loss: 0.9881733059883118,grad_norm: 0.9572415177109563, iteration: 395033
loss: 1.0312256813049316,grad_norm: 0.7715423361725401, iteration: 395034
loss: 0.9993424415588379,grad_norm: 0.7604077613327276, iteration: 395035
loss: 0.992379367351532,grad_norm: 0.7884220466738279, iteration: 395036
loss: 1.0112323760986328,grad_norm: 0.9999994316977968, iteration: 395037
loss: 0.9994935393333435,grad_norm: 0.8361395823659977, iteration: 395038
loss: 1.0347124338150024,grad_norm: 0.9999992407973443, iteration: 395039
loss: 1.056190848350525,grad_norm: 0.9999997621923803, iteration: 395040
loss: 0.9873430132865906,grad_norm: 0.7803859228912893, iteration: 395041
loss: 1.0486457347869873,grad_norm: 0.8034167362404993, iteration: 395042
loss: 1.043931245803833,grad_norm: 0.9137556601039815, iteration: 395043
loss: 0.9693284630775452,grad_norm: 0.7838997909895526, iteration: 395044
loss: 1.0110353231430054,grad_norm: 0.6938242399668442, iteration: 395045
loss: 0.9995659589767456,grad_norm: 0.8550737368751403, iteration: 395046
loss: 1.0028600692749023,grad_norm: 0.6697736057980477, iteration: 395047
loss: 1.0023785829544067,grad_norm: 0.9999991889693508, iteration: 395048
loss: 1.0250662565231323,grad_norm: 0.9446797161569521, iteration: 395049
loss: 0.9839224219322205,grad_norm: 0.7881089752520337, iteration: 395050
loss: 0.9758130311965942,grad_norm: 0.7513727507099288, iteration: 395051
loss: 1.0421011447906494,grad_norm: 0.9999991916056236, iteration: 395052
loss: 0.9759718775749207,grad_norm: 0.7972635908966598, iteration: 395053
loss: 1.205011010169983,grad_norm: 1.0000000780020644, iteration: 395054
loss: 0.999417245388031,grad_norm: 0.9999999586065493, iteration: 395055
loss: 0.9744168519973755,grad_norm: 0.9250028884883844, iteration: 395056
loss: 1.0015783309936523,grad_norm: 0.7157398447729856, iteration: 395057
loss: 1.0060311555862427,grad_norm: 0.8756996474653632, iteration: 395058
loss: 1.0092748403549194,grad_norm: 0.9321241457978593, iteration: 395059
loss: 1.0052027702331543,grad_norm: 0.796315384636871, iteration: 395060
loss: 1.1087095737457275,grad_norm: 0.7698811101801519, iteration: 395061
loss: 0.9930998086929321,grad_norm: 0.9565464994728364, iteration: 395062
loss: 0.9984922409057617,grad_norm: 0.9999994123696831, iteration: 395063
loss: 1.0067790746688843,grad_norm: 0.6726884195108905, iteration: 395064
loss: 1.007080316543579,grad_norm: 0.9999992872148001, iteration: 395065
loss: 0.9281255006790161,grad_norm: 0.7533985946488061, iteration: 395066
loss: 1.0619417428970337,grad_norm: 0.8379194844165042, iteration: 395067
loss: 1.0506428480148315,grad_norm: 0.8160893624408614, iteration: 395068
loss: 1.0276179313659668,grad_norm: 0.9999998978432807, iteration: 395069
loss: 0.9911156296730042,grad_norm: 0.7789782262875895, iteration: 395070
loss: 0.9614285826683044,grad_norm: 0.7681254025519848, iteration: 395071
loss: 0.9737393260002136,grad_norm: 0.7982414960293154, iteration: 395072
loss: 0.9706152677536011,grad_norm: 0.694370970326922, iteration: 395073
loss: 1.0478168725967407,grad_norm: 0.9968371908231506, iteration: 395074
loss: 1.0168440341949463,grad_norm: 0.9999992698660903, iteration: 395075
loss: 0.9914971590042114,grad_norm: 0.7974583061510963, iteration: 395076
loss: 1.09654700756073,grad_norm: 0.8512241113077968, iteration: 395077
loss: 1.0245270729064941,grad_norm: 0.9999992732169525, iteration: 395078
loss: 0.9972939491271973,grad_norm: 0.7186050130566135, iteration: 395079
loss: 1.100725531578064,grad_norm: 0.9999990847163548, iteration: 395080
loss: 0.9732223749160767,grad_norm: 0.8119254176938051, iteration: 395081
loss: 0.9841122031211853,grad_norm: 0.7809213949533295, iteration: 395082
loss: 0.9952543377876282,grad_norm: 0.8854260440447437, iteration: 395083
loss: 1.0058956146240234,grad_norm: 0.9999992108251754, iteration: 395084
loss: 0.9874464273452759,grad_norm: 0.8002674382525056, iteration: 395085
loss: 1.0169633626937866,grad_norm: 0.9536076394141505, iteration: 395086
loss: 1.0259138345718384,grad_norm: 0.9727237988951554, iteration: 395087
loss: 0.9958627820014954,grad_norm: 0.7634248900612192, iteration: 395088
loss: 1.0252995491027832,grad_norm: 0.8748818188771518, iteration: 395089
loss: 1.0100531578063965,grad_norm: 0.8253407275695309, iteration: 395090
loss: 0.9874660968780518,grad_norm: 0.888426787117046, iteration: 395091
loss: 0.9904863238334656,grad_norm: 0.9138698012828318, iteration: 395092
loss: 1.0386238098144531,grad_norm: 0.7945438909971214, iteration: 395093
loss: 1.0110422372817993,grad_norm: 0.9738521517410621, iteration: 395094
loss: 1.082623839378357,grad_norm: 0.7276424328518084, iteration: 395095
loss: 1.1022634506225586,grad_norm: 0.7974406303732849, iteration: 395096
loss: 0.9996044039726257,grad_norm: 0.7750831722221909, iteration: 395097
loss: 1.0649935007095337,grad_norm: 0.7052150611579736, iteration: 395098
loss: 0.9987067580223083,grad_norm: 0.871730317197963, iteration: 395099
loss: 0.9766156673431396,grad_norm: 0.7794433455990122, iteration: 395100
loss: 1.0322321653366089,grad_norm: 0.8139664059511129, iteration: 395101
loss: 1.0135899782180786,grad_norm: 0.6644741924263096, iteration: 395102
loss: 1.0423463582992554,grad_norm: 0.8424538586854744, iteration: 395103
loss: 0.9835362434387207,grad_norm: 0.7013171657590714, iteration: 395104
loss: 1.0708476305007935,grad_norm: 0.9999999133969947, iteration: 395105
loss: 0.9770344495773315,grad_norm: 0.7477067028822375, iteration: 395106
loss: 0.9987372756004333,grad_norm: 0.697816798443147, iteration: 395107
loss: 0.9967325925827026,grad_norm: 0.8213518233178877, iteration: 395108
loss: 1.002258539199829,grad_norm: 0.9999990398901013, iteration: 395109
loss: 0.9726063013076782,grad_norm: 0.8195727667482905, iteration: 395110
loss: 1.004716157913208,grad_norm: 0.7426056707490809, iteration: 395111
loss: 1.0686225891113281,grad_norm: 0.9999991391551142, iteration: 395112
loss: 1.0080013275146484,grad_norm: 0.7130968805038853, iteration: 395113
loss: 1.0558812618255615,grad_norm: 0.8820548851418821, iteration: 395114
loss: 0.9828606247901917,grad_norm: 0.7655526334013266, iteration: 395115
loss: 1.0028971433639526,grad_norm: 0.7996510053340201, iteration: 395116
loss: 0.9692604541778564,grad_norm: 0.7183405756413915, iteration: 395117
loss: 1.0306707620620728,grad_norm: 0.7803567296797674, iteration: 395118
loss: 1.034865140914917,grad_norm: 0.999999279041376, iteration: 395119
loss: 0.9766952991485596,grad_norm: 0.8465487307766023, iteration: 395120
loss: 1.006693720817566,grad_norm: 0.7762393487492736, iteration: 395121
loss: 0.9838595390319824,grad_norm: 0.8244177088548489, iteration: 395122
loss: 1.1235023736953735,grad_norm: 0.9552348345363834, iteration: 395123
loss: 0.9726293087005615,grad_norm: 0.9102814411065366, iteration: 395124
loss: 1.0433812141418457,grad_norm: 0.9999995090909364, iteration: 395125
loss: 0.980703592300415,grad_norm: 0.7524226427852373, iteration: 395126
loss: 1.0580302476882935,grad_norm: 0.749526106714708, iteration: 395127
loss: 1.0325617790222168,grad_norm: 0.6803597273063026, iteration: 395128
loss: 1.016351580619812,grad_norm: 0.7988935381448808, iteration: 395129
loss: 1.0021086931228638,grad_norm: 0.6790907315936756, iteration: 395130
loss: 1.0333620309829712,grad_norm: 0.9038489319823373, iteration: 395131
loss: 1.054673671722412,grad_norm: 0.9999999520285101, iteration: 395132
loss: 1.0012753009796143,grad_norm: 0.9183203176377653, iteration: 395133
loss: 1.0421260595321655,grad_norm: 0.7872024930437344, iteration: 395134
loss: 1.0259569883346558,grad_norm: 0.8186564231352537, iteration: 395135
loss: 0.9754579067230225,grad_norm: 0.9270023656007411, iteration: 395136
loss: 1.0029857158660889,grad_norm: 0.9999996276522092, iteration: 395137
loss: 0.9710540771484375,grad_norm: 0.9153495678382989, iteration: 395138
loss: 1.0522276163101196,grad_norm: 0.9243800605693008, iteration: 395139
loss: 0.9914847612380981,grad_norm: 0.950914438588081, iteration: 395140
loss: 1.021749496459961,grad_norm: 0.7135181688970109, iteration: 395141
loss: 1.0413424968719482,grad_norm: 0.7100245048738666, iteration: 395142
loss: 1.0105057954788208,grad_norm: 0.7168360233660224, iteration: 395143
loss: 1.0165501832962036,grad_norm: 0.7933898653231753, iteration: 395144
loss: 0.9970690011978149,grad_norm: 0.8241760908091714, iteration: 395145
loss: 1.0009945631027222,grad_norm: 0.8678278511370174, iteration: 395146
loss: 1.0151948928833008,grad_norm: 0.7037658529217675, iteration: 395147
loss: 1.0641783475875854,grad_norm: 0.9999991016126587, iteration: 395148
loss: 1.0135446786880493,grad_norm: 0.7335158495338782, iteration: 395149
loss: 0.9842939972877502,grad_norm: 0.7554102831387077, iteration: 395150
loss: 1.0139247179031372,grad_norm: 0.7513408148457748, iteration: 395151
loss: 1.042593240737915,grad_norm: 0.9999991995755744, iteration: 395152
loss: 1.0719616413116455,grad_norm: 0.9999994142194457, iteration: 395153
loss: 1.0171046257019043,grad_norm: 0.7497955148103638, iteration: 395154
loss: 1.0201300382614136,grad_norm: 0.714287806206005, iteration: 395155
loss: 1.0232254266738892,grad_norm: 0.7640140844717339, iteration: 395156
loss: 1.0466009378433228,grad_norm: 0.8986559175862774, iteration: 395157
loss: 0.9834195971488953,grad_norm: 0.6985047687993979, iteration: 395158
loss: 1.0015618801116943,grad_norm: 0.7858966513221662, iteration: 395159
loss: 1.0082037448883057,grad_norm: 0.7518101514204923, iteration: 395160
loss: 1.1760458946228027,grad_norm: 1.0000000630660757, iteration: 395161
loss: 0.9546951651573181,grad_norm: 0.950592320509159, iteration: 395162
loss: 1.0000876188278198,grad_norm: 0.9809429104303802, iteration: 395163
loss: 1.0277742147445679,grad_norm: 0.9999990948504258, iteration: 395164
loss: 1.0753295421600342,grad_norm: 0.9999991534256947, iteration: 395165
loss: 1.0069987773895264,grad_norm: 0.7453421778761674, iteration: 395166
loss: 1.020938515663147,grad_norm: 0.6065977490750596, iteration: 395167
loss: 1.0102125406265259,grad_norm: 0.8523714431884846, iteration: 395168
loss: 1.0112632513046265,grad_norm: 0.8387103688785624, iteration: 395169
loss: 0.988564133644104,grad_norm: 0.873722057846001, iteration: 395170
loss: 1.0248146057128906,grad_norm: 0.8205912187837601, iteration: 395171
loss: 1.0266776084899902,grad_norm: 0.8711558572536646, iteration: 395172
loss: 0.9971094131469727,grad_norm: 0.8326836065243471, iteration: 395173
loss: 0.9881972074508667,grad_norm: 0.7524785983415927, iteration: 395174
loss: 0.9846569299697876,grad_norm: 0.9146184655930811, iteration: 395175
loss: 1.0290144681930542,grad_norm: 0.8731691402682019, iteration: 395176
loss: 0.989209771156311,grad_norm: 0.9114355194148483, iteration: 395177
loss: 0.9998407959938049,grad_norm: 0.7181915398120725, iteration: 395178
loss: 1.024227261543274,grad_norm: 0.8247416694601585, iteration: 395179
loss: 1.0004280805587769,grad_norm: 0.8832201508385088, iteration: 395180
loss: 0.9700136780738831,grad_norm: 0.8189355022912828, iteration: 395181
loss: 1.0661894083023071,grad_norm: 0.9999990722533451, iteration: 395182
loss: 0.9568449258804321,grad_norm: 0.9263933465330854, iteration: 395183
loss: 1.0070747137069702,grad_norm: 0.8740073582747332, iteration: 395184
loss: 1.077957272529602,grad_norm: 0.9261205967187073, iteration: 395185
loss: 0.9840148687362671,grad_norm: 0.7875978459364682, iteration: 395186
loss: 0.9701076745986938,grad_norm: 0.7995794850961303, iteration: 395187
loss: 1.0338011980056763,grad_norm: 0.9999995565265632, iteration: 395188
loss: 1.022141933441162,grad_norm: 0.8578963830147094, iteration: 395189
loss: 1.0336089134216309,grad_norm: 0.7570835640559035, iteration: 395190
loss: 0.9808545708656311,grad_norm: 0.7995541982945862, iteration: 395191
loss: 0.9909599423408508,grad_norm: 0.7855167828082089, iteration: 395192
loss: 1.0952284336090088,grad_norm: 0.9999991973458373, iteration: 395193
loss: 0.9983855485916138,grad_norm: 0.7399770743569625, iteration: 395194
loss: 0.9981803297996521,grad_norm: 0.7638621942140623, iteration: 395195
loss: 1.0022109746932983,grad_norm: 0.8359813536502927, iteration: 395196
loss: 0.9776905179023743,grad_norm: 0.8749292560161265, iteration: 395197
loss: 0.9821596741676331,grad_norm: 0.7619579250605025, iteration: 395198
loss: 1.037230134010315,grad_norm: 0.7030054939282074, iteration: 395199
loss: 1.059226155281067,grad_norm: 0.8154335456647771, iteration: 395200
loss: 0.9775189161300659,grad_norm: 0.9480720317718939, iteration: 395201
loss: 1.0517988204956055,grad_norm: 0.850336341006094, iteration: 395202
loss: 1.0012743473052979,grad_norm: 0.7486609616091303, iteration: 395203
loss: 1.0174390077590942,grad_norm: 0.8499138659521779, iteration: 395204
loss: 0.9626411199569702,grad_norm: 0.7784977764559669, iteration: 395205
loss: 0.9352021813392639,grad_norm: 0.6761899874846896, iteration: 395206
loss: 0.996812641620636,grad_norm: 0.981940616458184, iteration: 395207
loss: 1.0322281122207642,grad_norm: 0.9999995358039442, iteration: 395208
loss: 1.0020021200180054,grad_norm: 0.9277635501624777, iteration: 395209
loss: 1.00100576877594,grad_norm: 0.811092824952461, iteration: 395210
loss: 1.0063910484313965,grad_norm: 0.7474469147433422, iteration: 395211
loss: 1.0143040418624878,grad_norm: 0.799615411585977, iteration: 395212
loss: 0.9837270975112915,grad_norm: 0.7710294269488629, iteration: 395213
loss: 0.9959919452667236,grad_norm: 0.7421995094752664, iteration: 395214
loss: 1.0033241510391235,grad_norm: 0.7182003075959751, iteration: 395215
loss: 1.0709270238876343,grad_norm: 0.7525624909292916, iteration: 395216
loss: 1.0407469272613525,grad_norm: 0.9999996832078925, iteration: 395217
loss: 1.02779221534729,grad_norm: 0.8470067767819075, iteration: 395218
loss: 0.9983179569244385,grad_norm: 0.7409043430378344, iteration: 395219
loss: 1.0012398958206177,grad_norm: 0.783416658457738, iteration: 395220
loss: 0.9909502267837524,grad_norm: 0.7858081540698195, iteration: 395221
loss: 1.0782935619354248,grad_norm: 0.9999992862200601, iteration: 395222
loss: 1.0040854215621948,grad_norm: 0.7996401645422242, iteration: 395223
loss: 1.010634183883667,grad_norm: 0.8220745540382193, iteration: 395224
loss: 0.9966731071472168,grad_norm: 0.7774778102144374, iteration: 395225
loss: 1.0114840269088745,grad_norm: 0.8284376020180889, iteration: 395226
loss: 0.9968059062957764,grad_norm: 0.7422807732144237, iteration: 395227
loss: 0.9865765571594238,grad_norm: 0.7631307673742681, iteration: 395228
loss: 1.0852526426315308,grad_norm: 0.9999996731410333, iteration: 395229
loss: 1.0160276889801025,grad_norm: 0.7321946600979039, iteration: 395230
loss: 1.025068998336792,grad_norm: 0.7912781185416412, iteration: 395231
loss: 0.9923906922340393,grad_norm: 0.7515806288444009, iteration: 395232
loss: 1.023603081703186,grad_norm: 0.945028518155136, iteration: 395233
loss: 0.9924686551094055,grad_norm: 0.8172538479971039, iteration: 395234
loss: 0.9832188487052917,grad_norm: 0.8441814592437825, iteration: 395235
loss: 0.990526020526886,grad_norm: 0.9475639971949363, iteration: 395236
loss: 1.0073394775390625,grad_norm: 0.999999023367732, iteration: 395237
loss: 1.0071104764938354,grad_norm: 0.6805625516138137, iteration: 395238
loss: 1.0321165323257446,grad_norm: 0.7823357029985432, iteration: 395239
loss: 0.9857136607170105,grad_norm: 0.8152016429629791, iteration: 395240
loss: 1.002949833869934,grad_norm: 0.7841138751420008, iteration: 395241
loss: 1.0441186428070068,grad_norm: 0.788382907545004, iteration: 395242
loss: 1.007427453994751,grad_norm: 0.7385571859972277, iteration: 395243
loss: 1.0189557075500488,grad_norm: 0.9130622979159622, iteration: 395244
loss: 1.0169872045516968,grad_norm: 0.8460519107274505, iteration: 395245
loss: 1.0005247592926025,grad_norm: 0.9999997031764533, iteration: 395246
loss: 0.9840115308761597,grad_norm: 0.7103038619880545, iteration: 395247
loss: 1.0966179370880127,grad_norm: 0.8742249298913796, iteration: 395248
loss: 1.0387794971466064,grad_norm: 0.999999835080541, iteration: 395249
loss: 1.1056100130081177,grad_norm: 0.9999998034084108, iteration: 395250
loss: 1.0060096979141235,grad_norm: 0.7929314795633682, iteration: 395251
loss: 1.0129356384277344,grad_norm: 0.8416011261959725, iteration: 395252
loss: 1.029611587524414,grad_norm: 0.9999996313849684, iteration: 395253
loss: 1.1347286701202393,grad_norm: 0.9999997623011792, iteration: 395254
loss: 0.9821839928627014,grad_norm: 0.654252938856766, iteration: 395255
loss: 0.9830831289291382,grad_norm: 0.7483853059358879, iteration: 395256
loss: 1.010061502456665,grad_norm: 0.7620013486504912, iteration: 395257
loss: 0.9794240593910217,grad_norm: 0.6293451977807301, iteration: 395258
loss: 1.0131288766860962,grad_norm: 0.7637173151001727, iteration: 395259
loss: 0.9938401579856873,grad_norm: 0.7012765279457099, iteration: 395260
loss: 1.014306902885437,grad_norm: 0.8844115235887073, iteration: 395261
loss: 1.0047881603240967,grad_norm: 0.6899955191232382, iteration: 395262
loss: 1.0838645696640015,grad_norm: 0.9999990841482553, iteration: 395263
loss: 1.0107784271240234,grad_norm: 0.9999991559949392, iteration: 395264
loss: 0.9895091652870178,grad_norm: 0.8247409873464155, iteration: 395265
loss: 1.1025500297546387,grad_norm: 0.9999993490071959, iteration: 395266
loss: 0.9589551091194153,grad_norm: 0.7779233287633848, iteration: 395267
loss: 1.0290111303329468,grad_norm: 0.9999991620500307, iteration: 395268
loss: 1.0027053356170654,grad_norm: 0.9999999279266499, iteration: 395269
loss: 1.054651141166687,grad_norm: 0.8200800325989066, iteration: 395270
loss: 0.9714799523353577,grad_norm: 0.7965664206477286, iteration: 395271
loss: 1.0228216648101807,grad_norm: 0.7145496780624808, iteration: 395272
loss: 1.076616644859314,grad_norm: 0.9999994367961382, iteration: 395273
loss: 1.0088775157928467,grad_norm: 0.9773138404246454, iteration: 395274
loss: 0.9866382479667664,grad_norm: 0.9525886593528204, iteration: 395275
loss: 1.0567206144332886,grad_norm: 0.8158852484013708, iteration: 395276
loss: 1.0496577024459839,grad_norm: 0.999999157144684, iteration: 395277
loss: 1.1040654182434082,grad_norm: 0.9999992359903321, iteration: 395278
loss: 1.0483254194259644,grad_norm: 0.9096883591649065, iteration: 395279
loss: 0.9820122718811035,grad_norm: 0.8351032240350011, iteration: 395280
loss: 0.9849196672439575,grad_norm: 0.8274239336185353, iteration: 395281
loss: 1.0033931732177734,grad_norm: 0.7658581823825507, iteration: 395282
loss: 0.9920456409454346,grad_norm: 0.7962150395386666, iteration: 395283
loss: 1.013159990310669,grad_norm: 0.8170978873894296, iteration: 395284
loss: 0.9907604455947876,grad_norm: 0.9999997436785134, iteration: 395285
loss: 0.9884905219078064,grad_norm: 0.8407877899524506, iteration: 395286
loss: 0.9980236887931824,grad_norm: 0.999999798840714, iteration: 395287
loss: 0.9689604640007019,grad_norm: 0.9999997670187042, iteration: 395288
loss: 1.0459781885147095,grad_norm: 0.8495526638575737, iteration: 395289
loss: 0.9942277073860168,grad_norm: 0.6853542054377251, iteration: 395290
loss: 1.0151878595352173,grad_norm: 0.8457599339810249, iteration: 395291
loss: 0.9818001985549927,grad_norm: 0.77840199981768, iteration: 395292
loss: 1.0238122940063477,grad_norm: 0.8267864113719823, iteration: 395293
loss: 1.0115752220153809,grad_norm: 0.6334516537723561, iteration: 395294
loss: 1.0294264554977417,grad_norm: 0.9999991541919493, iteration: 395295
loss: 1.0537819862365723,grad_norm: 0.8082039879039628, iteration: 395296
loss: 1.0003741979599,grad_norm: 0.7458717557474249, iteration: 395297
loss: 1.0004621744155884,grad_norm: 0.8888118703924514, iteration: 395298
loss: 1.0243607759475708,grad_norm: 0.999999210534234, iteration: 395299
loss: 1.012470006942749,grad_norm: 0.6537236871983274, iteration: 395300
loss: 1.0056923627853394,grad_norm: 0.9999994224088009, iteration: 395301
loss: 1.0009934902191162,grad_norm: 0.7479688247537871, iteration: 395302
loss: 0.9667778611183167,grad_norm: 0.7949300772686747, iteration: 395303
loss: 1.0217617750167847,grad_norm: 0.8033177117113603, iteration: 395304
loss: 1.1398710012435913,grad_norm: 0.9797424911193477, iteration: 395305
loss: 1.024943232536316,grad_norm: 0.9999993047841986, iteration: 395306
loss: 1.0945265293121338,grad_norm: 0.9999991679509829, iteration: 395307
loss: 0.9826840162277222,grad_norm: 0.7088226224681597, iteration: 395308
loss: 0.9933819770812988,grad_norm: 0.8611662844312683, iteration: 395309
loss: 1.0507036447525024,grad_norm: 0.9999990476998085, iteration: 395310
loss: 1.1068639755249023,grad_norm: 0.9999990725035967, iteration: 395311
loss: 1.0099705457687378,grad_norm: 0.7776302695714328, iteration: 395312
loss: 1.0684435367584229,grad_norm: 0.7692445822610086, iteration: 395313
loss: 1.0071675777435303,grad_norm: 0.7044529286473572, iteration: 395314
loss: 1.0190422534942627,grad_norm: 0.815558652712881, iteration: 395315
loss: 1.0303760766983032,grad_norm: 0.7634642750079919, iteration: 395316
loss: 0.9880742430686951,grad_norm: 0.9999993703769268, iteration: 395317
loss: 1.0085870027542114,grad_norm: 0.7294553596255421, iteration: 395318
loss: 1.0287984609603882,grad_norm: 0.920276931518072, iteration: 395319
loss: 1.0030549764633179,grad_norm: 0.8160791634910565, iteration: 395320
loss: 0.963748574256897,grad_norm: 0.83455217344462, iteration: 395321
loss: 0.9984143376350403,grad_norm: 0.6953353683835656, iteration: 395322
loss: 1.0162197351455688,grad_norm: 0.8414520430969977, iteration: 395323
loss: 1.0043617486953735,grad_norm: 0.8980129472447356, iteration: 395324
loss: 1.0288126468658447,grad_norm: 0.9999991343466587, iteration: 395325
loss: 1.0052322149276733,grad_norm: 0.9201673358480923, iteration: 395326
loss: 0.985709011554718,grad_norm: 0.9452886588428148, iteration: 395327
loss: 1.0166982412338257,grad_norm: 0.8804073541753613, iteration: 395328
loss: 0.9999896883964539,grad_norm: 0.9254714283588822, iteration: 395329
loss: 0.9740161895751953,grad_norm: 0.7710040505690156, iteration: 395330
loss: 1.0152002573013306,grad_norm: 0.7386133894688273, iteration: 395331
loss: 1.0317329168319702,grad_norm: 0.999999301905087, iteration: 395332
loss: 0.9740097522735596,grad_norm: 0.8570086220361504, iteration: 395333
loss: 1.0109591484069824,grad_norm: 0.985524629033095, iteration: 395334
loss: 1.0113847255706787,grad_norm: 0.9603763389408642, iteration: 395335
loss: 0.9932135939598083,grad_norm: 0.8976657429595428, iteration: 395336
loss: 0.9687127470970154,grad_norm: 0.999999388178066, iteration: 395337
loss: 1.004868984222412,grad_norm: 0.634225348855649, iteration: 395338
loss: 1.0345433950424194,grad_norm: 0.752491283508383, iteration: 395339
loss: 1.1157718896865845,grad_norm: 0.826878163405036, iteration: 395340
loss: 1.038346290588379,grad_norm: 0.8519981814942433, iteration: 395341
loss: 0.9941723942756653,grad_norm: 0.6114665014039962, iteration: 395342
loss: 1.030028223991394,grad_norm: 0.9999990771329722, iteration: 395343
loss: 1.0251150131225586,grad_norm: 0.9999992248130242, iteration: 395344
loss: 0.9741193056106567,grad_norm: 0.9449939226440649, iteration: 395345
loss: 0.9831990599632263,grad_norm: 0.7491854078956102, iteration: 395346
loss: 0.9884771704673767,grad_norm: 0.7294907935600214, iteration: 395347
loss: 0.9905836582183838,grad_norm: 0.9059766882213852, iteration: 395348
loss: 0.9991369843482971,grad_norm: 0.7976234903416369, iteration: 395349
loss: 0.9491012096405029,grad_norm: 0.7006517686826021, iteration: 395350
loss: 0.9440792798995972,grad_norm: 0.7008980936843909, iteration: 395351
loss: 0.9893350005149841,grad_norm: 0.7496142900889908, iteration: 395352
loss: 1.0210903882980347,grad_norm: 0.8315073277892412, iteration: 395353
loss: 1.0605124235153198,grad_norm: 0.7145613712756799, iteration: 395354
loss: 0.9845790863037109,grad_norm: 0.740439988493617, iteration: 395355
loss: 0.973347544670105,grad_norm: 0.7168607808933936, iteration: 395356
loss: 0.9921640157699585,grad_norm: 0.8811446619906432, iteration: 395357
loss: 1.0708551406860352,grad_norm: 0.9999992016956956, iteration: 395358
loss: 0.9830340147018433,grad_norm: 0.6819710194238908, iteration: 395359
loss: 0.9372538328170776,grad_norm: 0.8182025371768615, iteration: 395360
loss: 1.0438412427902222,grad_norm: 0.9118697419924884, iteration: 395361
loss: 0.9623486399650574,grad_norm: 0.8401662956229331, iteration: 395362
loss: 0.9931261539459229,grad_norm: 0.9999992334088137, iteration: 395363
loss: 0.9810113906860352,grad_norm: 0.8691207213436877, iteration: 395364
loss: 1.0051151514053345,grad_norm: 0.8816422396141063, iteration: 395365
loss: 0.9845867156982422,grad_norm: 0.7025903762804347, iteration: 395366
loss: 0.9891296625137329,grad_norm: 0.8868272244254566, iteration: 395367
loss: 1.0299738645553589,grad_norm: 0.8768518036507932, iteration: 395368
loss: 1.0325263738632202,grad_norm: 0.7922304719103771, iteration: 395369
loss: 1.0096248388290405,grad_norm: 0.9999993894064153, iteration: 395370
loss: 0.9993438720703125,grad_norm: 0.8871497068771792, iteration: 395371
loss: 1.0010513067245483,grad_norm: 0.8895639523874206, iteration: 395372
loss: 0.9698032140731812,grad_norm: 0.7602465247135402, iteration: 395373
loss: 0.9735578894615173,grad_norm: 0.674574746670421, iteration: 395374
loss: 0.9692823886871338,grad_norm: 0.8338284026033912, iteration: 395375
loss: 1.0054988861083984,grad_norm: 0.8778073633961435, iteration: 395376
loss: 0.9913845062255859,grad_norm: 0.8004195999447985, iteration: 395377
loss: 1.0255783796310425,grad_norm: 0.7603016139752224, iteration: 395378
loss: 0.9647395014762878,grad_norm: 0.8202392749315833, iteration: 395379
loss: 1.0101696252822876,grad_norm: 0.9325424004831766, iteration: 395380
loss: 1.0345854759216309,grad_norm: 0.8199366169145331, iteration: 395381
loss: 1.0146187543869019,grad_norm: 0.8101537033018199, iteration: 395382
loss: 1.020341157913208,grad_norm: 0.8215580945673598, iteration: 395383
loss: 0.982491672039032,grad_norm: 0.8050952181388227, iteration: 395384
loss: 0.9754420518875122,grad_norm: 0.8810265504594135, iteration: 395385
loss: 1.004044532775879,grad_norm: 0.7982244769967096, iteration: 395386
loss: 1.0546674728393555,grad_norm: 0.8106598830192839, iteration: 395387
loss: 0.9770559668540955,grad_norm: 0.8814907883078312, iteration: 395388
loss: 1.0084505081176758,grad_norm: 0.9999992891412276, iteration: 395389
loss: 1.0146070718765259,grad_norm: 0.7395610835029676, iteration: 395390
loss: 0.971610963344574,grad_norm: 0.7685059744995638, iteration: 395391
loss: 0.9910783767700195,grad_norm: 0.9672598517291029, iteration: 395392
loss: 1.0042964220046997,grad_norm: 0.7586535565896154, iteration: 395393
loss: 0.9998465180397034,grad_norm: 0.7482493443775107, iteration: 395394
loss: 1.0276576280593872,grad_norm: 0.9999998794166179, iteration: 395395
loss: 1.0283156633377075,grad_norm: 0.7424494759514018, iteration: 395396
loss: 1.087065577507019,grad_norm: 0.9218900097361706, iteration: 395397
loss: 0.9940295815467834,grad_norm: 0.7395201680246454, iteration: 395398
loss: 1.0424968004226685,grad_norm: 0.8934109280985739, iteration: 395399
loss: 0.9907453656196594,grad_norm: 0.9045663407063571, iteration: 395400
loss: 1.011731743812561,grad_norm: 0.862906874851081, iteration: 395401
loss: 1.0169801712036133,grad_norm: 0.8099779790362776, iteration: 395402
loss: 0.9726755023002625,grad_norm: 0.7463835427902995, iteration: 395403
loss: 1.0172392129898071,grad_norm: 0.8804923338765412, iteration: 395404
loss: 0.9692139029502869,grad_norm: 0.7113640356375465, iteration: 395405
loss: 1.071656584739685,grad_norm: 0.9052614112264427, iteration: 395406
loss: 1.0213966369628906,grad_norm: 0.7879873228754748, iteration: 395407
loss: 1.0015358924865723,grad_norm: 0.6605878972505471, iteration: 395408
loss: 1.00089430809021,grad_norm: 0.7891416307823085, iteration: 395409
loss: 1.006919503211975,grad_norm: 0.9999992023688503, iteration: 395410
loss: 1.0202540159225464,grad_norm: 0.9999993197168295, iteration: 395411
loss: 1.0224668979644775,grad_norm: 0.855366456196792, iteration: 395412
loss: 1.0249091386795044,grad_norm: 0.9009090348714155, iteration: 395413
loss: 1.0634151697158813,grad_norm: 0.9407958195867393, iteration: 395414
loss: 0.9831072688102722,grad_norm: 0.8084024594988375, iteration: 395415
loss: 0.9991063475608826,grad_norm: 0.6706577212776859, iteration: 395416
loss: 0.9935998320579529,grad_norm: 0.9789370117070545, iteration: 395417
loss: 0.9960368871688843,grad_norm: 0.7760254133057316, iteration: 395418
loss: 1.0402156114578247,grad_norm: 0.7586998056437836, iteration: 395419
loss: 1.0083569288253784,grad_norm: 0.7740212487521017, iteration: 395420
loss: 1.1007944345474243,grad_norm: 0.9999990916348017, iteration: 395421
loss: 1.0202301740646362,grad_norm: 0.9619497641295578, iteration: 395422
loss: 0.9757001996040344,grad_norm: 0.7041773626496772, iteration: 395423
loss: 1.0319937467575073,grad_norm: 0.999999111239956, iteration: 395424
loss: 0.9952437281608582,grad_norm: 0.8298189006194615, iteration: 395425
loss: 1.0882130861282349,grad_norm: 0.999999495416246, iteration: 395426
loss: 0.9355349540710449,grad_norm: 0.8620664994041356, iteration: 395427
loss: 1.0694184303283691,grad_norm: 0.9999999402930331, iteration: 395428
loss: 0.9835153818130493,grad_norm: 0.894041294294508, iteration: 395429
loss: 0.9939113855361938,grad_norm: 0.9446425173656495, iteration: 395430
loss: 1.0010349750518799,grad_norm: 0.8930991554086453, iteration: 395431
loss: 0.9474056959152222,grad_norm: 0.7989024382998745, iteration: 395432
loss: 0.9810498356819153,grad_norm: 0.9569784104866217, iteration: 395433
loss: 1.0190850496292114,grad_norm: 0.8718285865084608, iteration: 395434
loss: 1.0685175657272339,grad_norm: 0.907860568693634, iteration: 395435
loss: 1.0058785676956177,grad_norm: 0.6769039096937393, iteration: 395436
loss: 0.9827333092689514,grad_norm: 0.6907903427337868, iteration: 395437
loss: 1.0438705682754517,grad_norm: 0.8867020835941118, iteration: 395438
loss: 1.0382965803146362,grad_norm: 0.9999994680882631, iteration: 395439
loss: 1.042343020439148,grad_norm: 0.8260987815008852, iteration: 395440
loss: 0.9904559850692749,grad_norm: 0.643169656443539, iteration: 395441
loss: 1.0275282859802246,grad_norm: 0.9999998490843505, iteration: 395442
loss: 0.9585200548171997,grad_norm: 0.6983499786959638, iteration: 395443
loss: 1.012554407119751,grad_norm: 0.7750712976719006, iteration: 395444
loss: 0.9958750605583191,grad_norm: 0.9574749429466461, iteration: 395445
loss: 1.0173213481903076,grad_norm: 0.9999991266357836, iteration: 395446
loss: 0.9685808420181274,grad_norm: 0.8337892791094509, iteration: 395447
loss: 1.009744644165039,grad_norm: 0.9999989868683955, iteration: 395448
loss: 0.9988799095153809,grad_norm: 0.7361916407840559, iteration: 395449
loss: 0.9824651479721069,grad_norm: 0.7620068973335284, iteration: 395450
loss: 0.9986088871955872,grad_norm: 0.7942974637511249, iteration: 395451
loss: 0.9969338178634644,grad_norm: 0.7647604480653176, iteration: 395452
loss: 1.0551371574401855,grad_norm: 0.7811208056288554, iteration: 395453
loss: 0.9861133098602295,grad_norm: 0.8353495996121579, iteration: 395454
loss: 0.9855776429176331,grad_norm: 0.6861092966342883, iteration: 395455
loss: 1.0439953804016113,grad_norm: 0.8536741190618086, iteration: 395456
loss: 1.0024640560150146,grad_norm: 0.9999994210102829, iteration: 395457
loss: 0.9898274540901184,grad_norm: 0.8903331722523862, iteration: 395458
loss: 1.0076276063919067,grad_norm: 0.6092126617441298, iteration: 395459
loss: 0.9961346983909607,grad_norm: 0.8120055582553488, iteration: 395460
loss: 1.0866608619689941,grad_norm: 0.9999991045358738, iteration: 395461
loss: 1.0058878660202026,grad_norm: 0.8104268464292097, iteration: 395462
loss: 1.0100741386413574,grad_norm: 0.7596594551850638, iteration: 395463
loss: 0.9815806150436401,grad_norm: 0.8871040138707021, iteration: 395464
loss: 1.0074355602264404,grad_norm: 0.8352577507205343, iteration: 395465
loss: 1.0345262289047241,grad_norm: 0.9152965270776885, iteration: 395466
loss: 1.002715826034546,grad_norm: 0.8233906822019439, iteration: 395467
loss: 1.001968264579773,grad_norm: 0.9157193201007048, iteration: 395468
loss: 1.0300744771957397,grad_norm: 0.7806732291379161, iteration: 395469
loss: 1.0106061697006226,grad_norm: 0.9399327126842154, iteration: 395470
loss: 1.0126763582229614,grad_norm: 1.000000077599503, iteration: 395471
loss: 0.9975955486297607,grad_norm: 0.7050580243969231, iteration: 395472
loss: 0.9875343441963196,grad_norm: 0.812614199493327, iteration: 395473
loss: 1.0169130563735962,grad_norm: 0.8318364415203708, iteration: 395474
loss: 1.0303092002868652,grad_norm: 0.9903880800474629, iteration: 395475
loss: 1.0073238611221313,grad_norm: 0.8344438665052945, iteration: 395476
loss: 1.0047262907028198,grad_norm: 0.9112887919360766, iteration: 395477
loss: 0.9843327403068542,grad_norm: 0.9999997202113375, iteration: 395478
loss: 1.0039852857589722,grad_norm: 0.8766890568442679, iteration: 395479
loss: 1.0018731355667114,grad_norm: 0.688463851168728, iteration: 395480
loss: 0.9830594062805176,grad_norm: 0.81051279296197, iteration: 395481
loss: 0.9934470653533936,grad_norm: 0.9120688043305826, iteration: 395482
loss: 1.0532200336456299,grad_norm: 0.9938139638876712, iteration: 395483
loss: 1.0467430353164673,grad_norm: 0.999999568669424, iteration: 395484
loss: 1.162792444229126,grad_norm: 0.9999990855488261, iteration: 395485
loss: 0.9782170653343201,grad_norm: 0.7436637205187248, iteration: 395486
loss: 1.0201460123062134,grad_norm: 0.8344156138254061, iteration: 395487
loss: 0.9731388688087463,grad_norm: 0.6784774717042082, iteration: 395488
loss: 0.9826448559761047,grad_norm: 0.7870300652278849, iteration: 395489
loss: 1.0248632431030273,grad_norm: 0.8718726462371257, iteration: 395490
loss: 1.0045838356018066,grad_norm: 0.8154912073324757, iteration: 395491
loss: 1.0030124187469482,grad_norm: 0.6717004641076608, iteration: 395492
loss: 0.9740310311317444,grad_norm: 0.780513340455269, iteration: 395493
loss: 0.9830601215362549,grad_norm: 0.8152020795429823, iteration: 395494
loss: 1.0427885055541992,grad_norm: 0.8260393590119008, iteration: 395495
loss: 1.021373987197876,grad_norm: 0.8497502265760847, iteration: 395496
loss: 0.9990001916885376,grad_norm: 0.8483857710479785, iteration: 395497
loss: 0.9861047863960266,grad_norm: 0.7533195935937969, iteration: 395498
loss: 1.0204713344573975,grad_norm: 0.6693334857673877, iteration: 395499
loss: 1.0104424953460693,grad_norm: 0.9999994654599087, iteration: 395500
loss: 0.9733402729034424,grad_norm: 0.7709008145923774, iteration: 395501
loss: 1.0197190046310425,grad_norm: 0.9999992906842867, iteration: 395502
loss: 0.9596570134162903,grad_norm: 0.8736774047935801, iteration: 395503
loss: 1.0145387649536133,grad_norm: 0.7665415937544275, iteration: 395504
loss: 1.0387572050094604,grad_norm: 0.9999991860926629, iteration: 395505
loss: 1.001317024230957,grad_norm: 0.7604928271592917, iteration: 395506
loss: 1.0635430812835693,grad_norm: 0.8312330707229706, iteration: 395507
loss: 1.0010665655136108,grad_norm: 0.9999991535744802, iteration: 395508
loss: 1.0316250324249268,grad_norm: 0.9999993576999674, iteration: 395509
loss: 1.0064582824707031,grad_norm: 0.9086354452638016, iteration: 395510
loss: 0.9925674200057983,grad_norm: 0.6772409750473675, iteration: 395511
loss: 1.067221760749817,grad_norm: 0.8199789084418361, iteration: 395512
loss: 1.0151220560073853,grad_norm: 0.6816817101004561, iteration: 395513
loss: 1.1720110177993774,grad_norm: 0.9999998481896094, iteration: 395514
loss: 0.9772124290466309,grad_norm: 0.7902576525166197, iteration: 395515
loss: 0.9920205473899841,grad_norm: 0.9063422001124863, iteration: 395516
loss: 1.0150967836380005,grad_norm: 0.925588787351658, iteration: 395517
loss: 0.9838836193084717,grad_norm: 0.7309191381820815, iteration: 395518
loss: 1.0256321430206299,grad_norm: 0.7083578795002842, iteration: 395519
loss: 1.023939609527588,grad_norm: 0.7834888480942843, iteration: 395520
loss: 0.9937791228294373,grad_norm: 0.9999998626280182, iteration: 395521
loss: 1.0356836318969727,grad_norm: 0.9739325939770029, iteration: 395522
loss: 0.9682580828666687,grad_norm: 0.7444893990715532, iteration: 395523
loss: 0.9994478225708008,grad_norm: 0.809819521792499, iteration: 395524
loss: 1.048224687576294,grad_norm: 0.9650267678751734, iteration: 395525
loss: 1.082785964012146,grad_norm: 0.9999995756808042, iteration: 395526
loss: 1.1402331590652466,grad_norm: 0.9999990339362973, iteration: 395527
loss: 1.0158361196517944,grad_norm: 0.9999991391780806, iteration: 395528
loss: 1.0042792558670044,grad_norm: 0.758032197617728, iteration: 395529
loss: 1.0125112533569336,grad_norm: 0.6556245971044276, iteration: 395530
loss: 1.014233946800232,grad_norm: 0.8533002152606424, iteration: 395531
loss: 0.9766719937324524,grad_norm: 0.6546277696566515, iteration: 395532
loss: 0.9809300303459167,grad_norm: 0.8622662721063579, iteration: 395533
loss: 1.024854302406311,grad_norm: 0.655710710561772, iteration: 395534
loss: 1.0269330739974976,grad_norm: 0.6821320650980626, iteration: 395535
loss: 1.0245567560195923,grad_norm: 0.8708945341466946, iteration: 395536
loss: 1.0247141122817993,grad_norm: 0.7569539223688933, iteration: 395537
loss: 0.9659966826438904,grad_norm: 0.6507234399885513, iteration: 395538
loss: 0.9824793934822083,grad_norm: 0.6295611803824656, iteration: 395539
loss: 1.1199746131896973,grad_norm: 0.9999992777228782, iteration: 395540
loss: 0.9899120330810547,grad_norm: 0.67541148552862, iteration: 395541
loss: 0.9847186803817749,grad_norm: 0.8359481678934871, iteration: 395542
loss: 0.9839388132095337,grad_norm: 0.9999998797590258, iteration: 395543
loss: 1.015804409980774,grad_norm: 0.9010046644142401, iteration: 395544
loss: 1.0704070329666138,grad_norm: 0.8189897604509357, iteration: 395545
loss: 1.019756555557251,grad_norm: 0.8342163345315177, iteration: 395546
loss: 0.9798758625984192,grad_norm: 0.661635366825289, iteration: 395547
loss: 0.9666998386383057,grad_norm: 0.9310685202844284, iteration: 395548
loss: 0.9955378174781799,grad_norm: 0.881878344435968, iteration: 395549
loss: 1.0535110235214233,grad_norm: 0.9742394814066575, iteration: 395550
loss: 1.0219939947128296,grad_norm: 0.6550559020881718, iteration: 395551
loss: 0.9896466732025146,grad_norm: 0.8109899516158611, iteration: 395552
loss: 0.9651665091514587,grad_norm: 0.7506148075583269, iteration: 395553
loss: 0.9843952059745789,grad_norm: 0.7676574154854668, iteration: 395554
loss: 0.9992613792419434,grad_norm: 0.7795246512097577, iteration: 395555
loss: 1.0761688947677612,grad_norm: 0.7614174511135006, iteration: 395556
loss: 0.9720980525016785,grad_norm: 0.7993754745082021, iteration: 395557
loss: 1.0132606029510498,grad_norm: 0.7330816423077988, iteration: 395558
loss: 1.0219637155532837,grad_norm: 0.715592862288723, iteration: 395559
loss: 1.0375182628631592,grad_norm: 0.9045753086738103, iteration: 395560
loss: 0.9813920855522156,grad_norm: 0.7530885488484673, iteration: 395561
loss: 1.0328625440597534,grad_norm: 0.9878309171147903, iteration: 395562
loss: 1.0054047107696533,grad_norm: 0.772767590940873, iteration: 395563
loss: 0.9724695086479187,grad_norm: 0.7980218974103259, iteration: 395564
loss: 1.0221807956695557,grad_norm: 0.8613923731486517, iteration: 395565
loss: 1.003019094467163,grad_norm: 0.8096613176618586, iteration: 395566
loss: 1.0015629529953003,grad_norm: 0.9185565609745752, iteration: 395567
loss: 1.0765888690948486,grad_norm: 0.9853458023931153, iteration: 395568
loss: 0.9984210729598999,grad_norm: 0.8533638039435839, iteration: 395569
loss: 1.0019276142120361,grad_norm: 0.7064155580811871, iteration: 395570
loss: 1.0822134017944336,grad_norm: 0.9999990312016105, iteration: 395571
loss: 0.9844805598258972,grad_norm: 0.8327678699517287, iteration: 395572
loss: 0.9753044247627258,grad_norm: 0.8099957852758327, iteration: 395573
loss: 0.9918613433837891,grad_norm: 0.783797494304754, iteration: 395574
loss: 1.0080771446228027,grad_norm: 0.7821492443139455, iteration: 395575
loss: 0.9901724457740784,grad_norm: 0.6984345780427758, iteration: 395576
loss: 1.015678882598877,grad_norm: 0.8726531126238706, iteration: 395577
loss: 0.991682231426239,grad_norm: 0.9769723835451685, iteration: 395578
loss: 1.0974736213684082,grad_norm: 0.9999993098850062, iteration: 395579
loss: 0.9823290109634399,grad_norm: 0.8013318889982666, iteration: 395580
loss: 0.9924418330192566,grad_norm: 0.8036537088175594, iteration: 395581
loss: 1.0131733417510986,grad_norm: 0.6366632531087194, iteration: 395582
loss: 1.0070315599441528,grad_norm: 0.8463530737101116, iteration: 395583
loss: 0.9941161274909973,grad_norm: 0.7673494161516647, iteration: 395584
loss: 1.0261290073394775,grad_norm: 0.8432274023502693, iteration: 395585
loss: 1.0092456340789795,grad_norm: 0.8914265794259424, iteration: 395586
loss: 1.0097516775131226,grad_norm: 0.88980713159761, iteration: 395587
loss: 1.0049452781677246,grad_norm: 0.8117105562145736, iteration: 395588
loss: 0.9846343994140625,grad_norm: 0.6954202298524745, iteration: 395589
loss: 0.9833012223243713,grad_norm: 0.7603819441906517, iteration: 395590
loss: 0.9949641227722168,grad_norm: 0.712039460708161, iteration: 395591
loss: 1.000763177871704,grad_norm: 0.7551341410096594, iteration: 395592
loss: 0.9862590432167053,grad_norm: 0.66105511813995, iteration: 395593
loss: 1.0569149255752563,grad_norm: 0.9999991655639547, iteration: 395594
loss: 1.0282241106033325,grad_norm: 0.9473594553733704, iteration: 395595
loss: 1.0402318239212036,grad_norm: 0.8737909399798047, iteration: 395596
loss: 0.9889068603515625,grad_norm: 0.8482265555209441, iteration: 395597
loss: 1.0077322721481323,grad_norm: 0.7832169925572134, iteration: 395598
loss: 1.0167889595031738,grad_norm: 0.6197094182595847, iteration: 395599
loss: 1.0229161977767944,grad_norm: 0.9145013498314813, iteration: 395600
loss: 0.9839375019073486,grad_norm: 0.8847194883751752, iteration: 395601
loss: 0.9529510736465454,grad_norm: 0.7376160583152087, iteration: 395602
loss: 0.9911494851112366,grad_norm: 0.7961827694800309, iteration: 395603
loss: 1.0204570293426514,grad_norm: 0.7730914941674443, iteration: 395604
loss: 0.994653582572937,grad_norm: 0.7840161021969384, iteration: 395605
loss: 0.9899905920028687,grad_norm: 0.7880425016478972, iteration: 395606
loss: 1.0565084218978882,grad_norm: 0.999999417380208, iteration: 395607
loss: 0.9654523730278015,grad_norm: 0.7858190259740241, iteration: 395608
loss: 0.9656597375869751,grad_norm: 0.766388938050248, iteration: 395609
loss: 0.9730038046836853,grad_norm: 0.7204121909760174, iteration: 395610
loss: 0.9926465153694153,grad_norm: 0.7637072577671942, iteration: 395611
loss: 0.9907623529434204,grad_norm: 0.6997794345721761, iteration: 395612
loss: 1.0016971826553345,grad_norm: 0.733256659319238, iteration: 395613
loss: 0.9781916737556458,grad_norm: 0.7424028434287647, iteration: 395614
loss: 0.9853804111480713,grad_norm: 0.7642223222099294, iteration: 395615
loss: 1.0059545040130615,grad_norm: 0.7982236731934357, iteration: 395616
loss: 1.0091946125030518,grad_norm: 0.7659133948639627, iteration: 395617
loss: 0.9717388153076172,grad_norm: 0.7212654714445055, iteration: 395618
loss: 0.9856658577919006,grad_norm: 0.7705904478313006, iteration: 395619
loss: 0.9701564311981201,grad_norm: 0.8562715062150766, iteration: 395620
loss: 1.0167993307113647,grad_norm: 0.6517224192656965, iteration: 395621
loss: 1.0088540315628052,grad_norm: 0.9173724806915544, iteration: 395622
loss: 1.0258115530014038,grad_norm: 0.8043426806147664, iteration: 395623
loss: 1.018430233001709,grad_norm: 0.8054328014694181, iteration: 395624
loss: 1.0072318315505981,grad_norm: 0.7221189285515389, iteration: 395625
loss: 0.9786744713783264,grad_norm: 0.7780962527272411, iteration: 395626
loss: 1.0110955238342285,grad_norm: 0.6369969712448472, iteration: 395627
loss: 1.0016679763793945,grad_norm: 0.741154047802626, iteration: 395628
loss: 0.9876331686973572,grad_norm: 0.9723006196926225, iteration: 395629
loss: 0.9980964660644531,grad_norm: 0.7852501293903897, iteration: 395630
loss: 1.0050376653671265,grad_norm: 0.7788150738771877, iteration: 395631
loss: 0.9821200370788574,grad_norm: 0.8456800739952769, iteration: 395632
loss: 1.0036801099777222,grad_norm: 0.8497394754042064, iteration: 395633
loss: 0.9960619211196899,grad_norm: 0.7909102736478796, iteration: 395634
loss: 0.9974294900894165,grad_norm: 0.7319795983288779, iteration: 395635
loss: 0.998210072517395,grad_norm: 0.782851111396848, iteration: 395636
loss: 0.9648573398590088,grad_norm: 0.704526003555348, iteration: 395637
loss: 0.9584765434265137,grad_norm: 0.8381374160691958, iteration: 395638
loss: 1.0991657972335815,grad_norm: 0.9999997290376623, iteration: 395639
loss: 0.9711915254592896,grad_norm: 0.908483969453991, iteration: 395640
loss: 1.0018783807754517,grad_norm: 0.8302571974285498, iteration: 395641
loss: 0.9979173541069031,grad_norm: 0.7519194218773818, iteration: 395642
loss: 0.9719194173812866,grad_norm: 0.9999993734773813, iteration: 395643
loss: 1.0474693775177002,grad_norm: 0.9999994696688571, iteration: 395644
loss: 0.9898053407669067,grad_norm: 0.6934386724094507, iteration: 395645
loss: 1.0202276706695557,grad_norm: 0.9999993840088112, iteration: 395646
loss: 0.9749104976654053,grad_norm: 0.696308523481696, iteration: 395647
loss: 0.9678912162780762,grad_norm: 0.8467326035013099, iteration: 395648
loss: 0.9768769145011902,grad_norm: 0.9679568477527537, iteration: 395649
loss: 0.972805917263031,grad_norm: 0.7884073339144911, iteration: 395650
loss: 0.9994341135025024,grad_norm: 0.999999255836119, iteration: 395651
loss: 1.0241272449493408,grad_norm: 0.7035871309402757, iteration: 395652
loss: 0.9710018038749695,grad_norm: 0.8499703754990962, iteration: 395653
loss: 0.9861938953399658,grad_norm: 0.6552704634977378, iteration: 395654
loss: 0.9810912013053894,grad_norm: 0.6107456622364157, iteration: 395655
loss: 0.9790094494819641,grad_norm: 0.7803931216747159, iteration: 395656
loss: 1.0171010494232178,grad_norm: 0.8740111053397321, iteration: 395657
loss: 1.0096135139465332,grad_norm: 0.7003958580081001, iteration: 395658
loss: 1.0650889873504639,grad_norm: 0.9122272700296838, iteration: 395659
loss: 1.0005559921264648,grad_norm: 0.8363933957667992, iteration: 395660
loss: 0.9572598338127136,grad_norm: 0.8193194554434922, iteration: 395661
loss: 0.9953528046607971,grad_norm: 0.76295917689385, iteration: 395662
loss: 0.9930879473686218,grad_norm: 0.9124258553707496, iteration: 395663
loss: 0.970312237739563,grad_norm: 0.706674543753209, iteration: 395664
loss: 1.0253287553787231,grad_norm: 0.7422157542087843, iteration: 395665
loss: 1.044803500175476,grad_norm: 0.7596856562847181, iteration: 395666
loss: 0.9994229674339294,grad_norm: 0.9999991125277489, iteration: 395667
loss: 1.0219160318374634,grad_norm: 0.8969249723526368, iteration: 395668
loss: 1.0219733715057373,grad_norm: 0.750781550308867, iteration: 395669
loss: 1.055836796760559,grad_norm: 0.8809271988645673, iteration: 395670
loss: 1.00525963306427,grad_norm: 0.754849425158096, iteration: 395671
loss: 1.033107042312622,grad_norm: 0.7170564716239708, iteration: 395672
loss: 0.9860023260116577,grad_norm: 0.7493326475033066, iteration: 395673
loss: 0.982114315032959,grad_norm: 0.739895466288299, iteration: 395674
loss: 0.991322934627533,grad_norm: 0.999999136236316, iteration: 395675
loss: 0.922045886516571,grad_norm: 0.8703744352490568, iteration: 395676
loss: 1.0492925643920898,grad_norm: 0.999999618448692, iteration: 395677
loss: 0.9951235055923462,grad_norm: 0.858156935658931, iteration: 395678
loss: 0.9640021324157715,grad_norm: 0.6753698508620282, iteration: 395679
loss: 0.9731512069702148,grad_norm: 0.7524206033529781, iteration: 395680
loss: 0.9941710829734802,grad_norm: 0.7061365649285777, iteration: 395681
loss: 0.998409628868103,grad_norm: 0.7586959809531741, iteration: 395682
loss: 1.005859136581421,grad_norm: 0.8322935568568088, iteration: 395683
loss: 0.9879305958747864,grad_norm: 0.6948308153129555, iteration: 395684
loss: 0.949393630027771,grad_norm: 0.6963500747678011, iteration: 395685
loss: 0.998842179775238,grad_norm: 0.6721365582569307, iteration: 395686
loss: 1.0031111240386963,grad_norm: 0.8674504506113261, iteration: 395687
loss: 1.0231906175613403,grad_norm: 0.6503288334116424, iteration: 395688
loss: 1.0297470092773438,grad_norm: 0.8428352756621962, iteration: 395689
loss: 0.991159200668335,grad_norm: 0.8685705732577038, iteration: 395690
loss: 1.0068153142929077,grad_norm: 0.7648180227345962, iteration: 395691
loss: 0.9879644513130188,grad_norm: 0.8737776860960912, iteration: 395692
loss: 0.9977913498878479,grad_norm: 0.9999995687003936, iteration: 395693
loss: 1.0136964321136475,grad_norm: 0.8379413576233395, iteration: 395694
loss: 0.9992635846138,grad_norm: 0.7575239034167442, iteration: 395695
loss: 1.033057451248169,grad_norm: 0.9999990441284634, iteration: 395696
loss: 0.9671355485916138,grad_norm: 0.8066031074728552, iteration: 395697
loss: 0.9953545331954956,grad_norm: 0.6935070288399461, iteration: 395698
loss: 1.0032483339309692,grad_norm: 0.7477648660112772, iteration: 395699
loss: 0.9758660197257996,grad_norm: 0.7430070389727876, iteration: 395700
loss: 0.9820941090583801,grad_norm: 0.8362392690909306, iteration: 395701
loss: 1.010477900505066,grad_norm: 0.7328199228552561, iteration: 395702
loss: 1.0637459754943848,grad_norm: 0.8931369117079241, iteration: 395703
loss: 1.031604528427124,grad_norm: 0.8423256688228871, iteration: 395704
loss: 1.0198101997375488,grad_norm: 0.7071318310581002, iteration: 395705
loss: 0.9951795935630798,grad_norm: 0.7604189335179028, iteration: 395706
loss: 0.9968511462211609,grad_norm: 0.7684738325841235, iteration: 395707
loss: 0.9705453515052795,grad_norm: 0.7092076085022598, iteration: 395708
loss: 1.018010139465332,grad_norm: 0.8466561098853356, iteration: 395709
loss: 1.0437464714050293,grad_norm: 0.8191403465533349, iteration: 395710
loss: 0.9904406666755676,grad_norm: 0.7158426196754646, iteration: 395711
loss: 1.0084995031356812,grad_norm: 0.8571739106470322, iteration: 395712
loss: 0.9570664763450623,grad_norm: 0.7556151253280065, iteration: 395713
loss: 1.0026636123657227,grad_norm: 0.6659279599243549, iteration: 395714
loss: 1.0129640102386475,grad_norm: 0.8901893498496548, iteration: 395715
loss: 1.0235130786895752,grad_norm: 0.8449980577655021, iteration: 395716
loss: 0.9961799383163452,grad_norm: 0.7817863869961856, iteration: 395717
loss: 0.9782623648643494,grad_norm: 0.5999581688581839, iteration: 395718
loss: 1.0158090591430664,grad_norm: 0.875144112562623, iteration: 395719
loss: 1.0200788974761963,grad_norm: 0.8000900777995105, iteration: 395720
loss: 1.019561767578125,grad_norm: 0.7794048958465889, iteration: 395721
loss: 0.9827812910079956,grad_norm: 0.7884671929925522, iteration: 395722
loss: 1.0173453092575073,grad_norm: 0.8476202344682099, iteration: 395723
loss: 0.9993857145309448,grad_norm: 0.6409916168728095, iteration: 395724
loss: 0.9948294758796692,grad_norm: 0.8051129177372643, iteration: 395725
loss: 0.9945071935653687,grad_norm: 0.8807472978152315, iteration: 395726
loss: 0.9809681177139282,grad_norm: 0.8106041522053273, iteration: 395727
loss: 0.9940955638885498,grad_norm: 0.8435069768945022, iteration: 395728
loss: 1.001373291015625,grad_norm: 0.6736363733847353, iteration: 395729
loss: 1.0335524082183838,grad_norm: 0.8707785180868376, iteration: 395730
loss: 0.9927693605422974,grad_norm: 0.9508447876494197, iteration: 395731
loss: 0.9831852316856384,grad_norm: 0.6746772954069243, iteration: 395732
loss: 1.015725016593933,grad_norm: 0.8777364737242338, iteration: 395733
loss: 1.0159459114074707,grad_norm: 0.7904602594197526, iteration: 395734
loss: 1.009179711341858,grad_norm: 0.9999991168526488, iteration: 395735
loss: 1.0138145685195923,grad_norm: 0.7446200297219168, iteration: 395736
loss: 0.9812647104263306,grad_norm: 0.7165260277252733, iteration: 395737
loss: 0.9959915280342102,grad_norm: 0.7929800471240824, iteration: 395738
loss: 0.9799670577049255,grad_norm: 0.7028710864184128, iteration: 395739
loss: 0.9998012781143188,grad_norm: 0.6612846133072968, iteration: 395740
loss: 0.9785658717155457,grad_norm: 0.9999992151241366, iteration: 395741
loss: 1.0864793062210083,grad_norm: 0.9999992074021332, iteration: 395742
loss: 0.9558013081550598,grad_norm: 0.9143342248306874, iteration: 395743
loss: 1.0170644521713257,grad_norm: 0.8316812265983883, iteration: 395744
loss: 1.008477807044983,grad_norm: 0.7385151531334302, iteration: 395745
loss: 1.0237025022506714,grad_norm: 0.8033182871782175, iteration: 395746
loss: 0.98459792137146,grad_norm: 0.7567960954066886, iteration: 395747
loss: 0.9907695651054382,grad_norm: 0.9999992047971412, iteration: 395748
loss: 1.0681788921356201,grad_norm: 0.8205747493466675, iteration: 395749
loss: 1.032556176185608,grad_norm: 0.6990144490422527, iteration: 395750
loss: 1.0604455471038818,grad_norm: 0.9270375185272132, iteration: 395751
loss: 1.0220097303390503,grad_norm: 0.8383765402214144, iteration: 395752
loss: 1.0002963542938232,grad_norm: 0.7276159039426784, iteration: 395753
loss: 0.9923478960990906,grad_norm: 0.7076746964629482, iteration: 395754
loss: 1.0273935794830322,grad_norm: 0.9999993542082779, iteration: 395755
loss: 0.9585317969322205,grad_norm: 0.7497141093857161, iteration: 395756
loss: 0.998693585395813,grad_norm: 0.8238407402913902, iteration: 395757
loss: 0.9649117588996887,grad_norm: 0.8976262914590352, iteration: 395758
loss: 0.9915528893470764,grad_norm: 0.9761574837574993, iteration: 395759
loss: 1.010560393333435,grad_norm: 0.9999998795015161, iteration: 395760
loss: 1.0042665004730225,grad_norm: 0.762114703118339, iteration: 395761
loss: 1.0021198987960815,grad_norm: 0.7695357034039942, iteration: 395762
loss: 1.0155764818191528,grad_norm: 0.7249941445430457, iteration: 395763
loss: 1.0036064386367798,grad_norm: 0.7220796207966966, iteration: 395764
loss: 1.003355860710144,grad_norm: 0.7375408221626976, iteration: 395765
loss: 1.0366382598876953,grad_norm: 0.717645463595314, iteration: 395766
loss: 0.9577608108520508,grad_norm: 0.7689870930254245, iteration: 395767
loss: 1.01519775390625,grad_norm: 0.8861213213348419, iteration: 395768
loss: 0.9741851687431335,grad_norm: 0.7314512937192692, iteration: 395769
loss: 0.984096884727478,grad_norm: 0.720424316209648, iteration: 395770
loss: 0.9659632444381714,grad_norm: 0.8297950678942436, iteration: 395771
loss: 1.0216559171676636,grad_norm: 0.7732671997870952, iteration: 395772
loss: 0.9989832639694214,grad_norm: 0.6995339154995323, iteration: 395773
loss: 0.9889833331108093,grad_norm: 0.6872260497579386, iteration: 395774
loss: 1.0662771463394165,grad_norm: 0.9999996578986418, iteration: 395775
loss: 1.0032811164855957,grad_norm: 0.9999993929501205, iteration: 395776
loss: 0.9831891059875488,grad_norm: 0.8355517979546817, iteration: 395777
loss: 0.9974660277366638,grad_norm: 0.7370071459023398, iteration: 395778
loss: 0.9840906262397766,grad_norm: 0.9999995702805798, iteration: 395779
loss: 0.9895865321159363,grad_norm: 0.7508071384276346, iteration: 395780
loss: 1.0183404684066772,grad_norm: 0.7535366359240925, iteration: 395781
loss: 1.0254298448562622,grad_norm: 0.7586130082909658, iteration: 395782
loss: 0.9955043196678162,grad_norm: 0.882949596822998, iteration: 395783
loss: 1.0094749927520752,grad_norm: 0.6460492932641453, iteration: 395784
loss: 0.9782983660697937,grad_norm: 0.7762692463847293, iteration: 395785
loss: 0.9830989241600037,grad_norm: 0.7236828829466994, iteration: 395786
loss: 1.0059038400650024,grad_norm: 0.7357573489730166, iteration: 395787
loss: 0.9955428838729858,grad_norm: 0.7116860816339079, iteration: 395788
loss: 1.0221014022827148,grad_norm: 0.8685708147259683, iteration: 395789
loss: 0.9493945240974426,grad_norm: 0.7660613272376148, iteration: 395790
loss: 1.0385898351669312,grad_norm: 0.7296926261054595, iteration: 395791
loss: 0.9469560980796814,grad_norm: 0.7956791117275732, iteration: 395792
loss: 1.0125775337219238,grad_norm: 0.6646532633782487, iteration: 395793
loss: 0.9907960295677185,grad_norm: 0.8191994996687123, iteration: 395794
loss: 1.024398922920227,grad_norm: 0.7399872886601385, iteration: 395795
loss: 1.0378307104110718,grad_norm: 0.7917351317749771, iteration: 395796
loss: 1.0161136388778687,grad_norm: 0.7328692365256386, iteration: 395797
loss: 1.0028959512710571,grad_norm: 0.7785015114123532, iteration: 395798
loss: 0.989379346370697,grad_norm: 0.7088838529365625, iteration: 395799
loss: 1.0097581148147583,grad_norm: 0.8247232515633639, iteration: 395800
loss: 0.9963821172714233,grad_norm: 0.6433792991999657, iteration: 395801
loss: 0.9973108768463135,grad_norm: 0.8884262832339422, iteration: 395802
loss: 1.0185424089431763,grad_norm: 0.7384624501147596, iteration: 395803
loss: 1.0230258703231812,grad_norm: 0.6860606123476763, iteration: 395804
loss: 1.0081021785736084,grad_norm: 0.6710075492093679, iteration: 395805
loss: 0.9885094165802002,grad_norm: 0.713921145331261, iteration: 395806
loss: 1.042539358139038,grad_norm: 0.7998771063276248, iteration: 395807
loss: 1.0187183618545532,grad_norm: 0.7123488531372792, iteration: 395808
loss: 1.0226237773895264,grad_norm: 0.7325698563204948, iteration: 395809
loss: 1.0130679607391357,grad_norm: 0.7988270296284444, iteration: 395810
loss: 0.9875573515892029,grad_norm: 0.7259641334469119, iteration: 395811
loss: 0.9922653436660767,grad_norm: 0.6854485518671096, iteration: 395812
loss: 1.0047913789749146,grad_norm: 0.7195926963049997, iteration: 395813
loss: 1.0341887474060059,grad_norm: 0.7845071359981081, iteration: 395814
loss: 0.9653579592704773,grad_norm: 0.6863785686481462, iteration: 395815
loss: 1.0277882814407349,grad_norm: 0.9580143125538877, iteration: 395816
loss: 0.9467229843139648,grad_norm: 0.7990395054923379, iteration: 395817
loss: 0.9500849843025208,grad_norm: 0.8642066173347703, iteration: 395818
loss: 1.0190308094024658,grad_norm: 0.7940906537973614, iteration: 395819
loss: 1.0220969915390015,grad_norm: 0.8054211863267563, iteration: 395820
loss: 1.014477252960205,grad_norm: 0.739803377728321, iteration: 395821
loss: 0.985130786895752,grad_norm: 0.999999359336906, iteration: 395822
loss: 0.9842838644981384,grad_norm: 0.794483357302844, iteration: 395823
loss: 1.0097992420196533,grad_norm: 0.7795836246814346, iteration: 395824
loss: 1.0202561616897583,grad_norm: 0.7435387310594506, iteration: 395825
loss: 1.0141911506652832,grad_norm: 0.7006828021317044, iteration: 395826
loss: 0.9978488087654114,grad_norm: 0.9893491413003903, iteration: 395827
loss: 0.9973294138908386,grad_norm: 0.783199549078573, iteration: 395828
loss: 0.9784213900566101,grad_norm: 0.7569103611552509, iteration: 395829
loss: 0.9855998754501343,grad_norm: 0.6780249298763463, iteration: 395830
loss: 1.1044220924377441,grad_norm: 0.9999996147517103, iteration: 395831
loss: 0.9748120903968811,grad_norm: 0.8479663656696853, iteration: 395832
loss: 1.0224512815475464,grad_norm: 0.7902451232153208, iteration: 395833
loss: 1.1077622175216675,grad_norm: 0.9999995078839193, iteration: 395834
loss: 1.0162619352340698,grad_norm: 0.9999990836632118, iteration: 395835
loss: 1.017749547958374,grad_norm: 0.6989963827387211, iteration: 395836
loss: 1.007968544960022,grad_norm: 0.6780751062351844, iteration: 395837
loss: 0.9804844260215759,grad_norm: 0.8244573325845407, iteration: 395838
loss: 0.9689953327178955,grad_norm: 0.6278652230191275, iteration: 395839
loss: 0.996983528137207,grad_norm: 0.7783785956817714, iteration: 395840
loss: 0.9833295941352844,grad_norm: 0.8323826574082033, iteration: 395841
loss: 0.981519341468811,grad_norm: 0.8137254794799331, iteration: 395842
loss: 0.9786350131034851,grad_norm: 0.7096848852706629, iteration: 395843
loss: 0.9985691905021667,grad_norm: 0.7700033801500336, iteration: 395844
loss: 1.0206794738769531,grad_norm: 0.7743308354101811, iteration: 395845
loss: 1.0378479957580566,grad_norm: 0.7691224760634812, iteration: 395846
loss: 0.9800185561180115,grad_norm: 0.8203254659984134, iteration: 395847
loss: 1.161569595336914,grad_norm: 0.9999993864679948, iteration: 395848
loss: 0.9878368973731995,grad_norm: 0.8208666594843341, iteration: 395849
loss: 1.0376940965652466,grad_norm: 0.9999998634658854, iteration: 395850
loss: 1.0052767992019653,grad_norm: 0.6900663513988885, iteration: 395851
loss: 1.039288878440857,grad_norm: 0.9999996767303541, iteration: 395852
loss: 0.9978734850883484,grad_norm: 0.9999993763826173, iteration: 395853
loss: 0.9707421660423279,grad_norm: 0.7866846082845822, iteration: 395854
loss: 1.0195425748825073,grad_norm: 0.8865945382505599, iteration: 395855
loss: 0.9816356897354126,grad_norm: 0.7333696138679405, iteration: 395856
loss: 1.037953495979309,grad_norm: 0.6959973430802445, iteration: 395857
loss: 1.0112336874008179,grad_norm: 0.8019790546249076, iteration: 395858
loss: 0.9762595295906067,grad_norm: 0.8212405509547943, iteration: 395859
loss: 1.0715789794921875,grad_norm: 0.9999999332219514, iteration: 395860
loss: 0.9801518321037292,grad_norm: 0.7574409411419666, iteration: 395861
loss: 1.012012243270874,grad_norm: 0.7543145598794541, iteration: 395862
loss: 0.9917309880256653,grad_norm: 0.7741405523173066, iteration: 395863
loss: 0.9878246784210205,grad_norm: 0.7315083868138649, iteration: 395864
loss: 1.0371891260147095,grad_norm: 0.6665152175291984, iteration: 395865
loss: 1.0190173387527466,grad_norm: 0.7483448461478123, iteration: 395866
loss: 1.043726921081543,grad_norm: 0.8123749767267311, iteration: 395867
loss: 1.0560475587844849,grad_norm: 0.9999993795943507, iteration: 395868
loss: 0.9785698652267456,grad_norm: 0.6891200885875215, iteration: 395869
loss: 1.0112844705581665,grad_norm: 0.7601107942991943, iteration: 395870
loss: 1.0075907707214355,grad_norm: 0.713847157682783, iteration: 395871
loss: 1.0132163763046265,grad_norm: 0.999999339419771, iteration: 395872
loss: 0.9933953881263733,grad_norm: 0.8837514949383463, iteration: 395873
loss: 0.9883009195327759,grad_norm: 0.8642067655975509, iteration: 395874
loss: 0.9749232530593872,grad_norm: 0.7442332200530063, iteration: 395875
loss: 0.9691706299781799,grad_norm: 0.9999999483886655, iteration: 395876
loss: 1.0123218297958374,grad_norm: 0.9999994995063272, iteration: 395877
loss: 1.030003547668457,grad_norm: 0.8127551538692577, iteration: 395878
loss: 0.988457977771759,grad_norm: 0.7822739029160009, iteration: 395879
loss: 0.9715655446052551,grad_norm: 0.7529409524738541, iteration: 395880
loss: 0.9820048809051514,grad_norm: 0.9999994628129576, iteration: 395881
loss: 1.0460537672042847,grad_norm: 0.9999995481648757, iteration: 395882
loss: 0.9768478870391846,grad_norm: 0.7905306871779515, iteration: 395883
loss: 1.0134649276733398,grad_norm: 0.7775644208212308, iteration: 395884
loss: 1.005439043045044,grad_norm: 0.8311926671936202, iteration: 395885
loss: 1.029310703277588,grad_norm: 0.7656255558482621, iteration: 395886
loss: 0.9871113896369934,grad_norm: 0.9999996054222869, iteration: 395887
loss: 1.0168546438217163,grad_norm: 0.7221756879326185, iteration: 395888
loss: 0.9840391278266907,grad_norm: 0.9043463997981273, iteration: 395889
loss: 0.9899488091468811,grad_norm: 0.702732455916036, iteration: 395890
loss: 0.9955617189407349,grad_norm: 0.8111428376930148, iteration: 395891
loss: 1.0082510709762573,grad_norm: 0.7425910526299304, iteration: 395892
loss: 0.9887831807136536,grad_norm: 0.7531922277317316, iteration: 395893
loss: 1.000560998916626,grad_norm: 0.7952062955491109, iteration: 395894
loss: 0.980077862739563,grad_norm: 0.9999990342186317, iteration: 395895
loss: 1.0037646293640137,grad_norm: 0.6535211950005192, iteration: 395896
loss: 1.1344094276428223,grad_norm: 0.9999997535836184, iteration: 395897
loss: 0.9848036766052246,grad_norm: 0.7221165504065898, iteration: 395898
loss: 0.9777035713195801,grad_norm: 0.6319368561586636, iteration: 395899
loss: 1.081960678100586,grad_norm: 0.9999994017871512, iteration: 395900
loss: 1.0390374660491943,grad_norm: 0.8592691013497262, iteration: 395901
loss: 1.0093364715576172,grad_norm: 0.781957224488957, iteration: 395902
loss: 0.9852423071861267,grad_norm: 0.7781468680242349, iteration: 395903
loss: 1.0277845859527588,grad_norm: 0.7530866821721972, iteration: 395904
loss: 1.0121774673461914,grad_norm: 0.874726647316117, iteration: 395905
loss: 0.9921436905860901,grad_norm: 0.666759584749581, iteration: 395906
loss: 1.0094009637832642,grad_norm: 0.9999997018513423, iteration: 395907
loss: 0.9868301153182983,grad_norm: 0.6438710202845984, iteration: 395908
loss: 0.976443886756897,grad_norm: 0.8177442224035036, iteration: 395909
loss: 1.0252784490585327,grad_norm: 0.9199800632766213, iteration: 395910
loss: 0.9878838062286377,grad_norm: 0.7607347687306446, iteration: 395911
loss: 1.0551248788833618,grad_norm: 0.9999991036880377, iteration: 395912
loss: 1.0097945928573608,grad_norm: 0.999999139138355, iteration: 395913
loss: 0.9783867001533508,grad_norm: 0.7491950370276875, iteration: 395914
loss: 1.0037157535552979,grad_norm: 0.8109652213982139, iteration: 395915
loss: 0.9898683428764343,grad_norm: 0.6590265659364676, iteration: 395916
loss: 1.0522937774658203,grad_norm: 0.9999990947559304, iteration: 395917
loss: 0.9739606380462646,grad_norm: 0.663465488283822, iteration: 395918
loss: 1.0024616718292236,grad_norm: 0.7221309316790123, iteration: 395919
loss: 1.0074398517608643,grad_norm: 0.9999996336795905, iteration: 395920
loss: 1.00066077709198,grad_norm: 0.8401176660171507, iteration: 395921
loss: 1.005234718322754,grad_norm: 0.7135911563279916, iteration: 395922
loss: 1.0492502450942993,grad_norm: 0.9241230887983414, iteration: 395923
loss: 0.9686588644981384,grad_norm: 0.732879900140998, iteration: 395924
loss: 1.0369149446487427,grad_norm: 0.9999993835472186, iteration: 395925
loss: 0.9591152667999268,grad_norm: 0.9999991188170412, iteration: 395926
loss: 1.1974880695343018,grad_norm: 0.9999996035111297, iteration: 395927
loss: 0.9886824488639832,grad_norm: 0.7320371893396721, iteration: 395928
loss: 1.0106086730957031,grad_norm: 0.843693440417467, iteration: 395929
loss: 0.9591875076293945,grad_norm: 0.8577954927608222, iteration: 395930
loss: 0.9960982203483582,grad_norm: 0.7898707652996766, iteration: 395931
loss: 1.1423510313034058,grad_norm: 0.9999999134791794, iteration: 395932
loss: 0.9931912422180176,grad_norm: 0.6931578612888945, iteration: 395933
loss: 1.0706305503845215,grad_norm: 0.886941566582679, iteration: 395934
loss: 1.0108246803283691,grad_norm: 0.8610803459427474, iteration: 395935
loss: 0.9955695867538452,grad_norm: 0.7411910526097764, iteration: 395936
loss: 1.1288862228393555,grad_norm: 0.9999999423252514, iteration: 395937
loss: 1.0447092056274414,grad_norm: 0.9999991512121326, iteration: 395938
loss: 1.0719349384307861,grad_norm: 0.9458394713575735, iteration: 395939
loss: 1.0013868808746338,grad_norm: 0.7613227014630481, iteration: 395940
loss: 1.0264204740524292,grad_norm: 0.8066247701421222, iteration: 395941
loss: 1.0096408128738403,grad_norm: 0.9999996911388014, iteration: 395942
loss: 1.0179606676101685,grad_norm: 0.7368782496713792, iteration: 395943
loss: 1.0116716623306274,grad_norm: 0.9471285799113989, iteration: 395944
loss: 0.9949362277984619,grad_norm: 0.7767572378166866, iteration: 395945
loss: 0.982853353023529,grad_norm: 0.8276086339580789, iteration: 395946
loss: 1.0081769227981567,grad_norm: 0.6950604046016182, iteration: 395947
loss: 1.0138293504714966,grad_norm: 0.9999996348509814, iteration: 395948
loss: 1.034372329711914,grad_norm: 0.9400635176781255, iteration: 395949
loss: 1.097440481185913,grad_norm: 0.9999991420842502, iteration: 395950
loss: 0.9989842176437378,grad_norm: 0.6926356880269109, iteration: 395951
loss: 0.9996156096458435,grad_norm: 0.8779820599909727, iteration: 395952
loss: 1.0085264444351196,grad_norm: 0.9999989521611662, iteration: 395953
loss: 0.9989714622497559,grad_norm: 0.8295491763705091, iteration: 395954
loss: 1.024596929550171,grad_norm: 0.9313163287451863, iteration: 395955
loss: 0.9870659708976746,grad_norm: 0.7410722795096498, iteration: 395956
loss: 1.0461981296539307,grad_norm: 0.7609193329974411, iteration: 395957
loss: 1.0687880516052246,grad_norm: 0.9999993106081498, iteration: 395958
loss: 1.0362391471862793,grad_norm: 0.9999990999985621, iteration: 395959
loss: 1.071790337562561,grad_norm: 0.9999996732497574, iteration: 395960
loss: 1.036805272102356,grad_norm: 0.9999990435763838, iteration: 395961
loss: 1.0155247449874878,grad_norm: 0.7029780538500945, iteration: 395962
loss: 0.960479199886322,grad_norm: 0.7402552921935522, iteration: 395963
loss: 0.9618723392486572,grad_norm: 0.7966162231443059, iteration: 395964
loss: 0.993356466293335,grad_norm: 0.9456199186378768, iteration: 395965
loss: 0.9998463988304138,grad_norm: 0.7740713236621278, iteration: 395966
loss: 1.0011653900146484,grad_norm: 0.7168376012486951, iteration: 395967
loss: 1.025439739227295,grad_norm: 0.853774932724465, iteration: 395968
loss: 1.04204523563385,grad_norm: 0.7483084579112387, iteration: 395969
loss: 0.9923723340034485,grad_norm: 0.7475465795435566, iteration: 395970
loss: 0.9638579487800598,grad_norm: 0.7985461862285396, iteration: 395971
loss: 1.0270349979400635,grad_norm: 0.9999993956192336, iteration: 395972
loss: 1.0569993257522583,grad_norm: 0.9999997915338577, iteration: 395973
loss: 1.0272082090377808,grad_norm: 0.639522109517381, iteration: 395974
loss: 1.0398650169372559,grad_norm: 0.9999991233457944, iteration: 395975
loss: 1.0428160429000854,grad_norm: 0.7916578899170458, iteration: 395976
loss: 1.0072945356369019,grad_norm: 0.9709514490999022, iteration: 395977
loss: 0.9838607311248779,grad_norm: 0.6862951252136941, iteration: 395978
loss: 1.0916951894760132,grad_norm: 0.8896969987763794, iteration: 395979
loss: 0.9743322730064392,grad_norm: 0.8048063338783531, iteration: 395980
loss: 0.9841154217720032,grad_norm: 0.7966249042015352, iteration: 395981
loss: 1.0353330373764038,grad_norm: 0.9999994973384876, iteration: 395982
loss: 0.961144745349884,grad_norm: 0.6287184868531835, iteration: 395983
loss: 0.9930036067962646,grad_norm: 0.7187763405134846, iteration: 395984
loss: 1.018323302268982,grad_norm: 0.7517215810761909, iteration: 395985
loss: 1.0103989839553833,grad_norm: 0.7346656893681222, iteration: 395986
loss: 1.0609098672866821,grad_norm: 0.9999990530233365, iteration: 395987
loss: 1.0026624202728271,grad_norm: 0.893291843652763, iteration: 395988
loss: 1.0070778131484985,grad_norm: 0.8079511992645051, iteration: 395989
loss: 1.051916241645813,grad_norm: 0.9999991501452059, iteration: 395990
loss: 1.0815486907958984,grad_norm: 0.999999311003601, iteration: 395991
loss: 1.0306609869003296,grad_norm: 0.8999981508199718, iteration: 395992
loss: 1.0041872262954712,grad_norm: 0.9486520856567202, iteration: 395993
loss: 0.9977338910102844,grad_norm: 0.7328591581002272, iteration: 395994
loss: 0.9845093488693237,grad_norm: 0.7140640248679906, iteration: 395995
loss: 1.052808403968811,grad_norm: 0.9999998788133773, iteration: 395996
loss: 0.992100715637207,grad_norm: 0.817934546827187, iteration: 395997
loss: 1.0172629356384277,grad_norm: 0.6635672368614192, iteration: 395998
loss: 1.0113773345947266,grad_norm: 0.9710491849116875, iteration: 395999
loss: 0.990030825138092,grad_norm: 0.6638429715684203, iteration: 396000
loss: 1.0225796699523926,grad_norm: 0.7911031692931582, iteration: 396001
loss: 1.0202640295028687,grad_norm: 0.7762492104775365, iteration: 396002
loss: 0.9768885374069214,grad_norm: 0.7883446273540943, iteration: 396003
loss: 1.0163254737854004,grad_norm: 0.9999993643017445, iteration: 396004
loss: 0.9957361817359924,grad_norm: 0.7718384244286625, iteration: 396005
loss: 1.077436923980713,grad_norm: 0.7665225213048724, iteration: 396006
loss: 0.9985489249229431,grad_norm: 0.9999996274550637, iteration: 396007
loss: 0.9891173243522644,grad_norm: 0.7505799899304162, iteration: 396008
loss: 0.9747174382209778,grad_norm: 0.7978849541921083, iteration: 396009
loss: 1.00372314453125,grad_norm: 0.9292053690989384, iteration: 396010
loss: 1.0673338174819946,grad_norm: 0.999999491589491, iteration: 396011
loss: 1.078504204750061,grad_norm: 0.9999999341281135, iteration: 396012
loss: 1.028201699256897,grad_norm: 0.9999992974805969, iteration: 396013
loss: 0.9994754791259766,grad_norm: 0.9999991422072532, iteration: 396014
loss: 0.9651793241500854,grad_norm: 0.7732447944144333, iteration: 396015
loss: 0.9739661812782288,grad_norm: 0.7585334578262147, iteration: 396016
loss: 0.9949924945831299,grad_norm: 0.8247132105762142, iteration: 396017
loss: 0.9940250515937805,grad_norm: 0.6675354519114491, iteration: 396018
loss: 1.0581849813461304,grad_norm: 0.9272951309381058, iteration: 396019
loss: 1.0094693899154663,grad_norm: 0.6861022658672903, iteration: 396020
loss: 1.0293731689453125,grad_norm: 0.8832060882422851, iteration: 396021
loss: 1.0069857835769653,grad_norm: 0.6561961936884765, iteration: 396022
loss: 1.1047158241271973,grad_norm: 0.999999738109395, iteration: 396023
loss: 1.0827796459197998,grad_norm: 1.0000000625873535, iteration: 396024
loss: 0.9829672574996948,grad_norm: 0.754953147933081, iteration: 396025
loss: 1.0373910665512085,grad_norm: 0.9999992564448623, iteration: 396026
loss: 1.170233964920044,grad_norm: 0.9999998037781805, iteration: 396027
loss: 1.007637858390808,grad_norm: 0.8521918195191831, iteration: 396028
loss: 0.9821054935455322,grad_norm: 0.7456085187027096, iteration: 396029
loss: 1.009997844696045,grad_norm: 0.9999998911644297, iteration: 396030
loss: 1.0741766691207886,grad_norm: 0.9999994222782047, iteration: 396031
loss: 1.0739412307739258,grad_norm: 0.8965180854583846, iteration: 396032
loss: 1.0335719585418701,grad_norm: 0.9999998915858788, iteration: 396033
loss: 0.9820790886878967,grad_norm: 0.7536170943928744, iteration: 396034
loss: 1.0031514167785645,grad_norm: 0.7777206281923211, iteration: 396035
loss: 0.9806627631187439,grad_norm: 0.8194018719498196, iteration: 396036
loss: 1.1555980443954468,grad_norm: 0.9999998384009557, iteration: 396037
loss: 0.9819308519363403,grad_norm: 0.7386673834263077, iteration: 396038
loss: 1.003993034362793,grad_norm: 0.8288440896357335, iteration: 396039
loss: 1.006526231765747,grad_norm: 0.7253627724907918, iteration: 396040
loss: 1.0117133855819702,grad_norm: 0.7092154326779845, iteration: 396041
loss: 1.0361649990081787,grad_norm: 0.9999997399971827, iteration: 396042
loss: 1.0025075674057007,grad_norm: 0.9124520081271684, iteration: 396043
loss: 1.018742322921753,grad_norm: 0.9315198858275091, iteration: 396044
loss: 1.0708435773849487,grad_norm: 0.999999543428689, iteration: 396045
loss: 1.004387378692627,grad_norm: 0.6636454333593467, iteration: 396046
loss: 1.0133694410324097,grad_norm: 0.8619522570772096, iteration: 396047
loss: 1.0270912647247314,grad_norm: 0.9999990873152764, iteration: 396048
loss: 1.0183488130569458,grad_norm: 0.9999993439111505, iteration: 396049
loss: 0.9342299103736877,grad_norm: 0.7288181347186357, iteration: 396050
loss: 0.9948867559432983,grad_norm: 0.8841585432188012, iteration: 396051
loss: 0.9749121069908142,grad_norm: 0.7715833822607756, iteration: 396052
loss: 1.0015252828598022,grad_norm: 0.8422242297805312, iteration: 396053
loss: 0.9922470450401306,grad_norm: 0.6942088605560236, iteration: 396054
loss: 1.005996823310852,grad_norm: 0.7525149721082846, iteration: 396055
loss: 0.971320390701294,grad_norm: 0.8690011289235574, iteration: 396056
loss: 0.9816855788230896,grad_norm: 0.9999992064782188, iteration: 396057
loss: 1.0423356294631958,grad_norm: 0.9999998733942069, iteration: 396058
loss: 0.9695329666137695,grad_norm: 0.7402660097472783, iteration: 396059
loss: 1.0216610431671143,grad_norm: 0.6495235302250247, iteration: 396060
loss: 0.9796740412712097,grad_norm: 0.8513885817586533, iteration: 396061
loss: 0.9846506118774414,grad_norm: 0.6435818348243353, iteration: 396062
loss: 1.0796626806259155,grad_norm: 0.9999991298045001, iteration: 396063
loss: 1.0225811004638672,grad_norm: 0.6520812220781291, iteration: 396064
loss: 1.0722845792770386,grad_norm: 0.9106379927989108, iteration: 396065
loss: 0.9972521662712097,grad_norm: 0.7795652032791051, iteration: 396066
loss: 1.0705294609069824,grad_norm: 0.7068508512931386, iteration: 396067
loss: 0.9792378544807434,grad_norm: 0.9999997598945812, iteration: 396068
loss: 0.9759102463722229,grad_norm: 0.7239792489520062, iteration: 396069
loss: 1.0423823595046997,grad_norm: 0.999999249505469, iteration: 396070
loss: 1.0318292379379272,grad_norm: 0.7981377687953874, iteration: 396071
loss: 1.026774525642395,grad_norm: 0.9999997111161734, iteration: 396072
loss: 1.0974018573760986,grad_norm: 0.9999995301341956, iteration: 396073
loss: 0.985159158706665,grad_norm: 0.7093948184101644, iteration: 396074
loss: 0.982632040977478,grad_norm: 0.8568525917993027, iteration: 396075
loss: 1.0062922239303589,grad_norm: 0.678800295354604, iteration: 396076
loss: 1.0000931024551392,grad_norm: 0.7099192525853625, iteration: 396077
loss: 1.0264403820037842,grad_norm: 0.8834130645224418, iteration: 396078
loss: 0.9598249197006226,grad_norm: 0.6693632123031878, iteration: 396079
loss: 1.0331562757492065,grad_norm: 0.9999998924979443, iteration: 396080
loss: 1.0271968841552734,grad_norm: 0.875988128543059, iteration: 396081
loss: 0.9869756698608398,grad_norm: 0.7819830575229361, iteration: 396082
loss: 1.0126242637634277,grad_norm: 0.9758916479022067, iteration: 396083
loss: 1.0253379344940186,grad_norm: 0.7630232875377533, iteration: 396084
loss: 1.041965126991272,grad_norm: 0.9999991504876736, iteration: 396085
loss: 0.9969784617424011,grad_norm: 0.9076564263504558, iteration: 396086
loss: 1.0029911994934082,grad_norm: 0.6990980747658604, iteration: 396087
loss: 0.9943556189537048,grad_norm: 0.9999992758307633, iteration: 396088
loss: 0.9777703285217285,grad_norm: 0.7287046537113242, iteration: 396089
loss: 1.0236400365829468,grad_norm: 0.8745592774263875, iteration: 396090
loss: 0.9690315127372742,grad_norm: 0.8445250065588764, iteration: 396091
loss: 1.008781909942627,grad_norm: 0.6837124740913584, iteration: 396092
loss: 1.0208944082260132,grad_norm: 0.9999997573875616, iteration: 396093
loss: 0.9951239228248596,grad_norm: 0.7764559924323245, iteration: 396094
loss: 1.0813636779785156,grad_norm: 0.8954423762579077, iteration: 396095
loss: 1.0697782039642334,grad_norm: 0.9999992223015102, iteration: 396096
loss: 0.9822086691856384,grad_norm: 0.9999998483518461, iteration: 396097
loss: 0.9808088541030884,grad_norm: 0.7047668185541587, iteration: 396098
loss: 0.9836211800575256,grad_norm: 0.7652311177167084, iteration: 396099
loss: 0.9743618965148926,grad_norm: 0.7247031416080548, iteration: 396100
loss: 1.0057785511016846,grad_norm: 0.8673506537230301, iteration: 396101
loss: 1.2171862125396729,grad_norm: 0.9999994357427063, iteration: 396102
loss: 1.0270397663116455,grad_norm: 0.7688208392659253, iteration: 396103
loss: 1.0333831310272217,grad_norm: 0.86224622141245, iteration: 396104
loss: 1.0018490552902222,grad_norm: 0.7942306911561009, iteration: 396105
loss: 1.0415503978729248,grad_norm: 0.9999998061416842, iteration: 396106
loss: 1.0147089958190918,grad_norm: 0.9693412748563973, iteration: 396107
loss: 1.0005446672439575,grad_norm: 0.9385930624223607, iteration: 396108
loss: 1.007311224937439,grad_norm: 0.9501779219729051, iteration: 396109
loss: 0.9748342037200928,grad_norm: 0.7523626220530891, iteration: 396110
loss: 0.9610327482223511,grad_norm: 0.8208927955539871, iteration: 396111
loss: 1.0491769313812256,grad_norm: 0.8160014193063038, iteration: 396112
loss: 1.0665338039398193,grad_norm: 0.9999998093264904, iteration: 396113
loss: 0.9751110076904297,grad_norm: 0.5573024400729323, iteration: 396114
loss: 1.007753849029541,grad_norm: 0.7830258670888381, iteration: 396115
loss: 0.984610378742218,grad_norm: 0.8464407260747656, iteration: 396116
loss: 0.9931541681289673,grad_norm: 0.9999994796286655, iteration: 396117
loss: 0.9880838990211487,grad_norm: 0.821466862123375, iteration: 396118
loss: 0.9869607090950012,grad_norm: 0.7979333296933201, iteration: 396119
loss: 1.0106788873672485,grad_norm: 0.6708893893738205, iteration: 396120
loss: 1.0197676420211792,grad_norm: 0.7703253735678591, iteration: 396121
loss: 0.9906989336013794,grad_norm: 0.8127907943624437, iteration: 396122
loss: 1.0204952955245972,grad_norm: 0.7112830523258781, iteration: 396123
loss: 1.04135000705719,grad_norm: 0.9999995062793963, iteration: 396124
loss: 0.9917899966239929,grad_norm: 0.8884628039614961, iteration: 396125
loss: 1.0324110984802246,grad_norm: 0.9999992933905909, iteration: 396126
loss: 1.0129809379577637,grad_norm: 0.8685991911402949, iteration: 396127
loss: 1.0963939428329468,grad_norm: 0.8533859675873369, iteration: 396128
loss: 1.0275019407272339,grad_norm: 0.6452895799528025, iteration: 396129
loss: 1.0242763757705688,grad_norm: 0.9831383842271592, iteration: 396130
loss: 0.956169843673706,grad_norm: 0.812170803549601, iteration: 396131
loss: 1.0329402685165405,grad_norm: 0.7914548825084228, iteration: 396132
loss: 1.0431383848190308,grad_norm: 0.9152065037326242, iteration: 396133
loss: 1.0089199542999268,grad_norm: 0.8088184281427413, iteration: 396134
loss: 0.999125599861145,grad_norm: 0.9179333944578357, iteration: 396135
loss: 1.0356864929199219,grad_norm: 0.724285040457494, iteration: 396136
loss: 0.9844337105751038,grad_norm: 0.7510489525382545, iteration: 396137
loss: 0.9900988340377808,grad_norm: 0.7012446008122484, iteration: 396138
loss: 1.0689411163330078,grad_norm: 1.0000000111091416, iteration: 396139
loss: 1.0199706554412842,grad_norm: 0.8699045023485655, iteration: 396140
loss: 1.007872462272644,grad_norm: 0.9095064124995568, iteration: 396141
loss: 1.0197372436523438,grad_norm: 0.7957191282408975, iteration: 396142
loss: 1.000684380531311,grad_norm: 0.9999991946573428, iteration: 396143
loss: 1.0258336067199707,grad_norm: 0.9248135555394561, iteration: 396144
loss: 1.0011515617370605,grad_norm: 0.7671923897009412, iteration: 396145
loss: 0.9938685894012451,grad_norm: 0.7455070165171176, iteration: 396146
loss: 0.981948733329773,grad_norm: 0.7282175608060448, iteration: 396147
loss: 1.0036686658859253,grad_norm: 0.8985787736476953, iteration: 396148
loss: 1.0217207670211792,grad_norm: 0.8377419298122748, iteration: 396149
loss: 1.0184763669967651,grad_norm: 0.7750424068441107, iteration: 396150
loss: 0.9931505918502808,grad_norm: 0.7958921978393456, iteration: 396151
loss: 1.0259969234466553,grad_norm: 0.8466338970191204, iteration: 396152
loss: 0.9247537851333618,grad_norm: 0.8601936624602213, iteration: 396153
loss: 1.0505805015563965,grad_norm: 0.6894699715877405, iteration: 396154
loss: 0.9989309310913086,grad_norm: 0.7083031435466575, iteration: 396155
loss: 0.9850696921348572,grad_norm: 0.7784789367304191, iteration: 396156
loss: 1.0290546417236328,grad_norm: 0.999999767608537, iteration: 396157
loss: 0.9942644238471985,grad_norm: 0.6090326308898347, iteration: 396158
loss: 0.9934471845626831,grad_norm: 0.7695296347182036, iteration: 396159
loss: 1.0029921531677246,grad_norm: 0.8536479731225025, iteration: 396160
loss: 1.031894326210022,grad_norm: 0.7334951425892721, iteration: 396161
loss: 1.0044893026351929,grad_norm: 0.7431639019078244, iteration: 396162
loss: 1.0228157043457031,grad_norm: 0.772564360805358, iteration: 396163
loss: 1.0049200057983398,grad_norm: 0.8698862191365251, iteration: 396164
loss: 1.037165641784668,grad_norm: 0.8854016906001702, iteration: 396165
loss: 0.9885340332984924,grad_norm: 0.9748897258198528, iteration: 396166
loss: 1.054519534111023,grad_norm: 0.9999996931064137, iteration: 396167
loss: 1.0584341287612915,grad_norm: 0.9999991607458547, iteration: 396168
loss: 0.9630617499351501,grad_norm: 0.9437650579519642, iteration: 396169
loss: 0.9819965362548828,grad_norm: 0.6589229558534873, iteration: 396170
loss: 1.0206220149993896,grad_norm: 0.8424324486831154, iteration: 396171
loss: 0.9998480677604675,grad_norm: 0.7605005513981955, iteration: 396172
loss: 0.9731898307800293,grad_norm: 0.7308910779454659, iteration: 396173
loss: 1.0181705951690674,grad_norm: 0.8231636254365126, iteration: 396174
loss: 0.9960801005363464,grad_norm: 0.804630982059517, iteration: 396175
loss: 0.9807068705558777,grad_norm: 0.7958363715678424, iteration: 396176
loss: 1.033035397529602,grad_norm: 0.7797630441715444, iteration: 396177
loss: 0.9968008995056152,grad_norm: 0.6958734275260586, iteration: 396178
loss: 1.007772445678711,grad_norm: 0.8385744070315921, iteration: 396179
loss: 0.9327401518821716,grad_norm: 0.7033375187536347, iteration: 396180
loss: 1.0472203493118286,grad_norm: 0.7263328653086223, iteration: 396181
loss: 1.0189692974090576,grad_norm: 0.7291370465043512, iteration: 396182
loss: 0.9906336069107056,grad_norm: 0.955914577629107, iteration: 396183
loss: 1.0150305032730103,grad_norm: 0.6664653932321064, iteration: 396184
loss: 0.9992619156837463,grad_norm: 0.776332420752838, iteration: 396185
loss: 1.0114045143127441,grad_norm: 0.781166159799508, iteration: 396186
loss: 1.0328269004821777,grad_norm: 0.7283420291393469, iteration: 396187
loss: 0.9990193843841553,grad_norm: 0.9999992210582044, iteration: 396188
loss: 0.9577882885932922,grad_norm: 0.8608022063168906, iteration: 396189
loss: 1.0090250968933105,grad_norm: 0.9999992689771793, iteration: 396190
loss: 1.0120177268981934,grad_norm: 0.8772578096519883, iteration: 396191
loss: 1.0218312740325928,grad_norm: 0.785796518544448, iteration: 396192
loss: 1.0525617599487305,grad_norm: 0.8704499016439318, iteration: 396193
loss: 1.0061649084091187,grad_norm: 0.6380044879583145, iteration: 396194
loss: 0.9905748963356018,grad_norm: 0.7282027119353244, iteration: 396195
loss: 1.0232698917388916,grad_norm: 0.768007687119118, iteration: 396196
loss: 1.0261709690093994,grad_norm: 0.8033669972173235, iteration: 396197
loss: 0.9758726358413696,grad_norm: 0.6713741559896406, iteration: 396198
loss: 1.0271474123001099,grad_norm: 0.732239111728883, iteration: 396199
loss: 1.0085490942001343,grad_norm: 0.7717599506975875, iteration: 396200
loss: 1.0098767280578613,grad_norm: 0.9656658677167111, iteration: 396201
loss: 0.9804633855819702,grad_norm: 0.8798905622535743, iteration: 396202
loss: 0.9918448328971863,grad_norm: 0.7207756081702901, iteration: 396203
loss: 1.0000576972961426,grad_norm: 0.9576238979278089, iteration: 396204
loss: 1.0130937099456787,grad_norm: 0.7407978263412897, iteration: 396205
loss: 1.0231021642684937,grad_norm: 0.7524747435086275, iteration: 396206
loss: 1.0341989994049072,grad_norm: 0.7664889026150882, iteration: 396207
loss: 0.9788633584976196,grad_norm: 0.8430811559347687, iteration: 396208
loss: 0.9776276350021362,grad_norm: 0.6895231438163596, iteration: 396209
loss: 0.9991479516029358,grad_norm: 0.7229332109599458, iteration: 396210
loss: 0.9598012566566467,grad_norm: 0.8164923903402969, iteration: 396211
loss: 0.9971768260002136,grad_norm: 0.8278449157489608, iteration: 396212
loss: 0.9786891341209412,grad_norm: 0.7689736653192961, iteration: 396213
loss: 0.9779950976371765,grad_norm: 0.8960358196441598, iteration: 396214
loss: 1.0028740167617798,grad_norm: 0.6476722590436979, iteration: 396215
loss: 0.9986680746078491,grad_norm: 0.7837431057733227, iteration: 396216
loss: 1.01414155960083,grad_norm: 0.7496560152574515, iteration: 396217
loss: 0.9955992698669434,grad_norm: 0.6363753980065625, iteration: 396218
loss: 0.9789397716522217,grad_norm: 0.6728409091831374, iteration: 396219
loss: 1.0037215948104858,grad_norm: 0.8821347560448634, iteration: 396220
loss: 1.0325461626052856,grad_norm: 0.7595565060007121, iteration: 396221
loss: 1.0269019603729248,grad_norm: 0.8169934216728434, iteration: 396222
loss: 0.9785404801368713,grad_norm: 0.7598038156762488, iteration: 396223
loss: 1.023776888847351,grad_norm: 0.7503518036856076, iteration: 396224
loss: 0.9987271428108215,grad_norm: 0.7395888341037415, iteration: 396225
loss: 1.0423994064331055,grad_norm: 0.8889673037732486, iteration: 396226
loss: 0.9777622818946838,grad_norm: 0.9469910609675167, iteration: 396227
loss: 0.9842426180839539,grad_norm: 0.7817191460945372, iteration: 396228
loss: 1.0079940557479858,grad_norm: 0.9657902322303242, iteration: 396229
loss: 0.9684546589851379,grad_norm: 0.7297453994553457, iteration: 396230
loss: 0.9776431322097778,grad_norm: 0.6173416772228669, iteration: 396231
loss: 1.0128217935562134,grad_norm: 0.7083654601478689, iteration: 396232
loss: 1.0294759273529053,grad_norm: 0.9999999553371304, iteration: 396233
loss: 1.0326511859893799,grad_norm: 0.8886207699980515, iteration: 396234
loss: 1.0013031959533691,grad_norm: 0.8394228000793036, iteration: 396235
loss: 0.9727010130882263,grad_norm: 0.7076088849724688, iteration: 396236
loss: 1.0168730020523071,grad_norm: 0.6927290101875968, iteration: 396237
loss: 1.0436439514160156,grad_norm: 0.9999990851193327, iteration: 396238
loss: 1.0715771913528442,grad_norm: 0.9999989778320417, iteration: 396239
loss: 0.9810968041419983,grad_norm: 0.8099998051260521, iteration: 396240
loss: 1.0044053792953491,grad_norm: 0.7394758205875823, iteration: 396241
loss: 0.9820556640625,grad_norm: 0.7345896807207675, iteration: 396242
loss: 1.0340884923934937,grad_norm: 0.8116948529830191, iteration: 396243
loss: 1.0027103424072266,grad_norm: 0.8793058589125586, iteration: 396244
loss: 0.943136990070343,grad_norm: 0.7801361306759712, iteration: 396245
loss: 1.0020561218261719,grad_norm: 0.8502196798975593, iteration: 396246
loss: 1.011334776878357,grad_norm: 0.7411819901866387, iteration: 396247
loss: 1.0013298988342285,grad_norm: 0.645982839580555, iteration: 396248
loss: 1.0206191539764404,grad_norm: 0.8482371224961164, iteration: 396249
loss: 1.013156771659851,grad_norm: 0.9999990435431104, iteration: 396250
loss: 0.9495694637298584,grad_norm: 0.738767967365992, iteration: 396251
loss: 0.9979327917098999,grad_norm: 0.6942915351135499, iteration: 396252
loss: 0.9951963424682617,grad_norm: 0.7018848805793774, iteration: 396253
loss: 0.992580235004425,grad_norm: 0.8943399391512036, iteration: 396254
loss: 0.9808936715126038,grad_norm: 0.836566019530339, iteration: 396255
loss: 1.0140080451965332,grad_norm: 0.7383476002536044, iteration: 396256
loss: 0.9872434139251709,grad_norm: 0.7658791574436582, iteration: 396257
loss: 0.9895804524421692,grad_norm: 0.6538078436993607, iteration: 396258
loss: 1.0031713247299194,grad_norm: 0.7057500760309157, iteration: 396259
loss: 1.032105565071106,grad_norm: 0.9999991127247586, iteration: 396260
loss: 0.990953803062439,grad_norm: 0.7453126819724307, iteration: 396261
loss: 1.0161925554275513,grad_norm: 0.7509965802607204, iteration: 396262
loss: 0.9620845317840576,grad_norm: 0.7412296471228152, iteration: 396263
loss: 0.9940147995948792,grad_norm: 0.9999991607275844, iteration: 396264
loss: 0.9734666347503662,grad_norm: 0.7497254926495678, iteration: 396265
loss: 0.9700639843940735,grad_norm: 0.7368349072730804, iteration: 396266
loss: 0.9769299030303955,grad_norm: 0.9999993879667809, iteration: 396267
loss: 1.001155138015747,grad_norm: 0.6476994134613197, iteration: 396268
loss: 0.960722029209137,grad_norm: 0.7493105221086342, iteration: 396269
loss: 1.0249040126800537,grad_norm: 0.7045389185094795, iteration: 396270
loss: 1.029857873916626,grad_norm: 0.7647508120371082, iteration: 396271
loss: 1.006646752357483,grad_norm: 0.9597638892492313, iteration: 396272
loss: 0.984641432762146,grad_norm: 0.7031765646241181, iteration: 396273
loss: 1.0079951286315918,grad_norm: 0.6657127932724485, iteration: 396274
loss: 0.9743521809577942,grad_norm: 0.7642195090649896, iteration: 396275
loss: 0.9771016836166382,grad_norm: 0.8164727843996255, iteration: 396276
loss: 0.9825379848480225,grad_norm: 0.8366473442917614, iteration: 396277
loss: 0.9868484735488892,grad_norm: 0.7642280098112858, iteration: 396278
loss: 0.9746842384338379,grad_norm: 0.7005997412967218, iteration: 396279
loss: 1.0191532373428345,grad_norm: 0.8493567240347147, iteration: 396280
loss: 1.010360598564148,grad_norm: 0.8552532749838813, iteration: 396281
loss: 0.9721959233283997,grad_norm: 0.7508347718575025, iteration: 396282
loss: 0.996865451335907,grad_norm: 0.9999998121898046, iteration: 396283
loss: 1.0330580472946167,grad_norm: 0.6694451851845404, iteration: 396284
loss: 0.9800698757171631,grad_norm: 0.9926498136209932, iteration: 396285
loss: 1.0100643634796143,grad_norm: 0.8200185185744516, iteration: 396286
loss: 0.9976247549057007,grad_norm: 0.9999997540256881, iteration: 396287
loss: 1.032977819442749,grad_norm: 0.9999994184135513, iteration: 396288
loss: 1.0432324409484863,grad_norm: 0.9999994422215686, iteration: 396289
loss: 0.9891508221626282,grad_norm: 0.9999992330920894, iteration: 396290
loss: 1.0001840591430664,grad_norm: 0.9653123679559243, iteration: 396291
loss: 1.0182181596755981,grad_norm: 0.7913975324001294, iteration: 396292
loss: 1.0154435634613037,grad_norm: 0.7135270748759226, iteration: 396293
loss: 1.003184199333191,grad_norm: 0.729342865165345, iteration: 396294
loss: 1.007524013519287,grad_norm: 0.9891182202627479, iteration: 396295
loss: 0.978758692741394,grad_norm: 0.7048558454074434, iteration: 396296
loss: 0.9722582697868347,grad_norm: 0.7812130576379417, iteration: 396297
loss: 0.9809288382530212,grad_norm: 0.7639254101749514, iteration: 396298
loss: 1.0069594383239746,grad_norm: 0.7496268444073496, iteration: 396299
loss: 0.9774112701416016,grad_norm: 0.8651500483497357, iteration: 396300
loss: 0.9959080815315247,grad_norm: 0.7844370049475035, iteration: 396301
loss: 0.9790531992912292,grad_norm: 0.8536014094864793, iteration: 396302
loss: 1.0008405447006226,grad_norm: 0.8209369921359928, iteration: 396303
loss: 1.1093552112579346,grad_norm: 0.9999996062776244, iteration: 396304
loss: 0.9771323800086975,grad_norm: 0.897955422613302, iteration: 396305
loss: 1.0018774271011353,grad_norm: 0.7593768974071746, iteration: 396306
loss: 0.9804046750068665,grad_norm: 0.999999590892434, iteration: 396307
loss: 0.9827544689178467,grad_norm: 0.6978634106265661, iteration: 396308
loss: 1.0253455638885498,grad_norm: 0.8358210227812635, iteration: 396309
loss: 1.0198109149932861,grad_norm: 0.8514103713245011, iteration: 396310
loss: 0.9937248826026917,grad_norm: 0.8884046113011059, iteration: 396311
loss: 1.010340690612793,grad_norm: 0.7201075775367084, iteration: 396312
loss: 1.0651490688323975,grad_norm: 0.7866082508689218, iteration: 396313
loss: 1.0023415088653564,grad_norm: 0.8281009674926468, iteration: 396314
loss: 0.9547908306121826,grad_norm: 0.7766931988726996, iteration: 396315
loss: 0.9878934621810913,grad_norm: 0.7920830089047862, iteration: 396316
loss: 0.9843189716339111,grad_norm: 0.7222872873750665, iteration: 396317
loss: 1.0043432712554932,grad_norm: 0.9999993299489323, iteration: 396318
loss: 0.9908878207206726,grad_norm: 0.9648078860691924, iteration: 396319
loss: 1.025668978691101,grad_norm: 0.8514544861222015, iteration: 396320
loss: 1.0107483863830566,grad_norm: 0.7014204176119008, iteration: 396321
loss: 1.0106133222579956,grad_norm: 0.7934590390666323, iteration: 396322
loss: 0.995242714881897,grad_norm: 0.802714535330754, iteration: 396323
loss: 0.9871458411216736,grad_norm: 0.6633490625246922, iteration: 396324
loss: 1.024890422821045,grad_norm: 0.8308725261395066, iteration: 396325
loss: 1.0086218118667603,grad_norm: 0.7633317689863527, iteration: 396326
loss: 0.9949478507041931,grad_norm: 0.6930129704660458, iteration: 396327
loss: 0.9960507750511169,grad_norm: 0.9999992064202815, iteration: 396328
loss: 1.0383079051971436,grad_norm: 0.7584284228985216, iteration: 396329
loss: 0.9838054180145264,grad_norm: 0.7888822795495781, iteration: 396330
loss: 0.9772724509239197,grad_norm: 0.6855073971296871, iteration: 396331
loss: 0.9857559204101562,grad_norm: 0.8922506891837874, iteration: 396332
loss: 0.9826133251190186,grad_norm: 0.80505111222417, iteration: 396333
loss: 1.0025684833526611,grad_norm: 0.6411743220396622, iteration: 396334
loss: 1.0302654504776,grad_norm: 0.8301489813106073, iteration: 396335
loss: 1.0345326662063599,grad_norm: 0.9999991614517695, iteration: 396336
loss: 1.0208100080490112,grad_norm: 0.8375194515365834, iteration: 396337
loss: 0.9610372185707092,grad_norm: 0.9282603129451558, iteration: 396338
loss: 1.0202780961990356,grad_norm: 0.9999995332509986, iteration: 396339
loss: 0.983125627040863,grad_norm: 0.8063081851241592, iteration: 396340
loss: 1.064947247505188,grad_norm: 0.9999996186589026, iteration: 396341
loss: 1.026645302772522,grad_norm: 0.7905983876309777, iteration: 396342
loss: 1.0432136058807373,grad_norm: 0.9999991363374209, iteration: 396343
loss: 1.0552277565002441,grad_norm: 0.9999994114600773, iteration: 396344
loss: 1.01642906665802,grad_norm: 0.9999991938330582, iteration: 396345
loss: 1.046501636505127,grad_norm: 0.9999998064825989, iteration: 396346
loss: 1.0238639116287231,grad_norm: 0.6633557272483269, iteration: 396347
loss: 0.9984970092773438,grad_norm: 0.9999989823914529, iteration: 396348
loss: 1.029997706413269,grad_norm: 0.9571585318919, iteration: 396349
loss: 1.0800833702087402,grad_norm: 0.8171137719912505, iteration: 396350
loss: 0.9880695939064026,grad_norm: 0.7620194227267445, iteration: 396351
loss: 1.019913673400879,grad_norm: 0.73186567738988, iteration: 396352
loss: 1.0070316791534424,grad_norm: 0.7140740389619626, iteration: 396353
loss: 0.9865521788597107,grad_norm: 0.8106312944775236, iteration: 396354
loss: 1.0265628099441528,grad_norm: 0.9151661012025263, iteration: 396355
loss: 1.0098536014556885,grad_norm: 0.8060542648536302, iteration: 396356
loss: 0.9979735612869263,grad_norm: 0.8064114529278382, iteration: 396357
loss: 1.0058600902557373,grad_norm: 0.6459492471736333, iteration: 396358
loss: 0.9843244552612305,grad_norm: 0.9999990214353442, iteration: 396359
loss: 1.0100605487823486,grad_norm: 0.7278456196504547, iteration: 396360
loss: 0.9495682120323181,grad_norm: 0.8439588879789207, iteration: 396361
loss: 0.976466953754425,grad_norm: 0.8128561112142496, iteration: 396362
loss: 1.0173124074935913,grad_norm: 0.7572969188078045, iteration: 396363
loss: 1.0071864128112793,grad_norm: 0.8048263226618708, iteration: 396364
loss: 1.00666344165802,grad_norm: 0.8292933375567633, iteration: 396365
loss: 0.9936649799346924,grad_norm: 0.7936860041628371, iteration: 396366
loss: 0.9897018671035767,grad_norm: 0.8214510952850921, iteration: 396367
loss: 1.0463643074035645,grad_norm: 0.6819117175435125, iteration: 396368
loss: 0.9606269598007202,grad_norm: 0.7239469622101596, iteration: 396369
loss: 0.9848108291625977,grad_norm: 0.7698755514836917, iteration: 396370
loss: 1.003840684890747,grad_norm: 0.7130050625872315, iteration: 396371
loss: 0.9642348289489746,grad_norm: 0.8155968758269727, iteration: 396372
loss: 1.0172549486160278,grad_norm: 0.868843213015739, iteration: 396373
loss: 0.9978249669075012,grad_norm: 0.7494471027024489, iteration: 396374
loss: 1.0741147994995117,grad_norm: 0.8235297639228849, iteration: 396375
loss: 0.9970819354057312,grad_norm: 0.6754768814567288, iteration: 396376
loss: 1.04678213596344,grad_norm: 0.809255654431333, iteration: 396377
loss: 1.0418756008148193,grad_norm: 0.868559115958567, iteration: 396378
loss: 0.994856059551239,grad_norm: 0.8385438333317015, iteration: 396379
loss: 1.0284955501556396,grad_norm: 0.7033293249459637, iteration: 396380
loss: 1.0293071269989014,grad_norm: 0.8579169919936981, iteration: 396381
loss: 1.0021544694900513,grad_norm: 0.9999990817992339, iteration: 396382
loss: 1.0816128253936768,grad_norm: 0.8592421968458767, iteration: 396383
loss: 1.0067955255508423,grad_norm: 0.9999995731007439, iteration: 396384
loss: 0.985389769077301,grad_norm: 0.7779307411357134, iteration: 396385
loss: 0.9912061095237732,grad_norm: 0.8749260007409867, iteration: 396386
loss: 1.0649616718292236,grad_norm: 0.8010088811984402, iteration: 396387
loss: 0.9984961152076721,grad_norm: 0.9469371748708012, iteration: 396388
loss: 1.0105292797088623,grad_norm: 0.6860020922846797, iteration: 396389
loss: 1.0182361602783203,grad_norm: 0.8705787137538333, iteration: 396390
loss: 1.011711597442627,grad_norm: 0.7864474585289285, iteration: 396391
loss: 0.9939619302749634,grad_norm: 0.8879174928350445, iteration: 396392
loss: 0.9989233016967773,grad_norm: 0.9201044009330054, iteration: 396393
loss: 0.9642478823661804,grad_norm: 0.9640341628261537, iteration: 396394
loss: 0.9573109745979309,grad_norm: 0.7041547111378316, iteration: 396395
loss: 0.9799509644508362,grad_norm: 0.7754183706245951, iteration: 396396
loss: 0.9984911680221558,grad_norm: 0.761228773663765, iteration: 396397
loss: 1.0023332834243774,grad_norm: 0.6932475293380103, iteration: 396398
loss: 1.0599044561386108,grad_norm: 0.9999997554023504, iteration: 396399
loss: 0.9941264986991882,grad_norm: 0.8774222002749844, iteration: 396400
loss: 1.0186374187469482,grad_norm: 0.8041787543057475, iteration: 396401
loss: 0.9578849077224731,grad_norm: 0.8573942603333531, iteration: 396402
loss: 0.9851720333099365,grad_norm: 0.9999997165853197, iteration: 396403
loss: 0.9771024584770203,grad_norm: 0.8116379874604649, iteration: 396404
loss: 1.0016758441925049,grad_norm: 0.7765331951480211, iteration: 396405
loss: 1.0033234357833862,grad_norm: 0.7073842084407438, iteration: 396406
loss: 1.0191575288772583,grad_norm: 0.8298050847461672, iteration: 396407
loss: 1.0318742990493774,grad_norm: 0.8757738385790836, iteration: 396408
loss: 0.9960468411445618,grad_norm: 0.8504653799968096, iteration: 396409
loss: 1.008754849433899,grad_norm: 0.8850016139369352, iteration: 396410
loss: 0.9881245493888855,grad_norm: 0.9268671716267403, iteration: 396411
loss: 0.9926131963729858,grad_norm: 0.7464418063319062, iteration: 396412
loss: 1.0098546743392944,grad_norm: 0.8315247693084179, iteration: 396413
loss: 0.9819905161857605,grad_norm: 0.7520515790481542, iteration: 396414
loss: 0.9893031716346741,grad_norm: 0.8575847460439852, iteration: 396415
loss: 1.000230073928833,grad_norm: 0.6420877208233032, iteration: 396416
loss: 0.9896923899650574,grad_norm: 0.6584395969519303, iteration: 396417
loss: 0.9885361194610596,grad_norm: 0.8420248338905791, iteration: 396418
loss: 0.9860005974769592,grad_norm: 0.7500265084395289, iteration: 396419
loss: 1.0101791620254517,grad_norm: 0.8532795290057485, iteration: 396420
loss: 0.9612210988998413,grad_norm: 0.760977285904415, iteration: 396421
loss: 0.9716723561286926,grad_norm: 0.6484925474942271, iteration: 396422
loss: 1.0037814378738403,grad_norm: 0.8795418434291332, iteration: 396423
loss: 1.0136666297912598,grad_norm: 0.7889451229973756, iteration: 396424
loss: 1.0842266082763672,grad_norm: 0.8231170460863468, iteration: 396425
loss: 0.9810900688171387,grad_norm: 0.8441460465779727, iteration: 396426
loss: 1.0106825828552246,grad_norm: 0.813869435304614, iteration: 396427
loss: 0.9686040878295898,grad_norm: 0.6343291702691404, iteration: 396428
loss: 0.97110915184021,grad_norm: 0.8926989609025994, iteration: 396429
loss: 1.0592979192733765,grad_norm: 0.9999995428929163, iteration: 396430
loss: 0.9650467038154602,grad_norm: 0.9193908386026202, iteration: 396431
loss: 1.0113530158996582,grad_norm: 0.9999994295209538, iteration: 396432
loss: 0.972293496131897,grad_norm: 0.7738486560590867, iteration: 396433
loss: 1.0177979469299316,grad_norm: 0.759249507514881, iteration: 396434
loss: 1.034677505493164,grad_norm: 0.7198069848244093, iteration: 396435
loss: 0.9805187582969666,grad_norm: 0.7537734734271748, iteration: 396436
loss: 0.9869436025619507,grad_norm: 0.7240844859327162, iteration: 396437
loss: 0.9689167737960815,grad_norm: 0.7420022634660165, iteration: 396438
loss: 1.0194218158721924,grad_norm: 0.8363843058142685, iteration: 396439
loss: 1.0314487218856812,grad_norm: 0.9999991337394902, iteration: 396440
loss: 1.0291632413864136,grad_norm: 0.7629409855005225, iteration: 396441
loss: 0.9832879900932312,grad_norm: 0.7576577593535102, iteration: 396442
loss: 0.9476626515388489,grad_norm: 0.9999995548604226, iteration: 396443
loss: 0.9842056035995483,grad_norm: 0.8221440387281105, iteration: 396444
loss: 0.9880020618438721,grad_norm: 0.8575060561747171, iteration: 396445
loss: 0.98419189453125,grad_norm: 0.7156858341370396, iteration: 396446
loss: 0.998173713684082,grad_norm: 0.588012463760794, iteration: 396447
loss: 1.0045281648635864,grad_norm: 0.7380042734592065, iteration: 396448
loss: 1.0205962657928467,grad_norm: 0.999999193552635, iteration: 396449
loss: 0.998081624507904,grad_norm: 0.6274500067271802, iteration: 396450
loss: 1.033840537071228,grad_norm: 0.7386226935744481, iteration: 396451
loss: 1.0203548669815063,grad_norm: 0.918126319884262, iteration: 396452
loss: 1.0172107219696045,grad_norm: 0.6957106003287268, iteration: 396453
loss: 0.9892632961273193,grad_norm: 0.6592632791777505, iteration: 396454
loss: 1.0207041501998901,grad_norm: 0.7715101297744962, iteration: 396455
loss: 0.992505669593811,grad_norm: 0.9999997126860063, iteration: 396456
loss: 1.0338982343673706,grad_norm: 0.8106436192547333, iteration: 396457
loss: 0.9825170040130615,grad_norm: 0.608817251904855, iteration: 396458
loss: 1.044418215751648,grad_norm: 0.9454866913654715, iteration: 396459
loss: 0.9795972108840942,grad_norm: 0.9042576331167461, iteration: 396460
loss: 0.9702975749969482,grad_norm: 0.72761590523127, iteration: 396461
loss: 0.9851132035255432,grad_norm: 0.8240499957150192, iteration: 396462
loss: 1.034509539604187,grad_norm: 0.8051436717884501, iteration: 396463
loss: 0.9947408437728882,grad_norm: 0.8154892100934901, iteration: 396464
loss: 0.9882025718688965,grad_norm: 0.8332279911589359, iteration: 396465
loss: 1.0230653285980225,grad_norm: 0.7350404180601251, iteration: 396466
loss: 0.9813675284385681,grad_norm: 0.7732609476497766, iteration: 396467
loss: 1.0142773389816284,grad_norm: 0.8527800673468617, iteration: 396468
loss: 1.0026473999023438,grad_norm: 0.7749529728075852, iteration: 396469
loss: 0.9708890914916992,grad_norm: 0.8221476287747859, iteration: 396470
loss: 1.0362590551376343,grad_norm: 0.7927508992845788, iteration: 396471
loss: 1.0148471593856812,grad_norm: 0.7105807649190631, iteration: 396472
loss: 1.0226349830627441,grad_norm: 0.9999991595865808, iteration: 396473
loss: 1.0176790952682495,grad_norm: 0.8661184773245045, iteration: 396474
loss: 1.0294657945632935,grad_norm: 0.7854302019672458, iteration: 396475
loss: 0.9957951307296753,grad_norm: 0.7954991339676483, iteration: 396476
loss: 1.0343998670578003,grad_norm: 0.7050423231933434, iteration: 396477
loss: 0.9936654567718506,grad_norm: 0.8120366604161419, iteration: 396478
loss: 0.9776809811592102,grad_norm: 0.7779697899781948, iteration: 396479
loss: 0.9785474538803101,grad_norm: 0.7634917085950547, iteration: 396480
loss: 1.0210543870925903,grad_norm: 0.8662851321772522, iteration: 396481
loss: 1.0161938667297363,grad_norm: 0.7202159572134107, iteration: 396482
loss: 1.0228447914123535,grad_norm: 0.9999995423106499, iteration: 396483
loss: 1.0179383754730225,grad_norm: 0.875570065975242, iteration: 396484
loss: 1.0329047441482544,grad_norm: 0.9999995306477312, iteration: 396485
loss: 0.9889373779296875,grad_norm: 0.7346098789814128, iteration: 396486
loss: 1.0125973224639893,grad_norm: 0.78098360872388, iteration: 396487
loss: 0.9892156720161438,grad_norm: 0.7329350647000648, iteration: 396488
loss: 0.9762431979179382,grad_norm: 0.7259687443927267, iteration: 396489
loss: 0.9986205697059631,grad_norm: 0.8942144286969634, iteration: 396490
loss: 0.9961115121841431,grad_norm: 0.7898478566937196, iteration: 396491
loss: 1.0185816287994385,grad_norm: 0.7317556748596217, iteration: 396492
loss: 0.9706418514251709,grad_norm: 0.8560689792113759, iteration: 396493
loss: 0.970445454120636,grad_norm: 0.7574379410196412, iteration: 396494
loss: 0.9947408437728882,grad_norm: 0.9535660534412914, iteration: 396495
loss: 1.0160179138183594,grad_norm: 0.8309486293943608, iteration: 396496
loss: 0.9862897396087646,grad_norm: 0.7278464408538746, iteration: 396497
loss: 0.9901385307312012,grad_norm: 0.8318923172561012, iteration: 396498
loss: 1.0079089403152466,grad_norm: 0.789779973120589, iteration: 396499
loss: 1.0157109498977661,grad_norm: 0.8137183152832997, iteration: 396500
loss: 1.0022783279418945,grad_norm: 0.7251960211776205, iteration: 396501
loss: 1.0038753747940063,grad_norm: 0.8354210387475247, iteration: 396502
loss: 1.006697416305542,grad_norm: 0.8136287733941341, iteration: 396503
loss: 1.0322681665420532,grad_norm: 0.981071102278945, iteration: 396504
loss: 0.9821195602416992,grad_norm: 0.8185191561951337, iteration: 396505
loss: 1.0026475191116333,grad_norm: 0.712941306605341, iteration: 396506
loss: 1.0303446054458618,grad_norm: 0.7588734473829217, iteration: 396507
loss: 1.0166466236114502,grad_norm: 0.9999991473496767, iteration: 396508
loss: 0.9764086604118347,grad_norm: 0.7850257839254587, iteration: 396509
loss: 0.9827595949172974,grad_norm: 0.6870382766023929, iteration: 396510
loss: 0.9958290457725525,grad_norm: 0.7000537734299201, iteration: 396511
loss: 1.042858362197876,grad_norm: 0.9288449876366458, iteration: 396512
loss: 1.0455784797668457,grad_norm: 0.7940986716366523, iteration: 396513
loss: 1.00289785861969,grad_norm: 0.7529024324161512, iteration: 396514
loss: 1.0231729745864868,grad_norm: 0.9999990765097924, iteration: 396515
loss: 1.0092477798461914,grad_norm: 0.8816598710311367, iteration: 396516
loss: 0.9959515929222107,grad_norm: 0.7523685616377739, iteration: 396517
loss: 1.07062828540802,grad_norm: 0.7727282786071707, iteration: 396518
loss: 0.9744492769241333,grad_norm: 0.813752902822742, iteration: 396519
loss: 0.9637917876243591,grad_norm: 0.9999991762356272, iteration: 396520
loss: 1.0082272291183472,grad_norm: 0.9224043601062896, iteration: 396521
loss: 1.007032871246338,grad_norm: 0.8916386976314119, iteration: 396522
loss: 0.9690662622451782,grad_norm: 0.9281083586921586, iteration: 396523
loss: 0.958869218826294,grad_norm: 0.7624774133036984, iteration: 396524
loss: 0.9642146229743958,grad_norm: 0.7942202687512373, iteration: 396525
loss: 1.0229662656784058,grad_norm: 0.868612845611591, iteration: 396526
loss: 0.9922035336494446,grad_norm: 0.7648146417881961, iteration: 396527
loss: 0.9983634948730469,grad_norm: 0.7748769132753036, iteration: 396528
loss: 1.008551001548767,grad_norm: 0.8313748163521885, iteration: 396529
loss: 1.0086802244186401,grad_norm: 0.9999990452431146, iteration: 396530
loss: 0.9902429580688477,grad_norm: 0.7988651637181952, iteration: 396531
loss: 1.1384429931640625,grad_norm: 0.9999995624059163, iteration: 396532
loss: 1.0326911211013794,grad_norm: 0.9999992878334938, iteration: 396533
loss: 1.0214256048202515,grad_norm: 0.999999194374655, iteration: 396534
loss: 0.9738696217536926,grad_norm: 0.5989997684322451, iteration: 396535
loss: 0.991482138633728,grad_norm: 0.8927766544776431, iteration: 396536
loss: 0.9927488565444946,grad_norm: 0.8346677168409726, iteration: 396537
loss: 1.0101842880249023,grad_norm: 0.7625136961892202, iteration: 396538
loss: 1.0062965154647827,grad_norm: 0.8814095638998256, iteration: 396539
loss: 0.983461856842041,grad_norm: 0.8312481397070172, iteration: 396540
loss: 1.0166208744049072,grad_norm: 0.7207299556908998, iteration: 396541
loss: 0.9911221265792847,grad_norm: 0.6246693331451356, iteration: 396542
loss: 1.0351296663284302,grad_norm: 0.784326989151112, iteration: 396543
loss: 0.9812148213386536,grad_norm: 0.8543929971715134, iteration: 396544
loss: 0.9964762926101685,grad_norm: 0.733539407491807, iteration: 396545
loss: 1.1090961694717407,grad_norm: 0.9999999291360043, iteration: 396546
loss: 0.9826933145523071,grad_norm: 0.689147264978135, iteration: 396547
loss: 0.9900588393211365,grad_norm: 0.9999994264776125, iteration: 396548
loss: 1.0031989812850952,grad_norm: 0.7928078477976422, iteration: 396549
loss: 0.9949548244476318,grad_norm: 0.8927401491948945, iteration: 396550
loss: 1.112186312675476,grad_norm: 0.9999992406969829, iteration: 396551
loss: 1.0242846012115479,grad_norm: 0.647078654246245, iteration: 396552
loss: 1.019476056098938,grad_norm: 0.7561548091835503, iteration: 396553
loss: 1.0091488361358643,grad_norm: 0.7711773457106608, iteration: 396554
loss: 1.028306007385254,grad_norm: 0.7563893352331578, iteration: 396555
loss: 0.9738278388977051,grad_norm: 0.8684361262361573, iteration: 396556
loss: 1.081214189529419,grad_norm: 0.9999995877293534, iteration: 396557
loss: 0.9637671113014221,grad_norm: 0.840857370037183, iteration: 396558
loss: 1.0225262641906738,grad_norm: 0.9501031147995556, iteration: 396559
loss: 1.0285109281539917,grad_norm: 0.6686189365389589, iteration: 396560
loss: 0.9853898882865906,grad_norm: 0.77103990366073, iteration: 396561
loss: 0.9686362147331238,grad_norm: 0.8179397788931948, iteration: 396562
loss: 1.0046141147613525,grad_norm: 0.8530032142675589, iteration: 396563
loss: 0.9958716630935669,grad_norm: 0.7545835167829095, iteration: 396564
loss: 0.9856325387954712,grad_norm: 0.7462237976382619, iteration: 396565
loss: 1.0117775201797485,grad_norm: 0.9999999540151215, iteration: 396566
loss: 1.036786437034607,grad_norm: 0.7149360472683676, iteration: 396567
loss: 1.0178322792053223,grad_norm: 0.9999991840059061, iteration: 396568
loss: 1.0513452291488647,grad_norm: 0.9401428319096967, iteration: 396569
loss: 0.9399828314781189,grad_norm: 0.7009866831176461, iteration: 396570
loss: 0.967179000377655,grad_norm: 0.73035044367444, iteration: 396571
loss: 0.9757643938064575,grad_norm: 0.8888495005316224, iteration: 396572
loss: 1.0392295122146606,grad_norm: 0.7238631095140051, iteration: 396573
loss: 1.017836332321167,grad_norm: 0.720135751238933, iteration: 396574
loss: 0.9860506653785706,grad_norm: 0.7483392214398907, iteration: 396575
loss: 0.9955248236656189,grad_norm: 0.7877071030939237, iteration: 396576
loss: 0.9825926423072815,grad_norm: 0.7778397584573945, iteration: 396577
loss: 0.994868278503418,grad_norm: 0.9074026593794772, iteration: 396578
loss: 1.006665587425232,grad_norm: 0.9422146602471503, iteration: 396579
loss: 0.9757000207901001,grad_norm: 0.8039353471526147, iteration: 396580
loss: 0.9921669960021973,grad_norm: 0.8308334168734929, iteration: 396581
loss: 1.0055875778198242,grad_norm: 0.9999997367053906, iteration: 396582
loss: 0.9804255366325378,grad_norm: 0.8071995859000966, iteration: 396583
loss: 0.9756919741630554,grad_norm: 0.8482045358392556, iteration: 396584
loss: 1.0149247646331787,grad_norm: 0.7518865583673721, iteration: 396585
loss: 1.0231984853744507,grad_norm: 0.9121440392110451, iteration: 396586
loss: 1.0196808576583862,grad_norm: 0.7292367105596913, iteration: 396587
loss: 1.0017579793930054,grad_norm: 0.9604318256766027, iteration: 396588
loss: 0.9825596809387207,grad_norm: 0.7050765791091517, iteration: 396589
loss: 0.9650792479515076,grad_norm: 0.817507143071863, iteration: 396590
loss: 0.9793403148651123,grad_norm: 0.7473178200432599, iteration: 396591
loss: 0.9988717436790466,grad_norm: 0.8630597478226877, iteration: 396592
loss: 1.0135854482650757,grad_norm: 0.9999989642480227, iteration: 396593
loss: 1.0103046894073486,grad_norm: 0.7129598116609532, iteration: 396594
loss: 0.9828223586082458,grad_norm: 0.7400217126020285, iteration: 396595
loss: 1.0367231369018555,grad_norm: 0.7822311147116995, iteration: 396596
loss: 0.9933342933654785,grad_norm: 0.8420146770554385, iteration: 396597
loss: 1.0035603046417236,grad_norm: 0.8327443346053526, iteration: 396598
loss: 0.9816149473190308,grad_norm: 0.9999993570718684, iteration: 396599
loss: 1.1250531673431396,grad_norm: 0.9999994591653264, iteration: 396600
loss: 1.016026258468628,grad_norm: 0.8963470078749911, iteration: 396601
loss: 1.022867202758789,grad_norm: 0.8361364030737537, iteration: 396602
loss: 0.9523484110832214,grad_norm: 0.9314004061981557, iteration: 396603
loss: 1.0097261667251587,grad_norm: 0.8196048324355726, iteration: 396604
loss: 1.0045979022979736,grad_norm: 0.717399115205183, iteration: 396605
loss: 1.0327404737472534,grad_norm: 0.7185979230072648, iteration: 396606
loss: 0.9495241045951843,grad_norm: 0.8023622595447334, iteration: 396607
loss: 1.042339563369751,grad_norm: 0.9999998313230939, iteration: 396608
loss: 0.9789864420890808,grad_norm: 0.7943632379452451, iteration: 396609
loss: 0.9874538779258728,grad_norm: 0.9999995934629459, iteration: 396610
loss: 0.9832892417907715,grad_norm: 0.7478270767759309, iteration: 396611
loss: 0.9938588738441467,grad_norm: 0.9237989955335781, iteration: 396612
loss: 1.0208470821380615,grad_norm: 0.8565180065108852, iteration: 396613
loss: 1.013129711151123,grad_norm: 0.9448127965806059, iteration: 396614
loss: 1.017572045326233,grad_norm: 0.7912323142087699, iteration: 396615
loss: 0.9937577247619629,grad_norm: 0.6877620140989742, iteration: 396616
loss: 1.0407662391662598,grad_norm: 0.9494395787701888, iteration: 396617
loss: 0.9969834089279175,grad_norm: 0.7895473087796281, iteration: 396618
loss: 1.0244873762130737,grad_norm: 0.6887154118866382, iteration: 396619
loss: 1.0340769290924072,grad_norm: 0.7168739523546392, iteration: 396620
loss: 1.0014910697937012,grad_norm: 0.710738521627979, iteration: 396621
loss: 0.9620449542999268,grad_norm: 0.7366524480667267, iteration: 396622
loss: 1.0055724382400513,grad_norm: 0.9999995349935944, iteration: 396623
loss: 1.0117433071136475,grad_norm: 0.8853015152269493, iteration: 396624
loss: 0.9855395555496216,grad_norm: 0.7313200713114384, iteration: 396625
loss: 0.9810419678688049,grad_norm: 0.7065251722980623, iteration: 396626
loss: 1.015038013458252,grad_norm: 0.6808236146903462, iteration: 396627
loss: 1.022398829460144,grad_norm: 0.9999990920090118, iteration: 396628
loss: 1.0390492677688599,grad_norm: 0.7761218739558998, iteration: 396629
loss: 1.0800899267196655,grad_norm: 0.9999999215281381, iteration: 396630
loss: 0.979068398475647,grad_norm: 0.6962498708064198, iteration: 396631
loss: 0.9973169565200806,grad_norm: 0.898952653603869, iteration: 396632
loss: 1.0081231594085693,grad_norm: 0.8979475606323809, iteration: 396633
loss: 0.9924690127372742,grad_norm: 0.7862971856558485, iteration: 396634
loss: 1.038068175315857,grad_norm: 0.8213818395202659, iteration: 396635
loss: 0.9956902861595154,grad_norm: 0.674134963604975, iteration: 396636
loss: 0.9916408061981201,grad_norm: 0.7308067070652031, iteration: 396637
loss: 0.9724540710449219,grad_norm: 0.9071227296038404, iteration: 396638
loss: 1.0092918872833252,grad_norm: 0.7519200279722567, iteration: 396639
loss: 0.9857428073883057,grad_norm: 0.7901500424888058, iteration: 396640
loss: 1.0032963752746582,grad_norm: 0.9999998462607835, iteration: 396641
loss: 1.0158571004867554,grad_norm: 0.819096621811139, iteration: 396642
loss: 1.0527517795562744,grad_norm: 0.8224131348526189, iteration: 396643
loss: 1.0291717052459717,grad_norm: 0.8652563700371058, iteration: 396644
loss: 1.0091853141784668,grad_norm: 0.7931202072796274, iteration: 396645
loss: 1.029329776763916,grad_norm: 0.8950570430571007, iteration: 396646
loss: 1.033378005027771,grad_norm: 0.9999995686914365, iteration: 396647
loss: 1.0360462665557861,grad_norm: 0.7416787048998119, iteration: 396648
loss: 1.016310691833496,grad_norm: 0.6747085035203808, iteration: 396649
loss: 1.000792384147644,grad_norm: 0.6980312614515745, iteration: 396650
loss: 1.0154945850372314,grad_norm: 0.6924338113865056, iteration: 396651
loss: 1.0109087228775024,grad_norm: 0.9999990542936072, iteration: 396652
loss: 1.024167776107788,grad_norm: 0.7857805925782831, iteration: 396653
loss: 1.028889775276184,grad_norm: 0.9256802709198059, iteration: 396654
loss: 0.9799624681472778,grad_norm: 0.9999999252973518, iteration: 396655
loss: 0.9816773533821106,grad_norm: 0.7671681993421554, iteration: 396656
loss: 0.9415878057479858,grad_norm: 0.8033219921468084, iteration: 396657
loss: 0.9911758303642273,grad_norm: 0.7701539389189177, iteration: 396658
loss: 0.9509910941123962,grad_norm: 0.8790086086273712, iteration: 396659
loss: 0.9947601556777954,grad_norm: 0.7568401479645792, iteration: 396660
loss: 1.0121548175811768,grad_norm: 0.7372763501599418, iteration: 396661
loss: 1.0079963207244873,grad_norm: 0.7498666922138346, iteration: 396662
loss: 0.9914979338645935,grad_norm: 0.8184750263943374, iteration: 396663
loss: 0.9934598207473755,grad_norm: 0.8005907941063205, iteration: 396664
loss: 0.9591379761695862,grad_norm: 0.7911637276734934, iteration: 396665
loss: 1.0291516780853271,grad_norm: 0.7972634160992558, iteration: 396666
loss: 0.9832480549812317,grad_norm: 0.7475518941110528, iteration: 396667
loss: 1.0048531293869019,grad_norm: 0.9999998884321027, iteration: 396668
loss: 1.0097461938858032,grad_norm: 0.949263947951815, iteration: 396669
loss: 1.0395514965057373,grad_norm: 0.733813610604239, iteration: 396670
loss: 1.0088540315628052,grad_norm: 0.9999993906227989, iteration: 396671
loss: 1.0149850845336914,grad_norm: 0.9966422800949558, iteration: 396672
loss: 0.9811418056488037,grad_norm: 0.7929254449161649, iteration: 396673
loss: 0.984571635723114,grad_norm: 0.8769834738948636, iteration: 396674
loss: 1.0356861352920532,grad_norm: 0.9999991145942944, iteration: 396675
loss: 0.9894880056381226,grad_norm: 0.7247199998519556, iteration: 396676
loss: 1.0164674520492554,grad_norm: 0.7622117608058117, iteration: 396677
loss: 0.9788320064544678,grad_norm: 0.8248840416047853, iteration: 396678
loss: 1.0289945602416992,grad_norm: 0.8314541348492759, iteration: 396679
loss: 0.9948269724845886,grad_norm: 0.9005947511038243, iteration: 396680
loss: 1.012542963027954,grad_norm: 0.7421522782982496, iteration: 396681
loss: 1.0077736377716064,grad_norm: 0.6647860300025751, iteration: 396682
loss: 1.0032970905303955,grad_norm: 0.8319146396675687, iteration: 396683
loss: 0.9463403820991516,grad_norm: 0.8159044141100283, iteration: 396684
loss: 1.0002782344818115,grad_norm: 0.6954642507191664, iteration: 396685
loss: 1.0031216144561768,grad_norm: 0.7403558030557134, iteration: 396686
loss: 0.9618085026741028,grad_norm: 0.7257242474636088, iteration: 396687
loss: 1.0141024589538574,grad_norm: 0.9078960681528414, iteration: 396688
loss: 0.9904250502586365,grad_norm: 0.7598142156422626, iteration: 396689
loss: 0.9606218934059143,grad_norm: 0.7598663441800434, iteration: 396690
loss: 1.0377660989761353,grad_norm: 0.7101135712494502, iteration: 396691
loss: 1.0154497623443604,grad_norm: 0.6303291094004557, iteration: 396692
loss: 1.028387188911438,grad_norm: 0.9999992066093993, iteration: 396693
loss: 0.9730477333068848,grad_norm: 0.7733063092281295, iteration: 396694
loss: 1.0143426656723022,grad_norm: 0.7757790321617283, iteration: 396695
loss: 1.0143494606018066,grad_norm: 0.7874658496803394, iteration: 396696
loss: 0.9792361855506897,grad_norm: 0.8133834948678251, iteration: 396697
loss: 0.9907684326171875,grad_norm: 0.9999991375418955, iteration: 396698
loss: 0.994017481803894,grad_norm: 0.9311298896127729, iteration: 396699
loss: 1.0007890462875366,grad_norm: 0.80006828815175, iteration: 396700
loss: 1.0008294582366943,grad_norm: 0.7851640484018199, iteration: 396701
loss: 0.9804568886756897,grad_norm: 0.7107287196636543, iteration: 396702
loss: 0.9708642959594727,grad_norm: 0.6820645241075681, iteration: 396703
loss: 0.9948892593383789,grad_norm: 0.9294071470106723, iteration: 396704
loss: 0.9994394183158875,grad_norm: 0.8082396149840384, iteration: 396705
loss: 1.0184317827224731,grad_norm: 0.8878439574596562, iteration: 396706
loss: 1.0370986461639404,grad_norm: 0.6244092826787033, iteration: 396707
loss: 1.0028269290924072,grad_norm: 0.860325905465602, iteration: 396708
loss: 0.9573938846588135,grad_norm: 0.8824547718189689, iteration: 396709
loss: 1.0040315389633179,grad_norm: 0.6225196563657618, iteration: 396710
loss: 0.9765206575393677,grad_norm: 0.7335490924908137, iteration: 396711
loss: 1.0341088771820068,grad_norm: 0.7937848844745486, iteration: 396712
loss: 0.9967486262321472,grad_norm: 0.7698926730659151, iteration: 396713
loss: 1.0283515453338623,grad_norm: 0.9065009833470987, iteration: 396714
loss: 1.0263599157333374,grad_norm: 0.9999995806722568, iteration: 396715
loss: 0.9921097159385681,grad_norm: 0.7524574632216952, iteration: 396716
loss: 0.9732595086097717,grad_norm: 0.7300532423357776, iteration: 396717
loss: 1.0065182447433472,grad_norm: 0.9456170413112596, iteration: 396718
loss: 1.044140100479126,grad_norm: 0.9999995602487771, iteration: 396719
loss: 0.9713306427001953,grad_norm: 0.8223888578765992, iteration: 396720
loss: 1.0000373125076294,grad_norm: 0.8381992941476886, iteration: 396721
loss: 1.0566290616989136,grad_norm: 0.8109243201547878, iteration: 396722
loss: 0.9902967810630798,grad_norm: 0.7506465797613683, iteration: 396723
loss: 1.014416217803955,grad_norm: 0.8455440440604147, iteration: 396724
loss: 1.0253102779388428,grad_norm: 0.9999995724297219, iteration: 396725
loss: 0.9650136232376099,grad_norm: 0.6794010581193307, iteration: 396726
loss: 0.9763194918632507,grad_norm: 0.8973947474888494, iteration: 396727
loss: 0.9904030561447144,grad_norm: 0.6572884719432718, iteration: 396728
loss: 1.0209990739822388,grad_norm: 0.999999835971984, iteration: 396729
loss: 0.9913591146469116,grad_norm: 0.7602022023452261, iteration: 396730
loss: 1.0245381593704224,grad_norm: 0.7312054226214115, iteration: 396731
loss: 0.9677653908729553,grad_norm: 0.8964904500320205, iteration: 396732
loss: 0.9754297137260437,grad_norm: 0.6874186742907569, iteration: 396733
loss: 1.0107908248901367,grad_norm: 0.9118476140329493, iteration: 396734
loss: 0.9950305819511414,grad_norm: 0.7292785004037987, iteration: 396735
loss: 1.10430908203125,grad_norm: 0.9999998232980497, iteration: 396736
loss: 1.0177340507507324,grad_norm: 0.999999712704155, iteration: 396737
loss: 0.9923161268234253,grad_norm: 0.7122051143075662, iteration: 396738
loss: 1.004761815071106,grad_norm: 0.748280969085437, iteration: 396739
loss: 1.0312169790267944,grad_norm: 0.7261476738450915, iteration: 396740
loss: 0.9760841727256775,grad_norm: 0.747303786778969, iteration: 396741
loss: 1.0483962297439575,grad_norm: 0.9999999389932551, iteration: 396742
loss: 0.9776405692100525,grad_norm: 0.7676152955548082, iteration: 396743
loss: 0.9656898379325867,grad_norm: 0.7054373918813468, iteration: 396744
loss: 1.0124197006225586,grad_norm: 0.6440803103489048, iteration: 396745
loss: 1.1336644887924194,grad_norm: 0.9999999137783023, iteration: 396746
loss: 1.0066900253295898,grad_norm: 0.6617454937033361, iteration: 396747
loss: 0.9858394861221313,grad_norm: 0.7543601597360745, iteration: 396748
loss: 0.9975945353507996,grad_norm: 0.789428012689256, iteration: 396749
loss: 1.0154155492782593,grad_norm: 0.9999996948912063, iteration: 396750
loss: 0.9932975172996521,grad_norm: 0.6511971539128146, iteration: 396751
loss: 1.036086082458496,grad_norm: 0.9999991041047038, iteration: 396752
loss: 0.9955002665519714,grad_norm: 0.6144808319641965, iteration: 396753
loss: 1.3287105560302734,grad_norm: 0.9999994133330938, iteration: 396754
loss: 1.169630765914917,grad_norm: 1.0000000186594151, iteration: 396755
loss: 0.9947202801704407,grad_norm: 0.7869563400652679, iteration: 396756
loss: 1.0884214639663696,grad_norm: 0.9999998147969698, iteration: 396757
loss: 0.9929852485656738,grad_norm: 0.8435169335483691, iteration: 396758
loss: 1.0331759452819824,grad_norm: 0.8055850791055913, iteration: 396759
loss: 1.390663981437683,grad_norm: 0.9999996206752039, iteration: 396760
loss: 1.4056179523468018,grad_norm: 1.0000000156071223, iteration: 396761
loss: 0.9946653842926025,grad_norm: 0.9999996450730222, iteration: 396762
loss: 0.9957202672958374,grad_norm: 0.9749477117743893, iteration: 396763
loss: 1.0372285842895508,grad_norm: 0.9999994862581962, iteration: 396764
loss: 1.0606515407562256,grad_norm: 0.9999992250834442, iteration: 396765
loss: 1.0408687591552734,grad_norm: 0.9999996328297062, iteration: 396766
loss: 1.0185229778289795,grad_norm: 0.6354348544965295, iteration: 396767
loss: 1.0847978591918945,grad_norm: 0.999999085722855, iteration: 396768
loss: 1.1335022449493408,grad_norm: 0.9999992548005238, iteration: 396769
loss: 0.9897748231887817,grad_norm: 0.8397375551852138, iteration: 396770
loss: 1.058788537979126,grad_norm: 0.9999999016496525, iteration: 396771
loss: 1.0979344844818115,grad_norm: 0.9999993540918415, iteration: 396772
loss: 0.9973447918891907,grad_norm: 0.8746732394360273, iteration: 396773
loss: 0.9799017906188965,grad_norm: 0.7851883324193422, iteration: 396774
loss: 1.0450462102890015,grad_norm: 0.9999999497190502, iteration: 396775
loss: 0.9627178907394409,grad_norm: 0.9999993584929111, iteration: 396776
loss: 1.0307612419128418,grad_norm: 0.9374459550800048, iteration: 396777
loss: 1.0108674764633179,grad_norm: 0.9999990059790271, iteration: 396778
loss: 1.0269988775253296,grad_norm: 0.9999995035482274, iteration: 396779
loss: 1.061630129814148,grad_norm: 0.9999996536763288, iteration: 396780
loss: 1.2230618000030518,grad_norm: 0.9999999852337662, iteration: 396781
loss: 1.0078436136245728,grad_norm: 0.9053494702138393, iteration: 396782
loss: 1.1233046054840088,grad_norm: 0.9999994904425845, iteration: 396783
loss: 1.0188013315200806,grad_norm: 0.9999989196041018, iteration: 396784
loss: 0.9899991154670715,grad_norm: 0.83041206666444, iteration: 396785
loss: 1.031531572341919,grad_norm: 0.9999995300681797, iteration: 396786
loss: 1.00461745262146,grad_norm: 0.7249744366903275, iteration: 396787
loss: 1.0491944551467896,grad_norm: 0.9999993251415991, iteration: 396788
loss: 0.9964010715484619,grad_norm: 0.9173944702783752, iteration: 396789
loss: 0.98444002866745,grad_norm: 0.7756300748688801, iteration: 396790
loss: 1.0227179527282715,grad_norm: 0.8901573948172543, iteration: 396791
loss: 0.9668691158294678,grad_norm: 0.7919501750519847, iteration: 396792
loss: 1.0038602352142334,grad_norm: 0.8801625946396857, iteration: 396793
loss: 0.9680984020233154,grad_norm: 0.6929444598335811, iteration: 396794
loss: 1.0366231203079224,grad_norm: 0.9999993742317622, iteration: 396795
loss: 0.9707275629043579,grad_norm: 0.862867983175026, iteration: 396796
loss: 1.0142241716384888,grad_norm: 0.6780275360868915, iteration: 396797
loss: 0.9837763905525208,grad_norm: 0.662402551952254, iteration: 396798
loss: 1.0267183780670166,grad_norm: 0.8381689020205552, iteration: 396799
loss: 1.004199504852295,grad_norm: 0.8154586453666921, iteration: 396800
loss: 0.9941940307617188,grad_norm: 0.8623180519890493, iteration: 396801
loss: 0.9734359979629517,grad_norm: 0.6558018552271789, iteration: 396802
loss: 0.9909682273864746,grad_norm: 0.7440100747982886, iteration: 396803
loss: 0.9689996242523193,grad_norm: 0.7815868153545802, iteration: 396804
loss: 0.967811644077301,grad_norm: 0.560538921255712, iteration: 396805
loss: 1.005507469177246,grad_norm: 0.8332238226080576, iteration: 396806
loss: 0.9997930526733398,grad_norm: 0.7728330461595917, iteration: 396807
loss: 1.0183889865875244,grad_norm: 0.7979289621240604, iteration: 396808
loss: 0.9572423100471497,grad_norm: 0.8881823492192281, iteration: 396809
loss: 0.9929174780845642,grad_norm: 0.7507641266634177, iteration: 396810
loss: 0.9958432912826538,grad_norm: 0.8335501175675936, iteration: 396811
loss: 1.0046416521072388,grad_norm: 0.6831149760875667, iteration: 396812
loss: 1.0126959085464478,grad_norm: 0.9462939163064761, iteration: 396813
loss: 0.9529260396957397,grad_norm: 0.88656655856336, iteration: 396814
loss: 0.9817330241203308,grad_norm: 0.7918047091525799, iteration: 396815
loss: 1.047723650932312,grad_norm: 0.7831918597364655, iteration: 396816
loss: 0.948217511177063,grad_norm: 0.7066751826880328, iteration: 396817
loss: 0.994102954864502,grad_norm: 0.6706851906081117, iteration: 396818
loss: 0.943608820438385,grad_norm: 0.8331339925197906, iteration: 396819
loss: 1.0285073518753052,grad_norm: 0.8350966101760855, iteration: 396820
loss: 0.992509126663208,grad_norm: 0.8812237543324225, iteration: 396821
loss: 1.0279279947280884,grad_norm: 0.71721490873732, iteration: 396822
loss: 1.016594409942627,grad_norm: 0.8847505728143039, iteration: 396823
loss: 1.0047374963760376,grad_norm: 0.8572578476078277, iteration: 396824
loss: 0.978794276714325,grad_norm: 0.7883034743090009, iteration: 396825
loss: 1.0059030055999756,grad_norm: 0.8033684587506795, iteration: 396826
loss: 1.0382080078125,grad_norm: 0.9999995084796641, iteration: 396827
loss: 1.0012187957763672,grad_norm: 0.7079488379681766, iteration: 396828
loss: 0.9946305751800537,grad_norm: 0.8122236190074718, iteration: 396829
loss: 1.00630784034729,grad_norm: 0.8061318318816304, iteration: 396830
loss: 0.9974154233932495,grad_norm: 0.6832427538061684, iteration: 396831
loss: 1.0305310487747192,grad_norm: 0.848439603248685, iteration: 396832
loss: 1.0342509746551514,grad_norm: 0.9898318932793474, iteration: 396833
loss: 0.9760320782661438,grad_norm: 0.9353624579570379, iteration: 396834
loss: 1.0216740369796753,grad_norm: 0.9157922131249538, iteration: 396835
loss: 0.9882262945175171,grad_norm: 0.6794179060890357, iteration: 396836
loss: 0.9749523997306824,grad_norm: 0.7636301143325973, iteration: 396837
loss: 0.9886291027069092,grad_norm: 0.7133266325037492, iteration: 396838
loss: 0.9821237325668335,grad_norm: 0.7446114449498296, iteration: 396839
loss: 0.9980294704437256,grad_norm: 0.8896247945161817, iteration: 396840
loss: 0.9899711012840271,grad_norm: 0.764755874444222, iteration: 396841
loss: 1.033453106880188,grad_norm: 0.7956025026372606, iteration: 396842
loss: 0.9755730628967285,grad_norm: 0.80576677689063, iteration: 396843
loss: 0.9862969517707825,grad_norm: 0.8406669354598638, iteration: 396844
loss: 1.009684443473816,grad_norm: 0.7549676274861058, iteration: 396845
loss: 0.9697945713996887,grad_norm: 0.7389300816688994, iteration: 396846
loss: 0.986929178237915,grad_norm: 0.8569305186623648, iteration: 396847
loss: 1.0351645946502686,grad_norm: 0.8808985640284507, iteration: 396848
loss: 0.9775153994560242,grad_norm: 0.7048545086386778, iteration: 396849
loss: 1.0009578466415405,grad_norm: 0.6485339912219305, iteration: 396850
loss: 0.9932705760002136,grad_norm: 0.7956441576733247, iteration: 396851
loss: 1.0313501358032227,grad_norm: 0.9310493420013484, iteration: 396852
loss: 1.0134721994400024,grad_norm: 0.7642859097554024, iteration: 396853
loss: 1.006203055381775,grad_norm: 0.6588800474517299, iteration: 396854
loss: 1.0196975469589233,grad_norm: 0.8187868071581189, iteration: 396855
loss: 1.0357871055603027,grad_norm: 0.9999996528785825, iteration: 396856
loss: 1.0172008275985718,grad_norm: 0.8582382440320305, iteration: 396857
loss: 1.0173695087432861,grad_norm: 0.85716236290638, iteration: 396858
loss: 0.976223886013031,grad_norm: 0.8087155074735799, iteration: 396859
loss: 0.9725847244262695,grad_norm: 0.8055560927125129, iteration: 396860
loss: 1.0343161821365356,grad_norm: 0.9999990031109839, iteration: 396861
loss: 1.0180280208587646,grad_norm: 0.7929735234065534, iteration: 396862
loss: 1.0183607339859009,grad_norm: 0.7315104469245396, iteration: 396863
loss: 0.9575771689414978,grad_norm: 0.7088000712992031, iteration: 396864
loss: 0.9893566966056824,grad_norm: 0.8143594683113616, iteration: 396865
loss: 1.005066990852356,grad_norm: 0.6828970259650408, iteration: 396866
loss: 0.9964113831520081,grad_norm: 0.7323373020981813, iteration: 396867
loss: 1.0419325828552246,grad_norm: 0.8513094894843125, iteration: 396868
loss: 0.9874098300933838,grad_norm: 0.6975068604138582, iteration: 396869
loss: 1.0257748365402222,grad_norm: 0.8690099906142817, iteration: 396870
loss: 0.9823442101478577,grad_norm: 0.9999990569580541, iteration: 396871
loss: 0.9883182644844055,grad_norm: 0.7333733326376328, iteration: 396872
loss: 0.9899466633796692,grad_norm: 0.9999990881495224, iteration: 396873
loss: 0.9654850959777832,grad_norm: 0.6686917194442015, iteration: 396874
loss: 1.0021474361419678,grad_norm: 0.8861114778061808, iteration: 396875
loss: 1.0095160007476807,grad_norm: 0.8692584050695116, iteration: 396876
loss: 0.9933265447616577,grad_norm: 0.7155752674138607, iteration: 396877
loss: 0.9531617760658264,grad_norm: 0.8156472509444567, iteration: 396878
loss: 1.0182405710220337,grad_norm: 0.867604393620625, iteration: 396879
loss: 0.9973760843276978,grad_norm: 0.6751974250800784, iteration: 396880
loss: 1.050102710723877,grad_norm: 0.8220050201918276, iteration: 396881
loss: 0.9946274757385254,grad_norm: 0.8586041256820287, iteration: 396882
loss: 1.005133032798767,grad_norm: 0.9999990793814403, iteration: 396883
loss: 0.989641547203064,grad_norm: 0.7073366706236688, iteration: 396884
loss: 1.0021268129348755,grad_norm: 0.8242690035284826, iteration: 396885
loss: 0.9723751544952393,grad_norm: 0.8028482186630498, iteration: 396886
loss: 1.0009665489196777,grad_norm: 0.7938202923455834, iteration: 396887
loss: 0.9690186977386475,grad_norm: 0.7671886420459998, iteration: 396888
loss: 1.0120651721954346,grad_norm: 0.7876901852133931, iteration: 396889
loss: 0.9760103821754456,grad_norm: 0.8305973908409091, iteration: 396890
loss: 1.0018126964569092,grad_norm: 0.8482772257483462, iteration: 396891
loss: 1.0072929859161377,grad_norm: 0.999999135336685, iteration: 396892
loss: 1.018109917640686,grad_norm: 0.8873333373460439, iteration: 396893
loss: 1.0333619117736816,grad_norm: 0.9999995110834642, iteration: 396894
loss: 1.035054087638855,grad_norm: 0.9999991323863283, iteration: 396895
loss: 0.9963034987449646,grad_norm: 0.8069919669618437, iteration: 396896
loss: 1.004682183265686,grad_norm: 0.8856160156178707, iteration: 396897
loss: 0.9763027429580688,grad_norm: 0.7960329648051443, iteration: 396898
loss: 0.9911898970603943,grad_norm: 0.9999991901736437, iteration: 396899
loss: 1.0031070709228516,grad_norm: 0.7493376533449888, iteration: 396900
loss: 1.031599760055542,grad_norm: 0.8697682448012746, iteration: 396901
loss: 1.0107057094573975,grad_norm: 0.9999990830905845, iteration: 396902
loss: 0.9514921307563782,grad_norm: 0.9671738135537054, iteration: 396903
loss: 0.9787814617156982,grad_norm: 0.7922769299931867, iteration: 396904
loss: 1.0121608972549438,grad_norm: 0.8662635255851413, iteration: 396905
loss: 0.9988415241241455,grad_norm: 0.9999993850427285, iteration: 396906
loss: 0.9796457886695862,grad_norm: 0.6629301712469151, iteration: 396907
loss: 0.985842764377594,grad_norm: 0.7399093909688828, iteration: 396908
loss: 1.013508677482605,grad_norm: 0.8058050897127843, iteration: 396909
loss: 1.0084065198898315,grad_norm: 0.9999990535974455, iteration: 396910
loss: 1.0230532884597778,grad_norm: 0.7683463110526764, iteration: 396911
loss: 1.0180801153182983,grad_norm: 0.7760296286645663, iteration: 396912
loss: 0.9659987688064575,grad_norm: 0.6596657531862616, iteration: 396913
loss: 0.9909235239028931,grad_norm: 0.9999995968155021, iteration: 396914
loss: 1.0071353912353516,grad_norm: 0.7889327404834494, iteration: 396915
loss: 1.0095361471176147,grad_norm: 0.8153778968353563, iteration: 396916
loss: 0.9868742823600769,grad_norm: 0.8836576986837451, iteration: 396917
loss: 1.0162287950515747,grad_norm: 0.8276104793488899, iteration: 396918
loss: 1.022920846939087,grad_norm: 0.850025920893955, iteration: 396919
loss: 1.0062795877456665,grad_norm: 0.703228522223912, iteration: 396920
loss: 0.9931790828704834,grad_norm: 0.6876921354865773, iteration: 396921
loss: 0.9473193883895874,grad_norm: 0.7564992576504936, iteration: 396922
loss: 0.977893590927124,grad_norm: 0.7850413458483625, iteration: 396923
loss: 0.979246973991394,grad_norm: 0.8121514568587679, iteration: 396924
loss: 1.0043702125549316,grad_norm: 0.6436069433275583, iteration: 396925
loss: 1.0266754627227783,grad_norm: 0.9999998404350601, iteration: 396926
loss: 0.9811146259307861,grad_norm: 0.8062741843702967, iteration: 396927
loss: 0.971103310585022,grad_norm: 0.7869249579912486, iteration: 396928
loss: 1.0150341987609863,grad_norm: 0.6855033677107113, iteration: 396929
loss: 0.9671387076377869,grad_norm: 0.8344360840818831, iteration: 396930
loss: 0.9695847630500793,grad_norm: 0.9245240626336524, iteration: 396931
loss: 1.0385987758636475,grad_norm: 0.775152720291963, iteration: 396932
loss: 0.9999029636383057,grad_norm: 0.7636378802708755, iteration: 396933
loss: 1.060249924659729,grad_norm: 0.9999994408097383, iteration: 396934
loss: 1.0176421403884888,grad_norm: 0.8595801320094527, iteration: 396935
loss: 0.9586495161056519,grad_norm: 0.7501688806059933, iteration: 396936
loss: 1.0227795839309692,grad_norm: 0.9999996605438126, iteration: 396937
loss: 0.9653100371360779,grad_norm: 0.740387059163644, iteration: 396938
loss: 1.007752776145935,grad_norm: 0.8221418497464552, iteration: 396939
loss: 1.0235613584518433,grad_norm: 0.999999408241331, iteration: 396940
loss: 1.0198471546173096,grad_norm: 0.6955721051292573, iteration: 396941
loss: 0.9975160956382751,grad_norm: 0.936260670800167, iteration: 396942
loss: 0.9780527949333191,grad_norm: 0.9999996754433653, iteration: 396943
loss: 0.9502111673355103,grad_norm: 0.7325089986822028, iteration: 396944
loss: 1.0303269624710083,grad_norm: 0.7662460020562999, iteration: 396945
loss: 1.0161052942276,grad_norm: 0.8497120798989706, iteration: 396946
loss: 1.0000214576721191,grad_norm: 0.8960070217878644, iteration: 396947
loss: 0.9612789750099182,grad_norm: 0.606525652700894, iteration: 396948
loss: 1.0942400693893433,grad_norm: 0.7745534199807226, iteration: 396949
loss: 0.9714788794517517,grad_norm: 0.8269610174891211, iteration: 396950
loss: 0.9808600544929504,grad_norm: 0.7283529827622882, iteration: 396951
loss: 0.9733368158340454,grad_norm: 0.8251689271930631, iteration: 396952
loss: 1.0087980031967163,grad_norm: 0.7656462084257493, iteration: 396953
loss: 0.969292938709259,grad_norm: 0.7863625012882729, iteration: 396954
loss: 0.9791541695594788,grad_norm: 0.6924967341899826, iteration: 396955
loss: 1.0029746294021606,grad_norm: 0.7565440041703337, iteration: 396956
loss: 1.0010993480682373,grad_norm: 0.8571083434958924, iteration: 396957
loss: 1.0062590837478638,grad_norm: 0.910282510235658, iteration: 396958
loss: 1.012380838394165,grad_norm: 0.8226366375806957, iteration: 396959
loss: 1.0163801908493042,grad_norm: 0.9381777808306982, iteration: 396960
loss: 0.9898849725723267,grad_norm: 0.7482662972303301, iteration: 396961
loss: 0.9914384484291077,grad_norm: 0.7224866567111831, iteration: 396962
loss: 1.0032767057418823,grad_norm: 0.7273743251060977, iteration: 396963
loss: 1.0868372917175293,grad_norm: 0.7965643993385727, iteration: 396964
loss: 1.015317678451538,grad_norm: 0.7915079114875186, iteration: 396965
loss: 1.0371155738830566,grad_norm: 0.9999991031624034, iteration: 396966
loss: 0.9781621098518372,grad_norm: 0.9202400322705685, iteration: 396967
loss: 1.0116814374923706,grad_norm: 0.6605040082426374, iteration: 396968
loss: 0.9956520795822144,grad_norm: 0.8544098894263453, iteration: 396969
loss: 1.0208814144134521,grad_norm: 0.9044895042729838, iteration: 396970
loss: 1.0280898809432983,grad_norm: 0.7552401966441578, iteration: 396971
loss: 0.9777707457542419,grad_norm: 0.7603124623843833, iteration: 396972
loss: 1.0249899625778198,grad_norm: 0.823560625869892, iteration: 396973
loss: 0.9970276951789856,grad_norm: 0.6688511707548813, iteration: 396974
loss: 1.026548981666565,grad_norm: 0.9999998624562417, iteration: 396975
loss: 1.0183149576187134,grad_norm: 0.8202597376767543, iteration: 396976
loss: 1.008955717086792,grad_norm: 0.812238301375371, iteration: 396977
loss: 1.0595653057098389,grad_norm: 0.8267413080572091, iteration: 396978
loss: 0.9530705809593201,grad_norm: 0.643211563796878, iteration: 396979
loss: 1.031242847442627,grad_norm: 0.7551349731278962, iteration: 396980
loss: 0.9988117218017578,grad_norm: 0.6479389119464103, iteration: 396981
loss: 1.013650894165039,grad_norm: 0.7415277246099831, iteration: 396982
loss: 0.9963896870613098,grad_norm: 0.9110054319520496, iteration: 396983
loss: 0.9576399326324463,grad_norm: 0.7645122081815774, iteration: 396984
loss: 1.010988473892212,grad_norm: 0.6915817436489431, iteration: 396985
loss: 0.9847267866134644,grad_norm: 0.8495194239284937, iteration: 396986
loss: 1.0228818655014038,grad_norm: 0.99999904489595, iteration: 396987
loss: 0.9669937491416931,grad_norm: 0.8679124354118176, iteration: 396988
loss: 1.0225623846054077,grad_norm: 0.9999992444554995, iteration: 396989
loss: 0.991315484046936,grad_norm: 0.763785322779719, iteration: 396990
loss: 1.0060161352157593,grad_norm: 0.8027385353025361, iteration: 396991
loss: 1.006063461303711,grad_norm: 0.7863278089660866, iteration: 396992
loss: 0.9956067800521851,grad_norm: 0.7272759017653524, iteration: 396993
loss: 0.9759859442710876,grad_norm: 0.7595458729801263, iteration: 396994
loss: 1.018218994140625,grad_norm: 0.8614282675895444, iteration: 396995
loss: 1.034714698791504,grad_norm: 0.6755759522960392, iteration: 396996
loss: 1.0198557376861572,grad_norm: 0.8767861856300638, iteration: 396997
loss: 0.9727020859718323,grad_norm: 0.9999997850262322, iteration: 396998
loss: 1.024501919746399,grad_norm: 0.9999991794675198, iteration: 396999
loss: 0.9936263561248779,grad_norm: 0.8736569718437137, iteration: 397000
loss: 1.0211764574050903,grad_norm: 0.7996777895988293, iteration: 397001
loss: 0.9677077531814575,grad_norm: 0.8666809875939038, iteration: 397002
loss: 1.0013452768325806,grad_norm: 0.7635780967556915, iteration: 397003
loss: 0.995261013507843,grad_norm: 0.8467811416752218, iteration: 397004
loss: 0.9440625905990601,grad_norm: 0.7058214669881807, iteration: 397005
loss: 0.9981946349143982,grad_norm: 0.7343640461218621, iteration: 397006
loss: 0.9674971699714661,grad_norm: 0.9087808755121647, iteration: 397007
loss: 0.9989839792251587,grad_norm: 0.8750287107151979, iteration: 397008
loss: 0.9830067753791809,grad_norm: 0.7445899134133649, iteration: 397009
loss: 1.020579218864441,grad_norm: 0.8441886615858938, iteration: 397010
loss: 0.998931884765625,grad_norm: 0.7713417429408543, iteration: 397011
loss: 0.991532564163208,grad_norm: 0.6435790948495969, iteration: 397012
loss: 1.0130459070205688,grad_norm: 0.9999994755075812, iteration: 397013
loss: 1.0399887561798096,grad_norm: 0.9281271854215658, iteration: 397014
loss: 0.9972057342529297,grad_norm: 0.7615886709357546, iteration: 397015
loss: 1.0754839181900024,grad_norm: 0.7179100918019434, iteration: 397016
loss: 1.0966272354125977,grad_norm: 0.9999996091725789, iteration: 397017
loss: 0.9802280068397522,grad_norm: 0.8062245983566662, iteration: 397018
loss: 1.0784964561462402,grad_norm: 0.7685498329816095, iteration: 397019
loss: 1.0109963417053223,grad_norm: 0.7616462049617304, iteration: 397020
loss: 1.118164300918579,grad_norm: 0.8735356214314008, iteration: 397021
loss: 1.0252619981765747,grad_norm: 0.9999996785493901, iteration: 397022
loss: 1.0032671689987183,grad_norm: 0.826784335568883, iteration: 397023
loss: 1.0060374736785889,grad_norm: 0.9677486649557469, iteration: 397024
loss: 1.0058262348175049,grad_norm: 0.7198373002716336, iteration: 397025
loss: 0.9898834228515625,grad_norm: 0.7394921066554012, iteration: 397026
loss: 1.0004308223724365,grad_norm: 0.8363306524115856, iteration: 397027
loss: 0.9935155510902405,grad_norm: 0.8400431116372257, iteration: 397028
loss: 1.0378978252410889,grad_norm: 0.8947343518916813, iteration: 397029
loss: 0.970162034034729,grad_norm: 0.709071958622676, iteration: 397030
loss: 1.018066644668579,grad_norm: 0.7411763727890641, iteration: 397031
loss: 1.0375992059707642,grad_norm: 0.9999993279524035, iteration: 397032
loss: 1.0001482963562012,grad_norm: 0.8020953048899524, iteration: 397033
loss: 1.0197395086288452,grad_norm: 0.7092270218088362, iteration: 397034
loss: 1.0785475969314575,grad_norm: 0.9999995092646234, iteration: 397035
loss: 0.9912072420120239,grad_norm: 0.8306536911534741, iteration: 397036
loss: 0.9763693809509277,grad_norm: 0.9267962211173872, iteration: 397037
loss: 1.0312613248825073,grad_norm: 0.9999999534113366, iteration: 397038
loss: 0.9975662231445312,grad_norm: 0.7852017357042855, iteration: 397039
loss: 0.9869709014892578,grad_norm: 0.9596132042206568, iteration: 397040
loss: 1.0029971599578857,grad_norm: 0.9999991022094566, iteration: 397041
loss: 1.0223114490509033,grad_norm: 0.7339300253763945, iteration: 397042
loss: 1.0092790126800537,grad_norm: 0.7635373594785229, iteration: 397043
loss: 1.0147005319595337,grad_norm: 0.729456734642704, iteration: 397044
loss: 0.9715209603309631,grad_norm: 0.842629400247303, iteration: 397045
loss: 0.9844683408737183,grad_norm: 0.7763534883200821, iteration: 397046
loss: 1.0074635744094849,grad_norm: 0.8207221744277328, iteration: 397047
loss: 1.0067565441131592,grad_norm: 0.8338311155093593, iteration: 397048
loss: 1.0066030025482178,grad_norm: 0.9999994012625923, iteration: 397049
loss: 0.9583965539932251,grad_norm: 0.7811387061855247, iteration: 397050
loss: 1.0128700733184814,grad_norm: 0.8679956610749262, iteration: 397051
loss: 1.048407793045044,grad_norm: 0.9555692500268796, iteration: 397052
loss: 0.9985185861587524,grad_norm: 0.8548874838767833, iteration: 397053
loss: 1.0089572668075562,grad_norm: 0.8075253777532887, iteration: 397054
loss: 1.0062936544418335,grad_norm: 0.8427679434850568, iteration: 397055
loss: 1.0647813081741333,grad_norm: 0.8334852133728379, iteration: 397056
loss: 1.0267165899276733,grad_norm: 0.8263446242987428, iteration: 397057
loss: 0.9720143675804138,grad_norm: 0.8613570795000898, iteration: 397058
loss: 1.0057260990142822,grad_norm: 0.8039923017066946, iteration: 397059
loss: 1.0791491270065308,grad_norm: 0.999999944644514, iteration: 397060
loss: 1.0148988962173462,grad_norm: 0.858347007348286, iteration: 397061
loss: 1.074127435684204,grad_norm: 0.9404581839197715, iteration: 397062
loss: 0.9807271957397461,grad_norm: 0.7992783153757015, iteration: 397063
loss: 1.032071590423584,grad_norm: 0.9035893694512174, iteration: 397064
loss: 0.965869128704071,grad_norm: 0.7375905457237448, iteration: 397065
loss: 0.9883953928947449,grad_norm: 0.8513891739648793, iteration: 397066
loss: 0.9983984231948853,grad_norm: 0.672945245830533, iteration: 397067
loss: 1.0291173458099365,grad_norm: 0.709159859821358, iteration: 397068
loss: 1.0252114534378052,grad_norm: 0.9206009429874268, iteration: 397069
loss: 0.9894771575927734,grad_norm: 0.6045256416553099, iteration: 397070
loss: 1.0256927013397217,grad_norm: 0.9182273401099462, iteration: 397071
loss: 0.9985141754150391,grad_norm: 0.6110863195195506, iteration: 397072
loss: 1.0023980140686035,grad_norm: 0.7312749156645509, iteration: 397073
loss: 1.0297813415527344,grad_norm: 0.7541034529958365, iteration: 397074
loss: 0.9758846163749695,grad_norm: 0.8450189937539561, iteration: 397075
loss: 1.0123552083969116,grad_norm: 0.6818543117964716, iteration: 397076
loss: 0.9956583976745605,grad_norm: 0.8180089721792495, iteration: 397077
loss: 0.9953075051307678,grad_norm: 0.7438084839295004, iteration: 397078
loss: 1.0239410400390625,grad_norm: 0.9416658929717091, iteration: 397079
loss: 0.9751388430595398,grad_norm: 0.9999989599472507, iteration: 397080
loss: 1.0016578435897827,grad_norm: 0.8402493036289898, iteration: 397081
loss: 0.9779096245765686,grad_norm: 0.8629043458584144, iteration: 397082
loss: 1.0074130296707153,grad_norm: 0.8955645289256228, iteration: 397083
loss: 1.016174077987671,grad_norm: 0.928791858787392, iteration: 397084
loss: 1.0047131776809692,grad_norm: 0.6959248177854483, iteration: 397085
loss: 1.0320583581924438,grad_norm: 0.9999996311806834, iteration: 397086
loss: 1.0205671787261963,grad_norm: 0.8054792149718263, iteration: 397087
loss: 1.0090420246124268,grad_norm: 0.7561167693665846, iteration: 397088
loss: 0.9874900579452515,grad_norm: 0.8129318362179705, iteration: 397089
loss: 0.9942306876182556,grad_norm: 0.9432717619568014, iteration: 397090
loss: 0.9799846410751343,grad_norm: 0.7281535861759613, iteration: 397091
loss: 1.0159335136413574,grad_norm: 0.7421426926284709, iteration: 397092
loss: 0.9803285002708435,grad_norm: 0.6614332501575143, iteration: 397093
loss: 0.9829895496368408,grad_norm: 0.6771900542920334, iteration: 397094
loss: 1.0191560983657837,grad_norm: 0.999999423486965, iteration: 397095
loss: 1.0189894437789917,grad_norm: 0.8689660877069888, iteration: 397096
loss: 1.0227773189544678,grad_norm: 0.761830823291971, iteration: 397097
loss: 0.986053466796875,grad_norm: 0.7924475424171439, iteration: 397098
loss: 0.9830858707427979,grad_norm: 0.7610887024169236, iteration: 397099
loss: 1.0128796100616455,grad_norm: 0.725118810515901, iteration: 397100
loss: 0.9995233416557312,grad_norm: 0.7340117035631779, iteration: 397101
loss: 0.9996886253356934,grad_norm: 0.7978469091536954, iteration: 397102
loss: 1.0179835557937622,grad_norm: 0.6806499383878805, iteration: 397103
loss: 0.9839377999305725,grad_norm: 0.7635995906877592, iteration: 397104
loss: 1.0171456336975098,grad_norm: 0.9999991325448198, iteration: 397105
loss: 1.0031061172485352,grad_norm: 0.6965804813879237, iteration: 397106
loss: 1.0071470737457275,grad_norm: 0.6965350322061176, iteration: 397107
loss: 1.0029164552688599,grad_norm: 0.632250376891434, iteration: 397108
loss: 0.9741230607032776,grad_norm: 0.8710693687458652, iteration: 397109
loss: 1.0036964416503906,grad_norm: 0.7561850626659113, iteration: 397110
loss: 1.0564383268356323,grad_norm: 0.9784186823914123, iteration: 397111
loss: 1.0551657676696777,grad_norm: 0.9999998367782741, iteration: 397112
loss: 1.0491406917572021,grad_norm: 0.7456478359349515, iteration: 397113
loss: 0.9685518145561218,grad_norm: 0.7405408640109722, iteration: 397114
loss: 1.004468321800232,grad_norm: 0.8176681510008313, iteration: 397115
loss: 1.002442479133606,grad_norm: 0.9999991871653384, iteration: 397116
loss: 0.9671395421028137,grad_norm: 0.9999995275277171, iteration: 397117
loss: 1.004574179649353,grad_norm: 0.8491150492302402, iteration: 397118
loss: 1.0084301233291626,grad_norm: 0.9999993563823294, iteration: 397119
loss: 1.0765362977981567,grad_norm: 0.9999990374397575, iteration: 397120
loss: 0.9873852133750916,grad_norm: 0.6714187799623546, iteration: 397121
loss: 1.0135537385940552,grad_norm: 0.7988125191018896, iteration: 397122
loss: 0.9959738850593567,grad_norm: 0.9999999703420439, iteration: 397123
loss: 1.0304328203201294,grad_norm: 0.9273110521860253, iteration: 397124
loss: 1.0230000019073486,grad_norm: 0.8957287437923669, iteration: 397125
loss: 0.9915399551391602,grad_norm: 0.735984942294695, iteration: 397126
loss: 0.9893819689750671,grad_norm: 0.780719200353817, iteration: 397127
loss: 1.0071492195129395,grad_norm: 0.7624154902742067, iteration: 397128
loss: 1.0359513759613037,grad_norm: 0.7688718222808082, iteration: 397129
loss: 0.9957613348960876,grad_norm: 0.8804037701893266, iteration: 397130
loss: 0.9934750199317932,grad_norm: 0.8037440354668159, iteration: 397131
loss: 1.0044459104537964,grad_norm: 0.9489116898938643, iteration: 397132
loss: 0.9787097573280334,grad_norm: 0.776220065271819, iteration: 397133
loss: 1.020466685295105,grad_norm: 0.6993026001350527, iteration: 397134
loss: 0.9805482625961304,grad_norm: 0.7123448752106429, iteration: 397135
loss: 0.9618348479270935,grad_norm: 0.7690665039674772, iteration: 397136
loss: 1.0892071723937988,grad_norm: 0.8489570961438407, iteration: 397137
loss: 0.9984461665153503,grad_norm: 0.8012067098672749, iteration: 397138
loss: 0.9616149663925171,grad_norm: 0.78250790557968, iteration: 397139
loss: 1.030696988105774,grad_norm: 0.8544710546891858, iteration: 397140
loss: 1.0117192268371582,grad_norm: 0.7047756093343837, iteration: 397141
loss: 0.9978073835372925,grad_norm: 0.7808947439836817, iteration: 397142
loss: 1.0340144634246826,grad_norm: 0.7691443510282114, iteration: 397143
loss: 1.031165599822998,grad_norm: 0.8521988113264009, iteration: 397144
loss: 0.9757758975028992,grad_norm: 0.7603676661735669, iteration: 397145
loss: 0.9822882413864136,grad_norm: 0.8291285370448486, iteration: 397146
loss: 1.0298962593078613,grad_norm: 0.9473189285071115, iteration: 397147
loss: 1.0068767070770264,grad_norm: 0.9999991261645442, iteration: 397148
loss: 0.997296154499054,grad_norm: 0.6864852883095165, iteration: 397149
loss: 0.9965987205505371,grad_norm: 0.8874127620762727, iteration: 397150
loss: 0.9798452258110046,grad_norm: 0.8540812536842878, iteration: 397151
loss: 0.9910888671875,grad_norm: 0.8678243769138849, iteration: 397152
loss: 0.9751881957054138,grad_norm: 0.7680708414528463, iteration: 397153
loss: 0.9709547162055969,grad_norm: 0.7003395357729473, iteration: 397154
loss: 1.1543781757354736,grad_norm: 0.9999991317134312, iteration: 397155
loss: 1.0267075300216675,grad_norm: 0.7735619059517335, iteration: 397156
loss: 1.0166103839874268,grad_norm: 0.7932371532956772, iteration: 397157
loss: 1.0254877805709839,grad_norm: 0.9999998330487236, iteration: 397158
loss: 0.9628770351409912,grad_norm: 0.8505532325825633, iteration: 397159
loss: 1.0186454057693481,grad_norm: 0.7689590790964872, iteration: 397160
loss: 1.0071687698364258,grad_norm: 0.840779318556189, iteration: 397161
loss: 0.9993980526924133,grad_norm: 0.8956733995992463, iteration: 397162
loss: 0.9868392944335938,grad_norm: 0.8365798421765747, iteration: 397163
loss: 0.9723495841026306,grad_norm: 0.7044384904740886, iteration: 397164
loss: 1.0045169591903687,grad_norm: 0.6690701256837999, iteration: 397165
loss: 1.025894045829773,grad_norm: 0.8101555474911654, iteration: 397166
loss: 0.9669296145439148,grad_norm: 0.8373515395545794, iteration: 397167
loss: 1.023421287536621,grad_norm: 0.9602753268096934, iteration: 397168
loss: 1.0051603317260742,grad_norm: 0.7172267304082658, iteration: 397169
loss: 0.9942049980163574,grad_norm: 0.7940623079230285, iteration: 397170
loss: 0.9956558346748352,grad_norm: 0.8531951686596119, iteration: 397171
loss: 0.9839897751808167,grad_norm: 0.7966176978471808, iteration: 397172
loss: 1.0027492046356201,grad_norm: 0.761266113047649, iteration: 397173
loss: 0.9712439775466919,grad_norm: 0.6233080585983187, iteration: 397174
loss: 1.000335931777954,grad_norm: 0.900323078712336, iteration: 397175
loss: 0.9821079969406128,grad_norm: 0.6930574939364577, iteration: 397176
loss: 1.0138390064239502,grad_norm: 0.6899311783317355, iteration: 397177
loss: 0.9622417688369751,grad_norm: 0.8887026166569999, iteration: 397178
loss: 1.0516794919967651,grad_norm: 0.9236675122558601, iteration: 397179
loss: 1.0012346506118774,grad_norm: 0.7126538955637761, iteration: 397180
loss: 1.028087854385376,grad_norm: 0.9899553485541346, iteration: 397181
loss: 0.9656538367271423,grad_norm: 0.7438605657588143, iteration: 397182
loss: 1.0375488996505737,grad_norm: 1.0000000703173964, iteration: 397183
loss: 1.0102730989456177,grad_norm: 0.588108417366386, iteration: 397184
loss: 0.9761688709259033,grad_norm: 0.8878756503501236, iteration: 397185
loss: 1.0091898441314697,grad_norm: 0.7181846027410992, iteration: 397186
loss: 1.008535623550415,grad_norm: 0.9999992132738319, iteration: 397187
loss: 0.9937551617622375,grad_norm: 0.7826808149834213, iteration: 397188
loss: 1.0107378959655762,grad_norm: 0.752098651725885, iteration: 397189
loss: 1.0904364585876465,grad_norm: 0.9999997041574856, iteration: 397190
loss: 0.9675443768501282,grad_norm: 0.8303654051596343, iteration: 397191
loss: 1.0007585287094116,grad_norm: 0.88440959444065, iteration: 397192
loss: 1.000808835029602,grad_norm: 0.9999998900263611, iteration: 397193
loss: 1.0173991918563843,grad_norm: 0.9999995083547446, iteration: 397194
loss: 0.9882836937904358,grad_norm: 0.6701391433868823, iteration: 397195
loss: 0.9667932987213135,grad_norm: 0.8041231372060003, iteration: 397196
loss: 0.9951601624488831,grad_norm: 0.8545104590959306, iteration: 397197
loss: 0.9892143607139587,grad_norm: 0.8283763870096681, iteration: 397198
loss: 1.0093505382537842,grad_norm: 0.7966145343420388, iteration: 397199
loss: 1.008502721786499,grad_norm: 0.7747321670785188, iteration: 397200
loss: 0.9930794835090637,grad_norm: 0.8778283790576535, iteration: 397201
loss: 0.9988332390785217,grad_norm: 0.7788373301405227, iteration: 397202
loss: 0.9802578687667847,grad_norm: 0.6887307369850432, iteration: 397203
loss: 1.013898491859436,grad_norm: 0.9999990989906746, iteration: 397204
loss: 0.9740946888923645,grad_norm: 0.7777107375299152, iteration: 397205
loss: 0.9738079905509949,grad_norm: 0.9131334933792977, iteration: 397206
loss: 1.003857135772705,grad_norm: 0.7604809398389943, iteration: 397207
loss: 1.0180199146270752,grad_norm: 0.7989796227135508, iteration: 397208
loss: 0.9913469552993774,grad_norm: 0.810577427480414, iteration: 397209
loss: 0.9930269122123718,grad_norm: 0.7316413202494594, iteration: 397210
loss: 0.974402904510498,grad_norm: 0.8818518596003281, iteration: 397211
loss: 1.0079978704452515,grad_norm: 0.7324120802337661, iteration: 397212
loss: 0.9997893571853638,grad_norm: 0.9999993564138566, iteration: 397213
loss: 1.0135471820831299,grad_norm: 0.8902987030742604, iteration: 397214
loss: 1.0035991668701172,grad_norm: 0.9999996136348428, iteration: 397215
loss: 0.9959721565246582,grad_norm: 0.7439117793568988, iteration: 397216
loss: 0.9515308141708374,grad_norm: 0.803780981746688, iteration: 397217
loss: 1.1058412790298462,grad_norm: 0.9999996401305626, iteration: 397218
loss: 1.0217890739440918,grad_norm: 0.7537657175826802, iteration: 397219
loss: 1.009403944015503,grad_norm: 0.9013905496372825, iteration: 397220
loss: 1.0408457517623901,grad_norm: 0.7601915281892663, iteration: 397221
loss: 0.9834789037704468,grad_norm: 0.7991091443905031, iteration: 397222
loss: 1.0074185132980347,grad_norm: 0.6707487156838114, iteration: 397223
loss: 1.0368719100952148,grad_norm: 0.9999990366655394, iteration: 397224
loss: 0.9961923956871033,grad_norm: 0.8644062094994933, iteration: 397225
loss: 1.001807689666748,grad_norm: 0.7898367542097662, iteration: 397226
loss: 1.0176512002944946,grad_norm: 0.8183090063931179, iteration: 397227
loss: 1.0379459857940674,grad_norm: 0.7655919571905531, iteration: 397228
loss: 1.0437301397323608,grad_norm: 0.9999998393942763, iteration: 397229
loss: 1.0028430223464966,grad_norm: 0.8313274822179721, iteration: 397230
loss: 1.0209221839904785,grad_norm: 0.7002564065652059, iteration: 397231
loss: 1.0238478183746338,grad_norm: 0.9999994271915196, iteration: 397232
loss: 1.121546745300293,grad_norm: 0.999999842591688, iteration: 397233
loss: 0.9784000515937805,grad_norm: 0.7702434532704642, iteration: 397234
loss: 1.0262622833251953,grad_norm: 1.0000000545410936, iteration: 397235
loss: 0.9834652543067932,grad_norm: 0.9999995231092723, iteration: 397236
loss: 0.9873433709144592,grad_norm: 0.7721268676725834, iteration: 397237
loss: 0.9971588850021362,grad_norm: 0.8537267759118551, iteration: 397238
loss: 0.9940981864929199,grad_norm: 0.8347081550184429, iteration: 397239
loss: 0.9597951173782349,grad_norm: 0.7280724368185592, iteration: 397240
loss: 1.0254172086715698,grad_norm: 0.8675305608400128, iteration: 397241
loss: 0.9892850518226624,grad_norm: 0.8101342207376725, iteration: 397242
loss: 1.04757559299469,grad_norm: 0.9999998877646344, iteration: 397243
loss: 0.9800852537155151,grad_norm: 0.8025735430219153, iteration: 397244
loss: 0.9985420107841492,grad_norm: 0.6539018755966219, iteration: 397245
loss: 1.0063420534133911,grad_norm: 0.6240523875121065, iteration: 397246
loss: 0.9987165927886963,grad_norm: 0.63698073585477, iteration: 397247
loss: 0.970140278339386,grad_norm: 0.8140411765645406, iteration: 397248
loss: 0.9622502326965332,grad_norm: 0.9436408126745328, iteration: 397249
loss: 1.1454330682754517,grad_norm: 0.9999997328443763, iteration: 397250
loss: 1.005348801612854,grad_norm: 0.9999990617121813, iteration: 397251
loss: 0.9881072044372559,grad_norm: 0.6687164879154974, iteration: 397252
loss: 0.9982082843780518,grad_norm: 0.8482042404531014, iteration: 397253
loss: 1.0006251335144043,grad_norm: 0.7493072027173477, iteration: 397254
loss: 0.9945067763328552,grad_norm: 0.8208626703751049, iteration: 397255
loss: 0.978235125541687,grad_norm: 0.8039758025833224, iteration: 397256
loss: 0.9997299313545227,grad_norm: 0.7161197435350949, iteration: 397257
loss: 0.9835991859436035,grad_norm: 0.6836027558991102, iteration: 397258
loss: 0.9819740056991577,grad_norm: 0.7800101146597379, iteration: 397259
loss: 0.9955304861068726,grad_norm: 0.7012808110884803, iteration: 397260
loss: 1.0057734251022339,grad_norm: 0.7287310274867524, iteration: 397261
loss: 0.9985268712043762,grad_norm: 0.704173976368331, iteration: 397262
loss: 1.0258296728134155,grad_norm: 0.999999119046446, iteration: 397263
loss: 1.107783317565918,grad_norm: 0.999999939585164, iteration: 397264
loss: 0.9748249053955078,grad_norm: 0.844990074963143, iteration: 397265
loss: 1.0166679620742798,grad_norm: 0.7779129787680933, iteration: 397266
loss: 1.012426733970642,grad_norm: 0.7507903425912678, iteration: 397267
loss: 1.0399936437606812,grad_norm: 0.7564433109091889, iteration: 397268
loss: 1.0001500844955444,grad_norm: 0.7712430247464985, iteration: 397269
loss: 1.0271401405334473,grad_norm: 0.9999992264509513, iteration: 397270
loss: 0.9705337285995483,grad_norm: 0.9999996565106382, iteration: 397271
loss: 1.0480315685272217,grad_norm: 0.7409536007016421, iteration: 397272
loss: 1.040964126586914,grad_norm: 0.8454042858975629, iteration: 397273
loss: 1.0480395555496216,grad_norm: 0.8562225615811669, iteration: 397274
loss: 1.0120079517364502,grad_norm: 0.8870154765555298, iteration: 397275
loss: 1.0049630403518677,grad_norm: 0.9070822548320676, iteration: 397276
loss: 0.9993618130683899,grad_norm: 0.8583637746536588, iteration: 397277
loss: 1.0013079643249512,grad_norm: 0.8946960094024801, iteration: 397278
loss: 1.14629065990448,grad_norm: 0.895503571408774, iteration: 397279
loss: 0.970217764377594,grad_norm: 0.7718595610555871, iteration: 397280
loss: 0.998968243598938,grad_norm: 0.9559478982144137, iteration: 397281
loss: 0.9688745141029358,grad_norm: 0.7293688553042588, iteration: 397282
loss: 0.9817660450935364,grad_norm: 0.9999991453114713, iteration: 397283
loss: 1.0174157619476318,grad_norm: 0.9870089586666507, iteration: 397284
loss: 0.96908038854599,grad_norm: 0.8423377098842895, iteration: 397285
loss: 1.0067824125289917,grad_norm: 0.697641942367409, iteration: 397286
loss: 1.0040942430496216,grad_norm: 0.8102562763607061, iteration: 397287
loss: 0.9861149787902832,grad_norm: 0.690162009927624, iteration: 397288
loss: 1.028477668762207,grad_norm: 0.9041389942027644, iteration: 397289
loss: 1.0070921182632446,grad_norm: 0.6746880399331853, iteration: 397290
loss: 1.0317505598068237,grad_norm: 0.9999995193410172, iteration: 397291
loss: 1.0127125978469849,grad_norm: 0.7590908031328595, iteration: 397292
loss: 0.9710571765899658,grad_norm: 0.7844076776223825, iteration: 397293
loss: 1.0137577056884766,grad_norm: 0.9999991478701444, iteration: 397294
loss: 1.0190496444702148,grad_norm: 0.7681187248717619, iteration: 397295
loss: 0.989254891872406,grad_norm: 0.8902794398617345, iteration: 397296
loss: 1.0224722623825073,grad_norm: 0.8440857777525081, iteration: 397297
loss: 0.9880473017692566,grad_norm: 0.7888389155111826, iteration: 397298
loss: 1.0026711225509644,grad_norm: 0.7218031437736399, iteration: 397299
loss: 1.0003697872161865,grad_norm: 0.7013625727896848, iteration: 397300
loss: 1.0265651941299438,grad_norm: 0.9999997576579759, iteration: 397301
loss: 0.9667463898658752,grad_norm: 0.8201125400035367, iteration: 397302
loss: 0.9968017339706421,grad_norm: 0.824082009474024, iteration: 397303
loss: 1.0145407915115356,grad_norm: 0.8027175456337781, iteration: 397304
loss: 0.9956052899360657,grad_norm: 0.9292640177999364, iteration: 397305
loss: 1.008579969406128,grad_norm: 0.7725037790955571, iteration: 397306
loss: 0.9787324070930481,grad_norm: 0.999999033814827, iteration: 397307
loss: 0.9963010549545288,grad_norm: 0.8019166521950299, iteration: 397308
loss: 0.9712349772453308,grad_norm: 0.9079409432360609, iteration: 397309
loss: 1.0053365230560303,grad_norm: 0.705686728471088, iteration: 397310
loss: 1.0496668815612793,grad_norm: 0.9999996603712378, iteration: 397311
loss: 1.0129423141479492,grad_norm: 0.8888770903447863, iteration: 397312
loss: 1.052879810333252,grad_norm: 0.9999992526772397, iteration: 397313
loss: 1.0012937784194946,grad_norm: 0.7699035960733452, iteration: 397314
loss: 1.091478943824768,grad_norm: 0.9999999216201004, iteration: 397315
loss: 0.973643958568573,grad_norm: 0.6374977285812072, iteration: 397316
loss: 0.9672813415527344,grad_norm: 0.997677580059261, iteration: 397317
loss: 0.989132285118103,grad_norm: 0.7456212709793097, iteration: 397318
loss: 1.097983717918396,grad_norm: 0.9400473636952567, iteration: 397319
loss: 1.019460678100586,grad_norm: 0.8353131350842734, iteration: 397320
loss: 0.9885095357894897,grad_norm: 0.8881629714930807, iteration: 397321
loss: 1.0424972772598267,grad_norm: 0.7829065072803909, iteration: 397322
loss: 0.9720985889434814,grad_norm: 0.7250456111131157, iteration: 397323
loss: 0.9749529361724854,grad_norm: 0.6941259862684115, iteration: 397324
loss: 0.9869512915611267,grad_norm: 0.6978792430546198, iteration: 397325
loss: 0.9663389325141907,grad_norm: 0.8892449530068857, iteration: 397326
loss: 0.9717383980751038,grad_norm: 0.7817253828195322, iteration: 397327
loss: 1.0167772769927979,grad_norm: 0.7897438839544318, iteration: 397328
loss: 0.9886000156402588,grad_norm: 0.8049566150790567, iteration: 397329
loss: 1.0219229459762573,grad_norm: 0.9719711106525467, iteration: 397330
loss: 0.9761009216308594,grad_norm: 0.8872919572592932, iteration: 397331
loss: 0.9649364948272705,grad_norm: 0.7325732404115237, iteration: 397332
loss: 1.0207252502441406,grad_norm: 0.6842827052619315, iteration: 397333
loss: 0.9845447540283203,grad_norm: 0.6657536779538762, iteration: 397334
loss: 0.9940531849861145,grad_norm: 0.8664983551466481, iteration: 397335
loss: 1.0216031074523926,grad_norm: 0.6957083063935128, iteration: 397336
loss: 0.9959635734558105,grad_norm: 0.7714557056405528, iteration: 397337
loss: 1.0502322912216187,grad_norm: 0.9999992206866916, iteration: 397338
loss: 1.0252561569213867,grad_norm: 0.7599779153557775, iteration: 397339
loss: 0.9498217701911926,grad_norm: 0.8885236789734285, iteration: 397340
loss: 1.0249334573745728,grad_norm: 0.8116931020142942, iteration: 397341
loss: 0.9953101277351379,grad_norm: 0.7531935144489891, iteration: 397342
loss: 0.990156352519989,grad_norm: 0.7688204413734735, iteration: 397343
loss: 1.0144298076629639,grad_norm: 0.7473241224618457, iteration: 397344
loss: 0.9973276257514954,grad_norm: 0.7422724341485053, iteration: 397345
loss: 1.0606805086135864,grad_norm: 0.7749727402474572, iteration: 397346
loss: 0.98097163438797,grad_norm: 0.9999990380973389, iteration: 397347
loss: 0.9903281331062317,grad_norm: 0.7469286488716425, iteration: 397348
loss: 0.9869617223739624,grad_norm: 0.6333722460420941, iteration: 397349
loss: 1.0118627548217773,grad_norm: 0.7313351868898796, iteration: 397350
loss: 1.0164296627044678,grad_norm: 0.834237335965932, iteration: 397351
loss: 1.0004504919052124,grad_norm: 0.7860080317963046, iteration: 397352
loss: 1.0002490282058716,grad_norm: 0.7149831707012594, iteration: 397353
loss: 0.9876970648765564,grad_norm: 0.7323862326147381, iteration: 397354
loss: 0.9811152219772339,grad_norm: 0.8189401454323293, iteration: 397355
loss: 0.9785012602806091,grad_norm: 0.896342785726102, iteration: 397356
loss: 0.9779378175735474,grad_norm: 0.7010598912383205, iteration: 397357
loss: 0.9362668395042419,grad_norm: 0.8976405190164715, iteration: 397358
loss: 0.9977960586547852,grad_norm: 0.7526628504414018, iteration: 397359
loss: 1.0124610662460327,grad_norm: 0.753011048213985, iteration: 397360
loss: 0.9747814536094666,grad_norm: 0.8150858574636483, iteration: 397361
loss: 0.9931135177612305,grad_norm: 0.6823309461981725, iteration: 397362
loss: 1.0308222770690918,grad_norm: 0.760282782108056, iteration: 397363
loss: 0.9829948544502258,grad_norm: 0.6231195625763005, iteration: 397364
loss: 1.1796554327011108,grad_norm: 0.9999998154634264, iteration: 397365
loss: 1.000671625137329,grad_norm: 0.743751698006553, iteration: 397366
loss: 0.9887329339981079,grad_norm: 0.9999999864220576, iteration: 397367
loss: 0.968226432800293,grad_norm: 0.8092368271021158, iteration: 397368
loss: 1.0392634868621826,grad_norm: 0.9999994390431436, iteration: 397369
loss: 1.012976050376892,grad_norm: 0.7711010511630714, iteration: 397370
loss: 0.9906547665596008,grad_norm: 0.6023812192873519, iteration: 397371
loss: 0.9793936610221863,grad_norm: 0.7665680134066882, iteration: 397372
loss: 1.0158413648605347,grad_norm: 0.9230880298233871, iteration: 397373
loss: 0.9789730310440063,grad_norm: 0.8183299770161967, iteration: 397374
loss: 1.0848404169082642,grad_norm: 0.9999992460026171, iteration: 397375
loss: 1.1992677450180054,grad_norm: 0.9999997996739696, iteration: 397376
loss: 1.0264990329742432,grad_norm: 0.7718346213643666, iteration: 397377
loss: 1.1036131381988525,grad_norm: 0.9999996309866843, iteration: 397378
loss: 0.9918939471244812,grad_norm: 0.9999999490505492, iteration: 397379
loss: 1.0219194889068604,grad_norm: 0.9381374720540335, iteration: 397380
loss: 1.0820130109786987,grad_norm: 0.9999997644597886, iteration: 397381
loss: 1.0277304649353027,grad_norm: 0.7043137083021216, iteration: 397382
loss: 0.9920300841331482,grad_norm: 0.8837544097663778, iteration: 397383
loss: 1.0738052129745483,grad_norm: 0.9240019337955173, iteration: 397384
loss: 0.9878918528556824,grad_norm: 0.9885506259574582, iteration: 397385
loss: 1.0041706562042236,grad_norm: 0.8660868521894496, iteration: 397386
loss: 1.0366538763046265,grad_norm: 0.9182395470726906, iteration: 397387
loss: 1.0116510391235352,grad_norm: 0.8369902325351556, iteration: 397388
loss: 0.9924606680870056,grad_norm: 0.7223393153081814, iteration: 397389
loss: 0.9731737375259399,grad_norm: 0.7718267555521997, iteration: 397390
loss: 1.0368504524230957,grad_norm: 0.8141041610620373, iteration: 397391
loss: 1.022829294204712,grad_norm: 0.8020373559433132, iteration: 397392
loss: 0.9888870716094971,grad_norm: 0.9999996409446384, iteration: 397393
loss: 1.0244684219360352,grad_norm: 0.8135426496866237, iteration: 397394
loss: 0.9548665285110474,grad_norm: 0.7657156484943692, iteration: 397395
loss: 1.0109305381774902,grad_norm: 0.7388518397306759, iteration: 397396
loss: 0.999950647354126,grad_norm: 0.9080596902820955, iteration: 397397
loss: 1.056388020515442,grad_norm: 0.8817521476731586, iteration: 397398
loss: 1.0015642642974854,grad_norm: 0.9945987956169255, iteration: 397399
loss: 1.0246281623840332,grad_norm: 0.8782391349578937, iteration: 397400
loss: 0.9479836821556091,grad_norm: 0.7104779684191603, iteration: 397401
loss: 0.9992392063140869,grad_norm: 0.813930082650473, iteration: 397402
loss: 0.9983144998550415,grad_norm: 0.6820272035783856, iteration: 397403
loss: 1.0406357049942017,grad_norm: 0.9999995062647691, iteration: 397404
loss: 0.9492914080619812,grad_norm: 0.7036783648295795, iteration: 397405
loss: 0.9853761196136475,grad_norm: 0.7791614766122217, iteration: 397406
loss: 0.9879449009895325,grad_norm: 0.7776053440063908, iteration: 397407
loss: 0.9959164261817932,grad_norm: 0.8466857644499722, iteration: 397408
loss: 0.9943976998329163,grad_norm: 0.9999992018613728, iteration: 397409
loss: 1.004841923713684,grad_norm: 0.8557555847785588, iteration: 397410
loss: 1.0114566087722778,grad_norm: 0.73682117745434, iteration: 397411
loss: 0.9903180003166199,grad_norm: 0.9661271588415808, iteration: 397412
loss: 1.0297091007232666,grad_norm: 0.9543628096747472, iteration: 397413
loss: 0.9860277771949768,grad_norm: 0.8091522478096737, iteration: 397414
loss: 1.033464789390564,grad_norm: 0.8360933422967205, iteration: 397415
loss: 0.9899845719337463,grad_norm: 0.8534199602787451, iteration: 397416
loss: 0.9886826872825623,grad_norm: 0.8935258936026309, iteration: 397417
loss: 0.9930912256240845,grad_norm: 0.9999994741619513, iteration: 397418
loss: 0.9983665347099304,grad_norm: 0.9051831191980946, iteration: 397419
loss: 0.9844131469726562,grad_norm: 0.7969654778129577, iteration: 397420
loss: 1.0391652584075928,grad_norm: 0.9999995495035563, iteration: 397421
loss: 0.9925066232681274,grad_norm: 0.7738882757978379, iteration: 397422
loss: 1.0410192012786865,grad_norm: 0.9999992791867993, iteration: 397423
loss: 0.9840023517608643,grad_norm: 0.835736154982332, iteration: 397424
loss: 1.009050726890564,grad_norm: 0.7404367300626781, iteration: 397425
loss: 0.971734344959259,grad_norm: 0.6838088247532292, iteration: 397426
loss: 1.0210827589035034,grad_norm: 0.9999994244880829, iteration: 397427
loss: 1.0602245330810547,grad_norm: 0.9999997913992709, iteration: 397428
loss: 0.9931851625442505,grad_norm: 0.669688218111043, iteration: 397429
loss: 1.0354691743850708,grad_norm: 0.8978679910618332, iteration: 397430
loss: 1.0085855722427368,grad_norm: 0.9895278462481422, iteration: 397431
loss: 0.9767757058143616,grad_norm: 0.9370095358511609, iteration: 397432
loss: 1.0187779664993286,grad_norm: 0.992150497635345, iteration: 397433
loss: 1.0111680030822754,grad_norm: 0.7427453355586685, iteration: 397434
loss: 1.0504491329193115,grad_norm: 0.8361594672311846, iteration: 397435
loss: 0.9952645897865295,grad_norm: 0.6571814349086548, iteration: 397436
loss: 1.0084699392318726,grad_norm: 0.810710801358065, iteration: 397437
loss: 1.0066509246826172,grad_norm: 0.9084593537503567, iteration: 397438
loss: 1.0045186281204224,grad_norm: 0.9225156933582188, iteration: 397439
loss: 0.9811859130859375,grad_norm: 0.9999991499804536, iteration: 397440
loss: 1.0191057920455933,grad_norm: 0.8572540011152497, iteration: 397441
loss: 1.024715542793274,grad_norm: 0.7665694067265828, iteration: 397442
loss: 1.0348962545394897,grad_norm: 0.9249535754977533, iteration: 397443
loss: 1.0063453912734985,grad_norm: 0.7820301473192703, iteration: 397444
loss: 1.0019984245300293,grad_norm: 0.9705392268875903, iteration: 397445
loss: 0.9987632632255554,grad_norm: 0.784173925181671, iteration: 397446
loss: 0.9924446940422058,grad_norm: 0.7130877256953803, iteration: 397447
loss: 0.9385479092597961,grad_norm: 0.834503918479464, iteration: 397448
loss: 0.96518874168396,grad_norm: 0.8726407209969679, iteration: 397449
loss: 1.0091381072998047,grad_norm: 0.8567153241492331, iteration: 397450
loss: 1.0154510736465454,grad_norm: 0.8522093228508955, iteration: 397451
loss: 1.0147194862365723,grad_norm: 0.9403636805097164, iteration: 397452
loss: 1.0859049558639526,grad_norm: 0.9999991608925274, iteration: 397453
loss: 0.9569774270057678,grad_norm: 0.795918607181167, iteration: 397454
loss: 1.0108033418655396,grad_norm: 0.679786387883745, iteration: 397455
loss: 1.0112347602844238,grad_norm: 0.7309963125314097, iteration: 397456
loss: 1.0278874635696411,grad_norm: 0.87983185063101, iteration: 397457
loss: 1.003834843635559,grad_norm: 0.9296643443328408, iteration: 397458
loss: 1.0197479724884033,grad_norm: 0.9999992184972943, iteration: 397459
loss: 0.9898566603660583,grad_norm: 0.7332940840924336, iteration: 397460
loss: 1.0022766590118408,grad_norm: 0.8659092706022878, iteration: 397461
loss: 1.018072485923767,grad_norm: 0.652288925400805, iteration: 397462
loss: 0.9568719863891602,grad_norm: 0.9999991098354235, iteration: 397463
loss: 0.9898895621299744,grad_norm: 0.878455459156593, iteration: 397464
loss: 1.0159056186676025,grad_norm: 0.6662829164154065, iteration: 397465
loss: 1.0230309963226318,grad_norm: 0.993627024427909, iteration: 397466
loss: 0.995120644569397,grad_norm: 0.9999994680785427, iteration: 397467
loss: 0.9622987508773804,grad_norm: 0.8920205223989792, iteration: 397468
loss: 1.032353162765503,grad_norm: 0.7724109328378188, iteration: 397469
loss: 1.0065009593963623,grad_norm: 0.842636172015746, iteration: 397470
loss: 1.0445911884307861,grad_norm: 0.9999999957449722, iteration: 397471
loss: 1.0321781635284424,grad_norm: 0.9999990960882168, iteration: 397472
loss: 1.015857219696045,grad_norm: 0.9259042523393596, iteration: 397473
loss: 0.9467911720275879,grad_norm: 0.9343569718173733, iteration: 397474
loss: 1.0138394832611084,grad_norm: 0.6873405045644653, iteration: 397475
loss: 1.0229675769805908,grad_norm: 0.7663266505491967, iteration: 397476
loss: 0.9719143509864807,grad_norm: 0.7387507149551534, iteration: 397477
loss: 1.0219981670379639,grad_norm: 0.8539828155075654, iteration: 397478
loss: 1.0931681394577026,grad_norm: 0.9999992460998129, iteration: 397479
loss: 1.0340473651885986,grad_norm: 0.987975599611031, iteration: 397480
loss: 1.010235071182251,grad_norm: 0.9538776464769347, iteration: 397481
loss: 1.0293363332748413,grad_norm: 0.7421944200040083, iteration: 397482
loss: 1.0338913202285767,grad_norm: 0.9999996795121558, iteration: 397483
loss: 0.972331166267395,grad_norm: 0.8970998424192653, iteration: 397484
loss: 0.9954706430435181,grad_norm: 0.8186060776198485, iteration: 397485
loss: 1.0279744863510132,grad_norm: 0.9176860279856205, iteration: 397486
loss: 0.99790358543396,grad_norm: 0.6399356805366319, iteration: 397487
loss: 1.0645591020584106,grad_norm: 0.9480396129893421, iteration: 397488
loss: 0.9988138675689697,grad_norm: 0.6562937893085499, iteration: 397489
loss: 1.0485775470733643,grad_norm: 0.9152975608399131, iteration: 397490
loss: 1.1364456415176392,grad_norm: 0.9999998482772923, iteration: 397491
loss: 0.9891003966331482,grad_norm: 0.9999997902478973, iteration: 397492
loss: 1.1660879850387573,grad_norm: 0.9999997127187509, iteration: 397493
loss: 1.0855369567871094,grad_norm: 0.9185773396678928, iteration: 397494
loss: 1.0045098066329956,grad_norm: 0.9999998570871395, iteration: 397495
loss: 1.014480710029602,grad_norm: 0.9999999239259718, iteration: 397496
loss: 1.0535674095153809,grad_norm: 0.8904094747213184, iteration: 397497
loss: 1.0423754453659058,grad_norm: 0.9999995988586766, iteration: 397498
loss: 1.1540473699569702,grad_norm: 1.000000063246789, iteration: 397499
loss: 0.992415189743042,grad_norm: 0.6556726824764871, iteration: 397500
loss: 0.9556207656860352,grad_norm: 0.797614128995642, iteration: 397501
loss: 1.0174643993377686,grad_norm: 0.9999995530783272, iteration: 397502
loss: 0.9867005348205566,grad_norm: 0.9999991326715387, iteration: 397503
loss: 0.9906293749809265,grad_norm: 0.7766648371185593, iteration: 397504
loss: 0.9969200491905212,grad_norm: 0.9999990929604288, iteration: 397505
loss: 1.022168755531311,grad_norm: 0.9999991412978643, iteration: 397506
loss: 0.9867640733718872,grad_norm: 0.7873477774627339, iteration: 397507
loss: 1.0031570196151733,grad_norm: 0.9530198186363544, iteration: 397508
loss: 1.0085937976837158,grad_norm: 0.9082490048907109, iteration: 397509
loss: 0.9805443286895752,grad_norm: 0.7970083880689038, iteration: 397510
loss: 1.0128039121627808,grad_norm: 0.8033026905932381, iteration: 397511
loss: 1.0036091804504395,grad_norm: 0.73372064759254, iteration: 397512
loss: 0.9678059816360474,grad_norm: 0.9999995444381704, iteration: 397513
loss: 1.077540636062622,grad_norm: 0.9999993959881673, iteration: 397514
loss: 1.0150806903839111,grad_norm: 0.7701671562761652, iteration: 397515
loss: 1.0647194385528564,grad_norm: 0.9999996602821836, iteration: 397516
loss: 1.014634132385254,grad_norm: 0.7524379641114004, iteration: 397517
loss: 1.005365014076233,grad_norm: 0.7228850819938525, iteration: 397518
loss: 0.9877558350563049,grad_norm: 0.9999990958155195, iteration: 397519
loss: 1.0096291303634644,grad_norm: 0.9999994698101398, iteration: 397520
loss: 1.0285829305648804,grad_norm: 0.9999998663655438, iteration: 397521
loss: 1.0110379457473755,grad_norm: 0.7718811203274862, iteration: 397522
loss: 1.0129711627960205,grad_norm: 0.9999991482229108, iteration: 397523
loss: 0.9861588478088379,grad_norm: 0.8060632298734246, iteration: 397524
loss: 0.988171398639679,grad_norm: 0.9999989094403423, iteration: 397525
loss: 0.9956424236297607,grad_norm: 0.8287384489218799, iteration: 397526
loss: 1.0344847440719604,grad_norm: 0.7553870046527417, iteration: 397527
loss: 1.1147286891937256,grad_norm: 0.9999998538867632, iteration: 397528
loss: 1.0234293937683105,grad_norm: 0.9999999633643913, iteration: 397529
loss: 0.9494499564170837,grad_norm: 0.9999991557949234, iteration: 397530
loss: 1.0054128170013428,grad_norm: 0.7561345721566011, iteration: 397531
loss: 1.0233243703842163,grad_norm: 0.9999997930926967, iteration: 397532
loss: 1.0144284963607788,grad_norm: 0.9999998607359873, iteration: 397533
loss: 1.018829107284546,grad_norm: 0.8503796088522326, iteration: 397534
loss: 1.0713104009628296,grad_norm: 0.9999991420913052, iteration: 397535
loss: 1.0454719066619873,grad_norm: 0.9999994579365106, iteration: 397536
loss: 0.9920088648796082,grad_norm: 0.9999991121065496, iteration: 397537
loss: 1.1049495935440063,grad_norm: 0.8685878596113684, iteration: 397538
loss: 1.0438841581344604,grad_norm: 0.7521393369719367, iteration: 397539
loss: 1.0034233331680298,grad_norm: 0.8832039508393281, iteration: 397540
loss: 1.0453654527664185,grad_norm: 0.9999996907998588, iteration: 397541
loss: 1.026397705078125,grad_norm: 0.7580661989022736, iteration: 397542
loss: 1.047215223312378,grad_norm: 0.9999990293018283, iteration: 397543
loss: 1.0425699949264526,grad_norm: 0.7265961961395124, iteration: 397544
loss: 0.9900903105735779,grad_norm: 0.9234288004370997, iteration: 397545
loss: 1.0669935941696167,grad_norm: 0.9999995594242266, iteration: 397546
loss: 1.0747852325439453,grad_norm: 0.9999994476390401, iteration: 397547
loss: 1.0376375913619995,grad_norm: 0.7475590643604361, iteration: 397548
loss: 0.9957215785980225,grad_norm: 0.9505553461675963, iteration: 397549
loss: 1.0913050174713135,grad_norm: 0.9999991481981554, iteration: 397550
loss: 1.1558555364608765,grad_norm: 1.0000000431503537, iteration: 397551
loss: 1.122162938117981,grad_norm: 0.9999997107916664, iteration: 397552
loss: 1.019448161125183,grad_norm: 0.8512606061357969, iteration: 397553
loss: 1.110680341720581,grad_norm: 0.9999997872616162, iteration: 397554
loss: 1.0338104963302612,grad_norm: 0.9999990804577139, iteration: 397555
loss: 1.3916418552398682,grad_norm: 0.9999994554207946, iteration: 397556
loss: 1.0249824523925781,grad_norm: 0.9999992888740786, iteration: 397557
loss: 1.0693386793136597,grad_norm: 0.9999992680521709, iteration: 397558
loss: 1.0869444608688354,grad_norm: 0.9999995551070149, iteration: 397559
loss: 1.1592490673065186,grad_norm: 0.9999994116211894, iteration: 397560
loss: 1.134164571762085,grad_norm: 0.9999994450367524, iteration: 397561
loss: 1.1208573579788208,grad_norm: 0.9999997760166258, iteration: 397562
loss: 1.0226165056228638,grad_norm: 0.6861790061074563, iteration: 397563
loss: 1.0314644575119019,grad_norm: 0.999999288536385, iteration: 397564
loss: 1.0537641048431396,grad_norm: 0.9999993877305673, iteration: 397565
loss: 1.0495809316635132,grad_norm: 0.9999992816025715, iteration: 397566
loss: 1.016751766204834,grad_norm: 0.8488329853400133, iteration: 397567
loss: 1.107246994972229,grad_norm: 0.9999992293778848, iteration: 397568
loss: 1.0762600898742676,grad_norm: 0.6558098394062611, iteration: 397569
loss: 1.0307447910308838,grad_norm: 0.9145684971600947, iteration: 397570
loss: 1.1142592430114746,grad_norm: 0.9999998511961385, iteration: 397571
loss: 1.1124935150146484,grad_norm: 0.9999992002375985, iteration: 397572
loss: 1.1172940731048584,grad_norm: 0.999999278314055, iteration: 397573
loss: 1.139204502105713,grad_norm: 0.9999993428828801, iteration: 397574
loss: 1.0301711559295654,grad_norm: 0.830705984153258, iteration: 397575
loss: 1.1140413284301758,grad_norm: 0.9999996510362626, iteration: 397576
loss: 1.0337626934051514,grad_norm: 0.9796664861627773, iteration: 397577
loss: 1.0710811614990234,grad_norm: 1.0000000129480506, iteration: 397578
loss: 1.1430904865264893,grad_norm: 0.9999991545293941, iteration: 397579
loss: 1.0714104175567627,grad_norm: 0.9999999478011355, iteration: 397580
loss: 1.0610346794128418,grad_norm: 0.7140668804961422, iteration: 397581
loss: 1.0066158771514893,grad_norm: 0.763407895129589, iteration: 397582
loss: 1.0364158153533936,grad_norm: 0.8653961293985318, iteration: 397583
loss: 1.0302964448928833,grad_norm: 0.7613070265336936, iteration: 397584
loss: 1.180220127105713,grad_norm: 0.9999999603132812, iteration: 397585
loss: 0.9834603071212769,grad_norm: 0.8257279494533087, iteration: 397586
loss: 0.9693571925163269,grad_norm: 0.7092287699092096, iteration: 397587
loss: 1.0559066534042358,grad_norm: 0.9999990432493725, iteration: 397588
loss: 1.1552573442459106,grad_norm: 0.9999990247609988, iteration: 397589
loss: 1.1206319332122803,grad_norm: 0.9999993555808306, iteration: 397590
loss: 1.0354135036468506,grad_norm: 0.9999989792248838, iteration: 397591
loss: 1.0251796245574951,grad_norm: 0.9206900714906828, iteration: 397592
loss: 0.9582017660140991,grad_norm: 0.9999992899415472, iteration: 397593
loss: 1.0829122066497803,grad_norm: 0.9999993336179706, iteration: 397594
loss: 1.0651236772537231,grad_norm: 0.9999999260680976, iteration: 397595
loss: 1.0612629652023315,grad_norm: 0.9999992907808742, iteration: 397596
loss: 1.143067479133606,grad_norm: 0.9999997306950901, iteration: 397597
loss: 1.0932624340057373,grad_norm: 0.9999998680716357, iteration: 397598
loss: 0.9793906807899475,grad_norm: 0.8367676617151127, iteration: 397599
loss: 1.1635090112686157,grad_norm: 0.9999992936604881, iteration: 397600
loss: 1.129740834236145,grad_norm: 0.9757729964595087, iteration: 397601
loss: 1.0967992544174194,grad_norm: 0.9999994800657797, iteration: 397602
loss: 1.1314665079116821,grad_norm: 0.8164139743648213, iteration: 397603
loss: 1.044001817703247,grad_norm: 0.9999998984867989, iteration: 397604
loss: 1.0237979888916016,grad_norm: 0.9999991849279428, iteration: 397605
loss: 1.0857162475585938,grad_norm: 0.9999999707599347, iteration: 397606
loss: 1.0613559484481812,grad_norm: 0.9999998910625317, iteration: 397607
loss: 1.1183029413223267,grad_norm: 0.9999998361429271, iteration: 397608
loss: 1.0661576986312866,grad_norm: 0.793313070616398, iteration: 397609
loss: 1.1229132413864136,grad_norm: 0.9999996661991342, iteration: 397610
loss: 1.1092555522918701,grad_norm: 0.9999998877540037, iteration: 397611
loss: 1.002029538154602,grad_norm: 0.8302446417346449, iteration: 397612
loss: 1.0207122564315796,grad_norm: 0.9279634685734445, iteration: 397613
loss: 1.045314073562622,grad_norm: 0.9982069445585795, iteration: 397614
loss: 1.0104621648788452,grad_norm: 0.9999997111693762, iteration: 397615
loss: 0.9751765131950378,grad_norm: 0.7755839565789797, iteration: 397616
loss: 1.0568958520889282,grad_norm: 0.9999993874740105, iteration: 397617
loss: 1.0623866319656372,grad_norm: 0.8570706651715041, iteration: 397618
loss: 0.9925652146339417,grad_norm: 0.9456281792049348, iteration: 397619
loss: 1.0441547632217407,grad_norm: 0.6508865878256411, iteration: 397620
loss: 1.0541175603866577,grad_norm: 0.9812644665377694, iteration: 397621
loss: 1.0222896337509155,grad_norm: 0.8253793814047938, iteration: 397622
loss: 1.0259000062942505,grad_norm: 0.9999996030057398, iteration: 397623
loss: 1.0552293062210083,grad_norm: 0.7593708817605872, iteration: 397624
loss: 1.0060590505599976,grad_norm: 0.7684855060876923, iteration: 397625
loss: 1.077433705329895,grad_norm: 0.9999991092287502, iteration: 397626
loss: 1.013331651687622,grad_norm: 0.7999741569252945, iteration: 397627
loss: 1.0595602989196777,grad_norm: 0.9828226283760905, iteration: 397628
loss: 1.0855551958084106,grad_norm: 0.9999998752156256, iteration: 397629
loss: 1.0054042339324951,grad_norm: 0.9999992882985014, iteration: 397630
loss: 1.063549518585205,grad_norm: 0.9999991069402837, iteration: 397631
loss: 1.0507858991622925,grad_norm: 0.999999145810553, iteration: 397632
loss: 1.0643888711929321,grad_norm: 0.9046504840394185, iteration: 397633
loss: 1.0785102844238281,grad_norm: 0.9999997936048951, iteration: 397634
loss: 1.0400712490081787,grad_norm: 0.7997511059379874, iteration: 397635
loss: 1.0549992322921753,grad_norm: 0.8737428310458553, iteration: 397636
loss: 1.0369013547897339,grad_norm: 0.8688185466977535, iteration: 397637
loss: 0.9951821565628052,grad_norm: 0.9999998496528141, iteration: 397638
loss: 1.0268412828445435,grad_norm: 0.9999998931428344, iteration: 397639
loss: 1.0188124179840088,grad_norm: 0.9999998148326599, iteration: 397640
loss: 1.0412280559539795,grad_norm: 0.9999993527872412, iteration: 397641
loss: 0.9998014569282532,grad_norm: 0.9999994510646031, iteration: 397642
loss: 1.0145394802093506,grad_norm: 0.9999998526732216, iteration: 397643
loss: 1.0097568035125732,grad_norm: 0.7977527293530254, iteration: 397644
loss: 1.022650122642517,grad_norm: 0.707658666580286, iteration: 397645
loss: 0.9922635555267334,grad_norm: 0.8968610650128794, iteration: 397646
loss: 0.967262327671051,grad_norm: 0.8826478888069712, iteration: 397647
loss: 1.0311905145645142,grad_norm: 0.7444622233984226, iteration: 397648
loss: 1.0178160667419434,grad_norm: 0.9999992023004377, iteration: 397649
loss: 1.0335922241210938,grad_norm: 0.9677431004157651, iteration: 397650
loss: 1.016197919845581,grad_norm: 0.9999999064488093, iteration: 397651
loss: 1.0234841108322144,grad_norm: 0.9580130663646771, iteration: 397652
loss: 1.0099034309387207,grad_norm: 0.9999990820437437, iteration: 397653
loss: 0.9696903824806213,grad_norm: 0.7829184342012822, iteration: 397654
loss: 1.0184590816497803,grad_norm: 0.786800826968458, iteration: 397655
loss: 0.9778059124946594,grad_norm: 0.764124422867234, iteration: 397656
loss: 1.0236784219741821,grad_norm: 0.9999998432240219, iteration: 397657
loss: 1.0110725164413452,grad_norm: 0.8474729137980168, iteration: 397658
loss: 1.0080240964889526,grad_norm: 0.9356554332599546, iteration: 397659
loss: 1.0688345432281494,grad_norm: 0.9999992902322975, iteration: 397660
loss: 1.0722697973251343,grad_norm: 0.8774619007752141, iteration: 397661
loss: 1.0415383577346802,grad_norm: 0.9999995128362706, iteration: 397662
loss: 1.0054187774658203,grad_norm: 0.6092863608271594, iteration: 397663
loss: 1.025948405265808,grad_norm: 0.8507671770746768, iteration: 397664
loss: 0.9860144257545471,grad_norm: 0.9272101539132188, iteration: 397665
loss: 0.9978431463241577,grad_norm: 0.9233140630418951, iteration: 397666
loss: 1.113162875175476,grad_norm: 0.999999413036055, iteration: 397667
loss: 0.9935356974601746,grad_norm: 0.7043525391257734, iteration: 397668
loss: 0.9778146743774414,grad_norm: 0.8196802956693203, iteration: 397669
loss: 0.9997138381004333,grad_norm: 0.8616393250862779, iteration: 397670
loss: 1.0191670656204224,grad_norm: 0.817954493429377, iteration: 397671
loss: 1.0480766296386719,grad_norm: 0.9999991927741775, iteration: 397672
loss: 0.9833124876022339,grad_norm: 0.9999990960715018, iteration: 397673
loss: 0.9654188752174377,grad_norm: 0.9562873300994327, iteration: 397674
loss: 0.9485663771629333,grad_norm: 0.7340582062004459, iteration: 397675
loss: 0.994674026966095,grad_norm: 0.9288462496336353, iteration: 397676
loss: 1.0102970600128174,grad_norm: 0.9248549259704364, iteration: 397677
loss: 0.9700877070426941,grad_norm: 0.6232677224070007, iteration: 397678
loss: 1.0232585668563843,grad_norm: 0.651507415055022, iteration: 397679
loss: 1.0128096342086792,grad_norm: 0.8106372251810433, iteration: 397680
loss: 1.0539504289627075,grad_norm: 0.999999323688435, iteration: 397681
loss: 0.9997943639755249,grad_norm: 0.9698168748762673, iteration: 397682
loss: 0.9760787487030029,grad_norm: 0.7811415592472313, iteration: 397683
loss: 1.0281243324279785,grad_norm: 0.6651826971075593, iteration: 397684
loss: 1.0509599447250366,grad_norm: 0.9999993564043572, iteration: 397685
loss: 0.9881184697151184,grad_norm: 0.7254363916231009, iteration: 397686
loss: 1.0304181575775146,grad_norm: 0.7964009137525114, iteration: 397687
loss: 0.9786180853843689,grad_norm: 0.9999992108935076, iteration: 397688
loss: 0.9806191325187683,grad_norm: 0.6458001284567926, iteration: 397689
loss: 1.0230153799057007,grad_norm: 0.8629901062889392, iteration: 397690
loss: 1.0093201398849487,grad_norm: 0.8656161604682245, iteration: 397691
loss: 1.0080249309539795,grad_norm: 0.8111935999124518, iteration: 397692
loss: 0.9817715287208557,grad_norm: 0.7011660819900195, iteration: 397693
loss: 1.0303590297698975,grad_norm: 0.9296078386639555, iteration: 397694
loss: 1.0368469953536987,grad_norm: 0.8693956833959058, iteration: 397695
loss: 1.0044851303100586,grad_norm: 0.871116102623882, iteration: 397696
loss: 1.0018502473831177,grad_norm: 0.8768603896292646, iteration: 397697
loss: 1.0404585599899292,grad_norm: 0.9999993441021348, iteration: 397698
loss: 0.9855494499206543,grad_norm: 0.7997191301370734, iteration: 397699
loss: 1.018700361251831,grad_norm: 0.9999990222664112, iteration: 397700
loss: 0.9855496883392334,grad_norm: 0.9284905137197916, iteration: 397701
loss: 0.9701387882232666,grad_norm: 0.8949066031807963, iteration: 397702
loss: 0.9686696529388428,grad_norm: 0.9284875804218156, iteration: 397703
loss: 0.9862663745880127,grad_norm: 0.7942867419633833, iteration: 397704
loss: 1.04257071018219,grad_norm: 0.850821881231132, iteration: 397705
loss: 1.023359775543213,grad_norm: 0.7624549600172676, iteration: 397706
loss: 0.9818689227104187,grad_norm: 0.7912038950115333, iteration: 397707
loss: 0.9965141415596008,grad_norm: 0.7008761035768144, iteration: 397708
loss: 1.0171362161636353,grad_norm: 0.8540670776934802, iteration: 397709
loss: 1.0128850936889648,grad_norm: 0.881871816822775, iteration: 397710
loss: 1.0389209985733032,grad_norm: 0.9999991954520409, iteration: 397711
loss: 0.981462836265564,grad_norm: 0.7402557843060497, iteration: 397712
loss: 1.0408761501312256,grad_norm: 0.9999991777492271, iteration: 397713
loss: 1.014656901359558,grad_norm: 0.8673099928626209, iteration: 397714
loss: 1.0100616216659546,grad_norm: 0.7606688148530976, iteration: 397715
loss: 1.032305359840393,grad_norm: 0.6669605656161265, iteration: 397716
loss: 1.045258641242981,grad_norm: 0.9999998988733785, iteration: 397717
loss: 1.023288369178772,grad_norm: 1.0000001013794841, iteration: 397718
loss: 1.022829294204712,grad_norm: 0.780888768327192, iteration: 397719
loss: 0.9600590467453003,grad_norm: 0.8110420964094144, iteration: 397720
loss: 0.9895840883255005,grad_norm: 0.9999991925093609, iteration: 397721
loss: 1.0343854427337646,grad_norm: 0.9999999118792181, iteration: 397722
loss: 1.0019452571868896,grad_norm: 0.770375218185685, iteration: 397723
loss: 0.972339391708374,grad_norm: 0.9999995535559039, iteration: 397724
loss: 1.0709855556488037,grad_norm: 0.8637718252032808, iteration: 397725
loss: 1.0019323825836182,grad_norm: 0.7790965926012358, iteration: 397726
loss: 0.9924166798591614,grad_norm: 0.7634165410784897, iteration: 397727
loss: 1.017212986946106,grad_norm: 0.8486170532887977, iteration: 397728
loss: 1.0327757596969604,grad_norm: 0.8656748386848531, iteration: 397729
loss: 1.0421946048736572,grad_norm: 0.8385708952419553, iteration: 397730
loss: 0.9773918390274048,grad_norm: 0.9172396861963169, iteration: 397731
loss: 0.9908087849617004,grad_norm: 0.7991157886202652, iteration: 397732
loss: 1.1307690143585205,grad_norm: 0.9999992704175154, iteration: 397733
loss: 1.0030404329299927,grad_norm: 0.8038775343958117, iteration: 397734
loss: 0.978644073009491,grad_norm: 0.8003902163187985, iteration: 397735
loss: 1.0149507522583008,grad_norm: 0.7529190379590456, iteration: 397736
loss: 0.980303168296814,grad_norm: 0.7670973294716487, iteration: 397737
loss: 1.0762689113616943,grad_norm: 0.8772580563037545, iteration: 397738
loss: 1.006568431854248,grad_norm: 0.7181080706107784, iteration: 397739
loss: 1.0347236394882202,grad_norm: 0.7291321281592312, iteration: 397740
loss: 1.0080732107162476,grad_norm: 0.7635719824410753, iteration: 397741
loss: 0.9994581341743469,grad_norm: 0.7837601440393286, iteration: 397742
loss: 0.9748562574386597,grad_norm: 0.8093122716375086, iteration: 397743
loss: 0.9939928650856018,grad_norm: 0.7026811250282611, iteration: 397744
loss: 1.0164610147476196,grad_norm: 0.7796744111226963, iteration: 397745
loss: 0.9754177927970886,grad_norm: 0.7825179859377469, iteration: 397746
loss: 1.001065731048584,grad_norm: 0.7185977944137579, iteration: 397747
loss: 1.0254267454147339,grad_norm: 0.8028802340230682, iteration: 397748
loss: 1.0012439489364624,grad_norm: 0.6008101252246253, iteration: 397749
loss: 0.9986782073974609,grad_norm: 0.7394959032586444, iteration: 397750
loss: 0.9966472387313843,grad_norm: 0.8954814466427048, iteration: 397751
loss: 1.0334304571151733,grad_norm: 0.8588869660893134, iteration: 397752
loss: 0.9489341378211975,grad_norm: 0.9999990604172956, iteration: 397753
loss: 1.147782564163208,grad_norm: 0.999999117323337, iteration: 397754
loss: 0.9942917227745056,grad_norm: 0.9001554836911039, iteration: 397755
loss: 1.0234581232070923,grad_norm: 0.8575941237249823, iteration: 397756
loss: 1.0192159414291382,grad_norm: 0.7792610081749466, iteration: 397757
loss: 1.0269650220870972,grad_norm: 0.9999994221586347, iteration: 397758
loss: 1.030836582183838,grad_norm: 0.9367422076011898, iteration: 397759
loss: 1.2034047842025757,grad_norm: 0.999999877526341, iteration: 397760
loss: 0.9933492541313171,grad_norm: 0.8207772425962786, iteration: 397761
loss: 0.9785589575767517,grad_norm: 0.7566866830711446, iteration: 397762
loss: 0.9847017526626587,grad_norm: 0.7675960039484608, iteration: 397763
loss: 1.0303289890289307,grad_norm: 0.7837860408489316, iteration: 397764
loss: 1.0016998052597046,grad_norm: 0.892304009609117, iteration: 397765
loss: 1.0092298984527588,grad_norm: 0.8153457190473254, iteration: 397766
loss: 0.9990127086639404,grad_norm: 0.7214779317953569, iteration: 397767
loss: 1.0087827444076538,grad_norm: 0.9244791202934111, iteration: 397768
loss: 0.9646996855735779,grad_norm: 0.658599266122393, iteration: 397769
loss: 1.0007058382034302,grad_norm: 0.950299782161754, iteration: 397770
loss: 1.025059461593628,grad_norm: 0.9422310799013554, iteration: 397771
loss: 0.9754759073257446,grad_norm: 0.8491972224491848, iteration: 397772
loss: 0.9758029580116272,grad_norm: 0.7676196750312846, iteration: 397773
loss: 0.9890974760055542,grad_norm: 0.9204851350583381, iteration: 397774
loss: 1.0046831369400024,grad_norm: 0.86844085288348, iteration: 397775
loss: 1.0005477666854858,grad_norm: 0.999999435102188, iteration: 397776
loss: 0.9536862373352051,grad_norm: 0.9768818787854381, iteration: 397777
loss: 0.9853576421737671,grad_norm: 0.7646017289737468, iteration: 397778
loss: 1.0110721588134766,grad_norm: 0.9999994803869351, iteration: 397779
loss: 1.0508537292480469,grad_norm: 0.999999840705993, iteration: 397780
loss: 1.0405323505401611,grad_norm: 0.9999990518092557, iteration: 397781
loss: 1.0304187536239624,grad_norm: 0.9999997277659987, iteration: 397782
loss: 1.0140610933303833,grad_norm: 0.8366437506530928, iteration: 397783
loss: 1.0390865802764893,grad_norm: 0.6943207266904716, iteration: 397784
loss: 1.1938490867614746,grad_norm: 0.9999997266536352, iteration: 397785
loss: 0.9593706727027893,grad_norm: 0.8883850215845313, iteration: 397786
loss: 1.03375244140625,grad_norm: 0.8664918025224987, iteration: 397787
loss: 0.962226927280426,grad_norm: 0.8305302095491346, iteration: 397788
loss: 0.9701391458511353,grad_norm: 0.7256663569032342, iteration: 397789
loss: 1.0133676528930664,grad_norm: 0.9999989550246815, iteration: 397790
loss: 0.9946027398109436,grad_norm: 0.802317837060472, iteration: 397791
loss: 1.0600818395614624,grad_norm: 0.9999999837119352, iteration: 397792
loss: 0.9610611796379089,grad_norm: 0.7521934566851102, iteration: 397793
loss: 1.0106592178344727,grad_norm: 0.9999990378346657, iteration: 397794
loss: 1.0069968700408936,grad_norm: 0.890719149728004, iteration: 397795
loss: 1.0240285396575928,grad_norm: 0.7599014596925351, iteration: 397796
loss: 0.9639887809753418,grad_norm: 0.8736664363382283, iteration: 397797
loss: 1.0067038536071777,grad_norm: 0.81388565328792, iteration: 397798
loss: 1.009125828742981,grad_norm: 0.9999991404658659, iteration: 397799
loss: 1.0088233947753906,grad_norm: 0.9397473713482254, iteration: 397800
loss: 0.9919799566268921,grad_norm: 0.7433406189096358, iteration: 397801
loss: 1.019909143447876,grad_norm: 0.9999993720927607, iteration: 397802
loss: 1.0166088342666626,grad_norm: 0.9024592460099952, iteration: 397803
loss: 1.0151883363723755,grad_norm: 0.787838985522956, iteration: 397804
loss: 1.7126929759979248,grad_norm: 1.000000065554615, iteration: 397805
loss: 0.9922776222229004,grad_norm: 0.623039930137862, iteration: 397806
loss: 1.0081672668457031,grad_norm: 0.8481691381127087, iteration: 397807
loss: 1.0041334629058838,grad_norm: 0.7143009642348251, iteration: 397808
loss: 1.0658841133117676,grad_norm: 0.9999997487756717, iteration: 397809
loss: 1.120606541633606,grad_norm: 0.9999999192635268, iteration: 397810
loss: 1.0250678062438965,grad_norm: 0.9999992518671501, iteration: 397811
loss: 1.0050029754638672,grad_norm: 0.8222011779110233, iteration: 397812
loss: 0.9811009168624878,grad_norm: 0.9999991146485369, iteration: 397813
loss: 0.9824278950691223,grad_norm: 0.7300227245457936, iteration: 397814
loss: 1.0028371810913086,grad_norm: 0.8153376391020687, iteration: 397815
loss: 1.0206143856048584,grad_norm: 0.9999999275297013, iteration: 397816
loss: 1.0046108961105347,grad_norm: 0.7935038240039426, iteration: 397817
loss: 1.0004347562789917,grad_norm: 0.7503539922902247, iteration: 397818
loss: 0.9753798842430115,grad_norm: 0.6824306332200879, iteration: 397819
loss: 1.0652648210525513,grad_norm: 0.9999997217977153, iteration: 397820
loss: 1.0383938550949097,grad_norm: 0.7435525275736884, iteration: 397821
loss: 1.0209330320358276,grad_norm: 0.7993564913465533, iteration: 397822
loss: 0.970805287361145,grad_norm: 0.8946586341366733, iteration: 397823
loss: 1.0409135818481445,grad_norm: 0.779727173276327, iteration: 397824
loss: 1.0144579410552979,grad_norm: 0.9999991041500647, iteration: 397825
loss: 1.0664448738098145,grad_norm: 0.9999995484359178, iteration: 397826
loss: 0.9643129110336304,grad_norm: 0.9609901468191172, iteration: 397827
loss: 1.1042602062225342,grad_norm: 0.9999998584057617, iteration: 397828
loss: 1.0229766368865967,grad_norm: 0.8634918652779212, iteration: 397829
loss: 0.9879051446914673,grad_norm: 0.8797084711674198, iteration: 397830
loss: 0.9772072434425354,grad_norm: 0.7207474023745846, iteration: 397831
loss: 1.0131652355194092,grad_norm: 0.7860158150468237, iteration: 397832
loss: 1.0346540212631226,grad_norm: 0.9999993122941221, iteration: 397833
loss: 0.9632388949394226,grad_norm: 0.7612672057718428, iteration: 397834
loss: 1.091955304145813,grad_norm: 0.999999844809782, iteration: 397835
loss: 1.0441707372665405,grad_norm: 0.9999999384583214, iteration: 397836
loss: 1.179792046546936,grad_norm: 0.9999996977842327, iteration: 397837
loss: 1.0088818073272705,grad_norm: 0.8145063669003048, iteration: 397838
loss: 1.0260218381881714,grad_norm: 0.7270149944753609, iteration: 397839
loss: 1.0601633787155151,grad_norm: 0.9999991406647228, iteration: 397840
loss: 0.9696102738380432,grad_norm: 0.8872401540932745, iteration: 397841
loss: 1.0048655271530151,grad_norm: 0.7013603009338689, iteration: 397842
loss: 1.0377285480499268,grad_norm: 0.8303915195824848, iteration: 397843
loss: 0.9780057668685913,grad_norm: 0.7525180025811753, iteration: 397844
loss: 1.0311176776885986,grad_norm: 0.9999998795527744, iteration: 397845
loss: 1.060266137123108,grad_norm: 0.9189063635300309, iteration: 397846
loss: 1.0052366256713867,grad_norm: 0.7616679374865727, iteration: 397847
loss: 0.970350980758667,grad_norm: 0.9015880417564417, iteration: 397848
loss: 1.0180383920669556,grad_norm: 0.798809935089463, iteration: 397849
loss: 0.9860330820083618,grad_norm: 0.5862479819243004, iteration: 397850
loss: 0.951534628868103,grad_norm: 0.999999844690588, iteration: 397851
loss: 0.9745093584060669,grad_norm: 0.9193826459033411, iteration: 397852
loss: 0.9740525484085083,grad_norm: 0.7304135505775405, iteration: 397853
loss: 0.9899436831474304,grad_norm: 0.7502838683038793, iteration: 397854
loss: 1.007739543914795,grad_norm: 0.8346898159862063, iteration: 397855
loss: 0.9744415283203125,grad_norm: 0.8787916108077711, iteration: 397856
loss: 1.0278725624084473,grad_norm: 0.6901796140721984, iteration: 397857
loss: 0.989511251449585,grad_norm: 0.8356754299685775, iteration: 397858
loss: 1.0093783140182495,grad_norm: 0.7541551279019033, iteration: 397859
loss: 1.0491514205932617,grad_norm: 0.8345955997520973, iteration: 397860
loss: 1.0618821382522583,grad_norm: 0.8641587787443734, iteration: 397861
loss: 0.9901504516601562,grad_norm: 0.7240311127292882, iteration: 397862
loss: 0.9736801385879517,grad_norm: 0.7379701625691522, iteration: 397863
loss: 1.0979392528533936,grad_norm: 0.9999991677132807, iteration: 397864
loss: 1.0069853067398071,grad_norm: 0.8732540549575585, iteration: 397865
loss: 0.9971874356269836,grad_norm: 0.8221222634154064, iteration: 397866
loss: 1.008439302444458,grad_norm: 0.999999224554786, iteration: 397867
loss: 0.9780169129371643,grad_norm: 0.8756395189731077, iteration: 397868
loss: 1.0360499620437622,grad_norm: 0.999999426232149, iteration: 397869
loss: 1.0209845304489136,grad_norm: 0.7814730175750307, iteration: 397870
loss: 0.9976464509963989,grad_norm: 0.8175247392142065, iteration: 397871
loss: 0.9933565258979797,grad_norm: 0.7845332644774041, iteration: 397872
loss: 0.9834027290344238,grad_norm: 0.7394635648765356, iteration: 397873
loss: 0.9865918159484863,grad_norm: 0.9999994726402757, iteration: 397874
loss: 1.0909490585327148,grad_norm: 0.7903452237095541, iteration: 397875
loss: 1.0217459201812744,grad_norm: 0.6918901083304274, iteration: 397876
loss: 1.0039509534835815,grad_norm: 0.8585814052623896, iteration: 397877
loss: 0.9653079509735107,grad_norm: 0.7973540845597474, iteration: 397878
loss: 0.9881850481033325,grad_norm: 0.905764006058675, iteration: 397879
loss: 0.981342613697052,grad_norm: 0.7727306965128129, iteration: 397880
loss: 1.025160551071167,grad_norm: 0.7984256192942549, iteration: 397881
loss: 1.025642991065979,grad_norm: 0.9190886631998606, iteration: 397882
loss: 1.008009672164917,grad_norm: 0.72889704697352, iteration: 397883
loss: 0.9986041188240051,grad_norm: 0.7443897566409753, iteration: 397884
loss: 1.0254923105239868,grad_norm: 0.8425685301339935, iteration: 397885
loss: 1.0376765727996826,grad_norm: 0.9999993563398246, iteration: 397886
loss: 1.0284031629562378,grad_norm: 0.7572377052600504, iteration: 397887
loss: 0.9834151268005371,grad_norm: 0.999999810785547, iteration: 397888
loss: 1.004340410232544,grad_norm: 0.999999504109282, iteration: 397889
loss: 1.0093523263931274,grad_norm: 0.9999995544501986, iteration: 397890
loss: 0.9811884760856628,grad_norm: 0.6784406465628399, iteration: 397891
loss: 0.9999247789382935,grad_norm: 0.8426040459581345, iteration: 397892
loss: 1.0483866930007935,grad_norm: 0.9999995218290569, iteration: 397893
loss: 0.9959648251533508,grad_norm: 0.999999089007192, iteration: 397894
loss: 0.9898379445075989,grad_norm: 0.7136515026040094, iteration: 397895
loss: 0.9913031458854675,grad_norm: 0.7345296926670233, iteration: 397896
loss: 0.9903544783592224,grad_norm: 0.7548862970522672, iteration: 397897
loss: 0.981853723526001,grad_norm: 0.9319992033889982, iteration: 397898
loss: 0.9903786778450012,grad_norm: 0.7966582693014342, iteration: 397899
loss: 1.0067529678344727,grad_norm: 0.8352797673974859, iteration: 397900
loss: 0.9849571585655212,grad_norm: 0.7431279191562115, iteration: 397901
loss: 0.9490742683410645,grad_norm: 0.8360337177291072, iteration: 397902
loss: 1.0159441232681274,grad_norm: 0.9999993999769529, iteration: 397903
loss: 1.0245569944381714,grad_norm: 0.9073114314608739, iteration: 397904
loss: 1.0023384094238281,grad_norm: 0.709774015510835, iteration: 397905
loss: 1.0102537870407104,grad_norm: 0.9290232593115969, iteration: 397906
loss: 1.0109437704086304,grad_norm: 0.817403495656144, iteration: 397907
loss: 0.9809340834617615,grad_norm: 0.8587424917927616, iteration: 397908
loss: 1.013695240020752,grad_norm: 0.8835315667081052, iteration: 397909
loss: 0.9800844192504883,grad_norm: 0.827153579141233, iteration: 397910
loss: 1.0330263376235962,grad_norm: 0.9723577250507524, iteration: 397911
loss: 1.0654047727584839,grad_norm: 0.794099014706699, iteration: 397912
loss: 1.0332919359207153,grad_norm: 0.7655381289003341, iteration: 397913
loss: 1.0066794157028198,grad_norm: 0.8067914362079398, iteration: 397914
loss: 1.012673020362854,grad_norm: 0.7342632221082528, iteration: 397915
loss: 1.0382858514785767,grad_norm: 0.6894380260406205, iteration: 397916
loss: 0.9835055470466614,grad_norm: 0.7382871065339511, iteration: 397917
loss: 0.9924728870391846,grad_norm: 0.7838179039151855, iteration: 397918
loss: 0.9913758039474487,grad_norm: 0.9999994782755173, iteration: 397919
loss: 0.9784348607063293,grad_norm: 0.9999998382208739, iteration: 397920
loss: 0.9941055178642273,grad_norm: 0.7808921674488787, iteration: 397921
loss: 0.9948192834854126,grad_norm: 0.7191644011643988, iteration: 397922
loss: 0.9782431125640869,grad_norm: 0.8741966993034956, iteration: 397923
loss: 1.018568754196167,grad_norm: 0.671950852019425, iteration: 397924
loss: 1.0185898542404175,grad_norm: 0.9999989776938731, iteration: 397925
loss: 1.0021779537200928,grad_norm: 0.7768030141826612, iteration: 397926
loss: 1.0676696300506592,grad_norm: 0.9999989643891652, iteration: 397927
loss: 1.011075735092163,grad_norm: 0.7133603059778305, iteration: 397928
loss: 1.00101900100708,grad_norm: 0.9999992240776047, iteration: 397929
loss: 1.0038094520568848,grad_norm: 0.8446507801053965, iteration: 397930
loss: 0.9847761988639832,grad_norm: 0.7021502303135142, iteration: 397931
loss: 1.0023472309112549,grad_norm: 0.7941615836700473, iteration: 397932
loss: 0.9900439977645874,grad_norm: 0.6696014164877988, iteration: 397933
loss: 1.0135091543197632,grad_norm: 0.655982286444888, iteration: 397934
loss: 1.0101319551467896,grad_norm: 0.6930989748107628, iteration: 397935
loss: 0.983191728591919,grad_norm: 0.6962857826399897, iteration: 397936
loss: 0.9978266358375549,grad_norm: 0.8371202661410058, iteration: 397937
loss: 1.002776026725769,grad_norm: 0.8827044998251452, iteration: 397938
loss: 0.9961481094360352,grad_norm: 0.7461536543081767, iteration: 397939
loss: 0.9983159303665161,grad_norm: 0.7184689224208637, iteration: 397940
loss: 1.020016074180603,grad_norm: 0.7446565876890655, iteration: 397941
loss: 0.9669519066810608,grad_norm: 0.7374439599845026, iteration: 397942
loss: 1.033944010734558,grad_norm: 0.6532026091213934, iteration: 397943
loss: 1.0200444459915161,grad_norm: 0.7442287871849265, iteration: 397944
loss: 1.005124568939209,grad_norm: 0.7864800721544586, iteration: 397945
loss: 0.9518399834632874,grad_norm: 0.8047649064927813, iteration: 397946
loss: 0.9929350018501282,grad_norm: 0.7760519736956758, iteration: 397947
loss: 1.0054545402526855,grad_norm: 0.8329124578712236, iteration: 397948
loss: 0.994297981262207,grad_norm: 0.8894884361343939, iteration: 397949
loss: 0.9753463864326477,grad_norm: 0.9688380257250794, iteration: 397950
loss: 0.959183394908905,grad_norm: 0.9999989803824407, iteration: 397951
loss: 0.9780189990997314,grad_norm: 0.7280831166230254, iteration: 397952
loss: 1.014465093612671,grad_norm: 0.773872215132312, iteration: 397953
loss: 0.9932953119277954,grad_norm: 0.8281363203957395, iteration: 397954
loss: 0.972388505935669,grad_norm: 0.7008284873979106, iteration: 397955
loss: 1.0597443580627441,grad_norm: 0.999998998459132, iteration: 397956
loss: 0.9889035820960999,grad_norm: 0.8833842081786063, iteration: 397957
loss: 1.0148460865020752,grad_norm: 0.7599205817442739, iteration: 397958
loss: 1.0949153900146484,grad_norm: 0.9999991377894991, iteration: 397959
loss: 0.9986206889152527,grad_norm: 0.9999991512357712, iteration: 397960
loss: 0.9843106865882874,grad_norm: 0.814152166086731, iteration: 397961
loss: 0.9919446110725403,grad_norm: 0.8193608166504013, iteration: 397962
loss: 0.9791419506072998,grad_norm: 0.8407154869423605, iteration: 397963
loss: 1.0515122413635254,grad_norm: 0.8699367521140838, iteration: 397964
loss: 0.996292769908905,grad_norm: 0.735828572377143, iteration: 397965
loss: 1.0275213718414307,grad_norm: 0.6690467566232232, iteration: 397966
loss: 1.005216360092163,grad_norm: 0.7485411674966888, iteration: 397967
loss: 0.9820415377616882,grad_norm: 0.8661112981757209, iteration: 397968
loss: 1.0136908292770386,grad_norm: 0.7536148718340352, iteration: 397969
loss: 0.986732006072998,grad_norm: 0.804857061598085, iteration: 397970
loss: 1.04750657081604,grad_norm: 0.7859676042640062, iteration: 397971
loss: 0.9873501658439636,grad_norm: 0.884219123545775, iteration: 397972
loss: 1.022200345993042,grad_norm: 0.720637750443959, iteration: 397973
loss: 0.9642864465713501,grad_norm: 0.7222685554737276, iteration: 397974
loss: 1.009443998336792,grad_norm: 0.8132150893641662, iteration: 397975
loss: 0.9836068153381348,grad_norm: 0.875482023556087, iteration: 397976
loss: 0.978621244430542,grad_norm: 0.8483330643733283, iteration: 397977
loss: 1.0046539306640625,grad_norm: 0.7240701932886051, iteration: 397978
loss: 0.957827627658844,grad_norm: 0.9711501771539025, iteration: 397979
loss: 1.0215083360671997,grad_norm: 0.7394695394990198, iteration: 397980
loss: 0.9861906170845032,grad_norm: 0.9093363307393004, iteration: 397981
loss: 0.9842004776000977,grad_norm: 0.8116749098509611, iteration: 397982
loss: 1.0198827981948853,grad_norm: 0.8052678254071277, iteration: 397983
loss: 1.0028048753738403,grad_norm: 0.6683368856443368, iteration: 397984
loss: 1.0057824850082397,grad_norm: 0.7373199766739864, iteration: 397985
loss: 1.0213725566864014,grad_norm: 0.901128013010958, iteration: 397986
loss: 1.0688453912734985,grad_norm: 0.9999998414713662, iteration: 397987
loss: 0.9929547905921936,grad_norm: 0.9004476106889588, iteration: 397988
loss: 0.9943257570266724,grad_norm: 0.8368816255731265, iteration: 397989
loss: 0.9989504218101501,grad_norm: 0.9098609132133187, iteration: 397990
loss: 0.9892812967300415,grad_norm: 0.8362150858338124, iteration: 397991
loss: 1.0127134323120117,grad_norm: 0.823423650116752, iteration: 397992
loss: 0.9976853132247925,grad_norm: 0.733027239326756, iteration: 397993
loss: 1.0195894241333008,grad_norm: 0.9999991102701885, iteration: 397994
loss: 1.032906413078308,grad_norm: 0.8229734660034617, iteration: 397995
loss: 1.044560432434082,grad_norm: 0.9999996671165448, iteration: 397996
loss: 0.9861904978752136,grad_norm: 0.9754693646062972, iteration: 397997
loss: 0.9973834156990051,grad_norm: 0.7086894399533645, iteration: 397998
loss: 0.9989795088768005,grad_norm: 0.6570294264101131, iteration: 397999
loss: 0.9553840160369873,grad_norm: 0.8169769612538719, iteration: 398000
loss: 0.995942234992981,grad_norm: 0.7788456343588488, iteration: 398001
loss: 1.0091279745101929,grad_norm: 0.8985628893428423, iteration: 398002
loss: 0.9597991108894348,grad_norm: 0.8410724260760287, iteration: 398003
loss: 0.9675092101097107,grad_norm: 0.8332058445712947, iteration: 398004
loss: 0.9587063789367676,grad_norm: 0.8321299588694422, iteration: 398005
loss: 1.0053844451904297,grad_norm: 0.9999995876353833, iteration: 398006
loss: 1.0643396377563477,grad_norm: 0.9999992830197355, iteration: 398007
loss: 0.9921051859855652,grad_norm: 0.8154395639506773, iteration: 398008
loss: 1.0564929246902466,grad_norm: 0.9999999142079071, iteration: 398009
loss: 1.0116240978240967,grad_norm: 0.7507616941647123, iteration: 398010
loss: 1.0119065046310425,grad_norm: 0.9631878439184777, iteration: 398011
loss: 0.9755120277404785,grad_norm: 0.665445037311635, iteration: 398012
loss: 1.0194206237792969,grad_norm: 0.9999989064117752, iteration: 398013
loss: 1.032232403755188,grad_norm: 0.720439765570945, iteration: 398014
loss: 0.9986270666122437,grad_norm: 0.7932367189002404, iteration: 398015
loss: 0.9947420358657837,grad_norm: 0.8072708097885004, iteration: 398016
loss: 1.005754828453064,grad_norm: 0.8461088093498977, iteration: 398017
loss: 0.9787174463272095,grad_norm: 0.8787016838871284, iteration: 398018
loss: 1.0131874084472656,grad_norm: 0.7379262811645199, iteration: 398019
loss: 0.9928463101387024,grad_norm: 0.7775302674177275, iteration: 398020
loss: 0.9934372901916504,grad_norm: 0.7238703616099615, iteration: 398021
loss: 0.9808494448661804,grad_norm: 0.7698163731592738, iteration: 398022
loss: 0.9942691922187805,grad_norm: 0.7463740675536319, iteration: 398023
loss: 0.9860491156578064,grad_norm: 0.8056344693520933, iteration: 398024
loss: 1.0884400606155396,grad_norm: 0.8470672805617057, iteration: 398025
loss: 0.9890800714492798,grad_norm: 0.8318842160045243, iteration: 398026
loss: 0.9694817662239075,grad_norm: 0.7647829562286148, iteration: 398027
loss: 1.0014034509658813,grad_norm: 0.9600412952170313, iteration: 398028
loss: 0.9539505243301392,grad_norm: 0.828065243154809, iteration: 398029
loss: 1.1269793510437012,grad_norm: 0.8065308305728711, iteration: 398030
loss: 0.9749788045883179,grad_norm: 0.9235687838573076, iteration: 398031
loss: 0.9813774824142456,grad_norm: 0.8579670909761032, iteration: 398032
loss: 0.9881923198699951,grad_norm: 0.7628308631820537, iteration: 398033
loss: 0.9543560743331909,grad_norm: 0.7374821735819793, iteration: 398034
loss: 1.0343363285064697,grad_norm: 0.7849063219867833, iteration: 398035
loss: 0.9961962103843689,grad_norm: 0.8646849366327666, iteration: 398036
loss: 1.0443196296691895,grad_norm: 0.7889720064250989, iteration: 398037
loss: 0.9909521341323853,grad_norm: 0.7951322917040287, iteration: 398038
loss: 0.9833054542541504,grad_norm: 0.8513624246617439, iteration: 398039
loss: 0.9611561894416809,grad_norm: 0.7617504526615991, iteration: 398040
loss: 1.1237505674362183,grad_norm: 0.9999991702717236, iteration: 398041
loss: 1.002846121788025,grad_norm: 0.771355100104348, iteration: 398042
loss: 0.9620312452316284,grad_norm: 0.7152898858135622, iteration: 398043
loss: 0.9955217242240906,grad_norm: 0.662425842260925, iteration: 398044
loss: 1.0420881509780884,grad_norm: 0.7378702425160895, iteration: 398045
loss: 1.023307204246521,grad_norm: 0.7836963069120935, iteration: 398046
loss: 1.0726596117019653,grad_norm: 0.9999994466041822, iteration: 398047
loss: 0.968477189540863,grad_norm: 0.7558597779268049, iteration: 398048
loss: 0.9806668162345886,grad_norm: 0.8283744723262623, iteration: 398049
loss: 0.9974744319915771,grad_norm: 0.8649478713000326, iteration: 398050
loss: 1.0269540548324585,grad_norm: 0.7451984783624954, iteration: 398051
loss: 1.0460376739501953,grad_norm: 0.7426117669173528, iteration: 398052
loss: 1.003531575202942,grad_norm: 0.8391741288696251, iteration: 398053
loss: 1.0114991664886475,grad_norm: 0.789441908138347, iteration: 398054
loss: 0.9669655561447144,grad_norm: 0.8386152062332671, iteration: 398055
loss: 1.0764316320419312,grad_norm: 0.9999992421718501, iteration: 398056
loss: 0.9771947264671326,grad_norm: 0.7777831750166153, iteration: 398057
loss: 1.0065065622329712,grad_norm: 0.8162897218061028, iteration: 398058
loss: 1.0129231214523315,grad_norm: 0.7804645012022179, iteration: 398059
loss: 0.9627175331115723,grad_norm: 0.6618410970595308, iteration: 398060
loss: 1.0426523685455322,grad_norm: 0.9999990887611528, iteration: 398061
loss: 1.017056941986084,grad_norm: 0.8515063867032879, iteration: 398062
loss: 1.077176809310913,grad_norm: 0.9999999990467473, iteration: 398063
loss: 1.009072184562683,grad_norm: 0.8086437142045511, iteration: 398064
loss: 1.0226279497146606,grad_norm: 0.8614795153042228, iteration: 398065
loss: 0.9700416326522827,grad_norm: 0.8676441517705368, iteration: 398066
loss: 1.0005453824996948,grad_norm: 0.7780353637332145, iteration: 398067
loss: 1.0028233528137207,grad_norm: 0.651808349013246, iteration: 398068
loss: 0.989845871925354,grad_norm: 0.8944067044429734, iteration: 398069
loss: 0.9717081189155579,grad_norm: 0.8207415560374882, iteration: 398070
loss: 0.9729557037353516,grad_norm: 0.7460184982741517, iteration: 398071
loss: 0.9858763813972473,grad_norm: 0.8213892074510746, iteration: 398072
loss: 1.0221112966537476,grad_norm: 0.864955120725729, iteration: 398073
loss: 1.0200843811035156,grad_norm: 0.833492360044362, iteration: 398074
loss: 1.024075984954834,grad_norm: 0.6863710213570373, iteration: 398075
loss: 1.0059703588485718,grad_norm: 0.8496261590094296, iteration: 398076
loss: 0.9781498312950134,grad_norm: 0.675878449289039, iteration: 398077
loss: 1.0482300519943237,grad_norm: 0.8971472126341713, iteration: 398078
loss: 0.9946589469909668,grad_norm: 0.9999991114885345, iteration: 398079
loss: 1.0115013122558594,grad_norm: 0.7225757562168649, iteration: 398080
loss: 1.0057982206344604,grad_norm: 0.8566459827750604, iteration: 398081
loss: 0.9926365613937378,grad_norm: 0.7014931038082203, iteration: 398082
loss: 1.0096384286880493,grad_norm: 0.747073834136216, iteration: 398083
loss: 0.9933146834373474,grad_norm: 0.7332636190449572, iteration: 398084
loss: 0.9997763633728027,grad_norm: 0.8388363620244838, iteration: 398085
loss: 0.9757519960403442,grad_norm: 0.7032375656464719, iteration: 398086
loss: 0.9998798966407776,grad_norm: 0.8268840435031019, iteration: 398087
loss: 1.0012301206588745,grad_norm: 0.7268161296899723, iteration: 398088
loss: 1.0054829120635986,grad_norm: 0.8187056730449371, iteration: 398089
loss: 1.041447639465332,grad_norm: 0.9176528055451882, iteration: 398090
loss: 1.0093472003936768,grad_norm: 0.740865036830903, iteration: 398091
loss: 0.9493443965911865,grad_norm: 0.8636070234990709, iteration: 398092
loss: 1.0467427968978882,grad_norm: 0.8181713289429513, iteration: 398093
loss: 1.041736364364624,grad_norm: 0.7999819758057573, iteration: 398094
loss: 1.008267879486084,grad_norm: 0.8336697177199176, iteration: 398095
loss: 0.9802935719490051,grad_norm: 0.798286982486532, iteration: 398096
loss: 1.0052640438079834,grad_norm: 0.8022673389579594, iteration: 398097
loss: 1.0311528444290161,grad_norm: 0.9082951817100878, iteration: 398098
loss: 0.9569602608680725,grad_norm: 0.7597034141810798, iteration: 398099
loss: 1.0200927257537842,grad_norm: 0.8086305277226106, iteration: 398100
loss: 1.062062382698059,grad_norm: 0.7429930494549508, iteration: 398101
loss: 1.019087553024292,grad_norm: 0.83985197019074, iteration: 398102
loss: 0.992849588394165,grad_norm: 0.7593644476962378, iteration: 398103
loss: 1.0271985530853271,grad_norm: 0.999999091794875, iteration: 398104
loss: 0.9829480648040771,grad_norm: 0.7828064482331689, iteration: 398105
loss: 1.0275505781173706,grad_norm: 0.8846254050434701, iteration: 398106
loss: 1.0431644916534424,grad_norm: 0.999999920249741, iteration: 398107
loss: 0.9891321659088135,grad_norm: 0.6726452321722634, iteration: 398108
loss: 1.001525640487671,grad_norm: 0.9802248587330186, iteration: 398109
loss: 0.9933605790138245,grad_norm: 0.6397985821966163, iteration: 398110
loss: 1.009511113166809,grad_norm: 0.7479977362438511, iteration: 398111
loss: 1.006689429283142,grad_norm: 0.779608875629656, iteration: 398112
loss: 0.9941540956497192,grad_norm: 0.905625507032697, iteration: 398113
loss: 0.9699691534042358,grad_norm: 0.7145489110981948, iteration: 398114
loss: 0.970696747303009,grad_norm: 0.8058074117651561, iteration: 398115
loss: 1.0610814094543457,grad_norm: 0.999998999096005, iteration: 398116
loss: 1.0028681755065918,grad_norm: 0.8163730927250273, iteration: 398117
loss: 1.0102027654647827,grad_norm: 0.9965227353272659, iteration: 398118
loss: 1.0565019845962524,grad_norm: 0.8157450826451512, iteration: 398119
loss: 1.023524284362793,grad_norm: 0.9337946858960839, iteration: 398120
loss: 1.0172276496887207,grad_norm: 0.641741650221195, iteration: 398121
loss: 0.9782318472862244,grad_norm: 0.9999993056362657, iteration: 398122
loss: 1.007103443145752,grad_norm: 0.9501064079707956, iteration: 398123
loss: 0.9999663829803467,grad_norm: 0.9999991318694504, iteration: 398124
loss: 0.9951745271682739,grad_norm: 0.9109455108081783, iteration: 398125
loss: 0.9804697036743164,grad_norm: 0.5886074062891974, iteration: 398126
loss: 0.989570140838623,grad_norm: 0.7848803780430944, iteration: 398127
loss: 1.0197558403015137,grad_norm: 0.6513885588145796, iteration: 398128
loss: 1.025251865386963,grad_norm: 0.9999992624666627, iteration: 398129
loss: 1.0013831853866577,grad_norm: 0.9004870749823568, iteration: 398130
loss: 1.016237497329712,grad_norm: 0.8612956295516652, iteration: 398131
loss: 0.9984636902809143,grad_norm: 0.7844765575227247, iteration: 398132
loss: 0.9935377240180969,grad_norm: 0.9014217177619698, iteration: 398133
loss: 1.084949254989624,grad_norm: 0.999999213972468, iteration: 398134
loss: 0.9838772416114807,grad_norm: 0.6851073597544567, iteration: 398135
loss: 0.9795363545417786,grad_norm: 0.6675540587495853, iteration: 398136
loss: 1.0276423692703247,grad_norm: 0.6946651700684567, iteration: 398137
loss: 1.0011333227157593,grad_norm: 0.9999993886617056, iteration: 398138
loss: 1.0019807815551758,grad_norm: 0.6880740134583889, iteration: 398139
loss: 1.0283467769622803,grad_norm: 0.8633827267138962, iteration: 398140
loss: 1.0938726663589478,grad_norm: 0.9999992049606911, iteration: 398141
loss: 1.017425537109375,grad_norm: 0.8270438490147948, iteration: 398142
loss: 1.0335382223129272,grad_norm: 0.9999994489490995, iteration: 398143
loss: 1.018339991569519,grad_norm: 0.7071535651003509, iteration: 398144
loss: 0.9760045409202576,grad_norm: 0.9023023736960991, iteration: 398145
loss: 0.9810746908187866,grad_norm: 0.808281971163676, iteration: 398146
loss: 0.9922943115234375,grad_norm: 0.633849279304433, iteration: 398147
loss: 1.0736207962036133,grad_norm: 0.8828725640868471, iteration: 398148
loss: 0.999671995639801,grad_norm: 0.7450114201333922, iteration: 398149
loss: 0.9966555237770081,grad_norm: 0.7035044184805973, iteration: 398150
loss: 0.9983705282211304,grad_norm: 0.8976736873553666, iteration: 398151
loss: 0.9513640403747559,grad_norm: 0.7876574026894555, iteration: 398152
loss: 0.9818530678749084,grad_norm: 0.9999995261858483, iteration: 398153
loss: 1.0162700414657593,grad_norm: 0.8422231705351163, iteration: 398154
loss: 0.9963993430137634,grad_norm: 0.7155577290908003, iteration: 398155
loss: 1.0115841627120972,grad_norm: 0.7660591332492978, iteration: 398156
loss: 0.9856748580932617,grad_norm: 0.9119169159485925, iteration: 398157
loss: 1.0279061794281006,grad_norm: 0.7921710765579256, iteration: 398158
loss: 0.9699544906616211,grad_norm: 0.7280301824533079, iteration: 398159
loss: 1.0083363056182861,grad_norm: 0.8636035623786069, iteration: 398160
loss: 1.0073617696762085,grad_norm: 0.704522738978395, iteration: 398161
loss: 1.0662397146224976,grad_norm: 0.9999996558067015, iteration: 398162
loss: 0.9998908638954163,grad_norm: 0.8990112031020577, iteration: 398163
loss: 0.9574902653694153,grad_norm: 0.7558907430129835, iteration: 398164
loss: 1.068686604499817,grad_norm: 0.8110241492487303, iteration: 398165
loss: 0.992263674736023,grad_norm: 0.6694569354238072, iteration: 398166
loss: 1.0038982629776,grad_norm: 0.6986933104828364, iteration: 398167
loss: 1.0237181186676025,grad_norm: 0.7626596553661588, iteration: 398168
loss: 0.9733501076698303,grad_norm: 0.7216531562959526, iteration: 398169
loss: 0.9703391790390015,grad_norm: 0.8190352429823413, iteration: 398170
loss: 0.9929165840148926,grad_norm: 0.6375975824079839, iteration: 398171
loss: 0.98428875207901,grad_norm: 0.9872918972600143, iteration: 398172
loss: 1.00747811794281,grad_norm: 0.8861773900618588, iteration: 398173
loss: 1.0095142126083374,grad_norm: 0.7858833168248649, iteration: 398174
loss: 1.0184731483459473,grad_norm: 0.7860171892305055, iteration: 398175
loss: 0.9776825904846191,grad_norm: 0.9999995121133167, iteration: 398176
loss: 1.0103574991226196,grad_norm: 0.7347179392796624, iteration: 398177
loss: 0.9948074221611023,grad_norm: 0.8643789695747959, iteration: 398178
loss: 0.9826921224594116,grad_norm: 0.7101735056764422, iteration: 398179
loss: 0.976492166519165,grad_norm: 0.842253924241052, iteration: 398180
loss: 1.0150729417800903,grad_norm: 0.7163774374736843, iteration: 398181
loss: 0.9878563284873962,grad_norm: 0.7226981204772797, iteration: 398182
loss: 0.9749465584754944,grad_norm: 0.6187332044579029, iteration: 398183
loss: 0.9993210434913635,grad_norm: 0.8146542937597472, iteration: 398184
loss: 0.9787977337837219,grad_norm: 0.6440964233738717, iteration: 398185
loss: 1.0103631019592285,grad_norm: 0.8823889216657983, iteration: 398186
loss: 0.9824821949005127,grad_norm: 0.8201775489625489, iteration: 398187
loss: 1.002394437789917,grad_norm: 0.7929552008955525, iteration: 398188
loss: 0.9763027429580688,grad_norm: 0.7435595343476541, iteration: 398189
loss: 1.0160348415374756,grad_norm: 0.7335127331457199, iteration: 398190
loss: 0.9973084330558777,grad_norm: 0.6911016177962327, iteration: 398191
loss: 0.9567824006080627,grad_norm: 0.7917746781952902, iteration: 398192
loss: 1.0617711544036865,grad_norm: 0.9999993291517504, iteration: 398193
loss: 1.0131698846817017,grad_norm: 0.7707343112291959, iteration: 398194
loss: 1.027178168296814,grad_norm: 0.9736294908743953, iteration: 398195
loss: 0.9619202017784119,grad_norm: 0.799911235337555, iteration: 398196
loss: 0.9984898567199707,grad_norm: 0.7897163224850351, iteration: 398197
loss: 1.0149004459381104,grad_norm: 0.8088997842805163, iteration: 398198
loss: 0.99770188331604,grad_norm: 0.699679028434108, iteration: 398199
loss: 1.031535029411316,grad_norm: 0.8561376037392845, iteration: 398200
loss: 1.0215603113174438,grad_norm: 0.781101835336396, iteration: 398201
loss: 0.9903044700622559,grad_norm: 0.9999996739774993, iteration: 398202
loss: 0.995705246925354,grad_norm: 0.70291383439188, iteration: 398203
loss: 0.9840942621231079,grad_norm: 0.6263905061242148, iteration: 398204
loss: 1.0388414859771729,grad_norm: 0.9318198829720562, iteration: 398205
loss: 1.013776183128357,grad_norm: 0.8731537729293183, iteration: 398206
loss: 0.9801868796348572,grad_norm: 0.793997501709026, iteration: 398207
loss: 1.0296329259872437,grad_norm: 0.7320690354499585, iteration: 398208
loss: 0.9826557636260986,grad_norm: 0.7382417412884226, iteration: 398209
loss: 1.0065630674362183,grad_norm: 0.6978704686960988, iteration: 398210
loss: 1.0055830478668213,grad_norm: 0.7096017085063573, iteration: 398211
loss: 0.9631858468055725,grad_norm: 0.6507671137739052, iteration: 398212
loss: 0.9570302963256836,grad_norm: 0.8421497588216291, iteration: 398213
loss: 1.0458636283874512,grad_norm: 0.7820590273414235, iteration: 398214
loss: 1.0062196254730225,grad_norm: 0.9999991321906136, iteration: 398215
loss: 1.1428921222686768,grad_norm: 0.9999991569253714, iteration: 398216
loss: 1.0014914274215698,grad_norm: 0.677143448186694, iteration: 398217
loss: 1.0168243646621704,grad_norm: 0.8765770852819916, iteration: 398218
loss: 0.9978098273277283,grad_norm: 0.8422469355132616, iteration: 398219
loss: 0.9890274405479431,grad_norm: 0.999999744207611, iteration: 398220
loss: 0.9958412051200867,grad_norm: 0.7177184107787314, iteration: 398221
loss: 0.9880273938179016,grad_norm: 0.6941216807047991, iteration: 398222
loss: 0.9936119318008423,grad_norm: 0.6557283301631366, iteration: 398223
loss: 1.0152153968811035,grad_norm: 0.9315790058174003, iteration: 398224
loss: 1.0064293146133423,grad_norm: 0.8507328061042796, iteration: 398225
loss: 0.9669762849807739,grad_norm: 0.6908233438837076, iteration: 398226
loss: 1.0110948085784912,grad_norm: 0.99999902498593, iteration: 398227
loss: 1.0031554698944092,grad_norm: 0.9999991851523611, iteration: 398228
loss: 1.029528021812439,grad_norm: 0.7276882772101722, iteration: 398229
loss: 1.0047557353973389,grad_norm: 0.9999990931593271, iteration: 398230
loss: 1.0317609310150146,grad_norm: 0.7931289799737689, iteration: 398231
loss: 0.9938071370124817,grad_norm: 0.6471838872937096, iteration: 398232
loss: 1.0484384298324585,grad_norm: 0.9999993656959872, iteration: 398233
loss: 1.0180708169937134,grad_norm: 0.9999991313639467, iteration: 398234
loss: 1.0161018371582031,grad_norm: 0.9273128377803047, iteration: 398235
loss: 0.9825128316879272,grad_norm: 0.7677736453139391, iteration: 398236
loss: 1.0082077980041504,grad_norm: 0.7959696472145396, iteration: 398237
loss: 1.0020478963851929,grad_norm: 0.7345997070896791, iteration: 398238
loss: 1.046864628791809,grad_norm: 0.8805212076867236, iteration: 398239
loss: 1.020488977432251,grad_norm: 0.7750180896361952, iteration: 398240
loss: 1.0263513326644897,grad_norm: 0.8963733369852359, iteration: 398241
loss: 0.9929835796356201,grad_norm: 0.7525822204312589, iteration: 398242
loss: 1.0139403343200684,grad_norm: 0.9177447325373422, iteration: 398243
loss: 1.0248359441757202,grad_norm: 0.8643663368522637, iteration: 398244
loss: 1.0348042249679565,grad_norm: 0.7207531377180179, iteration: 398245
loss: 1.0041714906692505,grad_norm: 0.9999992193705052, iteration: 398246
loss: 0.9830219745635986,grad_norm: 0.7243563647164966, iteration: 398247
loss: 1.0377118587493896,grad_norm: 0.9297695099286155, iteration: 398248
loss: 0.9704068899154663,grad_norm: 0.6786966346777538, iteration: 398249
loss: 1.01205575466156,grad_norm: 0.6736102426202017, iteration: 398250
loss: 0.9616127610206604,grad_norm: 0.7928246617854914, iteration: 398251
loss: 1.0100311040878296,grad_norm: 0.927264488832924, iteration: 398252
loss: 0.9549205303192139,grad_norm: 0.8867258924867052, iteration: 398253
loss: 0.9697315692901611,grad_norm: 0.7685441581651882, iteration: 398254
loss: 1.0245754718780518,grad_norm: 0.7773646463718353, iteration: 398255
loss: 0.9895718693733215,grad_norm: 0.8541722026405723, iteration: 398256
loss: 0.9945385456085205,grad_norm: 0.7069502704774788, iteration: 398257
loss: 1.0112494230270386,grad_norm: 0.7863777807849184, iteration: 398258
loss: 0.9945762753486633,grad_norm: 0.8403117415693877, iteration: 398259
loss: 1.1630871295928955,grad_norm: 0.9999991540189478, iteration: 398260
loss: 0.999411404132843,grad_norm: 0.8249694797920342, iteration: 398261
loss: 1.056442379951477,grad_norm: 0.9999996816494688, iteration: 398262
loss: 0.9766474366188049,grad_norm: 0.7605118290198274, iteration: 398263
loss: 0.9453670978546143,grad_norm: 0.7589952809989748, iteration: 398264
loss: 1.0089620351791382,grad_norm: 0.7827298401160243, iteration: 398265
loss: 1.0242388248443604,grad_norm: 0.8814551385845275, iteration: 398266
loss: 0.9877403378486633,grad_norm: 0.8580342731905557, iteration: 398267
loss: 1.0017086267471313,grad_norm: 0.7373698509080037, iteration: 398268
loss: 1.0129989385604858,grad_norm: 0.8298567129297159, iteration: 398269
loss: 0.993189811706543,grad_norm: 0.6758079752616943, iteration: 398270
loss: 1.0025290250778198,grad_norm: 0.9086720584397885, iteration: 398271
loss: 1.009769082069397,grad_norm: 0.6930526445162777, iteration: 398272
loss: 0.9925816059112549,grad_norm: 0.9999989886460406, iteration: 398273
loss: 1.0645685195922852,grad_norm: 0.9999994188828651, iteration: 398274
loss: 1.0430203676223755,grad_norm: 0.8003618686895783, iteration: 398275
loss: 0.9791717529296875,grad_norm: 0.9044267663757812, iteration: 398276
loss: 1.0834497213363647,grad_norm: 0.8869079126680757, iteration: 398277
loss: 0.9932222962379456,grad_norm: 0.7369371761004362, iteration: 398278
loss: 0.9594596028327942,grad_norm: 0.725150609865552, iteration: 398279
loss: 1.0242539644241333,grad_norm: 0.7938330164927232, iteration: 398280
loss: 0.9933088421821594,grad_norm: 0.8627941359944712, iteration: 398281
loss: 1.0137977600097656,grad_norm: 0.8892110766081425, iteration: 398282
loss: 0.9978227019309998,grad_norm: 0.8489867301709279, iteration: 398283
loss: 1.0289862155914307,grad_norm: 0.5982715369472884, iteration: 398284
loss: 0.9863948822021484,grad_norm: 0.7537516934550431, iteration: 398285
loss: 0.9984926581382751,grad_norm: 0.7140144617208253, iteration: 398286
loss: 1.0115872621536255,grad_norm: 0.9999997223008439, iteration: 398287
loss: 1.003186821937561,grad_norm: 0.9019718463714922, iteration: 398288
loss: 1.0080227851867676,grad_norm: 0.7671396574738863, iteration: 398289
loss: 0.9975472688674927,grad_norm: 0.7879344150888054, iteration: 398290
loss: 1.05495023727417,grad_norm: 0.7745567866558973, iteration: 398291
loss: 1.012796401977539,grad_norm: 0.8965718342427004, iteration: 398292
loss: 1.0418375730514526,grad_norm: 0.6573058486436886, iteration: 398293
loss: 0.9954875111579895,grad_norm: 0.7924714108514346, iteration: 398294
loss: 1.0054826736450195,grad_norm: 0.7337223522341231, iteration: 398295
loss: 0.9746758937835693,grad_norm: 0.7102898025147946, iteration: 398296
loss: 1.070030689239502,grad_norm: 0.9999993988768072, iteration: 398297
loss: 1.0053205490112305,grad_norm: 0.6748814094316479, iteration: 398298
loss: 0.96920245885849,grad_norm: 0.8115473376000046, iteration: 398299
loss: 1.008514165878296,grad_norm: 0.8706137440125443, iteration: 398300
loss: 0.9975295066833496,grad_norm: 0.7406301367380766, iteration: 398301
loss: 1.0111740827560425,grad_norm: 0.7327755615980923, iteration: 398302
loss: 1.0196770429611206,grad_norm: 0.6082247722475842, iteration: 398303
loss: 1.0186046361923218,grad_norm: 0.8839437265504463, iteration: 398304
loss: 0.9641156792640686,grad_norm: 0.7169415129068921, iteration: 398305
loss: 1.0029857158660889,grad_norm: 0.885720785271477, iteration: 398306
loss: 0.9889169931411743,grad_norm: 0.7098069157518317, iteration: 398307
loss: 1.0104749202728271,grad_norm: 0.7851927948552692, iteration: 398308
loss: 1.0172258615493774,grad_norm: 0.8518622641421184, iteration: 398309
loss: 0.9952229857444763,grad_norm: 0.8227915027559681, iteration: 398310
loss: 1.0864523649215698,grad_norm: 0.8766889458032769, iteration: 398311
loss: 0.9971767067909241,grad_norm: 0.7694610868429849, iteration: 398312
loss: 1.0615334510803223,grad_norm: 0.8255753765552735, iteration: 398313
loss: 0.9825175404548645,grad_norm: 0.7923620126872821, iteration: 398314
loss: 1.0437625646591187,grad_norm: 0.7497698497622988, iteration: 398315
loss: 0.9288947582244873,grad_norm: 0.9258810390685571, iteration: 398316
loss: 0.9743340611457825,grad_norm: 0.9028487600025923, iteration: 398317
loss: 1.0214473009109497,grad_norm: 0.8291642572518778, iteration: 398318
loss: 0.9756090641021729,grad_norm: 0.7707988157611095, iteration: 398319
loss: 0.9836457967758179,grad_norm: 0.9490186710585502, iteration: 398320
loss: 0.9821352362632751,grad_norm: 0.7682383588083542, iteration: 398321
loss: 1.04642653465271,grad_norm: 0.9999989700214014, iteration: 398322
loss: 1.0385524034500122,grad_norm: 0.999999627017094, iteration: 398323
loss: 0.9968512058258057,grad_norm: 0.7469293626471523, iteration: 398324
loss: 0.9738622307777405,grad_norm: 0.74460579892452, iteration: 398325
loss: 0.9849025011062622,grad_norm: 0.7845110113760034, iteration: 398326
loss: 0.9540198445320129,grad_norm: 0.7406481597601161, iteration: 398327
loss: 1.0182933807373047,grad_norm: 0.8218304318576857, iteration: 398328
loss: 1.0044057369232178,grad_norm: 0.662038278284426, iteration: 398329
loss: 0.9909586906433105,grad_norm: 0.8559153322073221, iteration: 398330
loss: 1.0081565380096436,grad_norm: 0.7514915289078691, iteration: 398331
loss: 0.992990255355835,grad_norm: 0.7229094148553907, iteration: 398332
loss: 1.0199776887893677,grad_norm: 0.7013084516802314, iteration: 398333
loss: 1.0087977647781372,grad_norm: 0.7620848521361338, iteration: 398334
loss: 1.0309165716171265,grad_norm: 0.8326858691652697, iteration: 398335
loss: 1.0098577737808228,grad_norm: 0.9518298918194176, iteration: 398336
loss: 1.0764789581298828,grad_norm: 0.852528094323348, iteration: 398337
loss: 1.0819613933563232,grad_norm: 0.9999999783594344, iteration: 398338
loss: 1.033968210220337,grad_norm: 0.9054275924373153, iteration: 398339
loss: 1.0013506412506104,grad_norm: 0.8935637597899283, iteration: 398340
loss: 1.000771403312683,grad_norm: 0.7217366483240364, iteration: 398341
loss: 1.0191165208816528,grad_norm: 0.8921124688979158, iteration: 398342
loss: 1.0345146656036377,grad_norm: 0.9183492381715592, iteration: 398343
loss: 1.1243939399719238,grad_norm: 0.7387174307694085, iteration: 398344
loss: 1.0490261316299438,grad_norm: 0.9999994049917024, iteration: 398345
loss: 1.0117766857147217,grad_norm: 0.9225844859505117, iteration: 398346
loss: 1.0048999786376953,grad_norm: 0.9427301810100325, iteration: 398347
loss: 1.0099862813949585,grad_norm: 0.9999991766877414, iteration: 398348
loss: 0.9894410967826843,grad_norm: 0.8104284280807188, iteration: 398349
loss: 1.0051745176315308,grad_norm: 0.890413239285924, iteration: 398350
loss: 1.0080324411392212,grad_norm: 0.8135838466402053, iteration: 398351
loss: 0.9985777139663696,grad_norm: 0.779871514131587, iteration: 398352
loss: 0.9916498064994812,grad_norm: 0.7842798760119052, iteration: 398353
loss: 0.967886209487915,grad_norm: 0.7047621111545128, iteration: 398354
loss: 0.9939996600151062,grad_norm: 0.7263496801825348, iteration: 398355
loss: 1.0245225429534912,grad_norm: 0.7405563833151828, iteration: 398356
loss: 0.9988608956336975,grad_norm: 0.6366908353442958, iteration: 398357
loss: 1.0219496488571167,grad_norm: 0.8610704162723891, iteration: 398358
loss: 0.9913681745529175,grad_norm: 0.8312291413289333, iteration: 398359
loss: 0.966518759727478,grad_norm: 0.9082828653448287, iteration: 398360
loss: 1.0050228834152222,grad_norm: 0.6815923418067088, iteration: 398361
loss: 0.9687954187393188,grad_norm: 0.7749071930742266, iteration: 398362
loss: 1.0103033781051636,grad_norm: 0.9065211838561039, iteration: 398363
loss: 0.9932202100753784,grad_norm: 0.7730875952383576, iteration: 398364
loss: 1.017220377922058,grad_norm: 0.9999997740472788, iteration: 398365
loss: 0.9929152131080627,grad_norm: 0.7561923357394701, iteration: 398366
loss: 0.9831055402755737,grad_norm: 0.7488904457932338, iteration: 398367
loss: 0.9746103286743164,grad_norm: 0.9999991454070246, iteration: 398368
loss: 0.9990289211273193,grad_norm: 0.8264839302725623, iteration: 398369
loss: 0.9871639609336853,grad_norm: 0.8516277735498677, iteration: 398370
loss: 1.0202782154083252,grad_norm: 0.9192983784083928, iteration: 398371
loss: 1.0023092031478882,grad_norm: 0.9174802860359116, iteration: 398372
loss: 0.9611353874206543,grad_norm: 0.680247204188168, iteration: 398373
loss: 1.0019739866256714,grad_norm: 0.8369936508907061, iteration: 398374
loss: 0.9843910932540894,grad_norm: 0.8806389777904062, iteration: 398375
loss: 0.9555169343948364,grad_norm: 0.7792526228237561, iteration: 398376
loss: 1.0473254919052124,grad_norm: 0.8452714230624812, iteration: 398377
loss: 0.9891958832740784,grad_norm: 0.745665767145958, iteration: 398378
loss: 1.0305349826812744,grad_norm: 0.8326729266534292, iteration: 398379
loss: 1.017018437385559,grad_norm: 0.8225414836277577, iteration: 398380
loss: 1.0122535228729248,grad_norm: 0.8122158200161473, iteration: 398381
loss: 0.9989771246910095,grad_norm: 0.9999997500955788, iteration: 398382
loss: 1.0065689086914062,grad_norm: 0.7488305015361267, iteration: 398383
loss: 0.989840030670166,grad_norm: 0.8544798046732572, iteration: 398384
loss: 0.9910138249397278,grad_norm: 0.7791048795221535, iteration: 398385
loss: 1.0168452262878418,grad_norm: 0.7639843279166102, iteration: 398386
loss: 1.1391810178756714,grad_norm: 0.999999191290278, iteration: 398387
loss: 1.009342908859253,grad_norm: 0.6780264912480762, iteration: 398388
loss: 1.002267837524414,grad_norm: 0.7246758874203069, iteration: 398389
loss: 1.0310909748077393,grad_norm: 0.6904631716616154, iteration: 398390
loss: 0.9775964021682739,grad_norm: 0.7665370569247784, iteration: 398391
loss: 1.0157239437103271,grad_norm: 0.7258632031792619, iteration: 398392
loss: 0.9834967255592346,grad_norm: 0.7927273102203891, iteration: 398393
loss: 0.9927629232406616,grad_norm: 0.9176286248756923, iteration: 398394
loss: 0.9987969398498535,grad_norm: 0.8544834425564688, iteration: 398395
loss: 0.9792860150337219,grad_norm: 0.8654264175132929, iteration: 398396
loss: 1.014594554901123,grad_norm: 0.7491175886024646, iteration: 398397
loss: 1.0075173377990723,grad_norm: 0.6876533941795132, iteration: 398398
loss: 0.9972193241119385,grad_norm: 0.9999996875535692, iteration: 398399
loss: 1.079797387123108,grad_norm: 0.98697595631488, iteration: 398400
loss: 1.0122523307800293,grad_norm: 0.9999990758801479, iteration: 398401
loss: 0.9812416434288025,grad_norm: 0.8955880045662566, iteration: 398402
loss: 0.9743080735206604,grad_norm: 0.7791674940501074, iteration: 398403
loss: 0.979910671710968,grad_norm: 0.7466770101638079, iteration: 398404
loss: 1.0229310989379883,grad_norm: 0.6924628911183848, iteration: 398405
loss: 1.0063385963439941,grad_norm: 0.7498903822645199, iteration: 398406
loss: 0.9970481395721436,grad_norm: 0.7857521228031479, iteration: 398407
loss: 0.995832622051239,grad_norm: 0.7768724786221247, iteration: 398408
loss: 1.0034629106521606,grad_norm: 0.8805428016115855, iteration: 398409
loss: 1.0215312242507935,grad_norm: 0.7539889890997741, iteration: 398410
loss: 1.0169575214385986,grad_norm: 0.8725566321674081, iteration: 398411
loss: 0.9697524309158325,grad_norm: 0.6219474643169484, iteration: 398412
loss: 0.9991739988327026,grad_norm: 0.8379084292531439, iteration: 398413
loss: 1.0009057521820068,grad_norm: 0.6825310201514734, iteration: 398414
loss: 0.9834097623825073,grad_norm: 0.8229218067496054, iteration: 398415
loss: 0.9859340786933899,grad_norm: 0.790664338117303, iteration: 398416
loss: 0.9986940622329712,grad_norm: 0.7985102854454256, iteration: 398417
loss: 0.9768967032432556,grad_norm: 0.9545837388796662, iteration: 398418
loss: 0.970382571220398,grad_norm: 0.8422291374532989, iteration: 398419
loss: 1.0025279521942139,grad_norm: 0.7418589626455633, iteration: 398420
loss: 0.9977872967720032,grad_norm: 0.8977080562110932, iteration: 398421
loss: 0.9804416298866272,grad_norm: 0.7316047465469886, iteration: 398422
loss: 1.0230321884155273,grad_norm: 0.9999990905436614, iteration: 398423
loss: 0.9871839880943298,grad_norm: 0.999999322428178, iteration: 398424
loss: 0.9957456588745117,grad_norm: 0.8641082920005146, iteration: 398425
loss: 0.9776546359062195,grad_norm: 0.7439478214729374, iteration: 398426
loss: 1.0046963691711426,grad_norm: 0.9999990900273961, iteration: 398427
loss: 0.9787036776542664,grad_norm: 0.9787583205117864, iteration: 398428
loss: 0.9607474207878113,grad_norm: 0.6642701587587193, iteration: 398429
loss: 0.9907805919647217,grad_norm: 0.7147895577395955, iteration: 398430
loss: 0.986342191696167,grad_norm: 0.83154487405883, iteration: 398431
loss: 1.004517912864685,grad_norm: 0.9999995739073535, iteration: 398432
loss: 0.9929071664810181,grad_norm: 0.7960481311314476, iteration: 398433
loss: 0.9861695766448975,grad_norm: 0.7914348141558123, iteration: 398434
loss: 1.0115386247634888,grad_norm: 0.8166237167319508, iteration: 398435
loss: 1.0102565288543701,grad_norm: 0.8722459145661027, iteration: 398436
loss: 0.9898946285247803,grad_norm: 0.8354269423141956, iteration: 398437
loss: 1.4600893259048462,grad_norm: 0.9999996433556856, iteration: 398438
loss: 1.01069974899292,grad_norm: 0.8562324234497666, iteration: 398439
loss: 0.984123170375824,grad_norm: 0.7818192158333778, iteration: 398440
loss: 1.0295299291610718,grad_norm: 0.7668887021014291, iteration: 398441
loss: 1.0123441219329834,grad_norm: 0.9999990429211315, iteration: 398442
loss: 1.0091726779937744,grad_norm: 0.7221260785154574, iteration: 398443
loss: 1.0137057304382324,grad_norm: 0.9999989544283162, iteration: 398444
loss: 1.0651013851165771,grad_norm: 0.8760740406179989, iteration: 398445
loss: 0.9626718759536743,grad_norm: 0.7522226257436461, iteration: 398446
loss: 0.9743351340293884,grad_norm: 0.856724812100279, iteration: 398447
loss: 0.9965396523475647,grad_norm: 0.7193505889500351, iteration: 398448
loss: 1.0717233419418335,grad_norm: 0.9999998068473858, iteration: 398449
loss: 1.00388765335083,grad_norm: 0.8936517928906486, iteration: 398450
loss: 1.0228742361068726,grad_norm: 0.8014443404326571, iteration: 398451
loss: 1.0402820110321045,grad_norm: 0.9999996295218659, iteration: 398452
loss: 1.0137494802474976,grad_norm: 0.9999999679284084, iteration: 398453
loss: 1.094006061553955,grad_norm: 0.9999999302893128, iteration: 398454
loss: 0.9817686676979065,grad_norm: 0.8352203280002821, iteration: 398455
loss: 0.9888438582420349,grad_norm: 0.7567637009456291, iteration: 398456
loss: 0.9915984272956848,grad_norm: 0.8100538896667863, iteration: 398457
loss: 1.0350139141082764,grad_norm: 0.7724781273022687, iteration: 398458
loss: 1.043596625328064,grad_norm: 0.6962025972726116, iteration: 398459
loss: 0.9887233972549438,grad_norm: 0.6828863154103783, iteration: 398460
loss: 0.9348242878913879,grad_norm: 0.7760454774652189, iteration: 398461
loss: 0.9835795760154724,grad_norm: 0.793515273112386, iteration: 398462
loss: 0.9613381624221802,grad_norm: 0.8463295772408346, iteration: 398463
loss: 1.017581582069397,grad_norm: 0.7077748686064327, iteration: 398464
loss: 0.9956574440002441,grad_norm: 0.8123233154707473, iteration: 398465
loss: 0.9917818903923035,grad_norm: 0.7434109850803473, iteration: 398466
loss: 1.009228229522705,grad_norm: 0.9999989583641412, iteration: 398467
loss: 1.0103148221969604,grad_norm: 0.8351333869508419, iteration: 398468
loss: 1.0043277740478516,grad_norm: 0.8686201940909893, iteration: 398469
loss: 1.0266426801681519,grad_norm: 0.9999990851207686, iteration: 398470
loss: 0.989335298538208,grad_norm: 0.8609572791389047, iteration: 398471
loss: 0.9807586669921875,grad_norm: 0.8098453477111135, iteration: 398472
loss: 0.9749482274055481,grad_norm: 0.7516492603646139, iteration: 398473
loss: 1.0649683475494385,grad_norm: 0.8667617901664814, iteration: 398474
loss: 1.0170109272003174,grad_norm: 0.6568174555276416, iteration: 398475
loss: 1.0034655332565308,grad_norm: 0.9999994094190545, iteration: 398476
loss: 0.9938680529594421,grad_norm: 0.9168126764508262, iteration: 398477
loss: 1.0224907398223877,grad_norm: 0.7424321988384971, iteration: 398478
loss: 0.9796174168586731,grad_norm: 0.6785458538072408, iteration: 398479
loss: 1.0420337915420532,grad_norm: 0.7736957334959873, iteration: 398480
loss: 1.1176543235778809,grad_norm: 0.9999997930666058, iteration: 398481
loss: 0.9766613245010376,grad_norm: 0.9803821480934126, iteration: 398482
loss: 1.039800763130188,grad_norm: 0.764103954887448, iteration: 398483
loss: 1.054524540901184,grad_norm: 0.9190773068844766, iteration: 398484
loss: 1.0062127113342285,grad_norm: 0.8578496321387773, iteration: 398485
loss: 0.9776484966278076,grad_norm: 0.8743447137881436, iteration: 398486
loss: 1.008711814880371,grad_norm: 0.7499655769619933, iteration: 398487
loss: 1.0086581707000732,grad_norm: 0.890872241976266, iteration: 398488
loss: 0.9832077026367188,grad_norm: 0.7795587404162865, iteration: 398489
loss: 0.974083423614502,grad_norm: 0.8381243439388659, iteration: 398490
loss: 0.9957519173622131,grad_norm: 0.6908858037882483, iteration: 398491
loss: 1.0005370378494263,grad_norm: 0.8395075711356444, iteration: 398492
loss: 1.0770111083984375,grad_norm: 0.8542358353779412, iteration: 398493
loss: 0.9894525408744812,grad_norm: 0.6887316677190365, iteration: 398494
loss: 0.9749419093132019,grad_norm: 0.6321245737935259, iteration: 398495
loss: 0.9777355194091797,grad_norm: 0.8639315609634037, iteration: 398496
loss: 0.9829664826393127,grad_norm: 0.7545582850483722, iteration: 398497
loss: 0.9975282549858093,grad_norm: 0.811519387756719, iteration: 398498
loss: 1.0175203084945679,grad_norm: 0.9999997692960739, iteration: 398499
loss: 0.9845446348190308,grad_norm: 0.7874296370805594, iteration: 398500
loss: 0.9473842978477478,grad_norm: 0.7713484493532979, iteration: 398501
loss: 1.0032726526260376,grad_norm: 0.7944207831189398, iteration: 398502
loss: 1.0076560974121094,grad_norm: 0.8354488000458808, iteration: 398503
loss: 1.0189622640609741,grad_norm: 0.7730159394004323, iteration: 398504
loss: 1.0065444707870483,grad_norm: 0.9692868074859233, iteration: 398505
loss: 1.014567255973816,grad_norm: 0.7394295133770726, iteration: 398506
loss: 1.016393780708313,grad_norm: 0.7556230005566036, iteration: 398507
loss: 1.029747486114502,grad_norm: 0.8792243897440306, iteration: 398508
loss: 1.0342870950698853,grad_norm: 0.9999991210859278, iteration: 398509
loss: 0.9848145246505737,grad_norm: 0.850272993605194, iteration: 398510
loss: 1.0189603567123413,grad_norm: 0.8888028818327838, iteration: 398511
loss: 0.9868679046630859,grad_norm: 0.8726373774621857, iteration: 398512
loss: 0.9819514155387878,grad_norm: 0.739974238484696, iteration: 398513
loss: 0.991509199142456,grad_norm: 0.8457912897300244, iteration: 398514
loss: 1.0118744373321533,grad_norm: 0.7880937124226223, iteration: 398515
loss: 1.0070096254348755,grad_norm: 0.72205950696272, iteration: 398516
loss: 0.9922325015068054,grad_norm: 0.6852162654591414, iteration: 398517
loss: 0.9942828416824341,grad_norm: 0.7767214040693041, iteration: 398518
loss: 0.9917964935302734,grad_norm: 0.9999996618989797, iteration: 398519
loss: 0.9883238673210144,grad_norm: 0.889722700647787, iteration: 398520
loss: 0.9623702764511108,grad_norm: 0.8154127098203403, iteration: 398521
loss: 0.9714884757995605,grad_norm: 0.999999118956542, iteration: 398522
loss: 0.987518310546875,grad_norm: 0.7629482946878455, iteration: 398523
loss: 1.007465124130249,grad_norm: 0.9451211750071064, iteration: 398524
loss: 0.9951532483100891,grad_norm: 0.6630846442135978, iteration: 398525
loss: 0.9751448035240173,grad_norm: 0.8979170446995516, iteration: 398526
loss: 0.9637987613677979,grad_norm: 0.7138082909414365, iteration: 398527
loss: 1.0208467245101929,grad_norm: 0.6553762890846165, iteration: 398528
loss: 0.9830242991447449,grad_norm: 0.9999990656865724, iteration: 398529
loss: 1.0227019786834717,grad_norm: 0.9965192726591221, iteration: 398530
loss: 1.001632571220398,grad_norm: 0.9251537722203244, iteration: 398531
loss: 1.0288331508636475,grad_norm: 0.9175870372905406, iteration: 398532
loss: 0.9736592173576355,grad_norm: 0.7675366854272234, iteration: 398533
loss: 1.006503939628601,grad_norm: 0.6408612634577338, iteration: 398534
loss: 0.9764004945755005,grad_norm: 0.691641832972778, iteration: 398535
loss: 0.994422197341919,grad_norm: 0.7282208066540885, iteration: 398536
loss: 0.9909754395484924,grad_norm: 0.7961153381522903, iteration: 398537
loss: 0.996364414691925,grad_norm: 0.9999995029461052, iteration: 398538
loss: 0.9869064092636108,grad_norm: 0.6901925759359911, iteration: 398539
loss: 0.9949290752410889,grad_norm: 0.7755754806522083, iteration: 398540
loss: 0.980006217956543,grad_norm: 0.6900476597600911, iteration: 398541
loss: 0.9819801449775696,grad_norm: 0.7461009955216787, iteration: 398542
loss: 1.0085662603378296,grad_norm: 0.7707121308979971, iteration: 398543
loss: 0.9979232549667358,grad_norm: 0.7796287778225538, iteration: 398544
loss: 0.9867514371871948,grad_norm: 0.8707280084604991, iteration: 398545
loss: 1.0035841464996338,grad_norm: 0.6864690486939895, iteration: 398546
loss: 0.9973602294921875,grad_norm: 0.739484136695242, iteration: 398547
loss: 1.0256000757217407,grad_norm: 0.9181073640589863, iteration: 398548
loss: 1.038679599761963,grad_norm: 0.8287477977571598, iteration: 398549
loss: 1.0090301036834717,grad_norm: 0.890831974041049, iteration: 398550
loss: 1.004927158355713,grad_norm: 0.9999991486628309, iteration: 398551
loss: 1.0275346040725708,grad_norm: 0.8074829662935765, iteration: 398552
loss: 1.0360324382781982,grad_norm: 0.7376931046513988, iteration: 398553
loss: 1.01382315158844,grad_norm: 0.9999990266892674, iteration: 398554
loss: 0.9836610555648804,grad_norm: 0.8389554330549346, iteration: 398555
loss: 0.9936957359313965,grad_norm: 0.8590118028327605, iteration: 398556
loss: 1.0040000677108765,grad_norm: 0.7069470512545536, iteration: 398557
loss: 0.9717767238616943,grad_norm: 0.7174796785165221, iteration: 398558
loss: 1.030474066734314,grad_norm: 0.9999999591929769, iteration: 398559
loss: 0.9905825257301331,grad_norm: 0.8769869828632713, iteration: 398560
loss: 1.0079518556594849,grad_norm: 0.8524313979630606, iteration: 398561
loss: 0.9804380536079407,grad_norm: 0.7444089551414779, iteration: 398562
loss: 0.9985697865486145,grad_norm: 0.7992436123264961, iteration: 398563
loss: 1.0015276670455933,grad_norm: 0.813847148224584, iteration: 398564
loss: 0.9836679697036743,grad_norm: 0.7653917656949853, iteration: 398565
loss: 1.0039409399032593,grad_norm: 0.8136780142150156, iteration: 398566
loss: 0.9849998950958252,grad_norm: 0.7224915950515652, iteration: 398567
loss: 1.0009260177612305,grad_norm: 0.7045634310455745, iteration: 398568
loss: 1.02146577835083,grad_norm: 0.7471626682332209, iteration: 398569
loss: 1.0047063827514648,grad_norm: 0.8915683386187389, iteration: 398570
loss: 1.047683835029602,grad_norm: 0.9999995293366063, iteration: 398571
loss: 0.9763733148574829,grad_norm: 0.6184356675462545, iteration: 398572
loss: 1.0386964082717896,grad_norm: 0.80669553757591, iteration: 398573
loss: 1.0736262798309326,grad_norm: 0.9397507478104893, iteration: 398574
loss: 0.970346987247467,grad_norm: 1.000000000166428, iteration: 398575
loss: 1.0266966819763184,grad_norm: 0.9999991148720296, iteration: 398576
loss: 1.0117764472961426,grad_norm: 0.7688948275117425, iteration: 398577
loss: 0.9633028507232666,grad_norm: 0.9999990655117994, iteration: 398578
loss: 1.0041553974151611,grad_norm: 0.7002346293418048, iteration: 398579
loss: 0.9993299841880798,grad_norm: 0.7907307975245904, iteration: 398580
loss: 1.0750080347061157,grad_norm: 0.9985904930730576, iteration: 398581
loss: 1.001327395439148,grad_norm: 0.941341573613602, iteration: 398582
loss: 1.0227950811386108,grad_norm: 0.7983281106439947, iteration: 398583
loss: 0.9850098490715027,grad_norm: 0.7281766457126159, iteration: 398584
loss: 0.9937916398048401,grad_norm: 0.8607322463219408, iteration: 398585
loss: 0.9784829616546631,grad_norm: 0.8843834147812092, iteration: 398586
loss: 0.9847651124000549,grad_norm: 0.785780094613629, iteration: 398587
loss: 1.016746163368225,grad_norm: 0.7052538178014027, iteration: 398588
loss: 0.9977553486824036,grad_norm: 0.9117329476719063, iteration: 398589
loss: 1.0437061786651611,grad_norm: 0.8650981213282433, iteration: 398590
loss: 1.0340814590454102,grad_norm: 0.7761640071838786, iteration: 398591
loss: 0.9788104295730591,grad_norm: 0.8741906839810887, iteration: 398592
loss: 0.9852317571640015,grad_norm: 0.7203939929250041, iteration: 398593
loss: 1.0463122129440308,grad_norm: 0.9999999191623177, iteration: 398594
loss: 1.0604496002197266,grad_norm: 0.9999998233175977, iteration: 398595
loss: 1.034395456314087,grad_norm: 0.8608478267149884, iteration: 398596
loss: 0.9762881398200989,grad_norm: 0.8477564707245593, iteration: 398597
loss: 0.9954728484153748,grad_norm: 0.6965859155722606, iteration: 398598
loss: 1.034794807434082,grad_norm: 0.9999993650980972, iteration: 398599
loss: 0.9776158332824707,grad_norm: 0.8139540090347797, iteration: 398600
loss: 1.0153158903121948,grad_norm: 0.7956386895706246, iteration: 398601
loss: 0.9950966238975525,grad_norm: 0.6989578847887173, iteration: 398602
loss: 0.9916388988494873,grad_norm: 0.7090296784310088, iteration: 398603
loss: 0.9876397848129272,grad_norm: 0.8196431899385642, iteration: 398604
loss: 1.0563124418258667,grad_norm: 0.9999996739119358, iteration: 398605
loss: 1.0921273231506348,grad_norm: 1.0000000244224938, iteration: 398606
loss: 1.0404821634292603,grad_norm: 0.9118525910196041, iteration: 398607
loss: 1.0069650411605835,grad_norm: 0.9999992388789407, iteration: 398608
loss: 1.0155409574508667,grad_norm: 0.6425671548566514, iteration: 398609
loss: 0.9789787530899048,grad_norm: 0.699162597413864, iteration: 398610
loss: 1.0487061738967896,grad_norm: 0.7228745533730235, iteration: 398611
loss: 1.022144079208374,grad_norm: 0.9999993248238965, iteration: 398612
loss: 1.027375340461731,grad_norm: 0.9824574031783316, iteration: 398613
loss: 0.9792693853378296,grad_norm: 0.7619713639923625, iteration: 398614
loss: 1.025788426399231,grad_norm: 0.7388899863533495, iteration: 398615
loss: 1.0140326023101807,grad_norm: 0.8438819211922914, iteration: 398616
loss: 1.017342209815979,grad_norm: 0.6995265372774472, iteration: 398617
loss: 1.0424760580062866,grad_norm: 0.8028882321653116, iteration: 398618
loss: 1.0379804372787476,grad_norm: 0.8444197574076937, iteration: 398619
loss: 1.1097182035446167,grad_norm: 0.901172723125767, iteration: 398620
loss: 1.0277109146118164,grad_norm: 0.8941271193622868, iteration: 398621
loss: 1.0002535581588745,grad_norm: 0.9999994756104204, iteration: 398622
loss: 1.0119296312332153,grad_norm: 0.8594716040934878, iteration: 398623
loss: 0.9769562482833862,grad_norm: 0.9999998530226601, iteration: 398624
loss: 0.9581390619277954,grad_norm: 0.8619040971948966, iteration: 398625
loss: 1.0346194505691528,grad_norm: 0.9403425659581713, iteration: 398626
loss: 0.9714511036872864,grad_norm: 0.6662710622219704, iteration: 398627
loss: 1.0062485933303833,grad_norm: 0.9428738071536727, iteration: 398628
loss: 1.1540095806121826,grad_norm: 0.9999997583036528, iteration: 398629
loss: 1.022261619567871,grad_norm: 0.8564098285360537, iteration: 398630
loss: 1.0159493684768677,grad_norm: 0.7490019499101241, iteration: 398631
loss: 1.0033280849456787,grad_norm: 0.7476245990138743, iteration: 398632
loss: 0.9685888886451721,grad_norm: 0.9381632989860045, iteration: 398633
loss: 0.9669703245162964,grad_norm: 0.7276851265076875, iteration: 398634
loss: 0.9782090783119202,grad_norm: 0.7986490167314235, iteration: 398635
loss: 1.000325083732605,grad_norm: 0.7407790942455831, iteration: 398636
loss: 0.9966119527816772,grad_norm: 0.9077631175511476, iteration: 398637
loss: 1.0469697713851929,grad_norm: 0.781899525727837, iteration: 398638
loss: 1.0135903358459473,grad_norm: 0.8718461526127582, iteration: 398639
loss: 1.0397453308105469,grad_norm: 0.7619365126217796, iteration: 398640
loss: 1.0320487022399902,grad_norm: 0.8143374117834963, iteration: 398641
loss: 1.00066339969635,grad_norm: 0.9271912053508421, iteration: 398642
loss: 1.0002787113189697,grad_norm: 0.758885395983312, iteration: 398643
loss: 0.9873868227005005,grad_norm: 0.8343671094762924, iteration: 398644
loss: 0.9998542666435242,grad_norm: 0.8883430707371359, iteration: 398645
loss: 0.9857153296470642,grad_norm: 0.7961423507941036, iteration: 398646
loss: 0.9886104464530945,grad_norm: 0.6727565747791162, iteration: 398647
loss: 1.0220956802368164,grad_norm: 0.7383837864956548, iteration: 398648
loss: 1.0169711112976074,grad_norm: 0.8628538037988706, iteration: 398649
loss: 1.0228281021118164,grad_norm: 0.7573095839056287, iteration: 398650
loss: 1.0185025930404663,grad_norm: 0.7853779596506089, iteration: 398651
loss: 1.0268934965133667,grad_norm: 0.6614431162037583, iteration: 398652
loss: 0.9882694482803345,grad_norm: 0.767560418596468, iteration: 398653
loss: 1.017296552658081,grad_norm: 0.8847985945973303, iteration: 398654
loss: 1.0168795585632324,grad_norm: 0.7328529482559238, iteration: 398655
loss: 0.9877232909202576,grad_norm: 0.775904125021556, iteration: 398656
loss: 0.9901502728462219,grad_norm: 0.7831851978874961, iteration: 398657
loss: 0.9769846200942993,grad_norm: 0.8094341225660179, iteration: 398658
loss: 1.0150518417358398,grad_norm: 0.7796013210639172, iteration: 398659
loss: 1.0878938436508179,grad_norm: 0.8551410115441204, iteration: 398660
loss: 0.9763185381889343,grad_norm: 0.8371304059381163, iteration: 398661
loss: 0.9772980213165283,grad_norm: 0.7096943774957949, iteration: 398662
loss: 1.0097002983093262,grad_norm: 0.7222625156668333, iteration: 398663
loss: 1.002061128616333,grad_norm: 0.999999003103338, iteration: 398664
loss: 1.0310488939285278,grad_norm: 0.702261653546106, iteration: 398665
loss: 1.0021229982376099,grad_norm: 0.9026502648529793, iteration: 398666
loss: 1.0724505186080933,grad_norm: 0.8300947893120882, iteration: 398667
loss: 1.0058164596557617,grad_norm: 0.7533555432746897, iteration: 398668
loss: 0.9791972637176514,grad_norm: 0.8329131407468569, iteration: 398669
loss: 1.0801689624786377,grad_norm: 0.9999990842708754, iteration: 398670
loss: 0.9795106053352356,grad_norm: 0.7341276172671114, iteration: 398671
loss: 1.0107563734054565,grad_norm: 0.8896280262045173, iteration: 398672
loss: 0.9962955713272095,grad_norm: 0.7771188786672675, iteration: 398673
loss: 1.0222938060760498,grad_norm: 0.8168731774440828, iteration: 398674
loss: 0.9895064830780029,grad_norm: 0.7748765044182329, iteration: 398675
loss: 0.952214241027832,grad_norm: 0.781369191634706, iteration: 398676
loss: 1.0056365728378296,grad_norm: 0.9999995031303868, iteration: 398677
loss: 1.050625205039978,grad_norm: 0.8711233718415435, iteration: 398678
loss: 1.0131474733352661,grad_norm: 0.9999992339635912, iteration: 398679
loss: 0.9993634223937988,grad_norm: 0.681677249833645, iteration: 398680
loss: 0.9836084246635437,grad_norm: 0.6720804321820145, iteration: 398681
loss: 1.0061243772506714,grad_norm: 0.7304105035936161, iteration: 398682
loss: 1.027133584022522,grad_norm: 0.9999996971666849, iteration: 398683
loss: 0.9613069891929626,grad_norm: 0.7767719329619028, iteration: 398684
loss: 1.0012677907943726,grad_norm: 0.817037766997124, iteration: 398685
loss: 1.118330955505371,grad_norm: 0.9999994262004364, iteration: 398686
loss: 0.9796797633171082,grad_norm: 0.7817381347651353, iteration: 398687
loss: 1.0323947668075562,grad_norm: 0.9999990762874895, iteration: 398688
loss: 0.9867504835128784,grad_norm: 0.9199152945663471, iteration: 398689
loss: 0.9997226595878601,grad_norm: 0.8641050212415092, iteration: 398690
loss: 0.9841465950012207,grad_norm: 0.9610668284740881, iteration: 398691
loss: 0.9821044206619263,grad_norm: 0.8427041552835618, iteration: 398692
loss: 1.0008527040481567,grad_norm: 0.6693526212109149, iteration: 398693
loss: 1.0022848844528198,grad_norm: 0.883279219614856, iteration: 398694
loss: 0.9723218679428101,grad_norm: 0.7304067087126459, iteration: 398695
loss: 1.0388132333755493,grad_norm: 0.9999998721383125, iteration: 398696
loss: 0.9933194518089294,grad_norm: 0.9999991744760541, iteration: 398697
loss: 1.0057913064956665,grad_norm: 0.8335398910035201, iteration: 398698
loss: 0.9697438478469849,grad_norm: 0.9999996488487666, iteration: 398699
loss: 0.9907677173614502,grad_norm: 0.883689171811274, iteration: 398700
loss: 0.9955637454986572,grad_norm: 0.7615162178695256, iteration: 398701
loss: 1.0278445482254028,grad_norm: 0.833717601105476, iteration: 398702
loss: 0.9877117872238159,grad_norm: 0.7332771521847098, iteration: 398703
loss: 0.9572851657867432,grad_norm: 0.8097122212650223, iteration: 398704
loss: 1.0633269548416138,grad_norm: 0.9999999136163188, iteration: 398705
loss: 1.0090035200119019,grad_norm: 0.929632357957917, iteration: 398706
loss: 0.9857752323150635,grad_norm: 0.78732716160608, iteration: 398707
loss: 0.9892253875732422,grad_norm: 0.8407516780993919, iteration: 398708
loss: 1.0010716915130615,grad_norm: 0.957338112529377, iteration: 398709
loss: 1.016699194908142,grad_norm: 0.8441997629393595, iteration: 398710
loss: 1.0082371234893799,grad_norm: 0.840699018072328, iteration: 398711
loss: 1.1630078554153442,grad_norm: 0.9999992625394231, iteration: 398712
loss: 1.0211048126220703,grad_norm: 0.7178701251406944, iteration: 398713
loss: 1.0374696254730225,grad_norm: 0.9999998986588181, iteration: 398714
loss: 1.0010342597961426,grad_norm: 0.9999992222375275, iteration: 398715
loss: 1.030992031097412,grad_norm: 0.9107513933186717, iteration: 398716
loss: 1.0212322473526,grad_norm: 0.8454012423782523, iteration: 398717
loss: 1.0193357467651367,grad_norm: 0.8791471613373775, iteration: 398718
loss: 1.1012606620788574,grad_norm: 0.999999738819062, iteration: 398719
loss: 1.0244370698928833,grad_norm: 0.7187119241321562, iteration: 398720
loss: 1.0055352449417114,grad_norm: 0.620002856764807, iteration: 398721
loss: 0.9858539700508118,grad_norm: 0.6523179419166935, iteration: 398722
loss: 0.9631052613258362,grad_norm: 0.7772232594464388, iteration: 398723
loss: 0.9883323907852173,grad_norm: 0.7134195161443914, iteration: 398724
loss: 1.0234400033950806,grad_norm: 0.9999998640734581, iteration: 398725
loss: 1.0720107555389404,grad_norm: 0.740021698528489, iteration: 398726
loss: 1.0509710311889648,grad_norm: 0.9905578924086308, iteration: 398727
loss: 1.033011794090271,grad_norm: 0.9195783597485337, iteration: 398728
loss: 1.0003690719604492,grad_norm: 0.9999993564083812, iteration: 398729
loss: 1.0106921195983887,grad_norm: 0.8763023573253501, iteration: 398730
loss: 0.9947212338447571,grad_norm: 0.8290113022712097, iteration: 398731
loss: 1.05657958984375,grad_norm: 0.9999996200400983, iteration: 398732
loss: 0.9891079664230347,grad_norm: 0.7747170614636559, iteration: 398733
loss: 0.9466478824615479,grad_norm: 0.6211252383858771, iteration: 398734
loss: 1.0210589170455933,grad_norm: 0.8062586690177759, iteration: 398735
loss: 1.0159393548965454,grad_norm: 0.6275618240116408, iteration: 398736
loss: 1.0719026327133179,grad_norm: 0.9999995220843503, iteration: 398737
loss: 1.0299986600875854,grad_norm: 0.7540899061973015, iteration: 398738
loss: 1.028303623199463,grad_norm: 0.8305334562277316, iteration: 398739
loss: 0.9751789569854736,grad_norm: 0.8414055993251845, iteration: 398740
loss: 1.0023794174194336,grad_norm: 0.9306603419973093, iteration: 398741
loss: 1.0144565105438232,grad_norm: 0.9716674329351841, iteration: 398742
loss: 0.9836004376411438,grad_norm: 0.789040242311793, iteration: 398743
loss: 0.9856886267662048,grad_norm: 0.7607940373229394, iteration: 398744
loss: 1.032545804977417,grad_norm: 0.664414833793571, iteration: 398745
loss: 0.997061550617218,grad_norm: 0.8334389084400436, iteration: 398746
loss: 0.9887122511863708,grad_norm: 0.8947903386474307, iteration: 398747
loss: 1.008632779121399,grad_norm: 0.9849408014053237, iteration: 398748
loss: 0.9941080212593079,grad_norm: 0.7649282003061283, iteration: 398749
loss: 0.9882311224937439,grad_norm: 0.8253955593887379, iteration: 398750
loss: 1.0018343925476074,grad_norm: 0.9999990681884444, iteration: 398751
loss: 0.9840282201766968,grad_norm: 0.8354734142185454, iteration: 398752
loss: 0.9912514686584473,grad_norm: 0.6804396855379639, iteration: 398753
loss: 1.0176438093185425,grad_norm: 0.8204643115914353, iteration: 398754
loss: 1.038166880607605,grad_norm: 0.9999988476757297, iteration: 398755
loss: 1.0877755880355835,grad_norm: 0.8240378378429276, iteration: 398756
loss: 0.9567596912384033,grad_norm: 0.8347108750841387, iteration: 398757
loss: 0.9925393462181091,grad_norm: 0.8588570362643058, iteration: 398758
loss: 1.0259544849395752,grad_norm: 0.9271164275654825, iteration: 398759
loss: 1.037111759185791,grad_norm: 0.7616588812855745, iteration: 398760
loss: 1.0250468254089355,grad_norm: 0.860741034864632, iteration: 398761
loss: 1.0146300792694092,grad_norm: 0.7253488997880603, iteration: 398762
loss: 0.9949958324432373,grad_norm: 0.7502746076055306, iteration: 398763
loss: 1.0156242847442627,grad_norm: 0.8881694701436647, iteration: 398764
loss: 0.991421103477478,grad_norm: 0.857820773219063, iteration: 398765
loss: 0.9881716966629028,grad_norm: 0.900021868679633, iteration: 398766
loss: 1.0198040008544922,grad_norm: 0.7738746390384388, iteration: 398767
loss: 0.9977925419807434,grad_norm: 0.827331185640981, iteration: 398768
loss: 1.000059723854065,grad_norm: 0.9999991133141878, iteration: 398769
loss: 1.0398043394088745,grad_norm: 0.9999997000502413, iteration: 398770
loss: 1.0514562129974365,grad_norm: 0.8422423630853112, iteration: 398771
loss: 0.9774714112281799,grad_norm: 0.7062511192914334, iteration: 398772
loss: 1.015973687171936,grad_norm: 0.9999995414435049, iteration: 398773
loss: 1.0431151390075684,grad_norm: 0.9999995883066297, iteration: 398774
loss: 1.0271674394607544,grad_norm: 0.799140657753354, iteration: 398775
loss: 0.9855455160140991,grad_norm: 0.8460447527754154, iteration: 398776
loss: 0.9712983965873718,grad_norm: 0.7050462411583056, iteration: 398777
loss: 1.014723777770996,grad_norm: 0.734673312894624, iteration: 398778
loss: 0.9755405783653259,grad_norm: 0.8474372121918833, iteration: 398779
loss: 1.0150130987167358,grad_norm: 0.7369876936187563, iteration: 398780
loss: 0.9805110692977905,grad_norm: 0.8491916965686531, iteration: 398781
loss: 1.0368648767471313,grad_norm: 0.8693139590255168, iteration: 398782
loss: 1.0377790927886963,grad_norm: 0.9999991217557542, iteration: 398783
loss: 0.975961446762085,grad_norm: 0.9999991168635857, iteration: 398784
loss: 0.9915854334831238,grad_norm: 0.817920125512215, iteration: 398785
loss: 1.0064376592636108,grad_norm: 0.8427611418315011, iteration: 398786
loss: 0.9969738721847534,grad_norm: 0.7711377206017437, iteration: 398787
loss: 0.976917028427124,grad_norm: 0.7947612864366147, iteration: 398788
loss: 1.0242619514465332,grad_norm: 0.9040165759886495, iteration: 398789
loss: 1.0526198148727417,grad_norm: 0.8916229690076133, iteration: 398790
loss: 0.9423163533210754,grad_norm: 0.7775238291446008, iteration: 398791
loss: 0.9778463840484619,grad_norm: 0.6796423191064297, iteration: 398792
loss: 0.9712021350860596,grad_norm: 0.7349341050962522, iteration: 398793
loss: 1.0081735849380493,grad_norm: 0.7658321565197227, iteration: 398794
loss: 1.0007878541946411,grad_norm: 0.7752598020661696, iteration: 398795
loss: 1.0477025508880615,grad_norm: 0.9999999855565807, iteration: 398796
loss: 1.017598032951355,grad_norm: 0.6893083299374417, iteration: 398797
loss: 0.9630600810050964,grad_norm: 0.7451669118116844, iteration: 398798
loss: 0.9789016246795654,grad_norm: 0.7541945755524117, iteration: 398799
loss: 1.068498134613037,grad_norm: 0.9999998157232896, iteration: 398800
loss: 1.0086948871612549,grad_norm: 0.9999991410917052, iteration: 398801
loss: 1.0237184762954712,grad_norm: 0.9601687596847457, iteration: 398802
loss: 1.0147147178649902,grad_norm: 0.7161199194045877, iteration: 398803
loss: 0.958600640296936,grad_norm: 0.8461988752288049, iteration: 398804
loss: 1.0292620658874512,grad_norm: 0.7256559976683804, iteration: 398805
loss: 1.0156995058059692,grad_norm: 0.6829287547926622, iteration: 398806
loss: 1.0706827640533447,grad_norm: 0.7266943322988292, iteration: 398807
loss: 1.076183795928955,grad_norm: 0.8087449828956549, iteration: 398808
loss: 1.0059081315994263,grad_norm: 0.9999999813614606, iteration: 398809
loss: 1.067966341972351,grad_norm: 0.8089156295796333, iteration: 398810
loss: 1.0006879568099976,grad_norm: 0.7661696277742296, iteration: 398811
loss: 1.0249704122543335,grad_norm: 0.875476817291956, iteration: 398812
loss: 0.9781988263130188,grad_norm: 0.7744064233698637, iteration: 398813
loss: 1.0127151012420654,grad_norm: 0.9018322896884943, iteration: 398814
loss: 0.9836970567703247,grad_norm: 0.788610250839821, iteration: 398815
loss: 1.047365665435791,grad_norm: 0.9999990971949025, iteration: 398816
loss: 1.0243107080459595,grad_norm: 0.7433962305951041, iteration: 398817
loss: 0.9968323111534119,grad_norm: 0.8487012615235973, iteration: 398818
loss: 0.9681118726730347,grad_norm: 0.8644840156707875, iteration: 398819
loss: 1.010450839996338,grad_norm: 0.7546849970206728, iteration: 398820
loss: 0.9944781064987183,grad_norm: 0.9999993064192653, iteration: 398821
loss: 1.0090525150299072,grad_norm: 0.8586411047298393, iteration: 398822
loss: 1.018161416053772,grad_norm: 0.9999990863599216, iteration: 398823
loss: 0.9839215874671936,grad_norm: 0.7445620276730928, iteration: 398824
loss: 1.0040168762207031,grad_norm: 0.8698584198235072, iteration: 398825
loss: 0.9926378726959229,grad_norm: 0.9309558975453895, iteration: 398826
loss: 1.0038312673568726,grad_norm: 0.7407773686502068, iteration: 398827
loss: 0.9443988800048828,grad_norm: 0.8185041769543843, iteration: 398828
loss: 1.0338492393493652,grad_norm: 0.6423807759741371, iteration: 398829
loss: 0.9990044832229614,grad_norm: 0.8694366821999531, iteration: 398830
loss: 0.9845923185348511,grad_norm: 0.7023043930983225, iteration: 398831
loss: 1.0021556615829468,grad_norm: 0.7286754533164502, iteration: 398832
loss: 1.0137995481491089,grad_norm: 0.6858271658090715, iteration: 398833
loss: 0.9609958529472351,grad_norm: 0.8683668114043286, iteration: 398834
loss: 1.027287244796753,grad_norm: 0.8519521378869624, iteration: 398835
loss: 0.9848448634147644,grad_norm: 0.8093002576095821, iteration: 398836
loss: 1.029296875,grad_norm: 0.8413199907710128, iteration: 398837
loss: 0.957869291305542,grad_norm: 0.8040698536696922, iteration: 398838
loss: 1.0003621578216553,grad_norm: 0.7359523161024356, iteration: 398839
loss: 1.0205767154693604,grad_norm: 0.7498025362425088, iteration: 398840
loss: 1.0328367948532104,grad_norm: 0.7975615456966227, iteration: 398841
loss: 0.9723082184791565,grad_norm: 0.9999992432696131, iteration: 398842
loss: 1.0238662958145142,grad_norm: 0.7263128582968258, iteration: 398843
loss: 0.9784342050552368,grad_norm: 0.8676388467014364, iteration: 398844
loss: 0.9903867840766907,grad_norm: 0.8368244639962034, iteration: 398845
loss: 1.0860345363616943,grad_norm: 0.8962909105486575, iteration: 398846
loss: 0.9988612532615662,grad_norm: 0.8895946669333964, iteration: 398847
loss: 0.9829030632972717,grad_norm: 0.6948857571632607, iteration: 398848
loss: 0.9963299036026001,grad_norm: 0.8680971633287546, iteration: 398849
loss: 1.0178868770599365,grad_norm: 0.7873108975697892, iteration: 398850
loss: 1.0670750141143799,grad_norm: 0.9070705309316828, iteration: 398851
loss: 0.9870849251747131,grad_norm: 0.8147957497082159, iteration: 398852
loss: 0.9886359572410583,grad_norm: 0.8087152992386365, iteration: 398853
loss: 1.0247331857681274,grad_norm: 0.7287292380710158, iteration: 398854
loss: 0.9886471629142761,grad_norm: 0.8766455968700808, iteration: 398855
loss: 0.9904491901397705,grad_norm: 0.8347578837892726, iteration: 398856
loss: 0.9786937236785889,grad_norm: 0.9215162842576313, iteration: 398857
loss: 0.9914247393608093,grad_norm: 0.6909535363017263, iteration: 398858
loss: 1.0103845596313477,grad_norm: 0.9496665824477704, iteration: 398859
loss: 1.0861068964004517,grad_norm: 0.9303612848835937, iteration: 398860
loss: 1.0097047090530396,grad_norm: 0.7364781903530012, iteration: 398861
loss: 0.9775733351707458,grad_norm: 0.6572832963899584, iteration: 398862
loss: 0.9856476187705994,grad_norm: 0.5923180132202499, iteration: 398863
loss: 0.9945356845855713,grad_norm: 0.8162910219179741, iteration: 398864
loss: 1.1457321643829346,grad_norm: 0.9999996509448343, iteration: 398865
loss: 1.0048432350158691,grad_norm: 0.9004479940987155, iteration: 398866
loss: 0.9696533679962158,grad_norm: 0.83549397950913, iteration: 398867
loss: 0.982257604598999,grad_norm: 0.7627651129404969, iteration: 398868
loss: 0.9997218251228333,grad_norm: 0.8388366192109344, iteration: 398869
loss: 0.9728822708129883,grad_norm: 0.7321628236094581, iteration: 398870
loss: 0.9523376226425171,grad_norm: 0.7912607950174763, iteration: 398871
loss: 1.0976901054382324,grad_norm: 0.7701202845495798, iteration: 398872
loss: 0.9823009967803955,grad_norm: 0.9004811416156474, iteration: 398873
loss: 0.9720740914344788,grad_norm: 0.699920653331372, iteration: 398874
loss: 1.040541648864746,grad_norm: 0.8616791649083444, iteration: 398875
loss: 0.997600793838501,grad_norm: 0.7207566438752541, iteration: 398876
loss: 0.9924662113189697,grad_norm: 0.800803023984506, iteration: 398877
loss: 1.1724870204925537,grad_norm: 0.9999994541409402, iteration: 398878
loss: 1.017807960510254,grad_norm: 0.9999998145113963, iteration: 398879
loss: 1.0122077465057373,grad_norm: 0.7664077810356343, iteration: 398880
loss: 1.0074039697647095,grad_norm: 0.7198297877185393, iteration: 398881
loss: 0.9501562714576721,grad_norm: 0.7583847879682473, iteration: 398882
loss: 1.03398859500885,grad_norm: 0.8329824400685941, iteration: 398883
loss: 1.0208581686019897,grad_norm: 0.9999999170741621, iteration: 398884
loss: 1.0226075649261475,grad_norm: 0.7509701701252509, iteration: 398885
loss: 0.9886698126792908,grad_norm: 0.8232478446235971, iteration: 398886
loss: 0.9777475595474243,grad_norm: 0.9999990355600755, iteration: 398887
loss: 1.0244522094726562,grad_norm: 0.9099319523364126, iteration: 398888
loss: 1.0012617111206055,grad_norm: 0.6757426101235152, iteration: 398889
loss: 1.0998002290725708,grad_norm: 0.999999261484464, iteration: 398890
loss: 1.027804970741272,grad_norm: 0.9291765727494802, iteration: 398891
loss: 1.0383566617965698,grad_norm: 0.9999992610636853, iteration: 398892
loss: 1.0100706815719604,grad_norm: 0.6966362095244613, iteration: 398893
loss: 1.0034042596817017,grad_norm: 0.8207750362752766, iteration: 398894
loss: 1.003612756729126,grad_norm: 0.7162227730545296, iteration: 398895
loss: 1.082373023033142,grad_norm: 0.9999994873473245, iteration: 398896
loss: 1.0007323026657104,grad_norm: 0.8224401179221615, iteration: 398897
loss: 1.0328162908554077,grad_norm: 0.9999995231274961, iteration: 398898
loss: 1.0414762496948242,grad_norm: 0.9999995834534725, iteration: 398899
loss: 0.999997615814209,grad_norm: 0.7727200278911901, iteration: 398900
loss: 1.0087708234786987,grad_norm: 0.8328213669640656, iteration: 398901
loss: 0.9996287822723389,grad_norm: 0.8163415074579694, iteration: 398902
loss: 1.023272156715393,grad_norm: 0.9999995381174157, iteration: 398903
loss: 1.0464880466461182,grad_norm: 0.9999996282456848, iteration: 398904
loss: 0.973084568977356,grad_norm: 0.7047070342880125, iteration: 398905
loss: 0.9610370397567749,grad_norm: 0.6772445700315605, iteration: 398906
loss: 0.9910078048706055,grad_norm: 0.6828266183723315, iteration: 398907
loss: 1.0223336219787598,grad_norm: 0.8186502302853108, iteration: 398908
loss: 0.9904511570930481,grad_norm: 0.9042854790083125, iteration: 398909
loss: 1.1258138418197632,grad_norm: 0.9999991309433579, iteration: 398910
loss: 1.022077202796936,grad_norm: 0.7418833260135169, iteration: 398911
loss: 1.0041749477386475,grad_norm: 0.9999989714018626, iteration: 398912
loss: 1.0243130922317505,grad_norm: 0.9999997796079702, iteration: 398913
loss: 1.0353018045425415,grad_norm: 0.7122643264960008, iteration: 398914
loss: 1.0377438068389893,grad_norm: 0.9999993150625716, iteration: 398915
loss: 1.1761775016784668,grad_norm: 0.9999997819513943, iteration: 398916
loss: 1.1083524227142334,grad_norm: 0.9999996884122069, iteration: 398917
loss: 0.9772482514381409,grad_norm: 0.7260499715049623, iteration: 398918
loss: 1.0204190015792847,grad_norm: 0.7217408313481246, iteration: 398919
loss: 1.052056908607483,grad_norm: 0.8997798499323261, iteration: 398920
loss: 1.043304204940796,grad_norm: 0.9999992445240182, iteration: 398921
loss: 0.9393613934516907,grad_norm: 0.7502879494690328, iteration: 398922
loss: 1.0078532695770264,grad_norm: 0.8889403583467779, iteration: 398923
loss: 0.984380304813385,grad_norm: 0.8160589635364938, iteration: 398924
loss: 1.000900149345398,grad_norm: 0.6479492810760402, iteration: 398925
loss: 0.9403930306434631,grad_norm: 0.6996259164070365, iteration: 398926
loss: 0.9944214820861816,grad_norm: 0.8733184086844387, iteration: 398927
loss: 0.9938302040100098,grad_norm: 0.8970967341493982, iteration: 398928
loss: 1.0293464660644531,grad_norm: 0.6988446396585876, iteration: 398929
loss: 0.9857321977615356,grad_norm: 0.7886493396297165, iteration: 398930
loss: 1.0320812463760376,grad_norm: 0.8975411368293945, iteration: 398931
loss: 1.095518708229065,grad_norm: 0.8041145307309172, iteration: 398932
loss: 1.0213958024978638,grad_norm: 0.9803621423087031, iteration: 398933
loss: 0.9923124313354492,grad_norm: 0.6081165988463224, iteration: 398934
loss: 1.0023702383041382,grad_norm: 0.8399601281310315, iteration: 398935
loss: 1.0335197448730469,grad_norm: 0.8177968776001798, iteration: 398936
loss: 1.003356695175171,grad_norm: 0.9363556381638646, iteration: 398937
loss: 1.0054892301559448,grad_norm: 0.9999995789744432, iteration: 398938
loss: 1.014249563217163,grad_norm: 0.777072863177139, iteration: 398939
loss: 1.0110489130020142,grad_norm: 0.7408817546760174, iteration: 398940
loss: 1.0092875957489014,grad_norm: 0.9999993049524027, iteration: 398941
loss: 1.0054678916931152,grad_norm: 0.8015584057874094, iteration: 398942
loss: 1.0583759546279907,grad_norm: 0.954775080996369, iteration: 398943
loss: 0.9917123317718506,grad_norm: 0.7994180822075654, iteration: 398944
loss: 1.0104739665985107,grad_norm: 0.9999991568048274, iteration: 398945
loss: 1.0120798349380493,grad_norm: 0.780415976488201, iteration: 398946
loss: 1.0312163829803467,grad_norm: 0.8014230897658683, iteration: 398947
loss: 0.9767827987670898,grad_norm: 0.999999946249089, iteration: 398948
loss: 1.0470424890518188,grad_norm: 0.8559660814492409, iteration: 398949
loss: 1.0241971015930176,grad_norm: 0.9999996127467821, iteration: 398950
loss: 1.0088571310043335,grad_norm: 0.6590585887052177, iteration: 398951
loss: 1.021203875541687,grad_norm: 0.9999992153399109, iteration: 398952
loss: 1.0059682130813599,grad_norm: 0.8525792533606098, iteration: 398953
loss: 0.9745656251907349,grad_norm: 0.8022784802055724, iteration: 398954
loss: 0.9828519821166992,grad_norm: 0.7830734428429488, iteration: 398955
loss: 0.9904428720474243,grad_norm: 0.798091418887445, iteration: 398956
loss: 0.9761185646057129,grad_norm: 0.7335367477088846, iteration: 398957
loss: 0.9944246411323547,grad_norm: 0.7713315087895771, iteration: 398958
loss: 0.9950043559074402,grad_norm: 0.8713416603762655, iteration: 398959
loss: 0.9721584320068359,grad_norm: 0.8083235931878421, iteration: 398960
loss: 0.9729827642440796,grad_norm: 0.6058364231626534, iteration: 398961
loss: 1.0175161361694336,grad_norm: 0.8340689880516834, iteration: 398962
loss: 1.0042431354522705,grad_norm: 0.8039364626054745, iteration: 398963
loss: 1.0409860610961914,grad_norm: 0.9932389763380834, iteration: 398964
loss: 1.072835087776184,grad_norm: 0.9999999603079521, iteration: 398965
loss: 0.96281898021698,grad_norm: 0.9999991232972449, iteration: 398966
loss: 1.0052520036697388,grad_norm: 0.9999995667828391, iteration: 398967
loss: 0.9893925189971924,grad_norm: 0.902117159430187, iteration: 398968
loss: 0.9883641004562378,grad_norm: 0.8451050174075103, iteration: 398969
loss: 1.0348782539367676,grad_norm: 0.8226268847388353, iteration: 398970
loss: 1.087422251701355,grad_norm: 0.9999990028493229, iteration: 398971
loss: 0.978860080242157,grad_norm: 0.9099622674232583, iteration: 398972
loss: 1.0024996995925903,grad_norm: 0.9999997969624113, iteration: 398973
loss: 1.0332260131835938,grad_norm: 0.7648825491668887, iteration: 398974
loss: 1.128524899482727,grad_norm: 0.9999993970332178, iteration: 398975
loss: 0.9767924547195435,grad_norm: 0.8385920876965969, iteration: 398976
loss: 1.0362683534622192,grad_norm: 0.8705394437846234, iteration: 398977
loss: 1.0366343259811401,grad_norm: 0.6810814994073742, iteration: 398978
loss: 1.0229803323745728,grad_norm: 0.882165703882156, iteration: 398979
loss: 1.0537430047988892,grad_norm: 0.9999994830125765, iteration: 398980
loss: 0.9969494938850403,grad_norm: 0.8942647918015655, iteration: 398981
loss: 1.0680921077728271,grad_norm: 0.7227996616811979, iteration: 398982
loss: 1.0913913249969482,grad_norm: 0.999999184227431, iteration: 398983
loss: 1.038834810256958,grad_norm: 0.7793852761836061, iteration: 398984
loss: 0.9941878318786621,grad_norm: 0.8479221391839115, iteration: 398985
loss: 1.0055872201919556,grad_norm: 0.6878921465590366, iteration: 398986
loss: 1.0201137065887451,grad_norm: 0.6852387036524389, iteration: 398987
loss: 1.0181437730789185,grad_norm: 0.9999995648487835, iteration: 398988
loss: 0.9991061091423035,grad_norm: 0.7684968505749312, iteration: 398989
loss: 0.9871852397918701,grad_norm: 0.9764516317633812, iteration: 398990
loss: 0.9788863062858582,grad_norm: 0.6197304977198782, iteration: 398991
loss: 1.0550925731658936,grad_norm: 0.9999998994192317, iteration: 398992
loss: 0.9623411297798157,grad_norm: 0.8900545745374936, iteration: 398993
loss: 1.0574277639389038,grad_norm: 0.9244253488617415, iteration: 398994
loss: 1.0200098752975464,grad_norm: 0.8083592899075622, iteration: 398995
loss: 0.9703496098518372,grad_norm: 0.7310973348329148, iteration: 398996
loss: 0.9631592631340027,grad_norm: 0.7693124329270554, iteration: 398997
loss: 0.9795931577682495,grad_norm: 0.7426038644623961, iteration: 398998
loss: 0.9894790053367615,grad_norm: 0.8159910286268722, iteration: 398999
loss: 0.9975053668022156,grad_norm: 0.8129635172871499, iteration: 399000
loss: 0.9924731254577637,grad_norm: 0.7719198719508297, iteration: 399001
loss: 1.0066717863082886,grad_norm: 0.888148800773971, iteration: 399002
loss: 1.0147184133529663,grad_norm: 0.7863045921942761, iteration: 399003
loss: 1.0178475379943848,grad_norm: 0.7953150028915306, iteration: 399004
loss: 1.021733045578003,grad_norm: 0.9999998522268444, iteration: 399005
loss: 0.9875760078430176,grad_norm: 0.7968322160655166, iteration: 399006
loss: 0.9844486117362976,grad_norm: 0.6954096763905131, iteration: 399007
loss: 0.9965894818305969,grad_norm: 0.9378069215323056, iteration: 399008
loss: 1.0236103534698486,grad_norm: 0.849653288885751, iteration: 399009
loss: 0.9882846474647522,grad_norm: 0.7002785771181246, iteration: 399010
loss: 1.0070749521255493,grad_norm: 0.8390221729466413, iteration: 399011
loss: 1.040647268295288,grad_norm: 0.9999993130588883, iteration: 399012
loss: 0.9786805510520935,grad_norm: 0.7767592420886301, iteration: 399013
loss: 1.0170882940292358,grad_norm: 0.7296936318162232, iteration: 399014
loss: 0.9926033616065979,grad_norm: 0.8702341809759833, iteration: 399015
loss: 1.0202137231826782,grad_norm: 0.9999991284425946, iteration: 399016
loss: 0.9843838214874268,grad_norm: 0.8018685661775167, iteration: 399017
loss: 1.0135085582733154,grad_norm: 0.8132387639373103, iteration: 399018
loss: 0.983492910861969,grad_norm: 0.7280660717038995, iteration: 399019
loss: 0.9999822378158569,grad_norm: 0.9999998894743135, iteration: 399020
loss: 1.0031629800796509,grad_norm: 0.7455236407745157, iteration: 399021
loss: 1.059981346130371,grad_norm: 0.9999991945038891, iteration: 399022
loss: 0.9612318277359009,grad_norm: 0.7715461732551832, iteration: 399023
loss: 1.0434305667877197,grad_norm: 0.9999995771986513, iteration: 399024
loss: 1.0288935899734497,grad_norm: 0.999999068446656, iteration: 399025
loss: 1.091611385345459,grad_norm: 0.984023333503761, iteration: 399026
loss: 0.9627425074577332,grad_norm: 0.898677240903244, iteration: 399027
loss: 1.0670421123504639,grad_norm: 0.838194736032651, iteration: 399028
loss: 0.9784278273582458,grad_norm: 0.7737105714165315, iteration: 399029
loss: 1.0632065534591675,grad_norm: 0.9999991412041976, iteration: 399030
loss: 0.9670039415359497,grad_norm: 0.6821116586017352, iteration: 399031
loss: 1.0331023931503296,grad_norm: 0.8413680912065495, iteration: 399032
loss: 0.9776920080184937,grad_norm: 0.8777666106836893, iteration: 399033
loss: 1.0044918060302734,grad_norm: 0.8230728293712772, iteration: 399034
loss: 1.0273643732070923,grad_norm: 0.9999996059888773, iteration: 399035
loss: 1.0727651119232178,grad_norm: 0.9843289127007268, iteration: 399036
loss: 0.9959267973899841,grad_norm: 0.6600263945119407, iteration: 399037
loss: 1.0874847173690796,grad_norm: 0.9999991617193544, iteration: 399038
loss: 1.1481029987335205,grad_norm: 0.8599702559606504, iteration: 399039
loss: 0.9630106091499329,grad_norm: 0.8304108846492195, iteration: 399040
loss: 0.9984362721443176,grad_norm: 0.7382923070302674, iteration: 399041
loss: 1.0266671180725098,grad_norm: 0.6948110691962283, iteration: 399042
loss: 1.0120986700057983,grad_norm: 0.887916176259425, iteration: 399043
loss: 1.154043436050415,grad_norm: 0.9999996143297115, iteration: 399044
loss: 1.1873852014541626,grad_norm: 0.9999997149762841, iteration: 399045
loss: 0.96364426612854,grad_norm: 0.7836674067996254, iteration: 399046
loss: 1.0449284315109253,grad_norm: 0.9999995509203712, iteration: 399047
loss: 1.0516124963760376,grad_norm: 0.7352821861380011, iteration: 399048
loss: 0.9822858572006226,grad_norm: 0.7035561941415405, iteration: 399049
loss: 1.0620479583740234,grad_norm: 0.9602151692358354, iteration: 399050
loss: 0.9743162393569946,grad_norm: 0.9999999090022952, iteration: 399051
loss: 1.0352203845977783,grad_norm: 0.99999919971024, iteration: 399052
loss: 1.062929630279541,grad_norm: 0.9847250650891917, iteration: 399053
loss: 0.9976908564567566,grad_norm: 0.7697275911859779, iteration: 399054
loss: 1.0054075717926025,grad_norm: 0.8275015091251123, iteration: 399055
loss: 1.0321345329284668,grad_norm: 0.9999996580714309, iteration: 399056
loss: 0.9691284894943237,grad_norm: 0.8589822077011291, iteration: 399057
loss: 1.0546338558197021,grad_norm: 0.8237609217696862, iteration: 399058
loss: 1.033652424812317,grad_norm: 0.9999994005287399, iteration: 399059
loss: 0.9802799820899963,grad_norm: 0.7230024426084234, iteration: 399060
loss: 1.0217540264129639,grad_norm: 0.9999991609391343, iteration: 399061
loss: 0.9838929176330566,grad_norm: 0.8187641262680838, iteration: 399062
loss: 0.9966508746147156,grad_norm: 0.8186739018698652, iteration: 399063
loss: 1.0189099311828613,grad_norm: 0.999999852249999, iteration: 399064
loss: 1.0315061807632446,grad_norm: 0.8072659268815184, iteration: 399065
loss: 0.9807400703430176,grad_norm: 0.8531191616102934, iteration: 399066
loss: 0.9583899974822998,grad_norm: 0.9999999554290362, iteration: 399067
loss: 0.943722128868103,grad_norm: 0.9742686464182763, iteration: 399068
loss: 0.990577757358551,grad_norm: 0.999999029735578, iteration: 399069
loss: 1.0198590755462646,grad_norm: 0.8244313357445254, iteration: 399070
loss: 1.0595444440841675,grad_norm: 0.9999991820439093, iteration: 399071
loss: 1.0523972511291504,grad_norm: 0.8046969059533389, iteration: 399072
loss: 1.0294073820114136,grad_norm: 0.9999997788469297, iteration: 399073
loss: 0.9933405518531799,grad_norm: 0.9312030730915534, iteration: 399074
loss: 1.0340474843978882,grad_norm: 0.9636598861874067, iteration: 399075
loss: 0.988783061504364,grad_norm: 0.8615344960285449, iteration: 399076
loss: 0.9703851938247681,grad_norm: 0.6828178355438066, iteration: 399077
loss: 1.05275297164917,grad_norm: 0.9999999050386924, iteration: 399078
loss: 1.0047173500061035,grad_norm: 0.6851913440520425, iteration: 399079
loss: 1.058125615119934,grad_norm: 0.8572747000799396, iteration: 399080
loss: 1.0320409536361694,grad_norm: 0.8015244835312172, iteration: 399081
loss: 0.9969064593315125,grad_norm: 0.7498942800079593, iteration: 399082
loss: 1.0220965147018433,grad_norm: 0.7603661760887894, iteration: 399083
loss: 0.9850236177444458,grad_norm: 0.73482342987554, iteration: 399084
loss: 1.0059781074523926,grad_norm: 0.8437046571443083, iteration: 399085
loss: 1.0254058837890625,grad_norm: 0.6615912581112087, iteration: 399086
loss: 0.996248722076416,grad_norm: 0.9436825440721861, iteration: 399087
loss: 1.0506342649459839,grad_norm: 0.999999375218764, iteration: 399088
loss: 1.1498827934265137,grad_norm: 1.0000000653693284, iteration: 399089
loss: 0.9876677393913269,grad_norm: 0.7735949496980918, iteration: 399090
loss: 1.0143098831176758,grad_norm: 0.8136104073117891, iteration: 399091
loss: 1.017282485961914,grad_norm: 0.8673511617220772, iteration: 399092
loss: 0.9741277694702148,grad_norm: 0.7667094794100299, iteration: 399093
loss: 1.017738938331604,grad_norm: 0.7292547078031786, iteration: 399094
loss: 1.0461361408233643,grad_norm: 0.9169842790922835, iteration: 399095
loss: 0.9779186844825745,grad_norm: 0.8344081902962899, iteration: 399096
loss: 1.0377386808395386,grad_norm: 0.8750999224595539, iteration: 399097
loss: 0.967969536781311,grad_norm: 0.9185106613073533, iteration: 399098
loss: 0.9682058095932007,grad_norm: 0.8341978238421671, iteration: 399099
loss: 0.999294638633728,grad_norm: 0.6851874364176278, iteration: 399100
loss: 1.0445969104766846,grad_norm: 0.9999991870609121, iteration: 399101
loss: 1.0503426790237427,grad_norm: 0.9999993029866533, iteration: 399102
loss: 1.0349006652832031,grad_norm: 0.8440378107025297, iteration: 399103
loss: 1.021497368812561,grad_norm: 0.9999996250562697, iteration: 399104
loss: 0.9877963662147522,grad_norm: 0.822532788769619, iteration: 399105
loss: 1.1064385175704956,grad_norm: 0.9999993692151616, iteration: 399106
loss: 1.0338677167892456,grad_norm: 0.8531553980364153, iteration: 399107
loss: 1.0864301919937134,grad_norm: 1.0000000593933278, iteration: 399108
loss: 1.0737333297729492,grad_norm: 0.9999467513264131, iteration: 399109
loss: 1.0332337617874146,grad_norm: 0.8847833343699453, iteration: 399110
loss: 0.9904107451438904,grad_norm: 0.9999993047161886, iteration: 399111
loss: 1.05935800075531,grad_norm: 0.9999998532584988, iteration: 399112
loss: 1.0678497552871704,grad_norm: 0.999999884577112, iteration: 399113
loss: 0.969152569770813,grad_norm: 0.6463571123937231, iteration: 399114
loss: 1.0123484134674072,grad_norm: 0.9539380296181964, iteration: 399115
loss: 1.0347148180007935,grad_norm: 0.860299122422421, iteration: 399116
loss: 0.9953298568725586,grad_norm: 0.7401314530652061, iteration: 399117
loss: 0.9779124855995178,grad_norm: 0.7580724340817724, iteration: 399118
loss: 1.0239468812942505,grad_norm: 0.8118809604578873, iteration: 399119
loss: 1.0179851055145264,grad_norm: 0.8747507785387642, iteration: 399120
loss: 1.0740690231323242,grad_norm: 0.8115874878446082, iteration: 399121
loss: 0.9791867136955261,grad_norm: 0.846019452852145, iteration: 399122
loss: 0.9458966255187988,grad_norm: 0.6715513339264668, iteration: 399123
loss: 1.0108740329742432,grad_norm: 0.9112031836819563, iteration: 399124
loss: 1.0035951137542725,grad_norm: 0.9853696525826054, iteration: 399125
loss: 0.9787781834602356,grad_norm: 0.8526329640046884, iteration: 399126
loss: 0.9961621165275574,grad_norm: 0.7244167802308856, iteration: 399127
loss: 0.9756997227668762,grad_norm: 0.7204702939653322, iteration: 399128
loss: 0.9971885085105896,grad_norm: 0.6569600980309982, iteration: 399129
loss: 1.1212865114212036,grad_norm: 0.9331338142244193, iteration: 399130
loss: 1.0349212884902954,grad_norm: 0.8915112305092215, iteration: 399131
loss: 1.049448847770691,grad_norm: 0.7432537754848463, iteration: 399132
loss: 1.032974362373352,grad_norm: 0.7539981224172898, iteration: 399133
loss: 1.0016595125198364,grad_norm: 0.8297991000060719, iteration: 399134
loss: 1.020645022392273,grad_norm: 0.7717195216735342, iteration: 399135
loss: 1.0074198246002197,grad_norm: 0.7277970281926448, iteration: 399136
loss: 1.0120384693145752,grad_norm: 0.9999991253920052, iteration: 399137
loss: 1.0103328227996826,grad_norm: 0.8444098792728912, iteration: 399138
loss: 1.1083357334136963,grad_norm: 0.9999991841369283, iteration: 399139
loss: 1.0081579685211182,grad_norm: 0.7673344699452058, iteration: 399140
loss: 1.0087555646896362,grad_norm: 0.8265740854047227, iteration: 399141
loss: 0.9790670871734619,grad_norm: 0.7326476101841415, iteration: 399142
loss: 0.9773370623588562,grad_norm: 0.7039554998013074, iteration: 399143
loss: 1.015211582183838,grad_norm: 0.8767329619819828, iteration: 399144
loss: 1.0046921968460083,grad_norm: 0.9929149558730684, iteration: 399145
loss: 0.9750960469245911,grad_norm: 0.8641282113830437, iteration: 399146
loss: 0.9901295304298401,grad_norm: 0.7636307115713665, iteration: 399147
loss: 0.9811968207359314,grad_norm: 0.9803550082694973, iteration: 399148
loss: 0.9892608523368835,grad_norm: 0.8102558349103859, iteration: 399149
loss: 1.0400657653808594,grad_norm: 0.767388921742188, iteration: 399150
loss: 0.9818275570869446,grad_norm: 0.8066177153404135, iteration: 399151
loss: 1.0343737602233887,grad_norm: 0.8134511704775913, iteration: 399152
loss: 0.9838019609451294,grad_norm: 0.7231665353532195, iteration: 399153
loss: 1.0057154893875122,grad_norm: 0.8095249987576235, iteration: 399154
loss: 0.9790898561477661,grad_norm: 0.82261399064035, iteration: 399155
loss: 0.9721710681915283,grad_norm: 0.8931781410993614, iteration: 399156
loss: 0.965468168258667,grad_norm: 0.8009250042073027, iteration: 399157
loss: 1.025987982749939,grad_norm: 0.9999998834203156, iteration: 399158
loss: 0.9497242569923401,grad_norm: 0.8725753521122092, iteration: 399159
loss: 1.0157941579818726,grad_norm: 0.9999992181121151, iteration: 399160
loss: 1.01585054397583,grad_norm: 0.9676950722804157, iteration: 399161
loss: 1.043837308883667,grad_norm: 0.9999999994337668, iteration: 399162
loss: 0.999285101890564,grad_norm: 0.8556568259992564, iteration: 399163
loss: 0.9887656569480896,grad_norm: 0.8048784310368584, iteration: 399164
loss: 1.0188877582550049,grad_norm: 0.8883394593638357, iteration: 399165
loss: 0.9798221588134766,grad_norm: 0.7225913117913468, iteration: 399166
loss: 1.0775599479675293,grad_norm: 0.7425798003499039, iteration: 399167
loss: 1.1203945875167847,grad_norm: 0.7795332693456398, iteration: 399168
loss: 1.023095726966858,grad_norm: 0.9999992612301704, iteration: 399169
loss: 1.0932987928390503,grad_norm: 0.7998685295771452, iteration: 399170
loss: 0.9865371584892273,grad_norm: 0.7937091990233063, iteration: 399171
loss: 0.9685766696929932,grad_norm: 0.7581873517338008, iteration: 399172
loss: 0.9954853057861328,grad_norm: 0.7834512271022817, iteration: 399173
loss: 1.0205472707748413,grad_norm: 0.691660553201457, iteration: 399174
loss: 1.0951716899871826,grad_norm: 0.9999996223269497, iteration: 399175
loss: 0.9960405230522156,grad_norm: 0.8716925744099863, iteration: 399176
loss: 1.1324987411499023,grad_norm: 0.9999995013168225, iteration: 399177
loss: 1.0105854272842407,grad_norm: 0.9999991524997512, iteration: 399178
loss: 0.9976643919944763,grad_norm: 0.7178103137790474, iteration: 399179
loss: 1.0112413167953491,grad_norm: 0.8388212150309001, iteration: 399180
loss: 0.9945855736732483,grad_norm: 0.7375680242578566, iteration: 399181
loss: 0.9691219925880432,grad_norm: 0.7148763800505488, iteration: 399182
loss: 0.991274893283844,grad_norm: 0.8038408850036848, iteration: 399183
loss: 0.9661123156547546,grad_norm: 0.6095346090224708, iteration: 399184
loss: 1.0074350833892822,grad_norm: 0.8197778336630728, iteration: 399185
loss: 1.0172113180160522,grad_norm: 0.9443412331682421, iteration: 399186
loss: 0.9857425689697266,grad_norm: 0.7666204518138563, iteration: 399187
loss: 0.9748300313949585,grad_norm: 0.8985585950353673, iteration: 399188
loss: 0.9753798246383667,grad_norm: 0.6634461675169094, iteration: 399189
loss: 1.0292751789093018,grad_norm: 0.9924412404871561, iteration: 399190
loss: 1.032379150390625,grad_norm: 0.9075395617596193, iteration: 399191
loss: 0.9900288581848145,grad_norm: 0.8366296069438609, iteration: 399192
loss: 0.9863560795783997,grad_norm: 0.6974779282275172, iteration: 399193
loss: 1.0083848237991333,grad_norm: 0.7634796958405561, iteration: 399194
loss: 1.0632063150405884,grad_norm: 0.8584563592461741, iteration: 399195
loss: 1.0336437225341797,grad_norm: 0.8831858026392728, iteration: 399196
loss: 0.9978629946708679,grad_norm: 0.6104014429389568, iteration: 399197
loss: 1.0046406984329224,grad_norm: 0.7692388662986182, iteration: 399198
loss: 0.9888876676559448,grad_norm: 0.9999989958616011, iteration: 399199
loss: 0.951778769493103,grad_norm: 0.8537505413500543, iteration: 399200
loss: 0.9804219007492065,grad_norm: 0.9999999958562572, iteration: 399201
loss: 1.058072805404663,grad_norm: 0.9999992411082036, iteration: 399202
loss: 1.0278619527816772,grad_norm: 0.7610664095949305, iteration: 399203
loss: 1.036228895187378,grad_norm: 0.8684156419201068, iteration: 399204
loss: 1.0080013275146484,grad_norm: 0.8632096560466602, iteration: 399205
loss: 1.0463346242904663,grad_norm: 0.9999996157830433, iteration: 399206
loss: 0.991303026676178,grad_norm: 0.7812142767681232, iteration: 399207
loss: 1.0324088335037231,grad_norm: 0.817733359757762, iteration: 399208
loss: 1.0243562459945679,grad_norm: 0.8874409347192216, iteration: 399209
loss: 1.0656780004501343,grad_norm: 0.7522546814202127, iteration: 399210
loss: 1.0343260765075684,grad_norm: 0.8983175026921262, iteration: 399211
loss: 0.9944205284118652,grad_norm: 0.7756555937268755, iteration: 399212
loss: 1.001158595085144,grad_norm: 0.9999990795004509, iteration: 399213
loss: 1.0010141134262085,grad_norm: 0.8492031354582383, iteration: 399214
loss: 0.9687961339950562,grad_norm: 0.7567159643865579, iteration: 399215
loss: 0.979124128818512,grad_norm: 0.6797478180221912, iteration: 399216
loss: 0.9775377511978149,grad_norm: 0.9999999011210504, iteration: 399217
loss: 0.9829506874084473,grad_norm: 0.7911680109839, iteration: 399218
loss: 1.0182709693908691,grad_norm: 0.9999992906918536, iteration: 399219
loss: 1.0033658742904663,grad_norm: 0.7481360160248167, iteration: 399220
loss: 1.049007773399353,grad_norm: 0.9999997392851255, iteration: 399221
loss: 0.9931192994117737,grad_norm: 0.6339382210228524, iteration: 399222
loss: 0.9784772992134094,grad_norm: 0.7022366511532803, iteration: 399223
loss: 1.055282473564148,grad_norm: 0.9999995067509306, iteration: 399224
loss: 1.015596628189087,grad_norm: 0.8091232269882688, iteration: 399225
loss: 1.0135959386825562,grad_norm: 0.9773872844326053, iteration: 399226
loss: 0.9817090034484863,grad_norm: 0.8715755063726256, iteration: 399227
loss: 1.0615633726119995,grad_norm: 0.8154868804129515, iteration: 399228
loss: 0.9819366335868835,grad_norm: 0.8782455008657029, iteration: 399229
loss: 1.025830864906311,grad_norm: 0.7973646215041102, iteration: 399230
loss: 1.0191479921340942,grad_norm: 0.9141922326497949, iteration: 399231
loss: 1.0845555067062378,grad_norm: 0.8117796046426687, iteration: 399232
loss: 1.0085010528564453,grad_norm: 0.7470745615847518, iteration: 399233
loss: 0.9625105261802673,grad_norm: 0.7782293624164339, iteration: 399234
loss: 1.0161316394805908,grad_norm: 0.9999998357427521, iteration: 399235
loss: 0.9968865513801575,grad_norm: 0.9352933235784142, iteration: 399236
loss: 1.0136724710464478,grad_norm: 0.8045072843999984, iteration: 399237
loss: 0.9979985952377319,grad_norm: 0.8172372465286468, iteration: 399238
loss: 1.0234899520874023,grad_norm: 0.9217141126157111, iteration: 399239
loss: 1.1264798641204834,grad_norm: 0.8087250600224163, iteration: 399240
loss: 1.1059412956237793,grad_norm: 0.9719974785783778, iteration: 399241
loss: 0.9879631996154785,grad_norm: 0.8434531605170321, iteration: 399242
loss: 0.9646317362785339,grad_norm: 0.7745785488211444, iteration: 399243
loss: 1.0492616891860962,grad_norm: 0.7998165371947324, iteration: 399244
loss: 0.9660292267799377,grad_norm: 0.7713789505217536, iteration: 399245
loss: 1.0108357667922974,grad_norm: 0.9999992213136687, iteration: 399246
loss: 1.0102289915084839,grad_norm: 0.9999992477523808, iteration: 399247
loss: 0.9878795742988586,grad_norm: 0.7701028675339626, iteration: 399248
loss: 1.0314462184906006,grad_norm: 0.8032669904845072, iteration: 399249
loss: 1.0286781787872314,grad_norm: 0.8121704135030737, iteration: 399250
loss: 1.0317338705062866,grad_norm: 0.7522357090775699, iteration: 399251
loss: 0.9933947920799255,grad_norm: 0.8792405343947912, iteration: 399252
loss: 1.0452605485916138,grad_norm: 0.6744454586022578, iteration: 399253
loss: 1.0857493877410889,grad_norm: 0.999999097484435, iteration: 399254
loss: 1.046001672744751,grad_norm: 0.8714749347231777, iteration: 399255
loss: 1.0274627208709717,grad_norm: 0.9999992314798563, iteration: 399256
loss: 1.0094817876815796,grad_norm: 0.9999994480331557, iteration: 399257
loss: 1.0296598672866821,grad_norm: 0.6748490386231852, iteration: 399258
loss: 1.1460820436477661,grad_norm: 0.9284443831544232, iteration: 399259
loss: 1.0492026805877686,grad_norm: 0.8361437443814101, iteration: 399260
loss: 1.0696063041687012,grad_norm: 0.678814282264717, iteration: 399261
loss: 1.0073305368423462,grad_norm: 0.8087286819385553, iteration: 399262
loss: 1.0536869764328003,grad_norm: 0.9999994452045148, iteration: 399263
loss: 0.9845964312553406,grad_norm: 0.9999998607555701, iteration: 399264
loss: 1.0752335786819458,grad_norm: 0.8724421236831444, iteration: 399265
loss: 1.072313904762268,grad_norm: 0.7511508938978652, iteration: 399266
loss: 0.9626680612564087,grad_norm: 0.8986861886347755, iteration: 399267
loss: 0.9977249503135681,grad_norm: 0.9999991070451705, iteration: 399268
loss: 1.0681376457214355,grad_norm: 0.8979025667921072, iteration: 399269
loss: 1.1247401237487793,grad_norm: 0.9132253212440585, iteration: 399270
loss: 1.0856335163116455,grad_norm: 0.9999995448709105, iteration: 399271
loss: 1.005953311920166,grad_norm: 0.7091023555139829, iteration: 399272
loss: 1.007555603981018,grad_norm: 0.8451785268545975, iteration: 399273
loss: 1.109277606010437,grad_norm: 0.9999996596186415, iteration: 399274
loss: 1.0458133220672607,grad_norm: 0.877212395545659, iteration: 399275
loss: 1.069401502609253,grad_norm: 0.7006134894652216, iteration: 399276
loss: 1.0900044441223145,grad_norm: 0.9999992253874561, iteration: 399277
loss: 1.053215742111206,grad_norm: 0.8244137572020657, iteration: 399278
loss: 1.0806063413619995,grad_norm: 0.9999999462616335, iteration: 399279
loss: 1.0087006092071533,grad_norm: 0.9999993275892745, iteration: 399280
loss: 0.9834087491035461,grad_norm: 0.8691663971804197, iteration: 399281
loss: 1.2904257774353027,grad_norm: 0.9999994393931871, iteration: 399282
loss: 1.153395414352417,grad_norm: 0.9999994941128224, iteration: 399283
loss: 1.1828933954238892,grad_norm: 0.9999989809883175, iteration: 399284
loss: 1.05028235912323,grad_norm: 0.8106729983996949, iteration: 399285
loss: 1.0080809593200684,grad_norm: 0.7996091750349222, iteration: 399286
loss: 1.0303839445114136,grad_norm: 0.9999997950583006, iteration: 399287
loss: 0.9934479594230652,grad_norm: 0.985720014040665, iteration: 399288
loss: 1.0733815431594849,grad_norm: 0.9999989772287683, iteration: 399289
loss: 1.0486971139907837,grad_norm: 0.8161618686191462, iteration: 399290
loss: 1.1274566650390625,grad_norm: 0.9999999507239767, iteration: 399291
loss: 1.1848886013031006,grad_norm: 0.8543839946565243, iteration: 399292
loss: 1.0398541688919067,grad_norm: 0.9999991333547161, iteration: 399293
loss: 1.1461938619613647,grad_norm: 0.99999955753614, iteration: 399294
loss: 1.2477576732635498,grad_norm: 0.9711740172103146, iteration: 399295
loss: 1.0689269304275513,grad_norm: 0.7610201555718735, iteration: 399296
loss: 1.0571606159210205,grad_norm: 0.9815082395053276, iteration: 399297
loss: 1.0639147758483887,grad_norm: 0.7596944241660607, iteration: 399298
loss: 1.208788275718689,grad_norm: 0.9999995237185716, iteration: 399299
loss: 1.3148473501205444,grad_norm: 0.9999999177640603, iteration: 399300
loss: 1.087141513824463,grad_norm: 0.9999998847787787, iteration: 399301
loss: 1.326026439666748,grad_norm: 0.9999996988996903, iteration: 399302
loss: 1.1244789361953735,grad_norm: 0.8183724751514445, iteration: 399303
loss: 1.12578284740448,grad_norm: 0.9999992130171291, iteration: 399304
loss: 1.1200568675994873,grad_norm: 0.9999995523986679, iteration: 399305
loss: 1.1437945365905762,grad_norm: 0.9999997320727851, iteration: 399306
loss: 1.1909291744232178,grad_norm: 0.9483208099578169, iteration: 399307
loss: 1.100380539894104,grad_norm: 0.8567236613724628, iteration: 399308
loss: 1.1437584161758423,grad_norm: 0.966289087522084, iteration: 399309
loss: 1.0944658517837524,grad_norm: 0.8126701814482109, iteration: 399310
loss: 1.2170593738555908,grad_norm: 0.9999991202091448, iteration: 399311
loss: 1.108681082725525,grad_norm: 0.9999993496599848, iteration: 399312
loss: 1.1271204948425293,grad_norm: 0.8453252658254571, iteration: 399313
loss: 1.0687156915664673,grad_norm: 0.8337860016347809, iteration: 399314
loss: 1.051281452178955,grad_norm: 0.9171920272212464, iteration: 399315
loss: 1.012131690979004,grad_norm: 0.9999998400203531, iteration: 399316
loss: 1.0619808435440063,grad_norm: 0.9999990680787065, iteration: 399317
loss: 1.0758730173110962,grad_norm: 0.9999992115624059, iteration: 399318
loss: 1.0953052043914795,grad_norm: 0.9999997740148082, iteration: 399319
loss: 1.1310009956359863,grad_norm: 0.9999990585918873, iteration: 399320
loss: 1.1480695009231567,grad_norm: 0.9999991775400638, iteration: 399321
loss: 1.101311445236206,grad_norm: 0.9999993171310687, iteration: 399322
loss: 1.039768934249878,grad_norm: 0.9999991491164536, iteration: 399323
loss: 1.0358062982559204,grad_norm: 0.9999992843964992, iteration: 399324
loss: 1.1195646524429321,grad_norm: 0.9999997606547668, iteration: 399325
loss: 1.07420015335083,grad_norm: 0.999999017879897, iteration: 399326
loss: 1.0413017272949219,grad_norm: 0.9644964091075499, iteration: 399327
loss: 1.0344619750976562,grad_norm: 0.7124550011921933, iteration: 399328
loss: 1.1235496997833252,grad_norm: 0.8337148058036662, iteration: 399329
loss: 1.1274197101593018,grad_norm: 0.9999995171450836, iteration: 399330
loss: 1.04263174533844,grad_norm: 0.9999992216956709, iteration: 399331
loss: 1.0342127084732056,grad_norm: 0.7720823938576927, iteration: 399332
loss: 1.2684355974197388,grad_norm: 0.9999995643087742, iteration: 399333
loss: 1.2221839427947998,grad_norm: 0.9999990747641836, iteration: 399334
loss: 1.1283276081085205,grad_norm: 0.8433828675385863, iteration: 399335
loss: 1.0022706985473633,grad_norm: 0.892368710388624, iteration: 399336
loss: 1.14467191696167,grad_norm: 0.9999994313291267, iteration: 399337
loss: 1.0513330698013306,grad_norm: 0.7523403806898797, iteration: 399338
loss: 1.0112826824188232,grad_norm: 0.9057868157807126, iteration: 399339
loss: 1.1607722043991089,grad_norm: 0.9999989186570203, iteration: 399340
loss: 1.0681850910186768,grad_norm: 0.8068158591860148, iteration: 399341
loss: 1.035192608833313,grad_norm: 0.7873823214538425, iteration: 399342
loss: 1.1103107929229736,grad_norm: 0.8895459202889616, iteration: 399343
loss: 0.9999951124191284,grad_norm: 0.8612219403463118, iteration: 399344
loss: 1.0495373010635376,grad_norm: 0.9999991203205394, iteration: 399345
loss: 0.9747275114059448,grad_norm: 0.7499175031621562, iteration: 399346
loss: 1.0636357069015503,grad_norm: 0.9999997331725853, iteration: 399347
loss: 1.2377071380615234,grad_norm: 0.9999991762463492, iteration: 399348
loss: 1.078292965888977,grad_norm: 0.9111588468391895, iteration: 399349
loss: 1.0320262908935547,grad_norm: 0.9999991542728368, iteration: 399350
loss: 1.1309126615524292,grad_norm: 0.9999995579357449, iteration: 399351
loss: 1.184605598449707,grad_norm: 0.9999992955377608, iteration: 399352
loss: 1.0718107223510742,grad_norm: 0.9455569392948967, iteration: 399353
loss: 1.131101131439209,grad_norm: 0.8393928456890615, iteration: 399354
loss: 1.1558607816696167,grad_norm: 0.8469500652569706, iteration: 399355
loss: 1.0779516696929932,grad_norm: 0.9177495652068921, iteration: 399356
loss: 1.005412220954895,grad_norm: 0.999999115229911, iteration: 399357
loss: 1.0672173500061035,grad_norm: 0.8564800102043245, iteration: 399358
loss: 1.0114115476608276,grad_norm: 0.9999990910449053, iteration: 399359
loss: 1.185701847076416,grad_norm: 0.9999991823206678, iteration: 399360
loss: 1.133180856704712,grad_norm: 0.9999999875597361, iteration: 399361
loss: 1.0449343919754028,grad_norm: 0.8645553107892102, iteration: 399362
loss: 1.0081775188446045,grad_norm: 0.8824896863645106, iteration: 399363
loss: 1.0164527893066406,grad_norm: 0.8149751011009388, iteration: 399364
loss: 1.0234769582748413,grad_norm: 0.7535580957015106, iteration: 399365
loss: 1.0333659648895264,grad_norm: 0.7800299726785171, iteration: 399366
loss: 1.1441264152526855,grad_norm: 0.9585645316485459, iteration: 399367
loss: 1.1714988946914673,grad_norm: 0.999999476891141, iteration: 399368
loss: 1.178621768951416,grad_norm: 0.9999999563702451, iteration: 399369
loss: 1.083862543106079,grad_norm: 0.7915470954084636, iteration: 399370
loss: 1.2059687376022339,grad_norm: 0.9999994904661202, iteration: 399371
loss: 1.1703511476516724,grad_norm: 0.9999995573361183, iteration: 399372
loss: 1.1488593816757202,grad_norm: 0.8981521929177482, iteration: 399373
loss: 1.0699090957641602,grad_norm: 0.9999999502824731, iteration: 399374
loss: 1.220549464225769,grad_norm: 0.9999998579683226, iteration: 399375
loss: 1.101296305656433,grad_norm: 0.931699799331621, iteration: 399376
loss: 1.0480659008026123,grad_norm: 0.9376692763480741, iteration: 399377
loss: 1.1450964212417603,grad_norm: 0.8086825210976963, iteration: 399378
loss: 1.1740586757659912,grad_norm: 0.999999871779523, iteration: 399379
loss: 1.1340365409851074,grad_norm: 0.9999990923834078, iteration: 399380
loss: 1.2242053747177124,grad_norm: 0.9999999178940285, iteration: 399381
loss: 1.1464399099349976,grad_norm: 0.9999992057258827, iteration: 399382
loss: 1.0408662557601929,grad_norm: 0.9999991945066911, iteration: 399383
loss: 1.1108492612838745,grad_norm: 0.852669406947911, iteration: 399384
loss: 1.0834850072860718,grad_norm: 0.762403939636114, iteration: 399385
loss: 1.1670606136322021,grad_norm: 0.9999992747198426, iteration: 399386
loss: 1.1191298961639404,grad_norm: 0.905152292235443, iteration: 399387
loss: 1.0506460666656494,grad_norm: 0.7261208090185479, iteration: 399388
loss: 1.0224740505218506,grad_norm: 0.77109002994543, iteration: 399389
loss: 1.3476519584655762,grad_norm: 0.9999999265326693, iteration: 399390
loss: 1.072238564491272,grad_norm: 0.9999994862754612, iteration: 399391
loss: 1.0817785263061523,grad_norm: 0.7548162720423339, iteration: 399392
loss: 1.118496060371399,grad_norm: 0.9999993164151267, iteration: 399393
loss: 1.1573917865753174,grad_norm: 0.984879901777858, iteration: 399394
loss: 1.267263412475586,grad_norm: 0.9999997535069045, iteration: 399395
loss: 1.0939022302627563,grad_norm: 0.8199858412716922, iteration: 399396
loss: 1.0370211601257324,grad_norm: 0.7844739116020147, iteration: 399397
loss: 1.032136082649231,grad_norm: 0.9208482987728456, iteration: 399398
loss: 0.9800107479095459,grad_norm: 0.7888921062734441, iteration: 399399
loss: 1.1701048612594604,grad_norm: 0.9999994335604038, iteration: 399400
loss: 1.0891274213790894,grad_norm: 0.9999990721499018, iteration: 399401
loss: 1.0857422351837158,grad_norm: 0.6913502297890961, iteration: 399402
loss: 1.0025653839111328,grad_norm: 0.6943792811502457, iteration: 399403
loss: 1.0322099924087524,grad_norm: 0.9304333296049299, iteration: 399404
loss: 1.0084084272384644,grad_norm: 0.8745582545786734, iteration: 399405
loss: 1.0406391620635986,grad_norm: 0.9999992207021502, iteration: 399406
loss: 1.066502332687378,grad_norm: 0.8327881996243894, iteration: 399407
loss: 1.0453732013702393,grad_norm: 0.7432179443751438, iteration: 399408
loss: 1.070514440536499,grad_norm: 0.9090583261121803, iteration: 399409
loss: 1.1600661277770996,grad_norm: 0.9999999444011746, iteration: 399410
loss: 1.2246390581130981,grad_norm: 1.0000000007511507, iteration: 399411
loss: 1.0556209087371826,grad_norm: 0.8275475751094834, iteration: 399412
loss: 1.1677170991897583,grad_norm: 0.9999992294256763, iteration: 399413
loss: 1.1145975589752197,grad_norm: 0.9999997714997579, iteration: 399414
loss: 1.1785755157470703,grad_norm: 0.9999997714710618, iteration: 399415
loss: 1.0630390644073486,grad_norm: 0.8458030254236197, iteration: 399416
loss: 1.1237213611602783,grad_norm: 0.999999970398151, iteration: 399417
loss: 1.058941125869751,grad_norm: 0.6597129342537248, iteration: 399418
loss: 1.0534111261367798,grad_norm: 0.8476207925402242, iteration: 399419
loss: 1.0317884683609009,grad_norm: 0.8057315793085603, iteration: 399420
loss: 1.1574723720550537,grad_norm: 0.9999991141104791, iteration: 399421
loss: 1.031852126121521,grad_norm: 0.8793465680014384, iteration: 399422
loss: 1.082492470741272,grad_norm: 0.9899743442869602, iteration: 399423
loss: 1.0801337957382202,grad_norm: 0.9442542215423421, iteration: 399424
loss: 0.9642273783683777,grad_norm: 0.9999990570607442, iteration: 399425
loss: 1.1732978820800781,grad_norm: 0.9999995793987403, iteration: 399426
loss: 1.050147533416748,grad_norm: 0.7290778117007655, iteration: 399427
loss: 1.0668057203292847,grad_norm: 0.8992469377862511, iteration: 399428
loss: 1.0019086599349976,grad_norm: 0.8193286486076384, iteration: 399429
loss: 1.0818310976028442,grad_norm: 0.9999993968357127, iteration: 399430
loss: 1.1467204093933105,grad_norm: 0.9999997715544415, iteration: 399431
loss: 0.9729148149490356,grad_norm: 0.8153209161257906, iteration: 399432
loss: 1.0006409883499146,grad_norm: 0.8545443819168961, iteration: 399433
loss: 1.1203782558441162,grad_norm: 0.7951841882480247, iteration: 399434
loss: 1.0306849479675293,grad_norm: 0.9811408466210421, iteration: 399435
loss: 1.046118974685669,grad_norm: 0.7598305725622622, iteration: 399436
loss: 1.0179166793823242,grad_norm: 0.8761370964216332, iteration: 399437
loss: 1.0482254028320312,grad_norm: 0.9999994459284643, iteration: 399438
loss: 1.1143579483032227,grad_norm: 0.9999998808805954, iteration: 399439
loss: 1.0521212816238403,grad_norm: 0.8698641664945584, iteration: 399440
loss: 1.0019056797027588,grad_norm: 0.7650546643422699, iteration: 399441
loss: 0.9607219099998474,grad_norm: 0.6084006225778463, iteration: 399442
loss: 1.0169591903686523,grad_norm: 0.8986890314151144, iteration: 399443
loss: 1.008946418762207,grad_norm: 0.8525008841904149, iteration: 399444
loss: 1.0585418939590454,grad_norm: 0.7606970838906163, iteration: 399445
loss: 1.08955717086792,grad_norm: 0.803962166727899, iteration: 399446
loss: 1.1237421035766602,grad_norm: 0.973126045620663, iteration: 399447
loss: 1.0458757877349854,grad_norm: 0.7768001453271564, iteration: 399448
loss: 0.9979941844940186,grad_norm: 0.7592439852261184, iteration: 399449
loss: 1.00102698802948,grad_norm: 0.8785314165011187, iteration: 399450
loss: 0.9639822840690613,grad_norm: 0.7120315217495657, iteration: 399451
loss: 1.017635464668274,grad_norm: 0.8886042560205648, iteration: 399452
loss: 1.0241916179656982,grad_norm: 0.786747784102293, iteration: 399453
loss: 0.9896535873413086,grad_norm: 0.8848918557843797, iteration: 399454
loss: 1.0900129079818726,grad_norm: 0.8392554786963932, iteration: 399455
loss: 1.0618994235992432,grad_norm: 0.8168977681866203, iteration: 399456
loss: 1.008522391319275,grad_norm: 0.9184701875918441, iteration: 399457
loss: 0.993345320224762,grad_norm: 0.8365221121516339, iteration: 399458
loss: 1.1159361600875854,grad_norm: 0.9999990927283815, iteration: 399459
loss: 1.0364974737167358,grad_norm: 0.8329135285012425, iteration: 399460
loss: 1.0586743354797363,grad_norm: 0.9999996606897664, iteration: 399461
loss: 1.0276477336883545,grad_norm: 0.8490543563080605, iteration: 399462
loss: 1.0111751556396484,grad_norm: 0.9999991330508146, iteration: 399463
loss: 1.0786869525909424,grad_norm: 0.9999996568385962, iteration: 399464
loss: 1.0600712299346924,grad_norm: 0.9999989431674295, iteration: 399465
loss: 1.0099886655807495,grad_norm: 0.8149869571596138, iteration: 399466
loss: 1.0477772951126099,grad_norm: 0.9999995454929371, iteration: 399467
loss: 1.0567326545715332,grad_norm: 0.9999993579436833, iteration: 399468
loss: 1.0458455085754395,grad_norm: 0.9999997925382458, iteration: 399469
loss: 1.0329257249832153,grad_norm: 0.9999990382684627, iteration: 399470
loss: 1.0199673175811768,grad_norm: 0.8582663884721456, iteration: 399471
loss: 0.995552659034729,grad_norm: 0.9054573877108228, iteration: 399472
loss: 1.1314449310302734,grad_norm: 0.999999655689903, iteration: 399473
loss: 1.0705933570861816,grad_norm: 0.9933887885971434, iteration: 399474
loss: 1.0548086166381836,grad_norm: 0.820375860222206, iteration: 399475
loss: 1.1627811193466187,grad_norm: 0.9999992560527673, iteration: 399476
loss: 1.0862555503845215,grad_norm: 0.9633752415008003, iteration: 399477
loss: 1.0239251852035522,grad_norm: 0.9999995639285347, iteration: 399478
loss: 1.0574579238891602,grad_norm: 0.8088817207470701, iteration: 399479
loss: 0.9883026480674744,grad_norm: 0.8033881896670111, iteration: 399480
loss: 1.0221867561340332,grad_norm: 0.9999996479266193, iteration: 399481
loss: 1.0434226989746094,grad_norm: 0.999999355131912, iteration: 399482
loss: 1.0031288862228394,grad_norm: 0.7666289550797395, iteration: 399483
loss: 1.0066434144973755,grad_norm: 0.713441993190078, iteration: 399484
loss: 1.0129997730255127,grad_norm: 0.9999990966700812, iteration: 399485
loss: 1.0204825401306152,grad_norm: 0.9999999049601592, iteration: 399486
loss: 1.0907775163650513,grad_norm: 0.9133068356367802, iteration: 399487
loss: 1.076253890991211,grad_norm: 0.8701510035879091, iteration: 399488
loss: 1.0143839120864868,grad_norm: 0.9999991183109035, iteration: 399489
loss: 1.0428307056427002,grad_norm: 0.7215535767706842, iteration: 399490
loss: 1.0059024095535278,grad_norm: 0.8762902141792288, iteration: 399491
loss: 1.0667996406555176,grad_norm: 0.9752852317116636, iteration: 399492
loss: 1.0321167707443237,grad_norm: 0.9242160540029036, iteration: 399493
loss: 1.0808727741241455,grad_norm: 0.7598667277396148, iteration: 399494
loss: 1.0206490755081177,grad_norm: 0.7728874179410115, iteration: 399495
loss: 1.0040203332901,grad_norm: 0.7813418378290719, iteration: 399496
loss: 1.0408453941345215,grad_norm: 0.999999986339837, iteration: 399497
loss: 0.9919973611831665,grad_norm: 0.6917421552436966, iteration: 399498
loss: 1.0602288246154785,grad_norm: 0.9999999160743571, iteration: 399499
loss: 1.0849881172180176,grad_norm: 0.9999992007119339, iteration: 399500
loss: 1.030330777168274,grad_norm: 0.8437695873549304, iteration: 399501
loss: 0.9874529242515564,grad_norm: 0.8545542003326524, iteration: 399502
loss: 1.0419336557388306,grad_norm: 0.9806065550800789, iteration: 399503
loss: 1.02451491355896,grad_norm: 0.999999690493244, iteration: 399504
loss: 1.0330828428268433,grad_norm: 0.9999991664086647, iteration: 399505
loss: 0.9974883198738098,grad_norm: 0.7469468110568132, iteration: 399506
loss: 0.9808032512664795,grad_norm: 0.7498449858796651, iteration: 399507
loss: 1.0028979778289795,grad_norm: 0.999998987843349, iteration: 399508
loss: 1.0053783655166626,grad_norm: 0.820723021187679, iteration: 399509
loss: 1.0486260652542114,grad_norm: 0.9999997080120614, iteration: 399510
loss: 1.0172579288482666,grad_norm: 0.8966784745056294, iteration: 399511
loss: 1.0291208028793335,grad_norm: 0.8422156211875986, iteration: 399512
loss: 1.060970425605774,grad_norm: 0.9999995175714702, iteration: 399513
loss: 1.1293078660964966,grad_norm: 1.00000004354273, iteration: 399514
loss: 1.1152987480163574,grad_norm: 0.9755166026708769, iteration: 399515
loss: 1.15206778049469,grad_norm: 0.9999994038767817, iteration: 399516
loss: 0.9964035749435425,grad_norm: 0.7917516393681595, iteration: 399517
loss: 1.0156155824661255,grad_norm: 0.9823601952992088, iteration: 399518
loss: 1.0738554000854492,grad_norm: 0.9999994832607026, iteration: 399519
loss: 0.9796186089515686,grad_norm: 0.9039590870672448, iteration: 399520
loss: 0.9893038272857666,grad_norm: 0.7581950076814217, iteration: 399521
loss: 1.0032624006271362,grad_norm: 0.7248285495660932, iteration: 399522
loss: 1.032164454460144,grad_norm: 0.9999997800502399, iteration: 399523
loss: 1.0643686056137085,grad_norm: 0.9999996338354656, iteration: 399524
loss: 1.0141441822052002,grad_norm: 0.9696143621154459, iteration: 399525
loss: 1.0694550275802612,grad_norm: 0.9999997853449103, iteration: 399526
loss: 0.993595540523529,grad_norm: 0.7842192676054898, iteration: 399527
loss: 1.0154485702514648,grad_norm: 0.8140789280789563, iteration: 399528
loss: 1.0451716184616089,grad_norm: 0.988317883319643, iteration: 399529
loss: 0.9843370318412781,grad_norm: 0.758683765081686, iteration: 399530
loss: 1.0325976610183716,grad_norm: 0.8635750335176479, iteration: 399531
loss: 1.0235282182693481,grad_norm: 0.9999990653596807, iteration: 399532
loss: 1.060728669166565,grad_norm: 0.8393275920373219, iteration: 399533
loss: 1.0469553470611572,grad_norm: 0.9999998722785325, iteration: 399534
loss: 1.0700592994689941,grad_norm: 0.8501227812019928, iteration: 399535
loss: 1.014845609664917,grad_norm: 0.9952992836441258, iteration: 399536
loss: 1.041031002998352,grad_norm: 0.9999994961943399, iteration: 399537
loss: 0.964396595954895,grad_norm: 0.8455411259669366, iteration: 399538
loss: 1.093656063079834,grad_norm: 0.942839102884069, iteration: 399539
loss: 1.1101008653640747,grad_norm: 0.9381486652958085, iteration: 399540
loss: 1.0521366596221924,grad_norm: 1.0000000861547693, iteration: 399541
loss: 0.9882225394248962,grad_norm: 0.7998731371731799, iteration: 399542
loss: 1.0765485763549805,grad_norm: 0.9999991869357281, iteration: 399543
loss: 1.017223596572876,grad_norm: 0.6877026281302943, iteration: 399544
loss: 1.0523685216903687,grad_norm: 0.9999996253320557, iteration: 399545
loss: 1.0499773025512695,grad_norm: 0.9999991400436546, iteration: 399546
loss: 1.0343700647354126,grad_norm: 0.7175088047396628, iteration: 399547
loss: 0.9736372232437134,grad_norm: 0.9197320644438286, iteration: 399548
loss: 1.0003589391708374,grad_norm: 0.9431564419757544, iteration: 399549
loss: 1.0514214038848877,grad_norm: 0.7018542050794275, iteration: 399550
loss: 1.0168392658233643,grad_norm: 0.9999994638038716, iteration: 399551
loss: 1.008510947227478,grad_norm: 0.9999993209770892, iteration: 399552
loss: 1.0445927381515503,grad_norm: 0.7942926597201159, iteration: 399553
loss: 0.9680340886116028,grad_norm: 0.7115254251603246, iteration: 399554
loss: 1.0148824453353882,grad_norm: 0.8668653774713055, iteration: 399555
loss: 0.9811232686042786,grad_norm: 0.9999990900024412, iteration: 399556
loss: 0.9967988133430481,grad_norm: 0.8664470511995562, iteration: 399557
loss: 0.9832492470741272,grad_norm: 0.7667170441471373, iteration: 399558
loss: 1.1418039798736572,grad_norm: 0.9999992427954943, iteration: 399559
loss: 1.0709642171859741,grad_norm: 0.9999990231483294, iteration: 399560
loss: 0.9989975690841675,grad_norm: 0.7486941319862556, iteration: 399561
loss: 1.0407298803329468,grad_norm: 0.951809752485731, iteration: 399562
loss: 1.08766770362854,grad_norm: 0.8366855937887687, iteration: 399563
loss: 1.1358072757720947,grad_norm: 0.9999991974274559, iteration: 399564
loss: 0.9754514694213867,grad_norm: 0.7287732470019209, iteration: 399565
loss: 1.0053248405456543,grad_norm: 0.7504225560532549, iteration: 399566
loss: 1.0120199918746948,grad_norm: 0.8066250127330551, iteration: 399567
loss: 0.9944534301757812,grad_norm: 0.8342304526365414, iteration: 399568
loss: 1.0435127019882202,grad_norm: 0.9999992314372669, iteration: 399569
loss: 1.1424349546432495,grad_norm: 0.9999997997577805, iteration: 399570
loss: 0.9546847939491272,grad_norm: 0.8225540572638675, iteration: 399571
loss: 1.1052281856536865,grad_norm: 0.9999991769860805, iteration: 399572
loss: 1.0205116271972656,grad_norm: 0.9669580165952085, iteration: 399573
loss: 1.005019187927246,grad_norm: 0.7604887298963726, iteration: 399574
loss: 1.0380219221115112,grad_norm: 0.6775377679394695, iteration: 399575
loss: 0.9970645308494568,grad_norm: 0.8473334020716287, iteration: 399576
loss: 1.0854424238204956,grad_norm: 0.9854739910583216, iteration: 399577
loss: 0.9881864190101624,grad_norm: 0.7871657723481654, iteration: 399578
loss: 1.024742841720581,grad_norm: 0.8021624893606714, iteration: 399579
loss: 1.0174845457077026,grad_norm: 0.9999993599045267, iteration: 399580
loss: 1.0680702924728394,grad_norm: 0.8179184363801424, iteration: 399581
loss: 1.0499300956726074,grad_norm: 0.9999999509368753, iteration: 399582
loss: 0.9845357537269592,grad_norm: 0.739663121998798, iteration: 399583
loss: 1.0112037658691406,grad_norm: 0.9087779370283184, iteration: 399584
loss: 0.9985587000846863,grad_norm: 0.7684590504462919, iteration: 399585
loss: 1.0308209657669067,grad_norm: 0.8700997210832202, iteration: 399586
loss: 0.9954885840415955,grad_norm: 0.9999992452569455, iteration: 399587
loss: 1.0473353862762451,grad_norm: 0.8816383511147518, iteration: 399588
loss: 1.093075156211853,grad_norm: 0.9999995747203984, iteration: 399589
loss: 1.051621675491333,grad_norm: 0.9999998495525613, iteration: 399590
loss: 0.9668741226196289,grad_norm: 0.7779493280809402, iteration: 399591
loss: 1.0306689739227295,grad_norm: 0.8333753675866173, iteration: 399592
loss: 0.985508918762207,grad_norm: 0.8158968089423888, iteration: 399593
loss: 1.000515103340149,grad_norm: 0.8921054951987444, iteration: 399594
loss: 1.0174952745437622,grad_norm: 0.7948153430921837, iteration: 399595
loss: 0.9962740540504456,grad_norm: 0.7903509210837951, iteration: 399596
loss: 0.9815820455551147,grad_norm: 0.8172113510705031, iteration: 399597
loss: 1.075439214706421,grad_norm: 1.0000000267017517, iteration: 399598
loss: 0.9840754866600037,grad_norm: 0.7393406293853245, iteration: 399599
loss: 0.9633798599243164,grad_norm: 0.7877607956826251, iteration: 399600
loss: 1.0235586166381836,grad_norm: 0.8137074607107543, iteration: 399601
loss: 0.980157196521759,grad_norm: 0.731244013274054, iteration: 399602
loss: 1.0702311992645264,grad_norm: 0.9999992696613944, iteration: 399603
loss: 1.0021452903747559,grad_norm: 0.8633636396095998, iteration: 399604
loss: 1.0120658874511719,grad_norm: 0.7077993248612501, iteration: 399605
loss: 1.0252091884613037,grad_norm: 0.7856354277377995, iteration: 399606
loss: 1.0042721033096313,grad_norm: 0.999999169210779, iteration: 399607
loss: 1.0130879878997803,grad_norm: 0.8228889159469548, iteration: 399608
loss: 1.0020694732666016,grad_norm: 0.9889139338034414, iteration: 399609
loss: 0.9899617433547974,grad_norm: 0.8925513539821454, iteration: 399610
loss: 1.1004130840301514,grad_norm: 0.9999994132424188, iteration: 399611
loss: 1.0602967739105225,grad_norm: 0.7802359530780618, iteration: 399612
loss: 0.9669308066368103,grad_norm: 0.8149737531407353, iteration: 399613
loss: 1.0430172681808472,grad_norm: 0.9999990825414211, iteration: 399614
loss: 1.0413824319839478,grad_norm: 0.879296958138832, iteration: 399615
loss: 1.0410953760147095,grad_norm: 0.9820265783035439, iteration: 399616
loss: 1.0711545944213867,grad_norm: 0.7900269738872417, iteration: 399617
loss: 0.9923356771469116,grad_norm: 0.9999990271515178, iteration: 399618
loss: 0.9639685153961182,grad_norm: 0.777291945101285, iteration: 399619
loss: 1.0320860147476196,grad_norm: 0.8998604310506448, iteration: 399620
loss: 1.081343412399292,grad_norm: 0.7676788166570527, iteration: 399621
loss: 0.9917915463447571,grad_norm: 0.7831491508343734, iteration: 399622
loss: 0.9980311393737793,grad_norm: 0.7445885113355823, iteration: 399623
loss: 1.0050171613693237,grad_norm: 0.9768657608422489, iteration: 399624
loss: 1.021661400794983,grad_norm: 0.7009051054417744, iteration: 399625
loss: 1.0868607759475708,grad_norm: 0.9999998544901665, iteration: 399626
loss: 1.0288149118423462,grad_norm: 0.7469355563037275, iteration: 399627
loss: 0.9653271436691284,grad_norm: 0.8914383629554741, iteration: 399628
loss: 1.0365705490112305,grad_norm: 0.9999995769714406, iteration: 399629
loss: 1.0417627096176147,grad_norm: 0.9999997105591869, iteration: 399630
loss: 0.985270619392395,grad_norm: 0.8597488148954141, iteration: 399631
loss: 1.1044964790344238,grad_norm: 0.810816745807366, iteration: 399632
loss: 1.0097999572753906,grad_norm: 0.8528663332373996, iteration: 399633
loss: 1.026599645614624,grad_norm: 0.8515892706257461, iteration: 399634
loss: 1.115039587020874,grad_norm: 0.8666400915379587, iteration: 399635
loss: 1.013681411743164,grad_norm: 0.9178226279659049, iteration: 399636
loss: 0.9647805690765381,grad_norm: 0.8111501746505745, iteration: 399637
loss: 0.9696552157402039,grad_norm: 0.9999990699132493, iteration: 399638
loss: 1.080083966255188,grad_norm: 0.8253276216146761, iteration: 399639
loss: 1.007322072982788,grad_norm: 0.8235220804507721, iteration: 399640
loss: 1.0029547214508057,grad_norm: 0.6888331220275434, iteration: 399641
loss: 1.0196996927261353,grad_norm: 0.7470062039994054, iteration: 399642
loss: 1.0358037948608398,grad_norm: 0.7564843065883227, iteration: 399643
loss: 1.0236518383026123,grad_norm: 0.9999995218713347, iteration: 399644
loss: 0.9894371032714844,grad_norm: 0.8608722071110855, iteration: 399645
loss: 1.0211763381958008,grad_norm: 0.7661979270201358, iteration: 399646
loss: 0.9901431798934937,grad_norm: 0.8696468651291832, iteration: 399647
loss: 1.0376746654510498,grad_norm: 0.827948026905998, iteration: 399648
loss: 1.026863932609558,grad_norm: 0.9999992581176487, iteration: 399649
loss: 0.9827619194984436,grad_norm: 0.7548524529478327, iteration: 399650
loss: 0.9726249575614929,grad_norm: 0.8670271296910954, iteration: 399651
loss: 0.939386248588562,grad_norm: 0.875316883047921, iteration: 399652
loss: 0.996955156326294,grad_norm: 0.6503245074230293, iteration: 399653
loss: 1.000917911529541,grad_norm: 0.9999993208251239, iteration: 399654
loss: 0.9698266983032227,grad_norm: 0.6445006448724158, iteration: 399655
loss: 0.9567056894302368,grad_norm: 0.9999990436635411, iteration: 399656
loss: 0.997039258480072,grad_norm: 0.7252226303133099, iteration: 399657
loss: 0.9876976609230042,grad_norm: 0.6332882394615011, iteration: 399658
loss: 1.0360333919525146,grad_norm: 0.7358823436046544, iteration: 399659
loss: 1.0148371458053589,grad_norm: 0.8602992953250967, iteration: 399660
loss: 0.9950945973396301,grad_norm: 0.8513475402435043, iteration: 399661
loss: 1.0672028064727783,grad_norm: 0.8989338730413605, iteration: 399662
loss: 1.0447558164596558,grad_norm: 0.9157868385866369, iteration: 399663
loss: 1.0754023790359497,grad_norm: 0.9999999278086049, iteration: 399664
loss: 1.0668139457702637,grad_norm: 0.9999991035671218, iteration: 399665
loss: 1.0616552829742432,grad_norm: 0.8427123795628152, iteration: 399666
loss: 0.9923901557922363,grad_norm: 0.7926627112873328, iteration: 399667
loss: 1.007309079170227,grad_norm: 0.7911495359284477, iteration: 399668
loss: 1.085470199584961,grad_norm: 0.7519494547350343, iteration: 399669
loss: 0.9707410931587219,grad_norm: 0.7038766129194415, iteration: 399670
loss: 1.0648319721221924,grad_norm: 0.8806367533473468, iteration: 399671
loss: 0.9751689434051514,grad_norm: 0.9999989941414577, iteration: 399672
loss: 1.0038642883300781,grad_norm: 0.9928073331764965, iteration: 399673
loss: 1.0018284320831299,grad_norm: 0.8704692606063854, iteration: 399674
loss: 0.9966249465942383,grad_norm: 0.9999991514799048, iteration: 399675
loss: 0.9909936785697937,grad_norm: 0.7594029826326504, iteration: 399676
loss: 0.9484880566596985,grad_norm: 0.8439920884144846, iteration: 399677
loss: 1.003812313079834,grad_norm: 0.6404340444853905, iteration: 399678
loss: 0.9955177307128906,grad_norm: 0.729674782501056, iteration: 399679
loss: 1.0350300073623657,grad_norm: 0.8536466689427242, iteration: 399680
loss: 1.0629403591156006,grad_norm: 0.8412974424731875, iteration: 399681
loss: 1.0035526752471924,grad_norm: 0.8672868357614445, iteration: 399682
loss: 1.137094259262085,grad_norm: 0.9999998301997182, iteration: 399683
loss: 1.0099807977676392,grad_norm: 0.8364829887823366, iteration: 399684
loss: 1.041509985923767,grad_norm: 0.8730094528643003, iteration: 399685
loss: 1.0048496723175049,grad_norm: 0.8935035286649216, iteration: 399686
loss: 0.9618225693702698,grad_norm: 0.8123931264857066, iteration: 399687
loss: 0.9858640432357788,grad_norm: 0.7039560379275558, iteration: 399688
loss: 1.029512882232666,grad_norm: 0.7168264614367424, iteration: 399689
loss: 1.0065473318099976,grad_norm: 0.8033522868033174, iteration: 399690
loss: 0.9961996674537659,grad_norm: 0.7017449400216395, iteration: 399691
loss: 1.0432820320129395,grad_norm: 0.8020117071350646, iteration: 399692
loss: 0.9847488403320312,grad_norm: 0.8544869318709565, iteration: 399693
loss: 1.0496906042099,grad_norm: 0.9368245569787995, iteration: 399694
loss: 1.0129832029342651,grad_norm: 0.9489660729019225, iteration: 399695
loss: 1.0393767356872559,grad_norm: 0.999999641691407, iteration: 399696
loss: 1.0013165473937988,grad_norm: 0.7624280307939911, iteration: 399697
loss: 1.0674545764923096,grad_norm: 0.7769499124529369, iteration: 399698
loss: 1.0813361406326294,grad_norm: 0.9999991381183788, iteration: 399699
loss: 0.9991938471794128,grad_norm: 0.8675173945863919, iteration: 399700
loss: 1.004959225654602,grad_norm: 0.8392389327558727, iteration: 399701
loss: 0.9616543650627136,grad_norm: 0.8256571595452223, iteration: 399702
loss: 0.9974962472915649,grad_norm: 0.8527835118228199, iteration: 399703
loss: 1.0705373287200928,grad_norm: 0.9999996258522763, iteration: 399704
loss: 1.0582889318466187,grad_norm: 0.8642970320733436, iteration: 399705
loss: 0.9994089603424072,grad_norm: 0.8239813435182843, iteration: 399706
loss: 1.0033317804336548,grad_norm: 0.7917290303648428, iteration: 399707
loss: 1.0285823345184326,grad_norm: 0.6756697444972443, iteration: 399708
loss: 1.0429389476776123,grad_norm: 0.7959771058739633, iteration: 399709
loss: 0.9714703559875488,grad_norm: 0.9838964467233091, iteration: 399710
loss: 1.0142675638198853,grad_norm: 0.5872200311459539, iteration: 399711
loss: 1.0249086618423462,grad_norm: 0.8281088330390906, iteration: 399712
loss: 1.0507457256317139,grad_norm: 0.9999991383280632, iteration: 399713
loss: 0.9795194268226624,grad_norm: 0.847840988756556, iteration: 399714
loss: 0.9754509925842285,grad_norm: 0.7383991971132547, iteration: 399715
loss: 0.9830964207649231,grad_norm: 0.8578586665639126, iteration: 399716
loss: 0.9951412677764893,grad_norm: 0.6587054719477333, iteration: 399717
loss: 0.979218065738678,grad_norm: 0.7464726401608109, iteration: 399718
loss: 0.9909356832504272,grad_norm: 0.7417950291621945, iteration: 399719
loss: 0.9808970093727112,grad_norm: 0.7859322169511643, iteration: 399720
loss: 1.0021603107452393,grad_norm: 0.9999990714587735, iteration: 399721
loss: 0.9897215962409973,grad_norm: 0.7316345434917683, iteration: 399722
loss: 1.057692050933838,grad_norm: 0.999999511866629, iteration: 399723
loss: 1.043723702430725,grad_norm: 0.8223078591588986, iteration: 399724
loss: 1.0865143537521362,grad_norm: 1.0000000264429372, iteration: 399725
loss: 1.0033169984817505,grad_norm: 0.8088775056336823, iteration: 399726
loss: 1.006999135017395,grad_norm: 0.8736864207094351, iteration: 399727
loss: 0.9609421491622925,grad_norm: 0.7159400151974192, iteration: 399728
loss: 1.00373113155365,grad_norm: 0.8739553025655126, iteration: 399729
loss: 0.9846130609512329,grad_norm: 0.7878878244328082, iteration: 399730
loss: 1.0132997035980225,grad_norm: 0.9066672945343234, iteration: 399731
loss: 1.0194284915924072,grad_norm: 0.837927228928049, iteration: 399732
loss: 1.022750973701477,grad_norm: 0.9999992573752516, iteration: 399733
loss: 0.9609103202819824,grad_norm: 0.7969403593450914, iteration: 399734
loss: 1.0015537738800049,grad_norm: 0.7995451743431183, iteration: 399735
loss: 0.9321690797805786,grad_norm: 0.8015827504230785, iteration: 399736
loss: 0.9990724921226501,grad_norm: 0.8177271567850841, iteration: 399737
loss: 0.9748017191886902,grad_norm: 0.6887556516721366, iteration: 399738
loss: 0.9754462838172913,grad_norm: 0.6907210895172803, iteration: 399739
loss: 0.9821228384971619,grad_norm: 0.7779689958911058, iteration: 399740
loss: 1.0534526109695435,grad_norm: 0.9999989183745963, iteration: 399741
loss: 1.0015760660171509,grad_norm: 0.9999991366605441, iteration: 399742
loss: 1.0000382661819458,grad_norm: 0.9999998168486919, iteration: 399743
loss: 0.9832630753517151,grad_norm: 0.803708901966447, iteration: 399744
loss: 1.0739452838897705,grad_norm: 0.9188192321112427, iteration: 399745
loss: 1.043405532836914,grad_norm: 0.6462145457283425, iteration: 399746
loss: 0.9944453239440918,grad_norm: 0.7059017061325565, iteration: 399747
loss: 0.9913609027862549,grad_norm: 0.9999999713049571, iteration: 399748
loss: 0.9864774346351624,grad_norm: 0.7665254873318845, iteration: 399749
loss: 0.9896315932273865,grad_norm: 0.9117152352816391, iteration: 399750
loss: 1.0000466108322144,grad_norm: 0.8846141205068271, iteration: 399751
loss: 0.9697104096412659,grad_norm: 0.8470177409764948, iteration: 399752
loss: 1.0123924016952515,grad_norm: 0.8833525565059593, iteration: 399753
loss: 0.960439920425415,grad_norm: 0.6314296602789029, iteration: 399754
loss: 1.0396047830581665,grad_norm: 0.9079384976238711, iteration: 399755
loss: 0.967765212059021,grad_norm: 0.7782785039721043, iteration: 399756
loss: 1.0071420669555664,grad_norm: 0.7232348724023084, iteration: 399757
loss: 1.0243297815322876,grad_norm: 0.7964650101854185, iteration: 399758
loss: 0.9982174634933472,grad_norm: 0.8659642264630794, iteration: 399759
loss: 0.9839985370635986,grad_norm: 0.9971554816075753, iteration: 399760
loss: 1.013649344444275,grad_norm: 0.7450545490365627, iteration: 399761
loss: 0.9905807375907898,grad_norm: 0.9078643986644443, iteration: 399762
loss: 0.992124617099762,grad_norm: 0.7499320744101721, iteration: 399763
loss: 1.0225958824157715,grad_norm: 0.9999990188167635, iteration: 399764
loss: 0.9948284029960632,grad_norm: 0.6446456953842614, iteration: 399765
loss: 0.9984584450721741,grad_norm: 0.8510118750621556, iteration: 399766
loss: 0.9840310215950012,grad_norm: 0.752643550463902, iteration: 399767
loss: 1.038893461227417,grad_norm: 0.8498449647499248, iteration: 399768
loss: 1.0353668928146362,grad_norm: 0.8886490780626646, iteration: 399769
loss: 1.0141472816467285,grad_norm: 0.8714072862748242, iteration: 399770
loss: 0.9770896434783936,grad_norm: 0.9390090386250071, iteration: 399771
loss: 1.026720643043518,grad_norm: 0.8532659011453954, iteration: 399772
loss: 1.026819109916687,grad_norm: 0.637036849285938, iteration: 399773
loss: 1.0057364702224731,grad_norm: 0.714020519503868, iteration: 399774
loss: 0.9942375421524048,grad_norm: 0.8003120614614466, iteration: 399775
loss: 1.036529302597046,grad_norm: 0.9128696739409459, iteration: 399776
loss: 1.0512663125991821,grad_norm: 0.9579449507507368, iteration: 399777
loss: 1.0062395334243774,grad_norm: 0.9999993732779178, iteration: 399778
loss: 1.0763944387435913,grad_norm: 0.8776333670232548, iteration: 399779
loss: 1.0438908338546753,grad_norm: 0.8012857221357078, iteration: 399780
loss: 0.9685555100440979,grad_norm: 0.6695543189334747, iteration: 399781
loss: 0.9922411441802979,grad_norm: 0.8473963720805022, iteration: 399782
loss: 1.0048744678497314,grad_norm: 0.7414428881599471, iteration: 399783
loss: 0.9621145725250244,grad_norm: 0.7842549735646742, iteration: 399784
loss: 1.061865210533142,grad_norm: 0.8528648191496646, iteration: 399785
loss: 0.9848906993865967,grad_norm: 0.7297690767582937, iteration: 399786
loss: 1.0058557987213135,grad_norm: 0.6947640292067794, iteration: 399787
loss: 1.0515437126159668,grad_norm: 0.933369340008977, iteration: 399788
loss: 1.0468257665634155,grad_norm: 0.9260574370991955, iteration: 399789
loss: 1.0063941478729248,grad_norm: 0.9351626665070191, iteration: 399790
loss: 1.0318270921707153,grad_norm: 0.9507791078389838, iteration: 399791
loss: 1.0393702983856201,grad_norm: 0.7656237511878803, iteration: 399792
loss: 0.9735426306724548,grad_norm: 0.6274063656809511, iteration: 399793
loss: 1.033991813659668,grad_norm: 0.9999993044803447, iteration: 399794
loss: 0.9766857028007507,grad_norm: 0.7573982841569935, iteration: 399795
loss: 0.9896624684333801,grad_norm: 0.9634003420632332, iteration: 399796
loss: 0.9729579091072083,grad_norm: 0.8055033430257373, iteration: 399797
loss: 0.9837068319320679,grad_norm: 0.8980373021676452, iteration: 399798
loss: 1.1158292293548584,grad_norm: 0.9460162559723669, iteration: 399799
loss: 1.0241150856018066,grad_norm: 0.9025183198473955, iteration: 399800
loss: 0.9714447259902954,grad_norm: 0.7308607434238195, iteration: 399801
loss: 0.99278324842453,grad_norm: 0.9933514537625698, iteration: 399802
loss: 0.9776455163955688,grad_norm: 0.813479728126659, iteration: 399803
loss: 0.9756639003753662,grad_norm: 0.9999990721983868, iteration: 399804
loss: 1.0160425901412964,grad_norm: 0.8616243400494288, iteration: 399805
loss: 1.0254935026168823,grad_norm: 0.9704518434151121, iteration: 399806
loss: 0.9955633282661438,grad_norm: 0.8110130223593414, iteration: 399807
loss: 0.9986212849617004,grad_norm: 0.8560140152800091, iteration: 399808
loss: 0.9941514134407043,grad_norm: 0.7937794724527235, iteration: 399809
loss: 1.047098159790039,grad_norm: 0.7941843076826306, iteration: 399810
loss: 1.0472688674926758,grad_norm: 0.9999993233877974, iteration: 399811
loss: 0.9737737774848938,grad_norm: 0.7085874266725455, iteration: 399812
loss: 0.9841231107711792,grad_norm: 0.7242260334601714, iteration: 399813
loss: 0.9694004654884338,grad_norm: 0.7034641028236099, iteration: 399814
loss: 1.0014666318893433,grad_norm: 0.91124230409463, iteration: 399815
loss: 0.981070339679718,grad_norm: 0.7987968560356847, iteration: 399816
loss: 0.9927757978439331,grad_norm: 0.7002726172946683, iteration: 399817
loss: 1.0385195016860962,grad_norm: 0.9999996226314001, iteration: 399818
loss: 1.0047177076339722,grad_norm: 0.8501051578945252, iteration: 399819
loss: 1.0047599077224731,grad_norm: 0.9499303629250762, iteration: 399820
loss: 0.9879270792007446,grad_norm: 0.8388432512085672, iteration: 399821
loss: 1.0306371450424194,grad_norm: 0.7969405830168969, iteration: 399822
loss: 1.023370623588562,grad_norm: 0.9341651298604311, iteration: 399823
loss: 0.9921622276306152,grad_norm: 0.9800607595042341, iteration: 399824
loss: 1.0712761878967285,grad_norm: 0.9167184013541692, iteration: 399825
loss: 0.9838168621063232,grad_norm: 0.9161111592563019, iteration: 399826
loss: 1.0112099647521973,grad_norm: 0.772229595578371, iteration: 399827
loss: 0.9753046035766602,grad_norm: 0.745014150830075, iteration: 399828
loss: 1.0228301286697388,grad_norm: 0.7235831471066858, iteration: 399829
loss: 0.9762794375419617,grad_norm: 0.8568830303940833, iteration: 399830
loss: 0.9838325381278992,grad_norm: 0.761327984506856, iteration: 399831
loss: 1.0032671689987183,grad_norm: 0.6752794334741066, iteration: 399832
loss: 1.0335052013397217,grad_norm: 0.9999993578219866, iteration: 399833
loss: 0.9697624444961548,grad_norm: 0.8499461275117607, iteration: 399834
loss: 0.977327823638916,grad_norm: 0.7741133544509323, iteration: 399835
loss: 1.0022838115692139,grad_norm: 0.6003120207578174, iteration: 399836
loss: 1.0092660188674927,grad_norm: 0.9988306447951548, iteration: 399837
loss: 0.9909128546714783,grad_norm: 0.8274163752534541, iteration: 399838
loss: 1.0506885051727295,grad_norm: 0.7543335236590495, iteration: 399839
loss: 1.2634468078613281,grad_norm: 0.9999998029966806, iteration: 399840
loss: 1.0116275548934937,grad_norm: 0.9999990745012274, iteration: 399841
loss: 0.9784454107284546,grad_norm: 0.8479875010981441, iteration: 399842
loss: 0.9833112955093384,grad_norm: 0.880133652565001, iteration: 399843
loss: 1.0380653142929077,grad_norm: 0.6919131951191608, iteration: 399844
loss: 1.0064951181411743,grad_norm: 0.7878224691124325, iteration: 399845
loss: 1.0209763050079346,grad_norm: 0.9666285527082191, iteration: 399846
loss: 1.0671426057815552,grad_norm: 0.9999995105782113, iteration: 399847
loss: 0.9775999188423157,grad_norm: 0.7999813222500963, iteration: 399848
loss: 1.0398379564285278,grad_norm: 0.8019935928406762, iteration: 399849
loss: 1.036942481994629,grad_norm: 0.7691927091674359, iteration: 399850
loss: 1.0929458141326904,grad_norm: 0.9139901224055336, iteration: 399851
loss: 1.0307676792144775,grad_norm: 0.9238771662537086, iteration: 399852
loss: 1.0108821392059326,grad_norm: 0.8391567762106336, iteration: 399853
loss: 1.0305958986282349,grad_norm: 0.7633558564140791, iteration: 399854
loss: 1.062179446220398,grad_norm: 0.704181131632136, iteration: 399855
loss: 1.0103710889816284,grad_norm: 0.777552239701122, iteration: 399856
loss: 0.9757769107818604,grad_norm: 0.6649915396567855, iteration: 399857
loss: 1.006176471710205,grad_norm: 0.9999990390305493, iteration: 399858
loss: 0.9897704720497131,grad_norm: 0.8776163721244762, iteration: 399859
loss: 1.1157691478729248,grad_norm: 0.999999882000072, iteration: 399860
loss: 1.0055058002471924,grad_norm: 0.7207819869193236, iteration: 399861
loss: 0.9796056747436523,grad_norm: 0.8542607372315174, iteration: 399862
loss: 1.010715365409851,grad_norm: 0.735930845906921, iteration: 399863
loss: 1.0065499544143677,grad_norm: 0.9196137238802563, iteration: 399864
loss: 1.0620341300964355,grad_norm: 0.999999711812451, iteration: 399865
loss: 0.981547474861145,grad_norm: 0.8745541675728807, iteration: 399866
loss: 0.9621641635894775,grad_norm: 0.6549809767475543, iteration: 399867
loss: 0.9942084550857544,grad_norm: 0.7445546342943958, iteration: 399868
loss: 1.008614182472229,grad_norm: 0.9734128820962233, iteration: 399869
loss: 1.0338741540908813,grad_norm: 0.783511502720395, iteration: 399870
loss: 1.0052484273910522,grad_norm: 0.7601303461036477, iteration: 399871
loss: 0.9799439311027527,grad_norm: 0.7681947884238312, iteration: 399872
loss: 1.0248785018920898,grad_norm: 0.6901330724330982, iteration: 399873
loss: 1.0121127367019653,grad_norm: 0.8654339017557992, iteration: 399874
loss: 0.9842731952667236,grad_norm: 0.7885025693249103, iteration: 399875
loss: 1.0231614112854004,grad_norm: 0.8967894379516523, iteration: 399876
loss: 1.0080827474594116,grad_norm: 0.9999991567989472, iteration: 399877
loss: 0.9844270348548889,grad_norm: 0.7938555529006578, iteration: 399878
loss: 1.007603645324707,grad_norm: 0.9327054733555569, iteration: 399879
loss: 1.1000715494155884,grad_norm: 0.8336275455190297, iteration: 399880
loss: 0.9965944886207581,grad_norm: 0.8064021233004313, iteration: 399881
loss: 1.021642804145813,grad_norm: 0.7440603942254606, iteration: 399882
loss: 1.008251667022705,grad_norm: 0.9999990901272906, iteration: 399883
loss: 1.007186770439148,grad_norm: 0.9999992585759537, iteration: 399884
loss: 0.9899865984916687,grad_norm: 0.7226907657035813, iteration: 399885
loss: 0.9719998836517334,grad_norm: 0.8590111735710969, iteration: 399886
loss: 1.0084589719772339,grad_norm: 0.8442773261831199, iteration: 399887
loss: 1.0065159797668457,grad_norm: 0.8675691749877045, iteration: 399888
loss: 1.0090632438659668,grad_norm: 0.9999991457025995, iteration: 399889
loss: 0.9986040592193604,grad_norm: 0.7732086547017328, iteration: 399890
loss: 0.9771845936775208,grad_norm: 0.8316123701233321, iteration: 399891
loss: 1.0095598697662354,grad_norm: 0.6892847008335468, iteration: 399892
loss: 0.9846645593643188,grad_norm: 0.8686734125350523, iteration: 399893
loss: 1.0028387308120728,grad_norm: 0.7745440483754474, iteration: 399894
loss: 1.009636402130127,grad_norm: 0.8260881337995082, iteration: 399895
loss: 0.9968183040618896,grad_norm: 0.7443700239648928, iteration: 399896
loss: 0.9883757829666138,grad_norm: 0.7431757796513898, iteration: 399897
loss: 0.9905406832695007,grad_norm: 0.789207859908877, iteration: 399898
loss: 0.9917162656784058,grad_norm: 0.6571357585990064, iteration: 399899
loss: 0.9604083299636841,grad_norm: 0.7463261977670409, iteration: 399900
loss: 0.9705022573471069,grad_norm: 0.8510875988000306, iteration: 399901
loss: 0.9990020394325256,grad_norm: 0.8079540179053991, iteration: 399902
loss: 0.9829283952713013,grad_norm: 0.6706500050558966, iteration: 399903
loss: 1.0082387924194336,grad_norm: 0.7528450962698717, iteration: 399904
loss: 1.0116137266159058,grad_norm: 0.9999990756028768, iteration: 399905
loss: 0.9838852882385254,grad_norm: 0.7470160861882453, iteration: 399906
loss: 0.9786357879638672,grad_norm: 0.9999991089262518, iteration: 399907
loss: 1.0299028158187866,grad_norm: 0.9999997500613321, iteration: 399908
loss: 1.0001362562179565,grad_norm: 0.9999991524779103, iteration: 399909
loss: 0.9836738705635071,grad_norm: 0.9056290331254195, iteration: 399910
loss: 0.9763633608818054,grad_norm: 0.8172446586734375, iteration: 399911
loss: 1.0173909664154053,grad_norm: 0.9252193858481761, iteration: 399912
loss: 1.0299203395843506,grad_norm: 0.7724974600777028, iteration: 399913
loss: 0.970676839351654,grad_norm: 0.7613063528860609, iteration: 399914
loss: 1.0389256477355957,grad_norm: 0.7784731139305628, iteration: 399915
loss: 1.079960823059082,grad_norm: 0.9999995200367652, iteration: 399916
loss: 0.9979006052017212,grad_norm: 0.9999999196802849, iteration: 399917
loss: 0.9694239497184753,grad_norm: 0.7415761337499153, iteration: 399918
loss: 0.9761497378349304,grad_norm: 0.730269145914432, iteration: 399919
loss: 1.072266936302185,grad_norm: 0.9710381655918261, iteration: 399920
loss: 1.0318783521652222,grad_norm: 0.9741721879066019, iteration: 399921
loss: 0.975527286529541,grad_norm: 0.70258399084625, iteration: 399922
loss: 1.006027340888977,grad_norm: 0.9862968815014829, iteration: 399923
loss: 1.1041200160980225,grad_norm: 0.9999997024611946, iteration: 399924
loss: 0.9582226872444153,grad_norm: 0.7732739456553286, iteration: 399925
loss: 1.0035372972488403,grad_norm: 0.9999993242637203, iteration: 399926
loss: 0.9773420691490173,grad_norm: 0.7850572026404526, iteration: 399927
loss: 0.9959838390350342,grad_norm: 0.8597730558869883, iteration: 399928
loss: 1.0039149522781372,grad_norm: 0.7266765705913293, iteration: 399929
loss: 0.9942156076431274,grad_norm: 0.8923244038749003, iteration: 399930
loss: 0.99525386095047,grad_norm: 0.6913879537220516, iteration: 399931
loss: 1.0248780250549316,grad_norm: 0.8187992747677432, iteration: 399932
loss: 0.9776309728622437,grad_norm: 0.900078949607314, iteration: 399933
loss: 0.9691897034645081,grad_norm: 0.7513673724328855, iteration: 399934
loss: 1.006087303161621,grad_norm: 0.760022073196254, iteration: 399935
loss: 1.0037285089492798,grad_norm: 0.7256619506254739, iteration: 399936
loss: 1.0025626420974731,grad_norm: 0.999999286673804, iteration: 399937
loss: 1.0059906244277954,grad_norm: 0.7897215800018906, iteration: 399938
loss: 1.0091139078140259,grad_norm: 0.771627365754369, iteration: 399939
loss: 0.9731221795082092,grad_norm: 0.9149166573203571, iteration: 399940
loss: 1.0452582836151123,grad_norm: 0.746421877791401, iteration: 399941
loss: 1.0711156129837036,grad_norm: 0.9984614409213903, iteration: 399942
loss: 0.9736654162406921,grad_norm: 0.791446158979259, iteration: 399943
loss: 0.9960203766822815,grad_norm: 0.7601775368084479, iteration: 399944
loss: 1.0016438961029053,grad_norm: 0.7211339431377376, iteration: 399945
loss: 1.0667628049850464,grad_norm: 0.999999755864662, iteration: 399946
loss: 1.0269633531570435,grad_norm: 0.8749223676875603, iteration: 399947
loss: 1.013923168182373,grad_norm: 0.927100938694442, iteration: 399948
loss: 0.9924842119216919,grad_norm: 0.9999991678447152, iteration: 399949
loss: 1.042817234992981,grad_norm: 0.8370773966336625, iteration: 399950
loss: 1.0258985757827759,grad_norm: 0.7818470579853092, iteration: 399951
loss: 0.9818757176399231,grad_norm: 0.8072356629023035, iteration: 399952
loss: 0.991613507270813,grad_norm: 0.7742933772663027, iteration: 399953
loss: 0.9716781377792358,grad_norm: 0.7903689163021743, iteration: 399954
loss: 0.9736759066581726,grad_norm: 0.8321998608091423, iteration: 399955
loss: 1.0036545991897583,grad_norm: 0.9999994173438373, iteration: 399956
loss: 1.0021153688430786,grad_norm: 0.8721317609557685, iteration: 399957
loss: 1.071429967880249,grad_norm: 0.9999995437211376, iteration: 399958
loss: 0.9973725080490112,grad_norm: 0.793948987886701, iteration: 399959
loss: 1.0225893259048462,grad_norm: 0.9674697883045794, iteration: 399960
loss: 1.0229533910751343,grad_norm: 0.9999991452659638, iteration: 399961
loss: 1.005373477935791,grad_norm: 0.6790920031683815, iteration: 399962
loss: 0.9953920245170593,grad_norm: 0.6626727783901861, iteration: 399963
loss: 0.9922293424606323,grad_norm: 0.792519579694475, iteration: 399964
loss: 1.0031460523605347,grad_norm: 0.7782876049935231, iteration: 399965
loss: 1.0226304531097412,grad_norm: 0.8938116892415068, iteration: 399966
loss: 0.9959487318992615,grad_norm: 0.7683572288878181, iteration: 399967
loss: 1.034962773323059,grad_norm: 0.8683465946628444, iteration: 399968
loss: 0.9865798950195312,grad_norm: 0.8345741980164031, iteration: 399969
loss: 1.0293641090393066,grad_norm: 0.8991308339400803, iteration: 399970
loss: 0.987912118434906,grad_norm: 0.8499650514971364, iteration: 399971
loss: 1.1180082559585571,grad_norm: 0.9999991373152729, iteration: 399972
loss: 0.976372241973877,grad_norm: 0.7241829534894149, iteration: 399973
loss: 1.001879096031189,grad_norm: 0.803385330714583, iteration: 399974
loss: 0.9974858164787292,grad_norm: 0.7177095357882135, iteration: 399975
loss: 0.9859201312065125,grad_norm: 0.787747651888852, iteration: 399976
loss: 0.9942153692245483,grad_norm: 0.6917283893621469, iteration: 399977
loss: 0.9613981246948242,grad_norm: 0.9999992163311905, iteration: 399978
loss: 0.9969024658203125,grad_norm: 0.8170121999508577, iteration: 399979
loss: 1.0069552659988403,grad_norm: 0.7646206399980642, iteration: 399980
loss: 1.0309420824050903,grad_norm: 0.8398136745820591, iteration: 399981
loss: 0.9971781373023987,grad_norm: 0.9258729462061827, iteration: 399982
loss: 0.9806165099143982,grad_norm: 0.666714719108081, iteration: 399983
loss: 1.0437391996383667,grad_norm: 0.6554709682118294, iteration: 399984
loss: 1.0237491130828857,grad_norm: 0.7447512418640045, iteration: 399985
loss: 1.0669673681259155,grad_norm: 0.9999996060172076, iteration: 399986
loss: 1.0272860527038574,grad_norm: 0.7731057035492026, iteration: 399987
loss: 1.005214810371399,grad_norm: 0.8734597005714876, iteration: 399988
loss: 1.0237822532653809,grad_norm: 0.9999990021245189, iteration: 399989
loss: 1.055514931678772,grad_norm: 0.7077747092006599, iteration: 399990
loss: 1.0093495845794678,grad_norm: 0.8329747913570329, iteration: 399991
loss: 0.9712374806404114,grad_norm: 0.6892867607720288, iteration: 399992
loss: 0.9531250596046448,grad_norm: 0.7694655867641764, iteration: 399993
loss: 1.025023102760315,grad_norm: 0.7072193201735575, iteration: 399994
loss: 0.9888461232185364,grad_norm: 0.8943838081788852, iteration: 399995
loss: 0.984688401222229,grad_norm: 0.8078605032138105, iteration: 399996
loss: 1.0544930696487427,grad_norm: 0.7255954489364663, iteration: 399997
loss: 0.9977089762687683,grad_norm: 0.9999998679453106, iteration: 399998
loss: 0.9986904859542847,grad_norm: 0.81547101901729, iteration: 399999
loss: 1.0518300533294678,grad_norm: 0.8365210384657197, iteration: 400000
Evaluating at step 400000
{'val': 0.9955868627876043, 'test': 2.0396204832643745}
loss: 0.9557424187660217,grad_norm: 0.9656911441516393, iteration: 400001
loss: 0.9909968972206116,grad_norm: 0.9999991414353999, iteration: 400002
loss: 0.9818193316459656,grad_norm: 0.6961552391407179, iteration: 400003
loss: 0.9982321262359619,grad_norm: 0.7155145497804335, iteration: 400004
loss: 0.9913166761398315,grad_norm: 0.8539661054611022, iteration: 400005
loss: 1.0407518148422241,grad_norm: 0.9999998312309841, iteration: 400006
loss: 0.9756943583488464,grad_norm: 0.7695551866270609, iteration: 400007
loss: 0.9914283752441406,grad_norm: 0.8920869247918718, iteration: 400008
loss: 1.0058833360671997,grad_norm: 0.797401234166913, iteration: 400009
loss: 0.9944923520088196,grad_norm: 0.7032427333090822, iteration: 400010
loss: 1.025037169456482,grad_norm: 0.8633108542941144, iteration: 400011
loss: 0.9610989093780518,grad_norm: 0.8282170943253491, iteration: 400012
loss: 0.9737351536750793,grad_norm: 0.8391392290738359, iteration: 400013
loss: 0.9888036251068115,grad_norm: 0.816798877816111, iteration: 400014
loss: 1.021175503730774,grad_norm: 0.969447122275073, iteration: 400015
loss: 1.0242855548858643,grad_norm: 0.999999818949453, iteration: 400016
loss: 0.9453967213630676,grad_norm: 0.7953268097977956, iteration: 400017
loss: 1.0257923603057861,grad_norm: 0.8424443514880219, iteration: 400018
loss: 0.9677659869194031,grad_norm: 0.9676686959692137, iteration: 400019
loss: 1.0011482238769531,grad_norm: 0.6667607498944994, iteration: 400020
loss: 0.9959464073181152,grad_norm: 0.7054385630532192, iteration: 400021
loss: 0.9832503795623779,grad_norm: 0.699162346504573, iteration: 400022
loss: 0.997078537940979,grad_norm: 0.662365542045103, iteration: 400023
loss: 1.0971331596374512,grad_norm: 0.9999990892455062, iteration: 400024
loss: 0.9875014424324036,grad_norm: 0.7646770261487333, iteration: 400025
loss: 0.9919310212135315,grad_norm: 0.7527392063168438, iteration: 400026
loss: 0.9628764390945435,grad_norm: 0.7808103623644087, iteration: 400027
loss: 1.002539873123169,grad_norm: 0.6821791783734414, iteration: 400028
loss: 0.9831441044807434,grad_norm: 0.7585470584545699, iteration: 400029
loss: 1.0710526704788208,grad_norm: 0.9999999359262627, iteration: 400030
loss: 1.0155003070831299,grad_norm: 0.7239338650057909, iteration: 400031
loss: 1.0332701206207275,grad_norm: 0.8256665568690396, iteration: 400032
loss: 1.0342589616775513,grad_norm: 0.7154607145099403, iteration: 400033
loss: 1.048767328262329,grad_norm: 0.9999993878295291, iteration: 400034
loss: 1.0153467655181885,grad_norm: 0.8231222387798821, iteration: 400035
loss: 0.9728531837463379,grad_norm: 0.7734300296222869, iteration: 400036
loss: 0.9845609068870544,grad_norm: 0.9092504941597578, iteration: 400037
loss: 1.018437147140503,grad_norm: 0.7555094595562623, iteration: 400038
loss: 0.9822705388069153,grad_norm: 0.7517297455857098, iteration: 400039
loss: 1.0978518724441528,grad_norm: 0.9999994785632121, iteration: 400040
loss: 1.0055161714553833,grad_norm: 0.8471191529946082, iteration: 400041
loss: 0.9691398739814758,grad_norm: 0.7272589411594176, iteration: 400042
loss: 0.9798439741134644,grad_norm: 0.8084362317537004, iteration: 400043
loss: 0.9819684028625488,grad_norm: 0.9495863641802513, iteration: 400044
loss: 1.0447466373443604,grad_norm: 0.6930814511965383, iteration: 400045
loss: 1.0429201126098633,grad_norm: 0.9221938665704077, iteration: 400046
loss: 0.9966961145401001,grad_norm: 0.8760832341153431, iteration: 400047
loss: 1.0649791955947876,grad_norm: 0.9999991754955037, iteration: 400048
loss: 0.9649009108543396,grad_norm: 0.8306747606853815, iteration: 400049
loss: 0.9868023991584778,grad_norm: 0.858283951383399, iteration: 400050
loss: 1.0061213970184326,grad_norm: 0.704384648988919, iteration: 400051
loss: 0.9883350729942322,grad_norm: 0.7723301278072421, iteration: 400052
loss: 1.0223746299743652,grad_norm: 0.7553935997185005, iteration: 400053
loss: 0.9793974161148071,grad_norm: 0.7531320957505178, iteration: 400054
loss: 1.0012810230255127,grad_norm: 0.712503449647784, iteration: 400055
loss: 1.0224897861480713,grad_norm: 0.8811124424940605, iteration: 400056
loss: 0.9797458052635193,grad_norm: 0.8244270199491927, iteration: 400057
loss: 0.9810575842857361,grad_norm: 0.7321811641578027, iteration: 400058
loss: 0.9266290068626404,grad_norm: 0.8291561422972826, iteration: 400059
loss: 0.9825063347816467,grad_norm: 0.860557780529016, iteration: 400060
loss: 1.023437738418579,grad_norm: 0.9657912410150148, iteration: 400061
loss: 0.9523249268531799,grad_norm: 0.8157948675270371, iteration: 400062
loss: 0.9882557988166809,grad_norm: 0.7719867396069658, iteration: 400063
loss: 1.0110632181167603,grad_norm: 0.730912835873001, iteration: 400064
loss: 0.9984294176101685,grad_norm: 0.7462549278852346, iteration: 400065
loss: 1.0429534912109375,grad_norm: 0.9999997607363997, iteration: 400066
loss: 1.0005607604980469,grad_norm: 0.8334072851930634, iteration: 400067
loss: 1.0014125108718872,grad_norm: 0.7553604861897869, iteration: 400068
loss: 0.9982332587242126,grad_norm: 0.8111805724253763, iteration: 400069
loss: 1.1258479356765747,grad_norm: 0.7928572667602363, iteration: 400070
loss: 0.9568583965301514,grad_norm: 0.9037434228316017, iteration: 400071
loss: 1.0374890565872192,grad_norm: 0.9999990149466028, iteration: 400072
loss: 1.0209639072418213,grad_norm: 0.8832312782418872, iteration: 400073
loss: 0.9660038948059082,grad_norm: 0.756608477201301, iteration: 400074
loss: 1.0035293102264404,grad_norm: 0.7908731409468422, iteration: 400075
loss: 0.9903159737586975,grad_norm: 0.7377290199721614, iteration: 400076
loss: 0.9988296627998352,grad_norm: 0.7872204978183249, iteration: 400077
loss: 0.9872255325317383,grad_norm: 0.808976853478294, iteration: 400078
loss: 1.0060123205184937,grad_norm: 0.8987340737320356, iteration: 400079
loss: 0.9772688150405884,grad_norm: 0.9687724136499987, iteration: 400080
loss: 1.0107187032699585,grad_norm: 0.9058234919503513, iteration: 400081
loss: 1.006292462348938,grad_norm: 0.8508379490404809, iteration: 400082
loss: 1.003350019454956,grad_norm: 0.737708134460249, iteration: 400083
loss: 0.9838862419128418,grad_norm: 0.8333212002829742, iteration: 400084
loss: 1.021420955657959,grad_norm: 0.7913073451118663, iteration: 400085
loss: 1.0194894075393677,grad_norm: 0.713404796345287, iteration: 400086
loss: 0.9534319043159485,grad_norm: 0.7849079036122728, iteration: 400087
loss: 1.0057979822158813,grad_norm: 0.7822207028503111, iteration: 400088
loss: 1.0189440250396729,grad_norm: 0.9745918568431882, iteration: 400089
loss: 1.092905879020691,grad_norm: 0.8692788220633298, iteration: 400090
loss: 0.9891535639762878,grad_norm: 0.8324513297548105, iteration: 400091
loss: 0.9844689965248108,grad_norm: 0.762562958830383, iteration: 400092
loss: 0.9873598217964172,grad_norm: 0.7443173099358406, iteration: 400093
loss: 1.0037275552749634,grad_norm: 0.6955724282733036, iteration: 400094
loss: 1.0034077167510986,grad_norm: 0.731383440601799, iteration: 400095
loss: 1.0236520767211914,grad_norm: 0.8128985044789645, iteration: 400096
loss: 1.0218385457992554,grad_norm: 0.802189869256419, iteration: 400097
loss: 0.9946361780166626,grad_norm: 0.921082249480068, iteration: 400098
loss: 0.9745408892631531,grad_norm: 0.7371072096175775, iteration: 400099
loss: 1.0598849058151245,grad_norm: 0.9999997302972192, iteration: 400100
loss: 0.9922125935554504,grad_norm: 0.7634190750525606, iteration: 400101
loss: 1.0045104026794434,grad_norm: 0.7611122658356294, iteration: 400102
loss: 1.0114675760269165,grad_norm: 0.9598165809039739, iteration: 400103
loss: 0.9754469394683838,grad_norm: 0.9121481005119682, iteration: 400104
loss: 0.9719753861427307,grad_norm: 0.6657154965635207, iteration: 400105
loss: 1.055565595626831,grad_norm: 0.8855020662735977, iteration: 400106
loss: 0.9889010190963745,grad_norm: 0.907878219454887, iteration: 400107
loss: 0.9859452843666077,grad_norm: 0.7951100167832317, iteration: 400108
loss: 0.9846534132957458,grad_norm: 0.7371887758317854, iteration: 400109
loss: 1.0216753482818604,grad_norm: 0.8335793541977595, iteration: 400110
loss: 1.0001740455627441,grad_norm: 0.9189940284751155, iteration: 400111
loss: 1.0124164819717407,grad_norm: 0.7476051329518952, iteration: 400112
loss: 0.9921795129776001,grad_norm: 0.958912750674762, iteration: 400113
loss: 1.0212006568908691,grad_norm: 0.8020819387139994, iteration: 400114
loss: 1.0229803323745728,grad_norm: 0.9045749310006549, iteration: 400115
loss: 1.0023372173309326,grad_norm: 0.761444850766246, iteration: 400116
loss: 1.0142459869384766,grad_norm: 0.9999999288263257, iteration: 400117
loss: 1.0004016160964966,grad_norm: 0.7856959058436894, iteration: 400118
loss: 0.9734923839569092,grad_norm: 0.8101315868791198, iteration: 400119
loss: 0.9650516510009766,grad_norm: 0.8377680021046089, iteration: 400120
loss: 1.027574896812439,grad_norm: 0.7814709498281918, iteration: 400121
loss: 1.0124740600585938,grad_norm: 0.8640964106123254, iteration: 400122
loss: 1.0086588859558105,grad_norm: 0.8991097148971725, iteration: 400123
loss: 1.1319352388381958,grad_norm: 0.8888528345315241, iteration: 400124
loss: 1.0208936929702759,grad_norm: 0.7233638637397517, iteration: 400125
loss: 1.005733609199524,grad_norm: 0.7901162811355902, iteration: 400126
loss: 0.9965886473655701,grad_norm: 0.900987505419812, iteration: 400127
loss: 0.9669501781463623,grad_norm: 0.8872309037245069, iteration: 400128
loss: 1.0015721321105957,grad_norm: 0.7293081010793736, iteration: 400129
loss: 1.0414575338363647,grad_norm: 0.9999994787703812, iteration: 400130
loss: 1.0200414657592773,grad_norm: 0.8612588707761684, iteration: 400131
loss: 0.9818975329399109,grad_norm: 0.8146871211940665, iteration: 400132
loss: 1.0291423797607422,grad_norm: 0.9953415724055829, iteration: 400133
loss: 0.9908574223518372,grad_norm: 0.9999997078353132, iteration: 400134
loss: 1.0048044919967651,grad_norm: 0.7333149068291283, iteration: 400135
loss: 0.9687890410423279,grad_norm: 0.9999990921051863, iteration: 400136
loss: 1.0433508157730103,grad_norm: 0.7903056424521935, iteration: 400137
loss: 0.9896608591079712,grad_norm: 0.8616617589539335, iteration: 400138
loss: 1.0002555847167969,grad_norm: 0.6999340412445115, iteration: 400139
loss: 1.1489161252975464,grad_norm: 0.9999992129293557, iteration: 400140
loss: 1.0074876546859741,grad_norm: 0.9279929953906617, iteration: 400141
loss: 1.0842238664627075,grad_norm: 0.9999998717098436, iteration: 400142
loss: 1.0695019960403442,grad_norm: 0.8688635517256942, iteration: 400143
loss: 1.0311602354049683,grad_norm: 0.8075648087779733, iteration: 400144
loss: 1.049868106842041,grad_norm: 0.8135101592471655, iteration: 400145
loss: 1.0146430730819702,grad_norm: 0.9999999280018641, iteration: 400146
loss: 0.9823863506317139,grad_norm: 0.7989402784125652, iteration: 400147
loss: 1.018707036972046,grad_norm: 0.9190059585133108, iteration: 400148
loss: 1.0015196800231934,grad_norm: 0.730139453931282, iteration: 400149
loss: 0.9653307795524597,grad_norm: 0.8025313504803441, iteration: 400150
loss: 0.9763779044151306,grad_norm: 0.9016742294198107, iteration: 400151
loss: 1.0218734741210938,grad_norm: 0.7294943693656234, iteration: 400152
loss: 0.9970663189888,grad_norm: 0.8388831038691968, iteration: 400153
loss: 0.9787787795066833,grad_norm: 0.8013304825633878, iteration: 400154
loss: 1.0365034341812134,grad_norm: 0.7797466116518139, iteration: 400155
loss: 1.084521770477295,grad_norm: 0.9999995341267998, iteration: 400156
loss: 1.0283907651901245,grad_norm: 0.7861434414610023, iteration: 400157
loss: 1.0024161338806152,grad_norm: 0.9114729427107698, iteration: 400158
loss: 1.0278379917144775,grad_norm: 0.7910029949726676, iteration: 400159
loss: 1.0496175289154053,grad_norm: 0.8052261555355371, iteration: 400160
loss: 0.9832283854484558,grad_norm: 0.9999992292622742, iteration: 400161
loss: 1.0165847539901733,grad_norm: 0.7665807408401355, iteration: 400162
loss: 1.0183855295181274,grad_norm: 0.8273045556903801, iteration: 400163
loss: 0.993291974067688,grad_norm: 0.9450695595163602, iteration: 400164
loss: 0.985333263874054,grad_norm: 0.8634016729037515, iteration: 400165
loss: 1.0114761590957642,grad_norm: 0.685517249208782, iteration: 400166
loss: 0.9923627972602844,grad_norm: 0.710967143392796, iteration: 400167
loss: 1.0185580253601074,grad_norm: 0.6667810106668839, iteration: 400168
loss: 1.0146639347076416,grad_norm: 0.9087805143908598, iteration: 400169
loss: 0.9978513121604919,grad_norm: 0.9999997917808209, iteration: 400170
loss: 0.9412617683410645,grad_norm: 0.8666241576270695, iteration: 400171
loss: 0.982179582118988,grad_norm: 0.999999010158435, iteration: 400172
loss: 1.0194038152694702,grad_norm: 0.7657657134838778, iteration: 400173
loss: 0.960197925567627,grad_norm: 0.6954721288946888, iteration: 400174
loss: 1.0080090761184692,grad_norm: 0.9999991410551896, iteration: 400175
loss: 1.0031747817993164,grad_norm: 0.875286182093692, iteration: 400176
loss: 1.0125080347061157,grad_norm: 0.8756953202409394, iteration: 400177
loss: 0.9579789042472839,grad_norm: 0.8544823946001763, iteration: 400178
loss: 1.0111565589904785,grad_norm: 0.9999990513555765, iteration: 400179
loss: 0.9787634015083313,grad_norm: 0.8810592549756493, iteration: 400180
loss: 0.9984412789344788,grad_norm: 0.6923784406063986, iteration: 400181
loss: 1.0404157638549805,grad_norm: 0.9999997890346203, iteration: 400182
loss: 0.9769671559333801,grad_norm: 0.7338853331271731, iteration: 400183
loss: 1.0571928024291992,grad_norm: 0.9999995614350818, iteration: 400184
loss: 0.9668247103691101,grad_norm: 0.7681827437054405, iteration: 400185
loss: 0.9804421067237854,grad_norm: 0.7193840943180968, iteration: 400186
loss: 0.9668118953704834,grad_norm: 0.7977910977021215, iteration: 400187
loss: 1.0080015659332275,grad_norm: 0.99999902950898, iteration: 400188
loss: 1.0280156135559082,grad_norm: 0.8779239395187328, iteration: 400189
loss: 1.016762614250183,grad_norm: 0.8033453268377055, iteration: 400190
loss: 1.005784273147583,grad_norm: 0.7148426051957392, iteration: 400191
loss: 0.99228835105896,grad_norm: 0.9931338325506911, iteration: 400192
loss: 1.0421754121780396,grad_norm: 0.9999990725966929, iteration: 400193
loss: 0.9890188574790955,grad_norm: 0.8013434092585406, iteration: 400194
loss: 0.9754197001457214,grad_norm: 0.7366002366678517, iteration: 400195
loss: 0.9812977910041809,grad_norm: 0.999999443197876, iteration: 400196
loss: 0.9741653203964233,grad_norm: 0.7464099274344624, iteration: 400197
loss: 1.0119765996932983,grad_norm: 0.7313344428764235, iteration: 400198
loss: 0.9482134580612183,grad_norm: 0.7815359519611067, iteration: 400199
loss: 0.9857679605484009,grad_norm: 0.9571205175963765, iteration: 400200
loss: 1.0047680139541626,grad_norm: 0.7423915191228332, iteration: 400201
loss: 0.976678192615509,grad_norm: 0.819022981013079, iteration: 400202
loss: 1.0018244981765747,grad_norm: 0.7532783682868464, iteration: 400203
loss: 0.9927550554275513,grad_norm: 0.9999989890225617, iteration: 400204
loss: 0.9999819397926331,grad_norm: 0.7618290368214224, iteration: 400205
loss: 1.0032013654708862,grad_norm: 0.7535935528931675, iteration: 400206
loss: 1.0087542533874512,grad_norm: 0.9482247798851052, iteration: 400207
loss: 1.0196336507797241,grad_norm: 0.8676228224848919, iteration: 400208
loss: 1.0206273794174194,grad_norm: 0.7971526289665362, iteration: 400209
loss: 1.0026496648788452,grad_norm: 0.734723802270624, iteration: 400210
loss: 1.0182223320007324,grad_norm: 0.8471018765324287, iteration: 400211
loss: 0.9987366795539856,grad_norm: 0.9117792579868298, iteration: 400212
loss: 1.0196740627288818,grad_norm: 0.7443376154372042, iteration: 400213
loss: 0.9628832936286926,grad_norm: 0.8668268944434294, iteration: 400214
loss: 1.0156958103179932,grad_norm: 0.7882207225015225, iteration: 400215
loss: 1.0373327732086182,grad_norm: 0.8284266181540644, iteration: 400216
loss: 0.9831080436706543,grad_norm: 0.8225907250179909, iteration: 400217
loss: 1.1113178730010986,grad_norm: 0.8798531848025388, iteration: 400218
loss: 1.0176953077316284,grad_norm: 0.6697089851310368, iteration: 400219
loss: 0.9724312424659729,grad_norm: 0.8101457209382567, iteration: 400220
loss: 0.9780799150466919,grad_norm: 0.8498915557004744, iteration: 400221
loss: 1.003665804862976,grad_norm: 0.9999996146983929, iteration: 400222
loss: 0.9936568737030029,grad_norm: 0.7568585946229739, iteration: 400223
loss: 0.9872564077377319,grad_norm: 0.825835831449178, iteration: 400224
loss: 1.016953706741333,grad_norm: 0.8111544746960204, iteration: 400225
loss: 1.070798397064209,grad_norm: 0.9999996127086045, iteration: 400226
loss: 0.9939759373664856,grad_norm: 0.7921507930404639, iteration: 400227
loss: 0.9791164398193359,grad_norm: 0.8764521558557651, iteration: 400228
loss: 1.031715750694275,grad_norm: 0.8962824951403954, iteration: 400229
loss: 1.004157543182373,grad_norm: 0.6900571242449398, iteration: 400230
loss: 1.0219612121582031,grad_norm: 0.8963729260240936, iteration: 400231
loss: 1.0172045230865479,grad_norm: 0.7558455187920899, iteration: 400232
loss: 1.312395453453064,grad_norm: 0.9999999261093132, iteration: 400233
loss: 1.0319513082504272,grad_norm: 0.9999999603849207, iteration: 400234
loss: 1.110319972038269,grad_norm: 0.8819247764555933, iteration: 400235
loss: 0.9939090609550476,grad_norm: 0.7738982995503145, iteration: 400236
loss: 0.9672412872314453,grad_norm: 0.9440275137143725, iteration: 400237
loss: 0.9951610565185547,grad_norm: 0.7357904665725323, iteration: 400238
loss: 0.991233766078949,grad_norm: 0.860373664257846, iteration: 400239
loss: 1.0469900369644165,grad_norm: 0.7937650335029767, iteration: 400240
loss: 0.9849361181259155,grad_norm: 0.6753677008275845, iteration: 400241
loss: 1.0144195556640625,grad_norm: 0.6855278989087514, iteration: 400242
loss: 0.9969101548194885,grad_norm: 0.911837395001891, iteration: 400243
loss: 0.9892305135726929,grad_norm: 0.7461833147175674, iteration: 400244
loss: 0.9770836234092712,grad_norm: 0.8371666456720168, iteration: 400245
loss: 0.9693739414215088,grad_norm: 0.7958538948103142, iteration: 400246
loss: 1.0016231536865234,grad_norm: 0.7953277067064272, iteration: 400247
loss: 0.9911571741104126,grad_norm: 0.674206023670854, iteration: 400248
loss: 0.9398345947265625,grad_norm: 0.9105700813697419, iteration: 400249
loss: 1.0311901569366455,grad_norm: 0.9256537983286655, iteration: 400250
loss: 0.9983753561973572,grad_norm: 0.7693336963256354, iteration: 400251
loss: 1.0009020566940308,grad_norm: 0.7374842510309638, iteration: 400252
loss: 0.9696192145347595,grad_norm: 0.7685373481217426, iteration: 400253
loss: 1.0472133159637451,grad_norm: 0.8166288074802938, iteration: 400254
loss: 0.9692693948745728,grad_norm: 0.8148245990984413, iteration: 400255
loss: 0.9838592410087585,grad_norm: 0.9999990419088283, iteration: 400256
loss: 1.0080105066299438,grad_norm: 0.7723149281414943, iteration: 400257
loss: 0.9904080629348755,grad_norm: 0.8637481449693016, iteration: 400258
loss: 1.0007959604263306,grad_norm: 0.727381802211231, iteration: 400259
loss: 1.0770155191421509,grad_norm: 0.9999990492193419, iteration: 400260
loss: 0.9943240880966187,grad_norm: 0.9214817555356009, iteration: 400261
loss: 1.043174147605896,grad_norm: 0.7096863254021951, iteration: 400262
loss: 1.0067896842956543,grad_norm: 0.9727611643866588, iteration: 400263
loss: 1.007116436958313,grad_norm: 0.8692286695762097, iteration: 400264
loss: 0.9997700452804565,grad_norm: 0.7780979777043796, iteration: 400265
loss: 0.9980957508087158,grad_norm: 0.8026715519951714, iteration: 400266
loss: 1.0356760025024414,grad_norm: 0.8864510480190217, iteration: 400267
loss: 1.0241602659225464,grad_norm: 0.9999991531516185, iteration: 400268
loss: 0.9544931650161743,grad_norm: 0.8383103958518892, iteration: 400269
loss: 1.0142390727996826,grad_norm: 0.8560572855456167, iteration: 400270
loss: 1.0117120742797852,grad_norm: 0.7019364327015863, iteration: 400271
loss: 0.9828318953514099,grad_norm: 0.7112666007453988, iteration: 400272
loss: 1.0345985889434814,grad_norm: 0.9999990867315994, iteration: 400273
loss: 1.0043387413024902,grad_norm: 0.7812814069458992, iteration: 400274
loss: 0.9914836883544922,grad_norm: 0.6754890871136859, iteration: 400275
loss: 0.9741391539573669,grad_norm: 0.7703055304317581, iteration: 400276
loss: 0.9972834587097168,grad_norm: 0.8271339426195076, iteration: 400277
loss: 1.0479506254196167,grad_norm: 0.9999999686197979, iteration: 400278
loss: 1.0006595849990845,grad_norm: 0.8083101050754795, iteration: 400279
loss: 0.9405940771102905,grad_norm: 0.9047350163455619, iteration: 400280
loss: 1.014866590499878,grad_norm: 0.7254900472300906, iteration: 400281
loss: 1.006039023399353,grad_norm: 0.6997365554016572, iteration: 400282
loss: 1.0096391439437866,grad_norm: 0.7120469953105774, iteration: 400283
loss: 0.9999972581863403,grad_norm: 0.736715718899464, iteration: 400284
loss: 0.9974360466003418,grad_norm: 0.9999989977432682, iteration: 400285
loss: 1.0244768857955933,grad_norm: 0.6481727044108019, iteration: 400286
loss: 0.9983673095703125,grad_norm: 0.7451312374054965, iteration: 400287
loss: 0.9910779595375061,grad_norm: 0.7511474321624693, iteration: 400288
loss: 1.0324256420135498,grad_norm: 0.5743883520639309, iteration: 400289
loss: 0.9817093014717102,grad_norm: 0.7047008017728994, iteration: 400290
loss: 0.9886337518692017,grad_norm: 0.7127814312294058, iteration: 400291
loss: 1.0482195615768433,grad_norm: 0.8017828285604173, iteration: 400292
loss: 0.9889146685600281,grad_norm: 0.7672400572374349, iteration: 400293
loss: 0.9715324640274048,grad_norm: 0.8400126719978054, iteration: 400294
loss: 0.9986413717269897,grad_norm: 0.8164659913745455, iteration: 400295
loss: 1.002679705619812,grad_norm: 0.9999997795676948, iteration: 400296
loss: 0.9742463827133179,grad_norm: 0.6085138504667498, iteration: 400297
loss: 0.9935575127601624,grad_norm: 0.8130306391586152, iteration: 400298
loss: 1.0353344678878784,grad_norm: 0.9999996556046774, iteration: 400299
loss: 1.0292211771011353,grad_norm: 0.9999993910883644, iteration: 400300
loss: 1.0638861656188965,grad_norm: 0.9999997240305103, iteration: 400301
loss: 0.9992799758911133,grad_norm: 0.9268507506231001, iteration: 400302
loss: 0.984200656414032,grad_norm: 0.6330705029498632, iteration: 400303
loss: 1.0111619234085083,grad_norm: 0.9511924026967459, iteration: 400304
loss: 0.9896609783172607,grad_norm: 0.9186279559201249, iteration: 400305
loss: 1.0044243335723877,grad_norm: 0.7881901239236705, iteration: 400306
loss: 1.0494976043701172,grad_norm: 0.9999997199110573, iteration: 400307
loss: 1.0452277660369873,grad_norm: 0.8360704260542541, iteration: 400308
loss: 0.9886103868484497,grad_norm: 0.7826409346161923, iteration: 400309
loss: 0.9859302639961243,grad_norm: 0.7597000218515686, iteration: 400310
loss: 0.9636585712432861,grad_norm: 0.6824318447865854, iteration: 400311
loss: 1.0774569511413574,grad_norm: 0.8947543876579457, iteration: 400312
loss: 1.048896312713623,grad_norm: 0.9519812930751776, iteration: 400313
loss: 0.9984063506126404,grad_norm: 0.8173355892864842, iteration: 400314
loss: 1.020484209060669,grad_norm: 0.7644700125915769, iteration: 400315
loss: 0.9791095852851868,grad_norm: 0.8783079642022799, iteration: 400316
loss: 0.9693490862846375,grad_norm: 0.6964039556121002, iteration: 400317
loss: 0.9792894124984741,grad_norm: 0.7739574273090946, iteration: 400318
loss: 1.0176926851272583,grad_norm: 0.851759506535673, iteration: 400319
loss: 1.0839924812316895,grad_norm: 0.9999999462332936, iteration: 400320
loss: 0.9983958005905151,grad_norm: 0.7491781691070943, iteration: 400321
loss: 1.015653371810913,grad_norm: 0.7607653066168523, iteration: 400322
loss: 0.9936578869819641,grad_norm: 0.8175122667900222, iteration: 400323
loss: 0.9615281224250793,grad_norm: 0.8180505606994827, iteration: 400324
loss: 0.9996134042739868,grad_norm: 0.9999994115214987, iteration: 400325
loss: 1.0523960590362549,grad_norm: 0.9999990372016594, iteration: 400326
loss: 1.017890214920044,grad_norm: 0.8243468273415113, iteration: 400327
loss: 0.9693527221679688,grad_norm: 0.7573227687579072, iteration: 400328
loss: 1.035933494567871,grad_norm: 0.999999521881381, iteration: 400329
loss: 1.0268700122833252,grad_norm: 0.9999996136242727, iteration: 400330
loss: 0.9783617258071899,grad_norm: 0.779040192169975, iteration: 400331
loss: 0.9953802227973938,grad_norm: 0.7292280831082325, iteration: 400332
loss: 1.021134376525879,grad_norm: 0.7050288774178748, iteration: 400333
loss: 1.0878207683563232,grad_norm: 0.7761975886257929, iteration: 400334
loss: 1.0421156883239746,grad_norm: 0.8858622573088267, iteration: 400335
loss: 1.1665349006652832,grad_norm: 0.9999993800985492, iteration: 400336
loss: 0.9548091888427734,grad_norm: 0.7649639212819752, iteration: 400337
loss: 0.9910449385643005,grad_norm: 0.8192472304777665, iteration: 400338
loss: 0.9787423610687256,grad_norm: 0.9999998181666155, iteration: 400339
loss: 1.0185911655426025,grad_norm: 0.8171589336920703, iteration: 400340
loss: 1.009076714515686,grad_norm: 0.9706896921129564, iteration: 400341
loss: 0.9814140796661377,grad_norm: 0.6825915157552668, iteration: 400342
loss: 0.9996896386146545,grad_norm: 0.8193380547628464, iteration: 400343
loss: 1.0350687503814697,grad_norm: 0.9068817796224775, iteration: 400344
loss: 1.0241296291351318,grad_norm: 0.7737616433354069, iteration: 400345
loss: 1.0040256977081299,grad_norm: 0.8053521488640316, iteration: 400346
loss: 1.0054200887680054,grad_norm: 0.7759793697956124, iteration: 400347
loss: 0.993522047996521,grad_norm: 0.8992118854156754, iteration: 400348
loss: 0.9843834638595581,grad_norm: 0.7411136818291276, iteration: 400349
loss: 0.9666337966918945,grad_norm: 0.8491563496182979, iteration: 400350
loss: 1.038793683052063,grad_norm: 0.8191003897384266, iteration: 400351
loss: 1.02798330783844,grad_norm: 0.7926824865124067, iteration: 400352
loss: 1.080647587776184,grad_norm: 0.9999994587789883, iteration: 400353
loss: 0.9618867039680481,grad_norm: 0.675933813524532, iteration: 400354
loss: 0.9922987222671509,grad_norm: 0.7490579845317601, iteration: 400355
loss: 0.986903727054596,grad_norm: 0.813378607244649, iteration: 400356
loss: 0.9866589903831482,grad_norm: 0.6267789958084717, iteration: 400357
loss: 1.0061172246932983,grad_norm: 0.6787124144572134, iteration: 400358
loss: 1.0155243873596191,grad_norm: 0.9999995463883541, iteration: 400359
loss: 1.0290379524230957,grad_norm: 0.8815754256102349, iteration: 400360
loss: 1.0323336124420166,grad_norm: 0.7761433782881657, iteration: 400361
loss: 1.0025336742401123,grad_norm: 0.8966160198796912, iteration: 400362
loss: 1.051179051399231,grad_norm: 0.9999998373678097, iteration: 400363
loss: 0.980987012386322,grad_norm: 0.7214110862532842, iteration: 400364
loss: 0.9873087406158447,grad_norm: 0.916413057171418, iteration: 400365
loss: 0.9758224487304688,grad_norm: 0.801475572697296, iteration: 400366
loss: 1.0150972604751587,grad_norm: 0.7781833192188238, iteration: 400367
loss: 0.998509407043457,grad_norm: 0.7239601586406563, iteration: 400368
loss: 1.1294747591018677,grad_norm: 0.9999996004398272, iteration: 400369
loss: 1.01911199092865,grad_norm: 0.7758289095758899, iteration: 400370
loss: 0.9753586649894714,grad_norm: 0.7480595466816612, iteration: 400371
loss: 0.9875072240829468,grad_norm: 0.8067431954687891, iteration: 400372
loss: 1.0703939199447632,grad_norm: 0.8827069327068747, iteration: 400373
loss: 1.0285509824752808,grad_norm: 0.7375422586354508, iteration: 400374
loss: 1.0136444568634033,grad_norm: 0.9999997061804757, iteration: 400375
loss: 0.9738566875457764,grad_norm: 0.789776159004332, iteration: 400376
loss: 1.0129497051239014,grad_norm: 0.7257695191684136, iteration: 400377
loss: 1.0035631656646729,grad_norm: 0.999999143878112, iteration: 400378
loss: 1.0158488750457764,grad_norm: 0.6886649063840403, iteration: 400379
loss: 1.0290319919586182,grad_norm: 0.9999991567320643, iteration: 400380
loss: 1.071141004562378,grad_norm: 0.7719133753296095, iteration: 400381
loss: 0.987405002117157,grad_norm: 0.9999991906463895, iteration: 400382
loss: 1.0923612117767334,grad_norm: 0.9999991174095194, iteration: 400383
loss: 1.010323166847229,grad_norm: 0.7621887936009532, iteration: 400384
loss: 1.0435078144073486,grad_norm: 0.9999991320166116, iteration: 400385
loss: 1.0580228567123413,grad_norm: 0.8368680772992569, iteration: 400386
loss: 1.0336941480636597,grad_norm: 0.999999221752007, iteration: 400387
loss: 1.0010359287261963,grad_norm: 0.6936203858827459, iteration: 400388
loss: 0.999483585357666,grad_norm: 0.9999997130019683, iteration: 400389
loss: 1.0081095695495605,grad_norm: 0.8926025290025412, iteration: 400390
loss: 0.9964872002601624,grad_norm: 0.7560201988353523, iteration: 400391
loss: 1.0187993049621582,grad_norm: 0.7799853593562183, iteration: 400392
loss: 0.9948747158050537,grad_norm: 0.7813496435969846, iteration: 400393
loss: 0.9969483017921448,grad_norm: 0.8348274323332195, iteration: 400394
loss: 1.2163206338882446,grad_norm: 0.9999992555379873, iteration: 400395
loss: 1.022853970527649,grad_norm: 0.999999207070134, iteration: 400396
loss: 1.0045452117919922,grad_norm: 0.8070524830351884, iteration: 400397
loss: 0.9820149540901184,grad_norm: 0.9204578091131386, iteration: 400398
loss: 1.0045032501220703,grad_norm: 0.744560273492417, iteration: 400399
loss: 0.9985119104385376,grad_norm: 0.7416450182889605, iteration: 400400
loss: 0.9969848990440369,grad_norm: 0.7769606786397909, iteration: 400401
loss: 1.0274784564971924,grad_norm: 0.7152178287047488, iteration: 400402
loss: 1.1038898229599,grad_norm: 0.9999994657342551, iteration: 400403
loss: 0.995217502117157,grad_norm: 0.9379557607674261, iteration: 400404
loss: 1.013297200202942,grad_norm: 0.8256309900626607, iteration: 400405
loss: 1.005433440208435,grad_norm: 0.9999989931723331, iteration: 400406
loss: 0.9567314386367798,grad_norm: 0.8231778566127957, iteration: 400407
loss: 0.9902113676071167,grad_norm: 0.9769175922460622, iteration: 400408
loss: 0.9954417943954468,grad_norm: 0.7450579834842704, iteration: 400409
loss: 0.9995490312576294,grad_norm: 0.8322191268102486, iteration: 400410
loss: 1.0389018058776855,grad_norm: 0.7972807759738594, iteration: 400411
loss: 0.9809378385543823,grad_norm: 0.7640109618882271, iteration: 400412
loss: 0.9846912026405334,grad_norm: 0.9999991474196603, iteration: 400413
loss: 1.0054551362991333,grad_norm: 0.9999993224306089, iteration: 400414
loss: 1.0074363946914673,grad_norm: 0.8255970160917752, iteration: 400415
loss: 1.0230357646942139,grad_norm: 0.7221233132682207, iteration: 400416
loss: 1.0412553548812866,grad_norm: 0.9999992030323266, iteration: 400417
loss: 0.9764106869697571,grad_norm: 0.8116739356144358, iteration: 400418
loss: 1.004281759262085,grad_norm: 0.7942078240049523, iteration: 400419
loss: 0.9990555644035339,grad_norm: 0.7153326557911782, iteration: 400420
loss: 1.036134123802185,grad_norm: 0.8776901194900396, iteration: 400421
loss: 1.0825077295303345,grad_norm: 0.8900950796562805, iteration: 400422
loss: 1.0309571027755737,grad_norm: 0.9757859859620005, iteration: 400423
loss: 1.0347226858139038,grad_norm: 0.7622815332020872, iteration: 400424
loss: 0.9977618455886841,grad_norm: 0.8532047667223731, iteration: 400425
loss: 1.0079256296157837,grad_norm: 0.9583918276723629, iteration: 400426
loss: 1.0279760360717773,grad_norm: 0.7076516292759597, iteration: 400427
loss: 1.0143831968307495,grad_norm: 0.9999992057355502, iteration: 400428
loss: 1.013237476348877,grad_norm: 0.8515326207149676, iteration: 400429
loss: 1.0158653259277344,grad_norm: 0.8835000007168298, iteration: 400430
loss: 1.0234953165054321,grad_norm: 0.9999990461656499, iteration: 400431
loss: 1.028985619544983,grad_norm: 0.999999208696658, iteration: 400432
loss: 0.9887572526931763,grad_norm: 0.8558217078589095, iteration: 400433
loss: 1.0195188522338867,grad_norm: 0.8242869942696307, iteration: 400434
loss: 1.0248383283615112,grad_norm: 0.9999991675143817, iteration: 400435
loss: 1.0157153606414795,grad_norm: 0.7866476743785846, iteration: 400436
loss: 1.0388559103012085,grad_norm: 0.9011169279674556, iteration: 400437
loss: 0.995037317276001,grad_norm: 0.8547055520516672, iteration: 400438
loss: 0.9985944032669067,grad_norm: 0.8611136996474665, iteration: 400439
loss: 1.0993013381958008,grad_norm: 0.9999998139761215, iteration: 400440
loss: 0.9827461242675781,grad_norm: 0.7144110815853736, iteration: 400441
loss: 0.9995746612548828,grad_norm: 0.7179923429242155, iteration: 400442
loss: 1.0014442205429077,grad_norm: 0.7610837309921722, iteration: 400443
loss: 0.9929441213607788,grad_norm: 0.7452934242752867, iteration: 400444
loss: 1.006742238998413,grad_norm: 0.7496025512861196, iteration: 400445
loss: 1.0526564121246338,grad_norm: 0.8044874189408677, iteration: 400446
loss: 1.035334825515747,grad_norm: 0.8081856991321481, iteration: 400447
loss: 1.0137970447540283,grad_norm: 0.9999999721897329, iteration: 400448
loss: 1.0125068426132202,grad_norm: 0.8849371849013941, iteration: 400449
loss: 1.0251966714859009,grad_norm: 0.8503438201972763, iteration: 400450
loss: 1.007178783416748,grad_norm: 0.8418320423982149, iteration: 400451
loss: 0.9886077046394348,grad_norm: 0.7027419136625531, iteration: 400452
loss: 1.0118399858474731,grad_norm: 0.8795658066304006, iteration: 400453
loss: 1.0575839281082153,grad_norm: 0.8812896420700608, iteration: 400454
loss: 1.000809669494629,grad_norm: 0.8614040364293407, iteration: 400455
loss: 0.9806579351425171,grad_norm: 0.6999024045639155, iteration: 400456
loss: 1.0337512493133545,grad_norm: 0.8026163175359783, iteration: 400457
loss: 0.9694211483001709,grad_norm: 0.8971551884901714, iteration: 400458
loss: 0.9859857559204102,grad_norm: 0.8081182404344139, iteration: 400459
loss: 1.0015923976898193,grad_norm: 0.8093748813117225, iteration: 400460
loss: 1.0027492046356201,grad_norm: 0.690400036015723, iteration: 400461
loss: 0.9984918832778931,grad_norm: 0.7768337897717742, iteration: 400462
loss: 1.000930905342102,grad_norm: 0.9295801110268632, iteration: 400463
loss: 0.9910983443260193,grad_norm: 0.744146311340257, iteration: 400464
loss: 1.0303159952163696,grad_norm: 0.967183995328809, iteration: 400465
loss: 1.000623106956482,grad_norm: 0.8000358870064481, iteration: 400466
loss: 0.9791814684867859,grad_norm: 0.7322242646601744, iteration: 400467
loss: 1.047545313835144,grad_norm: 0.9999994825019388, iteration: 400468
loss: 1.000014305114746,grad_norm: 0.8867551788973806, iteration: 400469
loss: 1.0056235790252686,grad_norm: 0.7001364591443789, iteration: 400470
loss: 1.0503146648406982,grad_norm: 0.9999995073184188, iteration: 400471
loss: 1.020885705947876,grad_norm: 0.7586141106763541, iteration: 400472
loss: 1.0261967182159424,grad_norm: 0.828153163766397, iteration: 400473
loss: 1.0174709558486938,grad_norm: 0.7491126587677059, iteration: 400474
loss: 0.9666139483451843,grad_norm: 0.7521070106064118, iteration: 400475
loss: 1.0060882568359375,grad_norm: 0.6621277052200488, iteration: 400476
loss: 1.0273280143737793,grad_norm: 0.770363471152517, iteration: 400477
loss: 1.0277867317199707,grad_norm: 0.7929663947571406, iteration: 400478
loss: 0.9793987274169922,grad_norm: 0.846722237074444, iteration: 400479
loss: 0.9995110630989075,grad_norm: 0.938204412794945, iteration: 400480
loss: 0.9930754899978638,grad_norm: 0.834893690198762, iteration: 400481
loss: 0.9823099970817566,grad_norm: 0.8233543084733752, iteration: 400482
loss: 0.9802024364471436,grad_norm: 0.8378622656813652, iteration: 400483
loss: 1.0121933221817017,grad_norm: 0.7507411141538147, iteration: 400484
loss: 1.001633882522583,grad_norm: 0.7942162324438932, iteration: 400485
loss: 0.9935846924781799,grad_norm: 0.9999991534456211, iteration: 400486
loss: 0.9994544386863708,grad_norm: 0.8464619735790677, iteration: 400487
loss: 1.0397299528121948,grad_norm: 0.8508984953182929, iteration: 400488
loss: 0.997636079788208,grad_norm: 0.8088400118610332, iteration: 400489
loss: 0.9888009428977966,grad_norm: 0.697125846803031, iteration: 400490
loss: 0.9721413254737854,grad_norm: 0.6672355291428329, iteration: 400491
loss: 1.025353193283081,grad_norm: 0.8465887007498213, iteration: 400492
loss: 1.0005347728729248,grad_norm: 0.7230642955521265, iteration: 400493
loss: 1.0455739498138428,grad_norm: 0.8781802581278689, iteration: 400494
loss: 0.9722748398780823,grad_norm: 0.6922052993761493, iteration: 400495
loss: 1.0107167959213257,grad_norm: 0.8835887061122977, iteration: 400496
loss: 0.9609598517417908,grad_norm: 0.7541641013995148, iteration: 400497
loss: 0.9913874864578247,grad_norm: 0.6565588797661014, iteration: 400498
loss: 1.0044742822647095,grad_norm: 0.7804708198521912, iteration: 400499
loss: 1.0141733884811401,grad_norm: 0.9852091969010698, iteration: 400500
loss: 1.0244455337524414,grad_norm: 0.7621131300889504, iteration: 400501
loss: 1.0589566230773926,grad_norm: 0.9763612480543903, iteration: 400502
loss: 0.99278324842453,grad_norm: 0.8237494440953955, iteration: 400503
loss: 0.9943151473999023,grad_norm: 0.8849726147691593, iteration: 400504
loss: 1.006406545639038,grad_norm: 0.8803366469337887, iteration: 400505
loss: 1.0040457248687744,grad_norm: 0.7538453691815064, iteration: 400506
loss: 1.0322052240371704,grad_norm: 0.8449707691920775, iteration: 400507
loss: 1.0106829404830933,grad_norm: 0.7423751057584816, iteration: 400508
loss: 1.035353183746338,grad_norm: 0.9999998427773196, iteration: 400509
loss: 0.984811544418335,grad_norm: 0.9999994219260108, iteration: 400510
loss: 1.1305465698242188,grad_norm: 0.999999629556471, iteration: 400511
loss: 0.985504150390625,grad_norm: 0.7015727714818202, iteration: 400512
loss: 0.961772084236145,grad_norm: 0.9011555627828051, iteration: 400513
loss: 1.0058993101119995,grad_norm: 0.9414777549675114, iteration: 400514
loss: 0.9860098958015442,grad_norm: 0.7759205934519803, iteration: 400515
loss: 1.0436557531356812,grad_norm: 0.9999997805228442, iteration: 400516
loss: 1.0208702087402344,grad_norm: 0.8358290266758951, iteration: 400517
loss: 0.9896109104156494,grad_norm: 0.8965526103557883, iteration: 400518
loss: 1.0096148252487183,grad_norm: 0.999999573342382, iteration: 400519
loss: 0.9716016054153442,grad_norm: 0.8788217895586049, iteration: 400520
loss: 1.0078262090682983,grad_norm: 0.7922668957303128, iteration: 400521
loss: 1.0310953855514526,grad_norm: 0.9999991292646434, iteration: 400522
loss: 0.9917849898338318,grad_norm: 0.8584943028855956, iteration: 400523
loss: 1.039077639579773,grad_norm: 0.9999992825089323, iteration: 400524
loss: 1.034698247909546,grad_norm: 0.9999991123113328, iteration: 400525
loss: 0.9616641402244568,grad_norm: 0.6617370499272892, iteration: 400526
loss: 1.1171889305114746,grad_norm: 0.8393698220938778, iteration: 400527
loss: 0.9999231100082397,grad_norm: 0.865050671873639, iteration: 400528
loss: 1.0457539558410645,grad_norm: 0.6632345910567962, iteration: 400529
loss: 0.9953147172927856,grad_norm: 0.6707365089915291, iteration: 400530
loss: 1.0009821653366089,grad_norm: 0.7402324299160139, iteration: 400531
loss: 1.1126108169555664,grad_norm: 0.7930800520089132, iteration: 400532
loss: 0.9984406232833862,grad_norm: 0.8021972266798189, iteration: 400533
loss: 1.021553635597229,grad_norm: 0.69681773334674, iteration: 400534
loss: 1.019972801208496,grad_norm: 0.681742638128131, iteration: 400535
loss: 0.9851846694946289,grad_norm: 0.999998930796782, iteration: 400536
loss: 1.045961856842041,grad_norm: 0.8800764703080796, iteration: 400537
loss: 1.0283468961715698,grad_norm: 0.9999990766762158, iteration: 400538
loss: 1.009384036064148,grad_norm: 0.9999990258754091, iteration: 400539
loss: 0.9944946765899658,grad_norm: 0.7938584100487649, iteration: 400540
loss: 0.9737193584442139,grad_norm: 0.9446435066959381, iteration: 400541
loss: 0.9867566227912903,grad_norm: 0.7121121794004708, iteration: 400542
loss: 1.0702513456344604,grad_norm: 0.7768069144535699, iteration: 400543
loss: 1.00759756565094,grad_norm: 0.803286216169361, iteration: 400544
loss: 0.9814023375511169,grad_norm: 0.7734215811408298, iteration: 400545
loss: 0.992313802242279,grad_norm: 0.8549341674229898, iteration: 400546
loss: 1.0121899843215942,grad_norm: 0.8464938886114264, iteration: 400547
loss: 1.0331358909606934,grad_norm: 0.7022354382810396, iteration: 400548
loss: 0.9970657825469971,grad_norm: 0.826931568096695, iteration: 400549
loss: 1.0285576581954956,grad_norm: 0.999999405927265, iteration: 400550
loss: 1.00709867477417,grad_norm: 0.7924994311989548, iteration: 400551
loss: 0.9757698178291321,grad_norm: 0.9999995047548396, iteration: 400552
loss: 0.999715268611908,grad_norm: 0.9999990638065682, iteration: 400553
loss: 1.000177264213562,grad_norm: 0.6498423306087623, iteration: 400554
loss: 0.9977567791938782,grad_norm: 0.8326761851722939, iteration: 400555
loss: 1.0517899990081787,grad_norm: 0.99999987481004, iteration: 400556
loss: 0.987991988658905,grad_norm: 0.8610173763395768, iteration: 400557
loss: 0.9654219150543213,grad_norm: 0.916644277916557, iteration: 400558
loss: 0.9984200596809387,grad_norm: 0.6532943774656428, iteration: 400559
loss: 0.9996904730796814,grad_norm: 0.9999991388160001, iteration: 400560
loss: 0.9881023168563843,grad_norm: 0.7198507312218629, iteration: 400561
loss: 0.9958524107933044,grad_norm: 0.9189422167293049, iteration: 400562
loss: 0.9945431351661682,grad_norm: 0.7528687513126283, iteration: 400563
loss: 0.9889417886734009,grad_norm: 0.8752906175416825, iteration: 400564
loss: 1.037926197052002,grad_norm: 0.8292997161119396, iteration: 400565
loss: 1.0270761251449585,grad_norm: 0.8434761981552076, iteration: 400566
loss: 1.0671732425689697,grad_norm: 0.9999995682327663, iteration: 400567
loss: 1.0021859407424927,grad_norm: 0.7908981038631291, iteration: 400568
loss: 0.9938338398933411,grad_norm: 0.8119919640585129, iteration: 400569
loss: 0.985877513885498,grad_norm: 0.7806578721303449, iteration: 400570
loss: 1.023227572441101,grad_norm: 0.6094982546622835, iteration: 400571
loss: 0.9862505197525024,grad_norm: 0.8514018535307362, iteration: 400572
loss: 0.9967531561851501,grad_norm: 0.9345579686948158, iteration: 400573
loss: 0.9736577272415161,grad_norm: 0.8092739268737204, iteration: 400574
loss: 1.0206190347671509,grad_norm: 0.6861082460924824, iteration: 400575
loss: 1.0048739910125732,grad_norm: 0.8093019171087221, iteration: 400576
loss: 1.0262727737426758,grad_norm: 0.8665200318568361, iteration: 400577
loss: 0.9854638576507568,grad_norm: 0.747141454252994, iteration: 400578
loss: 0.9604367017745972,grad_norm: 0.8210249894166125, iteration: 400579
loss: 1.0186090469360352,grad_norm: 0.9183305864757142, iteration: 400580
loss: 1.0023680925369263,grad_norm: 0.6556852508948771, iteration: 400581
loss: 1.0123834609985352,grad_norm: 0.7111345559717823, iteration: 400582
loss: 1.0226224660873413,grad_norm: 0.8409303968780824, iteration: 400583
loss: 1.0711859464645386,grad_norm: 0.8848684092944614, iteration: 400584
loss: 1.047855019569397,grad_norm: 0.9999999073463098, iteration: 400585
loss: 1.0165555477142334,grad_norm: 0.7925291667952984, iteration: 400586
loss: 0.984399139881134,grad_norm: 0.8144919840599542, iteration: 400587
loss: 0.9896621108055115,grad_norm: 0.8621957464448297, iteration: 400588
loss: 1.151039958000183,grad_norm: 0.9999990902862721, iteration: 400589
loss: 1.0038306713104248,grad_norm: 0.9981899204904775, iteration: 400590
loss: 1.0073281526565552,grad_norm: 0.7378958338794438, iteration: 400591
loss: 1.1102936267852783,grad_norm: 0.9999992445914717, iteration: 400592
loss: 0.9958844780921936,grad_norm: 0.7168014423690555, iteration: 400593
loss: 0.982233464717865,grad_norm: 0.8820564321003479, iteration: 400594
loss: 0.9810548424720764,grad_norm: 0.9176079697346664, iteration: 400595
loss: 0.9848222136497498,grad_norm: 0.7869513850984199, iteration: 400596
loss: 0.9894964098930359,grad_norm: 0.9581338510140791, iteration: 400597
loss: 0.9801821112632751,grad_norm: 0.7223965250000464, iteration: 400598
loss: 1.0017222166061401,grad_norm: 0.7728222638770249, iteration: 400599
loss: 1.015450358390808,grad_norm: 0.754047014869642, iteration: 400600
loss: 1.019899845123291,grad_norm: 0.8623625343855661, iteration: 400601
loss: 1.015592336654663,grad_norm: 0.9159932742685045, iteration: 400602
loss: 0.9780398607254028,grad_norm: 0.7623530916327723, iteration: 400603
loss: 0.975902259349823,grad_norm: 0.7897323824756944, iteration: 400604
loss: 0.986727774143219,grad_norm: 0.8733763665757757, iteration: 400605
loss: 1.1109343767166138,grad_norm: 0.9999993094451978, iteration: 400606
loss: 0.9862157106399536,grad_norm: 0.8673501842235297, iteration: 400607
loss: 1.0026583671569824,grad_norm: 0.9999992445051993, iteration: 400608
loss: 0.9660109877586365,grad_norm: 0.7929430899522314, iteration: 400609
loss: 1.0065916776657104,grad_norm: 0.8191741426217926, iteration: 400610
loss: 1.0252954959869385,grad_norm: 0.8870987136230996, iteration: 400611
loss: 0.9809393882751465,grad_norm: 0.7262716582132352, iteration: 400612
loss: 1.0624542236328125,grad_norm: 0.9687771285714799, iteration: 400613
loss: 0.9741511940956116,grad_norm: 0.8672666165624524, iteration: 400614
loss: 1.0110974311828613,grad_norm: 0.9941949209230082, iteration: 400615
loss: 1.0851260423660278,grad_norm: 0.7364123082923522, iteration: 400616
loss: 1.0060697793960571,grad_norm: 0.8205268281664362, iteration: 400617
loss: 0.9876067042350769,grad_norm: 0.7343780719035112, iteration: 400618
loss: 1.0032087564468384,grad_norm: 0.8917488204408643, iteration: 400619
loss: 1.0295485258102417,grad_norm: 0.7260010192954575, iteration: 400620
loss: 1.0020406246185303,grad_norm: 0.8945978029057529, iteration: 400621
loss: 0.9993851184844971,grad_norm: 0.7287888586923955, iteration: 400622
loss: 1.04172945022583,grad_norm: 0.9999996800730471, iteration: 400623
loss: 0.9503301382064819,grad_norm: 0.79075590917553, iteration: 400624
loss: 1.0310639142990112,grad_norm: 0.9999998088853945, iteration: 400625
loss: 1.0475789308547974,grad_norm: 0.8176444294425464, iteration: 400626
loss: 0.991345226764679,grad_norm: 0.9999991519195216, iteration: 400627
loss: 1.0226446390151978,grad_norm: 0.7221983808926234, iteration: 400628
loss: 1.0113203525543213,grad_norm: 0.8103958566052082, iteration: 400629
loss: 0.9916843771934509,grad_norm: 0.695093877931646, iteration: 400630
loss: 1.024580717086792,grad_norm: 0.9999991560823249, iteration: 400631
loss: 1.0233912467956543,grad_norm: 0.9999990325317272, iteration: 400632
loss: 1.0517610311508179,grad_norm: 0.7493935612427632, iteration: 400633
loss: 0.9980385899543762,grad_norm: 0.8756210885246827, iteration: 400634
loss: 1.0039516687393188,grad_norm: 0.9383971827703207, iteration: 400635
loss: 0.9859265089035034,grad_norm: 0.7166871910944229, iteration: 400636
loss: 0.9890312552452087,grad_norm: 0.7251306513723219, iteration: 400637
loss: 1.0350931882858276,grad_norm: 0.9999993714146774, iteration: 400638
loss: 0.9965642094612122,grad_norm: 0.8239666196453476, iteration: 400639
loss: 1.0672694444656372,grad_norm: 0.8799400658271164, iteration: 400640
loss: 0.9612980484962463,grad_norm: 0.7395813366605742, iteration: 400641
loss: 1.0227510929107666,grad_norm: 0.7287218954460399, iteration: 400642
loss: 1.0206090211868286,grad_norm: 0.7892699971313092, iteration: 400643
loss: 1.0828642845153809,grad_norm: 0.9999992821419678, iteration: 400644
loss: 1.0100717544555664,grad_norm: 0.9068539822186299, iteration: 400645
loss: 1.0493489503860474,grad_norm: 0.7193330790605127, iteration: 400646
loss: 0.9714345335960388,grad_norm: 0.7310643333594818, iteration: 400647
loss: 1.0225673913955688,grad_norm: 0.9294307490957887, iteration: 400648
loss: 1.074264407157898,grad_norm: 0.821170651996474, iteration: 400649
loss: 1.1265809535980225,grad_norm: 0.9710393837231931, iteration: 400650
loss: 0.9763462543487549,grad_norm: 0.8530563738103165, iteration: 400651
loss: 1.0129450559616089,grad_norm: 0.8328759237627895, iteration: 400652
loss: 1.022275686264038,grad_norm: 0.8654147222350862, iteration: 400653
loss: 0.9989283084869385,grad_norm: 0.6394128511554817, iteration: 400654
loss: 0.9733145833015442,grad_norm: 0.936399288845149, iteration: 400655
loss: 1.0365537405014038,grad_norm: 0.7415236576624883, iteration: 400656
loss: 0.9941998720169067,grad_norm: 0.8464323864900559, iteration: 400657
loss: 1.0218003988265991,grad_norm: 0.7873731358429106, iteration: 400658
loss: 1.013668179512024,grad_norm: 0.7511285417487598, iteration: 400659
loss: 1.0420376062393188,grad_norm: 0.9999996364338686, iteration: 400660
loss: 0.98579341173172,grad_norm: 0.958566266552294, iteration: 400661
loss: 1.0075081586837769,grad_norm: 0.9062207366011762, iteration: 400662
loss: 0.9722558259963989,grad_norm: 0.9106383576893134, iteration: 400663
loss: 1.0094935894012451,grad_norm: 0.8555355133293314, iteration: 400664
loss: 1.0239059925079346,grad_norm: 0.9704547393792433, iteration: 400665
loss: 1.0554810762405396,grad_norm: 0.7161771045976857, iteration: 400666
loss: 0.9912170767784119,grad_norm: 0.6636690132016038, iteration: 400667
loss: 1.0350531339645386,grad_norm: 0.9999991178914233, iteration: 400668
loss: 0.9568982720375061,grad_norm: 0.6967198832756955, iteration: 400669
loss: 0.9419403076171875,grad_norm: 0.9062112784018362, iteration: 400670
loss: 1.0220261812210083,grad_norm: 0.9632508453460636, iteration: 400671
loss: 1.0689387321472168,grad_norm: 0.8204008807589578, iteration: 400672
loss: 1.0255210399627686,grad_norm: 0.8846128628355785, iteration: 400673
loss: 1.0587868690490723,grad_norm: 0.999999432942518, iteration: 400674
loss: 1.0002188682556152,grad_norm: 0.7232703797498344, iteration: 400675
loss: 1.0093066692352295,grad_norm: 0.8627298312907006, iteration: 400676
loss: 0.9863041043281555,grad_norm: 0.994514315712409, iteration: 400677
loss: 1.0507142543792725,grad_norm: 0.7662815093953548, iteration: 400678
loss: 0.9994611144065857,grad_norm: 0.7475537841851783, iteration: 400679
loss: 0.9695158004760742,grad_norm: 0.9138917052824195, iteration: 400680
loss: 1.0127242803573608,grad_norm: 0.9999990841650875, iteration: 400681
loss: 0.9913097620010376,grad_norm: 0.9999994030273957, iteration: 400682
loss: 0.9962897300720215,grad_norm: 0.7164880230104391, iteration: 400683
loss: 0.9992890357971191,grad_norm: 0.9999991720649484, iteration: 400684
loss: 1.000830054283142,grad_norm: 0.7809311396973719, iteration: 400685
loss: 1.0799485445022583,grad_norm: 0.8414239058982794, iteration: 400686
loss: 1.0246186256408691,grad_norm: 0.7081196627798709, iteration: 400687
loss: 1.0838747024536133,grad_norm: 0.9999990541628352, iteration: 400688
loss: 1.007943868637085,grad_norm: 0.715165343657496, iteration: 400689
loss: 0.9950700402259827,grad_norm: 0.7172211880553874, iteration: 400690
loss: 1.006766676902771,grad_norm: 0.7307195241122779, iteration: 400691
loss: 0.9737634062767029,grad_norm: 0.8112626127047177, iteration: 400692
loss: 1.0075539350509644,grad_norm: 0.7755714783647107, iteration: 400693
loss: 0.9975065588951111,grad_norm: 0.7275267908549304, iteration: 400694
loss: 1.0050190687179565,grad_norm: 0.9208475383256282, iteration: 400695
loss: 1.0034788846969604,grad_norm: 0.8972540245920441, iteration: 400696
loss: 1.0291905403137207,grad_norm: 0.8412784090529568, iteration: 400697
loss: 0.9273110628128052,grad_norm: 0.7470886413735268, iteration: 400698
loss: 1.0115082263946533,grad_norm: 0.9108850923316231, iteration: 400699
loss: 1.0185850858688354,grad_norm: 0.8598936608007918, iteration: 400700
loss: 1.0553725957870483,grad_norm: 0.940072342730007, iteration: 400701
loss: 1.079671025276184,grad_norm: 0.9999996018597026, iteration: 400702
loss: 0.959646463394165,grad_norm: 0.730056865430839, iteration: 400703
loss: 0.9862416386604309,grad_norm: 0.8542259995954867, iteration: 400704
loss: 0.9982740879058838,grad_norm: 0.8191301940123353, iteration: 400705
loss: 0.9800341129302979,grad_norm: 0.8348303560598397, iteration: 400706
loss: 0.9896487593650818,grad_norm: 0.7510468718485982, iteration: 400707
loss: 0.9979293942451477,grad_norm: 0.7875744768365893, iteration: 400708
loss: 1.0623912811279297,grad_norm: 0.9156675511924004, iteration: 400709
loss: 1.0076795816421509,grad_norm: 0.7009009630159916, iteration: 400710
loss: 1.0127296447753906,grad_norm: 0.7424624188315209, iteration: 400711
loss: 0.9932877421379089,grad_norm: 0.8023210772251934, iteration: 400712
loss: 0.98614501953125,grad_norm: 0.7623679249089229, iteration: 400713
loss: 0.98686283826828,grad_norm: 0.7632066527451992, iteration: 400714
loss: 1.1103112697601318,grad_norm: 0.9999999717880864, iteration: 400715
loss: 1.0132302045822144,grad_norm: 0.9999990916677215, iteration: 400716
loss: 1.0469019412994385,grad_norm: 0.7447906460374818, iteration: 400717
loss: 0.9715490937232971,grad_norm: 0.9999993595440034, iteration: 400718
loss: 1.0335792303085327,grad_norm: 0.719305975099101, iteration: 400719
loss: 1.019270658493042,grad_norm: 0.6691014679504274, iteration: 400720
loss: 0.9756152629852295,grad_norm: 0.7452430444355206, iteration: 400721
loss: 0.9579218626022339,grad_norm: 0.6848739484379575, iteration: 400722
loss: 1.033315896987915,grad_norm: 0.8776646753454758, iteration: 400723
loss: 1.0204154253005981,grad_norm: 0.7602841650785338, iteration: 400724
loss: 1.0189579725265503,grad_norm: 0.7515506240455714, iteration: 400725
loss: 1.075024962425232,grad_norm: 0.8482982289564307, iteration: 400726
loss: 1.0171880722045898,grad_norm: 0.7486601098278699, iteration: 400727
loss: 0.9797395467758179,grad_norm: 0.8556778114763886, iteration: 400728
loss: 0.9985631108283997,grad_norm: 0.837405258211526, iteration: 400729
loss: 1.0028839111328125,grad_norm: 0.8502685882614158, iteration: 400730
loss: 1.0071979761123657,grad_norm: 0.7874015703112826, iteration: 400731
loss: 1.0597585439682007,grad_norm: 0.9999998871511117, iteration: 400732
loss: 0.9847425818443298,grad_norm: 0.7485302406446344, iteration: 400733
loss: 0.9692938923835754,grad_norm: 0.7160975271353875, iteration: 400734
loss: 1.0417039394378662,grad_norm: 0.8233569685358069, iteration: 400735
loss: 1.0004914999008179,grad_norm: 0.8023492777383923, iteration: 400736
loss: 1.029058814048767,grad_norm: 0.7101257453905024, iteration: 400737
loss: 1.095670461654663,grad_norm: 0.9999999983473347, iteration: 400738
loss: 1.018649697303772,grad_norm: 0.9415524286597211, iteration: 400739
loss: 1.011843204498291,grad_norm: 0.9052555210162371, iteration: 400740
loss: 0.9719367623329163,grad_norm: 0.7390343054470072, iteration: 400741
loss: 0.9781345725059509,grad_norm: 0.7704383227049463, iteration: 400742
loss: 0.9815409183502197,grad_norm: 0.7861809348051368, iteration: 400743
loss: 0.9854756593704224,grad_norm: 0.6994658110184301, iteration: 400744
loss: 1.0501564741134644,grad_norm: 0.7803196358082048, iteration: 400745
loss: 1.0528000593185425,grad_norm: 0.7704339933204746, iteration: 400746
loss: 1.1067695617675781,grad_norm: 0.9863599691286304, iteration: 400747
loss: 0.9867643713951111,grad_norm: 0.8636469415276083, iteration: 400748
loss: 1.0045963525772095,grad_norm: 0.785529980607691, iteration: 400749
loss: 0.9888684153556824,grad_norm: 0.846208978516858, iteration: 400750
loss: 1.000376582145691,grad_norm: 0.8704528093139222, iteration: 400751
loss: 1.0034617185592651,grad_norm: 0.764438790293305, iteration: 400752
loss: 0.979278028011322,grad_norm: 0.7522301340692208, iteration: 400753
loss: 1.0567876100540161,grad_norm: 0.9999991775935152, iteration: 400754
loss: 1.031275987625122,grad_norm: 0.7337778559617223, iteration: 400755
loss: 1.010082721710205,grad_norm: 0.6830433070022921, iteration: 400756
loss: 0.9946383833885193,grad_norm: 0.7102047754119718, iteration: 400757
loss: 0.9829846620559692,grad_norm: 0.754011889832799, iteration: 400758
loss: 1.014055848121643,grad_norm: 0.878371419391034, iteration: 400759
loss: 1.0068252086639404,grad_norm: 0.7599200154676149, iteration: 400760
loss: 0.970012903213501,grad_norm: 0.9241390611979059, iteration: 400761
loss: 1.0054939985275269,grad_norm: 0.7328084769971762, iteration: 400762
loss: 0.9887734055519104,grad_norm: 0.7134882441309697, iteration: 400763
loss: 1.0122294425964355,grad_norm: 0.736776705492084, iteration: 400764
loss: 0.9865213632583618,grad_norm: 0.8842875068962905, iteration: 400765
loss: 1.0294322967529297,grad_norm: 0.9149319806216578, iteration: 400766
loss: 0.9850134253501892,grad_norm: 0.7754195973329796, iteration: 400767
loss: 1.009323239326477,grad_norm: 0.8881112756849017, iteration: 400768
loss: 1.041910171508789,grad_norm: 0.7680531314854069, iteration: 400769
loss: 1.0055451393127441,grad_norm: 0.7350429143652859, iteration: 400770
loss: 0.9810640811920166,grad_norm: 0.7765948152905459, iteration: 400771
loss: 0.9731571078300476,grad_norm: 0.9172662215711569, iteration: 400772
loss: 1.003491997718811,grad_norm: 0.7573057498394624, iteration: 400773
loss: 1.016087532043457,grad_norm: 0.7403016070228706, iteration: 400774
loss: 0.9901277422904968,grad_norm: 0.8665808749554689, iteration: 400775
loss: 1.0311135053634644,grad_norm: 0.8850336194331409, iteration: 400776
loss: 1.0027806758880615,grad_norm: 0.8689796568585948, iteration: 400777
loss: 1.0022871494293213,grad_norm: 0.9864981646769301, iteration: 400778
loss: 1.0310677289962769,grad_norm: 0.9999993926617202, iteration: 400779
loss: 0.9910939931869507,grad_norm: 0.7727902825945404, iteration: 400780
loss: 1.0092531442642212,grad_norm: 0.6930820507264419, iteration: 400781
loss: 0.9811214804649353,grad_norm: 0.7777783641958261, iteration: 400782
loss: 0.9703748822212219,grad_norm: 0.6707711012202541, iteration: 400783
loss: 0.9968733191490173,grad_norm: 0.7561146348293293, iteration: 400784
loss: 1.0438108444213867,grad_norm: 0.8501261218385029, iteration: 400785
loss: 1.009761095046997,grad_norm: 0.9593405512103078, iteration: 400786
loss: 1.0099433660507202,grad_norm: 0.7718916064575562, iteration: 400787
loss: 1.0037730932235718,grad_norm: 0.8741108677925437, iteration: 400788
loss: 1.0073916912078857,grad_norm: 0.87165982586055, iteration: 400789
loss: 1.0049316883087158,grad_norm: 0.8156509930602152, iteration: 400790
loss: 1.0889931917190552,grad_norm: 0.9999995563134959, iteration: 400791
loss: 0.9831400513648987,grad_norm: 0.7728716679788888, iteration: 400792
loss: 1.1028778553009033,grad_norm: 0.9999992436165827, iteration: 400793
loss: 0.9911727905273438,grad_norm: 0.7188689237396007, iteration: 400794
loss: 1.012441635131836,grad_norm: 0.741568798634315, iteration: 400795
loss: 1.0692675113677979,grad_norm: 0.9502030878615378, iteration: 400796
loss: 0.9733858108520508,grad_norm: 0.7704059674532866, iteration: 400797
loss: 1.0145364999771118,grad_norm: 0.7223878959936442, iteration: 400798
loss: 1.003442645072937,grad_norm: 0.9167975437658193, iteration: 400799
loss: 1.0148727893829346,grad_norm: 0.7401009141367494, iteration: 400800
loss: 0.9669037461280823,grad_norm: 0.7245949527409167, iteration: 400801
loss: 1.016829490661621,grad_norm: 0.8174611749045694, iteration: 400802
loss: 1.019526481628418,grad_norm: 0.6777831904692431, iteration: 400803
loss: 0.9644270539283752,grad_norm: 0.7314323361401558, iteration: 400804
loss: 1.0214500427246094,grad_norm: 0.8909415833278967, iteration: 400805
loss: 0.981499433517456,grad_norm: 0.6570490945584966, iteration: 400806
loss: 1.043587327003479,grad_norm: 0.786420105208699, iteration: 400807
loss: 0.9734750390052795,grad_norm: 0.7536052713995718, iteration: 400808
loss: 0.9713606834411621,grad_norm: 0.7046652524524514, iteration: 400809
loss: 1.0045452117919922,grad_norm: 0.6806364214190149, iteration: 400810
loss: 1.0154411792755127,grad_norm: 0.8520521949373697, iteration: 400811
loss: 1.006089210510254,grad_norm: 0.894768189720972, iteration: 400812
loss: 0.9627584218978882,grad_norm: 0.739924246819584, iteration: 400813
loss: 0.9851576685905457,grad_norm: 0.7368711343076594, iteration: 400814
loss: 1.0242244005203247,grad_norm: 0.7590770113773075, iteration: 400815
loss: 1.026461124420166,grad_norm: 0.719117681580444, iteration: 400816
loss: 1.0482836961746216,grad_norm: 0.8647305106549915, iteration: 400817
loss: 1.0412980318069458,grad_norm: 0.8337179567047164, iteration: 400818
loss: 1.0317195653915405,grad_norm: 0.9999996993400678, iteration: 400819
loss: 1.0454683303833008,grad_norm: 0.999999785278536, iteration: 400820
loss: 1.0942984819412231,grad_norm: 0.9999993866122198, iteration: 400821
loss: 0.9876439571380615,grad_norm: 0.9999991544161704, iteration: 400822
loss: 1.0091551542282104,grad_norm: 0.7983387158538766, iteration: 400823
loss: 0.9705508351325989,grad_norm: 0.7432847437910463, iteration: 400824
loss: 0.9754620790481567,grad_norm: 0.8109140170666338, iteration: 400825
loss: 1.0530917644500732,grad_norm: 0.9153285738610748, iteration: 400826
loss: 1.0067346096038818,grad_norm: 0.8571856535539973, iteration: 400827
loss: 0.9458290934562683,grad_norm: 0.8251026896694105, iteration: 400828
loss: 1.010849952697754,grad_norm: 0.6798407220382472, iteration: 400829
loss: 0.9826209545135498,grad_norm: 0.864661542060985, iteration: 400830
loss: 1.032557487487793,grad_norm: 0.7230805919474683, iteration: 400831
loss: 1.0585827827453613,grad_norm: 0.999999290140992, iteration: 400832
loss: 1.0005125999450684,grad_norm: 0.8777250598721632, iteration: 400833
loss: 1.0042511224746704,grad_norm: 0.8331818114688467, iteration: 400834
loss: 1.005798101425171,grad_norm: 0.7780381729678308, iteration: 400835
loss: 1.0095914602279663,grad_norm: 0.9581474083449469, iteration: 400836
loss: 1.0169939994812012,grad_norm: 0.7959382652920739, iteration: 400837
loss: 1.0501497983932495,grad_norm: 0.9285361862381446, iteration: 400838
loss: 0.9906708598136902,grad_norm: 0.7379647692003153, iteration: 400839
loss: 1.0129441022872925,grad_norm: 0.7843868129037134, iteration: 400840
loss: 0.9931704998016357,grad_norm: 0.7447542581109172, iteration: 400841
loss: 0.9792493581771851,grad_norm: 0.7098445862426007, iteration: 400842
loss: 0.9772863984107971,grad_norm: 0.6890676998435707, iteration: 400843
loss: 0.944313108921051,grad_norm: 0.7306177364610198, iteration: 400844
loss: 1.0072101354599,grad_norm: 0.8088687163018393, iteration: 400845
loss: 0.9769892692565918,grad_norm: 0.8853306898391909, iteration: 400846
loss: 1.1318585872650146,grad_norm: 0.8383003671929125, iteration: 400847
loss: 0.967156708240509,grad_norm: 0.7279594903508114, iteration: 400848
loss: 1.010550856590271,grad_norm: 0.6170311294199904, iteration: 400849
loss: 0.9792942404747009,grad_norm: 0.7450843248192587, iteration: 400850
loss: 1.0005443096160889,grad_norm: 0.8444421712695335, iteration: 400851
loss: 1.0085926055908203,grad_norm: 0.6790406048473041, iteration: 400852
loss: 1.0353084802627563,grad_norm: 0.9999992553746271, iteration: 400853
loss: 0.9992247223854065,grad_norm: 0.7656432111238859, iteration: 400854
loss: 0.9880983829498291,grad_norm: 0.7120630652738146, iteration: 400855
loss: 0.9738869071006775,grad_norm: 0.8067879670128606, iteration: 400856
loss: 1.0279731750488281,grad_norm: 0.8044077509313119, iteration: 400857
loss: 1.0080491304397583,grad_norm: 0.7617324629113323, iteration: 400858
loss: 0.9867105484008789,grad_norm: 0.7483206912889689, iteration: 400859
loss: 1.1560242176055908,grad_norm: 0.9999995826915927, iteration: 400860
loss: 0.9899255633354187,grad_norm: 0.680479076851828, iteration: 400861
loss: 1.112046718597412,grad_norm: 0.9999989609755265, iteration: 400862
loss: 1.0514612197875977,grad_norm: 0.7990866890215766, iteration: 400863
loss: 0.9981943964958191,grad_norm: 0.70071340179054, iteration: 400864
loss: 1.0119948387145996,grad_norm: 0.7461537812040805, iteration: 400865
loss: 0.9856072068214417,grad_norm: 0.859683664068819, iteration: 400866
loss: 0.9844357371330261,grad_norm: 0.7603085478362938, iteration: 400867
loss: 0.9966573715209961,grad_norm: 0.7357610517342323, iteration: 400868
loss: 1.0157055854797363,grad_norm: 0.7715405285576961, iteration: 400869
loss: 1.0259103775024414,grad_norm: 0.8544393208493448, iteration: 400870
loss: 0.9808729887008667,grad_norm: 0.8185363922203246, iteration: 400871
loss: 1.0154826641082764,grad_norm: 0.9581574180631108, iteration: 400872
loss: 0.9987874031066895,grad_norm: 0.7721741415546564, iteration: 400873
loss: 1.0012770891189575,grad_norm: 0.8090191040146805, iteration: 400874
loss: 1.0260365009307861,grad_norm: 0.8404060675150821, iteration: 400875
loss: 0.9811557531356812,grad_norm: 0.7772840126866759, iteration: 400876
loss: 1.0182547569274902,grad_norm: 0.9172854319061183, iteration: 400877
loss: 0.9798535704612732,grad_norm: 0.6538614654143224, iteration: 400878
loss: 0.9799142479896545,grad_norm: 0.7531739548394448, iteration: 400879
loss: 0.9547373056411743,grad_norm: 0.9484836468526457, iteration: 400880
loss: 0.9890291094779968,grad_norm: 0.6728517233927928, iteration: 400881
loss: 0.9935486316680908,grad_norm: 0.9999992843215614, iteration: 400882
loss: 1.004922866821289,grad_norm: 0.7486896863417376, iteration: 400883
loss: 1.0008710622787476,grad_norm: 0.6672218041002229, iteration: 400884
loss: 0.9795262217521667,grad_norm: 0.7566851214691785, iteration: 400885
loss: 0.998069703578949,grad_norm: 0.999999800191518, iteration: 400886
loss: 1.0047961473464966,grad_norm: 0.9999991716890164, iteration: 400887
loss: 1.0360350608825684,grad_norm: 0.7969753134964155, iteration: 400888
loss: 1.077770471572876,grad_norm: 0.9999992817206333, iteration: 400889
loss: 1.019896388053894,grad_norm: 0.9553610855226591, iteration: 400890
loss: 0.9792428612709045,grad_norm: 0.7661875455703641, iteration: 400891
loss: 0.9924576282501221,grad_norm: 0.7895381517797724, iteration: 400892
loss: 1.0160040855407715,grad_norm: 0.9999996228192506, iteration: 400893
loss: 1.0375748872756958,grad_norm: 0.9051328598314631, iteration: 400894
loss: 1.0017294883728027,grad_norm: 0.8566732807167274, iteration: 400895
loss: 1.0105509757995605,grad_norm: 0.9788073158798329, iteration: 400896
loss: 1.0713037252426147,grad_norm: 0.9999993290231123, iteration: 400897
loss: 0.9421988129615784,grad_norm: 0.8300140889578086, iteration: 400898
loss: 1.013137936592102,grad_norm: 0.816827865675159, iteration: 400899
loss: 1.0125261545181274,grad_norm: 0.8411240272728209, iteration: 400900
loss: 0.9619530439376831,grad_norm: 0.7140255625816845, iteration: 400901
loss: 0.9930163621902466,grad_norm: 0.9999998452109394, iteration: 400902
loss: 1.0088908672332764,grad_norm: 0.7668537030746385, iteration: 400903
loss: 0.9828984141349792,grad_norm: 0.780117730389392, iteration: 400904
loss: 1.0051084756851196,grad_norm: 0.7811222935924521, iteration: 400905
loss: 1.0116479396820068,grad_norm: 0.9999991145057691, iteration: 400906
loss: 0.9979938864707947,grad_norm: 0.6918933255596296, iteration: 400907
loss: 0.9938429594039917,grad_norm: 0.7483244876521491, iteration: 400908
loss: 1.0089962482452393,grad_norm: 0.7020541910190823, iteration: 400909
loss: 1.0142894983291626,grad_norm: 0.7652403992990683, iteration: 400910
loss: 0.9951512217521667,grad_norm: 0.8037305401902008, iteration: 400911
loss: 1.0024363994598389,grad_norm: 0.8420809359167575, iteration: 400912
loss: 0.9718756675720215,grad_norm: 0.7020610173176839, iteration: 400913
loss: 0.9895958304405212,grad_norm: 0.788046760823368, iteration: 400914
loss: 1.0014723539352417,grad_norm: 0.7212489845519945, iteration: 400915
loss: 0.9969095587730408,grad_norm: 0.723429224356927, iteration: 400916
loss: 1.006244421005249,grad_norm: 0.7944414329264013, iteration: 400917
loss: 1.0082868337631226,grad_norm: 0.6158061028505423, iteration: 400918
loss: 0.9758493304252625,grad_norm: 0.7525777999391393, iteration: 400919
loss: 0.9932150840759277,grad_norm: 0.8588535236290505, iteration: 400920
loss: 0.9919889569282532,grad_norm: 0.8708724355586717, iteration: 400921
loss: 1.001400113105774,grad_norm: 0.9258158144262487, iteration: 400922
loss: 1.0025098323822021,grad_norm: 0.821147567289896, iteration: 400923
loss: 1.0025159120559692,grad_norm: 0.680762443094951, iteration: 400924
loss: 0.9904680848121643,grad_norm: 0.7364176100574933, iteration: 400925
loss: 1.0225684642791748,grad_norm: 0.984920839722761, iteration: 400926
loss: 1.014435887336731,grad_norm: 0.7647130920589452, iteration: 400927
loss: 0.9912726283073425,grad_norm: 0.7691561726907304, iteration: 400928
loss: 1.00702965259552,grad_norm: 0.9999996731736503, iteration: 400929
loss: 1.0415631532669067,grad_norm: 0.7688682247746598, iteration: 400930
loss: 1.0127211809158325,grad_norm: 0.8692798578754974, iteration: 400931
loss: 0.9843907356262207,grad_norm: 0.7761564144436284, iteration: 400932
loss: 1.0438315868377686,grad_norm: 0.8440176512569222, iteration: 400933
loss: 1.0142700672149658,grad_norm: 0.9953405622929968, iteration: 400934
loss: 1.0305815935134888,grad_norm: 0.9614705700891574, iteration: 400935
loss: 0.9956174492835999,grad_norm: 0.8326397006305369, iteration: 400936
loss: 0.9699663519859314,grad_norm: 0.8364094507614241, iteration: 400937
loss: 0.9757464528083801,grad_norm: 0.7830325427890765, iteration: 400938
loss: 1.0094032287597656,grad_norm: 0.6709720205577743, iteration: 400939
loss: 0.9968267679214478,grad_norm: 0.9999991743695485, iteration: 400940
loss: 0.9937739372253418,grad_norm: 0.9999993311967866, iteration: 400941
loss: 1.041639804840088,grad_norm: 0.9075572472174495, iteration: 400942
loss: 1.0607184171676636,grad_norm: 0.7522244505595823, iteration: 400943
loss: 0.9886135458946228,grad_norm: 0.8050488328750741, iteration: 400944
loss: 0.9966888427734375,grad_norm: 0.9312328084204076, iteration: 400945
loss: 0.9984560608863831,grad_norm: 0.7930140116902663, iteration: 400946
loss: 0.992158055305481,grad_norm: 0.7109227950494579, iteration: 400947
loss: 1.047393560409546,grad_norm: 0.9999992701852666, iteration: 400948
loss: 0.9421494603157043,grad_norm: 0.9999992064143085, iteration: 400949
loss: 0.9920016527175903,grad_norm: 0.9164315713557168, iteration: 400950
loss: 1.0307533740997314,grad_norm: 0.8352862427872324, iteration: 400951
loss: 1.065513253211975,grad_norm: 0.6836453711309732, iteration: 400952
loss: 0.9915793538093567,grad_norm: 0.999999399675787, iteration: 400953
loss: 0.9912117719650269,grad_norm: 0.9999991012200138, iteration: 400954
loss: 1.0164058208465576,grad_norm: 0.8143500919261595, iteration: 400955
loss: 1.01149320602417,grad_norm: 0.9006497540906372, iteration: 400956
loss: 0.9972780346870422,grad_norm: 0.7152145435531408, iteration: 400957
loss: 0.9905397295951843,grad_norm: 0.7976626361193068, iteration: 400958
loss: 0.9795251488685608,grad_norm: 0.9995913412526408, iteration: 400959
loss: 1.0192420482635498,grad_norm: 0.8215290974444528, iteration: 400960
loss: 1.001814603805542,grad_norm: 0.866732936698672, iteration: 400961
loss: 0.9879579544067383,grad_norm: 0.7205488923713602, iteration: 400962
loss: 0.9933632016181946,grad_norm: 0.6633605603884319, iteration: 400963
loss: 0.9953663349151611,grad_norm: 0.891596185295645, iteration: 400964
loss: 0.9457698464393616,grad_norm: 0.7741537198970724, iteration: 400965
loss: 1.0212819576263428,grad_norm: 0.7492574045371028, iteration: 400966
loss: 1.0673301219940186,grad_norm: 0.8107990342420031, iteration: 400967
loss: 1.0027810335159302,grad_norm: 0.8031578203887176, iteration: 400968
loss: 1.0103483200073242,grad_norm: 0.8958937784987584, iteration: 400969
loss: 0.9973447918891907,grad_norm: 0.8311760617823348, iteration: 400970
loss: 1.0069169998168945,grad_norm: 0.9999990573217391, iteration: 400971
loss: 0.999629557132721,grad_norm: 0.7138449038531447, iteration: 400972
loss: 0.9684266448020935,grad_norm: 0.6601365240842644, iteration: 400973
loss: 1.0082805156707764,grad_norm: 0.7872290880755561, iteration: 400974
loss: 1.0133501291275024,grad_norm: 0.9327755437398567, iteration: 400975
loss: 1.0285851955413818,grad_norm: 0.8420641186954557, iteration: 400976
loss: 0.9734663963317871,grad_norm: 0.9642323083658498, iteration: 400977
loss: 0.9790295362472534,grad_norm: 0.8438885518487932, iteration: 400978
loss: 0.9500423669815063,grad_norm: 0.7213471351688923, iteration: 400979
loss: 1.0097875595092773,grad_norm: 0.884695100160763, iteration: 400980
loss: 1.03609037399292,grad_norm: 0.7972113386880033, iteration: 400981
loss: 1.0095019340515137,grad_norm: 0.870484871246721, iteration: 400982
loss: 1.0039857625961304,grad_norm: 0.872414926125535, iteration: 400983
loss: 0.9610744118690491,grad_norm: 0.7461306016284744, iteration: 400984
loss: 0.9574747681617737,grad_norm: 0.815076134771808, iteration: 400985
loss: 0.9991785287857056,grad_norm: 0.7624712748817388, iteration: 400986
loss: 1.0297795534133911,grad_norm: 0.8239015432496779, iteration: 400987
loss: 0.9840304255485535,grad_norm: 0.7624526130456534, iteration: 400988
loss: 0.9930272102355957,grad_norm: 0.8041636270326223, iteration: 400989
loss: 1.0309009552001953,grad_norm: 0.7687092830268837, iteration: 400990
loss: 1.0204051733016968,grad_norm: 0.999999278016573, iteration: 400991
loss: 1.0245163440704346,grad_norm: 0.87459523789037, iteration: 400992
loss: 1.047620415687561,grad_norm: 0.8301181971425403, iteration: 400993
loss: 1.0007740259170532,grad_norm: 0.7348110864203752, iteration: 400994
loss: 1.0371911525726318,grad_norm: 0.9263827588355248, iteration: 400995
loss: 1.0187751054763794,grad_norm: 0.6987841653854613, iteration: 400996
loss: 0.9976366758346558,grad_norm: 0.791993698808527, iteration: 400997
loss: 1.0382331609725952,grad_norm: 0.9999989862693347, iteration: 400998
loss: 1.0213961601257324,grad_norm: 0.7901075347744314, iteration: 400999
loss: 0.9729921817779541,grad_norm: 0.7100088545989222, iteration: 401000
loss: 0.9880002737045288,grad_norm: 0.9341027119839732, iteration: 401001
loss: 0.9976993799209595,grad_norm: 0.7813760898040876, iteration: 401002
loss: 1.0200953483581543,grad_norm: 0.8399701213618774, iteration: 401003
loss: 1.030328631401062,grad_norm: 0.8266146027368577, iteration: 401004
loss: 0.9660863876342773,grad_norm: 0.6628009424080384, iteration: 401005
loss: 1.0092871189117432,grad_norm: 0.6502002225351285, iteration: 401006
loss: 1.0089747905731201,grad_norm: 0.9999991042715878, iteration: 401007
loss: 1.005294919013977,grad_norm: 0.6898501683551396, iteration: 401008
loss: 0.9854401350021362,grad_norm: 0.8584406871554019, iteration: 401009
loss: 1.018637776374817,grad_norm: 0.8713344682382511, iteration: 401010
loss: 1.0019433498382568,grad_norm: 0.8703902624450557, iteration: 401011
loss: 0.95789635181427,grad_norm: 0.7096869814345861, iteration: 401012
loss: 0.9873329401016235,grad_norm: 0.9999995000066606, iteration: 401013
loss: 1.0187857151031494,grad_norm: 0.9679031923336192, iteration: 401014
loss: 0.9909669160842896,grad_norm: 0.7927878561986554, iteration: 401015
loss: 0.9904820919036865,grad_norm: 0.7863710385847954, iteration: 401016
loss: 1.003360390663147,grad_norm: 0.8417011313083288, iteration: 401017
loss: 0.9672713875770569,grad_norm: 0.8319442141900814, iteration: 401018
loss: 1.0270769596099854,grad_norm: 0.9075500931723246, iteration: 401019
loss: 1.042755126953125,grad_norm: 0.901280132180724, iteration: 401020
loss: 1.0458799600601196,grad_norm: 0.9460663949526621, iteration: 401021
loss: 0.9700688719749451,grad_norm: 0.7797657677826918, iteration: 401022
loss: 1.0253270864486694,grad_norm: 0.9409188540349092, iteration: 401023
loss: 0.9895208477973938,grad_norm: 0.8151040779624458, iteration: 401024
loss: 1.0141208171844482,grad_norm: 0.8465015126246976, iteration: 401025
loss: 1.0353258848190308,grad_norm: 0.8092192680654842, iteration: 401026
loss: 0.9799920320510864,grad_norm: 0.7643046096246388, iteration: 401027
loss: 1.0016230344772339,grad_norm: 0.6930145757350524, iteration: 401028
loss: 1.0270341634750366,grad_norm: 0.9388659449132191, iteration: 401029
loss: 0.9977297186851501,grad_norm: 0.6872034656281596, iteration: 401030
loss: 1.0074491500854492,grad_norm: 0.8482932755992567, iteration: 401031
loss: 0.9824979305267334,grad_norm: 0.7288586538370341, iteration: 401032
loss: 1.0044537782669067,grad_norm: 0.9999990770367556, iteration: 401033
loss: 1.0956766605377197,grad_norm: 0.9999999540547517, iteration: 401034
loss: 0.9923246502876282,grad_norm: 0.7673846321691703, iteration: 401035
loss: 0.9966344237327576,grad_norm: 0.8365431016287141, iteration: 401036
loss: 1.0007365942001343,grad_norm: 0.850728190061681, iteration: 401037
loss: 1.0071581602096558,grad_norm: 0.7322573210122713, iteration: 401038
loss: 0.9728614687919617,grad_norm: 0.8374692647185145, iteration: 401039
loss: 0.9788575172424316,grad_norm: 0.9999992048045373, iteration: 401040
loss: 1.0109695196151733,grad_norm: 0.7808073430240325, iteration: 401041
loss: 1.0418601036071777,grad_norm: 0.8125175590945994, iteration: 401042
loss: 0.9741093516349792,grad_norm: 0.9999992310437089, iteration: 401043
loss: 1.0137308835983276,grad_norm: 0.9533419564488927, iteration: 401044
loss: 1.030938982963562,grad_norm: 0.9397115288780707, iteration: 401045
loss: 0.9889131784439087,grad_norm: 0.807853398187956, iteration: 401046
loss: 0.976951003074646,grad_norm: 0.803381099047834, iteration: 401047
loss: 1.0140821933746338,grad_norm: 0.9144574417999078, iteration: 401048
loss: 1.0037133693695068,grad_norm: 0.8144494768890805, iteration: 401049
loss: 0.9959075450897217,grad_norm: 0.8343241035007151, iteration: 401050
loss: 0.9876022338867188,grad_norm: 0.8468911115288953, iteration: 401051
loss: 0.9835146069526672,grad_norm: 0.8503625464371718, iteration: 401052
loss: 1.0309512615203857,grad_norm: 0.7643880978041008, iteration: 401053
loss: 0.9844937920570374,grad_norm: 0.6946478882094, iteration: 401054
loss: 1.0025444030761719,grad_norm: 0.8365868094251561, iteration: 401055
loss: 0.969674825668335,grad_norm: 0.8952509098905118, iteration: 401056
loss: 0.9959713816642761,grad_norm: 0.9091347896524499, iteration: 401057
loss: 0.9797755479812622,grad_norm: 0.7772743101308704, iteration: 401058
loss: 0.974419891834259,grad_norm: 0.6826370985418103, iteration: 401059
loss: 0.9811655282974243,grad_norm: 0.8192762256409394, iteration: 401060
loss: 0.9707044959068298,grad_norm: 0.838401374682971, iteration: 401061
loss: 1.0300569534301758,grad_norm: 0.6693729640352237, iteration: 401062
loss: 0.9956910610198975,grad_norm: 0.7707232250836308, iteration: 401063
loss: 1.0778372287750244,grad_norm: 0.931323631608193, iteration: 401064
loss: 1.1412782669067383,grad_norm: 0.9999995843337642, iteration: 401065
loss: 1.0372356176376343,grad_norm: 0.968151890447667, iteration: 401066
loss: 0.9885708093643188,grad_norm: 0.8200469517295655, iteration: 401067
loss: 0.9909886717796326,grad_norm: 0.7103743716468243, iteration: 401068
loss: 0.9828793406486511,grad_norm: 0.8381412296366884, iteration: 401069
loss: 1.0338737964630127,grad_norm: 0.9999991880353932, iteration: 401070
loss: 0.9731130599975586,grad_norm: 0.8153103686399672, iteration: 401071
loss: 1.0138115882873535,grad_norm: 0.7221360465257592, iteration: 401072
loss: 1.0236387252807617,grad_norm: 0.87521512822304, iteration: 401073
loss: 1.0098854303359985,grad_norm: 0.9999992134141018, iteration: 401074
loss: 1.0818883180618286,grad_norm: 0.9999992230841767, iteration: 401075
loss: 0.9911455512046814,grad_norm: 0.6700208456087621, iteration: 401076
loss: 0.9745368361473083,grad_norm: 0.6972087504581641, iteration: 401077
loss: 0.9884123802185059,grad_norm: 0.9058211470662741, iteration: 401078
loss: 1.0425134897232056,grad_norm: 0.9999996330370386, iteration: 401079
loss: 1.0414187908172607,grad_norm: 0.5925155851882933, iteration: 401080
loss: 0.9730336666107178,grad_norm: 0.9999993848870263, iteration: 401081
loss: 0.9817060232162476,grad_norm: 0.8966085687130347, iteration: 401082
loss: 1.0322657823562622,grad_norm: 0.9831639419360381, iteration: 401083
loss: 1.0482417345046997,grad_norm: 0.7433345704084763, iteration: 401084
loss: 1.0086722373962402,grad_norm: 0.7533702780361327, iteration: 401085
loss: 1.0022709369659424,grad_norm: 0.7812417658400813, iteration: 401086
loss: 1.0533231496810913,grad_norm: 0.8830332536429616, iteration: 401087
loss: 1.0081804990768433,grad_norm: 0.8561237321557863, iteration: 401088
loss: 0.9940271377563477,grad_norm: 0.7165798196418384, iteration: 401089
loss: 1.016777753829956,grad_norm: 0.945374346593534, iteration: 401090
loss: 1.0240700244903564,grad_norm: 0.7436616461662005, iteration: 401091
loss: 1.036542534828186,grad_norm: 0.9563053550337264, iteration: 401092
loss: 0.9937445521354675,grad_norm: 0.7354572469183243, iteration: 401093
loss: 1.020317792892456,grad_norm: 0.890138967407776, iteration: 401094
loss: 1.0256773233413696,grad_norm: 0.8588655958038842, iteration: 401095
loss: 1.011605143547058,grad_norm: 0.7706256805983678, iteration: 401096
loss: 1.0104146003723145,grad_norm: 0.7373898446418368, iteration: 401097
loss: 1.0206774473190308,grad_norm: 0.778286199247661, iteration: 401098
loss: 0.9668639898300171,grad_norm: 0.7857592916122255, iteration: 401099
loss: 1.0121232271194458,grad_norm: 0.7808160559929266, iteration: 401100
loss: 0.9941082000732422,grad_norm: 0.7179013678228253, iteration: 401101
loss: 1.0676220655441284,grad_norm: 0.8137478497722246, iteration: 401102
loss: 0.991722822189331,grad_norm: 0.7712027707586354, iteration: 401103
loss: 0.9741235375404358,grad_norm: 0.7438072828281329, iteration: 401104
loss: 1.044930100440979,grad_norm: 0.9220813572522099, iteration: 401105
loss: 1.068993091583252,grad_norm: 0.9999991005718701, iteration: 401106
loss: 0.9815034866333008,grad_norm: 0.7265013266776357, iteration: 401107
loss: 1.0194058418273926,grad_norm: 0.931575143977529, iteration: 401108
loss: 1.0349169969558716,grad_norm: 0.845335322161454, iteration: 401109
loss: 1.0022963285446167,grad_norm: 0.7529338701544483, iteration: 401110
loss: 1.0066381692886353,grad_norm: 0.7445180447342311, iteration: 401111
loss: 0.9917197823524475,grad_norm: 0.8580070297740986, iteration: 401112
loss: 0.9591221809387207,grad_norm: 0.9469832680970872, iteration: 401113
loss: 0.9634133577346802,grad_norm: 0.8490944006587966, iteration: 401114
loss: 1.0016733407974243,grad_norm: 0.8556031800012689, iteration: 401115
loss: 1.0091272592544556,grad_norm: 0.8656370394266718, iteration: 401116
loss: 1.0077522993087769,grad_norm: 0.8087120380754044, iteration: 401117
loss: 1.0073996782302856,grad_norm: 0.999999169386866, iteration: 401118
loss: 1.002612829208374,grad_norm: 0.782756416179343, iteration: 401119
loss: 1.0242962837219238,grad_norm: 0.7001852610120012, iteration: 401120
loss: 0.9989259839057922,grad_norm: 0.7253480865924744, iteration: 401121
loss: 0.9696799516677856,grad_norm: 0.804583702959489, iteration: 401122
loss: 1.0306520462036133,grad_norm: 0.8440890929214965, iteration: 401123
loss: 0.977297306060791,grad_norm: 0.8448802356828745, iteration: 401124
loss: 0.9925968647003174,grad_norm: 0.6804270407934554, iteration: 401125
loss: 1.0005033016204834,grad_norm: 0.9999994753270416, iteration: 401126
loss: 0.993218183517456,grad_norm: 0.7803552190139761, iteration: 401127
loss: 1.01546049118042,grad_norm: 0.9999996228156108, iteration: 401128
loss: 1.0185315608978271,grad_norm: 0.7750298786244382, iteration: 401129
loss: 1.0039918422698975,grad_norm: 0.7647731668798775, iteration: 401130
loss: 0.9959940910339355,grad_norm: 0.7159847793495951, iteration: 401131
loss: 0.9978748559951782,grad_norm: 0.7855699458553906, iteration: 401132
loss: 0.9772976040840149,grad_norm: 0.6788578455559098, iteration: 401133
loss: 1.016159176826477,grad_norm: 0.6098208374866647, iteration: 401134
loss: 1.0485321283340454,grad_norm: 0.9999995509918657, iteration: 401135
loss: 1.0207897424697876,grad_norm: 0.7500548133475401, iteration: 401136
loss: 0.9929191470146179,grad_norm: 0.6984857864817472, iteration: 401137
loss: 0.9630440473556519,grad_norm: 0.76051925436832, iteration: 401138
loss: 0.9947300553321838,grad_norm: 0.8352119080886664, iteration: 401139
loss: 1.051923155784607,grad_norm: 0.999999559440576, iteration: 401140
loss: 1.0374157428741455,grad_norm: 0.9999993686909268, iteration: 401141
loss: 0.992066502571106,grad_norm: 0.7898151088465876, iteration: 401142
loss: 1.011091709136963,grad_norm: 0.8371113621574691, iteration: 401143
loss: 0.9985252022743225,grad_norm: 0.7542248545422129, iteration: 401144
loss: 1.0031620264053345,grad_norm: 0.8456871778320012, iteration: 401145
loss: 0.9966156482696533,grad_norm: 0.9999989966294119, iteration: 401146
loss: 1.0581356287002563,grad_norm: 0.7212848221711438, iteration: 401147
loss: 0.9608350992202759,grad_norm: 0.7748191281457958, iteration: 401148
loss: 1.0134721994400024,grad_norm: 0.9999998981105106, iteration: 401149
loss: 1.0134862661361694,grad_norm: 0.999999066972531, iteration: 401150
loss: 1.0266300439834595,grad_norm: 0.9119836815521659, iteration: 401151
loss: 0.9815673828125,grad_norm: 0.7801879759001719, iteration: 401152
loss: 1.0095794200897217,grad_norm: 0.7153019867216475, iteration: 401153
loss: 0.9692665338516235,grad_norm: 0.7138438802324477, iteration: 401154
loss: 0.9883045554161072,grad_norm: 0.8392697575408381, iteration: 401155
loss: 0.9803377985954285,grad_norm: 0.8327353048127549, iteration: 401156
loss: 1.0169297456741333,grad_norm: 0.7500889958168154, iteration: 401157
loss: 1.0155752897262573,grad_norm: 0.7432465418298873, iteration: 401158
loss: 0.9932319521903992,grad_norm: 0.9186142284283717, iteration: 401159
loss: 1.1505982875823975,grad_norm: 0.999999808417429, iteration: 401160
loss: 1.0504423379898071,grad_norm: 0.8575714052186574, iteration: 401161
loss: 1.0114142894744873,grad_norm: 0.7252193100167023, iteration: 401162
loss: 1.0103853940963745,grad_norm: 0.6923059513898425, iteration: 401163
loss: 0.9839656352996826,grad_norm: 0.8316002188741334, iteration: 401164
loss: 1.0199605226516724,grad_norm: 0.6327453273347484, iteration: 401165
loss: 0.9756180644035339,grad_norm: 0.7334463673531078, iteration: 401166
loss: 0.9925565123558044,grad_norm: 0.9999992248744649, iteration: 401167
loss: 1.220395803451538,grad_norm: 0.9999998489224509, iteration: 401168
loss: 1.0140379667282104,grad_norm: 0.9340706718772046, iteration: 401169
loss: 1.0340954065322876,grad_norm: 0.8275346839187495, iteration: 401170
loss: 0.992991030216217,grad_norm: 0.8728690338095733, iteration: 401171
loss: 0.9837134480476379,grad_norm: 0.7809508957939065, iteration: 401172
loss: 0.9919347763061523,grad_norm: 0.918474627174454, iteration: 401173
loss: 0.9890353679656982,grad_norm: 0.8092990263670966, iteration: 401174
loss: 1.0433378219604492,grad_norm: 0.8999882940513667, iteration: 401175
loss: 0.9685333371162415,grad_norm: 0.6446492711549533, iteration: 401176
loss: 1.0233181715011597,grad_norm: 0.8261549421194183, iteration: 401177
loss: 0.9805259704589844,grad_norm: 0.9132378227939, iteration: 401178
loss: 0.9834743738174438,grad_norm: 0.8186781691371879, iteration: 401179
loss: 1.1180683374404907,grad_norm: 1.0000000304239107, iteration: 401180
loss: 0.9989223480224609,grad_norm: 0.7443645907496307, iteration: 401181
loss: 1.016778826713562,grad_norm: 0.8631566453918776, iteration: 401182
loss: 0.9772682785987854,grad_norm: 0.9999997611718431, iteration: 401183
loss: 0.9875006079673767,grad_norm: 0.8600785068962292, iteration: 401184
loss: 0.9929361343383789,grad_norm: 0.7687342998410243, iteration: 401185
loss: 0.9960513710975647,grad_norm: 0.81731811763795, iteration: 401186
loss: 0.9967995285987854,grad_norm: 0.717801891229895, iteration: 401187
loss: 0.9951674938201904,grad_norm: 0.655712643360774, iteration: 401188
loss: 0.9934918880462646,grad_norm: 0.7919196103000541, iteration: 401189
loss: 1.0058001279830933,grad_norm: 0.8589927109942355, iteration: 401190
loss: 1.0371651649475098,grad_norm: 0.8238623965974109, iteration: 401191
loss: 0.9832960367202759,grad_norm: 0.8164451267239698, iteration: 401192
loss: 1.0146762132644653,grad_norm: 0.9999995105065146, iteration: 401193
loss: 1.0365147590637207,grad_norm: 0.9999995706975138, iteration: 401194
loss: 0.9897069931030273,grad_norm: 0.9130875994842468, iteration: 401195
loss: 0.976601243019104,grad_norm: 0.7409493354288706, iteration: 401196
loss: 0.9907981157302856,grad_norm: 0.9092578489553323, iteration: 401197
loss: 1.0112249851226807,grad_norm: 0.694660345254663, iteration: 401198
loss: 1.0291829109191895,grad_norm: 0.9109220509825945, iteration: 401199
loss: 0.9926711916923523,grad_norm: 0.7166302016953451, iteration: 401200
loss: 0.9790142774581909,grad_norm: 0.848181066068183, iteration: 401201
loss: 0.9987987875938416,grad_norm: 0.7441534824178413, iteration: 401202
loss: 0.9668780565261841,grad_norm: 0.6767179493796016, iteration: 401203
loss: 0.9838733077049255,grad_norm: 0.7418951900733314, iteration: 401204
loss: 1.0672521591186523,grad_norm: 1.0000000243570035, iteration: 401205
loss: 0.9949131608009338,grad_norm: 0.8113236988640643, iteration: 401206
loss: 1.0118846893310547,grad_norm: 0.7382989832865345, iteration: 401207
loss: 0.9765732288360596,grad_norm: 0.8393040374870708, iteration: 401208
loss: 1.0318071842193604,grad_norm: 0.999999191759973, iteration: 401209
loss: 0.9967256188392639,grad_norm: 0.8265125066143616, iteration: 401210
loss: 0.9967434406280518,grad_norm: 0.6884002522462979, iteration: 401211
loss: 1.0254050493240356,grad_norm: 0.8084435680292518, iteration: 401212
loss: 0.9900845885276794,grad_norm: 0.6931808844320801, iteration: 401213
loss: 1.0286643505096436,grad_norm: 0.8655943268542132, iteration: 401214
loss: 1.0114065408706665,grad_norm: 0.7377517456181568, iteration: 401215
loss: 1.0127066373825073,grad_norm: 0.7770817527064293, iteration: 401216
loss: 0.9553486108779907,grad_norm: 0.7453354480517875, iteration: 401217
loss: 1.0184619426727295,grad_norm: 0.8484300374724927, iteration: 401218
loss: 0.984307587146759,grad_norm: 0.7347811990523544, iteration: 401219
loss: 1.0960501432418823,grad_norm: 0.787048602837399, iteration: 401220
loss: 1.017583966255188,grad_norm: 0.9999991506360858, iteration: 401221
loss: 0.9548342823982239,grad_norm: 0.7922730615205179, iteration: 401222
loss: 0.9805552959442139,grad_norm: 0.7619790313506697, iteration: 401223
loss: 0.9976512789726257,grad_norm: 0.8009262682971015, iteration: 401224
loss: 1.0102423429489136,grad_norm: 0.9999991870590283, iteration: 401225
loss: 0.9677868485450745,grad_norm: 0.7853785682639115, iteration: 401226
loss: 0.9610219597816467,grad_norm: 0.8438863565687651, iteration: 401227
loss: 0.9934048652648926,grad_norm: 0.7710551337614766, iteration: 401228
loss: 1.0323814153671265,grad_norm: 0.9999998879096434, iteration: 401229
loss: 1.0237759351730347,grad_norm: 0.9999994417967399, iteration: 401230
loss: 0.9848899245262146,grad_norm: 0.875084796731337, iteration: 401231
loss: 0.9711868762969971,grad_norm: 0.7543119398921129, iteration: 401232
loss: 1.007299780845642,grad_norm: 0.6940837486979774, iteration: 401233
loss: 1.002467393875122,grad_norm: 0.673716607255777, iteration: 401234
loss: 1.0525743961334229,grad_norm: 0.7956332886475422, iteration: 401235
loss: 0.9947483539581299,grad_norm: 0.7038909008859221, iteration: 401236
loss: 0.9897251725196838,grad_norm: 0.707397936692145, iteration: 401237
loss: 0.9862309098243713,grad_norm: 0.8974853068590715, iteration: 401238
loss: 0.9964688420295715,grad_norm: 0.8399094966936796, iteration: 401239
loss: 1.0020580291748047,grad_norm: 0.7546091744737897, iteration: 401240
loss: 0.9799591302871704,grad_norm: 0.8512435712861838, iteration: 401241
loss: 0.9944392442703247,grad_norm: 0.6699685789992903, iteration: 401242
loss: 1.0221878290176392,grad_norm: 0.9892176385772599, iteration: 401243
loss: 1.0090404748916626,grad_norm: 0.6912546341174917, iteration: 401244
loss: 0.9901362657546997,grad_norm: 0.9010431833954721, iteration: 401245
loss: 1.004406452178955,grad_norm: 0.657052259333484, iteration: 401246
loss: 0.9914283156394958,grad_norm: 0.8781443061129116, iteration: 401247
loss: 0.9903327226638794,grad_norm: 0.8051090116995805, iteration: 401248
loss: 1.0394628047943115,grad_norm: 0.8276444507315284, iteration: 401249
loss: 1.0277506113052368,grad_norm: 0.9999996533645195, iteration: 401250
loss: 1.0026715993881226,grad_norm: 0.935083129782641, iteration: 401251
loss: 1.0086090564727783,grad_norm: 0.9436010932501374, iteration: 401252
loss: 0.9983137249946594,grad_norm: 0.7655770372834743, iteration: 401253
loss: 0.9708012342453003,grad_norm: 0.8124727368901147, iteration: 401254
loss: 0.981748640537262,grad_norm: 0.821787277238162, iteration: 401255
loss: 1.0105385780334473,grad_norm: 0.8414500267594613, iteration: 401256
loss: 0.9562786221504211,grad_norm: 0.65575294836623, iteration: 401257
loss: 0.9838130474090576,grad_norm: 0.7995496243709218, iteration: 401258
loss: 0.9863631129264832,grad_norm: 0.9999991295580568, iteration: 401259
loss: 0.9845831990242004,grad_norm: 0.6666922608034767, iteration: 401260
loss: 1.0138009786605835,grad_norm: 0.7102651455817532, iteration: 401261
loss: 1.0176429748535156,grad_norm: 0.7540172135983969, iteration: 401262
loss: 0.9916166663169861,grad_norm: 0.7431943403998169, iteration: 401263
loss: 0.9915165305137634,grad_norm: 0.905460062971666, iteration: 401264
loss: 0.9827334880828857,grad_norm: 0.8194474373754104, iteration: 401265
loss: 1.0224980115890503,grad_norm: 0.7466339529126564, iteration: 401266
loss: 0.9912359118461609,grad_norm: 0.7965521311213268, iteration: 401267
loss: 1.021673321723938,grad_norm: 0.9211871351448419, iteration: 401268
loss: 0.9941835403442383,grad_norm: 0.9517127222655846, iteration: 401269
loss: 1.0027735233306885,grad_norm: 0.6925004382134025, iteration: 401270
loss: 1.0211602449417114,grad_norm: 0.6924426025329077, iteration: 401271
loss: 1.0076344013214111,grad_norm: 0.8152713979296959, iteration: 401272
loss: 1.0225234031677246,grad_norm: 0.6610157384261253, iteration: 401273
loss: 0.9788814783096313,grad_norm: 0.817767341777295, iteration: 401274
loss: 1.0474705696105957,grad_norm: 0.8147273442130688, iteration: 401275
loss: 0.9776397943496704,grad_norm: 0.9203536000694529, iteration: 401276
loss: 0.9507939219474792,grad_norm: 0.9096959560798616, iteration: 401277
loss: 1.0362963676452637,grad_norm: 0.8415217082896499, iteration: 401278
loss: 1.0092908143997192,grad_norm: 0.8075936219624335, iteration: 401279
loss: 0.9894102215766907,grad_norm: 0.7750193014238442, iteration: 401280
loss: 0.984066367149353,grad_norm: 0.7656733207647851, iteration: 401281
loss: 1.0511555671691895,grad_norm: 0.7897844632432951, iteration: 401282
loss: 1.0406297445297241,grad_norm: 0.6852176124271285, iteration: 401283
loss: 0.9582371711730957,grad_norm: 0.794255977211978, iteration: 401284
loss: 0.9908469915390015,grad_norm: 0.7817061639038919, iteration: 401285
loss: 1.0818591117858887,grad_norm: 0.9152026395038807, iteration: 401286
loss: 1.0021576881408691,grad_norm: 0.8066835565538714, iteration: 401287
loss: 1.037114143371582,grad_norm: 0.7473044205101097, iteration: 401288
loss: 1.0435715913772583,grad_norm: 0.999999701391078, iteration: 401289
loss: 1.0095099210739136,grad_norm: 0.791328849434907, iteration: 401290
loss: 0.9923438429832458,grad_norm: 0.7458869768473513, iteration: 401291
loss: 1.0524338483810425,grad_norm: 0.7955582154289161, iteration: 401292
loss: 0.9529753923416138,grad_norm: 0.7879882975568337, iteration: 401293
loss: 1.0176022052764893,grad_norm: 0.7321815385635686, iteration: 401294
loss: 1.011559009552002,grad_norm: 0.7756427372107783, iteration: 401295
loss: 1.0140438079833984,grad_norm: 0.9231175393796041, iteration: 401296
loss: 1.010354995727539,grad_norm: 0.8541642831659181, iteration: 401297
loss: 0.9794546365737915,grad_norm: 0.7873484741561505, iteration: 401298
loss: 1.2101359367370605,grad_norm: 0.9999993103555093, iteration: 401299
loss: 1.0141866207122803,grad_norm: 0.7792344141855737, iteration: 401300
loss: 1.001747965812683,grad_norm: 0.9588437513531675, iteration: 401301
loss: 1.0303230285644531,grad_norm: 0.968985513124375, iteration: 401302
loss: 0.9459081888198853,grad_norm: 0.7561637334471091, iteration: 401303
loss: 1.0692386627197266,grad_norm: 0.8057964104055398, iteration: 401304
loss: 0.9367865324020386,grad_norm: 0.9515300734501643, iteration: 401305
loss: 1.0688382387161255,grad_norm: 0.9999996505387448, iteration: 401306
loss: 0.9654267430305481,grad_norm: 0.7107736622919094, iteration: 401307
loss: 1.0806994438171387,grad_norm: 0.9208514422978884, iteration: 401308
loss: 0.9832663536071777,grad_norm: 0.7145657506386677, iteration: 401309
loss: 0.9885784983634949,grad_norm: 0.8178690069267981, iteration: 401310
loss: 1.0011042356491089,grad_norm: 0.7467652351502113, iteration: 401311
loss: 0.994600236415863,grad_norm: 0.9999996288607177, iteration: 401312
loss: 1.030387043952942,grad_norm: 0.8436374551840717, iteration: 401313
loss: 0.9811804890632629,grad_norm: 0.8526723430260742, iteration: 401314
loss: 1.0026729106903076,grad_norm: 0.9931249145987865, iteration: 401315
loss: 1.0117231607437134,grad_norm: 0.7175525005269272, iteration: 401316
loss: 1.0162103176116943,grad_norm: 0.7118270164586631, iteration: 401317
loss: 1.0034455060958862,grad_norm: 0.7938167649273288, iteration: 401318
loss: 1.0541776418685913,grad_norm: 0.999999901831115, iteration: 401319
loss: 1.0097728967666626,grad_norm: 0.9999994462415072, iteration: 401320
loss: 1.00896155834198,grad_norm: 0.7783881377754424, iteration: 401321
loss: 0.9955736398696899,grad_norm: 0.7685752758312951, iteration: 401322
loss: 1.0796507596969604,grad_norm: 0.994426498128117, iteration: 401323
loss: 0.9872043132781982,grad_norm: 0.7748083957898615, iteration: 401324
loss: 0.9817710518836975,grad_norm: 0.795937227388111, iteration: 401325
loss: 1.0513405799865723,grad_norm: 0.9999997319014925, iteration: 401326
loss: 1.0226647853851318,grad_norm: 0.6670848024116902, iteration: 401327
loss: 0.997489333152771,grad_norm: 0.7689972642043322, iteration: 401328
loss: 0.9938420653343201,grad_norm: 0.8596697739230422, iteration: 401329
loss: 0.9885970950126648,grad_norm: 0.708489934974088, iteration: 401330
loss: 1.0049630403518677,grad_norm: 0.8056962779204431, iteration: 401331
loss: 1.0180590152740479,grad_norm: 0.9860870564871399, iteration: 401332
loss: 1.005435585975647,grad_norm: 0.653399360786643, iteration: 401333
loss: 1.02387273311615,grad_norm: 0.9999995597047676, iteration: 401334
loss: 1.0092235803604126,grad_norm: 0.9497366078667789, iteration: 401335
loss: 1.0018538236618042,grad_norm: 0.6866331618312493, iteration: 401336
loss: 1.108993411064148,grad_norm: 0.999999709179998, iteration: 401337
loss: 0.9953480958938599,grad_norm: 0.8533309930828419, iteration: 401338
loss: 1.0223299264907837,grad_norm: 0.7602389059548277, iteration: 401339
loss: 0.9993951916694641,grad_norm: 0.6931030245344844, iteration: 401340
loss: 0.9844117760658264,grad_norm: 0.8559341819702382, iteration: 401341
loss: 1.1092010736465454,grad_norm: 0.9999993523881625, iteration: 401342
loss: 1.006551742553711,grad_norm: 0.730609566680361, iteration: 401343
loss: 1.008279800415039,grad_norm: 0.785129137784079, iteration: 401344
loss: 1.0656108856201172,grad_norm: 0.9999992668687804, iteration: 401345
loss: 1.1574207544326782,grad_norm: 0.9999993594367419, iteration: 401346
loss: 0.9711076617240906,grad_norm: 0.8193346968244085, iteration: 401347
loss: 1.1052167415618896,grad_norm: 0.9026324018327521, iteration: 401348
loss: 1.0285834074020386,grad_norm: 0.7463377219416563, iteration: 401349
loss: 1.0207427740097046,grad_norm: 0.6484560113758624, iteration: 401350
loss: 0.9994658827781677,grad_norm: 0.640558273695899, iteration: 401351
loss: 1.029033899307251,grad_norm: 0.8127978682416331, iteration: 401352
loss: 0.9936061501502991,grad_norm: 0.6626016775300476, iteration: 401353
loss: 1.0462262630462646,grad_norm: 0.7885954450922907, iteration: 401354
loss: 1.1543892621994019,grad_norm: 0.9999997680678868, iteration: 401355
loss: 1.0301159620285034,grad_norm: 0.7477030058299957, iteration: 401356
loss: 1.020904302597046,grad_norm: 0.8493644004757706, iteration: 401357
loss: 1.0113154649734497,grad_norm: 0.9999995954641834, iteration: 401358
loss: 0.9777641296386719,grad_norm: 0.7549981234673868, iteration: 401359
loss: 1.0291050672531128,grad_norm: 0.8703413551146089, iteration: 401360
loss: 1.0459502935409546,grad_norm: 0.8137525815062366, iteration: 401361
loss: 0.99628746509552,grad_norm: 0.8406154262257963, iteration: 401362
loss: 0.997107982635498,grad_norm: 0.8085259225946908, iteration: 401363
loss: 0.9855096340179443,grad_norm: 0.6975486256019164, iteration: 401364
loss: 1.0180754661560059,grad_norm: 0.8996756107015372, iteration: 401365
loss: 1.0005624294281006,grad_norm: 0.9999991677582046, iteration: 401366
loss: 1.0069231986999512,grad_norm: 0.7759423734821133, iteration: 401367
loss: 1.0336531400680542,grad_norm: 0.7446353363835219, iteration: 401368
loss: 1.0447901487350464,grad_norm: 0.9999999339774336, iteration: 401369
loss: 0.9822540879249573,grad_norm: 0.722756969276718, iteration: 401370
loss: 0.9691758155822754,grad_norm: 0.7534252012501162, iteration: 401371
loss: 1.0618647336959839,grad_norm: 0.9999997397623851, iteration: 401372
loss: 0.9656471610069275,grad_norm: 0.7510196264013365, iteration: 401373
loss: 0.9787154197692871,grad_norm: 0.9903586961986921, iteration: 401374
loss: 0.9727537631988525,grad_norm: 0.8271918302886945, iteration: 401375
loss: 0.9524059891700745,grad_norm: 0.8249318258686482, iteration: 401376
loss: 1.0020842552185059,grad_norm: 0.6707160571363961, iteration: 401377
loss: 0.9800597429275513,grad_norm: 0.7442340123294081, iteration: 401378
loss: 1.0744729042053223,grad_norm: 0.9999999106196954, iteration: 401379
loss: 1.0118889808654785,grad_norm: 0.8179906298362923, iteration: 401380
loss: 1.0026583671569824,grad_norm: 0.7791844759425999, iteration: 401381
loss: 0.996541440486908,grad_norm: 0.6898067280758091, iteration: 401382
loss: 1.014931559562683,grad_norm: 0.7752753744107533, iteration: 401383
loss: 1.0483571290969849,grad_norm: 0.9999993582632912, iteration: 401384
loss: 1.0127729177474976,grad_norm: 0.9999996107123736, iteration: 401385
loss: 0.9947428107261658,grad_norm: 0.8872578863662609, iteration: 401386
loss: 1.0570865869522095,grad_norm: 0.9254277328097059, iteration: 401387
loss: 1.0261017084121704,grad_norm: 0.7750443629497674, iteration: 401388
loss: 0.9965311884880066,grad_norm: 0.7823327818003412, iteration: 401389
loss: 0.9861559867858887,grad_norm: 0.6885658637742599, iteration: 401390
loss: 0.9987808465957642,grad_norm: 0.8614725521528678, iteration: 401391
loss: 0.9604442715644836,grad_norm: 0.8836118304282153, iteration: 401392
loss: 1.0835789442062378,grad_norm: 0.7966213027269875, iteration: 401393
loss: 1.0109330415725708,grad_norm: 0.8588527617391044, iteration: 401394
loss: 0.9918126463890076,grad_norm: 0.8206641803091494, iteration: 401395
loss: 1.001449465751648,grad_norm: 0.9535286110454965, iteration: 401396
loss: 1.0115736722946167,grad_norm: 0.9999988948665691, iteration: 401397
loss: 1.0451356172561646,grad_norm: 0.8256434991640815, iteration: 401398
loss: 1.019049048423767,grad_norm: 0.8555675514468634, iteration: 401399
loss: 0.9884170293807983,grad_norm: 0.8353363375401236, iteration: 401400
loss: 1.036529541015625,grad_norm: 0.9999994960619781, iteration: 401401
loss: 1.0061750411987305,grad_norm: 0.7100432986497872, iteration: 401402
loss: 1.0267788171768188,grad_norm: 0.7063352708936335, iteration: 401403
loss: 0.9940604567527771,grad_norm: 0.9999997766092422, iteration: 401404
loss: 0.9800708293914795,grad_norm: 0.862459589612688, iteration: 401405
loss: 0.9595650434494019,grad_norm: 0.9172345534268328, iteration: 401406
loss: 0.9775789380073547,grad_norm: 0.8522564055421938, iteration: 401407
loss: 0.9874573349952698,grad_norm: 0.7672644950291597, iteration: 401408
loss: 1.0919605493545532,grad_norm: 0.9999995834977399, iteration: 401409
loss: 0.9548252820968628,grad_norm: 0.8021095951616132, iteration: 401410
loss: 1.041846513748169,grad_norm: 0.9999996940228191, iteration: 401411
loss: 1.0263350009918213,grad_norm: 0.6810919278363712, iteration: 401412
loss: 1.0066123008728027,grad_norm: 0.8206128747443814, iteration: 401413
loss: 0.9569670557975769,grad_norm: 0.9370430995580207, iteration: 401414
loss: 0.9980099201202393,grad_norm: 0.8085126788813587, iteration: 401415
loss: 1.0793089866638184,grad_norm: 0.9999998909616257, iteration: 401416
loss: 1.010866641998291,grad_norm: 0.7416354373480869, iteration: 401417
loss: 1.0059468746185303,grad_norm: 0.7868419863227412, iteration: 401418
loss: 0.9881145358085632,grad_norm: 0.7452194644581804, iteration: 401419
loss: 1.0064672231674194,grad_norm: 0.999999035053511, iteration: 401420
loss: 0.968916654586792,grad_norm: 0.8061099853355608, iteration: 401421
loss: 0.9811568856239319,grad_norm: 0.6851284783215755, iteration: 401422
loss: 0.9862779378890991,grad_norm: 0.8716495305453688, iteration: 401423
loss: 0.9576509594917297,grad_norm: 0.9882769663198244, iteration: 401424
loss: 0.9638423919677734,grad_norm: 0.7063275377009773, iteration: 401425
loss: 1.023417592048645,grad_norm: 0.8450290792606114, iteration: 401426
loss: 1.001558780670166,grad_norm: 0.9790605631859869, iteration: 401427
loss: 0.9989067912101746,grad_norm: 0.8231647842250593, iteration: 401428
loss: 0.9985209703445435,grad_norm: 0.7348074827058065, iteration: 401429
loss: 1.0327450037002563,grad_norm: 0.9938559559657604, iteration: 401430
loss: 1.007248044013977,grad_norm: 0.9999990632402974, iteration: 401431
loss: 1.0259913206100464,grad_norm: 0.9999998900608458, iteration: 401432
loss: 1.0195869207382202,grad_norm: 0.7544375140525105, iteration: 401433
loss: 0.9958528876304626,grad_norm: 0.877820380123041, iteration: 401434
loss: 0.998195230960846,grad_norm: 0.9999993016901051, iteration: 401435
loss: 0.9886577129364014,grad_norm: 0.9825024658227685, iteration: 401436
loss: 1.0172215700149536,grad_norm: 0.8149585236990379, iteration: 401437
loss: 1.084458827972412,grad_norm: 0.9816070372105028, iteration: 401438
loss: 1.017415165901184,grad_norm: 0.9999992360074031, iteration: 401439
loss: 0.967105507850647,grad_norm: 0.7611915718353401, iteration: 401440
loss: 1.0007660388946533,grad_norm: 0.9160696602136873, iteration: 401441
loss: 0.9846393465995789,grad_norm: 0.7125174207065883, iteration: 401442
loss: 0.9922699332237244,grad_norm: 0.9075001960167922, iteration: 401443
loss: 1.014896273612976,grad_norm: 0.8067967293543671, iteration: 401444
loss: 0.9794232249259949,grad_norm: 0.823083435267181, iteration: 401445
loss: 1.0093320608139038,grad_norm: 0.7012342043114891, iteration: 401446
loss: 0.9929565787315369,grad_norm: 0.7886152918531448, iteration: 401447
loss: 0.9941810369491577,grad_norm: 0.9999998647189163, iteration: 401448
loss: 0.9964421391487122,grad_norm: 0.8507918210228991, iteration: 401449
loss: 1.0138498544692993,grad_norm: 0.7000471608922061, iteration: 401450
loss: 0.9830158948898315,grad_norm: 0.8299896535069197, iteration: 401451
loss: 0.9949800968170166,grad_norm: 0.7954145052064124, iteration: 401452
loss: 0.9909469485282898,grad_norm: 0.8297905540702691, iteration: 401453
loss: 0.9751262664794922,grad_norm: 0.9999994410299294, iteration: 401454
loss: 0.9788707494735718,grad_norm: 0.7801249305094887, iteration: 401455
loss: 1.036238193511963,grad_norm: 0.7332796436666524, iteration: 401456
loss: 1.0124857425689697,grad_norm: 0.6971714134392831, iteration: 401457
loss: 1.0115667581558228,grad_norm: 0.701030230356088, iteration: 401458
loss: 1.0116701126098633,grad_norm: 0.7937026232640196, iteration: 401459
loss: 0.9647951126098633,grad_norm: 0.7400139124845235, iteration: 401460
loss: 1.0012651681900024,grad_norm: 0.99999916997405, iteration: 401461
loss: 0.995759129524231,grad_norm: 0.9929222188263862, iteration: 401462
loss: 1.038633942604065,grad_norm: 0.7945625977537213, iteration: 401463
loss: 0.9883845448493958,grad_norm: 0.8936525835575388, iteration: 401464
loss: 0.965798556804657,grad_norm: 0.9999991584995865, iteration: 401465
loss: 1.0053640604019165,grad_norm: 0.9999994120470199, iteration: 401466
loss: 0.976833164691925,grad_norm: 0.7205301424825438, iteration: 401467
loss: 1.0286548137664795,grad_norm: 0.9999995726238051, iteration: 401468
loss: 0.9858714938163757,grad_norm: 0.758803730633106, iteration: 401469
loss: 1.0094783306121826,grad_norm: 0.696727368094014, iteration: 401470
loss: 1.0141626596450806,grad_norm: 0.7829174867922144, iteration: 401471
loss: 0.9891573786735535,grad_norm: 0.7677815820588939, iteration: 401472
loss: 1.0198936462402344,grad_norm: 0.9725017739302909, iteration: 401473
loss: 0.9657146334648132,grad_norm: 0.7480219548949121, iteration: 401474
loss: 1.0036280155181885,grad_norm: 0.7637435659194273, iteration: 401475
loss: 0.9685848951339722,grad_norm: 0.8155599154848948, iteration: 401476
loss: 0.9893286228179932,grad_norm: 0.8364907554817776, iteration: 401477
loss: 1.031886339187622,grad_norm: 0.89492970467521, iteration: 401478
loss: 1.056198239326477,grad_norm: 0.9999992351270649, iteration: 401479
loss: 0.9769483804702759,grad_norm: 0.8169444546073186, iteration: 401480
loss: 0.9956763982772827,grad_norm: 0.7730782114223926, iteration: 401481
loss: 1.0179078578948975,grad_norm: 0.8546913404496695, iteration: 401482
loss: 1.0310499668121338,grad_norm: 0.9358975065815117, iteration: 401483
loss: 0.9853512048721313,grad_norm: 0.8426576961100987, iteration: 401484
loss: 0.9858413934707642,grad_norm: 0.8429687087375787, iteration: 401485
loss: 0.96977698802948,grad_norm: 0.7462355661816019, iteration: 401486
loss: 1.1663535833358765,grad_norm: 1.0000000416637722, iteration: 401487
loss: 1.0051640272140503,grad_norm: 0.6122123013365909, iteration: 401488
loss: 1.0344783067703247,grad_norm: 0.9999990925448211, iteration: 401489
loss: 0.9851747751235962,grad_norm: 0.8137475265656244, iteration: 401490
loss: 0.9642338752746582,grad_norm: 0.8007913437771216, iteration: 401491
loss: 1.0327644348144531,grad_norm: 0.8508898935411564, iteration: 401492
loss: 1.035452127456665,grad_norm: 0.748045437862101, iteration: 401493
loss: 0.9795159697532654,grad_norm: 0.8063939991315672, iteration: 401494
loss: 1.078147530555725,grad_norm: 0.9999989992883789, iteration: 401495
loss: 0.9402118921279907,grad_norm: 0.6851361766932288, iteration: 401496
loss: 1.0062979459762573,grad_norm: 0.8567847122331006, iteration: 401497
loss: 1.0192303657531738,grad_norm: 0.854258958916617, iteration: 401498
loss: 1.0228428840637207,grad_norm: 0.764153535584942, iteration: 401499
loss: 0.9827432632446289,grad_norm: 0.8987260858218215, iteration: 401500
loss: 1.094398856163025,grad_norm: 0.9999993383107209, iteration: 401501
loss: 0.978269636631012,grad_norm: 0.6849532587691488, iteration: 401502
loss: 1.0247623920440674,grad_norm: 0.9999990610041951, iteration: 401503
loss: 0.9990968704223633,grad_norm: 0.8379358471954557, iteration: 401504
loss: 1.018652319908142,grad_norm: 0.7765589830374375, iteration: 401505
loss: 1.0349147319793701,grad_norm: 0.9877467781220453, iteration: 401506
loss: 1.0158594846725464,grad_norm: 0.822124560000685, iteration: 401507
loss: 0.9813228845596313,grad_norm: 0.8204576570767238, iteration: 401508
loss: 0.9919252991676331,grad_norm: 0.7795041674631666, iteration: 401509
loss: 1.0149695873260498,grad_norm: 0.8995702157817225, iteration: 401510
loss: 0.971165120601654,grad_norm: 0.7509631901476743, iteration: 401511
loss: 1.0432170629501343,grad_norm: 0.6441671142349726, iteration: 401512
loss: 0.9803807139396667,grad_norm: 0.6503061014206967, iteration: 401513
loss: 1.0243070125579834,grad_norm: 0.9999993497956542, iteration: 401514
loss: 1.0225982666015625,grad_norm: 0.6695634285631225, iteration: 401515
loss: 1.0034741163253784,grad_norm: 0.8593809497324462, iteration: 401516
loss: 1.0137441158294678,grad_norm: 0.959079972448236, iteration: 401517
loss: 1.173474907875061,grad_norm: 0.9999995316555086, iteration: 401518
loss: 1.006412386894226,grad_norm: 0.843467888460705, iteration: 401519
loss: 1.023848533630371,grad_norm: 0.7746448411517908, iteration: 401520
loss: 0.9417807459831238,grad_norm: 0.8528434803921624, iteration: 401521
loss: 1.018027901649475,grad_norm: 0.8509436671070251, iteration: 401522
loss: 1.0093107223510742,grad_norm: 0.8067319510840252, iteration: 401523
loss: 0.9981614351272583,grad_norm: 0.8505044911727436, iteration: 401524
loss: 1.0122654438018799,grad_norm: 0.9830315948606083, iteration: 401525
loss: 0.9974517822265625,grad_norm: 0.7722739622250074, iteration: 401526
loss: 1.0181773900985718,grad_norm: 0.7886895283532949, iteration: 401527
loss: 1.000845193862915,grad_norm: 0.6843561955325074, iteration: 401528
loss: 1.0142521858215332,grad_norm: 0.8950699963275508, iteration: 401529
loss: 0.9796920418739319,grad_norm: 0.8552479987476117, iteration: 401530
loss: 1.0135276317596436,grad_norm: 0.7502205308313504, iteration: 401531
loss: 1.0092549324035645,grad_norm: 0.6974267183139976, iteration: 401532
loss: 0.9885575175285339,grad_norm: 0.8918320364826717, iteration: 401533
loss: 1.0280609130859375,grad_norm: 0.9176961735394835, iteration: 401534
loss: 0.9963045120239258,grad_norm: 0.8591165868287896, iteration: 401535
loss: 0.9777255654335022,grad_norm: 0.6110001060147608, iteration: 401536
loss: 1.0378271341323853,grad_norm: 0.7547287701561228, iteration: 401537
loss: 0.9948245286941528,grad_norm: 0.6854173819798925, iteration: 401538
loss: 1.005088210105896,grad_norm: 0.7009454987818758, iteration: 401539
loss: 1.027212142944336,grad_norm: 0.7887421426960921, iteration: 401540
loss: 0.9999204874038696,grad_norm: 0.7188455580909103, iteration: 401541
loss: 1.0048574209213257,grad_norm: 0.7074170331889162, iteration: 401542
loss: 0.9888255596160889,grad_norm: 0.8157133111102493, iteration: 401543
loss: 1.032019853591919,grad_norm: 0.793206667756996, iteration: 401544
loss: 0.9784388542175293,grad_norm: 0.6500055889238239, iteration: 401545
loss: 0.9784958362579346,grad_norm: 0.8447584940643048, iteration: 401546
loss: 0.9412822127342224,grad_norm: 0.8337742735628609, iteration: 401547
loss: 1.0016167163848877,grad_norm: 0.6753891880435715, iteration: 401548
loss: 0.9886406064033508,grad_norm: 0.7411754126848262, iteration: 401549
loss: 0.967950701713562,grad_norm: 0.9164262735048986, iteration: 401550
loss: 0.9865031838417053,grad_norm: 0.6682024146120342, iteration: 401551
loss: 1.003100872039795,grad_norm: 0.9833677912104184, iteration: 401552
loss: 0.9618805646896362,grad_norm: 0.8792791154791069, iteration: 401553
loss: 0.9895265102386475,grad_norm: 0.9865383578660883, iteration: 401554
loss: 0.9852150082588196,grad_norm: 0.8672484140002157, iteration: 401555
loss: 0.9935424327850342,grad_norm: 0.65306634402408, iteration: 401556
loss: 1.0452533960342407,grad_norm: 0.7895053716182928, iteration: 401557
loss: 1.0054426193237305,grad_norm: 0.7124432041122518, iteration: 401558
loss: 0.9785997867584229,grad_norm: 0.8351373937577047, iteration: 401559
loss: 0.9716789722442627,grad_norm: 0.7553987843378999, iteration: 401560
loss: 0.9864854216575623,grad_norm: 0.7039578360506803, iteration: 401561
loss: 0.985871434211731,grad_norm: 0.8651837302903264, iteration: 401562
loss: 1.0064831972122192,grad_norm: 0.7508708525590188, iteration: 401563
loss: 0.9736500978469849,grad_norm: 0.8662774003890917, iteration: 401564
loss: 1.1628026962280273,grad_norm: 0.9999993180621408, iteration: 401565
loss: 1.0160372257232666,grad_norm: 0.8890433352098414, iteration: 401566
loss: 1.0141699314117432,grad_norm: 0.9881274066172583, iteration: 401567
loss: 1.033332109451294,grad_norm: 0.7577022832440041, iteration: 401568
loss: 0.9885786175727844,grad_norm: 0.9038096009844941, iteration: 401569
loss: 0.9944862127304077,grad_norm: 0.7406401330571973, iteration: 401570
loss: 1.018923044204712,grad_norm: 0.7739301455661193, iteration: 401571
loss: 1.012945532798767,grad_norm: 0.999999816686159, iteration: 401572
loss: 0.9890828132629395,grad_norm: 0.9999992090739993, iteration: 401573
loss: 1.0079447031021118,grad_norm: 0.7190520525027024, iteration: 401574
loss: 0.9717146158218384,grad_norm: 0.6869847687956282, iteration: 401575
loss: 0.9943673014640808,grad_norm: 0.7942941127278809, iteration: 401576
loss: 0.9855518341064453,grad_norm: 0.7176934696723503, iteration: 401577
loss: 1.0103479623794556,grad_norm: 0.9982469529800484, iteration: 401578
loss: 1.0115257501602173,grad_norm: 0.8460475045134714, iteration: 401579
loss: 0.9874791502952576,grad_norm: 0.7963405281464108, iteration: 401580
loss: 1.014776587486267,grad_norm: 0.7353711650567291, iteration: 401581
loss: 0.9934704303741455,grad_norm: 0.8493042229078157, iteration: 401582
loss: 0.9796736240386963,grad_norm: 0.7969827115338467, iteration: 401583
loss: 1.0077776908874512,grad_norm: 0.8088253118859666, iteration: 401584
loss: 1.0011956691741943,grad_norm: 0.899480798603858, iteration: 401585
loss: 1.0442404747009277,grad_norm: 0.8208949126055733, iteration: 401586
loss: 0.9921025037765503,grad_norm: 0.7919848428147628, iteration: 401587
loss: 0.9882922768592834,grad_norm: 0.8017786137250323, iteration: 401588
loss: 1.0202088356018066,grad_norm: 0.7280736936986695, iteration: 401589
loss: 1.0202276706695557,grad_norm: 0.7223484396591787, iteration: 401590
loss: 1.0179790258407593,grad_norm: 0.9999993003359149, iteration: 401591
loss: 0.9916925430297852,grad_norm: 0.7069296901684062, iteration: 401592
loss: 0.9654614329338074,grad_norm: 0.9988578915513784, iteration: 401593
loss: 0.9757349491119385,grad_norm: 0.8490797509278759, iteration: 401594
loss: 1.0270490646362305,grad_norm: 0.7389782766975759, iteration: 401595
loss: 0.9904794096946716,grad_norm: 0.6985205464820443, iteration: 401596
loss: 1.0038506984710693,grad_norm: 0.8994926985318318, iteration: 401597
loss: 1.013425588607788,grad_norm: 0.7387191329225119, iteration: 401598
loss: 0.984825611114502,grad_norm: 0.9825919803140206, iteration: 401599
loss: 1.052076816558838,grad_norm: 0.9240761433959098, iteration: 401600
loss: 1.0292527675628662,grad_norm: 0.9999999048019326, iteration: 401601
loss: 1.0158926248550415,grad_norm: 0.9999998687701377, iteration: 401602
loss: 1.028140902519226,grad_norm: 0.999999899611753, iteration: 401603
loss: 0.9881811738014221,grad_norm: 0.8478597606343602, iteration: 401604
loss: 1.0247890949249268,grad_norm: 0.7667107704679454, iteration: 401605
loss: 1.073278784751892,grad_norm: 0.999999014761329, iteration: 401606
loss: 0.9888455867767334,grad_norm: 0.6563763122330579, iteration: 401607
loss: 1.02801513671875,grad_norm: 0.786825496621302, iteration: 401608
loss: 0.9826831817626953,grad_norm: 0.8649970571984744, iteration: 401609
loss: 1.0262001752853394,grad_norm: 0.8264608550753025, iteration: 401610
loss: 1.00734281539917,grad_norm: 0.7575138808973948, iteration: 401611
loss: 0.9677750468254089,grad_norm: 0.9707324342071142, iteration: 401612
loss: 0.9542847275733948,grad_norm: 0.6990893270555736, iteration: 401613
loss: 1.090075969696045,grad_norm: 0.9449290767717407, iteration: 401614
loss: 0.986116886138916,grad_norm: 0.999999853120385, iteration: 401615
loss: 1.0990787744522095,grad_norm: 0.999999119490617, iteration: 401616
loss: 0.9482269883155823,grad_norm: 0.7520218461232188, iteration: 401617
loss: 0.9695844650268555,grad_norm: 0.7844416369856156, iteration: 401618
loss: 1.0316541194915771,grad_norm: 0.8216126539144349, iteration: 401619
loss: 1.0095665454864502,grad_norm: 0.7644745060011691, iteration: 401620
loss: 1.0111274719238281,grad_norm: 0.6733331451572158, iteration: 401621
loss: 0.9750768542289734,grad_norm: 0.8379987929967572, iteration: 401622
loss: 0.9764247536659241,grad_norm: 0.6725856457419518, iteration: 401623
loss: 0.9799440503120422,grad_norm: 0.807947247902443, iteration: 401624
loss: 1.0073691606521606,grad_norm: 0.8803161575852007, iteration: 401625
loss: 0.9917442202568054,grad_norm: 0.7437821309451446, iteration: 401626
loss: 1.1200897693634033,grad_norm: 1.0000001047349836, iteration: 401627
loss: 1.022424578666687,grad_norm: 0.7573021277848841, iteration: 401628
loss: 0.9667551517486572,grad_norm: 0.9172970381243726, iteration: 401629
loss: 0.9831545948982239,grad_norm: 0.782905196893891, iteration: 401630
loss: 0.9821813702583313,grad_norm: 0.7335786358093579, iteration: 401631
loss: 1.0016453266143799,grad_norm: 0.717420292609518, iteration: 401632
loss: 0.996453583240509,grad_norm: 0.7428342456284147, iteration: 401633
loss: 1.017548680305481,grad_norm: 0.6539518702648163, iteration: 401634
loss: 1.007021427154541,grad_norm: 0.8203312189562669, iteration: 401635
loss: 0.9866149425506592,grad_norm: 0.9416562633058434, iteration: 401636
loss: 1.011807918548584,grad_norm: 0.9999996655696638, iteration: 401637
loss: 1.154442548751831,grad_norm: 0.9999998558388455, iteration: 401638
loss: 1.0359498262405396,grad_norm: 0.9999991993319365, iteration: 401639
loss: 0.9875786900520325,grad_norm: 0.833332671954254, iteration: 401640
loss: 1.0195335149765015,grad_norm: 0.9999998390245936, iteration: 401641
loss: 0.9986717700958252,grad_norm: 0.8771997278135736, iteration: 401642
loss: 0.9831288456916809,grad_norm: 0.7048872346387921, iteration: 401643
loss: 1.0780582427978516,grad_norm: 0.9999997582314236, iteration: 401644
loss: 1.0182929039001465,grad_norm: 0.9399618184039533, iteration: 401645
loss: 1.0231056213378906,grad_norm: 0.7625338227207092, iteration: 401646
loss: 0.9596569538116455,grad_norm: 0.7950580047808113, iteration: 401647
loss: 1.0794016122817993,grad_norm: 0.9999990249886705, iteration: 401648
loss: 1.0216100215911865,grad_norm: 0.961508424330243, iteration: 401649
loss: 1.019668698310852,grad_norm: 0.842115687108708, iteration: 401650
loss: 0.9456520676612854,grad_norm: 0.7720004445828196, iteration: 401651
loss: 0.9983992576599121,grad_norm: 0.6906878390372503, iteration: 401652
loss: 0.9595544338226318,grad_norm: 0.8346373383704743, iteration: 401653
loss: 1.039846420288086,grad_norm: 0.6857723699611854, iteration: 401654
loss: 0.9958419799804688,grad_norm: 0.9081056675801628, iteration: 401655
loss: 1.023003101348877,grad_norm: 0.8460054811912509, iteration: 401656
loss: 0.9848014116287231,grad_norm: 0.9573487634738165, iteration: 401657
loss: 1.059137225151062,grad_norm: 0.9999996366960429, iteration: 401658
loss: 1.0084933042526245,grad_norm: 0.8308530841188182, iteration: 401659
loss: 1.0151320695877075,grad_norm: 0.8620199361700179, iteration: 401660
loss: 1.0210353136062622,grad_norm: 0.7570858450769716, iteration: 401661
loss: 0.9828751683235168,grad_norm: 0.8372829372528486, iteration: 401662
loss: 1.0679408311843872,grad_norm: 0.6768497832499639, iteration: 401663
loss: 1.0676591396331787,grad_norm: 0.8239420891254847, iteration: 401664
loss: 0.982923686504364,grad_norm: 0.7902736859962003, iteration: 401665
loss: 1.0088846683502197,grad_norm: 0.8020000873423564, iteration: 401666
loss: 0.9823668599128723,grad_norm: 0.741303459296255, iteration: 401667
loss: 1.046028971672058,grad_norm: 0.7644723077163638, iteration: 401668
loss: 1.0023924112319946,grad_norm: 0.8417052011400902, iteration: 401669
loss: 1.0290436744689941,grad_norm: 0.9999993631881912, iteration: 401670
loss: 1.060896873474121,grad_norm: 0.999999373322656, iteration: 401671
loss: 0.9880940914154053,grad_norm: 0.8460283041871196, iteration: 401672
loss: 1.0054078102111816,grad_norm: 0.9218323376218241, iteration: 401673
loss: 0.97357577085495,grad_norm: 0.7781763751948975, iteration: 401674
loss: 0.9925261735916138,grad_norm: 0.9623795279018291, iteration: 401675
loss: 0.9927894473075867,grad_norm: 0.7481147040694629, iteration: 401676
loss: 0.9701899290084839,grad_norm: 0.8617128150753004, iteration: 401677
loss: 0.9746335744857788,grad_norm: 0.9999996322479281, iteration: 401678
loss: 1.0074243545532227,grad_norm: 0.7647439396907192, iteration: 401679
loss: 0.9706171154975891,grad_norm: 0.728847418413589, iteration: 401680
loss: 1.0077489614486694,grad_norm: 0.8095339695336113, iteration: 401681
loss: 1.0290099382400513,grad_norm: 0.7533984247273002, iteration: 401682
loss: 0.962280809879303,grad_norm: 0.8460400690423904, iteration: 401683
loss: 0.9835270643234253,grad_norm: 0.9999999393074464, iteration: 401684
loss: 1.0128264427185059,grad_norm: 0.9263710189437162, iteration: 401685
loss: 1.0088778734207153,grad_norm: 0.9999990507494514, iteration: 401686
loss: 0.9742884039878845,grad_norm: 0.814162677280475, iteration: 401687
loss: 0.9838757514953613,grad_norm: 0.9071897091992355, iteration: 401688
loss: 1.0496634244918823,grad_norm: 0.7390817957418503, iteration: 401689
loss: 0.9991657733917236,grad_norm: 0.827648673573501, iteration: 401690
loss: 1.00005042552948,grad_norm: 0.7394255770112026, iteration: 401691
loss: 0.9790899157524109,grad_norm: 0.7847963984058199, iteration: 401692
loss: 0.9836300611495972,grad_norm: 0.7126844131571649, iteration: 401693
loss: 1.017846941947937,grad_norm: 0.8366872663377297, iteration: 401694
loss: 0.9997748136520386,grad_norm: 0.872785619106345, iteration: 401695
loss: 1.3847284317016602,grad_norm: 0.9999997341604795, iteration: 401696
loss: 1.0348511934280396,grad_norm: 0.7214409600196432, iteration: 401697
loss: 1.1575082540512085,grad_norm: 0.9999991013289624, iteration: 401698
loss: 1.0086575746536255,grad_norm: 0.6549554464052114, iteration: 401699
loss: 0.984396755695343,grad_norm: 0.7231110882763148, iteration: 401700
loss: 1.0485005378723145,grad_norm: 0.9999995114288568, iteration: 401701
loss: 0.996503472328186,grad_norm: 0.7978096658334449, iteration: 401702
loss: 0.9623369574546814,grad_norm: 0.9999991022645741, iteration: 401703
loss: 0.9863995909690857,grad_norm: 0.7927246424782377, iteration: 401704
loss: 0.9623681306838989,grad_norm: 0.7704264827437782, iteration: 401705
loss: 1.094085693359375,grad_norm: 0.9999997172882857, iteration: 401706
loss: 1.0511075258255005,grad_norm: 0.9999996373351683, iteration: 401707
loss: 0.9933173060417175,grad_norm: 0.7853487437320468, iteration: 401708
loss: 0.9966670870780945,grad_norm: 0.9999995917281834, iteration: 401709
loss: 1.133471131324768,grad_norm: 0.999999273881076, iteration: 401710
loss: 0.9915996193885803,grad_norm: 0.8533173167958197, iteration: 401711
loss: 0.9996111392974854,grad_norm: 0.8251541204344451, iteration: 401712
loss: 0.9754213690757751,grad_norm: 0.7742752801827503, iteration: 401713
loss: 0.9881219863891602,grad_norm: 0.7801070254256753, iteration: 401714
loss: 0.9881108403205872,grad_norm: 0.9759591561815697, iteration: 401715
loss: 0.9530486464500427,grad_norm: 0.8784046253362705, iteration: 401716
loss: 1.0104823112487793,grad_norm: 0.6911735950246413, iteration: 401717
loss: 0.98712557554245,grad_norm: 0.9476795962784721, iteration: 401718
loss: 1.0240293741226196,grad_norm: 0.7980066883935808, iteration: 401719
loss: 1.0272789001464844,grad_norm: 0.8680030095561223, iteration: 401720
loss: 1.017092227935791,grad_norm: 0.7160986482622514, iteration: 401721
loss: 1.0102139711380005,grad_norm: 0.9161856028062807, iteration: 401722
loss: 0.991268515586853,grad_norm: 0.8741034083136703, iteration: 401723
loss: 1.0251245498657227,grad_norm: 0.7155078943583832, iteration: 401724
loss: 0.9733816385269165,grad_norm: 0.7156748126841921, iteration: 401725
loss: 0.9932465553283691,grad_norm: 0.8276163982186344, iteration: 401726
loss: 1.0060839653015137,grad_norm: 0.8681745390606069, iteration: 401727
loss: 1.021535038948059,grad_norm: 0.8675969773595662, iteration: 401728
loss: 1.0032340288162231,grad_norm: 0.6872383235230646, iteration: 401729
loss: 1.0117437839508057,grad_norm: 0.7415173469647672, iteration: 401730
loss: 1.025969386100769,grad_norm: 1.0000000328890275, iteration: 401731
loss: 1.009822130203247,grad_norm: 0.7708006517122592, iteration: 401732
loss: 1.0233497619628906,grad_norm: 0.8489246760628473, iteration: 401733
loss: 0.9613707661628723,grad_norm: 0.8056772596805227, iteration: 401734
loss: 1.0062750577926636,grad_norm: 0.9999994433648519, iteration: 401735
loss: 1.0434088706970215,grad_norm: 0.8587538128841519, iteration: 401736
loss: 1.1720730066299438,grad_norm: 0.9999998552008901, iteration: 401737
loss: 0.9924185276031494,grad_norm: 0.8323660628308348, iteration: 401738
loss: 1.0268750190734863,grad_norm: 0.9999993998350982, iteration: 401739
loss: 1.0182502269744873,grad_norm: 0.7303925750615025, iteration: 401740
loss: 1.0328693389892578,grad_norm: 0.7818492699343528, iteration: 401741
loss: 1.0181909799575806,grad_norm: 0.8144867412552156, iteration: 401742
loss: 1.0320144891738892,grad_norm: 0.7819027760454422, iteration: 401743
loss: 0.9761167168617249,grad_norm: 0.6902459849574266, iteration: 401744
loss: 1.0199010372161865,grad_norm: 0.6941371945918917, iteration: 401745
loss: 0.9837810397148132,grad_norm: 0.8635504968048429, iteration: 401746
loss: 0.9824522137641907,grad_norm: 0.9999999158601349, iteration: 401747
loss: 0.9775537848472595,grad_norm: 0.9999998156827021, iteration: 401748
loss: 0.9452069401741028,grad_norm: 0.9999991941432859, iteration: 401749
loss: 0.9962233304977417,grad_norm: 0.829866755943535, iteration: 401750
loss: 0.989275336265564,grad_norm: 0.7885697474056321, iteration: 401751
loss: 0.9898356199264526,grad_norm: 0.6930822375873458, iteration: 401752
loss: 1.0052920579910278,grad_norm: 0.7299881905765224, iteration: 401753
loss: 1.022006869316101,grad_norm: 0.9999999066660379, iteration: 401754
loss: 1.03469717502594,grad_norm: 0.9999997495892478, iteration: 401755
loss: 0.9708077311515808,grad_norm: 0.848916907003532, iteration: 401756
loss: 0.9993483424186707,grad_norm: 0.7628109260478196, iteration: 401757
loss: 0.9897100925445557,grad_norm: 0.8258935657370626, iteration: 401758
loss: 1.0167759656906128,grad_norm: 0.9999990936074206, iteration: 401759
loss: 1.091699242591858,grad_norm: 0.9999999848453832, iteration: 401760
loss: 1.0012352466583252,grad_norm: 0.820666014385878, iteration: 401761
loss: 1.0698957443237305,grad_norm: 0.8646082235485817, iteration: 401762
loss: 0.9888116121292114,grad_norm: 0.9999995894478653, iteration: 401763
loss: 1.0180531740188599,grad_norm: 0.669451666079384, iteration: 401764
loss: 1.0184720754623413,grad_norm: 0.805881319367137, iteration: 401765
loss: 1.0558611154556274,grad_norm: 0.8168648510709449, iteration: 401766
loss: 1.019614577293396,grad_norm: 0.868919496283768, iteration: 401767
loss: 0.9824380278587341,grad_norm: 0.9233791909381043, iteration: 401768
loss: 1.066798210144043,grad_norm: 0.8709046990394499, iteration: 401769
loss: 0.9929476976394653,grad_norm: 0.8112520508990594, iteration: 401770
loss: 1.0305482149124146,grad_norm: 0.8414600833601236, iteration: 401771
loss: 0.9830119609832764,grad_norm: 0.8114718747558967, iteration: 401772
loss: 1.080543041229248,grad_norm: 0.6244791710376221, iteration: 401773
loss: 0.9696077108383179,grad_norm: 0.7534796178079475, iteration: 401774
loss: 0.9807953834533691,grad_norm: 0.784074748860781, iteration: 401775
loss: 1.047216534614563,grad_norm: 0.685914523377188, iteration: 401776
loss: 0.9458953142166138,grad_norm: 0.8955027860041004, iteration: 401777
loss: 1.1410843133926392,grad_norm: 0.9999998197841288, iteration: 401778
loss: 0.9567425847053528,grad_norm: 0.7422284377731088, iteration: 401779
loss: 1.0148636102676392,grad_norm: 0.698947973121409, iteration: 401780
loss: 1.0158942937850952,grad_norm: 0.9999993810370128, iteration: 401781
loss: 1.1036440134048462,grad_norm: 0.8299003884361461, iteration: 401782
loss: 0.9987668395042419,grad_norm: 0.8462855617758337, iteration: 401783
loss: 1.0181807279586792,grad_norm: 0.9106063261783731, iteration: 401784
loss: 1.025156021118164,grad_norm: 0.7981736202472429, iteration: 401785
loss: 1.0052086114883423,grad_norm: 0.8341302009474648, iteration: 401786
loss: 0.9745716452598572,grad_norm: 0.7790817056429816, iteration: 401787
loss: 1.0002799034118652,grad_norm: 0.7622815582711201, iteration: 401788
loss: 1.0095151662826538,grad_norm: 0.6944454660852256, iteration: 401789
loss: 0.9834235310554504,grad_norm: 0.8044182265630349, iteration: 401790
loss: 1.0802725553512573,grad_norm: 0.7157396039768029, iteration: 401791
loss: 1.0605872869491577,grad_norm: 0.7964078600245001, iteration: 401792
loss: 1.0008370876312256,grad_norm: 0.749195918990041, iteration: 401793
loss: 1.0845258235931396,grad_norm: 0.999999205647893, iteration: 401794
loss: 0.9782174825668335,grad_norm: 0.6857998657146155, iteration: 401795
loss: 1.020363450050354,grad_norm: 0.7427041184276049, iteration: 401796
loss: 0.9945520162582397,grad_norm: 0.9999995786544822, iteration: 401797
loss: 1.0090866088867188,grad_norm: 0.9999991128347152, iteration: 401798
loss: 1.0141775608062744,grad_norm: 0.8849719423788773, iteration: 401799
loss: 0.9941386580467224,grad_norm: 0.9999997341178715, iteration: 401800
loss: 1.0002825260162354,grad_norm: 0.8161657396966254, iteration: 401801
loss: 0.9907808303833008,grad_norm: 0.9586765063424842, iteration: 401802
loss: 0.9972565770149231,grad_norm: 0.7879789907430746, iteration: 401803
loss: 0.9659944772720337,grad_norm: 0.7288722780725077, iteration: 401804
loss: 1.0185489654541016,grad_norm: 0.9999993365528851, iteration: 401805
loss: 1.008194088935852,grad_norm: 0.7153640267153997, iteration: 401806
loss: 0.9840821623802185,grad_norm: 0.7520957260435345, iteration: 401807
loss: 0.9925193190574646,grad_norm: 0.8816521102294834, iteration: 401808
loss: 1.0368701219558716,grad_norm: 0.8133655446454884, iteration: 401809
loss: 0.955864667892456,grad_norm: 0.7922525328083287, iteration: 401810
loss: 1.003293752670288,grad_norm: 0.7287718935893104, iteration: 401811
loss: 0.9766210913658142,grad_norm: 0.733363321432378, iteration: 401812
loss: 0.9922696352005005,grad_norm: 0.9114770221587061, iteration: 401813
loss: 1.0183238983154297,grad_norm: 0.9423559613246436, iteration: 401814
loss: 0.9840667247772217,grad_norm: 0.7981393024891297, iteration: 401815
loss: 0.9915614128112793,grad_norm: 0.8678104845277641, iteration: 401816
loss: 1.0071015357971191,grad_norm: 0.8688039279932052, iteration: 401817
loss: 1.0414377450942993,grad_norm: 0.9999996146507121, iteration: 401818
loss: 0.987633466720581,grad_norm: 0.7196116169426092, iteration: 401819
loss: 1.0726300477981567,grad_norm: 0.7467450770338161, iteration: 401820
loss: 1.0008326768875122,grad_norm: 0.7118254907890527, iteration: 401821
loss: 1.031419038772583,grad_norm: 0.9999997043786704, iteration: 401822
loss: 0.9998703598976135,grad_norm: 0.8449816809084024, iteration: 401823
loss: 1.017008662223816,grad_norm: 0.7437746169645016, iteration: 401824
loss: 1.0181885957717896,grad_norm: 0.7861239131757366, iteration: 401825
loss: 0.992177426815033,grad_norm: 0.8821413733252317, iteration: 401826
loss: 1.0667585134506226,grad_norm: 0.7882200564307558, iteration: 401827
loss: 0.9618009924888611,grad_norm: 0.8526266421258331, iteration: 401828
loss: 1.0816468000411987,grad_norm: 0.8687442524117368, iteration: 401829
loss: 1.0654752254486084,grad_norm: 0.9999992325637643, iteration: 401830
loss: 1.0093965530395508,grad_norm: 0.9485658072926147, iteration: 401831
loss: 1.0415455102920532,grad_norm: 0.9999995374551387, iteration: 401832
loss: 1.0136452913284302,grad_norm: 0.7201460841901701, iteration: 401833
loss: 1.0450024604797363,grad_norm: 0.7695831201941553, iteration: 401834
loss: 0.9873630404472351,grad_norm: 0.864198046881009, iteration: 401835
loss: 0.988961398601532,grad_norm: 0.7425443590756651, iteration: 401836
loss: 0.9986985325813293,grad_norm: 0.7497518515815285, iteration: 401837
loss: 1.0255911350250244,grad_norm: 0.9058462434553146, iteration: 401838
loss: 1.0097362995147705,grad_norm: 0.6990336451858755, iteration: 401839
loss: 1.1099765300750732,grad_norm: 0.9999990966307741, iteration: 401840
loss: 1.029287576675415,grad_norm: 0.9033969944272676, iteration: 401841
loss: 0.9894589781761169,grad_norm: 0.9117670604003162, iteration: 401842
loss: 1.0843309164047241,grad_norm: 0.99999939174035, iteration: 401843
loss: 1.052545189857483,grad_norm: 0.8696962758353499, iteration: 401844
loss: 0.997157871723175,grad_norm: 0.8403924705861172, iteration: 401845
loss: 0.9593667984008789,grad_norm: 0.9999990947759061, iteration: 401846
loss: 1.0143072605133057,grad_norm: 0.6476940112367361, iteration: 401847
loss: 1.004250407218933,grad_norm: 0.8327896053772947, iteration: 401848
loss: 1.0084141492843628,grad_norm: 0.7201968904901095, iteration: 401849
loss: 0.9830901622772217,grad_norm: 0.7044589543924493, iteration: 401850
loss: 1.0181174278259277,grad_norm: 0.9001379477797308, iteration: 401851
loss: 1.0227376222610474,grad_norm: 0.8370293615764589, iteration: 401852
loss: 0.9920965433120728,grad_norm: 0.9999997967338898, iteration: 401853
loss: 1.0710574388504028,grad_norm: 0.8750525994451346, iteration: 401854
loss: 1.0248748064041138,grad_norm: 0.8117710164683496, iteration: 401855
loss: 0.9797002077102661,grad_norm: 0.921173017646062, iteration: 401856
loss: 1.0544408559799194,grad_norm: 0.9999996305958566, iteration: 401857
loss: 0.9895417094230652,grad_norm: 0.9999995622067974, iteration: 401858
loss: 1.020594835281372,grad_norm: 0.9999997959056437, iteration: 401859
loss: 1.0089811086654663,grad_norm: 0.7242599063050157, iteration: 401860
loss: 1.0243220329284668,grad_norm: 0.9999999966071712, iteration: 401861
loss: 1.0120090246200562,grad_norm: 0.9999990594228365, iteration: 401862
loss: 0.9983451962471008,grad_norm: 0.8606333481349837, iteration: 401863
loss: 1.0229551792144775,grad_norm: 0.6186730500249722, iteration: 401864
loss: 1.052016258239746,grad_norm: 0.9999996184033266, iteration: 401865
loss: 0.979189932346344,grad_norm: 0.7216388529242808, iteration: 401866
loss: 0.9903305172920227,grad_norm: 0.9101866668398406, iteration: 401867
loss: 1.0077426433563232,grad_norm: 0.8023273731082444, iteration: 401868
loss: 1.0374767780303955,grad_norm: 0.7253482518591133, iteration: 401869
loss: 0.9925455451011658,grad_norm: 0.7606962938898315, iteration: 401870
loss: 0.973686695098877,grad_norm: 0.8982527333657312, iteration: 401871
loss: 1.025609016418457,grad_norm: 0.6817694547868649, iteration: 401872
loss: 1.295209527015686,grad_norm: 0.9999993793888474, iteration: 401873
loss: 0.9877581596374512,grad_norm: 0.7054369782352723, iteration: 401874
loss: 1.0264171361923218,grad_norm: 0.7822028655271068, iteration: 401875
loss: 1.0054452419281006,grad_norm: 0.7933276472923371, iteration: 401876
loss: 1.032355785369873,grad_norm: 0.8542455328993468, iteration: 401877
loss: 1.012381672859192,grad_norm: 0.8034175827474218, iteration: 401878
loss: 1.0536490678787231,grad_norm: 0.9999995513645175, iteration: 401879
loss: 0.9708572626113892,grad_norm: 0.6637393676287915, iteration: 401880
loss: 1.019856333732605,grad_norm: 0.6872337944708897, iteration: 401881
loss: 0.9927622675895691,grad_norm: 0.9999998553777232, iteration: 401882
loss: 0.9821519255638123,grad_norm: 0.9999999382500515, iteration: 401883
loss: 0.999222993850708,grad_norm: 0.9999995538488847, iteration: 401884
loss: 0.9836965203285217,grad_norm: 0.7855931186839805, iteration: 401885
loss: 1.0087224245071411,grad_norm: 0.6390257245009242, iteration: 401886
loss: 0.9962103962898254,grad_norm: 0.9074120249107697, iteration: 401887
loss: 1.0428065061569214,grad_norm: 0.7219563559224137, iteration: 401888
loss: 1.0020042657852173,grad_norm: 0.90133379830022, iteration: 401889
loss: 0.9926366806030273,grad_norm: 0.7834432751898963, iteration: 401890
loss: 1.0023630857467651,grad_norm: 0.8146687961124814, iteration: 401891
loss: 1.015254020690918,grad_norm: 0.8179280991128931, iteration: 401892
loss: 1.0465521812438965,grad_norm: 0.8965063521346481, iteration: 401893
loss: 0.9914903044700623,grad_norm: 0.9999997376782316, iteration: 401894
loss: 1.0381635427474976,grad_norm: 0.9784601549698189, iteration: 401895
loss: 1.056639313697815,grad_norm: 0.9947668776987644, iteration: 401896
loss: 0.9929126501083374,grad_norm: 0.9999993938877931, iteration: 401897
loss: 1.031121850013733,grad_norm: 0.8910519544153379, iteration: 401898
loss: 1.0161305665969849,grad_norm: 0.8123215816574263, iteration: 401899
loss: 1.1561681032180786,grad_norm: 0.999999321390475, iteration: 401900
loss: 1.0351004600524902,grad_norm: 0.8169113372559146, iteration: 401901
loss: 1.0090289115905762,grad_norm: 0.7224798268502571, iteration: 401902
loss: 0.977039635181427,grad_norm: 0.8313899891222375, iteration: 401903
loss: 0.9953215718269348,grad_norm: 0.8571039344677683, iteration: 401904
loss: 1.0553089380264282,grad_norm: 0.9999994709567149, iteration: 401905
loss: 0.9969422221183777,grad_norm: 0.7132153954231918, iteration: 401906
loss: 0.998930037021637,grad_norm: 0.7454368721519866, iteration: 401907
loss: 1.008846640586853,grad_norm: 0.9880895936422388, iteration: 401908
loss: 1.0677543878555298,grad_norm: 0.9999993294725734, iteration: 401909
loss: 1.0083045959472656,grad_norm: 0.7579741049036357, iteration: 401910
loss: 1.0945825576782227,grad_norm: 0.9999999168030292, iteration: 401911
loss: 1.0048713684082031,grad_norm: 0.9444191218023938, iteration: 401912
loss: 1.0027862787246704,grad_norm: 0.7821489097094428, iteration: 401913
loss: 0.9915432333946228,grad_norm: 0.7425610130704078, iteration: 401914
loss: 1.0022517442703247,grad_norm: 0.8817317998890827, iteration: 401915
loss: 1.068485140800476,grad_norm: 0.8201116611286647, iteration: 401916
loss: 1.0044218301773071,grad_norm: 0.9034029968440377, iteration: 401917
loss: 1.1060642004013062,grad_norm: 0.9683740545481347, iteration: 401918
loss: 1.0396617650985718,grad_norm: 0.9999993551289796, iteration: 401919
loss: 1.262933611869812,grad_norm: 1.0000000328525798, iteration: 401920
loss: 0.9753623604774475,grad_norm: 0.8683312005645712, iteration: 401921
loss: 0.9744318723678589,grad_norm: 0.8388612972995971, iteration: 401922
loss: 0.9875096678733826,grad_norm: 0.6690483240664068, iteration: 401923
loss: 1.1660277843475342,grad_norm: 0.9999998965830984, iteration: 401924
loss: 1.1165663003921509,grad_norm: 0.9999993923074303, iteration: 401925
loss: 1.0036424398422241,grad_norm: 0.7377076635881276, iteration: 401926
loss: 1.1332768201828003,grad_norm: 0.9999997407271775, iteration: 401927
loss: 1.1698548793792725,grad_norm: 0.9872849610131309, iteration: 401928
loss: 1.035706877708435,grad_norm: 0.7852721978518091, iteration: 401929
loss: 0.9997138381004333,grad_norm: 0.9999993034314654, iteration: 401930
loss: 1.0168412923812866,grad_norm: 0.7343974182055859, iteration: 401931
loss: 1.0650599002838135,grad_norm: 0.8970909416956542, iteration: 401932
loss: 1.0085399150848389,grad_norm: 0.9637488017723151, iteration: 401933
loss: 1.0081534385681152,grad_norm: 0.7818234913554898, iteration: 401934
loss: 1.0548465251922607,grad_norm: 0.9999993089611849, iteration: 401935
loss: 1.0669701099395752,grad_norm: 0.8199689862818031, iteration: 401936
loss: 0.994698166847229,grad_norm: 0.9999994378248677, iteration: 401937
loss: 0.9809287190437317,grad_norm: 0.7808607347280503, iteration: 401938
loss: 0.9917341470718384,grad_norm: 0.6961590473017452, iteration: 401939
loss: 1.0454481840133667,grad_norm: 0.9999996751341255, iteration: 401940
loss: 1.05214524269104,grad_norm: 0.897116096914582, iteration: 401941
loss: 1.0020711421966553,grad_norm: 0.8182837205579434, iteration: 401942
loss: 1.1399602890014648,grad_norm: 0.999999899331101, iteration: 401943
loss: 0.9905762672424316,grad_norm: 0.9810095107149603, iteration: 401944
loss: 1.077494740486145,grad_norm: 0.8278573295435856, iteration: 401945
loss: 0.95457923412323,grad_norm: 0.8621674075744747, iteration: 401946
loss: 0.9538454413414001,grad_norm: 0.6527280421952295, iteration: 401947
loss: 0.9965278506278992,grad_norm: 0.7654333300754962, iteration: 401948
loss: 1.0199174880981445,grad_norm: 0.7510373201005951, iteration: 401949
loss: 1.01661217212677,grad_norm: 0.7343611492849003, iteration: 401950
loss: 1.0410951375961304,grad_norm: 0.8871183089221979, iteration: 401951
loss: 0.9802024960517883,grad_norm: 0.7031800745431105, iteration: 401952
loss: 0.9772852659225464,grad_norm: 0.759392476666125, iteration: 401953
loss: 1.0270016193389893,grad_norm: 0.9043931225298805, iteration: 401954
loss: 1.0556387901306152,grad_norm: 0.8517670801014557, iteration: 401955
loss: 1.0204592943191528,grad_norm: 0.8007917126664111, iteration: 401956
loss: 1.0348647832870483,grad_norm: 0.7507552360088656, iteration: 401957
loss: 1.032999873161316,grad_norm: 0.7802086262723108, iteration: 401958
loss: 0.9993844628334045,grad_norm: 0.9999990912393918, iteration: 401959
loss: 1.0484143495559692,grad_norm: 0.8797587602279391, iteration: 401960
loss: 1.0021867752075195,grad_norm: 0.5940211706374829, iteration: 401961
loss: 0.9952351450920105,grad_norm: 0.8066637142921139, iteration: 401962
loss: 1.0003951787948608,grad_norm: 0.8069154180175914, iteration: 401963
loss: 0.9866255521774292,grad_norm: 0.9999992130826472, iteration: 401964
loss: 1.1105918884277344,grad_norm: 0.6991868938471971, iteration: 401965
loss: 1.0337553024291992,grad_norm: 0.9999999072193214, iteration: 401966
loss: 0.9897150993347168,grad_norm: 0.8930256481740051, iteration: 401967
loss: 1.0363682508468628,grad_norm: 0.8546938447448863, iteration: 401968
loss: 0.976203978061676,grad_norm: 0.8897982453081841, iteration: 401969
loss: 1.0593479871749878,grad_norm: 1.0000000670504878, iteration: 401970
loss: 1.030184030532837,grad_norm: 0.720429286522165, iteration: 401971
loss: 1.024600625038147,grad_norm: 0.9999993744913225, iteration: 401972
loss: 1.0019818544387817,grad_norm: 0.7389334667142974, iteration: 401973
loss: 0.9897730946540833,grad_norm: 0.8258513078944372, iteration: 401974
loss: 0.9861069917678833,grad_norm: 0.9931358542679648, iteration: 401975
loss: 1.115745186805725,grad_norm: 0.9999996696653054, iteration: 401976
loss: 1.0563323497772217,grad_norm: 0.999999388992636, iteration: 401977
loss: 0.9887513518333435,grad_norm: 0.8183806168309972, iteration: 401978
loss: 1.0228337049484253,grad_norm: 0.9999995192343212, iteration: 401979
loss: 0.9673301577568054,grad_norm: 0.8284595705726573, iteration: 401980
loss: 1.0227086544036865,grad_norm: 0.9999991887060754, iteration: 401981
loss: 0.9964498281478882,grad_norm: 0.6692844085683666, iteration: 401982
loss: 1.018740177154541,grad_norm: 0.8354183951797343, iteration: 401983
loss: 1.0207477807998657,grad_norm: 0.8233797371805032, iteration: 401984
loss: 0.9978748559951782,grad_norm: 0.6525313096814173, iteration: 401985
loss: 1.0037392377853394,grad_norm: 0.7519701425805643, iteration: 401986
loss: 0.9809716939926147,grad_norm: 0.7932283939320619, iteration: 401987
loss: 1.014618992805481,grad_norm: 0.8016758792161089, iteration: 401988
loss: 1.0140451192855835,grad_norm: 0.8332956459968967, iteration: 401989
loss: 0.9991616010665894,grad_norm: 0.7216750842887446, iteration: 401990
loss: 1.036521553993225,grad_norm: 0.8445781284520832, iteration: 401991
loss: 1.027461051940918,grad_norm: 0.8659430606226317, iteration: 401992
loss: 0.9943327307701111,grad_norm: 0.7054001564280985, iteration: 401993
loss: 1.0459864139556885,grad_norm: 0.9349196799709328, iteration: 401994
loss: 0.9883894324302673,grad_norm: 0.7718347166244676, iteration: 401995
loss: 1.0303068161010742,grad_norm: 0.9999997697617593, iteration: 401996
loss: 0.9991321563720703,grad_norm: 0.7599495736148942, iteration: 401997
loss: 0.9708120226860046,grad_norm: 0.8885509451509648, iteration: 401998
loss: 1.0027611255645752,grad_norm: 0.9044533456435707, iteration: 401999
loss: 1.0031580924987793,grad_norm: 0.8050635239272224, iteration: 402000
loss: 0.9825271964073181,grad_norm: 0.8637415462938812, iteration: 402001
loss: 1.0939055681228638,grad_norm: 0.8016747029962527, iteration: 402002
loss: 1.0246556997299194,grad_norm: 0.8037229693141177, iteration: 402003
loss: 0.9998224377632141,grad_norm: 0.9999992102535357, iteration: 402004
loss: 1.0709741115570068,grad_norm: 0.7120078868703568, iteration: 402005
loss: 1.12444007396698,grad_norm: 0.9999994029107911, iteration: 402006
loss: 0.9749576449394226,grad_norm: 0.7722223816390139, iteration: 402007
loss: 1.0200953483581543,grad_norm: 0.8847758374752914, iteration: 402008
loss: 1.030340552330017,grad_norm: 0.7794726800500076, iteration: 402009
loss: 1.0380765199661255,grad_norm: 0.8785082191182055, iteration: 402010
loss: 1.0098373889923096,grad_norm: 0.9999991491774459, iteration: 402011
loss: 1.0223604440689087,grad_norm: 0.824666511755302, iteration: 402012
loss: 1.0027132034301758,grad_norm: 0.7834899612237782, iteration: 402013
loss: 1.0227915048599243,grad_norm: 0.6793693388857003, iteration: 402014
loss: 1.022250771522522,grad_norm: 0.8434328251316325, iteration: 402015
loss: 0.9973176717758179,grad_norm: 0.873226649627249, iteration: 402016
loss: 1.0410563945770264,grad_norm: 0.9454468763211906, iteration: 402017
loss: 1.0088887214660645,grad_norm: 0.8795382144353306, iteration: 402018
loss: 0.9958150386810303,grad_norm: 0.8606375741555456, iteration: 402019
loss: 0.9972019791603088,grad_norm: 0.7471662911648588, iteration: 402020
loss: 1.0068858861923218,grad_norm: 0.919360996272315, iteration: 402021
loss: 0.9965575933456421,grad_norm: 0.7253723894073525, iteration: 402022
loss: 0.9841489195823669,grad_norm: 0.836001326926242, iteration: 402023
loss: 1.0445897579193115,grad_norm: 0.9999995569232947, iteration: 402024
loss: 1.0020732879638672,grad_norm: 0.8345567083829517, iteration: 402025
loss: 1.0045133829116821,grad_norm: 0.9999995673435795, iteration: 402026
loss: 1.0175248384475708,grad_norm: 0.7721786106355207, iteration: 402027
loss: 0.9788937568664551,grad_norm: 0.8129856250758482, iteration: 402028
loss: 1.0479052066802979,grad_norm: 0.9999996762781466, iteration: 402029
loss: 0.9782306551933289,grad_norm: 0.8003139552978273, iteration: 402030
loss: 0.9508558511734009,grad_norm: 0.8458859582819451, iteration: 402031
loss: 1.004071831703186,grad_norm: 0.7354511935412823, iteration: 402032
loss: 1.031227469444275,grad_norm: 0.9999999005509881, iteration: 402033
loss: 0.983748733997345,grad_norm: 0.6377080046749601, iteration: 402034
loss: 1.0243428945541382,grad_norm: 0.8653248055359641, iteration: 402035
loss: 1.026282548904419,grad_norm: 0.797797503081032, iteration: 402036
loss: 1.0184789896011353,grad_norm: 0.9999998462582959, iteration: 402037
loss: 0.9947647452354431,grad_norm: 0.8279011022970343, iteration: 402038
loss: 0.9901805520057678,grad_norm: 0.7773818334496511, iteration: 402039
loss: 0.9915890693664551,grad_norm: 0.8175564120406209, iteration: 402040
loss: 0.9686956405639648,grad_norm: 0.7697946205016993, iteration: 402041
loss: 0.9806168675422668,grad_norm: 0.8653004669080365, iteration: 402042
loss: 1.0206751823425293,grad_norm: 0.9999991701977186, iteration: 402043
loss: 1.0378459692001343,grad_norm: 0.7965342659551017, iteration: 402044
loss: 1.0511528253555298,grad_norm: 0.7602427670018538, iteration: 402045
loss: 1.0041759014129639,grad_norm: 0.9442391345564057, iteration: 402046
loss: 1.028075098991394,grad_norm: 0.8871775041067524, iteration: 402047
loss: 0.9927096366882324,grad_norm: 0.7462156253079582, iteration: 402048
loss: 1.0670572519302368,grad_norm: 0.8840651578155715, iteration: 402049
loss: 1.0279209613800049,grad_norm: 0.855143215884647, iteration: 402050
loss: 1.0037504434585571,grad_norm: 0.9999991478678935, iteration: 402051
loss: 1.0754001140594482,grad_norm: 0.8893795063245999, iteration: 402052
loss: 0.9859377145767212,grad_norm: 0.9999991123205708, iteration: 402053
loss: 0.9953474998474121,grad_norm: 0.9999991523759654, iteration: 402054
loss: 0.9867165088653564,grad_norm: 0.7122540720873853, iteration: 402055
loss: 0.9949386119842529,grad_norm: 0.7460879151216286, iteration: 402056
loss: 1.0346359014511108,grad_norm: 0.6846566198333118, iteration: 402057
loss: 1.0038410425186157,grad_norm: 0.9999999777712123, iteration: 402058
loss: 1.0058666467666626,grad_norm: 0.9999994161769881, iteration: 402059
loss: 1.0401910543441772,grad_norm: 0.9178547513013298, iteration: 402060
loss: 0.9627566337585449,grad_norm: 0.7886947181931491, iteration: 402061
loss: 1.0360217094421387,grad_norm: 0.7411625704433367, iteration: 402062
loss: 1.0787596702575684,grad_norm: 0.9999993608766168, iteration: 402063
loss: 1.106163501739502,grad_norm: 0.9999990370286258, iteration: 402064
loss: 1.0281977653503418,grad_norm: 0.8396631339099374, iteration: 402065
loss: 0.9998174905776978,grad_norm: 0.9334756086601586, iteration: 402066
loss: 0.9816646575927734,grad_norm: 0.762780102789608, iteration: 402067
loss: 1.0436863899230957,grad_norm: 0.9860537174807137, iteration: 402068
loss: 1.0334067344665527,grad_norm: 0.861038639495406, iteration: 402069
loss: 0.9836403131484985,grad_norm: 0.7533607502765644, iteration: 402070
loss: 1.0354368686676025,grad_norm: 0.8528290114939084, iteration: 402071
loss: 0.9884303212165833,grad_norm: 0.7522424649933761, iteration: 402072
loss: 1.0095573663711548,grad_norm: 0.8623376015586844, iteration: 402073
loss: 1.0423182249069214,grad_norm: 0.9999994636365855, iteration: 402074
loss: 0.9995524883270264,grad_norm: 0.7651660563994651, iteration: 402075
loss: 0.9643212556838989,grad_norm: 0.9867936154294991, iteration: 402076
loss: 1.0196285247802734,grad_norm: 0.9999999953397687, iteration: 402077
loss: 1.0752204656600952,grad_norm: 0.8190933959125425, iteration: 402078
loss: 0.9940475821495056,grad_norm: 0.7450814048857748, iteration: 402079
loss: 0.9945866465568542,grad_norm: 0.9999990659638652, iteration: 402080
loss: 1.022688627243042,grad_norm: 0.8152522017607813, iteration: 402081
loss: 0.9791508316993713,grad_norm: 0.9999991342075297, iteration: 402082
loss: 0.9890204071998596,grad_norm: 0.9444860058208834, iteration: 402083
loss: 1.078141212463379,grad_norm: 0.8285129173660567, iteration: 402084
loss: 1.0043362379074097,grad_norm: 0.7350138126968632, iteration: 402085
loss: 0.9726021885871887,grad_norm: 0.7283594555221035, iteration: 402086
loss: 0.9931308627128601,grad_norm: 0.6763778099767632, iteration: 402087
loss: 1.0099660158157349,grad_norm: 0.8563002089593295, iteration: 402088
loss: 0.9591602683067322,grad_norm: 0.9084625720772884, iteration: 402089
loss: 0.9878515005111694,grad_norm: 0.8484332100589305, iteration: 402090
loss: 1.0129225254058838,grad_norm: 0.716871863882897, iteration: 402091
loss: 0.9880222082138062,grad_norm: 0.8833377050377736, iteration: 402092
loss: 1.0121821165084839,grad_norm: 0.999999422827603, iteration: 402093
loss: 1.0220314264297485,grad_norm: 0.8604028668372594, iteration: 402094
loss: 1.0063179731369019,grad_norm: 0.735441543782726, iteration: 402095
loss: 0.9557962417602539,grad_norm: 0.8575690804898416, iteration: 402096
loss: 1.0163651704788208,grad_norm: 0.8041412259114868, iteration: 402097
loss: 0.9873713850975037,grad_norm: 0.7507560570085171, iteration: 402098
loss: 0.9799319505691528,grad_norm: 0.999999188039257, iteration: 402099
loss: 1.021636962890625,grad_norm: 0.8202916956288472, iteration: 402100
loss: 1.0284857749938965,grad_norm: 0.7961458213800366, iteration: 402101
loss: 1.016432523727417,grad_norm: 0.7048530954286765, iteration: 402102
loss: 1.0297045707702637,grad_norm: 0.7507157743122386, iteration: 402103
loss: 0.976220428943634,grad_norm: 0.7086578568314343, iteration: 402104
loss: 1.0174343585968018,grad_norm: 0.8156186557553722, iteration: 402105
loss: 0.9788839221000671,grad_norm: 0.999999113649341, iteration: 402106
loss: 1.2084283828735352,grad_norm: 0.9999995456837919, iteration: 402107
loss: 1.0095739364624023,grad_norm: 0.8451765290257527, iteration: 402108
loss: 0.9887911677360535,grad_norm: 0.8431116259271442, iteration: 402109
loss: 1.0395562648773193,grad_norm: 0.9300301597692254, iteration: 402110
loss: 1.0281635522842407,grad_norm: 0.999999211015093, iteration: 402111
loss: 1.0096389055252075,grad_norm: 0.888640528519838, iteration: 402112
loss: 1.1135824918746948,grad_norm: 0.9188023108844099, iteration: 402113
loss: 0.9707362651824951,grad_norm: 0.8581061045936685, iteration: 402114
loss: 1.0124441385269165,grad_norm: 0.9203100207684866, iteration: 402115
loss: 1.0132761001586914,grad_norm: 0.7581506543728392, iteration: 402116
loss: 0.9898553490638733,grad_norm: 0.9999994647443043, iteration: 402117
loss: 1.0365560054779053,grad_norm: 0.7108528526051473, iteration: 402118
loss: 1.0146772861480713,grad_norm: 0.725011074303643, iteration: 402119
loss: 0.9837437272071838,grad_norm: 0.925765296471741, iteration: 402120
loss: 1.0738087892532349,grad_norm: 0.7369719926323849, iteration: 402121
loss: 0.9828845262527466,grad_norm: 0.8662530155009189, iteration: 402122
loss: 1.0445846319198608,grad_norm: 0.9999998725452767, iteration: 402123
loss: 0.9919273853302002,grad_norm: 0.8399522642299664, iteration: 402124
loss: 1.0105690956115723,grad_norm: 0.9999999716657783, iteration: 402125
loss: 0.9884059429168701,grad_norm: 0.7335977918200414, iteration: 402126
loss: 1.00127112865448,grad_norm: 0.7542234929956072, iteration: 402127
loss: 0.9541473984718323,grad_norm: 0.8022463635384237, iteration: 402128
loss: 1.1598397493362427,grad_norm: 0.999999835486503, iteration: 402129
loss: 1.0312156677246094,grad_norm: 0.6977220018952931, iteration: 402130
loss: 1.0487381219863892,grad_norm: 0.7268634773292013, iteration: 402131
loss: 0.9987595677375793,grad_norm: 0.7662565031732104, iteration: 402132
loss: 0.9644668102264404,grad_norm: 0.9704108834703101, iteration: 402133
loss: 1.0253958702087402,grad_norm: 0.8716548114246545, iteration: 402134
loss: 1.0034430027008057,grad_norm: 0.9999998212475114, iteration: 402135
loss: 1.058239221572876,grad_norm: 0.9999990865749211, iteration: 402136
loss: 1.0166208744049072,grad_norm: 0.8397728829775886, iteration: 402137
loss: 1.1134710311889648,grad_norm: 0.9999997411795568, iteration: 402138
loss: 0.98191899061203,grad_norm: 0.680252354928048, iteration: 402139
loss: 0.989476203918457,grad_norm: 0.6849540169416497, iteration: 402140
loss: 0.9604642391204834,grad_norm: 0.8703564531179505, iteration: 402141
loss: 0.9952960014343262,grad_norm: 0.7351842918883811, iteration: 402142
loss: 1.0049558877944946,grad_norm: 0.8209510796282923, iteration: 402143
loss: 1.0566678047180176,grad_norm: 0.9999990537627867, iteration: 402144
loss: 1.0035505294799805,grad_norm: 0.779501811725735, iteration: 402145
loss: 1.0487250089645386,grad_norm: 0.7167714263377729, iteration: 402146
loss: 0.9932832717895508,grad_norm: 0.7498033656675083, iteration: 402147
loss: 0.995123565196991,grad_norm: 0.8031114874948418, iteration: 402148
loss: 1.0199886560440063,grad_norm: 0.8771260618478728, iteration: 402149
loss: 1.0059157609939575,grad_norm: 0.9387801419301399, iteration: 402150
loss: 0.9542742967605591,grad_norm: 0.7538004497374835, iteration: 402151
loss: 0.9965721964836121,grad_norm: 0.8843275755989057, iteration: 402152
loss: 0.9896895885467529,grad_norm: 0.6883446651598161, iteration: 402153
loss: 1.0060501098632812,grad_norm: 0.879342312189247, iteration: 402154
loss: 1.1563856601715088,grad_norm: 0.9999998990995923, iteration: 402155
loss: 0.9915732741355896,grad_norm: 0.951019788432472, iteration: 402156
loss: 0.9770987033843994,grad_norm: 0.8219622204585416, iteration: 402157
loss: 1.039869785308838,grad_norm: 0.7780355880910409, iteration: 402158
loss: 1.0873744487762451,grad_norm: 0.9999994196638265, iteration: 402159
loss: 0.9988395571708679,grad_norm: 0.7862870921426711, iteration: 402160
loss: 0.9784529209136963,grad_norm: 0.8294684060766346, iteration: 402161
loss: 1.0437577962875366,grad_norm: 0.9663175075929727, iteration: 402162
loss: 0.9958505630493164,grad_norm: 0.8045902926593995, iteration: 402163
loss: 1.003834843635559,grad_norm: 0.9441329314645067, iteration: 402164
loss: 1.0008717775344849,grad_norm: 0.7501103185934082, iteration: 402165
loss: 1.0238863229751587,grad_norm: 0.9999994158108534, iteration: 402166
loss: 0.9853833913803101,grad_norm: 0.7912707762288492, iteration: 402167
loss: 1.0317937135696411,grad_norm: 0.72470804194965, iteration: 402168
loss: 0.9833207130432129,grad_norm: 0.9999991961570122, iteration: 402169
loss: 1.074051856994629,grad_norm: 0.9999997898489768, iteration: 402170
loss: 0.9855513572692871,grad_norm: 0.8896285089246097, iteration: 402171
loss: 1.0002225637435913,grad_norm: 0.7452887079704844, iteration: 402172
loss: 1.017647385597229,grad_norm: 0.8327154271586302, iteration: 402173
loss: 1.0324641466140747,grad_norm: 0.9999992547321871, iteration: 402174
loss: 1.0501536130905151,grad_norm: 0.9999995597838248, iteration: 402175
loss: 0.97447270154953,grad_norm: 0.6845992645894718, iteration: 402176
loss: 0.982771635055542,grad_norm: 0.9821686982703959, iteration: 402177
loss: 0.9942829012870789,grad_norm: 0.76494119010443, iteration: 402178
loss: 1.0684411525726318,grad_norm: 0.9999992888387673, iteration: 402179
loss: 0.9918673038482666,grad_norm: 0.7887330049577208, iteration: 402180
loss: 0.9880999326705933,grad_norm: 0.7256499347548849, iteration: 402181
loss: 1.0064325332641602,grad_norm: 0.7797797211721799, iteration: 402182
loss: 1.0293772220611572,grad_norm: 0.9999991275182106, iteration: 402183
loss: 1.0736726522445679,grad_norm: 0.9999991127794464, iteration: 402184
loss: 1.0088050365447998,grad_norm: 0.8542027409926929, iteration: 402185
loss: 0.9713477492332458,grad_norm: 0.8631417970342801, iteration: 402186
loss: 1.0147018432617188,grad_norm: 0.9999996745777441, iteration: 402187
loss: 0.9829187989234924,grad_norm: 0.7337905178661747, iteration: 402188
loss: 1.0325015783309937,grad_norm: 0.9620677023816341, iteration: 402189
loss: 1.0039130449295044,grad_norm: 0.7036168769987737, iteration: 402190
loss: 0.9990298748016357,grad_norm: 0.908330277822517, iteration: 402191
loss: 1.0142900943756104,grad_norm: 0.9029820125430329, iteration: 402192
loss: 0.968381404876709,grad_norm: 0.6790857750258764, iteration: 402193
loss: 1.020520567893982,grad_norm: 0.881011747728874, iteration: 402194
loss: 1.0122636556625366,grad_norm: 0.7365586261797488, iteration: 402195
loss: 1.032130479812622,grad_norm: 0.9999995293896172, iteration: 402196
loss: 0.9986960291862488,grad_norm: 0.9713113107825804, iteration: 402197
loss: 1.0364607572555542,grad_norm: 0.9999999179627991, iteration: 402198
loss: 1.0084837675094604,grad_norm: 0.7411511145184102, iteration: 402199
loss: 1.0148205757141113,grad_norm: 0.9281438137369364, iteration: 402200
loss: 0.989741861820221,grad_norm: 0.7088812384357188, iteration: 402201
loss: 1.0334292650222778,grad_norm: 0.7475277921829849, iteration: 402202
loss: 1.0277230739593506,grad_norm: 0.7446438554863346, iteration: 402203
loss: 0.9870895147323608,grad_norm: 0.7099436394511193, iteration: 402204
loss: 1.0492537021636963,grad_norm: 0.9999992913966745, iteration: 402205
loss: 0.997636079788208,grad_norm: 0.6198396278542705, iteration: 402206
loss: 1.0578168630599976,grad_norm: 0.8225664458279497, iteration: 402207
loss: 1.0090352296829224,grad_norm: 0.6848929686514397, iteration: 402208
loss: 1.0299240350723267,grad_norm: 0.7966268407809869, iteration: 402209
loss: 0.9742594361305237,grad_norm: 0.9408838695483103, iteration: 402210
loss: 0.9714123010635376,grad_norm: 0.7835314578660438, iteration: 402211
loss: 0.970475435256958,grad_norm: 0.7109533621864107, iteration: 402212
loss: 1.0162445306777954,grad_norm: 0.8183778426709568, iteration: 402213
loss: 0.9987064003944397,grad_norm: 0.6816408245573353, iteration: 402214
loss: 0.9860557317733765,grad_norm: 0.6521205213148247, iteration: 402215
loss: 0.9766813516616821,grad_norm: 0.8033677363039041, iteration: 402216
loss: 1.129637598991394,grad_norm: 0.9999995204632012, iteration: 402217
loss: 1.0120604038238525,grad_norm: 0.9996413562424569, iteration: 402218
loss: 0.9873610138893127,grad_norm: 0.7564270508496879, iteration: 402219
loss: 1.0028355121612549,grad_norm: 0.8182325954451262, iteration: 402220
loss: 1.0231845378875732,grad_norm: 0.8951411660193281, iteration: 402221
loss: 0.9932379722595215,grad_norm: 0.7829026126252767, iteration: 402222
loss: 1.0165621042251587,grad_norm: 0.88242040265415, iteration: 402223
loss: 0.9650645852088928,grad_norm: 0.9464295090616646, iteration: 402224
loss: 1.026647686958313,grad_norm: 0.999999169211303, iteration: 402225
loss: 1.1148265600204468,grad_norm: 0.9999998172037186, iteration: 402226
loss: 1.0065313577651978,grad_norm: 0.9999990843182006, iteration: 402227
loss: 0.9726234078407288,grad_norm: 0.8847871335128038, iteration: 402228
loss: 1.112119436264038,grad_norm: 0.9999998308521953, iteration: 402229
loss: 1.0435670614242554,grad_norm: 0.838726674542999, iteration: 402230
loss: 0.9865663051605225,grad_norm: 0.6865672002441724, iteration: 402231
loss: 1.0078920125961304,grad_norm: 0.9999991888634576, iteration: 402232
loss: 0.9625331163406372,grad_norm: 0.961567266549386, iteration: 402233
loss: 1.0047637224197388,grad_norm: 0.7408356706652326, iteration: 402234
loss: 1.009576678276062,grad_norm: 0.7940280564905788, iteration: 402235
loss: 0.9883450865745544,grad_norm: 0.8312742594198788, iteration: 402236
loss: 0.9860363006591797,grad_norm: 0.9999992945496333, iteration: 402237
loss: 1.037118911743164,grad_norm: 0.9999993847330786, iteration: 402238
loss: 0.9919852018356323,grad_norm: 0.8227048736142377, iteration: 402239
loss: 0.9837559461593628,grad_norm: 0.7368712284413524, iteration: 402240
loss: 0.9825113415718079,grad_norm: 0.838682686753231, iteration: 402241
loss: 1.0186009407043457,grad_norm: 0.8282623778705626, iteration: 402242
loss: 0.9751579761505127,grad_norm: 0.8281477256944119, iteration: 402243
loss: 1.0553516149520874,grad_norm: 0.7417341392977074, iteration: 402244
loss: 1.0507934093475342,grad_norm: 0.9999996513218334, iteration: 402245
loss: 1.009280800819397,grad_norm: 0.8479275881164848, iteration: 402246
loss: 1.023952841758728,grad_norm: 0.7281824927424894, iteration: 402247
loss: 0.9988727569580078,grad_norm: 0.8554034740496895, iteration: 402248
loss: 1.0178186893463135,grad_norm: 0.6962812427100862, iteration: 402249
loss: 1.0096521377563477,grad_norm: 0.8383997412302372, iteration: 402250
loss: 0.9706220030784607,grad_norm: 0.7524224903174004, iteration: 402251
loss: 1.0368165969848633,grad_norm: 0.8274912987325038, iteration: 402252
loss: 0.9798755049705505,grad_norm: 0.6887156458378504, iteration: 402253
loss: 1.0232151746749878,grad_norm: 0.9999989284408946, iteration: 402254
loss: 0.9940789937973022,grad_norm: 0.719997685413263, iteration: 402255
loss: 1.0370302200317383,grad_norm: 0.9686842464999301, iteration: 402256
loss: 0.9906793236732483,grad_norm: 0.9086158960506111, iteration: 402257
loss: 0.9917837977409363,grad_norm: 0.7542468140117933, iteration: 402258
loss: 1.0244042873382568,grad_norm: 0.7263901870550028, iteration: 402259
loss: 0.9663726687431335,grad_norm: 0.878190296148074, iteration: 402260
loss: 1.0262203216552734,grad_norm: 0.7318722657551678, iteration: 402261
loss: 1.0065581798553467,grad_norm: 0.6934777233542498, iteration: 402262
loss: 1.0014519691467285,grad_norm: 0.7083962076677469, iteration: 402263
loss: 1.0037659406661987,grad_norm: 0.9999999713441551, iteration: 402264
loss: 0.9952260255813599,grad_norm: 0.9999991313240414, iteration: 402265
loss: 0.9431499242782593,grad_norm: 0.7562957727594005, iteration: 402266
loss: 1.0170766115188599,grad_norm: 0.8519985505085297, iteration: 402267
loss: 1.0189640522003174,grad_norm: 0.815523893653202, iteration: 402268
loss: 0.9926720261573792,grad_norm: 0.9032827836899691, iteration: 402269
loss: 1.0092530250549316,grad_norm: 0.9481345058394347, iteration: 402270
loss: 1.0024136304855347,grad_norm: 0.7210084972925744, iteration: 402271
loss: 1.0374507904052734,grad_norm: 0.9962793034752071, iteration: 402272
loss: 1.0207171440124512,grad_norm: 0.7177267763535463, iteration: 402273
loss: 0.9867639541625977,grad_norm: 0.8980027735389897, iteration: 402274
loss: 1.0854541063308716,grad_norm: 0.9999998447387485, iteration: 402275
loss: 1.0715241432189941,grad_norm: 0.6932664528934818, iteration: 402276
loss: 1.0350289344787598,grad_norm: 0.8566206712362127, iteration: 402277
loss: 1.0298683643341064,grad_norm: 0.7875083849979816, iteration: 402278
loss: 0.9920884966850281,grad_norm: 0.7011300859732944, iteration: 402279
loss: 0.9869720935821533,grad_norm: 0.7439125527628484, iteration: 402280
loss: 0.9841582775115967,grad_norm: 0.870260607347596, iteration: 402281
loss: 0.952947735786438,grad_norm: 0.7732329907428028, iteration: 402282
loss: 1.0061808824539185,grad_norm: 0.7847758180395721, iteration: 402283
loss: 0.9798247218132019,grad_norm: 0.7927075842863898, iteration: 402284
loss: 1.068640112876892,grad_norm: 0.8563088615031696, iteration: 402285
loss: 0.9926592111587524,grad_norm: 0.8905804113010338, iteration: 402286
loss: 1.0049070119857788,grad_norm: 0.765641651285376, iteration: 402287
loss: 1.0006288290023804,grad_norm: 0.8550604523726374, iteration: 402288
loss: 1.018025517463684,grad_norm: 0.7949785532012792, iteration: 402289
loss: 1.0180859565734863,grad_norm: 0.9614986201445707, iteration: 402290
loss: 0.9612482190132141,grad_norm: 0.7232207749556603, iteration: 402291
loss: 1.0257190465927124,grad_norm: 0.7398961410599478, iteration: 402292
loss: 1.008937954902649,grad_norm: 0.8732473641686579, iteration: 402293
loss: 0.9717196226119995,grad_norm: 0.6974488388290481, iteration: 402294
loss: 0.9827421307563782,grad_norm: 0.7510508373183746, iteration: 402295
loss: 1.043315052986145,grad_norm: 0.9999998951325403, iteration: 402296
loss: 0.9900527596473694,grad_norm: 0.686954727599643, iteration: 402297
loss: 1.04458749294281,grad_norm: 0.7867219140102397, iteration: 402298
loss: 1.0220990180969238,grad_norm: 0.8733074659475136, iteration: 402299
loss: 1.1163454055786133,grad_norm: 0.9999991848224207, iteration: 402300
loss: 0.9811578989028931,grad_norm: 0.7986283212635211, iteration: 402301
loss: 1.0102577209472656,grad_norm: 0.8945863006053157, iteration: 402302
loss: 1.023644208908081,grad_norm: 0.7862862947479913, iteration: 402303
loss: 1.001476526260376,grad_norm: 0.9999998044512161, iteration: 402304
loss: 1.011924147605896,grad_norm: 0.8033899965913492, iteration: 402305
loss: 0.9714807271957397,grad_norm: 0.9999996213675699, iteration: 402306
loss: 0.9829139113426208,grad_norm: 0.7850193426512617, iteration: 402307
loss: 1.0042592287063599,grad_norm: 0.8452204721490815, iteration: 402308
loss: 1.0276051759719849,grad_norm: 0.999999182668093, iteration: 402309
loss: 1.093545913696289,grad_norm: 0.9999995927650505, iteration: 402310
loss: 0.9846216440200806,grad_norm: 0.9148854311438869, iteration: 402311
loss: 1.019318699836731,grad_norm: 0.7790191488094633, iteration: 402312
loss: 0.9872000217437744,grad_norm: 0.7507271345359234, iteration: 402313
loss: 1.0360599756240845,grad_norm: 0.9999991228096565, iteration: 402314
loss: 0.9837039709091187,grad_norm: 0.7626108470974726, iteration: 402315
loss: 1.022528052330017,grad_norm: 0.7439739740093728, iteration: 402316
loss: 1.0420957803726196,grad_norm: 0.7581896782649482, iteration: 402317
loss: 0.9647596478462219,grad_norm: 0.8148392192340133, iteration: 402318
loss: 0.9826224446296692,grad_norm: 0.6499512980605772, iteration: 402319
loss: 1.015290379524231,grad_norm: 0.7707912910952677, iteration: 402320
loss: 1.056668996810913,grad_norm: 0.9999993369802441, iteration: 402321
loss: 1.033517837524414,grad_norm: 0.9999998744995934, iteration: 402322
loss: 1.025445818901062,grad_norm: 0.9999995260102623, iteration: 402323
loss: 1.075034499168396,grad_norm: 0.9999996954196964, iteration: 402324
loss: 1.0230128765106201,grad_norm: 0.9179317675824203, iteration: 402325
loss: 1.0703203678131104,grad_norm: 0.8402582702614739, iteration: 402326
loss: 1.03029465675354,grad_norm: 0.7225929164652429, iteration: 402327
loss: 0.9847532510757446,grad_norm: 0.9082724311354716, iteration: 402328
loss: 0.9838533401489258,grad_norm: 0.7025842844357585, iteration: 402329
loss: 1.0685362815856934,grad_norm: 1.0000000306886023, iteration: 402330
loss: 1.0029783248901367,grad_norm: 0.7205209089284665, iteration: 402331
loss: 0.9706220626831055,grad_norm: 0.6421885681351113, iteration: 402332
loss: 0.9548780918121338,grad_norm: 0.7258181023539264, iteration: 402333
loss: 1.0198525190353394,grad_norm: 0.8272145772179889, iteration: 402334
loss: 1.008767008781433,grad_norm: 0.9877262193375534, iteration: 402335
loss: 0.9907253384590149,grad_norm: 0.6941896981243755, iteration: 402336
loss: 1.0875262022018433,grad_norm: 0.9999990204831013, iteration: 402337
loss: 1.0081478357315063,grad_norm: 0.7175645540439137, iteration: 402338
loss: 1.0565403699874878,grad_norm: 0.9999998693879708, iteration: 402339
loss: 1.0205199718475342,grad_norm: 0.9324766479471608, iteration: 402340
loss: 0.9843090176582336,grad_norm: 0.7058680321658939, iteration: 402341
loss: 1.0264989137649536,grad_norm: 0.9999994217786567, iteration: 402342
loss: 0.9815225005149841,grad_norm: 0.9956491478111659, iteration: 402343
loss: 0.9875123500823975,grad_norm: 0.7970593716772346, iteration: 402344
loss: 1.0639828443527222,grad_norm: 0.7804039286377312, iteration: 402345
loss: 0.9956930875778198,grad_norm: 0.8315966409479749, iteration: 402346
loss: 1.054340124130249,grad_norm: 0.940972265740421, iteration: 402347
loss: 1.023902177810669,grad_norm: 0.6630309112936231, iteration: 402348
loss: 0.9966524839401245,grad_norm: 0.7624984500792608, iteration: 402349
loss: 1.0215214490890503,grad_norm: 0.7838903765363543, iteration: 402350
loss: 0.9901074767112732,grad_norm: 0.8281717644084161, iteration: 402351
loss: 1.0156422853469849,grad_norm: 0.9208531457508644, iteration: 402352
loss: 1.0168492794036865,grad_norm: 0.9999999004238728, iteration: 402353
loss: 0.9553539752960205,grad_norm: 0.8793928893401817, iteration: 402354
loss: 1.0213922262191772,grad_norm: 0.7566609408223729, iteration: 402355
loss: 1.0071403980255127,grad_norm: 0.8140297281094999, iteration: 402356
loss: 0.9840009212493896,grad_norm: 0.7126434731425914, iteration: 402357
loss: 0.9961227178573608,grad_norm: 0.7504921616236055, iteration: 402358
loss: 0.9792917370796204,grad_norm: 0.7379951155405943, iteration: 402359
loss: 0.9842544198036194,grad_norm: 0.6780332963923387, iteration: 402360
loss: 0.9753175973892212,grad_norm: 0.7462102741390185, iteration: 402361
loss: 0.9774917364120483,grad_norm: 0.7363074283508617, iteration: 402362
loss: 1.0054932832717896,grad_norm: 0.8437818753338544, iteration: 402363
loss: 1.0208237171173096,grad_norm: 0.9999999631260458, iteration: 402364
loss: 1.029615879058838,grad_norm: 0.692010265266497, iteration: 402365
loss: 1.0173583030700684,grad_norm: 0.7489116097873876, iteration: 402366
loss: 0.9890199899673462,grad_norm: 0.7643731337322258, iteration: 402367
loss: 1.0751256942749023,grad_norm: 0.9999999064679671, iteration: 402368
loss: 0.9664570689201355,grad_norm: 0.703166187002261, iteration: 402369
loss: 0.9785118699073792,grad_norm: 0.8627379220002356, iteration: 402370
loss: 1.0088781118392944,grad_norm: 0.6962693076400105, iteration: 402371
loss: 1.0100538730621338,grad_norm: 0.7718747405018074, iteration: 402372
loss: 1.095813512802124,grad_norm: 0.9999994793432312, iteration: 402373
loss: 1.089192509651184,grad_norm: 0.9999998680157177, iteration: 402374
loss: 0.991199254989624,grad_norm: 0.9461231941122148, iteration: 402375
loss: 1.0249663591384888,grad_norm: 0.9040829317744241, iteration: 402376
loss: 1.0198707580566406,grad_norm: 0.9533388549374634, iteration: 402377
loss: 0.9939354658126831,grad_norm: 1.0000000141236296, iteration: 402378
loss: 1.0041733980178833,grad_norm: 0.7215366513483016, iteration: 402379
loss: 1.0578250885009766,grad_norm: 0.9999992494810511, iteration: 402380
loss: 1.0615793466567993,grad_norm: 0.7675707799110251, iteration: 402381
loss: 0.9929997324943542,grad_norm: 0.7090753241727886, iteration: 402382
loss: 0.9869257807731628,grad_norm: 0.7629226394644771, iteration: 402383
loss: 1.007978081703186,grad_norm: 0.781625294842502, iteration: 402384
loss: 1.0003098249435425,grad_norm: 0.848141574286963, iteration: 402385
loss: 1.027394413948059,grad_norm: 0.7991321013652484, iteration: 402386
loss: 1.0171151161193848,grad_norm: 0.8003966287165428, iteration: 402387
loss: 0.9820005297660828,grad_norm: 0.8591004084644536, iteration: 402388
loss: 1.1204547882080078,grad_norm: 0.9999998403775094, iteration: 402389
loss: 0.980242133140564,grad_norm: 0.895826814963279, iteration: 402390
loss: 0.9974855184555054,grad_norm: 0.9999996414276096, iteration: 402391
loss: 1.0092172622680664,grad_norm: 0.9999994358329499, iteration: 402392
loss: 0.9771883487701416,grad_norm: 0.9222420557106931, iteration: 402393
loss: 1.0034548044204712,grad_norm: 0.6577674503081052, iteration: 402394
loss: 1.004544734954834,grad_norm: 0.8419260285505333, iteration: 402395
loss: 1.019537091255188,grad_norm: 0.7579981178489136, iteration: 402396
loss: 0.9878873229026794,grad_norm: 0.7618970928299297, iteration: 402397
loss: 1.0034456253051758,grad_norm: 0.9999993025802938, iteration: 402398
loss: 1.0051727294921875,grad_norm: 0.7172973841563525, iteration: 402399
loss: 1.0048949718475342,grad_norm: 0.7665775929761608, iteration: 402400
loss: 0.9808154106140137,grad_norm: 0.672800274582335, iteration: 402401
loss: 1.002748727798462,grad_norm: 0.7869976719616573, iteration: 402402
loss: 1.0023568868637085,grad_norm: 0.9837699120641368, iteration: 402403
loss: 0.9971169233322144,grad_norm: 0.7679436725908078, iteration: 402404
loss: 0.9637781977653503,grad_norm: 0.793810642015755, iteration: 402405
loss: 1.001914143562317,grad_norm: 0.9999998184392891, iteration: 402406
loss: 0.9859424829483032,grad_norm: 0.7684642072675235, iteration: 402407
loss: 0.9872803092002869,grad_norm: 0.9534409512086511, iteration: 402408
loss: 1.02114737033844,grad_norm: 0.6797335676392631, iteration: 402409
loss: 0.9809422492980957,grad_norm: 0.7003484693016566, iteration: 402410
loss: 1.0308289527893066,grad_norm: 0.8523340355887441, iteration: 402411
loss: 1.032008171081543,grad_norm: 0.7676368399498443, iteration: 402412
loss: 1.00290048122406,grad_norm: 0.7839316888144897, iteration: 402413
loss: 0.9932355880737305,grad_norm: 0.8514853182423749, iteration: 402414
loss: 1.0328056812286377,grad_norm: 0.9307200168973212, iteration: 402415
loss: 1.061198353767395,grad_norm: 0.9999997423499268, iteration: 402416
loss: 1.0714424848556519,grad_norm: 0.9253817062889357, iteration: 402417
loss: 1.0109279155731201,grad_norm: 0.9414673033346932, iteration: 402418
loss: 0.9608808159828186,grad_norm: 0.8608294967649339, iteration: 402419
loss: 0.9792605042457581,grad_norm: 0.9999992250072457, iteration: 402420
loss: 1.0988279581069946,grad_norm: 0.9999990764858436, iteration: 402421
loss: 0.9786573648452759,grad_norm: 0.7199890022995823, iteration: 402422
loss: 1.0005065202713013,grad_norm: 0.6357854651506287, iteration: 402423
loss: 0.9881427884101868,grad_norm: 0.8877818504930248, iteration: 402424
loss: 0.9967055916786194,grad_norm: 0.7172982325248163, iteration: 402425
loss: 1.0306756496429443,grad_norm: 0.7160422058581821, iteration: 402426
loss: 1.006554126739502,grad_norm: 0.7193469193518326, iteration: 402427
loss: 1.0053274631500244,grad_norm: 0.797351504950787, iteration: 402428
loss: 1.010674238204956,grad_norm: 0.9651878211209758, iteration: 402429
loss: 0.9834827184677124,grad_norm: 0.9661115154770574, iteration: 402430
loss: 1.0386332273483276,grad_norm: 0.9475101865575594, iteration: 402431
loss: 0.9462741017341614,grad_norm: 0.7435988158082041, iteration: 402432
loss: 0.9783369302749634,grad_norm: 0.9237340960474468, iteration: 402433
loss: 1.025235652923584,grad_norm: 0.8385721944351787, iteration: 402434
loss: 1.0192923545837402,grad_norm: 0.8284872357775274, iteration: 402435
loss: 1.0395931005477905,grad_norm: 0.9999996533607056, iteration: 402436
loss: 1.0827434062957764,grad_norm: 0.999999888370449, iteration: 402437
loss: 1.0274654626846313,grad_norm: 0.8636398028605948, iteration: 402438
loss: 0.9940206408500671,grad_norm: 0.8617961688092425, iteration: 402439
loss: 0.9819018840789795,grad_norm: 0.7478690062691862, iteration: 402440
loss: 1.0288987159729004,grad_norm: 0.6778458718326532, iteration: 402441
loss: 1.0768061876296997,grad_norm: 0.9083475762514029, iteration: 402442
loss: 0.9662778377532959,grad_norm: 0.7771313028212252, iteration: 402443
loss: 1.010664701461792,grad_norm: 0.9081922823752122, iteration: 402444
loss: 1.0555338859558105,grad_norm: 0.9999997091430256, iteration: 402445
loss: 0.9958521127700806,grad_norm: 0.7826762350751282, iteration: 402446
loss: 0.985787570476532,grad_norm: 0.8998401193620624, iteration: 402447
loss: 1.0486243963241577,grad_norm: 0.8716948413498993, iteration: 402448
loss: 0.9761292934417725,grad_norm: 0.846780717755863, iteration: 402449
loss: 1.0048831701278687,grad_norm: 0.6638588035392533, iteration: 402450
loss: 1.008730173110962,grad_norm: 0.7234510373730518, iteration: 402451
loss: 1.0037871599197388,grad_norm: 0.8287802712774639, iteration: 402452
loss: 1.0308374166488647,grad_norm: 0.9640077318649877, iteration: 402453
loss: 1.005465030670166,grad_norm: 0.7165064456703696, iteration: 402454
loss: 0.9935346841812134,grad_norm: 0.9455465989917015, iteration: 402455
loss: 1.0130455493927002,grad_norm: 0.8683634188677797, iteration: 402456
loss: 0.9850329756736755,grad_norm: 0.7598027633172305, iteration: 402457
loss: 1.0295898914337158,grad_norm: 0.8052403891701696, iteration: 402458
loss: 1.0292471647262573,grad_norm: 0.6462052097779705, iteration: 402459
loss: 0.9807678461074829,grad_norm: 0.9999994917310611, iteration: 402460
loss: 1.0201255083084106,grad_norm: 0.8846316330831759, iteration: 402461
loss: 1.0007727146148682,grad_norm: 0.8539559927970757, iteration: 402462
loss: 1.0235768556594849,grad_norm: 0.9999992659954116, iteration: 402463
loss: 1.001806378364563,grad_norm: 0.7315772176043837, iteration: 402464
loss: 0.9958102107048035,grad_norm: 0.8258027540940936, iteration: 402465
loss: 1.0097874402999878,grad_norm: 0.6668610896102919, iteration: 402466
loss: 0.996208906173706,grad_norm: 0.7689969943572719, iteration: 402467
loss: 0.986327052116394,grad_norm: 0.9999995687876715, iteration: 402468
loss: 1.0186644792556763,grad_norm: 0.796470205591722, iteration: 402469
loss: 0.9796433448791504,grad_norm: 0.7717631969487991, iteration: 402470
loss: 0.9857228994369507,grad_norm: 0.9035613805196546, iteration: 402471
loss: 1.0156745910644531,grad_norm: 0.7514665147151862, iteration: 402472
loss: 1.0736043453216553,grad_norm: 0.9999997431155413, iteration: 402473
loss: 1.011161208152771,grad_norm: 0.7202543515363257, iteration: 402474
loss: 0.9956101179122925,grad_norm: 0.9591412353781088, iteration: 402475
loss: 0.986622154712677,grad_norm: 0.7121146643595303, iteration: 402476
loss: 0.991186261177063,grad_norm: 0.9999995679206632, iteration: 402477
loss: 0.9825160503387451,grad_norm: 0.7713218913492643, iteration: 402478
loss: 0.99008709192276,grad_norm: 0.9223888453498194, iteration: 402479
loss: 0.9888296127319336,grad_norm: 0.8418030701458622, iteration: 402480
loss: 0.9913274645805359,grad_norm: 0.7647583148595662, iteration: 402481
loss: 0.9675319194793701,grad_norm: 0.6359034205792128, iteration: 402482
loss: 1.033591389656067,grad_norm: 0.8682882256149557, iteration: 402483
loss: 0.9936537742614746,grad_norm: 0.8445914117765732, iteration: 402484
loss: 1.0251537561416626,grad_norm: 0.8334534917244957, iteration: 402485
loss: 1.0252931118011475,grad_norm: 0.9999993462826167, iteration: 402486
loss: 0.9855175018310547,grad_norm: 0.8345594799837684, iteration: 402487
loss: 1.027733564376831,grad_norm: 0.7870146017186519, iteration: 402488
loss: 1.0461266040802002,grad_norm: 0.7252425825686492, iteration: 402489
loss: 1.0166069269180298,grad_norm: 0.7947160170422094, iteration: 402490
loss: 1.0115779638290405,grad_norm: 0.7809826767293407, iteration: 402491
loss: 0.9810757040977478,grad_norm: 0.7703677517240807, iteration: 402492
loss: 1.00678288936615,grad_norm: 0.7028422013052731, iteration: 402493
loss: 0.9979654550552368,grad_norm: 0.82291030168094, iteration: 402494
loss: 1.0097647905349731,grad_norm: 0.979403125102546, iteration: 402495
loss: 1.0090742111206055,grad_norm: 0.9590353346917326, iteration: 402496
loss: 1.0088127851486206,grad_norm: 0.7184298760598705, iteration: 402497
loss: 1.045575737953186,grad_norm: 0.9999997163070877, iteration: 402498
loss: 0.980677604675293,grad_norm: 0.6638086683063158, iteration: 402499
loss: 1.0348317623138428,grad_norm: 0.8358050172970273, iteration: 402500
loss: 1.007851004600525,grad_norm: 0.6916084232497124, iteration: 402501
loss: 0.949680507183075,grad_norm: 0.7955668011284345, iteration: 402502
loss: 0.9942430257797241,grad_norm: 0.9273217253848477, iteration: 402503
loss: 0.978424072265625,grad_norm: 0.7917776253516229, iteration: 402504
loss: 1.0014597177505493,grad_norm: 0.9317769939099197, iteration: 402505
loss: 0.9388010501861572,grad_norm: 0.8172193091822003, iteration: 402506
loss: 1.1344038248062134,grad_norm: 0.9999990071625262, iteration: 402507
loss: 0.9883020520210266,grad_norm: 0.7202024731318095, iteration: 402508
loss: 0.9977787733078003,grad_norm: 0.6767368881054309, iteration: 402509
loss: 0.9562458992004395,grad_norm: 0.7500296498013403, iteration: 402510
loss: 1.0128353834152222,grad_norm: 0.9999991038017408, iteration: 402511
loss: 1.0453599691390991,grad_norm: 0.9999994192267998, iteration: 402512
loss: 1.0163522958755493,grad_norm: 0.9516054033224794, iteration: 402513
loss: 1.0050692558288574,grad_norm: 0.7301447754754293, iteration: 402514
loss: 0.998784065246582,grad_norm: 0.7648725356987253, iteration: 402515
loss: 0.9932052493095398,grad_norm: 0.7091655891684087, iteration: 402516
loss: 1.0054961442947388,grad_norm: 0.9999998408902516, iteration: 402517
loss: 1.0052789449691772,grad_norm: 0.7395346867095847, iteration: 402518
loss: 0.9704047441482544,grad_norm: 0.7555738139482244, iteration: 402519
loss: 0.9849799871444702,grad_norm: 0.9999998621760067, iteration: 402520
loss: 1.0054625272750854,grad_norm: 0.7349827442099579, iteration: 402521
loss: 0.9873815178871155,grad_norm: 0.8933960859776442, iteration: 402522
loss: 1.0067540407180786,grad_norm: 0.8196400394796427, iteration: 402523
loss: 0.9916194677352905,grad_norm: 0.7493514633068569, iteration: 402524
loss: 0.9906483888626099,grad_norm: 0.6832229867493675, iteration: 402525
loss: 1.095011591911316,grad_norm: 0.869408825859271, iteration: 402526
loss: 1.041074275970459,grad_norm: 0.7096822067551732, iteration: 402527
loss: 1.0083503723144531,grad_norm: 0.6933395194059286, iteration: 402528
loss: 1.008750319480896,grad_norm: 0.871018348170116, iteration: 402529
loss: 0.9847415089607239,grad_norm: 0.7405603528260238, iteration: 402530
loss: 1.0029219388961792,grad_norm: 0.9999992111119761, iteration: 402531
loss: 0.9839121699333191,grad_norm: 0.6907276956739538, iteration: 402532
loss: 0.982320249080658,grad_norm: 0.7509190265549134, iteration: 402533
loss: 0.9880456328392029,grad_norm: 0.8237768866176766, iteration: 402534
loss: 0.9303613305091858,grad_norm: 0.7744048996309857, iteration: 402535
loss: 1.0708061456680298,grad_norm: 0.9999997616941515, iteration: 402536
loss: 1.031201720237732,grad_norm: 0.7474334624636367, iteration: 402537
loss: 0.9809783101081848,grad_norm: 0.79521856012537, iteration: 402538
loss: 0.9820442199707031,grad_norm: 0.8225565560103995, iteration: 402539
loss: 0.9974494576454163,grad_norm: 0.6792506872398736, iteration: 402540
loss: 1.0292845964431763,grad_norm: 0.9117834720724346, iteration: 402541
loss: 1.0349012613296509,grad_norm: 0.9999996351580709, iteration: 402542
loss: 0.9647798538208008,grad_norm: 0.768909272751595, iteration: 402543
loss: 0.995631992816925,grad_norm: 0.7216775356982917, iteration: 402544
loss: 0.9886529445648193,grad_norm: 0.793491720979886, iteration: 402545
loss: 0.9797885417938232,grad_norm: 0.7249812171411981, iteration: 402546
loss: 0.9978957772254944,grad_norm: 0.7134108337658146, iteration: 402547
loss: 0.9809706211090088,grad_norm: 0.7124679348817335, iteration: 402548
loss: 1.0008866786956787,grad_norm: 0.9664523461199395, iteration: 402549
loss: 0.9880956411361694,grad_norm: 0.629371431632311, iteration: 402550
loss: 0.9852166175842285,grad_norm: 0.7551076894167984, iteration: 402551
loss: 0.9907785058021545,grad_norm: 0.9999992399053678, iteration: 402552
loss: 0.9847946763038635,grad_norm: 0.7252325262342085, iteration: 402553
loss: 1.0117807388305664,grad_norm: 0.805927189648055, iteration: 402554
loss: 1.0070351362228394,grad_norm: 0.7973817166930841, iteration: 402555
loss: 1.014762043952942,grad_norm: 0.9576994274852308, iteration: 402556
loss: 1.0303502082824707,grad_norm: 0.8096984956445973, iteration: 402557
loss: 1.0249779224395752,grad_norm: 0.9999991568557548, iteration: 402558
loss: 0.9915953278541565,grad_norm: 0.7556695471822348, iteration: 402559
loss: 0.9990260601043701,grad_norm: 0.732029324047198, iteration: 402560
loss: 1.0948913097381592,grad_norm: 0.9999992722743031, iteration: 402561
loss: 1.0260781049728394,grad_norm: 0.6477563767645899, iteration: 402562
loss: 1.0222358703613281,grad_norm: 0.9161440084299814, iteration: 402563
loss: 1.001758337020874,grad_norm: 0.7089787998601123, iteration: 402564
loss: 0.9877586364746094,grad_norm: 0.7983832903869074, iteration: 402565
loss: 0.9933009147644043,grad_norm: 0.9041871007618058, iteration: 402566
loss: 1.1210451126098633,grad_norm: 0.9999998626023928, iteration: 402567
loss: 1.0073015689849854,grad_norm: 0.7867493553758892, iteration: 402568
loss: 1.0020604133605957,grad_norm: 0.6455924542931761, iteration: 402569
loss: 1.000866174697876,grad_norm: 0.821496367895149, iteration: 402570
loss: 1.0023964643478394,grad_norm: 0.8020236153497429, iteration: 402571
loss: 1.0068728923797607,grad_norm: 0.7400895672794062, iteration: 402572
loss: 1.0301340818405151,grad_norm: 0.9999994835793772, iteration: 402573
loss: 1.1068642139434814,grad_norm: 0.9999998468197187, iteration: 402574
loss: 0.998515248298645,grad_norm: 0.7153971252517649, iteration: 402575
loss: 1.0076990127563477,grad_norm: 0.6653782774608098, iteration: 402576
loss: 0.9910151362419128,grad_norm: 0.8002518504312467, iteration: 402577
loss: 1.0260685682296753,grad_norm: 0.7465798198409109, iteration: 402578
loss: 1.0245652198791504,grad_norm: 0.697066957825037, iteration: 402579
loss: 0.9779244065284729,grad_norm: 0.7979765542740455, iteration: 402580
loss: 0.9762775897979736,grad_norm: 0.7894674702717192, iteration: 402581
loss: 0.9831047654151917,grad_norm: 0.7409470492770008, iteration: 402582
loss: 1.003442645072937,grad_norm: 0.749744330550147, iteration: 402583
loss: 0.9754105806350708,grad_norm: 0.9112592753833827, iteration: 402584
loss: 1.0145469903945923,grad_norm: 0.6496255546788672, iteration: 402585
loss: 1.0194551944732666,grad_norm: 0.856803292640649, iteration: 402586
loss: 1.0209320783615112,grad_norm: 0.7382361478854249, iteration: 402587
loss: 0.9773967862129211,grad_norm: 0.9999997626775009, iteration: 402588
loss: 0.9896283149719238,grad_norm: 0.9999989139070773, iteration: 402589
loss: 1.0137678384780884,grad_norm: 0.8229346723682274, iteration: 402590
loss: 1.031590223312378,grad_norm: 0.9563383426106732, iteration: 402591
loss: 1.0592460632324219,grad_norm: 0.9999999756932462, iteration: 402592
loss: 1.0013941526412964,grad_norm: 0.8709877528658684, iteration: 402593
loss: 1.0007258653640747,grad_norm: 0.999999314490324, iteration: 402594
loss: 1.0256386995315552,grad_norm: 0.6948731613485811, iteration: 402595
loss: 1.0200912952423096,grad_norm: 0.7556189856471901, iteration: 402596
loss: 1.0127865076065063,grad_norm: 0.8880780534477037, iteration: 402597
loss: 0.9390761256217957,grad_norm: 0.9999990712170697, iteration: 402598
loss: 1.0070602893829346,grad_norm: 0.9999990811938313, iteration: 402599
loss: 0.9860506653785706,grad_norm: 0.8481024211697835, iteration: 402600
loss: 0.9684945940971375,grad_norm: 0.981633273663952, iteration: 402601
loss: 1.0678879022598267,grad_norm: 1.0000000425747704, iteration: 402602
loss: 1.0205693244934082,grad_norm: 0.8411630963516371, iteration: 402603
loss: 0.9842822551727295,grad_norm: 0.8340253228754521, iteration: 402604
loss: 1.0102243423461914,grad_norm: 0.846179970150354, iteration: 402605
loss: 1.0065363645553589,grad_norm: 0.6334656001999068, iteration: 402606
loss: 0.9914869666099548,grad_norm: 0.9999990751215614, iteration: 402607
loss: 0.9856284856796265,grad_norm: 0.8473169114945515, iteration: 402608
loss: 1.002458095550537,grad_norm: 0.7494425481128114, iteration: 402609
loss: 1.016689658164978,grad_norm: 0.7969848753486114, iteration: 402610
loss: 1.0195988416671753,grad_norm: 0.9999992822221249, iteration: 402611
loss: 0.9957202672958374,grad_norm: 0.7114749689629376, iteration: 402612
loss: 1.0141154527664185,grad_norm: 0.7934715671880914, iteration: 402613
loss: 1.0444514751434326,grad_norm: 0.9166771370163919, iteration: 402614
loss: 1.0895159244537354,grad_norm: 0.999999635356189, iteration: 402615
loss: 0.9686092138290405,grad_norm: 0.8262399873815586, iteration: 402616
loss: 1.0045480728149414,grad_norm: 0.67290746825824, iteration: 402617
loss: 1.0074951648712158,grad_norm: 0.7694263753540103, iteration: 402618
loss: 0.9913371205329895,grad_norm: 0.8770332316430688, iteration: 402619
loss: 1.0149734020233154,grad_norm: 0.8613020753541567, iteration: 402620
loss: 1.005050539970398,grad_norm: 0.7881622685245508, iteration: 402621
loss: 1.0090715885162354,grad_norm: 0.7492139580604636, iteration: 402622
loss: 0.973422646522522,grad_norm: 0.8381324980263956, iteration: 402623
loss: 1.018717646598816,grad_norm: 0.9999995215250609, iteration: 402624
loss: 0.9704065322875977,grad_norm: 0.7207456493337827, iteration: 402625
loss: 1.0373622179031372,grad_norm: 0.7377004767876693, iteration: 402626
loss: 0.9604692459106445,grad_norm: 0.7637081938157873, iteration: 402627
loss: 1.009551763534546,grad_norm: 0.8029857963919509, iteration: 402628
loss: 1.0755928754806519,grad_norm: 0.7086751134826597, iteration: 402629
loss: 1.0719494819641113,grad_norm: 0.8423764934520559, iteration: 402630
loss: 0.998763382434845,grad_norm: 0.8476095931523231, iteration: 402631
loss: 0.996191680431366,grad_norm: 0.749010058279286, iteration: 402632
loss: 1.0169755220413208,grad_norm: 0.7404024458302734, iteration: 402633
loss: 0.9852668046951294,grad_norm: 0.8163726910080737, iteration: 402634
loss: 0.9690080285072327,grad_norm: 0.8414465017901565, iteration: 402635
loss: 1.0096163749694824,grad_norm: 0.8011044951627174, iteration: 402636
loss: 1.000800609588623,grad_norm: 0.5954209207244449, iteration: 402637
loss: 1.0158926248550415,grad_norm: 0.8939325118531389, iteration: 402638
loss: 1.0426054000854492,grad_norm: 0.7423891545508958, iteration: 402639
loss: 1.0175156593322754,grad_norm: 0.7192327429702032, iteration: 402640
loss: 1.153361439704895,grad_norm: 0.9999996411690629, iteration: 402641
loss: 1.0332788228988647,grad_norm: 0.9388090152701329, iteration: 402642
loss: 1.0236862897872925,grad_norm: 0.6523203895289301, iteration: 402643
loss: 0.9820680022239685,grad_norm: 0.7726700988948669, iteration: 402644
loss: 0.9997244477272034,grad_norm: 0.703822185829333, iteration: 402645
loss: 1.0816924571990967,grad_norm: 0.8098834219958554, iteration: 402646
loss: 1.0419378280639648,grad_norm: 0.9999991276192906, iteration: 402647
loss: 1.082581877708435,grad_norm: 0.9999995050209105, iteration: 402648
loss: 1.0034769773483276,grad_norm: 0.8806175645870928, iteration: 402649
loss: 1.117848515510559,grad_norm: 0.9564719405952811, iteration: 402650
loss: 1.0086669921875,grad_norm: 0.8040649618718091, iteration: 402651
loss: 1.02466881275177,grad_norm: 0.859846691582359, iteration: 402652
loss: 1.01166832447052,grad_norm: 0.9012891403638345, iteration: 402653
loss: 1.0194703340530396,grad_norm: 0.9623119171033577, iteration: 402654
loss: 1.1105241775512695,grad_norm: 0.999999864331635, iteration: 402655
loss: 1.0289108753204346,grad_norm: 0.9999992017738777, iteration: 402656
loss: 1.0295466184616089,grad_norm: 0.8816324724505223, iteration: 402657
loss: 0.9924469590187073,grad_norm: 0.8719819723146751, iteration: 402658
loss: 0.9864764213562012,grad_norm: 0.8596044932652734, iteration: 402659
loss: 1.0786573886871338,grad_norm: 0.7857091206951237, iteration: 402660
loss: 1.0158737897872925,grad_norm: 0.8650008681375886, iteration: 402661
loss: 0.9885907173156738,grad_norm: 0.8475012515748654, iteration: 402662
loss: 0.9814432859420776,grad_norm: 0.7932532697257549, iteration: 402663
loss: 1.024763822555542,grad_norm: 0.8215008924327637, iteration: 402664
loss: 0.9952300786972046,grad_norm: 0.8221373830401534, iteration: 402665
loss: 0.993195652961731,grad_norm: 0.6832710182770898, iteration: 402666
loss: 0.9876005053520203,grad_norm: 0.7151756423128757, iteration: 402667
loss: 0.9888161420822144,grad_norm: 0.9999990375487282, iteration: 402668
loss: 1.0290366411209106,grad_norm: 0.999999441355731, iteration: 402669
loss: 0.9802894592285156,grad_norm: 0.5917368451220805, iteration: 402670
loss: 1.0455176830291748,grad_norm: 0.999999398384446, iteration: 402671
loss: 1.0333406925201416,grad_norm: 0.9999999475668966, iteration: 402672
loss: 1.0169858932495117,grad_norm: 0.9999995118445164, iteration: 402673
loss: 1.0082495212554932,grad_norm: 0.9999997436149509, iteration: 402674
loss: 1.0025535821914673,grad_norm: 0.815605703512474, iteration: 402675
loss: 1.0431780815124512,grad_norm: 0.9999998562274725, iteration: 402676
loss: 1.0091667175292969,grad_norm: 0.8429502707040469, iteration: 402677
loss: 0.9860399961471558,grad_norm: 0.9176343352436518, iteration: 402678
loss: 1.0034396648406982,grad_norm: 0.9999999157480036, iteration: 402679
loss: 1.0464428663253784,grad_norm: 0.9999994943246467, iteration: 402680
loss: 1.0044093132019043,grad_norm: 0.8063837535802303, iteration: 402681
loss: 1.0344160795211792,grad_norm: 0.8334942267814445, iteration: 402682
loss: 0.9869602918624878,grad_norm: 0.7462839035124389, iteration: 402683
loss: 0.9798552989959717,grad_norm: 0.867445311347216, iteration: 402684
loss: 0.9771090745925903,grad_norm: 0.8476319189826945, iteration: 402685
loss: 0.9539337754249573,grad_norm: 0.7154765283198142, iteration: 402686
loss: 1.037479043006897,grad_norm: 0.9862040062106164, iteration: 402687
loss: 1.0930132865905762,grad_norm: 0.9999992484199592, iteration: 402688
loss: 1.0561825037002563,grad_norm: 0.9999993449990399, iteration: 402689
loss: 0.986723005771637,grad_norm: 0.9999991207981168, iteration: 402690
loss: 1.0045733451843262,grad_norm: 0.7608434472315903, iteration: 402691
loss: 1.0428247451782227,grad_norm: 0.8189207017056045, iteration: 402692
loss: 1.03121018409729,grad_norm: 0.9999994895071561, iteration: 402693
loss: 0.9810817241668701,grad_norm: 0.7263911745674121, iteration: 402694
loss: 1.0252172946929932,grad_norm: 0.7783483003173628, iteration: 402695
loss: 0.9933269023895264,grad_norm: 0.7416959677760009, iteration: 402696
loss: 1.0689353942871094,grad_norm: 0.9403936850063257, iteration: 402697
loss: 1.0107470750808716,grad_norm: 0.9999996755842389, iteration: 402698
loss: 0.9719248414039612,grad_norm: 0.8126960874380315, iteration: 402699
loss: 0.9762389063835144,grad_norm: 0.9619061032305822, iteration: 402700
loss: 1.0276963710784912,grad_norm: 0.9999998962559109, iteration: 402701
loss: 1.0179859399795532,grad_norm: 0.7255528191208407, iteration: 402702
loss: 1.0445752143859863,grad_norm: 0.8099219924709397, iteration: 402703
loss: 1.0482001304626465,grad_norm: 0.9999990028317405, iteration: 402704
loss: 0.9842043519020081,grad_norm: 0.8965963370754956, iteration: 402705
loss: 1.0104954242706299,grad_norm: 0.8518812421184339, iteration: 402706
loss: 0.9785945415496826,grad_norm: 0.8327703700593043, iteration: 402707
loss: 0.9720471501350403,grad_norm: 0.8667357323279551, iteration: 402708
loss: 0.9763153195381165,grad_norm: 0.7624534972114211, iteration: 402709
loss: 1.0057792663574219,grad_norm: 0.7347904048035951, iteration: 402710
loss: 1.0139561891555786,grad_norm: 0.9999992920817196, iteration: 402711
loss: 0.9786614775657654,grad_norm: 0.7155955605437027, iteration: 402712
loss: 0.9832650423049927,grad_norm: 0.7315123460848849, iteration: 402713
loss: 1.0008295774459839,grad_norm: 0.8119227866881761, iteration: 402714
loss: 1.0023685693740845,grad_norm: 0.9738894527510352, iteration: 402715
loss: 0.9844956398010254,grad_norm: 0.9999995563981416, iteration: 402716
loss: 1.0595731735229492,grad_norm: 0.8919199940751983, iteration: 402717
loss: 0.9710590839385986,grad_norm: 0.7468700941072937, iteration: 402718
loss: 0.9884299039840698,grad_norm: 0.8632746617086889, iteration: 402719
loss: 1.0129262208938599,grad_norm: 1.0000000048578297, iteration: 402720
loss: 1.012949824333191,grad_norm: 0.775612018637976, iteration: 402721
loss: 0.9729650616645813,grad_norm: 0.761859194509544, iteration: 402722
loss: 1.0350946187973022,grad_norm: 0.9059112496600168, iteration: 402723
loss: 0.9959380626678467,grad_norm: 0.8313906288609416, iteration: 402724
loss: 1.0202207565307617,grad_norm: 0.8663644094219431, iteration: 402725
loss: 0.9856112599372864,grad_norm: 0.9177572338955956, iteration: 402726
loss: 0.9892431497573853,grad_norm: 0.7426369757211558, iteration: 402727
loss: 0.9872720241546631,grad_norm: 0.8125893626974371, iteration: 402728
loss: 0.9637624025344849,grad_norm: 0.81948802845955, iteration: 402729
loss: 1.0083024501800537,grad_norm: 0.7340098816108928, iteration: 402730
loss: 0.9829909801483154,grad_norm: 0.9999994362288424, iteration: 402731
loss: 1.0190268754959106,grad_norm: 0.8397382328799228, iteration: 402732
loss: 0.9965782761573792,grad_norm: 0.9999992000080934, iteration: 402733
loss: 1.1245015859603882,grad_norm: 0.9999999481747213, iteration: 402734
loss: 1.0387808084487915,grad_norm: 0.7987786669248477, iteration: 402735
loss: 0.9573182463645935,grad_norm: 0.9013425945961914, iteration: 402736
loss: 0.9535238146781921,grad_norm: 0.9053126251118183, iteration: 402737
loss: 0.9686208367347717,grad_norm: 0.7813261130570477, iteration: 402738
loss: 0.9811697006225586,grad_norm: 0.8645454021339449, iteration: 402739
loss: 1.008928894996643,grad_norm: 0.7509762020035075, iteration: 402740
loss: 0.9964532852172852,grad_norm: 0.7137752261475072, iteration: 402741
loss: 0.9867202639579773,grad_norm: 0.9999995525027396, iteration: 402742
loss: 1.0135469436645508,grad_norm: 0.9089996282248556, iteration: 402743
loss: 1.0174832344055176,grad_norm: 0.8576071143032793, iteration: 402744
loss: 1.0647263526916504,grad_norm: 0.9999998725453634, iteration: 402745
loss: 1.1047186851501465,grad_norm: 0.9999995001545745, iteration: 402746
loss: 1.1440469026565552,grad_norm: 0.9999997996955153, iteration: 402747
loss: 0.9911172389984131,grad_norm: 0.9999994192309125, iteration: 402748
loss: 1.0108485221862793,grad_norm: 0.982418355686435, iteration: 402749
loss: 1.0031135082244873,grad_norm: 0.8939853224702536, iteration: 402750
loss: 1.061660885810852,grad_norm: 0.9999991964266689, iteration: 402751
loss: 1.0015228986740112,grad_norm: 0.7532654122340486, iteration: 402752
loss: 0.971889317035675,grad_norm: 0.7587572713796586, iteration: 402753
loss: 0.9959304332733154,grad_norm: 0.7686226718776914, iteration: 402754
loss: 0.9904018044471741,grad_norm: 0.9396955948486425, iteration: 402755
loss: 0.9965298771858215,grad_norm: 0.7959905451263801, iteration: 402756
loss: 1.0371723175048828,grad_norm: 0.6272746440913616, iteration: 402757
loss: 1.0078710317611694,grad_norm: 0.7186890197686007, iteration: 402758
loss: 0.9952536225318909,grad_norm: 0.9999993447585618, iteration: 402759
loss: 1.0210864543914795,grad_norm: 0.8252574371628862, iteration: 402760
loss: 1.021846055984497,grad_norm: 0.8569356509454886, iteration: 402761
loss: 1.1096079349517822,grad_norm: 0.9028510738427815, iteration: 402762
loss: 0.9833161234855652,grad_norm: 0.6981099235302176, iteration: 402763
loss: 1.0331488847732544,grad_norm: 0.9999991114080108, iteration: 402764
loss: 0.9700675010681152,grad_norm: 0.7487629970905858, iteration: 402765
loss: 0.9888781905174255,grad_norm: 0.9999998993006481, iteration: 402766
loss: 0.9646519422531128,grad_norm: 0.7746539954611079, iteration: 402767
loss: 1.0105979442596436,grad_norm: 0.9999991852840375, iteration: 402768
loss: 0.9846763014793396,grad_norm: 0.7359299332169772, iteration: 402769
loss: 1.0125653743743896,grad_norm: 0.9999990872528677, iteration: 402770
loss: 0.9738789796829224,grad_norm: 0.7856414569093347, iteration: 402771
loss: 0.9989780783653259,grad_norm: 0.9999989873717904, iteration: 402772
loss: 1.0110681056976318,grad_norm: 0.9999998025160186, iteration: 402773
loss: 1.018890619277954,grad_norm: 0.9015516303317065, iteration: 402774
loss: 0.9992844462394714,grad_norm: 0.8805983069665813, iteration: 402775
loss: 1.0507750511169434,grad_norm: 0.6738342229105035, iteration: 402776
loss: 1.0038535594940186,grad_norm: 0.7660358257433344, iteration: 402777
loss: 0.9858307838439941,grad_norm: 0.8354290993303344, iteration: 402778
loss: 0.9498453140258789,grad_norm: 0.6998972063623177, iteration: 402779
loss: 1.011150598526001,grad_norm: 0.9453020828016575, iteration: 402780
loss: 0.9326395392417908,grad_norm: 0.794441688090184, iteration: 402781
loss: 0.9941940903663635,grad_norm: 0.9038650306486468, iteration: 402782
loss: 1.016281247138977,grad_norm: 0.7601314366104119, iteration: 402783
loss: 0.9912018179893494,grad_norm: 0.9999991076265347, iteration: 402784
loss: 0.9907072186470032,grad_norm: 0.7590551116403521, iteration: 402785
loss: 0.9805208444595337,grad_norm: 0.9316789499408576, iteration: 402786
loss: 0.9696197509765625,grad_norm: 0.764236023953698, iteration: 402787
loss: 0.9881028532981873,grad_norm: 0.766260071279644, iteration: 402788
loss: 0.984825849533081,grad_norm: 0.734498815094704, iteration: 402789
loss: 1.0000534057617188,grad_norm: 0.9999992695075113, iteration: 402790
loss: 0.9714729189872742,grad_norm: 0.999999986054289, iteration: 402791
loss: 0.9801568388938904,grad_norm: 0.8318311087753177, iteration: 402792
loss: 0.9779203534126282,grad_norm: 0.8226499073101274, iteration: 402793
loss: 1.017572283744812,grad_norm: 0.75541398518572, iteration: 402794
loss: 1.003846287727356,grad_norm: 0.6949296492794499, iteration: 402795
loss: 0.9771587252616882,grad_norm: 0.9999995111841352, iteration: 402796
loss: 1.005987286567688,grad_norm: 0.7348084669295549, iteration: 402797
loss: 0.9903424382209778,grad_norm: 0.7351332084658355, iteration: 402798
loss: 1.0300763845443726,grad_norm: 0.7772987008555454, iteration: 402799
loss: 1.0375710725784302,grad_norm: 0.8162787339184207, iteration: 402800
loss: 1.0220072269439697,grad_norm: 0.9999995707655147, iteration: 402801
loss: 1.00593101978302,grad_norm: 0.652161501618541, iteration: 402802
loss: 1.1006512641906738,grad_norm: 0.9999998357287805, iteration: 402803
loss: 1.1051106452941895,grad_norm: 0.9999997041325016, iteration: 402804
loss: 0.9706323146820068,grad_norm: 0.7098796839108166, iteration: 402805
loss: 1.0274394750595093,grad_norm: 0.7935249120366296, iteration: 402806
loss: 1.0244098901748657,grad_norm: 0.9999991020918618, iteration: 402807
loss: 1.0042659044265747,grad_norm: 0.8185870256082645, iteration: 402808
loss: 1.0948532819747925,grad_norm: 0.9999990430086129, iteration: 402809
loss: 0.9850754141807556,grad_norm: 0.7783014119890136, iteration: 402810
loss: 1.0255606174468994,grad_norm: 0.7500478194931626, iteration: 402811
loss: 1.0204589366912842,grad_norm: 0.8293970019738801, iteration: 402812
loss: 1.0286931991577148,grad_norm: 0.707955079200146, iteration: 402813
loss: 1.0080610513687134,grad_norm: 0.8642321442719814, iteration: 402814
loss: 1.1422353982925415,grad_norm: 0.99999966402173, iteration: 402815
loss: 0.9533820748329163,grad_norm: 0.9107458106873114, iteration: 402816
loss: 1.0657451152801514,grad_norm: 0.9999992597320049, iteration: 402817
loss: 1.0021709203720093,grad_norm: 0.7927429918582253, iteration: 402818
loss: 1.051032543182373,grad_norm: 0.7488541135354724, iteration: 402819
loss: 1.0108813047409058,grad_norm: 0.9999992099053567, iteration: 402820
loss: 0.9918482899665833,grad_norm: 0.8209716891441944, iteration: 402821
loss: 0.9697792530059814,grad_norm: 0.77868164461738, iteration: 402822
loss: 1.016501784324646,grad_norm: 0.8802924314557616, iteration: 402823
loss: 0.9488010406494141,grad_norm: 0.7247303990303817, iteration: 402824
loss: 1.0077465772628784,grad_norm: 0.9999991283791585, iteration: 402825
loss: 1.029767394065857,grad_norm: 0.7723782288638046, iteration: 402826
loss: 0.9987384676933289,grad_norm: 0.9523536413884216, iteration: 402827
loss: 1.0035226345062256,grad_norm: 0.8930655835836142, iteration: 402828
loss: 1.0025110244750977,grad_norm: 0.8066151572455441, iteration: 402829
loss: 1.0628520250320435,grad_norm: 0.8168233186966108, iteration: 402830
loss: 1.051835298538208,grad_norm: 0.9999992169731227, iteration: 402831
loss: 1.0276789665222168,grad_norm: 0.7478716771607192, iteration: 402832
loss: 1.0102547407150269,grad_norm: 0.7732775498219289, iteration: 402833
loss: 1.0101258754730225,grad_norm: 0.7028857394454715, iteration: 402834
loss: 1.0299296379089355,grad_norm: 0.9999994741954126, iteration: 402835
loss: 1.031043291091919,grad_norm: 0.9999990418803295, iteration: 402836
loss: 1.0852117538452148,grad_norm: 0.8497050306975377, iteration: 402837
loss: 1.0008479356765747,grad_norm: 0.7756330889345854, iteration: 402838
loss: 0.9855524301528931,grad_norm: 0.7456024381975134, iteration: 402839
loss: 0.9837176203727722,grad_norm: 0.7912994096025333, iteration: 402840
loss: 1.020839810371399,grad_norm: 0.9999999317326841, iteration: 402841
loss: 1.043291449546814,grad_norm: 0.9999992602775973, iteration: 402842
loss: 1.0004864931106567,grad_norm: 0.8799493108712733, iteration: 402843
loss: 1.0117720365524292,grad_norm: 0.8403942700158007, iteration: 402844
loss: 0.9897274971008301,grad_norm: 0.7478757431138348, iteration: 402845
loss: 0.9928549528121948,grad_norm: 0.6763151931303404, iteration: 402846
loss: 1.0319722890853882,grad_norm: 0.8131349429979218, iteration: 402847
loss: 1.120437502861023,grad_norm: 0.9999990840178362, iteration: 402848
loss: 1.0698388814926147,grad_norm: 0.9999995200439784, iteration: 402849
loss: 1.008471131324768,grad_norm: 0.7518302567012116, iteration: 402850
loss: 1.0260233879089355,grad_norm: 0.8025961438558654, iteration: 402851
loss: 0.9782627820968628,grad_norm: 0.8575640923566598, iteration: 402852
loss: 0.9667036533355713,grad_norm: 0.8579011089770335, iteration: 402853
loss: 1.0144309997558594,grad_norm: 0.7908323755414102, iteration: 402854
loss: 0.9938799738883972,grad_norm: 0.9999994945383975, iteration: 402855
loss: 1.0005592107772827,grad_norm: 0.7581811233349695, iteration: 402856
loss: 0.962656557559967,grad_norm: 0.9300857434730081, iteration: 402857
loss: 1.0079667568206787,grad_norm: 0.9999995353788307, iteration: 402858
loss: 0.9790531396865845,grad_norm: 0.8225687284279678, iteration: 402859
loss: 0.9714208841323853,grad_norm: 0.7582876327075556, iteration: 402860
loss: 1.035483717918396,grad_norm: 0.8402141731463338, iteration: 402861
loss: 1.008058786392212,grad_norm: 0.8072880764871396, iteration: 402862
loss: 1.0592564344406128,grad_norm: 0.9999990795133946, iteration: 402863
loss: 1.0140608549118042,grad_norm: 0.75396687263602, iteration: 402864
loss: 0.9941160678863525,grad_norm: 0.7271311976229015, iteration: 402865
loss: 1.0122908353805542,grad_norm: 0.9999992129217878, iteration: 402866
loss: 0.9930251836776733,grad_norm: 0.7999218419864369, iteration: 402867
loss: 0.9922384023666382,grad_norm: 0.8707648551890776, iteration: 402868
loss: 1.0134198665618896,grad_norm: 0.9089048020996794, iteration: 402869
loss: 1.0194393396377563,grad_norm: 0.7275901057567854, iteration: 402870
loss: 1.094327688217163,grad_norm: 0.9999994427017662, iteration: 402871
loss: 0.9909399747848511,grad_norm: 0.9999991829284176, iteration: 402872
loss: 1.0033403635025024,grad_norm: 0.6399929920458121, iteration: 402873
loss: 0.9815822839736938,grad_norm: 0.9999991213671614, iteration: 402874
loss: 1.0011992454528809,grad_norm: 0.9253049356083569, iteration: 402875
loss: 0.9878346920013428,grad_norm: 0.7841067802591086, iteration: 402876
loss: 0.9631143808364868,grad_norm: 0.8564120944330441, iteration: 402877
loss: 1.0184102058410645,grad_norm: 0.997435309600056, iteration: 402878
loss: 1.031131386756897,grad_norm: 0.7967457616539805, iteration: 402879
loss: 1.0200546979904175,grad_norm: 0.9561467752529388, iteration: 402880
loss: 1.0264886617660522,grad_norm: 0.9999991549915653, iteration: 402881
loss: 1.0423816442489624,grad_norm: 0.7597224735636183, iteration: 402882
loss: 1.0013169050216675,grad_norm: 0.8996685782208705, iteration: 402883
loss: 0.9835288524627686,grad_norm: 0.7648909382873071, iteration: 402884
loss: 0.9972320795059204,grad_norm: 0.9046305002265497, iteration: 402885
loss: 1.007242202758789,grad_norm: 0.9036086251124563, iteration: 402886
loss: 1.0313669443130493,grad_norm: 0.9999991573567824, iteration: 402887
loss: 1.0124367475509644,grad_norm: 0.9729218292153152, iteration: 402888
loss: 1.0513923168182373,grad_norm: 0.9999998219804526, iteration: 402889
loss: 1.087428331375122,grad_norm: 0.9999997813095298, iteration: 402890
loss: 0.9919565320014954,grad_norm: 0.9089346718908368, iteration: 402891
loss: 0.9647029638290405,grad_norm: 0.8350079673137512, iteration: 402892
loss: 0.993852436542511,grad_norm: 0.917842509043854, iteration: 402893
loss: 0.9726272821426392,grad_norm: 0.7865344310102398, iteration: 402894
loss: 1.002503514289856,grad_norm: 0.8318325096440343, iteration: 402895
loss: 0.9968802332878113,grad_norm: 0.797595461762734, iteration: 402896
loss: 1.0325201749801636,grad_norm: 0.9783422243753379, iteration: 402897
loss: 1.0873850584030151,grad_norm: 0.9999996607344046, iteration: 402898
loss: 0.9971854090690613,grad_norm: 0.914798693126225, iteration: 402899
loss: 1.0799753665924072,grad_norm: 0.9999991766146775, iteration: 402900
loss: 1.0965957641601562,grad_norm: 0.8326979987575088, iteration: 402901
loss: 1.0359668731689453,grad_norm: 0.7907715618678098, iteration: 402902
loss: 0.9960534572601318,grad_norm: 0.67049195948492, iteration: 402903
loss: 1.0081983804702759,grad_norm: 0.6925157420221044, iteration: 402904
loss: 1.0683318376541138,grad_norm: 0.9999991450032851, iteration: 402905
loss: 1.0508469343185425,grad_norm: 0.7914774471049449, iteration: 402906
loss: 1.0085980892181396,grad_norm: 0.8833722008499634, iteration: 402907
loss: 1.1193598508834839,grad_norm: 0.9435037121547815, iteration: 402908
loss: 1.0000159740447998,grad_norm: 0.9730351003116622, iteration: 402909
loss: 1.0488530397415161,grad_norm: 0.9999991586181899, iteration: 402910
loss: 1.0254793167114258,grad_norm: 0.7732055716733667, iteration: 402911
loss: 0.9958543181419373,grad_norm: 0.873407105000557, iteration: 402912
loss: 1.0321855545043945,grad_norm: 0.9999999701114873, iteration: 402913
loss: 1.007209062576294,grad_norm: 0.8910563584420181, iteration: 402914
loss: 1.0055646896362305,grad_norm: 0.8574009540945796, iteration: 402915
loss: 0.9876893162727356,grad_norm: 0.7924936104747832, iteration: 402916
loss: 0.9615294933319092,grad_norm: 0.8099803405057223, iteration: 402917
loss: 0.984944224357605,grad_norm: 0.7469539169360848, iteration: 402918
loss: 1.001602053642273,grad_norm: 0.8038502770322046, iteration: 402919
loss: 0.9819788932800293,grad_norm: 0.7366198010641142, iteration: 402920
loss: 1.0420196056365967,grad_norm: 0.8130748756813337, iteration: 402921
loss: 1.0182822942733765,grad_norm: 0.9999996991768884, iteration: 402922
loss: 0.988858699798584,grad_norm: 0.7900084698178493, iteration: 402923
loss: 0.9979742765426636,grad_norm: 0.7251049263615292, iteration: 402924
loss: 0.9962714910507202,grad_norm: 0.891637810690343, iteration: 402925
loss: 1.0016345977783203,grad_norm: 0.7048923461973038, iteration: 402926
loss: 1.0070096254348755,grad_norm: 0.9032474045832651, iteration: 402927
loss: 1.042486548423767,grad_norm: 0.9999992051890624, iteration: 402928
loss: 1.0113661289215088,grad_norm: 0.9551847557009688, iteration: 402929
loss: 0.9900938272476196,grad_norm: 0.8624754379823455, iteration: 402930
loss: 0.9960873126983643,grad_norm: 0.8175803648190979, iteration: 402931
loss: 1.0655525922775269,grad_norm: 0.9999993820894947, iteration: 402932
loss: 0.9903388619422913,grad_norm: 0.8200397727759441, iteration: 402933
loss: 0.995475709438324,grad_norm: 0.813543509389035, iteration: 402934
loss: 1.0052684545516968,grad_norm: 0.7074527524728256, iteration: 402935
loss: 1.0149554014205933,grad_norm: 0.9999990722839566, iteration: 402936
loss: 1.0104823112487793,grad_norm: 0.9562785961643595, iteration: 402937
loss: 1.019712209701538,grad_norm: 0.9462246797731384, iteration: 402938
loss: 1.0229606628417969,grad_norm: 0.99999989740661, iteration: 402939
loss: 1.0383960008621216,grad_norm: 0.8691862301931925, iteration: 402940
loss: 0.9875528812408447,grad_norm: 0.8176760849193582, iteration: 402941
loss: 1.0383812189102173,grad_norm: 0.9999993861238745, iteration: 402942
loss: 1.1374101638793945,grad_norm: 0.9999991842480729, iteration: 402943
loss: 1.0218678712844849,grad_norm: 0.9029247241092103, iteration: 402944
loss: 1.0044796466827393,grad_norm: 0.7711133447419974, iteration: 402945
loss: 1.1988602876663208,grad_norm: 0.9999998578639305, iteration: 402946
loss: 0.9890015125274658,grad_norm: 0.7230430769390379, iteration: 402947
loss: 1.0228962898254395,grad_norm: 0.9999999180744126, iteration: 402948
loss: 1.05403733253479,grad_norm: 0.9999994202928117, iteration: 402949
loss: 0.981052577495575,grad_norm: 0.8354445587741951, iteration: 402950
loss: 1.0663763284683228,grad_norm: 0.9392681812671896, iteration: 402951
loss: 0.9978569149971008,grad_norm: 0.8014424536795296, iteration: 402952
loss: 0.9701021313667297,grad_norm: 1.000000025644688, iteration: 402953
loss: 1.021646499633789,grad_norm: 0.8844664259490126, iteration: 402954
loss: 1.0127009153366089,grad_norm: 0.9999999420033057, iteration: 402955
loss: 0.9623972773551941,grad_norm: 0.7870189537077337, iteration: 402956
loss: 0.9354248046875,grad_norm: 0.7684292908557951, iteration: 402957
loss: 0.96272873878479,grad_norm: 0.6468814932900728, iteration: 402958
loss: 1.0220351219177246,grad_norm: 0.9999989653723594, iteration: 402959
loss: 0.9631276726722717,grad_norm: 0.7101450566822316, iteration: 402960
loss: 0.9898127913475037,grad_norm: 0.8188785245073332, iteration: 402961
loss: 0.9751204252243042,grad_norm: 0.9999996142576283, iteration: 402962
loss: 1.044598937034607,grad_norm: 0.999999978578744, iteration: 402963
loss: 1.000754475593567,grad_norm: 0.8343800697463473, iteration: 402964
loss: 1.0129244327545166,grad_norm: 0.8758791448035034, iteration: 402965
loss: 0.9849205613136292,grad_norm: 0.9551913069832698, iteration: 402966
loss: 0.9661597013473511,grad_norm: 0.9692620458288141, iteration: 402967
loss: 0.991393506526947,grad_norm: 0.8687904371559795, iteration: 402968
loss: 1.0515944957733154,grad_norm: 0.9999993534081463, iteration: 402969
loss: 1.0345149040222168,grad_norm: 0.9999991923082012, iteration: 402970
loss: 0.9840206503868103,grad_norm: 0.8423212368164046, iteration: 402971
loss: 0.991138756275177,grad_norm: 0.8163658186036119, iteration: 402972
loss: 1.1052275896072388,grad_norm: 0.8571741240867777, iteration: 402973
loss: 0.9543861150741577,grad_norm: 0.8179108245934962, iteration: 402974
loss: 1.0426418781280518,grad_norm: 0.9999990990193559, iteration: 402975
loss: 1.050202488899231,grad_norm: 0.9999993545843383, iteration: 402976
loss: 0.9876736998558044,grad_norm: 0.9913025938258515, iteration: 402977
loss: 1.10060453414917,grad_norm: 0.9999993485569638, iteration: 402978
loss: 1.022757887840271,grad_norm: 0.8813080992192899, iteration: 402979
loss: 1.0006201267242432,grad_norm: 0.8055898069017142, iteration: 402980
loss: 0.9871783256530762,grad_norm: 0.7696636028124126, iteration: 402981
loss: 0.9764184355735779,grad_norm: 0.8805935809044768, iteration: 402982
loss: 1.066717505455017,grad_norm: 0.9824186986541269, iteration: 402983
loss: 1.008216142654419,grad_norm: 0.782829014984875, iteration: 402984
loss: 0.9885542988777161,grad_norm: 0.7267335347026409, iteration: 402985
loss: 1.0251823663711548,grad_norm: 0.7029159249336195, iteration: 402986
loss: 1.0684062242507935,grad_norm: 0.8383068284767274, iteration: 402987
loss: 0.9822311401367188,grad_norm: 0.999999188811861, iteration: 402988
loss: 0.9930233359336853,grad_norm: 0.8053803874248743, iteration: 402989
loss: 1.0113677978515625,grad_norm: 0.6922653043421214, iteration: 402990
loss: 0.9996548891067505,grad_norm: 0.909557973500416, iteration: 402991
loss: 0.9782862663269043,grad_norm: 0.8171945746038898, iteration: 402992
loss: 1.1747794151306152,grad_norm: 0.999999937332087, iteration: 402993
loss: 1.014217734336853,grad_norm: 0.9999998104428915, iteration: 402994
loss: 0.9930263161659241,grad_norm: 0.8183500846832142, iteration: 402995
loss: 1.0381439924240112,grad_norm: 0.9999991344634948, iteration: 402996
loss: 1.0205135345458984,grad_norm: 0.8771446060256014, iteration: 402997
loss: 1.0118576288223267,grad_norm: 0.7498948862087436, iteration: 402998
loss: 1.0622481107711792,grad_norm: 0.8845391939454366, iteration: 402999
loss: 0.9763329029083252,grad_norm: 0.7898041839555013, iteration: 403000
loss: 1.0348548889160156,grad_norm: 0.99999951339616, iteration: 403001
loss: 1.0300596952438354,grad_norm: 0.7234517440541722, iteration: 403002
loss: 1.0015645027160645,grad_norm: 0.7125565714431593, iteration: 403003
loss: 1.004414677619934,grad_norm: 0.6023025651366672, iteration: 403004
loss: 1.0288909673690796,grad_norm: 0.941389574594419, iteration: 403005
loss: 1.0453252792358398,grad_norm: 0.9999996119509657, iteration: 403006
loss: 0.9872261881828308,grad_norm: 0.6786786957924402, iteration: 403007
loss: 1.041589379310608,grad_norm: 0.8266548079288999, iteration: 403008
loss: 1.0220532417297363,grad_norm: 0.6725770832918986, iteration: 403009
loss: 0.9868982434272766,grad_norm: 0.7127425186507437, iteration: 403010
loss: 1.049291729927063,grad_norm: 0.7905403258258022, iteration: 403011
loss: 1.0296337604522705,grad_norm: 0.8514877325463857, iteration: 403012
loss: 1.0888099670410156,grad_norm: 0.9999996035694458, iteration: 403013
loss: 1.0161033868789673,grad_norm: 0.8346281009997458, iteration: 403014
loss: 1.0061399936676025,grad_norm: 0.8593280057713617, iteration: 403015
loss: 1.0499180555343628,grad_norm: 0.8300327974428441, iteration: 403016
loss: 0.9811007976531982,grad_norm: 0.8291887938175572, iteration: 403017
loss: 1.0132564306259155,grad_norm: 0.7805994026941413, iteration: 403018
loss: 1.0142842531204224,grad_norm: 0.9999999302916447, iteration: 403019
loss: 1.0825704336166382,grad_norm: 0.9999997266087901, iteration: 403020
loss: 1.0292141437530518,grad_norm: 0.9999995594406507, iteration: 403021
loss: 0.9972343444824219,grad_norm: 0.8669852831373779, iteration: 403022
loss: 1.0141410827636719,grad_norm: 0.6868556353045714, iteration: 403023
loss: 1.0271930694580078,grad_norm: 0.9999991851148322, iteration: 403024
loss: 0.9953343272209167,grad_norm: 0.8954953178235815, iteration: 403025
loss: 0.9971110224723816,grad_norm: 0.8433465690156325, iteration: 403026
loss: 0.9843424558639526,grad_norm: 0.855666178057485, iteration: 403027
loss: 1.0001970529556274,grad_norm: 0.9686960083287919, iteration: 403028
loss: 0.983279824256897,grad_norm: 0.7851869113666596, iteration: 403029
loss: 1.0247684717178345,grad_norm: 0.8165260706890038, iteration: 403030
loss: 0.9729602932929993,grad_norm: 0.632456268181056, iteration: 403031
loss: 1.0021607875823975,grad_norm: 0.7572098801429075, iteration: 403032
loss: 1.0457764863967896,grad_norm: 0.999999286639128, iteration: 403033
loss: 1.0147966146469116,grad_norm: 0.8417782437217162, iteration: 403034
loss: 0.9997119903564453,grad_norm: 0.9999999425435006, iteration: 403035
loss: 1.0993642807006836,grad_norm: 0.8213842882994153, iteration: 403036
loss: 1.0002299547195435,grad_norm: 0.9999990212452814, iteration: 403037
loss: 1.0976746082305908,grad_norm: 0.9999999370613244, iteration: 403038
loss: 1.0796818733215332,grad_norm: 0.9999999396252828, iteration: 403039
loss: 0.9528618454933167,grad_norm: 0.7667992572718119, iteration: 403040
loss: 1.0185668468475342,grad_norm: 0.9999998331755519, iteration: 403041
loss: 1.0061432123184204,grad_norm: 0.8906148654570415, iteration: 403042
loss: 1.0186740159988403,grad_norm: 0.9999991289050056, iteration: 403043
loss: 0.992810070514679,grad_norm: 0.8041376723853891, iteration: 403044
loss: 0.9918607473373413,grad_norm: 0.9999991091801594, iteration: 403045
loss: 1.0072234869003296,grad_norm: 0.7406419242657859, iteration: 403046
loss: 1.0504447221755981,grad_norm: 0.9999996212493917, iteration: 403047
loss: 1.0164567232131958,grad_norm: 0.806490393237262, iteration: 403048
loss: 0.9776415824890137,grad_norm: 0.9638182078121625, iteration: 403049
loss: 1.0366681814193726,grad_norm: 0.8619926494577084, iteration: 403050
loss: 1.0268903970718384,grad_norm: 0.9785992698894004, iteration: 403051
loss: 1.1249970197677612,grad_norm: 0.9999999648207535, iteration: 403052
loss: 1.0319457054138184,grad_norm: 0.9272323483657839, iteration: 403053
loss: 1.0370591878890991,grad_norm: 0.8662928943448254, iteration: 403054
loss: 1.041813611984253,grad_norm: 0.772646455208935, iteration: 403055
loss: 1.0501179695129395,grad_norm: 0.8351017801816661, iteration: 403056
loss: 1.0107550621032715,grad_norm: 0.9594926917889065, iteration: 403057
loss: 0.9975115060806274,grad_norm: 0.7958877322469984, iteration: 403058
loss: 1.0642364025115967,grad_norm: 0.9999992295257312, iteration: 403059
loss: 1.014534592628479,grad_norm: 0.9999991400382248, iteration: 403060
loss: 0.9951193928718567,grad_norm: 0.7916265999153299, iteration: 403061
loss: 1.049935221672058,grad_norm: 0.999999065270816, iteration: 403062
loss: 1.0111273527145386,grad_norm: 0.9999997937429823, iteration: 403063
loss: 1.034009575843811,grad_norm: 0.7422144677744983, iteration: 403064
loss: 1.0368709564208984,grad_norm: 0.8984484421981384, iteration: 403065
loss: 1.0004768371582031,grad_norm: 0.8083654445876362, iteration: 403066
loss: 1.0096205472946167,grad_norm: 0.9999991405125938, iteration: 403067
loss: 0.9956024289131165,grad_norm: 0.861036267812783, iteration: 403068
loss: 1.0371363162994385,grad_norm: 0.9999998418087542, iteration: 403069
loss: 1.027336597442627,grad_norm: 0.9371739955102185, iteration: 403070
loss: 1.0735228061676025,grad_norm: 0.99999910857069, iteration: 403071
loss: 1.112112283706665,grad_norm: 0.9999993652665444, iteration: 403072
loss: 1.0745571851730347,grad_norm: 0.9999998368853492, iteration: 403073
loss: 0.9947484731674194,grad_norm: 0.7626490787248619, iteration: 403074
loss: 1.0080949068069458,grad_norm: 0.9999996515385481, iteration: 403075
loss: 1.1099234819412231,grad_norm: 0.9738068499599614, iteration: 403076
loss: 1.0976835489273071,grad_norm: 0.999999915821658, iteration: 403077
loss: 1.0192792415618896,grad_norm: 0.7823464541696842, iteration: 403078
loss: 1.094275951385498,grad_norm: 0.9999995130556716, iteration: 403079
loss: 1.0130083560943604,grad_norm: 0.8168921728408639, iteration: 403080
loss: 1.0405460596084595,grad_norm: 0.9999999008390555, iteration: 403081
loss: 0.9915742874145508,grad_norm: 0.8591572567277155, iteration: 403082
loss: 0.9883685111999512,grad_norm: 0.9999993718362176, iteration: 403083
loss: 1.0092192888259888,grad_norm: 0.999999111044706, iteration: 403084
loss: 1.0619359016418457,grad_norm: 0.8271908745845122, iteration: 403085
loss: 1.160571813583374,grad_norm: 0.9999999041180231, iteration: 403086
loss: 1.0305421352386475,grad_norm: 0.8746251585613313, iteration: 403087
loss: 0.9612317085266113,grad_norm: 0.8448807767138721, iteration: 403088
loss: 1.0171414613723755,grad_norm: 0.9999990767134571, iteration: 403089
loss: 1.0442553758621216,grad_norm: 0.935216476972269, iteration: 403090
loss: 1.04218590259552,grad_norm: 0.9999998230093885, iteration: 403091
loss: 1.0501192808151245,grad_norm: 0.9999990227570494, iteration: 403092
loss: 1.0814117193222046,grad_norm: 0.9999991240663365, iteration: 403093
loss: 1.0246740579605103,grad_norm: 0.9080889418725964, iteration: 403094
loss: 1.0530736446380615,grad_norm: 0.9973481588429693, iteration: 403095
loss: 1.0157232284545898,grad_norm: 0.7961473751001327, iteration: 403096
loss: 0.9904295206069946,grad_norm: 0.7910319134587966, iteration: 403097
loss: 0.9643579721450806,grad_norm: 0.8456007252609135, iteration: 403098
loss: 1.005380392074585,grad_norm: 0.6203177587580526, iteration: 403099
loss: 1.0121394395828247,grad_norm: 0.7100378854095664, iteration: 403100
loss: 1.0476609468460083,grad_norm: 0.9366958408485178, iteration: 403101
loss: 0.9841746687889099,grad_norm: 0.7262529409889854, iteration: 403102
loss: 1.091802716255188,grad_norm: 0.9999998639415236, iteration: 403103
loss: 1.0400604009628296,grad_norm: 0.9199198033363709, iteration: 403104
loss: 1.033739447593689,grad_norm: 0.7213539107296116, iteration: 403105
loss: 0.9931259751319885,grad_norm: 0.8279545217183716, iteration: 403106
loss: 1.0288432836532593,grad_norm: 0.8047411699797625, iteration: 403107
loss: 1.0091408491134644,grad_norm: 0.9089396975428947, iteration: 403108
loss: 0.9987245798110962,grad_norm: 0.715979597517577, iteration: 403109
loss: 1.001118779182434,grad_norm: 0.7528659663909502, iteration: 403110
loss: 1.0028126239776611,grad_norm: 0.9647750117562021, iteration: 403111
loss: 0.9791440963745117,grad_norm: 0.8715425433900247, iteration: 403112
loss: 0.9668339490890503,grad_norm: 0.7943813686375997, iteration: 403113
loss: 1.056066870689392,grad_norm: 0.9999998886779847, iteration: 403114
loss: 1.0353858470916748,grad_norm: 0.8424551336769096, iteration: 403115
loss: 1.0078117847442627,grad_norm: 0.7275118430026566, iteration: 403116
loss: 0.9755935072898865,grad_norm: 0.8769130128449756, iteration: 403117
loss: 1.0005908012390137,grad_norm: 0.9999991401896218, iteration: 403118
loss: 1.005987286567688,grad_norm: 0.8021032462218586, iteration: 403119
loss: 1.0181437730789185,grad_norm: 0.8602387506742333, iteration: 403120
loss: 1.0438748598098755,grad_norm: 0.8857821483543832, iteration: 403121
loss: 0.9682756662368774,grad_norm: 0.7865520533598912, iteration: 403122
loss: 1.0143152475357056,grad_norm: 0.999999278535785, iteration: 403123
loss: 0.9818873405456543,grad_norm: 0.7036178339922476, iteration: 403124
loss: 1.0551304817199707,grad_norm: 0.9999993946168972, iteration: 403125
loss: 1.0352150201797485,grad_norm: 0.8018320362322738, iteration: 403126
loss: 1.0409677028656006,grad_norm: 0.7530650133072709, iteration: 403127
loss: 0.998799204826355,grad_norm: 0.9165794766570508, iteration: 403128
loss: 1.0047012567520142,grad_norm: 0.756960554790855, iteration: 403129
loss: 0.9961289167404175,grad_norm: 0.792457144456275, iteration: 403130
loss: 1.092742919921875,grad_norm: 0.8186784002018539, iteration: 403131
loss: 1.02896249294281,grad_norm: 0.9378585713054335, iteration: 403132
loss: 0.9824381470680237,grad_norm: 0.9999991584435638, iteration: 403133
loss: 1.0338315963745117,grad_norm: 0.999999113734061, iteration: 403134
loss: 1.0418435335159302,grad_norm: 0.8498069003307113, iteration: 403135
loss: 1.0484334230422974,grad_norm: 0.9999990977883095, iteration: 403136
loss: 1.0707013607025146,grad_norm: 0.786435568383973, iteration: 403137
loss: 0.9973710775375366,grad_norm: 0.912807822187175, iteration: 403138
loss: 1.0860974788665771,grad_norm: 0.7126402544984617, iteration: 403139
loss: 1.082966685295105,grad_norm: 0.9383315551492895, iteration: 403140
loss: 1.1187877655029297,grad_norm: 0.9999997292003275, iteration: 403141
loss: 1.027148723602295,grad_norm: 0.9999993013716821, iteration: 403142
loss: 1.0441875457763672,grad_norm: 0.7430553736191643, iteration: 403143
loss: 1.2095814943313599,grad_norm: 0.9999997686534622, iteration: 403144
loss: 1.0589760541915894,grad_norm: 0.999999238906091, iteration: 403145
loss: 1.0047357082366943,grad_norm: 0.8688289346346851, iteration: 403146
loss: 1.0043392181396484,grad_norm: 1.0000000400060896, iteration: 403147
loss: 0.9920971989631653,grad_norm: 0.8114731529710154, iteration: 403148
loss: 1.040897011756897,grad_norm: 0.672768848378032, iteration: 403149
loss: 0.9987460970878601,grad_norm: 0.9838226229890537, iteration: 403150
loss: 1.1772829294204712,grad_norm: 0.999999898757484, iteration: 403151
loss: 1.041424036026001,grad_norm: 0.9999994124821476, iteration: 403152
loss: 1.0289057493209839,grad_norm: 0.9999996676679114, iteration: 403153
loss: 1.0264390707015991,grad_norm: 0.9999996655699714, iteration: 403154
loss: 1.062951922416687,grad_norm: 0.9999996791429222, iteration: 403155
loss: 1.024264931678772,grad_norm: 0.9999994398618903, iteration: 403156
loss: 1.0965133905410767,grad_norm: 0.9999992319148726, iteration: 403157
loss: 1.0408743619918823,grad_norm: 0.7899197447388401, iteration: 403158
loss: 0.9902054071426392,grad_norm: 0.818452970740241, iteration: 403159
loss: 1.0500136613845825,grad_norm: 0.9999990975573335, iteration: 403160
loss: 1.0225533246994019,grad_norm: 0.9999991327568043, iteration: 403161
loss: 1.0225507020950317,grad_norm: 0.8434595815264151, iteration: 403162
loss: 0.9778496026992798,grad_norm: 0.7625287364980622, iteration: 403163
loss: 1.0213549137115479,grad_norm: 0.9999995685342626, iteration: 403164
loss: 1.016061782836914,grad_norm: 0.958864642468125, iteration: 403165
loss: 1.0191806554794312,grad_norm: 0.7733253000135157, iteration: 403166
loss: 1.0139732360839844,grad_norm: 0.9999991779079718, iteration: 403167
loss: 1.0125700235366821,grad_norm: 0.8216911016681042, iteration: 403168
loss: 0.9971596598625183,grad_norm: 0.9331735694059416, iteration: 403169
loss: 0.9971175193786621,grad_norm: 0.7270187776600328, iteration: 403170
loss: 1.0234549045562744,grad_norm: 0.9846628444211439, iteration: 403171
loss: 1.0624074935913086,grad_norm: 0.9999992678353573, iteration: 403172
loss: 0.9653089642524719,grad_norm: 0.7781263643114268, iteration: 403173
loss: 1.0214048624038696,grad_norm: 0.8480172027633673, iteration: 403174
loss: 1.0735251903533936,grad_norm: 0.9999996568249143, iteration: 403175
loss: 1.0294800996780396,grad_norm: 0.819217609482496, iteration: 403176
loss: 1.0269083976745605,grad_norm: 0.9999997782502538, iteration: 403177
loss: 1.0271034240722656,grad_norm: 0.8092947417824023, iteration: 403178
loss: 1.005426287651062,grad_norm: 0.849887085365212, iteration: 403179
loss: 0.9960129857063293,grad_norm: 0.688820877396208, iteration: 403180
loss: 1.0420564413070679,grad_norm: 0.999999139815728, iteration: 403181
loss: 0.9802975058555603,grad_norm: 0.9999990727968158, iteration: 403182
loss: 1.0073895454406738,grad_norm: 0.8412062820079949, iteration: 403183
loss: 1.0497184991836548,grad_norm: 0.9213477274260293, iteration: 403184
loss: 0.9905959367752075,grad_norm: 0.6822570225015621, iteration: 403185
loss: 1.0079703330993652,grad_norm: 0.7858087378673636, iteration: 403186
loss: 0.9740318059921265,grad_norm: 0.7502235877960777, iteration: 403187
loss: 1.0233550071716309,grad_norm: 0.8740416275644808, iteration: 403188
loss: 0.9945840239524841,grad_norm: 0.9999990801557902, iteration: 403189
loss: 1.063742995262146,grad_norm: 0.9999990938148308, iteration: 403190
loss: 1.0124284029006958,grad_norm: 0.8063284303963336, iteration: 403191
loss: 0.9753861427307129,grad_norm: 0.7729919331914988, iteration: 403192
loss: 1.0129514932632446,grad_norm: 0.7143545639739824, iteration: 403193
loss: 0.9577801823616028,grad_norm: 0.9606383034672442, iteration: 403194
loss: 1.029191017150879,grad_norm: 0.9999991215219975, iteration: 403195
loss: 1.0770392417907715,grad_norm: 0.999999517040577, iteration: 403196
loss: 1.0208568572998047,grad_norm: 0.9784150094077717, iteration: 403197
loss: 1.0397647619247437,grad_norm: 0.7821058068157842, iteration: 403198
loss: 1.034164547920227,grad_norm: 0.8546253282205347, iteration: 403199
loss: 1.0338451862335205,grad_norm: 0.7140029788984544, iteration: 403200
loss: 0.9769672751426697,grad_norm: 0.7228495615187778, iteration: 403201
loss: 1.0294755697250366,grad_norm: 0.7643339293807402, iteration: 403202
loss: 0.9970341920852661,grad_norm: 0.6854452789727351, iteration: 403203
loss: 1.0697851181030273,grad_norm: 0.999999671097582, iteration: 403204
loss: 1.0610865354537964,grad_norm: 0.9999997730632177, iteration: 403205
loss: 0.9850285649299622,grad_norm: 0.953413383584366, iteration: 403206
loss: 1.0800871849060059,grad_norm: 0.8241551815634, iteration: 403207
loss: 1.0436421632766724,grad_norm: 0.999999696265749, iteration: 403208
loss: 0.995206892490387,grad_norm: 0.7650746815909125, iteration: 403209
loss: 0.9654496312141418,grad_norm: 0.918951529757129, iteration: 403210
loss: 1.048171877861023,grad_norm: 0.9329153969784455, iteration: 403211
loss: 0.9662265777587891,grad_norm: 0.8299944747513376, iteration: 403212
loss: 1.0454379320144653,grad_norm: 0.700801804033015, iteration: 403213
loss: 1.175658941268921,grad_norm: 0.999999666916677, iteration: 403214
loss: 1.0036911964416504,grad_norm: 0.7564512222296215, iteration: 403215
loss: 1.011236310005188,grad_norm: 0.7536319186055817, iteration: 403216
loss: 1.0666013956069946,grad_norm: 1.0000000075836064, iteration: 403217
loss: 0.9716976881027222,grad_norm: 0.9999991024121934, iteration: 403218
loss: 1.0182738304138184,grad_norm: 0.9999991777473841, iteration: 403219
loss: 1.1139992475509644,grad_norm: 0.9999990348123503, iteration: 403220
loss: 1.0489530563354492,grad_norm: 0.9215474698192898, iteration: 403221
loss: 1.0162123441696167,grad_norm: 0.9999991496993509, iteration: 403222
loss: 0.9927557110786438,grad_norm: 0.8627447700541621, iteration: 403223
loss: 1.0666136741638184,grad_norm: 0.9696744380815714, iteration: 403224
loss: 0.9899766445159912,grad_norm: 0.8695847291744493, iteration: 403225
loss: 1.017819881439209,grad_norm: 0.8552708060623881, iteration: 403226
loss: 1.0066568851470947,grad_norm: 0.8747028306347051, iteration: 403227
loss: 1.0700488090515137,grad_norm: 0.9999994338905404, iteration: 403228
loss: 1.0134307146072388,grad_norm: 0.8477188021692676, iteration: 403229
loss: 1.0112885236740112,grad_norm: 0.999999068220811, iteration: 403230
loss: 1.3038272857666016,grad_norm: 0.9999999242251748, iteration: 403231
loss: 1.0079624652862549,grad_norm: 0.6918898942777991, iteration: 403232
loss: 1.0273512601852417,grad_norm: 0.7322652383406926, iteration: 403233
loss: 1.0330554246902466,grad_norm: 0.7964375326834736, iteration: 403234
loss: 1.0443629026412964,grad_norm: 0.7912909198757145, iteration: 403235
loss: 0.9990897178649902,grad_norm: 0.7325702334931979, iteration: 403236
loss: 1.0246336460113525,grad_norm: 0.8006525731331643, iteration: 403237
loss: 1.0321415662765503,grad_norm: 0.9009435576061764, iteration: 403238
loss: 0.9841054677963257,grad_norm: 0.8251049393551559, iteration: 403239
loss: 1.0301591157913208,grad_norm: 0.9999999211501764, iteration: 403240
loss: 1.0142568349838257,grad_norm: 0.9999998309958859, iteration: 403241
loss: 1.077431082725525,grad_norm: 0.9999997925927234, iteration: 403242
loss: 0.9775566458702087,grad_norm: 0.7683496401369975, iteration: 403243
loss: 1.0807477235794067,grad_norm: 0.9999999263671376, iteration: 403244
loss: 1.1681404113769531,grad_norm: 0.9999999294209847, iteration: 403245
loss: 1.0739284753799438,grad_norm: 0.9999999159671084, iteration: 403246
loss: 1.028355598449707,grad_norm: 0.8348222093190436, iteration: 403247
loss: 1.0224974155426025,grad_norm: 0.9999998219856239, iteration: 403248
loss: 1.085490345954895,grad_norm: 0.7590110614089124, iteration: 403249
loss: 0.9640876650810242,grad_norm: 0.775535435011169, iteration: 403250
loss: 1.0260847806930542,grad_norm: 0.9999992850722269, iteration: 403251
loss: 0.998119056224823,grad_norm: 0.775194785142791, iteration: 403252
loss: 0.9837646484375,grad_norm: 0.9078011310670995, iteration: 403253
loss: 0.9625052213668823,grad_norm: 0.8207606206960681, iteration: 403254
loss: 0.9690544009208679,grad_norm: 0.9031395033200432, iteration: 403255
loss: 1.0131460428237915,grad_norm: 0.6894334579309984, iteration: 403256
loss: 1.0038414001464844,grad_norm: 0.9999995956927485, iteration: 403257
loss: 1.107836365699768,grad_norm: 0.758445059504019, iteration: 403258
loss: 1.1072282791137695,grad_norm: 0.9999998717665957, iteration: 403259
loss: 1.145741581916809,grad_norm: 1.0000000077403695, iteration: 403260
loss: 1.0151194334030151,grad_norm: 0.9999996714208352, iteration: 403261
loss: 1.0115059614181519,grad_norm: 0.753758500024716, iteration: 403262
loss: 0.9955810904502869,grad_norm: 0.819066699160124, iteration: 403263
loss: 1.0194001197814941,grad_norm: 0.8564854011158622, iteration: 403264
loss: 0.9817019104957581,grad_norm: 0.8007254205658918, iteration: 403265
loss: 1.0527063608169556,grad_norm: 0.999999208055878, iteration: 403266
loss: 1.0111045837402344,grad_norm: 0.9062479606519417, iteration: 403267
loss: 0.9725640416145325,grad_norm: 0.866455300969088, iteration: 403268
loss: 1.0169414281845093,grad_norm: 0.9376237547845179, iteration: 403269
loss: 1.017915964126587,grad_norm: 0.8807982782548613, iteration: 403270
loss: 0.987628161907196,grad_norm: 0.8027812176633817, iteration: 403271
loss: 1.0371918678283691,grad_norm: 0.8550836341235082, iteration: 403272
loss: 0.9722884297370911,grad_norm: 0.9788179429025924, iteration: 403273
loss: 1.1003352403640747,grad_norm: 0.9999997218816922, iteration: 403274
loss: 0.9882163405418396,grad_norm: 0.780923689500993, iteration: 403275
loss: 0.9825160503387451,grad_norm: 0.9999991762190118, iteration: 403276
loss: 1.0646147727966309,grad_norm: 0.7910489276001376, iteration: 403277
loss: 1.0279747247695923,grad_norm: 0.9999990417146774, iteration: 403278
loss: 1.0051414966583252,grad_norm: 0.7112528883939947, iteration: 403279
loss: 1.001663088798523,grad_norm: 0.8111837954212875, iteration: 403280
loss: 1.0250043869018555,grad_norm: 0.9999997278915769, iteration: 403281
loss: 0.9970222115516663,grad_norm: 0.8740047836245415, iteration: 403282
loss: 1.0481505393981934,grad_norm: 0.8711475816624236, iteration: 403283
loss: 1.0190651416778564,grad_norm: 0.8556237334893907, iteration: 403284
loss: 1.0001920461654663,grad_norm: 0.6968704516636287, iteration: 403285
loss: 0.9535218477249146,grad_norm: 0.8587823040698384, iteration: 403286
loss: 1.0128355026245117,grad_norm: 0.9999993628482432, iteration: 403287
loss: 1.008449912071228,grad_norm: 0.6897506439002079, iteration: 403288
loss: 1.0028479099273682,grad_norm: 0.7284221950942614, iteration: 403289
loss: 1.0425224304199219,grad_norm: 0.6703786910173255, iteration: 403290
loss: 1.0118533372879028,grad_norm: 0.9256781764973568, iteration: 403291
loss: 0.9666627645492554,grad_norm: 0.7838870967809537, iteration: 403292
loss: 1.0515978336334229,grad_norm: 0.8858750867565452, iteration: 403293
loss: 1.0473958253860474,grad_norm: 0.7866057184347353, iteration: 403294
loss: 0.9881290197372437,grad_norm: 0.999999654492915, iteration: 403295
loss: 1.0392696857452393,grad_norm: 0.9999991356526353, iteration: 403296
loss: 0.9886049032211304,grad_norm: 0.8101205828579155, iteration: 403297
loss: 1.019106149673462,grad_norm: 0.6525497172710144, iteration: 403298
loss: 0.9561613202095032,grad_norm: 0.9199960303658441, iteration: 403299
loss: 1.0052196979522705,grad_norm: 0.9999990058317259, iteration: 403300
loss: 0.9918878078460693,grad_norm: 0.9716925302773508, iteration: 403301
loss: 1.011428952217102,grad_norm: 0.9364310291362145, iteration: 403302
loss: 0.9832744002342224,grad_norm: 0.9999992163196756, iteration: 403303
loss: 0.9795266389846802,grad_norm: 0.9237736599974309, iteration: 403304
loss: 1.0279407501220703,grad_norm: 0.7605629292293343, iteration: 403305
loss: 1.0079665184020996,grad_norm: 0.9999999027295999, iteration: 403306
loss: 0.9785061478614807,grad_norm: 0.9999991172237379, iteration: 403307
loss: 0.9836745858192444,grad_norm: 0.7655746463075775, iteration: 403308
loss: 1.2124418020248413,grad_norm: 0.9999999131068351, iteration: 403309
loss: 0.9527519941329956,grad_norm: 0.9999991761418314, iteration: 403310
loss: 0.9738454222679138,grad_norm: 0.7697993214636144, iteration: 403311
loss: 0.9853506684303284,grad_norm: 0.868974823424232, iteration: 403312
loss: 0.9810558557510376,grad_norm: 0.999999013679543, iteration: 403313
loss: 0.9774413108825684,grad_norm: 0.9606593892443871, iteration: 403314
loss: 1.0280354022979736,grad_norm: 0.9032202602449582, iteration: 403315
loss: 0.9882370233535767,grad_norm: 0.953171171884088, iteration: 403316
loss: 1.0210405588150024,grad_norm: 0.9999992302805031, iteration: 403317
loss: 0.9947850704193115,grad_norm: 0.7581674559979765, iteration: 403318
loss: 1.055254340171814,grad_norm: 0.8548920805640912, iteration: 403319
loss: 1.0374094247817993,grad_norm: 0.9999994755840373, iteration: 403320
loss: 1.036191463470459,grad_norm: 0.9999994751230247, iteration: 403321
loss: 1.011757731437683,grad_norm: 0.7933614692025219, iteration: 403322
loss: 1.0206639766693115,grad_norm: 0.91539793434208, iteration: 403323
loss: 0.9990434646606445,grad_norm: 0.7365244849802502, iteration: 403324
loss: 1.0182279348373413,grad_norm: 0.7458546648063197, iteration: 403325
loss: 1.006215214729309,grad_norm: 0.9988871686613771, iteration: 403326
loss: 0.9840627908706665,grad_norm: 0.7298536829804467, iteration: 403327
loss: 1.000889778137207,grad_norm: 0.9276020863492739, iteration: 403328
loss: 1.061262845993042,grad_norm: 0.9999990796116949, iteration: 403329
loss: 1.0235118865966797,grad_norm: 0.9999998441267889, iteration: 403330
loss: 0.9907880425453186,grad_norm: 0.881818685560217, iteration: 403331
loss: 1.0450620651245117,grad_norm: 0.9999991430962054, iteration: 403332
loss: 0.9836118817329407,grad_norm: 0.8256386717496631, iteration: 403333
loss: 1.005023717880249,grad_norm: 0.7021485334248994, iteration: 403334
loss: 1.1227810382843018,grad_norm: 0.7982715534603873, iteration: 403335
loss: 1.3695329427719116,grad_norm: 0.999999852775388, iteration: 403336
loss: 1.004116177558899,grad_norm: 0.9588007047109217, iteration: 403337
loss: 1.0321506261825562,grad_norm: 0.7583958929329087, iteration: 403338
loss: 1.0899996757507324,grad_norm: 0.8551125946910237, iteration: 403339
loss: 1.0137248039245605,grad_norm: 0.9999990013350047, iteration: 403340
loss: 0.9869208931922913,grad_norm: 0.8555611968795618, iteration: 403341
loss: 1.0066384077072144,grad_norm: 0.9999993646815658, iteration: 403342
loss: 1.0143216848373413,grad_norm: 0.8640215037926446, iteration: 403343
loss: 1.1005957126617432,grad_norm: 0.8483434431503021, iteration: 403344
loss: 1.0337550640106201,grad_norm: 0.9999993735246416, iteration: 403345
loss: 1.085861086845398,grad_norm: 0.9999997357853799, iteration: 403346
loss: 1.0337413549423218,grad_norm: 0.933594544793497, iteration: 403347
loss: 1.0643389225006104,grad_norm: 0.9999993018265185, iteration: 403348
loss: 1.0922226905822754,grad_norm: 0.9999993744069025, iteration: 403349
loss: 1.0461478233337402,grad_norm: 0.9999999635967498, iteration: 403350
loss: 0.9916443228721619,grad_norm: 0.6588665784863813, iteration: 403351
loss: 0.9958246946334839,grad_norm: 0.7730195494460482, iteration: 403352
loss: 0.9896551370620728,grad_norm: 0.8965144842387961, iteration: 403353
loss: 1.0007250308990479,grad_norm: 0.9138592084360169, iteration: 403354
loss: 1.0430208444595337,grad_norm: 0.9999995506452658, iteration: 403355
loss: 0.9963011741638184,grad_norm: 0.853027760933483, iteration: 403356
loss: 1.0035542249679565,grad_norm: 0.6946595063868806, iteration: 403357
loss: 0.9826947450637817,grad_norm: 0.7001305818786431, iteration: 403358
loss: 1.0265729427337646,grad_norm: 0.7853200700402329, iteration: 403359
loss: 1.213853120803833,grad_norm: 0.9999998850892966, iteration: 403360
loss: 1.0691782236099243,grad_norm: 0.9999999597797921, iteration: 403361
loss: 1.0531384944915771,grad_norm: 0.7438326300297216, iteration: 403362
loss: 1.0306332111358643,grad_norm: 0.8750071719379922, iteration: 403363
loss: 1.0053157806396484,grad_norm: 0.9999991623297515, iteration: 403364
loss: 1.107589602470398,grad_norm: 0.9999991222534992, iteration: 403365
loss: 0.9909703731536865,grad_norm: 0.762408245047416, iteration: 403366
loss: 0.9640589356422424,grad_norm: 0.999999650207786, iteration: 403367
loss: 1.0013978481292725,grad_norm: 0.9999999094053512, iteration: 403368
loss: 1.0204296112060547,grad_norm: 0.7901221297432355, iteration: 403369
loss: 0.989854097366333,grad_norm: 0.8670783698728015, iteration: 403370
loss: 1.0661126375198364,grad_norm: 0.9999991758782191, iteration: 403371
loss: 0.986400842666626,grad_norm: 0.8366437707248724, iteration: 403372
loss: 0.9956647753715515,grad_norm: 0.7671313470209545, iteration: 403373
loss: 1.0112028121948242,grad_norm: 0.7686087342276742, iteration: 403374
loss: 1.075583577156067,grad_norm: 0.9999995229368598, iteration: 403375
loss: 1.0279825925827026,grad_norm: 0.8920378995176036, iteration: 403376
loss: 1.0078016519546509,grad_norm: 0.8198637462095374, iteration: 403377
loss: 1.0108342170715332,grad_norm: 0.7398311262512585, iteration: 403378
loss: 1.0161287784576416,grad_norm: 0.962615560726183, iteration: 403379
loss: 1.0495718717575073,grad_norm: 0.9999992977008049, iteration: 403380
loss: 1.0393706560134888,grad_norm: 0.9999990770699297, iteration: 403381
loss: 0.9711225628852844,grad_norm: 0.8483519462354061, iteration: 403382
loss: 1.0560284852981567,grad_norm: 0.9999992759471161, iteration: 403383
loss: 0.9906482100486755,grad_norm: 0.6894290645288402, iteration: 403384
loss: 0.9734283089637756,grad_norm: 0.8209232270202771, iteration: 403385
loss: 1.0670032501220703,grad_norm: 0.9999993883447197, iteration: 403386
loss: 0.9923340678215027,grad_norm: 0.8927335570410349, iteration: 403387
loss: 1.0895378589630127,grad_norm: 0.9254594418921335, iteration: 403388
loss: 0.9966799020767212,grad_norm: 0.999998953891976, iteration: 403389
loss: 1.1729819774627686,grad_norm: 0.9999999774387605, iteration: 403390
loss: 1.007857322692871,grad_norm: 0.8552155810230633, iteration: 403391
loss: 1.0153449773788452,grad_norm: 0.9999992346620068, iteration: 403392
loss: 0.9540765881538391,grad_norm: 0.7457977726805366, iteration: 403393
loss: 0.9788297414779663,grad_norm: 0.7464001320107013, iteration: 403394
loss: 1.0228028297424316,grad_norm: 0.670175497036113, iteration: 403395
loss: 1.0637707710266113,grad_norm: 0.9999991869956614, iteration: 403396
loss: 1.0158954858779907,grad_norm: 0.9999993267016727, iteration: 403397
loss: 0.9899290204048157,grad_norm: 0.7988131992781234, iteration: 403398
loss: 1.019290804862976,grad_norm: 1.0000000147807235, iteration: 403399
loss: 0.9703905582427979,grad_norm: 0.7407112287130616, iteration: 403400
loss: 1.0490837097167969,grad_norm: 0.9999992310327167, iteration: 403401
loss: 1.0072754621505737,grad_norm: 0.5576753914639505, iteration: 403402
loss: 0.9988251328468323,grad_norm: 0.7738656229912818, iteration: 403403
loss: 1.0356247425079346,grad_norm: 0.8222760277820559, iteration: 403404
loss: 0.9957863092422485,grad_norm: 0.8175628456722367, iteration: 403405
loss: 1.0127205848693848,grad_norm: 0.9999993407212217, iteration: 403406
loss: 0.9706522822380066,grad_norm: 0.8254469479720109, iteration: 403407
loss: 1.0271049737930298,grad_norm: 0.789150294808452, iteration: 403408
loss: 1.0036410093307495,grad_norm: 0.7650777934926407, iteration: 403409
loss: 1.0315277576446533,grad_norm: 0.794335519995874, iteration: 403410
loss: 0.9766765832901001,grad_norm: 0.7885555536850717, iteration: 403411
loss: 0.9871795773506165,grad_norm: 0.9095898266437176, iteration: 403412
loss: 0.9922235608100891,grad_norm: 0.7830767609441327, iteration: 403413
loss: 0.9972661733627319,grad_norm: 0.9999993091412374, iteration: 403414
loss: 1.0766022205352783,grad_norm: 0.9224404224192476, iteration: 403415
loss: 1.0008659362792969,grad_norm: 0.7710443553303116, iteration: 403416
loss: 1.1902365684509277,grad_norm: 0.9999998916980163, iteration: 403417
loss: 1.081687569618225,grad_norm: 0.9999999950072882, iteration: 403418
loss: 0.9747225046157837,grad_norm: 0.6879316770954914, iteration: 403419
loss: 0.9997803568840027,grad_norm: 0.7787977238980643, iteration: 403420
loss: 1.0562721490859985,grad_norm: 0.8075999882459535, iteration: 403421
loss: 1.027878761291504,grad_norm: 0.7242682285356878, iteration: 403422
loss: 1.0459023714065552,grad_norm: 0.7966232484108361, iteration: 403423
loss: 0.9786460995674133,grad_norm: 0.8220759245019151, iteration: 403424
loss: 1.0097553730010986,grad_norm: 0.8759333884238224, iteration: 403425
loss: 0.9942061305046082,grad_norm: 0.7144237829976703, iteration: 403426
loss: 1.0196845531463623,grad_norm: 0.8263819738895529, iteration: 403427
loss: 1.0749024152755737,grad_norm: 0.9057327876879566, iteration: 403428
loss: 0.9977121949195862,grad_norm: 0.6268562108987242, iteration: 403429
loss: 1.059943675994873,grad_norm: 0.8742686932925562, iteration: 403430
loss: 0.9933210611343384,grad_norm: 0.8116667706067464, iteration: 403431
loss: 1.0173522233963013,grad_norm: 0.8360433560150535, iteration: 403432
loss: 1.0298222303390503,grad_norm: 0.7817404472995145, iteration: 403433
loss: 0.9902945160865784,grad_norm: 0.6349774903775114, iteration: 403434
loss: 1.0851677656173706,grad_norm: 0.795004484088977, iteration: 403435
loss: 0.9885928630828857,grad_norm: 0.8629885341604273, iteration: 403436
loss: 1.0335248708724976,grad_norm: 0.8287019367481988, iteration: 403437
loss: 1.0261796712875366,grad_norm: 0.7823923727911893, iteration: 403438
loss: 0.9967414140701294,grad_norm: 0.9999991363128792, iteration: 403439
loss: 1.0295295715332031,grad_norm: 0.6864378465947516, iteration: 403440
loss: 0.9934667348861694,grad_norm: 0.6875813809548631, iteration: 403441
loss: 1.0058684349060059,grad_norm: 0.9999998391546409, iteration: 403442
loss: 1.0523139238357544,grad_norm: 0.895646675348742, iteration: 403443
loss: 1.0838321447372437,grad_norm: 0.9999998255679449, iteration: 403444
loss: 1.0002732276916504,grad_norm: 0.7896870587388406, iteration: 403445
loss: 1.003135085105896,grad_norm: 0.7197833471499387, iteration: 403446
loss: 1.016931414604187,grad_norm: 0.8689877543360935, iteration: 403447
loss: 1.1148532629013062,grad_norm: 0.8819333700532793, iteration: 403448
loss: 0.9604029655456543,grad_norm: 0.9449457640398211, iteration: 403449
loss: 0.9955402612686157,grad_norm: 0.849689877649245, iteration: 403450
loss: 1.029798984527588,grad_norm: 0.8785349338073254, iteration: 403451
loss: 1.0190439224243164,grad_norm: 0.7953273843989326, iteration: 403452
loss: 1.0376049280166626,grad_norm: 0.7873406124462516, iteration: 403453
loss: 1.0031992197036743,grad_norm: 0.822711456800607, iteration: 403454
loss: 1.0272188186645508,grad_norm: 0.829665361454954, iteration: 403455
loss: 0.9760375022888184,grad_norm: 0.8661279644086503, iteration: 403456
loss: 1.1502022743225098,grad_norm: 0.9999991211087351, iteration: 403457
loss: 0.9977801442146301,grad_norm: 0.730266860775247, iteration: 403458
loss: 0.9860807061195374,grad_norm: 0.7336244463912645, iteration: 403459
loss: 1.0097746849060059,grad_norm: 0.8207636752718673, iteration: 403460
loss: 1.0407153367996216,grad_norm: 0.9999990645343393, iteration: 403461
loss: 1.0062830448150635,grad_norm: 0.7678365054866807, iteration: 403462
loss: 0.9942917823791504,grad_norm: 0.7041310714734988, iteration: 403463
loss: 1.0243116617202759,grad_norm: 0.7895624977733829, iteration: 403464
loss: 1.024078130722046,grad_norm: 0.8133053726781716, iteration: 403465
loss: 1.2518213987350464,grad_norm: 0.9021308402010523, iteration: 403466
loss: 1.0058637857437134,grad_norm: 0.8790479463795291, iteration: 403467
loss: 1.0005096197128296,grad_norm: 0.960757147359899, iteration: 403468
loss: 0.9955095648765564,grad_norm: 0.8233062063954324, iteration: 403469
loss: 1.06351900100708,grad_norm: 0.8439821574543205, iteration: 403470
loss: 0.9847427606582642,grad_norm: 0.7811922111985851, iteration: 403471
loss: 0.9742501974105835,grad_norm: 0.9999990336792747, iteration: 403472
loss: 1.2570428848266602,grad_norm: 0.9999999835166, iteration: 403473
loss: 1.0157842636108398,grad_norm: 0.9414402411841877, iteration: 403474
loss: 1.0876941680908203,grad_norm: 0.9999994207781765, iteration: 403475
loss: 0.99443519115448,grad_norm: 0.9903906384335567, iteration: 403476
loss: 1.0135971307754517,grad_norm: 0.999999964146295, iteration: 403477
loss: 0.9633262753486633,grad_norm: 0.7370020109192532, iteration: 403478
loss: 1.0159395933151245,grad_norm: 0.9999992407006171, iteration: 403479
loss: 1.0037158727645874,grad_norm: 0.753093140112905, iteration: 403480
loss: 1.0369423627853394,grad_norm: 0.8003890065991336, iteration: 403481
loss: 0.9863982796669006,grad_norm: 0.8855501925939978, iteration: 403482
loss: 0.9886235594749451,grad_norm: 0.9066096697868564, iteration: 403483
loss: 1.0930004119873047,grad_norm: 0.8865461094731752, iteration: 403484
loss: 1.0089428424835205,grad_norm: 0.8758243818989123, iteration: 403485
loss: 1.0044885873794556,grad_norm: 0.8398515772125016, iteration: 403486
loss: 0.9820905923843384,grad_norm: 0.7871829403578472, iteration: 403487
loss: 1.0475448369979858,grad_norm: 0.951334509187589, iteration: 403488
loss: 1.0691821575164795,grad_norm: 0.9999999456963922, iteration: 403489
loss: 1.132738709449768,grad_norm: 0.999999862986215, iteration: 403490
loss: 1.012011170387268,grad_norm: 0.7038415314250295, iteration: 403491
loss: 0.9987910985946655,grad_norm: 0.7082826028776508, iteration: 403492
loss: 0.9932793974876404,grad_norm: 0.9999993845084999, iteration: 403493
loss: 1.0097101926803589,grad_norm: 0.6979920631379412, iteration: 403494
loss: 0.9717618227005005,grad_norm: 0.9514373955815025, iteration: 403495
loss: 1.0343762636184692,grad_norm: 0.8039737784616181, iteration: 403496
loss: 1.0111743211746216,grad_norm: 0.9999996631413052, iteration: 403497
loss: 1.054645299911499,grad_norm: 0.7261616616504177, iteration: 403498
loss: 1.0494276285171509,grad_norm: 0.7326168032729073, iteration: 403499
loss: 0.9917622804641724,grad_norm: 0.7826583473546456, iteration: 403500
loss: 1.0612937211990356,grad_norm: 0.9999992268667506, iteration: 403501
loss: 1.076889991760254,grad_norm: 0.9304497051193256, iteration: 403502
loss: 0.9683375954627991,grad_norm: 0.6955230742325141, iteration: 403503
loss: 0.9937498569488525,grad_norm: 0.8646588100575027, iteration: 403504
loss: 1.0429730415344238,grad_norm: 0.882521527507385, iteration: 403505
loss: 0.9997841119766235,grad_norm: 0.8823056339449132, iteration: 403506
loss: 1.091589331626892,grad_norm: 0.9686951598219131, iteration: 403507
loss: 1.0537163019180298,grad_norm: 0.8244865958667272, iteration: 403508
loss: 1.0027875900268555,grad_norm: 0.6896876013434566, iteration: 403509
loss: 0.9994257092475891,grad_norm: 0.9999990256420157, iteration: 403510
loss: 1.2440452575683594,grad_norm: 0.9999995821858493, iteration: 403511
loss: 1.1643407344818115,grad_norm: 0.9999995882877022, iteration: 403512
loss: 1.053413987159729,grad_norm: 0.7474997737092763, iteration: 403513
loss: 1.0143910646438599,grad_norm: 0.7456935829788881, iteration: 403514
loss: 1.0002464056015015,grad_norm: 0.8401567326179641, iteration: 403515
loss: 1.0384485721588135,grad_norm: 0.999999372283147, iteration: 403516
loss: 1.0192795991897583,grad_norm: 0.8483214020877065, iteration: 403517
loss: 0.9704445004463196,grad_norm: 0.9999990693479864, iteration: 403518
loss: 1.1034718751907349,grad_norm: 0.9999998556865364, iteration: 403519
loss: 0.9761344790458679,grad_norm: 0.7701206470418503, iteration: 403520
loss: 1.0054445266723633,grad_norm: 0.7481185651074233, iteration: 403521
loss: 1.0824928283691406,grad_norm: 0.9999999610826832, iteration: 403522
loss: 0.9800627827644348,grad_norm: 0.6661370832222436, iteration: 403523
loss: 0.9500262141227722,grad_norm: 0.9999990470270559, iteration: 403524
loss: 0.9755251407623291,grad_norm: 0.6790226489827832, iteration: 403525
loss: 0.9695183634757996,grad_norm: 0.8752205979612111, iteration: 403526
loss: 0.9961528182029724,grad_norm: 0.8248530621849773, iteration: 403527
loss: 1.056738257408142,grad_norm: 0.9999991264494064, iteration: 403528
loss: 0.9984264969825745,grad_norm: 0.811079518631354, iteration: 403529
loss: 1.0186268091201782,grad_norm: 0.9999990973481998, iteration: 403530
loss: 1.041088342666626,grad_norm: 0.7946853872080614, iteration: 403531
loss: 1.1426146030426025,grad_norm: 0.9999993753582815, iteration: 403532
loss: 0.9733708500862122,grad_norm: 0.776282951136913, iteration: 403533
loss: 0.9611926078796387,grad_norm: 0.7246307699923902, iteration: 403534
loss: 0.9737629294395447,grad_norm: 0.792631623444997, iteration: 403535
loss: 0.9851850867271423,grad_norm: 0.9812612957324255, iteration: 403536
loss: 0.9517888426780701,grad_norm: 0.896572196233544, iteration: 403537
loss: 1.0287705659866333,grad_norm: 0.914414570185361, iteration: 403538
loss: 1.0337773561477661,grad_norm: 0.7082944867093894, iteration: 403539
loss: 1.0612847805023193,grad_norm: 0.9999999408066664, iteration: 403540
loss: 1.0137691497802734,grad_norm: 0.7693670513572436, iteration: 403541
loss: 1.0228798389434814,grad_norm: 0.78479415912306, iteration: 403542
loss: 1.0277836322784424,grad_norm: 0.8332260594679557, iteration: 403543
loss: 1.019121766090393,grad_norm: 0.9999998202792795, iteration: 403544
loss: 1.046209454536438,grad_norm: 0.972760390112702, iteration: 403545
loss: 0.9930175542831421,grad_norm: 0.7601075022580779, iteration: 403546
loss: 1.1036968231201172,grad_norm: 0.9999995451639112, iteration: 403547
loss: 1.0336451530456543,grad_norm: 0.9999992181431092, iteration: 403548
loss: 1.0097668170928955,grad_norm: 0.6840913645531899, iteration: 403549
loss: 0.9928814172744751,grad_norm: 0.7042575484019066, iteration: 403550
loss: 1.0265666246414185,grad_norm: 0.7600991684398819, iteration: 403551
loss: 0.9837733507156372,grad_norm: 0.8691201696290037, iteration: 403552
loss: 1.0800954103469849,grad_norm: 0.9999998596505602, iteration: 403553
loss: 1.0319744348526,grad_norm: 0.9999989853506503, iteration: 403554
loss: 1.005203366279602,grad_norm: 0.6399778838749406, iteration: 403555
loss: 0.9838933944702148,grad_norm: 0.921672873459907, iteration: 403556
loss: 0.9959786534309387,grad_norm: 0.7780035038128089, iteration: 403557
loss: 1.0365766286849976,grad_norm: 0.8362688137909428, iteration: 403558
loss: 1.0045863389968872,grad_norm: 0.8996341178216594, iteration: 403559
loss: 1.0623095035552979,grad_norm: 0.9926517893078939, iteration: 403560
loss: 1.0162417888641357,grad_norm: 0.7816564183567417, iteration: 403561
loss: 0.9735819697380066,grad_norm: 0.7608369454482271, iteration: 403562
loss: 0.9638586640357971,grad_norm: 0.6992646639618859, iteration: 403563
loss: 1.1413778066635132,grad_norm: 0.9999996503283851, iteration: 403564
loss: 1.0137969255447388,grad_norm: 0.7898188116100411, iteration: 403565
loss: 0.980620801448822,grad_norm: 0.9999990613449296, iteration: 403566
loss: 1.0194177627563477,grad_norm: 0.9999995532660377, iteration: 403567
loss: 1.005091905593872,grad_norm: 0.7713334579631147, iteration: 403568
loss: 0.9694518446922302,grad_norm: 0.7045626823157136, iteration: 403569
loss: 0.9670808911323547,grad_norm: 0.8377830910990317, iteration: 403570
loss: 1.0114572048187256,grad_norm: 0.6449963075797153, iteration: 403571
loss: 0.9878259897232056,grad_norm: 0.7984740895221949, iteration: 403572
loss: 1.2369550466537476,grad_norm: 0.9999999633204292, iteration: 403573
loss: 1.011755108833313,grad_norm: 0.9635836760136939, iteration: 403574
loss: 0.9964655041694641,grad_norm: 0.7392258419036503, iteration: 403575
loss: 1.2673828601837158,grad_norm: 1.0000000240091136, iteration: 403576
loss: 0.9889747500419617,grad_norm: 0.837929626823279, iteration: 403577
loss: 1.0415465831756592,grad_norm: 0.9999992781976015, iteration: 403578
loss: 0.9739395976066589,grad_norm: 0.8450906258608148, iteration: 403579
loss: 1.0037074089050293,grad_norm: 0.8874299948918015, iteration: 403580
loss: 1.0367393493652344,grad_norm: 0.7033892136214377, iteration: 403581
loss: 0.9923552870750427,grad_norm: 0.8129892812192818, iteration: 403582
loss: 0.996177613735199,grad_norm: 0.7447174095220245, iteration: 403583
loss: 1.0042332410812378,grad_norm: 0.7322283979310363, iteration: 403584
loss: 1.0276455879211426,grad_norm: 0.9999991991126375, iteration: 403585
loss: 1.009850263595581,grad_norm: 0.85961740067918, iteration: 403586
loss: 0.98128741979599,grad_norm: 0.7298867305961109, iteration: 403587
loss: 1.0189553499221802,grad_norm: 0.8402266408834087, iteration: 403588
loss: 1.0441049337387085,grad_norm: 0.7290172581550793, iteration: 403589
loss: 1.0354121923446655,grad_norm: 0.7618374315575325, iteration: 403590
loss: 1.0525579452514648,grad_norm: 0.8374245610539249, iteration: 403591
loss: 1.011671781539917,grad_norm: 0.7852657396270839, iteration: 403592
loss: 1.0274537801742554,grad_norm: 0.9999992358113841, iteration: 403593
loss: 1.0543805360794067,grad_norm: 0.9943180657687299, iteration: 403594
loss: 1.0051524639129639,grad_norm: 0.8633344496642279, iteration: 403595
loss: 0.9689319729804993,grad_norm: 0.756260826124243, iteration: 403596
loss: 1.0329087972640991,grad_norm: 0.8169747758536, iteration: 403597
loss: 1.0548856258392334,grad_norm: 0.9999997200809848, iteration: 403598
loss: 0.9813461303710938,grad_norm: 0.7652753528726977, iteration: 403599
loss: 0.9949983954429626,grad_norm: 0.8447570635271929, iteration: 403600
loss: 0.9725645184516907,grad_norm: 0.7338618474787726, iteration: 403601
loss: 0.9381101131439209,grad_norm: 0.6836789692737592, iteration: 403602
loss: 1.0171325206756592,grad_norm: 0.8924431440917531, iteration: 403603
loss: 0.9952113032341003,grad_norm: 0.8066209728720238, iteration: 403604
loss: 1.0471781492233276,grad_norm: 0.9999995206647089, iteration: 403605
loss: 1.0055257081985474,grad_norm: 0.8052274316440458, iteration: 403606
loss: 1.0130939483642578,grad_norm: 0.8140932736793103, iteration: 403607
loss: 1.01588773727417,grad_norm: 0.790732179449098, iteration: 403608
loss: 1.0061137676239014,grad_norm: 0.8391982992105272, iteration: 403609
loss: 0.9739219546318054,grad_norm: 0.7086003387468339, iteration: 403610
loss: 1.0226640701293945,grad_norm: 0.7503928874910814, iteration: 403611
loss: 1.0383375883102417,grad_norm: 0.7566677528050674, iteration: 403612
loss: 1.0884143114089966,grad_norm: 0.9999999555808288, iteration: 403613
loss: 1.0061269998550415,grad_norm: 0.9999990607712775, iteration: 403614
loss: 0.9829824566841125,grad_norm: 0.8009260923891504, iteration: 403615
loss: 0.9594727754592896,grad_norm: 0.974917700300208, iteration: 403616
loss: 0.9886218905448914,grad_norm: 0.8445292121267554, iteration: 403617
loss: 1.0214571952819824,grad_norm: 0.9022222199171099, iteration: 403618
loss: 0.9867851734161377,grad_norm: 0.9041248464972923, iteration: 403619
loss: 1.0130434036254883,grad_norm: 0.8254591294772131, iteration: 403620
loss: 1.0453375577926636,grad_norm: 0.9999996371372604, iteration: 403621
loss: 0.9763063192367554,grad_norm: 0.7188082965430974, iteration: 403622
loss: 0.9940463900566101,grad_norm: 0.7837240792657135, iteration: 403623
loss: 1.0537645816802979,grad_norm: 0.945909245213507, iteration: 403624
loss: 1.0162562131881714,grad_norm: 0.8146285096079495, iteration: 403625
loss: 1.0144057273864746,grad_norm: 0.8002782188666426, iteration: 403626
loss: 0.9769728183746338,grad_norm: 0.7491580958811992, iteration: 403627
loss: 1.0903068780899048,grad_norm: 0.999999621903508, iteration: 403628
loss: 1.0132395029067993,grad_norm: 0.8229976808481337, iteration: 403629
loss: 1.030664324760437,grad_norm: 0.8328680324922743, iteration: 403630
loss: 1.0169360637664795,grad_norm: 0.8034096169559694, iteration: 403631
loss: 1.00242280960083,grad_norm: 0.7942618950855014, iteration: 403632
loss: 1.0196661949157715,grad_norm: 0.7548333574529635, iteration: 403633
loss: 1.0133061408996582,grad_norm: 0.7089767360324991, iteration: 403634
loss: 0.99580317735672,grad_norm: 0.9999995129765765, iteration: 403635
loss: 1.0353107452392578,grad_norm: 0.7353959490828481, iteration: 403636
loss: 1.036422610282898,grad_norm: 0.8960248325542256, iteration: 403637
loss: 0.9685451984405518,grad_norm: 0.7959192227815511, iteration: 403638
loss: 1.0868027210235596,grad_norm: 0.9999998492595331, iteration: 403639
loss: 1.0688917636871338,grad_norm: 0.7057477805907288, iteration: 403640
loss: 0.9863088726997375,grad_norm: 0.6594972074092365, iteration: 403641
loss: 1.0155357122421265,grad_norm: 0.7861035965961901, iteration: 403642
loss: 1.0293083190917969,grad_norm: 0.6898564801821245, iteration: 403643
loss: 0.9882376790046692,grad_norm: 0.7885498678141831, iteration: 403644
loss: 1.0182009935379028,grad_norm: 0.8396077486166629, iteration: 403645
loss: 0.9721301794052124,grad_norm: 0.7369861160137409, iteration: 403646
loss: 1.0187599658966064,grad_norm: 0.7591838252414348, iteration: 403647
loss: 1.0312436819076538,grad_norm: 0.9999999291514265, iteration: 403648
loss: 1.0096839666366577,grad_norm: 0.8692749533267029, iteration: 403649
loss: 0.9378105401992798,grad_norm: 0.7852956469365158, iteration: 403650
loss: 0.9916846752166748,grad_norm: 0.8477221549790666, iteration: 403651
loss: 1.000186800956726,grad_norm: 0.7735445550757258, iteration: 403652
loss: 1.0490353107452393,grad_norm: 0.8439284121047449, iteration: 403653
loss: 1.0481699705123901,grad_norm: 0.8087607429823197, iteration: 403654
loss: 1.0221989154815674,grad_norm: 0.862545558454035, iteration: 403655
loss: 0.9936695098876953,grad_norm: 0.7815637729921363, iteration: 403656
loss: 1.0189038515090942,grad_norm: 0.999999041822128, iteration: 403657
loss: 1.0141944885253906,grad_norm: 0.9574574742162055, iteration: 403658
loss: 1.0433101654052734,grad_norm: 0.8680843940279604, iteration: 403659
loss: 1.047465443611145,grad_norm: 0.9999997199105314, iteration: 403660
loss: 0.9753880500793457,grad_norm: 0.7667709450643901, iteration: 403661
loss: 1.0064287185668945,grad_norm: 0.6722979891877947, iteration: 403662
loss: 0.9988251328468323,grad_norm: 0.836976194037656, iteration: 403663
loss: 0.9896194934844971,grad_norm: 0.7925197146800275, iteration: 403664
loss: 1.0040173530578613,grad_norm: 0.7103104085716708, iteration: 403665
loss: 0.9905223250389099,grad_norm: 0.799663114972309, iteration: 403666
loss: 0.9852332472801208,grad_norm: 0.7339073066616906, iteration: 403667
loss: 1.0218559503555298,grad_norm: 0.9999997122132741, iteration: 403668
loss: 1.0231376886367798,grad_norm: 0.7220512550468826, iteration: 403669
loss: 0.9537526965141296,grad_norm: 0.7140845366872324, iteration: 403670
loss: 0.9788297414779663,grad_norm: 0.7087494156735415, iteration: 403671
loss: 1.0038282871246338,grad_norm: 0.8067285069262231, iteration: 403672
loss: 1.0114444494247437,grad_norm: 0.6782082715802262, iteration: 403673
loss: 1.0017039775848389,grad_norm: 0.9999993874424822, iteration: 403674
loss: 1.011123538017273,grad_norm: 0.7247033781698645, iteration: 403675
loss: 1.0117642879486084,grad_norm: 0.8507673030638027, iteration: 403676
loss: 0.9928628206253052,grad_norm: 0.8962619089059314, iteration: 403677
loss: 1.0042428970336914,grad_norm: 0.7684257054113866, iteration: 403678
loss: 1.0042386054992676,grad_norm: 0.7468779988754723, iteration: 403679
loss: 0.9919842481613159,grad_norm: 0.7631730286161815, iteration: 403680
loss: 1.0403140783309937,grad_norm: 0.6759373246185089, iteration: 403681
loss: 1.0415380001068115,grad_norm: 0.9999992076773825, iteration: 403682
loss: 1.0311684608459473,grad_norm: 0.7650605813764138, iteration: 403683
loss: 1.0135772228240967,grad_norm: 0.9999991676465487, iteration: 403684
loss: 1.0179550647735596,grad_norm: 0.9999993015494996, iteration: 403685
loss: 1.0051608085632324,grad_norm: 0.9603695001637833, iteration: 403686
loss: 0.93856281042099,grad_norm: 0.730246487283591, iteration: 403687
loss: 0.9876505732536316,grad_norm: 0.6764270943756993, iteration: 403688
loss: 1.0167133808135986,grad_norm: 0.8746741995080123, iteration: 403689
loss: 0.9571258425712585,grad_norm: 0.8151770411988697, iteration: 403690
loss: 1.0150223970413208,grad_norm: 0.8580873481901096, iteration: 403691
loss: 0.9769129157066345,grad_norm: 0.8868750930191989, iteration: 403692
loss: 1.0153814554214478,grad_norm: 0.7831010227839975, iteration: 403693
loss: 0.9792497754096985,grad_norm: 0.7732627581162437, iteration: 403694
loss: 1.003825306892395,grad_norm: 0.9999992865662432, iteration: 403695
loss: 0.9941592812538147,grad_norm: 0.6737348514974897, iteration: 403696
loss: 1.051618218421936,grad_norm: 0.9999996553061601, iteration: 403697
loss: 0.9619665145874023,grad_norm: 0.6793761920644317, iteration: 403698
loss: 1.1026736497879028,grad_norm: 0.9999999327842569, iteration: 403699
loss: 1.0520368814468384,grad_norm: 0.9999998548364205, iteration: 403700
loss: 0.978388249874115,grad_norm: 0.7901666886038001, iteration: 403701
loss: 0.9977484941482544,grad_norm: 0.8134710383858073, iteration: 403702
loss: 0.9781875610351562,grad_norm: 0.999999001060085, iteration: 403703
loss: 0.9516860246658325,grad_norm: 0.7266650030500935, iteration: 403704
loss: 1.013243317604065,grad_norm: 0.8706455673442729, iteration: 403705
loss: 1.0154834985733032,grad_norm: 0.8895912850799073, iteration: 403706
loss: 1.017736792564392,grad_norm: 0.7328105592197264, iteration: 403707
loss: 1.0029017925262451,grad_norm: 0.7474052388052785, iteration: 403708
loss: 1.0140464305877686,grad_norm: 0.9999997433665938, iteration: 403709
loss: 1.176464319229126,grad_norm: 0.9999995725751916, iteration: 403710
loss: 0.9660540819168091,grad_norm: 0.7346195050290978, iteration: 403711
loss: 1.0002260208129883,grad_norm: 0.8236409972990167, iteration: 403712
loss: 1.028383493423462,grad_norm: 0.732685091280844, iteration: 403713
loss: 1.0044196844100952,grad_norm: 0.9262336875140682, iteration: 403714
loss: 1.0055092573165894,grad_norm: 0.7215223860985146, iteration: 403715
loss: 0.959847629070282,grad_norm: 0.905904216380714, iteration: 403716
loss: 0.9953892827033997,grad_norm: 0.6900527722330586, iteration: 403717
loss: 0.9863957762718201,grad_norm: 0.7059267077757116, iteration: 403718
loss: 1.0312249660491943,grad_norm: 0.9999998842538502, iteration: 403719
loss: 1.0190236568450928,grad_norm: 0.8968151351303635, iteration: 403720
loss: 1.0006555318832397,grad_norm: 0.8818396208786657, iteration: 403721
loss: 1.0167603492736816,grad_norm: 0.9999998291242939, iteration: 403722
loss: 0.9934588670730591,grad_norm: 0.8737022087192143, iteration: 403723
loss: 1.0289348363876343,grad_norm: 0.6654179346295915, iteration: 403724
loss: 0.9957562685012817,grad_norm: 0.7777635319666197, iteration: 403725
loss: 1.0775864124298096,grad_norm: 0.9827591790895467, iteration: 403726
loss: 0.9923071265220642,grad_norm: 0.8866932849599221, iteration: 403727
loss: 1.0399116277694702,grad_norm: 0.6861667046512615, iteration: 403728
loss: 0.98409104347229,grad_norm: 0.8158156747271404, iteration: 403729
loss: 0.9892654418945312,grad_norm: 0.853774460420013, iteration: 403730
loss: 0.9641024470329285,grad_norm: 0.7198237795231137, iteration: 403731
loss: 1.023856282234192,grad_norm: 0.8564133979590226, iteration: 403732
loss: 1.022467851638794,grad_norm: 0.9195431597121093, iteration: 403733
loss: 0.9929812550544739,grad_norm: 0.778372958239834, iteration: 403734
loss: 0.9670279622077942,grad_norm: 0.8048710935511139, iteration: 403735
loss: 0.9873687624931335,grad_norm: 0.9999998264195823, iteration: 403736
loss: 1.0060474872589111,grad_norm: 0.9195379913437316, iteration: 403737
loss: 0.9942479133605957,grad_norm: 0.8918591802414554, iteration: 403738
loss: 1.0019553899765015,grad_norm: 0.9999989643887895, iteration: 403739
loss: 0.9984396696090698,grad_norm: 0.944817375277279, iteration: 403740
loss: 1.0006823539733887,grad_norm: 0.9139748188745416, iteration: 403741
loss: 1.0423774719238281,grad_norm: 0.7516542640311836, iteration: 403742
loss: 1.0300627946853638,grad_norm: 0.6593879337932876, iteration: 403743
loss: 1.012329339981079,grad_norm: 0.6806935833676612, iteration: 403744
loss: 1.0068871974945068,grad_norm: 0.8024284820499641, iteration: 403745
loss: 0.9919386506080627,grad_norm: 0.670655157975255, iteration: 403746
loss: 1.004546046257019,grad_norm: 0.9441260449716828, iteration: 403747
loss: 0.964216947555542,grad_norm: 0.8035582083107834, iteration: 403748
loss: 0.9666614532470703,grad_norm: 0.9999994830525224, iteration: 403749
loss: 1.0229090452194214,grad_norm: 0.999999740607802, iteration: 403750
loss: 1.0363472700119019,grad_norm: 0.9999991930213064, iteration: 403751
loss: 1.0032296180725098,grad_norm: 0.6855849159577492, iteration: 403752
loss: 0.9917258620262146,grad_norm: 0.8744807130503304, iteration: 403753
loss: 0.9915797114372253,grad_norm: 0.9999998092282282, iteration: 403754
loss: 1.01973295211792,grad_norm: 0.8388024585345328, iteration: 403755
loss: 1.0317875146865845,grad_norm: 0.9999998418292058, iteration: 403756
loss: 1.0254720449447632,grad_norm: 0.9832452861033277, iteration: 403757
loss: 0.9936343431472778,grad_norm: 0.9194671952282009, iteration: 403758
loss: 0.9854799509048462,grad_norm: 0.8864456092331062, iteration: 403759
loss: 1.0167683362960815,grad_norm: 0.8005024384319825, iteration: 403760
loss: 0.9900631308555603,grad_norm: 0.8457015623193208, iteration: 403761
loss: 0.9942929148674011,grad_norm: 0.7132522749902732, iteration: 403762
loss: 0.987224280834198,grad_norm: 0.6414217732260299, iteration: 403763
loss: 0.9953266382217407,grad_norm: 0.7484046561656136, iteration: 403764
loss: 1.0261598825454712,grad_norm: 0.8892747358300533, iteration: 403765
loss: 0.9935580492019653,grad_norm: 0.7878675454770616, iteration: 403766
loss: 1.079547643661499,grad_norm: 0.9999996374667409, iteration: 403767
loss: 1.0244888067245483,grad_norm: 0.99999918989972, iteration: 403768
loss: 1.0073273181915283,grad_norm: 0.9999993850743542, iteration: 403769
loss: 0.9879392385482788,grad_norm: 0.702037865073856, iteration: 403770
loss: 1.0124484300613403,grad_norm: 0.8957478454811443, iteration: 403771
loss: 1.0177525281906128,grad_norm: 0.7237478755506258, iteration: 403772
loss: 1.0471044778823853,grad_norm: 0.9999991372972067, iteration: 403773
loss: 1.0682036876678467,grad_norm: 0.9999993526943346, iteration: 403774
loss: 1.0076006650924683,grad_norm: 0.7836700426034379, iteration: 403775
loss: 1.0165990591049194,grad_norm: 0.9999990954227623, iteration: 403776
loss: 1.0270254611968994,grad_norm: 0.8403630940477067, iteration: 403777
loss: 1.012555718421936,grad_norm: 0.8778889458204538, iteration: 403778
loss: 0.98101407289505,grad_norm: 0.9999996080849701, iteration: 403779
loss: 1.038866400718689,grad_norm: 0.8226240337004492, iteration: 403780
loss: 0.9990990161895752,grad_norm: 0.8534161369361195, iteration: 403781
loss: 1.0089768171310425,grad_norm: 0.7394296931405543, iteration: 403782
loss: 0.9600113034248352,grad_norm: 0.7308273307543826, iteration: 403783
loss: 1.0441005229949951,grad_norm: 0.9999994026923106, iteration: 403784
loss: 0.9848833680152893,grad_norm: 0.9999990525407034, iteration: 403785
loss: 1.0219351053237915,grad_norm: 0.840492983533377, iteration: 403786
loss: 1.019120454788208,grad_norm: 0.686861518791227, iteration: 403787
loss: 0.996616005897522,grad_norm: 0.8740108497264953, iteration: 403788
loss: 1.0649452209472656,grad_norm: 0.9351946999431222, iteration: 403789
loss: 0.9845418930053711,grad_norm: 0.8034036762153496, iteration: 403790
loss: 1.0170314311981201,grad_norm: 0.9999991104419893, iteration: 403791
loss: 0.9959506988525391,grad_norm: 0.8847835883231361, iteration: 403792
loss: 1.0307939052581787,grad_norm: 0.7012825752075728, iteration: 403793
loss: 0.9935601353645325,grad_norm: 0.8019993436999517, iteration: 403794
loss: 0.987592875957489,grad_norm: 0.668706018783565, iteration: 403795
loss: 1.0198906660079956,grad_norm: 0.8326262371617552, iteration: 403796
loss: 1.0786806344985962,grad_norm: 0.9999996855706964, iteration: 403797
loss: 1.0015226602554321,grad_norm: 0.7102052499833273, iteration: 403798
loss: 1.031652808189392,grad_norm: 0.7548641460044169, iteration: 403799
loss: 1.0589144229888916,grad_norm: 0.8711930240667656, iteration: 403800
loss: 0.9917847514152527,grad_norm: 0.9181620361819174, iteration: 403801
loss: 1.012525200843811,grad_norm: 0.7544536982078435, iteration: 403802
loss: 1.0103305578231812,grad_norm: 0.7390157824260901, iteration: 403803
loss: 1.0065395832061768,grad_norm: 0.8257494190730412, iteration: 403804
loss: 0.9892293214797974,grad_norm: 0.7328074449767236, iteration: 403805
loss: 1.1134439706802368,grad_norm: 0.9999997720191676, iteration: 403806
loss: 0.9914636611938477,grad_norm: 0.7921798867956475, iteration: 403807
loss: 1.0341284275054932,grad_norm: 0.9999991211179795, iteration: 403808
loss: 1.014649510383606,grad_norm: 0.7028384817933309, iteration: 403809
loss: 1.0284980535507202,grad_norm: 0.8738609440386071, iteration: 403810
loss: 0.9889162182807922,grad_norm: 0.6536049972042759, iteration: 403811
loss: 1.0037297010421753,grad_norm: 0.8877695915778963, iteration: 403812
loss: 0.9756006002426147,grad_norm: 0.7953572421810963, iteration: 403813
loss: 1.0006721019744873,grad_norm: 0.8024472762899313, iteration: 403814
loss: 1.0329418182373047,grad_norm: 0.9288593549632492, iteration: 403815
loss: 1.0108444690704346,grad_norm: 0.8486573133921148, iteration: 403816
loss: 1.0154722929000854,grad_norm: 0.7253425607290145, iteration: 403817
loss: 1.0106476545333862,grad_norm: 0.8125861077189903, iteration: 403818
loss: 0.9790108799934387,grad_norm: 0.993832943263334, iteration: 403819
loss: 0.9998235702514648,grad_norm: 0.8202685758162699, iteration: 403820
loss: 1.0860223770141602,grad_norm: 0.9999997240219027, iteration: 403821
loss: 1.035169005393982,grad_norm: 0.7978153073403824, iteration: 403822
loss: 1.0406826734542847,grad_norm: 0.8093259265190664, iteration: 403823
loss: 1.014215350151062,grad_norm: 0.9538216707543861, iteration: 403824
loss: 0.9890727400779724,grad_norm: 0.7010113213587263, iteration: 403825
loss: 1.0531376600265503,grad_norm: 0.9999998697828679, iteration: 403826
loss: 0.998706579208374,grad_norm: 0.6844651796815338, iteration: 403827
loss: 1.0088481903076172,grad_norm: 0.9999997140917932, iteration: 403828
loss: 1.009619116783142,grad_norm: 0.8858910809524477, iteration: 403829
loss: 1.019027829170227,grad_norm: 0.9796556382438455, iteration: 403830
loss: 0.9821485877037048,grad_norm: 0.7968027177930752, iteration: 403831
loss: 1.0418893098831177,grad_norm: 0.7230765595487444, iteration: 403832
loss: 1.075348973274231,grad_norm: 0.9999992098403002, iteration: 403833
loss: 0.992176353931427,grad_norm: 0.7455879512317188, iteration: 403834
loss: 1.014212965965271,grad_norm: 0.8249418942894133, iteration: 403835
loss: 1.0047413110733032,grad_norm: 0.9682669658974717, iteration: 403836
loss: 0.9957618117332458,grad_norm: 0.7759891037770082, iteration: 403837
loss: 1.0136866569519043,grad_norm: 0.7191737733389758, iteration: 403838
loss: 0.9686161279678345,grad_norm: 0.7387900813621574, iteration: 403839
loss: 1.0082290172576904,grad_norm: 0.7474787501193945, iteration: 403840
loss: 0.9869921207427979,grad_norm: 0.8407637406993408, iteration: 403841
loss: 1.025283932685852,grad_norm: 0.8214514824867237, iteration: 403842
loss: 1.0017752647399902,grad_norm: 0.8904141492125989, iteration: 403843
loss: 1.0007158517837524,grad_norm: 0.7767283052845084, iteration: 403844
loss: 0.9974523186683655,grad_norm: 0.7894434818491709, iteration: 403845
loss: 1.026596188545227,grad_norm: 0.8077464931760551, iteration: 403846
loss: 0.9964025020599365,grad_norm: 0.9999997392196318, iteration: 403847
loss: 1.0187456607818604,grad_norm: 0.7739770762647331, iteration: 403848
loss: 1.082266926765442,grad_norm: 0.9999995247016494, iteration: 403849
loss: 1.0375404357910156,grad_norm: 0.9999999134303765, iteration: 403850
loss: 1.0234453678131104,grad_norm: 0.7719923262285707, iteration: 403851
loss: 0.9988876581192017,grad_norm: 0.847372510386643, iteration: 403852
loss: 1.0577033758163452,grad_norm: 0.7701604718869169, iteration: 403853
loss: 0.9779601693153381,grad_norm: 0.9999990487587982, iteration: 403854
loss: 0.9907311201095581,grad_norm: 0.9097880100784, iteration: 403855
loss: 0.9980954527854919,grad_norm: 0.9946832647533348, iteration: 403856
loss: 0.974830687046051,grad_norm: 0.8525972026102943, iteration: 403857
loss: 0.9930437803268433,grad_norm: 0.9999991371476777, iteration: 403858
loss: 1.0040220022201538,grad_norm: 0.7407607942351896, iteration: 403859
loss: 0.9935941696166992,grad_norm: 0.9999990751347521, iteration: 403860
loss: 1.011047601699829,grad_norm: 0.9133407401123761, iteration: 403861
loss: 1.0199472904205322,grad_norm: 0.9477298502614709, iteration: 403862
loss: 1.0902900695800781,grad_norm: 0.9999997416669472, iteration: 403863
loss: 0.9706140160560608,grad_norm: 0.753945957159366, iteration: 403864
loss: 1.0054246187210083,grad_norm: 0.7476895584805885, iteration: 403865
loss: 1.0112653970718384,grad_norm: 0.9999990159006474, iteration: 403866
loss: 0.9826553463935852,grad_norm: 0.7384858917222467, iteration: 403867
loss: 1.0015573501586914,grad_norm: 0.9464516899442458, iteration: 403868
loss: 1.0488460063934326,grad_norm: 0.9854060108335294, iteration: 403869
loss: 0.999813437461853,grad_norm: 0.8780024957229177, iteration: 403870
loss: 1.0083612203598022,grad_norm: 0.7736766620779812, iteration: 403871
loss: 0.9791511297225952,grad_norm: 0.6855492872692627, iteration: 403872
loss: 0.9893767237663269,grad_norm: 0.8120083230987533, iteration: 403873
loss: 1.005588173866272,grad_norm: 0.8362934008595336, iteration: 403874
loss: 1.0394420623779297,grad_norm: 0.7454886366230269, iteration: 403875
loss: 1.0057913064956665,grad_norm: 0.734902935829341, iteration: 403876
loss: 1.0396546125411987,grad_norm: 0.587097105221154, iteration: 403877
loss: 0.9613905549049377,grad_norm: 0.8996169040556556, iteration: 403878
loss: 0.9681984186172485,grad_norm: 0.763750851455216, iteration: 403879
loss: 1.0353718996047974,grad_norm: 0.8803642217258644, iteration: 403880
loss: 0.9674904346466064,grad_norm: 0.7562104619788076, iteration: 403881
loss: 1.006062388420105,grad_norm: 0.8109030037606773, iteration: 403882
loss: 1.06623113155365,grad_norm: 1.0000000460255758, iteration: 403883
loss: 0.9713801145553589,grad_norm: 0.7999286029323573, iteration: 403884
loss: 0.971620500087738,grad_norm: 0.8330014495027089, iteration: 403885
loss: 0.9881659746170044,grad_norm: 0.7177278791735447, iteration: 403886
loss: 1.028087854385376,grad_norm: 0.7329166733272324, iteration: 403887
loss: 0.9766464829444885,grad_norm: 0.7193913271953419, iteration: 403888
loss: 1.007408857345581,grad_norm: 0.7206719146097151, iteration: 403889
loss: 1.0126674175262451,grad_norm: 0.7774099573490225, iteration: 403890
loss: 1.0916589498519897,grad_norm: 0.9999992031410366, iteration: 403891
loss: 0.992590069770813,grad_norm: 0.7261672678875137, iteration: 403892
loss: 0.973689615726471,grad_norm: 0.8212012855649758, iteration: 403893
loss: 0.9834622144699097,grad_norm: 0.7236537226103809, iteration: 403894
loss: 0.9940467476844788,grad_norm: 0.6936241633203621, iteration: 403895
loss: 1.0705150365829468,grad_norm: 0.9999999025097863, iteration: 403896
loss: 0.9857681393623352,grad_norm: 0.792806466698951, iteration: 403897
loss: 1.000172734260559,grad_norm: 0.9999994899869493, iteration: 403898
loss: 1.0464178323745728,grad_norm: 0.999999831372441, iteration: 403899
loss: 0.9907907843589783,grad_norm: 0.9618398907781964, iteration: 403900
loss: 1.0181244611740112,grad_norm: 0.7792793071612607, iteration: 403901
loss: 1.0437626838684082,grad_norm: 0.9999990308545724, iteration: 403902
loss: 1.0099008083343506,grad_norm: 0.6646948638093044, iteration: 403903
loss: 0.9879031777381897,grad_norm: 0.7839713672249566, iteration: 403904
loss: 1.0264588594436646,grad_norm: 0.8511642300915558, iteration: 403905
loss: 1.0268698930740356,grad_norm: 0.7510224646098388, iteration: 403906
loss: 1.017806887626648,grad_norm: 0.8046911117668896, iteration: 403907
loss: 0.9933757781982422,grad_norm: 0.7995968804215011, iteration: 403908
loss: 1.0138702392578125,grad_norm: 0.7143700024350059, iteration: 403909
loss: 1.034522294998169,grad_norm: 0.9264117494854657, iteration: 403910
loss: 1.0244261026382446,grad_norm: 0.9483840777832544, iteration: 403911
loss: 1.0183990001678467,grad_norm: 0.9057360350993522, iteration: 403912
loss: 0.9933356046676636,grad_norm: 0.9853959668144486, iteration: 403913
loss: 1.0211955308914185,grad_norm: 0.9999994120385085, iteration: 403914
loss: 1.0097233057022095,grad_norm: 0.9999998163311347, iteration: 403915
loss: 0.9951719045639038,grad_norm: 0.8143323144848896, iteration: 403916
loss: 1.1026883125305176,grad_norm: 0.9999991665516694, iteration: 403917
loss: 1.0125826597213745,grad_norm: 0.8067986433553778, iteration: 403918
loss: 1.0183117389678955,grad_norm: 0.7333800894272865, iteration: 403919
loss: 0.9966068863868713,grad_norm: 0.9999991709830246, iteration: 403920
loss: 0.9776852130889893,grad_norm: 0.8101835305077496, iteration: 403921
loss: 1.0062137842178345,grad_norm: 0.7794849686036243, iteration: 403922
loss: 1.0286331176757812,grad_norm: 0.9365612050583964, iteration: 403923
loss: 1.0041011571884155,grad_norm: 0.999999333827505, iteration: 403924
loss: 0.9906413555145264,grad_norm: 0.7545261790906518, iteration: 403925
loss: 0.9831503033638,grad_norm: 0.803873935167921, iteration: 403926
loss: 1.0363260507583618,grad_norm: 0.8895713788423845, iteration: 403927
loss: 1.00577712059021,grad_norm: 0.7461516593329847, iteration: 403928
loss: 0.9561278820037842,grad_norm: 0.6755737049648081, iteration: 403929
loss: 1.0678564310073853,grad_norm: 0.9999993184754038, iteration: 403930
loss: 1.003449559211731,grad_norm: 0.8239400611413877, iteration: 403931
loss: 1.0013923645019531,grad_norm: 0.8397145231860232, iteration: 403932
loss: 1.0476497411727905,grad_norm: 0.7501816393844231, iteration: 403933
loss: 1.0332012176513672,grad_norm: 0.8813261994397525, iteration: 403934
loss: 1.0429508686065674,grad_norm: 0.7478633383676243, iteration: 403935
loss: 0.992821455001831,grad_norm: 0.7412080913173579, iteration: 403936
loss: 0.9871065020561218,grad_norm: 0.804340813949218, iteration: 403937
loss: 1.004595160484314,grad_norm: 0.9231195462572427, iteration: 403938
loss: 0.9976581335067749,grad_norm: 0.6755500317872366, iteration: 403939
loss: 1.0031445026397705,grad_norm: 0.999999038862741, iteration: 403940
loss: 1.0114045143127441,grad_norm: 0.785996602142205, iteration: 403941
loss: 1.043384313583374,grad_norm: 0.9105415339772702, iteration: 403942
loss: 1.0196070671081543,grad_norm: 0.8017117624916197, iteration: 403943
loss: 1.0132479667663574,grad_norm: 0.9245593138367295, iteration: 403944
loss: 1.008337378501892,grad_norm: 0.7754211221687707, iteration: 403945
loss: 1.0126270055770874,grad_norm: 0.9863951568519487, iteration: 403946
loss: 1.0185325145721436,grad_norm: 0.9949777593247767, iteration: 403947
loss: 1.0608184337615967,grad_norm: 0.9999996340814381, iteration: 403948
loss: 1.018105387687683,grad_norm: 0.6663788198841969, iteration: 403949
loss: 0.972221314907074,grad_norm: 0.794731064315935, iteration: 403950
loss: 0.9939326047897339,grad_norm: 0.7228542450438451, iteration: 403951
loss: 0.9981963634490967,grad_norm: 0.9999992148278322, iteration: 403952
loss: 1.0781587362289429,grad_norm: 0.9999995571823088, iteration: 403953
loss: 0.9985845685005188,grad_norm: 0.6769334988261114, iteration: 403954
loss: 1.1120185852050781,grad_norm: 0.9999999826296336, iteration: 403955
loss: 1.0449321269989014,grad_norm: 0.9999992776651673, iteration: 403956
loss: 1.0139503479003906,grad_norm: 0.7941282179537725, iteration: 403957
loss: 1.0403386354446411,grad_norm: 0.7393259762294092, iteration: 403958
loss: 1.0163148641586304,grad_norm: 0.784248526320493, iteration: 403959
loss: 1.004252314567566,grad_norm: 0.7939103317014934, iteration: 403960
loss: 1.0245187282562256,grad_norm: 0.9476166038615369, iteration: 403961
loss: 0.9942543506622314,grad_norm: 0.9056141387892973, iteration: 403962
loss: 1.0022591352462769,grad_norm: 0.8639369947427833, iteration: 403963
loss: 1.0192992687225342,grad_norm: 0.9999990250891755, iteration: 403964
loss: 1.0376588106155396,grad_norm: 0.689104788137904, iteration: 403965
loss: 1.00017511844635,grad_norm: 0.7910861729259766, iteration: 403966
loss: 0.9858221411705017,grad_norm: 0.793575299957518, iteration: 403967
loss: 1.006155014038086,grad_norm: 0.9999998692474089, iteration: 403968
loss: 1.0192967653274536,grad_norm: 0.7248836366667433, iteration: 403969
loss: 0.9866511821746826,grad_norm: 0.6953617289878395, iteration: 403970
loss: 1.0219948291778564,grad_norm: 0.8653216753361467, iteration: 403971
loss: 0.9760270714759827,grad_norm: 0.7042569947243971, iteration: 403972
loss: 0.9803305864334106,grad_norm: 0.6621846640729018, iteration: 403973
loss: 1.0271514654159546,grad_norm: 0.8511087867477078, iteration: 403974
loss: 1.2329604625701904,grad_norm: 0.9999990497307341, iteration: 403975
loss: 1.002907156944275,grad_norm: 0.7432348108587183, iteration: 403976
loss: 1.0272396802902222,grad_norm: 0.8411329349641337, iteration: 403977
loss: 1.0164397954940796,grad_norm: 0.7813412902770395, iteration: 403978
loss: 0.9892902970314026,grad_norm: 0.7288737774592235, iteration: 403979
loss: 1.0291786193847656,grad_norm: 0.7183141738826535, iteration: 403980
loss: 1.0051169395446777,grad_norm: 0.6935877147159512, iteration: 403981
loss: 0.9905968904495239,grad_norm: 0.8753935047741462, iteration: 403982
loss: 1.0023375749588013,grad_norm: 0.8500714257930346, iteration: 403983
loss: 1.007077932357788,grad_norm: 0.7120663064015725, iteration: 403984
loss: 1.0150760412216187,grad_norm: 0.6923273297218733, iteration: 403985
loss: 0.9637536406517029,grad_norm: 0.7086492857562494, iteration: 403986
loss: 0.9347817897796631,grad_norm: 0.8081742922928714, iteration: 403987
loss: 0.9990710020065308,grad_norm: 0.8217504779010576, iteration: 403988
loss: 0.9887146949768066,grad_norm: 0.6414722278016208, iteration: 403989
loss: 1.0106110572814941,grad_norm: 0.9999997167407508, iteration: 403990
loss: 0.9916320443153381,grad_norm: 0.7129542074777427, iteration: 403991
loss: 0.9982993006706238,grad_norm: 0.9311097730671722, iteration: 403992
loss: 1.0000874996185303,grad_norm: 0.8207178428640048, iteration: 403993
loss: 0.9965412616729736,grad_norm: 0.7930064562570394, iteration: 403994
loss: 1.009993076324463,grad_norm: 0.7288918737518323, iteration: 403995
loss: 1.0096144676208496,grad_norm: 0.7669043066651822, iteration: 403996
loss: 1.0107653141021729,grad_norm: 0.750480146673551, iteration: 403997
loss: 1.0075311660766602,grad_norm: 0.9603180266929878, iteration: 403998
loss: 1.0011167526245117,grad_norm: 0.7756931495020879, iteration: 403999
loss: 1.0431897640228271,grad_norm: 0.9999991185748006, iteration: 404000
loss: 1.0319191217422485,grad_norm: 0.9999990096172007, iteration: 404001
loss: 1.0028163194656372,grad_norm: 0.6871381383135502, iteration: 404002
loss: 0.9953461289405823,grad_norm: 0.7198080897934741, iteration: 404003
loss: 1.0269689559936523,grad_norm: 0.6836668665976313, iteration: 404004
loss: 1.0253132581710815,grad_norm: 0.7858603590222151, iteration: 404005
loss: 1.0192985534667969,grad_norm: 0.6495292886219535, iteration: 404006
loss: 0.9927229881286621,grad_norm: 0.813816688767335, iteration: 404007
loss: 0.9774690270423889,grad_norm: 0.5787258465933508, iteration: 404008
loss: 1.0666598081588745,grad_norm: 0.9999990450670557, iteration: 404009
loss: 0.9842469096183777,grad_norm: 0.7050176952740828, iteration: 404010
loss: 1.0153045654296875,grad_norm: 0.6967839189364866, iteration: 404011
loss: 0.9837592244148254,grad_norm: 0.9574911633292752, iteration: 404012
loss: 0.9911001920700073,grad_norm: 0.687789950373792, iteration: 404013
loss: 0.9907003045082092,grad_norm: 0.8426734111782612, iteration: 404014
loss: 0.9920015335083008,grad_norm: 0.7402584227846241, iteration: 404015
loss: 0.9910205602645874,grad_norm: 0.8997183261360384, iteration: 404016
loss: 0.9838812947273254,grad_norm: 0.6460054478056476, iteration: 404017
loss: 0.9897640943527222,grad_norm: 0.8514499534239849, iteration: 404018
loss: 0.9952271580696106,grad_norm: 0.7859674893962197, iteration: 404019
loss: 0.9963104128837585,grad_norm: 0.7150865502326623, iteration: 404020
loss: 1.0047671794891357,grad_norm: 0.8210859937810066, iteration: 404021
loss: 1.0271029472351074,grad_norm: 0.7674409568054914, iteration: 404022
loss: 1.0058364868164062,grad_norm: 0.9261709872989394, iteration: 404023
loss: 0.9996492266654968,grad_norm: 0.8545431652100511, iteration: 404024
loss: 0.9648090600967407,grad_norm: 0.836190525141954, iteration: 404025
loss: 0.9726932644844055,grad_norm: 0.749751151715882, iteration: 404026
loss: 0.9812437891960144,grad_norm: 0.7435216563829549, iteration: 404027
loss: 1.0185214281082153,grad_norm: 0.6893977041312653, iteration: 404028
loss: 0.9795989990234375,grad_norm: 0.6694394657650006, iteration: 404029
loss: 1.007434606552124,grad_norm: 0.7861857396480092, iteration: 404030
loss: 0.9874731302261353,grad_norm: 0.776052724718689, iteration: 404031
loss: 0.9964538812637329,grad_norm: 0.7796711803384475, iteration: 404032
loss: 1.0249310731887817,grad_norm: 0.9818313360413222, iteration: 404033
loss: 0.9865602850914001,grad_norm: 0.9081116763174294, iteration: 404034
loss: 1.0438923835754395,grad_norm: 0.9813053410339309, iteration: 404035
loss: 0.9651739597320557,grad_norm: 0.7908960292712018, iteration: 404036
loss: 1.020909070968628,grad_norm: 0.8567426844139515, iteration: 404037
loss: 1.0316307544708252,grad_norm: 0.7630814562298116, iteration: 404038
loss: 1.0626598596572876,grad_norm: 0.9519824454929224, iteration: 404039
loss: 0.9643563628196716,grad_norm: 0.7693684707657542, iteration: 404040
loss: 0.9860203266143799,grad_norm: 0.8712232168343823, iteration: 404041
loss: 0.9965029358863831,grad_norm: 0.9058409671487283, iteration: 404042
loss: 0.9987285733222961,grad_norm: 0.7256934742748251, iteration: 404043
loss: 0.9449430108070374,grad_norm: 0.8596050972191455, iteration: 404044
loss: 1.0230776071548462,grad_norm: 0.9999990784101116, iteration: 404045
loss: 1.0133769512176514,grad_norm: 0.719667765344666, iteration: 404046
loss: 1.00930655002594,grad_norm: 0.8223604088301324, iteration: 404047
loss: 1.0603989362716675,grad_norm: 0.9999998430547126, iteration: 404048
loss: 0.9808042645454407,grad_norm: 0.8076022428755472, iteration: 404049
loss: 0.9865958094596863,grad_norm: 0.7337515151633385, iteration: 404050
loss: 0.9988203644752502,grad_norm: 0.9999990677957724, iteration: 404051
loss: 1.0193651914596558,grad_norm: 0.7235213653567519, iteration: 404052
loss: 0.9972557425498962,grad_norm: 0.679321273478344, iteration: 404053
loss: 0.9992982745170593,grad_norm: 0.8806689542457595, iteration: 404054
loss: 0.9692655801773071,grad_norm: 0.6984347071035311, iteration: 404055
loss: 0.9983291625976562,grad_norm: 0.6528358182412949, iteration: 404056
loss: 0.9817181825637817,grad_norm: 0.7939793028395981, iteration: 404057
loss: 1.0141466856002808,grad_norm: 0.8303330020666865, iteration: 404058
loss: 1.008350133895874,grad_norm: 0.7582446241002616, iteration: 404059
loss: 0.9704105854034424,grad_norm: 0.8357675269317661, iteration: 404060
loss: 1.0074371099472046,grad_norm: 0.9999995505109084, iteration: 404061
loss: 0.9861032366752625,grad_norm: 0.9999990474067901, iteration: 404062
loss: 0.982539713382721,grad_norm: 0.759792239369287, iteration: 404063
loss: 0.9770906567573547,grad_norm: 0.8656227070007984, iteration: 404064
loss: 0.9976953268051147,grad_norm: 0.7521341654022444, iteration: 404065
loss: 1.0100363492965698,grad_norm: 0.7115134717427829, iteration: 404066
loss: 0.9979050159454346,grad_norm: 0.725660053120342, iteration: 404067
loss: 0.9798417091369629,grad_norm: 0.7026645847212972, iteration: 404068
loss: 0.9597228765487671,grad_norm: 0.7815232738225973, iteration: 404069
loss: 1.1346300840377808,grad_norm: 0.9999996663227358, iteration: 404070
loss: 1.0140678882598877,grad_norm: 0.7514341116233803, iteration: 404071
loss: 1.0254433155059814,grad_norm: 0.9142882424792469, iteration: 404072
loss: 0.98047935962677,grad_norm: 0.7807670289092998, iteration: 404073
loss: 1.029312014579773,grad_norm: 0.8191445531030751, iteration: 404074
loss: 1.0010559558868408,grad_norm: 0.9999994762134824, iteration: 404075
loss: 0.999783992767334,grad_norm: 0.9999996443423259, iteration: 404076
loss: 1.0371760129928589,grad_norm: 0.9999999954900578, iteration: 404077
loss: 0.9993518590927124,grad_norm: 0.999999355208882, iteration: 404078
loss: 0.9952694773674011,grad_norm: 0.7599743155934411, iteration: 404079
loss: 1.0003196001052856,grad_norm: 0.762451407470568, iteration: 404080
loss: 1.0378190279006958,grad_norm: 1.0000000137029945, iteration: 404081
loss: 0.9669696688652039,grad_norm: 0.8340053145725174, iteration: 404082
loss: 1.0417672395706177,grad_norm: 0.8337964234058228, iteration: 404083
loss: 1.0177749395370483,grad_norm: 0.7241751996793241, iteration: 404084
loss: 0.9510486721992493,grad_norm: 0.7962583633314484, iteration: 404085
loss: 1.0023895502090454,grad_norm: 0.8450177340112772, iteration: 404086
loss: 1.0651237964630127,grad_norm: 0.9999991719448781, iteration: 404087
loss: 1.0070955753326416,grad_norm: 0.7456669969578221, iteration: 404088
loss: 1.020612120628357,grad_norm: 0.7399283015380287, iteration: 404089
loss: 1.002700686454773,grad_norm: 0.8122854044610571, iteration: 404090
loss: 0.9740199446678162,grad_norm: 0.7306056034288073, iteration: 404091
loss: 0.9932148456573486,grad_norm: 0.7177172488610609, iteration: 404092
loss: 1.094887137413025,grad_norm: 0.9999998417576192, iteration: 404093
loss: 0.9773434996604919,grad_norm: 0.746766552194006, iteration: 404094
loss: 1.0227280855178833,grad_norm: 0.7699483921463756, iteration: 404095
loss: 0.947533905506134,grad_norm: 0.8575884192302626, iteration: 404096
loss: 1.0034267902374268,grad_norm: 0.8352298966658452, iteration: 404097
loss: 0.9956753849983215,grad_norm: 0.9871945512785968, iteration: 404098
loss: 0.9944588541984558,grad_norm: 0.9999991605086571, iteration: 404099
loss: 1.0172938108444214,grad_norm: 0.9654585759067031, iteration: 404100
loss: 1.0193188190460205,grad_norm: 0.7894091479271619, iteration: 404101
loss: 1.0016283988952637,grad_norm: 0.6859156975844868, iteration: 404102
loss: 0.9930403828620911,grad_norm: 0.8399738591754012, iteration: 404103
loss: 1.0287175178527832,grad_norm: 0.9812064525929992, iteration: 404104
loss: 1.0108023881912231,grad_norm: 0.8531452842382092, iteration: 404105
loss: 1.0296339988708496,grad_norm: 0.9999989660635071, iteration: 404106
loss: 0.9932302236557007,grad_norm: 0.9208989612383343, iteration: 404107
loss: 0.9824373722076416,grad_norm: 0.8656552487152576, iteration: 404108
loss: 0.9701122641563416,grad_norm: 0.8677895061517246, iteration: 404109
loss: 1.0825212001800537,grad_norm: 0.9150166137476918, iteration: 404110
loss: 1.0079002380371094,grad_norm: 0.8547917401773089, iteration: 404111
loss: 1.0106531381607056,grad_norm: 0.9204858567546955, iteration: 404112
loss: 1.0001509189605713,grad_norm: 0.8165445376262281, iteration: 404113
loss: 0.9959043264389038,grad_norm: 0.7008053687442558, iteration: 404114
loss: 1.0184110403060913,grad_norm: 0.8709526657083101, iteration: 404115
loss: 1.0052733421325684,grad_norm: 0.8551000646705881, iteration: 404116
loss: 1.0854930877685547,grad_norm: 0.9999994240217193, iteration: 404117
loss: 0.9946222305297852,grad_norm: 0.8373584475156086, iteration: 404118
loss: 0.9866159558296204,grad_norm: 0.8362862819686949, iteration: 404119
loss: 0.9473878741264343,grad_norm: 0.8744811618873419, iteration: 404120
loss: 1.0105820894241333,grad_norm: 0.8968164318850594, iteration: 404121
loss: 1.0023703575134277,grad_norm: 0.8605042505399045, iteration: 404122
loss: 1.0106931924819946,grad_norm: 0.9999994670171511, iteration: 404123
loss: 0.962319552898407,grad_norm: 0.77900145776309, iteration: 404124
loss: 0.9730921387672424,grad_norm: 0.7214898144929488, iteration: 404125
loss: 1.0046393871307373,grad_norm: 0.8947417322465162, iteration: 404126
loss: 1.0607714653015137,grad_norm: 0.9999992631225276, iteration: 404127
loss: 0.9707416892051697,grad_norm: 0.7816223746605535, iteration: 404128
loss: 1.0310826301574707,grad_norm: 0.7883395486743248, iteration: 404129
loss: 0.9957493543624878,grad_norm: 0.9400485238362745, iteration: 404130
loss: 0.988537073135376,grad_norm: 0.8114897061914691, iteration: 404131
loss: 1.0003161430358887,grad_norm: 0.7446419141639017, iteration: 404132
loss: 1.0070208311080933,grad_norm: 0.7485955440002544, iteration: 404133
loss: 1.013312816619873,grad_norm: 0.8394718243750033, iteration: 404134
loss: 0.96065753698349,grad_norm: 0.6502764428182745, iteration: 404135
loss: 1.0030252933502197,grad_norm: 0.7574348811831737, iteration: 404136
loss: 1.0297260284423828,grad_norm: 0.8400368401117699, iteration: 404137
loss: 1.0105032920837402,grad_norm: 0.6977549393233305, iteration: 404138
loss: 1.014346957206726,grad_norm: 0.7577419565231908, iteration: 404139
loss: 0.9583591222763062,grad_norm: 0.7180927359003108, iteration: 404140
loss: 1.0852664709091187,grad_norm: 0.9999991583365506, iteration: 404141
loss: 1.0145374536514282,grad_norm: 0.7690707495575784, iteration: 404142
loss: 1.0383108854293823,grad_norm: 0.8087180983784752, iteration: 404143
loss: 1.0053257942199707,grad_norm: 0.7787890378305155, iteration: 404144
loss: 0.9915181398391724,grad_norm: 0.869328769428839, iteration: 404145
loss: 1.011128544807434,grad_norm: 0.8224006151607469, iteration: 404146
loss: 0.9994133114814758,grad_norm: 0.9999991621606112, iteration: 404147
loss: 0.9687920212745667,grad_norm: 0.7890859437875019, iteration: 404148
loss: 0.9650378227233887,grad_norm: 0.7676805315997095, iteration: 404149
loss: 1.0163804292678833,grad_norm: 0.7806146647711052, iteration: 404150
loss: 1.038773536682129,grad_norm: 0.857053181303297, iteration: 404151
loss: 0.9700499773025513,grad_norm: 0.8421385773454217, iteration: 404152
loss: 1.0059548616409302,grad_norm: 0.7081616049843169, iteration: 404153
loss: 0.9748147130012512,grad_norm: 0.9123100237870339, iteration: 404154
loss: 0.9961555600166321,grad_norm: 0.6696748197212201, iteration: 404155
loss: 0.9965773224830627,grad_norm: 0.7254760653611528, iteration: 404156
loss: 1.023555874824524,grad_norm: 0.7779971838096879, iteration: 404157
loss: 0.9969611167907715,grad_norm: 0.8087201702920106, iteration: 404158
loss: 1.0219907760620117,grad_norm: 0.9171756016340508, iteration: 404159
loss: 0.9881330728530884,grad_norm: 0.8631536048468079, iteration: 404160
loss: 1.0008419752120972,grad_norm: 0.8132201454174501, iteration: 404161
loss: 1.0028175115585327,grad_norm: 0.9999999115758549, iteration: 404162
loss: 0.9947711825370789,grad_norm: 0.7851998296998882, iteration: 404163
loss: 1.0207793712615967,grad_norm: 0.7993604117207128, iteration: 404164
loss: 1.0468353033065796,grad_norm: 0.9999993745671446, iteration: 404165
loss: 1.0531035661697388,grad_norm: 0.7918272221402666, iteration: 404166
loss: 0.9728274345397949,grad_norm: 0.941958011360566, iteration: 404167
loss: 1.016946792602539,grad_norm: 0.758804711894373, iteration: 404168
loss: 1.0092527866363525,grad_norm: 0.6741474836046164, iteration: 404169
loss: 0.9712107181549072,grad_norm: 0.7120911883878842, iteration: 404170
loss: 0.9892014265060425,grad_norm: 0.7578248540141778, iteration: 404171
loss: 0.9881396293640137,grad_norm: 0.6459515108241115, iteration: 404172
loss: 1.0430196523666382,grad_norm: 0.9999993095652828, iteration: 404173
loss: 1.0259838104248047,grad_norm: 0.657273465709671, iteration: 404174
loss: 1.0252662897109985,grad_norm: 0.6886812467823635, iteration: 404175
loss: 0.9871501326560974,grad_norm: 0.9999999681162012, iteration: 404176
loss: 0.984705924987793,grad_norm: 0.8252807013221024, iteration: 404177
loss: 1.0411062240600586,grad_norm: 0.8702603781309937, iteration: 404178
loss: 0.997042179107666,grad_norm: 0.9999992917812924, iteration: 404179
loss: 0.990444540977478,grad_norm: 0.9589531425626551, iteration: 404180
loss: 0.9887006282806396,grad_norm: 0.7001269300673651, iteration: 404181
loss: 1.0083991289138794,grad_norm: 0.999999182199366, iteration: 404182
loss: 1.0106847286224365,grad_norm: 0.99999990618656, iteration: 404183
loss: 0.9801698327064514,grad_norm: 0.8709982043686979, iteration: 404184
loss: 1.0124835968017578,grad_norm: 0.836378593048857, iteration: 404185
loss: 1.014582633972168,grad_norm: 0.7197563603609124, iteration: 404186
loss: 0.9844972491264343,grad_norm: 0.817258480416672, iteration: 404187
loss: 1.0405879020690918,grad_norm: 0.8574113059692121, iteration: 404188
loss: 1.0585815906524658,grad_norm: 0.9831940242360181, iteration: 404189
loss: 1.0031988620758057,grad_norm: 0.8324515846571564, iteration: 404190
loss: 1.021427869796753,grad_norm: 0.6676486513545474, iteration: 404191
loss: 1.0123703479766846,grad_norm: 0.7683482164465457, iteration: 404192
loss: 1.0126665830612183,grad_norm: 0.7093570211370285, iteration: 404193
loss: 1.0778858661651611,grad_norm: 0.9992507259581876, iteration: 404194
loss: 1.195657730102539,grad_norm: 0.9999994691227264, iteration: 404195
loss: 0.9660565257072449,grad_norm: 0.926341361087252, iteration: 404196
loss: 0.9928932785987854,grad_norm: 0.6958732980187813, iteration: 404197
loss: 0.9758761525154114,grad_norm: 0.9013648143742067, iteration: 404198
loss: 0.9816904664039612,grad_norm: 0.8358426646435637, iteration: 404199
loss: 1.0281544923782349,grad_norm: 0.8398446869440636, iteration: 404200
loss: 1.0590550899505615,grad_norm: 0.81583117339103, iteration: 404201
loss: 1.0016252994537354,grad_norm: 0.7958166165098406, iteration: 404202
loss: 1.0002129077911377,grad_norm: 0.887522011710816, iteration: 404203
loss: 1.0565910339355469,grad_norm: 0.9171542031415062, iteration: 404204
loss: 1.0075289011001587,grad_norm: 0.9788673171928558, iteration: 404205
loss: 1.031130313873291,grad_norm: 0.8336507622779837, iteration: 404206
loss: 1.0078333616256714,grad_norm: 0.8405439128872647, iteration: 404207
loss: 1.0372873544692993,grad_norm: 0.7756435447405093, iteration: 404208
loss: 1.013067603111267,grad_norm: 0.9999991140001732, iteration: 404209
loss: 0.9886560440063477,grad_norm: 0.8050625126518643, iteration: 404210
loss: 1.016572117805481,grad_norm: 0.687904463679706, iteration: 404211
loss: 0.9915438294410706,grad_norm: 0.785650165479718, iteration: 404212
loss: 1.0185292959213257,grad_norm: 0.722344464380686, iteration: 404213
loss: 1.2865792512893677,grad_norm: 0.9999999047467524, iteration: 404214
loss: 1.0017894506454468,grad_norm: 0.7770378135836157, iteration: 404215
loss: 1.0108083486557007,grad_norm: 0.8949224363472987, iteration: 404216
loss: 0.9816438555717468,grad_norm: 0.6842739875106827, iteration: 404217
loss: 1.0098360776901245,grad_norm: 0.8897742709431005, iteration: 404218
loss: 0.9937747120857239,grad_norm: 0.7902659292075374, iteration: 404219
loss: 1.0107561349868774,grad_norm: 0.8104958459503705, iteration: 404220
loss: 0.9912661910057068,grad_norm: 0.6666044163176998, iteration: 404221
loss: 0.9757622480392456,grad_norm: 0.7403580084932398, iteration: 404222
loss: 1.0088520050048828,grad_norm: 0.8852901113310581, iteration: 404223
loss: 1.0273737907409668,grad_norm: 0.9999993181548608, iteration: 404224
loss: 1.0700687170028687,grad_norm: 0.8727702163912818, iteration: 404225
loss: 1.0204403400421143,grad_norm: 0.804224333707633, iteration: 404226
loss: 0.9886015057563782,grad_norm: 0.7371459289675396, iteration: 404227
loss: 1.0170667171478271,grad_norm: 0.9055260762919957, iteration: 404228
loss: 0.9863231182098389,grad_norm: 0.6889798164358442, iteration: 404229
loss: 1.0354489088058472,grad_norm: 0.9999998323508644, iteration: 404230
loss: 1.0072239637374878,grad_norm: 0.7709102374117472, iteration: 404231
loss: 1.015537142753601,grad_norm: 0.8433198143405023, iteration: 404232
loss: 1.0790008306503296,grad_norm: 0.6472446244454817, iteration: 404233
loss: 1.0368194580078125,grad_norm: 0.9863221199910498, iteration: 404234
loss: 0.9959402680397034,grad_norm: 0.7028657072938557, iteration: 404235
loss: 1.04581880569458,grad_norm: 0.6992541707138435, iteration: 404236
loss: 1.019673466682434,grad_norm: 0.999999878798235, iteration: 404237
loss: 1.003301739692688,grad_norm: 0.7893092211047231, iteration: 404238
loss: 0.9726828932762146,grad_norm: 0.8759420594371847, iteration: 404239
loss: 0.9644188284873962,grad_norm: 0.7511844871664919, iteration: 404240
loss: 0.9892924427986145,grad_norm: 0.8038646635023616, iteration: 404241
loss: 1.0416141748428345,grad_norm: 0.9999992988499572, iteration: 404242
loss: 1.0532995462417603,grad_norm: 0.6972107440474585, iteration: 404243
loss: 0.9577109217643738,grad_norm: 0.8164079532941232, iteration: 404244
loss: 1.0525439977645874,grad_norm: 0.9970013407167816, iteration: 404245
loss: 1.1019680500030518,grad_norm: 0.8201170166154748, iteration: 404246
loss: 0.977571427822113,grad_norm: 0.7235544208877517, iteration: 404247
loss: 0.999918520450592,grad_norm: 0.8536088455084644, iteration: 404248
loss: 1.0475718975067139,grad_norm: 0.7325709253394609, iteration: 404249
loss: 1.0066781044006348,grad_norm: 0.7179764519020733, iteration: 404250
loss: 0.9558996558189392,grad_norm: 0.7226529277073371, iteration: 404251
loss: 0.9866936206817627,grad_norm: 0.8134230693185226, iteration: 404252
loss: 1.0179963111877441,grad_norm: 0.7107891458363446, iteration: 404253
loss: 0.9866629838943481,grad_norm: 0.9999991689412346, iteration: 404254
loss: 0.967764675617218,grad_norm: 0.8041487667706193, iteration: 404255
loss: 0.990054190158844,grad_norm: 0.7556464912110193, iteration: 404256
loss: 0.9875432252883911,grad_norm: 0.7243811177100442, iteration: 404257
loss: 0.9611621499061584,grad_norm: 0.8131288248913939, iteration: 404258
loss: 1.018173336982727,grad_norm: 0.792925492322235, iteration: 404259
loss: 1.0320347547531128,grad_norm: 0.9999992410740796, iteration: 404260
loss: 1.0507662296295166,grad_norm: 0.9652964315024131, iteration: 404261
loss: 1.0206135511398315,grad_norm: 0.8119495075671664, iteration: 404262
loss: 1.025707721710205,grad_norm: 0.6660479561388065, iteration: 404263
loss: 0.9628512859344482,grad_norm: 0.6484580258699119, iteration: 404264
loss: 1.0899662971496582,grad_norm: 0.9999995488244128, iteration: 404265
loss: 0.9987190365791321,grad_norm: 0.9999993714633553, iteration: 404266
loss: 0.9703739285469055,grad_norm: 0.6883833175319034, iteration: 404267
loss: 1.0077056884765625,grad_norm: 0.8083756758587307, iteration: 404268
loss: 0.996837854385376,grad_norm: 0.6478242222222791, iteration: 404269
loss: 0.9838001132011414,grad_norm: 0.830465171573213, iteration: 404270
loss: 1.1441034078598022,grad_norm: 1.0000001012824158, iteration: 404271
loss: 0.9868230819702148,grad_norm: 0.8060717817693832, iteration: 404272
loss: 1.0117639303207397,grad_norm: 0.6790719309515088, iteration: 404273
loss: 0.9820183515548706,grad_norm: 0.999999106589902, iteration: 404274
loss: 0.9783053398132324,grad_norm: 0.8603629218505157, iteration: 404275
loss: 1.017354965209961,grad_norm: 0.7991508517757674, iteration: 404276
loss: 1.0029900074005127,grad_norm: 0.9863724406080405, iteration: 404277
loss: 1.0368702411651611,grad_norm: 0.717567533890392, iteration: 404278
loss: 0.998742401599884,grad_norm: 0.9999995791554773, iteration: 404279
loss: 0.9752777218818665,grad_norm: 0.8300563704426878, iteration: 404280
loss: 0.9724498391151428,grad_norm: 0.6941868892882366, iteration: 404281
loss: 1.0145467519760132,grad_norm: 0.9274395667359316, iteration: 404282
loss: 1.0789369344711304,grad_norm: 0.8798782279545998, iteration: 404283
loss: 1.0552035570144653,grad_norm: 0.9413075383098299, iteration: 404284
loss: 1.0656757354736328,grad_norm: 0.9999995507410355, iteration: 404285
loss: 0.9870986342430115,grad_norm: 0.955748068147167, iteration: 404286
loss: 0.9930675029754639,grad_norm: 0.9999998473804081, iteration: 404287
loss: 1.0776965618133545,grad_norm: 0.940483276477676, iteration: 404288
loss: 1.0528545379638672,grad_norm: 0.8489503664106853, iteration: 404289
loss: 1.0101609230041504,grad_norm: 0.9412505554663428, iteration: 404290
loss: 1.1076980829238892,grad_norm: 1.0000000098040134, iteration: 404291
loss: 1.0282297134399414,grad_norm: 0.9208654352397015, iteration: 404292
loss: 0.9836492538452148,grad_norm: 0.9999998011713752, iteration: 404293
loss: 1.0220266580581665,grad_norm: 0.7655056691539959, iteration: 404294
loss: 1.0120606422424316,grad_norm: 0.9999991123929515, iteration: 404295
loss: 1.021317958831787,grad_norm: 0.7980013153753547, iteration: 404296
loss: 0.9894254803657532,grad_norm: 0.6885770386371511, iteration: 404297
loss: 0.9501433968544006,grad_norm: 0.87496492664367, iteration: 404298
loss: 1.1040947437286377,grad_norm: 0.8760289815139867, iteration: 404299
loss: 0.9901718497276306,grad_norm: 0.8559789527427815, iteration: 404300
loss: 1.0167135000228882,grad_norm: 0.81735078180799, iteration: 404301
loss: 1.0540367364883423,grad_norm: 0.9999999614031387, iteration: 404302
loss: 0.9798579812049866,grad_norm: 0.6794225439357636, iteration: 404303
loss: 1.0129204988479614,grad_norm: 0.7134088004825875, iteration: 404304
loss: 0.9929658770561218,grad_norm: 0.6997701426879493, iteration: 404305
loss: 0.9825924038887024,grad_norm: 0.7297857941713279, iteration: 404306
loss: 1.0114829540252686,grad_norm: 0.857839107135218, iteration: 404307
loss: 1.0137568712234497,grad_norm: 0.862846498923287, iteration: 404308
loss: 1.0738766193389893,grad_norm: 0.9999997583437785, iteration: 404309
loss: 1.0163251161575317,grad_norm: 0.7702058951938837, iteration: 404310
loss: 1.0178422927856445,grad_norm: 0.765325244314421, iteration: 404311
loss: 1.0091768503189087,grad_norm: 0.8163416065251158, iteration: 404312
loss: 1.0174586772918701,grad_norm: 0.999999096239954, iteration: 404313
loss: 0.9863536953926086,grad_norm: 0.7837604406358892, iteration: 404314
loss: 1.0240329504013062,grad_norm: 0.8645435359900431, iteration: 404315
loss: 1.0219372510910034,grad_norm: 0.9999991819930214, iteration: 404316
loss: 0.984469473361969,grad_norm: 0.9999993058847725, iteration: 404317
loss: 1.1062541007995605,grad_norm: 0.9313691175961063, iteration: 404318
loss: 1.086251139640808,grad_norm: 0.9999993295414366, iteration: 404319
loss: 0.982030987739563,grad_norm: 0.7577170333057901, iteration: 404320
loss: 1.0166016817092896,grad_norm: 0.656267760438901, iteration: 404321
loss: 0.9983569383621216,grad_norm: 0.9999993234540301, iteration: 404322
loss: 0.9962373375892639,grad_norm: 0.7647509609009044, iteration: 404323
loss: 0.9886708855628967,grad_norm: 0.8177118998188964, iteration: 404324
loss: 1.0067120790481567,grad_norm: 0.999999679703052, iteration: 404325
loss: 1.0163193941116333,grad_norm: 0.7579155242346748, iteration: 404326
loss: 0.991736114025116,grad_norm: 0.7832359482502458, iteration: 404327
loss: 1.0810537338256836,grad_norm: 0.9999997168818326, iteration: 404328
loss: 1.004624366760254,grad_norm: 0.9999996677006011, iteration: 404329
loss: 1.0304763317108154,grad_norm: 0.7431065597621758, iteration: 404330
loss: 0.9718697667121887,grad_norm: 0.75565710370549, iteration: 404331
loss: 1.0098235607147217,grad_norm: 0.8420017314149957, iteration: 404332
loss: 1.0197051763534546,grad_norm: 0.7626923323066704, iteration: 404333
loss: 0.9913086295127869,grad_norm: 0.7762561141480177, iteration: 404334
loss: 0.987389862537384,grad_norm: 0.7307829121713615, iteration: 404335
loss: 1.0332930088043213,grad_norm: 0.9999990699805834, iteration: 404336
loss: 0.9635862112045288,grad_norm: 0.6696374919693092, iteration: 404337
loss: 1.161213755607605,grad_norm: 0.999999378546988, iteration: 404338
loss: 1.0477707386016846,grad_norm: 0.8441281349603992, iteration: 404339
loss: 1.0056294202804565,grad_norm: 0.9642485565404938, iteration: 404340
loss: 0.9787238240242004,grad_norm: 0.8648347470080936, iteration: 404341
loss: 1.0550146102905273,grad_norm: 0.9999992560461111, iteration: 404342
loss: 1.0017110109329224,grad_norm: 0.9999995720053492, iteration: 404343
loss: 0.999005138874054,grad_norm: 0.8582916875416278, iteration: 404344
loss: 0.9677684307098389,grad_norm: 0.9842937218426068, iteration: 404345
loss: 0.9776255488395691,grad_norm: 0.9439571173334516, iteration: 404346
loss: 1.0345853567123413,grad_norm: 0.9999993143941172, iteration: 404347
loss: 1.2242364883422852,grad_norm: 0.9999996643996457, iteration: 404348
loss: 1.025009274482727,grad_norm: 0.8659469756034134, iteration: 404349
loss: 1.1443020105361938,grad_norm: 0.9125464547464419, iteration: 404350
loss: 0.9536445736885071,grad_norm: 0.8410474624399402, iteration: 404351
loss: 0.9611844420433044,grad_norm: 0.738837113841407, iteration: 404352
loss: 1.0460225343704224,grad_norm: 0.9265623823310096, iteration: 404353
loss: 1.0313758850097656,grad_norm: 0.9999990580004855, iteration: 404354
loss: 1.0144141912460327,grad_norm: 0.7445903222418788, iteration: 404355
loss: 1.101858377456665,grad_norm: 0.8474374554446953, iteration: 404356
loss: 1.0957512855529785,grad_norm: 0.9999991544684445, iteration: 404357
loss: 1.0076556205749512,grad_norm: 0.778339573777342, iteration: 404358
loss: 1.0643864870071411,grad_norm: 0.9999994407628393, iteration: 404359
loss: 0.9637869596481323,grad_norm: 0.7992866899887547, iteration: 404360
loss: 1.1204475164413452,grad_norm: 0.9999998651164583, iteration: 404361
loss: 0.9844933748245239,grad_norm: 0.9685512243251779, iteration: 404362
loss: 1.1265010833740234,grad_norm: 0.9999993435247198, iteration: 404363
loss: 0.9712768793106079,grad_norm: 0.9999990754140802, iteration: 404364
loss: 0.9627531170845032,grad_norm: 0.9999990842743142, iteration: 404365
loss: 1.043564796447754,grad_norm: 0.8143221326302292, iteration: 404366
loss: 0.9615074992179871,grad_norm: 0.7503856833417982, iteration: 404367
loss: 1.0145423412322998,grad_norm: 0.9999992017433896, iteration: 404368
loss: 1.1199322938919067,grad_norm: 0.9999991379036594, iteration: 404369
loss: 1.032333254814148,grad_norm: 0.8788282233058038, iteration: 404370
loss: 0.9966609477996826,grad_norm: 0.9999996664056912, iteration: 404371
loss: 0.9810653924942017,grad_norm: 0.8412915414495148, iteration: 404372
loss: 0.9942352175712585,grad_norm: 0.7567033290260958, iteration: 404373
loss: 0.9948228001594543,grad_norm: 0.9999993843241826, iteration: 404374
loss: 1.0042344331741333,grad_norm: 0.6786068622860719, iteration: 404375
loss: 1.038720726966858,grad_norm: 0.8355412474110635, iteration: 404376
loss: 1.0166184902191162,grad_norm: 0.9999994892210344, iteration: 404377
loss: 0.9982882142066956,grad_norm: 0.6845174264889188, iteration: 404378
loss: 1.154826045036316,grad_norm: 0.9999998703946658, iteration: 404379
loss: 1.0061211585998535,grad_norm: 0.8289351020720398, iteration: 404380
loss: 1.0327539443969727,grad_norm: 0.8939144474587334, iteration: 404381
loss: 0.9910879135131836,grad_norm: 0.8534524511541063, iteration: 404382
loss: 1.0531760454177856,grad_norm: 0.9999998269874086, iteration: 404383
loss: 0.994055986404419,grad_norm: 0.9432661853563099, iteration: 404384
loss: 0.9682806134223938,grad_norm: 0.6851197240174454, iteration: 404385
loss: 1.0437277555465698,grad_norm: 0.8288941054171893, iteration: 404386
loss: 1.0204614400863647,grad_norm: 0.9999991131096295, iteration: 404387
loss: 1.0587025880813599,grad_norm: 0.9317572472729407, iteration: 404388
loss: 1.0066988468170166,grad_norm: 0.8374972895612254, iteration: 404389
loss: 0.9296261668205261,grad_norm: 0.7774577424155608, iteration: 404390
loss: 1.0274606943130493,grad_norm: 0.7041848104581567, iteration: 404391
loss: 1.0313557386398315,grad_norm: 0.8911811831924156, iteration: 404392
loss: 1.0597769021987915,grad_norm: 0.8951415990861968, iteration: 404393
loss: 1.1557300090789795,grad_norm: 0.9999997674901495, iteration: 404394
loss: 1.02019202709198,grad_norm: 0.7592183897700338, iteration: 404395
loss: 1.0227829217910767,grad_norm: 0.7148964452402164, iteration: 404396
loss: 1.0092909336090088,grad_norm: 0.9416778264172107, iteration: 404397
loss: 1.0617561340332031,grad_norm: 0.9999995247968587, iteration: 404398
loss: 0.9846953749656677,grad_norm: 0.7766697230722167, iteration: 404399
loss: 1.0062791109085083,grad_norm: 0.9999993594593394, iteration: 404400
loss: 1.0727369785308838,grad_norm: 0.9999994347634039, iteration: 404401
loss: 0.9925422072410583,grad_norm: 0.9999997097608565, iteration: 404402
loss: 1.014302134513855,grad_norm: 0.9999998084824264, iteration: 404403
loss: 0.99127197265625,grad_norm: 0.9999996306126365, iteration: 404404
loss: 1.0212119817733765,grad_norm: 0.9999994298425278, iteration: 404405
loss: 1.1772230863571167,grad_norm: 0.9999998967534444, iteration: 404406
loss: 1.0667784214019775,grad_norm: 0.7489456816185329, iteration: 404407
loss: 0.9899457097053528,grad_norm: 0.9999991925319733, iteration: 404408
loss: 1.0560941696166992,grad_norm: 0.8876761952377809, iteration: 404409
loss: 1.0530997514724731,grad_norm: 0.9999998860602647, iteration: 404410
loss: 0.9896948337554932,grad_norm: 0.8422493150055907, iteration: 404411
loss: 1.0626529455184937,grad_norm: 0.9999996314645281, iteration: 404412
loss: 1.0025233030319214,grad_norm: 0.8658205029495571, iteration: 404413
loss: 0.9958983063697815,grad_norm: 0.7581679732478874, iteration: 404414
loss: 1.0704470872879028,grad_norm: 0.7243946797248156, iteration: 404415
loss: 1.0124198198318481,grad_norm: 0.9999994850249403, iteration: 404416
loss: 1.0382667779922485,grad_norm: 0.8968123789642958, iteration: 404417
loss: 0.986549437046051,grad_norm: 0.8794961164161971, iteration: 404418
loss: 1.0344266891479492,grad_norm: 0.8557862025842194, iteration: 404419
loss: 1.0078203678131104,grad_norm: 0.9999994685924859, iteration: 404420
loss: 1.1318358182907104,grad_norm: 0.9999995253569283, iteration: 404421
loss: 1.1256706714630127,grad_norm: 0.9999996085603915, iteration: 404422
loss: 0.983647346496582,grad_norm: 0.8376739405265715, iteration: 404423
loss: 1.0178465843200684,grad_norm: 0.9038286814010305, iteration: 404424
loss: 1.0353024005889893,grad_norm: 0.8908308832673891, iteration: 404425
loss: 0.9997302293777466,grad_norm: 0.9999999429789538, iteration: 404426
loss: 1.2179163694381714,grad_norm: 0.9999994408467915, iteration: 404427
loss: 0.9958363175392151,grad_norm: 0.9999995956527139, iteration: 404428
loss: 1.096089482307434,grad_norm: 0.999999322551265, iteration: 404429
loss: 1.0281509160995483,grad_norm: 0.9999992236656515, iteration: 404430
loss: 1.0077439546585083,grad_norm: 0.9999998590022956, iteration: 404431
loss: 1.0650557279586792,grad_norm: 0.9999993087434773, iteration: 404432
loss: 1.0955291986465454,grad_norm: 0.9999998056370103, iteration: 404433
loss: 1.0313278436660767,grad_norm: 0.9999993208579933, iteration: 404434
loss: 1.025532841682434,grad_norm: 0.850015222425835, iteration: 404435
loss: 1.0298362970352173,grad_norm: 0.8531764214535739, iteration: 404436
loss: 1.207113265991211,grad_norm: 0.999999532643192, iteration: 404437
loss: 1.0382627248764038,grad_norm: 0.9833956286296883, iteration: 404438
loss: 0.991081178188324,grad_norm: 0.9999995969035606, iteration: 404439
loss: 0.9886699318885803,grad_norm: 0.7971385328062267, iteration: 404440
loss: 1.0524450540542603,grad_norm: 0.883723870205177, iteration: 404441
loss: 1.0939704179763794,grad_norm: 0.9999991771959675, iteration: 404442
loss: 1.0650321245193481,grad_norm: 0.9999995068534251, iteration: 404443
loss: 1.08163321018219,grad_norm: 0.9999994763590354, iteration: 404444
loss: 1.0069794654846191,grad_norm: 0.8164835635484283, iteration: 404445
loss: 1.0722088813781738,grad_norm: 0.9660126347533422, iteration: 404446
loss: 1.0670428276062012,grad_norm: 0.9999995191258102, iteration: 404447
loss: 1.0658053159713745,grad_norm: 0.879886747267727, iteration: 404448
loss: 1.0229785442352295,grad_norm: 0.8384813790844006, iteration: 404449
loss: 1.001703143119812,grad_norm: 0.9999995364722101, iteration: 404450
loss: 0.9746087193489075,grad_norm: 0.8059115490268093, iteration: 404451
loss: 1.0819989442825317,grad_norm: 0.99999925624303, iteration: 404452
loss: 1.1257069110870361,grad_norm: 0.9999997613251597, iteration: 404453
loss: 1.083151936531067,grad_norm: 0.9999999138523202, iteration: 404454
loss: 1.0234001874923706,grad_norm: 0.8020151204705871, iteration: 404455
loss: 0.9837651252746582,grad_norm: 0.7833929237407422, iteration: 404456
loss: 1.0989375114440918,grad_norm: 0.9999993633897636, iteration: 404457
loss: 1.0333133935928345,grad_norm: 0.9161903740438738, iteration: 404458
loss: 1.050399661064148,grad_norm: 0.9417204012427638, iteration: 404459
loss: 1.086771011352539,grad_norm: 0.9999998254701857, iteration: 404460
loss: 1.0037051439285278,grad_norm: 0.7704362983529789, iteration: 404461
loss: 1.1078894138336182,grad_norm: 0.9999994036157505, iteration: 404462
loss: 1.0282784700393677,grad_norm: 0.9146647012874061, iteration: 404463
loss: 1.0129172801971436,grad_norm: 0.9741876107544692, iteration: 404464
loss: 1.0401819944381714,grad_norm: 0.9999999173979902, iteration: 404465
loss: 1.0013891458511353,grad_norm: 0.9999996285762511, iteration: 404466
loss: 0.9676654934883118,grad_norm: 0.8892596184260169, iteration: 404467
loss: 0.9801853895187378,grad_norm: 0.999999266326156, iteration: 404468
loss: 1.0445760488510132,grad_norm: 0.9537511068080453, iteration: 404469
loss: 1.0988719463348389,grad_norm: 0.9999998889734465, iteration: 404470
loss: 1.0055676698684692,grad_norm: 0.991671855157093, iteration: 404471
loss: 0.9857000708580017,grad_norm: 0.7567696286526476, iteration: 404472
loss: 1.0287425518035889,grad_norm: 0.8515597971087073, iteration: 404473
loss: 0.9673746824264526,grad_norm: 0.7270566638377737, iteration: 404474
loss: 1.0864322185516357,grad_norm: 0.9999995534672501, iteration: 404475
loss: 1.0114518404006958,grad_norm: 0.839865184159009, iteration: 404476
loss: 0.9992828965187073,grad_norm: 0.8205432080597084, iteration: 404477
loss: 0.9632619619369507,grad_norm: 0.7739485688498202, iteration: 404478
loss: 0.9940643310546875,grad_norm: 0.9999993710512395, iteration: 404479
loss: 1.1623162031173706,grad_norm: 0.999999781234778, iteration: 404480
loss: 0.9792799353599548,grad_norm: 0.8107798290856615, iteration: 404481
loss: 1.0025403499603271,grad_norm: 0.8402287186894022, iteration: 404482
loss: 0.9584141373634338,grad_norm: 0.6549780013577771, iteration: 404483
loss: 0.9623774886131287,grad_norm: 0.9080516018310217, iteration: 404484
loss: 1.0519293546676636,grad_norm: 0.8731296336118457, iteration: 404485
loss: 1.005814790725708,grad_norm: 0.7442832393022947, iteration: 404486
loss: 1.0624054670333862,grad_norm: 0.999999074992946, iteration: 404487
loss: 1.00144624710083,grad_norm: 0.8167471358477668, iteration: 404488
loss: 1.0956292152404785,grad_norm: 0.9999994873942807, iteration: 404489
loss: 1.0838948488235474,grad_norm: 0.9999990787321201, iteration: 404490
loss: 0.9802234768867493,grad_norm: 0.7579120662486318, iteration: 404491
loss: 1.0315444469451904,grad_norm: 0.9999989678822645, iteration: 404492
loss: 1.0502679347991943,grad_norm: 0.9999998766222891, iteration: 404493
loss: 0.9619494676589966,grad_norm: 0.839571888470174, iteration: 404494
loss: 0.9984850883483887,grad_norm: 0.8410221914001985, iteration: 404495
loss: 1.032333254814148,grad_norm: 0.9267173971436586, iteration: 404496
loss: 1.0185455083847046,grad_norm: 0.9999999152530077, iteration: 404497
loss: 0.9990501999855042,grad_norm: 0.7968716577807677, iteration: 404498
loss: 1.018565058708191,grad_norm: 0.8256029416644334, iteration: 404499
loss: 0.9949349761009216,grad_norm: 0.999999798408032, iteration: 404500
loss: 0.9943192005157471,grad_norm: 0.9999996757075286, iteration: 404501
loss: 0.998240053653717,grad_norm: 0.8728155132111585, iteration: 404502
loss: 1.0148522853851318,grad_norm: 0.9999993308698886, iteration: 404503
loss: 1.0492005348205566,grad_norm: 0.9999996139244062, iteration: 404504
loss: 1.0645575523376465,grad_norm: 0.9999996004005803, iteration: 404505
loss: 1.0365242958068848,grad_norm: 0.9999993718433134, iteration: 404506
loss: 0.9971699118614197,grad_norm: 0.7495515534195781, iteration: 404507
loss: 1.0276458263397217,grad_norm: 0.7851489485075698, iteration: 404508
loss: 0.9952538013458252,grad_norm: 0.7201052851163839, iteration: 404509
loss: 0.9747694730758667,grad_norm: 0.9999991377410984, iteration: 404510
loss: 1.0068919658660889,grad_norm: 0.7588623631079575, iteration: 404511
loss: 1.0422905683517456,grad_norm: 0.9999997924615582, iteration: 404512
loss: 0.9637923836708069,grad_norm: 0.755507540424375, iteration: 404513
loss: 1.117737054824829,grad_norm: 0.8547972589652758, iteration: 404514
loss: 0.9823365211486816,grad_norm: 0.6978375064485348, iteration: 404515
loss: 1.006722092628479,grad_norm: 0.9999999842338604, iteration: 404516
loss: 1.0251775979995728,grad_norm: 0.8658887356171798, iteration: 404517
loss: 1.0831385850906372,grad_norm: 0.9999991652111119, iteration: 404518
loss: 0.9375113844871521,grad_norm: 0.8287624115597529, iteration: 404519
loss: 1.0282073020935059,grad_norm: 0.9999989776389202, iteration: 404520
loss: 1.0060936212539673,grad_norm: 0.8935620505678552, iteration: 404521
loss: 0.9990227818489075,grad_norm: 0.9832744698899432, iteration: 404522
loss: 1.0226515531539917,grad_norm: 0.7356760162620352, iteration: 404523
loss: 1.0636459589004517,grad_norm: 1.000000058411753, iteration: 404524
loss: 1.0041429996490479,grad_norm: 0.7595226714444784, iteration: 404525
loss: 1.009676218032837,grad_norm: 0.7905867623748173, iteration: 404526
loss: 1.013086199760437,grad_norm: 0.6653768509497696, iteration: 404527
loss: 1.0381340980529785,grad_norm: 0.8738172618855067, iteration: 404528
loss: 0.9706924557685852,grad_norm: 0.7443863333625832, iteration: 404529
loss: 1.0650864839553833,grad_norm: 0.8890900108825414, iteration: 404530
loss: 0.9765921831130981,grad_norm: 0.7384741635968474, iteration: 404531
loss: 0.9899318218231201,grad_norm: 0.9999989554220468, iteration: 404532
loss: 1.0507941246032715,grad_norm: 0.8774586130405491, iteration: 404533
loss: 0.9995120763778687,grad_norm: 0.7054974942300026, iteration: 404534
loss: 1.0411145687103271,grad_norm: 0.9999996622882342, iteration: 404535
loss: 0.9847803711891174,grad_norm: 0.8201393067349663, iteration: 404536
loss: 1.2482737302780151,grad_norm: 0.9999994338845382, iteration: 404537
loss: 0.9809815883636475,grad_norm: 0.8299732738288789, iteration: 404538
loss: 1.0039008855819702,grad_norm: 0.9999990232118687, iteration: 404539
loss: 1.1475924253463745,grad_norm: 0.9999997044136086, iteration: 404540
loss: 1.0833771228790283,grad_norm: 0.8450718859567397, iteration: 404541
loss: 1.1085436344146729,grad_norm: 0.9644075290558098, iteration: 404542
loss: 0.9950498938560486,grad_norm: 0.740841950899085, iteration: 404543
loss: 1.1005666255950928,grad_norm: 0.9999995750974764, iteration: 404544
loss: 1.0061358213424683,grad_norm: 0.7698612046914055, iteration: 404545
loss: 1.063167691230774,grad_norm: 0.9999991587287727, iteration: 404546
loss: 1.0442265272140503,grad_norm: 0.999999440886123, iteration: 404547
loss: 1.0127290487289429,grad_norm: 0.6502652335482697, iteration: 404548
loss: 0.9751057028770447,grad_norm: 0.8032760274523746, iteration: 404549
loss: 1.054685354232788,grad_norm: 0.8252837597591801, iteration: 404550
loss: 1.0189608335494995,grad_norm: 0.6357404149307595, iteration: 404551
loss: 0.9786315560340881,grad_norm: 0.8229148166341496, iteration: 404552
loss: 1.2012641429901123,grad_norm: 0.9999997523922755, iteration: 404553
loss: 1.1910439729690552,grad_norm: 0.999999829184734, iteration: 404554
loss: 1.0215808153152466,grad_norm: 0.9999997913108981, iteration: 404555
loss: 1.0172191858291626,grad_norm: 0.7714094230097617, iteration: 404556
loss: 1.0179691314697266,grad_norm: 0.9999998076239136, iteration: 404557
loss: 1.0102102756500244,grad_norm: 0.8041039403995898, iteration: 404558
loss: 1.1299666166305542,grad_norm: 0.8917623050381606, iteration: 404559
loss: 1.0325217247009277,grad_norm: 0.999999226371102, iteration: 404560
loss: 1.0305521488189697,grad_norm: 0.7545030402257253, iteration: 404561
loss: 1.054722785949707,grad_norm: 0.7636347926763077, iteration: 404562
loss: 0.9984099268913269,grad_norm: 0.8708073799142756, iteration: 404563
loss: 1.0660982131958008,grad_norm: 0.9999991600273501, iteration: 404564
loss: 0.99562668800354,grad_norm: 0.6999477814261459, iteration: 404565
loss: 1.0481514930725098,grad_norm: 0.8457794240958072, iteration: 404566
loss: 1.0615689754486084,grad_norm: 0.9999994838203691, iteration: 404567
loss: 0.9782374501228333,grad_norm: 0.7812516131239572, iteration: 404568
loss: 1.0376530885696411,grad_norm: 0.999999151272943, iteration: 404569
loss: 1.1361427307128906,grad_norm: 0.9999998080941865, iteration: 404570
loss: 1.0163449048995972,grad_norm: 0.8541205070935054, iteration: 404571
loss: 1.022115707397461,grad_norm: 0.8031716893356889, iteration: 404572
loss: 0.9630943536758423,grad_norm: 0.7793001180367481, iteration: 404573
loss: 1.0416558980941772,grad_norm: 0.7238752145952819, iteration: 404574
loss: 1.031538724899292,grad_norm: 0.683834383820305, iteration: 404575
loss: 0.9938888549804688,grad_norm: 0.7607944544569009, iteration: 404576
loss: 1.0421115159988403,grad_norm: 0.7365329421554276, iteration: 404577
loss: 1.0611785650253296,grad_norm: 0.9656778487182843, iteration: 404578
loss: 1.0454331636428833,grad_norm: 0.9999998573969344, iteration: 404579
loss: 0.9900588393211365,grad_norm: 0.9999990737021084, iteration: 404580
loss: 1.0214804410934448,grad_norm: 0.7225292611310288, iteration: 404581
loss: 1.0052489042282104,grad_norm: 0.8583153338175369, iteration: 404582
loss: 1.001688838005066,grad_norm: 0.8143004929609752, iteration: 404583
loss: 1.0069174766540527,grad_norm: 0.726081081807609, iteration: 404584
loss: 1.1172491312026978,grad_norm: 0.9999997207718565, iteration: 404585
loss: 1.0198986530303955,grad_norm: 0.8376743325987684, iteration: 404586
loss: 1.0320342779159546,grad_norm: 0.9999991819830016, iteration: 404587
loss: 1.0021463632583618,grad_norm: 0.7899755372938259, iteration: 404588
loss: 0.9811406135559082,grad_norm: 0.7166763766518517, iteration: 404589
loss: 1.0144047737121582,grad_norm: 0.8155245506134853, iteration: 404590
loss: 1.112806797027588,grad_norm: 0.9714949981790691, iteration: 404591
loss: 0.9936096668243408,grad_norm: 0.7463987577580465, iteration: 404592
loss: 1.0191760063171387,grad_norm: 0.7724346090886828, iteration: 404593
loss: 0.9721410870552063,grad_norm: 0.7884561334247372, iteration: 404594
loss: 0.9954179525375366,grad_norm: 0.794796018267915, iteration: 404595
loss: 0.9822950959205627,grad_norm: 0.6915591229402592, iteration: 404596
loss: 0.9773128032684326,grad_norm: 0.726549264471456, iteration: 404597
loss: 1.0360321998596191,grad_norm: 0.9258075086634963, iteration: 404598
loss: 0.9975186586380005,grad_norm: 0.9186182812409166, iteration: 404599
loss: 1.024169683456421,grad_norm: 0.7359420473786547, iteration: 404600
loss: 1.0630669593811035,grad_norm: 0.9999996960989266, iteration: 404601
loss: 1.266857624053955,grad_norm: 0.9999996787949815, iteration: 404602
loss: 1.034471035003662,grad_norm: 0.9626979772782459, iteration: 404603
loss: 1.0097827911376953,grad_norm: 0.8018815854544686, iteration: 404604
loss: 1.0570974349975586,grad_norm: 0.9999996006048468, iteration: 404605
loss: 1.0777589082717896,grad_norm: 0.9999998254439877, iteration: 404606
loss: 0.9848536252975464,grad_norm: 0.677174962415229, iteration: 404607
loss: 1.0347893238067627,grad_norm: 0.9999991132965276, iteration: 404608
loss: 1.02078115940094,grad_norm: 0.9728764761225929, iteration: 404609
loss: 1.0318833589553833,grad_norm: 0.8532037168020903, iteration: 404610
loss: 0.9783477783203125,grad_norm: 0.9018058041307851, iteration: 404611
loss: 1.0171332359313965,grad_norm: 0.9999999422576569, iteration: 404612
loss: 1.008007287979126,grad_norm: 0.9999997951481979, iteration: 404613
loss: 1.026537299156189,grad_norm: 0.8310317077384838, iteration: 404614
loss: 0.9858704209327698,grad_norm: 0.6997184973234185, iteration: 404615
loss: 1.0778740644454956,grad_norm: 0.9999999296411407, iteration: 404616
loss: 0.9823116064071655,grad_norm: 0.9999998068564011, iteration: 404617
loss: 1.0046998262405396,grad_norm: 0.9376320327329134, iteration: 404618
loss: 1.0202372074127197,grad_norm: 0.7888392351904305, iteration: 404619
loss: 0.9904805421829224,grad_norm: 0.7403726752765117, iteration: 404620
loss: 1.0324817895889282,grad_norm: 0.7999987803709773, iteration: 404621
loss: 1.0097200870513916,grad_norm: 0.9999996420059751, iteration: 404622
loss: 1.1292922496795654,grad_norm: 0.9999999641521309, iteration: 404623
loss: 0.9883039593696594,grad_norm: 0.815777160476003, iteration: 404624
loss: 1.1090998649597168,grad_norm: 1.0000000155869733, iteration: 404625
loss: 1.0740128755569458,grad_norm: 0.9999990749714601, iteration: 404626
loss: 1.0207346677780151,grad_norm: 0.9825235530606367, iteration: 404627
loss: 1.02182137966156,grad_norm: 0.9999994318474138, iteration: 404628
loss: 0.9674521684646606,grad_norm: 0.826851858852521, iteration: 404629
loss: 1.0198862552642822,grad_norm: 0.7795639492801659, iteration: 404630
loss: 0.9859082102775574,grad_norm: 0.8968029230253318, iteration: 404631
loss: 1.0524882078170776,grad_norm: 0.9093097161831273, iteration: 404632
loss: 1.1274837255477905,grad_norm: 0.904416030181911, iteration: 404633
loss: 1.0562312602996826,grad_norm: 0.9999998140548818, iteration: 404634
loss: 1.0744693279266357,grad_norm: 0.9999994875947089, iteration: 404635
loss: 0.9963176846504211,grad_norm: 0.8298677417095273, iteration: 404636
loss: 0.9965236186981201,grad_norm: 0.9891496899440723, iteration: 404637
loss: 1.057139277458191,grad_norm: 0.902720264882442, iteration: 404638
loss: 0.9686718583106995,grad_norm: 0.7694257472458439, iteration: 404639
loss: 1.1725744009017944,grad_norm: 0.9999990669665862, iteration: 404640
loss: 1.0171507596969604,grad_norm: 0.9962583043104954, iteration: 404641
loss: 1.170690655708313,grad_norm: 0.9999999427594343, iteration: 404642
loss: 1.0939284563064575,grad_norm: 0.9999996009605354, iteration: 404643
loss: 1.0718669891357422,grad_norm: 0.9999994566910142, iteration: 404644
loss: 0.9855587482452393,grad_norm: 0.9999996345413251, iteration: 404645
loss: 1.0693742036819458,grad_norm: 0.9999999305944428, iteration: 404646
loss: 1.0341792106628418,grad_norm: 0.8913054035961114, iteration: 404647
loss: 1.0043686628341675,grad_norm: 0.9999993055392781, iteration: 404648
loss: 1.0017448663711548,grad_norm: 0.9445237013484864, iteration: 404649
loss: 0.9964174032211304,grad_norm: 0.8472076005822998, iteration: 404650
loss: 1.0278472900390625,grad_norm: 0.9999990974316125, iteration: 404651
loss: 1.0505120754241943,grad_norm: 0.9999999624125608, iteration: 404652
loss: 1.0371596813201904,grad_norm: 0.9999992772863938, iteration: 404653
loss: 0.9916501045227051,grad_norm: 0.8360752565236582, iteration: 404654
loss: 1.0044622421264648,grad_norm: 0.8957706186157024, iteration: 404655
loss: 1.0262306928634644,grad_norm: 0.9999994476958729, iteration: 404656
loss: 1.0568336248397827,grad_norm: 0.8456881294391599, iteration: 404657
loss: 1.0351759195327759,grad_norm: 0.9999990750637286, iteration: 404658
loss: 1.0083531141281128,grad_norm: 0.729014820722353, iteration: 404659
loss: 1.244411587715149,grad_norm: 0.9999999743391275, iteration: 404660
loss: 1.0074937343597412,grad_norm: 0.8006375521090902, iteration: 404661
loss: 1.0459457635879517,grad_norm: 0.9728414574206838, iteration: 404662
loss: 1.0214929580688477,grad_norm: 0.9999998891997589, iteration: 404663
loss: 0.9742538928985596,grad_norm: 0.8927004134353194, iteration: 404664
loss: 1.042972445487976,grad_norm: 0.9999997931205143, iteration: 404665
loss: 1.0643678903579712,grad_norm: 0.9999999087974472, iteration: 404666
loss: 1.2045797109603882,grad_norm: 0.9999991716073201, iteration: 404667
loss: 1.0385782718658447,grad_norm: 0.9151328684655918, iteration: 404668
loss: 1.0753012895584106,grad_norm: 0.9999999144692209, iteration: 404669
loss: 1.1191761493682861,grad_norm: 0.9999995765172495, iteration: 404670
loss: 1.0217870473861694,grad_norm: 0.9999996100527103, iteration: 404671
loss: 1.0205564498901367,grad_norm: 0.9999995700751829, iteration: 404672
loss: 0.9931038618087769,grad_norm: 0.9999992723052086, iteration: 404673
loss: 1.00631582736969,grad_norm: 0.9091259895904689, iteration: 404674
loss: 1.0150924921035767,grad_norm: 0.7126585617884718, iteration: 404675
loss: 1.002451777458191,grad_norm: 0.8246284575816025, iteration: 404676
loss: 1.0735257863998413,grad_norm: 0.9999999164887302, iteration: 404677
loss: 1.0570147037506104,grad_norm: 1.0000000093771482, iteration: 404678
loss: 1.001107096672058,grad_norm: 1.0000000753529819, iteration: 404679
loss: 0.9857304096221924,grad_norm: 0.7980072554100015, iteration: 404680
loss: 1.1462886333465576,grad_norm: 0.9999997865953609, iteration: 404681
loss: 1.0892618894577026,grad_norm: 0.9999997065460547, iteration: 404682
loss: 1.0272763967514038,grad_norm: 0.9999999800027956, iteration: 404683
loss: 1.0139559507369995,grad_norm: 0.9999996452429093, iteration: 404684
loss: 1.0106511116027832,grad_norm: 0.9999991297090859, iteration: 404685
loss: 1.0302104949951172,grad_norm: 0.9361861026784543, iteration: 404686
loss: 1.0014573335647583,grad_norm: 0.9999996691849586, iteration: 404687
loss: 1.0600945949554443,grad_norm: 0.8973297206211512, iteration: 404688
loss: 1.0901291370391846,grad_norm: 1.000000018295252, iteration: 404689
loss: 1.0108846426010132,grad_norm: 0.8543491705284805, iteration: 404690
loss: 1.0105674266815186,grad_norm: 0.9999997534753521, iteration: 404691
loss: 0.9823885560035706,grad_norm: 0.828043917687699, iteration: 404692
loss: 1.0801950693130493,grad_norm: 0.9999993895304792, iteration: 404693
loss: 1.1726757287979126,grad_norm: 0.9999993551062935, iteration: 404694
loss: 0.9684759974479675,grad_norm: 0.7595492862947123, iteration: 404695
loss: 1.0464739799499512,grad_norm: 1.0000000187719107, iteration: 404696
loss: 0.99217689037323,grad_norm: 0.6910321537619817, iteration: 404697
loss: 1.017506718635559,grad_norm: 0.9999996510419843, iteration: 404698
loss: 1.038081169128418,grad_norm: 0.9730381160050712, iteration: 404699
loss: 1.0421375036239624,grad_norm: 0.8142818209953322, iteration: 404700
loss: 1.111318588256836,grad_norm: 0.9999993642358865, iteration: 404701
loss: 1.0324108600616455,grad_norm: 0.9999998143790079, iteration: 404702
loss: 1.0541921854019165,grad_norm: 0.99999988799562, iteration: 404703
loss: 1.0893115997314453,grad_norm: 0.9999997012730489, iteration: 404704
loss: 1.1088299751281738,grad_norm: 0.9999999247938589, iteration: 404705
loss: 1.0583240985870361,grad_norm: 0.999999731738681, iteration: 404706
loss: 1.0956201553344727,grad_norm: 0.9999990838501364, iteration: 404707
loss: 1.011947751045227,grad_norm: 0.9999996207414323, iteration: 404708
loss: 1.139769196510315,grad_norm: 1.0000000257068058, iteration: 404709
loss: 1.116950511932373,grad_norm: 0.9999993223242091, iteration: 404710
loss: 1.2032474279403687,grad_norm: 0.9999995800706168, iteration: 404711
loss: 1.2501291036605835,grad_norm: 0.9999999085109956, iteration: 404712
loss: 1.3143353462219238,grad_norm: 0.9999993083813985, iteration: 404713
loss: 1.1677638292312622,grad_norm: 0.9999999534640162, iteration: 404714
loss: 1.0128910541534424,grad_norm: 0.9999996882736288, iteration: 404715
loss: 1.064918041229248,grad_norm: 0.9999993264457517, iteration: 404716
loss: 1.112657904624939,grad_norm: 0.999999619947805, iteration: 404717
loss: 1.0786080360412598,grad_norm: 0.999999772913266, iteration: 404718
loss: 1.4864354133605957,grad_norm: 0.9999997264884372, iteration: 404719
loss: 1.0039007663726807,grad_norm: 0.9999991368329912, iteration: 404720
loss: 1.0746090412139893,grad_norm: 0.9999993660434956, iteration: 404721
loss: 1.1926143169403076,grad_norm: 0.9999997153340703, iteration: 404722
loss: 0.9832239151000977,grad_norm: 0.7822298496878508, iteration: 404723
loss: 1.0488383769989014,grad_norm: 0.9999993426965569, iteration: 404724
loss: 1.1171396970748901,grad_norm: 0.999999821530092, iteration: 404725
loss: 1.0007679462432861,grad_norm: 0.9999994050691934, iteration: 404726
loss: 1.2826659679412842,grad_norm: 0.9999998656731324, iteration: 404727
loss: 1.2841222286224365,grad_norm: 0.9999998805999193, iteration: 404728
loss: 1.0387369394302368,grad_norm: 0.999999686163887, iteration: 404729
loss: 1.0141607522964478,grad_norm: 0.9458725753148984, iteration: 404730
loss: 1.1493034362792969,grad_norm: 0.9999999270386644, iteration: 404731
loss: 1.0775293111801147,grad_norm: 0.9999993902025471, iteration: 404732
loss: 1.2131458520889282,grad_norm: 0.9999997973124969, iteration: 404733
loss: 0.995672345161438,grad_norm: 0.9999991402773876, iteration: 404734
loss: 1.0842695236206055,grad_norm: 0.9999998418208312, iteration: 404735
loss: 0.9909143447875977,grad_norm: 0.8365123541086597, iteration: 404736
loss: 1.2778654098510742,grad_norm: 0.9999999072754567, iteration: 404737
loss: 1.1351341009140015,grad_norm: 0.9999991973166207, iteration: 404738
loss: 1.2488278150558472,grad_norm: 0.9999998215402602, iteration: 404739
loss: 1.1457175016403198,grad_norm: 0.9999997237744468, iteration: 404740
loss: 1.218164324760437,grad_norm: 0.9999994759286905, iteration: 404741
loss: 1.0689945220947266,grad_norm: 0.9999994339264737, iteration: 404742
loss: 1.2802401781082153,grad_norm: 0.9999999053893132, iteration: 404743
loss: 1.0644418001174927,grad_norm: 0.999999935842454, iteration: 404744
loss: 1.0969728231430054,grad_norm: 0.9999999415342439, iteration: 404745
loss: 1.123029112815857,grad_norm: 0.9999998562126747, iteration: 404746
loss: 1.048754096031189,grad_norm: 0.999999467942628, iteration: 404747
loss: 1.1255700588226318,grad_norm: 0.9999998264020881, iteration: 404748
loss: 1.0933817625045776,grad_norm: 0.9999991735157283, iteration: 404749
loss: 1.0415644645690918,grad_norm: 0.8602797434962466, iteration: 404750
loss: 1.0240600109100342,grad_norm: 0.8171405740352194, iteration: 404751
loss: 1.1134929656982422,grad_norm: 0.9999996004788482, iteration: 404752
loss: 0.9885207414627075,grad_norm: 0.8818104895774235, iteration: 404753
loss: 1.0447484254837036,grad_norm: 0.9999991382557458, iteration: 404754
loss: 1.0207279920578003,grad_norm: 0.9181670374016828, iteration: 404755
loss: 0.9753419756889343,grad_norm: 0.8326332238106239, iteration: 404756
loss: 1.0621196031570435,grad_norm: 0.9999998761000843, iteration: 404757
loss: 1.014228105545044,grad_norm: 0.8847468265998714, iteration: 404758
loss: 1.125065803527832,grad_norm: 0.9999990904512316, iteration: 404759
loss: 1.1009618043899536,grad_norm: 0.9999999745122785, iteration: 404760
loss: 1.0774753093719482,grad_norm: 0.9186596101196878, iteration: 404761
loss: 0.9848142266273499,grad_norm: 0.9999990398801403, iteration: 404762
loss: 1.0144941806793213,grad_norm: 0.9999991292544738, iteration: 404763
loss: 1.17548406124115,grad_norm: 0.7729335488289633, iteration: 404764
loss: 1.1475334167480469,grad_norm: 0.9999998005596786, iteration: 404765
loss: 0.9952345490455627,grad_norm: 0.9999998039866971, iteration: 404766
loss: 1.1463263034820557,grad_norm: 0.9999992450264964, iteration: 404767
loss: 1.1849257946014404,grad_norm: 0.9999993056052521, iteration: 404768
loss: 1.1312940120697021,grad_norm: 0.9999997964247337, iteration: 404769
loss: 1.0332363843917847,grad_norm: 0.999999718975285, iteration: 404770
loss: 1.199755072593689,grad_norm: 0.9999995679304892, iteration: 404771
loss: 1.0682851076126099,grad_norm: 0.9999993205539044, iteration: 404772
loss: 1.0801985263824463,grad_norm: 0.9999994866491208, iteration: 404773
loss: 1.0609207153320312,grad_norm: 0.8861839736798138, iteration: 404774
loss: 1.0895522832870483,grad_norm: 0.9999995451867225, iteration: 404775
loss: 1.1111195087432861,grad_norm: 0.9999997982465939, iteration: 404776
loss: 1.0851881504058838,grad_norm: 0.999999938002425, iteration: 404777
loss: 1.0262298583984375,grad_norm: 0.9999990644431713, iteration: 404778
loss: 1.0268341302871704,grad_norm: 0.9999995148359996, iteration: 404779
loss: 1.0012753009796143,grad_norm: 0.9999993540802007, iteration: 404780
loss: 1.0065757036209106,grad_norm: 0.9999994300617058, iteration: 404781
loss: 1.0429459810256958,grad_norm: 0.9999990241594905, iteration: 404782
loss: 1.0606380701065063,grad_norm: 0.9999993978668695, iteration: 404783
loss: 0.997357964515686,grad_norm: 0.9999991908176461, iteration: 404784
loss: 0.9775488376617432,grad_norm: 0.9999992071251096, iteration: 404785
loss: 0.9738109707832336,grad_norm: 0.8113959205049087, iteration: 404786
loss: 1.1024397611618042,grad_norm: 0.7369133865642322, iteration: 404787
loss: 1.019953727722168,grad_norm: 0.7428961898452118, iteration: 404788
loss: 1.1141027212142944,grad_norm: 1.0000000137865157, iteration: 404789
loss: 1.043542504310608,grad_norm: 0.9999990582103616, iteration: 404790
loss: 1.08263099193573,grad_norm: 0.999999655851585, iteration: 404791
loss: 1.008903980255127,grad_norm: 0.9007943202310403, iteration: 404792
loss: 1.0635361671447754,grad_norm: 0.9150585287655929, iteration: 404793
loss: 1.016191005706787,grad_norm: 0.9999992233245178, iteration: 404794
loss: 1.1224414110183716,grad_norm: 0.9954295879812664, iteration: 404795
loss: 1.1018762588500977,grad_norm: 0.9999996608574302, iteration: 404796
loss: 1.0457240343093872,grad_norm: 0.7349978385417291, iteration: 404797
loss: 1.0349787473678589,grad_norm: 0.9999993564534971, iteration: 404798
loss: 1.1313682794570923,grad_norm: 1.000000043550097, iteration: 404799
loss: 1.1592572927474976,grad_norm: 1.0000001589622092, iteration: 404800
loss: 0.9577750563621521,grad_norm: 0.943228084768338, iteration: 404801
loss: 1.0475437641143799,grad_norm: 0.9999993366568604, iteration: 404802
loss: 1.1134933233261108,grad_norm: 0.9999993065170933, iteration: 404803
loss: 1.0782597064971924,grad_norm: 0.9999998770160977, iteration: 404804
loss: 1.2442375421524048,grad_norm: 0.9999994971000226, iteration: 404805
loss: 1.0815337896347046,grad_norm: 0.9543035212966651, iteration: 404806
loss: 1.151100516319275,grad_norm: 0.9999992019254522, iteration: 404807
loss: 1.0105658769607544,grad_norm: 0.9999996199049557, iteration: 404808
loss: 1.06698477268219,grad_norm: 0.8729757229119333, iteration: 404809
loss: 1.0648467540740967,grad_norm: 0.9999994927985297, iteration: 404810
loss: 1.0259019136428833,grad_norm: 0.7844773570463601, iteration: 404811
loss: 1.0274932384490967,grad_norm: 0.9999992703995408, iteration: 404812
loss: 1.017545223236084,grad_norm: 0.9999994179381776, iteration: 404813
loss: 1.2143675088882446,grad_norm: 0.9999999246219791, iteration: 404814
loss: 1.1433148384094238,grad_norm: 0.9999996029911794, iteration: 404815
loss: 1.2743991613388062,grad_norm: 1.0000000468487915, iteration: 404816
loss: 0.9697451591491699,grad_norm: 0.9999994319344657, iteration: 404817
loss: 1.0403791666030884,grad_norm: 0.8164638780433932, iteration: 404818
loss: 1.0981842279434204,grad_norm: 0.9999993733789933, iteration: 404819
loss: 1.0431172847747803,grad_norm: 0.9851467970040764, iteration: 404820
loss: 1.0818710327148438,grad_norm: 0.9999999550233611, iteration: 404821
loss: 1.068207859992981,grad_norm: 0.9999993708215119, iteration: 404822
loss: 1.0154345035552979,grad_norm: 0.7813557133138314, iteration: 404823
loss: 1.1971479654312134,grad_norm: 0.9999999028436363, iteration: 404824
loss: 1.062853455543518,grad_norm: 0.940675751563795, iteration: 404825
loss: 1.1239556074142456,grad_norm: 0.9999990890578352, iteration: 404826
loss: 1.2828643321990967,grad_norm: 0.9999993803897658, iteration: 404827
loss: 1.0641510486602783,grad_norm: 0.9329741629222584, iteration: 404828
loss: 1.055559515953064,grad_norm: 1.0000000481024887, iteration: 404829
loss: 1.2330793142318726,grad_norm: 0.9999996852084295, iteration: 404830
loss: 1.1245700120925903,grad_norm: 0.9999990265390841, iteration: 404831
loss: 1.167070746421814,grad_norm: 0.999999896704385, iteration: 404832
loss: 1.073179006576538,grad_norm: 0.9999991696258406, iteration: 404833
loss: 1.1766358613967896,grad_norm: 0.9999996599162023, iteration: 404834
loss: 1.1129939556121826,grad_norm: 0.9999999284109673, iteration: 404835
loss: 1.133629322052002,grad_norm: 0.9999995326907153, iteration: 404836
loss: 1.2169944047927856,grad_norm: 0.9999992593280572, iteration: 404837
loss: 0.9957193732261658,grad_norm: 0.7583645154083946, iteration: 404838
loss: 1.0022203922271729,grad_norm: 0.999999226449837, iteration: 404839
loss: 1.0365960597991943,grad_norm: 0.8696665155327271, iteration: 404840
loss: 1.0855532884597778,grad_norm: 0.99999937381034, iteration: 404841
loss: 1.1054712533950806,grad_norm: 0.9999999487163567, iteration: 404842
loss: 1.263480544090271,grad_norm: 0.9999999224924492, iteration: 404843
loss: 1.266701579093933,grad_norm: 0.9999997978422223, iteration: 404844
loss: 1.1376960277557373,grad_norm: 0.9999993856506315, iteration: 404845
loss: 1.0445194244384766,grad_norm: 0.9999997598363102, iteration: 404846
loss: 1.1072174310684204,grad_norm: 0.9083367837365228, iteration: 404847
loss: 1.100279688835144,grad_norm: 0.9999995531808339, iteration: 404848
loss: 1.3022688627243042,grad_norm: 0.9999996982193778, iteration: 404849
loss: 1.138939619064331,grad_norm: 0.9999994397893184, iteration: 404850
loss: 1.1602410078048706,grad_norm: 1.0000000336259152, iteration: 404851
loss: 1.0506023168563843,grad_norm: 0.9999991229174137, iteration: 404852
loss: 1.3856229782104492,grad_norm: 1.000000038571407, iteration: 404853
loss: 1.0499171018600464,grad_norm: 0.9999990786374373, iteration: 404854
loss: 1.0025614500045776,grad_norm: 0.8654124103154556, iteration: 404855
loss: 1.01786208152771,grad_norm: 0.9999999641662931, iteration: 404856
loss: 1.0681126117706299,grad_norm: 0.853864560161332, iteration: 404857
loss: 1.035090446472168,grad_norm: 0.9999990054353212, iteration: 404858
loss: 1.0211212635040283,grad_norm: 0.9999995164370495, iteration: 404859
loss: 1.1994673013687134,grad_norm: 0.9999996275255588, iteration: 404860
loss: 1.080939531326294,grad_norm: 0.9999996777122062, iteration: 404861
loss: 1.228264570236206,grad_norm: 0.9999998138355461, iteration: 404862
loss: 0.9825989007949829,grad_norm: 0.8070300639649824, iteration: 404863
loss: 1.1814273595809937,grad_norm: 0.9999993689257193, iteration: 404864
loss: 1.032866358757019,grad_norm: 0.8811347846024684, iteration: 404865
loss: 1.012279748916626,grad_norm: 0.8251205912489441, iteration: 404866
loss: 1.051403284072876,grad_norm: 0.9999997634890471, iteration: 404867
loss: 1.1031302213668823,grad_norm: 0.9999998363627182, iteration: 404868
loss: 1.0808240175247192,grad_norm: 0.9999992514059444, iteration: 404869
loss: 1.0524611473083496,grad_norm: 0.9999996113656024, iteration: 404870
loss: 1.0853357315063477,grad_norm: 0.9999999436738269, iteration: 404871
loss: 1.1207772493362427,grad_norm: 0.9999995590603928, iteration: 404872
loss: 1.0386968851089478,grad_norm: 0.8993176717387791, iteration: 404873
loss: 1.043153166770935,grad_norm: 0.9999994377215756, iteration: 404874
loss: 0.980908989906311,grad_norm: 0.6968108277428212, iteration: 404875
loss: 1.2164655923843384,grad_norm: 0.9999996266396302, iteration: 404876
loss: 1.115739345550537,grad_norm: 0.9999999817640688, iteration: 404877
loss: 1.0844907760620117,grad_norm: 0.9513362105695484, iteration: 404878
loss: 1.0132602453231812,grad_norm: 0.999999888094708, iteration: 404879
loss: 1.0355637073516846,grad_norm: 0.999999133739505, iteration: 404880
loss: 1.0544415712356567,grad_norm: 0.8328515403469904, iteration: 404881
loss: 1.1017017364501953,grad_norm: 0.8285077439743915, iteration: 404882
loss: 1.2247321605682373,grad_norm: 0.999999816183515, iteration: 404883
loss: 1.1332731246948242,grad_norm: 0.9999992158608152, iteration: 404884
loss: 1.3225435018539429,grad_norm: 0.9999993241256936, iteration: 404885
loss: 1.0654900074005127,grad_norm: 0.9999990750233609, iteration: 404886
loss: 0.9779379963874817,grad_norm: 0.9999990442943342, iteration: 404887
loss: 1.0128138065338135,grad_norm: 0.999999184224017, iteration: 404888
loss: 1.0374962091445923,grad_norm: 0.9999995384616825, iteration: 404889
loss: 0.9910066723823547,grad_norm: 0.9999991937773175, iteration: 404890
loss: 0.9788135886192322,grad_norm: 0.8100267555116171, iteration: 404891
loss: 1.0607995986938477,grad_norm: 0.999999010747774, iteration: 404892
loss: 1.0356287956237793,grad_norm: 0.99999937530097, iteration: 404893
loss: 1.1434868574142456,grad_norm: 0.9999993465535529, iteration: 404894
loss: 1.2160565853118896,grad_norm: 0.9999996324111662, iteration: 404895
loss: 1.035369634628296,grad_norm: 0.9482341107436967, iteration: 404896
loss: 1.0336483716964722,grad_norm: 0.954518647924047, iteration: 404897
loss: 1.1429616212844849,grad_norm: 0.9048549620750762, iteration: 404898
loss: 1.2467243671417236,grad_norm: 0.9999992551447562, iteration: 404899
loss: 1.1418050527572632,grad_norm: 0.9999991536717426, iteration: 404900
loss: 1.1149324178695679,grad_norm: 0.9999991368123394, iteration: 404901
loss: 0.9817520380020142,grad_norm: 0.9999997552776105, iteration: 404902
loss: 1.0272349119186401,grad_norm: 0.9999999815664748, iteration: 404903
loss: 1.1913527250289917,grad_norm: 0.999999748188685, iteration: 404904
loss: 1.0221447944641113,grad_norm: 0.9999999334961768, iteration: 404905
loss: 1.0139498710632324,grad_norm: 0.7338042920698565, iteration: 404906
loss: 1.039387583732605,grad_norm: 0.777097715009899, iteration: 404907
loss: 1.1102259159088135,grad_norm: 0.9999996460256136, iteration: 404908
loss: 1.1338001489639282,grad_norm: 1.0000000141797194, iteration: 404909
loss: 1.0339300632476807,grad_norm: 0.8928806521268626, iteration: 404910
loss: 1.0013666152954102,grad_norm: 0.9479037223836986, iteration: 404911
loss: 1.124535322189331,grad_norm: 0.9999995506133169, iteration: 404912
loss: 1.0188552141189575,grad_norm: 0.9999996282396526, iteration: 404913
loss: 1.0483105182647705,grad_norm: 0.9999993530631055, iteration: 404914
loss: 1.0478637218475342,grad_norm: 0.9999998923681953, iteration: 404915
loss: 1.080237865447998,grad_norm: 0.9999998378928289, iteration: 404916
loss: 1.0558558702468872,grad_norm: 0.9999993096427505, iteration: 404917
loss: 1.032821774482727,grad_norm: 0.9178131206940192, iteration: 404918
loss: 0.9584235548973083,grad_norm: 0.7768872383486485, iteration: 404919
loss: 1.014011263847351,grad_norm: 0.9999997785126142, iteration: 404920
loss: 1.0646300315856934,grad_norm: 0.9670972752973241, iteration: 404921
loss: 1.0616679191589355,grad_norm: 0.999999606707227, iteration: 404922
loss: 1.0255980491638184,grad_norm: 0.9999994240551179, iteration: 404923
loss: 1.0571606159210205,grad_norm: 0.9531308243471114, iteration: 404924
loss: 1.0241881608963013,grad_norm: 0.8875114209435864, iteration: 404925
loss: 1.108324408531189,grad_norm: 0.9565903288469126, iteration: 404926
loss: 1.0481065511703491,grad_norm: 0.9805017228919937, iteration: 404927
loss: 1.0217087268829346,grad_norm: 0.8112916248972774, iteration: 404928
loss: 1.0039079189300537,grad_norm: 0.8065992936049199, iteration: 404929
loss: 1.055475115776062,grad_norm: 0.8466618622849257, iteration: 404930
loss: 1.1061170101165771,grad_norm: 0.9999997148385721, iteration: 404931
loss: 1.0868477821350098,grad_norm: 0.999999675836645, iteration: 404932
loss: 1.185889720916748,grad_norm: 0.9999995750971135, iteration: 404933
loss: 1.0472301244735718,grad_norm: 0.9999999972762621, iteration: 404934
loss: 1.139654517173767,grad_norm: 0.9999992370534483, iteration: 404935
loss: 1.1260185241699219,grad_norm: 0.999999068303557, iteration: 404936
loss: 1.0095336437225342,grad_norm: 0.89381416304087, iteration: 404937
loss: 1.0071173906326294,grad_norm: 0.99999906046222, iteration: 404938
loss: 1.0320188999176025,grad_norm: 0.8799784647533491, iteration: 404939
loss: 1.0845328569412231,grad_norm: 0.999999625488914, iteration: 404940
loss: 1.1198248863220215,grad_norm: 0.756473188963288, iteration: 404941
loss: 1.0116407871246338,grad_norm: 0.8410909120034633, iteration: 404942
loss: 1.0652192831039429,grad_norm: 0.999999171281455, iteration: 404943
loss: 1.1562459468841553,grad_norm: 0.9999999008762981, iteration: 404944
loss: 1.0406333208084106,grad_norm: 0.9999993462175629, iteration: 404945
loss: 1.0730252265930176,grad_norm: 0.9663735528576795, iteration: 404946
loss: 1.062088966369629,grad_norm: 0.9999997581383167, iteration: 404947
loss: 1.076004981994629,grad_norm: 0.744144396066263, iteration: 404948
loss: 1.0497292280197144,grad_norm: 0.9999990947619967, iteration: 404949
loss: 1.0045818090438843,grad_norm: 0.9999991597057598, iteration: 404950
loss: 1.1166408061981201,grad_norm: 0.775038377337152, iteration: 404951
loss: 1.1085035800933838,grad_norm: 0.999999201263511, iteration: 404952
loss: 1.147727608680725,grad_norm: 0.9999994852256926, iteration: 404953
loss: 1.153219223022461,grad_norm: 0.9999995620965494, iteration: 404954
loss: 1.0643014907836914,grad_norm: 0.9999993455314964, iteration: 404955
loss: 1.105005145072937,grad_norm: 0.9999991079147202, iteration: 404956
loss: 1.0344308614730835,grad_norm: 0.8713961834273429, iteration: 404957
loss: 1.0304877758026123,grad_norm: 0.9015333268262959, iteration: 404958
loss: 1.0177704095840454,grad_norm: 0.7949435500579284, iteration: 404959
loss: 1.0877021551132202,grad_norm: 0.8621913906816338, iteration: 404960
loss: 1.0155470371246338,grad_norm: 0.7461966067114001, iteration: 404961
loss: 1.0225030183792114,grad_norm: 0.9999998221211003, iteration: 404962
loss: 1.0071067810058594,grad_norm: 0.9248138776652071, iteration: 404963
loss: 1.0548584461212158,grad_norm: 0.9999999772711017, iteration: 404964
loss: 1.0815986394882202,grad_norm: 0.9999992968434469, iteration: 404965
loss: 1.2157458066940308,grad_norm: 0.9999995856943188, iteration: 404966
loss: 1.013649344444275,grad_norm: 0.6779546407477823, iteration: 404967
loss: 0.9968057870864868,grad_norm: 0.7219239164774968, iteration: 404968
loss: 1.0471192598342896,grad_norm: 0.769382454322711, iteration: 404969
loss: 1.0535132884979248,grad_norm: 0.757430737749633, iteration: 404970
loss: 1.0736573934555054,grad_norm: 0.9999994593123097, iteration: 404971
loss: 1.0490002632141113,grad_norm: 0.7076858277022525, iteration: 404972
loss: 1.005733609199524,grad_norm: 0.9696255695636666, iteration: 404973
loss: 1.0231926441192627,grad_norm: 0.950472421680239, iteration: 404974
loss: 1.012848138809204,grad_norm: 0.7779229219719347, iteration: 404975
loss: 1.0492552518844604,grad_norm: 0.9999995870006771, iteration: 404976
loss: 1.0546585321426392,grad_norm: 0.9999995035279445, iteration: 404977
loss: 1.0249998569488525,grad_norm: 0.9999995507717032, iteration: 404978
loss: 1.0826672315597534,grad_norm: 0.7338341343601896, iteration: 404979
loss: 1.0551332235336304,grad_norm: 0.7529544970862894, iteration: 404980
loss: 1.0469201803207397,grad_norm: 0.9999998296656014, iteration: 404981
loss: 1.0191463232040405,grad_norm: 0.994925084956703, iteration: 404982
loss: 1.0278946161270142,grad_norm: 0.8619892742090087, iteration: 404983
loss: 1.0541698932647705,grad_norm: 0.8411892704052313, iteration: 404984
loss: 1.1032838821411133,grad_norm: 0.9999990284808689, iteration: 404985
loss: 1.008393406867981,grad_norm: 0.9999999340219573, iteration: 404986
loss: 1.1283735036849976,grad_norm: 0.9999999560389958, iteration: 404987
loss: 1.1740857362747192,grad_norm: 0.9999990925919219, iteration: 404988
loss: 1.076797604560852,grad_norm: 0.9999990869789045, iteration: 404989
loss: 1.041843056678772,grad_norm: 0.973257692971874, iteration: 404990
loss: 0.9995090961456299,grad_norm: 0.9999997251023885, iteration: 404991
loss: 1.1013051271438599,grad_norm: 0.9377095016404708, iteration: 404992
loss: 1.0088543891906738,grad_norm: 0.999999649418271, iteration: 404993
loss: 1.0683326721191406,grad_norm: 0.783939572868965, iteration: 404994
loss: 1.0704872608184814,grad_norm: 0.7335362675321321, iteration: 404995
loss: 1.0579791069030762,grad_norm: 0.9524176761933633, iteration: 404996
loss: 1.0865910053253174,grad_norm: 0.9390476452871116, iteration: 404997
loss: 1.147581934928894,grad_norm: 0.9999993890082022, iteration: 404998
loss: 1.090618371963501,grad_norm: 0.9999994745895348, iteration: 404999
loss: 1.1713054180145264,grad_norm: 0.9999990056424088, iteration: 405000
loss: 1.0387002229690552,grad_norm: 0.9999999710119755, iteration: 405001
loss: 1.0579484701156616,grad_norm: 0.8756198234837633, iteration: 405002
loss: 1.0288447141647339,grad_norm: 0.9999995632453763, iteration: 405003
loss: 1.083267331123352,grad_norm: 0.8965661251464158, iteration: 405004
loss: 1.0555413961410522,grad_norm: 0.9999999524371049, iteration: 405005
loss: 1.0551167726516724,grad_norm: 0.9999997131687497, iteration: 405006
loss: 1.1007429361343384,grad_norm: 0.9999991895988619, iteration: 405007
loss: 1.0510658025741577,grad_norm: 0.8877526996722592, iteration: 405008
loss: 1.0933841466903687,grad_norm: 0.9999994126771318, iteration: 405009
loss: 0.9536962509155273,grad_norm: 0.9450441185221503, iteration: 405010
loss: 1.0848217010498047,grad_norm: 0.9026177015771022, iteration: 405011
loss: 1.0753905773162842,grad_norm: 0.9999990960230215, iteration: 405012
loss: 1.0237958431243896,grad_norm: 0.9999998305173163, iteration: 405013
loss: 1.0180655717849731,grad_norm: 0.9999991489823596, iteration: 405014
loss: 1.0426872968673706,grad_norm: 0.999999293153592, iteration: 405015
loss: 1.1014405488967896,grad_norm: 0.9999997284227877, iteration: 405016
loss: 1.1347787380218506,grad_norm: 0.9999995328922424, iteration: 405017
loss: 1.1449308395385742,grad_norm: 0.9999992672121639, iteration: 405018
loss: 1.099819540977478,grad_norm: 0.9999991563969564, iteration: 405019
loss: 1.0874463319778442,grad_norm: 0.9999999003275405, iteration: 405020
loss: 1.0220545530319214,grad_norm: 0.8822496591075579, iteration: 405021
loss: 1.0095092058181763,grad_norm: 0.8073015918096834, iteration: 405022
loss: 0.9720958471298218,grad_norm: 0.7512962188101987, iteration: 405023
loss: 0.9776858687400818,grad_norm: 0.7678942300900415, iteration: 405024
loss: 1.022598385810852,grad_norm: 0.604941445471527, iteration: 405025
loss: 1.0624746084213257,grad_norm: 0.9999991009382242, iteration: 405026
loss: 1.1509952545166016,grad_norm: 0.9999999180090233, iteration: 405027
loss: 1.0336772203445435,grad_norm: 0.7816334689379699, iteration: 405028
loss: 1.1381677389144897,grad_norm: 0.9999992011981247, iteration: 405029
loss: 1.0463536977767944,grad_norm: 0.9999991179766797, iteration: 405030
loss: 1.0032647848129272,grad_norm: 0.9999992608750548, iteration: 405031
loss: 1.0816223621368408,grad_norm: 0.999999801475186, iteration: 405032
loss: 0.9958482384681702,grad_norm: 0.7830889228351818, iteration: 405033
loss: 1.105361819267273,grad_norm: 0.9999997800182945, iteration: 405034
loss: 1.0014818906784058,grad_norm: 0.8222139125351475, iteration: 405035
loss: 1.0173760652542114,grad_norm: 0.9999989137833849, iteration: 405036
loss: 1.0633903741836548,grad_norm: 0.9229542969400749, iteration: 405037
loss: 1.0441548824310303,grad_norm: 0.7534234479480066, iteration: 405038
loss: 1.0263521671295166,grad_norm: 0.999999840120427, iteration: 405039
loss: 1.023370385169983,grad_norm: 0.999999938051462, iteration: 405040
loss: 1.004948377609253,grad_norm: 0.9120198923922747, iteration: 405041
loss: 1.000685691833496,grad_norm: 0.915568846951423, iteration: 405042
loss: 1.0589512586593628,grad_norm: 0.9110831601443579, iteration: 405043
loss: 1.0665558576583862,grad_norm: 0.9999994959563192, iteration: 405044
loss: 0.9727514386177063,grad_norm: 0.8335326548645349, iteration: 405045
loss: 1.0526129007339478,grad_norm: 0.9999999881285728, iteration: 405046
loss: 1.0322004556655884,grad_norm: 0.8553698554498937, iteration: 405047
loss: 0.9504070281982422,grad_norm: 0.8022939780829866, iteration: 405048
loss: 0.9945473670959473,grad_norm: 0.8407461269755414, iteration: 405049
loss: 1.1506152153015137,grad_norm: 0.9999991051876718, iteration: 405050
loss: 1.0961087942123413,grad_norm: 0.9999992948604804, iteration: 405051
loss: 1.0095019340515137,grad_norm: 0.869507565847325, iteration: 405052
loss: 0.9918739795684814,grad_norm: 0.7083237666689008, iteration: 405053
loss: 1.0139610767364502,grad_norm: 0.9999993276660384, iteration: 405054
loss: 1.069840431213379,grad_norm: 0.9999995075317784, iteration: 405055
loss: 1.0209593772888184,grad_norm: 0.9999990653443571, iteration: 405056
loss: 1.0495189428329468,grad_norm: 0.9054073669247733, iteration: 405057
loss: 1.0163350105285645,grad_norm: 0.9999997953618144, iteration: 405058
loss: 1.0128787755966187,grad_norm: 0.8360748385563779, iteration: 405059
loss: 1.0160681009292603,grad_norm: 0.8238938323319542, iteration: 405060
loss: 1.0580865144729614,grad_norm: 0.9999997059675741, iteration: 405061
loss: 1.0370280742645264,grad_norm: 0.7426576361186856, iteration: 405062
loss: 1.0669833421707153,grad_norm: 0.8741745899320648, iteration: 405063
loss: 1.102716088294983,grad_norm: 0.9525904409529377, iteration: 405064
loss: 1.0345371961593628,grad_norm: 1.0000000026309346, iteration: 405065
loss: 1.0389280319213867,grad_norm: 0.899774281660313, iteration: 405066
loss: 1.1118268966674805,grad_norm: 0.9441980907278031, iteration: 405067
loss: 1.0168074369430542,grad_norm: 0.928484490489269, iteration: 405068
loss: 1.003690481185913,grad_norm: 0.9139708138721233, iteration: 405069
loss: 1.0913610458374023,grad_norm: 0.9999991078490799, iteration: 405070
loss: 1.0536494255065918,grad_norm: 0.764445767266022, iteration: 405071
loss: 0.9975451231002808,grad_norm: 0.8196587191757283, iteration: 405072
loss: 1.0389454364776611,grad_norm: 0.9999991597937326, iteration: 405073
loss: 1.0078892707824707,grad_norm: 0.7331062681932321, iteration: 405074
loss: 1.1026557683944702,grad_norm: 0.9200393644820458, iteration: 405075
loss: 0.991146981716156,grad_norm: 0.7392513453147411, iteration: 405076
loss: 1.005423665046692,grad_norm: 0.6765621869717636, iteration: 405077
loss: 0.9829525351524353,grad_norm: 0.9999992151717139, iteration: 405078
loss: 0.9942477345466614,grad_norm: 0.8948346522653765, iteration: 405079
loss: 1.0312985181808472,grad_norm: 0.8063963008583099, iteration: 405080
loss: 0.9879543781280518,grad_norm: 0.9756139357270827, iteration: 405081
loss: 1.0262356996536255,grad_norm: 0.911918372587194, iteration: 405082
loss: 1.0121009349822998,grad_norm: 0.7716734315256772, iteration: 405083
loss: 1.1455261707305908,grad_norm: 0.9999995467183487, iteration: 405084
loss: 1.003377914428711,grad_norm: 0.9999997148636749, iteration: 405085
loss: 1.0209017992019653,grad_norm: 0.8560451410026446, iteration: 405086
loss: 1.017775297164917,grad_norm: 0.9999991135922013, iteration: 405087
loss: 1.014617919921875,grad_norm: 0.9118283729569294, iteration: 405088
loss: 1.2147576808929443,grad_norm: 0.999999260087104, iteration: 405089
loss: 1.0488917827606201,grad_norm: 0.9137056012466139, iteration: 405090
loss: 1.023600459098816,grad_norm: 0.769940073182526, iteration: 405091
loss: 1.0421627759933472,grad_norm: 0.9657240732989847, iteration: 405092
loss: 1.0609925985336304,grad_norm: 0.9999994262165796, iteration: 405093
loss: 1.0744383335113525,grad_norm: 0.999999102371098, iteration: 405094
loss: 1.0109000205993652,grad_norm: 0.9402078720516551, iteration: 405095
loss: 1.2028019428253174,grad_norm: 0.999999708540945, iteration: 405096
loss: 1.0261473655700684,grad_norm: 0.6743592377973081, iteration: 405097
loss: 1.073720932006836,grad_norm: 0.9999998857444747, iteration: 405098
loss: 1.0562492609024048,grad_norm: 0.9999999332603324, iteration: 405099
loss: 0.994373619556427,grad_norm: 0.8758130633613883, iteration: 405100
loss: 1.0733683109283447,grad_norm: 0.9999990662087053, iteration: 405101
loss: 1.044402003288269,grad_norm: 0.9051874482300987, iteration: 405102
loss: 1.0727218389511108,grad_norm: 0.9341316219195004, iteration: 405103
loss: 1.0026992559432983,grad_norm: 0.7642818357132841, iteration: 405104
loss: 1.1800651550292969,grad_norm: 0.9999995796124866, iteration: 405105
loss: 1.006584644317627,grad_norm: 0.8248896055225142, iteration: 405106
loss: 1.004844307899475,grad_norm: 0.8386049633768234, iteration: 405107
loss: 1.0682320594787598,grad_norm: 0.6683902956730476, iteration: 405108
loss: 1.0242189168930054,grad_norm: 0.933341619904354, iteration: 405109
loss: 1.0495988130569458,grad_norm: 0.8993578985364747, iteration: 405110
loss: 1.1209170818328857,grad_norm: 0.9999999582679869, iteration: 405111
loss: 1.0992954969406128,grad_norm: 0.8992294858156225, iteration: 405112
loss: 1.051222801208496,grad_norm: 0.9999996604980366, iteration: 405113
loss: 1.017576813697815,grad_norm: 0.8406583800969755, iteration: 405114
loss: 1.078312873840332,grad_norm: 0.9600336292096042, iteration: 405115
loss: 1.0421452522277832,grad_norm: 0.8804740102292322, iteration: 405116
loss: 0.98870849609375,grad_norm: 0.842798713050492, iteration: 405117
loss: 1.018188238143921,grad_norm: 0.9999998398911517, iteration: 405118
loss: 0.9895814061164856,grad_norm: 0.9999991984316076, iteration: 405119
loss: 1.0266811847686768,grad_norm: 0.9789276721893508, iteration: 405120
loss: 0.9765824675559998,grad_norm: 0.9999991804679083, iteration: 405121
loss: 1.0386087894439697,grad_norm: 0.999999803654809, iteration: 405122
loss: 1.0286577939987183,grad_norm: 0.8246701539020194, iteration: 405123
loss: 0.9850395321846008,grad_norm: 0.8076048063977741, iteration: 405124
loss: 1.140205979347229,grad_norm: 0.7982052478628373, iteration: 405125
loss: 0.9995379447937012,grad_norm: 0.8220775019795544, iteration: 405126
loss: 1.0835728645324707,grad_norm: 0.9999998611628379, iteration: 405127
loss: 1.0197644233703613,grad_norm: 0.7020970915132588, iteration: 405128
loss: 1.0477399826049805,grad_norm: 0.7354729672230068, iteration: 405129
loss: 1.006546974182129,grad_norm: 0.6444570504787971, iteration: 405130
loss: 1.018140196800232,grad_norm: 0.9999990945910758, iteration: 405131
loss: 1.0452299118041992,grad_norm: 0.8907973383813061, iteration: 405132
loss: 1.040189266204834,grad_norm: 0.9999993962629578, iteration: 405133
loss: 1.023051381111145,grad_norm: 0.7536610394063534, iteration: 405134
loss: 1.0384663343429565,grad_norm: 0.7119174244675535, iteration: 405135
loss: 1.0362249612808228,grad_norm: 0.7768214761905817, iteration: 405136
loss: 0.9990086555480957,grad_norm: 0.9999997462904635, iteration: 405137
loss: 0.9783429503440857,grad_norm: 0.851952560284658, iteration: 405138
loss: 1.0004595518112183,grad_norm: 0.8705038010441423, iteration: 405139
loss: 0.9679774641990662,grad_norm: 0.9999994577261487, iteration: 405140
loss: 1.0229287147521973,grad_norm: 0.7282743847415193, iteration: 405141
loss: 1.0055854320526123,grad_norm: 0.9999997998469256, iteration: 405142
loss: 1.0676236152648926,grad_norm: 0.9999998965158529, iteration: 405143
loss: 0.9865666031837463,grad_norm: 0.8754665030824528, iteration: 405144
loss: 1.0599418878555298,grad_norm: 0.9999995012690965, iteration: 405145
loss: 1.1413503885269165,grad_norm: 0.873447807100233, iteration: 405146
loss: 1.0054657459259033,grad_norm: 0.7882864753882676, iteration: 405147
loss: 1.0813533067703247,grad_norm: 0.9995305678955955, iteration: 405148
loss: 1.0795238018035889,grad_norm: 0.9999996211366123, iteration: 405149
loss: 1.0029466152191162,grad_norm: 0.700886908754251, iteration: 405150
loss: 1.030150055885315,grad_norm: 0.9999997212337851, iteration: 405151
loss: 1.0201342105865479,grad_norm: 0.8639861417283691, iteration: 405152
loss: 0.9627806544303894,grad_norm: 0.9046768686794014, iteration: 405153
loss: 1.0316451787948608,grad_norm: 0.8415527301742822, iteration: 405154
loss: 1.0290533304214478,grad_norm: 0.7850539993891886, iteration: 405155
loss: 1.027020812034607,grad_norm: 0.9999998517117654, iteration: 405156
loss: 1.0139414072036743,grad_norm: 0.797361911651404, iteration: 405157
loss: 1.140847086906433,grad_norm: 0.9999994430966651, iteration: 405158
loss: 0.9377537369728088,grad_norm: 0.6935346630224319, iteration: 405159
loss: 0.9803563952445984,grad_norm: 0.784114816505024, iteration: 405160
loss: 1.0841416120529175,grad_norm: 0.9999992308001334, iteration: 405161
loss: 1.0107170343399048,grad_norm: 0.8678705550073784, iteration: 405162
loss: 1.0582927465438843,grad_norm: 0.9999993146327005, iteration: 405163
loss: 1.0073424577713013,grad_norm: 0.6218899583626527, iteration: 405164
loss: 0.9922021627426147,grad_norm: 0.8078448561612175, iteration: 405165
loss: 0.9894460439682007,grad_norm: 0.7516160630850887, iteration: 405166
loss: 0.9774032831192017,grad_norm: 0.7715536816514328, iteration: 405167
loss: 1.012500286102295,grad_norm: 0.86468679058969, iteration: 405168
loss: 1.030918836593628,grad_norm: 0.9999999313486354, iteration: 405169
loss: 1.1104472875595093,grad_norm: 0.9999997286026655, iteration: 405170
loss: 1.024125099182129,grad_norm: 0.7790695390274572, iteration: 405171
loss: 0.9808809161186218,grad_norm: 0.7126118187484021, iteration: 405172
loss: 1.025850534439087,grad_norm: 0.7993632153745354, iteration: 405173
loss: 1.0130794048309326,grad_norm: 0.7119167023512825, iteration: 405174
loss: 0.9967355132102966,grad_norm: 0.7849568726452325, iteration: 405175
loss: 1.1929206848144531,grad_norm: 0.999999152757743, iteration: 405176
loss: 0.9926429986953735,grad_norm: 0.7366012907754304, iteration: 405177
loss: 1.0134745836257935,grad_norm: 0.8210146816707672, iteration: 405178
loss: 1.0594009160995483,grad_norm: 0.7522269835104889, iteration: 405179
loss: 1.0530532598495483,grad_norm: 0.9999998927608706, iteration: 405180
loss: 1.0124977827072144,grad_norm: 0.8641125358121848, iteration: 405181
loss: 0.9583075046539307,grad_norm: 0.7615074042941863, iteration: 405182
loss: 1.0012778043746948,grad_norm: 0.7376633156990355, iteration: 405183
loss: 0.9955683350563049,grad_norm: 0.832173385282255, iteration: 405184
loss: 0.9961076974868774,grad_norm: 0.8439815302058148, iteration: 405185
loss: 1.0446659326553345,grad_norm: 0.6976823039867494, iteration: 405186
loss: 0.9960501790046692,grad_norm: 0.9999991160940359, iteration: 405187
loss: 1.018728256225586,grad_norm: 0.8347843870088871, iteration: 405188
loss: 1.0023014545440674,grad_norm: 0.9108283508506106, iteration: 405189
loss: 1.0161434412002563,grad_norm: 0.9999992633290204, iteration: 405190
loss: 1.0067682266235352,grad_norm: 0.933091084929219, iteration: 405191
loss: 1.0561593770980835,grad_norm: 0.7920824234878258, iteration: 405192
loss: 1.0014671087265015,grad_norm: 0.9504871790528785, iteration: 405193
loss: 0.9947428703308105,grad_norm: 0.9203649985411633, iteration: 405194
loss: 1.1185194253921509,grad_norm: 0.99999954641436, iteration: 405195
loss: 1.000644326210022,grad_norm: 0.7564384260686315, iteration: 405196
loss: 1.0035964250564575,grad_norm: 0.8195875631242122, iteration: 405197
loss: 1.1038625240325928,grad_norm: 0.9999999355136778, iteration: 405198
loss: 1.016237497329712,grad_norm: 0.9324920069082343, iteration: 405199
loss: 0.9915580749511719,grad_norm: 0.6960392947536913, iteration: 405200
loss: 1.0249779224395752,grad_norm: 0.9999991165832209, iteration: 405201
loss: 1.0359230041503906,grad_norm: 0.8196718393815233, iteration: 405202
loss: 1.0862112045288086,grad_norm: 0.9999995117808906, iteration: 405203
loss: 1.0016484260559082,grad_norm: 0.9725145648279835, iteration: 405204
loss: 0.9959339499473572,grad_norm: 0.9999990616945796, iteration: 405205
loss: 0.9727177023887634,grad_norm: 0.6511504409702277, iteration: 405206
loss: 0.9760453701019287,grad_norm: 0.7527986354675905, iteration: 405207
loss: 0.9827671051025391,grad_norm: 0.7697311454956284, iteration: 405208
loss: 0.9832751154899597,grad_norm: 0.8746614345480085, iteration: 405209
loss: 1.2027331590652466,grad_norm: 0.9999997058316165, iteration: 405210
loss: 1.0126293897628784,grad_norm: 0.9733716396927835, iteration: 405211
loss: 1.09295654296875,grad_norm: 0.9999997266571968, iteration: 405212
loss: 1.0128093957901,grad_norm: 0.9999997244925136, iteration: 405213
loss: 1.055228590965271,grad_norm: 0.9178892071958934, iteration: 405214
loss: 1.035968542098999,grad_norm: 0.8445971380176082, iteration: 405215
loss: 1.0046366453170776,grad_norm: 0.8576912693657173, iteration: 405216
loss: 1.039135456085205,grad_norm: 0.9885986536864412, iteration: 405217
loss: 0.9994218945503235,grad_norm: 0.7437672634432324, iteration: 405218
loss: 1.041130781173706,grad_norm: 0.7900314703160567, iteration: 405219
loss: 0.993068277835846,grad_norm: 0.7121630210897489, iteration: 405220
loss: 1.0162296295166016,grad_norm: 0.8271966922020583, iteration: 405221
loss: 1.0759795904159546,grad_norm: 0.9999994869805934, iteration: 405222
loss: 1.0006970167160034,grad_norm: 0.7993557652452508, iteration: 405223
loss: 1.0349112749099731,grad_norm: 0.9999991740870952, iteration: 405224
loss: 1.0431240797042847,grad_norm: 0.9999992605705696, iteration: 405225
loss: 0.988253653049469,grad_norm: 0.8072504709335948, iteration: 405226
loss: 1.0353013277053833,grad_norm: 0.999999991065864, iteration: 405227
loss: 1.0455402135849,grad_norm: 0.9999991252084356, iteration: 405228
loss: 0.9881852269172668,grad_norm: 0.7513540330309457, iteration: 405229
loss: 0.9985529780387878,grad_norm: 0.6199626854944048, iteration: 405230
loss: 1.012826919555664,grad_norm: 0.8828627204730526, iteration: 405231
loss: 1.0047529935836792,grad_norm: 0.9999994412942671, iteration: 405232
loss: 1.1389676332473755,grad_norm: 0.7797031512254502, iteration: 405233
loss: 1.0132739543914795,grad_norm: 0.7262383841311498, iteration: 405234
loss: 0.996200680732727,grad_norm: 0.6581520627608013, iteration: 405235
loss: 1.021462321281433,grad_norm: 0.9999998833610488, iteration: 405236
loss: 0.9695975184440613,grad_norm: 0.999999154134703, iteration: 405237
loss: 0.9893696904182434,grad_norm: 0.911845906692012, iteration: 405238
loss: 1.0402082204818726,grad_norm: 0.999999195900458, iteration: 405239
loss: 0.990617573261261,grad_norm: 0.6351546147182522, iteration: 405240
loss: 1.0059480667114258,grad_norm: 0.8926173523683424, iteration: 405241
loss: 1.0171087980270386,grad_norm: 0.7266713847271771, iteration: 405242
loss: 1.0130525827407837,grad_norm: 0.8135814496828138, iteration: 405243
loss: 1.053726077079773,grad_norm: 0.9999996970369204, iteration: 405244
loss: 1.0199005603790283,grad_norm: 0.9157429395390008, iteration: 405245
loss: 0.9964251518249512,grad_norm: 0.690579550474742, iteration: 405246
loss: 1.0205647945404053,grad_norm: 0.9999991795621656, iteration: 405247
loss: 1.078739881515503,grad_norm: 0.9999994108552498, iteration: 405248
loss: 1.0362516641616821,grad_norm: 0.8650328618134905, iteration: 405249
loss: 1.091055989265442,grad_norm: 0.9999998732938818, iteration: 405250
loss: 1.1372483968734741,grad_norm: 0.9999999725766066, iteration: 405251
loss: 1.0214481353759766,grad_norm: 0.8746908226756903, iteration: 405252
loss: 0.9607887864112854,grad_norm: 0.7452760999610294, iteration: 405253
loss: 0.9862627387046814,grad_norm: 0.850089919266436, iteration: 405254
loss: 1.0117113590240479,grad_norm: 0.6704520702956739, iteration: 405255
loss: 1.0185546875,grad_norm: 0.6748725829713812, iteration: 405256
loss: 1.0854110717773438,grad_norm: 0.9999989813914257, iteration: 405257
loss: 1.0462342500686646,grad_norm: 0.7773061491819253, iteration: 405258
loss: 0.9912973046302795,grad_norm: 0.7144172739740243, iteration: 405259
loss: 1.036149501800537,grad_norm: 0.8972092464538588, iteration: 405260
loss: 1.099915862083435,grad_norm: 0.9999996348677549, iteration: 405261
loss: 1.2610926628112793,grad_norm: 0.9999999624045497, iteration: 405262
loss: 1.0000133514404297,grad_norm: 0.8159023468754982, iteration: 405263
loss: 1.0412126779556274,grad_norm: 0.7639671741946749, iteration: 405264
loss: 1.0098494291305542,grad_norm: 0.8596011863126707, iteration: 405265
loss: 1.0432006120681763,grad_norm: 0.9999998774262138, iteration: 405266
loss: 0.9160791635513306,grad_norm: 0.7606295200898103, iteration: 405267
loss: 1.0147554874420166,grad_norm: 0.8257770623680826, iteration: 405268
loss: 1.0530909299850464,grad_norm: 0.8355836415523006, iteration: 405269
loss: 1.007530927658081,grad_norm: 0.8298135266493394, iteration: 405270
loss: 1.0737066268920898,grad_norm: 0.6979519528887976, iteration: 405271
loss: 0.9814390540122986,grad_norm: 0.7224770589975834, iteration: 405272
loss: 1.0082406997680664,grad_norm: 0.7962301939192573, iteration: 405273
loss: 1.0211650133132935,grad_norm: 0.7997109376520423, iteration: 405274
loss: 1.0075891017913818,grad_norm: 0.742412029397414, iteration: 405275
loss: 0.9560557007789612,grad_norm: 0.837649043266479, iteration: 405276
loss: 1.0464328527450562,grad_norm: 0.8352645678418288, iteration: 405277
loss: 1.0543243885040283,grad_norm: 0.9499806720851561, iteration: 405278
loss: 0.9780929684638977,grad_norm: 0.6587503864360353, iteration: 405279
loss: 1.0018596649169922,grad_norm: 0.9048980953775589, iteration: 405280
loss: 1.017420768737793,grad_norm: 0.9999994948723554, iteration: 405281
loss: 1.0044026374816895,grad_norm: 0.9999993616691583, iteration: 405282
loss: 1.1210120916366577,grad_norm: 0.9999997515539114, iteration: 405283
loss: 1.0065429210662842,grad_norm: 0.8291870424015005, iteration: 405284
loss: 0.9764072895050049,grad_norm: 0.6928906217970111, iteration: 405285
loss: 1.0200324058532715,grad_norm: 0.8901344648522664, iteration: 405286
loss: 1.054585337638855,grad_norm: 0.9999991785163023, iteration: 405287
loss: 1.0596705675125122,grad_norm: 0.7024844785610882, iteration: 405288
loss: 0.9894307851791382,grad_norm: 0.7741873623417148, iteration: 405289
loss: 0.9651533961296082,grad_norm: 0.8648231527845976, iteration: 405290
loss: 0.948906421661377,grad_norm: 0.8163379449873115, iteration: 405291
loss: 0.9937102198600769,grad_norm: 0.83813797660632, iteration: 405292
loss: 1.068538784980774,grad_norm: 0.7316846453676759, iteration: 405293
loss: 0.9702791571617126,grad_norm: 0.8087002593858499, iteration: 405294
loss: 1.0033680200576782,grad_norm: 0.735169441243943, iteration: 405295
loss: 0.9847001433372498,grad_norm: 0.7774172360913307, iteration: 405296
loss: 1.0174245834350586,grad_norm: 0.9999993523244629, iteration: 405297
loss: 1.0232937335968018,grad_norm: 0.8320444868207676, iteration: 405298
loss: 1.0381553173065186,grad_norm: 0.9999990632655196, iteration: 405299
loss: 1.089196801185608,grad_norm: 0.918827646078046, iteration: 405300
loss: 0.9988479614257812,grad_norm: 0.8215223717477199, iteration: 405301
loss: 1.0304112434387207,grad_norm: 0.927331459884904, iteration: 405302
loss: 1.0216221809387207,grad_norm: 0.8103692514832368, iteration: 405303
loss: 1.0059685707092285,grad_norm: 0.7370127652171803, iteration: 405304
loss: 0.9899781942367554,grad_norm: 0.6726253213663465, iteration: 405305
loss: 0.9706552624702454,grad_norm: 0.7566098501926236, iteration: 405306
loss: 0.9867632985115051,grad_norm: 0.9999990388639928, iteration: 405307
loss: 1.043282389640808,grad_norm: 0.7281418368037482, iteration: 405308
loss: 1.0102198123931885,grad_norm: 0.8439733610440845, iteration: 405309
loss: 1.0458834171295166,grad_norm: 0.784229051533149, iteration: 405310
loss: 1.1620911359786987,grad_norm: 0.9999999199896288, iteration: 405311
loss: 1.0049129724502563,grad_norm: 0.7991058704823744, iteration: 405312
loss: 0.9965614676475525,grad_norm: 0.8239625429700687, iteration: 405313
loss: 1.0031673908233643,grad_norm: 0.9815333036739161, iteration: 405314
loss: 1.0000420808792114,grad_norm: 0.777291537798878, iteration: 405315
loss: 1.0660501718521118,grad_norm: 0.8945973713472496, iteration: 405316
loss: 1.0091795921325684,grad_norm: 0.9423936038342235, iteration: 405317
loss: 0.9976350665092468,grad_norm: 0.8729937535329279, iteration: 405318
loss: 0.9704414010047913,grad_norm: 0.7618634052214132, iteration: 405319
loss: 1.0524442195892334,grad_norm: 0.9613226470872563, iteration: 405320
loss: 0.9855172038078308,grad_norm: 0.798478933101518, iteration: 405321
loss: 1.0045846700668335,grad_norm: 0.7948713643506797, iteration: 405322
loss: 1.0193997621536255,grad_norm: 0.787403491074579, iteration: 405323
loss: 1.0003873109817505,grad_norm: 0.7613107385001799, iteration: 405324
loss: 1.0449140071868896,grad_norm: 0.8404320940934232, iteration: 405325
loss: 1.0250695943832397,grad_norm: 0.8502645148024454, iteration: 405326
loss: 1.0319029092788696,grad_norm: 0.999999383294966, iteration: 405327
loss: 0.9861747026443481,grad_norm: 0.7790218814998676, iteration: 405328
loss: 0.9876166582107544,grad_norm: 0.6923363088735718, iteration: 405329
loss: 0.972465455532074,grad_norm: 0.777781269322824, iteration: 405330
loss: 1.0011979341506958,grad_norm: 0.8031696004992086, iteration: 405331
loss: 0.968781054019928,grad_norm: 0.7139408144255872, iteration: 405332
loss: 1.0103572607040405,grad_norm: 0.765634667806057, iteration: 405333
loss: 0.9725193381309509,grad_norm: 0.8303403488599626, iteration: 405334
loss: 0.9781157970428467,grad_norm: 0.841685701335465, iteration: 405335
loss: 0.9661178588867188,grad_norm: 0.9836753909950121, iteration: 405336
loss: 0.9532998204231262,grad_norm: 0.9999993933718528, iteration: 405337
loss: 1.0028177499771118,grad_norm: 0.7060391371741231, iteration: 405338
loss: 1.011218786239624,grad_norm: 0.6899377741817228, iteration: 405339
loss: 1.001939296722412,grad_norm: 0.8645350268827714, iteration: 405340
loss: 1.0736417770385742,grad_norm: 0.7958179159563179, iteration: 405341
loss: 0.9997029304504395,grad_norm: 0.785884502462103, iteration: 405342
loss: 1.0246460437774658,grad_norm: 0.7347282636418406, iteration: 405343
loss: 1.0275205373764038,grad_norm: 0.8812792041236507, iteration: 405344
loss: 0.9634562730789185,grad_norm: 0.7048963019092477, iteration: 405345
loss: 1.0350582599639893,grad_norm: 0.7563184155106822, iteration: 405346
loss: 1.0144965648651123,grad_norm: 0.7112322587728664, iteration: 405347
loss: 1.0199389457702637,grad_norm: 0.9724947398200529, iteration: 405348
loss: 1.0854389667510986,grad_norm: 0.9999991745757243, iteration: 405349
loss: 1.0076179504394531,grad_norm: 0.9207389778395976, iteration: 405350
loss: 1.0025149583816528,grad_norm: 0.8769342124958726, iteration: 405351
loss: 0.9571285247802734,grad_norm: 0.7202679882407292, iteration: 405352
loss: 0.9907398223876953,grad_norm: 0.968217020834581, iteration: 405353
loss: 0.974849283695221,grad_norm: 0.7633559584648627, iteration: 405354
loss: 1.0200772285461426,grad_norm: 0.8098788983469182, iteration: 405355
loss: 1.019390344619751,grad_norm: 0.9999999873731998, iteration: 405356
loss: 1.0353459119796753,grad_norm: 0.8579419340698129, iteration: 405357
loss: 1.0192646980285645,grad_norm: 0.7713925720422828, iteration: 405358
loss: 1.0182013511657715,grad_norm: 0.7647157490061692, iteration: 405359
loss: 0.976768434047699,grad_norm: 0.8081747089240847, iteration: 405360
loss: 1.0049761533737183,grad_norm: 0.7850066960868284, iteration: 405361
loss: 1.0138177871704102,grad_norm: 0.8770733412571756, iteration: 405362
loss: 0.9826337099075317,grad_norm: 0.728455847199056, iteration: 405363
loss: 1.0163044929504395,grad_norm: 0.8355338509507841, iteration: 405364
loss: 0.9884698987007141,grad_norm: 0.8357470958197226, iteration: 405365
loss: 1.0179381370544434,grad_norm: 0.8040580675603015, iteration: 405366
loss: 1.007797360420227,grad_norm: 0.999999860343857, iteration: 405367
loss: 0.9877059459686279,grad_norm: 0.8451984236685205, iteration: 405368
loss: 1.0759718418121338,grad_norm: 0.6609456346666462, iteration: 405369
loss: 0.9761800169944763,grad_norm: 0.8197802560795951, iteration: 405370
loss: 0.9879499077796936,grad_norm: 0.671235283671892, iteration: 405371
loss: 0.989323079586029,grad_norm: 0.7616384276694944, iteration: 405372
loss: 1.0272632837295532,grad_norm: 0.9999997653241809, iteration: 405373
loss: 0.9762255549430847,grad_norm: 0.8051477864874175, iteration: 405374
loss: 0.9829712510108948,grad_norm: 0.8023871129995815, iteration: 405375
loss: 0.9988266825675964,grad_norm: 0.7283538201170603, iteration: 405376
loss: 1.038031816482544,grad_norm: 0.9999990732831159, iteration: 405377
loss: 1.0214240550994873,grad_norm: 0.7266100816924987, iteration: 405378
loss: 0.9933182001113892,grad_norm: 0.720476279872135, iteration: 405379
loss: 0.9984532594680786,grad_norm: 0.6701809812719403, iteration: 405380
loss: 0.9771799445152283,grad_norm: 0.6748567200288655, iteration: 405381
loss: 0.9796651005744934,grad_norm: 0.7009376792782578, iteration: 405382
loss: 1.010071873664856,grad_norm: 0.7979482475843716, iteration: 405383
loss: 0.9676814079284668,grad_norm: 0.7843715663476494, iteration: 405384
loss: 0.9683967232704163,grad_norm: 0.747847602263753, iteration: 405385
loss: 1.0204936265945435,grad_norm: 0.8521785658177035, iteration: 405386
loss: 1.0097863674163818,grad_norm: 0.8449644192544302, iteration: 405387
loss: 1.0094951391220093,grad_norm: 0.9999991852584136, iteration: 405388
loss: 1.0380609035491943,grad_norm: 0.7074988007351273, iteration: 405389
loss: 0.9943398833274841,grad_norm: 0.7130444265433659, iteration: 405390
loss: 0.9799360036849976,grad_norm: 0.8948074337250633, iteration: 405391
loss: 1.095937967300415,grad_norm: 0.7925052853370048, iteration: 405392
loss: 0.9699659943580627,grad_norm: 0.7917268464302065, iteration: 405393
loss: 0.9604031443595886,grad_norm: 0.8974740706210045, iteration: 405394
loss: 0.9899640083312988,grad_norm: 0.9999994218817799, iteration: 405395
loss: 0.9902884364128113,grad_norm: 0.999999227977496, iteration: 405396
loss: 1.0393544435501099,grad_norm: 0.8162297446504475, iteration: 405397
loss: 1.0503087043762207,grad_norm: 0.9685434117150686, iteration: 405398
loss: 1.013818621635437,grad_norm: 0.9999998035736435, iteration: 405399
loss: 1.0129528045654297,grad_norm: 0.7673841662867643, iteration: 405400
loss: 0.9673580527305603,grad_norm: 0.6945153305372344, iteration: 405401
loss: 0.9935731887817383,grad_norm: 0.7771349033881411, iteration: 405402
loss: 1.0292996168136597,grad_norm: 0.9999999430698895, iteration: 405403
loss: 1.011687159538269,grad_norm: 0.7883944295128342, iteration: 405404
loss: 1.1046996116638184,grad_norm: 0.9999994296546891, iteration: 405405
loss: 1.0138033628463745,grad_norm: 0.7462236857548087, iteration: 405406
loss: 0.9587609171867371,grad_norm: 0.9999989588014766, iteration: 405407
loss: 0.9815534949302673,grad_norm: 0.7065162952429869, iteration: 405408
loss: 0.9943743944168091,grad_norm: 0.8480892628259042, iteration: 405409
loss: 1.0293861627578735,grad_norm: 0.952143989192456, iteration: 405410
loss: 1.0135127305984497,grad_norm: 0.7409908692365423, iteration: 405411
loss: 1.0323684215545654,grad_norm: 0.8466882012212668, iteration: 405412
loss: 0.969978928565979,grad_norm: 0.8255179008126822, iteration: 405413
loss: 0.9986241459846497,grad_norm: 0.9999997721879272, iteration: 405414
loss: 1.0351732969284058,grad_norm: 0.9411094406216891, iteration: 405415
loss: 1.038934588432312,grad_norm: 0.7116561601622589, iteration: 405416
loss: 1.0230486392974854,grad_norm: 0.9999997767247105, iteration: 405417
loss: 1.0096917152404785,grad_norm: 0.6695723654020727, iteration: 405418
loss: 0.9722332954406738,grad_norm: 0.7568557526731333, iteration: 405419
loss: 1.000841736793518,grad_norm: 0.7481601348400121, iteration: 405420
loss: 1.054651141166687,grad_norm: 0.9999992909288079, iteration: 405421
loss: 0.9870056509971619,grad_norm: 0.7721526800158657, iteration: 405422
loss: 1.0577287673950195,grad_norm: 0.9999996175969895, iteration: 405423
loss: 1.0367518663406372,grad_norm: 0.855720089327969, iteration: 405424
loss: 1.0069047212600708,grad_norm: 0.9999997369317067, iteration: 405425
loss: 1.0754859447479248,grad_norm: 0.9999998551010819, iteration: 405426
loss: 1.0025554895401,grad_norm: 0.7882268754008502, iteration: 405427
loss: 1.0722178220748901,grad_norm: 0.857556208691259, iteration: 405428
loss: 1.0167632102966309,grad_norm: 0.7633651890690739, iteration: 405429
loss: 1.014758586883545,grad_norm: 0.9999997239942282, iteration: 405430
loss: 0.9883266687393188,grad_norm: 0.9999992961025191, iteration: 405431
loss: 1.0082545280456543,grad_norm: 0.8938476732367486, iteration: 405432
loss: 1.0092390775680542,grad_norm: 0.7665730317307196, iteration: 405433
loss: 0.9979049563407898,grad_norm: 0.8029082422115059, iteration: 405434
loss: 1.0298317670822144,grad_norm: 0.6938546958498161, iteration: 405435
loss: 0.9795240163803101,grad_norm: 0.8710556661365614, iteration: 405436
loss: 1.0373414754867554,grad_norm: 0.9747732672757785, iteration: 405437
loss: 1.0238432884216309,grad_norm: 0.9999992286899734, iteration: 405438
loss: 1.0695618391036987,grad_norm: 0.9999999200752417, iteration: 405439
loss: 0.9825437664985657,grad_norm: 0.6946181681608191, iteration: 405440
loss: 1.0954903364181519,grad_norm: 0.7023383076694353, iteration: 405441
loss: 0.9913455843925476,grad_norm: 0.8305986762438717, iteration: 405442
loss: 0.9836395978927612,grad_norm: 0.7226935892380124, iteration: 405443
loss: 1.0198665857315063,grad_norm: 0.9060716487633076, iteration: 405444
loss: 1.1350840330123901,grad_norm: 0.8799498189212966, iteration: 405445
loss: 1.0122487545013428,grad_norm: 0.7777432378804523, iteration: 405446
loss: 1.0816644430160522,grad_norm: 0.9999997120647293, iteration: 405447
loss: 0.9863840937614441,grad_norm: 0.9999990879735331, iteration: 405448
loss: 0.999706506729126,grad_norm: 0.9999993733149739, iteration: 405449
loss: 1.0484131574630737,grad_norm: 0.9999999662594748, iteration: 405450
loss: 0.9838888049125671,grad_norm: 0.761979783948392, iteration: 405451
loss: 1.0009477138519287,grad_norm: 0.7516962679373762, iteration: 405452
loss: 1.2300758361816406,grad_norm: 0.9999992064854251, iteration: 405453
loss: 1.049288034439087,grad_norm: 0.9999992339906815, iteration: 405454
loss: 1.0162423849105835,grad_norm: 0.9999995200515034, iteration: 405455
loss: 1.015202522277832,grad_norm: 0.9018733382874857, iteration: 405456
loss: 0.9762579798698425,grad_norm: 0.9840657193986475, iteration: 405457
loss: 0.9548807740211487,grad_norm: 0.821064747659527, iteration: 405458
loss: 1.0069962739944458,grad_norm: 0.7878770219198346, iteration: 405459
loss: 1.0326645374298096,grad_norm: 0.960077484868645, iteration: 405460
loss: 1.0225601196289062,grad_norm: 0.8711067055681248, iteration: 405461
loss: 0.9586178660392761,grad_norm: 0.8722876860279593, iteration: 405462
loss: 1.0164310932159424,grad_norm: 0.9880322284121867, iteration: 405463
loss: 0.957908034324646,grad_norm: 0.7572355758564417, iteration: 405464
loss: 0.9532158970832825,grad_norm: 0.8343668867490411, iteration: 405465
loss: 1.006973147392273,grad_norm: 0.999999648200115, iteration: 405466
loss: 0.9849332571029663,grad_norm: 0.8732022516918573, iteration: 405467
loss: 1.1439192295074463,grad_norm: 0.9470181227770204, iteration: 405468
loss: 1.0510574579238892,grad_norm: 0.636373835040246, iteration: 405469
loss: 1.082118034362793,grad_norm: 0.999999920309818, iteration: 405470
loss: 1.040071725845337,grad_norm: 0.7716567602855814, iteration: 405471
loss: 0.9886146187782288,grad_norm: 0.9999989726150492, iteration: 405472
loss: 0.9941856861114502,grad_norm: 0.8029620348408985, iteration: 405473
loss: 1.0113445520401,grad_norm: 0.6426427038109749, iteration: 405474
loss: 0.9746325612068176,grad_norm: 0.7762447418464735, iteration: 405475
loss: 1.0050783157348633,grad_norm: 0.7045384505330697, iteration: 405476
loss: 1.0094091892242432,grad_norm: 0.7696518788787456, iteration: 405477
loss: 1.0379093885421753,grad_norm: 0.8296472355699348, iteration: 405478
loss: 0.9760386943817139,grad_norm: 0.8263853057338235, iteration: 405479
loss: 0.9965263605117798,grad_norm: 0.9999991168742416, iteration: 405480
loss: 1.042157530784607,grad_norm: 0.8747738681357774, iteration: 405481
loss: 1.0247334241867065,grad_norm: 0.9999995945101512, iteration: 405482
loss: 1.1358048915863037,grad_norm: 0.9999994479000439, iteration: 405483
loss: 1.0083765983581543,grad_norm: 0.9050934802921738, iteration: 405484
loss: 0.9949026107788086,grad_norm: 0.9999994077578824, iteration: 405485
loss: 0.9966518878936768,grad_norm: 0.9999999431145878, iteration: 405486
loss: 1.080417513847351,grad_norm: 0.9999999305709856, iteration: 405487
loss: 1.00279700756073,grad_norm: 0.9999990794636816, iteration: 405488
loss: 0.9730063080787659,grad_norm: 0.77309792775559, iteration: 405489
loss: 1.0157958269119263,grad_norm: 0.9999998829560282, iteration: 405490
loss: 0.9924129247665405,grad_norm: 0.8184392188089903, iteration: 405491
loss: 1.0065940618515015,grad_norm: 0.9084700428084707, iteration: 405492
loss: 1.0850985050201416,grad_norm: 0.9999999138252865, iteration: 405493
loss: 1.0680404901504517,grad_norm: 0.9999990548866999, iteration: 405494
loss: 0.9849057793617249,grad_norm: 0.6985065920884402, iteration: 405495
loss: 0.9819478988647461,grad_norm: 0.8323247058226858, iteration: 405496
loss: 1.0103703737258911,grad_norm: 0.7966341146089227, iteration: 405497
loss: 1.0762231349945068,grad_norm: 0.9999997763830917, iteration: 405498
loss: 1.0563981533050537,grad_norm: 0.8423394194693312, iteration: 405499
loss: 1.0116950273513794,grad_norm: 0.8308020679574812, iteration: 405500
loss: 1.0204739570617676,grad_norm: 0.9452323132880357, iteration: 405501
loss: 0.9813601970672607,grad_norm: 0.9999992491090813, iteration: 405502
loss: 0.9879453778266907,grad_norm: 0.8553273776585388, iteration: 405503
loss: 1.032317876815796,grad_norm: 0.9999991280714728, iteration: 405504
loss: 1.0405848026275635,grad_norm: 0.9297034314955979, iteration: 405505
loss: 1.030297875404358,grad_norm: 0.9999991135821932, iteration: 405506
loss: 0.9965178370475769,grad_norm: 0.8629904332216579, iteration: 405507
loss: 1.0131025314331055,grad_norm: 0.9999999605180039, iteration: 405508
loss: 1.0213242769241333,grad_norm: 0.770047261041215, iteration: 405509
loss: 1.0129014253616333,grad_norm: 0.83555097938974, iteration: 405510
loss: 1.0018922090530396,grad_norm: 0.8461928987281093, iteration: 405511
loss: 1.0105477571487427,grad_norm: 0.8529405444661134, iteration: 405512
loss: 1.0931886434555054,grad_norm: 0.9918777530615179, iteration: 405513
loss: 1.0149368047714233,grad_norm: 0.7898835972374776, iteration: 405514
loss: 1.0034250020980835,grad_norm: 0.9999991966677622, iteration: 405515
loss: 1.0479618310928345,grad_norm: 0.9999993235977491, iteration: 405516
loss: 1.021713376045227,grad_norm: 0.7615321091807068, iteration: 405517
loss: 1.0341827869415283,grad_norm: 0.9999990369257846, iteration: 405518
loss: 1.0406394004821777,grad_norm: 0.7276150220779022, iteration: 405519
loss: 1.0207154750823975,grad_norm: 0.8300386625000343, iteration: 405520
loss: 0.9680259227752686,grad_norm: 0.9256247176397938, iteration: 405521
loss: 0.9680459499359131,grad_norm: 0.7527928158921775, iteration: 405522
loss: 1.041285514831543,grad_norm: 0.8910729404868881, iteration: 405523
loss: 1.01392662525177,grad_norm: 0.7794540133484085, iteration: 405524
loss: 0.9951289296150208,grad_norm: 0.7371242072885275, iteration: 405525
loss: 0.9862828850746155,grad_norm: 0.7710787555095369, iteration: 405526
loss: 1.032645344734192,grad_norm: 0.9999989303327089, iteration: 405527
loss: 0.9759593605995178,grad_norm: 0.9999991796750196, iteration: 405528
loss: 1.260055422782898,grad_norm: 0.9999990719522674, iteration: 405529
loss: 0.9994667768478394,grad_norm: 0.8715924014592933, iteration: 405530
loss: 1.0047422647476196,grad_norm: 0.7259636816551893, iteration: 405531
loss: 0.9612113237380981,grad_norm: 0.8714868120976229, iteration: 405532
loss: 0.9970112442970276,grad_norm: 0.8924752711426258, iteration: 405533
loss: 1.0390321016311646,grad_norm: 0.8023415914010248, iteration: 405534
loss: 1.002266526222229,grad_norm: 0.8652770454550461, iteration: 405535
loss: 1.0296058654785156,grad_norm: 0.8369350679096806, iteration: 405536
loss: 0.9860140681266785,grad_norm: 0.8189095400951509, iteration: 405537
loss: 0.9867721199989319,grad_norm: 0.7848751695451945, iteration: 405538
loss: 1.0142732858657837,grad_norm: 0.9999994110106382, iteration: 405539
loss: 1.0018047094345093,grad_norm: 0.9392041409310028, iteration: 405540
loss: 1.02582848072052,grad_norm: 0.7490625196716187, iteration: 405541
loss: 0.9927040934562683,grad_norm: 0.7922913746411301, iteration: 405542
loss: 1.0692987442016602,grad_norm: 0.9436758451012309, iteration: 405543
loss: 1.0225681066513062,grad_norm: 1.0000000477350923, iteration: 405544
loss: 1.0116928815841675,grad_norm: 0.899033642986676, iteration: 405545
loss: 1.0086722373962402,grad_norm: 0.9756784197932767, iteration: 405546
loss: 0.9926482439041138,grad_norm: 0.716933802689752, iteration: 405547
loss: 1.0118993520736694,grad_norm: 0.9767715946972919, iteration: 405548
loss: 1.0616791248321533,grad_norm: 0.8174436638249044, iteration: 405549
loss: 0.9902226328849792,grad_norm: 0.7788422028393598, iteration: 405550
loss: 1.0255534648895264,grad_norm: 0.6565119776408416, iteration: 405551
loss: 0.9744501113891602,grad_norm: 0.8096193834355812, iteration: 405552
loss: 1.0234572887420654,grad_norm: 0.9999993060669431, iteration: 405553
loss: 1.003575325012207,grad_norm: 0.717548494004634, iteration: 405554
loss: 1.0058865547180176,grad_norm: 0.8208298629308538, iteration: 405555
loss: 1.00369393825531,grad_norm: 0.8756454249274439, iteration: 405556
loss: 1.0036470890045166,grad_norm: 0.7195698910786194, iteration: 405557
loss: 0.9866774082183838,grad_norm: 0.7612909046075212, iteration: 405558
loss: 1.0357666015625,grad_norm: 0.9999989841208883, iteration: 405559
loss: 1.0021984577178955,grad_norm: 0.805221206589798, iteration: 405560
loss: 0.9948333501815796,grad_norm: 0.9999994987318912, iteration: 405561
loss: 1.0217485427856445,grad_norm: 0.8186369841695835, iteration: 405562
loss: 1.0001896619796753,grad_norm: 0.7118634375112459, iteration: 405563
loss: 0.9605808854103088,grad_norm: 0.757897142934991, iteration: 405564
loss: 1.0210508108139038,grad_norm: 0.8655226264370726, iteration: 405565
loss: 1.0703009366989136,grad_norm: 0.8824792652921299, iteration: 405566
loss: 0.9990814328193665,grad_norm: 0.9999999900274331, iteration: 405567
loss: 0.9879025816917419,grad_norm: 0.9999992659293756, iteration: 405568
loss: 1.0456483364105225,grad_norm: 0.9453464404783757, iteration: 405569
loss: 0.939522385597229,grad_norm: 0.8364700552616735, iteration: 405570
loss: 1.002252221107483,grad_norm: 0.7801227094841753, iteration: 405571
loss: 1.0431677103042603,grad_norm: 0.8401666075110995, iteration: 405572
loss: 0.9976009130477905,grad_norm: 0.8323911175982145, iteration: 405573
loss: 1.0005501508712769,grad_norm: 0.8732109274792846, iteration: 405574
loss: 1.033382534980774,grad_norm: 0.6640419961944383, iteration: 405575
loss: 0.9846692085266113,grad_norm: 0.8796967911376831, iteration: 405576
loss: 1.089746117591858,grad_norm: 0.766891609627789, iteration: 405577
loss: 1.014423131942749,grad_norm: 0.9999996429473224, iteration: 405578
loss: 1.0095505714416504,grad_norm: 0.6735150287681398, iteration: 405579
loss: 0.9954767227172852,grad_norm: 0.9999996740666315, iteration: 405580
loss: 1.0193533897399902,grad_norm: 0.623892066296262, iteration: 405581
loss: 0.994862973690033,grad_norm: 0.9040485730942198, iteration: 405582
loss: 1.0217852592468262,grad_norm: 0.6493510174514684, iteration: 405583
loss: 0.9719983339309692,grad_norm: 0.6925634332099173, iteration: 405584
loss: 1.0081031322479248,grad_norm: 0.8620181827603973, iteration: 405585
loss: 0.9956326484680176,grad_norm: 0.7838568565199864, iteration: 405586
loss: 1.0097413063049316,grad_norm: 0.632276028299573, iteration: 405587
loss: 1.0007137060165405,grad_norm: 0.7446354822212083, iteration: 405588
loss: 0.980570912361145,grad_norm: 0.989970162447851, iteration: 405589
loss: 0.9784767627716064,grad_norm: 0.7609023890591623, iteration: 405590
loss: 1.0204886198043823,grad_norm: 0.69888212339385, iteration: 405591
loss: 0.9999123215675354,grad_norm: 0.7950121069083198, iteration: 405592
loss: 1.0041465759277344,grad_norm: 0.7129564361752305, iteration: 405593
loss: 0.9574013352394104,grad_norm: 0.7748719125911803, iteration: 405594
loss: 1.0024114847183228,grad_norm: 0.8436897089274676, iteration: 405595
loss: 0.9960520267486572,grad_norm: 0.7695347672396945, iteration: 405596
loss: 1.021446943283081,grad_norm: 0.9999996556527934, iteration: 405597
loss: 0.9951674342155457,grad_norm: 0.7817312684607545, iteration: 405598
loss: 1.0148847103118896,grad_norm: 0.9350286272621775, iteration: 405599
loss: 0.9761821031570435,grad_norm: 0.7766809626045206, iteration: 405600
loss: 0.9959012269973755,grad_norm: 0.6747155899306958, iteration: 405601
loss: 1.0146551132202148,grad_norm: 0.8148283520595567, iteration: 405602
loss: 1.0599321126937866,grad_norm: 0.9999989986211352, iteration: 405603
loss: 1.002629280090332,grad_norm: 0.9999992058402533, iteration: 405604
loss: 1.0165311098098755,grad_norm: 0.9999999271826632, iteration: 405605
loss: 0.9861692786216736,grad_norm: 0.9999998475816545, iteration: 405606
loss: 1.038187861442566,grad_norm: 0.9999991120325745, iteration: 405607
loss: 1.0313080549240112,grad_norm: 0.7572519787285438, iteration: 405608
loss: 1.1042605638504028,grad_norm: 0.8713369786242856, iteration: 405609
loss: 1.0677036046981812,grad_norm: 0.6806597604238285, iteration: 405610
loss: 0.9797422289848328,grad_norm: 0.8278257239427765, iteration: 405611
loss: 0.9375041127204895,grad_norm: 0.7649558500322746, iteration: 405612
loss: 1.1636967658996582,grad_norm: 0.9999998136162518, iteration: 405613
loss: 1.05177640914917,grad_norm: 0.9999998864666747, iteration: 405614
loss: 1.0004042387008667,grad_norm: 0.7330046877479859, iteration: 405615
loss: 1.0016262531280518,grad_norm: 0.7089325823199545, iteration: 405616
loss: 1.0853610038757324,grad_norm: 0.911635711644984, iteration: 405617
loss: 1.0157170295715332,grad_norm: 0.9999992584544212, iteration: 405618
loss: 1.0886411666870117,grad_norm: 0.9999995294441594, iteration: 405619
loss: 0.9617464542388916,grad_norm: 0.7128939325582777, iteration: 405620
loss: 0.9970074892044067,grad_norm: 0.6301089663065781, iteration: 405621
loss: 0.9498204588890076,grad_norm: 0.9420209160029402, iteration: 405622
loss: 0.9969959259033203,grad_norm: 0.6466393892620489, iteration: 405623
loss: 1.0082740783691406,grad_norm: 0.8490286424435323, iteration: 405624
loss: 0.962556779384613,grad_norm: 0.6450976152635447, iteration: 405625
loss: 1.0855823755264282,grad_norm: 0.9449080733350973, iteration: 405626
loss: 1.057175636291504,grad_norm: 0.9999993168086668, iteration: 405627
loss: 0.9972987174987793,grad_norm: 0.7536680210973886, iteration: 405628
loss: 0.9717776775360107,grad_norm: 0.7256598376175593, iteration: 405629
loss: 1.008479356765747,grad_norm: 0.7932248201887886, iteration: 405630
loss: 1.0610432624816895,grad_norm: 0.9999994652311837, iteration: 405631
loss: 1.0156127214431763,grad_norm: 0.9457566164760521, iteration: 405632
loss: 1.010134220123291,grad_norm: 0.7187409083061396, iteration: 405633
loss: 0.9755158424377441,grad_norm: 0.6229932635082596, iteration: 405634
loss: 1.1362063884735107,grad_norm: 0.9999990419428899, iteration: 405635
loss: 0.9938355088233948,grad_norm: 0.6615380280011007, iteration: 405636
loss: 1.0023869276046753,grad_norm: 0.8160929038733518, iteration: 405637
loss: 1.0777252912521362,grad_norm: 0.7864465316998547, iteration: 405638
loss: 0.9874612092971802,grad_norm: 0.7094852784875696, iteration: 405639
loss: 1.036667823791504,grad_norm: 0.8188106256902361, iteration: 405640
loss: 0.991316556930542,grad_norm: 0.7833382524609042, iteration: 405641
loss: 0.9854680895805359,grad_norm: 0.8474903217189037, iteration: 405642
loss: 0.9770767092704773,grad_norm: 0.944855843489585, iteration: 405643
loss: 0.9972319602966309,grad_norm: 0.7750533809512463, iteration: 405644
loss: 0.9946625828742981,grad_norm: 0.911647265413874, iteration: 405645
loss: 0.997675359249115,grad_norm: 0.7536528127919602, iteration: 405646
loss: 0.9642738699913025,grad_norm: 0.7533900169852891, iteration: 405647
loss: 1.0646673440933228,grad_norm: 0.9999996886271573, iteration: 405648
loss: 0.9919788837432861,grad_norm: 0.8470420389114315, iteration: 405649
loss: 0.9742088913917542,grad_norm: 0.6757323500713404, iteration: 405650
loss: 1.0139155387878418,grad_norm: 0.8238074553880818, iteration: 405651
loss: 1.0347269773483276,grad_norm: 0.9999992646084114, iteration: 405652
loss: 1.063995599746704,grad_norm: 0.9999992743221543, iteration: 405653
loss: 0.9661890864372253,grad_norm: 0.8191824928421814, iteration: 405654
loss: 1.0353617668151855,grad_norm: 0.9999993865295929, iteration: 405655
loss: 1.0198622941970825,grad_norm: 0.7247340644986711, iteration: 405656
loss: 1.0223355293273926,grad_norm: 0.8438377528988398, iteration: 405657
loss: 1.0469223260879517,grad_norm: 0.9999994721247046, iteration: 405658
loss: 1.0078014135360718,grad_norm: 0.705055058672986, iteration: 405659
loss: 0.9666802287101746,grad_norm: 0.8015228245747036, iteration: 405660
loss: 1.049835443496704,grad_norm: 0.8902902600778444, iteration: 405661
loss: 1.0133349895477295,grad_norm: 0.9440500760434637, iteration: 405662
loss: 1.1057387590408325,grad_norm: 0.8148880642038216, iteration: 405663
loss: 1.0026425123214722,grad_norm: 0.820822143953376, iteration: 405664
loss: 1.0328855514526367,grad_norm: 0.7106600172235781, iteration: 405665
loss: 0.9941716194152832,grad_norm: 0.7718272946276225, iteration: 405666
loss: 1.0103614330291748,grad_norm: 0.716354716551903, iteration: 405667
loss: 0.9980769753456116,grad_norm: 0.9999991118242575, iteration: 405668
loss: 1.0134127140045166,grad_norm: 0.903445325006871, iteration: 405669
loss: 1.015082597732544,grad_norm: 0.6734719266500274, iteration: 405670
loss: 0.9979632496833801,grad_norm: 0.9254553517343745, iteration: 405671
loss: 1.0191569328308105,grad_norm: 0.6646966357350359, iteration: 405672
loss: 1.055137038230896,grad_norm: 0.8396861043761137, iteration: 405673
loss: 1.04340398311615,grad_norm: 0.9999993419777374, iteration: 405674
loss: 1.0113070011138916,grad_norm: 0.9262018893520796, iteration: 405675
loss: 1.000217318534851,grad_norm: 0.5956174386659953, iteration: 405676
loss: 1.0190263986587524,grad_norm: 0.7265464017313636, iteration: 405677
loss: 1.0215152502059937,grad_norm: 0.8830195914026353, iteration: 405678
loss: 1.0079714059829712,grad_norm: 0.6230812452448594, iteration: 405679
loss: 0.9975308775901794,grad_norm: 0.9760132455979923, iteration: 405680
loss: 1.0002330541610718,grad_norm: 0.9612678813693342, iteration: 405681
loss: 0.9937275052070618,grad_norm: 0.9054131329461077, iteration: 405682
loss: 1.024364709854126,grad_norm: 0.8823601764008225, iteration: 405683
loss: 1.0012377500534058,grad_norm: 0.7175562934564051, iteration: 405684
loss: 1.0307705402374268,grad_norm: 0.9999998958287236, iteration: 405685
loss: 0.9948784708976746,grad_norm: 0.9999998924054412, iteration: 405686
loss: 1.2701655626296997,grad_norm: 0.9999998940932368, iteration: 405687
loss: 1.0202556848526,grad_norm: 0.8562158835898344, iteration: 405688
loss: 1.037518858909607,grad_norm: 0.9999991326420431, iteration: 405689
loss: 1.0044927597045898,grad_norm: 0.7893000591469805, iteration: 405690
loss: 1.0660364627838135,grad_norm: 0.9999992962560487, iteration: 405691
loss: 0.9856745600700378,grad_norm: 0.9999992289513197, iteration: 405692
loss: 0.9674992561340332,grad_norm: 0.7568922386800582, iteration: 405693
loss: 0.9961364269256592,grad_norm: 0.5943118390179962, iteration: 405694
loss: 0.9737736582756042,grad_norm: 0.7776116301757009, iteration: 405695
loss: 0.9486637115478516,grad_norm: 0.8951830982031934, iteration: 405696
loss: 0.997391939163208,grad_norm: 0.7791659960643624, iteration: 405697
loss: 0.9580361843109131,grad_norm: 0.7308794127864111, iteration: 405698
loss: 1.0601168870925903,grad_norm: 0.9999994471893431, iteration: 405699
loss: 1.106801152229309,grad_norm: 0.9999993087851837, iteration: 405700
loss: 1.0022586584091187,grad_norm: 0.9615173727692442, iteration: 405701
loss: 0.9964092969894409,grad_norm: 0.999999705960474, iteration: 405702
loss: 1.0487425327301025,grad_norm: 0.9999991486723746, iteration: 405703
loss: 1.0451260805130005,grad_norm: 0.9999999572537219, iteration: 405704
loss: 0.9914607405662537,grad_norm: 0.837924255203396, iteration: 405705
loss: 0.9990995526313782,grad_norm: 0.7789556405424823, iteration: 405706
loss: 1.0376479625701904,grad_norm: 0.7133519712435161, iteration: 405707
loss: 1.058326244354248,grad_norm: 0.7194723881682851, iteration: 405708
loss: 1.0584810972213745,grad_norm: 0.9476077233155326, iteration: 405709
loss: 1.0326279401779175,grad_norm: 0.7685947516984994, iteration: 405710
loss: 1.1027430295944214,grad_norm: 0.7124494003333453, iteration: 405711
loss: 1.0330190658569336,grad_norm: 0.999999654180222, iteration: 405712
loss: 1.0426729917526245,grad_norm: 0.999999695058726, iteration: 405713
loss: 1.0754629373550415,grad_norm: 0.8393152363917811, iteration: 405714
loss: 1.0022590160369873,grad_norm: 0.9999994576429805, iteration: 405715
loss: 1.0578579902648926,grad_norm: 0.9999998235650794, iteration: 405716
loss: 1.0320544242858887,grad_norm: 0.9999990749756713, iteration: 405717
loss: 1.0546678304672241,grad_norm: 0.9966534941369707, iteration: 405718
loss: 0.9850170016288757,grad_norm: 0.8594812624324448, iteration: 405719
loss: 1.1283925771713257,grad_norm: 0.9999991803267964, iteration: 405720
loss: 1.0234088897705078,grad_norm: 0.9999990973584302, iteration: 405721
loss: 0.9693177938461304,grad_norm: 0.896833254607525, iteration: 405722
loss: 1.1476974487304688,grad_norm: 0.9999998332623092, iteration: 405723
loss: 0.9864634871482849,grad_norm: 0.7589042275031196, iteration: 405724
loss: 1.0013484954833984,grad_norm: 0.7301689009908701, iteration: 405725
loss: 1.025536298751831,grad_norm: 0.8045220881623527, iteration: 405726
loss: 1.0005825757980347,grad_norm: 0.7199327760933908, iteration: 405727
loss: 1.0089424848556519,grad_norm: 0.9999997994895216, iteration: 405728
loss: 1.0923104286193848,grad_norm: 0.7987380140301753, iteration: 405729
loss: 1.0303314924240112,grad_norm: 0.7531654884714529, iteration: 405730
loss: 1.0236589908599854,grad_norm: 0.8246719767380039, iteration: 405731
loss: 1.0109363794326782,grad_norm: 0.7609685459187315, iteration: 405732
loss: 1.0227941274642944,grad_norm: 0.8546496415397384, iteration: 405733
loss: 1.0431084632873535,grad_norm: 0.8855591589939171, iteration: 405734
loss: 1.046697735786438,grad_norm: 0.7668569741199615, iteration: 405735
loss: 0.9849393367767334,grad_norm: 0.9999996503133199, iteration: 405736
loss: 1.1386843919754028,grad_norm: 0.9999991562212374, iteration: 405737
loss: 1.024991512298584,grad_norm: 0.938654165579265, iteration: 405738
loss: 1.0369484424591064,grad_norm: 0.7157254716435665, iteration: 405739
loss: 1.5244728326797485,grad_norm: 0.9999995822668358, iteration: 405740
loss: 1.042207956314087,grad_norm: 0.9484596245110671, iteration: 405741
loss: 0.9676021933555603,grad_norm: 0.9999994015663772, iteration: 405742
loss: 0.9916201829910278,grad_norm: 0.999999447374901, iteration: 405743
loss: 1.0242289304733276,grad_norm: 0.7773631578697301, iteration: 405744
loss: 1.176016092300415,grad_norm: 0.99999968090147, iteration: 405745
loss: 0.9930210113525391,grad_norm: 0.9890065130044706, iteration: 405746
loss: 0.9852979183197021,grad_norm: 0.7319502736371901, iteration: 405747
loss: 1.025926947593689,grad_norm: 0.7457207536953274, iteration: 405748
loss: 1.070551872253418,grad_norm: 0.8570712069989868, iteration: 405749
loss: 1.0561829805374146,grad_norm: 0.9042685477094389, iteration: 405750
loss: 1.067273497581482,grad_norm: 0.9999998466348623, iteration: 405751
loss: 1.0011374950408936,grad_norm: 0.6363814184059533, iteration: 405752
loss: 1.0040984153747559,grad_norm: 0.9999993440212022, iteration: 405753
loss: 1.0269626379013062,grad_norm: 0.7280780055352712, iteration: 405754
loss: 0.9437695741653442,grad_norm: 0.7303143565811047, iteration: 405755
loss: 0.9770097136497498,grad_norm: 0.8162975522700977, iteration: 405756
loss: 1.0732578039169312,grad_norm: 0.9999998276163778, iteration: 405757
loss: 1.059869647026062,grad_norm: 0.9347531636944729, iteration: 405758
loss: 1.084287405014038,grad_norm: 0.8100564207313361, iteration: 405759
loss: 1.0792878866195679,grad_norm: 0.9999996266657639, iteration: 405760
loss: 0.967383623123169,grad_norm: 0.7628114469492167, iteration: 405761
loss: 1.0303246974945068,grad_norm: 0.8871650790435012, iteration: 405762
loss: 1.1271026134490967,grad_norm: 0.9999993776897514, iteration: 405763
loss: 1.0028733015060425,grad_norm: 0.8481943218679672, iteration: 405764
loss: 1.030562400817871,grad_norm: 0.8892398824504042, iteration: 405765
loss: 1.0208556652069092,grad_norm: 0.9156577468653636, iteration: 405766
loss: 1.058152198791504,grad_norm: 0.9999999415620717, iteration: 405767
loss: 1.0618236064910889,grad_norm: 0.836221376150975, iteration: 405768
loss: 1.0418641567230225,grad_norm: 0.8755530888211438, iteration: 405769
loss: 1.0520105361938477,grad_norm: 0.8363625630166468, iteration: 405770
loss: 1.1333290338516235,grad_norm: 1.0000000102189335, iteration: 405771
loss: 1.0887610912322998,grad_norm: 0.9304525071254047, iteration: 405772
loss: 1.0411254167556763,grad_norm: 0.7248174007249139, iteration: 405773
loss: 1.0335015058517456,grad_norm: 0.9999991607766956, iteration: 405774
loss: 1.0148435831069946,grad_norm: 0.742199480908564, iteration: 405775
loss: 1.0445401668548584,grad_norm: 0.9999997449093203, iteration: 405776
loss: 1.0027765035629272,grad_norm: 0.9999996048018335, iteration: 405777
loss: 1.0940492153167725,grad_norm: 0.9999991359798236, iteration: 405778
loss: 0.9666716456413269,grad_norm: 0.8002791937642488, iteration: 405779
loss: 1.067970871925354,grad_norm: 0.999999514045389, iteration: 405780
loss: 1.039651870727539,grad_norm: 0.9808428948447604, iteration: 405781
loss: 1.0170763731002808,grad_norm: 0.8936493108625317, iteration: 405782
loss: 1.046947717666626,grad_norm: 0.9999992422438743, iteration: 405783
loss: 1.1332502365112305,grad_norm: 0.9254471371863391, iteration: 405784
loss: 1.101531982421875,grad_norm: 0.7615528037619907, iteration: 405785
loss: 1.1317459344863892,grad_norm: 0.9999997091517252, iteration: 405786
loss: 1.0128273963928223,grad_norm: 0.8144620811896297, iteration: 405787
loss: 0.9893248677253723,grad_norm: 0.8251101385437954, iteration: 405788
loss: 0.9476807117462158,grad_norm: 0.7119435203674961, iteration: 405789
loss: 1.0945795774459839,grad_norm: 0.9999999521545744, iteration: 405790
loss: 1.0876439809799194,grad_norm: 0.999999359806461, iteration: 405791
loss: 1.1082156896591187,grad_norm: 0.9999995510693187, iteration: 405792
loss: 1.0398616790771484,grad_norm: 0.9999991173558075, iteration: 405793
loss: 1.0020114183425903,grad_norm: 0.8769466810529789, iteration: 405794
loss: 1.0166643857955933,grad_norm: 0.773573541125748, iteration: 405795
loss: 1.122528314590454,grad_norm: 0.9999991364631325, iteration: 405796
loss: 1.0943784713745117,grad_norm: 0.9999995438869478, iteration: 405797
loss: 1.11599862575531,grad_norm: 0.9398550522378422, iteration: 405798
loss: 1.05585515499115,grad_norm: 0.9252753949617526, iteration: 405799
loss: 1.233070731163025,grad_norm: 0.999999725139868, iteration: 405800
loss: 1.1433067321777344,grad_norm: 0.9999991393602066, iteration: 405801
loss: 1.0529427528381348,grad_norm: 0.9999995050477222, iteration: 405802
loss: 1.0789504051208496,grad_norm: 0.7092645071992936, iteration: 405803
loss: 1.026180624961853,grad_norm: 1.0000000286761146, iteration: 405804
loss: 1.0527597665786743,grad_norm: 0.8443172898359019, iteration: 405805
loss: 1.0230815410614014,grad_norm: 0.7829880795062765, iteration: 405806
loss: 1.032503604888916,grad_norm: 0.9091791467244634, iteration: 405807
loss: 1.120206356048584,grad_norm: 0.9999991076276789, iteration: 405808
loss: 1.1213035583496094,grad_norm: 0.999999900302322, iteration: 405809
loss: 0.9979721307754517,grad_norm: 0.716607378049562, iteration: 405810
loss: 0.974841296672821,grad_norm: 0.7275929967818984, iteration: 405811
loss: 1.0319230556488037,grad_norm: 0.9999993522993949, iteration: 405812
loss: 1.0780954360961914,grad_norm: 0.999999376326633, iteration: 405813
loss: 0.9848371148109436,grad_norm: 0.9642307255132546, iteration: 405814
loss: 0.953731894493103,grad_norm: 0.7387889736399988, iteration: 405815
loss: 1.0557231903076172,grad_norm: 0.8866622626503012, iteration: 405816
loss: 1.0327882766723633,grad_norm: 0.8473673887114079, iteration: 405817
loss: 1.142508625984192,grad_norm: 0.9999991539749383, iteration: 405818
loss: 1.0183407068252563,grad_norm: 0.6601400864053455, iteration: 405819
loss: 1.1200965642929077,grad_norm: 0.8060199946845994, iteration: 405820
loss: 1.123284101486206,grad_norm: 0.9999997126518112, iteration: 405821
loss: 1.026795506477356,grad_norm: 0.8673317292528316, iteration: 405822
loss: 1.1045185327529907,grad_norm: 0.9999995232715738, iteration: 405823
loss: 1.09218430519104,grad_norm: 0.9228777610979807, iteration: 405824
loss: 0.9833263158798218,grad_norm: 0.7812913239880013, iteration: 405825
loss: 1.0607863664627075,grad_norm: 0.7914331973190403, iteration: 405826
loss: 1.1436318159103394,grad_norm: 0.9999990328073421, iteration: 405827
loss: 1.0302116870880127,grad_norm: 0.9175986117331861, iteration: 405828
loss: 1.0840331315994263,grad_norm: 0.9999993030068242, iteration: 405829
loss: 1.1307318210601807,grad_norm: 0.9999995157714912, iteration: 405830
loss: 1.0176048278808594,grad_norm: 0.7259945119198238, iteration: 405831
loss: 0.9648984670639038,grad_norm: 0.9039194541437259, iteration: 405832
loss: 1.0297707319259644,grad_norm: 0.7864593016770505, iteration: 405833
loss: 1.0439207553863525,grad_norm: 0.7532453329253284, iteration: 405834
loss: 1.0863585472106934,grad_norm: 0.9999994951032926, iteration: 405835
loss: 1.0903244018554688,grad_norm: 0.9999991206503622, iteration: 405836
loss: 1.0186275243759155,grad_norm: 0.7705531980415559, iteration: 405837
loss: 1.1232661008834839,grad_norm: 0.9176976111775823, iteration: 405838
loss: 1.0353225469589233,grad_norm: 0.900408232948954, iteration: 405839
loss: 1.076386570930481,grad_norm: 0.8522171613907036, iteration: 405840
loss: 1.0532318353652954,grad_norm: 0.8793980151278872, iteration: 405841
loss: 1.0417895317077637,grad_norm: 0.9999998304966682, iteration: 405842
loss: 1.0947062969207764,grad_norm: 0.8857510526671402, iteration: 405843
loss: 1.047799825668335,grad_norm: 0.9999998469019353, iteration: 405844
loss: 0.9993011951446533,grad_norm: 0.7194147384231181, iteration: 405845
loss: 1.034532070159912,grad_norm: 0.7921937200247055, iteration: 405846
loss: 0.9530739784240723,grad_norm: 0.6966634023959971, iteration: 405847
loss: 1.1180883646011353,grad_norm: 0.7971066680174406, iteration: 405848
loss: 1.091691255569458,grad_norm: 0.9534619297974058, iteration: 405849
loss: 1.1022213697433472,grad_norm: 0.9219892003729143, iteration: 405850
loss: 1.1029702425003052,grad_norm: 0.9120641993333167, iteration: 405851
loss: 1.0605595111846924,grad_norm: 0.7722218630310065, iteration: 405852
loss: 1.0348272323608398,grad_norm: 0.9999994491079588, iteration: 405853
loss: 1.016880989074707,grad_norm: 0.7218008573539595, iteration: 405854
loss: 1.013087511062622,grad_norm: 0.9999990832412442, iteration: 405855
loss: 1.0242832899093628,grad_norm: 0.7313038289012107, iteration: 405856
loss: 1.1447501182556152,grad_norm: 0.999999914006318, iteration: 405857
loss: 1.017322301864624,grad_norm: 0.9866803958293527, iteration: 405858
loss: 1.1140254735946655,grad_norm: 0.9999991248571776, iteration: 405859
loss: 1.115914225578308,grad_norm: 0.9999995106641747, iteration: 405860
loss: 1.0927003622055054,grad_norm: 0.728264484051135, iteration: 405861
loss: 1.2268344163894653,grad_norm: 0.9999999059518436, iteration: 405862
loss: 1.0528067350387573,grad_norm: 0.876434004667632, iteration: 405863
loss: 1.0608211755752563,grad_norm: 0.8708098042093583, iteration: 405864
loss: 1.1205432415008545,grad_norm: 0.9999990304349264, iteration: 405865
loss: 0.9851852655410767,grad_norm: 0.7245016206475756, iteration: 405866
loss: 1.0059815645217896,grad_norm: 0.8407885239987176, iteration: 405867
loss: 1.0093046426773071,grad_norm: 0.695139677445476, iteration: 405868
loss: 1.0276987552642822,grad_norm: 0.9999990597784488, iteration: 405869
loss: 0.981295645236969,grad_norm: 0.9999991088315797, iteration: 405870
loss: 1.1302485466003418,grad_norm: 0.9999995052773422, iteration: 405871
loss: 1.1275609731674194,grad_norm: 0.7471644436136221, iteration: 405872
loss: 0.993255615234375,grad_norm: 0.9392633872779149, iteration: 405873
loss: 1.1585694551467896,grad_norm: 0.9999994163886433, iteration: 405874
loss: 1.087543249130249,grad_norm: 1.0000000331967882, iteration: 405875
loss: 1.0827510356903076,grad_norm: 0.9999997745789616, iteration: 405876
loss: 0.9727862477302551,grad_norm: 0.7673799076658746, iteration: 405877
loss: 0.9671432971954346,grad_norm: 0.9999988648450377, iteration: 405878
loss: 1.0230817794799805,grad_norm: 0.8569267935125184, iteration: 405879
loss: 1.0963813066482544,grad_norm: 0.9567757650941404, iteration: 405880
loss: 1.0126711130142212,grad_norm: 0.9999991932545803, iteration: 405881
loss: 0.9838616251945496,grad_norm: 0.6100530897473616, iteration: 405882
loss: 1.0252071619033813,grad_norm: 0.8519345982526983, iteration: 405883
loss: 1.0583542585372925,grad_norm: 0.9999998593790241, iteration: 405884
loss: 1.0129610300064087,grad_norm: 0.9862607174768644, iteration: 405885
loss: 1.00948965549469,grad_norm: 0.9999996853669604, iteration: 405886
loss: 1.038407802581787,grad_norm: 0.9466125149018144, iteration: 405887
loss: 1.0990217924118042,grad_norm: 0.9999995037191917, iteration: 405888
loss: 1.155173659324646,grad_norm: 0.9999996803831607, iteration: 405889
loss: 1.0432342290878296,grad_norm: 0.8373340906991474, iteration: 405890
loss: 0.9935563802719116,grad_norm: 0.9050801120042417, iteration: 405891
loss: 1.0787826776504517,grad_norm: 0.8980909047301243, iteration: 405892
loss: 1.024656891822815,grad_norm: 0.8316553851109262, iteration: 405893
loss: 1.0297130346298218,grad_norm: 0.7505185668565807, iteration: 405894
loss: 1.0407849550247192,grad_norm: 0.7903786725466596, iteration: 405895
loss: 1.0335290431976318,grad_norm: 0.7343479164838962, iteration: 405896
loss: 1.0239689350128174,grad_norm: 0.9999998516701119, iteration: 405897
loss: 1.191149353981018,grad_norm: 0.9999996195572582, iteration: 405898
loss: 1.0368430614471436,grad_norm: 0.7726627965790447, iteration: 405899
loss: 1.0416240692138672,grad_norm: 0.95025897573019, iteration: 405900
loss: 1.1051092147827148,grad_norm: 0.9999993904200922, iteration: 405901
loss: 1.1036229133605957,grad_norm: 0.9999999203691733, iteration: 405902
loss: 1.0175808668136597,grad_norm: 0.773087389436826, iteration: 405903
loss: 1.0066019296646118,grad_norm: 0.6824315904314108, iteration: 405904
loss: 1.0569976568222046,grad_norm: 0.7957905018225018, iteration: 405905
loss: 0.99642014503479,grad_norm: 0.9999993346016688, iteration: 405906
loss: 1.0117955207824707,grad_norm: 0.9416059784681875, iteration: 405907
loss: 0.9937199950218201,grad_norm: 0.7447326253841591, iteration: 405908
loss: 0.9761638045310974,grad_norm: 0.9999992819549045, iteration: 405909
loss: 1.0717573165893555,grad_norm: 0.9999992464047459, iteration: 405910
loss: 1.0665266513824463,grad_norm: 0.8323563563151057, iteration: 405911
loss: 1.0756347179412842,grad_norm: 0.911107525333358, iteration: 405912
loss: 1.0688689947128296,grad_norm: 0.9999999002187667, iteration: 405913
loss: 1.081528663635254,grad_norm: 0.9247250296268194, iteration: 405914
loss: 1.0284212827682495,grad_norm: 0.7876666651073064, iteration: 405915
loss: 1.056426763534546,grad_norm: 1.0000000078071347, iteration: 405916
loss: 1.0161843299865723,grad_norm: 0.8512354430809297, iteration: 405917
loss: 1.046281099319458,grad_norm: 0.9999997127044037, iteration: 405918
loss: 0.9967445731163025,grad_norm: 0.7137289663389161, iteration: 405919
loss: 1.0328706502914429,grad_norm: 0.783635309673985, iteration: 405920
loss: 1.0101723670959473,grad_norm: 0.9999999325914708, iteration: 405921
loss: 1.023127555847168,grad_norm: 0.8530549910424233, iteration: 405922
loss: 1.0626938343048096,grad_norm: 0.9999998881306766, iteration: 405923
loss: 0.9960511326789856,grad_norm: 0.8723468098988406, iteration: 405924
loss: 0.9983872771263123,grad_norm: 0.9999999606410443, iteration: 405925
loss: 1.0690324306488037,grad_norm: 0.9999990633016816, iteration: 405926
loss: 0.9619688391685486,grad_norm: 0.8428871478586895, iteration: 405927
loss: 1.0087250471115112,grad_norm: 0.9999998730351864, iteration: 405928
loss: 1.0964516401290894,grad_norm: 0.9999990913432895, iteration: 405929
loss: 1.073662281036377,grad_norm: 0.9999990525505528, iteration: 405930
loss: 1.0722301006317139,grad_norm: 0.9999995275510423, iteration: 405931
loss: 1.0301289558410645,grad_norm: 0.7972602272478284, iteration: 405932
loss: 1.0445270538330078,grad_norm: 0.9999991353543023, iteration: 405933
loss: 1.0508652925491333,grad_norm: 0.6836584486428232, iteration: 405934
loss: 1.0176939964294434,grad_norm: 0.9999998376833887, iteration: 405935
loss: 1.0180895328521729,grad_norm: 0.9041572575625031, iteration: 405936
loss: 1.0491933822631836,grad_norm: 0.7713740233227214, iteration: 405937
loss: 1.0292271375656128,grad_norm: 0.9999995987387111, iteration: 405938
loss: 1.045038104057312,grad_norm: 0.7736734289719921, iteration: 405939
loss: 0.9867494702339172,grad_norm: 0.9460383058659997, iteration: 405940
loss: 1.1150153875350952,grad_norm: 0.9230391965667802, iteration: 405941
loss: 1.0245639085769653,grad_norm: 0.9620931460027364, iteration: 405942
loss: 1.0464823246002197,grad_norm: 0.949799984234565, iteration: 405943
loss: 0.9937271475791931,grad_norm: 0.999999011868459, iteration: 405944
loss: 1.0075485706329346,grad_norm: 0.9999996254442676, iteration: 405945
loss: 1.1316032409667969,grad_norm: 0.9999995791130231, iteration: 405946
loss: 1.0202885866165161,grad_norm: 0.9999994962882007, iteration: 405947
loss: 1.114574670791626,grad_norm: 0.9999999526627223, iteration: 405948
loss: 1.0339149236679077,grad_norm: 0.9999995808770881, iteration: 405949
loss: 1.0039451122283936,grad_norm: 0.6536014696896876, iteration: 405950
loss: 0.9962132573127747,grad_norm: 0.7917489112646867, iteration: 405951
loss: 1.0019454956054688,grad_norm: 0.6903739706503731, iteration: 405952
loss: 1.0213860273361206,grad_norm: 0.8268815817471017, iteration: 405953
loss: 1.0693142414093018,grad_norm: 0.818381462369229, iteration: 405954
loss: 1.1178683042526245,grad_norm: 0.9516628683186827, iteration: 405955
loss: 1.0775065422058105,grad_norm: 0.999999569368156, iteration: 405956
loss: 0.9871698021888733,grad_norm: 0.863022317975126, iteration: 405957
loss: 1.0645630359649658,grad_norm: 0.9999992012944836, iteration: 405958
loss: 1.0911307334899902,grad_norm: 0.9999997135081105, iteration: 405959
loss: 1.0200355052947998,grad_norm: 0.7764105877633968, iteration: 405960
loss: 1.0337599515914917,grad_norm: 0.9999997889139028, iteration: 405961
loss: 1.1917879581451416,grad_norm: 0.9999999515603605, iteration: 405962
loss: 1.0458168983459473,grad_norm: 0.9999992449638156, iteration: 405963
loss: 1.0229767560958862,grad_norm: 0.7933905034909179, iteration: 405964
loss: 0.9683757424354553,grad_norm: 0.6719310614646836, iteration: 405965
loss: 1.0439006090164185,grad_norm: 0.9553047772295915, iteration: 405966
loss: 0.9907520413398743,grad_norm: 0.9225693920359221, iteration: 405967
loss: 1.0466656684875488,grad_norm: 0.9999998717925147, iteration: 405968
loss: 1.0333528518676758,grad_norm: 0.8335747352067754, iteration: 405969
loss: 0.9852421283721924,grad_norm: 0.8071019903905111, iteration: 405970
loss: 1.044069766998291,grad_norm: 0.7309394865143244, iteration: 405971
loss: 1.022119164466858,grad_norm: 0.8391575074084459, iteration: 405972
loss: 0.9858300685882568,grad_norm: 0.6917251661991086, iteration: 405973
loss: 1.0079823732376099,grad_norm: 0.7546903876831567, iteration: 405974
loss: 1.086349606513977,grad_norm: 0.8099368671972169, iteration: 405975
loss: 1.058305025100708,grad_norm: 0.9999991910817361, iteration: 405976
loss: 0.9725130796432495,grad_norm: 0.7485807298685797, iteration: 405977
loss: 0.9463514685630798,grad_norm: 0.7838894010895789, iteration: 405978
loss: 1.0759834051132202,grad_norm: 0.6791177518869376, iteration: 405979
loss: 1.0534052848815918,grad_norm: 0.9999990762634073, iteration: 405980
loss: 1.0065659284591675,grad_norm: 0.850935978748986, iteration: 405981
loss: 1.0042424201965332,grad_norm: 0.9465470131957099, iteration: 405982
loss: 1.0013459920883179,grad_norm: 0.7650369675072983, iteration: 405983
loss: 1.015255331993103,grad_norm: 0.9665104021828049, iteration: 405984
loss: 1.0177499055862427,grad_norm: 0.9437536567515195, iteration: 405985
loss: 1.000009298324585,grad_norm: 0.611277199756428, iteration: 405986
loss: 1.0363643169403076,grad_norm: 0.9999998626489663, iteration: 405987
loss: 0.9974159002304077,grad_norm: 0.8275542682352041, iteration: 405988
loss: 1.1536157131195068,grad_norm: 0.9999996456760984, iteration: 405989
loss: 1.008350133895874,grad_norm: 0.6720301101168367, iteration: 405990
loss: 1.0664945840835571,grad_norm: 0.9797222215990591, iteration: 405991
loss: 1.0232187509536743,grad_norm: 0.8170244418198817, iteration: 405992
loss: 1.0399771928787231,grad_norm: 0.757952634917863, iteration: 405993
loss: 1.009335994720459,grad_norm: 0.8030289482910089, iteration: 405994
loss: 0.9808666706085205,grad_norm: 0.7484578195226307, iteration: 405995
loss: 0.9991003274917603,grad_norm: 0.9999992717212063, iteration: 405996
loss: 1.0091071128845215,grad_norm: 0.6923522745611671, iteration: 405997
loss: 1.012250542640686,grad_norm: 0.7631788323043496, iteration: 405998
loss: 1.0070725679397583,grad_norm: 0.8530799875369723, iteration: 405999
loss: 0.9803345799446106,grad_norm: 0.812301409999469, iteration: 406000
loss: 1.0208615064620972,grad_norm: 0.8395262519083376, iteration: 406001
loss: 0.9722243547439575,grad_norm: 0.9241910319721708, iteration: 406002
loss: 1.0354242324829102,grad_norm: 0.9999996589066663, iteration: 406003
loss: 0.9924764633178711,grad_norm: 0.7844380736926663, iteration: 406004
loss: 1.0099366903305054,grad_norm: 0.7562902922096466, iteration: 406005
loss: 1.0203567743301392,grad_norm: 0.7771976498014178, iteration: 406006
loss: 0.9813302159309387,grad_norm: 0.7134851616017668, iteration: 406007
loss: 1.0309274196624756,grad_norm: 0.9503061484815736, iteration: 406008
loss: 1.0511212348937988,grad_norm: 0.9999998939105774, iteration: 406009
loss: 1.0164545774459839,grad_norm: 0.9999993188343395, iteration: 406010
loss: 0.9880666136741638,grad_norm: 0.7879787064111895, iteration: 406011
loss: 1.0318777561187744,grad_norm: 0.8862072928063253, iteration: 406012
loss: 1.0132877826690674,grad_norm: 0.9999995660684026, iteration: 406013
loss: 1.0692282915115356,grad_norm: 0.9999992153795998, iteration: 406014
loss: 0.9527608156204224,grad_norm: 0.904546200833811, iteration: 406015
loss: 1.0839694738388062,grad_norm: 0.7409456433371675, iteration: 406016
loss: 1.0232205390930176,grad_norm: 0.9002231528479264, iteration: 406017
loss: 1.0027705430984497,grad_norm: 0.8837821793027786, iteration: 406018
loss: 0.9986912608146667,grad_norm: 0.9999993905634076, iteration: 406019
loss: 1.1093833446502686,grad_norm: 0.999999316847133, iteration: 406020
loss: 1.060255765914917,grad_norm: 0.9999992687638629, iteration: 406021
loss: 1.0044015645980835,grad_norm: 0.9224667075188177, iteration: 406022
loss: 1.007277250289917,grad_norm: 0.6996195458973906, iteration: 406023
loss: 0.9796995520591736,grad_norm: 0.7715944128923353, iteration: 406024
loss: 1.0718564987182617,grad_norm: 0.8720274184820037, iteration: 406025
loss: 1.0031611919403076,grad_norm: 0.8303597852419657, iteration: 406026
loss: 1.0053431987762451,grad_norm: 0.8259769641411384, iteration: 406027
loss: 0.9985296130180359,grad_norm: 0.6972592923399876, iteration: 406028
loss: 1.0603893995285034,grad_norm: 0.9999994621076912, iteration: 406029
loss: 1.0376852750778198,grad_norm: 0.9999995084303144, iteration: 406030
loss: 0.9482690691947937,grad_norm: 0.9506667764106939, iteration: 406031
loss: 1.0657843351364136,grad_norm: 0.7466143356391205, iteration: 406032
loss: 1.0007489919662476,grad_norm: 0.701773845430096, iteration: 406033
loss: 1.0603176355361938,grad_norm: 0.9101143975533743, iteration: 406034
loss: 1.0080811977386475,grad_norm: 0.9999999074351866, iteration: 406035
loss: 1.0230199098587036,grad_norm: 0.8538839069776791, iteration: 406036
loss: 1.0417366027832031,grad_norm: 0.9999993604106995, iteration: 406037
loss: 1.0102217197418213,grad_norm: 0.6886935276808223, iteration: 406038
loss: 1.0564082860946655,grad_norm: 0.9999996611093166, iteration: 406039
loss: 0.9894169569015503,grad_norm: 0.9999992485276714, iteration: 406040
loss: 1.0912176370620728,grad_norm: 0.9999999166626823, iteration: 406041
loss: 1.0113837718963623,grad_norm: 0.6814561348658507, iteration: 406042
loss: 1.1061004400253296,grad_norm: 0.9554102785530153, iteration: 406043
loss: 1.0224047899246216,grad_norm: 0.9112461248013644, iteration: 406044
loss: 1.0105115175247192,grad_norm: 0.8097033889692814, iteration: 406045
loss: 1.016585350036621,grad_norm: 0.7977665925959525, iteration: 406046
loss: 1.041847586631775,grad_norm: 0.9892353997684573, iteration: 406047
loss: 1.0098075866699219,grad_norm: 0.7451198089742886, iteration: 406048
loss: 1.0438371896743774,grad_norm: 0.9999991611701838, iteration: 406049
loss: 1.0465468168258667,grad_norm: 0.7355751363390802, iteration: 406050
loss: 0.9834012985229492,grad_norm: 0.7209566946392211, iteration: 406051
loss: 1.017060399055481,grad_norm: 0.7657274534392147, iteration: 406052
loss: 0.974351704120636,grad_norm: 0.8354940067008158, iteration: 406053
loss: 1.0170005559921265,grad_norm: 0.7991394534203835, iteration: 406054
loss: 1.0462822914123535,grad_norm: 0.8115308320003111, iteration: 406055
loss: 1.1204402446746826,grad_norm: 0.999999621408919, iteration: 406056
loss: 0.9703621864318848,grad_norm: 0.6493887950844324, iteration: 406057
loss: 1.0263004302978516,grad_norm: 0.8819265294712839, iteration: 406058
loss: 1.0032777786254883,grad_norm: 0.7798668008694192, iteration: 406059
loss: 1.0099233388900757,grad_norm: 0.7140852205357265, iteration: 406060
loss: 1.0107587575912476,grad_norm: 0.851908975529328, iteration: 406061
loss: 1.0235204696655273,grad_norm: 0.9999993622960626, iteration: 406062
loss: 0.9876511693000793,grad_norm: 0.8208324586885747, iteration: 406063
loss: 0.9630761742591858,grad_norm: 0.7446089623243506, iteration: 406064
loss: 0.9883686900138855,grad_norm: 0.9999995208096897, iteration: 406065
loss: 1.0048816204071045,grad_norm: 0.6825655172782037, iteration: 406066
loss: 1.0102062225341797,grad_norm: 0.7419588025602674, iteration: 406067
loss: 1.0134291648864746,grad_norm: 0.8007694204100609, iteration: 406068
loss: 1.0019969940185547,grad_norm: 0.7206925367960486, iteration: 406069
loss: 1.0198193788528442,grad_norm: 0.8667473995372075, iteration: 406070
loss: 0.9588245749473572,grad_norm: 0.7992007525425484, iteration: 406071
loss: 1.0046560764312744,grad_norm: 0.7253497585699494, iteration: 406072
loss: 0.9811858534812927,grad_norm: 0.8158877490130272, iteration: 406073
loss: 0.9882057309150696,grad_norm: 0.8121122534360781, iteration: 406074
loss: 1.0366582870483398,grad_norm: 0.9999995358612235, iteration: 406075
loss: 1.04465651512146,grad_norm: 0.999999453513585, iteration: 406076
loss: 0.9850239157676697,grad_norm: 0.9688874290674455, iteration: 406077
loss: 0.9848251342773438,grad_norm: 0.8090435513817932, iteration: 406078
loss: 1.0086017847061157,grad_norm: 0.8236397676602185, iteration: 406079
loss: 1.0037654638290405,grad_norm: 0.961324263555151, iteration: 406080
loss: 1.006544589996338,grad_norm: 0.9188685210450849, iteration: 406081
loss: 1.0007554292678833,grad_norm: 0.7595116894823828, iteration: 406082
loss: 1.0931346416473389,grad_norm: 0.8489911460921967, iteration: 406083
loss: 1.0142828226089478,grad_norm: 0.987468111647058, iteration: 406084
loss: 1.0261377096176147,grad_norm: 0.7701893047729408, iteration: 406085
loss: 0.9804514050483704,grad_norm: 0.7898155515966537, iteration: 406086
loss: 0.9568342566490173,grad_norm: 0.7594775579831149, iteration: 406087
loss: 1.054479956626892,grad_norm: 0.9999991462169447, iteration: 406088
loss: 1.0159357786178589,grad_norm: 0.8979914032133011, iteration: 406089
loss: 1.0481469631195068,grad_norm: 0.6653995722012872, iteration: 406090
loss: 0.9900979399681091,grad_norm: 0.8925557974883813, iteration: 406091
loss: 0.9979245066642761,grad_norm: 0.666611492045313, iteration: 406092
loss: 0.9859943985939026,grad_norm: 0.8556228575176019, iteration: 406093
loss: 1.0192441940307617,grad_norm: 0.7798408240317755, iteration: 406094
loss: 0.9627033472061157,grad_norm: 0.7976056122728191, iteration: 406095
loss: 1.0221750736236572,grad_norm: 0.7657931211230681, iteration: 406096
loss: 0.9787211418151855,grad_norm: 0.8616905779714497, iteration: 406097
loss: 1.0320813655853271,grad_norm: 0.9999991957174664, iteration: 406098
loss: 1.0490283966064453,grad_norm: 0.999999989490663, iteration: 406099
loss: 1.0132331848144531,grad_norm: 0.8140741959929096, iteration: 406100
loss: 1.0014079809188843,grad_norm: 0.7335083012434405, iteration: 406101
loss: 1.0244238376617432,grad_norm: 0.9999998852542511, iteration: 406102
loss: 0.9796754717826843,grad_norm: 0.7250250536345819, iteration: 406103
loss: 0.9872994422912598,grad_norm: 0.8059175949659662, iteration: 406104
loss: 0.9929036498069763,grad_norm: 0.8172629812409788, iteration: 406105
loss: 0.9688321352005005,grad_norm: 0.8325589510790627, iteration: 406106
loss: 0.9342878460884094,grad_norm: 0.6994448100971196, iteration: 406107
loss: 0.9939737319946289,grad_norm: 0.7434662175405616, iteration: 406108
loss: 1.0017149448394775,grad_norm: 0.9400784319058527, iteration: 406109
loss: 1.003163456916809,grad_norm: 0.7854650675783793, iteration: 406110
loss: 0.9899399876594543,grad_norm: 0.8510305429658822, iteration: 406111
loss: 1.0092689990997314,grad_norm: 0.9888612895131921, iteration: 406112
loss: 1.012200951576233,grad_norm: 0.8105686442075134, iteration: 406113
loss: 0.9742388129234314,grad_norm: 0.7161519868198846, iteration: 406114
loss: 0.9761937260627747,grad_norm: 0.8488303977669601, iteration: 406115
loss: 0.9862055778503418,grad_norm: 0.855407208221069, iteration: 406116
loss: 1.0287301540374756,grad_norm: 0.9999994265756978, iteration: 406117
loss: 1.0198404788970947,grad_norm: 0.7983240657139002, iteration: 406118
loss: 1.0300250053405762,grad_norm: 0.9304450335149937, iteration: 406119
loss: 1.0244193077087402,grad_norm: 0.9761138311982047, iteration: 406120
loss: 0.9955477118492126,grad_norm: 0.8243147430902052, iteration: 406121
loss: 1.0620626211166382,grad_norm: 0.8446777065118743, iteration: 406122
loss: 1.005445122718811,grad_norm: 0.8755625712571808, iteration: 406123
loss: 1.0250318050384521,grad_norm: 0.6634888549355695, iteration: 406124
loss: 1.0025227069854736,grad_norm: 0.8747173325645177, iteration: 406125
loss: 0.9562263488769531,grad_norm: 0.9219960510334766, iteration: 406126
loss: 0.9923686981201172,grad_norm: 0.74265260369605, iteration: 406127
loss: 1.012209177017212,grad_norm: 0.9999990813890898, iteration: 406128
loss: 1.09303879737854,grad_norm: 0.7774534425095392, iteration: 406129
loss: 1.0170972347259521,grad_norm: 0.753747172869705, iteration: 406130
loss: 1.0324324369430542,grad_norm: 0.9999996091843322, iteration: 406131
loss: 1.0092331171035767,grad_norm: 0.9438982505792539, iteration: 406132
loss: 0.9865122437477112,grad_norm: 0.8259873697009932, iteration: 406133
loss: 1.0012060403823853,grad_norm: 0.7283598449292821, iteration: 406134
loss: 0.9487433433532715,grad_norm: 0.8727780625226063, iteration: 406135
loss: 0.9733440279960632,grad_norm: 0.7481154091352603, iteration: 406136
loss: 1.0277366638183594,grad_norm: 0.7115527398696955, iteration: 406137
loss: 0.9906331896781921,grad_norm: 0.7755499890830305, iteration: 406138
loss: 1.0071995258331299,grad_norm: 0.8572154635488937, iteration: 406139
loss: 0.9724113941192627,grad_norm: 0.8458873308553203, iteration: 406140
loss: 1.0113760232925415,grad_norm: 0.7203352781277573, iteration: 406141
loss: 1.0143808126449585,grad_norm: 0.7091728380996464, iteration: 406142
loss: 0.9951053857803345,grad_norm: 0.7078748234372293, iteration: 406143
loss: 0.9853667616844177,grad_norm: 0.7487181916638103, iteration: 406144
loss: 0.9566900134086609,grad_norm: 0.8893101797210361, iteration: 406145
loss: 0.9951990246772766,grad_norm: 0.8465428695349345, iteration: 406146
loss: 1.0074493885040283,grad_norm: 0.8329906038982604, iteration: 406147
loss: 1.0386722087860107,grad_norm: 0.6425659419314808, iteration: 406148
loss: 1.0084505081176758,grad_norm: 0.7012421301699308, iteration: 406149
loss: 1.0004067420959473,grad_norm: 0.8355316471975373, iteration: 406150
loss: 0.9934072494506836,grad_norm: 0.7262849345615613, iteration: 406151
loss: 0.9898751974105835,grad_norm: 0.845669457563118, iteration: 406152
loss: 1.036505937576294,grad_norm: 0.9999997704915627, iteration: 406153
loss: 1.0176944732666016,grad_norm: 0.9999994082471748, iteration: 406154
loss: 1.0477631092071533,grad_norm: 0.9999990098741448, iteration: 406155
loss: 0.9987257122993469,grad_norm: 0.773853869847773, iteration: 406156
loss: 0.9880845546722412,grad_norm: 0.9603247866388209, iteration: 406157
loss: 0.9873853325843811,grad_norm: 0.6189915328902039, iteration: 406158
loss: 0.9592978358268738,grad_norm: 0.9521383850815238, iteration: 406159
loss: 1.0444973707199097,grad_norm: 0.9999995911833607, iteration: 406160
loss: 1.0766570568084717,grad_norm: 0.9999998445616484, iteration: 406161
loss: 1.0020819902420044,grad_norm: 0.7464731088084213, iteration: 406162
loss: 1.0130916833877563,grad_norm: 0.8578598928028732, iteration: 406163
loss: 1.027747631072998,grad_norm: 0.6856580510275808, iteration: 406164
loss: 0.9794618487358093,grad_norm: 0.7513112395235693, iteration: 406165
loss: 1.0064398050308228,grad_norm: 0.7785060299909502, iteration: 406166
loss: 0.9646313190460205,grad_norm: 0.7795822810779108, iteration: 406167
loss: 1.034027099609375,grad_norm: 0.8342835856526633, iteration: 406168
loss: 1.0650997161865234,grad_norm: 0.9999997260505848, iteration: 406169
loss: 0.9937371611595154,grad_norm: 0.8666339975412115, iteration: 406170
loss: 1.0182241201400757,grad_norm: 0.762680498748615, iteration: 406171
loss: 1.0018599033355713,grad_norm: 0.6887156255806127, iteration: 406172
loss: 0.9891636967658997,grad_norm: 0.977888892463422, iteration: 406173
loss: 1.0444194078445435,grad_norm: 0.9999990016875964, iteration: 406174
loss: 1.0250608921051025,grad_norm: 0.8017389993454175, iteration: 406175
loss: 0.9979544281959534,grad_norm: 0.7469638176882928, iteration: 406176
loss: 0.9854199886322021,grad_norm: 0.7763290548306767, iteration: 406177
loss: 0.9929983615875244,grad_norm: 0.8980781925794021, iteration: 406178
loss: 0.9943589568138123,grad_norm: 0.7991853574208011, iteration: 406179
loss: 1.0546681880950928,grad_norm: 0.9999998697787035, iteration: 406180
loss: 0.9597721099853516,grad_norm: 0.9354701409443262, iteration: 406181
loss: 1.046250343322754,grad_norm: 1.0000000009984826, iteration: 406182
loss: 1.0224072933197021,grad_norm: 0.7582885122637347, iteration: 406183
loss: 0.988300085067749,grad_norm: 0.8252069832453932, iteration: 406184
loss: 1.0092397928237915,grad_norm: 0.635230133168718, iteration: 406185
loss: 0.9999676942825317,grad_norm: 0.7896176629917272, iteration: 406186
loss: 1.0435523986816406,grad_norm: 0.9471993624540603, iteration: 406187
loss: 1.0258853435516357,grad_norm: 0.7858233324007122, iteration: 406188
loss: 0.9623938202857971,grad_norm: 0.7525121545915776, iteration: 406189
loss: 0.9778121709823608,grad_norm: 0.9712781885419549, iteration: 406190
loss: 1.0024709701538086,grad_norm: 0.720863962322778, iteration: 406191
loss: 1.0060946941375732,grad_norm: 0.7301039881664043, iteration: 406192
loss: 1.0245585441589355,grad_norm: 0.9999998795589763, iteration: 406193
loss: 0.9797303676605225,grad_norm: 0.8801772295535063, iteration: 406194
loss: 1.0051790475845337,grad_norm: 0.7086490910368036, iteration: 406195
loss: 0.9874400496482849,grad_norm: 0.7292758794683101, iteration: 406196
loss: 0.9900497794151306,grad_norm: 0.7058760129312684, iteration: 406197
loss: 1.022982120513916,grad_norm: 0.750310524023813, iteration: 406198
loss: 1.0142008066177368,grad_norm: 0.8231530775908522, iteration: 406199
loss: 1.0402636528015137,grad_norm: 0.9999996433458768, iteration: 406200
loss: 0.9848246574401855,grad_norm: 0.8495542400955893, iteration: 406201
loss: 0.993558943271637,grad_norm: 0.7875712258297124, iteration: 406202
loss: 1.0302780866622925,grad_norm: 0.9712333843138534, iteration: 406203
loss: 1.0279790163040161,grad_norm: 0.7650833652599343, iteration: 406204
loss: 0.9791021943092346,grad_norm: 0.6578209607871404, iteration: 406205
loss: 1.0095521211624146,grad_norm: 0.7695384518198315, iteration: 406206
loss: 1.0109152793884277,grad_norm: 0.7485023351738846, iteration: 406207
loss: 1.0064364671707153,grad_norm: 0.9999995661307826, iteration: 406208
loss: 1.0277371406555176,grad_norm: 0.9407559918087751, iteration: 406209
loss: 1.0062264204025269,grad_norm: 0.9254537241527511, iteration: 406210
loss: 1.0126909017562866,grad_norm: 0.6529783130724858, iteration: 406211
loss: 1.0240548849105835,grad_norm: 0.7150865405121439, iteration: 406212
loss: 0.9871941208839417,grad_norm: 0.7084266686772795, iteration: 406213
loss: 0.9999297857284546,grad_norm: 0.7341045506918649, iteration: 406214
loss: 0.9801740050315857,grad_norm: 0.7829409009499163, iteration: 406215
loss: 1.02840256690979,grad_norm: 0.7635314677809698, iteration: 406216
loss: 0.9837167859077454,grad_norm: 0.7671717521060255, iteration: 406217
loss: 1.0353233814239502,grad_norm: 0.9076506072965161, iteration: 406218
loss: 0.9945529699325562,grad_norm: 0.7925791976656935, iteration: 406219
loss: 0.983802318572998,grad_norm: 0.6855546023846761, iteration: 406220
loss: 1.0151418447494507,grad_norm: 0.8579381831331755, iteration: 406221
loss: 0.9864179491996765,grad_norm: 0.7864666083345367, iteration: 406222
loss: 1.002671480178833,grad_norm: 0.763383455174626, iteration: 406223
loss: 1.0575926303863525,grad_norm: 0.9999999350608267, iteration: 406224
loss: 1.0168957710266113,grad_norm: 0.89251019323154, iteration: 406225
loss: 0.9895890355110168,grad_norm: 0.8271899263312358, iteration: 406226
loss: 1.0335397720336914,grad_norm: 0.6785253681530367, iteration: 406227
loss: 0.9626954793930054,grad_norm: 0.6208659273136413, iteration: 406228
loss: 1.0002756118774414,grad_norm: 0.7792755852062151, iteration: 406229
loss: 1.0137046575546265,grad_norm: 0.5929889527794652, iteration: 406230
loss: 1.0258874893188477,grad_norm: 0.9433275924761177, iteration: 406231
loss: 1.024024248123169,grad_norm: 0.8574107159903424, iteration: 406232
loss: 0.9890211224555969,grad_norm: 0.8000925742621581, iteration: 406233
loss: 1.0319316387176514,grad_norm: 0.999998964315703, iteration: 406234
loss: 1.0180964469909668,grad_norm: 0.9140809684771761, iteration: 406235
loss: 0.9961137771606445,grad_norm: 0.7306076179301157, iteration: 406236
loss: 0.9542312622070312,grad_norm: 0.8557327138414357, iteration: 406237
loss: 0.9878441095352173,grad_norm: 0.7192330253332541, iteration: 406238
loss: 1.0148773193359375,grad_norm: 0.9999998047362361, iteration: 406239
loss: 1.027034878730774,grad_norm: 0.6813153742148093, iteration: 406240
loss: 1.0086315870285034,grad_norm: 0.7331988868222574, iteration: 406241
loss: 0.9928869009017944,grad_norm: 0.7309037081571104, iteration: 406242
loss: 0.9827478528022766,grad_norm: 0.8907534586298305, iteration: 406243
loss: 1.0032694339752197,grad_norm: 0.9247620834238657, iteration: 406244
loss: 1.0285953283309937,grad_norm: 0.999999186356847, iteration: 406245
loss: 1.0061756372451782,grad_norm: 0.8356341854992111, iteration: 406246
loss: 1.0390244722366333,grad_norm: 0.787500109407307, iteration: 406247
loss: 1.0578752756118774,grad_norm: 0.9999997468939545, iteration: 406248
loss: 1.0141043663024902,grad_norm: 0.7326022217457481, iteration: 406249
loss: 1.0140796899795532,grad_norm: 0.6955320164603725, iteration: 406250
loss: 1.0071206092834473,grad_norm: 0.8099002019602504, iteration: 406251
loss: 0.9724937081336975,grad_norm: 0.8486811831659666, iteration: 406252
loss: 0.9594452381134033,grad_norm: 0.8303004704819491, iteration: 406253
loss: 0.9979601502418518,grad_norm: 0.9530648902715325, iteration: 406254
loss: 1.0652730464935303,grad_norm: 0.977601156179213, iteration: 406255
loss: 0.9991228580474854,grad_norm: 0.8564522648477735, iteration: 406256
loss: 0.9972010850906372,grad_norm: 0.9999993558118074, iteration: 406257
loss: 1.0235552787780762,grad_norm: 0.778288970409012, iteration: 406258
loss: 1.022135615348816,grad_norm: 0.8820970091253137, iteration: 406259
loss: 1.0080647468566895,grad_norm: 0.7511049314441637, iteration: 406260
loss: 1.0084675550460815,grad_norm: 0.999999146062037, iteration: 406261
loss: 0.9681136012077332,grad_norm: 0.7892704900150026, iteration: 406262
loss: 1.0039501190185547,grad_norm: 0.9999991183675708, iteration: 406263
loss: 1.0030397176742554,grad_norm: 0.8980306383806942, iteration: 406264
loss: 1.0030487775802612,grad_norm: 0.7812948574145313, iteration: 406265
loss: 0.9931733012199402,grad_norm: 0.6776499799466975, iteration: 406266
loss: 0.9966151714324951,grad_norm: 0.6295618928500823, iteration: 406267
loss: 1.1448239088058472,grad_norm: 0.9999992481467692, iteration: 406268
loss: 1.0304605960845947,grad_norm: 0.7685379783344541, iteration: 406269
loss: 1.0651681423187256,grad_norm: 0.7970378312692034, iteration: 406270
loss: 1.033698558807373,grad_norm: 0.8158934615280432, iteration: 406271
loss: 1.0098873376846313,grad_norm: 0.6610302922885968, iteration: 406272
loss: 0.9825957417488098,grad_norm: 0.7970931344499141, iteration: 406273
loss: 1.024052381515503,grad_norm: 0.7932275335582893, iteration: 406274
loss: 1.0791661739349365,grad_norm: 0.8520094270130892, iteration: 406275
loss: 1.0133683681488037,grad_norm: 0.7771523428206478, iteration: 406276
loss: 0.9971135854721069,grad_norm: 0.9999997643096501, iteration: 406277
loss: 1.1071885824203491,grad_norm: 0.9999994844279558, iteration: 406278
loss: 1.0887949466705322,grad_norm: 0.9999997992054025, iteration: 406279
loss: 1.0096803903579712,grad_norm: 0.9999991456218904, iteration: 406280
loss: 1.0336685180664062,grad_norm: 0.9999992115170336, iteration: 406281
loss: 0.9953539967536926,grad_norm: 0.9999996350344948, iteration: 406282
loss: 1.0237444639205933,grad_norm: 0.8050576991370714, iteration: 406283
loss: 0.9952789545059204,grad_norm: 0.846180815276723, iteration: 406284
loss: 1.0464222431182861,grad_norm: 0.9999996929925518, iteration: 406285
loss: 0.9850789308547974,grad_norm: 0.7379258304717506, iteration: 406286
loss: 0.9945521950721741,grad_norm: 0.7430456558592122, iteration: 406287
loss: 1.1670464277267456,grad_norm: 0.9110675379468589, iteration: 406288
loss: 1.0477216243743896,grad_norm: 0.9999998074167988, iteration: 406289
loss: 0.982697069644928,grad_norm: 0.6789664303468792, iteration: 406290
loss: 1.0550028085708618,grad_norm: 0.7037166420559179, iteration: 406291
loss: 1.0312832593917847,grad_norm: 0.7723705676624477, iteration: 406292
loss: 0.9807596802711487,grad_norm: 0.6835887542084074, iteration: 406293
loss: 1.0566335916519165,grad_norm: 0.8694529336479069, iteration: 406294
loss: 1.0065419673919678,grad_norm: 0.7172541371597132, iteration: 406295
loss: 1.0144810676574707,grad_norm: 0.6598521971520058, iteration: 406296
loss: 0.9544978141784668,grad_norm: 0.8263719635994412, iteration: 406297
loss: 1.0377779006958008,grad_norm: 0.9999997661086106, iteration: 406298
loss: 1.0791960954666138,grad_norm: 0.9999994288632833, iteration: 406299
loss: 0.9820870757102966,grad_norm: 0.7941070499818561, iteration: 406300
loss: 1.058834433555603,grad_norm: 0.9999999039985578, iteration: 406301
loss: 1.0680248737335205,grad_norm: 0.9999992167486289, iteration: 406302
loss: 1.027713656425476,grad_norm: 0.9999995720385747, iteration: 406303
loss: 1.00709867477417,grad_norm: 0.7505203053637891, iteration: 406304
loss: 1.1045199632644653,grad_norm: 0.9744782279799071, iteration: 406305
loss: 0.9593522548675537,grad_norm: 0.7261114281415914, iteration: 406306
loss: 1.0472780466079712,grad_norm: 0.9999998431636397, iteration: 406307
loss: 0.9997674226760864,grad_norm: 0.852190779529627, iteration: 406308
loss: 0.9898508191108704,grad_norm: 0.9999998151992078, iteration: 406309
loss: 1.0234355926513672,grad_norm: 0.9999998239029111, iteration: 406310
loss: 1.0028549432754517,grad_norm: 0.7722307891793164, iteration: 406311
loss: 1.076865553855896,grad_norm: 0.9999997368524268, iteration: 406312
loss: 0.9506892561912537,grad_norm: 0.7095233822680137, iteration: 406313
loss: 1.0996677875518799,grad_norm: 0.8917355935713631, iteration: 406314
loss: 1.0391147136688232,grad_norm: 0.8617021865060847, iteration: 406315
loss: 1.0179458856582642,grad_norm: 0.7485602792999057, iteration: 406316
loss: 1.0290544033050537,grad_norm: 0.7821534216303633, iteration: 406317
loss: 0.9897470474243164,grad_norm: 0.9906483294032374, iteration: 406318
loss: 1.049322485923767,grad_norm: 0.9999996816691612, iteration: 406319
loss: 0.9637910723686218,grad_norm: 0.7844549008784865, iteration: 406320
loss: 1.1256707906723022,grad_norm: 0.9999999308223908, iteration: 406321
loss: 1.0945696830749512,grad_norm: 0.9999997143004754, iteration: 406322
loss: 1.0154038667678833,grad_norm: 0.6937458712954414, iteration: 406323
loss: 1.0434281826019287,grad_norm: 0.9999990730045635, iteration: 406324
loss: 1.0035178661346436,grad_norm: 0.9568386657186725, iteration: 406325
loss: 1.0468964576721191,grad_norm: 0.9999995807977525, iteration: 406326
loss: 0.9811003804206848,grad_norm: 0.8529448059841791, iteration: 406327
loss: 1.0071282386779785,grad_norm: 0.7908214724206746, iteration: 406328
loss: 0.9717568755149841,grad_norm: 0.677891741668111, iteration: 406329
loss: 1.0196187496185303,grad_norm: 0.9999996260822743, iteration: 406330
loss: 0.9899131655693054,grad_norm: 0.6901204469185422, iteration: 406331
loss: 0.9826084971427917,grad_norm: 0.7393950256047709, iteration: 406332
loss: 1.037421464920044,grad_norm: 0.8814054325744888, iteration: 406333
loss: 1.0440890789031982,grad_norm: 0.9999995063837325, iteration: 406334
loss: 1.01422119140625,grad_norm: 0.9147240947669814, iteration: 406335
loss: 0.9751520156860352,grad_norm: 0.9999990984322119, iteration: 406336
loss: 0.9650249481201172,grad_norm: 0.7687748867621693, iteration: 406337
loss: 0.974549412727356,grad_norm: 0.9999991230623576, iteration: 406338
loss: 1.0189075469970703,grad_norm: 0.9726660368715555, iteration: 406339
loss: 0.9960482716560364,grad_norm: 0.7944052096168414, iteration: 406340
loss: 0.9756595492362976,grad_norm: 0.8497827693610852, iteration: 406341
loss: 1.00261390209198,grad_norm: 0.9363547805740003, iteration: 406342
loss: 1.0584993362426758,grad_norm: 0.9999990163726699, iteration: 406343
loss: 0.9882035851478577,grad_norm: 0.7371987529052684, iteration: 406344
loss: 1.0078867673873901,grad_norm: 0.8619982676488048, iteration: 406345
loss: 1.0024372339248657,grad_norm: 0.8552630200346516, iteration: 406346
loss: 1.0183486938476562,grad_norm: 0.969309564204831, iteration: 406347
loss: 1.0329378843307495,grad_norm: 0.9999991693426876, iteration: 406348
loss: 1.0456948280334473,grad_norm: 0.9999995354164993, iteration: 406349
loss: 1.0344089269638062,grad_norm: 0.9999997130298514, iteration: 406350
loss: 0.9962337613105774,grad_norm: 0.8633999301753634, iteration: 406351
loss: 0.9888217449188232,grad_norm: 0.7416863670064086, iteration: 406352
loss: 0.9641504883766174,grad_norm: 0.8687753082857655, iteration: 406353
loss: 1.011907696723938,grad_norm: 0.77269747430839, iteration: 406354
loss: 1.0039395093917847,grad_norm: 0.7717355915930574, iteration: 406355
loss: 1.008530855178833,grad_norm: 0.6637872035954062, iteration: 406356
loss: 0.9875490069389343,grad_norm: 0.8144114875087114, iteration: 406357
loss: 0.9791639447212219,grad_norm: 0.8358466752264878, iteration: 406358
loss: 1.0180962085723877,grad_norm: 0.758587761554047, iteration: 406359
loss: 1.0019108057022095,grad_norm: 0.7570724252479368, iteration: 406360
loss: 1.0479670763015747,grad_norm: 0.7899656468567846, iteration: 406361
loss: 1.0333930253982544,grad_norm: 0.7741625034497583, iteration: 406362
loss: 0.9621924757957458,grad_norm: 0.7551375691558763, iteration: 406363
loss: 1.018202543258667,grad_norm: 0.6930948506345298, iteration: 406364
loss: 1.018190860748291,grad_norm: 0.7697667840791818, iteration: 406365
loss: 0.9986177086830139,grad_norm: 0.6674394128517989, iteration: 406366
loss: 0.9819657802581787,grad_norm: 0.9062608413537198, iteration: 406367
loss: 0.9950022101402283,grad_norm: 0.7147980997115438, iteration: 406368
loss: 1.0168471336364746,grad_norm: 0.9999990681672628, iteration: 406369
loss: 1.1388987302780151,grad_norm: 0.9999998935775573, iteration: 406370
loss: 1.0169858932495117,grad_norm: 0.9999989516919865, iteration: 406371
loss: 1.0105654001235962,grad_norm: 0.5852488247452122, iteration: 406372
loss: 0.9859886765480042,grad_norm: 0.6843082669436109, iteration: 406373
loss: 1.032439947128296,grad_norm: 0.9999995711539272, iteration: 406374
loss: 0.9961467981338501,grad_norm: 0.9999994004344597, iteration: 406375
loss: 0.9781985282897949,grad_norm: 0.999999761091289, iteration: 406376
loss: 1.0102289915084839,grad_norm: 0.9999994417788676, iteration: 406377
loss: 1.0025396347045898,grad_norm: 0.9999993484110227, iteration: 406378
loss: 0.9952546954154968,grad_norm: 0.7831563799653908, iteration: 406379
loss: 1.017473816871643,grad_norm: 0.9999996673442751, iteration: 406380
loss: 0.9739766120910645,grad_norm: 0.700476248601215, iteration: 406381
loss: 1.1305458545684814,grad_norm: 0.9999995692339456, iteration: 406382
loss: 1.0083369016647339,grad_norm: 0.9215830126086167, iteration: 406383
loss: 0.9983662366867065,grad_norm: 0.9999995115244185, iteration: 406384
loss: 1.0179569721221924,grad_norm: 0.999999934520433, iteration: 406385
loss: 1.072157859802246,grad_norm: 0.9999991941819482, iteration: 406386
loss: 1.0187419652938843,grad_norm: 0.9999990555829349, iteration: 406387
loss: 0.9857607483863831,grad_norm: 0.8429457228049738, iteration: 406388
loss: 1.042079210281372,grad_norm: 0.9999994313132805, iteration: 406389
loss: 1.004904866218567,grad_norm: 0.9999992528372328, iteration: 406390
loss: 0.9812761545181274,grad_norm: 0.7444611316463272, iteration: 406391
loss: 1.080163598060608,grad_norm: 0.8640300822028064, iteration: 406392
loss: 0.996626615524292,grad_norm: 0.7036440313965447, iteration: 406393
loss: 1.0635511875152588,grad_norm: 0.9999994166071443, iteration: 406394
loss: 1.047141432762146,grad_norm: 0.9999998607811419, iteration: 406395
loss: 1.088572382926941,grad_norm: 0.7489622362896522, iteration: 406396
loss: 1.0785783529281616,grad_norm: 0.9729098194495711, iteration: 406397
loss: 0.9777684807777405,grad_norm: 0.6720179725469507, iteration: 406398
loss: 1.006611704826355,grad_norm: 0.7016803999838553, iteration: 406399
loss: 0.9956556558609009,grad_norm: 0.8532883767074702, iteration: 406400
loss: 1.042414665222168,grad_norm: 0.9999993522742475, iteration: 406401
loss: 1.0839186906814575,grad_norm: 0.9999993177482467, iteration: 406402
loss: 1.0640395879745483,grad_norm: 0.9303387620930984, iteration: 406403
loss: 1.0741221904754639,grad_norm: 0.8342582778087045, iteration: 406404
loss: 1.023409366607666,grad_norm: 0.9999999454043704, iteration: 406405
loss: 1.0306023359298706,grad_norm: 0.688162998281577, iteration: 406406
loss: 0.9795008897781372,grad_norm: 0.8354367270359064, iteration: 406407
loss: 1.0351333618164062,grad_norm: 0.8229204334417511, iteration: 406408
loss: 1.0030542612075806,grad_norm: 0.7426023046172217, iteration: 406409
loss: 1.0015430450439453,grad_norm: 0.7033979341739307, iteration: 406410
loss: 1.1069960594177246,grad_norm: 0.9288373165987226, iteration: 406411
loss: 1.012466549873352,grad_norm: 0.7848423499688427, iteration: 406412
loss: 1.0380735397338867,grad_norm: 0.8493308299900566, iteration: 406413
loss: 0.9648504257202148,grad_norm: 0.8444571908302929, iteration: 406414
loss: 1.0519710779190063,grad_norm: 0.9999999185972382, iteration: 406415
loss: 1.0631699562072754,grad_norm: 0.9202454674975881, iteration: 406416
loss: 0.9910329580307007,grad_norm: 0.7035078436227463, iteration: 406417
loss: 0.99005126953125,grad_norm: 0.7424881692128595, iteration: 406418
loss: 0.9589085578918457,grad_norm: 0.8403739676265446, iteration: 406419
loss: 1.0074727535247803,grad_norm: 0.8768548040393798, iteration: 406420
loss: 1.117851972579956,grad_norm: 0.9052103754244346, iteration: 406421
loss: 1.0095041990280151,grad_norm: 0.698850883411984, iteration: 406422
loss: 1.015317440032959,grad_norm: 0.9999993309681134, iteration: 406423
loss: 0.9889854192733765,grad_norm: 0.9999997150529065, iteration: 406424
loss: 1.00237238407135,grad_norm: 0.8375740127531796, iteration: 406425
loss: 1.0026702880859375,grad_norm: 0.9999996937713491, iteration: 406426
loss: 1.0470188856124878,grad_norm: 0.9999993653479502, iteration: 406427
loss: 1.0409952402114868,grad_norm: 0.7763264106573685, iteration: 406428
loss: 0.9904800057411194,grad_norm: 0.9054490453449274, iteration: 406429
loss: 1.0142639875411987,grad_norm: 0.6827853383516311, iteration: 406430
loss: 1.0031721591949463,grad_norm: 0.7941030524905512, iteration: 406431
loss: 1.081991195678711,grad_norm: 0.9999993768630102, iteration: 406432
loss: 1.1183708906173706,grad_norm: 0.9999997941747546, iteration: 406433
loss: 1.061874508857727,grad_norm: 0.9999994839226097, iteration: 406434
loss: 1.0303059816360474,grad_norm: 0.9999990632377221, iteration: 406435
loss: 1.0352427959442139,grad_norm: 0.999999101244683, iteration: 406436
loss: 1.049322485923767,grad_norm: 0.9999992693795154, iteration: 406437
loss: 1.0029627084732056,grad_norm: 0.7396621574716373, iteration: 406438
loss: 1.018607497215271,grad_norm: 0.7043656283121732, iteration: 406439
loss: 0.9945648312568665,grad_norm: 0.9999997221419131, iteration: 406440
loss: 0.9770158529281616,grad_norm: 0.6522843744253389, iteration: 406441
loss: 1.0051391124725342,grad_norm: 0.9999990770149483, iteration: 406442
loss: 1.0170090198516846,grad_norm: 0.999999305548295, iteration: 406443
loss: 0.9638274312019348,grad_norm: 0.7663042864295525, iteration: 406444
loss: 0.9846664667129517,grad_norm: 0.9999990708274022, iteration: 406445
loss: 1.010317325592041,grad_norm: 0.918061818369281, iteration: 406446
loss: 0.9996588826179504,grad_norm: 0.732272149981092, iteration: 406447
loss: 1.1130770444869995,grad_norm: 0.9999991692379898, iteration: 406448
loss: 0.9984532594680786,grad_norm: 0.7825459371381314, iteration: 406449
loss: 1.0626299381256104,grad_norm: 0.8889709504486007, iteration: 406450
loss: 1.0357248783111572,grad_norm: 0.9999999035606774, iteration: 406451
loss: 1.0293587446212769,grad_norm: 0.9999998042122236, iteration: 406452
loss: 1.0247329473495483,grad_norm: 0.9999996113914521, iteration: 406453
loss: 1.074434757232666,grad_norm: 0.9866313766500477, iteration: 406454
loss: 1.0581265687942505,grad_norm: 0.9999997685232876, iteration: 406455
loss: 1.0251739025115967,grad_norm: 0.9258470680465545, iteration: 406456
loss: 0.9804455041885376,grad_norm: 0.7844442086664063, iteration: 406457
loss: 1.0038756132125854,grad_norm: 0.9999990328136054, iteration: 406458
loss: 1.0785040855407715,grad_norm: 0.9999989294161401, iteration: 406459
loss: 1.0138670206069946,grad_norm: 0.9999993326418997, iteration: 406460
loss: 0.9987779259681702,grad_norm: 0.8037415184232732, iteration: 406461
loss: 1.2346017360687256,grad_norm: 0.9999997779909298, iteration: 406462
loss: 1.0114821195602417,grad_norm: 0.9999990902980157, iteration: 406463
loss: 1.0030568838119507,grad_norm: 0.8395038366822215, iteration: 406464
loss: 1.0076348781585693,grad_norm: 0.6995928002916112, iteration: 406465
loss: 1.0212167501449585,grad_norm: 0.8199515699921655, iteration: 406466
loss: 1.0592962503433228,grad_norm: 0.9999993349015057, iteration: 406467
loss: 0.9794740080833435,grad_norm: 0.9416116675109253, iteration: 406468
loss: 1.0056633949279785,grad_norm: 0.8133105438144699, iteration: 406469
loss: 1.0318174362182617,grad_norm: 1.0000000583342334, iteration: 406470
loss: 1.0769940614700317,grad_norm: 0.9999995091110782, iteration: 406471
loss: 0.9954991936683655,grad_norm: 0.9170301309083225, iteration: 406472
loss: 0.9924092888832092,grad_norm: 0.9999994230335917, iteration: 406473
loss: 1.0132641792297363,grad_norm: 0.7892630194254598, iteration: 406474
loss: 1.1059443950653076,grad_norm: 0.999999294220753, iteration: 406475
loss: 0.9922208189964294,grad_norm: 0.8045257407830049, iteration: 406476
loss: 1.052347183227539,grad_norm: 0.9239172363964062, iteration: 406477
loss: 0.9888317584991455,grad_norm: 0.6859531575437786, iteration: 406478
loss: 0.9644205570220947,grad_norm: 0.8365012166910013, iteration: 406479
loss: 1.012153148651123,grad_norm: 0.884553398588845, iteration: 406480
loss: 1.006577968597412,grad_norm: 0.7671343908838126, iteration: 406481
loss: 1.0385252237319946,grad_norm: 0.9168032374369367, iteration: 406482
loss: 0.9379815459251404,grad_norm: 0.6272399025139813, iteration: 406483
loss: 1.0460259914398193,grad_norm: 0.8918621764730547, iteration: 406484
loss: 1.0096923112869263,grad_norm: 0.8501016420653021, iteration: 406485
loss: 0.988066554069519,grad_norm: 0.7325221910380277, iteration: 406486
loss: 1.009811282157898,grad_norm: 0.8887245030756348, iteration: 406487
loss: 0.9604713320732117,grad_norm: 0.9999991033055228, iteration: 406488
loss: 1.0465031862258911,grad_norm: 0.9999998410723058, iteration: 406489
loss: 0.9757182598114014,grad_norm: 0.7649930490037514, iteration: 406490
loss: 1.0696897506713867,grad_norm: 0.7708057245172256, iteration: 406491
loss: 1.0188205242156982,grad_norm: 0.8928388678578296, iteration: 406492
loss: 0.9928135871887207,grad_norm: 0.692948363122715, iteration: 406493
loss: 0.9689182043075562,grad_norm: 0.7508622980038179, iteration: 406494
loss: 1.0134273767471313,grad_norm: 0.8590906442428604, iteration: 406495
loss: 0.9658808708190918,grad_norm: 0.977082291293785, iteration: 406496
loss: 1.0449320077896118,grad_norm: 0.9999991298138601, iteration: 406497
loss: 0.9666816592216492,grad_norm: 0.7604530916322766, iteration: 406498
loss: 0.9751368761062622,grad_norm: 0.8531144300897123, iteration: 406499
loss: 1.129496693611145,grad_norm: 0.9999994573556495, iteration: 406500
loss: 1.016225814819336,grad_norm: 0.6727480268369803, iteration: 406501
loss: 0.9534595012664795,grad_norm: 0.8460527555214649, iteration: 406502
loss: 1.15861177444458,grad_norm: 0.9999997932341212, iteration: 406503
loss: 0.9738977551460266,grad_norm: 0.7806852633563602, iteration: 406504
loss: 1.0969864130020142,grad_norm: 0.8937085636183102, iteration: 406505
loss: 0.9539571404457092,grad_norm: 0.9035100419422452, iteration: 406506
loss: 1.0277458429336548,grad_norm: 0.849973109538525, iteration: 406507
loss: 0.9730276465415955,grad_norm: 0.9887996288447176, iteration: 406508
loss: 1.021945595741272,grad_norm: 0.744676895695195, iteration: 406509
loss: 1.027272343635559,grad_norm: 0.9161897172190719, iteration: 406510
loss: 1.0124655961990356,grad_norm: 0.9660463006478757, iteration: 406511
loss: 0.9913700222969055,grad_norm: 0.7443552388156991, iteration: 406512
loss: 1.0958099365234375,grad_norm: 0.8735149586172257, iteration: 406513
loss: 0.9828603863716125,grad_norm: 0.6496396726263297, iteration: 406514
loss: 1.044846534729004,grad_norm: 0.8433046276169883, iteration: 406515
loss: 1.0103778839111328,grad_norm: 0.7302100020557548, iteration: 406516
loss: 1.0573447942733765,grad_norm: 0.9277841254689332, iteration: 406517
loss: 0.9771624803543091,grad_norm: 0.9127460927546707, iteration: 406518
loss: 1.0421379804611206,grad_norm: 0.9999999553879898, iteration: 406519
loss: 0.9581576585769653,grad_norm: 0.7223065348371303, iteration: 406520
loss: 1.013338327407837,grad_norm: 0.865029183619976, iteration: 406521
loss: 1.0077728033065796,grad_norm: 0.7440959728937551, iteration: 406522
loss: 0.9822105169296265,grad_norm: 0.7659635454145544, iteration: 406523
loss: 1.0119848251342773,grad_norm: 0.9999992228806831, iteration: 406524
loss: 1.0470867156982422,grad_norm: 0.9999994818476866, iteration: 406525
loss: 1.0890125036239624,grad_norm: 0.718346296460035, iteration: 406526
loss: 1.0254892110824585,grad_norm: 0.6920577449143857, iteration: 406527
loss: 1.0300204753875732,grad_norm: 0.9999992571659251, iteration: 406528
loss: 1.0025285482406616,grad_norm: 0.9999990816398641, iteration: 406529
loss: 0.9667559266090393,grad_norm: 0.7887806489095998, iteration: 406530
loss: 0.9813175797462463,grad_norm: 0.7412586941868633, iteration: 406531
loss: 1.0297918319702148,grad_norm: 0.9427321426368499, iteration: 406532
loss: 0.9596598148345947,grad_norm: 0.6807579873104463, iteration: 406533
loss: 0.9774754643440247,grad_norm: 0.7543472434529251, iteration: 406534
loss: 1.0026772022247314,grad_norm: 0.7477922820760986, iteration: 406535
loss: 0.9515001177787781,grad_norm: 0.6940562000240813, iteration: 406536
loss: 1.0183647871017456,grad_norm: 0.999999417166873, iteration: 406537
loss: 0.9861743450164795,grad_norm: 0.6061013537911077, iteration: 406538
loss: 0.953301727771759,grad_norm: 0.8626310448194212, iteration: 406539
loss: 1.0284974575042725,grad_norm: 0.9999991340513805, iteration: 406540
loss: 1.0006879568099976,grad_norm: 0.766190000448239, iteration: 406541
loss: 1.0322068929672241,grad_norm: 0.9560564193641586, iteration: 406542
loss: 1.0048904418945312,grad_norm: 0.9367576116922129, iteration: 406543
loss: 0.9835116863250732,grad_norm: 0.7901102386128839, iteration: 406544
loss: 0.9880130290985107,grad_norm: 0.9999991717149486, iteration: 406545
loss: 1.0202871561050415,grad_norm: 0.9120516564430221, iteration: 406546
loss: 1.0420109033584595,grad_norm: 0.7415641856073103, iteration: 406547
loss: 1.0118647813796997,grad_norm: 0.8694187532159742, iteration: 406548
loss: 0.982484757900238,grad_norm: 0.8948481454487341, iteration: 406549
loss: 0.9939320683479309,grad_norm: 0.7446942884505323, iteration: 406550
loss: 0.9971903562545776,grad_norm: 0.8760201738910944, iteration: 406551
loss: 1.0049363374710083,grad_norm: 0.9999992517051736, iteration: 406552
loss: 1.0056581497192383,grad_norm: 0.8294581697253715, iteration: 406553
loss: 1.1569099426269531,grad_norm: 0.9999992702746324, iteration: 406554
loss: 0.9969809651374817,grad_norm: 0.6895476206712379, iteration: 406555
loss: 1.0628962516784668,grad_norm: 0.9999999183237732, iteration: 406556
loss: 1.0221037864685059,grad_norm: 0.551065496822643, iteration: 406557
loss: 0.9876755475997925,grad_norm: 0.9307358047165073, iteration: 406558
loss: 0.9912863373756409,grad_norm: 0.691673997247926, iteration: 406559
loss: 1.0239378213882446,grad_norm: 0.7676228463375441, iteration: 406560
loss: 1.0065170526504517,grad_norm: 0.8110612590691929, iteration: 406561
loss: 0.9812822937965393,grad_norm: 0.6177482248798486, iteration: 406562
loss: 1.0667715072631836,grad_norm: 0.7627932117030557, iteration: 406563
loss: 0.9968249201774597,grad_norm: 0.8463976832366136, iteration: 406564
loss: 0.9880203604698181,grad_norm: 0.6961994328761854, iteration: 406565
loss: 0.9999508261680603,grad_norm: 0.685945523909858, iteration: 406566
loss: 1.0546026229858398,grad_norm: 0.9196094173841739, iteration: 406567
loss: 1.0263196229934692,grad_norm: 0.6438596430673907, iteration: 406568
loss: 1.0062997341156006,grad_norm: 0.9999991028171049, iteration: 406569
loss: 1.0343986749649048,grad_norm: 0.9999998568579377, iteration: 406570
loss: 1.0236254930496216,grad_norm: 0.9999999322988675, iteration: 406571
loss: 1.0039236545562744,grad_norm: 0.839382229972863, iteration: 406572
loss: 0.9842182397842407,grad_norm: 0.9600230333334463, iteration: 406573
loss: 0.9748144149780273,grad_norm: 0.9999991375501975, iteration: 406574
loss: 0.9769137501716614,grad_norm: 0.69418093690727, iteration: 406575
loss: 0.9834312200546265,grad_norm: 0.7338140384680356, iteration: 406576
loss: 1.01958167552948,grad_norm: 0.8696090025748978, iteration: 406577
loss: 1.0030041933059692,grad_norm: 0.8160108093308073, iteration: 406578
loss: 0.9960876107215881,grad_norm: 0.919928911657988, iteration: 406579
loss: 1.0602608919143677,grad_norm: 0.9999998790729452, iteration: 406580
loss: 1.0364171266555786,grad_norm: 0.9027888065934798, iteration: 406581
loss: 1.010297179222107,grad_norm: 0.8472843698035696, iteration: 406582
loss: 1.0053895711898804,grad_norm: 0.8006188741857896, iteration: 406583
loss: 0.9989550113677979,grad_norm: 0.7216275302592284, iteration: 406584
loss: 1.052105188369751,grad_norm: 0.7280963665644983, iteration: 406585
loss: 1.0728250741958618,grad_norm: 0.9696409662898969, iteration: 406586
loss: 1.0218815803527832,grad_norm: 0.9285865812664569, iteration: 406587
loss: 1.0123404264450073,grad_norm: 0.7418401174969086, iteration: 406588
loss: 0.9811126589775085,grad_norm: 0.7294510270408122, iteration: 406589
loss: 1.0498161315917969,grad_norm: 0.8592976428321931, iteration: 406590
loss: 0.9847419261932373,grad_norm: 0.7040638509934422, iteration: 406591
loss: 1.0391901731491089,grad_norm: 0.9612481728865975, iteration: 406592
loss: 1.011639952659607,grad_norm: 0.696496387113237, iteration: 406593
loss: 1.0243061780929565,grad_norm: 0.9999993543629412, iteration: 406594
loss: 1.0022774934768677,grad_norm: 0.8375883227934591, iteration: 406595
loss: 1.1114790439605713,grad_norm: 0.9999999458029467, iteration: 406596
loss: 1.0730477571487427,grad_norm: 0.9999999186701765, iteration: 406597
loss: 0.9846000075340271,grad_norm: 0.6989590636728067, iteration: 406598
loss: 1.0423965454101562,grad_norm: 0.9999996169636384, iteration: 406599
loss: 1.0244526863098145,grad_norm: 0.7074028787454476, iteration: 406600
loss: 1.02741539478302,grad_norm: 0.8441970047509681, iteration: 406601
loss: 1.0448130369186401,grad_norm: 0.644718581036689, iteration: 406602
loss: 1.0278213024139404,grad_norm: 0.9999993537020149, iteration: 406603
loss: 0.9709550738334656,grad_norm: 0.8202485572805325, iteration: 406604
loss: 1.0022228956222534,grad_norm: 0.6832570466876888, iteration: 406605
loss: 0.9865267872810364,grad_norm: 0.7206959310106027, iteration: 406606
loss: 1.0125046968460083,grad_norm: 0.6853092497810169, iteration: 406607
loss: 0.976097047328949,grad_norm: 0.9475505005235919, iteration: 406608
loss: 0.9998641610145569,grad_norm: 0.7682778356688128, iteration: 406609
loss: 0.9930130243301392,grad_norm: 0.8621491301340044, iteration: 406610
loss: 0.9922443628311157,grad_norm: 0.5761996022657263, iteration: 406611
loss: 1.0041853189468384,grad_norm: 0.7383323602856617, iteration: 406612
loss: 0.971455454826355,grad_norm: 0.7892427395973491, iteration: 406613
loss: 0.9920636415481567,grad_norm: 0.6867264977003119, iteration: 406614
loss: 1.0074387788772583,grad_norm: 0.9999992186373523, iteration: 406615
loss: 0.9954767227172852,grad_norm: 0.7888362201173708, iteration: 406616
loss: 0.9889850616455078,grad_norm: 0.7298823905507317, iteration: 406617
loss: 0.9708414673805237,grad_norm: 0.6644224826361181, iteration: 406618
loss: 0.9822142720222473,grad_norm: 0.6769359314160198, iteration: 406619
loss: 1.0067330598831177,grad_norm: 0.725823627875163, iteration: 406620
loss: 1.0291869640350342,grad_norm: 0.8557182291134532, iteration: 406621
loss: 0.9580973982810974,grad_norm: 0.8367993322355354, iteration: 406622
loss: 1.0048823356628418,grad_norm: 0.7490448406966093, iteration: 406623
loss: 1.0630170106887817,grad_norm: 0.927967408437999, iteration: 406624
loss: 1.044598937034607,grad_norm: 0.9999996097832482, iteration: 406625
loss: 1.0199729204177856,grad_norm: 0.8489850328610279, iteration: 406626
loss: 1.0058965682983398,grad_norm: 0.8837538415499334, iteration: 406627
loss: 1.025789499282837,grad_norm: 0.7210187788755529, iteration: 406628
loss: 1.0527722835540771,grad_norm: 0.9999995929583828, iteration: 406629
loss: 1.0162897109985352,grad_norm: 0.8247937320597443, iteration: 406630
loss: 1.065114140510559,grad_norm: 0.9999999442501879, iteration: 406631
loss: 1.0063470602035522,grad_norm: 0.8500857552817066, iteration: 406632
loss: 0.9655410647392273,grad_norm: 0.5892345648701298, iteration: 406633
loss: 1.0311925411224365,grad_norm: 0.9988410259211884, iteration: 406634
loss: 1.0305293798446655,grad_norm: 0.7716746322799332, iteration: 406635
loss: 1.0417585372924805,grad_norm: 0.7813613825481955, iteration: 406636
loss: 0.9719403386116028,grad_norm: 0.7671157938905429, iteration: 406637
loss: 1.007277011871338,grad_norm: 0.651058140327231, iteration: 406638
loss: 0.9885622262954712,grad_norm: 0.8333491296540285, iteration: 406639
loss: 1.0013487339019775,grad_norm: 0.7553103316573695, iteration: 406640
loss: 0.9921422600746155,grad_norm: 0.8498756622717208, iteration: 406641
loss: 0.9634677767753601,grad_norm: 0.6595949086476818, iteration: 406642
loss: 1.1082168817520142,grad_norm: 0.9999994695865005, iteration: 406643
loss: 0.9714248180389404,grad_norm: 0.8547579869666632, iteration: 406644
loss: 0.9914420247077942,grad_norm: 0.8033013699784739, iteration: 406645
loss: 1.0449981689453125,grad_norm: 0.8603533550853316, iteration: 406646
loss: 0.9997237920761108,grad_norm: 0.8704744329765214, iteration: 406647
loss: 0.9544925689697266,grad_norm: 0.7056206786032608, iteration: 406648
loss: 1.0103574991226196,grad_norm: 0.8973831658488454, iteration: 406649
loss: 1.053121566772461,grad_norm: 0.8955000015606291, iteration: 406650
loss: 1.0125741958618164,grad_norm: 0.8892008600437198, iteration: 406651
loss: 1.076696753501892,grad_norm: 0.9999996801793934, iteration: 406652
loss: 1.0356394052505493,grad_norm: 0.6810946176495936, iteration: 406653
loss: 0.9916026592254639,grad_norm: 0.6997829360297191, iteration: 406654
loss: 1.0032528638839722,grad_norm: 0.7650059965369931, iteration: 406655
loss: 1.0108493566513062,grad_norm: 0.9550305757324193, iteration: 406656
loss: 1.0051872730255127,grad_norm: 0.869962878760387, iteration: 406657
loss: 0.9948520660400391,grad_norm: 0.6375717798837942, iteration: 406658
loss: 1.0096948146820068,grad_norm: 0.9999998753965273, iteration: 406659
loss: 0.9961638450622559,grad_norm: 0.8168826376705174, iteration: 406660
loss: 0.9547035098075867,grad_norm: 0.7160188651025554, iteration: 406661
loss: 1.0050328969955444,grad_norm: 0.9999991969484722, iteration: 406662
loss: 0.9959753751754761,grad_norm: 0.8410227823823648, iteration: 406663
loss: 1.0259299278259277,grad_norm: 0.671031835600528, iteration: 406664
loss: 0.996178150177002,grad_norm: 0.7411208224645133, iteration: 406665
loss: 0.9605799913406372,grad_norm: 0.8686020634656609, iteration: 406666
loss: 1.0068875551223755,grad_norm: 0.8693666364248066, iteration: 406667
loss: 1.071901559829712,grad_norm: 0.9822173709676938, iteration: 406668
loss: 1.0128538608551025,grad_norm: 1.0000000565901925, iteration: 406669
loss: 1.0218408107757568,grad_norm: 0.774012647905951, iteration: 406670
loss: 1.0323169231414795,grad_norm: 0.9999992445270635, iteration: 406671
loss: 1.020928978919983,grad_norm: 0.8952522567859582, iteration: 406672
loss: 0.9824600219726562,grad_norm: 0.6732991791293828, iteration: 406673
loss: 0.981308102607727,grad_norm: 0.612891680367596, iteration: 406674
loss: 1.055190920829773,grad_norm: 0.9999994395803635, iteration: 406675
loss: 1.0694100856781006,grad_norm: 0.9704394805960956, iteration: 406676
loss: 0.9624595046043396,grad_norm: 0.7712437663272395, iteration: 406677
loss: 0.9819715023040771,grad_norm: 0.754335008918815, iteration: 406678
loss: 0.9774326682090759,grad_norm: 0.8153830482669847, iteration: 406679
loss: 1.067976713180542,grad_norm: 0.8992164391987897, iteration: 406680
loss: 1.0184471607208252,grad_norm: 0.7052407489210692, iteration: 406681
loss: 0.9705596566200256,grad_norm: 0.7466803459300807, iteration: 406682
loss: 0.9946828484535217,grad_norm: 0.9235467374551823, iteration: 406683
loss: 1.0002559423446655,grad_norm: 0.8360385473785075, iteration: 406684
loss: 0.9997796416282654,grad_norm: 0.6961727116039482, iteration: 406685
loss: 0.983949601650238,grad_norm: 0.7368503262585142, iteration: 406686
loss: 1.0306706428527832,grad_norm: 0.7991607421727346, iteration: 406687
loss: 1.1683982610702515,grad_norm: 0.7986102842615681, iteration: 406688
loss: 1.031947135925293,grad_norm: 0.6781861645301546, iteration: 406689
loss: 0.991396427154541,grad_norm: 0.7041435679505595, iteration: 406690
loss: 0.9981177449226379,grad_norm: 0.9999995691915299, iteration: 406691
loss: 0.9881890416145325,grad_norm: 0.9218239599000106, iteration: 406692
loss: 1.0147477388381958,grad_norm: 0.8041933537956364, iteration: 406693
loss: 1.021843433380127,grad_norm: 0.8511512185826539, iteration: 406694
loss: 1.00288987159729,grad_norm: 0.9999994793025135, iteration: 406695
loss: 1.0372941493988037,grad_norm: 0.8803680136880528, iteration: 406696
loss: 0.9944977760314941,grad_norm: 0.7107713686974984, iteration: 406697
loss: 0.9989271759986877,grad_norm: 0.830708508485591, iteration: 406698
loss: 1.0100053548812866,grad_norm: 0.5581829367998238, iteration: 406699
loss: 0.9925515055656433,grad_norm: 0.8373474045595972, iteration: 406700
loss: 1.006861686706543,grad_norm: 0.9999996638922084, iteration: 406701
loss: 1.0156844854354858,grad_norm: 0.8381864639306832, iteration: 406702
loss: 0.970628023147583,grad_norm: 0.7446223874545109, iteration: 406703
loss: 1.0093638896942139,grad_norm: 0.7496363189584038, iteration: 406704
loss: 0.9981642961502075,grad_norm: 0.7463880215316713, iteration: 406705
loss: 1.0117024183273315,grad_norm: 0.710603930034031, iteration: 406706
loss: 1.0015898942947388,grad_norm: 0.9801830228258802, iteration: 406707
loss: 1.1851832866668701,grad_norm: 0.999999081825742, iteration: 406708
loss: 1.0609242916107178,grad_norm: 0.6941757431425924, iteration: 406709
loss: 1.01399564743042,grad_norm: 0.7580102097446042, iteration: 406710
loss: 0.9821618795394897,grad_norm: 0.885263888995121, iteration: 406711
loss: 0.9953652620315552,grad_norm: 0.8121472746939664, iteration: 406712
loss: 1.015273094177246,grad_norm: 0.5640313467801842, iteration: 406713
loss: 1.018769383430481,grad_norm: 0.9999996393155904, iteration: 406714
loss: 0.9833717346191406,grad_norm: 0.7173534870775216, iteration: 406715
loss: 1.0244441032409668,grad_norm: 0.7814193072965975, iteration: 406716
loss: 0.9982139468193054,grad_norm: 0.9999993135517536, iteration: 406717
loss: 0.9879627823829651,grad_norm: 0.9999996888411028, iteration: 406718
loss: 0.9981837868690491,grad_norm: 0.7161514627319043, iteration: 406719
loss: 0.9680585861206055,grad_norm: 0.8199492382902328, iteration: 406720
loss: 1.0317353010177612,grad_norm: 0.9492152718513784, iteration: 406721
loss: 0.9989103078842163,grad_norm: 0.7235051709286942, iteration: 406722
loss: 0.9915392994880676,grad_norm: 0.6619932885286939, iteration: 406723
loss: 1.0337097644805908,grad_norm: 0.8063149352575173, iteration: 406724
loss: 1.0141862630844116,grad_norm: 0.7454752135813608, iteration: 406725
loss: 1.0242083072662354,grad_norm: 0.9360301943132855, iteration: 406726
loss: 0.985897958278656,grad_norm: 0.7568580082725906, iteration: 406727
loss: 1.0297739505767822,grad_norm: 0.7817670934730881, iteration: 406728
loss: 0.9914462566375732,grad_norm: 0.7673776755256302, iteration: 406729
loss: 1.0289219617843628,grad_norm: 0.9037240731815523, iteration: 406730
loss: 0.9833132028579712,grad_norm: 0.7568573765407379, iteration: 406731
loss: 1.0223193168640137,grad_norm: 0.9101772360831918, iteration: 406732
loss: 1.118383765220642,grad_norm: 0.9999991868925636, iteration: 406733
loss: 1.0992120504379272,grad_norm: 0.9999991743756171, iteration: 406734
loss: 1.014800786972046,grad_norm: 0.809299911239892, iteration: 406735
loss: 0.9755287766456604,grad_norm: 0.851278427342983, iteration: 406736
loss: 1.0207515954971313,grad_norm: 0.8360387336025634, iteration: 406737
loss: 1.01396906375885,grad_norm: 0.9030398439117785, iteration: 406738
loss: 1.061732292175293,grad_norm: 0.9999994024711356, iteration: 406739
loss: 1.0654926300048828,grad_norm: 0.8524846720476542, iteration: 406740
loss: 0.9781623482704163,grad_norm: 0.8719183284800404, iteration: 406741
loss: 0.9647057056427002,grad_norm: 0.7947884749593507, iteration: 406742
loss: 1.0110708475112915,grad_norm: 0.7571078929264882, iteration: 406743
loss: 1.0213115215301514,grad_norm: 0.8923789617240012, iteration: 406744
loss: 1.0281189680099487,grad_norm: 0.7196070044410011, iteration: 406745
loss: 1.0150234699249268,grad_norm: 0.8170515374991406, iteration: 406746
loss: 1.0512793064117432,grad_norm: 0.9999993240967964, iteration: 406747
loss: 1.027571439743042,grad_norm: 0.9113608646268708, iteration: 406748
loss: 0.9942578673362732,grad_norm: 0.9634205115462812, iteration: 406749
loss: 1.0090467929840088,grad_norm: 0.8026189822457335, iteration: 406750
loss: 1.008737564086914,grad_norm: 0.6966002667383663, iteration: 406751
loss: 0.9876222014427185,grad_norm: 0.7336625460098187, iteration: 406752
loss: 1.0695816278457642,grad_norm: 0.9999998581051472, iteration: 406753
loss: 1.0076359510421753,grad_norm: 0.9999991106110099, iteration: 406754
loss: 1.0039364099502563,grad_norm: 1.0000000644113998, iteration: 406755
loss: 1.0063295364379883,grad_norm: 0.9566870012631589, iteration: 406756
loss: 1.011470913887024,grad_norm: 0.6704490239136601, iteration: 406757
loss: 0.9815757274627686,grad_norm: 0.7788849311544561, iteration: 406758
loss: 1.0041056871414185,grad_norm: 0.8058143553506091, iteration: 406759
loss: 1.0011157989501953,grad_norm: 0.7821638681578436, iteration: 406760
loss: 1.0175114870071411,grad_norm: 0.6997114952669375, iteration: 406761
loss: 1.0013902187347412,grad_norm: 0.8811606768766177, iteration: 406762
loss: 1.0176193714141846,grad_norm: 0.99999976175904, iteration: 406763
loss: 0.9740990996360779,grad_norm: 0.7431156342128948, iteration: 406764
loss: 1.0108157396316528,grad_norm: 0.8589166779435955, iteration: 406765
loss: 1.0012954473495483,grad_norm: 0.8389911129616637, iteration: 406766
loss: 1.0057756900787354,grad_norm: 0.7040426456543164, iteration: 406767
loss: 1.0043307542800903,grad_norm: 0.794568459331427, iteration: 406768
loss: 0.9891563057899475,grad_norm: 0.9999992128384411, iteration: 406769
loss: 1.0012104511260986,grad_norm: 0.9252077816594066, iteration: 406770
loss: 0.9937087297439575,grad_norm: 0.810221626182205, iteration: 406771
loss: 0.9979479908943176,grad_norm: 0.9999994524227962, iteration: 406772
loss: 1.0057899951934814,grad_norm: 0.7865960794209332, iteration: 406773
loss: 0.9974860548973083,grad_norm: 0.99999903437551, iteration: 406774
loss: 1.0217431783676147,grad_norm: 0.6434874081227987, iteration: 406775
loss: 0.9675970673561096,grad_norm: 0.8195289393088114, iteration: 406776
loss: 1.0700515508651733,grad_norm: 0.7702700735333621, iteration: 406777
loss: 0.9946929216384888,grad_norm: 0.7947268637957595, iteration: 406778
loss: 1.0767689943313599,grad_norm: 0.8249194236736214, iteration: 406779
loss: 0.9983907341957092,grad_norm: 0.77772726263163, iteration: 406780
loss: 1.019001841545105,grad_norm: 0.832220654798372, iteration: 406781
loss: 0.9998372793197632,grad_norm: 0.8449824925117523, iteration: 406782
loss: 1.009397029876709,grad_norm: 0.7488848687737027, iteration: 406783
loss: 0.9989450573921204,grad_norm: 0.9999997082691909, iteration: 406784
loss: 0.9996269345283508,grad_norm: 0.8249801731842734, iteration: 406785
loss: 1.009022831916809,grad_norm: 0.9999999065582553, iteration: 406786
loss: 1.035763144493103,grad_norm: 0.6882033262192879, iteration: 406787
loss: 1.0477443933486938,grad_norm: 0.9999990816619986, iteration: 406788
loss: 1.0705549716949463,grad_norm: 0.8308667047106206, iteration: 406789
loss: 1.0039304494857788,grad_norm: 0.7419024538949718, iteration: 406790
loss: 0.9867405891418457,grad_norm: 0.7568696155392058, iteration: 406791
loss: 1.0064020156860352,grad_norm: 0.8171246139051032, iteration: 406792
loss: 1.0111950635910034,grad_norm: 0.9999991930546603, iteration: 406793
loss: 1.0077228546142578,grad_norm: 0.999999903373352, iteration: 406794
loss: 0.9750545024871826,grad_norm: 0.8825778010945983, iteration: 406795
loss: 1.02652907371521,grad_norm: 0.7631151146490319, iteration: 406796
loss: 1.040437936782837,grad_norm: 0.9999992086519285, iteration: 406797
loss: 0.9934211373329163,grad_norm: 0.8211496344497811, iteration: 406798
loss: 1.0425382852554321,grad_norm: 0.7168167570950427, iteration: 406799
loss: 1.0735104084014893,grad_norm: 0.7645564054605806, iteration: 406800
loss: 1.1136081218719482,grad_norm: 0.9999999616484948, iteration: 406801
loss: 1.0216858386993408,grad_norm: 0.9999991854616849, iteration: 406802
loss: 1.0156210660934448,grad_norm: 0.810799835814298, iteration: 406803
loss: 1.0349799394607544,grad_norm: 0.9908638209029136, iteration: 406804
loss: 1.0162116289138794,grad_norm: 0.9999990080678911, iteration: 406805
loss: 1.017824411392212,grad_norm: 0.8213380120430858, iteration: 406806
loss: 0.9928734302520752,grad_norm: 0.7743166753677541, iteration: 406807
loss: 1.0288960933685303,grad_norm: 0.7371671971333381, iteration: 406808
loss: 0.9775766134262085,grad_norm: 0.8311615042793854, iteration: 406809
loss: 1.0462912321090698,grad_norm: 0.8707207610063835, iteration: 406810
loss: 0.9877609014511108,grad_norm: 0.927026278132074, iteration: 406811
loss: 1.0032116174697876,grad_norm: 0.7618036909285247, iteration: 406812
loss: 0.9791638851165771,grad_norm: 0.6672623218700455, iteration: 406813
loss: 0.9988489151000977,grad_norm: 0.8847749331112045, iteration: 406814
loss: 1.0220184326171875,grad_norm: 0.999999330430012, iteration: 406815
loss: 1.0209269523620605,grad_norm: 0.8529267696940868, iteration: 406816
loss: 1.0285663604736328,grad_norm: 0.8150099697647151, iteration: 406817
loss: 1.0034396648406982,grad_norm: 0.6450029224688018, iteration: 406818
loss: 1.0120245218276978,grad_norm: 0.8175971921963958, iteration: 406819
loss: 0.9932840466499329,grad_norm: 0.7544872861615716, iteration: 406820
loss: 1.032624363899231,grad_norm: 0.8372519545445571, iteration: 406821
loss: 1.0604640245437622,grad_norm: 0.9999999022963205, iteration: 406822
loss: 1.0713372230529785,grad_norm: 0.7122510486633685, iteration: 406823
loss: 0.9644452929496765,grad_norm: 0.7144428675054789, iteration: 406824
loss: 0.9884282946586609,grad_norm: 0.7758421187903866, iteration: 406825
loss: 1.0128059387207031,grad_norm: 0.7434682314871569, iteration: 406826
loss: 1.0067387819290161,grad_norm: 0.7641675436839471, iteration: 406827
loss: 0.9783012270927429,grad_norm: 0.7996016020723379, iteration: 406828
loss: 1.0500969886779785,grad_norm: 0.9999995955193731, iteration: 406829
loss: 1.022471308708191,grad_norm: 0.9138233241227768, iteration: 406830
loss: 1.009907603263855,grad_norm: 0.8264496012343252, iteration: 406831
loss: 1.071045994758606,grad_norm: 0.9999991876248322, iteration: 406832
loss: 0.9625328183174133,grad_norm: 0.9594680520726024, iteration: 406833
loss: 1.014278531074524,grad_norm: 0.824881718160323, iteration: 406834
loss: 0.9953046441078186,grad_norm: 0.9999991807998733, iteration: 406835
loss: 1.0091198682785034,grad_norm: 0.9917939805665681, iteration: 406836
loss: 0.9703782796859741,grad_norm: 0.9999998787768468, iteration: 406837
loss: 1.0523375272750854,grad_norm: 0.6178358887167904, iteration: 406838
loss: 0.9914662837982178,grad_norm: 0.7878017293004372, iteration: 406839
loss: 1.0454434156417847,grad_norm: 0.9999999284052645, iteration: 406840
loss: 1.2491523027420044,grad_norm: 0.9999990050632851, iteration: 406841
loss: 1.0136381387710571,grad_norm: 0.786147156715698, iteration: 406842
loss: 1.101402997970581,grad_norm: 0.9999997740591634, iteration: 406843
loss: 1.0401495695114136,grad_norm: 0.7854398192994829, iteration: 406844
loss: 0.9939755201339722,grad_norm: 0.7878931973268724, iteration: 406845
loss: 0.9935722351074219,grad_norm: 0.7773084319075442, iteration: 406846
loss: 0.977310836315155,grad_norm: 0.7845928376456294, iteration: 406847
loss: 1.0794140100479126,grad_norm: 0.9999994948688989, iteration: 406848
loss: 1.05076003074646,grad_norm: 0.8298460910551964, iteration: 406849
loss: 0.9948650598526001,grad_norm: 0.6786754914829565, iteration: 406850
loss: 0.9676676988601685,grad_norm: 0.7240221623093641, iteration: 406851
loss: 0.9625535011291504,grad_norm: 0.7085040684916772, iteration: 406852
loss: 1.051513433456421,grad_norm: 0.6924328084249931, iteration: 406853
loss: 0.9825422167778015,grad_norm: 0.8363016602722942, iteration: 406854
loss: 1.0212335586547852,grad_norm: 0.7834905959288987, iteration: 406855
loss: 1.0177966356277466,grad_norm: 0.9999991096519785, iteration: 406856
loss: 1.0111045837402344,grad_norm: 0.7178475805102009, iteration: 406857
loss: 1.0415666103363037,grad_norm: 0.8289351226285984, iteration: 406858
loss: 0.994086742401123,grad_norm: 0.9605438539562192, iteration: 406859
loss: 1.0813080072402954,grad_norm: 0.9999998033005062, iteration: 406860
loss: 1.0189106464385986,grad_norm: 0.9999992477856849, iteration: 406861
loss: 0.9929690957069397,grad_norm: 0.7176928646359422, iteration: 406862
loss: 1.002021074295044,grad_norm: 0.9543347128718841, iteration: 406863
loss: 1.02690851688385,grad_norm: 0.6868400690257408, iteration: 406864
loss: 1.0265789031982422,grad_norm: 0.8046917005287807, iteration: 406865
loss: 1.010624647140503,grad_norm: 0.7987690450611862, iteration: 406866
loss: 1.0280964374542236,grad_norm: 0.8078399390420739, iteration: 406867
loss: 1.0248216390609741,grad_norm: 0.832935813561821, iteration: 406868
loss: 1.0173053741455078,grad_norm: 0.8835759532153776, iteration: 406869
loss: 1.0153279304504395,grad_norm: 0.8132923121737556, iteration: 406870
loss: 0.9763396978378296,grad_norm: 0.8283590797732194, iteration: 406871
loss: 1.1047412157058716,grad_norm: 0.9999995064369207, iteration: 406872
loss: 0.9766947031021118,grad_norm: 0.8329078598684554, iteration: 406873
loss: 1.001589298248291,grad_norm: 0.717096763503771, iteration: 406874
loss: 1.0210299491882324,grad_norm: 0.9999994687367317, iteration: 406875
loss: 1.0832027196884155,grad_norm: 0.7851842760734271, iteration: 406876
loss: 1.0011463165283203,grad_norm: 0.8734426560576339, iteration: 406877
loss: 1.0445990562438965,grad_norm: 0.8916128516609145, iteration: 406878
loss: 1.116434931755066,grad_norm: 0.7961762745978614, iteration: 406879
loss: 0.9901149272918701,grad_norm: 0.6846214179856795, iteration: 406880
loss: 1.0121158361434937,grad_norm: 0.8023029265255723, iteration: 406881
loss: 0.968191385269165,grad_norm: 0.7758177778420551, iteration: 406882
loss: 0.972300112247467,grad_norm: 0.8622975254416757, iteration: 406883
loss: 0.9803167581558228,grad_norm: 0.9361225775025125, iteration: 406884
loss: 1.1155283451080322,grad_norm: 0.9999998558013132, iteration: 406885
loss: 1.0517569780349731,grad_norm: 0.7311143928782049, iteration: 406886
loss: 1.0443803071975708,grad_norm: 0.9644451370366822, iteration: 406887
loss: 0.9667856097221375,grad_norm: 0.9184833522764903, iteration: 406888
loss: 0.9546394944190979,grad_norm: 0.719310080606321, iteration: 406889
loss: 0.9519028663635254,grad_norm: 0.8846677497518458, iteration: 406890
loss: 0.9801484942436218,grad_norm: 0.7784113867522305, iteration: 406891
loss: 0.9854440689086914,grad_norm: 0.7479140549889479, iteration: 406892
loss: 0.9770209789276123,grad_norm: 0.9999991400586083, iteration: 406893
loss: 0.9912513494491577,grad_norm: 0.9551428768505341, iteration: 406894
loss: 1.0557905435562134,grad_norm: 0.9999992378638856, iteration: 406895
loss: 0.9791556000709534,grad_norm: 0.8155066366264949, iteration: 406896
loss: 1.0164217948913574,grad_norm: 0.6754567678714064, iteration: 406897
loss: 1.0786261558532715,grad_norm: 0.9999992564344026, iteration: 406898
loss: 0.962170422077179,grad_norm: 0.6985766754987769, iteration: 406899
loss: 1.0382689237594604,grad_norm: 0.9999996948743882, iteration: 406900
loss: 0.9718931913375854,grad_norm: 0.8736674966863114, iteration: 406901
loss: 0.9753957390785217,grad_norm: 0.9575293414500632, iteration: 406902
loss: 0.9865272045135498,grad_norm: 0.7987435816097107, iteration: 406903
loss: 1.2126821279525757,grad_norm: 0.9999992255706951, iteration: 406904
loss: 0.9977555871009827,grad_norm: 0.7676316400984871, iteration: 406905
loss: 1.0134609937667847,grad_norm: 0.7749805024481617, iteration: 406906
loss: 1.2028313875198364,grad_norm: 0.9999997192960935, iteration: 406907
loss: 0.9840505719184875,grad_norm: 0.9999990099179357, iteration: 406908
loss: 0.9669533371925354,grad_norm: 0.8080207854993882, iteration: 406909
loss: 1.0161985158920288,grad_norm: 0.5711645144815668, iteration: 406910
loss: 1.0288093090057373,grad_norm: 0.9999993576030074, iteration: 406911
loss: 1.009143590927124,grad_norm: 0.7794806053343317, iteration: 406912
loss: 0.9962723851203918,grad_norm: 0.6881662071065989, iteration: 406913
loss: 0.985248863697052,grad_norm: 0.893468481900873, iteration: 406914
loss: 1.0928491353988647,grad_norm: 0.9999997463152094, iteration: 406915
loss: 1.0243585109710693,grad_norm: 0.7338205391659826, iteration: 406916
loss: 0.9880643486976624,grad_norm: 0.6612576248873518, iteration: 406917
loss: 1.0068572759628296,grad_norm: 0.8523671304402096, iteration: 406918
loss: 1.0298327207565308,grad_norm: 0.6075835971351885, iteration: 406919
loss: 1.0063996315002441,grad_norm: 0.8313925570930267, iteration: 406920
loss: 1.0195385217666626,grad_norm: 0.9999996634653109, iteration: 406921
loss: 0.9983915090560913,grad_norm: 0.7857384673948961, iteration: 406922
loss: 1.0172656774520874,grad_norm: 0.9737445761203609, iteration: 406923
loss: 1.0597511529922485,grad_norm: 0.9999999247193178, iteration: 406924
loss: 1.1103932857513428,grad_norm: 0.9999994469157719, iteration: 406925
loss: 1.0155876874923706,grad_norm: 0.7581710804089378, iteration: 406926
loss: 0.9955931305885315,grad_norm: 0.8131570240545063, iteration: 406927
loss: 0.9883936643600464,grad_norm: 0.6971046892229082, iteration: 406928
loss: 1.0322240591049194,grad_norm: 0.959842649106725, iteration: 406929
loss: 0.9986588954925537,grad_norm: 0.7324707675548858, iteration: 406930
loss: 1.0118939876556396,grad_norm: 0.9369791886566164, iteration: 406931
loss: 1.0114505290985107,grad_norm: 0.6897953796680059, iteration: 406932
loss: 0.9882558584213257,grad_norm: 0.633905867334632, iteration: 406933
loss: 1.022006630897522,grad_norm: 0.7255056834627418, iteration: 406934
loss: 0.9784912467002869,grad_norm: 0.7510293295612436, iteration: 406935
loss: 1.1352262496948242,grad_norm: 0.9426483288491946, iteration: 406936
loss: 0.9813132882118225,grad_norm: 0.7261316842428446, iteration: 406937
loss: 1.0240880250930786,grad_norm: 0.7293519043650543, iteration: 406938
loss: 0.9815319180488586,grad_norm: 0.7591995259420403, iteration: 406939
loss: 1.0176188945770264,grad_norm: 0.8718807684432386, iteration: 406940
loss: 1.0334386825561523,grad_norm: 0.7538588071626727, iteration: 406941
loss: 1.138031005859375,grad_norm: 0.9999998195297025, iteration: 406942
loss: 0.9594511985778809,grad_norm: 0.844700856823937, iteration: 406943
loss: 0.9654393196105957,grad_norm: 0.7966859176675241, iteration: 406944
loss: 0.976470947265625,grad_norm: 0.7411838844441938, iteration: 406945
loss: 1.0125529766082764,grad_norm: 0.8666728334289571, iteration: 406946
loss: 1.0180504322052002,grad_norm: 0.7876413983658308, iteration: 406947
loss: 0.9958876967430115,grad_norm: 0.8115056206515366, iteration: 406948
loss: 1.0535770654678345,grad_norm: 0.9999989567431601, iteration: 406949
loss: 0.9939265251159668,grad_norm: 0.7473844294312584, iteration: 406950
loss: 1.0028066635131836,grad_norm: 0.999999810973058, iteration: 406951
loss: 1.2799873352050781,grad_norm: 0.9999996912171932, iteration: 406952
loss: 1.02983820438385,grad_norm: 0.999999959322836, iteration: 406953
loss: 1.1283854246139526,grad_norm: 0.9999996459861561, iteration: 406954
loss: 1.0292904376983643,grad_norm: 0.67314335677187, iteration: 406955
loss: 1.003943681716919,grad_norm: 0.7106393886203185, iteration: 406956
loss: 0.9814651012420654,grad_norm: 0.7246894745336482, iteration: 406957
loss: 1.0099502801895142,grad_norm: 0.9999988917014547, iteration: 406958
loss: 1.0787655115127563,grad_norm: 0.9999995490600151, iteration: 406959
loss: 1.0371193885803223,grad_norm: 0.7831909563144299, iteration: 406960
loss: 0.9666717052459717,grad_norm: 0.7748745528569609, iteration: 406961
loss: 1.0720893144607544,grad_norm: 0.8003637546483596, iteration: 406962
loss: 0.9905443787574768,grad_norm: 0.8403053423374902, iteration: 406963
loss: 0.9764794707298279,grad_norm: 0.6656060251551261, iteration: 406964
loss: 1.0566002130508423,grad_norm: 0.9999994488100841, iteration: 406965
loss: 1.039538860321045,grad_norm: 0.9999998742781473, iteration: 406966
loss: 0.9902901649475098,grad_norm: 0.8861741629941321, iteration: 406967
loss: 1.029531478881836,grad_norm: 0.9767934105667248, iteration: 406968
loss: 1.0817011594772339,grad_norm: 0.9999995928241442, iteration: 406969
loss: 1.042749047279358,grad_norm: 0.9999995619035985, iteration: 406970
loss: 0.9829863905906677,grad_norm: 0.7118088528154319, iteration: 406971
loss: 0.9963864684104919,grad_norm: 0.881324631277401, iteration: 406972
loss: 0.9982039332389832,grad_norm: 0.6470822561544561, iteration: 406973
loss: 0.9676578044891357,grad_norm: 0.7576281718991117, iteration: 406974
loss: 1.0192272663116455,grad_norm: 0.8294900753586106, iteration: 406975
loss: 1.000198483467102,grad_norm: 0.6625936005126962, iteration: 406976
loss: 1.0211944580078125,grad_norm: 0.9999999152916828, iteration: 406977
loss: 1.041245460510254,grad_norm: 0.8246712010208439, iteration: 406978
loss: 1.0201654434204102,grad_norm: 0.7787388860285137, iteration: 406979
loss: 0.9844649434089661,grad_norm: 0.999999712289742, iteration: 406980
loss: 0.9977669715881348,grad_norm: 0.7532632457967506, iteration: 406981
loss: 1.0751911401748657,grad_norm: 0.9999992974145603, iteration: 406982
loss: 0.9937679171562195,grad_norm: 0.9999990800641957, iteration: 406983
loss: 1.0006742477416992,grad_norm: 0.7699987712105519, iteration: 406984
loss: 1.045737624168396,grad_norm: 0.9342361483385826, iteration: 406985
loss: 0.9882909655570984,grad_norm: 0.889911305817921, iteration: 406986
loss: 1.0237274169921875,grad_norm: 0.7322774689008077, iteration: 406987
loss: 1.0105310678482056,grad_norm: 0.7176589500631535, iteration: 406988
loss: 1.0196514129638672,grad_norm: 0.7102267888245508, iteration: 406989
loss: 1.0001893043518066,grad_norm: 0.8513384334311832, iteration: 406990
loss: 1.0594195127487183,grad_norm: 0.9625085002457131, iteration: 406991
loss: 0.9900068044662476,grad_norm: 0.7734687972074745, iteration: 406992
loss: 0.996759295463562,grad_norm: 0.695559886190393, iteration: 406993
loss: 0.9288272857666016,grad_norm: 0.7799503726280479, iteration: 406994
loss: 0.9898696541786194,grad_norm: 0.9474734469126908, iteration: 406995
loss: 1.0026278495788574,grad_norm: 0.7231043491112426, iteration: 406996
loss: 1.0054515600204468,grad_norm: 0.6602837176048161, iteration: 406997
loss: 1.0340466499328613,grad_norm: 0.9999997188318558, iteration: 406998
loss: 1.033051609992981,grad_norm: 0.9999999407791792, iteration: 406999
loss: 0.9949931502342224,grad_norm: 0.8220232015723081, iteration: 407000
loss: 0.9718827605247498,grad_norm: 0.8306575797994125, iteration: 407001
loss: 1.0122524499893188,grad_norm: 0.6901772084115698, iteration: 407002
loss: 1.0280817747116089,grad_norm: 0.7788408424017297, iteration: 407003
loss: 0.9930712580680847,grad_norm: 0.9276775328670797, iteration: 407004
loss: 1.0243439674377441,grad_norm: 0.7887168133555258, iteration: 407005
loss: 1.0234284400939941,grad_norm: 0.9999992428784311, iteration: 407006
loss: 1.0188521146774292,grad_norm: 0.8254143015868173, iteration: 407007
loss: 0.9665051102638245,grad_norm: 0.8196666190305649, iteration: 407008
loss: 1.013927698135376,grad_norm: 0.7577454883426851, iteration: 407009
loss: 1.0094695091247559,grad_norm: 0.7778015053566382, iteration: 407010
loss: 1.0009957551956177,grad_norm: 0.7426147799610529, iteration: 407011
loss: 1.0121924877166748,grad_norm: 0.8794774415434543, iteration: 407012
loss: 0.9753840565681458,grad_norm: 0.7820884428624357, iteration: 407013
loss: 0.956793487071991,grad_norm: 0.6539973958802145, iteration: 407014
loss: 1.0454840660095215,grad_norm: 0.936806401322193, iteration: 407015
loss: 0.9970076084136963,grad_norm: 0.9999991012987826, iteration: 407016
loss: 1.0201222896575928,grad_norm: 0.8259509331936222, iteration: 407017
loss: 0.9426958560943604,grad_norm: 0.6980380028825092, iteration: 407018
loss: 0.9837068915367126,grad_norm: 0.7253707183360781, iteration: 407019
loss: 1.0014245510101318,grad_norm: 0.7244727758385536, iteration: 407020
loss: 1.0063031911849976,grad_norm: 0.7550682032085664, iteration: 407021
loss: 0.9795390963554382,grad_norm: 0.8921328735236973, iteration: 407022
loss: 1.025858759880066,grad_norm: 0.6958548914569738, iteration: 407023
loss: 1.0497373342514038,grad_norm: 0.8403406529394923, iteration: 407024
loss: 1.043530821800232,grad_norm: 0.999999648945291, iteration: 407025
loss: 1.0081110000610352,grad_norm: 0.7385999167429079, iteration: 407026
loss: 1.0260982513427734,grad_norm: 0.8446392837911013, iteration: 407027
loss: 0.9811128377914429,grad_norm: 0.8176870203808057, iteration: 407028
loss: 1.0016381740570068,grad_norm: 0.8243679965254934, iteration: 407029
loss: 1.0221918821334839,grad_norm: 0.9999999505846964, iteration: 407030
loss: 0.9949855804443359,grad_norm: 0.7449540465541553, iteration: 407031
loss: 0.9910748600959778,grad_norm: 0.921456954741156, iteration: 407032
loss: 0.9713516235351562,grad_norm: 0.9228257616878789, iteration: 407033
loss: 0.9559507966041565,grad_norm: 0.9087179260105186, iteration: 407034
loss: 1.0071661472320557,grad_norm: 0.9999997833301925, iteration: 407035
loss: 1.0353634357452393,grad_norm: 0.7355895441825046, iteration: 407036
loss: 1.0395900011062622,grad_norm: 0.6902488481123586, iteration: 407037
loss: 0.9858610033988953,grad_norm: 0.851354871780535, iteration: 407038
loss: 1.042919635772705,grad_norm: 0.9999994719949117, iteration: 407039
loss: 0.9623677730560303,grad_norm: 0.7704437017847796, iteration: 407040
loss: 0.9888776540756226,grad_norm: 0.9489893947586426, iteration: 407041
loss: 0.9949748516082764,grad_norm: 0.999999067619482, iteration: 407042
loss: 1.015908122062683,grad_norm: 1.0000000056284606, iteration: 407043
loss: 1.0016623735427856,grad_norm: 0.9999994449048242, iteration: 407044
loss: 1.002643346786499,grad_norm: 0.8276303730505429, iteration: 407045
loss: 1.0009626150131226,grad_norm: 0.8123371906911185, iteration: 407046
loss: 1.0010253190994263,grad_norm: 0.795449479020634, iteration: 407047
loss: 0.9984950423240662,grad_norm: 0.756495925192541, iteration: 407048
loss: 1.0444488525390625,grad_norm: 0.8364987023750006, iteration: 407049
loss: 1.0201025009155273,grad_norm: 0.73024660804224, iteration: 407050
loss: 1.0154805183410645,grad_norm: 0.9615963599340707, iteration: 407051
loss: 0.9751170873641968,grad_norm: 0.9999995833370459, iteration: 407052
loss: 0.9811834692955017,grad_norm: 0.7056253613230627, iteration: 407053
loss: 0.9873368740081787,grad_norm: 0.7182773058152846, iteration: 407054
loss: 0.9600296020507812,grad_norm: 0.7901158086589939, iteration: 407055
loss: 1.002350091934204,grad_norm: 0.8763252629291524, iteration: 407056
loss: 1.0219793319702148,grad_norm: 0.877632208942637, iteration: 407057
loss: 1.034724235534668,grad_norm: 0.7532857851158025, iteration: 407058
loss: 1.0257586240768433,grad_norm: 0.9999995747428444, iteration: 407059
loss: 1.0284425020217896,grad_norm: 0.7983081948505717, iteration: 407060
loss: 0.9645773768424988,grad_norm: 0.6334542421002975, iteration: 407061
loss: 0.9819843173027039,grad_norm: 0.7725782659910767, iteration: 407062
loss: 1.0038002729415894,grad_norm: 0.7745024105387843, iteration: 407063
loss: 0.9814226031303406,grad_norm: 0.6613265734173733, iteration: 407064
loss: 0.99533611536026,grad_norm: 0.6840405251314023, iteration: 407065
loss: 1.012166142463684,grad_norm: 0.9999996642383822, iteration: 407066
loss: 0.9881815314292908,grad_norm: 0.777210383768381, iteration: 407067
loss: 1.0203546285629272,grad_norm: 0.9999993745468545, iteration: 407068
loss: 1.2123233079910278,grad_norm: 0.9999998307388472, iteration: 407069
loss: 1.014847993850708,grad_norm: 0.7378229574923961, iteration: 407070
loss: 0.976176381111145,grad_norm: 0.9999992387653812, iteration: 407071
loss: 1.084019422531128,grad_norm: 0.9999999075994458, iteration: 407072
loss: 1.04400634765625,grad_norm: 0.9999996238822092, iteration: 407073
loss: 1.01512610912323,grad_norm: 0.767395916449679, iteration: 407074
loss: 1.139456033706665,grad_norm: 0.9999996035019414, iteration: 407075
loss: 1.0153697729110718,grad_norm: 0.82083623823274, iteration: 407076
loss: 1.0575792789459229,grad_norm: 0.8116184004036766, iteration: 407077
loss: 0.9903830885887146,grad_norm: 0.7250736090777771, iteration: 407078
loss: 0.982520580291748,grad_norm: 0.9999996073392079, iteration: 407079
loss: 1.0431466102600098,grad_norm: 0.9661093658766509, iteration: 407080
loss: 0.9865021705627441,grad_norm: 0.7709495254588455, iteration: 407081
loss: 1.083994746208191,grad_norm: 0.9999990718376859, iteration: 407082
loss: 1.075145959854126,grad_norm: 0.9999999086482245, iteration: 407083
loss: 1.0201903581619263,grad_norm: 0.9999994478027844, iteration: 407084
loss: 0.9964364171028137,grad_norm: 0.7642017914015834, iteration: 407085
loss: 0.9951784610748291,grad_norm: 0.7542641448051954, iteration: 407086
loss: 1.0935430526733398,grad_norm: 0.9999997112711044, iteration: 407087
loss: 1.0121254920959473,grad_norm: 0.8643794312016423, iteration: 407088
loss: 0.997492790222168,grad_norm: 0.688384630723811, iteration: 407089
loss: 1.0197241306304932,grad_norm: 0.6543220119567197, iteration: 407090
loss: 1.0280803442001343,grad_norm: 0.8383411948086602, iteration: 407091
loss: 0.9656383991241455,grad_norm: 0.7947153914877667, iteration: 407092
loss: 0.9894450306892395,grad_norm: 0.7824057534408573, iteration: 407093
loss: 0.9785058498382568,grad_norm: 0.9999992512623879, iteration: 407094
loss: 0.9907993078231812,grad_norm: 0.7257317757292716, iteration: 407095
loss: 0.997741162776947,grad_norm: 0.7214853968935296, iteration: 407096
loss: 1.0218558311462402,grad_norm: 0.6763972500862733, iteration: 407097
loss: 1.004120111465454,grad_norm: 0.7294527032311847, iteration: 407098
loss: 1.0143965482711792,grad_norm: 0.842025243072652, iteration: 407099
loss: 0.9777014255523682,grad_norm: 0.7773961891626614, iteration: 407100
loss: 1.0210696458816528,grad_norm: 0.9853093572489328, iteration: 407101
loss: 1.0032391548156738,grad_norm: 0.78224469878242, iteration: 407102
loss: 0.985180675983429,grad_norm: 0.7153560004618967, iteration: 407103
loss: 1.0326250791549683,grad_norm: 0.999999263130165, iteration: 407104
loss: 1.0012180805206299,grad_norm: 0.789732890335101, iteration: 407105
loss: 0.9943850040435791,grad_norm: 0.657616737616933, iteration: 407106
loss: 1.0077050924301147,grad_norm: 0.7143454976538678, iteration: 407107
loss: 1.0118601322174072,grad_norm: 0.8874002239053643, iteration: 407108
loss: 1.057091236114502,grad_norm: 0.9999994461121369, iteration: 407109
loss: 0.9778037071228027,grad_norm: 0.8726989229184128, iteration: 407110
loss: 1.099209189414978,grad_norm: 0.8726399465205634, iteration: 407111
loss: 0.9855794906616211,grad_norm: 0.8138248596213266, iteration: 407112
loss: 0.9811591506004333,grad_norm: 0.8255160583389581, iteration: 407113
loss: 1.1395729780197144,grad_norm: 0.9999992615560345, iteration: 407114
loss: 0.9867961406707764,grad_norm: 0.9999993797279021, iteration: 407115
loss: 1.0143940448760986,grad_norm: 0.8327756737619941, iteration: 407116
loss: 1.1047637462615967,grad_norm: 0.9999991454684596, iteration: 407117
loss: 1.1530946493148804,grad_norm: 0.9999992699188642, iteration: 407118
loss: 0.992639422416687,grad_norm: 0.7333340611532551, iteration: 407119
loss: 0.9879016280174255,grad_norm: 0.8383840060381117, iteration: 407120
loss: 1.021907091140747,grad_norm: 0.6905198045084602, iteration: 407121
loss: 0.9518789052963257,grad_norm: 0.8060527272753013, iteration: 407122
loss: 1.0745424032211304,grad_norm: 0.962068690617436, iteration: 407123
loss: 0.9992651343345642,grad_norm: 0.8248218789744661, iteration: 407124
loss: 0.9946499466896057,grad_norm: 0.7721907722202107, iteration: 407125
loss: 1.0018306970596313,grad_norm: 0.8360345309790245, iteration: 407126
loss: 1.0109745264053345,grad_norm: 0.7364840581541255, iteration: 407127
loss: 0.9595574736595154,grad_norm: 0.9999991718592488, iteration: 407128
loss: 1.020405650138855,grad_norm: 0.8786821867446712, iteration: 407129
loss: 1.0208953619003296,grad_norm: 0.7820572169532953, iteration: 407130
loss: 1.0247224569320679,grad_norm: 0.893528026826219, iteration: 407131
loss: 1.0283256769180298,grad_norm: 0.9999994158192025, iteration: 407132
loss: 0.9618441462516785,grad_norm: 0.7419895626299263, iteration: 407133
loss: 1.0120301246643066,grad_norm: 0.7483632695546518, iteration: 407134
loss: 0.9816496968269348,grad_norm: 0.8206912316181499, iteration: 407135
loss: 1.0273196697235107,grad_norm: 0.9999995056145008, iteration: 407136
loss: 0.9638511538505554,grad_norm: 0.8934101611609303, iteration: 407137
loss: 0.9905380606651306,grad_norm: 1.0000000066590748, iteration: 407138
loss: 0.9608733057975769,grad_norm: 0.9809554487832762, iteration: 407139
loss: 1.0664068460464478,grad_norm: 0.8852534077343506, iteration: 407140
loss: 0.9771907329559326,grad_norm: 0.7499830717464332, iteration: 407141
loss: 1.0166382789611816,grad_norm: 0.9999998306978021, iteration: 407142
loss: 1.0129461288452148,grad_norm: 0.7861218225548946, iteration: 407143
loss: 1.0019596815109253,grad_norm: 0.9999993900343574, iteration: 407144
loss: 1.0222270488739014,grad_norm: 0.9670018568772869, iteration: 407145
loss: 0.9901134371757507,grad_norm: 0.9605366923812012, iteration: 407146
loss: 0.963214635848999,grad_norm: 0.6847205038757215, iteration: 407147
loss: 1.003818154335022,grad_norm: 0.6950024726144195, iteration: 407148
loss: 0.9917923212051392,grad_norm: 0.9999991135420365, iteration: 407149
loss: 1.0199886560440063,grad_norm: 0.782572254187562, iteration: 407150
loss: 1.0200382471084595,grad_norm: 0.9999990922047108, iteration: 407151
loss: 1.0519484281539917,grad_norm: 0.999999170944688, iteration: 407152
loss: 1.0822099447250366,grad_norm: 0.9725300642755005, iteration: 407153
loss: 0.964758574962616,grad_norm: 0.7671520278508959, iteration: 407154
loss: 0.9578593969345093,grad_norm: 0.6313303600374863, iteration: 407155
loss: 1.0420849323272705,grad_norm: 0.7257067366932943, iteration: 407156
loss: 0.9883741140365601,grad_norm: 0.8079182029810469, iteration: 407157
loss: 1.0278669595718384,grad_norm: 0.9999995920923787, iteration: 407158
loss: 0.978008508682251,grad_norm: 0.7519770563693, iteration: 407159
loss: 0.9877789616584778,grad_norm: 0.721692883475868, iteration: 407160
loss: 1.0245811939239502,grad_norm: 0.7592473072036081, iteration: 407161
loss: 1.0099544525146484,grad_norm: 0.7387645328037933, iteration: 407162
loss: 1.002353310585022,grad_norm: 0.9626408280547489, iteration: 407163
loss: 0.9965202808380127,grad_norm: 0.8039416582752347, iteration: 407164
loss: 1.0491958856582642,grad_norm: 0.8535433800652451, iteration: 407165
loss: 1.0189838409423828,grad_norm: 0.8960539817801796, iteration: 407166
loss: 1.0553913116455078,grad_norm: 0.9999991196162727, iteration: 407167
loss: 0.9814957976341248,grad_norm: 0.7528911056154113, iteration: 407168
loss: 1.0187610387802124,grad_norm: 0.7079131705918618, iteration: 407169
loss: 1.0147007703781128,grad_norm: 0.9999999125363476, iteration: 407170
loss: 1.0279463529586792,grad_norm: 0.7557980525197782, iteration: 407171
loss: 1.006507158279419,grad_norm: 0.7318999609011915, iteration: 407172
loss: 1.005971074104309,grad_norm: 0.7065699378921058, iteration: 407173
loss: 0.9837242364883423,grad_norm: 0.6826425413887385, iteration: 407174
loss: 0.9758667945861816,grad_norm: 0.8436085770818573, iteration: 407175
loss: 0.9718593955039978,grad_norm: 0.895650424871113, iteration: 407176
loss: 0.9893120527267456,grad_norm: 0.9306653738192857, iteration: 407177
loss: 1.034191608428955,grad_norm: 0.9999991815386489, iteration: 407178
loss: 1.035469651222229,grad_norm: 0.83102169684454, iteration: 407179
loss: 1.0309487581253052,grad_norm: 0.9645723755066626, iteration: 407180
loss: 1.0222498178482056,grad_norm: 0.8645731112764695, iteration: 407181
loss: 1.0075790882110596,grad_norm: 0.8629288434607274, iteration: 407182
loss: 0.9722443222999573,grad_norm: 0.7614817444824951, iteration: 407183
loss: 0.9912368655204773,grad_norm: 0.936056157037293, iteration: 407184
loss: 0.9537410736083984,grad_norm: 0.880935789380821, iteration: 407185
loss: 1.0225801467895508,grad_norm: 0.9999990631723532, iteration: 407186
loss: 1.1803443431854248,grad_norm: 0.8944778730167599, iteration: 407187
loss: 1.0139150619506836,grad_norm: 0.6958512239558644, iteration: 407188
loss: 1.0061688423156738,grad_norm: 0.7401707300825601, iteration: 407189
loss: 1.0083413124084473,grad_norm: 0.8086934155239371, iteration: 407190
loss: 1.0051162242889404,grad_norm: 0.79418454533518, iteration: 407191
loss: 0.9918267130851746,grad_norm: 0.8477894243518986, iteration: 407192
loss: 0.9856864213943481,grad_norm: 0.8410629214645625, iteration: 407193
loss: 0.981485903263092,grad_norm: 0.999999004292014, iteration: 407194
loss: 1.0135077238082886,grad_norm: 0.6613084502787868, iteration: 407195
loss: 0.9758907556533813,grad_norm: 0.7636089683981362, iteration: 407196
loss: 0.9976447224617004,grad_norm: 0.7652235430178177, iteration: 407197
loss: 0.9956954717636108,grad_norm: 0.673380328588316, iteration: 407198
loss: 1.017094373703003,grad_norm: 0.7734500460866561, iteration: 407199
loss: 1.0079066753387451,grad_norm: 0.784319562447878, iteration: 407200
loss: 1.0230263471603394,grad_norm: 0.8621247814183454, iteration: 407201
loss: 0.9973188638687134,grad_norm: 0.7520824447033205, iteration: 407202
loss: 1.0390315055847168,grad_norm: 0.9770172737422712, iteration: 407203
loss: 1.0475339889526367,grad_norm: 0.9999997894110039, iteration: 407204
loss: 1.0600528717041016,grad_norm: 0.6771756866637194, iteration: 407205
loss: 1.013525366783142,grad_norm: 0.8155510990996948, iteration: 407206
loss: 0.9674777984619141,grad_norm: 0.715507058135581, iteration: 407207
loss: 1.0581728219985962,grad_norm: 0.9999994449649919, iteration: 407208
loss: 1.0044702291488647,grad_norm: 0.825780780054545, iteration: 407209
loss: 0.9655774831771851,grad_norm: 0.7831835519113493, iteration: 407210
loss: 1.0292936563491821,grad_norm: 0.7418473612443416, iteration: 407211
loss: 0.9579802751541138,grad_norm: 0.9168826389920024, iteration: 407212
loss: 1.0176491737365723,grad_norm: 0.6475538215128823, iteration: 407213
loss: 1.0328335762023926,grad_norm: 0.7887768685114382, iteration: 407214
loss: 1.1388602256774902,grad_norm: 0.9999996530212858, iteration: 407215
loss: 1.0535247325897217,grad_norm: 0.9999998881993514, iteration: 407216
loss: 1.0196397304534912,grad_norm: 0.9999992287298983, iteration: 407217
loss: 1.2797820568084717,grad_norm: 0.9999998145425625, iteration: 407218
loss: 1.0084878206253052,grad_norm: 0.8737791864211298, iteration: 407219
loss: 1.0094259977340698,grad_norm: 0.9354729412066968, iteration: 407220
loss: 1.0664373636245728,grad_norm: 0.9999999714677485, iteration: 407221
loss: 0.9937923550605774,grad_norm: 0.7355189667352534, iteration: 407222
loss: 0.9707317352294922,grad_norm: 0.8645128804293302, iteration: 407223
loss: 1.0088229179382324,grad_norm: 0.9451971559212137, iteration: 407224
loss: 1.0134702920913696,grad_norm: 0.7217967168168843, iteration: 407225
loss: 0.9862716794013977,grad_norm: 0.8690043898437335, iteration: 407226
loss: 0.9944618344306946,grad_norm: 0.7866549936912417, iteration: 407227
loss: 1.0015071630477905,grad_norm: 0.7259515476159581, iteration: 407228
loss: 1.0331151485443115,grad_norm: 0.7245310350524728, iteration: 407229
loss: 1.0137205123901367,grad_norm: 0.8107494179505171, iteration: 407230
loss: 0.9640375971794128,grad_norm: 0.8158684866682483, iteration: 407231
loss: 0.9468851089477539,grad_norm: 0.9999991682194042, iteration: 407232
loss: 1.0945340394973755,grad_norm: 0.7528744851050058, iteration: 407233
loss: 0.9816757440567017,grad_norm: 0.9026375612748917, iteration: 407234
loss: 0.9746431708335876,grad_norm: 0.7967968384878422, iteration: 407235
loss: 0.9687577486038208,grad_norm: 0.8379776042899579, iteration: 407236
loss: 0.9934964179992676,grad_norm: 0.7438830266096549, iteration: 407237
loss: 1.0185761451721191,grad_norm: 0.8889438913628088, iteration: 407238
loss: 0.9861141443252563,grad_norm: 0.7505153600171378, iteration: 407239
loss: 0.9964970350265503,grad_norm: 0.7921276097380452, iteration: 407240
loss: 1.0053589344024658,grad_norm: 0.689183516857813, iteration: 407241
loss: 1.1023660898208618,grad_norm: 0.8674821374571711, iteration: 407242
loss: 1.0565836429595947,grad_norm: 0.999999878547391, iteration: 407243
loss: 0.983421266078949,grad_norm: 0.7618331220309129, iteration: 407244
loss: 0.9870837926864624,grad_norm: 0.8218076156280631, iteration: 407245
loss: 1.0047630071640015,grad_norm: 0.6684563198128537, iteration: 407246
loss: 0.965036153793335,grad_norm: 0.8676713549851902, iteration: 407247
loss: 1.0104663372039795,grad_norm: 0.9224965939106791, iteration: 407248
loss: 0.9979254603385925,grad_norm: 0.7784752325970179, iteration: 407249
loss: 1.0182603597640991,grad_norm: 0.9197928185187756, iteration: 407250
loss: 1.0534864664077759,grad_norm: 0.7876928828407664, iteration: 407251
loss: 0.9865866303443909,grad_norm: 0.7991144658203467, iteration: 407252
loss: 0.9954859018325806,grad_norm: 0.782937349598377, iteration: 407253
loss: 1.0466116666793823,grad_norm: 0.7876720366283495, iteration: 407254
loss: 0.9884234070777893,grad_norm: 0.8079842368236971, iteration: 407255
loss: 1.0039448738098145,grad_norm: 0.8825597176767181, iteration: 407256
loss: 0.9974499940872192,grad_norm: 0.7066814362524931, iteration: 407257
loss: 1.048215627670288,grad_norm: 0.9999993374176265, iteration: 407258
loss: 0.9724122881889343,grad_norm: 0.8352220698352948, iteration: 407259
loss: 1.0404363870620728,grad_norm: 0.6516539649934876, iteration: 407260
loss: 1.0133079290390015,grad_norm: 0.9999994916854889, iteration: 407261
loss: 0.9612647294998169,grad_norm: 0.8726307179387004, iteration: 407262
loss: 0.9825834631919861,grad_norm: 0.7246879781039692, iteration: 407263
loss: 1.0371031761169434,grad_norm: 0.6907385921698531, iteration: 407264
loss: 1.0286592245101929,grad_norm: 0.8671149973433238, iteration: 407265
loss: 1.0150322914123535,grad_norm: 0.9999992551819373, iteration: 407266
loss: 0.9769556522369385,grad_norm: 0.6390977022309269, iteration: 407267
loss: 1.0554440021514893,grad_norm: 0.9703043225002532, iteration: 407268
loss: 1.0697320699691772,grad_norm: 0.999999302666422, iteration: 407269
loss: 0.9764199256896973,grad_norm: 0.8465538072822296, iteration: 407270
loss: 1.0029710531234741,grad_norm: 0.999999687904884, iteration: 407271
loss: 1.0017255544662476,grad_norm: 0.7316399102649437, iteration: 407272
loss: 1.0263385772705078,grad_norm: 0.999999103433338, iteration: 407273
loss: 0.9845448136329651,grad_norm: 0.706968153523167, iteration: 407274
loss: 0.9562942385673523,grad_norm: 0.7934168034158348, iteration: 407275
loss: 0.9639415740966797,grad_norm: 0.834907132458334, iteration: 407276
loss: 0.9504905343055725,grad_norm: 0.6525296141869947, iteration: 407277
loss: 0.995023250579834,grad_norm: 0.7281044443261535, iteration: 407278
loss: 1.019804835319519,grad_norm: 0.8023494792046286, iteration: 407279
loss: 0.9819557666778564,grad_norm: 0.9144780555717621, iteration: 407280
loss: 1.0465151071548462,grad_norm: 0.9999995872678993, iteration: 407281
loss: 0.9993477463722229,grad_norm: 0.782589221806036, iteration: 407282
loss: 1.0044875144958496,grad_norm: 0.6997020693245871, iteration: 407283
loss: 0.9793341755867004,grad_norm: 0.8436261669365951, iteration: 407284
loss: 0.9979310631752014,grad_norm: 0.8147839778848052, iteration: 407285
loss: 1.0349730253219604,grad_norm: 0.8092897807899797, iteration: 407286
loss: 1.0172003507614136,grad_norm: 0.7829027051690488, iteration: 407287
loss: 1.0204120874404907,grad_norm: 0.7286873862443137, iteration: 407288
loss: 0.9916338324546814,grad_norm: 0.7080414405005016, iteration: 407289
loss: 0.9613238573074341,grad_norm: 0.7995917272401989, iteration: 407290
loss: 1.0110664367675781,grad_norm: 0.7828585608471, iteration: 407291
loss: 1.0118309259414673,grad_norm: 0.9795735908727812, iteration: 407292
loss: 1.0552841424942017,grad_norm: 0.7841373961902118, iteration: 407293
loss: 1.0980949401855469,grad_norm: 0.999999769087928, iteration: 407294
loss: 1.0411943197250366,grad_norm: 0.8885732426100557, iteration: 407295
loss: 1.0192914009094238,grad_norm: 0.9633699883882613, iteration: 407296
loss: 1.0788780450820923,grad_norm: 0.9999994507291541, iteration: 407297
loss: 0.9510589838027954,grad_norm: 0.7677591447106944, iteration: 407298
loss: 1.0126476287841797,grad_norm: 0.6870238197764498, iteration: 407299
loss: 1.004279613494873,grad_norm: 0.8576984390861528, iteration: 407300
loss: 0.9869911670684814,grad_norm: 0.8786787309831954, iteration: 407301
loss: 1.000989556312561,grad_norm: 0.9999998834753349, iteration: 407302
loss: 0.9951962232589722,grad_norm: 0.809882124357547, iteration: 407303
loss: 0.9502153396606445,grad_norm: 0.9433547844675366, iteration: 407304
loss: 0.9786902666091919,grad_norm: 0.7899275873258277, iteration: 407305
loss: 1.056310772895813,grad_norm: 0.9999995116081601, iteration: 407306
loss: 0.9954169392585754,grad_norm: 0.7188512108834368, iteration: 407307
loss: 1.0127497911453247,grad_norm: 0.7306875887284991, iteration: 407308
loss: 1.0467208623886108,grad_norm: 0.9999995811697655, iteration: 407309
loss: 1.087239146232605,grad_norm: 0.9999991297409321, iteration: 407310
loss: 0.9845518469810486,grad_norm: 0.9999992048146576, iteration: 407311
loss: 0.9740043878555298,grad_norm: 0.7086427638785041, iteration: 407312
loss: 1.003228783607483,grad_norm: 0.9005535642797661, iteration: 407313
loss: 1.0105938911437988,grad_norm: 0.770166100000173, iteration: 407314
loss: 0.9908047914505005,grad_norm: 0.6993057248878278, iteration: 407315
loss: 1.0095398426055908,grad_norm: 0.721688405293658, iteration: 407316
loss: 1.1324636936187744,grad_norm: 0.9999995248219594, iteration: 407317
loss: 1.0012110471725464,grad_norm: 0.6784884037029897, iteration: 407318
loss: 0.9673622846603394,grad_norm: 0.9023416375109222, iteration: 407319
loss: 1.0327261686325073,grad_norm: 0.8118302432785831, iteration: 407320
loss: 1.0074267387390137,grad_norm: 0.6478410409188702, iteration: 407321
loss: 0.9956583380699158,grad_norm: 0.9174045648755147, iteration: 407322
loss: 1.0241892337799072,grad_norm: 0.7376752063763725, iteration: 407323
loss: 0.9947258830070496,grad_norm: 0.719524861988754, iteration: 407324
loss: 0.9657968878746033,grad_norm: 0.6469903904105218, iteration: 407325
loss: 1.0716028213500977,grad_norm: 0.9999996168778286, iteration: 407326
loss: 1.0424823760986328,grad_norm: 0.9999997737816045, iteration: 407327
loss: 1.050534963607788,grad_norm: 0.9999993898435199, iteration: 407328
loss: 1.1289516687393188,grad_norm: 0.9999993092755247, iteration: 407329
loss: 1.0041264295578003,grad_norm: 0.8237361441088368, iteration: 407330
loss: 1.031011939048767,grad_norm: 0.8081522494747221, iteration: 407331
loss: 1.003548264503479,grad_norm: 0.8765577468795015, iteration: 407332
loss: 0.9826065897941589,grad_norm: 0.9999996312067414, iteration: 407333
loss: 1.0591284036636353,grad_norm: 0.9999997684266818, iteration: 407334
loss: 1.024046540260315,grad_norm: 0.728805735622329, iteration: 407335
loss: 1.0398184061050415,grad_norm: 0.8035110254634281, iteration: 407336
loss: 1.0169357061386108,grad_norm: 0.814347965135059, iteration: 407337
loss: 0.9650566577911377,grad_norm: 0.6607901907159581, iteration: 407338
loss: 0.9672898054122925,grad_norm: 0.969339582589503, iteration: 407339
loss: 1.01853609085083,grad_norm: 0.7309398832819465, iteration: 407340
loss: 1.0065990686416626,grad_norm: 0.8277650305541489, iteration: 407341
loss: 1.0088151693344116,grad_norm: 0.7915674839235571, iteration: 407342
loss: 1.0099809169769287,grad_norm: 0.8927370962066182, iteration: 407343
loss: 1.0689008235931396,grad_norm: 0.999999065682027, iteration: 407344
loss: 0.9832881093025208,grad_norm: 0.8867085101806025, iteration: 407345
loss: 0.9960257411003113,grad_norm: 0.9607205063099729, iteration: 407346
loss: 1.0631256103515625,grad_norm: 0.9999992076199052, iteration: 407347
loss: 0.9841740131378174,grad_norm: 0.9643705341629741, iteration: 407348
loss: 1.0059901475906372,grad_norm: 0.7956244417107062, iteration: 407349
loss: 1.0032824277877808,grad_norm: 0.9999997439102803, iteration: 407350
loss: 0.9888715147972107,grad_norm: 0.7521286293886428, iteration: 407351
loss: 0.992959201335907,grad_norm: 0.8338394668684215, iteration: 407352
loss: 1.0153151750564575,grad_norm: 0.7051078017382412, iteration: 407353
loss: 0.9656787514686584,grad_norm: 0.8718485067206646, iteration: 407354
loss: 1.020477294921875,grad_norm: 0.7990093233646439, iteration: 407355
loss: 0.9748620390892029,grad_norm: 0.796639193767385, iteration: 407356
loss: 1.0039308071136475,grad_norm: 0.8512275261355329, iteration: 407357
loss: 0.9998593330383301,grad_norm: 0.720270625769901, iteration: 407358
loss: 1.0420703887939453,grad_norm: 0.999999542335089, iteration: 407359
loss: 1.0385318994522095,grad_norm: 0.6992033080446795, iteration: 407360
loss: 1.0199224948883057,grad_norm: 0.958102663442082, iteration: 407361
loss: 1.0338767766952515,grad_norm: 0.7581187538056258, iteration: 407362
loss: 1.0235826969146729,grad_norm: 0.6964553303616963, iteration: 407363
loss: 0.9778348803520203,grad_norm: 0.7313094317729002, iteration: 407364
loss: 0.9917044639587402,grad_norm: 0.8177647219792983, iteration: 407365
loss: 0.9803472757339478,grad_norm: 0.7107394529994662, iteration: 407366
loss: 1.0019104480743408,grad_norm: 0.6883344609020745, iteration: 407367
loss: 0.977959156036377,grad_norm: 0.7952515335925261, iteration: 407368
loss: 0.9586653113365173,grad_norm: 0.9999991339082026, iteration: 407369
loss: 0.9963291883468628,grad_norm: 0.8663778766783128, iteration: 407370
loss: 1.0385408401489258,grad_norm: 0.9999999283328677, iteration: 407371
loss: 1.0835992097854614,grad_norm: 0.9433323866503421, iteration: 407372
loss: 1.0130692720413208,grad_norm: 0.9999992173090319, iteration: 407373
loss: 0.9807178974151611,grad_norm: 0.7442806981164538, iteration: 407374
loss: 1.0215840339660645,grad_norm: 0.7738946253594903, iteration: 407375
loss: 0.9981768727302551,grad_norm: 0.9244817676818489, iteration: 407376
loss: 0.9912054538726807,grad_norm: 0.999999386141772, iteration: 407377
loss: 1.0715950727462769,grad_norm: 0.9999996334169866, iteration: 407378
loss: 0.9966627359390259,grad_norm: 0.851516012099697, iteration: 407379
loss: 1.0057252645492554,grad_norm: 0.6774056378643419, iteration: 407380
loss: 0.9757407903671265,grad_norm: 0.6937278649212066, iteration: 407381
loss: 1.0050534009933472,grad_norm: 0.7148550771602863, iteration: 407382
loss: 1.0141929388046265,grad_norm: 0.7169982407802491, iteration: 407383
loss: 1.0187840461730957,grad_norm: 0.8388133119499325, iteration: 407384
loss: 1.0005303621292114,grad_norm: 0.6982509644104057, iteration: 407385
loss: 1.0288041830062866,grad_norm: 0.7014101534647237, iteration: 407386
loss: 1.0150498151779175,grad_norm: 0.7225945787634089, iteration: 407387
loss: 0.997576117515564,grad_norm: 0.6669919508681122, iteration: 407388
loss: 1.0005311965942383,grad_norm: 0.8451142465048282, iteration: 407389
loss: 1.0390712022781372,grad_norm: 0.9999989328385109, iteration: 407390
loss: 1.0049948692321777,grad_norm: 0.8613502573122106, iteration: 407391
loss: 0.9868351817131042,grad_norm: 0.7554357875310099, iteration: 407392
loss: 1.057291865348816,grad_norm: 0.9999993569176993, iteration: 407393
loss: 0.9816755652427673,grad_norm: 0.7810471185504307, iteration: 407394
loss: 1.000600814819336,grad_norm: 0.9064559043204737, iteration: 407395
loss: 1.039442539215088,grad_norm: 0.800461289023017, iteration: 407396
loss: 0.9850464463233948,grad_norm: 0.7750621671453104, iteration: 407397
loss: 0.9851610064506531,grad_norm: 0.6439169699040124, iteration: 407398
loss: 1.0159186124801636,grad_norm: 0.8453761162004371, iteration: 407399
loss: 1.0194159746170044,grad_norm: 0.7940060225891141, iteration: 407400
loss: 1.1900945901870728,grad_norm: 0.9429942928381512, iteration: 407401
loss: 0.9968434572219849,grad_norm: 0.8900614344919767, iteration: 407402
loss: 1.022266149520874,grad_norm: 0.812518922640274, iteration: 407403
loss: 1.0083954334259033,grad_norm: 0.7110626417516729, iteration: 407404
loss: 0.9684589505195618,grad_norm: 0.9110331493971169, iteration: 407405
loss: 0.997478723526001,grad_norm: 0.7384102074184045, iteration: 407406
loss: 0.9880523085594177,grad_norm: 0.9205073867841143, iteration: 407407
loss: 1.0050861835479736,grad_norm: 0.6765529350830433, iteration: 407408
loss: 1.039265513420105,grad_norm: 0.9999991034857099, iteration: 407409
loss: 0.9944315552711487,grad_norm: 0.7798336456075726, iteration: 407410
loss: 1.0087240934371948,grad_norm: 0.8245765487771363, iteration: 407411
loss: 0.9952102303504944,grad_norm: 0.9999990109732806, iteration: 407412
loss: 0.9937973618507385,grad_norm: 0.7114556188620167, iteration: 407413
loss: 0.946405291557312,grad_norm: 0.8884719730601207, iteration: 407414
loss: 0.9905964136123657,grad_norm: 0.9628714594476758, iteration: 407415
loss: 1.0155024528503418,grad_norm: 0.7868849800948514, iteration: 407416
loss: 1.0667585134506226,grad_norm: 0.9999998770456873, iteration: 407417
loss: 1.0410826206207275,grad_norm: 0.99999924998098, iteration: 407418
loss: 1.0180680751800537,grad_norm: 0.7596735405312783, iteration: 407419
loss: 1.0041130781173706,grad_norm: 0.9378309294252397, iteration: 407420
loss: 1.0104870796203613,grad_norm: 0.8247208374378358, iteration: 407421
loss: 1.0089465379714966,grad_norm: 0.6886982295095282, iteration: 407422
loss: 1.0014979839324951,grad_norm: 0.956339983021571, iteration: 407423
loss: 1.0127702951431274,grad_norm: 0.8528362986893999, iteration: 407424
loss: 0.9923548102378845,grad_norm: 0.7773891293178066, iteration: 407425
loss: 0.9972743391990662,grad_norm: 0.9963293277885951, iteration: 407426
loss: 1.0777345895767212,grad_norm: 0.9999995204913984, iteration: 407427
loss: 1.0137733221054077,grad_norm: 0.7972924775693678, iteration: 407428
loss: 0.9817831516265869,grad_norm: 0.8185561654107613, iteration: 407429
loss: 1.048482894897461,grad_norm: 0.9092566250957396, iteration: 407430
loss: 1.0056346654891968,grad_norm: 0.7223115031872532, iteration: 407431
loss: 0.9939286708831787,grad_norm: 0.9571983677697016, iteration: 407432
loss: 1.015635371208191,grad_norm: 0.7290303635423416, iteration: 407433
loss: 0.9942976832389832,grad_norm: 0.8272007156725811, iteration: 407434
loss: 1.0101993083953857,grad_norm: 0.8993259830831849, iteration: 407435
loss: 0.9964050054550171,grad_norm: 0.7483570338383597, iteration: 407436
loss: 1.0033965110778809,grad_norm: 0.9445626715565801, iteration: 407437
loss: 0.996795654296875,grad_norm: 0.7339349022679335, iteration: 407438
loss: 0.9977375864982605,grad_norm: 0.7743823490602556, iteration: 407439
loss: 0.9715613722801208,grad_norm: 0.8070302738271234, iteration: 407440
loss: 0.981635332107544,grad_norm: 0.8847352555256229, iteration: 407441
loss: 0.978277325630188,grad_norm: 0.6800156318756635, iteration: 407442
loss: 1.0319358110427856,grad_norm: 0.9999991391866518, iteration: 407443
loss: 1.0922356843948364,grad_norm: 0.9271980453886448, iteration: 407444
loss: 1.0169636011123657,grad_norm: 0.7738809151676097, iteration: 407445
loss: 0.9706102609634399,grad_norm: 0.8511125772522817, iteration: 407446
loss: 1.012473464012146,grad_norm: 0.8088828549239435, iteration: 407447
loss: 1.0060133934020996,grad_norm: 0.570380739135949, iteration: 407448
loss: 0.9384925365447998,grad_norm: 0.7347610411255905, iteration: 407449
loss: 0.96036696434021,grad_norm: 0.7055270089835686, iteration: 407450
loss: 0.9873223900794983,grad_norm: 0.6590644890534688, iteration: 407451
loss: 1.0100575685501099,grad_norm: 0.8076625916146858, iteration: 407452
loss: 1.044479250907898,grad_norm: 0.7775511487832557, iteration: 407453
loss: 1.008870244026184,grad_norm: 0.9070983621255441, iteration: 407454
loss: 0.9651253819465637,grad_norm: 0.8560553687580782, iteration: 407455
loss: 0.9961931705474854,grad_norm: 0.7564144009224915, iteration: 407456
loss: 1.0028518438339233,grad_norm: 0.8547310807019901, iteration: 407457
loss: 1.0137449502944946,grad_norm: 0.8758679055232026, iteration: 407458
loss: 1.0108168125152588,grad_norm: 0.7720722982087826, iteration: 407459
loss: 0.9759979248046875,grad_norm: 0.797159526439773, iteration: 407460
loss: 0.990504264831543,grad_norm: 0.999999193833967, iteration: 407461
loss: 0.9955374598503113,grad_norm: 0.7007377661979706, iteration: 407462
loss: 1.0121417045593262,grad_norm: 0.7847174959568177, iteration: 407463
loss: 0.9763364195823669,grad_norm: 0.6654669263407877, iteration: 407464
loss: 0.9862045645713806,grad_norm: 0.7001800842293651, iteration: 407465
loss: 0.9959413409233093,grad_norm: 0.9147117242398276, iteration: 407466
loss: 0.9936872124671936,grad_norm: 0.8484167299735131, iteration: 407467
loss: 0.9963544011116028,grad_norm: 0.8096471978686373, iteration: 407468
loss: 0.9721155166625977,grad_norm: 0.6888990996409773, iteration: 407469
loss: 1.0060482025146484,grad_norm: 0.8073792874087239, iteration: 407470
loss: 1.0418450832366943,grad_norm: 0.9999992280083536, iteration: 407471
loss: 1.1162458658218384,grad_norm: 0.9441300196352439, iteration: 407472
loss: 1.023187279701233,grad_norm: 0.8667214610946395, iteration: 407473
loss: 0.9978097677230835,grad_norm: 0.817233411013896, iteration: 407474
loss: 1.0598869323730469,grad_norm: 0.7758195048753298, iteration: 407475
loss: 1.0179252624511719,grad_norm: 0.6817510363425159, iteration: 407476
loss: 1.0103206634521484,grad_norm: 0.8906844780898293, iteration: 407477
loss: 1.024495244026184,grad_norm: 0.8778370567501163, iteration: 407478
loss: 1.0173912048339844,grad_norm: 0.9999991335977362, iteration: 407479
loss: 1.0438709259033203,grad_norm: 0.7774899208429982, iteration: 407480
loss: 0.9816172122955322,grad_norm: 0.8096498068140191, iteration: 407481
loss: 1.0342497825622559,grad_norm: 0.851641132191017, iteration: 407482
loss: 1.0137320756912231,grad_norm: 0.867416522595064, iteration: 407483
loss: 1.0059181451797485,grad_norm: 0.9362934547005133, iteration: 407484
loss: 1.0255084037780762,grad_norm: 0.8815417346545776, iteration: 407485
loss: 1.0105375051498413,grad_norm: 0.8741937560735927, iteration: 407486
loss: 1.0193957090377808,grad_norm: 0.7471553132329174, iteration: 407487
loss: 0.9969648718833923,grad_norm: 0.8022070133651961, iteration: 407488
loss: 1.026475191116333,grad_norm: 0.916325508075199, iteration: 407489
loss: 1.0218441486358643,grad_norm: 0.7828367639323093, iteration: 407490
loss: 1.0873700380325317,grad_norm: 0.8912548556041417, iteration: 407491
loss: 0.9793091416358948,grad_norm: 0.7070489215431817, iteration: 407492
loss: 1.0971150398254395,grad_norm: 0.9999991018312173, iteration: 407493
loss: 0.9668893814086914,grad_norm: 0.714454349764262, iteration: 407494
loss: 0.9908600449562073,grad_norm: 0.6842490522389605, iteration: 407495
loss: 0.978121817111969,grad_norm: 0.7476736171655908, iteration: 407496
loss: 0.9848436713218689,grad_norm: 0.6770411073089173, iteration: 407497
loss: 0.9798197746276855,grad_norm: 0.8540428545862822, iteration: 407498
loss: 0.9954198002815247,grad_norm: 0.9999990285291229, iteration: 407499
loss: 0.9977930784225464,grad_norm: 0.694671309931347, iteration: 407500
loss: 1.0301260948181152,grad_norm: 0.8560773019511763, iteration: 407501
loss: 1.0176762342453003,grad_norm: 0.8105890049907051, iteration: 407502
loss: 1.0539026260375977,grad_norm: 0.8220544128638182, iteration: 407503
loss: 0.9739913940429688,grad_norm: 0.8341841513013835, iteration: 407504
loss: 0.9754997491836548,grad_norm: 0.7679900506816989, iteration: 407505
loss: 0.9580039381980896,grad_norm: 0.9444770972260342, iteration: 407506
loss: 0.9876841306686401,grad_norm: 0.8373875996301741, iteration: 407507
loss: 0.9801009297370911,grad_norm: 0.999999526752163, iteration: 407508
loss: 1.0265111923217773,grad_norm: 0.9999996624553839, iteration: 407509
loss: 1.0209136009216309,grad_norm: 0.8355093864174432, iteration: 407510
loss: 1.0247515439987183,grad_norm: 0.8011716530866647, iteration: 407511
loss: 1.0089507102966309,grad_norm: 0.9265382791730113, iteration: 407512
loss: 1.0319660902023315,grad_norm: 0.840664977838661, iteration: 407513
loss: 1.0734227895736694,grad_norm: 0.8546485121297506, iteration: 407514
loss: 1.0665196180343628,grad_norm: 0.999999111893547, iteration: 407515
loss: 0.9561132192611694,grad_norm: 0.8683012802861543, iteration: 407516
loss: 1.014120101928711,grad_norm: 0.7943314469915935, iteration: 407517
loss: 1.0054712295532227,grad_norm: 0.814423993301306, iteration: 407518
loss: 0.9948539137840271,grad_norm: 0.8181978896848265, iteration: 407519
loss: 0.9870997667312622,grad_norm: 0.7113759408548816, iteration: 407520
loss: 1.0029257535934448,grad_norm: 0.7170787258798528, iteration: 407521
loss: 0.9847912192344666,grad_norm: 0.7375444749302088, iteration: 407522
loss: 0.9889757633209229,grad_norm: 0.9999989993499913, iteration: 407523
loss: 0.9855720400810242,grad_norm: 0.9758972292178199, iteration: 407524
loss: 0.9897688627243042,grad_norm: 0.8636467541112399, iteration: 407525
loss: 0.9835769534111023,grad_norm: 0.6968296732319802, iteration: 407526
loss: 1.0496848821640015,grad_norm: 0.9999997136542956, iteration: 407527
loss: 0.9890304803848267,grad_norm: 0.8493470214934332, iteration: 407528
loss: 1.0193852186203003,grad_norm: 0.8422435037470158, iteration: 407529
loss: 1.0247602462768555,grad_norm: 0.999999836647055, iteration: 407530
loss: 1.021869421005249,grad_norm: 0.9119792930901626, iteration: 407531
loss: 0.9914210438728333,grad_norm: 0.8993216337427881, iteration: 407532
loss: 1.0370639562606812,grad_norm: 0.7892937122031195, iteration: 407533
loss: 0.9978092908859253,grad_norm: 0.7303761134300342, iteration: 407534
loss: 0.9937413334846497,grad_norm: 0.999999449745182, iteration: 407535
loss: 1.024322748184204,grad_norm: 0.9841218000397758, iteration: 407536
loss: 0.9894046783447266,grad_norm: 0.8102762864791581, iteration: 407537
loss: 1.004373550415039,grad_norm: 0.9706990941598185, iteration: 407538
loss: 1.025456428527832,grad_norm: 0.7904978182732919, iteration: 407539
loss: 0.9828641414642334,grad_norm: 0.7931400748076594, iteration: 407540
loss: 0.9876915812492371,grad_norm: 0.9222312779320568, iteration: 407541
loss: 1.0006771087646484,grad_norm: 0.8724471331133639, iteration: 407542
loss: 0.962843656539917,grad_norm: 0.8647369493283877, iteration: 407543
loss: 0.9628771543502808,grad_norm: 0.8612088799136633, iteration: 407544
loss: 0.9838329553604126,grad_norm: 0.6673535156151, iteration: 407545
loss: 1.0597015619277954,grad_norm: 0.8812762001560722, iteration: 407546
loss: 0.9885159134864807,grad_norm: 0.8100040226031512, iteration: 407547
loss: 1.0304750204086304,grad_norm: 0.8528504148018111, iteration: 407548
loss: 0.9910891652107239,grad_norm: 0.7631201357855614, iteration: 407549
loss: 0.9688307046890259,grad_norm: 0.7796863685243015, iteration: 407550
loss: 0.991492748260498,grad_norm: 0.9494327535060625, iteration: 407551
loss: 1.0010066032409668,grad_norm: 0.8853860828648314, iteration: 407552
loss: 1.0008478164672852,grad_norm: 0.6881558192429783, iteration: 407553
loss: 0.9906036853790283,grad_norm: 0.9317350954930047, iteration: 407554
loss: 1.1366699934005737,grad_norm: 0.9999992950647214, iteration: 407555
loss: 0.988116443157196,grad_norm: 0.6850243273342409, iteration: 407556
loss: 0.992861270904541,grad_norm: 0.9999990447551191, iteration: 407557
loss: 1.0100833177566528,grad_norm: 0.734854079378228, iteration: 407558
loss: 1.0116933584213257,grad_norm: 0.7044111416546726, iteration: 407559
loss: 0.9723625183105469,grad_norm: 0.7862530236452925, iteration: 407560
loss: 1.0134978294372559,grad_norm: 0.8678902305132522, iteration: 407561
loss: 1.111369252204895,grad_norm: 0.901840211570314, iteration: 407562
loss: 1.0050324201583862,grad_norm: 0.7311879101625618, iteration: 407563
loss: 0.9543512463569641,grad_norm: 0.8340577274911262, iteration: 407564
loss: 0.9757145643234253,grad_norm: 0.7396059732831872, iteration: 407565
loss: 0.9790407419204712,grad_norm: 0.7251635523190955, iteration: 407566
loss: 0.9764717817306519,grad_norm: 0.8031732478033395, iteration: 407567
loss: 0.9775469303131104,grad_norm: 0.7389328310361056, iteration: 407568
loss: 1.0004794597625732,grad_norm: 0.7878232023637745, iteration: 407569
loss: 0.9709439873695374,grad_norm: 0.8080327857734527, iteration: 407570
loss: 0.9662049412727356,grad_norm: 0.9999990847925719, iteration: 407571
loss: 1.0200424194335938,grad_norm: 0.8565549737146274, iteration: 407572
loss: 1.0293283462524414,grad_norm: 0.6700803307173772, iteration: 407573
loss: 1.0103726387023926,grad_norm: 0.9769008662989765, iteration: 407574
loss: 0.977435290813446,grad_norm: 0.7364626345094483, iteration: 407575
loss: 0.9697253108024597,grad_norm: 0.7859635076584616, iteration: 407576
loss: 1.0120939016342163,grad_norm: 0.7519818953738633, iteration: 407577
loss: 0.9831595420837402,grad_norm: 0.7600712636649215, iteration: 407578
loss: 0.9924198985099792,grad_norm: 0.7036825248171632, iteration: 407579
loss: 0.9940822124481201,grad_norm: 0.8471857592677781, iteration: 407580
loss: 0.9764035940170288,grad_norm: 0.7334705554278487, iteration: 407581
loss: 1.0702766180038452,grad_norm: 0.9999993975825707, iteration: 407582
loss: 1.0189486742019653,grad_norm: 0.8392602345878367, iteration: 407583
loss: 1.0015573501586914,grad_norm: 0.7929654674938641, iteration: 407584
loss: 1.0073474645614624,grad_norm: 0.7980423779060919, iteration: 407585
loss: 0.9622541666030884,grad_norm: 0.8669172492656524, iteration: 407586
loss: 1.0267149209976196,grad_norm: 0.9366387008004888, iteration: 407587
loss: 1.0264962911605835,grad_norm: 0.8374305201574599, iteration: 407588
loss: 1.0654510259628296,grad_norm: 0.9999996716873059, iteration: 407589
loss: 1.046029806137085,grad_norm: 0.8969610819246893, iteration: 407590
loss: 0.9962151646614075,grad_norm: 0.6821834517017675, iteration: 407591
loss: 1.0014902353286743,grad_norm: 0.9341157035925877, iteration: 407592
loss: 0.9930461049079895,grad_norm: 0.8111590212422155, iteration: 407593
loss: 0.9934428334236145,grad_norm: 0.7218198934481098, iteration: 407594
loss: 1.0859488248825073,grad_norm: 0.9999997216205182, iteration: 407595
loss: 1.0030131340026855,grad_norm: 0.692712970522938, iteration: 407596
loss: 0.9674597978591919,grad_norm: 0.7238945693503782, iteration: 407597
loss: 1.02305006980896,grad_norm: 0.9974866813245439, iteration: 407598
loss: 0.9789484143257141,grad_norm: 0.7140223692198786, iteration: 407599
loss: 0.9944010972976685,grad_norm: 0.7701394626278039, iteration: 407600
loss: 0.9886249899864197,grad_norm: 0.7087809701765253, iteration: 407601
loss: 0.9997817873954773,grad_norm: 0.7233667135869061, iteration: 407602
loss: 0.998598039150238,grad_norm: 0.9089439875670485, iteration: 407603
loss: 1.018273949623108,grad_norm: 0.8057255417203967, iteration: 407604
loss: 1.035529375076294,grad_norm: 0.9999993301121967, iteration: 407605
loss: 0.9873951077461243,grad_norm: 0.7634711737197944, iteration: 407606
loss: 1.0026171207427979,grad_norm: 0.7428877913351228, iteration: 407607
loss: 0.9921110272407532,grad_norm: 0.9999992996608833, iteration: 407608
loss: 1.054668664932251,grad_norm: 0.7900391112287474, iteration: 407609
loss: 1.0937005281448364,grad_norm: 0.8762837213551439, iteration: 407610
loss: 1.0151362419128418,grad_norm: 0.8147707125085949, iteration: 407611
loss: 0.9971455335617065,grad_norm: 0.8594613291036989, iteration: 407612
loss: 1.0244890451431274,grad_norm: 0.8100112959045895, iteration: 407613
loss: 1.0367239713668823,grad_norm: 0.7835777986728577, iteration: 407614
loss: 0.9764602184295654,grad_norm: 0.7997529623989833, iteration: 407615
loss: 0.9948307275772095,grad_norm: 0.7321433055646822, iteration: 407616
loss: 1.0597436428070068,grad_norm: 0.9999991066511168, iteration: 407617
loss: 0.9831515550613403,grad_norm: 0.8137821128407279, iteration: 407618
loss: 0.9950833916664124,grad_norm: 0.8411693106141229, iteration: 407619
loss: 1.0276730060577393,grad_norm: 0.9999999405841313, iteration: 407620
loss: 1.0954025983810425,grad_norm: 0.859535501609138, iteration: 407621
loss: 0.9909263849258423,grad_norm: 0.8042189476756142, iteration: 407622
loss: 1.0050581693649292,grad_norm: 0.7816049288940435, iteration: 407623
loss: 1.0039993524551392,grad_norm: 0.7334506045654983, iteration: 407624
loss: 1.0709755420684814,grad_norm: 0.9999990751876303, iteration: 407625
loss: 1.0027921199798584,grad_norm: 0.8260454504187915, iteration: 407626
loss: 1.001957654953003,grad_norm: 0.9999998331387916, iteration: 407627
loss: 1.016148567199707,grad_norm: 0.9999991823313552, iteration: 407628
loss: 1.0154136419296265,grad_norm: 0.8232880660706167, iteration: 407629
loss: 0.9810201525688171,grad_norm: 0.8160699769918481, iteration: 407630
loss: 0.9537062644958496,grad_norm: 0.7225059471239653, iteration: 407631
loss: 0.9953874945640564,grad_norm: 0.9999993415966891, iteration: 407632
loss: 0.9973089098930359,grad_norm: 0.9098006054517537, iteration: 407633
loss: 0.9559478759765625,grad_norm: 0.8353532525078391, iteration: 407634
loss: 0.9812335968017578,grad_norm: 0.7121958509794011, iteration: 407635
loss: 0.9593841433525085,grad_norm: 0.8402557781107439, iteration: 407636
loss: 0.9903544783592224,grad_norm: 0.7257790201458229, iteration: 407637
loss: 1.009142279624939,grad_norm: 0.8117899387052008, iteration: 407638
loss: 1.0492329597473145,grad_norm: 0.9999989726472834, iteration: 407639
loss: 1.0465904474258423,grad_norm: 0.7797828080110488, iteration: 407640
loss: 1.0265711545944214,grad_norm: 0.9896667300980974, iteration: 407641
loss: 1.0169566869735718,grad_norm: 0.9999998645498962, iteration: 407642
loss: 1.0812429189682007,grad_norm: 0.9999996934764502, iteration: 407643
loss: 0.9894885420799255,grad_norm: 0.846663367016922, iteration: 407644
loss: 1.0040512084960938,grad_norm: 0.9601827907603846, iteration: 407645
loss: 0.9792662262916565,grad_norm: 0.8956619385262952, iteration: 407646
loss: 0.9687995314598083,grad_norm: 0.9568044018038658, iteration: 407647
loss: 0.9905468225479126,grad_norm: 0.8921540338667411, iteration: 407648
loss: 1.0081948041915894,grad_norm: 0.9999993907672357, iteration: 407649
loss: 1.007836103439331,grad_norm: 0.7718612758726247, iteration: 407650
loss: 1.014365315437317,grad_norm: 0.8579170354879774, iteration: 407651
loss: 0.992682158946991,grad_norm: 0.984418286774687, iteration: 407652
loss: 0.9919085502624512,grad_norm: 0.8324284187682048, iteration: 407653
loss: 1.0342767238616943,grad_norm: 0.7726872986765548, iteration: 407654
loss: 1.0408294200897217,grad_norm: 0.8002601273700329, iteration: 407655
loss: 1.0476559400558472,grad_norm: 0.90543937517811, iteration: 407656
loss: 1.0463441610336304,grad_norm: 0.9999994343546288, iteration: 407657
loss: 0.9695663452148438,grad_norm: 0.7814228011609065, iteration: 407658
loss: 1.0046619176864624,grad_norm: 0.8648595141320876, iteration: 407659
loss: 0.9957971572875977,grad_norm: 0.9999994654607888, iteration: 407660
loss: 1.0544086694717407,grad_norm: 0.999999416653721, iteration: 407661
loss: 1.042338490486145,grad_norm: 0.9999995727971216, iteration: 407662
loss: 0.9677021503448486,grad_norm: 0.8650533447260433, iteration: 407663
loss: 0.9995333552360535,grad_norm: 0.872963173503288, iteration: 407664
loss: 0.9951609969139099,grad_norm: 0.8454580386868775, iteration: 407665
loss: 1.02914559841156,grad_norm: 0.9999991791381367, iteration: 407666
loss: 0.9816815257072449,grad_norm: 0.7540345175233699, iteration: 407667
loss: 0.9995357394218445,grad_norm: 0.7570063265839239, iteration: 407668
loss: 1.0011101961135864,grad_norm: 0.8187437482803873, iteration: 407669
loss: 0.990795373916626,grad_norm: 0.8018949824482634, iteration: 407670
loss: 1.015297293663025,grad_norm: 0.7375458881002703, iteration: 407671
loss: 0.9937281608581543,grad_norm: 0.7570580515718282, iteration: 407672
loss: 0.9806777834892273,grad_norm: 0.8304931954807024, iteration: 407673
loss: 0.9980759620666504,grad_norm: 0.8019060604547206, iteration: 407674
loss: 1.000752329826355,grad_norm: 0.7717600277675843, iteration: 407675
loss: 1.006988525390625,grad_norm: 0.7406004972108424, iteration: 407676
loss: 0.9820044636726379,grad_norm: 0.9018849079862187, iteration: 407677
loss: 0.9743603467941284,grad_norm: 0.7644352871709151, iteration: 407678
loss: 1.0092164278030396,grad_norm: 0.6736909385766671, iteration: 407679
loss: 1.0051448345184326,grad_norm: 0.7227543540961119, iteration: 407680
loss: 1.0273000001907349,grad_norm: 0.8255937957952469, iteration: 407681
loss: 0.9937413930892944,grad_norm: 0.7902835046887074, iteration: 407682
loss: 0.9892953634262085,grad_norm: 0.8627000363801838, iteration: 407683
loss: 1.0150117874145508,grad_norm: 0.9999996172620309, iteration: 407684
loss: 1.0060416460037231,grad_norm: 0.9999997938200945, iteration: 407685
loss: 1.070171594619751,grad_norm: 0.9999992908375311, iteration: 407686
loss: 1.0197759866714478,grad_norm: 0.9999994190223879, iteration: 407687
loss: 1.0113310813903809,grad_norm: 0.9915177347836853, iteration: 407688
loss: 0.9923977255821228,grad_norm: 0.7832359140478002, iteration: 407689
loss: 1.1272910833358765,grad_norm: 0.9999991841552257, iteration: 407690
loss: 1.0009948015213013,grad_norm: 0.9014050391313578, iteration: 407691
loss: 0.998014509677887,grad_norm: 0.8552083453299891, iteration: 407692
loss: 1.0060920715332031,grad_norm: 0.7164080767555017, iteration: 407693
loss: 0.9957602620124817,grad_norm: 0.7831527610124193, iteration: 407694
loss: 1.0108174085617065,grad_norm: 0.9999991702089273, iteration: 407695
loss: 0.9785731434822083,grad_norm: 0.8445373814122352, iteration: 407696
loss: 1.0051295757293701,grad_norm: 0.8282482843676029, iteration: 407697
loss: 1.0603606700897217,grad_norm: 0.9191061067380114, iteration: 407698
loss: 0.9971226453781128,grad_norm: 0.7873193042912524, iteration: 407699
loss: 1.0332984924316406,grad_norm: 0.9999995588177985, iteration: 407700
loss: 1.0245970487594604,grad_norm: 0.9999995379782219, iteration: 407701
loss: 1.0869982242584229,grad_norm: 0.7310360101165037, iteration: 407702
loss: 1.1139302253723145,grad_norm: 0.9999999391160671, iteration: 407703
loss: 1.0339806079864502,grad_norm: 0.9318319426882473, iteration: 407704
loss: 0.9943121671676636,grad_norm: 0.6884885187212143, iteration: 407705
loss: 1.0126779079437256,grad_norm: 0.765107826905964, iteration: 407706
loss: 1.0594909191131592,grad_norm: 0.9999991266013096, iteration: 407707
loss: 0.979236900806427,grad_norm: 0.8394018901854159, iteration: 407708
loss: 1.0062050819396973,grad_norm: 0.9004191853014822, iteration: 407709
loss: 1.0157512426376343,grad_norm: 0.6832355018752976, iteration: 407710
loss: 0.9536562561988831,grad_norm: 0.8038007286680087, iteration: 407711
loss: 1.2715600728988647,grad_norm: 0.9999998561500494, iteration: 407712
loss: 0.9837704300880432,grad_norm: 0.7546207542752964, iteration: 407713
loss: 1.0230330228805542,grad_norm: 0.910553193769585, iteration: 407714
loss: 0.9672032594680786,grad_norm: 0.9999992282794379, iteration: 407715
loss: 0.963810920715332,grad_norm: 0.9408105875850802, iteration: 407716
loss: 1.026595950126648,grad_norm: 0.8383384448986522, iteration: 407717
loss: 1.0474835634231567,grad_norm: 1.0000000882026137, iteration: 407718
loss: 1.000786304473877,grad_norm: 0.8636696653121234, iteration: 407719
loss: 0.9521181583404541,grad_norm: 0.7859669550799445, iteration: 407720
loss: 1.0409557819366455,grad_norm: 0.9165911409680451, iteration: 407721
loss: 0.9811006784439087,grad_norm: 0.7127557246507141, iteration: 407722
loss: 1.1072807312011719,grad_norm: 0.9999993627325436, iteration: 407723
loss: 1.0170068740844727,grad_norm: 0.7306899135617552, iteration: 407724
loss: 0.9766566753387451,grad_norm: 0.7045145165675656, iteration: 407725
loss: 1.1439810991287231,grad_norm: 0.9999993815026431, iteration: 407726
loss: 1.041200041770935,grad_norm: 0.8896493746307387, iteration: 407727
loss: 1.011159062385559,grad_norm: 0.7461731489498207, iteration: 407728
loss: 0.9813508987426758,grad_norm: 0.9706663002377064, iteration: 407729
loss: 1.015765905380249,grad_norm: 0.7334100607976529, iteration: 407730
loss: 0.9799594283103943,grad_norm: 0.8078389599134049, iteration: 407731
loss: 1.032301425933838,grad_norm: 0.8398156171439595, iteration: 407732
loss: 1.020373821258545,grad_norm: 0.8427472585871097, iteration: 407733
loss: 1.0129481554031372,grad_norm: 0.7415027051801064, iteration: 407734
loss: 0.996478796005249,grad_norm: 0.8042865440535274, iteration: 407735
loss: 1.030657410621643,grad_norm: 0.8807268025256408, iteration: 407736
loss: 0.9958525896072388,grad_norm: 0.8595707550739451, iteration: 407737
loss: 1.0240452289581299,grad_norm: 0.7258479597094684, iteration: 407738
loss: 1.0054022073745728,grad_norm: 0.7351978089447548, iteration: 407739
loss: 0.9988929629325867,grad_norm: 0.8619012384381899, iteration: 407740
loss: 0.9875032305717468,grad_norm: 0.9210951466809915, iteration: 407741
loss: 0.9591884016990662,grad_norm: 0.6559415166840076, iteration: 407742
loss: 1.0020145177841187,grad_norm: 0.7339040322286157, iteration: 407743
loss: 1.0027990341186523,grad_norm: 0.8197675224232741, iteration: 407744
loss: 0.9637218713760376,grad_norm: 0.6270847264573182, iteration: 407745
loss: 1.002703070640564,grad_norm: 0.7964399100971632, iteration: 407746
loss: 1.0243619680404663,grad_norm: 0.8463829670992065, iteration: 407747
loss: 1.016296148300171,grad_norm: 0.6744018875443306, iteration: 407748
loss: 0.9944624900817871,grad_norm: 0.8088282896082127, iteration: 407749
loss: 0.9684818983078003,grad_norm: 0.7441370552989913, iteration: 407750
loss: 0.9823326468467712,grad_norm: 0.8687100385586166, iteration: 407751
loss: 0.9884262084960938,grad_norm: 0.7512490964898221, iteration: 407752
loss: 0.9794322848320007,grad_norm: 0.884267569547834, iteration: 407753
loss: 1.0200934410095215,grad_norm: 0.7308135404216105, iteration: 407754
loss: 0.9821727275848389,grad_norm: 0.797352737518337, iteration: 407755
loss: 1.0078321695327759,grad_norm: 0.8099222932324627, iteration: 407756
loss: 0.9773663282394409,grad_norm: 0.7672624817967756, iteration: 407757
loss: 1.0662806034088135,grad_norm: 0.9999996161678119, iteration: 407758
loss: 1.0426305532455444,grad_norm: 0.8801403784128773, iteration: 407759
loss: 0.9768500328063965,grad_norm: 0.6865048066336358, iteration: 407760
loss: 1.0082855224609375,grad_norm: 0.7953023876348823, iteration: 407761
loss: 0.9855132102966309,grad_norm: 0.7232465487898271, iteration: 407762
loss: 1.0207114219665527,grad_norm: 0.999999884573917, iteration: 407763
loss: 0.9912137389183044,grad_norm: 0.7582548871345942, iteration: 407764
loss: 1.0357693433761597,grad_norm: 0.8422797865161241, iteration: 407765
loss: 1.014734148979187,grad_norm: 0.999999936141892, iteration: 407766
loss: 0.9875057935714722,grad_norm: 0.7451272165497291, iteration: 407767
loss: 0.9801079630851746,grad_norm: 0.7261134363827955, iteration: 407768
loss: 0.9676709771156311,grad_norm: 0.7144344943518681, iteration: 407769
loss: 1.0154011249542236,grad_norm: 0.9999991200021517, iteration: 407770
loss: 1.0026615858078003,grad_norm: 0.8120266820212874, iteration: 407771
loss: 0.9752630591392517,grad_norm: 0.7024360227033308, iteration: 407772
loss: 0.9746299982070923,grad_norm: 0.9999998118005142, iteration: 407773
loss: 1.0094568729400635,grad_norm: 0.9999998911612127, iteration: 407774
loss: 0.9540361166000366,grad_norm: 0.7517300413559406, iteration: 407775
loss: 0.9857221841812134,grad_norm: 0.7300454796726652, iteration: 407776
loss: 1.005480408668518,grad_norm: 0.9638233341099548, iteration: 407777
loss: 1.0325840711593628,grad_norm: 0.9999992433882626, iteration: 407778
loss: 1.0310872793197632,grad_norm: 0.999999904362493, iteration: 407779
loss: 1.0285075902938843,grad_norm: 0.9999993989959686, iteration: 407780
loss: 0.9880281686782837,grad_norm: 0.7736583297631175, iteration: 407781
loss: 1.076836109161377,grad_norm: 0.9231649663073553, iteration: 407782
loss: 1.0149599313735962,grad_norm: 0.9471838670448018, iteration: 407783
loss: 1.006192684173584,grad_norm: 0.9999996693772932, iteration: 407784
loss: 1.0067495107650757,grad_norm: 0.9999994391403977, iteration: 407785
loss: 0.9788565635681152,grad_norm: 0.7015895138993131, iteration: 407786
loss: 1.0194313526153564,grad_norm: 0.9226187414325526, iteration: 407787
loss: 0.9609923958778381,grad_norm: 0.852077144494592, iteration: 407788
loss: 0.9859529733657837,grad_norm: 0.7726717247890155, iteration: 407789
loss: 1.0120385885238647,grad_norm: 0.7437536072741312, iteration: 407790
loss: 1.0048296451568604,grad_norm: 0.9547332703916995, iteration: 407791
loss: 0.9679310321807861,grad_norm: 0.8346117740297716, iteration: 407792
loss: 0.9907419681549072,grad_norm: 0.9986232236219896, iteration: 407793
loss: 0.9789868593215942,grad_norm: 0.7438496987767, iteration: 407794
loss: 1.0524182319641113,grad_norm: 0.9999996934411596, iteration: 407795
loss: 1.0037586688995361,grad_norm: 0.7297233712077619, iteration: 407796
loss: 1.0443556308746338,grad_norm: 0.7998393430588652, iteration: 407797
loss: 0.968640923500061,grad_norm: 0.7570031209041659, iteration: 407798
loss: 1.0236358642578125,grad_norm: 0.9999991699609393, iteration: 407799
loss: 0.9672374725341797,grad_norm: 0.7491584995548509, iteration: 407800
loss: 1.1120896339416504,grad_norm: 0.9999999330808909, iteration: 407801
loss: 1.0198241472244263,grad_norm: 0.8172918294356952, iteration: 407802
loss: 0.9662867784500122,grad_norm: 0.9999989187461015, iteration: 407803
loss: 1.0116065740585327,grad_norm: 0.8255938743318779, iteration: 407804
loss: 0.9760456681251526,grad_norm: 0.9135718769720784, iteration: 407805
loss: 0.9779555797576904,grad_norm: 0.8863102713595706, iteration: 407806
loss: 1.0048006772994995,grad_norm: 0.8449667306755826, iteration: 407807
loss: 0.9895938038825989,grad_norm: 0.6632576710584263, iteration: 407808
loss: 1.0101431608200073,grad_norm: 0.7155492176995053, iteration: 407809
loss: 1.0193227529525757,grad_norm: 0.7339185254197762, iteration: 407810
loss: 0.9877845048904419,grad_norm: 0.8369156945736024, iteration: 407811
loss: 0.9820979833602905,grad_norm: 0.7217624302778815, iteration: 407812
loss: 1.0168105363845825,grad_norm: 0.9313507880304515, iteration: 407813
loss: 0.9837263226509094,grad_norm: 0.7959518299664087, iteration: 407814
loss: 1.0214195251464844,grad_norm: 0.9849222179937666, iteration: 407815
loss: 0.9999271631240845,grad_norm: 0.8070206850336662, iteration: 407816
loss: 0.9801490306854248,grad_norm: 0.8354021024057647, iteration: 407817
loss: 0.973971426486969,grad_norm: 0.9999996724439579, iteration: 407818
loss: 0.9931161999702454,grad_norm: 0.8328991241936679, iteration: 407819
loss: 0.9798330664634705,grad_norm: 0.6994726567976998, iteration: 407820
loss: 1.02675461769104,grad_norm: 0.9046318635527141, iteration: 407821
loss: 0.9974289536476135,grad_norm: 0.7548440516167297, iteration: 407822
loss: 1.0639604330062866,grad_norm: 0.8393809969788849, iteration: 407823
loss: 1.033806324005127,grad_norm: 0.6214830290354509, iteration: 407824
loss: 1.0024157762527466,grad_norm: 0.7123641266596178, iteration: 407825
loss: 1.0073717832565308,grad_norm: 0.7642598462523764, iteration: 407826
loss: 0.9669893980026245,grad_norm: 0.964614210232327, iteration: 407827
loss: 0.9802455306053162,grad_norm: 0.9572253282038224, iteration: 407828
loss: 1.0027693510055542,grad_norm: 0.9999989933918416, iteration: 407829
loss: 0.9930378198623657,grad_norm: 0.6813330680896724, iteration: 407830
loss: 0.9973071813583374,grad_norm: 0.7029659189849152, iteration: 407831
loss: 1.0194810628890991,grad_norm: 0.7142480291445543, iteration: 407832
loss: 0.9861771464347839,grad_norm: 0.7517351145272073, iteration: 407833
loss: 1.025831699371338,grad_norm: 0.852642280977846, iteration: 407834
loss: 0.9932835698127747,grad_norm: 0.8157461163413423, iteration: 407835
loss: 1.029967188835144,grad_norm: 0.9105047758709371, iteration: 407836
loss: 0.9919483661651611,grad_norm: 0.685792030121442, iteration: 407837
loss: 1.004526138305664,grad_norm: 0.9823044699517477, iteration: 407838
loss: 1.0142691135406494,grad_norm: 0.8035782125894041, iteration: 407839
loss: 0.9873065948486328,grad_norm: 0.8690866774146137, iteration: 407840
loss: 1.0055944919586182,grad_norm: 0.9999996773241914, iteration: 407841
loss: 1.045770287513733,grad_norm: 0.8969524750616015, iteration: 407842
loss: 0.9683126211166382,grad_norm: 0.7918074275565853, iteration: 407843
loss: 1.0173895359039307,grad_norm: 0.8257971827265937, iteration: 407844
loss: 1.030857801437378,grad_norm: 0.8423741376899212, iteration: 407845
loss: 0.9791387319564819,grad_norm: 0.8096094991465038, iteration: 407846
loss: 0.9852391481399536,grad_norm: 0.6466936953536452, iteration: 407847
loss: 1.0033113956451416,grad_norm: 0.743867153191224, iteration: 407848
loss: 0.9962071180343628,grad_norm: 0.7752114653570709, iteration: 407849
loss: 1.0060495138168335,grad_norm: 0.7475483912559093, iteration: 407850
loss: 0.985711395740509,grad_norm: 0.8627603145946796, iteration: 407851
loss: 1.03047513961792,grad_norm: 0.9999998555962902, iteration: 407852
loss: 0.9788528680801392,grad_norm: 0.7194932963812629, iteration: 407853
loss: 0.978377103805542,grad_norm: 0.7875062535049663, iteration: 407854
loss: 0.9964904189109802,grad_norm: 0.7668523091788023, iteration: 407855
loss: 0.9958427548408508,grad_norm: 0.773366599044401, iteration: 407856
loss: 0.9679199457168579,grad_norm: 0.9999994811405302, iteration: 407857
loss: 0.9882237315177917,grad_norm: 0.8273242531007762, iteration: 407858
loss: 0.9617946743965149,grad_norm: 0.9297231547038782, iteration: 407859
loss: 0.9736339449882507,grad_norm: 0.683718212197188, iteration: 407860
loss: 0.9912250638008118,grad_norm: 0.8360720123745702, iteration: 407861
loss: 1.0171595811843872,grad_norm: 0.7957909939774015, iteration: 407862
loss: 0.9831842184066772,grad_norm: 0.7215256563661238, iteration: 407863
loss: 1.0004855394363403,grad_norm: 0.9954360784054159, iteration: 407864
loss: 0.9954766631126404,grad_norm: 0.6949150274035282, iteration: 407865
loss: 0.9850225448608398,grad_norm: 0.7470402786221336, iteration: 407866
loss: 1.0358439683914185,grad_norm: 0.794678772643544, iteration: 407867
loss: 0.98711758852005,grad_norm: 0.6762184857434531, iteration: 407868
loss: 1.0191760063171387,grad_norm: 0.8248785960163513, iteration: 407869
loss: 0.9425233602523804,grad_norm: 0.7070933000405286, iteration: 407870
loss: 0.9867150187492371,grad_norm: 0.8809885926233851, iteration: 407871
loss: 0.9759624600410461,grad_norm: 0.9103321425574281, iteration: 407872
loss: 0.9944451451301575,grad_norm: 0.7450353183185177, iteration: 407873
loss: 0.9666382670402527,grad_norm: 0.6398138493255603, iteration: 407874
loss: 0.972421407699585,grad_norm: 0.6732551012728715, iteration: 407875
loss: 1.0109875202178955,grad_norm: 0.7116259214455797, iteration: 407876
loss: 1.0191529989242554,grad_norm: 0.7072707117442872, iteration: 407877
loss: 1.0247622728347778,grad_norm: 0.7476756089263267, iteration: 407878
loss: 1.013899803161621,grad_norm: 0.7471415117975687, iteration: 407879
loss: 0.9684387445449829,grad_norm: 0.8347508910539575, iteration: 407880
loss: 1.0345661640167236,grad_norm: 0.7087209723615163, iteration: 407881
loss: 0.9752956032752991,grad_norm: 0.695967124553798, iteration: 407882
loss: 1.010753870010376,grad_norm: 0.6994706127832988, iteration: 407883
loss: 0.9778633117675781,grad_norm: 0.8296940364042492, iteration: 407884
loss: 0.9980043768882751,grad_norm: 0.7287937816187512, iteration: 407885
loss: 0.9607495665550232,grad_norm: 0.7455094331215288, iteration: 407886
loss: 1.0398809909820557,grad_norm: 0.8317228255843604, iteration: 407887
loss: 0.9796398878097534,grad_norm: 0.6801688417889533, iteration: 407888
loss: 0.9807103872299194,grad_norm: 0.7898590383531382, iteration: 407889
loss: 0.9826769232749939,grad_norm: 0.7991842678199035, iteration: 407890
loss: 1.033861756324768,grad_norm: 0.8706184014300292, iteration: 407891
loss: 1.0009275674819946,grad_norm: 0.623005756153262, iteration: 407892
loss: 0.9832813143730164,grad_norm: 0.6669581050415088, iteration: 407893
loss: 0.9993858933448792,grad_norm: 0.7843567226784253, iteration: 407894
loss: 0.9935520887374878,grad_norm: 0.7910616863496838, iteration: 407895
loss: 0.9701594114303589,grad_norm: 0.7780735863247066, iteration: 407896
loss: 1.046297550201416,grad_norm: 0.9999990701060613, iteration: 407897
loss: 0.9815468788146973,grad_norm: 0.7903677523506788, iteration: 407898
loss: 0.9771330952644348,grad_norm: 0.7423694088726167, iteration: 407899
loss: 1.015100121498108,grad_norm: 0.6852163352276501, iteration: 407900
loss: 1.017979383468628,grad_norm: 0.7880527667741922, iteration: 407901
loss: 0.988271951675415,grad_norm: 0.7372744926370208, iteration: 407902
loss: 1.0276321172714233,grad_norm: 0.8086481381663604, iteration: 407903
loss: 0.9925695657730103,grad_norm: 0.6872162159100681, iteration: 407904
loss: 0.9805333614349365,grad_norm: 0.7873501746130709, iteration: 407905
loss: 1.0120304822921753,grad_norm: 0.7187986199874989, iteration: 407906
loss: 1.0021389722824097,grad_norm: 0.8327205196291481, iteration: 407907
loss: 1.0113219022750854,grad_norm: 0.7505186790735424, iteration: 407908
loss: 1.0344467163085938,grad_norm: 0.8046143785645067, iteration: 407909
loss: 1.0130139589309692,grad_norm: 0.8235558503553934, iteration: 407910
loss: 1.0113391876220703,grad_norm: 0.8653408080885044, iteration: 407911
loss: 0.9950023293495178,grad_norm: 0.7587169975294162, iteration: 407912
loss: 1.0152945518493652,grad_norm: 0.8052800607141188, iteration: 407913
loss: 1.0287925004959106,grad_norm: 0.7879595306320714, iteration: 407914
loss: 1.0030239820480347,grad_norm: 0.710705585430035, iteration: 407915
loss: 1.0188626050949097,grad_norm: 0.7206384799617513, iteration: 407916
loss: 0.995916485786438,grad_norm: 0.7087567446883368, iteration: 407917
loss: 1.0095070600509644,grad_norm: 0.886008507133693, iteration: 407918
loss: 1.0008866786956787,grad_norm: 0.7361314839569566, iteration: 407919
loss: 0.9545037150382996,grad_norm: 0.884899686269768, iteration: 407920
loss: 1.008201003074646,grad_norm: 0.7996367135601501, iteration: 407921
loss: 1.0181376934051514,grad_norm: 0.6796756231815373, iteration: 407922
loss: 1.029658317565918,grad_norm: 0.8703450219561121, iteration: 407923
loss: 1.0240870714187622,grad_norm: 0.9999995084187099, iteration: 407924
loss: 1.019579291343689,grad_norm: 0.9999995658746806, iteration: 407925
loss: 0.9989046454429626,grad_norm: 0.8298313704124216, iteration: 407926
loss: 1.0291285514831543,grad_norm: 0.9147904019509985, iteration: 407927
loss: 0.9817386269569397,grad_norm: 0.9064705334372765, iteration: 407928
loss: 1.0153656005859375,grad_norm: 0.9999991586374669, iteration: 407929
loss: 1.0021679401397705,grad_norm: 0.7833948837204511, iteration: 407930
loss: 0.9999306201934814,grad_norm: 0.7396422929074412, iteration: 407931
loss: 1.0505704879760742,grad_norm: 0.9999998055482762, iteration: 407932
loss: 1.0722320079803467,grad_norm: 0.9999998614804377, iteration: 407933
loss: 0.9642212986946106,grad_norm: 0.8602341730445808, iteration: 407934
loss: 0.9711306095123291,grad_norm: 0.7402347928578814, iteration: 407935
loss: 0.9897702932357788,grad_norm: 0.7671773354365952, iteration: 407936
loss: 0.9918442368507385,grad_norm: 0.7206651059097706, iteration: 407937
loss: 1.035964846611023,grad_norm: 0.8117624791450992, iteration: 407938
loss: 1.0140910148620605,grad_norm: 0.834534431334829, iteration: 407939
loss: 0.9950348734855652,grad_norm: 0.7745038406082619, iteration: 407940
loss: 1.0028702020645142,grad_norm: 0.805741146026582, iteration: 407941
loss: 1.0379141569137573,grad_norm: 0.9999994380150545, iteration: 407942
loss: 1.005167007446289,grad_norm: 0.7241248391516729, iteration: 407943
loss: 1.0201059579849243,grad_norm: 0.7522286527183424, iteration: 407944
loss: 0.9993014931678772,grad_norm: 0.7855087845939966, iteration: 407945
loss: 0.9953609704971313,grad_norm: 0.7501182891615706, iteration: 407946
loss: 0.9869710206985474,grad_norm: 0.7801024243098803, iteration: 407947
loss: 1.0053225755691528,grad_norm: 0.8035791492683292, iteration: 407948
loss: 1.0106455087661743,grad_norm: 0.7938596001483879, iteration: 407949
loss: 1.0211834907531738,grad_norm: 0.7113028834069023, iteration: 407950
loss: 0.9956591725349426,grad_norm: 0.7408372711512761, iteration: 407951
loss: 1.0226030349731445,grad_norm: 0.7370909983234457, iteration: 407952
loss: 0.9853110909461975,grad_norm: 0.7843635104147724, iteration: 407953
loss: 0.9731002449989319,grad_norm: 0.9999991904800687, iteration: 407954
loss: 1.0390797853469849,grad_norm: 0.9999992381525, iteration: 407955
loss: 0.9574383497238159,grad_norm: 0.6965007574242489, iteration: 407956
loss: 1.005881667137146,grad_norm: 0.9715762264375327, iteration: 407957
loss: 1.0054073333740234,grad_norm: 0.736385672631351, iteration: 407958
loss: 0.9809073805809021,grad_norm: 0.7988993933530227, iteration: 407959
loss: 1.0160064697265625,grad_norm: 0.9999993771657926, iteration: 407960
loss: 1.027524709701538,grad_norm: 0.7438977112368057, iteration: 407961
loss: 1.0386420488357544,grad_norm: 0.7645571113217121, iteration: 407962
loss: 1.0371252298355103,grad_norm: 0.7241922126064658, iteration: 407963
loss: 0.9803215265274048,grad_norm: 0.8459184864322249, iteration: 407964
loss: 0.9510326385498047,grad_norm: 0.7359016196162549, iteration: 407965
loss: 0.9727157950401306,grad_norm: 0.8267583014174474, iteration: 407966
loss: 1.044289469718933,grad_norm: 0.7145417279973236, iteration: 407967
loss: 0.982319712638855,grad_norm: 0.7390623277758352, iteration: 407968
loss: 1.011282205581665,grad_norm: 0.7621038788121703, iteration: 407969
loss: 0.9928362369537354,grad_norm: 0.6846642922100237, iteration: 407970
loss: 1.02015221118927,grad_norm: 0.939026317352172, iteration: 407971
loss: 0.9663847088813782,grad_norm: 0.7769711454369849, iteration: 407972
loss: 0.9756897687911987,grad_norm: 0.6850929594275595, iteration: 407973
loss: 1.0041465759277344,grad_norm: 0.8281511734584721, iteration: 407974
loss: 0.9908181428909302,grad_norm: 0.7498068346486502, iteration: 407975
loss: 1.0211454629898071,grad_norm: 0.7241639001179381, iteration: 407976
loss: 0.9939809441566467,grad_norm: 0.7514773012926016, iteration: 407977
loss: 0.9819793701171875,grad_norm: 0.8894883699721066, iteration: 407978
loss: 1.0033984184265137,grad_norm: 0.8133675978280807, iteration: 407979
loss: 0.9574804306030273,grad_norm: 0.8133985836929444, iteration: 407980
loss: 1.0094197988510132,grad_norm: 0.999998914713133, iteration: 407981
loss: 0.9450778365135193,grad_norm: 0.6305026822899681, iteration: 407982
loss: 1.0035598278045654,grad_norm: 0.6013809560524921, iteration: 407983
loss: 0.9530568718910217,grad_norm: 0.7222100027968206, iteration: 407984
loss: 0.9743919968605042,grad_norm: 0.9999991624017025, iteration: 407985
loss: 1.0258822441101074,grad_norm: 0.9317944218474893, iteration: 407986
loss: 1.0076420307159424,grad_norm: 0.9999995820733151, iteration: 407987
loss: 1.0338042974472046,grad_norm: 0.8653717591843851, iteration: 407988
loss: 0.9906932711601257,grad_norm: 0.9232282879758783, iteration: 407989
loss: 0.9970020055770874,grad_norm: 0.8753329090225546, iteration: 407990
loss: 1.0366754531860352,grad_norm: 0.9076250041839891, iteration: 407991
loss: 1.009074091911316,grad_norm: 0.7759410588071178, iteration: 407992
loss: 1.1054246425628662,grad_norm: 0.9999991901782639, iteration: 407993
loss: 1.0230529308319092,grad_norm: 0.7242570253461234, iteration: 407994
loss: 1.0348389148712158,grad_norm: 0.6957593749564379, iteration: 407995
loss: 1.0138449668884277,grad_norm: 0.8144312753093608, iteration: 407996
loss: 1.0075761079788208,grad_norm: 0.9884360855074409, iteration: 407997
loss: 0.9950143694877625,grad_norm: 0.8411761128603427, iteration: 407998
loss: 0.9761608242988586,grad_norm: 0.9090295116230312, iteration: 407999
loss: 1.013047695159912,grad_norm: 0.7947729175972473, iteration: 408000
loss: 1.1283522844314575,grad_norm: 0.9999993723554595, iteration: 408001
loss: 0.9965745806694031,grad_norm: 0.8563832719175789, iteration: 408002
loss: 1.0122361183166504,grad_norm: 0.7379641796531777, iteration: 408003
loss: 1.0169142484664917,grad_norm: 0.8479656490572057, iteration: 408004
loss: 1.090635895729065,grad_norm: 0.9999991126096522, iteration: 408005
loss: 1.0040345191955566,grad_norm: 0.8424652989221354, iteration: 408006
loss: 0.971674919128418,grad_norm: 0.9053931149096546, iteration: 408007
loss: 1.01102614402771,grad_norm: 0.9999995959778881, iteration: 408008
loss: 1.0053181648254395,grad_norm: 0.7038054430139719, iteration: 408009
loss: 1.0223097801208496,grad_norm: 0.6810606404918969, iteration: 408010
loss: 1.0057389736175537,grad_norm: 0.9999989729978767, iteration: 408011
loss: 0.9880615472793579,grad_norm: 0.7532354791366852, iteration: 408012
loss: 0.9639880657196045,grad_norm: 0.6357674013503736, iteration: 408013
loss: 0.9892372488975525,grad_norm: 0.6937018117381812, iteration: 408014
loss: 0.9705946445465088,grad_norm: 0.7656950404547204, iteration: 408015
loss: 0.9761431813240051,grad_norm: 0.9973943728184136, iteration: 408016
loss: 1.0711795091629028,grad_norm: 1.00000000242337, iteration: 408017
loss: 1.0054031610488892,grad_norm: 0.9786008416948464, iteration: 408018
loss: 0.9916066527366638,grad_norm: 0.8648152739326107, iteration: 408019
loss: 1.0206695795059204,grad_norm: 0.9999995271072434, iteration: 408020
loss: 1.0327892303466797,grad_norm: 0.9999991863682599, iteration: 408021
loss: 0.9897585511207581,grad_norm: 0.6187834510659044, iteration: 408022
loss: 1.4000556468963623,grad_norm: 0.99999991574271, iteration: 408023
loss: 0.9877627491950989,grad_norm: 0.8017493134841236, iteration: 408024
loss: 1.0015361309051514,grad_norm: 0.7883588569927473, iteration: 408025
loss: 1.0092264413833618,grad_norm: 0.9324107304141112, iteration: 408026
loss: 0.9929190278053284,grad_norm: 0.9172784766659754, iteration: 408027
loss: 1.0244649648666382,grad_norm: 0.804347993035793, iteration: 408028
loss: 1.0839016437530518,grad_norm: 0.9999994945241695, iteration: 408029
loss: 1.0729438066482544,grad_norm: 0.9999995240810224, iteration: 408030
loss: 0.97916579246521,grad_norm: 0.8178071611778587, iteration: 408031
loss: 1.1359260082244873,grad_norm: 0.9999991837887647, iteration: 408032
loss: 1.036483645439148,grad_norm: 0.8251122052122367, iteration: 408033
loss: 1.0215612649917603,grad_norm: 0.7793687603628882, iteration: 408034
loss: 1.0092264413833618,grad_norm: 0.7856940416238121, iteration: 408035
loss: 1.04200279712677,grad_norm: 0.8719864550212241, iteration: 408036
loss: 1.0645637512207031,grad_norm: 0.9999993534583597, iteration: 408037
loss: 0.9885745048522949,grad_norm: 0.7208154936565973, iteration: 408038
loss: 0.9775420427322388,grad_norm: 0.8200898491214803, iteration: 408039
loss: 0.9701001048088074,grad_norm: 0.8270344152571062, iteration: 408040
loss: 1.009464979171753,grad_norm: 0.7629059627538908, iteration: 408041
loss: 1.0072847604751587,grad_norm: 0.8687254104564149, iteration: 408042
loss: 1.025591254234314,grad_norm: 0.7253399839463294, iteration: 408043
loss: 1.044943928718567,grad_norm: 0.9683585945442477, iteration: 408044
loss: 0.9611465334892273,grad_norm: 0.6875081280169102, iteration: 408045
loss: 0.9943567514419556,grad_norm: 0.6685401743653413, iteration: 408046
loss: 0.9919301867485046,grad_norm: 0.7833634912031224, iteration: 408047
loss: 0.9853439331054688,grad_norm: 0.6833204104053517, iteration: 408048
loss: 0.9729921221733093,grad_norm: 0.9999992646227317, iteration: 408049
loss: 1.0129468441009521,grad_norm: 0.632966398419501, iteration: 408050
loss: 1.0112322568893433,grad_norm: 0.6801509326236704, iteration: 408051
loss: 0.9932085871696472,grad_norm: 0.9103453222571536, iteration: 408052
loss: 1.0019174814224243,grad_norm: 0.780969141019652, iteration: 408053
loss: 0.9833078980445862,grad_norm: 0.867396311807652, iteration: 408054
loss: 0.9827631711959839,grad_norm: 0.7921031410187637, iteration: 408055
loss: 1.0077943801879883,grad_norm: 0.9117363301244759, iteration: 408056
loss: 1.0097413063049316,grad_norm: 0.8457822202551776, iteration: 408057
loss: 1.0061287879943848,grad_norm: 0.883020465283638, iteration: 408058
loss: 0.9976310729980469,grad_norm: 0.6120452846644705, iteration: 408059
loss: 1.0001848936080933,grad_norm: 0.6933469970168462, iteration: 408060
loss: 0.9829224348068237,grad_norm: 0.9999994916064435, iteration: 408061
loss: 1.0217846632003784,grad_norm: 0.999999540402449, iteration: 408062
loss: 0.980567216873169,grad_norm: 0.6873794160741004, iteration: 408063
loss: 1.0429786443710327,grad_norm: 0.9999996213922477, iteration: 408064
loss: 1.013307809829712,grad_norm: 0.7689453704012819, iteration: 408065
loss: 0.9800966382026672,grad_norm: 0.8120556821190635, iteration: 408066
loss: 1.0167964696884155,grad_norm: 0.7621325089261886, iteration: 408067
loss: 1.0429282188415527,grad_norm: 0.9999993344298825, iteration: 408068
loss: 0.9928596615791321,grad_norm: 0.7265093477600844, iteration: 408069
loss: 0.9754889607429504,grad_norm: 0.8698884685870357, iteration: 408070
loss: 1.0060806274414062,grad_norm: 0.9999990917873662, iteration: 408071
loss: 1.0245476961135864,grad_norm: 0.9515895318434989, iteration: 408072
loss: 0.9911313056945801,grad_norm: 0.6663283762813867, iteration: 408073
loss: 0.9967090487480164,grad_norm: 0.6791370422812278, iteration: 408074
loss: 0.9583058953285217,grad_norm: 0.8622919692320521, iteration: 408075
loss: 1.0007706880569458,grad_norm: 0.8483636473244934, iteration: 408076
loss: 1.0036816596984863,grad_norm: 0.9032250055123298, iteration: 408077
loss: 1.0269274711608887,grad_norm: 0.9999999114001221, iteration: 408078
loss: 0.9663193225860596,grad_norm: 0.7441071174307742, iteration: 408079
loss: 0.9847436547279358,grad_norm: 0.8246531486597037, iteration: 408080
loss: 0.950343132019043,grad_norm: 0.7769712119342078, iteration: 408081
loss: 1.0083249807357788,grad_norm: 0.8021732755975839, iteration: 408082
loss: 0.9970316290855408,grad_norm: 0.7141467830748175, iteration: 408083
loss: 0.9983873963356018,grad_norm: 0.999999297216606, iteration: 408084
loss: 0.9808332324028015,grad_norm: 0.714192572102313, iteration: 408085
loss: 1.0111225843429565,grad_norm: 0.8106173614352972, iteration: 408086
loss: 1.019804835319519,grad_norm: 0.8495691663769867, iteration: 408087
loss: 1.0210192203521729,grad_norm: 0.9999995336639552, iteration: 408088
loss: 0.9560444355010986,grad_norm: 0.7633346453942114, iteration: 408089
loss: 1.0235495567321777,grad_norm: 0.8032723914064752, iteration: 408090
loss: 0.9956532716751099,grad_norm: 0.8696064326108267, iteration: 408091
loss: 0.9594234824180603,grad_norm: 0.9162231418922357, iteration: 408092
loss: 1.0029759407043457,grad_norm: 0.9931394906267488, iteration: 408093
loss: 0.982764720916748,grad_norm: 0.6580830302228964, iteration: 408094
loss: 1.01670503616333,grad_norm: 0.708759120947933, iteration: 408095
loss: 1.0218045711517334,grad_norm: 0.7812199672601278, iteration: 408096
loss: 1.0050359964370728,grad_norm: 0.8209243192167872, iteration: 408097
loss: 0.9991878271102905,grad_norm: 0.9896864732210534, iteration: 408098
loss: 0.9775662422180176,grad_norm: 0.7433454175478508, iteration: 408099
loss: 1.023241400718689,grad_norm: 0.7977705914624575, iteration: 408100
loss: 0.9789637923240662,grad_norm: 0.703181056595594, iteration: 408101
loss: 1.0248212814331055,grad_norm: 0.9325360695846926, iteration: 408102
loss: 1.0091544389724731,grad_norm: 0.9418022629000432, iteration: 408103
loss: 0.9947131276130676,grad_norm: 0.8569426814381703, iteration: 408104
loss: 0.9706231951713562,grad_norm: 0.8218580652770571, iteration: 408105
loss: 0.969466507434845,grad_norm: 0.8013149539754452, iteration: 408106
loss: 0.9869856834411621,grad_norm: 0.7574943066326408, iteration: 408107
loss: 0.9939615726470947,grad_norm: 0.7512938486784723, iteration: 408108
loss: 1.0434489250183105,grad_norm: 0.7723150398947379, iteration: 408109
loss: 0.9897533655166626,grad_norm: 0.7537552037390005, iteration: 408110
loss: 1.0160459280014038,grad_norm: 0.705198183720868, iteration: 408111
loss: 0.9775303602218628,grad_norm: 0.8104088933333439, iteration: 408112
loss: 1.009206771850586,grad_norm: 0.9999992923740303, iteration: 408113
loss: 1.006386399269104,grad_norm: 0.8500647663258065, iteration: 408114
loss: 1.0271203517913818,grad_norm: 0.8655709132516289, iteration: 408115
loss: 1.0080963373184204,grad_norm: 0.9910958097406056, iteration: 408116
loss: 1.0047271251678467,grad_norm: 0.7129678387908015, iteration: 408117
loss: 1.035522699356079,grad_norm: 0.6556778456203022, iteration: 408118
loss: 0.9844833612442017,grad_norm: 0.8503087647477495, iteration: 408119
loss: 1.0205150842666626,grad_norm: 0.6765207860002149, iteration: 408120
loss: 1.0440000295639038,grad_norm: 0.9999994074675692, iteration: 408121
loss: 0.9806994795799255,grad_norm: 0.7119552013178919, iteration: 408122
loss: 0.9982553720474243,grad_norm: 0.7292466843631022, iteration: 408123
loss: 1.013097882270813,grad_norm: 0.6478229379316892, iteration: 408124
loss: 1.0030795335769653,grad_norm: 0.7485364915159786, iteration: 408125
loss: 0.9995450377464294,grad_norm: 0.7487883003456868, iteration: 408126
loss: 0.9865511655807495,grad_norm: 0.8359782339079468, iteration: 408127
loss: 0.9907663464546204,grad_norm: 0.7857348496671458, iteration: 408128
loss: 0.9875395894050598,grad_norm: 0.8217433677508023, iteration: 408129
loss: 1.000132441520691,grad_norm: 0.7273576956894505, iteration: 408130
loss: 0.9657374024391174,grad_norm: 0.6785489051648874, iteration: 408131
loss: 1.026863694190979,grad_norm: 0.820831419894886, iteration: 408132
loss: 1.0010225772857666,grad_norm: 0.8762163173771823, iteration: 408133
loss: 1.0462098121643066,grad_norm: 0.6799000374008621, iteration: 408134
loss: 1.0097426176071167,grad_norm: 0.8117948733566333, iteration: 408135
loss: 0.9710692763328552,grad_norm: 0.745570798736676, iteration: 408136
loss: 0.9765738844871521,grad_norm: 0.7077118210006889, iteration: 408137
loss: 1.0043407678604126,grad_norm: 0.9999992565557063, iteration: 408138
loss: 1.0017222166061401,grad_norm: 0.7990302176252005, iteration: 408139
loss: 0.9741555452346802,grad_norm: 0.7078551160644286, iteration: 408140
loss: 1.018723487854004,grad_norm: 0.6788613899931829, iteration: 408141
loss: 1.008255124092102,grad_norm: 0.6250253791010889, iteration: 408142
loss: 0.9948799014091492,grad_norm: 0.7596016326045157, iteration: 408143
loss: 1.0017316341400146,grad_norm: 0.9999997266887902, iteration: 408144
loss: 0.9868601560592651,grad_norm: 0.7278501457372764, iteration: 408145
loss: 1.0608153343200684,grad_norm: 0.999999356246217, iteration: 408146
loss: 1.0230580568313599,grad_norm: 0.7705856400167163, iteration: 408147
loss: 1.008399486541748,grad_norm: 0.861098418343237, iteration: 408148
loss: 1.0102832317352295,grad_norm: 0.7785349105154269, iteration: 408149
loss: 1.033677101135254,grad_norm: 0.9999990962865803, iteration: 408150
loss: 0.9944854974746704,grad_norm: 0.9516094055065994, iteration: 408151
loss: 0.975738525390625,grad_norm: 0.7429922795100361, iteration: 408152
loss: 1.0048660039901733,grad_norm: 0.7718116308201106, iteration: 408153
loss: 1.0214065313339233,grad_norm: 0.9999999411454696, iteration: 408154
loss: 0.9741044640541077,grad_norm: 0.6674478396505596, iteration: 408155
loss: 1.0481057167053223,grad_norm: 0.7066993179865908, iteration: 408156
loss: 1.00739586353302,grad_norm: 0.8011811791710014, iteration: 408157
loss: 1.0006558895111084,grad_norm: 0.7730154081831457, iteration: 408158
loss: 1.0158486366271973,grad_norm: 0.7598996973567702, iteration: 408159
loss: 1.0109888315200806,grad_norm: 0.6969843233975028, iteration: 408160
loss: 0.9712603688240051,grad_norm: 0.999999370420941, iteration: 408161
loss: 0.9475963115692139,grad_norm: 0.7770193407767627, iteration: 408162
loss: 1.012168049812317,grad_norm: 0.7995611452834119, iteration: 408163
loss: 1.0363411903381348,grad_norm: 0.7751685502397899, iteration: 408164
loss: 0.993079662322998,grad_norm: 0.8480092655412255, iteration: 408165
loss: 0.9787563681602478,grad_norm: 0.9999994800811194, iteration: 408166
loss: 1.0291346311569214,grad_norm: 0.7873290112656618, iteration: 408167
loss: 0.9976957440376282,grad_norm: 0.7505719989992515, iteration: 408168
loss: 1.0358684062957764,grad_norm: 0.9999995052796982, iteration: 408169
loss: 1.0006539821624756,grad_norm: 0.7702017134682737, iteration: 408170
loss: 1.0027267932891846,grad_norm: 0.8362353678671243, iteration: 408171
loss: 1.0831242799758911,grad_norm: 0.9999996976945891, iteration: 408172
loss: 0.9964436292648315,grad_norm: 0.638252504677339, iteration: 408173
loss: 1.0051816701889038,grad_norm: 0.8487585397747265, iteration: 408174
loss: 0.997422456741333,grad_norm: 0.8414856109144834, iteration: 408175
loss: 1.0035083293914795,grad_norm: 0.774325554527035, iteration: 408176
loss: 0.9763554334640503,grad_norm: 0.9182811367670441, iteration: 408177
loss: 1.054465889930725,grad_norm: 0.9999998937546803, iteration: 408178
loss: 0.9880797266960144,grad_norm: 0.9999992254732294, iteration: 408179
loss: 1.0244694948196411,grad_norm: 0.9999991180334691, iteration: 408180
loss: 0.982392430305481,grad_norm: 0.99999944641823, iteration: 408181
loss: 0.9653911590576172,grad_norm: 0.7661422525129774, iteration: 408182
loss: 1.0171630382537842,grad_norm: 0.9257815173393537, iteration: 408183
loss: 0.9861025214195251,grad_norm: 0.7806965047477593, iteration: 408184
loss: 1.0189367532730103,grad_norm: 0.766399208272325, iteration: 408185
loss: 0.9919947385787964,grad_norm: 0.7699764435375643, iteration: 408186
loss: 0.949652910232544,grad_norm: 0.8926383996140397, iteration: 408187
loss: 1.0355889797210693,grad_norm: 0.714999894861281, iteration: 408188
loss: 1.0112075805664062,grad_norm: 0.8134206992680388, iteration: 408189
loss: 1.00197172164917,grad_norm: 0.7687697865658488, iteration: 408190
loss: 0.9773595333099365,grad_norm: 0.855315000102761, iteration: 408191
loss: 0.9283455014228821,grad_norm: 0.7603415649283325, iteration: 408192
loss: 1.026974081993103,grad_norm: 0.8179952634186483, iteration: 408193
loss: 1.030004620552063,grad_norm: 0.9622814007441255, iteration: 408194
loss: 0.9916896820068359,grad_norm: 0.7602636929907539, iteration: 408195
loss: 1.0144954919815063,grad_norm: 0.6953095270177732, iteration: 408196
loss: 1.0118123292922974,grad_norm: 0.8148338752931067, iteration: 408197
loss: 1.0254919528961182,grad_norm: 0.7417048715106234, iteration: 408198
loss: 1.0015655755996704,grad_norm: 0.7704856966577961, iteration: 408199
loss: 1.0022634267807007,grad_norm: 0.6430721343012025, iteration: 408200
loss: 1.009033203125,grad_norm: 0.7249608452566696, iteration: 408201
loss: 1.000812292098999,grad_norm: 0.8082430715559942, iteration: 408202
loss: 0.9985493421554565,grad_norm: 0.7569342196883461, iteration: 408203
loss: 0.9798808693885803,grad_norm: 0.806352811836402, iteration: 408204
loss: 1.0094760656356812,grad_norm: 0.7243502775514399, iteration: 408205
loss: 1.0155137777328491,grad_norm: 0.8097524774546504, iteration: 408206
loss: 1.0331950187683105,grad_norm: 0.8457858810243121, iteration: 408207
loss: 0.9948821067810059,grad_norm: 0.9958796194176454, iteration: 408208
loss: 0.9602597951889038,grad_norm: 0.9999990367009501, iteration: 408209
loss: 0.982524037361145,grad_norm: 0.7505394123410176, iteration: 408210
loss: 1.0145035982131958,grad_norm: 0.9999996690253067, iteration: 408211
loss: 1.011868953704834,grad_norm: 0.7357949893284412, iteration: 408212
loss: 0.9863413572311401,grad_norm: 0.8255823053449836, iteration: 408213
loss: 0.9649806618690491,grad_norm: 0.7595392722277448, iteration: 408214
loss: 0.9757930636405945,grad_norm: 0.785697734210899, iteration: 408215
loss: 0.9812107682228088,grad_norm: 0.663843506148239, iteration: 408216
loss: 1.0222784280776978,grad_norm: 0.8657550735957449, iteration: 408217
loss: 0.9770298600196838,grad_norm: 0.7432710283986956, iteration: 408218
loss: 0.9698450565338135,grad_norm: 0.854048854859712, iteration: 408219
loss: 0.9751689434051514,grad_norm: 0.9488430143604654, iteration: 408220
loss: 1.0212419033050537,grad_norm: 0.6485739284874272, iteration: 408221
loss: 0.9877084493637085,grad_norm: 0.848990382051293, iteration: 408222
loss: 0.9939888119697571,grad_norm: 0.8018388172427233, iteration: 408223
loss: 0.9821740388870239,grad_norm: 0.8816401325079589, iteration: 408224
loss: 0.9637281894683838,grad_norm: 0.7684127781883594, iteration: 408225
loss: 0.9703089594841003,grad_norm: 0.7061913733345189, iteration: 408226
loss: 1.0516343116760254,grad_norm: 0.9999998985057466, iteration: 408227
loss: 0.9704161882400513,grad_norm: 0.8502557686228517, iteration: 408228
loss: 1.005535364151001,grad_norm: 0.796049642058915, iteration: 408229
loss: 1.1039390563964844,grad_norm: 0.9999999505546716, iteration: 408230
loss: 0.9776777625083923,grad_norm: 0.9999991081639114, iteration: 408231
loss: 0.9860182404518127,grad_norm: 0.6725018943801985, iteration: 408232
loss: 1.0418825149536133,grad_norm: 0.999999151680209, iteration: 408233
loss: 0.9634348154067993,grad_norm: 0.9061681050620531, iteration: 408234
loss: 0.9977615475654602,grad_norm: 0.7296672067235376, iteration: 408235
loss: 1.0000605583190918,grad_norm: 0.7917205590456413, iteration: 408236
loss: 0.9891961812973022,grad_norm: 0.6878253592380051, iteration: 408237
loss: 1.015390396118164,grad_norm: 0.7367554713099632, iteration: 408238
loss: 0.991353452205658,grad_norm: 0.6531400617987442, iteration: 408239
loss: 1.0496550798416138,grad_norm: 0.7472657889381623, iteration: 408240
loss: 1.0385206937789917,grad_norm: 0.8739634110199385, iteration: 408241
loss: 0.9966712594032288,grad_norm: 0.7357890930560105, iteration: 408242
loss: 1.0007919073104858,grad_norm: 0.7757862170504634, iteration: 408243
loss: 1.0109070539474487,grad_norm: 0.7022605701403506, iteration: 408244
loss: 1.0060142278671265,grad_norm: 0.9486913173286889, iteration: 408245
loss: 1.0077639818191528,grad_norm: 0.7234014894195796, iteration: 408246
loss: 1.0266106128692627,grad_norm: 0.8169454594161516, iteration: 408247
loss: 1.0104197263717651,grad_norm: 0.7774673659072482, iteration: 408248
loss: 0.9874831438064575,grad_norm: 0.6776293287523587, iteration: 408249
loss: 1.0058057308197021,grad_norm: 0.9999991795041939, iteration: 408250
loss: 1.008558750152588,grad_norm: 0.918808717302519, iteration: 408251
loss: 0.9825962781906128,grad_norm: 0.952299638801972, iteration: 408252
loss: 0.9969736337661743,grad_norm: 0.9999994082815856, iteration: 408253
loss: 1.038382887840271,grad_norm: 0.8498069390379083, iteration: 408254
loss: 1.0191287994384766,grad_norm: 0.9098909032281657, iteration: 408255
loss: 1.0218191146850586,grad_norm: 0.7023280372333646, iteration: 408256
loss: 1.04865300655365,grad_norm: 0.9999995318351457, iteration: 408257
loss: 1.013545274734497,grad_norm: 0.9999990959818525, iteration: 408258
loss: 1.012272596359253,grad_norm: 0.8074634283039086, iteration: 408259
loss: 1.0272279977798462,grad_norm: 0.8017612892459248, iteration: 408260
loss: 1.0049781799316406,grad_norm: 0.9056454459459818, iteration: 408261
loss: 1.0084939002990723,grad_norm: 0.9590163106754557, iteration: 408262
loss: 1.0130195617675781,grad_norm: 0.8698447424098207, iteration: 408263
loss: 1.0379369258880615,grad_norm: 0.999999300804079, iteration: 408264
loss: 0.9997628331184387,grad_norm: 0.7608853679399168, iteration: 408265
loss: 1.000365138053894,grad_norm: 0.6727255844825522, iteration: 408266
loss: 0.9690780639648438,grad_norm: 0.769173748934663, iteration: 408267
loss: 1.008653998374939,grad_norm: 0.8571221315809923, iteration: 408268
loss: 0.9997475147247314,grad_norm: 0.641369441225371, iteration: 408269
loss: 0.9919354319572449,grad_norm: 0.712587145211963, iteration: 408270
loss: 0.963514506816864,grad_norm: 0.8064216292686203, iteration: 408271
loss: 0.9806003570556641,grad_norm: 0.8586568329404088, iteration: 408272
loss: 0.9837719798088074,grad_norm: 0.789343895479986, iteration: 408273
loss: 0.9929516911506653,grad_norm: 0.795640381908321, iteration: 408274
loss: 1.0345354080200195,grad_norm: 0.8271384078303828, iteration: 408275
loss: 1.0276408195495605,grad_norm: 0.7317053369969467, iteration: 408276
loss: 1.0128183364868164,grad_norm: 0.9999991043384115, iteration: 408277
loss: 1.0146381855010986,grad_norm: 0.7861410784347784, iteration: 408278
loss: 1.0028355121612549,grad_norm: 0.6927097603633298, iteration: 408279
loss: 0.9983274936676025,grad_norm: 0.7415518157770878, iteration: 408280
loss: 0.9459372758865356,grad_norm: 0.8296975576758285, iteration: 408281
loss: 1.0249338150024414,grad_norm: 0.7626674494742665, iteration: 408282
loss: 1.005547285079956,grad_norm: 0.8364438408594579, iteration: 408283
loss: 1.0007927417755127,grad_norm: 0.8323973766754855, iteration: 408284
loss: 1.0162757635116577,grad_norm: 0.7970536407810866, iteration: 408285
loss: 0.9910005927085876,grad_norm: 0.9131175350645571, iteration: 408286
loss: 1.0212312936782837,grad_norm: 0.9214588579329821, iteration: 408287
loss: 0.967598557472229,grad_norm: 0.895689633768739, iteration: 408288
loss: 1.0015754699707031,grad_norm: 0.7277224293948781, iteration: 408289
loss: 0.9740504026412964,grad_norm: 0.6886042175921088, iteration: 408290
loss: 1.0267479419708252,grad_norm: 0.9038539982456494, iteration: 408291
loss: 1.005346417427063,grad_norm: 0.9032802512650709, iteration: 408292
loss: 0.9983223080635071,grad_norm: 0.9999990987894115, iteration: 408293
loss: 1.05320405960083,grad_norm: 0.7927691248178792, iteration: 408294
loss: 0.9850974082946777,grad_norm: 0.7871363460692242, iteration: 408295
loss: 0.994985044002533,grad_norm: 0.8301491614377617, iteration: 408296
loss: 1.0109825134277344,grad_norm: 0.9999992514085575, iteration: 408297
loss: 1.0074865818023682,grad_norm: 0.7554998471140079, iteration: 408298
loss: 1.010726809501648,grad_norm: 0.9999994706955254, iteration: 408299
loss: 0.9931018352508545,grad_norm: 0.9916538583027082, iteration: 408300
loss: 0.9740915298461914,grad_norm: 0.9999991709069533, iteration: 408301
loss: 0.9629901647567749,grad_norm: 0.8800313716053171, iteration: 408302
loss: 1.035821557044983,grad_norm: 0.7038955694676137, iteration: 408303
loss: 1.0029329061508179,grad_norm: 0.7075310683097936, iteration: 408304
loss: 0.9742412567138672,grad_norm: 0.8675304220774225, iteration: 408305
loss: 0.990606963634491,grad_norm: 0.7798231413871026, iteration: 408306
loss: 0.9666846990585327,grad_norm: 0.8511702266712231, iteration: 408307
loss: 0.9921490550041199,grad_norm: 0.89782229143472, iteration: 408308
loss: 0.994176983833313,grad_norm: 0.703438342141641, iteration: 408309
loss: 0.9948055148124695,grad_norm: 0.8345058411718015, iteration: 408310
loss: 0.9926645159721375,grad_norm: 0.7505061188700418, iteration: 408311
loss: 1.0013023614883423,grad_norm: 0.7542442018940944, iteration: 408312
loss: 1.0581566095352173,grad_norm: 0.9437756897605801, iteration: 408313
loss: 0.9851177334785461,grad_norm: 0.8769004381851093, iteration: 408314
loss: 1.0199384689331055,grad_norm: 0.9124965832840721, iteration: 408315
loss: 1.0057201385498047,grad_norm: 0.8039673551901677, iteration: 408316
loss: 1.0100234746932983,grad_norm: 0.9664201483304051, iteration: 408317
loss: 1.0359057188034058,grad_norm: 0.8488138770427395, iteration: 408318
loss: 0.9771600961685181,grad_norm: 0.7277885652525291, iteration: 408319
loss: 0.9826807975769043,grad_norm: 0.9186928217482331, iteration: 408320
loss: 1.0075106620788574,grad_norm: 0.8042130471848106, iteration: 408321
loss: 0.997076690196991,grad_norm: 0.751178488780179, iteration: 408322
loss: 0.9899469614028931,grad_norm: 0.6832043153597717, iteration: 408323
loss: 0.9581711888313293,grad_norm: 0.7014191053059208, iteration: 408324
loss: 1.021391749382019,grad_norm: 0.9436715395675961, iteration: 408325
loss: 0.9828487634658813,grad_norm: 0.6481579400733082, iteration: 408326
loss: 1.0135563611984253,grad_norm: 0.6997274308657415, iteration: 408327
loss: 0.9982370138168335,grad_norm: 0.7285397395843686, iteration: 408328
loss: 1.0031267404556274,grad_norm: 0.9999996210218327, iteration: 408329
loss: 0.9992229342460632,grad_norm: 0.9999997084158481, iteration: 408330
loss: 0.9756394028663635,grad_norm: 0.738216540942814, iteration: 408331
loss: 1.0417639017105103,grad_norm: 0.6954787259817058, iteration: 408332
loss: 1.0133514404296875,grad_norm: 0.7743792258871872, iteration: 408333
loss: 0.9682710766792297,grad_norm: 0.8037232940960296, iteration: 408334
loss: 0.9660413861274719,grad_norm: 0.5985072504481538, iteration: 408335
loss: 0.9909765720367432,grad_norm: 0.7578686946265035, iteration: 408336
loss: 0.9950031042098999,grad_norm: 0.7919676980523058, iteration: 408337
loss: 1.0053163766860962,grad_norm: 0.8042991548211085, iteration: 408338
loss: 0.9977338314056396,grad_norm: 0.8055968685053284, iteration: 408339
loss: 1.019560694694519,grad_norm: 0.7835459289448552, iteration: 408340
loss: 1.000627875328064,grad_norm: 0.7415893969101431, iteration: 408341
loss: 0.9458110928535461,grad_norm: 0.7984975723760854, iteration: 408342
loss: 0.9888376593589783,grad_norm: 0.7521302214745135, iteration: 408343
loss: 1.072740077972412,grad_norm: 0.9999998228489578, iteration: 408344
loss: 1.0100637674331665,grad_norm: 0.6942073073885516, iteration: 408345
loss: 1.040435552597046,grad_norm: 0.8604174034611076, iteration: 408346
loss: 1.0207916498184204,grad_norm: 0.9999992730033705, iteration: 408347
loss: 1.0289050340652466,grad_norm: 0.852934078079118, iteration: 408348
loss: 1.045055866241455,grad_norm: 0.9999990480181966, iteration: 408349
loss: 0.956757664680481,grad_norm: 0.7751799931030082, iteration: 408350
loss: 0.9774243235588074,grad_norm: 0.7225339637807571, iteration: 408351
loss: 1.031131625175476,grad_norm: 0.6869207588699456, iteration: 408352
loss: 0.9942765235900879,grad_norm: 0.6571859689611076, iteration: 408353
loss: 1.1598389148712158,grad_norm: 0.9999999875581408, iteration: 408354
loss: 1.0116063356399536,grad_norm: 0.8101141285830508, iteration: 408355
loss: 1.017081618309021,grad_norm: 0.6512451997487453, iteration: 408356
loss: 0.9808736443519592,grad_norm: 0.6874672384347043, iteration: 408357
loss: 0.978210985660553,grad_norm: 0.7442683758917089, iteration: 408358
loss: 1.0298677682876587,grad_norm: 0.8757781303675072, iteration: 408359
loss: 0.9649994373321533,grad_norm: 0.7979549046255777, iteration: 408360
loss: 0.9991282820701599,grad_norm: 0.7358729419592296, iteration: 408361
loss: 1.023840069770813,grad_norm: 0.855671333908814, iteration: 408362
loss: 1.0119318962097168,grad_norm: 0.8158010063103347, iteration: 408363
loss: 0.984167754650116,grad_norm: 0.8574760105513358, iteration: 408364
loss: 0.9955679774284363,grad_norm: 0.7125476376764642, iteration: 408365
loss: 1.0022451877593994,grad_norm: 0.737735557510973, iteration: 408366
loss: 1.0374895334243774,grad_norm: 0.7438179725118388, iteration: 408367
loss: 0.9698581695556641,grad_norm: 0.7960536131346638, iteration: 408368
loss: 1.0883299112319946,grad_norm: 0.9999991112267161, iteration: 408369
loss: 1.0047485828399658,grad_norm: 0.9025689127076557, iteration: 408370
loss: 0.9920908808708191,grad_norm: 0.9999990974309598, iteration: 408371
loss: 0.9921943545341492,grad_norm: 0.7999620262937556, iteration: 408372
loss: 0.9811014533042908,grad_norm: 0.7375202403578995, iteration: 408373
loss: 1.0197120904922485,grad_norm: 0.8122380874313592, iteration: 408374
loss: 0.9860736131668091,grad_norm: 0.9999989760941634, iteration: 408375
loss: 1.0265547037124634,grad_norm: 0.7049700130525032, iteration: 408376
loss: 1.0005683898925781,grad_norm: 0.8344335250568387, iteration: 408377
loss: 0.9984439015388489,grad_norm: 0.815704305434074, iteration: 408378
loss: 0.9958215355873108,grad_norm: 0.9449517240722679, iteration: 408379
loss: 1.0123032331466675,grad_norm: 0.8172267333012878, iteration: 408380
loss: 0.9757524728775024,grad_norm: 0.7943796691412705, iteration: 408381
loss: 0.9776832461357117,grad_norm: 0.8342208820373643, iteration: 408382
loss: 1.0076987743377686,grad_norm: 0.8637576094863071, iteration: 408383
loss: 1.0274028778076172,grad_norm: 0.772711330719058, iteration: 408384
loss: 1.0329580307006836,grad_norm: 0.769430366613021, iteration: 408385
loss: 0.9805750250816345,grad_norm: 0.8055877859968392, iteration: 408386
loss: 0.9852588772773743,grad_norm: 0.894401853307062, iteration: 408387
loss: 0.9910410046577454,grad_norm: 0.8289875663565902, iteration: 408388
loss: 0.9933975338935852,grad_norm: 0.6142950844395509, iteration: 408389
loss: 1.0098416805267334,grad_norm: 0.8467825111294822, iteration: 408390
loss: 0.9911261200904846,grad_norm: 0.7949429323573712, iteration: 408391
loss: 1.0106329917907715,grad_norm: 0.8055833460042269, iteration: 408392
loss: 0.9919185042381287,grad_norm: 0.7763980273590878, iteration: 408393
loss: 1.0241090059280396,grad_norm: 0.8542735914603009, iteration: 408394
loss: 0.979722261428833,grad_norm: 0.7628597695807358, iteration: 408395
loss: 0.9851732850074768,grad_norm: 0.7980900971189678, iteration: 408396
loss: 1.010318636894226,grad_norm: 0.6836116292057324, iteration: 408397
loss: 1.0152347087860107,grad_norm: 0.7338854047132203, iteration: 408398
loss: 0.9872077107429504,grad_norm: 0.9999989862406672, iteration: 408399
loss: 1.0212613344192505,grad_norm: 0.7395142676343865, iteration: 408400
loss: 1.0168856382369995,grad_norm: 0.9999998357426532, iteration: 408401
loss: 0.9729759693145752,grad_norm: 0.8733524545965545, iteration: 408402
loss: 0.991449773311615,grad_norm: 0.725551439073957, iteration: 408403
loss: 0.9756680130958557,grad_norm: 0.7415271402101976, iteration: 408404
loss: 1.018363118171692,grad_norm: 0.8187810909951845, iteration: 408405
loss: 1.0728192329406738,grad_norm: 0.9999996601877927, iteration: 408406
loss: 1.0608950853347778,grad_norm: 0.8447396944303075, iteration: 408407
loss: 1.0147242546081543,grad_norm: 0.9036080643931975, iteration: 408408
loss: 1.002123475074768,grad_norm: 0.8969703702365841, iteration: 408409
loss: 1.0058560371398926,grad_norm: 0.8052321947880227, iteration: 408410
loss: 0.9821667075157166,grad_norm: 0.804957211145707, iteration: 408411
loss: 0.9820972084999084,grad_norm: 0.7344578402912361, iteration: 408412
loss: 0.9901312589645386,grad_norm: 0.9143842346222446, iteration: 408413
loss: 1.0052047967910767,grad_norm: 0.8257446281730662, iteration: 408414
loss: 1.029934287071228,grad_norm: 0.8270309654304052, iteration: 408415
loss: 1.030251145362854,grad_norm: 0.673517149865217, iteration: 408416
loss: 1.0188206434249878,grad_norm: 0.8131653497796735, iteration: 408417
loss: 0.9800058007240295,grad_norm: 0.7296094675975688, iteration: 408418
loss: 1.0742087364196777,grad_norm: 0.9999998370897949, iteration: 408419
loss: 0.9829277396202087,grad_norm: 0.7850830182120994, iteration: 408420
loss: 1.0218397378921509,grad_norm: 0.6786772030017062, iteration: 408421
loss: 0.991009533405304,grad_norm: 0.7400926769357677, iteration: 408422
loss: 0.9803745150566101,grad_norm: 0.7701958438565856, iteration: 408423
loss: 1.0094411373138428,grad_norm: 0.7462654463556759, iteration: 408424
loss: 1.03387451171875,grad_norm: 0.9794926610015863, iteration: 408425
loss: 0.9816787838935852,grad_norm: 0.8904979175349811, iteration: 408426
loss: 1.0201194286346436,grad_norm: 0.7196102984496773, iteration: 408427
loss: 1.02295982837677,grad_norm: 0.7849327500118629, iteration: 408428
loss: 1.0122979879379272,grad_norm: 0.7475564224767622, iteration: 408429
loss: 0.9641679525375366,grad_norm: 0.8165356222503882, iteration: 408430
loss: 1.0061230659484863,grad_norm: 0.7936370400907823, iteration: 408431
loss: 0.9968380331993103,grad_norm: 0.6616028641077762, iteration: 408432
loss: 1.0192404985427856,grad_norm: 0.6779013515274306, iteration: 408433
loss: 0.9703130125999451,grad_norm: 0.8694568412152368, iteration: 408434
loss: 1.0156599283218384,grad_norm: 0.9157378193178722, iteration: 408435
loss: 1.0030971765518188,grad_norm: 0.8909245386086965, iteration: 408436
loss: 1.140053153038025,grad_norm: 0.9999995522468276, iteration: 408437
loss: 1.040748119354248,grad_norm: 0.6499749476120975, iteration: 408438
loss: 1.0016268491744995,grad_norm: 0.7186745173157943, iteration: 408439
loss: 1.0424283742904663,grad_norm: 0.9999992059654074, iteration: 408440
loss: 0.9970511198043823,grad_norm: 0.9999993632067462, iteration: 408441
loss: 1.0157591104507446,grad_norm: 0.9999991353519829, iteration: 408442
loss: 1.0789811611175537,grad_norm: 0.9383844729846246, iteration: 408443
loss: 1.0074986219406128,grad_norm: 0.7647150268391313, iteration: 408444
loss: 1.1413921117782593,grad_norm: 0.9999994022738132, iteration: 408445
loss: 1.007901668548584,grad_norm: 0.9391489389621294, iteration: 408446
loss: 1.0194612741470337,grad_norm: 0.7525052979118031, iteration: 408447
loss: 1.0322785377502441,grad_norm: 0.8652638969014558, iteration: 408448
loss: 1.011649489402771,grad_norm: 0.7422559223783686, iteration: 408449
loss: 0.9981428384780884,grad_norm: 0.7469073853219576, iteration: 408450
loss: 0.9884876012802124,grad_norm: 0.9999991053617354, iteration: 408451
loss: 0.9975982904434204,grad_norm: 0.8450826353841566, iteration: 408452
loss: 0.9910545945167542,grad_norm: 0.7670720167450819, iteration: 408453
loss: 1.0032740831375122,grad_norm: 0.8732320105780607, iteration: 408454
loss: 0.9786394238471985,grad_norm: 0.7922137046818891, iteration: 408455
loss: 0.9930176734924316,grad_norm: 0.775302473971174, iteration: 408456
loss: 0.9884095191955566,grad_norm: 0.7030749625334636, iteration: 408457
loss: 0.969412624835968,grad_norm: 0.7984549363051814, iteration: 408458
loss: 1.0028986930847168,grad_norm: 0.8820278151561682, iteration: 408459
loss: 1.013458013534546,grad_norm: 0.869259291926699, iteration: 408460
loss: 0.9671755433082581,grad_norm: 0.6704457616638292, iteration: 408461
loss: 0.9795706868171692,grad_norm: 0.9271539403309398, iteration: 408462
loss: 1.0069228410720825,grad_norm: 0.7681581295284775, iteration: 408463
loss: 0.959331750869751,grad_norm: 0.8053864328417948, iteration: 408464
loss: 1.010469675064087,grad_norm: 0.9335168655488287, iteration: 408465
loss: 0.9957767724990845,grad_norm: 0.9038002568787535, iteration: 408466
loss: 0.9692205190658569,grad_norm: 0.9069769917416819, iteration: 408467
loss: 1.1505444049835205,grad_norm: 0.9999997094476237, iteration: 408468
loss: 0.9674794673919678,grad_norm: 0.7642098798966521, iteration: 408469
loss: 1.0211294889450073,grad_norm: 0.7241047970879159, iteration: 408470
loss: 1.0000923871994019,grad_norm: 0.9268891085163716, iteration: 408471
loss: 1.0165163278579712,grad_norm: 0.8377051559804067, iteration: 408472
loss: 0.9886620044708252,grad_norm: 0.8437489704408239, iteration: 408473
loss: 0.9830154776573181,grad_norm: 0.8425172963555746, iteration: 408474
loss: 0.9927866458892822,grad_norm: 0.8266619233226794, iteration: 408475
loss: 0.9955642223358154,grad_norm: 0.7368946596682896, iteration: 408476
loss: 1.0262250900268555,grad_norm: 0.9047932591601721, iteration: 408477
loss: 0.9804231524467468,grad_norm: 0.7608908115993543, iteration: 408478
loss: 1.0143582820892334,grad_norm: 0.8631747167134816, iteration: 408479
loss: 0.9512252807617188,grad_norm: 0.75501917003382, iteration: 408480
loss: 0.996738851070404,grad_norm: 0.8002108362552594, iteration: 408481
loss: 1.0173536539077759,grad_norm: 0.9871220206804833, iteration: 408482
loss: 1.0190012454986572,grad_norm: 0.7744537720130987, iteration: 408483
loss: 0.988079845905304,grad_norm: 0.7375324506034873, iteration: 408484
loss: 1.0069332122802734,grad_norm: 0.8080811706886847, iteration: 408485
loss: 0.9976012706756592,grad_norm: 0.8058863283755738, iteration: 408486
loss: 1.0190716981887817,grad_norm: 0.7330074787871736, iteration: 408487
loss: 1.0089943408966064,grad_norm: 0.8225645135370252, iteration: 408488
loss: 0.9742441773414612,grad_norm: 0.7640073283399802, iteration: 408489
loss: 0.9869369268417358,grad_norm: 0.6554215634348313, iteration: 408490
loss: 0.9864832162857056,grad_norm: 0.7370918544774713, iteration: 408491
loss: 0.9609566330909729,grad_norm: 0.7355664368808855, iteration: 408492
loss: 1.0250868797302246,grad_norm: 0.9190479195692349, iteration: 408493
loss: 0.9854262471199036,grad_norm: 0.7342754883027731, iteration: 408494
loss: 1.0048867464065552,grad_norm: 0.7329208973547564, iteration: 408495
loss: 0.9583336710929871,grad_norm: 0.7225153804435859, iteration: 408496
loss: 1.0071747303009033,grad_norm: 0.8983850381468106, iteration: 408497
loss: 0.9947009682655334,grad_norm: 0.736074323618427, iteration: 408498
loss: 1.0101709365844727,grad_norm: 0.6048845639805303, iteration: 408499
loss: 1.0282007455825806,grad_norm: 0.7300450061546578, iteration: 408500
loss: 0.9451017379760742,grad_norm: 0.7559473927356951, iteration: 408501
loss: 0.999184787273407,grad_norm: 0.9999990409932568, iteration: 408502
loss: 0.9841054081916809,grad_norm: 0.7097841444099574, iteration: 408503
loss: 1.021017074584961,grad_norm: 0.9344027069187162, iteration: 408504
loss: 0.9960629940032959,grad_norm: 0.7777412937335856, iteration: 408505
loss: 0.9551390409469604,grad_norm: 0.9928891319806044, iteration: 408506
loss: 0.9837583899497986,grad_norm: 0.7744341293742637, iteration: 408507
loss: 1.0100374221801758,grad_norm: 0.8384471238116111, iteration: 408508
loss: 0.984821617603302,grad_norm: 0.6820573013682797, iteration: 408509
loss: 1.007970929145813,grad_norm: 0.7270156592368572, iteration: 408510
loss: 0.9828116297721863,grad_norm: 0.8154483799077348, iteration: 408511
loss: 0.9949160218238831,grad_norm: 0.694029645362749, iteration: 408512
loss: 0.9375930428504944,grad_norm: 0.9065808651403304, iteration: 408513
loss: 0.9853819012641907,grad_norm: 0.888170613844901, iteration: 408514
loss: 0.9988259673118591,grad_norm: 0.9999992478717281, iteration: 408515
loss: 0.9950109720230103,grad_norm: 0.6552461148376801, iteration: 408516
loss: 1.0213602781295776,grad_norm: 0.8671903108992947, iteration: 408517
loss: 1.0075840950012207,grad_norm: 0.7228009109775865, iteration: 408518
loss: 1.0149641036987305,grad_norm: 0.8741163456005777, iteration: 408519
loss: 1.0204044580459595,grad_norm: 0.9999992595092898, iteration: 408520
loss: 0.9832291007041931,grad_norm: 0.7891472460505413, iteration: 408521
loss: 1.0058584213256836,grad_norm: 0.6703298364102321, iteration: 408522
loss: 1.0102620124816895,grad_norm: 0.8007075618232561, iteration: 408523
loss: 0.9553577303886414,grad_norm: 0.8040486455215964, iteration: 408524
loss: 0.9863467216491699,grad_norm: 0.7208601945331109, iteration: 408525
loss: 0.9891389012336731,grad_norm: 0.8804107974157919, iteration: 408526
loss: 1.019423007965088,grad_norm: 0.9999991812569955, iteration: 408527
loss: 0.9473727345466614,grad_norm: 0.8044548576241555, iteration: 408528
loss: 0.9847835302352905,grad_norm: 0.7562810832514621, iteration: 408529
loss: 1.0409526824951172,grad_norm: 0.7346749485450709, iteration: 408530
loss: 0.9900745153427124,grad_norm: 0.8464969744695813, iteration: 408531
loss: 1.007351279258728,grad_norm: 0.6939616608133946, iteration: 408532
loss: 0.9996314644813538,grad_norm: 0.7782467705344602, iteration: 408533
loss: 1.069833517074585,grad_norm: 0.9957892834703851, iteration: 408534
loss: 0.966325581073761,grad_norm: 0.8729056373969694, iteration: 408535
loss: 1.010390281677246,grad_norm: 0.838466624012458, iteration: 408536
loss: 0.9786195158958435,grad_norm: 0.795992390633624, iteration: 408537
loss: 1.002079963684082,grad_norm: 0.8338384487017564, iteration: 408538
loss: 0.9900176525115967,grad_norm: 0.7935890465633201, iteration: 408539
loss: 1.0077675580978394,grad_norm: 0.6951258937631467, iteration: 408540
loss: 1.0652856826782227,grad_norm: 0.9953381388015797, iteration: 408541
loss: 0.9936008453369141,grad_norm: 0.8097209106261073, iteration: 408542
loss: 1.029892086982727,grad_norm: 0.9166657575327138, iteration: 408543
loss: 1.0034009218215942,grad_norm: 0.7813338018084139, iteration: 408544
loss: 1.0106377601623535,grad_norm: 0.7149690276865558, iteration: 408545
loss: 0.9961369037628174,grad_norm: 0.8198003366225557, iteration: 408546
loss: 1.0031036138534546,grad_norm: 0.9491811413696359, iteration: 408547
loss: 1.0154259204864502,grad_norm: 0.7911078794151544, iteration: 408548
loss: 0.9998360276222229,grad_norm: 0.867024996331612, iteration: 408549
loss: 1.0156720876693726,grad_norm: 0.6543765123551821, iteration: 408550
loss: 1.0273598432540894,grad_norm: 0.9999992996442041, iteration: 408551
loss: 1.001160979270935,grad_norm: 0.7112811667090141, iteration: 408552
loss: 0.9988692998886108,grad_norm: 0.7420928552986166, iteration: 408553
loss: 1.0065685510635376,grad_norm: 0.9920514236756381, iteration: 408554
loss: 1.0679751634597778,grad_norm: 0.9691228563262662, iteration: 408555
loss: 1.0197709798812866,grad_norm: 0.9816257649365768, iteration: 408556
loss: 0.9748431444168091,grad_norm: 0.7351205370176283, iteration: 408557
loss: 0.9996415972709656,grad_norm: 0.8370751844873846, iteration: 408558
loss: 1.0071308612823486,grad_norm: 0.6936123887117365, iteration: 408559
loss: 0.9830135107040405,grad_norm: 0.8493322891147258, iteration: 408560
loss: 1.0009758472442627,grad_norm: 0.7403908281817967, iteration: 408561
loss: 1.0957037210464478,grad_norm: 0.999999691269309, iteration: 408562
loss: 0.9803404211997986,grad_norm: 0.7790208232619796, iteration: 408563
loss: 0.987491250038147,grad_norm: 0.674137882202605, iteration: 408564
loss: 1.0066354274749756,grad_norm: 0.7206277221248796, iteration: 408565
loss: 0.9865584969520569,grad_norm: 0.8976389317222435, iteration: 408566
loss: 1.0108147859573364,grad_norm: 0.9188743910167155, iteration: 408567
loss: 0.9802305102348328,grad_norm: 0.862762645781881, iteration: 408568
loss: 0.9814164638519287,grad_norm: 0.754670748491454, iteration: 408569
loss: 0.9850208759307861,grad_norm: 0.6989526325978128, iteration: 408570
loss: 1.0307508707046509,grad_norm: 0.9999990304337774, iteration: 408571
loss: 0.9852609634399414,grad_norm: 0.9999990906214273, iteration: 408572
loss: 1.0149669647216797,grad_norm: 0.8971480381207566, iteration: 408573
loss: 1.0312838554382324,grad_norm: 0.7299045592643625, iteration: 408574
loss: 1.0011118650436401,grad_norm: 0.7702508179222052, iteration: 408575
loss: 0.9998148083686829,grad_norm: 0.8785275207887233, iteration: 408576
loss: 1.0785514116287231,grad_norm: 0.999999081948086, iteration: 408577
loss: 1.074074149131775,grad_norm: 0.9999994164707416, iteration: 408578
loss: 1.102297067642212,grad_norm: 0.9999998101930048, iteration: 408579
loss: 1.0256413221359253,grad_norm: 0.7984398894765209, iteration: 408580
loss: 1.0194796323776245,grad_norm: 0.6357705054337996, iteration: 408581
loss: 0.9910383820533752,grad_norm: 0.7336624735651278, iteration: 408582
loss: 1.0254645347595215,grad_norm: 0.6934993651039807, iteration: 408583
loss: 1.001937985420227,grad_norm: 0.9177154691802716, iteration: 408584
loss: 0.9903055429458618,grad_norm: 0.7467672108684512, iteration: 408585
loss: 0.9674782752990723,grad_norm: 0.7647834567797287, iteration: 408586
loss: 1.0674943923950195,grad_norm: 0.8888834372349843, iteration: 408587
loss: 1.0053716897964478,grad_norm: 0.9765742700902005, iteration: 408588
loss: 0.9986602663993835,grad_norm: 0.6863611776674752, iteration: 408589
loss: 0.9650469422340393,grad_norm: 0.8246058195052632, iteration: 408590
loss: 0.9918586611747742,grad_norm: 0.8157799572113424, iteration: 408591
loss: 0.9999725818634033,grad_norm: 0.9999998286647683, iteration: 408592
loss: 0.9649556279182434,grad_norm: 0.7625799715456676, iteration: 408593
loss: 0.9607588052749634,grad_norm: 0.7765251069499086, iteration: 408594
loss: 0.9517481923103333,grad_norm: 0.7477361241726773, iteration: 408595
loss: 0.97991943359375,grad_norm: 0.9999991009704406, iteration: 408596
loss: 0.9995483160018921,grad_norm: 0.8412419172560687, iteration: 408597
loss: 0.9816621541976929,grad_norm: 0.7863787917815167, iteration: 408598
loss: 1.0056591033935547,grad_norm: 0.7965664414654217, iteration: 408599
loss: 1.031785488128662,grad_norm: 0.9999993688730139, iteration: 408600
loss: 0.9762055277824402,grad_norm: 0.9485370414994897, iteration: 408601
loss: 0.978936493396759,grad_norm: 0.8001514099999792, iteration: 408602
loss: 0.9963287711143494,grad_norm: 0.7682741588605768, iteration: 408603
loss: 1.0161226987838745,grad_norm: 0.7925545212418003, iteration: 408604
loss: 1.0439380407333374,grad_norm: 0.7760336296680873, iteration: 408605
loss: 1.0101139545440674,grad_norm: 0.9192535290669788, iteration: 408606
loss: 0.9842555522918701,grad_norm: 0.8716450831037643, iteration: 408607
loss: 1.009347677230835,grad_norm: 0.8082603681568938, iteration: 408608
loss: 1.0097274780273438,grad_norm: 0.6839086036844307, iteration: 408609
loss: 0.9992559552192688,grad_norm: 0.7376248201446539, iteration: 408610
loss: 0.9840191006660461,grad_norm: 0.7879195398788673, iteration: 408611
loss: 1.007541537284851,grad_norm: 0.8090690083418672, iteration: 408612
loss: 0.9986357092857361,grad_norm: 0.9550571347064409, iteration: 408613
loss: 0.9727373123168945,grad_norm: 0.6864605370161619, iteration: 408614
loss: 1.0209767818450928,grad_norm: 0.9999990499762773, iteration: 408615
loss: 1.0047670602798462,grad_norm: 0.8044684225022526, iteration: 408616
loss: 1.020412802696228,grad_norm: 0.8694218943813306, iteration: 408617
loss: 1.0111970901489258,grad_norm: 0.7303627977465521, iteration: 408618
loss: 0.9858105182647705,grad_norm: 0.7950150197739285, iteration: 408619
loss: 1.0034843683242798,grad_norm: 0.8488145412964839, iteration: 408620
loss: 1.02031672000885,grad_norm: 0.726680031447725, iteration: 408621
loss: 1.0090358257293701,grad_norm: 0.9085430065538798, iteration: 408622
loss: 0.9889870285987854,grad_norm: 0.8318874613870877, iteration: 408623
loss: 0.9791496396064758,grad_norm: 0.9156547474933032, iteration: 408624
loss: 1.0290228128433228,grad_norm: 0.7846032425292729, iteration: 408625
loss: 0.9838813543319702,grad_norm: 0.8798118486442901, iteration: 408626
loss: 0.9561618566513062,grad_norm: 0.7326962908854426, iteration: 408627
loss: 1.0106838941574097,grad_norm: 0.8462328924350458, iteration: 408628
loss: 1.0071288347244263,grad_norm: 0.7009457334912884, iteration: 408629
loss: 1.0684767961502075,grad_norm: 0.9999990758454089, iteration: 408630
loss: 0.9786404371261597,grad_norm: 0.999999285498259, iteration: 408631
loss: 1.016556739807129,grad_norm: 0.7456092605963716, iteration: 408632
loss: 0.9333855509757996,grad_norm: 0.8099298859703444, iteration: 408633
loss: 0.9924867153167725,grad_norm: 0.8324655761622872, iteration: 408634
loss: 1.0103936195373535,grad_norm: 0.99999923983325, iteration: 408635
loss: 1.015367031097412,grad_norm: 0.8004224998650682, iteration: 408636
loss: 0.9792127013206482,grad_norm: 0.8064539965896449, iteration: 408637
loss: 1.0062142610549927,grad_norm: 0.7937403268362339, iteration: 408638
loss: 1.05498206615448,grad_norm: 0.8471995245250953, iteration: 408639
loss: 0.9731499552726746,grad_norm: 0.9999991506913655, iteration: 408640
loss: 1.0013552904129028,grad_norm: 0.7020366110014108, iteration: 408641
loss: 0.9735930562019348,grad_norm: 0.8357498020699797, iteration: 408642
loss: 0.968909502029419,grad_norm: 0.751334247916831, iteration: 408643
loss: 1.0164985656738281,grad_norm: 0.6607346449982711, iteration: 408644
loss: 1.0141011476516724,grad_norm: 0.7536402750406038, iteration: 408645
loss: 0.9490084052085876,grad_norm: 0.9054809004425491, iteration: 408646
loss: 0.9698525071144104,grad_norm: 0.8167593655641754, iteration: 408647
loss: 1.0842797756195068,grad_norm: 0.8059891831520215, iteration: 408648
loss: 1.0086780786514282,grad_norm: 0.7331944266621288, iteration: 408649
loss: 0.9778980612754822,grad_norm: 0.816410852996658, iteration: 408650
loss: 0.9873818159103394,grad_norm: 0.8674633067381337, iteration: 408651
loss: 0.9637596011161804,grad_norm: 0.7072220159234663, iteration: 408652
loss: 1.014445424079895,grad_norm: 0.6923702510641305, iteration: 408653
loss: 0.979809045791626,grad_norm: 0.8127564171025402, iteration: 408654
loss: 0.9950860142707825,grad_norm: 0.737532184160783, iteration: 408655
loss: 0.9909818172454834,grad_norm: 0.8272658495641714, iteration: 408656
loss: 1.0181617736816406,grad_norm: 0.7563085391738661, iteration: 408657
loss: 0.988361120223999,grad_norm: 0.9999989256415293, iteration: 408658
loss: 1.0097129344940186,grad_norm: 0.802836460627367, iteration: 408659
loss: 1.0351370573043823,grad_norm: 0.7805211731716939, iteration: 408660
loss: 1.0469783544540405,grad_norm: 0.9626100846338581, iteration: 408661
loss: 0.9619449377059937,grad_norm: 0.8242534913498653, iteration: 408662
loss: 1.001681923866272,grad_norm: 0.6615808364151246, iteration: 408663
loss: 0.9634724855422974,grad_norm: 0.8995913183081218, iteration: 408664
loss: 0.996364951133728,grad_norm: 0.7228676530623, iteration: 408665
loss: 0.9926079511642456,grad_norm: 0.738671602409196, iteration: 408666
loss: 0.9969545006752014,grad_norm: 0.8281751461450416, iteration: 408667
loss: 0.9717420935630798,grad_norm: 0.7593811491696973, iteration: 408668
loss: 1.0040326118469238,grad_norm: 0.7721618135429701, iteration: 408669
loss: 1.0410479307174683,grad_norm: 0.8167312615073136, iteration: 408670
loss: 1.0114837884902954,grad_norm: 0.7134569678226677, iteration: 408671
loss: 0.9893324971199036,grad_norm: 0.7761191299199807, iteration: 408672
loss: 1.073331594467163,grad_norm: 0.9999995870848148, iteration: 408673
loss: 1.0116102695465088,grad_norm: 0.7660503187774719, iteration: 408674
loss: 1.0205520391464233,grad_norm: 0.9999990127795749, iteration: 408675
loss: 0.9735722541809082,grad_norm: 0.8054556309141465, iteration: 408676
loss: 0.9792324900627136,grad_norm: 0.869822925669315, iteration: 408677
loss: 0.9722644090652466,grad_norm: 0.8869139068593924, iteration: 408678
loss: 1.0733431577682495,grad_norm: 0.9999994338327254, iteration: 408679
loss: 0.9776140451431274,grad_norm: 0.857207224412795, iteration: 408680
loss: 0.96919184923172,grad_norm: 0.8923222073958895, iteration: 408681
loss: 0.9986112117767334,grad_norm: 0.7984921308131508, iteration: 408682
loss: 0.9884597659111023,grad_norm: 0.7938518212633423, iteration: 408683
loss: 1.013260006904602,grad_norm: 0.999999816921945, iteration: 408684
loss: 1.0060877799987793,grad_norm: 0.7911839956529426, iteration: 408685
loss: 0.9403969049453735,grad_norm: 0.8495311346572186, iteration: 408686
loss: 1.0631184577941895,grad_norm: 0.9835074476474692, iteration: 408687
loss: 1.0713186264038086,grad_norm: 0.8271112706924056, iteration: 408688
loss: 1.0302319526672363,grad_norm: 0.8358341894809774, iteration: 408689
loss: 1.0061951875686646,grad_norm: 0.7413672416077955, iteration: 408690
loss: 1.0109539031982422,grad_norm: 0.9999991318050284, iteration: 408691
loss: 1.028218150138855,grad_norm: 0.7365703051413115, iteration: 408692
loss: 1.019486427307129,grad_norm: 0.8803892884278484, iteration: 408693
loss: 1.0158430337905884,grad_norm: 0.8750156334472204, iteration: 408694
loss: 0.9974274039268494,grad_norm: 0.7585562275498313, iteration: 408695
loss: 0.9916462898254395,grad_norm: 0.8737638926341772, iteration: 408696
loss: 1.0348659753799438,grad_norm: 0.7101638574938641, iteration: 408697
loss: 0.9933595061302185,grad_norm: 0.7413972345235227, iteration: 408698
loss: 1.0496110916137695,grad_norm: 0.9243894060205172, iteration: 408699
loss: 1.0171347856521606,grad_norm: 0.9229253303667689, iteration: 408700
loss: 1.0332167148590088,grad_norm: 0.8063923458209169, iteration: 408701
loss: 1.0210388898849487,grad_norm: 0.9999999109794971, iteration: 408702
loss: 1.0073215961456299,grad_norm: 0.6995806102600866, iteration: 408703
loss: 0.9825670123100281,grad_norm: 0.9009985331558347, iteration: 408704
loss: 0.9988831281661987,grad_norm: 0.7765234070592668, iteration: 408705
loss: 1.078674077987671,grad_norm: 0.999999153208124, iteration: 408706
loss: 0.984872579574585,grad_norm: 0.8760107006250532, iteration: 408707
loss: 0.9863412976264954,grad_norm: 0.7175932485364072, iteration: 408708
loss: 0.9720218181610107,grad_norm: 0.7324527790433012, iteration: 408709
loss: 0.9969116449356079,grad_norm: 0.8113960585928685, iteration: 408710
loss: 1.0371960401535034,grad_norm: 0.9999993012272942, iteration: 408711
loss: 1.0043483972549438,grad_norm: 0.7606003285212212, iteration: 408712
loss: 1.0289225578308105,grad_norm: 0.9999990088972155, iteration: 408713
loss: 0.9803869724273682,grad_norm: 0.9816030923496787, iteration: 408714
loss: 1.0212260484695435,grad_norm: 0.7207876286065753, iteration: 408715
loss: 0.9930524230003357,grad_norm: 0.7425703104612358, iteration: 408716
loss: 1.0072659254074097,grad_norm: 0.8707987588152325, iteration: 408717
loss: 0.9802826046943665,grad_norm: 0.8081744678731181, iteration: 408718
loss: 1.0151344537734985,grad_norm: 0.9074531998143931, iteration: 408719
loss: 0.9638090133666992,grad_norm: 0.8657801201149155, iteration: 408720
loss: 1.0211573839187622,grad_norm: 0.8546700259290488, iteration: 408721
loss: 0.9890314936637878,grad_norm: 0.8199546020282422, iteration: 408722
loss: 0.9996092319488525,grad_norm: 0.8903238505098803, iteration: 408723
loss: 1.0228809118270874,grad_norm: 0.9999991464052846, iteration: 408724
loss: 1.0355232954025269,grad_norm: 0.6658373612943737, iteration: 408725
loss: 1.0019172430038452,grad_norm: 0.9510259282219283, iteration: 408726
loss: 1.0295462608337402,grad_norm: 0.9999994835974572, iteration: 408727
loss: 1.0046261548995972,grad_norm: 0.6813051954141215, iteration: 408728
loss: 1.0043666362762451,grad_norm: 0.6677844310207592, iteration: 408729
loss: 1.0419522523880005,grad_norm: 0.9999992257508697, iteration: 408730
loss: 0.9927247762680054,grad_norm: 0.7949963580144412, iteration: 408731
loss: 1.0244829654693604,grad_norm: 0.7975440612848751, iteration: 408732
loss: 0.980290412902832,grad_norm: 0.7183126104319778, iteration: 408733
loss: 1.0134333372116089,grad_norm: 0.8047312281063306, iteration: 408734
loss: 0.972273051738739,grad_norm: 0.8421960264735505, iteration: 408735
loss: 1.0286482572555542,grad_norm: 0.83686346698047, iteration: 408736
loss: 1.0136522054672241,grad_norm: 0.8792085872401708, iteration: 408737
loss: 0.9951759576797485,grad_norm: 0.6785171344471894, iteration: 408738
loss: 1.0210553407669067,grad_norm: 0.7367384513260626, iteration: 408739
loss: 1.1857774257659912,grad_norm: 0.9999996360975553, iteration: 408740
loss: 0.9935628771781921,grad_norm: 0.7568511704534994, iteration: 408741
loss: 1.0023335218429565,grad_norm: 0.7474039249374442, iteration: 408742
loss: 0.9883876442909241,grad_norm: 0.8639363454542494, iteration: 408743
loss: 1.0194354057312012,grad_norm: 0.7201145187496674, iteration: 408744
loss: 0.9944630861282349,grad_norm: 0.8382174160392358, iteration: 408745
loss: 1.011905312538147,grad_norm: 0.7437907260650313, iteration: 408746
loss: 1.0641961097717285,grad_norm: 0.9999999316709394, iteration: 408747
loss: 0.9806923270225525,grad_norm: 0.7888863222364815, iteration: 408748
loss: 0.9840858578681946,grad_norm: 0.7350601078391409, iteration: 408749
loss: 1.0127283334732056,grad_norm: 0.9999991117481734, iteration: 408750
loss: 0.9649165272712708,grad_norm: 0.8046179059293127, iteration: 408751
loss: 0.9765331149101257,grad_norm: 0.9999989385720905, iteration: 408752
loss: 1.0050020217895508,grad_norm: 0.7168775306299611, iteration: 408753
loss: 0.9931771755218506,grad_norm: 0.7411853373925388, iteration: 408754
loss: 0.9923714995384216,grad_norm: 0.9139402260880233, iteration: 408755
loss: 0.9940544962882996,grad_norm: 0.6244111624858469, iteration: 408756
loss: 1.0044559240341187,grad_norm: 0.5748594102242471, iteration: 408757
loss: 1.0552353858947754,grad_norm: 0.8545665742675309, iteration: 408758
loss: 1.0760425329208374,grad_norm: 0.7873159725550916, iteration: 408759
loss: 1.0265036821365356,grad_norm: 0.8140637469499624, iteration: 408760
loss: 1.0473651885986328,grad_norm: 0.992315405311856, iteration: 408761
loss: 0.997844398021698,grad_norm: 0.7478048065534973, iteration: 408762
loss: 1.0200713872909546,grad_norm: 0.9243737693569792, iteration: 408763
loss: 1.0065643787384033,grad_norm: 0.9999995676645236, iteration: 408764
loss: 1.0263296365737915,grad_norm: 0.7630980229644928, iteration: 408765
loss: 1.0357714891433716,grad_norm: 0.6518171819544215, iteration: 408766
loss: 0.9849741458892822,grad_norm: 0.7801037712060922, iteration: 408767
loss: 1.011193037033081,grad_norm: 0.8983336541608001, iteration: 408768
loss: 1.0120691061019897,grad_norm: 0.747748892379758, iteration: 408769
loss: 0.9885769486427307,grad_norm: 0.6678799358634322, iteration: 408770
loss: 0.985527753829956,grad_norm: 0.7584686040083871, iteration: 408771
loss: 1.0241446495056152,grad_norm: 0.7361475902566564, iteration: 408772
loss: 0.9755580425262451,grad_norm: 0.8155075986382118, iteration: 408773
loss: 0.9885019659996033,grad_norm: 0.8747717093452992, iteration: 408774
loss: 1.03754460811615,grad_norm: 0.999999236729341, iteration: 408775
loss: 1.0099016427993774,grad_norm: 0.7943246234744358, iteration: 408776
loss: 1.0356541872024536,grad_norm: 0.8500614680252776, iteration: 408777
loss: 1.001121997833252,grad_norm: 0.8708812198813148, iteration: 408778
loss: 1.015140175819397,grad_norm: 0.807435985043472, iteration: 408779
loss: 0.9926354289054871,grad_norm: 0.9999992162109744, iteration: 408780
loss: 1.018144965171814,grad_norm: 0.923969336745198, iteration: 408781
loss: 1.034880518913269,grad_norm: 0.8362708691467903, iteration: 408782
loss: 1.044784665107727,grad_norm: 0.9999996530290709, iteration: 408783
loss: 1.017704725265503,grad_norm: 0.7505421351541497, iteration: 408784
loss: 1.0371378660202026,grad_norm: 0.9999991004227428, iteration: 408785
loss: 1.0023486614227295,grad_norm: 0.7689943768951113, iteration: 408786
loss: 1.026667594909668,grad_norm: 0.8798623539412823, iteration: 408787
loss: 0.9929284453392029,grad_norm: 0.5967141969360339, iteration: 408788
loss: 1.0310158729553223,grad_norm: 0.9975876729888652, iteration: 408789
loss: 1.1079849004745483,grad_norm: 0.9999991989019046, iteration: 408790
loss: 1.0218102931976318,grad_norm: 0.7904403177047362, iteration: 408791
loss: 0.987570583820343,grad_norm: 0.7789587804959703, iteration: 408792
loss: 1.053091287612915,grad_norm: 0.9999993093888841, iteration: 408793
loss: 0.9886491298675537,grad_norm: 0.8014299488471821, iteration: 408794
loss: 1.028340458869934,grad_norm: 0.7109326732930301, iteration: 408795
loss: 1.0384891033172607,grad_norm: 0.9999993097360591, iteration: 408796
loss: 1.1017159223556519,grad_norm: 0.85728213195804, iteration: 408797
loss: 1.0281294584274292,grad_norm: 0.8046267426129031, iteration: 408798
loss: 0.9822413921356201,grad_norm: 0.769896287828487, iteration: 408799
loss: 0.9965018033981323,grad_norm: 0.8072708838717414, iteration: 408800
loss: 1.0145585536956787,grad_norm: 0.8947803324506562, iteration: 408801
loss: 0.9640212655067444,grad_norm: 0.7715248932172231, iteration: 408802
loss: 0.9956178069114685,grad_norm: 0.8500918901271555, iteration: 408803
loss: 1.0441418886184692,grad_norm: 0.9441614498897187, iteration: 408804
loss: 0.9707496166229248,grad_norm: 0.8710148252665948, iteration: 408805
loss: 0.9881316423416138,grad_norm: 0.7899573900603531, iteration: 408806
loss: 1.0044173002243042,grad_norm: 0.9853407344303894, iteration: 408807
loss: 0.9939684271812439,grad_norm: 0.7789147005943987, iteration: 408808
loss: 0.9933235049247742,grad_norm: 0.9999991525662754, iteration: 408809
loss: 0.9862762093544006,grad_norm: 0.6637769241827548, iteration: 408810
loss: 1.055293083190918,grad_norm: 0.7320321484907097, iteration: 408811
loss: 0.9808005690574646,grad_norm: 0.7654819957608843, iteration: 408812
loss: 1.057019829750061,grad_norm: 0.895349300183803, iteration: 408813
loss: 0.9885159730911255,grad_norm: 0.8645850077784181, iteration: 408814
loss: 1.0121594667434692,grad_norm: 0.6985002586953721, iteration: 408815
loss: 1.0790098905563354,grad_norm: 0.9473500026143712, iteration: 408816
loss: 1.033396601676941,grad_norm: 0.9999991078313156, iteration: 408817
loss: 0.9798489809036255,grad_norm: 0.6446209786478793, iteration: 408818
loss: 0.9994738101959229,grad_norm: 0.9999993714238031, iteration: 408819
loss: 1.0199599266052246,grad_norm: 0.8481148221699921, iteration: 408820
loss: 0.9961012601852417,grad_norm: 0.7640009120432507, iteration: 408821
loss: 1.0331497192382812,grad_norm: 0.9340708653053573, iteration: 408822
loss: 1.097883939743042,grad_norm: 0.9999993093554812, iteration: 408823
loss: 0.9877715110778809,grad_norm: 0.7246773257147099, iteration: 408824
loss: 1.0079468488693237,grad_norm: 0.999999271855754, iteration: 408825
loss: 0.9965646862983704,grad_norm: 0.9999990699511762, iteration: 408826
loss: 0.9766798615455627,grad_norm: 0.9088456589234382, iteration: 408827
loss: 1.0342050790786743,grad_norm: 0.8410089176886993, iteration: 408828
loss: 1.0471651554107666,grad_norm: 0.8552853541039684, iteration: 408829
loss: 1.0030686855316162,grad_norm: 1.0000000638611635, iteration: 408830
loss: 1.0849738121032715,grad_norm: 0.7601394138796117, iteration: 408831
loss: 0.9996103644371033,grad_norm: 0.6083931654255248, iteration: 408832
loss: 0.9710788726806641,grad_norm: 0.9999992460821793, iteration: 408833
loss: 1.2199205160140991,grad_norm: 0.9999997029787033, iteration: 408834
loss: 1.178472638130188,grad_norm: 0.9999997466900299, iteration: 408835
loss: 1.0337650775909424,grad_norm: 0.7585018678244501, iteration: 408836
loss: 1.1024444103240967,grad_norm: 0.9999996213168763, iteration: 408837
loss: 1.0070582628250122,grad_norm: 0.8516338684474273, iteration: 408838
loss: 1.02506422996521,grad_norm: 0.9999993185649043, iteration: 408839
loss: 1.0628122091293335,grad_norm: 0.9999999082682327, iteration: 408840
loss: 1.0096827745437622,grad_norm: 0.7773166384689822, iteration: 408841
loss: 1.03948175907135,grad_norm: 0.8488770363997802, iteration: 408842
loss: 1.0443202257156372,grad_norm: 0.8831098057013911, iteration: 408843
loss: 1.2636429071426392,grad_norm: 0.9999996204360534, iteration: 408844
loss: 1.1407192945480347,grad_norm: 0.9999993609293817, iteration: 408845
loss: 1.11721670627594,grad_norm: 0.9999990606499869, iteration: 408846
loss: 1.094772458076477,grad_norm: 0.7133534377365083, iteration: 408847
loss: 1.1253440380096436,grad_norm: 0.9999993310557317, iteration: 408848
loss: 1.046032428741455,grad_norm: 0.9999999615125299, iteration: 408849
loss: 1.0536915063858032,grad_norm: 0.999999221551723, iteration: 408850
loss: 1.0247070789337158,grad_norm: 0.9999996667663916, iteration: 408851
loss: 1.0610271692276,grad_norm: 0.8355961982089944, iteration: 408852
loss: 0.9998348951339722,grad_norm: 0.7595301452047163, iteration: 408853
loss: 1.0154013633728027,grad_norm: 0.9999992042312057, iteration: 408854
loss: 1.1053441762924194,grad_norm: 0.9044155807336448, iteration: 408855
loss: 1.075837254524231,grad_norm: 0.701255556808476, iteration: 408856
loss: 1.1075413227081299,grad_norm: 0.9999997191543127, iteration: 408857
loss: 1.0051952600479126,grad_norm: 0.8341795024714803, iteration: 408858
loss: 1.1940568685531616,grad_norm: 0.9999991159029741, iteration: 408859
loss: 1.0294685363769531,grad_norm: 0.9389867982730973, iteration: 408860
loss: 1.0592159032821655,grad_norm: 0.9999996667114148, iteration: 408861
loss: 1.1835002899169922,grad_norm: 0.9999999024470099, iteration: 408862
loss: 0.9860697388648987,grad_norm: 0.7437231757684468, iteration: 408863
loss: 1.0373907089233398,grad_norm: 0.8468445230318655, iteration: 408864
loss: 1.1125775575637817,grad_norm: 0.9999998398685876, iteration: 408865
loss: 1.0109000205993652,grad_norm: 0.6890316149351086, iteration: 408866
loss: 1.0104554891586304,grad_norm: 0.999999105261385, iteration: 408867
loss: 1.0048022270202637,grad_norm: 0.7401917893853927, iteration: 408868
loss: 0.9944126605987549,grad_norm: 0.9745900675349836, iteration: 408869
loss: 1.0113497972488403,grad_norm: 0.9999997655979108, iteration: 408870
loss: 0.960284948348999,grad_norm: 0.7120007793195473, iteration: 408871
loss: 1.0294114351272583,grad_norm: 0.9999995110533764, iteration: 408872
loss: 1.1078393459320068,grad_norm: 0.9999997511068939, iteration: 408873
loss: 1.0326085090637207,grad_norm: 0.9918740578537696, iteration: 408874
loss: 1.0552515983581543,grad_norm: 0.9999990872545342, iteration: 408875
loss: 1.1078273057937622,grad_norm: 0.9999996053508768, iteration: 408876
loss: 1.0000996589660645,grad_norm: 0.807879682890258, iteration: 408877
loss: 0.9802793860435486,grad_norm: 0.7078056271542188, iteration: 408878
loss: 1.0607051849365234,grad_norm: 0.8175925813990773, iteration: 408879
loss: 0.9989053606987,grad_norm: 0.833320322457284, iteration: 408880
loss: 1.0006654262542725,grad_norm: 0.7859318974452421, iteration: 408881
loss: 0.9769352674484253,grad_norm: 0.729235452189459, iteration: 408882
loss: 1.0043978691101074,grad_norm: 0.7689492093319404, iteration: 408883
loss: 1.0728659629821777,grad_norm: 0.9999996216904025, iteration: 408884
loss: 1.013371467590332,grad_norm: 0.7586451510579889, iteration: 408885
loss: 0.9961851239204407,grad_norm: 0.9999994688546998, iteration: 408886
loss: 1.0436110496520996,grad_norm: 0.9999990254548318, iteration: 408887
loss: 1.0174530744552612,grad_norm: 0.8970551373545512, iteration: 408888
loss: 1.0114904642105103,grad_norm: 0.9999994452702698, iteration: 408889
loss: 1.0363799333572388,grad_norm: 0.9999999091577735, iteration: 408890
loss: 0.9949703812599182,grad_norm: 0.8566608042611521, iteration: 408891
loss: 1.0456573963165283,grad_norm: 0.999999117135476, iteration: 408892
loss: 1.1221189498901367,grad_norm: 0.9999991300384949, iteration: 408893
loss: 0.9958505630493164,grad_norm: 0.6929192695515495, iteration: 408894
loss: 1.0174908638000488,grad_norm: 0.7115089196284725, iteration: 408895
loss: 0.9900848865509033,grad_norm: 0.9514812210226572, iteration: 408896
loss: 1.0497609376907349,grad_norm: 0.7304255403560668, iteration: 408897
loss: 1.1268943548202515,grad_norm: 0.999999210401009, iteration: 408898
loss: 0.9797506928443909,grad_norm: 0.7407936800193095, iteration: 408899
loss: 1.020684838294983,grad_norm: 0.9999995562900228, iteration: 408900
loss: 0.9932768940925598,grad_norm: 0.7576805312163081, iteration: 408901
loss: 1.027197241783142,grad_norm: 0.8588229059817036, iteration: 408902
loss: 1.0324580669403076,grad_norm: 0.818723665454023, iteration: 408903
loss: 1.0276837348937988,grad_norm: 0.7330923610903707, iteration: 408904
loss: 1.070996642112732,grad_norm: 0.9999992076926854, iteration: 408905
loss: 0.9625458717346191,grad_norm: 0.8365551437155404, iteration: 408906
loss: 0.9699370861053467,grad_norm: 0.7762921164236583, iteration: 408907
loss: 1.0189135074615479,grad_norm: 0.7437052096948945, iteration: 408908
loss: 0.9957090616226196,grad_norm: 0.7797002938015353, iteration: 408909
loss: 1.047402262687683,grad_norm: 0.9999993863105139, iteration: 408910
loss: 1.0746852159500122,grad_norm: 0.9999990576343164, iteration: 408911
loss: 1.032192587852478,grad_norm: 0.9442878461382218, iteration: 408912
loss: 1.0878572463989258,grad_norm: 0.8823906364798891, iteration: 408913
loss: 1.0581564903259277,grad_norm: 0.8234353477916592, iteration: 408914
loss: 0.9937933087348938,grad_norm: 0.6214365927902628, iteration: 408915
loss: 1.0309535264968872,grad_norm: 0.8631428356296424, iteration: 408916
loss: 0.9696490168571472,grad_norm: 0.7641877887230852, iteration: 408917
loss: 1.0121798515319824,grad_norm: 0.9157382665049598, iteration: 408918
loss: 1.0844963788986206,grad_norm: 0.9999994893165075, iteration: 408919
loss: 1.0043655633926392,grad_norm: 0.836168068366014, iteration: 408920
loss: 1.0191829204559326,grad_norm: 0.9999990706600694, iteration: 408921
loss: 1.033159613609314,grad_norm: 0.999999176987735, iteration: 408922
loss: 1.1105496883392334,grad_norm: 0.9999992313603038, iteration: 408923
loss: 0.9976434111595154,grad_norm: 0.7806442444508739, iteration: 408924
loss: 0.9902603030204773,grad_norm: 0.896525146117279, iteration: 408925
loss: 1.0334724187850952,grad_norm: 0.7937332877801081, iteration: 408926
loss: 1.0343680381774902,grad_norm: 0.8115982857852575, iteration: 408927
loss: 0.9872541427612305,grad_norm: 0.8218134014935182, iteration: 408928
loss: 1.0058830976486206,grad_norm: 0.8311999009912923, iteration: 408929
loss: 0.9563267230987549,grad_norm: 0.769646227179862, iteration: 408930
loss: 1.0528873205184937,grad_norm: 0.9999991231046743, iteration: 408931
loss: 1.0264625549316406,grad_norm: 0.8014348091836694, iteration: 408932
loss: 1.020491123199463,grad_norm: 0.7485523180529298, iteration: 408933
loss: 0.9949565529823303,grad_norm: 0.8600944424230937, iteration: 408934
loss: 1.1178336143493652,grad_norm: 0.9999989788783065, iteration: 408935
loss: 1.0271676778793335,grad_norm: 0.893219275278975, iteration: 408936
loss: 0.9930745959281921,grad_norm: 0.9999990335598342, iteration: 408937
loss: 0.9963486194610596,grad_norm: 0.77419485173841, iteration: 408938
loss: 1.046567678451538,grad_norm: 0.8479892947157139, iteration: 408939
loss: 0.9864751100540161,grad_norm: 0.6605525680679624, iteration: 408940
loss: 1.013034701347351,grad_norm: 0.8608120220139842, iteration: 408941
loss: 1.0798813104629517,grad_norm: 0.9999994910119155, iteration: 408942
loss: 0.998833954334259,grad_norm: 0.8844688511226456, iteration: 408943
loss: 1.0793523788452148,grad_norm: 0.9999999867021876, iteration: 408944
loss: 0.9988611936569214,grad_norm: 0.9601653046501892, iteration: 408945
loss: 1.0753802061080933,grad_norm: 0.9999997721075643, iteration: 408946
loss: 1.0098971128463745,grad_norm: 0.8699775201593691, iteration: 408947
loss: 1.015785813331604,grad_norm: 0.776699009062903, iteration: 408948
loss: 1.0035852193832397,grad_norm: 0.87065627354883, iteration: 408949
loss: 0.9666693210601807,grad_norm: 0.7941865029664104, iteration: 408950
loss: 1.0595672130584717,grad_norm: 0.9378806579687196, iteration: 408951
loss: 0.9805321097373962,grad_norm: 0.9999992742730889, iteration: 408952
loss: 0.9856205582618713,grad_norm: 0.9235961496244927, iteration: 408953
loss: 1.0167231559753418,grad_norm: 0.7954419064245815, iteration: 408954
loss: 0.9974241852760315,grad_norm: 0.6667792960861965, iteration: 408955
loss: 0.9586824774742126,grad_norm: 0.6658654033593483, iteration: 408956
loss: 1.0360461473464966,grad_norm: 0.999999478643163, iteration: 408957
loss: 1.004201054573059,grad_norm: 0.9048063994067383, iteration: 408958
loss: 0.9675406813621521,grad_norm: 0.9999988872807869, iteration: 408959
loss: 1.0182687044143677,grad_norm: 0.7767178523292247, iteration: 408960
loss: 0.9822700619697571,grad_norm: 0.7060048068024742, iteration: 408961
loss: 0.9722145199775696,grad_norm: 0.5548265828927242, iteration: 408962
loss: 0.9703298211097717,grad_norm: 0.8762919987666277, iteration: 408963
loss: 1.0426815748214722,grad_norm: 0.9955611855241682, iteration: 408964
loss: 1.035179615020752,grad_norm: 0.9466795830027368, iteration: 408965
loss: 1.010108470916748,grad_norm: 0.9837813698961333, iteration: 408966
loss: 0.9649760723114014,grad_norm: 0.7900419894493051, iteration: 408967
loss: 1.004387378692627,grad_norm: 0.8057089314341669, iteration: 408968
loss: 1.0111234188079834,grad_norm: 0.9999991955295878, iteration: 408969
loss: 0.9932425022125244,grad_norm: 0.9999993837990314, iteration: 408970
loss: 1.060646891593933,grad_norm: 0.7731097374636113, iteration: 408971
loss: 1.0006153583526611,grad_norm: 0.9704020770066324, iteration: 408972
loss: 1.0454292297363281,grad_norm: 0.8762514887684486, iteration: 408973
loss: 1.0619080066680908,grad_norm: 0.8126099892869909, iteration: 408974
loss: 1.0371345281600952,grad_norm: 0.9191683955413381, iteration: 408975
loss: 1.0190938711166382,grad_norm: 0.9999994915818801, iteration: 408976
loss: 1.099169135093689,grad_norm: 0.8608576100890086, iteration: 408977
loss: 0.983584463596344,grad_norm: 0.7615786649972347, iteration: 408978
loss: 1.0457854270935059,grad_norm: 0.9999993367824008, iteration: 408979
loss: 1.0045242309570312,grad_norm: 0.9999997793322027, iteration: 408980
loss: 0.9836540818214417,grad_norm: 0.763396960079801, iteration: 408981
loss: 1.0156821012496948,grad_norm: 0.8214329359874192, iteration: 408982
loss: 1.0258989334106445,grad_norm: 0.7547695005307717, iteration: 408983
loss: 0.9791831374168396,grad_norm: 0.6010459951666337, iteration: 408984
loss: 1.022546648979187,grad_norm: 0.7935386845467712, iteration: 408985
loss: 1.035740852355957,grad_norm: 0.872523459419613, iteration: 408986
loss: 1.0269447565078735,grad_norm: 0.8382748128560708, iteration: 408987
loss: 0.9744001030921936,grad_norm: 0.9118944419769017, iteration: 408988
loss: 1.0380054712295532,grad_norm: 0.9999990401629858, iteration: 408989
loss: 0.9721304178237915,grad_norm: 0.8249526832686318, iteration: 408990
loss: 0.9474455118179321,grad_norm: 0.7038268968639553, iteration: 408991
loss: 1.026593804359436,grad_norm: 0.6901538934959782, iteration: 408992
loss: 0.9783684611320496,grad_norm: 0.7265708796467543, iteration: 408993
loss: 0.994657039642334,grad_norm: 0.8911019606156623, iteration: 408994
loss: 0.9791823625564575,grad_norm: 0.8620089044607078, iteration: 408995
loss: 0.9775826334953308,grad_norm: 0.9235050102639685, iteration: 408996
loss: 1.039872407913208,grad_norm: 0.7594643174983909, iteration: 408997
loss: 0.9854093790054321,grad_norm: 0.7726096352602622, iteration: 408998
loss: 1.0043116807937622,grad_norm: 0.9761200038934352, iteration: 408999
loss: 1.0308146476745605,grad_norm: 0.7947728676915279, iteration: 409000
loss: 1.0029720067977905,grad_norm: 0.6713976095148838, iteration: 409001
loss: 1.025315761566162,grad_norm: 0.7289529697024222, iteration: 409002
loss: 1.0039243698120117,grad_norm: 0.80569853159273, iteration: 409003
loss: 0.9932153224945068,grad_norm: 0.771408498937397, iteration: 409004
loss: 1.0150060653686523,grad_norm: 0.8167723872289326, iteration: 409005
loss: 1.0137887001037598,grad_norm: 0.7066443705295254, iteration: 409006
loss: 1.0045727491378784,grad_norm: 0.9999992320219265, iteration: 409007
loss: 1.0203700065612793,grad_norm: 0.798274885287321, iteration: 409008
loss: 1.0038131475448608,grad_norm: 0.8223985519060669, iteration: 409009
loss: 1.1359224319458008,grad_norm: 0.9999992985617867, iteration: 409010
loss: 1.0032227039337158,grad_norm: 0.6815570011890201, iteration: 409011
loss: 0.9461530447006226,grad_norm: 0.8464526307657472, iteration: 409012
loss: 1.0688698291778564,grad_norm: 0.9999998059648874, iteration: 409013
loss: 0.9986514449119568,grad_norm: 0.6465512347466227, iteration: 409014
loss: 1.0053536891937256,grad_norm: 0.6607304634820972, iteration: 409015
loss: 1.0662362575531006,grad_norm: 0.7449171045731446, iteration: 409016
loss: 1.0342705249786377,grad_norm: 0.9999993364294724, iteration: 409017
loss: 1.076141357421875,grad_norm: 0.9999998337808863, iteration: 409018
loss: 1.0465576648712158,grad_norm: 0.9870508353350805, iteration: 409019
loss: 0.9814867377281189,grad_norm: 0.8449239819503991, iteration: 409020
loss: 0.9803720116615295,grad_norm: 0.9731637064036331, iteration: 409021
loss: 1.0027806758880615,grad_norm: 0.8759976763016888, iteration: 409022
loss: 0.9706113934516907,grad_norm: 0.8167804504411368, iteration: 409023
loss: 1.2141315937042236,grad_norm: 0.8644874180437305, iteration: 409024
loss: 1.0175248384475708,grad_norm: 0.697288226569788, iteration: 409025
loss: 1.0377405881881714,grad_norm: 0.9050144089168567, iteration: 409026
loss: 1.0055800676345825,grad_norm: 0.8501760326409995, iteration: 409027
loss: 1.0322649478912354,grad_norm: 0.999999874983164, iteration: 409028
loss: 1.0639333724975586,grad_norm: 0.9999995750817346, iteration: 409029
loss: 1.0436195135116577,grad_norm: 0.9999993932285525, iteration: 409030
loss: 1.048588752746582,grad_norm: 0.9999995065123958, iteration: 409031
loss: 0.9711964130401611,grad_norm: 0.8269195226998681, iteration: 409032
loss: 1.0155235528945923,grad_norm: 0.9048462144237435, iteration: 409033
loss: 1.0356254577636719,grad_norm: 0.9999993632726247, iteration: 409034
loss: 1.0262867212295532,grad_norm: 0.9037779317868252, iteration: 409035
loss: 1.0220454931259155,grad_norm: 0.9999999519805053, iteration: 409036
loss: 1.0029104948043823,grad_norm: 0.9285590213068304, iteration: 409037
loss: 1.1726419925689697,grad_norm: 0.9999999446678619, iteration: 409038
loss: 1.0650508403778076,grad_norm: 0.9999992079172282, iteration: 409039
loss: 1.0860674381256104,grad_norm: 0.9487789553888157, iteration: 409040
loss: 1.1643961668014526,grad_norm: 0.9999998833768533, iteration: 409041
loss: 0.990569531917572,grad_norm: 0.9999991067576327, iteration: 409042
loss: 1.0306004285812378,grad_norm: 0.9999990095614795, iteration: 409043
loss: 1.002118468284607,grad_norm: 0.9387035134382202, iteration: 409044
loss: 1.0530210733413696,grad_norm: 0.791570230120407, iteration: 409045
loss: 1.0099244117736816,grad_norm: 0.9606861805938495, iteration: 409046
loss: 1.046717643737793,grad_norm: 0.8192867994372381, iteration: 409047
loss: 0.9753926396369934,grad_norm: 0.6992886604275108, iteration: 409048
loss: 1.0587691068649292,grad_norm: 0.999999944389382, iteration: 409049
loss: 1.0439629554748535,grad_norm: 0.9999998439889862, iteration: 409050
loss: 1.0403022766113281,grad_norm: 0.9999993039555959, iteration: 409051
loss: 0.996432363986969,grad_norm: 0.8512278819282421, iteration: 409052
loss: 1.0300897359848022,grad_norm: 0.7916251115565518, iteration: 409053
loss: 1.1750011444091797,grad_norm: 0.9999993983712671, iteration: 409054
loss: 1.04929518699646,grad_norm: 1.0000000367494148, iteration: 409055
loss: 0.9606417417526245,grad_norm: 0.8189722429637193, iteration: 409056
loss: 0.9761762619018555,grad_norm: 0.7462751937671075, iteration: 409057
loss: 0.987637460231781,grad_norm: 0.7443004135876935, iteration: 409058
loss: 1.0343685150146484,grad_norm: 0.9763883169920023, iteration: 409059
loss: 1.0094208717346191,grad_norm: 0.9999991004151614, iteration: 409060
loss: 1.051221489906311,grad_norm: 0.9116531397491249, iteration: 409061
loss: 1.1486676931381226,grad_norm: 0.9999992899010196, iteration: 409062
loss: 1.0713281631469727,grad_norm: 0.9999994003715696, iteration: 409063
loss: 1.0251668691635132,grad_norm: 0.9463954772745223, iteration: 409064
loss: 0.9951814413070679,grad_norm: 0.7315018162768713, iteration: 409065
loss: 1.2221126556396484,grad_norm: 0.9999998932518156, iteration: 409066
loss: 1.0234475135803223,grad_norm: 0.7216356529451449, iteration: 409067
loss: 1.0656874179840088,grad_norm: 0.9999998944593997, iteration: 409068
loss: 1.0287929773330688,grad_norm: 0.9999993268877928, iteration: 409069
loss: 1.0363372564315796,grad_norm: 0.997632514671337, iteration: 409070
loss: 1.0082234144210815,grad_norm: 0.9350114060259717, iteration: 409071
loss: 1.0187257528305054,grad_norm: 0.9999993277087531, iteration: 409072
loss: 1.0563225746154785,grad_norm: 0.8758717833050005, iteration: 409073
loss: 1.1187160015106201,grad_norm: 0.9999995320388227, iteration: 409074
loss: 0.9861927628517151,grad_norm: 0.9999996327817485, iteration: 409075
loss: 1.0219634771347046,grad_norm: 0.9999990957921355, iteration: 409076
loss: 0.9983958601951599,grad_norm: 0.9015087110646776, iteration: 409077
loss: 1.013776183128357,grad_norm: 0.757830660443547, iteration: 409078
loss: 0.9972900748252869,grad_norm: 0.7612793928248227, iteration: 409079
loss: 1.0192153453826904,grad_norm: 0.9091600067196335, iteration: 409080
loss: 1.1102564334869385,grad_norm: 0.945609644379464, iteration: 409081
loss: 1.2141093015670776,grad_norm: 0.9999999534433267, iteration: 409082
loss: 1.013743281364441,grad_norm: 0.7809108837064853, iteration: 409083
loss: 1.0170812606811523,grad_norm: 0.7944059329576878, iteration: 409084
loss: 1.0772032737731934,grad_norm: 0.9999994904781506, iteration: 409085
loss: 1.1059106588363647,grad_norm: 0.9999997361667642, iteration: 409086
loss: 1.122717022895813,grad_norm: 0.9999995824473825, iteration: 409087
loss: 1.1442192792892456,grad_norm: 0.9999998744007169, iteration: 409088
loss: 0.9667580127716064,grad_norm: 0.9999991845040918, iteration: 409089
loss: 1.0865778923034668,grad_norm: 0.9999998397659902, iteration: 409090
loss: 1.0057820081710815,grad_norm: 0.8422014351411565, iteration: 409091
loss: 1.0285060405731201,grad_norm: 0.7426544587527978, iteration: 409092
loss: 0.9883265495300293,grad_norm: 0.8293459533376141, iteration: 409093
loss: 1.008780837059021,grad_norm: 0.9629801474774804, iteration: 409094
loss: 1.0431545972824097,grad_norm: 0.9999999091689623, iteration: 409095
loss: 0.9797676801681519,grad_norm: 0.8633579596630985, iteration: 409096
loss: 0.9984429478645325,grad_norm: 0.7294015623169928, iteration: 409097
loss: 1.0597094297409058,grad_norm: 0.9999998765683024, iteration: 409098
loss: 1.0256168842315674,grad_norm: 0.9999992930556165, iteration: 409099
loss: 0.9730070233345032,grad_norm: 0.748663814051854, iteration: 409100
loss: 1.0568567514419556,grad_norm: 0.9659746780189399, iteration: 409101
loss: 1.018243670463562,grad_norm: 0.812709080345334, iteration: 409102
loss: 0.9610577821731567,grad_norm: 0.832559097173814, iteration: 409103
loss: 1.3250951766967773,grad_norm: 0.9999997537114581, iteration: 409104
loss: 1.0270272493362427,grad_norm: 0.9999990890907997, iteration: 409105
loss: 1.0278645753860474,grad_norm: 0.7670611971440194, iteration: 409106
loss: 0.9938275814056396,grad_norm: 0.865024505005432, iteration: 409107
loss: 0.9913244843482971,grad_norm: 0.709776039963409, iteration: 409108
loss: 1.0043129920959473,grad_norm: 0.7392672741829838, iteration: 409109
loss: 0.9661609530448914,grad_norm: 0.9999998763884865, iteration: 409110
loss: 0.9718732237815857,grad_norm: 0.7533470631486261, iteration: 409111
loss: 1.0126490592956543,grad_norm: 0.9999999102938918, iteration: 409112
loss: 1.0994837284088135,grad_norm: 0.9649221788864667, iteration: 409113
loss: 1.0102050304412842,grad_norm: 0.683794446055568, iteration: 409114
loss: 1.0054452419281006,grad_norm: 0.9126420939678489, iteration: 409115
loss: 1.0112645626068115,grad_norm: 0.6508695529204923, iteration: 409116
loss: 0.9969419836997986,grad_norm: 0.9684647152481839, iteration: 409117
loss: 1.126510739326477,grad_norm: 0.9999998482396527, iteration: 409118
loss: 0.9802804589271545,grad_norm: 0.8689792017097865, iteration: 409119
loss: 0.9991448521614075,grad_norm: 0.9567726637764318, iteration: 409120
loss: 1.0144507884979248,grad_norm: 0.6990528465885234, iteration: 409121
loss: 1.0433337688446045,grad_norm: 0.9999994071182435, iteration: 409122
loss: 1.1377251148223877,grad_norm: 1.0000000166605123, iteration: 409123
loss: 0.9901437759399414,grad_norm: 0.6659844555781893, iteration: 409124
loss: 1.0525147914886475,grad_norm: 0.9999990290151379, iteration: 409125
loss: 0.9938194155693054,grad_norm: 0.697443325671164, iteration: 409126
loss: 1.0636143684387207,grad_norm: 0.860893572357848, iteration: 409127
loss: 1.0383729934692383,grad_norm: 0.977492952988603, iteration: 409128
loss: 1.0018103122711182,grad_norm: 0.9999995460224242, iteration: 409129
loss: 0.9853076934814453,grad_norm: 0.8143824459024912, iteration: 409130
loss: 1.0032594203948975,grad_norm: 0.9018460734109324, iteration: 409131
loss: 1.0005980730056763,grad_norm: 0.9999990401566637, iteration: 409132
loss: 1.0462572574615479,grad_norm: 0.9999997492311594, iteration: 409133
loss: 1.013823390007019,grad_norm: 0.7651762491243745, iteration: 409134
loss: 0.9767077565193176,grad_norm: 0.7220875292695184, iteration: 409135
loss: 1.014988899230957,grad_norm: 0.960925405670454, iteration: 409136
loss: 0.9935663342475891,grad_norm: 0.701454183101201, iteration: 409137
loss: 0.9805657267570496,grad_norm: 0.8913607256009717, iteration: 409138
loss: 0.9959748983383179,grad_norm: 0.9999995459465765, iteration: 409139
loss: 1.0013890266418457,grad_norm: 0.8718170052474734, iteration: 409140
loss: 0.9811621904373169,grad_norm: 0.7632371047688828, iteration: 409141
loss: 0.9790152311325073,grad_norm: 0.7407477388878362, iteration: 409142
loss: 0.9815320372581482,grad_norm: 0.9999997658878718, iteration: 409143
loss: 1.0252151489257812,grad_norm: 0.9559659923547704, iteration: 409144
loss: 1.0231318473815918,grad_norm: 0.7314158153133158, iteration: 409145
loss: 0.9791940450668335,grad_norm: 0.8147426331820063, iteration: 409146
loss: 0.9750135540962219,grad_norm: 0.7585345366816042, iteration: 409147
loss: 0.9982122182846069,grad_norm: 0.6779357049891798, iteration: 409148
loss: 0.9865968227386475,grad_norm: 0.776720434233468, iteration: 409149
loss: 0.9970674514770508,grad_norm: 0.796109163604044, iteration: 409150
loss: 1.0283807516098022,grad_norm: 0.7075789624588421, iteration: 409151
loss: 1.0263361930847168,grad_norm: 0.8794054228913494, iteration: 409152
loss: 0.9694990515708923,grad_norm: 0.8118408129298892, iteration: 409153
loss: 1.0918759107589722,grad_norm: 0.9999997634265984, iteration: 409154
loss: 1.0590214729309082,grad_norm: 0.9999994422534059, iteration: 409155
loss: 0.9676018357276917,grad_norm: 0.7736780442867569, iteration: 409156
loss: 1.0189530849456787,grad_norm: 0.838809277578762, iteration: 409157
loss: 0.9716347455978394,grad_norm: 0.6954224132367607, iteration: 409158
loss: 0.9666135907173157,grad_norm: 0.7860078293308853, iteration: 409159
loss: 1.1262856721878052,grad_norm: 0.9960154659795483, iteration: 409160
loss: 1.0261722803115845,grad_norm: 0.7951505866137301, iteration: 409161
loss: 1.0741701126098633,grad_norm: 0.9999993354637037, iteration: 409162
loss: 0.9985055923461914,grad_norm: 0.7403977851074961, iteration: 409163
loss: 0.9916757345199585,grad_norm: 0.7805873094726061, iteration: 409164
loss: 0.9812014102935791,grad_norm: 0.71955992543759, iteration: 409165
loss: 1.0032455921173096,grad_norm: 0.7163503478974756, iteration: 409166
loss: 1.0286444425582886,grad_norm: 0.8444358976641826, iteration: 409167
loss: 0.9946398735046387,grad_norm: 0.8595612636201304, iteration: 409168
loss: 0.9745083451271057,grad_norm: 0.939687146228766, iteration: 409169
loss: 0.9591863751411438,grad_norm: 0.8416178091560889, iteration: 409170
loss: 0.9981657862663269,grad_norm: 0.892951815669068, iteration: 409171
loss: 0.9709705710411072,grad_norm: 0.773436489233013, iteration: 409172
loss: 1.000246524810791,grad_norm: 0.7773276103558144, iteration: 409173
loss: 0.9373543858528137,grad_norm: 0.8208356947221774, iteration: 409174
loss: 1.0120704174041748,grad_norm: 0.7966589122126536, iteration: 409175
loss: 1.0635470151901245,grad_norm: 0.9999995058624349, iteration: 409176
loss: 0.9978264570236206,grad_norm: 0.999999144873487, iteration: 409177
loss: 0.9805753827095032,grad_norm: 0.8175096554994694, iteration: 409178
loss: 0.9872788786888123,grad_norm: 0.8391535170019775, iteration: 409179
loss: 0.9928469657897949,grad_norm: 0.7786286885934105, iteration: 409180
loss: 0.9491563439369202,grad_norm: 0.9468859205223842, iteration: 409181
loss: 1.0045496225357056,grad_norm: 0.7840139354173522, iteration: 409182
loss: 1.020727515220642,grad_norm: 0.6980663046482884, iteration: 409183
loss: 1.0122119188308716,grad_norm: 0.9999997559839281, iteration: 409184
loss: 0.9840463995933533,grad_norm: 0.8208487896415942, iteration: 409185
loss: 0.9961795806884766,grad_norm: 0.6600180242788273, iteration: 409186
loss: 1.0185480117797852,grad_norm: 0.6848345402342961, iteration: 409187
loss: 0.9870693683624268,grad_norm: 0.8097094686361233, iteration: 409188
loss: 1.0025790929794312,grad_norm: 0.7172031371617094, iteration: 409189
loss: 0.9728153347969055,grad_norm: 0.736320833409518, iteration: 409190
loss: 1.0093119144439697,grad_norm: 0.8123154356577066, iteration: 409191
loss: 1.0933655500411987,grad_norm: 0.9636137587817654, iteration: 409192
loss: 1.0292729139328003,grad_norm: 0.9467157321467319, iteration: 409193
loss: 0.959774911403656,grad_norm: 0.6929703252857335, iteration: 409194
loss: 1.0307722091674805,grad_norm: 0.7738059604710862, iteration: 409195
loss: 1.0119675397872925,grad_norm: 0.7455900581440796, iteration: 409196
loss: 1.0313866138458252,grad_norm: 0.8003364881715521, iteration: 409197
loss: 1.0012117624282837,grad_norm: 0.8440320123147116, iteration: 409198
loss: 0.9894400835037231,grad_norm: 0.8295500321984356, iteration: 409199
loss: 1.0068364143371582,grad_norm: 0.9999992845552238, iteration: 409200
loss: 0.980597734451294,grad_norm: 0.7721973148445761, iteration: 409201
loss: 0.9944019317626953,grad_norm: 0.7541281062612727, iteration: 409202
loss: 1.0185558795928955,grad_norm: 0.8722551961901865, iteration: 409203
loss: 0.9960989952087402,grad_norm: 0.8581519623589607, iteration: 409204
loss: 1.1011366844177246,grad_norm: 0.9999998544044955, iteration: 409205
loss: 0.9868870973587036,grad_norm: 0.6535516167746211, iteration: 409206
loss: 0.9882470369338989,grad_norm: 0.8775427659693961, iteration: 409207
loss: 0.9972708225250244,grad_norm: 0.9432190820686812, iteration: 409208
loss: 0.9740566611289978,grad_norm: 0.8388247600189048, iteration: 409209
loss: 0.9707016348838806,grad_norm: 0.8737583702536579, iteration: 409210
loss: 1.0085111856460571,grad_norm: 0.7439850492106256, iteration: 409211
loss: 0.9603191018104553,grad_norm: 0.9644171224446942, iteration: 409212
loss: 1.0268522500991821,grad_norm: 0.8365003497992384, iteration: 409213
loss: 1.007940649986267,grad_norm: 0.7584963930173447, iteration: 409214
loss: 1.021048665046692,grad_norm: 0.961240153280265, iteration: 409215
loss: 0.9761458039283752,grad_norm: 0.7500358787030328, iteration: 409216
loss: 0.9962104558944702,grad_norm: 0.7605507369436715, iteration: 409217
loss: 1.007758617401123,grad_norm: 0.7630778245108798, iteration: 409218
loss: 1.00379478931427,grad_norm: 0.7421358266084425, iteration: 409219
loss: 1.0314230918884277,grad_norm: 0.6429904512237798, iteration: 409220
loss: 0.9760124683380127,grad_norm: 0.714629031228094, iteration: 409221
loss: 0.9936274290084839,grad_norm: 0.7888330390393855, iteration: 409222
loss: 0.9744278788566589,grad_norm: 0.7007087502302798, iteration: 409223
loss: 0.9846346974372864,grad_norm: 0.6889155419038633, iteration: 409224
loss: 1.1019418239593506,grad_norm: 0.999999703252548, iteration: 409225
loss: 0.9965285658836365,grad_norm: 0.6448774246812862, iteration: 409226
loss: 1.0630135536193848,grad_norm: 0.9999999171582817, iteration: 409227
loss: 0.986282467842102,grad_norm: 0.8360163404324482, iteration: 409228
loss: 1.0237314701080322,grad_norm: 0.6965037982636006, iteration: 409229
loss: 1.0096875429153442,grad_norm: 0.6440314368307255, iteration: 409230
loss: 1.0041882991790771,grad_norm: 0.8695332031006086, iteration: 409231
loss: 1.0339587926864624,grad_norm: 0.81535547466244, iteration: 409232
loss: 1.0404144525527954,grad_norm: 0.7848578629948276, iteration: 409233
loss: 1.0598912239074707,grad_norm: 0.8727111223856292, iteration: 409234
loss: 1.0227885246276855,grad_norm: 0.8999521363453609, iteration: 409235
loss: 1.0173876285552979,grad_norm: 0.8170667419343652, iteration: 409236
loss: 1.0182572603225708,grad_norm: 0.8975333994479576, iteration: 409237
loss: 1.1289010047912598,grad_norm: 0.9999998683637004, iteration: 409238
loss: 1.0123627185821533,grad_norm: 0.7818944656048264, iteration: 409239
loss: 0.9723179340362549,grad_norm: 0.7747111739103447, iteration: 409240
loss: 0.9692817330360413,grad_norm: 0.7961955351629298, iteration: 409241
loss: 1.0013645887374878,grad_norm: 0.8441430449718471, iteration: 409242
loss: 1.0343296527862549,grad_norm: 0.9864283135886313, iteration: 409243
loss: 0.9725956916809082,grad_norm: 0.8270653517358793, iteration: 409244
loss: 1.0467779636383057,grad_norm: 0.9999992068999365, iteration: 409245
loss: 1.0133030414581299,grad_norm: 0.8703382724286828, iteration: 409246
loss: 0.9817326664924622,grad_norm: 0.9999994753376591, iteration: 409247
loss: 0.9868912696838379,grad_norm: 0.7009397658185741, iteration: 409248
loss: 1.003330945968628,grad_norm: 0.7688715139616784, iteration: 409249
loss: 0.9807250499725342,grad_norm: 0.8076307600952517, iteration: 409250
loss: 0.9979005455970764,grad_norm: 0.9414233898884382, iteration: 409251
loss: 1.0958118438720703,grad_norm: 0.9999991413934335, iteration: 409252
loss: 0.989884614944458,grad_norm: 0.8425817687898642, iteration: 409253
loss: 0.9768773913383484,grad_norm: 0.9134495170596292, iteration: 409254
loss: 0.9870902895927429,grad_norm: 0.8094024653473391, iteration: 409255
loss: 1.0029282569885254,grad_norm: 0.7784927052858691, iteration: 409256
loss: 0.995649516582489,grad_norm: 0.8159042329947062, iteration: 409257
loss: 1.0041991472244263,grad_norm: 0.7905008955797113, iteration: 409258
loss: 0.9932952523231506,grad_norm: 0.8912100295050913, iteration: 409259
loss: 0.9839252233505249,grad_norm: 0.7061562010804245, iteration: 409260
loss: 0.9766138195991516,grad_norm: 0.7653794779095552, iteration: 409261
loss: 1.0012562274932861,grad_norm: 0.6339045064554294, iteration: 409262
loss: 1.118125081062317,grad_norm: 0.9999992387416398, iteration: 409263
loss: 1.0193692445755005,grad_norm: 0.999999865981576, iteration: 409264
loss: 1.0020322799682617,grad_norm: 0.6530719635511423, iteration: 409265
loss: 0.9930316209793091,grad_norm: 0.8228879918025918, iteration: 409266
loss: 1.003389596939087,grad_norm: 0.7431707182084706, iteration: 409267
loss: 1.034708023071289,grad_norm: 0.8097013405970549, iteration: 409268
loss: 0.9808773994445801,grad_norm: 0.9173200293979867, iteration: 409269
loss: 1.0224385261535645,grad_norm: 0.8665533610156371, iteration: 409270
loss: 1.0072104930877686,grad_norm: 0.7435107673161905, iteration: 409271
loss: 0.9929071664810181,grad_norm: 0.9235462361663537, iteration: 409272
loss: 1.1064444780349731,grad_norm: 0.8018628762047229, iteration: 409273
loss: 1.1393572092056274,grad_norm: 0.7552499464692318, iteration: 409274
loss: 1.015368103981018,grad_norm: 0.860291490229232, iteration: 409275
loss: 0.9746182560920715,grad_norm: 0.9506077853506767, iteration: 409276
loss: 1.0230844020843506,grad_norm: 0.8141487329640389, iteration: 409277
loss: 1.0466127395629883,grad_norm: 0.9662375716837506, iteration: 409278
loss: 0.9941548109054565,grad_norm: 0.9575615890415571, iteration: 409279
loss: 1.0344117879867554,grad_norm: 0.6822600457062172, iteration: 409280
loss: 1.01514732837677,grad_norm: 0.8274139920635865, iteration: 409281
loss: 1.004775881767273,grad_norm: 0.7662684162717546, iteration: 409282
loss: 0.9893150925636292,grad_norm: 0.674973172422718, iteration: 409283
loss: 1.0076640844345093,grad_norm: 0.7682948023381653, iteration: 409284
loss: 1.0147556066513062,grad_norm: 0.7293016668998862, iteration: 409285
loss: 1.0238522291183472,grad_norm: 0.9043803632568506, iteration: 409286
loss: 0.9899499416351318,grad_norm: 0.7988549110671145, iteration: 409287
loss: 1.0151236057281494,grad_norm: 0.8501538928413512, iteration: 409288
loss: 0.9886081218719482,grad_norm: 0.809739768816374, iteration: 409289
loss: 1.045371174812317,grad_norm: 0.7746883841715999, iteration: 409290
loss: 1.0118221044540405,grad_norm: 0.8663124993204794, iteration: 409291
loss: 0.9973188042640686,grad_norm: 0.8074181186383692, iteration: 409292
loss: 0.988137423992157,grad_norm: 0.7646434098212953, iteration: 409293
loss: 1.0470050573349,grad_norm: 0.9999994820800863, iteration: 409294
loss: 0.9747825264930725,grad_norm: 0.6897061925269192, iteration: 409295
loss: 1.0060365200042725,grad_norm: 0.8314881194357475, iteration: 409296
loss: 1.0028897523880005,grad_norm: 0.7792745972680902, iteration: 409297
loss: 0.991112232208252,grad_norm: 0.7871268404885884, iteration: 409298
loss: 1.0279104709625244,grad_norm: 0.832499787574541, iteration: 409299
loss: 0.9699012637138367,grad_norm: 0.8329646104976353, iteration: 409300
loss: 1.033584475517273,grad_norm: 0.7836869753223483, iteration: 409301
loss: 1.0600696802139282,grad_norm: 0.7463433977357135, iteration: 409302
loss: 1.0021758079528809,grad_norm: 0.8126819408905583, iteration: 409303
loss: 1.0060148239135742,grad_norm: 0.9123084172756726, iteration: 409304
loss: 0.9771923422813416,grad_norm: 0.8636436048238463, iteration: 409305
loss: 0.9917542338371277,grad_norm: 0.8325434359773196, iteration: 409306
loss: 0.9766337871551514,grad_norm: 0.7731229262504263, iteration: 409307
loss: 0.9802632927894592,grad_norm: 0.8395397999857286, iteration: 409308
loss: 0.9830862283706665,grad_norm: 0.7849743596541234, iteration: 409309
loss: 0.9993842840194702,grad_norm: 0.8178175753843542, iteration: 409310
loss: 1.049819827079773,grad_norm: 0.7211705994092705, iteration: 409311
loss: 0.9939455389976501,grad_norm: 0.7957454126730891, iteration: 409312
loss: 0.9878877997398376,grad_norm: 0.7732503423021195, iteration: 409313
loss: 1.026900053024292,grad_norm: 0.8294867052703087, iteration: 409314
loss: 1.0265878438949585,grad_norm: 0.7699587630283253, iteration: 409315
loss: 0.992443859577179,grad_norm: 0.884523620323872, iteration: 409316
loss: 0.9819377660751343,grad_norm: 0.791664938521848, iteration: 409317
loss: 0.9940137267112732,grad_norm: 0.8416556143203763, iteration: 409318
loss: 1.035576343536377,grad_norm: 0.8669102952259343, iteration: 409319
loss: 1.0115365982055664,grad_norm: 0.8518748192759947, iteration: 409320
loss: 0.993660032749176,grad_norm: 0.8977466682266547, iteration: 409321
loss: 1.0462850332260132,grad_norm: 0.9999989855859815, iteration: 409322
loss: 1.0070868730545044,grad_norm: 0.937202424698861, iteration: 409323
loss: 1.06969153881073,grad_norm: 0.8750177676482706, iteration: 409324
loss: 1.010390281677246,grad_norm: 0.6824041357202031, iteration: 409325
loss: 0.9908146262168884,grad_norm: 0.9954361689379599, iteration: 409326
loss: 1.0324238538742065,grad_norm: 0.9999990242713979, iteration: 409327
loss: 1.0509973764419556,grad_norm: 0.9999993973054339, iteration: 409328
loss: 0.975804328918457,grad_norm: 0.8660207186797475, iteration: 409329
loss: 1.038347601890564,grad_norm: 0.6733795925127788, iteration: 409330
loss: 0.9574300646781921,grad_norm: 0.6719843813900469, iteration: 409331
loss: 1.0102448463439941,grad_norm: 0.9570853474828955, iteration: 409332
loss: 0.9854301810264587,grad_norm: 0.6917861207392167, iteration: 409333
loss: 0.9872382283210754,grad_norm: 0.8118912899773768, iteration: 409334
loss: 0.9918677806854248,grad_norm: 0.7018669336154238, iteration: 409335
loss: 1.000162959098816,grad_norm: 0.8601813148765911, iteration: 409336
loss: 1.018051266670227,grad_norm: 0.7789143658098806, iteration: 409337
loss: 0.9715036749839783,grad_norm: 0.999999477147171, iteration: 409338
loss: 1.0066249370574951,grad_norm: 0.8844204992107014, iteration: 409339
loss: 0.9879648685455322,grad_norm: 0.9999990503370819, iteration: 409340
loss: 0.9983313679695129,grad_norm: 0.8479489855180551, iteration: 409341
loss: 0.9842642545700073,grad_norm: 0.7216996308260991, iteration: 409342
loss: 1.030318021774292,grad_norm: 0.9125230889373095, iteration: 409343
loss: 1.0037691593170166,grad_norm: 0.7241998707782368, iteration: 409344
loss: 0.9889466762542725,grad_norm: 0.8513175515774756, iteration: 409345
loss: 1.007073998451233,grad_norm: 0.7714044270652716, iteration: 409346
loss: 1.0248653888702393,grad_norm: 0.9999989924903158, iteration: 409347
loss: 0.9802906513214111,grad_norm: 0.7649138129611338, iteration: 409348
loss: 0.9984870553016663,grad_norm: 0.742078032718707, iteration: 409349
loss: 0.9765806198120117,grad_norm: 0.7464022060589299, iteration: 409350
loss: 1.0161725282669067,grad_norm: 0.7611392461185156, iteration: 409351
loss: 1.0411789417266846,grad_norm: 0.8547507639880165, iteration: 409352
loss: 0.992669939994812,grad_norm: 0.8403104517195498, iteration: 409353
loss: 0.9771055579185486,grad_norm: 0.7579184288492751, iteration: 409354
loss: 1.0177148580551147,grad_norm: 0.9369475045750316, iteration: 409355
loss: 1.0138111114501953,grad_norm: 0.837936113569635, iteration: 409356
loss: 0.9879745244979858,grad_norm: 0.7946497284429156, iteration: 409357
loss: 1.0152498483657837,grad_norm: 0.8370109823099849, iteration: 409358
loss: 0.9942204356193542,grad_norm: 0.7947971252219072, iteration: 409359
loss: 0.96591717004776,grad_norm: 0.7623688071093361, iteration: 409360
loss: 0.9723148941993713,grad_norm: 0.8863742528026312, iteration: 409361
loss: 1.0191981792449951,grad_norm: 0.8922180391401722, iteration: 409362
loss: 1.1048939228057861,grad_norm: 0.999999564791696, iteration: 409363
loss: 1.0364603996276855,grad_norm: 0.6787784285441786, iteration: 409364
loss: 0.9970079660415649,grad_norm: 0.8227860661737606, iteration: 409365
loss: 1.0117194652557373,grad_norm: 0.9999991750467023, iteration: 409366
loss: 1.0744256973266602,grad_norm: 0.8465054074505839, iteration: 409367
loss: 0.978740930557251,grad_norm: 0.70968603694897, iteration: 409368
loss: 1.0200636386871338,grad_norm: 0.730890077269221, iteration: 409369
loss: 0.994217038154602,grad_norm: 0.826875434569087, iteration: 409370
loss: 1.014356255531311,grad_norm: 0.8091532695686797, iteration: 409371
loss: 0.9962252378463745,grad_norm: 0.9903566638969378, iteration: 409372
loss: 1.0509319305419922,grad_norm: 0.9999992688184797, iteration: 409373
loss: 1.0401636362075806,grad_norm: 0.9835391980383509, iteration: 409374
loss: 1.0829464197158813,grad_norm: 0.7741541516747759, iteration: 409375
loss: 1.0103226900100708,grad_norm: 0.8404430880555152, iteration: 409376
loss: 1.0250098705291748,grad_norm: 0.6834751270205822, iteration: 409377
loss: 1.028028964996338,grad_norm: 0.9999995816092562, iteration: 409378
loss: 1.0229023694992065,grad_norm: 0.8199125551581263, iteration: 409379
loss: 0.9682459831237793,grad_norm: 0.9246980780807209, iteration: 409380
loss: 1.0460457801818848,grad_norm: 0.8997543888805296, iteration: 409381
loss: 0.9654403328895569,grad_norm: 0.8976578256187432, iteration: 409382
loss: 1.0282111167907715,grad_norm: 0.9999995708799729, iteration: 409383
loss: 0.9910051822662354,grad_norm: 0.7949897484728521, iteration: 409384
loss: 1.0111026763916016,grad_norm: 0.9999990002244108, iteration: 409385
loss: 1.0045214891433716,grad_norm: 0.810154605164426, iteration: 409386
loss: 0.9788463115692139,grad_norm: 0.8488372699804028, iteration: 409387
loss: 1.0243428945541382,grad_norm: 0.9999991634917587, iteration: 409388
loss: 0.9894754886627197,grad_norm: 0.7253836194643698, iteration: 409389
loss: 0.9819269776344299,grad_norm: 0.6752310559653467, iteration: 409390
loss: 1.0859683752059937,grad_norm: 0.999999527028134, iteration: 409391
loss: 1.0143736600875854,grad_norm: 0.7593079411562345, iteration: 409392
loss: 0.9767916202545166,grad_norm: 0.9999998236574168, iteration: 409393
loss: 1.0335638523101807,grad_norm: 0.9999997612524731, iteration: 409394
loss: 1.0218559503555298,grad_norm: 0.9381986824243661, iteration: 409395
loss: 1.0170360803604126,grad_norm: 0.8180546492888869, iteration: 409396
loss: 0.9785314202308655,grad_norm: 0.7055430809250497, iteration: 409397
loss: 1.0103862285614014,grad_norm: 0.9999991890587688, iteration: 409398
loss: 1.0805118083953857,grad_norm: 0.7226737881339358, iteration: 409399
loss: 1.0394023656845093,grad_norm: 0.9352634293133789, iteration: 409400
loss: 0.9652205109596252,grad_norm: 0.7503955781177091, iteration: 409401
loss: 0.9674710035324097,grad_norm: 0.6460389963247325, iteration: 409402
loss: 1.0668405294418335,grad_norm: 0.7622147593424277, iteration: 409403
loss: 1.1201497316360474,grad_norm: 0.9999997168436837, iteration: 409404
loss: 0.9731826782226562,grad_norm: 0.6635655679819619, iteration: 409405
loss: 1.0041460990905762,grad_norm: 0.8538697733773607, iteration: 409406
loss: 1.0178395509719849,grad_norm: 0.6946849791929282, iteration: 409407
loss: 1.007066249847412,grad_norm: 0.7979581028882503, iteration: 409408
loss: 1.0137006044387817,grad_norm: 0.9999992164077797, iteration: 409409
loss: 1.004435420036316,grad_norm: 0.8119882546357784, iteration: 409410
loss: 1.0309052467346191,grad_norm: 0.7443776878500253, iteration: 409411
loss: 1.0127758979797363,grad_norm: 0.7133429731006802, iteration: 409412
loss: 1.0046873092651367,grad_norm: 0.8028207670612107, iteration: 409413
loss: 1.0116750001907349,grad_norm: 0.9999993257513109, iteration: 409414
loss: 1.0094317197799683,grad_norm: 0.765053070229848, iteration: 409415
loss: 0.9817364811897278,grad_norm: 0.8260258590344344, iteration: 409416
loss: 1.003707766532898,grad_norm: 0.8866163567806084, iteration: 409417
loss: 0.9863154292106628,grad_norm: 0.7929335292877328, iteration: 409418
loss: 1.0424574613571167,grad_norm: 0.7693077164547795, iteration: 409419
loss: 1.0428109169006348,grad_norm: 0.9015905687826975, iteration: 409420
loss: 1.0106959342956543,grad_norm: 0.8582763250710241, iteration: 409421
loss: 1.0240685939788818,grad_norm: 0.8073644552972257, iteration: 409422
loss: 0.9966362714767456,grad_norm: 0.984294063993709, iteration: 409423
loss: 1.0200474262237549,grad_norm: 0.7570283283217956, iteration: 409424
loss: 0.9939519762992859,grad_norm: 0.750446005591559, iteration: 409425
loss: 0.9640332460403442,grad_norm: 0.7882958143220192, iteration: 409426
loss: 1.0353704690933228,grad_norm: 0.7217439353464126, iteration: 409427
loss: 0.9817824363708496,grad_norm: 0.6658042513084205, iteration: 409428
loss: 1.0300593376159668,grad_norm: 0.9999997049293357, iteration: 409429
loss: 1.0034584999084473,grad_norm: 0.6745938762027464, iteration: 409430
loss: 1.0137529373168945,grad_norm: 0.8486661103500216, iteration: 409431
loss: 1.0118567943572998,grad_norm: 0.8587651150440904, iteration: 409432
loss: 0.983589231967926,grad_norm: 0.8500835664470805, iteration: 409433
loss: 1.0228251218795776,grad_norm: 0.8489766732703062, iteration: 409434
loss: 0.9716450572013855,grad_norm: 0.741082344753524, iteration: 409435
loss: 1.1254425048828125,grad_norm: 0.9999992030231891, iteration: 409436
loss: 1.0361024141311646,grad_norm: 0.7703576853885659, iteration: 409437
loss: 0.9943878054618835,grad_norm: 0.7380578315219924, iteration: 409438
loss: 1.0079082250595093,grad_norm: 0.9999996963503831, iteration: 409439
loss: 1.0021014213562012,grad_norm: 0.9813606392995408, iteration: 409440
loss: 0.9829050302505493,grad_norm: 0.8126553040137026, iteration: 409441
loss: 0.9988231062889099,grad_norm: 0.7992758290457472, iteration: 409442
loss: 0.9702471494674683,grad_norm: 0.6799068923485052, iteration: 409443
loss: 0.9792096018791199,grad_norm: 0.8215476241887469, iteration: 409444
loss: 0.9857885837554932,grad_norm: 0.7320655577474692, iteration: 409445
loss: 0.9769063591957092,grad_norm: 0.7219446096932896, iteration: 409446
loss: 0.9914698600769043,grad_norm: 0.7184754502616157, iteration: 409447
loss: 1.0073192119598389,grad_norm: 0.7998075858592356, iteration: 409448
loss: 1.0315220355987549,grad_norm: 0.9999992354344407, iteration: 409449
loss: 0.9948026537895203,grad_norm: 0.8820038973913985, iteration: 409450
loss: 0.9975981712341309,grad_norm: 0.812503143642665, iteration: 409451
loss: 1.0163482427597046,grad_norm: 0.8684159453332462, iteration: 409452
loss: 0.9983696341514587,grad_norm: 0.778140453896287, iteration: 409453
loss: 0.9973184466362,grad_norm: 0.7516509869620449, iteration: 409454
loss: 0.9889600276947021,grad_norm: 0.7808459811139272, iteration: 409455
loss: 0.9831330180168152,grad_norm: 0.7808325521887386, iteration: 409456
loss: 0.9839896559715271,grad_norm: 0.7786378493521704, iteration: 409457
loss: 0.9930437207221985,grad_norm: 0.7505729333125845, iteration: 409458
loss: 0.9818786382675171,grad_norm: 0.9017748304042869, iteration: 409459
loss: 1.02362060546875,grad_norm: 0.8184071589120182, iteration: 409460
loss: 1.055444359779358,grad_norm: 0.8626680583030972, iteration: 409461
loss: 0.9667581915855408,grad_norm: 0.6780503982343192, iteration: 409462
loss: 0.9604697227478027,grad_norm: 0.9368890884215973, iteration: 409463
loss: 0.9723714590072632,grad_norm: 0.826083740906779, iteration: 409464
loss: 1.0098940134048462,grad_norm: 0.8647561833018799, iteration: 409465
loss: 0.9988671541213989,grad_norm: 0.7842679631093097, iteration: 409466
loss: 1.0218243598937988,grad_norm: 0.7519225963217324, iteration: 409467
loss: 0.999459981918335,grad_norm: 0.6585432688553555, iteration: 409468
loss: 1.1283719539642334,grad_norm: 0.999999728940451, iteration: 409469
loss: 1.011339545249939,grad_norm: 0.8054161675451468, iteration: 409470
loss: 1.0759795904159546,grad_norm: 0.9999995318990273, iteration: 409471
loss: 0.9955438375473022,grad_norm: 0.6680586959932715, iteration: 409472
loss: 1.0210398435592651,grad_norm: 0.8012716446422917, iteration: 409473
loss: 1.0465917587280273,grad_norm: 0.8870324946156986, iteration: 409474
loss: 1.0036041736602783,grad_norm: 0.7625937759140692, iteration: 409475
loss: 0.9980050921440125,grad_norm: 0.7617654053587378, iteration: 409476
loss: 1.025886058807373,grad_norm: 0.7471937360701048, iteration: 409477
loss: 1.0005028247833252,grad_norm: 0.6940177194784946, iteration: 409478
loss: 0.9959555268287659,grad_norm: 0.9999998587285289, iteration: 409479
loss: 1.0364141464233398,grad_norm: 0.8822887550126145, iteration: 409480
loss: 0.9822390675544739,grad_norm: 0.8110728409276452, iteration: 409481
loss: 0.9901648759841919,grad_norm: 0.9358400889968436, iteration: 409482
loss: 0.9622517228126526,grad_norm: 0.7460890664383216, iteration: 409483
loss: 1.0081695318222046,grad_norm: 0.8372600930630417, iteration: 409484
loss: 0.9911830425262451,grad_norm: 0.7824349843260392, iteration: 409485
loss: 1.0070997476577759,grad_norm: 0.8465704847909844, iteration: 409486
loss: 0.9845203161239624,grad_norm: 0.8042788391503867, iteration: 409487
loss: 1.0427641868591309,grad_norm: 0.8512596679065526, iteration: 409488
loss: 0.9835383892059326,grad_norm: 0.6964312011058021, iteration: 409489
loss: 1.005256175994873,grad_norm: 0.821393794857547, iteration: 409490
loss: 0.9959942102432251,grad_norm: 0.997659026322844, iteration: 409491
loss: 0.9824501872062683,grad_norm: 0.70144691539972, iteration: 409492
loss: 1.001328945159912,grad_norm: 0.7808218995374273, iteration: 409493
loss: 1.1089333295822144,grad_norm: 0.9999993044111243, iteration: 409494
loss: 1.0048819780349731,grad_norm: 0.8355430041701369, iteration: 409495
loss: 0.9815809726715088,grad_norm: 0.8433668637244042, iteration: 409496
loss: 1.0006228685379028,grad_norm: 0.7689224033516662, iteration: 409497
loss: 1.012172818183899,grad_norm: 0.8853019357522288, iteration: 409498
loss: 1.0135343074798584,grad_norm: 0.8484083059566871, iteration: 409499
loss: 1.0033082962036133,grad_norm: 0.8434569750845335, iteration: 409500
loss: 1.013702392578125,grad_norm: 0.8315771453705022, iteration: 409501
loss: 1.0217183828353882,grad_norm: 0.9787198090117013, iteration: 409502
loss: 1.038022756576538,grad_norm: 0.7471714568595558, iteration: 409503
loss: 1.065427303314209,grad_norm: 0.6929048592986559, iteration: 409504
loss: 1.0373094081878662,grad_norm: 0.8318785802506201, iteration: 409505
loss: 0.9874022006988525,grad_norm: 0.9709971241765887, iteration: 409506
loss: 0.9946615695953369,grad_norm: 0.8165964229783392, iteration: 409507
loss: 1.0354375839233398,grad_norm: 0.9999996427918153, iteration: 409508
loss: 1.113812804222107,grad_norm: 0.9999998704386729, iteration: 409509
loss: 0.9987267851829529,grad_norm: 0.6658601147104546, iteration: 409510
loss: 0.9930443167686462,grad_norm: 0.7371975801583219, iteration: 409511
loss: 0.990164041519165,grad_norm: 0.9079061590040478, iteration: 409512
loss: 0.9949226975440979,grad_norm: 0.9207441667401408, iteration: 409513
loss: 0.9698055982589722,grad_norm: 0.8241188229769792, iteration: 409514
loss: 1.066774606704712,grad_norm: 0.9999997856326683, iteration: 409515
loss: 1.0227667093276978,grad_norm: 0.8806833312294022, iteration: 409516
loss: 0.9808545112609863,grad_norm: 0.7336235683832596, iteration: 409517
loss: 1.0033127069473267,grad_norm: 0.8684759180881514, iteration: 409518
loss: 1.0056254863739014,grad_norm: 0.832325724310625, iteration: 409519
loss: 1.0557559728622437,grad_norm: 0.6769236117434759, iteration: 409520
loss: 0.9801105856895447,grad_norm: 0.8219257899115531, iteration: 409521
loss: 1.0251622200012207,grad_norm: 0.8318967957570707, iteration: 409522
loss: 1.0153875350952148,grad_norm: 0.8294278988200039, iteration: 409523
loss: 1.0732513666152954,grad_norm: 0.81708530216395, iteration: 409524
loss: 1.0519042015075684,grad_norm: 1.0000000312936708, iteration: 409525
loss: 1.0099561214447021,grad_norm: 0.8178516577595928, iteration: 409526
loss: 1.006649374961853,grad_norm: 0.7585040067142815, iteration: 409527
loss: 1.036906123161316,grad_norm: 0.9046166524870557, iteration: 409528
loss: 1.0177379846572876,grad_norm: 0.9075110513225452, iteration: 409529
loss: 1.0573091506958008,grad_norm: 0.9999993982324379, iteration: 409530
loss: 1.008390188217163,grad_norm: 0.9224124340309636, iteration: 409531
loss: 1.0067033767700195,grad_norm: 0.7932937635137011, iteration: 409532
loss: 0.9895039796829224,grad_norm: 0.7642002687830581, iteration: 409533
loss: 1.0080126523971558,grad_norm: 0.9179585772351379, iteration: 409534
loss: 1.0319916009902954,grad_norm: 0.9999993423221903, iteration: 409535
loss: 1.047607421875,grad_norm: 0.9999995093737354, iteration: 409536
loss: 0.995771586894989,grad_norm: 0.8586264772067741, iteration: 409537
loss: 1.0037860870361328,grad_norm: 0.7120604432836715, iteration: 409538
loss: 1.0016404390335083,grad_norm: 0.8049340143082309, iteration: 409539
loss: 1.0662442445755005,grad_norm: 0.68161834959902, iteration: 409540
loss: 1.0506219863891602,grad_norm: 0.840294688566694, iteration: 409541
loss: 1.0186792612075806,grad_norm: 0.8225819675428925, iteration: 409542
loss: 1.106451392173767,grad_norm: 0.8141133507003065, iteration: 409543
loss: 1.0081350803375244,grad_norm: 0.7779652134378755, iteration: 409544
loss: 1.0401424169540405,grad_norm: 0.9668071639390268, iteration: 409545
loss: 1.026883602142334,grad_norm: 0.9999991496091398, iteration: 409546
loss: 1.0245121717453003,grad_norm: 0.8788338940110437, iteration: 409547
loss: 0.9740607738494873,grad_norm: 0.9999998247133377, iteration: 409548
loss: 1.0105557441711426,grad_norm: 0.749328027148681, iteration: 409549
loss: 1.0877453088760376,grad_norm: 0.9999995882922148, iteration: 409550
loss: 1.0341130495071411,grad_norm: 0.922878041799791, iteration: 409551
loss: 1.013173222541809,grad_norm: 0.9629923738944823, iteration: 409552
loss: 1.0396511554718018,grad_norm: 0.9999994011741261, iteration: 409553
loss: 0.9823490977287292,grad_norm: 0.9999992912943855, iteration: 409554
loss: 0.9876782894134521,grad_norm: 0.9999990558826807, iteration: 409555
loss: 1.0276044607162476,grad_norm: 0.999999008497275, iteration: 409556
loss: 1.0833748579025269,grad_norm: 0.9999990136708078, iteration: 409557
loss: 1.0441665649414062,grad_norm: 0.81994967043554, iteration: 409558
loss: 1.0107601881027222,grad_norm: 0.726671250867925, iteration: 409559
loss: 0.9727117419242859,grad_norm: 0.8274030268980341, iteration: 409560
loss: 1.1092264652252197,grad_norm: 0.9999997918685619, iteration: 409561
loss: 0.9855313301086426,grad_norm: 0.9999991126775494, iteration: 409562
loss: 1.0545600652694702,grad_norm: 0.999999958722192, iteration: 409563
loss: 0.981579065322876,grad_norm: 0.883517121587052, iteration: 409564
loss: 1.0262365341186523,grad_norm: 0.9999994285107155, iteration: 409565
loss: 0.9734997153282166,grad_norm: 0.7928434895814839, iteration: 409566
loss: 1.0256645679473877,grad_norm: 0.7432912213634544, iteration: 409567
loss: 1.0619736909866333,grad_norm: 0.9641958162434665, iteration: 409568
loss: 1.0231858491897583,grad_norm: 0.853207025772725, iteration: 409569
loss: 1.0160714387893677,grad_norm: 0.8053658125961406, iteration: 409570
loss: 1.022140383720398,grad_norm: 0.7422636881455885, iteration: 409571
loss: 0.9830079674720764,grad_norm: 0.9999992276407421, iteration: 409572
loss: 1.0038243532180786,grad_norm: 0.9999991891296853, iteration: 409573
loss: 0.9972545504570007,grad_norm: 0.8513708160566639, iteration: 409574
loss: 0.9939742684364319,grad_norm: 0.7837667135102279, iteration: 409575
loss: 1.0347975492477417,grad_norm: 0.9561697221767255, iteration: 409576
loss: 0.9692694544792175,grad_norm: 0.876431737056908, iteration: 409577
loss: 1.0026377439498901,grad_norm: 0.7268951292644881, iteration: 409578
loss: 1.0449085235595703,grad_norm: 0.9566768186869119, iteration: 409579
loss: 0.9970398545265198,grad_norm: 0.7756207690556468, iteration: 409580
loss: 0.9925603270530701,grad_norm: 0.8227851087070007, iteration: 409581
loss: 0.9846186637878418,grad_norm: 0.9072178779005498, iteration: 409582
loss: 1.013748288154602,grad_norm: 0.7977374501038605, iteration: 409583
loss: 1.013678789138794,grad_norm: 0.9999992708861215, iteration: 409584
loss: 0.9751821160316467,grad_norm: 0.8643552957293537, iteration: 409585
loss: 1.0249230861663818,grad_norm: 0.9999998705425552, iteration: 409586
loss: 0.9800459146499634,grad_norm: 0.8696767824945366, iteration: 409587
loss: 0.9955196976661682,grad_norm: 0.9999992375613341, iteration: 409588
loss: 1.0407975912094116,grad_norm: 0.7947195181110782, iteration: 409589
loss: 1.0871282815933228,grad_norm: 0.9999998730187274, iteration: 409590
loss: 0.9855826497077942,grad_norm: 0.8070705349807077, iteration: 409591
loss: 0.9961823225021362,grad_norm: 0.6513183703390802, iteration: 409592
loss: 1.0294755697250366,grad_norm: 0.999999464011972, iteration: 409593
loss: 1.0162848234176636,grad_norm: 0.9999998951950599, iteration: 409594
loss: 1.0013720989227295,grad_norm: 0.9999993401148408, iteration: 409595
loss: 1.0011799335479736,grad_norm: 0.6477648322412354, iteration: 409596
loss: 1.062461495399475,grad_norm: 0.8439395039880684, iteration: 409597
loss: 1.0204068422317505,grad_norm: 0.6751752843226297, iteration: 409598
loss: 1.0255106687545776,grad_norm: 0.955262069283871, iteration: 409599
loss: 1.1115390062332153,grad_norm: 0.7943182246273948, iteration: 409600
loss: 1.0403633117675781,grad_norm: 0.9999995062006884, iteration: 409601
loss: 0.9808936715126038,grad_norm: 0.8027436569103767, iteration: 409602
loss: 0.980024516582489,grad_norm: 0.8393068040840002, iteration: 409603
loss: 1.0948097705841064,grad_norm: 0.9999992933731877, iteration: 409604
loss: 1.0026684999465942,grad_norm: 0.6979937559088604, iteration: 409605
loss: 1.0378879308700562,grad_norm: 0.7764836704565208, iteration: 409606
loss: 1.0505527257919312,grad_norm: 0.8713555885707599, iteration: 409607
loss: 1.012164831161499,grad_norm: 0.6670217275782188, iteration: 409608
loss: 1.0586453676223755,grad_norm: 0.9999994947434353, iteration: 409609
loss: 0.9573436379432678,grad_norm: 0.8377646248164627, iteration: 409610
loss: 1.1455951929092407,grad_norm: 0.9999991658479382, iteration: 409611
loss: 1.0670890808105469,grad_norm: 0.9999997493578201, iteration: 409612
loss: 1.0864979028701782,grad_norm: 0.9253130959766828, iteration: 409613
loss: 1.0608021020889282,grad_norm: 0.9999994829490777, iteration: 409614
loss: 0.9875596165657043,grad_norm: 0.7460806470406715, iteration: 409615
loss: 0.9686570167541504,grad_norm: 0.8272989768915455, iteration: 409616
loss: 1.0039087533950806,grad_norm: 0.976480566115313, iteration: 409617
loss: 0.9869984984397888,grad_norm: 0.7044376269364148, iteration: 409618
loss: 0.9831885695457458,grad_norm: 0.999999476966553, iteration: 409619
loss: 1.0109537839889526,grad_norm: 0.7585381508870224, iteration: 409620
loss: 0.9848806858062744,grad_norm: 0.9999990445842719, iteration: 409621
loss: 1.0185366868972778,grad_norm: 0.8469647308974777, iteration: 409622
loss: 1.012013554573059,grad_norm: 0.7264935648237693, iteration: 409623
loss: 0.9784812331199646,grad_norm: 0.6660661499135733, iteration: 409624
loss: 1.0684568881988525,grad_norm: 0.999999298626667, iteration: 409625
loss: 1.0026642084121704,grad_norm: 0.9999991905219076, iteration: 409626
loss: 1.029272437095642,grad_norm: 0.9331197449738667, iteration: 409627
loss: 1.0846498012542725,grad_norm: 0.7790718201354931, iteration: 409628
loss: 1.0770666599273682,grad_norm: 0.9999991372182925, iteration: 409629
loss: 0.9996893405914307,grad_norm: 0.9178175737968105, iteration: 409630
loss: 0.9483004808425903,grad_norm: 0.8389534528496149, iteration: 409631
loss: 1.0800451040267944,grad_norm: 0.999999874887143, iteration: 409632
loss: 1.0316754579544067,grad_norm: 0.9999997636439315, iteration: 409633
loss: 0.9679045081138611,grad_norm: 0.7900938277737766, iteration: 409634
loss: 0.9510435461997986,grad_norm: 0.6522716212412532, iteration: 409635
loss: 0.9967605471611023,grad_norm: 0.8167394420754431, iteration: 409636
loss: 1.065941572189331,grad_norm: 0.9351874886232358, iteration: 409637
loss: 1.0305734872817993,grad_norm: 0.920244930058601, iteration: 409638
loss: 0.9846987724304199,grad_norm: 0.8703998237393459, iteration: 409639
loss: 1.011857271194458,grad_norm: 0.9835282135345013, iteration: 409640
loss: 0.9744532704353333,grad_norm: 0.848877938806227, iteration: 409641
loss: 1.0692110061645508,grad_norm: 0.826931547347458, iteration: 409642
loss: 0.9941627979278564,grad_norm: 0.7854109630312605, iteration: 409643
loss: 1.0082112550735474,grad_norm: 0.9999994784556437, iteration: 409644
loss: 1.0049760341644287,grad_norm: 0.7382858727133541, iteration: 409645
loss: 0.9334021210670471,grad_norm: 0.9288865731417993, iteration: 409646
loss: 0.9911934733390808,grad_norm: 0.9999996954501079, iteration: 409647
loss: 1.0483174324035645,grad_norm: 0.8443529872463269, iteration: 409648
loss: 1.0177819728851318,grad_norm: 0.9999991513626866, iteration: 409649
loss: 1.0110111236572266,grad_norm: 0.7770701778260584, iteration: 409650
loss: 0.9657677412033081,grad_norm: 0.885198031260684, iteration: 409651
loss: 0.9648536443710327,grad_norm: 0.9060543589592096, iteration: 409652
loss: 1.0697574615478516,grad_norm: 0.9191915519688746, iteration: 409653
loss: 1.0214396715164185,grad_norm: 0.8723805587130763, iteration: 409654
loss: 0.9929330348968506,grad_norm: 0.6265693954637744, iteration: 409655
loss: 0.9984618425369263,grad_norm: 0.9999995123336178, iteration: 409656
loss: 1.0284385681152344,grad_norm: 0.813793759533739, iteration: 409657
loss: 1.0374658107757568,grad_norm: 0.7539395096243718, iteration: 409658
loss: 0.9532012939453125,grad_norm: 0.7314004222603461, iteration: 409659
loss: 1.088273048400879,grad_norm: 0.95590254072993, iteration: 409660
loss: 0.9885478019714355,grad_norm: 0.7976576694531295, iteration: 409661
loss: 1.049787163734436,grad_norm: 0.7555346788427518, iteration: 409662
loss: 0.9893743395805359,grad_norm: 0.8179191778915404, iteration: 409663
loss: 1.0076218843460083,grad_norm: 0.7379619207619038, iteration: 409664
loss: 1.0423681735992432,grad_norm: 0.9421216385520219, iteration: 409665
loss: 1.0220223665237427,grad_norm: 0.9999997978810279, iteration: 409666
loss: 1.0101909637451172,grad_norm: 0.8973126311884982, iteration: 409667
loss: 1.0799765586853027,grad_norm: 0.8110494589351877, iteration: 409668
loss: 1.0073294639587402,grad_norm: 0.8672659222867611, iteration: 409669
loss: 1.0295523405075073,grad_norm: 0.999999497884211, iteration: 409670
loss: 1.0054062604904175,grad_norm: 0.6759287911046179, iteration: 409671
loss: 1.0205788612365723,grad_norm: 0.8367908572846272, iteration: 409672
loss: 1.0177961587905884,grad_norm: 0.9999991763549475, iteration: 409673
loss: 1.0472482442855835,grad_norm: 0.9999992097836924, iteration: 409674
loss: 1.0089236497879028,grad_norm: 0.7716583376857142, iteration: 409675
loss: 1.009150505065918,grad_norm: 0.7242354675334643, iteration: 409676
loss: 0.994372546672821,grad_norm: 0.5921905216344155, iteration: 409677
loss: 0.9988054633140564,grad_norm: 0.8070910664532084, iteration: 409678
loss: 0.9889003038406372,grad_norm: 0.6875449481231105, iteration: 409679
loss: 1.0126910209655762,grad_norm: 0.7682402020808099, iteration: 409680
loss: 1.0106327533721924,grad_norm: 0.8520753837655894, iteration: 409681
loss: 1.0574733018875122,grad_norm: 0.9598540758376622, iteration: 409682
loss: 0.9533971548080444,grad_norm: 0.7504488807153215, iteration: 409683
loss: 1.0140880346298218,grad_norm: 0.7037321362766548, iteration: 409684
loss: 0.9883394837379456,grad_norm: 0.7682938164548924, iteration: 409685
loss: 1.006733775138855,grad_norm: 0.6680676334429376, iteration: 409686
loss: 1.0535684823989868,grad_norm: 0.9999992152855366, iteration: 409687
loss: 1.0086172819137573,grad_norm: 0.8484030349613485, iteration: 409688
loss: 1.0168815851211548,grad_norm: 0.9973080519708953, iteration: 409689
loss: 1.018629789352417,grad_norm: 0.6977791478809269, iteration: 409690
loss: 1.0299553871154785,grad_norm: 0.9999992697194751, iteration: 409691
loss: 1.0621963739395142,grad_norm: 0.999999699390986, iteration: 409692
loss: 1.0513906478881836,grad_norm: 0.9556788755443569, iteration: 409693
loss: 1.0454903841018677,grad_norm: 0.9999995562925852, iteration: 409694
loss: 0.9734542369842529,grad_norm: 0.6728822893133628, iteration: 409695
loss: 0.981808602809906,grad_norm: 0.7656507265789267, iteration: 409696
loss: 1.032288670539856,grad_norm: 0.8341348714151569, iteration: 409697
loss: 0.9776125550270081,grad_norm: 0.7627989991637331, iteration: 409698
loss: 0.9934564828872681,grad_norm: 0.9999992908209985, iteration: 409699
loss: 1.0660220384597778,grad_norm: 0.787541981292855, iteration: 409700
loss: 0.9770036935806274,grad_norm: 0.9999991049994946, iteration: 409701
loss: 0.9804028868675232,grad_norm: 0.7860908329093513, iteration: 409702
loss: 0.9642065763473511,grad_norm: 0.99856717967942, iteration: 409703
loss: 0.9848332405090332,grad_norm: 0.7581203159044315, iteration: 409704
loss: 1.0071723461151123,grad_norm: 0.9006914544260819, iteration: 409705
loss: 1.0034695863723755,grad_norm: 0.9999992046258638, iteration: 409706
loss: 1.0276122093200684,grad_norm: 0.7137374600703106, iteration: 409707
loss: 1.0011963844299316,grad_norm: 0.7483598038670211, iteration: 409708
loss: 1.0376609563827515,grad_norm: 0.6584380005673308, iteration: 409709
loss: 1.0508060455322266,grad_norm: 0.7689815178290814, iteration: 409710
loss: 0.9765318036079407,grad_norm: 0.8568249218338834, iteration: 409711
loss: 0.9867436289787292,grad_norm: 0.728223212347947, iteration: 409712
loss: 1.0013784170150757,grad_norm: 0.9319366311332378, iteration: 409713
loss: 0.9759129881858826,grad_norm: 0.773933292141637, iteration: 409714
loss: 0.9826865792274475,grad_norm: 0.7066587211088675, iteration: 409715
loss: 0.9882657527923584,grad_norm: 0.8019492544563434, iteration: 409716
loss: 0.9917118549346924,grad_norm: 0.7990257550207751, iteration: 409717
loss: 1.0806422233581543,grad_norm: 0.813536697099526, iteration: 409718
loss: 1.0000988245010376,grad_norm: 0.6740772958559081, iteration: 409719
loss: 0.9746431112289429,grad_norm: 0.9255856153889934, iteration: 409720
loss: 0.9734999537467957,grad_norm: 0.7732804212344925, iteration: 409721
loss: 0.9778976440429688,grad_norm: 0.8692455937727818, iteration: 409722
loss: 0.9952130317687988,grad_norm: 0.7597708346856921, iteration: 409723
loss: 1.0065584182739258,grad_norm: 0.6342953440993664, iteration: 409724
loss: 0.9902740120887756,grad_norm: 0.7690152813790363, iteration: 409725
loss: 1.0482723712921143,grad_norm: 0.7807479311182829, iteration: 409726
loss: 1.0545860528945923,grad_norm: 0.9999990910307901, iteration: 409727
loss: 1.1481674909591675,grad_norm: 0.9819193397513404, iteration: 409728
loss: 0.9852277636528015,grad_norm: 0.9999990827440389, iteration: 409729
loss: 0.9743576645851135,grad_norm: 0.6815539556963917, iteration: 409730
loss: 1.0087668895721436,grad_norm: 0.7783195787494598, iteration: 409731
loss: 1.0085101127624512,grad_norm: 0.8909730885649743, iteration: 409732
loss: 1.021134376525879,grad_norm: 0.7394783706710276, iteration: 409733
loss: 1.0768007040023804,grad_norm: 0.9095117277230006, iteration: 409734
loss: 1.0450282096862793,grad_norm: 0.7429930146205831, iteration: 409735
loss: 0.9916031956672668,grad_norm: 0.6974276669209314, iteration: 409736
loss: 1.0091147422790527,grad_norm: 0.7289259449691133, iteration: 409737
loss: 1.0638878345489502,grad_norm: 0.6943408366451291, iteration: 409738
loss: 0.9984203577041626,grad_norm: 0.7796666355659243, iteration: 409739
loss: 1.017566442489624,grad_norm: 0.6560117907847324, iteration: 409740
loss: 0.9814777374267578,grad_norm: 0.9638917032178619, iteration: 409741
loss: 1.037864089012146,grad_norm: 0.9999995335110751, iteration: 409742
loss: 1.0012516975402832,grad_norm: 0.8692095960169912, iteration: 409743
loss: 1.0150129795074463,grad_norm: 0.9293228087735976, iteration: 409744
loss: 1.002018690109253,grad_norm: 0.6895471880241912, iteration: 409745
loss: 0.9799131751060486,grad_norm: 0.790031499180002, iteration: 409746
loss: 0.960601806640625,grad_norm: 0.6878206242993173, iteration: 409747
loss: 0.9808608293533325,grad_norm: 0.8462696750819656, iteration: 409748
loss: 0.9969083070755005,grad_norm: 0.7595435528836648, iteration: 409749
loss: 1.0193696022033691,grad_norm: 0.9999993491017681, iteration: 409750
loss: 0.9694355130195618,grad_norm: 0.7522933091699583, iteration: 409751
loss: 1.068055272102356,grad_norm: 0.9999993285544199, iteration: 409752
loss: 1.067030906677246,grad_norm: 0.6890331051001928, iteration: 409753
loss: 0.9972913861274719,grad_norm: 0.7432462868569584, iteration: 409754
loss: 1.0069756507873535,grad_norm: 0.7888600805194742, iteration: 409755
loss: 1.0143239498138428,grad_norm: 0.9999991431580337, iteration: 409756
loss: 0.9968644380569458,grad_norm: 0.7746543902156195, iteration: 409757
loss: 0.9858300089836121,grad_norm: 0.6292886792696225, iteration: 409758
loss: 1.0284157991409302,grad_norm: 0.8267070940992448, iteration: 409759
loss: 0.9817421436309814,grad_norm: 0.7823098651978803, iteration: 409760
loss: 1.0572131872177124,grad_norm: 0.9999992889721122, iteration: 409761
loss: 1.1835004091262817,grad_norm: 0.9999999190836809, iteration: 409762
loss: 0.964712917804718,grad_norm: 0.7886085055046685, iteration: 409763
loss: 1.0577020645141602,grad_norm: 0.7431728137430356, iteration: 409764
loss: 1.013344407081604,grad_norm: 0.7619840584603327, iteration: 409765
loss: 1.0120849609375,grad_norm: 0.7152756110881318, iteration: 409766
loss: 1.0870822668075562,grad_norm: 0.7717217310256943, iteration: 409767
loss: 1.0739762783050537,grad_norm: 0.915729734401038, iteration: 409768
loss: 1.0219340324401855,grad_norm: 0.8127814231757373, iteration: 409769
loss: 1.001334309577942,grad_norm: 0.715463510370847, iteration: 409770
loss: 0.983316957950592,grad_norm: 0.7418060220015573, iteration: 409771
loss: 0.9873827695846558,grad_norm: 0.7861001842478524, iteration: 409772
loss: 1.0066882371902466,grad_norm: 0.7820681728302029, iteration: 409773
loss: 0.9881904721260071,grad_norm: 0.7770334760251801, iteration: 409774
loss: 0.9582720994949341,grad_norm: 0.9061298553149865, iteration: 409775
loss: 1.0207273960113525,grad_norm: 0.7414765338964778, iteration: 409776
loss: 1.0177055597305298,grad_norm: 0.7107889642785069, iteration: 409777
loss: 1.044045329093933,grad_norm: 0.8515615069348036, iteration: 409778
loss: 0.9712943434715271,grad_norm: 0.7560261608930577, iteration: 409779
loss: 0.9964150190353394,grad_norm: 0.7390478955185403, iteration: 409780
loss: 1.070711374282837,grad_norm: 0.992823923464781, iteration: 409781
loss: 1.0291887521743774,grad_norm: 0.8282348059063274, iteration: 409782
loss: 0.9975874423980713,grad_norm: 0.864419793449534, iteration: 409783
loss: 1.0525184869766235,grad_norm: 0.9999992164190016, iteration: 409784
loss: 1.0079132318496704,grad_norm: 0.8207730655726103, iteration: 409785
loss: 0.9947534203529358,grad_norm: 0.7775366928629273, iteration: 409786
loss: 0.991163969039917,grad_norm: 0.8228509921953286, iteration: 409787
loss: 1.0180706977844238,grad_norm: 0.8729735319025342, iteration: 409788
loss: 0.991558313369751,grad_norm: 0.9999992092125203, iteration: 409789
loss: 0.9755352139472961,grad_norm: 0.736041906598743, iteration: 409790
loss: 0.9878291487693787,grad_norm: 0.7098156436186275, iteration: 409791
loss: 0.979691743850708,grad_norm: 0.8399816105561082, iteration: 409792
loss: 1.0233174562454224,grad_norm: 0.6766056782750925, iteration: 409793
loss: 1.017999529838562,grad_norm: 0.7191740733778917, iteration: 409794
loss: 0.9799091219902039,grad_norm: 0.7009771894153537, iteration: 409795
loss: 1.071083664894104,grad_norm: 0.9999994775237765, iteration: 409796
loss: 0.9701403975486755,grad_norm: 0.7677906782661509, iteration: 409797
loss: 1.01455557346344,grad_norm: 0.683392009619215, iteration: 409798
loss: 1.0314209461212158,grad_norm: 0.7001504860029399, iteration: 409799
loss: 0.9868762493133545,grad_norm: 0.9999989955961265, iteration: 409800
loss: 1.0000358819961548,grad_norm: 0.9844666610839368, iteration: 409801
loss: 1.0127032995224,grad_norm: 0.6722784847886717, iteration: 409802
loss: 0.9981250762939453,grad_norm: 0.8023999437119836, iteration: 409803
loss: 1.0040748119354248,grad_norm: 0.7851862303159154, iteration: 409804
loss: 1.0330666303634644,grad_norm: 0.7482293329676661, iteration: 409805
loss: 1.0386968851089478,grad_norm: 0.7611404846324793, iteration: 409806
loss: 0.9981957674026489,grad_norm: 0.8410038142543524, iteration: 409807
loss: 0.991020917892456,grad_norm: 0.679678424164856, iteration: 409808
loss: 0.9941452741622925,grad_norm: 0.8248617746117686, iteration: 409809
loss: 0.9856610298156738,grad_norm: 0.7980839282129484, iteration: 409810
loss: 1.0226975679397583,grad_norm: 0.8069017393615832, iteration: 409811
loss: 0.9862987399101257,grad_norm: 0.7806992040920985, iteration: 409812
loss: 1.0101641416549683,grad_norm: 0.9999991057687657, iteration: 409813
loss: 0.9825506210327148,grad_norm: 0.7918490713870049, iteration: 409814
loss: 0.996714174747467,grad_norm: 0.7234867879979469, iteration: 409815
loss: 1.0056946277618408,grad_norm: 0.7693864578364937, iteration: 409816
loss: 1.0442728996276855,grad_norm: 0.9999998536317624, iteration: 409817
loss: 1.0418782234191895,grad_norm: 0.7977292713850931, iteration: 409818
loss: 1.0250108242034912,grad_norm: 0.8443499070460175, iteration: 409819
loss: 1.035387396812439,grad_norm: 0.8656533865534007, iteration: 409820
loss: 1.0157592296600342,grad_norm: 0.7468434324714416, iteration: 409821
loss: 0.9911611080169678,grad_norm: 0.8199309779847103, iteration: 409822
loss: 0.9913451671600342,grad_norm: 0.9999999002635988, iteration: 409823
loss: 1.0292519330978394,grad_norm: 0.7200252076598953, iteration: 409824
loss: 1.0427632331848145,grad_norm: 0.7672534411242586, iteration: 409825
loss: 1.0009427070617676,grad_norm: 0.9560965550421094, iteration: 409826
loss: 1.0050837993621826,grad_norm: 0.6762981349256261, iteration: 409827
loss: 1.0003881454467773,grad_norm: 0.7808962008048419, iteration: 409828
loss: 0.9754688143730164,grad_norm: 0.7541507743003953, iteration: 409829
loss: 1.0604757070541382,grad_norm: 0.7132769564852005, iteration: 409830
loss: 0.998821496963501,grad_norm: 0.802855399446046, iteration: 409831
loss: 0.9986096024513245,grad_norm: 0.8136130756640391, iteration: 409832
loss: 1.0419378280639648,grad_norm: 0.9999997116822683, iteration: 409833
loss: 0.9674966335296631,grad_norm: 0.9015855343411128, iteration: 409834
loss: 0.9853038191795349,grad_norm: 0.772482866187982, iteration: 409835
loss: 1.0182069540023804,grad_norm: 0.8880473553499961, iteration: 409836
loss: 0.9947560429573059,grad_norm: 0.7030258164307278, iteration: 409837
loss: 0.9935886859893799,grad_norm: 0.7758374409585714, iteration: 409838
loss: 1.0134693384170532,grad_norm: 0.7192415270915803, iteration: 409839
loss: 1.030318021774292,grad_norm: 0.9999998985767546, iteration: 409840
loss: 1.025903582572937,grad_norm: 0.8684270107919602, iteration: 409841
loss: 0.9618892669677734,grad_norm: 0.6852350928771993, iteration: 409842
loss: 1.0904759168624878,grad_norm: 0.8593318012336371, iteration: 409843
loss: 0.9920450448989868,grad_norm: 0.6137296724827774, iteration: 409844
loss: 1.0035048723220825,grad_norm: 0.7959739542829537, iteration: 409845
loss: 0.9985844492912292,grad_norm: 0.7827706877989964, iteration: 409846
loss: 0.9985175132751465,grad_norm: 0.7189189735934514, iteration: 409847
loss: 1.043758511543274,grad_norm: 0.9999993056706136, iteration: 409848
loss: 1.0030022859573364,grad_norm: 0.6884200996393661, iteration: 409849
loss: 0.9967392683029175,grad_norm: 0.7114363765876158, iteration: 409850
loss: 1.0967192649841309,grad_norm: 0.9491947276803886, iteration: 409851
loss: 1.0068955421447754,grad_norm: 0.8863160318859752, iteration: 409852
loss: 0.9998754262924194,grad_norm: 0.8262621296968884, iteration: 409853
loss: 0.9558542966842651,grad_norm: 0.6739474820658243, iteration: 409854
loss: 1.0110838413238525,grad_norm: 0.8893429585250726, iteration: 409855
loss: 0.9901086091995239,grad_norm: 0.785844966452247, iteration: 409856
loss: 1.0093822479248047,grad_norm: 0.7330990751542887, iteration: 409857
loss: 1.0173646211624146,grad_norm: 0.6661155299049942, iteration: 409858
loss: 1.0114697217941284,grad_norm: 0.7189789999536121, iteration: 409859
loss: 1.0047385692596436,grad_norm: 0.7957529977758931, iteration: 409860
loss: 1.0096499919891357,grad_norm: 0.7772177532556802, iteration: 409861
loss: 1.0049200057983398,grad_norm: 0.814512630214817, iteration: 409862
loss: 1.0774962902069092,grad_norm: 0.9375247398514434, iteration: 409863
loss: 1.0364583730697632,grad_norm: 0.8698089459321328, iteration: 409864
loss: 0.9920918345451355,grad_norm: 0.6678565649941834, iteration: 409865
loss: 0.9653579592704773,grad_norm: 0.8622732501289805, iteration: 409866
loss: 1.0094417333602905,grad_norm: 0.6831590772911126, iteration: 409867
loss: 1.0172027349472046,grad_norm: 0.8751817538646595, iteration: 409868
loss: 0.9928926229476929,grad_norm: 0.7743138870690512, iteration: 409869
loss: 0.996904730796814,grad_norm: 0.8241797220031569, iteration: 409870
loss: 1.025697946548462,grad_norm: 0.7863597898849429, iteration: 409871
loss: 1.0187410116195679,grad_norm: 0.9999993549083996, iteration: 409872
loss: 0.9773961901664734,grad_norm: 0.9819863033624421, iteration: 409873
loss: 0.9812500476837158,grad_norm: 0.7927364624986836, iteration: 409874
loss: 1.0408145189285278,grad_norm: 0.7979330813302983, iteration: 409875
loss: 0.9973256587982178,grad_norm: 0.9999992870186085, iteration: 409876
loss: 1.0416041612625122,grad_norm: 0.9999998095487699, iteration: 409877
loss: 1.0054212808609009,grad_norm: 0.8401729986466214, iteration: 409878
loss: 1.0013453960418701,grad_norm: 0.9999990999887277, iteration: 409879
loss: 0.9928061366081238,grad_norm: 0.7671536385266797, iteration: 409880
loss: 0.9900505542755127,grad_norm: 0.6224119571516041, iteration: 409881
loss: 1.0463862419128418,grad_norm: 0.9999990626775759, iteration: 409882
loss: 1.0220811367034912,grad_norm: 0.8319332957430573, iteration: 409883
loss: 1.0332262516021729,grad_norm: 0.9121114764361771, iteration: 409884
loss: 0.9460451602935791,grad_norm: 0.694476631217162, iteration: 409885
loss: 1.0066299438476562,grad_norm: 0.9707746216676761, iteration: 409886
loss: 1.070762038230896,grad_norm: 0.9999991223218258, iteration: 409887
loss: 1.0203090906143188,grad_norm: 0.7153589611537068, iteration: 409888
loss: 0.9862663149833679,grad_norm: 0.9034157095717888, iteration: 409889
loss: 1.0099029541015625,grad_norm: 0.7935442378962102, iteration: 409890
loss: 0.9736546277999878,grad_norm: 0.8512598384103389, iteration: 409891
loss: 0.9912395477294922,grad_norm: 0.6303169740907967, iteration: 409892
loss: 0.9577147960662842,grad_norm: 0.8210916870280786, iteration: 409893
loss: 1.0296183824539185,grad_norm: 0.9578526015849592, iteration: 409894
loss: 1.0012166500091553,grad_norm: 0.9999999406108574, iteration: 409895
loss: 0.9664986729621887,grad_norm: 0.9058334886302177, iteration: 409896
loss: 1.0512772798538208,grad_norm: 0.8249733048068366, iteration: 409897
loss: 0.9650421142578125,grad_norm: 0.8308808710867636, iteration: 409898
loss: 1.0063109397888184,grad_norm: 0.7620565995367218, iteration: 409899
loss: 0.9966190457344055,grad_norm: 0.8794336654947201, iteration: 409900
loss: 1.0226680040359497,grad_norm: 0.7206031074279633, iteration: 409901
loss: 0.9969502687454224,grad_norm: 0.7255669483801196, iteration: 409902
loss: 0.9873524308204651,grad_norm: 0.8626194634381285, iteration: 409903
loss: 1.0019655227661133,grad_norm: 0.7366445538397033, iteration: 409904
loss: 0.9817230701446533,grad_norm: 0.855874739855767, iteration: 409905
loss: 0.9924067258834839,grad_norm: 0.6196269745659239, iteration: 409906
loss: 1.0140060186386108,grad_norm: 0.9999993690568784, iteration: 409907
loss: 1.0412741899490356,grad_norm: 0.8323935690186836, iteration: 409908
loss: 1.0250118970870972,grad_norm: 0.7497560963690605, iteration: 409909
loss: 1.0614337921142578,grad_norm: 0.7578206545352827, iteration: 409910
loss: 1.0086313486099243,grad_norm: 0.7970474200592914, iteration: 409911
loss: 1.005126953125,grad_norm: 0.9888930110913687, iteration: 409912
loss: 1.0265921354293823,grad_norm: 0.9866438424864129, iteration: 409913
loss: 1.0126279592514038,grad_norm: 0.7631693529365355, iteration: 409914
loss: 1.0188767910003662,grad_norm: 0.9999989820870518, iteration: 409915
loss: 0.9926245808601379,grad_norm: 0.8749914938956657, iteration: 409916
loss: 1.0101944208145142,grad_norm: 0.9999990965768735, iteration: 409917
loss: 1.0001473426818848,grad_norm: 0.7052821139214207, iteration: 409918
loss: 0.992459237575531,grad_norm: 0.6437357526539286, iteration: 409919
loss: 1.0010802745819092,grad_norm: 0.9486377661959566, iteration: 409920
loss: 0.969860851764679,grad_norm: 0.7557319547755618, iteration: 409921
loss: 1.01457679271698,grad_norm: 0.7813454481705403, iteration: 409922
loss: 1.0000386238098145,grad_norm: 0.7639368545807579, iteration: 409923
loss: 0.9797036051750183,grad_norm: 0.8776169598005287, iteration: 409924
loss: 1.009030818939209,grad_norm: 0.6868681103664004, iteration: 409925
loss: 1.0475871562957764,grad_norm: 0.7776491284610707, iteration: 409926
loss: 0.9619441032409668,grad_norm: 0.772896495529319, iteration: 409927
loss: 1.0163828134536743,grad_norm: 0.8156655842974794, iteration: 409928
loss: 1.0463918447494507,grad_norm: 0.9999992925442515, iteration: 409929
loss: 1.0044440031051636,grad_norm: 0.7107011706141725, iteration: 409930
loss: 1.0578352212905884,grad_norm: 0.7283543431923395, iteration: 409931
loss: 0.9946405291557312,grad_norm: 0.8575441483495762, iteration: 409932
loss: 1.034487247467041,grad_norm: 0.9999994976992729, iteration: 409933
loss: 0.9779743552207947,grad_norm: 0.999999347397409, iteration: 409934
loss: 1.0093960762023926,grad_norm: 0.7221015207552189, iteration: 409935
loss: 0.9764626622200012,grad_norm: 0.9400770988947634, iteration: 409936
loss: 1.0253061056137085,grad_norm: 0.7649858351015095, iteration: 409937
loss: 1.0350638628005981,grad_norm: 0.8938150808463654, iteration: 409938
loss: 0.9904942512512207,grad_norm: 0.7619789481271739, iteration: 409939
loss: 0.9599171876907349,grad_norm: 0.7638609865073954, iteration: 409940
loss: 1.0178474187850952,grad_norm: 0.6541069164932561, iteration: 409941
loss: 1.0631155967712402,grad_norm: 0.8446729892110642, iteration: 409942
loss: 0.9980735182762146,grad_norm: 0.9999991006677895, iteration: 409943
loss: 0.9978525638580322,grad_norm: 0.7562648591137503, iteration: 409944
loss: 1.0480153560638428,grad_norm: 0.9999993935238863, iteration: 409945
loss: 1.0101650953292847,grad_norm: 0.6860232729796897, iteration: 409946
loss: 0.9992244243621826,grad_norm: 0.6393389452591187, iteration: 409947
loss: 1.009499430656433,grad_norm: 0.731304234777695, iteration: 409948
loss: 0.9838641881942749,grad_norm: 0.756710763653833, iteration: 409949
loss: 0.9701517820358276,grad_norm: 0.9422132341135786, iteration: 409950
loss: 0.9610746502876282,grad_norm: 0.6591937168214789, iteration: 409951
loss: 0.9937339425086975,grad_norm: 0.9873219061452395, iteration: 409952
loss: 1.0998481512069702,grad_norm: 0.999999142653492, iteration: 409953
loss: 1.0183460712432861,grad_norm: 0.7729483871380108, iteration: 409954
loss: 1.0171555280685425,grad_norm: 0.7101015113798234, iteration: 409955
loss: 1.0274958610534668,grad_norm: 0.9111409504688212, iteration: 409956
loss: 1.011671543121338,grad_norm: 0.768217100626271, iteration: 409957
loss: 0.9992169737815857,grad_norm: 0.7534737993861873, iteration: 409958
loss: 0.9902912974357605,grad_norm: 0.8438453746213245, iteration: 409959
loss: 1.0097335577011108,grad_norm: 0.8441723285396101, iteration: 409960
loss: 1.0260578393936157,grad_norm: 0.6916499063067266, iteration: 409961
loss: 0.9988012313842773,grad_norm: 0.8077563914122863, iteration: 409962
loss: 0.9779317378997803,grad_norm: 0.6782640712647704, iteration: 409963
loss: 1.0171914100646973,grad_norm: 0.7498885381724093, iteration: 409964
loss: 1.021153211593628,grad_norm: 0.7152427421944701, iteration: 409965
loss: 1.048006534576416,grad_norm: 0.8149012160618682, iteration: 409966
loss: 1.0274678468704224,grad_norm: 0.7577561615513673, iteration: 409967
loss: 1.038747787475586,grad_norm: 0.7929280133354455, iteration: 409968
loss: 1.038787603378296,grad_norm: 0.9999994312722571, iteration: 409969
loss: 0.9974297285079956,grad_norm: 0.8963320934452975, iteration: 409970
loss: 1.0323679447174072,grad_norm: 0.8428902191029607, iteration: 409971
loss: 0.9737741351127625,grad_norm: 0.766843708004259, iteration: 409972
loss: 1.054339051246643,grad_norm: 0.705533914435185, iteration: 409973
loss: 1.0027296543121338,grad_norm: 0.9214461654736867, iteration: 409974
loss: 0.9981206655502319,grad_norm: 0.7612191242412844, iteration: 409975
loss: 0.9923093318939209,grad_norm: 0.7460771439992276, iteration: 409976
loss: 0.9920931458473206,grad_norm: 0.953322071139587, iteration: 409977
loss: 1.012396216392517,grad_norm: 0.7114506476603308, iteration: 409978
loss: 1.0259065628051758,grad_norm: 0.9999999606752306, iteration: 409979
loss: 0.984111487865448,grad_norm: 0.6603186912663114, iteration: 409980
loss: 0.9891647100448608,grad_norm: 0.9078269348679657, iteration: 409981
loss: 1.0014941692352295,grad_norm: 0.9999992814893633, iteration: 409982
loss: 1.0052828788757324,grad_norm: 0.7968342328093372, iteration: 409983
loss: 1.0033950805664062,grad_norm: 0.8783431312342046, iteration: 409984
loss: 1.0041849613189697,grad_norm: 0.999999766531589, iteration: 409985
loss: 1.0048671960830688,grad_norm: 0.755738831587157, iteration: 409986
loss: 0.99960857629776,grad_norm: 0.7354470417729951, iteration: 409987
loss: 1.0053025484085083,grad_norm: 0.6630326429721286, iteration: 409988
loss: 0.9763320088386536,grad_norm: 0.8950614874526711, iteration: 409989
loss: 0.9941534399986267,grad_norm: 0.9124553422518411, iteration: 409990
loss: 1.1360909938812256,grad_norm: 0.7569951164062644, iteration: 409991
loss: 0.9974538087844849,grad_norm: 0.6950633323781953, iteration: 409992
loss: 1.015167474746704,grad_norm: 0.8544527388541064, iteration: 409993
loss: 1.0108357667922974,grad_norm: 0.9999991929398219, iteration: 409994
loss: 0.9962635040283203,grad_norm: 0.729854844424408, iteration: 409995
loss: 1.0227880477905273,grad_norm: 0.8591036046640325, iteration: 409996
loss: 1.0273594856262207,grad_norm: 0.8261906444636689, iteration: 409997
loss: 0.9804620146751404,grad_norm: 0.8407462006565025, iteration: 409998
loss: 0.9598747491836548,grad_norm: 0.6813033246206427, iteration: 409999
loss: 1.0909647941589355,grad_norm: 0.9999996100333749, iteration: 410000
Evaluating at step 410000
{'val': 0.9945075456053019, 'test': 1.9613401722222472}
loss: 1.0189706087112427,grad_norm: 0.9999990566028647, iteration: 410001
loss: 0.9963159561157227,grad_norm: 0.8689862462938105, iteration: 410002
loss: 0.9805270433425903,grad_norm: 0.9999990144742074, iteration: 410003
loss: 1.0359077453613281,grad_norm: 0.9992604981919633, iteration: 410004
loss: 1.0505168437957764,grad_norm: 0.658079711360179, iteration: 410005
loss: 0.9632046818733215,grad_norm: 0.9358656901422339, iteration: 410006
loss: 1.0178066492080688,grad_norm: 0.7020712842358038, iteration: 410007
loss: 0.9903436303138733,grad_norm: 0.7301552084916099, iteration: 410008
loss: 1.006626009941101,grad_norm: 0.7771399626988732, iteration: 410009
loss: 1.0580908060073853,grad_norm: 0.9999994959734314, iteration: 410010
loss: 0.979880154132843,grad_norm: 0.6968928713124286, iteration: 410011
loss: 0.9764319658279419,grad_norm: 0.9058289651267589, iteration: 410012
loss: 1.0004390478134155,grad_norm: 0.659572169151671, iteration: 410013
loss: 0.9965388774871826,grad_norm: 0.7120177740397958, iteration: 410014
loss: 1.082633376121521,grad_norm: 0.9999996384572069, iteration: 410015
loss: 1.016729712486267,grad_norm: 0.9049125309732668, iteration: 410016
loss: 0.9858431816101074,grad_norm: 0.853480734774074, iteration: 410017
loss: 1.0386900901794434,grad_norm: 0.999999168908775, iteration: 410018
loss: 0.9895668029785156,grad_norm: 0.99999924691033, iteration: 410019
loss: 1.0336427688598633,grad_norm: 0.7725008960366317, iteration: 410020
loss: 1.0073307752609253,grad_norm: 0.8369830865429784, iteration: 410021
loss: 1.045552372932434,grad_norm: 0.8372798118957526, iteration: 410022
loss: 1.035109519958496,grad_norm: 0.8054000583063192, iteration: 410023
loss: 0.9759518504142761,grad_norm: 0.7758526878202824, iteration: 410024
loss: 1.0058997869491577,grad_norm: 0.9999997180645922, iteration: 410025
loss: 0.9603172540664673,grad_norm: 0.8058431559067104, iteration: 410026
loss: 1.0001119375228882,grad_norm: 0.9005587604372123, iteration: 410027
loss: 0.98115473985672,grad_norm: 0.681186792276428, iteration: 410028
loss: 1.0087690353393555,grad_norm: 0.9724131433682408, iteration: 410029
loss: 0.990818440914154,grad_norm: 0.9704465018719135, iteration: 410030
loss: 1.0000361204147339,grad_norm: 0.7662567859461878, iteration: 410031
loss: 0.9891620874404907,grad_norm: 0.6872854471783191, iteration: 410032
loss: 1.1274999380111694,grad_norm: 0.9999992204654155, iteration: 410033
loss: 0.9885438680648804,grad_norm: 0.9860311313866192, iteration: 410034
loss: 1.0162049531936646,grad_norm: 0.8509820523287398, iteration: 410035
loss: 1.001200556755066,grad_norm: 0.7565508383296169, iteration: 410036
loss: 1.0154616832733154,grad_norm: 0.9999994098746575, iteration: 410037
loss: 0.9759963154792786,grad_norm: 0.9999990934861275, iteration: 410038
loss: 1.0185787677764893,grad_norm: 0.7133255341826816, iteration: 410039
loss: 1.0356537103652954,grad_norm: 0.9999996779299691, iteration: 410040
loss: 1.0671576261520386,grad_norm: 0.999999686576474, iteration: 410041
loss: 0.9729459881782532,grad_norm: 0.9999991678510607, iteration: 410042
loss: 0.9855089783668518,grad_norm: 0.7042722796101368, iteration: 410043
loss: 0.9833565354347229,grad_norm: 0.808829144282517, iteration: 410044
loss: 1.026288628578186,grad_norm: 0.683991521526664, iteration: 410045
loss: 0.984588086605072,grad_norm: 0.9491763974305285, iteration: 410046
loss: 1.0079110860824585,grad_norm: 0.9999995303943436, iteration: 410047
loss: 1.0274401903152466,grad_norm: 0.9514934487545034, iteration: 410048
loss: 1.0890295505523682,grad_norm: 0.9999993446952636, iteration: 410049
loss: 0.9939292073249817,grad_norm: 0.7123559785569368, iteration: 410050
loss: 1.0101208686828613,grad_norm: 0.8258469889777506, iteration: 410051
loss: 0.9626474976539612,grad_norm: 0.9999999178545979, iteration: 410052
loss: 1.0155737400054932,grad_norm: 0.9999995520290035, iteration: 410053
loss: 1.0238170623779297,grad_norm: 0.7457397765021143, iteration: 410054
loss: 0.9852766394615173,grad_norm: 0.9999999381808691, iteration: 410055
loss: 0.9762190580368042,grad_norm: 0.8047262194637368, iteration: 410056
loss: 0.98707115650177,grad_norm: 0.9999994825315013, iteration: 410057
loss: 0.9905563592910767,grad_norm: 0.7865343183824485, iteration: 410058
loss: 1.0136288404464722,grad_norm: 0.835185674712376, iteration: 410059
loss: 1.2794115543365479,grad_norm: 0.999999565718366, iteration: 410060
loss: 0.9998213052749634,grad_norm: 0.7208162334194821, iteration: 410061
loss: 1.0174885988235474,grad_norm: 0.8512148755592747, iteration: 410062
loss: 1.0067623853683472,grad_norm: 0.8033267411954021, iteration: 410063
loss: 1.033515214920044,grad_norm: 0.9506433793718823, iteration: 410064
loss: 1.1220303773880005,grad_norm: 0.9999998551896104, iteration: 410065
loss: 0.9640297293663025,grad_norm: 0.8428496743202422, iteration: 410066
loss: 1.0267459154129028,grad_norm: 0.9999996556962611, iteration: 410067
loss: 1.1017699241638184,grad_norm: 0.9999997495909024, iteration: 410068
loss: 1.1006475687026978,grad_norm: 0.9999997579062977, iteration: 410069
loss: 1.0794774293899536,grad_norm: 0.9999994643433578, iteration: 410070
loss: 1.0411622524261475,grad_norm: 0.7393721728420819, iteration: 410071
loss: 1.05915105342865,grad_norm: 0.9989936985717178, iteration: 410072
loss: 1.0164117813110352,grad_norm: 0.9408087107266632, iteration: 410073
loss: 1.0305290222167969,grad_norm: 0.9999996781573279, iteration: 410074
loss: 1.0191104412078857,grad_norm: 0.999999112264209, iteration: 410075
loss: 0.9734776020050049,grad_norm: 0.8243127138768804, iteration: 410076
loss: 1.030207633972168,grad_norm: 0.8264099935688879, iteration: 410077
loss: 0.9985610246658325,grad_norm: 0.9999991212763126, iteration: 410078
loss: 1.0150169134140015,grad_norm: 0.9999991068297528, iteration: 410079
loss: 0.980398416519165,grad_norm: 0.8107002563516763, iteration: 410080
loss: 1.0338730812072754,grad_norm: 0.8092257278125767, iteration: 410081
loss: 1.0800139904022217,grad_norm: 0.9157175831071019, iteration: 410082
loss: 1.0145102739334106,grad_norm: 0.9403641971408434, iteration: 410083
loss: 0.9776157140731812,grad_norm: 0.7716113677978766, iteration: 410084
loss: 0.9902321696281433,grad_norm: 0.6951901051847004, iteration: 410085
loss: 1.0214967727661133,grad_norm: 0.9999992923351204, iteration: 410086
loss: 1.077946662902832,grad_norm: 0.9999990090000574, iteration: 410087
loss: 1.0157794952392578,grad_norm: 0.9999991384208134, iteration: 410088
loss: 0.9869068264961243,grad_norm: 0.8222657075697569, iteration: 410089
loss: 1.0005683898925781,grad_norm: 0.6812091402510605, iteration: 410090
loss: 0.9999865293502808,grad_norm: 0.7037069287038611, iteration: 410091
loss: 1.035068154335022,grad_norm: 0.999999772506388, iteration: 410092
loss: 1.0178416967391968,grad_norm: 0.7291421172958368, iteration: 410093
loss: 0.974389374256134,grad_norm: 0.8169368893171359, iteration: 410094
loss: 0.9701517820358276,grad_norm: 0.9378875403116355, iteration: 410095
loss: 1.026450753211975,grad_norm: 0.8692986475971677, iteration: 410096
loss: 1.0008267164230347,grad_norm: 0.9340960187782897, iteration: 410097
loss: 1.0653846263885498,grad_norm: 0.7816819560720483, iteration: 410098
loss: 1.0633701086044312,grad_norm: 0.9283505686896356, iteration: 410099
loss: 1.0597589015960693,grad_norm: 0.9999990666084645, iteration: 410100
loss: 1.0212773084640503,grad_norm: 0.7983928742301384, iteration: 410101
loss: 1.0013235807418823,grad_norm: 0.7413998657838566, iteration: 410102
loss: 1.1093965768814087,grad_norm: 0.9999991633382376, iteration: 410103
loss: 1.0038518905639648,grad_norm: 0.8097158535993745, iteration: 410104
loss: 0.975528359413147,grad_norm: 0.8121135772030049, iteration: 410105
loss: 0.9857321381568909,grad_norm: 0.9624797528599981, iteration: 410106
loss: 0.9626200795173645,grad_norm: 0.7510446325256419, iteration: 410107
loss: 0.9972736239433289,grad_norm: 0.7830058390845964, iteration: 410108
loss: 0.9957491755485535,grad_norm: 0.9011036290811635, iteration: 410109
loss: 1.0118627548217773,grad_norm: 0.78465243864083, iteration: 410110
loss: 1.1350882053375244,grad_norm: 0.9999992047408869, iteration: 410111
loss: 1.0653208494186401,grad_norm: 0.8872592052750901, iteration: 410112
loss: 1.1395890712738037,grad_norm: 0.9999990843497546, iteration: 410113
loss: 1.0133259296417236,grad_norm: 0.9999993615261747, iteration: 410114
loss: 1.0730957984924316,grad_norm: 0.8666618125158426, iteration: 410115
loss: 1.039443850517273,grad_norm: 0.7570281485515461, iteration: 410116
loss: 1.0257902145385742,grad_norm: 0.7372729054491257, iteration: 410117
loss: 1.0103460550308228,grad_norm: 0.749746308992146, iteration: 410118
loss: 1.0186086893081665,grad_norm: 0.857496943970892, iteration: 410119
loss: 1.0042415857315063,grad_norm: 0.6978945956539807, iteration: 410120
loss: 0.99740070104599,grad_norm: 0.8329737683082783, iteration: 410121
loss: 0.9749560356140137,grad_norm: 0.7686576548745994, iteration: 410122
loss: 1.1020288467407227,grad_norm: 0.9999997977104517, iteration: 410123
loss: 1.1168475151062012,grad_norm: 0.9999993406209893, iteration: 410124
loss: 1.014691710472107,grad_norm: 0.7990436451666857, iteration: 410125
loss: 1.0001630783081055,grad_norm: 0.9999991638917666, iteration: 410126
loss: 0.9997649788856506,grad_norm: 0.721895278544217, iteration: 410127
loss: 1.0060173273086548,grad_norm: 0.9999996813411999, iteration: 410128
loss: 1.0004611015319824,grad_norm: 0.8882823209291981, iteration: 410129
loss: 1.0244083404541016,grad_norm: 0.9999993885264112, iteration: 410130
loss: 1.0025358200073242,grad_norm: 0.9999996812602202, iteration: 410131
loss: 1.0992732048034668,grad_norm: 0.9999998601973362, iteration: 410132
loss: 1.0227833986282349,grad_norm: 0.9189292282594947, iteration: 410133
loss: 1.0612504482269287,grad_norm: 0.8290170021745908, iteration: 410134
loss: 0.9934024810791016,grad_norm: 0.7549745391159207, iteration: 410135
loss: 1.0146394968032837,grad_norm: 0.7608983493153768, iteration: 410136
loss: 0.9858412742614746,grad_norm: 0.7150003840576755, iteration: 410137
loss: 0.9867976307868958,grad_norm: 0.8431023536145209, iteration: 410138
loss: 1.0482230186462402,grad_norm: 0.999999650809038, iteration: 410139
loss: 0.984795093536377,grad_norm: 0.9999994791308491, iteration: 410140
loss: 0.9936528205871582,grad_norm: 0.8736104335839474, iteration: 410141
loss: 1.0117071866989136,grad_norm: 0.9999997095828344, iteration: 410142
loss: 1.004522681236267,grad_norm: 0.8182878856065006, iteration: 410143
loss: 1.03135085105896,grad_norm: 0.9148152262112997, iteration: 410144
loss: 1.019498348236084,grad_norm: 0.8381942411238293, iteration: 410145
loss: 0.9968071579933167,grad_norm: 0.9999990987342899, iteration: 410146
loss: 0.9843714833259583,grad_norm: 0.9999993544464186, iteration: 410147
loss: 1.027789831161499,grad_norm: 0.7757569606770994, iteration: 410148
loss: 1.0085017681121826,grad_norm: 0.8333024525442309, iteration: 410149
loss: 0.9709546566009521,grad_norm: 0.8738300946161681, iteration: 410150
loss: 1.0163264274597168,grad_norm: 0.7228053595419004, iteration: 410151
loss: 1.0274475812911987,grad_norm: 0.9999991471914421, iteration: 410152
loss: 1.032347321510315,grad_norm: 0.7547101193557729, iteration: 410153
loss: 0.9961186051368713,grad_norm: 0.8843133166198843, iteration: 410154
loss: 0.9861175417900085,grad_norm: 0.9999993247470564, iteration: 410155
loss: 0.9886215329170227,grad_norm: 0.8684070045552646, iteration: 410156
loss: 1.0399425029754639,grad_norm: 0.8051465191492928, iteration: 410157
loss: 0.965066134929657,grad_norm: 0.9532714532649667, iteration: 410158
loss: 1.0185719728469849,grad_norm: 0.8195172929535266, iteration: 410159
loss: 0.9823899269104004,grad_norm: 0.8106387607196213, iteration: 410160
loss: 1.057935118675232,grad_norm: 0.9999996168637576, iteration: 410161
loss: 0.9997267723083496,grad_norm: 0.7344044884725693, iteration: 410162
loss: 1.2140082120895386,grad_norm: 0.9999999455364779, iteration: 410163
loss: 1.1906026601791382,grad_norm: 0.9999998942542799, iteration: 410164
loss: 1.0032448768615723,grad_norm: 0.712385701848637, iteration: 410165
loss: 0.9892701506614685,grad_norm: 0.9999991080855081, iteration: 410166
loss: 0.9852693676948547,grad_norm: 0.6451424845301185, iteration: 410167
loss: 0.9976084232330322,grad_norm: 0.9999992254725784, iteration: 410168
loss: 1.0442768335342407,grad_norm: 0.9341419567603745, iteration: 410169
loss: 1.0604939460754395,grad_norm: 0.7556529681312644, iteration: 410170
loss: 1.1762522459030151,grad_norm: 0.8827563826633763, iteration: 410171
loss: 1.0136218070983887,grad_norm: 0.7449654224025988, iteration: 410172
loss: 1.008589267730713,grad_norm: 0.8010404703489952, iteration: 410173
loss: 0.9334431290626526,grad_norm: 0.8439319310529584, iteration: 410174
loss: 1.0579808950424194,grad_norm: 0.8484748068172451, iteration: 410175
loss: 1.007737636566162,grad_norm: 0.7351411467432805, iteration: 410176
loss: 0.9769042730331421,grad_norm: 0.9907267572801498, iteration: 410177
loss: 1.0194097757339478,grad_norm: 0.999999949730066, iteration: 410178
loss: 1.093056559562683,grad_norm: 0.9999994774116961, iteration: 410179
loss: 0.9753113985061646,grad_norm: 0.8189256617076485, iteration: 410180
loss: 0.9831143021583557,grad_norm: 0.8749742349491312, iteration: 410181
loss: 0.9999715685844421,grad_norm: 0.881760739343109, iteration: 410182
loss: 1.0055793523788452,grad_norm: 0.8084655590433265, iteration: 410183
loss: 0.9883922338485718,grad_norm: 0.6943722139914321, iteration: 410184
loss: 0.9901214838027954,grad_norm: 0.916791007279121, iteration: 410185
loss: 0.9687902927398682,grad_norm: 0.7100697421748073, iteration: 410186
loss: 1.0037648677825928,grad_norm: 0.7252659154899362, iteration: 410187
loss: 1.0056462287902832,grad_norm: 0.9999991396998752, iteration: 410188
loss: 1.0009692907333374,grad_norm: 0.9138995778482296, iteration: 410189
loss: 1.0043288469314575,grad_norm: 0.9068685150765766, iteration: 410190
loss: 1.0084933042526245,grad_norm: 0.6944960534573645, iteration: 410191
loss: 1.001151204109192,grad_norm: 0.6591303414961196, iteration: 410192
loss: 1.0049463510513306,grad_norm: 0.640772174640875, iteration: 410193
loss: 0.9854497909545898,grad_norm: 0.9674616020662485, iteration: 410194
loss: 1.0020095109939575,grad_norm: 0.834051879296369, iteration: 410195
loss: 1.0661258697509766,grad_norm: 0.9999993469063037, iteration: 410196
loss: 1.0000078678131104,grad_norm: 0.7422733035534523, iteration: 410197
loss: 1.0331190824508667,grad_norm: 0.7639640230680452, iteration: 410198
loss: 1.0603322982788086,grad_norm: 0.7663249968897062, iteration: 410199
loss: 1.005333662033081,grad_norm: 0.8831912842905479, iteration: 410200
loss: 0.977916419506073,grad_norm: 0.7739880162530187, iteration: 410201
loss: 0.9839566946029663,grad_norm: 0.7123258872487569, iteration: 410202
loss: 1.031064748764038,grad_norm: 0.7634806255556925, iteration: 410203
loss: 0.9964187145233154,grad_norm: 0.8028642554717107, iteration: 410204
loss: 1.0279452800750732,grad_norm: 0.8285907226157782, iteration: 410205
loss: 1.0047128200531006,grad_norm: 0.9999991015874783, iteration: 410206
loss: 1.0399739742279053,grad_norm: 0.7323431521497338, iteration: 410207
loss: 0.9811956286430359,grad_norm: 0.8358996963823148, iteration: 410208
loss: 1.1636748313903809,grad_norm: 0.9999999104696095, iteration: 410209
loss: 0.9792999625205994,grad_norm: 0.9999992310630089, iteration: 410210
loss: 1.033708095550537,grad_norm: 1.000000036283825, iteration: 410211
loss: 0.9948119521141052,grad_norm: 0.7573401449428583, iteration: 410212
loss: 0.9924371838569641,grad_norm: 0.6610471178068226, iteration: 410213
loss: 1.0040510892868042,grad_norm: 0.9999999080944488, iteration: 410214
loss: 0.9942998886108398,grad_norm: 0.784784104801126, iteration: 410215
loss: 0.9910366535186768,grad_norm: 0.8123677635648591, iteration: 410216
loss: 1.0129106044769287,grad_norm: 0.7267100018270377, iteration: 410217
loss: 0.9701665043830872,grad_norm: 0.9999996577218075, iteration: 410218
loss: 1.009628415107727,grad_norm: 0.7425467526784517, iteration: 410219
loss: 1.0092657804489136,grad_norm: 0.8999840704937034, iteration: 410220
loss: 1.0639369487762451,grad_norm: 0.9999994355830932, iteration: 410221
loss: 0.9823586344718933,grad_norm: 0.755198993035804, iteration: 410222
loss: 1.1514461040496826,grad_norm: 0.9999998032340276, iteration: 410223
loss: 1.2540490627288818,grad_norm: 0.9999999608067328, iteration: 410224
loss: 1.0684207677841187,grad_norm: 0.9999995543411082, iteration: 410225
loss: 1.0208040475845337,grad_norm: 0.6931827851177014, iteration: 410226
loss: 0.9714910387992859,grad_norm: 0.9805201992569283, iteration: 410227
loss: 1.0324828624725342,grad_norm: 0.9999992632405755, iteration: 410228
loss: 1.0115798711776733,grad_norm: 0.6600345519184135, iteration: 410229
loss: 1.0426840782165527,grad_norm: 0.9999996043897658, iteration: 410230
loss: 1.048947811126709,grad_norm: 0.9999994428691058, iteration: 410231
loss: 1.3590341806411743,grad_norm: 0.9999999754008024, iteration: 410232
loss: 0.9907006621360779,grad_norm: 0.9999996593878969, iteration: 410233
loss: 0.9912201166152954,grad_norm: 0.8149326443057594, iteration: 410234
loss: 1.01108980178833,grad_norm: 0.945176267968885, iteration: 410235
loss: 0.9982872009277344,grad_norm: 0.8574762401692183, iteration: 410236
loss: 1.0007063150405884,grad_norm: 0.8516349405238233, iteration: 410237
loss: 1.0010260343551636,grad_norm: 0.9999997055251935, iteration: 410238
loss: 0.995768129825592,grad_norm: 0.7025429242795679, iteration: 410239
loss: 1.0239123106002808,grad_norm: 0.9999990789840998, iteration: 410240
loss: 1.003891944885254,grad_norm: 0.7656468820194575, iteration: 410241
loss: 1.0002892017364502,grad_norm: 0.8336317740636584, iteration: 410242
loss: 1.0339279174804688,grad_norm: 0.7949203977812447, iteration: 410243
loss: 1.0738532543182373,grad_norm: 0.9999999957090709, iteration: 410244
loss: 1.0245964527130127,grad_norm: 0.7497356731968395, iteration: 410245
loss: 1.0540322065353394,grad_norm: 0.9999998259248893, iteration: 410246
loss: 1.0438427925109863,grad_norm: 0.9999991480600934, iteration: 410247
loss: 0.973629355430603,grad_norm: 0.8595943686439098, iteration: 410248
loss: 1.0295411348342896,grad_norm: 0.7243268247189164, iteration: 410249
loss: 1.0098354816436768,grad_norm: 0.9999998649101479, iteration: 410250
loss: 1.0646737813949585,grad_norm: 0.9999991368393573, iteration: 410251
loss: 0.9803885221481323,grad_norm: 0.726856313193454, iteration: 410252
loss: 0.9793570637702942,grad_norm: 0.7957714821019379, iteration: 410253
loss: 1.0096086263656616,grad_norm: 0.8263067214958647, iteration: 410254
loss: 1.008712649345398,grad_norm: 0.8740844497949601, iteration: 410255
loss: 1.011988878250122,grad_norm: 0.9999991950641901, iteration: 410256
loss: 1.0704751014709473,grad_norm: 0.9999991534031571, iteration: 410257
loss: 1.1320230960845947,grad_norm: 0.9999993867041815, iteration: 410258
loss: 1.0236973762512207,grad_norm: 0.8000096837709317, iteration: 410259
loss: 1.1428449153900146,grad_norm: 0.9999993809055461, iteration: 410260
loss: 1.0497359037399292,grad_norm: 0.7631437712612578, iteration: 410261
loss: 1.0518649816513062,grad_norm: 0.8658716058338748, iteration: 410262
loss: 1.0377740859985352,grad_norm: 0.9333504976764545, iteration: 410263
loss: 1.0248358249664307,grad_norm: 0.9170703494659245, iteration: 410264
loss: 1.0512346029281616,grad_norm: 0.9999990525446825, iteration: 410265
loss: 1.194438099861145,grad_norm: 0.9999998182203127, iteration: 410266
loss: 1.0472102165222168,grad_norm: 0.999999970372105, iteration: 410267
loss: 1.0100905895233154,grad_norm: 0.7604714199374306, iteration: 410268
loss: 1.0835314989089966,grad_norm: 0.9999992370845595, iteration: 410269
loss: 1.0968374013900757,grad_norm: 0.8181182897143437, iteration: 410270
loss: 1.01907479763031,grad_norm: 0.8154164651013659, iteration: 410271
loss: 0.9781956672668457,grad_norm: 0.6804867520190916, iteration: 410272
loss: 1.03462553024292,grad_norm: 0.9999994042030659, iteration: 410273
loss: 1.0124142169952393,grad_norm: 0.8298234489162453, iteration: 410274
loss: 1.0497595071792603,grad_norm: 1.0000000141125336, iteration: 410275
loss: 1.1108992099761963,grad_norm: 0.9999991664448051, iteration: 410276
loss: 1.0856008529663086,grad_norm: 0.9382576983962335, iteration: 410277
loss: 1.0521526336669922,grad_norm: 0.9999990937846959, iteration: 410278
loss: 1.0075428485870361,grad_norm: 0.999999119518984, iteration: 410279
loss: 1.102308750152588,grad_norm: 0.9999996945613715, iteration: 410280
loss: 1.0350648164749146,grad_norm: 0.999999076282228, iteration: 410281
loss: 1.0511372089385986,grad_norm: 0.9026091315879149, iteration: 410282
loss: 1.4173113107681274,grad_norm: 0.9999999782647667, iteration: 410283
loss: 1.3346184492111206,grad_norm: 0.9999996526426944, iteration: 410284
loss: 0.9700880646705627,grad_norm: 0.9999993651743186, iteration: 410285
loss: 1.0155245065689087,grad_norm: 0.8246078092707498, iteration: 410286
loss: 1.0298871994018555,grad_norm: 0.7267974835922747, iteration: 410287
loss: 0.9756402373313904,grad_norm: 0.9999997269650266, iteration: 410288
loss: 1.0426322221755981,grad_norm: 0.8292917297239061, iteration: 410289
loss: 1.009084701538086,grad_norm: 0.9999998590803536, iteration: 410290
loss: 1.1788018941879272,grad_norm: 0.999999713421637, iteration: 410291
loss: 1.0850694179534912,grad_norm: 0.9999997437509157, iteration: 410292
loss: 1.008943796157837,grad_norm: 0.8830855666306056, iteration: 410293
loss: 1.042303442955017,grad_norm: 0.9545030240284316, iteration: 410294
loss: 1.1256932020187378,grad_norm: 0.9999999797157312, iteration: 410295
loss: 1.0353906154632568,grad_norm: 0.7863256647574705, iteration: 410296
loss: 1.1060044765472412,grad_norm: 0.9999996136837077, iteration: 410297
loss: 1.0194883346557617,grad_norm: 0.9999990962686439, iteration: 410298
loss: 1.1159347295761108,grad_norm: 0.9999999311000404, iteration: 410299
loss: 1.0633156299591064,grad_norm: 0.7230591516382396, iteration: 410300
loss: 0.9600061178207397,grad_norm: 0.8532820629274958, iteration: 410301
loss: 1.0391108989715576,grad_norm: 0.9999991153907571, iteration: 410302
loss: 1.037262201309204,grad_norm: 0.9999990456767335, iteration: 410303
loss: 1.1158863306045532,grad_norm: 0.9999994823953147, iteration: 410304
loss: 1.0767992734909058,grad_norm: 0.9356632203434784, iteration: 410305
loss: 1.0507475137710571,grad_norm: 0.81436146644129, iteration: 410306
loss: 1.0377976894378662,grad_norm: 0.9999993610547122, iteration: 410307
loss: 0.9849176406860352,grad_norm: 0.8115234265370377, iteration: 410308
loss: 1.064560890197754,grad_norm: 0.9999999511766249, iteration: 410309
loss: 1.0168509483337402,grad_norm: 0.9306550770701012, iteration: 410310
loss: 1.036255955696106,grad_norm: 0.999999995035021, iteration: 410311
loss: 1.2340426445007324,grad_norm: 0.9999998522227864, iteration: 410312
loss: 1.0597258806228638,grad_norm: 0.9999991646121192, iteration: 410313
loss: 1.0500059127807617,grad_norm: 0.9543780021946485, iteration: 410314
loss: 1.0010470151901245,grad_norm: 0.9999995847625395, iteration: 410315
loss: 1.015623927116394,grad_norm: 0.9999994544403547, iteration: 410316
loss: 0.9989895224571228,grad_norm: 0.6342466467816954, iteration: 410317
loss: 1.0623588562011719,grad_norm: 0.9999990813984047, iteration: 410318
loss: 1.1753153800964355,grad_norm: 0.9999997425297167, iteration: 410319
loss: 0.9583835005760193,grad_norm: 0.8797396512422074, iteration: 410320
loss: 1.0117402076721191,grad_norm: 0.8773915008363943, iteration: 410321
loss: 1.0680102109909058,grad_norm: 0.999999340377765, iteration: 410322
loss: 1.0024019479751587,grad_norm: 0.9444671872912019, iteration: 410323
loss: 1.0549571514129639,grad_norm: 0.7267756077236431, iteration: 410324
loss: 1.046523928642273,grad_norm: 0.9518869303377825, iteration: 410325
loss: 1.0079177618026733,grad_norm: 0.8949490024446263, iteration: 410326
loss: 1.0459643602371216,grad_norm: 0.9999993166229175, iteration: 410327
loss: 1.036302089691162,grad_norm: 0.999999433815987, iteration: 410328
loss: 0.9801105856895447,grad_norm: 0.7803868950979689, iteration: 410329
loss: 1.0304256677627563,grad_norm: 0.9999996552780401, iteration: 410330
loss: 0.9580463767051697,grad_norm: 0.7045729564021993, iteration: 410331
loss: 1.0712640285491943,grad_norm: 0.9499063385391308, iteration: 410332
loss: 1.0029826164245605,grad_norm: 0.9999993915491409, iteration: 410333
loss: 1.0178461074829102,grad_norm: 0.8356222534600127, iteration: 410334
loss: 0.9720644354820251,grad_norm: 0.7248656414047798, iteration: 410335
loss: 1.0631572008132935,grad_norm: 0.8767267400547594, iteration: 410336
loss: 1.032405972480774,grad_norm: 0.9999993847062149, iteration: 410337
loss: 1.0427454710006714,grad_norm: 0.9999991316190764, iteration: 410338
loss: 1.0239624977111816,grad_norm: 0.999999507703184, iteration: 410339
loss: 0.9984810948371887,grad_norm: 0.7923470396361892, iteration: 410340
loss: 1.02339768409729,grad_norm: 0.9999998265839105, iteration: 410341
loss: 0.9781075119972229,grad_norm: 0.8895472962022304, iteration: 410342
loss: 0.9815873503684998,grad_norm: 0.8804071172761717, iteration: 410343
loss: 1.0216110944747925,grad_norm: 0.866833545488668, iteration: 410344
loss: 1.088064193725586,grad_norm: 0.9999994420620497, iteration: 410345
loss: 1.1128265857696533,grad_norm: 0.9999998716070108, iteration: 410346
loss: 0.9884560108184814,grad_norm: 0.8729853176350858, iteration: 410347
loss: 1.0795224905014038,grad_norm: 0.9999990836962271, iteration: 410348
loss: 1.0033329725265503,grad_norm: 0.7244485377558345, iteration: 410349
loss: 1.1179567575454712,grad_norm: 0.9999993111399144, iteration: 410350
loss: 1.0259966850280762,grad_norm: 0.9367680680951957, iteration: 410351
loss: 1.0436190366744995,grad_norm: 0.9999996312714657, iteration: 410352
loss: 0.9958704113960266,grad_norm: 0.683913899746495, iteration: 410353
loss: 1.1035146713256836,grad_norm: 0.9999997510696614, iteration: 410354
loss: 0.9946582913398743,grad_norm: 0.9999995508170968, iteration: 410355
loss: 0.977390468120575,grad_norm: 0.8478150018556305, iteration: 410356
loss: 1.019168496131897,grad_norm: 0.8095476291789901, iteration: 410357
loss: 1.0131494998931885,grad_norm: 0.9999992835972206, iteration: 410358
loss: 1.0651671886444092,grad_norm: 0.9999991492358481, iteration: 410359
loss: 1.1035306453704834,grad_norm: 0.9999997942518889, iteration: 410360
loss: 0.9867339730262756,grad_norm: 0.8205199144639067, iteration: 410361
loss: 1.0153018236160278,grad_norm: 0.9369425656914387, iteration: 410362
loss: 1.010469913482666,grad_norm: 0.8911546312198262, iteration: 410363
loss: 1.0523871183395386,grad_norm: 0.7453476041050073, iteration: 410364
loss: 0.998643159866333,grad_norm: 0.9830975386575985, iteration: 410365
loss: 0.9991543889045715,grad_norm: 0.7367147629737322, iteration: 410366
loss: 1.015244960784912,grad_norm: 0.8770185581225101, iteration: 410367
loss: 1.0224151611328125,grad_norm: 0.7866907370658207, iteration: 410368
loss: 1.056779146194458,grad_norm: 0.9999995463149541, iteration: 410369
loss: 1.065731167793274,grad_norm: 0.9999996281535051, iteration: 410370
loss: 1.066928744316101,grad_norm: 0.9999994136223287, iteration: 410371
loss: 1.248711347579956,grad_norm: 0.9999999251984262, iteration: 410372
loss: 1.0188957452774048,grad_norm: 0.7970306445763125, iteration: 410373
loss: 1.164384365081787,grad_norm: 0.9999992256619288, iteration: 410374
loss: 1.0203126668930054,grad_norm: 0.7060569421741067, iteration: 410375
loss: 1.0058825016021729,grad_norm: 0.9999991890865173, iteration: 410376
loss: 0.9711554050445557,grad_norm: 0.8893379670996628, iteration: 410377
loss: 1.0762722492218018,grad_norm: 0.9999997661362805, iteration: 410378
loss: 1.016126036643982,grad_norm: 0.9151513540035439, iteration: 410379
loss: 1.0101771354675293,grad_norm: 0.7309475164241593, iteration: 410380
loss: 1.075676679611206,grad_norm: 0.7923664176504523, iteration: 410381
loss: 1.0666922330856323,grad_norm: 0.9676548270149605, iteration: 410382
loss: 1.0080386400222778,grad_norm: 0.7744290719743528, iteration: 410383
loss: 0.9795346856117249,grad_norm: 0.8083913916978264, iteration: 410384
loss: 1.0088685750961304,grad_norm: 0.9999996220864316, iteration: 410385
loss: 1.2259470224380493,grad_norm: 0.9999999307518799, iteration: 410386
loss: 0.9776322245597839,grad_norm: 0.6316399332882504, iteration: 410387
loss: 1.0090365409851074,grad_norm: 0.9999991361630521, iteration: 410388
loss: 1.0194202661514282,grad_norm: 0.9999996010907691, iteration: 410389
loss: 1.0634576082229614,grad_norm: 0.9380590429373665, iteration: 410390
loss: 1.0027568340301514,grad_norm: 0.9999993530886113, iteration: 410391
loss: 1.070900797843933,grad_norm: 0.999999117586987, iteration: 410392
loss: 0.9760335087776184,grad_norm: 0.6793982266849946, iteration: 410393
loss: 1.1392241716384888,grad_norm: 0.9999991326807184, iteration: 410394
loss: 1.1426554918289185,grad_norm: 0.9999998567308841, iteration: 410395
loss: 1.0168110132217407,grad_norm: 0.9999996901214154, iteration: 410396
loss: 1.0237432718276978,grad_norm: 0.7677953958422331, iteration: 410397
loss: 1.0134618282318115,grad_norm: 0.8406065467440509, iteration: 410398
loss: 0.9982410073280334,grad_norm: 0.8004265536058751, iteration: 410399
loss: 1.0107803344726562,grad_norm: 0.7795651295684467, iteration: 410400
loss: 1.0083621740341187,grad_norm: 0.8286127022374472, iteration: 410401
loss: 0.9783620238304138,grad_norm: 0.8686805806270035, iteration: 410402
loss: 1.0772827863693237,grad_norm: 0.9999999422741772, iteration: 410403
loss: 1.0222446918487549,grad_norm: 0.9999994108795113, iteration: 410404
loss: 1.0342586040496826,grad_norm: 0.7050775330293997, iteration: 410405
loss: 0.9903745651245117,grad_norm: 0.7965287090261316, iteration: 410406
loss: 0.9933700561523438,grad_norm: 0.8147295654599113, iteration: 410407
loss: 1.0592745542526245,grad_norm: 0.9999990677481218, iteration: 410408
loss: 1.0253390073776245,grad_norm: 0.9803301966313246, iteration: 410409
loss: 1.1025387048721313,grad_norm: 0.9999993645185283, iteration: 410410
loss: 1.0397531986236572,grad_norm: 0.8950294544398882, iteration: 410411
loss: 1.0564438104629517,grad_norm: 0.9999996056346141, iteration: 410412
loss: 1.0261659622192383,grad_norm: 0.7805501159846715, iteration: 410413
loss: 1.1404204368591309,grad_norm: 0.9999993521050172, iteration: 410414
loss: 0.9961313605308533,grad_norm: 0.6274500999439268, iteration: 410415
loss: 1.0114134550094604,grad_norm: 0.7655626744488808, iteration: 410416
loss: 0.9700357913970947,grad_norm: 0.6881348108942923, iteration: 410417
loss: 0.9970094561576843,grad_norm: 0.9907389102694104, iteration: 410418
loss: 1.0062718391418457,grad_norm: 0.7979736690082208, iteration: 410419
loss: 1.0373908281326294,grad_norm: 0.999999646177055, iteration: 410420
loss: 1.0141321420669556,grad_norm: 0.819188688361346, iteration: 410421
loss: 1.089125156402588,grad_norm: 0.9999999674083309, iteration: 410422
loss: 1.0193779468536377,grad_norm: 0.9999998467072686, iteration: 410423
loss: 1.0065498352050781,grad_norm: 0.999999976655751, iteration: 410424
loss: 1.051010251045227,grad_norm: 0.7427829116174233, iteration: 410425
loss: 1.1085765361785889,grad_norm: 0.9999999093637606, iteration: 410426
loss: 1.0704319477081299,grad_norm: 0.8364387213246429, iteration: 410427
loss: 1.0104244947433472,grad_norm: 0.6985086543394067, iteration: 410428
loss: 1.0253292322158813,grad_norm: 0.9360207849687628, iteration: 410429
loss: 1.0461291074752808,grad_norm: 0.8633778332792417, iteration: 410430
loss: 1.1068246364593506,grad_norm: 0.8334726568002188, iteration: 410431
loss: 1.047467589378357,grad_norm: 0.9999992616352767, iteration: 410432
loss: 1.015671968460083,grad_norm: 0.706786667189059, iteration: 410433
loss: 1.0214648246765137,grad_norm: 0.7056279913486965, iteration: 410434
loss: 1.0842459201812744,grad_norm: 0.9999991953904797, iteration: 410435
loss: 0.9471721649169922,grad_norm: 0.7922875978148723, iteration: 410436
loss: 1.0093317031860352,grad_norm: 0.7848432570712814, iteration: 410437
loss: 0.9825124740600586,grad_norm: 0.7524721073287785, iteration: 410438
loss: 1.0088474750518799,grad_norm: 0.9999993769792397, iteration: 410439
loss: 1.0144697427749634,grad_norm: 0.9999991336080684, iteration: 410440
loss: 1.0070009231567383,grad_norm: 0.795551540064004, iteration: 410441
loss: 1.0068479776382446,grad_norm: 0.8614261065618297, iteration: 410442
loss: 1.1012353897094727,grad_norm: 0.8227818491117111, iteration: 410443
loss: 1.044798731803894,grad_norm: 0.9999992221857856, iteration: 410444
loss: 1.0030174255371094,grad_norm: 0.7443200187850567, iteration: 410445
loss: 1.005528211593628,grad_norm: 0.7933259378082375, iteration: 410446
loss: 1.0301038026809692,grad_norm: 0.7547104152795294, iteration: 410447
loss: 1.066375732421875,grad_norm: 0.9999991532216076, iteration: 410448
loss: 1.0083359479904175,grad_norm: 0.6427931869130588, iteration: 410449
loss: 1.078713059425354,grad_norm: 0.7388117439406577, iteration: 410450
loss: 1.010841965675354,grad_norm: 0.6750526815112661, iteration: 410451
loss: 1.0418055057525635,grad_norm: 0.7216662763397955, iteration: 410452
loss: 1.1133809089660645,grad_norm: 0.9999996265468101, iteration: 410453
loss: 0.9409696459770203,grad_norm: 0.9999994449511717, iteration: 410454
loss: 1.0740699768066406,grad_norm: 0.9999994457842221, iteration: 410455
loss: 1.0214108228683472,grad_norm: 0.7299350431905746, iteration: 410456
loss: 1.0195951461791992,grad_norm: 0.8798616120548072, iteration: 410457
loss: 0.9587525129318237,grad_norm: 0.7906300268267803, iteration: 410458
loss: 1.0065104961395264,grad_norm: 0.741167788079843, iteration: 410459
loss: 1.0070936679840088,grad_norm: 0.8975871135853253, iteration: 410460
loss: 1.1777944564819336,grad_norm: 1.0000000131517182, iteration: 410461
loss: 1.0305070877075195,grad_norm: 0.7332627671710931, iteration: 410462
loss: 0.9883095622062683,grad_norm: 0.7452933622027479, iteration: 410463
loss: 1.015396237373352,grad_norm: 0.7460751980767403, iteration: 410464
loss: 0.9676693677902222,grad_norm: 0.7060124378312289, iteration: 410465
loss: 1.0162830352783203,grad_norm: 0.7323558673443209, iteration: 410466
loss: 0.9962299466133118,grad_norm: 0.9601644109713341, iteration: 410467
loss: 1.0320360660552979,grad_norm: 0.7108009274378457, iteration: 410468
loss: 0.9925541281700134,grad_norm: 0.9193087339106237, iteration: 410469
loss: 1.0429816246032715,grad_norm: 0.9999993488346658, iteration: 410470
loss: 1.0411839485168457,grad_norm: 0.8603005848795603, iteration: 410471
loss: 0.9873303174972534,grad_norm: 0.6756035638874472, iteration: 410472
loss: 0.9868908524513245,grad_norm: 0.8696010157680736, iteration: 410473
loss: 1.0158971548080444,grad_norm: 0.8994831873186391, iteration: 410474
loss: 1.0100901126861572,grad_norm: 0.8118037365759004, iteration: 410475
loss: 0.9802366495132446,grad_norm: 0.7343970872167455, iteration: 410476
loss: 1.0236839056015015,grad_norm: 0.8434152160396547, iteration: 410477
loss: 0.9755064249038696,grad_norm: 0.7829142592584966, iteration: 410478
loss: 1.008337140083313,grad_norm: 0.7907373502994406, iteration: 410479
loss: 1.0768781900405884,grad_norm: 0.9999996567052856, iteration: 410480
loss: 1.002702236175537,grad_norm: 0.9839484254732231, iteration: 410481
loss: 0.9714959263801575,grad_norm: 0.8793023472663971, iteration: 410482
loss: 1.0473237037658691,grad_norm: 0.9174352678380837, iteration: 410483
loss: 1.0121045112609863,grad_norm: 0.7176142633872891, iteration: 410484
loss: 1.0210179090499878,grad_norm: 0.9999992576596282, iteration: 410485
loss: 1.0073250532150269,grad_norm: 0.9041047965182915, iteration: 410486
loss: 1.016128659248352,grad_norm: 0.934153334128584, iteration: 410487
loss: 1.155505657196045,grad_norm: 0.9769741839630237, iteration: 410488
loss: 1.0106065273284912,grad_norm: 0.7366538354623249, iteration: 410489
loss: 1.0156164169311523,grad_norm: 0.9999990637795502, iteration: 410490
loss: 0.9996665120124817,grad_norm: 0.7166463343050705, iteration: 410491
loss: 1.02333402633667,grad_norm: 0.8806248645471104, iteration: 410492
loss: 1.0034857988357544,grad_norm: 0.7037309282724277, iteration: 410493
loss: 0.9962114095687866,grad_norm: 0.825145898769631, iteration: 410494
loss: 0.9882502555847168,grad_norm: 0.8552619647998985, iteration: 410495
loss: 1.0236778259277344,grad_norm: 0.9999996472322116, iteration: 410496
loss: 1.0263620615005493,grad_norm: 0.8243242392379324, iteration: 410497
loss: 1.0242871046066284,grad_norm: 0.8988331706827716, iteration: 410498
loss: 0.9872857928276062,grad_norm: 0.9438985979785968, iteration: 410499
loss: 0.9979421496391296,grad_norm: 0.9461990724521303, iteration: 410500
loss: 0.9765164256095886,grad_norm: 0.9393739916486402, iteration: 410501
loss: 0.9984192848205566,grad_norm: 0.7141721666682733, iteration: 410502
loss: 1.0105876922607422,grad_norm: 0.7683339712920728, iteration: 410503
loss: 1.042240023612976,grad_norm: 0.9999992604349979, iteration: 410504
loss: 0.9782207012176514,grad_norm: 0.9999994212569797, iteration: 410505
loss: 1.0044904947280884,grad_norm: 0.9999995771540309, iteration: 410506
loss: 1.022197961807251,grad_norm: 0.8722132094587804, iteration: 410507
loss: 1.005437970161438,grad_norm: 0.7847454754745954, iteration: 410508
loss: 1.094519019126892,grad_norm: 0.9999997919047965, iteration: 410509
loss: 0.9995291829109192,grad_norm: 0.7589897713202207, iteration: 410510
loss: 1.03779935836792,grad_norm: 0.7861482356568081, iteration: 410511
loss: 1.0648329257965088,grad_norm: 0.7021767415101666, iteration: 410512
loss: 1.076422095298767,grad_norm: 0.999999957545441, iteration: 410513
loss: 0.9938329458236694,grad_norm: 0.9938864085607313, iteration: 410514
loss: 1.0086787939071655,grad_norm: 0.7821286454151231, iteration: 410515
loss: 1.0380501747131348,grad_norm: 0.9999991757164258, iteration: 410516
loss: 1.0092830657958984,grad_norm: 0.8835554909858245, iteration: 410517
loss: 0.9642963409423828,grad_norm: 0.8495795747312395, iteration: 410518
loss: 0.9896296858787537,grad_norm: 0.6700846995943355, iteration: 410519
loss: 1.0276029109954834,grad_norm: 0.9554088302651021, iteration: 410520
loss: 1.0253584384918213,grad_norm: 0.999999273129689, iteration: 410521
loss: 0.9892560839653015,grad_norm: 0.8783105578543849, iteration: 410522
loss: 1.0135538578033447,grad_norm: 0.9999991063563628, iteration: 410523
loss: 1.0060151815414429,grad_norm: 0.694789218305078, iteration: 410524
loss: 0.9907568097114563,grad_norm: 0.6544154943142394, iteration: 410525
loss: 1.0154881477355957,grad_norm: 0.9999993999934739, iteration: 410526
loss: 0.9435069561004639,grad_norm: 0.7178757730770683, iteration: 410527
loss: 1.080129861831665,grad_norm: 0.8806922419484341, iteration: 410528
loss: 0.9945727586746216,grad_norm: 0.8281252559145911, iteration: 410529
loss: 0.970741331577301,grad_norm: 0.7610857128548858, iteration: 410530
loss: 1.0580307245254517,grad_norm: 0.9999992074607343, iteration: 410531
loss: 1.040108561515808,grad_norm: 0.8178248493838896, iteration: 410532
loss: 0.9881113767623901,grad_norm: 0.8447935193906889, iteration: 410533
loss: 0.9945452213287354,grad_norm: 0.8559282903741181, iteration: 410534
loss: 1.0102766752243042,grad_norm: 0.7712329058323, iteration: 410535
loss: 0.9949913024902344,grad_norm: 0.9927370974014301, iteration: 410536
loss: 0.9800491333007812,grad_norm: 0.8851203451549378, iteration: 410537
loss: 1.0703388452529907,grad_norm: 0.8398653337510568, iteration: 410538
loss: 0.9987495541572571,grad_norm: 0.8633779655913946, iteration: 410539
loss: 0.961771547794342,grad_norm: 0.8711916043287709, iteration: 410540
loss: 1.0245813131332397,grad_norm: 0.9999997990207119, iteration: 410541
loss: 0.9689324498176575,grad_norm: 0.7375341148429578, iteration: 410542
loss: 1.0036391019821167,grad_norm: 0.6335826316494647, iteration: 410543
loss: 1.0161622762680054,grad_norm: 0.8019767513589782, iteration: 410544
loss: 1.0268834829330444,grad_norm: 0.6940420454363383, iteration: 410545
loss: 1.0430794954299927,grad_norm: 0.7771996299482515, iteration: 410546
loss: 0.9836369156837463,grad_norm: 0.869975222055953, iteration: 410547
loss: 0.987295389175415,grad_norm: 0.823772470626048, iteration: 410548
loss: 1.06636381149292,grad_norm: 0.9999991457634237, iteration: 410549
loss: 1.1465656757354736,grad_norm: 0.9999998078624196, iteration: 410550
loss: 1.0744729042053223,grad_norm: 0.8360439902974346, iteration: 410551
loss: 1.1483337879180908,grad_norm: 0.8297312936513179, iteration: 410552
loss: 1.0078363418579102,grad_norm: 0.734521694354875, iteration: 410553
loss: 1.0106984376907349,grad_norm: 0.6951122989323942, iteration: 410554
loss: 1.0006064176559448,grad_norm: 0.6459810186369835, iteration: 410555
loss: 1.0050278902053833,grad_norm: 0.9999990711145349, iteration: 410556
loss: 1.052159309387207,grad_norm: 0.9569223327756811, iteration: 410557
loss: 1.1237744092941284,grad_norm: 0.9999997040086201, iteration: 410558
loss: 1.0205622911453247,grad_norm: 0.7257441504839183, iteration: 410559
loss: 1.0091431140899658,grad_norm: 0.8449046519641226, iteration: 410560
loss: 0.9697983860969543,grad_norm: 0.999850969439689, iteration: 410561
loss: 1.045589804649353,grad_norm: 0.9999996645332102, iteration: 410562
loss: 1.0301190614700317,grad_norm: 0.8403648677341329, iteration: 410563
loss: 1.0611993074417114,grad_norm: 0.7660729719958758, iteration: 410564
loss: 0.9931074380874634,grad_norm: 0.7519398699487265, iteration: 410565
loss: 1.0102583169937134,grad_norm: 0.6901506844356772, iteration: 410566
loss: 1.03175950050354,grad_norm: 0.7976325658131553, iteration: 410567
loss: 1.0274759531021118,grad_norm: 0.8213100640906216, iteration: 410568
loss: 1.0301532745361328,grad_norm: 0.8899139559118475, iteration: 410569
loss: 1.0059692859649658,grad_norm: 0.8523802127746241, iteration: 410570
loss: 1.0283799171447754,grad_norm: 0.7940066388562663, iteration: 410571
loss: 1.015658974647522,grad_norm: 0.7166998917908484, iteration: 410572
loss: 0.9905282258987427,grad_norm: 0.6394395465031159, iteration: 410573
loss: 1.0126028060913086,grad_norm: 0.8363415569115333, iteration: 410574
loss: 0.9820649027824402,grad_norm: 0.7729654672799678, iteration: 410575
loss: 0.9695954918861389,grad_norm: 0.743875705892368, iteration: 410576
loss: 1.0471876859664917,grad_norm: 0.6932006443036365, iteration: 410577
loss: 1.0131839513778687,grad_norm: 0.8065883060187217, iteration: 410578
loss: 0.9984795451164246,grad_norm: 0.7748747554731543, iteration: 410579
loss: 1.2777072191238403,grad_norm: 0.9999998225039851, iteration: 410580
loss: 0.9887574911117554,grad_norm: 0.7686385312900589, iteration: 410581
loss: 1.0005160570144653,grad_norm: 0.9999994964208637, iteration: 410582
loss: 0.9791345596313477,grad_norm: 0.9999993304265248, iteration: 410583
loss: 0.9799966812133789,grad_norm: 0.6653180956500933, iteration: 410584
loss: 0.9934145212173462,grad_norm: 0.8040751115177732, iteration: 410585
loss: 0.9516710042953491,grad_norm: 0.7863870714109794, iteration: 410586
loss: 1.0651524066925049,grad_norm: 0.9999991205296415, iteration: 410587
loss: 1.0345180034637451,grad_norm: 0.9543710173173038, iteration: 410588
loss: 1.1802910566329956,grad_norm: 0.9999995356693707, iteration: 410589
loss: 1.0911352634429932,grad_norm: 0.9999998594741384, iteration: 410590
loss: 0.9938251376152039,grad_norm: 0.7372654543635881, iteration: 410591
loss: 1.0090351104736328,grad_norm: 0.6969617669760438, iteration: 410592
loss: 1.0027004480361938,grad_norm: 0.84594646079275, iteration: 410593
loss: 1.0067253112792969,grad_norm: 0.8324536196398034, iteration: 410594
loss: 0.9867434501647949,grad_norm: 0.7808878512510301, iteration: 410595
loss: 0.9527069926261902,grad_norm: 0.9623101194488569, iteration: 410596
loss: 0.9824216961860657,grad_norm: 0.8977600989837805, iteration: 410597
loss: 0.9856681227684021,grad_norm: 0.8914223730855412, iteration: 410598
loss: 0.9935185313224792,grad_norm: 0.7617100276299748, iteration: 410599
loss: 1.0362539291381836,grad_norm: 0.9715896723024797, iteration: 410600
loss: 1.0234233140945435,grad_norm: 0.9312409704618807, iteration: 410601
loss: 1.0296531915664673,grad_norm: 0.9999993401116419, iteration: 410602
loss: 0.9719381332397461,grad_norm: 0.6934334814563059, iteration: 410603
loss: 1.0577874183654785,grad_norm: 0.8295017971377108, iteration: 410604
loss: 1.0551702976226807,grad_norm: 0.9999998089838052, iteration: 410605
loss: 1.0639110803604126,grad_norm: 0.9999998495184814, iteration: 410606
loss: 1.0137017965316772,grad_norm: 0.9001688358817099, iteration: 410607
loss: 1.001192569732666,grad_norm: 0.7808432766467508, iteration: 410608
loss: 1.0572046041488647,grad_norm: 1.0000000317945132, iteration: 410609
loss: 0.952843964099884,grad_norm: 0.7028878257891694, iteration: 410610
loss: 1.0444337129592896,grad_norm: 0.9999997998345451, iteration: 410611
loss: 1.0906832218170166,grad_norm: 0.9999997144453071, iteration: 410612
loss: 1.0157592296600342,grad_norm: 0.999999731636338, iteration: 410613
loss: 1.232761025428772,grad_norm: 0.9999997350559433, iteration: 410614
loss: 0.9805757403373718,grad_norm: 0.7562210404325933, iteration: 410615
loss: 0.9876952767372131,grad_norm: 0.6872644284932037, iteration: 410616
loss: 1.013024091720581,grad_norm: 0.9999998529388261, iteration: 410617
loss: 1.0017544031143188,grad_norm: 0.9474317015642125, iteration: 410618
loss: 1.0398558378219604,grad_norm: 0.7469801650045176, iteration: 410619
loss: 1.010077714920044,grad_norm: 0.7529697919063151, iteration: 410620
loss: 1.024875283241272,grad_norm: 0.8902254936174347, iteration: 410621
loss: 1.0731629133224487,grad_norm: 0.9999999471246455, iteration: 410622
loss: 1.0054043531417847,grad_norm: 0.9999996729264308, iteration: 410623
loss: 1.0097721815109253,grad_norm: 0.7014937990671476, iteration: 410624
loss: 1.020516276359558,grad_norm: 0.9999999281409476, iteration: 410625
loss: 1.0260891914367676,grad_norm: 0.9999992250803927, iteration: 410626
loss: 0.9810378551483154,grad_norm: 0.9999993409929825, iteration: 410627
loss: 1.0737699270248413,grad_norm: 0.9999995335383451, iteration: 410628
loss: 0.9696623086929321,grad_norm: 0.7810871097685298, iteration: 410629
loss: 1.0099999904632568,grad_norm: 0.9999997913323386, iteration: 410630
loss: 1.0116913318634033,grad_norm: 0.9999992005693583, iteration: 410631
loss: 1.030399203300476,grad_norm: 0.9036696285192206, iteration: 410632
loss: 1.0153260231018066,grad_norm: 0.8598264478465705, iteration: 410633
loss: 0.9691774845123291,grad_norm: 0.8951116664592584, iteration: 410634
loss: 1.03798246383667,grad_norm: 0.7301635565843918, iteration: 410635
loss: 0.99903804063797,grad_norm: 0.978435588478492, iteration: 410636
loss: 1.0284289121627808,grad_norm: 0.9999991352818788, iteration: 410637
loss: 1.0309617519378662,grad_norm: 0.7886811065212337, iteration: 410638
loss: 0.968752920627594,grad_norm: 0.8350011902828895, iteration: 410639
loss: 0.9748393893241882,grad_norm: 0.903824200788708, iteration: 410640
loss: 1.0240488052368164,grad_norm: 0.756621439561256, iteration: 410641
loss: 1.0159565210342407,grad_norm: 0.6990719763569703, iteration: 410642
loss: 1.0905357599258423,grad_norm: 0.9999991765795482, iteration: 410643
loss: 0.9795014262199402,grad_norm: 0.7448701357043159, iteration: 410644
loss: 1.1226710081100464,grad_norm: 0.9999997753736318, iteration: 410645
loss: 1.0672119855880737,grad_norm: 0.9999992673371131, iteration: 410646
loss: 1.1354109048843384,grad_norm: 0.9999997111344294, iteration: 410647
loss: 1.0230677127838135,grad_norm: 0.7444025046881727, iteration: 410648
loss: 0.9715559482574463,grad_norm: 0.8444606512935451, iteration: 410649
loss: 1.0301252603530884,grad_norm: 0.7518740087870918, iteration: 410650
loss: 1.0373361110687256,grad_norm: 0.9999997944450675, iteration: 410651
loss: 1.0328162908554077,grad_norm: 0.8733228881778698, iteration: 410652
loss: 1.1005853414535522,grad_norm: 0.9999992094524656, iteration: 410653
loss: 1.0209016799926758,grad_norm: 0.7359153883683899, iteration: 410654
loss: 1.0105535984039307,grad_norm: 0.7520857929434249, iteration: 410655
loss: 1.046188235282898,grad_norm: 0.9999994735982982, iteration: 410656
loss: 1.0051954984664917,grad_norm: 0.708224753470481, iteration: 410657
loss: 1.0469588041305542,grad_norm: 0.9230654926509136, iteration: 410658
loss: 1.062970519065857,grad_norm: 0.9999990534592665, iteration: 410659
loss: 0.9859660267829895,grad_norm: 0.9999996861557914, iteration: 410660
loss: 0.9994565844535828,grad_norm: 0.9999992174178065, iteration: 410661
loss: 1.051052451133728,grad_norm: 0.9999990390579659, iteration: 410662
loss: 1.0560836791992188,grad_norm: 0.9999993279361895, iteration: 410663
loss: 1.037856936454773,grad_norm: 0.9999999947428362, iteration: 410664
loss: 1.0641635656356812,grad_norm: 0.9925743637941093, iteration: 410665
loss: 1.0280537605285645,grad_norm: 0.9999992033579138, iteration: 410666
loss: 1.0785720348358154,grad_norm: 0.9999993360920669, iteration: 410667
loss: 1.053607702255249,grad_norm: 0.9999992869670116, iteration: 410668
loss: 1.0266345739364624,grad_norm: 0.999999385969557, iteration: 410669
loss: 0.9900299906730652,grad_norm: 0.9999993681329139, iteration: 410670
loss: 1.0611287355422974,grad_norm: 0.8253635814082768, iteration: 410671
loss: 1.0004490613937378,grad_norm: 0.9999998087038099, iteration: 410672
loss: 1.0897216796875,grad_norm: 0.9666284350679591, iteration: 410673
loss: 0.9978272914886475,grad_norm: 0.6753325007597795, iteration: 410674
loss: 1.0205732583999634,grad_norm: 0.9712710412832267, iteration: 410675
loss: 1.0303208827972412,grad_norm: 0.6920502600246937, iteration: 410676
loss: 1.0110629796981812,grad_norm: 0.9999995454677792, iteration: 410677
loss: 1.0887157917022705,grad_norm: 0.9999997491648155, iteration: 410678
loss: 1.0037635564804077,grad_norm: 0.7072114117624029, iteration: 410679
loss: 1.025462031364441,grad_norm: 0.9999991676547606, iteration: 410680
loss: 1.0658620595932007,grad_norm: 0.9999993678039691, iteration: 410681
loss: 1.0308103561401367,grad_norm: 0.8625475717433995, iteration: 410682
loss: 1.005231499671936,grad_norm: 0.7860948453454247, iteration: 410683
loss: 0.9767746925354004,grad_norm: 0.8841451271124904, iteration: 410684
loss: 1.0231260061264038,grad_norm: 0.7573872582618557, iteration: 410685
loss: 0.9336085319519043,grad_norm: 0.7029093009245556, iteration: 410686
loss: 1.0213581323623657,grad_norm: 0.7655653436895684, iteration: 410687
loss: 1.034813404083252,grad_norm: 0.8544715462741621, iteration: 410688
loss: 0.9880252480506897,grad_norm: 0.8278114473318711, iteration: 410689
loss: 1.0490719079971313,grad_norm: 0.9642087613165427, iteration: 410690
loss: 1.0426942110061646,grad_norm: 0.8941021383606482, iteration: 410691
loss: 1.0746240615844727,grad_norm: 0.9999993263478753, iteration: 410692
loss: 1.055200219154358,grad_norm: 0.9999990470474786, iteration: 410693
loss: 0.9928264617919922,grad_norm: 0.7766901741856778, iteration: 410694
loss: 1.0345757007598877,grad_norm: 0.9999995740704173, iteration: 410695
loss: 1.030531644821167,grad_norm: 0.7196736763726432, iteration: 410696
loss: 1.140550971031189,grad_norm: 0.9999991880177572, iteration: 410697
loss: 1.0221821069717407,grad_norm: 0.8764307453632586, iteration: 410698
loss: 0.9840793609619141,grad_norm: 0.8246441888099392, iteration: 410699
loss: 1.027859091758728,grad_norm: 0.9999996725461218, iteration: 410700
loss: 1.0374224185943604,grad_norm: 0.9999993984656439, iteration: 410701
loss: 1.0519726276397705,grad_norm: 0.9999994775899617, iteration: 410702
loss: 1.0085209608078003,grad_norm: 0.9003965375227294, iteration: 410703
loss: 1.007172703742981,grad_norm: 0.77128152177559, iteration: 410704
loss: 1.1045302152633667,grad_norm: 0.9999993726855366, iteration: 410705
loss: 1.0497055053710938,grad_norm: 0.9803650065732155, iteration: 410706
loss: 1.123343586921692,grad_norm: 0.9999990283170924, iteration: 410707
loss: 1.0152876377105713,grad_norm: 0.9999997508929471, iteration: 410708
loss: 1.0345791578292847,grad_norm: 0.7721559307081782, iteration: 410709
loss: 1.0985887050628662,grad_norm: 0.8571608842857503, iteration: 410710
loss: 1.004320740699768,grad_norm: 0.9999994060029559, iteration: 410711
loss: 1.0729681253433228,grad_norm: 0.8861637809897097, iteration: 410712
loss: 1.0436984300613403,grad_norm: 0.9999997560909148, iteration: 410713
loss: 0.9988701939582825,grad_norm: 0.6253799220563392, iteration: 410714
loss: 1.0247465372085571,grad_norm: 0.7520050251235115, iteration: 410715
loss: 1.052468180656433,grad_norm: 0.9999996264055722, iteration: 410716
loss: 1.0438103675842285,grad_norm: 0.9029156996223869, iteration: 410717
loss: 1.0171475410461426,grad_norm: 0.9999996313676265, iteration: 410718
loss: 1.0080581903457642,grad_norm: 0.7700774602431756, iteration: 410719
loss: 1.0550154447555542,grad_norm: 0.9119188819880453, iteration: 410720
loss: 1.084682583808899,grad_norm: 0.9012579265307126, iteration: 410721
loss: 1.0772587060928345,grad_norm: 0.999999935778546, iteration: 410722
loss: 1.0497063398361206,grad_norm: 0.9999992944636023, iteration: 410723
loss: 0.9687375426292419,grad_norm: 0.780294614449378, iteration: 410724
loss: 0.9974766969680786,grad_norm: 0.9999994921636686, iteration: 410725
loss: 1.0437254905700684,grad_norm: 0.9999998516326528, iteration: 410726
loss: 1.2049297094345093,grad_norm: 0.9999999571834837, iteration: 410727
loss: 0.9702890515327454,grad_norm: 0.8347900590054196, iteration: 410728
loss: 0.9757365584373474,grad_norm: 0.9124818826940644, iteration: 410729
loss: 1.057133436203003,grad_norm: 0.755213822203267, iteration: 410730
loss: 0.9821910858154297,grad_norm: 0.858882539770099, iteration: 410731
loss: 1.0904020071029663,grad_norm: 0.9999990766981041, iteration: 410732
loss: 1.0033410787582397,grad_norm: 0.7713865867154456, iteration: 410733
loss: 0.9871407151222229,grad_norm: 0.8463732639827346, iteration: 410734
loss: 1.0061461925506592,grad_norm: 0.8398633814701524, iteration: 410735
loss: 1.0852274894714355,grad_norm: 0.8897016439518822, iteration: 410736
loss: 0.9919204115867615,grad_norm: 0.8872761789323327, iteration: 410737
loss: 0.9963364601135254,grad_norm: 0.6967919205884816, iteration: 410738
loss: 0.9953426122665405,grad_norm: 0.9999991343289567, iteration: 410739
loss: 0.9790017008781433,grad_norm: 0.6429652113784269, iteration: 410740
loss: 0.9908548593521118,grad_norm: 0.9555970706594575, iteration: 410741
loss: 0.9955890774726868,grad_norm: 0.7974274242552764, iteration: 410742
loss: 1.0033241510391235,grad_norm: 0.768024651262334, iteration: 410743
loss: 1.0183621644973755,grad_norm: 0.999999961806808, iteration: 410744
loss: 0.9749776124954224,grad_norm: 0.9999992283130374, iteration: 410745
loss: 1.0994068384170532,grad_norm: 0.9999992372377792, iteration: 410746
loss: 0.994067907333374,grad_norm: 0.8175702473979418, iteration: 410747
loss: 1.0731678009033203,grad_norm: 0.8752006761767441, iteration: 410748
loss: 0.9922558665275574,grad_norm: 0.8753522011865431, iteration: 410749
loss: 1.0246446132659912,grad_norm: 0.7589440225080791, iteration: 410750
loss: 1.001910924911499,grad_norm: 0.9061698199710128, iteration: 410751
loss: 1.0413402318954468,grad_norm: 0.999999620570103, iteration: 410752
loss: 1.0349925756454468,grad_norm: 0.9999998040837698, iteration: 410753
loss: 1.0055041313171387,grad_norm: 0.7213544042210607, iteration: 410754
loss: 1.0549306869506836,grad_norm: 0.8958214405147602, iteration: 410755
loss: 0.9562651515007019,grad_norm: 0.6777866695798002, iteration: 410756
loss: 0.999740481376648,grad_norm: 0.9999995071058723, iteration: 410757
loss: 1.009082555770874,grad_norm: 0.9999998585238887, iteration: 410758
loss: 1.1314793825149536,grad_norm: 0.9999992048713805, iteration: 410759
loss: 1.0165808200836182,grad_norm: 0.9999989868031024, iteration: 410760
loss: 1.0236072540283203,grad_norm: 0.7889453317714898, iteration: 410761
loss: 1.0227364301681519,grad_norm: 0.7287473566346329, iteration: 410762
loss: 1.0297669172286987,grad_norm: 0.9999996905552582, iteration: 410763
loss: 1.3032513856887817,grad_norm: 0.9999999966636421, iteration: 410764
loss: 1.0121266841888428,grad_norm: 0.8074548704795513, iteration: 410765
loss: 0.9901595711708069,grad_norm: 0.5913407872908598, iteration: 410766
loss: 1.0039377212524414,grad_norm: 0.7880562029398038, iteration: 410767
loss: 0.958260715007782,grad_norm: 0.7768400148561196, iteration: 410768
loss: 0.9960945844650269,grad_norm: 0.7885326961502933, iteration: 410769
loss: 1.0331205129623413,grad_norm: 0.9999997560865099, iteration: 410770
loss: 1.242108702659607,grad_norm: 0.9999998053489696, iteration: 410771
loss: 0.984965443611145,grad_norm: 0.846711076967136, iteration: 410772
loss: 0.9909566640853882,grad_norm: 0.7160957580712342, iteration: 410773
loss: 1.18059504032135,grad_norm: 0.9999993265787233, iteration: 410774
loss: 1.0109922885894775,grad_norm: 0.9999998676406759, iteration: 410775
loss: 1.0024669170379639,grad_norm: 0.8351969300182477, iteration: 410776
loss: 0.9858129620552063,grad_norm: 0.9999990624080175, iteration: 410777
loss: 1.0025047063827515,grad_norm: 0.752716117963405, iteration: 410778
loss: 0.9885981678962708,grad_norm: 0.789249833996558, iteration: 410779
loss: 0.967648446559906,grad_norm: 0.5701984748808506, iteration: 410780
loss: 1.1454402208328247,grad_norm: 0.9999999725401064, iteration: 410781
loss: 1.003295660018921,grad_norm: 0.9999991830312532, iteration: 410782
loss: 1.07635498046875,grad_norm: 0.9999995301178836, iteration: 410783
loss: 1.1812435388565063,grad_norm: 0.9999994657798011, iteration: 410784
loss: 1.0274207592010498,grad_norm: 0.9712416451624026, iteration: 410785
loss: 1.0976933240890503,grad_norm: 0.9999992247144842, iteration: 410786
loss: 0.9878526329994202,grad_norm: 0.8082199579494751, iteration: 410787
loss: 1.040732741355896,grad_norm: 0.8630689879292494, iteration: 410788
loss: 1.0660443305969238,grad_norm: 0.9999996910656278, iteration: 410789
loss: 0.9809126257896423,grad_norm: 0.902094007052366, iteration: 410790
loss: 1.0308306217193604,grad_norm: 0.8584895624971817, iteration: 410791
loss: 1.0081080198287964,grad_norm: 0.7086874935205657, iteration: 410792
loss: 0.9878105521202087,grad_norm: 0.8900744839143255, iteration: 410793
loss: 1.0739914178848267,grad_norm: 0.999999673777347, iteration: 410794
loss: 1.0095722675323486,grad_norm: 0.9999995737115974, iteration: 410795
loss: 0.9435864090919495,grad_norm: 0.9867884690906504, iteration: 410796
loss: 1.009608507156372,grad_norm: 0.8800122576986459, iteration: 410797
loss: 0.9938360452651978,grad_norm: 0.8299945110689633, iteration: 410798
loss: 1.0013070106506348,grad_norm: 0.8243904457969696, iteration: 410799
loss: 0.977825939655304,grad_norm: 0.9999991092745164, iteration: 410800
loss: 1.0769749879837036,grad_norm: 0.8326131863073792, iteration: 410801
loss: 1.020707607269287,grad_norm: 0.9999998283056643, iteration: 410802
loss: 0.9559226036071777,grad_norm: 0.7412613844318227, iteration: 410803
loss: 0.9981722831726074,grad_norm: 0.7973842370373406, iteration: 410804
loss: 1.0031198263168335,grad_norm: 0.949725589592759, iteration: 410805
loss: 0.9871084094047546,grad_norm: 0.6970775956500025, iteration: 410806
loss: 0.9736348986625671,grad_norm: 0.774643682661684, iteration: 410807
loss: 0.979176938533783,grad_norm: 0.9999992132051478, iteration: 410808
loss: 0.9826467037200928,grad_norm: 0.8199047589934569, iteration: 410809
loss: 1.001082420349121,grad_norm: 0.999999386423873, iteration: 410810
loss: 1.1000298261642456,grad_norm: 0.9999991097741753, iteration: 410811
loss: 1.0101553201675415,grad_norm: 0.8967383609600011, iteration: 410812
loss: 0.9984076619148254,grad_norm: 0.8044644585766733, iteration: 410813
loss: 0.9800523519515991,grad_norm: 0.8029198014277281, iteration: 410814
loss: 1.0141539573669434,grad_norm: 0.7374028859787863, iteration: 410815
loss: 0.9648597240447998,grad_norm: 0.7636120102553906, iteration: 410816
loss: 0.9909395575523376,grad_norm: 0.8314289633558538, iteration: 410817
loss: 0.9839622378349304,grad_norm: 0.680383902935189, iteration: 410818
loss: 1.0103062391281128,grad_norm: 0.8117979011741506, iteration: 410819
loss: 0.9984778165817261,grad_norm: 0.9584357022074956, iteration: 410820
loss: 0.9935986399650574,grad_norm: 0.9999993581156599, iteration: 410821
loss: 1.0537184476852417,grad_norm: 0.9999998007156315, iteration: 410822
loss: 1.0055029392242432,grad_norm: 0.9286832844664977, iteration: 410823
loss: 0.9864065647125244,grad_norm: 0.7750446986037255, iteration: 410824
loss: 1.1303117275238037,grad_norm: 0.9999998077533574, iteration: 410825
loss: 1.0131399631500244,grad_norm: 0.8756346654176995, iteration: 410826
loss: 1.0185563564300537,grad_norm: 0.9999991414413374, iteration: 410827
loss: 1.0563552379608154,grad_norm: 0.999999164830796, iteration: 410828
loss: 0.9636505246162415,grad_norm: 0.7692302045092985, iteration: 410829
loss: 1.0307550430297852,grad_norm: 0.8743970597571713, iteration: 410830
loss: 0.9774031043052673,grad_norm: 0.8439567795969323, iteration: 410831
loss: 1.0701327323913574,grad_norm: 0.9999990528853566, iteration: 410832
loss: 0.9935975074768066,grad_norm: 0.9885331258842245, iteration: 410833
loss: 0.9946027398109436,grad_norm: 0.6803234801435563, iteration: 410834
loss: 0.998593807220459,grad_norm: 0.9999992508402968, iteration: 410835
loss: 1.136695384979248,grad_norm: 0.9999998227939636, iteration: 410836
loss: 0.971066415309906,grad_norm: 0.791992843873591, iteration: 410837
loss: 1.0248810052871704,grad_norm: 0.9014887983015497, iteration: 410838
loss: 1.0154937505722046,grad_norm: 0.9854260249804391, iteration: 410839
loss: 0.969754159450531,grad_norm: 0.846733684174905, iteration: 410840
loss: 0.9801540970802307,grad_norm: 0.8412162105123605, iteration: 410841
loss: 1.0685864686965942,grad_norm: 0.9999990737020464, iteration: 410842
loss: 1.081642508506775,grad_norm: 0.9999991438260536, iteration: 410843
loss: 1.0154528617858887,grad_norm: 0.7458294849903346, iteration: 410844
loss: 0.9770073294639587,grad_norm: 0.9999991905067317, iteration: 410845
loss: 1.0355967283248901,grad_norm: 0.9001485466151306, iteration: 410846
loss: 1.0322476625442505,grad_norm: 0.8099984830501997, iteration: 410847
loss: 1.0917824506759644,grad_norm: 0.8224296928040021, iteration: 410848
loss: 1.0085558891296387,grad_norm: 0.9999993641414838, iteration: 410849
loss: 1.0014015436172485,grad_norm: 0.7728618617382238, iteration: 410850
loss: 1.0159180164337158,grad_norm: 0.866266849200721, iteration: 410851
loss: 1.0077520608901978,grad_norm: 0.9795469040547587, iteration: 410852
loss: 1.0674347877502441,grad_norm: 0.7950084328379352, iteration: 410853
loss: 0.9802383780479431,grad_norm: 0.9109349194961477, iteration: 410854
loss: 0.9791759252548218,grad_norm: 0.8926862473388727, iteration: 410855
loss: 1.0556596517562866,grad_norm: 0.9999995128819591, iteration: 410856
loss: 0.9901840686798096,grad_norm: 0.9999989753842886, iteration: 410857
loss: 1.00650155544281,grad_norm: 0.7917054579098981, iteration: 410858
loss: 1.0097678899765015,grad_norm: 0.8457589194025783, iteration: 410859
loss: 1.0152733325958252,grad_norm: 0.9999996500850001, iteration: 410860
loss: 1.0108219385147095,grad_norm: 0.9999992381975192, iteration: 410861
loss: 1.0332099199295044,grad_norm: 0.9999993729595145, iteration: 410862
loss: 0.9994714260101318,grad_norm: 0.9999998735120085, iteration: 410863
loss: 1.0313143730163574,grad_norm: 0.888632498355489, iteration: 410864
loss: 0.9981147646903992,grad_norm: 0.999999228257985, iteration: 410865
loss: 1.0120136737823486,grad_norm: 0.9999991405391762, iteration: 410866
loss: 0.9797344207763672,grad_norm: 0.6487767902916776, iteration: 410867
loss: 1.026993989944458,grad_norm: 0.9999996617286114, iteration: 410868
loss: 0.9844629764556885,grad_norm: 0.9067237521681892, iteration: 410869
loss: 1.1040629148483276,grad_norm: 0.9999997214940229, iteration: 410870
loss: 1.0147351026535034,grad_norm: 0.7330821338278631, iteration: 410871
loss: 0.9965229034423828,grad_norm: 0.8078849119980299, iteration: 410872
loss: 1.014889121055603,grad_norm: 0.8003540116889171, iteration: 410873
loss: 1.1005126237869263,grad_norm: 0.9999998961466948, iteration: 410874
loss: 1.0095430612564087,grad_norm: 0.786560076397402, iteration: 410875
loss: 0.9781359434127808,grad_norm: 0.8440029896684232, iteration: 410876
loss: 1.1384762525558472,grad_norm: 0.9999990310708917, iteration: 410877
loss: 0.9854001998901367,grad_norm: 0.846206708874136, iteration: 410878
loss: 0.9764139652252197,grad_norm: 0.7381548100881823, iteration: 410879
loss: 1.0030711889266968,grad_norm: 0.9999998244648018, iteration: 410880
loss: 1.0009557008743286,grad_norm: 0.7478833713863677, iteration: 410881
loss: 1.0158132314682007,grad_norm: 0.749504712458359, iteration: 410882
loss: 1.0165307521820068,grad_norm: 0.8106643201444801, iteration: 410883
loss: 1.0275013446807861,grad_norm: 0.692197338691207, iteration: 410884
loss: 1.0564100742340088,grad_norm: 0.9999992086525258, iteration: 410885
loss: 1.0324708223342896,grad_norm: 0.9999997019963826, iteration: 410886
loss: 1.1226527690887451,grad_norm: 0.9999994616952509, iteration: 410887
loss: 1.0455328226089478,grad_norm: 0.9999992295133046, iteration: 410888
loss: 0.9851247668266296,grad_norm: 0.8100915640012611, iteration: 410889
loss: 1.0485358238220215,grad_norm: 0.9999998047742833, iteration: 410890
loss: 1.013381838798523,grad_norm: 0.7078383554079898, iteration: 410891
loss: 1.1130322217941284,grad_norm: 0.9999995603714313, iteration: 410892
loss: 1.0415266752243042,grad_norm: 0.9999994618894222, iteration: 410893
loss: 1.0015908479690552,grad_norm: 0.8275719127973113, iteration: 410894
loss: 1.0091067552566528,grad_norm: 0.8402277889313876, iteration: 410895
loss: 1.0100666284561157,grad_norm: 0.7429604442535653, iteration: 410896
loss: 0.972166121006012,grad_norm: 0.6671316331129188, iteration: 410897
loss: 1.0117508172988892,grad_norm: 0.9999990789887295, iteration: 410898
loss: 1.1308917999267578,grad_norm: 1.0000001215670151, iteration: 410899
loss: 1.0807124376296997,grad_norm: 0.9999996933773978, iteration: 410900
loss: 1.0120913982391357,grad_norm: 0.6776008179290912, iteration: 410901
loss: 0.9742286801338196,grad_norm: 0.8748150166786437, iteration: 410902
loss: 0.948302149772644,grad_norm: 0.8737986913058555, iteration: 410903
loss: 1.036497950553894,grad_norm: 0.7330846813078058, iteration: 410904
loss: 1.0146989822387695,grad_norm: 0.8406675242089191, iteration: 410905
loss: 0.9674634337425232,grad_norm: 0.9999998867618892, iteration: 410906
loss: 0.9778825640678406,grad_norm: 0.8059836251903026, iteration: 410907
loss: 1.010884165763855,grad_norm: 0.6985043933130473, iteration: 410908
loss: 1.0240885019302368,grad_norm: 0.7957094220451277, iteration: 410909
loss: 1.0165730714797974,grad_norm: 0.9999990205341959, iteration: 410910
loss: 1.0152735710144043,grad_norm: 0.9372746964368968, iteration: 410911
loss: 1.212271809577942,grad_norm: 0.9999995661857084, iteration: 410912
loss: 0.994451642036438,grad_norm: 0.7592132281443649, iteration: 410913
loss: 1.0172501802444458,grad_norm: 0.9999993530977891, iteration: 410914
loss: 0.9643968939781189,grad_norm: 0.8882889766708864, iteration: 410915
loss: 0.9945119619369507,grad_norm: 0.7907057465174423, iteration: 410916
loss: 0.9817202687263489,grad_norm: 0.7666566987615183, iteration: 410917
loss: 0.9926870465278625,grad_norm: 0.9999993113164763, iteration: 410918
loss: 1.0494182109832764,grad_norm: 0.8076003182259098, iteration: 410919
loss: 0.9536985158920288,grad_norm: 0.7483635827710677, iteration: 410920
loss: 1.0380902290344238,grad_norm: 0.9999998337328547, iteration: 410921
loss: 1.0214413404464722,grad_norm: 0.8708757885151022, iteration: 410922
loss: 1.0410592555999756,grad_norm: 0.9999997012602821, iteration: 410923
loss: 1.1103435754776,grad_norm: 0.9999990243036874, iteration: 410924
loss: 1.0083117485046387,grad_norm: 0.7430905913465041, iteration: 410925
loss: 1.0292654037475586,grad_norm: 0.9999996287036503, iteration: 410926
loss: 0.9873945116996765,grad_norm: 0.8074869167076328, iteration: 410927
loss: 0.9913711547851562,grad_norm: 0.8987187894466762, iteration: 410928
loss: 0.9438339471817017,grad_norm: 0.9686207043462562, iteration: 410929
loss: 0.957173228263855,grad_norm: 0.7887228717180962, iteration: 410930
loss: 1.0775166749954224,grad_norm: 0.9999998122994532, iteration: 410931
loss: 1.082495093345642,grad_norm: 0.9999991233247553, iteration: 410932
loss: 0.9851812124252319,grad_norm: 0.7459733657472396, iteration: 410933
loss: 1.0251096487045288,grad_norm: 0.8764241721769522, iteration: 410934
loss: 0.9861792922019958,grad_norm: 0.6560038152866141, iteration: 410935
loss: 0.9695904850959778,grad_norm: 0.7435160640372843, iteration: 410936
loss: 1.0244815349578857,grad_norm: 0.7842622080685901, iteration: 410937
loss: 1.0383306741714478,grad_norm: 0.8698822649367279, iteration: 410938
loss: 1.0091785192489624,grad_norm: 0.7963364368437932, iteration: 410939
loss: 1.0082345008850098,grad_norm: 0.9999997215889004, iteration: 410940
loss: 0.9867744445800781,grad_norm: 0.7893850976073958, iteration: 410941
loss: 1.0294932126998901,grad_norm: 0.7272185109223077, iteration: 410942
loss: 1.0440164804458618,grad_norm: 0.9999995572727269, iteration: 410943
loss: 1.0089426040649414,grad_norm: 0.8047826092955793, iteration: 410944
loss: 1.0670268535614014,grad_norm: 0.9999999000786544, iteration: 410945
loss: 1.0859709978103638,grad_norm: 0.9999995315046, iteration: 410946
loss: 1.0096608400344849,grad_norm: 0.765656725902726, iteration: 410947
loss: 1.0169798135757446,grad_norm: 0.6358989338582602, iteration: 410948
loss: 0.9755826592445374,grad_norm: 0.8214765518575265, iteration: 410949
loss: 1.0898617506027222,grad_norm: 0.9064218937323698, iteration: 410950
loss: 1.1957378387451172,grad_norm: 0.9999999903147789, iteration: 410951
loss: 0.9613229036331177,grad_norm: 0.8834260651381745, iteration: 410952
loss: 0.9784519076347351,grad_norm: 0.829797939233806, iteration: 410953
loss: 1.0783110857009888,grad_norm: 0.9999998924171533, iteration: 410954
loss: 0.9794716835021973,grad_norm: 0.8321464593546003, iteration: 410955
loss: 1.0020709037780762,grad_norm: 0.9999998547145621, iteration: 410956
loss: 1.0129724740982056,grad_norm: 0.999999786784591, iteration: 410957
loss: 1.0656256675720215,grad_norm: 0.9999999176988307, iteration: 410958
loss: 1.0420254468917847,grad_norm: 0.7196338905398405, iteration: 410959
loss: 0.9670297503471375,grad_norm: 0.6802225454517786, iteration: 410960
loss: 1.0218559503555298,grad_norm: 0.9999994151306392, iteration: 410961
loss: 1.1032699346542358,grad_norm: 0.8845459520053015, iteration: 410962
loss: 1.0394216775894165,grad_norm: 0.9999991483726414, iteration: 410963
loss: 1.0743927955627441,grad_norm: 0.9999992432754864, iteration: 410964
loss: 0.969529390335083,grad_norm: 0.7401180411519417, iteration: 410965
loss: 0.9995843768119812,grad_norm: 0.74390094984474, iteration: 410966
loss: 1.0997072458267212,grad_norm: 0.9999999527396153, iteration: 410967
loss: 1.0828016996383667,grad_norm: 0.8858051428642243, iteration: 410968
loss: 1.1589014530181885,grad_norm: 0.9999996303256781, iteration: 410969
loss: 1.161215901374817,grad_norm: 0.9999999853593954, iteration: 410970
loss: 1.1682549715042114,grad_norm: 0.9999996596671362, iteration: 410971
loss: 1.027629017829895,grad_norm: 0.9999998053728435, iteration: 410972
loss: 1.1781255006790161,grad_norm: 1.0000000284792052, iteration: 410973
loss: 1.0679690837860107,grad_norm: 0.9999999014973658, iteration: 410974
loss: 1.1841059923171997,grad_norm: 0.9999996550357153, iteration: 410975
loss: 1.0503246784210205,grad_norm: 0.829031496409521, iteration: 410976
loss: 1.0696563720703125,grad_norm: 0.9999996340240901, iteration: 410977
loss: 1.034448266029358,grad_norm: 0.7301007299691448, iteration: 410978
loss: 1.0173839330673218,grad_norm: 0.6953708181242455, iteration: 410979
loss: 1.0925136804580688,grad_norm: 0.9999996538554238, iteration: 410980
loss: 1.2369835376739502,grad_norm: 0.9999996308306349, iteration: 410981
loss: 1.0110770463943481,grad_norm: 0.999999552178337, iteration: 410982
loss: 1.0110434293746948,grad_norm: 0.999999694800922, iteration: 410983
loss: 1.0603971481323242,grad_norm: 0.9999997395798272, iteration: 410984
loss: 1.0082036256790161,grad_norm: 0.7734559288044306, iteration: 410985
loss: 1.1406079530715942,grad_norm: 0.9999997682023689, iteration: 410986
loss: 1.1842880249023438,grad_norm: 1.000000010733888, iteration: 410987
loss: 1.0418813228607178,grad_norm: 0.9999998559835237, iteration: 410988
loss: 1.0411211252212524,grad_norm: 1.000000017594144, iteration: 410989
loss: 1.1195133924484253,grad_norm: 0.9999997870804821, iteration: 410990
loss: 1.2368155717849731,grad_norm: 0.9999999235813045, iteration: 410991
loss: 1.0776519775390625,grad_norm: 0.9999994429717292, iteration: 410992
loss: 1.1263915300369263,grad_norm: 0.9999997580789478, iteration: 410993
loss: 1.0105479955673218,grad_norm: 0.9771965310031946, iteration: 410994
loss: 1.3796775341033936,grad_norm: 0.99999963770567, iteration: 410995
loss: 1.0444018840789795,grad_norm: 0.999999232316699, iteration: 410996
loss: 1.2046418190002441,grad_norm: 0.9999997028309626, iteration: 410997
loss: 0.9997583627700806,grad_norm: 0.8147470567461795, iteration: 410998
loss: 1.2118418216705322,grad_norm: 0.999999851240939, iteration: 410999
loss: 0.9864603281021118,grad_norm: 0.8423155168366568, iteration: 411000
loss: 1.1413776874542236,grad_norm: 0.9999999630260531, iteration: 411001
loss: 1.0195441246032715,grad_norm: 0.99999969289027, iteration: 411002
loss: 1.0432982444763184,grad_norm: 0.9909049957506244, iteration: 411003
loss: 1.0744109153747559,grad_norm: 0.9999991260194481, iteration: 411004
loss: 1.053331971168518,grad_norm: 0.9999998382210302, iteration: 411005
loss: 1.0809872150421143,grad_norm: 0.9999997663272611, iteration: 411006
loss: 0.9894174337387085,grad_norm: 0.9999999177057941, iteration: 411007
loss: 1.134324550628662,grad_norm: 0.9999992920171461, iteration: 411008
loss: 1.1325169801712036,grad_norm: 0.9999998452818829, iteration: 411009
loss: 1.0230350494384766,grad_norm: 0.7495294197225452, iteration: 411010
loss: 1.043828010559082,grad_norm: 0.999999150937585, iteration: 411011
loss: 1.1428910493850708,grad_norm: 0.9999992823282455, iteration: 411012
loss: 1.1638984680175781,grad_norm: 0.9999998807721113, iteration: 411013
loss: 1.1613823175430298,grad_norm: 0.99999982925364, iteration: 411014
loss: 1.029496669769287,grad_norm: 0.6896263739151499, iteration: 411015
loss: 1.0384944677352905,grad_norm: 0.805445480539611, iteration: 411016
loss: 1.0440804958343506,grad_norm: 0.9999992182365467, iteration: 411017
loss: 1.137045979499817,grad_norm: 0.9999994278253943, iteration: 411018
loss: 1.2316750288009644,grad_norm: 0.9999999160662253, iteration: 411019
loss: 1.023482322692871,grad_norm: 0.867064598498682, iteration: 411020
loss: 1.2685407400131226,grad_norm: 0.9999999640909762, iteration: 411021
loss: 1.025644063949585,grad_norm: 0.99999925892308, iteration: 411022
loss: 1.459669828414917,grad_norm: 0.9999997239543037, iteration: 411023
loss: 1.0056788921356201,grad_norm: 0.8825062585563079, iteration: 411024
loss: 1.0287953615188599,grad_norm: 0.9504195381764152, iteration: 411025
loss: 1.1629455089569092,grad_norm: 0.9999993484309845, iteration: 411026
loss: 1.03476881980896,grad_norm: 0.9938617624576398, iteration: 411027
loss: 1.334038496017456,grad_norm: 0.9999998697517929, iteration: 411028
loss: 1.188841462135315,grad_norm: 0.9999998519543781, iteration: 411029
loss: 1.1119730472564697,grad_norm: 0.9999996664942533, iteration: 411030
loss: 1.0178614854812622,grad_norm: 0.8265135698520749, iteration: 411031
loss: 1.1906628608703613,grad_norm: 0.9999993151854986, iteration: 411032
loss: 1.039577841758728,grad_norm: 0.7355889100602863, iteration: 411033
loss: 0.9997954368591309,grad_norm: 0.8729343055693096, iteration: 411034
loss: 1.021852731704712,grad_norm: 0.7177566907410948, iteration: 411035
loss: 1.114227056503296,grad_norm: 0.999999490765495, iteration: 411036
loss: 1.1080764532089233,grad_norm: 0.9999999281693753, iteration: 411037
loss: 0.983528196811676,grad_norm: 0.9999997066995177, iteration: 411038
loss: 1.1952972412109375,grad_norm: 0.999999741532944, iteration: 411039
loss: 1.1833685636520386,grad_norm: 0.9999997214728624, iteration: 411040
loss: 1.1498324871063232,grad_norm: 0.99999921024837, iteration: 411041
loss: 1.0870693922042847,grad_norm: 0.9999995747481655, iteration: 411042
loss: 0.9870687127113342,grad_norm: 0.7984381449313939, iteration: 411043
loss: 0.9902483820915222,grad_norm: 0.7946751456351024, iteration: 411044
loss: 1.1393139362335205,grad_norm: 0.9999995944474216, iteration: 411045
loss: 0.998393177986145,grad_norm: 0.9766758754898531, iteration: 411046
loss: 1.015869379043579,grad_norm: 0.9999993155590321, iteration: 411047
loss: 1.00705885887146,grad_norm: 0.9301343420440413, iteration: 411048
loss: 1.4590044021606445,grad_norm: 0.9999998598599678, iteration: 411049
loss: 0.9508231282234192,grad_norm: 0.709938424017616, iteration: 411050
loss: 0.9428883790969849,grad_norm: 0.8686526964125248, iteration: 411051
loss: 1.07869553565979,grad_norm: 0.9999999118659072, iteration: 411052
loss: 1.0179381370544434,grad_norm: 0.7793686979257548, iteration: 411053
loss: 1.1582030057907104,grad_norm: 0.9999997318745334, iteration: 411054
loss: 1.127015471458435,grad_norm: 0.9999990416597071, iteration: 411055
loss: 1.147348403930664,grad_norm: 0.9999999173524058, iteration: 411056
loss: 1.2184256315231323,grad_norm: 0.999999561746373, iteration: 411057
loss: 1.1004040241241455,grad_norm: 0.9999997644079518, iteration: 411058
loss: 1.0437308549880981,grad_norm: 0.9999994878873099, iteration: 411059
loss: 1.207500696182251,grad_norm: 0.9999996155980131, iteration: 411060
loss: 1.2427681684494019,grad_norm: 0.9999996262131119, iteration: 411061
loss: 1.0602911710739136,grad_norm: 0.9999994468962492, iteration: 411062
loss: 1.3413101434707642,grad_norm: 1.0000000817343107, iteration: 411063
loss: 1.3540313243865967,grad_norm: 0.9999999234331644, iteration: 411064
loss: 1.0542891025543213,grad_norm: 0.9999991677835901, iteration: 411065
loss: 1.0740975141525269,grad_norm: 0.8429368980504883, iteration: 411066
loss: 1.03218412399292,grad_norm: 0.999999166868972, iteration: 411067
loss: 1.3394707441329956,grad_norm: 0.999999793628684, iteration: 411068
loss: 1.160528540611267,grad_norm: 0.9999995630478558, iteration: 411069
loss: 1.021881341934204,grad_norm: 0.9999991445067677, iteration: 411070
loss: 1.3868705034255981,grad_norm: 0.9999991489571612, iteration: 411071
loss: 1.2586075067520142,grad_norm: 0.999999302310565, iteration: 411072
loss: 1.2106211185455322,grad_norm: 0.9999994019630052, iteration: 411073
loss: 1.110954999923706,grad_norm: 0.9999990777139439, iteration: 411074
loss: 1.0203914642333984,grad_norm: 0.9999997155405523, iteration: 411075
loss: 1.3352134227752686,grad_norm: 0.9999997306865203, iteration: 411076
loss: 1.219673752784729,grad_norm: 0.999999939031509, iteration: 411077
loss: 1.3787033557891846,grad_norm: 0.9999997842669314, iteration: 411078
loss: 1.0052082538604736,grad_norm: 0.6341121917964356, iteration: 411079
loss: 1.1373647451400757,grad_norm: 0.9999998366667346, iteration: 411080
loss: 1.0789932012557983,grad_norm: 0.9999993519757248, iteration: 411081
loss: 1.2237207889556885,grad_norm: 0.9999997976703832, iteration: 411082
loss: 1.058542013168335,grad_norm: 0.999999349042108, iteration: 411083
loss: 0.9842537045478821,grad_norm: 0.6125394651512404, iteration: 411084
loss: 1.1591064929962158,grad_norm: 0.9999995839021898, iteration: 411085
loss: 1.2263085842132568,grad_norm: 1.0000000556213449, iteration: 411086
loss: 1.0616310834884644,grad_norm: 0.9999998159674326, iteration: 411087
loss: 1.030957818031311,grad_norm: 1.000000011840874, iteration: 411088
loss: 1.096442699432373,grad_norm: 0.9999994435681523, iteration: 411089
loss: 1.0514358282089233,grad_norm: 0.9999990900368493, iteration: 411090
loss: 1.2886677980422974,grad_norm: 0.9999997265808933, iteration: 411091
loss: 1.0533099174499512,grad_norm: 0.7027212408397714, iteration: 411092
loss: 1.0629117488861084,grad_norm: 0.8843600214094628, iteration: 411093
loss: 1.0297777652740479,grad_norm: 0.9999991426923771, iteration: 411094
loss: 1.3227695226669312,grad_norm: 0.9999994607230964, iteration: 411095
loss: 0.9933730363845825,grad_norm: 0.8304967360547547, iteration: 411096
loss: 1.1732498407363892,grad_norm: 0.9999999531640603, iteration: 411097
loss: 1.083105206489563,grad_norm: 0.9999991750036612, iteration: 411098
loss: 1.2507004737854004,grad_norm: 0.9999997424050947, iteration: 411099
loss: 1.0469363927841187,grad_norm: 0.9999996120975775, iteration: 411100
loss: 1.1329272985458374,grad_norm: 0.9999996396630464, iteration: 411101
loss: 1.0460623502731323,grad_norm: 0.9999998838541295, iteration: 411102
loss: 1.0984517335891724,grad_norm: 0.9999993530793748, iteration: 411103
loss: 1.096853256225586,grad_norm: 0.9999998957399188, iteration: 411104
loss: 1.1347873210906982,grad_norm: 0.9999994611953329, iteration: 411105
loss: 1.1575812101364136,grad_norm: 0.9999998434650875, iteration: 411106
loss: 1.2441236972808838,grad_norm: 0.9999999362980244, iteration: 411107
loss: 1.0427258014678955,grad_norm: 0.9640106459860841, iteration: 411108
loss: 1.0074166059494019,grad_norm: 0.786031394444822, iteration: 411109
loss: 1.1068717241287231,grad_norm: 0.7965143439410743, iteration: 411110
loss: 1.0560616254806519,grad_norm: 0.9999998635711859, iteration: 411111
loss: 1.2036164999008179,grad_norm: 0.9999997345647426, iteration: 411112
loss: 1.0991266965866089,grad_norm: 0.999999327137775, iteration: 411113
loss: 1.083483338356018,grad_norm: 0.9999994713629079, iteration: 411114
loss: 1.0865490436553955,grad_norm: 0.9999999755781779, iteration: 411115
loss: 1.1360900402069092,grad_norm: 0.999999234518048, iteration: 411116
loss: 1.0750422477722168,grad_norm: 0.8768144515356263, iteration: 411117
loss: 1.058942437171936,grad_norm: 0.8180781857595679, iteration: 411118
loss: 1.0748056173324585,grad_norm: 1.000000008020453, iteration: 411119
loss: 1.0163480043411255,grad_norm: 0.8140724465732114, iteration: 411120
loss: 1.1190191507339478,grad_norm: 0.8364638059911067, iteration: 411121
loss: 1.095445990562439,grad_norm: 0.8155836967663751, iteration: 411122
loss: 1.0800542831420898,grad_norm: 0.9999992947815164, iteration: 411123
loss: 1.003240704536438,grad_norm: 0.999999030865503, iteration: 411124
loss: 1.088855266571045,grad_norm: 0.9999997818764969, iteration: 411125
loss: 1.0041240453720093,grad_norm: 0.9192069971380639, iteration: 411126
loss: 1.173904299736023,grad_norm: 0.9999991013236983, iteration: 411127
loss: 1.0248507261276245,grad_norm: 0.9999994392279682, iteration: 411128
loss: 1.0349935293197632,grad_norm: 0.9999993509927823, iteration: 411129
loss: 0.9952224493026733,grad_norm: 0.9999996026948221, iteration: 411130
loss: 1.0045331716537476,grad_norm: 0.9999990962652666, iteration: 411131
loss: 1.098908543586731,grad_norm: 0.9999996306689113, iteration: 411132
loss: 1.103515625,grad_norm: 0.9999995390632037, iteration: 411133
loss: 1.0526618957519531,grad_norm: 0.9670019893986745, iteration: 411134
loss: 0.9727593064308167,grad_norm: 0.7820720962031726, iteration: 411135
loss: 1.0766427516937256,grad_norm: 0.8829052628127638, iteration: 411136
loss: 1.0522609949111938,grad_norm: 0.9999999908990056, iteration: 411137
loss: 1.0706102848052979,grad_norm: 0.9864891631155983, iteration: 411138
loss: 1.1728209257125854,grad_norm: 0.9999990964281872, iteration: 411139
loss: 1.0442326068878174,grad_norm: 0.9999995412882067, iteration: 411140
loss: 1.2514431476593018,grad_norm: 0.9999997023201126, iteration: 411141
loss: 1.0374294519424438,grad_norm: 0.999999778296358, iteration: 411142
loss: 1.2211410999298096,grad_norm: 0.9999998759805989, iteration: 411143
loss: 1.113465428352356,grad_norm: 0.9999993493288897, iteration: 411144
loss: 0.9977085590362549,grad_norm: 0.8128613872393458, iteration: 411145
loss: 0.9741717576980591,grad_norm: 0.9452209870915125, iteration: 411146
loss: 1.029473066329956,grad_norm: 0.8944447236873293, iteration: 411147
loss: 1.019659399986267,grad_norm: 0.9999995738353947, iteration: 411148
loss: 1.0080033540725708,grad_norm: 0.9999997824599234, iteration: 411149
loss: 1.0138041973114014,grad_norm: 0.9999995697828018, iteration: 411150
loss: 1.073939323425293,grad_norm: 1.0000000122115378, iteration: 411151
loss: 1.1069413423538208,grad_norm: 0.9999992047447217, iteration: 411152
loss: 1.044967532157898,grad_norm: 0.9999991728286983, iteration: 411153
loss: 1.0509955883026123,grad_norm: 0.9999993955028411, iteration: 411154
loss: 1.0432507991790771,grad_norm: 0.9712431426971634, iteration: 411155
loss: 1.036226749420166,grad_norm: 1.0000000120554517, iteration: 411156
loss: 1.0253973007202148,grad_norm: 0.861856095561781, iteration: 411157
loss: 0.9810227751731873,grad_norm: 0.6505304472894464, iteration: 411158
loss: 1.1225638389587402,grad_norm: 0.9999992177928233, iteration: 411159
loss: 1.1646584272384644,grad_norm: 0.9999992582462164, iteration: 411160
loss: 1.0507822036743164,grad_norm: 0.9052955923274864, iteration: 411161
loss: 0.9990782737731934,grad_norm: 0.9999999015754111, iteration: 411162
loss: 1.1500307321548462,grad_norm: 0.9999998523368419, iteration: 411163
loss: 1.0365463495254517,grad_norm: 0.7730015814883495, iteration: 411164
loss: 1.0818265676498413,grad_norm: 1.0000000581230228, iteration: 411165
loss: 1.1081418991088867,grad_norm: 0.9999993864788321, iteration: 411166
loss: 1.2256656885147095,grad_norm: 0.9999998642320737, iteration: 411167
loss: 1.0446138381958008,grad_norm: 0.9999997731108439, iteration: 411168
loss: 1.0398648977279663,grad_norm: 0.949125971693003, iteration: 411169
loss: 1.1333670616149902,grad_norm: 0.9999998959175911, iteration: 411170
loss: 1.10374915599823,grad_norm: 0.9999991949561455, iteration: 411171
loss: 1.0657442808151245,grad_norm: 0.9974487114557692, iteration: 411172
loss: 1.0349187850952148,grad_norm: 0.9618852736538656, iteration: 411173
loss: 1.0116673707962036,grad_norm: 0.9999992634959298, iteration: 411174
loss: 1.2528127431869507,grad_norm: 0.999999320007423, iteration: 411175
loss: 1.114425778388977,grad_norm: 0.9999996073573035, iteration: 411176
loss: 1.1601330041885376,grad_norm: 0.9999996211902447, iteration: 411177
loss: 1.1127172708511353,grad_norm: 0.9999991746007636, iteration: 411178
loss: 1.0596657991409302,grad_norm: 0.9999992397837838, iteration: 411179
loss: 1.0834380388259888,grad_norm: 0.9999998756799623, iteration: 411180
loss: 1.0372650623321533,grad_norm: 0.7801539962633575, iteration: 411181
loss: 1.006547451019287,grad_norm: 0.9999990906632413, iteration: 411182
loss: 0.9672569036483765,grad_norm: 0.6915453341154588, iteration: 411183
loss: 1.0181591510772705,grad_norm: 0.8086299591974696, iteration: 411184
loss: 1.0371241569519043,grad_norm: 0.9999999846176458, iteration: 411185
loss: 1.0052603483200073,grad_norm: 0.8566245647698086, iteration: 411186
loss: 0.9926262497901917,grad_norm: 0.930894586856068, iteration: 411187
loss: 1.0028793811798096,grad_norm: 0.8635847457219975, iteration: 411188
loss: 0.9926609396934509,grad_norm: 0.6515539873936822, iteration: 411189
loss: 1.0468478202819824,grad_norm: 0.9999990950377697, iteration: 411190
loss: 0.9862582087516785,grad_norm: 0.910674262047649, iteration: 411191
loss: 1.0140730142593384,grad_norm: 0.67431307538244, iteration: 411192
loss: 1.0503491163253784,grad_norm: 0.9999997739108334, iteration: 411193
loss: 1.0670892000198364,grad_norm: 0.9999995061845667, iteration: 411194
loss: 1.0268834829330444,grad_norm: 0.9999995536023862, iteration: 411195
loss: 1.0066224336624146,grad_norm: 0.8770823114632356, iteration: 411196
loss: 1.013344407081604,grad_norm: 0.8051237772588654, iteration: 411197
loss: 0.9908357262611389,grad_norm: 0.7330466010693354, iteration: 411198
loss: 0.9983623623847961,grad_norm: 0.8392478194086934, iteration: 411199
loss: 0.9671034216880798,grad_norm: 0.8449738700144981, iteration: 411200
loss: 0.9921219944953918,grad_norm: 0.7142421658568981, iteration: 411201
loss: 1.0823718309402466,grad_norm: 0.9999991401255653, iteration: 411202
loss: 1.0083398818969727,grad_norm: 0.6819612005792334, iteration: 411203
loss: 0.9996693134307861,grad_norm: 0.9038806792219477, iteration: 411204
loss: 1.0322879552841187,grad_norm: 0.9999990640788516, iteration: 411205
loss: 1.0376384258270264,grad_norm: 0.9999992514293254, iteration: 411206
loss: 1.0648891925811768,grad_norm: 0.9316278985867281, iteration: 411207
loss: 1.0382636785507202,grad_norm: 0.9999991898050986, iteration: 411208
loss: 1.0896403789520264,grad_norm: 0.9999999784396568, iteration: 411209
loss: 1.083619236946106,grad_norm: 0.9469076515084115, iteration: 411210
loss: 1.07673180103302,grad_norm: 0.9999992733193284, iteration: 411211
loss: 1.0517154932022095,grad_norm: 0.9405840473870927, iteration: 411212
loss: 1.0263301134109497,grad_norm: 0.9999993058997985, iteration: 411213
loss: 0.997058629989624,grad_norm: 0.9999995868010471, iteration: 411214
loss: 1.0559899806976318,grad_norm: 0.9999992039843167, iteration: 411215
loss: 1.025093674659729,grad_norm: 0.8115517392536985, iteration: 411216
loss: 1.0013221502304077,grad_norm: 0.7354486420148336, iteration: 411217
loss: 0.9875309467315674,grad_norm: 0.9999990124989339, iteration: 411218
loss: 1.0309703350067139,grad_norm: 0.8526240448225404, iteration: 411219
loss: 1.0385758876800537,grad_norm: 0.9999997294566174, iteration: 411220
loss: 1.0606211423873901,grad_norm: 0.9598725553864454, iteration: 411221
loss: 1.0344064235687256,grad_norm: 0.9370846426742531, iteration: 411222
loss: 1.193337082862854,grad_norm: 0.9999993670218097, iteration: 411223
loss: 1.1294275522232056,grad_norm: 0.9999999548129845, iteration: 411224
loss: 1.022724986076355,grad_norm: 0.7751598578139218, iteration: 411225
loss: 1.0228607654571533,grad_norm: 0.9999991926951407, iteration: 411226
loss: 1.1254470348358154,grad_norm: 0.9999996645701161, iteration: 411227
loss: 1.0057320594787598,grad_norm: 0.9999991996678435, iteration: 411228
loss: 1.0641365051269531,grad_norm: 0.9999992751464725, iteration: 411229
loss: 0.9813074469566345,grad_norm: 0.7581681515961233, iteration: 411230
loss: 1.0300370454788208,grad_norm: 0.999999351133233, iteration: 411231
loss: 1.1331877708435059,grad_norm: 0.9616185075918101, iteration: 411232
loss: 1.092972993850708,grad_norm: 0.9999993304803497, iteration: 411233
loss: 1.0232340097427368,grad_norm: 0.6877626881989067, iteration: 411234
loss: 1.0033830404281616,grad_norm: 0.9999996387008654, iteration: 411235
loss: 1.0085835456848145,grad_norm: 0.8943331167132333, iteration: 411236
loss: 1.0397918224334717,grad_norm: 0.9999992758710546, iteration: 411237
loss: 1.0045979022979736,grad_norm: 0.9999990997040095, iteration: 411238
loss: 1.0740503072738647,grad_norm: 0.8839997164991245, iteration: 411239
loss: 1.0325101613998413,grad_norm: 0.6951087045990384, iteration: 411240
loss: 1.0721837282180786,grad_norm: 0.9296264302827034, iteration: 411241
loss: 1.0491780042648315,grad_norm: 0.9999992310760375, iteration: 411242
loss: 1.0159398317337036,grad_norm: 0.7712372478096442, iteration: 411243
loss: 0.9931212067604065,grad_norm: 0.8215847722006049, iteration: 411244
loss: 1.0460011959075928,grad_norm: 0.8779930834268292, iteration: 411245
loss: 1.0702342987060547,grad_norm: 0.9999994645441672, iteration: 411246
loss: 0.9927282333374023,grad_norm: 0.9999999266349153, iteration: 411247
loss: 1.0460221767425537,grad_norm: 0.999999866611355, iteration: 411248
loss: 1.0347590446472168,grad_norm: 0.9999996077089444, iteration: 411249
loss: 1.1339566707611084,grad_norm: 0.9999995365192857, iteration: 411250
loss: 1.0415805578231812,grad_norm: 0.8251131438394457, iteration: 411251
loss: 0.9910517930984497,grad_norm: 0.9999999438593962, iteration: 411252
loss: 0.9895763397216797,grad_norm: 0.7356218452569161, iteration: 411253
loss: 1.0072413682937622,grad_norm: 0.759542368669572, iteration: 411254
loss: 1.007117748260498,grad_norm: 0.999999263868438, iteration: 411255
loss: 1.007712721824646,grad_norm: 0.8245750830931823, iteration: 411256
loss: 0.9968048334121704,grad_norm: 0.6851908876003029, iteration: 411257
loss: 0.9986486434936523,grad_norm: 0.795895423645799, iteration: 411258
loss: 0.9957708716392517,grad_norm: 0.9999998503269097, iteration: 411259
loss: 0.9812592267990112,grad_norm: 0.70635264428645, iteration: 411260
loss: 1.129185438156128,grad_norm: 0.9999996844476453, iteration: 411261
loss: 1.0051335096359253,grad_norm: 0.999999084566648, iteration: 411262
loss: 1.0396125316619873,grad_norm: 0.8798825835657986, iteration: 411263
loss: 1.0446335077285767,grad_norm: 0.8566798986428148, iteration: 411264
loss: 0.9854286909103394,grad_norm: 0.805838430306734, iteration: 411265
loss: 1.0120395421981812,grad_norm: 0.8425009208992993, iteration: 411266
loss: 0.9856810569763184,grad_norm: 0.9537155495314069, iteration: 411267
loss: 0.9679768085479736,grad_norm: 0.7939734700052546, iteration: 411268
loss: 1.0477426052093506,grad_norm: 0.999999814147993, iteration: 411269
loss: 0.9895085096359253,grad_norm: 0.910930853736775, iteration: 411270
loss: 0.9632390141487122,grad_norm: 0.7664918587641218, iteration: 411271
loss: 0.9953436255455017,grad_norm: 0.9999992125845639, iteration: 411272
loss: 0.967307448387146,grad_norm: 0.7807170898291904, iteration: 411273
loss: 0.9945379495620728,grad_norm: 0.685104545923967, iteration: 411274
loss: 1.0143530368804932,grad_norm: 0.7508950854145438, iteration: 411275
loss: 0.9974736571311951,grad_norm: 0.8272752191690408, iteration: 411276
loss: 1.0119469165802002,grad_norm: 0.7004400266593382, iteration: 411277
loss: 0.9979749917984009,grad_norm: 0.9631562601411718, iteration: 411278
loss: 1.0544400215148926,grad_norm: 0.9986258002165062, iteration: 411279
loss: 1.0229580402374268,grad_norm: 0.7689919422415078, iteration: 411280
loss: 1.0495754480361938,grad_norm: 0.7778632331823915, iteration: 411281
loss: 1.0158560276031494,grad_norm: 0.6824749091939979, iteration: 411282
loss: 0.9514343738555908,grad_norm: 0.7186191446750827, iteration: 411283
loss: 0.9892371296882629,grad_norm: 0.8097643699413397, iteration: 411284
loss: 1.0871683359146118,grad_norm: 0.9999990757984213, iteration: 411285
loss: 1.0202800035476685,grad_norm: 0.999999655471499, iteration: 411286
loss: 0.9979620575904846,grad_norm: 0.686622213256154, iteration: 411287
loss: 1.0131222009658813,grad_norm: 0.744320801843495, iteration: 411288
loss: 1.1344629526138306,grad_norm: 0.9999997784745319, iteration: 411289
loss: 1.0174282789230347,grad_norm: 0.7907601972266657, iteration: 411290
loss: 1.0935274362564087,grad_norm: 0.9776473932525929, iteration: 411291
loss: 0.9680500626564026,grad_norm: 0.784846318206925, iteration: 411292
loss: 0.9824613332748413,grad_norm: 0.7544628788344997, iteration: 411293
loss: 0.993453860282898,grad_norm: 0.7818022737726823, iteration: 411294
loss: 1.029991865158081,grad_norm: 0.8257592123833795, iteration: 411295
loss: 1.0010701417922974,grad_norm: 0.6469678667516926, iteration: 411296
loss: 1.0593417882919312,grad_norm: 0.873136756482351, iteration: 411297
loss: 1.2424836158752441,grad_norm: 0.9999998946582243, iteration: 411298
loss: 0.9673692584037781,grad_norm: 0.8332550774706741, iteration: 411299
loss: 0.9782778024673462,grad_norm: 0.7898606556396726, iteration: 411300
loss: 1.0106701850891113,grad_norm: 0.9999992887182905, iteration: 411301
loss: 1.325097918510437,grad_norm: 0.9999994582899094, iteration: 411302
loss: 1.2589294910430908,grad_norm: 0.999999439636108, iteration: 411303
loss: 1.0188604593276978,grad_norm: 0.8322759266614115, iteration: 411304
loss: 1.0098825693130493,grad_norm: 0.7437525475649839, iteration: 411305
loss: 0.99332594871521,grad_norm: 0.83929005815288, iteration: 411306
loss: 1.0293956995010376,grad_norm: 0.9999992273018959, iteration: 411307
loss: 1.0048047304153442,grad_norm: 0.9032765505698334, iteration: 411308
loss: 1.022633671760559,grad_norm: 0.9999992407203419, iteration: 411309
loss: 1.143326997756958,grad_norm: 0.9999995890154959, iteration: 411310
loss: 1.0016916990280151,grad_norm: 0.8399350003648517, iteration: 411311
loss: 1.0810328722000122,grad_norm: 0.9999998746214896, iteration: 411312
loss: 0.9845701456069946,grad_norm: 0.7578321852901775, iteration: 411313
loss: 1.0923818349838257,grad_norm: 0.9999990061981452, iteration: 411314
loss: 0.9968199729919434,grad_norm: 0.9999990695718277, iteration: 411315
loss: 1.0097301006317139,grad_norm: 0.7238378820478987, iteration: 411316
loss: 1.1345248222351074,grad_norm: 0.9277832795414058, iteration: 411317
loss: 1.013390302658081,grad_norm: 0.7548887469400204, iteration: 411318
loss: 0.9872629642486572,grad_norm: 0.6341367014146267, iteration: 411319
loss: 1.0004273653030396,grad_norm: 0.999999478215886, iteration: 411320
loss: 0.9837859272956848,grad_norm: 0.7767072544321751, iteration: 411321
loss: 1.0526504516601562,grad_norm: 0.9999997152788568, iteration: 411322
loss: 1.0902560949325562,grad_norm: 0.7843226508337615, iteration: 411323
loss: 1.080777883529663,grad_norm: 0.9999998353073694, iteration: 411324
loss: 0.9892762303352356,grad_norm: 0.7040668037889646, iteration: 411325
loss: 1.1014090776443481,grad_norm: 0.9999997873581952, iteration: 411326
loss: 1.0889499187469482,grad_norm: 0.9999992416577629, iteration: 411327
loss: 1.0091283321380615,grad_norm: 0.9999999812261693, iteration: 411328
loss: 1.0304388999938965,grad_norm: 0.8275838435134103, iteration: 411329
loss: 1.0449652671813965,grad_norm: 0.7946725733322472, iteration: 411330
loss: 1.0482748746871948,grad_norm: 0.9613319036801906, iteration: 411331
loss: 1.074598789215088,grad_norm: 0.9999995603824547, iteration: 411332
loss: 0.9840502142906189,grad_norm: 0.9999995445452458, iteration: 411333
loss: 1.0050740242004395,grad_norm: 0.8183499187484679, iteration: 411334
loss: 0.9847803711891174,grad_norm: 0.7098958337003136, iteration: 411335
loss: 0.9990699291229248,grad_norm: 0.999999726538563, iteration: 411336
loss: 1.0592496395111084,grad_norm: 0.9999992199238125, iteration: 411337
loss: 1.0642313957214355,grad_norm: 0.9999998117054757, iteration: 411338
loss: 1.0788331031799316,grad_norm: 0.9999992134726666, iteration: 411339
loss: 1.025524377822876,grad_norm: 0.7850808906705743, iteration: 411340
loss: 0.9642379879951477,grad_norm: 0.6903748418326715, iteration: 411341
loss: 0.9929335713386536,grad_norm: 0.7899939439361696, iteration: 411342
loss: 0.9937137365341187,grad_norm: 0.9860276687701696, iteration: 411343
loss: 1.0683064460754395,grad_norm: 0.9347349526704155, iteration: 411344
loss: 1.0060678720474243,grad_norm: 0.7830252235806532, iteration: 411345
loss: 0.9588412046432495,grad_norm: 0.6728158690426962, iteration: 411346
loss: 1.0333229303359985,grad_norm: 0.825575772239838, iteration: 411347
loss: 0.9969823956489563,grad_norm: 0.8063199027969975, iteration: 411348
loss: 1.0359526872634888,grad_norm: 0.8190494267059136, iteration: 411349
loss: 1.0520676374435425,grad_norm: 0.9999999825636554, iteration: 411350
loss: 1.0895256996154785,grad_norm: 0.99999993809129, iteration: 411351
loss: 0.9874476194381714,grad_norm: 0.7565628623550386, iteration: 411352
loss: 1.0244468450546265,grad_norm: 0.9999996498177945, iteration: 411353
loss: 1.0980579853057861,grad_norm: 0.9065736502885648, iteration: 411354
loss: 0.9895363450050354,grad_norm: 0.6235434222076773, iteration: 411355
loss: 0.9959210157394409,grad_norm: 0.7462455553909104, iteration: 411356
loss: 0.9973246455192566,grad_norm: 0.9320426719026381, iteration: 411357
loss: 0.9888181090354919,grad_norm: 0.9999999542027136, iteration: 411358
loss: 1.0324665307998657,grad_norm: 0.7497271538217947, iteration: 411359
loss: 1.029632806777954,grad_norm: 0.8970131722615452, iteration: 411360
loss: 1.0075856447219849,grad_norm: 0.7561835110831407, iteration: 411361
loss: 1.018040657043457,grad_norm: 0.8215995215580141, iteration: 411362
loss: 1.026922583580017,grad_norm: 0.9999990817322519, iteration: 411363
loss: 1.0506662130355835,grad_norm: 0.9999996711285434, iteration: 411364
loss: 0.958452582359314,grad_norm: 0.8677466749075765, iteration: 411365
loss: 1.0018733739852905,grad_norm: 0.7747385593125694, iteration: 411366
loss: 1.0000438690185547,grad_norm: 0.7776613944061089, iteration: 411367
loss: 0.9965118169784546,grad_norm: 0.8756747421240707, iteration: 411368
loss: 1.047339677810669,grad_norm: 0.9999991802754448, iteration: 411369
loss: 1.0509074926376343,grad_norm: 0.8836316855719124, iteration: 411370
loss: 1.026035189628601,grad_norm: 0.7088042482284921, iteration: 411371
loss: 1.0010432004928589,grad_norm: 0.6936351170171162, iteration: 411372
loss: 1.0563114881515503,grad_norm: 0.7760416442506004, iteration: 411373
loss: 0.9737972617149353,grad_norm: 0.7459948945509655, iteration: 411374
loss: 1.0118829011917114,grad_norm: 0.7586726362608953, iteration: 411375
loss: 1.032018780708313,grad_norm: 0.7845032475647904, iteration: 411376
loss: 0.9840115904808044,grad_norm: 0.8122133841736389, iteration: 411377
loss: 1.0935211181640625,grad_norm: 0.9999992260566929, iteration: 411378
loss: 0.985106348991394,grad_norm: 0.7931349334094381, iteration: 411379
loss: 0.9797424674034119,grad_norm: 0.8356082246886455, iteration: 411380
loss: 1.1629149913787842,grad_norm: 0.8760136259828585, iteration: 411381
loss: 1.002417802810669,grad_norm: 0.8097865178535634, iteration: 411382
loss: 0.9816184043884277,grad_norm: 0.8948075294008796, iteration: 411383
loss: 1.0206105709075928,grad_norm: 0.8409135108142167, iteration: 411384
loss: 1.0174732208251953,grad_norm: 0.7031703993978954, iteration: 411385
loss: 1.1846046447753906,grad_norm: 0.9661720886944928, iteration: 411386
loss: 1.022815227508545,grad_norm: 0.6448983005633468, iteration: 411387
loss: 1.0391747951507568,grad_norm: 0.8710442141944723, iteration: 411388
loss: 1.0119290351867676,grad_norm: 0.9999991754468353, iteration: 411389
loss: 0.9968074560165405,grad_norm: 0.8949830588021697, iteration: 411390
loss: 0.9949679374694824,grad_norm: 0.8037925259103796, iteration: 411391
loss: 1.0294103622436523,grad_norm: 0.7807382158159767, iteration: 411392
loss: 1.1045535802841187,grad_norm: 0.9999997058567764, iteration: 411393
loss: 1.069106101989746,grad_norm: 0.8389238670297333, iteration: 411394
loss: 1.007318139076233,grad_norm: 0.6994197111164883, iteration: 411395
loss: 1.0833860635757446,grad_norm: 0.9999991674578348, iteration: 411396
loss: 0.993904709815979,grad_norm: 0.7284679071366632, iteration: 411397
loss: 1.026917815208435,grad_norm: 0.7295122437048605, iteration: 411398
loss: 1.0044230222702026,grad_norm: 0.673871165976246, iteration: 411399
loss: 1.0321402549743652,grad_norm: 0.9999990560054776, iteration: 411400
loss: 1.0086396932601929,grad_norm: 0.725805147578613, iteration: 411401
loss: 0.9858137965202332,grad_norm: 0.9715769569884682, iteration: 411402
loss: 1.0326765775680542,grad_norm: 0.8147688358583428, iteration: 411403
loss: 0.9866068363189697,grad_norm: 0.8829638622361753, iteration: 411404
loss: 1.0312782526016235,grad_norm: 0.9999991185291885, iteration: 411405
loss: 0.9973009824752808,grad_norm: 1.000000023643858, iteration: 411406
loss: 0.9992806911468506,grad_norm: 0.7284659563276457, iteration: 411407
loss: 1.0136525630950928,grad_norm: 0.6508773971344396, iteration: 411408
loss: 1.0854254961013794,grad_norm: 0.9999991836221368, iteration: 411409
loss: 1.0226726531982422,grad_norm: 0.9999997530080793, iteration: 411410
loss: 0.9902659058570862,grad_norm: 0.6744319224150647, iteration: 411411
loss: 1.0197023153305054,grad_norm: 0.8143028984968796, iteration: 411412
loss: 0.9833724498748779,grad_norm: 0.9301158008414646, iteration: 411413
loss: 0.9796347618103027,grad_norm: 0.9999994871657466, iteration: 411414
loss: 0.9799943566322327,grad_norm: 0.8268100929966722, iteration: 411415
loss: 0.9747849702835083,grad_norm: 0.868736442956879, iteration: 411416
loss: 0.9497185349464417,grad_norm: 0.7580654733899442, iteration: 411417
loss: 0.9655125737190247,grad_norm: 0.7584976963649039, iteration: 411418
loss: 0.989563524723053,grad_norm: 0.8943702339466573, iteration: 411419
loss: 1.1088852882385254,grad_norm: 0.9999994763927371, iteration: 411420
loss: 1.0158816576004028,grad_norm: 0.8101819541378549, iteration: 411421
loss: 1.0384386777877808,grad_norm: 0.999999923648566, iteration: 411422
loss: 1.0168485641479492,grad_norm: 0.9999995133328945, iteration: 411423
loss: 1.0132452249526978,grad_norm: 0.7088102581958893, iteration: 411424
loss: 1.0249054431915283,grad_norm: 0.7849189492052816, iteration: 411425
loss: 0.9678879380226135,grad_norm: 0.7323868179635344, iteration: 411426
loss: 0.997586190700531,grad_norm: 0.9999996374819281, iteration: 411427
loss: 1.0273281335830688,grad_norm: 0.9146977959108209, iteration: 411428
loss: 1.0110775232315063,grad_norm: 0.7618345184854364, iteration: 411429
loss: 0.9981090426445007,grad_norm: 0.9718414245364955, iteration: 411430
loss: 1.2912421226501465,grad_norm: 0.8656840017846046, iteration: 411431
loss: 0.9785045981407166,grad_norm: 0.8532384708878045, iteration: 411432
loss: 1.1107096672058105,grad_norm: 0.9999995991193623, iteration: 411433
loss: 1.0055937767028809,grad_norm: 0.9609780675126803, iteration: 411434
loss: 1.0176345109939575,grad_norm: 0.8400847877755854, iteration: 411435
loss: 1.0062090158462524,grad_norm: 0.8146120894212795, iteration: 411436
loss: 0.998697817325592,grad_norm: 0.7064217164477323, iteration: 411437
loss: 1.019095540046692,grad_norm: 0.7742857049829691, iteration: 411438
loss: 1.0982975959777832,grad_norm: 0.9999995212671869, iteration: 411439
loss: 1.0184818506240845,grad_norm: 0.7292292908249264, iteration: 411440
loss: 0.9974461793899536,grad_norm: 0.8358943840907583, iteration: 411441
loss: 1.0110619068145752,grad_norm: 0.8067324901756836, iteration: 411442
loss: 1.0093737840652466,grad_norm: 0.8381228398458055, iteration: 411443
loss: 1.063370704650879,grad_norm: 0.9149671506892255, iteration: 411444
loss: 1.0199413299560547,grad_norm: 0.9428183853342897, iteration: 411445
loss: 0.992184042930603,grad_norm: 0.7235177997558182, iteration: 411446
loss: 1.004797101020813,grad_norm: 0.9504031620086936, iteration: 411447
loss: 1.0347415208816528,grad_norm: 0.9999995436628019, iteration: 411448
loss: 0.966831386089325,grad_norm: 0.8464003645884445, iteration: 411449
loss: 1.1343873739242554,grad_norm: 0.9999991353424765, iteration: 411450
loss: 1.011601209640503,grad_norm: 0.6640978330784435, iteration: 411451
loss: 1.0185227394104004,grad_norm: 0.8966812762241426, iteration: 411452
loss: 1.1412241458892822,grad_norm: 0.9366838118399208, iteration: 411453
loss: 1.0013712644577026,grad_norm: 0.7270182594790461, iteration: 411454
loss: 1.0215336084365845,grad_norm: 0.7906287150582447, iteration: 411455
loss: 0.9879677891731262,grad_norm: 0.8407393338793427, iteration: 411456
loss: 0.960049569606781,grad_norm: 0.8491274870815404, iteration: 411457
loss: 1.0169641971588135,grad_norm: 0.7750324706700706, iteration: 411458
loss: 0.9861114025115967,grad_norm: 0.6819502975088517, iteration: 411459
loss: 1.0222951173782349,grad_norm: 0.9999991001022271, iteration: 411460
loss: 1.1432167291641235,grad_norm: 0.9668911226600476, iteration: 411461
loss: 0.9979847073554993,grad_norm: 0.8528139667140721, iteration: 411462
loss: 0.9893435835838318,grad_norm: 0.9030792845393976, iteration: 411463
loss: 1.0061453580856323,grad_norm: 0.7198824242775916, iteration: 411464
loss: 1.1826634407043457,grad_norm: 0.9999998812044989, iteration: 411465
loss: 0.9727581739425659,grad_norm: 0.7187498057185876, iteration: 411466
loss: 0.9893695712089539,grad_norm: 0.9999990896002731, iteration: 411467
loss: 0.9988222122192383,grad_norm: 0.735946607865235, iteration: 411468
loss: 0.9938310980796814,grad_norm: 0.8476030800648532, iteration: 411469
loss: 1.0320944786071777,grad_norm: 0.7391720663083778, iteration: 411470
loss: 1.0041848421096802,grad_norm: 0.9999993598588807, iteration: 411471
loss: 1.019322156906128,grad_norm: 0.6841567560555515, iteration: 411472
loss: 1.0169070959091187,grad_norm: 0.6905083246579948, iteration: 411473
loss: 1.0276310443878174,grad_norm: 0.8439170681166958, iteration: 411474
loss: 0.9887692928314209,grad_norm: 0.715910655345287, iteration: 411475
loss: 0.9947855472564697,grad_norm: 0.9999997277519846, iteration: 411476
loss: 0.9488963484764099,grad_norm: 0.9800033337335302, iteration: 411477
loss: 1.01702082157135,grad_norm: 0.9999992995592244, iteration: 411478
loss: 1.0269396305084229,grad_norm: 0.9999993647953972, iteration: 411479
loss: 1.035888433456421,grad_norm: 0.7469022958381186, iteration: 411480
loss: 1.0384230613708496,grad_norm: 0.9999996407681041, iteration: 411481
loss: 1.0626834630966187,grad_norm: 0.999999134852772, iteration: 411482
loss: 0.9718414545059204,grad_norm: 0.7985656923868467, iteration: 411483
loss: 1.0059363842010498,grad_norm: 0.9999997062559562, iteration: 411484
loss: 0.9901940822601318,grad_norm: 0.7880902986595448, iteration: 411485
loss: 1.0050150156021118,grad_norm: 0.820178859564559, iteration: 411486
loss: 1.0078725814819336,grad_norm: 0.8012188066950692, iteration: 411487
loss: 1.0153042078018188,grad_norm: 0.7366107694465269, iteration: 411488
loss: 1.011670470237732,grad_norm: 0.7159597601030678, iteration: 411489
loss: 0.9841511249542236,grad_norm: 0.9999991479435769, iteration: 411490
loss: 1.0168198347091675,grad_norm: 0.7550515003598991, iteration: 411491
loss: 1.0124690532684326,grad_norm: 0.7784157693113719, iteration: 411492
loss: 1.020721673965454,grad_norm: 0.9090593324143407, iteration: 411493
loss: 1.1055792570114136,grad_norm: 0.9999993499961364, iteration: 411494
loss: 1.0230945348739624,grad_norm: 0.8969490702225484, iteration: 411495
loss: 0.9685202836990356,grad_norm: 0.7842351679354598, iteration: 411496
loss: 0.9765298962593079,grad_norm: 0.7316360489461143, iteration: 411497
loss: 0.9716872572898865,grad_norm: 0.7122621340136537, iteration: 411498
loss: 1.0243228673934937,grad_norm: 0.8756530622520545, iteration: 411499
loss: 1.014612078666687,grad_norm: 0.8763729223545197, iteration: 411500
loss: 0.980769157409668,grad_norm: 0.8717145879631549, iteration: 411501
loss: 0.9863056540489197,grad_norm: 0.8346334760676192, iteration: 411502
loss: 0.9907649755477905,grad_norm: 0.8111944787673204, iteration: 411503
loss: 0.9835193157196045,grad_norm: 0.7859673832237841, iteration: 411504
loss: 1.0302071571350098,grad_norm: 0.928618988435427, iteration: 411505
loss: 1.0484702587127686,grad_norm: 0.999999882965822, iteration: 411506
loss: 0.9446811079978943,grad_norm: 0.8153716373095407, iteration: 411507
loss: 1.0300427675247192,grad_norm: 0.8065801047568992, iteration: 411508
loss: 0.9673100113868713,grad_norm: 0.9355060399800367, iteration: 411509
loss: 1.044640302658081,grad_norm: 0.7314944721723211, iteration: 411510
loss: 0.9646179676055908,grad_norm: 0.8585613765676713, iteration: 411511
loss: 1.1129469871520996,grad_norm: 0.9999994531164021, iteration: 411512
loss: 1.010350227355957,grad_norm: 0.8290823683512377, iteration: 411513
loss: 0.9541186690330505,grad_norm: 0.6957542788450284, iteration: 411514
loss: 1.0569790601730347,grad_norm: 0.9535673928192638, iteration: 411515
loss: 0.9888198971748352,grad_norm: 0.9215046599965016, iteration: 411516
loss: 0.9642289876937866,grad_norm: 0.803236114782729, iteration: 411517
loss: 0.9749827980995178,grad_norm: 0.9999991537280348, iteration: 411518
loss: 1.0004611015319824,grad_norm: 0.7097867731991185, iteration: 411519
loss: 1.0001064538955688,grad_norm: 0.8593687731659195, iteration: 411520
loss: 1.0323801040649414,grad_norm: 0.7113837802353437, iteration: 411521
loss: 1.000320315361023,grad_norm: 0.9463343840915787, iteration: 411522
loss: 1.0713176727294922,grad_norm: 0.9999996317510639, iteration: 411523
loss: 1.0323054790496826,grad_norm: 0.9999991683407812, iteration: 411524
loss: 1.00532865524292,grad_norm: 0.8558685933236081, iteration: 411525
loss: 1.0131481885910034,grad_norm: 0.9999992120952622, iteration: 411526
loss: 1.0706982612609863,grad_norm: 0.9999992433581654, iteration: 411527
loss: 1.0319722890853882,grad_norm: 0.6623436077805194, iteration: 411528
loss: 0.9653490781784058,grad_norm: 0.999999097602768, iteration: 411529
loss: 0.9788739085197449,grad_norm: 0.8415399306475383, iteration: 411530
loss: 0.9877647161483765,grad_norm: 0.9375997231557957, iteration: 411531
loss: 1.0107051134109497,grad_norm: 0.7494764377948183, iteration: 411532
loss: 1.0047332048416138,grad_norm: 0.7271450075884553, iteration: 411533
loss: 0.974692165851593,grad_norm: 0.756171957392513, iteration: 411534
loss: 0.9917092323303223,grad_norm: 0.8455947144047197, iteration: 411535
loss: 1.016050100326538,grad_norm: 0.7680378053055583, iteration: 411536
loss: 1.1137558221817017,grad_norm: 0.9999996537381531, iteration: 411537
loss: 1.0543409585952759,grad_norm: 0.9689273744009059, iteration: 411538
loss: 1.0153472423553467,grad_norm: 0.8257294597439739, iteration: 411539
loss: 1.0176948308944702,grad_norm: 0.760335061865258, iteration: 411540
loss: 0.9903473854064941,grad_norm: 0.7640219550670871, iteration: 411541
loss: 0.9743850827217102,grad_norm: 0.7322408187447972, iteration: 411542
loss: 1.018311858177185,grad_norm: 0.8476149419594692, iteration: 411543
loss: 1.06548273563385,grad_norm: 0.9999999339572158, iteration: 411544
loss: 1.0134693384170532,grad_norm: 0.9999998735319917, iteration: 411545
loss: 1.0066629648208618,grad_norm: 0.7898638525188078, iteration: 411546
loss: 1.0117000341415405,grad_norm: 0.9999996386072166, iteration: 411547
loss: 1.0957818031311035,grad_norm: 0.9999991721034768, iteration: 411548
loss: 0.9875138998031616,grad_norm: 0.6779290825916962, iteration: 411549
loss: 1.0534210205078125,grad_norm: 0.9999992332745242, iteration: 411550
loss: 0.9897040128707886,grad_norm: 0.999999158097012, iteration: 411551
loss: 0.9958347082138062,grad_norm: 0.8585643153672345, iteration: 411552
loss: 1.009275197982788,grad_norm: 0.6945289164913233, iteration: 411553
loss: 0.9747274518013,grad_norm: 0.891502026581239, iteration: 411554
loss: 1.1883955001831055,grad_norm: 0.9999997621338633, iteration: 411555
loss: 0.9858624339103699,grad_norm: 0.9999991720339291, iteration: 411556
loss: 0.99623703956604,grad_norm: 0.7457463738552587, iteration: 411557
loss: 1.029909610748291,grad_norm: 0.6749305982836847, iteration: 411558
loss: 1.0852382183074951,grad_norm: 0.9999995030349573, iteration: 411559
loss: 1.042427659034729,grad_norm: 0.9486121192193204, iteration: 411560
loss: 1.0216193199157715,grad_norm: 0.9999993563240744, iteration: 411561
loss: 1.0644588470458984,grad_norm: 0.999999255124222, iteration: 411562
loss: 1.0406264066696167,grad_norm: 0.7895397456388885, iteration: 411563
loss: 0.9989100694656372,grad_norm: 0.694051813629978, iteration: 411564
loss: 1.0413553714752197,grad_norm: 0.7301522367040095, iteration: 411565
loss: 1.0042937994003296,grad_norm: 0.8184766955602633, iteration: 411566
loss: 1.0105916261672974,grad_norm: 0.950995044519005, iteration: 411567
loss: 0.9625057578086853,grad_norm: 0.821993409689718, iteration: 411568
loss: 1.043202519416809,grad_norm: 0.9094914303191933, iteration: 411569
loss: 0.9956589341163635,grad_norm: 0.9999999105286562, iteration: 411570
loss: 0.9877409338951111,grad_norm: 1.0000000641552718, iteration: 411571
loss: 0.9889096021652222,grad_norm: 0.9999996976696451, iteration: 411572
loss: 1.0356082916259766,grad_norm: 0.8392240943082747, iteration: 411573
loss: 0.9781039953231812,grad_norm: 0.7455936119084472, iteration: 411574
loss: 1.0270535945892334,grad_norm: 0.7573163634796574, iteration: 411575
loss: 0.9746271371841431,grad_norm: 0.7879987170097617, iteration: 411576
loss: 0.9759219884872437,grad_norm: 0.774883765117402, iteration: 411577
loss: 0.9737387299537659,grad_norm: 0.9208963799209631, iteration: 411578
loss: 1.0370980501174927,grad_norm: 0.7954526248449005, iteration: 411579
loss: 1.0167444944381714,grad_norm: 0.8746417208819288, iteration: 411580
loss: 1.0080476999282837,grad_norm: 0.9402314031953449, iteration: 411581
loss: 0.9657384157180786,grad_norm: 0.9574316900816265, iteration: 411582
loss: 1.031714916229248,grad_norm: 0.8606991493711098, iteration: 411583
loss: 1.0143866539001465,grad_norm: 0.6471553827810168, iteration: 411584
loss: 0.9782060980796814,grad_norm: 0.6945177611698358, iteration: 411585
loss: 1.0172687768936157,grad_norm: 0.713800371610363, iteration: 411586
loss: 0.9815183281898499,grad_norm: 0.7653239475912298, iteration: 411587
loss: 1.0001922845840454,grad_norm: 0.7675703237220763, iteration: 411588
loss: 1.01591157913208,grad_norm: 0.7224168236037649, iteration: 411589
loss: 1.1911909580230713,grad_norm: 0.9999999054490886, iteration: 411590
loss: 1.0665260553359985,grad_norm: 0.9999992282693763, iteration: 411591
loss: 1.0194091796875,grad_norm: 0.7660539192737216, iteration: 411592
loss: 1.0739377737045288,grad_norm: 0.7551617841128544, iteration: 411593
loss: 0.9937958717346191,grad_norm: 0.7567724531039706, iteration: 411594
loss: 0.967109203338623,grad_norm: 0.7639011197983846, iteration: 411595
loss: 0.9833047986030579,grad_norm: 0.7934058626717594, iteration: 411596
loss: 1.0160001516342163,grad_norm: 0.74572115439233, iteration: 411597
loss: 0.984103798866272,grad_norm: 0.8332159610985362, iteration: 411598
loss: 0.989460825920105,grad_norm: 0.6877228740063568, iteration: 411599
loss: 0.9487495422363281,grad_norm: 0.9667909999018078, iteration: 411600
loss: 1.0169339179992676,grad_norm: 0.7480757255786336, iteration: 411601
loss: 0.9766937494277954,grad_norm: 0.8176278747202566, iteration: 411602
loss: 0.989519476890564,grad_norm: 0.7488365331211567, iteration: 411603
loss: 1.0023809671401978,grad_norm: 0.7435003156100229, iteration: 411604
loss: 1.1391379833221436,grad_norm: 0.8619198143643254, iteration: 411605
loss: 0.9914063811302185,grad_norm: 0.7484420545948959, iteration: 411606
loss: 1.1437653303146362,grad_norm: 0.999999393765632, iteration: 411607
loss: 0.9758413434028625,grad_norm: 0.8491038412407439, iteration: 411608
loss: 0.9988057017326355,grad_norm: 0.6923665350983362, iteration: 411609
loss: 0.9851997494697571,grad_norm: 0.728975581886736, iteration: 411610
loss: 1.058206558227539,grad_norm: 0.7962840976846083, iteration: 411611
loss: 0.9520279765129089,grad_norm: 0.682852887748855, iteration: 411612
loss: 0.9502647519111633,grad_norm: 0.761510589463911, iteration: 411613
loss: 1.0573523044586182,grad_norm: 0.9999992416683474, iteration: 411614
loss: 0.9594039916992188,grad_norm: 0.9999995170276298, iteration: 411615
loss: 0.9684702157974243,grad_norm: 0.73280199693191, iteration: 411616
loss: 1.0363472700119019,grad_norm: 0.7453322343330852, iteration: 411617
loss: 0.9812551140785217,grad_norm: 0.8399415872095461, iteration: 411618
loss: 1.0064685344696045,grad_norm: 0.6726659831151797, iteration: 411619
loss: 0.9912328720092773,grad_norm: 0.7119941262797581, iteration: 411620
loss: 1.1513196229934692,grad_norm: 0.8474260869157392, iteration: 411621
loss: 1.0724221467971802,grad_norm: 0.9999999378754032, iteration: 411622
loss: 0.9968995451927185,grad_norm: 0.9999996980399871, iteration: 411623
loss: 0.9879879951477051,grad_norm: 0.8441618836079916, iteration: 411624
loss: 1.0250380039215088,grad_norm: 0.8493589918015472, iteration: 411625
loss: 0.9879857897758484,grad_norm: 0.9999990072341445, iteration: 411626
loss: 1.0127378702163696,grad_norm: 0.8529511848023116, iteration: 411627
loss: 1.015840768814087,grad_norm: 0.6949250458489518, iteration: 411628
loss: 1.1747100353240967,grad_norm: 0.999999264475676, iteration: 411629
loss: 0.9996916055679321,grad_norm: 0.8456735700109383, iteration: 411630
loss: 1.0421305894851685,grad_norm: 0.7959617943934472, iteration: 411631
loss: 0.9949213266372681,grad_norm: 0.7410577113752296, iteration: 411632
loss: 0.9928968548774719,grad_norm: 0.8640901359326273, iteration: 411633
loss: 1.0573164224624634,grad_norm: 0.8127165220871545, iteration: 411634
loss: 0.9842367172241211,grad_norm: 0.8534172421581647, iteration: 411635
loss: 1.0674617290496826,grad_norm: 0.9999998626727928, iteration: 411636
loss: 0.9795761108398438,grad_norm: 0.8776388479975749, iteration: 411637
loss: 1.1358509063720703,grad_norm: 0.9999999359522697, iteration: 411638
loss: 0.978560745716095,grad_norm: 0.7316012843049247, iteration: 411639
loss: 1.0377297401428223,grad_norm: 0.6910365877326292, iteration: 411640
loss: 0.9816299080848694,grad_norm: 0.7537190610472113, iteration: 411641
loss: 1.0005342960357666,grad_norm: 0.9999993288677603, iteration: 411642
loss: 1.0066108703613281,grad_norm: 0.9586386626504377, iteration: 411643
loss: 1.0143184661865234,grad_norm: 0.9027440665403569, iteration: 411644
loss: 1.0193402767181396,grad_norm: 0.7475331163351808, iteration: 411645
loss: 1.136733889579773,grad_norm: 0.9088039503420782, iteration: 411646
loss: 1.0397228002548218,grad_norm: 0.9999993258186812, iteration: 411647
loss: 1.028087854385376,grad_norm: 0.9075081030354921, iteration: 411648
loss: 0.9659242033958435,grad_norm: 0.9809649512233185, iteration: 411649
loss: 1.0762721300125122,grad_norm: 0.9626137575389121, iteration: 411650
loss: 1.0232800245285034,grad_norm: 0.9888529383519798, iteration: 411651
loss: 0.9746478199958801,grad_norm: 0.6943948445714654, iteration: 411652
loss: 1.0654679536819458,grad_norm: 0.7593428325812662, iteration: 411653
loss: 0.9781574606895447,grad_norm: 0.9999990620471679, iteration: 411654
loss: 0.9879667162895203,grad_norm: 0.6722379164858334, iteration: 411655
loss: 1.012871503829956,grad_norm: 0.6660969374901995, iteration: 411656
loss: 0.989594578742981,grad_norm: 0.7340751247982283, iteration: 411657
loss: 0.9820781946182251,grad_norm: 0.7616717338509852, iteration: 411658
loss: 0.9841910004615784,grad_norm: 0.693445754200059, iteration: 411659
loss: 0.995231568813324,grad_norm: 0.7854833490636272, iteration: 411660
loss: 1.049215316772461,grad_norm: 0.8326346123470532, iteration: 411661
loss: 1.0205352306365967,grad_norm: 0.8228135542458062, iteration: 411662
loss: 1.0127280950546265,grad_norm: 0.7982811176391145, iteration: 411663
loss: 1.0808666944503784,grad_norm: 0.8702216859917765, iteration: 411664
loss: 0.9881332516670227,grad_norm: 0.8375212395126457, iteration: 411665
loss: 1.0422265529632568,grad_norm: 0.8313799106489851, iteration: 411666
loss: 0.9913877248764038,grad_norm: 0.8174597706593015, iteration: 411667
loss: 1.0639241933822632,grad_norm: 0.999999117479826, iteration: 411668
loss: 0.9902281165122986,grad_norm: 0.7940375516242517, iteration: 411669
loss: 0.9385407567024231,grad_norm: 0.76840313356548, iteration: 411670
loss: 1.0060795545578003,grad_norm: 0.8164238773229967, iteration: 411671
loss: 1.0659011602401733,grad_norm: 0.8079474234270255, iteration: 411672
loss: 0.9563528895378113,grad_norm: 0.76023265646757, iteration: 411673
loss: 1.0190563201904297,grad_norm: 0.8932164435268144, iteration: 411674
loss: 1.0284844636917114,grad_norm: 0.9999991668665733, iteration: 411675
loss: 1.020096778869629,grad_norm: 0.7923548255190969, iteration: 411676
loss: 1.0434598922729492,grad_norm: 0.9793228244279281, iteration: 411677
loss: 1.0141770839691162,grad_norm: 0.7524952063986086, iteration: 411678
loss: 1.073607087135315,grad_norm: 0.9999995132703738, iteration: 411679
loss: 1.0161998271942139,grad_norm: 0.8106294910130887, iteration: 411680
loss: 0.9838603734970093,grad_norm: 0.9999992796575816, iteration: 411681
loss: 0.9830434322357178,grad_norm: 0.7913975860333043, iteration: 411682
loss: 0.9961450695991516,grad_norm: 0.7262263769201072, iteration: 411683
loss: 0.9751827120780945,grad_norm: 0.966079592869637, iteration: 411684
loss: 0.9736425876617432,grad_norm: 0.8611913959769332, iteration: 411685
loss: 1.011846661567688,grad_norm: 0.6688967801693816, iteration: 411686
loss: 1.01603102684021,grad_norm: 0.634172041401127, iteration: 411687
loss: 1.0380306243896484,grad_norm: 0.7937049963983202, iteration: 411688
loss: 1.0099668502807617,grad_norm: 0.7886612388375058, iteration: 411689
loss: 1.057013750076294,grad_norm: 0.8477864893606437, iteration: 411690
loss: 1.0010441541671753,grad_norm: 0.8030408024895188, iteration: 411691
loss: 1.0083669424057007,grad_norm: 0.9999993512325229, iteration: 411692
loss: 1.0404949188232422,grad_norm: 0.7641642451575856, iteration: 411693
loss: 1.019528865814209,grad_norm: 0.976505973441386, iteration: 411694
loss: 1.1748119592666626,grad_norm: 0.9999991685576836, iteration: 411695
loss: 1.0114450454711914,grad_norm: 0.7205702516317466, iteration: 411696
loss: 1.0255550146102905,grad_norm: 0.806598751116277, iteration: 411697
loss: 0.9875466823577881,grad_norm: 0.6502739457808724, iteration: 411698
loss: 0.9937331676483154,grad_norm: 0.935095672161065, iteration: 411699
loss: 1.090848684310913,grad_norm: 0.9999998217944432, iteration: 411700
loss: 1.0050945281982422,grad_norm: 0.8138061194472895, iteration: 411701
loss: 0.9959404468536377,grad_norm: 0.7340355025542781, iteration: 411702
loss: 1.0112913846969604,grad_norm: 0.9999998157985694, iteration: 411703
loss: 1.013074517250061,grad_norm: 0.9999998024186936, iteration: 411704
loss: 0.980477511882782,grad_norm: 0.7246133121683367, iteration: 411705
loss: 1.0820627212524414,grad_norm: 0.9999989983130486, iteration: 411706
loss: 0.9685961008071899,grad_norm: 0.7247945486939023, iteration: 411707
loss: 1.0124378204345703,grad_norm: 0.7749632416144286, iteration: 411708
loss: 0.9892331957817078,grad_norm: 0.6397078412723399, iteration: 411709
loss: 0.9749921560287476,grad_norm: 0.8304687896782226, iteration: 411710
loss: 1.063462734222412,grad_norm: 0.9041762402754295, iteration: 411711
loss: 1.0071146488189697,grad_norm: 0.9999996327472721, iteration: 411712
loss: 1.0116087198257446,grad_norm: 0.8933914145525071, iteration: 411713
loss: 1.041648507118225,grad_norm: 0.651686046079975, iteration: 411714
loss: 1.0050427913665771,grad_norm: 0.7617462542393679, iteration: 411715
loss: 1.0019367933273315,grad_norm: 0.9999998032003027, iteration: 411716
loss: 1.016702651977539,grad_norm: 0.6284259895935493, iteration: 411717
loss: 1.0889829397201538,grad_norm: 0.9999999362780575, iteration: 411718
loss: 1.0295159816741943,grad_norm: 0.7825599516610053, iteration: 411719
loss: 0.9792563915252686,grad_norm: 0.6300171614814932, iteration: 411720
loss: 0.9851468205451965,grad_norm: 0.8117399643633015, iteration: 411721
loss: 1.0279427766799927,grad_norm: 0.9892176785224063, iteration: 411722
loss: 1.0364278554916382,grad_norm: 0.8166640636726286, iteration: 411723
loss: 1.0066829919815063,grad_norm: 0.8704403484382551, iteration: 411724
loss: 0.9889752864837646,grad_norm: 0.778757027571021, iteration: 411725
loss: 1.0029515027999878,grad_norm: 0.9542791451666133, iteration: 411726
loss: 0.992332935333252,grad_norm: 0.6306725341501751, iteration: 411727
loss: 0.986405611038208,grad_norm: 0.6863373222771824, iteration: 411728
loss: 1.0655936002731323,grad_norm: 0.9158354305258143, iteration: 411729
loss: 1.004366159439087,grad_norm: 0.6980518046911782, iteration: 411730
loss: 0.9636696577072144,grad_norm: 0.9255403340044728, iteration: 411731
loss: 0.9585825800895691,grad_norm: 0.6265050502455224, iteration: 411732
loss: 1.007498025894165,grad_norm: 0.9245186849064285, iteration: 411733
loss: 1.023041844367981,grad_norm: 0.8991848548934231, iteration: 411734
loss: 1.0017977952957153,grad_norm: 0.7668586308825746, iteration: 411735
loss: 0.9896036386489868,grad_norm: 0.8353961778439077, iteration: 411736
loss: 1.124402642250061,grad_norm: 0.9999997006223228, iteration: 411737
loss: 0.9974021911621094,grad_norm: 0.7253930369727211, iteration: 411738
loss: 1.0147637128829956,grad_norm: 0.9999993643979624, iteration: 411739
loss: 1.0125588178634644,grad_norm: 0.867153930073425, iteration: 411740
loss: 0.9870437979698181,grad_norm: 0.7894308953862305, iteration: 411741
loss: 1.0318044424057007,grad_norm: 0.9260062261675355, iteration: 411742
loss: 1.0318247079849243,grad_norm: 0.7633921925727755, iteration: 411743
loss: 0.9889951348304749,grad_norm: 0.7610109821674128, iteration: 411744
loss: 1.0105671882629395,grad_norm: 0.8660095722463297, iteration: 411745
loss: 0.960606575012207,grad_norm: 0.7120710795981853, iteration: 411746
loss: 1.0259150266647339,grad_norm: 0.8331151644120766, iteration: 411747
loss: 0.9666516184806824,grad_norm: 0.8667412302049481, iteration: 411748
loss: 0.9832218289375305,grad_norm: 0.8969438946456303, iteration: 411749
loss: 0.9864682555198669,grad_norm: 0.7872906530122066, iteration: 411750
loss: 0.9964726567268372,grad_norm: 0.8069577084069425, iteration: 411751
loss: 1.028795599937439,grad_norm: 0.8111600270904065, iteration: 411752
loss: 0.9714978337287903,grad_norm: 0.9026089868746356, iteration: 411753
loss: 1.0015724897384644,grad_norm: 0.6950638511803631, iteration: 411754
loss: 1.0048692226409912,grad_norm: 0.9999993920934505, iteration: 411755
loss: 1.002389669418335,grad_norm: 0.7635901552630153, iteration: 411756
loss: 0.9912922978401184,grad_norm: 0.7117710470798748, iteration: 411757
loss: 0.9906653165817261,grad_norm: 0.8192639747780838, iteration: 411758
loss: 1.010455846786499,grad_norm: 0.8165000400875039, iteration: 411759
loss: 1.0026460886001587,grad_norm: 0.8318777144277758, iteration: 411760
loss: 1.0022916793823242,grad_norm: 0.8306681142625914, iteration: 411761
loss: 1.0591639280319214,grad_norm: 0.6613319660166312, iteration: 411762
loss: 0.9798546433448792,grad_norm: 0.7597656494975057, iteration: 411763
loss: 1.0120762586593628,grad_norm: 0.8103728396045451, iteration: 411764
loss: 1.0514689683914185,grad_norm: 0.8320926153340654, iteration: 411765
loss: 0.9885700345039368,grad_norm: 0.769481716145832, iteration: 411766
loss: 1.0627858638763428,grad_norm: 0.9999995874864582, iteration: 411767
loss: 1.0764057636260986,grad_norm: 0.9999993456788018, iteration: 411768
loss: 0.9847722053527832,grad_norm: 0.8688932630932981, iteration: 411769
loss: 0.9748955368995667,grad_norm: 0.6257035716962827, iteration: 411770
loss: 1.001913070678711,grad_norm: 0.7503109058601556, iteration: 411771
loss: 1.0156574249267578,grad_norm: 0.7656691819003808, iteration: 411772
loss: 0.9923421144485474,grad_norm: 0.7666872011712542, iteration: 411773
loss: 1.0685073137283325,grad_norm: 0.9999998124985773, iteration: 411774
loss: 1.037275791168213,grad_norm: 0.9999991607508055, iteration: 411775
loss: 0.9582051634788513,grad_norm: 0.7057079929483968, iteration: 411776
loss: 0.995293140411377,grad_norm: 0.8315381754856759, iteration: 411777
loss: 1.0111138820648193,grad_norm: 0.7028752318810674, iteration: 411778
loss: 0.9995878338813782,grad_norm: 0.8552741765996678, iteration: 411779
loss: 0.9569184184074402,grad_norm: 0.7754992719658145, iteration: 411780
loss: 1.0169854164123535,grad_norm: 0.7893047941398605, iteration: 411781
loss: 0.9711382985115051,grad_norm: 0.7970410941827469, iteration: 411782
loss: 1.0554505586624146,grad_norm: 0.9999997614266923, iteration: 411783
loss: 1.0038378238677979,grad_norm: 0.7621195249211593, iteration: 411784
loss: 1.0270943641662598,grad_norm: 0.7864398022141049, iteration: 411785
loss: 1.127382516860962,grad_norm: 0.9735519955521382, iteration: 411786
loss: 1.0358752012252808,grad_norm: 0.928302001353296, iteration: 411787
loss: 1.059851884841919,grad_norm: 0.9999999765856138, iteration: 411788
loss: 0.9494404792785645,grad_norm: 0.6435213149842085, iteration: 411789
loss: 0.9453882575035095,grad_norm: 0.8400324500757204, iteration: 411790
loss: 0.9808495044708252,grad_norm: 0.8245129094650794, iteration: 411791
loss: 0.959225058555603,grad_norm: 0.8865480095029701, iteration: 411792
loss: 1.0030946731567383,grad_norm: 0.8538614169290994, iteration: 411793
loss: 1.0057822465896606,grad_norm: 0.9999998290088621, iteration: 411794
loss: 1.0347169637680054,grad_norm: 0.7661344866406313, iteration: 411795
loss: 0.9678323268890381,grad_norm: 0.6747226512741066, iteration: 411796
loss: 1.0264198780059814,grad_norm: 0.7920465621781515, iteration: 411797
loss: 1.0008456707000732,grad_norm: 0.9999996589077297, iteration: 411798
loss: 1.0369762182235718,grad_norm: 0.6828336515049791, iteration: 411799
loss: 0.9909119606018066,grad_norm: 0.8951039093112302, iteration: 411800
loss: 1.0420618057250977,grad_norm: 0.7516083969062655, iteration: 411801
loss: 1.05302894115448,grad_norm: 0.7118388498898521, iteration: 411802
loss: 0.9778462052345276,grad_norm: 0.9999997244333706, iteration: 411803
loss: 1.0161235332489014,grad_norm: 0.8811907977718553, iteration: 411804
loss: 1.0264250040054321,grad_norm: 0.7017064201376727, iteration: 411805
loss: 1.0229809284210205,grad_norm: 0.6364495485059121, iteration: 411806
loss: 1.0476890802383423,grad_norm: 0.9999994412458628, iteration: 411807
loss: 1.0179182291030884,grad_norm: 0.9999992321603355, iteration: 411808
loss: 0.9994526505470276,grad_norm: 0.9999992726121341, iteration: 411809
loss: 1.047318458557129,grad_norm: 0.8189697482258269, iteration: 411810
loss: 1.0131453275680542,grad_norm: 0.7522214611550353, iteration: 411811
loss: 0.9640463590621948,grad_norm: 0.7989346490946932, iteration: 411812
loss: 0.971589982509613,grad_norm: 0.8435690767388019, iteration: 411813
loss: 0.966791033744812,grad_norm: 0.6603173101781687, iteration: 411814
loss: 1.0261808633804321,grad_norm: 0.8229369257050386, iteration: 411815
loss: 1.0068604946136475,grad_norm: 0.7316466363540148, iteration: 411816
loss: 1.0284316539764404,grad_norm: 0.8245573623574414, iteration: 411817
loss: 1.0734833478927612,grad_norm: 0.9999994881332689, iteration: 411818
loss: 1.0187323093414307,grad_norm: 0.8996840737352922, iteration: 411819
loss: 0.9858778119087219,grad_norm: 0.6727236407218529, iteration: 411820
loss: 0.9709879159927368,grad_norm: 0.8172140468386468, iteration: 411821
loss: 1.0007576942443848,grad_norm: 0.6571628935564803, iteration: 411822
loss: 0.9953984618186951,grad_norm: 0.8476306257630893, iteration: 411823
loss: 1.0048431158065796,grad_norm: 0.8361686542946328, iteration: 411824
loss: 0.9795679450035095,grad_norm: 0.9999997533963874, iteration: 411825
loss: 1.0022488832473755,grad_norm: 0.7815091384261678, iteration: 411826
loss: 1.0281195640563965,grad_norm: 0.9999991689576254, iteration: 411827
loss: 1.0082319974899292,grad_norm: 0.7584033860601043, iteration: 411828
loss: 0.9690470695495605,grad_norm: 0.870851432357267, iteration: 411829
loss: 1.00442636013031,grad_norm: 0.6953677239941612, iteration: 411830
loss: 1.005293846130371,grad_norm: 0.9556270796462751, iteration: 411831
loss: 0.9801044464111328,grad_norm: 0.7956556733329212, iteration: 411832
loss: 0.9923255443572998,grad_norm: 0.6388713062553096, iteration: 411833
loss: 1.0023574829101562,grad_norm: 0.898155025703795, iteration: 411834
loss: 1.0040559768676758,grad_norm: 0.6210279833385722, iteration: 411835
loss: 1.0183844566345215,grad_norm: 0.6960246619071727, iteration: 411836
loss: 1.0203977823257446,grad_norm: 0.7514897137040786, iteration: 411837
loss: 0.9463593363761902,grad_norm: 0.8729645354772035, iteration: 411838
loss: 1.0023233890533447,grad_norm: 0.7063868516227815, iteration: 411839
loss: 1.0178781747817993,grad_norm: 0.759847859979214, iteration: 411840
loss: 0.9707151651382446,grad_norm: 0.8130895246364529, iteration: 411841
loss: 1.0217581987380981,grad_norm: 0.9999991938288113, iteration: 411842
loss: 0.991599440574646,grad_norm: 0.9112175383599352, iteration: 411843
loss: 1.0657942295074463,grad_norm: 0.9999995700848269, iteration: 411844
loss: 1.0045497417449951,grad_norm: 0.9999991325082574, iteration: 411845
loss: 1.0191237926483154,grad_norm: 0.7694079495349639, iteration: 411846
loss: 1.0245863199234009,grad_norm: 0.8669144774041124, iteration: 411847
loss: 1.249872088432312,grad_norm: 0.9999998991909087, iteration: 411848
loss: 0.9971957802772522,grad_norm: 0.6593145013681375, iteration: 411849
loss: 0.9541050791740417,grad_norm: 0.8323610890510332, iteration: 411850
loss: 0.9974408745765686,grad_norm: 0.8216604007396542, iteration: 411851
loss: 0.9854622483253479,grad_norm: 0.7685209119577021, iteration: 411852
loss: 0.9951022863388062,grad_norm: 0.7804893225489894, iteration: 411853
loss: 0.9872603416442871,grad_norm: 0.6736744686881792, iteration: 411854
loss: 0.9977885484695435,grad_norm: 0.6896417054209274, iteration: 411855
loss: 1.0776373147964478,grad_norm: 0.8203512336265133, iteration: 411856
loss: 0.9906367659568787,grad_norm: 0.7920724405007791, iteration: 411857
loss: 0.9585897922515869,grad_norm: 0.850410510842209, iteration: 411858
loss: 0.9908968210220337,grad_norm: 0.7903718767485868, iteration: 411859
loss: 1.002832055091858,grad_norm: 0.7207337696894545, iteration: 411860
loss: 1.0280877351760864,grad_norm: 0.7295136095931671, iteration: 411861
loss: 0.9953157305717468,grad_norm: 0.6609283570394305, iteration: 411862
loss: 1.0085217952728271,grad_norm: 0.7176364147928787, iteration: 411863
loss: 0.9697101712226868,grad_norm: 0.8416622476399871, iteration: 411864
loss: 0.9636886119842529,grad_norm: 0.8786386043795097, iteration: 411865
loss: 0.9997105598449707,grad_norm: 0.7171828117663247, iteration: 411866
loss: 0.994346022605896,grad_norm: 0.8883842505353919, iteration: 411867
loss: 1.0043598413467407,grad_norm: 0.7907327071726954, iteration: 411868
loss: 0.9788922667503357,grad_norm: 0.9020911014344875, iteration: 411869
loss: 1.0153546333312988,grad_norm: 0.8588419514635459, iteration: 411870
loss: 0.998862624168396,grad_norm: 0.7969947555824352, iteration: 411871
loss: 0.979135274887085,grad_norm: 0.7162019380357315, iteration: 411872
loss: 1.0447258949279785,grad_norm: 0.7825474515099327, iteration: 411873
loss: 1.0115336179733276,grad_norm: 0.9999993678876453, iteration: 411874
loss: 1.03057861328125,grad_norm: 0.9453547592512093, iteration: 411875
loss: 1.0159320831298828,grad_norm: 0.9999989405188219, iteration: 411876
loss: 1.0354290008544922,grad_norm: 0.9999999471612545, iteration: 411877
loss: 1.0317167043685913,grad_norm: 0.8132521330591351, iteration: 411878
loss: 1.0298101902008057,grad_norm: 0.9290125191016648, iteration: 411879
loss: 0.986920952796936,grad_norm: 0.698688232289337, iteration: 411880
loss: 1.002380132675171,grad_norm: 0.9011764122778111, iteration: 411881
loss: 0.9806399345397949,grad_norm: 0.7550645947560193, iteration: 411882
loss: 1.0176970958709717,grad_norm: 0.8045146709628899, iteration: 411883
loss: 0.9932255148887634,grad_norm: 0.8011294783731252, iteration: 411884
loss: 1.0488691329956055,grad_norm: 0.7349164504515698, iteration: 411885
loss: 0.9708757996559143,grad_norm: 0.8599277116273947, iteration: 411886
loss: 0.9997239112854004,grad_norm: 0.7170155116369114, iteration: 411887
loss: 0.9748625755310059,grad_norm: 0.9999994789901931, iteration: 411888
loss: 0.9873477220535278,grad_norm: 0.791938129147954, iteration: 411889
loss: 0.9550991654396057,grad_norm: 0.9999998650736037, iteration: 411890
loss: 1.0577808618545532,grad_norm: 0.9818618931258507, iteration: 411891
loss: 0.9862509965896606,grad_norm: 0.8926291869584861, iteration: 411892
loss: 1.0013891458511353,grad_norm: 0.7358564822404032, iteration: 411893
loss: 1.019210696220398,grad_norm: 0.7735507512474855, iteration: 411894
loss: 1.0655808448791504,grad_norm: 1.0000000161114968, iteration: 411895
loss: 0.9692155122756958,grad_norm: 0.7014978032507382, iteration: 411896
loss: 1.0657758712768555,grad_norm: 0.8303366473052446, iteration: 411897
loss: 0.9853956699371338,grad_norm: 0.7649261701551201, iteration: 411898
loss: 1.02253258228302,grad_norm: 0.7992327797787826, iteration: 411899
loss: 0.9999990463256836,grad_norm: 0.858360833858065, iteration: 411900
loss: 1.030638575553894,grad_norm: 0.7651343932501299, iteration: 411901
loss: 1.0108232498168945,grad_norm: 0.7291514253420649, iteration: 411902
loss: 0.9981804490089417,grad_norm: 0.6997449483367566, iteration: 411903
loss: 0.9944887161254883,grad_norm: 0.8013852243335214, iteration: 411904
loss: 1.0260885953903198,grad_norm: 0.9999992338091949, iteration: 411905
loss: 0.981705367565155,grad_norm: 0.8516070863178046, iteration: 411906
loss: 1.005563497543335,grad_norm: 0.8496878205909028, iteration: 411907
loss: 1.0258409976959229,grad_norm: 0.6353188468894059, iteration: 411908
loss: 0.9981276392936707,grad_norm: 0.7779720856472686, iteration: 411909
loss: 1.0131064653396606,grad_norm: 0.8749779451342656, iteration: 411910
loss: 1.0143094062805176,grad_norm: 0.8136124879773436, iteration: 411911
loss: 0.9984585642814636,grad_norm: 0.804642801777415, iteration: 411912
loss: 0.9521179795265198,grad_norm: 0.7677563220271056, iteration: 411913
loss: 1.0074801445007324,grad_norm: 0.8068899048202401, iteration: 411914
loss: 0.9840408563613892,grad_norm: 0.7603754325208013, iteration: 411915
loss: 0.9949988722801208,grad_norm: 0.7894401601110856, iteration: 411916
loss: 0.9656243920326233,grad_norm: 0.8365482166184461, iteration: 411917
loss: 1.0648577213287354,grad_norm: 0.9999996458110783, iteration: 411918
loss: 1.0079303979873657,grad_norm: 0.7610242688489482, iteration: 411919
loss: 1.016926884651184,grad_norm: 0.9767650465582729, iteration: 411920
loss: 1.0044982433319092,grad_norm: 0.719465640032382, iteration: 411921
loss: 1.0556392669677734,grad_norm: 0.6760261717231243, iteration: 411922
loss: 1.0094399452209473,grad_norm: 0.9999990773373277, iteration: 411923
loss: 1.0399912595748901,grad_norm: 0.9999991519240119, iteration: 411924
loss: 1.0235744714736938,grad_norm: 0.8196243817622569, iteration: 411925
loss: 0.9941837787628174,grad_norm: 0.8022379755450965, iteration: 411926
loss: 1.072783350944519,grad_norm: 0.9999995634848418, iteration: 411927
loss: 1.0148258209228516,grad_norm: 0.8884902394764279, iteration: 411928
loss: 1.003599762916565,grad_norm: 0.9999990970315983, iteration: 411929
loss: 1.006037950515747,grad_norm: 0.8100440838331645, iteration: 411930
loss: 0.9745289087295532,grad_norm: 0.7016721672623903, iteration: 411931
loss: 1.0344138145446777,grad_norm: 0.9999994773795806, iteration: 411932
loss: 0.972420334815979,grad_norm: 0.7744525944888037, iteration: 411933
loss: 0.9783257246017456,grad_norm: 0.8811112949279414, iteration: 411934
loss: 1.0003582239151,grad_norm: 0.7847861783594287, iteration: 411935
loss: 1.0464823246002197,grad_norm: 0.7560000724227567, iteration: 411936
loss: 0.989123523235321,grad_norm: 0.7773641165626965, iteration: 411937
loss: 1.0376248359680176,grad_norm: 0.7366051406986083, iteration: 411938
loss: 0.989086389541626,grad_norm: 0.9999993664891865, iteration: 411939
loss: 1.0788456201553345,grad_norm: 0.7344877435121033, iteration: 411940
loss: 0.9535409808158875,grad_norm: 0.7947999782923322, iteration: 411941
loss: 0.9840683937072754,grad_norm: 0.6624221999376639, iteration: 411942
loss: 1.0906367301940918,grad_norm: 0.999999917030981, iteration: 411943
loss: 0.9809078574180603,grad_norm: 0.914500652395596, iteration: 411944
loss: 0.9898228645324707,grad_norm: 0.7166973499196929, iteration: 411945
loss: 1.0146832466125488,grad_norm: 0.9999996815935853, iteration: 411946
loss: 0.9985898733139038,grad_norm: 0.9238417629208318, iteration: 411947
loss: 0.9756588935852051,grad_norm: 0.8429265795565217, iteration: 411948
loss: 1.0507901906967163,grad_norm: 0.91522804292458, iteration: 411949
loss: 0.9727643132209778,grad_norm: 0.8370247412834, iteration: 411950
loss: 0.9761418700218201,grad_norm: 0.7342149866533475, iteration: 411951
loss: 1.0268608331680298,grad_norm: 0.9157553250548807, iteration: 411952
loss: 0.9779810905456543,grad_norm: 0.8607398669523543, iteration: 411953
loss: 0.9506374597549438,grad_norm: 0.8830087919009177, iteration: 411954
loss: 1.009761929512024,grad_norm: 0.7118746815732181, iteration: 411955
loss: 0.9708213210105896,grad_norm: 0.7762266740770521, iteration: 411956
loss: 0.9644696116447449,grad_norm: 0.7487587670200224, iteration: 411957
loss: 1.0429983139038086,grad_norm: 0.9999992401369686, iteration: 411958
loss: 1.0064140558242798,grad_norm: 0.8184655299708581, iteration: 411959
loss: 0.9597004652023315,grad_norm: 0.7493054317169893, iteration: 411960
loss: 1.002011775970459,grad_norm: 0.9814930134681704, iteration: 411961
loss: 1.0334630012512207,grad_norm: 0.8951431972084603, iteration: 411962
loss: 1.0349479913711548,grad_norm: 0.9999996444408533, iteration: 411963
loss: 1.0105513334274292,grad_norm: 0.7656925745711674, iteration: 411964
loss: 1.0017175674438477,grad_norm: 0.8010764092043267, iteration: 411965
loss: 1.0065680742263794,grad_norm: 0.7132376188721109, iteration: 411966
loss: 0.9818330407142639,grad_norm: 0.8213155952184963, iteration: 411967
loss: 1.0680755376815796,grad_norm: 0.9999993799917005, iteration: 411968
loss: 1.0026612281799316,grad_norm: 0.8979567452705458, iteration: 411969
loss: 1.0258209705352783,grad_norm: 1.0000000102041837, iteration: 411970
loss: 1.0355689525604248,grad_norm: 0.9999993442254285, iteration: 411971
loss: 1.0339019298553467,grad_norm: 0.9999993460203735, iteration: 411972
loss: 0.990862250328064,grad_norm: 0.7602225430705906, iteration: 411973
loss: 1.0023589134216309,grad_norm: 0.7103070006673705, iteration: 411974
loss: 0.9985001087188721,grad_norm: 0.9123910227742595, iteration: 411975
loss: 0.9986686110496521,grad_norm: 0.689985282906835, iteration: 411976
loss: 1.022592306137085,grad_norm: 0.9160394668644919, iteration: 411977
loss: 1.0207760334014893,grad_norm: 0.7283337748280675, iteration: 411978
loss: 0.9663061499595642,grad_norm: 0.8739876187884111, iteration: 411979
loss: 1.006545901298523,grad_norm: 0.9029706645257071, iteration: 411980
loss: 1.0447242259979248,grad_norm: 0.7650284389862645, iteration: 411981
loss: 1.0354384183883667,grad_norm: 0.7941537375888655, iteration: 411982
loss: 1.043447732925415,grad_norm: 0.8011189526703241, iteration: 411983
loss: 1.064097285270691,grad_norm: 0.9999996716428553, iteration: 411984
loss: 0.9789058566093445,grad_norm: 0.6644370605493285, iteration: 411985
loss: 1.0717312097549438,grad_norm: 0.9510707691442631, iteration: 411986
loss: 1.0389573574066162,grad_norm: 0.7729803021907251, iteration: 411987
loss: 1.0520740747451782,grad_norm: 0.8077363785159181, iteration: 411988
loss: 0.9587056636810303,grad_norm: 0.8493321009206578, iteration: 411989
loss: 1.0314098596572876,grad_norm: 0.8280770860684432, iteration: 411990
loss: 1.0256344079971313,grad_norm: 0.9593397875728401, iteration: 411991
loss: 0.9764116406440735,grad_norm: 0.8018817148983518, iteration: 411992
loss: 1.0896310806274414,grad_norm: 0.9999999264073482, iteration: 411993
loss: 0.976875901222229,grad_norm: 0.6633650284967156, iteration: 411994
loss: 0.991062581539154,grad_norm: 0.9857429027475356, iteration: 411995
loss: 1.042376160621643,grad_norm: 0.7224849426080244, iteration: 411996
loss: 1.0181821584701538,grad_norm: 0.9999992880180577, iteration: 411997
loss: 0.9725439548492432,grad_norm: 0.7605375295264301, iteration: 411998
loss: 1.0077407360076904,grad_norm: 0.999999094745999, iteration: 411999
loss: 0.9809052348136902,grad_norm: 0.7493041346688794, iteration: 412000
loss: 0.9773378968238831,grad_norm: 0.9081470698235009, iteration: 412001
loss: 0.9856573343276978,grad_norm: 0.7800080840706565, iteration: 412002
loss: 1.010763168334961,grad_norm: 0.8040074265028997, iteration: 412003
loss: 1.017077922821045,grad_norm: 0.9999998127425117, iteration: 412004
loss: 1.0199567079544067,grad_norm: 0.614818752044617, iteration: 412005
loss: 1.0179336071014404,grad_norm: 0.6738373003104967, iteration: 412006
loss: 0.99321448802948,grad_norm: 0.6851748591107374, iteration: 412007
loss: 1.0386651754379272,grad_norm: 0.7630403151270586, iteration: 412008
loss: 1.0480173826217651,grad_norm: 0.8205353143688146, iteration: 412009
loss: 0.9975837469100952,grad_norm: 0.9999991403674031, iteration: 412010
loss: 0.9982088804244995,grad_norm: 0.9674817734452134, iteration: 412011
loss: 0.9942047595977783,grad_norm: 0.9099493111798056, iteration: 412012
loss: 1.0167715549468994,grad_norm: 0.7740069090992171, iteration: 412013
loss: 0.9860773086547852,grad_norm: 0.7652539579319007, iteration: 412014
loss: 0.9890385866165161,grad_norm: 0.8177324776467606, iteration: 412015
loss: 0.9811475276947021,grad_norm: 0.6756363267478951, iteration: 412016
loss: 0.9729899764060974,grad_norm: 0.8499958599274423, iteration: 412017
loss: 1.0053457021713257,grad_norm: 0.7769170034915568, iteration: 412018
loss: 1.0353401899337769,grad_norm: 0.6720240172851121, iteration: 412019
loss: 1.0319215059280396,grad_norm: 0.9999990930041818, iteration: 412020
loss: 1.0110892057418823,grad_norm: 0.6723250262551814, iteration: 412021
loss: 0.9809376001358032,grad_norm: 0.8596688046350214, iteration: 412022
loss: 0.9583231210708618,grad_norm: 0.7202081388236946, iteration: 412023
loss: 0.947763204574585,grad_norm: 0.92825879478302, iteration: 412024
loss: 1.0139645338058472,grad_norm: 0.7983809776100201, iteration: 412025
loss: 1.0773165225982666,grad_norm: 0.6913813482442455, iteration: 412026
loss: 1.0474114418029785,grad_norm: 0.8923891287038663, iteration: 412027
loss: 1.0937925577163696,grad_norm: 0.9999995293336451, iteration: 412028
loss: 1.0040568113327026,grad_norm: 0.7866839730898176, iteration: 412029
loss: 1.0170518159866333,grad_norm: 0.7435213433567018, iteration: 412030
loss: 1.0157768726348877,grad_norm: 0.7025499850171791, iteration: 412031
loss: 1.0043144226074219,grad_norm: 0.6996897595775289, iteration: 412032
loss: 1.032820224761963,grad_norm: 0.7854954013269908, iteration: 412033
loss: 0.9864685535430908,grad_norm: 0.7992624439625566, iteration: 412034
loss: 1.0330560207366943,grad_norm: 0.6818213252010494, iteration: 412035
loss: 1.04850172996521,grad_norm: 0.7311266234604902, iteration: 412036
loss: 1.007171630859375,grad_norm: 0.9448422099012264, iteration: 412037
loss: 0.965436577796936,grad_norm: 0.7676718106975998, iteration: 412038
loss: 1.008980393409729,grad_norm: 0.9527169154038192, iteration: 412039
loss: 0.9893940687179565,grad_norm: 0.8310779865170094, iteration: 412040
loss: 1.0262547731399536,grad_norm: 0.9999994614484865, iteration: 412041
loss: 1.0128380060195923,grad_norm: 0.7414586623445126, iteration: 412042
loss: 1.0298655033111572,grad_norm: 0.9999999204019199, iteration: 412043
loss: 1.0458061695098877,grad_norm: 0.9999994798821041, iteration: 412044
loss: 1.0185490846633911,grad_norm: 0.9999991922152067, iteration: 412045
loss: 0.9552562832832336,grad_norm: 0.7315604265468804, iteration: 412046
loss: 0.9816340804100037,grad_norm: 0.712363198544458, iteration: 412047
loss: 1.0788496732711792,grad_norm: 0.8393740052971753, iteration: 412048
loss: 0.9779906272888184,grad_norm: 0.8656125916146743, iteration: 412049
loss: 1.0243324041366577,grad_norm: 0.8686700585220781, iteration: 412050
loss: 1.0141901969909668,grad_norm: 0.7744689683626542, iteration: 412051
loss: 1.0449655055999756,grad_norm: 0.8778436647929472, iteration: 412052
loss: 0.9732836484909058,grad_norm: 0.9999990630629154, iteration: 412053
loss: 0.9732872843742371,grad_norm: 0.7154958167678032, iteration: 412054
loss: 1.037488341331482,grad_norm: 0.9999991179300861, iteration: 412055
loss: 1.0114328861236572,grad_norm: 0.7325150901300803, iteration: 412056
loss: 1.028587818145752,grad_norm: 0.9743649160928775, iteration: 412057
loss: 1.0346218347549438,grad_norm: 0.6959273836784438, iteration: 412058
loss: 1.0420881509780884,grad_norm: 0.9999998721066745, iteration: 412059
loss: 1.0147675275802612,grad_norm: 0.8897594296650901, iteration: 412060
loss: 1.0079137086868286,grad_norm: 0.9999991692855161, iteration: 412061
loss: 1.0165746212005615,grad_norm: 0.9999996768640261, iteration: 412062
loss: 1.2154263257980347,grad_norm: 0.9999997353854386, iteration: 412063
loss: 1.0520931482315063,grad_norm: 0.9999996161654279, iteration: 412064
loss: 1.0080963373184204,grad_norm: 0.7741456203230592, iteration: 412065
loss: 0.9531944394111633,grad_norm: 0.6099323799401073, iteration: 412066
loss: 1.0025769472122192,grad_norm: 0.9999993311670045, iteration: 412067
loss: 1.0772764682769775,grad_norm: 0.9999999659628606, iteration: 412068
loss: 1.0482410192489624,grad_norm: 0.8933107087433526, iteration: 412069
loss: 1.0354737043380737,grad_norm: 0.9920113632579508, iteration: 412070
loss: 0.9941896796226501,grad_norm: 0.9999992211610127, iteration: 412071
loss: 0.9874680638313293,grad_norm: 0.7421378820856545, iteration: 412072
loss: 1.0316053628921509,grad_norm: 0.8905570743326822, iteration: 412073
loss: 0.9997655153274536,grad_norm: 0.7545836586084742, iteration: 412074
loss: 1.0193045139312744,grad_norm: 0.6882177838861004, iteration: 412075
loss: 1.017752766609192,grad_norm: 0.999999293364184, iteration: 412076
loss: 1.092580795288086,grad_norm: 0.9999992585628361, iteration: 412077
loss: 0.9943836331367493,grad_norm: 0.9999997938594214, iteration: 412078
loss: 1.025131106376648,grad_norm: 0.9999997168351151, iteration: 412079
loss: 0.9928410649299622,grad_norm: 0.7901031074656403, iteration: 412080
loss: 1.0256320238113403,grad_norm: 0.6719310742176933, iteration: 412081
loss: 0.9875606894493103,grad_norm: 0.9999990962642221, iteration: 412082
loss: 1.0153918266296387,grad_norm: 0.72756053693823, iteration: 412083
loss: 0.9975482225418091,grad_norm: 0.7351198010620095, iteration: 412084
loss: 0.9968736171722412,grad_norm: 0.8359696505117638, iteration: 412085
loss: 1.0321030616760254,grad_norm: 0.7116731588874522, iteration: 412086
loss: 0.9820247888565063,grad_norm: 0.8146345950961977, iteration: 412087
loss: 0.9724116325378418,grad_norm: 0.6964272305762899, iteration: 412088
loss: 1.0945101976394653,grad_norm: 0.9999994921189623, iteration: 412089
loss: 0.9924842715263367,grad_norm: 0.8856111937606493, iteration: 412090
loss: 1.0898864269256592,grad_norm: 0.9999998715958777, iteration: 412091
loss: 1.1139676570892334,grad_norm: 0.9999991881075183, iteration: 412092
loss: 1.0434163808822632,grad_norm: 0.9999999086844149, iteration: 412093
loss: 1.0047719478607178,grad_norm: 0.7284278289078497, iteration: 412094
loss: 1.0213450193405151,grad_norm: 0.9999999615739178, iteration: 412095
loss: 1.0313459634780884,grad_norm: 0.9999991456420793, iteration: 412096
loss: 0.9951866865158081,grad_norm: 0.6748375706471746, iteration: 412097
loss: 1.019966959953308,grad_norm: 0.7048526241780066, iteration: 412098
loss: 1.0473577976226807,grad_norm: 0.9999994509904925, iteration: 412099
loss: 1.0611777305603027,grad_norm: 0.9999991042569911, iteration: 412100
loss: 1.0114502906799316,grad_norm: 0.7223551963466858, iteration: 412101
loss: 0.9870981574058533,grad_norm: 0.8787910195557003, iteration: 412102
loss: 1.0016899108886719,grad_norm: 0.7944722449285961, iteration: 412103
loss: 1.0011974573135376,grad_norm: 0.8649960781957026, iteration: 412104
loss: 0.9689778089523315,grad_norm: 0.7182366031572986, iteration: 412105
loss: 1.021763563156128,grad_norm: 0.8653118490766245, iteration: 412106
loss: 1.0318350791931152,grad_norm: 0.9999998840274147, iteration: 412107
loss: 0.9904377460479736,grad_norm: 0.705722621916087, iteration: 412108
loss: 1.0079675912857056,grad_norm: 0.826081435685569, iteration: 412109
loss: 1.0036917924880981,grad_norm: 0.7942425337138972, iteration: 412110
loss: 1.0062105655670166,grad_norm: 0.7627330917660399, iteration: 412111
loss: 1.000043511390686,grad_norm: 0.8918269157338501, iteration: 412112
loss: 0.9785207509994507,grad_norm: 0.7389138938499534, iteration: 412113
loss: 1.0483534336090088,grad_norm: 0.8960643008870743, iteration: 412114
loss: 0.97663414478302,grad_norm: 0.7096532699512602, iteration: 412115
loss: 0.96832275390625,grad_norm: 0.7277013685483524, iteration: 412116
loss: 0.9903959035873413,grad_norm: 0.6874422187645791, iteration: 412117
loss: 1.0882023572921753,grad_norm: 0.9999994219308692, iteration: 412118
loss: 0.9870652556419373,grad_norm: 0.7804969377151744, iteration: 412119
loss: 1.0041872262954712,grad_norm: 0.6755049314687857, iteration: 412120
loss: 1.0370197296142578,grad_norm: 0.9999996237511293, iteration: 412121
loss: 1.0621272325515747,grad_norm: 0.9841893636887071, iteration: 412122
loss: 1.164249300956726,grad_norm: 0.999999371091645, iteration: 412123
loss: 0.9709479212760925,grad_norm: 0.8854179245256061, iteration: 412124
loss: 1.0158683061599731,grad_norm: 0.9999990890753688, iteration: 412125
loss: 0.9523144960403442,grad_norm: 0.7619282317738117, iteration: 412126
loss: 0.9855183959007263,grad_norm: 0.8867051222207989, iteration: 412127
loss: 0.997683048248291,grad_norm: 0.7144302953084274, iteration: 412128
loss: 1.0227996110916138,grad_norm: 0.8029813221146858, iteration: 412129
loss: 0.9681790471076965,grad_norm: 0.6626711432229123, iteration: 412130
loss: 1.0328161716461182,grad_norm: 0.8291680321140265, iteration: 412131
loss: 0.9931339025497437,grad_norm: 0.8991658146122279, iteration: 412132
loss: 1.0879510641098022,grad_norm: 0.9144184720803368, iteration: 412133
loss: 1.1062294244766235,grad_norm: 0.9999994696353629, iteration: 412134
loss: 1.0174821615219116,grad_norm: 0.7466934744239486, iteration: 412135
loss: 1.1010514497756958,grad_norm: 0.9591862729579802, iteration: 412136
loss: 1.1769886016845703,grad_norm: 0.9999996582527255, iteration: 412137
loss: 0.9939209818840027,grad_norm: 0.9197041984646935, iteration: 412138
loss: 0.9798157215118408,grad_norm: 0.7711603946824332, iteration: 412139
loss: 0.9791846871376038,grad_norm: 0.7457809661767785, iteration: 412140
loss: 1.0826438665390015,grad_norm: 0.999999061011687, iteration: 412141
loss: 1.1043651103973389,grad_norm: 0.9999998285722129, iteration: 412142
loss: 1.0930323600769043,grad_norm: 0.9999996167294471, iteration: 412143
loss: 0.9788331389427185,grad_norm: 0.8052255991693446, iteration: 412144
loss: 1.1127018928527832,grad_norm: 0.9999998343004523, iteration: 412145
loss: 1.1094071865081787,grad_norm: 0.9999997550647941, iteration: 412146
loss: 1.0962247848510742,grad_norm: 0.8777875044423461, iteration: 412147
loss: 0.9815832376480103,grad_norm: 0.8141463601859549, iteration: 412148
loss: 0.9823235869407654,grad_norm: 0.9091907285420725, iteration: 412149
loss: 1.0196737051010132,grad_norm: 0.7883070811076022, iteration: 412150
loss: 1.0509246587753296,grad_norm: 0.9999991221610175, iteration: 412151
loss: 1.0234043598175049,grad_norm: 0.8809080739050742, iteration: 412152
loss: 0.9642532467842102,grad_norm: 0.8095574293685689, iteration: 412153
loss: 1.0017415285110474,grad_norm: 0.6285679420544158, iteration: 412154
loss: 0.9931670427322388,grad_norm: 0.7606526629677116, iteration: 412155
loss: 1.0670137405395508,grad_norm: 0.999999444267889, iteration: 412156
loss: 0.9685758948326111,grad_norm: 0.999999245337747, iteration: 412157
loss: 0.9687922596931458,grad_norm: 0.6144225957849627, iteration: 412158
loss: 1.0254555940628052,grad_norm: 0.7559906395367202, iteration: 412159
loss: 1.0215353965759277,grad_norm: 0.9999992853276825, iteration: 412160
loss: 1.0444973707199097,grad_norm: 0.9999992215392793, iteration: 412161
loss: 1.0329315662384033,grad_norm: 0.9999999395429976, iteration: 412162
loss: 0.9744915962219238,grad_norm: 0.6706403758864293, iteration: 412163
loss: 0.9929665923118591,grad_norm: 0.7488390630991197, iteration: 412164
loss: 1.0195139646530151,grad_norm: 0.8638456278939487, iteration: 412165
loss: 1.0383069515228271,grad_norm: 0.9999991305814955, iteration: 412166
loss: 1.1261297464370728,grad_norm: 1.0000000577918207, iteration: 412167
loss: 0.9918724298477173,grad_norm: 0.8966156323704035, iteration: 412168
loss: 0.9697292447090149,grad_norm: 0.6722330502288562, iteration: 412169
loss: 1.0138918161392212,grad_norm: 0.6898333777262925, iteration: 412170
loss: 1.028952956199646,grad_norm: 0.875319856103929, iteration: 412171
loss: 0.9620108604431152,grad_norm: 0.8187341506936915, iteration: 412172
loss: 1.0049532651901245,grad_norm: 0.6509161517529816, iteration: 412173
loss: 1.0289510488510132,grad_norm: 0.8131950256115105, iteration: 412174
loss: 1.0079118013381958,grad_norm: 0.9999991050870766, iteration: 412175
loss: 1.0000994205474854,grad_norm: 0.8653821971969341, iteration: 412176
loss: 1.0306180715560913,grad_norm: 0.9076621473950285, iteration: 412177
loss: 1.0179673433303833,grad_norm: 0.8484385144490767, iteration: 412178
loss: 1.0255961418151855,grad_norm: 0.7245386733555808, iteration: 412179
loss: 1.0236738920211792,grad_norm: 0.8976935024666362, iteration: 412180
loss: 1.0037503242492676,grad_norm: 0.6851346399983379, iteration: 412181
loss: 1.00226891040802,grad_norm: 0.9999992071129616, iteration: 412182
loss: 1.0250540971755981,grad_norm: 0.855124838948582, iteration: 412183
loss: 1.0125007629394531,grad_norm: 0.7760928165990638, iteration: 412184
loss: 0.9928419589996338,grad_norm: 0.7018522440105828, iteration: 412185
loss: 1.0165293216705322,grad_norm: 0.7839655503179893, iteration: 412186
loss: 1.0297898054122925,grad_norm: 0.8208974175965623, iteration: 412187
loss: 0.9673402309417725,grad_norm: 0.610828770991993, iteration: 412188
loss: 1.0120313167572021,grad_norm: 0.7448095906428359, iteration: 412189
loss: 1.012150764465332,grad_norm: 0.999999577839709, iteration: 412190
loss: 0.9903050065040588,grad_norm: 0.6968058843414032, iteration: 412191
loss: 1.0276180505752563,grad_norm: 0.6978557133525677, iteration: 412192
loss: 1.0034921169281006,grad_norm: 0.9832655584526055, iteration: 412193
loss: 1.0005912780761719,grad_norm: 0.8115745719610629, iteration: 412194
loss: 0.9856879711151123,grad_norm: 0.9569709042797422, iteration: 412195
loss: 0.9969964623451233,grad_norm: 0.7466647209536085, iteration: 412196
loss: 1.008474349975586,grad_norm: 0.9999991774670791, iteration: 412197
loss: 1.0230021476745605,grad_norm: 1.000000047208335, iteration: 412198
loss: 0.9647025465965271,grad_norm: 0.913701014957046, iteration: 412199
loss: 0.970390260219574,grad_norm: 0.7841176976258882, iteration: 412200
loss: 1.0742640495300293,grad_norm: 1.000000013985109, iteration: 412201
loss: 0.9979962706565857,grad_norm: 0.9526538009238139, iteration: 412202
loss: 1.202571988105774,grad_norm: 0.9999997092672385, iteration: 412203
loss: 1.0311647653579712,grad_norm: 0.7745234957427951, iteration: 412204
loss: 1.0132856369018555,grad_norm: 0.8506062184787374, iteration: 412205
loss: 1.0314147472381592,grad_norm: 0.8798605197648973, iteration: 412206
loss: 0.9832899570465088,grad_norm: 0.6971472908886887, iteration: 412207
loss: 1.0659538507461548,grad_norm: 0.8758476947464653, iteration: 412208
loss: 0.9539157152175903,grad_norm: 0.7181604248096108, iteration: 412209
loss: 1.0105578899383545,grad_norm: 0.770739702645652, iteration: 412210
loss: 0.9674092531204224,grad_norm: 0.9955783882257658, iteration: 412211
loss: 1.010522723197937,grad_norm: 0.9524956218822163, iteration: 412212
loss: 0.969217836856842,grad_norm: 0.99999947443678, iteration: 412213
loss: 0.9822549223899841,grad_norm: 0.8439749654815039, iteration: 412214
loss: 1.080257773399353,grad_norm: 0.9046625805645476, iteration: 412215
loss: 1.00764799118042,grad_norm: 0.9430629465145298, iteration: 412216
loss: 1.0439271926879883,grad_norm: 0.8056258972790546, iteration: 412217
loss: 1.0245189666748047,grad_norm: 0.8602493428920902, iteration: 412218
loss: 1.0029438734054565,grad_norm: 0.6793908253253871, iteration: 412219
loss: 1.0116126537322998,grad_norm: 0.792720300613141, iteration: 412220
loss: 1.0152955055236816,grad_norm: 0.799583617862858, iteration: 412221
loss: 0.9764509201049805,grad_norm: 0.735366414413441, iteration: 412222
loss: 0.950866162776947,grad_norm: 0.8204066453376361, iteration: 412223
loss: 0.99372398853302,grad_norm: 0.7391644778564894, iteration: 412224
loss: 0.9967411756515503,grad_norm: 0.8762235744603487, iteration: 412225
loss: 0.9765048623085022,grad_norm: 0.8506215893488144, iteration: 412226
loss: 0.9982876777648926,grad_norm: 0.7242348542603029, iteration: 412227
loss: 1.0005587339401245,grad_norm: 0.7497603637196014, iteration: 412228
loss: 0.9906461834907532,grad_norm: 0.692250764626656, iteration: 412229
loss: 1.0342097282409668,grad_norm: 0.9376874443311707, iteration: 412230
loss: 0.9968953728675842,grad_norm: 0.6433913077838659, iteration: 412231
loss: 1.011778712272644,grad_norm: 0.8035187586059432, iteration: 412232
loss: 1.0275744199752808,grad_norm: 0.6982807569755863, iteration: 412233
loss: 1.0069239139556885,grad_norm: 0.8486168758539333, iteration: 412234
loss: 1.028887391090393,grad_norm: 0.8034333859755874, iteration: 412235
loss: 1.0199280977249146,grad_norm: 0.7442827185694294, iteration: 412236
loss: 1.02348792552948,grad_norm: 0.9147199198016104, iteration: 412237
loss: 1.0003775358200073,grad_norm: 0.8637734412010063, iteration: 412238
loss: 0.9990575909614563,grad_norm: 0.7309274678992791, iteration: 412239
loss: 0.9783973693847656,grad_norm: 0.8400814614838117, iteration: 412240
loss: 0.9993268847465515,grad_norm: 0.8544117335200085, iteration: 412241
loss: 0.9727567434310913,grad_norm: 0.946462117701532, iteration: 412242
loss: 1.068350076675415,grad_norm: 0.9999997011777821, iteration: 412243
loss: 0.9696174263954163,grad_norm: 0.8935765152033179, iteration: 412244
loss: 0.9559233784675598,grad_norm: 0.6595374715328223, iteration: 412245
loss: 0.9984332919120789,grad_norm: 0.7464754016114518, iteration: 412246
loss: 0.9790898561477661,grad_norm: 0.8114663939702006, iteration: 412247
loss: 0.983537495136261,grad_norm: 0.8058212511618572, iteration: 412248
loss: 0.9892212152481079,grad_norm: 0.6473960287344374, iteration: 412249
loss: 0.9896587133407593,grad_norm: 0.7899357216256018, iteration: 412250
loss: 1.1103816032409668,grad_norm: 0.8340737523584479, iteration: 412251
loss: 0.961141049861908,grad_norm: 0.9849766541270036, iteration: 412252
loss: 0.9523153305053711,grad_norm: 0.7991020019336127, iteration: 412253
loss: 1.020490288734436,grad_norm: 0.9910841972464639, iteration: 412254
loss: 0.9708069562911987,grad_norm: 0.8355562629483804, iteration: 412255
loss: 0.9998968243598938,grad_norm: 0.7415941364566165, iteration: 412256
loss: 1.0154199600219727,grad_norm: 0.8085081998879651, iteration: 412257
loss: 1.041273832321167,grad_norm: 0.7469258025339015, iteration: 412258
loss: 1.0252349376678467,grad_norm: 0.9616084310046144, iteration: 412259
loss: 1.0338526964187622,grad_norm: 0.9999999644608057, iteration: 412260
loss: 1.0337198972702026,grad_norm: 0.7650052833319554, iteration: 412261
loss: 0.9883532524108887,grad_norm: 0.7885243620879391, iteration: 412262
loss: 0.9980147480964661,grad_norm: 0.8459529841261064, iteration: 412263
loss: 1.0447430610656738,grad_norm: 0.7766802012184734, iteration: 412264
loss: 1.0480130910873413,grad_norm: 0.801194078102514, iteration: 412265
loss: 1.017470121383667,grad_norm: 0.686658016394067, iteration: 412266
loss: 1.0183968544006348,grad_norm: 0.8809633760561776, iteration: 412267
loss: 1.0016475915908813,grad_norm: 0.7977536801470635, iteration: 412268
loss: 1.0277410745620728,grad_norm: 0.6725818186141654, iteration: 412269
loss: 1.0190430879592896,grad_norm: 0.9539864877091391, iteration: 412270
loss: 1.005651831626892,grad_norm: 0.7406049209390902, iteration: 412271
loss: 0.9727330207824707,grad_norm: 0.7196012408568278, iteration: 412272
loss: 0.9712711572647095,grad_norm: 0.7061896578603858, iteration: 412273
loss: 0.9852479696273804,grad_norm: 0.8005050873005333, iteration: 412274
loss: 0.993716835975647,grad_norm: 0.7780429390053939, iteration: 412275
loss: 1.029078722000122,grad_norm: 0.7660328779848102, iteration: 412276
loss: 0.9810625910758972,grad_norm: 0.6346204233235023, iteration: 412277
loss: 1.0057449340820312,grad_norm: 0.9999992205855803, iteration: 412278
loss: 1.1507006883621216,grad_norm: 0.9999990796450595, iteration: 412279
loss: 0.991737961769104,grad_norm: 0.6871210646555965, iteration: 412280
loss: 0.9862647652626038,grad_norm: 0.8683190960563577, iteration: 412281
loss: 0.9865820407867432,grad_norm: 0.9147157241945314, iteration: 412282
loss: 1.012904405593872,grad_norm: 0.9999993912715616, iteration: 412283
loss: 1.0100566148757935,grad_norm: 0.7402366636042301, iteration: 412284
loss: 1.0035103559494019,grad_norm: 0.8433869128688063, iteration: 412285
loss: 1.0018150806427002,grad_norm: 0.8075908816943851, iteration: 412286
loss: 0.9928907155990601,grad_norm: 0.720267936627581, iteration: 412287
loss: 1.0068541765213013,grad_norm: 0.9384954908625917, iteration: 412288
loss: 1.0645976066589355,grad_norm: 0.9810244616778355, iteration: 412289
loss: 1.0268707275390625,grad_norm: 0.7648489280901345, iteration: 412290
loss: 1.0067713260650635,grad_norm: 0.8748194634883558, iteration: 412291
loss: 1.0368173122406006,grad_norm: 0.9999991079955158, iteration: 412292
loss: 0.9847908020019531,grad_norm: 0.733012819879715, iteration: 412293
loss: 0.9737749695777893,grad_norm: 0.7486471221189214, iteration: 412294
loss: 1.0175188779830933,grad_norm: 0.8393534471195478, iteration: 412295
loss: 0.9755743741989136,grad_norm: 0.7514750165846497, iteration: 412296
loss: 1.023993730545044,grad_norm: 0.7177799118232885, iteration: 412297
loss: 1.029929757118225,grad_norm: 0.8349395947293848, iteration: 412298
loss: 1.1115831136703491,grad_norm: 0.9999999972398279, iteration: 412299
loss: 1.002045750617981,grad_norm: 0.8110254333119173, iteration: 412300
loss: 1.0156015157699585,grad_norm: 0.6993280946281138, iteration: 412301
loss: 1.0365304946899414,grad_norm: 0.8792645101981632, iteration: 412302
loss: 1.087006688117981,grad_norm: 0.9999991184941267, iteration: 412303
loss: 1.0753402709960938,grad_norm: 0.9999997808589101, iteration: 412304
loss: 1.1129634380340576,grad_norm: 0.9999994126332259, iteration: 412305
loss: 0.9708399176597595,grad_norm: 0.7845774112046975, iteration: 412306
loss: 0.9822647571563721,grad_norm: 0.8577487218037962, iteration: 412307
loss: 1.06440007686615,grad_norm: 0.9999992272124377, iteration: 412308
loss: 1.0069642066955566,grad_norm: 0.9560264515592455, iteration: 412309
loss: 0.9886841773986816,grad_norm: 0.9121086348197556, iteration: 412310
loss: 1.011063575744629,grad_norm: 0.8516435276164855, iteration: 412311
loss: 0.9872074127197266,grad_norm: 0.7151171284638759, iteration: 412312
loss: 1.0086073875427246,grad_norm: 0.64888155915749, iteration: 412313
loss: 1.0282021760940552,grad_norm: 0.8912273729009318, iteration: 412314
loss: 0.990563154220581,grad_norm: 0.6829096569574765, iteration: 412315
loss: 1.0212005376815796,grad_norm: 0.8430172270923936, iteration: 412316
loss: 0.9736001491546631,grad_norm: 0.6933267795531903, iteration: 412317
loss: 0.9773945808410645,grad_norm: 0.9623919845001392, iteration: 412318
loss: 0.9845631718635559,grad_norm: 0.9379159142748454, iteration: 412319
loss: 1.0138365030288696,grad_norm: 0.8656199554933752, iteration: 412320
loss: 1.0205048322677612,grad_norm: 0.8149080916015929, iteration: 412321
loss: 1.0135390758514404,grad_norm: 0.9999993128657716, iteration: 412322
loss: 1.011022686958313,grad_norm: 0.9130947387416659, iteration: 412323
loss: 0.9940694570541382,grad_norm: 0.8708704662026195, iteration: 412324
loss: 1.0013973712921143,grad_norm: 0.9593316451167329, iteration: 412325
loss: 0.9952042698860168,grad_norm: 0.6948118304676367, iteration: 412326
loss: 1.1554301977157593,grad_norm: 0.8723468889475737, iteration: 412327
loss: 1.0324350595474243,grad_norm: 0.922372000503366, iteration: 412328
loss: 1.020917534828186,grad_norm: 0.9018070470944214, iteration: 412329
loss: 0.9856043457984924,grad_norm: 0.8846552692661672, iteration: 412330
loss: 0.9474254846572876,grad_norm: 0.7800458437843372, iteration: 412331
loss: 0.9912441372871399,grad_norm: 0.8789836951195465, iteration: 412332
loss: 1.0043095350265503,grad_norm: 0.9838869843334169, iteration: 412333
loss: 1.0095069408416748,grad_norm: 0.7124716179922902, iteration: 412334
loss: 1.074569821357727,grad_norm: 0.9999996627704508, iteration: 412335
loss: 0.9920254945755005,grad_norm: 0.8423178321336132, iteration: 412336
loss: 0.9554010629653931,grad_norm: 0.8004673786066578, iteration: 412337
loss: 1.002987027168274,grad_norm: 0.999999068080548, iteration: 412338
loss: 1.0085968971252441,grad_norm: 0.8653880551799699, iteration: 412339
loss: 0.9874840974807739,grad_norm: 0.7976524093274013, iteration: 412340
loss: 1.0009417533874512,grad_norm: 0.781454463863882, iteration: 412341
loss: 1.0589802265167236,grad_norm: 0.8233477264936218, iteration: 412342
loss: 1.0369255542755127,grad_norm: 0.7835672223141608, iteration: 412343
loss: 1.0228060483932495,grad_norm: 0.7256369311214338, iteration: 412344
loss: 0.9922491908073425,grad_norm: 0.9999992773898309, iteration: 412345
loss: 0.9856089353561401,grad_norm: 0.8111316711874573, iteration: 412346
loss: 1.0258302688598633,grad_norm: 0.6846468828322774, iteration: 412347
loss: 1.0089857578277588,grad_norm: 0.7336137824732664, iteration: 412348
loss: 0.9681609869003296,grad_norm: 0.7872285135691524, iteration: 412349
loss: 1.0012880563735962,grad_norm: 0.8351693680996656, iteration: 412350
loss: 0.98366379737854,grad_norm: 0.7941641526425008, iteration: 412351
loss: 0.9754605293273926,grad_norm: 0.6631803452331141, iteration: 412352
loss: 0.9680188894271851,grad_norm: 0.8565515153250883, iteration: 412353
loss: 0.9946314096450806,grad_norm: 0.7178707335291509, iteration: 412354
loss: 0.9706310033798218,grad_norm: 0.8259102568018107, iteration: 412355
loss: 1.0191866159439087,grad_norm: 0.8894879149794771, iteration: 412356
loss: 0.9656506776809692,grad_norm: 0.8570357675491631, iteration: 412357
loss: 1.0315289497375488,grad_norm: 0.8919597611345088, iteration: 412358
loss: 0.9924659132957458,grad_norm: 0.9999995378288956, iteration: 412359
loss: 0.9936347007751465,grad_norm: 0.7939473504048055, iteration: 412360
loss: 0.9698891639709473,grad_norm: 0.7554553586299134, iteration: 412361
loss: 0.9922703504562378,grad_norm: 0.84862302098926, iteration: 412362
loss: 1.0999757051467896,grad_norm: 0.8391837713330651, iteration: 412363
loss: 1.102182388305664,grad_norm: 0.9102185951571815, iteration: 412364
loss: 1.0445232391357422,grad_norm: 0.937704947112071, iteration: 412365
loss: 0.9593538641929626,grad_norm: 0.8608946957851951, iteration: 412366
loss: 1.1144111156463623,grad_norm: 0.9999998048056805, iteration: 412367
loss: 0.9926519989967346,grad_norm: 0.8457106509826855, iteration: 412368
loss: 0.9657658934593201,grad_norm: 0.6933188878894664, iteration: 412369
loss: 0.9725968241691589,grad_norm: 0.9859445504147706, iteration: 412370
loss: 1.01323664188385,grad_norm: 0.889834628447801, iteration: 412371
loss: 1.1640772819519043,grad_norm: 0.9999999034618949, iteration: 412372
loss: 1.0387917757034302,grad_norm: 1.0000000122373731, iteration: 412373
loss: 0.9861558079719543,grad_norm: 0.7597300714188463, iteration: 412374
loss: 1.0289793014526367,grad_norm: 0.7509095520616721, iteration: 412375
loss: 0.9690020084381104,grad_norm: 0.7228870255047095, iteration: 412376
loss: 1.0162992477416992,grad_norm: 0.7234110830399285, iteration: 412377
loss: 0.9973505735397339,grad_norm: 0.9133802102345895, iteration: 412378
loss: 1.024359941482544,grad_norm: 0.8029995158717221, iteration: 412379
loss: 1.03911292552948,grad_norm: 0.9999994932215645, iteration: 412380
loss: 1.0119904279708862,grad_norm: 0.9999992227859797, iteration: 412381
loss: 1.0067251920700073,grad_norm: 0.8246696274719281, iteration: 412382
loss: 1.116849422454834,grad_norm: 0.9999993997534838, iteration: 412383
loss: 1.0123248100280762,grad_norm: 0.7808303159687259, iteration: 412384
loss: 1.017248272895813,grad_norm: 0.6885098543411374, iteration: 412385
loss: 0.9983041882514954,grad_norm: 0.8729165602585419, iteration: 412386
loss: 1.1278727054595947,grad_norm: 0.9999989378347564, iteration: 412387
loss: 1.011721134185791,grad_norm: 0.7959588233020236, iteration: 412388
loss: 1.0814231634140015,grad_norm: 0.9999995716547252, iteration: 412389
loss: 1.0210838317871094,grad_norm: 0.7899143588503479, iteration: 412390
loss: 0.958397388458252,grad_norm: 0.946087899447087, iteration: 412391
loss: 0.9773603081703186,grad_norm: 0.8108806248466186, iteration: 412392
loss: 0.9845989346504211,grad_norm: 0.8775853186493248, iteration: 412393
loss: 1.0451940298080444,grad_norm: 0.9999996413925094, iteration: 412394
loss: 1.0403860807418823,grad_norm: 0.9999998491449105, iteration: 412395
loss: 1.0302413702011108,grad_norm: 0.8455578719100179, iteration: 412396
loss: 1.0476610660552979,grad_norm: 0.8154689705577743, iteration: 412397
loss: 1.0205144882202148,grad_norm: 0.7757329098753123, iteration: 412398
loss: 1.0089576244354248,grad_norm: 0.9999996904931594, iteration: 412399
loss: 1.0156075954437256,grad_norm: 0.7046761305718533, iteration: 412400
loss: 0.9826167225837708,grad_norm: 0.8119794503228492, iteration: 412401
loss: 1.0594069957733154,grad_norm: 0.9999993069443507, iteration: 412402
loss: 1.0067698955535889,grad_norm: 0.7198329482883794, iteration: 412403
loss: 0.9859636425971985,grad_norm: 0.7647834775541169, iteration: 412404
loss: 1.0124306678771973,grad_norm: 0.7775446550362515, iteration: 412405
loss: 0.9761461019515991,grad_norm: 0.9271134117730249, iteration: 412406
loss: 0.9948618412017822,grad_norm: 0.8497683933468959, iteration: 412407
loss: 1.0552793741226196,grad_norm: 0.9999999015208908, iteration: 412408
loss: 1.0132417678833008,grad_norm: 0.9999999298966175, iteration: 412409
loss: 1.0255321264266968,grad_norm: 0.8785852563616008, iteration: 412410
loss: 0.9911779761314392,grad_norm: 0.7262001435117761, iteration: 412411
loss: 0.9814103245735168,grad_norm: 0.9999991021294574, iteration: 412412
loss: 0.9799841046333313,grad_norm: 0.7603044717950294, iteration: 412413
loss: 0.9892852902412415,grad_norm: 0.9016076917841536, iteration: 412414
loss: 1.0395115613937378,grad_norm: 0.7596109727021829, iteration: 412415
loss: 0.9753478169441223,grad_norm: 0.7370013809652977, iteration: 412416
loss: 1.0098744630813599,grad_norm: 0.8628242166358249, iteration: 412417
loss: 1.0562678575515747,grad_norm: 0.943308767204548, iteration: 412418
loss: 1.0160119533538818,grad_norm: 0.8225582830057467, iteration: 412419
loss: 0.9657189846038818,grad_norm: 0.788888610296535, iteration: 412420
loss: 1.0109221935272217,grad_norm: 0.954066265111355, iteration: 412421
loss: 1.0529340505599976,grad_norm: 0.999999966958797, iteration: 412422
loss: 1.0562639236450195,grad_norm: 1.0000000592146847, iteration: 412423
loss: 1.0128605365753174,grad_norm: 0.831855914843724, iteration: 412424
loss: 1.0533736944198608,grad_norm: 0.8594007475063197, iteration: 412425
loss: 0.9751378893852234,grad_norm: 0.7221660042228475, iteration: 412426
loss: 1.0061204433441162,grad_norm: 0.8021948273897113, iteration: 412427
loss: 1.0187742710113525,grad_norm: 0.7261327133584714, iteration: 412428
loss: 1.019737958908081,grad_norm: 0.6497038429025561, iteration: 412429
loss: 1.0735503435134888,grad_norm: 0.9737926304896501, iteration: 412430
loss: 1.0162231922149658,grad_norm: 0.9999992070831752, iteration: 412431
loss: 0.9803386330604553,grad_norm: 0.7120922140137065, iteration: 412432
loss: 1.014318823814392,grad_norm: 0.7145302735104757, iteration: 412433
loss: 1.011004090309143,grad_norm: 0.7186033862914077, iteration: 412434
loss: 1.0008777379989624,grad_norm: 0.896353913156234, iteration: 412435
loss: 1.0529807806015015,grad_norm: 0.9999998761022996, iteration: 412436
loss: 1.0074321031570435,grad_norm: 0.7736834309800227, iteration: 412437
loss: 1.0213682651519775,grad_norm: 0.8420355312243751, iteration: 412438
loss: 1.0884445905685425,grad_norm: 0.9999998785089356, iteration: 412439
loss: 0.992883563041687,grad_norm: 0.8491470014966609, iteration: 412440
loss: 1.094904899597168,grad_norm: 0.9999993123056063, iteration: 412441
loss: 1.0020557641983032,grad_norm: 0.7147074748002877, iteration: 412442
loss: 1.0210564136505127,grad_norm: 0.8070637772330352, iteration: 412443
loss: 1.0451865196228027,grad_norm: 0.9999991889679697, iteration: 412444
loss: 0.9871781468391418,grad_norm: 0.6823459480586853, iteration: 412445
loss: 1.0330628156661987,grad_norm: 0.7075160737574265, iteration: 412446
loss: 0.991142213344574,grad_norm: 0.7630308668904259, iteration: 412447
loss: 1.0269001722335815,grad_norm: 0.9999994448212665, iteration: 412448
loss: 0.9591589570045471,grad_norm: 0.7884395947731379, iteration: 412449
loss: 0.9529447555541992,grad_norm: 0.6795580145512862, iteration: 412450
loss: 1.0438621044158936,grad_norm: 0.728592325418673, iteration: 412451
loss: 1.0144914388656616,grad_norm: 0.6657437101313228, iteration: 412452
loss: 0.9963318109512329,grad_norm: 0.7641010842216746, iteration: 412453
loss: 0.9997865557670593,grad_norm: 0.7047443087470564, iteration: 412454
loss: 0.9835236072540283,grad_norm: 0.8107858261670553, iteration: 412455
loss: 0.9934473633766174,grad_norm: 0.9999999610603593, iteration: 412456
loss: 0.9899203181266785,grad_norm: 0.7684272332337192, iteration: 412457
loss: 0.9745520949363708,grad_norm: 0.7468924505192551, iteration: 412458
loss: 0.9992212057113647,grad_norm: 0.8510933098694405, iteration: 412459
loss: 0.987527072429657,grad_norm: 0.7346888210013299, iteration: 412460
loss: 1.026676058769226,grad_norm: 0.7688655951238387, iteration: 412461
loss: 1.0029571056365967,grad_norm: 0.7266947397794711, iteration: 412462
loss: 1.0502700805664062,grad_norm: 0.8460801628678496, iteration: 412463
loss: 0.9851431846618652,grad_norm: 0.7678943778605628, iteration: 412464
loss: 1.032760739326477,grad_norm: 0.7719480269518275, iteration: 412465
loss: 1.0483624935150146,grad_norm: 0.762466132320763, iteration: 412466
loss: 1.0272376537322998,grad_norm: 0.9088246626898683, iteration: 412467
loss: 0.9878425002098083,grad_norm: 0.7342435991504467, iteration: 412468
loss: 1.0197060108184814,grad_norm: 0.7982399241467205, iteration: 412469
loss: 1.0157095193862915,grad_norm: 0.941462000102498, iteration: 412470
loss: 0.9954955577850342,grad_norm: 0.7948386227129103, iteration: 412471
loss: 1.0052235126495361,grad_norm: 0.882366202722994, iteration: 412472
loss: 1.0042027235031128,grad_norm: 0.9431037410000807, iteration: 412473
loss: 1.0151740312576294,grad_norm: 0.6715275488906538, iteration: 412474
loss: 0.9979296922683716,grad_norm: 0.8363500327807426, iteration: 412475
loss: 0.9777946472167969,grad_norm: 0.6914381294192149, iteration: 412476
loss: 1.049894094467163,grad_norm: 0.6591650652526866, iteration: 412477
loss: 0.9780590534210205,grad_norm: 0.7487972158214877, iteration: 412478
loss: 1.0295037031173706,grad_norm: 0.8530864672571328, iteration: 412479
loss: 0.9932213425636292,grad_norm: 0.7278101728065146, iteration: 412480
loss: 0.99336177110672,grad_norm: 0.7285474729347742, iteration: 412481
loss: 0.9949561953544617,grad_norm: 0.8055223425189136, iteration: 412482
loss: 0.9637802243232727,grad_norm: 0.746137465163713, iteration: 412483
loss: 1.034908413887024,grad_norm: 1.0000000430705405, iteration: 412484
loss: 1.0026464462280273,grad_norm: 0.8808741827410189, iteration: 412485
loss: 1.0616902112960815,grad_norm: 0.9999998418104649, iteration: 412486
loss: 1.0117148160934448,grad_norm: 0.8447018605553773, iteration: 412487
loss: 1.025556206703186,grad_norm: 0.6714167951757674, iteration: 412488
loss: 0.9909632205963135,grad_norm: 0.8635385157349176, iteration: 412489
loss: 0.9961933493614197,grad_norm: 0.7264269956373453, iteration: 412490
loss: 0.9752639532089233,grad_norm: 0.7624549487894853, iteration: 412491
loss: 0.9999611377716064,grad_norm: 0.8392778848065727, iteration: 412492
loss: 1.016813039779663,grad_norm: 0.7037634786499936, iteration: 412493
loss: 0.9656357765197754,grad_norm: 0.6437793689937722, iteration: 412494
loss: 1.0126162767410278,grad_norm: 0.9999993668208819, iteration: 412495
loss: 1.0123041868209839,grad_norm: 0.8202271035426019, iteration: 412496
loss: 0.9918659925460815,grad_norm: 0.6937576909724948, iteration: 412497
loss: 0.9892825484275818,grad_norm: 0.9999997695810511, iteration: 412498
loss: 1.038575291633606,grad_norm: 0.9999999118705121, iteration: 412499
loss: 0.9866252541542053,grad_norm: 0.8188840785978855, iteration: 412500
loss: 1.042331576347351,grad_norm: 0.9999998844863929, iteration: 412501
loss: 0.9950926303863525,grad_norm: 0.7085696649842035, iteration: 412502
loss: 0.9965585470199585,grad_norm: 0.6807787798339291, iteration: 412503
loss: 0.9912635684013367,grad_norm: 0.7154562667074037, iteration: 412504
loss: 0.9832331538200378,grad_norm: 0.7211408673855016, iteration: 412505
loss: 0.9771637916564941,grad_norm: 0.8183738110438206, iteration: 412506
loss: 1.1163004636764526,grad_norm: 0.9999990418984116, iteration: 412507
loss: 0.9726269841194153,grad_norm: 0.7072150925831848, iteration: 412508
loss: 1.0092259645462036,grad_norm: 0.8253586778125063, iteration: 412509
loss: 1.0973325967788696,grad_norm: 0.9999991787448544, iteration: 412510
loss: 0.9962486028671265,grad_norm: 0.7952054574223881, iteration: 412511
loss: 1.0027369260787964,grad_norm: 0.6660411629819545, iteration: 412512
loss: 0.9525772333145142,grad_norm: 0.8413389460546091, iteration: 412513
loss: 1.000763177871704,grad_norm: 0.7583751001288033, iteration: 412514
loss: 0.9745094776153564,grad_norm: 0.7638500064296884, iteration: 412515
loss: 1.0168343782424927,grad_norm: 0.8489075308084143, iteration: 412516
loss: 0.9782779216766357,grad_norm: 0.7756677774798398, iteration: 412517
loss: 1.0090652704238892,grad_norm: 0.8218656033235188, iteration: 412518
loss: 1.0234185457229614,grad_norm: 0.7973661718241676, iteration: 412519
loss: 1.0255719423294067,grad_norm: 0.9999990180819631, iteration: 412520
loss: 1.0033109188079834,grad_norm: 0.7433221684752446, iteration: 412521
loss: 0.9837138652801514,grad_norm: 0.7945856770909931, iteration: 412522
loss: 0.9783938527107239,grad_norm: 0.6425580071352627, iteration: 412523
loss: 0.990479052066803,grad_norm: 0.7051622288522622, iteration: 412524
loss: 0.9895254969596863,grad_norm: 0.8558990750057164, iteration: 412525
loss: 0.9974287748336792,grad_norm: 0.7068685947909225, iteration: 412526
loss: 0.9874266982078552,grad_norm: 0.999999453795812, iteration: 412527
loss: 0.9950195550918579,grad_norm: 0.7537768766939508, iteration: 412528
loss: 0.9942814111709595,grad_norm: 0.80598766792995, iteration: 412529
loss: 0.9769666790962219,grad_norm: 0.6695611804142881, iteration: 412530
loss: 1.2090392112731934,grad_norm: 1.0000000185370626, iteration: 412531
loss: 1.0396592617034912,grad_norm: 0.9041068460399138, iteration: 412532
loss: 0.998620331287384,grad_norm: 0.862872820485686, iteration: 412533
loss: 1.021297812461853,grad_norm: 0.7425072266574789, iteration: 412534
loss: 1.0354880094528198,grad_norm: 0.7897544584897804, iteration: 412535
loss: 0.9885218739509583,grad_norm: 0.8944710564866395, iteration: 412536
loss: 1.009469985961914,grad_norm: 0.7720422287028209, iteration: 412537
loss: 0.9933117628097534,grad_norm: 0.9144661803968409, iteration: 412538
loss: 0.9680593013763428,grad_norm: 0.6960914630355974, iteration: 412539
loss: 1.0325671434402466,grad_norm: 0.7444694099012312, iteration: 412540
loss: 1.0134165287017822,grad_norm: 0.8027104980895254, iteration: 412541
loss: 1.0092312097549438,grad_norm: 0.7022342812925392, iteration: 412542
loss: 0.9831218123435974,grad_norm: 0.7174035646330427, iteration: 412543
loss: 0.9919087290763855,grad_norm: 0.7310495869736956, iteration: 412544
loss: 1.0304319858551025,grad_norm: 0.8626238686631016, iteration: 412545
loss: 1.0222607851028442,grad_norm: 0.8073787183888952, iteration: 412546
loss: 1.00034499168396,grad_norm: 0.6643530206268065, iteration: 412547
loss: 1.030333161354065,grad_norm: 0.7630128519472557, iteration: 412548
loss: 1.0098594427108765,grad_norm: 0.8702481865592921, iteration: 412549
loss: 1.018782377243042,grad_norm: 0.9999993277893128, iteration: 412550
loss: 1.0023784637451172,grad_norm: 0.7719825533075696, iteration: 412551
loss: 1.0272390842437744,grad_norm: 0.8146564968707845, iteration: 412552
loss: 0.9787351489067078,grad_norm: 0.8182979361567645, iteration: 412553
loss: 1.039222002029419,grad_norm: 0.8007314663008622, iteration: 412554
loss: 0.9772700071334839,grad_norm: 0.7909533816686026, iteration: 412555
loss: 1.0483876466751099,grad_norm: 0.7291299080342001, iteration: 412556
loss: 1.0062681436538696,grad_norm: 0.7728643640217681, iteration: 412557
loss: 1.0191727876663208,grad_norm: 0.685391728249158, iteration: 412558
loss: 1.0214413404464722,grad_norm: 0.7221492381773241, iteration: 412559
loss: 0.9886505603790283,grad_norm: 0.7618065398031847, iteration: 412560
loss: 0.9935711026191711,grad_norm: 0.9052537165787498, iteration: 412561
loss: 0.9746323227882385,grad_norm: 0.6917045409783146, iteration: 412562
loss: 1.0003554821014404,grad_norm: 0.7390925147654849, iteration: 412563
loss: 0.9768145084381104,grad_norm: 0.9137902081870704, iteration: 412564
loss: 1.027780532836914,grad_norm: 0.6980168235087654, iteration: 412565
loss: 0.9869673252105713,grad_norm: 0.7673063365512754, iteration: 412566
loss: 1.001752257347107,grad_norm: 0.760894728719069, iteration: 412567
loss: 1.0040770769119263,grad_norm: 0.87044995650518, iteration: 412568
loss: 1.0116161108016968,grad_norm: 0.9999999050296783, iteration: 412569
loss: 1.00016450881958,grad_norm: 0.7568307220437644, iteration: 412570
loss: 1.0155373811721802,grad_norm: 0.695784766009906, iteration: 412571
loss: 1.0221399068832397,grad_norm: 0.8477715110637156, iteration: 412572
loss: 0.9759635925292969,grad_norm: 0.9999990701397724, iteration: 412573
loss: 1.0103009939193726,grad_norm: 0.8004270462817733, iteration: 412574
loss: 0.9546120166778564,grad_norm: 0.7720294888532605, iteration: 412575
loss: 1.0000134706497192,grad_norm: 0.8387961141149861, iteration: 412576
loss: 0.9546987414360046,grad_norm: 0.8212997471528752, iteration: 412577
loss: 1.0256839990615845,grad_norm: 0.8348680106970551, iteration: 412578
loss: 0.9883825778961182,grad_norm: 0.748504631237643, iteration: 412579
loss: 0.9890803098678589,grad_norm: 0.7395780686530982, iteration: 412580
loss: 1.0113540887832642,grad_norm: 0.9206900780722799, iteration: 412581
loss: 1.02317214012146,grad_norm: 0.6573038197027056, iteration: 412582
loss: 1.046539068222046,grad_norm: 0.7440438159351399, iteration: 412583
loss: 1.0048391819000244,grad_norm: 0.7837246475527677, iteration: 412584
loss: 0.9977807402610779,grad_norm: 0.6822010557059334, iteration: 412585
loss: 1.0226643085479736,grad_norm: 0.9999994273746221, iteration: 412586
loss: 1.0309667587280273,grad_norm: 0.8307583012302389, iteration: 412587
loss: 0.9673951268196106,grad_norm: 0.7312577497285563, iteration: 412588
loss: 0.9831519722938538,grad_norm: 0.7808965267922068, iteration: 412589
loss: 1.004588007926941,grad_norm: 0.9999994766872845, iteration: 412590
loss: 1.054412841796875,grad_norm: 0.9999991354263545, iteration: 412591
loss: 0.963763952255249,grad_norm: 0.6973939107954137, iteration: 412592
loss: 0.9868292808532715,grad_norm: 0.8510739694625314, iteration: 412593
loss: 1.0129493474960327,grad_norm: 0.8767391526370892, iteration: 412594
loss: 1.0234451293945312,grad_norm: 0.8760959886890589, iteration: 412595
loss: 1.0199624300003052,grad_norm: 0.807193045397402, iteration: 412596
loss: 1.0069855451583862,grad_norm: 0.8015056041790596, iteration: 412597
loss: 0.9645706415176392,grad_norm: 0.7752839325261417, iteration: 412598
loss: 0.9994772672653198,grad_norm: 0.9999996973261289, iteration: 412599
loss: 0.9893495440483093,grad_norm: 0.6156725926737623, iteration: 412600
loss: 1.0108258724212646,grad_norm: 0.9421754347358027, iteration: 412601
loss: 1.018372654914856,grad_norm: 0.999999196825542, iteration: 412602
loss: 0.9468801021575928,grad_norm: 0.7993461714485172, iteration: 412603
loss: 1.0347082614898682,grad_norm: 0.8677834260882645, iteration: 412604
loss: 0.9497146010398865,grad_norm: 0.9056339678743426, iteration: 412605
loss: 1.0264874696731567,grad_norm: 0.7267349077537495, iteration: 412606
loss: 0.9923917055130005,grad_norm: 0.7462315604179866, iteration: 412607
loss: 1.0187387466430664,grad_norm: 0.6452594645896297, iteration: 412608
loss: 1.0065879821777344,grad_norm: 0.7883430891729537, iteration: 412609
loss: 1.0216208696365356,grad_norm: 0.8782520512158372, iteration: 412610
loss: 0.979061484336853,grad_norm: 0.6691511978931317, iteration: 412611
loss: 1.00886070728302,grad_norm: 0.8298837912908128, iteration: 412612
loss: 1.0123134851455688,grad_norm: 0.8647671979199446, iteration: 412613
loss: 0.9772810935974121,grad_norm: 0.6988272821186542, iteration: 412614
loss: 1.003015160560608,grad_norm: 0.7474627768213832, iteration: 412615
loss: 1.010489821434021,grad_norm: 0.6453615052912292, iteration: 412616
loss: 1.0154122114181519,grad_norm: 0.7633701537784748, iteration: 412617
loss: 1.0703368186950684,grad_norm: 0.9999991471319284, iteration: 412618
loss: 0.9968993067741394,grad_norm: 0.8585435197295774, iteration: 412619
loss: 0.9989542365074158,grad_norm: 0.7794920355647134, iteration: 412620
loss: 1.050262451171875,grad_norm: 0.9999997492272075, iteration: 412621
loss: 1.0315494537353516,grad_norm: 0.8324857004262639, iteration: 412622
loss: 1.032875895500183,grad_norm: 0.8232295115972698, iteration: 412623
loss: 0.9974656701087952,grad_norm: 0.8213832421768111, iteration: 412624
loss: 0.9836688041687012,grad_norm: 0.7043441550002174, iteration: 412625
loss: 0.9977770447731018,grad_norm: 0.8058271601340197, iteration: 412626
loss: 0.9874463677406311,grad_norm: 0.6913092449487309, iteration: 412627
loss: 0.978540301322937,grad_norm: 0.7787451755180902, iteration: 412628
loss: 1.175403356552124,grad_norm: 0.9999991142532709, iteration: 412629
loss: 0.9662009477615356,grad_norm: 0.6874263668459482, iteration: 412630
loss: 1.0340595245361328,grad_norm: 0.8104651704394628, iteration: 412631
loss: 1.0331026315689087,grad_norm: 0.7718071551378828, iteration: 412632
loss: 0.9774351119995117,grad_norm: 0.6669265457552339, iteration: 412633
loss: 0.9504360556602478,grad_norm: 0.8383285083653105, iteration: 412634
loss: 0.9889622330665588,grad_norm: 0.7275121346613324, iteration: 412635
loss: 1.0291529893875122,grad_norm: 0.847918335885552, iteration: 412636
loss: 1.0090715885162354,grad_norm: 0.787378041258601, iteration: 412637
loss: 1.0245457887649536,grad_norm: 0.7886456506453771, iteration: 412638
loss: 0.9715638160705566,grad_norm: 0.9166270795389166, iteration: 412639
loss: 0.9652119874954224,grad_norm: 0.6974475754892, iteration: 412640
loss: 0.967684805393219,grad_norm: 0.8788665733885007, iteration: 412641
loss: 1.0036125183105469,grad_norm: 0.8865665111946233, iteration: 412642
loss: 1.00302255153656,grad_norm: 0.8326759773888431, iteration: 412643
loss: 1.0202393531799316,grad_norm: 0.7870058030910627, iteration: 412644
loss: 1.0838265419006348,grad_norm: 0.9999995538036313, iteration: 412645
loss: 0.9846763610839844,grad_norm: 0.8555180998599671, iteration: 412646
loss: 1.0151686668395996,grad_norm: 0.7835075608985872, iteration: 412647
loss: 1.0033124685287476,grad_norm: 0.9025349411431701, iteration: 412648
loss: 0.9822757840156555,grad_norm: 0.7592749918312394, iteration: 412649
loss: 1.1066590547561646,grad_norm: 1.0000000325175438, iteration: 412650
loss: 0.990714430809021,grad_norm: 0.6669198507652034, iteration: 412651
loss: 1.015288233757019,grad_norm: 0.803603983321727, iteration: 412652
loss: 1.0310777425765991,grad_norm: 0.9999993206668218, iteration: 412653
loss: 1.0108957290649414,grad_norm: 0.8404333287065257, iteration: 412654
loss: 0.9889301061630249,grad_norm: 1.0000000501416832, iteration: 412655
loss: 1.0841443538665771,grad_norm: 0.9999991231595297, iteration: 412656
loss: 1.0155754089355469,grad_norm: 0.7459753888263206, iteration: 412657
loss: 1.056258201599121,grad_norm: 0.9125808184953961, iteration: 412658
loss: 1.000185489654541,grad_norm: 0.8391936305263266, iteration: 412659
loss: 1.0087512731552124,grad_norm: 0.8562809315655139, iteration: 412660
loss: 0.9977293610572815,grad_norm: 0.7998768307180742, iteration: 412661
loss: 1.0067681074142456,grad_norm: 0.8328166188076137, iteration: 412662
loss: 1.0319206714630127,grad_norm: 0.7632580001585265, iteration: 412663
loss: 0.9439826607704163,grad_norm: 0.810235150526414, iteration: 412664
loss: 1.045544147491455,grad_norm: 0.8066930834485612, iteration: 412665
loss: 1.0199681520462036,grad_norm: 0.9999991142394137, iteration: 412666
loss: 1.0138410329818726,grad_norm: 0.7823480073800134, iteration: 412667
loss: 0.996829628944397,grad_norm: 0.6594885482309082, iteration: 412668
loss: 0.9909631013870239,grad_norm: 0.8417381934832705, iteration: 412669
loss: 0.9976087808609009,grad_norm: 0.7233456893259899, iteration: 412670
loss: 1.0025254487991333,grad_norm: 0.999999738250874, iteration: 412671
loss: 1.030960202217102,grad_norm: 0.9999990054037682, iteration: 412672
loss: 1.0033186674118042,grad_norm: 0.7698315202056213, iteration: 412673
loss: 0.9790492057800293,grad_norm: 0.7518234998361077, iteration: 412674
loss: 1.0174074172973633,grad_norm: 0.8297597063295842, iteration: 412675
loss: 1.0025137662887573,grad_norm: 0.9300080243948042, iteration: 412676
loss: 0.9885482788085938,grad_norm: 0.7812371852021844, iteration: 412677
loss: 1.005653977394104,grad_norm: 0.7782292454468709, iteration: 412678
loss: 0.9851857423782349,grad_norm: 0.9999993652500649, iteration: 412679
loss: 0.98534095287323,grad_norm: 0.732323241924203, iteration: 412680
loss: 1.0100549459457397,grad_norm: 0.7567323465071323, iteration: 412681
loss: 0.999212384223938,grad_norm: 0.9585640145187, iteration: 412682
loss: 0.9882540106773376,grad_norm: 0.6803794677398068, iteration: 412683
loss: 1.1378436088562012,grad_norm: 0.8906963125590749, iteration: 412684
loss: 0.9905502200126648,grad_norm: 0.7465432122026828, iteration: 412685
loss: 1.0463457107543945,grad_norm: 0.9999999400924086, iteration: 412686
loss: 1.004320740699768,grad_norm: 0.7901088818984883, iteration: 412687
loss: 1.0185648202896118,grad_norm: 0.7803294039866068, iteration: 412688
loss: 1.0250667333602905,grad_norm: 0.7323383268441019, iteration: 412689
loss: 1.0538240671157837,grad_norm: 0.999999955143618, iteration: 412690
loss: 0.9675470590591431,grad_norm: 0.8353677518805495, iteration: 412691
loss: 1.0350464582443237,grad_norm: 0.9999995427819439, iteration: 412692
loss: 0.9753079414367676,grad_norm: 0.7262410967655195, iteration: 412693
loss: 0.9569799900054932,grad_norm: 0.7202555960423755, iteration: 412694
loss: 0.9840794205665588,grad_norm: 0.9546385755950305, iteration: 412695
loss: 0.9905107617378235,grad_norm: 0.9999991552024091, iteration: 412696
loss: 1.0039633512496948,grad_norm: 0.7312640685446653, iteration: 412697
loss: 0.9915188550949097,grad_norm: 0.7015363367592499, iteration: 412698
loss: 1.0181734561920166,grad_norm: 0.9155842679441991, iteration: 412699
loss: 0.9606149196624756,grad_norm: 0.6363202530785177, iteration: 412700
loss: 0.9993396401405334,grad_norm: 0.6628245028787069, iteration: 412701
loss: 1.0002082586288452,grad_norm: 0.9277718450997948, iteration: 412702
loss: 1.0365173816680908,grad_norm: 0.7982858492373925, iteration: 412703
loss: 0.9909302592277527,grad_norm: 0.7123245227557116, iteration: 412704
loss: 1.028581142425537,grad_norm: 0.8612573389419116, iteration: 412705
loss: 1.0115488767623901,grad_norm: 0.8831526942775619, iteration: 412706
loss: 0.986914336681366,grad_norm: 0.8134155668198291, iteration: 412707
loss: 1.0156632661819458,grad_norm: 0.638919743905282, iteration: 412708
loss: 1.0803744792938232,grad_norm: 1.000000000595077, iteration: 412709
loss: 1.0152474641799927,grad_norm: 0.9175790037186173, iteration: 412710
loss: 1.0008751153945923,grad_norm: 0.7188605807958405, iteration: 412711
loss: 1.0514494180679321,grad_norm: 0.999999364287394, iteration: 412712
loss: 1.0444284677505493,grad_norm: 0.7671868603068166, iteration: 412713
loss: 1.0129588842391968,grad_norm: 0.9999991200109293, iteration: 412714
loss: 0.9812633395195007,grad_norm: 0.9999990278749297, iteration: 412715
loss: 1.002776026725769,grad_norm: 0.8822704790107851, iteration: 412716
loss: 1.001726508140564,grad_norm: 0.9515636540079906, iteration: 412717
loss: 0.9807971119880676,grad_norm: 0.8170021838200736, iteration: 412718
loss: 1.0499030351638794,grad_norm: 0.8634890521163883, iteration: 412719
loss: 1.0089783668518066,grad_norm: 0.9999999345190647, iteration: 412720
loss: 1.0040141344070435,grad_norm: 0.8536025230817426, iteration: 412721
loss: 1.0259486436843872,grad_norm: 0.9999990801772465, iteration: 412722
loss: 1.0335724353790283,grad_norm: 0.8431671677265169, iteration: 412723
loss: 0.979829728603363,grad_norm: 0.9872443334586124, iteration: 412724
loss: 1.0173536539077759,grad_norm: 0.8607474953305143, iteration: 412725
loss: 1.0442869663238525,grad_norm: 0.6897668678112469, iteration: 412726
loss: 0.9903780221939087,grad_norm: 0.8035440911644025, iteration: 412727
loss: 1.0082876682281494,grad_norm: 0.7270616596535294, iteration: 412728
loss: 0.9996000528335571,grad_norm: 0.7821621762295232, iteration: 412729
loss: 1.0122495889663696,grad_norm: 0.8608770912798873, iteration: 412730
loss: 1.0424416065216064,grad_norm: 0.999999138919457, iteration: 412731
loss: 1.0960826873779297,grad_norm: 0.9999997369608469, iteration: 412732
loss: 0.9765954613685608,grad_norm: 0.7459087658885132, iteration: 412733
loss: 0.9950852394104004,grad_norm: 0.7593905517344116, iteration: 412734
loss: 0.9638410806655884,grad_norm: 0.7007918098076258, iteration: 412735
loss: 1.0599888563156128,grad_norm: 0.8696400402352733, iteration: 412736
loss: 1.1298227310180664,grad_norm: 0.9267002863336231, iteration: 412737
loss: 1.0147991180419922,grad_norm: 1.000000031305368, iteration: 412738
loss: 1.0189834833145142,grad_norm: 0.7788195562305621, iteration: 412739
loss: 1.0152429342269897,grad_norm: 0.9999997187021658, iteration: 412740
loss: 1.0202445983886719,grad_norm: 0.8687831646079688, iteration: 412741
loss: 1.0155425071716309,grad_norm: 0.978498463355382, iteration: 412742
loss: 0.9899663329124451,grad_norm: 0.9481757445666513, iteration: 412743
loss: 0.9768739938735962,grad_norm: 0.8020212732142337, iteration: 412744
loss: 1.046181082725525,grad_norm: 0.9999997410391164, iteration: 412745
loss: 1.0230438709259033,grad_norm: 0.9999995431937815, iteration: 412746
loss: 1.0324082374572754,grad_norm: 0.8910479219961371, iteration: 412747
loss: 1.0008869171142578,grad_norm: 0.7042824936387606, iteration: 412748
loss: 1.0248421430587769,grad_norm: 0.896274206105826, iteration: 412749
loss: 1.0414271354675293,grad_norm: 0.8630397646225464, iteration: 412750
loss: 1.009952187538147,grad_norm: 0.8666850841682249, iteration: 412751
loss: 1.0229167938232422,grad_norm: 0.925948045128809, iteration: 412752
loss: 0.9954251050949097,grad_norm: 0.8582242970365921, iteration: 412753
loss: 0.9985510110855103,grad_norm: 0.8119264095566116, iteration: 412754
loss: 0.9858093857765198,grad_norm: 0.8710709998582546, iteration: 412755
loss: 1.090643048286438,grad_norm: 0.9999995033546161, iteration: 412756
loss: 0.9896948933601379,grad_norm: 0.8774266845727124, iteration: 412757
loss: 0.9689307808876038,grad_norm: 0.7274045681037994, iteration: 412758
loss: 1.0073764324188232,grad_norm: 0.6627932268734072, iteration: 412759
loss: 1.022983193397522,grad_norm: 0.7399721098669619, iteration: 412760
loss: 0.9858190417289734,grad_norm: 0.9101086084283463, iteration: 412761
loss: 0.9784497022628784,grad_norm: 0.8578410468647126, iteration: 412762
loss: 1.0407086610794067,grad_norm: 1.0000000548790289, iteration: 412763
loss: 1.0171507596969604,grad_norm: 0.7702032069248426, iteration: 412764
loss: 1.1632859706878662,grad_norm: 0.9999994971419682, iteration: 412765
loss: 1.020934820175171,grad_norm: 0.8833384137542841, iteration: 412766
loss: 1.1820037364959717,grad_norm: 0.9999995280545332, iteration: 412767
loss: 0.9726918935775757,grad_norm: 0.8558744282234183, iteration: 412768
loss: 0.9920310378074646,grad_norm: 0.8176737175231317, iteration: 412769
loss: 1.0338995456695557,grad_norm: 0.9999994093317601, iteration: 412770
loss: 1.0142978429794312,grad_norm: 0.999999931797711, iteration: 412771
loss: 1.0052647590637207,grad_norm: 0.9999994579755632, iteration: 412772
loss: 1.0190155506134033,grad_norm: 0.9999996582021297, iteration: 412773
loss: 1.0407347679138184,grad_norm: 0.9999996719380136, iteration: 412774
loss: 0.9672368764877319,grad_norm: 0.6859792311049154, iteration: 412775
loss: 0.993101179599762,grad_norm: 0.9999994348435658, iteration: 412776
loss: 0.9994516372680664,grad_norm: 0.8590480639293209, iteration: 412777
loss: 1.0809061527252197,grad_norm: 0.9999998603145559, iteration: 412778
loss: 0.9814654588699341,grad_norm: 0.7892275785474069, iteration: 412779
loss: 1.0754197835922241,grad_norm: 0.9999992607369611, iteration: 412780
loss: 1.203719139099121,grad_norm: 0.9999992786010385, iteration: 412781
loss: 1.1989163160324097,grad_norm: 0.9999991763526148, iteration: 412782
loss: 1.041654348373413,grad_norm: 0.7783849317781832, iteration: 412783
loss: 1.0031408071517944,grad_norm: 0.8874361312426148, iteration: 412784
loss: 0.9970964789390564,grad_norm: 0.8570188090921007, iteration: 412785
loss: 1.0012894868850708,grad_norm: 0.7619562222259055, iteration: 412786
loss: 1.1839673519134521,grad_norm: 0.9999999363430796, iteration: 412787
loss: 1.0002856254577637,grad_norm: 0.9279503800217495, iteration: 412788
loss: 0.972175657749176,grad_norm: 0.7569625044158335, iteration: 412789
loss: 0.9553551077842712,grad_norm: 0.8246443183106436, iteration: 412790
loss: 1.0269426107406616,grad_norm: 0.9979047863084599, iteration: 412791
loss: 1.0344080924987793,grad_norm: 0.8438467035378413, iteration: 412792
loss: 0.9974635243415833,grad_norm: 0.7602654075166101, iteration: 412793
loss: 0.9771459698677063,grad_norm: 0.7921920443600489, iteration: 412794
loss: 0.9905049204826355,grad_norm: 0.9505322282936913, iteration: 412795
loss: 0.9998483061790466,grad_norm: 0.9999991265401111, iteration: 412796
loss: 1.0283411741256714,grad_norm: 0.846124255572948, iteration: 412797
loss: 1.0157748460769653,grad_norm: 0.9210762101600912, iteration: 412798
loss: 0.9972583055496216,grad_norm: 0.696869135979095, iteration: 412799
loss: 1.0041499137878418,grad_norm: 0.8269997716808828, iteration: 412800
loss: 0.9876580238342285,grad_norm: 0.7419198939745028, iteration: 412801
loss: 1.0105892419815063,grad_norm: 0.8278441264870369, iteration: 412802
loss: 0.9643178582191467,grad_norm: 0.8119274738863127, iteration: 412803
loss: 0.9961830377578735,grad_norm: 0.7518763929418364, iteration: 412804
loss: 0.9649937152862549,grad_norm: 0.6700749461463006, iteration: 412805
loss: 1.0119081735610962,grad_norm: 0.9999996225974971, iteration: 412806
loss: 1.0122941732406616,grad_norm: 0.7978983281301214, iteration: 412807
loss: 0.9676857590675354,grad_norm: 0.8244760653089926, iteration: 412808
loss: 1.0217106342315674,grad_norm: 0.6826498360000153, iteration: 412809
loss: 1.034687876701355,grad_norm: 0.9999998011248665, iteration: 412810
loss: 1.0576485395431519,grad_norm: 0.9999992067612338, iteration: 412811
loss: 0.9808338284492493,grad_norm: 0.7224078965556222, iteration: 412812
loss: 0.9891257286071777,grad_norm: 0.78820775638214, iteration: 412813
loss: 1.0103508234024048,grad_norm: 0.9999998090927048, iteration: 412814
loss: 0.9890563488006592,grad_norm: 0.8872600306275267, iteration: 412815
loss: 1.001979947090149,grad_norm: 0.7669184022602146, iteration: 412816
loss: 1.1072405576705933,grad_norm: 0.9999991848023784, iteration: 412817
loss: 0.9952158331871033,grad_norm: 0.7124525259919764, iteration: 412818
loss: 0.9960821270942688,grad_norm: 0.7447742524414009, iteration: 412819
loss: 0.9722110629081726,grad_norm: 0.814003734062087, iteration: 412820
loss: 1.004634141921997,grad_norm: 0.7942116297413818, iteration: 412821
loss: 0.985835075378418,grad_norm: 0.8005900618563311, iteration: 412822
loss: 1.0055499076843262,grad_norm: 0.8072261971131215, iteration: 412823
loss: 0.9856793880462646,grad_norm: 0.8642198562771348, iteration: 412824
loss: 0.9796199202537537,grad_norm: 0.7889112831810182, iteration: 412825
loss: 1.042318344116211,grad_norm: 0.9420487624319892, iteration: 412826
loss: 1.0171993970870972,grad_norm: 0.708254543647661, iteration: 412827
loss: 1.013210654258728,grad_norm: 0.9999990731356452, iteration: 412828
loss: 1.0116559267044067,grad_norm: 0.735410134786254, iteration: 412829
loss: 0.9755359888076782,grad_norm: 0.7138544602070824, iteration: 412830
loss: 1.0229120254516602,grad_norm: 0.864323668898867, iteration: 412831
loss: 1.1008293628692627,grad_norm: 0.9999995383764205, iteration: 412832
loss: 1.173405647277832,grad_norm: 0.9699233816225481, iteration: 412833
loss: 1.1180490255355835,grad_norm: 0.9999995719967951, iteration: 412834
loss: 1.0236248970031738,grad_norm: 0.9999990358753038, iteration: 412835
loss: 1.00924551486969,grad_norm: 0.9999997007878316, iteration: 412836
loss: 1.1180665493011475,grad_norm: 0.912449897301809, iteration: 412837
loss: 1.1273386478424072,grad_norm: 0.9999996761064857, iteration: 412838
loss: 0.9802729487419128,grad_norm: 0.7211544412807525, iteration: 412839
loss: 1.224284291267395,grad_norm: 0.9999990773432266, iteration: 412840
loss: 1.2134053707122803,grad_norm: 0.9999992147145216, iteration: 412841
loss: 1.0192252397537231,grad_norm: 0.9999999097078275, iteration: 412842
loss: 1.0650696754455566,grad_norm: 0.8301966903771433, iteration: 412843
loss: 1.0269588232040405,grad_norm: 0.8822695981339705, iteration: 412844
loss: 1.010602593421936,grad_norm: 0.9999996347772171, iteration: 412845
loss: 1.0593091249465942,grad_norm: 1.000000025490342, iteration: 412846
loss: 1.2950924634933472,grad_norm: 0.9999998291372086, iteration: 412847
loss: 0.9939140677452087,grad_norm: 0.7591302964571343, iteration: 412848
loss: 1.0017226934432983,grad_norm: 0.649026196700635, iteration: 412849
loss: 1.0828324556350708,grad_norm: 0.9757673158056687, iteration: 412850
loss: 1.1610463857650757,grad_norm: 0.9999998236014624, iteration: 412851
loss: 1.1526641845703125,grad_norm: 0.9999995453632743, iteration: 412852
loss: 1.0108665227890015,grad_norm: 0.9271143548757784, iteration: 412853
loss: 1.0449210405349731,grad_norm: 0.9713768966315438, iteration: 412854
loss: 1.0070174932479858,grad_norm: 0.8062179618528039, iteration: 412855
loss: 0.9913357496261597,grad_norm: 0.9999991848992769, iteration: 412856
loss: 1.0772249698638916,grad_norm: 0.8475522790808465, iteration: 412857
loss: 1.1433712244033813,grad_norm: 0.9999996453480374, iteration: 412858
loss: 1.0199971199035645,grad_norm: 0.9999991560707937, iteration: 412859
loss: 1.0223803520202637,grad_norm: 0.7621704753446794, iteration: 412860
loss: 1.0923726558685303,grad_norm: 0.9999992223989065, iteration: 412861
loss: 1.119821548461914,grad_norm: 0.9999997714731095, iteration: 412862
loss: 0.9992184042930603,grad_norm: 0.6683096512599888, iteration: 412863
loss: 1.0195682048797607,grad_norm: 0.6051372933080601, iteration: 412864
loss: 1.0915480852127075,grad_norm: 0.827946263296997, iteration: 412865
loss: 1.1395823955535889,grad_norm: 0.9999996350486409, iteration: 412866
loss: 1.0435433387756348,grad_norm: 0.7040519835504504, iteration: 412867
loss: 1.1341358423233032,grad_norm: 0.9999996459593096, iteration: 412868
loss: 1.018096923828125,grad_norm: 0.819088642184229, iteration: 412869
loss: 0.9898220300674438,grad_norm: 0.7237425009779985, iteration: 412870
loss: 1.037941813468933,grad_norm: 0.7460531197336456, iteration: 412871
loss: 1.0417729616165161,grad_norm: 0.9708047599296642, iteration: 412872
loss: 1.0998507738113403,grad_norm: 1.000000040261311, iteration: 412873
loss: 0.9873328804969788,grad_norm: 0.9999997425851, iteration: 412874
loss: 1.0113656520843506,grad_norm: 0.9999997767838672, iteration: 412875
loss: 1.0181983709335327,grad_norm: 0.9999992031597533, iteration: 412876
loss: 1.0148869752883911,grad_norm: 0.9999996047500156, iteration: 412877
loss: 1.049064040184021,grad_norm: 0.7710070420604929, iteration: 412878
loss: 1.0852501392364502,grad_norm: 0.7432077144825358, iteration: 412879
loss: 0.9580910205841064,grad_norm: 0.8548204967148639, iteration: 412880
loss: 1.00248384475708,grad_norm: 0.769174240140956, iteration: 412881
loss: 1.0331614017486572,grad_norm: 0.7392768015843925, iteration: 412882
loss: 1.0610836744308472,grad_norm: 0.9999998909148199, iteration: 412883
loss: 0.9952595829963684,grad_norm: 0.8706738583124617, iteration: 412884
loss: 0.971828043460846,grad_norm: 0.8879223423956821, iteration: 412885
loss: 0.9726924300193787,grad_norm: 0.9434858070107742, iteration: 412886
loss: 1.0265305042266846,grad_norm: 0.9977423499796119, iteration: 412887
loss: 1.0179989337921143,grad_norm: 0.7658655720867337, iteration: 412888
loss: 0.9898748397827148,grad_norm: 0.841958510500753, iteration: 412889
loss: 0.9818181395530701,grad_norm: 0.7736039980005024, iteration: 412890
loss: 0.9876834154129028,grad_norm: 0.8817291755172139, iteration: 412891
loss: 1.0663353204727173,grad_norm: 0.8880038956672728, iteration: 412892
loss: 1.0035037994384766,grad_norm: 0.9619908376586982, iteration: 412893
loss: 1.1230353116989136,grad_norm: 0.8178856157533967, iteration: 412894
loss: 0.9900602102279663,grad_norm: 0.7835449225095564, iteration: 412895
loss: 1.0064195394515991,grad_norm: 0.7030818157432442, iteration: 412896
loss: 1.063761591911316,grad_norm: 0.9999998762029743, iteration: 412897
loss: 0.9943733811378479,grad_norm: 0.8653602892344403, iteration: 412898
loss: 1.017480492591858,grad_norm: 0.5524017209285021, iteration: 412899
loss: 1.0172491073608398,grad_norm: 0.9999999321826216, iteration: 412900
loss: 1.0373706817626953,grad_norm: 0.99999926857338, iteration: 412901
loss: 1.0085453987121582,grad_norm: 0.8536896926071692, iteration: 412902
loss: 0.9971596598625183,grad_norm: 0.9003237195952455, iteration: 412903
loss: 1.1314504146575928,grad_norm: 0.9999993575796106, iteration: 412904
loss: 1.0034080743789673,grad_norm: 0.8578088724864753, iteration: 412905
loss: 1.0278079509735107,grad_norm: 0.9382378530041247, iteration: 412906
loss: 1.0451502799987793,grad_norm: 0.7540697525881674, iteration: 412907
loss: 1.0020471811294556,grad_norm: 0.8827687203398705, iteration: 412908
loss: 1.0002937316894531,grad_norm: 0.9920646604463551, iteration: 412909
loss: 1.1272186040878296,grad_norm: 0.9999991986233357, iteration: 412910
loss: 1.023254632949829,grad_norm: 0.8520046366057229, iteration: 412911
loss: 1.0383975505828857,grad_norm: 0.9339922269502094, iteration: 412912
loss: 1.1117160320281982,grad_norm: 0.9999993865187442, iteration: 412913
loss: 1.0299502611160278,grad_norm: 0.9554231484957165, iteration: 412914
loss: 1.098179817199707,grad_norm: 0.9999990994621265, iteration: 412915
loss: 0.9637084603309631,grad_norm: 0.8034250956183631, iteration: 412916
loss: 1.053375244140625,grad_norm: 0.9999990231070857, iteration: 412917
loss: 0.9956278204917908,grad_norm: 0.9999990344263062, iteration: 412918
loss: 0.9546083807945251,grad_norm: 0.9748137688424512, iteration: 412919
loss: 0.9683377146720886,grad_norm: 0.8534885953249854, iteration: 412920
loss: 1.0475305318832397,grad_norm: 0.9999997129499937, iteration: 412921
loss: 0.9969397783279419,grad_norm: 0.786280260340585, iteration: 412922
loss: 1.0271531343460083,grad_norm: 0.9320505016025332, iteration: 412923
loss: 1.037601351737976,grad_norm: 0.8809210093445551, iteration: 412924
loss: 1.0131337642669678,grad_norm: 0.8939584659223734, iteration: 412925
loss: 1.2092534303665161,grad_norm: 0.999999990431865, iteration: 412926
loss: 1.0363516807556152,grad_norm: 0.7084241844186139, iteration: 412927
loss: 1.0113589763641357,grad_norm: 0.8300057878803861, iteration: 412928
loss: 1.072123408317566,grad_norm: 0.9999996263317273, iteration: 412929
loss: 1.0608824491500854,grad_norm: 0.7785618026312501, iteration: 412930
loss: 1.0038167238235474,grad_norm: 0.9100400985071894, iteration: 412931
loss: 0.9931172728538513,grad_norm: 0.8567368307664348, iteration: 412932
loss: 1.1492503881454468,grad_norm: 0.9999993507047404, iteration: 412933
loss: 1.0198360681533813,grad_norm: 0.9999999616956406, iteration: 412934
loss: 1.0022647380828857,grad_norm: 0.8430680934220403, iteration: 412935
loss: 1.0322480201721191,grad_norm: 0.9999991450118231, iteration: 412936
loss: 1.052553415298462,grad_norm: 1.0000000523787125, iteration: 412937
loss: 1.0702013969421387,grad_norm: 1.000000001014003, iteration: 412938
loss: 1.0371569395065308,grad_norm: 0.9098518992097187, iteration: 412939
loss: 1.0314490795135498,grad_norm: 0.7534005725095193, iteration: 412940
loss: 1.0233802795410156,grad_norm: 0.7902510046898314, iteration: 412941
loss: 1.0227901935577393,grad_norm: 0.999999065228733, iteration: 412942
loss: 0.9821214079856873,grad_norm: 0.9085513464898267, iteration: 412943
loss: 1.064293384552002,grad_norm: 0.9999995131052748, iteration: 412944
loss: 1.018990397453308,grad_norm: 0.9999990276076205, iteration: 412945
loss: 1.0411577224731445,grad_norm: 0.8849974638402871, iteration: 412946
loss: 1.199381947517395,grad_norm: 0.999999824725753, iteration: 412947
loss: 1.0189682245254517,grad_norm: 0.9036089240472758, iteration: 412948
loss: 1.1026076078414917,grad_norm: 0.9999999656366934, iteration: 412949
loss: 1.0397613048553467,grad_norm: 0.9999998642268796, iteration: 412950
loss: 1.0506855249404907,grad_norm: 0.9999993117648304, iteration: 412951
loss: 1.134811520576477,grad_norm: 0.8641306370019665, iteration: 412952
loss: 1.1573067903518677,grad_norm: 0.9999998692234924, iteration: 412953
loss: 1.4380546808242798,grad_norm: 0.9999999918146489, iteration: 412954
loss: 1.075740098953247,grad_norm: 0.9999998700067356, iteration: 412955
loss: 1.0518923997879028,grad_norm: 0.9999993497606461, iteration: 412956
loss: 1.0805493593215942,grad_norm: 0.9999999584293923, iteration: 412957
loss: 1.0725393295288086,grad_norm: 0.8179069844794097, iteration: 412958
loss: 1.1464225053787231,grad_norm: 0.9999995028221582, iteration: 412959
loss: 1.0010274648666382,grad_norm: 0.7342121784831823, iteration: 412960
loss: 1.0551906824111938,grad_norm: 0.9528643453441714, iteration: 412961
loss: 1.0019748210906982,grad_norm: 0.9999992313493207, iteration: 412962
loss: 1.1742898225784302,grad_norm: 0.9999993058031935, iteration: 412963
loss: 1.0578378438949585,grad_norm: 0.9999994338511246, iteration: 412964
loss: 0.9885626435279846,grad_norm: 0.9999997398382109, iteration: 412965
loss: 1.1025835275650024,grad_norm: 0.99999991571177, iteration: 412966
loss: 1.0138676166534424,grad_norm: 0.9999995713452748, iteration: 412967
loss: 1.085931420326233,grad_norm: 0.9999998823947343, iteration: 412968
loss: 0.9636455774307251,grad_norm: 0.7877135194984765, iteration: 412969
loss: 1.1686125993728638,grad_norm: 0.9999995476160856, iteration: 412970
loss: 0.998209536075592,grad_norm: 0.9999992150880586, iteration: 412971
loss: 1.13020658493042,grad_norm: 0.9999992076207573, iteration: 412972
loss: 1.0210342407226562,grad_norm: 0.9999997623704165, iteration: 412973
loss: 1.2486848831176758,grad_norm: 0.9999999682526547, iteration: 412974
loss: 1.0582871437072754,grad_norm: 0.9999990747930013, iteration: 412975
loss: 0.9676975011825562,grad_norm: 0.7853648043198057, iteration: 412976
loss: 1.0744802951812744,grad_norm: 0.9999995444956803, iteration: 412977
loss: 0.9905441999435425,grad_norm: 0.7581964813029362, iteration: 412978
loss: 0.9958434104919434,grad_norm: 0.9653970145470481, iteration: 412979
loss: 1.071777105331421,grad_norm: 0.8593001768982618, iteration: 412980
loss: 1.150323510169983,grad_norm: 0.9999996079348573, iteration: 412981
loss: 0.9699410796165466,grad_norm: 0.7779838842182358, iteration: 412982
loss: 0.9874481558799744,grad_norm: 0.8141363477245824, iteration: 412983
loss: 0.9997653365135193,grad_norm: 0.9999991610335612, iteration: 412984
loss: 1.0984172821044922,grad_norm: 0.9999995049418386, iteration: 412985
loss: 0.9925431609153748,grad_norm: 0.944372519410603, iteration: 412986
loss: 1.0849629640579224,grad_norm: 0.9999998306470964, iteration: 412987
loss: 0.9864112138748169,grad_norm: 0.6966287193802525, iteration: 412988
loss: 0.992034912109375,grad_norm: 0.7540303624756503, iteration: 412989
loss: 1.0116230249404907,grad_norm: 0.9118320817877038, iteration: 412990
loss: 1.003172516822815,grad_norm: 0.8340397538334705, iteration: 412991
loss: 1.0558698177337646,grad_norm: 0.8132905855065192, iteration: 412992
loss: 0.9961912035942078,grad_norm: 0.9999989176860647, iteration: 412993
loss: 0.9993166327476501,grad_norm: 0.999999954052148, iteration: 412994
loss: 0.9966806173324585,grad_norm: 0.716448318194577, iteration: 412995
loss: 1.0116815567016602,grad_norm: 0.8653385098566918, iteration: 412996
loss: 1.0228947401046753,grad_norm: 0.9999993344554738, iteration: 412997
loss: 1.0018600225448608,grad_norm: 0.7847900756679604, iteration: 412998
loss: 1.2110782861709595,grad_norm: 0.9999998396302859, iteration: 412999
loss: 1.0183006525039673,grad_norm: 0.8451039325095703, iteration: 413000
loss: 1.280735731124878,grad_norm: 0.9999990768269686, iteration: 413001
loss: 1.0157074928283691,grad_norm: 0.8333550574674412, iteration: 413002
loss: 1.0447510480880737,grad_norm: 0.7823782857182887, iteration: 413003
loss: 0.9816235303878784,grad_norm: 0.756897961075992, iteration: 413004
loss: 1.0357561111450195,grad_norm: 0.9999996831344331, iteration: 413005
loss: 1.0231382846832275,grad_norm: 0.781066420761738, iteration: 413006
loss: 0.9836344122886658,grad_norm: 0.9999991403639783, iteration: 413007
loss: 0.99147629737854,grad_norm: 0.8076825951084708, iteration: 413008
loss: 1.0133090019226074,grad_norm: 0.7438073766464439, iteration: 413009
loss: 0.9834325313568115,grad_norm: 0.8529115672519152, iteration: 413010
loss: 1.031732201576233,grad_norm: 0.9999991434265889, iteration: 413011
loss: 1.0950227975845337,grad_norm: 0.9999997200614161, iteration: 413012
loss: 1.0792548656463623,grad_norm: 0.9999998761118999, iteration: 413013
loss: 0.9829409718513489,grad_norm: 0.7086007462373116, iteration: 413014
loss: 0.9823704361915588,grad_norm: 0.7143258977403318, iteration: 413015
loss: 1.0465724468231201,grad_norm: 0.9525029026768133, iteration: 413016
loss: 0.9953038692474365,grad_norm: 0.8466054524683847, iteration: 413017
loss: 0.9982383847236633,grad_norm: 0.9999991816181855, iteration: 413018
loss: 1.111489176750183,grad_norm: 0.9999991551765139, iteration: 413019
loss: 1.0743207931518555,grad_norm: 0.9999993366343496, iteration: 413020
loss: 0.9792510867118835,grad_norm: 0.923503076445607, iteration: 413021
loss: 1.0042362213134766,grad_norm: 0.9456063549846221, iteration: 413022
loss: 1.1159030199050903,grad_norm: 0.9960853593535429, iteration: 413023
loss: 1.0470645427703857,grad_norm: 0.8165780476763977, iteration: 413024
loss: 1.1889094114303589,grad_norm: 0.999999496574926, iteration: 413025
loss: 1.1326755285263062,grad_norm: 0.7639910824648433, iteration: 413026
loss: 1.0092722177505493,grad_norm: 0.999999518373893, iteration: 413027
loss: 1.0249742269515991,grad_norm: 0.9999995876627404, iteration: 413028
loss: 0.9891988635063171,grad_norm: 0.8785826210271467, iteration: 413029
loss: 1.092447280883789,grad_norm: 0.9999997795042458, iteration: 413030
loss: 0.9964482188224792,grad_norm: 0.7117020228341211, iteration: 413031
loss: 0.9678102731704712,grad_norm: 0.7358565358818172, iteration: 413032
loss: 1.0358973741531372,grad_norm: 0.8648191855358733, iteration: 413033
loss: 0.9882959127426147,grad_norm: 0.8603037990146726, iteration: 413034
loss: 1.1047306060791016,grad_norm: 0.9999996231245046, iteration: 413035
loss: 1.0120904445648193,grad_norm: 0.9659321538459786, iteration: 413036
loss: 0.9770714044570923,grad_norm: 0.7312751497890732, iteration: 413037
loss: 1.0696884393692017,grad_norm: 0.890724498227729, iteration: 413038
loss: 1.0129300355911255,grad_norm: 0.9999995936200409, iteration: 413039
loss: 0.9940221309661865,grad_norm: 0.850732814166129, iteration: 413040
loss: 0.9983070492744446,grad_norm: 0.8095804342359336, iteration: 413041
loss: 1.0750950574874878,grad_norm: 0.9999990731530701, iteration: 413042
loss: 1.104543924331665,grad_norm: 0.999999915417767, iteration: 413043
loss: 1.0908620357513428,grad_norm: 0.9002800346286873, iteration: 413044
loss: 1.0146310329437256,grad_norm: 0.9652359813942207, iteration: 413045
loss: 1.003071665763855,grad_norm: 0.8616641406885123, iteration: 413046
loss: 1.0956004858016968,grad_norm: 0.9999991436979805, iteration: 413047
loss: 1.053385853767395,grad_norm: 0.8043251772242358, iteration: 413048
loss: 0.9684628844261169,grad_norm: 0.9999999727787482, iteration: 413049
loss: 0.9939581155776978,grad_norm: 0.9999997751403686, iteration: 413050
loss: 0.9813764095306396,grad_norm: 0.8355621204823568, iteration: 413051
loss: 1.0710361003875732,grad_norm: 0.9505372507097738, iteration: 413052
loss: 1.0139933824539185,grad_norm: 0.8679388835559194, iteration: 413053
loss: 1.1214818954467773,grad_norm: 0.9999991054122821, iteration: 413054
loss: 1.0374810695648193,grad_norm: 0.9999996108757626, iteration: 413055
loss: 1.0259193181991577,grad_norm: 1.0000000348976583, iteration: 413056
loss: 1.001028060913086,grad_norm: 0.9247180150596086, iteration: 413057
loss: 1.0419822931289673,grad_norm: 0.9999993576087682, iteration: 413058
loss: 0.9976809024810791,grad_norm: 0.7411328163865554, iteration: 413059
loss: 0.9771945476531982,grad_norm: 0.8099362083694048, iteration: 413060
loss: 1.0412274599075317,grad_norm: 0.8034270058205764, iteration: 413061
loss: 1.0326833724975586,grad_norm: 0.8821865040766542, iteration: 413062
loss: 0.9996309876441956,grad_norm: 0.846755220298904, iteration: 413063
loss: 0.9879348278045654,grad_norm: 0.7073558293337743, iteration: 413064
loss: 0.9906378984451294,grad_norm: 0.9999991953144862, iteration: 413065
loss: 1.0368974208831787,grad_norm: 0.9999991252054045, iteration: 413066
loss: 1.036832571029663,grad_norm: 1.0000000573991403, iteration: 413067
loss: 1.0335415601730347,grad_norm: 0.9999998466545299, iteration: 413068
loss: 0.9603683352470398,grad_norm: 0.9999991992214641, iteration: 413069
loss: 1.051092505455017,grad_norm: 0.9999994611715665, iteration: 413070
loss: 1.0011171102523804,grad_norm: 0.9999996359374408, iteration: 413071
loss: 1.1984460353851318,grad_norm: 0.9533030812181259, iteration: 413072
loss: 1.1445082426071167,grad_norm: 0.9999997836664167, iteration: 413073
loss: 0.9753226637840271,grad_norm: 0.7913094097222515, iteration: 413074
loss: 0.9890596270561218,grad_norm: 0.8982408438086711, iteration: 413075
loss: 0.9634720683097839,grad_norm: 0.9999991337728773, iteration: 413076
loss: 0.9808046221733093,grad_norm: 0.9671425453557392, iteration: 413077
loss: 1.006670594215393,grad_norm: 0.7100347553195439, iteration: 413078
loss: 1.159247636795044,grad_norm: 0.999999681507433, iteration: 413079
loss: 1.0470279455184937,grad_norm: 0.9999996218655518, iteration: 413080
loss: 0.997448205947876,grad_norm: 0.9999996257943085, iteration: 413081
loss: 1.0885355472564697,grad_norm: 0.9999999451386946, iteration: 413082
loss: 1.127584457397461,grad_norm: 0.9999999458266677, iteration: 413083
loss: 1.1133599281311035,grad_norm: 0.999999745548904, iteration: 413084
loss: 1.0916062593460083,grad_norm: 0.9999999460050074, iteration: 413085
loss: 1.1566321849822998,grad_norm: 0.9999999304028061, iteration: 413086
loss: 0.9963585138320923,grad_norm: 0.7915814060621749, iteration: 413087
loss: 0.9860761761665344,grad_norm: 0.9335328672717847, iteration: 413088
loss: 0.9731329679489136,grad_norm: 0.659051780241419, iteration: 413089
loss: 1.1142140626907349,grad_norm: 0.9999997401001114, iteration: 413090
loss: 1.1209132671356201,grad_norm: 0.9999996042534406, iteration: 413091
loss: 1.115796685218811,grad_norm: 0.9999998425656438, iteration: 413092
loss: 1.0433772802352905,grad_norm: 0.9193644736682991, iteration: 413093
loss: 1.1275839805603027,grad_norm: 0.99999969110773, iteration: 413094
loss: 1.1484800577163696,grad_norm: 0.9999998105787218, iteration: 413095
loss: 1.0357269048690796,grad_norm: 0.9999996830180254, iteration: 413096
loss: 1.0122089385986328,grad_norm: 0.7857389228396149, iteration: 413097
loss: 1.1490281820297241,grad_norm: 0.9999993366153181, iteration: 413098
loss: 1.2068233489990234,grad_norm: 0.9999998866378405, iteration: 413099
loss: 1.1294628381729126,grad_norm: 0.9999994088721149, iteration: 413100
loss: 1.0956082344055176,grad_norm: 0.9999997803000588, iteration: 413101
loss: 1.0565282106399536,grad_norm: 0.8811047413765051, iteration: 413102
loss: 1.1999388933181763,grad_norm: 0.9999998332775965, iteration: 413103
loss: 1.009342074394226,grad_norm: 0.9999997602590126, iteration: 413104
loss: 0.9741187691688538,grad_norm: 0.8271934338074338, iteration: 413105
loss: 0.9820342063903809,grad_norm: 0.9999992841596141, iteration: 413106
loss: 0.9999004602432251,grad_norm: 0.9999992144990314, iteration: 413107
loss: 1.0305787324905396,grad_norm: 0.9999998549977739, iteration: 413108
loss: 1.0210022926330566,grad_norm: 0.7797197264386091, iteration: 413109
loss: 1.066797137260437,grad_norm: 0.9999994304623498, iteration: 413110
loss: 1.204568862915039,grad_norm: 0.9999997272472808, iteration: 413111
loss: 1.0030992031097412,grad_norm: 0.815580939494525, iteration: 413112
loss: 1.0060815811157227,grad_norm: 0.74972889689585, iteration: 413113
loss: 1.0176708698272705,grad_norm: 0.9999998242200505, iteration: 413114
loss: 0.9399178624153137,grad_norm: 0.9999998879258887, iteration: 413115
loss: 1.0008225440979004,grad_norm: 0.7339278545935198, iteration: 413116
loss: 1.0246131420135498,grad_norm: 0.9999993530171816, iteration: 413117
loss: 1.0615545511245728,grad_norm: 0.820830648756418, iteration: 413118
loss: 1.0827546119689941,grad_norm: 0.7652584354234004, iteration: 413119
loss: 1.0619171857833862,grad_norm: 0.9999991188120103, iteration: 413120
loss: 1.072176218032837,grad_norm: 0.9999998443345487, iteration: 413121
loss: 0.9820030331611633,grad_norm: 0.7715130815267333, iteration: 413122
loss: 0.9995773434638977,grad_norm: 0.8044602004473382, iteration: 413123
loss: 0.9799660444259644,grad_norm: 0.6733738758535851, iteration: 413124
loss: 1.0047835111618042,grad_norm: 0.9999990444571105, iteration: 413125
loss: 1.0047218799591064,grad_norm: 0.8815462630668748, iteration: 413126
loss: 1.2110317945480347,grad_norm: 0.9999997639458069, iteration: 413127
loss: 1.266800045967102,grad_norm: 0.9999997731604487, iteration: 413128
loss: 1.1616019010543823,grad_norm: 0.9999989465118824, iteration: 413129
loss: 0.9980420470237732,grad_norm: 0.8085619751740355, iteration: 413130
loss: 1.0498080253601074,grad_norm: 0.9999999897797583, iteration: 413131
loss: 1.0149593353271484,grad_norm: 0.9102398828769245, iteration: 413132
loss: 1.056366205215454,grad_norm: 0.999999656772453, iteration: 413133
loss: 1.0972927808761597,grad_norm: 0.891860014160523, iteration: 413134
loss: 1.0525909662246704,grad_norm: 0.9999991548890733, iteration: 413135
loss: 1.0889644622802734,grad_norm: 0.834424404126055, iteration: 413136
loss: 1.0484962463378906,grad_norm: 0.9748681659789054, iteration: 413137
loss: 0.9834030866622925,grad_norm: 0.9999993174611143, iteration: 413138
loss: 0.9903967380523682,grad_norm: 0.9999993108093529, iteration: 413139
loss: 1.0008795261383057,grad_norm: 0.999999099258438, iteration: 413140
loss: 1.0069682598114014,grad_norm: 0.7760292313675208, iteration: 413141
loss: 1.020830750465393,grad_norm: 0.9999996766233099, iteration: 413142
loss: 1.1330878734588623,grad_norm: 0.9999999728544021, iteration: 413143
loss: 1.0245230197906494,grad_norm: 0.8525041688927144, iteration: 413144
loss: 1.146094799041748,grad_norm: 0.9999995294453186, iteration: 413145
loss: 1.0158807039260864,grad_norm: 0.698809023360006, iteration: 413146
loss: 1.039693832397461,grad_norm: 0.9069814896111568, iteration: 413147
loss: 1.0437489748001099,grad_norm: 0.8464979491920516, iteration: 413148
loss: 1.0434950590133667,grad_norm: 0.9812508357729811, iteration: 413149
loss: 1.0417197942733765,grad_norm: 0.9999996560616421, iteration: 413150
loss: 1.0036494731903076,grad_norm: 0.7466375834939734, iteration: 413151
loss: 1.0319583415985107,grad_norm: 0.8934958723241222, iteration: 413152
loss: 1.036536455154419,grad_norm: 0.9999994735612926, iteration: 413153
loss: 1.0304397344589233,grad_norm: 0.9999994368996705, iteration: 413154
loss: 1.005075216293335,grad_norm: 0.7638823115764499, iteration: 413155
loss: 1.0184303522109985,grad_norm: 0.9999995532760959, iteration: 413156
loss: 1.0343225002288818,grad_norm: 0.9999996177725748, iteration: 413157
loss: 1.0175344944000244,grad_norm: 0.9999991132379616, iteration: 413158
loss: 1.0105501413345337,grad_norm: 0.9999991472953659, iteration: 413159
loss: 1.0818599462509155,grad_norm: 0.9568904680902244, iteration: 413160
loss: 1.0283187627792358,grad_norm: 0.9999997272343901, iteration: 413161
loss: 0.9846014380455017,grad_norm: 0.9999998632764442, iteration: 413162
loss: 1.0389623641967773,grad_norm: 0.9720803407312776, iteration: 413163
loss: 1.0457918643951416,grad_norm: 0.9249416952537908, iteration: 413164
loss: 1.0269832611083984,grad_norm: 0.7802748717555551, iteration: 413165
loss: 0.9897037744522095,grad_norm: 0.7494666769016126, iteration: 413166
loss: 1.0545341968536377,grad_norm: 0.9080725733179797, iteration: 413167
loss: 1.0374161005020142,grad_norm: 0.7197860656725444, iteration: 413168
loss: 1.0240098237991333,grad_norm: 0.8054309971646738, iteration: 413169
loss: 1.032921552658081,grad_norm: 0.8352100556069108, iteration: 413170
loss: 1.0361452102661133,grad_norm: 0.8138387453928908, iteration: 413171
loss: 1.0201071500778198,grad_norm: 0.7850041097693558, iteration: 413172
loss: 0.9895573258399963,grad_norm: 0.8123961250734077, iteration: 413173
loss: 1.035101294517517,grad_norm: 0.9288016642861752, iteration: 413174
loss: 0.9940048456192017,grad_norm: 0.8820603757813976, iteration: 413175
loss: 1.03511643409729,grad_norm: 0.9999999961283127, iteration: 413176
loss: 1.0441688299179077,grad_norm: 0.9999998184698812, iteration: 413177
loss: 1.0188264846801758,grad_norm: 0.6371099732004378, iteration: 413178
loss: 1.1027804613113403,grad_norm: 0.9999992135144492, iteration: 413179
loss: 1.0767990350723267,grad_norm: 0.8065172906105917, iteration: 413180
loss: 1.025282859802246,grad_norm: 0.786767878541369, iteration: 413181
loss: 0.9730428457260132,grad_norm: 0.7656713341847959, iteration: 413182
loss: 1.0250385999679565,grad_norm: 0.9999990694319473, iteration: 413183
loss: 1.020105004310608,grad_norm: 0.9999994858558289, iteration: 413184
loss: 0.9327978491783142,grad_norm: 0.6930470003692769, iteration: 413185
loss: 1.1784635782241821,grad_norm: 0.9999995873565332, iteration: 413186
loss: 0.9996383786201477,grad_norm: 0.8144858247713379, iteration: 413187
loss: 1.029281497001648,grad_norm: 0.9999993863375118, iteration: 413188
loss: 1.0220026969909668,grad_norm: 0.6038321198744385, iteration: 413189
loss: 1.1766631603240967,grad_norm: 0.9999995553358834, iteration: 413190
loss: 0.9951983690261841,grad_norm: 0.774573274665905, iteration: 413191
loss: 1.007861852645874,grad_norm: 0.7554573234146412, iteration: 413192
loss: 1.0673025846481323,grad_norm: 0.9999996438251826, iteration: 413193
loss: 1.042874813079834,grad_norm: 0.9310175417509127, iteration: 413194
loss: 1.005857229232788,grad_norm: 0.7614830628250736, iteration: 413195
loss: 1.003796935081482,grad_norm: 0.8491210361240431, iteration: 413196
loss: 1.0735654830932617,grad_norm: 0.9666608264997449, iteration: 413197
loss: 0.9902182221412659,grad_norm: 0.7510345978717037, iteration: 413198
loss: 1.045261263847351,grad_norm: 0.767207602395988, iteration: 413199
loss: 0.9866101145744324,grad_norm: 0.7519606297447419, iteration: 413200
loss: 1.0104728937149048,grad_norm: 0.9999996290904365, iteration: 413201
loss: 0.9814711213111877,grad_norm: 0.7654885488154964, iteration: 413202
loss: 0.9911482334136963,grad_norm: 0.7497643345158593, iteration: 413203
loss: 1.1064661741256714,grad_norm: 0.9999999310615694, iteration: 413204
loss: 0.9845129251480103,grad_norm: 0.7901263070910534, iteration: 413205
loss: 1.0590779781341553,grad_norm: 0.9860700335723324, iteration: 413206
loss: 1.0191971063613892,grad_norm: 0.7621703636665206, iteration: 413207
loss: 1.0151495933532715,grad_norm: 0.9999991096191384, iteration: 413208
loss: 0.9832180142402649,grad_norm: 0.7564369389858695, iteration: 413209
loss: 0.9880041480064392,grad_norm: 0.976453551623142, iteration: 413210
loss: 0.9704865217208862,grad_norm: 0.7178943768488243, iteration: 413211
loss: 1.012332558631897,grad_norm: 0.8521711773472556, iteration: 413212
loss: 1.0056883096694946,grad_norm: 0.7287911167125473, iteration: 413213
loss: 1.0045102834701538,grad_norm: 0.7529842542183429, iteration: 413214
loss: 0.9996814131736755,grad_norm: 0.864694185384661, iteration: 413215
loss: 0.9761894941329956,grad_norm: 0.9999996815390935, iteration: 413216
loss: 1.0054917335510254,grad_norm: 0.865535724308799, iteration: 413217
loss: 1.0864427089691162,grad_norm: 0.790936552148727, iteration: 413218
loss: 1.0055217742919922,grad_norm: 0.8589368278768306, iteration: 413219
loss: 0.9843540787696838,grad_norm: 0.7591252594919815, iteration: 413220
loss: 1.044817566871643,grad_norm: 0.9999994180298236, iteration: 413221
loss: 1.0536832809448242,grad_norm: 0.7839442693174968, iteration: 413222
loss: 0.9849905371665955,grad_norm: 0.7839079855213723, iteration: 413223
loss: 0.9548327922821045,grad_norm: 0.8174754129746933, iteration: 413224
loss: 1.0626962184906006,grad_norm: 0.9999998256246017, iteration: 413225
loss: 1.033928394317627,grad_norm: 0.9999991647130001, iteration: 413226
loss: 1.136727213859558,grad_norm: 0.9999999932167759, iteration: 413227
loss: 1.042124629020691,grad_norm: 0.9999998484719851, iteration: 413228
loss: 1.0297414064407349,grad_norm: 0.8320457702259828, iteration: 413229
loss: 1.0188682079315186,grad_norm: 0.6917626540541331, iteration: 413230
loss: 1.0159729719161987,grad_norm: 0.8038066949592589, iteration: 413231
loss: 1.0210902690887451,grad_norm: 0.9999991537462228, iteration: 413232
loss: 1.0435528755187988,grad_norm: 0.9999995357875662, iteration: 413233
loss: 0.9881637692451477,grad_norm: 0.8087227517720443, iteration: 413234
loss: 0.9587202668190002,grad_norm: 0.6586308559472762, iteration: 413235
loss: 1.0846165418624878,grad_norm: 0.9999995695964972, iteration: 413236
loss: 1.038993239402771,grad_norm: 0.855245186648478, iteration: 413237
loss: 0.9662507772445679,grad_norm: 0.9189408003377482, iteration: 413238
loss: 1.014124870300293,grad_norm: 0.6688564174784001, iteration: 413239
loss: 1.038659691810608,grad_norm: 0.9999997747609808, iteration: 413240
loss: 0.9925287365913391,grad_norm: 0.99999953227126, iteration: 413241
loss: 0.9509417414665222,grad_norm: 0.7232275102650205, iteration: 413242
loss: 1.1076942682266235,grad_norm: 0.7996482458588746, iteration: 413243
loss: 0.9809125661849976,grad_norm: 0.8078975978239145, iteration: 413244
loss: 1.0327837467193604,grad_norm: 0.8485736650937333, iteration: 413245
loss: 1.04152512550354,grad_norm: 0.9999997542362863, iteration: 413246
loss: 1.0173274278640747,grad_norm: 0.9129391969415344, iteration: 413247
loss: 1.0151262283325195,grad_norm: 0.999999511565036, iteration: 413248
loss: 1.0188053846359253,grad_norm: 0.9999991393050485, iteration: 413249
loss: 0.9923041462898254,grad_norm: 0.883016151555583, iteration: 413250
loss: 1.0267233848571777,grad_norm: 0.9999991022899651, iteration: 413251
loss: 1.0081596374511719,grad_norm: 0.8439302303267286, iteration: 413252
loss: 1.0024181604385376,grad_norm: 0.73194548691577, iteration: 413253
loss: 1.1520487070083618,grad_norm: 0.9999993032051316, iteration: 413254
loss: 1.0081512928009033,grad_norm: 0.7743929577213717, iteration: 413255
loss: 0.9961179494857788,grad_norm: 0.6717944268517069, iteration: 413256
loss: 1.0326385498046875,grad_norm: 0.9999995425569097, iteration: 413257
loss: 1.0174555778503418,grad_norm: 0.7183821305238216, iteration: 413258
loss: 1.0078132152557373,grad_norm: 0.6608245839991833, iteration: 413259
loss: 1.0040429830551147,grad_norm: 0.6993918630658786, iteration: 413260
loss: 1.2781529426574707,grad_norm: 1.0000000616673117, iteration: 413261
loss: 0.9903538823127747,grad_norm: 0.6177806283306004, iteration: 413262
loss: 0.9894965291023254,grad_norm: 0.7511906348055394, iteration: 413263
loss: 0.9805657863616943,grad_norm: 0.6719475841328127, iteration: 413264
loss: 0.997941792011261,grad_norm: 0.6947634195241508, iteration: 413265
loss: 0.9909959435462952,grad_norm: 0.871411684822752, iteration: 413266
loss: 1.0550733804702759,grad_norm: 0.9999991584717383, iteration: 413267
loss: 0.9998294711112976,grad_norm: 0.688392475475848, iteration: 413268
loss: 0.9853951334953308,grad_norm: 0.9154133865290124, iteration: 413269
loss: 1.0568541288375854,grad_norm: 0.8608869978449674, iteration: 413270
loss: 1.0037678480148315,grad_norm: 0.6930630747826599, iteration: 413271
loss: 1.0032213926315308,grad_norm: 0.818604554599311, iteration: 413272
loss: 1.0090224742889404,grad_norm: 0.8918671993035795, iteration: 413273
loss: 0.999428927898407,grad_norm: 0.7384747134157713, iteration: 413274
loss: 0.9926878809928894,grad_norm: 0.8040440347451757, iteration: 413275
loss: 0.9883251190185547,grad_norm: 0.6843309499292042, iteration: 413276
loss: 1.0423943996429443,grad_norm: 0.8732245140587134, iteration: 413277
loss: 1.062803864479065,grad_norm: 0.9999999147476711, iteration: 413278
loss: 1.002008318901062,grad_norm: 0.9207020747659691, iteration: 413279
loss: 0.9860588312149048,grad_norm: 0.6910811122157636, iteration: 413280
loss: 1.0107250213623047,grad_norm: 0.9999991435600181, iteration: 413281
loss: 1.0400465726852417,grad_norm: 0.9999994767680144, iteration: 413282
loss: 1.0075551271438599,grad_norm: 0.6643882019213418, iteration: 413283
loss: 0.9878861904144287,grad_norm: 0.9999990796925593, iteration: 413284
loss: 0.9979519844055176,grad_norm: 0.9169832214508047, iteration: 413285
loss: 1.0228427648544312,grad_norm: 0.8624892343480098, iteration: 413286
loss: 1.017190933227539,grad_norm: 0.7771490009498127, iteration: 413287
loss: 0.9911296367645264,grad_norm: 0.8248691775990334, iteration: 413288
loss: 1.062066674232483,grad_norm: 0.666855734104307, iteration: 413289
loss: 0.9776389002799988,grad_norm: 0.835524975418229, iteration: 413290
loss: 1.0195660591125488,grad_norm: 0.8018603203205527, iteration: 413291
loss: 0.966232419013977,grad_norm: 0.7659045805886219, iteration: 413292
loss: 1.0481610298156738,grad_norm: 0.7228705198918611, iteration: 413293
loss: 0.9836549162864685,grad_norm: 0.7662229958611707, iteration: 413294
loss: 0.9827999472618103,grad_norm: 0.8085518765469749, iteration: 413295
loss: 0.992852509021759,grad_norm: 0.8021639892607999, iteration: 413296
loss: 0.9982941150665283,grad_norm: 0.8889616371945506, iteration: 413297
loss: 0.9848132133483887,grad_norm: 0.7437616262802044, iteration: 413298
loss: 1.0305743217468262,grad_norm: 0.7303342022761775, iteration: 413299
loss: 0.9655632376670837,grad_norm: 0.9729357314441065, iteration: 413300
loss: 1.0084693431854248,grad_norm: 0.7759996489676941, iteration: 413301
loss: 1.0099968910217285,grad_norm: 0.7035642871347163, iteration: 413302
loss: 0.963776707649231,grad_norm: 0.8590597148071145, iteration: 413303
loss: 1.1032145023345947,grad_norm: 0.9999990530199593, iteration: 413304
loss: 1.0050700902938843,grad_norm: 0.8191597727480358, iteration: 413305
loss: 1.005220651626587,grad_norm: 0.8321654477696815, iteration: 413306
loss: 0.9868528246879578,grad_norm: 0.8169453542483747, iteration: 413307
loss: 1.0216495990753174,grad_norm: 0.7857418829583295, iteration: 413308
loss: 0.9980619549751282,grad_norm: 0.8502721066163049, iteration: 413309
loss: 1.0003087520599365,grad_norm: 0.8825396350758353, iteration: 413310
loss: 0.9915916323661804,grad_norm: 0.7495759537402039, iteration: 413311
loss: 1.0455182790756226,grad_norm: 0.9999994217740598, iteration: 413312
loss: 1.0263056755065918,grad_norm: 0.8541759621378712, iteration: 413313
loss: 0.9594570398330688,grad_norm: 0.8044064614994128, iteration: 413314
loss: 0.9825747609138489,grad_norm: 0.6374633346511438, iteration: 413315
loss: 1.0190417766571045,grad_norm: 0.7435443539691833, iteration: 413316
loss: 0.9964889883995056,grad_norm: 0.8095847801012616, iteration: 413317
loss: 0.9855372905731201,grad_norm: 0.7015731242786768, iteration: 413318
loss: 1.0317964553833008,grad_norm: 0.7063121160128268, iteration: 413319
loss: 1.0214790105819702,grad_norm: 0.7788787292314662, iteration: 413320
loss: 1.0126018524169922,grad_norm: 0.736204012711428, iteration: 413321
loss: 1.0242592096328735,grad_norm: 0.9999991733005218, iteration: 413322
loss: 0.9532651305198669,grad_norm: 0.6915447276170179, iteration: 413323
loss: 1.038682460784912,grad_norm: 0.7195064693926008, iteration: 413324
loss: 0.9515117406845093,grad_norm: 0.7826477315940641, iteration: 413325
loss: 0.9960969686508179,grad_norm: 0.7273024073682823, iteration: 413326
loss: 1.0667234659194946,grad_norm: 0.7945213600870286, iteration: 413327
loss: 0.976212203502655,grad_norm: 0.829874339238454, iteration: 413328
loss: 1.0118024349212646,grad_norm: 0.7551585774233573, iteration: 413329
loss: 0.989591121673584,grad_norm: 0.8436248197922868, iteration: 413330
loss: 1.0073667764663696,grad_norm: 0.9524171109122852, iteration: 413331
loss: 0.9994583129882812,grad_norm: 0.7689883394764114, iteration: 413332
loss: 1.0331261157989502,grad_norm: 0.9999998294315312, iteration: 413333
loss: 0.983962893486023,grad_norm: 0.7870598309051484, iteration: 413334
loss: 1.0073306560516357,grad_norm: 0.7628234297534913, iteration: 413335
loss: 1.0109436511993408,grad_norm: 0.8569295657887959, iteration: 413336
loss: 1.0715787410736084,grad_norm: 0.8253092130798514, iteration: 413337
loss: 0.9973455667495728,grad_norm: 0.8758295546410179, iteration: 413338
loss: 0.9723540544509888,grad_norm: 0.9999992065135219, iteration: 413339
loss: 1.0144003629684448,grad_norm: 0.999999525254474, iteration: 413340
loss: 1.0553090572357178,grad_norm: 0.8171737900548446, iteration: 413341
loss: 0.9804796576499939,grad_norm: 0.7772770636652043, iteration: 413342
loss: 0.9978281259536743,grad_norm: 0.732975342509087, iteration: 413343
loss: 1.105238914489746,grad_norm: 0.9999997374041861, iteration: 413344
loss: 0.9915928840637207,grad_norm: 0.7322371319254708, iteration: 413345
loss: 0.9963124394416809,grad_norm: 0.8054282841769431, iteration: 413346
loss: 0.9760671854019165,grad_norm: 0.9999991797602076, iteration: 413347
loss: 1.0242149829864502,grad_norm: 0.9579038866295614, iteration: 413348
loss: 0.9935057163238525,grad_norm: 0.9999994034710097, iteration: 413349
loss: 1.0493049621582031,grad_norm: 0.9955591969410302, iteration: 413350
loss: 1.0027488470077515,grad_norm: 0.8567571739695341, iteration: 413351
loss: 1.0155205726623535,grad_norm: 0.6330808357052385, iteration: 413352
loss: 0.9740841388702393,grad_norm: 0.7513440581056138, iteration: 413353
loss: 0.9767879843711853,grad_norm: 0.7499981064649103, iteration: 413354
loss: 0.992095947265625,grad_norm: 0.6992439028698145, iteration: 413355
loss: 1.0121614933013916,grad_norm: 0.8552578408861393, iteration: 413356
loss: 1.0020394325256348,grad_norm: 0.8412441323230421, iteration: 413357
loss: 1.0581657886505127,grad_norm: 0.9999995129094166, iteration: 413358
loss: 1.0675232410430908,grad_norm: 0.8277896456990469, iteration: 413359
loss: 1.1509814262390137,grad_norm: 0.9999992962562589, iteration: 413360
loss: 0.9882933497428894,grad_norm: 0.734458196380776, iteration: 413361
loss: 1.0022037029266357,grad_norm: 0.6875526503499558, iteration: 413362
loss: 0.9931057691574097,grad_norm: 0.6844744217655654, iteration: 413363
loss: 0.9895327687263489,grad_norm: 0.8117769485813027, iteration: 413364
loss: 1.0032341480255127,grad_norm: 0.7563128354769594, iteration: 413365
loss: 1.0788671970367432,grad_norm: 0.8053858598475936, iteration: 413366
loss: 1.0460143089294434,grad_norm: 0.9999999166426343, iteration: 413367
loss: 1.0088038444519043,grad_norm: 0.7534596582769149, iteration: 413368
loss: 0.986896276473999,grad_norm: 0.7284045017124752, iteration: 413369
loss: 0.9780462384223938,grad_norm: 0.9516182418355182, iteration: 413370
loss: 1.0709199905395508,grad_norm: 0.6820438358270992, iteration: 413371
loss: 1.029247760772705,grad_norm: 0.7841087423203303, iteration: 413372
loss: 0.9665254950523376,grad_norm: 0.8852383669937256, iteration: 413373
loss: 0.9894008636474609,grad_norm: 0.9246092980320261, iteration: 413374
loss: 0.9593977332115173,grad_norm: 0.7745040519209252, iteration: 413375
loss: 1.0558339357376099,grad_norm: 0.9999991837482665, iteration: 413376
loss: 0.9658344984054565,grad_norm: 0.7119174126798744, iteration: 413377
loss: 0.9803234338760376,grad_norm: 0.6711738012651797, iteration: 413378
loss: 1.0559486150741577,grad_norm: 0.9999989793844406, iteration: 413379
loss: 0.9882997274398804,grad_norm: 0.7614873651054594, iteration: 413380
loss: 0.9900504350662231,grad_norm: 0.6830202215284049, iteration: 413381
loss: 0.982098400592804,grad_norm: 0.8766052146120219, iteration: 413382
loss: 1.0439718961715698,grad_norm: 0.921845048073283, iteration: 413383
loss: 0.9803900122642517,grad_norm: 0.781324660074978, iteration: 413384
loss: 1.0871628522872925,grad_norm: 1.000000105094592, iteration: 413385
loss: 1.0221316814422607,grad_norm: 0.8231454684907665, iteration: 413386
loss: 0.9897112846374512,grad_norm: 0.8456609063336923, iteration: 413387
loss: 0.9939258694648743,grad_norm: 0.6767509485592045, iteration: 413388
loss: 1.02131986618042,grad_norm: 0.9999998330790806, iteration: 413389
loss: 1.0485432147979736,grad_norm: 0.9999997166101807, iteration: 413390
loss: 0.9675695300102234,grad_norm: 0.7682948653373498, iteration: 413391
loss: 0.9983122944831848,grad_norm: 0.8183441143677763, iteration: 413392
loss: 1.0781409740447998,grad_norm: 0.9999992353297353, iteration: 413393
loss: 1.1220476627349854,grad_norm: 0.9999998502486525, iteration: 413394
loss: 0.9882631301879883,grad_norm: 0.7263412219262585, iteration: 413395
loss: 1.0441712141036987,grad_norm: 0.8530683926279866, iteration: 413396
loss: 1.0120820999145508,grad_norm: 0.9999990087208598, iteration: 413397
loss: 0.9834461808204651,grad_norm: 0.7620805175232659, iteration: 413398
loss: 1.0003865957260132,grad_norm: 0.9157951637910482, iteration: 413399
loss: 1.005548119544983,grad_norm: 0.9727867224855324, iteration: 413400
loss: 1.0078016519546509,grad_norm: 0.8443127146513907, iteration: 413401
loss: 0.968047022819519,grad_norm: 0.7415595491446204, iteration: 413402
loss: 0.9845896363258362,grad_norm: 0.7642288900709175, iteration: 413403
loss: 1.016137719154358,grad_norm: 0.9999996092706103, iteration: 413404
loss: 1.0162237882614136,grad_norm: 0.6682763835194148, iteration: 413405
loss: 1.0371934175491333,grad_norm: 0.8214185319112434, iteration: 413406
loss: 1.167099952697754,grad_norm: 0.9999997214766774, iteration: 413407
loss: 0.9817445278167725,grad_norm: 0.769492616246259, iteration: 413408
loss: 1.1917129755020142,grad_norm: 0.9999999462981994, iteration: 413409
loss: 1.0089607238769531,grad_norm: 0.7665188776665688, iteration: 413410
loss: 0.9945173859596252,grad_norm: 0.7226183252530242, iteration: 413411
loss: 0.9940598011016846,grad_norm: 0.7305898562311982, iteration: 413412
loss: 0.957133948802948,grad_norm: 0.7345122053779262, iteration: 413413
loss: 1.0274721384048462,grad_norm: 0.6643209867515, iteration: 413414
loss: 1.0181721448898315,grad_norm: 0.999999596609286, iteration: 413415
loss: 1.0113853216171265,grad_norm: 0.76011011438242, iteration: 413416
loss: 1.0169317722320557,grad_norm: 0.9558830588291088, iteration: 413417
loss: 1.0215893983840942,grad_norm: 0.999999984711132, iteration: 413418
loss: 0.9953320026397705,grad_norm: 0.8363916113655, iteration: 413419
loss: 0.9447572231292725,grad_norm: 0.7955108626316035, iteration: 413420
loss: 1.0186166763305664,grad_norm: 0.9999990136909842, iteration: 413421
loss: 1.0102704763412476,grad_norm: 0.847261895562723, iteration: 413422
loss: 0.9792883992195129,grad_norm: 0.7697008729654521, iteration: 413423
loss: 1.0380194187164307,grad_norm: 0.9999993033802298, iteration: 413424
loss: 1.0115960836410522,grad_norm: 0.9999999854349308, iteration: 413425
loss: 0.9900258183479309,grad_norm: 0.9999992688385909, iteration: 413426
loss: 0.9541345238685608,grad_norm: 0.9001491098416278, iteration: 413427
loss: 0.9938940405845642,grad_norm: 0.7929051341918034, iteration: 413428
loss: 0.9718410968780518,grad_norm: 0.8678480682994277, iteration: 413429
loss: 1.0951611995697021,grad_norm: 0.9999991334776817, iteration: 413430
loss: 1.053840160369873,grad_norm: 0.999999766443828, iteration: 413431
loss: 0.9906009435653687,grad_norm: 0.7772146884146579, iteration: 413432
loss: 1.0121746063232422,grad_norm: 0.7965713789475904, iteration: 413433
loss: 1.03643000125885,grad_norm: 0.8356849030231415, iteration: 413434
loss: 1.040167212486267,grad_norm: 0.8524568680271594, iteration: 413435
loss: 1.0205321311950684,grad_norm: 0.8224361116270609, iteration: 413436
loss: 0.9994098544120789,grad_norm: 0.6985805348104887, iteration: 413437
loss: 1.0217316150665283,grad_norm: 0.9999998297143163, iteration: 413438
loss: 1.045386552810669,grad_norm: 0.9076775811589258, iteration: 413439
loss: 0.9871265888214111,grad_norm: 0.9999998730649107, iteration: 413440
loss: 1.0037921667099,grad_norm: 0.6665518461944999, iteration: 413441
loss: 1.1099286079406738,grad_norm: 0.9999990939411277, iteration: 413442
loss: 1.008768081665039,grad_norm: 0.7481316035168072, iteration: 413443
loss: 1.0449143648147583,grad_norm: 0.750058703105089, iteration: 413444
loss: 1.0153597593307495,grad_norm: 0.9999998109826694, iteration: 413445
loss: 0.9603004455566406,grad_norm: 0.6973850342532283, iteration: 413446
loss: 0.9940366148948669,grad_norm: 0.999999257088122, iteration: 413447
loss: 1.0193102359771729,grad_norm: 0.8045467089474264, iteration: 413448
loss: 1.0440071821212769,grad_norm: 0.8645113448832072, iteration: 413449
loss: 1.02163565158844,grad_norm: 0.6782554669454154, iteration: 413450
loss: 1.0202741622924805,grad_norm: 0.9999995491295527, iteration: 413451
loss: 0.9784700870513916,grad_norm: 0.7502992212284173, iteration: 413452
loss: 1.0161978006362915,grad_norm: 0.6231067257543786, iteration: 413453
loss: 0.9446694850921631,grad_norm: 0.7379839102531526, iteration: 413454
loss: 1.0065281391143799,grad_norm: 0.7496530912999144, iteration: 413455
loss: 1.0223795175552368,grad_norm: 0.9999991808760618, iteration: 413456
loss: 0.9909961819648743,grad_norm: 0.938193976701203, iteration: 413457
loss: 1.0415765047073364,grad_norm: 0.6828662192580733, iteration: 413458
loss: 1.0231359004974365,grad_norm: 0.6827793727869919, iteration: 413459
loss: 1.0157326459884644,grad_norm: 0.7471963097936027, iteration: 413460
loss: 1.0120038986206055,grad_norm: 0.9999990765255568, iteration: 413461
loss: 1.0081678628921509,grad_norm: 0.8959084671293387, iteration: 413462
loss: 0.9846841096878052,grad_norm: 0.9086371253703066, iteration: 413463
loss: 0.9622201919555664,grad_norm: 0.9613019905119816, iteration: 413464
loss: 1.0868809223175049,grad_norm: 0.9999990668406812, iteration: 413465
loss: 1.0444905757904053,grad_norm: 0.9269762625797829, iteration: 413466
loss: 0.9982432126998901,grad_norm: 0.8863466607829396, iteration: 413467
loss: 0.953544557094574,grad_norm: 0.7819991939025658, iteration: 413468
loss: 0.9750742316246033,grad_norm: 0.9999991645412913, iteration: 413469
loss: 1.0299701690673828,grad_norm: 0.9390699693279541, iteration: 413470
loss: 0.9712774157524109,grad_norm: 0.7630493850044806, iteration: 413471
loss: 1.071744441986084,grad_norm: 0.9716238450237261, iteration: 413472
loss: 1.0100340843200684,grad_norm: 0.8486874687117206, iteration: 413473
loss: 0.9907312393188477,grad_norm: 0.9999996811650415, iteration: 413474
loss: 1.0152498483657837,grad_norm: 0.8049186562722336, iteration: 413475
loss: 1.0382708311080933,grad_norm: 0.7869169110842503, iteration: 413476
loss: 1.0378233194351196,grad_norm: 0.7910108046183707, iteration: 413477
loss: 1.0123624801635742,grad_norm: 0.7313220523357175, iteration: 413478
loss: 1.0567606687545776,grad_norm: 0.9999999744118884, iteration: 413479
loss: 1.0096379518508911,grad_norm: 0.9203018780385963, iteration: 413480
loss: 1.0601379871368408,grad_norm: 0.999999129666284, iteration: 413481
loss: 0.9769588112831116,grad_norm: 0.8119471798407951, iteration: 413482
loss: 1.030497670173645,grad_norm: 0.7847272315312988, iteration: 413483
loss: 1.0421974658966064,grad_norm: 0.8632922060737926, iteration: 413484
loss: 0.9957168102264404,grad_norm: 0.9561580260038365, iteration: 413485
loss: 1.023964524269104,grad_norm: 0.8145036958426983, iteration: 413486
loss: 0.9913268685340881,grad_norm: 0.7020114041274556, iteration: 413487
loss: 1.0114914178848267,grad_norm: 0.8440285493493853, iteration: 413488
loss: 0.9521576762199402,grad_norm: 0.7508313932892912, iteration: 413489
loss: 0.9822896122932434,grad_norm: 0.6320386016594806, iteration: 413490
loss: 1.020282506942749,grad_norm: 0.7572926068197003, iteration: 413491
loss: 1.1064963340759277,grad_norm: 0.836047790562885, iteration: 413492
loss: 1.0333622694015503,grad_norm: 0.99999939641869, iteration: 413493
loss: 1.0140295028686523,grad_norm: 0.8680359723737959, iteration: 413494
loss: 1.023219347000122,grad_norm: 0.9341688228819968, iteration: 413495
loss: 1.064946174621582,grad_norm: 0.698943448149722, iteration: 413496
loss: 0.9666460156440735,grad_norm: 0.8893916561754323, iteration: 413497
loss: 1.1044039726257324,grad_norm: 0.938034622296037, iteration: 413498
loss: 1.1216309070587158,grad_norm: 0.9999999999212575, iteration: 413499
loss: 1.0107617378234863,grad_norm: 0.9007243095854062, iteration: 413500
loss: 0.9991547465324402,grad_norm: 0.9999999943030283, iteration: 413501
loss: 1.025383472442627,grad_norm: 0.747384427231764, iteration: 413502
loss: 1.0022401809692383,grad_norm: 0.70400275324419, iteration: 413503
loss: 1.0845025777816772,grad_norm: 0.913712064163273, iteration: 413504
loss: 1.024915337562561,grad_norm: 0.9002636280137117, iteration: 413505
loss: 1.0147428512573242,grad_norm: 0.8705327721633888, iteration: 413506
loss: 1.0437746047973633,grad_norm: 0.9999990261952368, iteration: 413507
loss: 1.0613138675689697,grad_norm: 0.7134538860225355, iteration: 413508
loss: 0.995293915271759,grad_norm: 0.8279375029890391, iteration: 413509
loss: 1.00941801071167,grad_norm: 0.7274713777326456, iteration: 413510
loss: 0.9929790496826172,grad_norm: 0.8570343446682611, iteration: 413511
loss: 1.0095080137252808,grad_norm: 0.771530440185127, iteration: 413512
loss: 1.007811188697815,grad_norm: 0.9999997038456218, iteration: 413513
loss: 0.9851479530334473,grad_norm: 0.8663271611797705, iteration: 413514
loss: 0.9953049421310425,grad_norm: 0.7071876617729677, iteration: 413515
loss: 1.0168434381484985,grad_norm: 0.7335160548103017, iteration: 413516
loss: 1.0135873556137085,grad_norm: 0.7489386615124182, iteration: 413517
loss: 0.9468926191329956,grad_norm: 0.8398678328278248, iteration: 413518
loss: 0.9942739605903625,grad_norm: 0.8634706990401336, iteration: 413519
loss: 1.1508374214172363,grad_norm: 0.9160343195452829, iteration: 413520
loss: 1.0254993438720703,grad_norm: 0.9182293565632553, iteration: 413521
loss: 1.0374748706817627,grad_norm: 0.9999995106280047, iteration: 413522
loss: 1.0888980627059937,grad_norm: 0.999999485396997, iteration: 413523
loss: 0.9927354454994202,grad_norm: 0.7755624953278544, iteration: 413524
loss: 1.0052417516708374,grad_norm: 0.6949409840405675, iteration: 413525
loss: 1.0256465673446655,grad_norm: 0.9999994382651958, iteration: 413526
loss: 1.0499085187911987,grad_norm: 0.8017985220247039, iteration: 413527
loss: 1.0469473600387573,grad_norm: 0.8427414630018976, iteration: 413528
loss: 1.0558056831359863,grad_norm: 0.9763782609361359, iteration: 413529
loss: 0.9830083250999451,grad_norm: 0.8248794093594001, iteration: 413530
loss: 1.108460545539856,grad_norm: 0.9999997595976579, iteration: 413531
loss: 1.00269615650177,grad_norm: 0.8025091166695607, iteration: 413532
loss: 1.082221508026123,grad_norm: 0.9999999374381007, iteration: 413533
loss: 1.0181934833526611,grad_norm: 0.8114854544265622, iteration: 413534
loss: 1.0203351974487305,grad_norm: 0.9999999481204181, iteration: 413535
loss: 0.9928028583526611,grad_norm: 0.7573784642107618, iteration: 413536
loss: 1.0300158262252808,grad_norm: 0.9805963796953344, iteration: 413537
loss: 1.079613208770752,grad_norm: 0.9999991412966326, iteration: 413538
loss: 0.983507513999939,grad_norm: 0.8067442766116107, iteration: 413539
loss: 1.0125504732131958,grad_norm: 0.7348190367436835, iteration: 413540
loss: 1.0513474941253662,grad_norm: 0.7530864439260508, iteration: 413541
loss: 0.9946098327636719,grad_norm: 1.0000000145386667, iteration: 413542
loss: 0.9845283031463623,grad_norm: 0.6833682735222496, iteration: 413543
loss: 1.086185097694397,grad_norm: 0.9748707882646258, iteration: 413544
loss: 1.0233731269836426,grad_norm: 0.7869662771747701, iteration: 413545
loss: 1.0509977340698242,grad_norm: 0.9078515794235228, iteration: 413546
loss: 0.9961390495300293,grad_norm: 0.6570280343543208, iteration: 413547
loss: 1.0035619735717773,grad_norm: 0.7420624132467133, iteration: 413548
loss: 0.9759823679924011,grad_norm: 0.6868782017768301, iteration: 413549
loss: 0.9974985122680664,grad_norm: 0.9999999570108142, iteration: 413550
loss: 0.9882274270057678,grad_norm: 0.8606753897258728, iteration: 413551
loss: 1.0084925889968872,grad_norm: 0.738786897618112, iteration: 413552
loss: 1.0359877347946167,grad_norm: 0.779854020156293, iteration: 413553
loss: 1.0881516933441162,grad_norm: 0.8646744461801968, iteration: 413554
loss: 0.9972175359725952,grad_norm: 0.9999992632549486, iteration: 413555
loss: 1.066906452178955,grad_norm: 0.9999990922463835, iteration: 413556
loss: 1.0500901937484741,grad_norm: 0.7800460392202718, iteration: 413557
loss: 0.9992920756340027,grad_norm: 0.86143795397916, iteration: 413558
loss: 0.9643734693527222,grad_norm: 0.7643031416936151, iteration: 413559
loss: 0.9998142719268799,grad_norm: 0.8576502946467005, iteration: 413560
loss: 1.000403881072998,grad_norm: 0.6861130384730586, iteration: 413561
loss: 1.0004370212554932,grad_norm: 0.7476991264050515, iteration: 413562
loss: 1.0002140998840332,grad_norm: 0.7130083902840382, iteration: 413563
loss: 1.0003231763839722,grad_norm: 0.8101057800243057, iteration: 413564
loss: 0.9852587580680847,grad_norm: 0.6982623663449272, iteration: 413565
loss: 0.9910241961479187,grad_norm: 0.9999992007091862, iteration: 413566
loss: 1.011264681816101,grad_norm: 0.8428229636115512, iteration: 413567
loss: 1.010196328163147,grad_norm: 0.9999992049638561, iteration: 413568
loss: 0.9736143946647644,grad_norm: 0.8169021470617146, iteration: 413569
loss: 1.0128895044326782,grad_norm: 1.0000000057546865, iteration: 413570
loss: 0.9771770238876343,grad_norm: 0.9382993331657377, iteration: 413571
loss: 0.9826902747154236,grad_norm: 0.9902831343877702, iteration: 413572
loss: 1.04792058467865,grad_norm: 0.7917353955810551, iteration: 413573
loss: 0.9745718836784363,grad_norm: 0.7192893242683361, iteration: 413574
loss: 1.0670430660247803,grad_norm: 0.9999995361400029, iteration: 413575
loss: 0.9917768239974976,grad_norm: 0.950000563485264, iteration: 413576
loss: 0.9998477697372437,grad_norm: 0.7954099910870991, iteration: 413577
loss: 0.9828265309333801,grad_norm: 0.693626301888585, iteration: 413578
loss: 0.9972438812255859,grad_norm: 0.839202704380574, iteration: 413579
loss: 0.990031361579895,grad_norm: 0.7666780527792111, iteration: 413580
loss: 1.0381919145584106,grad_norm: 0.9999995912755064, iteration: 413581
loss: 1.019986867904663,grad_norm: 0.7296028224360025, iteration: 413582
loss: 1.0023428201675415,grad_norm: 0.9260656512209043, iteration: 413583
loss: 1.0254323482513428,grad_norm: 0.7734631861902717, iteration: 413584
loss: 0.9816258549690247,grad_norm: 0.8209848787159841, iteration: 413585
loss: 1.0454449653625488,grad_norm: 0.8559588682922369, iteration: 413586
loss: 1.043296217918396,grad_norm: 0.733265750351017, iteration: 413587
loss: 1.050980567932129,grad_norm: 0.9999990878263046, iteration: 413588
loss: 1.0334820747375488,grad_norm: 0.685500113846004, iteration: 413589
loss: 1.012596845626831,grad_norm: 0.8261822490403326, iteration: 413590
loss: 0.9956583976745605,grad_norm: 0.7198283082964844, iteration: 413591
loss: 1.0024718046188354,grad_norm: 0.6768232327671557, iteration: 413592
loss: 0.9681877493858337,grad_norm: 0.7911309685267262, iteration: 413593
loss: 1.0038336515426636,grad_norm: 0.949495649759952, iteration: 413594
loss: 0.9798088669776917,grad_norm: 0.7627311936996738, iteration: 413595
loss: 1.0221024751663208,grad_norm: 0.8517780995512624, iteration: 413596
loss: 0.992742657661438,grad_norm: 0.9999991827681232, iteration: 413597
loss: 1.0121856927871704,grad_norm: 0.9103447459408607, iteration: 413598
loss: 1.074467420578003,grad_norm: 0.9999991542444128, iteration: 413599
loss: 0.9957855939865112,grad_norm: 0.849382746189141, iteration: 413600
loss: 1.0026825666427612,grad_norm: 0.9999991553843595, iteration: 413601
loss: 1.016154170036316,grad_norm: 0.7667941739144579, iteration: 413602
loss: 0.99460369348526,grad_norm: 0.6496176259775109, iteration: 413603
loss: 0.9766493439674377,grad_norm: 0.7063986527761474, iteration: 413604
loss: 0.9831838011741638,grad_norm: 0.8417078603617463, iteration: 413605
loss: 0.9916877746582031,grad_norm: 0.9425354117908074, iteration: 413606
loss: 1.0102304220199585,grad_norm: 0.8972831779206045, iteration: 413607
loss: 1.0358617305755615,grad_norm: 0.9999993251712936, iteration: 413608
loss: 1.013590693473816,grad_norm: 0.8664166085510784, iteration: 413609
loss: 1.0090264081954956,grad_norm: 0.7421040158620547, iteration: 413610
loss: 1.0686695575714111,grad_norm: 0.9999993062574747, iteration: 413611
loss: 1.0169070959091187,grad_norm: 0.7808754607647508, iteration: 413612
loss: 1.034395456314087,grad_norm: 0.7875271442496753, iteration: 413613
loss: 1.024729609489441,grad_norm: 0.7913201639489978, iteration: 413614
loss: 0.9878959059715271,grad_norm: 0.8595800596256421, iteration: 413615
loss: 1.007707953453064,grad_norm: 0.9999995654935132, iteration: 413616
loss: 0.9953444600105286,grad_norm: 0.7123305850413508, iteration: 413617
loss: 1.0121406316757202,grad_norm: 0.8733486220516263, iteration: 413618
loss: 1.0263707637786865,grad_norm: 0.9999998656478726, iteration: 413619
loss: 1.012488842010498,grad_norm: 0.9011161221462802, iteration: 413620
loss: 0.9991563558578491,grad_norm: 0.7635514149447483, iteration: 413621
loss: 1.0133291482925415,grad_norm: 0.8293687282748404, iteration: 413622
loss: 1.0067452192306519,grad_norm: 0.9999994556984343, iteration: 413623
loss: 0.9932369589805603,grad_norm: 0.8118851302019423, iteration: 413624
loss: 0.9986171722412109,grad_norm: 0.7105920784662982, iteration: 413625
loss: 1.014481782913208,grad_norm: 0.9999996628878585, iteration: 413626
loss: 0.9989421367645264,grad_norm: 0.7288480989004139, iteration: 413627
loss: 1.0257916450500488,grad_norm: 0.7399458586499689, iteration: 413628
loss: 0.9906378984451294,grad_norm: 0.789711341699337, iteration: 413629
loss: 0.9862730503082275,grad_norm: 0.7593130034418369, iteration: 413630
loss: 1.0277862548828125,grad_norm: 0.7184550702910651, iteration: 413631
loss: 0.98807692527771,grad_norm: 0.8105334555377695, iteration: 413632
loss: 1.0021722316741943,grad_norm: 0.9132765335756399, iteration: 413633
loss: 1.0319628715515137,grad_norm: 0.6850534981155815, iteration: 413634
loss: 1.087051272392273,grad_norm: 0.9999990741623777, iteration: 413635
loss: 1.0157339572906494,grad_norm: 0.9692327127825892, iteration: 413636
loss: 1.0485831499099731,grad_norm: 0.9999993607131631, iteration: 413637
loss: 0.9941359758377075,grad_norm: 0.8123688451998471, iteration: 413638
loss: 1.017423152923584,grad_norm: 0.6649246214604823, iteration: 413639
loss: 1.006327748298645,grad_norm: 0.800615636030011, iteration: 413640
loss: 1.082730770111084,grad_norm: 0.7527472291691636, iteration: 413641
loss: 1.0254930257797241,grad_norm: 0.8967069483965958, iteration: 413642
loss: 0.9860442876815796,grad_norm: 0.7077048170258811, iteration: 413643
loss: 1.0558210611343384,grad_norm: 0.9614403442366767, iteration: 413644
loss: 1.0400468111038208,grad_norm: 0.9999998221184165, iteration: 413645
loss: 1.0091570615768433,grad_norm: 0.9999993751697541, iteration: 413646
loss: 1.0318098068237305,grad_norm: 0.9140974194600573, iteration: 413647
loss: 0.994502067565918,grad_norm: 0.9999995503595136, iteration: 413648
loss: 1.0645734071731567,grad_norm: 0.9999997922379528, iteration: 413649
loss: 0.9868580102920532,grad_norm: 0.9819573026277413, iteration: 413650
loss: 0.9935207962989807,grad_norm: 0.7252467285700329, iteration: 413651
loss: 0.9870210289955139,grad_norm: 0.8427932691881371, iteration: 413652
loss: 1.0054277181625366,grad_norm: 0.7607773034442475, iteration: 413653
loss: 1.089902639389038,grad_norm: 0.9999999863491901, iteration: 413654
loss: 1.3847171068191528,grad_norm: 0.9999998260957217, iteration: 413655
loss: 1.1000356674194336,grad_norm: 0.9999991349031028, iteration: 413656
loss: 1.1104755401611328,grad_norm: 0.9999995268305748, iteration: 413657
loss: 0.9978712201118469,grad_norm: 0.9719153326187338, iteration: 413658
loss: 1.031447410583496,grad_norm: 0.9999990842136975, iteration: 413659
loss: 0.9813690781593323,grad_norm: 0.7812928944091831, iteration: 413660
loss: 0.9959598183631897,grad_norm: 0.8075007724797366, iteration: 413661
loss: 1.023202657699585,grad_norm: 0.8396226095124145, iteration: 413662
loss: 0.9812013506889343,grad_norm: 0.7748366918493528, iteration: 413663
loss: 1.0260995626449585,grad_norm: 0.9999989716143824, iteration: 413664
loss: 1.0575027465820312,grad_norm: 0.809527453052185, iteration: 413665
loss: 1.045172095298767,grad_norm: 0.9999993305336534, iteration: 413666
loss: 0.967030942440033,grad_norm: 0.6431367334135014, iteration: 413667
loss: 1.0264164209365845,grad_norm: 0.9546020006183858, iteration: 413668
loss: 1.0283708572387695,grad_norm: 0.8679436713739378, iteration: 413669
loss: 0.9695737957954407,grad_norm: 0.8144178552711119, iteration: 413670
loss: 1.0218884944915771,grad_norm: 0.9999993117662428, iteration: 413671
loss: 0.9523308277130127,grad_norm: 0.7165427670882843, iteration: 413672
loss: 1.0938363075256348,grad_norm: 0.9999990833772568, iteration: 413673
loss: 1.0435270071029663,grad_norm: 0.8070881758339974, iteration: 413674
loss: 1.0508618354797363,grad_norm: 0.7521520863386508, iteration: 413675
loss: 1.0100780725479126,grad_norm: 0.9999999872811576, iteration: 413676
loss: 0.9577299952507019,grad_norm: 0.7086147767319271, iteration: 413677
loss: 0.9859027862548828,grad_norm: 0.8418656070137028, iteration: 413678
loss: 0.9708860516548157,grad_norm: 0.7066238408763791, iteration: 413679
loss: 1.0068585872650146,grad_norm: 0.8108610401382813, iteration: 413680
loss: 0.9904540777206421,grad_norm: 0.9362340961937835, iteration: 413681
loss: 0.9674919247627258,grad_norm: 0.7913570391376022, iteration: 413682
loss: 1.0646531581878662,grad_norm: 0.9999991131727247, iteration: 413683
loss: 0.9726472496986389,grad_norm: 0.8135647516477712, iteration: 413684
loss: 1.004799246788025,grad_norm: 0.7654090148350868, iteration: 413685
loss: 0.9827789664268494,grad_norm: 0.7908262863806492, iteration: 413686
loss: 1.0672473907470703,grad_norm: 0.9065348329024566, iteration: 413687
loss: 0.9864280223846436,grad_norm: 0.7565703786718995, iteration: 413688
loss: 1.1730940341949463,grad_norm: 0.999999826300736, iteration: 413689
loss: 1.0086992979049683,grad_norm: 0.8085352755409966, iteration: 413690
loss: 0.9954326152801514,grad_norm: 0.8417028784330276, iteration: 413691
loss: 1.0131126642227173,grad_norm: 0.9999990733998363, iteration: 413692
loss: 1.1055316925048828,grad_norm: 0.9999994290611948, iteration: 413693
loss: 0.9535267353057861,grad_norm: 0.8418362675664081, iteration: 413694
loss: 0.9691152572631836,grad_norm: 0.7276026960191826, iteration: 413695
loss: 1.014886736869812,grad_norm: 0.8161909869481326, iteration: 413696
loss: 0.977023720741272,grad_norm: 0.7437897625699984, iteration: 413697
loss: 0.9922857284545898,grad_norm: 0.709815419027914, iteration: 413698
loss: 0.9953097105026245,grad_norm: 0.8469760514952559, iteration: 413699
loss: 0.9761720895767212,grad_norm: 0.8304234012281664, iteration: 413700
loss: 0.9509467482566833,grad_norm: 0.7448648540933003, iteration: 413701
loss: 1.0386080741882324,grad_norm: 0.862833825835072, iteration: 413702
loss: 0.9942552447319031,grad_norm: 0.8565658054410442, iteration: 413703
loss: 0.9894374012947083,grad_norm: 0.8770111173916874, iteration: 413704
loss: 0.9859791994094849,grad_norm: 0.8656229036881791, iteration: 413705
loss: 1.0256906747817993,grad_norm: 0.9999991237824388, iteration: 413706
loss: 0.9770869612693787,grad_norm: 0.633034689917219, iteration: 413707
loss: 1.0148859024047852,grad_norm: 0.6644615167820869, iteration: 413708
loss: 1.1080257892608643,grad_norm: 0.9999990723562469, iteration: 413709
loss: 0.9535843729972839,grad_norm: 0.8456198080579979, iteration: 413710
loss: 1.0461864471435547,grad_norm: 0.9999992830304724, iteration: 413711
loss: 0.9836864471435547,grad_norm: 0.826613831373144, iteration: 413712
loss: 1.0100502967834473,grad_norm: 0.9419168605980689, iteration: 413713
loss: 1.0857056379318237,grad_norm: 0.9999996524378554, iteration: 413714
loss: 1.000145673751831,grad_norm: 0.671594971244381, iteration: 413715
loss: 0.9983603358268738,grad_norm: 0.9999994730993715, iteration: 413716
loss: 1.093522548675537,grad_norm: 0.8743623472635738, iteration: 413717
loss: 1.1900631189346313,grad_norm: 0.9999997206595328, iteration: 413718
loss: 1.0823901891708374,grad_norm: 0.9999998882568291, iteration: 413719
loss: 0.9933686852455139,grad_norm: 0.6897039846413189, iteration: 413720
loss: 1.1755850315093994,grad_norm: 0.794146688303494, iteration: 413721
loss: 1.0499699115753174,grad_norm: 0.999999149096549, iteration: 413722
loss: 0.9647080898284912,grad_norm: 0.6059724948345808, iteration: 413723
loss: 0.9691623449325562,grad_norm: 0.75884207306635, iteration: 413724
loss: 1.011091709136963,grad_norm: 0.7834149584733804, iteration: 413725
loss: 1.0045171976089478,grad_norm: 0.9999994666163782, iteration: 413726
loss: 1.00771963596344,grad_norm: 0.7475276192079329, iteration: 413727
loss: 0.9710177183151245,grad_norm: 0.7737017761342408, iteration: 413728
loss: 0.9611072540283203,grad_norm: 0.77898261767467, iteration: 413729
loss: 1.0264310836791992,grad_norm: 0.7884640155278838, iteration: 413730
loss: 1.0374590158462524,grad_norm: 0.8765470246413795, iteration: 413731
loss: 0.987094521522522,grad_norm: 0.739733124207153, iteration: 413732
loss: 1.0677952766418457,grad_norm: 0.9999992247869836, iteration: 413733
loss: 0.9746425151824951,grad_norm: 0.7002311809534179, iteration: 413734
loss: 0.9639370441436768,grad_norm: 0.7542412335288824, iteration: 413735
loss: 1.0788848400115967,grad_norm: 0.9999998892158969, iteration: 413736
loss: 1.1443896293640137,grad_norm: 0.9999992744487325, iteration: 413737
loss: 1.0215225219726562,grad_norm: 0.8854400413815362, iteration: 413738
loss: 0.9947212338447571,grad_norm: 0.7577467840319964, iteration: 413739
loss: 0.9803591370582581,grad_norm: 0.9999992653740283, iteration: 413740
loss: 1.0413812398910522,grad_norm: 0.9999998143831296, iteration: 413741
loss: 0.9984604716300964,grad_norm: 0.9481052666595277, iteration: 413742
loss: 0.9724767208099365,grad_norm: 0.7976368467798339, iteration: 413743
loss: 1.1322813034057617,grad_norm: 0.8613529105314225, iteration: 413744
loss: 0.9810290336608887,grad_norm: 0.7432726510032498, iteration: 413745
loss: 1.0420843362808228,grad_norm: 0.845606208051706, iteration: 413746
loss: 0.9989656805992126,grad_norm: 0.7100786030031214, iteration: 413747
loss: 0.9702006578445435,grad_norm: 0.8391676467331961, iteration: 413748
loss: 1.0141983032226562,grad_norm: 0.8164861830146973, iteration: 413749
loss: 1.0416266918182373,grad_norm: 0.9999996574737215, iteration: 413750
loss: 0.9963055849075317,grad_norm: 0.8707097451436705, iteration: 413751
loss: 1.0270171165466309,grad_norm: 0.9232958054712596, iteration: 413752
loss: 0.9913789629936218,grad_norm: 0.8084221787461947, iteration: 413753
loss: 1.1546566486358643,grad_norm: 0.9999992763697176, iteration: 413754
loss: 1.0712898969650269,grad_norm: 0.8157404918822571, iteration: 413755
loss: 1.0046687126159668,grad_norm: 0.7837254212445712, iteration: 413756
loss: 0.9721223711967468,grad_norm: 0.831169049839174, iteration: 413757
loss: 1.0860024690628052,grad_norm: 0.7939050793395072, iteration: 413758
loss: 0.9794636964797974,grad_norm: 0.7570819110345012, iteration: 413759
loss: 0.9470500349998474,grad_norm: 0.900266744107139, iteration: 413760
loss: 1.0244088172912598,grad_norm: 0.6483358916775399, iteration: 413761
loss: 1.0085182189941406,grad_norm: 0.9999992216878966, iteration: 413762
loss: 0.9884219765663147,grad_norm: 0.691432523560285, iteration: 413763
loss: 0.9530356526374817,grad_norm: 0.7544828104062355, iteration: 413764
loss: 0.963772177696228,grad_norm: 0.9999990525416752, iteration: 413765
loss: 1.0311254262924194,grad_norm: 0.7838902464118331, iteration: 413766
loss: 1.004286527633667,grad_norm: 0.8431612240264432, iteration: 413767
loss: 0.9797700643539429,grad_norm: 0.7300353398867075, iteration: 413768
loss: 0.945992648601532,grad_norm: 0.7358276348757697, iteration: 413769
loss: 0.9855524301528931,grad_norm: 0.649303753632723, iteration: 413770
loss: 1.0647387504577637,grad_norm: 0.9190143776864169, iteration: 413771
loss: 1.0187113285064697,grad_norm: 0.7434994821496785, iteration: 413772
loss: 1.0957608222961426,grad_norm: 0.9999992980125438, iteration: 413773
loss: 0.9885892271995544,grad_norm: 0.8491636133206246, iteration: 413774
loss: 0.9894855618476868,grad_norm: 0.8342429179101972, iteration: 413775
loss: 1.2498000860214233,grad_norm: 1.00000000862225, iteration: 413776
loss: 0.9845393896102905,grad_norm: 0.8515088342353903, iteration: 413777
loss: 1.0565084218978882,grad_norm: 0.983940749208499, iteration: 413778
loss: 0.9368652105331421,grad_norm: 0.9999997073304362, iteration: 413779
loss: 1.0158419609069824,grad_norm: 0.999999056257576, iteration: 413780
loss: 1.0009263753890991,grad_norm: 0.7133079104032042, iteration: 413781
loss: 0.994441568851471,grad_norm: 0.8580296379542338, iteration: 413782
loss: 1.2011020183563232,grad_norm: 0.9999997243740145, iteration: 413783
loss: 1.0303221940994263,grad_norm: 0.9338690810489781, iteration: 413784
loss: 1.0402461290359497,grad_norm: 0.7534929539294293, iteration: 413785
loss: 1.0235689878463745,grad_norm: 0.7179884650161134, iteration: 413786
loss: 1.0047054290771484,grad_norm: 0.783195928333948, iteration: 413787
loss: 1.016237497329712,grad_norm: 0.7268214853414778, iteration: 413788
loss: 0.9765332937240601,grad_norm: 0.6859216891972324, iteration: 413789
loss: 0.992803156375885,grad_norm: 0.6818043274764918, iteration: 413790
loss: 1.0487791299819946,grad_norm: 0.8863209418496997, iteration: 413791
loss: 1.1192926168441772,grad_norm: 0.8230951276134065, iteration: 413792
loss: 0.9961698651313782,grad_norm: 0.9999997386944078, iteration: 413793
loss: 1.016513705253601,grad_norm: 0.6975915125218426, iteration: 413794
loss: 1.0180888175964355,grad_norm: 0.9999992358255191, iteration: 413795
loss: 1.0039153099060059,grad_norm: 0.8620337774633023, iteration: 413796
loss: 0.9952083826065063,grad_norm: 0.9206037253514157, iteration: 413797
loss: 1.0525707006454468,grad_norm: 0.9999990937569919, iteration: 413798
loss: 0.9777730703353882,grad_norm: 0.6523564094124163, iteration: 413799
loss: 1.0188767910003662,grad_norm: 0.9999997517277709, iteration: 413800
loss: 1.0224803686141968,grad_norm: 0.828176998915373, iteration: 413801
loss: 1.0561493635177612,grad_norm: 0.9999993873121218, iteration: 413802
loss: 1.0126062631607056,grad_norm: 0.9999990608126059, iteration: 413803
loss: 0.955495297908783,grad_norm: 0.9082662439692021, iteration: 413804
loss: 1.0329654216766357,grad_norm: 0.8381172234054657, iteration: 413805
loss: 0.9656619429588318,grad_norm: 0.8959445938064156, iteration: 413806
loss: 0.9996762275695801,grad_norm: 0.9973981621786228, iteration: 413807
loss: 1.055548071861267,grad_norm: 0.7593575599578342, iteration: 413808
loss: 1.047624111175537,grad_norm: 0.8915095879457131, iteration: 413809
loss: 1.002346158027649,grad_norm: 0.7507725423408578, iteration: 413810
loss: 1.0112874507904053,grad_norm: 0.999999555691871, iteration: 413811
loss: 0.9992901682853699,grad_norm: 0.9999997764219363, iteration: 413812
loss: 1.1037765741348267,grad_norm: 0.8688532814031064, iteration: 413813
loss: 1.0056778192520142,grad_norm: 0.8566810275920355, iteration: 413814
loss: 1.0225650072097778,grad_norm: 0.7783454084841092, iteration: 413815
loss: 1.01079523563385,grad_norm: 0.8323396334793585, iteration: 413816
loss: 1.0199702978134155,grad_norm: 0.9999995806491099, iteration: 413817
loss: 0.9991239309310913,grad_norm: 0.9999991093348078, iteration: 413818
loss: 0.9828647375106812,grad_norm: 0.7964417506230627, iteration: 413819
loss: 1.093697428703308,grad_norm: 0.9999996997578406, iteration: 413820
loss: 1.3250372409820557,grad_norm: 0.999999714757681, iteration: 413821
loss: 0.9904409646987915,grad_norm: 0.8618728590171968, iteration: 413822
loss: 1.0136399269104004,grad_norm: 0.8027310700237419, iteration: 413823
loss: 0.9715176820755005,grad_norm: 0.7899391697503885, iteration: 413824
loss: 0.9801517128944397,grad_norm: 0.8015234561943455, iteration: 413825
loss: 1.0523370504379272,grad_norm: 0.935469123925321, iteration: 413826
loss: 0.9793325066566467,grad_norm: 0.8921464106980661, iteration: 413827
loss: 1.0945616960525513,grad_norm: 0.9999997282819265, iteration: 413828
loss: 0.9783042073249817,grad_norm: 0.6721180126046284, iteration: 413829
loss: 0.9979689717292786,grad_norm: 0.694829559800813, iteration: 413830
loss: 1.0527808666229248,grad_norm: 0.9999998405871728, iteration: 413831
loss: 1.0208632946014404,grad_norm: 0.8479980479508966, iteration: 413832
loss: 1.0372203588485718,grad_norm: 0.9999996881903, iteration: 413833
loss: 0.9857187867164612,grad_norm: 0.9999988656133008, iteration: 413834
loss: 1.006141185760498,grad_norm: 0.7884358773054556, iteration: 413835
loss: 1.0237550735473633,grad_norm: 0.9999995529900069, iteration: 413836
loss: 1.007903814315796,grad_norm: 0.7745452449031088, iteration: 413837
loss: 1.0045582056045532,grad_norm: 0.725313747784875, iteration: 413838
loss: 1.0220205783843994,grad_norm: 0.862564500466669, iteration: 413839
loss: 1.0105481147766113,grad_norm: 0.8228349083184499, iteration: 413840
loss: 1.0280518531799316,grad_norm: 0.8686644857629129, iteration: 413841
loss: 0.9881885051727295,grad_norm: 0.7505035767237686, iteration: 413842
loss: 1.0605205297470093,grad_norm: 0.9999990457658062, iteration: 413843
loss: 0.9867240190505981,grad_norm: 0.8156102356605408, iteration: 413844
loss: 0.9660691618919373,grad_norm: 0.9999992448498021, iteration: 413845
loss: 1.0045742988586426,grad_norm: 0.7334709564034199, iteration: 413846
loss: 0.9923714995384216,grad_norm: 0.8192542740313923, iteration: 413847
loss: 1.0372997522354126,grad_norm: 0.6943216451771027, iteration: 413848
loss: 0.9875433444976807,grad_norm: 0.8560865836179484, iteration: 413849
loss: 0.9868707656860352,grad_norm: 0.7674386771663088, iteration: 413850
loss: 1.0237876176834106,grad_norm: 0.6762417404648841, iteration: 413851
loss: 0.9859402775764465,grad_norm: 0.7452313948693011, iteration: 413852
loss: 0.9860031604766846,grad_norm: 0.931209159431829, iteration: 413853
loss: 1.0275741815567017,grad_norm: 0.8524364439363806, iteration: 413854
loss: 1.0650635957717896,grad_norm: 0.9999995517750938, iteration: 413855
loss: 1.0132163763046265,grad_norm: 0.8828763696673465, iteration: 413856
loss: 1.0461069345474243,grad_norm: 0.9023857226676808, iteration: 413857
loss: 0.9864477515220642,grad_norm: 0.7929906071370235, iteration: 413858
loss: 1.0142128467559814,grad_norm: 0.7484406559104799, iteration: 413859
loss: 0.9903021454811096,grad_norm: 0.9999993659764256, iteration: 413860
loss: 0.9768186211585999,grad_norm: 0.7651053553835282, iteration: 413861
loss: 0.9904912710189819,grad_norm: 0.7187264779874146, iteration: 413862
loss: 0.9937689900398254,grad_norm: 0.9999996109534458, iteration: 413863
loss: 1.0150846242904663,grad_norm: 0.9356269589070736, iteration: 413864
loss: 0.9953190088272095,grad_norm: 0.7305218578983712, iteration: 413865
loss: 1.0496898889541626,grad_norm: 0.9999996051551504, iteration: 413866
loss: 1.0087143182754517,grad_norm: 0.9999993521624898, iteration: 413867
loss: 0.9933984875679016,grad_norm: 0.99999970678814, iteration: 413868
loss: 0.999775230884552,grad_norm: 0.7470169410006593, iteration: 413869
loss: 0.9979234337806702,grad_norm: 0.9999993493335099, iteration: 413870
loss: 0.9642567038536072,grad_norm: 0.8315535876671193, iteration: 413871
loss: 1.0156630277633667,grad_norm: 0.7947572816967431, iteration: 413872
loss: 1.0388840436935425,grad_norm: 1.0000000497446353, iteration: 413873
loss: 1.0019596815109253,grad_norm: 0.9999999176502452, iteration: 413874
loss: 0.9821946024894714,grad_norm: 0.6882012324872511, iteration: 413875
loss: 1.0851188898086548,grad_norm: 0.9999999288758893, iteration: 413876
loss: 1.028342366218567,grad_norm: 0.9999993730299849, iteration: 413877
loss: 1.023726463317871,grad_norm: 0.9999995616494604, iteration: 413878
loss: 0.9866969585418701,grad_norm: 0.8653916082304186, iteration: 413879
loss: 1.0903257131576538,grad_norm: 0.9999997182142435, iteration: 413880
loss: 1.045985460281372,grad_norm: 0.9999994905561334, iteration: 413881
loss: 0.9851435422897339,grad_norm: 0.7302710285925178, iteration: 413882
loss: 0.9554259777069092,grad_norm: 0.8717023198284637, iteration: 413883
loss: 1.0060104131698608,grad_norm: 0.6123141966170992, iteration: 413884
loss: 1.0175831317901611,grad_norm: 0.9999997730239378, iteration: 413885
loss: 0.9901140332221985,grad_norm: 0.9999998652609786, iteration: 413886
loss: 1.039964199066162,grad_norm: 0.999999297465597, iteration: 413887
loss: 0.9818143844604492,grad_norm: 0.998426504936406, iteration: 413888
loss: 1.0340791940689087,grad_norm: 0.9942478433685006, iteration: 413889
loss: 1.0188088417053223,grad_norm: 0.6929688270703646, iteration: 413890
loss: 0.9594188332557678,grad_norm: 0.8441305087891341, iteration: 413891
loss: 0.9998699426651001,grad_norm: 0.9971157595487208, iteration: 413892
loss: 1.0104385614395142,grad_norm: 0.9006720287787545, iteration: 413893
loss: 0.9596140384674072,grad_norm: 0.9999991953269327, iteration: 413894
loss: 1.0167477130889893,grad_norm: 0.7896774971928676, iteration: 413895
loss: 1.265663743019104,grad_norm: 0.9999996757094223, iteration: 413896
loss: 1.0283076763153076,grad_norm: 0.9999999480246435, iteration: 413897
loss: 1.0262820720672607,grad_norm: 0.8458664407894844, iteration: 413898
loss: 0.9733151197433472,grad_norm: 0.8330476718387743, iteration: 413899
loss: 1.018273115158081,grad_norm: 0.837613769305513, iteration: 413900
loss: 0.9906325936317444,grad_norm: 0.6919806565434127, iteration: 413901
loss: 1.002327561378479,grad_norm: 0.8949067924081036, iteration: 413902
loss: 1.0281586647033691,grad_norm: 0.9882999846136914, iteration: 413903
loss: 0.991944432258606,grad_norm: 0.9681117564037851, iteration: 413904
loss: 0.9841017723083496,grad_norm: 0.7514681223144914, iteration: 413905
loss: 1.0024334192276,grad_norm: 0.8309526818359516, iteration: 413906
loss: 0.9933788776397705,grad_norm: 0.8127030005886157, iteration: 413907
loss: 1.0614614486694336,grad_norm: 0.7031910408895706, iteration: 413908
loss: 1.0083013772964478,grad_norm: 0.8197025153717631, iteration: 413909
loss: 1.1377924680709839,grad_norm: 0.9999997455156278, iteration: 413910
loss: 0.9956178665161133,grad_norm: 0.7841555893307881, iteration: 413911
loss: 1.0173702239990234,grad_norm: 0.6749168572782074, iteration: 413912
loss: 1.0081123113632202,grad_norm: 0.7725694754258959, iteration: 413913
loss: 1.0038838386535645,grad_norm: 0.7902139760740154, iteration: 413914
loss: 0.9888266921043396,grad_norm: 0.8109924131666777, iteration: 413915
loss: 1.0235828161239624,grad_norm: 0.8030462686125603, iteration: 413916
loss: 1.0275676250457764,grad_norm: 0.7654236364872867, iteration: 413917
loss: 0.99336838722229,grad_norm: 0.9999992901004885, iteration: 413918
loss: 0.9932924509048462,grad_norm: 0.6973710418817124, iteration: 413919
loss: 0.9912192821502686,grad_norm: 0.7515626802672773, iteration: 413920
loss: 1.018541932106018,grad_norm: 0.9999992084639313, iteration: 413921
loss: 0.9927268624305725,grad_norm: 0.7893051267674911, iteration: 413922
loss: 1.1514301300048828,grad_norm: 0.9999992652616061, iteration: 413923
loss: 1.0109463930130005,grad_norm: 0.8015901701668667, iteration: 413924
loss: 1.0084253549575806,grad_norm: 0.8769173036329695, iteration: 413925
loss: 1.0208379030227661,grad_norm: 0.8137107327226241, iteration: 413926
loss: 0.9968429803848267,grad_norm: 0.9434428151572712, iteration: 413927
loss: 1.003440499305725,grad_norm: 0.7925769371249555, iteration: 413928
loss: 1.0020378828048706,grad_norm: 0.7696288200534054, iteration: 413929
loss: 1.024613380432129,grad_norm: 0.8027666396649652, iteration: 413930
loss: 1.0017142295837402,grad_norm: 0.7043377154667254, iteration: 413931
loss: 1.011817455291748,grad_norm: 0.6469343091152125, iteration: 413932
loss: 0.9813600778579712,grad_norm: 0.7434690764654566, iteration: 413933
loss: 0.9911527633666992,grad_norm: 0.9050646357618218, iteration: 413934
loss: 1.0099520683288574,grad_norm: 0.7535912144976674, iteration: 413935
loss: 1.0111756324768066,grad_norm: 0.7748830690099135, iteration: 413936
loss: 1.0235294103622437,grad_norm: 0.7004147710536873, iteration: 413937
loss: 1.0396267175674438,grad_norm: 0.9129439448543033, iteration: 413938
loss: 1.0254637002944946,grad_norm: 0.956771833591821, iteration: 413939
loss: 1.0314905643463135,grad_norm: 0.8063036743979867, iteration: 413940
loss: 1.0111294984817505,grad_norm: 0.6987987495266705, iteration: 413941
loss: 0.9911545515060425,grad_norm: 0.7367176984857522, iteration: 413942
loss: 1.022879958152771,grad_norm: 0.664100077500877, iteration: 413943
loss: 0.9915626645088196,grad_norm: 0.7404585895890929, iteration: 413944
loss: 0.9920302033424377,grad_norm: 0.9999990055180107, iteration: 413945
loss: 1.0371371507644653,grad_norm: 0.6906337902572198, iteration: 413946
loss: 0.9901285171508789,grad_norm: 0.8150346750733761, iteration: 413947
loss: 0.9928289651870728,grad_norm: 0.8041501003131967, iteration: 413948
loss: 0.9762136936187744,grad_norm: 0.9778436806143899, iteration: 413949
loss: 1.002111554145813,grad_norm: 0.7130596194844897, iteration: 413950
loss: 1.0233019590377808,grad_norm: 0.9999999179930834, iteration: 413951
loss: 1.012990117073059,grad_norm: 0.7348921150585658, iteration: 413952
loss: 1.3124769926071167,grad_norm: 0.9999991524021222, iteration: 413953
loss: 0.97472083568573,grad_norm: 0.8548156783472606, iteration: 413954
loss: 0.9853056073188782,grad_norm: 0.700831513128414, iteration: 413955
loss: 1.0491464138031006,grad_norm: 0.99999981174984, iteration: 413956
loss: 0.9906285405158997,grad_norm: 0.8402600235852928, iteration: 413957
loss: 0.9727609157562256,grad_norm: 0.862082427575048, iteration: 413958
loss: 1.0308082103729248,grad_norm: 0.8936902320234776, iteration: 413959
loss: 0.9884759783744812,grad_norm: 0.7724326574964518, iteration: 413960
loss: 1.0383440256118774,grad_norm: 0.999999189692137, iteration: 413961
loss: 1.0342251062393188,grad_norm: 0.8521188328942585, iteration: 413962
loss: 0.9999211430549622,grad_norm: 0.8240411211355899, iteration: 413963
loss: 0.9641517996788025,grad_norm: 0.7628788200770292, iteration: 413964
loss: 1.0160832405090332,grad_norm: 0.9999995303182703, iteration: 413965
loss: 1.0050796270370483,grad_norm: 0.8860439138743785, iteration: 413966
loss: 0.9703244566917419,grad_norm: 0.7466853166763986, iteration: 413967
loss: 1.0570578575134277,grad_norm: 0.8252437463474059, iteration: 413968
loss: 0.9938573241233826,grad_norm: 0.7315362772040205, iteration: 413969
loss: 1.0050257444381714,grad_norm: 0.7495680211550232, iteration: 413970
loss: 0.9519293308258057,grad_norm: 0.8007119294154945, iteration: 413971
loss: 1.0550601482391357,grad_norm: 0.7507091833218442, iteration: 413972
loss: 1.008057951927185,grad_norm: 0.9999999386906181, iteration: 413973
loss: 0.9966022968292236,grad_norm: 0.7077919769425471, iteration: 413974
loss: 1.0164363384246826,grad_norm: 0.7476698827541463, iteration: 413975
loss: 1.0222777128219604,grad_norm: 0.8559721403507424, iteration: 413976
loss: 1.036922812461853,grad_norm: 0.8021084242881736, iteration: 413977
loss: 1.0244054794311523,grad_norm: 0.7910773213470343, iteration: 413978
loss: 1.0239784717559814,grad_norm: 0.9999995552207509, iteration: 413979
loss: 1.0211427211761475,grad_norm: 0.8336253601376763, iteration: 413980
loss: 1.0110013484954834,grad_norm: 0.7143853688859433, iteration: 413981
loss: 0.9793555736541748,grad_norm: 0.7987067930624747, iteration: 413982
loss: 1.0074272155761719,grad_norm: 0.9999999698523597, iteration: 413983
loss: 0.9598673582077026,grad_norm: 0.9450920819734951, iteration: 413984
loss: 0.975905179977417,grad_norm: 0.82860594621599, iteration: 413985
loss: 0.9920133352279663,grad_norm: 0.7395376611198169, iteration: 413986
loss: 0.9765352606773376,grad_norm: 0.9999992886999572, iteration: 413987
loss: 1.0134236812591553,grad_norm: 0.7551837052363515, iteration: 413988
loss: 1.014827847480774,grad_norm: 0.9175228127329579, iteration: 413989
loss: 1.0102179050445557,grad_norm: 0.7481467567671221, iteration: 413990
loss: 0.9955470561981201,grad_norm: 0.8670960904928283, iteration: 413991
loss: 1.0166842937469482,grad_norm: 0.835539979486198, iteration: 413992
loss: 0.986414909362793,grad_norm: 0.8850306149455133, iteration: 413993
loss: 0.9845490455627441,grad_norm: 0.8346984017941746, iteration: 413994
loss: 1.0178343057632446,grad_norm: 0.7625057427663868, iteration: 413995
loss: 0.9979590773582458,grad_norm: 0.6742112771804228, iteration: 413996
loss: 1.156286358833313,grad_norm: 0.9999992121338767, iteration: 413997
loss: 0.9990171194076538,grad_norm: 0.728020817544956, iteration: 413998
loss: 1.0129716396331787,grad_norm: 0.7612318921342384, iteration: 413999
loss: 0.9871973991394043,grad_norm: 0.6875147221889488, iteration: 414000
loss: 1.0185726881027222,grad_norm: 0.6356611926021787, iteration: 414001
loss: 1.0604372024536133,grad_norm: 0.814348017899062, iteration: 414002
loss: 1.0022196769714355,grad_norm: 0.999999252560366, iteration: 414003
loss: 0.9901212453842163,grad_norm: 0.9999992216869528, iteration: 414004
loss: 1.0173429250717163,grad_norm: 0.7892362225851408, iteration: 414005
loss: 1.0105730295181274,grad_norm: 0.8200776715515543, iteration: 414006
loss: 1.052667498588562,grad_norm: 0.9999992985440199, iteration: 414007
loss: 1.045451045036316,grad_norm: 0.9201494202127989, iteration: 414008
loss: 1.0111581087112427,grad_norm: 0.9864213050841372, iteration: 414009
loss: 0.980934202671051,grad_norm: 0.7092962615193938, iteration: 414010
loss: 0.9980906844139099,grad_norm: 0.7026659188355555, iteration: 414011
loss: 1.0556919574737549,grad_norm: 0.7518978369600628, iteration: 414012
loss: 0.9884833097457886,grad_norm: 0.787170861563757, iteration: 414013
loss: 0.9927603602409363,grad_norm: 0.9005691095157962, iteration: 414014
loss: 1.0093154907226562,grad_norm: 0.7638126991080613, iteration: 414015
loss: 0.9989736080169678,grad_norm: 0.7965196299379048, iteration: 414016
loss: 1.0202964544296265,grad_norm: 0.9688798836325029, iteration: 414017
loss: 1.004756212234497,grad_norm: 0.7635157322805758, iteration: 414018
loss: 0.9785320162773132,grad_norm: 0.907486000605559, iteration: 414019
loss: 1.0540329217910767,grad_norm: 0.9999990031423306, iteration: 414020
loss: 1.0353361368179321,grad_norm: 0.9882783743429869, iteration: 414021
loss: 1.0300594568252563,grad_norm: 0.8921813528243785, iteration: 414022
loss: 1.0204694271087646,grad_norm: 0.8174798481014591, iteration: 414023
loss: 0.9845935702323914,grad_norm: 0.6201550411701415, iteration: 414024
loss: 1.0154576301574707,grad_norm: 0.7109719062361285, iteration: 414025
loss: 0.9986497163772583,grad_norm: 0.8838636863801879, iteration: 414026
loss: 1.016080379486084,grad_norm: 0.696482788131234, iteration: 414027
loss: 1.0228745937347412,grad_norm: 0.7909928532374707, iteration: 414028
loss: 0.9764893054962158,grad_norm: 0.7920838665432699, iteration: 414029
loss: 0.9669369459152222,grad_norm: 0.7656188644383686, iteration: 414030
loss: 0.9734867215156555,grad_norm: 0.7637228784955412, iteration: 414031
loss: 0.9632250666618347,grad_norm: 0.6990937452986754, iteration: 414032
loss: 0.9658622145652771,grad_norm: 0.7493434222886382, iteration: 414033
loss: 1.0088502168655396,grad_norm: 0.9645526115612786, iteration: 414034
loss: 0.9956690073013306,grad_norm: 0.7220967923810572, iteration: 414035
loss: 0.9902154207229614,grad_norm: 0.7566068641260196, iteration: 414036
loss: 1.0040339231491089,grad_norm: 0.8765907439667002, iteration: 414037
loss: 1.0619245767593384,grad_norm: 0.9999991899158461, iteration: 414038
loss: 0.9976853728294373,grad_norm: 0.65162509488751, iteration: 414039
loss: 1.0137066841125488,grad_norm: 0.8158696363478384, iteration: 414040
loss: 0.995224118232727,grad_norm: 0.8038683696244241, iteration: 414041
loss: 1.0099197626113892,grad_norm: 0.6531656382316527, iteration: 414042
loss: 0.9886162281036377,grad_norm: 0.7693974696734717, iteration: 414043
loss: 0.9481253623962402,grad_norm: 0.7608750966104842, iteration: 414044
loss: 1.016035556793213,grad_norm: 0.9999993236620339, iteration: 414045
loss: 1.0117080211639404,grad_norm: 0.7203397183847373, iteration: 414046
loss: 0.9876587986946106,grad_norm: 0.7038050149332887, iteration: 414047
loss: 1.013392448425293,grad_norm: 0.9999992453254014, iteration: 414048
loss: 0.9759644269943237,grad_norm: 0.7177464881685977, iteration: 414049
loss: 0.9821720719337463,grad_norm: 0.7987025904665235, iteration: 414050
loss: 1.0087602138519287,grad_norm: 0.8002271310828956, iteration: 414051
loss: 1.0127509832382202,grad_norm: 0.9109572948711153, iteration: 414052
loss: 1.0079480409622192,grad_norm: 0.7401281767769399, iteration: 414053
loss: 1.0068634748458862,grad_norm: 0.7701736110695503, iteration: 414054
loss: 1.0104255676269531,grad_norm: 0.8129589777265278, iteration: 414055
loss: 0.9965476989746094,grad_norm: 0.8866585904869922, iteration: 414056
loss: 1.0128029584884644,grad_norm: 0.8265505967208435, iteration: 414057
loss: 0.9822691082954407,grad_norm: 0.7620503579539757, iteration: 414058
loss: 1.0032378435134888,grad_norm: 0.7959910077942568, iteration: 414059
loss: 1.038685917854309,grad_norm: 0.734002285251677, iteration: 414060
loss: 0.9916767477989197,grad_norm: 0.9435766890990794, iteration: 414061
loss: 1.0049673318862915,grad_norm: 0.756861999294358, iteration: 414062
loss: 0.9834759831428528,grad_norm: 0.6936130239613225, iteration: 414063
loss: 0.9757424592971802,grad_norm: 0.8869676796172332, iteration: 414064
loss: 1.0172770023345947,grad_norm: 0.83212268688067, iteration: 414065
loss: 0.9948078989982605,grad_norm: 0.7667498805617273, iteration: 414066
loss: 1.0038917064666748,grad_norm: 0.6927369704120443, iteration: 414067
loss: 1.0101817846298218,grad_norm: 0.7555143070492784, iteration: 414068
loss: 1.0096312761306763,grad_norm: 0.714023609813388, iteration: 414069
loss: 0.9904524683952332,grad_norm: 0.7100792466721532, iteration: 414070
loss: 1.0358468294143677,grad_norm: 0.8531100490773458, iteration: 414071
loss: 0.9926884174346924,grad_norm: 0.9999990099731829, iteration: 414072
loss: 1.0075905323028564,grad_norm: 0.7070887551687751, iteration: 414073
loss: 0.9586424231529236,grad_norm: 0.6694347954507326, iteration: 414074
loss: 1.0098440647125244,grad_norm: 0.9400203616754782, iteration: 414075
loss: 1.0585325956344604,grad_norm: 0.9999994270410758, iteration: 414076
loss: 0.9752346277236938,grad_norm: 0.7299458598291325, iteration: 414077
loss: 0.9999865293502808,grad_norm: 0.9999990018331316, iteration: 414078
loss: 0.9981105327606201,grad_norm: 0.7167219333429106, iteration: 414079
loss: 0.9958950877189636,grad_norm: 0.7611701225263484, iteration: 414080
loss: 0.9894869327545166,grad_norm: 0.7982718189367741, iteration: 414081
loss: 1.0139939785003662,grad_norm: 0.7553646406076849, iteration: 414082
loss: 1.0415986776351929,grad_norm: 0.6537499183820926, iteration: 414083
loss: 1.022990345954895,grad_norm: 0.8504251380007767, iteration: 414084
loss: 0.9987137317657471,grad_norm: 0.7163074627603243, iteration: 414085
loss: 1.0930026769638062,grad_norm: 0.963708997241073, iteration: 414086
loss: 0.9898009300231934,grad_norm: 0.7449456331902866, iteration: 414087
loss: 1.0234061479568481,grad_norm: 0.7945209808899157, iteration: 414088
loss: 1.0195770263671875,grad_norm: 0.7707972096519543, iteration: 414089
loss: 0.9610133171081543,grad_norm: 0.8358382977221731, iteration: 414090
loss: 0.9584356546401978,grad_norm: 0.6750053138319506, iteration: 414091
loss: 1.0040478706359863,grad_norm: 0.6750215229174668, iteration: 414092
loss: 0.9702311158180237,grad_norm: 0.6854794886350186, iteration: 414093
loss: 1.0605968236923218,grad_norm: 0.9999992154895833, iteration: 414094
loss: 1.0136569738388062,grad_norm: 0.7146886717206068, iteration: 414095
loss: 1.0370169878005981,grad_norm: 0.999999193165067, iteration: 414096
loss: 1.024045467376709,grad_norm: 0.7150068983557692, iteration: 414097
loss: 1.0063267946243286,grad_norm: 0.8910802367024153, iteration: 414098
loss: 0.9824231266975403,grad_norm: 0.8096343454831288, iteration: 414099
loss: 1.0111550092697144,grad_norm: 0.8201614060787037, iteration: 414100
loss: 1.1856598854064941,grad_norm: 0.9999996646047641, iteration: 414101
loss: 0.9923651814460754,grad_norm: 0.7528468083857899, iteration: 414102
loss: 0.9572426676750183,grad_norm: 0.7799869805349622, iteration: 414103
loss: 1.0199804306030273,grad_norm: 0.7734639160183161, iteration: 414104
loss: 1.0497517585754395,grad_norm: 0.8629277192589555, iteration: 414105
loss: 1.03219473361969,grad_norm: 0.8641798572195668, iteration: 414106
loss: 1.0318530797958374,grad_norm: 0.9999990513694739, iteration: 414107
loss: 1.0159763097763062,grad_norm: 0.6560385292304858, iteration: 414108
loss: 1.0368138551712036,grad_norm: 0.8065685139540155, iteration: 414109
loss: 0.9569746851921082,grad_norm: 0.8623998789708244, iteration: 414110
loss: 1.0081093311309814,grad_norm: 0.8580390599041017, iteration: 414111
loss: 1.0020886659622192,grad_norm: 0.8459447547468737, iteration: 414112
loss: 1.0022121667861938,grad_norm: 0.7390747153710292, iteration: 414113
loss: 1.0338560342788696,grad_norm: 0.6808688912318548, iteration: 414114
loss: 1.0216748714447021,grad_norm: 0.7007532550408504, iteration: 414115
loss: 1.0091979503631592,grad_norm: 0.618542058862748, iteration: 414116
loss: 1.021622657775879,grad_norm: 0.83740471281968, iteration: 414117
loss: 0.9669432044029236,grad_norm: 0.7052695043789184, iteration: 414118
loss: 1.0415030717849731,grad_norm: 0.9999989259207703, iteration: 414119
loss: 1.0088070631027222,grad_norm: 0.8279401947283628, iteration: 414120
loss: 0.9995177984237671,grad_norm: 0.8552286438612215, iteration: 414121
loss: 1.0109472274780273,grad_norm: 0.9655370854409805, iteration: 414122
loss: 1.0806154012680054,grad_norm: 0.773620066239995, iteration: 414123
loss: 0.9655845165252686,grad_norm: 0.8541182308338467, iteration: 414124
loss: 0.9985321164131165,grad_norm: 0.9999991669179961, iteration: 414125
loss: 1.0256483554840088,grad_norm: 0.6305204688905353, iteration: 414126
loss: 0.9560129642486572,grad_norm: 0.7923289445080655, iteration: 414127
loss: 1.0274605751037598,grad_norm: 0.9999989684110154, iteration: 414128
loss: 1.0378692150115967,grad_norm: 0.8734489587752926, iteration: 414129
loss: 1.0161809921264648,grad_norm: 0.7035491787794211, iteration: 414130
loss: 1.0348525047302246,grad_norm: 0.931652809849553, iteration: 414131
loss: 1.0218651294708252,grad_norm: 0.9999990328201007, iteration: 414132
loss: 0.9753016233444214,grad_norm: 0.9234396526586868, iteration: 414133
loss: 1.0067994594573975,grad_norm: 0.9774379725284607, iteration: 414134
loss: 1.0161279439926147,grad_norm: 0.9999996995805285, iteration: 414135
loss: 1.0356740951538086,grad_norm: 0.9999995788267845, iteration: 414136
loss: 1.0466315746307373,grad_norm: 0.9999993245511027, iteration: 414137
loss: 1.026056170463562,grad_norm: 0.8173223896838208, iteration: 414138
loss: 1.0049047470092773,grad_norm: 0.7349757614844162, iteration: 414139
loss: 1.000537633895874,grad_norm: 0.7566445744473098, iteration: 414140
loss: 1.0086021423339844,grad_norm: 0.9999990433339808, iteration: 414141
loss: 1.010669469833374,grad_norm: 0.9999995058787303, iteration: 414142
loss: 0.9944168925285339,grad_norm: 0.7222666657904857, iteration: 414143
loss: 0.9892999529838562,grad_norm: 0.7924540629232035, iteration: 414144
loss: 1.01503586769104,grad_norm: 0.9134868875293165, iteration: 414145
loss: 1.0029218196868896,grad_norm: 0.8972380962507633, iteration: 414146
loss: 0.9827612042427063,grad_norm: 0.7213660318102187, iteration: 414147
loss: 1.0214848518371582,grad_norm: 0.7833092143219956, iteration: 414148
loss: 0.9863084554672241,grad_norm: 0.9374207172965641, iteration: 414149
loss: 0.9934117794036865,grad_norm: 0.7918736264069566, iteration: 414150
loss: 0.9850504398345947,grad_norm: 0.8366453327746766, iteration: 414151
loss: 0.9833528399467468,grad_norm: 0.6926509130046803, iteration: 414152
loss: 1.0278905630111694,grad_norm: 0.8163576730364797, iteration: 414153
loss: 0.9816886186599731,grad_norm: 0.8361684767146954, iteration: 414154
loss: 0.9894927740097046,grad_norm: 0.8767628496038501, iteration: 414155
loss: 1.0631661415100098,grad_norm: 0.8493940340469815, iteration: 414156
loss: 0.9744930267333984,grad_norm: 0.8454624846387984, iteration: 414157
loss: 0.9620785117149353,grad_norm: 0.9856471472170784, iteration: 414158
loss: 1.0528334379196167,grad_norm: 0.8802198176686057, iteration: 414159
loss: 1.0346248149871826,grad_norm: 0.9999998358771128, iteration: 414160
loss: 1.0991761684417725,grad_norm: 0.894530599514685, iteration: 414161
loss: 0.9812082052230835,grad_norm: 0.8488273383369219, iteration: 414162
loss: 1.0251516103744507,grad_norm: 0.8424301262727036, iteration: 414163
loss: 1.0570815801620483,grad_norm: 0.9999996542694569, iteration: 414164
loss: 1.0021997690200806,grad_norm: 0.666200501467169, iteration: 414165
loss: 1.124995231628418,grad_norm: 0.9999992598524486, iteration: 414166
loss: 0.9922774434089661,grad_norm: 0.6120331065192877, iteration: 414167
loss: 0.9868885278701782,grad_norm: 0.7886370896997021, iteration: 414168
loss: 1.0168145895004272,grad_norm: 0.7450990688539564, iteration: 414169
loss: 1.0117861032485962,grad_norm: 0.8367496202312982, iteration: 414170
loss: 1.0517325401306152,grad_norm: 0.7567558892060797, iteration: 414171
loss: 1.0004675388336182,grad_norm: 0.9999998210131605, iteration: 414172
loss: 0.9784073233604431,grad_norm: 0.9999990347349118, iteration: 414173
loss: 0.9759155511856079,grad_norm: 0.9999992447138601, iteration: 414174
loss: 0.9937533736228943,grad_norm: 0.9999990800773552, iteration: 414175
loss: 1.0158182382583618,grad_norm: 0.8191410818195596, iteration: 414176
loss: 1.020249605178833,grad_norm: 0.710775992884644, iteration: 414177
loss: 1.006714105606079,grad_norm: 0.6857062715654348, iteration: 414178
loss: 0.9984793663024902,grad_norm: 0.8180244213508145, iteration: 414179
loss: 0.9793438911437988,grad_norm: 0.7049694057313036, iteration: 414180
loss: 1.012901782989502,grad_norm: 0.9999996706557985, iteration: 414181
loss: 0.9687309265136719,grad_norm: 0.8444067314868131, iteration: 414182
loss: 1.0141820907592773,grad_norm: 0.6905442826075804, iteration: 414183
loss: 1.0173442363739014,grad_norm: 0.7818313959368708, iteration: 414184
loss: 1.0009353160858154,grad_norm: 0.8473352011753067, iteration: 414185
loss: 1.043017029762268,grad_norm: 0.9999990793070879, iteration: 414186
loss: 1.003847360610962,grad_norm: 0.8403629020932747, iteration: 414187
loss: 1.0096567869186401,grad_norm: 0.820951702193436, iteration: 414188
loss: 0.973119854927063,grad_norm: 0.6738943473202053, iteration: 414189
loss: 0.9987849593162537,grad_norm: 0.642601867635018, iteration: 414190
loss: 1.0225461721420288,grad_norm: 0.9607584398247281, iteration: 414191
loss: 0.9994885325431824,grad_norm: 0.7633882578736186, iteration: 414192
loss: 1.057120680809021,grad_norm: 0.9528297592142586, iteration: 414193
loss: 1.0093936920166016,grad_norm: 0.7339224003361323, iteration: 414194
loss: 0.9831997156143188,grad_norm: 0.8744903644046883, iteration: 414195
loss: 0.9873898029327393,grad_norm: 0.7255576235735008, iteration: 414196
loss: 1.0148861408233643,grad_norm: 0.8521308752038105, iteration: 414197
loss: 1.0232361555099487,grad_norm: 0.9999991198767157, iteration: 414198
loss: 1.0128086805343628,grad_norm: 0.9999990678561161, iteration: 414199
loss: 1.0048850774765015,grad_norm: 0.7111102763537214, iteration: 414200
loss: 1.0220221281051636,grad_norm: 0.9999990567981661, iteration: 414201
loss: 0.999650776386261,grad_norm: 0.7771111847112628, iteration: 414202
loss: 1.0553239583969116,grad_norm: 0.8696248319562455, iteration: 414203
loss: 0.9781155586242676,grad_norm: 0.8801309054328379, iteration: 414204
loss: 1.006503939628601,grad_norm: 0.7151475382953547, iteration: 414205
loss: 0.9741302728652954,grad_norm: 0.6859054724714209, iteration: 414206
loss: 1.032978892326355,grad_norm: 0.99999994040222, iteration: 414207
loss: 1.0159307718276978,grad_norm: 0.6977665097360893, iteration: 414208
loss: 0.9802350401878357,grad_norm: 0.8512108693773598, iteration: 414209
loss: 1.0202620029449463,grad_norm: 0.8942108956936743, iteration: 414210
loss: 0.9849832653999329,grad_norm: 0.8163910045369822, iteration: 414211
loss: 0.9939805865287781,grad_norm: 0.7345539298614973, iteration: 414212
loss: 1.008101224899292,grad_norm: 0.9999989933483132, iteration: 414213
loss: 0.9840313196182251,grad_norm: 0.7347804567448235, iteration: 414214
loss: 1.0034031867980957,grad_norm: 0.7720363617358466, iteration: 414215
loss: 1.027750849723816,grad_norm: 0.7516325730344103, iteration: 414216
loss: 1.0229264497756958,grad_norm: 0.6514966252536663, iteration: 414217
loss: 1.0465307235717773,grad_norm: 0.8444477546113722, iteration: 414218
loss: 1.0159755945205688,grad_norm: 0.9999996216505094, iteration: 414219
loss: 0.9596526026725769,grad_norm: 0.8561137876441993, iteration: 414220
loss: 1.0030620098114014,grad_norm: 0.7637821242842091, iteration: 414221
loss: 1.032878041267395,grad_norm: 0.7019328313992758, iteration: 414222
loss: 0.979377806186676,grad_norm: 0.6668539424870235, iteration: 414223
loss: 0.9732586145401001,grad_norm: 0.6922171219616695, iteration: 414224
loss: 0.9967137575149536,grad_norm: 0.9822041039441237, iteration: 414225
loss: 0.9941019415855408,grad_norm: 0.8053202887377086, iteration: 414226
loss: 0.9778735041618347,grad_norm: 0.8540052403842413, iteration: 414227
loss: 1.0347954034805298,grad_norm: 0.8701265400272452, iteration: 414228
loss: 0.9616208076477051,grad_norm: 0.7198529105992942, iteration: 414229
loss: 1.0078331232070923,grad_norm: 0.6527888539437527, iteration: 414230
loss: 0.9903693795204163,grad_norm: 0.7760541429600097, iteration: 414231
loss: 1.0324552059173584,grad_norm: 0.9999991333331466, iteration: 414232
loss: 0.9940884113311768,grad_norm: 0.8745481367478061, iteration: 414233
loss: 1.011121153831482,grad_norm: 0.6242258658111093, iteration: 414234
loss: 1.0406156778335571,grad_norm: 0.7329506261588382, iteration: 414235
loss: 1.0030477046966553,grad_norm: 0.7206643321202779, iteration: 414236
loss: 1.0173454284667969,grad_norm: 0.7279323419490191, iteration: 414237
loss: 0.9770138263702393,grad_norm: 0.7241881883079171, iteration: 414238
loss: 1.0387728214263916,grad_norm: 0.6760712449890076, iteration: 414239
loss: 0.9652009606361389,grad_norm: 0.7451065838109903, iteration: 414240
loss: 0.9840817451477051,grad_norm: 0.6931162624153754, iteration: 414241
loss: 0.974381685256958,grad_norm: 0.809437613087062, iteration: 414242
loss: 0.9913718104362488,grad_norm: 0.8948176632813549, iteration: 414243
loss: 0.9994103908538818,grad_norm: 0.7642390524520389, iteration: 414244
loss: 0.9820376634597778,grad_norm: 0.7510107610343868, iteration: 414245
loss: 0.9776611328125,grad_norm: 0.887694003939593, iteration: 414246
loss: 0.9926941990852356,grad_norm: 0.7889068151176757, iteration: 414247
loss: 1.00576651096344,grad_norm: 0.9999994206973195, iteration: 414248
loss: 0.970027506351471,grad_norm: 0.6960867828062204, iteration: 414249
loss: 0.9735719561576843,grad_norm: 0.720176291035148, iteration: 414250
loss: 0.9978018999099731,grad_norm: 0.8115526429177631, iteration: 414251
loss: 1.0395562648773193,grad_norm: 0.7577730719944168, iteration: 414252
loss: 1.0244358777999878,grad_norm: 0.9999998781850811, iteration: 414253
loss: 1.0136152505874634,grad_norm: 0.8600351855156939, iteration: 414254
loss: 1.031623363494873,grad_norm: 0.999999795855608, iteration: 414255
loss: 1.0129059553146362,grad_norm: 1.000000101712978, iteration: 414256
loss: 0.9753401875495911,grad_norm: 0.8040599027093344, iteration: 414257
loss: 1.049668312072754,grad_norm: 0.7771517183294876, iteration: 414258
loss: 0.9615504741668701,grad_norm: 0.8446626770790899, iteration: 414259
loss: 1.0135016441345215,grad_norm: 0.6633966312033821, iteration: 414260
loss: 1.0338906049728394,grad_norm: 0.9999998353582612, iteration: 414261
loss: 0.9854627847671509,grad_norm: 0.7924839772589813, iteration: 414262
loss: 0.9888158440589905,grad_norm: 0.7746009951503104, iteration: 414263
loss: 0.9991378784179688,grad_norm: 0.7596543139230162, iteration: 414264
loss: 1.0142345428466797,grad_norm: 0.7889040731870633, iteration: 414265
loss: 1.0150916576385498,grad_norm: 0.7935015739121187, iteration: 414266
loss: 1.0130507946014404,grad_norm: 0.748010737566415, iteration: 414267
loss: 0.9683407545089722,grad_norm: 0.8530683350483084, iteration: 414268
loss: 1.0372028350830078,grad_norm: 0.8550693906404533, iteration: 414269
loss: 0.9723596572875977,grad_norm: 0.6731586260948319, iteration: 414270
loss: 1.0000053644180298,grad_norm: 0.8310516248806291, iteration: 414271
loss: 1.0099972486495972,grad_norm: 0.7136793679422361, iteration: 414272
loss: 0.9966229796409607,grad_norm: 0.741392763194857, iteration: 414273
loss: 0.9743831753730774,grad_norm: 0.8592105659844316, iteration: 414274
loss: 1.007601022720337,grad_norm: 0.9680817037065037, iteration: 414275
loss: 1.0223991870880127,grad_norm: 0.7678374793562991, iteration: 414276
loss: 0.9980751872062683,grad_norm: 0.8500891315227032, iteration: 414277
loss: 1.0516012907028198,grad_norm: 0.9999991317085615, iteration: 414278
loss: 1.012599229812622,grad_norm: 0.7611148437058803, iteration: 414279
loss: 0.9644880294799805,grad_norm: 0.7711659892517364, iteration: 414280
loss: 0.9981635808944702,grad_norm: 0.6190224473643786, iteration: 414281
loss: 0.9902591109275818,grad_norm: 0.9115395387406786, iteration: 414282
loss: 1.0314137935638428,grad_norm: 0.9999997558572643, iteration: 414283
loss: 0.9563917517662048,grad_norm: 0.9348525128995271, iteration: 414284
loss: 0.9903157949447632,grad_norm: 0.7379494867603531, iteration: 414285
loss: 1.0650079250335693,grad_norm: 0.6903061940746549, iteration: 414286
loss: 0.9643256664276123,grad_norm: 0.706341040783352, iteration: 414287
loss: 1.0161550045013428,grad_norm: 0.6791239302963897, iteration: 414288
loss: 1.0256274938583374,grad_norm: 0.6892898918740693, iteration: 414289
loss: 1.062699556350708,grad_norm: 0.9585671343039633, iteration: 414290
loss: 0.9926692843437195,grad_norm: 0.8213157030166715, iteration: 414291
loss: 0.9714645147323608,grad_norm: 0.9384960795876004, iteration: 414292
loss: 1.0174031257629395,grad_norm: 0.7293671892731254, iteration: 414293
loss: 1.0019254684448242,grad_norm: 0.9999995719362895, iteration: 414294
loss: 0.9907548427581787,grad_norm: 0.6716321385932936, iteration: 414295
loss: 1.0047810077667236,grad_norm: 0.5886011673968474, iteration: 414296
loss: 0.9865213632583618,grad_norm: 0.9999989964417411, iteration: 414297
loss: 1.0058361291885376,grad_norm: 0.9881398675410418, iteration: 414298
loss: 0.9713863134384155,grad_norm: 0.8689213000147176, iteration: 414299
loss: 1.0168554782867432,grad_norm: 0.7596107688663823, iteration: 414300
loss: 0.9998594522476196,grad_norm: 0.7742002772756027, iteration: 414301
loss: 0.9414323568344116,grad_norm: 0.7867871067219707, iteration: 414302
loss: 0.9978142380714417,grad_norm: 0.8946146919506497, iteration: 414303
loss: 1.032394528388977,grad_norm: 0.850148609778382, iteration: 414304
loss: 1.0163522958755493,grad_norm: 0.8685316831268111, iteration: 414305
loss: 1.0370745658874512,grad_norm: 0.925352157319787, iteration: 414306
loss: 0.9906036853790283,grad_norm: 0.8254284585558868, iteration: 414307
loss: 0.992337703704834,grad_norm: 0.9584028185878755, iteration: 414308
loss: 0.9760759472846985,grad_norm: 0.8105095669230519, iteration: 414309
loss: 1.0005099773406982,grad_norm: 0.6888171766352017, iteration: 414310
loss: 1.0897841453552246,grad_norm: 0.9782354681032608, iteration: 414311
loss: 0.9919009804725647,grad_norm: 0.825439089318553, iteration: 414312
loss: 1.01740300655365,grad_norm: 0.7138851473984443, iteration: 414313
loss: 1.0133544206619263,grad_norm: 0.9999998450000186, iteration: 414314
loss: 0.9921568036079407,grad_norm: 0.9152116399715876, iteration: 414315
loss: 1.0228204727172852,grad_norm: 0.7121270855348215, iteration: 414316
loss: 1.054358720779419,grad_norm: 0.9507384344447418, iteration: 414317
loss: 1.015907645225525,grad_norm: 0.822409681617977, iteration: 414318
loss: 1.0039446353912354,grad_norm: 0.6427001632549203, iteration: 414319
loss: 1.0480443239212036,grad_norm: 0.9481963165949264, iteration: 414320
loss: 1.0087175369262695,grad_norm: 0.7829807257463256, iteration: 414321
loss: 0.9881404638290405,grad_norm: 0.7653376652885362, iteration: 414322
loss: 1.0077601671218872,grad_norm: 0.8205715077124139, iteration: 414323
loss: 1.0060020685195923,grad_norm: 0.7808923339127516, iteration: 414324
loss: 1.016662836074829,grad_norm: 0.9999995614673562, iteration: 414325
loss: 1.0557173490524292,grad_norm: 0.9999998295492518, iteration: 414326
loss: 0.9985361695289612,grad_norm: 0.7700972566518707, iteration: 414327
loss: 1.001667857170105,grad_norm: 0.9999995529068013, iteration: 414328
loss: 0.9787901043891907,grad_norm: 0.7137180393486119, iteration: 414329
loss: 1.0310406684875488,grad_norm: 0.9246570662992194, iteration: 414330
loss: 1.004835605621338,grad_norm: 0.6059650615247385, iteration: 414331
loss: 1.0185149908065796,grad_norm: 0.8188392777949366, iteration: 414332
loss: 1.0039366483688354,grad_norm: 0.7409206882977359, iteration: 414333
loss: 1.1448326110839844,grad_norm: 0.9999991765395603, iteration: 414334
loss: 1.0359100103378296,grad_norm: 0.9999998154138868, iteration: 414335
loss: 1.0267010927200317,grad_norm: 0.7682126739658953, iteration: 414336
loss: 0.9823145270347595,grad_norm: 0.712874705814028, iteration: 414337
loss: 0.9763598442077637,grad_norm: 0.8694089746233604, iteration: 414338
loss: 1.0226545333862305,grad_norm: 0.9205267845231268, iteration: 414339
loss: 0.9868732690811157,grad_norm: 0.7540348985655169, iteration: 414340
loss: 1.0420598983764648,grad_norm: 0.9999993243995725, iteration: 414341
loss: 0.9968425631523132,grad_norm: 0.9999996842728468, iteration: 414342
loss: 1.00252366065979,grad_norm: 0.9205799987704991, iteration: 414343
loss: 0.9971486926078796,grad_norm: 0.708466838288438, iteration: 414344
loss: 1.0318013429641724,grad_norm: 0.7295213726689613, iteration: 414345
loss: 0.952798068523407,grad_norm: 0.8572197119988204, iteration: 414346
loss: 0.9887095093727112,grad_norm: 0.7869536473095753, iteration: 414347
loss: 0.9941520094871521,grad_norm: 0.6785342469476342, iteration: 414348
loss: 1.0112996101379395,grad_norm: 0.7019505252143353, iteration: 414349
loss: 0.9865862727165222,grad_norm: 0.7424190301845914, iteration: 414350
loss: 1.0119050741195679,grad_norm: 0.8029455894681083, iteration: 414351
loss: 0.985738217830658,grad_norm: 0.8170952079319753, iteration: 414352
loss: 1.0286648273468018,grad_norm: 0.8971048366101816, iteration: 414353
loss: 0.9685662984848022,grad_norm: 0.7501554660475719, iteration: 414354
loss: 0.9962238073348999,grad_norm: 0.8586763634961153, iteration: 414355
loss: 1.03621506690979,grad_norm: 0.999999428091477, iteration: 414356
loss: 1.0033758878707886,grad_norm: 0.9163301010716679, iteration: 414357
loss: 1.0162150859832764,grad_norm: 0.8550430100668102, iteration: 414358
loss: 1.0290993452072144,grad_norm: 0.9570940536189879, iteration: 414359
loss: 1.0077303647994995,grad_norm: 0.9999991777757427, iteration: 414360
loss: 0.9853591322898865,grad_norm: 0.7056764945766183, iteration: 414361
loss: 0.9676777124404907,grad_norm: 0.7882806366079447, iteration: 414362
loss: 1.0390084981918335,grad_norm: 0.7780946968308193, iteration: 414363
loss: 1.0388875007629395,grad_norm: 0.9999992038841157, iteration: 414364
loss: 1.0060746669769287,grad_norm: 0.8246698068062083, iteration: 414365
loss: 1.0198291540145874,grad_norm: 0.8391066852988626, iteration: 414366
loss: 0.9814172983169556,grad_norm: 0.9534159990877592, iteration: 414367
loss: 1.0263879299163818,grad_norm: 0.8122279425740059, iteration: 414368
loss: 1.01523756980896,grad_norm: 0.7929325121309377, iteration: 414369
loss: 1.003567099571228,grad_norm: 0.9574536422293404, iteration: 414370
loss: 0.9868786931037903,grad_norm: 0.9999996149144651, iteration: 414371
loss: 1.0353755950927734,grad_norm: 0.6874621765979662, iteration: 414372
loss: 1.0746546983718872,grad_norm: 0.9999996404618454, iteration: 414373
loss: 1.0451933145523071,grad_norm: 0.9679549030488053, iteration: 414374
loss: 0.9553737044334412,grad_norm: 0.8976852642057678, iteration: 414375
loss: 0.9696452617645264,grad_norm: 0.7758213879559309, iteration: 414376
loss: 1.0553780794143677,grad_norm: 0.869110393966534, iteration: 414377
loss: 0.9891907572746277,grad_norm: 0.9999992581947403, iteration: 414378
loss: 0.994240403175354,grad_norm: 0.6763572250358888, iteration: 414379
loss: 1.0088410377502441,grad_norm: 0.7024598610025028, iteration: 414380
loss: 1.021378993988037,grad_norm: 0.7942268863908251, iteration: 414381
loss: 0.9875298142433167,grad_norm: 0.7977612499131306, iteration: 414382
loss: 1.004127025604248,grad_norm: 0.7702057236301317, iteration: 414383
loss: 1.0226141214370728,grad_norm: 0.69262009902749, iteration: 414384
loss: 0.9975248575210571,grad_norm: 0.7772897027084296, iteration: 414385
loss: 0.999146044254303,grad_norm: 0.863793974912995, iteration: 414386
loss: 0.9897599220275879,grad_norm: 0.999999190882665, iteration: 414387
loss: 0.9748439192771912,grad_norm: 0.7900246529838, iteration: 414388
loss: 1.0094727277755737,grad_norm: 0.7500492853569438, iteration: 414389
loss: 1.0252825021743774,grad_norm: 0.781703842074075, iteration: 414390
loss: 1.0007846355438232,grad_norm: 0.858045312203958, iteration: 414391
loss: 0.9852284789085388,grad_norm: 0.9999991342417965, iteration: 414392
loss: 0.9701521396636963,grad_norm: 0.9565462300524611, iteration: 414393
loss: 1.0649124383926392,grad_norm: 0.950307029228396, iteration: 414394
loss: 0.9995792508125305,grad_norm: 0.7398477151746572, iteration: 414395
loss: 1.0061649084091187,grad_norm: 0.94903338498433, iteration: 414396
loss: 0.9886860251426697,grad_norm: 0.763653483115719, iteration: 414397
loss: 0.9793169498443604,grad_norm: 0.7931419002674386, iteration: 414398
loss: 1.000645637512207,grad_norm: 0.9999999726305578, iteration: 414399
loss: 1.0600464344024658,grad_norm: 0.8601442056928114, iteration: 414400
loss: 1.0056326389312744,grad_norm: 0.9301182166031753, iteration: 414401
loss: 1.0123660564422607,grad_norm: 0.6789691638493399, iteration: 414402
loss: 1.0051944255828857,grad_norm: 0.7573255537281378, iteration: 414403
loss: 0.990443229675293,grad_norm: 0.7430014456320355, iteration: 414404
loss: 1.03495192527771,grad_norm: 0.9350551151121116, iteration: 414405
loss: 0.9670877456665039,grad_norm: 0.8038955105714851, iteration: 414406
loss: 1.007685899734497,grad_norm: 0.5715396236022356, iteration: 414407
loss: 0.9753931164741516,grad_norm: 0.8599060775656536, iteration: 414408
loss: 1.0112261772155762,grad_norm: 0.7806055724747656, iteration: 414409
loss: 0.9868069887161255,grad_norm: 0.8324126012632523, iteration: 414410
loss: 0.9861530065536499,grad_norm: 0.9676234998028813, iteration: 414411
loss: 0.9815369248390198,grad_norm: 0.8988880613893537, iteration: 414412
loss: 1.007046103477478,grad_norm: 0.7504921564732936, iteration: 414413
loss: 1.0010461807250977,grad_norm: 0.9999991242016665, iteration: 414414
loss: 1.018964171409607,grad_norm: 0.9999996968132556, iteration: 414415
loss: 1.0171805620193481,grad_norm: 0.9999993976678219, iteration: 414416
loss: 1.09465491771698,grad_norm: 0.8784085789911872, iteration: 414417
loss: 0.9739276766777039,grad_norm: 0.8374311897679791, iteration: 414418
loss: 0.9936593770980835,grad_norm: 0.8039122214495439, iteration: 414419
loss: 1.0346375703811646,grad_norm: 0.8175252616871754, iteration: 414420
loss: 0.9998647570610046,grad_norm: 0.9999997781569104, iteration: 414421
loss: 0.9587251543998718,grad_norm: 0.7367502722808903, iteration: 414422
loss: 1.0309765338897705,grad_norm: 0.7699974267729355, iteration: 414423
loss: 1.1152894496917725,grad_norm: 0.999999454008746, iteration: 414424
loss: 1.0261738300323486,grad_norm: 0.762137932771869, iteration: 414425
loss: 1.0303672552108765,grad_norm: 0.9999998044396858, iteration: 414426
loss: 1.0514694452285767,grad_norm: 0.7740907263787064, iteration: 414427
loss: 0.9727849960327148,grad_norm: 0.7291079683405696, iteration: 414428
loss: 0.9970806241035461,grad_norm: 0.7464060008905725, iteration: 414429
loss: 0.9952724575996399,grad_norm: 0.837828126978539, iteration: 414430
loss: 1.004706621170044,grad_norm: 0.6902174589918249, iteration: 414431
loss: 0.9597057700157166,grad_norm: 0.7324133191429931, iteration: 414432
loss: 0.9974699020385742,grad_norm: 0.9881228367884239, iteration: 414433
loss: 0.9770511388778687,grad_norm: 0.5830031181404774, iteration: 414434
loss: 0.9668191075325012,grad_norm: 0.7648147533279407, iteration: 414435
loss: 0.9596799612045288,grad_norm: 0.7192481266222848, iteration: 414436
loss: 1.0187402963638306,grad_norm: 0.7729049461190599, iteration: 414437
loss: 0.9432851076126099,grad_norm: 0.9999991524098142, iteration: 414438
loss: 0.9641271233558655,grad_norm: 0.7151576983971761, iteration: 414439
loss: 1.0043668746948242,grad_norm: 0.6690019358772326, iteration: 414440
loss: 0.9937130808830261,grad_norm: 0.5551902478502215, iteration: 414441
loss: 1.0031862258911133,grad_norm: 0.8948354383268862, iteration: 414442
loss: 0.994399905204773,grad_norm: 0.8549198714476421, iteration: 414443
loss: 0.9835880994796753,grad_norm: 0.8489725194565623, iteration: 414444
loss: 0.9903537631034851,grad_norm: 0.9186478551139019, iteration: 414445
loss: 0.9512202739715576,grad_norm: 0.9067060459858963, iteration: 414446
loss: 1.0020796060562134,grad_norm: 0.8040168502523485, iteration: 414447
loss: 0.9961602091789246,grad_norm: 0.9999997239560976, iteration: 414448
loss: 1.0238133668899536,grad_norm: 0.8770130680123022, iteration: 414449
loss: 0.9680886268615723,grad_norm: 0.7333136865637774, iteration: 414450
loss: 0.9916632175445557,grad_norm: 0.8250775078875636, iteration: 414451
loss: 1.0079768896102905,grad_norm: 0.7048294481350877, iteration: 414452
loss: 0.9928731322288513,grad_norm: 0.753116746947585, iteration: 414453
loss: 0.9758725762367249,grad_norm: 0.7439059569877332, iteration: 414454
loss: 1.0022555589675903,grad_norm: 0.8496313331149051, iteration: 414455
loss: 1.0433012247085571,grad_norm: 0.7193174871879221, iteration: 414456
loss: 0.9786747694015503,grad_norm: 0.7254425871269149, iteration: 414457
loss: 0.9659708142280579,grad_norm: 0.703025202647072, iteration: 414458
loss: 1.021194338798523,grad_norm: 0.9999991581901398, iteration: 414459
loss: 1.0120046138763428,grad_norm: 0.902302571683454, iteration: 414460
loss: 0.9904745221138,grad_norm: 0.9999991257727077, iteration: 414461
loss: 1.0273702144622803,grad_norm: 0.9457989611834846, iteration: 414462
loss: 1.0000144243240356,grad_norm: 0.8502793526884379, iteration: 414463
loss: 1.0030165910720825,grad_norm: 0.7449635085585716, iteration: 414464
loss: 1.0084645748138428,grad_norm: 0.6839756548487106, iteration: 414465
loss: 0.9777215123176575,grad_norm: 0.7730054585572286, iteration: 414466
loss: 1.011812686920166,grad_norm: 0.9999993422372021, iteration: 414467
loss: 1.0342037677764893,grad_norm: 0.7981694560943897, iteration: 414468
loss: 1.02977454662323,grad_norm: 0.9999990900517683, iteration: 414469
loss: 1.0023939609527588,grad_norm: 0.8466807538172489, iteration: 414470
loss: 0.9804093837738037,grad_norm: 0.7932599782851582, iteration: 414471
loss: 0.9556390047073364,grad_norm: 0.7894272153625285, iteration: 414472
loss: 1.046931266784668,grad_norm: 0.8390786225264059, iteration: 414473
loss: 1.092431664466858,grad_norm: 0.999999151160101, iteration: 414474
loss: 1.0027388334274292,grad_norm: 0.6850146800037755, iteration: 414475
loss: 0.9916144609451294,grad_norm: 0.7756563169858507, iteration: 414476
loss: 0.9528104662895203,grad_norm: 0.7837309215542652, iteration: 414477
loss: 0.9766591191291809,grad_norm: 0.8806917436274456, iteration: 414478
loss: 1.0050387382507324,grad_norm: 0.7287034072384282, iteration: 414479
loss: 0.9832900762557983,grad_norm: 0.7771736057792253, iteration: 414480
loss: 0.984214186668396,grad_norm: 0.7391604687685102, iteration: 414481
loss: 1.0010172128677368,grad_norm: 0.8052886986835857, iteration: 414482
loss: 0.9554086923599243,grad_norm: 0.9021201115917667, iteration: 414483
loss: 1.0123965740203857,grad_norm: 0.9999991709531849, iteration: 414484
loss: 0.9848309755325317,grad_norm: 0.8881667879584524, iteration: 414485
loss: 1.0245128870010376,grad_norm: 0.7160734850184837, iteration: 414486
loss: 0.9839153289794922,grad_norm: 0.7640615181430709, iteration: 414487
loss: 0.9850310683250427,grad_norm: 0.921433771955233, iteration: 414488
loss: 1.0177290439605713,grad_norm: 0.8360255208072218, iteration: 414489
loss: 1.0051565170288086,grad_norm: 0.8585720564004724, iteration: 414490
loss: 1.0236860513687134,grad_norm: 0.7255090375352523, iteration: 414491
loss: 1.0234984159469604,grad_norm: 0.9151781566018838, iteration: 414492
loss: 0.9991252422332764,grad_norm: 0.9999993707867851, iteration: 414493
loss: 1.0026792287826538,grad_norm: 0.7089989736931595, iteration: 414494
loss: 0.9778022170066833,grad_norm: 0.8039317499011401, iteration: 414495
loss: 0.9992814660072327,grad_norm: 0.7461570575054572, iteration: 414496
loss: 0.9869171380996704,grad_norm: 0.8061887175664668, iteration: 414497
loss: 0.9909955263137817,grad_norm: 0.7457065632527835, iteration: 414498
loss: 1.0271832942962646,grad_norm: 0.7284309189020026, iteration: 414499
loss: 1.0311836004257202,grad_norm: 0.7759025945436988, iteration: 414500
loss: 1.030884027481079,grad_norm: 0.9695601590578233, iteration: 414501
loss: 0.9823848605155945,grad_norm: 0.8487760117809298, iteration: 414502
loss: 1.0490474700927734,grad_norm: 0.9695533812526198, iteration: 414503
loss: 0.9710396528244019,grad_norm: 0.898518799351899, iteration: 414504
loss: 0.9751056432723999,grad_norm: 0.8589475123233061, iteration: 414505
loss: 0.9733911752700806,grad_norm: 0.776914797600887, iteration: 414506
loss: 1.0103248357772827,grad_norm: 0.6264376564584595, iteration: 414507
loss: 1.0210119485855103,grad_norm: 0.9231140523820005, iteration: 414508
loss: 1.0390739440917969,grad_norm: 0.9999999402167978, iteration: 414509
loss: 0.9968221187591553,grad_norm: 0.9089621961703681, iteration: 414510
loss: 1.0215120315551758,grad_norm: 0.999999464385364, iteration: 414511
loss: 1.0057510137557983,grad_norm: 0.7776439722508992, iteration: 414512
loss: 1.0555237531661987,grad_norm: 0.9999991201444484, iteration: 414513
loss: 0.9901648759841919,grad_norm: 0.7252435986523639, iteration: 414514
loss: 1.034239411354065,grad_norm: 0.9999992788429524, iteration: 414515
loss: 1.034991979598999,grad_norm: 0.7888464430162462, iteration: 414516
loss: 0.9971556067466736,grad_norm: 0.8468567316974128, iteration: 414517
loss: 0.9677553176879883,grad_norm: 0.7669184625838243, iteration: 414518
loss: 1.019559383392334,grad_norm: 0.6853240258191485, iteration: 414519
loss: 0.9666594862937927,grad_norm: 0.7735254698723194, iteration: 414520
loss: 0.9744150042533875,grad_norm: 0.9999994412411406, iteration: 414521
loss: 1.0109326839447021,grad_norm: 0.9645174064181423, iteration: 414522
loss: 1.018550992012024,grad_norm: 0.9999999971218034, iteration: 414523
loss: 0.976686418056488,grad_norm: 0.7387973205709937, iteration: 414524
loss: 1.0059316158294678,grad_norm: 0.7587497821214584, iteration: 414525
loss: 1.0393593311309814,grad_norm: 0.8660279083307979, iteration: 414526
loss: 0.9891573190689087,grad_norm: 0.7288936362390844, iteration: 414527
loss: 1.0144226551055908,grad_norm: 0.8441803187581225, iteration: 414528
loss: 0.9995233416557312,grad_norm: 0.750524133658547, iteration: 414529
loss: 0.9936205744743347,grad_norm: 0.6791988584654952, iteration: 414530
loss: 1.1051671504974365,grad_norm: 0.7873412213516322, iteration: 414531
loss: 0.978736162185669,grad_norm: 0.779557019730672, iteration: 414532
loss: 1.0026222467422485,grad_norm: 0.7444405561811779, iteration: 414533
loss: 1.003804087638855,grad_norm: 0.7873073769475873, iteration: 414534
loss: 1.003188133239746,grad_norm: 0.7468673012155276, iteration: 414535
loss: 1.0147972106933594,grad_norm: 0.815385098371342, iteration: 414536
loss: 1.0028926134109497,grad_norm: 0.6654300726788561, iteration: 414537
loss: 0.9554670453071594,grad_norm: 0.7055311586945215, iteration: 414538
loss: 1.043002724647522,grad_norm: 0.9999995081418397, iteration: 414539
loss: 0.986558735370636,grad_norm: 0.840525361308804, iteration: 414540
loss: 1.0150490999221802,grad_norm: 0.7739394267687838, iteration: 414541
loss: 1.0184721946716309,grad_norm: 0.7997246917183014, iteration: 414542
loss: 1.0112015008926392,grad_norm: 0.7816735793032125, iteration: 414543
loss: 1.0282670259475708,grad_norm: 0.6989668923956619, iteration: 414544
loss: 1.0176498889923096,grad_norm: 0.7187788173098828, iteration: 414545
loss: 1.0840182304382324,grad_norm: 0.9999990573437285, iteration: 414546
loss: 1.1511746644973755,grad_norm: 0.9999996220216892, iteration: 414547
loss: 1.0285905599594116,grad_norm: 0.9288674943021263, iteration: 414548
loss: 1.043391466140747,grad_norm: 0.7910203789552732, iteration: 414549
loss: 0.9695302844047546,grad_norm: 0.8389112798360976, iteration: 414550
loss: 0.9742724299430847,grad_norm: 0.7132363853043457, iteration: 414551
loss: 0.9816890358924866,grad_norm: 0.8695756849194175, iteration: 414552
loss: 1.001163363456726,grad_norm: 0.9369497651400611, iteration: 414553
loss: 0.9952224493026733,grad_norm: 0.685103977587556, iteration: 414554
loss: 1.0341781377792358,grad_norm: 0.999999585031142, iteration: 414555
loss: 1.0198874473571777,grad_norm: 0.8274274914640684, iteration: 414556
loss: 1.0079642534255981,grad_norm: 0.7725513307812107, iteration: 414557
loss: 1.078608751296997,grad_norm: 0.9999998821094276, iteration: 414558
loss: 1.0117210149765015,grad_norm: 0.7652188130172586, iteration: 414559
loss: 0.9966595768928528,grad_norm: 0.7589922904064506, iteration: 414560
loss: 0.9930325746536255,grad_norm: 0.8169755267225642, iteration: 414561
loss: 1.020370364189148,grad_norm: 0.6883642294621113, iteration: 414562
loss: 1.0228497982025146,grad_norm: 0.9999998205131606, iteration: 414563
loss: 1.0203977823257446,grad_norm: 0.7718940813164927, iteration: 414564
loss: 1.0044951438903809,grad_norm: 0.7706292628220119, iteration: 414565
loss: 1.0282354354858398,grad_norm: 0.7114507135292593, iteration: 414566
loss: 1.00722074508667,grad_norm: 0.9999991534407863, iteration: 414567
loss: 0.9810053706169128,grad_norm: 0.8184198888118006, iteration: 414568
loss: 1.0051416158676147,grad_norm: 0.7494928797617312, iteration: 414569
loss: 0.9887309074401855,grad_norm: 0.8060004640320917, iteration: 414570
loss: 0.993377685546875,grad_norm: 0.7290608082008624, iteration: 414571
loss: 0.9704921245574951,grad_norm: 0.7380866107538935, iteration: 414572
loss: 0.9814646244049072,grad_norm: 0.8412496835013632, iteration: 414573
loss: 1.015461802482605,grad_norm: 0.6604142036742409, iteration: 414574
loss: 0.9770509600639343,grad_norm: 0.8490957582271146, iteration: 414575
loss: 1.0846824645996094,grad_norm: 1.0000000375068387, iteration: 414576
loss: 0.9909982681274414,grad_norm: 0.7780663500213595, iteration: 414577
loss: 0.9650314450263977,grad_norm: 0.7327273391235685, iteration: 414578
loss: 1.0220134258270264,grad_norm: 0.7826802228766767, iteration: 414579
loss: 0.9992915391921997,grad_norm: 0.8712162536395704, iteration: 414580
loss: 1.0145244598388672,grad_norm: 0.8558673178292628, iteration: 414581
loss: 1.0195748805999756,grad_norm: 0.8450631137794892, iteration: 414582
loss: 1.010408639907837,grad_norm: 0.7955536462797413, iteration: 414583
loss: 1.0905927419662476,grad_norm: 0.817128678836069, iteration: 414584
loss: 0.9946760535240173,grad_norm: 0.7212057741608678, iteration: 414585
loss: 1.0082108974456787,grad_norm: 0.9999995332221433, iteration: 414586
loss: 0.9860029220581055,grad_norm: 0.8689664362040403, iteration: 414587
loss: 0.9632001519203186,grad_norm: 0.8103374512732164, iteration: 414588
loss: 0.9784783124923706,grad_norm: 0.8645428550440954, iteration: 414589
loss: 1.1508761644363403,grad_norm: 0.9999993174090755, iteration: 414590
loss: 0.9953341484069824,grad_norm: 0.9650744889310543, iteration: 414591
loss: 1.042389988899231,grad_norm: 0.76149405384661, iteration: 414592
loss: 1.0390338897705078,grad_norm: 0.664516985120426, iteration: 414593
loss: 0.9499630928039551,grad_norm: 0.8305880920848618, iteration: 414594
loss: 1.0207464694976807,grad_norm: 0.9999990524887572, iteration: 414595
loss: 1.0352303981781006,grad_norm: 0.7386563387576606, iteration: 414596
loss: 0.9629424810409546,grad_norm: 0.9970366306372997, iteration: 414597
loss: 1.0129225254058838,grad_norm: 0.9999992036889491, iteration: 414598
loss: 0.9845919609069824,grad_norm: 0.9882149891076468, iteration: 414599
loss: 0.9971219897270203,grad_norm: 0.902298303190198, iteration: 414600
loss: 0.993975043296814,grad_norm: 0.999999110643482, iteration: 414601
loss: 0.9979150295257568,grad_norm: 0.8832194578793077, iteration: 414602
loss: 0.9978036880493164,grad_norm: 0.9393525692631598, iteration: 414603
loss: 0.9941748976707458,grad_norm: 0.6659609149993596, iteration: 414604
loss: 0.9933865070343018,grad_norm: 0.6420648636285581, iteration: 414605
loss: 1.0076370239257812,grad_norm: 0.876125692776127, iteration: 414606
loss: 0.9689887166023254,grad_norm: 0.999999153692228, iteration: 414607
loss: 0.970821738243103,grad_norm: 0.7703960524815859, iteration: 414608
loss: 0.9907106161117554,grad_norm: 0.662226412585948, iteration: 414609
loss: 1.003485083580017,grad_norm: 0.8518984208535225, iteration: 414610
loss: 1.1297410726547241,grad_norm: 0.9999997183286409, iteration: 414611
loss: 1.067010760307312,grad_norm: 0.9999997892960187, iteration: 414612
loss: 1.0425939559936523,grad_norm: 0.7722933279452686, iteration: 414613
loss: 1.037506103515625,grad_norm: 0.6510842215427256, iteration: 414614
loss: 1.0083106756210327,grad_norm: 0.8016615202825325, iteration: 414615
loss: 0.9581372141838074,grad_norm: 0.7766537735276259, iteration: 414616
loss: 1.2078818082809448,grad_norm: 0.9999999297070199, iteration: 414617
loss: 1.0020208358764648,grad_norm: 0.7625997860863799, iteration: 414618
loss: 0.979587972164154,grad_norm: 0.7785049538368595, iteration: 414619
loss: 0.9892351627349854,grad_norm: 0.999999740498481, iteration: 414620
loss: 0.9829848408699036,grad_norm: 0.7981368213922027, iteration: 414621
loss: 0.9915974736213684,grad_norm: 0.8433671106489196, iteration: 414622
loss: 0.9854282140731812,grad_norm: 0.7817814261034076, iteration: 414623
loss: 1.0431678295135498,grad_norm: 0.815548235756483, iteration: 414624
loss: 0.994766116142273,grad_norm: 0.9999990753711986, iteration: 414625
loss: 0.990785539150238,grad_norm: 0.7204004986371315, iteration: 414626
loss: 1.0042017698287964,grad_norm: 0.7554277821349484, iteration: 414627
loss: 1.0272657871246338,grad_norm: 0.6994140150770967, iteration: 414628
loss: 1.0099669694900513,grad_norm: 0.6469560645473268, iteration: 414629
loss: 0.9632859230041504,grad_norm: 0.9615873439614046, iteration: 414630
loss: 1.0310702323913574,grad_norm: 0.7474682473252253, iteration: 414631
loss: 0.9970459342002869,grad_norm: 0.8960065656285404, iteration: 414632
loss: 0.9716271162033081,grad_norm: 0.7901626501699509, iteration: 414633
loss: 1.0407800674438477,grad_norm: 0.7781968459806136, iteration: 414634
loss: 1.009029746055603,grad_norm: 0.8548209745456029, iteration: 414635
loss: 1.0131295919418335,grad_norm: 0.8702898364602691, iteration: 414636
loss: 1.0348740816116333,grad_norm: 0.7629116425132005, iteration: 414637
loss: 1.0006028413772583,grad_norm: 0.7615462433181195, iteration: 414638
loss: 0.9908610582351685,grad_norm: 0.8505918980818273, iteration: 414639
loss: 0.9657848477363586,grad_norm: 0.9302236555186381, iteration: 414640
loss: 1.0410114526748657,grad_norm: 0.8053731434069648, iteration: 414641
loss: 0.994351327419281,grad_norm: 0.9108614431188293, iteration: 414642
loss: 1.0038995742797852,grad_norm: 0.9203303603624664, iteration: 414643
loss: 1.0624754428863525,grad_norm: 0.6803295313582274, iteration: 414644
loss: 1.0808392763137817,grad_norm: 0.9999991538509012, iteration: 414645
loss: 1.0237849950790405,grad_norm: 0.9999991106070252, iteration: 414646
loss: 0.9895208477973938,grad_norm: 0.6801113848530089, iteration: 414647
loss: 1.0074256658554077,grad_norm: 0.76647767630918, iteration: 414648
loss: 0.9508001208305359,grad_norm: 0.6354693646605445, iteration: 414649
loss: 1.0258320569992065,grad_norm: 0.9170302946274602, iteration: 414650
loss: 1.03823721408844,grad_norm: 0.8386131062810539, iteration: 414651
loss: 0.9854717254638672,grad_norm: 0.716831657893103, iteration: 414652
loss: 0.9711759686470032,grad_norm: 0.696714222995102, iteration: 414653
loss: 0.9541750550270081,grad_norm: 0.956842603105775, iteration: 414654
loss: 0.9899253249168396,grad_norm: 0.6802823260164114, iteration: 414655
loss: 0.9489671587944031,grad_norm: 0.7325047444167471, iteration: 414656
loss: 1.0174068212509155,grad_norm: 0.9790428138390782, iteration: 414657
loss: 1.0368754863739014,grad_norm: 0.8889062827481267, iteration: 414658
loss: 0.9975380301475525,grad_norm: 0.7924520763687811, iteration: 414659
loss: 0.9698069095611572,grad_norm: 0.8435544858391928, iteration: 414660
loss: 1.0049179792404175,grad_norm: 0.78305249968346, iteration: 414661
loss: 0.974910318851471,grad_norm: 0.6329001096712988, iteration: 414662
loss: 1.0021107196807861,grad_norm: 0.7879078084636192, iteration: 414663
loss: 0.9906509518623352,grad_norm: 0.8347778198891961, iteration: 414664
loss: 1.01727294921875,grad_norm: 0.8205949326778309, iteration: 414665
loss: 0.9964569211006165,grad_norm: 0.7499243763727136, iteration: 414666
loss: 1.0562310218811035,grad_norm: 0.7876495528914199, iteration: 414667
loss: 1.0046072006225586,grad_norm: 0.7131327799845356, iteration: 414668
loss: 1.0396026372909546,grad_norm: 0.9999992618325075, iteration: 414669
loss: 1.0052648782730103,grad_norm: 0.6211844656915643, iteration: 414670
loss: 0.9689763188362122,grad_norm: 0.7371674817552475, iteration: 414671
loss: 1.001774549484253,grad_norm: 0.86331270457245, iteration: 414672
loss: 1.004571795463562,grad_norm: 0.8816843286201216, iteration: 414673
loss: 1.0172497034072876,grad_norm: 0.9999999958656697, iteration: 414674
loss: 1.0627764463424683,grad_norm: 0.8535787179392335, iteration: 414675
loss: 1.0105587244033813,grad_norm: 0.7344901629178792, iteration: 414676
loss: 0.9946189522743225,grad_norm: 0.9189203847477663, iteration: 414677
loss: 1.01870858669281,grad_norm: 0.9999991556424199, iteration: 414678
loss: 0.9811374545097351,grad_norm: 0.9999993800473114, iteration: 414679
loss: 0.9837190508842468,grad_norm: 0.7178321236719688, iteration: 414680
loss: 1.0065584182739258,grad_norm: 0.9999989472979646, iteration: 414681
loss: 0.9995816946029663,grad_norm: 0.8407475201498754, iteration: 414682
loss: 0.9577550292015076,grad_norm: 0.853288320435865, iteration: 414683
loss: 0.9967018961906433,grad_norm: 0.9918872439120928, iteration: 414684
loss: 0.9659775495529175,grad_norm: 0.7314747297615731, iteration: 414685
loss: 0.9703267812728882,grad_norm: 0.8159180423309889, iteration: 414686
loss: 0.977660059928894,grad_norm: 0.7324781951772195, iteration: 414687
loss: 1.0450421571731567,grad_norm: 0.9835434087362498, iteration: 414688
loss: 0.9934406876564026,grad_norm: 0.9999993952287564, iteration: 414689
loss: 1.0161224603652954,grad_norm: 0.6411583912774907, iteration: 414690
loss: 1.0685747861862183,grad_norm: 0.999999079098785, iteration: 414691
loss: 1.0246061086654663,grad_norm: 0.7929511793108056, iteration: 414692
loss: 1.0121914148330688,grad_norm: 0.6393889206239338, iteration: 414693
loss: 1.0034295320510864,grad_norm: 0.8933018882950141, iteration: 414694
loss: 1.0174983739852905,grad_norm: 0.6666826045885245, iteration: 414695
loss: 1.0259521007537842,grad_norm: 0.8557437603852602, iteration: 414696
loss: 1.0337145328521729,grad_norm: 0.9999990963577402, iteration: 414697
loss: 1.0232945680618286,grad_norm: 0.9999991235756605, iteration: 414698
loss: 0.9693337082862854,grad_norm: 0.7003797770546035, iteration: 414699
loss: 0.9721621870994568,grad_norm: 0.7884872522527308, iteration: 414700
loss: 1.017557144165039,grad_norm: 0.7883314840207035, iteration: 414701
loss: 1.0042150020599365,grad_norm: 0.6953414577358867, iteration: 414702
loss: 0.9445164799690247,grad_norm: 0.7615825941251033, iteration: 414703
loss: 1.0129187107086182,grad_norm: 0.7575861497588875, iteration: 414704
loss: 0.9846798777580261,grad_norm: 0.8609650517429419, iteration: 414705
loss: 1.0331404209136963,grad_norm: 0.7396100786729636, iteration: 414706
loss: 1.0036083459854126,grad_norm: 0.8203050894664693, iteration: 414707
loss: 1.0006095170974731,grad_norm: 0.9999992028477666, iteration: 414708
loss: 1.0154789686203003,grad_norm: 0.7487227747257635, iteration: 414709
loss: 1.0328317880630493,grad_norm: 0.999999627364516, iteration: 414710
loss: 1.042917013168335,grad_norm: 0.7805628056773886, iteration: 414711
loss: 1.0795973539352417,grad_norm: 0.9999990612136577, iteration: 414712
loss: 0.9721112847328186,grad_norm: 0.7119982042956556, iteration: 414713
loss: 1.0287790298461914,grad_norm: 0.9999996480430065, iteration: 414714
loss: 1.0026451349258423,grad_norm: 0.9135394792851681, iteration: 414715
loss: 0.9895886778831482,grad_norm: 0.7617160403227204, iteration: 414716
loss: 0.9966909885406494,grad_norm: 0.8628472892457176, iteration: 414717
loss: 0.9976642727851868,grad_norm: 0.6737553744456185, iteration: 414718
loss: 1.0168251991271973,grad_norm: 0.7908233604773436, iteration: 414719
loss: 0.984664261341095,grad_norm: 0.7625081748483202, iteration: 414720
loss: 1.009909749031067,grad_norm: 0.9999990799089262, iteration: 414721
loss: 0.9865341186523438,grad_norm: 0.903673260237521, iteration: 414722
loss: 1.0111931562423706,grad_norm: 0.8724359660653241, iteration: 414723
loss: 1.0210543870925903,grad_norm: 0.7804961643893548, iteration: 414724
loss: 1.010573387145996,grad_norm: 0.827226033722515, iteration: 414725
loss: 0.9963547587394714,grad_norm: 0.833022858344338, iteration: 414726
loss: 0.9735696911811829,grad_norm: 0.6826771679663123, iteration: 414727
loss: 0.9978108406066895,grad_norm: 0.6717570576903499, iteration: 414728
loss: 1.0627340078353882,grad_norm: 0.9999994615973231, iteration: 414729
loss: 1.0026509761810303,grad_norm: 0.836445259026229, iteration: 414730
loss: 1.0275431871414185,grad_norm: 0.9171468403370215, iteration: 414731
loss: 1.009114384651184,grad_norm: 0.6650526214925678, iteration: 414732
loss: 0.9846165180206299,grad_norm: 0.6554448280255014, iteration: 414733
loss: 1.0339910984039307,grad_norm: 0.6693375707274168, iteration: 414734
loss: 1.570229172706604,grad_norm: 0.999999851151812, iteration: 414735
loss: 1.0272741317749023,grad_norm: 0.8862387639299067, iteration: 414736
loss: 1.0881658792495728,grad_norm: 0.7892191378394245, iteration: 414737
loss: 0.9916141629219055,grad_norm: 0.727784957634114, iteration: 414738
loss: 0.9848054647445679,grad_norm: 0.7201433093825024, iteration: 414739
loss: 1.0174133777618408,grad_norm: 0.8045132707197694, iteration: 414740
loss: 0.9589405059814453,grad_norm: 0.6668860446682798, iteration: 414741
loss: 0.9993768334388733,grad_norm: 0.8092139790924417, iteration: 414742
loss: 0.9904307126998901,grad_norm: 0.9609076235219719, iteration: 414743
loss: 1.0506434440612793,grad_norm: 0.9443699590525655, iteration: 414744
loss: 1.0078006982803345,grad_norm: 0.855126933050626, iteration: 414745
loss: 1.0829167366027832,grad_norm: 0.9999998137904806, iteration: 414746
loss: 1.0495651960372925,grad_norm: 0.9999991316064822, iteration: 414747
loss: 1.0502431392669678,grad_norm: 0.7433714347280345, iteration: 414748
loss: 1.0707781314849854,grad_norm: 0.9999997819418618, iteration: 414749
loss: 0.9788260459899902,grad_norm: 0.8574263994880869, iteration: 414750
loss: 0.998715341091156,grad_norm: 0.83897342914263, iteration: 414751
loss: 1.0348118543624878,grad_norm: 0.9999995610545448, iteration: 414752
loss: 1.0432040691375732,grad_norm: 0.9999992605040492, iteration: 414753
loss: 0.999841034412384,grad_norm: 0.7593283658281601, iteration: 414754
loss: 1.000134825706482,grad_norm: 0.7096056790988984, iteration: 414755
loss: 1.0209236145019531,grad_norm: 0.9651063051870832, iteration: 414756
loss: 1.083752989768982,grad_norm: 0.9999990593941908, iteration: 414757
loss: 0.9874534606933594,grad_norm: 0.8910372690358955, iteration: 414758
loss: 1.0151007175445557,grad_norm: 0.8647361710713116, iteration: 414759
loss: 1.1435104608535767,grad_norm: 0.9999998780153877, iteration: 414760
loss: 1.003830075263977,grad_norm: 0.7950090153192746, iteration: 414761
loss: 0.9657955169677734,grad_norm: 0.848947192647366, iteration: 414762
loss: 0.974236249923706,grad_norm: 0.6586451783991074, iteration: 414763
loss: 1.0213255882263184,grad_norm: 0.9999991063044394, iteration: 414764
loss: 0.94523024559021,grad_norm: 0.724274913845888, iteration: 414765
loss: 0.9986972808837891,grad_norm: 0.8022530648125159, iteration: 414766
loss: 1.00969398021698,grad_norm: 0.9999990904731089, iteration: 414767
loss: 1.0370405912399292,grad_norm: 0.7657480056755345, iteration: 414768
loss: 1.0114870071411133,grad_norm: 0.7504068508967343, iteration: 414769
loss: 1.0170329809188843,grad_norm: 0.6769279062624376, iteration: 414770
loss: 1.0300081968307495,grad_norm: 0.7452465651195329, iteration: 414771
loss: 1.0044153928756714,grad_norm: 0.7711089330342974, iteration: 414772
loss: 1.012754201889038,grad_norm: 0.6847478076224566, iteration: 414773
loss: 1.0303795337677002,grad_norm: 0.8297751969992044, iteration: 414774
loss: 0.942658543586731,grad_norm: 0.839503455033966, iteration: 414775
loss: 1.0388696193695068,grad_norm: 0.7551527607384849, iteration: 414776
loss: 0.9797325730323792,grad_norm: 0.8723309547989415, iteration: 414777
loss: 0.9858700633049011,grad_norm: 0.8063659631499244, iteration: 414778
loss: 1.0693113803863525,grad_norm: 0.9917888886180501, iteration: 414779
loss: 0.9930588603019714,grad_norm: 0.9999989972186145, iteration: 414780
loss: 0.9976271986961365,grad_norm: 0.8167026017539516, iteration: 414781
loss: 1.018572211265564,grad_norm: 0.7445574050637535, iteration: 414782
loss: 0.9899533987045288,grad_norm: 0.9442881594251061, iteration: 414783
loss: 1.006544589996338,grad_norm: 0.9999992439324474, iteration: 414784
loss: 0.9825559854507446,grad_norm: 0.65344209192141, iteration: 414785
loss: 1.0340766906738281,grad_norm: 0.8586484322437381, iteration: 414786
loss: 1.0651679039001465,grad_norm: 0.9999992119035767, iteration: 414787
loss: 0.9967168569564819,grad_norm: 0.6059522371451243, iteration: 414788
loss: 1.0708322525024414,grad_norm: 0.999999065984281, iteration: 414789
loss: 1.0039111375808716,grad_norm: 0.8329596967195826, iteration: 414790
loss: 1.0409060716629028,grad_norm: 0.9782001459526071, iteration: 414791
loss: 0.9833115339279175,grad_norm: 0.6908965265224352, iteration: 414792
loss: 1.034459114074707,grad_norm: 0.9999991267638635, iteration: 414793
loss: 1.0149542093276978,grad_norm: 0.658878395502503, iteration: 414794
loss: 0.9885451197624207,grad_norm: 0.8165898013523412, iteration: 414795
loss: 0.9814416766166687,grad_norm: 0.8185841458009032, iteration: 414796
loss: 0.9740391373634338,grad_norm: 0.7969328211446466, iteration: 414797
loss: 0.9911617636680603,grad_norm: 0.694876611224636, iteration: 414798
loss: 1.0049986839294434,grad_norm: 0.812893322655529, iteration: 414799
loss: 1.0065973997116089,grad_norm: 0.900772842875174, iteration: 414800
loss: 1.0489819049835205,grad_norm: 0.8981137120045917, iteration: 414801
loss: 1.0050909519195557,grad_norm: 0.7915299681529925, iteration: 414802
loss: 0.9968264102935791,grad_norm: 0.8877016149713667, iteration: 414803
loss: 1.0163792371749878,grad_norm: 0.8394724812050163, iteration: 414804
loss: 1.0255502462387085,grad_norm: 0.8901887883294998, iteration: 414805
loss: 1.0557091236114502,grad_norm: 0.9999991190827711, iteration: 414806
loss: 1.0325627326965332,grad_norm: 0.9999999334848277, iteration: 414807
loss: 0.9779599905014038,grad_norm: 0.9116117516933848, iteration: 414808
loss: 1.0058873891830444,grad_norm: 0.9035714630856523, iteration: 414809
loss: 0.9889949560165405,grad_norm: 0.8060900796752152, iteration: 414810
loss: 0.9987544417381287,grad_norm: 0.814475220790095, iteration: 414811
loss: 1.0155482292175293,grad_norm: 0.810809458145756, iteration: 414812
loss: 0.9771102666854858,grad_norm: 0.6509386943482924, iteration: 414813
loss: 1.0105606317520142,grad_norm: 0.7554344166317777, iteration: 414814
loss: 1.0123518705368042,grad_norm: 0.6759784072318016, iteration: 414815
loss: 0.9883089065551758,grad_norm: 0.7839804630931321, iteration: 414816
loss: 1.0366160869598389,grad_norm: 0.7918055339588508, iteration: 414817
loss: 1.0396652221679688,grad_norm: 0.777397929491509, iteration: 414818
loss: 1.0050793886184692,grad_norm: 0.8175295744092095, iteration: 414819
loss: 1.0178550481796265,grad_norm: 0.6557194473686421, iteration: 414820
loss: 0.9895386099815369,grad_norm: 0.6820094201896866, iteration: 414821
loss: 1.0830836296081543,grad_norm: 0.999999250164825, iteration: 414822
loss: 0.9714141488075256,grad_norm: 0.72262815901321, iteration: 414823
loss: 1.0195232629776,grad_norm: 0.6992698193584154, iteration: 414824
loss: 0.9873566031455994,grad_norm: 0.9999997618450808, iteration: 414825
loss: 1.013282299041748,grad_norm: 0.8483051653539336, iteration: 414826
loss: 1.0132564306259155,grad_norm: 0.7046634737542495, iteration: 414827
loss: 0.9684050679206848,grad_norm: 0.8886433383783966, iteration: 414828
loss: 0.9976092576980591,grad_norm: 0.9999994631907307, iteration: 414829
loss: 1.0111137628555298,grad_norm: 0.8212912517458838, iteration: 414830
loss: 1.0027546882629395,grad_norm: 0.9999992935167679, iteration: 414831
loss: 1.0205583572387695,grad_norm: 0.7489177154450768, iteration: 414832
loss: 1.032133936882019,grad_norm: 0.8635753112211763, iteration: 414833
loss: 1.00435209274292,grad_norm: 0.836188581982348, iteration: 414834
loss: 1.006620168685913,grad_norm: 0.6979520800037736, iteration: 414835
loss: 0.9791900515556335,grad_norm: 0.7347663915247892, iteration: 414836
loss: 0.9981213808059692,grad_norm: 0.7844625409423835, iteration: 414837
loss: 1.0041404962539673,grad_norm: 0.8100249205914792, iteration: 414838
loss: 1.0940499305725098,grad_norm: 0.9999992673593968, iteration: 414839
loss: 1.0233802795410156,grad_norm: 0.7720341137159746, iteration: 414840
loss: 1.02327561378479,grad_norm: 0.7767994633680348, iteration: 414841
loss: 1.041707992553711,grad_norm: 0.7637715805953667, iteration: 414842
loss: 0.9583732485771179,grad_norm: 0.6937809938339439, iteration: 414843
loss: 0.9770811796188354,grad_norm: 0.8070143036595079, iteration: 414844
loss: 0.9909201264381409,grad_norm: 0.7995379750055526, iteration: 414845
loss: 0.9777672290802002,grad_norm: 0.8329853925938092, iteration: 414846
loss: 1.0290024280548096,grad_norm: 0.9842777014829193, iteration: 414847
loss: 1.0464229583740234,grad_norm: 0.9422789726966709, iteration: 414848
loss: 1.0176222324371338,grad_norm: 0.8691600837654494, iteration: 414849
loss: 0.9645869731903076,grad_norm: 0.8987047656708232, iteration: 414850
loss: 1.0361158847808838,grad_norm: 0.7559071775283464, iteration: 414851
loss: 1.0471282005310059,grad_norm: 0.7436694218939743, iteration: 414852
loss: 0.9845271110534668,grad_norm: 0.7584124474124918, iteration: 414853
loss: 1.0093162059783936,grad_norm: 0.6552714775887258, iteration: 414854
loss: 1.0240442752838135,grad_norm: 0.9027284269222446, iteration: 414855
loss: 0.9783390164375305,grad_norm: 0.7698432715233519, iteration: 414856
loss: 0.9718124866485596,grad_norm: 0.8836934335417316, iteration: 414857
loss: 1.0179767608642578,grad_norm: 0.6570172381284016, iteration: 414858
loss: 1.0095738172531128,grad_norm: 0.8876458278680006, iteration: 414859
loss: 0.9955267906188965,grad_norm: 0.6385963191622879, iteration: 414860
loss: 1.0065973997116089,grad_norm: 0.7868663749262446, iteration: 414861
loss: 0.9955695867538452,grad_norm: 0.9999997923467339, iteration: 414862
loss: 1.001554250717163,grad_norm: 0.7234417880337155, iteration: 414863
loss: 1.0275063514709473,grad_norm: 0.8619307117987629, iteration: 414864
loss: 1.0696656703948975,grad_norm: 0.9999999063497442, iteration: 414865
loss: 1.00718355178833,grad_norm: 0.9999990101468589, iteration: 414866
loss: 1.0074015855789185,grad_norm: 0.6308349591544103, iteration: 414867
loss: 0.9830254316329956,grad_norm: 0.861844824597949, iteration: 414868
loss: 0.9838920831680298,grad_norm: 0.8949050376892702, iteration: 414869
loss: 1.006851077079773,grad_norm: 0.9999992619971723, iteration: 414870
loss: 0.9601455330848694,grad_norm: 0.9526519170081288, iteration: 414871
loss: 1.0180988311767578,grad_norm: 0.7640951794218012, iteration: 414872
loss: 1.0233235359191895,grad_norm: 0.9999990841694925, iteration: 414873
loss: 0.9781305193901062,grad_norm: 0.8166626839087185, iteration: 414874
loss: 0.9862062931060791,grad_norm: 0.8791801623014665, iteration: 414875
loss: 1.0023695230484009,grad_norm: 0.9357191228420495, iteration: 414876
loss: 0.9835434556007385,grad_norm: 0.8240045917632918, iteration: 414877
loss: 1.0503555536270142,grad_norm: 0.8231447263580062, iteration: 414878
loss: 1.0297521352767944,grad_norm: 0.7313680343974007, iteration: 414879
loss: 0.9872241020202637,grad_norm: 0.7933868067581061, iteration: 414880
loss: 1.0029908418655396,grad_norm: 0.6327564522793288, iteration: 414881
loss: 1.0210663080215454,grad_norm: 0.7303551141852048, iteration: 414882
loss: 0.99627685546875,grad_norm: 0.783253282017734, iteration: 414883
loss: 1.0117645263671875,grad_norm: 0.6647908087105099, iteration: 414884
loss: 0.988832950592041,grad_norm: 0.773290828688404, iteration: 414885
loss: 1.0090031623840332,grad_norm: 0.8037084474347088, iteration: 414886
loss: 1.0093485116958618,grad_norm: 0.9099760311870474, iteration: 414887
loss: 0.9752987027168274,grad_norm: 0.8478894720156375, iteration: 414888
loss: 1.0090882778167725,grad_norm: 0.7988532112423962, iteration: 414889
loss: 0.9781889915466309,grad_norm: 0.808635844503825, iteration: 414890
loss: 0.9804604053497314,grad_norm: 0.6911785871532178, iteration: 414891
loss: 0.9608401656150818,grad_norm: 0.7725984256370938, iteration: 414892
loss: 0.9594546556472778,grad_norm: 0.8283441973979105, iteration: 414893
loss: 0.9879869222640991,grad_norm: 0.7794507798491781, iteration: 414894
loss: 0.9853962063789368,grad_norm: 0.7996475946565269, iteration: 414895
loss: 0.9804384708404541,grad_norm: 0.8268895094354817, iteration: 414896
loss: 1.0070186853408813,grad_norm: 0.9999991556148475, iteration: 414897
loss: 1.024814248085022,grad_norm: 0.7449951292780641, iteration: 414898
loss: 0.990405797958374,grad_norm: 0.7439037860094551, iteration: 414899
loss: 1.0042264461517334,grad_norm: 0.6836110175396868, iteration: 414900
loss: 0.9617905616760254,grad_norm: 0.7366752917103883, iteration: 414901
loss: 0.9723228216171265,grad_norm: 0.9218538564020666, iteration: 414902
loss: 1.0041154623031616,grad_norm: 0.777031020645592, iteration: 414903
loss: 0.9968267679214478,grad_norm: 0.7861435642225587, iteration: 414904
loss: 1.0077128410339355,grad_norm: 0.7540582456711187, iteration: 414905
loss: 1.0494188070297241,grad_norm: 0.7555943362853225, iteration: 414906
loss: 1.009793996810913,grad_norm: 0.819803413710106, iteration: 414907
loss: 1.0431594848632812,grad_norm: 0.7094666803751563, iteration: 414908
loss: 0.9647397398948669,grad_norm: 0.9020253192514162, iteration: 414909
loss: 1.0606119632720947,grad_norm: 0.9999990401769596, iteration: 414910
loss: 1.000388741493225,grad_norm: 0.7925754993069041, iteration: 414911
loss: 1.0319833755493164,grad_norm: 0.8147211528951575, iteration: 414912
loss: 1.002500295639038,grad_norm: 0.6863909716309636, iteration: 414913
loss: 0.9571754336357117,grad_norm: 0.8756219807751517, iteration: 414914
loss: 1.0090653896331787,grad_norm: 0.7975283192748135, iteration: 414915
loss: 1.01260244846344,grad_norm: 0.751687124599038, iteration: 414916
loss: 0.971488893032074,grad_norm: 0.9999995593684815, iteration: 414917
loss: 0.9868011474609375,grad_norm: 0.9999997670638979, iteration: 414918
loss: 1.0210750102996826,grad_norm: 0.700038072673988, iteration: 414919
loss: 1.0008689165115356,grad_norm: 0.8033142196434521, iteration: 414920
loss: 1.0072314739227295,grad_norm: 0.8069144645117539, iteration: 414921
loss: 1.0117634534835815,grad_norm: 0.5903114358122774, iteration: 414922
loss: 1.0259007215499878,grad_norm: 0.6901393295873924, iteration: 414923
loss: 1.0207089185714722,grad_norm: 0.9999993281363965, iteration: 414924
loss: 1.0345745086669922,grad_norm: 0.9482226852757482, iteration: 414925
loss: 1.0223517417907715,grad_norm: 0.6189120060688009, iteration: 414926
loss: 1.0014840364456177,grad_norm: 0.6197005637128679, iteration: 414927
loss: 0.9908778071403503,grad_norm: 0.7473870662004637, iteration: 414928
loss: 0.9984477162361145,grad_norm: 0.8659742133059509, iteration: 414929
loss: 0.9890355467796326,grad_norm: 0.8227714734455274, iteration: 414930
loss: 0.9882539510726929,grad_norm: 0.8617600052538993, iteration: 414931
loss: 1.0333116054534912,grad_norm: 0.9999991577127852, iteration: 414932
loss: 0.9972327947616577,grad_norm: 0.9637968957270351, iteration: 414933
loss: 1.033306360244751,grad_norm: 0.8838908473171259, iteration: 414934
loss: 0.9832401275634766,grad_norm: 0.8567515844794821, iteration: 414935
loss: 0.9972026944160461,grad_norm: 0.9999991960643447, iteration: 414936
loss: 0.9728726744651794,grad_norm: 0.9003213140471131, iteration: 414937
loss: 1.007086157798767,grad_norm: 0.8037156004855134, iteration: 414938
loss: 1.0113105773925781,grad_norm: 0.9208095938272574, iteration: 414939
loss: 0.9746584296226501,grad_norm: 0.8548266152418031, iteration: 414940
loss: 1.0292011499404907,grad_norm: 0.9190979893283492, iteration: 414941
loss: 1.0238786935806274,grad_norm: 0.8520333527720981, iteration: 414942
loss: 0.9953252673149109,grad_norm: 0.8512746110304292, iteration: 414943
loss: 1.0050749778747559,grad_norm: 0.7790819412349709, iteration: 414944
loss: 1.0062625408172607,grad_norm: 0.77773396679557, iteration: 414945
loss: 1.028388261795044,grad_norm: 0.9922063502470854, iteration: 414946
loss: 0.9947410821914673,grad_norm: 0.8941950994218807, iteration: 414947
loss: 1.016072154045105,grad_norm: 0.7158226939362068, iteration: 414948
loss: 0.9708870649337769,grad_norm: 0.7547718268326886, iteration: 414949
loss: 1.010124921798706,grad_norm: 0.8166659326359917, iteration: 414950
loss: 1.0199693441390991,grad_norm: 0.7126280177154375, iteration: 414951
loss: 1.0091644525527954,grad_norm: 0.7955445969071803, iteration: 414952
loss: 1.0024592876434326,grad_norm: 0.7438493421187903, iteration: 414953
loss: 1.0008432865142822,grad_norm: 0.9999998190506876, iteration: 414954
loss: 1.0084401369094849,grad_norm: 0.7894036799229025, iteration: 414955
loss: 0.9801433086395264,grad_norm: 0.8255032406893755, iteration: 414956
loss: 0.9634316563606262,grad_norm: 0.9999989479274759, iteration: 414957
loss: 0.9989475011825562,grad_norm: 0.7345790986513222, iteration: 414958
loss: 0.9733163118362427,grad_norm: 0.735535387980287, iteration: 414959
loss: 0.9727351665496826,grad_norm: 0.7958324966745929, iteration: 414960
loss: 1.012948989868164,grad_norm: 0.7247762165972907, iteration: 414961
loss: 1.0117465257644653,grad_norm: 0.8460339707493949, iteration: 414962
loss: 1.0164575576782227,grad_norm: 0.7143640320357879, iteration: 414963
loss: 0.9991902709007263,grad_norm: 0.683741138837871, iteration: 414964
loss: 0.9755852222442627,grad_norm: 0.8807579254038389, iteration: 414965
loss: 1.0267266035079956,grad_norm: 0.7621198730168728, iteration: 414966
loss: 0.9910259246826172,grad_norm: 0.8654083267672521, iteration: 414967
loss: 0.9840861558914185,grad_norm: 0.8001040208802862, iteration: 414968
loss: 1.0017902851104736,grad_norm: 0.907406860190144, iteration: 414969
loss: 1.00368332862854,grad_norm: 0.9999998764920949, iteration: 414970
loss: 1.0272517204284668,grad_norm: 0.9743805753859106, iteration: 414971
loss: 0.9702160358428955,grad_norm: 0.8024187325503527, iteration: 414972
loss: 0.990474283695221,grad_norm: 0.7986965555617115, iteration: 414973
loss: 1.078407645225525,grad_norm: 0.9556934083557903, iteration: 414974
loss: 1.0189383029937744,grad_norm: 0.840318745063582, iteration: 414975
loss: 0.9947185516357422,grad_norm: 0.6123145474720206, iteration: 414976
loss: 0.995535135269165,grad_norm: 0.7384407777164479, iteration: 414977
loss: 0.9550707340240479,grad_norm: 0.7076026298237285, iteration: 414978
loss: 1.0534210205078125,grad_norm: 0.8117856331812436, iteration: 414979
loss: 0.9576281309127808,grad_norm: 0.9201605096955358, iteration: 414980
loss: 1.0208402872085571,grad_norm: 0.9517035538350981, iteration: 414981
loss: 1.0072054862976074,grad_norm: 0.9999990422699089, iteration: 414982
loss: 1.0037645101547241,grad_norm: 0.7994411894477719, iteration: 414983
loss: 0.9779642224311829,grad_norm: 0.8194180863990282, iteration: 414984
loss: 1.0173258781433105,grad_norm: 0.7604324245642549, iteration: 414985
loss: 0.9954826235771179,grad_norm: 0.6510337480578943, iteration: 414986
loss: 1.0401548147201538,grad_norm: 0.9192887702407329, iteration: 414987
loss: 1.03003990650177,grad_norm: 0.7947693872769532, iteration: 414988
loss: 1.0033347606658936,grad_norm: 0.7828370819678128, iteration: 414989
loss: 0.9837005734443665,grad_norm: 0.8344643231066249, iteration: 414990
loss: 0.9334649443626404,grad_norm: 0.8279158111166907, iteration: 414991
loss: 0.9944867491722107,grad_norm: 0.8347713148859811, iteration: 414992
loss: 0.9844859838485718,grad_norm: 0.9999998043826039, iteration: 414993
loss: 1.027738332748413,grad_norm: 0.8714494705732188, iteration: 414994
loss: 0.9612347483634949,grad_norm: 0.8373433552001139, iteration: 414995
loss: 1.0063961744308472,grad_norm: 0.7124170917305024, iteration: 414996
loss: 0.9932202696800232,grad_norm: 0.8896397191209469, iteration: 414997
loss: 1.003318190574646,grad_norm: 0.6795488962352486, iteration: 414998
loss: 1.0056700706481934,grad_norm: 0.6203868834182789, iteration: 414999
loss: 0.9679020047187805,grad_norm: 0.7780260400626777, iteration: 415000
loss: 1.0256447792053223,grad_norm: 0.7796725941276139, iteration: 415001
loss: 0.9762709140777588,grad_norm: 0.8717715127915394, iteration: 415002
loss: 0.9839723110198975,grad_norm: 0.7366404900287523, iteration: 415003
loss: 1.0578982830047607,grad_norm: 0.5677533984223062, iteration: 415004
loss: 1.002683401107788,grad_norm: 0.6988374342844017, iteration: 415005
loss: 1.1100090742111206,grad_norm: 0.9361437808646557, iteration: 415006
loss: 1.0070700645446777,grad_norm: 0.999999082975952, iteration: 415007
loss: 1.031819462776184,grad_norm: 0.696190323063305, iteration: 415008
loss: 0.9752797484397888,grad_norm: 0.8387888076780263, iteration: 415009
loss: 0.9869334101676941,grad_norm: 0.8215643883118177, iteration: 415010
loss: 0.9905579686164856,grad_norm: 0.833938537336492, iteration: 415011
loss: 0.9689961075782776,grad_norm: 0.7220830746149494, iteration: 415012
loss: 0.9849081039428711,grad_norm: 0.9999998891240808, iteration: 415013
loss: 1.0251954793930054,grad_norm: 0.7170781735784816, iteration: 415014
loss: 1.0266122817993164,grad_norm: 0.9999995576673166, iteration: 415015
loss: 0.9682816863059998,grad_norm: 0.7122613451750831, iteration: 415016
loss: 1.0090693235397339,grad_norm: 0.8662936837949293, iteration: 415017
loss: 0.9920072555541992,grad_norm: 0.7416439309924159, iteration: 415018
loss: 1.00050950050354,grad_norm: 0.9853529999963023, iteration: 415019
loss: 0.9948652982711792,grad_norm: 0.9371425530202594, iteration: 415020
loss: 0.9756718873977661,grad_norm: 0.9669217660132646, iteration: 415021
loss: 1.0151463747024536,grad_norm: 0.690440088479194, iteration: 415022
loss: 1.0071921348571777,grad_norm: 0.7199203135998876, iteration: 415023
loss: 0.9727590084075928,grad_norm: 0.74590244429707, iteration: 415024
loss: 1.0727663040161133,grad_norm: 0.9999993223011499, iteration: 415025
loss: 0.9842655658721924,grad_norm: 0.8089466668055194, iteration: 415026
loss: 0.9944963455200195,grad_norm: 0.7664044382998058, iteration: 415027
loss: 1.0938831567764282,grad_norm: 0.9999991745502818, iteration: 415028
loss: 0.9721193313598633,grad_norm: 0.7235786513355259, iteration: 415029
loss: 1.014382243156433,grad_norm: 0.849199239731557, iteration: 415030
loss: 0.9851047396659851,grad_norm: 0.8559177724248053, iteration: 415031
loss: 0.9800865650177002,grad_norm: 0.7963250921008007, iteration: 415032
loss: 0.9993632435798645,grad_norm: 0.999999444228682, iteration: 415033
loss: 0.9595350623130798,grad_norm: 0.6885834529517696, iteration: 415034
loss: 1.0026963949203491,grad_norm: 0.8011103253322682, iteration: 415035
loss: 0.9557283520698547,grad_norm: 0.8431456144381678, iteration: 415036
loss: 1.0664615631103516,grad_norm: 0.9999999740681033, iteration: 415037
loss: 0.994438648223877,grad_norm: 0.8564889597248376, iteration: 415038
loss: 1.0068565607070923,grad_norm: 0.7616657327213058, iteration: 415039
loss: 0.9857460260391235,grad_norm: 0.8837046944718959, iteration: 415040
loss: 0.9891828894615173,grad_norm: 0.6731188414802394, iteration: 415041
loss: 1.0013011693954468,grad_norm: 0.878151091165902, iteration: 415042
loss: 0.9951119422912598,grad_norm: 0.9999993012338259, iteration: 415043
loss: 0.9785616993904114,grad_norm: 0.7182836276640214, iteration: 415044
loss: 0.9939678907394409,grad_norm: 0.759481493895413, iteration: 415045
loss: 1.002865195274353,grad_norm: 0.7714892327458592, iteration: 415046
loss: 0.9898529052734375,grad_norm: 0.6646176386033428, iteration: 415047
loss: 1.104525089263916,grad_norm: 0.9999997414061221, iteration: 415048
loss: 1.0108675956726074,grad_norm: 0.7730047016179385, iteration: 415049
loss: 0.9852786064147949,grad_norm: 0.737261262916173, iteration: 415050
loss: 1.0228066444396973,grad_norm: 0.7781780983034726, iteration: 415051
loss: 1.0160952806472778,grad_norm: 0.8864460937555803, iteration: 415052
loss: 0.9962545037269592,grad_norm: 0.793177776010919, iteration: 415053
loss: 1.0198771953582764,grad_norm: 0.9999994622813166, iteration: 415054
loss: 1.0175576210021973,grad_norm: 0.9999991167364787, iteration: 415055
loss: 1.0106797218322754,grad_norm: 0.8615504399550407, iteration: 415056
loss: 1.009550929069519,grad_norm: 0.8600210428890913, iteration: 415057
loss: 0.9925723075866699,grad_norm: 0.6639795998143628, iteration: 415058
loss: 0.9949366450309753,grad_norm: 0.7195634483487172, iteration: 415059
loss: 0.999849796295166,grad_norm: 0.8119934826249808, iteration: 415060
loss: 1.0231870412826538,grad_norm: 0.9999990217566562, iteration: 415061
loss: 1.0322974920272827,grad_norm: 0.9208844362074513, iteration: 415062
loss: 0.9945417642593384,grad_norm: 0.7774121277876443, iteration: 415063
loss: 1.0692088603973389,grad_norm: 0.9999991314457208, iteration: 415064
loss: 0.9975433349609375,grad_norm: 0.816192125962439, iteration: 415065
loss: 1.0460999011993408,grad_norm: 0.7540054110449512, iteration: 415066
loss: 0.9943562746047974,grad_norm: 0.7550332200523648, iteration: 415067
loss: 1.00077486038208,grad_norm: 0.825999623084828, iteration: 415068
loss: 1.014258861541748,grad_norm: 0.9441933354959132, iteration: 415069
loss: 0.9765143990516663,grad_norm: 0.7976447702616817, iteration: 415070
loss: 1.0146374702453613,grad_norm: 0.8037342511183191, iteration: 415071
loss: 0.9800252318382263,grad_norm: 0.8225440928632942, iteration: 415072
loss: 1.0153532028198242,grad_norm: 0.6578041928412567, iteration: 415073
loss: 0.9568758606910706,grad_norm: 0.7218808551510267, iteration: 415074
loss: 1.0111169815063477,grad_norm: 0.6996593160523522, iteration: 415075
loss: 0.997965395450592,grad_norm: 0.9999991601048024, iteration: 415076
loss: 1.0395427942276,grad_norm: 0.9999991015606091, iteration: 415077
loss: 1.039123773574829,grad_norm: 0.8520330217965424, iteration: 415078
loss: 0.9856570363044739,grad_norm: 0.7892238832702508, iteration: 415079
loss: 0.9968709945678711,grad_norm: 0.9999992560994944, iteration: 415080
loss: 1.0148448944091797,grad_norm: 0.7452038004697188, iteration: 415081
loss: 0.9823494553565979,grad_norm: 0.7196351930221674, iteration: 415082
loss: 0.9889212250709534,grad_norm: 0.9483183014247387, iteration: 415083
loss: 0.9743932485580444,grad_norm: 0.8199157886528105, iteration: 415084
loss: 0.9829283356666565,grad_norm: 0.7816792262709763, iteration: 415085
loss: 1.0295292139053345,grad_norm: 0.9999990089525876, iteration: 415086
loss: 1.0543135404586792,grad_norm: 0.9999991729583814, iteration: 415087
loss: 1.1000852584838867,grad_norm: 0.9999993587970617, iteration: 415088
loss: 0.9758641123771667,grad_norm: 0.8116142670728194, iteration: 415089
loss: 0.9798813462257385,grad_norm: 0.7000040598730085, iteration: 415090
loss: 0.9742119312286377,grad_norm: 0.78000937523607, iteration: 415091
loss: 0.9776838421821594,grad_norm: 0.8230206063291974, iteration: 415092
loss: 1.0254830121994019,grad_norm: 0.8127482276740726, iteration: 415093
loss: 1.000531792640686,grad_norm: 0.7198011320852888, iteration: 415094
loss: 0.9848389029502869,grad_norm: 0.7594562298510503, iteration: 415095
loss: 1.0087895393371582,grad_norm: 0.9999991249926313, iteration: 415096
loss: 1.0006166696548462,grad_norm: 0.7624184906240978, iteration: 415097
loss: 1.0324310064315796,grad_norm: 0.7589633015272964, iteration: 415098
loss: 0.9759922027587891,grad_norm: 0.8301834310623438, iteration: 415099
loss: 0.9721949696540833,grad_norm: 0.7564995731967556, iteration: 415100
loss: 0.9886462092399597,grad_norm: 0.8734859165402424, iteration: 415101
loss: 1.0370827913284302,grad_norm: 0.9999993894433539, iteration: 415102
loss: 0.9980528354644775,grad_norm: 0.7118623345418955, iteration: 415103
loss: 1.0291987657546997,grad_norm: 0.7883869590506808, iteration: 415104
loss: 1.0395156145095825,grad_norm: 0.9391143770657121, iteration: 415105
loss: 1.0032118558883667,grad_norm: 0.7260514023646767, iteration: 415106
loss: 0.9769207835197449,grad_norm: 0.9999998882479791, iteration: 415107
loss: 1.026187777519226,grad_norm: 0.748273158907856, iteration: 415108
loss: 0.9821985363960266,grad_norm: 0.7847020951945469, iteration: 415109
loss: 0.9719861745834351,grad_norm: 0.8862639480244913, iteration: 415110
loss: 0.98881995677948,grad_norm: 0.9290922603324387, iteration: 415111
loss: 0.9784042239189148,grad_norm: 0.8961403800586847, iteration: 415112
loss: 0.965398371219635,grad_norm: 0.7828143590115121, iteration: 415113
loss: 1.0422612428665161,grad_norm: 0.8578067926402235, iteration: 415114
loss: 0.9920374751091003,grad_norm: 0.6634045835728819, iteration: 415115
loss: 0.9747488498687744,grad_norm: 0.8686319914256218, iteration: 415116
loss: 0.9551084637641907,grad_norm: 0.8587662462504823, iteration: 415117
loss: 1.0101011991500854,grad_norm: 0.9999990071932429, iteration: 415118
loss: 1.0312339067459106,grad_norm: 0.9078347819667436, iteration: 415119
loss: 0.9900090098381042,grad_norm: 0.7814899287734474, iteration: 415120
loss: 0.9823082685470581,grad_norm: 0.7913461854128857, iteration: 415121
loss: 0.9631884098052979,grad_norm: 0.7942739026844082, iteration: 415122
loss: 1.009856104850769,grad_norm: 0.8751064631513996, iteration: 415123
loss: 1.0244008302688599,grad_norm: 0.7329950360351233, iteration: 415124
loss: 1.0419553518295288,grad_norm: 0.879923433084314, iteration: 415125
loss: 1.0023680925369263,grad_norm: 0.7720798278746487, iteration: 415126
loss: 0.9667680263519287,grad_norm: 0.8452328169051869, iteration: 415127
loss: 1.0649267435073853,grad_norm: 0.999999712584891, iteration: 415128
loss: 1.0118749141693115,grad_norm: 0.7169988206937152, iteration: 415129
loss: 1.0194013118743896,grad_norm: 0.8526342783843349, iteration: 415130
loss: 0.9746691584587097,grad_norm: 0.7748526729769117, iteration: 415131
loss: 0.9933922290802002,grad_norm: 0.8342990459613512, iteration: 415132
loss: 1.0431143045425415,grad_norm: 0.9268979914827916, iteration: 415133
loss: 1.0317909717559814,grad_norm: 0.946260043151665, iteration: 415134
loss: 1.0793606042861938,grad_norm: 0.9999991683541112, iteration: 415135
loss: 0.9810823202133179,grad_norm: 0.7612852344672656, iteration: 415136
loss: 1.0116249322891235,grad_norm: 0.8542066320283661, iteration: 415137
loss: 1.063197135925293,grad_norm: 0.9999997588808366, iteration: 415138
loss: 0.9923452138900757,grad_norm: 0.8131003326017567, iteration: 415139
loss: 1.0144659280776978,grad_norm: 0.7445371819347456, iteration: 415140
loss: 1.0038809776306152,grad_norm: 0.8097913377182103, iteration: 415141
loss: 0.9975258708000183,grad_norm: 0.7465495108903764, iteration: 415142
loss: 1.0003069639205933,grad_norm: 0.9524252805748431, iteration: 415143
loss: 1.028995156288147,grad_norm: 0.6851203843901773, iteration: 415144
loss: 0.9851899147033691,grad_norm: 0.8121064922679466, iteration: 415145
loss: 1.0080918073654175,grad_norm: 0.908740501594849, iteration: 415146
loss: 0.9520127773284912,grad_norm: 0.8737257878966167, iteration: 415147
loss: 0.9694533348083496,grad_norm: 0.8188950232911081, iteration: 415148
loss: 1.0186361074447632,grad_norm: 0.9999998858256811, iteration: 415149
loss: 0.9691648483276367,grad_norm: 0.787519007067117, iteration: 415150
loss: 1.004119873046875,grad_norm: 0.784519756086895, iteration: 415151
loss: 1.0891327857971191,grad_norm: 0.7823768924335351, iteration: 415152
loss: 0.9912893176078796,grad_norm: 0.8063657637743094, iteration: 415153
loss: 0.9923402667045593,grad_norm: 0.8324405897088581, iteration: 415154
loss: 1.0006210803985596,grad_norm: 0.7890037896913948, iteration: 415155
loss: 1.0084328651428223,grad_norm: 0.6943985979245484, iteration: 415156
loss: 1.04678475856781,grad_norm: 0.9507353576426795, iteration: 415157
loss: 1.0159157514572144,grad_norm: 0.5659965784664249, iteration: 415158
loss: 0.966364860534668,grad_norm: 0.8113985560364696, iteration: 415159
loss: 0.9966058135032654,grad_norm: 0.772394283280781, iteration: 415160
loss: 1.001682162284851,grad_norm: 0.7997556644273068, iteration: 415161
loss: 1.0203393697738647,grad_norm: 0.9999990078758684, iteration: 415162
loss: 1.0162043571472168,grad_norm: 0.6974262541805286, iteration: 415163
loss: 0.9702296853065491,grad_norm: 0.8322827408402629, iteration: 415164
loss: 1.0191153287887573,grad_norm: 0.9999991431556571, iteration: 415165
loss: 0.9718959331512451,grad_norm: 0.7288003663155489, iteration: 415166
loss: 0.9994604587554932,grad_norm: 0.8960999486086156, iteration: 415167
loss: 0.9388386607170105,grad_norm: 0.887198703696561, iteration: 415168
loss: 0.9799228310585022,grad_norm: 0.8060708756828576, iteration: 415169
loss: 0.9978452324867249,grad_norm: 0.6964461855470927, iteration: 415170
loss: 0.9896023273468018,grad_norm: 0.753930250160205, iteration: 415171
loss: 0.9918067455291748,grad_norm: 0.7838799734853436, iteration: 415172
loss: 0.9977946877479553,grad_norm: 0.6543466870257472, iteration: 415173
loss: 1.0097101926803589,grad_norm: 0.8539503756623983, iteration: 415174
loss: 0.9511023759841919,grad_norm: 0.7763574835998157, iteration: 415175
loss: 1.008817195892334,grad_norm: 0.7739387477677777, iteration: 415176
loss: 1.060391902923584,grad_norm: 0.7713835999939967, iteration: 415177
loss: 0.9834302663803101,grad_norm: 0.7410819957450332, iteration: 415178
loss: 0.9870951175689697,grad_norm: 0.8549245065727009, iteration: 415179
loss: 1.0126421451568604,grad_norm: 0.7940555294125708, iteration: 415180
loss: 1.0205076932907104,grad_norm: 0.9999990382419367, iteration: 415181
loss: 1.0112512111663818,grad_norm: 0.7923878427787335, iteration: 415182
loss: 0.9789890050888062,grad_norm: 0.7603516451149646, iteration: 415183
loss: 1.0132187604904175,grad_norm: 0.902951182252792, iteration: 415184
loss: 1.013393521308899,grad_norm: 0.905048755742639, iteration: 415185
loss: 1.0014489889144897,grad_norm: 0.8782207699109669, iteration: 415186
loss: 1.010901927947998,grad_norm: 0.7952348535660413, iteration: 415187
loss: 1.04597008228302,grad_norm: 0.9999993192406782, iteration: 415188
loss: 1.0023980140686035,grad_norm: 0.9398452860342492, iteration: 415189
loss: 1.002058744430542,grad_norm: 0.6119416456002492, iteration: 415190
loss: 1.0966167449951172,grad_norm: 0.8881966122375956, iteration: 415191
loss: 0.988888144493103,grad_norm: 0.7288677995591524, iteration: 415192
loss: 1.0053672790527344,grad_norm: 0.885288173449146, iteration: 415193
loss: 0.9965289831161499,grad_norm: 0.7083952582703865, iteration: 415194
loss: 0.9755493402481079,grad_norm: 0.8362907182910004, iteration: 415195
loss: 0.9931387305259705,grad_norm: 0.7209159349979706, iteration: 415196
loss: 1.004244089126587,grad_norm: 0.6903750081334386, iteration: 415197
loss: 1.02409827709198,grad_norm: 0.7641114975954255, iteration: 415198
loss: 1.0257747173309326,grad_norm: 0.8182441297966018, iteration: 415199
loss: 1.005143165588379,grad_norm: 0.9999993043711042, iteration: 415200
loss: 0.9697367548942566,grad_norm: 0.8314664718049215, iteration: 415201
loss: 1.0183823108673096,grad_norm: 0.790920549542576, iteration: 415202
loss: 0.9838259816169739,grad_norm: 0.8615485320911951, iteration: 415203
loss: 0.9766764640808105,grad_norm: 0.7870356069325208, iteration: 415204
loss: 1.0218265056610107,grad_norm: 0.7145246983182898, iteration: 415205
loss: 0.9932844042778015,grad_norm: 0.6725169163354273, iteration: 415206
loss: 0.9989699125289917,grad_norm: 0.9999991842063262, iteration: 415207
loss: 1.009253978729248,grad_norm: 0.9999992079881853, iteration: 415208
loss: 1.0309655666351318,grad_norm: 0.999999485507645, iteration: 415209
loss: 1.0141346454620361,grad_norm: 0.7202170869958248, iteration: 415210
loss: 1.0320435762405396,grad_norm: 0.850177285889385, iteration: 415211
loss: 1.017967700958252,grad_norm: 0.8302686578448802, iteration: 415212
loss: 0.9857872128486633,grad_norm: 0.7215836157240271, iteration: 415213
loss: 1.0315577983856201,grad_norm: 0.9999990780723049, iteration: 415214
loss: 0.9879406690597534,grad_norm: 0.9190683569792643, iteration: 415215
loss: 1.0402052402496338,grad_norm: 0.8552251838175451, iteration: 415216
loss: 1.0111619234085083,grad_norm: 0.8156622291870228, iteration: 415217
loss: 0.9983798861503601,grad_norm: 0.7026119667248939, iteration: 415218
loss: 1.0032622814178467,grad_norm: 0.7687489750174566, iteration: 415219
loss: 0.9743115901947021,grad_norm: 0.7398289054213032, iteration: 415220
loss: 1.081316590309143,grad_norm: 0.826127454412156, iteration: 415221
loss: 0.9700203537940979,grad_norm: 0.8260632886266029, iteration: 415222
loss: 0.988903284072876,grad_norm: 0.6845281747081705, iteration: 415223
loss: 0.9803224802017212,grad_norm: 0.8616679105887639, iteration: 415224
loss: 1.016853928565979,grad_norm: 0.7475017896618794, iteration: 415225
loss: 0.9834006428718567,grad_norm: 0.8298819289635473, iteration: 415226
loss: 1.0287827253341675,grad_norm: 0.8472479624067177, iteration: 415227
loss: 0.9685114622116089,grad_norm: 0.7635258302390372, iteration: 415228
loss: 1.003844141960144,grad_norm: 0.7105610481491301, iteration: 415229
loss: 0.9683672189712524,grad_norm: 0.6839736417495946, iteration: 415230
loss: 1.037299394607544,grad_norm: 0.912524036836341, iteration: 415231
loss: 1.04177725315094,grad_norm: 0.6969401221297199, iteration: 415232
loss: 0.9947824478149414,grad_norm: 0.862346016695556, iteration: 415233
loss: 0.9857160449028015,grad_norm: 0.836241901385067, iteration: 415234
loss: 1.027451992034912,grad_norm: 0.8052828413470485, iteration: 415235
loss: 0.9944042563438416,grad_norm: 0.6590348105959561, iteration: 415236
loss: 0.9846658110618591,grad_norm: 0.7376895144034796, iteration: 415237
loss: 0.9855630993843079,grad_norm: 0.859104260518458, iteration: 415238
loss: 0.9405598640441895,grad_norm: 0.8225931023556766, iteration: 415239
loss: 1.0147240161895752,grad_norm: 0.8264349213918133, iteration: 415240
loss: 1.001396656036377,grad_norm: 0.7831159866166526, iteration: 415241
loss: 1.0129482746124268,grad_norm: 0.7282742456771517, iteration: 415242
loss: 1.057826042175293,grad_norm: 0.7480852129118905, iteration: 415243
loss: 0.9804947972297668,grad_norm: 0.6741488230599127, iteration: 415244
loss: 0.9819560647010803,grad_norm: 0.9391228037024213, iteration: 415245
loss: 1.0175455808639526,grad_norm: 0.8766836941083088, iteration: 415246
loss: 1.0249552726745605,grad_norm: 0.9999992262175763, iteration: 415247
loss: 0.9913852214813232,grad_norm: 0.9771316851811788, iteration: 415248
loss: 0.9740919470787048,grad_norm: 0.9304469543532837, iteration: 415249
loss: 1.0324214696884155,grad_norm: 0.7838817576764576, iteration: 415250
loss: 1.028352975845337,grad_norm: 0.7868345809359099, iteration: 415251
loss: 1.0322765111923218,grad_norm: 0.9999990512174255, iteration: 415252
loss: 0.9706519246101379,grad_norm: 0.8209430565779909, iteration: 415253
loss: 0.9996391534805298,grad_norm: 0.7949292657732661, iteration: 415254
loss: 1.0200854539871216,grad_norm: 0.8073613790246071, iteration: 415255
loss: 1.0260168313980103,grad_norm: 0.8443854213815596, iteration: 415256
loss: 1.025353193283081,grad_norm: 0.7179599241244865, iteration: 415257
loss: 0.9645411372184753,grad_norm: 0.6974677033651756, iteration: 415258
loss: 0.9711941480636597,grad_norm: 0.8820271483353491, iteration: 415259
loss: 0.9981083273887634,grad_norm: 0.7284199617090954, iteration: 415260
loss: 0.9452337026596069,grad_norm: 0.8444671535909265, iteration: 415261
loss: 1.0015841722488403,grad_norm: 0.9331465440995586, iteration: 415262
loss: 0.9915566444396973,grad_norm: 0.8112710265067639, iteration: 415263
loss: 0.9854364395141602,grad_norm: 0.6436047591004878, iteration: 415264
loss: 1.0863595008850098,grad_norm: 1.0000000761800039, iteration: 415265
loss: 0.9899287223815918,grad_norm: 0.7577388763609935, iteration: 415266
loss: 0.9864442944526672,grad_norm: 0.8779560412277443, iteration: 415267
loss: 0.9806638956069946,grad_norm: 0.8992790310647599, iteration: 415268
loss: 1.0198577642440796,grad_norm: 0.8808372773146862, iteration: 415269
loss: 1.036535620689392,grad_norm: 0.9999993355015419, iteration: 415270
loss: 1.013474941253662,grad_norm: 0.7896684736645335, iteration: 415271
loss: 0.9995440244674683,grad_norm: 0.8442945684559243, iteration: 415272
loss: 0.9761370420455933,grad_norm: 0.6976345764879561, iteration: 415273
loss: 1.0537623167037964,grad_norm: 0.958373046061676, iteration: 415274
loss: 1.0219541788101196,grad_norm: 0.9999998319704394, iteration: 415275
loss: 0.9837579727172852,grad_norm: 0.8340652791580295, iteration: 415276
loss: 1.014591097831726,grad_norm: 0.8879598680155922, iteration: 415277
loss: 0.9518120884895325,grad_norm: 0.6809757424152509, iteration: 415278
loss: 0.9626306891441345,grad_norm: 0.7713396815527629, iteration: 415279
loss: 0.9984917640686035,grad_norm: 0.7524234579323397, iteration: 415280
loss: 0.9920814037322998,grad_norm: 0.8396203370241095, iteration: 415281
loss: 0.9749168753623962,grad_norm: 0.8834980674692633, iteration: 415282
loss: 0.9454565644264221,grad_norm: 0.6646988487093747, iteration: 415283
loss: 1.0087021589279175,grad_norm: 0.6844397903735815, iteration: 415284
loss: 1.009231448173523,grad_norm: 0.7065709099530709, iteration: 415285
loss: 1.0287060737609863,grad_norm: 0.655064914447567, iteration: 415286
loss: 0.9868645071983337,grad_norm: 0.6819960189651936, iteration: 415287
loss: 1.008696436882019,grad_norm: 0.775003952984847, iteration: 415288
loss: 0.9787760376930237,grad_norm: 0.7402522364532665, iteration: 415289
loss: 1.0057591199874878,grad_norm: 0.8447066266468694, iteration: 415290
loss: 1.027623176574707,grad_norm: 0.7492358234165727, iteration: 415291
loss: 0.9998751878738403,grad_norm: 0.8276909203916751, iteration: 415292
loss: 0.9927475452423096,grad_norm: 0.807603622197086, iteration: 415293
loss: 1.0649700164794922,grad_norm: 0.9999999406217797, iteration: 415294
loss: 1.0083492994308472,grad_norm: 0.7502056848905724, iteration: 415295
loss: 1.0051828622817993,grad_norm: 0.8506155362850698, iteration: 415296
loss: 0.998806893825531,grad_norm: 0.7522654990975457, iteration: 415297
loss: 0.9749842882156372,grad_norm: 0.8218491875322331, iteration: 415298
loss: 1.049190878868103,grad_norm: 0.77656944152646, iteration: 415299
loss: 1.0847610235214233,grad_norm: 0.999999085297007, iteration: 415300
loss: 0.9748767614364624,grad_norm: 0.8660949458115298, iteration: 415301
loss: 1.0272783041000366,grad_norm: 0.7216715818113021, iteration: 415302
loss: 0.9573984146118164,grad_norm: 0.7129336019839674, iteration: 415303
loss: 0.9999332427978516,grad_norm: 0.8680071001102355, iteration: 415304
loss: 1.0022748708724976,grad_norm: 0.7585427979936133, iteration: 415305
loss: 1.0937892198562622,grad_norm: 0.9058185115635559, iteration: 415306
loss: 1.0335274934768677,grad_norm: 0.770204904004311, iteration: 415307
loss: 1.0537528991699219,grad_norm: 0.9999990921074666, iteration: 415308
loss: 1.0099866390228271,grad_norm: 0.80165424446153, iteration: 415309
loss: 0.9841596484184265,grad_norm: 0.8154091259153601, iteration: 415310
loss: 1.000372052192688,grad_norm: 0.8508588245873647, iteration: 415311
loss: 1.039969801902771,grad_norm: 0.8151152854647967, iteration: 415312
loss: 1.0042835474014282,grad_norm: 0.9553909477484168, iteration: 415313
loss: 0.9763014912605286,grad_norm: 0.895420970132651, iteration: 415314
loss: 1.0281635522842407,grad_norm: 0.7104033694921423, iteration: 415315
loss: 1.0288455486297607,grad_norm: 0.7762819650606155, iteration: 415316
loss: 1.0095125436782837,grad_norm: 0.8561191627598612, iteration: 415317
loss: 1.0624243021011353,grad_norm: 0.9999999051073328, iteration: 415318
loss: 1.0552942752838135,grad_norm: 0.8261141323409178, iteration: 415319
loss: 0.9985894560813904,grad_norm: 0.722894515366763, iteration: 415320
loss: 0.9977059960365295,grad_norm: 0.730174117746521, iteration: 415321
loss: 1.0364058017730713,grad_norm: 0.7501297087033928, iteration: 415322
loss: 0.9650532007217407,grad_norm: 0.7089541429972076, iteration: 415323
loss: 0.9844828844070435,grad_norm: 0.6532974679316116, iteration: 415324
loss: 0.9918531179428101,grad_norm: 0.7697593252631805, iteration: 415325
loss: 0.9800485968589783,grad_norm: 0.6492282810456691, iteration: 415326
loss: 1.003310203552246,grad_norm: 0.7248198783706701, iteration: 415327
loss: 1.0323619842529297,grad_norm: 0.6721603021045452, iteration: 415328
loss: 1.0000261068344116,grad_norm: 0.8215099811095097, iteration: 415329
loss: 0.9862379431724548,grad_norm: 0.8410389275392466, iteration: 415330
loss: 0.9792347550392151,grad_norm: 0.899704287425748, iteration: 415331
loss: 1.0048699378967285,grad_norm: 0.8148512059812147, iteration: 415332
loss: 0.9715811610221863,grad_norm: 0.7318972263884992, iteration: 415333
loss: 0.9677345156669617,grad_norm: 0.8512796932034173, iteration: 415334
loss: 0.9863376021385193,grad_norm: 0.7588327134634334, iteration: 415335
loss: 1.0378552675247192,grad_norm: 0.8865231537100576, iteration: 415336
loss: 1.0065325498580933,grad_norm: 0.7344936445643718, iteration: 415337
loss: 0.978482186794281,grad_norm: 0.780707472292846, iteration: 415338
loss: 1.0104308128356934,grad_norm: 0.7274478041937076, iteration: 415339
loss: 1.0244812965393066,grad_norm: 0.7795610351736004, iteration: 415340
loss: 1.0058212280273438,grad_norm: 0.7235074336859304, iteration: 415341
loss: 1.0032941102981567,grad_norm: 0.9999995729642651, iteration: 415342
loss: 0.9912691712379456,grad_norm: 0.8788562353145222, iteration: 415343
loss: 0.9622939825057983,grad_norm: 0.7299896008977751, iteration: 415344
loss: 0.9775051474571228,grad_norm: 0.6888277960363539, iteration: 415345
loss: 1.0085207223892212,grad_norm: 0.7730711322800448, iteration: 415346
loss: 1.0223292112350464,grad_norm: 0.6813057685432866, iteration: 415347
loss: 0.9808475375175476,grad_norm: 0.7429874034393433, iteration: 415348
loss: 0.9993088841438293,grad_norm: 0.8217888154668096, iteration: 415349
loss: 0.9752709865570068,grad_norm: 0.8074640386064902, iteration: 415350
loss: 1.0082118511199951,grad_norm: 0.7244158840560015, iteration: 415351
loss: 1.0066860914230347,grad_norm: 0.9999991104287106, iteration: 415352
loss: 1.0132580995559692,grad_norm: 0.6696496154666696, iteration: 415353
loss: 1.0284686088562012,grad_norm: 0.8196790414212344, iteration: 415354
loss: 1.0177468061447144,grad_norm: 0.6912547552750357, iteration: 415355
loss: 0.9769060015678406,grad_norm: 0.7611669864537036, iteration: 415356
loss: 0.9692168235778809,grad_norm: 0.737067081487731, iteration: 415357
loss: 0.9743273854255676,grad_norm: 0.809659557922134, iteration: 415358
loss: 0.9704543352127075,grad_norm: 0.8998447349398865, iteration: 415359
loss: 0.9741271138191223,grad_norm: 0.8169877345441412, iteration: 415360
loss: 1.0175254344940186,grad_norm: 0.7623143440855361, iteration: 415361
loss: 0.9875453114509583,grad_norm: 0.7688286121911567, iteration: 415362
loss: 0.9929065108299255,grad_norm: 0.784050149121799, iteration: 415363
loss: 1.000024437904358,grad_norm: 0.7217507852654486, iteration: 415364
loss: 1.012273907661438,grad_norm: 0.7884857495816852, iteration: 415365
loss: 1.001276969909668,grad_norm: 0.8427481796634387, iteration: 415366
loss: 0.9922341108322144,grad_norm: 0.8010619093168111, iteration: 415367
loss: 1.0044676065444946,grad_norm: 0.6903547133965622, iteration: 415368
loss: 1.1256403923034668,grad_norm: 0.999999347934879, iteration: 415369
loss: 1.0092357397079468,grad_norm: 0.9392068801091077, iteration: 415370
loss: 1.0375699996948242,grad_norm: 0.8556078791545525, iteration: 415371
loss: 0.9643142223358154,grad_norm: 0.8825457205659528, iteration: 415372
loss: 0.9594449996948242,grad_norm: 0.8750483268521287, iteration: 415373
loss: 0.9930927753448486,grad_norm: 0.8710176709896477, iteration: 415374
loss: 0.9912335872650146,grad_norm: 0.6718822211384625, iteration: 415375
loss: 0.9927615523338318,grad_norm: 0.9141202755595375, iteration: 415376
loss: 1.005913257598877,grad_norm: 0.9658137611630209, iteration: 415377
loss: 0.9767573475837708,grad_norm: 0.8135001288257563, iteration: 415378
loss: 0.9725877642631531,grad_norm: 0.7189155921820043, iteration: 415379
loss: 1.0040111541748047,grad_norm: 0.7899019624512444, iteration: 415380
loss: 1.0107073783874512,grad_norm: 0.7027109792510505, iteration: 415381
loss: 0.9826623201370239,grad_norm: 0.7415950996258409, iteration: 415382
loss: 1.031991720199585,grad_norm: 0.9190900907398247, iteration: 415383
loss: 0.9770516157150269,grad_norm: 0.8842734888655546, iteration: 415384
loss: 0.9681853652000427,grad_norm: 0.9999990370185593, iteration: 415385
loss: 1.000205636024475,grad_norm: 0.8025070910921794, iteration: 415386
loss: 0.9842389225959778,grad_norm: 0.7371814887153443, iteration: 415387
loss: 1.0371205806732178,grad_norm: 1.000000026957326, iteration: 415388
loss: 0.9833418130874634,grad_norm: 0.8122497904708864, iteration: 415389
loss: 0.9847901463508606,grad_norm: 0.7104230652974529, iteration: 415390
loss: 1.009541392326355,grad_norm: 0.9012471113445437, iteration: 415391
loss: 1.006860613822937,grad_norm: 0.8385543927116138, iteration: 415392
loss: 0.9905116558074951,grad_norm: 0.9999998738897388, iteration: 415393
loss: 0.9954476356506348,grad_norm: 0.83392713841672, iteration: 415394
loss: 0.9837667942047119,grad_norm: 0.7840976937369389, iteration: 415395
loss: 1.0246187448501587,grad_norm: 0.8455131128704316, iteration: 415396
loss: 1.0243399143218994,grad_norm: 0.8657118208256198, iteration: 415397
loss: 0.9675153493881226,grad_norm: 0.6936334787078823, iteration: 415398
loss: 0.9918870329856873,grad_norm: 0.9275113667178426, iteration: 415399
loss: 1.0365060567855835,grad_norm: 0.8085540356426534, iteration: 415400
loss: 0.9860450625419617,grad_norm: 0.7574583111707281, iteration: 415401
loss: 1.0021157264709473,grad_norm: 0.8524722441386623, iteration: 415402
loss: 0.9746214747428894,grad_norm: 0.8639905418802044, iteration: 415403
loss: 1.0045932531356812,grad_norm: 0.7341417428439602, iteration: 415404
loss: 1.0128238201141357,grad_norm: 0.7995192764948812, iteration: 415405
loss: 1.0123218297958374,grad_norm: 0.6926772591862572, iteration: 415406
loss: 1.024859070777893,grad_norm: 0.9132617002977397, iteration: 415407
loss: 0.9852061867713928,grad_norm: 0.8704150983605426, iteration: 415408
loss: 1.0173863172531128,grad_norm: 0.6907362212231111, iteration: 415409
loss: 0.9853105545043945,grad_norm: 0.6695778583635481, iteration: 415410
loss: 1.0367940664291382,grad_norm: 0.8023769482774046, iteration: 415411
loss: 1.0234010219573975,grad_norm: 0.8124345868824302, iteration: 415412
loss: 1.0104384422302246,grad_norm: 0.6421785464333997, iteration: 415413
loss: 0.9843315482139587,grad_norm: 0.7612757650650648, iteration: 415414
loss: 1.0412460565567017,grad_norm: 0.9999991680694078, iteration: 415415
loss: 0.9758585095405579,grad_norm: 0.8234353395944392, iteration: 415416
loss: 0.9637153148651123,grad_norm: 0.7271246584265377, iteration: 415417
loss: 0.9725905060768127,grad_norm: 0.7771211120694038, iteration: 415418
loss: 1.0260310173034668,grad_norm: 0.7917762535644728, iteration: 415419
loss: 1.0329935550689697,grad_norm: 0.9711541871976445, iteration: 415420
loss: 0.9843860268592834,grad_norm: 0.8367903208214671, iteration: 415421
loss: 1.010947585105896,grad_norm: 0.8229204321470999, iteration: 415422
loss: 1.057296633720398,grad_norm: 0.8457060801600657, iteration: 415423
loss: 0.9716780185699463,grad_norm: 0.8153829011535755, iteration: 415424
loss: 0.9904659986495972,grad_norm: 0.919372071620891, iteration: 415425
loss: 0.9993069767951965,grad_norm: 0.8187676332632845, iteration: 415426
loss: 1.071363925933838,grad_norm: 0.999999100324651, iteration: 415427
loss: 1.013073444366455,grad_norm: 0.6614493173483021, iteration: 415428
loss: 1.0186514854431152,grad_norm: 0.7675001866815568, iteration: 415429
loss: 0.9787850379943848,grad_norm: 0.7244790450523864, iteration: 415430
loss: 0.9707031846046448,grad_norm: 0.920481917170953, iteration: 415431
loss: 0.9428526759147644,grad_norm: 0.8363143568681323, iteration: 415432
loss: 0.9973402619361877,grad_norm: 0.8928812048315541, iteration: 415433
loss: 1.0162436962127686,grad_norm: 0.7607027685240159, iteration: 415434
loss: 0.9830275774002075,grad_norm: 0.9361336852732177, iteration: 415435
loss: 0.9966171383857727,grad_norm: 0.6200710044665004, iteration: 415436
loss: 1.0667942762374878,grad_norm: 0.9999999348223513, iteration: 415437
loss: 1.0015788078308105,grad_norm: 0.8006167437447083, iteration: 415438
loss: 1.0668232440948486,grad_norm: 0.8320567747012788, iteration: 415439
loss: 1.0158716440200806,grad_norm: 0.8100906041000864, iteration: 415440
loss: 1.0599586963653564,grad_norm: 0.7092391721137462, iteration: 415441
loss: 0.9970731735229492,grad_norm: 0.6903985274513991, iteration: 415442
loss: 1.017673134803772,grad_norm: 0.66765986151182, iteration: 415443
loss: 0.9632313847541809,grad_norm: 0.8026343465761925, iteration: 415444
loss: 1.1094179153442383,grad_norm: 0.9999994582028664, iteration: 415445
loss: 0.9924889206886292,grad_norm: 0.804374556779842, iteration: 415446
loss: 1.003425121307373,grad_norm: 0.8583294982324656, iteration: 415447
loss: 0.9976064562797546,grad_norm: 0.755125175391406, iteration: 415448
loss: 0.9885042905807495,grad_norm: 0.7569462680451459, iteration: 415449
loss: 1.0051549673080444,grad_norm: 0.9595213563993454, iteration: 415450
loss: 0.9916297197341919,grad_norm: 0.8917304489732131, iteration: 415451
loss: 0.9866161346435547,grad_norm: 0.7900655356807219, iteration: 415452
loss: 1.0147348642349243,grad_norm: 0.8907590141074248, iteration: 415453
loss: 0.9799855947494507,grad_norm: 0.9043780799180408, iteration: 415454
loss: 1.0095856189727783,grad_norm: 0.7210963903926496, iteration: 415455
loss: 1.0173423290252686,grad_norm: 0.7735032559204158, iteration: 415456
loss: 0.997671365737915,grad_norm: 0.8363698020115492, iteration: 415457
loss: 0.9768768548965454,grad_norm: 0.6579493125876889, iteration: 415458
loss: 1.0000015497207642,grad_norm: 0.8303429617887084, iteration: 415459
loss: 1.0159811973571777,grad_norm: 0.8824760007310155, iteration: 415460
loss: 1.0099337100982666,grad_norm: 0.779276521391697, iteration: 415461
loss: 1.0113892555236816,grad_norm: 0.8657757139874515, iteration: 415462
loss: 0.9975915551185608,grad_norm: 0.7424049606727328, iteration: 415463
loss: 1.1277782917022705,grad_norm: 0.999999105695447, iteration: 415464
loss: 0.9901189208030701,grad_norm: 0.8295105269111822, iteration: 415465
loss: 0.9803563356399536,grad_norm: 0.8981152336066925, iteration: 415466
loss: 1.0832303762435913,grad_norm: 1.0000000502440993, iteration: 415467
loss: 0.9649587869644165,grad_norm: 0.8171697747588416, iteration: 415468
loss: 0.9834192395210266,grad_norm: 0.9999989973779795, iteration: 415469
loss: 0.9614351987838745,grad_norm: 0.6391623573816905, iteration: 415470
loss: 1.0088168382644653,grad_norm: 0.9560045488582488, iteration: 415471
loss: 1.0239278078079224,grad_norm: 0.9695738280457552, iteration: 415472
loss: 1.0494396686553955,grad_norm: 1.000000086323198, iteration: 415473
loss: 0.966181218624115,grad_norm: 0.9999990537687788, iteration: 415474
loss: 1.0151067972183228,grad_norm: 0.868600453230394, iteration: 415475
loss: 0.975589394569397,grad_norm: 0.9519948289703944, iteration: 415476
loss: 0.9418363571166992,grad_norm: 0.7113566440940596, iteration: 415477
loss: 0.973865270614624,grad_norm: 0.80249563269396, iteration: 415478
loss: 1.0291153192520142,grad_norm: 0.8242421033982298, iteration: 415479
loss: 1.0381532907485962,grad_norm: 1.000000058594614, iteration: 415480
loss: 1.0011918544769287,grad_norm: 0.982725960719009, iteration: 415481
loss: 0.9698581695556641,grad_norm: 0.9568699759796676, iteration: 415482
loss: 1.0058503150939941,grad_norm: 0.7853498399434894, iteration: 415483
loss: 1.0049437284469604,grad_norm: 0.7419386675333968, iteration: 415484
loss: 1.011154294013977,grad_norm: 0.7537092602475732, iteration: 415485
loss: 1.0209461450576782,grad_norm: 0.838703385078379, iteration: 415486
loss: 1.014639973640442,grad_norm: 0.6914406943061936, iteration: 415487
loss: 1.002516508102417,grad_norm: 0.7217621614819326, iteration: 415488
loss: 1.0755207538604736,grad_norm: 0.9999995377621099, iteration: 415489
loss: 1.0075619220733643,grad_norm: 0.6916960772131587, iteration: 415490
loss: 1.0596113204956055,grad_norm: 0.8575119466200436, iteration: 415491
loss: 0.9921727180480957,grad_norm: 0.6771484146177832, iteration: 415492
loss: 0.9839431047439575,grad_norm: 0.9788527197767182, iteration: 415493
loss: 1.0301944017410278,grad_norm: 0.8345586686691767, iteration: 415494
loss: 0.9899377822875977,grad_norm: 0.9784093670260123, iteration: 415495
loss: 0.9879253506660461,grad_norm: 0.7451110960354469, iteration: 415496
loss: 0.9948537349700928,grad_norm: 0.82226399372601, iteration: 415497
loss: 1.0199538469314575,grad_norm: 0.793611371234772, iteration: 415498
loss: 0.9964292645454407,grad_norm: 0.7795838191521302, iteration: 415499
loss: 1.0426347255706787,grad_norm: 0.5895796846097047, iteration: 415500
loss: 0.9907892346382141,grad_norm: 0.7147625322084019, iteration: 415501
loss: 0.9769133925437927,grad_norm: 0.9999997845137479, iteration: 415502
loss: 0.95008784532547,grad_norm: 0.688557592881481, iteration: 415503
loss: 1.0070234537124634,grad_norm: 0.802933371605827, iteration: 415504
loss: 1.0828819274902344,grad_norm: 0.8433929001234275, iteration: 415505
loss: 0.9570648670196533,grad_norm: 0.7541202025310811, iteration: 415506
loss: 0.9944183826446533,grad_norm: 0.8054063431330488, iteration: 415507
loss: 0.9938309192657471,grad_norm: 0.8199458333321359, iteration: 415508
loss: 0.9981398582458496,grad_norm: 0.6142137263668732, iteration: 415509
loss: 1.0329492092132568,grad_norm: 0.7766607780407074, iteration: 415510
loss: 1.0155903100967407,grad_norm: 0.7823235382369917, iteration: 415511
loss: 0.9958837628364563,grad_norm: 0.7999326668015488, iteration: 415512
loss: 1.0016794204711914,grad_norm: 0.7296654322607711, iteration: 415513
loss: 0.9824507832527161,grad_norm: 0.9321811569676328, iteration: 415514
loss: 1.066382646560669,grad_norm: 0.9003313004987008, iteration: 415515
loss: 0.9959345459938049,grad_norm: 0.9604649810869319, iteration: 415516
loss: 0.9959726333618164,grad_norm: 0.9153705350192655, iteration: 415517
loss: 0.9695382714271545,grad_norm: 0.7088258147981262, iteration: 415518
loss: 0.9951924681663513,grad_norm: 0.8020193389398302, iteration: 415519
loss: 1.0041940212249756,grad_norm: 0.812827997431929, iteration: 415520
loss: 1.0097676515579224,grad_norm: 0.6249351267490151, iteration: 415521
loss: 1.019061803817749,grad_norm: 0.7734872071755714, iteration: 415522
loss: 1.0073658227920532,grad_norm: 0.8890131859912497, iteration: 415523
loss: 1.0220155715942383,grad_norm: 0.743807125392245, iteration: 415524
loss: 0.9660568237304688,grad_norm: 0.8786951272878033, iteration: 415525
loss: 0.9765651226043701,grad_norm: 0.8999708046107256, iteration: 415526
loss: 1.014689564704895,grad_norm: 0.8227999366050742, iteration: 415527
loss: 1.0025537014007568,grad_norm: 0.9131509802177585, iteration: 415528
loss: 1.0078670978546143,grad_norm: 0.6601809449929713, iteration: 415529
loss: 1.0408034324645996,grad_norm: 0.9068700904544302, iteration: 415530
loss: 1.025145173072815,grad_norm: 0.879623905685614, iteration: 415531
loss: 0.9842737317085266,grad_norm: 0.8454130870620393, iteration: 415532
loss: 0.961862325668335,grad_norm: 0.9999995562740955, iteration: 415533
loss: 0.9773133993148804,grad_norm: 0.8335221071378455, iteration: 415534
loss: 0.9928383827209473,grad_norm: 0.8334312967894221, iteration: 415535
loss: 1.0087231397628784,grad_norm: 0.9999991593283563, iteration: 415536
loss: 1.0042539834976196,grad_norm: 0.8211071711517332, iteration: 415537
loss: 0.9892497062683105,grad_norm: 0.765415302544473, iteration: 415538
loss: 1.0163853168487549,grad_norm: 0.7794385200889092, iteration: 415539
loss: 1.035362720489502,grad_norm: 0.9999990728510278, iteration: 415540
loss: 0.9974328875541687,grad_norm: 0.8606227207147362, iteration: 415541
loss: 0.9778393507003784,grad_norm: 0.7696825040032338, iteration: 415542
loss: 0.9743809103965759,grad_norm: 0.629017963557934, iteration: 415543
loss: 1.0202137231826782,grad_norm: 0.8062023934308961, iteration: 415544
loss: 1.0072120428085327,grad_norm: 0.8097364655276498, iteration: 415545
loss: 1.0196270942687988,grad_norm: 0.7535950530212734, iteration: 415546
loss: 1.0252478122711182,grad_norm: 0.8204663552333831, iteration: 415547
loss: 1.0179919004440308,grad_norm: 0.8018232581471284, iteration: 415548
loss: 0.982039213180542,grad_norm: 0.9999999525992654, iteration: 415549
loss: 1.0223859548568726,grad_norm: 0.7483255832022542, iteration: 415550
loss: 1.1019015312194824,grad_norm: 0.9087372168024971, iteration: 415551
loss: 1.0143556594848633,grad_norm: 0.6545743968528601, iteration: 415552
loss: 1.0102413892745972,grad_norm: 0.6660532919887117, iteration: 415553
loss: 1.0201630592346191,grad_norm: 0.8201723398581804, iteration: 415554
loss: 0.9975318312644958,grad_norm: 0.9999993510888743, iteration: 415555
loss: 1.0625883340835571,grad_norm: 0.9096601793464065, iteration: 415556
loss: 0.9892823696136475,grad_norm: 0.6190046444061779, iteration: 415557
loss: 1.0215651988983154,grad_norm: 0.6632865663321346, iteration: 415558
loss: 1.0013147592544556,grad_norm: 0.7585911024664153, iteration: 415559
loss: 0.9902099967002869,grad_norm: 0.7666445718748507, iteration: 415560
loss: 0.9983164072036743,grad_norm: 0.8299801683455089, iteration: 415561
loss: 0.9724599719047546,grad_norm: 0.7571240602460563, iteration: 415562
loss: 1.0103166103363037,grad_norm: 0.9999997918454334, iteration: 415563
loss: 0.9845218658447266,grad_norm: 0.7283752522864766, iteration: 415564
loss: 1.0305719375610352,grad_norm: 0.8821731852928435, iteration: 415565
loss: 0.974024772644043,grad_norm: 0.891166979754714, iteration: 415566
loss: 1.0042535066604614,grad_norm: 0.9999993702846582, iteration: 415567
loss: 1.0094170570373535,grad_norm: 0.9041169577283303, iteration: 415568
loss: 1.016789197921753,grad_norm: 0.9999990866805859, iteration: 415569
loss: 1.1026594638824463,grad_norm: 0.9999990660408729, iteration: 415570
loss: 1.0068453550338745,grad_norm: 0.7700093198232971, iteration: 415571
loss: 0.9978439211845398,grad_norm: 0.8514489067301017, iteration: 415572
loss: 0.9996389746665955,grad_norm: 0.7286783848874003, iteration: 415573
loss: 1.0253958702087402,grad_norm: 0.9999990790803587, iteration: 415574
loss: 1.0096862316131592,grad_norm: 0.9097344651011132, iteration: 415575
loss: 1.0003328323364258,grad_norm: 0.7233279653651851, iteration: 415576
loss: 1.0240790843963623,grad_norm: 0.965511075181378, iteration: 415577
loss: 1.0155346393585205,grad_norm: 0.9176599878554305, iteration: 415578
loss: 1.0008503198623657,grad_norm: 0.7877928739684209, iteration: 415579
loss: 0.9821850061416626,grad_norm: 0.9999993873919277, iteration: 415580
loss: 0.9995861649513245,grad_norm: 0.7651663667116412, iteration: 415581
loss: 1.0639725923538208,grad_norm: 0.9999993542297474, iteration: 415582
loss: 0.9925832152366638,grad_norm: 0.7913234826671214, iteration: 415583
loss: 1.028253197669983,grad_norm: 0.7677831413489471, iteration: 415584
loss: 1.0045363903045654,grad_norm: 0.890437336886815, iteration: 415585
loss: 1.005901575088501,grad_norm: 0.6603730218334324, iteration: 415586
loss: 1.0133748054504395,grad_norm: 0.730231430477024, iteration: 415587
loss: 1.0020618438720703,grad_norm: 0.7635800050410595, iteration: 415588
loss: 1.0244818925857544,grad_norm: 0.7602218416917146, iteration: 415589
loss: 1.0367448329925537,grad_norm: 0.6963246305207393, iteration: 415590
loss: 0.983221709728241,grad_norm: 0.7867433629069608, iteration: 415591
loss: 1.1068909168243408,grad_norm: 0.9999995078867124, iteration: 415592
loss: 0.9529848694801331,grad_norm: 0.7579273821395919, iteration: 415593
loss: 1.0515992641448975,grad_norm: 0.7672047677601315, iteration: 415594
loss: 1.0290380716323853,grad_norm: 0.8670678489740594, iteration: 415595
loss: 1.0127061605453491,grad_norm: 0.6200426818899522, iteration: 415596
loss: 0.9924767017364502,grad_norm: 0.8049236753775475, iteration: 415597
loss: 1.0229175090789795,grad_norm: 0.7893035830609069, iteration: 415598
loss: 0.9905393719673157,grad_norm: 0.7384612292701748, iteration: 415599
loss: 0.9956785440444946,grad_norm: 0.7615976723694237, iteration: 415600
loss: 0.9827081561088562,grad_norm: 0.8882583096716605, iteration: 415601
loss: 0.9564969539642334,grad_norm: 0.8297266094015217, iteration: 415602
loss: 0.9842370748519897,grad_norm: 0.8175213546713705, iteration: 415603
loss: 0.9830802083015442,grad_norm: 0.6256154597357182, iteration: 415604
loss: 0.9831202626228333,grad_norm: 0.7829126338234147, iteration: 415605
loss: 1.097381353378296,grad_norm: 0.9999993421891921, iteration: 415606
loss: 1.0280053615570068,grad_norm: 0.9999993388653071, iteration: 415607
loss: 1.0179427862167358,grad_norm: 0.7692055040052397, iteration: 415608
loss: 0.9410443902015686,grad_norm: 0.9078035082549256, iteration: 415609
loss: 0.9994616508483887,grad_norm: 0.9148299595892763, iteration: 415610
loss: 1.0412486791610718,grad_norm: 0.9785411240426343, iteration: 415611
loss: 0.9817877411842346,grad_norm: 0.7711159877678309, iteration: 415612
loss: 0.9938300848007202,grad_norm: 0.7398272287497376, iteration: 415613
loss: 1.0180779695510864,grad_norm: 0.9999999466129139, iteration: 415614
loss: 1.0264811515808105,grad_norm: 0.9999995118727836, iteration: 415615
loss: 1.0695128440856934,grad_norm: 0.9999998950415601, iteration: 415616
loss: 1.1315710544586182,grad_norm: 0.999999054833301, iteration: 415617
loss: 1.0317542552947998,grad_norm: 0.8844727463841437, iteration: 415618
loss: 1.0237741470336914,grad_norm: 0.8368372106868137, iteration: 415619
loss: 0.9619820713996887,grad_norm: 0.8781131151189029, iteration: 415620
loss: 1.0231119394302368,grad_norm: 0.769276153165504, iteration: 415621
loss: 1.0546951293945312,grad_norm: 0.6864136244772955, iteration: 415622
loss: 1.130960464477539,grad_norm: 0.9999992666930827, iteration: 415623
loss: 1.0112112760543823,grad_norm: 0.7568885837887172, iteration: 415624
loss: 1.014868140220642,grad_norm: 0.9999992646927317, iteration: 415625
loss: 1.0097618103027344,grad_norm: 0.9999992578911937, iteration: 415626
loss: 1.0568395853042603,grad_norm: 0.9999992255811944, iteration: 415627
loss: 0.9995410442352295,grad_norm: 0.8617383838471646, iteration: 415628
loss: 1.0172309875488281,grad_norm: 0.9228552095635993, iteration: 415629
loss: 1.0123738050460815,grad_norm: 0.7919298915612997, iteration: 415630
loss: 1.0369007587432861,grad_norm: 0.7534547072247751, iteration: 415631
loss: 1.1020970344543457,grad_norm: 0.7792424632864443, iteration: 415632
loss: 1.018953800201416,grad_norm: 0.816223097278591, iteration: 415633
loss: 0.993011474609375,grad_norm: 0.749801990372593, iteration: 415634
loss: 1.0111627578735352,grad_norm: 0.8980482409853069, iteration: 415635
loss: 0.9762012958526611,grad_norm: 0.8410572902628305, iteration: 415636
loss: 0.9836379885673523,grad_norm: 0.9006983349696056, iteration: 415637
loss: 0.9841110110282898,grad_norm: 0.897659916089358, iteration: 415638
loss: 1.0122437477111816,grad_norm: 0.8399145457818109, iteration: 415639
loss: 0.9906358122825623,grad_norm: 0.7309725915523582, iteration: 415640
loss: 1.027849555015564,grad_norm: 0.9999998772517981, iteration: 415641
loss: 0.9960458278656006,grad_norm: 0.8513286373302276, iteration: 415642
loss: 1.0045323371887207,grad_norm: 0.8800217459741156, iteration: 415643
loss: 1.0040838718414307,grad_norm: 0.9999992145357098, iteration: 415644
loss: 1.0180466175079346,grad_norm: 0.7860979127069546, iteration: 415645
loss: 1.0848171710968018,grad_norm: 0.9999990121796996, iteration: 415646
loss: 0.9929523468017578,grad_norm: 0.6826652451422244, iteration: 415647
loss: 1.039067268371582,grad_norm: 0.7237188066654586, iteration: 415648
loss: 0.9788328409194946,grad_norm: 0.9999996533415544, iteration: 415649
loss: 1.0440709590911865,grad_norm: 0.7021060081311924, iteration: 415650
loss: 0.9914631247520447,grad_norm: 0.6793560592676151, iteration: 415651
loss: 0.9857834577560425,grad_norm: 0.73622482115008, iteration: 415652
loss: 1.025673747062683,grad_norm: 0.8779875622728984, iteration: 415653
loss: 1.0066134929656982,grad_norm: 0.8347529330930195, iteration: 415654
loss: 1.0677157640457153,grad_norm: 0.999999509814237, iteration: 415655
loss: 0.9652490019798279,grad_norm: 0.7893410593174962, iteration: 415656
loss: 0.9980101585388184,grad_norm: 0.8127907772481171, iteration: 415657
loss: 0.9628512263298035,grad_norm: 0.7883114113612208, iteration: 415658
loss: 1.0078765153884888,grad_norm: 0.9999995370996783, iteration: 415659
loss: 1.1475197076797485,grad_norm: 0.999999200585669, iteration: 415660
loss: 1.013242244720459,grad_norm: 0.7648121356654636, iteration: 415661
loss: 1.0020579099655151,grad_norm: 0.9999994075000176, iteration: 415662
loss: 1.0277293920516968,grad_norm: 0.7616995274481485, iteration: 415663
loss: 0.994810938835144,grad_norm: 0.7387884640120528, iteration: 415664
loss: 1.0799998044967651,grad_norm: 0.9999990875017004, iteration: 415665
loss: 1.0871392488479614,grad_norm: 0.9999991912218839, iteration: 415666
loss: 1.0272374153137207,grad_norm: 0.8215993142198122, iteration: 415667
loss: 1.0756525993347168,grad_norm: 0.9999999490880801, iteration: 415668
loss: 0.9853670597076416,grad_norm: 0.8041065059942587, iteration: 415669
loss: 1.0414648056030273,grad_norm: 0.9925837651020297, iteration: 415670
loss: 1.1163489818572998,grad_norm: 0.999999449537391, iteration: 415671
loss: 1.0643998384475708,grad_norm: 0.999999331323831, iteration: 415672
loss: 1.0097134113311768,grad_norm: 0.8362310994520937, iteration: 415673
loss: 0.9854409098625183,grad_norm: 0.723584162875448, iteration: 415674
loss: 0.9983766674995422,grad_norm: 0.7594242475698922, iteration: 415675
loss: 0.979964017868042,grad_norm: 0.8929794312455961, iteration: 415676
loss: 0.9772777557373047,grad_norm: 0.897425168188373, iteration: 415677
loss: 1.0457350015640259,grad_norm: 0.9999997211275072, iteration: 415678
loss: 1.0203416347503662,grad_norm: 0.7123704963585464, iteration: 415679
loss: 0.9885565638542175,grad_norm: 0.8301996587970076, iteration: 415680
loss: 1.029605746269226,grad_norm: 0.8131434260293235, iteration: 415681
loss: 1.0195237398147583,grad_norm: 0.7852059719158043, iteration: 415682
loss: 1.0858895778656006,grad_norm: 0.9079198259746496, iteration: 415683
loss: 1.075629472732544,grad_norm: 0.9999993295506912, iteration: 415684
loss: 1.0201003551483154,grad_norm: 0.9734503452397342, iteration: 415685
loss: 1.048216462135315,grad_norm: 0.9999994605952014, iteration: 415686
loss: 1.020241379737854,grad_norm: 0.8797816565932859, iteration: 415687
loss: 1.0604461431503296,grad_norm: 0.6365993459301953, iteration: 415688
loss: 1.060868501663208,grad_norm: 0.9999997745331721, iteration: 415689
loss: 1.0639913082122803,grad_norm: 0.7571561754324005, iteration: 415690
loss: 1.0006753206253052,grad_norm: 0.7611787571959857, iteration: 415691
loss: 1.065325379371643,grad_norm: 0.9999993823429655, iteration: 415692
loss: 1.0423643589019775,grad_norm: 0.8287912728622476, iteration: 415693
loss: 1.050352692604065,grad_norm: 0.999999237510896, iteration: 415694
loss: 1.0456647872924805,grad_norm: 0.9999999323292478, iteration: 415695
loss: 1.05501389503479,grad_norm: 0.9999998210848835, iteration: 415696
loss: 1.0327534675598145,grad_norm: 0.872611188001722, iteration: 415697
loss: 1.0195903778076172,grad_norm: 0.9999998635743659, iteration: 415698
loss: 1.0754636526107788,grad_norm: 0.9999998408062466, iteration: 415699
loss: 1.0086700916290283,grad_norm: 0.999999965291299, iteration: 415700
loss: 1.0765763521194458,grad_norm: 0.9999990673923863, iteration: 415701
loss: 1.0436792373657227,grad_norm: 0.99999920928262, iteration: 415702
loss: 0.9955898523330688,grad_norm: 0.8977539533220582, iteration: 415703
loss: 0.9757052063941956,grad_norm: 0.9999991781459167, iteration: 415704
loss: 0.996045708656311,grad_norm: 0.9999996467817701, iteration: 415705
loss: 1.0635967254638672,grad_norm: 0.999999236206881, iteration: 415706
loss: 1.1479058265686035,grad_norm: 0.9999996444067133, iteration: 415707
loss: 1.0610685348510742,grad_norm: 0.9999992817739006, iteration: 415708
loss: 1.0231318473815918,grad_norm: 0.9615246378359955, iteration: 415709
loss: 1.0896943807601929,grad_norm: 0.973582907786716, iteration: 415710
loss: 1.1055550575256348,grad_norm: 0.9999998071386302, iteration: 415711
loss: 1.053968906402588,grad_norm: 0.8615086761619659, iteration: 415712
loss: 1.1477969884872437,grad_norm: 0.9999997372604369, iteration: 415713
loss: 1.0412993431091309,grad_norm: 0.8030839125808125, iteration: 415714
loss: 1.319209098815918,grad_norm: 0.9999995128968594, iteration: 415715
loss: 1.0294945240020752,grad_norm: 0.9999998942039644, iteration: 415716
loss: 1.0374786853790283,grad_norm: 0.9999999282651336, iteration: 415717
loss: 1.0148603916168213,grad_norm: 0.9999989165317497, iteration: 415718
loss: 1.3140383958816528,grad_norm: 0.9999991765870612, iteration: 415719
loss: 0.9988318085670471,grad_norm: 0.7257794056112515, iteration: 415720
loss: 1.055265188217163,grad_norm: 0.9999993828452265, iteration: 415721
loss: 1.1137109994888306,grad_norm: 0.9999997846105755, iteration: 415722
loss: 1.0692930221557617,grad_norm: 0.9999999613788051, iteration: 415723
loss: 1.0538212060928345,grad_norm: 0.9999991632451521, iteration: 415724
loss: 1.081363558769226,grad_norm: 0.9999995796633145, iteration: 415725
loss: 1.2800240516662598,grad_norm: 0.9999998016210002, iteration: 415726
loss: 1.1004678010940552,grad_norm: 0.9999996593940387, iteration: 415727
loss: 1.0845768451690674,grad_norm: 0.9999992066066387, iteration: 415728
loss: 1.1618711948394775,grad_norm: 0.999999983629689, iteration: 415729
loss: 1.0894153118133545,grad_norm: 0.9999991761518598, iteration: 415730
loss: 1.1374521255493164,grad_norm: 0.9999998487220619, iteration: 415731
loss: 1.1391912698745728,grad_norm: 0.9999998776410988, iteration: 415732
loss: 1.150753378868103,grad_norm: 0.9999999874928813, iteration: 415733
loss: 1.093837022781372,grad_norm: 0.9999995557368382, iteration: 415734
loss: 1.0661766529083252,grad_norm: 0.9999993256801591, iteration: 415735
loss: 1.0573835372924805,grad_norm: 0.8750905799215402, iteration: 415736
loss: 1.1073781251907349,grad_norm: 0.9999991737433112, iteration: 415737
loss: 1.1814484596252441,grad_norm: 0.9999991291268707, iteration: 415738
loss: 1.0944523811340332,grad_norm: 0.9999995262516217, iteration: 415739
loss: 0.9913620352745056,grad_norm: 0.8151279768891916, iteration: 415740
loss: 1.117678165435791,grad_norm: 0.9340829509256422, iteration: 415741
loss: 1.1234177350997925,grad_norm: 1.000000044505658, iteration: 415742
loss: 1.0320396423339844,grad_norm: 0.999999759686104, iteration: 415743
loss: 1.182445764541626,grad_norm: 0.9999999644516806, iteration: 415744
loss: 1.0386196374893188,grad_norm: 0.9999990744361141, iteration: 415745
loss: 1.1906265020370483,grad_norm: 0.9999993810949498, iteration: 415746
loss: 1.1639755964279175,grad_norm: 0.9999992002364124, iteration: 415747
loss: 1.0544737577438354,grad_norm: 0.9999996255341707, iteration: 415748
loss: 1.008335828781128,grad_norm: 0.8459241591120408, iteration: 415749
loss: 1.1329282522201538,grad_norm: 0.9999996536762281, iteration: 415750
loss: 1.0765902996063232,grad_norm: 0.9154379563094337, iteration: 415751
loss: 1.2041420936584473,grad_norm: 0.999999441550093, iteration: 415752
loss: 1.058589220046997,grad_norm: 0.9999997001598956, iteration: 415753
loss: 1.041298508644104,grad_norm: 0.8988114697054608, iteration: 415754
loss: 1.109352469444275,grad_norm: 0.9131834509320814, iteration: 415755
loss: 0.9982503056526184,grad_norm: 0.8565787299940241, iteration: 415756
loss: 1.049315094947815,grad_norm: 0.9999991051659247, iteration: 415757
loss: 0.999671220779419,grad_norm: 0.729430019636418, iteration: 415758
loss: 1.0713943243026733,grad_norm: 0.9999993098046962, iteration: 415759
loss: 1.1021796464920044,grad_norm: 0.9999997494330758, iteration: 415760
loss: 1.1090337038040161,grad_norm: 0.9999994723797273, iteration: 415761
loss: 1.0460805892944336,grad_norm: 0.664236165151353, iteration: 415762
loss: 1.0154128074645996,grad_norm: 0.8744665477396493, iteration: 415763
loss: 0.9983446598052979,grad_norm: 0.733146037886723, iteration: 415764
loss: 1.0353137254714966,grad_norm: 0.9999991895462693, iteration: 415765
loss: 1.0140230655670166,grad_norm: 0.9489122000222672, iteration: 415766
loss: 1.078137993812561,grad_norm: 0.9032474571390298, iteration: 415767
loss: 1.0295699834823608,grad_norm: 0.9275639183380737, iteration: 415768
loss: 1.067962884902954,grad_norm: 0.9999991392350543, iteration: 415769
loss: 0.9693996906280518,grad_norm: 0.7627848527207611, iteration: 415770
loss: 0.994667112827301,grad_norm: 0.9999997440561794, iteration: 415771
loss: 1.036751389503479,grad_norm: 0.9386852802673321, iteration: 415772
loss: 1.0362640619277954,grad_norm: 0.9999997753160398, iteration: 415773
loss: 0.9823310971260071,grad_norm: 0.9069307403028176, iteration: 415774
loss: 1.1399425268173218,grad_norm: 0.88622093986877, iteration: 415775
loss: 1.011578917503357,grad_norm: 0.9999993385819161, iteration: 415776
loss: 1.15268874168396,grad_norm: 0.934173939702457, iteration: 415777
loss: 1.0108604431152344,grad_norm: 0.7184786978625727, iteration: 415778
loss: 1.0747349262237549,grad_norm: 0.9999993742741607, iteration: 415779
loss: 1.1418827772140503,grad_norm: 0.9999995618356229, iteration: 415780
loss: 1.0323582887649536,grad_norm: 0.7141125276002467, iteration: 415781
loss: 1.0149167776107788,grad_norm: 0.999999906451772, iteration: 415782
loss: 1.0773967504501343,grad_norm: 0.8871129738098716, iteration: 415783
loss: 1.0158028602600098,grad_norm: 0.8090614988723305, iteration: 415784
loss: 1.0612404346466064,grad_norm: 0.8232037713624827, iteration: 415785
loss: 0.9765768647193909,grad_norm: 0.7119345246060752, iteration: 415786
loss: 1.012027382850647,grad_norm: 0.6981160991506536, iteration: 415787
loss: 1.0110502243041992,grad_norm: 0.8421054381237378, iteration: 415788
loss: 0.9742433428764343,grad_norm: 0.6432667655040852, iteration: 415789
loss: 1.0394043922424316,grad_norm: 0.9999992623119786, iteration: 415790
loss: 1.0362560749053955,grad_norm: 0.999999841360325, iteration: 415791
loss: 1.0272046327590942,grad_norm: 0.9999990855340596, iteration: 415792
loss: 1.0110907554626465,grad_norm: 0.9999994891004931, iteration: 415793
loss: 0.9940903186798096,grad_norm: 0.9999990313645878, iteration: 415794
loss: 1.096522331237793,grad_norm: 0.9999998825430897, iteration: 415795
loss: 0.9856848120689392,grad_norm: 0.696195144685671, iteration: 415796
loss: 0.9782429933547974,grad_norm: 0.7673649123020957, iteration: 415797
loss: 1.0001143217086792,grad_norm: 0.999999360586179, iteration: 415798
loss: 1.040754795074463,grad_norm: 0.9678219829588831, iteration: 415799
loss: 1.005043387413025,grad_norm: 0.9963038180634726, iteration: 415800
loss: 0.9638380408287048,grad_norm: 0.9999991563572768, iteration: 415801
loss: 1.054072618484497,grad_norm: 0.9999995916693638, iteration: 415802
loss: 1.0499941110610962,grad_norm: 0.9999991887920983, iteration: 415803
loss: 1.1397578716278076,grad_norm: 0.9999998316518163, iteration: 415804
loss: 1.0744012594223022,grad_norm: 0.9999992815885527, iteration: 415805
loss: 1.10487961769104,grad_norm: 0.9999998260814056, iteration: 415806
loss: 1.0590075254440308,grad_norm: 1.000000065937309, iteration: 415807
loss: 1.0860977172851562,grad_norm: 0.9999997166409581, iteration: 415808
loss: 1.1633610725402832,grad_norm: 0.9999992934616239, iteration: 415809
loss: 1.0211193561553955,grad_norm: 0.9999993358038982, iteration: 415810
loss: 1.0260331630706787,grad_norm: 0.6823645889089238, iteration: 415811
loss: 1.0042729377746582,grad_norm: 0.8799748987393657, iteration: 415812
loss: 1.0064663887023926,grad_norm: 0.9999990051172888, iteration: 415813
loss: 0.9820508360862732,grad_norm: 0.8154703874306316, iteration: 415814
loss: 1.0119932889938354,grad_norm: 0.730129444321982, iteration: 415815
loss: 1.0081102848052979,grad_norm: 0.7082913839536153, iteration: 415816
loss: 0.9627119302749634,grad_norm: 0.7030096572406066, iteration: 415817
loss: 1.038569450378418,grad_norm: 0.9999992639339274, iteration: 415818
loss: 1.0131595134735107,grad_norm: 0.7829009141014471, iteration: 415819
loss: 0.9880635738372803,grad_norm: 0.6696898547446752, iteration: 415820
loss: 1.0133683681488037,grad_norm: 0.8007623499330945, iteration: 415821
loss: 1.0249966382980347,grad_norm: 0.7160412549967718, iteration: 415822
loss: 0.9932914972305298,grad_norm: 0.6950952566520654, iteration: 415823
loss: 0.9630979299545288,grad_norm: 0.8025121718671527, iteration: 415824
loss: 0.9995668530464172,grad_norm: 0.8376384164617336, iteration: 415825
loss: 1.0434027910232544,grad_norm: 0.9341406727731433, iteration: 415826
loss: 1.0136643648147583,grad_norm: 0.9400248018073241, iteration: 415827
loss: 1.045304775238037,grad_norm: 0.9044437893870029, iteration: 415828
loss: 1.0262254476547241,grad_norm: 0.9999993404427432, iteration: 415829
loss: 0.9904507994651794,grad_norm: 0.9671962558775662, iteration: 415830
loss: 1.0410536527633667,grad_norm: 0.9478744673557186, iteration: 415831
loss: 1.0818707942962646,grad_norm: 0.8158620355260034, iteration: 415832
loss: 0.9937943816184998,grad_norm: 0.9999998589006239, iteration: 415833
loss: 1.098069429397583,grad_norm: 0.9999991116198452, iteration: 415834
loss: 0.9701555371284485,grad_norm: 0.7427767993070824, iteration: 415835
loss: 1.1026464700698853,grad_norm: 0.7239086191897381, iteration: 415836
loss: 1.017978310585022,grad_norm: 0.7586103471008051, iteration: 415837
loss: 1.0223349332809448,grad_norm: 0.8886408853218557, iteration: 415838
loss: 1.0038905143737793,grad_norm: 0.6949676581964335, iteration: 415839
loss: 1.0030289888381958,grad_norm: 0.783257033582491, iteration: 415840
loss: 1.0283161401748657,grad_norm: 0.7579842394246338, iteration: 415841
loss: 1.0213686227798462,grad_norm: 0.7301236142681988, iteration: 415842
loss: 1.021393895149231,grad_norm: 0.699723987298836, iteration: 415843
loss: 1.0161104202270508,grad_norm: 0.7097808299234786, iteration: 415844
loss: 1.0295238494873047,grad_norm: 0.999999957739204, iteration: 415845
loss: 1.0255156755447388,grad_norm: 0.8038386893819793, iteration: 415846
loss: 0.9647510051727295,grad_norm: 0.8512308094551458, iteration: 415847
loss: 1.075260043144226,grad_norm: 0.9999998464618194, iteration: 415848
loss: 1.008539080619812,grad_norm: 0.841857652003556, iteration: 415849
loss: 1.0415115356445312,grad_norm: 1.0000000508594917, iteration: 415850
loss: 0.9980400204658508,grad_norm: 0.7544261439994369, iteration: 415851
loss: 1.0240126848220825,grad_norm: 0.7465704150120959, iteration: 415852
loss: 1.0485814809799194,grad_norm: 0.7650798656014486, iteration: 415853
loss: 0.9847308993339539,grad_norm: 0.7758069168141583, iteration: 415854
loss: 0.9684346318244934,grad_norm: 0.9568596506162117, iteration: 415855
loss: 1.096430778503418,grad_norm: 0.9999991484919315, iteration: 415856
loss: 1.0285024642944336,grad_norm: 0.8070897342037292, iteration: 415857
loss: 1.1138029098510742,grad_norm: 0.9999999991038392, iteration: 415858
loss: 1.0144346952438354,grad_norm: 0.9438415392034454, iteration: 415859
loss: 1.0278596878051758,grad_norm: 0.8337403410555375, iteration: 415860
loss: 0.977642834186554,grad_norm: 0.9999996207954075, iteration: 415861
loss: 1.0486693382263184,grad_norm: 0.796807648473217, iteration: 415862
loss: 1.0108590126037598,grad_norm: 0.7516793241634916, iteration: 415863
loss: 1.0271145105361938,grad_norm: 0.712167967381626, iteration: 415864
loss: 0.965735912322998,grad_norm: 0.839916354725757, iteration: 415865
loss: 0.9778291583061218,grad_norm: 0.7527868540050341, iteration: 415866
loss: 0.986249566078186,grad_norm: 0.999999271927968, iteration: 415867
loss: 1.0400638580322266,grad_norm: 0.9999992724818891, iteration: 415868
loss: 1.0082817077636719,grad_norm: 0.8276395828248272, iteration: 415869
loss: 1.0063554048538208,grad_norm: 0.7133338402526023, iteration: 415870
loss: 1.0489481687545776,grad_norm: 0.9999991563369638, iteration: 415871
loss: 0.996498703956604,grad_norm: 0.6759341584295386, iteration: 415872
loss: 1.0287573337554932,grad_norm: 0.9999999031337548, iteration: 415873
loss: 1.0474907159805298,grad_norm: 0.9999989874488538, iteration: 415874
loss: 1.0208598375320435,grad_norm: 0.772501740722017, iteration: 415875
loss: 0.9956154227256775,grad_norm: 0.8741123837084223, iteration: 415876
loss: 1.012067437171936,grad_norm: 0.6712658036854106, iteration: 415877
loss: 0.983704149723053,grad_norm: 0.7616342222145311, iteration: 415878
loss: 1.0006952285766602,grad_norm: 0.9999993411160937, iteration: 415879
loss: 1.1156283617019653,grad_norm: 0.9999991756397443, iteration: 415880
loss: 1.020905613899231,grad_norm: 0.6775872278529371, iteration: 415881
loss: 1.017948031425476,grad_norm: 0.7059192040600142, iteration: 415882
loss: 0.990453839302063,grad_norm: 0.7604180345070684, iteration: 415883
loss: 1.027523159980774,grad_norm: 0.9999996853488989, iteration: 415884
loss: 0.9886391162872314,grad_norm: 0.9644930966744293, iteration: 415885
loss: 1.0357909202575684,grad_norm: 0.8663014584583615, iteration: 415886
loss: 1.0713154077529907,grad_norm: 0.9999997048612504, iteration: 415887
loss: 1.0934476852416992,grad_norm: 0.8192858254530984, iteration: 415888
loss: 0.9718415141105652,grad_norm: 0.9613837064123428, iteration: 415889
loss: 0.9928406476974487,grad_norm: 0.823701903468089, iteration: 415890
loss: 1.0372291803359985,grad_norm: 0.8831899852282571, iteration: 415891
loss: 0.9842842817306519,grad_norm: 0.8783336106476491, iteration: 415892
loss: 1.0103602409362793,grad_norm: 0.9999989836169934, iteration: 415893
loss: 1.0197852849960327,grad_norm: 0.8569376115605155, iteration: 415894
loss: 1.0035176277160645,grad_norm: 0.7159398042407712, iteration: 415895
loss: 1.0030773878097534,grad_norm: 0.7497133502820947, iteration: 415896
loss: 1.017261266708374,grad_norm: 0.7440416020054562, iteration: 415897
loss: 0.9949547052383423,grad_norm: 0.7623712391611901, iteration: 415898
loss: 1.0191588401794434,grad_norm: 0.784532284232543, iteration: 415899
loss: 0.9896746277809143,grad_norm: 0.8638631063477864, iteration: 415900
loss: 1.0180761814117432,grad_norm: 0.8520007357445144, iteration: 415901
loss: 1.0805588960647583,grad_norm: 0.7863083907325207, iteration: 415902
loss: 1.1032741069793701,grad_norm: 0.9999998690812889, iteration: 415903
loss: 1.006417155265808,grad_norm: 0.6529555541264173, iteration: 415904
loss: 0.979184091091156,grad_norm: 0.7098237021548489, iteration: 415905
loss: 1.0150741338729858,grad_norm: 0.913364586714878, iteration: 415906
loss: 0.9806546568870544,grad_norm: 0.7689228332158518, iteration: 415907
loss: 0.9897190928459167,grad_norm: 0.8536875415675219, iteration: 415908
loss: 1.0144522190093994,grad_norm: 0.852869793567987, iteration: 415909
loss: 1.0121502876281738,grad_norm: 0.8090892152687168, iteration: 415910
loss: 1.0045957565307617,grad_norm: 0.7746502518747186, iteration: 415911
loss: 1.021278738975525,grad_norm: 0.9999991159333285, iteration: 415912
loss: 1.029139757156372,grad_norm: 0.9999993789616116, iteration: 415913
loss: 0.9913991689682007,grad_norm: 0.780541883849408, iteration: 415914
loss: 0.990077793598175,grad_norm: 0.9845665133636383, iteration: 415915
loss: 0.9773824214935303,grad_norm: 0.9999991594360724, iteration: 415916
loss: 0.9803200960159302,grad_norm: 0.6976642855362828, iteration: 415917
loss: 1.0320823192596436,grad_norm: 0.8570067581095192, iteration: 415918
loss: 0.9922627210617065,grad_norm: 0.7178247217239242, iteration: 415919
loss: 1.0098825693130493,grad_norm: 0.9999993386699325, iteration: 415920
loss: 1.0478847026824951,grad_norm: 0.9999992988841089, iteration: 415921
loss: 0.9778496026992798,grad_norm: 0.7046238403515872, iteration: 415922
loss: 1.0431938171386719,grad_norm: 0.9999995009485875, iteration: 415923
loss: 0.982860267162323,grad_norm: 0.6923033738955422, iteration: 415924
loss: 1.0232144594192505,grad_norm: 0.9999999187000006, iteration: 415925
loss: 1.028501033782959,grad_norm: 0.8395715921803479, iteration: 415926
loss: 1.00911545753479,grad_norm: 0.7222481010759234, iteration: 415927
loss: 0.9845201373100281,grad_norm: 0.8444806868154401, iteration: 415928
loss: 0.9943432211875916,grad_norm: 0.8626013570745322, iteration: 415929
loss: 1.0258721113204956,grad_norm: 0.7229058643392101, iteration: 415930
loss: 0.9990590810775757,grad_norm: 0.7849505512170193, iteration: 415931
loss: 1.002501368522644,grad_norm: 0.9999990833752765, iteration: 415932
loss: 1.014142632484436,grad_norm: 0.7562751169627453, iteration: 415933
loss: 1.0131962299346924,grad_norm: 0.8560196016432554, iteration: 415934
loss: 0.9832973480224609,grad_norm: 0.6951260625351785, iteration: 415935
loss: 1.0120583772659302,grad_norm: 0.9999994033716966, iteration: 415936
loss: 1.001158595085144,grad_norm: 0.7514712227196172, iteration: 415937
loss: 1.0203381776809692,grad_norm: 0.7640182641749771, iteration: 415938
loss: 0.9892006516456604,grad_norm: 0.9999991275443892, iteration: 415939
loss: 1.0188177824020386,grad_norm: 0.8034691804258647, iteration: 415940
loss: 1.0242964029312134,grad_norm: 0.9891925783682279, iteration: 415941
loss: 1.0145074129104614,grad_norm: 0.69016648548479, iteration: 415942
loss: 0.9822375178337097,grad_norm: 0.8228551262911858, iteration: 415943
loss: 0.9843952059745789,grad_norm: 0.8069892125236153, iteration: 415944
loss: 1.04458749294281,grad_norm: 0.9999990386485695, iteration: 415945
loss: 1.0516973733901978,grad_norm: 0.9999994868035175, iteration: 415946
loss: 1.0438271760940552,grad_norm: 0.7347019812413309, iteration: 415947
loss: 1.0744558572769165,grad_norm: 0.7073421424724393, iteration: 415948
loss: 1.0571882724761963,grad_norm: 0.9999997175441706, iteration: 415949
loss: 0.9889987111091614,grad_norm: 0.7520445878957632, iteration: 415950
loss: 0.9915385842323303,grad_norm: 0.8046615786293307, iteration: 415951
loss: 0.9960514307022095,grad_norm: 0.9999996498767253, iteration: 415952
loss: 0.9973998069763184,grad_norm: 0.7040693949711869, iteration: 415953
loss: 1.0577971935272217,grad_norm: 0.9999990465010407, iteration: 415954
loss: 0.959078311920166,grad_norm: 0.7747946980641347, iteration: 415955
loss: 1.0060750246047974,grad_norm: 0.77066945618651, iteration: 415956
loss: 1.034002661705017,grad_norm: 0.7772338597172166, iteration: 415957
loss: 1.024797797203064,grad_norm: 0.7441422463426047, iteration: 415958
loss: 0.9748746156692505,grad_norm: 0.9005436320972675, iteration: 415959
loss: 1.0535469055175781,grad_norm: 0.999999560352985, iteration: 415960
loss: 0.9888012409210205,grad_norm: 0.8121714717918517, iteration: 415961
loss: 1.0026359558105469,grad_norm: 0.8552010605399856, iteration: 415962
loss: 0.9466655254364014,grad_norm: 0.999999252966914, iteration: 415963
loss: 1.0126385688781738,grad_norm: 0.9999990868578323, iteration: 415964
loss: 1.0242129564285278,grad_norm: 0.9222806763260794, iteration: 415965
loss: 0.9998019337654114,grad_norm: 0.8470312232348591, iteration: 415966
loss: 0.9844132661819458,grad_norm: 0.9999990751478902, iteration: 415967
loss: 1.0518205165863037,grad_norm: 0.9530685962018719, iteration: 415968
loss: 1.0129027366638184,grad_norm: 0.7215702682108737, iteration: 415969
loss: 0.9778465032577515,grad_norm: 0.8596441694678851, iteration: 415970
loss: 0.9939188361167908,grad_norm: 0.973045234579781, iteration: 415971
loss: 1.0011417865753174,grad_norm: 0.7741018323764051, iteration: 415972
loss: 1.0303620100021362,grad_norm: 0.8195783592965368, iteration: 415973
loss: 1.0392372608184814,grad_norm: 0.7447734521778597, iteration: 415974
loss: 1.0031393766403198,grad_norm: 0.7680000974613049, iteration: 415975
loss: 1.067880630493164,grad_norm: 0.99999958492044, iteration: 415976
loss: 0.9965878129005432,grad_norm: 0.9999989905699311, iteration: 415977
loss: 1.0136176347732544,grad_norm: 0.7811476662504019, iteration: 415978
loss: 0.9928076863288879,grad_norm: 0.7577930671443148, iteration: 415979
loss: 1.0122803449630737,grad_norm: 0.7710352523612474, iteration: 415980
loss: 1.0309373140335083,grad_norm: 0.9999996074328247, iteration: 415981
loss: 1.0245611667633057,grad_norm: 0.8768102337532949, iteration: 415982
loss: 1.1273562908172607,grad_norm: 0.9999994531722688, iteration: 415983
loss: 0.9779108166694641,grad_norm: 0.7942809380323904, iteration: 415984
loss: 0.9400991797447205,grad_norm: 0.8646646876110021, iteration: 415985
loss: 0.9680269360542297,grad_norm: 0.8055612684902634, iteration: 415986
loss: 1.020447850227356,grad_norm: 0.9550246936780196, iteration: 415987
loss: 1.0176547765731812,grad_norm: 0.7559373125627298, iteration: 415988
loss: 1.004828929901123,grad_norm: 0.9125197022650957, iteration: 415989
loss: 0.9935780763626099,grad_norm: 0.9999991483617767, iteration: 415990
loss: 1.0259466171264648,grad_norm: 0.8638343318284727, iteration: 415991
loss: 0.991631031036377,grad_norm: 0.7148594422804911, iteration: 415992
loss: 1.0036993026733398,grad_norm: 0.9548508429007827, iteration: 415993
loss: 0.9393938779830933,grad_norm: 0.7216909717897855, iteration: 415994
loss: 0.9760484099388123,grad_norm: 0.8489382791904317, iteration: 415995
loss: 0.9905192255973816,grad_norm: 0.8327712833528098, iteration: 415996
loss: 1.0013552904129028,grad_norm: 0.9523881943752744, iteration: 415997
loss: 1.0320132970809937,grad_norm: 0.9999997085161754, iteration: 415998
loss: 1.019614338874817,grad_norm: 0.9999991541909884, iteration: 415999
loss: 1.013973593711853,grad_norm: 0.724374238919203, iteration: 416000
loss: 1.0453754663467407,grad_norm: 0.9321148479545096, iteration: 416001
loss: 0.9818167090415955,grad_norm: 0.7133684158087334, iteration: 416002
loss: 0.998331606388092,grad_norm: 0.7561474986398835, iteration: 416003
loss: 1.0789004564285278,grad_norm: 0.7707531305573316, iteration: 416004
loss: 1.0108855962753296,grad_norm: 0.7871183243319714, iteration: 416005
loss: 1.0034137964248657,grad_norm: 0.9999997152688749, iteration: 416006
loss: 0.9869851469993591,grad_norm: 0.9096211203679748, iteration: 416007
loss: 0.9882123470306396,grad_norm: 0.6474645652010321, iteration: 416008
loss: 1.0209678411483765,grad_norm: 0.8441662480347426, iteration: 416009
loss: 1.0185784101486206,grad_norm: 0.6920366110485351, iteration: 416010
loss: 1.0469192266464233,grad_norm: 0.6783858320132251, iteration: 416011
loss: 0.970120370388031,grad_norm: 0.9999991631927881, iteration: 416012
loss: 0.9496334195137024,grad_norm: 0.7933451072556773, iteration: 416013
loss: 0.9963293075561523,grad_norm: 0.8251836611453689, iteration: 416014
loss: 1.0074480772018433,grad_norm: 0.8879238115653222, iteration: 416015
loss: 1.0030019283294678,grad_norm: 0.7679731706009463, iteration: 416016
loss: 0.9856294393539429,grad_norm: 0.7532980401072823, iteration: 416017
loss: 0.9850903749465942,grad_norm: 0.7897080359238071, iteration: 416018
loss: 1.0010793209075928,grad_norm: 0.8030021946374458, iteration: 416019
loss: 1.031000018119812,grad_norm: 0.6197994144160005, iteration: 416020
loss: 1.0002834796905518,grad_norm: 0.7541310597765031, iteration: 416021
loss: 1.0311861038208008,grad_norm: 0.7004214881424778, iteration: 416022
loss: 1.0119963884353638,grad_norm: 0.7306588456054485, iteration: 416023
loss: 0.9627645611763,grad_norm: 0.8020802314041775, iteration: 416024
loss: 0.9787249565124512,grad_norm: 0.8433085933845555, iteration: 416025
loss: 1.0613560676574707,grad_norm: 0.768884852483473, iteration: 416026
loss: 1.0397567749023438,grad_norm: 0.9999997674312275, iteration: 416027
loss: 1.00296151638031,grad_norm: 0.722733831060238, iteration: 416028
loss: 1.0002343654632568,grad_norm: 0.6950150240322276, iteration: 416029
loss: 0.9904600381851196,grad_norm: 0.718150682622681, iteration: 416030
loss: 1.008720874786377,grad_norm: 0.6176996852677313, iteration: 416031
loss: 0.9602742195129395,grad_norm: 0.8008042373132869, iteration: 416032
loss: 1.0033469200134277,grad_norm: 0.9095981348943022, iteration: 416033
loss: 1.0055018663406372,grad_norm: 0.8862509614964209, iteration: 416034
loss: 1.1915762424468994,grad_norm: 0.9999990415145753, iteration: 416035
loss: 1.0707340240478516,grad_norm: 0.9999997531257555, iteration: 416036
loss: 1.0051969289779663,grad_norm: 0.7789740813441488, iteration: 416037
loss: 1.008102536201477,grad_norm: 0.9347362601314687, iteration: 416038
loss: 0.9760429859161377,grad_norm: 0.8731235227409297, iteration: 416039
loss: 1.0327826738357544,grad_norm: 0.790045566181493, iteration: 416040
loss: 1.0545817613601685,grad_norm: 0.9999996217711037, iteration: 416041
loss: 1.011559247970581,grad_norm: 0.9999991386315659, iteration: 416042
loss: 0.9964193105697632,grad_norm: 0.8605298078511732, iteration: 416043
loss: 1.0160596370697021,grad_norm: 0.8035742754553149, iteration: 416044
loss: 1.0511785745620728,grad_norm: 0.9999997638574236, iteration: 416045
loss: 1.0097078084945679,grad_norm: 0.9999990603429513, iteration: 416046
loss: 0.9705946445465088,grad_norm: 0.9915173438051128, iteration: 416047
loss: 1.0373519659042358,grad_norm: 0.6242630199761044, iteration: 416048
loss: 0.9846156239509583,grad_norm: 0.8620169951267889, iteration: 416049
loss: 1.0255099534988403,grad_norm: 0.8428931194854465, iteration: 416050
loss: 1.0479791164398193,grad_norm: 0.9999991252418446, iteration: 416051
loss: 0.986824095249176,grad_norm: 0.7674462261741896, iteration: 416052
loss: 0.9850444793701172,grad_norm: 0.9999994632978143, iteration: 416053
loss: 0.950185239315033,grad_norm: 0.8331141114749152, iteration: 416054
loss: 1.0273497104644775,grad_norm: 0.7112243180458464, iteration: 416055
loss: 1.0300171375274658,grad_norm: 0.8278923769432983, iteration: 416056
loss: 0.9650448560714722,grad_norm: 0.9204801007751389, iteration: 416057
loss: 1.034416913986206,grad_norm: 0.858988104932505, iteration: 416058
loss: 1.0014142990112305,grad_norm: 0.8049606841315886, iteration: 416059
loss: 1.023271083831787,grad_norm: 0.711536230218428, iteration: 416060
loss: 0.9755942225456238,grad_norm: 0.7836090536475893, iteration: 416061
loss: 0.960457444190979,grad_norm: 0.9200978925319928, iteration: 416062
loss: 1.0399690866470337,grad_norm: 0.8083330358439975, iteration: 416063
loss: 1.0189073085784912,grad_norm: 0.9017794289037435, iteration: 416064
loss: 1.022140622138977,grad_norm: 0.7874973276674864, iteration: 416065
loss: 0.9968082904815674,grad_norm: 0.6730163487220578, iteration: 416066
loss: 1.0276837348937988,grad_norm: 0.7938643630649674, iteration: 416067
loss: 1.0397179126739502,grad_norm: 0.9999997786323299, iteration: 416068
loss: 0.9967330098152161,grad_norm: 0.7409300602130835, iteration: 416069
loss: 1.0097129344940186,grad_norm: 0.7286194344170916, iteration: 416070
loss: 0.9929743409156799,grad_norm: 0.6608286831103131, iteration: 416071
loss: 0.9872217178344727,grad_norm: 0.9999999127486628, iteration: 416072
loss: 0.9977290034294128,grad_norm: 0.9999991329981481, iteration: 416073
loss: 0.997890293598175,grad_norm: 0.7321634909674877, iteration: 416074
loss: 0.9817906618118286,grad_norm: 0.7949388584653588, iteration: 416075
loss: 1.0387874841690063,grad_norm: 0.99999914027645, iteration: 416076
loss: 1.0085417032241821,grad_norm: 0.8140974739942782, iteration: 416077
loss: 0.9980146884918213,grad_norm: 0.9999996389540461, iteration: 416078
loss: 0.95838463306427,grad_norm: 0.738170928471659, iteration: 416079
loss: 1.0121990442276,grad_norm: 0.6846684226903023, iteration: 416080
loss: 1.0043225288391113,grad_norm: 0.7320779978161329, iteration: 416081
loss: 1.034075379371643,grad_norm: 0.8544190215903383, iteration: 416082
loss: 0.974359393119812,grad_norm: 0.8243169111009597, iteration: 416083
loss: 0.97935551404953,grad_norm: 0.6995886030445824, iteration: 416084
loss: 1.0306742191314697,grad_norm: 0.7500620696889349, iteration: 416085
loss: 1.0920926332473755,grad_norm: 0.8859821336553293, iteration: 416086
loss: 1.055939793586731,grad_norm: 0.9999993077829775, iteration: 416087
loss: 1.0119630098342896,grad_norm: 0.6430921574877199, iteration: 416088
loss: 0.9486117959022522,grad_norm: 0.8274321191429346, iteration: 416089
loss: 0.9657756090164185,grad_norm: 0.7579949888596691, iteration: 416090
loss: 1.0032585859298706,grad_norm: 0.9999995183785199, iteration: 416091
loss: 1.0216931104660034,grad_norm: 1.0000000191447092, iteration: 416092
loss: 0.9649031162261963,grad_norm: 0.8101552038261063, iteration: 416093
loss: 1.022796630859375,grad_norm: 0.827659683359625, iteration: 416094
loss: 1.0037713050842285,grad_norm: 0.705599710817825, iteration: 416095
loss: 0.9919596910476685,grad_norm: 0.7791331042449994, iteration: 416096
loss: 0.9785671234130859,grad_norm: 0.8190092832610679, iteration: 416097
loss: 1.009474754333496,grad_norm: 0.788974942526373, iteration: 416098
loss: 1.0311198234558105,grad_norm: 0.8850198922639765, iteration: 416099
loss: 0.9700199961662292,grad_norm: 0.9999992646449042, iteration: 416100
loss: 1.000622272491455,grad_norm: 0.7653693505926369, iteration: 416101
loss: 0.9694942831993103,grad_norm: 0.8456667761904638, iteration: 416102
loss: 1.004532814025879,grad_norm: 1.0000000814298526, iteration: 416103
loss: 1.0126795768737793,grad_norm: 0.999999232922448, iteration: 416104
loss: 1.0120207071304321,grad_norm: 0.666004612019335, iteration: 416105
loss: 0.9813814163208008,grad_norm: 0.8517177718211196, iteration: 416106
loss: 1.014048457145691,grad_norm: 0.8356896844923513, iteration: 416107
loss: 0.9886853098869324,grad_norm: 0.8325744448984601, iteration: 416108
loss: 1.0169240236282349,grad_norm: 0.7737458269496469, iteration: 416109
loss: 0.9771932363510132,grad_norm: 0.8594522662700805, iteration: 416110
loss: 0.9761965870857239,grad_norm: 0.7275899249383965, iteration: 416111
loss: 1.0075703859329224,grad_norm: 0.7864311026015478, iteration: 416112
loss: 1.0117801427841187,grad_norm: 0.7829422531941743, iteration: 416113
loss: 1.0226576328277588,grad_norm: 0.99999931402367, iteration: 416114
loss: 1.0083200931549072,grad_norm: 0.7526127917221749, iteration: 416115
loss: 0.9939242005348206,grad_norm: 0.7977525064962148, iteration: 416116
loss: 0.9884335994720459,grad_norm: 0.8704955156931264, iteration: 416117
loss: 0.9986625909805298,grad_norm: 0.6499939214676222, iteration: 416118
loss: 1.0485316514968872,grad_norm: 0.7621428191703021, iteration: 416119
loss: 1.0017752647399902,grad_norm: 0.9468869511284098, iteration: 416120
loss: 1.0203865766525269,grad_norm: 0.9097473101414906, iteration: 416121
loss: 1.009291410446167,grad_norm: 0.7918088387723189, iteration: 416122
loss: 0.9631913304328918,grad_norm: 0.75170507641544, iteration: 416123
loss: 1.0001139640808105,grad_norm: 0.6818759407320345, iteration: 416124
loss: 0.992383599281311,grad_norm: 0.7948187606242151, iteration: 416125
loss: 1.0615227222442627,grad_norm: 0.99999992007516, iteration: 416126
loss: 0.9840071201324463,grad_norm: 0.8300277758468044, iteration: 416127
loss: 0.9718785285949707,grad_norm: 0.7169107071198026, iteration: 416128
loss: 0.9852847456932068,grad_norm: 0.7859221421490749, iteration: 416129
loss: 1.0018137693405151,grad_norm: 0.6668983629953793, iteration: 416130
loss: 0.9863167405128479,grad_norm: 0.9999997061399237, iteration: 416131
loss: 1.0181668996810913,grad_norm: 0.8344635493569452, iteration: 416132
loss: 1.0444886684417725,grad_norm: 0.7601575955593399, iteration: 416133
loss: 1.0544068813323975,grad_norm: 0.9999992181235541, iteration: 416134
loss: 0.9286404252052307,grad_norm: 0.720016190858882, iteration: 416135
loss: 0.97865229845047,grad_norm: 0.6678748012990529, iteration: 416136
loss: 1.0193181037902832,grad_norm: 0.6708456619168223, iteration: 416137
loss: 0.9942163825035095,grad_norm: 0.9189730614207083, iteration: 416138
loss: 1.0311591625213623,grad_norm: 0.8768303912903511, iteration: 416139
loss: 1.0098849534988403,grad_norm: 0.8755852125365271, iteration: 416140
loss: 1.0002952814102173,grad_norm: 0.7271472166572996, iteration: 416141
loss: 1.0385459661483765,grad_norm: 0.7684227194023121, iteration: 416142
loss: 1.0193833112716675,grad_norm: 0.707164620080549, iteration: 416143
loss: 0.9920164346694946,grad_norm: 0.8604203252976506, iteration: 416144
loss: 1.0000370740890503,grad_norm: 0.7358095871170165, iteration: 416145
loss: 0.9407995343208313,grad_norm: 0.9142196016507883, iteration: 416146
loss: 0.9785972833633423,grad_norm: 0.7643359056608742, iteration: 416147
loss: 1.0202616453170776,grad_norm: 0.7034934783330037, iteration: 416148
loss: 0.9921621680259705,grad_norm: 0.6785175843263692, iteration: 416149
loss: 0.9799149632453918,grad_norm: 0.7558924773164802, iteration: 416150
loss: 1.0020345449447632,grad_norm: 0.9999990491771186, iteration: 416151
loss: 0.9986000657081604,grad_norm: 0.999999244652165, iteration: 416152
loss: 0.9511134028434753,grad_norm: 0.824231423808792, iteration: 416153
loss: 0.9957523345947266,grad_norm: 0.6914842193842462, iteration: 416154
loss: 0.9980477690696716,grad_norm: 0.8493909711475812, iteration: 416155
loss: 0.9845649003982544,grad_norm: 0.7505682576252704, iteration: 416156
loss: 0.9876507520675659,grad_norm: 0.9999992371656774, iteration: 416157
loss: 0.9705487489700317,grad_norm: 0.6793521698277528, iteration: 416158
loss: 0.9977626204490662,grad_norm: 0.6455590093581938, iteration: 416159
loss: 1.0144140720367432,grad_norm: 0.7592878515902366, iteration: 416160
loss: 1.0212849378585815,grad_norm: 0.700261001519803, iteration: 416161
loss: 0.9728153347969055,grad_norm: 0.8674636858180007, iteration: 416162
loss: 0.9678739309310913,grad_norm: 0.999999885652107, iteration: 416163
loss: 0.9925308227539062,grad_norm: 0.9335059503582076, iteration: 416164
loss: 1.0455880165100098,grad_norm: 0.9999991849493539, iteration: 416165
loss: 1.026572823524475,grad_norm: 0.7672017071039696, iteration: 416166
loss: 0.969586968421936,grad_norm: 0.8150594851366117, iteration: 416167
loss: 1.0433162450790405,grad_norm: 0.7796435711894133, iteration: 416168
loss: 1.0014616250991821,grad_norm: 0.7163747663663147, iteration: 416169
loss: 1.0005022287368774,grad_norm: 0.9610983645879692, iteration: 416170
loss: 1.0536473989486694,grad_norm: 0.7321547103099161, iteration: 416171
loss: 0.9330372214317322,grad_norm: 0.8701229774863716, iteration: 416172
loss: 1.0070233345031738,grad_norm: 0.9655815665231383, iteration: 416173
loss: 1.0974862575531006,grad_norm: 0.9778167947125622, iteration: 416174
loss: 0.9685328006744385,grad_norm: 0.8041674938969378, iteration: 416175
loss: 1.029070258140564,grad_norm: 0.7987706885251178, iteration: 416176
loss: 0.9786543846130371,grad_norm: 0.7865880963374237, iteration: 416177
loss: 0.9698272347450256,grad_norm: 0.9999991204531194, iteration: 416178
loss: 0.9866072535514832,grad_norm: 0.6923095139604027, iteration: 416179
loss: 1.0271302461624146,grad_norm: 0.9999996790465646, iteration: 416180
loss: 1.0102713108062744,grad_norm: 0.7421036576117037, iteration: 416181
loss: 0.9751778841018677,grad_norm: 0.785044720326637, iteration: 416182
loss: 0.9776736497879028,grad_norm: 0.7603525245269684, iteration: 416183
loss: 1.015451192855835,grad_norm: 0.786694912563362, iteration: 416184
loss: 1.1382075548171997,grad_norm: 0.9999999887624802, iteration: 416185
loss: 1.0448943376541138,grad_norm: 0.8881331273292206, iteration: 416186
loss: 0.9900871515274048,grad_norm: 0.7687226347705861, iteration: 416187
loss: 1.0191527605056763,grad_norm: 0.7775187199373625, iteration: 416188
loss: 1.001075267791748,grad_norm: 0.7791096668648394, iteration: 416189
loss: 0.9878513813018799,grad_norm: 0.9999992268492925, iteration: 416190
loss: 0.9722591042518616,grad_norm: 0.8721393864075889, iteration: 416191
loss: 0.9529865980148315,grad_norm: 0.8731082251000535, iteration: 416192
loss: 1.0407160520553589,grad_norm: 0.9459018481319373, iteration: 416193
loss: 0.995997965335846,grad_norm: 0.9772656976950267, iteration: 416194
loss: 0.9854803085327148,grad_norm: 0.6904652222724192, iteration: 416195
loss: 0.9585363268852234,grad_norm: 0.7191587920982109, iteration: 416196
loss: 0.9901030659675598,grad_norm: 0.6384542897362383, iteration: 416197
loss: 0.9944000244140625,grad_norm: 0.7655431010117522, iteration: 416198
loss: 1.020565390586853,grad_norm: 0.8104820730223761, iteration: 416199
loss: 0.9808770418167114,grad_norm: 0.9157287911822933, iteration: 416200
loss: 0.9887667894363403,grad_norm: 0.724886312533984, iteration: 416201
loss: 1.043013334274292,grad_norm: 0.9275834174573, iteration: 416202
loss: 1.0079959630966187,grad_norm: 0.8072396098266119, iteration: 416203
loss: 1.0586472749710083,grad_norm: 0.7939348671190736, iteration: 416204
loss: 0.9729030728340149,grad_norm: 0.7876543201359847, iteration: 416205
loss: 0.9995965957641602,grad_norm: 0.7797720741673404, iteration: 416206
loss: 0.9737902283668518,grad_norm: 0.697696520226698, iteration: 416207
loss: 1.0362037420272827,grad_norm: 0.8278232905944604, iteration: 416208
loss: 1.0242879390716553,grad_norm: 0.999999742964053, iteration: 416209
loss: 1.037932276725769,grad_norm: 0.7511010169990914, iteration: 416210
loss: 0.9836852550506592,grad_norm: 0.8204932237649787, iteration: 416211
loss: 0.992192268371582,grad_norm: 0.9999992512146414, iteration: 416212
loss: 0.9784332513809204,grad_norm: 0.6742211140914113, iteration: 416213
loss: 1.0764274597167969,grad_norm: 0.9780862287159563, iteration: 416214
loss: 1.0712106227874756,grad_norm: 0.8180645862793317, iteration: 416215
loss: 0.9995430707931519,grad_norm: 0.8147138772274272, iteration: 416216
loss: 1.0032007694244385,grad_norm: 0.9999990351517672, iteration: 416217
loss: 1.014313817024231,grad_norm: 0.7821314474899128, iteration: 416218
loss: 0.9896539449691772,grad_norm: 0.7651159995394597, iteration: 416219
loss: 1.0044972896575928,grad_norm: 0.9604371433148448, iteration: 416220
loss: 1.001219391822815,grad_norm: 0.6938160330197707, iteration: 416221
loss: 1.0248689651489258,grad_norm: 0.7455711452987451, iteration: 416222
loss: 1.048652172088623,grad_norm: 0.9999996981687552, iteration: 416223
loss: 0.9705382585525513,grad_norm: 0.9999990311787162, iteration: 416224
loss: 1.0363245010375977,grad_norm: 0.8208856804078177, iteration: 416225
loss: 1.0506843328475952,grad_norm: 0.9999996429870205, iteration: 416226
loss: 0.9669232368469238,grad_norm: 0.7494537637727773, iteration: 416227
loss: 0.9946945905685425,grad_norm: 0.643364860824983, iteration: 416228
loss: 0.9992274641990662,grad_norm: 0.6957501046797916, iteration: 416229
loss: 1.0001044273376465,grad_norm: 0.7208039648170325, iteration: 416230
loss: 1.0267678499221802,grad_norm: 0.9999990822020236, iteration: 416231
loss: 1.0029813051223755,grad_norm: 0.7811671541142875, iteration: 416232
loss: 1.054935336112976,grad_norm: 0.8990166038648292, iteration: 416233
loss: 1.0048199892044067,grad_norm: 0.8142011713560271, iteration: 416234
loss: 0.9880331158638,grad_norm: 0.7583060696210728, iteration: 416235
loss: 0.9885126948356628,grad_norm: 0.9917103829197224, iteration: 416236
loss: 1.0204706192016602,grad_norm: 0.7160017253281439, iteration: 416237
loss: 1.0041197538375854,grad_norm: 0.8860441996303757, iteration: 416238
loss: 1.0168533325195312,grad_norm: 0.7588502276897716, iteration: 416239
loss: 1.0329080820083618,grad_norm: 0.9999991229146574, iteration: 416240
loss: 1.0329570770263672,grad_norm: 0.9999992536236574, iteration: 416241
loss: 0.9802631735801697,grad_norm: 0.8466212068527051, iteration: 416242
loss: 1.0184613466262817,grad_norm: 0.7765660521146441, iteration: 416243
loss: 1.0093516111373901,grad_norm: 0.7991595187177074, iteration: 416244
loss: 1.024152398109436,grad_norm: 0.8293858727461675, iteration: 416245
loss: 1.0072059631347656,grad_norm: 0.6514814994213743, iteration: 416246
loss: 1.0082021951675415,grad_norm: 0.820193855009943, iteration: 416247
loss: 1.0107141733169556,grad_norm: 0.8060492703870079, iteration: 416248
loss: 0.995133638381958,grad_norm: 0.7485472562866708, iteration: 416249
loss: 0.9960517883300781,grad_norm: 0.7783310177089254, iteration: 416250
loss: 1.0315790176391602,grad_norm: 0.9999998262197721, iteration: 416251
loss: 0.9880363941192627,grad_norm: 0.7629389503286417, iteration: 416252
loss: 1.020349144935608,grad_norm: 0.6776418631473429, iteration: 416253
loss: 0.9908131957054138,grad_norm: 0.9037029899606512, iteration: 416254
loss: 1.0587841272354126,grad_norm: 0.8856367562271033, iteration: 416255
loss: 1.002583622932434,grad_norm: 0.7491427476804041, iteration: 416256
loss: 1.0267544984817505,grad_norm: 0.9999991519820349, iteration: 416257
loss: 1.0082637071609497,grad_norm: 0.8407797888722468, iteration: 416258
loss: 0.9792523980140686,grad_norm: 0.7924287420586628, iteration: 416259
loss: 0.9923166036605835,grad_norm: 0.9074708736273189, iteration: 416260
loss: 1.0115723609924316,grad_norm: 0.6903776030276979, iteration: 416261
loss: 1.0346072912216187,grad_norm: 0.8221673773186269, iteration: 416262
loss: 0.9602684378623962,grad_norm: 0.8766581250557532, iteration: 416263
loss: 0.9956516027450562,grad_norm: 0.7000173674509778, iteration: 416264
loss: 1.037532925605774,grad_norm: 0.7805379113139835, iteration: 416265
loss: 1.0320254564285278,grad_norm: 0.8185289938415719, iteration: 416266
loss: 1.034394383430481,grad_norm: 0.8802565989976624, iteration: 416267
loss: 0.9837949872016907,grad_norm: 0.830831130938459, iteration: 416268
loss: 0.9973053336143494,grad_norm: 0.7545608089606445, iteration: 416269
loss: 0.994349479675293,grad_norm: 0.9999990076611457, iteration: 416270
loss: 0.9828523397445679,grad_norm: 0.7115698465448453, iteration: 416271
loss: 0.9694731831550598,grad_norm: 0.9027960328611526, iteration: 416272
loss: 0.99644935131073,grad_norm: 0.753466108380084, iteration: 416273
loss: 1.028497576713562,grad_norm: 0.8269836445968863, iteration: 416274
loss: 1.0057021379470825,grad_norm: 0.7754405769624334, iteration: 416275
loss: 1.0240832567214966,grad_norm: 0.7918284108478646, iteration: 416276
loss: 0.9930242896080017,grad_norm: 0.7406187628296007, iteration: 416277
loss: 1.0202031135559082,grad_norm: 0.7017580767316965, iteration: 416278
loss: 0.9867265820503235,grad_norm: 0.7207045847788269, iteration: 416279
loss: 1.0367999076843262,grad_norm: 0.9999990697202471, iteration: 416280
loss: 0.980422854423523,grad_norm: 0.6941458661415993, iteration: 416281
loss: 0.9616315960884094,grad_norm: 0.9202852922067774, iteration: 416282
loss: 1.0210974216461182,grad_norm: 0.7857297853584363, iteration: 416283
loss: 1.055532693862915,grad_norm: 0.9999999090989907, iteration: 416284
loss: 1.002821683883667,grad_norm: 0.765592226965207, iteration: 416285
loss: 1.0430909395217896,grad_norm: 0.7763093337687769, iteration: 416286
loss: 1.0157431364059448,grad_norm: 0.7174822717024877, iteration: 416287
loss: 1.0122863054275513,grad_norm: 0.8082046517425222, iteration: 416288
loss: 1.0344911813735962,grad_norm: 0.7255149722823806, iteration: 416289
loss: 0.9805604815483093,grad_norm: 0.8077689451643262, iteration: 416290
loss: 0.9891992807388306,grad_norm: 0.8140481502887655, iteration: 416291
loss: 1.0185879468917847,grad_norm: 0.9233154821971887, iteration: 416292
loss: 1.0116536617279053,grad_norm: 0.8643189987864558, iteration: 416293
loss: 0.999485194683075,grad_norm: 0.8493140462756652, iteration: 416294
loss: 1.0080621242523193,grad_norm: 0.9143863622008344, iteration: 416295
loss: 0.9855452179908752,grad_norm: 0.9999995557244244, iteration: 416296
loss: 0.9572886824607849,grad_norm: 0.6844612349850231, iteration: 416297
loss: 0.99150151014328,grad_norm: 0.7638764254451258, iteration: 416298
loss: 1.0303176641464233,grad_norm: 0.999999040412388, iteration: 416299
loss: 1.0233235359191895,grad_norm: 0.7518209131877006, iteration: 416300
loss: 0.9921725392341614,grad_norm: 0.7854634629830585, iteration: 416301
loss: 1.0056049823760986,grad_norm: 0.9999989017383231, iteration: 416302
loss: 0.9880058765411377,grad_norm: 0.9480081914463777, iteration: 416303
loss: 1.037933111190796,grad_norm: 0.917112862258585, iteration: 416304
loss: 1.0050534009933472,grad_norm: 0.999999888162352, iteration: 416305
loss: 0.9671886563301086,grad_norm: 0.9999999399499896, iteration: 416306
loss: 0.972181499004364,grad_norm: 0.7251799983446084, iteration: 416307
loss: 1.0118873119354248,grad_norm: 0.7307560948642425, iteration: 416308
loss: 0.9913392663002014,grad_norm: 0.7989856492265516, iteration: 416309
loss: 0.9725006818771362,grad_norm: 0.7794232572765818, iteration: 416310
loss: 1.0328353643417358,grad_norm: 0.8947746152025075, iteration: 416311
loss: 0.987247884273529,grad_norm: 0.6839192405549084, iteration: 416312
loss: 1.0191798210144043,grad_norm: 0.6977006814485146, iteration: 416313
loss: 1.0339473485946655,grad_norm: 0.6492946040076757, iteration: 416314
loss: 1.0430104732513428,grad_norm: 0.9999992485699508, iteration: 416315
loss: 1.005700945854187,grad_norm: 0.7286701441594442, iteration: 416316
loss: 0.9987544417381287,grad_norm: 0.8217002652708054, iteration: 416317
loss: 1.0609502792358398,grad_norm: 0.749941750426939, iteration: 416318
loss: 1.017424464225769,grad_norm: 0.7925281826078718, iteration: 416319
loss: 0.9847915172576904,grad_norm: 0.7009011837305762, iteration: 416320
loss: 0.9674727916717529,grad_norm: 0.9999992984399236, iteration: 416321
loss: 0.9788120985031128,grad_norm: 0.7129753160448254, iteration: 416322
loss: 0.9697912335395813,grad_norm: 0.8724038682874289, iteration: 416323
loss: 1.0358214378356934,grad_norm: 0.799463194520122, iteration: 416324
loss: 0.9971000552177429,grad_norm: 0.7157962886547624, iteration: 416325
loss: 0.9723835587501526,grad_norm: 0.7064051969183046, iteration: 416326
loss: 1.0195742845535278,grad_norm: 0.94485229194821, iteration: 416327
loss: 1.0127971172332764,grad_norm: 0.9999992372424992, iteration: 416328
loss: 0.984717607498169,grad_norm: 0.7580245116152753, iteration: 416329
loss: 0.9669580459594727,grad_norm: 0.6916657716256098, iteration: 416330
loss: 0.991865873336792,grad_norm: 0.8333916851689677, iteration: 416331
loss: 1.0075966119766235,grad_norm: 0.9999998791451042, iteration: 416332
loss: 0.9926836490631104,grad_norm: 0.6877922724010241, iteration: 416333
loss: 1.0303046703338623,grad_norm: 0.9357003900383752, iteration: 416334
loss: 0.992378830909729,grad_norm: 0.7837866002561981, iteration: 416335
loss: 0.9919957518577576,grad_norm: 0.9217276081079133, iteration: 416336
loss: 0.9705489873886108,grad_norm: 0.838889958164845, iteration: 416337
loss: 0.9936250448226929,grad_norm: 0.8291005511063438, iteration: 416338
loss: 1.0202893018722534,grad_norm: 0.8658749928165148, iteration: 416339
loss: 0.9989156126976013,grad_norm: 0.8619493619523523, iteration: 416340
loss: 0.9445806741714478,grad_norm: 0.8553497943507812, iteration: 416341
loss: 0.9874136447906494,grad_norm: 0.7224482967908387, iteration: 416342
loss: 0.9572537541389465,grad_norm: 0.9564859458759947, iteration: 416343
loss: 1.0215275287628174,grad_norm: 0.9999990845892739, iteration: 416344
loss: 0.986866295337677,grad_norm: 0.9999995182796474, iteration: 416345
loss: 0.9987924695014954,grad_norm: 0.779086211744186, iteration: 416346
loss: 0.9849698543548584,grad_norm: 0.721168201161975, iteration: 416347
loss: 1.0241261720657349,grad_norm: 0.7403404962732772, iteration: 416348
loss: 1.0031174421310425,grad_norm: 0.6584385374421043, iteration: 416349
loss: 1.0044529438018799,grad_norm: 0.7696268564864835, iteration: 416350
loss: 0.9405170679092407,grad_norm: 0.708061706057232, iteration: 416351
loss: 1.0275678634643555,grad_norm: 0.9999993185806691, iteration: 416352
loss: 0.9906315207481384,grad_norm: 0.6803441508201374, iteration: 416353
loss: 0.9774942398071289,grad_norm: 0.7055822860813448, iteration: 416354
loss: 1.011154294013977,grad_norm: 0.7339948486136113, iteration: 416355
loss: 1.0105680227279663,grad_norm: 0.7037677345789691, iteration: 416356
loss: 1.014664649963379,grad_norm: 0.7942591888075133, iteration: 416357
loss: 1.049855351448059,grad_norm: 0.9999996558860283, iteration: 416358
loss: 0.9961301684379578,grad_norm: 0.6530809191947571, iteration: 416359
loss: 0.9596226811408997,grad_norm: 0.9191136829661585, iteration: 416360
loss: 1.0062577724456787,grad_norm: 0.9446603168557847, iteration: 416361
loss: 1.0508023500442505,grad_norm: 0.9253132745815199, iteration: 416362
loss: 1.0133416652679443,grad_norm: 0.9836126264361753, iteration: 416363
loss: 0.9918316006660461,grad_norm: 0.9999994238822182, iteration: 416364
loss: 1.0588443279266357,grad_norm: 0.9632253974674475, iteration: 416365
loss: 1.023921251296997,grad_norm: 0.810745618343564, iteration: 416366
loss: 1.0020612478256226,grad_norm: 0.9999992419410816, iteration: 416367
loss: 1.0301040410995483,grad_norm: 0.7272352582607989, iteration: 416368
loss: 1.0058609247207642,grad_norm: 0.7383667055203866, iteration: 416369
loss: 1.0454944372177124,grad_norm: 0.9999993479345975, iteration: 416370
loss: 1.0011391639709473,grad_norm: 0.6122868751541488, iteration: 416371
loss: 1.0137944221496582,grad_norm: 0.7739021469346977, iteration: 416372
loss: 0.9786908626556396,grad_norm: 0.8226990161141995, iteration: 416373
loss: 1.0701684951782227,grad_norm: 0.9999997380873519, iteration: 416374
loss: 1.0249605178833008,grad_norm: 0.9999998691987251, iteration: 416375
loss: 0.9758065342903137,grad_norm: 0.6984926728349269, iteration: 416376
loss: 0.961820662021637,grad_norm: 0.8204339284048424, iteration: 416377
loss: 0.9827796816825867,grad_norm: 0.8109260198586176, iteration: 416378
loss: 1.0960325002670288,grad_norm: 0.999999430650714, iteration: 416379
loss: 0.9936712384223938,grad_norm: 0.8119814064575936, iteration: 416380
loss: 1.0263420343399048,grad_norm: 0.8388563411919717, iteration: 416381
loss: 1.017721176147461,grad_norm: 0.7254198972050094, iteration: 416382
loss: 0.9483815431594849,grad_norm: 0.8064107309870381, iteration: 416383
loss: 1.0303220748901367,grad_norm: 0.6855274980041386, iteration: 416384
loss: 0.9926353693008423,grad_norm: 0.9240323143289774, iteration: 416385
loss: 0.9964331388473511,grad_norm: 0.693344072489537, iteration: 416386
loss: 1.0316451787948608,grad_norm: 0.8802482668766858, iteration: 416387
loss: 0.996494472026825,grad_norm: 0.8245203595577137, iteration: 416388
loss: 0.9972832202911377,grad_norm: 0.8934568230971257, iteration: 416389
loss: 0.9977845549583435,grad_norm: 0.7555110520223399, iteration: 416390
loss: 1.020806074142456,grad_norm: 0.9999995122561475, iteration: 416391
loss: 1.0120911598205566,grad_norm: 0.7636038564879364, iteration: 416392
loss: 1.028856873512268,grad_norm: 0.7292366349662974, iteration: 416393
loss: 0.9654896259307861,grad_norm: 0.7746860296957355, iteration: 416394
loss: 1.0075842142105103,grad_norm: 0.918860144827199, iteration: 416395
loss: 0.9934794306755066,grad_norm: 0.9414139331202782, iteration: 416396
loss: 0.9856753349304199,grad_norm: 0.7266611412187655, iteration: 416397
loss: 1.0143615007400513,grad_norm: 0.7021256149731044, iteration: 416398
loss: 0.9616369009017944,grad_norm: 0.8439429930265138, iteration: 416399
loss: 1.003324270248413,grad_norm: 0.9999990842378533, iteration: 416400
loss: 1.0104707479476929,grad_norm: 0.5672505837412622, iteration: 416401
loss: 1.0489329099655151,grad_norm: 0.7338893104298826, iteration: 416402
loss: 1.0304827690124512,grad_norm: 0.6895697454889241, iteration: 416403
loss: 1.0274176597595215,grad_norm: 0.9853634778716441, iteration: 416404
loss: 1.0233525037765503,grad_norm: 0.745944206874069, iteration: 416405
loss: 1.023330569267273,grad_norm: 0.6844825740649688, iteration: 416406
loss: 0.9956338405609131,grad_norm: 0.6777858919179787, iteration: 416407
loss: 1.014715552330017,grad_norm: 0.7223387638651698, iteration: 416408
loss: 1.0199028253555298,grad_norm: 0.8992962037196418, iteration: 416409
loss: 1.0058009624481201,grad_norm: 0.7334748767231942, iteration: 416410
loss: 0.9840680956840515,grad_norm: 0.7533727501637241, iteration: 416411
loss: 1.0290343761444092,grad_norm: 0.67694436049382, iteration: 416412
loss: 1.0006129741668701,grad_norm: 0.6549493342715471, iteration: 416413
loss: 0.9247636198997498,grad_norm: 0.8832192938087989, iteration: 416414
loss: 0.9402121901512146,grad_norm: 0.789118788806577, iteration: 416415
loss: 0.9866585731506348,grad_norm: 0.752243878211784, iteration: 416416
loss: 0.9815863370895386,grad_norm: 0.7478328757601473, iteration: 416417
loss: 0.9953917264938354,grad_norm: 0.7793944374381203, iteration: 416418
loss: 0.9833964109420776,grad_norm: 0.8229223341757941, iteration: 416419
loss: 0.9752532243728638,grad_norm: 0.6464292788481351, iteration: 416420
loss: 1.002297282218933,grad_norm: 0.7740233246740378, iteration: 416421
loss: 1.1108965873718262,grad_norm: 0.9999990731707069, iteration: 416422
loss: 0.9758642315864563,grad_norm: 0.9999991712874461, iteration: 416423
loss: 0.9778597354888916,grad_norm: 0.6931090487381589, iteration: 416424
loss: 0.9811315536499023,grad_norm: 0.6633465641973093, iteration: 416425
loss: 1.0200581550598145,grad_norm: 0.9543870693611292, iteration: 416426
loss: 0.9720747470855713,grad_norm: 0.7328350174956166, iteration: 416427
loss: 1.0433295965194702,grad_norm: 0.9340589251634009, iteration: 416428
loss: 1.0338019132614136,grad_norm: 0.8140468961074155, iteration: 416429
loss: 0.9580674767494202,grad_norm: 0.7555924947730274, iteration: 416430
loss: 0.9865875244140625,grad_norm: 0.742568839825979, iteration: 416431
loss: 0.9617192149162292,grad_norm: 0.8590650035206515, iteration: 416432
loss: 0.9689814448356628,grad_norm: 0.8216652069352942, iteration: 416433
loss: 1.0028883218765259,grad_norm: 0.7321370588971116, iteration: 416434
loss: 1.0297770500183105,grad_norm: 0.9999992900777145, iteration: 416435
loss: 0.9905061721801758,grad_norm: 0.6452832998232447, iteration: 416436
loss: 0.9912937879562378,grad_norm: 0.6398941864613253, iteration: 416437
loss: 1.0092695951461792,grad_norm: 0.7693154176063152, iteration: 416438
loss: 1.0201027393341064,grad_norm: 0.7876099088068743, iteration: 416439
loss: 1.0094411373138428,grad_norm: 0.8898320396329533, iteration: 416440
loss: 0.9810391664505005,grad_norm: 0.7037205547464058, iteration: 416441
loss: 1.0178483724594116,grad_norm: 0.7589804529250762, iteration: 416442
loss: 0.9707064628601074,grad_norm: 0.8153346398763437, iteration: 416443
loss: 1.0292936563491821,grad_norm: 0.6756104833197547, iteration: 416444
loss: 0.995639979839325,grad_norm: 0.7655008152259309, iteration: 416445
loss: 1.0001945495605469,grad_norm: 0.9277756347744954, iteration: 416446
loss: 0.9926205277442932,grad_norm: 0.9091201551498939, iteration: 416447
loss: 1.0252190828323364,grad_norm: 0.6640543842573604, iteration: 416448
loss: 0.9969656467437744,grad_norm: 0.9999993161270319, iteration: 416449
loss: 0.9758109450340271,grad_norm: 0.6728240371914636, iteration: 416450
loss: 0.969573974609375,grad_norm: 0.736453835722219, iteration: 416451
loss: 1.0236271619796753,grad_norm: 0.8107796689327345, iteration: 416452
loss: 0.9651654958724976,grad_norm: 0.8413358050363721, iteration: 416453
loss: 0.975205659866333,grad_norm: 0.6556549324817528, iteration: 416454
loss: 0.9717838764190674,grad_norm: 0.7099528791460938, iteration: 416455
loss: 0.9948732256889343,grad_norm: 0.8703227194765657, iteration: 416456
loss: 0.993241548538208,grad_norm: 0.940944078548275, iteration: 416457
loss: 1.084707260131836,grad_norm: 0.999999944281973, iteration: 416458
loss: 1.0407429933547974,grad_norm: 0.9547792487005611, iteration: 416459
loss: 0.9987045526504517,grad_norm: 0.7361414108516839, iteration: 416460
loss: 1.0206013917922974,grad_norm: 0.8729813928585808, iteration: 416461
loss: 0.9880381226539612,grad_norm: 0.6239235697778459, iteration: 416462
loss: 1.0501362085342407,grad_norm: 0.9999998122305662, iteration: 416463
loss: 0.9481990337371826,grad_norm: 0.7647089805776769, iteration: 416464
loss: 0.9989672303199768,grad_norm: 0.786225785303586, iteration: 416465
loss: 1.0061286687850952,grad_norm: 0.9198390363142199, iteration: 416466
loss: 0.9904947280883789,grad_norm: 0.77613670700121, iteration: 416467
loss: 1.0533946752548218,grad_norm: 0.9999997503618896, iteration: 416468
loss: 1.015036702156067,grad_norm: 0.9999999871234305, iteration: 416469
loss: 1.012896180152893,grad_norm: 0.8086036855954329, iteration: 416470
loss: 0.9587128758430481,grad_norm: 0.7480278579102559, iteration: 416471
loss: 1.0067623853683472,grad_norm: 0.9346274909442471, iteration: 416472
loss: 1.0418707132339478,grad_norm: 0.9999996304071146, iteration: 416473
loss: 0.9689701199531555,grad_norm: 0.7311201563039204, iteration: 416474
loss: 1.0687167644500732,grad_norm: 0.8212096596831564, iteration: 416475
loss: 0.9970027208328247,grad_norm: 0.6885994192781493, iteration: 416476
loss: 1.107533574104309,grad_norm: 0.7316668680851045, iteration: 416477
loss: 1.0103641748428345,grad_norm: 0.7564060145095207, iteration: 416478
loss: 0.9653428196907043,grad_norm: 0.9999992311285836, iteration: 416479
loss: 1.0138838291168213,grad_norm: 0.6732716580932784, iteration: 416480
loss: 0.9900752902030945,grad_norm: 0.8601211941901759, iteration: 416481
loss: 0.996798574924469,grad_norm: 0.8777538724446011, iteration: 416482
loss: 0.984238862991333,grad_norm: 0.8991265921104067, iteration: 416483
loss: 1.012332797050476,grad_norm: 0.8169945940571347, iteration: 416484
loss: 1.0336806774139404,grad_norm: 0.9999992761269707, iteration: 416485
loss: 1.0243890285491943,grad_norm: 0.8184126367752905, iteration: 416486
loss: 1.0508297681808472,grad_norm: 0.9999995126255858, iteration: 416487
loss: 1.0658907890319824,grad_norm: 0.8425124923806191, iteration: 416488
loss: 1.0245057344436646,grad_norm: 0.9372672179459335, iteration: 416489
loss: 1.05495285987854,grad_norm: 0.7805801099312452, iteration: 416490
loss: 0.9958328008651733,grad_norm: 0.999999179674352, iteration: 416491
loss: 1.0156686305999756,grad_norm: 0.7166618955800077, iteration: 416492
loss: 1.003832221031189,grad_norm: 0.7681016304887648, iteration: 416493
loss: 1.0703575611114502,grad_norm: 0.9999995871162832, iteration: 416494
loss: 0.99309903383255,grad_norm: 0.926426621351816, iteration: 416495
loss: 0.9874091148376465,grad_norm: 0.9999991200300717, iteration: 416496
loss: 0.9808382987976074,grad_norm: 0.6878817013531162, iteration: 416497
loss: 0.9996441602706909,grad_norm: 0.7487464763818131, iteration: 416498
loss: 1.0024288892745972,grad_norm: 0.6879923976455954, iteration: 416499
loss: 1.0146679878234863,grad_norm: 0.7042182443241713, iteration: 416500
loss: 1.0293607711791992,grad_norm: 0.7890582354348702, iteration: 416501
loss: 0.9874271750450134,grad_norm: 0.8482821543297311, iteration: 416502
loss: 0.9865349531173706,grad_norm: 0.7858373303207765, iteration: 416503
loss: 1.0480700731277466,grad_norm: 0.7489764097155645, iteration: 416504
loss: 1.0305874347686768,grad_norm: 0.8013057783636638, iteration: 416505
loss: 1.0431495904922485,grad_norm: 0.9999995066132285, iteration: 416506
loss: 0.9622068405151367,grad_norm: 0.8138579303385609, iteration: 416507
loss: 1.0097918510437012,grad_norm: 0.9999991467597307, iteration: 416508
loss: 0.9815235137939453,grad_norm: 0.9400046909743509, iteration: 416509
loss: 1.0618131160736084,grad_norm: 0.8680141392411391, iteration: 416510
loss: 0.9697349667549133,grad_norm: 0.9999994706227168, iteration: 416511
loss: 1.0104995965957642,grad_norm: 0.9999989574509797, iteration: 416512
loss: 1.07480788230896,grad_norm: 0.6932204112205647, iteration: 416513
loss: 0.9709099531173706,grad_norm: 0.7086700380904079, iteration: 416514
loss: 1.0299689769744873,grad_norm: 0.7679972307333985, iteration: 416515
loss: 0.9815379977226257,grad_norm: 0.7961184480211697, iteration: 416516
loss: 0.9779637455940247,grad_norm: 0.7229424670416215, iteration: 416517
loss: 1.0171465873718262,grad_norm: 0.6694458103085054, iteration: 416518
loss: 1.0300251245498657,grad_norm: 0.8455675297447438, iteration: 416519
loss: 0.9998616576194763,grad_norm: 0.7851556373159783, iteration: 416520
loss: 1.0007009506225586,grad_norm: 0.849971096975482, iteration: 416521
loss: 0.9749857783317566,grad_norm: 0.9999991774977558, iteration: 416522
loss: 0.9922583699226379,grad_norm: 0.6989031326314491, iteration: 416523
loss: 1.0134046077728271,grad_norm: 0.7867126283743907, iteration: 416524
loss: 1.0004876852035522,grad_norm: 0.9292078232062748, iteration: 416525
loss: 1.0129873752593994,grad_norm: 0.6611132037598387, iteration: 416526
loss: 1.0216186046600342,grad_norm: 0.7620941931744655, iteration: 416527
loss: 0.988966703414917,grad_norm: 0.6761031899160181, iteration: 416528
loss: 0.9894372224807739,grad_norm: 0.718564813981974, iteration: 416529
loss: 1.0473204851150513,grad_norm: 0.9912971689724827, iteration: 416530
loss: 0.9766488671302795,grad_norm: 0.7195002387817825, iteration: 416531
loss: 1.0120536088943481,grad_norm: 0.8279120447613005, iteration: 416532
loss: 1.0016714334487915,grad_norm: 0.7641150544918898, iteration: 416533
loss: 0.9903453588485718,grad_norm: 0.7805546157765669, iteration: 416534
loss: 0.9915663599967957,grad_norm: 0.6803676879943426, iteration: 416535
loss: 1.0217562913894653,grad_norm: 0.7671966598456321, iteration: 416536
loss: 1.0097521543502808,grad_norm: 0.9999991079448335, iteration: 416537
loss: 1.0452373027801514,grad_norm: 0.9999996456574279, iteration: 416538
loss: 0.9874234795570374,grad_norm: 0.8025170450498136, iteration: 416539
loss: 1.0080012083053589,grad_norm: 0.9999991163513442, iteration: 416540
loss: 0.9926255941390991,grad_norm: 0.8194495087098059, iteration: 416541
loss: 1.035127878189087,grad_norm: 0.9999992146247592, iteration: 416542
loss: 1.05832040309906,grad_norm: 0.9999996838796724, iteration: 416543
loss: 0.9661878943443298,grad_norm: 0.7016872900276492, iteration: 416544
loss: 0.9926185011863708,grad_norm: 0.8603537160269111, iteration: 416545
loss: 1.019065260887146,grad_norm: 0.8798014495182785, iteration: 416546
loss: 0.9995076656341553,grad_norm: 0.933889194114082, iteration: 416547
loss: 0.972316324710846,grad_norm: 0.940212695050996, iteration: 416548
loss: 1.0194602012634277,grad_norm: 0.973713548360446, iteration: 416549
loss: 1.002467393875122,grad_norm: 0.9999991743437433, iteration: 416550
loss: 0.9822790622711182,grad_norm: 0.8985802155990826, iteration: 416551
loss: 1.0534210205078125,grad_norm: 0.9999992234230946, iteration: 416552
loss: 1.0366548299789429,grad_norm: 0.6781771431517889, iteration: 416553
loss: 0.9945412874221802,grad_norm: 0.7631170975299325, iteration: 416554
loss: 0.9963635206222534,grad_norm: 0.7385077371882188, iteration: 416555
loss: 0.9649953246116638,grad_norm: 0.8585897229271942, iteration: 416556
loss: 1.0825961828231812,grad_norm: 0.773184776851992, iteration: 416557
loss: 0.9826720952987671,grad_norm: 0.8268491558454034, iteration: 416558
loss: 1.020175814628601,grad_norm: 0.8297049649217021, iteration: 416559
loss: 1.0215437412261963,grad_norm: 0.8466795345322945, iteration: 416560
loss: 0.992277204990387,grad_norm: 0.8405116655324988, iteration: 416561
loss: 1.0321778059005737,grad_norm: 0.9999998198671197, iteration: 416562
loss: 0.9669983386993408,grad_norm: 0.8972560053286236, iteration: 416563
loss: 1.0194395780563354,grad_norm: 0.995415048788587, iteration: 416564
loss: 1.0201317071914673,grad_norm: 0.774957537643939, iteration: 416565
loss: 1.0140804052352905,grad_norm: 0.6740129676156268, iteration: 416566
loss: 0.9692100882530212,grad_norm: 0.6839733752224908, iteration: 416567
loss: 1.0059605836868286,grad_norm: 0.7321538775130882, iteration: 416568
loss: 0.9628067016601562,grad_norm: 0.8923553826857227, iteration: 416569
loss: 1.0089118480682373,grad_norm: 0.875329037273749, iteration: 416570
loss: 1.0130064487457275,grad_norm: 0.6292537632309967, iteration: 416571
loss: 1.0109046697616577,grad_norm: 0.8581474498593042, iteration: 416572
loss: 0.9870617985725403,grad_norm: 0.8412629344227297, iteration: 416573
loss: 0.9731652736663818,grad_norm: 0.6440193012554541, iteration: 416574
loss: 0.9868009686470032,grad_norm: 0.8294238282901292, iteration: 416575
loss: 1.0742322206497192,grad_norm: 0.9999999239363003, iteration: 416576
loss: 1.0010310411453247,grad_norm: 0.9999990540961283, iteration: 416577
loss: 1.023199200630188,grad_norm: 0.731046615667328, iteration: 416578
loss: 1.0092766284942627,grad_norm: 0.9999992781477035, iteration: 416579
loss: 1.005522608757019,grad_norm: 0.9999991366115003, iteration: 416580
loss: 1.0169777870178223,grad_norm: 0.832448191630311, iteration: 416581
loss: 1.028083324432373,grad_norm: 0.9597205891100913, iteration: 416582
loss: 1.0236248970031738,grad_norm: 0.8559301396370509, iteration: 416583
loss: 1.0144637823104858,grad_norm: 0.7501760030537197, iteration: 416584
loss: 1.047597050666809,grad_norm: 0.9999993291583221, iteration: 416585
loss: 1.0257331132888794,grad_norm: 0.999999620112122, iteration: 416586
loss: 0.9878814220428467,grad_norm: 0.840073552530307, iteration: 416587
loss: 1.0096064805984497,grad_norm: 0.8455502773595891, iteration: 416588
loss: 1.0097019672393799,grad_norm: 0.7811149885162993, iteration: 416589
loss: 0.9970153570175171,grad_norm: 0.9385627881180346, iteration: 416590
loss: 0.9851428866386414,grad_norm: 0.8198650131808435, iteration: 416591
loss: 0.9763282537460327,grad_norm: 0.7327577716051764, iteration: 416592
loss: 0.9684435725212097,grad_norm: 0.7187925039617433, iteration: 416593
loss: 0.9597592949867249,grad_norm: 0.8413588581985746, iteration: 416594
loss: 1.0565687417984009,grad_norm: 0.9512195495549437, iteration: 416595
loss: 1.0422930717468262,grad_norm: 0.9545723553018799, iteration: 416596
loss: 0.9935793280601501,grad_norm: 0.8427879490729205, iteration: 416597
loss: 0.9926673173904419,grad_norm: 0.7871034123401737, iteration: 416598
loss: 1.017238974571228,grad_norm: 0.6659363944880236, iteration: 416599
loss: 0.9878652095794678,grad_norm: 0.6461060897815442, iteration: 416600
loss: 1.0135183334350586,grad_norm: 0.7386994468559105, iteration: 416601
loss: 1.0001587867736816,grad_norm: 0.6856126532803236, iteration: 416602
loss: 0.98406982421875,grad_norm: 0.7100316578928614, iteration: 416603
loss: 0.9764796495437622,grad_norm: 0.6809650377124455, iteration: 416604
loss: 0.9950939416885376,grad_norm: 0.9073601347497557, iteration: 416605
loss: 1.0056778192520142,grad_norm: 0.9999992724778212, iteration: 416606
loss: 1.042341947555542,grad_norm: 0.9999993248369278, iteration: 416607
loss: 1.0072945356369019,grad_norm: 0.9999991952871236, iteration: 416608
loss: 0.9779718518257141,grad_norm: 0.8081623062591311, iteration: 416609
loss: 1.0255038738250732,grad_norm: 0.8549710039709362, iteration: 416610
loss: 0.973684549331665,grad_norm: 0.7194763583874287, iteration: 416611
loss: 0.9760444164276123,grad_norm: 0.6820379544473577, iteration: 416612
loss: 0.9874979257583618,grad_norm: 0.7806255661438922, iteration: 416613
loss: 1.015166163444519,grad_norm: 0.7640224690919266, iteration: 416614
loss: 0.9883242845535278,grad_norm: 0.7773761004012929, iteration: 416615
loss: 0.9648041725158691,grad_norm: 0.9999993192303278, iteration: 416616
loss: 1.043484091758728,grad_norm: 0.7299791333702493, iteration: 416617
loss: 1.013059139251709,grad_norm: 0.9999991279227198, iteration: 416618
loss: 1.0052498579025269,grad_norm: 0.886411117378132, iteration: 416619
loss: 1.008166790008545,grad_norm: 0.9231080226735995, iteration: 416620
loss: 1.029457688331604,grad_norm: 0.8320892470796895, iteration: 416621
loss: 1.1730849742889404,grad_norm: 0.9999997465397791, iteration: 416622
loss: 1.013103723526001,grad_norm: 0.9999996300683186, iteration: 416623
loss: 1.051305890083313,grad_norm: 0.7289751019733729, iteration: 416624
loss: 0.9838520288467407,grad_norm: 0.7669412475333311, iteration: 416625
loss: 0.9982911944389343,grad_norm: 0.8934932773884668, iteration: 416626
loss: 0.9885216355323792,grad_norm: 0.6902962951248071, iteration: 416627
loss: 0.960540235042572,grad_norm: 0.5920102316012255, iteration: 416628
loss: 1.0203605890274048,grad_norm: 0.8273008881165999, iteration: 416629
loss: 1.0108171701431274,grad_norm: 0.8311913020759417, iteration: 416630
loss: 1.001914381980896,grad_norm: 0.6889552065374833, iteration: 416631
loss: 0.961499810218811,grad_norm: 0.8001735315397142, iteration: 416632
loss: 0.9831968545913696,grad_norm: 0.8437733724386668, iteration: 416633
loss: 1.0231081247329712,grad_norm: 0.9176337956597197, iteration: 416634
loss: 1.0256712436676025,grad_norm: 0.9999995754891049, iteration: 416635
loss: 0.9853367209434509,grad_norm: 0.8701386557550029, iteration: 416636
loss: 0.9649842381477356,grad_norm: 0.7829227836281275, iteration: 416637
loss: 0.9876716136932373,grad_norm: 0.7150425744264566, iteration: 416638
loss: 0.9776419997215271,grad_norm: 0.8255539481365267, iteration: 416639
loss: 1.0003998279571533,grad_norm: 0.7581851374914231, iteration: 416640
loss: 1.0331742763519287,grad_norm: 0.9999996593573725, iteration: 416641
loss: 1.0408577919006348,grad_norm: 0.6924043263462544, iteration: 416642
loss: 0.963080644607544,grad_norm: 0.7993229020095958, iteration: 416643
loss: 0.9644451141357422,grad_norm: 0.7402099078621295, iteration: 416644
loss: 1.0092148780822754,grad_norm: 0.7419262441886596, iteration: 416645
loss: 0.995273232460022,grad_norm: 0.77732650107227, iteration: 416646
loss: 0.9731215238571167,grad_norm: 0.955997563194599, iteration: 416647
loss: 1.0703986883163452,grad_norm: 0.9999999359300868, iteration: 416648
loss: 1.0028642416000366,grad_norm: 0.9175755072369108, iteration: 416649
loss: 1.009477138519287,grad_norm: 0.8835327793623409, iteration: 416650
loss: 1.0092943906784058,grad_norm: 0.8398770820978648, iteration: 416651
loss: 0.9954193830490112,grad_norm: 0.6837937913314567, iteration: 416652
loss: 1.001956820487976,grad_norm: 0.64963577038041, iteration: 416653
loss: 1.003371238708496,grad_norm: 0.7170967008196278, iteration: 416654
loss: 0.9553281664848328,grad_norm: 0.7361182573874387, iteration: 416655
loss: 0.9756170511245728,grad_norm: 0.8445323141792255, iteration: 416656
loss: 0.991409957408905,grad_norm: 0.8708442227330879, iteration: 416657
loss: 1.008474588394165,grad_norm: 0.7413791747432324, iteration: 416658
loss: 1.0074050426483154,grad_norm: 0.7443756972917683, iteration: 416659
loss: 1.027852177619934,grad_norm: 0.9999990812663546, iteration: 416660
loss: 1.0078881978988647,grad_norm: 0.7230159114176984, iteration: 416661
loss: 0.9670917391777039,grad_norm: 0.8252470337501393, iteration: 416662
loss: 1.0155552625656128,grad_norm: 0.774702056214138, iteration: 416663
loss: 0.9822335839271545,grad_norm: 0.6706155354494051, iteration: 416664
loss: 1.0331175327301025,grad_norm: 0.9999991921728257, iteration: 416665
loss: 1.0036554336547852,grad_norm: 0.9751453345011725, iteration: 416666
loss: 1.0347989797592163,grad_norm: 0.6754644683692332, iteration: 416667
loss: 1.0117347240447998,grad_norm: 0.8782117710142898, iteration: 416668
loss: 0.9504376649856567,grad_norm: 0.7866679836395234, iteration: 416669
loss: 0.9841085076332092,grad_norm: 0.740661575016578, iteration: 416670
loss: 1.019750714302063,grad_norm: 0.8613323878188106, iteration: 416671
loss: 1.024215817451477,grad_norm: 0.8588509013135902, iteration: 416672
loss: 1.0052225589752197,grad_norm: 0.9171581874293872, iteration: 416673
loss: 0.9980161786079407,grad_norm: 0.9999990462737554, iteration: 416674
loss: 1.014431357383728,grad_norm: 0.8831080484839441, iteration: 416675
loss: 0.9807783961296082,grad_norm: 0.7130138268829997, iteration: 416676
loss: 0.9657160043716431,grad_norm: 0.8623051793340133, iteration: 416677
loss: 0.9810065627098083,grad_norm: 0.7589255743655444, iteration: 416678
loss: 1.0069061517715454,grad_norm: 0.9999994572717257, iteration: 416679
loss: 1.0161911249160767,grad_norm: 0.6935756612612228, iteration: 416680
loss: 1.0399221181869507,grad_norm: 0.8140352985904533, iteration: 416681
loss: 0.9876811504364014,grad_norm: 0.8550324535078363, iteration: 416682
loss: 0.9994890093803406,grad_norm: 0.6341636621285374, iteration: 416683
loss: 1.0181593894958496,grad_norm: 0.9990346544266262, iteration: 416684
loss: 0.9746293425559998,grad_norm: 0.5941865271388783, iteration: 416685
loss: 1.005186676979065,grad_norm: 0.7452411475815366, iteration: 416686
loss: 1.0360773801803589,grad_norm: 0.7955400878337348, iteration: 416687
loss: 0.9818658828735352,grad_norm: 0.7031883336102589, iteration: 416688
loss: 0.9683636426925659,grad_norm: 0.6775506400937695, iteration: 416689
loss: 1.0271519422531128,grad_norm: 0.7474832540099912, iteration: 416690
loss: 1.03634774684906,grad_norm: 0.7901404729384531, iteration: 416691
loss: 0.9901968240737915,grad_norm: 0.7253395800805686, iteration: 416692
loss: 1.0106098651885986,grad_norm: 0.7808771164100454, iteration: 416693
loss: 0.9721130728721619,grad_norm: 0.7001260548977646, iteration: 416694
loss: 1.0296858549118042,grad_norm: 0.8273086525254975, iteration: 416695
loss: 1.0478841066360474,grad_norm: 0.9558475734734726, iteration: 416696
loss: 1.048445224761963,grad_norm: 0.811216169019898, iteration: 416697
loss: 1.0187501907348633,grad_norm: 0.7365221721102183, iteration: 416698
loss: 1.0501078367233276,grad_norm: 0.999999601408761, iteration: 416699
loss: 1.0096698999404907,grad_norm: 0.754835002721397, iteration: 416700
loss: 0.9897753000259399,grad_norm: 0.752196136925028, iteration: 416701
loss: 1.011031985282898,grad_norm: 0.8127736928062901, iteration: 416702
loss: 1.0032992362976074,grad_norm: 0.6681618591432749, iteration: 416703
loss: 0.9691348075866699,grad_norm: 0.7428147718881375, iteration: 416704
loss: 0.9950209856033325,grad_norm: 0.8477134163631639, iteration: 416705
loss: 1.104040265083313,grad_norm: 0.9999996580070473, iteration: 416706
loss: 1.047817349433899,grad_norm: 0.8264000038198732, iteration: 416707
loss: 1.000482201576233,grad_norm: 0.9061889416396813, iteration: 416708
loss: 0.9378455877304077,grad_norm: 0.999999316936416, iteration: 416709
loss: 1.0246233940124512,grad_norm: 0.7239989568148067, iteration: 416710
loss: 0.9842845797538757,grad_norm: 0.8186540786449782, iteration: 416711
loss: 1.0072568655014038,grad_norm: 0.7821300519240748, iteration: 416712
loss: 1.004066824913025,grad_norm: 0.7582523051966228, iteration: 416713
loss: 0.9991605877876282,grad_norm: 0.8889739848270406, iteration: 416714
loss: 0.9804818630218506,grad_norm: 0.7407395217620545, iteration: 416715
loss: 1.0007684230804443,grad_norm: 0.680600052477635, iteration: 416716
loss: 1.0588676929473877,grad_norm: 0.7756956789567094, iteration: 416717
loss: 1.0169620513916016,grad_norm: 0.679898466856469, iteration: 416718
loss: 1.022447109222412,grad_norm: 0.7664341072732603, iteration: 416719
loss: 1.001333475112915,grad_norm: 0.7073557615499365, iteration: 416720
loss: 0.9454967379570007,grad_norm: 0.9011090787972268, iteration: 416721
loss: 1.0050817728042603,grad_norm: 0.7348586930317362, iteration: 416722
loss: 1.0545059442520142,grad_norm: 0.8543907236946038, iteration: 416723
loss: 0.9830358624458313,grad_norm: 0.7517316212200487, iteration: 416724
loss: 0.9985957145690918,grad_norm: 0.6707656544533235, iteration: 416725
loss: 1.0068228244781494,grad_norm: 0.7086040798142443, iteration: 416726
loss: 1.0008481740951538,grad_norm: 0.8525804094806598, iteration: 416727
loss: 0.9868431091308594,grad_norm: 0.7480719579562625, iteration: 416728
loss: 1.0088101625442505,grad_norm: 0.7310276115500647, iteration: 416729
loss: 1.0119551420211792,grad_norm: 0.8453369110355305, iteration: 416730
loss: 1.0201892852783203,grad_norm: 0.8542561422600954, iteration: 416731
loss: 1.0060014724731445,grad_norm: 0.8562978166265189, iteration: 416732
loss: 1.110175371170044,grad_norm: 0.999999963037945, iteration: 416733
loss: 0.9604960680007935,grad_norm: 0.9331347953197041, iteration: 416734
loss: 0.9777070879936218,grad_norm: 0.7459534746077013, iteration: 416735
loss: 0.9783670902252197,grad_norm: 0.7351405092474164, iteration: 416736
loss: 1.011962890625,grad_norm: 0.714013168252344, iteration: 416737
loss: 1.01986825466156,grad_norm: 0.9999995655944697, iteration: 416738
loss: 1.0110421180725098,grad_norm: 0.7322513695355599, iteration: 416739
loss: 0.9883442521095276,grad_norm: 0.5836285101141014, iteration: 416740
loss: 1.0137066841125488,grad_norm: 0.7398932314056814, iteration: 416741
loss: 1.0034148693084717,grad_norm: 0.7750043654949217, iteration: 416742
loss: 0.9570461511611938,grad_norm: 0.9133947930437711, iteration: 416743
loss: 0.9930146336555481,grad_norm: 0.7876057494801156, iteration: 416744
loss: 0.9523317813873291,grad_norm: 0.6708595531491232, iteration: 416745
loss: 0.9616786241531372,grad_norm: 0.9999996770783807, iteration: 416746
loss: 0.997330904006958,grad_norm: 0.7648537938298963, iteration: 416747
loss: 1.0288519859313965,grad_norm: 0.7500193673969727, iteration: 416748
loss: 0.9894680976867676,grad_norm: 0.7783753811558636, iteration: 416749
loss: 1.0093023777008057,grad_norm: 0.6736702083996343, iteration: 416750
loss: 1.0048197507858276,grad_norm: 0.8230787424906261, iteration: 416751
loss: 1.0029308795928955,grad_norm: 0.9066918817760173, iteration: 416752
loss: 1.0128213167190552,grad_norm: 0.6771369680061613, iteration: 416753
loss: 1.0030300617218018,grad_norm: 0.7572381957068602, iteration: 416754
loss: 1.0260910987854004,grad_norm: 1.0000000378476777, iteration: 416755
loss: 1.0230262279510498,grad_norm: 0.7249822792733475, iteration: 416756
loss: 1.012699842453003,grad_norm: 0.9278935704939953, iteration: 416757
loss: 0.9439800977706909,grad_norm: 0.8354292033279265, iteration: 416758
loss: 0.9882680177688599,grad_norm: 0.6350607964127418, iteration: 416759
loss: 0.9907355904579163,grad_norm: 0.7105520499030835, iteration: 416760
loss: 1.0248618125915527,grad_norm: 0.885917773525094, iteration: 416761
loss: 1.0622271299362183,grad_norm: 0.8756937197173692, iteration: 416762
loss: 1.053415060043335,grad_norm: 0.7920684568986388, iteration: 416763
loss: 1.0021240711212158,grad_norm: 0.8909936012397358, iteration: 416764
loss: 1.2154650688171387,grad_norm: 0.9999997704413638, iteration: 416765
loss: 0.9941815733909607,grad_norm: 0.7774710657161302, iteration: 416766
loss: 1.0381426811218262,grad_norm: 0.6822886270635212, iteration: 416767
loss: 1.0110043287277222,grad_norm: 0.8274465902508638, iteration: 416768
loss: 1.0672718286514282,grad_norm: 0.7101912982096733, iteration: 416769
loss: 0.9813688397407532,grad_norm: 0.8591258757750936, iteration: 416770
loss: 0.9959551095962524,grad_norm: 0.8566277196108942, iteration: 416771
loss: 0.9804325699806213,grad_norm: 0.9810072619008563, iteration: 416772
loss: 1.0135600566864014,grad_norm: 0.8237256745163456, iteration: 416773
loss: 1.0393036603927612,grad_norm: 0.9999998538713845, iteration: 416774
loss: 1.0328670740127563,grad_norm: 0.8464955906275755, iteration: 416775
loss: 0.9705137014389038,grad_norm: 0.7646314241591988, iteration: 416776
loss: 1.0236002206802368,grad_norm: 0.6451636607993361, iteration: 416777
loss: 1.0295478105545044,grad_norm: 0.9402977843046951, iteration: 416778
loss: 1.0434118509292603,grad_norm: 0.8039118786181579, iteration: 416779
loss: 1.0170211791992188,grad_norm: 0.9400811579504611, iteration: 416780
loss: 0.9885532855987549,grad_norm: 0.8358543656738904, iteration: 416781
loss: 0.9833155870437622,grad_norm: 0.7155609035152108, iteration: 416782
loss: 0.9579275250434875,grad_norm: 0.8132947781497577, iteration: 416783
loss: 1.062831997871399,grad_norm: 0.8249156759998355, iteration: 416784
loss: 1.009324073791504,grad_norm: 0.662950934095041, iteration: 416785
loss: 0.983917772769928,grad_norm: 0.8256639584749883, iteration: 416786
loss: 1.0421879291534424,grad_norm: 0.7686563749095645, iteration: 416787
loss: 0.9951030015945435,grad_norm: 0.7633554298037681, iteration: 416788
loss: 0.9769207835197449,grad_norm: 0.6358819822055277, iteration: 416789
loss: 1.0101327896118164,grad_norm: 0.7586178174951365, iteration: 416790
loss: 1.0151517391204834,grad_norm: 0.9314377355613661, iteration: 416791
loss: 0.9904363751411438,grad_norm: 0.7438811638248752, iteration: 416792
loss: 0.9758540987968445,grad_norm: 0.6311445064468983, iteration: 416793
loss: 0.9884983897209167,grad_norm: 0.6971451346672775, iteration: 416794
loss: 0.9482169151306152,grad_norm: 0.8534919393656931, iteration: 416795
loss: 0.9912197589874268,grad_norm: 0.7738538767405047, iteration: 416796
loss: 0.9931201934814453,grad_norm: 0.7526787306750845, iteration: 416797
loss: 1.0008130073547363,grad_norm: 0.7792415513363868, iteration: 416798
loss: 0.9603567123413086,grad_norm: 0.870020419832352, iteration: 416799
loss: 0.9630856513977051,grad_norm: 0.8127695559639291, iteration: 416800
loss: 0.9961956739425659,grad_norm: 0.848723018966134, iteration: 416801
loss: 0.986602783203125,grad_norm: 0.7019367403189303, iteration: 416802
loss: 0.9952107667922974,grad_norm: 0.648323967277868, iteration: 416803
loss: 1.0124123096466064,grad_norm: 0.773830198376734, iteration: 416804
loss: 1.0348395109176636,grad_norm: 0.9123029482614168, iteration: 416805
loss: 1.0077695846557617,grad_norm: 0.8315802446914721, iteration: 416806
loss: 0.97050541639328,grad_norm: 0.7639219949763157, iteration: 416807
loss: 1.0570601224899292,grad_norm: 0.9999993055662068, iteration: 416808
loss: 0.9701196551322937,grad_norm: 0.8684822608379362, iteration: 416809
loss: 0.9871444702148438,grad_norm: 0.8887435362051617, iteration: 416810
loss: 1.022437572479248,grad_norm: 0.9390172230389503, iteration: 416811
loss: 1.0085477828979492,grad_norm: 0.8565221174889159, iteration: 416812
loss: 1.0117307901382446,grad_norm: 0.7739400683371801, iteration: 416813
loss: 0.9976125359535217,grad_norm: 0.8090241566703207, iteration: 416814
loss: 1.0427647829055786,grad_norm: 0.8298992660164352, iteration: 416815
loss: 1.0158768892288208,grad_norm: 0.838270096567176, iteration: 416816
loss: 1.0320637226104736,grad_norm: 0.9349812561496832, iteration: 416817
loss: 0.9911874532699585,grad_norm: 0.7459980101359674, iteration: 416818
loss: 1.0307739973068237,grad_norm: 0.9999990947391375, iteration: 416819
loss: 1.009033441543579,grad_norm: 0.8023795781731121, iteration: 416820
loss: 0.9585602283477783,grad_norm: 0.8093195394301047, iteration: 416821
loss: 0.982379674911499,grad_norm: 0.9999991221963166, iteration: 416822
loss: 1.0002743005752563,grad_norm: 0.6926590250191655, iteration: 416823
loss: 0.9830123782157898,grad_norm: 0.7901832771042959, iteration: 416824
loss: 1.0739214420318604,grad_norm: 0.8944024377832968, iteration: 416825
loss: 1.0389065742492676,grad_norm: 0.9099228507678218, iteration: 416826
loss: 1.0124021768569946,grad_norm: 0.8073652984756311, iteration: 416827
loss: 1.0140239000320435,grad_norm: 0.7814132534772704, iteration: 416828
loss: 0.9493734240531921,grad_norm: 0.6856811362161928, iteration: 416829
loss: 0.995513379573822,grad_norm: 0.8642833069404655, iteration: 416830
loss: 0.9594655632972717,grad_norm: 0.707718763217509, iteration: 416831
loss: 1.0178303718566895,grad_norm: 0.8171805852029166, iteration: 416832
loss: 0.9790263175964355,grad_norm: 0.7327298877357495, iteration: 416833
loss: 0.9848172664642334,grad_norm: 0.8480252123997367, iteration: 416834
loss: 1.0207700729370117,grad_norm: 0.8298501293303531, iteration: 416835
loss: 1.0120043754577637,grad_norm: 0.7650996662029868, iteration: 416836
loss: 1.054184913635254,grad_norm: 0.9153833134118557, iteration: 416837
loss: 0.9955253005027771,grad_norm: 0.6814038655742749, iteration: 416838
loss: 0.9555439352989197,grad_norm: 0.9677536443674619, iteration: 416839
loss: 0.9648198485374451,grad_norm: 0.8621332475703247, iteration: 416840
loss: 0.968081533908844,grad_norm: 0.8381518201186705, iteration: 416841
loss: 1.0341380834579468,grad_norm: 1.0000001465738002, iteration: 416842
loss: 0.9697510004043579,grad_norm: 0.7907427244333917, iteration: 416843
loss: 0.9903856515884399,grad_norm: 0.8784115813561679, iteration: 416844
loss: 1.02463698387146,grad_norm: 0.8836493708030007, iteration: 416845
loss: 1.0010871887207031,grad_norm: 0.6959914729784125, iteration: 416846
loss: 1.0147204399108887,grad_norm: 0.7861791401969367, iteration: 416847
loss: 0.9933314919471741,grad_norm: 0.7547343931198128, iteration: 416848
loss: 1.002367615699768,grad_norm: 0.9183260776036478, iteration: 416849
loss: 1.0075222253799438,grad_norm: 0.751489294079321, iteration: 416850
loss: 1.0407713651657104,grad_norm: 0.838107129046201, iteration: 416851
loss: 1.0429279804229736,grad_norm: 0.7844118833600715, iteration: 416852
loss: 0.9980174899101257,grad_norm: 0.7412418711330416, iteration: 416853
loss: 0.9896833896636963,grad_norm: 0.8294348061870471, iteration: 416854
loss: 0.9729376435279846,grad_norm: 0.7806040300818035, iteration: 416855
loss: 1.017925500869751,grad_norm: 0.7883957324315205, iteration: 416856
loss: 0.9899770617485046,grad_norm: 0.7819751970114337, iteration: 416857
loss: 0.991791307926178,grad_norm: 0.8670294786014737, iteration: 416858
loss: 0.997383713722229,grad_norm: 0.9021976758307555, iteration: 416859
loss: 0.9946590662002563,grad_norm: 0.8091149520182637, iteration: 416860
loss: 0.9915294647216797,grad_norm: 0.8292378544006273, iteration: 416861
loss: 1.0122194290161133,grad_norm: 0.889928333436882, iteration: 416862
loss: 1.0232505798339844,grad_norm: 0.7479538930228355, iteration: 416863
loss: 0.9753465056419373,grad_norm: 0.9999992340514039, iteration: 416864
loss: 0.9729973077774048,grad_norm: 0.6638131505291393, iteration: 416865
loss: 0.9631276726722717,grad_norm: 0.7823446413226811, iteration: 416866
loss: 0.998229444026947,grad_norm: 0.8331293881895058, iteration: 416867
loss: 0.9841728210449219,grad_norm: 0.7607936489309837, iteration: 416868
loss: 1.0042964220046997,grad_norm: 0.8811158143440031, iteration: 416869
loss: 1.0109235048294067,grad_norm: 0.8102078686730124, iteration: 416870
loss: 1.06084406375885,grad_norm: 0.7574424067144505, iteration: 416871
loss: 1.0266549587249756,grad_norm: 0.6828477783145191, iteration: 416872
loss: 0.9940761923789978,grad_norm: 0.9710802961894945, iteration: 416873
loss: 0.9724292159080505,grad_norm: 0.6638156891985829, iteration: 416874
loss: 1.0049989223480225,grad_norm: 0.6861870664931353, iteration: 416875
loss: 0.9949710965156555,grad_norm: 0.693141760501982, iteration: 416876
loss: 0.9968404769897461,grad_norm: 0.8705948723599483, iteration: 416877
loss: 1.0857479572296143,grad_norm: 0.8566012741092727, iteration: 416878
loss: 0.9956885576248169,grad_norm: 0.8287756895600762, iteration: 416879
loss: 0.9899386167526245,grad_norm: 0.6495426896384822, iteration: 416880
loss: 1.0146137475967407,grad_norm: 0.7885387826299701, iteration: 416881
loss: 0.9977536797523499,grad_norm: 0.6987243552520144, iteration: 416882
loss: 1.0255573987960815,grad_norm: 0.9328788804063332, iteration: 416883
loss: 1.010121464729309,grad_norm: 0.9999990152781307, iteration: 416884
loss: 1.022279977798462,grad_norm: 0.9507478617980271, iteration: 416885
loss: 1.0046290159225464,grad_norm: 0.8158447753144411, iteration: 416886
loss: 1.067218542098999,grad_norm: 0.9999998058093499, iteration: 416887
loss: 0.9953335523605347,grad_norm: 0.7569245692386148, iteration: 416888
loss: 1.006493091583252,grad_norm: 0.7455840671110873, iteration: 416889
loss: 1.0156357288360596,grad_norm: 0.7673774630290833, iteration: 416890
loss: 0.9795852899551392,grad_norm: 0.8264578375189336, iteration: 416891
loss: 1.0243912935256958,grad_norm: 0.7317293093122379, iteration: 416892
loss: 0.9763250350952148,grad_norm: 0.775824005123904, iteration: 416893
loss: 0.9966744780540466,grad_norm: 0.7575894090978365, iteration: 416894
loss: 1.0145540237426758,grad_norm: 0.76208175354703, iteration: 416895
loss: 0.9870304465293884,grad_norm: 0.7071601463200037, iteration: 416896
loss: 1.0099916458129883,grad_norm: 0.7997215355500521, iteration: 416897
loss: 1.0268661975860596,grad_norm: 0.8166486691507211, iteration: 416898
loss: 1.0190627574920654,grad_norm: 0.8454646025541952, iteration: 416899
loss: 0.9768523573875427,grad_norm: 0.7094071218836436, iteration: 416900
loss: 0.9935433864593506,grad_norm: 0.7503354793990563, iteration: 416901
loss: 0.9868387579917908,grad_norm: 0.7861472088643916, iteration: 416902
loss: 1.0001983642578125,grad_norm: 0.7934764680926801, iteration: 416903
loss: 0.984420120716095,grad_norm: 0.6738504432002977, iteration: 416904
loss: 0.998059093952179,grad_norm: 0.8275675035535041, iteration: 416905
loss: 0.9989855885505676,grad_norm: 0.7052243116224315, iteration: 416906
loss: 1.058672547340393,grad_norm: 0.8048423281293173, iteration: 416907
loss: 1.0210684537887573,grad_norm: 0.9727099164158318, iteration: 416908
loss: 0.9960918426513672,grad_norm: 0.7072966942983564, iteration: 416909
loss: 0.9902540445327759,grad_norm: 0.7309189749674342, iteration: 416910
loss: 1.0822100639343262,grad_norm: 0.9999995598819535, iteration: 416911
loss: 0.9976962804794312,grad_norm: 0.7522872752456137, iteration: 416912
loss: 0.9795283079147339,grad_norm: 0.7239754738187512, iteration: 416913
loss: 1.0040149688720703,grad_norm: 0.7579000173311773, iteration: 416914
loss: 1.0184545516967773,grad_norm: 0.8882627084995159, iteration: 416915
loss: 1.0802770853042603,grad_norm: 0.8536131931379497, iteration: 416916
loss: 0.9909022450447083,grad_norm: 0.8552552470972341, iteration: 416917
loss: 1.0228044986724854,grad_norm: 0.7920618547948598, iteration: 416918
loss: 1.009901523590088,grad_norm: 0.7007209930364241, iteration: 416919
loss: 1.0352654457092285,grad_norm: 0.7584339934832018, iteration: 416920
loss: 0.9930622577667236,grad_norm: 0.8703159218279669, iteration: 416921
loss: 0.998640239238739,grad_norm: 0.9254056782712334, iteration: 416922
loss: 1.0104995965957642,grad_norm: 0.679766290405877, iteration: 416923
loss: 1.0232982635498047,grad_norm: 0.8500212111557154, iteration: 416924
loss: 1.038517713546753,grad_norm: 0.9999999451907614, iteration: 416925
loss: 1.1240129470825195,grad_norm: 0.9999997774288562, iteration: 416926
loss: 0.9994703531265259,grad_norm: 0.9399289043350798, iteration: 416927
loss: 0.9953641295433044,grad_norm: 0.7222011992249069, iteration: 416928
loss: 0.9950190782546997,grad_norm: 0.9347249609591807, iteration: 416929
loss: 1.0546852350234985,grad_norm: 0.9999992548503724, iteration: 416930
loss: 1.0293906927108765,grad_norm: 0.8662106030307514, iteration: 416931
loss: 0.9892480373382568,grad_norm: 0.9532906483757437, iteration: 416932
loss: 1.0047612190246582,grad_norm: 0.9942764032847838, iteration: 416933
loss: 1.068648338317871,grad_norm: 0.96870089145365, iteration: 416934
loss: 1.012702226638794,grad_norm: 0.7880562545740823, iteration: 416935
loss: 0.986876904964447,grad_norm: 0.9999990689311453, iteration: 416936
loss: 1.0324960947036743,grad_norm: 0.8592801234200607, iteration: 416937
loss: 0.9804428219795227,grad_norm: 0.758017500442716, iteration: 416938
loss: 0.9618524312973022,grad_norm: 0.7350989586238567, iteration: 416939
loss: 1.1723313331604004,grad_norm: 0.9999997573766801, iteration: 416940
loss: 1.0194151401519775,grad_norm: 0.8995318716653333, iteration: 416941
loss: 1.0322246551513672,grad_norm: 0.9999995764207709, iteration: 416942
loss: 0.9885119199752808,grad_norm: 0.6743502937339603, iteration: 416943
loss: 0.9390270709991455,grad_norm: 0.8556535883388653, iteration: 416944
loss: 1.0189992189407349,grad_norm: 0.5682837574392109, iteration: 416945
loss: 1.0875802040100098,grad_norm: 0.7986817347631882, iteration: 416946
loss: 0.9606415629386902,grad_norm: 0.8616664920905878, iteration: 416947
loss: 0.9855544567108154,grad_norm: 0.830456834082932, iteration: 416948
loss: 0.9628052115440369,grad_norm: 0.7256102936547274, iteration: 416949
loss: 1.0522546768188477,grad_norm: 0.9999992278985277, iteration: 416950
loss: 0.9968240261077881,grad_norm: 0.8123501385693741, iteration: 416951
loss: 1.0090128183364868,grad_norm: 0.8105842359998703, iteration: 416952
loss: 1.021452784538269,grad_norm: 0.8656790661743298, iteration: 416953
loss: 1.0196956396102905,grad_norm: 0.9999991499902906, iteration: 416954
loss: 1.0278794765472412,grad_norm: 0.7675408442142335, iteration: 416955
loss: 0.9972679615020752,grad_norm: 0.6460621033411998, iteration: 416956
loss: 1.010144591331482,grad_norm: 0.9999995663489085, iteration: 416957
loss: 0.9810771942138672,grad_norm: 0.7442607923284973, iteration: 416958
loss: 1.0265579223632812,grad_norm: 0.8030529767365069, iteration: 416959
loss: 0.9821934700012207,grad_norm: 0.9999991235187718, iteration: 416960
loss: 0.9944109320640564,grad_norm: 0.8202513601337929, iteration: 416961
loss: 1.0029103755950928,grad_norm: 0.7638516098985455, iteration: 416962
loss: 0.9970474243164062,grad_norm: 0.7434687720737613, iteration: 416963
loss: 0.9975109696388245,grad_norm: 0.8632952767078728, iteration: 416964
loss: 1.0059341192245483,grad_norm: 0.7331602459710229, iteration: 416965
loss: 0.9867963194847107,grad_norm: 0.6384230341496925, iteration: 416966
loss: 0.9755441546440125,grad_norm: 0.7559730070757157, iteration: 416967
loss: 1.0120177268981934,grad_norm: 0.7411650555942766, iteration: 416968
loss: 1.084574580192566,grad_norm: 0.9662947497439083, iteration: 416969
loss: 0.9835386276245117,grad_norm: 0.8216664741592553, iteration: 416970
loss: 1.0244851112365723,grad_norm: 0.882280068751768, iteration: 416971
loss: 1.011641263961792,grad_norm: 0.8285634916959652, iteration: 416972
loss: 0.9950481653213501,grad_norm: 0.7610050485924373, iteration: 416973
loss: 1.0123138427734375,grad_norm: 0.8606943644214492, iteration: 416974
loss: 0.9769707322120667,grad_norm: 0.7121224926264802, iteration: 416975
loss: 1.0222047567367554,grad_norm: 0.7734969178867115, iteration: 416976
loss: 1.0392978191375732,grad_norm: 0.999999066474258, iteration: 416977
loss: 1.0302833318710327,grad_norm: 0.8972236748887937, iteration: 416978
loss: 1.0326343774795532,grad_norm: 0.9999999249533044, iteration: 416979
loss: 1.0092151165008545,grad_norm: 0.7053199364461882, iteration: 416980
loss: 1.106582522392273,grad_norm: 0.7819804914172078, iteration: 416981
loss: 1.0257182121276855,grad_norm: 0.5605316158106232, iteration: 416982
loss: 0.9847415685653687,grad_norm: 0.8501787090554893, iteration: 416983
loss: 1.0031613111495972,grad_norm: 0.9999990576084786, iteration: 416984
loss: 1.031057357788086,grad_norm: 0.9999997230257333, iteration: 416985
loss: 1.0193222761154175,grad_norm: 0.8060321545444862, iteration: 416986
loss: 1.0126489400863647,grad_norm: 0.7233783156806034, iteration: 416987
loss: 0.9735258221626282,grad_norm: 0.6916526068528847, iteration: 416988
loss: 0.988379716873169,grad_norm: 0.8869647608688458, iteration: 416989
loss: 1.0066046714782715,grad_norm: 0.6868765795325686, iteration: 416990
loss: 0.9748509526252747,grad_norm: 0.7071819451741919, iteration: 416991
loss: 0.9764987826347351,grad_norm: 0.6625926286262662, iteration: 416992
loss: 1.0422847270965576,grad_norm: 0.8403839300400449, iteration: 416993
loss: 1.000294804573059,grad_norm: 0.7752034172441437, iteration: 416994
loss: 1.0214030742645264,grad_norm: 0.8656798375165802, iteration: 416995
loss: 0.9879699945449829,grad_norm: 0.6595171576706405, iteration: 416996
loss: 1.0090500116348267,grad_norm: 0.7981187938918187, iteration: 416997
loss: 1.0173434019088745,grad_norm: 0.9817897179151973, iteration: 416998
loss: 0.990595281124115,grad_norm: 0.6927084538605455, iteration: 416999
loss: 0.9691095948219299,grad_norm: 0.7500917914762603, iteration: 417000
loss: 1.0292456150054932,grad_norm: 0.7516277968156104, iteration: 417001
loss: 1.0407359600067139,grad_norm: 0.9847260585519377, iteration: 417002
loss: 0.9961049556732178,grad_norm: 0.6253337745603511, iteration: 417003
loss: 1.0228618383407593,grad_norm: 1.0000000236598856, iteration: 417004
loss: 0.9964104890823364,grad_norm: 0.6771045453733716, iteration: 417005
loss: 0.9954124689102173,grad_norm: 0.6286682004331369, iteration: 417006
loss: 0.9983370900154114,grad_norm: 0.8529271521914005, iteration: 417007
loss: 1.055618166923523,grad_norm: 0.6797564416655031, iteration: 417008
loss: 1.0238990783691406,grad_norm: 0.9999993506971449, iteration: 417009
loss: 0.9871789216995239,grad_norm: 0.9999994434064507, iteration: 417010
loss: 1.0072124004364014,grad_norm: 0.7292459214031778, iteration: 417011
loss: 1.0414457321166992,grad_norm: 0.7396360108018202, iteration: 417012
loss: 1.0060445070266724,grad_norm: 0.8913604516153406, iteration: 417013
loss: 1.0184401273727417,grad_norm: 0.809178173068791, iteration: 417014
loss: 1.0094581842422485,grad_norm: 0.8676203971372479, iteration: 417015
loss: 0.9568296074867249,grad_norm: 0.8651674348661088, iteration: 417016
loss: 1.0366184711456299,grad_norm: 0.9035339070387424, iteration: 417017
loss: 1.006890058517456,grad_norm: 0.83799743671987, iteration: 417018
loss: 1.0262786149978638,grad_norm: 0.9999994075050411, iteration: 417019
loss: 1.0008074045181274,grad_norm: 0.8217655814855284, iteration: 417020
loss: 0.9935861229896545,grad_norm: 0.8122418744159365, iteration: 417021
loss: 1.0392494201660156,grad_norm: 0.8486492103760218, iteration: 417022
loss: 1.0667850971221924,grad_norm: 0.9999997342448758, iteration: 417023
loss: 0.9855930209159851,grad_norm: 0.7952231278447153, iteration: 417024
loss: 0.9772390127182007,grad_norm: 0.8382141729876013, iteration: 417025
loss: 1.006079077720642,grad_norm: 0.6500255115635314, iteration: 417026
loss: 1.0067644119262695,grad_norm: 0.7583053241061347, iteration: 417027
loss: 1.0531560182571411,grad_norm: 0.7804247305153689, iteration: 417028
loss: 1.0013657808303833,grad_norm: 0.8450702793370182, iteration: 417029
loss: 0.9660710096359253,grad_norm: 0.7960718941649454, iteration: 417030
loss: 1.011759877204895,grad_norm: 0.8240574278349381, iteration: 417031
loss: 1.008662462234497,grad_norm: 0.9999991211450662, iteration: 417032
loss: 0.9739453196525574,grad_norm: 0.999999438004945, iteration: 417033
loss: 1.0030359029769897,grad_norm: 0.6783686265251682, iteration: 417034
loss: 1.0660589933395386,grad_norm: 0.9999990861393732, iteration: 417035
loss: 1.031764268875122,grad_norm: 0.7603591042173866, iteration: 417036
loss: 1.0132368803024292,grad_norm: 0.8796021666880026, iteration: 417037
loss: 1.0176182985305786,grad_norm: 0.8903654209508531, iteration: 417038
loss: 0.9886292219161987,grad_norm: 0.6309091388586121, iteration: 417039
loss: 0.9876794219017029,grad_norm: 0.8076619458138713, iteration: 417040
loss: 1.002131700515747,grad_norm: 0.6847886386112622, iteration: 417041
loss: 0.9964321851730347,grad_norm: 0.7740659939851776, iteration: 417042
loss: 0.9980835914611816,grad_norm: 0.6680266688336923, iteration: 417043
loss: 0.9950214624404907,grad_norm: 0.9674061607077118, iteration: 417044
loss: 0.9823927879333496,grad_norm: 0.7290016672590023, iteration: 417045
loss: 1.0756057500839233,grad_norm: 0.8195289182162969, iteration: 417046
loss: 1.0044564008712769,grad_norm: 0.7589300917120833, iteration: 417047
loss: 1.0004903078079224,grad_norm: 0.6535980953748591, iteration: 417048
loss: 1.029447317123413,grad_norm: 0.999999580270695, iteration: 417049
loss: 1.024001955986023,grad_norm: 0.8801724594084073, iteration: 417050
loss: 0.9718148708343506,grad_norm: 0.7770718279242845, iteration: 417051
loss: 0.9585386514663696,grad_norm: 0.7432779171499273, iteration: 417052
loss: 1.09388267993927,grad_norm: 0.9999992476979679, iteration: 417053
loss: 1.0258536338806152,grad_norm: 0.6895761819614183, iteration: 417054
loss: 1.1465867757797241,grad_norm: 0.8884459855938461, iteration: 417055
loss: 1.0307053327560425,grad_norm: 0.9999995373791608, iteration: 417056
loss: 0.9951828718185425,grad_norm: 0.6810965346533314, iteration: 417057
loss: 1.2541706562042236,grad_norm: 0.9999999159262426, iteration: 417058
loss: 1.013419508934021,grad_norm: 0.6975571207319772, iteration: 417059
loss: 1.0022081136703491,grad_norm: 0.8900971363933275, iteration: 417060
loss: 0.9903832674026489,grad_norm: 0.9999994182978514, iteration: 417061
loss: 1.0725996494293213,grad_norm: 0.8252445231172436, iteration: 417062
loss: 0.9848089218139648,grad_norm: 0.8901818794709704, iteration: 417063
loss: 0.9596197009086609,grad_norm: 0.9999993080622375, iteration: 417064
loss: 0.9901977181434631,grad_norm: 0.652297393208865, iteration: 417065
loss: 1.016878604888916,grad_norm: 0.9999992185501729, iteration: 417066
loss: 1.029140591621399,grad_norm: 0.9999991202901856, iteration: 417067
loss: 0.9765556454658508,grad_norm: 0.7055833665899038, iteration: 417068
loss: 1.0240561962127686,grad_norm: 0.9999991921861742, iteration: 417069
loss: 1.098213791847229,grad_norm: 0.9225249244916407, iteration: 417070
loss: 1.0343044996261597,grad_norm: 0.7170054571215443, iteration: 417071
loss: 1.0659328699111938,grad_norm: 0.9999992324783494, iteration: 417072
loss: 1.0221019983291626,grad_norm: 0.9020805711478357, iteration: 417073
loss: 0.9944936633110046,grad_norm: 0.8058888789696564, iteration: 417074
loss: 0.9908508062362671,grad_norm: 0.8303282068823473, iteration: 417075
loss: 1.0207194089889526,grad_norm: 0.7336441117867121, iteration: 417076
loss: 0.9544800519943237,grad_norm: 0.8718454549701543, iteration: 417077
loss: 0.9811974167823792,grad_norm: 0.7533531035641788, iteration: 417078
loss: 0.9748014211654663,grad_norm: 0.7418733708215339, iteration: 417079
loss: 1.0198473930358887,grad_norm: 0.7238212487934026, iteration: 417080
loss: 1.0078049898147583,grad_norm: 0.7200253859881571, iteration: 417081
loss: 0.9726616144180298,grad_norm: 0.8410304721864578, iteration: 417082
loss: 0.9478062391281128,grad_norm: 0.9999992675659384, iteration: 417083
loss: 1.0058033466339111,grad_norm: 0.7795730134782396, iteration: 417084
loss: 1.0338175296783447,grad_norm: 0.8336931311583238, iteration: 417085
loss: 1.002437710762024,grad_norm: 0.8422457640164882, iteration: 417086
loss: 1.0011711120605469,grad_norm: 0.7394863585234143, iteration: 417087
loss: 0.9969010353088379,grad_norm: 0.9794883738268501, iteration: 417088
loss: 0.9855703115463257,grad_norm: 0.9999990609347604, iteration: 417089
loss: 0.9866902232170105,grad_norm: 0.9408558589730572, iteration: 417090
loss: 1.0849063396453857,grad_norm: 0.9999995294105004, iteration: 417091
loss: 1.1281650066375732,grad_norm: 1.0000000130710471, iteration: 417092
loss: 0.968714714050293,grad_norm: 0.7255297492172561, iteration: 417093
loss: 1.045150876045227,grad_norm: 0.8898296436907751, iteration: 417094
loss: 1.063338041305542,grad_norm: 0.857909868170761, iteration: 417095
loss: 0.9901123046875,grad_norm: 0.7088788798715452, iteration: 417096
loss: 0.9830915927886963,grad_norm: 0.8864605578394101, iteration: 417097
loss: 1.018688440322876,grad_norm: 0.8002410066313005, iteration: 417098
loss: 1.0007270574569702,grad_norm: 0.7998747021842091, iteration: 417099
loss: 1.0086356401443481,grad_norm: 0.9169374313025568, iteration: 417100
loss: 0.9803332090377808,grad_norm: 0.71566640409664, iteration: 417101
loss: 1.0506044626235962,grad_norm: 0.9999998746218973, iteration: 417102
loss: 0.9955850839614868,grad_norm: 0.9311588107679272, iteration: 417103
loss: 1.028301477432251,grad_norm: 0.9999996400121876, iteration: 417104
loss: 1.0568071603775024,grad_norm: 0.9999991476647371, iteration: 417105
loss: 0.9875624775886536,grad_norm: 0.8139856993212907, iteration: 417106
loss: 0.9587996602058411,grad_norm: 0.7844785217120804, iteration: 417107
loss: 1.01729416847229,grad_norm: 0.816640621324825, iteration: 417108
loss: 0.9966920018196106,grad_norm: 0.714830021533757, iteration: 417109
loss: 0.9951095581054688,grad_norm: 0.8273474847327351, iteration: 417110
loss: 1.016944408416748,grad_norm: 0.9999992744906145, iteration: 417111
loss: 1.0442054271697998,grad_norm: 0.9282970550330316, iteration: 417112
loss: 1.1064249277114868,grad_norm: 0.9999998504216367, iteration: 417113
loss: 0.9939338564872742,grad_norm: 0.8793495550561184, iteration: 417114
loss: 1.0149381160736084,grad_norm: 0.6779725365930884, iteration: 417115
loss: 1.0185638666152954,grad_norm: 0.6972234738113223, iteration: 417116
loss: 0.9920343160629272,grad_norm: 0.7658297677920405, iteration: 417117
loss: 0.9598201513290405,grad_norm: 0.9999997603632851, iteration: 417118
loss: 1.035443663597107,grad_norm: 0.9999990904968545, iteration: 417119
loss: 1.0546866655349731,grad_norm: 0.9999998656447685, iteration: 417120
loss: 1.0138460397720337,grad_norm: 0.8972478120117952, iteration: 417121
loss: 1.0136182308197021,grad_norm: 0.7451310831934829, iteration: 417122
loss: 0.9611827731132507,grad_norm: 0.7731142613524312, iteration: 417123
loss: 1.0029093027114868,grad_norm: 0.6852132686090134, iteration: 417124
loss: 1.093992829322815,grad_norm: 0.999999031339712, iteration: 417125
loss: 0.9842711091041565,grad_norm: 0.8119454475854626, iteration: 417126
loss: 1.0137605667114258,grad_norm: 0.9999992936136816, iteration: 417127
loss: 1.1984246969223022,grad_norm: 0.9999997956557167, iteration: 417128
loss: 0.9872527122497559,grad_norm: 0.7751831951553532, iteration: 417129
loss: 1.0175001621246338,grad_norm: 0.8464965639969713, iteration: 417130
loss: 1.0008231401443481,grad_norm: 0.7604377408621847, iteration: 417131
loss: 1.005387306213379,grad_norm: 0.7819908686021212, iteration: 417132
loss: 1.0269904136657715,grad_norm: 0.8714396338882829, iteration: 417133
loss: 1.0917118787765503,grad_norm: 0.9999992633909082, iteration: 417134
loss: 1.0051426887512207,grad_norm: 0.7201666203770082, iteration: 417135
loss: 0.9965646266937256,grad_norm: 0.9999991363843661, iteration: 417136
loss: 1.0166096687316895,grad_norm: 0.7300912721924447, iteration: 417137
loss: 1.0225945711135864,grad_norm: 0.6836172852092128, iteration: 417138
loss: 1.0010336637496948,grad_norm: 0.775818627382705, iteration: 417139
loss: 1.0242892503738403,grad_norm: 0.9999990522201095, iteration: 417140
loss: 0.959958553314209,grad_norm: 0.7414699169732282, iteration: 417141
loss: 1.0280591249465942,grad_norm: 0.9312688351850037, iteration: 417142
loss: 1.026973843574524,grad_norm: 0.7526516615612885, iteration: 417143
loss: 1.0044416189193726,grad_norm: 0.7420083073246264, iteration: 417144
loss: 1.001327633857727,grad_norm: 0.8004042417564713, iteration: 417145
loss: 1.0629335641860962,grad_norm: 0.9999997366098042, iteration: 417146
loss: 1.0236607789993286,grad_norm: 0.8410698958820478, iteration: 417147
loss: 1.089638590812683,grad_norm: 0.8026342228258916, iteration: 417148
loss: 1.0465575456619263,grad_norm: 0.999999780970788, iteration: 417149
loss: 1.086501955986023,grad_norm: 0.9732673817681352, iteration: 417150
loss: 0.997106671333313,grad_norm: 0.9999997989680577, iteration: 417151
loss: 0.977232813835144,grad_norm: 0.6100401914555761, iteration: 417152
loss: 0.9718399047851562,grad_norm: 0.714603175440623, iteration: 417153
loss: 1.0289908647537231,grad_norm: 0.8744157152903237, iteration: 417154
loss: 0.9944949746131897,grad_norm: 0.9258873364382563, iteration: 417155
loss: 1.072229027748108,grad_norm: 0.9371740625432387, iteration: 417156
loss: 1.0661619901657104,grad_norm: 0.9999996391820328, iteration: 417157
loss: 1.0390266180038452,grad_norm: 0.7076167908214319, iteration: 417158
loss: 1.0235459804534912,grad_norm: 0.9999993408346868, iteration: 417159
loss: 1.0206828117370605,grad_norm: 0.8294148142021804, iteration: 417160
loss: 0.987552285194397,grad_norm: 0.7030827972482139, iteration: 417161
loss: 1.03404700756073,grad_norm: 0.9999992478243714, iteration: 417162
loss: 0.96831876039505,grad_norm: 0.7571271275776729, iteration: 417163
loss: 1.073180913925171,grad_norm: 0.9999990599157053, iteration: 417164
loss: 0.9987480044364929,grad_norm: 0.8997954035202358, iteration: 417165
loss: 1.0498627424240112,grad_norm: 0.9999996368405945, iteration: 417166
loss: 1.0043184757232666,grad_norm: 0.8299526947321607, iteration: 417167
loss: 0.9711120128631592,grad_norm: 0.6399534087843703, iteration: 417168
loss: 0.9754322171211243,grad_norm: 0.8415854608480884, iteration: 417169
loss: 1.011484146118164,grad_norm: 0.7357397178566912, iteration: 417170
loss: 1.0439932346343994,grad_norm: 0.9999999923016999, iteration: 417171
loss: 1.031725525856018,grad_norm: 0.9999991016533716, iteration: 417172
loss: 0.9888716340065002,grad_norm: 0.9999996304184542, iteration: 417173
loss: 1.0154892206192017,grad_norm: 0.966308804975387, iteration: 417174
loss: 1.0063949823379517,grad_norm: 0.9999991608002656, iteration: 417175
loss: 1.0845030546188354,grad_norm: 0.9296697330837544, iteration: 417176
loss: 0.958551287651062,grad_norm: 0.8502720548067514, iteration: 417177
loss: 0.997122049331665,grad_norm: 0.8065452829041434, iteration: 417178
loss: 1.0127549171447754,grad_norm: 0.7145109200830223, iteration: 417179
loss: 1.0604889392852783,grad_norm: 0.8589635383208979, iteration: 417180
loss: 1.0250346660614014,grad_norm: 0.8342698162458341, iteration: 417181
loss: 0.9801220297813416,grad_norm: 0.8010137695908802, iteration: 417182
loss: 1.072619080543518,grad_norm: 0.9999999081268656, iteration: 417183
loss: 1.1046316623687744,grad_norm: 0.9999993848816758, iteration: 417184
loss: 1.0577900409698486,grad_norm: 0.8803583117758639, iteration: 417185
loss: 1.0382288694381714,grad_norm: 0.8544686274501805, iteration: 417186
loss: 0.9812004566192627,grad_norm: 0.9012858237901343, iteration: 417187
loss: 1.0209366083145142,grad_norm: 0.7283738666783053, iteration: 417188
loss: 1.0012917518615723,grad_norm: 0.8453219445622899, iteration: 417189
loss: 1.114321231842041,grad_norm: 0.8263834994821637, iteration: 417190
loss: 1.0261058807373047,grad_norm: 0.9999991752385159, iteration: 417191
loss: 1.016576886177063,grad_norm: 0.829841388008901, iteration: 417192
loss: 1.002219319343567,grad_norm: 0.7655602514333841, iteration: 417193
loss: 0.9951624870300293,grad_norm: 0.7761778578210153, iteration: 417194
loss: 0.9814914464950562,grad_norm: 0.8155074363314864, iteration: 417195
loss: 1.0832452774047852,grad_norm: 0.9999989726197561, iteration: 417196
loss: 1.0033215284347534,grad_norm: 0.852602256264432, iteration: 417197
loss: 1.050836205482483,grad_norm: 1.0000000142071301, iteration: 417198
loss: 0.9526365399360657,grad_norm: 0.7577708173225051, iteration: 417199
loss: 1.087146282196045,grad_norm: 0.9999998046577157, iteration: 417200
loss: 1.0115513801574707,grad_norm: 0.8057029819310982, iteration: 417201
loss: 1.0018316507339478,grad_norm: 0.8778787924806892, iteration: 417202
loss: 1.0430028438568115,grad_norm: 0.9999992598220621, iteration: 417203
loss: 1.1107069253921509,grad_norm: 0.9999999607614011, iteration: 417204
loss: 1.0630780458450317,grad_norm: 0.9999992631892681, iteration: 417205
loss: 1.0015774965286255,grad_norm: 0.9357044058756293, iteration: 417206
loss: 0.9789826273918152,grad_norm: 0.9999990653174861, iteration: 417207
loss: 1.0306429862976074,grad_norm: 0.9999989947800666, iteration: 417208
loss: 0.9821774959564209,grad_norm: 0.7650477340810564, iteration: 417209
loss: 1.0031167268753052,grad_norm: 0.9999998710463521, iteration: 417210
loss: 1.1240606307983398,grad_norm: 0.988463014488836, iteration: 417211
loss: 0.998573362827301,grad_norm: 0.7638714145675328, iteration: 417212
loss: 1.0024186372756958,grad_norm: 0.9999990518524268, iteration: 417213
loss: 1.0465372800827026,grad_norm: 0.7450691418229962, iteration: 417214
loss: 0.9913475513458252,grad_norm: 0.8537320965421931, iteration: 417215
loss: 0.9845956563949585,grad_norm: 0.999999576399947, iteration: 417216
loss: 1.0015723705291748,grad_norm: 0.8089831524936286, iteration: 417217
loss: 1.165096640586853,grad_norm: 0.9999993449313437, iteration: 417218
loss: 0.9715458750724792,grad_norm: 0.7004476364034561, iteration: 417219
loss: 1.0507107973098755,grad_norm: 0.9999997256053094, iteration: 417220
loss: 0.9970722794532776,grad_norm: 0.7368843231768327, iteration: 417221
loss: 0.9903825521469116,grad_norm: 0.7246866987078895, iteration: 417222
loss: 1.033943772315979,grad_norm: 0.9999990230867294, iteration: 417223
loss: 1.009830117225647,grad_norm: 0.9999991818525561, iteration: 417224
loss: 1.0033485889434814,grad_norm: 0.8825575191670185, iteration: 417225
loss: 1.3095147609710693,grad_norm: 0.9999997166155403, iteration: 417226
loss: 0.957362711429596,grad_norm: 0.9103988471214267, iteration: 417227
loss: 1.0839840173721313,grad_norm: 0.8308019874685064, iteration: 417228
loss: 1.041685700416565,grad_norm: 0.8579433505048933, iteration: 417229
loss: 1.0317401885986328,grad_norm: 0.9999998353699955, iteration: 417230
loss: 1.084262728691101,grad_norm: 0.9999994411688758, iteration: 417231
loss: 1.030140995979309,grad_norm: 0.9179503890792073, iteration: 417232
loss: 1.0008448362350464,grad_norm: 0.8671043588935636, iteration: 417233
loss: 1.0378979444503784,grad_norm: 0.9236071755258582, iteration: 417234
loss: 0.9654527902603149,grad_norm: 0.9172015318181678, iteration: 417235
loss: 1.0817925930023193,grad_norm: 0.7278877122465726, iteration: 417236
loss: 0.9918676614761353,grad_norm: 0.7575669781291301, iteration: 417237
loss: 1.0510786771774292,grad_norm: 0.999999817804002, iteration: 417238
loss: 1.0162492990493774,grad_norm: 0.9999996814433655, iteration: 417239
loss: 1.0305529832839966,grad_norm: 0.9999991585313314, iteration: 417240
loss: 1.0088796615600586,grad_norm: 0.9481346013469908, iteration: 417241
loss: 1.0098986625671387,grad_norm: 0.9999993311198615, iteration: 417242
loss: 1.1259924173355103,grad_norm: 0.999999210166975, iteration: 417243
loss: 1.0551539659500122,grad_norm: 0.9999992706182198, iteration: 417244
loss: 0.9968708157539368,grad_norm: 0.9072192443429197, iteration: 417245
loss: 1.15958571434021,grad_norm: 0.9999992460673841, iteration: 417246
loss: 1.0079388618469238,grad_norm: 0.7427069319340975, iteration: 417247
loss: 0.9757658839225769,grad_norm: 0.7558348703079052, iteration: 417248
loss: 1.0083497762680054,grad_norm: 0.8507183364756319, iteration: 417249
loss: 1.0183911323547363,grad_norm: 0.915901744787027, iteration: 417250
loss: 1.0233806371688843,grad_norm: 0.9151086830178534, iteration: 417251
loss: 1.0196884870529175,grad_norm: 0.999999554882109, iteration: 417252
loss: 1.0431996583938599,grad_norm: 0.999999078670951, iteration: 417253
loss: 0.9841575622558594,grad_norm: 0.9999992284589474, iteration: 417254
loss: 1.0160720348358154,grad_norm: 0.99999929837014, iteration: 417255
loss: 1.0597903728485107,grad_norm: 0.9999995047999373, iteration: 417256
loss: 1.0192731618881226,grad_norm: 0.9440527950540961, iteration: 417257
loss: 0.9901977777481079,grad_norm: 0.779917548375902, iteration: 417258
loss: 1.0832356214523315,grad_norm: 0.9999996903586187, iteration: 417259
loss: 1.1586649417877197,grad_norm: 0.9999994939367303, iteration: 417260
loss: 1.207489013671875,grad_norm: 0.9999999223895969, iteration: 417261
loss: 1.080958366394043,grad_norm: 0.8485281737732588, iteration: 417262
loss: 1.0263822078704834,grad_norm: 0.9999990861941157, iteration: 417263
loss: 0.9818414449691772,grad_norm: 0.7566968883572899, iteration: 417264
loss: 0.934412956237793,grad_norm: 0.8870209275693259, iteration: 417265
loss: 1.0440242290496826,grad_norm: 0.8397840493929614, iteration: 417266
loss: 1.047922968864441,grad_norm: 0.760958761517875, iteration: 417267
loss: 1.2416712045669556,grad_norm: 0.9999998199083127, iteration: 417268
loss: 0.9901707768440247,grad_norm: 0.8737246311521285, iteration: 417269
loss: 0.9634024500846863,grad_norm: 0.7937890082749122, iteration: 417270
loss: 1.0419539213180542,grad_norm: 0.763844700204008, iteration: 417271
loss: 1.1321622133255005,grad_norm: 0.9999993071407484, iteration: 417272
loss: 0.9961488246917725,grad_norm: 0.9999999486276583, iteration: 417273
loss: 1.0093982219696045,grad_norm: 0.726325818912915, iteration: 417274
loss: 1.0335971117019653,grad_norm: 0.9999999705871758, iteration: 417275
loss: 1.0560510158538818,grad_norm: 0.9999990453232889, iteration: 417276
loss: 1.0248037576675415,grad_norm: 0.9636348973613599, iteration: 417277
loss: 1.0093579292297363,grad_norm: 0.902402860421586, iteration: 417278
loss: 0.9871743321418762,grad_norm: 0.8247962480022459, iteration: 417279
loss: 1.0985323190689087,grad_norm: 0.8497480199956545, iteration: 417280
loss: 1.002977728843689,grad_norm: 0.9999993626239729, iteration: 417281
loss: 1.1510593891143799,grad_norm: 0.9999993351662694, iteration: 417282
loss: 1.036266803741455,grad_norm: 0.8639282681064342, iteration: 417283
loss: 1.1063125133514404,grad_norm: 0.8339488256793742, iteration: 417284
loss: 1.0814355611801147,grad_norm: 0.999999991578908, iteration: 417285
loss: 1.113101601600647,grad_norm: 0.8992679468875664, iteration: 417286
loss: 1.0169258117675781,grad_norm: 0.7302116054334559, iteration: 417287
loss: 1.0305025577545166,grad_norm: 0.8231466653060541, iteration: 417288
loss: 0.9606616497039795,grad_norm: 0.7887615965053318, iteration: 417289
loss: 0.9557315707206726,grad_norm: 0.7726718961581985, iteration: 417290
loss: 0.9765936732292175,grad_norm: 0.8113908091153401, iteration: 417291
loss: 1.058788537979126,grad_norm: 0.9063577517791394, iteration: 417292
loss: 1.025331735610962,grad_norm: 0.9900408310535984, iteration: 417293
loss: 1.0111215114593506,grad_norm: 0.7657637803430197, iteration: 417294
loss: 1.0154463052749634,grad_norm: 0.7066181283842218, iteration: 417295
loss: 1.038196086883545,grad_norm: 0.7386348220706553, iteration: 417296
loss: 1.0909106731414795,grad_norm: 0.8299628759307478, iteration: 417297
loss: 0.982480525970459,grad_norm: 0.9527821050186073, iteration: 417298
loss: 0.9663054943084717,grad_norm: 0.6408413268979459, iteration: 417299
loss: 1.0510274171829224,grad_norm: 0.9999998530335795, iteration: 417300
loss: 1.0011982917785645,grad_norm: 0.6593003717930102, iteration: 417301
loss: 0.961794376373291,grad_norm: 0.8131212855889273, iteration: 417302
loss: 1.0484293699264526,grad_norm: 0.9999997529362458, iteration: 417303
loss: 1.100489616394043,grad_norm: 0.7745352895797954, iteration: 417304
loss: 1.000369906425476,grad_norm: 0.8040989664171048, iteration: 417305
loss: 1.0248966217041016,grad_norm: 0.922576037264274, iteration: 417306
loss: 0.9684631824493408,grad_norm: 0.9613553232512716, iteration: 417307
loss: 1.0050145387649536,grad_norm: 0.9999991950976188, iteration: 417308
loss: 1.0181795358657837,grad_norm: 0.7625925707361971, iteration: 417309
loss: 0.9899722933769226,grad_norm: 0.9253036540206802, iteration: 417310
loss: 0.9662736058235168,grad_norm: 0.7892076572659348, iteration: 417311
loss: 1.0244722366333008,grad_norm: 0.9571807236241934, iteration: 417312
loss: 0.9816176295280457,grad_norm: 0.9999992665440344, iteration: 417313
loss: 1.029436707496643,grad_norm: 0.8471598275731099, iteration: 417314
loss: 1.04795503616333,grad_norm: 0.8470811249957598, iteration: 417315
loss: 0.9956687092781067,grad_norm: 0.6925188867585141, iteration: 417316
loss: 0.9836937189102173,grad_norm: 0.9999996220262094, iteration: 417317
loss: 1.0506538152694702,grad_norm: 0.8847481562778765, iteration: 417318
loss: 1.0231415033340454,grad_norm: 0.7174574580520579, iteration: 417319
loss: 1.0044801235198975,grad_norm: 0.7106755949593292, iteration: 417320
loss: 1.0023367404937744,grad_norm: 0.8154837851770474, iteration: 417321
loss: 0.9626857042312622,grad_norm: 0.7879222719054566, iteration: 417322
loss: 1.0284806489944458,grad_norm: 0.9999994557019074, iteration: 417323
loss: 1.0314650535583496,grad_norm: 0.7890511205634653, iteration: 417324
loss: 1.0461885929107666,grad_norm: 0.7928286167277079, iteration: 417325
loss: 1.1111594438552856,grad_norm: 1.0000000998969967, iteration: 417326
loss: 1.1711227893829346,grad_norm: 0.9999992280830758, iteration: 417327
loss: 1.0014283657073975,grad_norm: 0.7461595759352845, iteration: 417328
loss: 1.0255303382873535,grad_norm: 0.68431379196164, iteration: 417329
loss: 0.9836857914924622,grad_norm: 0.8546499014989393, iteration: 417330
loss: 0.9973961114883423,grad_norm: 0.9999990753667999, iteration: 417331
loss: 0.9828950762748718,grad_norm: 0.8877310047196779, iteration: 417332
loss: 0.99861079454422,grad_norm: 0.7183913194286115, iteration: 417333
loss: 1.0514692068099976,grad_norm: 0.9793025118387769, iteration: 417334
loss: 1.0540553331375122,grad_norm: 0.7759104781593337, iteration: 417335
loss: 0.9657851457595825,grad_norm: 0.9868680290113211, iteration: 417336
loss: 1.082251787185669,grad_norm: 0.9857225909484895, iteration: 417337
loss: 1.0462582111358643,grad_norm: 0.9999991537923141, iteration: 417338
loss: 1.0597479343414307,grad_norm: 0.9999990913778347, iteration: 417339
loss: 1.0259754657745361,grad_norm: 0.7162229064494068, iteration: 417340
loss: 0.9999282956123352,grad_norm: 0.7128877708975232, iteration: 417341
loss: 1.0234179496765137,grad_norm: 0.7183471723281751, iteration: 417342
loss: 1.1105653047561646,grad_norm: 0.9999999302263712, iteration: 417343
loss: 0.9769284725189209,grad_norm: 0.7903684293280939, iteration: 417344
loss: 1.0240004062652588,grad_norm: 0.7837440341307209, iteration: 417345
loss: 0.988409161567688,grad_norm: 0.6659044864918328, iteration: 417346
loss: 0.9865081906318665,grad_norm: 0.7907625429245683, iteration: 417347
loss: 1.0107485055923462,grad_norm: 0.9359437961667343, iteration: 417348
loss: 0.9898577332496643,grad_norm: 0.7545416901813216, iteration: 417349
loss: 0.977673351764679,grad_norm: 0.8869305353608734, iteration: 417350
loss: 1.005933403968811,grad_norm: 0.9999990618302113, iteration: 417351
loss: 0.9858871102333069,grad_norm: 0.7830101416338413, iteration: 417352
loss: 0.9972347617149353,grad_norm: 0.9999998093650114, iteration: 417353
loss: 0.9903236627578735,grad_norm: 0.8182693466869114, iteration: 417354
loss: 1.063379168510437,grad_norm: 0.9999994855063989, iteration: 417355
loss: 0.9649739861488342,grad_norm: 0.9999991191965522, iteration: 417356
loss: 1.1333723068237305,grad_norm: 0.756761681895938, iteration: 417357
loss: 1.0089725255966187,grad_norm: 0.9021910424677464, iteration: 417358
loss: 0.9959856271743774,grad_norm: 0.7843131459816008, iteration: 417359
loss: 1.0441935062408447,grad_norm: 0.8249456914602682, iteration: 417360
loss: 1.0356301069259644,grad_norm: 0.9904856045491608, iteration: 417361
loss: 1.001714825630188,grad_norm: 0.7729929856749974, iteration: 417362
loss: 1.0159432888031006,grad_norm: 0.7247717515124729, iteration: 417363
loss: 1.0052616596221924,grad_norm: 0.7353516928084327, iteration: 417364
loss: 0.9883846044540405,grad_norm: 0.7414382019750714, iteration: 417365
loss: 0.9527391791343689,grad_norm: 0.7564379090468504, iteration: 417366
loss: 1.0025523900985718,grad_norm: 0.796458098928591, iteration: 417367
loss: 0.9544966220855713,grad_norm: 0.9327760929335958, iteration: 417368
loss: 1.007938027381897,grad_norm: 0.7138472970978031, iteration: 417369
loss: 0.9725763201713562,grad_norm: 0.7479029373613597, iteration: 417370
loss: 0.9994999766349792,grad_norm: 0.5485172285692747, iteration: 417371
loss: 1.053413987159729,grad_norm: 0.9999998086067686, iteration: 417372
loss: 1.0116721391677856,grad_norm: 0.8951048495952894, iteration: 417373
loss: 0.9769799709320068,grad_norm: 0.7799793667275724, iteration: 417374
loss: 1.0292088985443115,grad_norm: 0.7794889735728575, iteration: 417375
loss: 1.0029053688049316,grad_norm: 0.776043257643444, iteration: 417376
loss: 1.019145131111145,grad_norm: 0.6637617448318655, iteration: 417377
loss: 1.024894118309021,grad_norm: 0.7707131970441297, iteration: 417378
loss: 1.0506808757781982,grad_norm: 0.772398685713413, iteration: 417379
loss: 0.9924201369285583,grad_norm: 0.9016889023036552, iteration: 417380
loss: 0.989534854888916,grad_norm: 0.8056016808954208, iteration: 417381
loss: 0.981291651725769,grad_norm: 0.7444908320031188, iteration: 417382
loss: 0.9816629886627197,grad_norm: 0.7529454353540197, iteration: 417383
loss: 1.0180469751358032,grad_norm: 0.7506248038501985, iteration: 417384
loss: 0.9987090229988098,grad_norm: 0.8554255097139263, iteration: 417385
loss: 0.9845020174980164,grad_norm: 0.900313431503281, iteration: 417386
loss: 1.0298689603805542,grad_norm: 0.6630423917027717, iteration: 417387
loss: 0.9817110300064087,grad_norm: 0.7057431724554413, iteration: 417388
loss: 0.9835010170936584,grad_norm: 0.999999347572618, iteration: 417389
loss: 1.0567190647125244,grad_norm: 0.8559722964453932, iteration: 417390
loss: 0.9791240096092224,grad_norm: 0.9819663737276493, iteration: 417391
loss: 0.9796499013900757,grad_norm: 0.9999990162842183, iteration: 417392
loss: 1.0459593534469604,grad_norm: 0.9999990487329847, iteration: 417393
loss: 1.071675419807434,grad_norm: 0.8346490713218284, iteration: 417394
loss: 1.0747395753860474,grad_norm: 0.835361717634426, iteration: 417395
loss: 0.9802164435386658,grad_norm: 0.8829731285201521, iteration: 417396
loss: 0.9967969655990601,grad_norm: 0.9059141315498989, iteration: 417397
loss: 1.094043254852295,grad_norm: 0.9999993449227691, iteration: 417398
loss: 1.0242451429367065,grad_norm: 0.7171055594301678, iteration: 417399
loss: 0.9589841365814209,grad_norm: 0.7234544306787745, iteration: 417400
loss: 1.0786380767822266,grad_norm: 0.9999998885713707, iteration: 417401
loss: 1.033079743385315,grad_norm: 0.9999995495203148, iteration: 417402
loss: 1.0282055139541626,grad_norm: 0.6957141754315134, iteration: 417403
loss: 0.9979567527770996,grad_norm: 0.6817286190839624, iteration: 417404
loss: 1.0681617259979248,grad_norm: 0.9999989703903484, iteration: 417405
loss: 0.9878721237182617,grad_norm: 0.9999991573895223, iteration: 417406
loss: 1.0217907428741455,grad_norm: 0.9999991296678984, iteration: 417407
loss: 1.037809133529663,grad_norm: 0.9999997140650647, iteration: 417408
loss: 0.9916284680366516,grad_norm: 0.8292674722300346, iteration: 417409
loss: 1.0197900533676147,grad_norm: 0.9999999008200084, iteration: 417410
loss: 0.9857566356658936,grad_norm: 0.7879929981301745, iteration: 417411
loss: 1.0406253337860107,grad_norm: 0.73669081991116, iteration: 417412
loss: 0.9556625485420227,grad_norm: 0.8461911127975559, iteration: 417413
loss: 1.035139560699463,grad_norm: 0.9999991546942633, iteration: 417414
loss: 1.023695945739746,grad_norm: 0.9999991785973643, iteration: 417415
loss: 0.9701424241065979,grad_norm: 0.7595710011109993, iteration: 417416
loss: 0.9986095428466797,grad_norm: 0.7594797690984715, iteration: 417417
loss: 1.0062814950942993,grad_norm: 0.9147239213702969, iteration: 417418
loss: 1.0009210109710693,grad_norm: 0.7679409094211268, iteration: 417419
loss: 1.0215381383895874,grad_norm: 0.8176547076844174, iteration: 417420
loss: 1.0018000602722168,grad_norm: 0.9176293276012021, iteration: 417421
loss: 0.9748857021331787,grad_norm: 0.7497540373824712, iteration: 417422
loss: 1.0514947175979614,grad_norm: 0.9999999455362177, iteration: 417423
loss: 0.993466317653656,grad_norm: 0.8326057339895357, iteration: 417424
loss: 0.963442325592041,grad_norm: 0.7964124437594009, iteration: 417425
loss: 1.195009469985962,grad_norm: 0.999999597132322, iteration: 417426
loss: 0.9989608526229858,grad_norm: 0.9058706899682437, iteration: 417427
loss: 0.9850713014602661,grad_norm: 0.9476487607100359, iteration: 417428
loss: 0.9922428727149963,grad_norm: 0.7907411663724369, iteration: 417429
loss: 1.0250352621078491,grad_norm: 0.7869095343922135, iteration: 417430
loss: 0.978351891040802,grad_norm: 0.7694753698167092, iteration: 417431
loss: 1.0289853811264038,grad_norm: 0.8384610685480722, iteration: 417432
loss: 1.2739367485046387,grad_norm: 0.9999998102411731, iteration: 417433
loss: 1.113525390625,grad_norm: 0.8061476860366441, iteration: 417434
loss: 1.030325174331665,grad_norm: 0.9999993780821096, iteration: 417435
loss: 0.9865534901618958,grad_norm: 0.8295735981451866, iteration: 417436
loss: 1.0141637325286865,grad_norm: 0.8203351473373376, iteration: 417437
loss: 0.9621280431747437,grad_norm: 0.7653549590814649, iteration: 417438
loss: 1.0239133834838867,grad_norm: 0.7192766957073246, iteration: 417439
loss: 0.9663246870040894,grad_norm: 0.8598944668151501, iteration: 417440
loss: 1.0642281770706177,grad_norm: 0.9999992998323141, iteration: 417441
loss: 0.9931027293205261,grad_norm: 0.9321907157209376, iteration: 417442
loss: 1.0022581815719604,grad_norm: 0.7839573869032941, iteration: 417443
loss: 1.050108790397644,grad_norm: 0.820484360873153, iteration: 417444
loss: 0.9885664582252502,grad_norm: 0.9591080826687945, iteration: 417445
loss: 0.9800257086753845,grad_norm: 0.6961328199317013, iteration: 417446
loss: 1.060674786567688,grad_norm: 0.9999990426233906, iteration: 417447
loss: 0.9937744140625,grad_norm: 0.79596528169656, iteration: 417448
loss: 0.993527352809906,grad_norm: 0.9568442650982119, iteration: 417449
loss: 1.0052133798599243,grad_norm: 0.9900545897687383, iteration: 417450
loss: 0.9755089282989502,grad_norm: 0.6437219177111579, iteration: 417451
loss: 1.0260071754455566,grad_norm: 0.9999992886620025, iteration: 417452
loss: 1.1674569845199585,grad_norm: 0.9081880251727463, iteration: 417453
loss: 1.0101525783538818,grad_norm: 0.7263328654371951, iteration: 417454
loss: 0.9853896498680115,grad_norm: 0.8238392478779878, iteration: 417455
loss: 0.9831326603889465,grad_norm: 0.9999996106344171, iteration: 417456
loss: 1.051162600517273,grad_norm: 0.6826800233985033, iteration: 417457
loss: 1.0031808614730835,grad_norm: 0.6920645758727223, iteration: 417458
loss: 0.9830952882766724,grad_norm: 0.9223546611576879, iteration: 417459
loss: 1.0290175676345825,grad_norm: 0.8289599750878017, iteration: 417460
loss: 1.001374363899231,grad_norm: 0.9999997407208399, iteration: 417461
loss: 0.9840921759605408,grad_norm: 0.9999992498194464, iteration: 417462
loss: 1.0511938333511353,grad_norm: 0.9999994570370059, iteration: 417463
loss: 0.9821794033050537,grad_norm: 0.99999943923205, iteration: 417464
loss: 1.0404353141784668,grad_norm: 0.8022672436681076, iteration: 417465
loss: 0.9634115695953369,grad_norm: 0.6931721310612172, iteration: 417466
loss: 0.9834821820259094,grad_norm: 0.9768083980878676, iteration: 417467
loss: 0.9951775074005127,grad_norm: 0.7203017309937133, iteration: 417468
loss: 0.9712512493133545,grad_norm: 0.9354380942372073, iteration: 417469
loss: 0.9856257438659668,grad_norm: 0.8410851413298859, iteration: 417470
loss: 1.0098296403884888,grad_norm: 0.8736326920469307, iteration: 417471
loss: 0.9811158180236816,grad_norm: 0.716068678181415, iteration: 417472
loss: 1.0152080059051514,grad_norm: 0.7353997213748398, iteration: 417473
loss: 1.0087224245071411,grad_norm: 0.7913043132800196, iteration: 417474
loss: 0.960261881351471,grad_norm: 0.716463301324182, iteration: 417475
loss: 0.987951934337616,grad_norm: 0.7747478932159564, iteration: 417476
loss: 1.0491559505462646,grad_norm: 0.7825730658559721, iteration: 417477
loss: 0.9683269262313843,grad_norm: 0.8318776138923498, iteration: 417478
loss: 1.021325945854187,grad_norm: 0.7081195631945921, iteration: 417479
loss: 1.065070390701294,grad_norm: 0.9999994862716288, iteration: 417480
loss: 0.9861729145050049,grad_norm: 0.7179871049042922, iteration: 417481
loss: 1.0515928268432617,grad_norm: 0.7696942158613861, iteration: 417482
loss: 1.003679871559143,grad_norm: 0.8345379386528595, iteration: 417483
loss: 1.0117213726043701,grad_norm: 0.8978503980394298, iteration: 417484
loss: 1.0137861967086792,grad_norm: 0.8224445060856267, iteration: 417485
loss: 1.008025884628296,grad_norm: 0.8110844045385798, iteration: 417486
loss: 1.0090194940567017,grad_norm: 0.8202995891708127, iteration: 417487
loss: 1.0032243728637695,grad_norm: 0.9237485838021843, iteration: 417488
loss: 1.0100096464157104,grad_norm: 0.844142072219066, iteration: 417489
loss: 1.0851625204086304,grad_norm: 0.7847502804571832, iteration: 417490
loss: 1.0035840272903442,grad_norm: 0.9999992226010955, iteration: 417491
loss: 1.088367223739624,grad_norm: 0.9999992960382583, iteration: 417492
loss: 0.9938120245933533,grad_norm: 0.8748247782248283, iteration: 417493
loss: 0.9757243394851685,grad_norm: 0.9999992864047329, iteration: 417494
loss: 1.0020966529846191,grad_norm: 0.837016395309915, iteration: 417495
loss: 1.0060468912124634,grad_norm: 0.7929894554581922, iteration: 417496
loss: 1.025990605354309,grad_norm: 0.9999990127270805, iteration: 417497
loss: 1.0101687908172607,grad_norm: 0.8302623560239494, iteration: 417498
loss: 1.0640952587127686,grad_norm: 0.9999992309899304, iteration: 417499
loss: 1.008064866065979,grad_norm: 0.9999993064844502, iteration: 417500
loss: 1.0128660202026367,grad_norm: 0.9877671966807666, iteration: 417501
loss: 1.0100204944610596,grad_norm: 0.9314965196555586, iteration: 417502
loss: 1.0013984441757202,grad_norm: 0.731693256654725, iteration: 417503
loss: 0.976148247718811,grad_norm: 0.7778875272183755, iteration: 417504
loss: 0.9744138121604919,grad_norm: 0.7516768960965272, iteration: 417505
loss: 0.9806256890296936,grad_norm: 0.7378819270381376, iteration: 417506
loss: 1.0081652402877808,grad_norm: 0.9999991549634208, iteration: 417507
loss: 1.0379890203475952,grad_norm: 0.7737812220434207, iteration: 417508
loss: 1.0168836116790771,grad_norm: 0.7624772312746576, iteration: 417509
loss: 1.0136903524398804,grad_norm: 0.7779886915138544, iteration: 417510
loss: 0.993126392364502,grad_norm: 0.804087199760325, iteration: 417511
loss: 1.0053707361221313,grad_norm: 0.9999990413974583, iteration: 417512
loss: 1.0065367221832275,grad_norm: 0.8623991160597563, iteration: 417513
loss: 1.0078487396240234,grad_norm: 0.8458727136424017, iteration: 417514
loss: 0.9791333079338074,grad_norm: 0.6590156532954407, iteration: 417515
loss: 1.0025105476379395,grad_norm: 0.9999999567662843, iteration: 417516
loss: 0.9900901913642883,grad_norm: 0.9532863891620372, iteration: 417517
loss: 0.98736172914505,grad_norm: 0.7876015648948422, iteration: 417518
loss: 1.003782868385315,grad_norm: 0.7643054672221065, iteration: 417519
loss: 0.9845423698425293,grad_norm: 0.7265144146602247, iteration: 417520
loss: 0.9964138269424438,grad_norm: 0.8148995275576354, iteration: 417521
loss: 0.9681311845779419,grad_norm: 0.7690807021790729, iteration: 417522
loss: 1.0346755981445312,grad_norm: 0.9535478695921769, iteration: 417523
loss: 0.987726092338562,grad_norm: 0.7652423314253382, iteration: 417524
loss: 1.0312798023223877,grad_norm: 0.6781016797260526, iteration: 417525
loss: 0.9943087697029114,grad_norm: 0.7112280068249244, iteration: 417526
loss: 1.0071184635162354,grad_norm: 0.9999993266468712, iteration: 417527
loss: 0.9792123436927795,grad_norm: 0.7459440646943193, iteration: 417528
loss: 0.9911799430847168,grad_norm: 0.6786311434590867, iteration: 417529
loss: 1.0149704217910767,grad_norm: 0.9567753200359491, iteration: 417530
loss: 0.9913033246994019,grad_norm: 0.9999989917916358, iteration: 417531
loss: 1.029564380645752,grad_norm: 0.7751484267971319, iteration: 417532
loss: 1.0081778764724731,grad_norm: 0.7091111423165138, iteration: 417533
loss: 0.9787756204605103,grad_norm: 0.8983466277815091, iteration: 417534
loss: 0.9954591393470764,grad_norm: 0.7656709055684185, iteration: 417535
loss: 1.0031547546386719,grad_norm: 0.7846411199606258, iteration: 417536
loss: 1.0222584009170532,grad_norm: 0.7253110863661046, iteration: 417537
loss: 1.0184868574142456,grad_norm: 0.899260246869494, iteration: 417538
loss: 0.9953436255455017,grad_norm: 0.8570263218606351, iteration: 417539
loss: 1.0071579217910767,grad_norm: 0.8769070214715293, iteration: 417540
loss: 1.0128216743469238,grad_norm: 0.8090835066872721, iteration: 417541
loss: 1.0284366607666016,grad_norm: 0.8930182193925252, iteration: 417542
loss: 0.9981856346130371,grad_norm: 0.9999995628616588, iteration: 417543
loss: 1.0013127326965332,grad_norm: 0.796845659287454, iteration: 417544
loss: 1.051616907119751,grad_norm: 0.9999994352978216, iteration: 417545
loss: 0.9591100215911865,grad_norm: 0.8000489209097527, iteration: 417546
loss: 1.0982165336608887,grad_norm: 0.8767768367008604, iteration: 417547
loss: 1.0580881834030151,grad_norm: 0.8440776236181364, iteration: 417548
loss: 0.9724317193031311,grad_norm: 0.6743379468045603, iteration: 417549
loss: 1.0912537574768066,grad_norm: 0.9999997636368972, iteration: 417550
loss: 0.9895541667938232,grad_norm: 0.7081038939470474, iteration: 417551
loss: 1.0139198303222656,grad_norm: 0.8770702245897276, iteration: 417552
loss: 1.0115625858306885,grad_norm: 0.9295205991953356, iteration: 417553
loss: 0.9905760288238525,grad_norm: 0.7792102241971534, iteration: 417554
loss: 1.023208737373352,grad_norm: 0.7223240516480117, iteration: 417555
loss: 1.0264616012573242,grad_norm: 0.7593540471984024, iteration: 417556
loss: 1.0063107013702393,grad_norm: 0.7123642344975556, iteration: 417557
loss: 0.996843695640564,grad_norm: 0.8261709957776479, iteration: 417558
loss: 1.0083317756652832,grad_norm: 0.8080283601931894, iteration: 417559
loss: 1.0150302648544312,grad_norm: 0.6756687564711509, iteration: 417560
loss: 0.9998435974121094,grad_norm: 0.9415566985962184, iteration: 417561
loss: 1.0162981748580933,grad_norm: 0.8666884419210972, iteration: 417562
loss: 0.9901936650276184,grad_norm: 0.7836205587432387, iteration: 417563
loss: 0.9685025215148926,grad_norm: 0.6961755854850418, iteration: 417564
loss: 0.99216628074646,grad_norm: 0.7489728874750132, iteration: 417565
loss: 0.9880760312080383,grad_norm: 0.6656285558707786, iteration: 417566
loss: 1.094462275505066,grad_norm: 0.7389010801391596, iteration: 417567
loss: 0.9846934676170349,grad_norm: 0.838558439846387, iteration: 417568
loss: 0.986807644367218,grad_norm: 0.7524394823948811, iteration: 417569
loss: 1.0858962535858154,grad_norm: 0.999999758173276, iteration: 417570
loss: 0.9891911745071411,grad_norm: 0.7489363750387726, iteration: 417571
loss: 1.0104116201400757,grad_norm: 0.7620681784103546, iteration: 417572
loss: 1.007799506187439,grad_norm: 0.7478954253936908, iteration: 417573
loss: 1.0084359645843506,grad_norm: 0.9999992122229773, iteration: 417574
loss: 1.089008092880249,grad_norm: 0.9999999592793304, iteration: 417575
loss: 1.032017469406128,grad_norm: 0.7899246150039568, iteration: 417576
loss: 0.9517857432365417,grad_norm: 0.7783825064198592, iteration: 417577
loss: 0.9982813000679016,grad_norm: 0.7680422898974417, iteration: 417578
loss: 1.0245420932769775,grad_norm: 0.8648638478716059, iteration: 417579
loss: 0.9804245829582214,grad_norm: 0.6727655931971819, iteration: 417580
loss: 1.0077922344207764,grad_norm: 0.7462544429142337, iteration: 417581
loss: 0.9730729460716248,grad_norm: 0.8606225256568751, iteration: 417582
loss: 0.9800485968589783,grad_norm: 0.7355924328379556, iteration: 417583
loss: 1.023321509361267,grad_norm: 0.9999991350418366, iteration: 417584
loss: 0.9878579378128052,grad_norm: 0.9414830422891434, iteration: 417585
loss: 0.9865773916244507,grad_norm: 0.7930955790980504, iteration: 417586
loss: 1.022182583808899,grad_norm: 0.9999990970808404, iteration: 417587
loss: 1.0356370210647583,grad_norm: 0.9999991807807903, iteration: 417588
loss: 0.975546658039093,grad_norm: 0.721157163534599, iteration: 417589
loss: 1.0540248155593872,grad_norm: 0.8116355007706888, iteration: 417590
loss: 0.965032160282135,grad_norm: 0.6500649900719971, iteration: 417591
loss: 1.0629065036773682,grad_norm: 0.9057897292973739, iteration: 417592
loss: 1.018942952156067,grad_norm: 0.6925661986033848, iteration: 417593
loss: 1.0158026218414307,grad_norm: 0.632751180491581, iteration: 417594
loss: 0.9622776508331299,grad_norm: 0.9819930247159093, iteration: 417595
loss: 1.0562015771865845,grad_norm: 0.7621765102112739, iteration: 417596
loss: 0.960831880569458,grad_norm: 0.9606182207246833, iteration: 417597
loss: 1.0068734884262085,grad_norm: 0.8107779637045094, iteration: 417598
loss: 1.075494647026062,grad_norm: 0.9999991960343695, iteration: 417599
loss: 1.0227302312850952,grad_norm: 0.8713044822818966, iteration: 417600
loss: 1.0135927200317383,grad_norm: 0.7532885285139137, iteration: 417601
loss: 1.0138604640960693,grad_norm: 0.6931361401984042, iteration: 417602
loss: 1.048396110534668,grad_norm: 0.9999991841535945, iteration: 417603
loss: 1.1527827978134155,grad_norm: 0.9323824042508935, iteration: 417604
loss: 0.9791145324707031,grad_norm: 0.7914790361999746, iteration: 417605
loss: 1.0162895917892456,grad_norm: 0.9388559166097821, iteration: 417606
loss: 0.9851094484329224,grad_norm: 0.7970890665921084, iteration: 417607
loss: 0.9853857159614563,grad_norm: 0.8830268231663696, iteration: 417608
loss: 1.0047197341918945,grad_norm: 0.9060358681479683, iteration: 417609
loss: 1.0043596029281616,grad_norm: 0.8059347860442823, iteration: 417610
loss: 0.9849328994750977,grad_norm: 0.6796634886751606, iteration: 417611
loss: 1.03990638256073,grad_norm: 0.9401886415143204, iteration: 417612
loss: 1.0133930444717407,grad_norm: 0.7981054779976684, iteration: 417613
loss: 0.9981644749641418,grad_norm: 0.8683842635886954, iteration: 417614
loss: 1.027536392211914,grad_norm: 0.8925060466562154, iteration: 417615
loss: 1.0220918655395508,grad_norm: 0.8441308780846182, iteration: 417616
loss: 1.0977721214294434,grad_norm: 0.9999996967175651, iteration: 417617
loss: 1.0123777389526367,grad_norm: 0.8044781084005443, iteration: 417618
loss: 1.012394666671753,grad_norm: 0.889894467739823, iteration: 417619
loss: 0.9937191605567932,grad_norm: 0.9368075121875393, iteration: 417620
loss: 0.999984622001648,grad_norm: 0.6395536427725874, iteration: 417621
loss: 1.061508059501648,grad_norm: 0.8845838524486648, iteration: 417622
loss: 0.9991176724433899,grad_norm: 0.7947121782097992, iteration: 417623
loss: 0.9886363744735718,grad_norm: 0.6575123088279063, iteration: 417624
loss: 0.9789584875106812,grad_norm: 0.6665517727809315, iteration: 417625
loss: 0.9972671866416931,grad_norm: 0.6161952838408089, iteration: 417626
loss: 0.9798094630241394,grad_norm: 0.7078848914392761, iteration: 417627
loss: 1.0016640424728394,grad_norm: 0.8767620177163625, iteration: 417628
loss: 0.9787265658378601,grad_norm: 0.9999993082165662, iteration: 417629
loss: 1.0383524894714355,grad_norm: 0.8167878016269733, iteration: 417630
loss: 1.0439624786376953,grad_norm: 0.7360518501436296, iteration: 417631
loss: 1.006206750869751,grad_norm: 0.70120236912232, iteration: 417632
loss: 0.9641372561454773,grad_norm: 0.913365529052291, iteration: 417633
loss: 1.011098861694336,grad_norm: 1.0000000121251527, iteration: 417634
loss: 1.0045970678329468,grad_norm: 0.8003130482215872, iteration: 417635
loss: 1.1116526126861572,grad_norm: 0.9999991702897502, iteration: 417636
loss: 0.9853911399841309,grad_norm: 0.7872626235387084, iteration: 417637
loss: 0.9405807256698608,grad_norm: 0.7104878201652569, iteration: 417638
loss: 1.0321614742279053,grad_norm: 0.7675379163664517, iteration: 417639
loss: 1.1081857681274414,grad_norm: 0.9999991216567036, iteration: 417640
loss: 0.9643396139144897,grad_norm: 0.8392239820352504, iteration: 417641
loss: 1.0165621042251587,grad_norm: 0.8122816167141657, iteration: 417642
loss: 1.0002100467681885,grad_norm: 0.6944922666012716, iteration: 417643
loss: 0.9731224179267883,grad_norm: 0.7889834044867329, iteration: 417644
loss: 1.1022568941116333,grad_norm: 0.9999996064791821, iteration: 417645
loss: 0.981622576713562,grad_norm: 0.8728364058812472, iteration: 417646
loss: 1.0139358043670654,grad_norm: 0.701504751012834, iteration: 417647
loss: 0.9839383363723755,grad_norm: 0.8013686358998736, iteration: 417648
loss: 0.997911810874939,grad_norm: 0.6594555510500195, iteration: 417649
loss: 1.0024945735931396,grad_norm: 0.757669052564191, iteration: 417650
loss: 0.9844287633895874,grad_norm: 0.8616543315900576, iteration: 417651
loss: 1.017029881477356,grad_norm: 1.00000001167192, iteration: 417652
loss: 0.9807503819465637,grad_norm: 0.8144091421133048, iteration: 417653
loss: 1.0065122842788696,grad_norm: 0.775013632983443, iteration: 417654
loss: 1.0331130027770996,grad_norm: 0.6895747961598083, iteration: 417655
loss: 1.029701590538025,grad_norm: 0.766961665716434, iteration: 417656
loss: 1.0733907222747803,grad_norm: 0.7352509315912596, iteration: 417657
loss: 1.0093327760696411,grad_norm: 0.8034305482464439, iteration: 417658
loss: 0.9791619181632996,grad_norm: 0.9999994283928193, iteration: 417659
loss: 1.0082168579101562,grad_norm: 0.6999609270503244, iteration: 417660
loss: 1.0844182968139648,grad_norm: 0.7895058549335403, iteration: 417661
loss: 1.0125095844268799,grad_norm: 0.7604488454964801, iteration: 417662
loss: 1.0252752304077148,grad_norm: 0.7186268324168349, iteration: 417663
loss: 1.0428180694580078,grad_norm: 0.779076031990886, iteration: 417664
loss: 1.0308220386505127,grad_norm: 0.9600322956817675, iteration: 417665
loss: 1.0310580730438232,grad_norm: 0.8195186143758053, iteration: 417666
loss: 0.9967432618141174,grad_norm: 0.9999992577919529, iteration: 417667
loss: 0.9616169333457947,grad_norm: 0.7256090651993264, iteration: 417668
loss: 1.1037312746047974,grad_norm: 0.9999998314514852, iteration: 417669
loss: 1.0094342231750488,grad_norm: 0.947408086759783, iteration: 417670
loss: 0.9912181496620178,grad_norm: 0.667624164717561, iteration: 417671
loss: 0.9961491823196411,grad_norm: 0.8243336186474101, iteration: 417672
loss: 0.9928345680236816,grad_norm: 0.66033872753579, iteration: 417673
loss: 0.9728422164916992,grad_norm: 0.7147093404875728, iteration: 417674
loss: 0.9877665042877197,grad_norm: 0.6911912181455677, iteration: 417675
loss: 1.0037384033203125,grad_norm: 0.9020978347248718, iteration: 417676
loss: 1.0250029563903809,grad_norm: 0.9999997757766931, iteration: 417677
loss: 1.0248873233795166,grad_norm: 0.8339679761785789, iteration: 417678
loss: 1.0175975561141968,grad_norm: 0.6721553345608221, iteration: 417679
loss: 1.0080132484436035,grad_norm: 0.9999990171066949, iteration: 417680
loss: 0.9962549209594727,grad_norm: 0.8731366302619792, iteration: 417681
loss: 0.9965041875839233,grad_norm: 0.8063211787020689, iteration: 417682
loss: 1.021794319152832,grad_norm: 0.7418473646578855, iteration: 417683
loss: 1.008806824684143,grad_norm: 0.939334458867799, iteration: 417684
loss: 0.9673181772232056,grad_norm: 0.7517014290517056, iteration: 417685
loss: 1.0216666460037231,grad_norm: 0.90268301932418, iteration: 417686
loss: 1.0199350118637085,grad_norm: 0.935856808206432, iteration: 417687
loss: 0.9866081476211548,grad_norm: 0.6444273634696371, iteration: 417688
loss: 0.9873400926589966,grad_norm: 0.7349196280401886, iteration: 417689
loss: 1.0029104948043823,grad_norm: 0.821777304922036, iteration: 417690
loss: 0.9858932495117188,grad_norm: 0.756295369933945, iteration: 417691
loss: 0.9574427604675293,grad_norm: 0.7610491883875947, iteration: 417692
loss: 0.9871457815170288,grad_norm: 0.6737079463055775, iteration: 417693
loss: 1.0115702152252197,grad_norm: 0.9999996485011531, iteration: 417694
loss: 1.0194075107574463,grad_norm: 0.7544556059761224, iteration: 417695
loss: 0.9865493774414062,grad_norm: 0.9224282546814486, iteration: 417696
loss: 1.0112292766571045,grad_norm: 0.856506640788781, iteration: 417697
loss: 0.9751254320144653,grad_norm: 0.9999994000134071, iteration: 417698
loss: 1.0137206315994263,grad_norm: 0.7618932970365574, iteration: 417699
loss: 1.0088067054748535,grad_norm: 0.6851350945190019, iteration: 417700
loss: 1.0134053230285645,grad_norm: 0.8166732805041202, iteration: 417701
loss: 1.0549418926239014,grad_norm: 0.8631300894108785, iteration: 417702
loss: 1.0144281387329102,grad_norm: 0.7953281977562596, iteration: 417703
loss: 1.0509511232376099,grad_norm: 0.995587911273082, iteration: 417704
loss: 0.9954688549041748,grad_norm: 0.9999993255842103, iteration: 417705
loss: 0.981522798538208,grad_norm: 0.9463832786647751, iteration: 417706
loss: 1.0406537055969238,grad_norm: 0.7408172852980082, iteration: 417707
loss: 0.9734520316123962,grad_norm: 0.7517828975652817, iteration: 417708
loss: 1.023335337638855,grad_norm: 0.6663982155052581, iteration: 417709
loss: 1.0434821844100952,grad_norm: 0.926074705980169, iteration: 417710
loss: 0.978125810623169,grad_norm: 0.9222020645332226, iteration: 417711
loss: 0.9530677199363708,grad_norm: 0.728042872423092, iteration: 417712
loss: 0.9703943729400635,grad_norm: 0.8031913815259049, iteration: 417713
loss: 1.0334033966064453,grad_norm: 0.773841483192557, iteration: 417714
loss: 1.1046546697616577,grad_norm: 0.9068127479436467, iteration: 417715
loss: 0.9712004661560059,grad_norm: 0.6902113952172189, iteration: 417716
loss: 1.041505217552185,grad_norm: 0.7891773666063009, iteration: 417717
loss: 1.022099256515503,grad_norm: 0.8631363306647736, iteration: 417718
loss: 1.0103073120117188,grad_norm: 0.9495304436759434, iteration: 417719
loss: 1.0069000720977783,grad_norm: 0.7109828835785816, iteration: 417720
loss: 0.9914106130599976,grad_norm: 0.9999991534007668, iteration: 417721
loss: 0.9932093024253845,grad_norm: 0.7064836009035745, iteration: 417722
loss: 1.0019221305847168,grad_norm: 0.9371308094318271, iteration: 417723
loss: 1.0191806554794312,grad_norm: 0.999999574576619, iteration: 417724
loss: 1.0649861097335815,grad_norm: 0.9999996504094306, iteration: 417725
loss: 1.0305442810058594,grad_norm: 0.9999995891463871, iteration: 417726
loss: 0.9779412746429443,grad_norm: 0.7519153246574033, iteration: 417727
loss: 1.0024516582489014,grad_norm: 0.9999992874111262, iteration: 417728
loss: 1.072656273841858,grad_norm: 0.9999996210150321, iteration: 417729
loss: 1.1316304206848145,grad_norm: 0.9999995312774921, iteration: 417730
loss: 0.9830976724624634,grad_norm: 0.6945608547265713, iteration: 417731
loss: 1.0004688501358032,grad_norm: 0.999999239620387, iteration: 417732
loss: 1.0169708728790283,grad_norm: 0.9999991520136556, iteration: 417733
loss: 1.0338752269744873,grad_norm: 0.8681038667612201, iteration: 417734
loss: 0.9824941754341125,grad_norm: 0.9219356762291857, iteration: 417735
loss: 1.1607739925384521,grad_norm: 0.9999998123929911, iteration: 417736
loss: 1.0530980825424194,grad_norm: 0.96290842564578, iteration: 417737
loss: 1.1341370344161987,grad_norm: 0.9999997611165333, iteration: 417738
loss: 1.12013840675354,grad_norm: 0.999999968248484, iteration: 417739
loss: 1.062121868133545,grad_norm: 0.9999991614148231, iteration: 417740
loss: 0.9633000493049622,grad_norm: 0.999999714579727, iteration: 417741
loss: 1.0544447898864746,grad_norm: 0.7238520204712777, iteration: 417742
loss: 0.9990963339805603,grad_norm: 0.7743589623748657, iteration: 417743
loss: 1.0370440483093262,grad_norm: 0.999999260566165, iteration: 417744
loss: 1.0343294143676758,grad_norm: 0.9999991497442768, iteration: 417745
loss: 0.9725647568702698,grad_norm: 0.8616446946057958, iteration: 417746
loss: 1.0292540788650513,grad_norm: 0.7118915879983548, iteration: 417747
loss: 1.1319150924682617,grad_norm: 0.7697594592524545, iteration: 417748
loss: 1.1564953327178955,grad_norm: 0.9999999579174174, iteration: 417749
loss: 1.0820204019546509,grad_norm: 0.9999994709194536, iteration: 417750
loss: 1.0145196914672852,grad_norm: 0.9999992933854026, iteration: 417751
loss: 0.9688211679458618,grad_norm: 0.9999997699480476, iteration: 417752
loss: 0.958843469619751,grad_norm: 0.9999995131006836, iteration: 417753
loss: 1.0408451557159424,grad_norm: 0.6797705160720985, iteration: 417754
loss: 1.022213101387024,grad_norm: 0.9999994844209853, iteration: 417755
loss: 1.0178046226501465,grad_norm: 0.9392520551028707, iteration: 417756
loss: 1.0371785163879395,grad_norm: 0.9999998534860781, iteration: 417757
loss: 1.0569767951965332,grad_norm: 0.9999996817275659, iteration: 417758
loss: 1.1894729137420654,grad_norm: 0.9999997863597396, iteration: 417759
loss: 1.0096435546875,grad_norm: 0.9999989763208038, iteration: 417760
loss: 1.0236703157424927,grad_norm: 0.9999999490780299, iteration: 417761
loss: 1.0378193855285645,grad_norm: 0.9999997852597312, iteration: 417762
loss: 0.9606174826622009,grad_norm: 0.9628891312431898, iteration: 417763
loss: 1.036191463470459,grad_norm: 0.749353688991393, iteration: 417764
loss: 1.0494641065597534,grad_norm: 0.999999028767884, iteration: 417765
loss: 1.0090315341949463,grad_norm: 0.7923294857058931, iteration: 417766
loss: 1.0967029333114624,grad_norm: 0.9999999022858549, iteration: 417767
loss: 1.070406436920166,grad_norm: 0.9999989565966951, iteration: 417768
loss: 1.0138334035873413,grad_norm: 0.9322411223753443, iteration: 417769
loss: 0.9944267272949219,grad_norm: 0.7466250388703074, iteration: 417770
loss: 0.9900716543197632,grad_norm: 0.6310441091837241, iteration: 417771
loss: 1.0187844038009644,grad_norm: 0.9029000630939819, iteration: 417772
loss: 0.9641104340553284,grad_norm: 0.9688994593463335, iteration: 417773
loss: 0.9909141063690186,grad_norm: 0.9999991296082543, iteration: 417774
loss: 1.0667684078216553,grad_norm: 0.9933925930163301, iteration: 417775
loss: 1.0441890954971313,grad_norm: 0.9999997648869506, iteration: 417776
loss: 0.9398601055145264,grad_norm: 0.9999991793639379, iteration: 417777
loss: 0.9463143348693848,grad_norm: 0.9999990708828792, iteration: 417778
loss: 1.0245013236999512,grad_norm: 0.9450623249247475, iteration: 417779
loss: 0.9996230006217957,grad_norm: 0.870438140086487, iteration: 417780
loss: 1.0771561861038208,grad_norm: 0.9257261218090059, iteration: 417781
loss: 0.9906512498855591,grad_norm: 0.8782275604727867, iteration: 417782
loss: 1.0302244424819946,grad_norm: 0.9999990139706381, iteration: 417783
loss: 0.9640588164329529,grad_norm: 0.7592133814518705, iteration: 417784
loss: 1.050898551940918,grad_norm: 0.9999993990869541, iteration: 417785
loss: 1.0316293239593506,grad_norm: 0.9999997837021962, iteration: 417786
loss: 0.9786994457244873,grad_norm: 0.9999999082651065, iteration: 417787
loss: 1.0678696632385254,grad_norm: 0.9201022416870818, iteration: 417788
loss: 1.0425673723220825,grad_norm: 0.9381563338816097, iteration: 417789
loss: 1.0508334636688232,grad_norm: 0.9999992633641296, iteration: 417790
loss: 1.0175118446350098,grad_norm: 0.999999231514981, iteration: 417791
loss: 1.00856351852417,grad_norm: 0.9999996232235613, iteration: 417792
loss: 0.9797263741493225,grad_norm: 0.7920482325551863, iteration: 417793
loss: 1.0244210958480835,grad_norm: 0.9842580013273085, iteration: 417794
loss: 1.0113567113876343,grad_norm: 0.999999037667805, iteration: 417795
loss: 0.9728325605392456,grad_norm: 0.8627549407787106, iteration: 417796
loss: 1.016535997390747,grad_norm: 0.8292868328081237, iteration: 417797
loss: 1.0507820844650269,grad_norm: 0.9999995594879344, iteration: 417798
loss: 0.9716929793357849,grad_norm: 0.7345037390259606, iteration: 417799
loss: 1.0085418224334717,grad_norm: 0.999999382528405, iteration: 417800
loss: 1.0174483060836792,grad_norm: 0.712223541377259, iteration: 417801
loss: 0.9990658164024353,grad_norm: 0.6857321810766125, iteration: 417802
loss: 1.0297160148620605,grad_norm: 0.9766301459982932, iteration: 417803
loss: 1.0604902505874634,grad_norm: 0.9999994635000028, iteration: 417804
loss: 1.048464298248291,grad_norm: 0.7576271856763561, iteration: 417805
loss: 0.9886668920516968,grad_norm: 0.7902971885007115, iteration: 417806
loss: 0.976578414440155,grad_norm: 0.8621865215019165, iteration: 417807
loss: 1.042066216468811,grad_norm: 0.9999994890592399, iteration: 417808
loss: 1.0227817296981812,grad_norm: 0.9007975322289602, iteration: 417809
loss: 1.0210939645767212,grad_norm: 0.8293753022894109, iteration: 417810
loss: 1.0129417181015015,grad_norm: 0.6637928998525074, iteration: 417811
loss: 1.1341640949249268,grad_norm: 0.9999997937488657, iteration: 417812
loss: 0.9826090335845947,grad_norm: 0.7723392559540875, iteration: 417813
loss: 1.0121914148330688,grad_norm: 0.9866421588705192, iteration: 417814
loss: 1.0090186595916748,grad_norm: 0.7022096617925875, iteration: 417815
loss: 1.0043801069259644,grad_norm: 0.9999990507444256, iteration: 417816
loss: 1.035353660583496,grad_norm: 0.8100513762124445, iteration: 417817
loss: 1.013758897781372,grad_norm: 0.680294086898585, iteration: 417818
loss: 0.9808821678161621,grad_norm: 0.7552948065967953, iteration: 417819
loss: 1.0591108798980713,grad_norm: 0.8231950209814598, iteration: 417820
loss: 1.0891332626342773,grad_norm: 0.8659896499805417, iteration: 417821
loss: 1.0135811567306519,grad_norm: 0.7719643426018835, iteration: 417822
loss: 1.000667929649353,grad_norm: 0.8342624381212437, iteration: 417823
loss: 0.9807493686676025,grad_norm: 0.9999994713038611, iteration: 417824
loss: 1.0144014358520508,grad_norm: 0.8744823690838464, iteration: 417825
loss: 1.0143550634384155,grad_norm: 0.749287822243559, iteration: 417826
loss: 1.010705828666687,grad_norm: 0.9999999662407048, iteration: 417827
loss: 0.986826479434967,grad_norm: 0.8013437307156185, iteration: 417828
loss: 0.9802018404006958,grad_norm: 0.9999990759902234, iteration: 417829
loss: 1.084714412689209,grad_norm: 0.8562845964049647, iteration: 417830
loss: 1.0649017095565796,grad_norm: 0.9999992726230955, iteration: 417831
loss: 1.0357619524002075,grad_norm: 0.9810824505152272, iteration: 417832
loss: 1.054809808731079,grad_norm: 0.9704964067803423, iteration: 417833
loss: 0.9846521615982056,grad_norm: 0.7446249928686873, iteration: 417834
loss: 1.0423957109451294,grad_norm: 0.823904356919122, iteration: 417835
loss: 1.0166646242141724,grad_norm: 0.8863002412158387, iteration: 417836
loss: 1.0723985433578491,grad_norm: 1.0000001111796122, iteration: 417837
loss: 1.0316259860992432,grad_norm: 0.7159373996259478, iteration: 417838
loss: 1.0785115957260132,grad_norm: 0.9999993927920608, iteration: 417839
loss: 1.064990758895874,grad_norm: 0.9999995605324663, iteration: 417840
loss: 1.0032267570495605,grad_norm: 0.804134133045421, iteration: 417841
loss: 1.0603581666946411,grad_norm: 0.9999995828763222, iteration: 417842
loss: 1.0257747173309326,grad_norm: 0.8914852732724279, iteration: 417843
loss: 1.0750172138214111,grad_norm: 0.9999996460509911, iteration: 417844
loss: 1.1848690509796143,grad_norm: 0.9999993978487199, iteration: 417845
loss: 0.9841946363449097,grad_norm: 0.7040898939128434, iteration: 417846
loss: 0.9795519113540649,grad_norm: 0.8299736078946764, iteration: 417847
loss: 1.0299030542373657,grad_norm: 0.827341097552104, iteration: 417848
loss: 1.0712835788726807,grad_norm: 0.866790874715349, iteration: 417849
loss: 1.02252995967865,grad_norm: 0.8306057254574455, iteration: 417850
loss: 1.0058358907699585,grad_norm: 0.9839577536209521, iteration: 417851
loss: 0.9908660054206848,grad_norm: 0.7595227496175763, iteration: 417852
loss: 0.9945778250694275,grad_norm: 0.7969255060185826, iteration: 417853
loss: 0.9918352365493774,grad_norm: 0.7114261472897189, iteration: 417854
loss: 0.9707415699958801,grad_norm: 0.8367570903265937, iteration: 417855
loss: 1.0227196216583252,grad_norm: 0.9999989953274732, iteration: 417856
loss: 1.0007014274597168,grad_norm: 0.771743542918576, iteration: 417857
loss: 0.9636200070381165,grad_norm: 0.9999996433254297, iteration: 417858
loss: 1.008389949798584,grad_norm: 0.9999999192988954, iteration: 417859
loss: 1.0532035827636719,grad_norm: 0.6120001987174515, iteration: 417860
loss: 1.0557528734207153,grad_norm: 0.9999991004423354, iteration: 417861
loss: 0.9411520957946777,grad_norm: 0.6819145440808498, iteration: 417862
loss: 0.9833959937095642,grad_norm: 0.7421764212581651, iteration: 417863
loss: 1.055055022239685,grad_norm: 0.8492677347501664, iteration: 417864
loss: 1.0336711406707764,grad_norm: 0.9999992299475926, iteration: 417865
loss: 1.004623293876648,grad_norm: 0.9999991744737668, iteration: 417866
loss: 1.2906832695007324,grad_norm: 0.9999991518282328, iteration: 417867
loss: 0.976036012172699,grad_norm: 0.7296940084864764, iteration: 417868
loss: 0.9787993431091309,grad_norm: 0.7584922185435317, iteration: 417869
loss: 0.9615695476531982,grad_norm: 0.8725101786149992, iteration: 417870
loss: 0.987095057964325,grad_norm: 0.9986144444346279, iteration: 417871
loss: 1.096238613128662,grad_norm: 0.999999661321971, iteration: 417872
loss: 0.9997352361679077,grad_norm: 0.7721072849627147, iteration: 417873
loss: 1.0319198369979858,grad_norm: 0.9999990546662926, iteration: 417874
loss: 0.9874126315116882,grad_norm: 0.9443673012656921, iteration: 417875
loss: 1.033420205116272,grad_norm: 0.8341108207381571, iteration: 417876
loss: 1.0610870122909546,grad_norm: 0.9999999621783355, iteration: 417877
loss: 1.0135853290557861,grad_norm: 0.7628132416853584, iteration: 417878
loss: 0.9974650740623474,grad_norm: 0.801019340479215, iteration: 417879
loss: 1.0252878665924072,grad_norm: 0.811071227189705, iteration: 417880
loss: 0.9737801551818848,grad_norm: 0.8966358841784056, iteration: 417881
loss: 1.0374644994735718,grad_norm: 0.8163112567858453, iteration: 417882
loss: 1.009116530418396,grad_norm: 0.7920257451239232, iteration: 417883
loss: 0.9872773885726929,grad_norm: 0.9999992858056111, iteration: 417884
loss: 1.0042909383773804,grad_norm: 0.8046430264439604, iteration: 417885
loss: 0.9970653057098389,grad_norm: 0.8113483992527513, iteration: 417886
loss: 1.0088067054748535,grad_norm: 0.7502823764567396, iteration: 417887
loss: 1.0112746953964233,grad_norm: 0.9153807532394793, iteration: 417888
loss: 1.0751913785934448,grad_norm: 0.9999993134128684, iteration: 417889
loss: 1.0376898050308228,grad_norm: 0.7658935266575676, iteration: 417890
loss: 1.0167919397354126,grad_norm: 0.8452214382796042, iteration: 417891
loss: 1.0387481451034546,grad_norm: 0.999999037846048, iteration: 417892
loss: 1.0130811929702759,grad_norm: 0.9310895180822619, iteration: 417893
loss: 1.0521327257156372,grad_norm: 0.7653911195610325, iteration: 417894
loss: 1.0802077054977417,grad_norm: 0.9917710221167524, iteration: 417895
loss: 1.0576602220535278,grad_norm: 0.9999992935547785, iteration: 417896
loss: 1.0137830972671509,grad_norm: 0.9999995846564457, iteration: 417897
loss: 0.9672512412071228,grad_norm: 0.7254260285772497, iteration: 417898
loss: 0.9929118156433105,grad_norm: 0.9959118676314624, iteration: 417899
loss: 0.9953505992889404,grad_norm: 0.6560596055872119, iteration: 417900
loss: 1.0082913637161255,grad_norm: 0.8868143421589476, iteration: 417901
loss: 1.1248841285705566,grad_norm: 0.9999996454369807, iteration: 417902
loss: 1.0400904417037964,grad_norm: 0.7897619695791107, iteration: 417903
loss: 1.0959250926971436,grad_norm: 0.9999992182786488, iteration: 417904
loss: 0.9738982915878296,grad_norm: 0.9999991839900383, iteration: 417905
loss: 0.9759252071380615,grad_norm: 0.9156559944710162, iteration: 417906
loss: 1.05867338180542,grad_norm: 0.9999992708535902, iteration: 417907
loss: 1.0071502923965454,grad_norm: 0.7748459902411683, iteration: 417908
loss: 0.9908447265625,grad_norm: 0.8288528659704598, iteration: 417909
loss: 1.041967749595642,grad_norm: 0.8563861680533825, iteration: 417910
loss: 1.004050374031067,grad_norm: 0.7058066173021933, iteration: 417911
loss: 1.058462142944336,grad_norm: 0.9999991682079773, iteration: 417912
loss: 1.0400152206420898,grad_norm: 0.9999990909401036, iteration: 417913
loss: 1.0294859409332275,grad_norm: 0.8359176016429406, iteration: 417914
loss: 1.025166392326355,grad_norm: 0.7647797832653221, iteration: 417915
loss: 1.0134252309799194,grad_norm: 0.9999995721068158, iteration: 417916
loss: 1.0048226118087769,grad_norm: 0.910241102749467, iteration: 417917
loss: 1.0088789463043213,grad_norm: 0.8541433907492333, iteration: 417918
loss: 1.004928469657898,grad_norm: 0.7094417378954191, iteration: 417919
loss: 1.0598499774932861,grad_norm: 0.9999999764259138, iteration: 417920
loss: 1.0064514875411987,grad_norm: 0.9067572431201488, iteration: 417921
loss: 1.0556679964065552,grad_norm: 0.9999993747650101, iteration: 417922
loss: 1.0187416076660156,grad_norm: 0.85670228794948, iteration: 417923
loss: 1.082221269607544,grad_norm: 0.999999729833181, iteration: 417924
loss: 1.0246249437332153,grad_norm: 0.9999993293874703, iteration: 417925
loss: 1.079847812652588,grad_norm: 1.0000001208237506, iteration: 417926
loss: 1.0104656219482422,grad_norm: 0.8339523230039051, iteration: 417927
loss: 1.023393154144287,grad_norm: 0.7776377952840268, iteration: 417928
loss: 1.0337092876434326,grad_norm: 0.9999990890025242, iteration: 417929
loss: 1.0374259948730469,grad_norm: 0.999998997768475, iteration: 417930
loss: 1.0154328346252441,grad_norm: 0.7733208807867475, iteration: 417931
loss: 1.0501388311386108,grad_norm: 0.999999194836201, iteration: 417932
loss: 1.0312093496322632,grad_norm: 0.9911810488409214, iteration: 417933
loss: 1.0679429769515991,grad_norm: 0.9999992092672694, iteration: 417934
loss: 1.0072451829910278,grad_norm: 0.8562409711496002, iteration: 417935
loss: 0.9922976493835449,grad_norm: 0.8239741838150151, iteration: 417936
loss: 1.0129106044769287,grad_norm: 0.999999948495464, iteration: 417937
loss: 0.9747757911682129,grad_norm: 0.6379140678803177, iteration: 417938
loss: 1.0024930238723755,grad_norm: 0.8281667535774024, iteration: 417939
loss: 0.9786089658737183,grad_norm: 0.7451718953182099, iteration: 417940
loss: 1.0287898778915405,grad_norm: 0.8240096450822005, iteration: 417941
loss: 1.0190730094909668,grad_norm: 0.7561951317882099, iteration: 417942
loss: 1.01522696018219,grad_norm: 0.75708178266322, iteration: 417943
loss: 1.0036989450454712,grad_norm: 0.6732158178608515, iteration: 417944
loss: 1.0095328092575073,grad_norm: 0.7532049940657234, iteration: 417945
loss: 1.015816569328308,grad_norm: 0.9116217010531463, iteration: 417946
loss: 0.9673287272453308,grad_norm: 0.8107306524587831, iteration: 417947
loss: 1.1748006343841553,grad_norm: 0.9999999968568499, iteration: 417948
loss: 1.024294137954712,grad_norm: 0.7277572542895814, iteration: 417949
loss: 0.9998499751091003,grad_norm: 0.9881109291248382, iteration: 417950
loss: 1.0284820795059204,grad_norm: 0.9999991265520044, iteration: 417951
loss: 0.9873118996620178,grad_norm: 0.6753220468269953, iteration: 417952
loss: 1.0230082273483276,grad_norm: 0.9522133374903561, iteration: 417953
loss: 1.0819180011749268,grad_norm: 0.9367768310688351, iteration: 417954
loss: 1.002373218536377,grad_norm: 0.6959096787232596, iteration: 417955
loss: 1.092073678970337,grad_norm: 0.9656463933004029, iteration: 417956
loss: 0.9695717096328735,grad_norm: 0.8423105380417911, iteration: 417957
loss: 1.0995416641235352,grad_norm: 0.9999991919816311, iteration: 417958
loss: 1.0061503648757935,grad_norm: 0.9999996337020625, iteration: 417959
loss: 0.9892069101333618,grad_norm: 0.7792684931053394, iteration: 417960
loss: 1.0030579566955566,grad_norm: 0.7848791691480168, iteration: 417961
loss: 0.9859277009963989,grad_norm: 0.9329259947190334, iteration: 417962
loss: 0.9805819988250732,grad_norm: 0.7704944160869723, iteration: 417963
loss: 1.0144011974334717,grad_norm: 0.9999991330502249, iteration: 417964
loss: 1.0022354125976562,grad_norm: 0.7060341207265701, iteration: 417965
loss: 1.0515851974487305,grad_norm: 0.9999989827591473, iteration: 417966
loss: 1.076444387435913,grad_norm: 0.9999994128395486, iteration: 417967
loss: 1.0367145538330078,grad_norm: 0.9999995060595218, iteration: 417968
loss: 0.999262273311615,grad_norm: 0.9999994991207559, iteration: 417969
loss: 1.031118392944336,grad_norm: 0.8239236492608013, iteration: 417970
loss: 1.0090010166168213,grad_norm: 0.8037096608148657, iteration: 417971
loss: 1.1828614473342896,grad_norm: 0.9999998438258659, iteration: 417972
loss: 0.9738078713417053,grad_norm: 0.8529451450818721, iteration: 417973
loss: 0.9862731695175171,grad_norm: 0.8452195754818403, iteration: 417974
loss: 1.0029959678649902,grad_norm: 0.9999991613113394, iteration: 417975
loss: 1.0460799932479858,grad_norm: 0.9492362005570582, iteration: 417976
loss: 1.0355794429779053,grad_norm: 0.999999045801274, iteration: 417977
loss: 1.1155307292938232,grad_norm: 0.9999998492422988, iteration: 417978
loss: 0.9993322491645813,grad_norm: 0.7764069613414172, iteration: 417979
loss: 1.056792140007019,grad_norm: 0.848240300196465, iteration: 417980
loss: 1.0097016096115112,grad_norm: 0.8159327935522703, iteration: 417981
loss: 0.9833613038063049,grad_norm: 0.9079976167435896, iteration: 417982
loss: 0.9894669055938721,grad_norm: 0.748262719184562, iteration: 417983
loss: 1.1105716228485107,grad_norm: 0.9999995298869464, iteration: 417984
loss: 1.0815297365188599,grad_norm: 0.9999999874502482, iteration: 417985
loss: 0.9726853370666504,grad_norm: 0.9213876005567637, iteration: 417986
loss: 1.1030089855194092,grad_norm: 0.9999997819927299, iteration: 417987
loss: 1.0736382007598877,grad_norm: 0.9999993817387763, iteration: 417988
loss: 0.9901491403579712,grad_norm: 0.7850453615875191, iteration: 417989
loss: 1.134894847869873,grad_norm: 0.9999999217612469, iteration: 417990
loss: 1.110123634338379,grad_norm: 0.9999993340105271, iteration: 417991
loss: 1.0314964056015015,grad_norm: 0.9120333053246397, iteration: 417992
loss: 1.0453227758407593,grad_norm: 0.9999990712914537, iteration: 417993
loss: 1.0939215421676636,grad_norm: 0.9999994763961084, iteration: 417994
loss: 1.13105046749115,grad_norm: 0.9281061953318283, iteration: 417995
loss: 1.04062819480896,grad_norm: 0.9614729295614556, iteration: 417996
loss: 1.0147218704223633,grad_norm: 0.8631524428355002, iteration: 417997
loss: 1.0199846029281616,grad_norm: 0.9999996491371039, iteration: 417998
loss: 1.0649961233139038,grad_norm: 0.9718410939961749, iteration: 417999
loss: 1.0689152479171753,grad_norm: 0.9999990935965566, iteration: 418000
loss: 1.0133191347122192,grad_norm: 0.7956151041925777, iteration: 418001
loss: 1.0728532075881958,grad_norm: 0.9999993431172989, iteration: 418002
loss: 1.0773801803588867,grad_norm: 0.9999993600596051, iteration: 418003
loss: 0.9902598857879639,grad_norm: 0.8467742443855512, iteration: 418004
loss: 1.0167500972747803,grad_norm: 0.8481179569697205, iteration: 418005
loss: 1.0427842140197754,grad_norm: 0.9999993313438268, iteration: 418006
loss: 1.0369502305984497,grad_norm: 0.9999991014522155, iteration: 418007
loss: 1.1316568851470947,grad_norm: 0.9999998825787355, iteration: 418008
loss: 1.051937460899353,grad_norm: 0.9999990127284495, iteration: 418009
loss: 1.0277141332626343,grad_norm: 0.9999992306229311, iteration: 418010
loss: 1.0110596418380737,grad_norm: 0.8849799025639346, iteration: 418011
loss: 1.066959261894226,grad_norm: 0.8152594171645724, iteration: 418012
loss: 0.9917564392089844,grad_norm: 0.6885549700110715, iteration: 418013
loss: 1.1244980096817017,grad_norm: 0.9999992023938361, iteration: 418014
loss: 1.0012996196746826,grad_norm: 0.779861199140255, iteration: 418015
loss: 1.0770822763442993,grad_norm: 0.999999260077525, iteration: 418016
loss: 1.0859028100967407,grad_norm: 0.9999996564025092, iteration: 418017
loss: 1.0476053953170776,grad_norm: 0.999999162149375, iteration: 418018
loss: 1.0440913438796997,grad_norm: 0.9072869192010188, iteration: 418019
loss: 1.048586368560791,grad_norm: 0.9747446016578765, iteration: 418020
loss: 1.0904961824417114,grad_norm: 0.9999997862391964, iteration: 418021
loss: 1.0187277793884277,grad_norm: 0.9999996694879851, iteration: 418022
loss: 0.9912728667259216,grad_norm: 0.9999990217406354, iteration: 418023
loss: 1.0294369459152222,grad_norm: 0.9999997888858467, iteration: 418024
loss: 1.035859227180481,grad_norm: 0.7343106340410875, iteration: 418025
loss: 1.0236183404922485,grad_norm: 0.8635190978237356, iteration: 418026
loss: 1.1223598718643188,grad_norm: 0.9999996070939617, iteration: 418027
loss: 1.0001857280731201,grad_norm: 0.7246671889164769, iteration: 418028
loss: 1.0798319578170776,grad_norm: 0.9073715413720878, iteration: 418029
loss: 1.16903555393219,grad_norm: 0.9999995216029992, iteration: 418030
loss: 0.9903960824012756,grad_norm: 0.9999998560531895, iteration: 418031
loss: 1.0305756330490112,grad_norm: 0.9999997961754324, iteration: 418032
loss: 1.083119511604309,grad_norm: 0.9999996194615502, iteration: 418033
loss: 1.0101771354675293,grad_norm: 0.9999993275059842, iteration: 418034
loss: 1.0253360271453857,grad_norm: 0.8203698002498, iteration: 418035
loss: 1.00471830368042,grad_norm: 0.7923966166257883, iteration: 418036
loss: 0.9944573044776917,grad_norm: 0.8300648279264286, iteration: 418037
loss: 0.9861465096473694,grad_norm: 0.8061373890775303, iteration: 418038
loss: 1.026065707206726,grad_norm: 0.9999991329318582, iteration: 418039
loss: 1.1334973573684692,grad_norm: 0.9999996454122513, iteration: 418040
loss: 1.0418695211410522,grad_norm: 0.9827381445802592, iteration: 418041
loss: 1.0777537822723389,grad_norm: 0.9999991765058921, iteration: 418042
loss: 1.0366002321243286,grad_norm: 0.9999997147810602, iteration: 418043
loss: 0.9706878066062927,grad_norm: 0.739817799388526, iteration: 418044
loss: 1.0338038206100464,grad_norm: 0.7446780348435825, iteration: 418045
loss: 1.0224411487579346,grad_norm: 0.999999741249204, iteration: 418046
loss: 1.0663471221923828,grad_norm: 0.7310197300092306, iteration: 418047
loss: 1.1534706354141235,grad_norm: 0.9042343302250816, iteration: 418048
loss: 1.0091698169708252,grad_norm: 0.8611281932829818, iteration: 418049
loss: 1.033109426498413,grad_norm: 0.9999997427647069, iteration: 418050
loss: 1.0150269269943237,grad_norm: 0.8857681435359209, iteration: 418051
loss: 1.0413501262664795,grad_norm: 0.873938581185464, iteration: 418052
loss: 1.112017273902893,grad_norm: 0.9999997095067263, iteration: 418053
loss: 1.0741674900054932,grad_norm: 0.9999990026471566, iteration: 418054
loss: 1.1988633871078491,grad_norm: 0.9999999857047309, iteration: 418055
loss: 0.9834696054458618,grad_norm: 0.7978686493710802, iteration: 418056
loss: 1.079453945159912,grad_norm: 1.0000000179523243, iteration: 418057
loss: 1.109776496887207,grad_norm: 0.9999990467770121, iteration: 418058
loss: 0.9985454082489014,grad_norm: 0.895898658105393, iteration: 418059
loss: 1.072989821434021,grad_norm: 0.9999996198250718, iteration: 418060
loss: 1.023888111114502,grad_norm: 0.9999999400847845, iteration: 418061
loss: 1.1291966438293457,grad_norm: 0.9999998819055346, iteration: 418062
loss: 1.059146761894226,grad_norm: 0.9999999565933216, iteration: 418063
loss: 1.0896601676940918,grad_norm: 0.9999994732195736, iteration: 418064
loss: 1.0174903869628906,grad_norm: 0.9999996894686146, iteration: 418065
loss: 1.0320816040039062,grad_norm: 0.9999999723472176, iteration: 418066
loss: 1.1030821800231934,grad_norm: 0.99999942410085, iteration: 418067
loss: 0.993312418460846,grad_norm: 0.999999838610098, iteration: 418068
loss: 1.0536965131759644,grad_norm: 0.8150283874378428, iteration: 418069
loss: 1.0470006465911865,grad_norm: 0.9999994929733388, iteration: 418070
loss: 1.1162257194519043,grad_norm: 0.9999999258717392, iteration: 418071
loss: 0.9980663061141968,grad_norm: 0.9999993685999287, iteration: 418072
loss: 1.131816029548645,grad_norm: 0.9999996363676802, iteration: 418073
loss: 1.0779436826705933,grad_norm: 0.9999997118433117, iteration: 418074
loss: 1.0198781490325928,grad_norm: 0.8182917672284823, iteration: 418075
loss: 1.0694321393966675,grad_norm: 0.7913676451172414, iteration: 418076
loss: 1.095432162284851,grad_norm: 0.9999997348051792, iteration: 418077
loss: 1.128685474395752,grad_norm: 0.9999998604642962, iteration: 418078
loss: 1.1344696283340454,grad_norm: 0.9999995324457381, iteration: 418079
loss: 1.156875729560852,grad_norm: 0.9999998271901164, iteration: 418080
loss: 1.0431660413742065,grad_norm: 1.0000000169422836, iteration: 418081
loss: 1.1259742975234985,grad_norm: 0.9999998419174317, iteration: 418082
loss: 1.1329402923583984,grad_norm: 0.9999999190688879, iteration: 418083
loss: 1.0811952352523804,grad_norm: 0.9999992829306323, iteration: 418084
loss: 0.9902330636978149,grad_norm: 0.9999993128146762, iteration: 418085
loss: 1.0437090396881104,grad_norm: 0.9999998470455611, iteration: 418086
loss: 1.2321875095367432,grad_norm: 0.9999998861169157, iteration: 418087
loss: 0.9939714074134827,grad_norm: 0.8149162539442704, iteration: 418088
loss: 0.9821851253509521,grad_norm: 0.8518385471317887, iteration: 418089
loss: 0.9965901374816895,grad_norm: 0.9999995454830533, iteration: 418090
loss: 1.1187573671340942,grad_norm: 0.9999999080788554, iteration: 418091
loss: 0.977092444896698,grad_norm: 0.7705720329513291, iteration: 418092
loss: 1.0046420097351074,grad_norm: 0.9999993310823718, iteration: 418093
loss: 1.2055190801620483,grad_norm: 0.999999850991161, iteration: 418094
loss: 1.2149076461791992,grad_norm: 0.9999991666374622, iteration: 418095
loss: 1.061193823814392,grad_norm: 0.8290168497804633, iteration: 418096
loss: 0.9897404313087463,grad_norm: 0.7454982552246545, iteration: 418097
loss: 1.0118210315704346,grad_norm: 0.999999131730827, iteration: 418098
loss: 1.091279149055481,grad_norm: 0.9863981151721587, iteration: 418099
loss: 1.0546114444732666,grad_norm: 0.9999995584097104, iteration: 418100
loss: 1.3286339044570923,grad_norm: 0.999999289367431, iteration: 418101
loss: 1.0817558765411377,grad_norm: 0.7984300956011395, iteration: 418102
loss: 1.0615122318267822,grad_norm: 0.9999999773224831, iteration: 418103
loss: 0.988528311252594,grad_norm: 0.9999997648092451, iteration: 418104
loss: 1.025832176208496,grad_norm: 0.9350910521102109, iteration: 418105
loss: 1.1061989068984985,grad_norm: 0.9999992973177779, iteration: 418106
loss: 1.0740631818771362,grad_norm: 0.9999992040571303, iteration: 418107
loss: 1.106924057006836,grad_norm: 0.9999998900126781, iteration: 418108
loss: 1.0182912349700928,grad_norm: 0.9999990319398969, iteration: 418109
loss: 1.0256714820861816,grad_norm: 0.9999990138359259, iteration: 418110
loss: 1.04618239402771,grad_norm: 0.8196307516375461, iteration: 418111
loss: 1.0205721855163574,grad_norm: 0.9999998163398641, iteration: 418112
loss: 1.0338455438613892,grad_norm: 0.999999176668014, iteration: 418113
loss: 1.0680571794509888,grad_norm: 0.9999998306384936, iteration: 418114
loss: 1.0370798110961914,grad_norm: 0.9999999796039805, iteration: 418115
loss: 1.0474554300308228,grad_norm: 0.9999996702949323, iteration: 418116
loss: 1.0817879438400269,grad_norm: 0.9999997252882922, iteration: 418117
loss: 1.1460853815078735,grad_norm: 0.9999996011783745, iteration: 418118
loss: 1.1335604190826416,grad_norm: 0.9999994688511864, iteration: 418119
loss: 0.9811172485351562,grad_norm: 0.9999999022435291, iteration: 418120
loss: 1.1198539733886719,grad_norm: 0.9999997529426214, iteration: 418121
loss: 1.1447128057479858,grad_norm: 0.9999996944328188, iteration: 418122
loss: 1.1941256523132324,grad_norm: 0.999999400401804, iteration: 418123
loss: 1.0253994464874268,grad_norm: 0.9999997798043669, iteration: 418124
loss: 1.0938212871551514,grad_norm: 0.9999995377836198, iteration: 418125
loss: 1.041108250617981,grad_norm: 0.9999997864186502, iteration: 418126
loss: 1.062006950378418,grad_norm: 0.9999990852313668, iteration: 418127
loss: 1.0804463624954224,grad_norm: 0.9999991114202106, iteration: 418128
loss: 1.0474365949630737,grad_norm: 0.8927438752290643, iteration: 418129
loss: 1.0294840335845947,grad_norm: 0.9999997629413362, iteration: 418130
loss: 1.0561416149139404,grad_norm: 0.8353334553635859, iteration: 418131
loss: 1.0691171884536743,grad_norm: 0.9679419627883454, iteration: 418132
loss: 1.0326924324035645,grad_norm: 0.9999992663099823, iteration: 418133
loss: 1.067725419998169,grad_norm: 0.9999991556204046, iteration: 418134
loss: 1.0327461957931519,grad_norm: 0.9999991528669516, iteration: 418135
loss: 1.0219496488571167,grad_norm: 0.9999991288780683, iteration: 418136
loss: 0.9918870329856873,grad_norm: 0.9155756627525485, iteration: 418137
loss: 1.0917540788650513,grad_norm: 0.9999997658564042, iteration: 418138
loss: 1.0789397954940796,grad_norm: 0.9999999272599406, iteration: 418139
loss: 0.991641640663147,grad_norm: 0.8789581365953877, iteration: 418140
loss: 1.1275454759597778,grad_norm: 0.9999998774618998, iteration: 418141
loss: 1.1630922555923462,grad_norm: 0.999999113286526, iteration: 418142
loss: 0.9876202940940857,grad_norm: 0.7566305568544119, iteration: 418143
loss: 1.0051426887512207,grad_norm: 0.9999991260567795, iteration: 418144
loss: 1.1956963539123535,grad_norm: 0.9999998558004397, iteration: 418145
loss: 1.042101263999939,grad_norm: 0.9999994201234189, iteration: 418146
loss: 1.130731225013733,grad_norm: 0.9999998039809779, iteration: 418147
loss: 1.1313199996948242,grad_norm: 0.999999883819448, iteration: 418148
loss: 1.0012359619140625,grad_norm: 0.9999999255112132, iteration: 418149
loss: 1.0224626064300537,grad_norm: 0.8993209989342706, iteration: 418150
loss: 1.1505112648010254,grad_norm: 0.9999999425470258, iteration: 418151
loss: 1.0442960262298584,grad_norm: 0.9999995952676822, iteration: 418152
loss: 1.1253635883331299,grad_norm: 0.9999994782186028, iteration: 418153
loss: 1.0436761379241943,grad_norm: 0.7888703072986257, iteration: 418154
loss: 1.0570074319839478,grad_norm: 0.9999997150719224, iteration: 418155
loss: 1.2525464296340942,grad_norm: 0.9999999113518852, iteration: 418156
loss: 1.0658892393112183,grad_norm: 0.9999996301647067, iteration: 418157
loss: 1.169158935546875,grad_norm: 0.9999996049357616, iteration: 418158
loss: 1.073508381843567,grad_norm: 0.9999990480282062, iteration: 418159
loss: 1.244646668434143,grad_norm: 1.0000000012122139, iteration: 418160
loss: 1.0027295351028442,grad_norm: 0.9395501044808633, iteration: 418161
loss: 1.1103274822235107,grad_norm: 0.9999999126379834, iteration: 418162
loss: 1.1277849674224854,grad_norm: 0.9999994227045125, iteration: 418163
loss: 1.0204721689224243,grad_norm: 0.9999990457779114, iteration: 418164
loss: 1.0232213735580444,grad_norm: 0.9999996783377406, iteration: 418165
loss: 1.0295432806015015,grad_norm: 0.9999994789791101, iteration: 418166
loss: 1.2309266328811646,grad_norm: 0.9999997802768701, iteration: 418167
loss: 1.0617700815200806,grad_norm: 0.9999996977367566, iteration: 418168
loss: 1.0882720947265625,grad_norm: 0.9999999821493705, iteration: 418169
loss: 1.132678747177124,grad_norm: 0.9999992909652571, iteration: 418170
loss: 1.1311123371124268,grad_norm: 0.9999998850656, iteration: 418171
loss: 1.0214462280273438,grad_norm: 0.9999995790427055, iteration: 418172
loss: 1.0832761526107788,grad_norm: 0.9999999964566919, iteration: 418173
loss: 1.1031078100204468,grad_norm: 1.000000097602458, iteration: 418174
loss: 1.0153799057006836,grad_norm: 0.8234949138713453, iteration: 418175
loss: 1.0425082445144653,grad_norm: 0.9999995546376408, iteration: 418176
loss: 1.1262599229812622,grad_norm: 0.999999852789945, iteration: 418177
loss: 1.0621567964553833,grad_norm: 0.9999997773217747, iteration: 418178
loss: 1.0577665567398071,grad_norm: 0.9999991888953683, iteration: 418179
loss: 1.140450119972229,grad_norm: 0.9999996373763924, iteration: 418180
loss: 1.026847243309021,grad_norm: 0.9999993084262955, iteration: 418181
loss: 1.0648788213729858,grad_norm: 0.9917289270228342, iteration: 418182
loss: 1.0523428916931152,grad_norm: 0.9999997188911789, iteration: 418183
loss: 1.0270825624465942,grad_norm: 0.9999998526592271, iteration: 418184
loss: 1.2090986967086792,grad_norm: 0.9999996308035564, iteration: 418185
loss: 1.1704708337783813,grad_norm: 0.9999992885156285, iteration: 418186
loss: 1.0542744398117065,grad_norm: 0.852084878264501, iteration: 418187
loss: 1.1337201595306396,grad_norm: 0.9999997616939826, iteration: 418188
loss: 1.0480860471725464,grad_norm: 0.8947695979196457, iteration: 418189
loss: 1.2610288858413696,grad_norm: 0.999999780078754, iteration: 418190
loss: 1.1414471864700317,grad_norm: 0.999999937502463, iteration: 418191
loss: 1.0817161798477173,grad_norm: 0.9999998485528255, iteration: 418192
loss: 1.0868080854415894,grad_norm: 0.9999994074084034, iteration: 418193
loss: 1.0016319751739502,grad_norm: 0.8760616596055327, iteration: 418194
loss: 0.9865097403526306,grad_norm: 0.7498501897029922, iteration: 418195
loss: 1.1244994401931763,grad_norm: 0.9999996704894374, iteration: 418196
loss: 0.9831652641296387,grad_norm: 0.7458836320100345, iteration: 418197
loss: 1.0822128057479858,grad_norm: 0.9999999058991681, iteration: 418198
loss: 1.0366990566253662,grad_norm: 0.8604352818889011, iteration: 418199
loss: 1.0149815082550049,grad_norm: 0.9999996478792466, iteration: 418200
loss: 1.0523699522018433,grad_norm: 0.9999994881643184, iteration: 418201
loss: 1.0600999593734741,grad_norm: 0.9999998826084792, iteration: 418202
loss: 1.123424768447876,grad_norm: 0.9999998352589028, iteration: 418203
loss: 1.0680732727050781,grad_norm: 0.9307329056625236, iteration: 418204
loss: 1.0372400283813477,grad_norm: 0.9999997010246838, iteration: 418205
loss: 1.100675344467163,grad_norm: 0.99999968348367, iteration: 418206
loss: 1.0818287134170532,grad_norm: 0.9999994281451281, iteration: 418207
loss: 1.0769749879837036,grad_norm: 0.9999993723788281, iteration: 418208
loss: 1.035027027130127,grad_norm: 0.999999143979434, iteration: 418209
loss: 1.0113836526870728,grad_norm: 0.7439206909472524, iteration: 418210
loss: 1.1163225173950195,grad_norm: 0.999999442053715, iteration: 418211
loss: 1.0295859575271606,grad_norm: 0.9999995045194374, iteration: 418212
loss: 1.0379189252853394,grad_norm: 0.9485273264446338, iteration: 418213
loss: 1.097836971282959,grad_norm: 0.9999997320847362, iteration: 418214
loss: 1.083124041557312,grad_norm: 0.9999999497741543, iteration: 418215
loss: 1.0699716806411743,grad_norm: 0.9999998663161355, iteration: 418216
loss: 1.0453437566757202,grad_norm: 0.9999997466020473, iteration: 418217
loss: 1.0074130296707153,grad_norm: 0.9441596976542843, iteration: 418218
loss: 1.0298378467559814,grad_norm: 0.9750444955423321, iteration: 418219
loss: 1.0539222955703735,grad_norm: 0.9999999885037952, iteration: 418220
loss: 1.038780689239502,grad_norm: 0.999999945446952, iteration: 418221
loss: 1.0763781070709229,grad_norm: 0.999999440357332, iteration: 418222
loss: 1.199556827545166,grad_norm: 0.9319041988699185, iteration: 418223
loss: 1.004105806350708,grad_norm: 0.7122751252214752, iteration: 418224
loss: 1.0504635572433472,grad_norm: 0.9999998233409624, iteration: 418225
loss: 1.0146243572235107,grad_norm: 0.945283690279283, iteration: 418226
loss: 1.0545644760131836,grad_norm: 0.8854517754673646, iteration: 418227
loss: 1.0466692447662354,grad_norm: 0.75243982428059, iteration: 418228
loss: 1.0002858638763428,grad_norm: 0.7822914118208492, iteration: 418229
loss: 1.0221266746520996,grad_norm: 0.8964845115111207, iteration: 418230
loss: 1.0510530471801758,grad_norm: 0.99999958387171, iteration: 418231
loss: 1.027540683746338,grad_norm: 0.9999991697422201, iteration: 418232
loss: 1.292709231376648,grad_norm: 0.9999998884858575, iteration: 418233
loss: 0.9878047704696655,grad_norm: 0.9367000381541692, iteration: 418234
loss: 1.0069643259048462,grad_norm: 0.6060677373516536, iteration: 418235
loss: 1.04285728931427,grad_norm: 0.9999998488005846, iteration: 418236
loss: 1.0038025379180908,grad_norm: 0.9999991881127923, iteration: 418237
loss: 0.9936206936836243,grad_norm: 0.7480654162426849, iteration: 418238
loss: 1.0433590412139893,grad_norm: 0.9999992288238378, iteration: 418239
loss: 1.0032202005386353,grad_norm: 0.8083558202484933, iteration: 418240
loss: 1.0639311075210571,grad_norm: 0.9999993443522884, iteration: 418241
loss: 1.1240757703781128,grad_norm: 0.9523812852300542, iteration: 418242
loss: 1.092373251914978,grad_norm: 0.99999988184902, iteration: 418243
loss: 1.0044933557510376,grad_norm: 0.951013646735137, iteration: 418244
loss: 1.0430430173873901,grad_norm: 0.9999996590918685, iteration: 418245
loss: 0.9756553173065186,grad_norm: 0.8782637777026703, iteration: 418246
loss: 1.0455094575881958,grad_norm: 0.9718845437313428, iteration: 418247
loss: 0.9866752028465271,grad_norm: 0.7423154025871442, iteration: 418248
loss: 1.0129480361938477,grad_norm: 0.8698205753132253, iteration: 418249
loss: 1.105062484741211,grad_norm: 0.8600228498535528, iteration: 418250
loss: 1.0769832134246826,grad_norm: 0.9999995841589044, iteration: 418251
loss: 1.0188974142074585,grad_norm: 0.999999197657326, iteration: 418252
loss: 1.0327340364456177,grad_norm: 0.9999991062523397, iteration: 418253
loss: 1.0805425643920898,grad_norm: 0.9999999526621537, iteration: 418254
loss: 1.0286649465560913,grad_norm: 0.9999996924066729, iteration: 418255
loss: 0.9965209364891052,grad_norm: 0.8498073533668915, iteration: 418256
loss: 0.9987229108810425,grad_norm: 0.9999997296571627, iteration: 418257
loss: 1.0326957702636719,grad_norm: 0.9999991559741385, iteration: 418258
loss: 0.9899603724479675,grad_norm: 0.7724038652345407, iteration: 418259
loss: 1.006394624710083,grad_norm: 0.7072633982387033, iteration: 418260
loss: 1.0735068321228027,grad_norm: 0.9999998813067983, iteration: 418261
loss: 1.044458031654358,grad_norm: 0.9999993136658856, iteration: 418262
loss: 1.0227607488632202,grad_norm: 0.9331242438502276, iteration: 418263
loss: 1.0070891380310059,grad_norm: 0.8886116495757044, iteration: 418264
loss: 1.1211769580841064,grad_norm: 0.9824031645285837, iteration: 418265
loss: 1.0636661052703857,grad_norm: 0.9999991020473903, iteration: 418266
loss: 1.1022087335586548,grad_norm: 0.9999994171818595, iteration: 418267
loss: 0.9987871050834656,grad_norm: 1.000000013412055, iteration: 418268
loss: 0.9818904995918274,grad_norm: 0.999999427059697, iteration: 418269
loss: 1.103654384613037,grad_norm: 0.9999995667002567, iteration: 418270
loss: 1.1096885204315186,grad_norm: 0.8723301876007958, iteration: 418271
loss: 1.047507882118225,grad_norm: 1.0000000348922833, iteration: 418272
loss: 1.030629277229309,grad_norm: 0.9999990621936504, iteration: 418273
loss: 1.0818828344345093,grad_norm: 0.9999998800501146, iteration: 418274
loss: 1.1031739711761475,grad_norm: 0.9999998322855574, iteration: 418275
loss: 1.0635435581207275,grad_norm: 0.7975162053634435, iteration: 418276
loss: 1.0065258741378784,grad_norm: 0.9999994256001438, iteration: 418277
loss: 0.996739387512207,grad_norm: 0.9999998133650435, iteration: 418278
loss: 1.0264662504196167,grad_norm: 0.8718856441855891, iteration: 418279
loss: 1.0937130451202393,grad_norm: 1.0000000956552604, iteration: 418280
loss: 1.0018452405929565,grad_norm: 0.7125286702146504, iteration: 418281
loss: 1.006534457206726,grad_norm: 0.9999995943448958, iteration: 418282
loss: 1.071068286895752,grad_norm: 0.9999994289852687, iteration: 418283
loss: 1.314628005027771,grad_norm: 0.9999997450462438, iteration: 418284
loss: 1.0314353704452515,grad_norm: 0.7151325459078502, iteration: 418285
loss: 1.0113587379455566,grad_norm: 0.9999999845484805, iteration: 418286
loss: 1.1782021522521973,grad_norm: 0.9999989980672366, iteration: 418287
loss: 1.1266051530838013,grad_norm: 0.9999992403764358, iteration: 418288
loss: 1.012105941772461,grad_norm: 0.8337925421221501, iteration: 418289
loss: 1.0872679948806763,grad_norm: 0.8280849400122974, iteration: 418290
loss: 1.0618339776992798,grad_norm: 0.9999994177016305, iteration: 418291
loss: 1.0256233215332031,grad_norm: 0.999999905729504, iteration: 418292
loss: 1.141558289527893,grad_norm: 0.9999997054844743, iteration: 418293
loss: 1.0471121072769165,grad_norm: 0.7017532892279904, iteration: 418294
loss: 1.1805979013442993,grad_norm: 0.9999991993985146, iteration: 418295
loss: 1.0297740697860718,grad_norm: 0.7960261675430431, iteration: 418296
loss: 1.0555380582809448,grad_norm: 0.9999990135315951, iteration: 418297
loss: 1.0327481031417847,grad_norm: 0.9999991801070702, iteration: 418298
loss: 1.0322189331054688,grad_norm: 0.8765061271876072, iteration: 418299
loss: 1.0162992477416992,grad_norm: 0.9999992092736805, iteration: 418300
loss: 1.0225765705108643,grad_norm: 0.908116523425533, iteration: 418301
loss: 0.9649702310562134,grad_norm: 0.7294368375141611, iteration: 418302
loss: 1.0354344844818115,grad_norm: 0.9295074995705416, iteration: 418303
loss: 1.0537289381027222,grad_norm: 0.8912175904746622, iteration: 418304
loss: 1.0054484605789185,grad_norm: 0.8170910725639708, iteration: 418305
loss: 1.0375310182571411,grad_norm: 0.9999999140098671, iteration: 418306
loss: 0.9944292306900024,grad_norm: 0.8467969109988291, iteration: 418307
loss: 1.0969101190567017,grad_norm: 0.9999998341233232, iteration: 418308
loss: 1.0366671085357666,grad_norm: 0.9527985782386097, iteration: 418309
loss: 1.0231029987335205,grad_norm: 0.7599717838538327, iteration: 418310
loss: 0.9728644490242004,grad_norm: 0.7564784416767908, iteration: 418311
loss: 1.0084177255630493,grad_norm: 0.999999291980909, iteration: 418312
loss: 1.0192004442214966,grad_norm: 0.9999997553845092, iteration: 418313
loss: 1.0102680921554565,grad_norm: 0.9999991018968677, iteration: 418314
loss: 1.0089490413665771,grad_norm: 0.9999997761439847, iteration: 418315
loss: 1.0210304260253906,grad_norm: 0.9260705239495753, iteration: 418316
loss: 1.003767967224121,grad_norm: 0.7192765587977031, iteration: 418317
loss: 1.028765082359314,grad_norm: 1.0000000596222725, iteration: 418318
loss: 0.9868066310882568,grad_norm: 0.6975381521398846, iteration: 418319
loss: 1.0297521352767944,grad_norm: 0.8369325696180897, iteration: 418320
loss: 1.0547477006912231,grad_norm: 0.9628948947044133, iteration: 418321
loss: 0.9917166829109192,grad_norm: 0.999999565034115, iteration: 418322
loss: 1.0174301862716675,grad_norm: 0.9999990197306057, iteration: 418323
loss: 1.0122653245925903,grad_norm: 0.8216417401523757, iteration: 418324
loss: 1.0153732299804688,grad_norm: 0.9999992681412303, iteration: 418325
loss: 1.0173838138580322,grad_norm: 0.9999993513024023, iteration: 418326
loss: 0.9908244609832764,grad_norm: 0.8844351482155419, iteration: 418327
loss: 1.0051510334014893,grad_norm: 0.9961111830012555, iteration: 418328
loss: 0.9686692357063293,grad_norm: 0.7623560388200217, iteration: 418329
loss: 1.0341359376907349,grad_norm: 0.9495754270885205, iteration: 418330
loss: 1.0168492794036865,grad_norm: 0.7997530979327682, iteration: 418331
loss: 0.9797999858856201,grad_norm: 0.8492055454064998, iteration: 418332
loss: 0.9917223453521729,grad_norm: 0.7457731683818002, iteration: 418333
loss: 1.0064631700515747,grad_norm: 0.9999990870881318, iteration: 418334
loss: 0.9913004636764526,grad_norm: 0.7748173063030263, iteration: 418335
loss: 1.0353344678878784,grad_norm: 0.8490009434901717, iteration: 418336
loss: 0.9983887672424316,grad_norm: 0.9999992806441867, iteration: 418337
loss: 0.9874793291091919,grad_norm: 0.8122709744032584, iteration: 418338
loss: 1.012612223625183,grad_norm: 0.7122768416035113, iteration: 418339
loss: 0.9791668653488159,grad_norm: 0.9999991056174292, iteration: 418340
loss: 1.1339843273162842,grad_norm: 0.9999997816989135, iteration: 418341
loss: 1.0247730016708374,grad_norm: 0.753056261364591, iteration: 418342
loss: 0.990123450756073,grad_norm: 0.9777843957789933, iteration: 418343
loss: 1.021994709968567,grad_norm: 0.7283933880526159, iteration: 418344
loss: 1.008323311805725,grad_norm: 0.7787365865297604, iteration: 418345
loss: 1.0163949728012085,grad_norm: 0.8494311130868167, iteration: 418346
loss: 1.0025489330291748,grad_norm: 0.7389654738738849, iteration: 418347
loss: 0.9893268942832947,grad_norm: 0.9999996149432874, iteration: 418348
loss: 1.0116831064224243,grad_norm: 0.9317919469890964, iteration: 418349
loss: 1.000569224357605,grad_norm: 0.8210099078794626, iteration: 418350
loss: 0.9748077392578125,grad_norm: 0.7833078381705323, iteration: 418351
loss: 0.9951925873756409,grad_norm: 0.8423087586963164, iteration: 418352
loss: 1.0335444211959839,grad_norm: 0.6862476747798855, iteration: 418353
loss: 0.9910308122634888,grad_norm: 0.7452612464385953, iteration: 418354
loss: 1.1682355403900146,grad_norm: 0.9999991921536797, iteration: 418355
loss: 1.0141277313232422,grad_norm: 0.7893284525016131, iteration: 418356
loss: 0.9948900938034058,grad_norm: 0.7969728277551843, iteration: 418357
loss: 1.0215692520141602,grad_norm: 0.9088767168564883, iteration: 418358
loss: 0.9947559237480164,grad_norm: 0.8537014022624826, iteration: 418359
loss: 1.0499924421310425,grad_norm: 0.7714963839516445, iteration: 418360
loss: 0.9986213445663452,grad_norm: 0.9999995585201504, iteration: 418361
loss: 1.0430312156677246,grad_norm: 0.9999996807969797, iteration: 418362
loss: 0.994204580783844,grad_norm: 0.7229764574923645, iteration: 418363
loss: 1.060746192932129,grad_norm: 0.99999989515599, iteration: 418364
loss: 1.0112502574920654,grad_norm: 0.8088234268804323, iteration: 418365
loss: 1.0286686420440674,grad_norm: 0.999999806855983, iteration: 418366
loss: 1.0004655122756958,grad_norm: 0.8892765740038309, iteration: 418367
loss: 1.0190716981887817,grad_norm: 0.7029691103735204, iteration: 418368
loss: 0.9590385556221008,grad_norm: 0.6936138857259622, iteration: 418369
loss: 1.0226786136627197,grad_norm: 0.7776346802806613, iteration: 418370
loss: 1.2476880550384521,grad_norm: 0.9999998199217464, iteration: 418371
loss: 1.1003607511520386,grad_norm: 0.9999994196985751, iteration: 418372
loss: 1.0029271841049194,grad_norm: 0.9999990739794383, iteration: 418373
loss: 1.0359805822372437,grad_norm: 0.9999993431782308, iteration: 418374
loss: 1.0677173137664795,grad_norm: 0.9999998632133716, iteration: 418375
loss: 1.0731397867202759,grad_norm: 0.9999990573450174, iteration: 418376
loss: 1.0222569704055786,grad_norm: 0.6459522045438003, iteration: 418377
loss: 0.9877321124076843,grad_norm: 0.9187351208246947, iteration: 418378
loss: 0.9866348505020142,grad_norm: 0.746456293295341, iteration: 418379
loss: 1.0117006301879883,grad_norm: 0.8099085352257351, iteration: 418380
loss: 1.0636366605758667,grad_norm: 0.9999995342993108, iteration: 418381
loss: 1.0125983953475952,grad_norm: 0.9336821588989828, iteration: 418382
loss: 1.2296515703201294,grad_norm: 0.9999998604467922, iteration: 418383
loss: 1.000401258468628,grad_norm: 0.9219610607462004, iteration: 418384
loss: 0.992149293422699,grad_norm: 0.9999993976428233, iteration: 418385
loss: 0.9782519340515137,grad_norm: 0.8747510505234235, iteration: 418386
loss: 0.9950624704360962,grad_norm: 0.7160196470817172, iteration: 418387
loss: 1.080179214477539,grad_norm: 0.9999993127594868, iteration: 418388
loss: 1.0200655460357666,grad_norm: 0.6877087542104601, iteration: 418389
loss: 1.0231660604476929,grad_norm: 0.6819850571604877, iteration: 418390
loss: 0.9847434759140015,grad_norm: 0.9999994425975496, iteration: 418391
loss: 1.0181881189346313,grad_norm: 0.9999991695695538, iteration: 418392
loss: 0.9699205756187439,grad_norm: 0.8286643315403437, iteration: 418393
loss: 1.025675654411316,grad_norm: 0.9615773892297355, iteration: 418394
loss: 1.028573989868164,grad_norm: 0.7567190911608719, iteration: 418395
loss: 0.9988600015640259,grad_norm: 0.6475074244327332, iteration: 418396
loss: 0.9985807538032532,grad_norm: 0.7561902516166196, iteration: 418397
loss: 0.990470826625824,grad_norm: 0.7141766124237341, iteration: 418398
loss: 1.0762250423431396,grad_norm: 0.6338914776724969, iteration: 418399
loss: 0.9987257122993469,grad_norm: 0.7452316118657702, iteration: 418400
loss: 0.9748948812484741,grad_norm: 0.8156114789675717, iteration: 418401
loss: 1.0208715200424194,grad_norm: 0.8663586664653857, iteration: 418402
loss: 1.0035878419876099,grad_norm: 0.9999995340275359, iteration: 418403
loss: 0.9812890887260437,grad_norm: 0.843893860248241, iteration: 418404
loss: 1.0064574480056763,grad_norm: 0.709397357231084, iteration: 418405
loss: 1.014014482498169,grad_norm: 0.8392634016390518, iteration: 418406
loss: 1.021069884300232,grad_norm: 0.9999995216099828, iteration: 418407
loss: 1.0154085159301758,grad_norm: 0.9999990699113769, iteration: 418408
loss: 1.023374080657959,grad_norm: 0.999999199834441, iteration: 418409
loss: 0.9706096053123474,grad_norm: 0.7729890600475909, iteration: 418410
loss: 1.0227760076522827,grad_norm: 0.6644571333632315, iteration: 418411
loss: 0.9757722020149231,grad_norm: 0.7673248531263425, iteration: 418412
loss: 1.0124613046646118,grad_norm: 0.7577224613830484, iteration: 418413
loss: 1.0240708589553833,grad_norm: 0.8592089981301664, iteration: 418414
loss: 1.020082712173462,grad_norm: 0.8148384052398416, iteration: 418415
loss: 0.990450918674469,grad_norm: 0.9999991537388991, iteration: 418416
loss: 1.0025172233581543,grad_norm: 0.8343942480705604, iteration: 418417
loss: 1.0570663213729858,grad_norm: 0.9999991592170852, iteration: 418418
loss: 1.0197842121124268,grad_norm: 0.9247771286466497, iteration: 418419
loss: 1.0074220895767212,grad_norm: 0.7778928563352879, iteration: 418420
loss: 1.0231010913848877,grad_norm: 0.8884115499459944, iteration: 418421
loss: 1.014135718345642,grad_norm: 0.9999990884298485, iteration: 418422
loss: 0.9895124435424805,grad_norm: 0.8623872731191932, iteration: 418423
loss: 1.003081202507019,grad_norm: 0.7982051673383024, iteration: 418424
loss: 0.9969354867935181,grad_norm: 0.8090392113952309, iteration: 418425
loss: 0.9904205203056335,grad_norm: 0.9999994968298597, iteration: 418426
loss: 1.0019378662109375,grad_norm: 0.72612672659995, iteration: 418427
loss: 0.9889833927154541,grad_norm: 0.8212697574142223, iteration: 418428
loss: 1.0001518726348877,grad_norm: 0.6072556863357491, iteration: 418429
loss: 0.9764066934585571,grad_norm: 0.8875342724898917, iteration: 418430
loss: 1.0308915376663208,grad_norm: 0.8582723337766872, iteration: 418431
loss: 0.9795697927474976,grad_norm: 0.9646955795763547, iteration: 418432
loss: 1.0274083614349365,grad_norm: 0.9233285184284655, iteration: 418433
loss: 1.0295401811599731,grad_norm: 0.8921415684588905, iteration: 418434
loss: 1.00701105594635,grad_norm: 0.7122098287882574, iteration: 418435
loss: 1.023971438407898,grad_norm: 0.8380471452420407, iteration: 418436
loss: 0.9903436303138733,grad_norm: 0.7234237989726893, iteration: 418437
loss: 1.0253080129623413,grad_norm: 0.9999999011341308, iteration: 418438
loss: 1.1140621900558472,grad_norm: 0.9999993542539785, iteration: 418439
loss: 1.015501856803894,grad_norm: 0.854070541699121, iteration: 418440
loss: 1.021645426750183,grad_norm: 0.9999997454578097, iteration: 418441
loss: 1.0075726509094238,grad_norm: 0.6845015940089587, iteration: 418442
loss: 1.0154142379760742,grad_norm: 0.9376665507507171, iteration: 418443
loss: 0.9658911228179932,grad_norm: 0.8658185122396416, iteration: 418444
loss: 1.0093108415603638,grad_norm: 0.6508424508856783, iteration: 418445
loss: 0.9738664627075195,grad_norm: 0.8747503652308406, iteration: 418446
loss: 1.0036159753799438,grad_norm: 0.8474090891464847, iteration: 418447
loss: 0.9805454015731812,grad_norm: 0.8325436883600218, iteration: 418448
loss: 1.03661048412323,grad_norm: 0.9999990347901744, iteration: 418449
loss: 1.0449782609939575,grad_norm: 0.7817244811159123, iteration: 418450
loss: 1.0135812759399414,grad_norm: 0.9999995815457241, iteration: 418451
loss: 1.0277431011199951,grad_norm: 0.9999990186896899, iteration: 418452
loss: 1.0372346639633179,grad_norm: 0.9999997926414155, iteration: 418453
loss: 1.0320295095443726,grad_norm: 0.9999997567642293, iteration: 418454
loss: 1.0218027830123901,grad_norm: 0.8375013191042766, iteration: 418455
loss: 1.0045658349990845,grad_norm: 0.7848740813889004, iteration: 418456
loss: 1.0379011631011963,grad_norm: 0.7309841513730759, iteration: 418457
loss: 1.0165475606918335,grad_norm: 0.752367923797268, iteration: 418458
loss: 1.0302263498306274,grad_norm: 0.9843542215067596, iteration: 418459
loss: 0.9847419261932373,grad_norm: 0.9999991013301379, iteration: 418460
loss: 1.0116353034973145,grad_norm: 0.9999991473900725, iteration: 418461
loss: 0.978158175945282,grad_norm: 0.8477637663889858, iteration: 418462
loss: 1.0651861429214478,grad_norm: 0.9999995759247977, iteration: 418463
loss: 0.980479896068573,grad_norm: 0.7328008503996597, iteration: 418464
loss: 1.0092685222625732,grad_norm: 0.8526279876161308, iteration: 418465
loss: 0.9919173717498779,grad_norm: 0.8971454780009301, iteration: 418466
loss: 1.0213874578475952,grad_norm: 0.9430728206919634, iteration: 418467
loss: 1.1036405563354492,grad_norm: 0.9999998773971064, iteration: 418468
loss: 1.028394341468811,grad_norm: 0.9999998374646116, iteration: 418469
loss: 0.9926010966300964,grad_norm: 0.8446814928943309, iteration: 418470
loss: 0.9470876455307007,grad_norm: 0.674784095428963, iteration: 418471
loss: 1.0616340637207031,grad_norm: 0.6373963927792357, iteration: 418472
loss: 1.0097908973693848,grad_norm: 0.6046100358040306, iteration: 418473
loss: 0.9987328052520752,grad_norm: 0.8094437353011681, iteration: 418474
loss: 0.9424602389335632,grad_norm: 0.9089219404834632, iteration: 418475
loss: 0.9990907311439514,grad_norm: 0.8537625421978182, iteration: 418476
loss: 1.0181764364242554,grad_norm: 0.9192097849949689, iteration: 418477
loss: 0.9643083214759827,grad_norm: 0.802755179966428, iteration: 418478
loss: 1.0235611200332642,grad_norm: 0.9999992162851254, iteration: 418479
loss: 1.0014671087265015,grad_norm: 0.756396887352208, iteration: 418480
loss: 0.9751567840576172,grad_norm: 0.8645551004215307, iteration: 418481
loss: 0.9958252310752869,grad_norm: 0.7522329320150846, iteration: 418482
loss: 1.054993987083435,grad_norm: 0.9999999154698768, iteration: 418483
loss: 1.063814640045166,grad_norm: 0.9599841354317161, iteration: 418484
loss: 0.940634548664093,grad_norm: 0.6697745297122578, iteration: 418485
loss: 1.0252681970596313,grad_norm: 0.8383287274959327, iteration: 418486
loss: 1.0051827430725098,grad_norm: 0.8138991706158936, iteration: 418487
loss: 1.0259451866149902,grad_norm: 0.6818999235623271, iteration: 418488
loss: 0.9966803193092346,grad_norm: 0.8016915521542105, iteration: 418489
loss: 0.9854480028152466,grad_norm: 0.848716011296724, iteration: 418490
loss: 1.024221658706665,grad_norm: 0.8384224796617487, iteration: 418491
loss: 1.022566795349121,grad_norm: 0.9999994654726939, iteration: 418492
loss: 1.0154162645339966,grad_norm: 0.6508530295924422, iteration: 418493
loss: 1.0688997507095337,grad_norm: 0.999999920128312, iteration: 418494
loss: 0.9963533878326416,grad_norm: 0.7525878969074727, iteration: 418495
loss: 1.019299030303955,grad_norm: 0.9322505032127836, iteration: 418496
loss: 1.132469654083252,grad_norm: 0.9999994364214118, iteration: 418497
loss: 1.0152013301849365,grad_norm: 0.678167355269723, iteration: 418498
loss: 1.0217082500457764,grad_norm: 0.9999991786336045, iteration: 418499
loss: 0.9441092610359192,grad_norm: 0.8972711141828614, iteration: 418500
loss: 0.966145932674408,grad_norm: 0.9760238812038392, iteration: 418501
loss: 1.0631043910980225,grad_norm: 0.9999999130372169, iteration: 418502
loss: 1.0045664310455322,grad_norm: 0.9999992908820148, iteration: 418503
loss: 0.9957571625709534,grad_norm: 0.871766849577581, iteration: 418504
loss: 0.9952915906906128,grad_norm: 0.9999992401181159, iteration: 418505
loss: 1.0269405841827393,grad_norm: 0.9926749795431512, iteration: 418506
loss: 1.3025044202804565,grad_norm: 0.999999568359212, iteration: 418507
loss: 1.0449634790420532,grad_norm: 0.715069835082273, iteration: 418508
loss: 0.9940215945243835,grad_norm: 0.9969814679364237, iteration: 418509
loss: 1.0156164169311523,grad_norm: 0.8441226862799698, iteration: 418510
loss: 0.9673888683319092,grad_norm: 0.671071741168762, iteration: 418511
loss: 1.0286954641342163,grad_norm: 0.6355515297620408, iteration: 418512
loss: 1.066876769065857,grad_norm: 0.8595136610224675, iteration: 418513
loss: 1.0390032529830933,grad_norm: 0.7023293353477327, iteration: 418514
loss: 1.27637779712677,grad_norm: 0.9999993659413023, iteration: 418515
loss: 0.9837843179702759,grad_norm: 0.805962149072629, iteration: 418516
loss: 1.0256187915802002,grad_norm: 0.9999992337410097, iteration: 418517
loss: 0.9846318364143372,grad_norm: 0.9999991054362771, iteration: 418518
loss: 1.0017110109329224,grad_norm: 0.8248772311016431, iteration: 418519
loss: 1.0350730419158936,grad_norm: 0.9771078398704435, iteration: 418520
loss: 1.0131371021270752,grad_norm: 0.8683186467248695, iteration: 418521
loss: 1.0059508085250854,grad_norm: 0.8323928074599889, iteration: 418522
loss: 1.015179991722107,grad_norm: 0.9999998928901623, iteration: 418523
loss: 1.0624998807907104,grad_norm: 0.87592974511594, iteration: 418524
loss: 0.9638165831565857,grad_norm: 0.8595015649317702, iteration: 418525
loss: 0.9882360100746155,grad_norm: 0.7704987522857287, iteration: 418526
loss: 1.0433000326156616,grad_norm: 0.8533504640769475, iteration: 418527
loss: 1.196158766746521,grad_norm: 0.9999994101136445, iteration: 418528
loss: 0.9901193976402283,grad_norm: 0.8066608129671446, iteration: 418529
loss: 0.9939368963241577,grad_norm: 0.9999994257952339, iteration: 418530
loss: 1.0232876539230347,grad_norm: 0.8865810462056007, iteration: 418531
loss: 1.0121183395385742,grad_norm: 0.8007475269498714, iteration: 418532
loss: 1.2408668994903564,grad_norm: 0.9999993703572879, iteration: 418533
loss: 1.0401458740234375,grad_norm: 0.9999991185211847, iteration: 418534
loss: 1.0234555006027222,grad_norm: 0.7453630338997318, iteration: 418535
loss: 1.0454955101013184,grad_norm: 0.7817199629724803, iteration: 418536
loss: 1.1721813678741455,grad_norm: 0.9999991160905208, iteration: 418537
loss: 1.075709581375122,grad_norm: 0.9506043407355845, iteration: 418538
loss: 1.0295283794403076,grad_norm: 0.9999992227592412, iteration: 418539
loss: 1.026699423789978,grad_norm: 0.829169408251779, iteration: 418540
loss: 1.048067331314087,grad_norm: 0.7977134691998493, iteration: 418541
loss: 1.0004940032958984,grad_norm: 0.9263611483402523, iteration: 418542
loss: 1.0437052249908447,grad_norm: 0.7546771852531907, iteration: 418543
loss: 1.0043116807937622,grad_norm: 0.818493380002058, iteration: 418544
loss: 1.0052180290222168,grad_norm: 0.7090406458204829, iteration: 418545
loss: 1.0281754732131958,grad_norm: 0.8353386815715911, iteration: 418546
loss: 0.9739369750022888,grad_norm: 0.8470821064847621, iteration: 418547
loss: 1.231766700744629,grad_norm: 0.9999997269036708, iteration: 418548
loss: 1.0140758752822876,grad_norm: 0.5846983181232763, iteration: 418549
loss: 1.025061845779419,grad_norm: 0.9999990997124847, iteration: 418550
loss: 0.9999445080757141,grad_norm: 0.6737515552917669, iteration: 418551
loss: 1.0207862854003906,grad_norm: 0.9079098787994914, iteration: 418552
loss: 0.9790959358215332,grad_norm: 0.7535045840192319, iteration: 418553
loss: 1.0308222770690918,grad_norm: 0.7595833160770268, iteration: 418554
loss: 1.0217492580413818,grad_norm: 0.999999594964429, iteration: 418555
loss: 1.016364574432373,grad_norm: 0.9999998610098526, iteration: 418556
loss: 0.9907297492027283,grad_norm: 0.8825630108831211, iteration: 418557
loss: 1.0693881511688232,grad_norm: 0.9999992805010586, iteration: 418558
loss: 1.007922887802124,grad_norm: 0.999999210578655, iteration: 418559
loss: 1.2216129302978516,grad_norm: 0.9999995425301248, iteration: 418560
loss: 1.0193837881088257,grad_norm: 0.7523134951273972, iteration: 418561
loss: 0.9915397763252258,grad_norm: 0.808892612610233, iteration: 418562
loss: 0.9791721105575562,grad_norm: 0.7561063301371315, iteration: 418563
loss: 1.0853087902069092,grad_norm: 0.9999999451766726, iteration: 418564
loss: 1.0885294675827026,grad_norm: 0.9999999109683037, iteration: 418565
loss: 1.0121017694473267,grad_norm: 0.6104685337101555, iteration: 418566
loss: 1.0487116575241089,grad_norm: 0.9999993370080367, iteration: 418567
loss: 0.9492479562759399,grad_norm: 0.6927803393175754, iteration: 418568
loss: 0.9960041046142578,grad_norm: 0.8534149166619664, iteration: 418569
loss: 1.0110520124435425,grad_norm: 0.7750596771721598, iteration: 418570
loss: 0.993111252784729,grad_norm: 0.7734616987674737, iteration: 418571
loss: 0.9953764081001282,grad_norm: 0.825770047985764, iteration: 418572
loss: 1.0085335969924927,grad_norm: 0.9999998046454833, iteration: 418573
loss: 1.093984842300415,grad_norm: 0.9999997684948636, iteration: 418574
loss: 1.018369436264038,grad_norm: 0.7767517733961078, iteration: 418575
loss: 1.0483083724975586,grad_norm: 0.9999996534478414, iteration: 418576
loss: 0.981065571308136,grad_norm: 0.817816963845776, iteration: 418577
loss: 1.0261772871017456,grad_norm: 0.7602516022277244, iteration: 418578
loss: 0.9996098875999451,grad_norm: 0.999999336518376, iteration: 418579
loss: 1.1018997430801392,grad_norm: 0.9999997424259651, iteration: 418580
loss: 1.0137577056884766,grad_norm: 0.813775851084172, iteration: 418581
loss: 0.9788342118263245,grad_norm: 0.8832988007584782, iteration: 418582
loss: 1.0288310050964355,grad_norm: 0.850301207627726, iteration: 418583
loss: 1.195939302444458,grad_norm: 0.9999995075717847, iteration: 418584
loss: 1.0418415069580078,grad_norm: 0.9999992916046138, iteration: 418585
loss: 0.9888108968734741,grad_norm: 0.7602995008423616, iteration: 418586
loss: 1.2113112211227417,grad_norm: 0.9999993675886154, iteration: 418587
loss: 0.946138322353363,grad_norm: 0.8294426733587433, iteration: 418588
loss: 1.0854536294937134,grad_norm: 0.9999993853826187, iteration: 418589
loss: 1.037963628768921,grad_norm: 0.735976190578514, iteration: 418590
loss: 1.1774359941482544,grad_norm: 0.999999916072252, iteration: 418591
loss: 0.9700189232826233,grad_norm: 0.8510461101482578, iteration: 418592
loss: 1.0099259614944458,grad_norm: 0.8599668391246265, iteration: 418593
loss: 0.9875386357307434,grad_norm: 0.8364051745342849, iteration: 418594
loss: 0.9695794582366943,grad_norm: 0.8001742092242842, iteration: 418595
loss: 0.9973659515380859,grad_norm: 0.8911321439000064, iteration: 418596
loss: 0.984389066696167,grad_norm: 0.9993949625562, iteration: 418597
loss: 0.9610304832458496,grad_norm: 0.8816817710652927, iteration: 418598
loss: 1.0037925243377686,grad_norm: 0.7850001719886142, iteration: 418599
loss: 1.0175939798355103,grad_norm: 0.9999995021768988, iteration: 418600
loss: 1.0101946592330933,grad_norm: 0.7685176811302924, iteration: 418601
loss: 1.017647385597229,grad_norm: 0.877106605418693, iteration: 418602
loss: 1.0300004482269287,grad_norm: 0.8901627473081101, iteration: 418603
loss: 1.0067102909088135,grad_norm: 0.6651399437377504, iteration: 418604
loss: 0.9928837418556213,grad_norm: 0.8985571716218453, iteration: 418605
loss: 0.9798229336738586,grad_norm: 0.9999998589192161, iteration: 418606
loss: 1.0304478406906128,grad_norm: 0.8625270759629413, iteration: 418607
loss: 1.0060369968414307,grad_norm: 0.755720416267536, iteration: 418608
loss: 1.034489631652832,grad_norm: 0.9359346724986423, iteration: 418609
loss: 0.9884288907051086,grad_norm: 0.7911457288521054, iteration: 418610
loss: 1.0242102146148682,grad_norm: 0.7695768987120831, iteration: 418611
loss: 1.0561243295669556,grad_norm: 0.9999989982190903, iteration: 418612
loss: 1.0760610103607178,grad_norm: 0.9999993845531381, iteration: 418613
loss: 0.9875979423522949,grad_norm: 0.8220506118152604, iteration: 418614
loss: 1.0046825408935547,grad_norm: 0.8392437086185869, iteration: 418615
loss: 1.0649861097335815,grad_norm: 0.8909126867354549, iteration: 418616
loss: 0.9933690428733826,grad_norm: 0.7540795785839871, iteration: 418617
loss: 1.0241261720657349,grad_norm: 0.9914233515817501, iteration: 418618
loss: 1.0183398723602295,grad_norm: 0.7434686687933887, iteration: 418619
loss: 1.0234431028366089,grad_norm: 0.756618086027167, iteration: 418620
loss: 0.9744603037834167,grad_norm: 0.8409709761912699, iteration: 418621
loss: 0.9666801691055298,grad_norm: 0.7500706598772757, iteration: 418622
loss: 1.0518473386764526,grad_norm: 0.9150576839583314, iteration: 418623
loss: 0.9583770036697388,grad_norm: 0.7931236732923844, iteration: 418624
loss: 1.02897047996521,grad_norm: 0.8503038264902053, iteration: 418625
loss: 1.000925898551941,grad_norm: 0.8463459882218332, iteration: 418626
loss: 0.9947425127029419,grad_norm: 0.7080553023150494, iteration: 418627
loss: 1.0140552520751953,grad_norm: 0.6148650138818678, iteration: 418628
loss: 1.0348647832870483,grad_norm: 0.7344791341979837, iteration: 418629
loss: 1.019388198852539,grad_norm: 0.6641235975748871, iteration: 418630
loss: 1.015712857246399,grad_norm: 0.9999993189550267, iteration: 418631
loss: 0.9543609023094177,grad_norm: 0.8536376807880286, iteration: 418632
loss: 0.983137845993042,grad_norm: 0.75412194771438, iteration: 418633
loss: 0.9858741164207458,grad_norm: 0.6771231459615202, iteration: 418634
loss: 1.0345964431762695,grad_norm: 0.9204970372487913, iteration: 418635
loss: 0.9871121644973755,grad_norm: 0.6977494354681483, iteration: 418636
loss: 1.028955101966858,grad_norm: 0.9741271899005893, iteration: 418637
loss: 0.9739788770675659,grad_norm: 0.6626186400394201, iteration: 418638
loss: 0.9888654351234436,grad_norm: 0.7772122570434888, iteration: 418639
loss: 1.039355993270874,grad_norm: 0.9999989806247297, iteration: 418640
loss: 1.0085307359695435,grad_norm: 0.9006758426814698, iteration: 418641
loss: 1.025304913520813,grad_norm: 0.8342251706767358, iteration: 418642
loss: 1.0228090286254883,grad_norm: 0.8356368235819757, iteration: 418643
loss: 1.0563892126083374,grad_norm: 0.8317461030741649, iteration: 418644
loss: 1.0074102878570557,grad_norm: 0.8073998175414399, iteration: 418645
loss: 1.0456516742706299,grad_norm: 0.9999992865985903, iteration: 418646
loss: 1.013201355934143,grad_norm: 0.8846523108051088, iteration: 418647
loss: 0.9985839128494263,grad_norm: 0.7151927117956439, iteration: 418648
loss: 1.0698856115341187,grad_norm: 0.9999993945262837, iteration: 418649
loss: 1.0334489345550537,grad_norm: 0.8797369512863426, iteration: 418650
loss: 1.0148197412490845,grad_norm: 0.803527960969039, iteration: 418651
loss: 1.0646599531173706,grad_norm: 0.8178949355502364, iteration: 418652
loss: 0.9776058197021484,grad_norm: 0.7866656756794819, iteration: 418653
loss: 1.0049386024475098,grad_norm: 0.9361504552207673, iteration: 418654
loss: 1.0742783546447754,grad_norm: 0.9999993951375519, iteration: 418655
loss: 0.995923638343811,grad_norm: 0.8051254868172572, iteration: 418656
loss: 0.9944916367530823,grad_norm: 0.7488552435332916, iteration: 418657
loss: 1.0315070152282715,grad_norm: 0.7393007652340583, iteration: 418658
loss: 1.0044753551483154,grad_norm: 0.7647229930781667, iteration: 418659
loss: 0.9751614928245544,grad_norm: 0.9999996125454831, iteration: 418660
loss: 1.0076254606246948,grad_norm: 0.8574352478037638, iteration: 418661
loss: 0.9702053070068359,grad_norm: 0.8274699373078603, iteration: 418662
loss: 1.0098254680633545,grad_norm: 0.8584268191709986, iteration: 418663
loss: 0.9868353009223938,grad_norm: 0.8683105599531203, iteration: 418664
loss: 1.0175800323486328,grad_norm: 0.6602264184521389, iteration: 418665
loss: 1.0387139320373535,grad_norm: 0.8517091043717931, iteration: 418666
loss: 0.9631808996200562,grad_norm: 0.7979254846896063, iteration: 418667
loss: 1.043604850769043,grad_norm: 0.7867132585685939, iteration: 418668
loss: 0.9859890341758728,grad_norm: 0.7769219817050288, iteration: 418669
loss: 1.010852336883545,grad_norm: 0.9186803387033013, iteration: 418670
loss: 0.9854983687400818,grad_norm: 0.8548150200089643, iteration: 418671
loss: 1.0061804056167603,grad_norm: 0.9322898600419167, iteration: 418672
loss: 1.0332292318344116,grad_norm: 0.860754420218578, iteration: 418673
loss: 1.0550121068954468,grad_norm: 0.7724629605665208, iteration: 418674
loss: 1.0535227060317993,grad_norm: 0.9999996679007218, iteration: 418675
loss: 0.989751398563385,grad_norm: 0.6854601590036428, iteration: 418676
loss: 0.9863888025283813,grad_norm: 0.7490988491638615, iteration: 418677
loss: 0.9804372191429138,grad_norm: 0.7646342450863827, iteration: 418678
loss: 0.9872921705245972,grad_norm: 0.793280606306196, iteration: 418679
loss: 0.9822288751602173,grad_norm: 0.9022956418859084, iteration: 418680
loss: 1.1104248762130737,grad_norm: 0.9416169559302572, iteration: 418681
loss: 0.984011173248291,grad_norm: 0.835313438224458, iteration: 418682
loss: 1.043709635734558,grad_norm: 0.8533101427391049, iteration: 418683
loss: 0.9969492554664612,grad_norm: 0.6772484826158427, iteration: 418684
loss: 1.0161470174789429,grad_norm: 0.9999990796267009, iteration: 418685
loss: 1.0144325494766235,grad_norm: 0.7285641079718534, iteration: 418686
loss: 1.0069248676300049,grad_norm: 0.7034261036734902, iteration: 418687
loss: 0.9932965040206909,grad_norm: 0.7984174342016794, iteration: 418688
loss: 1.010603427886963,grad_norm: 0.9585076043701669, iteration: 418689
loss: 1.0325764417648315,grad_norm: 0.9999998167940474, iteration: 418690
loss: 1.0793877840042114,grad_norm: 0.8187228952941041, iteration: 418691
loss: 1.070114016532898,grad_norm: 0.7614342585521509, iteration: 418692
loss: 1.0052026510238647,grad_norm: 0.8105744064085252, iteration: 418693
loss: 0.9932509660720825,grad_norm: 0.7878283850970773, iteration: 418694
loss: 0.9796037077903748,grad_norm: 0.6687694408504693, iteration: 418695
loss: 1.0076888799667358,grad_norm: 0.7101555520229267, iteration: 418696
loss: 1.02042818069458,grad_norm: 0.803050334914679, iteration: 418697
loss: 1.0383882522583008,grad_norm: 0.999999464840021, iteration: 418698
loss: 1.016170859336853,grad_norm: 0.8030159642422533, iteration: 418699
loss: 1.0817006826400757,grad_norm: 0.9999990413198996, iteration: 418700
loss: 1.0955634117126465,grad_norm: 0.9999991917942948, iteration: 418701
loss: 0.9814983606338501,grad_norm: 0.8095243945634519, iteration: 418702
loss: 1.0557923316955566,grad_norm: 0.8325789763235342, iteration: 418703
loss: 1.0255399942398071,grad_norm: 0.9225401394174914, iteration: 418704
loss: 1.008963704109192,grad_norm: 0.8315519406455768, iteration: 418705
loss: 0.9690196514129639,grad_norm: 0.7741214873749915, iteration: 418706
loss: 0.9985707998275757,grad_norm: 0.8451109122485875, iteration: 418707
loss: 0.9876542091369629,grad_norm: 0.7183462699812659, iteration: 418708
loss: 0.9967877864837646,grad_norm: 0.8662286444904521, iteration: 418709
loss: 1.0045154094696045,grad_norm: 0.7359405771523645, iteration: 418710
loss: 1.0537769794464111,grad_norm: 0.9999999046106357, iteration: 418711
loss: 0.991675853729248,grad_norm: 0.7962135495805408, iteration: 418712
loss: 1.0019257068634033,grad_norm: 0.7818069930120831, iteration: 418713
loss: 1.022721529006958,grad_norm: 0.8394721740959638, iteration: 418714
loss: 1.0250110626220703,grad_norm: 0.7746919946029753, iteration: 418715
loss: 1.023937702178955,grad_norm: 0.9999990633388501, iteration: 418716
loss: 0.9989820718765259,grad_norm: 0.8120188784010699, iteration: 418717
loss: 0.9947540760040283,grad_norm: 0.9575266467998038, iteration: 418718
loss: 1.0238595008850098,grad_norm: 0.7662990066306037, iteration: 418719
loss: 1.0013467073440552,grad_norm: 0.966303573283883, iteration: 418720
loss: 1.0188708305358887,grad_norm: 0.8099292908055554, iteration: 418721
loss: 1.0070064067840576,grad_norm: 0.7286731138892836, iteration: 418722
loss: 1.0366079807281494,grad_norm: 0.7452676574175102, iteration: 418723
loss: 0.9650850296020508,grad_norm: 0.8902304233014534, iteration: 418724
loss: 1.0052576065063477,grad_norm: 0.768825781333867, iteration: 418725
loss: 1.0372234582901,grad_norm: 0.9999992122320138, iteration: 418726
loss: 0.9966849088668823,grad_norm: 0.8331359451282817, iteration: 418727
loss: 1.0895922183990479,grad_norm: 0.9999998530875758, iteration: 418728
loss: 0.9555918574333191,grad_norm: 0.7839781156665727, iteration: 418729
loss: 0.9977552890777588,grad_norm: 0.7547549664990219, iteration: 418730
loss: 1.0126029253005981,grad_norm: 0.8330575974186453, iteration: 418731
loss: 0.9799941778182983,grad_norm: 0.7410061088537278, iteration: 418732
loss: 1.0191439390182495,grad_norm: 0.7776169466263073, iteration: 418733
loss: 0.9961300492286682,grad_norm: 0.7677593080705524, iteration: 418734
loss: 1.0350207090377808,grad_norm: 0.9999991026250034, iteration: 418735
loss: 0.9671225547790527,grad_norm: 0.8613118147443752, iteration: 418736
loss: 0.9696588516235352,grad_norm: 0.9999993755813904, iteration: 418737
loss: 1.0478192567825317,grad_norm: 0.9999995619374434, iteration: 418738
loss: 0.9860382080078125,grad_norm: 0.7623087693801596, iteration: 418739
loss: 0.9758642315864563,grad_norm: 0.9067834260017373, iteration: 418740
loss: 1.0079554319381714,grad_norm: 0.9999998871161818, iteration: 418741
loss: 1.0852375030517578,grad_norm: 0.8396735966036065, iteration: 418742
loss: 0.9795095324516296,grad_norm: 0.8160228904592921, iteration: 418743
loss: 1.1295865774154663,grad_norm: 0.9999998682230918, iteration: 418744
loss: 1.005811095237732,grad_norm: 0.8897942836601087, iteration: 418745
loss: 0.9947076439857483,grad_norm: 0.667229368605445, iteration: 418746
loss: 0.97216796875,grad_norm: 0.6805317683340636, iteration: 418747
loss: 1.0023739337921143,grad_norm: 0.792033227898378, iteration: 418748
loss: 1.0239522457122803,grad_norm: 0.9999991994853682, iteration: 418749
loss: 0.9948329925537109,grad_norm: 0.8805577327579514, iteration: 418750
loss: 1.0274462699890137,grad_norm: 0.8135436286682974, iteration: 418751
loss: 1.0569885969161987,grad_norm: 0.8024905110149158, iteration: 418752
loss: 0.9938749074935913,grad_norm: 0.840449886407427, iteration: 418753
loss: 0.99652498960495,grad_norm: 0.8222688963982726, iteration: 418754
loss: 1.0180261135101318,grad_norm: 0.8759575400656836, iteration: 418755
loss: 1.0252089500427246,grad_norm: 0.769802139032227, iteration: 418756
loss: 1.0056992769241333,grad_norm: 0.795381778297899, iteration: 418757
loss: 0.9980422258377075,grad_norm: 0.6777246834225544, iteration: 418758
loss: 1.011428952217102,grad_norm: 0.8186047116699301, iteration: 418759
loss: 0.952914834022522,grad_norm: 0.6667182825242832, iteration: 418760
loss: 1.0323396921157837,grad_norm: 0.934851482213719, iteration: 418761
loss: 0.990367591381073,grad_norm: 0.7985688368493009, iteration: 418762
loss: 0.9846990704536438,grad_norm: 0.7337336744060237, iteration: 418763
loss: 1.0510565042495728,grad_norm: 0.6248525408855555, iteration: 418764
loss: 1.0034303665161133,grad_norm: 0.8910632398549164, iteration: 418765
loss: 1.0105899572372437,grad_norm: 0.9547956158611611, iteration: 418766
loss: 0.9859312176704407,grad_norm: 0.681326971458866, iteration: 418767
loss: 0.9905117154121399,grad_norm: 0.9999995545284734, iteration: 418768
loss: 1.098284125328064,grad_norm: 0.999999655233383, iteration: 418769
loss: 1.0662145614624023,grad_norm: 0.9999989529173989, iteration: 418770
loss: 0.9650452733039856,grad_norm: 0.7165360348792656, iteration: 418771
loss: 1.0068649053573608,grad_norm: 0.7936754356023985, iteration: 418772
loss: 0.9934094548225403,grad_norm: 0.7750674672137989, iteration: 418773
loss: 1.000585913658142,grad_norm: 0.8007956974765295, iteration: 418774
loss: 0.9853231906890869,grad_norm: 0.8126037791264684, iteration: 418775
loss: 0.9609860777854919,grad_norm: 0.7810441471398515, iteration: 418776
loss: 0.9992207288742065,grad_norm: 0.9836291495169159, iteration: 418777
loss: 1.0340772867202759,grad_norm: 0.8763527859498188, iteration: 418778
loss: 1.027647614479065,grad_norm: 0.7266718769060231, iteration: 418779
loss: 0.9566686749458313,grad_norm: 0.8636939839521129, iteration: 418780
loss: 0.9931470155715942,grad_norm: 0.778858924049521, iteration: 418781
loss: 1.0163400173187256,grad_norm: 0.8734204181902949, iteration: 418782
loss: 1.0719727277755737,grad_norm: 0.9999998597570388, iteration: 418783
loss: 0.9576137661933899,grad_norm: 0.8547959015720114, iteration: 418784
loss: 1.048208475112915,grad_norm: 0.9999990790907294, iteration: 418785
loss: 0.9950610399246216,grad_norm: 0.6383502963531618, iteration: 418786
loss: 0.9956367611885071,grad_norm: 0.7167057255443505, iteration: 418787
loss: 1.0691102743148804,grad_norm: 0.6438874049563916, iteration: 418788
loss: 1.0160890817642212,grad_norm: 0.6398100744587031, iteration: 418789
loss: 1.0180500745773315,grad_norm: 0.8697901705954612, iteration: 418790
loss: 1.0040103197097778,grad_norm: 0.9444660916185761, iteration: 418791
loss: 0.9725354909896851,grad_norm: 0.9999992280255732, iteration: 418792
loss: 0.9595034718513489,grad_norm: 0.752991678012911, iteration: 418793
loss: 1.0182958841323853,grad_norm: 0.8559962467074071, iteration: 418794
loss: 1.0113794803619385,grad_norm: 0.9347112044209555, iteration: 418795
loss: 1.089205265045166,grad_norm: 0.9999998216598349, iteration: 418796
loss: 0.9958653450012207,grad_norm: 0.7961521855535645, iteration: 418797
loss: 0.9934405088424683,grad_norm: 0.8381691827098708, iteration: 418798
loss: 0.9925245046615601,grad_norm: 0.8923793889809168, iteration: 418799
loss: 1.0677117109298706,grad_norm: 0.8388877871320739, iteration: 418800
loss: 1.108166217803955,grad_norm: 0.9999992451951698, iteration: 418801
loss: 1.0331131219863892,grad_norm: 0.7603031985647473, iteration: 418802
loss: 1.0068877935409546,grad_norm: 0.9999991139515445, iteration: 418803
loss: 1.0094252824783325,grad_norm: 0.7442037533918031, iteration: 418804
loss: 0.9907693862915039,grad_norm: 0.6339993195764823, iteration: 418805
loss: 1.084011435508728,grad_norm: 0.9999990807322505, iteration: 418806
loss: 1.0474257469177246,grad_norm: 0.9999992924224019, iteration: 418807
loss: 0.981996476650238,grad_norm: 0.775307416722375, iteration: 418808
loss: 0.9916442036628723,grad_norm: 0.7730849112515401, iteration: 418809
loss: 1.0305827856063843,grad_norm: 0.9109369995795032, iteration: 418810
loss: 1.156396746635437,grad_norm: 0.9999998407277462, iteration: 418811
loss: 1.009157419204712,grad_norm: 0.999999877075264, iteration: 418812
loss: 1.0033252239227295,grad_norm: 0.8302712737304633, iteration: 418813
loss: 1.0074043273925781,grad_norm: 0.999999353929253, iteration: 418814
loss: 1.0078774690628052,grad_norm: 0.8662554682896807, iteration: 418815
loss: 1.052498459815979,grad_norm: 0.6816255701742968, iteration: 418816
loss: 1.01242196559906,grad_norm: 0.7372414708390254, iteration: 418817
loss: 0.9826250672340393,grad_norm: 0.8131764257103783, iteration: 418818
loss: 1.0365538597106934,grad_norm: 0.8321663439921108, iteration: 418819
loss: 0.963283360004425,grad_norm: 0.846309918638853, iteration: 418820
loss: 1.0085995197296143,grad_norm: 0.8069535661096106, iteration: 418821
loss: 0.9939325451850891,grad_norm: 0.8482501016686532, iteration: 418822
loss: 1.0733535289764404,grad_norm: 0.7391639750654428, iteration: 418823
loss: 1.092949390411377,grad_norm: 0.853474515561775, iteration: 418824
loss: 0.9922139644622803,grad_norm: 0.9707300081047561, iteration: 418825
loss: 0.9423556327819824,grad_norm: 0.8091860750550762, iteration: 418826
loss: 1.0033304691314697,grad_norm: 0.7387752531607037, iteration: 418827
loss: 1.0102896690368652,grad_norm: 0.6889403114090411, iteration: 418828
loss: 0.9920041561126709,grad_norm: 0.9999995745945264, iteration: 418829
loss: 1.0424615144729614,grad_norm: 0.9977878095722487, iteration: 418830
loss: 0.9773780107498169,grad_norm: 0.7221434528042839, iteration: 418831
loss: 0.9980207681655884,grad_norm: 0.8417864833053457, iteration: 418832
loss: 0.9968743920326233,grad_norm: 0.8994622119490786, iteration: 418833
loss: 0.9964773058891296,grad_norm: 0.8165947793117959, iteration: 418834
loss: 0.9867714047431946,grad_norm: 0.7078794271131681, iteration: 418835
loss: 0.9930393695831299,grad_norm: 0.9999991928183992, iteration: 418836
loss: 0.9385377764701843,grad_norm: 0.7931960706733846, iteration: 418837
loss: 1.0938224792480469,grad_norm: 0.9999995436087584, iteration: 418838
loss: 1.089585781097412,grad_norm: 0.9999991498450749, iteration: 418839
loss: 1.0188474655151367,grad_norm: 0.7929047576808305, iteration: 418840
loss: 1.037257194519043,grad_norm: 0.9999990875073916, iteration: 418841
loss: 1.0337294340133667,grad_norm: 0.8539946871939574, iteration: 418842
loss: 1.0386854410171509,grad_norm: 0.9999992265001766, iteration: 418843
loss: 1.0170913934707642,grad_norm: 0.7803213370852619, iteration: 418844
loss: 1.0131323337554932,grad_norm: 0.7502303439425163, iteration: 418845
loss: 1.0831905603408813,grad_norm: 0.9484129919988661, iteration: 418846
loss: 1.0738660097122192,grad_norm: 0.9345697522346146, iteration: 418847
loss: 1.0915194749832153,grad_norm: 0.8459740538718045, iteration: 418848
loss: 1.0003989934921265,grad_norm: 0.8562216923558632, iteration: 418849
loss: 1.005088448524475,grad_norm: 0.9999996168328839, iteration: 418850
loss: 1.0418708324432373,grad_norm: 0.7914344603757013, iteration: 418851
loss: 0.9838019609451294,grad_norm: 0.7491182595677233, iteration: 418852
loss: 1.216647982597351,grad_norm: 0.9999991426833068, iteration: 418853
loss: 0.9837868809700012,grad_norm: 0.8496936371852435, iteration: 418854
loss: 1.0282604694366455,grad_norm: 0.7363291450420234, iteration: 418855
loss: 0.9805883169174194,grad_norm: 0.9668530527430159, iteration: 418856
loss: 0.9880974888801575,grad_norm: 0.7373617152286384, iteration: 418857
loss: 0.962963342666626,grad_norm: 0.6837165680402925, iteration: 418858
loss: 1.0571781396865845,grad_norm: 0.9621606507897283, iteration: 418859
loss: 0.9944256544113159,grad_norm: 0.9546865643597404, iteration: 418860
loss: 1.0312385559082031,grad_norm: 0.8837769457055519, iteration: 418861
loss: 1.0479542016983032,grad_norm: 0.9999996628197795, iteration: 418862
loss: 0.9641784429550171,grad_norm: 0.8840794568262775, iteration: 418863
loss: 1.0343888998031616,grad_norm: 0.706054521990073, iteration: 418864
loss: 1.0078386068344116,grad_norm: 0.7110060501379029, iteration: 418865
loss: 0.997917890548706,grad_norm: 0.6950141452173773, iteration: 418866
loss: 0.98390793800354,grad_norm: 0.8010206496897122, iteration: 418867
loss: 1.0180944204330444,grad_norm: 0.7845886637390418, iteration: 418868
loss: 0.9835257530212402,grad_norm: 0.7435625598985497, iteration: 418869
loss: 1.0469207763671875,grad_norm: 0.8427514129078525, iteration: 418870
loss: 1.120418906211853,grad_norm: 0.999999657742183, iteration: 418871
loss: 1.0757718086242676,grad_norm: 0.9999999416947057, iteration: 418872
loss: 1.0089150667190552,grad_norm: 0.7173784752862739, iteration: 418873
loss: 1.0026121139526367,grad_norm: 0.9101131333190983, iteration: 418874
loss: 1.019074559211731,grad_norm: 0.694444324345185, iteration: 418875
loss: 0.9602851867675781,grad_norm: 0.7961714123413081, iteration: 418876
loss: 0.959439754486084,grad_norm: 0.7405173956034166, iteration: 418877
loss: 1.0183398723602295,grad_norm: 0.9999996150233973, iteration: 418878
loss: 0.9999923706054688,grad_norm: 0.880654306246173, iteration: 418879
loss: 1.025152564048767,grad_norm: 0.9465158979513846, iteration: 418880
loss: 1.0430903434753418,grad_norm: 0.7169233796181601, iteration: 418881
loss: 0.999132513999939,grad_norm: 0.7927272326951494, iteration: 418882
loss: 1.0220009088516235,grad_norm: 0.9999992176132549, iteration: 418883
loss: 1.0035855770111084,grad_norm: 0.9047482674830435, iteration: 418884
loss: 1.0090676546096802,grad_norm: 0.8090324387662249, iteration: 418885
loss: 1.020970106124878,grad_norm: 0.7697878912179573, iteration: 418886
loss: 0.9850828647613525,grad_norm: 0.8749978577824871, iteration: 418887
loss: 0.9992191791534424,grad_norm: 0.8319178334907722, iteration: 418888
loss: 1.0282504558563232,grad_norm: 0.7966662361491889, iteration: 418889
loss: 0.9445205330848694,grad_norm: 0.7838288541039645, iteration: 418890
loss: 1.0425786972045898,grad_norm: 0.9869918797634476, iteration: 418891
loss: 0.9790864586830139,grad_norm: 0.828302851939344, iteration: 418892
loss: 1.1638864278793335,grad_norm: 0.9999996875743268, iteration: 418893
loss: 0.9877557158470154,grad_norm: 0.7239155720074553, iteration: 418894
loss: 0.9849704504013062,grad_norm: 0.8348048352386335, iteration: 418895
loss: 1.1248438358306885,grad_norm: 0.9530046660154217, iteration: 418896
loss: 1.003441572189331,grad_norm: 0.7373745722500576, iteration: 418897
loss: 0.9981182217597961,grad_norm: 0.8459257160319538, iteration: 418898
loss: 1.0236010551452637,grad_norm: 0.7611077768662452, iteration: 418899
loss: 0.9784607887268066,grad_norm: 0.8797129886826, iteration: 418900
loss: 0.9861436486244202,grad_norm: 0.8112457676996171, iteration: 418901
loss: 0.9918856024742126,grad_norm: 0.9999999138382085, iteration: 418902
loss: 0.9799841642379761,grad_norm: 0.7467749098513432, iteration: 418903
loss: 1.0072109699249268,grad_norm: 0.7266439904716346, iteration: 418904
loss: 1.0036503076553345,grad_norm: 0.7355650970708462, iteration: 418905
loss: 1.0009405612945557,grad_norm: 0.7008957898732322, iteration: 418906
loss: 0.9769914746284485,grad_norm: 0.9999991188254568, iteration: 418907
loss: 0.984825074672699,grad_norm: 0.7872691469997636, iteration: 418908
loss: 1.1275229454040527,grad_norm: 0.9999994482416159, iteration: 418909
loss: 1.0271415710449219,grad_norm: 0.9999994795745898, iteration: 418910
loss: 1.068266749382019,grad_norm: 0.6396023934476215, iteration: 418911
loss: 1.0358021259307861,grad_norm: 0.999999084016924, iteration: 418912
loss: 1.077060341835022,grad_norm: 0.9999998841965169, iteration: 418913
loss: 1.0013986825942993,grad_norm: 0.7225540810553874, iteration: 418914
loss: 1.001096487045288,grad_norm: 0.7492118662051812, iteration: 418915
loss: 1.020166039466858,grad_norm: 0.923319865180167, iteration: 418916
loss: 1.1514407396316528,grad_norm: 0.9999999424899956, iteration: 418917
loss: 1.0491498708724976,grad_norm: 0.9999999928993937, iteration: 418918
loss: 1.064198613166809,grad_norm: 0.9999992516382341, iteration: 418919
loss: 0.990790069103241,grad_norm: 0.7429094127779207, iteration: 418920
loss: 1.0167919397354126,grad_norm: 0.8271743874698766, iteration: 418921
loss: 1.0199426412582397,grad_norm: 0.9758007652773911, iteration: 418922
loss: 1.0251144170761108,grad_norm: 0.9999989507071395, iteration: 418923
loss: 0.9749682545661926,grad_norm: 0.9999999354053224, iteration: 418924
loss: 1.030922293663025,grad_norm: 0.7607393200864211, iteration: 418925
loss: 1.1735215187072754,grad_norm: 0.9999996787659902, iteration: 418926
loss: 1.0135797262191772,grad_norm: 0.651779740687473, iteration: 418927
loss: 0.9916976690292358,grad_norm: 0.7658165693142481, iteration: 418928
loss: 0.9945918917655945,grad_norm: 0.6181598030798325, iteration: 418929
loss: 1.0621973276138306,grad_norm: 0.8531466173406246, iteration: 418930
loss: 0.994759202003479,grad_norm: 0.7636914696447873, iteration: 418931
loss: 1.0471595525741577,grad_norm: 0.751034075685536, iteration: 418932
loss: 0.9762976169586182,grad_norm: 0.8212821028272351, iteration: 418933
loss: 1.061561942100525,grad_norm: 1.0000000718689692, iteration: 418934
loss: 1.1069015264511108,grad_norm: 0.9999993335444042, iteration: 418935
loss: 1.0652668476104736,grad_norm: 0.9999999799156076, iteration: 418936
loss: 1.0125024318695068,grad_norm: 0.9999991116176021, iteration: 418937
loss: 1.0613889694213867,grad_norm: 0.7845550803332176, iteration: 418938
loss: 1.0237314701080322,grad_norm: 0.7955427662241888, iteration: 418939
loss: 1.1094317436218262,grad_norm: 1.0000000714755277, iteration: 418940
loss: 1.0239284038543701,grad_norm: 0.865594374953768, iteration: 418941
loss: 0.9853782057762146,grad_norm: 0.9742795842737806, iteration: 418942
loss: 1.0521292686462402,grad_norm: 0.999999383677868, iteration: 418943
loss: 1.0559275150299072,grad_norm: 0.7448853112027947, iteration: 418944
loss: 1.0611705780029297,grad_norm: 0.9999996941281594, iteration: 418945
loss: 1.0144317150115967,grad_norm: 0.859727838721967, iteration: 418946
loss: 1.1985670328140259,grad_norm: 0.9999998690439608, iteration: 418947
loss: 1.0606266260147095,grad_norm: 0.9193164423789199, iteration: 418948
loss: 1.3275041580200195,grad_norm: 0.9999996116446854, iteration: 418949
loss: 1.0398272275924683,grad_norm: 0.7873279333449522, iteration: 418950
loss: 1.0187959671020508,grad_norm: 0.9999999184954101, iteration: 418951
loss: 0.9914412498474121,grad_norm: 0.8640318183685203, iteration: 418952
loss: 1.2101184129714966,grad_norm: 0.9999997978722973, iteration: 418953
loss: 1.0584261417388916,grad_norm: 0.9999990148972782, iteration: 418954
loss: 0.9939016103744507,grad_norm: 0.9999991175037859, iteration: 418955
loss: 0.9787890911102295,grad_norm: 0.806356867891812, iteration: 418956
loss: 1.1569300889968872,grad_norm: 0.9999999721726733, iteration: 418957
loss: 1.27708101272583,grad_norm: 0.9999998795540463, iteration: 418958
loss: 0.9873330593109131,grad_norm: 0.7639852815719309, iteration: 418959
loss: 1.068575382232666,grad_norm: 0.9999998212518297, iteration: 418960
loss: 1.074773907661438,grad_norm: 0.7424869395613736, iteration: 418961
loss: 1.0009835958480835,grad_norm: 0.999999494899827, iteration: 418962
loss: 1.0147631168365479,grad_norm: 0.9999992819069407, iteration: 418963
loss: 0.9885040521621704,grad_norm: 0.9999998923299955, iteration: 418964
loss: 1.1061956882476807,grad_norm: 0.9999999282293792, iteration: 418965
loss: 1.0375498533248901,grad_norm: 0.9999993613464625, iteration: 418966
loss: 1.0685676336288452,grad_norm: 0.8380108791117346, iteration: 418967
loss: 1.0597814321517944,grad_norm: 0.9999998439353891, iteration: 418968
loss: 1.0045040845870972,grad_norm: 0.9999993397260342, iteration: 418969
loss: 1.0917335748672485,grad_norm: 0.999999518787632, iteration: 418970
loss: 0.954821765422821,grad_norm: 0.8040901990280256, iteration: 418971
loss: 1.1709346771240234,grad_norm: 0.9999990517980174, iteration: 418972
loss: 1.0485485792160034,grad_norm: 0.7155820868293993, iteration: 418973
loss: 1.0978835821151733,grad_norm: 0.9999999558420709, iteration: 418974
loss: 1.0394867658615112,grad_norm: 0.9999990046458066, iteration: 418975
loss: 1.032070517539978,grad_norm: 0.76513808910044, iteration: 418976
loss: 1.0572304725646973,grad_norm: 0.9999998328100299, iteration: 418977
loss: 1.0437588691711426,grad_norm: 0.9018277778285816, iteration: 418978
loss: 1.0175832509994507,grad_norm: 0.8242118633275902, iteration: 418979
loss: 1.0264849662780762,grad_norm: 0.9999994125096963, iteration: 418980
loss: 1.078914761543274,grad_norm: 0.9730894889685102, iteration: 418981
loss: 1.0587137937545776,grad_norm: 0.99999948710937, iteration: 418982
loss: 1.0519706010818481,grad_norm: 0.77596559499419, iteration: 418983
loss: 1.0007625818252563,grad_norm: 0.7859098888911383, iteration: 418984
loss: 1.0143414735794067,grad_norm: 0.7739515068069907, iteration: 418985
loss: 1.0039045810699463,grad_norm: 0.9999991399306144, iteration: 418986
loss: 0.9853261709213257,grad_norm: 0.8610401435671923, iteration: 418987
loss: 1.046696424484253,grad_norm: 0.9999997106152406, iteration: 418988
loss: 0.9786626696586609,grad_norm: 0.7490697118662377, iteration: 418989
loss: 1.0493472814559937,grad_norm: 0.9999991477834006, iteration: 418990
loss: 0.9788031578063965,grad_norm: 0.7904272915267521, iteration: 418991
loss: 1.0052552223205566,grad_norm: 0.9999992799984987, iteration: 418992
loss: 1.0082486867904663,grad_norm: 0.8174574897047718, iteration: 418993
loss: 1.0561944246292114,grad_norm: 0.7717755015097859, iteration: 418994
loss: 1.0307064056396484,grad_norm: 0.9999993080834241, iteration: 418995
loss: 1.0426241159439087,grad_norm: 0.6346676033468299, iteration: 418996
loss: 1.06015944480896,grad_norm: 0.9999995602601164, iteration: 418997
loss: 0.9897421002388,grad_norm: 0.9999997035369004, iteration: 418998
loss: 1.0023671388626099,grad_norm: 0.7561988495938636, iteration: 418999
loss: 0.9961411952972412,grad_norm: 0.9221563565761157, iteration: 419000
loss: 1.021030068397522,grad_norm: 0.9999994952540606, iteration: 419001
loss: 1.0034655332565308,grad_norm: 0.9999994398275008, iteration: 419002
loss: 0.9780052900314331,grad_norm: 0.7345743741373321, iteration: 419003
loss: 1.0290929079055786,grad_norm: 0.9999993707005502, iteration: 419004
loss: 1.0923817157745361,grad_norm: 0.9999991044818362, iteration: 419005
loss: 0.9884822964668274,grad_norm: 0.9999990205672722, iteration: 419006
loss: 1.0481154918670654,grad_norm: 0.8181169157176742, iteration: 419007
loss: 1.0294439792633057,grad_norm: 0.749921101509642, iteration: 419008
loss: 1.2279185056686401,grad_norm: 0.9999995863362359, iteration: 419009
loss: 1.013861894607544,grad_norm: 0.9999990846710821, iteration: 419010
loss: 1.0239499807357788,grad_norm: 0.8812881883290342, iteration: 419011
loss: 1.0093801021575928,grad_norm: 0.7329691681056264, iteration: 419012
loss: 1.0018646717071533,grad_norm: 0.9614845136903142, iteration: 419013
loss: 0.9583243131637573,grad_norm: 0.7771155401879852, iteration: 419014
loss: 0.9720520973205566,grad_norm: 0.8218375398270293, iteration: 419015
loss: 1.0362452268600464,grad_norm: 0.7405206190491316, iteration: 419016
loss: 1.0786257982254028,grad_norm: 0.8237614175039503, iteration: 419017
loss: 1.041494369506836,grad_norm: 0.9999990296717415, iteration: 419018
loss: 0.9991300702095032,grad_norm: 0.8730426608266983, iteration: 419019
loss: 1.0545096397399902,grad_norm: 0.8938964169766701, iteration: 419020
loss: 1.1532573699951172,grad_norm: 0.9999998473584528, iteration: 419021
loss: 1.0463573932647705,grad_norm: 0.999999753434848, iteration: 419022
loss: 1.017738938331604,grad_norm: 0.7275642058600139, iteration: 419023
loss: 1.007766842842102,grad_norm: 0.7115199867706666, iteration: 419024
loss: 1.0094764232635498,grad_norm: 0.999999702510791, iteration: 419025
loss: 1.0442943572998047,grad_norm: 0.9999996392997421, iteration: 419026
loss: 0.9844948053359985,grad_norm: 0.9999994226703245, iteration: 419027
loss: 1.0068386793136597,grad_norm: 0.8285503415810476, iteration: 419028
loss: 1.0333887338638306,grad_norm: 0.817176161651889, iteration: 419029
loss: 1.0059945583343506,grad_norm: 0.6763976671754108, iteration: 419030
loss: 0.9986602067947388,grad_norm: 0.6336038084841273, iteration: 419031
loss: 1.0892523527145386,grad_norm: 0.9999998887818534, iteration: 419032
loss: 0.9503716230392456,grad_norm: 0.8038258448554191, iteration: 419033
loss: 1.0165728330612183,grad_norm: 0.6510540852548444, iteration: 419034
loss: 0.9748408794403076,grad_norm: 0.6658393870116148, iteration: 419035
loss: 0.9573050737380981,grad_norm: 0.8887964055564255, iteration: 419036
loss: 0.9896190166473389,grad_norm: 0.6484583677099652, iteration: 419037
loss: 1.0221900939941406,grad_norm: 0.7458445213374583, iteration: 419038
loss: 0.9816117286682129,grad_norm: 0.8217739104709082, iteration: 419039
loss: 1.0318397283554077,grad_norm: 0.8012544114370089, iteration: 419040
loss: 1.1349835395812988,grad_norm: 0.9230965988847182, iteration: 419041
loss: 0.999043881893158,grad_norm: 0.8227088271071239, iteration: 419042
loss: 0.9743859171867371,grad_norm: 0.9999997364922936, iteration: 419043
loss: 1.0232384204864502,grad_norm: 0.7470211179922904, iteration: 419044
loss: 0.9809958934783936,grad_norm: 0.7401659637236596, iteration: 419045
loss: 1.0075103044509888,grad_norm: 0.9999991347688404, iteration: 419046
loss: 0.962533712387085,grad_norm: 0.9034966637412675, iteration: 419047
loss: 1.071998953819275,grad_norm: 0.9999995527195686, iteration: 419048
loss: 0.9962300658226013,grad_norm: 0.8004912001458158, iteration: 419049
loss: 1.0084562301635742,grad_norm: 0.9999998504291059, iteration: 419050
loss: 1.1165688037872314,grad_norm: 0.9999997344490672, iteration: 419051
loss: 1.0822213888168335,grad_norm: 0.9999991721166501, iteration: 419052
loss: 1.0724472999572754,grad_norm: 0.9999995178587171, iteration: 419053
loss: 1.0782246589660645,grad_norm: 0.8510264712967247, iteration: 419054
loss: 1.012067437171936,grad_norm: 0.7361137819081781, iteration: 419055
loss: 1.0111418962478638,grad_norm: 0.8287709913498033, iteration: 419056
loss: 0.997006356716156,grad_norm: 0.6955506937150916, iteration: 419057
loss: 1.0166934728622437,grad_norm: 0.9999993983808247, iteration: 419058
loss: 1.0330718755722046,grad_norm: 0.8039262968873558, iteration: 419059
loss: 1.0255314111709595,grad_norm: 0.9999991370092189, iteration: 419060
loss: 1.0352537631988525,grad_norm: 0.9999996294966268, iteration: 419061
loss: 1.028931736946106,grad_norm: 0.9999996686838466, iteration: 419062
loss: 1.0520915985107422,grad_norm: 0.6641550713283583, iteration: 419063
loss: 1.0343140363693237,grad_norm: 0.7566749071554921, iteration: 419064
loss: 1.06600821018219,grad_norm: 0.7659981453317972, iteration: 419065
loss: 1.0424416065216064,grad_norm: 0.9999997215150531, iteration: 419066
loss: 1.1170673370361328,grad_norm: 0.9999997314185064, iteration: 419067
loss: 1.0235638618469238,grad_norm: 0.999999894334796, iteration: 419068
loss: 0.9797937273979187,grad_norm: 0.8421148864626823, iteration: 419069
loss: 1.0802332162857056,grad_norm: 0.9999997542850843, iteration: 419070
loss: 1.0627847909927368,grad_norm: 0.9999998275833908, iteration: 419071
loss: 1.003597378730774,grad_norm: 0.8381961734657729, iteration: 419072
loss: 1.03617262840271,grad_norm: 0.8097484073148971, iteration: 419073
loss: 1.0117512941360474,grad_norm: 0.8259849096948191, iteration: 419074
loss: 1.0382823944091797,grad_norm: 0.901192739989685, iteration: 419075
loss: 1.0357903242111206,grad_norm: 0.9686702660132445, iteration: 419076
loss: 1.0706144571304321,grad_norm: 0.9999998991436643, iteration: 419077
loss: 1.0304605960845947,grad_norm: 0.711805576408833, iteration: 419078
loss: 1.074985146522522,grad_norm: 0.9999989681172858, iteration: 419079
loss: 0.9734361171722412,grad_norm: 0.6926186566125137, iteration: 419080
loss: 0.9991706013679504,grad_norm: 0.8342153237541454, iteration: 419081
loss: 1.0513954162597656,grad_norm: 0.9999990013932526, iteration: 419082
loss: 0.9998785257339478,grad_norm: 0.9092934025283096, iteration: 419083
loss: 1.0662959814071655,grad_norm: 0.9999996813332556, iteration: 419084
loss: 0.9617928266525269,grad_norm: 0.6773049563834306, iteration: 419085
loss: 1.0430909395217896,grad_norm: 0.9999999272068565, iteration: 419086
loss: 0.9814149737358093,grad_norm: 0.8348800516523573, iteration: 419087
loss: 0.9845118522644043,grad_norm: 0.7263760526657626, iteration: 419088
loss: 1.105860948562622,grad_norm: 0.999999351433021, iteration: 419089
loss: 1.0182857513427734,grad_norm: 0.6900385502151349, iteration: 419090
loss: 1.005023717880249,grad_norm: 0.77042417030011, iteration: 419091
loss: 1.035365104675293,grad_norm: 0.7752516944178626, iteration: 419092
loss: 0.9972378015518188,grad_norm: 0.7324861080920912, iteration: 419093
loss: 1.2736824750900269,grad_norm: 0.999999512832014, iteration: 419094
loss: 1.0451734066009521,grad_norm: 1.0000000444929562, iteration: 419095
loss: 1.0055818557739258,grad_norm: 0.8445569773749547, iteration: 419096
loss: 1.0818352699279785,grad_norm: 0.9999997351588465, iteration: 419097
loss: 1.1334890127182007,grad_norm: 0.9999997523708255, iteration: 419098
loss: 1.0254578590393066,grad_norm: 0.9999993267058405, iteration: 419099
loss: 0.9766168594360352,grad_norm: 0.7637653240795446, iteration: 419100
loss: 1.0387932062149048,grad_norm: 0.7893895255691368, iteration: 419101
loss: 1.173656702041626,grad_norm: 0.9999998589386053, iteration: 419102
loss: 0.9811669588088989,grad_norm: 0.729908522882349, iteration: 419103
loss: 1.0606681108474731,grad_norm: 0.9999991346350003, iteration: 419104
loss: 1.0798717737197876,grad_norm: 0.8766130927665314, iteration: 419105
loss: 0.99573814868927,grad_norm: 0.8882714437042251, iteration: 419106
loss: 1.0351134538650513,grad_norm: 0.9999990080997462, iteration: 419107
loss: 1.0574913024902344,grad_norm: 0.8568508370608382, iteration: 419108
loss: 1.0589109659194946,grad_norm: 0.7720137066703179, iteration: 419109
loss: 0.9835964441299438,grad_norm: 0.9363573463850207, iteration: 419110
loss: 1.109035849571228,grad_norm: 0.9999995026347118, iteration: 419111
loss: 0.9966962337493896,grad_norm: 0.9728774979725743, iteration: 419112
loss: 1.0064964294433594,grad_norm: 0.9657026795741646, iteration: 419113
loss: 0.9471803307533264,grad_norm: 0.7453283094184554, iteration: 419114
loss: 1.0013859272003174,grad_norm: 0.7761152026949626, iteration: 419115
loss: 1.0092839002609253,grad_norm: 0.9999992872847318, iteration: 419116
loss: 1.0146743059158325,grad_norm: 0.9999989681240877, iteration: 419117
loss: 0.9930988550186157,grad_norm: 0.7637761243525641, iteration: 419118
loss: 0.9794407486915588,grad_norm: 0.6860200338402709, iteration: 419119
loss: 1.039497971534729,grad_norm: 0.7477880952595172, iteration: 419120
loss: 1.0341262817382812,grad_norm: 0.6118489586855319, iteration: 419121
loss: 1.13571035861969,grad_norm: 0.9999990788439278, iteration: 419122
loss: 1.00872004032135,grad_norm: 0.9999999830347108, iteration: 419123
loss: 1.0011991262435913,grad_norm: 0.8834552195839857, iteration: 419124
loss: 1.0772558450698853,grad_norm: 0.8481540229433064, iteration: 419125
loss: 1.042051076889038,grad_norm: 0.7343044813090419, iteration: 419126
loss: 1.0623507499694824,grad_norm: 0.7978470338119178, iteration: 419127
loss: 1.025814414024353,grad_norm: 0.9200974792332978, iteration: 419128
loss: 1.0001786947250366,grad_norm: 0.9999990373797621, iteration: 419129
loss: 1.0132490396499634,grad_norm: 0.6062907676035821, iteration: 419130
loss: 1.0707862377166748,grad_norm: 0.712725466901507, iteration: 419131
loss: 1.0691763162612915,grad_norm: 0.9999995783996894, iteration: 419132
loss: 1.0428516864776611,grad_norm: 0.7176431267938579, iteration: 419133
loss: 1.0449419021606445,grad_norm: 0.999999108041503, iteration: 419134
loss: 1.1176059246063232,grad_norm: 0.7451067069246505, iteration: 419135
loss: 1.1393202543258667,grad_norm: 0.7674415381437624, iteration: 419136
loss: 1.0981495380401611,grad_norm: 0.9999998447834523, iteration: 419137
loss: 1.0046073198318481,grad_norm: 0.7083836856368309, iteration: 419138
loss: 1.0358108282089233,grad_norm: 0.9999996825177639, iteration: 419139
loss: 1.0727477073669434,grad_norm: 0.9999996070204036, iteration: 419140
loss: 1.1811933517456055,grad_norm: 0.9999990550038436, iteration: 419141
loss: 1.033619999885559,grad_norm: 0.7814868174383783, iteration: 419142
loss: 1.0455304384231567,grad_norm: 0.7246133553942959, iteration: 419143
loss: 1.0208983421325684,grad_norm: 0.7726715849351823, iteration: 419144
loss: 1.032588005065918,grad_norm: 0.9999994780663389, iteration: 419145
loss: 1.0018953084945679,grad_norm: 0.7372425545000758, iteration: 419146
loss: 1.1226664781570435,grad_norm: 0.9999996546662767, iteration: 419147
loss: 1.0383089780807495,grad_norm: 0.9999991359112348, iteration: 419148
loss: 1.1339958906173706,grad_norm: 0.9999994359807148, iteration: 419149
loss: 1.0698843002319336,grad_norm: 0.8292200459254139, iteration: 419150
loss: 1.0712312459945679,grad_norm: 0.9999996005098492, iteration: 419151
loss: 1.0351673364639282,grad_norm: 0.9999993074863274, iteration: 419152
loss: 1.0546433925628662,grad_norm: 0.9999990491608811, iteration: 419153
loss: 1.0427665710449219,grad_norm: 0.9999992385590987, iteration: 419154
loss: 0.9713696837425232,grad_norm: 0.9999992181901086, iteration: 419155
loss: 1.018119215965271,grad_norm: 0.8292305633306697, iteration: 419156
loss: 1.0999013185501099,grad_norm: 0.8712793068697205, iteration: 419157
loss: 1.0115832090377808,grad_norm: 0.9999993178035146, iteration: 419158
loss: 1.1111061573028564,grad_norm: 0.9999996597323834, iteration: 419159
loss: 1.0185929536819458,grad_norm: 0.9999991675144645, iteration: 419160
loss: 1.101075291633606,grad_norm: 0.9999993951452479, iteration: 419161
loss: 1.0377511978149414,grad_norm: 0.8397963550069826, iteration: 419162
loss: 1.1227110624313354,grad_norm: 0.9173034611189997, iteration: 419163
loss: 0.9848668575286865,grad_norm: 0.6782330040156019, iteration: 419164
loss: 1.1883541345596313,grad_norm: 0.9999997360967033, iteration: 419165
loss: 1.0749564170837402,grad_norm: 0.9999999734516682, iteration: 419166
loss: 1.1995915174484253,grad_norm: 0.9999994330975518, iteration: 419167
loss: 1.1903431415557861,grad_norm: 0.9999992996333453, iteration: 419168
loss: 1.0774353742599487,grad_norm: 0.7743118260100649, iteration: 419169
loss: 1.1274837255477905,grad_norm: 0.9999999097571522, iteration: 419170
loss: 1.1149120330810547,grad_norm: 0.9999998018590057, iteration: 419171
loss: 1.329085111618042,grad_norm: 0.9999999103853702, iteration: 419172
loss: 1.2157893180847168,grad_norm: 0.9999999285445599, iteration: 419173
loss: 1.0751616954803467,grad_norm: 1.0000000100065594, iteration: 419174
loss: 1.216709852218628,grad_norm: 0.9999997187728958, iteration: 419175
loss: 1.0758987665176392,grad_norm: 0.9999995549329977, iteration: 419176
loss: 1.0416929721832275,grad_norm: 0.9999991894135217, iteration: 419177
loss: 1.0093742609024048,grad_norm: 0.999999934923593, iteration: 419178
loss: 1.1929693222045898,grad_norm: 0.9999998802891191, iteration: 419179
loss: 1.0704742670059204,grad_norm: 0.9999991590249516, iteration: 419180
loss: 1.25260591506958,grad_norm: 0.9999995576051978, iteration: 419181
loss: 1.05302894115448,grad_norm: 0.8666981372088827, iteration: 419182
loss: 1.3611748218536377,grad_norm: 0.9999999200491081, iteration: 419183
loss: 1.1076973676681519,grad_norm: 0.9999997621422259, iteration: 419184
loss: 1.1457383632659912,grad_norm: 0.9999995677423084, iteration: 419185
loss: 1.046790599822998,grad_norm: 0.9999996624630562, iteration: 419186
loss: 1.0576331615447998,grad_norm: 0.9999997055433005, iteration: 419187
loss: 1.0714486837387085,grad_norm: 0.8450643060916452, iteration: 419188
loss: 1.1019481420516968,grad_norm: 0.9999995973037414, iteration: 419189
loss: 1.142726182937622,grad_norm: 0.9999994072211656, iteration: 419190
loss: 1.3271911144256592,grad_norm: 0.999999736969689, iteration: 419191
loss: 1.1201719045639038,grad_norm: 0.9679248984008293, iteration: 419192
loss: 1.1105681657791138,grad_norm: 0.9257359013374916, iteration: 419193
loss: 1.1278184652328491,grad_norm: 0.9999991449157664, iteration: 419194
loss: 0.9920729398727417,grad_norm: 0.9999992142518549, iteration: 419195
loss: 1.1645797491073608,grad_norm: 0.9999999908608878, iteration: 419196
loss: 1.1625295877456665,grad_norm: 0.9999989559089952, iteration: 419197
loss: 1.138098120689392,grad_norm: 0.9724630882502063, iteration: 419198
loss: 1.1353228092193604,grad_norm: 0.9999993570801675, iteration: 419199
loss: 1.1347370147705078,grad_norm: 0.9999991088742028, iteration: 419200
loss: 1.0179617404937744,grad_norm: 0.9999991803779291, iteration: 419201
loss: 1.0857954025268555,grad_norm: 0.999999410500834, iteration: 419202
loss: 1.0966469049453735,grad_norm: 0.7264005977192365, iteration: 419203
loss: 1.0847347974777222,grad_norm: 0.9999996782837207, iteration: 419204
loss: 1.0842276811599731,grad_norm: 0.9999998856376686, iteration: 419205
loss: 1.1506088972091675,grad_norm: 0.9999993609700644, iteration: 419206
loss: 1.1404403448104858,grad_norm: 0.999999749507199, iteration: 419207
loss: 0.9941284656524658,grad_norm: 0.9003977883011683, iteration: 419208
loss: 1.146647572517395,grad_norm: 0.914959235969424, iteration: 419209
loss: 1.6607413291931152,grad_norm: 0.9999998109042604, iteration: 419210
loss: 1.0066694021224976,grad_norm: 0.9999998851971588, iteration: 419211
loss: 1.1558756828308105,grad_norm: 0.9999997732593013, iteration: 419212
loss: 1.1061917543411255,grad_norm: 0.9999998609183237, iteration: 419213
loss: 1.0637251138687134,grad_norm: 0.657180503446303, iteration: 419214
loss: 1.3569836616516113,grad_norm: 0.9999995046661379, iteration: 419215
loss: 1.080032467842102,grad_norm: 0.9999992267402141, iteration: 419216
loss: 1.2043004035949707,grad_norm: 0.9999993040364005, iteration: 419217
loss: 1.1621981859207153,grad_norm: 1.0000001121905344, iteration: 419218
loss: 1.1216810941696167,grad_norm: 0.9999994845008597, iteration: 419219
loss: 1.0009840726852417,grad_norm: 0.9999992220423033, iteration: 419220
loss: 1.304344892501831,grad_norm: 0.9999998130086162, iteration: 419221
loss: 1.286324381828308,grad_norm: 0.9999996432522875, iteration: 419222
loss: 1.12814462184906,grad_norm: 0.9999991133765467, iteration: 419223
loss: 1.079066514968872,grad_norm: 0.9985995096699778, iteration: 419224
loss: 1.106827735900879,grad_norm: 0.9999992273455461, iteration: 419225
loss: 1.1692259311676025,grad_norm: 0.9999997412697381, iteration: 419226
loss: 1.1492469310760498,grad_norm: 0.9999999550365337, iteration: 419227
loss: 1.1166248321533203,grad_norm: 0.9999995442962656, iteration: 419228
loss: 1.1462211608886719,grad_norm: 0.9999999282591834, iteration: 419229
loss: 1.1144009828567505,grad_norm: 0.9999995547659335, iteration: 419230
loss: 1.1472978591918945,grad_norm: 0.999999663124788, iteration: 419231
loss: 1.388593316078186,grad_norm: 0.9999999066658325, iteration: 419232
loss: 1.2043845653533936,grad_norm: 0.999999437447873, iteration: 419233
loss: 1.0828720331192017,grad_norm: 0.9999994305746407, iteration: 419234
loss: 1.133252739906311,grad_norm: 1.000000122022497, iteration: 419235
loss: 1.1398842334747314,grad_norm: 0.9999996964897906, iteration: 419236
loss: 1.1588410139083862,grad_norm: 0.9999995340844761, iteration: 419237
loss: 1.1683869361877441,grad_norm: 0.9999999959146292, iteration: 419238
loss: 1.192039966583252,grad_norm: 0.9999994397067805, iteration: 419239
loss: 1.1191730499267578,grad_norm: 0.9999997781826128, iteration: 419240
loss: 1.3522626161575317,grad_norm: 0.9999998912818128, iteration: 419241
loss: 1.2912653684616089,grad_norm: 0.9999998650102172, iteration: 419242
loss: 1.077295184135437,grad_norm: 0.9999996888560355, iteration: 419243
loss: 1.2801886796951294,grad_norm: 0.9999997702238393, iteration: 419244
loss: 1.2571450471878052,grad_norm: 0.9999991548256825, iteration: 419245
loss: 1.1284786462783813,grad_norm: 0.9622328250926357, iteration: 419246
loss: 1.0456290245056152,grad_norm: 0.9999999969761115, iteration: 419247
loss: 1.1506588459014893,grad_norm: 0.9999992374960301, iteration: 419248
loss: 1.1398661136627197,grad_norm: 0.9999998457552786, iteration: 419249
loss: 1.0650653839111328,grad_norm: 0.7788765999301454, iteration: 419250
loss: 0.9891000986099243,grad_norm: 0.9999999175209693, iteration: 419251
loss: 1.1032962799072266,grad_norm: 0.9999995349101319, iteration: 419252
loss: 1.2449179887771606,grad_norm: 0.9999998154802847, iteration: 419253
loss: 1.1075470447540283,grad_norm: 0.9999991715455859, iteration: 419254
loss: 1.2288285493850708,grad_norm: 0.9999999453989675, iteration: 419255
loss: 1.0094339847564697,grad_norm: 0.9999990570713834, iteration: 419256
loss: 1.1522046327590942,grad_norm: 0.9999997997261569, iteration: 419257
loss: 1.071697473526001,grad_norm: 0.9193762339810585, iteration: 419258
loss: 1.0672917366027832,grad_norm: 0.9999993271138674, iteration: 419259
loss: 1.0417060852050781,grad_norm: 0.9999995679150518, iteration: 419260
loss: 1.0268073081970215,grad_norm: 0.9999996146270745, iteration: 419261
loss: 1.0796602964401245,grad_norm: 0.9999995375960011, iteration: 419262
loss: 1.0625946521759033,grad_norm: 0.8837449402514522, iteration: 419263
loss: 1.2531569004058838,grad_norm: 0.9999994605572787, iteration: 419264
loss: 1.0599488019943237,grad_norm: 0.9999999192031372, iteration: 419265
loss: 1.1605849266052246,grad_norm: 0.9999997919585686, iteration: 419266
loss: 1.051451563835144,grad_norm: 0.9999992778273388, iteration: 419267
loss: 1.017078161239624,grad_norm: 0.9999999659604195, iteration: 419268
loss: 1.0411193370819092,grad_norm: 0.9999996495491819, iteration: 419269
loss: 1.12959623336792,grad_norm: 0.9999996990477839, iteration: 419270
loss: 1.0294921398162842,grad_norm: 0.8711440269376477, iteration: 419271
loss: 1.1010732650756836,grad_norm: 0.9999993820151141, iteration: 419272
loss: 0.9944207072257996,grad_norm: 0.8826609779859493, iteration: 419273
loss: 1.34573233127594,grad_norm: 0.9999998483237139, iteration: 419274
loss: 1.1390572786331177,grad_norm: 0.9999995030541374, iteration: 419275
loss: 1.0995471477508545,grad_norm: 0.999999330803884, iteration: 419276
loss: 1.1337707042694092,grad_norm: 0.999999797618676, iteration: 419277
loss: 1.073436975479126,grad_norm: 0.9999990840514683, iteration: 419278
loss: 1.1512525081634521,grad_norm: 0.9999998574156749, iteration: 419279
loss: 1.0295028686523438,grad_norm: 0.9999992185815466, iteration: 419280
loss: 1.0581191778182983,grad_norm: 0.8393249011057692, iteration: 419281
loss: 1.113641381263733,grad_norm: 0.9999998775298439, iteration: 419282
loss: 1.088707685470581,grad_norm: 0.9999991247691494, iteration: 419283
loss: 1.0774829387664795,grad_norm: 0.9999998602619266, iteration: 419284
loss: 1.017586588859558,grad_norm: 0.9999992306717677, iteration: 419285
loss: 1.1557543277740479,grad_norm: 0.9999995652582455, iteration: 419286
loss: 1.0166622400283813,grad_norm: 0.999999102709764, iteration: 419287
loss: 1.0467631816864014,grad_norm: 0.7516182435617021, iteration: 419288
loss: 0.9824342131614685,grad_norm: 0.8976201679929297, iteration: 419289
loss: 1.109627604484558,grad_norm: 0.9999993079762954, iteration: 419290
loss: 1.0487931966781616,grad_norm: 0.9999998690942146, iteration: 419291
loss: 1.0705910921096802,grad_norm: 0.9999992670927124, iteration: 419292
loss: 1.1356927156448364,grad_norm: 0.9231492678312813, iteration: 419293
loss: 1.0398792028427124,grad_norm: 0.6880729322464266, iteration: 419294
loss: 1.084790587425232,grad_norm: 0.9999999128050436, iteration: 419295
loss: 1.0536937713623047,grad_norm: 0.9999999648175318, iteration: 419296
loss: 1.1661393642425537,grad_norm: 0.9999998547771073, iteration: 419297
loss: 1.0033379793167114,grad_norm: 0.8127452445708377, iteration: 419298
loss: 1.1887060403823853,grad_norm: 0.9999999419968807, iteration: 419299
loss: 1.104415774345398,grad_norm: 0.9999993464657587, iteration: 419300
loss: 1.250350832939148,grad_norm: 0.999999808229035, iteration: 419301
loss: 1.3031439781188965,grad_norm: 0.9999999797339073, iteration: 419302
loss: 1.0819096565246582,grad_norm: 0.9999997167926271, iteration: 419303
loss: 1.1317344903945923,grad_norm: 0.9999999370399755, iteration: 419304
loss: 1.0739996433258057,grad_norm: 0.9999994953694284, iteration: 419305
loss: 1.0709091424942017,grad_norm: 0.9999999120376067, iteration: 419306
loss: 1.1498552560806274,grad_norm: 0.9999994644760651, iteration: 419307
loss: 1.2416514158248901,grad_norm: 0.9999996750976958, iteration: 419308
loss: 1.1352020502090454,grad_norm: 0.9999997584034087, iteration: 419309
loss: 1.2669321298599243,grad_norm: 0.9999994068624581, iteration: 419310
loss: 1.147519588470459,grad_norm: 0.886015992722724, iteration: 419311
loss: 1.095975399017334,grad_norm: 0.9999991116348542, iteration: 419312
loss: 1.0385513305664062,grad_norm: 0.8663375140627464, iteration: 419313
loss: 1.0736446380615234,grad_norm: 0.9999999726107551, iteration: 419314
loss: 1.2371808290481567,grad_norm: 0.9999997872475869, iteration: 419315
loss: 1.0785133838653564,grad_norm: 0.9999993271729087, iteration: 419316
loss: 1.0794010162353516,grad_norm: 1.0000000919661352, iteration: 419317
loss: 1.061309576034546,grad_norm: 0.8638593819358402, iteration: 419318
loss: 1.1878165006637573,grad_norm: 0.999999678066884, iteration: 419319
loss: 1.2131285667419434,grad_norm: 0.9999998597025653, iteration: 419320
loss: 1.0340862274169922,grad_norm: 0.8159732131723832, iteration: 419321
loss: 1.0411536693572998,grad_norm: 0.8338739093144655, iteration: 419322
loss: 1.0421972274780273,grad_norm: 0.9999999814095618, iteration: 419323
loss: 0.9959903359413147,grad_norm: 0.9419559863118852, iteration: 419324
loss: 1.0160642862319946,grad_norm: 0.6705183516214274, iteration: 419325
loss: 1.0346494913101196,grad_norm: 0.9999991570474804, iteration: 419326
loss: 1.1397572755813599,grad_norm: 0.9999994014085909, iteration: 419327
loss: 1.0166078805923462,grad_norm: 0.9999991404950196, iteration: 419328
loss: 1.2320438623428345,grad_norm: 0.9999998483024277, iteration: 419329
loss: 1.0640628337860107,grad_norm: 0.8891664529818286, iteration: 419330
loss: 1.0139214992523193,grad_norm: 0.8144658190719472, iteration: 419331
loss: 1.0187604427337646,grad_norm: 0.8572257447571611, iteration: 419332
loss: 1.3528634309768677,grad_norm: 0.9999998042745916, iteration: 419333
loss: 1.0833877325057983,grad_norm: 0.8917857118438967, iteration: 419334
loss: 1.094272255897522,grad_norm: 0.9999998143984432, iteration: 419335
loss: 1.182206392288208,grad_norm: 0.9999995779994664, iteration: 419336
loss: 1.028889536857605,grad_norm: 0.9999998399574314, iteration: 419337
loss: 1.1109864711761475,grad_norm: 0.999999237998955, iteration: 419338
loss: 1.1026006937026978,grad_norm: 0.9999998035095138, iteration: 419339
loss: 1.029046654701233,grad_norm: 0.7629113931915324, iteration: 419340
loss: 0.978914201259613,grad_norm: 0.8578118707733869, iteration: 419341
loss: 1.1253467798233032,grad_norm: 0.9999996482710183, iteration: 419342
loss: 1.1337089538574219,grad_norm: 0.9999995795371617, iteration: 419343
loss: 1.0897529125213623,grad_norm: 0.9999997648017565, iteration: 419344
loss: 0.9792640805244446,grad_norm: 0.9575027005491636, iteration: 419345
loss: 1.1235172748565674,grad_norm: 0.9999993651251609, iteration: 419346
loss: 1.014350175857544,grad_norm: 0.9999995487570738, iteration: 419347
loss: 1.1811256408691406,grad_norm: 0.9999993500768857, iteration: 419348
loss: 1.0149509906768799,grad_norm: 0.9999990116672663, iteration: 419349
loss: 1.1005135774612427,grad_norm: 0.9999997179792532, iteration: 419350
loss: 1.1992833614349365,grad_norm: 0.9999995305502658, iteration: 419351
loss: 1.0021356344223022,grad_norm: 0.9999992646775324, iteration: 419352
loss: 1.0634998083114624,grad_norm: 0.9999999616649855, iteration: 419353
loss: 1.1651287078857422,grad_norm: 0.9999991201784845, iteration: 419354
loss: 0.9916560053825378,grad_norm: 0.9999993251639987, iteration: 419355
loss: 1.2320786714553833,grad_norm: 1.0000000198225603, iteration: 419356
loss: 1.1580586433410645,grad_norm: 1.0000000640963245, iteration: 419357
loss: 1.0383884906768799,grad_norm: 0.999999979050427, iteration: 419358
loss: 1.1083616018295288,grad_norm: 0.9999993517510876, iteration: 419359
loss: 1.1587492227554321,grad_norm: 0.9999996176336544, iteration: 419360
loss: 1.0000022649765015,grad_norm: 0.9999998066971417, iteration: 419361
loss: 1.048834204673767,grad_norm: 0.9999998753937785, iteration: 419362
loss: 1.0494308471679688,grad_norm: 0.9999997892833119, iteration: 419363
loss: 1.0845004320144653,grad_norm: 0.999999648386002, iteration: 419364
loss: 1.0153039693832397,grad_norm: 0.9999990382236228, iteration: 419365
loss: 1.0371949672698975,grad_norm: 0.9999996536357879, iteration: 419366
loss: 1.0857579708099365,grad_norm: 0.9999997466160913, iteration: 419367
loss: 1.0378328561782837,grad_norm: 0.9999991705147805, iteration: 419368
loss: 1.0271201133728027,grad_norm: 0.9999994367013576, iteration: 419369
loss: 1.0309418439865112,grad_norm: 0.9999989780858681, iteration: 419370
loss: 1.3306366205215454,grad_norm: 1.0000000091356176, iteration: 419371
loss: 1.0563234090805054,grad_norm: 0.99999937847176, iteration: 419372
loss: 1.0252199172973633,grad_norm: 0.9999997058951953, iteration: 419373
loss: 1.004785418510437,grad_norm: 0.8850759676394038, iteration: 419374
loss: 1.0533267259597778,grad_norm: 0.7726613244159266, iteration: 419375
loss: 1.01949143409729,grad_norm: 0.7810072407019304, iteration: 419376
loss: 1.148991346359253,grad_norm: 0.9999998680359271, iteration: 419377
loss: 1.0943015813827515,grad_norm: 0.9999998613192849, iteration: 419378
loss: 1.068718433380127,grad_norm: 0.9999993984168364, iteration: 419379
loss: 1.0550720691680908,grad_norm: 0.9999994587127765, iteration: 419380
loss: 0.9928930997848511,grad_norm: 0.7623193741996913, iteration: 419381
loss: 1.0529779195785522,grad_norm: 0.7486628087868751, iteration: 419382
loss: 1.036563515663147,grad_norm: 0.9999994696444205, iteration: 419383
loss: 1.1608115434646606,grad_norm: 0.9999999781099339, iteration: 419384
loss: 0.9567492008209229,grad_norm: 0.71992102681679, iteration: 419385
loss: 0.9910162091255188,grad_norm: 0.9999990907626789, iteration: 419386
loss: 1.0218418836593628,grad_norm: 0.8109247781167835, iteration: 419387
loss: 1.0184355974197388,grad_norm: 0.9372269346419487, iteration: 419388
loss: 0.998168408870697,grad_norm: 0.8688476986692961, iteration: 419389
loss: 0.9409064650535583,grad_norm: 0.8774224171124985, iteration: 419390
loss: 1.107710361480713,grad_norm: 0.9999996759306979, iteration: 419391
loss: 0.9655553698539734,grad_norm: 0.7365250766434837, iteration: 419392
loss: 1.0157908201217651,grad_norm: 0.999999867406433, iteration: 419393
loss: 1.1095322370529175,grad_norm: 0.9999992854641331, iteration: 419394
loss: 1.1339800357818604,grad_norm: 0.9999998672559841, iteration: 419395
loss: 1.025815725326538,grad_norm: 0.9999995072627943, iteration: 419396
loss: 1.0458416938781738,grad_norm: 0.9999999410467216, iteration: 419397
loss: 1.1060236692428589,grad_norm: 0.9999999121722153, iteration: 419398
loss: 1.1963067054748535,grad_norm: 0.9999993306578565, iteration: 419399
loss: 1.0864219665527344,grad_norm: 0.9999998418616743, iteration: 419400
loss: 1.0641634464263916,grad_norm: 0.9999997035678522, iteration: 419401
loss: 1.0432161092758179,grad_norm: 0.9999994626784824, iteration: 419402
loss: 1.0348871946334839,grad_norm: 0.9999997752951745, iteration: 419403
loss: 1.075045108795166,grad_norm: 0.999999912420296, iteration: 419404
loss: 1.1251261234283447,grad_norm: 0.8275705929593112, iteration: 419405
loss: 0.999089241027832,grad_norm: 0.9999998302067918, iteration: 419406
loss: 1.0064233541488647,grad_norm: 0.9999992359781431, iteration: 419407
loss: 1.1348211765289307,grad_norm: 0.9999995090979967, iteration: 419408
loss: 1.1149771213531494,grad_norm: 0.9999998375931693, iteration: 419409
loss: 0.9864435791969299,grad_norm: 0.9438070498701807, iteration: 419410
loss: 1.0480232238769531,grad_norm: 0.9999996749821284, iteration: 419411
loss: 1.2384392023086548,grad_norm: 0.999999947839349, iteration: 419412
loss: 1.0213199853897095,grad_norm: 0.7594574448441259, iteration: 419413
loss: 1.135028600692749,grad_norm: 0.999999898651495, iteration: 419414
loss: 1.0393259525299072,grad_norm: 0.9999991755168129, iteration: 419415
loss: 1.0386075973510742,grad_norm: 0.9999995657062428, iteration: 419416
loss: 1.1485682725906372,grad_norm: 0.9999995890551523, iteration: 419417
loss: 1.031076192855835,grad_norm: 0.999999951459969, iteration: 419418
loss: 1.096169114112854,grad_norm: 0.9999996582612354, iteration: 419419
loss: 1.0887806415557861,grad_norm: 0.9999996461828924, iteration: 419420
loss: 1.0989478826522827,grad_norm: 0.9429882684425515, iteration: 419421
loss: 1.256134033203125,grad_norm: 0.999999864501526, iteration: 419422
loss: 1.096354365348816,grad_norm: 0.9999994084403541, iteration: 419423
loss: 1.0889812707901,grad_norm: 0.9162622634322779, iteration: 419424
loss: 1.1910576820373535,grad_norm: 0.9999996871254528, iteration: 419425
loss: 1.1129454374313354,grad_norm: 0.9999996186211734, iteration: 419426
loss: 1.1722453832626343,grad_norm: 0.9999993344046327, iteration: 419427
loss: 1.3285220861434937,grad_norm: 0.9999998161833623, iteration: 419428
loss: 1.1240694522857666,grad_norm: 0.9999994675995388, iteration: 419429
loss: 1.0704939365386963,grad_norm: 1.000000003764423, iteration: 419430
loss: 1.2863929271697998,grad_norm: 0.9999997279529736, iteration: 419431
loss: 1.0477790832519531,grad_norm: 0.8377921292211262, iteration: 419432
loss: 1.2126941680908203,grad_norm: 0.9999999101044393, iteration: 419433
loss: 1.045754313468933,grad_norm: 0.9999998112429741, iteration: 419434
loss: 1.0836427211761475,grad_norm: 0.9401356110940667, iteration: 419435
loss: 1.036321997642517,grad_norm: 0.99999913671221, iteration: 419436
loss: 1.0314136743545532,grad_norm: 0.9198490126799019, iteration: 419437
loss: 1.114900827407837,grad_norm: 0.9694363703026971, iteration: 419438
loss: 1.0204589366912842,grad_norm: 0.9688751273649493, iteration: 419439
loss: 1.0090028047561646,grad_norm: 0.8751736045868541, iteration: 419440
loss: 1.1558558940887451,grad_norm: 0.9999997410482866, iteration: 419441
loss: 1.043272852897644,grad_norm: 0.9601160950193173, iteration: 419442
loss: 1.0447795391082764,grad_norm: 0.8624361121766911, iteration: 419443
loss: 1.1563752889633179,grad_norm: 0.9999991894396673, iteration: 419444
loss: 1.145901083946228,grad_norm: 0.999999575218302, iteration: 419445
loss: 1.0284160375595093,grad_norm: 0.9999994440428758, iteration: 419446
loss: 1.0713222026824951,grad_norm: 0.9999994860721814, iteration: 419447
loss: 1.034946322441101,grad_norm: 0.9999995782824724, iteration: 419448
loss: 1.0978803634643555,grad_norm: 0.9999997739244262, iteration: 419449
loss: 1.1111409664154053,grad_norm: 0.9999997122315911, iteration: 419450
loss: 1.1513673067092896,grad_norm: 0.9999999808364649, iteration: 419451
loss: 1.204902172088623,grad_norm: 0.9999997143833158, iteration: 419452
loss: 1.0033599138259888,grad_norm: 0.9999999611069827, iteration: 419453
loss: 1.1051851511001587,grad_norm: 0.9999998478397372, iteration: 419454
loss: 1.0809905529022217,grad_norm: 0.9999992092155952, iteration: 419455
loss: 1.0633842945098877,grad_norm: 0.8858592908680338, iteration: 419456
loss: 1.0789204835891724,grad_norm: 0.8094968339513456, iteration: 419457
loss: 1.1839052438735962,grad_norm: 0.9999993828918141, iteration: 419458
loss: 1.0119961500167847,grad_norm: 0.9999991416824254, iteration: 419459
loss: 1.1755783557891846,grad_norm: 1.000000009271525, iteration: 419460
loss: 1.1436940431594849,grad_norm: 0.9999992062712207, iteration: 419461
loss: 1.2129384279251099,grad_norm: 0.999999897285262, iteration: 419462
loss: 1.045048475265503,grad_norm: 0.9999994747722478, iteration: 419463
loss: 1.0779294967651367,grad_norm: 0.9999992207183064, iteration: 419464
loss: 1.0924227237701416,grad_norm: 0.9999996192682998, iteration: 419465
loss: 1.0813583135604858,grad_norm: 0.9999991468198459, iteration: 419466
loss: 1.068662405014038,grad_norm: 0.9999991359415055, iteration: 419467
loss: 1.0652711391448975,grad_norm: 0.999999931014315, iteration: 419468
loss: 1.0530110597610474,grad_norm: 0.9070724676189436, iteration: 419469
loss: 1.0005674362182617,grad_norm: 0.9061259638856377, iteration: 419470
loss: 1.0008667707443237,grad_norm: 0.9245557484735578, iteration: 419471
loss: 1.0975865125656128,grad_norm: 0.7260197273958269, iteration: 419472
loss: 1.1710853576660156,grad_norm: 0.999999596419421, iteration: 419473
loss: 1.0489023923873901,grad_norm: 0.9999994494330899, iteration: 419474
loss: 1.131014108657837,grad_norm: 0.9999998499721043, iteration: 419475
loss: 1.0767098665237427,grad_norm: 0.8877830274504859, iteration: 419476
loss: 1.0609097480773926,grad_norm: 0.9999997681946344, iteration: 419477
loss: 0.9726322293281555,grad_norm: 0.7859651422222466, iteration: 419478
loss: 1.144339680671692,grad_norm: 1.000000032548838, iteration: 419479
loss: 1.141408920288086,grad_norm: 0.99999995011928, iteration: 419480
loss: 1.0081158876419067,grad_norm: 0.8145811995138357, iteration: 419481
loss: 1.00368332862854,grad_norm: 0.8216695664751275, iteration: 419482
loss: 1.005033254623413,grad_norm: 0.7956251455647618, iteration: 419483
loss: 1.0087974071502686,grad_norm: 0.9559980721215394, iteration: 419484
loss: 1.0138750076293945,grad_norm: 0.930973654824305, iteration: 419485
loss: 1.0357390642166138,grad_norm: 0.7758045518066333, iteration: 419486
loss: 1.0209989547729492,grad_norm: 0.7895301625844008, iteration: 419487
loss: 1.0240826606750488,grad_norm: 0.7557660242170813, iteration: 419488
loss: 1.031415581703186,grad_norm: 0.9999991408995722, iteration: 419489
loss: 1.0270576477050781,grad_norm: 0.894926115068104, iteration: 419490
loss: 1.138248085975647,grad_norm: 0.9999991796842131, iteration: 419491
loss: 1.029895305633545,grad_norm: 0.7168809794545402, iteration: 419492
loss: 1.0194069147109985,grad_norm: 0.8381228239769304, iteration: 419493
loss: 1.0263793468475342,grad_norm: 0.999999704152037, iteration: 419494
loss: 1.008416771888733,grad_norm: 0.9999997450018313, iteration: 419495
loss: 0.9678946137428284,grad_norm: 0.8660146210015276, iteration: 419496
loss: 1.015830397605896,grad_norm: 0.824357905459888, iteration: 419497
loss: 1.0946096181869507,grad_norm: 0.9999999547755914, iteration: 419498
loss: 0.9990095496177673,grad_norm: 0.7731405656950205, iteration: 419499
loss: 1.0380301475524902,grad_norm: 0.9999993150271476, iteration: 419500
loss: 1.042635202407837,grad_norm: 0.9999998291818197, iteration: 419501
loss: 1.0366716384887695,grad_norm: 0.9999992732154493, iteration: 419502
loss: 1.0043447017669678,grad_norm: 0.8170026545185017, iteration: 419503
loss: 1.001417636871338,grad_norm: 0.99999967858606, iteration: 419504
loss: 0.9823102951049805,grad_norm: 0.9999996911978003, iteration: 419505
loss: 1.116020917892456,grad_norm: 0.999999438695202, iteration: 419506
loss: 0.981333315372467,grad_norm: 0.8738949768757809, iteration: 419507
loss: 1.0303564071655273,grad_norm: 0.9999993462821278, iteration: 419508
loss: 1.0106148719787598,grad_norm: 0.8901434907657247, iteration: 419509
loss: 1.0102152824401855,grad_norm: 0.6762149989713029, iteration: 419510
loss: 1.0439772605895996,grad_norm: 0.8036952817186285, iteration: 419511
loss: 1.0554991960525513,grad_norm: 0.839155670500396, iteration: 419512
loss: 1.0189790725708008,grad_norm: 0.9999990326172095, iteration: 419513
loss: 1.0344072580337524,grad_norm: 0.7906010005957629, iteration: 419514
loss: 0.9985612630844116,grad_norm: 0.9999998387614153, iteration: 419515
loss: 1.03309166431427,grad_norm: 0.9114813894172089, iteration: 419516
loss: 1.0053658485412598,grad_norm: 0.7798063914397805, iteration: 419517
loss: 0.9873483777046204,grad_norm: 0.7520961148968036, iteration: 419518
loss: 0.9954912066459656,grad_norm: 0.8681806393271415, iteration: 419519
loss: 0.9555710554122925,grad_norm: 0.7760352649858258, iteration: 419520
loss: 1.0098350048065186,grad_norm: 0.9999993682105499, iteration: 419521
loss: 1.002221941947937,grad_norm: 0.8013860495272935, iteration: 419522
loss: 0.9849600195884705,grad_norm: 0.7783525887929352, iteration: 419523
loss: 1.0395573377609253,grad_norm: 0.9999998608733672, iteration: 419524
loss: 1.0386053323745728,grad_norm: 0.6967522871612684, iteration: 419525
loss: 1.0227181911468506,grad_norm: 0.727440850599329, iteration: 419526
loss: 0.9991931319236755,grad_norm: 0.7679707661452252, iteration: 419527
loss: 1.010289192199707,grad_norm: 0.897958043800908, iteration: 419528
loss: 1.0679253339767456,grad_norm: 0.9999991513635299, iteration: 419529
loss: 0.9955036640167236,grad_norm: 0.9312769466660569, iteration: 419530
loss: 0.996049702167511,grad_norm: 0.8405744235666693, iteration: 419531
loss: 1.1035298109054565,grad_norm: 0.9999995852648753, iteration: 419532
loss: 1.064908742904663,grad_norm: 0.9999991844820559, iteration: 419533
loss: 0.9970510005950928,grad_norm: 0.999999952362954, iteration: 419534
loss: 1.032901644706726,grad_norm: 0.9999996590539909, iteration: 419535
loss: 0.973529577255249,grad_norm: 0.7337805997558194, iteration: 419536
loss: 0.9816571474075317,grad_norm: 0.9999991509544455, iteration: 419537
loss: 0.9809537529945374,grad_norm: 0.6908241878879826, iteration: 419538
loss: 1.0726884603500366,grad_norm: 0.9999992812630146, iteration: 419539
loss: 0.9869236350059509,grad_norm: 0.8274388392878053, iteration: 419540
loss: 0.9939010143280029,grad_norm: 0.600374895477464, iteration: 419541
loss: 0.9897216558456421,grad_norm: 0.9999991083673804, iteration: 419542
loss: 1.0082212686538696,grad_norm: 0.7840468762480266, iteration: 419543
loss: 1.0033245086669922,grad_norm: 0.7341320980685445, iteration: 419544
loss: 1.0331345796585083,grad_norm: 0.8215449431443034, iteration: 419545
loss: 1.0130467414855957,grad_norm: 0.7176780817741003, iteration: 419546
loss: 1.0081956386566162,grad_norm: 0.999999165904146, iteration: 419547
loss: 1.091878890991211,grad_norm: 0.9597796984152901, iteration: 419548
loss: 1.0879411697387695,grad_norm: 0.9999995377104741, iteration: 419549
loss: 1.0577194690704346,grad_norm: 0.9999997534108541, iteration: 419550
loss: 1.0119199752807617,grad_norm: 0.7050697234879799, iteration: 419551
loss: 1.057692289352417,grad_norm: 0.999999753063228, iteration: 419552
loss: 0.9947627186775208,grad_norm: 0.8164100647672771, iteration: 419553
loss: 1.0167522430419922,grad_norm: 0.8613095862397339, iteration: 419554
loss: 1.0028311014175415,grad_norm: 0.9999999556743332, iteration: 419555
loss: 0.983929455280304,grad_norm: 0.7855361046791861, iteration: 419556
loss: 1.0147743225097656,grad_norm: 0.7365190134231344, iteration: 419557
loss: 1.1091643571853638,grad_norm: 0.8218575622688112, iteration: 419558
loss: 1.0101747512817383,grad_norm: 0.9999991118017684, iteration: 419559
loss: 1.0103598833084106,grad_norm: 0.6524291500401991, iteration: 419560
loss: 1.0083889961242676,grad_norm: 0.7504435079945511, iteration: 419561
loss: 0.9888767600059509,grad_norm: 0.7360139524284158, iteration: 419562
loss: 1.0415905714035034,grad_norm: 0.9999995398457483, iteration: 419563
loss: 1.0312001705169678,grad_norm: 0.9999999272447457, iteration: 419564
loss: 1.2006546258926392,grad_norm: 0.9999998656430087, iteration: 419565
loss: 1.026995062828064,grad_norm: 0.9695445128985328, iteration: 419566
loss: 1.0047986507415771,grad_norm: 0.8679220292329158, iteration: 419567
loss: 0.9852273464202881,grad_norm: 0.8017931185298193, iteration: 419568
loss: 1.041395902633667,grad_norm: 0.874311078540066, iteration: 419569
loss: 1.0352067947387695,grad_norm: 0.9999997316157017, iteration: 419570
loss: 1.0115448236465454,grad_norm: 0.7449400498162495, iteration: 419571
loss: 0.9794003367424011,grad_norm: 0.8362671462107478, iteration: 419572
loss: 1.0270137786865234,grad_norm: 0.9569402599699327, iteration: 419573
loss: 0.9792285561561584,grad_norm: 0.7307832536937191, iteration: 419574
loss: 1.1663681268692017,grad_norm: 0.9999998149380463, iteration: 419575
loss: 0.9965184926986694,grad_norm: 0.6861229001893099, iteration: 419576
loss: 0.9869434833526611,grad_norm: 0.807671072610968, iteration: 419577
loss: 0.9831025004386902,grad_norm: 0.753024112121373, iteration: 419578
loss: 0.9793590903282166,grad_norm: 0.8333496765255132, iteration: 419579
loss: 1.046332597732544,grad_norm: 0.8122471462749975, iteration: 419580
loss: 1.0546205043792725,grad_norm: 0.6871587854291795, iteration: 419581
loss: 0.990744411945343,grad_norm: 0.6335594918377553, iteration: 419582
loss: 0.9942382574081421,grad_norm: 0.9189601962696928, iteration: 419583
loss: 1.069220781326294,grad_norm: 1.0000000170098255, iteration: 419584
loss: 0.9471526741981506,grad_norm: 0.8999044233289762, iteration: 419585
loss: 1.1030162572860718,grad_norm: 0.9999993889098087, iteration: 419586
loss: 1.0093275308609009,grad_norm: 0.9999993193520083, iteration: 419587
loss: 1.1073241233825684,grad_norm: 0.9999999119658426, iteration: 419588
loss: 0.9970498085021973,grad_norm: 0.8951542639501452, iteration: 419589
loss: 0.9871512055397034,grad_norm: 0.8090857345417166, iteration: 419590
loss: 1.0376338958740234,grad_norm: 0.8333733211214199, iteration: 419591
loss: 1.0270236730575562,grad_norm: 0.8232711844728325, iteration: 419592
loss: 1.046405553817749,grad_norm: 0.9999994033504181, iteration: 419593
loss: 0.9653268456459045,grad_norm: 0.6940810849791962, iteration: 419594
loss: 1.011334776878357,grad_norm: 0.7732444321659673, iteration: 419595
loss: 1.0562068223953247,grad_norm: 0.9999992146245766, iteration: 419596
loss: 1.0276520252227783,grad_norm: 0.8023600600837457, iteration: 419597
loss: 1.0706889629364014,grad_norm: 0.9999991346475189, iteration: 419598
loss: 0.9807388186454773,grad_norm: 0.7273688037972559, iteration: 419599
loss: 1.013190746307373,grad_norm: 0.6880708639747598, iteration: 419600
loss: 1.0142468214035034,grad_norm: 0.7907329184068708, iteration: 419601
loss: 1.0465459823608398,grad_norm: 0.7554355583449155, iteration: 419602
loss: 1.0146751403808594,grad_norm: 0.939818524837841, iteration: 419603
loss: 0.9942094683647156,grad_norm: 0.6682460610244062, iteration: 419604
loss: 1.0274399518966675,grad_norm: 0.9999991746913983, iteration: 419605
loss: 1.0125741958618164,grad_norm: 0.7556881049038973, iteration: 419606
loss: 1.0035866498947144,grad_norm: 0.8552053622792525, iteration: 419607
loss: 0.9938735961914062,grad_norm: 0.8389442125465858, iteration: 419608
loss: 0.9926954507827759,grad_norm: 0.6751877396830032, iteration: 419609
loss: 0.9977099895477295,grad_norm: 0.7499097778440736, iteration: 419610
loss: 0.990720808506012,grad_norm: 0.7725180681494442, iteration: 419611
loss: 1.020281195640564,grad_norm: 0.8211357565223614, iteration: 419612
loss: 1.083337426185608,grad_norm: 0.9014288367903641, iteration: 419613
loss: 1.0362931489944458,grad_norm: 0.7566045006524925, iteration: 419614
loss: 1.0552732944488525,grad_norm: 0.8985337218762953, iteration: 419615
loss: 1.0025382041931152,grad_norm: 0.9999994947539392, iteration: 419616
loss: 1.0493535995483398,grad_norm: 0.7840712724914638, iteration: 419617
loss: 1.0033353567123413,grad_norm: 0.7253288978130026, iteration: 419618
loss: 1.020334243774414,grad_norm: 0.8191907371856767, iteration: 419619
loss: 1.0118221044540405,grad_norm: 0.9106117462195047, iteration: 419620
loss: 1.0274978876113892,grad_norm: 0.8528330969003972, iteration: 419621
loss: 1.0682209730148315,grad_norm: 0.9085872299927658, iteration: 419622
loss: 1.0682834386825562,grad_norm: 0.8234673377480966, iteration: 419623
loss: 1.0570354461669922,grad_norm: 0.999998971147005, iteration: 419624
loss: 1.0616744756698608,grad_norm: 0.9999997223051798, iteration: 419625
loss: 1.046768307685852,grad_norm: 0.9616059366421413, iteration: 419626
loss: 1.0260047912597656,grad_norm: 0.9860735036233237, iteration: 419627
loss: 0.9906619787216187,grad_norm: 0.6932697741357716, iteration: 419628
loss: 1.0270572900772095,grad_norm: 0.9999993831629883, iteration: 419629
loss: 1.0228524208068848,grad_norm: 0.8895998512796057, iteration: 419630
loss: 0.96919846534729,grad_norm: 0.7861095724741838, iteration: 419631
loss: 1.0511866807937622,grad_norm: 0.9096872811969788, iteration: 419632
loss: 1.0040959119796753,grad_norm: 0.8993525714242699, iteration: 419633
loss: 0.9715876579284668,grad_norm: 0.7651300809674418, iteration: 419634
loss: 1.0197100639343262,grad_norm: 0.7591782620413946, iteration: 419635
loss: 0.9699536561965942,grad_norm: 0.766647503396877, iteration: 419636
loss: 0.995579183101654,grad_norm: 0.7969104697635361, iteration: 419637
loss: 1.0620204210281372,grad_norm: 0.7364826267878172, iteration: 419638
loss: 1.0035462379455566,grad_norm: 0.7022801479363772, iteration: 419639
loss: 0.9959515333175659,grad_norm: 0.831007865420549, iteration: 419640
loss: 0.9911074638366699,grad_norm: 0.8780749954715109, iteration: 419641
loss: 1.011527419090271,grad_norm: 0.8789160265351414, iteration: 419642
loss: 1.0357000827789307,grad_norm: 0.8823280713928783, iteration: 419643
loss: 0.9925013184547424,grad_norm: 0.7819744942015654, iteration: 419644
loss: 1.0514814853668213,grad_norm: 0.8541481187586465, iteration: 419645
loss: 1.0967178344726562,grad_norm: 0.8284289073793468, iteration: 419646
loss: 0.9993125796318054,grad_norm: 0.7311878380135974, iteration: 419647
loss: 1.022763729095459,grad_norm: 0.8093642892190609, iteration: 419648
loss: 1.0361824035644531,grad_norm: 0.8114622737440638, iteration: 419649
loss: 1.0182409286499023,grad_norm: 0.6857554971074437, iteration: 419650
loss: 0.9855376482009888,grad_norm: 0.7884394805914384, iteration: 419651
loss: 1.0368659496307373,grad_norm: 0.7708321026382565, iteration: 419652
loss: 0.9442787170410156,grad_norm: 0.6385702541504954, iteration: 419653
loss: 0.9875730872154236,grad_norm: 0.9178638254562058, iteration: 419654
loss: 0.9811253547668457,grad_norm: 0.696706997128519, iteration: 419655
loss: 0.9747826457023621,grad_norm: 0.7073901954669127, iteration: 419656
loss: 1.0132884979248047,grad_norm: 0.8619095234957722, iteration: 419657
loss: 1.221712350845337,grad_norm: 0.9999997697681253, iteration: 419658
loss: 1.0014647245407104,grad_norm: 0.9999991753329973, iteration: 419659
loss: 0.9986988306045532,grad_norm: 0.8369472243228506, iteration: 419660
loss: 0.9864642024040222,grad_norm: 0.8869560535851545, iteration: 419661
loss: 1.0243983268737793,grad_norm: 0.999999700314068, iteration: 419662
loss: 1.0148844718933105,grad_norm: 0.9999991954093114, iteration: 419663
loss: 1.0002413988113403,grad_norm: 0.8059776206695781, iteration: 419664
loss: 0.9932793974876404,grad_norm: 0.9999991285135992, iteration: 419665
loss: 1.0336813926696777,grad_norm: 0.7512414135190701, iteration: 419666
loss: 1.0376466512680054,grad_norm: 0.7591451709393131, iteration: 419667
loss: 1.1371616125106812,grad_norm: 0.9999994701588799, iteration: 419668
loss: 1.0643492937088013,grad_norm: 0.9999992825043056, iteration: 419669
loss: 1.0245649814605713,grad_norm: 0.8208397947565426, iteration: 419670
loss: 1.0404589176177979,grad_norm: 0.7352993441559534, iteration: 419671
loss: 1.0629689693450928,grad_norm: 0.999999863152751, iteration: 419672
loss: 1.0299450159072876,grad_norm: 0.9999992029253093, iteration: 419673
loss: 0.993963897228241,grad_norm: 0.9999989631933367, iteration: 419674
loss: 0.9893043041229248,grad_norm: 0.7740400543781671, iteration: 419675
loss: 1.0116283893585205,grad_norm: 0.9999993350482121, iteration: 419676
loss: 1.0176982879638672,grad_norm: 0.7749853103831216, iteration: 419677
loss: 1.010206937789917,grad_norm: 0.8108849751283809, iteration: 419678
loss: 1.023476481437683,grad_norm: 0.7129546299809564, iteration: 419679
loss: 1.0227733850479126,grad_norm: 0.7263301844817894, iteration: 419680
loss: 1.0365248918533325,grad_norm: 0.7653938775358345, iteration: 419681
loss: 1.064609408378601,grad_norm: 0.9999997498492397, iteration: 419682
loss: 0.9877909421920776,grad_norm: 0.819082785635209, iteration: 419683
loss: 1.040245771408081,grad_norm: 0.8205871957675465, iteration: 419684
loss: 1.005763053894043,grad_norm: 0.6733808551762452, iteration: 419685
loss: 1.0336732864379883,grad_norm: 0.8105565584730294, iteration: 419686
loss: 0.9829408526420593,grad_norm: 0.6846635171973657, iteration: 419687
loss: 1.0446163415908813,grad_norm: 0.9999995276496287, iteration: 419688
loss: 0.9745481610298157,grad_norm: 0.9999999051673207, iteration: 419689
loss: 1.0847463607788086,grad_norm: 0.8291674594402855, iteration: 419690
loss: 1.0015915632247925,grad_norm: 0.7434767025392544, iteration: 419691
loss: 1.0006130933761597,grad_norm: 0.8486728843716026, iteration: 419692
loss: 0.9781215786933899,grad_norm: 0.82359611303901, iteration: 419693
loss: 1.099710464477539,grad_norm: 0.9999994638865841, iteration: 419694
loss: 1.0764161348342896,grad_norm: 0.7513986165009978, iteration: 419695
loss: 1.001310110092163,grad_norm: 0.8428087578690566, iteration: 419696
loss: 1.0058040618896484,grad_norm: 0.957000329993, iteration: 419697
loss: 1.085282802581787,grad_norm: 0.7471762307821489, iteration: 419698
loss: 1.0192947387695312,grad_norm: 0.8982573476205534, iteration: 419699
loss: 1.026673674583435,grad_norm: 0.8414795582759116, iteration: 419700
loss: 1.0091837644577026,grad_norm: 0.9999994883923082, iteration: 419701
loss: 1.0272878408432007,grad_norm: 0.9999998189646677, iteration: 419702
loss: 1.0900696516036987,grad_norm: 0.9489505140542503, iteration: 419703
loss: 0.9979066848754883,grad_norm: 0.9241859418862127, iteration: 419704
loss: 1.0657055377960205,grad_norm: 0.9604062503967576, iteration: 419705
loss: 0.9877138137817383,grad_norm: 0.8707632810488287, iteration: 419706
loss: 1.0237817764282227,grad_norm: 0.9035374091087919, iteration: 419707
loss: 1.2301270961761475,grad_norm: 0.9999993733945031, iteration: 419708
loss: 0.9957485198974609,grad_norm: 0.7076534542180368, iteration: 419709
loss: 1.0371124744415283,grad_norm: 0.9999997178530327, iteration: 419710
loss: 1.0111316442489624,grad_norm: 0.7142129897464037, iteration: 419711
loss: 1.016426920890808,grad_norm: 0.9999993497373393, iteration: 419712
loss: 1.040229082107544,grad_norm: 0.8272129596478812, iteration: 419713
loss: 1.0976706743240356,grad_norm: 0.9999997919373282, iteration: 419714
loss: 1.0137461423873901,grad_norm: 0.9999991642828187, iteration: 419715
loss: 1.0006459951400757,grad_norm: 0.9999994552981454, iteration: 419716
loss: 1.0322163105010986,grad_norm: 0.9999991293022363, iteration: 419717
loss: 1.0083644390106201,grad_norm: 0.7590508601537327, iteration: 419718
loss: 1.0312575101852417,grad_norm: 0.6617850659562441, iteration: 419719
loss: 0.9760253429412842,grad_norm: 0.6449744771324396, iteration: 419720
loss: 1.0638272762298584,grad_norm: 0.9999998752321392, iteration: 419721
loss: 0.9838044047355652,grad_norm: 0.816368943758511, iteration: 419722
loss: 0.9724377989768982,grad_norm: 0.7869366246804711, iteration: 419723
loss: 1.0104610919952393,grad_norm: 0.7434002762170159, iteration: 419724
loss: 0.9754186272621155,grad_norm: 0.8069719244176882, iteration: 419725
loss: 1.1075876951217651,grad_norm: 0.9999996409126334, iteration: 419726
loss: 1.0370519161224365,grad_norm: 0.999998992002156, iteration: 419727
loss: 1.0124586820602417,grad_norm: 0.999999319292654, iteration: 419728
loss: 1.0268809795379639,grad_norm: 0.7104397245006775, iteration: 419729
loss: 1.020995855331421,grad_norm: 0.9775262662864251, iteration: 419730
loss: 0.9765769839286804,grad_norm: 0.9999995112595393, iteration: 419731
loss: 1.1470822095870972,grad_norm: 0.9999991131866778, iteration: 419732
loss: 1.089744210243225,grad_norm: 0.9999997493195526, iteration: 419733
loss: 0.9923373460769653,grad_norm: 0.9999998628154162, iteration: 419734
loss: 1.014922022819519,grad_norm: 0.6998932147558151, iteration: 419735
loss: 1.0772924423217773,grad_norm: 0.8867154037327911, iteration: 419736
loss: 1.00735342502594,grad_norm: 0.7331754409355011, iteration: 419737
loss: 1.000125765800476,grad_norm: 0.8201748223038545, iteration: 419738
loss: 1.112319827079773,grad_norm: 0.999999273424551, iteration: 419739
loss: 1.0546870231628418,grad_norm: 0.7203490591154794, iteration: 419740
loss: 1.0472744703292847,grad_norm: 0.999999401138541, iteration: 419741
loss: 0.9919100403785706,grad_norm: 0.8571701450367724, iteration: 419742
loss: 1.1103790998458862,grad_norm: 0.9999990905318532, iteration: 419743
loss: 1.0084373950958252,grad_norm: 0.7577111678127663, iteration: 419744
loss: 0.9970253705978394,grad_norm: 0.7233335448603655, iteration: 419745
loss: 1.0751184225082397,grad_norm: 0.9999990919370652, iteration: 419746
loss: 1.0427820682525635,grad_norm: 0.7136184453987456, iteration: 419747
loss: 1.0018346309661865,grad_norm: 0.8870157963327618, iteration: 419748
loss: 0.9866717457771301,grad_norm: 0.705929551993468, iteration: 419749
loss: 1.1986902952194214,grad_norm: 0.9999991720297743, iteration: 419750
loss: 1.0614888668060303,grad_norm: 0.8062876740138585, iteration: 419751
loss: 1.0239713191986084,grad_norm: 0.8147323399656824, iteration: 419752
loss: 1.0467995405197144,grad_norm: 0.9999993661479359, iteration: 419753
loss: 1.0112935304641724,grad_norm: 0.8118867563787093, iteration: 419754
loss: 1.1560076475143433,grad_norm: 0.8149577286657925, iteration: 419755
loss: 1.027617335319519,grad_norm: 0.9185811555178947, iteration: 419756
loss: 1.0004180669784546,grad_norm: 0.7785561835746441, iteration: 419757
loss: 1.0108449459075928,grad_norm: 0.9469277899436906, iteration: 419758
loss: 0.9980576038360596,grad_norm: 0.9999992534611462, iteration: 419759
loss: 0.9883679747581482,grad_norm: 0.6863992886373783, iteration: 419760
loss: 1.0008821487426758,grad_norm: 0.8602017061563031, iteration: 419761
loss: 1.0094077587127686,grad_norm: 0.7813178942551819, iteration: 419762
loss: 0.979790210723877,grad_norm: 0.8238685866717581, iteration: 419763
loss: 0.9799094796180725,grad_norm: 0.7826930600396304, iteration: 419764
loss: 0.9863734841346741,grad_norm: 0.7894066967375813, iteration: 419765
loss: 1.0726511478424072,grad_norm: 0.9120588381910283, iteration: 419766
loss: 0.979560911655426,grad_norm: 0.773784542694494, iteration: 419767
loss: 1.0589617490768433,grad_norm: 0.8954091292889063, iteration: 419768
loss: 1.0185903310775757,grad_norm: 0.9999997152634681, iteration: 419769
loss: 1.0565162897109985,grad_norm: 0.9999991544969271, iteration: 419770
loss: 1.0355888605117798,grad_norm: 0.9999991113747001, iteration: 419771
loss: 0.9989064335823059,grad_norm: 0.7239881046548109, iteration: 419772
loss: 0.9758038520812988,grad_norm: 0.8320579436454595, iteration: 419773
loss: 1.0001031160354614,grad_norm: 0.9999998883652289, iteration: 419774
loss: 1.0402185916900635,grad_norm: 0.8509082054513786, iteration: 419775
loss: 1.056801199913025,grad_norm: 0.9999991480665839, iteration: 419776
loss: 1.0542216300964355,grad_norm: 0.9265384006516647, iteration: 419777
loss: 0.9961571097373962,grad_norm: 0.7837810772833116, iteration: 419778
loss: 1.1031101942062378,grad_norm: 0.9046271916990467, iteration: 419779
loss: 1.017720341682434,grad_norm: 0.9999990592808872, iteration: 419780
loss: 1.000329613685608,grad_norm: 0.9999993374827758, iteration: 419781
loss: 1.1176518201828003,grad_norm: 0.9999995304234588, iteration: 419782
loss: 1.0366207361221313,grad_norm: 0.7295387730030086, iteration: 419783
loss: 1.1607838869094849,grad_norm: 0.8335326653464132, iteration: 419784
loss: 1.0115389823913574,grad_norm: 0.692953020640964, iteration: 419785
loss: 0.9850651621818542,grad_norm: 0.9016600695271214, iteration: 419786
loss: 0.973098874092102,grad_norm: 0.8101934583918844, iteration: 419787
loss: 1.0764538049697876,grad_norm: 0.9999990783532957, iteration: 419788
loss: 0.975817084312439,grad_norm: 0.7635497723463717, iteration: 419789
loss: 0.9926523566246033,grad_norm: 0.9999998954030058, iteration: 419790
loss: 1.0065563917160034,grad_norm: 0.7448025292156917, iteration: 419791
loss: 1.0904587507247925,grad_norm: 0.99999918943976, iteration: 419792
loss: 1.1291024684906006,grad_norm: 0.9999996525589807, iteration: 419793
loss: 1.0247067213058472,grad_norm: 0.744970153903299, iteration: 419794
loss: 0.9851922988891602,grad_norm: 0.9442295444311385, iteration: 419795
loss: 1.0624898672103882,grad_norm: 0.933270282964827, iteration: 419796
loss: 1.0397467613220215,grad_norm: 0.9999999503393638, iteration: 419797
loss: 1.0149850845336914,grad_norm: 0.8110084557478537, iteration: 419798
loss: 1.0673518180847168,grad_norm: 0.8106000692797425, iteration: 419799
loss: 1.0099612474441528,grad_norm: 0.7976933308175544, iteration: 419800
loss: 1.019059181213379,grad_norm: 0.7866822716836647, iteration: 419801
loss: 1.1044210195541382,grad_norm: 0.9999998324444722, iteration: 419802
loss: 0.9782065749168396,grad_norm: 0.9862924373011277, iteration: 419803
loss: 0.9985390305519104,grad_norm: 0.9999999118412444, iteration: 419804
loss: 1.0407077074050903,grad_norm: 0.8596450145711791, iteration: 419805
loss: 0.9956083297729492,grad_norm: 0.7084907155980535, iteration: 419806
loss: 0.9997234344482422,grad_norm: 0.9770499652982353, iteration: 419807
loss: 0.9847166538238525,grad_norm: 0.9172110451078217, iteration: 419808
loss: 0.9916719198226929,grad_norm: 0.7274156802036404, iteration: 419809
loss: 0.9664825797080994,grad_norm: 0.6889529345911604, iteration: 419810
loss: 1.0074172019958496,grad_norm: 0.7700809863752908, iteration: 419811
loss: 1.0409584045410156,grad_norm: 0.7741690892912786, iteration: 419812
loss: 1.1298940181732178,grad_norm: 0.9999996191694295, iteration: 419813
loss: 0.9991657733917236,grad_norm: 0.7188939314403909, iteration: 419814
loss: 0.9992364048957825,grad_norm: 0.6450452063035845, iteration: 419815
loss: 1.027236819267273,grad_norm: 0.6995403179132561, iteration: 419816
loss: 1.0465911626815796,grad_norm: 0.787839303776942, iteration: 419817
loss: 0.9751207828521729,grad_norm: 0.9999992644100115, iteration: 419818
loss: 1.0240226984024048,grad_norm: 0.8531352188660382, iteration: 419819
loss: 1.0365204811096191,grad_norm: 0.99999932132799, iteration: 419820
loss: 1.0022598505020142,grad_norm: 0.79704320319231, iteration: 419821
loss: 0.9981158375740051,grad_norm: 0.7234923653528701, iteration: 419822
loss: 1.0808279514312744,grad_norm: 0.9999999201287003, iteration: 419823
loss: 1.0195865631103516,grad_norm: 0.751922948866493, iteration: 419824
loss: 0.9961204528808594,grad_norm: 0.8605651607591863, iteration: 419825
loss: 1.0675169229507446,grad_norm: 0.9999994012940875, iteration: 419826
loss: 1.0035699605941772,grad_norm: 0.6834588102924657, iteration: 419827
loss: 0.9959602355957031,grad_norm: 0.9046924758877645, iteration: 419828
loss: 0.985826313495636,grad_norm: 0.803271020149285, iteration: 419829
loss: 1.0160963535308838,grad_norm: 0.8108512849863007, iteration: 419830
loss: 1.0148615837097168,grad_norm: 0.8092535878114464, iteration: 419831
loss: 1.005545973777771,grad_norm: 0.8640346582733769, iteration: 419832
loss: 0.968159556388855,grad_norm: 0.7628733289073975, iteration: 419833
loss: 1.0424654483795166,grad_norm: 0.8358888651842182, iteration: 419834
loss: 1.0052404403686523,grad_norm: 0.8488945179422742, iteration: 419835
loss: 1.2193658351898193,grad_norm: 0.8369832850899742, iteration: 419836
loss: 1.0396183729171753,grad_norm: 0.7060929877532698, iteration: 419837
loss: 0.9503326416015625,grad_norm: 0.7853759750619003, iteration: 419838
loss: 1.073035478591919,grad_norm: 0.9999999420474213, iteration: 419839
loss: 0.9899598956108093,grad_norm: 0.9999990903474237, iteration: 419840
loss: 1.1109338998794556,grad_norm: 0.965896545965756, iteration: 419841
loss: 1.0201663970947266,grad_norm: 0.7450263351612972, iteration: 419842
loss: 1.0676246881484985,grad_norm: 0.9999991115102688, iteration: 419843
loss: 1.0515694618225098,grad_norm: 0.7075957337524788, iteration: 419844
loss: 1.0272541046142578,grad_norm: 0.7945223506910487, iteration: 419845
loss: 1.1194955110549927,grad_norm: 0.999999789310847, iteration: 419846
loss: 1.0194461345672607,grad_norm: 0.7834302406069984, iteration: 419847
loss: 0.9855841994285583,grad_norm: 0.8364331767114895, iteration: 419848
loss: 0.9681534171104431,grad_norm: 0.6550947789926788, iteration: 419849
loss: 1.0322978496551514,grad_norm: 0.9257262473693308, iteration: 419850
loss: 1.0005043745040894,grad_norm: 0.8148209282047223, iteration: 419851
loss: 1.0015137195587158,grad_norm: 0.8274859680551794, iteration: 419852
loss: 0.9680274724960327,grad_norm: 0.7612661668587449, iteration: 419853
loss: 0.9609609246253967,grad_norm: 0.674889480156475, iteration: 419854
loss: 0.9761589765548706,grad_norm: 0.8608860823799993, iteration: 419855
loss: 1.073084831237793,grad_norm: 0.788241628105796, iteration: 419856
loss: 1.1225355863571167,grad_norm: 0.9999999971615229, iteration: 419857
loss: 0.9973965287208557,grad_norm: 0.7505566076243636, iteration: 419858
loss: 1.015159249305725,grad_norm: 0.9999994078815374, iteration: 419859
loss: 1.0374233722686768,grad_norm: 0.8026063165802056, iteration: 419860
loss: 1.0381250381469727,grad_norm: 0.8344404892722236, iteration: 419861
loss: 0.9330838918685913,grad_norm: 0.8041089393490457, iteration: 419862
loss: 1.1146241426467896,grad_norm: 0.9999997564013668, iteration: 419863
loss: 1.0652674436569214,grad_norm: 0.8105514228675812, iteration: 419864
loss: 1.001572847366333,grad_norm: 0.9999994525588829, iteration: 419865
loss: 1.028932809829712,grad_norm: 0.6315104633008608, iteration: 419866
loss: 1.0112651586532593,grad_norm: 0.7227171375922369, iteration: 419867
loss: 1.0066335201263428,grad_norm: 0.7362221623505887, iteration: 419868
loss: 1.0263617038726807,grad_norm: 0.9999991129809385, iteration: 419869
loss: 1.0144401788711548,grad_norm: 0.999999151510048, iteration: 419870
loss: 1.0133410692214966,grad_norm: 0.8079414130721158, iteration: 419871
loss: 0.9499993920326233,grad_norm: 0.9797319994875751, iteration: 419872
loss: 1.0149840116500854,grad_norm: 0.9999991396543254, iteration: 419873
loss: 1.0911959409713745,grad_norm: 0.9999994720382416, iteration: 419874
loss: 1.0078279972076416,grad_norm: 0.7551450584849164, iteration: 419875
loss: 1.0287543535232544,grad_norm: 0.8115630016295283, iteration: 419876
loss: 1.0497221946716309,grad_norm: 0.8032959665083681, iteration: 419877
loss: 1.1073087453842163,grad_norm: 0.999999252010187, iteration: 419878
loss: 1.0053211450576782,grad_norm: 0.7930912642248219, iteration: 419879
loss: 0.9914584755897522,grad_norm: 0.6704440935738853, iteration: 419880
loss: 1.026289939880371,grad_norm: 0.8110460628661255, iteration: 419881
loss: 0.9674679636955261,grad_norm: 0.9126329501750721, iteration: 419882
loss: 0.9803190231323242,grad_norm: 0.6867293215590213, iteration: 419883
loss: 0.9959987998008728,grad_norm: 0.745123347613346, iteration: 419884
loss: 0.9501680135726929,grad_norm: 0.7631201628713096, iteration: 419885
loss: 1.0099544525146484,grad_norm: 0.9185814137959606, iteration: 419886
loss: 0.9912081956863403,grad_norm: 0.7339650731176975, iteration: 419887
loss: 1.0049997568130493,grad_norm: 0.8023112870767697, iteration: 419888
loss: 1.0326370000839233,grad_norm: 0.841776238123277, iteration: 419889
loss: 0.9786319136619568,grad_norm: 0.7681411466769184, iteration: 419890
loss: 1.0030956268310547,grad_norm: 0.613242004162571, iteration: 419891
loss: 0.989362359046936,grad_norm: 0.7355867415993398, iteration: 419892
loss: 1.030242681503296,grad_norm: 0.7766957826336369, iteration: 419893
loss: 1.0658533573150635,grad_norm: 0.8919605631067787, iteration: 419894
loss: 1.000594139099121,grad_norm: 0.9113724803132097, iteration: 419895
loss: 1.0620927810668945,grad_norm: 0.9568653870060492, iteration: 419896
loss: 1.0081101655960083,grad_norm: 0.8982336236095101, iteration: 419897
loss: 1.0798710584640503,grad_norm: 0.9645462992182264, iteration: 419898
loss: 1.0849958658218384,grad_norm: 0.7688402099498483, iteration: 419899
loss: 1.034733772277832,grad_norm: 0.9999993692603093, iteration: 419900
loss: 0.9856849312782288,grad_norm: 0.7463823487307923, iteration: 419901
loss: 0.9741973876953125,grad_norm: 0.8733283573606689, iteration: 419902
loss: 0.9833109974861145,grad_norm: 0.7608406544179611, iteration: 419903
loss: 1.0389652252197266,grad_norm: 0.854730188211212, iteration: 419904
loss: 1.0094588994979858,grad_norm: 0.8111448388319963, iteration: 419905
loss: 0.9915727972984314,grad_norm: 0.9985940415866126, iteration: 419906
loss: 1.0029609203338623,grad_norm: 0.8949442675994695, iteration: 419907
loss: 1.011508822441101,grad_norm: 0.7836792355028221, iteration: 419908
loss: 1.1127701997756958,grad_norm: 0.8904710554861885, iteration: 419909
loss: 0.9612223505973816,grad_norm: 0.729107636621727, iteration: 419910
loss: 1.043370246887207,grad_norm: 0.9999991384845365, iteration: 419911
loss: 1.153272271156311,grad_norm: 0.9999993161137316, iteration: 419912
loss: 0.985196590423584,grad_norm: 0.8651423261082629, iteration: 419913
loss: 0.994818925857544,grad_norm: 0.8986851513221734, iteration: 419914
loss: 0.9972018599510193,grad_norm: 0.7161789594816943, iteration: 419915
loss: 1.0246217250823975,grad_norm: 0.8052364325570306, iteration: 419916
loss: 0.9797057509422302,grad_norm: 0.8502221761239759, iteration: 419917
loss: 0.9939101338386536,grad_norm: 0.9306826236316166, iteration: 419918
loss: 1.31954026222229,grad_norm: 0.9999997390187827, iteration: 419919
loss: 0.9814236164093018,grad_norm: 0.9819852724581275, iteration: 419920
loss: 0.9844947457313538,grad_norm: 0.6567021598063199, iteration: 419921
loss: 1.0013329982757568,grad_norm: 0.6861413661777392, iteration: 419922
loss: 1.0761091709136963,grad_norm: 0.7819345398317177, iteration: 419923
loss: 1.0178422927856445,grad_norm: 0.9999990541225061, iteration: 419924
loss: 0.9846817255020142,grad_norm: 0.8922428569170414, iteration: 419925
loss: 1.099166989326477,grad_norm: 0.9999994888567556, iteration: 419926
loss: 1.0350444316864014,grad_norm: 0.8335143811108603, iteration: 419927
loss: 0.9831029176712036,grad_norm: 0.844660015109603, iteration: 419928
loss: 0.9835073351860046,grad_norm: 0.8555403863860117, iteration: 419929
loss: 1.0259932279586792,grad_norm: 0.9999996100020104, iteration: 419930
loss: 1.021364688873291,grad_norm: 0.7860808039729241, iteration: 419931
loss: 1.0054547786712646,grad_norm: 0.8861611520671576, iteration: 419932
loss: 0.9898182153701782,grad_norm: 0.7636908361417252, iteration: 419933
loss: 1.041062831878662,grad_norm: 0.9999992437061186, iteration: 419934
loss: 1.01347815990448,grad_norm: 0.999999640486907, iteration: 419935
loss: 1.0197288990020752,grad_norm: 0.7216499948871884, iteration: 419936
loss: 1.1286627054214478,grad_norm: 1.0000000216680798, iteration: 419937
loss: 0.9946096539497375,grad_norm: 0.8962406840442046, iteration: 419938
loss: 1.015273928642273,grad_norm: 0.7324437296119124, iteration: 419939
loss: 0.9689129590988159,grad_norm: 0.8107834377214779, iteration: 419940
loss: 0.9532753229141235,grad_norm: 0.9999996803951958, iteration: 419941
loss: 1.0347422361373901,grad_norm: 0.999999182019623, iteration: 419942
loss: 1.0412402153015137,grad_norm: 0.9999997667222956, iteration: 419943
loss: 1.0149166584014893,grad_norm: 0.7587871402320367, iteration: 419944
loss: 1.0593119859695435,grad_norm: 0.9999991422342136, iteration: 419945
loss: 0.9937905073165894,grad_norm: 0.7388890175699436, iteration: 419946
loss: 1.0242730379104614,grad_norm: 0.8073493748648366, iteration: 419947
loss: 0.9676671028137207,grad_norm: 0.7760989433571783, iteration: 419948
loss: 1.0558265447616577,grad_norm: 0.9999994973199451, iteration: 419949
loss: 1.0534052848815918,grad_norm: 0.7259887366428596, iteration: 419950
loss: 1.0696823596954346,grad_norm: 0.9999997765758979, iteration: 419951
loss: 1.020755410194397,grad_norm: 0.7663495991636529, iteration: 419952
loss: 1.0607290267944336,grad_norm: 0.7894146465855196, iteration: 419953
loss: 1.0148842334747314,grad_norm: 0.821148155620404, iteration: 419954
loss: 0.9962506890296936,grad_norm: 0.7141800496359433, iteration: 419955
loss: 1.051620602607727,grad_norm: 0.9999998137791154, iteration: 419956
loss: 1.0147905349731445,grad_norm: 0.8372988327159493, iteration: 419957
loss: 0.9919037222862244,grad_norm: 0.7496042496798975, iteration: 419958
loss: 0.9963638186454773,grad_norm: 0.9856317418966786, iteration: 419959
loss: 1.005991816520691,grad_norm: 0.7371375455532394, iteration: 419960
loss: 1.0108891725540161,grad_norm: 0.8797421183276192, iteration: 419961
loss: 1.1037808656692505,grad_norm: 0.9999993454170675, iteration: 419962
loss: 0.9717051386833191,grad_norm: 0.9503131611636384, iteration: 419963
loss: 0.9663490653038025,grad_norm: 0.7391221041474598, iteration: 419964
loss: 1.0123183727264404,grad_norm: 0.9321586490278201, iteration: 419965
loss: 1.1067134141921997,grad_norm: 0.9999999300528408, iteration: 419966
loss: 1.0057090520858765,grad_norm: 0.9999991643538895, iteration: 419967
loss: 1.20375394821167,grad_norm: 0.9380264748549831, iteration: 419968
loss: 0.9864580035209656,grad_norm: 0.7344573055427279, iteration: 419969
loss: 1.0175223350524902,grad_norm: 0.9999997629384657, iteration: 419970
loss: 0.9730201363563538,grad_norm: 0.782182108397291, iteration: 419971
loss: 1.092881679534912,grad_norm: 0.999999170239524, iteration: 419972
loss: 1.0216389894485474,grad_norm: 0.9999992541174924, iteration: 419973
loss: 0.9903656244277954,grad_norm: 0.7365030700388462, iteration: 419974
loss: 0.9776232838630676,grad_norm: 0.9999991832494708, iteration: 419975
loss: 1.0070104598999023,grad_norm: 0.99999908437998, iteration: 419976
loss: 0.9827999472618103,grad_norm: 0.7050392692816799, iteration: 419977
loss: 0.9982761740684509,grad_norm: 0.9999995860839416, iteration: 419978
loss: 1.0213096141815186,grad_norm: 0.7671813564810966, iteration: 419979
loss: 0.9710312485694885,grad_norm: 0.9999999651773175, iteration: 419980
loss: 0.987900972366333,grad_norm: 0.9343241993935926, iteration: 419981
loss: 1.0187597274780273,grad_norm: 0.8817294187663947, iteration: 419982
loss: 1.0499136447906494,grad_norm: 0.8470153144154408, iteration: 419983
loss: 1.0339564085006714,grad_norm: 0.8893403355873797, iteration: 419984
loss: 0.9750101566314697,grad_norm: 0.7367456981733977, iteration: 419985
loss: 1.0726789236068726,grad_norm: 0.9999991110303885, iteration: 419986
loss: 1.0190497636795044,grad_norm: 0.7922334470098589, iteration: 419987
loss: 1.0444189310073853,grad_norm: 0.8268712253060556, iteration: 419988
loss: 1.0234830379486084,grad_norm: 0.820695297014469, iteration: 419989
loss: 1.0109015703201294,grad_norm: 0.6401499639331055, iteration: 419990
loss: 0.9952837228775024,grad_norm: 0.7078351820994079, iteration: 419991
loss: 1.0126091241836548,grad_norm: 0.9999999735669601, iteration: 419992
loss: 0.9764959812164307,grad_norm: 0.9999991247014408, iteration: 419993
loss: 0.9868101477622986,grad_norm: 0.8743800036489098, iteration: 419994
loss: 1.01323664188385,grad_norm: 0.9999993258488776, iteration: 419995
loss: 1.0030349493026733,grad_norm: 0.87674472139107, iteration: 419996
loss: 1.0811984539031982,grad_norm: 0.999999443140802, iteration: 419997
loss: 1.0136865377426147,grad_norm: 0.7493863878653694, iteration: 419998
loss: 1.0777437686920166,grad_norm: 0.7734964309251803, iteration: 419999
loss: 0.9727364182472229,grad_norm: 0.9999991469919242, iteration: 420000
Evaluating at step 420000
{'val': 0.995939901098609, 'test': 1.87787223243104}
loss: 0.9808089733123779,grad_norm: 0.9056230721486722, iteration: 420001
loss: 0.9845858812332153,grad_norm: 0.655844297133567, iteration: 420002
loss: 1.0137723684310913,grad_norm: 0.737232072424492, iteration: 420003
loss: 0.9890934228897095,grad_norm: 0.7948687145454976, iteration: 420004
loss: 0.9614576697349548,grad_norm: 0.8109054518028732, iteration: 420005
loss: 0.9737648367881775,grad_norm: 0.8223377168478508, iteration: 420006
loss: 1.0122954845428467,grad_norm: 0.6830873282065627, iteration: 420007
loss: 0.9957277774810791,grad_norm: 0.7835328593336535, iteration: 420008
loss: 1.0054771900177002,grad_norm: 0.7674552757374165, iteration: 420009
loss: 1.0012744665145874,grad_norm: 0.9999988426072909, iteration: 420010
loss: 1.0641995668411255,grad_norm: 0.9999994042813816, iteration: 420011
loss: 1.007001519203186,grad_norm: 0.8867700608508605, iteration: 420012
loss: 1.0273855924606323,grad_norm: 0.7550041685153774, iteration: 420013
loss: 0.9998493194580078,grad_norm: 0.7810805470477913, iteration: 420014
loss: 1.0139031410217285,grad_norm: 0.711033573902563, iteration: 420015
loss: 0.9972101449966431,grad_norm: 0.9028488574183011, iteration: 420016
loss: 1.0044678449630737,grad_norm: 0.770095919278983, iteration: 420017
loss: 1.0139516592025757,grad_norm: 0.909869561990722, iteration: 420018
loss: 0.9929494857788086,grad_norm: 0.9985568807951939, iteration: 420019
loss: 1.0181936025619507,grad_norm: 0.9999995540032056, iteration: 420020
loss: 1.018741488456726,grad_norm: 0.8985567116848073, iteration: 420021
loss: 0.9872052073478699,grad_norm: 0.9999991417143363, iteration: 420022
loss: 0.9899163246154785,grad_norm: 0.7561378327242886, iteration: 420023
loss: 1.0681991577148438,grad_norm: 0.9533422720495212, iteration: 420024
loss: 1.0316827297210693,grad_norm: 0.8344489883523801, iteration: 420025
loss: 0.9768192768096924,grad_norm: 0.6969542460886665, iteration: 420026
loss: 1.0350521802902222,grad_norm: 0.7762770826226466, iteration: 420027
loss: 0.9859338402748108,grad_norm: 0.7739295816675041, iteration: 420028
loss: 0.9895948171615601,grad_norm: 0.7363041920187763, iteration: 420029
loss: 0.9893433451652527,grad_norm: 0.8478447893078973, iteration: 420030
loss: 0.9921403527259827,grad_norm: 0.9999996225653269, iteration: 420031
loss: 0.9739364385604858,grad_norm: 0.6589574943087194, iteration: 420032
loss: 1.0278635025024414,grad_norm: 1.0000000316834494, iteration: 420033
loss: 1.0116801261901855,grad_norm: 0.6633754426770445, iteration: 420034
loss: 1.0390981435775757,grad_norm: 0.9999990987839458, iteration: 420035
loss: 1.0124422311782837,grad_norm: 0.825722601593758, iteration: 420036
loss: 1.0594844818115234,grad_norm: 0.9523748056767706, iteration: 420037
loss: 0.9834034442901611,grad_norm: 0.6907990308136003, iteration: 420038
loss: 1.0359876155853271,grad_norm: 0.9999996731300748, iteration: 420039
loss: 1.0204943418502808,grad_norm: 0.9999991074367528, iteration: 420040
loss: 1.0362868309020996,grad_norm: 0.9999991376738993, iteration: 420041
loss: 1.0288941860198975,grad_norm: 0.736605095358783, iteration: 420042
loss: 0.9546210169792175,grad_norm: 1.0000000296390528, iteration: 420043
loss: 1.0215113162994385,grad_norm: 0.6767705787505568, iteration: 420044
loss: 1.0188090801239014,grad_norm: 0.8425180725223101, iteration: 420045
loss: 0.9685739874839783,grad_norm: 0.8058045615902938, iteration: 420046
loss: 0.97784423828125,grad_norm: 0.6995300286048876, iteration: 420047
loss: 0.9887304306030273,grad_norm: 0.7346736534668764, iteration: 420048
loss: 1.0061352252960205,grad_norm: 0.7879623847731865, iteration: 420049
loss: 0.9867363572120667,grad_norm: 0.8588520760083833, iteration: 420050
loss: 1.0138388872146606,grad_norm: 0.7338663319303699, iteration: 420051
loss: 0.99761962890625,grad_norm: 0.8793476712929582, iteration: 420052
loss: 0.9993453025817871,grad_norm: 1.0000000076252178, iteration: 420053
loss: 1.0208855867385864,grad_norm: 0.7382974404433718, iteration: 420054
loss: 1.0092685222625732,grad_norm: 0.7854691407190937, iteration: 420055
loss: 0.9991981387138367,grad_norm: 0.7322759354602015, iteration: 420056
loss: 0.9668169617652893,grad_norm: 0.9999997031682696, iteration: 420057
loss: 1.0197198390960693,grad_norm: 0.7942280977264218, iteration: 420058
loss: 1.0417823791503906,grad_norm: 0.9999994707072913, iteration: 420059
loss: 1.1125751733779907,grad_norm: 0.9999991090647545, iteration: 420060
loss: 0.9652247428894043,grad_norm: 0.6964381068369943, iteration: 420061
loss: 0.9944326877593994,grad_norm: 0.7451435934964981, iteration: 420062
loss: 1.0381324291229248,grad_norm: 0.9999996589801453, iteration: 420063
loss: 1.0276308059692383,grad_norm: 0.7333708202756692, iteration: 420064
loss: 0.9879848957061768,grad_norm: 0.718644825396801, iteration: 420065
loss: 1.031897783279419,grad_norm: 0.9999993502403475, iteration: 420066
loss: 0.9922500848770142,grad_norm: 0.9999989934372542, iteration: 420067
loss: 1.0467584133148193,grad_norm: 0.7953092748795527, iteration: 420068
loss: 1.0208379030227661,grad_norm: 0.7699056199942782, iteration: 420069
loss: 0.9852128028869629,grad_norm: 0.7441577925066635, iteration: 420070
loss: 1.1322218179702759,grad_norm: 0.9999992359473407, iteration: 420071
loss: 1.0175247192382812,grad_norm: 0.8589722187920281, iteration: 420072
loss: 0.9942364692687988,grad_norm: 0.9999991391943105, iteration: 420073
loss: 1.0263731479644775,grad_norm: 0.8455039262881068, iteration: 420074
loss: 1.0714527368545532,grad_norm: 0.8439787119373615, iteration: 420075
loss: 1.0183817148208618,grad_norm: 0.9999991192797927, iteration: 420076
loss: 1.0028562545776367,grad_norm: 0.7618970912662152, iteration: 420077
loss: 1.015999436378479,grad_norm: 0.999999883333232, iteration: 420078
loss: 0.9955918192863464,grad_norm: 0.8052562321554667, iteration: 420079
loss: 0.9996524453163147,grad_norm: 0.7540089435634857, iteration: 420080
loss: 1.0142329931259155,grad_norm: 0.7036677204872063, iteration: 420081
loss: 1.0413497686386108,grad_norm: 0.8383726040047114, iteration: 420082
loss: 1.0132373571395874,grad_norm: 0.8396820878756545, iteration: 420083
loss: 1.085315465927124,grad_norm: 0.7911456344623904, iteration: 420084
loss: 0.9801660180091858,grad_norm: 0.9999993905965107, iteration: 420085
loss: 1.0093637704849243,grad_norm: 0.9501660797832273, iteration: 420086
loss: 0.981865644454956,grad_norm: 0.8624553417987078, iteration: 420087
loss: 1.0444689989089966,grad_norm: 0.866856665905456, iteration: 420088
loss: 0.9940868616104126,grad_norm: 0.7829435366533054, iteration: 420089
loss: 1.0628162622451782,grad_norm: 0.9121022111616556, iteration: 420090
loss: 0.9958041310310364,grad_norm: 0.7370521046949052, iteration: 420091
loss: 0.9669168591499329,grad_norm: 0.7860948246098844, iteration: 420092
loss: 1.0025815963745117,grad_norm: 0.6786085616170225, iteration: 420093
loss: 1.137795329093933,grad_norm: 0.9999997758408702, iteration: 420094
loss: 1.0315624475479126,grad_norm: 0.897437562143577, iteration: 420095
loss: 0.9890533089637756,grad_norm: 0.7014756437862426, iteration: 420096
loss: 1.0060194730758667,grad_norm: 0.8996163223951433, iteration: 420097
loss: 0.9699201583862305,grad_norm: 0.7665007695215842, iteration: 420098
loss: 0.984372615814209,grad_norm: 0.8040601930399927, iteration: 420099
loss: 0.9903461933135986,grad_norm: 0.8029339272207892, iteration: 420100
loss: 1.099731206893921,grad_norm: 0.9999991151736168, iteration: 420101
loss: 0.9812068939208984,grad_norm: 0.7824357728869513, iteration: 420102
loss: 1.145099401473999,grad_norm: 0.9999995107018502, iteration: 420103
loss: 0.9691171646118164,grad_norm: 0.6834828006994746, iteration: 420104
loss: 0.9960286617279053,grad_norm: 0.9256192086303787, iteration: 420105
loss: 1.0311193466186523,grad_norm: 0.7664690120006387, iteration: 420106
loss: 0.9609230756759644,grad_norm: 0.7089348980761392, iteration: 420107
loss: 0.9862295389175415,grad_norm: 0.8414094233624059, iteration: 420108
loss: 1.010286808013916,grad_norm: 0.7998931678882899, iteration: 420109
loss: 1.0316996574401855,grad_norm: 0.8184122631706847, iteration: 420110
loss: 1.0079731941223145,grad_norm: 0.8523388785328518, iteration: 420111
loss: 1.002208948135376,grad_norm: 0.7273680201924595, iteration: 420112
loss: 0.990024983882904,grad_norm: 0.9999992281656671, iteration: 420113
loss: 1.0847152471542358,grad_norm: 1.0000000088607197, iteration: 420114
loss: 0.9958125352859497,grad_norm: 0.8475796974306135, iteration: 420115
loss: 0.9871537685394287,grad_norm: 0.7827435050650453, iteration: 420116
loss: 1.200714349746704,grad_norm: 0.9999996714996934, iteration: 420117
loss: 0.9890341758728027,grad_norm: 0.9102588046998485, iteration: 420118
loss: 1.0361932516098022,grad_norm: 0.9999998981123868, iteration: 420119
loss: 1.0303256511688232,grad_norm: 0.9999993951263132, iteration: 420120
loss: 1.0115324258804321,grad_norm: 0.9999996677037947, iteration: 420121
loss: 0.9938557744026184,grad_norm: 0.6514959932229969, iteration: 420122
loss: 0.9873806834220886,grad_norm: 0.9999990921410722, iteration: 420123
loss: 1.0269042253494263,grad_norm: 0.7958127009403114, iteration: 420124
loss: 1.10736882686615,grad_norm: 0.9999995495520847, iteration: 420125
loss: 1.0040379762649536,grad_norm: 0.9999995578057601, iteration: 420126
loss: 1.002234697341919,grad_norm: 0.7243916188908804, iteration: 420127
loss: 0.9686053395271301,grad_norm: 0.7264252345935972, iteration: 420128
loss: 0.9997920393943787,grad_norm: 0.7731137789695741, iteration: 420129
loss: 1.0037733316421509,grad_norm: 0.7034857607698657, iteration: 420130
loss: 1.0029784440994263,grad_norm: 0.9138199487628262, iteration: 420131
loss: 0.9878743886947632,grad_norm: 0.7427159350356624, iteration: 420132
loss: 1.0406545400619507,grad_norm: 0.9412488901863723, iteration: 420133
loss: 0.9947194457054138,grad_norm: 0.999999148142553, iteration: 420134
loss: 0.9803183674812317,grad_norm: 0.8611999612111323, iteration: 420135
loss: 0.9717587828636169,grad_norm: 0.9999993023478566, iteration: 420136
loss: 1.0287214517593384,grad_norm: 0.9999996463579243, iteration: 420137
loss: 1.0083318948745728,grad_norm: 0.9999991412823841, iteration: 420138
loss: 1.0006684064865112,grad_norm: 0.9659047784285079, iteration: 420139
loss: 1.02154541015625,grad_norm: 0.7754102058131032, iteration: 420140
loss: 0.9786884784698486,grad_norm: 0.82799594575872, iteration: 420141
loss: 1.0361416339874268,grad_norm: 0.7915771072326947, iteration: 420142
loss: 1.0484386682510376,grad_norm: 0.9999999686826399, iteration: 420143
loss: 1.0155175924301147,grad_norm: 0.6804395241476244, iteration: 420144
loss: 1.024703860282898,grad_norm: 0.8614971016436942, iteration: 420145
loss: 0.9886577725410461,grad_norm: 0.9058105284929031, iteration: 420146
loss: 1.0046836137771606,grad_norm: 0.7846679446806423, iteration: 420147
loss: 1.0460659265518188,grad_norm: 0.9100934439274315, iteration: 420148
loss: 0.963183581829071,grad_norm: 0.7997733978740604, iteration: 420149
loss: 0.9600203037261963,grad_norm: 0.8177903638667263, iteration: 420150
loss: 1.0752453804016113,grad_norm: 0.9999994393907407, iteration: 420151
loss: 1.0372748374938965,grad_norm: 0.7920651579931678, iteration: 420152
loss: 0.9955291748046875,grad_norm: 0.8621996294460544, iteration: 420153
loss: 1.0002740621566772,grad_norm: 0.871820245587252, iteration: 420154
loss: 1.01802396774292,grad_norm: 0.7256009196013511, iteration: 420155
loss: 0.9954516887664795,grad_norm: 0.9999991787882248, iteration: 420156
loss: 0.9915062785148621,grad_norm: 0.9999992363879748, iteration: 420157
loss: 1.0221600532531738,grad_norm: 0.7006221749423405, iteration: 420158
loss: 1.0002905130386353,grad_norm: 0.8405059671175132, iteration: 420159
loss: 1.0173624753952026,grad_norm: 0.7776915973598877, iteration: 420160
loss: 0.9975416660308838,grad_norm: 0.9430179999809849, iteration: 420161
loss: 1.0727843046188354,grad_norm: 0.9594570806879691, iteration: 420162
loss: 0.9756272435188293,grad_norm: 0.8406322735501944, iteration: 420163
loss: 1.0126081705093384,grad_norm: 0.8336410127537732, iteration: 420164
loss: 1.0874584913253784,grad_norm: 0.9999996107927708, iteration: 420165
loss: 1.0191882848739624,grad_norm: 0.7962133446911204, iteration: 420166
loss: 0.9982984662055969,grad_norm: 0.8141783140318016, iteration: 420167
loss: 1.0096564292907715,grad_norm: 0.642402852140975, iteration: 420168
loss: 1.0330909490585327,grad_norm: 0.9555925294400431, iteration: 420169
loss: 1.0190925598144531,grad_norm: 0.9999995470704169, iteration: 420170
loss: 0.9712918996810913,grad_norm: 0.8584664815721494, iteration: 420171
loss: 0.9646751284599304,grad_norm: 0.9999999361663209, iteration: 420172
loss: 1.0083032846450806,grad_norm: 0.7273750568835308, iteration: 420173
loss: 0.9905706644058228,grad_norm: 0.8061568186601261, iteration: 420174
loss: 0.971294641494751,grad_norm: 0.9994760739833026, iteration: 420175
loss: 1.0048598051071167,grad_norm: 0.8710789312277779, iteration: 420176
loss: 0.9967308044433594,grad_norm: 0.9484359359040107, iteration: 420177
loss: 1.0647475719451904,grad_norm: 0.9999995464604252, iteration: 420178
loss: 1.059525966644287,grad_norm: 0.9999994562180213, iteration: 420179
loss: 1.0794354677200317,grad_norm: 0.9999997965587522, iteration: 420180
loss: 0.9977575540542603,grad_norm: 0.745075078505108, iteration: 420181
loss: 1.0692321062088013,grad_norm: 0.9999995001902892, iteration: 420182
loss: 1.0572623014450073,grad_norm: 0.9999991553636536, iteration: 420183
loss: 0.999975860118866,grad_norm: 0.9999992478157355, iteration: 420184
loss: 1.0047097206115723,grad_norm: 0.6196457959506673, iteration: 420185
loss: 1.0184074640274048,grad_norm: 0.8462821613503806, iteration: 420186
loss: 1.0320945978164673,grad_norm: 0.73579162127565, iteration: 420187
loss: 1.0190637111663818,grad_norm: 0.9179900543041966, iteration: 420188
loss: 1.0127934217453003,grad_norm: 0.7749351935205708, iteration: 420189
loss: 0.9851323366165161,grad_norm: 0.8172393140669039, iteration: 420190
loss: 0.9879018068313599,grad_norm: 0.7491524357119237, iteration: 420191
loss: 0.9717005491256714,grad_norm: 0.9532250645165066, iteration: 420192
loss: 1.0188117027282715,grad_norm: 0.8090118130862999, iteration: 420193
loss: 1.1606336832046509,grad_norm: 0.9999997391912754, iteration: 420194
loss: 1.025255560874939,grad_norm: 0.70846898954224, iteration: 420195
loss: 0.9924623370170593,grad_norm: 0.8033220340665967, iteration: 420196
loss: 0.9865735173225403,grad_norm: 0.7891548530311078, iteration: 420197
loss: 0.9797466993331909,grad_norm: 0.7065811114606324, iteration: 420198
loss: 1.062156081199646,grad_norm: 0.7928079477663829, iteration: 420199
loss: 1.0138797760009766,grad_norm: 0.8085531573128669, iteration: 420200
loss: 0.9578837156295776,grad_norm: 0.9345504894312899, iteration: 420201
loss: 0.9764270782470703,grad_norm: 0.7054535050263671, iteration: 420202
loss: 1.0126063823699951,grad_norm: 0.860137599132293, iteration: 420203
loss: 0.9653289914131165,grad_norm: 0.8672745060388967, iteration: 420204
loss: 0.9904418587684631,grad_norm: 0.6942267093471474, iteration: 420205
loss: 1.0167571306228638,grad_norm: 0.8591214038735774, iteration: 420206
loss: 0.962479829788208,grad_norm: 0.7306884290683622, iteration: 420207
loss: 0.9949324727058411,grad_norm: 0.7660663044942867, iteration: 420208
loss: 0.9806220531463623,grad_norm: 0.8194891305644386, iteration: 420209
loss: 1.0062181949615479,grad_norm: 0.6402328381262744, iteration: 420210
loss: 1.0193196535110474,grad_norm: 0.9999994141006766, iteration: 420211
loss: 0.9853134751319885,grad_norm: 0.7013573366426227, iteration: 420212
loss: 1.0047204494476318,grad_norm: 0.7411910002170982, iteration: 420213
loss: 1.0035711526870728,grad_norm: 0.7078011150391045, iteration: 420214
loss: 1.0055831670761108,grad_norm: 0.8859665873051789, iteration: 420215
loss: 0.9875819683074951,grad_norm: 0.7852218961794971, iteration: 420216
loss: 1.0009797811508179,grad_norm: 0.9999989057775024, iteration: 420217
loss: 1.0279743671417236,grad_norm: 0.9968557955058839, iteration: 420218
loss: 0.9872347116470337,grad_norm: 0.9240331883781232, iteration: 420219
loss: 0.9997749328613281,grad_norm: 0.8104710390272856, iteration: 420220
loss: 1.0088955163955688,grad_norm: 0.9999995905088391, iteration: 420221
loss: 0.9945846796035767,grad_norm: 0.8359608507491213, iteration: 420222
loss: 1.0253742933273315,grad_norm: 0.6135847899696161, iteration: 420223
loss: 1.0306077003479004,grad_norm: 0.7894443260756587, iteration: 420224
loss: 0.9851273894309998,grad_norm: 0.7149352671500939, iteration: 420225
loss: 1.0244842767715454,grad_norm: 0.8385580426751518, iteration: 420226
loss: 1.0500324964523315,grad_norm: 0.8620088193255925, iteration: 420227
loss: 1.0147947072982788,grad_norm: 0.8367326132062668, iteration: 420228
loss: 0.9631376266479492,grad_norm: 0.8710824782348744, iteration: 420229
loss: 1.0189861059188843,grad_norm: 0.7911157422352747, iteration: 420230
loss: 1.033272624015808,grad_norm: 0.846650032838634, iteration: 420231
loss: 1.15947687625885,grad_norm: 0.9666202866209405, iteration: 420232
loss: 1.0939643383026123,grad_norm: 0.9999996104880312, iteration: 420233
loss: 1.0289595127105713,grad_norm: 0.7577537163119469, iteration: 420234
loss: 1.0028307437896729,grad_norm: 0.9999997397494472, iteration: 420235
loss: 0.979049563407898,grad_norm: 0.775021341619364, iteration: 420236
loss: 1.0144603252410889,grad_norm: 0.6994857628220129, iteration: 420237
loss: 1.0276050567626953,grad_norm: 0.9999991824361236, iteration: 420238
loss: 1.0080957412719727,grad_norm: 0.7096820136026728, iteration: 420239
loss: 0.9810523390769958,grad_norm: 0.7065460334579681, iteration: 420240
loss: 1.0053468942642212,grad_norm: 0.6553185138465127, iteration: 420241
loss: 1.0332623720169067,grad_norm: 0.7437032694583461, iteration: 420242
loss: 1.0193634033203125,grad_norm: 0.8817220381258039, iteration: 420243
loss: 1.0296196937561035,grad_norm: 0.9999994243566758, iteration: 420244
loss: 1.034665822982788,grad_norm: 0.9004176619385597, iteration: 420245
loss: 0.9787519574165344,grad_norm: 0.7666204196584248, iteration: 420246
loss: 1.0539242029190063,grad_norm: 0.8258099890702534, iteration: 420247
loss: 0.9977531433105469,grad_norm: 0.7498728391251683, iteration: 420248
loss: 1.0393221378326416,grad_norm: 0.9926268129595802, iteration: 420249
loss: 0.9666745066642761,grad_norm: 0.7428830132178061, iteration: 420250
loss: 1.0281401872634888,grad_norm: 0.8318871380972834, iteration: 420251
loss: 1.0342661142349243,grad_norm: 0.824847942637178, iteration: 420252
loss: 1.0103580951690674,grad_norm: 0.7594199381151211, iteration: 420253
loss: 1.039129376411438,grad_norm: 0.997066120498407, iteration: 420254
loss: 1.0003526210784912,grad_norm: 0.8579880785341465, iteration: 420255
loss: 1.1077395677566528,grad_norm: 0.999999247345593, iteration: 420256
loss: 0.9640577435493469,grad_norm: 0.8036536271462906, iteration: 420257
loss: 1.0153173208236694,grad_norm: 0.6201565500764307, iteration: 420258
loss: 0.9885169863700867,grad_norm: 0.8440750285946215, iteration: 420259
loss: 1.012643814086914,grad_norm: 0.7381217508200425, iteration: 420260
loss: 1.0744208097457886,grad_norm: 0.9999991023643465, iteration: 420261
loss: 0.9913213849067688,grad_norm: 0.9785965699186631, iteration: 420262
loss: 0.9761285185813904,grad_norm: 0.8964894003537445, iteration: 420263
loss: 0.9747759699821472,grad_norm: 0.830485971342413, iteration: 420264
loss: 0.9936853051185608,grad_norm: 0.9999999478194256, iteration: 420265
loss: 1.0277857780456543,grad_norm: 0.8168854270679954, iteration: 420266
loss: 1.0038847923278809,grad_norm: 0.8276913248106083, iteration: 420267
loss: 1.013127326965332,grad_norm: 0.9999991147940392, iteration: 420268
loss: 0.9805054664611816,grad_norm: 0.7992047963632004, iteration: 420269
loss: 0.9892364740371704,grad_norm: 0.7636070214527566, iteration: 420270
loss: 1.010208249092102,grad_norm: 0.9999991152805776, iteration: 420271
loss: 1.002417802810669,grad_norm: 0.7341420399416348, iteration: 420272
loss: 1.0051788091659546,grad_norm: 0.824995249253103, iteration: 420273
loss: 1.0346169471740723,grad_norm: 0.8363772745972478, iteration: 420274
loss: 1.0374823808670044,grad_norm: 0.9999998639162085, iteration: 420275
loss: 0.9840410351753235,grad_norm: 0.6530646367268521, iteration: 420276
loss: 0.9721322059631348,grad_norm: 0.8349188361182992, iteration: 420277
loss: 0.9909778833389282,grad_norm: 0.8988539969125975, iteration: 420278
loss: 1.000054955482483,grad_norm: 0.7296312218729245, iteration: 420279
loss: 1.0486531257629395,grad_norm: 0.9999990760719071, iteration: 420280
loss: 0.9791345000267029,grad_norm: 0.6503168974779616, iteration: 420281
loss: 0.9956130981445312,grad_norm: 0.7891291212027879, iteration: 420282
loss: 1.0320231914520264,grad_norm: 0.7270389174883342, iteration: 420283
loss: 0.9462994337081909,grad_norm: 0.8163210536215417, iteration: 420284
loss: 1.0329838991165161,grad_norm: 0.9795982941283041, iteration: 420285
loss: 0.9904659390449524,grad_norm: 0.7070419221199743, iteration: 420286
loss: 0.9804365634918213,grad_norm: 0.7322018648271497, iteration: 420287
loss: 0.9827960133552551,grad_norm: 0.7559590858351569, iteration: 420288
loss: 0.9881914258003235,grad_norm: 0.795201998715004, iteration: 420289
loss: 0.9789145588874817,grad_norm: 0.999999677834013, iteration: 420290
loss: 1.0329911708831787,grad_norm: 0.8321174801326128, iteration: 420291
loss: 1.0488282442092896,grad_norm: 0.6569979598110768, iteration: 420292
loss: 1.058416724205017,grad_norm: 0.9999993751396064, iteration: 420293
loss: 0.9736283421516418,grad_norm: 0.76886835319401, iteration: 420294
loss: 1.0768060684204102,grad_norm: 0.9999999495161197, iteration: 420295
loss: 1.0010801553726196,grad_norm: 0.7994103545681788, iteration: 420296
loss: 1.0331236124038696,grad_norm: 0.829053222445929, iteration: 420297
loss: 1.0308313369750977,grad_norm: 0.8592345019666422, iteration: 420298
loss: 1.0151206254959106,grad_norm: 0.6841312334604915, iteration: 420299
loss: 0.9956168532371521,grad_norm: 0.7598176497522341, iteration: 420300
loss: 0.9993026852607727,grad_norm: 0.7672235647744409, iteration: 420301
loss: 0.9893192648887634,grad_norm: 0.7313379316166494, iteration: 420302
loss: 1.014561653137207,grad_norm: 0.7526878334110896, iteration: 420303
loss: 0.9871904253959656,grad_norm: 0.809706521623048, iteration: 420304
loss: 1.0002037286758423,grad_norm: 0.7326505580502238, iteration: 420305
loss: 1.0009301900863647,grad_norm: 0.8384884200567214, iteration: 420306
loss: 1.0082223415374756,grad_norm: 0.8008102648790842, iteration: 420307
loss: 0.9587710499763489,grad_norm: 0.7543979608142449, iteration: 420308
loss: 0.9788214564323425,grad_norm: 0.7521950958697421, iteration: 420309
loss: 1.0051658153533936,grad_norm: 0.714983405312338, iteration: 420310
loss: 0.980559229850769,grad_norm: 0.7588675213331864, iteration: 420311
loss: 0.966910719871521,grad_norm: 0.6519407865874066, iteration: 420312
loss: 1.000557780265808,grad_norm: 0.7262909019546128, iteration: 420313
loss: 1.0038182735443115,grad_norm: 0.7885293056742282, iteration: 420314
loss: 0.9790471196174622,grad_norm: 0.7060999407628538, iteration: 420315
loss: 1.0248738527297974,grad_norm: 0.8225581202332748, iteration: 420316
loss: 1.0369603633880615,grad_norm: 0.7869100258319259, iteration: 420317
loss: 1.001204013824463,grad_norm: 0.9999996038869886, iteration: 420318
loss: 1.0079550743103027,grad_norm: 0.6809628476789357, iteration: 420319
loss: 1.0147778987884521,grad_norm: 0.8462045191438893, iteration: 420320
loss: 0.9886103272438049,grad_norm: 0.7145178452322148, iteration: 420321
loss: 0.9825041890144348,grad_norm: 0.6184979497654796, iteration: 420322
loss: 1.0538783073425293,grad_norm: 0.956439992221889, iteration: 420323
loss: 1.0217622518539429,grad_norm: 0.7113962121333935, iteration: 420324
loss: 0.9825097322463989,grad_norm: 0.948153488405373, iteration: 420325
loss: 1.0082309246063232,grad_norm: 0.6176096137767312, iteration: 420326
loss: 1.0121678113937378,grad_norm: 0.795595010223722, iteration: 420327
loss: 1.0138667821884155,grad_norm: 0.9015048846252606, iteration: 420328
loss: 1.0136659145355225,grad_norm: 0.9095283436404337, iteration: 420329
loss: 0.9895902276039124,grad_norm: 0.7578714430135284, iteration: 420330
loss: 1.0040535926818848,grad_norm: 0.999999124492006, iteration: 420331
loss: 1.003724455833435,grad_norm: 0.6867463189712613, iteration: 420332
loss: 1.0049700736999512,grad_norm: 0.6734645586565281, iteration: 420333
loss: 0.9893653988838196,grad_norm: 0.8662425268696426, iteration: 420334
loss: 0.9768533706665039,grad_norm: 0.9999997965302228, iteration: 420335
loss: 1.0304828882217407,grad_norm: 0.9999994573571303, iteration: 420336
loss: 0.986314058303833,grad_norm: 0.8137248638905938, iteration: 420337
loss: 1.0312085151672363,grad_norm: 0.7838365715072303, iteration: 420338
loss: 0.9945454597473145,grad_norm: 0.6604274431767465, iteration: 420339
loss: 1.004605770111084,grad_norm: 0.8389990835079946, iteration: 420340
loss: 0.989381730556488,grad_norm: 0.7396460861798885, iteration: 420341
loss: 1.0067470073699951,grad_norm: 0.7392907541882598, iteration: 420342
loss: 1.0147258043289185,grad_norm: 0.8379981309557819, iteration: 420343
loss: 1.019646406173706,grad_norm: 0.7809086397902664, iteration: 420344
loss: 1.011806607246399,grad_norm: 0.8053586239550944, iteration: 420345
loss: 0.988916277885437,grad_norm: 0.6501151782618344, iteration: 420346
loss: 1.0249245166778564,grad_norm: 0.8444220110267635, iteration: 420347
loss: 0.9354941844940186,grad_norm: 0.7979507235972227, iteration: 420348
loss: 1.0412507057189941,grad_norm: 0.9999990205006152, iteration: 420349
loss: 0.9882205724716187,grad_norm: 0.914165523489195, iteration: 420350
loss: 1.000822901725769,grad_norm: 0.8105235173171161, iteration: 420351
loss: 1.015623927116394,grad_norm: 0.7045153855435778, iteration: 420352
loss: 1.0247373580932617,grad_norm: 0.6918853060529254, iteration: 420353
loss: 1.0155593156814575,grad_norm: 0.7676903096189576, iteration: 420354
loss: 0.9920293688774109,grad_norm: 0.9232659326226188, iteration: 420355
loss: 1.0404527187347412,grad_norm: 0.6148486140815345, iteration: 420356
loss: 0.9801572561264038,grad_norm: 0.7243200826198404, iteration: 420357
loss: 1.0165295600891113,grad_norm: 0.5926197313987954, iteration: 420358
loss: 1.0061156749725342,grad_norm: 0.950472385660554, iteration: 420359
loss: 1.0227346420288086,grad_norm: 0.8824543310210099, iteration: 420360
loss: 0.9917684197425842,grad_norm: 0.8433070831775937, iteration: 420361
loss: 0.9952259659767151,grad_norm: 0.682877584099437, iteration: 420362
loss: 1.0059654712677002,grad_norm: 0.7374405609265912, iteration: 420363
loss: 1.0208930969238281,grad_norm: 0.7478184581602149, iteration: 420364
loss: 0.9646380543708801,grad_norm: 0.8760930108891479, iteration: 420365
loss: 0.9560217261314392,grad_norm: 0.7468655369664209, iteration: 420366
loss: 0.9739788770675659,grad_norm: 0.9078601271147798, iteration: 420367
loss: 1.0609651803970337,grad_norm: 0.9999995340309359, iteration: 420368
loss: 1.00397527217865,grad_norm: 0.6985374425185003, iteration: 420369
loss: 0.9722282886505127,grad_norm: 0.8101425210426547, iteration: 420370
loss: 1.0203638076782227,grad_norm: 0.8129129678541372, iteration: 420371
loss: 1.0277700424194336,grad_norm: 0.7593073889485997, iteration: 420372
loss: 1.0195001363754272,grad_norm: 0.7217437667523527, iteration: 420373
loss: 1.0103484392166138,grad_norm: 0.9355100456561191, iteration: 420374
loss: 1.0038849115371704,grad_norm: 0.8909836026693322, iteration: 420375
loss: 1.0150192975997925,grad_norm: 0.8957799302001243, iteration: 420376
loss: 1.104308843612671,grad_norm: 0.9999999250058323, iteration: 420377
loss: 1.085983157157898,grad_norm: 0.772901458587765, iteration: 420378
loss: 0.9952750205993652,grad_norm: 0.9045427667211613, iteration: 420379
loss: 0.9986547827720642,grad_norm: 0.8216842126153924, iteration: 420380
loss: 1.0371384620666504,grad_norm: 0.7461179395124823, iteration: 420381
loss: 1.050273060798645,grad_norm: 0.7919288556860278, iteration: 420382
loss: 0.9998950958251953,grad_norm: 0.8248710716904007, iteration: 420383
loss: 1.0300683975219727,grad_norm: 0.8714381071197791, iteration: 420384
loss: 0.9775819182395935,grad_norm: 0.7408314757659408, iteration: 420385
loss: 0.976887047290802,grad_norm: 0.8811748507456163, iteration: 420386
loss: 0.9989600777626038,grad_norm: 0.8136588111684719, iteration: 420387
loss: 1.0282564163208008,grad_norm: 0.7140877371690088, iteration: 420388
loss: 1.03790283203125,grad_norm: 0.9999990675607797, iteration: 420389
loss: 1.0129374265670776,grad_norm: 0.8345539895309004, iteration: 420390
loss: 1.0516602993011475,grad_norm: 0.8780799334738145, iteration: 420391
loss: 1.016129970550537,grad_norm: 0.8256485218176581, iteration: 420392
loss: 1.1466138362884521,grad_norm: 0.9999994827687155, iteration: 420393
loss: 0.9875277876853943,grad_norm: 0.736199291766352, iteration: 420394
loss: 0.9895209074020386,grad_norm: 0.8933970891693259, iteration: 420395
loss: 1.048312783241272,grad_norm: 0.8348591660652899, iteration: 420396
loss: 1.0373106002807617,grad_norm: 0.757643299334138, iteration: 420397
loss: 1.0008678436279297,grad_norm: 0.7271062672341575, iteration: 420398
loss: 0.9835752248764038,grad_norm: 0.7990373905522292, iteration: 420399
loss: 1.0235581398010254,grad_norm: 0.8300805288959962, iteration: 420400
loss: 1.025101661682129,grad_norm: 0.7759844925227927, iteration: 420401
loss: 0.9678683876991272,grad_norm: 0.8177043211118586, iteration: 420402
loss: 0.9839693903923035,grad_norm: 0.6541311171876375, iteration: 420403
loss: 0.9930370450019836,grad_norm: 0.8481736809204422, iteration: 420404
loss: 1.0177124738693237,grad_norm: 0.9208177478957906, iteration: 420405
loss: 0.9824877381324768,grad_norm: 0.712891842646823, iteration: 420406
loss: 0.9328425526618958,grad_norm: 0.9055694811865681, iteration: 420407
loss: 1.0539987087249756,grad_norm: 0.9999993246101808, iteration: 420408
loss: 1.019490361213684,grad_norm: 0.9999991676583956, iteration: 420409
loss: 0.9965552687644958,grad_norm: 0.7972972295862248, iteration: 420410
loss: 1.0276941061019897,grad_norm: 0.9999999395180696, iteration: 420411
loss: 0.9930905699729919,grad_norm: 0.861096287656414, iteration: 420412
loss: 0.9514065384864807,grad_norm: 0.8522136915047911, iteration: 420413
loss: 1.0063915252685547,grad_norm: 0.9568200685074688, iteration: 420414
loss: 0.9904035925865173,grad_norm: 0.9438210737268751, iteration: 420415
loss: 1.023184061050415,grad_norm: 0.7379441971255772, iteration: 420416
loss: 0.9953371286392212,grad_norm: 0.8434706529551048, iteration: 420417
loss: 1.03641676902771,grad_norm: 0.7774392980014344, iteration: 420418
loss: 0.9747450947761536,grad_norm: 0.7067872390313096, iteration: 420419
loss: 0.9813336133956909,grad_norm: 0.8410385546446606, iteration: 420420
loss: 1.0164177417755127,grad_norm: 0.6873958019755562, iteration: 420421
loss: 1.0064570903778076,grad_norm: 0.64375360543945, iteration: 420422
loss: 0.9897514581680298,grad_norm: 0.8372929846646862, iteration: 420423
loss: 1.032167911529541,grad_norm: 0.8402534582330574, iteration: 420424
loss: 1.1023553609848022,grad_norm: 0.9999993604489619, iteration: 420425
loss: 1.0097514390945435,grad_norm: 0.8513881309649363, iteration: 420426
loss: 0.9642618894577026,grad_norm: 0.7987211353424895, iteration: 420427
loss: 0.9926868081092834,grad_norm: 0.7711602953103153, iteration: 420428
loss: 1.0446839332580566,grad_norm: 0.9614834603319666, iteration: 420429
loss: 1.0280994176864624,grad_norm: 0.8735397991217684, iteration: 420430
loss: 1.03067147731781,grad_norm: 0.9999995760592406, iteration: 420431
loss: 0.9941509366035461,grad_norm: 0.7422452524930289, iteration: 420432
loss: 1.0161981582641602,grad_norm: 0.7168562274585167, iteration: 420433
loss: 0.9905641674995422,grad_norm: 0.6544009506099903, iteration: 420434
loss: 0.9636569023132324,grad_norm: 0.7599322272603191, iteration: 420435
loss: 1.0186467170715332,grad_norm: 0.8098964459645323, iteration: 420436
loss: 1.0345447063446045,grad_norm: 0.6946277432937896, iteration: 420437
loss: 0.9841269254684448,grad_norm: 0.7268914077401931, iteration: 420438
loss: 1.070624828338623,grad_norm: 0.9109733302503209, iteration: 420439
loss: 0.979377269744873,grad_norm: 0.65905412252196, iteration: 420440
loss: 0.9893500804901123,grad_norm: 0.6713955860289267, iteration: 420441
loss: 1.0180823802947998,grad_norm: 0.6885351886090469, iteration: 420442
loss: 1.04619562625885,grad_norm: 0.7671677819991984, iteration: 420443
loss: 1.0230402946472168,grad_norm: 0.9879235506292896, iteration: 420444
loss: 1.0153576135635376,grad_norm: 0.7035793989272375, iteration: 420445
loss: 0.9843209981918335,grad_norm: 0.7970839976559589, iteration: 420446
loss: 0.9545392394065857,grad_norm: 0.8456366008302032, iteration: 420447
loss: 0.9678516983985901,grad_norm: 0.7215382287242977, iteration: 420448
loss: 0.9993571639060974,grad_norm: 0.7675866761624752, iteration: 420449
loss: 0.9774104952812195,grad_norm: 0.8065369726525105, iteration: 420450
loss: 1.0265083312988281,grad_norm: 0.6679444794872528, iteration: 420451
loss: 0.9963366389274597,grad_norm: 0.8198278224170222, iteration: 420452
loss: 1.001138687133789,grad_norm: 0.8417812062111139, iteration: 420453
loss: 1.0100131034851074,grad_norm: 0.7015953846696606, iteration: 420454
loss: 0.999788761138916,grad_norm: 0.7315022131180392, iteration: 420455
loss: 1.0107067823410034,grad_norm: 0.9616741837817921, iteration: 420456
loss: 1.0800412893295288,grad_norm: 0.7755633973265068, iteration: 420457
loss: 1.010082483291626,grad_norm: 0.8060345364910703, iteration: 420458
loss: 0.989890456199646,grad_norm: 0.7822103078897006, iteration: 420459
loss: 1.0156315565109253,grad_norm: 0.7671714277455125, iteration: 420460
loss: 1.0049830675125122,grad_norm: 0.8203433082760959, iteration: 420461
loss: 1.0298845767974854,grad_norm: 0.9119077807207547, iteration: 420462
loss: 1.028123140335083,grad_norm: 1.0000000623696639, iteration: 420463
loss: 0.9958524107933044,grad_norm: 0.6714610461127177, iteration: 420464
loss: 1.0153013467788696,grad_norm: 0.8301398231516599, iteration: 420465
loss: 0.9893612265586853,grad_norm: 0.8251504402705616, iteration: 420466
loss: 0.9747251868247986,grad_norm: 0.678221035868043, iteration: 420467
loss: 1.0011324882507324,grad_norm: 0.9317742862224068, iteration: 420468
loss: 0.9857826828956604,grad_norm: 0.7095613034454781, iteration: 420469
loss: 0.9747329354286194,grad_norm: 0.827748501553128, iteration: 420470
loss: 0.9899208545684814,grad_norm: 0.9999990195761871, iteration: 420471
loss: 1.009413719177246,grad_norm: 0.6703824549576587, iteration: 420472
loss: 1.0075925588607788,grad_norm: 0.8158370416313017, iteration: 420473
loss: 1.0007575750350952,grad_norm: 0.9999991394496455, iteration: 420474
loss: 0.9656495451927185,grad_norm: 0.7824232169244958, iteration: 420475
loss: 1.0177916288375854,grad_norm: 0.8031261884219439, iteration: 420476
loss: 1.0875275135040283,grad_norm: 0.9999994495585826, iteration: 420477
loss: 1.008034348487854,grad_norm: 0.6505534465799149, iteration: 420478
loss: 1.0101852416992188,grad_norm: 0.8104451418537082, iteration: 420479
loss: 0.9646764397621155,grad_norm: 0.7822165610414575, iteration: 420480
loss: 1.0193754434585571,grad_norm: 0.8438250810279607, iteration: 420481
loss: 1.0007374286651611,grad_norm: 0.8921579826450416, iteration: 420482
loss: 1.0094032287597656,grad_norm: 0.8302503373481676, iteration: 420483
loss: 1.0039273500442505,grad_norm: 0.7487364231726603, iteration: 420484
loss: 0.9864596724510193,grad_norm: 0.8933764033944768, iteration: 420485
loss: 1.0025055408477783,grad_norm: 0.7400473820706758, iteration: 420486
loss: 1.0107018947601318,grad_norm: 0.80191758031165, iteration: 420487
loss: 1.0081024169921875,grad_norm: 0.7997475335431928, iteration: 420488
loss: 1.0263837575912476,grad_norm: 0.9999992175593565, iteration: 420489
loss: 0.9966372847557068,grad_norm: 0.7134677611064644, iteration: 420490
loss: 1.1482152938842773,grad_norm: 0.9999994437410062, iteration: 420491
loss: 1.0017338991165161,grad_norm: 0.8786424972090832, iteration: 420492
loss: 0.9957000017166138,grad_norm: 0.8349888367853364, iteration: 420493
loss: 1.049660325050354,grad_norm: 0.8784757764657019, iteration: 420494
loss: 1.0029749870300293,grad_norm: 0.6864216519898767, iteration: 420495
loss: 0.9649127125740051,grad_norm: 0.9999991411701724, iteration: 420496
loss: 0.9634643197059631,grad_norm: 0.686708473439225, iteration: 420497
loss: 0.9962721467018127,grad_norm: 0.6098704529298683, iteration: 420498
loss: 1.003599762916565,grad_norm: 0.8289036404112835, iteration: 420499
loss: 0.9861486554145813,grad_norm: 0.849798039129221, iteration: 420500
loss: 1.0574984550476074,grad_norm: 0.8194046413491974, iteration: 420501
loss: 1.0277535915374756,grad_norm: 0.7286479566088125, iteration: 420502
loss: 1.0357085466384888,grad_norm: 0.7917317946836129, iteration: 420503
loss: 0.9917922616004944,grad_norm: 0.5959219996768194, iteration: 420504
loss: 1.0015394687652588,grad_norm: 0.7085602686109197, iteration: 420505
loss: 0.9762152433395386,grad_norm: 0.8111829644646249, iteration: 420506
loss: 1.0269635915756226,grad_norm: 0.882596105474255, iteration: 420507
loss: 1.052108883857727,grad_norm: 0.7197166924411458, iteration: 420508
loss: 0.9826222658157349,grad_norm: 0.8431460520840474, iteration: 420509
loss: 0.9793073534965515,grad_norm: 0.6843933134763293, iteration: 420510
loss: 0.9971491694450378,grad_norm: 0.8966116496471761, iteration: 420511
loss: 0.9829443097114563,grad_norm: 0.7554639292551492, iteration: 420512
loss: 0.9732922315597534,grad_norm: 0.7149291202327271, iteration: 420513
loss: 0.9966654777526855,grad_norm: 0.7237070479495405, iteration: 420514
loss: 1.0138843059539795,grad_norm: 0.7423068117291137, iteration: 420515
loss: 0.9989270567893982,grad_norm: 0.9999997445694273, iteration: 420516
loss: 1.0306315422058105,grad_norm: 0.6669495841254365, iteration: 420517
loss: 1.0466208457946777,grad_norm: 0.999999965752402, iteration: 420518
loss: 0.9961045384407043,grad_norm: 0.6143856973939259, iteration: 420519
loss: 1.0177167654037476,grad_norm: 0.8266135998648645, iteration: 420520
loss: 1.0093930959701538,grad_norm: 0.9999991097053054, iteration: 420521
loss: 1.0309284925460815,grad_norm: 0.7395440515962363, iteration: 420522
loss: 1.0120935440063477,grad_norm: 0.6934911228530091, iteration: 420523
loss: 0.9959201812744141,grad_norm: 0.6572307726782304, iteration: 420524
loss: 1.0266684293746948,grad_norm: 0.8663306469815615, iteration: 420525
loss: 0.9819237589836121,grad_norm: 0.7910401287086762, iteration: 420526
loss: 1.0344182252883911,grad_norm: 0.830176014376749, iteration: 420527
loss: 1.0171940326690674,grad_norm: 0.6686547333874842, iteration: 420528
loss: 0.983428955078125,grad_norm: 0.8193355380861361, iteration: 420529
loss: 1.0258771181106567,grad_norm: 0.8180043824648764, iteration: 420530
loss: 1.0218815803527832,grad_norm: 0.7405489365756682, iteration: 420531
loss: 1.046933650970459,grad_norm: 0.7112452379270344, iteration: 420532
loss: 0.983877420425415,grad_norm: 0.8385630809881598, iteration: 420533
loss: 1.0047327280044556,grad_norm: 0.7926623756538678, iteration: 420534
loss: 1.0189249515533447,grad_norm: 0.792101439283107, iteration: 420535
loss: 0.984642505645752,grad_norm: 0.636363584408754, iteration: 420536
loss: 0.9849533438682556,grad_norm: 0.7558657269142226, iteration: 420537
loss: 1.0617268085479736,grad_norm: 0.9373337471279751, iteration: 420538
loss: 0.9964888691902161,grad_norm: 0.7617707207892974, iteration: 420539
loss: 1.11551833152771,grad_norm: 0.999999757818941, iteration: 420540
loss: 0.9831993579864502,grad_norm: 0.7375391682307222, iteration: 420541
loss: 1.0308125019073486,grad_norm: 0.7685968875764996, iteration: 420542
loss: 1.0234535932540894,grad_norm: 0.7285339487293876, iteration: 420543
loss: 1.021797776222229,grad_norm: 0.7488423414858517, iteration: 420544
loss: 1.048182725906372,grad_norm: 0.9999996561338381, iteration: 420545
loss: 0.990454375743866,grad_norm: 0.7541836855549003, iteration: 420546
loss: 1.0089945793151855,grad_norm: 0.8372024069995516, iteration: 420547
loss: 1.0248708724975586,grad_norm: 0.8422494589440871, iteration: 420548
loss: 0.9829663038253784,grad_norm: 0.8647474629230637, iteration: 420549
loss: 1.0151995420455933,grad_norm: 0.654698150962689, iteration: 420550
loss: 1.0185855627059937,grad_norm: 0.7402146837763551, iteration: 420551
loss: 0.9484773874282837,grad_norm: 0.7357886517864934, iteration: 420552
loss: 1.023236632347107,grad_norm: 0.692304516558912, iteration: 420553
loss: 1.0316753387451172,grad_norm: 0.6788940912754, iteration: 420554
loss: 0.990268349647522,grad_norm: 0.7787619178484345, iteration: 420555
loss: 1.054535984992981,grad_norm: 0.7826443013265781, iteration: 420556
loss: 1.000550627708435,grad_norm: 0.8637051517711719, iteration: 420557
loss: 1.0127123594284058,grad_norm: 0.9999990532217473, iteration: 420558
loss: 0.9971333146095276,grad_norm: 0.8140451081980271, iteration: 420559
loss: 1.0477445125579834,grad_norm: 0.7482420990289005, iteration: 420560
loss: 1.045770525932312,grad_norm: 0.7241014493877876, iteration: 420561
loss: 1.0079418420791626,grad_norm: 0.7021336372139432, iteration: 420562
loss: 1.0052231550216675,grad_norm: 0.9128765779504673, iteration: 420563
loss: 0.9987591505050659,grad_norm: 0.7836025971146997, iteration: 420564
loss: 1.0784882307052612,grad_norm: 0.8043689243654324, iteration: 420565
loss: 1.004681944847107,grad_norm: 0.6854397520188623, iteration: 420566
loss: 0.9990537166595459,grad_norm: 0.6950048562355021, iteration: 420567
loss: 0.9723703861236572,grad_norm: 0.7232312758615731, iteration: 420568
loss: 1.011091709136963,grad_norm: 0.6922500214245025, iteration: 420569
loss: 1.023439884185791,grad_norm: 0.6993115935220898, iteration: 420570
loss: 1.0087225437164307,grad_norm: 0.7956912998933328, iteration: 420571
loss: 1.0099132061004639,grad_norm: 0.6381426414942681, iteration: 420572
loss: 1.0200046300888062,grad_norm: 0.8767908356307049, iteration: 420573
loss: 0.9924754500389099,grad_norm: 0.7106555782181321, iteration: 420574
loss: 0.9913331866264343,grad_norm: 0.7382322916571402, iteration: 420575
loss: 1.0361956357955933,grad_norm: 0.8572579711627745, iteration: 420576
loss: 1.024640679359436,grad_norm: 0.8696445894005032, iteration: 420577
loss: 1.017201542854309,grad_norm: 0.7817387234843387, iteration: 420578
loss: 0.999844491481781,grad_norm: 0.9125704556055232, iteration: 420579
loss: 1.0092597007751465,grad_norm: 0.6441372937738563, iteration: 420580
loss: 1.0132921934127808,grad_norm: 0.6640619463247995, iteration: 420581
loss: 1.013881802558899,grad_norm: 0.6300280823218382, iteration: 420582
loss: 0.998516321182251,grad_norm: 0.7292958257389076, iteration: 420583
loss: 1.0404977798461914,grad_norm: 0.9999998183320141, iteration: 420584
loss: 1.0099397897720337,grad_norm: 0.7980225689695364, iteration: 420585
loss: 1.017541766166687,grad_norm: 0.9999991043832339, iteration: 420586
loss: 0.9967283606529236,grad_norm: 0.7879344013858006, iteration: 420587
loss: 1.0584725141525269,grad_norm: 0.9999992366846486, iteration: 420588
loss: 0.9913759827613831,grad_norm: 0.6231358388507992, iteration: 420589
loss: 0.9871906042098999,grad_norm: 0.8849312302488038, iteration: 420590
loss: 1.1416021585464478,grad_norm: 0.9999997241250183, iteration: 420591
loss: 0.9944753050804138,grad_norm: 0.7144931661535813, iteration: 420592
loss: 1.0050857067108154,grad_norm: 0.7571254717490137, iteration: 420593
loss: 1.0044411420822144,grad_norm: 0.7121468063951956, iteration: 420594
loss: 0.9974201917648315,grad_norm: 0.7901094859963431, iteration: 420595
loss: 0.9834670424461365,grad_norm: 0.7125487214576552, iteration: 420596
loss: 1.0464529991149902,grad_norm: 0.8036860243232652, iteration: 420597
loss: 0.9894970059394836,grad_norm: 0.7332661762650023, iteration: 420598
loss: 1.006455421447754,grad_norm: 0.9733898806204814, iteration: 420599
loss: 0.9977850914001465,grad_norm: 0.8347709106564272, iteration: 420600
loss: 0.9764854311943054,grad_norm: 0.6511204677022296, iteration: 420601
loss: 1.0305168628692627,grad_norm: 0.9999994669516904, iteration: 420602
loss: 1.0091544389724731,grad_norm: 0.9999991983675496, iteration: 420603
loss: 1.000961422920227,grad_norm: 0.7544640075286195, iteration: 420604
loss: 0.9881259202957153,grad_norm: 0.815153653090793, iteration: 420605
loss: 0.9996368885040283,grad_norm: 0.7479182566035496, iteration: 420606
loss: 0.9892423152923584,grad_norm: 0.7217147567904192, iteration: 420607
loss: 1.0040390491485596,grad_norm: 0.8145110003888696, iteration: 420608
loss: 1.0128262042999268,grad_norm: 0.5943735001536585, iteration: 420609
loss: 1.0412859916687012,grad_norm: 0.7684128434140145, iteration: 420610
loss: 0.975013792514801,grad_norm: 0.8317150048245391, iteration: 420611
loss: 0.9854102730751038,grad_norm: 0.9999993230860883, iteration: 420612
loss: 1.0440541505813599,grad_norm: 0.9999997399680094, iteration: 420613
loss: 1.0066092014312744,grad_norm: 0.8608166470344195, iteration: 420614
loss: 1.004547119140625,grad_norm: 0.7533013957866991, iteration: 420615
loss: 1.0095727443695068,grad_norm: 0.8663977810301607, iteration: 420616
loss: 1.0023584365844727,grad_norm: 0.7469962106878577, iteration: 420617
loss: 0.9964784383773804,grad_norm: 0.8858846081251216, iteration: 420618
loss: 1.0123982429504395,grad_norm: 0.6816980831993948, iteration: 420619
loss: 1.0106298923492432,grad_norm: 0.7161913169107395, iteration: 420620
loss: 1.0013948678970337,grad_norm: 0.7364669122743782, iteration: 420621
loss: 1.0110514163970947,grad_norm: 0.652688308375083, iteration: 420622
loss: 1.0063930749893188,grad_norm: 0.661080962187747, iteration: 420623
loss: 1.0502303838729858,grad_norm: 0.9999992270735546, iteration: 420624
loss: 1.0031806230545044,grad_norm: 0.9063584647939413, iteration: 420625
loss: 0.982219934463501,grad_norm: 0.9999997419829698, iteration: 420626
loss: 1.005224347114563,grad_norm: 0.9115064903126906, iteration: 420627
loss: 0.9924591183662415,grad_norm: 0.8494742616832405, iteration: 420628
loss: 1.035738468170166,grad_norm: 0.9686630981296217, iteration: 420629
loss: 1.0272328853607178,grad_norm: 0.8856441383473485, iteration: 420630
loss: 1.0161545276641846,grad_norm: 0.7837840176877175, iteration: 420631
loss: 0.99862140417099,grad_norm: 0.773135371500931, iteration: 420632
loss: 0.991524875164032,grad_norm: 0.8502073365482146, iteration: 420633
loss: 0.9505985379219055,grad_norm: 0.8266212828824712, iteration: 420634
loss: 0.9709660410881042,grad_norm: 0.7730932696148467, iteration: 420635
loss: 0.9837473034858704,grad_norm: 0.6857813602318128, iteration: 420636
loss: 0.9685097336769104,grad_norm: 0.7866722444666787, iteration: 420637
loss: 0.989194929599762,grad_norm: 0.759224471297623, iteration: 420638
loss: 0.970702052116394,grad_norm: 0.7999890143029764, iteration: 420639
loss: 0.9655578136444092,grad_norm: 0.7658104302233426, iteration: 420640
loss: 0.9991844892501831,grad_norm: 0.8506792497044546, iteration: 420641
loss: 0.9700170755386353,grad_norm: 0.9182872119026256, iteration: 420642
loss: 0.9581363201141357,grad_norm: 0.7937780928158114, iteration: 420643
loss: 0.9800266027450562,grad_norm: 0.8535624051830998, iteration: 420644
loss: 0.9988294243812561,grad_norm: 0.8211817364766526, iteration: 420645
loss: 0.9727560877799988,grad_norm: 0.7375632914006248, iteration: 420646
loss: 1.0086016654968262,grad_norm: 0.7309822326052526, iteration: 420647
loss: 0.9913440346717834,grad_norm: 0.8536839223963578, iteration: 420648
loss: 1.0222476720809937,grad_norm: 0.7916887485211073, iteration: 420649
loss: 1.1569546461105347,grad_norm: 0.9999993251353854, iteration: 420650
loss: 0.9909053444862366,grad_norm: 0.805505436995122, iteration: 420651
loss: 0.989985466003418,grad_norm: 0.9821912206715169, iteration: 420652
loss: 1.0065934658050537,grad_norm: 0.7453558045365793, iteration: 420653
loss: 0.9713571071624756,grad_norm: 0.7501911252037248, iteration: 420654
loss: 1.0190436840057373,grad_norm: 1.000000001627832, iteration: 420655
loss: 1.0067832469940186,grad_norm: 0.7752427257966754, iteration: 420656
loss: 1.0115793943405151,grad_norm: 0.7135019371460285, iteration: 420657
loss: 0.9992030262947083,grad_norm: 0.814719770244237, iteration: 420658
loss: 1.006615400314331,grad_norm: 0.93993967295977, iteration: 420659
loss: 1.002642035484314,grad_norm: 0.7374489225699332, iteration: 420660
loss: 0.9807891845703125,grad_norm: 0.8166686617907893, iteration: 420661
loss: 1.017763376235962,grad_norm: 0.9010994427572765, iteration: 420662
loss: 1.1074326038360596,grad_norm: 0.878735758160573, iteration: 420663
loss: 1.008603572845459,grad_norm: 0.832103887688189, iteration: 420664
loss: 0.9725456833839417,grad_norm: 0.690494189396218, iteration: 420665
loss: 1.0739383697509766,grad_norm: 0.8584873354393089, iteration: 420666
loss: 1.0583610534667969,grad_norm: 0.7863813571632966, iteration: 420667
loss: 1.0297662019729614,grad_norm: 0.9999998644985323, iteration: 420668
loss: 1.0163553953170776,grad_norm: 0.8069859359338466, iteration: 420669
loss: 1.0104618072509766,grad_norm: 0.8574408997889539, iteration: 420670
loss: 0.9690032601356506,grad_norm: 0.6657637327099616, iteration: 420671
loss: 1.06317138671875,grad_norm: 0.9999997111022888, iteration: 420672
loss: 1.0120428800582886,grad_norm: 0.8489240235663329, iteration: 420673
loss: 1.00472891330719,grad_norm: 0.9552994782304295, iteration: 420674
loss: 1.0383715629577637,grad_norm: 0.8241337023054645, iteration: 420675
loss: 0.9985940456390381,grad_norm: 0.7590139291624705, iteration: 420676
loss: 1.095558762550354,grad_norm: 0.9999998558134711, iteration: 420677
loss: 0.9974641799926758,grad_norm: 0.8776881041493793, iteration: 420678
loss: 1.01206374168396,grad_norm: 0.7710301282891264, iteration: 420679
loss: 1.1032772064208984,grad_norm: 0.8458873153519008, iteration: 420680
loss: 0.9913270473480225,grad_norm: 0.7494902810742481, iteration: 420681
loss: 1.0007011890411377,grad_norm: 0.8077874366450426, iteration: 420682
loss: 1.0607928037643433,grad_norm: 0.9999999726948267, iteration: 420683
loss: 0.9793640971183777,grad_norm: 0.6797139389270449, iteration: 420684
loss: 0.9941967129707336,grad_norm: 0.7477492784951759, iteration: 420685
loss: 0.9822068810462952,grad_norm: 0.6788921220568404, iteration: 420686
loss: 0.9794231057167053,grad_norm: 0.7626049261535409, iteration: 420687
loss: 0.9382033348083496,grad_norm: 0.7849937755313628, iteration: 420688
loss: 0.9778668880462646,grad_norm: 0.9999994252507116, iteration: 420689
loss: 0.9809525012969971,grad_norm: 0.7093267021809911, iteration: 420690
loss: 0.9840192794799805,grad_norm: 0.8027969110493701, iteration: 420691
loss: 0.9940941333770752,grad_norm: 0.8047777155566488, iteration: 420692
loss: 0.9758316874504089,grad_norm: 0.7169022533754529, iteration: 420693
loss: 0.9968114495277405,grad_norm: 0.8196159261104815, iteration: 420694
loss: 1.0188463926315308,grad_norm: 0.7524384552492883, iteration: 420695
loss: 0.9940776824951172,grad_norm: 0.5956651222198375, iteration: 420696
loss: 0.9839894771575928,grad_norm: 0.6698899579677867, iteration: 420697
loss: 1.0045102834701538,grad_norm: 0.7607184336518393, iteration: 420698
loss: 1.0057178735733032,grad_norm: 0.8286837903860796, iteration: 420699
loss: 1.0063040256500244,grad_norm: 0.7442415276481195, iteration: 420700
loss: 1.0125330686569214,grad_norm: 0.7739101900368868, iteration: 420701
loss: 0.9946876168251038,grad_norm: 0.6385354855324188, iteration: 420702
loss: 0.9929664134979248,grad_norm: 0.7764073466627321, iteration: 420703
loss: 1.00435209274292,grad_norm: 0.7715789650069506, iteration: 420704
loss: 1.0157947540283203,grad_norm: 0.950625825372323, iteration: 420705
loss: 0.9901351928710938,grad_norm: 0.8879981496602279, iteration: 420706
loss: 1.0050300359725952,grad_norm: 0.7725348650247376, iteration: 420707
loss: 1.0228074789047241,grad_norm: 0.6990026490478656, iteration: 420708
loss: 1.0134199857711792,grad_norm: 0.7782168554743779, iteration: 420709
loss: 1.0118935108184814,grad_norm: 0.7942179221968758, iteration: 420710
loss: 1.1751726865768433,grad_norm: 0.9999999626117855, iteration: 420711
loss: 0.9850666522979736,grad_norm: 0.805317015654777, iteration: 420712
loss: 0.9961763620376587,grad_norm: 0.6773893251477, iteration: 420713
loss: 0.9854081869125366,grad_norm: 0.7730777351701761, iteration: 420714
loss: 1.0150063037872314,grad_norm: 0.8529142345117636, iteration: 420715
loss: 0.9574192762374878,grad_norm: 0.8602165653580259, iteration: 420716
loss: 1.0098353624343872,grad_norm: 0.8728838751107818, iteration: 420717
loss: 1.0222084522247314,grad_norm: 0.7692636301112605, iteration: 420718
loss: 0.9709456562995911,grad_norm: 0.9999998434075025, iteration: 420719
loss: 0.9666316509246826,grad_norm: 0.6235677449219609, iteration: 420720
loss: 0.9998605847358704,grad_norm: 0.7969896615521209, iteration: 420721
loss: 0.9837173819541931,grad_norm: 0.6694958674712098, iteration: 420722
loss: 0.9965463280677795,grad_norm: 0.7353730896198306, iteration: 420723
loss: 0.9844659566879272,grad_norm: 0.7830967644098471, iteration: 420724
loss: 1.0095453262329102,grad_norm: 0.8035247575804536, iteration: 420725
loss: 0.9913599491119385,grad_norm: 0.7727414535884028, iteration: 420726
loss: 0.9772351384162903,grad_norm: 0.6987570785106533, iteration: 420727
loss: 1.051344871520996,grad_norm: 0.8972407291927489, iteration: 420728
loss: 1.1071699857711792,grad_norm: 0.9999995020035268, iteration: 420729
loss: 0.9809642434120178,grad_norm: 0.83536722940819, iteration: 420730
loss: 1.026821255683899,grad_norm: 0.90342048341212, iteration: 420731
loss: 0.999980628490448,grad_norm: 0.6606135937427068, iteration: 420732
loss: 0.9918018579483032,grad_norm: 0.9999990716139364, iteration: 420733
loss: 0.9933152198791504,grad_norm: 0.9471253753032542, iteration: 420734
loss: 0.9734002947807312,grad_norm: 0.8913265130193283, iteration: 420735
loss: 0.9776179194450378,grad_norm: 0.7997047328805209, iteration: 420736
loss: 0.9577927589416504,grad_norm: 0.8033588258831665, iteration: 420737
loss: 0.9862880706787109,grad_norm: 0.8501739041010915, iteration: 420738
loss: 1.0303847789764404,grad_norm: 0.7774096249363353, iteration: 420739
loss: 1.0154953002929688,grad_norm: 0.7889969188913879, iteration: 420740
loss: 1.0233275890350342,grad_norm: 0.711554671959643, iteration: 420741
loss: 0.9498719573020935,grad_norm: 0.8326270759899999, iteration: 420742
loss: 0.999792754650116,grad_norm: 0.708223010638012, iteration: 420743
loss: 0.9879425168037415,grad_norm: 0.9146637724614789, iteration: 420744
loss: 0.9971451163291931,grad_norm: 0.9624211785791159, iteration: 420745
loss: 0.969299852848053,grad_norm: 0.6945188278251155, iteration: 420746
loss: 1.0496422052383423,grad_norm: 0.8414147333640315, iteration: 420747
loss: 0.9887300133705139,grad_norm: 0.8487968504496994, iteration: 420748
loss: 1.026252269744873,grad_norm: 0.99999979721783, iteration: 420749
loss: 0.9914489984512329,grad_norm: 0.9999991451101081, iteration: 420750
loss: 1.004569172859192,grad_norm: 0.8219671446487914, iteration: 420751
loss: 0.9485125541687012,grad_norm: 0.9999991010541267, iteration: 420752
loss: 1.000933289527893,grad_norm: 0.8418086180390013, iteration: 420753
loss: 1.0161374807357788,grad_norm: 0.872407965835771, iteration: 420754
loss: 0.9913395643234253,grad_norm: 0.68586340354448, iteration: 420755
loss: 1.0567419528961182,grad_norm: 0.9999990893052965, iteration: 420756
loss: 1.0759133100509644,grad_norm: 0.7272315478176988, iteration: 420757
loss: 1.0151609182357788,grad_norm: 0.7813423703752845, iteration: 420758
loss: 0.9869319200515747,grad_norm: 0.8396814758134257, iteration: 420759
loss: 1.0258817672729492,grad_norm: 0.8561635505681481, iteration: 420760
loss: 1.0500301122665405,grad_norm: 0.9020189325315238, iteration: 420761
loss: 1.0008869171142578,grad_norm: 0.756114186244078, iteration: 420762
loss: 0.9952228665351868,grad_norm: 0.7971502752663315, iteration: 420763
loss: 0.9949445128440857,grad_norm: 0.8101133545963258, iteration: 420764
loss: 1.0544559955596924,grad_norm: 0.999999621909184, iteration: 420765
loss: 1.0203561782836914,grad_norm: 0.762061839556173, iteration: 420766
loss: 1.0052322149276733,grad_norm: 0.6390414402785124, iteration: 420767
loss: 0.9804705381393433,grad_norm: 0.7721877597980464, iteration: 420768
loss: 1.0439963340759277,grad_norm: 0.8051135308719413, iteration: 420769
loss: 1.0327290296554565,grad_norm: 0.8058379563661903, iteration: 420770
loss: 1.0100802183151245,grad_norm: 0.8476071917207385, iteration: 420771
loss: 0.9969409704208374,grad_norm: 0.7797024543159654, iteration: 420772
loss: 0.9972418546676636,grad_norm: 0.8150190675168562, iteration: 420773
loss: 0.962675929069519,grad_norm: 0.76442866207979, iteration: 420774
loss: 1.0536309480667114,grad_norm: 0.817388650324973, iteration: 420775
loss: 0.9986007213592529,grad_norm: 0.9709213121355326, iteration: 420776
loss: 0.9862590432167053,grad_norm: 0.9424401263309414, iteration: 420777
loss: 0.9711238145828247,grad_norm: 0.7615206565483025, iteration: 420778
loss: 0.9963552355766296,grad_norm: 0.9279307338040506, iteration: 420779
loss: 0.9916630983352661,grad_norm: 0.7296854169465932, iteration: 420780
loss: 0.9976344108581543,grad_norm: 0.6487477731473541, iteration: 420781
loss: 0.9806479811668396,grad_norm: 0.6313786793365534, iteration: 420782
loss: 1.0064737796783447,grad_norm: 0.84508397523757, iteration: 420783
loss: 0.9702107310295105,grad_norm: 0.9090073035855084, iteration: 420784
loss: 0.9856002926826477,grad_norm: 0.7402121715511383, iteration: 420785
loss: 0.9734771847724915,grad_norm: 0.848625568914445, iteration: 420786
loss: 1.0064795017242432,grad_norm: 0.9001413632494145, iteration: 420787
loss: 0.993232011795044,grad_norm: 0.8625863523842292, iteration: 420788
loss: 0.9783057570457458,grad_norm: 0.7363683637373896, iteration: 420789
loss: 1.012265920639038,grad_norm: 0.8867418026896557, iteration: 420790
loss: 1.0270053148269653,grad_norm: 0.8078072592409227, iteration: 420791
loss: 0.9816237688064575,grad_norm: 0.6862048144518902, iteration: 420792
loss: 1.0291959047317505,grad_norm: 0.9999999515685702, iteration: 420793
loss: 0.980790913105011,grad_norm: 0.787910927125484, iteration: 420794
loss: 1.032836675643921,grad_norm: 0.8102246255443635, iteration: 420795
loss: 0.9777634143829346,grad_norm: 0.7653473690124164, iteration: 420796
loss: 0.9914044141769409,grad_norm: 0.8390479542635655, iteration: 420797
loss: 1.0828982591629028,grad_norm: 0.9999998520378237, iteration: 420798
loss: 0.9930776357650757,grad_norm: 0.8837062976867431, iteration: 420799
loss: 0.9772318601608276,grad_norm: 0.8912000258356388, iteration: 420800
loss: 0.9896954894065857,grad_norm: 0.8903459270345958, iteration: 420801
loss: 0.9975137710571289,grad_norm: 0.726126991027886, iteration: 420802
loss: 1.0028821229934692,grad_norm: 0.9258134285947655, iteration: 420803
loss: 0.9913966655731201,grad_norm: 0.7259368154131526, iteration: 420804
loss: 0.9784013628959656,grad_norm: 0.9999990985091879, iteration: 420805
loss: 1.0111600160598755,grad_norm: 0.6735284935873588, iteration: 420806
loss: 1.0258022546768188,grad_norm: 0.6630946690064513, iteration: 420807
loss: 0.9919536709785461,grad_norm: 0.7616694735544858, iteration: 420808
loss: 1.002792477607727,grad_norm: 0.7601021586414309, iteration: 420809
loss: 1.0199925899505615,grad_norm: 0.7199838822389838, iteration: 420810
loss: 0.996886670589447,grad_norm: 0.9071235481196933, iteration: 420811
loss: 0.9958553910255432,grad_norm: 0.9134867388255269, iteration: 420812
loss: 1.0231505632400513,grad_norm: 0.9999999497543743, iteration: 420813
loss: 1.0373421907424927,grad_norm: 0.7573113150000418, iteration: 420814
loss: 1.0249310731887817,grad_norm: 0.9501942213612015, iteration: 420815
loss: 1.02751886844635,grad_norm: 0.7951869395423177, iteration: 420816
loss: 0.9879187941551208,grad_norm: 0.7615835357849075, iteration: 420817
loss: 0.9812489151954651,grad_norm: 0.7483938079561121, iteration: 420818
loss: 1.1044750213623047,grad_norm: 0.8488701483187703, iteration: 420819
loss: 1.0319899320602417,grad_norm: 0.7854869518965064, iteration: 420820
loss: 1.0108513832092285,grad_norm: 0.7102738890920118, iteration: 420821
loss: 1.0519152879714966,grad_norm: 0.9999999094343051, iteration: 420822
loss: 0.9930059313774109,grad_norm: 0.9936919568678967, iteration: 420823
loss: 1.0837702751159668,grad_norm: 0.9999997767232284, iteration: 420824
loss: 1.0032120943069458,grad_norm: 0.9209019581842749, iteration: 420825
loss: 0.9747234582901001,grad_norm: 0.8164004625968921, iteration: 420826
loss: 1.023391842842102,grad_norm: 0.815538091816531, iteration: 420827
loss: 1.008033275604248,grad_norm: 0.8498147000177612, iteration: 420828
loss: 0.9765617847442627,grad_norm: 0.7085457510682814, iteration: 420829
loss: 0.9803822040557861,grad_norm: 0.9039374454089778, iteration: 420830
loss: 1.0159798860549927,grad_norm: 0.7863399437223001, iteration: 420831
loss: 1.0303146839141846,grad_norm: 0.8401282658853891, iteration: 420832
loss: 1.0015339851379395,grad_norm: 0.6702600375572835, iteration: 420833
loss: 1.060499668121338,grad_norm: 0.7044778868666272, iteration: 420834
loss: 1.0514675378799438,grad_norm: 0.9767399897351682, iteration: 420835
loss: 0.9903907179832458,grad_norm: 0.6756274423049682, iteration: 420836
loss: 1.0247578620910645,grad_norm: 0.7144334579040753, iteration: 420837
loss: 0.9973166584968567,grad_norm: 0.7310366946604748, iteration: 420838
loss: 1.033177137374878,grad_norm: 0.741395570364252, iteration: 420839
loss: 0.9881122708320618,grad_norm: 0.7672966074084142, iteration: 420840
loss: 0.9875454902648926,grad_norm: 0.7329329242265865, iteration: 420841
loss: 0.970058023929596,grad_norm: 0.7463964703961101, iteration: 420842
loss: 0.9919177293777466,grad_norm: 0.776213483609965, iteration: 420843
loss: 1.0057990550994873,grad_norm: 0.789476692934473, iteration: 420844
loss: 0.9799385666847229,grad_norm: 0.7890756431831737, iteration: 420845
loss: 0.9886060357093811,grad_norm: 0.9479313208772869, iteration: 420846
loss: 1.0440173149108887,grad_norm: 0.9999991228682191, iteration: 420847
loss: 0.9751229286193848,grad_norm: 0.8409145622896564, iteration: 420848
loss: 1.0473459959030151,grad_norm: 0.9999995204794033, iteration: 420849
loss: 0.9898033738136292,grad_norm: 0.8127444598063176, iteration: 420850
loss: 1.0015164613723755,grad_norm: 0.8159433272651877, iteration: 420851
loss: 1.0544171333312988,grad_norm: 0.9999996304648668, iteration: 420852
loss: 1.0100150108337402,grad_norm: 0.9551310152085656, iteration: 420853
loss: 1.0184478759765625,grad_norm: 0.9225316967462632, iteration: 420854
loss: 0.9653965830802917,grad_norm: 0.7035815921719669, iteration: 420855
loss: 1.0243895053863525,grad_norm: 0.99999903369089, iteration: 420856
loss: 0.9690816402435303,grad_norm: 0.8681943825074183, iteration: 420857
loss: 0.985925018787384,grad_norm: 0.798794858417, iteration: 420858
loss: 1.1116751432418823,grad_norm: 0.999999186590673, iteration: 420859
loss: 1.0036158561706543,grad_norm: 0.8759771591109892, iteration: 420860
loss: 0.9990582466125488,grad_norm: 0.9449777512471366, iteration: 420861
loss: 1.0034244060516357,grad_norm: 0.7886446681169249, iteration: 420862
loss: 1.0071861743927002,grad_norm: 0.7824829554268212, iteration: 420863
loss: 0.9993609189987183,grad_norm: 0.7632246403815711, iteration: 420864
loss: 1.0176836252212524,grad_norm: 0.7989186633432737, iteration: 420865
loss: 0.9667171835899353,grad_norm: 0.7171752749760123, iteration: 420866
loss: 1.0222346782684326,grad_norm: 0.7792181089756864, iteration: 420867
loss: 1.010562539100647,grad_norm: 0.9999992874661667, iteration: 420868
loss: 1.026756763458252,grad_norm: 0.7933289778982128, iteration: 420869
loss: 1.0379176139831543,grad_norm: 0.7645246087671501, iteration: 420870
loss: 0.9923774003982544,grad_norm: 0.6951562411734485, iteration: 420871
loss: 0.9963120818138123,grad_norm: 0.8988252287741884, iteration: 420872
loss: 1.0042049884796143,grad_norm: 0.9521811792211534, iteration: 420873
loss: 0.9837066531181335,grad_norm: 0.999999135394993, iteration: 420874
loss: 0.9492867588996887,grad_norm: 0.8298655127350141, iteration: 420875
loss: 0.9839257597923279,grad_norm: 0.8060749607520701, iteration: 420876
loss: 1.084378719329834,grad_norm: 0.9999994064466204, iteration: 420877
loss: 0.9913784861564636,grad_norm: 0.7656108610978694, iteration: 420878
loss: 0.9793678522109985,grad_norm: 0.7582593692857403, iteration: 420879
loss: 1.0497753620147705,grad_norm: 0.999999239424879, iteration: 420880
loss: 0.9893073439598083,grad_norm: 0.7501733924739175, iteration: 420881
loss: 0.9739442467689514,grad_norm: 0.9622858178348148, iteration: 420882
loss: 1.039455771446228,grad_norm: 0.9999992034734483, iteration: 420883
loss: 1.0113295316696167,grad_norm: 0.7477771968764937, iteration: 420884
loss: 0.9910791516304016,grad_norm: 0.8126792971396123, iteration: 420885
loss: 1.0053349733352661,grad_norm: 0.6713078301001769, iteration: 420886
loss: 1.0167415142059326,grad_norm: 0.8224250352987283, iteration: 420887
loss: 1.0063930749893188,grad_norm: 0.6906738003078009, iteration: 420888
loss: 1.065143346786499,grad_norm: 0.8004571121406712, iteration: 420889
loss: 1.0636794567108154,grad_norm: 0.9999990645521174, iteration: 420890
loss: 0.9835667610168457,grad_norm: 0.9999994995478986, iteration: 420891
loss: 1.003417730331421,grad_norm: 0.8545993869373528, iteration: 420892
loss: 1.0475398302078247,grad_norm: 0.9598351725215101, iteration: 420893
loss: 0.9607617259025574,grad_norm: 0.7542719289019683, iteration: 420894
loss: 0.998720645904541,grad_norm: 0.7424721591453098, iteration: 420895
loss: 0.9999518990516663,grad_norm: 0.8236103805808698, iteration: 420896
loss: 0.9837380051612854,grad_norm: 0.7453247384736829, iteration: 420897
loss: 1.0054988861083984,grad_norm: 0.8926331778164331, iteration: 420898
loss: 1.0129162073135376,grad_norm: 0.7349064590805222, iteration: 420899
loss: 0.9779786467552185,grad_norm: 0.7336624623913728, iteration: 420900
loss: 0.9714703559875488,grad_norm: 0.7708045212579165, iteration: 420901
loss: 0.9424722790718079,grad_norm: 0.8098645472058768, iteration: 420902
loss: 1.0049082040786743,grad_norm: 0.9999998165558532, iteration: 420903
loss: 0.9625210165977478,grad_norm: 0.8364892166503891, iteration: 420904
loss: 0.9857121706008911,grad_norm: 0.9589518438007566, iteration: 420905
loss: 1.0009759664535522,grad_norm: 0.8529939598908498, iteration: 420906
loss: 0.9559847116470337,grad_norm: 0.6762908395232299, iteration: 420907
loss: 0.9950240850448608,grad_norm: 0.779946706759477, iteration: 420908
loss: 1.0092134475708008,grad_norm: 0.9999991881545096, iteration: 420909
loss: 0.9584483504295349,grad_norm: 0.7616451528316582, iteration: 420910
loss: 0.9704351425170898,grad_norm: 0.8371260474076188, iteration: 420911
loss: 1.0258891582489014,grad_norm: 0.8560095319098738, iteration: 420912
loss: 1.0027287006378174,grad_norm: 0.8157530140967796, iteration: 420913
loss: 1.0002672672271729,grad_norm: 0.6956969933759226, iteration: 420914
loss: 1.0201871395111084,grad_norm: 0.708049588585668, iteration: 420915
loss: 1.02974534034729,grad_norm: 0.9645122945165966, iteration: 420916
loss: 1.0126049518585205,grad_norm: 0.8249819015265696, iteration: 420917
loss: 1.0514509677886963,grad_norm: 0.9999999760084717, iteration: 420918
loss: 1.007887601852417,grad_norm: 0.7642391600850649, iteration: 420919
loss: 1.0332891941070557,grad_norm: 0.8202166150416113, iteration: 420920
loss: 1.0480191707611084,grad_norm: 0.9391715131996111, iteration: 420921
loss: 0.9677030444145203,grad_norm: 0.7693735515533016, iteration: 420922
loss: 0.9989042282104492,grad_norm: 0.8528008177790247, iteration: 420923
loss: 1.005898356437683,grad_norm: 0.6859744866922499, iteration: 420924
loss: 0.9874805212020874,grad_norm: 0.7955986626163092, iteration: 420925
loss: 0.9696951508522034,grad_norm: 0.9999998131189273, iteration: 420926
loss: 1.02231764793396,grad_norm: 0.7900664527419066, iteration: 420927
loss: 1.0019786357879639,grad_norm: 0.718104291325959, iteration: 420928
loss: 1.0118427276611328,grad_norm: 0.8214835485165897, iteration: 420929
loss: 1.0232619047164917,grad_norm: 0.7437411575171685, iteration: 420930
loss: 1.0137407779693604,grad_norm: 0.7343070782327176, iteration: 420931
loss: 1.0771273374557495,grad_norm: 1.0000000455975768, iteration: 420932
loss: 1.012208342552185,grad_norm: 0.7724106549531714, iteration: 420933
loss: 1.022178292274475,grad_norm: 0.6657249139164871, iteration: 420934
loss: 0.9775872826576233,grad_norm: 0.9999996687751439, iteration: 420935
loss: 1.1345586776733398,grad_norm: 0.9999993916023523, iteration: 420936
loss: 1.0646711587905884,grad_norm: 0.9999993019074831, iteration: 420937
loss: 1.080560326576233,grad_norm: 0.914107488490692, iteration: 420938
loss: 1.0387245416641235,grad_norm: 0.999999950003177, iteration: 420939
loss: 1.0271046161651611,grad_norm: 0.9999991346246278, iteration: 420940
loss: 1.0327939987182617,grad_norm: 0.999999627216217, iteration: 420941
loss: 1.0113588571548462,grad_norm: 0.8574295005621512, iteration: 420942
loss: 0.9884754419326782,grad_norm: 0.8677325278973748, iteration: 420943
loss: 0.9872627854347229,grad_norm: 0.7661417676820667, iteration: 420944
loss: 0.9876885414123535,grad_norm: 0.7101382131584147, iteration: 420945
loss: 0.9862112998962402,grad_norm: 0.76377339082809, iteration: 420946
loss: 0.9964176416397095,grad_norm: 0.6986426427812519, iteration: 420947
loss: 1.0011818408966064,grad_norm: 0.8180722187607388, iteration: 420948
loss: 0.957229733467102,grad_norm: 0.8958289368131178, iteration: 420949
loss: 1.0196592807769775,grad_norm: 0.8248116528507836, iteration: 420950
loss: 1.0181230306625366,grad_norm: 0.7926350123733517, iteration: 420951
loss: 1.0247939825057983,grad_norm: 0.7834232948529649, iteration: 420952
loss: 0.9764382243156433,grad_norm: 0.7870598895606564, iteration: 420953
loss: 1.0204012393951416,grad_norm: 0.9999991591490377, iteration: 420954
loss: 0.9854171872138977,grad_norm: 0.9659091984910027, iteration: 420955
loss: 1.0046560764312744,grad_norm: 0.8825293923404006, iteration: 420956
loss: 0.979752779006958,grad_norm: 0.7684137921977015, iteration: 420957
loss: 0.9638596773147583,grad_norm: 0.7340255943954507, iteration: 420958
loss: 0.9925097227096558,grad_norm: 0.7799369449116024, iteration: 420959
loss: 0.9674686789512634,grad_norm: 0.650955472709901, iteration: 420960
loss: 1.0263394117355347,grad_norm: 0.9769826566756054, iteration: 420961
loss: 0.9884567260742188,grad_norm: 0.9999992520283821, iteration: 420962
loss: 1.0054833889007568,grad_norm: 0.8157959020483853, iteration: 420963
loss: 0.9692374467849731,grad_norm: 0.765998280632667, iteration: 420964
loss: 1.0240821838378906,grad_norm: 0.7205027445355843, iteration: 420965
loss: 1.0118460655212402,grad_norm: 0.7520755568399785, iteration: 420966
loss: 0.9959616661071777,grad_norm: 0.7729258492557661, iteration: 420967
loss: 0.9797516465187073,grad_norm: 0.7986147662897821, iteration: 420968
loss: 1.020784854888916,grad_norm: 0.8367015685983697, iteration: 420969
loss: 0.9678134918212891,grad_norm: 0.7168556059291437, iteration: 420970
loss: 0.98012775182724,grad_norm: 0.8269841198536347, iteration: 420971
loss: 1.0294058322906494,grad_norm: 0.9999990206198726, iteration: 420972
loss: 1.0236366987228394,grad_norm: 0.8591391402493399, iteration: 420973
loss: 0.9747856259346008,grad_norm: 0.8673252747827298, iteration: 420974
loss: 1.0392473936080933,grad_norm: 0.9999991735854584, iteration: 420975
loss: 0.9860645532608032,grad_norm: 0.832296409149278, iteration: 420976
loss: 0.987579345703125,grad_norm: 0.9999999375061778, iteration: 420977
loss: 0.9973605275154114,grad_norm: 0.7458376646271422, iteration: 420978
loss: 0.9991704821586609,grad_norm: 0.8127320867330751, iteration: 420979
loss: 1.0277400016784668,grad_norm: 0.9928444912035634, iteration: 420980
loss: 1.0070899724960327,grad_norm: 0.7194391774201443, iteration: 420981
loss: 1.039480209350586,grad_norm: 0.835244601400586, iteration: 420982
loss: 1.0462982654571533,grad_norm: 0.8351552198991857, iteration: 420983
loss: 1.0138192176818848,grad_norm: 0.7410237341760725, iteration: 420984
loss: 1.0083485841751099,grad_norm: 0.6901129156285712, iteration: 420985
loss: 1.031436562538147,grad_norm: 0.9999995801108167, iteration: 420986
loss: 1.0389623641967773,grad_norm: 0.999999563173, iteration: 420987
loss: 0.9892467856407166,grad_norm: 0.7314986291235909, iteration: 420988
loss: 1.0402292013168335,grad_norm: 0.9999992883934136, iteration: 420989
loss: 1.0801533460617065,grad_norm: 0.9525213157550491, iteration: 420990
loss: 1.0029617547988892,grad_norm: 0.7056156549445852, iteration: 420991
loss: 0.9978393912315369,grad_norm: 0.7609187455033098, iteration: 420992
loss: 1.1081502437591553,grad_norm: 0.8594576764659557, iteration: 420993
loss: 1.0309174060821533,grad_norm: 0.7853918066548964, iteration: 420994
loss: 0.9978305697441101,grad_norm: 0.9834579508389535, iteration: 420995
loss: 1.029419183731079,grad_norm: 0.8273574425630517, iteration: 420996
loss: 0.967697262763977,grad_norm: 0.806546236377322, iteration: 420997
loss: 1.0091623067855835,grad_norm: 0.8052282269227168, iteration: 420998
loss: 0.9623070955276489,grad_norm: 0.7813189049050449, iteration: 420999
loss: 1.0267586708068848,grad_norm: 0.999999516134537, iteration: 421000
loss: 1.0130705833435059,grad_norm: 0.6674227643393672, iteration: 421001
loss: 1.0002390146255493,grad_norm: 0.7613206819037953, iteration: 421002
loss: 0.9725421667098999,grad_norm: 0.7673950785091822, iteration: 421003
loss: 0.9762635231018066,grad_norm: 0.8269367429047335, iteration: 421004
loss: 1.0176881551742554,grad_norm: 0.8566786859432675, iteration: 421005
loss: 0.9848209023475647,grad_norm: 0.905826273154878, iteration: 421006
loss: 1.1775386333465576,grad_norm: 0.9999993266940187, iteration: 421007
loss: 0.9968440532684326,grad_norm: 0.884781993851466, iteration: 421008
loss: 0.9906511306762695,grad_norm: 0.7785018883383432, iteration: 421009
loss: 0.9948925375938416,grad_norm: 0.8021382549990087, iteration: 421010
loss: 1.030006766319275,grad_norm: 0.9999991951173389, iteration: 421011
loss: 1.0186841487884521,grad_norm: 0.8680728334455845, iteration: 421012
loss: 1.0698797702789307,grad_norm: 0.9999991462238492, iteration: 421013
loss: 1.03864324092865,grad_norm: 0.8845635815641776, iteration: 421014
loss: 1.0220556259155273,grad_norm: 0.6941440103870297, iteration: 421015
loss: 1.1255618333816528,grad_norm: 0.9999993205108344, iteration: 421016
loss: 0.9856550097465515,grad_norm: 0.7356336955770912, iteration: 421017
loss: 0.9795730710029602,grad_norm: 0.9264707020776751, iteration: 421018
loss: 1.0095844268798828,grad_norm: 0.9999991799532221, iteration: 421019
loss: 0.989305317401886,grad_norm: 0.8485698118648581, iteration: 421020
loss: 1.0330032110214233,grad_norm: 0.7149389362869667, iteration: 421021
loss: 1.0143260955810547,grad_norm: 0.6999580251610035, iteration: 421022
loss: 0.9728761911392212,grad_norm: 0.837858016619218, iteration: 421023
loss: 0.9701356887817383,grad_norm: 0.7030457010500055, iteration: 421024
loss: 1.011151671409607,grad_norm: 0.881339614178295, iteration: 421025
loss: 0.9999663829803467,grad_norm: 0.6864835709343722, iteration: 421026
loss: 1.0564186573028564,grad_norm: 0.7579598091267172, iteration: 421027
loss: 1.0087676048278809,grad_norm: 0.765877358659165, iteration: 421028
loss: 1.007189393043518,grad_norm: 0.9999993211602037, iteration: 421029
loss: 1.0189813375473022,grad_norm: 0.8041042223870213, iteration: 421030
loss: 0.9968218207359314,grad_norm: 0.7363334558617951, iteration: 421031
loss: 1.016213059425354,grad_norm: 0.86126907335099, iteration: 421032
loss: 0.9794148802757263,grad_norm: 0.7392836795217788, iteration: 421033
loss: 0.9732882976531982,grad_norm: 0.6744864316634644, iteration: 421034
loss: 1.0264489650726318,grad_norm: 0.9999991041933328, iteration: 421035
loss: 1.0176953077316284,grad_norm: 0.6931449719999264, iteration: 421036
loss: 0.9964783191680908,grad_norm: 0.7683349378983441, iteration: 421037
loss: 1.0212124586105347,grad_norm: 0.7733239147955852, iteration: 421038
loss: 1.0143667459487915,grad_norm: 0.7904389770913722, iteration: 421039
loss: 1.0156904458999634,grad_norm: 0.7607891279468265, iteration: 421040
loss: 0.980202317237854,grad_norm: 0.704092561417251, iteration: 421041
loss: 1.032210350036621,grad_norm: 0.8433022225344419, iteration: 421042
loss: 1.0013045072555542,grad_norm: 0.7142628406576079, iteration: 421043
loss: 1.0063186883926392,grad_norm: 0.7582633388990929, iteration: 421044
loss: 0.9948378205299377,grad_norm: 0.7100100079617135, iteration: 421045
loss: 1.0037906169891357,grad_norm: 0.8001048143739823, iteration: 421046
loss: 0.975504457950592,grad_norm: 0.7009757583826175, iteration: 421047
loss: 0.9958981275558472,grad_norm: 0.8433384905230736, iteration: 421048
loss: 1.0122565031051636,grad_norm: 0.7231170382327755, iteration: 421049
loss: 1.0167512893676758,grad_norm: 0.9422920438209075, iteration: 421050
loss: 1.0104119777679443,grad_norm: 0.795980082297776, iteration: 421051
loss: 0.9993340373039246,grad_norm: 0.7639487202498936, iteration: 421052
loss: 0.9913815259933472,grad_norm: 0.7435909881127655, iteration: 421053
loss: 0.9768072366714478,grad_norm: 0.7594597940080843, iteration: 421054
loss: 0.9864211082458496,grad_norm: 0.7508978720278877, iteration: 421055
loss: 0.9654802680015564,grad_norm: 0.7524860895639286, iteration: 421056
loss: 0.9922373294830322,grad_norm: 0.8117507622831427, iteration: 421057
loss: 0.9853028059005737,grad_norm: 0.7304337381764883, iteration: 421058
loss: 0.9798804521560669,grad_norm: 0.8133753635050934, iteration: 421059
loss: 1.078567624092102,grad_norm: 0.999999073596387, iteration: 421060
loss: 0.9995700120925903,grad_norm: 0.805327943532021, iteration: 421061
loss: 1.0908427238464355,grad_norm: 0.7747679509572667, iteration: 421062
loss: 1.0065178871154785,grad_norm: 0.7484419863465487, iteration: 421063
loss: 1.0081508159637451,grad_norm: 0.7196819626951053, iteration: 421064
loss: 0.9805516600608826,grad_norm: 0.9304617450717213, iteration: 421065
loss: 0.9793934226036072,grad_norm: 0.8409807220968697, iteration: 421066
loss: 1.0321069955825806,grad_norm: 0.9999991772206753, iteration: 421067
loss: 1.0119495391845703,grad_norm: 0.9742325050652335, iteration: 421068
loss: 0.9712358117103577,grad_norm: 0.8074286188061052, iteration: 421069
loss: 0.9798853397369385,grad_norm: 0.8114321499872551, iteration: 421070
loss: 1.0142850875854492,grad_norm: 0.9712345814969628, iteration: 421071
loss: 0.9664976596832275,grad_norm: 0.855732386182475, iteration: 421072
loss: 0.9886952638626099,grad_norm: 0.7272345581585247, iteration: 421073
loss: 1.1603235006332397,grad_norm: 0.9999999394439791, iteration: 421074
loss: 0.9749754667282104,grad_norm: 0.8621090528114115, iteration: 421075
loss: 1.0145386457443237,grad_norm: 0.7846567640683485, iteration: 421076
loss: 0.9871871471405029,grad_norm: 0.66992411666032, iteration: 421077
loss: 0.9657623171806335,grad_norm: 0.8393188028422337, iteration: 421078
loss: 0.9869336485862732,grad_norm: 0.7772774562885372, iteration: 421079
loss: 0.9984281659126282,grad_norm: 0.7598781048083985, iteration: 421080
loss: 0.9801467061042786,grad_norm: 0.8900655318949081, iteration: 421081
loss: 0.9873462915420532,grad_norm: 0.7089413090289758, iteration: 421082
loss: 1.0100476741790771,grad_norm: 0.8979897663147769, iteration: 421083
loss: 0.9904953241348267,grad_norm: 0.9999998596734643, iteration: 421084
loss: 1.0160762071609497,grad_norm: 0.7322786152954687, iteration: 421085
loss: 1.0182260274887085,grad_norm: 0.6569156650034543, iteration: 421086
loss: 1.0438823699951172,grad_norm: 0.9008539778322394, iteration: 421087
loss: 1.0032669305801392,grad_norm: 0.9330664225045266, iteration: 421088
loss: 1.0584009885787964,grad_norm: 0.9999991426955025, iteration: 421089
loss: 1.0413774251937866,grad_norm: 0.8883116130770415, iteration: 421090
loss: 1.0584906339645386,grad_norm: 0.8694353218193399, iteration: 421091
loss: 1.0051722526550293,grad_norm: 0.8420984702993779, iteration: 421092
loss: 1.0282868146896362,grad_norm: 0.6701030896248173, iteration: 421093
loss: 1.0110834836959839,grad_norm: 0.9436286057219084, iteration: 421094
loss: 1.3150449991226196,grad_norm: 0.9999995015772457, iteration: 421095
loss: 0.9923450350761414,grad_norm: 0.8006312341538346, iteration: 421096
loss: 1.0109041929244995,grad_norm: 0.7692631477363704, iteration: 421097
loss: 1.0257978439331055,grad_norm: 0.8964332857608589, iteration: 421098
loss: 1.0371595621109009,grad_norm: 0.9999994664574382, iteration: 421099
loss: 1.0463955402374268,grad_norm: 0.9999996061292842, iteration: 421100
loss: 1.0441385507583618,grad_norm: 0.8940259434103945, iteration: 421101
loss: 0.9800700545310974,grad_norm: 0.6748801961360623, iteration: 421102
loss: 0.9695362448692322,grad_norm: 0.8559590921380591, iteration: 421103
loss: 0.980356752872467,grad_norm: 0.7926383331082612, iteration: 421104
loss: 1.0280718803405762,grad_norm: 0.9999995983824117, iteration: 421105
loss: 0.9898477792739868,grad_norm: 0.9999990851561245, iteration: 421106
loss: 1.0287858247756958,grad_norm: 0.7349707550240823, iteration: 421107
loss: 1.0215390920639038,grad_norm: 0.9736712219436914, iteration: 421108
loss: 0.9561144709587097,grad_norm: 0.8113117652059851, iteration: 421109
loss: 1.0266668796539307,grad_norm: 0.7033839161277161, iteration: 421110
loss: 1.0495637655258179,grad_norm: 0.8013533975807641, iteration: 421111
loss: 1.023248314857483,grad_norm: 0.8830293364203708, iteration: 421112
loss: 0.9877152442932129,grad_norm: 0.7981429095301623, iteration: 421113
loss: 0.9795660376548767,grad_norm: 0.7426222760405391, iteration: 421114
loss: 1.040395975112915,grad_norm: 0.6616099938445807, iteration: 421115
loss: 0.9881687760353088,grad_norm: 0.8723206189699956, iteration: 421116
loss: 1.0895718336105347,grad_norm: 0.8191706603567099, iteration: 421117
loss: 1.0957716703414917,grad_norm: 0.9999993309046217, iteration: 421118
loss: 0.9842680096626282,grad_norm: 0.7468125448535867, iteration: 421119
loss: 1.0473827123641968,grad_norm: 0.9999996154964482, iteration: 421120
loss: 1.0106801986694336,grad_norm: 0.9999998060760585, iteration: 421121
loss: 1.0053291320800781,grad_norm: 0.670237981436157, iteration: 421122
loss: 1.0185221433639526,grad_norm: 0.6843191039169044, iteration: 421123
loss: 1.0401389598846436,grad_norm: 0.999999469448765, iteration: 421124
loss: 1.044978141784668,grad_norm: 0.7594066549192557, iteration: 421125
loss: 1.0121711492538452,grad_norm: 0.7926882600839887, iteration: 421126
loss: 1.1290522813796997,grad_norm: 0.9999994285436418, iteration: 421127
loss: 1.0391829013824463,grad_norm: 0.9090101420254467, iteration: 421128
loss: 0.9906938672065735,grad_norm: 0.7489080877757341, iteration: 421129
loss: 0.9888445734977722,grad_norm: 0.7474997383388018, iteration: 421130
loss: 0.9670619368553162,grad_norm: 0.9838029835758209, iteration: 421131
loss: 1.025627851486206,grad_norm: 0.9608852250121879, iteration: 421132
loss: 0.9939663410186768,grad_norm: 0.7142934139146377, iteration: 421133
loss: 1.017622470855713,grad_norm: 0.9999994198865528, iteration: 421134
loss: 0.9714375138282776,grad_norm: 0.7598395062011744, iteration: 421135
loss: 0.9848544597625732,grad_norm: 0.6180890871040464, iteration: 421136
loss: 1.0287615060806274,grad_norm: 0.951958839727484, iteration: 421137
loss: 0.971847414970398,grad_norm: 0.6873809514735535, iteration: 421138
loss: 0.9914589524269104,grad_norm: 0.9999991024881892, iteration: 421139
loss: 1.0001319646835327,grad_norm: 0.8018551211077393, iteration: 421140
loss: 1.1018017530441284,grad_norm: 0.999999118563231, iteration: 421141
loss: 1.0171637535095215,grad_norm: 0.9999989673702318, iteration: 421142
loss: 1.0201162099838257,grad_norm: 0.8155532134834039, iteration: 421143
loss: 0.9959381222724915,grad_norm: 0.8773580034563133, iteration: 421144
loss: 1.0106502771377563,grad_norm: 0.7880747579597543, iteration: 421145
loss: 0.9934861660003662,grad_norm: 0.8885456290738708, iteration: 421146
loss: 1.020244836807251,grad_norm: 0.7693304594354072, iteration: 421147
loss: 1.0387152433395386,grad_norm: 0.9999991008612811, iteration: 421148
loss: 1.0082361698150635,grad_norm: 0.7100081374454928, iteration: 421149
loss: 1.032111644744873,grad_norm: 0.999999176472789, iteration: 421150
loss: 1.0635261535644531,grad_norm: 0.9792114578750163, iteration: 421151
loss: 0.9951210021972656,grad_norm: 0.773644767122765, iteration: 421152
loss: 1.0257583856582642,grad_norm: 0.8214333146863603, iteration: 421153
loss: 1.0044742822647095,grad_norm: 0.7758584474768873, iteration: 421154
loss: 0.968946099281311,grad_norm: 0.7581345263987657, iteration: 421155
loss: 0.9862328767776489,grad_norm: 0.7143301831437519, iteration: 421156
loss: 1.030801773071289,grad_norm: 0.9999996570674555, iteration: 421157
loss: 1.0184730291366577,grad_norm: 0.798094771627872, iteration: 421158
loss: 1.0562342405319214,grad_norm: 0.8139654247479081, iteration: 421159
loss: 0.9920689463615417,grad_norm: 0.6816779729030817, iteration: 421160
loss: 1.064409613609314,grad_norm: 0.6987224079821164, iteration: 421161
loss: 0.9924867153167725,grad_norm: 0.8156290385835853, iteration: 421162
loss: 0.9822807312011719,grad_norm: 0.8953355574962519, iteration: 421163
loss: 1.0477977991104126,grad_norm: 0.7924946948764598, iteration: 421164
loss: 1.056725263595581,grad_norm: 0.9999999359517086, iteration: 421165
loss: 1.0117896795272827,grad_norm: 0.9999991411813672, iteration: 421166
loss: 1.0262588262557983,grad_norm: 0.6784740236845669, iteration: 421167
loss: 1.0107421875,grad_norm: 0.6719839490037219, iteration: 421168
loss: 1.0401098728179932,grad_norm: 0.9999990751577468, iteration: 421169
loss: 1.1258476972579956,grad_norm: 0.9255727310542253, iteration: 421170
loss: 1.0955941677093506,grad_norm: 0.9999990771059164, iteration: 421171
loss: 1.0102717876434326,grad_norm: 0.8027294454209853, iteration: 421172
loss: 1.033556580543518,grad_norm: 1.0000000349291343, iteration: 421173
loss: 1.0024300813674927,grad_norm: 0.8083956129784797, iteration: 421174
loss: 1.0061428546905518,grad_norm: 0.821612413415035, iteration: 421175
loss: 1.0232157707214355,grad_norm: 0.9999998626250362, iteration: 421176
loss: 0.9767968654632568,grad_norm: 0.8763983131250203, iteration: 421177
loss: 1.0131295919418335,grad_norm: 0.9999995702821809, iteration: 421178
loss: 1.0453641414642334,grad_norm: 0.9940652010361986, iteration: 421179
loss: 1.0419280529022217,grad_norm: 0.9999996472692197, iteration: 421180
loss: 1.1002869606018066,grad_norm: 0.9247160689134842, iteration: 421181
loss: 1.0668151378631592,grad_norm: 0.9999992018304701, iteration: 421182
loss: 1.030812382698059,grad_norm: 0.8443321118507442, iteration: 421183
loss: 1.0167913436889648,grad_norm: 0.853840392375221, iteration: 421184
loss: 0.9831952452659607,grad_norm: 0.7197313015567457, iteration: 421185
loss: 1.0024704933166504,grad_norm: 0.7496641097518043, iteration: 421186
loss: 0.9938403964042664,grad_norm: 0.8166447183017252, iteration: 421187
loss: 1.1281944513320923,grad_norm: 0.9999998302815789, iteration: 421188
loss: 1.0258818864822388,grad_norm: 0.954483598797254, iteration: 421189
loss: 1.1537525653839111,grad_norm: 0.9999998257483056, iteration: 421190
loss: 1.0217339992523193,grad_norm: 0.6644918723127572, iteration: 421191
loss: 1.0060229301452637,grad_norm: 0.9999997355275883, iteration: 421192
loss: 1.0182448625564575,grad_norm: 0.926356739160772, iteration: 421193
loss: 0.9899157285690308,grad_norm: 0.7885418846051927, iteration: 421194
loss: 0.9844595193862915,grad_norm: 0.7686862811396246, iteration: 421195
loss: 1.0221232175827026,grad_norm: 0.9999990524221757, iteration: 421196
loss: 1.03056800365448,grad_norm: 0.8003624908604571, iteration: 421197
loss: 0.9815654158592224,grad_norm: 0.8385363334762566, iteration: 421198
loss: 1.0053114891052246,grad_norm: 0.7235826006903956, iteration: 421199
loss: 0.9750944375991821,grad_norm: 0.7490061816586309, iteration: 421200
loss: 0.9916828870773315,grad_norm: 0.8301348750357488, iteration: 421201
loss: 1.0763472318649292,grad_norm: 0.7927129071654689, iteration: 421202
loss: 0.9882450103759766,grad_norm: 0.8742483125198467, iteration: 421203
loss: 0.9932739734649658,grad_norm: 0.6346236559412082, iteration: 421204
loss: 0.9994440674781799,grad_norm: 0.7199380678461047, iteration: 421205
loss: 1.0002974271774292,grad_norm: 0.999999739194003, iteration: 421206
loss: 0.9901009798049927,grad_norm: 0.7472438248808357, iteration: 421207
loss: 0.9690479636192322,grad_norm: 0.7418818811604126, iteration: 421208
loss: 1.0530824661254883,grad_norm: 0.7627357136529325, iteration: 421209
loss: 0.9746233224868774,grad_norm: 0.8415544545896151, iteration: 421210
loss: 1.0005770921707153,grad_norm: 0.6254806671337053, iteration: 421211
loss: 1.063419222831726,grad_norm: 0.9999993162722971, iteration: 421212
loss: 1.0157954692840576,grad_norm: 0.7716033382251722, iteration: 421213
loss: 0.9920510053634644,grad_norm: 0.7067744982661117, iteration: 421214
loss: 1.0105723142623901,grad_norm: 0.7095548497728179, iteration: 421215
loss: 0.9897751808166504,grad_norm: 0.669635399102255, iteration: 421216
loss: 0.9934358596801758,grad_norm: 0.8193203633660776, iteration: 421217
loss: 0.9984452724456787,grad_norm: 0.9999990995081142, iteration: 421218
loss: 1.0565329790115356,grad_norm: 0.9290250934080607, iteration: 421219
loss: 0.9705314636230469,grad_norm: 0.709035118824161, iteration: 421220
loss: 0.9908121824264526,grad_norm: 0.6835926266327113, iteration: 421221
loss: 1.02537202835083,grad_norm: 0.7799105885281987, iteration: 421222
loss: 1.039109230041504,grad_norm: 0.9920681617444655, iteration: 421223
loss: 0.9699932932853699,grad_norm: 0.6930763317271628, iteration: 421224
loss: 1.0029511451721191,grad_norm: 0.8316383206475828, iteration: 421225
loss: 0.9930051565170288,grad_norm: 0.9284335373675523, iteration: 421226
loss: 0.9957139492034912,grad_norm: 0.707475370403235, iteration: 421227
loss: 1.055580496788025,grad_norm: 1.000000001851177, iteration: 421228
loss: 1.1063528060913086,grad_norm: 0.999999672243618, iteration: 421229
loss: 1.009061336517334,grad_norm: 0.9470457709610743, iteration: 421230
loss: 0.9883975386619568,grad_norm: 0.9086222167437856, iteration: 421231
loss: 1.090336561203003,grad_norm: 0.9613565296266292, iteration: 421232
loss: 0.968083918094635,grad_norm: 0.8016885017124393, iteration: 421233
loss: 0.9831069111824036,grad_norm: 0.7285757926112498, iteration: 421234
loss: 1.005732774734497,grad_norm: 0.7706972518014756, iteration: 421235
loss: 0.9827666878700256,grad_norm: 0.8619312104849911, iteration: 421236
loss: 1.038068413734436,grad_norm: 0.9999994641446013, iteration: 421237
loss: 1.0388774871826172,grad_norm: 0.9933542229420022, iteration: 421238
loss: 1.0006306171417236,grad_norm: 0.8607888383653489, iteration: 421239
loss: 1.0640418529510498,grad_norm: 0.8430263199384721, iteration: 421240
loss: 1.1030253171920776,grad_norm: 0.9999998135234287, iteration: 421241
loss: 0.9750564694404602,grad_norm: 0.8304610175131726, iteration: 421242
loss: 0.9836776256561279,grad_norm: 0.9999998183854522, iteration: 421243
loss: 1.0343754291534424,grad_norm: 0.783135929319933, iteration: 421244
loss: 1.0188145637512207,grad_norm: 0.9999999383306991, iteration: 421245
loss: 1.0285110473632812,grad_norm: 0.7969193626036173, iteration: 421246
loss: 1.0388107299804688,grad_norm: 0.7534910823665211, iteration: 421247
loss: 0.9704490900039673,grad_norm: 0.7028212708444811, iteration: 421248
loss: 0.964487612247467,grad_norm: 0.7009572772565226, iteration: 421249
loss: 1.042886734008789,grad_norm: 0.9999990550880862, iteration: 421250
loss: 0.9739745855331421,grad_norm: 0.7428138694316029, iteration: 421251
loss: 1.0529111623764038,grad_norm: 0.9999994967190224, iteration: 421252
loss: 1.0446048974990845,grad_norm: 0.9999992599118711, iteration: 421253
loss: 1.0460797548294067,grad_norm: 0.999999475817246, iteration: 421254
loss: 0.9971291422843933,grad_norm: 0.9999990363128833, iteration: 421255
loss: 1.0108250379562378,grad_norm: 0.6421836173348396, iteration: 421256
loss: 1.1202291250228882,grad_norm: 0.9999991585001634, iteration: 421257
loss: 0.9691833257675171,grad_norm: 0.9999991182753147, iteration: 421258
loss: 1.0109708309173584,grad_norm: 0.9999995722268301, iteration: 421259
loss: 0.9850417375564575,grad_norm: 0.9999995577726267, iteration: 421260
loss: 1.0238646268844604,grad_norm: 0.9999995180411341, iteration: 421261
loss: 1.0494403839111328,grad_norm: 0.8561620825476236, iteration: 421262
loss: 1.022261142730713,grad_norm: 0.9999991726184881, iteration: 421263
loss: 1.036839485168457,grad_norm: 0.980592370668899, iteration: 421264
loss: 0.9735214710235596,grad_norm: 0.6156492913924881, iteration: 421265
loss: 0.9930752515792847,grad_norm: 0.9999992914593386, iteration: 421266
loss: 1.0409923791885376,grad_norm: 0.7818752759526436, iteration: 421267
loss: 0.9798778891563416,grad_norm: 0.9999992557377482, iteration: 421268
loss: 1.1004468202590942,grad_norm: 0.9999994056435553, iteration: 421269
loss: 1.0472056865692139,grad_norm: 0.9999991246055198, iteration: 421270
loss: 1.04984712600708,grad_norm: 0.892972487031122, iteration: 421271
loss: 1.046036958694458,grad_norm: 0.9999999303614887, iteration: 421272
loss: 1.0235083103179932,grad_norm: 0.8372005515727341, iteration: 421273
loss: 1.0099064111709595,grad_norm: 0.9999998642650243, iteration: 421274
loss: 1.041306495666504,grad_norm: 0.9999990477958378, iteration: 421275
loss: 1.0290615558624268,grad_norm: 0.999999328526116, iteration: 421276
loss: 1.0349788665771484,grad_norm: 0.9999999744431818, iteration: 421277
loss: 1.0514421463012695,grad_norm: 0.7008336977126642, iteration: 421278
loss: 1.039886474609375,grad_norm: 0.9999991549026569, iteration: 421279
loss: 1.0237390995025635,grad_norm: 0.999999518765873, iteration: 421280
loss: 1.0441851615905762,grad_norm: 0.8888046298757977, iteration: 421281
loss: 1.0385721921920776,grad_norm: 0.9999999114511038, iteration: 421282
loss: 0.9840207099914551,grad_norm: 0.9650227988704926, iteration: 421283
loss: 1.044533133506775,grad_norm: 0.9999997116911941, iteration: 421284
loss: 1.1239356994628906,grad_norm: 0.9999995700718427, iteration: 421285
loss: 1.0442274808883667,grad_norm: 0.6553238148170802, iteration: 421286
loss: 1.0719413757324219,grad_norm: 0.8933632982573463, iteration: 421287
loss: 0.9774724841117859,grad_norm: 0.8516990818300517, iteration: 421288
loss: 1.0491137504577637,grad_norm: 0.9999998331721687, iteration: 421289
loss: 1.0843228101730347,grad_norm: 0.9999991115188693, iteration: 421290
loss: 1.0590460300445557,grad_norm: 0.9999997156118051, iteration: 421291
loss: 1.121922254562378,grad_norm: 0.9999996026620886, iteration: 421292
loss: 0.9567344784736633,grad_norm: 0.778527234780767, iteration: 421293
loss: 1.0195631980895996,grad_norm: 0.9114437332585748, iteration: 421294
loss: 0.9856974482536316,grad_norm: 0.8892842289615516, iteration: 421295
loss: 1.0147770643234253,grad_norm: 0.9999990869219986, iteration: 421296
loss: 1.0424033403396606,grad_norm: 0.9042323005756087, iteration: 421297
loss: 1.0447051525115967,grad_norm: 0.9962116895122991, iteration: 421298
loss: 1.0454212427139282,grad_norm: 0.9999995288417606, iteration: 421299
loss: 0.9932226538658142,grad_norm: 0.8315765715671509, iteration: 421300
loss: 0.9951744675636292,grad_norm: 0.8987580255471613, iteration: 421301
loss: 1.0054655075073242,grad_norm: 0.8529748676975777, iteration: 421302
loss: 0.9920944571495056,grad_norm: 0.7678558474622638, iteration: 421303
loss: 1.0290693044662476,grad_norm: 0.9294827845468405, iteration: 421304
loss: 0.9781702160835266,grad_norm: 0.8027680690257976, iteration: 421305
loss: 0.9964024424552917,grad_norm: 0.8289813325424809, iteration: 421306
loss: 1.031476378440857,grad_norm: 0.7646674480328299, iteration: 421307
loss: 1.0063551664352417,grad_norm: 0.8340042021271359, iteration: 421308
loss: 1.0718191862106323,grad_norm: 0.7596619876479103, iteration: 421309
loss: 0.9725365042686462,grad_norm: 0.7805402064884303, iteration: 421310
loss: 0.970162570476532,grad_norm: 0.7258620514708486, iteration: 421311
loss: 1.0029969215393066,grad_norm: 0.8000137920790272, iteration: 421312
loss: 1.0268536806106567,grad_norm: 0.8259398582503745, iteration: 421313
loss: 1.0130804777145386,grad_norm: 0.8446816471417913, iteration: 421314
loss: 1.0280265808105469,grad_norm: 0.7516368516426263, iteration: 421315
loss: 1.0250707864761353,grad_norm: 0.7168169064469749, iteration: 421316
loss: 1.0050616264343262,grad_norm: 0.8901093205973721, iteration: 421317
loss: 0.9912394881248474,grad_norm: 0.8241008669415296, iteration: 421318
loss: 0.9895642399787903,grad_norm: 0.8450109325064715, iteration: 421319
loss: 1.0305027961730957,grad_norm: 0.8707179576835788, iteration: 421320
loss: 1.0228760242462158,grad_norm: 0.9172347987339843, iteration: 421321
loss: 0.9992277026176453,grad_norm: 0.7236053637573729, iteration: 421322
loss: 0.963146448135376,grad_norm: 0.8079102912085468, iteration: 421323
loss: 0.9855247139930725,grad_norm: 0.6747401004542495, iteration: 421324
loss: 1.0086417198181152,grad_norm: 0.8354527073055221, iteration: 421325
loss: 1.1156902313232422,grad_norm: 0.9999991135603867, iteration: 421326
loss: 0.9864858388900757,grad_norm: 0.8397273443283236, iteration: 421327
loss: 0.9962334632873535,grad_norm: 0.6654257166368325, iteration: 421328
loss: 1.0304207801818848,grad_norm: 0.7157604850843668, iteration: 421329
loss: 0.9956124424934387,grad_norm: 0.9999991011347952, iteration: 421330
loss: 1.0203142166137695,grad_norm: 0.7555719502265933, iteration: 421331
loss: 0.9839702248573303,grad_norm: 0.7791358389039759, iteration: 421332
loss: 0.9949918389320374,grad_norm: 0.8646081274217794, iteration: 421333
loss: 0.9940664172172546,grad_norm: 0.7659975635057187, iteration: 421334
loss: 0.9804320335388184,grad_norm: 0.8170928748647784, iteration: 421335
loss: 1.0199323892593384,grad_norm: 0.9999996007750681, iteration: 421336
loss: 0.9738146066665649,grad_norm: 0.8860600007709868, iteration: 421337
loss: 0.9790335297584534,grad_norm: 0.677186144180704, iteration: 421338
loss: 0.9695719480514526,grad_norm: 0.8940074771305901, iteration: 421339
loss: 1.0305464267730713,grad_norm: 0.8402126236887713, iteration: 421340
loss: 1.0004324913024902,grad_norm: 0.7713265745924316, iteration: 421341
loss: 0.9811329245567322,grad_norm: 0.9005027768223217, iteration: 421342
loss: 0.9946860671043396,grad_norm: 0.769129597443315, iteration: 421343
loss: 0.976966142654419,grad_norm: 0.8300983174439285, iteration: 421344
loss: 1.0083441734313965,grad_norm: 0.8002486179410779, iteration: 421345
loss: 1.0113177299499512,grad_norm: 0.9999993561019705, iteration: 421346
loss: 1.0032678842544556,grad_norm: 0.6993759469909143, iteration: 421347
loss: 1.020920753479004,grad_norm: 0.7644340182009868, iteration: 421348
loss: 1.0120645761489868,grad_norm: 0.7862015314674433, iteration: 421349
loss: 1.0431756973266602,grad_norm: 0.9012537758168901, iteration: 421350
loss: 1.0277349948883057,grad_norm: 0.7535835139689485, iteration: 421351
loss: 1.0336169004440308,grad_norm: 0.8216772027333366, iteration: 421352
loss: 1.0264301300048828,grad_norm: 0.9306660198882456, iteration: 421353
loss: 0.9639067649841309,grad_norm: 0.6884898892765536, iteration: 421354
loss: 0.9854279160499573,grad_norm: 0.773491118234878, iteration: 421355
loss: 1.0135878324508667,grad_norm: 1.0000000036033208, iteration: 421356
loss: 1.0245957374572754,grad_norm: 0.8153299772311995, iteration: 421357
loss: 1.0044972896575928,grad_norm: 0.8169510432701128, iteration: 421358
loss: 1.0194239616394043,grad_norm: 0.8148607119571133, iteration: 421359
loss: 1.0108054876327515,grad_norm: 0.7958582355232353, iteration: 421360
loss: 1.0057322978973389,grad_norm: 0.7617214157453044, iteration: 421361
loss: 1.0002806186676025,grad_norm: 0.999999211728362, iteration: 421362
loss: 0.9690098166465759,grad_norm: 0.9999999358839468, iteration: 421363
loss: 0.9558830857276917,grad_norm: 0.7773650319150303, iteration: 421364
loss: 1.0498517751693726,grad_norm: 0.9357978005317085, iteration: 421365
loss: 1.1501978635787964,grad_norm: 0.9999994902833717, iteration: 421366
loss: 1.0801373720169067,grad_norm: 0.8175077760743096, iteration: 421367
loss: 1.0224114656448364,grad_norm: 0.8104180039611226, iteration: 421368
loss: 0.9713184833526611,grad_norm: 0.7755366442047174, iteration: 421369
loss: 0.9809224009513855,grad_norm: 0.8818076755845548, iteration: 421370
loss: 1.0671275854110718,grad_norm: 0.8730868366280979, iteration: 421371
loss: 1.0389946699142456,grad_norm: 0.8977950275265684, iteration: 421372
loss: 0.9603002071380615,grad_norm: 0.6919997720495307, iteration: 421373
loss: 0.9812281727790833,grad_norm: 0.999999334007429, iteration: 421374
loss: 1.0034207105636597,grad_norm: 0.7744740308136076, iteration: 421375
loss: 0.9993412494659424,grad_norm: 0.9999990319398722, iteration: 421376
loss: 0.9874536395072937,grad_norm: 0.7599960256264922, iteration: 421377
loss: 1.0054988861083984,grad_norm: 0.6951025205643047, iteration: 421378
loss: 1.2754321098327637,grad_norm: 0.9999997325157995, iteration: 421379
loss: 1.0659575462341309,grad_norm: 0.9999991701597213, iteration: 421380
loss: 0.999447226524353,grad_norm: 0.724896847470254, iteration: 421381
loss: 1.0039052963256836,grad_norm: 0.7319327153978697, iteration: 421382
loss: 0.9877973198890686,grad_norm: 0.6938007733635729, iteration: 421383
loss: 1.0973639488220215,grad_norm: 0.893878182482352, iteration: 421384
loss: 0.9991945624351501,grad_norm: 0.8884424660860436, iteration: 421385
loss: 0.9779659509658813,grad_norm: 0.7484962951892705, iteration: 421386
loss: 1.0332187414169312,grad_norm: 0.8266545017544268, iteration: 421387
loss: 0.9712217450141907,grad_norm: 0.7611234134795856, iteration: 421388
loss: 1.0441229343414307,grad_norm: 0.926714036450779, iteration: 421389
loss: 0.9945929050445557,grad_norm: 0.6921851173156769, iteration: 421390
loss: 1.0521913766860962,grad_norm: 0.7674019764932117, iteration: 421391
loss: 1.009961485862732,grad_norm: 1.00000002785121, iteration: 421392
loss: 0.9766223430633545,grad_norm: 0.6203471885367391, iteration: 421393
loss: 1.0453829765319824,grad_norm: 0.8335630827574492, iteration: 421394
loss: 1.0050595998764038,grad_norm: 0.6954490953239211, iteration: 421395
loss: 0.9822791218757629,grad_norm: 0.7277494620777656, iteration: 421396
loss: 0.9464613199234009,grad_norm: 0.8360399737765867, iteration: 421397
loss: 0.9591025114059448,grad_norm: 0.7182696913465999, iteration: 421398
loss: 1.0739177465438843,grad_norm: 0.999999584713682, iteration: 421399
loss: 0.9668102860450745,grad_norm: 0.7652312897007358, iteration: 421400
loss: 1.000860571861267,grad_norm: 0.6840660042723854, iteration: 421401
loss: 1.009270191192627,grad_norm: 0.8875226740253681, iteration: 421402
loss: 1.0688199996948242,grad_norm: 0.9999999212179499, iteration: 421403
loss: 0.9661502838134766,grad_norm: 0.999998965174985, iteration: 421404
loss: 0.9784067869186401,grad_norm: 0.7317532284903067, iteration: 421405
loss: 1.0084456205368042,grad_norm: 0.7777246346332898, iteration: 421406
loss: 1.0099176168441772,grad_norm: 0.8770789521941577, iteration: 421407
loss: 0.9786620736122131,grad_norm: 0.8090884551426706, iteration: 421408
loss: 0.9690829515457153,grad_norm: 0.7975405223118756, iteration: 421409
loss: 1.051714539527893,grad_norm: 0.7482042791538013, iteration: 421410
loss: 0.9891787171363831,grad_norm: 0.9704535140238627, iteration: 421411
loss: 1.0239924192428589,grad_norm: 0.8791556846269906, iteration: 421412
loss: 1.0148833990097046,grad_norm: 0.9999992417921076, iteration: 421413
loss: 0.9989756941795349,grad_norm: 0.9999992761319733, iteration: 421414
loss: 0.9882122874259949,grad_norm: 0.6998498244855841, iteration: 421415
loss: 1.0269235372543335,grad_norm: 0.6382114547719235, iteration: 421416
loss: 1.1500132083892822,grad_norm: 0.9999999568174328, iteration: 421417
loss: 0.9883179068565369,grad_norm: 0.8475695876382292, iteration: 421418
loss: 0.9932907223701477,grad_norm: 0.7212834669419748, iteration: 421419
loss: 1.0079264640808105,grad_norm: 0.8573931386196715, iteration: 421420
loss: 1.0188690423965454,grad_norm: 0.6370802253203464, iteration: 421421
loss: 0.9925114512443542,grad_norm: 0.8443463404325716, iteration: 421422
loss: 1.0061514377593994,grad_norm: 0.8512713441512189, iteration: 421423
loss: 1.0014559030532837,grad_norm: 0.8713961325285366, iteration: 421424
loss: 0.9883396625518799,grad_norm: 0.7714702860580019, iteration: 421425
loss: 1.1051057577133179,grad_norm: 0.9999995872094181, iteration: 421426
loss: 0.996684193611145,grad_norm: 0.802918977502582, iteration: 421427
loss: 1.0281662940979004,grad_norm: 0.817960089386032, iteration: 421428
loss: 1.0156170129776,grad_norm: 0.8889558010264184, iteration: 421429
loss: 1.180474877357483,grad_norm: 0.9999999205313147, iteration: 421430
loss: 1.0374634265899658,grad_norm: 0.6995555315019164, iteration: 421431
loss: 1.008224368095398,grad_norm: 0.999999160454995, iteration: 421432
loss: 1.0280706882476807,grad_norm: 0.7487072776065425, iteration: 421433
loss: 1.0419507026672363,grad_norm: 0.6589809539606142, iteration: 421434
loss: 1.0324465036392212,grad_norm: 0.999999875454018, iteration: 421435
loss: 1.0113383531570435,grad_norm: 0.6202342914591807, iteration: 421436
loss: 1.075471043586731,grad_norm: 0.7208404918491748, iteration: 421437
loss: 1.0188689231872559,grad_norm: 0.7295319331747322, iteration: 421438
loss: 1.069198489189148,grad_norm: 0.9993595877546639, iteration: 421439
loss: 0.9846840500831604,grad_norm: 0.7902173464160583, iteration: 421440
loss: 0.9479354619979858,grad_norm: 0.8852604331873676, iteration: 421441
loss: 1.024925708770752,grad_norm: 0.9998301352619151, iteration: 421442
loss: 1.0074257850646973,grad_norm: 0.7600847091715933, iteration: 421443
loss: 0.9619730710983276,grad_norm: 0.8099842468018752, iteration: 421444
loss: 0.9963940382003784,grad_norm: 0.7667996476502453, iteration: 421445
loss: 1.0472450256347656,grad_norm: 0.8932153439529426, iteration: 421446
loss: 0.9621767401695251,grad_norm: 0.8434270561001506, iteration: 421447
loss: 0.9968417882919312,grad_norm: 0.8832919036608559, iteration: 421448
loss: 1.0204085111618042,grad_norm: 0.9999996759189345, iteration: 421449
loss: 1.004352331161499,grad_norm: 0.8815263567203809, iteration: 421450
loss: 0.9864983558654785,grad_norm: 0.9999992807783109, iteration: 421451
loss: 1.0313087701797485,grad_norm: 0.6598633536908847, iteration: 421452
loss: 1.0090429782867432,grad_norm: 0.8622398647949626, iteration: 421453
loss: 1.0598876476287842,grad_norm: 0.9999993434727623, iteration: 421454
loss: 1.0189439058303833,grad_norm: 0.7984337256436784, iteration: 421455
loss: 1.042823314666748,grad_norm: 0.7985645840794175, iteration: 421456
loss: 0.9935192465782166,grad_norm: 0.7816310362948002, iteration: 421457
loss: 0.9934596419334412,grad_norm: 0.9680544175336174, iteration: 421458
loss: 1.0229108333587646,grad_norm: 0.9161275855915356, iteration: 421459
loss: 1.0035524368286133,grad_norm: 0.8406586330302682, iteration: 421460
loss: 0.9659792184829712,grad_norm: 0.7891749923722624, iteration: 421461
loss: 1.0194661617279053,grad_norm: 0.9999992963852291, iteration: 421462
loss: 1.0900382995605469,grad_norm: 0.9999994776569199, iteration: 421463
loss: 1.0003349781036377,grad_norm: 0.7477290528053786, iteration: 421464
loss: 0.977694571018219,grad_norm: 0.8541287468171388, iteration: 421465
loss: 1.0004276037216187,grad_norm: 0.9213257869217822, iteration: 421466
loss: 1.0147396326065063,grad_norm: 0.8109237946193953, iteration: 421467
loss: 1.0028001070022583,grad_norm: 0.7987176953516656, iteration: 421468
loss: 1.0223095417022705,grad_norm: 0.8011595341455264, iteration: 421469
loss: 0.9841259717941284,grad_norm: 0.8327193719954266, iteration: 421470
loss: 1.0046800374984741,grad_norm: 0.7361340306547856, iteration: 421471
loss: 1.0331776142120361,grad_norm: 0.7549595764760936, iteration: 421472
loss: 0.9944073557853699,grad_norm: 0.6303598841699638, iteration: 421473
loss: 0.9897332787513733,grad_norm: 0.7403717289473282, iteration: 421474
loss: 0.9857252240180969,grad_norm: 0.6959628810062028, iteration: 421475
loss: 0.9787363409996033,grad_norm: 0.7296608632331614, iteration: 421476
loss: 0.9937183260917664,grad_norm: 0.951845425874101, iteration: 421477
loss: 1.0745036602020264,grad_norm: 0.9999991015441198, iteration: 421478
loss: 1.0137693881988525,grad_norm: 0.8970253745986885, iteration: 421479
loss: 1.0201430320739746,grad_norm: 0.8005937690353123, iteration: 421480
loss: 0.9806382656097412,grad_norm: 0.7592629783460431, iteration: 421481
loss: 0.9802595973014832,grad_norm: 0.918888735874979, iteration: 421482
loss: 0.990485429763794,grad_norm: 0.6777789552807125, iteration: 421483
loss: 0.9673064351081848,grad_norm: 0.6705953571377732, iteration: 421484
loss: 1.006808876991272,grad_norm: 0.7466592946483936, iteration: 421485
loss: 1.0079503059387207,grad_norm: 0.8197827670520874, iteration: 421486
loss: 1.0136346817016602,grad_norm: 0.6749652393638016, iteration: 421487
loss: 0.9967857003211975,grad_norm: 0.713109758346764, iteration: 421488
loss: 1.014095425605774,grad_norm: 0.9999991406304588, iteration: 421489
loss: 1.0812439918518066,grad_norm: 0.8126275719775241, iteration: 421490
loss: 1.0294064283370972,grad_norm: 0.7507387330341916, iteration: 421491
loss: 0.9801229238510132,grad_norm: 0.7085455510454581, iteration: 421492
loss: 0.9742229580879211,grad_norm: 0.7400073204317593, iteration: 421493
loss: 0.9847633242607117,grad_norm: 0.8497878322328173, iteration: 421494
loss: 1.1145763397216797,grad_norm: 0.9999997069438351, iteration: 421495
loss: 1.031458854675293,grad_norm: 0.9999991758228017, iteration: 421496
loss: 0.9818705320358276,grad_norm: 0.7179312037103434, iteration: 421497
loss: 1.0445668697357178,grad_norm: 0.9456310121204361, iteration: 421498
loss: 1.0047708749771118,grad_norm: 0.8963604189430083, iteration: 421499
loss: 0.9930150508880615,grad_norm: 0.9404519077103525, iteration: 421500
loss: 1.021713137626648,grad_norm: 0.7347290046679673, iteration: 421501
loss: 1.0607056617736816,grad_norm: 0.8403317703879545, iteration: 421502
loss: 0.989323079586029,grad_norm: 0.7811472647994333, iteration: 421503
loss: 1.0721300840377808,grad_norm: 0.9999993634366224, iteration: 421504
loss: 0.9790368676185608,grad_norm: 0.999999140019967, iteration: 421505
loss: 1.0016140937805176,grad_norm: 0.999999102474417, iteration: 421506
loss: 1.041117787361145,grad_norm: 0.9999993200638008, iteration: 421507
loss: 0.9835160970687866,grad_norm: 0.7570573687895754, iteration: 421508
loss: 0.9898563027381897,grad_norm: 0.8195263623664866, iteration: 421509
loss: 1.0372915267944336,grad_norm: 0.9194380951154231, iteration: 421510
loss: 0.9573441743850708,grad_norm: 0.999999090468091, iteration: 421511
loss: 0.973508894443512,grad_norm: 0.6854157951672019, iteration: 421512
loss: 0.9793381690979004,grad_norm: 0.932913902908247, iteration: 421513
loss: 1.0209629535675049,grad_norm: 0.7758703445279159, iteration: 421514
loss: 1.0431104898452759,grad_norm: 0.836575282164576, iteration: 421515
loss: 1.0242747068405151,grad_norm: 0.9999999079968873, iteration: 421516
loss: 0.9948717951774597,grad_norm: 0.8135697875316571, iteration: 421517
loss: 1.081172227859497,grad_norm: 0.8284288866192561, iteration: 421518
loss: 1.0145608186721802,grad_norm: 0.9032357714621405, iteration: 421519
loss: 1.0632216930389404,grad_norm: 0.9999992031663156, iteration: 421520
loss: 0.9784342050552368,grad_norm: 0.8092286632764405, iteration: 421521
loss: 1.0262714624404907,grad_norm: 0.9999997121129316, iteration: 421522
loss: 0.9946235418319702,grad_norm: 0.7430437024128496, iteration: 421523
loss: 1.0339972972869873,grad_norm: 0.9999998678432872, iteration: 421524
loss: 1.0676630735397339,grad_norm: 0.852439298238242, iteration: 421525
loss: 0.9842932224273682,grad_norm: 0.8991837374021844, iteration: 421526
loss: 0.9859564304351807,grad_norm: 0.7339016386651495, iteration: 421527
loss: 1.0234370231628418,grad_norm: 0.9999990443724207, iteration: 421528
loss: 1.0440189838409424,grad_norm: 0.8010810941217158, iteration: 421529
loss: 1.218716025352478,grad_norm: 0.9575157991946043, iteration: 421530
loss: 1.0957666635513306,grad_norm: 0.8036750577334079, iteration: 421531
loss: 1.018418312072754,grad_norm: 0.9752743054305183, iteration: 421532
loss: 0.9692897200584412,grad_norm: 0.7435595685852693, iteration: 421533
loss: 0.9883567094802856,grad_norm: 0.922297833269594, iteration: 421534
loss: 0.9870346188545227,grad_norm: 0.9999996551378205, iteration: 421535
loss: 1.043270230293274,grad_norm: 0.767565458632243, iteration: 421536
loss: 1.0231801271438599,grad_norm: 0.7051091566638412, iteration: 421537
loss: 1.0080933570861816,grad_norm: 0.999999061068953, iteration: 421538
loss: 1.0493682622909546,grad_norm: 0.9999993624085496, iteration: 421539
loss: 1.0654698610305786,grad_norm: 0.8988686413324474, iteration: 421540
loss: 0.9900227189064026,grad_norm: 0.9004465100306565, iteration: 421541
loss: 1.0540722608566284,grad_norm: 0.9999995699743792, iteration: 421542
loss: 0.9561682343482971,grad_norm: 0.7025040079112738, iteration: 421543
loss: 1.0097533464431763,grad_norm: 0.9999996995404871, iteration: 421544
loss: 1.0082290172576904,grad_norm: 0.9040206412468552, iteration: 421545
loss: 0.9800297021865845,grad_norm: 0.728285817962743, iteration: 421546
loss: 0.9806731939315796,grad_norm: 0.7810559412584489, iteration: 421547
loss: 1.0185730457305908,grad_norm: 0.6997056094351253, iteration: 421548
loss: 1.0597670078277588,grad_norm: 0.9999994534616343, iteration: 421549
loss: 1.0118356943130493,grad_norm: 0.821502335350662, iteration: 421550
loss: 1.017305850982666,grad_norm: 0.7154799143273862, iteration: 421551
loss: 0.9746531844139099,grad_norm: 0.9307158179677262, iteration: 421552
loss: 0.9890808463096619,grad_norm: 0.7665234618611259, iteration: 421553
loss: 1.068778395652771,grad_norm: 0.9999992126184619, iteration: 421554
loss: 0.9905482530593872,grad_norm: 0.9999999461302604, iteration: 421555
loss: 1.0274899005889893,grad_norm: 0.9999992537617258, iteration: 421556
loss: 1.0100514888763428,grad_norm: 0.8032182222934102, iteration: 421557
loss: 1.1321361064910889,grad_norm: 0.904436467326214, iteration: 421558
loss: 1.0888385772705078,grad_norm: 0.9999991407227455, iteration: 421559
loss: 0.9964829087257385,grad_norm: 0.8191859306662854, iteration: 421560
loss: 1.0239819288253784,grad_norm: 0.9999993911267426, iteration: 421561
loss: 0.9882437586784363,grad_norm: 0.6797521758204162, iteration: 421562
loss: 1.0168545246124268,grad_norm: 0.82825542568265, iteration: 421563
loss: 1.0290791988372803,grad_norm: 0.7607637129147112, iteration: 421564
loss: 0.9757260084152222,grad_norm: 0.8286701847816074, iteration: 421565
loss: 1.0175949335098267,grad_norm: 0.7737051797482982, iteration: 421566
loss: 1.0090299844741821,grad_norm: 0.9999992339350718, iteration: 421567
loss: 0.9905442595481873,grad_norm: 0.6752376994859962, iteration: 421568
loss: 1.0737597942352295,grad_norm: 0.8240629875407848, iteration: 421569
loss: 1.2019795179367065,grad_norm: 0.9999991038626792, iteration: 421570
loss: 1.0501420497894287,grad_norm: 0.9999990275675081, iteration: 421571
loss: 1.0428203344345093,grad_norm: 0.9257696498590399, iteration: 421572
loss: 1.062834620475769,grad_norm: 0.7531244199123625, iteration: 421573
loss: 1.140365481376648,grad_norm: 0.7421301590687859, iteration: 421574
loss: 1.0188515186309814,grad_norm: 0.7502864313454519, iteration: 421575
loss: 1.0143609046936035,grad_norm: 0.9999994729739896, iteration: 421576
loss: 1.0157427787780762,grad_norm: 0.9999996395122037, iteration: 421577
loss: 0.9699612259864807,grad_norm: 0.916667505619533, iteration: 421578
loss: 1.0113892555236816,grad_norm: 0.7630827232392575, iteration: 421579
loss: 0.995884358882904,grad_norm: 0.999999119621451, iteration: 421580
loss: 1.028756856918335,grad_norm: 0.9999992528704196, iteration: 421581
loss: 0.9906269311904907,grad_norm: 0.7685784414685031, iteration: 421582
loss: 1.02553129196167,grad_norm: 0.999999085855789, iteration: 421583
loss: 1.0252569913864136,grad_norm: 0.9999993171201869, iteration: 421584
loss: 0.9823559522628784,grad_norm: 0.9134257822357899, iteration: 421585
loss: 0.9998829364776611,grad_norm: 0.7547032345621817, iteration: 421586
loss: 1.1235655546188354,grad_norm: 0.9999999160482518, iteration: 421587
loss: 0.9894525408744812,grad_norm: 0.6682504658609073, iteration: 421588
loss: 1.0232453346252441,grad_norm: 0.7559808671224568, iteration: 421589
loss: 0.948947012424469,grad_norm: 0.6910307223403495, iteration: 421590
loss: 0.9991998672485352,grad_norm: 0.7398701862395096, iteration: 421591
loss: 1.030555009841919,grad_norm: 0.8108204426293543, iteration: 421592
loss: 1.0330729484558105,grad_norm: 0.8822762834598019, iteration: 421593
loss: 0.9623838067054749,grad_norm: 0.8549998131547651, iteration: 421594
loss: 1.0181697607040405,grad_norm: 0.7826369374846401, iteration: 421595
loss: 1.0185258388519287,grad_norm: 0.7915152827888644, iteration: 421596
loss: 0.9874134063720703,grad_norm: 0.8173284990641129, iteration: 421597
loss: 0.9702211618423462,grad_norm: 0.9632568860445465, iteration: 421598
loss: 1.009855031967163,grad_norm: 0.9999993672143869, iteration: 421599
loss: 1.006653904914856,grad_norm: 0.918020773982575, iteration: 421600
loss: 1.0234273672103882,grad_norm: 0.999999471565464, iteration: 421601
loss: 1.011101245880127,grad_norm: 0.9999996326476077, iteration: 421602
loss: 1.0259349346160889,grad_norm: 0.9999996654612604, iteration: 421603
loss: 1.0326120853424072,grad_norm: 0.7438692469511683, iteration: 421604
loss: 1.0177702903747559,grad_norm: 0.9999993806661829, iteration: 421605
loss: 1.0804048776626587,grad_norm: 0.9024053776606682, iteration: 421606
loss: 1.0288339853286743,grad_norm: 0.999999677740304, iteration: 421607
loss: 1.0190963745117188,grad_norm: 0.6936930902222825, iteration: 421608
loss: 0.9861651062965393,grad_norm: 0.8151384691721433, iteration: 421609
loss: 1.1378281116485596,grad_norm: 0.9999991946911478, iteration: 421610
loss: 0.9784783720970154,grad_norm: 0.8949743347071986, iteration: 421611
loss: 1.0046067237854004,grad_norm: 0.999999063628455, iteration: 421612
loss: 1.0349332094192505,grad_norm: 0.8255176792258244, iteration: 421613
loss: 1.0285438299179077,grad_norm: 0.9579107599277076, iteration: 421614
loss: 1.0162745714187622,grad_norm: 0.9636385463007133, iteration: 421615
loss: 0.9973270893096924,grad_norm: 0.8110493841260061, iteration: 421616
loss: 1.0029096603393555,grad_norm: 0.8519635698553405, iteration: 421617
loss: 0.9588963389396667,grad_norm: 0.6401478755182537, iteration: 421618
loss: 0.9989296793937683,grad_norm: 0.7694806759483903, iteration: 421619
loss: 0.9773590564727783,grad_norm: 0.9477291162188048, iteration: 421620
loss: 1.0196741819381714,grad_norm: 0.9999990983860024, iteration: 421621
loss: 0.992579460144043,grad_norm: 0.8877897057556691, iteration: 421622
loss: 1.0379687547683716,grad_norm: 0.9999999591269101, iteration: 421623
loss: 0.9765387773513794,grad_norm: 0.7781931179284102, iteration: 421624
loss: 0.9617250561714172,grad_norm: 0.8385836856122519, iteration: 421625
loss: 0.9879421591758728,grad_norm: 0.7402248133467938, iteration: 421626
loss: 1.0001243352890015,grad_norm: 0.7093896324057983, iteration: 421627
loss: 1.0887373685836792,grad_norm: 0.887340589536777, iteration: 421628
loss: 1.0081899166107178,grad_norm: 0.6609776376607376, iteration: 421629
loss: 1.0038353204727173,grad_norm: 0.7489982245221524, iteration: 421630
loss: 1.03090238571167,grad_norm: 0.8209177370805719, iteration: 421631
loss: 1.0286256074905396,grad_norm: 0.8889164863768537, iteration: 421632
loss: 0.9656145572662354,grad_norm: 0.8093364354287551, iteration: 421633
loss: 1.0003644227981567,grad_norm: 0.9999998170915356, iteration: 421634
loss: 1.0764856338500977,grad_norm: 0.7722028444464187, iteration: 421635
loss: 1.0079594850540161,grad_norm: 0.8844068193515738, iteration: 421636
loss: 0.9987773299217224,grad_norm: 0.5936290270030493, iteration: 421637
loss: 1.1521803140640259,grad_norm: 0.9921116232958603, iteration: 421638
loss: 1.0400793552398682,grad_norm: 0.6905793196341122, iteration: 421639
loss: 1.0569612979888916,grad_norm: 0.8788340810925982, iteration: 421640
loss: 1.0173650979995728,grad_norm: 0.8512469704107137, iteration: 421641
loss: 0.9870606064796448,grad_norm: 0.9780918820806649, iteration: 421642
loss: 0.9977125525474548,grad_norm: 0.7901988281530533, iteration: 421643
loss: 1.0641050338745117,grad_norm: 0.9999993815506776, iteration: 421644
loss: 0.9667431116104126,grad_norm: 0.7349392464580281, iteration: 421645
loss: 1.0454641580581665,grad_norm: 0.9769363680772453, iteration: 421646
loss: 1.0092012882232666,grad_norm: 0.7425239828128489, iteration: 421647
loss: 1.1051634550094604,grad_norm: 0.9999991267702603, iteration: 421648
loss: 1.0425901412963867,grad_norm: 0.9999999141415553, iteration: 421649
loss: 1.0376510620117188,grad_norm: 0.754983607726642, iteration: 421650
loss: 1.0708986520767212,grad_norm: 0.9999997765516068, iteration: 421651
loss: 1.0749282836914062,grad_norm: 0.9999995607211142, iteration: 421652
loss: 1.0757416486740112,grad_norm: 0.999999398625183, iteration: 421653
loss: 1.0007821321487427,grad_norm: 0.7720713182373447, iteration: 421654
loss: 0.9705369472503662,grad_norm: 0.6426532326883716, iteration: 421655
loss: 1.0102132558822632,grad_norm: 0.7721342659058493, iteration: 421656
loss: 1.017968773841858,grad_norm: 0.8565025100227882, iteration: 421657
loss: 1.0086941719055176,grad_norm: 0.6156141768203818, iteration: 421658
loss: 0.9882521033287048,grad_norm: 0.8033412411328728, iteration: 421659
loss: 1.001112461090088,grad_norm: 0.807448564908873, iteration: 421660
loss: 1.0009350776672363,grad_norm: 0.7116787406348639, iteration: 421661
loss: 1.004469394683838,grad_norm: 0.8464617996963211, iteration: 421662
loss: 0.9618250131607056,grad_norm: 0.7028383608433144, iteration: 421663
loss: 1.1359288692474365,grad_norm: 0.9999993836805381, iteration: 421664
loss: 1.0193191766738892,grad_norm: 0.9958381129434489, iteration: 421665
loss: 0.9972662925720215,grad_norm: 0.6855077643830211, iteration: 421666
loss: 0.9920871257781982,grad_norm: 0.7731440398574662, iteration: 421667
loss: 1.0079354047775269,grad_norm: 0.6916279039827732, iteration: 421668
loss: 1.0097877979278564,grad_norm: 0.9999991179263392, iteration: 421669
loss: 0.9742152094841003,grad_norm: 0.7641693448113189, iteration: 421670
loss: 0.9909366369247437,grad_norm: 0.8857566748065511, iteration: 421671
loss: 1.0640877485275269,grad_norm: 0.9999994017295839, iteration: 421672
loss: 0.974370539188385,grad_norm: 0.7543932543662044, iteration: 421673
loss: 1.0203115940093994,grad_norm: 0.7755206203243495, iteration: 421674
loss: 1.0151684284210205,grad_norm: 0.7736927760578747, iteration: 421675
loss: 1.0254911184310913,grad_norm: 0.769468984199996, iteration: 421676
loss: 1.0537289381027222,grad_norm: 0.9999993244582297, iteration: 421677
loss: 1.2374134063720703,grad_norm: 0.9999993997238319, iteration: 421678
loss: 0.9711589217185974,grad_norm: 0.9999998571010487, iteration: 421679
loss: 0.9928100109100342,grad_norm: 0.8009588826721478, iteration: 421680
loss: 1.023905873298645,grad_norm: 0.8311372824871586, iteration: 421681
loss: 0.9980062246322632,grad_norm: 0.7299383368125313, iteration: 421682
loss: 1.0017601251602173,grad_norm: 0.649426883658464, iteration: 421683
loss: 1.0386298894882202,grad_norm: 0.7628156728475989, iteration: 421684
loss: 0.9905489087104797,grad_norm: 0.9757206509175259, iteration: 421685
loss: 0.9946967959403992,grad_norm: 0.6265732213643259, iteration: 421686
loss: 1.001765251159668,grad_norm: 0.9999995880309919, iteration: 421687
loss: 1.1152479648590088,grad_norm: 0.8841968814403184, iteration: 421688
loss: 1.0531575679779053,grad_norm: 0.8221676799992238, iteration: 421689
loss: 1.0102118253707886,grad_norm: 0.709477215181766, iteration: 421690
loss: 1.0150779485702515,grad_norm: 0.7923104624759784, iteration: 421691
loss: 1.0544328689575195,grad_norm: 0.7670081346420378, iteration: 421692
loss: 0.9739863276481628,grad_norm: 0.7695186653104475, iteration: 421693
loss: 1.0492885112762451,grad_norm: 0.8062981030216102, iteration: 421694
loss: 1.0310932397842407,grad_norm: 0.999999636383128, iteration: 421695
loss: 1.010538935661316,grad_norm: 0.8564877468966147, iteration: 421696
loss: 1.0658249855041504,grad_norm: 0.9999996410767258, iteration: 421697
loss: 1.0656119585037231,grad_norm: 0.8148454775833645, iteration: 421698
loss: 1.0254292488098145,grad_norm: 0.9199661352381997, iteration: 421699
loss: 1.0126874446868896,grad_norm: 0.8010999240404685, iteration: 421700
loss: 1.0453591346740723,grad_norm: 0.8063081245322138, iteration: 421701
loss: 1.0371006727218628,grad_norm: 0.8161132581773299, iteration: 421702
loss: 1.0057932138442993,grad_norm: 0.9999990952918915, iteration: 421703
loss: 0.9604207873344421,grad_norm: 0.8705463249586001, iteration: 421704
loss: 0.9725783467292786,grad_norm: 0.757275142781455, iteration: 421705
loss: 1.0227413177490234,grad_norm: 0.9999998931673695, iteration: 421706
loss: 1.003825306892395,grad_norm: 0.7723176164187069, iteration: 421707
loss: 0.985503077507019,grad_norm: 0.8288909616194589, iteration: 421708
loss: 1.0025488138198853,grad_norm: 0.7808822553170706, iteration: 421709
loss: 0.970712423324585,grad_norm: 0.7986586668274195, iteration: 421710
loss: 1.0238368511199951,grad_norm: 0.7852267984393339, iteration: 421711
loss: 0.970985472202301,grad_norm: 0.7841525767835491, iteration: 421712
loss: 1.041316032409668,grad_norm: 0.732976390013692, iteration: 421713
loss: 1.005960464477539,grad_norm: 0.6687330398752618, iteration: 421714
loss: 1.0651851892471313,grad_norm: 0.8081822051097327, iteration: 421715
loss: 0.9998918771743774,grad_norm: 0.8442763206563999, iteration: 421716
loss: 1.015520691871643,grad_norm: 0.7513250341178314, iteration: 421717
loss: 0.9991118907928467,grad_norm: 0.8222666438304838, iteration: 421718
loss: 1.0480122566223145,grad_norm: 0.9999995124368279, iteration: 421719
loss: 1.0104727745056152,grad_norm: 0.8672299490583201, iteration: 421720
loss: 1.0465736389160156,grad_norm: 0.9999991625342516, iteration: 421721
loss: 0.988822877407074,grad_norm: 0.6036071721576388, iteration: 421722
loss: 0.9860803484916687,grad_norm: 0.821216764064142, iteration: 421723
loss: 1.0432902574539185,grad_norm: 0.8174436883675362, iteration: 421724
loss: 0.9650124907493591,grad_norm: 0.814546767115292, iteration: 421725
loss: 0.9957141280174255,grad_norm: 0.9586338029052364, iteration: 421726
loss: 1.0219544172286987,grad_norm: 0.8704854276048521, iteration: 421727
loss: 1.0858163833618164,grad_norm: 0.9999994936543468, iteration: 421728
loss: 1.0078247785568237,grad_norm: 0.8665830891681201, iteration: 421729
loss: 1.0121721029281616,grad_norm: 0.999999153245051, iteration: 421730
loss: 1.0271186828613281,grad_norm: 0.8678071332558651, iteration: 421731
loss: 1.0116251707077026,grad_norm: 0.8245006630692715, iteration: 421732
loss: 0.974754810333252,grad_norm: 0.8698786727427661, iteration: 421733
loss: 0.9987625479698181,grad_norm: 0.7899661831224917, iteration: 421734
loss: 1.0070915222167969,grad_norm: 0.9999992872511142, iteration: 421735
loss: 0.9953269958496094,grad_norm: 0.7021483559572278, iteration: 421736
loss: 1.0909719467163086,grad_norm: 0.9168112292652966, iteration: 421737
loss: 0.9787030816078186,grad_norm: 0.8093656707818968, iteration: 421738
loss: 1.0366277694702148,grad_norm: 0.9999998559907185, iteration: 421739
loss: 0.9876612424850464,grad_norm: 0.7396909423765381, iteration: 421740
loss: 1.1045758724212646,grad_norm: 0.8483583750989392, iteration: 421741
loss: 0.9846940040588379,grad_norm: 0.9584004888981601, iteration: 421742
loss: 1.0655845403671265,grad_norm: 0.9999998287579995, iteration: 421743
loss: 1.0200653076171875,grad_norm: 0.9999991986202609, iteration: 421744
loss: 1.0189064741134644,grad_norm: 0.901288425765882, iteration: 421745
loss: 1.0129116773605347,grad_norm: 0.7228198157231864, iteration: 421746
loss: 1.0644619464874268,grad_norm: 0.8392493735428818, iteration: 421747
loss: 1.0792737007141113,grad_norm: 0.9999991599397999, iteration: 421748
loss: 1.0016099214553833,grad_norm: 0.6940974609268781, iteration: 421749
loss: 0.9737501740455627,grad_norm: 0.7832006020635258, iteration: 421750
loss: 1.102002501487732,grad_norm: 0.9999994819341367, iteration: 421751
loss: 1.0820599794387817,grad_norm: 0.9627665281219868, iteration: 421752
loss: 1.0206198692321777,grad_norm: 0.8009014947905653, iteration: 421753
loss: 0.9725245833396912,grad_norm: 0.8094581554461319, iteration: 421754
loss: 0.974138617515564,grad_norm: 0.8543111068585627, iteration: 421755
loss: 0.9849987626075745,grad_norm: 0.8915267461009805, iteration: 421756
loss: 0.9893510341644287,grad_norm: 0.6625241153530369, iteration: 421757
loss: 1.011443853378296,grad_norm: 0.9478659608015476, iteration: 421758
loss: 0.9950637817382812,grad_norm: 0.7963067443520738, iteration: 421759
loss: 1.0004336833953857,grad_norm: 0.6402121102787472, iteration: 421760
loss: 0.9963592290878296,grad_norm: 0.7670554140784639, iteration: 421761
loss: 0.998526930809021,grad_norm: 0.9999996220571291, iteration: 421762
loss: 1.0791380405426025,grad_norm: 0.9999998445120548, iteration: 421763
loss: 1.08281672000885,grad_norm: 0.9999992176971301, iteration: 421764
loss: 1.0050597190856934,grad_norm: 0.9999998919179025, iteration: 421765
loss: 0.9983012080192566,grad_norm: 0.9999991385820546, iteration: 421766
loss: 1.0169975757598877,grad_norm: 0.7669797636010319, iteration: 421767
loss: 1.007665991783142,grad_norm: 0.7067923561319418, iteration: 421768
loss: 1.0670734643936157,grad_norm: 0.9213945226044974, iteration: 421769
loss: 1.034946322441101,grad_norm: 0.9999990193826664, iteration: 421770
loss: 1.0091265439987183,grad_norm: 0.7116351958705899, iteration: 421771
loss: 1.0654406547546387,grad_norm: 0.980527151453068, iteration: 421772
loss: 0.9685927033424377,grad_norm: 0.8989729613173859, iteration: 421773
loss: 0.9664485454559326,grad_norm: 0.9999996519628105, iteration: 421774
loss: 1.0147740840911865,grad_norm: 0.9999995719763902, iteration: 421775
loss: 1.0352267026901245,grad_norm: 0.9999990910239788, iteration: 421776
loss: 0.9954327344894409,grad_norm: 0.8210594075476487, iteration: 421777
loss: 1.202014684677124,grad_norm: 0.9999997720989807, iteration: 421778
loss: 0.9782508015632629,grad_norm: 0.866370970543142, iteration: 421779
loss: 0.9876356720924377,grad_norm: 0.8590094379857076, iteration: 421780
loss: 1.0309159755706787,grad_norm: 0.7894721403925745, iteration: 421781
loss: 1.0159380435943604,grad_norm: 0.7995984215590103, iteration: 421782
loss: 1.0040326118469238,grad_norm: 0.8608458726848545, iteration: 421783
loss: 1.01106595993042,grad_norm: 0.728891362256426, iteration: 421784
loss: 1.027536392211914,grad_norm: 0.7319392341470764, iteration: 421785
loss: 1.0291013717651367,grad_norm: 0.8004040108032288, iteration: 421786
loss: 0.9966089129447937,grad_norm: 0.8630723078674568, iteration: 421787
loss: 1.0325552225112915,grad_norm: 0.9999997833248447, iteration: 421788
loss: 1.0054699182510376,grad_norm: 0.999999136667109, iteration: 421789
loss: 1.0050313472747803,grad_norm: 0.842310381477822, iteration: 421790
loss: 1.065105676651001,grad_norm: 0.999999406975913, iteration: 421791
loss: 0.9905014038085938,grad_norm: 0.9595721819455274, iteration: 421792
loss: 1.061806559562683,grad_norm: 0.7934786099463798, iteration: 421793
loss: 1.1103156805038452,grad_norm: 0.9999991612926028, iteration: 421794
loss: 1.0418596267700195,grad_norm: 0.7948133188533806, iteration: 421795
loss: 1.0213747024536133,grad_norm: 0.7689502957090368, iteration: 421796
loss: 0.9861910939216614,grad_norm: 0.9999993640737525, iteration: 421797
loss: 0.9715327620506287,grad_norm: 0.8404650933759111, iteration: 421798
loss: 1.021826982498169,grad_norm: 0.9999994524246549, iteration: 421799
loss: 0.9947255849838257,grad_norm: 0.7231622667190938, iteration: 421800
loss: 1.047013282775879,grad_norm: 0.9999992836860019, iteration: 421801
loss: 1.0366359949111938,grad_norm: 0.9894047836919194, iteration: 421802
loss: 0.9743790626525879,grad_norm: 0.8505837615067101, iteration: 421803
loss: 1.0184043645858765,grad_norm: 0.733751216717638, iteration: 421804
loss: 1.0710258483886719,grad_norm: 0.9999991758478457, iteration: 421805
loss: 1.038746953010559,grad_norm: 0.90126301052495, iteration: 421806
loss: 1.0788177251815796,grad_norm: 0.7702085919279343, iteration: 421807
loss: 1.0229220390319824,grad_norm: 0.69181172544669, iteration: 421808
loss: 0.9852064847946167,grad_norm: 0.8891499125292553, iteration: 421809
loss: 1.147881269454956,grad_norm: 0.999999880900408, iteration: 421810
loss: 1.0181732177734375,grad_norm: 0.9999992964165789, iteration: 421811
loss: 0.9766319990158081,grad_norm: 0.7685306683199314, iteration: 421812
loss: 1.065338373184204,grad_norm: 0.9999998421139796, iteration: 421813
loss: 0.9746749997138977,grad_norm: 0.7536976473939411, iteration: 421814
loss: 0.9840477108955383,grad_norm: 0.9999995091106825, iteration: 421815
loss: 0.9841428995132446,grad_norm: 0.8293635615910419, iteration: 421816
loss: 1.0299898386001587,grad_norm: 0.8030705173928657, iteration: 421817
loss: 0.9906400442123413,grad_norm: 0.7463705745313555, iteration: 421818
loss: 1.0200421810150146,grad_norm: 0.776923323138249, iteration: 421819
loss: 1.0481054782867432,grad_norm: 0.8616592545606713, iteration: 421820
loss: 1.0413718223571777,grad_norm: 0.9349403427061479, iteration: 421821
loss: 1.140543818473816,grad_norm: 0.940696579600372, iteration: 421822
loss: 0.9797763824462891,grad_norm: 0.6866983004946429, iteration: 421823
loss: 0.9760374426841736,grad_norm: 0.856330795500291, iteration: 421824
loss: 0.9736841917037964,grad_norm: 0.7743883947132858, iteration: 421825
loss: 0.9462975263595581,grad_norm: 0.8530718752501665, iteration: 421826
loss: 0.9857807159423828,grad_norm: 0.8042934571971421, iteration: 421827
loss: 1.0091075897216797,grad_norm: 0.8477781674355155, iteration: 421828
loss: 1.029459834098816,grad_norm: 0.8064151835287806, iteration: 421829
loss: 1.0280653238296509,grad_norm: 0.9238719982139433, iteration: 421830
loss: 0.9874328970909119,grad_norm: 0.8308574724934521, iteration: 421831
loss: 1.015994906425476,grad_norm: 0.8145870419642965, iteration: 421832
loss: 0.968876302242279,grad_norm: 0.7242567280356768, iteration: 421833
loss: 1.0846045017242432,grad_norm: 0.9999993370590484, iteration: 421834
loss: 1.0435336828231812,grad_norm: 0.7912433710571485, iteration: 421835
loss: 1.013626217842102,grad_norm: 0.6751965491020919, iteration: 421836
loss: 1.0363768339157104,grad_norm: 0.999999299265349, iteration: 421837
loss: 1.0304487943649292,grad_norm: 0.997828347711583, iteration: 421838
loss: 1.034039855003357,grad_norm: 0.7951239555178675, iteration: 421839
loss: 0.9667412638664246,grad_norm: 0.7463237821576302, iteration: 421840
loss: 1.0046862363815308,grad_norm: 0.6682600098940223, iteration: 421841
loss: 1.0465348958969116,grad_norm: 0.9999994266683233, iteration: 421842
loss: 1.0039538145065308,grad_norm: 0.7473838420064308, iteration: 421843
loss: 0.9636216163635254,grad_norm: 0.7697379539934968, iteration: 421844
loss: 1.0389872789382935,grad_norm: 0.7830727861470026, iteration: 421845
loss: 1.0078860521316528,grad_norm: 0.8054385635439384, iteration: 421846
loss: 1.0023152828216553,grad_norm: 0.8532511651608458, iteration: 421847
loss: 0.99590003490448,grad_norm: 0.6964631527929741, iteration: 421848
loss: 0.9836595058441162,grad_norm: 0.6599358653696431, iteration: 421849
loss: 0.9909917712211609,grad_norm: 0.9999991129465287, iteration: 421850
loss: 1.0182398557662964,grad_norm: 0.7626996464458718, iteration: 421851
loss: 1.0192675590515137,grad_norm: 0.7665056277331495, iteration: 421852
loss: 0.9781338572502136,grad_norm: 0.9776555526266807, iteration: 421853
loss: 1.054911732673645,grad_norm: 0.999999800575675, iteration: 421854
loss: 0.9827724099159241,grad_norm: 0.8580642340678005, iteration: 421855
loss: 1.020268440246582,grad_norm: 0.9999993411980251, iteration: 421856
loss: 1.0296027660369873,grad_norm: 0.9279241208807536, iteration: 421857
loss: 1.0496959686279297,grad_norm: 0.7526128587424524, iteration: 421858
loss: 0.9820287823677063,grad_norm: 0.7955404638615453, iteration: 421859
loss: 1.0139312744140625,grad_norm: 0.8150550135711344, iteration: 421860
loss: 1.058126449584961,grad_norm: 0.9949403461360024, iteration: 421861
loss: 1.0124667882919312,grad_norm: 0.8676501512710495, iteration: 421862
loss: 0.9756177067756653,grad_norm: 0.7718340566152949, iteration: 421863
loss: 0.9757356643676758,grad_norm: 0.919930129394224, iteration: 421864
loss: 0.9971915483474731,grad_norm: 0.7033593994322396, iteration: 421865
loss: 1.042419195175171,grad_norm: 0.9999994363465894, iteration: 421866
loss: 0.9784753918647766,grad_norm: 0.9999995673001888, iteration: 421867
loss: 0.9931337237358093,grad_norm: 0.725120585971474, iteration: 421868
loss: 1.0374257564544678,grad_norm: 0.8258186971264173, iteration: 421869
loss: 0.9947980642318726,grad_norm: 0.7663223000794268, iteration: 421870
loss: 1.0056027173995972,grad_norm: 0.7606561495142955, iteration: 421871
loss: 0.9511816501617432,grad_norm: 0.7132310220484319, iteration: 421872
loss: 1.037084937095642,grad_norm: 0.8532045560716524, iteration: 421873
loss: 1.0081309080123901,grad_norm: 0.8402662117532129, iteration: 421874
loss: 1.045680284500122,grad_norm: 0.9999999463403784, iteration: 421875
loss: 1.0103157758712769,grad_norm: 0.9339298646328923, iteration: 421876
loss: 1.0856999158859253,grad_norm: 0.6829146151397816, iteration: 421877
loss: 1.0003610849380493,grad_norm: 0.8082147225063762, iteration: 421878
loss: 1.1011245250701904,grad_norm: 0.9999994771453427, iteration: 421879
loss: 1.113470196723938,grad_norm: 0.9999998091024785, iteration: 421880
loss: 0.9766984581947327,grad_norm: 0.7240948955878113, iteration: 421881
loss: 1.0933306217193604,grad_norm: 0.9999992187981784, iteration: 421882
loss: 1.007440447807312,grad_norm: 0.9999992843990023, iteration: 421883
loss: 1.1184501647949219,grad_norm: 0.9999994112293222, iteration: 421884
loss: 0.9841398000717163,grad_norm: 0.8248133692735026, iteration: 421885
loss: 0.9633237719535828,grad_norm: 0.6652020690146568, iteration: 421886
loss: 1.0597403049468994,grad_norm: 0.8352504668620504, iteration: 421887
loss: 1.0248041152954102,grad_norm: 0.7085232725208807, iteration: 421888
loss: 0.9971348643302917,grad_norm: 0.7789585946921689, iteration: 421889
loss: 1.0228415727615356,grad_norm: 0.9999994021855811, iteration: 421890
loss: 1.0859876871109009,grad_norm: 0.9999992559372172, iteration: 421891
loss: 0.980543851852417,grad_norm: 0.9999997714748301, iteration: 421892
loss: 1.0160197019577026,grad_norm: 0.9999993332407584, iteration: 421893
loss: 0.9866737723350525,grad_norm: 0.880485721767493, iteration: 421894
loss: 1.0069783926010132,grad_norm: 0.6243170213958417, iteration: 421895
loss: 0.9974212646484375,grad_norm: 0.9706849378227683, iteration: 421896
loss: 0.9967732429504395,grad_norm: 0.7254454787986215, iteration: 421897
loss: 1.0754519701004028,grad_norm: 0.9999998645442042, iteration: 421898
loss: 0.9747012853622437,grad_norm: 0.9999991156693046, iteration: 421899
loss: 1.0378284454345703,grad_norm: 0.9400543824716243, iteration: 421900
loss: 1.1085190773010254,grad_norm: 0.9999991757480672, iteration: 421901
loss: 0.9840641617774963,grad_norm: 0.7784666875546412, iteration: 421902
loss: 1.026908040046692,grad_norm: 0.9528762549786657, iteration: 421903
loss: 1.0011652708053589,grad_norm: 0.8516959398279321, iteration: 421904
loss: 1.1476635932922363,grad_norm: 0.9999991447437961, iteration: 421905
loss: 0.9716130495071411,grad_norm: 0.6787803909025125, iteration: 421906
loss: 1.0503709316253662,grad_norm: 0.9999994009816092, iteration: 421907
loss: 1.0193276405334473,grad_norm: 0.7543669415991615, iteration: 421908
loss: 1.0027960538864136,grad_norm: 0.655308869638764, iteration: 421909
loss: 1.0090153217315674,grad_norm: 0.9999994899821002, iteration: 421910
loss: 1.0260783433914185,grad_norm: 0.7812707551905884, iteration: 421911
loss: 1.0020866394042969,grad_norm: 0.7679272704922092, iteration: 421912
loss: 0.9955762624740601,grad_norm: 0.9613811915917233, iteration: 421913
loss: 1.0048866271972656,grad_norm: 0.9999996434978026, iteration: 421914
loss: 0.9938925504684448,grad_norm: 0.9999994267439084, iteration: 421915
loss: 1.0002634525299072,grad_norm: 0.8430991143793857, iteration: 421916
loss: 1.0260655879974365,grad_norm: 0.7350646336505129, iteration: 421917
loss: 1.0201528072357178,grad_norm: 0.73394427608427, iteration: 421918
loss: 0.9777428507804871,grad_norm: 0.8347295959437225, iteration: 421919
loss: 0.9936087131500244,grad_norm: 0.7416961441884474, iteration: 421920
loss: 0.9690829515457153,grad_norm: 0.6078291912945893, iteration: 421921
loss: 0.9919461607933044,grad_norm: 0.7638831940940118, iteration: 421922
loss: 0.9934415221214294,grad_norm: 0.9999997754042022, iteration: 421923
loss: 1.0892561674118042,grad_norm: 0.9999997310860436, iteration: 421924
loss: 1.0081604719161987,grad_norm: 0.8271894943698663, iteration: 421925
loss: 1.0095107555389404,grad_norm: 0.8049499900015153, iteration: 421926
loss: 1.0538426637649536,grad_norm: 0.7217254642557202, iteration: 421927
loss: 1.041528344154358,grad_norm: 0.999999964861207, iteration: 421928
loss: 0.9883440732955933,grad_norm: 0.9999993139099379, iteration: 421929
loss: 0.9785778522491455,grad_norm: 0.7567201462661238, iteration: 421930
loss: 0.9948389530181885,grad_norm: 0.9421246339115049, iteration: 421931
loss: 1.0488628149032593,grad_norm: 0.77024970366567, iteration: 421932
loss: 0.9852367639541626,grad_norm: 0.7847685912376083, iteration: 421933
loss: 0.9695566296577454,grad_norm: 0.7161301989154786, iteration: 421934
loss: 0.9654935598373413,grad_norm: 0.9999990867441065, iteration: 421935
loss: 1.1333959102630615,grad_norm: 0.9999999870725378, iteration: 421936
loss: 0.99334317445755,grad_norm: 0.689394259478206, iteration: 421937
loss: 1.0138664245605469,grad_norm: 0.7850920437240435, iteration: 421938
loss: 1.002181053161621,grad_norm: 0.8758971575580572, iteration: 421939
loss: 0.9634475111961365,grad_norm: 0.7174078885708453, iteration: 421940
loss: 1.0124740600585938,grad_norm: 0.7232798729992428, iteration: 421941
loss: 1.0006372928619385,grad_norm: 0.8080411134466156, iteration: 421942
loss: 0.9746294021606445,grad_norm: 0.6737056696173365, iteration: 421943
loss: 0.9752243161201477,grad_norm: 0.774473134218373, iteration: 421944
loss: 1.0136529207229614,grad_norm: 0.9999997232339302, iteration: 421945
loss: 1.037797212600708,grad_norm: 0.8169107606096645, iteration: 421946
loss: 1.02128005027771,grad_norm: 0.9999992404594538, iteration: 421947
loss: 1.1066553592681885,grad_norm: 0.848626817202509, iteration: 421948
loss: 1.2386975288391113,grad_norm: 0.9999997281334653, iteration: 421949
loss: 1.01896333694458,grad_norm: 0.7046450676319148, iteration: 421950
loss: 1.014841914176941,grad_norm: 0.8651296345758903, iteration: 421951
loss: 0.9762970209121704,grad_norm: 0.7273068874355775, iteration: 421952
loss: 0.9743382334709167,grad_norm: 0.7993030304840476, iteration: 421953
loss: 0.9811452031135559,grad_norm: 0.6886786425568889, iteration: 421954
loss: 0.9529290795326233,grad_norm: 0.7473974626292889, iteration: 421955
loss: 1.0012062788009644,grad_norm: 0.952186281981076, iteration: 421956
loss: 1.006121277809143,grad_norm: 0.8178472021799925, iteration: 421957
loss: 0.9917297959327698,grad_norm: 0.7465566909763677, iteration: 421958
loss: 1.0658788681030273,grad_norm: 0.9339994277927874, iteration: 421959
loss: 1.093651294708252,grad_norm: 0.9999993217626156, iteration: 421960
loss: 0.9774932265281677,grad_norm: 0.808554030777016, iteration: 421961
loss: 1.0210381746292114,grad_norm: 0.8784015926865815, iteration: 421962
loss: 1.0318504571914673,grad_norm: 0.9999991958574894, iteration: 421963
loss: 0.9887911081314087,grad_norm: 0.8345815220648719, iteration: 421964
loss: 1.0047760009765625,grad_norm: 0.797130102564497, iteration: 421965
loss: 0.9828086495399475,grad_norm: 0.8447514785938169, iteration: 421966
loss: 0.9962272047996521,grad_norm: 0.7376067628243436, iteration: 421967
loss: 0.9872972965240479,grad_norm: 1.0000000525173043, iteration: 421968
loss: 0.9872049689292908,grad_norm: 0.8470391750669246, iteration: 421969
loss: 0.9584804773330688,grad_norm: 0.7482789061177904, iteration: 421970
loss: 0.9944773316383362,grad_norm: 0.7359109440769704, iteration: 421971
loss: 1.0237852334976196,grad_norm: 0.8425514264955829, iteration: 421972
loss: 1.0175201892852783,grad_norm: 0.9225100188226298, iteration: 421973
loss: 1.0135561227798462,grad_norm: 0.8992207195915374, iteration: 421974
loss: 1.0292038917541504,grad_norm: 0.9999993714461226, iteration: 421975
loss: 0.9957646727561951,grad_norm: 0.8608026116916421, iteration: 421976
loss: 0.97556072473526,grad_norm: 0.9254018524440176, iteration: 421977
loss: 1.0149602890014648,grad_norm: 0.7505419856420681, iteration: 421978
loss: 0.9841797947883606,grad_norm: 0.8178677157265372, iteration: 421979
loss: 0.9716405868530273,grad_norm: 0.9999992051860156, iteration: 421980
loss: 1.0331814289093018,grad_norm: 0.9408201679916247, iteration: 421981
loss: 0.9736872911453247,grad_norm: 0.6928526791232257, iteration: 421982
loss: 1.03919517993927,grad_norm: 0.9999997220153588, iteration: 421983
loss: 1.01862633228302,grad_norm: 0.8039513997552563, iteration: 421984
loss: 0.9813435673713684,grad_norm: 0.789360611546028, iteration: 421985
loss: 1.0125043392181396,grad_norm: 0.9999994309570699, iteration: 421986
loss: 0.9581366777420044,grad_norm: 0.8502638220129496, iteration: 421987
loss: 0.9513658285140991,grad_norm: 0.767108191309285, iteration: 421988
loss: 1.0014933347702026,grad_norm: 0.794093162255553, iteration: 421989
loss: 0.9675690531730652,grad_norm: 0.7142133188616299, iteration: 421990
loss: 1.021960973739624,grad_norm: 0.9209390226756258, iteration: 421991
loss: 1.0423704385757446,grad_norm: 0.9086871694392087, iteration: 421992
loss: 0.9695470333099365,grad_norm: 0.7354275323247076, iteration: 421993
loss: 0.9905432462692261,grad_norm: 0.8076189526598323, iteration: 421994
loss: 1.0293984413146973,grad_norm: 0.8937191635988413, iteration: 421995
loss: 0.9786386489868164,grad_norm: 0.6613993688615335, iteration: 421996
loss: 1.0102401971817017,grad_norm: 0.7844726670669733, iteration: 421997
loss: 1.0110570192337036,grad_norm: 0.74540858241255, iteration: 421998
loss: 1.0150253772735596,grad_norm: 0.7550233070984002, iteration: 421999
loss: 1.0046885013580322,grad_norm: 0.7666861519811908, iteration: 422000
loss: 0.9643149375915527,grad_norm: 0.8194306691649437, iteration: 422001
loss: 1.0082042217254639,grad_norm: 0.7517951111846378, iteration: 422002
loss: 1.0208227634429932,grad_norm: 0.9999991799399387, iteration: 422003
loss: 0.9889468550682068,grad_norm: 0.7996077351658869, iteration: 422004
loss: 1.004492998123169,grad_norm: 0.7310137688443624, iteration: 422005
loss: 0.9933556914329529,grad_norm: 0.797363006007503, iteration: 422006
loss: 0.9978619813919067,grad_norm: 0.7622553153918729, iteration: 422007
loss: 1.0103896856307983,grad_norm: 0.7607939036469583, iteration: 422008
loss: 1.0053117275238037,grad_norm: 0.8279282644334118, iteration: 422009
loss: 0.9807831048965454,grad_norm: 0.7466853248590513, iteration: 422010
loss: 0.9741281867027283,grad_norm: 0.7400769660766876, iteration: 422011
loss: 0.9295110106468201,grad_norm: 0.8129532169717657, iteration: 422012
loss: 0.9969196319580078,grad_norm: 0.9999997737352901, iteration: 422013
loss: 1.0370054244995117,grad_norm: 0.828317308726592, iteration: 422014
loss: 1.0219146013259888,grad_norm: 0.7895972695628067, iteration: 422015
loss: 0.9979457259178162,grad_norm: 0.7331095117567248, iteration: 422016
loss: 0.998801589012146,grad_norm: 0.7629895579540105, iteration: 422017
loss: 0.9984938502311707,grad_norm: 0.8979504467667397, iteration: 422018
loss: 1.0163540840148926,grad_norm: 0.8658011474246532, iteration: 422019
loss: 0.9482067227363586,grad_norm: 0.8989803481584582, iteration: 422020
loss: 1.0109471082687378,grad_norm: 0.8102408175277114, iteration: 422021
loss: 1.0518088340759277,grad_norm: 0.9999997783199029, iteration: 422022
loss: 0.9904724359512329,grad_norm: 0.9233506832582558, iteration: 422023
loss: 1.0080702304840088,grad_norm: 0.6974428696221011, iteration: 422024
loss: 0.9804121255874634,grad_norm: 0.7546265854742187, iteration: 422025
loss: 0.9949471354484558,grad_norm: 0.9999992000300623, iteration: 422026
loss: 1.019736647605896,grad_norm: 0.8529968529904933, iteration: 422027
loss: 1.034360408782959,grad_norm: 0.8137809504804562, iteration: 422028
loss: 1.0272973775863647,grad_norm: 0.7012432783350094, iteration: 422029
loss: 0.9738426804542542,grad_norm: 0.8222336135526244, iteration: 422030
loss: 0.9386541843414307,grad_norm: 0.6872451042467129, iteration: 422031
loss: 0.9911015033721924,grad_norm: 0.8909059167776726, iteration: 422032
loss: 1.0333261489868164,grad_norm: 0.9999992230982582, iteration: 422033
loss: 1.0863707065582275,grad_norm: 0.9999993050180722, iteration: 422034
loss: 0.987193763256073,grad_norm: 0.9999998448046179, iteration: 422035
loss: 1.203732967376709,grad_norm: 0.9999991392773525, iteration: 422036
loss: 1.0887541770935059,grad_norm: 0.9999991730580228, iteration: 422037
loss: 1.0170341730117798,grad_norm: 0.7367896926557924, iteration: 422038
loss: 1.0470744371414185,grad_norm: 0.9999993157448399, iteration: 422039
loss: 0.9892699718475342,grad_norm: 0.999999726275808, iteration: 422040
loss: 1.0373479127883911,grad_norm: 0.9672543317994873, iteration: 422041
loss: 0.9951516389846802,grad_norm: 0.8800365412680237, iteration: 422042
loss: 1.1453258991241455,grad_norm: 0.999999421660861, iteration: 422043
loss: 0.9956299066543579,grad_norm: 0.7584539042999662, iteration: 422044
loss: 1.0584802627563477,grad_norm: 0.8303528256745495, iteration: 422045
loss: 0.9607179164886475,grad_norm: 0.7509754568171625, iteration: 422046
loss: 0.9512273669242859,grad_norm: 0.8240291197787919, iteration: 422047
loss: 1.013350009918213,grad_norm: 0.9325607501311629, iteration: 422048
loss: 0.9919347763061523,grad_norm: 0.8244948106152723, iteration: 422049
loss: 0.9808931350708008,grad_norm: 0.6982073232427006, iteration: 422050
loss: 0.9876245856285095,grad_norm: 0.6796769488273976, iteration: 422051
loss: 1.0802319049835205,grad_norm: 0.9999991579990426, iteration: 422052
loss: 1.0360666513442993,grad_norm: 0.9131955384580923, iteration: 422053
loss: 1.0070464611053467,grad_norm: 0.7779938179248835, iteration: 422054
loss: 1.0160905122756958,grad_norm: 0.9216337753157883, iteration: 422055
loss: 1.0270380973815918,grad_norm: 0.744604559449588, iteration: 422056
loss: 0.9707016348838806,grad_norm: 0.8897130233841195, iteration: 422057
loss: 0.9804955124855042,grad_norm: 0.8631403970108491, iteration: 422058
loss: 1.0510971546173096,grad_norm: 0.7197247659093146, iteration: 422059
loss: 0.9883056282997131,grad_norm: 0.8345325562889611, iteration: 422060
loss: 0.9940887689590454,grad_norm: 0.999999383867453, iteration: 422061
loss: 1.0756912231445312,grad_norm: 0.9999995266402409, iteration: 422062
loss: 1.0066540241241455,grad_norm: 0.6927644018157039, iteration: 422063
loss: 1.0321905612945557,grad_norm: 0.8315670783545481, iteration: 422064
loss: 1.0069835186004639,grad_norm: 0.7291623281363236, iteration: 422065
loss: 1.001198410987854,grad_norm: 0.9071178445392738, iteration: 422066
loss: 1.0084683895111084,grad_norm: 0.9710010344981593, iteration: 422067
loss: 1.0409518480300903,grad_norm: 0.8933842160396809, iteration: 422068
loss: 0.9835805296897888,grad_norm: 0.7560564680334756, iteration: 422069
loss: 1.0107790231704712,grad_norm: 0.70552135091257, iteration: 422070
loss: 1.0374431610107422,grad_norm: 0.7795697202579437, iteration: 422071
loss: 0.9950461983680725,grad_norm: 0.7362563560192209, iteration: 422072
loss: 1.0059834718704224,grad_norm: 0.7985525119224117, iteration: 422073
loss: 0.9741583466529846,grad_norm: 0.7317720490778892, iteration: 422074
loss: 1.0375308990478516,grad_norm: 0.7827928931488833, iteration: 422075
loss: 1.0025198459625244,grad_norm: 0.8986040420938783, iteration: 422076
loss: 0.9956294298171997,grad_norm: 0.7494308651280545, iteration: 422077
loss: 0.9995971322059631,grad_norm: 0.692237744017841, iteration: 422078
loss: 0.9985833168029785,grad_norm: 0.7338067609743308, iteration: 422079
loss: 0.9855870008468628,grad_norm: 0.9031679279890631, iteration: 422080
loss: 1.0740172863006592,grad_norm: 0.9999997028631787, iteration: 422081
loss: 0.9701594710350037,grad_norm: 0.9999989783465708, iteration: 422082
loss: 1.0450785160064697,grad_norm: 0.9999997600523645, iteration: 422083
loss: 0.9836851954460144,grad_norm: 0.8361694982830925, iteration: 422084
loss: 1.006090760231018,grad_norm: 0.6093066054537554, iteration: 422085
loss: 0.9783444404602051,grad_norm: 0.788857150992658, iteration: 422086
loss: 0.9876640439033508,grad_norm: 0.7212526678360515, iteration: 422087
loss: 1.0004699230194092,grad_norm: 0.8156278950446244, iteration: 422088
loss: 1.0226831436157227,grad_norm: 0.8398051266334049, iteration: 422089
loss: 0.9979109764099121,grad_norm: 0.7401927410446414, iteration: 422090
loss: 1.0032827854156494,grad_norm: 0.733322853562272, iteration: 422091
loss: 0.9776225686073303,grad_norm: 0.8820465708608739, iteration: 422092
loss: 0.9874457716941833,grad_norm: 0.639660010736749, iteration: 422093
loss: 0.9630601406097412,grad_norm: 0.7165301049231066, iteration: 422094
loss: 1.045111894607544,grad_norm: 0.8542937159244162, iteration: 422095
loss: 1.0833786725997925,grad_norm: 0.9999994172511874, iteration: 422096
loss: 1.0112370252609253,grad_norm: 0.7185052007888226, iteration: 422097
loss: 1.0332322120666504,grad_norm: 0.803128095081568, iteration: 422098
loss: 1.1458882093429565,grad_norm: 0.9999999638005945, iteration: 422099
loss: 1.0203062295913696,grad_norm: 0.9999992110496844, iteration: 422100
loss: 0.9590088725090027,grad_norm: 0.9999989670123062, iteration: 422101
loss: 0.9705499410629272,grad_norm: 0.8345508272281006, iteration: 422102
loss: 1.0930405855178833,grad_norm: 0.8971349112731837, iteration: 422103
loss: 0.9807219505310059,grad_norm: 0.9999993300114238, iteration: 422104
loss: 1.0003477334976196,grad_norm: 0.8965109822166931, iteration: 422105
loss: 0.98546302318573,grad_norm: 0.706811077730903, iteration: 422106
loss: 0.9806041717529297,grad_norm: 0.8359564817098866, iteration: 422107
loss: 0.998382568359375,grad_norm: 0.9099428890110077, iteration: 422108
loss: 1.0645722150802612,grad_norm: 0.9999998361564003, iteration: 422109
loss: 1.0206365585327148,grad_norm: 0.9999992124720218, iteration: 422110
loss: 0.9765082001686096,grad_norm: 0.6768384289972722, iteration: 422111
loss: 1.015701413154602,grad_norm: 0.87625155483673, iteration: 422112
loss: 1.013832688331604,grad_norm: 0.7203750097901495, iteration: 422113
loss: 0.9902172088623047,grad_norm: 0.9060098141594753, iteration: 422114
loss: 0.9757047891616821,grad_norm: 0.9999990482964892, iteration: 422115
loss: 0.9970314502716064,grad_norm: 0.81443127300142, iteration: 422116
loss: 1.0592706203460693,grad_norm: 0.9999991231407773, iteration: 422117
loss: 0.9746535420417786,grad_norm: 0.8320532542664806, iteration: 422118
loss: 0.9878998398780823,grad_norm: 0.7543664382434477, iteration: 422119
loss: 0.9583050012588501,grad_norm: 0.8421681103397888, iteration: 422120
loss: 1.0197675228118896,grad_norm: 0.7594685676995462, iteration: 422121
loss: 1.0247164964675903,grad_norm: 0.8814675154602709, iteration: 422122
loss: 0.9929052591323853,grad_norm: 0.7515255250232723, iteration: 422123
loss: 0.9808979034423828,grad_norm: 0.9999991591288856, iteration: 422124
loss: 0.9837291240692139,grad_norm: 0.8000917211944549, iteration: 422125
loss: 0.968745768070221,grad_norm: 0.9116937030793362, iteration: 422126
loss: 1.0429607629776,grad_norm: 0.9999999017122084, iteration: 422127
loss: 1.0817599296569824,grad_norm: 0.9413395261617528, iteration: 422128
loss: 0.9838200807571411,grad_norm: 0.739868862479238, iteration: 422129
loss: 1.0002069473266602,grad_norm: 0.9784338675062879, iteration: 422130
loss: 1.076208472251892,grad_norm: 0.9999999395881911, iteration: 422131
loss: 1.0314937829971313,grad_norm: 0.9999994540267197, iteration: 422132
loss: 0.9838855862617493,grad_norm: 0.7287497256380405, iteration: 422133
loss: 1.0616145133972168,grad_norm: 0.9999994224807417, iteration: 422134
loss: 1.1389670372009277,grad_norm: 0.999999611499428, iteration: 422135
loss: 1.1595934629440308,grad_norm: 0.9999991669670512, iteration: 422136
loss: 0.9852635860443115,grad_norm: 0.7890293531909831, iteration: 422137
loss: 0.9733671545982361,grad_norm: 0.668017987539366, iteration: 422138
loss: 1.1183977127075195,grad_norm: 0.9999992329618871, iteration: 422139
loss: 0.9976821541786194,grad_norm: 0.6786529133358559, iteration: 422140
loss: 1.011008381843567,grad_norm: 0.8586185252628263, iteration: 422141
loss: 1.0261327028274536,grad_norm: 0.7027761891007228, iteration: 422142
loss: 1.0117422342300415,grad_norm: 0.8276338812677886, iteration: 422143
loss: 1.0456393957138062,grad_norm: 0.8018155487324012, iteration: 422144
loss: 1.0441354513168335,grad_norm: 0.9777288182486984, iteration: 422145
loss: 1.0014334917068481,grad_norm: 0.8433855383237886, iteration: 422146
loss: 1.0699528455734253,grad_norm: 0.8425132730796706, iteration: 422147
loss: 1.0188714265823364,grad_norm: 0.9487612472152889, iteration: 422148
loss: 1.0000946521759033,grad_norm: 0.9999991711066959, iteration: 422149
loss: 1.0075052976608276,grad_norm: 0.9326582338204261, iteration: 422150
loss: 1.0042288303375244,grad_norm: 0.8464388351164694, iteration: 422151
loss: 0.9851104021072388,grad_norm: 0.6580402676053474, iteration: 422152
loss: 0.9523393511772156,grad_norm: 0.9181455120331043, iteration: 422153
loss: 1.0265988111495972,grad_norm: 0.9999998688503579, iteration: 422154
loss: 1.0719103813171387,grad_norm: 0.8893270260515816, iteration: 422155
loss: 0.9998289942741394,grad_norm: 0.7963187379358316, iteration: 422156
loss: 0.9965758323669434,grad_norm: 0.8292883387140888, iteration: 422157
loss: 1.0388737916946411,grad_norm: 0.7981791595594174, iteration: 422158
loss: 1.0153661966323853,grad_norm: 0.8900932141852762, iteration: 422159
loss: 1.009566068649292,grad_norm: 0.7307030618251125, iteration: 422160
loss: 0.9514060020446777,grad_norm: 0.8303517331193063, iteration: 422161
loss: 1.0002367496490479,grad_norm: 0.6783935568983309, iteration: 422162
loss: 1.0049668550491333,grad_norm: 0.780177246187189, iteration: 422163
loss: 1.003594994544983,grad_norm: 0.7088461968039663, iteration: 422164
loss: 0.9981387257575989,grad_norm: 0.9999999630763359, iteration: 422165
loss: 0.981773853302002,grad_norm: 0.9527271859065973, iteration: 422166
loss: 1.0065946578979492,grad_norm: 0.8097930798189344, iteration: 422167
loss: 0.9856287240982056,grad_norm: 0.695498740598293, iteration: 422168
loss: 1.0001486539840698,grad_norm: 0.8140070569046975, iteration: 422169
loss: 1.011885643005371,grad_norm: 0.76874926526631, iteration: 422170
loss: 1.0428787469863892,grad_norm: 0.999999439371209, iteration: 422171
loss: 1.0212719440460205,grad_norm: 0.7368670097451887, iteration: 422172
loss: 1.0156142711639404,grad_norm: 0.6960897193772368, iteration: 422173
loss: 0.9962254762649536,grad_norm: 0.7557876581682369, iteration: 422174
loss: 0.9853498339653015,grad_norm: 0.6813520248815015, iteration: 422175
loss: 1.0058391094207764,grad_norm: 0.9999999203628464, iteration: 422176
loss: 0.9477464556694031,grad_norm: 0.7579671285331432, iteration: 422177
loss: 0.9945219159126282,grad_norm: 0.8361915963506802, iteration: 422178
loss: 1.0284192562103271,grad_norm: 0.8024591621361193, iteration: 422179
loss: 1.0182081460952759,grad_norm: 0.9999993641248625, iteration: 422180
loss: 1.0687987804412842,grad_norm: 0.9318250137788416, iteration: 422181
loss: 0.9692320823669434,grad_norm: 0.6657540773753471, iteration: 422182
loss: 1.080514907836914,grad_norm: 0.9746642036786848, iteration: 422183
loss: 1.0392838716506958,grad_norm: 0.8835524917792804, iteration: 422184
loss: 1.0147984027862549,grad_norm: 0.8426399069442589, iteration: 422185
loss: 0.990935206413269,grad_norm: 0.8502349557430452, iteration: 422186
loss: 0.9694504141807556,grad_norm: 0.834932890097327, iteration: 422187
loss: 1.0128318071365356,grad_norm: 0.9105576745286196, iteration: 422188
loss: 0.9960466027259827,grad_norm: 0.8798611067655918, iteration: 422189
loss: 0.9870863556861877,grad_norm: 0.7343377858445905, iteration: 422190
loss: 1.0309593677520752,grad_norm: 0.748523293113341, iteration: 422191
loss: 1.0143487453460693,grad_norm: 0.8067834260937202, iteration: 422192
loss: 1.0001310110092163,grad_norm: 0.9999995630871227, iteration: 422193
loss: 0.98476243019104,grad_norm: 0.9155026640134316, iteration: 422194
loss: 1.0178250074386597,grad_norm: 0.8544794524365757, iteration: 422195
loss: 1.019081473350525,grad_norm: 0.9999991005747063, iteration: 422196
loss: 0.9785299301147461,grad_norm: 0.7757769974771812, iteration: 422197
loss: 1.0112128257751465,grad_norm: 0.811836617893227, iteration: 422198
loss: 1.0064997673034668,grad_norm: 0.7700228462088725, iteration: 422199
loss: 1.0247390270233154,grad_norm: 0.9999996848295931, iteration: 422200
loss: 0.9737306833267212,grad_norm: 0.9067636822443977, iteration: 422201
loss: 1.0172042846679688,grad_norm: 0.8223302571134914, iteration: 422202
loss: 1.0298230648040771,grad_norm: 0.8030829981627035, iteration: 422203
loss: 1.0191583633422852,grad_norm: 0.8025536758281493, iteration: 422204
loss: 1.0263670682907104,grad_norm: 0.9547373518040821, iteration: 422205
loss: 0.974739134311676,grad_norm: 0.7790307880740729, iteration: 422206
loss: 1.0009150505065918,grad_norm: 0.6662630994923324, iteration: 422207
loss: 0.9924612045288086,grad_norm: 0.7930424151406742, iteration: 422208
loss: 1.030658483505249,grad_norm: 0.7446077707569074, iteration: 422209
loss: 1.0553373098373413,grad_norm: 0.6855664351279845, iteration: 422210
loss: 1.0192933082580566,grad_norm: 0.7994317252821697, iteration: 422211
loss: 0.9809008240699768,grad_norm: 0.8880213230225185, iteration: 422212
loss: 1.0217198133468628,grad_norm: 0.9999992941328161, iteration: 422213
loss: 0.9840449690818787,grad_norm: 0.9999996792932231, iteration: 422214
loss: 0.9820095896720886,grad_norm: 0.9897907980466889, iteration: 422215
loss: 0.9844471216201782,grad_norm: 0.8490540176441148, iteration: 422216
loss: 0.9878467917442322,grad_norm: 0.9999990567297597, iteration: 422217
loss: 1.0211931467056274,grad_norm: 0.6583248548618976, iteration: 422218
loss: 1.054646372795105,grad_norm: 0.9999991431608249, iteration: 422219
loss: 1.023847222328186,grad_norm: 0.7865445841851522, iteration: 422220
loss: 1.004433035850525,grad_norm: 0.7440174285054554, iteration: 422221
loss: 1.03444504737854,grad_norm: 0.7924698691491396, iteration: 422222
loss: 1.017359972000122,grad_norm: 0.7883556752079409, iteration: 422223
loss: 1.0919278860092163,grad_norm: 0.9999996477827665, iteration: 422224
loss: 0.9638249278068542,grad_norm: 0.7128926784978841, iteration: 422225
loss: 0.9985408782958984,grad_norm: 0.843995671884993, iteration: 422226
loss: 0.9973907470703125,grad_norm: 0.9697963829362012, iteration: 422227
loss: 1.0974539518356323,grad_norm: 0.9029680688847146, iteration: 422228
loss: 0.9700796604156494,grad_norm: 0.7793717386880334, iteration: 422229
loss: 0.9930115342140198,grad_norm: 0.7383332778291264, iteration: 422230
loss: 1.015080451965332,grad_norm: 0.8536111430568181, iteration: 422231
loss: 1.0081926584243774,grad_norm: 0.999999564578202, iteration: 422232
loss: 1.0113102197647095,grad_norm: 0.7730714491117225, iteration: 422233
loss: 1.0136256217956543,grad_norm: 0.978428313968055, iteration: 422234
loss: 1.0158120393753052,grad_norm: 0.6184812461629118, iteration: 422235
loss: 1.0056970119476318,grad_norm: 0.6654268320676727, iteration: 422236
loss: 1.0320515632629395,grad_norm: 0.7341975954166738, iteration: 422237
loss: 1.0390937328338623,grad_norm: 0.9999991598392922, iteration: 422238
loss: 1.0242383480072021,grad_norm: 0.999999085655395, iteration: 422239
loss: 1.0318903923034668,grad_norm: 0.7109820554010318, iteration: 422240
loss: 1.0194048881530762,grad_norm: 0.7572245272700229, iteration: 422241
loss: 0.9862369894981384,grad_norm: 0.7652020838667943, iteration: 422242
loss: 1.0369738340377808,grad_norm: 0.9261969286941083, iteration: 422243
loss: 1.041477084159851,grad_norm: 0.9999997323892158, iteration: 422244
loss: 0.9985767006874084,grad_norm: 0.8742861504480185, iteration: 422245
loss: 1.0441083908081055,grad_norm: 0.9999992802563569, iteration: 422246
loss: 0.9707992076873779,grad_norm: 0.7430388570905577, iteration: 422247
loss: 0.9727602601051331,grad_norm: 0.81465311468469, iteration: 422248
loss: 0.9955489635467529,grad_norm: 0.8457103714520505, iteration: 422249
loss: 1.015967845916748,grad_norm: 0.8010818268948453, iteration: 422250
loss: 0.9963199496269226,grad_norm: 0.9999998775029408, iteration: 422251
loss: 1.003629446029663,grad_norm: 0.7709032348649778, iteration: 422252
loss: 1.015689492225647,grad_norm: 0.7986888802062788, iteration: 422253
loss: 0.984427273273468,grad_norm: 0.9999989662479921, iteration: 422254
loss: 0.990875780582428,grad_norm: 0.7222279711771504, iteration: 422255
loss: 1.0392998456954956,grad_norm: 0.7660836923253311, iteration: 422256
loss: 0.9927005171775818,grad_norm: 0.7220490413379873, iteration: 422257
loss: 1.0835366249084473,grad_norm: 0.9566305788793521, iteration: 422258
loss: 1.064743995666504,grad_norm: 0.9999996711089392, iteration: 422259
loss: 1.0678504705429077,grad_norm: 0.8299218251651821, iteration: 422260
loss: 1.0089422464370728,grad_norm: 0.8784865786425964, iteration: 422261
loss: 1.0054070949554443,grad_norm: 0.7198006234403075, iteration: 422262
loss: 1.0438592433929443,grad_norm: 0.7885089560859994, iteration: 422263
loss: 0.9811196327209473,grad_norm: 0.9999999614982191, iteration: 422264
loss: 0.9998265504837036,grad_norm: 0.884838988496575, iteration: 422265
loss: 0.9897232055664062,grad_norm: 0.8028342720930257, iteration: 422266
loss: 0.999028205871582,grad_norm: 0.6838065652286929, iteration: 422267
loss: 1.0199769735336304,grad_norm: 0.999999529251186, iteration: 422268
loss: 1.0081855058670044,grad_norm: 0.9999996043732156, iteration: 422269
loss: 1.0103960037231445,grad_norm: 0.8899148160671815, iteration: 422270
loss: 0.9784332513809204,grad_norm: 0.7298479464993823, iteration: 422271
loss: 0.9895135164260864,grad_norm: 0.9121699656266428, iteration: 422272
loss: 0.9613144397735596,grad_norm: 0.9248226467081888, iteration: 422273
loss: 1.0347477197647095,grad_norm: 0.7162974785956845, iteration: 422274
loss: 1.0154296159744263,grad_norm: 0.805065262549428, iteration: 422275
loss: 0.991594135761261,grad_norm: 0.7214748048712641, iteration: 422276
loss: 1.0362129211425781,grad_norm: 0.9999995647796639, iteration: 422277
loss: 1.0091450214385986,grad_norm: 0.9683842316975738, iteration: 422278
loss: 0.9701179265975952,grad_norm: 0.9399703814796988, iteration: 422279
loss: 1.032935619354248,grad_norm: 0.8533964989448972, iteration: 422280
loss: 0.9649177193641663,grad_norm: 0.8432408310197659, iteration: 422281
loss: 1.0543302297592163,grad_norm: 0.9999992775735264, iteration: 422282
loss: 1.037744402885437,grad_norm: 0.999999147043808, iteration: 422283
loss: 1.0245672464370728,grad_norm: 0.9999994294456894, iteration: 422284
loss: 1.010481595993042,grad_norm: 0.6954030960321618, iteration: 422285
loss: 1.1167734861373901,grad_norm: 0.999999224816857, iteration: 422286
loss: 1.025400996208191,grad_norm: 0.7488946434538859, iteration: 422287
loss: 1.0609575510025024,grad_norm: 0.7745479587239051, iteration: 422288
loss: 1.017575979232788,grad_norm: 0.840861604291658, iteration: 422289
loss: 1.0308996438980103,grad_norm: 0.862251951624424, iteration: 422290
loss: 1.0004749298095703,grad_norm: 0.7504549755611349, iteration: 422291
loss: 0.9863733649253845,grad_norm: 0.7988603987991694, iteration: 422292
loss: 1.0051552057266235,grad_norm: 0.6754123279166119, iteration: 422293
loss: 1.1096553802490234,grad_norm: 0.9999997769798638, iteration: 422294
loss: 0.9961246848106384,grad_norm: 0.9999998182161107, iteration: 422295
loss: 0.9906526207923889,grad_norm: 0.8424844364577961, iteration: 422296
loss: 0.9987460970878601,grad_norm: 0.870376756904222, iteration: 422297
loss: 1.062269926071167,grad_norm: 0.8695978009634987, iteration: 422298
loss: 1.033206582069397,grad_norm: 0.8581426475469929, iteration: 422299
loss: 1.0192971229553223,grad_norm: 0.7342058088973148, iteration: 422300
loss: 1.1069629192352295,grad_norm: 0.9999996592529109, iteration: 422301
loss: 1.0031696557998657,grad_norm: 0.9331805800904281, iteration: 422302
loss: 0.9739645719528198,grad_norm: 0.6353251144788489, iteration: 422303
loss: 1.0137900114059448,grad_norm: 0.9999993340722458, iteration: 422304
loss: 1.0040298700332642,grad_norm: 0.8660573860626143, iteration: 422305
loss: 0.9827566146850586,grad_norm: 0.7629373325837584, iteration: 422306
loss: 1.0707857608795166,grad_norm: 0.9999997079007336, iteration: 422307
loss: 0.9553884863853455,grad_norm: 0.775847188020982, iteration: 422308
loss: 0.926345944404602,grad_norm: 0.8354223360492529, iteration: 422309
loss: 1.2486110925674438,grad_norm: 0.9999998172452347, iteration: 422310
loss: 1.0073857307434082,grad_norm: 0.7576611968738942, iteration: 422311
loss: 0.9581043124198914,grad_norm: 0.8666677893021242, iteration: 422312
loss: 1.0463932752609253,grad_norm: 0.999999758489547, iteration: 422313
loss: 1.0557713508605957,grad_norm: 0.999999210521067, iteration: 422314
loss: 0.9938739538192749,grad_norm: 0.9999998089960235, iteration: 422315
loss: 0.9875380992889404,grad_norm: 0.9425738808101731, iteration: 422316
loss: 0.9757788777351379,grad_norm: 0.7892922706192933, iteration: 422317
loss: 1.03273606300354,grad_norm: 0.747267819242138, iteration: 422318
loss: 1.0003629922866821,grad_norm: 0.7981837321280999, iteration: 422319
loss: 1.0505945682525635,grad_norm: 0.9999995418727061, iteration: 422320
loss: 0.9937005043029785,grad_norm: 0.857590991942333, iteration: 422321
loss: 1.0079505443572998,grad_norm: 0.9999996330802458, iteration: 422322
loss: 1.0537363290786743,grad_norm: 0.7473249602195917, iteration: 422323
loss: 1.1097960472106934,grad_norm: 0.9999996793814329, iteration: 422324
loss: 1.0811585187911987,grad_norm: 0.901742818889514, iteration: 422325
loss: 0.9840909838676453,grad_norm: 0.9999990715788781, iteration: 422326
loss: 0.9745548963546753,grad_norm: 0.9911595521071496, iteration: 422327
loss: 0.9887029528617859,grad_norm: 0.999999552315647, iteration: 422328
loss: 1.0163785219192505,grad_norm: 0.9358581157441995, iteration: 422329
loss: 0.9893407821655273,grad_norm: 0.7915680182693331, iteration: 422330
loss: 1.2442986965179443,grad_norm: 0.9999992294301312, iteration: 422331
loss: 1.0166380405426025,grad_norm: 0.6986686287298051, iteration: 422332
loss: 1.0520141124725342,grad_norm: 0.844092404214282, iteration: 422333
loss: 1.0364993810653687,grad_norm: 0.9999993743646735, iteration: 422334
loss: 0.9927417635917664,grad_norm: 0.8759620305545204, iteration: 422335
loss: 1.149425745010376,grad_norm: 0.9999999061179792, iteration: 422336
loss: 1.043692946434021,grad_norm: 0.9999998476709289, iteration: 422337
loss: 1.028308391571045,grad_norm: 0.781484929483593, iteration: 422338
loss: 1.0627427101135254,grad_norm: 1.00000002373693, iteration: 422339
loss: 1.1017342805862427,grad_norm: 0.9999992590663418, iteration: 422340
loss: 1.0774363279342651,grad_norm: 0.9999996842314082, iteration: 422341
loss: 1.0042999982833862,grad_norm: 0.7410112549100322, iteration: 422342
loss: 0.9934666156768799,grad_norm: 0.9999993801971138, iteration: 422343
loss: 1.0715062618255615,grad_norm: 0.9999993269740499, iteration: 422344
loss: 1.0162650346755981,grad_norm: 0.9950625879640185, iteration: 422345
loss: 1.0062174797058105,grad_norm: 0.9999996102885896, iteration: 422346
loss: 1.0008231401443481,grad_norm: 0.6473338512669728, iteration: 422347
loss: 0.9706308841705322,grad_norm: 0.8400837034678461, iteration: 422348
loss: 0.9933956265449524,grad_norm: 0.8640454280669206, iteration: 422349
loss: 0.932129979133606,grad_norm: 0.9999991604622169, iteration: 422350
loss: 1.0310852527618408,grad_norm: 0.8305949220191973, iteration: 422351
loss: 1.0640596151351929,grad_norm: 0.9999993321002418, iteration: 422352
loss: 1.0667392015457153,grad_norm: 0.9999990443958033, iteration: 422353
loss: 0.9703328013420105,grad_norm: 0.6696018502305339, iteration: 422354
loss: 1.0102559328079224,grad_norm: 0.7005212903535796, iteration: 422355
loss: 0.9864431023597717,grad_norm: 0.9296447946563747, iteration: 422356
loss: 0.9849827289581299,grad_norm: 0.7975310412139995, iteration: 422357
loss: 1.01886785030365,grad_norm: 0.9999992743024558, iteration: 422358
loss: 1.0032168626785278,grad_norm: 0.7653077820189483, iteration: 422359
loss: 1.0237349271774292,grad_norm: 0.9999998275159532, iteration: 422360
loss: 0.9991589784622192,grad_norm: 0.8931452308814635, iteration: 422361
loss: 1.0233163833618164,grad_norm: 0.8200358204599283, iteration: 422362
loss: 0.9909068942070007,grad_norm: 0.8001442800604042, iteration: 422363
loss: 0.9811354875564575,grad_norm: 0.722783321039324, iteration: 422364
loss: 0.985877513885498,grad_norm: 0.7495478326691022, iteration: 422365
loss: 1.01952064037323,grad_norm: 0.9772878913713993, iteration: 422366
loss: 1.00410795211792,grad_norm: 0.8133839337543205, iteration: 422367
loss: 1.0691618919372559,grad_norm: 0.9100703452415778, iteration: 422368
loss: 0.9879427552223206,grad_norm: 0.5773295813542133, iteration: 422369
loss: 1.0417901277542114,grad_norm: 0.9585807604910115, iteration: 422370
loss: 0.9947410225868225,grad_norm: 0.9399406853935657, iteration: 422371
loss: 1.0002151727676392,grad_norm: 0.7981331090492789, iteration: 422372
loss: 0.9882985353469849,grad_norm: 0.7531917643285144, iteration: 422373
loss: 1.020601511001587,grad_norm: 0.8085477649261441, iteration: 422374
loss: 1.0230920314788818,grad_norm: 0.7739063202052632, iteration: 422375
loss: 1.0371971130371094,grad_norm: 0.9999996610532415, iteration: 422376
loss: 0.9919358491897583,grad_norm: 0.9999991120716692, iteration: 422377
loss: 0.9864335656166077,grad_norm: 0.7411815224451588, iteration: 422378
loss: 0.9582189321517944,grad_norm: 0.8873429540180469, iteration: 422379
loss: 1.0541762113571167,grad_norm: 0.8501346374791318, iteration: 422380
loss: 1.0150842666625977,grad_norm: 0.9999996150645661, iteration: 422381
loss: 0.9494507908821106,grad_norm: 0.8071851387682012, iteration: 422382
loss: 1.046103835105896,grad_norm: 0.9999994079928743, iteration: 422383
loss: 1.0466992855072021,grad_norm: 0.9999992988770076, iteration: 422384
loss: 1.0075644254684448,grad_norm: 0.7689909356245146, iteration: 422385
loss: 1.0579750537872314,grad_norm: 0.7724132356708191, iteration: 422386
loss: 1.053313136100769,grad_norm: 0.9952379684420156, iteration: 422387
loss: 1.0312700271606445,grad_norm: 0.8090350624860834, iteration: 422388
loss: 1.0144928693771362,grad_norm: 0.9999991055586843, iteration: 422389
loss: 0.9816232323646545,grad_norm: 0.7393753364475667, iteration: 422390
loss: 1.000846266746521,grad_norm: 0.9999997327716569, iteration: 422391
loss: 1.0365630388259888,grad_norm: 0.9974900534420771, iteration: 422392
loss: 1.0057469606399536,grad_norm: 0.7261465939191201, iteration: 422393
loss: 1.1424506902694702,grad_norm: 0.9999992605654071, iteration: 422394
loss: 1.0022951364517212,grad_norm: 0.9550865879282169, iteration: 422395
loss: 1.1599384546279907,grad_norm: 0.9999994705303937, iteration: 422396
loss: 1.0047999620437622,grad_norm: 0.7754321691913167, iteration: 422397
loss: 1.0007374286651611,grad_norm: 0.855510797453144, iteration: 422398
loss: 1.0456148386001587,grad_norm: 0.9647274785051704, iteration: 422399
loss: 1.038069486618042,grad_norm: 0.8617083869803918, iteration: 422400
loss: 1.0086339712142944,grad_norm: 0.856556808863343, iteration: 422401
loss: 0.998343288898468,grad_norm: 0.9999991569141474, iteration: 422402
loss: 0.9904415607452393,grad_norm: 0.8373744731486378, iteration: 422403
loss: 1.0040960311889648,grad_norm: 0.8392412003028601, iteration: 422404
loss: 1.036562204360962,grad_norm: 0.8740765290831493, iteration: 422405
loss: 0.9937278032302856,grad_norm: 0.7354392382457807, iteration: 422406
loss: 0.9619935154914856,grad_norm: 0.7821397229324777, iteration: 422407
loss: 1.0464283227920532,grad_norm: 0.748363424802737, iteration: 422408
loss: 1.0125489234924316,grad_norm: 0.6551732252707989, iteration: 422409
loss: 1.047861099243164,grad_norm: 0.7878864613799335, iteration: 422410
loss: 0.9953145384788513,grad_norm: 0.9040191104874218, iteration: 422411
loss: 1.036767840385437,grad_norm: 0.8643200010396219, iteration: 422412
loss: 1.2273975610733032,grad_norm: 0.9999995297028783, iteration: 422413
loss: 0.956428050994873,grad_norm: 0.8389927164263553, iteration: 422414
loss: 0.9786549210548401,grad_norm: 0.6767761478576516, iteration: 422415
loss: 1.0180349349975586,grad_norm: 0.9999999451003782, iteration: 422416
loss: 1.1741501092910767,grad_norm: 0.8936381715019371, iteration: 422417
loss: 1.023162841796875,grad_norm: 0.9442645599557313, iteration: 422418
loss: 1.0412273406982422,grad_norm: 0.9999999587512797, iteration: 422419
loss: 1.0157270431518555,grad_norm: 0.9967005545936677, iteration: 422420
loss: 1.024144172668457,grad_norm: 0.9999998633040056, iteration: 422421
loss: 0.9954310655593872,grad_norm: 0.7525604428038377, iteration: 422422
loss: 1.037704348564148,grad_norm: 0.9999990223523855, iteration: 422423
loss: 0.9762662649154663,grad_norm: 0.8326390883203675, iteration: 422424
loss: 0.9762941002845764,grad_norm: 0.9999997634464924, iteration: 422425
loss: 1.013541340827942,grad_norm: 0.8590151525091444, iteration: 422426
loss: 1.1194766759872437,grad_norm: 0.9999990714881206, iteration: 422427
loss: 1.014851450920105,grad_norm: 0.770561911837415, iteration: 422428
loss: 1.029282569885254,grad_norm: 0.9366508850465041, iteration: 422429
loss: 1.0024023056030273,grad_norm: 0.7655706705025898, iteration: 422430
loss: 0.9811046123504639,grad_norm: 0.7915023984345337, iteration: 422431
loss: 1.1361533403396606,grad_norm: 0.9999997728006156, iteration: 422432
loss: 0.9895185828208923,grad_norm: 0.8102116744954927, iteration: 422433
loss: 1.1441494226455688,grad_norm: 0.9999996129868723, iteration: 422434
loss: 1.0084768533706665,grad_norm: 0.7840749944435392, iteration: 422435
loss: 1.061355710029602,grad_norm: 0.9999997958050447, iteration: 422436
loss: 0.9984419941902161,grad_norm: 0.901907115485698, iteration: 422437
loss: 1.0249536037445068,grad_norm: 0.9270133379058747, iteration: 422438
loss: 0.9954763054847717,grad_norm: 0.950784082966931, iteration: 422439
loss: 1.022997260093689,grad_norm: 0.7684502945661831, iteration: 422440
loss: 1.053214192390442,grad_norm: 0.8551382609280794, iteration: 422441
loss: 0.9815425872802734,grad_norm: 0.7372232416050137, iteration: 422442
loss: 0.9624050259590149,grad_norm: 0.9111930866702529, iteration: 422443
loss: 1.0210728645324707,grad_norm: 0.7112854022523258, iteration: 422444
loss: 1.0189365148544312,grad_norm: 0.8421952846795809, iteration: 422445
loss: 0.9855479001998901,grad_norm: 0.8371001299734268, iteration: 422446
loss: 0.9919039607048035,grad_norm: 0.6835794808954758, iteration: 422447
loss: 1.006736397743225,grad_norm: 0.6704190587157036, iteration: 422448
loss: 1.06207275390625,grad_norm: 0.9024045702451111, iteration: 422449
loss: 1.0398719310760498,grad_norm: 0.8917807512679831, iteration: 422450
loss: 0.954636812210083,grad_norm: 0.7023466537183127, iteration: 422451
loss: 1.0079238414764404,grad_norm: 0.7801095442615598, iteration: 422452
loss: 1.0593359470367432,grad_norm: 0.9999990231652288, iteration: 422453
loss: 0.9948814511299133,grad_norm: 0.7939884642012591, iteration: 422454
loss: 1.013505458831787,grad_norm: 0.8323918967387912, iteration: 422455
loss: 1.0427364110946655,grad_norm: 0.9411224427789325, iteration: 422456
loss: 0.9789981245994568,grad_norm: 0.8802147540618744, iteration: 422457
loss: 1.0161584615707397,grad_norm: 0.99999925681757, iteration: 422458
loss: 0.9948701858520508,grad_norm: 0.7323987927803906, iteration: 422459
loss: 1.0897812843322754,grad_norm: 0.9999996291712946, iteration: 422460
loss: 0.9672088623046875,grad_norm: 0.5978529142539158, iteration: 422461
loss: 1.010733723640442,grad_norm: 0.8001316048159209, iteration: 422462
loss: 1.0409823656082153,grad_norm: 0.999999256751651, iteration: 422463
loss: 0.9999587535858154,grad_norm: 0.6828744595785943, iteration: 422464
loss: 1.0206246376037598,grad_norm: 0.801191801885384, iteration: 422465
loss: 1.0944344997406006,grad_norm: 0.9999996773250461, iteration: 422466
loss: 1.0318068265914917,grad_norm: 0.9999997726610538, iteration: 422467
loss: 0.9920361042022705,grad_norm: 0.9999991535954933, iteration: 422468
loss: 1.0528972148895264,grad_norm: 0.7108366944381161, iteration: 422469
loss: 1.1163221597671509,grad_norm: 0.9999993540728068, iteration: 422470
loss: 0.9862310886383057,grad_norm: 0.8277620946886325, iteration: 422471
loss: 1.0128697156906128,grad_norm: 0.8224788552394908, iteration: 422472
loss: 1.006131887435913,grad_norm: 0.7464180676875503, iteration: 422473
loss: 1.0033130645751953,grad_norm: 0.6607598145239851, iteration: 422474
loss: 0.9825538396835327,grad_norm: 0.7663708475055393, iteration: 422475
loss: 1.0754575729370117,grad_norm: 0.9999991262761079, iteration: 422476
loss: 1.041164517402649,grad_norm: 0.7470059377547942, iteration: 422477
loss: 0.961796224117279,grad_norm: 0.6892950321918789, iteration: 422478
loss: 0.9833582639694214,grad_norm: 0.8808788283375909, iteration: 422479
loss: 0.9749361276626587,grad_norm: 0.7933343374495351, iteration: 422480
loss: 1.0345396995544434,grad_norm: 0.9375262901606494, iteration: 422481
loss: 1.0365285873413086,grad_norm: 0.999999988162996, iteration: 422482
loss: 1.0038626194000244,grad_norm: 0.8072128760502979, iteration: 422483
loss: 0.9888028502464294,grad_norm: 0.7753737501303174, iteration: 422484
loss: 1.020357370376587,grad_norm: 0.8101131678338018, iteration: 422485
loss: 0.9892170429229736,grad_norm: 0.9999999718497858, iteration: 422486
loss: 0.9815266132354736,grad_norm: 0.7690132517933287, iteration: 422487
loss: 0.9878864884376526,grad_norm: 0.8329767341810242, iteration: 422488
loss: 0.9826187491416931,grad_norm: 0.740422290171278, iteration: 422489
loss: 1.0645724534988403,grad_norm: 0.999999633823089, iteration: 422490
loss: 1.0012155771255493,grad_norm: 0.7948072872996628, iteration: 422491
loss: 0.9613693356513977,grad_norm: 0.7931142555220974, iteration: 422492
loss: 0.9683682322502136,grad_norm: 0.8008874469896123, iteration: 422493
loss: 0.9920423030853271,grad_norm: 0.7148386158252038, iteration: 422494
loss: 0.995123028755188,grad_norm: 0.9999992799218723, iteration: 422495
loss: 0.994324803352356,grad_norm: 0.9110222633722624, iteration: 422496
loss: 1.1236751079559326,grad_norm: 0.9999995725698236, iteration: 422497
loss: 1.0252026319503784,grad_norm: 0.8628296327119997, iteration: 422498
loss: 1.0454254150390625,grad_norm: 0.73805450774249, iteration: 422499
loss: 1.0270644426345825,grad_norm: 0.8282390184665217, iteration: 422500
loss: 1.0263983011245728,grad_norm: 0.9999990896357087, iteration: 422501
loss: 0.9789838790893555,grad_norm: 0.7758129555441945, iteration: 422502
loss: 1.0251184701919556,grad_norm: 0.999999207156153, iteration: 422503
loss: 1.0387537479400635,grad_norm: 0.9678646337604494, iteration: 422504
loss: 0.9910976886749268,grad_norm: 0.8440920456131448, iteration: 422505
loss: 0.9556160569190979,grad_norm: 0.873935149406025, iteration: 422506
loss: 1.0465823411941528,grad_norm: 0.8923430148613093, iteration: 422507
loss: 1.0304359197616577,grad_norm: 0.8377380965928438, iteration: 422508
loss: 1.006665825843811,grad_norm: 0.7439114808139716, iteration: 422509
loss: 1.013461947441101,grad_norm: 0.7815730978474565, iteration: 422510
loss: 1.0159461498260498,grad_norm: 0.7897322428495245, iteration: 422511
loss: 0.9702547788619995,grad_norm: 0.8040447524472782, iteration: 422512
loss: 0.9875767827033997,grad_norm: 0.7412110150369727, iteration: 422513
loss: 1.120402216911316,grad_norm: 0.9043162240929464, iteration: 422514
loss: 1.049721360206604,grad_norm: 0.9999998639979645, iteration: 422515
loss: 1.0580813884735107,grad_norm: 0.8344982692272094, iteration: 422516
loss: 1.000557541847229,grad_norm: 0.7039699930671899, iteration: 422517
loss: 1.0297822952270508,grad_norm: 0.7219006370289325, iteration: 422518
loss: 0.992083728313446,grad_norm: 0.8836344323195303, iteration: 422519
loss: 0.9868394732475281,grad_norm: 0.9161786645057141, iteration: 422520
loss: 1.1190654039382935,grad_norm: 0.9999993780547913, iteration: 422521
loss: 0.9957759380340576,grad_norm: 0.7220999498704783, iteration: 422522
loss: 1.0101888179779053,grad_norm: 0.865767612658953, iteration: 422523
loss: 1.0044344663619995,grad_norm: 0.8920085631387272, iteration: 422524
loss: 1.0182204246520996,grad_norm: 0.6682209246794915, iteration: 422525
loss: 0.9624214768409729,grad_norm: 0.7148096086359758, iteration: 422526
loss: 1.0006704330444336,grad_norm: 0.7539312191245205, iteration: 422527
loss: 1.11091947555542,grad_norm: 0.999999448806842, iteration: 422528
loss: 1.0483328104019165,grad_norm: 0.7673380090271309, iteration: 422529
loss: 1.068267583847046,grad_norm: 0.9999989890642561, iteration: 422530
loss: 1.0773520469665527,grad_norm: 0.7752311439355851, iteration: 422531
loss: 1.0334771871566772,grad_norm: 0.9999996874987258, iteration: 422532
loss: 1.0515393018722534,grad_norm: 0.9079556557707006, iteration: 422533
loss: 0.974070131778717,grad_norm: 0.9451272767640164, iteration: 422534
loss: 0.981976330280304,grad_norm: 0.8599727626401253, iteration: 422535
loss: 1.0772817134857178,grad_norm: 0.8921010982289307, iteration: 422536
loss: 0.9985408186912537,grad_norm: 0.999999100240302, iteration: 422537
loss: 1.0752637386322021,grad_norm: 0.9999993336211677, iteration: 422538
loss: 1.1997556686401367,grad_norm: 0.9999999778476614, iteration: 422539
loss: 0.9827831387519836,grad_norm: 0.6695142300941722, iteration: 422540
loss: 1.0320998430252075,grad_norm: 0.895603243491736, iteration: 422541
loss: 1.3905729055404663,grad_norm: 0.9999999143821163, iteration: 422542
loss: 1.005385160446167,grad_norm: 0.9999991050657092, iteration: 422543
loss: 0.9581732749938965,grad_norm: 0.7101220458614655, iteration: 422544
loss: 0.9992203712463379,grad_norm: 0.7791489051126604, iteration: 422545
loss: 1.0127025842666626,grad_norm: 0.7465776186002298, iteration: 422546
loss: 1.2353777885437012,grad_norm: 0.9999994718224778, iteration: 422547
loss: 0.9782659411430359,grad_norm: 0.7958814693846848, iteration: 422548
loss: 0.9782567024230957,grad_norm: 0.74380634123228, iteration: 422549
loss: 1.0073487758636475,grad_norm: 0.999999192144775, iteration: 422550
loss: 0.9839393496513367,grad_norm: 0.7175797509981617, iteration: 422551
loss: 1.0435328483581543,grad_norm: 0.9999993842180805, iteration: 422552
loss: 1.0425238609313965,grad_norm: 0.7020581867185788, iteration: 422553
loss: 1.0027154684066772,grad_norm: 0.9514115035962553, iteration: 422554
loss: 0.9811436533927917,grad_norm: 0.9144680544017353, iteration: 422555
loss: 1.026463270187378,grad_norm: 0.9999995231914656, iteration: 422556
loss: 0.9751035571098328,grad_norm: 0.794183013573264, iteration: 422557
loss: 0.9754395484924316,grad_norm: 0.9243937033956846, iteration: 422558
loss: 1.0350521802902222,grad_norm: 0.7962451278157056, iteration: 422559
loss: 1.0022695064544678,grad_norm: 0.686175540598661, iteration: 422560
loss: 1.015759825706482,grad_norm: 0.8628843739846794, iteration: 422561
loss: 0.9675920009613037,grad_norm: 0.7347028211543898, iteration: 422562
loss: 0.9561575651168823,grad_norm: 0.7646915076664035, iteration: 422563
loss: 1.0341798067092896,grad_norm: 0.8564507525000414, iteration: 422564
loss: 0.9630280137062073,grad_norm: 0.7592293529413956, iteration: 422565
loss: 1.021520733833313,grad_norm: 0.8827167713060537, iteration: 422566
loss: 1.0479655265808105,grad_norm: 0.9999991370051067, iteration: 422567
loss: 0.9824780821800232,grad_norm: 0.8013519824051186, iteration: 422568
loss: 1.0281697511672974,grad_norm: 0.7534573663321762, iteration: 422569
loss: 1.0176969766616821,grad_norm: 0.9999991252547878, iteration: 422570
loss: 1.0013189315795898,grad_norm: 0.7402924210024503, iteration: 422571
loss: 0.9895358681678772,grad_norm: 0.6392348392879038, iteration: 422572
loss: 0.9801929593086243,grad_norm: 0.9999989135712741, iteration: 422573
loss: 0.9846870303153992,grad_norm: 0.9407912461287766, iteration: 422574
loss: 1.006367564201355,grad_norm: 0.8554213528058162, iteration: 422575
loss: 0.9750136137008667,grad_norm: 0.9999991471352666, iteration: 422576
loss: 1.0063762664794922,grad_norm: 0.717849318927833, iteration: 422577
loss: 1.017767310142517,grad_norm: 0.8590806311977054, iteration: 422578
loss: 1.0162577629089355,grad_norm: 0.7962128415662858, iteration: 422579
loss: 1.0015270709991455,grad_norm: 0.7485557043888754, iteration: 422580
loss: 1.0259873867034912,grad_norm: 0.9130338184365709, iteration: 422581
loss: 1.031399130821228,grad_norm: 0.7127661846619127, iteration: 422582
loss: 1.0211231708526611,grad_norm: 0.8135396642891364, iteration: 422583
loss: 1.035031795501709,grad_norm: 0.7529297174169748, iteration: 422584
loss: 0.9820656180381775,grad_norm: 0.9999991107815231, iteration: 422585
loss: 1.0006223917007446,grad_norm: 0.8229461098327798, iteration: 422586
loss: 1.0247801542282104,grad_norm: 0.9999993055017724, iteration: 422587
loss: 1.0528978109359741,grad_norm: 0.9999996432777853, iteration: 422588
loss: 0.999710738658905,grad_norm: 0.7115956468134809, iteration: 422589
loss: 0.9693130254745483,grad_norm: 0.9999999577550368, iteration: 422590
loss: 1.0341897010803223,grad_norm: 0.8320801930388226, iteration: 422591
loss: 0.9961774945259094,grad_norm: 0.670752716807723, iteration: 422592
loss: 1.0343283414840698,grad_norm: 0.6923796085242626, iteration: 422593
loss: 1.0226030349731445,grad_norm: 0.7447797941772711, iteration: 422594
loss: 0.99681556224823,grad_norm: 0.6777387615765279, iteration: 422595
loss: 0.991325855255127,grad_norm: 0.6712684479255226, iteration: 422596
loss: 0.9982219934463501,grad_norm: 0.8130398666098364, iteration: 422597
loss: 1.0182064771652222,grad_norm: 0.766863100866307, iteration: 422598
loss: 0.9891610741615295,grad_norm: 0.722427855951641, iteration: 422599
loss: 0.9936455488204956,grad_norm: 0.7971291465031413, iteration: 422600
loss: 1.0152238607406616,grad_norm: 0.8758128235475878, iteration: 422601
loss: 0.9878612160682678,grad_norm: 0.9999992671655307, iteration: 422602
loss: 0.9979589581489563,grad_norm: 0.9026998932288738, iteration: 422603
loss: 1.0284686088562012,grad_norm: 0.8096123504896569, iteration: 422604
loss: 1.0128874778747559,grad_norm: 0.7711964480536972, iteration: 422605
loss: 1.0210567712783813,grad_norm: 0.9999992228374004, iteration: 422606
loss: 1.0848267078399658,grad_norm: 0.9999999954426984, iteration: 422607
loss: 1.025381088256836,grad_norm: 0.8175331227164969, iteration: 422608
loss: 0.9943887591362,grad_norm: 0.6525510201596268, iteration: 422609
loss: 1.0881999731063843,grad_norm: 0.9999991312660926, iteration: 422610
loss: 1.0186688899993896,grad_norm: 0.8395841898757883, iteration: 422611
loss: 0.9531123638153076,grad_norm: 0.6919041896950641, iteration: 422612
loss: 1.0402458906173706,grad_norm: 0.9999992876836676, iteration: 422613
loss: 1.0024902820587158,grad_norm: 0.6686476290450749, iteration: 422614
loss: 0.9977983832359314,grad_norm: 0.6905310261985756, iteration: 422615
loss: 0.958441972732544,grad_norm: 0.8648274572444185, iteration: 422616
loss: 1.0567466020584106,grad_norm: 0.9999999645691765, iteration: 422617
loss: 1.0009464025497437,grad_norm: 0.7656267604690961, iteration: 422618
loss: 1.014047622680664,grad_norm: 0.7238934479985772, iteration: 422619
loss: 1.0083537101745605,grad_norm: 0.7386091070312932, iteration: 422620
loss: 1.00260329246521,grad_norm: 0.6266570180964062, iteration: 422621
loss: 0.9810879826545715,grad_norm: 0.7799769614879202, iteration: 422622
loss: 0.9930451512336731,grad_norm: 0.7344668785839926, iteration: 422623
loss: 1.0002872943878174,grad_norm: 0.8098766975525702, iteration: 422624
loss: 0.9855839014053345,grad_norm: 0.6845485808459281, iteration: 422625
loss: 1.005930781364441,grad_norm: 0.8895167551373845, iteration: 422626
loss: 1.0491013526916504,grad_norm: 0.9999991947755759, iteration: 422627
loss: 0.9847564697265625,grad_norm: 0.7863082339526272, iteration: 422628
loss: 1.000023365020752,grad_norm: 0.7449601925584842, iteration: 422629
loss: 0.9980014562606812,grad_norm: 0.9334155295697902, iteration: 422630
loss: 1.0257338285446167,grad_norm: 0.9492524443895981, iteration: 422631
loss: 1.0362547636032104,grad_norm: 0.9999990726650105, iteration: 422632
loss: 0.9598829746246338,grad_norm: 0.9999991717317837, iteration: 422633
loss: 1.0611307621002197,grad_norm: 0.7082724734814819, iteration: 422634
loss: 0.9932293891906738,grad_norm: 0.9041060701747731, iteration: 422635
loss: 0.9576681852340698,grad_norm: 0.9999996080008946, iteration: 422636
loss: 1.0149506330490112,grad_norm: 0.9999993674257445, iteration: 422637
loss: 1.01408851146698,grad_norm: 0.6975842080390593, iteration: 422638
loss: 0.9893617630004883,grad_norm: 0.9299737527728831, iteration: 422639
loss: 1.0886576175689697,grad_norm: 0.9999998998273653, iteration: 422640
loss: 1.0300686359405518,grad_norm: 0.8987032980478382, iteration: 422641
loss: 1.0157489776611328,grad_norm: 0.6760807039625253, iteration: 422642
loss: 1.006391167640686,grad_norm: 0.7111084764543181, iteration: 422643
loss: 1.0903555154800415,grad_norm: 0.8238856656270594, iteration: 422644
loss: 0.994998037815094,grad_norm: 0.7591913836960262, iteration: 422645
loss: 1.068368673324585,grad_norm: 0.9999993575393165, iteration: 422646
loss: 1.021885871887207,grad_norm: 0.9999990955493528, iteration: 422647
loss: 1.0261034965515137,grad_norm: 0.7694977309402404, iteration: 422648
loss: 1.003743290901184,grad_norm: 0.6972412811427927, iteration: 422649
loss: 1.0070128440856934,grad_norm: 0.8904807918789354, iteration: 422650
loss: 0.9975196123123169,grad_norm: 0.8432231973745529, iteration: 422651
loss: 1.0958377122879028,grad_norm: 0.8322825753510242, iteration: 422652
loss: 1.02003014087677,grad_norm: 0.7326663076686316, iteration: 422653
loss: 1.0227370262145996,grad_norm: 0.6813186153862437, iteration: 422654
loss: 0.9843546152114868,grad_norm: 0.9999995111826439, iteration: 422655
loss: 0.9754592180252075,grad_norm: 0.796838284683939, iteration: 422656
loss: 0.9891733527183533,grad_norm: 0.9999998250925252, iteration: 422657
loss: 1.0100822448730469,grad_norm: 0.8285093916010886, iteration: 422658
loss: 1.0572326183319092,grad_norm: 0.7994852621997444, iteration: 422659
loss: 1.0273345708847046,grad_norm: 0.702320909456853, iteration: 422660
loss: 1.0274137258529663,grad_norm: 0.7698896647696944, iteration: 422661
loss: 0.9646337628364563,grad_norm: 0.7498965893213885, iteration: 422662
loss: 0.9861022233963013,grad_norm: 0.9999996787615277, iteration: 422663
loss: 1.0122720003128052,grad_norm: 0.8753688958995832, iteration: 422664
loss: 1.127311110496521,grad_norm: 0.7509520775620884, iteration: 422665
loss: 1.0084160566329956,grad_norm: 0.845852160158203, iteration: 422666
loss: 0.9924235939979553,grad_norm: 0.9999990723478719, iteration: 422667
loss: 1.2794487476348877,grad_norm: 0.9999998591675817, iteration: 422668
loss: 0.9742277264595032,grad_norm: 0.8451474296607547, iteration: 422669
loss: 0.995888888835907,grad_norm: 0.9335531386968074, iteration: 422670
loss: 1.0173275470733643,grad_norm: 0.7775931065867812, iteration: 422671
loss: 1.0383034944534302,grad_norm: 0.7561365522541107, iteration: 422672
loss: 0.9862431287765503,grad_norm: 0.8469010927035732, iteration: 422673
loss: 1.0370174646377563,grad_norm: 0.8205593303885046, iteration: 422674
loss: 0.9641074538230896,grad_norm: 0.7986772373073036, iteration: 422675
loss: 1.0503653287887573,grad_norm: 0.6805634546109265, iteration: 422676
loss: 1.0039061307907104,grad_norm: 0.8519426085882471, iteration: 422677
loss: 0.9775944352149963,grad_norm: 0.7955428447640924, iteration: 422678
loss: 1.078681230545044,grad_norm: 0.9999993975643131, iteration: 422679
loss: 1.013979434967041,grad_norm: 0.999999663540467, iteration: 422680
loss: 1.0146619081497192,grad_norm: 0.9999999290756376, iteration: 422681
loss: 1.004949688911438,grad_norm: 0.8332722717733343, iteration: 422682
loss: 1.0881966352462769,grad_norm: 0.99999989524145, iteration: 422683
loss: 0.9885431528091431,grad_norm: 0.7260240070889096, iteration: 422684
loss: 0.9880127310752869,grad_norm: 0.7455708288382713, iteration: 422685
loss: 1.0401599407196045,grad_norm: 0.8880656851322404, iteration: 422686
loss: 1.0142028331756592,grad_norm: 0.9999999143251831, iteration: 422687
loss: 1.036146640777588,grad_norm: 0.8356853743345489, iteration: 422688
loss: 1.048661231994629,grad_norm: 0.9999999143658954, iteration: 422689
loss: 1.0346113443374634,grad_norm: 0.999999010627022, iteration: 422690
loss: 0.9864861965179443,grad_norm: 0.7270057642212531, iteration: 422691
loss: 1.009812355041504,grad_norm: 0.7049102272773472, iteration: 422692
loss: 1.0210542678833008,grad_norm: 0.6734834836683213, iteration: 422693
loss: 1.005837321281433,grad_norm: 0.8075560826559512, iteration: 422694
loss: 1.0033150911331177,grad_norm: 0.9058452891238263, iteration: 422695
loss: 0.9792714715003967,grad_norm: 0.7310586102488367, iteration: 422696
loss: 1.0141451358795166,grad_norm: 0.9999998498143744, iteration: 422697
loss: 1.052099347114563,grad_norm: 0.8889507111173949, iteration: 422698
loss: 0.9767780303955078,grad_norm: 0.7139156656645619, iteration: 422699
loss: 0.9864720702171326,grad_norm: 0.7643439923276066, iteration: 422700
loss: 0.9796182513237,grad_norm: 0.8933916502955002, iteration: 422701
loss: 1.011643886566162,grad_norm: 0.7937421687729688, iteration: 422702
loss: 0.9811568260192871,grad_norm: 0.6745343376107386, iteration: 422703
loss: 1.0054128170013428,grad_norm: 0.9999996442567999, iteration: 422704
loss: 1.0553724765777588,grad_norm: 0.9999990707736995, iteration: 422705
loss: 1.022807240486145,grad_norm: 0.8259424951693187, iteration: 422706
loss: 0.9802823066711426,grad_norm: 0.7667536180236943, iteration: 422707
loss: 1.0139065980911255,grad_norm: 0.9999993795606776, iteration: 422708
loss: 1.0373064279556274,grad_norm: 0.7602777104350579, iteration: 422709
loss: 1.0101839303970337,grad_norm: 0.8667636227672694, iteration: 422710
loss: 1.0739136934280396,grad_norm: 0.9999996980318063, iteration: 422711
loss: 0.9642624855041504,grad_norm: 0.9594936710656783, iteration: 422712
loss: 1.1471903324127197,grad_norm: 1.0000000606542678, iteration: 422713
loss: 1.0187841653823853,grad_norm: 0.7774847055371782, iteration: 422714
loss: 0.9906019568443298,grad_norm: 0.9938935276446512, iteration: 422715
loss: 1.0279052257537842,grad_norm: 0.9630477684266959, iteration: 422716
loss: 0.9756378531455994,grad_norm: 0.86160878084442, iteration: 422717
loss: 1.0171226263046265,grad_norm: 0.9999995520124917, iteration: 422718
loss: 1.001341462135315,grad_norm: 0.8914429884530687, iteration: 422719
loss: 1.0329093933105469,grad_norm: 0.9515129761535528, iteration: 422720
loss: 1.0114470720291138,grad_norm: 0.9999999534111864, iteration: 422721
loss: 1.0426311492919922,grad_norm: 0.7363275884823758, iteration: 422722
loss: 0.9906090497970581,grad_norm: 0.8334963602453908, iteration: 422723
loss: 0.9929019808769226,grad_norm: 0.7249026919671124, iteration: 422724
loss: 1.0248664617538452,grad_norm: 0.9260316783170218, iteration: 422725
loss: 1.0606216192245483,grad_norm: 0.6758768722758394, iteration: 422726
loss: 0.9690644145011902,grad_norm: 0.6953851068618092, iteration: 422727
loss: 1.0146632194519043,grad_norm: 0.6508639428436507, iteration: 422728
loss: 1.0906602144241333,grad_norm: 0.8625777638607961, iteration: 422729
loss: 1.0129374265670776,grad_norm: 0.8408053143914977, iteration: 422730
loss: 1.0076992511749268,grad_norm: 0.7727903630299775, iteration: 422731
loss: 0.9902337193489075,grad_norm: 0.8585963295661655, iteration: 422732
loss: 1.0350921154022217,grad_norm: 0.8180709932202799, iteration: 422733
loss: 0.9899512529373169,grad_norm: 0.7708347775345257, iteration: 422734
loss: 1.0022746324539185,grad_norm: 0.7192692732820277, iteration: 422735
loss: 0.9953029751777649,grad_norm: 0.7338999078258222, iteration: 422736
loss: 0.9874497056007385,grad_norm: 0.7803698802054978, iteration: 422737
loss: 1.0307315587997437,grad_norm: 0.9028108713694131, iteration: 422738
loss: 1.0275070667266846,grad_norm: 0.923903449579798, iteration: 422739
loss: 0.9859330058097839,grad_norm: 0.749333936063874, iteration: 422740
loss: 0.9799056649208069,grad_norm: 0.7852969934359939, iteration: 422741
loss: 1.022653341293335,grad_norm: 0.9031700155551644, iteration: 422742
loss: 0.9765872955322266,grad_norm: 0.7839043997864487, iteration: 422743
loss: 1.0248160362243652,grad_norm: 0.8361671432309701, iteration: 422744
loss: 1.020660161972046,grad_norm: 0.7973681659947218, iteration: 422745
loss: 1.0362250804901123,grad_norm: 0.7801683058109976, iteration: 422746
loss: 1.0208057165145874,grad_norm: 0.7280739126236987, iteration: 422747
loss: 1.1287251710891724,grad_norm: 0.9999992945866698, iteration: 422748
loss: 1.0999352931976318,grad_norm: 0.9999991808623004, iteration: 422749
loss: 0.992950439453125,grad_norm: 0.8457836121562993, iteration: 422750
loss: 1.013525128364563,grad_norm: 0.8724567450191689, iteration: 422751
loss: 1.090064525604248,grad_norm: 0.8455278736481104, iteration: 422752
loss: 0.9930157661437988,grad_norm: 0.624241692446891, iteration: 422753
loss: 1.0224101543426514,grad_norm: 0.7120209895747275, iteration: 422754
loss: 0.9665734171867371,grad_norm: 0.6936966385121255, iteration: 422755
loss: 1.0095560550689697,grad_norm: 0.7635678823487823, iteration: 422756
loss: 0.9992924332618713,grad_norm: 0.9999991389997682, iteration: 422757
loss: 1.0087757110595703,grad_norm: 0.9900070042502369, iteration: 422758
loss: 1.0026715993881226,grad_norm: 0.6968234707728209, iteration: 422759
loss: 1.0002820491790771,grad_norm: 0.7023089082561869, iteration: 422760
loss: 1.0546422004699707,grad_norm: 0.7178161410103402, iteration: 422761
loss: 0.9655429720878601,grad_norm: 0.9506202421894964, iteration: 422762
loss: 0.9819262623786926,grad_norm: 0.8134066957721675, iteration: 422763
loss: 1.0142902135849,grad_norm: 0.9757279203014847, iteration: 422764
loss: 0.9870697259902954,grad_norm: 0.7646026187688356, iteration: 422765
loss: 1.105175256729126,grad_norm: 0.9999998753626236, iteration: 422766
loss: 0.9870609045028687,grad_norm: 0.9327923835291636, iteration: 422767
loss: 1.0361541509628296,grad_norm: 0.9999995581574975, iteration: 422768
loss: 1.0570493936538696,grad_norm: 0.9212325258923656, iteration: 422769
loss: 1.0814807415008545,grad_norm: 0.8036724369001864, iteration: 422770
loss: 1.0157305002212524,grad_norm: 0.8463158748173938, iteration: 422771
loss: 1.0968924760818481,grad_norm: 0.9668104730043674, iteration: 422772
loss: 0.9988012909889221,grad_norm: 0.7372629008910055, iteration: 422773
loss: 0.9677467346191406,grad_norm: 0.827463704243417, iteration: 422774
loss: 0.9737384915351868,grad_norm: 0.7970539616659119, iteration: 422775
loss: 1.038223147392273,grad_norm: 0.7701006314195494, iteration: 422776
loss: 0.9957810640335083,grad_norm: 0.8820143905834326, iteration: 422777
loss: 1.0561429262161255,grad_norm: 0.9999996502320763, iteration: 422778
loss: 0.9877797365188599,grad_norm: 0.7667027450206195, iteration: 422779
loss: 1.0184953212738037,grad_norm: 0.7215751949679329, iteration: 422780
loss: 1.0874155759811401,grad_norm: 0.9999991882043506, iteration: 422781
loss: 1.0229685306549072,grad_norm: 0.8724107516954122, iteration: 422782
loss: 1.0894207954406738,grad_norm: 0.9999991828850784, iteration: 422783
loss: 1.0080459117889404,grad_norm: 0.8054487167794427, iteration: 422784
loss: 1.0140466690063477,grad_norm: 0.7999650836521573, iteration: 422785
loss: 0.9864005446434021,grad_norm: 0.8854971204122722, iteration: 422786
loss: 1.0454341173171997,grad_norm: 0.9999990802341285, iteration: 422787
loss: 0.9699682593345642,grad_norm: 0.7636601801872427, iteration: 422788
loss: 0.9889187216758728,grad_norm: 0.7085915315471354, iteration: 422789
loss: 1.0078994035720825,grad_norm: 0.7579355448401995, iteration: 422790
loss: 0.9791235327720642,grad_norm: 0.6704659166211503, iteration: 422791
loss: 1.0367701053619385,grad_norm: 0.7496755402051128, iteration: 422792
loss: 1.021954894065857,grad_norm: 0.8360417032193319, iteration: 422793
loss: 1.108349084854126,grad_norm: 0.7865643044187308, iteration: 422794
loss: 1.0016214847564697,grad_norm: 0.8686307639587776, iteration: 422795
loss: 1.0348045825958252,grad_norm: 0.6537200168360741, iteration: 422796
loss: 1.0015087127685547,grad_norm: 0.7840378370035191, iteration: 422797
loss: 1.022899866104126,grad_norm: 0.9999992137886227, iteration: 422798
loss: 0.9889301657676697,grad_norm: 0.8023225028981055, iteration: 422799
loss: 0.992790937423706,grad_norm: 0.7209189864171306, iteration: 422800
loss: 1.0033609867095947,grad_norm: 0.7854049668667594, iteration: 422801
loss: 1.0043737888336182,grad_norm: 0.6768741623475748, iteration: 422802
loss: 0.9981980323791504,grad_norm: 0.7131840820926709, iteration: 422803
loss: 1.008194088935852,grad_norm: 0.8736998785526664, iteration: 422804
loss: 1.0146474838256836,grad_norm: 0.8849646603899087, iteration: 422805
loss: 1.0102283954620361,grad_norm: 0.7299682823377904, iteration: 422806
loss: 0.9757124781608582,grad_norm: 0.7237113012506508, iteration: 422807
loss: 0.9828261733055115,grad_norm: 0.684480103888948, iteration: 422808
loss: 0.9597339630126953,grad_norm: 0.9999996792726791, iteration: 422809
loss: 1.0023159980773926,grad_norm: 0.7607669096715007, iteration: 422810
loss: 1.027138113975525,grad_norm: 0.8047129488599726, iteration: 422811
loss: 1.0811280012130737,grad_norm: 0.7094916748628731, iteration: 422812
loss: 1.0118041038513184,grad_norm: 0.774621646593255, iteration: 422813
loss: 1.0053483247756958,grad_norm: 0.7474134640318622, iteration: 422814
loss: 1.0100253820419312,grad_norm: 0.6297065761892049, iteration: 422815
loss: 1.0118274688720703,grad_norm: 0.9058045319967748, iteration: 422816
loss: 0.9896118640899658,grad_norm: 0.6618501951113697, iteration: 422817
loss: 0.9949094653129578,grad_norm: 0.7790120251199092, iteration: 422818
loss: 1.0049101114273071,grad_norm: 0.8448408783917551, iteration: 422819
loss: 1.0150169134140015,grad_norm: 0.7251741124396458, iteration: 422820
loss: 1.0249515771865845,grad_norm: 0.9962019701426374, iteration: 422821
loss: 1.0443991422653198,grad_norm: 0.8823593053534924, iteration: 422822
loss: 1.003799557685852,grad_norm: 0.8177477101697699, iteration: 422823
loss: 1.1054883003234863,grad_norm: 0.999998981923719, iteration: 422824
loss: 1.0307269096374512,grad_norm: 0.8368590034679839, iteration: 422825
loss: 0.998749852180481,grad_norm: 0.7581563278910848, iteration: 422826
loss: 0.9923139214515686,grad_norm: 0.6513473332144355, iteration: 422827
loss: 1.0140092372894287,grad_norm: 0.9999993628575523, iteration: 422828
loss: 1.0132673978805542,grad_norm: 0.7421048635389421, iteration: 422829
loss: 1.038783073425293,grad_norm: 0.9999997904170935, iteration: 422830
loss: 1.0099751949310303,grad_norm: 0.7868522920685599, iteration: 422831
loss: 1.0004396438598633,grad_norm: 0.7579062195429506, iteration: 422832
loss: 1.0535844564437866,grad_norm: 0.8923907105146772, iteration: 422833
loss: 1.0437668561935425,grad_norm: 0.9999996559423455, iteration: 422834
loss: 1.051939606666565,grad_norm: 0.9999996006978569, iteration: 422835
loss: 0.9981096982955933,grad_norm: 0.7718416461788443, iteration: 422836
loss: 1.0139923095703125,grad_norm: 0.8015182857738528, iteration: 422837
loss: 1.0195391178131104,grad_norm: 0.7842155599597774, iteration: 422838
loss: 0.9882526993751526,grad_norm: 0.557630317533626, iteration: 422839
loss: 1.0196505784988403,grad_norm: 0.9999993046593612, iteration: 422840
loss: 1.015233039855957,grad_norm: 0.7131100994052277, iteration: 422841
loss: 0.9711855053901672,grad_norm: 0.7959377860314202, iteration: 422842
loss: 1.0018339157104492,grad_norm: 0.7616068728188853, iteration: 422843
loss: 0.9838210344314575,grad_norm: 0.6981767022023103, iteration: 422844
loss: 1.001838207244873,grad_norm: 0.9999991615400277, iteration: 422845
loss: 1.0038286447525024,grad_norm: 0.7516735190179356, iteration: 422846
loss: 0.9875348806381226,grad_norm: 0.8607296053195548, iteration: 422847
loss: 0.9909746050834656,grad_norm: 0.8669289051244689, iteration: 422848
loss: 0.9879779815673828,grad_norm: 0.9999991555629189, iteration: 422849
loss: 0.9978876113891602,grad_norm: 0.7320066204678984, iteration: 422850
loss: 0.9927194714546204,grad_norm: 0.8736538155663583, iteration: 422851
loss: 0.9961215257644653,grad_norm: 0.7390169515773418, iteration: 422852
loss: 1.047242522239685,grad_norm: 0.9999993313792387, iteration: 422853
loss: 0.9773510694503784,grad_norm: 0.8865488821811224, iteration: 422854
loss: 1.060715675354004,grad_norm: 0.9686498749455256, iteration: 422855
loss: 1.0138661861419678,grad_norm: 0.6697681980157261, iteration: 422856
loss: 0.973510205745697,grad_norm: 0.7703862710775226, iteration: 422857
loss: 1.0531578063964844,grad_norm: 0.9401567467090173, iteration: 422858
loss: 1.0113147497177124,grad_norm: 0.9999992579579045, iteration: 422859
loss: 1.0353103876113892,grad_norm: 0.9999995648060697, iteration: 422860
loss: 1.0303696393966675,grad_norm: 0.9999998939895697, iteration: 422861
loss: 1.023342490196228,grad_norm: 0.8849138496667207, iteration: 422862
loss: 1.0206265449523926,grad_norm: 0.8404731034344491, iteration: 422863
loss: 1.000230073928833,grad_norm: 0.9999998805134185, iteration: 422864
loss: 0.9772602319717407,grad_norm: 0.9999991249799569, iteration: 422865
loss: 1.0121990442276,grad_norm: 0.7874113386911863, iteration: 422866
loss: 0.9971718788146973,grad_norm: 0.7516023710863807, iteration: 422867
loss: 1.0864782333374023,grad_norm: 0.8604650325641275, iteration: 422868
loss: 0.9589577317237854,grad_norm: 0.9999991398205956, iteration: 422869
loss: 1.009520173072815,grad_norm: 0.7163724321526805, iteration: 422870
loss: 0.9624878168106079,grad_norm: 0.9237768105638048, iteration: 422871
loss: 0.9642148017883301,grad_norm: 0.6045022371971897, iteration: 422872
loss: 1.0176488161087036,grad_norm: 0.6914061855148994, iteration: 422873
loss: 0.9950645565986633,grad_norm: 0.6995408116026, iteration: 422874
loss: 1.042203426361084,grad_norm: 0.8590635417311291, iteration: 422875
loss: 0.9592069983482361,grad_norm: 0.6727539827389871, iteration: 422876
loss: 0.9631975293159485,grad_norm: 0.8362111917279823, iteration: 422877
loss: 1.02631676197052,grad_norm: 0.7364170806481142, iteration: 422878
loss: 0.9845989346504211,grad_norm: 0.7805254390530876, iteration: 422879
loss: 1.067385196685791,grad_norm: 0.8708291035233638, iteration: 422880
loss: 0.9984089732170105,grad_norm: 0.7090898347008385, iteration: 422881
loss: 0.9649057984352112,grad_norm: 0.7651270262111406, iteration: 422882
loss: 1.0396548509597778,grad_norm: 0.9354294727779727, iteration: 422883
loss: 0.9796615242958069,grad_norm: 0.8229450277350755, iteration: 422884
loss: 0.9683877229690552,grad_norm: 0.7390741598525086, iteration: 422885
loss: 0.9722381830215454,grad_norm: 0.7735539733763395, iteration: 422886
loss: 1.0230460166931152,grad_norm: 0.8762522212527852, iteration: 422887
loss: 0.9827421307563782,grad_norm: 0.9354032259761553, iteration: 422888
loss: 1.0369423627853394,grad_norm: 0.6643307516799032, iteration: 422889
loss: 1.1815625429153442,grad_norm: 0.9745247625667863, iteration: 422890
loss: 0.9913898706436157,grad_norm: 0.6730909364218771, iteration: 422891
loss: 0.9666125774383545,grad_norm: 0.8256103742399337, iteration: 422892
loss: 1.0823529958724976,grad_norm: 0.9999995745623407, iteration: 422893
loss: 0.9820195436477661,grad_norm: 0.7558701358744622, iteration: 422894
loss: 0.9984824061393738,grad_norm: 0.6400129841649169, iteration: 422895
loss: 0.9949242472648621,grad_norm: 0.8217334900942835, iteration: 422896
loss: 0.9943761825561523,grad_norm: 0.8632282412816844, iteration: 422897
loss: 1.0268272161483765,grad_norm: 0.9999997126348164, iteration: 422898
loss: 1.0088483095169067,grad_norm: 0.7002450472663857, iteration: 422899
loss: 1.004683256149292,grad_norm: 0.7263086141335328, iteration: 422900
loss: 0.9695245623588562,grad_norm: 0.7345898587056865, iteration: 422901
loss: 1.0136638879776,grad_norm: 0.7478876603707669, iteration: 422902
loss: 1.0457085371017456,grad_norm: 0.7283227317142693, iteration: 422903
loss: 1.015367865562439,grad_norm: 0.9293984431306342, iteration: 422904
loss: 0.9785401821136475,grad_norm: 0.7226073786538896, iteration: 422905
loss: 1.0315531492233276,grad_norm: 0.695544202916561, iteration: 422906
loss: 1.021262288093567,grad_norm: 0.834369075491178, iteration: 422907
loss: 1.0230422019958496,grad_norm: 0.7296619861717202, iteration: 422908
loss: 1.0009785890579224,grad_norm: 0.8589255459001917, iteration: 422909
loss: 1.025633692741394,grad_norm: 0.8306642851675266, iteration: 422910
loss: 1.1311695575714111,grad_norm: 0.8551012970808988, iteration: 422911
loss: 1.0156139135360718,grad_norm: 0.7832133290242042, iteration: 422912
loss: 1.031957745552063,grad_norm: 0.9999990652248977, iteration: 422913
loss: 0.9823310971260071,grad_norm: 0.7470007321700279, iteration: 422914
loss: 0.9998936057090759,grad_norm: 0.7450291876565633, iteration: 422915
loss: 0.9761171340942383,grad_norm: 0.8465515305655195, iteration: 422916
loss: 1.0185580253601074,grad_norm: 0.9999994978001304, iteration: 422917
loss: 0.9925445318222046,grad_norm: 0.6586227177709101, iteration: 422918
loss: 0.9943494200706482,grad_norm: 0.6060875290948272, iteration: 422919
loss: 1.0152690410614014,grad_norm: 0.9999999258712221, iteration: 422920
loss: 0.9988242983818054,grad_norm: 0.9999998687605948, iteration: 422921
loss: 1.0016095638275146,grad_norm: 0.9286845468118405, iteration: 422922
loss: 1.0355944633483887,grad_norm: 0.9345515735449781, iteration: 422923
loss: 1.010218858718872,grad_norm: 0.6760283766534659, iteration: 422924
loss: 0.9852890372276306,grad_norm: 0.6842111860347627, iteration: 422925
loss: 1.002247929573059,grad_norm: 0.6060738196558506, iteration: 422926
loss: 1.012534737586975,grad_norm: 0.6531411671332269, iteration: 422927
loss: 1.008589744567871,grad_norm: 0.8301889956034907, iteration: 422928
loss: 1.0340842008590698,grad_norm: 0.9701570430715208, iteration: 422929
loss: 0.9541431665420532,grad_norm: 0.6644835595476505, iteration: 422930
loss: 0.9767192602157593,grad_norm: 0.773355781193772, iteration: 422931
loss: 1.0102330446243286,grad_norm: 0.927237234260897, iteration: 422932
loss: 0.9823218584060669,grad_norm: 0.8178304032130345, iteration: 422933
loss: 1.0113927125930786,grad_norm: 0.7184011794226453, iteration: 422934
loss: 1.0060943365097046,grad_norm: 0.72242430766195, iteration: 422935
loss: 1.0058057308197021,grad_norm: 0.6862759570216921, iteration: 422936
loss: 0.9458338618278503,grad_norm: 0.8175331126683153, iteration: 422937
loss: 1.0350128412246704,grad_norm: 0.8966306686945199, iteration: 422938
loss: 0.9760396480560303,grad_norm: 0.7145473520138137, iteration: 422939
loss: 1.0233337879180908,grad_norm: 0.7268787975451685, iteration: 422940
loss: 1.0019299983978271,grad_norm: 0.8563125673240815, iteration: 422941
loss: 1.0439647436141968,grad_norm: 0.9216796408400912, iteration: 422942
loss: 1.0055077075958252,grad_norm: 0.8577787336137744, iteration: 422943
loss: 1.0223329067230225,grad_norm: 0.8355156725430081, iteration: 422944
loss: 1.0036698579788208,grad_norm: 0.9999990584526864, iteration: 422945
loss: 1.0417404174804688,grad_norm: 0.8406777623590104, iteration: 422946
loss: 0.9890473484992981,grad_norm: 0.9381207199108643, iteration: 422947
loss: 1.0393997430801392,grad_norm: 0.7757041521786725, iteration: 422948
loss: 1.0247758626937866,grad_norm: 0.798518269444656, iteration: 422949
loss: 1.0318535566329956,grad_norm: 0.9169537996757651, iteration: 422950
loss: 1.0280271768569946,grad_norm: 0.8139525788084478, iteration: 422951
loss: 0.9907619953155518,grad_norm: 0.6933970860548622, iteration: 422952
loss: 1.0090739727020264,grad_norm: 0.6710310153396324, iteration: 422953
loss: 0.9797650575637817,grad_norm: 0.667029336879744, iteration: 422954
loss: 0.9869505763053894,grad_norm: 0.845082544859177, iteration: 422955
loss: 0.9824024438858032,grad_norm: 0.7307784094639099, iteration: 422956
loss: 0.9919083714485168,grad_norm: 0.7906509341977637, iteration: 422957
loss: 1.1106436252593994,grad_norm: 0.8654907323841257, iteration: 422958
loss: 0.9967124462127686,grad_norm: 0.7859264385543745, iteration: 422959
loss: 0.975573718547821,grad_norm: 0.7500791675028854, iteration: 422960
loss: 1.0899776220321655,grad_norm: 0.8783059371218701, iteration: 422961
loss: 1.0245542526245117,grad_norm: 0.9218075458292169, iteration: 422962
loss: 1.0016953945159912,grad_norm: 0.6598154922373444, iteration: 422963
loss: 0.9524921774864197,grad_norm: 0.967981172633181, iteration: 422964
loss: 0.982255756855011,grad_norm: 0.9999991188745648, iteration: 422965
loss: 0.9721880555152893,grad_norm: 0.7194550926425722, iteration: 422966
loss: 1.0015769004821777,grad_norm: 0.9035004117663279, iteration: 422967
loss: 0.9890294671058655,grad_norm: 0.5832566786555021, iteration: 422968
loss: 0.9902375936508179,grad_norm: 0.8290054153765949, iteration: 422969
loss: 0.967821478843689,grad_norm: 0.785776410320746, iteration: 422970
loss: 0.9955217242240906,grad_norm: 0.8275351651831268, iteration: 422971
loss: 0.987952470779419,grad_norm: 0.8142869017542607, iteration: 422972
loss: 1.0642260313034058,grad_norm: 0.8098169302222198, iteration: 422973
loss: 0.9882264137268066,grad_norm: 0.7673788160517407, iteration: 422974
loss: 0.9900748133659363,grad_norm: 0.8423195873140317, iteration: 422975
loss: 0.9785246253013611,grad_norm: 0.8295710785149318, iteration: 422976
loss: 0.9951000213623047,grad_norm: 0.6990829339787205, iteration: 422977
loss: 0.9852036237716675,grad_norm: 0.9110895311559966, iteration: 422978
loss: 0.9859602451324463,grad_norm: 0.656523668353804, iteration: 422979
loss: 0.992312490940094,grad_norm: 0.8834745424801406, iteration: 422980
loss: 1.01523756980896,grad_norm: 0.8212356392721966, iteration: 422981
loss: 1.0559887886047363,grad_norm: 0.9999993124666664, iteration: 422982
loss: 0.9946346879005432,grad_norm: 0.7664043535809772, iteration: 422983
loss: 0.9841032028198242,grad_norm: 0.7423669553550158, iteration: 422984
loss: 0.9972804188728333,grad_norm: 0.7524176721130073, iteration: 422985
loss: 0.9915190935134888,grad_norm: 0.9999998738337479, iteration: 422986
loss: 1.118053674697876,grad_norm: 0.9690078517368624, iteration: 422987
loss: 1.0229012966156006,grad_norm: 0.8834705931833414, iteration: 422988
loss: 0.9670557975769043,grad_norm: 0.6860306059935602, iteration: 422989
loss: 0.9817057251930237,grad_norm: 0.870606777903454, iteration: 422990
loss: 1.0474562644958496,grad_norm: 0.9999990852998624, iteration: 422991
loss: 0.9937385320663452,grad_norm: 0.9999989310420213, iteration: 422992
loss: 1.0129601955413818,grad_norm: 0.7238083058455743, iteration: 422993
loss: 1.0135605335235596,grad_norm: 0.8195703670160782, iteration: 422994
loss: 1.0059173107147217,grad_norm: 0.9999996838895152, iteration: 422995
loss: 0.9726716876029968,grad_norm: 0.766535152519238, iteration: 422996
loss: 1.0515278577804565,grad_norm: 0.7922239219914344, iteration: 422997
loss: 0.9920909404754639,grad_norm: 0.7710836913457566, iteration: 422998
loss: 1.0170572996139526,grad_norm: 0.8398379578482409, iteration: 422999
loss: 1.0035390853881836,grad_norm: 0.787599129493774, iteration: 423000
loss: 0.9726846218109131,grad_norm: 0.6878944968437497, iteration: 423001
loss: 0.9717545509338379,grad_norm: 0.8797435358805468, iteration: 423002
loss: 0.9921713471412659,grad_norm: 0.8987666774785255, iteration: 423003
loss: 1.0824000835418701,grad_norm: 0.9999991791836012, iteration: 423004
loss: 1.0235165357589722,grad_norm: 0.7192791355127774, iteration: 423005
loss: 0.9611904621124268,grad_norm: 0.6865787674815824, iteration: 423006
loss: 1.021537184715271,grad_norm: 0.9791709695589, iteration: 423007
loss: 0.9875290393829346,grad_norm: 0.9999994183505297, iteration: 423008
loss: 1.0587232112884521,grad_norm: 0.999999438028442, iteration: 423009
loss: 1.0700076818466187,grad_norm: 0.7140877890395457, iteration: 423010
loss: 0.9861669540405273,grad_norm: 0.8606290237126671, iteration: 423011
loss: 1.0081485509872437,grad_norm: 0.8773483360805723, iteration: 423012
loss: 1.0075905323028564,grad_norm: 0.9980556543226019, iteration: 423013
loss: 1.099915862083435,grad_norm: 0.9999998650166465, iteration: 423014
loss: 1.0297499895095825,grad_norm: 0.6446976243801226, iteration: 423015
loss: 0.9974151253700256,grad_norm: 0.7858600531319198, iteration: 423016
loss: 0.9793861508369446,grad_norm: 0.9408373422563392, iteration: 423017
loss: 0.977949857711792,grad_norm: 0.6820445150577185, iteration: 423018
loss: 1.1941730976104736,grad_norm: 0.9999994633338668, iteration: 423019
loss: 1.0171139240264893,grad_norm: 0.6815186966011451, iteration: 423020
loss: 0.9928861260414124,grad_norm: 0.7130708929318387, iteration: 423021
loss: 0.9508117437362671,grad_norm: 0.7332689055127496, iteration: 423022
loss: 1.0295335054397583,grad_norm: 0.721881991567696, iteration: 423023
loss: 0.9834303259849548,grad_norm: 0.8055610811827439, iteration: 423024
loss: 1.014381766319275,grad_norm: 1.0000000079515505, iteration: 423025
loss: 1.0018130540847778,grad_norm: 0.9999995023010267, iteration: 423026
loss: 0.9978829026222229,grad_norm: 0.7773249264310147, iteration: 423027
loss: 1.0041412115097046,grad_norm: 0.8583785174784094, iteration: 423028
loss: 0.9859464168548584,grad_norm: 0.8220103601561394, iteration: 423029
loss: 1.034138560295105,grad_norm: 0.8283878359773449, iteration: 423030
loss: 1.0733921527862549,grad_norm: 0.999999002377975, iteration: 423031
loss: 1.0363078117370605,grad_norm: 0.8764509422503791, iteration: 423032
loss: 1.001194953918457,grad_norm: 0.7960976098029728, iteration: 423033
loss: 0.9948492646217346,grad_norm: 1.0000000086651935, iteration: 423034
loss: 1.0080347061157227,grad_norm: 0.8473717612741551, iteration: 423035
loss: 0.970487117767334,grad_norm: 0.7253405303453814, iteration: 423036
loss: 1.0020408630371094,grad_norm: 0.8832584144007383, iteration: 423037
loss: 0.9716770052909851,grad_norm: 0.6767725746023318, iteration: 423038
loss: 0.9899657368659973,grad_norm: 0.8651512650574947, iteration: 423039
loss: 0.9803146719932556,grad_norm: 0.7749831229499001, iteration: 423040
loss: 0.9847548007965088,grad_norm: 0.673517427783369, iteration: 423041
loss: 0.9946186542510986,grad_norm: 0.8766154694750155, iteration: 423042
loss: 1.0098155736923218,grad_norm: 0.8560754592419922, iteration: 423043
loss: 1.0216732025146484,grad_norm: 0.8211391129101522, iteration: 423044
loss: 1.0562700033187866,grad_norm: 0.958878582356906, iteration: 423045
loss: 1.04975163936615,grad_norm: 0.8499593995184902, iteration: 423046
loss: 0.9909808039665222,grad_norm: 0.9443629261979722, iteration: 423047
loss: 1.0504707098007202,grad_norm: 0.7890980561891295, iteration: 423048
loss: 1.0333073139190674,grad_norm: 0.9999991667964038, iteration: 423049
loss: 1.0012445449829102,grad_norm: 0.747353786631027, iteration: 423050
loss: 0.9718618988990784,grad_norm: 0.6760851180073724, iteration: 423051
loss: 1.0131598711013794,grad_norm: 0.7694884882969192, iteration: 423052
loss: 1.1229784488677979,grad_norm: 0.99999961323062, iteration: 423053
loss: 1.0046290159225464,grad_norm: 0.8292480750750486, iteration: 423054
loss: 1.0082876682281494,grad_norm: 0.9117204348476976, iteration: 423055
loss: 1.0434726476669312,grad_norm: 0.9999991739507474, iteration: 423056
loss: 1.0243052244186401,grad_norm: 0.7798862331999403, iteration: 423057
loss: 0.9707517027854919,grad_norm: 0.9444711222424078, iteration: 423058
loss: 0.990639865398407,grad_norm: 0.683848685740075, iteration: 423059
loss: 1.0045123100280762,grad_norm: 0.8403910701292568, iteration: 423060
loss: 1.0183061361312866,grad_norm: 0.8322649637457139, iteration: 423061
loss: 1.0166290998458862,grad_norm: 0.7775143496974207, iteration: 423062
loss: 1.0071698427200317,grad_norm: 0.9624919793647821, iteration: 423063
loss: 1.0935676097869873,grad_norm: 0.7610488972030027, iteration: 423064
loss: 0.9835352301597595,grad_norm: 0.7558554525064726, iteration: 423065
loss: 1.009259819984436,grad_norm: 0.8680847146413048, iteration: 423066
loss: 0.9549441933631897,grad_norm: 0.6580580074628365, iteration: 423067
loss: 1.0122603178024292,grad_norm: 0.7244134929530381, iteration: 423068
loss: 0.9817419052124023,grad_norm: 0.9999998802638459, iteration: 423069
loss: 1.0216525793075562,grad_norm: 0.8146495131202454, iteration: 423070
loss: 0.9872435927391052,grad_norm: 0.7221959460429465, iteration: 423071
loss: 1.0046800374984741,grad_norm: 0.6950421368541078, iteration: 423072
loss: 0.9683701992034912,grad_norm: 0.6159407629548188, iteration: 423073
loss: 0.9750069379806519,grad_norm: 0.8141736705338499, iteration: 423074
loss: 1.044884204864502,grad_norm: 0.8845797873948451, iteration: 423075
loss: 0.9477299451828003,grad_norm: 0.8443804363003833, iteration: 423076
loss: 0.9901860952377319,grad_norm: 0.8588295032882537, iteration: 423077
loss: 1.000329613685608,grad_norm: 0.6659790271118036, iteration: 423078
loss: 1.0217399597167969,grad_norm: 0.6304372828365084, iteration: 423079
loss: 1.0268020629882812,grad_norm: 0.8137183234950024, iteration: 423080
loss: 1.0207185745239258,grad_norm: 0.7288197114598015, iteration: 423081
loss: 1.0781000852584839,grad_norm: 0.9999991944126638, iteration: 423082
loss: 0.9857405424118042,grad_norm: 0.7946056966505693, iteration: 423083
loss: 1.002974033355713,grad_norm: 0.7906615933004256, iteration: 423084
loss: 1.0097991228103638,grad_norm: 0.9053512749134434, iteration: 423085
loss: 1.036049246788025,grad_norm: 0.8680222013261122, iteration: 423086
loss: 1.0043140649795532,grad_norm: 0.7535251976760139, iteration: 423087
loss: 0.9837632775306702,grad_norm: 0.7250150546802342, iteration: 423088
loss: 0.9895909428596497,grad_norm: 0.7528986134806386, iteration: 423089
loss: 1.0448951721191406,grad_norm: 0.9999998014055594, iteration: 423090
loss: 0.985840380191803,grad_norm: 0.8568380850015207, iteration: 423091
loss: 0.9966616630554199,grad_norm: 0.9618888168831446, iteration: 423092
loss: 1.0189789533615112,grad_norm: 0.7609108948895117, iteration: 423093
loss: 0.9327553510665894,grad_norm: 0.7355739220533579, iteration: 423094
loss: 1.0055181980133057,grad_norm: 0.8361463897949194, iteration: 423095
loss: 0.995724081993103,grad_norm: 0.7850332618891682, iteration: 423096
loss: 1.0235166549682617,grad_norm: 0.917806423823938, iteration: 423097
loss: 0.9804892539978027,grad_norm: 0.7798884637346601, iteration: 423098
loss: 1.0008649826049805,grad_norm: 0.7513031155489447, iteration: 423099
loss: 1.0067198276519775,grad_norm: 0.7908357726403922, iteration: 423100
loss: 0.9639554619789124,grad_norm: 0.8378174373771944, iteration: 423101
loss: 1.0018857717514038,grad_norm: 0.8722694432919949, iteration: 423102
loss: 1.0137593746185303,grad_norm: 0.7738840215876269, iteration: 423103
loss: 0.9843652844429016,grad_norm: 0.8044687395537858, iteration: 423104
loss: 1.0055675506591797,grad_norm: 0.7723507985934607, iteration: 423105
loss: 0.9842686057090759,grad_norm: 0.9999992348381461, iteration: 423106
loss: 1.0066241025924683,grad_norm: 0.6895111515761013, iteration: 423107
loss: 1.0200724601745605,grad_norm: 0.999999369648628, iteration: 423108
loss: 1.0218887329101562,grad_norm: 0.808679808806235, iteration: 423109
loss: 1.0032603740692139,grad_norm: 0.7781879621340818, iteration: 423110
loss: 1.0177583694458008,grad_norm: 0.7045785775531387, iteration: 423111
loss: 1.0265496969223022,grad_norm: 0.8207849222072205, iteration: 423112
loss: 1.0787696838378906,grad_norm: 0.9435615482814022, iteration: 423113
loss: 1.004370093345642,grad_norm: 0.9185010080636101, iteration: 423114
loss: 1.005197525024414,grad_norm: 0.7078567458145224, iteration: 423115
loss: 0.9704971313476562,grad_norm: 0.746157424450843, iteration: 423116
loss: 1.0120891332626343,grad_norm: 0.8481520095457132, iteration: 423117
loss: 1.0144425630569458,grad_norm: 0.9999995716560823, iteration: 423118
loss: 0.950384259223938,grad_norm: 0.7254787230858334, iteration: 423119
loss: 1.0186210870742798,grad_norm: 0.9999995185431939, iteration: 423120
loss: 0.9865097403526306,grad_norm: 0.6678342987356162, iteration: 423121
loss: 1.051298975944519,grad_norm: 0.9710621858196088, iteration: 423122
loss: 1.0326902866363525,grad_norm: 0.8535574786819208, iteration: 423123
loss: 0.985113263130188,grad_norm: 0.767559672030145, iteration: 423124
loss: 0.993764340877533,grad_norm: 0.8132609170590206, iteration: 423125
loss: 1.0079855918884277,grad_norm: 0.7763161577934121, iteration: 423126
loss: 0.9776442646980286,grad_norm: 0.6739254330414202, iteration: 423127
loss: 0.9936266541481018,grad_norm: 0.8064815022647511, iteration: 423128
loss: 1.0080375671386719,grad_norm: 0.71776462971992, iteration: 423129
loss: 0.9655338525772095,grad_norm: 0.8879749232504683, iteration: 423130
loss: 0.9819633960723877,grad_norm: 0.8836852062290902, iteration: 423131
loss: 0.9906113743782043,grad_norm: 0.8252389258104857, iteration: 423132
loss: 1.0023517608642578,grad_norm: 0.7031212607254052, iteration: 423133
loss: 1.0035626888275146,grad_norm: 0.7356554113050288, iteration: 423134
loss: 1.0766444206237793,grad_norm: 0.8123421700627049, iteration: 423135
loss: 0.9834689497947693,grad_norm: 0.7977983465805601, iteration: 423136
loss: 0.9713780879974365,grad_norm: 0.6905972395769446, iteration: 423137
loss: 1.015955924987793,grad_norm: 0.8367902863809555, iteration: 423138
loss: 1.0465878248214722,grad_norm: 0.7747438884909198, iteration: 423139
loss: 0.9730270504951477,grad_norm: 0.8778572479158654, iteration: 423140
loss: 1.015442967414856,grad_norm: 0.7339229447200397, iteration: 423141
loss: 1.0252193212509155,grad_norm: 0.9387509215369246, iteration: 423142
loss: 1.0225515365600586,grad_norm: 0.8954105014291192, iteration: 423143
loss: 1.022972822189331,grad_norm: 0.9169553555995003, iteration: 423144
loss: 1.1266200542449951,grad_norm: 0.9605335905109363, iteration: 423145
loss: 1.0190470218658447,grad_norm: 0.9999996378623007, iteration: 423146
loss: 1.0089505910873413,grad_norm: 0.8817114420715185, iteration: 423147
loss: 1.0047980546951294,grad_norm: 0.8271078950219949, iteration: 423148
loss: 0.9885828495025635,grad_norm: 0.7474170678048981, iteration: 423149
loss: 1.0173603296279907,grad_norm: 0.827964382450026, iteration: 423150
loss: 1.0109424591064453,grad_norm: 0.653584821762087, iteration: 423151
loss: 1.0054315328598022,grad_norm: 0.6342414186071589, iteration: 423152
loss: 1.1007575988769531,grad_norm: 0.8882605614000482, iteration: 423153
loss: 1.0006638765335083,grad_norm: 0.8630565410975686, iteration: 423154
loss: 0.9746339321136475,grad_norm: 0.6516467416174505, iteration: 423155
loss: 0.9958309531211853,grad_norm: 0.9999999173763269, iteration: 423156
loss: 0.984684944152832,grad_norm: 0.630358763318513, iteration: 423157
loss: 0.9967058300971985,grad_norm: 0.7010140197711631, iteration: 423158
loss: 0.9595370292663574,grad_norm: 0.81789852979753, iteration: 423159
loss: 1.0858970880508423,grad_norm: 0.999999449783227, iteration: 423160
loss: 1.0353550910949707,grad_norm: 0.841306909845558, iteration: 423161
loss: 0.9919405579566956,grad_norm: 0.8318408040105023, iteration: 423162
loss: 1.0084441900253296,grad_norm: 0.712732786441876, iteration: 423163
loss: 1.0003381967544556,grad_norm: 0.6483401895022282, iteration: 423164
loss: 0.9973931908607483,grad_norm: 0.7175407780929837, iteration: 423165
loss: 0.993312418460846,grad_norm: 0.850390948597782, iteration: 423166
loss: 1.015062689781189,grad_norm: 0.8263233220000168, iteration: 423167
loss: 1.0884648561477661,grad_norm: 0.999999851883384, iteration: 423168
loss: 0.9895704388618469,grad_norm: 0.8325669180647286, iteration: 423169
loss: 0.9656544923782349,grad_norm: 0.6822732273955192, iteration: 423170
loss: 1.168440580368042,grad_norm: 0.9999995752576861, iteration: 423171
loss: 1.0230491161346436,grad_norm: 0.9999995732748409, iteration: 423172
loss: 1.0335915088653564,grad_norm: 0.7981902626614575, iteration: 423173
loss: 0.9893921613693237,grad_norm: 0.7421999643374925, iteration: 423174
loss: 1.0264371633529663,grad_norm: 0.9842541585892448, iteration: 423175
loss: 1.1024833917617798,grad_norm: 0.999999131131351, iteration: 423176
loss: 1.0086342096328735,grad_norm: 0.9999998581104631, iteration: 423177
loss: 0.9914485812187195,grad_norm: 0.6794076904140322, iteration: 423178
loss: 1.0890291929244995,grad_norm: 0.9999998383174933, iteration: 423179
loss: 0.9708089232444763,grad_norm: 0.8104110900084247, iteration: 423180
loss: 1.030784010887146,grad_norm: 0.9999991026900525, iteration: 423181
loss: 1.0181411504745483,grad_norm: 0.84932924362397, iteration: 423182
loss: 1.0174788236618042,grad_norm: 0.7075818651768448, iteration: 423183
loss: 1.0141979455947876,grad_norm: 0.7158143331531176, iteration: 423184
loss: 0.9897361397743225,grad_norm: 0.9208537346648403, iteration: 423185
loss: 1.038996696472168,grad_norm: 0.9999995906285307, iteration: 423186
loss: 0.9802058339118958,grad_norm: 0.6659687525403022, iteration: 423187
loss: 1.0258876085281372,grad_norm: 0.7123635892665778, iteration: 423188
loss: 0.9964641332626343,grad_norm: 0.7906394903841867, iteration: 423189
loss: 0.9836433529853821,grad_norm: 0.8161947445685585, iteration: 423190
loss: 0.9485071897506714,grad_norm: 0.8043977238822878, iteration: 423191
loss: 0.9894694089889526,grad_norm: 0.7700566522463507, iteration: 423192
loss: 1.0148507356643677,grad_norm: 0.6719604928714431, iteration: 423193
loss: 0.978990375995636,grad_norm: 0.7728516742865378, iteration: 423194
loss: 1.072920560836792,grad_norm: 0.8814570775000036, iteration: 423195
loss: 0.9921912550926208,grad_norm: 0.7837025941602617, iteration: 423196
loss: 1.0769966840744019,grad_norm: 0.9082334081280768, iteration: 423197
loss: 0.992740273475647,grad_norm: 0.7697597349999656, iteration: 423198
loss: 1.0377596616744995,grad_norm: 0.9999997997051078, iteration: 423199
loss: 1.0069690942764282,grad_norm: 0.8208062754419209, iteration: 423200
loss: 1.0738937854766846,grad_norm: 0.7846057848427616, iteration: 423201
loss: 1.0349372625350952,grad_norm: 0.8246776648763866, iteration: 423202
loss: 1.0129191875457764,grad_norm: 0.7057380467421437, iteration: 423203
loss: 1.0310814380645752,grad_norm: 0.7880459387984747, iteration: 423204
loss: 0.9915984869003296,grad_norm: 0.6878027189944076, iteration: 423205
loss: 1.0415400266647339,grad_norm: 0.9999999093894777, iteration: 423206
loss: 0.9772067070007324,grad_norm: 0.7565758051146255, iteration: 423207
loss: 1.0271309614181519,grad_norm: 0.7971279577527736, iteration: 423208
loss: 1.02182137966156,grad_norm: 0.7036734916112828, iteration: 423209
loss: 1.0347988605499268,grad_norm: 0.7570313081083481, iteration: 423210
loss: 1.047869086265564,grad_norm: 0.8488585153181918, iteration: 423211
loss: 1.0115692615509033,grad_norm: 0.7992625067574989, iteration: 423212
loss: 1.0038337707519531,grad_norm: 0.7256001404482072, iteration: 423213
loss: 0.9821381568908691,grad_norm: 0.7135211389887491, iteration: 423214
loss: 1.0008233785629272,grad_norm: 0.8284953634233108, iteration: 423215
loss: 1.0873907804489136,grad_norm: 0.9446254835913845, iteration: 423216
loss: 0.967689037322998,grad_norm: 0.7690767110768107, iteration: 423217
loss: 1.0362547636032104,grad_norm: 0.7485908746906981, iteration: 423218
loss: 0.999161422252655,grad_norm: 0.7957146668296761, iteration: 423219
loss: 1.0189812183380127,grad_norm: 0.7835353762105629, iteration: 423220
loss: 0.9799894094467163,grad_norm: 0.6714177375614656, iteration: 423221
loss: 1.0195473432540894,grad_norm: 0.8669609268190378, iteration: 423222
loss: 1.0257295370101929,grad_norm: 0.7039956404008921, iteration: 423223
loss: 0.9945054054260254,grad_norm: 0.7328688980878771, iteration: 423224
loss: 0.9737710356712341,grad_norm: 0.7120960280564127, iteration: 423225
loss: 0.9820541739463806,grad_norm: 0.7035599492463379, iteration: 423226
loss: 1.0045664310455322,grad_norm: 0.8252280488491675, iteration: 423227
loss: 1.0013937950134277,grad_norm: 0.8748717972168026, iteration: 423228
loss: 0.9643382430076599,grad_norm: 0.738985869566043, iteration: 423229
loss: 1.040168285369873,grad_norm: 0.7064409312042981, iteration: 423230
loss: 1.001663327217102,grad_norm: 0.7268613412928254, iteration: 423231
loss: 1.0541157722473145,grad_norm: 0.9999996207535296, iteration: 423232
loss: 0.9891607165336609,grad_norm: 0.7835846477503339, iteration: 423233
loss: 1.0265511274337769,grad_norm: 0.9999995579738326, iteration: 423234
loss: 1.0349507331848145,grad_norm: 0.9999994195009492, iteration: 423235
loss: 1.0467790365219116,grad_norm: 0.8857056011884973, iteration: 423236
loss: 1.0080618858337402,grad_norm: 0.6918029309217403, iteration: 423237
loss: 1.038323163986206,grad_norm: 0.8499911535196398, iteration: 423238
loss: 0.996921718120575,grad_norm: 0.8176756156901911, iteration: 423239
loss: 1.0004528760910034,grad_norm: 0.6970387439594177, iteration: 423240
loss: 1.0637348890304565,grad_norm: 0.9999993136912709, iteration: 423241
loss: 0.9809304475784302,grad_norm: 0.8686076133740528, iteration: 423242
loss: 0.9966564774513245,grad_norm: 0.6876098839568485, iteration: 423243
loss: 0.9876213669776917,grad_norm: 0.6725448032872761, iteration: 423244
loss: 1.0172507762908936,grad_norm: 0.7704129866024102, iteration: 423245
loss: 1.0985137224197388,grad_norm: 0.9518443184157573, iteration: 423246
loss: 0.9878867864608765,grad_norm: 0.9475444088098991, iteration: 423247
loss: 1.0438964366912842,grad_norm: 0.9999995892095989, iteration: 423248
loss: 1.0347787141799927,grad_norm: 1.0000001064512327, iteration: 423249
loss: 1.0025532245635986,grad_norm: 0.720779317487001, iteration: 423250
loss: 1.002845287322998,grad_norm: 0.7440766078588982, iteration: 423251
loss: 1.0144199132919312,grad_norm: 0.7798457428926259, iteration: 423252
loss: 0.9949195981025696,grad_norm: 0.6883171266907102, iteration: 423253
loss: 1.09075129032135,grad_norm: 0.9291697157888659, iteration: 423254
loss: 1.0006977319717407,grad_norm: 0.875948308150334, iteration: 423255
loss: 1.0352815389633179,grad_norm: 0.7543099897053649, iteration: 423256
loss: 0.9989402890205383,grad_norm: 0.7520527268597085, iteration: 423257
loss: 0.9958137273788452,grad_norm: 0.7784557115885535, iteration: 423258
loss: 1.026587724685669,grad_norm: 0.8254808921651716, iteration: 423259
loss: 1.0006144046783447,grad_norm: 0.7724278479301437, iteration: 423260
loss: 0.9981529712677002,grad_norm: 0.7919878374977345, iteration: 423261
loss: 1.0077104568481445,grad_norm: 0.8226448718651328, iteration: 423262
loss: 1.009354829788208,grad_norm: 0.836463940704299, iteration: 423263
loss: 0.996776819229126,grad_norm: 0.9181983370564512, iteration: 423264
loss: 1.034419298171997,grad_norm: 0.9999993326269149, iteration: 423265
loss: 1.125663161277771,grad_norm: 0.9112481253610664, iteration: 423266
loss: 1.0205109119415283,grad_norm: 0.8133466365955899, iteration: 423267
loss: 1.0209553241729736,grad_norm: 0.9999993433329584, iteration: 423268
loss: 0.9989969730377197,grad_norm: 0.681570431158405, iteration: 423269
loss: 0.9897997975349426,grad_norm: 0.7306785167483456, iteration: 423270
loss: 0.9731423258781433,grad_norm: 0.8114635084297125, iteration: 423271
loss: 1.0010452270507812,grad_norm: 0.9580175679434477, iteration: 423272
loss: 0.94305819272995,grad_norm: 0.7078544224908663, iteration: 423273
loss: 1.0086604356765747,grad_norm: 0.6784912114736366, iteration: 423274
loss: 1.0152860879898071,grad_norm: 0.6613022509313841, iteration: 423275
loss: 1.0160164833068848,grad_norm: 0.6886524926127159, iteration: 423276
loss: 1.0003018379211426,grad_norm: 0.6794690431423195, iteration: 423277
loss: 1.0306740999221802,grad_norm: 0.8180244184922827, iteration: 423278
loss: 1.0033903121948242,grad_norm: 0.774212891977369, iteration: 423279
loss: 1.048488974571228,grad_norm: 0.6945573002334227, iteration: 423280
loss: 0.9711171984672546,grad_norm: 0.719259705022517, iteration: 423281
loss: 0.9914698600769043,grad_norm: 0.7137252512754544, iteration: 423282
loss: 1.0206599235534668,grad_norm: 0.7006072561574226, iteration: 423283
loss: 0.9936451315879822,grad_norm: 0.921115082136507, iteration: 423284
loss: 1.0268452167510986,grad_norm: 0.793723285774471, iteration: 423285
loss: 1.0499392747879028,grad_norm: 0.8551494753963774, iteration: 423286
loss: 1.1221920251846313,grad_norm: 0.9999990359828288, iteration: 423287
loss: 1.0479652881622314,grad_norm: 0.7307748621783221, iteration: 423288
loss: 1.0272630453109741,grad_norm: 0.9999991122274292, iteration: 423289
loss: 0.9712439179420471,grad_norm: 0.6397129309672316, iteration: 423290
loss: 0.9881514310836792,grad_norm: 0.9999991691177409, iteration: 423291
loss: 1.0043308734893799,grad_norm: 0.7257866243543611, iteration: 423292
loss: 1.0105137825012207,grad_norm: 0.9999991544332989, iteration: 423293
loss: 0.9928767085075378,grad_norm: 0.8318102094889022, iteration: 423294
loss: 1.0139598846435547,grad_norm: 1.000000016903409, iteration: 423295
loss: 0.9959691762924194,grad_norm: 0.6792636531733035, iteration: 423296
loss: 1.000087857246399,grad_norm: 0.9574511024861241, iteration: 423297
loss: 1.0758739709854126,grad_norm: 0.9999992280993437, iteration: 423298
loss: 1.0746943950653076,grad_norm: 0.9999995435256132, iteration: 423299
loss: 0.9893701076507568,grad_norm: 0.6848726771422606, iteration: 423300
loss: 1.0563117265701294,grad_norm: 0.9999993079629536, iteration: 423301
loss: 0.9823235869407654,grad_norm: 0.7307833566439531, iteration: 423302
loss: 1.0400675535202026,grad_norm: 0.8230877073766695, iteration: 423303
loss: 0.9952085018157959,grad_norm: 0.772304938863463, iteration: 423304
loss: 0.9927544593811035,grad_norm: 0.8291166950089851, iteration: 423305
loss: 1.0138522386550903,grad_norm: 0.9999993875565936, iteration: 423306
loss: 1.0173709392547607,grad_norm: 0.6985580378031025, iteration: 423307
loss: 1.0096731185913086,grad_norm: 0.8944458313586927, iteration: 423308
loss: 1.0417852401733398,grad_norm: 0.8251462515813023, iteration: 423309
loss: 1.0206066370010376,grad_norm: 0.9153101809857516, iteration: 423310
loss: 1.0000442266464233,grad_norm: 0.8985541627782205, iteration: 423311
loss: 1.0206583738327026,grad_norm: 0.6927707859305228, iteration: 423312
loss: 1.0057353973388672,grad_norm: 0.8347651523091678, iteration: 423313
loss: 0.9617587924003601,grad_norm: 0.7196050283188332, iteration: 423314
loss: 1.005040168762207,grad_norm: 0.7187251903620416, iteration: 423315
loss: 0.9802660942077637,grad_norm: 0.6704403106491047, iteration: 423316
loss: 1.0168129205703735,grad_norm: 0.9999996156879353, iteration: 423317
loss: 1.00261652469635,grad_norm: 0.8556166269417492, iteration: 423318
loss: 1.1836897134780884,grad_norm: 0.9999999465528971, iteration: 423319
loss: 1.061653733253479,grad_norm: 0.9014707549448768, iteration: 423320
loss: 1.0246447324752808,grad_norm: 0.720118897517324, iteration: 423321
loss: 1.010216474533081,grad_norm: 0.8096967138062455, iteration: 423322
loss: 1.013865351676941,grad_norm: 0.8846949538221623, iteration: 423323
loss: 0.9932869076728821,grad_norm: 0.8643900840942813, iteration: 423324
loss: 1.0316660404205322,grad_norm: 0.7195330745451939, iteration: 423325
loss: 0.9822829961776733,grad_norm: 0.8197770938539041, iteration: 423326
loss: 1.0639560222625732,grad_norm: 0.9999995064842923, iteration: 423327
loss: 1.0353055000305176,grad_norm: 0.8683968308800234, iteration: 423328
loss: 1.0262783765792847,grad_norm: 0.9999991033178824, iteration: 423329
loss: 0.9962988495826721,grad_norm: 0.9999991050899003, iteration: 423330
loss: 1.0939091444015503,grad_norm: 0.9999991957819173, iteration: 423331
loss: 0.9982345700263977,grad_norm: 0.8462324313401562, iteration: 423332
loss: 0.9665427803993225,grad_norm: 0.9999989691152084, iteration: 423333
loss: 0.9994794726371765,grad_norm: 0.9999996313334031, iteration: 423334
loss: 0.9958487153053284,grad_norm: 0.8661502802686258, iteration: 423335
loss: 0.9672508239746094,grad_norm: 0.8031572849393569, iteration: 423336
loss: 1.0159767866134644,grad_norm: 0.9999999727490793, iteration: 423337
loss: 0.9972624778747559,grad_norm: 0.9999990727471988, iteration: 423338
loss: 0.9829621315002441,grad_norm: 0.7510709598322215, iteration: 423339
loss: 1.0111039876937866,grad_norm: 0.7814262150689942, iteration: 423340
loss: 1.008658766746521,grad_norm: 0.9999999480096746, iteration: 423341
loss: 1.111914873123169,grad_norm: 0.9999999184723786, iteration: 423342
loss: 0.9460821151733398,grad_norm: 0.695271208548565, iteration: 423343
loss: 1.015106201171875,grad_norm: 0.7576324298947661, iteration: 423344
loss: 0.9867361783981323,grad_norm: 0.8063318945259575, iteration: 423345
loss: 0.9998170733451843,grad_norm: 0.7448057767258829, iteration: 423346
loss: 0.9575698971748352,grad_norm: 0.810790312491786, iteration: 423347
loss: 1.0090363025665283,grad_norm: 0.648789107177403, iteration: 423348
loss: 1.010015606880188,grad_norm: 0.9999992266895718, iteration: 423349
loss: 0.9887946248054504,grad_norm: 0.9999993017642352, iteration: 423350
loss: 1.016981840133667,grad_norm: 0.9999991325651058, iteration: 423351
loss: 0.9659627676010132,grad_norm: 0.8551576542734252, iteration: 423352
loss: 0.9864646792411804,grad_norm: 0.7480231059074907, iteration: 423353
loss: 0.9726055860519409,grad_norm: 0.7705642752775165, iteration: 423354
loss: 1.0337711572647095,grad_norm: 0.9999999209381596, iteration: 423355
loss: 1.0172357559204102,grad_norm: 0.7774469021350232, iteration: 423356
loss: 1.046953558921814,grad_norm: 0.812146095427165, iteration: 423357
loss: 1.0078270435333252,grad_norm: 0.9999995536239203, iteration: 423358
loss: 0.987318754196167,grad_norm: 0.6389696625779872, iteration: 423359
loss: 1.105316400527954,grad_norm: 0.9447573980289131, iteration: 423360
loss: 0.9816092848777771,grad_norm: 0.6201676749490009, iteration: 423361
loss: 1.0125362873077393,grad_norm: 0.7698415567901242, iteration: 423362
loss: 0.9736097455024719,grad_norm: 0.797600840949829, iteration: 423363
loss: 0.9918332099914551,grad_norm: 0.6453214692271056, iteration: 423364
loss: 1.02392578125,grad_norm: 0.9029765521586727, iteration: 423365
loss: 1.0106432437896729,grad_norm: 0.7896622412989329, iteration: 423366
loss: 1.0231337547302246,grad_norm: 0.9999999791763606, iteration: 423367
loss: 0.9842102527618408,grad_norm: 0.7303601729308749, iteration: 423368
loss: 1.0090341567993164,grad_norm: 0.9999996190422253, iteration: 423369
loss: 1.0103996992111206,grad_norm: 0.9137135044235388, iteration: 423370
loss: 1.0023540258407593,grad_norm: 0.8311165972838656, iteration: 423371
loss: 0.9578902721405029,grad_norm: 0.7641434122410756, iteration: 423372
loss: 0.9933975338935852,grad_norm: 0.7188953714727969, iteration: 423373
loss: 0.9690908789634705,grad_norm: 0.717793344319111, iteration: 423374
loss: 1.0466699600219727,grad_norm: 0.8333392000336116, iteration: 423375
loss: 1.0197516679763794,grad_norm: 0.7216769017418452, iteration: 423376
loss: 1.0279256105422974,grad_norm: 0.9999993111229282, iteration: 423377
loss: 0.9765303730964661,grad_norm: 0.8878600673527535, iteration: 423378
loss: 1.0351365804672241,grad_norm: 0.8861819917239461, iteration: 423379
loss: 0.98434978723526,grad_norm: 0.948525305086011, iteration: 423380
loss: 1.0287773609161377,grad_norm: 0.9999998162266233, iteration: 423381
loss: 0.9717621207237244,grad_norm: 0.7151403829939608, iteration: 423382
loss: 1.0022447109222412,grad_norm: 0.7531366701098641, iteration: 423383
loss: 0.9958673715591431,grad_norm: 0.7779144797560872, iteration: 423384
loss: 0.9847061634063721,grad_norm: 0.7945321363454758, iteration: 423385
loss: 0.9893019199371338,grad_norm: 0.7931622207327635, iteration: 423386
loss: 0.9852802753448486,grad_norm: 0.8612423576829701, iteration: 423387
loss: 1.011715292930603,grad_norm: 0.7303906348787051, iteration: 423388
loss: 0.9950097799301147,grad_norm: 0.8919509817649162, iteration: 423389
loss: 1.0067802667617798,grad_norm: 0.7447800411548127, iteration: 423390
loss: 0.9981060028076172,grad_norm: 0.8258607354485521, iteration: 423391
loss: 1.0375959873199463,grad_norm: 0.9999993255329882, iteration: 423392
loss: 1.0153237581253052,grad_norm: 0.7954169911645004, iteration: 423393
loss: 1.0060688257217407,grad_norm: 0.7712087960769418, iteration: 423394
loss: 1.0362451076507568,grad_norm: 0.828518403261134, iteration: 423395
loss: 1.0058636665344238,grad_norm: 0.712483166359525, iteration: 423396
loss: 1.0159372091293335,grad_norm: 0.8402582587379462, iteration: 423397
loss: 1.0019596815109253,grad_norm: 0.8526300095286012, iteration: 423398
loss: 0.9964679479598999,grad_norm: 0.7487459325258952, iteration: 423399
loss: 0.9896717071533203,grad_norm: 0.6827377085447238, iteration: 423400
loss: 0.9810067415237427,grad_norm: 0.7583224110138316, iteration: 423401
loss: 1.006361722946167,grad_norm: 0.7072084032070075, iteration: 423402
loss: 1.0079089403152466,grad_norm: 0.8654297903248372, iteration: 423403
loss: 0.9883192777633667,grad_norm: 0.6811666808535412, iteration: 423404
loss: 0.9852271676063538,grad_norm: 0.7962133204920575, iteration: 423405
loss: 0.9927465319633484,grad_norm: 0.7531910547689175, iteration: 423406
loss: 0.9955416917800903,grad_norm: 0.7217441526228178, iteration: 423407
loss: 0.9942381978034973,grad_norm: 0.9262965837726032, iteration: 423408
loss: 1.0132484436035156,grad_norm: 0.6849514840910645, iteration: 423409
loss: 1.0107228755950928,grad_norm: 0.9999993670749191, iteration: 423410
loss: 1.02532160282135,grad_norm: 0.8093934267021887, iteration: 423411
loss: 1.0014055967330933,grad_norm: 0.7918173225612966, iteration: 423412
loss: 1.004740834236145,grad_norm: 0.9203633379918409, iteration: 423413
loss: 0.9869697093963623,grad_norm: 0.8181678643554424, iteration: 423414
loss: 1.0572060346603394,grad_norm: 0.9999992407059878, iteration: 423415
loss: 0.9710325598716736,grad_norm: 0.898820130356533, iteration: 423416
loss: 1.0152556896209717,grad_norm: 0.8181209009467016, iteration: 423417
loss: 0.9781404733657837,grad_norm: 0.6737318211111103, iteration: 423418
loss: 0.9882978796958923,grad_norm: 0.7350788125473897, iteration: 423419
loss: 1.013146996498108,grad_norm: 0.8265986358824287, iteration: 423420
loss: 1.0024126768112183,grad_norm: 0.7527897494253037, iteration: 423421
loss: 1.0866529941558838,grad_norm: 0.8444407746664048, iteration: 423422
loss: 0.9543638229370117,grad_norm: 0.6294579677775227, iteration: 423423
loss: 1.0166428089141846,grad_norm: 0.8130171399223222, iteration: 423424
loss: 0.9435461163520813,grad_norm: 0.8944731720243948, iteration: 423425
loss: 1.0309083461761475,grad_norm: 0.6702891737983231, iteration: 423426
loss: 1.0427745580673218,grad_norm: 0.7793210237618112, iteration: 423427
loss: 1.0186673402786255,grad_norm: 0.8688414098106638, iteration: 423428
loss: 1.020437240600586,grad_norm: 0.9999990429870848, iteration: 423429
loss: 1.059321403503418,grad_norm: 0.898641698953887, iteration: 423430
loss: 0.9963644742965698,grad_norm: 0.9999992539917155, iteration: 423431
loss: 1.0582857131958008,grad_norm: 0.8737248120244329, iteration: 423432
loss: 0.9825527667999268,grad_norm: 0.7882500621253383, iteration: 423433
loss: 1.0668336153030396,grad_norm: 0.9999994521273708, iteration: 423434
loss: 1.0057755708694458,grad_norm: 0.8918602828683282, iteration: 423435
loss: 1.004285216331482,grad_norm: 0.8612506591411201, iteration: 423436
loss: 1.0105254650115967,grad_norm: 0.7675341481422431, iteration: 423437
loss: 1.03195321559906,grad_norm: 0.907750821547252, iteration: 423438
loss: 1.0490127801895142,grad_norm: 0.9999998106002871, iteration: 423439
loss: 1.0705097913742065,grad_norm: 0.9999993720978353, iteration: 423440
loss: 1.0622856616973877,grad_norm: 0.9999996388497119, iteration: 423441
loss: 1.0131546258926392,grad_norm: 0.7929347266821772, iteration: 423442
loss: 0.9924406409263611,grad_norm: 0.9999993408805913, iteration: 423443
loss: 1.020652413368225,grad_norm: 0.8520263202976598, iteration: 423444
loss: 1.03899085521698,grad_norm: 0.999999221362623, iteration: 423445
loss: 0.9987103939056396,grad_norm: 0.8882817538613373, iteration: 423446
loss: 0.9707908034324646,grad_norm: 0.7893127338414977, iteration: 423447
loss: 0.9751492142677307,grad_norm: 0.700536617617671, iteration: 423448
loss: 1.025191068649292,grad_norm: 0.7279626330905934, iteration: 423449
loss: 1.0602980852127075,grad_norm: 0.9999990682678107, iteration: 423450
loss: 1.0309444665908813,grad_norm: 0.8379165445882246, iteration: 423451
loss: 0.9852207899093628,grad_norm: 0.71744651702712, iteration: 423452
loss: 1.1216700077056885,grad_norm: 0.9999999116879459, iteration: 423453
loss: 0.9766895771026611,grad_norm: 0.7595507756587881, iteration: 423454
loss: 0.9929746389389038,grad_norm: 0.9999996997642893, iteration: 423455
loss: 1.006407380104065,grad_norm: 0.9314967183959569, iteration: 423456
loss: 1.0696438550949097,grad_norm: 0.7137656511699046, iteration: 423457
loss: 1.0332601070404053,grad_norm: 0.8501848382543775, iteration: 423458
loss: 1.075781226158142,grad_norm: 0.8398488415479874, iteration: 423459
loss: 0.9844407439231873,grad_norm: 0.9999999116428571, iteration: 423460
loss: 1.0313931703567505,grad_norm: 0.7127188565570041, iteration: 423461
loss: 0.9935654401779175,grad_norm: 0.8860317955295087, iteration: 423462
loss: 0.9741538763046265,grad_norm: 0.8340102583349582, iteration: 423463
loss: 0.9848082661628723,grad_norm: 0.6471876451839146, iteration: 423464
loss: 0.9966539144515991,grad_norm: 0.9999992221559514, iteration: 423465
loss: 0.9956578612327576,grad_norm: 0.7765125396829767, iteration: 423466
loss: 1.0175197124481201,grad_norm: 0.8895438441862759, iteration: 423467
loss: 1.0077202320098877,grad_norm: 0.7447838571073225, iteration: 423468
loss: 1.0490354299545288,grad_norm: 0.9999997558155196, iteration: 423469
loss: 0.9935626983642578,grad_norm: 0.8862273365792495, iteration: 423470
loss: 0.9902847409248352,grad_norm: 0.7969743386207951, iteration: 423471
loss: 0.9873780608177185,grad_norm: 0.7906426948962514, iteration: 423472
loss: 1.0167851448059082,grad_norm: 0.8251012831674132, iteration: 423473
loss: 0.9870460629463196,grad_norm: 0.8914032580015224, iteration: 423474
loss: 1.0030237436294556,grad_norm: 0.9043477519508399, iteration: 423475
loss: 1.0162665843963623,grad_norm: 0.8271392610758308, iteration: 423476
loss: 1.0161412954330444,grad_norm: 0.8109198304693639, iteration: 423477
loss: 1.0001397132873535,grad_norm: 0.7886735200147594, iteration: 423478
loss: 1.0484575033187866,grad_norm: 0.6693242189215718, iteration: 423479
loss: 0.9747592210769653,grad_norm: 0.8074646277370936, iteration: 423480
loss: 1.033453345298767,grad_norm: 0.8540148525562723, iteration: 423481
loss: 0.9748112559318542,grad_norm: 0.7477270209100213, iteration: 423482
loss: 1.050137996673584,grad_norm: 0.778724749337966, iteration: 423483
loss: 0.9641404151916504,grad_norm: 0.7540176886941874, iteration: 423484
loss: 0.9940580725669861,grad_norm: 0.8461130275996928, iteration: 423485
loss: 1.0199308395385742,grad_norm: 0.908572549322087, iteration: 423486
loss: 1.0335773229599,grad_norm: 0.7831224137772108, iteration: 423487
loss: 1.081551432609558,grad_norm: 0.9999996384320625, iteration: 423488
loss: 1.1277157068252563,grad_norm: 0.9999991839328134, iteration: 423489
loss: 1.0495778322219849,grad_norm: 0.6549901332103724, iteration: 423490
loss: 0.986564040184021,grad_norm: 0.9476339441122578, iteration: 423491
loss: 1.0375252962112427,grad_norm: 0.9999998051940121, iteration: 423492
loss: 1.0019736289978027,grad_norm: 0.7806640937350948, iteration: 423493
loss: 1.0623950958251953,grad_norm: 0.9999999675102618, iteration: 423494
loss: 1.0254319906234741,grad_norm: 0.953904100254318, iteration: 423495
loss: 0.9981339573860168,grad_norm: 0.7334866473401712, iteration: 423496
loss: 0.9708045125007629,grad_norm: 0.6980314684672495, iteration: 423497
loss: 1.0198369026184082,grad_norm: 0.9999994267265003, iteration: 423498
loss: 0.9566311836242676,grad_norm: 0.9122322429934033, iteration: 423499
loss: 1.0087971687316895,grad_norm: 0.8594489126220534, iteration: 423500
loss: 1.0996778011322021,grad_norm: 0.9013561362683528, iteration: 423501
loss: 1.0082138776779175,grad_norm: 0.8470845163307926, iteration: 423502
loss: 0.974555253982544,grad_norm: 0.9254573341795376, iteration: 423503
loss: 1.0146520137786865,grad_norm: 0.999999691842818, iteration: 423504
loss: 1.040250301361084,grad_norm: 0.9999993106418789, iteration: 423505
loss: 0.98747718334198,grad_norm: 0.6134997920237795, iteration: 423506
loss: 0.9526422023773193,grad_norm: 0.8238568876657137, iteration: 423507
loss: 1.0161378383636475,grad_norm: 0.7365241804727735, iteration: 423508
loss: 1.0179381370544434,grad_norm: 0.7332145160658347, iteration: 423509
loss: 1.0240734815597534,grad_norm: 0.9999999703539789, iteration: 423510
loss: 0.9957689046859741,grad_norm: 0.97793319525643, iteration: 423511
loss: 1.0033986568450928,grad_norm: 0.8610295284759927, iteration: 423512
loss: 1.0451940298080444,grad_norm: 0.9999998284341346, iteration: 423513
loss: 1.0459619760513306,grad_norm: 0.9999999472909444, iteration: 423514
loss: 0.9449672102928162,grad_norm: 0.8285553449974478, iteration: 423515
loss: 0.9985981583595276,grad_norm: 0.7115960640798511, iteration: 423516
loss: 0.9963862895965576,grad_norm: 0.7227052824543447, iteration: 423517
loss: 1.1105180978775024,grad_norm: 0.9999999758632273, iteration: 423518
loss: 0.9984756112098694,grad_norm: 0.6830236962430102, iteration: 423519
loss: 1.0252952575683594,grad_norm: 0.9999990988497237, iteration: 423520
loss: 1.0912853479385376,grad_norm: 0.9399370784312829, iteration: 423521
loss: 1.0447031259536743,grad_norm: 0.8157487834071858, iteration: 423522
loss: 0.9631773829460144,grad_norm: 0.758301937442345, iteration: 423523
loss: 1.032538652420044,grad_norm: 0.713279338610326, iteration: 423524
loss: 0.9765581488609314,grad_norm: 0.7451170322417869, iteration: 423525
loss: 1.0160433053970337,grad_norm: 0.7150155624219192, iteration: 423526
loss: 1.0259054899215698,grad_norm: 0.8341056866803694, iteration: 423527
loss: 1.0140752792358398,grad_norm: 0.889235988495313, iteration: 423528
loss: 1.0403375625610352,grad_norm: 0.9999993798635023, iteration: 423529
loss: 1.0096783638000488,grad_norm: 0.999999054057678, iteration: 423530
loss: 0.9799965620040894,grad_norm: 0.881368243557187, iteration: 423531
loss: 1.026299238204956,grad_norm: 0.7423199538508805, iteration: 423532
loss: 1.037048578262329,grad_norm: 0.8622529511639137, iteration: 423533
loss: 1.0028414726257324,grad_norm: 0.8240629532841799, iteration: 423534
loss: 1.0074266195297241,grad_norm: 0.9999990427379641, iteration: 423535
loss: 1.0500447750091553,grad_norm: 0.9999998991319177, iteration: 423536
loss: 1.1278868913650513,grad_norm: 0.9999995198502788, iteration: 423537
loss: 0.9941834211349487,grad_norm: 0.7047584993359194, iteration: 423538
loss: 1.0404003858566284,grad_norm: 0.8668667769532528, iteration: 423539
loss: 1.0547926425933838,grad_norm: 0.9955063688906446, iteration: 423540
loss: 1.0114151239395142,grad_norm: 0.8098581518457794, iteration: 423541
loss: 0.9982377886772156,grad_norm: 0.8104847117317051, iteration: 423542
loss: 0.9897558093070984,grad_norm: 0.9646534319635791, iteration: 423543
loss: 1.004331111907959,grad_norm: 0.8643855457564014, iteration: 423544
loss: 1.0766570568084717,grad_norm: 0.9999992097174041, iteration: 423545
loss: 0.9967522621154785,grad_norm: 0.9999995771687583, iteration: 423546
loss: 1.0181312561035156,grad_norm: 0.8530979191265453, iteration: 423547
loss: 1.0051991939544678,grad_norm: 0.7863220261494339, iteration: 423548
loss: 1.0141814947128296,grad_norm: 0.8730904328319378, iteration: 423549
loss: 1.007232666015625,grad_norm: 0.9079269399949882, iteration: 423550
loss: 1.0908539295196533,grad_norm: 0.9999992389380132, iteration: 423551
loss: 1.0817036628723145,grad_norm: 0.9999991657206168, iteration: 423552
loss: 1.0446664094924927,grad_norm: 0.8164006953637305, iteration: 423553
loss: 1.04648756980896,grad_norm: 0.824948321606331, iteration: 423554
loss: 1.082650899887085,grad_norm: 0.8975587596230876, iteration: 423555
loss: 1.0311493873596191,grad_norm: 0.7985109694739136, iteration: 423556
loss: 1.0704474449157715,grad_norm: 0.9062471163015002, iteration: 423557
loss: 0.9671761393547058,grad_norm: 0.6621599099377659, iteration: 423558
loss: 1.0048832893371582,grad_norm: 0.8259612845356297, iteration: 423559
loss: 1.006147027015686,grad_norm: 0.8982823760890539, iteration: 423560
loss: 0.99534672498703,grad_norm: 0.6785728619803111, iteration: 423561
loss: 0.9949384927749634,grad_norm: 0.8090441287123522, iteration: 423562
loss: 0.9796314835548401,grad_norm: 0.8485810536768406, iteration: 423563
loss: 0.974640429019928,grad_norm: 0.7176794528175754, iteration: 423564
loss: 1.029544711112976,grad_norm: 0.6367538682934287, iteration: 423565
loss: 1.0078349113464355,grad_norm: 0.9999991560303186, iteration: 423566
loss: 1.021626353263855,grad_norm: 0.8061819029620608, iteration: 423567
loss: 1.0375794172286987,grad_norm: 0.7766844680121936, iteration: 423568
loss: 1.0042780637741089,grad_norm: 0.7466344848689475, iteration: 423569
loss: 1.0664396286010742,grad_norm: 0.9999994087966004, iteration: 423570
loss: 1.119089961051941,grad_norm: 0.9999999644468384, iteration: 423571
loss: 1.0094478130340576,grad_norm: 0.7158219842768874, iteration: 423572
loss: 1.064276933670044,grad_norm: 0.9999999307797738, iteration: 423573
loss: 1.0179520845413208,grad_norm: 0.9999996593712315, iteration: 423574
loss: 1.023758053779602,grad_norm: 0.8178276023667633, iteration: 423575
loss: 1.0240697860717773,grad_norm: 0.892505740786061, iteration: 423576
loss: 1.0534288883209229,grad_norm: 0.8832650801064399, iteration: 423577
loss: 1.0038056373596191,grad_norm: 0.8222160711596193, iteration: 423578
loss: 0.9795119166374207,grad_norm: 0.6678036864757773, iteration: 423579
loss: 0.971204400062561,grad_norm: 0.8447717851298456, iteration: 423580
loss: 0.9680063724517822,grad_norm: 0.8225915540105672, iteration: 423581
loss: 1.0012255907058716,grad_norm: 0.9281360269447377, iteration: 423582
loss: 1.006310224533081,grad_norm: 0.793223079324352, iteration: 423583
loss: 1.0198702812194824,grad_norm: 0.9999997650464123, iteration: 423584
loss: 1.0131055116653442,grad_norm: 0.8572495286575754, iteration: 423585
loss: 1.0043933391571045,grad_norm: 0.8102299099080429, iteration: 423586
loss: 0.9907866716384888,grad_norm: 0.8457353808317127, iteration: 423587
loss: 0.9820521473884583,grad_norm: 0.6613361097214225, iteration: 423588
loss: 0.9822218418121338,grad_norm: 0.7216445399893611, iteration: 423589
loss: 1.0072592496871948,grad_norm: 0.7747114700677477, iteration: 423590
loss: 1.0321693420410156,grad_norm: 0.7907570778012613, iteration: 423591
loss: 1.009052038192749,grad_norm: 0.9053752105090842, iteration: 423592
loss: 1.116729736328125,grad_norm: 0.9999991149753372, iteration: 423593
loss: 1.0067992210388184,grad_norm: 0.9999993026644669, iteration: 423594
loss: 1.0653138160705566,grad_norm: 0.7561715112652869, iteration: 423595
loss: 0.9748798608779907,grad_norm: 0.835754771905704, iteration: 423596
loss: 1.0161223411560059,grad_norm: 0.7256330507559944, iteration: 423597
loss: 1.000619888305664,grad_norm: 0.6723553584356554, iteration: 423598
loss: 1.0123846530914307,grad_norm: 0.8942029744836852, iteration: 423599
loss: 1.0196888446807861,grad_norm: 0.7700416246309121, iteration: 423600
loss: 0.9668907523155212,grad_norm: 0.8713398565544523, iteration: 423601
loss: 1.0184937715530396,grad_norm: 0.8653670592553497, iteration: 423602
loss: 0.9977437853813171,grad_norm: 0.7913492019073669, iteration: 423603
loss: 1.0388866662979126,grad_norm: 0.7878286850609281, iteration: 423604
loss: 1.0524059534072876,grad_norm: 0.999999069936373, iteration: 423605
loss: 0.9954158067703247,grad_norm: 0.7875569213653418, iteration: 423606
loss: 1.0063798427581787,grad_norm: 0.8157672847969311, iteration: 423607
loss: 1.0036101341247559,grad_norm: 0.9999992417721408, iteration: 423608
loss: 0.9609049558639526,grad_norm: 0.9999990255887092, iteration: 423609
loss: 1.1001217365264893,grad_norm: 0.9999996522295408, iteration: 423610
loss: 1.0060844421386719,grad_norm: 0.7090780844893225, iteration: 423611
loss: 1.0519452095031738,grad_norm: 1.000000044428847, iteration: 423612
loss: 0.9922294020652771,grad_norm: 0.8847628839202707, iteration: 423613
loss: 0.9580383896827698,grad_norm: 0.7632125826355438, iteration: 423614
loss: 1.0007362365722656,grad_norm: 0.9326614225807378, iteration: 423615
loss: 1.003623366355896,grad_norm: 0.661939759628953, iteration: 423616
loss: 1.0012543201446533,grad_norm: 0.7744752354362263, iteration: 423617
loss: 1.035671591758728,grad_norm: 0.9999993533063678, iteration: 423618
loss: 0.9833834767341614,grad_norm: 0.6751245300134476, iteration: 423619
loss: 0.9983251094818115,grad_norm: 0.7532469129482675, iteration: 423620
loss: 1.0254203081130981,grad_norm: 0.999999074135329, iteration: 423621
loss: 0.9908447265625,grad_norm: 0.7216155037787211, iteration: 423622
loss: 1.087761402130127,grad_norm: 0.7833662216296076, iteration: 423623
loss: 1.0237802267074585,grad_norm: 0.9999999044448409, iteration: 423624
loss: 1.0068126916885376,grad_norm: 0.6411225783066449, iteration: 423625
loss: 1.0213024616241455,grad_norm: 0.8588289459486315, iteration: 423626
loss: 0.9708924889564514,grad_norm: 0.7986449840688589, iteration: 423627
loss: 1.020865797996521,grad_norm: 0.9999993328785963, iteration: 423628
loss: 0.9853150844573975,grad_norm: 0.7684452258848117, iteration: 423629
loss: 1.0494228601455688,grad_norm: 0.9999996950544334, iteration: 423630
loss: 1.0082454681396484,grad_norm: 0.7560635337979907, iteration: 423631
loss: 1.0304503440856934,grad_norm: 0.827409734992069, iteration: 423632
loss: 1.0321967601776123,grad_norm: 0.9999994083823592, iteration: 423633
loss: 0.9657500386238098,grad_norm: 0.8450991390945807, iteration: 423634
loss: 1.0058943033218384,grad_norm: 0.8622427802503292, iteration: 423635
loss: 0.9859905242919922,grad_norm: 0.7539720531320571, iteration: 423636
loss: 1.0261489152908325,grad_norm: 0.8294948736221004, iteration: 423637
loss: 1.0800610780715942,grad_norm: 0.9999998754080341, iteration: 423638
loss: 1.008168339729309,grad_norm: 0.8971380576626514, iteration: 423639
loss: 1.0266623497009277,grad_norm: 0.9999999719245203, iteration: 423640
loss: 0.9734598398208618,grad_norm: 0.7347850968348717, iteration: 423641
loss: 1.0479100942611694,grad_norm: 0.9999991811314797, iteration: 423642
loss: 0.9747790694236755,grad_norm: 0.8751797686425842, iteration: 423643
loss: 0.9910531044006348,grad_norm: 0.6484500119326534, iteration: 423644
loss: 1.0279033184051514,grad_norm: 0.8469833713352822, iteration: 423645
loss: 1.0116158723831177,grad_norm: 0.6913096871306641, iteration: 423646
loss: 0.9926827549934387,grad_norm: 0.7383853909960811, iteration: 423647
loss: 0.9622713923454285,grad_norm: 0.9550451413233223, iteration: 423648
loss: 1.141839623451233,grad_norm: 0.9999992135656658, iteration: 423649
loss: 0.9964967370033264,grad_norm: 0.8867335012226552, iteration: 423650
loss: 0.9912692308425903,grad_norm: 0.8456736017278604, iteration: 423651
loss: 0.9853960275650024,grad_norm: 0.6387370668055358, iteration: 423652
loss: 1.0077036619186401,grad_norm: 0.7754938851732863, iteration: 423653
loss: 1.0468298196792603,grad_norm: 0.9999990539178175, iteration: 423654
loss: 1.0405484437942505,grad_norm: 0.794353240815645, iteration: 423655
loss: 1.019454836845398,grad_norm: 0.7279998164134168, iteration: 423656
loss: 1.0128499269485474,grad_norm: 0.8926938791113986, iteration: 423657
loss: 1.009645700454712,grad_norm: 0.8863824857429844, iteration: 423658
loss: 1.0529736280441284,grad_norm: 0.739499668872144, iteration: 423659
loss: 1.0153018236160278,grad_norm: 0.7608262236861701, iteration: 423660
loss: 0.9944089651107788,grad_norm: 0.8029669574547201, iteration: 423661
loss: 1.0025755167007446,grad_norm: 0.8497757462835424, iteration: 423662
loss: 1.0297595262527466,grad_norm: 0.854782322093989, iteration: 423663
loss: 1.023801565170288,grad_norm: 0.8233942376817623, iteration: 423664
loss: 1.0023773908615112,grad_norm: 0.7191969380765335, iteration: 423665
loss: 1.0006219148635864,grad_norm: 0.8538510756185551, iteration: 423666
loss: 0.9954159259796143,grad_norm: 0.7034717070574933, iteration: 423667
loss: 0.9900414943695068,grad_norm: 0.7199401550053579, iteration: 423668
loss: 0.9394992589950562,grad_norm: 0.9999991307061106, iteration: 423669
loss: 1.0213465690612793,grad_norm: 0.7684173353492896, iteration: 423670
loss: 0.9847502708435059,grad_norm: 0.8343934938732532, iteration: 423671
loss: 0.9983505010604858,grad_norm: 0.9999995711875983, iteration: 423672
loss: 1.1013033390045166,grad_norm: 0.9999994043407158, iteration: 423673
loss: 1.0102208852767944,grad_norm: 0.7997465311579287, iteration: 423674
loss: 0.9948725700378418,grad_norm: 0.7036242276905952, iteration: 423675
loss: 1.0561041831970215,grad_norm: 0.8194081137574557, iteration: 423676
loss: 0.9971925616264343,grad_norm: 0.7911549774312651, iteration: 423677
loss: 1.0481805801391602,grad_norm: 0.8049177259826205, iteration: 423678
loss: 1.0190718173980713,grad_norm: 0.7621115142245831, iteration: 423679
loss: 1.0091724395751953,grad_norm: 0.638083949322804, iteration: 423680
loss: 1.0039831399917603,grad_norm: 0.9681788110394491, iteration: 423681
loss: 1.0342094898223877,grad_norm: 0.8472036138789281, iteration: 423682
loss: 1.0113060474395752,grad_norm: 0.7876113560396691, iteration: 423683
loss: 0.9962971210479736,grad_norm: 0.8461207753349707, iteration: 423684
loss: 1.0524860620498657,grad_norm: 0.7832949419453166, iteration: 423685
loss: 0.9726599454879761,grad_norm: 0.7488664200298897, iteration: 423686
loss: 0.9637449383735657,grad_norm: 0.7900196046591352, iteration: 423687
loss: 0.9816930890083313,grad_norm: 0.9999996422280435, iteration: 423688
loss: 1.0100643634796143,grad_norm: 0.7778577817055387, iteration: 423689
loss: 1.1467816829681396,grad_norm: 0.9999991761686974, iteration: 423690
loss: 1.0492123365402222,grad_norm: 0.9158674586906291, iteration: 423691
loss: 1.025529146194458,grad_norm: 0.8420652813580519, iteration: 423692
loss: 1.0050835609436035,grad_norm: 0.7268926790249901, iteration: 423693
loss: 0.971811056137085,grad_norm: 0.6958115087945046, iteration: 423694
loss: 0.9796643257141113,grad_norm: 0.8102499746978293, iteration: 423695
loss: 0.9768213629722595,grad_norm: 0.7923819087566423, iteration: 423696
loss: 1.0073037147521973,grad_norm: 0.8254230290958515, iteration: 423697
loss: 0.9821608066558838,grad_norm: 0.7910608418687195, iteration: 423698
loss: 0.9741052985191345,grad_norm: 0.7852349528693751, iteration: 423699
loss: 1.0079703330993652,grad_norm: 0.7263605898748181, iteration: 423700
loss: 1.0144490003585815,grad_norm: 0.9999995228899883, iteration: 423701
loss: 0.9735809564590454,grad_norm: 0.7161382089489674, iteration: 423702
loss: 1.0093852281570435,grad_norm: 0.8412997406725533, iteration: 423703
loss: 1.0016541481018066,grad_norm: 0.9999992908330873, iteration: 423704
loss: 0.990429162979126,grad_norm: 0.6605256054785043, iteration: 423705
loss: 0.9773198366165161,grad_norm: 0.8634584706184737, iteration: 423706
loss: 1.0301495790481567,grad_norm: 0.7930772776604469, iteration: 423707
loss: 0.9884964227676392,grad_norm: 0.8780991277953899, iteration: 423708
loss: 0.9974533319473267,grad_norm: 0.7514369066861584, iteration: 423709
loss: 1.0170643329620361,grad_norm: 0.9999991154365612, iteration: 423710
loss: 1.211264729499817,grad_norm: 0.9999991913540964, iteration: 423711
loss: 1.0259674787521362,grad_norm: 0.8521633092051006, iteration: 423712
loss: 1.0019886493682861,grad_norm: 0.7992678134123595, iteration: 423713
loss: 1.060013771057129,grad_norm: 0.8170696656383595, iteration: 423714
loss: 1.025625467300415,grad_norm: 0.6691444405340976, iteration: 423715
loss: 0.9940382838249207,grad_norm: 0.7879126301043741, iteration: 423716
loss: 0.9924775958061218,grad_norm: 0.7595869906854771, iteration: 423717
loss: 0.9707300662994385,grad_norm: 0.712073020929881, iteration: 423718
loss: 1.0198274850845337,grad_norm: 0.9231937042064473, iteration: 423719
loss: 0.987759530544281,grad_norm: 0.7296340916862127, iteration: 423720
loss: 1.0003108978271484,grad_norm: 0.8595618767303654, iteration: 423721
loss: 1.0187019109725952,grad_norm: 0.7350388989432102, iteration: 423722
loss: 0.9531378149986267,grad_norm: 0.8505229202376754, iteration: 423723
loss: 1.0528401136398315,grad_norm: 0.8270180682335815, iteration: 423724
loss: 1.0587735176086426,grad_norm: 0.9999992838700331, iteration: 423725
loss: 1.0885370969772339,grad_norm: 0.9999994128564484, iteration: 423726
loss: 0.9706093668937683,grad_norm: 0.705121107900858, iteration: 423727
loss: 0.9923156499862671,grad_norm: 0.8612927218725641, iteration: 423728
loss: 1.0124038457870483,grad_norm: 0.7128960643014398, iteration: 423729
loss: 0.9877884387969971,grad_norm: 0.8631920655608477, iteration: 423730
loss: 1.0109082460403442,grad_norm: 0.7225671511746659, iteration: 423731
loss: 1.0277115106582642,grad_norm: 0.9031552719284026, iteration: 423732
loss: 1.0044796466827393,grad_norm: 0.9583865824215259, iteration: 423733
loss: 1.099509596824646,grad_norm: 0.9503605070177036, iteration: 423734
loss: 0.9970053434371948,grad_norm: 0.7729411193968264, iteration: 423735
loss: 1.006238341331482,grad_norm: 0.684433845393005, iteration: 423736
loss: 1.0627673864364624,grad_norm: 0.8341225408661493, iteration: 423737
loss: 0.9872941374778748,grad_norm: 0.7505611649829504, iteration: 423738
loss: 1.0372978448867798,grad_norm: 0.9999994811597057, iteration: 423739
loss: 0.9877932667732239,grad_norm: 0.6671738441380927, iteration: 423740
loss: 1.1004548072814941,grad_norm: 0.8425469386839275, iteration: 423741
loss: 1.0002214908599854,grad_norm: 0.792617820845905, iteration: 423742
loss: 0.9980127811431885,grad_norm: 0.6751086789104465, iteration: 423743
loss: 0.9730415344238281,grad_norm: 0.7216865805978666, iteration: 423744
loss: 0.9697136878967285,grad_norm: 0.7919459324281224, iteration: 423745
loss: 1.0021084547042847,grad_norm: 0.801115062819096, iteration: 423746
loss: 1.0542157888412476,grad_norm: 0.9999994182580122, iteration: 423747
loss: 1.009094476699829,grad_norm: 0.9999997286151137, iteration: 423748
loss: 1.0107371807098389,grad_norm: 0.8482670763669563, iteration: 423749
loss: 0.9659423828125,grad_norm: 0.6663417659818355, iteration: 423750
loss: 1.0342262983322144,grad_norm: 0.8225982065289611, iteration: 423751
loss: 0.9882086515426636,grad_norm: 0.9999990756962293, iteration: 423752
loss: 0.9877969622612,grad_norm: 0.792846288781378, iteration: 423753
loss: 1.030910849571228,grad_norm: 0.7445784115190885, iteration: 423754
loss: 0.9928171634674072,grad_norm: 0.8768636902553841, iteration: 423755
loss: 1.0041162967681885,grad_norm: 0.8716500301328791, iteration: 423756
loss: 1.036370873451233,grad_norm: 0.9999992129431442, iteration: 423757
loss: 1.1002154350280762,grad_norm: 0.999999540480271, iteration: 423758
loss: 1.0075727701187134,grad_norm: 0.8241160648778765, iteration: 423759
loss: 1.0386079549789429,grad_norm: 0.8019320510009627, iteration: 423760
loss: 0.9459768533706665,grad_norm: 0.7196814066447922, iteration: 423761
loss: 1.0003803968429565,grad_norm: 0.7095340580398546, iteration: 423762
loss: 0.983494222164154,grad_norm: 0.7628834453998258, iteration: 423763
loss: 1.0222362279891968,grad_norm: 0.9172944680729752, iteration: 423764
loss: 1.080967664718628,grad_norm: 0.9999991287950482, iteration: 423765
loss: 1.0153539180755615,grad_norm: 1.0000000023732358, iteration: 423766
loss: 1.0169368982315063,grad_norm: 0.7499733768342319, iteration: 423767
loss: 0.9715319871902466,grad_norm: 0.7103899337344728, iteration: 423768
loss: 0.9974306225776672,grad_norm: 0.9256163968052522, iteration: 423769
loss: 0.9820076823234558,grad_norm: 0.7215234348823819, iteration: 423770
loss: 1.0549378395080566,grad_norm: 0.8314731519603543, iteration: 423771
loss: 0.9813724756240845,grad_norm: 0.9912211646209772, iteration: 423772
loss: 1.0055325031280518,grad_norm: 0.9746641693865213, iteration: 423773
loss: 0.989747166633606,grad_norm: 0.6734743794808307, iteration: 423774
loss: 1.0003703832626343,grad_norm: 0.7786163596022226, iteration: 423775
loss: 0.9824092984199524,grad_norm: 0.7356922400155549, iteration: 423776
loss: 1.0026386976242065,grad_norm: 0.7880488899499476, iteration: 423777
loss: 1.0035685300827026,grad_norm: 0.8578985171320765, iteration: 423778
loss: 0.9549385905265808,grad_norm: 0.7630869818912118, iteration: 423779
loss: 1.0555537939071655,grad_norm: 0.8337174153038088, iteration: 423780
loss: 1.003871202468872,grad_norm: 0.7470983200960646, iteration: 423781
loss: 0.9996228814125061,grad_norm: 0.8888617623156597, iteration: 423782
loss: 0.9845555424690247,grad_norm: 0.8338149535545409, iteration: 423783
loss: 0.9415032267570496,grad_norm: 0.8209606862192272, iteration: 423784
loss: 1.0067781209945679,grad_norm: 0.8658347909366955, iteration: 423785
loss: 1.0117135047912598,grad_norm: 0.7957225778660252, iteration: 423786
loss: 1.0106778144836426,grad_norm: 0.8515458819309205, iteration: 423787
loss: 1.0183016061782837,grad_norm: 0.9999997049357765, iteration: 423788
loss: 0.9924918413162231,grad_norm: 0.7387843500325105, iteration: 423789
loss: 1.2173100709915161,grad_norm: 1.0000000415358787, iteration: 423790
loss: 1.204788088798523,grad_norm: 0.9999996717551901, iteration: 423791
loss: 1.025031328201294,grad_norm: 0.8285820383038811, iteration: 423792
loss: 0.9663082361221313,grad_norm: 0.7078029969249974, iteration: 423793
loss: 1.1393061876296997,grad_norm: 0.9999996324361762, iteration: 423794
loss: 0.9866730570793152,grad_norm: 0.6560440245881741, iteration: 423795
loss: 1.055273175239563,grad_norm: 0.7862673256554219, iteration: 423796
loss: 1.0279532670974731,grad_norm: 0.7928092258152635, iteration: 423797
loss: 0.9859187602996826,grad_norm: 0.7063473123930274, iteration: 423798
loss: 1.0120865106582642,grad_norm: 0.6671249579287475, iteration: 423799
loss: 0.9707121253013611,grad_norm: 0.7052617432280611, iteration: 423800
loss: 0.9929303526878357,grad_norm: 0.7195933822583382, iteration: 423801
loss: 0.9804035425186157,grad_norm: 0.8050098079290531, iteration: 423802
loss: 1.0488353967666626,grad_norm: 0.9999999457211091, iteration: 423803
loss: 0.9818100929260254,grad_norm: 0.7332277753201113, iteration: 423804
loss: 0.98419189453125,grad_norm: 0.9300051459634568, iteration: 423805
loss: 1.0255601406097412,grad_norm: 0.9999996169875714, iteration: 423806
loss: 0.9847791790962219,grad_norm: 0.9999995074790347, iteration: 423807
loss: 1.0656626224517822,grad_norm: 0.910689630197241, iteration: 423808
loss: 0.9726930856704712,grad_norm: 0.7731418816762357, iteration: 423809
loss: 0.956263542175293,grad_norm: 0.7316248628652838, iteration: 423810
loss: 0.9985930323600769,grad_norm: 0.9016380772395922, iteration: 423811
loss: 0.9779626131057739,grad_norm: 0.7854483006532493, iteration: 423812
loss: 1.007686972618103,grad_norm: 0.9999991492974509, iteration: 423813
loss: 0.9857056736946106,grad_norm: 0.8095366331444629, iteration: 423814
loss: 1.0196243524551392,grad_norm: 0.9999990884714449, iteration: 423815
loss: 0.9713858962059021,grad_norm: 0.9999989858922664, iteration: 423816
loss: 1.0127997398376465,grad_norm: 0.91637961160519, iteration: 423817
loss: 1.016594648361206,grad_norm: 0.7594451884694189, iteration: 423818
loss: 1.0096858739852905,grad_norm: 0.7414246311985045, iteration: 423819
loss: 0.9734821319580078,grad_norm: 0.8243482085341908, iteration: 423820
loss: 1.0519946813583374,grad_norm: 0.9999996536902683, iteration: 423821
loss: 0.9961341619491577,grad_norm: 0.6683478409412339, iteration: 423822
loss: 0.9744175672531128,grad_norm: 0.8830353117707387, iteration: 423823
loss: 1.0041393041610718,grad_norm: 0.8138943937162852, iteration: 423824
loss: 1.0306931734085083,grad_norm: 0.9999993796897015, iteration: 423825
loss: 1.0252058506011963,grad_norm: 0.7988062760312727, iteration: 423826
loss: 1.0109926462173462,grad_norm: 0.7133431448142312, iteration: 423827
loss: 0.9890890717506409,grad_norm: 0.7450552165369014, iteration: 423828
loss: 1.0186896324157715,grad_norm: 0.8243248694301143, iteration: 423829
loss: 0.9825604557991028,grad_norm: 0.9999991115794515, iteration: 423830
loss: 1.0767333507537842,grad_norm: 0.9999991702943599, iteration: 423831
loss: 0.9746251702308655,grad_norm: 0.6566001199556786, iteration: 423832
loss: 1.0325998067855835,grad_norm: 0.9999990744352822, iteration: 423833
loss: 1.007046103477478,grad_norm: 0.7694774255416551, iteration: 423834
loss: 1.0335047245025635,grad_norm: 0.6390524797212119, iteration: 423835
loss: 0.9893012642860413,grad_norm: 0.7100715162755261, iteration: 423836
loss: 0.9299604296684265,grad_norm: 0.7740330885962585, iteration: 423837
loss: 0.9728166460990906,grad_norm: 0.9337389657880071, iteration: 423838
loss: 1.0797231197357178,grad_norm: 0.9999992685660557, iteration: 423839
loss: 1.000199556350708,grad_norm: 0.9999991752076596, iteration: 423840
loss: 0.9926819801330566,grad_norm: 0.7360038370179599, iteration: 423841
loss: 0.9608957767486572,grad_norm: 0.7125322600541861, iteration: 423842
loss: 1.0578762292861938,grad_norm: 0.9269631756753272, iteration: 423843
loss: 1.0624076128005981,grad_norm: 0.9999992248673808, iteration: 423844
loss: 1.0216155052185059,grad_norm: 0.7821718480077708, iteration: 423845
loss: 0.989142119884491,grad_norm: 0.9999990255533581, iteration: 423846
loss: 1.0434616804122925,grad_norm: 0.657507449733537, iteration: 423847
loss: 1.012427568435669,grad_norm: 0.8583916678890554, iteration: 423848
loss: 1.0165480375289917,grad_norm: 0.9676812126654992, iteration: 423849
loss: 0.9915685057640076,grad_norm: 0.8609984540064407, iteration: 423850
loss: 0.9983564615249634,grad_norm: 0.7449884639502287, iteration: 423851
loss: 0.9832556247711182,grad_norm: 0.758740414079993, iteration: 423852
loss: 0.9989686608314514,grad_norm: 0.7693024824480791, iteration: 423853
loss: 1.0372065305709839,grad_norm: 0.651685836638536, iteration: 423854
loss: 1.0211182832717896,grad_norm: 0.7191545461770417, iteration: 423855
loss: 1.0386159420013428,grad_norm: 0.9999991938912214, iteration: 423856
loss: 1.0589570999145508,grad_norm: 0.8797156089550738, iteration: 423857
loss: 0.9857357740402222,grad_norm: 0.8284174796889261, iteration: 423858
loss: 1.0164985656738281,grad_norm: 0.7763105440809597, iteration: 423859
loss: 1.0246484279632568,grad_norm: 0.6928086217054031, iteration: 423860
loss: 0.9665777683258057,grad_norm: 0.6932555944558728, iteration: 423861
loss: 1.083132028579712,grad_norm: 0.9999998225970469, iteration: 423862
loss: 1.029284954071045,grad_norm: 0.9999997029898724, iteration: 423863
loss: 1.0346121788024902,grad_norm: 0.9999998300005563, iteration: 423864
loss: 0.990166187286377,grad_norm: 0.9727972880173464, iteration: 423865
loss: 1.0370564460754395,grad_norm: 0.9684883069576173, iteration: 423866
loss: 0.9853747487068176,grad_norm: 0.705362389682422, iteration: 423867
loss: 1.0011504888534546,grad_norm: 0.7371048231745261, iteration: 423868
loss: 1.0812656879425049,grad_norm: 0.7922872959672691, iteration: 423869
loss: 0.988190770149231,grad_norm: 0.7146983451197332, iteration: 423870
loss: 0.9978019595146179,grad_norm: 0.6934833863295228, iteration: 423871
loss: 1.0045849084854126,grad_norm: 0.7178709853917701, iteration: 423872
loss: 1.0262176990509033,grad_norm: 0.999999599444754, iteration: 423873
loss: 0.9733424186706543,grad_norm: 0.8419757803871577, iteration: 423874
loss: 0.976586639881134,grad_norm: 0.8852353818948037, iteration: 423875
loss: 1.0015968084335327,grad_norm: 0.7113198447917654, iteration: 423876
loss: 1.017879605293274,grad_norm: 0.9999995932556505, iteration: 423877
loss: 0.9973272681236267,grad_norm: 0.7624409177979329, iteration: 423878
loss: 1.0400429964065552,grad_norm: 0.9999993799311868, iteration: 423879
loss: 1.0280258655548096,grad_norm: 0.9999992959101041, iteration: 423880
loss: 1.0262326002120972,grad_norm: 0.8535297515827879, iteration: 423881
loss: 1.0509840250015259,grad_norm: 0.9999997760810329, iteration: 423882
loss: 0.9852963089942932,grad_norm: 0.8193391113358984, iteration: 423883
loss: 1.033463478088379,grad_norm: 0.9807454870344872, iteration: 423884
loss: 1.0450694561004639,grad_norm: 0.9999994723287572, iteration: 423885
loss: 0.9748194217681885,grad_norm: 0.7738521906785405, iteration: 423886
loss: 1.1283323764801025,grad_norm: 0.9999993908851842, iteration: 423887
loss: 1.0272852182388306,grad_norm: 0.6473244888695587, iteration: 423888
loss: 1.018781065940857,grad_norm: 0.7615574682968688, iteration: 423889
loss: 0.9660797715187073,grad_norm: 0.771024353250901, iteration: 423890
loss: 0.9878470301628113,grad_norm: 0.8049863888649569, iteration: 423891
loss: 0.9718477725982666,grad_norm: 0.9999998759819495, iteration: 423892
loss: 1.0249043703079224,grad_norm: 0.6800230882125485, iteration: 423893
loss: 1.0218228101730347,grad_norm: 0.8023856104698966, iteration: 423894
loss: 1.0174856185913086,grad_norm: 0.8111190123187082, iteration: 423895
loss: 1.0140386819839478,grad_norm: 0.999999125995781, iteration: 423896
loss: 1.011953592300415,grad_norm: 0.7762228195156945, iteration: 423897
loss: 1.0334389209747314,grad_norm: 0.999999303433611, iteration: 423898
loss: 0.9852975606918335,grad_norm: 0.8882174233394569, iteration: 423899
loss: 0.9485717415809631,grad_norm: 0.7752169110719606, iteration: 423900
loss: 0.999235212802887,grad_norm: 0.8846683056903378, iteration: 423901
loss: 0.949268102645874,grad_norm: 0.8672625278467433, iteration: 423902
loss: 1.0393378734588623,grad_norm: 0.5983081947493338, iteration: 423903
loss: 0.9828823208808899,grad_norm: 0.7824725312009919, iteration: 423904
loss: 0.9984361529350281,grad_norm: 0.7866098811208747, iteration: 423905
loss: 0.981330156326294,grad_norm: 0.8671122463802912, iteration: 423906
loss: 0.975834846496582,grad_norm: 0.9332231088100263, iteration: 423907
loss: 1.011887788772583,grad_norm: 0.9210327816512561, iteration: 423908
loss: 0.9936072826385498,grad_norm: 0.7870014928085012, iteration: 423909
loss: 0.9965288043022156,grad_norm: 0.878927810048987, iteration: 423910
loss: 0.9817065596580505,grad_norm: 0.7752697628050674, iteration: 423911
loss: 1.018969178199768,grad_norm: 0.999999972366807, iteration: 423912
loss: 1.019553542137146,grad_norm: 0.902213872589462, iteration: 423913
loss: 1.0515234470367432,grad_norm: 0.8550686823002794, iteration: 423914
loss: 1.0004229545593262,grad_norm: 0.764929663308189, iteration: 423915
loss: 0.998192310333252,grad_norm: 0.7781726375555965, iteration: 423916
loss: 1.005354881286621,grad_norm: 0.7621395603638408, iteration: 423917
loss: 1.1576026678085327,grad_norm: 0.999999533885703, iteration: 423918
loss: 1.0102468729019165,grad_norm: 0.7264777848790653, iteration: 423919
loss: 0.9495636224746704,grad_norm: 0.8061517936651759, iteration: 423920
loss: 1.0188438892364502,grad_norm: 0.6877474455843741, iteration: 423921
loss: 0.9685032367706299,grad_norm: 0.7998235068062435, iteration: 423922
loss: 0.9896575212478638,grad_norm: 0.9999991837122544, iteration: 423923
loss: 0.9841059446334839,grad_norm: 0.7396483908117796, iteration: 423924
loss: 0.9743643999099731,grad_norm: 0.8163355042907131, iteration: 423925
loss: 0.9897904396057129,grad_norm: 0.8019777950918138, iteration: 423926
loss: 1.0130529403686523,grad_norm: 0.958341759434528, iteration: 423927
loss: 0.9921849370002747,grad_norm: 0.749306319109471, iteration: 423928
loss: 0.9771021008491516,grad_norm: 0.9999994163456288, iteration: 423929
loss: 1.0248503684997559,grad_norm: 0.9999998634837791, iteration: 423930
loss: 0.9892257452011108,grad_norm: 0.8400482247440568, iteration: 423931
loss: 1.0007072687149048,grad_norm: 0.7240107709286493, iteration: 423932
loss: 0.984257161617279,grad_norm: 0.8621417603623963, iteration: 423933
loss: 0.9951218366622925,grad_norm: 0.5971051264981518, iteration: 423934
loss: 0.971258819103241,grad_norm: 0.7433819552629949, iteration: 423935
loss: 1.0383026599884033,grad_norm: 0.7770852717868276, iteration: 423936
loss: 0.9887262582778931,grad_norm: 0.7957406058001445, iteration: 423937
loss: 0.9733640551567078,grad_norm: 0.8746456331923577, iteration: 423938
loss: 1.0133047103881836,grad_norm: 0.8249399524675445, iteration: 423939
loss: 0.9914366602897644,grad_norm: 0.9807620816848976, iteration: 423940
loss: 1.0057752132415771,grad_norm: 0.6268178828046842, iteration: 423941
loss: 0.9487425684928894,grad_norm: 0.964159202193598, iteration: 423942
loss: 0.9376741647720337,grad_norm: 0.7687336533133825, iteration: 423943
loss: 1.0062226057052612,grad_norm: 0.7482157685691214, iteration: 423944
loss: 0.9891617298126221,grad_norm: 0.8050144543068635, iteration: 423945
loss: 0.9942864179611206,grad_norm: 0.8079322537826007, iteration: 423946
loss: 0.990543007850647,grad_norm: 0.7254588633626535, iteration: 423947
loss: 0.9621729850769043,grad_norm: 0.8911494497511305, iteration: 423948
loss: 0.995272696018219,grad_norm: 0.6897119935564796, iteration: 423949
loss: 1.0672121047973633,grad_norm: 0.9999991272621293, iteration: 423950
loss: 0.9778478145599365,grad_norm: 0.8264776676655475, iteration: 423951
loss: 1.054344654083252,grad_norm: 0.8300116520609855, iteration: 423952
loss: 1.0005230903625488,grad_norm: 0.7662920398247732, iteration: 423953
loss: 1.0320765972137451,grad_norm: 0.7228328435810727, iteration: 423954
loss: 1.0296332836151123,grad_norm: 0.958993080119517, iteration: 423955
loss: 1.0196309089660645,grad_norm: 0.6880369647520314, iteration: 423956
loss: 1.004348635673523,grad_norm: 0.7614501235283406, iteration: 423957
loss: 1.0305819511413574,grad_norm: 0.7893654869538256, iteration: 423958
loss: 1.0003753900527954,grad_norm: 0.7425248928880237, iteration: 423959
loss: 0.9744241833686829,grad_norm: 0.7565748154176534, iteration: 423960
loss: 0.9932729005813599,grad_norm: 0.9999999047556006, iteration: 423961
loss: 0.9990759491920471,grad_norm: 0.764380252046025, iteration: 423962
loss: 1.0181574821472168,grad_norm: 0.8813427490550395, iteration: 423963
loss: 1.0067089796066284,grad_norm: 0.767603620347756, iteration: 423964
loss: 1.036005973815918,grad_norm: 0.9999999461028021, iteration: 423965
loss: 1.0004581212997437,grad_norm: 0.7985468939572853, iteration: 423966
loss: 1.0214368104934692,grad_norm: 0.9016630627554099, iteration: 423967
loss: 1.0197477340698242,grad_norm: 0.9999994876644933, iteration: 423968
loss: 1.0067503452301025,grad_norm: 0.9999999149845825, iteration: 423969
loss: 0.9860848188400269,grad_norm: 0.773930669510438, iteration: 423970
loss: 1.037143349647522,grad_norm: 0.8323384059589063, iteration: 423971
loss: 0.9773070812225342,grad_norm: 0.7937596979701695, iteration: 423972
loss: 1.0377235412597656,grad_norm: 0.976355889315282, iteration: 423973
loss: 0.9831671118736267,grad_norm: 0.9352782586813685, iteration: 423974
loss: 1.0829025506973267,grad_norm: 0.9999992864772755, iteration: 423975
loss: 1.000577688217163,grad_norm: 0.9290005911299717, iteration: 423976
loss: 0.9824808239936829,grad_norm: 0.7209026309945771, iteration: 423977
loss: 1.0677040815353394,grad_norm: 0.9999992282374668, iteration: 423978
loss: 0.9847663640975952,grad_norm: 0.7618515592568132, iteration: 423979
loss: 0.986083447933197,grad_norm: 0.8471190055056228, iteration: 423980
loss: 0.98990797996521,grad_norm: 0.7240876222062907, iteration: 423981
loss: 0.9973763227462769,grad_norm: 0.7125864188805167, iteration: 423982
loss: 1.0275808572769165,grad_norm: 0.6909250806707732, iteration: 423983
loss: 1.0379235744476318,grad_norm: 0.9573653845268487, iteration: 423984
loss: 1.0087649822235107,grad_norm: 0.8365557444173976, iteration: 423985
loss: 1.0220552682876587,grad_norm: 0.7930160395403566, iteration: 423986
loss: 1.0048167705535889,grad_norm: 0.7387765940284717, iteration: 423987
loss: 0.9968245029449463,grad_norm: 0.8910440864480589, iteration: 423988
loss: 0.9965718984603882,grad_norm: 0.7319926551471401, iteration: 423989
loss: 0.9918019771575928,grad_norm: 0.8695352340215041, iteration: 423990
loss: 1.036305546760559,grad_norm: 0.7504225772741656, iteration: 423991
loss: 1.015243411064148,grad_norm: 0.9470117960304638, iteration: 423992
loss: 0.999433159828186,grad_norm: 0.8851404679470678, iteration: 423993
loss: 1.000787377357483,grad_norm: 0.8049390520196394, iteration: 423994
loss: 0.9760778546333313,grad_norm: 0.7607897667800887, iteration: 423995
loss: 0.9933610558509827,grad_norm: 0.7311250288552594, iteration: 423996
loss: 0.9728411436080933,grad_norm: 0.859570676371039, iteration: 423997
loss: 1.0647811889648438,grad_norm: 0.8956579953075223, iteration: 423998
loss: 1.0246827602386475,grad_norm: 0.7956416005490315, iteration: 423999
loss: 1.0073497295379639,grad_norm: 0.8939410387870748, iteration: 424000
loss: 0.9884087443351746,grad_norm: 0.6907193763886553, iteration: 424001
loss: 0.982382595539093,grad_norm: 0.6757288785057737, iteration: 424002
loss: 1.0780346393585205,grad_norm: 0.8152372187947129, iteration: 424003
loss: 1.000312089920044,grad_norm: 0.832125677312621, iteration: 424004
loss: 0.9926491379737854,grad_norm: 0.7325194711984381, iteration: 424005
loss: 1.0144330263137817,grad_norm: 0.9999991923767075, iteration: 424006
loss: 1.0272730588912964,grad_norm: 0.9999991290231586, iteration: 424007
loss: 1.011015772819519,grad_norm: 0.8167590906005481, iteration: 424008
loss: 0.9842802286148071,grad_norm: 0.8368418066277898, iteration: 424009
loss: 0.9730204939842224,grad_norm: 0.9999998482291004, iteration: 424010
loss: 1.0510071516036987,grad_norm: 0.9171551644515401, iteration: 424011
loss: 1.0489935874938965,grad_norm: 0.9735628958523004, iteration: 424012
loss: 0.975631058216095,grad_norm: 0.7081195018462036, iteration: 424013
loss: 1.0193185806274414,grad_norm: 0.7581266549211435, iteration: 424014
loss: 0.9788410067558289,grad_norm: 0.7489702357882786, iteration: 424015
loss: 0.9935061931610107,grad_norm: 0.9231103088954603, iteration: 424016
loss: 1.002565622329712,grad_norm: 0.8186362535434991, iteration: 424017
loss: 0.9924634099006653,grad_norm: 0.7685002956617988, iteration: 424018
loss: 1.0366566181182861,grad_norm: 0.9999990198998073, iteration: 424019
loss: 1.003127932548523,grad_norm: 0.7842539129110537, iteration: 424020
loss: 1.0416394472122192,grad_norm: 0.8577510375195504, iteration: 424021
loss: 1.1023820638656616,grad_norm: 0.9999998785269134, iteration: 424022
loss: 1.0137684345245361,grad_norm: 0.9999994568404915, iteration: 424023
loss: 1.015723705291748,grad_norm: 0.8411809741875513, iteration: 424024
loss: 0.9670343995094299,grad_norm: 0.76598848476931, iteration: 424025
loss: 1.0041544437408447,grad_norm: 0.8062147796956636, iteration: 424026
loss: 1.030329704284668,grad_norm: 0.879074594487501, iteration: 424027
loss: 0.9763447642326355,grad_norm: 0.9999989228534989, iteration: 424028
loss: 1.0065377950668335,grad_norm: 0.7283768386007645, iteration: 424029
loss: 0.9991070032119751,grad_norm: 0.8281037776450093, iteration: 424030
loss: 1.1277605295181274,grad_norm: 0.9999998032699841, iteration: 424031
loss: 0.9776766300201416,grad_norm: 0.9999991123218198, iteration: 424032
loss: 1.001645565032959,grad_norm: 0.6680019464874211, iteration: 424033
loss: 0.9607394337654114,grad_norm: 0.6590718342500668, iteration: 424034
loss: 1.0580394268035889,grad_norm: 0.9999996638355121, iteration: 424035
loss: 1.0251306295394897,grad_norm: 0.7943195571959988, iteration: 424036
loss: 1.0319243669509888,grad_norm: 0.8100316717585984, iteration: 424037
loss: 0.9939159750938416,grad_norm: 0.7590236744761754, iteration: 424038
loss: 1.052351474761963,grad_norm: 0.6880614960260933, iteration: 424039
loss: 0.9790701270103455,grad_norm: 0.8703068757950703, iteration: 424040
loss: 1.0162951946258545,grad_norm: 0.7695581300746971, iteration: 424041
loss: 1.160284161567688,grad_norm: 0.9823515821732521, iteration: 424042
loss: 0.9988424777984619,grad_norm: 0.7316979743809453, iteration: 424043
loss: 0.9710941910743713,grad_norm: 0.9616337339120133, iteration: 424044
loss: 1.0101447105407715,grad_norm: 0.7250946203473806, iteration: 424045
loss: 1.0126025676727295,grad_norm: 0.8157422129545151, iteration: 424046
loss: 0.9761707782745361,grad_norm: 0.8042161934099183, iteration: 424047
loss: 1.0747292041778564,grad_norm: 0.9999999336877635, iteration: 424048
loss: 1.034159779548645,grad_norm: 0.9999992118074706, iteration: 424049
loss: 0.9971944093704224,grad_norm: 0.9143960381007074, iteration: 424050
loss: 0.9795536994934082,grad_norm: 0.8056008480378561, iteration: 424051
loss: 1.014440655708313,grad_norm: 0.7599005074720693, iteration: 424052
loss: 0.974281907081604,grad_norm: 0.8279450698967068, iteration: 424053
loss: 1.0487459897994995,grad_norm: 1.0000000516456424, iteration: 424054
loss: 0.9840568900108337,grad_norm: 0.7256730646501691, iteration: 424055
loss: 1.0832823514938354,grad_norm: 0.9999996547161506, iteration: 424056
loss: 0.9870595335960388,grad_norm: 0.7762158853161176, iteration: 424057
loss: 1.0068906545639038,grad_norm: 0.7151346314235003, iteration: 424058
loss: 1.008749008178711,grad_norm: 0.7742316943995299, iteration: 424059
loss: 0.9961220026016235,grad_norm: 0.9264258635580934, iteration: 424060
loss: 0.9796075820922852,grad_norm: 0.7387081451982199, iteration: 424061
loss: 0.9833595752716064,grad_norm: 0.812750239885244, iteration: 424062
loss: 1.0105805397033691,grad_norm: 0.8773948306769832, iteration: 424063
loss: 1.0097311735153198,grad_norm: 0.9999994189876501, iteration: 424064
loss: 0.9866220951080322,grad_norm: 0.9168112499053522, iteration: 424065
loss: 1.005142092704773,grad_norm: 0.8854830817921415, iteration: 424066
loss: 1.0057607889175415,grad_norm: 0.8390607417012573, iteration: 424067
loss: 1.0509403944015503,grad_norm: 0.9999995170475026, iteration: 424068
loss: 1.0002597570419312,grad_norm: 0.8331096791269016, iteration: 424069
loss: 1.0049210786819458,grad_norm: 0.9410768192288865, iteration: 424070
loss: 1.0316811800003052,grad_norm: 0.9999996667591097, iteration: 424071
loss: 0.9692238569259644,grad_norm: 0.7391834490112144, iteration: 424072
loss: 1.0068588256835938,grad_norm: 0.7156302127192544, iteration: 424073
loss: 1.0739227533340454,grad_norm: 0.8039338979278924, iteration: 424074
loss: 1.0649579763412476,grad_norm: 0.9343154576776593, iteration: 424075
loss: 1.0108650922775269,grad_norm: 0.7950652569303951, iteration: 424076
loss: 0.9970555901527405,grad_norm: 0.7745798922311338, iteration: 424077
loss: 0.9885532259941101,grad_norm: 0.9999995796644675, iteration: 424078
loss: 1.014654517173767,grad_norm: 0.9999999748438849, iteration: 424079
loss: 0.9987293481826782,grad_norm: 0.7061459794390744, iteration: 424080
loss: 1.0621588230133057,grad_norm: 0.8873881235141272, iteration: 424081
loss: 1.000468373298645,grad_norm: 0.8443899958908533, iteration: 424082
loss: 0.9809858798980713,grad_norm: 0.804764507655454, iteration: 424083
loss: 1.0283770561218262,grad_norm: 0.9999997877603065, iteration: 424084
loss: 1.030670404434204,grad_norm: 0.7888651591011765, iteration: 424085
loss: 1.0113928318023682,grad_norm: 0.9999990967611033, iteration: 424086
loss: 0.9941995143890381,grad_norm: 0.7828317625665607, iteration: 424087
loss: 0.9535955190658569,grad_norm: 0.8621775814750547, iteration: 424088
loss: 1.2127041816711426,grad_norm: 0.9685876317112786, iteration: 424089
loss: 0.9971204400062561,grad_norm: 0.7528146048993271, iteration: 424090
loss: 0.9802665114402771,grad_norm: 0.8631853317058981, iteration: 424091
loss: 1.0232279300689697,grad_norm: 0.8113397359427068, iteration: 424092
loss: 0.979394793510437,grad_norm: 0.7998496492640056, iteration: 424093
loss: 1.0564937591552734,grad_norm: 0.8496672710231632, iteration: 424094
loss: 1.0296258926391602,grad_norm: 0.8124480096432266, iteration: 424095
loss: 1.0270463228225708,grad_norm: 0.6206455112323441, iteration: 424096
loss: 0.9852175116539001,grad_norm: 0.738471555072092, iteration: 424097
loss: 1.0346423387527466,grad_norm: 0.648860061417913, iteration: 424098
loss: 1.038404941558838,grad_norm: 0.9999991200239302, iteration: 424099
loss: 0.9896332025527954,grad_norm: 0.6908698666668075, iteration: 424100
loss: 1.0143406391143799,grad_norm: 0.9999990611916277, iteration: 424101
loss: 1.0755404233932495,grad_norm: 0.9999991294494202, iteration: 424102
loss: 0.9793605804443359,grad_norm: 0.8367555789677299, iteration: 424103
loss: 1.0106678009033203,grad_norm: 0.9999996383254272, iteration: 424104
loss: 1.0262659788131714,grad_norm: 0.7912388019126612, iteration: 424105
loss: 1.005800724029541,grad_norm: 0.7216001771136356, iteration: 424106
loss: 1.0377029180526733,grad_norm: 0.6851083801422981, iteration: 424107
loss: 1.0188137292861938,grad_norm: 0.999999669839204, iteration: 424108
loss: 1.0050297975540161,grad_norm: 0.868589519284354, iteration: 424109
loss: 0.989972710609436,grad_norm: 0.7698951263756342, iteration: 424110
loss: 1.004643440246582,grad_norm: 0.7314095554494388, iteration: 424111
loss: 1.00588858127594,grad_norm: 0.7238506015767262, iteration: 424112
loss: 0.978291392326355,grad_norm: 0.9999994420823377, iteration: 424113
loss: 1.0392460823059082,grad_norm: 0.7370938750103201, iteration: 424114
loss: 0.9923338294029236,grad_norm: 0.8163113970512436, iteration: 424115
loss: 1.0333349704742432,grad_norm: 0.9999997018651856, iteration: 424116
loss: 1.0397447347640991,grad_norm: 0.7801028944558046, iteration: 424117
loss: 1.0563547611236572,grad_norm: 0.8156042664208348, iteration: 424118
loss: 0.9998042583465576,grad_norm: 0.6728973300358078, iteration: 424119
loss: 1.0981881618499756,grad_norm: 0.7889285755163578, iteration: 424120
loss: 0.985899031162262,grad_norm: 0.7216165607487501, iteration: 424121
loss: 0.997429370880127,grad_norm: 0.7375615431325604, iteration: 424122
loss: 1.0305746793746948,grad_norm: 0.7585893397380915, iteration: 424123
loss: 1.002537488937378,grad_norm: 0.7749745438794903, iteration: 424124
loss: 1.0174037218093872,grad_norm: 0.7726634500188776, iteration: 424125
loss: 1.0260331630706787,grad_norm: 0.6470916387120654, iteration: 424126
loss: 1.0135196447372437,grad_norm: 0.6790750558604369, iteration: 424127
loss: 1.006940245628357,grad_norm: 0.7940036012346018, iteration: 424128
loss: 1.0297565460205078,grad_norm: 0.9985739089880239, iteration: 424129
loss: 1.0451102256774902,grad_norm: 0.9999990119259029, iteration: 424130
loss: 0.9759697914123535,grad_norm: 0.9750054685513067, iteration: 424131
loss: 0.9989233613014221,grad_norm: 0.9999999300513563, iteration: 424132
loss: 0.974111795425415,grad_norm: 0.7326124125731633, iteration: 424133
loss: 0.9690576791763306,grad_norm: 0.6610908604724562, iteration: 424134
loss: 0.9934064745903015,grad_norm: 0.6603401534153084, iteration: 424135
loss: 1.0252997875213623,grad_norm: 0.7353391211171728, iteration: 424136
loss: 1.0284956693649292,grad_norm: 0.9999991251768717, iteration: 424137
loss: 1.0428550243377686,grad_norm: 0.9999998040239665, iteration: 424138
loss: 1.0096532106399536,grad_norm: 0.8173546655117682, iteration: 424139
loss: 0.9926104545593262,grad_norm: 0.9999991974465716, iteration: 424140
loss: 0.9934485554695129,grad_norm: 0.8069618993374548, iteration: 424141
loss: 1.0081796646118164,grad_norm: 0.7979667610550912, iteration: 424142
loss: 0.9875804781913757,grad_norm: 0.8025486475597258, iteration: 424143
loss: 0.9913597106933594,grad_norm: 0.8185915026846767, iteration: 424144
loss: 1.0232503414154053,grad_norm: 0.7186153038608368, iteration: 424145
loss: 1.0442358255386353,grad_norm: 0.7618887933977461, iteration: 424146
loss: 1.0308467149734497,grad_norm: 0.8138988393362654, iteration: 424147
loss: 1.0112202167510986,grad_norm: 0.6798128040381695, iteration: 424148
loss: 1.0838788747787476,grad_norm: 0.9999991273489995, iteration: 424149
loss: 1.0030584335327148,grad_norm: 0.8533725967516772, iteration: 424150
loss: 1.0107455253601074,grad_norm: 0.9999994124500322, iteration: 424151
loss: 1.0216538906097412,grad_norm: 0.999999166822932, iteration: 424152
loss: 0.9868343472480774,grad_norm: 0.9355315931843856, iteration: 424153
loss: 1.0288254022598267,grad_norm: 0.7216491066333833, iteration: 424154
loss: 0.9565891623497009,grad_norm: 0.8491651166687448, iteration: 424155
loss: 1.0285638570785522,grad_norm: 0.8435369148503482, iteration: 424156
loss: 0.9869410395622253,grad_norm: 0.9999994427433919, iteration: 424157
loss: 0.9856853485107422,grad_norm: 0.8618640459776041, iteration: 424158
loss: 1.0126233100891113,grad_norm: 0.8257632957353778, iteration: 424159
loss: 0.9829000234603882,grad_norm: 0.748731274768377, iteration: 424160
loss: 0.9899160861968994,grad_norm: 0.7344134545314857, iteration: 424161
loss: 1.010387897491455,grad_norm: 0.9906149970217282, iteration: 424162
loss: 1.0313912630081177,grad_norm: 0.9999999305756239, iteration: 424163
loss: 0.9926482439041138,grad_norm: 0.7669875266030726, iteration: 424164
loss: 0.9695881605148315,grad_norm: 0.9097588710059845, iteration: 424165
loss: 1.0205267667770386,grad_norm: 0.820944215418965, iteration: 424166
loss: 1.0425660610198975,grad_norm: 0.9187459067587359, iteration: 424167
loss: 1.0235505104064941,grad_norm: 0.8867978525048091, iteration: 424168
loss: 1.0679988861083984,grad_norm: 0.8402928205637521, iteration: 424169
loss: 1.0277190208435059,grad_norm: 0.9999993539818594, iteration: 424170
loss: 1.0035526752471924,grad_norm: 0.9999991611877346, iteration: 424171
loss: 1.0028430223464966,grad_norm: 0.8350546198288333, iteration: 424172
loss: 1.1119358539581299,grad_norm: 0.999999280079293, iteration: 424173
loss: 1.0048656463623047,grad_norm: 0.9005227838930132, iteration: 424174
loss: 0.9514402151107788,grad_norm: 0.8032503424666663, iteration: 424175
loss: 0.9982903003692627,grad_norm: 0.6957566492960432, iteration: 424176
loss: 1.001782774925232,grad_norm: 0.8308944940387123, iteration: 424177
loss: 0.9704424142837524,grad_norm: 0.8950206393361979, iteration: 424178
loss: 1.0027647018432617,grad_norm: 0.8863858216608117, iteration: 424179
loss: 0.9804407358169556,grad_norm: 0.7797206866628281, iteration: 424180
loss: 1.000499963760376,grad_norm: 0.8314471952486807, iteration: 424181
loss: 1.0347340106964111,grad_norm: 0.9999998879783087, iteration: 424182
loss: 1.0691165924072266,grad_norm: 0.9854063135042577, iteration: 424183
loss: 1.0290558338165283,grad_norm: 0.7313759859127983, iteration: 424184
loss: 1.0643730163574219,grad_norm: 0.9999999436488914, iteration: 424185
loss: 1.0397837162017822,grad_norm: 0.6798610884739651, iteration: 424186
loss: 1.0762300491333008,grad_norm: 0.9999992947658736, iteration: 424187
loss: 0.9979211091995239,grad_norm: 0.7769486449091825, iteration: 424188
loss: 1.0053250789642334,grad_norm: 0.9999992948664858, iteration: 424189
loss: 1.0103507041931152,grad_norm: 0.6929987713053486, iteration: 424190
loss: 0.994674026966095,grad_norm: 0.8786043063340214, iteration: 424191
loss: 0.9847402572631836,grad_norm: 0.7448976585632228, iteration: 424192
loss: 0.98497074842453,grad_norm: 0.7137113639995907, iteration: 424193
loss: 1.0036009550094604,grad_norm: 0.9999997081021854, iteration: 424194
loss: 0.9904676675796509,grad_norm: 0.95113243654179, iteration: 424195
loss: 1.0614547729492188,grad_norm: 0.9999996305605594, iteration: 424196
loss: 0.9792637228965759,grad_norm: 0.7575309004061809, iteration: 424197
loss: 0.9912115335464478,grad_norm: 0.7306222017191709, iteration: 424198
loss: 1.045250654220581,grad_norm: 0.9999999350236943, iteration: 424199
loss: 1.0220708847045898,grad_norm: 0.6714217056605133, iteration: 424200
loss: 0.9935006499290466,grad_norm: 0.6778253524784063, iteration: 424201
loss: 0.993007242679596,grad_norm: 0.8369669347322541, iteration: 424202
loss: 0.9972666501998901,grad_norm: 0.8892209928844732, iteration: 424203
loss: 0.9936512112617493,grad_norm: 1.0000000035655225, iteration: 424204
loss: 0.981889545917511,grad_norm: 0.9690511824692478, iteration: 424205
loss: 1.031928300857544,grad_norm: 0.8089288450079352, iteration: 424206
loss: 1.0139943361282349,grad_norm: 0.8637641928409504, iteration: 424207
loss: 1.0005804300308228,grad_norm: 0.8074556094944358, iteration: 424208
loss: 0.9718254804611206,grad_norm: 0.949472883209731, iteration: 424209
loss: 1.021520972251892,grad_norm: 0.786598327952994, iteration: 424210
loss: 1.0043537616729736,grad_norm: 0.7428743596891983, iteration: 424211
loss: 1.0239874124526978,grad_norm: 0.7278987909078872, iteration: 424212
loss: 0.9886801242828369,grad_norm: 0.7260827128861924, iteration: 424213
loss: 0.9751402735710144,grad_norm: 0.7246323306351086, iteration: 424214
loss: 1.0869160890579224,grad_norm: 0.9999993737295514, iteration: 424215
loss: 1.0660178661346436,grad_norm: 0.6840352042062228, iteration: 424216
loss: 1.0252286195755005,grad_norm: 0.853049065856181, iteration: 424217
loss: 0.9954138398170471,grad_norm: 0.7151148373043332, iteration: 424218
loss: 1.006295084953308,grad_norm: 0.7418225442582481, iteration: 424219
loss: 1.1179744005203247,grad_norm: 0.9999997397556611, iteration: 424220
loss: 0.9816786050796509,grad_norm: 0.8746573835441637, iteration: 424221
loss: 1.0200929641723633,grad_norm: 0.9627383296839651, iteration: 424222
loss: 1.0139456987380981,grad_norm: 0.9999993188447044, iteration: 424223
loss: 1.028678059577942,grad_norm: 0.896302921354279, iteration: 424224
loss: 1.017543077468872,grad_norm: 0.8212729219820268, iteration: 424225
loss: 1.1174758672714233,grad_norm: 0.9999993962836001, iteration: 424226
loss: 1.0101126432418823,grad_norm: 0.8218035603818026, iteration: 424227
loss: 1.0127673149108887,grad_norm: 0.889116282303369, iteration: 424228
loss: 1.0043107271194458,grad_norm: 0.7533759704305467, iteration: 424229
loss: 0.9389669299125671,grad_norm: 0.8682587311155499, iteration: 424230
loss: 0.9788036346435547,grad_norm: 0.8774690527261539, iteration: 424231
loss: 0.976360023021698,grad_norm: 0.904741971143659, iteration: 424232
loss: 0.9693655967712402,grad_norm: 0.7175288195763331, iteration: 424233
loss: 1.0079030990600586,grad_norm: 0.6927499552407934, iteration: 424234
loss: 0.9820579886436462,grad_norm: 0.8892011924235095, iteration: 424235
loss: 1.0106122493743896,grad_norm: 0.9999990985199595, iteration: 424236
loss: 1.0276070833206177,grad_norm: 0.7932174040319235, iteration: 424237
loss: 1.0000689029693604,grad_norm: 0.6896046691428164, iteration: 424238
loss: 1.0146815776824951,grad_norm: 0.9444594751821622, iteration: 424239
loss: 0.9740833044052124,grad_norm: 0.9999990061033388, iteration: 424240
loss: 1.0000035762786865,grad_norm: 0.717856425544724, iteration: 424241
loss: 1.0351773500442505,grad_norm: 0.6106725845644446, iteration: 424242
loss: 1.0585155487060547,grad_norm: 0.6702454472264322, iteration: 424243
loss: 1.0145416259765625,grad_norm: 0.9199856658201631, iteration: 424244
loss: 1.0963361263275146,grad_norm: 0.9999989435987819, iteration: 424245
loss: 1.036038875579834,grad_norm: 0.9186183052597707, iteration: 424246
loss: 0.9993062615394592,grad_norm: 0.8131321761585732, iteration: 424247
loss: 1.0174932479858398,grad_norm: 0.7525305122434238, iteration: 424248
loss: 1.1390857696533203,grad_norm: 0.9999993157504298, iteration: 424249
loss: 1.0121060609817505,grad_norm: 0.9999992232888044, iteration: 424250
loss: 1.063549280166626,grad_norm: 0.8489287658083841, iteration: 424251
loss: 1.013934850692749,grad_norm: 0.9999990288261481, iteration: 424252
loss: 1.0683765411376953,grad_norm: 0.9590533069511552, iteration: 424253
loss: 1.0373073816299438,grad_norm: 0.915783226449935, iteration: 424254
loss: 0.951193630695343,grad_norm: 0.92723373325234, iteration: 424255
loss: 0.9975972175598145,grad_norm: 0.772196128121706, iteration: 424256
loss: 0.9918840527534485,grad_norm: 0.7697563042930126, iteration: 424257
loss: 1.0067143440246582,grad_norm: 0.9622113208958533, iteration: 424258
loss: 0.9972062706947327,grad_norm: 0.8556997912151199, iteration: 424259
loss: 1.0125179290771484,grad_norm: 0.7480623895914471, iteration: 424260
loss: 1.0317083597183228,grad_norm: 0.7231838045902902, iteration: 424261
loss: 0.9777277708053589,grad_norm: 0.9518549203945859, iteration: 424262
loss: 0.9822694659233093,grad_norm: 0.7896864987220263, iteration: 424263
loss: 1.0050866603851318,grad_norm: 0.8071489649127316, iteration: 424264
loss: 1.0007812976837158,grad_norm: 0.7442560330288593, iteration: 424265
loss: 1.0325384140014648,grad_norm: 0.9999998599390837, iteration: 424266
loss: 0.9971736669540405,grad_norm: 0.7693695627877452, iteration: 424267
loss: 1.0007129907608032,grad_norm: 0.7374999366880179, iteration: 424268
loss: 1.0077863931655884,grad_norm: 0.8414577779895013, iteration: 424269
loss: 1.022294044494629,grad_norm: 0.844658968576865, iteration: 424270
loss: 0.9843280911445618,grad_norm: 0.744601478194065, iteration: 424271
loss: 0.9823591113090515,grad_norm: 0.6937314492586051, iteration: 424272
loss: 1.016384243965149,grad_norm: 0.999999108849687, iteration: 424273
loss: 0.9996250867843628,grad_norm: 0.8035798834334096, iteration: 424274
loss: 0.9986094832420349,grad_norm: 0.8225180464353127, iteration: 424275
loss: 0.960605263710022,grad_norm: 0.7637109396169094, iteration: 424276
loss: 0.973101019859314,grad_norm: 0.9105716906652499, iteration: 424277
loss: 0.9788053035736084,grad_norm: 0.7694685875904276, iteration: 424278
loss: 0.9875780344009399,grad_norm: 0.8467478848803174, iteration: 424279
loss: 0.9894431233406067,grad_norm: 0.9999991198677368, iteration: 424280
loss: 1.0027021169662476,grad_norm: 0.8875773329046492, iteration: 424281
loss: 0.9868236184120178,grad_norm: 0.8482235376899782, iteration: 424282
loss: 1.0382457971572876,grad_norm: 0.8778825617047504, iteration: 424283
loss: 1.0397871732711792,grad_norm: 0.9999995016423098, iteration: 424284
loss: 1.0053857564926147,grad_norm: 0.849781420344318, iteration: 424285
loss: 1.0636464357376099,grad_norm: 0.7169767349061398, iteration: 424286
loss: 0.9622578024864197,grad_norm: 0.7618694410323616, iteration: 424287
loss: 0.9958805441856384,grad_norm: 0.679118892491696, iteration: 424288
loss: 1.0154869556427002,grad_norm: 0.999999782886607, iteration: 424289
loss: 0.9870304465293884,grad_norm: 0.7740057894690509, iteration: 424290
loss: 0.9982571005821228,grad_norm: 0.8488162071948556, iteration: 424291
loss: 1.0120887756347656,grad_norm: 0.8061271325927128, iteration: 424292
loss: 1.0518451929092407,grad_norm: 0.9999999615725715, iteration: 424293
loss: 1.0344384908676147,grad_norm: 0.9999990269863331, iteration: 424294
loss: 1.003265380859375,grad_norm: 0.6800543839993825, iteration: 424295
loss: 1.0019268989562988,grad_norm: 0.721897183094431, iteration: 424296
loss: 1.0139086246490479,grad_norm: 0.8116215804587477, iteration: 424297
loss: 1.0035669803619385,grad_norm: 0.7878634791831779, iteration: 424298
loss: 0.9619662761688232,grad_norm: 0.7638103999387287, iteration: 424299
loss: 0.9809794425964355,grad_norm: 0.8358800081516146, iteration: 424300
loss: 0.9910573959350586,grad_norm: 0.8955629870881517, iteration: 424301
loss: 1.0178035497665405,grad_norm: 0.9999999259494939, iteration: 424302
loss: 0.991576611995697,grad_norm: 0.7568161379383956, iteration: 424303
loss: 0.9830094575881958,grad_norm: 0.8612247381673978, iteration: 424304
loss: 0.9766390323638916,grad_norm: 0.8502366314067998, iteration: 424305
loss: 1.0082660913467407,grad_norm: 0.8188235955832444, iteration: 424306
loss: 0.9993338584899902,grad_norm: 0.7451662672578493, iteration: 424307
loss: 0.9597877264022827,grad_norm: 0.803239992237339, iteration: 424308
loss: 1.027273416519165,grad_norm: 0.7828811474027586, iteration: 424309
loss: 0.9811188578605652,grad_norm: 0.8257746505580107, iteration: 424310
loss: 1.013160228729248,grad_norm: 0.9999993404016341, iteration: 424311
loss: 0.992832601070404,grad_norm: 0.7165214700354844, iteration: 424312
loss: 1.1798261404037476,grad_norm: 0.9999997680258451, iteration: 424313
loss: 1.012303352355957,grad_norm: 0.7453198632854815, iteration: 424314
loss: 0.9826334714889526,grad_norm: 0.8389646595068702, iteration: 424315
loss: 1.00057053565979,grad_norm: 0.7983738107765467, iteration: 424316
loss: 0.9667909741401672,grad_norm: 0.707434582521116, iteration: 424317
loss: 0.9647238850593567,grad_norm: 0.7649868166314933, iteration: 424318
loss: 1.062583565711975,grad_norm: 0.9999993074700888, iteration: 424319
loss: 1.0401972532272339,grad_norm: 0.99999956339357, iteration: 424320
loss: 1.0030101537704468,grad_norm: 0.8609900326156645, iteration: 424321
loss: 0.9404378533363342,grad_norm: 0.7783441515700555, iteration: 424322
loss: 0.9710029363632202,grad_norm: 0.8120386964883077, iteration: 424323
loss: 0.9827688932418823,grad_norm: 0.9999988823136213, iteration: 424324
loss: 1.0644785165786743,grad_norm: 0.9999994609968526, iteration: 424325
loss: 0.992698609828949,grad_norm: 0.7734717999327847, iteration: 424326
loss: 1.0066077709197998,grad_norm: 0.708047271018283, iteration: 424327
loss: 0.984603762626648,grad_norm: 0.688730441182085, iteration: 424328
loss: 1.0086455345153809,grad_norm: 0.7825715725431573, iteration: 424329
loss: 1.0151629447937012,grad_norm: 0.8680595712119038, iteration: 424330
loss: 1.1195563077926636,grad_norm: 0.9999991068188664, iteration: 424331
loss: 1.0032591819763184,grad_norm: 0.9999998643204102, iteration: 424332
loss: 1.0665193796157837,grad_norm: 0.9999993658617375, iteration: 424333
loss: 1.0025269985198975,grad_norm: 0.79127454911586, iteration: 424334
loss: 1.0501551628112793,grad_norm: 0.7143454619485516, iteration: 424335
loss: 0.9885076880455017,grad_norm: 0.8229101455747504, iteration: 424336
loss: 0.9752643704414368,grad_norm: 0.845647928494864, iteration: 424337
loss: 1.050565481185913,grad_norm: 0.9999992636432126, iteration: 424338
loss: 1.0139755010604858,grad_norm: 0.7387089921374088, iteration: 424339
loss: 1.036855697631836,grad_norm: 0.9999998820837742, iteration: 424340
loss: 1.0570378303527832,grad_norm: 1.000000014670148, iteration: 424341
loss: 0.9938949346542358,grad_norm: 0.7937178970828596, iteration: 424342
loss: 0.9700539112091064,grad_norm: 0.7698332906494423, iteration: 424343
loss: 0.9775792360305786,grad_norm: 0.7860052494408138, iteration: 424344
loss: 1.0450165271759033,grad_norm: 0.9999995380068636, iteration: 424345
loss: 1.0507524013519287,grad_norm: 0.8367231835618858, iteration: 424346
loss: 1.0408347845077515,grad_norm: 0.869129701568292, iteration: 424347
loss: 0.9955634474754333,grad_norm: 0.7310550864298865, iteration: 424348
loss: 0.9595299959182739,grad_norm: 0.8686791756148237, iteration: 424349
loss: 1.0194579362869263,grad_norm: 0.9999996437556581, iteration: 424350
loss: 1.0339665412902832,grad_norm: 0.6610475202414251, iteration: 424351
loss: 1.0376540422439575,grad_norm: 0.9999993489441037, iteration: 424352
loss: 1.0518488883972168,grad_norm: 0.7389911583541698, iteration: 424353
loss: 0.959805965423584,grad_norm: 0.9510834774150905, iteration: 424354
loss: 1.023155689239502,grad_norm: 0.7172449198274318, iteration: 424355
loss: 1.1303223371505737,grad_norm: 0.9999999922197069, iteration: 424356
loss: 0.9954801797866821,grad_norm: 0.9999991869330331, iteration: 424357
loss: 0.9580304622650146,grad_norm: 0.8232914861513391, iteration: 424358
loss: 1.005262851715088,grad_norm: 0.6903813271200957, iteration: 424359
loss: 0.9883872270584106,grad_norm: 0.664824937269378, iteration: 424360
loss: 0.9985137581825256,grad_norm: 0.7865458813415328, iteration: 424361
loss: 1.036769151687622,grad_norm: 0.9999993329247147, iteration: 424362
loss: 1.0526974201202393,grad_norm: 0.714453043996631, iteration: 424363
loss: 1.01187002658844,grad_norm: 0.7562472824675505, iteration: 424364
loss: 1.0315617322921753,grad_norm: 0.9999995785551009, iteration: 424365
loss: 0.978610634803772,grad_norm: 0.7365845431668758, iteration: 424366
loss: 0.9720631241798401,grad_norm: 0.6853535346660997, iteration: 424367
loss: 1.0121700763702393,grad_norm: 0.9999992502810992, iteration: 424368
loss: 1.0033107995986938,grad_norm: 0.7019952849933189, iteration: 424369
loss: 0.9778778553009033,grad_norm: 0.7227002812219521, iteration: 424370
loss: 1.0376319885253906,grad_norm: 0.8369457126342346, iteration: 424371
loss: 0.9749547243118286,grad_norm: 0.6745367539561171, iteration: 424372
loss: 1.0001112222671509,grad_norm: 0.9999992723666675, iteration: 424373
loss: 1.0960841178894043,grad_norm: 0.8466457130763074, iteration: 424374
loss: 1.0109270811080933,grad_norm: 0.6912099599453434, iteration: 424375
loss: 0.9764447212219238,grad_norm: 0.9776745365587883, iteration: 424376
loss: 0.952807605266571,grad_norm: 0.7855539033163719, iteration: 424377
loss: 0.995069146156311,grad_norm: 0.8149386984740961, iteration: 424378
loss: 0.9970003366470337,grad_norm: 0.6755096609382167, iteration: 424379
loss: 1.0102667808532715,grad_norm: 0.7051224227207027, iteration: 424380
loss: 1.0215338468551636,grad_norm: 0.9999996030514766, iteration: 424381
loss: 1.015484094619751,grad_norm: 0.9179478727471562, iteration: 424382
loss: 0.9887902140617371,grad_norm: 0.8074523329064722, iteration: 424383
loss: 1.0151573419570923,grad_norm: 0.7024181835530248, iteration: 424384
loss: 1.0095477104187012,grad_norm: 0.9999994339332094, iteration: 424385
loss: 1.0193270444869995,grad_norm: 0.9352016749750715, iteration: 424386
loss: 0.990932047367096,grad_norm: 0.7024696730306567, iteration: 424387
loss: 0.9716442823410034,grad_norm: 0.6806760241168002, iteration: 424388
loss: 1.041854977607727,grad_norm: 0.9999995595100628, iteration: 424389
loss: 0.9705597162246704,grad_norm: 0.7231744437254791, iteration: 424390
loss: 1.0817983150482178,grad_norm: 0.8850742517205977, iteration: 424391
loss: 1.0721702575683594,grad_norm: 0.7347073227536001, iteration: 424392
loss: 1.0217280387878418,grad_norm: 0.803024333528317, iteration: 424393
loss: 0.9726788401603699,grad_norm: 0.6845491768303058, iteration: 424394
loss: 1.0035680532455444,grad_norm: 0.7359450398460341, iteration: 424395
loss: 0.9826377034187317,grad_norm: 0.7729696202600717, iteration: 424396
loss: 0.9941705465316772,grad_norm: 0.7895856797288869, iteration: 424397
loss: 1.171061396598816,grad_norm: 0.999999507740319, iteration: 424398
loss: 1.0021214485168457,grad_norm: 0.999998964659092, iteration: 424399
loss: 1.074172854423523,grad_norm: 0.9999992848690926, iteration: 424400
loss: 1.029404878616333,grad_norm: 0.7976964057012423, iteration: 424401
loss: 1.0310113430023193,grad_norm: 0.9410298667989371, iteration: 424402
loss: 1.0090452432632446,grad_norm: 0.757613090127161, iteration: 424403
loss: 0.9990941882133484,grad_norm: 0.7465146857727, iteration: 424404
loss: 1.0485591888427734,grad_norm: 0.7363803283590741, iteration: 424405
loss: 0.9775305986404419,grad_norm: 0.8428262455508934, iteration: 424406
loss: 0.9977880120277405,grad_norm: 0.6793989044564765, iteration: 424407
loss: 1.024588704109192,grad_norm: 0.6684578083925152, iteration: 424408
loss: 0.9755493998527527,grad_norm: 0.6559799370531124, iteration: 424409
loss: 1.270158290863037,grad_norm: 0.9999993769023712, iteration: 424410
loss: 1.0310136079788208,grad_norm: 0.9999999303963767, iteration: 424411
loss: 0.978546142578125,grad_norm: 0.7499571876563098, iteration: 424412
loss: 0.9868150949478149,grad_norm: 0.9179307941794304, iteration: 424413
loss: 1.007515549659729,grad_norm: 0.8212115455601692, iteration: 424414
loss: 1.0256539583206177,grad_norm: 0.631769146695681, iteration: 424415
loss: 1.0245083570480347,grad_norm: 0.8401839008839156, iteration: 424416
loss: 1.0044262409210205,grad_norm: 0.7768243040126622, iteration: 424417
loss: 1.0899338722229004,grad_norm: 0.9335702002500889, iteration: 424418
loss: 0.9883012771606445,grad_norm: 0.7688341688483553, iteration: 424419
loss: 1.015777587890625,grad_norm: 0.7165265874569499, iteration: 424420
loss: 1.0259727239608765,grad_norm: 0.7452515446933136, iteration: 424421
loss: 1.011873722076416,grad_norm: 0.9999990264407941, iteration: 424422
loss: 0.9783270955085754,grad_norm: 0.8585505018636982, iteration: 424423
loss: 1.0327670574188232,grad_norm: 0.9999989890775891, iteration: 424424
loss: 1.0127003192901611,grad_norm: 0.8458568611594633, iteration: 424425
loss: 0.9831453561782837,grad_norm: 0.7375117303322601, iteration: 424426
loss: 1.0121196508407593,grad_norm: 0.8213524087693203, iteration: 424427
loss: 0.9938839673995972,grad_norm: 0.9999995837193795, iteration: 424428
loss: 0.9820711612701416,grad_norm: 0.7174900881980085, iteration: 424429
loss: 1.0249483585357666,grad_norm: 0.7435263391464384, iteration: 424430
loss: 0.9994576573371887,grad_norm: 0.8413406411352866, iteration: 424431
loss: 1.0016865730285645,grad_norm: 0.9203402688534291, iteration: 424432
loss: 0.9803946614265442,grad_norm: 0.7371838553170991, iteration: 424433
loss: 1.0415014028549194,grad_norm: 0.765914979532019, iteration: 424434
loss: 0.9963980913162231,grad_norm: 0.7416206179558904, iteration: 424435
loss: 0.9585606455802917,grad_norm: 0.716739198790324, iteration: 424436
loss: 1.0030673742294312,grad_norm: 0.7510806709630093, iteration: 424437
loss: 0.9648523330688477,grad_norm: 0.660741787264294, iteration: 424438
loss: 1.0246576070785522,grad_norm: 0.858914223756759, iteration: 424439
loss: 0.956640899181366,grad_norm: 0.9250210837501434, iteration: 424440
loss: 0.9764995574951172,grad_norm: 0.9031309606610763, iteration: 424441
loss: 1.0122671127319336,grad_norm: 0.7273942032436365, iteration: 424442
loss: 1.0244791507720947,grad_norm: 0.8085536040175434, iteration: 424443
loss: 0.9773086905479431,grad_norm: 0.9162131521825588, iteration: 424444
loss: 1.001057505607605,grad_norm: 0.8031666400880535, iteration: 424445
loss: 0.9764657020568848,grad_norm: 0.896068595718702, iteration: 424446
loss: 0.9877327084541321,grad_norm: 0.7421421319468752, iteration: 424447
loss: 1.0245226621627808,grad_norm: 0.7711180362186042, iteration: 424448
loss: 1.0123100280761719,grad_norm: 0.6580457744328917, iteration: 424449
loss: 1.0186561346054077,grad_norm: 0.9465765362239372, iteration: 424450
loss: 1.0081123113632202,grad_norm: 0.7368731014672169, iteration: 424451
loss: 0.9872966408729553,grad_norm: 0.766762142521872, iteration: 424452
loss: 1.0204052925109863,grad_norm: 0.9999995209225533, iteration: 424453
loss: 0.9986323118209839,grad_norm: 0.7653000524007733, iteration: 424454
loss: 1.006571888923645,grad_norm: 0.7451631567654137, iteration: 424455
loss: 1.0348976850509644,grad_norm: 0.9999993902215341, iteration: 424456
loss: 1.0077520608901978,grad_norm: 0.9999999635865974, iteration: 424457
loss: 0.950570285320282,grad_norm: 0.7413793683164356, iteration: 424458
loss: 1.0502501726150513,grad_norm: 0.6907211503175471, iteration: 424459
loss: 0.9766403436660767,grad_norm: 0.6582970424134771, iteration: 424460
loss: 1.0118837356567383,grad_norm: 0.7494658111198708, iteration: 424461
loss: 0.9767972826957703,grad_norm: 0.6892943069576157, iteration: 424462
loss: 1.0118054151535034,grad_norm: 0.9205892559659693, iteration: 424463
loss: 0.976950466632843,grad_norm: 0.893489815071295, iteration: 424464
loss: 0.9916291832923889,grad_norm: 0.7010398338124312, iteration: 424465
loss: 1.0140900611877441,grad_norm: 0.7854342753587938, iteration: 424466
loss: 0.9734108448028564,grad_norm: 0.7054301823582921, iteration: 424467
loss: 1.0576858520507812,grad_norm: 0.9159204690949544, iteration: 424468
loss: 1.0121006965637207,grad_norm: 0.6862531128015329, iteration: 424469
loss: 0.9776824116706848,grad_norm: 0.7878738065778633, iteration: 424470
loss: 0.9903432726860046,grad_norm: 0.7225496344073746, iteration: 424471
loss: 0.9945340156555176,grad_norm: 0.849047594125367, iteration: 424472
loss: 1.0271165370941162,grad_norm: 0.8459484378019346, iteration: 424473
loss: 0.9884725213050842,grad_norm: 0.7764710503851474, iteration: 424474
loss: 0.954944908618927,grad_norm: 0.6517955348086792, iteration: 424475
loss: 1.0003252029418945,grad_norm: 0.8287619228169614, iteration: 424476
loss: 1.0333914756774902,grad_norm: 0.9326420923836692, iteration: 424477
loss: 0.9898935556411743,grad_norm: 0.9733914338773153, iteration: 424478
loss: 0.9980374574661255,grad_norm: 0.6989716625465225, iteration: 424479
loss: 0.9685335159301758,grad_norm: 0.6963222284351874, iteration: 424480
loss: 0.9519116282463074,grad_norm: 0.7848014843542874, iteration: 424481
loss: 1.000437617301941,grad_norm: 0.8822718348803108, iteration: 424482
loss: 0.9943093657493591,grad_norm: 0.76091582623565, iteration: 424483
loss: 1.0287660360336304,grad_norm: 0.765903481087081, iteration: 424484
loss: 0.9852879047393799,grad_norm: 0.7950304916222402, iteration: 424485
loss: 0.9874743223190308,grad_norm: 0.935582633873826, iteration: 424486
loss: 1.0177489519119263,grad_norm: 0.804528742362395, iteration: 424487
loss: 0.9773595929145813,grad_norm: 0.9999995142496134, iteration: 424488
loss: 0.9828271865844727,grad_norm: 0.8338703774647435, iteration: 424489
loss: 1.0370608568191528,grad_norm: 0.680072090363135, iteration: 424490
loss: 1.0170133113861084,grad_norm: 0.6657974286123335, iteration: 424491
loss: 0.9891719818115234,grad_norm: 0.9999992121020544, iteration: 424492
loss: 0.9994490742683411,grad_norm: 0.7163379539191357, iteration: 424493
loss: 1.0371922254562378,grad_norm: 0.9999998939091954, iteration: 424494
loss: 0.9641188979148865,grad_norm: 0.7722873440112291, iteration: 424495
loss: 0.9992861151695251,grad_norm: 0.9704327011622877, iteration: 424496
loss: 1.0270328521728516,grad_norm: 0.9999996929157879, iteration: 424497
loss: 1.014114499092102,grad_norm: 0.7161289754667474, iteration: 424498
loss: 0.9370193481445312,grad_norm: 0.6047011077325989, iteration: 424499
loss: 1.0312424898147583,grad_norm: 0.7945437214954475, iteration: 424500
loss: 0.9464969635009766,grad_norm: 0.7059082834187185, iteration: 424501
loss: 1.066501498222351,grad_norm: 0.6371133034528587, iteration: 424502
loss: 1.016776442527771,grad_norm: 0.8143148725684444, iteration: 424503
loss: 0.9603614211082458,grad_norm: 0.7753827111332596, iteration: 424504
loss: 0.9993290901184082,grad_norm: 0.7421873116516415, iteration: 424505
loss: 0.9641265869140625,grad_norm: 0.691645570529081, iteration: 424506
loss: 1.0421866178512573,grad_norm: 0.9999991899052977, iteration: 424507
loss: 0.9930822849273682,grad_norm: 0.6829133165055558, iteration: 424508
loss: 0.9824024438858032,grad_norm: 0.7274581464361753, iteration: 424509
loss: 1.0435516834259033,grad_norm: 0.7791675140114583, iteration: 424510
loss: 1.0082558393478394,grad_norm: 0.940671674898551, iteration: 424511
loss: 1.014595627784729,grad_norm: 0.6593644021764546, iteration: 424512
loss: 1.0740272998809814,grad_norm: 0.9999991819728501, iteration: 424513
loss: 1.0265085697174072,grad_norm: 0.7049182340778547, iteration: 424514
loss: 1.0253691673278809,grad_norm: 0.9999990379261989, iteration: 424515
loss: 0.9516820311546326,grad_norm: 0.8597186910808767, iteration: 424516
loss: 0.9791589379310608,grad_norm: 0.6506733073789693, iteration: 424517
loss: 1.0364704132080078,grad_norm: 0.7103690177498471, iteration: 424518
loss: 1.0170787572860718,grad_norm: 0.8383919665627777, iteration: 424519
loss: 1.0105412006378174,grad_norm: 0.6674080744522415, iteration: 424520
loss: 1.006277084350586,grad_norm: 0.6748168360478612, iteration: 424521
loss: 0.9810400009155273,grad_norm: 0.8723454042824058, iteration: 424522
loss: 1.0076600313186646,grad_norm: 0.8290648087776966, iteration: 424523
loss: 1.0206644535064697,grad_norm: 0.7179025114369975, iteration: 424524
loss: 1.3656541109085083,grad_norm: 0.9999996847669306, iteration: 424525
loss: 0.979704737663269,grad_norm: 0.6783806353859797, iteration: 424526
loss: 0.9800458550453186,grad_norm: 0.7747815046179714, iteration: 424527
loss: 1.0200035572052002,grad_norm: 0.7606512620253374, iteration: 424528
loss: 1.028416633605957,grad_norm: 0.9999999114234315, iteration: 424529
loss: 1.019762635231018,grad_norm: 0.8363130152935065, iteration: 424530
loss: 0.9969148635864258,grad_norm: 0.9462005467711503, iteration: 424531
loss: 1.0046015977859497,grad_norm: 0.8409233551286069, iteration: 424532
loss: 0.9948316216468811,grad_norm: 0.9775002910695446, iteration: 424533
loss: 1.032144546508789,grad_norm: 0.9999992962134738, iteration: 424534
loss: 1.027127742767334,grad_norm: 0.8181047815513594, iteration: 424535
loss: 1.0189775228500366,grad_norm: 0.8036434147571899, iteration: 424536
loss: 1.0355058908462524,grad_norm: 0.7469553566477438, iteration: 424537
loss: 0.9993377327919006,grad_norm: 0.9167717184286002, iteration: 424538
loss: 1.051340103149414,grad_norm: 0.9999992840733922, iteration: 424539
loss: 1.021114706993103,grad_norm: 0.9999996199559199, iteration: 424540
loss: 1.0043225288391113,grad_norm: 0.8520721465458956, iteration: 424541
loss: 1.0129362344741821,grad_norm: 0.7375601092746413, iteration: 424542
loss: 1.0651828050613403,grad_norm: 0.9999990992216771, iteration: 424543
loss: 0.9955188035964966,grad_norm: 0.9441143835645075, iteration: 424544
loss: 1.011153221130371,grad_norm: 0.6951558303391774, iteration: 424545
loss: 1.0084033012390137,grad_norm: 0.8871809282708104, iteration: 424546
loss: 1.0278204679489136,grad_norm: 0.9005708440293786, iteration: 424547
loss: 1.0720229148864746,grad_norm: 0.8761677805230625, iteration: 424548
loss: 0.9986522793769836,grad_norm: 0.6525519279907785, iteration: 424549
loss: 1.00661301612854,grad_norm: 0.6826626278924777, iteration: 424550
loss: 1.0591554641723633,grad_norm: 0.762966704497466, iteration: 424551
loss: 0.985883355140686,grad_norm: 0.716464223243617, iteration: 424552
loss: 1.0253181457519531,grad_norm: 0.8651433094497409, iteration: 424553
loss: 0.9895836114883423,grad_norm: 0.6783314746845454, iteration: 424554
loss: 1.0044167041778564,grad_norm: 0.8854730960439382, iteration: 424555
loss: 1.0244957208633423,grad_norm: 0.8510234530391233, iteration: 424556
loss: 1.0847867727279663,grad_norm: 0.9999992052532011, iteration: 424557
loss: 1.0654278993606567,grad_norm: 0.9999997107562805, iteration: 424558
loss: 1.0398989915847778,grad_norm: 0.7627350492078987, iteration: 424559
loss: 1.036512851715088,grad_norm: 0.9999991719941116, iteration: 424560
loss: 0.9782528281211853,grad_norm: 0.8179876413167444, iteration: 424561
loss: 1.0091071128845215,grad_norm: 0.7441861370979367, iteration: 424562
loss: 0.9792407155036926,grad_norm: 0.8573667495612555, iteration: 424563
loss: 1.0104732513427734,grad_norm: 0.7717758077294076, iteration: 424564
loss: 1.0796436071395874,grad_norm: 0.9999998752251441, iteration: 424565
loss: 0.9742897152900696,grad_norm: 0.9999990241183752, iteration: 424566
loss: 1.0036215782165527,grad_norm: 0.7745862627462429, iteration: 424567
loss: 0.9664708971977234,grad_norm: 0.6536495752266792, iteration: 424568
loss: 1.0066890716552734,grad_norm: 0.7961693068990354, iteration: 424569
loss: 1.0740238428115845,grad_norm: 0.7168530292306311, iteration: 424570
loss: 0.9736155867576599,grad_norm: 0.850248192309169, iteration: 424571
loss: 1.0517207384109497,grad_norm: 0.9999992779121009, iteration: 424572
loss: 1.0184409618377686,grad_norm: 0.9729565444930551, iteration: 424573
loss: 0.9815777540206909,grad_norm: 0.7570346905777273, iteration: 424574
loss: 1.0450282096862793,grad_norm: 0.8908008346847106, iteration: 424575
loss: 0.9877285361289978,grad_norm: 0.7709169845194727, iteration: 424576
loss: 1.0171771049499512,grad_norm: 0.9999998659889633, iteration: 424577
loss: 1.023142695426941,grad_norm: 0.7348564283411985, iteration: 424578
loss: 1.0287067890167236,grad_norm: 0.7161896502682025, iteration: 424579
loss: 1.0022627115249634,grad_norm: 0.6931851048049875, iteration: 424580
loss: 1.1108145713806152,grad_norm: 0.9999998835944335, iteration: 424581
loss: 1.0604459047317505,grad_norm: 0.9999994927883817, iteration: 424582
loss: 0.9839142560958862,grad_norm: 0.9999991543148403, iteration: 424583
loss: 0.9847468137741089,grad_norm: 0.9503315881219894, iteration: 424584
loss: 1.025192379951477,grad_norm: 0.8005635168201666, iteration: 424585
loss: 1.0541356801986694,grad_norm: 0.8889647613729943, iteration: 424586
loss: 1.0268138647079468,grad_norm: 0.7518770229433855, iteration: 424587
loss: 1.0732526779174805,grad_norm: 0.9434260509969722, iteration: 424588
loss: 1.001158595085144,grad_norm: 0.8744292456861839, iteration: 424589
loss: 1.0338647365570068,grad_norm: 0.9485520321575096, iteration: 424590
loss: 0.9526718258857727,grad_norm: 0.8121678980649872, iteration: 424591
loss: 0.9811341762542725,grad_norm: 0.924505439451878, iteration: 424592
loss: 1.0050692558288574,grad_norm: 0.6856047393332849, iteration: 424593
loss: 0.9988002777099609,grad_norm: 0.7518993945912562, iteration: 424594
loss: 1.081068992614746,grad_norm: 0.9999991794624844, iteration: 424595
loss: 0.9880258440971375,grad_norm: 0.7863264659096773, iteration: 424596
loss: 1.0085033178329468,grad_norm: 0.999999117347129, iteration: 424597
loss: 1.0042277574539185,grad_norm: 0.7754695532023731, iteration: 424598
loss: 0.991439163684845,grad_norm: 0.7806827169757294, iteration: 424599
loss: 1.0346344709396362,grad_norm: 0.8293918049831948, iteration: 424600
loss: 0.9820241332054138,grad_norm: 0.8603808534902576, iteration: 424601
loss: 1.0219804048538208,grad_norm: 0.7259326606587493, iteration: 424602
loss: 0.9952003955841064,grad_norm: 0.7120552437524094, iteration: 424603
loss: 1.0129259824752808,grad_norm: 0.8127055232466835, iteration: 424604
loss: 0.9787799715995789,grad_norm: 0.7314303568299286, iteration: 424605
loss: 1.0154780149459839,grad_norm: 0.7514775001637166, iteration: 424606
loss: 1.0110470056533813,grad_norm: 0.8978792564273091, iteration: 424607
loss: 0.9971270561218262,grad_norm: 0.8620170795691391, iteration: 424608
loss: 1.0162147283554077,grad_norm: 0.7649494549885787, iteration: 424609
loss: 0.9749902486801147,grad_norm: 0.729809660123058, iteration: 424610
loss: 1.0010839700698853,grad_norm: 0.8963989848262283, iteration: 424611
loss: 1.0293821096420288,grad_norm: 0.7387869213874247, iteration: 424612
loss: 0.9771817326545715,grad_norm: 0.9999990962098634, iteration: 424613
loss: 1.0520144701004028,grad_norm: 0.7498237235565506, iteration: 424614
loss: 1.001271367073059,grad_norm: 0.9999990855145973, iteration: 424615
loss: 0.9787917137145996,grad_norm: 0.8764500382747153, iteration: 424616
loss: 0.9845673441886902,grad_norm: 0.8518128186644618, iteration: 424617
loss: 0.9605849981307983,grad_norm: 0.8252337155616022, iteration: 424618
loss: 1.0069111585617065,grad_norm: 0.7707608483997219, iteration: 424619
loss: 1.0802714824676514,grad_norm: 0.9999992164714889, iteration: 424620
loss: 1.0351101160049438,grad_norm: 0.8385132697155853, iteration: 424621
loss: 1.074749231338501,grad_norm: 0.770448360181108, iteration: 424622
loss: 0.9616563320159912,grad_norm: 0.6517433674210519, iteration: 424623
loss: 1.0030587911605835,grad_norm: 0.9999996125085824, iteration: 424624
loss: 1.0464651584625244,grad_norm: 0.7298729154652406, iteration: 424625
loss: 0.9958637952804565,grad_norm: 0.8249368431880321, iteration: 424626
loss: 1.0047695636749268,grad_norm: 0.7503025545207249, iteration: 424627
loss: 1.0347049236297607,grad_norm: 0.9299811356370562, iteration: 424628
loss: 1.0154966115951538,grad_norm: 0.9999991753293106, iteration: 424629
loss: 0.9892885088920593,grad_norm: 0.8715213125601111, iteration: 424630
loss: 0.9916098117828369,grad_norm: 0.7915064188311582, iteration: 424631
loss: 0.9677829742431641,grad_norm: 0.756893965663312, iteration: 424632
loss: 1.0231692790985107,grad_norm: 0.8501637354137399, iteration: 424633
loss: 0.9910017251968384,grad_norm: 0.8074171772823084, iteration: 424634
loss: 1.0084257125854492,grad_norm: 0.6824001820251521, iteration: 424635
loss: 1.0281134843826294,grad_norm: 0.7197876678088605, iteration: 424636
loss: 1.0552606582641602,grad_norm: 0.9800421587720137, iteration: 424637
loss: 0.996086835861206,grad_norm: 0.7766871643006638, iteration: 424638
loss: 1.1583874225616455,grad_norm: 0.9999999832866496, iteration: 424639
loss: 0.9683803915977478,grad_norm: 0.7722902849364287, iteration: 424640
loss: 1.000807762145996,grad_norm: 0.6902305744585806, iteration: 424641
loss: 1.0330710411071777,grad_norm: 0.9999998602265854, iteration: 424642
loss: 1.0254387855529785,grad_norm: 0.8563298651306331, iteration: 424643
loss: 1.0458877086639404,grad_norm: 0.8419805571314546, iteration: 424644
loss: 0.956182599067688,grad_norm: 0.7959124178791385, iteration: 424645
loss: 0.9992543458938599,grad_norm: 0.6622967789479508, iteration: 424646
loss: 1.024698257446289,grad_norm: 0.9068658624890692, iteration: 424647
loss: 1.0150115489959717,grad_norm: 0.6868173528620106, iteration: 424648
loss: 1.0258828401565552,grad_norm: 0.9999992143154953, iteration: 424649
loss: 0.9671509861946106,grad_norm: 0.7348401930102126, iteration: 424650
loss: 0.9602756500244141,grad_norm: 0.9999991771507015, iteration: 424651
loss: 0.9635478258132935,grad_norm: 0.7510975291923044, iteration: 424652
loss: 1.0300407409667969,grad_norm: 0.751010479027469, iteration: 424653
loss: 1.1110445261001587,grad_norm: 0.999999387322613, iteration: 424654
loss: 1.0884120464324951,grad_norm: 0.9999991369799093, iteration: 424655
loss: 1.0308722257614136,grad_norm: 0.9470867790947277, iteration: 424656
loss: 1.0041558742523193,grad_norm: 0.7776221394915095, iteration: 424657
loss: 1.0162581205368042,grad_norm: 0.8024274538893651, iteration: 424658
loss: 1.0017247200012207,grad_norm: 0.9999991267042441, iteration: 424659
loss: 1.0112301111221313,grad_norm: 1.000000020206222, iteration: 424660
loss: 1.0207902193069458,grad_norm: 0.8242738395312307, iteration: 424661
loss: 1.0199108123779297,grad_norm: 0.6864129993755209, iteration: 424662
loss: 1.0055508613586426,grad_norm: 0.7972734041830023, iteration: 424663
loss: 1.000223159790039,grad_norm: 0.789870001501234, iteration: 424664
loss: 1.0303329229354858,grad_norm: 0.642307174236298, iteration: 424665
loss: 0.9925212860107422,grad_norm: 0.7870768415910425, iteration: 424666
loss: 1.0026097297668457,grad_norm: 0.7191081792470176, iteration: 424667
loss: 0.9833813309669495,grad_norm: 0.708747135903467, iteration: 424668
loss: 1.0145328044891357,grad_norm: 0.7755221826519175, iteration: 424669
loss: 1.1296051740646362,grad_norm: 0.9999998306053738, iteration: 424670
loss: 1.0667790174484253,grad_norm: 0.8108121478876286, iteration: 424671
loss: 1.0077468156814575,grad_norm: 0.9124809553456713, iteration: 424672
loss: 0.9792538285255432,grad_norm: 0.8127420023351287, iteration: 424673
loss: 0.9896109104156494,grad_norm: 0.7720871008596122, iteration: 424674
loss: 0.9765093922615051,grad_norm: 0.9547608705739038, iteration: 424675
loss: 1.1090975999832153,grad_norm: 0.999999715473541, iteration: 424676
loss: 1.0124824047088623,grad_norm: 0.638491938405741, iteration: 424677
loss: 0.9999957084655762,grad_norm: 0.720342343018016, iteration: 424678
loss: 0.9697623252868652,grad_norm: 0.7902849026617685, iteration: 424679
loss: 1.0070675611495972,grad_norm: 0.7345174706068379, iteration: 424680
loss: 0.9862842559814453,grad_norm: 0.9355347584108787, iteration: 424681
loss: 0.9912777543067932,grad_norm: 0.7564949888218417, iteration: 424682
loss: 1.048681616783142,grad_norm: 0.7053131064469449, iteration: 424683
loss: 1.0686157941818237,grad_norm: 0.9999989547841921, iteration: 424684
loss: 1.0126993656158447,grad_norm: 0.6894286978754111, iteration: 424685
loss: 0.9885988831520081,grad_norm: 0.7150244980452208, iteration: 424686
loss: 1.0090384483337402,grad_norm: 0.7142323614654854, iteration: 424687
loss: 1.0210392475128174,grad_norm: 0.7457228001426804, iteration: 424688
loss: 1.0197666883468628,grad_norm: 0.7796662562357987, iteration: 424689
loss: 1.016908049583435,grad_norm: 0.7424396671373047, iteration: 424690
loss: 1.0357664823532104,grad_norm: 1.00000003815433, iteration: 424691
loss: 1.0691643953323364,grad_norm: 0.8296044133454765, iteration: 424692
loss: 0.9932025671005249,grad_norm: 0.7412496941084943, iteration: 424693
loss: 0.9844494462013245,grad_norm: 0.7687516476204423, iteration: 424694
loss: 0.9840080738067627,grad_norm: 0.7550157206353537, iteration: 424695
loss: 0.9840443134307861,grad_norm: 0.8649090780131935, iteration: 424696
loss: 0.9484641551971436,grad_norm: 0.7659137769184794, iteration: 424697
loss: 0.9769864082336426,grad_norm: 0.7034127406307447, iteration: 424698
loss: 0.9767595529556274,grad_norm: 0.6187404448058276, iteration: 424699
loss: 1.006324291229248,grad_norm: 0.814282214539083, iteration: 424700
loss: 0.9810763001441956,grad_norm: 0.8687883121551618, iteration: 424701
loss: 0.9997519850730896,grad_norm: 0.7573535099739117, iteration: 424702
loss: 0.9599285125732422,grad_norm: 0.8803021726369518, iteration: 424703
loss: 0.9830731153488159,grad_norm: 0.7326439593545911, iteration: 424704
loss: 1.0083949565887451,grad_norm: 0.9166349335208477, iteration: 424705
loss: 0.9775851368904114,grad_norm: 0.8409796018283185, iteration: 424706
loss: 0.9765992760658264,grad_norm: 0.8203998758989928, iteration: 424707
loss: 1.0242478847503662,grad_norm: 0.8013492107267417, iteration: 424708
loss: 1.0003679990768433,grad_norm: 0.9588917039925823, iteration: 424709
loss: 0.9449515342712402,grad_norm: 0.8258131398834059, iteration: 424710
loss: 0.9626308679580688,grad_norm: 0.807560542866577, iteration: 424711
loss: 0.9532631039619446,grad_norm: 0.6423352935435976, iteration: 424712
loss: 0.9870898723602295,grad_norm: 0.8727501376537278, iteration: 424713
loss: 0.9939982295036316,grad_norm: 0.7723230896777518, iteration: 424714
loss: 0.9961071610450745,grad_norm: 0.7422129099140923, iteration: 424715
loss: 0.9962066411972046,grad_norm: 0.7543117330300141, iteration: 424716
loss: 1.0010675191879272,grad_norm: 0.6865448167803276, iteration: 424717
loss: 1.026200294494629,grad_norm: 0.7859791008305625, iteration: 424718
loss: 1.005011796951294,grad_norm: 0.6887938553438044, iteration: 424719
loss: 1.0233967304229736,grad_norm: 0.7995989321691386, iteration: 424720
loss: 1.0047515630722046,grad_norm: 0.9622434216700068, iteration: 424721
loss: 0.9912775158882141,grad_norm: 0.8306534111658681, iteration: 424722
loss: 1.0243184566497803,grad_norm: 0.7961978620187813, iteration: 424723
loss: 0.9785054326057434,grad_norm: 0.7237170607632896, iteration: 424724
loss: 0.9701476097106934,grad_norm: 0.7472481321340614, iteration: 424725
loss: 1.0112853050231934,grad_norm: 0.8218205776139769, iteration: 424726
loss: 0.9677610993385315,grad_norm: 0.7183904832414245, iteration: 424727
loss: 1.022624135017395,grad_norm: 0.7324346218856157, iteration: 424728
loss: 0.9842750430107117,grad_norm: 0.9481016669563966, iteration: 424729
loss: 1.028599500656128,grad_norm: 0.7690070093001842, iteration: 424730
loss: 1.0527902841567993,grad_norm: 0.837805713708895, iteration: 424731
loss: 1.0564311742782593,grad_norm: 0.9999999914767007, iteration: 424732
loss: 1.017770528793335,grad_norm: 0.9999990740020327, iteration: 424733
loss: 1.0023640394210815,grad_norm: 0.7473974040412011, iteration: 424734
loss: 1.0070334672927856,grad_norm: 0.8987411275540905, iteration: 424735
loss: 0.9824349880218506,grad_norm: 0.6796241845014664, iteration: 424736
loss: 0.9600207805633545,grad_norm: 0.7383699348235198, iteration: 424737
loss: 0.9822500944137573,grad_norm: 0.7305498779403161, iteration: 424738
loss: 0.9604504108428955,grad_norm: 0.7182028741976342, iteration: 424739
loss: 1.083667278289795,grad_norm: 0.7344020678156845, iteration: 424740
loss: 0.9783120155334473,grad_norm: 0.999999876930303, iteration: 424741
loss: 0.9602740406990051,grad_norm: 0.7987184009085295, iteration: 424742
loss: 0.9995383024215698,grad_norm: 0.8687438317363774, iteration: 424743
loss: 1.060020923614502,grad_norm: 0.7283424174328393, iteration: 424744
loss: 0.9566232562065125,grad_norm: 0.8615063841322929, iteration: 424745
loss: 0.9386391639709473,grad_norm: 0.7374698033521221, iteration: 424746
loss: 0.9936547875404358,grad_norm: 0.9999993383840227, iteration: 424747
loss: 0.9836652874946594,grad_norm: 0.854612187204337, iteration: 424748
loss: 0.9898203611373901,grad_norm: 0.7830721970683535, iteration: 424749
loss: 1.0084296464920044,grad_norm: 0.7476125884860993, iteration: 424750
loss: 1.0265921354293823,grad_norm: 0.7072406349942334, iteration: 424751
loss: 0.9831259846687317,grad_norm: 0.834444163564789, iteration: 424752
loss: 1.0478743314743042,grad_norm: 0.6306201759003328, iteration: 424753
loss: 1.018600344657898,grad_norm: 0.9999997897874967, iteration: 424754
loss: 1.0409873723983765,grad_norm: 0.8778570158548842, iteration: 424755
loss: 1.0008225440979004,grad_norm: 0.7990520620361499, iteration: 424756
loss: 0.9730821847915649,grad_norm: 0.7945499283404878, iteration: 424757
loss: 1.0246022939682007,grad_norm: 0.8008081252105486, iteration: 424758
loss: 1.0170940160751343,grad_norm: 0.9660934988918872, iteration: 424759
loss: 1.0336271524429321,grad_norm: 0.9126859136580993, iteration: 424760
loss: 1.00265634059906,grad_norm: 0.6819937464543464, iteration: 424761
loss: 1.015305757522583,grad_norm: 0.7597202141522046, iteration: 424762
loss: 1.0397197008132935,grad_norm: 0.7688506053773106, iteration: 424763
loss: 1.0206904411315918,grad_norm: 0.8061397544202542, iteration: 424764
loss: 1.0632001161575317,grad_norm: 0.7362112673330415, iteration: 424765
loss: 1.0125073194503784,grad_norm: 0.8127289621414221, iteration: 424766
loss: 1.0380536317825317,grad_norm: 0.9999999447177915, iteration: 424767
loss: 1.064303994178772,grad_norm: 0.971938733003585, iteration: 424768
loss: 1.0241427421569824,grad_norm: 0.7488319419751528, iteration: 424769
loss: 1.0115025043487549,grad_norm: 0.7080954405321989, iteration: 424770
loss: 0.992726743221283,grad_norm: 0.9829125969444619, iteration: 424771
loss: 0.9740391969680786,grad_norm: 0.8867452342637657, iteration: 424772
loss: 0.9917622804641724,grad_norm: 0.7954503713454321, iteration: 424773
loss: 1.0144816637039185,grad_norm: 0.9999998690346729, iteration: 424774
loss: 0.986043393611908,grad_norm: 0.8153380144944593, iteration: 424775
loss: 1.0257892608642578,grad_norm: 0.7204765660107619, iteration: 424776
loss: 0.9560753703117371,grad_norm: 0.7855718470914168, iteration: 424777
loss: 0.9789084196090698,grad_norm: 0.7004253453623069, iteration: 424778
loss: 1.0313316583633423,grad_norm: 0.912495772634497, iteration: 424779
loss: 1.0240533351898193,grad_norm: 0.7541555367345394, iteration: 424780
loss: 0.9481514692306519,grad_norm: 0.6332594819794576, iteration: 424781
loss: 0.9983851313591003,grad_norm: 0.8092449989589989, iteration: 424782
loss: 1.0709978342056274,grad_norm: 0.8135175490665368, iteration: 424783
loss: 0.9941264986991882,grad_norm: 0.6969381033787264, iteration: 424784
loss: 1.0674322843551636,grad_norm: 0.9733709704298233, iteration: 424785
loss: 1.0190722942352295,grad_norm: 0.7352502712452316, iteration: 424786
loss: 1.0463823080062866,grad_norm: 0.7824108714621777, iteration: 424787
loss: 0.9555416703224182,grad_norm: 0.6766013325856, iteration: 424788
loss: 1.0477501153945923,grad_norm: 0.9999999401500872, iteration: 424789
loss: 1.0142101049423218,grad_norm: 0.7442368754852171, iteration: 424790
loss: 0.991151750087738,grad_norm: 0.9642726248039387, iteration: 424791
loss: 1.0395820140838623,grad_norm: 0.8162909958717993, iteration: 424792
loss: 0.9811070561408997,grad_norm: 0.7180022233234775, iteration: 424793
loss: 0.985813319683075,grad_norm: 0.7670058880004941, iteration: 424794
loss: 1.0156383514404297,grad_norm: 0.84704638534585, iteration: 424795
loss: 1.0700656175613403,grad_norm: 0.8862374115487234, iteration: 424796
loss: 1.0968499183654785,grad_norm: 0.8180863175062977, iteration: 424797
loss: 1.0219475030899048,grad_norm: 0.9999998891930854, iteration: 424798
loss: 1.043819785118103,grad_norm: 0.9636543357782633, iteration: 424799
loss: 1.0430445671081543,grad_norm: 0.9296470652565796, iteration: 424800
loss: 0.9819468855857849,grad_norm: 0.7284726740872468, iteration: 424801
loss: 0.9950219988822937,grad_norm: 0.9999998106892557, iteration: 424802
loss: 1.0047417879104614,grad_norm: 0.7678619761915441, iteration: 424803
loss: 1.0053324699401855,grad_norm: 0.8072287940488506, iteration: 424804
loss: 0.9629220366477966,grad_norm: 0.7731366348250037, iteration: 424805
loss: 1.045615553855896,grad_norm: 0.8070602505902094, iteration: 424806
loss: 0.98810875415802,grad_norm: 0.7999335799874637, iteration: 424807
loss: 1.0148917436599731,grad_norm: 0.9018767782611874, iteration: 424808
loss: 1.020135760307312,grad_norm: 0.8631601085507302, iteration: 424809
loss: 0.993732750415802,grad_norm: 0.5959213712542386, iteration: 424810
loss: 1.0606989860534668,grad_norm: 0.9999998485067209, iteration: 424811
loss: 0.993601381778717,grad_norm: 0.67515070729597, iteration: 424812
loss: 1.0432003736495972,grad_norm: 0.9059661238300893, iteration: 424813
loss: 1.0103517770767212,grad_norm: 0.7199495014507248, iteration: 424814
loss: 1.0317317247390747,grad_norm: 0.9179579551567676, iteration: 424815
loss: 0.9868668913841248,grad_norm: 0.9999991766407027, iteration: 424816
loss: 0.9752851128578186,grad_norm: 0.6811099350321348, iteration: 424817
loss: 1.0012593269348145,grad_norm: 0.86590654920595, iteration: 424818
loss: 0.9860419034957886,grad_norm: 0.7333770575378413, iteration: 424819
loss: 0.9947205185890198,grad_norm: 0.839105604257554, iteration: 424820
loss: 1.016174554824829,grad_norm: 0.6240351882505307, iteration: 424821
loss: 0.9990622401237488,grad_norm: 0.9194622424777185, iteration: 424822
loss: 1.0374314785003662,grad_norm: 0.9999995011728988, iteration: 424823
loss: 0.997082531452179,grad_norm: 0.7981370613678033, iteration: 424824
loss: 1.0488349199295044,grad_norm: 0.9999998262874599, iteration: 424825
loss: 1.0103895664215088,grad_norm: 0.6942888352023975, iteration: 424826
loss: 1.0533874034881592,grad_norm: 0.7745073079713095, iteration: 424827
loss: 1.1077172756195068,grad_norm: 0.999999290828099, iteration: 424828
loss: 1.018365740776062,grad_norm: 0.9176763190234919, iteration: 424829
loss: 0.9815746545791626,grad_norm: 0.7858796485630425, iteration: 424830
loss: 0.9922140836715698,grad_norm: 0.7394782460752086, iteration: 424831
loss: 0.9874423146247864,grad_norm: 0.7175487307680749, iteration: 424832
loss: 1.0118650197982788,grad_norm: 0.6934220220671664, iteration: 424833
loss: 1.0360352993011475,grad_norm: 0.7953292851387863, iteration: 424834
loss: 1.0587412118911743,grad_norm: 0.9754888253584262, iteration: 424835
loss: 1.0376267433166504,grad_norm: 0.8687743233359148, iteration: 424836
loss: 0.9764269590377808,grad_norm: 0.8394784158713987, iteration: 424837
loss: 1.002121925354004,grad_norm: 0.7249932905624934, iteration: 424838
loss: 1.0760087966918945,grad_norm: 0.9999991660514497, iteration: 424839
loss: 1.0310529470443726,grad_norm: 0.7287972206850086, iteration: 424840
loss: 0.9936636686325073,grad_norm: 0.7653685746401108, iteration: 424841
loss: 0.9494497776031494,grad_norm: 0.7570245022681431, iteration: 424842
loss: 0.9877869486808777,grad_norm: 0.6895055367861166, iteration: 424843
loss: 1.089402437210083,grad_norm: 0.9999995687329635, iteration: 424844
loss: 1.060691475868225,grad_norm: 0.9999992281916199, iteration: 424845
loss: 1.0109620094299316,grad_norm: 0.693734010967053, iteration: 424846
loss: 1.080446481704712,grad_norm: 0.7944357359787025, iteration: 424847
loss: 0.9750060439109802,grad_norm: 0.7377680867023714, iteration: 424848
loss: 1.0216087102890015,grad_norm: 0.878518097444205, iteration: 424849
loss: 0.9958182573318481,grad_norm: 0.7868131607018023, iteration: 424850
loss: 0.9963411092758179,grad_norm: 0.6657287341525054, iteration: 424851
loss: 0.9859582781791687,grad_norm: 0.7074020356612989, iteration: 424852
loss: 1.0780044794082642,grad_norm: 0.9999999141897413, iteration: 424853
loss: 1.0234323740005493,grad_norm: 0.9290454829010748, iteration: 424854
loss: 0.998077929019928,grad_norm: 0.8406364763946234, iteration: 424855
loss: 1.0037636756896973,grad_norm: 0.8499648738527736, iteration: 424856
loss: 1.0071361064910889,grad_norm: 0.9999998743998858, iteration: 424857
loss: 1.033226728439331,grad_norm: 0.9098176762548217, iteration: 424858
loss: 0.9936808347702026,grad_norm: 0.9207998663887308, iteration: 424859
loss: 0.9940503835678101,grad_norm: 0.9999991895658872, iteration: 424860
loss: 0.9608898758888245,grad_norm: 0.7230128474063569, iteration: 424861
loss: 1.035578966140747,grad_norm: 0.9600406543995417, iteration: 424862
loss: 0.990168035030365,grad_norm: 0.999999284354247, iteration: 424863
loss: 1.004471778869629,grad_norm: 0.8546379205593696, iteration: 424864
loss: 0.98088538646698,grad_norm: 0.7476547284757649, iteration: 424865
loss: 0.9957694411277771,grad_norm: 0.8694952972732582, iteration: 424866
loss: 1.0420637130737305,grad_norm: 0.7561571001343008, iteration: 424867
loss: 0.9924299120903015,grad_norm: 0.8870644481826206, iteration: 424868
loss: 0.9941234588623047,grad_norm: 0.813645365530916, iteration: 424869
loss: 1.0659544467926025,grad_norm: 0.9999994106993778, iteration: 424870
loss: 0.995996356010437,grad_norm: 0.8167982271803997, iteration: 424871
loss: 0.9749679565429688,grad_norm: 0.9999996944734464, iteration: 424872
loss: 1.0048907995224,grad_norm: 0.7511417421428437, iteration: 424873
loss: 1.0110830068588257,grad_norm: 0.6937228789693024, iteration: 424874
loss: 1.0597584247589111,grad_norm: 0.999999189908034, iteration: 424875
loss: 1.0007539987564087,grad_norm: 0.7906825725303128, iteration: 424876
loss: 1.0203510522842407,grad_norm: 0.8462711677257667, iteration: 424877
loss: 0.9980556964874268,grad_norm: 0.9778889703268283, iteration: 424878
loss: 0.9957839250564575,grad_norm: 0.8743227181482875, iteration: 424879
loss: 1.0316659212112427,grad_norm: 0.9117819486610478, iteration: 424880
loss: 1.0090274810791016,grad_norm: 0.6399394715175739, iteration: 424881
loss: 1.0739368200302124,grad_norm: 0.999999163746852, iteration: 424882
loss: 0.9857679605484009,grad_norm: 0.7180616405591241, iteration: 424883
loss: 1.0140639543533325,grad_norm: 0.9999991597606949, iteration: 424884
loss: 0.9642186164855957,grad_norm: 0.7459501037033025, iteration: 424885
loss: 1.0609021186828613,grad_norm: 0.9999999751342127, iteration: 424886
loss: 0.9991201162338257,grad_norm: 0.8794237281382907, iteration: 424887
loss: 1.0045276880264282,grad_norm: 0.9169788075304186, iteration: 424888
loss: 1.0043610334396362,grad_norm: 0.7116430084807638, iteration: 424889
loss: 0.9985798597335815,grad_norm: 0.7568116329183718, iteration: 424890
loss: 1.0201427936553955,grad_norm: 0.70761800646827, iteration: 424891
loss: 0.974661648273468,grad_norm: 0.7581761147541453, iteration: 424892
loss: 0.9888332486152649,grad_norm: 0.807700400209869, iteration: 424893
loss: 1.043618083000183,grad_norm: 0.7205313463411056, iteration: 424894
loss: 0.9583380818367004,grad_norm: 0.7449752640508825, iteration: 424895
loss: 1.0105469226837158,grad_norm: 0.7654179450801178, iteration: 424896
loss: 1.0641167163848877,grad_norm: 0.9999995034535794, iteration: 424897
loss: 1.0166916847229004,grad_norm: 0.7861043593345842, iteration: 424898
loss: 0.9996059536933899,grad_norm: 0.7319444273683657, iteration: 424899
loss: 1.0017789602279663,grad_norm: 0.9999998600688379, iteration: 424900
loss: 1.0074256658554077,grad_norm: 0.7378294967560234, iteration: 424901
loss: 0.9872956871986389,grad_norm: 0.7795267451115208, iteration: 424902
loss: 1.02472984790802,grad_norm: 0.7313088952403101, iteration: 424903
loss: 0.9846370816230774,grad_norm: 0.8112985641894144, iteration: 424904
loss: 0.9950810670852661,grad_norm: 0.6680265743716629, iteration: 424905
loss: 1.021298885345459,grad_norm: 0.8103690499185282, iteration: 424906
loss: 0.9769154787063599,grad_norm: 0.676348028006812, iteration: 424907
loss: 1.0069074630737305,grad_norm: 0.834277630219482, iteration: 424908
loss: 1.0242336988449097,grad_norm: 0.999999770631788, iteration: 424909
loss: 0.9995893239974976,grad_norm: 0.9999990875691954, iteration: 424910
loss: 0.9890886545181274,grad_norm: 0.873187168862614, iteration: 424911
loss: 1.0629336833953857,grad_norm: 1.0000000413011982, iteration: 424912
loss: 1.037533164024353,grad_norm: 0.7422355421827914, iteration: 424913
loss: 0.992457389831543,grad_norm: 0.847709632763574, iteration: 424914
loss: 0.9863638877868652,grad_norm: 0.6779492108182734, iteration: 424915
loss: 0.9770957827568054,grad_norm: 0.9999993457576664, iteration: 424916
loss: 0.9928049445152283,grad_norm: 0.7935352124169762, iteration: 424917
loss: 1.0349006652832031,grad_norm: 0.8507993039131141, iteration: 424918
loss: 1.0254684686660767,grad_norm: 0.8197082057891208, iteration: 424919
loss: 1.0721266269683838,grad_norm: 0.9999989969053346, iteration: 424920
loss: 1.0093623399734497,grad_norm: 0.8950236800657381, iteration: 424921
loss: 1.0198832750320435,grad_norm: 0.9448745641175834, iteration: 424922
loss: 1.019206166267395,grad_norm: 0.8858396579535452, iteration: 424923
loss: 1.0295588970184326,grad_norm: 0.7872011221245818, iteration: 424924
loss: 0.9931406378746033,grad_norm: 0.7763374400583549, iteration: 424925
loss: 0.989431619644165,grad_norm: 0.6706845503574018, iteration: 424926
loss: 1.0131144523620605,grad_norm: 0.7531373517988325, iteration: 424927
loss: 1.0444735288619995,grad_norm: 0.928301989102729, iteration: 424928
loss: 1.017863154411316,grad_norm: 0.9856809506515756, iteration: 424929
loss: 1.0264674425125122,grad_norm: 0.8083949837510609, iteration: 424930
loss: 1.0275791883468628,grad_norm: 0.8214953356947098, iteration: 424931
loss: 1.0053765773773193,grad_norm: 0.7820983953996271, iteration: 424932
loss: 1.0233596563339233,grad_norm: 0.6627739177140309, iteration: 424933
loss: 0.991981029510498,grad_norm: 0.7301698556521686, iteration: 424934
loss: 0.9898506999015808,grad_norm: 0.832549776739604, iteration: 424935
loss: 1.009372353553772,grad_norm: 0.9999998124488457, iteration: 424936
loss: 1.0266164541244507,grad_norm: 0.9999997141595327, iteration: 424937
loss: 0.9831261038780212,grad_norm: 0.7848858490613838, iteration: 424938
loss: 1.044805645942688,grad_norm: 0.9026515440794256, iteration: 424939
loss: 1.021094560623169,grad_norm: 0.6584127949345613, iteration: 424940
loss: 0.9726689457893372,grad_norm: 0.9999992891930403, iteration: 424941
loss: 0.9847927093505859,grad_norm: 0.8791271137711139, iteration: 424942
loss: 1.0234403610229492,grad_norm: 0.9999993167178017, iteration: 424943
loss: 1.063283920288086,grad_norm: 0.8591306362928426, iteration: 424944
loss: 0.9887908101081848,grad_norm: 0.7712625575459746, iteration: 424945
loss: 0.9795244932174683,grad_norm: 0.7367012927255324, iteration: 424946
loss: 1.0200210809707642,grad_norm: 0.7940306200910995, iteration: 424947
loss: 1.1080067157745361,grad_norm: 0.9999989788917434, iteration: 424948
loss: 1.0685484409332275,grad_norm: 0.9331493608847661, iteration: 424949
loss: 0.9895517826080322,grad_norm: 0.8089932510034673, iteration: 424950
loss: 1.0392520427703857,grad_norm: 0.7901868213682128, iteration: 424951
loss: 0.9503579139709473,grad_norm: 0.7679070558665244, iteration: 424952
loss: 0.9834660887718201,grad_norm: 0.8284822780539959, iteration: 424953
loss: 0.9827193021774292,grad_norm: 0.7603030139699533, iteration: 424954
loss: 0.9901047945022583,grad_norm: 0.7540010475953317, iteration: 424955
loss: 0.9958270788192749,grad_norm: 0.8467809152685405, iteration: 424956
loss: 0.9873053431510925,grad_norm: 0.706286290888591, iteration: 424957
loss: 0.9858900308609009,grad_norm: 0.6732878265052276, iteration: 424958
loss: 0.9958106279373169,grad_norm: 0.7187192942265661, iteration: 424959
loss: 1.0182852745056152,grad_norm: 0.9999990985116692, iteration: 424960
loss: 0.9981478452682495,grad_norm: 0.6834259663442629, iteration: 424961
loss: 0.9704104661941528,grad_norm: 0.6858439943444498, iteration: 424962
loss: 1.0305664539337158,grad_norm: 0.9999990935049788, iteration: 424963
loss: 1.0190812349319458,grad_norm: 0.8244584691324152, iteration: 424964
loss: 1.0002723932266235,grad_norm: 0.6552610631010143, iteration: 424965
loss: 0.9993356466293335,grad_norm: 0.6424690495139752, iteration: 424966
loss: 0.9870785474777222,grad_norm: 0.7153529727630783, iteration: 424967
loss: 1.0001899003982544,grad_norm: 0.8022979550936353, iteration: 424968
loss: 1.0302647352218628,grad_norm: 0.8523934504979072, iteration: 424969
loss: 0.9972409009933472,grad_norm: 0.9871045924744541, iteration: 424970
loss: 1.0131481885910034,grad_norm: 0.8128789869304314, iteration: 424971
loss: 1.012279987335205,grad_norm: 0.86486359466909, iteration: 424972
loss: 0.9997076988220215,grad_norm: 0.7758921715127205, iteration: 424973
loss: 1.0461446046829224,grad_norm: 0.9999994681162581, iteration: 424974
loss: 1.035866618156433,grad_norm: 0.7469718015051231, iteration: 424975
loss: 1.0163601636886597,grad_norm: 0.6446343853169271, iteration: 424976
loss: 1.0088603496551514,grad_norm: 0.8125550000670929, iteration: 424977
loss: 1.0409871339797974,grad_norm: 0.6985688808040016, iteration: 424978
loss: 0.9621127843856812,grad_norm: 0.8404257116295718, iteration: 424979
loss: 1.044039249420166,grad_norm: 0.99999920553275, iteration: 424980
loss: 1.0179005861282349,grad_norm: 0.7565018042037499, iteration: 424981
loss: 0.95995032787323,grad_norm: 0.7276900896748842, iteration: 424982
loss: 1.0022913217544556,grad_norm: 0.7944351482717663, iteration: 424983
loss: 1.0182477235794067,grad_norm: 0.9999991503618203, iteration: 424984
loss: 1.0193846225738525,grad_norm: 0.951435990774562, iteration: 424985
loss: 0.9994063973426819,grad_norm: 0.7213854920738264, iteration: 424986
loss: 0.9871081113815308,grad_norm: 0.7383650085986377, iteration: 424987
loss: 1.0008339881896973,grad_norm: 0.9270149573642362, iteration: 424988
loss: 1.0151526927947998,grad_norm: 0.9863177947975915, iteration: 424989
loss: 0.9827050566673279,grad_norm: 0.9999993972885115, iteration: 424990
loss: 0.9855943322181702,grad_norm: 0.873230661021178, iteration: 424991
loss: 1.0069581270217896,grad_norm: 0.7756466690141424, iteration: 424992
loss: 1.0674806833267212,grad_norm: 0.7654195107781987, iteration: 424993
loss: 0.9711681008338928,grad_norm: 0.7233128581016841, iteration: 424994
loss: 1.0159369707107544,grad_norm: 0.9999990934064161, iteration: 424995
loss: 1.038090705871582,grad_norm: 0.9999996572718429, iteration: 424996
loss: 0.9803403615951538,grad_norm: 0.9999992396175285, iteration: 424997
loss: 1.0108133554458618,grad_norm: 0.8459776499041983, iteration: 424998
loss: 0.9677256941795349,grad_norm: 0.7655912928633086, iteration: 424999
loss: 0.9881383776664734,grad_norm: 0.6835675271220717, iteration: 425000
loss: 0.9827711582183838,grad_norm: 0.8487960518114491, iteration: 425001
loss: 0.9841207265853882,grad_norm: 0.6808494664553942, iteration: 425002
loss: 0.9362779259681702,grad_norm: 0.7642780664504253, iteration: 425003
loss: 0.9806426763534546,grad_norm: 0.7335353340584567, iteration: 425004
loss: 1.0298219919204712,grad_norm: 0.8463031609772598, iteration: 425005
loss: 0.976709246635437,grad_norm: 0.8819900201811698, iteration: 425006
loss: 1.0028141736984253,grad_norm: 0.9999994412216978, iteration: 425007
loss: 0.9710474610328674,grad_norm: 0.6692465583015773, iteration: 425008
loss: 1.0256402492523193,grad_norm: 0.8834364652338618, iteration: 425009
loss: 0.9962425231933594,grad_norm: 0.7927908411087138, iteration: 425010
loss: 1.000467300415039,grad_norm: 0.6972000813861854, iteration: 425011
loss: 0.9997488260269165,grad_norm: 0.9999994694599853, iteration: 425012
loss: 1.0082898139953613,grad_norm: 0.9309800154397997, iteration: 425013
loss: 1.0020041465759277,grad_norm: 0.7129341895761528, iteration: 425014
loss: 1.0599251985549927,grad_norm: 0.7548782893297382, iteration: 425015
loss: 1.0051485300064087,grad_norm: 0.7414005913262989, iteration: 425016
loss: 1.0212455987930298,grad_norm: 0.7476177306735277, iteration: 425017
loss: 0.9682310819625854,grad_norm: 0.8307651903304087, iteration: 425018
loss: 0.9875392317771912,grad_norm: 0.7384489142443664, iteration: 425019
loss: 0.9969562292098999,grad_norm: 0.7722777353653979, iteration: 425020
loss: 1.0092405080795288,grad_norm: 0.6929562814651091, iteration: 425021
loss: 0.9968790411949158,grad_norm: 0.9999994828627928, iteration: 425022
loss: 1.0034797191619873,grad_norm: 0.7819633213998638, iteration: 425023
loss: 0.9586670994758606,grad_norm: 0.8894956917742562, iteration: 425024
loss: 0.9808646440505981,grad_norm: 0.8503094196714394, iteration: 425025
loss: 0.9878909587860107,grad_norm: 0.7399158768150467, iteration: 425026
loss: 0.9829892516136169,grad_norm: 0.6889320447484139, iteration: 425027
loss: 1.018356204032898,grad_norm: 0.8976125075903966, iteration: 425028
loss: 1.024564266204834,grad_norm: 0.5875911728214908, iteration: 425029
loss: 0.9933488965034485,grad_norm: 0.7892362759295094, iteration: 425030
loss: 1.0049365758895874,grad_norm: 0.7248017165964492, iteration: 425031
loss: 0.9593176245689392,grad_norm: 0.7079923233033008, iteration: 425032
loss: 1.0279372930526733,grad_norm: 0.6234788482768298, iteration: 425033
loss: 1.0188215970993042,grad_norm: 0.7557766038157273, iteration: 425034
loss: 1.0027669668197632,grad_norm: 0.8649630547107157, iteration: 425035
loss: 1.0314785242080688,grad_norm: 0.6910643272211758, iteration: 425036
loss: 1.0468034744262695,grad_norm: 0.779858844658218, iteration: 425037
loss: 0.9697161912918091,grad_norm: 0.7618736946470429, iteration: 425038
loss: 0.9535555243492126,grad_norm: 0.8230229345912953, iteration: 425039
loss: 1.0040100812911987,grad_norm: 0.9999991541263983, iteration: 425040
loss: 0.9749851822853088,grad_norm: 0.7562524426681607, iteration: 425041
loss: 1.0218191146850586,grad_norm: 0.720085650216316, iteration: 425042
loss: 1.0294634103775024,grad_norm: 0.9789237984107417, iteration: 425043
loss: 1.0084004402160645,grad_norm: 0.6982112504013899, iteration: 425044
loss: 0.9704269170761108,grad_norm: 0.6730544601908397, iteration: 425045
loss: 0.9972773790359497,grad_norm: 0.9999990898766942, iteration: 425046
loss: 1.0212697982788086,grad_norm: 0.9999991232932103, iteration: 425047
loss: 0.9979535341262817,grad_norm: 0.8709464926347102, iteration: 425048
loss: 0.9753305912017822,grad_norm: 0.7361392624160669, iteration: 425049
loss: 0.98795485496521,grad_norm: 0.6724487468978186, iteration: 425050
loss: 0.9807024002075195,grad_norm: 0.6699397558489836, iteration: 425051
loss: 1.0191140174865723,grad_norm: 0.8147428534794843, iteration: 425052
loss: 0.9838665127754211,grad_norm: 0.6082688194439596, iteration: 425053
loss: 0.9728369116783142,grad_norm: 0.7016036119591866, iteration: 425054
loss: 0.9865361452102661,grad_norm: 0.7871794019468913, iteration: 425055
loss: 1.0088907480239868,grad_norm: 0.6805831626700715, iteration: 425056
loss: 1.0055357217788696,grad_norm: 0.7557085290340824, iteration: 425057
loss: 0.9999810457229614,grad_norm: 0.7135921389412251, iteration: 425058
loss: 1.0231842994689941,grad_norm: 0.9999992888550696, iteration: 425059
loss: 1.0392886400222778,grad_norm: 0.9050931983835129, iteration: 425060
loss: 1.0396523475646973,grad_norm: 0.7520837641731304, iteration: 425061
loss: 1.0103453397750854,grad_norm: 0.8373730675789007, iteration: 425062
loss: 0.9945189952850342,grad_norm: 0.6600988002781593, iteration: 425063
loss: 0.980410099029541,grad_norm: 0.7958682374274014, iteration: 425064
loss: 1.0369082689285278,grad_norm: 0.6777429405051351, iteration: 425065
loss: 0.9759387969970703,grad_norm: 0.8998812173129855, iteration: 425066
loss: 1.0072882175445557,grad_norm: 0.7254792124879524, iteration: 425067
loss: 0.9395334720611572,grad_norm: 0.8820336052544949, iteration: 425068
loss: 1.0104455947875977,grad_norm: 0.9999990189241365, iteration: 425069
loss: 1.0264601707458496,grad_norm: 0.85039009697117, iteration: 425070
loss: 1.0203872919082642,grad_norm: 0.8331308316083141, iteration: 425071
loss: 0.9879108667373657,grad_norm: 0.7243999358325789, iteration: 425072
loss: 1.0055063962936401,grad_norm: 0.8040436850161499, iteration: 425073
loss: 1.013508677482605,grad_norm: 0.7160217558953393, iteration: 425074
loss: 1.02084219455719,grad_norm: 0.8153074151466383, iteration: 425075
loss: 0.983052670955658,grad_norm: 0.8411218973970025, iteration: 425076
loss: 0.938040018081665,grad_norm: 0.7975355273991862, iteration: 425077
loss: 0.9924115538597107,grad_norm: 0.8505581196553071, iteration: 425078
loss: 0.9966613054275513,grad_norm: 0.9999996484528012, iteration: 425079
loss: 0.9978436827659607,grad_norm: 0.9999997475306587, iteration: 425080
loss: 1.0051575899124146,grad_norm: 0.6673983147337657, iteration: 425081
loss: 1.0070750713348389,grad_norm: 0.6754733016458682, iteration: 425082
loss: 1.0484200716018677,grad_norm: 0.8036378120633945, iteration: 425083
loss: 0.9982641935348511,grad_norm: 0.7947309354171618, iteration: 425084
loss: 1.0026304721832275,grad_norm: 0.9999997581407015, iteration: 425085
loss: 1.0283734798431396,grad_norm: 0.9999991201243503, iteration: 425086
loss: 0.9723083972930908,grad_norm: 0.7751517046621033, iteration: 425087
loss: 0.967559814453125,grad_norm: 0.7525636164530545, iteration: 425088
loss: 0.977950394153595,grad_norm: 0.8677526101666181, iteration: 425089
loss: 1.0163259506225586,grad_norm: 0.7703674553794159, iteration: 425090
loss: 1.1553888320922852,grad_norm: 0.9999992429663862, iteration: 425091
loss: 1.0055712461471558,grad_norm: 1.0000000711608898, iteration: 425092
loss: 0.9865489602088928,grad_norm: 0.9525641587399042, iteration: 425093
loss: 0.9899324178695679,grad_norm: 0.65916040470234, iteration: 425094
loss: 0.9587432146072388,grad_norm: 0.6959943625979274, iteration: 425095
loss: 1.0042184591293335,grad_norm: 0.8048824394583546, iteration: 425096
loss: 1.002813458442688,grad_norm: 0.8317519849714112, iteration: 425097
loss: 0.993473470211029,grad_norm: 0.67681922930573, iteration: 425098
loss: 0.952204704284668,grad_norm: 0.7649924355803053, iteration: 425099
loss: 0.9987309575080872,grad_norm: 0.9825381260051074, iteration: 425100
loss: 1.0005924701690674,grad_norm: 0.8276248775802071, iteration: 425101
loss: 1.0907968282699585,grad_norm: 0.9999992608932733, iteration: 425102
loss: 0.979343056678772,grad_norm: 0.7569038223294149, iteration: 425103
loss: 1.0692771673202515,grad_norm: 0.9999996166788871, iteration: 425104
loss: 1.0444574356079102,grad_norm: 0.8850514800015504, iteration: 425105
loss: 1.041956901550293,grad_norm: 0.8070902617686151, iteration: 425106
loss: 1.0816419124603271,grad_norm: 0.8326303500811982, iteration: 425107
loss: 0.9948217272758484,grad_norm: 0.8034325275396611, iteration: 425108
loss: 1.0173450708389282,grad_norm: 0.8368990636963326, iteration: 425109
loss: 1.0315481424331665,grad_norm: 0.832907311339627, iteration: 425110
loss: 1.0228976011276245,grad_norm: 0.7196644246481756, iteration: 425111
loss: 0.9939034581184387,grad_norm: 0.8296686943775144, iteration: 425112
loss: 0.9679659008979797,grad_norm: 0.7615598219633026, iteration: 425113
loss: 0.9768741726875305,grad_norm: 0.7520592608360305, iteration: 425114
loss: 1.0212875604629517,grad_norm: 0.600095721112604, iteration: 425115
loss: 1.0444433689117432,grad_norm: 0.9999994553236536, iteration: 425116
loss: 0.9860242009162903,grad_norm: 0.9465425496325264, iteration: 425117
loss: 1.0283318758010864,grad_norm: 0.7853685518423771, iteration: 425118
loss: 1.0113385915756226,grad_norm: 0.9369846658279166, iteration: 425119
loss: 1.0324088335037231,grad_norm: 0.6551981388410101, iteration: 425120
loss: 1.0151493549346924,grad_norm: 0.7458852972709242, iteration: 425121
loss: 1.0272879600524902,grad_norm: 0.9033425304056686, iteration: 425122
loss: 1.010283350944519,grad_norm: 0.800402921804088, iteration: 425123
loss: 0.9987886548042297,grad_norm: 0.8836027863130741, iteration: 425124
loss: 1.1097410917282104,grad_norm: 0.9999995086735021, iteration: 425125
loss: 1.0308836698532104,grad_norm: 0.764745649629966, iteration: 425126
loss: 1.0560656785964966,grad_norm: 0.6841812109975866, iteration: 425127
loss: 1.0070958137512207,grad_norm: 0.9978473433203309, iteration: 425128
loss: 1.0873866081237793,grad_norm: 0.9306306118449957, iteration: 425129
loss: 1.0808008909225464,grad_norm: 0.999999599661973, iteration: 425130
loss: 0.9779791235923767,grad_norm: 0.999999630047296, iteration: 425131
loss: 1.0384728908538818,grad_norm: 0.999999340107637, iteration: 425132
loss: 1.0012247562408447,grad_norm: 0.9999995602987429, iteration: 425133
loss: 1.05290687084198,grad_norm: 0.9999995831569073, iteration: 425134
loss: 1.0233651399612427,grad_norm: 0.9999996649523645, iteration: 425135
loss: 1.0251713991165161,grad_norm: 0.9999998552933367, iteration: 425136
loss: 1.2564444541931152,grad_norm: 0.9999995105307365, iteration: 425137
loss: 1.0347111225128174,grad_norm: 0.9999999587609383, iteration: 425138
loss: 1.05984365940094,grad_norm: 0.9435474563926448, iteration: 425139
loss: 1.0281522274017334,grad_norm: 0.8505633147322955, iteration: 425140
loss: 0.9620741009712219,grad_norm: 0.8006906447448056, iteration: 425141
loss: 1.021567702293396,grad_norm: 0.8585447912724086, iteration: 425142
loss: 0.9894446730613708,grad_norm: 0.7465487907723496, iteration: 425143
loss: 1.0099409818649292,grad_norm: 0.7973476884055613, iteration: 425144
loss: 1.0156811475753784,grad_norm: 0.999999595633954, iteration: 425145
loss: 1.0248844623565674,grad_norm: 0.6513743310480045, iteration: 425146
loss: 1.0335959196090698,grad_norm: 0.7751339838810226, iteration: 425147
loss: 0.9854812622070312,grad_norm: 0.8403721405764387, iteration: 425148
loss: 1.0059385299682617,grad_norm: 0.6429559149590416, iteration: 425149
loss: 1.010707974433899,grad_norm: 0.8946450694507352, iteration: 425150
loss: 0.9890890121459961,grad_norm: 0.7942990035918034, iteration: 425151
loss: 0.9894146919250488,grad_norm: 0.8512163439830942, iteration: 425152
loss: 0.9967849850654602,grad_norm: 0.8196184233806753, iteration: 425153
loss: 1.261606216430664,grad_norm: 0.9999993744950374, iteration: 425154
loss: 0.9719104766845703,grad_norm: 0.8440592235544311, iteration: 425155
loss: 1.0155442953109741,grad_norm: 0.9944468520253592, iteration: 425156
loss: 1.0105564594268799,grad_norm: 0.9229473222793612, iteration: 425157
loss: 1.009390115737915,grad_norm: 0.7828677787266601, iteration: 425158
loss: 1.000618815422058,grad_norm: 0.9716181821113539, iteration: 425159
loss: 0.9914444088935852,grad_norm: 0.7973343605930653, iteration: 425160
loss: 1.027533769607544,grad_norm: 0.9131128782211484, iteration: 425161
loss: 1.0229445695877075,grad_norm: 0.9999991795497576, iteration: 425162
loss: 0.968372106552124,grad_norm: 0.9057215927973354, iteration: 425163
loss: 1.0329434871673584,grad_norm: 0.7527156045683224, iteration: 425164
loss: 0.9884496927261353,grad_norm: 0.6416174562158068, iteration: 425165
loss: 1.0070161819458008,grad_norm: 0.8932379283442833, iteration: 425166
loss: 0.9767947793006897,grad_norm: 0.9999992634222223, iteration: 425167
loss: 1.2487635612487793,grad_norm: 0.9999993707549126, iteration: 425168
loss: 1.0130583047866821,grad_norm: 0.814749077433156, iteration: 425169
loss: 1.0050599575042725,grad_norm: 0.9999999268074917, iteration: 425170
loss: 1.0264633893966675,grad_norm: 0.7606660425466318, iteration: 425171
loss: 1.0354962348937988,grad_norm: 0.731985365219333, iteration: 425172
loss: 1.005059003829956,grad_norm: 0.8317917537203018, iteration: 425173
loss: 1.0004768371582031,grad_norm: 0.7373625355302691, iteration: 425174
loss: 1.0064462423324585,grad_norm: 0.8207168479625683, iteration: 425175
loss: 1.0178383588790894,grad_norm: 0.8956284976832279, iteration: 425176
loss: 1.058279037475586,grad_norm: 0.9999992674540982, iteration: 425177
loss: 1.0097852945327759,grad_norm: 0.854158405419097, iteration: 425178
loss: 1.007260799407959,grad_norm: 0.8419396267445554, iteration: 425179
loss: 1.023405909538269,grad_norm: 0.70044155639001, iteration: 425180
loss: 1.0784952640533447,grad_norm: 0.9211249943410494, iteration: 425181
loss: 1.048317551612854,grad_norm: 0.9341486780932102, iteration: 425182
loss: 1.0425974130630493,grad_norm: 0.7507757023188352, iteration: 425183
loss: 1.0473581552505493,grad_norm: 0.7926556335511133, iteration: 425184
loss: 1.0921403169631958,grad_norm: 0.9999999441405863, iteration: 425185
loss: 1.1967928409576416,grad_norm: 0.9362816690669544, iteration: 425186
loss: 1.1310036182403564,grad_norm: 0.9659648421078497, iteration: 425187
loss: 1.031832218170166,grad_norm: 0.9999995791949772, iteration: 425188
loss: 1.0230635404586792,grad_norm: 0.7754439862137634, iteration: 425189
loss: 1.0020382404327393,grad_norm: 0.7408141293741171, iteration: 425190
loss: 0.9973592162132263,grad_norm: 0.9075987747082098, iteration: 425191
loss: 1.000578761100769,grad_norm: 0.7224455526635446, iteration: 425192
loss: 1.0270122289657593,grad_norm: 0.7032726157801773, iteration: 425193
loss: 1.0280370712280273,grad_norm: 0.6838736680082358, iteration: 425194
loss: 1.049216866493225,grad_norm: 0.9999995208563244, iteration: 425195
loss: 1.014664888381958,grad_norm: 0.6740024771493837, iteration: 425196
loss: 1.0015907287597656,grad_norm: 0.9625144488123564, iteration: 425197
loss: 0.9654082655906677,grad_norm: 0.7213838428417727, iteration: 425198
loss: 1.0658458471298218,grad_norm: 0.9999991895085597, iteration: 425199
loss: 1.0011078119277954,grad_norm: 0.7375273666318065, iteration: 425200
loss: 1.01750648021698,grad_norm: 0.6722710977389789, iteration: 425201
loss: 1.0407549142837524,grad_norm: 0.7168544900994085, iteration: 425202
loss: 0.966220498085022,grad_norm: 0.7029419317414679, iteration: 425203
loss: 1.099162220954895,grad_norm: 0.898651869466638, iteration: 425204
loss: 1.0697633028030396,grad_norm: 0.9740031800761759, iteration: 425205
loss: 1.0247427225112915,grad_norm: 0.9999991280190703, iteration: 425206
loss: 0.9656112790107727,grad_norm: 0.7734135787306109, iteration: 425207
loss: 1.0353164672851562,grad_norm: 0.802577586359025, iteration: 425208
loss: 1.1501400470733643,grad_norm: 0.8441573721912101, iteration: 425209
loss: 0.9713449478149414,grad_norm: 0.8306963020366154, iteration: 425210
loss: 1.0390859842300415,grad_norm: 0.7524552835733339, iteration: 425211
loss: 1.0164835453033447,grad_norm: 0.7904215072134035, iteration: 425212
loss: 0.9563435316085815,grad_norm: 0.7694449442141523, iteration: 425213
loss: 1.0000754594802856,grad_norm: 0.7755006366228561, iteration: 425214
loss: 0.9938817024230957,grad_norm: 0.7876042095090996, iteration: 425215
loss: 0.995941162109375,grad_norm: 0.7316562481747434, iteration: 425216
loss: 1.0565861463546753,grad_norm: 0.6084256548604307, iteration: 425217
loss: 0.971760630607605,grad_norm: 0.7779243664688764, iteration: 425218
loss: 0.997551679611206,grad_norm: 0.8261764827558843, iteration: 425219
loss: 1.0148667097091675,grad_norm: 0.9999990133121986, iteration: 425220
loss: 1.0042507648468018,grad_norm: 0.6765178500865829, iteration: 425221
loss: 0.9860756993293762,grad_norm: 0.720427598326496, iteration: 425222
loss: 1.093392014503479,grad_norm: 0.8046683445529744, iteration: 425223
loss: 0.965179443359375,grad_norm: 0.9269481096905052, iteration: 425224
loss: 0.9654266238212585,grad_norm: 0.6773178715240502, iteration: 425225
loss: 0.9642066359519958,grad_norm: 0.8066387305680384, iteration: 425226
loss: 0.9963318705558777,grad_norm: 0.6450644406842886, iteration: 425227
loss: 1.035946249961853,grad_norm: 0.6448396811039829, iteration: 425228
loss: 0.9664310216903687,grad_norm: 0.7161897594461168, iteration: 425229
loss: 1.0181915760040283,grad_norm: 0.7769624117540019, iteration: 425230
loss: 0.9703059792518616,grad_norm: 0.8155381476195321, iteration: 425231
loss: 1.0296046733856201,grad_norm: 0.9999998668637793, iteration: 425232
loss: 1.0591543912887573,grad_norm: 0.7612637745357378, iteration: 425233
loss: 0.9841656684875488,grad_norm: 0.7942811480419796, iteration: 425234
loss: 1.086114525794983,grad_norm: 0.9999996471970386, iteration: 425235
loss: 1.02866792678833,grad_norm: 0.7749246806750578, iteration: 425236
loss: 1.0306835174560547,grad_norm: 0.7414745812450052, iteration: 425237
loss: 0.9865154027938843,grad_norm: 0.8484974132656848, iteration: 425238
loss: 0.9792706966400146,grad_norm: 0.7093923359741818, iteration: 425239
loss: 0.9773238897323608,grad_norm: 0.624540765041933, iteration: 425240
loss: 1.0017329454421997,grad_norm: 0.9723436528747491, iteration: 425241
loss: 1.1009058952331543,grad_norm: 0.967188232770773, iteration: 425242
loss: 1.018782615661621,grad_norm: 0.8817716111396022, iteration: 425243
loss: 1.042662501335144,grad_norm: 0.8643482872900613, iteration: 425244
loss: 1.0382250547409058,grad_norm: 0.7596920223235796, iteration: 425245
loss: 0.9864182472229004,grad_norm: 0.6723994550606873, iteration: 425246
loss: 1.0149641036987305,grad_norm: 0.7551375741148916, iteration: 425247
loss: 1.1243964433670044,grad_norm: 0.9999995337153971, iteration: 425248
loss: 1.0183591842651367,grad_norm: 0.7916909213156542, iteration: 425249
loss: 1.0469386577606201,grad_norm: 0.7038522075889733, iteration: 425250
loss: 0.9944996237754822,grad_norm: 0.8384804262962331, iteration: 425251
loss: 0.986173689365387,grad_norm: 0.8842829492781287, iteration: 425252
loss: 1.0272756814956665,grad_norm: 0.7373687310539058, iteration: 425253
loss: 0.975183367729187,grad_norm: 0.8229414878658196, iteration: 425254
loss: 1.0096194744110107,grad_norm: 0.7598291459815869, iteration: 425255
loss: 1.018642544746399,grad_norm: 0.8056775736309424, iteration: 425256
loss: 1.0283727645874023,grad_norm: 0.7325260033307645, iteration: 425257
loss: 0.9998500347137451,grad_norm: 0.8092269435141567, iteration: 425258
loss: 1.0025039911270142,grad_norm: 0.9416764367564344, iteration: 425259
loss: 1.003179907798767,grad_norm: 0.7733418121437778, iteration: 425260
loss: 0.99225252866745,grad_norm: 0.7043920296131176, iteration: 425261
loss: 0.9959200620651245,grad_norm: 0.8917692031165373, iteration: 425262
loss: 1.0002332925796509,grad_norm: 0.7460920348291722, iteration: 425263
loss: 1.0224961042404175,grad_norm: 0.7618241655890693, iteration: 425264
loss: 1.0185770988464355,grad_norm: 0.8493724099281232, iteration: 425265
loss: 1.0971674919128418,grad_norm: 0.9999992229959939, iteration: 425266
loss: 1.0833224058151245,grad_norm: 0.9999994306490878, iteration: 425267
loss: 0.9668251276016235,grad_norm: 0.6889733500152074, iteration: 425268
loss: 1.0203911066055298,grad_norm: 0.6354429918302059, iteration: 425269
loss: 0.9711073637008667,grad_norm: 0.8402009292610624, iteration: 425270
loss: 0.9917137026786804,grad_norm: 0.6875958550118527, iteration: 425271
loss: 0.9889528751373291,grad_norm: 0.7599416375017547, iteration: 425272
loss: 0.9726353883743286,grad_norm: 0.9208071542761384, iteration: 425273
loss: 1.1198841333389282,grad_norm: 0.9999999485750145, iteration: 425274
loss: 0.9871368408203125,grad_norm: 0.8311735932820017, iteration: 425275
loss: 0.9913524389266968,grad_norm: 0.7642904153917696, iteration: 425276
loss: 1.0524743795394897,grad_norm: 0.7311813404230406, iteration: 425277
loss: 0.9728580117225647,grad_norm: 0.9999999608944737, iteration: 425278
loss: 0.9970181584358215,grad_norm: 0.7149335914576809, iteration: 425279
loss: 1.0217020511627197,grad_norm: 0.8461280516257559, iteration: 425280
loss: 0.9535127878189087,grad_norm: 0.8142241255639542, iteration: 425281
loss: 0.9887214303016663,grad_norm: 0.8166683014229679, iteration: 425282
loss: 1.0493121147155762,grad_norm: 0.7645674104290793, iteration: 425283
loss: 1.0408382415771484,grad_norm: 0.9999991467329482, iteration: 425284
loss: 1.0007779598236084,grad_norm: 0.8267730269142801, iteration: 425285
loss: 1.0186874866485596,grad_norm: 0.775780258055163, iteration: 425286
loss: 0.9811277389526367,grad_norm: 0.7016454079702714, iteration: 425287
loss: 1.0058079957962036,grad_norm: 0.8381540039423101, iteration: 425288
loss: 1.0123289823532104,grad_norm: 0.7290761335624593, iteration: 425289
loss: 1.0456122159957886,grad_norm: 0.797630175403109, iteration: 425290
loss: 1.0171197652816772,grad_norm: 0.8895162853704285, iteration: 425291
loss: 1.0198508501052856,grad_norm: 0.6699810509740146, iteration: 425292
loss: 0.9938501715660095,grad_norm: 0.7921912861542723, iteration: 425293
loss: 1.0109964609146118,grad_norm: 0.9999998056007835, iteration: 425294
loss: 0.9742845892906189,grad_norm: 0.7958850493679307, iteration: 425295
loss: 1.0368791818618774,grad_norm: 0.8578402962241636, iteration: 425296
loss: 1.0212864875793457,grad_norm: 0.7244287743672271, iteration: 425297
loss: 1.0054454803466797,grad_norm: 0.9219541693132427, iteration: 425298
loss: 1.1008410453796387,grad_norm: 0.9999992497128785, iteration: 425299
loss: 0.9700114727020264,grad_norm: 0.791964229711067, iteration: 425300
loss: 1.0341814756393433,grad_norm: 0.8365009307218674, iteration: 425301
loss: 1.0094590187072754,grad_norm: 0.9999996350073163, iteration: 425302
loss: 1.0084772109985352,grad_norm: 0.7507720924752573, iteration: 425303
loss: 0.9759637713432312,grad_norm: 0.7894035558366737, iteration: 425304
loss: 0.991033673286438,grad_norm: 0.8299695605652024, iteration: 425305
loss: 1.073817491531372,grad_norm: 0.8669509987773645, iteration: 425306
loss: 0.9823130369186401,grad_norm: 0.7598683516853669, iteration: 425307
loss: 0.984372079372406,grad_norm: 0.8076579008181471, iteration: 425308
loss: 0.9789872765541077,grad_norm: 0.7629192370799862, iteration: 425309
loss: 1.0942600965499878,grad_norm: 0.9999991287049395, iteration: 425310
loss: 0.9643981456756592,grad_norm: 0.7251928233526723, iteration: 425311
loss: 1.0344785451889038,grad_norm: 0.774463600767766, iteration: 425312
loss: 1.0326189994812012,grad_norm: 0.7123072181739266, iteration: 425313
loss: 1.139513373374939,grad_norm: 0.9999999180053115, iteration: 425314
loss: 1.018930435180664,grad_norm: 0.7134298985653429, iteration: 425315
loss: 1.035285472869873,grad_norm: 0.9999997092752999, iteration: 425316
loss: 0.9982007741928101,grad_norm: 0.7151281260006512, iteration: 425317
loss: 1.0207289457321167,grad_norm: 0.8009259244647939, iteration: 425318
loss: 1.005674958229065,grad_norm: 0.8191765115123916, iteration: 425319
loss: 0.9916303753852844,grad_norm: 0.7561561015825222, iteration: 425320
loss: 0.9860826134681702,grad_norm: 0.7784815157963653, iteration: 425321
loss: 1.0637593269348145,grad_norm: 0.9999996848968632, iteration: 425322
loss: 1.0024068355560303,grad_norm: 0.7689055027481625, iteration: 425323
loss: 1.007375717163086,grad_norm: 0.8318555046674452, iteration: 425324
loss: 0.9789240956306458,grad_norm: 0.7855418291367893, iteration: 425325
loss: 1.0191065073013306,grad_norm: 0.8194558968116443, iteration: 425326
loss: 1.0029085874557495,grad_norm: 0.7127466232584685, iteration: 425327
loss: 1.0072822570800781,grad_norm: 0.6913603656986028, iteration: 425328
loss: 1.0055569410324097,grad_norm: 0.7600027622793563, iteration: 425329
loss: 0.986373782157898,grad_norm: 0.620147277903925, iteration: 425330
loss: 1.0103739500045776,grad_norm: 0.771492485699369, iteration: 425331
loss: 0.9789566993713379,grad_norm: 0.7543099009871709, iteration: 425332
loss: 1.0298539400100708,grad_norm: 0.9999991525166795, iteration: 425333
loss: 1.168630838394165,grad_norm: 0.9999991198219755, iteration: 425334
loss: 0.9919502139091492,grad_norm: 0.7405737021243701, iteration: 425335
loss: 1.0181154012680054,grad_norm: 0.6153606713306229, iteration: 425336
loss: 1.0451960563659668,grad_norm: 0.9999996101387569, iteration: 425337
loss: 1.0080347061157227,grad_norm: 0.6868413324193576, iteration: 425338
loss: 1.0319280624389648,grad_norm: 0.8495320912940593, iteration: 425339
loss: 1.0005590915679932,grad_norm: 0.8542254190584594, iteration: 425340
loss: 0.9936220049858093,grad_norm: 0.8430556101318417, iteration: 425341
loss: 0.9994109869003296,grad_norm: 0.7850846753336269, iteration: 425342
loss: 0.9770150184631348,grad_norm: 0.815621935467825, iteration: 425343
loss: 1.0517115592956543,grad_norm: 0.8441436795326384, iteration: 425344
loss: 1.028430700302124,grad_norm: 0.8677109796614773, iteration: 425345
loss: 0.999092161655426,grad_norm: 0.8092338868724942, iteration: 425346
loss: 1.0023887157440186,grad_norm: 0.9999997901720724, iteration: 425347
loss: 1.0403169393539429,grad_norm: 0.9999991918665612, iteration: 425348
loss: 1.0393565893173218,grad_norm: 0.8034518715778346, iteration: 425349
loss: 0.9739988446235657,grad_norm: 0.7682809732664198, iteration: 425350
loss: 0.9763585925102234,grad_norm: 0.8512374706885011, iteration: 425351
loss: 0.9960429668426514,grad_norm: 0.7351689782889177, iteration: 425352
loss: 0.9890845417976379,grad_norm: 0.7614377176311207, iteration: 425353
loss: 1.031553030014038,grad_norm: 0.7703657681115897, iteration: 425354
loss: 1.0025086402893066,grad_norm: 0.8468171709084007, iteration: 425355
loss: 0.9804466366767883,grad_norm: 0.6867946810356945, iteration: 425356
loss: 1.017572283744812,grad_norm: 0.8041915487736302, iteration: 425357
loss: 1.0008257627487183,grad_norm: 0.7201170623644694, iteration: 425358
loss: 1.0011255741119385,grad_norm: 0.9747183174959908, iteration: 425359
loss: 0.9928945302963257,grad_norm: 0.7113644934057878, iteration: 425360
loss: 0.9730974435806274,grad_norm: 0.7275886385316561, iteration: 425361
loss: 0.9812880158424377,grad_norm: 0.7443246349482814, iteration: 425362
loss: 1.0221821069717407,grad_norm: 0.999999871075354, iteration: 425363
loss: 1.0203752517700195,grad_norm: 0.9999997641241383, iteration: 425364
loss: 1.0182452201843262,grad_norm: 0.9999994750801545, iteration: 425365
loss: 0.9855272769927979,grad_norm: 0.8616270539909427, iteration: 425366
loss: 1.0003243684768677,grad_norm: 0.6476250956621842, iteration: 425367
loss: 0.9829508066177368,grad_norm: 0.8430302133590369, iteration: 425368
loss: 1.0092345476150513,grad_norm: 0.7274012751264115, iteration: 425369
loss: 0.9995046854019165,grad_norm: 0.7469341946830163, iteration: 425370
loss: 0.9495827555656433,grad_norm: 0.6560092730565641, iteration: 425371
loss: 1.0010634660720825,grad_norm: 0.6882515899453996, iteration: 425372
loss: 1.016452431678772,grad_norm: 0.7772448528252376, iteration: 425373
loss: 1.107573390007019,grad_norm: 0.9999999004645972, iteration: 425374
loss: 1.009409785270691,grad_norm: 0.7135121554730427, iteration: 425375
loss: 1.0025709867477417,grad_norm: 0.5883262461917874, iteration: 425376
loss: 1.013628363609314,grad_norm: 0.7377921103836094, iteration: 425377
loss: 1.0452330112457275,grad_norm: 0.9609013808437742, iteration: 425378
loss: 1.0180565118789673,grad_norm: 0.887905265228657, iteration: 425379
loss: 0.9948779940605164,grad_norm: 0.8324820227068813, iteration: 425380
loss: 1.0573934316635132,grad_norm: 0.7953298465784804, iteration: 425381
loss: 0.9627371430397034,grad_norm: 0.6223619182535781, iteration: 425382
loss: 1.004879117012024,grad_norm: 0.7881287537674714, iteration: 425383
loss: 0.9975008964538574,grad_norm: 0.7424187768445804, iteration: 425384
loss: 0.9665406942367554,grad_norm: 0.8610216138974862, iteration: 425385
loss: 1.009428858757019,grad_norm: 0.8108077751988193, iteration: 425386
loss: 0.9878785014152527,grad_norm: 0.7232955084545053, iteration: 425387
loss: 0.9640499353408813,grad_norm: 0.7018120550950991, iteration: 425388
loss: 0.9916818141937256,grad_norm: 0.7622858784324559, iteration: 425389
loss: 0.9577641487121582,grad_norm: 0.7272090977096531, iteration: 425390
loss: 1.0170153379440308,grad_norm: 0.7715363725511374, iteration: 425391
loss: 0.9816727042198181,grad_norm: 0.6237795924872738, iteration: 425392
loss: 0.9914852380752563,grad_norm: 0.7166651457269524, iteration: 425393
loss: 1.0343399047851562,grad_norm: 0.8394191781197072, iteration: 425394
loss: 1.0196537971496582,grad_norm: 0.7478413059281999, iteration: 425395
loss: 1.08657705783844,grad_norm: 0.8914845662288284, iteration: 425396
loss: 1.0026308298110962,grad_norm: 0.7403954555017875, iteration: 425397
loss: 1.1154192686080933,grad_norm: 0.9999996130033671, iteration: 425398
loss: 1.0062118768692017,grad_norm: 0.68841499974971, iteration: 425399
loss: 1.0538063049316406,grad_norm: 0.9999997202281565, iteration: 425400
loss: 0.9678041934967041,grad_norm: 0.7529271776867684, iteration: 425401
loss: 0.9833011627197266,grad_norm: 0.7289978040469814, iteration: 425402
loss: 1.0442113876342773,grad_norm: 0.8387387310813337, iteration: 425403
loss: 1.0054765939712524,grad_norm: 0.7688082417613722, iteration: 425404
loss: 1.0340735912322998,grad_norm: 0.838753806978497, iteration: 425405
loss: 0.9983041286468506,grad_norm: 0.6747971358388586, iteration: 425406
loss: 0.9775910973548889,grad_norm: 0.8321399464838921, iteration: 425407
loss: 1.0281153917312622,grad_norm: 0.8410239903357716, iteration: 425408
loss: 1.0051132440567017,grad_norm: 0.8269723455124895, iteration: 425409
loss: 0.981377363204956,grad_norm: 0.6770323327049594, iteration: 425410
loss: 1.0262705087661743,grad_norm: 0.7467703363066105, iteration: 425411
loss: 1.0688596963882446,grad_norm: 0.7589673877026291, iteration: 425412
loss: 0.9724568724632263,grad_norm: 0.7812931942483201, iteration: 425413
loss: 1.077332854270935,grad_norm: 1.0000000566932195, iteration: 425414
loss: 1.0120019912719727,grad_norm: 0.6826602575263515, iteration: 425415
loss: 1.0121270418167114,grad_norm: 0.7559349807348995, iteration: 425416
loss: 1.0053462982177734,grad_norm: 0.704583207242035, iteration: 425417
loss: 0.9810149073600769,grad_norm: 0.671412388507733, iteration: 425418
loss: 1.0074745416641235,grad_norm: 0.792428725651328, iteration: 425419
loss: 1.0203640460968018,grad_norm: 0.9401404468694651, iteration: 425420
loss: 1.0051147937774658,grad_norm: 0.6814241298601844, iteration: 425421
loss: 1.2419220209121704,grad_norm: 0.9999999872777359, iteration: 425422
loss: 0.9785703420639038,grad_norm: 0.9622731613071686, iteration: 425423
loss: 0.9918233156204224,grad_norm: 0.6931474233514738, iteration: 425424
loss: 1.040002703666687,grad_norm: 0.8763819397788197, iteration: 425425
loss: 1.008445143699646,grad_norm: 0.7602217565534161, iteration: 425426
loss: 1.2228989601135254,grad_norm: 0.9999996739806983, iteration: 425427
loss: 0.9871944785118103,grad_norm: 0.8233653227862495, iteration: 425428
loss: 0.9928539395332336,grad_norm: 0.6455426905855242, iteration: 425429
loss: 1.0025153160095215,grad_norm: 0.6783062292773082, iteration: 425430
loss: 1.1977378129959106,grad_norm: 0.9999998675657438, iteration: 425431
loss: 1.1670925617218018,grad_norm: 0.9999999487402811, iteration: 425432
loss: 0.9718764424324036,grad_norm: 0.6719354048690395, iteration: 425433
loss: 0.9973336458206177,grad_norm: 0.8054024153203072, iteration: 425434
loss: 0.975853681564331,grad_norm: 0.7507012257798967, iteration: 425435
loss: 1.0206429958343506,grad_norm: 0.8028117436118399, iteration: 425436
loss: 1.0096771717071533,grad_norm: 0.7731919115990047, iteration: 425437
loss: 1.0248522758483887,grad_norm: 0.7909148119145706, iteration: 425438
loss: 1.1181808710098267,grad_norm: 0.9999991489351807, iteration: 425439
loss: 0.9959038496017456,grad_norm: 0.7324317357461855, iteration: 425440
loss: 1.0296565294265747,grad_norm: 0.9430332653849167, iteration: 425441
loss: 0.989883542060852,grad_norm: 0.999999259698423, iteration: 425442
loss: 1.037329912185669,grad_norm: 0.999999219232971, iteration: 425443
loss: 0.9868897199630737,grad_norm: 0.716071513005257, iteration: 425444
loss: 1.019791841506958,grad_norm: 0.9186988920409908, iteration: 425445
loss: 0.9898563027381897,grad_norm: 0.9999993343362004, iteration: 425446
loss: 0.9772997498512268,grad_norm: 0.821918968939795, iteration: 425447
loss: 0.9677315354347229,grad_norm: 0.83038167159628, iteration: 425448
loss: 0.9702626466751099,grad_norm: 0.8190362844698849, iteration: 425449
loss: 1.0165185928344727,grad_norm: 0.9999996352973697, iteration: 425450
loss: 1.0082356929779053,grad_norm: 0.607799058114034, iteration: 425451
loss: 0.9730915427207947,grad_norm: 0.7747937277063396, iteration: 425452
loss: 0.9782082438468933,grad_norm: 0.6490751989892574, iteration: 425453
loss: 0.9760932326316833,grad_norm: 0.8529787692015743, iteration: 425454
loss: 1.0125502347946167,grad_norm: 0.8275823374914686, iteration: 425455
loss: 1.0215526819229126,grad_norm: 0.9999990529682184, iteration: 425456
loss: 1.045432448387146,grad_norm: 0.6995882995554626, iteration: 425457
loss: 0.9969377517700195,grad_norm: 0.8596856005975564, iteration: 425458
loss: 0.9681209921836853,grad_norm: 0.9999992090989052, iteration: 425459
loss: 1.0418270826339722,grad_norm: 0.8470131022705585, iteration: 425460
loss: 1.0183508396148682,grad_norm: 0.7823460948651243, iteration: 425461
loss: 0.9767895340919495,grad_norm: 0.815926348236885, iteration: 425462
loss: 0.9793300032615662,grad_norm: 0.7518073375600888, iteration: 425463
loss: 1.1544177532196045,grad_norm: 0.999999732745622, iteration: 425464
loss: 1.0292065143585205,grad_norm: 0.9710485609343259, iteration: 425465
loss: 0.9781716465950012,grad_norm: 0.6106064063995348, iteration: 425466
loss: 1.001413106918335,grad_norm: 0.8133065044785304, iteration: 425467
loss: 1.1078733205795288,grad_norm: 0.9999998298574639, iteration: 425468
loss: 0.97366863489151,grad_norm: 0.7520504500163984, iteration: 425469
loss: 1.008251428604126,grad_norm: 0.6701584710362875, iteration: 425470
loss: 0.9629946351051331,grad_norm: 0.6538509071998576, iteration: 425471
loss: 0.9910446405410767,grad_norm: 0.8088263604932033, iteration: 425472
loss: 0.9936320781707764,grad_norm: 0.8635890748959385, iteration: 425473
loss: 0.9988119006156921,grad_norm: 0.8056517852979926, iteration: 425474
loss: 1.0101404190063477,grad_norm: 0.9999992072302276, iteration: 425475
loss: 0.9943770170211792,grad_norm: 0.7181999384166297, iteration: 425476
loss: 1.0159348249435425,grad_norm: 0.8015028211040776, iteration: 425477
loss: 1.0102323293685913,grad_norm: 0.7284667677713623, iteration: 425478
loss: 0.9958412051200867,grad_norm: 0.8370658820540346, iteration: 425479
loss: 0.9750545620918274,grad_norm: 0.7783233352169459, iteration: 425480
loss: 1.0057259798049927,grad_norm: 0.786758264443859, iteration: 425481
loss: 0.9660380482673645,grad_norm: 0.8165714952616766, iteration: 425482
loss: 0.947896420955658,grad_norm: 0.8462112027770125, iteration: 425483
loss: 0.9792668223381042,grad_norm: 0.822877566955297, iteration: 425484
loss: 0.9939259886741638,grad_norm: 0.8576022035954702, iteration: 425485
loss: 0.9936105608940125,grad_norm: 0.6806326846200962, iteration: 425486
loss: 0.9647157192230225,grad_norm: 0.6915808271265322, iteration: 425487
loss: 0.9454838633537292,grad_norm: 0.7275230608589783, iteration: 425488
loss: 0.9804052114486694,grad_norm: 0.7678515755832881, iteration: 425489
loss: 0.9899449944496155,grad_norm: 0.8107203857174624, iteration: 425490
loss: 1.1978226900100708,grad_norm: 0.9999999736575278, iteration: 425491
loss: 1.01613450050354,grad_norm: 0.9881207082314941, iteration: 425492
loss: 1.0151853561401367,grad_norm: 0.6689527520837028, iteration: 425493
loss: 1.0251028537750244,grad_norm: 0.8250794799644253, iteration: 425494
loss: 1.0028305053710938,grad_norm: 0.8336610981213158, iteration: 425495
loss: 0.9622200131416321,grad_norm: 0.8881362126706034, iteration: 425496
loss: 1.1047682762145996,grad_norm: 0.8830081987484315, iteration: 425497
loss: 1.030622959136963,grad_norm: 0.8421594923492919, iteration: 425498
loss: 0.9767778515815735,grad_norm: 0.7281770143146964, iteration: 425499
loss: 1.0176078081130981,grad_norm: 0.9999991708391481, iteration: 425500
loss: 0.9991305470466614,grad_norm: 0.7456914470175187, iteration: 425501
loss: 1.0197468996047974,grad_norm: 0.8766969041973491, iteration: 425502
loss: 1.0168360471725464,grad_norm: 0.9999998427829061, iteration: 425503
loss: 1.0955426692962646,grad_norm: 0.9999991098176761, iteration: 425504
loss: 1.0065289735794067,grad_norm: 0.7282085720063473, iteration: 425505
loss: 0.9820789694786072,grad_norm: 0.8576600427724963, iteration: 425506
loss: 0.9999387860298157,grad_norm: 0.6440456868399056, iteration: 425507
loss: 1.022808313369751,grad_norm: 0.7010672364555081, iteration: 425508
loss: 1.056182861328125,grad_norm: 0.8992533751707477, iteration: 425509
loss: 1.0374130010604858,grad_norm: 0.8789591381798355, iteration: 425510
loss: 1.0350066423416138,grad_norm: 0.9286216573346736, iteration: 425511
loss: 1.0133758783340454,grad_norm: 0.8453370618919778, iteration: 425512
loss: 1.0005089044570923,grad_norm: 0.7991272455318061, iteration: 425513
loss: 0.9963263869285583,grad_norm: 0.8566409647235493, iteration: 425514
loss: 0.9730947017669678,grad_norm: 0.7161305306680869, iteration: 425515
loss: 0.9940974116325378,grad_norm: 0.9999992051646172, iteration: 425516
loss: 0.9753202795982361,grad_norm: 0.677647876050269, iteration: 425517
loss: 0.9803245663642883,grad_norm: 0.7985959099453351, iteration: 425518
loss: 0.9864402413368225,grad_norm: 0.786106708079174, iteration: 425519
loss: 1.0379801988601685,grad_norm: 0.7420869517524575, iteration: 425520
loss: 0.9742972254753113,grad_norm: 0.7683331679112968, iteration: 425521
loss: 0.9972309470176697,grad_norm: 0.8384307448663735, iteration: 425522
loss: 0.9879404902458191,grad_norm: 0.7661986699526879, iteration: 425523
loss: 1.0445835590362549,grad_norm: 0.8611264899283433, iteration: 425524
loss: 0.9866946339607239,grad_norm: 0.8562572915138681, iteration: 425525
loss: 1.0089904069900513,grad_norm: 0.7610123348098337, iteration: 425526
loss: 0.9505491852760315,grad_norm: 0.8210565503335293, iteration: 425527
loss: 0.9985623955726624,grad_norm: 0.7050524148610035, iteration: 425528
loss: 0.9819537997245789,grad_norm: 0.6202273608644228, iteration: 425529
loss: 1.034144401550293,grad_norm: 0.6778149894495241, iteration: 425530
loss: 1.0068702697753906,grad_norm: 0.7778673281213045, iteration: 425531
loss: 1.017033338546753,grad_norm: 0.751876889983918, iteration: 425532
loss: 1.026153564453125,grad_norm: 0.8403442336970253, iteration: 425533
loss: 0.9982860684394836,grad_norm: 0.9529779919938911, iteration: 425534
loss: 0.9636861085891724,grad_norm: 0.7484434479715554, iteration: 425535
loss: 1.028511881828308,grad_norm: 0.9978349705320406, iteration: 425536
loss: 1.00929594039917,grad_norm: 0.940594413008025, iteration: 425537
loss: 0.9780145883560181,grad_norm: 0.969405500243483, iteration: 425538
loss: 0.9914291501045227,grad_norm: 0.7739092818566853, iteration: 425539
loss: 0.9871702790260315,grad_norm: 0.7452878699954716, iteration: 425540
loss: 1.010934829711914,grad_norm: 0.9127774756798932, iteration: 425541
loss: 0.9842495918273926,grad_norm: 0.8791530754423884, iteration: 425542
loss: 0.967863142490387,grad_norm: 0.8950444071632239, iteration: 425543
loss: 0.9712991118431091,grad_norm: 0.7918193046156428, iteration: 425544
loss: 0.9814616441726685,grad_norm: 0.9999997381628679, iteration: 425545
loss: 0.9858906269073486,grad_norm: 0.7160043722093621, iteration: 425546
loss: 0.9979324340820312,grad_norm: 0.6974290208347098, iteration: 425547
loss: 0.9946083426475525,grad_norm: 0.771465764390515, iteration: 425548
loss: 1.0338261127471924,grad_norm: 0.6963405026915827, iteration: 425549
loss: 1.0434943437576294,grad_norm: 0.9999994741785573, iteration: 425550
loss: 1.017686128616333,grad_norm: 0.8774730820483615, iteration: 425551
loss: 0.9902486205101013,grad_norm: 0.833286394857927, iteration: 425552
loss: 0.9906835556030273,grad_norm: 0.6587106220668874, iteration: 425553
loss: 1.0140671730041504,grad_norm: 0.8023037575955059, iteration: 425554
loss: 0.9831677079200745,grad_norm: 0.7956995212403174, iteration: 425555
loss: 0.9938609004020691,grad_norm: 0.6247734182144988, iteration: 425556
loss: 0.9859465956687927,grad_norm: 0.795455543464587, iteration: 425557
loss: 1.0102996826171875,grad_norm: 0.6348382474836186, iteration: 425558
loss: 1.0139364004135132,grad_norm: 0.8281149961217481, iteration: 425559
loss: 1.0685441493988037,grad_norm: 0.999999334458864, iteration: 425560
loss: 1.0017468929290771,grad_norm: 0.9285051960162142, iteration: 425561
loss: 1.0823943614959717,grad_norm: 0.9999995570236464, iteration: 425562
loss: 0.996012806892395,grad_norm: 0.7431505342214706, iteration: 425563
loss: 1.0306673049926758,grad_norm: 0.7698089248358173, iteration: 425564
loss: 1.014430046081543,grad_norm: 0.9999995690803153, iteration: 425565
loss: 1.0199350118637085,grad_norm: 0.790888759521753, iteration: 425566
loss: 0.9711286425590515,grad_norm: 0.999999034795243, iteration: 425567
loss: 1.029798150062561,grad_norm: 0.7179014703880549, iteration: 425568
loss: 1.0373142957687378,grad_norm: 0.8410240334194349, iteration: 425569
loss: 1.0983991622924805,grad_norm: 0.9999993668197541, iteration: 425570
loss: 0.9891578555107117,grad_norm: 0.8888464516041086, iteration: 425571
loss: 1.0021578073501587,grad_norm: 0.78819733417752, iteration: 425572
loss: 1.0005996227264404,grad_norm: 0.9999996518890233, iteration: 425573
loss: 1.1018835306167603,grad_norm: 0.7622968187839002, iteration: 425574
loss: 1.0464963912963867,grad_norm: 0.9999991604713184, iteration: 425575
loss: 0.9879509806632996,grad_norm: 0.752778132645572, iteration: 425576
loss: 0.9909622669219971,grad_norm: 0.9999990500902034, iteration: 425577
loss: 1.0147424936294556,grad_norm: 0.9887883675707257, iteration: 425578
loss: 1.004718542098999,grad_norm: 0.8140073264897328, iteration: 425579
loss: 1.048651933670044,grad_norm: 0.9999997486869255, iteration: 425580
loss: 1.0550917387008667,grad_norm: 0.7899729826451375, iteration: 425581
loss: 0.9864650964736938,grad_norm: 0.8390697840734702, iteration: 425582
loss: 1.0074471235275269,grad_norm: 0.7462083044417602, iteration: 425583
loss: 1.015171766281128,grad_norm: 0.7612021901256142, iteration: 425584
loss: 1.0150585174560547,grad_norm: 0.655833680992558, iteration: 425585
loss: 1.006291389465332,grad_norm: 0.8843141431440972, iteration: 425586
loss: 0.9684303402900696,grad_norm: 0.9469925691022792, iteration: 425587
loss: 0.9583979845046997,grad_norm: 0.7974909680301938, iteration: 425588
loss: 1.0755677223205566,grad_norm: 0.8972486792874418, iteration: 425589
loss: 1.003654956817627,grad_norm: 0.9523676123266945, iteration: 425590
loss: 1.0611927509307861,grad_norm: 0.8756156895595701, iteration: 425591
loss: 0.9933059215545654,grad_norm: 0.8702389124615413, iteration: 425592
loss: 0.987462043762207,grad_norm: 0.7503708896809594, iteration: 425593
loss: 1.0395092964172363,grad_norm: 0.9137639348447425, iteration: 425594
loss: 1.0100001096725464,grad_norm: 0.9881729970196744, iteration: 425595
loss: 1.0916889905929565,grad_norm: 0.8627252791586538, iteration: 425596
loss: 1.0033912658691406,grad_norm: 0.8822739972614522, iteration: 425597
loss: 1.0890856981277466,grad_norm: 0.8166641684290511, iteration: 425598
loss: 1.017400860786438,grad_norm: 0.6974741034652022, iteration: 425599
loss: 1.041758418083191,grad_norm: 0.9694926292088594, iteration: 425600
loss: 0.9806073904037476,grad_norm: 0.9999995375298766, iteration: 425601
loss: 1.0195579528808594,grad_norm: 0.8150076320339739, iteration: 425602
loss: 1.121375322341919,grad_norm: 0.7134560400127999, iteration: 425603
loss: 1.028814673423767,grad_norm: 0.718698750305645, iteration: 425604
loss: 1.0859954357147217,grad_norm: 0.999999376790471, iteration: 425605
loss: 1.0002858638763428,grad_norm: 0.7781358104638151, iteration: 425606
loss: 1.0184810161590576,grad_norm: 0.8350060871799545, iteration: 425607
loss: 0.9989105463027954,grad_norm: 0.9577180934288112, iteration: 425608
loss: 0.9974921345710754,grad_norm: 0.8403552961844905, iteration: 425609
loss: 0.98698890209198,grad_norm: 0.7832856496263142, iteration: 425610
loss: 0.9837655425071716,grad_norm: 0.999999095565259, iteration: 425611
loss: 0.994165301322937,grad_norm: 0.7712835144227275, iteration: 425612
loss: 0.9729112982749939,grad_norm: 0.7434526383518192, iteration: 425613
loss: 1.0057753324508667,grad_norm: 0.9999990015791891, iteration: 425614
loss: 0.9894334077835083,grad_norm: 0.7207396790233225, iteration: 425615
loss: 1.0013383626937866,grad_norm: 0.7806724210320297, iteration: 425616
loss: 0.96482914686203,grad_norm: 0.833980419775264, iteration: 425617
loss: 1.0351059436798096,grad_norm: 0.8185683357211959, iteration: 425618
loss: 0.9848572015762329,grad_norm: 0.6606161796814782, iteration: 425619
loss: 0.9618343114852905,grad_norm: 0.8301909071640926, iteration: 425620
loss: 1.1049844026565552,grad_norm: 0.999999428127945, iteration: 425621
loss: 1.0008022785186768,grad_norm: 0.9999997953823292, iteration: 425622
loss: 0.9713740348815918,grad_norm: 0.7490411117103122, iteration: 425623
loss: 1.010094165802002,grad_norm: 0.8050408042017532, iteration: 425624
loss: 1.0139237642288208,grad_norm: 0.7716747795740567, iteration: 425625
loss: 1.0023168325424194,grad_norm: 0.772240609174355, iteration: 425626
loss: 1.0288699865341187,grad_norm: 0.77633334077648, iteration: 425627
loss: 0.9906934499740601,grad_norm: 0.7386283127298726, iteration: 425628
loss: 0.9968945980072021,grad_norm: 0.6819292235325567, iteration: 425629
loss: 1.0182486772537231,grad_norm: 0.999999115059049, iteration: 425630
loss: 1.0137832164764404,grad_norm: 0.8136953559603333, iteration: 425631
loss: 1.1526737213134766,grad_norm: 0.9999997931090858, iteration: 425632
loss: 1.0446208715438843,grad_norm: 0.780369173109816, iteration: 425633
loss: 1.0217781066894531,grad_norm: 0.6840976566718627, iteration: 425634
loss: 1.0464096069335938,grad_norm: 0.9999996817474943, iteration: 425635
loss: 0.9764661192893982,grad_norm: 0.8597123943314438, iteration: 425636
loss: 1.007591724395752,grad_norm: 0.7544799336599576, iteration: 425637
loss: 1.0150530338287354,grad_norm: 0.7071433982993608, iteration: 425638
loss: 1.0483404397964478,grad_norm: 0.9999995297753235, iteration: 425639
loss: 1.0179444551467896,grad_norm: 0.8310259261066839, iteration: 425640
loss: 1.0066882371902466,grad_norm: 0.7046712615806399, iteration: 425641
loss: 1.0036823749542236,grad_norm: 0.7122614563043169, iteration: 425642
loss: 0.9764143824577332,grad_norm: 0.9999993638506015, iteration: 425643
loss: 0.9919561743736267,grad_norm: 0.6840559537706085, iteration: 425644
loss: 1.0394678115844727,grad_norm: 0.9999997252140316, iteration: 425645
loss: 1.0144221782684326,grad_norm: 0.8230506042893488, iteration: 425646
loss: 0.9806905388832092,grad_norm: 0.7775922663722228, iteration: 425647
loss: 1.0216635465621948,grad_norm: 0.894356798538572, iteration: 425648
loss: 0.988618791103363,grad_norm: 0.8454999502208026, iteration: 425649
loss: 1.0501313209533691,grad_norm: 0.9999999373551187, iteration: 425650
loss: 0.995177686214447,grad_norm: 0.8087162615559901, iteration: 425651
loss: 1.0058090686798096,grad_norm: 0.7622848705463908, iteration: 425652
loss: 0.9740895628929138,grad_norm: 0.7492342927337364, iteration: 425653
loss: 0.9636684656143188,grad_norm: 0.6913889919970988, iteration: 425654
loss: 0.9462247490882874,grad_norm: 0.7713988535343915, iteration: 425655
loss: 0.9955165982246399,grad_norm: 0.7386042820614569, iteration: 425656
loss: 1.079278588294983,grad_norm: 0.9999992691890263, iteration: 425657
loss: 1.0355322360992432,grad_norm: 0.8696608576340895, iteration: 425658
loss: 1.1011013984680176,grad_norm: 0.9999995419032494, iteration: 425659
loss: 1.0216692686080933,grad_norm: 0.8068679360343177, iteration: 425660
loss: 0.9873449802398682,grad_norm: 0.6568128412586527, iteration: 425661
loss: 0.9798107743263245,grad_norm: 0.8656104228628586, iteration: 425662
loss: 1.0145883560180664,grad_norm: 0.7935513547129982, iteration: 425663
loss: 1.0491644144058228,grad_norm: 0.9999993553699597, iteration: 425664
loss: 0.9859118461608887,grad_norm: 0.9999992212044283, iteration: 425665
loss: 0.9752474427223206,grad_norm: 0.6841833460010054, iteration: 425666
loss: 0.9903474450111389,grad_norm: 0.7385137098397164, iteration: 425667
loss: 1.0779368877410889,grad_norm: 0.9999998335222573, iteration: 425668
loss: 0.993043839931488,grad_norm: 0.7916639451458768, iteration: 425669
loss: 1.035241723060608,grad_norm: 0.9653856504580233, iteration: 425670
loss: 0.9848397374153137,grad_norm: 0.8234363704800296, iteration: 425671
loss: 0.9871952533721924,grad_norm: 0.7620248231639365, iteration: 425672
loss: 1.0671721696853638,grad_norm: 0.9528108622172636, iteration: 425673
loss: 0.9837801456451416,grad_norm: 0.688392656651915, iteration: 425674
loss: 0.9764385223388672,grad_norm: 0.9660479904052888, iteration: 425675
loss: 1.0593901872634888,grad_norm: 0.930375959306202, iteration: 425676
loss: 0.9669874310493469,grad_norm: 0.9282247222741186, iteration: 425677
loss: 1.0844874382019043,grad_norm: 0.9999996585805925, iteration: 425678
loss: 0.9994915723800659,grad_norm: 0.7954734389587875, iteration: 425679
loss: 0.9859141111373901,grad_norm: 0.6621048976313905, iteration: 425680
loss: 0.9981502890586853,grad_norm: 0.7108444776243914, iteration: 425681
loss: 1.0096243619918823,grad_norm: 0.8301502071612091, iteration: 425682
loss: 1.0054981708526611,grad_norm: 0.6960886735207917, iteration: 425683
loss: 1.0013923645019531,grad_norm: 0.833641299278328, iteration: 425684
loss: 0.9841216802597046,grad_norm: 0.9280207249407175, iteration: 425685
loss: 1.0013313293457031,grad_norm: 0.6786072029379137, iteration: 425686
loss: 0.987811267375946,grad_norm: 0.9999991561774831, iteration: 425687
loss: 0.9754523634910583,grad_norm: 0.9688497816447065, iteration: 425688
loss: 0.9968529343605042,grad_norm: 0.6578849623205106, iteration: 425689
loss: 0.951345682144165,grad_norm: 0.6959556536533091, iteration: 425690
loss: 0.9772232174873352,grad_norm: 0.9999997472890932, iteration: 425691
loss: 1.0421935319900513,grad_norm: 0.9999990902863776, iteration: 425692
loss: 1.0453081130981445,grad_norm: 0.7960069859423916, iteration: 425693
loss: 1.0639691352844238,grad_norm: 0.9207550550694354, iteration: 425694
loss: 0.9832024574279785,grad_norm: 0.7620980601004251, iteration: 425695
loss: 0.9712333083152771,grad_norm: 0.5936284174202424, iteration: 425696
loss: 0.9522345066070557,grad_norm: 0.7139206364606951, iteration: 425697
loss: 1.0057870149612427,grad_norm: 0.8123761107840993, iteration: 425698
loss: 0.9991813898086548,grad_norm: 0.860694880317917, iteration: 425699
loss: 1.0055904388427734,grad_norm: 0.6607536486146166, iteration: 425700
loss: 0.9972425103187561,grad_norm: 0.8047451083597401, iteration: 425701
loss: 1.0385276079177856,grad_norm: 0.6873744532730872, iteration: 425702
loss: 0.9869664311408997,grad_norm: 0.774007918988115, iteration: 425703
loss: 1.0147943496704102,grad_norm: 0.9033506864479347, iteration: 425704
loss: 0.9996846318244934,grad_norm: 0.7408899658299695, iteration: 425705
loss: 1.0326427221298218,grad_norm: 0.9999998773076275, iteration: 425706
loss: 1.000077486038208,grad_norm: 0.6793000698182429, iteration: 425707
loss: 1.0185400247573853,grad_norm: 0.7571956450694258, iteration: 425708
loss: 1.0270812511444092,grad_norm: 0.9356229154876026, iteration: 425709
loss: 1.0005284547805786,grad_norm: 0.9652362831465546, iteration: 425710
loss: 0.989348292350769,grad_norm: 0.9999992079019003, iteration: 425711
loss: 0.9823007583618164,grad_norm: 0.8491818586412926, iteration: 425712
loss: 1.018142819404602,grad_norm: 0.7239971705615609, iteration: 425713
loss: 0.9672629237174988,grad_norm: 0.6320932653441115, iteration: 425714
loss: 1.0127211809158325,grad_norm: 0.7290577476105293, iteration: 425715
loss: 1.0140260457992554,grad_norm: 0.7584998594245077, iteration: 425716
loss: 1.0004386901855469,grad_norm: 0.889535308637898, iteration: 425717
loss: 1.0341724157333374,grad_norm: 0.9999990274234521, iteration: 425718
loss: 0.9780315160751343,grad_norm: 0.9842729210956812, iteration: 425719
loss: 0.9529361724853516,grad_norm: 0.8405729812981623, iteration: 425720
loss: 1.0013612508773804,grad_norm: 0.9256367102123277, iteration: 425721
loss: 1.019486904144287,grad_norm: 0.7104897107418654, iteration: 425722
loss: 0.9987125992774963,grad_norm: 0.7403832308357998, iteration: 425723
loss: 0.9838679432868958,grad_norm: 0.763784463675911, iteration: 425724
loss: 0.9712715744972229,grad_norm: 0.7489590997761578, iteration: 425725
loss: 1.011520504951477,grad_norm: 0.7473953532958388, iteration: 425726
loss: 1.027099847793579,grad_norm: 0.8866768187170806, iteration: 425727
loss: 0.9825006127357483,grad_norm: 0.9243674827746352, iteration: 425728
loss: 0.9928410649299622,grad_norm: 0.8602291892739939, iteration: 425729
loss: 1.0065863132476807,grad_norm: 0.7574489362190692, iteration: 425730
loss: 1.0342859029769897,grad_norm: 0.7586917481282223, iteration: 425731
loss: 1.0089961290359497,grad_norm: 0.859309518215185, iteration: 425732
loss: 0.9985840320587158,grad_norm: 0.8086511698363512, iteration: 425733
loss: 0.9592071771621704,grad_norm: 0.8004351264481813, iteration: 425734
loss: 0.9639829397201538,grad_norm: 0.727552642728833, iteration: 425735
loss: 1.0535248517990112,grad_norm: 0.9423861737953432, iteration: 425736
loss: 1.0176007747650146,grad_norm: 0.8026165169464321, iteration: 425737
loss: 0.9916211366653442,grad_norm: 0.8385380706000909, iteration: 425738
loss: 0.99676513671875,grad_norm: 0.9999992223756226, iteration: 425739
loss: 0.9766900539398193,grad_norm: 0.901965902526888, iteration: 425740
loss: 1.0210192203521729,grad_norm: 0.7281192699167418, iteration: 425741
loss: 0.9908501505851746,grad_norm: 0.891153797829021, iteration: 425742
loss: 1.1898295879364014,grad_norm: 0.9999998360538306, iteration: 425743
loss: 0.9644309282302856,grad_norm: 0.8005134574370317, iteration: 425744
loss: 1.0210148096084595,grad_norm: 0.9999997773531756, iteration: 425745
loss: 0.9924213886260986,grad_norm: 0.7979783434931361, iteration: 425746
loss: 0.949297308921814,grad_norm: 0.7959250673013598, iteration: 425747
loss: 1.0194350481033325,grad_norm: 0.879509837548245, iteration: 425748
loss: 1.0094739198684692,grad_norm: 0.7914051540562836, iteration: 425749
loss: 0.9873824715614319,grad_norm: 0.7930921037549153, iteration: 425750
loss: 0.987637460231781,grad_norm: 0.8627829004689526, iteration: 425751
loss: 1.032954454421997,grad_norm: 0.6760169487282917, iteration: 425752
loss: 1.0960158109664917,grad_norm: 0.9999999236680327, iteration: 425753
loss: 1.0324878692626953,grad_norm: 0.9554096680356988, iteration: 425754
loss: 1.003091812133789,grad_norm: 0.7289921381013856, iteration: 425755
loss: 0.9893984198570251,grad_norm: 0.725085752239302, iteration: 425756
loss: 0.9950607419013977,grad_norm: 0.7657004330304017, iteration: 425757
loss: 0.9638280272483826,grad_norm: 0.999999728617823, iteration: 425758
loss: 1.0257114171981812,grad_norm: 0.782249769905705, iteration: 425759
loss: 1.0239676237106323,grad_norm: 0.9999991552443106, iteration: 425760
loss: 0.9808377027511597,grad_norm: 0.810406138196494, iteration: 425761
loss: 1.0483845472335815,grad_norm: 0.9999992819422966, iteration: 425762
loss: 0.9565734267234802,grad_norm: 0.638271241328467, iteration: 425763
loss: 0.9479466676712036,grad_norm: 0.7661867079731343, iteration: 425764
loss: 1.0454981327056885,grad_norm: 0.7754257426774767, iteration: 425765
loss: 1.0359447002410889,grad_norm: 0.6916026580902953, iteration: 425766
loss: 0.9757639169692993,grad_norm: 0.7812382183882529, iteration: 425767
loss: 1.0927644968032837,grad_norm: 0.9897717252459313, iteration: 425768
loss: 0.9948498606681824,grad_norm: 0.7103761745696192, iteration: 425769
loss: 1.003472924232483,grad_norm: 0.7590510452575018, iteration: 425770
loss: 1.0646157264709473,grad_norm: 0.999999659293508, iteration: 425771
loss: 0.9885151386260986,grad_norm: 0.9690417604570986, iteration: 425772
loss: 1.0793914794921875,grad_norm: 0.9999991754854843, iteration: 425773
loss: 1.0058681964874268,grad_norm: 0.7707100095700203, iteration: 425774
loss: 1.0738204717636108,grad_norm: 0.9024091839439493, iteration: 425775
loss: 1.1899772882461548,grad_norm: 0.857975067066986, iteration: 425776
loss: 0.986295759677887,grad_norm: 0.6913846657508502, iteration: 425777
loss: 1.1350414752960205,grad_norm: 0.9999992022758707, iteration: 425778
loss: 1.0418365001678467,grad_norm: 0.9999998914325232, iteration: 425779
loss: 1.0573478937149048,grad_norm: 0.9999998428839014, iteration: 425780
loss: 1.067309856414795,grad_norm: 0.8667008513060991, iteration: 425781
loss: 1.046196699142456,grad_norm: 0.999999471393372, iteration: 425782
loss: 1.088959813117981,grad_norm: 0.8605554904806068, iteration: 425783
loss: 1.0645073652267456,grad_norm: 0.7934036266044617, iteration: 425784
loss: 0.9943516850471497,grad_norm: 0.808047464120597, iteration: 425785
loss: 0.9974760413169861,grad_norm: 0.7256993334669316, iteration: 425786
loss: 1.0329036712646484,grad_norm: 0.7735619606813127, iteration: 425787
loss: 0.9828633069992065,grad_norm: 0.9307238860873355, iteration: 425788
loss: 1.0027673244476318,grad_norm: 0.7803152928697931, iteration: 425789
loss: 1.279987096786499,grad_norm: 0.9999993807821852, iteration: 425790
loss: 1.0141698122024536,grad_norm: 0.9243767343913867, iteration: 425791
loss: 0.9623469114303589,grad_norm: 0.7111694219514763, iteration: 425792
loss: 0.9793758988380432,grad_norm: 0.7314538608183043, iteration: 425793
loss: 1.0110046863555908,grad_norm: 0.7220336414506304, iteration: 425794
loss: 0.9792662858963013,grad_norm: 0.7403677014896434, iteration: 425795
loss: 1.012789011001587,grad_norm: 0.7349324297619193, iteration: 425796
loss: 1.0128517150878906,grad_norm: 0.8505249592818397, iteration: 425797
loss: 1.0086885690689087,grad_norm: 0.9999991256181062, iteration: 425798
loss: 1.0543403625488281,grad_norm: 0.638383819257633, iteration: 425799
loss: 1.0251104831695557,grad_norm: 0.8077914920617771, iteration: 425800
loss: 0.9920815825462341,grad_norm: 0.9999992303749072, iteration: 425801
loss: 0.9663790464401245,grad_norm: 0.9166238856095588, iteration: 425802
loss: 1.0337915420532227,grad_norm: 0.6971556943316072, iteration: 425803
loss: 1.0089285373687744,grad_norm: 0.7296273044172928, iteration: 425804
loss: 1.0316585302352905,grad_norm: 0.7968718967573788, iteration: 425805
loss: 1.0250698328018188,grad_norm: 0.9999995863170802, iteration: 425806
loss: 1.0864205360412598,grad_norm: 0.9999997421832254, iteration: 425807
loss: 1.0138989686965942,grad_norm: 0.8984914471993598, iteration: 425808
loss: 0.9607521295547485,grad_norm: 0.807572948728906, iteration: 425809
loss: 1.0256624221801758,grad_norm: 0.9999998633544, iteration: 425810
loss: 1.0372390747070312,grad_norm: 0.954559148288535, iteration: 425811
loss: 0.9698233604431152,grad_norm: 0.8300766212177344, iteration: 425812
loss: 1.050674319267273,grad_norm: 0.7964484464464922, iteration: 425813
loss: 0.9961296916007996,grad_norm: 0.8081453038550632, iteration: 425814
loss: 1.0447609424591064,grad_norm: 0.7882866978029525, iteration: 425815
loss: 1.0310627222061157,grad_norm: 0.9324008321124169, iteration: 425816
loss: 1.0113918781280518,grad_norm: 0.8596431584878418, iteration: 425817
loss: 0.9870291352272034,grad_norm: 0.6759491197766624, iteration: 425818
loss: 1.0418825149536133,grad_norm: 0.9362597076089572, iteration: 425819
loss: 1.0332531929016113,grad_norm: 0.6770602438235638, iteration: 425820
loss: 1.0072568655014038,grad_norm: 0.7831711487206776, iteration: 425821
loss: 1.0133384466171265,grad_norm: 0.5789296014642986, iteration: 425822
loss: 1.0185540914535522,grad_norm: 0.8161188285749262, iteration: 425823
loss: 1.0585607290267944,grad_norm: 0.9999993581745332, iteration: 425824
loss: 1.0415571928024292,grad_norm: 0.7185450684643657, iteration: 425825
loss: 1.0127158164978027,grad_norm: 0.6229187549201332, iteration: 425826
loss: 0.9822918176651001,grad_norm: 0.8225121701789455, iteration: 425827
loss: 0.982860803604126,grad_norm: 0.9999991475662023, iteration: 425828
loss: 1.0274351835250854,grad_norm: 0.9999991085933339, iteration: 425829
loss: 1.1663557291030884,grad_norm: 0.9999991753714322, iteration: 425830
loss: 1.051989197731018,grad_norm: 0.9005374497253129, iteration: 425831
loss: 1.0102133750915527,grad_norm: 0.8733213976652245, iteration: 425832
loss: 1.0132321119308472,grad_norm: 0.7450300177241133, iteration: 425833
loss: 0.9861730337142944,grad_norm: 0.9999990543737357, iteration: 425834
loss: 0.965498685836792,grad_norm: 0.7018336681419801, iteration: 425835
loss: 1.0240141153335571,grad_norm: 0.7530621589424985, iteration: 425836
loss: 1.0023077726364136,grad_norm: 0.6245455105929767, iteration: 425837
loss: 1.0123000144958496,grad_norm: 0.9076139910833443, iteration: 425838
loss: 0.9995753169059753,grad_norm: 0.7978432420077683, iteration: 425839
loss: 1.0172336101531982,grad_norm: 0.8952276765791188, iteration: 425840
loss: 0.9880688786506653,grad_norm: 0.9999993251690908, iteration: 425841
loss: 0.9935836791992188,grad_norm: 0.7425966654477637, iteration: 425842
loss: 1.0008809566497803,grad_norm: 0.6574268596705011, iteration: 425843
loss: 0.9810832738876343,grad_norm: 0.7788611836339218, iteration: 425844
loss: 1.0156410932540894,grad_norm: 0.7455104164829511, iteration: 425845
loss: 1.0192975997924805,grad_norm: 0.9999996945120005, iteration: 425846
loss: 0.9949691295623779,grad_norm: 0.7001030135237094, iteration: 425847
loss: 0.9979935884475708,grad_norm: 0.7841687772062085, iteration: 425848
loss: 0.9962563514709473,grad_norm: 0.9999993225202884, iteration: 425849
loss: 0.9881071448326111,grad_norm: 0.6745373108610534, iteration: 425850
loss: 1.0259326696395874,grad_norm: 0.9999999426734286, iteration: 425851
loss: 1.0580555200576782,grad_norm: 0.962886585746843, iteration: 425852
loss: 1.3327089548110962,grad_norm: 0.9999998415688056, iteration: 425853
loss: 0.990508496761322,grad_norm: 0.685235989109456, iteration: 425854
loss: 1.0155223608016968,grad_norm: 0.9137207692163427, iteration: 425855
loss: 0.9961798191070557,grad_norm: 0.9999998649165007, iteration: 425856
loss: 0.9980611205101013,grad_norm: 0.9999993963178759, iteration: 425857
loss: 1.0174416303634644,grad_norm: 0.9999992256584148, iteration: 425858
loss: 1.020656704902649,grad_norm: 0.8000692559522561, iteration: 425859
loss: 1.0472478866577148,grad_norm: 0.999999700684963, iteration: 425860
loss: 1.0223901271820068,grad_norm: 0.8163652966649374, iteration: 425861
loss: 0.992838442325592,grad_norm: 0.7360630311971209, iteration: 425862
loss: 1.0668561458587646,grad_norm: 0.9177247621863915, iteration: 425863
loss: 0.9947542548179626,grad_norm: 0.8223496994396269, iteration: 425864
loss: 1.004214882850647,grad_norm: 0.6870838671789421, iteration: 425865
loss: 1.2837300300598145,grad_norm: 0.9999993354401161, iteration: 425866
loss: 1.0208975076675415,grad_norm: 0.7430862653797299, iteration: 425867
loss: 1.0400983095169067,grad_norm: 0.9999995664429885, iteration: 425868
loss: 1.2067973613739014,grad_norm: 0.9722480397575636, iteration: 425869
loss: 0.9853231310844421,grad_norm: 0.7123059600173234, iteration: 425870
loss: 0.9900870323181152,grad_norm: 0.7367692350600807, iteration: 425871
loss: 1.1745092868804932,grad_norm: 0.8974603784498489, iteration: 425872
loss: 1.001425862312317,grad_norm: 0.9999995874287375, iteration: 425873
loss: 0.9671765565872192,grad_norm: 0.90584583918055, iteration: 425874
loss: 1.0385541915893555,grad_norm: 0.9999990856425524, iteration: 425875
loss: 0.9613664150238037,grad_norm: 0.7591208476706511, iteration: 425876
loss: 1.0054908990859985,grad_norm: 0.9999999345312816, iteration: 425877
loss: 0.9738622903823853,grad_norm: 0.6623631110688991, iteration: 425878
loss: 1.0243972539901733,grad_norm: 0.9999992513416667, iteration: 425879
loss: 1.0193015336990356,grad_norm: 0.907493206233971, iteration: 425880
loss: 0.9807516932487488,grad_norm: 0.9999998160000971, iteration: 425881
loss: 1.0398966073989868,grad_norm: 0.8080085449237507, iteration: 425882
loss: 1.1595791578292847,grad_norm: 0.9999991691653515, iteration: 425883
loss: 1.0821315050125122,grad_norm: 0.9999999295268448, iteration: 425884
loss: 0.9987149834632874,grad_norm: 0.6882881853482261, iteration: 425885
loss: 0.9867978096008301,grad_norm: 0.8508855979431352, iteration: 425886
loss: 1.0062601566314697,grad_norm: 0.9999993641225726, iteration: 425887
loss: 1.0243420600891113,grad_norm: 0.7977600889950796, iteration: 425888
loss: 1.0034568309783936,grad_norm: 0.9999999990534819, iteration: 425889
loss: 0.9895326495170593,grad_norm: 0.7400631415053642, iteration: 425890
loss: 1.008616328239441,grad_norm: 0.9999992990580918, iteration: 425891
loss: 0.9967097043991089,grad_norm: 0.8705524293351156, iteration: 425892
loss: 1.0140583515167236,grad_norm: 0.7864566142567232, iteration: 425893
loss: 1.080824613571167,grad_norm: 0.999999220887542, iteration: 425894
loss: 0.9994245171546936,grad_norm: 0.8077760356385381, iteration: 425895
loss: 1.0513663291931152,grad_norm: 0.9999999574603893, iteration: 425896
loss: 1.0750021934509277,grad_norm: 0.9288810831735765, iteration: 425897
loss: 0.9862776398658752,grad_norm: 0.7675459704488785, iteration: 425898
loss: 0.9849101901054382,grad_norm: 0.7732527001045658, iteration: 425899
loss: 1.0517765283584595,grad_norm: 0.9999994789535458, iteration: 425900
loss: 0.9843124151229858,grad_norm: 0.8710142765697784, iteration: 425901
loss: 0.9857290387153625,grad_norm: 0.9507226415530673, iteration: 425902
loss: 1.0070565938949585,grad_norm: 0.9514825722915173, iteration: 425903
loss: 0.9603402614593506,grad_norm: 0.79018765675143, iteration: 425904
loss: 1.0450552701950073,grad_norm: 0.9774716202396952, iteration: 425905
loss: 1.0286931991577148,grad_norm: 0.9999994628597756, iteration: 425906
loss: 1.060099482536316,grad_norm: 0.8071598630061765, iteration: 425907
loss: 0.9850360751152039,grad_norm: 0.7996622486824223, iteration: 425908
loss: 0.9966070652008057,grad_norm: 0.7830389032479917, iteration: 425909
loss: 0.9953675270080566,grad_norm: 0.674087335622756, iteration: 425910
loss: 0.9919599890708923,grad_norm: 0.6608635032192972, iteration: 425911
loss: 1.0012924671173096,grad_norm: 0.8602971885715406, iteration: 425912
loss: 1.0216015577316284,grad_norm: 0.7682511559574797, iteration: 425913
loss: 0.9833453893661499,grad_norm: 0.8853385771683174, iteration: 425914
loss: 1.0034152269363403,grad_norm: 0.7082834659117244, iteration: 425915
loss: 1.0106840133666992,grad_norm: 0.6861305116859048, iteration: 425916
loss: 1.146176815032959,grad_norm: 0.9999999416522375, iteration: 425917
loss: 0.9997843503952026,grad_norm: 0.7347937026816102, iteration: 425918
loss: 1.0770999193191528,grad_norm: 0.8117883982004732, iteration: 425919
loss: 1.0034600496292114,grad_norm: 0.8458995621343054, iteration: 425920
loss: 1.0017127990722656,grad_norm: 0.8274300213731113, iteration: 425921
loss: 1.015592336654663,grad_norm: 0.9625009378875601, iteration: 425922
loss: 0.9487045407295227,grad_norm: 0.7148770020230624, iteration: 425923
loss: 1.1341043710708618,grad_norm: 0.975709010481922, iteration: 425924
loss: 1.0752613544464111,grad_norm: 0.859559296923959, iteration: 425925
loss: 1.0270429849624634,grad_norm: 0.9999992226858245, iteration: 425926
loss: 1.0115736722946167,grad_norm: 0.8385980219962004, iteration: 425927
loss: 0.9766321778297424,grad_norm: 0.9999993200500114, iteration: 425928
loss: 1.0629664659500122,grad_norm: 0.8676015854489918, iteration: 425929
loss: 1.0702401399612427,grad_norm: 0.9999993978014364, iteration: 425930
loss: 1.1622107028961182,grad_norm: 0.922461463460442, iteration: 425931
loss: 1.0512079000473022,grad_norm: 0.8603686032113873, iteration: 425932
loss: 1.1123954057693481,grad_norm: 0.999999819818121, iteration: 425933
loss: 1.0021371841430664,grad_norm: 0.7268591634699874, iteration: 425934
loss: 1.012711524963379,grad_norm: 0.8847541771117745, iteration: 425935
loss: 1.0684921741485596,grad_norm: 0.7707804199093555, iteration: 425936
loss: 1.081994652748108,grad_norm: 0.8473196021041969, iteration: 425937
loss: 1.010489583015442,grad_norm: 0.7210854970027673, iteration: 425938
loss: 1.032607078552246,grad_norm: 0.999999755995502, iteration: 425939
loss: 1.0043748617172241,grad_norm: 0.8678180938835194, iteration: 425940
loss: 1.0098015069961548,grad_norm: 0.9992550034660473, iteration: 425941
loss: 1.0389941930770874,grad_norm: 0.9411751466306402, iteration: 425942
loss: 1.0129667520523071,grad_norm: 0.7352815574810665, iteration: 425943
loss: 0.9771851301193237,grad_norm: 0.8773984987343362, iteration: 425944
loss: 1.021971583366394,grad_norm: 0.890639558615083, iteration: 425945
loss: 1.0310161113739014,grad_norm: 0.8202657008883216, iteration: 425946
loss: 1.0376871824264526,grad_norm: 0.9999998392272692, iteration: 425947
loss: 1.0005191564559937,grad_norm: 0.7441185140580954, iteration: 425948
loss: 1.0193060636520386,grad_norm: 0.7741251314243007, iteration: 425949
loss: 1.0462827682495117,grad_norm: 0.762544110007011, iteration: 425950
loss: 0.9780804514884949,grad_norm: 0.999999470640439, iteration: 425951
loss: 0.975831925868988,grad_norm: 0.7595554063811826, iteration: 425952
loss: 0.9881335496902466,grad_norm: 0.830631787771691, iteration: 425953
loss: 0.9727160930633545,grad_norm: 0.9185319322283881, iteration: 425954
loss: 1.1105189323425293,grad_norm: 0.9999993259355177, iteration: 425955
loss: 1.068220853805542,grad_norm: 0.8342425732214493, iteration: 425956
loss: 1.0208899974822998,grad_norm: 0.8950872059438059, iteration: 425957
loss: 0.9929967522621155,grad_norm: 0.8727106330932864, iteration: 425958
loss: 0.9983676671981812,grad_norm: 0.7249998974034416, iteration: 425959
loss: 0.9998694062232971,grad_norm: 0.9999989550313926, iteration: 425960
loss: 1.0009797811508179,grad_norm: 0.8443535583038636, iteration: 425961
loss: 1.1452354192733765,grad_norm: 0.999999127727694, iteration: 425962
loss: 1.0378206968307495,grad_norm: 0.8491184731294739, iteration: 425963
loss: 0.9865618348121643,grad_norm: 0.6859994191412215, iteration: 425964
loss: 1.0245120525360107,grad_norm: 0.9149503248085509, iteration: 425965
loss: 0.9960651397705078,grad_norm: 0.9999992142571628, iteration: 425966
loss: 1.007378101348877,grad_norm: 0.7454745396249584, iteration: 425967
loss: 0.9946994185447693,grad_norm: 0.7132905046314411, iteration: 425968
loss: 1.061681866645813,grad_norm: 0.7382304121488336, iteration: 425969
loss: 1.0284013748168945,grad_norm: 0.7432368924277793, iteration: 425970
loss: 1.0012376308441162,grad_norm: 0.9978873981130852, iteration: 425971
loss: 1.0171259641647339,grad_norm: 0.9999993077667216, iteration: 425972
loss: 0.9531633853912354,grad_norm: 0.805632374047589, iteration: 425973
loss: 0.994972825050354,grad_norm: 0.8081970242092648, iteration: 425974
loss: 0.9602838158607483,grad_norm: 0.6820614037056341, iteration: 425975
loss: 1.0416017770767212,grad_norm: 0.9999992715903133, iteration: 425976
loss: 1.0329699516296387,grad_norm: 0.6720150417437001, iteration: 425977
loss: 0.9979721307754517,grad_norm: 0.7668519137665789, iteration: 425978
loss: 1.0231155157089233,grad_norm: 0.8514572719457729, iteration: 425979
loss: 0.9742633104324341,grad_norm: 0.9345537770000706, iteration: 425980
loss: 0.9879955053329468,grad_norm: 0.752944959645363, iteration: 425981
loss: 0.96036297082901,grad_norm: 0.8124179927700458, iteration: 425982
loss: 0.9798932075500488,grad_norm: 0.7188479531862125, iteration: 425983
loss: 0.9842696785926819,grad_norm: 0.797271683774898, iteration: 425984
loss: 1.005967617034912,grad_norm: 0.7365722982646616, iteration: 425985
loss: 1.0405926704406738,grad_norm: 0.9999996672395227, iteration: 425986
loss: 0.9944053292274475,grad_norm: 0.7197911661511304, iteration: 425987
loss: 1.0473250150680542,grad_norm: 0.89767091470836, iteration: 425988
loss: 1.0183892250061035,grad_norm: 0.999999270535386, iteration: 425989
loss: 1.056280255317688,grad_norm: 0.6135881398010621, iteration: 425990
loss: 0.9791015386581421,grad_norm: 0.77948851841612, iteration: 425991
loss: 1.0108814239501953,grad_norm: 0.7049724129645895, iteration: 425992
loss: 1.0787893533706665,grad_norm: 0.8530063563136842, iteration: 425993
loss: 1.0253292322158813,grad_norm: 0.6979021360201559, iteration: 425994
loss: 0.9953694343566895,grad_norm: 0.9482107801307501, iteration: 425995
loss: 0.9655303359031677,grad_norm: 0.726764717898971, iteration: 425996
loss: 1.0106608867645264,grad_norm: 0.9999993682043287, iteration: 425997
loss: 0.9804147481918335,grad_norm: 0.7301100072739242, iteration: 425998
loss: 1.0203683376312256,grad_norm: 0.9999991241964801, iteration: 425999
loss: 1.0303397178649902,grad_norm: 0.7944481502066325, iteration: 426000
loss: 1.0190916061401367,grad_norm: 0.9999991132234102, iteration: 426001
loss: 0.9774410128593445,grad_norm: 0.7241740176558459, iteration: 426002
loss: 1.0122711658477783,grad_norm: 0.7802456683291344, iteration: 426003
loss: 1.0413230657577515,grad_norm: 0.7883991399972832, iteration: 426004
loss: 1.0673561096191406,grad_norm: 0.803790204192794, iteration: 426005
loss: 0.9654896855354309,grad_norm: 0.7151009492203748, iteration: 426006
loss: 0.9986379742622375,grad_norm: 0.6681201320605541, iteration: 426007
loss: 0.9959697127342224,grad_norm: 0.885005150709899, iteration: 426008
loss: 1.0043870210647583,grad_norm: 0.7795260852080864, iteration: 426009
loss: 1.003473162651062,grad_norm: 0.7290300251141736, iteration: 426010
loss: 1.0094356536865234,grad_norm: 0.9152896632889305, iteration: 426011
loss: 0.995512843132019,grad_norm: 0.8324548832353751, iteration: 426012
loss: 1.0002467632293701,grad_norm: 0.7839027659033747, iteration: 426013
loss: 0.9806768894195557,grad_norm: 0.7507126971549943, iteration: 426014
loss: 1.0344955921173096,grad_norm: 0.9652368411533019, iteration: 426015
loss: 1.0025169849395752,grad_norm: 0.7443055234020807, iteration: 426016
loss: 1.113980770111084,grad_norm: 0.9999993206342288, iteration: 426017
loss: 1.0176012516021729,grad_norm: 0.7581578745161999, iteration: 426018
loss: 1.0317943096160889,grad_norm: 0.8977267952178218, iteration: 426019
loss: 0.9894579648971558,grad_norm: 0.9999990394816399, iteration: 426020
loss: 1.0281931161880493,grad_norm: 0.8561594649515125, iteration: 426021
loss: 0.9688615798950195,grad_norm: 0.9999995440785383, iteration: 426022
loss: 0.9902058243751526,grad_norm: 0.6661470937876531, iteration: 426023
loss: 1.1216142177581787,grad_norm: 0.9649393851570044, iteration: 426024
loss: 1.0092161893844604,grad_norm: 0.9999997950678192, iteration: 426025
loss: 1.0171555280685425,grad_norm: 0.6827308678538078, iteration: 426026
loss: 0.9795039296150208,grad_norm: 0.8363791437137199, iteration: 426027
loss: 0.9928533434867859,grad_norm: 0.868969529227293, iteration: 426028
loss: 0.9975230097770691,grad_norm: 0.8450563021657403, iteration: 426029
loss: 1.0886389017105103,grad_norm: 0.8455985515068366, iteration: 426030
loss: 0.9808322787284851,grad_norm: 0.7549076639238924, iteration: 426031
loss: 0.979179859161377,grad_norm: 0.7287510675699012, iteration: 426032
loss: 0.9915486574172974,grad_norm: 0.7013180669508263, iteration: 426033
loss: 1.0069984197616577,grad_norm: 0.7605398701146461, iteration: 426034
loss: 0.9851048588752747,grad_norm: 0.6914594723340334, iteration: 426035
loss: 1.0255478620529175,grad_norm: 0.7424079411113537, iteration: 426036
loss: 1.0131715536117554,grad_norm: 0.8168488589772606, iteration: 426037
loss: 0.992985725402832,grad_norm: 0.7562568748538312, iteration: 426038
loss: 1.0812828540802002,grad_norm: 0.9438568836043743, iteration: 426039
loss: 1.0321481227874756,grad_norm: 0.7854723904019, iteration: 426040
loss: 0.983660101890564,grad_norm: 0.7657990356584814, iteration: 426041
loss: 1.0596065521240234,grad_norm: 0.9999998896642706, iteration: 426042
loss: 0.9998412728309631,grad_norm: 0.7378926407459065, iteration: 426043
loss: 0.9947822093963623,grad_norm: 0.6208421698672579, iteration: 426044
loss: 1.010127305984497,grad_norm: 0.9999991442534335, iteration: 426045
loss: 1.0220658779144287,grad_norm: 0.7438784405812958, iteration: 426046
loss: 0.9879302978515625,grad_norm: 0.7752472532328191, iteration: 426047
loss: 1.0930739641189575,grad_norm: 0.9999990932198745, iteration: 426048
loss: 1.0046970844268799,grad_norm: 0.6071063198899621, iteration: 426049
loss: 1.0504707098007202,grad_norm: 0.9327958696588419, iteration: 426050
loss: 0.9749108552932739,grad_norm: 0.7541154813479586, iteration: 426051
loss: 0.9848707318305969,grad_norm: 0.8292242266410206, iteration: 426052
loss: 0.9543541073799133,grad_norm: 0.632797643773833, iteration: 426053
loss: 1.0277647972106934,grad_norm: 0.8654091319918376, iteration: 426054
loss: 1.0786528587341309,grad_norm: 0.7312204111730022, iteration: 426055
loss: 1.0025794506072998,grad_norm: 0.7561934244062993, iteration: 426056
loss: 1.0983834266662598,grad_norm: 0.9999997257216351, iteration: 426057
loss: 0.98826664686203,grad_norm: 0.7680402132706695, iteration: 426058
loss: 1.024755597114563,grad_norm: 0.873896814064501, iteration: 426059
loss: 1.0155720710754395,grad_norm: 0.7742920523160024, iteration: 426060
loss: 0.970274806022644,grad_norm: 0.8601044235175475, iteration: 426061
loss: 0.9871987104415894,grad_norm: 0.834854808666879, iteration: 426062
loss: 1.0050336122512817,grad_norm: 0.6625856314438491, iteration: 426063
loss: 0.9965332746505737,grad_norm: 0.7506848999706324, iteration: 426064
loss: 0.9811593890190125,grad_norm: 0.7235469253401436, iteration: 426065
loss: 1.0016191005706787,grad_norm: 0.9999997596110852, iteration: 426066
loss: 0.9960756301879883,grad_norm: 0.9999991118628968, iteration: 426067
loss: 0.9914454221725464,grad_norm: 0.6957476583071754, iteration: 426068
loss: 1.0327479839324951,grad_norm: 0.8183288637197408, iteration: 426069
loss: 1.1143134832382202,grad_norm: 0.999999845637901, iteration: 426070
loss: 0.990843653678894,grad_norm: 0.7718943627643735, iteration: 426071
loss: 0.992071270942688,grad_norm: 0.7636284861768766, iteration: 426072
loss: 1.0038868188858032,grad_norm: 0.925775003965069, iteration: 426073
loss: 1.0258525609970093,grad_norm: 0.8945685785268886, iteration: 426074
loss: 0.9817481637001038,grad_norm: 0.8679462863782988, iteration: 426075
loss: 1.0053472518920898,grad_norm: 0.7860451699519849, iteration: 426076
loss: 0.979496419429779,grad_norm: 0.71047927465337, iteration: 426077
loss: 1.0882622003555298,grad_norm: 0.9999992691972512, iteration: 426078
loss: 1.0636987686157227,grad_norm: 0.9384546348756989, iteration: 426079
loss: 0.9859036803245544,grad_norm: 0.7091837353287898, iteration: 426080
loss: 1.0102338790893555,grad_norm: 0.9999992559629955, iteration: 426081
loss: 0.9938792586326599,grad_norm: 0.7591227215720311, iteration: 426082
loss: 0.9567427039146423,grad_norm: 0.8401840021098621, iteration: 426083
loss: 0.9756997227668762,grad_norm: 0.9999991346265995, iteration: 426084
loss: 1.0731064081192017,grad_norm: 0.8181590258345365, iteration: 426085
loss: 0.9828025102615356,grad_norm: 0.7022161024997702, iteration: 426086
loss: 0.9945319890975952,grad_norm: 0.8720973571063585, iteration: 426087
loss: 1.0044903755187988,grad_norm: 0.8745001682913478, iteration: 426088
loss: 0.9869153499603271,grad_norm: 0.8040561697675527, iteration: 426089
loss: 1.0736459493637085,grad_norm: 0.8898485862190502, iteration: 426090
loss: 0.9761086702346802,grad_norm: 0.8100661237996466, iteration: 426091
loss: 1.061212182044983,grad_norm: 0.6778434437633182, iteration: 426092
loss: 0.9984797835350037,grad_norm: 0.866364360257104, iteration: 426093
loss: 0.9827712774276733,grad_norm: 0.8497148716246735, iteration: 426094
loss: 1.0108956098556519,grad_norm: 0.9999992256848184, iteration: 426095
loss: 0.9818307757377625,grad_norm: 0.8350161429733248, iteration: 426096
loss: 1.0486540794372559,grad_norm: 0.7592029893508453, iteration: 426097
loss: 1.0278205871582031,grad_norm: 1.0000000020320314, iteration: 426098
loss: 1.0069692134857178,grad_norm: 0.9999996277208613, iteration: 426099
loss: 0.9978270530700684,grad_norm: 0.8647003317589537, iteration: 426100
loss: 1.0341435670852661,grad_norm: 0.7756203150312946, iteration: 426101
loss: 0.9773572683334351,grad_norm: 0.7404193250235869, iteration: 426102
loss: 1.006120204925537,grad_norm: 0.8471729182210379, iteration: 426103
loss: 1.0706160068511963,grad_norm: 0.9999992028726815, iteration: 426104
loss: 1.0582221746444702,grad_norm: 0.9999995103212527, iteration: 426105
loss: 0.9843847155570984,grad_norm: 0.8239353484628091, iteration: 426106
loss: 0.9854820370674133,grad_norm: 0.8144725243686669, iteration: 426107
loss: 1.0284472703933716,grad_norm: 0.7453406568065704, iteration: 426108
loss: 1.1263364553451538,grad_norm: 0.9999990450687741, iteration: 426109
loss: 0.9920246601104736,grad_norm: 0.7738650414223872, iteration: 426110
loss: 1.0459789037704468,grad_norm: 0.9999998940661058, iteration: 426111
loss: 1.0216423273086548,grad_norm: 0.7768403450259483, iteration: 426112
loss: 1.0105398893356323,grad_norm: 0.9441000990412118, iteration: 426113
loss: 1.0386875867843628,grad_norm: 0.8187427468120421, iteration: 426114
loss: 1.002983808517456,grad_norm: 0.6556614527499082, iteration: 426115
loss: 1.0113202333450317,grad_norm: 0.7922322117178183, iteration: 426116
loss: 0.9804291725158691,grad_norm: 0.7218359810389252, iteration: 426117
loss: 1.0232422351837158,grad_norm: 0.7610532639634526, iteration: 426118
loss: 1.0299172401428223,grad_norm: 0.8797595892680472, iteration: 426119
loss: 0.9927390813827515,grad_norm: 0.7921386070802596, iteration: 426120
loss: 0.9656075835227966,grad_norm: 0.840756100149648, iteration: 426121
loss: 1.000464677810669,grad_norm: 0.8381079828547746, iteration: 426122
loss: 1.0015531778335571,grad_norm: 0.9194514132853945, iteration: 426123
loss: 1.0101996660232544,grad_norm: 0.7619675442015228, iteration: 426124
loss: 1.016676425933838,grad_norm: 0.8132817377067924, iteration: 426125
loss: 0.9943224787712097,grad_norm: 0.9999992971632288, iteration: 426126
loss: 0.9607296586036682,grad_norm: 0.6315373974880777, iteration: 426127
loss: 0.9932057857513428,grad_norm: 0.9270240410790137, iteration: 426128
loss: 1.0235282182693481,grad_norm: 0.7188465482465822, iteration: 426129
loss: 1.1079076528549194,grad_norm: 0.9322937307375843, iteration: 426130
loss: 1.0409927368164062,grad_norm: 0.9999989791294221, iteration: 426131
loss: 1.0126248598098755,grad_norm: 0.8359398511153651, iteration: 426132
loss: 1.0327504873275757,grad_norm: 0.6702717848953452, iteration: 426133
loss: 1.0313433408737183,grad_norm: 0.6784153379017082, iteration: 426134
loss: 1.0030587911605835,grad_norm: 0.8295255266641269, iteration: 426135
loss: 0.9897201657295227,grad_norm: 0.9999999493338667, iteration: 426136
loss: 0.9337295293807983,grad_norm: 0.9067295461932484, iteration: 426137
loss: 0.9773783683776855,grad_norm: 0.7555065484459265, iteration: 426138
loss: 0.9794568419456482,grad_norm: 0.7987544633603383, iteration: 426139
loss: 0.9910030961036682,grad_norm: 0.7403646703999078, iteration: 426140
loss: 1.0202653408050537,grad_norm: 0.99999954626443, iteration: 426141
loss: 0.9944496750831604,grad_norm: 0.7022618726270481, iteration: 426142
loss: 0.9905959963798523,grad_norm: 0.80150492416754, iteration: 426143
loss: 1.0712931156158447,grad_norm: 0.6989437005610657, iteration: 426144
loss: 0.9929269552230835,grad_norm: 0.7217617836868658, iteration: 426145
loss: 0.9900758266448975,grad_norm: 0.999999118479806, iteration: 426146
loss: 1.2075612545013428,grad_norm: 0.9999997383984077, iteration: 426147
loss: 0.9923853874206543,grad_norm: 0.7186448657540394, iteration: 426148
loss: 0.9676961302757263,grad_norm: 0.8264472162947056, iteration: 426149
loss: 1.0154390335083008,grad_norm: 0.850334007718198, iteration: 426150
loss: 1.064115285873413,grad_norm: 0.9999992744200272, iteration: 426151
loss: 1.0216995477676392,grad_norm: 0.7974155882255193, iteration: 426152
loss: 1.0679829120635986,grad_norm: 0.9999995535506615, iteration: 426153
loss: 0.998762309551239,grad_norm: 0.8438883914716485, iteration: 426154
loss: 0.9995442628860474,grad_norm: 0.6706891580891031, iteration: 426155
loss: 0.9953066110610962,grad_norm: 0.7075859452910694, iteration: 426156
loss: 0.9710835814476013,grad_norm: 0.7957235342260421, iteration: 426157
loss: 0.9845135807991028,grad_norm: 0.999999151209972, iteration: 426158
loss: 1.006879210472107,grad_norm: 0.8210103708152033, iteration: 426159
loss: 1.0341105461120605,grad_norm: 0.8269801500232473, iteration: 426160
loss: 0.9694472551345825,grad_norm: 0.8508063031441341, iteration: 426161
loss: 1.0204358100891113,grad_norm: 0.7480044478655945, iteration: 426162
loss: 1.050828456878662,grad_norm: 0.8261457619439629, iteration: 426163
loss: 1.0250104665756226,grad_norm: 0.9476198491333735, iteration: 426164
loss: 1.1803796291351318,grad_norm: 0.9999991504234979, iteration: 426165
loss: 1.0042765140533447,grad_norm: 0.999999690045566, iteration: 426166
loss: 1.1592789888381958,grad_norm: 0.99999970018017, iteration: 426167
loss: 1.0077714920043945,grad_norm: 0.8800700470720746, iteration: 426168
loss: 1.0551170110702515,grad_norm: 0.8315516249304561, iteration: 426169
loss: 1.057701587677002,grad_norm: 0.9521721250412911, iteration: 426170
loss: 1.1128497123718262,grad_norm: 0.999999482071458, iteration: 426171
loss: 1.0882567167282104,grad_norm: 0.9030011944814541, iteration: 426172
loss: 1.0239593982696533,grad_norm: 0.9999992164126809, iteration: 426173
loss: 1.0060639381408691,grad_norm: 0.7944274462339966, iteration: 426174
loss: 0.9742863178253174,grad_norm: 0.7352874950448668, iteration: 426175
loss: 1.0022106170654297,grad_norm: 0.7497286410621148, iteration: 426176
loss: 0.9976619482040405,grad_norm: 0.8613107687433745, iteration: 426177
loss: 1.0128999948501587,grad_norm: 0.7708395080846245, iteration: 426178
loss: 0.9899917244911194,grad_norm: 0.7717507405386937, iteration: 426179
loss: 1.065439224243164,grad_norm: 0.9999991538438356, iteration: 426180
loss: 0.9824278354644775,grad_norm: 0.7240899680449874, iteration: 426181
loss: 1.0076078176498413,grad_norm: 0.7594186503628078, iteration: 426182
loss: 1.009157419204712,grad_norm: 0.7046398435392376, iteration: 426183
loss: 0.9501963257789612,grad_norm: 0.9051180364100151, iteration: 426184
loss: 1.0039329528808594,grad_norm: 0.7229744737792309, iteration: 426185
loss: 0.990020215511322,grad_norm: 0.8760463706459359, iteration: 426186
loss: 1.0512328147888184,grad_norm: 0.8374668760805672, iteration: 426187
loss: 1.0299445390701294,grad_norm: 0.9999999795408065, iteration: 426188
loss: 1.0176795721054077,grad_norm: 0.6929097156691322, iteration: 426189
loss: 1.0654182434082031,grad_norm: 0.674634245073903, iteration: 426190
loss: 0.9632774591445923,grad_norm: 0.7304034622524485, iteration: 426191
loss: 0.9982009530067444,grad_norm: 0.8023265702892218, iteration: 426192
loss: 0.9664673805236816,grad_norm: 0.8334825913875256, iteration: 426193
loss: 0.9658643007278442,grad_norm: 0.7714652095100494, iteration: 426194
loss: 1.0130728483200073,grad_norm: 0.7603439195869032, iteration: 426195
loss: 0.9690239429473877,grad_norm: 0.7980705335411847, iteration: 426196
loss: 0.9901013374328613,grad_norm: 0.7401741702128305, iteration: 426197
loss: 0.9875204563140869,grad_norm: 0.9999992308702699, iteration: 426198
loss: 0.9604133367538452,grad_norm: 0.7527453848849124, iteration: 426199
loss: 1.0180848836898804,grad_norm: 0.999999499578616, iteration: 426200
loss: 0.9778144359588623,grad_norm: 0.9999989403799399, iteration: 426201
loss: 1.1459624767303467,grad_norm: 0.9999997738059352, iteration: 426202
loss: 1.003931999206543,grad_norm: 0.8699098031389364, iteration: 426203
loss: 1.047123908996582,grad_norm: 0.9999993658831756, iteration: 426204
loss: 1.0238230228424072,grad_norm: 0.8074200906381644, iteration: 426205
loss: 1.024697184562683,grad_norm: 0.8858960146805168, iteration: 426206
loss: 1.0610098838806152,grad_norm: 0.9877255204455917, iteration: 426207
loss: 1.0657262802124023,grad_norm: 0.8266342966794952, iteration: 426208
loss: 1.0215154886245728,grad_norm: 0.8653915888066788, iteration: 426209
loss: 1.0048705339431763,grad_norm: 0.9999995481384674, iteration: 426210
loss: 1.0140535831451416,grad_norm: 0.8339638344364617, iteration: 426211
loss: 1.015875220298767,grad_norm: 0.9999990048360659, iteration: 426212
loss: 0.9928271770477295,grad_norm: 0.8118312067668733, iteration: 426213
loss: 1.018470287322998,grad_norm: 0.9999989995462033, iteration: 426214
loss: 0.9833084940910339,grad_norm: 0.7107007426108378, iteration: 426215
loss: 0.9989467263221741,grad_norm: 0.8809733005417206, iteration: 426216
loss: 1.0043123960494995,grad_norm: 0.8989397485542504, iteration: 426217
loss: 1.0214207172393799,grad_norm: 0.9536688862298603, iteration: 426218
loss: 0.993945837020874,grad_norm: 0.7563165745988599, iteration: 426219
loss: 0.9938690662384033,grad_norm: 0.8026149392011216, iteration: 426220
loss: 0.9927698969841003,grad_norm: 0.9339878292321043, iteration: 426221
loss: 1.010541558265686,grad_norm: 0.9427659361837026, iteration: 426222
loss: 1.0189590454101562,grad_norm: 0.7999898825629167, iteration: 426223
loss: 1.016558051109314,grad_norm: 0.8442942719141154, iteration: 426224
loss: 0.9932550191879272,grad_norm: 0.8017285858169042, iteration: 426225
loss: 0.9683102965354919,grad_norm: 0.8213860887602268, iteration: 426226
loss: 0.9568591713905334,grad_norm: 0.7470258352656088, iteration: 426227
loss: 1.0127943754196167,grad_norm: 0.661924927450049, iteration: 426228
loss: 1.0505236387252808,grad_norm: 0.9999993747593031, iteration: 426229
loss: 1.106892466545105,grad_norm: 0.9999998910522273, iteration: 426230
loss: 0.9924542307853699,grad_norm: 0.7078040283806847, iteration: 426231
loss: 0.9961844682693481,grad_norm: 0.7632709320969909, iteration: 426232
loss: 1.044256567955017,grad_norm: 0.8493782993515541, iteration: 426233
loss: 0.9839556217193604,grad_norm: 0.8065355597165786, iteration: 426234
loss: 0.9672164916992188,grad_norm: 0.8181251671675589, iteration: 426235
loss: 1.0047343969345093,grad_norm: 0.7352720780103906, iteration: 426236
loss: 1.0225898027420044,grad_norm: 0.8090152786812002, iteration: 426237
loss: 0.9844821095466614,grad_norm: 0.8209229745614681, iteration: 426238
loss: 1.040245532989502,grad_norm: 0.9999995865541569, iteration: 426239
loss: 0.9909597635269165,grad_norm: 0.6648529887247189, iteration: 426240
loss: 1.0033727884292603,grad_norm: 0.7156189757714416, iteration: 426241
loss: 1.0073107481002808,grad_norm: 0.7701529064191702, iteration: 426242
loss: 1.0257593393325806,grad_norm: 0.9999991545731998, iteration: 426243
loss: 1.058591604232788,grad_norm: 0.9999989598924152, iteration: 426244
loss: 0.9989818930625916,grad_norm: 0.8583215330501663, iteration: 426245
loss: 1.0123424530029297,grad_norm: 0.6586464803286882, iteration: 426246
loss: 0.9937283396720886,grad_norm: 0.8331347428566704, iteration: 426247
loss: 1.0162721872329712,grad_norm: 0.9999999519778895, iteration: 426248
loss: 1.1168502569198608,grad_norm: 0.9999991641324902, iteration: 426249
loss: 1.0361429452896118,grad_norm: 0.8844688872936233, iteration: 426250
loss: 1.0045857429504395,grad_norm: 0.8776474856398498, iteration: 426251
loss: 0.9817708134651184,grad_norm: 0.8847099459071572, iteration: 426252
loss: 0.9866400361061096,grad_norm: 0.900294546156716, iteration: 426253
loss: 1.0008070468902588,grad_norm: 0.7040403594187999, iteration: 426254
loss: 1.0116181373596191,grad_norm: 0.9999991586798769, iteration: 426255
loss: 1.047882080078125,grad_norm: 0.9999992458696734, iteration: 426256
loss: 1.0014643669128418,grad_norm: 0.7130670425560626, iteration: 426257
loss: 0.9767693877220154,grad_norm: 0.8784220320476958, iteration: 426258
loss: 1.0454010963439941,grad_norm: 0.9999992480895371, iteration: 426259
loss: 0.9848697185516357,grad_norm: 0.9999991293151512, iteration: 426260
loss: 1.0146689414978027,grad_norm: 0.9194942467587168, iteration: 426261
loss: 1.0378297567367554,grad_norm: 0.6364601679366175, iteration: 426262
loss: 0.9626911878585815,grad_norm: 0.7324304010750543, iteration: 426263
loss: 0.9891079664230347,grad_norm: 0.6978460918531627, iteration: 426264
loss: 0.9915635585784912,grad_norm: 0.7477734068082906, iteration: 426265
loss: 0.9926539659500122,grad_norm: 0.7547953631439789, iteration: 426266
loss: 1.0677709579467773,grad_norm: 0.8829987278036524, iteration: 426267
loss: 1.0475034713745117,grad_norm: 0.9999996347314697, iteration: 426268
loss: 0.9998505115509033,grad_norm: 0.7582390437561493, iteration: 426269
loss: 1.0013965368270874,grad_norm: 0.6702083539531047, iteration: 426270
loss: 1.0492653846740723,grad_norm: 0.8657325561476817, iteration: 426271
loss: 0.96446692943573,grad_norm: 0.9138835886723213, iteration: 426272
loss: 1.0733184814453125,grad_norm: 0.999999942165204, iteration: 426273
loss: 1.0196408033370972,grad_norm: 0.6725207624059956, iteration: 426274
loss: 1.0485323667526245,grad_norm: 0.8733141175759717, iteration: 426275
loss: 0.9755666255950928,grad_norm: 0.7551147870072554, iteration: 426276
loss: 0.9978436231613159,grad_norm: 0.7908987550920927, iteration: 426277
loss: 0.9895508289337158,grad_norm: 0.8408414941858411, iteration: 426278
loss: 1.0063656568527222,grad_norm: 0.9047380809232652, iteration: 426279
loss: 0.9972289800643921,grad_norm: 0.8877465721295038, iteration: 426280
loss: 1.1101738214492798,grad_norm: 1.0000000456048275, iteration: 426281
loss: 1.0278899669647217,grad_norm: 0.9062961060631752, iteration: 426282
loss: 1.0200239419937134,grad_norm: 0.6782206132291856, iteration: 426283
loss: 1.011134386062622,grad_norm: 0.8432819123530468, iteration: 426284
loss: 1.093510389328003,grad_norm: 0.999999918365413, iteration: 426285
loss: 1.0117987394332886,grad_norm: 0.9790466400666669, iteration: 426286
loss: 0.9802682995796204,grad_norm: 0.988430180820446, iteration: 426287
loss: 1.0121961832046509,grad_norm: 0.8300172939707456, iteration: 426288
loss: 1.0584614276885986,grad_norm: 0.9999991527477847, iteration: 426289
loss: 0.9872502088546753,grad_norm: 0.8433118750707402, iteration: 426290
loss: 1.0353844165802002,grad_norm: 0.8811316935897776, iteration: 426291
loss: 1.0057486295700073,grad_norm: 0.7914319135661965, iteration: 426292
loss: 0.9783363938331604,grad_norm: 0.8951794455424203, iteration: 426293
loss: 1.0221004486083984,grad_norm: 0.8130033407752295, iteration: 426294
loss: 0.9865221977233887,grad_norm: 0.7095470727016034, iteration: 426295
loss: 1.026701807975769,grad_norm: 0.8605248598509375, iteration: 426296
loss: 0.9932291507720947,grad_norm: 0.716143705026105, iteration: 426297
loss: 0.9877262115478516,grad_norm: 0.7234998650753964, iteration: 426298
loss: 0.9863356351852417,grad_norm: 0.9125913539555455, iteration: 426299
loss: 1.0676814317703247,grad_norm: 0.9999989971680924, iteration: 426300
loss: 1.0477718114852905,grad_norm: 0.999998938901936, iteration: 426301
loss: 1.0209267139434814,grad_norm: 0.8859881144011301, iteration: 426302
loss: 1.0334429740905762,grad_norm: 0.9338792493170025, iteration: 426303
loss: 1.0056817531585693,grad_norm: 0.7874334605683802, iteration: 426304
loss: 0.9913720488548279,grad_norm: 0.8948897842059743, iteration: 426305
loss: 0.9921314716339111,grad_norm: 0.7874196940215417, iteration: 426306
loss: 0.9945178031921387,grad_norm: 0.7893248267191408, iteration: 426307
loss: 1.0218201875686646,grad_norm: 0.9999995611640197, iteration: 426308
loss: 0.9876858592033386,grad_norm: 0.9814355598840583, iteration: 426309
loss: 0.9858811497688293,grad_norm: 0.8594003878470653, iteration: 426310
loss: 1.0102899074554443,grad_norm: 0.7795430886617155, iteration: 426311
loss: 0.9920170307159424,grad_norm: 0.7033504139130959, iteration: 426312
loss: 1.0027382373809814,grad_norm: 0.9053360276941105, iteration: 426313
loss: 1.0169332027435303,grad_norm: 0.8682697406942455, iteration: 426314
loss: 1.0018799304962158,grad_norm: 0.7821725313353709, iteration: 426315
loss: 0.9919844269752502,grad_norm: 0.7828775338608055, iteration: 426316
loss: 1.015397310256958,grad_norm: 0.8277040524556817, iteration: 426317
loss: 0.9967425465583801,grad_norm: 0.8811262459939371, iteration: 426318
loss: 0.9775480031967163,grad_norm: 0.6513217924599426, iteration: 426319
loss: 0.9769105315208435,grad_norm: 0.7634194095763337, iteration: 426320
loss: 1.0138063430786133,grad_norm: 0.6505694936340575, iteration: 426321
loss: 1.0047181844711304,grad_norm: 0.8181117534112321, iteration: 426322
loss: 1.0060780048370361,grad_norm: 0.9999990897938799, iteration: 426323
loss: 0.9945195913314819,grad_norm: 0.9999990616456292, iteration: 426324
loss: 1.2656641006469727,grad_norm: 0.9999990605153661, iteration: 426325
loss: 1.0131839513778687,grad_norm: 0.8644878664244763, iteration: 426326
loss: 1.0508475303649902,grad_norm: 0.9500498011572721, iteration: 426327
loss: 0.9888497591018677,grad_norm: 0.8142780100592878, iteration: 426328
loss: 1.0200047492980957,grad_norm: 0.8496169710091364, iteration: 426329
loss: 0.9982025623321533,grad_norm: 0.7185688894992314, iteration: 426330
loss: 1.0223253965377808,grad_norm: 0.6487909300429585, iteration: 426331
loss: 0.9850427508354187,grad_norm: 0.7795707775040789, iteration: 426332
loss: 1.0728727579116821,grad_norm: 0.7781068893787312, iteration: 426333
loss: 0.9865915179252625,grad_norm: 0.8764894297591208, iteration: 426334
loss: 0.9961869716644287,grad_norm: 0.829823310429394, iteration: 426335
loss: 1.011044979095459,grad_norm: 0.6364915532368316, iteration: 426336
loss: 1.0026272535324097,grad_norm: 0.645023463178475, iteration: 426337
loss: 0.9793166518211365,grad_norm: 0.7783623726762757, iteration: 426338
loss: 1.0197361707687378,grad_norm: 0.7726672845429453, iteration: 426339
loss: 1.0217467546463013,grad_norm: 0.9317950748071675, iteration: 426340
loss: 1.001950979232788,grad_norm: 0.8645166854353281, iteration: 426341
loss: 1.0687437057495117,grad_norm: 0.9999999111512007, iteration: 426342
loss: 1.0766785144805908,grad_norm: 0.9999998412160604, iteration: 426343
loss: 0.9940639734268188,grad_norm: 0.7443872277290968, iteration: 426344
loss: 1.0252939462661743,grad_norm: 0.7576368823044589, iteration: 426345
loss: 1.01474928855896,grad_norm: 0.9742227745734133, iteration: 426346
loss: 1.0002025365829468,grad_norm: 0.8559734563180794, iteration: 426347
loss: 1.0205069780349731,grad_norm: 0.8549738520779431, iteration: 426348
loss: 1.0168490409851074,grad_norm: 0.6972452518993744, iteration: 426349
loss: 1.0060335397720337,grad_norm: 0.826022412769494, iteration: 426350
loss: 0.9991816878318787,grad_norm: 0.8494022363762243, iteration: 426351
loss: 0.997314453125,grad_norm: 0.9474563979271694, iteration: 426352
loss: 0.998069167137146,grad_norm: 0.9999998416913454, iteration: 426353
loss: 0.989545464515686,grad_norm: 0.7593074564131446, iteration: 426354
loss: 0.9806958436965942,grad_norm: 0.8733364478196547, iteration: 426355
loss: 0.944421648979187,grad_norm: 0.8546788332821827, iteration: 426356
loss: 1.052880048751831,grad_norm: 0.9999995678160634, iteration: 426357
loss: 0.971331775188446,grad_norm: 0.9531422524753502, iteration: 426358
loss: 0.9943010210990906,grad_norm: 0.6950901534322146, iteration: 426359
loss: 0.9627499580383301,grad_norm: 0.9067355901057644, iteration: 426360
loss: 1.066411018371582,grad_norm: 0.999999897518063, iteration: 426361
loss: 0.997957706451416,grad_norm: 0.7367478983845956, iteration: 426362
loss: 0.9732358455657959,grad_norm: 0.8265441709912186, iteration: 426363
loss: 0.9907262921333313,grad_norm: 0.7027242858258423, iteration: 426364
loss: 1.1009745597839355,grad_norm: 0.9999991911927032, iteration: 426365
loss: 1.0001298189163208,grad_norm: 0.8765656911756718, iteration: 426366
loss: 0.9701446890830994,grad_norm: 0.7254399535228008, iteration: 426367
loss: 0.993811309337616,grad_norm: 0.811754088970923, iteration: 426368
loss: 1.0128145217895508,grad_norm: 0.646120869659666, iteration: 426369
loss: 1.0180131196975708,grad_norm: 0.7379673234777722, iteration: 426370
loss: 0.9984400868415833,grad_norm: 0.8981782216209684, iteration: 426371
loss: 1.0167471170425415,grad_norm: 0.8042951067959196, iteration: 426372
loss: 1.020719289779663,grad_norm: 0.879630214182207, iteration: 426373
loss: 1.0185903310775757,grad_norm: 0.8096849912328777, iteration: 426374
loss: 1.0114551782608032,grad_norm: 0.9999999601577597, iteration: 426375
loss: 0.978489875793457,grad_norm: 0.8312778294357569, iteration: 426376
loss: 1.0076770782470703,grad_norm: 0.7814656156049004, iteration: 426377
loss: 1.0027819871902466,grad_norm: 0.7809610696934199, iteration: 426378
loss: 0.9799901247024536,grad_norm: 0.7657709102465413, iteration: 426379
loss: 1.0191962718963623,grad_norm: 0.7242800096267806, iteration: 426380
loss: 1.0191104412078857,grad_norm: 0.8614012459947771, iteration: 426381
loss: 1.0259190797805786,grad_norm: 0.7362756287307762, iteration: 426382
loss: 0.9706963300704956,grad_norm: 0.7277066419130801, iteration: 426383
loss: 0.9958623051643372,grad_norm: 0.707306758140132, iteration: 426384
loss: 1.0591317415237427,grad_norm: 0.9999995729165951, iteration: 426385
loss: 0.9960256218910217,grad_norm: 0.9999991485604165, iteration: 426386
loss: 1.046039342880249,grad_norm: 0.9999992141090069, iteration: 426387
loss: 0.999128520488739,grad_norm: 0.794951642727963, iteration: 426388
loss: 0.9777319431304932,grad_norm: 0.7700591875012616, iteration: 426389
loss: 1.0036633014678955,grad_norm: 0.6635995061764655, iteration: 426390
loss: 1.0081262588500977,grad_norm: 0.8536465773321033, iteration: 426391
loss: 1.0239391326904297,grad_norm: 0.834605514067956, iteration: 426392
loss: 1.0274578332901,grad_norm: 0.9999994410053981, iteration: 426393
loss: 1.0033873319625854,grad_norm: 0.7066623221054801, iteration: 426394
loss: 1.03873610496521,grad_norm: 0.999999467921502, iteration: 426395
loss: 0.9683275818824768,grad_norm: 0.750890643600521, iteration: 426396
loss: 1.0741922855377197,grad_norm: 0.9999991399300154, iteration: 426397
loss: 0.9816555380821228,grad_norm: 0.7755357632293592, iteration: 426398
loss: 1.0082471370697021,grad_norm: 0.7140126184523624, iteration: 426399
loss: 1.0426876544952393,grad_norm: 0.9999993520983872, iteration: 426400
loss: 1.0078271627426147,grad_norm: 0.8636161979618037, iteration: 426401
loss: 1.044758677482605,grad_norm: 0.9133661290746578, iteration: 426402
loss: 1.1042605638504028,grad_norm: 0.7760672826252141, iteration: 426403
loss: 1.0079987049102783,grad_norm: 0.8729777360843637, iteration: 426404
loss: 1.0395289659500122,grad_norm: 0.999999283718649, iteration: 426405
loss: 1.0153133869171143,grad_norm: 0.7402796462089062, iteration: 426406
loss: 1.0294867753982544,grad_norm: 0.9999993286390849, iteration: 426407
loss: 1.029781460762024,grad_norm: 0.8346045016703495, iteration: 426408
loss: 1.00755774974823,grad_norm: 0.9590229638101675, iteration: 426409
loss: 1.0166434049606323,grad_norm: 0.999999218548936, iteration: 426410
loss: 1.1361305713653564,grad_norm: 0.9999991870379498, iteration: 426411
loss: 1.008797526359558,grad_norm: 0.7251285879130469, iteration: 426412
loss: 1.0378328561782837,grad_norm: 0.9138900358057287, iteration: 426413
loss: 0.9961687922477722,grad_norm: 0.675067272916205, iteration: 426414
loss: 0.9835416674613953,grad_norm: 0.7567121068408299, iteration: 426415
loss: 0.9727221131324768,grad_norm: 0.8676345206499567, iteration: 426416
loss: 0.9732415080070496,grad_norm: 0.8562689158197835, iteration: 426417
loss: 1.0008152723312378,grad_norm: 0.9283621375968409, iteration: 426418
loss: 1.035256266593933,grad_norm: 0.966698308680806, iteration: 426419
loss: 0.9957103133201599,grad_norm: 0.6930363011679711, iteration: 426420
loss: 1.0050551891326904,grad_norm: 0.8863891116034744, iteration: 426421
loss: 0.9958353638648987,grad_norm: 0.701058996966522, iteration: 426422
loss: 0.9800924062728882,grad_norm: 0.6676141457511718, iteration: 426423
loss: 1.0322555303573608,grad_norm: 0.7521075639455681, iteration: 426424
loss: 1.0068219900131226,grad_norm: 0.8144523699861296, iteration: 426425
loss: 1.0338338613510132,grad_norm: 0.6913662641525358, iteration: 426426
loss: 1.0147576332092285,grad_norm: 0.6640643172270605, iteration: 426427
loss: 0.998422384262085,grad_norm: 0.6699631424579046, iteration: 426428
loss: 1.0073665380477905,grad_norm: 0.7248196623551205, iteration: 426429
loss: 0.9722444415092468,grad_norm: 0.7404027771867615, iteration: 426430
loss: 0.987243115901947,grad_norm: 0.7710358077205426, iteration: 426431
loss: 0.9878562688827515,grad_norm: 0.8043052609283032, iteration: 426432
loss: 1.0444530248641968,grad_norm: 0.7344921316255031, iteration: 426433
loss: 1.0064527988433838,grad_norm: 0.747545269836484, iteration: 426434
loss: 1.0175483226776123,grad_norm: 0.9999994750962787, iteration: 426435
loss: 1.0466569662094116,grad_norm: 0.8313475124774381, iteration: 426436
loss: 1.175751805305481,grad_norm: 0.9999998339566697, iteration: 426437
loss: 1.022537350654602,grad_norm: 0.8544042937227107, iteration: 426438
loss: 0.9855477213859558,grad_norm: 0.7569128026505684, iteration: 426439
loss: 0.9982300400733948,grad_norm: 0.8184219117064532, iteration: 426440
loss: 1.039137840270996,grad_norm: 0.9562504965094455, iteration: 426441
loss: 1.0607320070266724,grad_norm: 0.8103173094366598, iteration: 426442
loss: 0.9700267314910889,grad_norm: 0.76387515898094, iteration: 426443
loss: 1.0116446018218994,grad_norm: 0.8488281948816322, iteration: 426444
loss: 1.0095434188842773,grad_norm: 0.8027920068430994, iteration: 426445
loss: 0.9743754863739014,grad_norm: 0.7079696989841684, iteration: 426446
loss: 1.046607255935669,grad_norm: 0.8388828518739445, iteration: 426447
loss: 1.0066505670547485,grad_norm: 0.7279909840222812, iteration: 426448
loss: 0.9801182150840759,grad_norm: 0.7506256323761259, iteration: 426449
loss: 1.0417605638504028,grad_norm: 0.7137693851411174, iteration: 426450
loss: 0.9734172821044922,grad_norm: 0.7661732392369109, iteration: 426451
loss: 1.0317771434783936,grad_norm: 0.7087058018500754, iteration: 426452
loss: 1.0166537761688232,grad_norm: 0.590110854847151, iteration: 426453
loss: 1.111063003540039,grad_norm: 0.9999996842812527, iteration: 426454
loss: 1.030182957649231,grad_norm: 0.9200148942559835, iteration: 426455
loss: 1.0054285526275635,grad_norm: 0.7105305940372793, iteration: 426456
loss: 1.0977925062179565,grad_norm: 0.999999416005874, iteration: 426457
loss: 1.0516927242279053,grad_norm: 0.9841722566585194, iteration: 426458
loss: 1.026908278465271,grad_norm: 0.9999994356112669, iteration: 426459
loss: 1.0343085527420044,grad_norm: 0.8524646948174612, iteration: 426460
loss: 0.9413005113601685,grad_norm: 0.7734207439009482, iteration: 426461
loss: 0.9862576127052307,grad_norm: 0.9999991304350869, iteration: 426462
loss: 1.0182570219039917,grad_norm: 0.9999995695026906, iteration: 426463
loss: 1.009217619895935,grad_norm: 0.8420306049985455, iteration: 426464
loss: 1.0042121410369873,grad_norm: 0.8831133245456179, iteration: 426465
loss: 1.0395983457565308,grad_norm: 0.9245036443303972, iteration: 426466
loss: 1.0152952671051025,grad_norm: 0.8713529422819603, iteration: 426467
loss: 0.9940226078033447,grad_norm: 0.896953621771252, iteration: 426468
loss: 0.9689545035362244,grad_norm: 0.8036010040237344, iteration: 426469
loss: 1.0042579174041748,grad_norm: 0.7927066570946285, iteration: 426470
loss: 1.015001893043518,grad_norm: 0.7872817735681228, iteration: 426471
loss: 1.0205800533294678,grad_norm: 0.9161405234647118, iteration: 426472
loss: 0.9721167683601379,grad_norm: 0.8369426128491619, iteration: 426473
loss: 0.9774025678634644,grad_norm: 0.7920333052856624, iteration: 426474
loss: 0.983788013458252,grad_norm: 0.7563541912990542, iteration: 426475
loss: 0.9751073718070984,grad_norm: 0.9585303013761973, iteration: 426476
loss: 1.1284297704696655,grad_norm: 0.7880010512458758, iteration: 426477
loss: 1.0133891105651855,grad_norm: 0.7659681438689147, iteration: 426478
loss: 1.2159961462020874,grad_norm: 0.9999996276783394, iteration: 426479
loss: 0.9898802042007446,grad_norm: 0.771395826129853, iteration: 426480
loss: 0.9674646854400635,grad_norm: 0.7721491902456795, iteration: 426481
loss: 1.094318151473999,grad_norm: 0.9999998478563181, iteration: 426482
loss: 1.0328779220581055,grad_norm: 0.8052391822330602, iteration: 426483
loss: 0.9751498103141785,grad_norm: 0.8600821685801531, iteration: 426484
loss: 1.0088857412338257,grad_norm: 0.8190926009724263, iteration: 426485
loss: 1.0755863189697266,grad_norm: 0.8440273272242175, iteration: 426486
loss: 1.0157312154769897,grad_norm: 0.9999998893621321, iteration: 426487
loss: 1.0002793073654175,grad_norm: 0.8297776327303091, iteration: 426488
loss: 0.9733304977416992,grad_norm: 0.7406828736888882, iteration: 426489
loss: 1.025403618812561,grad_norm: 0.7269738486937813, iteration: 426490
loss: 1.0129146575927734,grad_norm: 0.7411030673365537, iteration: 426491
loss: 0.9962927103042603,grad_norm: 0.8055681055721454, iteration: 426492
loss: 1.0237212181091309,grad_norm: 0.8949042463453492, iteration: 426493
loss: 1.0117533206939697,grad_norm: 0.8719998725399506, iteration: 426494
loss: 0.9930838942527771,grad_norm: 0.8259380798614521, iteration: 426495
loss: 0.982688844203949,grad_norm: 0.8591333003165063, iteration: 426496
loss: 1.0094044208526611,grad_norm: 0.9162627886169956, iteration: 426497
loss: 1.05225670337677,grad_norm: 0.8604681820710477, iteration: 426498
loss: 0.9951722025871277,grad_norm: 0.7032975443773924, iteration: 426499
loss: 1.0072970390319824,grad_norm: 0.7397463118512644, iteration: 426500
loss: 1.0106887817382812,grad_norm: 0.7467839631991863, iteration: 426501
loss: 0.9925625324249268,grad_norm: 0.6448207032991283, iteration: 426502
loss: 0.9939919710159302,grad_norm: 0.7321614206755621, iteration: 426503
loss: 1.0026994943618774,grad_norm: 0.994915451749498, iteration: 426504
loss: 0.9892387390136719,grad_norm: 0.8268907003479466, iteration: 426505
loss: 0.9937767386436462,grad_norm: 0.6924468094151819, iteration: 426506
loss: 1.0201643705368042,grad_norm: 0.942596257312787, iteration: 426507
loss: 0.9830668568611145,grad_norm: 0.9999991774617769, iteration: 426508
loss: 1.0789885520935059,grad_norm: 0.9999998732618688, iteration: 426509
loss: 1.1169520616531372,grad_norm: 0.9999992601920371, iteration: 426510
loss: 0.9880141615867615,grad_norm: 0.8960580908606113, iteration: 426511
loss: 1.0110043287277222,grad_norm: 0.8942859219681384, iteration: 426512
loss: 0.9722620844841003,grad_norm: 0.8073478863849672, iteration: 426513
loss: 1.0190678834915161,grad_norm: 0.6937817612875801, iteration: 426514
loss: 0.992524266242981,grad_norm: 0.9999991602222877, iteration: 426515
loss: 1.0170180797576904,grad_norm: 0.8006930186677098, iteration: 426516
loss: 1.0143541097640991,grad_norm: 0.9999992055592689, iteration: 426517
loss: 0.9936512112617493,grad_norm: 0.6712206398234056, iteration: 426518
loss: 1.0637634992599487,grad_norm: 0.8602873186864117, iteration: 426519
loss: 0.9972229599952698,grad_norm: 0.9999989801859962, iteration: 426520
loss: 0.9694425463676453,grad_norm: 0.8847996881583697, iteration: 426521
loss: 0.9892781376838684,grad_norm: 0.9999990125288678, iteration: 426522
loss: 1.0120233297348022,grad_norm: 0.9999991460771824, iteration: 426523
loss: 0.9729666113853455,grad_norm: 0.7990652819618419, iteration: 426524
loss: 1.008743166923523,grad_norm: 0.8879603419431936, iteration: 426525
loss: 0.99857497215271,grad_norm: 0.7825373476064521, iteration: 426526
loss: 1.0281345844268799,grad_norm: 0.8559941888520135, iteration: 426527
loss: 0.984272301197052,grad_norm: 0.7817625375671072, iteration: 426528
loss: 0.9760463237762451,grad_norm: 0.8283076756690625, iteration: 426529
loss: 1.0482374429702759,grad_norm: 0.9999992032805263, iteration: 426530
loss: 1.0114377737045288,grad_norm: 0.7656387001302749, iteration: 426531
loss: 0.9866988062858582,grad_norm: 0.75106053661056, iteration: 426532
loss: 1.013002872467041,grad_norm: 0.7961387448410703, iteration: 426533
loss: 1.0392991304397583,grad_norm: 0.9999992654145669, iteration: 426534
loss: 0.9609586596488953,grad_norm: 0.7862232692247257, iteration: 426535
loss: 1.009050965309143,grad_norm: 0.9476871882755508, iteration: 426536
loss: 1.0304852724075317,grad_norm: 0.9521579058068265, iteration: 426537
loss: 0.9955537915229797,grad_norm: 0.7630839503480312, iteration: 426538
loss: 1.0001115798950195,grad_norm: 0.8189975524083566, iteration: 426539
loss: 1.0024036169052124,grad_norm: 0.7349606049943205, iteration: 426540
loss: 1.0134559869766235,grad_norm: 0.8785610657433963, iteration: 426541
loss: 1.1141940355300903,grad_norm: 0.981973513620167, iteration: 426542
loss: 1.0189876556396484,grad_norm: 0.7232933462362563, iteration: 426543
loss: 1.0318056344985962,grad_norm: 0.8602566277438238, iteration: 426544
loss: 1.0138942003250122,grad_norm: 0.7398054394002733, iteration: 426545
loss: 1.0391017198562622,grad_norm: 0.9999994145444495, iteration: 426546
loss: 1.0164963006973267,grad_norm: 0.8162959270425356, iteration: 426547
loss: 1.0025659799575806,grad_norm: 0.9999998233145599, iteration: 426548
loss: 1.0075373649597168,grad_norm: 0.9572190437714116, iteration: 426549
loss: 1.0037732124328613,grad_norm: 0.9999994350563526, iteration: 426550
loss: 1.0326389074325562,grad_norm: 0.8065609478268939, iteration: 426551
loss: 1.0172624588012695,grad_norm: 0.6908378677952854, iteration: 426552
loss: 1.0295054912567139,grad_norm: 0.9969649141198744, iteration: 426553
loss: 1.0510506629943848,grad_norm: 0.9999999181903241, iteration: 426554
loss: 1.0323165655136108,grad_norm: 0.8561367301924262, iteration: 426555
loss: 1.0205727815628052,grad_norm: 0.6993907890839967, iteration: 426556
loss: 1.0639630556106567,grad_norm: 0.9152370599545524, iteration: 426557
loss: 1.1162457466125488,grad_norm: 0.8191604645900963, iteration: 426558
loss: 0.9998691082000732,grad_norm: 0.6611461320294535, iteration: 426559
loss: 0.9974720478057861,grad_norm: 0.7932166314610855, iteration: 426560
loss: 1.0117614269256592,grad_norm: 0.6927273162273727, iteration: 426561
loss: 0.9914672374725342,grad_norm: 0.8805820649424663, iteration: 426562
loss: 0.9655240774154663,grad_norm: 0.8208379317366227, iteration: 426563
loss: 1.026106357574463,grad_norm: 0.9999993527653993, iteration: 426564
loss: 1.0968173742294312,grad_norm: 0.999999876946005, iteration: 426565
loss: 0.9806386232376099,grad_norm: 0.7719122602621369, iteration: 426566
loss: 1.0391238927841187,grad_norm: 0.7790810475314682, iteration: 426567
loss: 1.0177972316741943,grad_norm: 0.8436973088325297, iteration: 426568
loss: 1.0224881172180176,grad_norm: 0.7148907106692036, iteration: 426569
loss: 0.9765259027481079,grad_norm: 0.6219685504246709, iteration: 426570
loss: 0.9953754544258118,grad_norm: 0.7470064975179204, iteration: 426571
loss: 1.02388596534729,grad_norm: 0.7223759227118876, iteration: 426572
loss: 1.086576223373413,grad_norm: 0.783048227045392, iteration: 426573
loss: 1.0273427963256836,grad_norm: 0.7509224567452436, iteration: 426574
loss: 0.971217930316925,grad_norm: 0.7320705436637412, iteration: 426575
loss: 0.9734647274017334,grad_norm: 0.9211401588500597, iteration: 426576
loss: 1.089156985282898,grad_norm: 0.9999994120157736, iteration: 426577
loss: 1.0420417785644531,grad_norm: 0.8780240722147196, iteration: 426578
loss: 0.9778593182563782,grad_norm: 0.7216311833197749, iteration: 426579
loss: 1.0288691520690918,grad_norm: 0.9999992428485588, iteration: 426580
loss: 1.0292037725448608,grad_norm: 0.8469680031198209, iteration: 426581
loss: 0.9484760165214539,grad_norm: 0.777647061107368, iteration: 426582
loss: 1.0148766040802002,grad_norm: 0.7193245892572222, iteration: 426583
loss: 1.0012273788452148,grad_norm: 0.9999996113563837, iteration: 426584
loss: 1.0358327627182007,grad_norm: 0.7556804219080707, iteration: 426585
loss: 0.9796792268753052,grad_norm: 0.7672554656697911, iteration: 426586
loss: 0.9861520528793335,grad_norm: 0.8005302709943913, iteration: 426587
loss: 0.9857536554336548,grad_norm: 0.8600861893243392, iteration: 426588
loss: 0.9882729649543762,grad_norm: 0.7684136589847909, iteration: 426589
loss: 1.000200867652893,grad_norm: 0.6538774136671371, iteration: 426590
loss: 1.0001142024993896,grad_norm: 0.8583677933778366, iteration: 426591
loss: 0.9917043447494507,grad_norm: 0.9662247881627779, iteration: 426592
loss: 0.9986642599105835,grad_norm: 0.9999997951228662, iteration: 426593
loss: 1.073854923248291,grad_norm: 0.9999991637405162, iteration: 426594
loss: 1.2550904750823975,grad_norm: 0.9999998902242321, iteration: 426595
loss: 0.9946973323822021,grad_norm: 0.6483718642267957, iteration: 426596
loss: 0.9743103981018066,grad_norm: 0.8547093390765398, iteration: 426597
loss: 1.0004678964614868,grad_norm: 0.8396576712133658, iteration: 426598
loss: 1.0235962867736816,grad_norm: 0.7464476176258575, iteration: 426599
loss: 1.0423567295074463,grad_norm: 0.8719174331432862, iteration: 426600
loss: 1.0167838335037231,grad_norm: 0.9130538978319842, iteration: 426601
loss: 0.9823594093322754,grad_norm: 0.6817860899073656, iteration: 426602
loss: 1.0168159008026123,grad_norm: 0.8862843911974346, iteration: 426603
loss: 0.9963974952697754,grad_norm: 0.9843985022293583, iteration: 426604
loss: 1.0071890354156494,grad_norm: 0.871714145720021, iteration: 426605
loss: 1.2708762884140015,grad_norm: 0.9999999970915344, iteration: 426606
loss: 1.0283350944519043,grad_norm: 0.7436051202152972, iteration: 426607
loss: 1.002449631690979,grad_norm: 0.7518361431933172, iteration: 426608
loss: 1.009110689163208,grad_norm: 0.9999992593352348, iteration: 426609
loss: 1.009231448173523,grad_norm: 0.8306260519201226, iteration: 426610
loss: 0.9525094032287598,grad_norm: 0.711127241851362, iteration: 426611
loss: 1.0085211992263794,grad_norm: 0.72915041038094, iteration: 426612
loss: 1.0332386493682861,grad_norm: 0.8611890063557017, iteration: 426613
loss: 1.023313045501709,grad_norm: 0.8256538613302048, iteration: 426614
loss: 1.0242185592651367,grad_norm: 0.7799060536763024, iteration: 426615
loss: 0.9452171921730042,grad_norm: 0.8125117756997421, iteration: 426616
loss: 1.038669228553772,grad_norm: 0.9999990991511838, iteration: 426617
loss: 1.0028724670410156,grad_norm: 0.8530056352606484, iteration: 426618
loss: 1.0137789249420166,grad_norm: 0.9999996409274623, iteration: 426619
loss: 1.0058833360671997,grad_norm: 0.8363095863539478, iteration: 426620
loss: 0.9934129118919373,grad_norm: 0.7369120742753519, iteration: 426621
loss: 1.01152765750885,grad_norm: 0.7560020429997019, iteration: 426622
loss: 1.001737356185913,grad_norm: 0.8579114424016967, iteration: 426623
loss: 1.0199369192123413,grad_norm: 0.9999991239543857, iteration: 426624
loss: 0.9913246035575867,grad_norm: 0.864527839998997, iteration: 426625
loss: 1.021941065788269,grad_norm: 0.7002606962542995, iteration: 426626
loss: 1.0257395505905151,grad_norm: 0.857605687165054, iteration: 426627
loss: 1.0197210311889648,grad_norm: 0.8285504591921599, iteration: 426628
loss: 0.9491023421287537,grad_norm: 0.6826689138210548, iteration: 426629
loss: 1.0311447381973267,grad_norm: 0.9999992789803458, iteration: 426630
loss: 1.0529100894927979,grad_norm: 0.9999992945124581, iteration: 426631
loss: 1.0262746810913086,grad_norm: 0.7761125439591863, iteration: 426632
loss: 1.0997066497802734,grad_norm: 0.8453686168848297, iteration: 426633
loss: 1.0022412538528442,grad_norm: 0.802570313888868, iteration: 426634
loss: 1.0179518461227417,grad_norm: 0.9264823980977726, iteration: 426635
loss: 1.020493984222412,grad_norm: 0.9999991948036532, iteration: 426636
loss: 1.0258992910385132,grad_norm: 0.9999990450314515, iteration: 426637
loss: 1.0167287588119507,grad_norm: 0.8900905249084268, iteration: 426638
loss: 0.9582340717315674,grad_norm: 0.7728829229789708, iteration: 426639
loss: 1.0019699335098267,grad_norm: 0.6893939704634471, iteration: 426640
loss: 1.0275089740753174,grad_norm: 0.9999990843350536, iteration: 426641
loss: 1.1120938062667847,grad_norm: 0.9999991168794459, iteration: 426642
loss: 1.037270188331604,grad_norm: 0.999999405088459, iteration: 426643
loss: 1.0871598720550537,grad_norm: 0.7163189709234908, iteration: 426644
loss: 0.9973952174186707,grad_norm: 0.7133501360869332, iteration: 426645
loss: 1.0175197124481201,grad_norm: 0.9999992321500416, iteration: 426646
loss: 1.0033611059188843,grad_norm: 0.7965053758972264, iteration: 426647
loss: 1.0409293174743652,grad_norm: 0.9999996581717457, iteration: 426648
loss: 1.0525965690612793,grad_norm: 0.9999991459061982, iteration: 426649
loss: 1.0035936832427979,grad_norm: 0.9115389125971638, iteration: 426650
loss: 0.9920488595962524,grad_norm: 0.8907332699494863, iteration: 426651
loss: 1.0292776823043823,grad_norm: 0.7074729439467835, iteration: 426652
loss: 1.0620555877685547,grad_norm: 0.8185285042366849, iteration: 426653
loss: 0.994645893573761,grad_norm: 0.8741998267487363, iteration: 426654
loss: 1.03420889377594,grad_norm: 0.9999999202827851, iteration: 426655
loss: 0.9755758047103882,grad_norm: 0.8254254563502772, iteration: 426656
loss: 1.0680887699127197,grad_norm: 0.7847899422384028, iteration: 426657
loss: 1.0164029598236084,grad_norm: 0.91452362773693, iteration: 426658
loss: 1.0184898376464844,grad_norm: 0.7860269157424958, iteration: 426659
loss: 1.1446506977081299,grad_norm: 0.8418737319749163, iteration: 426660
loss: 1.0240567922592163,grad_norm: 0.75707184747454, iteration: 426661
loss: 0.9926784634590149,grad_norm: 0.6885663691735605, iteration: 426662
loss: 1.0801466703414917,grad_norm: 0.9324502635950089, iteration: 426663
loss: 0.9906271696090698,grad_norm: 0.8226810140155115, iteration: 426664
loss: 1.0109257698059082,grad_norm: 0.999999649058881, iteration: 426665
loss: 0.9821134209632874,grad_norm: 0.8272348589699795, iteration: 426666
loss: 1.025154709815979,grad_norm: 0.9999994803931835, iteration: 426667
loss: 1.0052355527877808,grad_norm: 0.9149346789035032, iteration: 426668
loss: 1.0241483449935913,grad_norm: 0.9999997227593116, iteration: 426669
loss: 1.0718687772750854,grad_norm: 0.999999894618206, iteration: 426670
loss: 1.0288585424423218,grad_norm: 0.999999145105419, iteration: 426671
loss: 1.0479590892791748,grad_norm: 0.9999992878168585, iteration: 426672
loss: 1.038940191268921,grad_norm: 0.6735492962616908, iteration: 426673
loss: 0.9641324877738953,grad_norm: 0.8576194350038042, iteration: 426674
loss: 1.0321922302246094,grad_norm: 0.83109372343419, iteration: 426675
loss: 0.9913647770881653,grad_norm: 0.9999991299687804, iteration: 426676
loss: 0.9844258427619934,grad_norm: 0.827701722416867, iteration: 426677
loss: 0.9726843237876892,grad_norm: 0.797824834673539, iteration: 426678
loss: 1.044007420539856,grad_norm: 0.9999992599314333, iteration: 426679
loss: 0.96898353099823,grad_norm: 0.9999992360741002, iteration: 426680
loss: 1.0624337196350098,grad_norm: 0.8843398693993959, iteration: 426681
loss: 1.0664418935775757,grad_norm: 0.9999992571021104, iteration: 426682
loss: 1.0089852809906006,grad_norm: 0.763513927373329, iteration: 426683
loss: 1.0107133388519287,grad_norm: 0.9999993205935994, iteration: 426684
loss: 1.1060951948165894,grad_norm: 0.9999999795072326, iteration: 426685
loss: 1.0071849822998047,grad_norm: 0.8067786310206784, iteration: 426686
loss: 1.0102053880691528,grad_norm: 0.7608747561064959, iteration: 426687
loss: 1.0114665031433105,grad_norm: 0.7337230111514359, iteration: 426688
loss: 0.9602822065353394,grad_norm: 0.8160116031040492, iteration: 426689
loss: 1.01740562915802,grad_norm: 0.8019117018286092, iteration: 426690
loss: 0.9762680530548096,grad_norm: 0.7397850173216691, iteration: 426691
loss: 1.0080842971801758,grad_norm: 1.0000000423065887, iteration: 426692
loss: 0.9959942698478699,grad_norm: 0.988620181690784, iteration: 426693
loss: 1.0192553997039795,grad_norm: 0.9405044995959779, iteration: 426694
loss: 1.039978265762329,grad_norm: 0.8572472713790321, iteration: 426695
loss: 1.0457791090011597,grad_norm: 0.9173949259776788, iteration: 426696
loss: 0.9631009697914124,grad_norm: 0.7891294337155775, iteration: 426697
loss: 1.0271086692810059,grad_norm: 1.0000000701753324, iteration: 426698
loss: 1.0252947807312012,grad_norm: 0.9999992689621571, iteration: 426699
loss: 1.1352100372314453,grad_norm: 0.9999993073767207, iteration: 426700
loss: 0.9919804334640503,grad_norm: 0.8229780014309922, iteration: 426701
loss: 0.9785111546516418,grad_norm: 0.999999610194338, iteration: 426702
loss: 0.983376681804657,grad_norm: 0.8703709066969895, iteration: 426703
loss: 0.9725223779678345,grad_norm: 0.9211063394551676, iteration: 426704
loss: 1.0674594640731812,grad_norm: 0.9999999387850942, iteration: 426705
loss: 1.0275366306304932,grad_norm: 0.8125243397733659, iteration: 426706
loss: 1.0728026628494263,grad_norm: 0.999999918070986, iteration: 426707
loss: 1.0003520250320435,grad_norm: 0.8013114186466974, iteration: 426708
loss: 1.0259242057800293,grad_norm: 0.9639167570791676, iteration: 426709
loss: 1.026199460029602,grad_norm: 0.9999990414776039, iteration: 426710
loss: 1.0128573179244995,grad_norm: 0.7896994659603611, iteration: 426711
loss: 1.0182616710662842,grad_norm: 0.727551706820446, iteration: 426712
loss: 1.0610047578811646,grad_norm: 0.9999998776133538, iteration: 426713
loss: 1.090842843055725,grad_norm: 0.8798544069311208, iteration: 426714
loss: 0.9907838106155396,grad_norm: 0.9999996742130663, iteration: 426715
loss: 1.0022557973861694,grad_norm: 0.7375112558473719, iteration: 426716
loss: 1.0339970588684082,grad_norm: 0.8663956249649959, iteration: 426717
loss: 1.018898606300354,grad_norm: 0.8492876203892478, iteration: 426718
loss: 0.9827196002006531,grad_norm: 0.8757574846485392, iteration: 426719
loss: 0.9751080870628357,grad_norm: 0.796395121480566, iteration: 426720
loss: 0.9819969534873962,grad_norm: 0.7954578441431783, iteration: 426721
loss: 1.0654234886169434,grad_norm: 0.7918757824624654, iteration: 426722
loss: 0.9893234968185425,grad_norm: 0.9999997272076362, iteration: 426723
loss: 1.0047236680984497,grad_norm: 0.7550261234289208, iteration: 426724
loss: 0.9794629812240601,grad_norm: 0.7176617045171941, iteration: 426725
loss: 0.9644087553024292,grad_norm: 0.9153484877360022, iteration: 426726
loss: 1.0821337699890137,grad_norm: 0.9999995408776858, iteration: 426727
loss: 0.9823997020721436,grad_norm: 0.7582900858879621, iteration: 426728
loss: 1.007365345954895,grad_norm: 0.7415647838781945, iteration: 426729
loss: 1.0634034872055054,grad_norm: 0.935893764150582, iteration: 426730
loss: 1.018448829650879,grad_norm: 0.9999997344246816, iteration: 426731
loss: 1.0052998065948486,grad_norm: 0.7239109537700215, iteration: 426732
loss: 1.0554912090301514,grad_norm: 0.9817079742830548, iteration: 426733
loss: 1.0026569366455078,grad_norm: 0.9160144731485597, iteration: 426734
loss: 1.1570377349853516,grad_norm: 0.9999991999305423, iteration: 426735
loss: 1.0329526662826538,grad_norm: 0.9999990815096251, iteration: 426736
loss: 1.0098458528518677,grad_norm: 0.9999993105788699, iteration: 426737
loss: 1.025544285774231,grad_norm: 0.682435476037699, iteration: 426738
loss: 1.0311694145202637,grad_norm: 0.9999992101506971, iteration: 426739
loss: 0.9939058423042297,grad_norm: 0.7443956985046595, iteration: 426740
loss: 1.036607027053833,grad_norm: 0.9005979353776352, iteration: 426741
loss: 1.0392193794250488,grad_norm: 0.9999990085439387, iteration: 426742
loss: 0.9795800447463989,grad_norm: 0.7135948863387324, iteration: 426743
loss: 1.0058945417404175,grad_norm: 0.9999989973256982, iteration: 426744
loss: 1.079055905342102,grad_norm: 0.9999992152141527, iteration: 426745
loss: 0.9858958721160889,grad_norm: 0.7418192041488948, iteration: 426746
loss: 0.9799466729164124,grad_norm: 0.9999998482153296, iteration: 426747
loss: 1.0303212404251099,grad_norm: 0.9999998446944341, iteration: 426748
loss: 1.024588704109192,grad_norm: 0.9999996076226269, iteration: 426749
loss: 0.9832198023796082,grad_norm: 0.7858743369437312, iteration: 426750
loss: 0.9927124381065369,grad_norm: 0.8523852053994073, iteration: 426751
loss: 1.0554977655410767,grad_norm: 0.9999990006103384, iteration: 426752
loss: 1.0428404808044434,grad_norm: 0.999999265802029, iteration: 426753
loss: 1.0164238214492798,grad_norm: 0.9999998200689117, iteration: 426754
loss: 1.1190440654754639,grad_norm: 0.9999995037580598, iteration: 426755
loss: 1.0087980031967163,grad_norm: 0.8806624674131416, iteration: 426756
loss: 1.0660127401351929,grad_norm: 0.7855804905904291, iteration: 426757
loss: 1.0081148147583008,grad_norm: 0.8250866840587114, iteration: 426758
loss: 1.033771276473999,grad_norm: 0.8187342317962075, iteration: 426759
loss: 1.0076693296432495,grad_norm: 0.8255410932824221, iteration: 426760
loss: 1.0237444639205933,grad_norm: 0.8879196532430883, iteration: 426761
loss: 1.01089346408844,grad_norm: 0.999999596423308, iteration: 426762
loss: 1.0526132583618164,grad_norm: 0.8168840565662963, iteration: 426763
loss: 0.9729363322257996,grad_norm: 0.8132777354469518, iteration: 426764
loss: 1.04851233959198,grad_norm: 0.9999994312440933, iteration: 426765
loss: 0.9825395345687866,grad_norm: 0.7286272567595569, iteration: 426766
loss: 1.011592149734497,grad_norm: 0.8573823530964317, iteration: 426767
loss: 1.0567522048950195,grad_norm: 0.9542721925254535, iteration: 426768
loss: 0.9960955381393433,grad_norm: 0.8117020564980244, iteration: 426769
loss: 0.9895616173744202,grad_norm: 0.9999990229094062, iteration: 426770
loss: 1.02678382396698,grad_norm: 0.9999990219039309, iteration: 426771
loss: 1.122681975364685,grad_norm: 0.9999992359871768, iteration: 426772
loss: 0.9856264591217041,grad_norm: 0.9947636466712335, iteration: 426773
loss: 1.0580558776855469,grad_norm: 0.8064364166304471, iteration: 426774
loss: 1.0096361637115479,grad_norm: 0.8388197941473873, iteration: 426775
loss: 0.9605013728141785,grad_norm: 0.9973915131083975, iteration: 426776
loss: 1.0592812299728394,grad_norm: 0.8287915981532168, iteration: 426777
loss: 0.9638754725456238,grad_norm: 0.8328015187820319, iteration: 426778
loss: 1.104088306427002,grad_norm: 0.99999965729357, iteration: 426779
loss: 0.9960283637046814,grad_norm: 0.9999998442471344, iteration: 426780
loss: 1.053713321685791,grad_norm: 0.864346035340587, iteration: 426781
loss: 1.0011894702911377,grad_norm: 0.999999090515773, iteration: 426782
loss: 1.0095633268356323,grad_norm: 0.9999990716859123, iteration: 426783
loss: 0.9643101096153259,grad_norm: 0.7889864752689367, iteration: 426784
loss: 1.0225505828857422,grad_norm: 0.7981429296269761, iteration: 426785
loss: 1.1782371997833252,grad_norm: 0.8400191490415103, iteration: 426786
loss: 1.0366628170013428,grad_norm: 0.7355239417414987, iteration: 426787
loss: 1.0048459768295288,grad_norm: 0.7865095379352113, iteration: 426788
loss: 1.0379853248596191,grad_norm: 0.9264098401983775, iteration: 426789
loss: 0.9771499633789062,grad_norm: 0.9359477887929443, iteration: 426790
loss: 0.9712371230125427,grad_norm: 0.9999991034801287, iteration: 426791
loss: 1.001242756843567,grad_norm: 0.821805143715024, iteration: 426792
loss: 1.0597128868103027,grad_norm: 0.8815825782637872, iteration: 426793
loss: 1.0479493141174316,grad_norm: 0.7045460204293847, iteration: 426794
loss: 1.0955326557159424,grad_norm: 0.9999998273881571, iteration: 426795
loss: 1.0200207233428955,grad_norm: 0.9999993990388117, iteration: 426796
loss: 0.9915818572044373,grad_norm: 0.8927555776101695, iteration: 426797
loss: 1.0076991319656372,grad_norm: 0.7257638085726441, iteration: 426798
loss: 1.00418221950531,grad_norm: 0.6383591541145224, iteration: 426799
loss: 1.0393143892288208,grad_norm: 0.9357277011189636, iteration: 426800
loss: 0.9895895719528198,grad_norm: 0.7599001535301406, iteration: 426801
loss: 1.0422117710113525,grad_norm: 0.8077886009562073, iteration: 426802
loss: 0.9979217052459717,grad_norm: 0.9999991081645054, iteration: 426803
loss: 1.0394294261932373,grad_norm: 0.9999995135377316, iteration: 426804
loss: 0.9827023148536682,grad_norm: 0.9999992074018816, iteration: 426805
loss: 1.030452847480774,grad_norm: 0.8906678442821229, iteration: 426806
loss: 1.0169668197631836,grad_norm: 0.9999994323487669, iteration: 426807
loss: 1.0083885192871094,grad_norm: 0.8516570386414478, iteration: 426808
loss: 0.9638888835906982,grad_norm: 0.7976405530464281, iteration: 426809
loss: 1.046072244644165,grad_norm: 0.9999999964612964, iteration: 426810
loss: 0.9865249991416931,grad_norm: 0.862919184580435, iteration: 426811
loss: 0.9949537515640259,grad_norm: 0.9999992668697566, iteration: 426812
loss: 0.9865410327911377,grad_norm: 0.708733946565608, iteration: 426813
loss: 1.1425706148147583,grad_norm: 0.9999997728809622, iteration: 426814
loss: 0.9956778883934021,grad_norm: 0.7896197726405761, iteration: 426815
loss: 1.0161494016647339,grad_norm: 0.752220872655264, iteration: 426816
loss: 1.0383909940719604,grad_norm: 0.9305440132375652, iteration: 426817
loss: 1.0395667552947998,grad_norm: 1.0000000833233411, iteration: 426818
loss: 0.9755122661590576,grad_norm: 0.9476445604061432, iteration: 426819
loss: 1.0105432271957397,grad_norm: 0.6311323845863422, iteration: 426820
loss: 1.0425570011138916,grad_norm: 0.9853108687165016, iteration: 426821
loss: 1.0745763778686523,grad_norm: 0.9999997082905694, iteration: 426822
loss: 1.084423303604126,grad_norm: 0.9999994753477983, iteration: 426823
loss: 0.9919099807739258,grad_norm: 0.9999996772073316, iteration: 426824
loss: 1.0034798383712769,grad_norm: 0.6867367051667376, iteration: 426825
loss: 1.0479995012283325,grad_norm: 0.8333780997220838, iteration: 426826
loss: 0.981767475605011,grad_norm: 0.9999991884035567, iteration: 426827
loss: 0.9892123341560364,grad_norm: 0.6332225926903283, iteration: 426828
loss: 0.9968469738960266,grad_norm: 0.6653394722536373, iteration: 426829
loss: 0.9921464323997498,grad_norm: 0.8600516972062436, iteration: 426830
loss: 0.9789965152740479,grad_norm: 0.7578179977300963, iteration: 426831
loss: 1.0078665018081665,grad_norm: 0.7843987095140582, iteration: 426832
loss: 1.0062397718429565,grad_norm: 0.8120622404721501, iteration: 426833
loss: 0.9929496049880981,grad_norm: 0.9282667530840292, iteration: 426834
loss: 0.9772891402244568,grad_norm: 0.8841664839149007, iteration: 426835
loss: 1.0302976369857788,grad_norm: 0.999999441166971, iteration: 426836
loss: 0.9693074822425842,grad_norm: 0.8230629806659421, iteration: 426837
loss: 1.011500358581543,grad_norm: 0.9999999307090862, iteration: 426838
loss: 1.0508719682693481,grad_norm: 0.9999996651219755, iteration: 426839
loss: 0.9988096356391907,grad_norm: 0.7058869715984989, iteration: 426840
loss: 1.042602300643921,grad_norm: 0.7618331821178541, iteration: 426841
loss: 0.9863768219947815,grad_norm: 0.9999997016714324, iteration: 426842
loss: 1.0038195848464966,grad_norm: 0.6069388735321335, iteration: 426843
loss: 0.9911928176879883,grad_norm: 0.8361308600516724, iteration: 426844
loss: 1.0215718746185303,grad_norm: 0.8283605803940666, iteration: 426845
loss: 1.0030337572097778,grad_norm: 0.9173349848751461, iteration: 426846
loss: 1.05043625831604,grad_norm: 0.932337606440705, iteration: 426847
loss: 1.0101698637008667,grad_norm: 0.7501680712641106, iteration: 426848
loss: 0.9943286776542664,grad_norm: 0.9999991075543297, iteration: 426849
loss: 1.0203673839569092,grad_norm: 0.721677727819533, iteration: 426850
loss: 1.0349414348602295,grad_norm: 0.9999992380925705, iteration: 426851
loss: 1.0194146633148193,grad_norm: 0.8162829058681518, iteration: 426852
loss: 0.9900388121604919,grad_norm: 0.8406928780237414, iteration: 426853
loss: 1.029451847076416,grad_norm: 0.9800372145478746, iteration: 426854
loss: 1.0245630741119385,grad_norm: 0.7241761900439548, iteration: 426855
loss: 1.0130021572113037,grad_norm: 0.7340300545378717, iteration: 426856
loss: 0.9855651259422302,grad_norm: 0.8715755779160023, iteration: 426857
loss: 0.9964839816093445,grad_norm: 0.847447825356219, iteration: 426858
loss: 1.0036859512329102,grad_norm: 0.9076016044218063, iteration: 426859
loss: 0.9921505451202393,grad_norm: 0.7149600911984886, iteration: 426860
loss: 1.0406312942504883,grad_norm: 0.7584265171643547, iteration: 426861
loss: 0.9760235548019409,grad_norm: 0.8343054509278318, iteration: 426862
loss: 1.012182593345642,grad_norm: 0.9999990602876822, iteration: 426863
loss: 0.9985363483428955,grad_norm: 0.8982828229056667, iteration: 426864
loss: 1.0186859369277954,grad_norm: 0.9674497777254518, iteration: 426865
loss: 1.0151017904281616,grad_norm: 0.8093465013131188, iteration: 426866
loss: 0.9902778267860413,grad_norm: 0.9854713280746262, iteration: 426867
loss: 1.0661858320236206,grad_norm: 0.999999369051633, iteration: 426868
loss: 0.9990459680557251,grad_norm: 0.7236186307066146, iteration: 426869
loss: 1.0643936395645142,grad_norm: 0.9999993402776068, iteration: 426870
loss: 1.0704330205917358,grad_norm: 0.7491350126281476, iteration: 426871
loss: 1.0518534183502197,grad_norm: 0.6655911996076364, iteration: 426872
loss: 1.010969638824463,grad_norm: 0.983346055590354, iteration: 426873
loss: 1.0157403945922852,grad_norm: 0.9077671235687981, iteration: 426874
loss: 0.9867327213287354,grad_norm: 0.8944146440734342, iteration: 426875
loss: 1.000563621520996,grad_norm: 0.8813690930908996, iteration: 426876
loss: 0.9974073767662048,grad_norm: 0.9331809703118921, iteration: 426877
loss: 1.124324917793274,grad_norm: 0.9999993882980546, iteration: 426878
loss: 1.0782445669174194,grad_norm: 0.999999352290768, iteration: 426879
loss: 1.0312525033950806,grad_norm: 0.8047387307872039, iteration: 426880
loss: 1.06390380859375,grad_norm: 0.9086947098441154, iteration: 426881
loss: 1.085841178894043,grad_norm: 0.9999999207531388, iteration: 426882
loss: 1.0021268129348755,grad_norm: 0.8902438153629008, iteration: 426883
loss: 1.0030431747436523,grad_norm: 0.9999993212520524, iteration: 426884
loss: 1.023550033569336,grad_norm: 0.8528181105147941, iteration: 426885
loss: 1.0124605894088745,grad_norm: 0.7094066412115887, iteration: 426886
loss: 0.9722087979316711,grad_norm: 0.8448005941148391, iteration: 426887
loss: 1.0218300819396973,grad_norm: 0.66561214159033, iteration: 426888
loss: 0.9785160422325134,grad_norm: 0.8499331703030509, iteration: 426889
loss: 0.9712788462638855,grad_norm: 0.8615086766558395, iteration: 426890
loss: 1.0337841510772705,grad_norm: 0.9394075763607427, iteration: 426891
loss: 1.001763105392456,grad_norm: 0.6896785889198657, iteration: 426892
loss: 0.9734905362129211,grad_norm: 0.8234481011330768, iteration: 426893
loss: 0.9846796989440918,grad_norm: 0.7139721184108694, iteration: 426894
loss: 0.9704902172088623,grad_norm: 0.7226619693338954, iteration: 426895
loss: 1.0373104810714722,grad_norm: 0.999999370607977, iteration: 426896
loss: 0.9489421248435974,grad_norm: 0.8298529236506725, iteration: 426897
loss: 0.9807754158973694,grad_norm: 0.70521698414716, iteration: 426898
loss: 0.9642897844314575,grad_norm: 0.6966509456592802, iteration: 426899
loss: 0.9932743906974792,grad_norm: 0.9999994062527523, iteration: 426900
loss: 0.9886049628257751,grad_norm: 0.7760217145102603, iteration: 426901
loss: 1.028589129447937,grad_norm: 0.7385980348559202, iteration: 426902
loss: 0.9725362658500671,grad_norm: 0.8317906155613898, iteration: 426903
loss: 1.0507032871246338,grad_norm: 0.8028975685453769, iteration: 426904
loss: 1.0142086744308472,grad_norm: 0.8901244566183728, iteration: 426905
loss: 1.013515591621399,grad_norm: 0.664760061112394, iteration: 426906
loss: 0.9972996115684509,grad_norm: 0.6831930555008372, iteration: 426907
loss: 0.9666550755500793,grad_norm: 0.7589180144533323, iteration: 426908
loss: 1.0166199207305908,grad_norm: 0.9999994793730084, iteration: 426909
loss: 1.0152606964111328,grad_norm: 0.8466158757562526, iteration: 426910
loss: 1.03787362575531,grad_norm: 0.8541625260257288, iteration: 426911
loss: 1.0711547136306763,grad_norm: 0.9999998874875862, iteration: 426912
loss: 1.0607370138168335,grad_norm: 0.9999998966239524, iteration: 426913
loss: 1.0065619945526123,grad_norm: 0.6577290636757899, iteration: 426914
loss: 0.9971078038215637,grad_norm: 0.7640341941606992, iteration: 426915
loss: 1.0122873783111572,grad_norm: 0.74162139922438, iteration: 426916
loss: 0.9655615091323853,grad_norm: 0.7786566640377633, iteration: 426917
loss: 1.0092535018920898,grad_norm: 0.7510072440507377, iteration: 426918
loss: 1.0373237133026123,grad_norm: 0.8422605462440654, iteration: 426919
loss: 0.9968153238296509,grad_norm: 0.6831128890541475, iteration: 426920
loss: 0.9904611706733704,grad_norm: 0.9126791560860653, iteration: 426921
loss: 1.053642988204956,grad_norm: 0.8621585065197822, iteration: 426922
loss: 1.0080357789993286,grad_norm: 0.8072642122244407, iteration: 426923
loss: 0.9734988212585449,grad_norm: 0.6389241872252589, iteration: 426924
loss: 1.0094798803329468,grad_norm: 0.5894684597486983, iteration: 426925
loss: 0.9745571613311768,grad_norm: 0.8246113076101494, iteration: 426926
loss: 0.933891773223877,grad_norm: 0.9888698687242512, iteration: 426927
loss: 0.9950564503669739,grad_norm: 0.9999990733150005, iteration: 426928
loss: 0.9845250248908997,grad_norm: 0.7204869486911439, iteration: 426929
loss: 1.021304965019226,grad_norm: 0.6912260905414906, iteration: 426930
loss: 1.0109505653381348,grad_norm: 0.7641047601509617, iteration: 426931
loss: 1.0244812965393066,grad_norm: 0.8280296954307081, iteration: 426932
loss: 1.006564736366272,grad_norm: 0.8008563874920666, iteration: 426933
loss: 0.9988136887550354,grad_norm: 0.7339798637387982, iteration: 426934
loss: 1.0621223449707031,grad_norm: 0.9999991656075841, iteration: 426935
loss: 1.045152187347412,grad_norm: 0.8689195780238026, iteration: 426936
loss: 1.0151128768920898,grad_norm: 0.99999985322008, iteration: 426937
loss: 1.058894395828247,grad_norm: 0.9197797217614169, iteration: 426938
loss: 1.0031006336212158,grad_norm: 0.7748587855138804, iteration: 426939
loss: 0.9961702227592468,grad_norm: 0.6562152112233945, iteration: 426940
loss: 1.0159937143325806,grad_norm: 0.6854698903401694, iteration: 426941
loss: 1.0017136335372925,grad_norm: 0.7065705255561663, iteration: 426942
loss: 0.9793021082878113,grad_norm: 0.7479813019366864, iteration: 426943
loss: 1.0070346593856812,grad_norm: 0.7007590297010031, iteration: 426944
loss: 0.9625692963600159,grad_norm: 0.7677031780777025, iteration: 426945
loss: 0.9722474217414856,grad_norm: 0.7049384794113895, iteration: 426946
loss: 0.9785902500152588,grad_norm: 0.7859437118872971, iteration: 426947
loss: 0.9596350789070129,grad_norm: 0.6777167196982615, iteration: 426948
loss: 0.9915447235107422,grad_norm: 0.7337830009103357, iteration: 426949
loss: 0.9820327758789062,grad_norm: 0.6687809817456694, iteration: 426950
loss: 1.0289101600646973,grad_norm: 0.7529377856545442, iteration: 426951
loss: 0.9870677590370178,grad_norm: 0.9999997787151006, iteration: 426952
loss: 0.9836515188217163,grad_norm: 0.9196266666768901, iteration: 426953
loss: 0.9757465720176697,grad_norm: 0.579603654082142, iteration: 426954
loss: 1.0110013484954834,grad_norm: 0.8532225795869752, iteration: 426955
loss: 1.0307163000106812,grad_norm: 0.7310901719499616, iteration: 426956
loss: 1.028296947479248,grad_norm: 0.9999996661414098, iteration: 426957
loss: 1.0286062955856323,grad_norm: 0.8618862134001476, iteration: 426958
loss: 1.072899580001831,grad_norm: 0.9999991720437902, iteration: 426959
loss: 1.0027905702590942,grad_norm: 0.9999990669111467, iteration: 426960
loss: 0.9744726419448853,grad_norm: 0.6966466434172556, iteration: 426961
loss: 0.9964108467102051,grad_norm: 0.8546797814395882, iteration: 426962
loss: 1.0086935758590698,grad_norm: 0.8445818262597878, iteration: 426963
loss: 1.0248337984085083,grad_norm: 0.8444520311524586, iteration: 426964
loss: 0.982219398021698,grad_norm: 0.7431973139459314, iteration: 426965
loss: 1.0012764930725098,grad_norm: 0.6717652547203945, iteration: 426966
loss: 1.0673023462295532,grad_norm: 0.9073082544096431, iteration: 426967
loss: 0.9988675117492676,grad_norm: 0.6689432740077775, iteration: 426968
loss: 0.9977357387542725,grad_norm: 0.8523870405831164, iteration: 426969
loss: 0.9894077181816101,grad_norm: 0.8207396823276313, iteration: 426970
loss: 0.9753316640853882,grad_norm: 0.8075384990322592, iteration: 426971
loss: 1.034913420677185,grad_norm: 0.6953698430989051, iteration: 426972
loss: 0.9710906147956848,grad_norm: 0.7427853175227201, iteration: 426973
loss: 1.0249756574630737,grad_norm: 0.7792835873698268, iteration: 426974
loss: 1.0008405447006226,grad_norm: 0.736071291537543, iteration: 426975
loss: 0.9918261170387268,grad_norm: 0.999999514154437, iteration: 426976
loss: 1.0949835777282715,grad_norm: 0.9999989353760932, iteration: 426977
loss: 0.9830939769744873,grad_norm: 0.7693873986817844, iteration: 426978
loss: 0.99427330493927,grad_norm: 0.9999999557892086, iteration: 426979
loss: 1.0061547756195068,grad_norm: 0.8166377161998439, iteration: 426980
loss: 1.0285431146621704,grad_norm: 0.7688876022957963, iteration: 426981
loss: 1.1110073328018188,grad_norm: 0.99999983640445, iteration: 426982
loss: 1.0512197017669678,grad_norm: 0.9996959709674005, iteration: 426983
loss: 1.0000168085098267,grad_norm: 0.9033851835438023, iteration: 426984
loss: 1.0295164585113525,grad_norm: 0.6702979319164806, iteration: 426985
loss: 0.9707685708999634,grad_norm: 0.8069969658888719, iteration: 426986
loss: 0.9445837140083313,grad_norm: 0.7879580330316399, iteration: 426987
loss: 1.010701060295105,grad_norm: 0.800092960860397, iteration: 426988
loss: 1.0411123037338257,grad_norm: 0.7346161671954833, iteration: 426989
loss: 1.038560390472412,grad_norm: 0.9999997144090995, iteration: 426990
loss: 0.9819998741149902,grad_norm: 0.6860092555550824, iteration: 426991
loss: 0.9964303374290466,grad_norm: 0.9809223628516509, iteration: 426992
loss: 0.9865173697471619,grad_norm: 0.7718088102989062, iteration: 426993
loss: 1.028744101524353,grad_norm: 0.8567725644446589, iteration: 426994
loss: 0.9850854873657227,grad_norm: 0.8067227301552226, iteration: 426995
loss: 1.0842474699020386,grad_norm: 0.8689808713030043, iteration: 426996
loss: 1.0033116340637207,grad_norm: 0.8408891383205166, iteration: 426997
loss: 1.0374959707260132,grad_norm: 0.8413184799748047, iteration: 426998
loss: 1.0192288160324097,grad_norm: 0.7991332960755827, iteration: 426999
loss: 1.0239152908325195,grad_norm: 0.7559401961054752, iteration: 427000
loss: 0.9966859817504883,grad_norm: 0.7999518836472083, iteration: 427001
loss: 1.055324912071228,grad_norm: 0.9598196311712689, iteration: 427002
loss: 0.9845259785652161,grad_norm: 0.6460675792313721, iteration: 427003
loss: 1.0162636041641235,grad_norm: 0.9999997984225972, iteration: 427004
loss: 1.0515931844711304,grad_norm: 0.7484812026836127, iteration: 427005
loss: 0.9900072813034058,grad_norm: 0.7767173546786174, iteration: 427006
loss: 0.9806091785430908,grad_norm: 0.9999991061216963, iteration: 427007
loss: 1.0255037546157837,grad_norm: 0.9999996612286953, iteration: 427008
loss: 1.0413886308670044,grad_norm: 0.9999994735443039, iteration: 427009
loss: 0.9726303219795227,grad_norm: 0.9999995439793531, iteration: 427010
loss: 1.075567603111267,grad_norm: 0.9999998439277136, iteration: 427011
loss: 0.9783772826194763,grad_norm: 0.8858864588728103, iteration: 427012
loss: 1.0521671772003174,grad_norm: 0.8391027029254833, iteration: 427013
loss: 0.99931401014328,grad_norm: 0.9360965684153673, iteration: 427014
loss: 0.9950600862503052,grad_norm: 0.9220827406604888, iteration: 427015
loss: 0.9945728182792664,grad_norm: 0.7229572709428753, iteration: 427016
loss: 1.002914547920227,grad_norm: 0.7287779631578459, iteration: 427017
loss: 0.9869917035102844,grad_norm: 0.7018760475022667, iteration: 427018
loss: 0.9885905385017395,grad_norm: 0.7668073941396227, iteration: 427019
loss: 1.0234198570251465,grad_norm: 0.8670018017333346, iteration: 427020
loss: 1.0135937929153442,grad_norm: 0.8068811056752963, iteration: 427021
loss: 1.042913556098938,grad_norm: 0.9999990727743047, iteration: 427022
loss: 1.119991421699524,grad_norm: 0.9999998055687606, iteration: 427023
loss: 0.9813678860664368,grad_norm: 0.7304613621512324, iteration: 427024
loss: 0.9878187775611877,grad_norm: 0.8814155999503628, iteration: 427025
loss: 0.990938127040863,grad_norm: 0.9509775176437846, iteration: 427026
loss: 0.9906690716743469,grad_norm: 0.7019964051131582, iteration: 427027
loss: 0.9652389883995056,grad_norm: 0.7126023423831916, iteration: 427028
loss: 0.9758673310279846,grad_norm: 0.7334268336031067, iteration: 427029
loss: 1.0195306539535522,grad_norm: 0.8618921960436905, iteration: 427030
loss: 1.007248878479004,grad_norm: 0.7581173758350335, iteration: 427031
loss: 0.9628416299819946,grad_norm: 0.6870474298559959, iteration: 427032
loss: 0.9848528504371643,grad_norm: 0.7633749736581692, iteration: 427033
loss: 1.0010145902633667,grad_norm: 0.8151205780914382, iteration: 427034
loss: 0.9709706902503967,grad_norm: 0.7423043473472246, iteration: 427035
loss: 1.0240724086761475,grad_norm: 0.789340511851926, iteration: 427036
loss: 1.030434489250183,grad_norm: 0.7742607563663438, iteration: 427037
loss: 0.977553129196167,grad_norm: 0.7746601024906765, iteration: 427038
loss: 1.1388169527053833,grad_norm: 0.957820373766963, iteration: 427039
loss: 1.034548282623291,grad_norm: 0.8058230630950524, iteration: 427040
loss: 0.9941889643669128,grad_norm: 0.9037585491352429, iteration: 427041
loss: 1.0468385219573975,grad_norm: 0.7689569950601397, iteration: 427042
loss: 0.9911891222000122,grad_norm: 0.7578680274013868, iteration: 427043
loss: 1.0606801509857178,grad_norm: 0.8376385221429367, iteration: 427044
loss: 1.0370914936065674,grad_norm: 0.9999990946517763, iteration: 427045
loss: 1.0113552808761597,grad_norm: 0.9999994277683466, iteration: 427046
loss: 0.9885028004646301,grad_norm: 0.8011253703283708, iteration: 427047
loss: 0.980753481388092,grad_norm: 0.8393275407567229, iteration: 427048
loss: 1.0080931186676025,grad_norm: 0.9664069206660499, iteration: 427049
loss: 0.9927032589912415,grad_norm: 0.9999996596192156, iteration: 427050
loss: 1.0192006826400757,grad_norm: 0.9037621090629054, iteration: 427051
loss: 1.1091172695159912,grad_norm: 0.97069027021026, iteration: 427052
loss: 1.016132116317749,grad_norm: 0.8112158459497465, iteration: 427053
loss: 1.0004607439041138,grad_norm: 0.9999998400244899, iteration: 427054
loss: 1.0109530687332153,grad_norm: 0.846382700843519, iteration: 427055
loss: 0.9662289023399353,grad_norm: 0.6784843881844149, iteration: 427056
loss: 1.0161545276641846,grad_norm: 0.7232188165504657, iteration: 427057
loss: 0.9694975018501282,grad_norm: 0.8962561060514379, iteration: 427058
loss: 0.9918319582939148,grad_norm: 0.6908309718914865, iteration: 427059
loss: 1.0613926649093628,grad_norm: 0.6991577929760642, iteration: 427060
loss: 1.0476844310760498,grad_norm: 0.7580131172425051, iteration: 427061
loss: 0.995350182056427,grad_norm: 0.8031756676220195, iteration: 427062
loss: 0.9945293664932251,grad_norm: 0.9999992048949383, iteration: 427063
loss: 0.9898324012756348,grad_norm: 0.8403973777819991, iteration: 427064
loss: 1.0040853023529053,grad_norm: 0.7468460510180476, iteration: 427065
loss: 1.0173825025558472,grad_norm: 0.7510015788978056, iteration: 427066
loss: 1.0126396417617798,grad_norm: 0.8131892454490177, iteration: 427067
loss: 0.9697723388671875,grad_norm: 0.7739860800281209, iteration: 427068
loss: 0.9864209294319153,grad_norm: 0.7819332180014925, iteration: 427069
loss: 0.9840705990791321,grad_norm: 0.9163680690166383, iteration: 427070
loss: 1.0041073560714722,grad_norm: 0.8170045432273838, iteration: 427071
loss: 1.0054799318313599,grad_norm: 0.7390036497233765, iteration: 427072
loss: 1.0299173593521118,grad_norm: 0.7590917264235864, iteration: 427073
loss: 0.9843658208847046,grad_norm: 0.9999990248523246, iteration: 427074
loss: 0.969094455242157,grad_norm: 0.8488437068120973, iteration: 427075
loss: 1.007980227470398,grad_norm: 0.7266283063758749, iteration: 427076
loss: 0.9845688939094543,grad_norm: 0.754994836218256, iteration: 427077
loss: 0.9702667593955994,grad_norm: 0.9054191369024491, iteration: 427078
loss: 1.0295907258987427,grad_norm: 0.6502351935422345, iteration: 427079
loss: 1.021303653717041,grad_norm: 0.6998205148154761, iteration: 427080
loss: 1.0639622211456299,grad_norm: 0.7426263790078494, iteration: 427081
loss: 1.0832992792129517,grad_norm: 0.8275113517565914, iteration: 427082
loss: 0.9820772409439087,grad_norm: 0.8493532119733103, iteration: 427083
loss: 0.9642370939254761,grad_norm: 0.889966273579968, iteration: 427084
loss: 0.9767489433288574,grad_norm: 0.7134032439856841, iteration: 427085
loss: 0.9786619544029236,grad_norm: 0.7813646640317594, iteration: 427086
loss: 1.007185697555542,grad_norm: 0.9055801543903416, iteration: 427087
loss: 1.0052207708358765,grad_norm: 0.7220947755826133, iteration: 427088
loss: 1.0423102378845215,grad_norm: 0.8300049192848392, iteration: 427089
loss: 0.9908319711685181,grad_norm: 0.7743005997229545, iteration: 427090
loss: 1.0176210403442383,grad_norm: 0.8112397430850005, iteration: 427091
loss: 1.0031508207321167,grad_norm: 0.7715459652287007, iteration: 427092
loss: 1.0070605278015137,grad_norm: 0.8857097069739216, iteration: 427093
loss: 0.9972047209739685,grad_norm: 0.7794171849426828, iteration: 427094
loss: 1.049005389213562,grad_norm: 0.768701445439004, iteration: 427095
loss: 0.9986405968666077,grad_norm: 0.7241620583074918, iteration: 427096
loss: 0.9938713312149048,grad_norm: 0.7471552935577802, iteration: 427097
loss: 0.9994047284126282,grad_norm: 0.9254073982207762, iteration: 427098
loss: 0.9770646691322327,grad_norm: 0.8394298409366486, iteration: 427099
loss: 0.9986553192138672,grad_norm: 0.7822285979901877, iteration: 427100
loss: 1.0302777290344238,grad_norm: 0.6246665754112196, iteration: 427101
loss: 0.9801881909370422,grad_norm: 0.6874771985086385, iteration: 427102
loss: 0.9738029837608337,grad_norm: 0.7213966482651051, iteration: 427103
loss: 1.0049327611923218,grad_norm: 0.7143464531362609, iteration: 427104
loss: 1.000167727470398,grad_norm: 0.9690195557149996, iteration: 427105
loss: 0.9852738976478577,grad_norm: 0.7435105643835085, iteration: 427106
loss: 0.97629314661026,grad_norm: 0.7607981953127809, iteration: 427107
loss: 1.0052343606948853,grad_norm: 0.8997465241034874, iteration: 427108
loss: 0.9988951086997986,grad_norm: 0.9013196601554806, iteration: 427109
loss: 1.0366482734680176,grad_norm: 0.90652840639215, iteration: 427110
loss: 1.0246467590332031,grad_norm: 0.7005178721455271, iteration: 427111
loss: 1.035603642463684,grad_norm: 0.8081583629252456, iteration: 427112
loss: 0.9961757063865662,grad_norm: 0.7025107651764381, iteration: 427113
loss: 1.0282866954803467,grad_norm: 0.9999994905698619, iteration: 427114
loss: 0.9994111061096191,grad_norm: 0.848088025887488, iteration: 427115
loss: 1.014319896697998,grad_norm: 0.8312031051682951, iteration: 427116
loss: 1.0650229454040527,grad_norm: 0.9999998029980053, iteration: 427117
loss: 0.9853725433349609,grad_norm: 0.809854106777647, iteration: 427118
loss: 1.0119264125823975,grad_norm: 0.9999996171429923, iteration: 427119
loss: 1.0235737562179565,grad_norm: 0.999999533255178, iteration: 427120
loss: 1.0034881830215454,grad_norm: 0.847976843143261, iteration: 427121
loss: 1.0046632289886475,grad_norm: 0.8771633380528795, iteration: 427122
loss: 0.9835392832756042,grad_norm: 0.868201932848538, iteration: 427123
loss: 0.9609875082969666,grad_norm: 0.9999999129602761, iteration: 427124
loss: 1.0150166749954224,grad_norm: 0.9784483267692087, iteration: 427125
loss: 1.0200583934783936,grad_norm: 0.8857517611881986, iteration: 427126
loss: 0.9537054300308228,grad_norm: 0.8062700628494093, iteration: 427127
loss: 1.011211633682251,grad_norm: 0.6432448407689566, iteration: 427128
loss: 1.0204354524612427,grad_norm: 0.8213654647290914, iteration: 427129
loss: 0.9976015090942383,grad_norm: 0.8866874613508654, iteration: 427130
loss: 1.0347263813018799,grad_norm: 0.8389567514974434, iteration: 427131
loss: 1.035008192062378,grad_norm: 0.6799261306527903, iteration: 427132
loss: 0.9943140149116516,grad_norm: 0.8240805550278529, iteration: 427133
loss: 1.030474066734314,grad_norm: 0.7852650910814596, iteration: 427134
loss: 1.0304816961288452,grad_norm: 0.9391468796819893, iteration: 427135
loss: 1.0071706771850586,grad_norm: 0.7317266967316692, iteration: 427136
loss: 1.005206823348999,grad_norm: 0.8826798300724281, iteration: 427137
loss: 0.9794076681137085,grad_norm: 0.6667377300206326, iteration: 427138
loss: 0.9884944558143616,grad_norm: 0.7139663516069101, iteration: 427139
loss: 1.0292764902114868,grad_norm: 0.7543203012331999, iteration: 427140
loss: 1.034152865409851,grad_norm: 0.9999999166888991, iteration: 427141
loss: 1.0497313737869263,grad_norm: 0.6564831590419266, iteration: 427142
loss: 1.052427053451538,grad_norm: 0.9999995997485615, iteration: 427143
loss: 1.0021024942398071,grad_norm: 0.7720818778152102, iteration: 427144
loss: 0.9929350018501282,grad_norm: 0.7597375328109022, iteration: 427145
loss: 0.9866390228271484,grad_norm: 0.9859258154045368, iteration: 427146
loss: 0.9876704812049866,grad_norm: 0.9999991150975588, iteration: 427147
loss: 0.9965405464172363,grad_norm: 0.8054432722901701, iteration: 427148
loss: 1.0162689685821533,grad_norm: 0.6747998928195387, iteration: 427149
loss: 0.9914649128913879,grad_norm: 0.834820961729799, iteration: 427150
loss: 1.0405864715576172,grad_norm: 0.646231883511496, iteration: 427151
loss: 0.981863260269165,grad_norm: 0.8619311095220268, iteration: 427152
loss: 0.9720799326896667,grad_norm: 0.9999994220934478, iteration: 427153
loss: 1.0148828029632568,grad_norm: 0.7179758822052957, iteration: 427154
loss: 1.0338612794876099,grad_norm: 0.8983200001335692, iteration: 427155
loss: 0.9846364259719849,grad_norm: 0.7212385218008536, iteration: 427156
loss: 1.0318467617034912,grad_norm: 0.672637135486408, iteration: 427157
loss: 0.9997204542160034,grad_norm: 0.9286351381512912, iteration: 427158
loss: 1.0063375234603882,grad_norm: 0.7527175625637451, iteration: 427159
loss: 0.9985361099243164,grad_norm: 0.7340328063835994, iteration: 427160
loss: 0.9860994815826416,grad_norm: 0.7840068805688537, iteration: 427161
loss: 1.0261280536651611,grad_norm: 0.8258410510422478, iteration: 427162
loss: 0.9740185737609863,grad_norm: 0.8294451601675513, iteration: 427163
loss: 1.0152188539505005,grad_norm: 0.7042061797019676, iteration: 427164
loss: 0.9805017709732056,grad_norm: 0.697431291583867, iteration: 427165
loss: 1.0030235052108765,grad_norm: 0.8689361627291647, iteration: 427166
loss: 1.027688980102539,grad_norm: 0.9999991952785466, iteration: 427167
loss: 0.9915904998779297,grad_norm: 0.7705465142563388, iteration: 427168
loss: 1.112529993057251,grad_norm: 0.9999994578067406, iteration: 427169
loss: 1.01643967628479,grad_norm: 0.8322099209888449, iteration: 427170
loss: 1.0704070329666138,grad_norm: 0.9999990473165619, iteration: 427171
loss: 0.9898685216903687,grad_norm: 0.8675571404941875, iteration: 427172
loss: 1.0160117149353027,grad_norm: 0.9591419609385183, iteration: 427173
loss: 0.9923213720321655,grad_norm: 0.8516897846844563, iteration: 427174
loss: 1.0269572734832764,grad_norm: 0.9999995246047534, iteration: 427175
loss: 0.9730380177497864,grad_norm: 0.8372819458898026, iteration: 427176
loss: 1.0888123512268066,grad_norm: 0.999999827465784, iteration: 427177
loss: 1.027563214302063,grad_norm: 0.9999991944450265, iteration: 427178
loss: 1.0552208423614502,grad_norm: 0.7279944013897078, iteration: 427179
loss: 0.9882343411445618,grad_norm: 0.9593016232738729, iteration: 427180
loss: 0.9933177828788757,grad_norm: 0.7447530826936052, iteration: 427181
loss: 1.0015517473220825,grad_norm: 0.7277325861996146, iteration: 427182
loss: 1.012084722518921,grad_norm: 0.9999998099225358, iteration: 427183
loss: 1.016269326210022,grad_norm: 0.6882626628892992, iteration: 427184
loss: 0.9993826746940613,grad_norm: 0.8076866962657696, iteration: 427185
loss: 1.040912389755249,grad_norm: 0.7645314136254276, iteration: 427186
loss: 1.0576002597808838,grad_norm: 0.900053938036718, iteration: 427187
loss: 1.0320639610290527,grad_norm: 0.9999995848916722, iteration: 427188
loss: 1.0173959732055664,grad_norm: 0.98386777691976, iteration: 427189
loss: 1.1014634370803833,grad_norm: 0.9999995365577719, iteration: 427190
loss: 0.9831134080886841,grad_norm: 0.8382082010450932, iteration: 427191
loss: 1.0361568927764893,grad_norm: 0.8406050379632506, iteration: 427192
loss: 1.007033348083496,grad_norm: 0.7772800455766856, iteration: 427193
loss: 1.0281695127487183,grad_norm: 0.7797709591090237, iteration: 427194
loss: 1.0116101503372192,grad_norm: 0.8349468057135053, iteration: 427195
loss: 1.0256844758987427,grad_norm: 0.7268058404432488, iteration: 427196
loss: 0.9729546308517456,grad_norm: 0.744602879831331, iteration: 427197
loss: 1.1352554559707642,grad_norm: 0.958027699332883, iteration: 427198
loss: 0.9980359673500061,grad_norm: 0.7442335148211625, iteration: 427199
loss: 0.9846537709236145,grad_norm: 0.7166700845427979, iteration: 427200
loss: 0.992875874042511,grad_norm: 0.7649459899605561, iteration: 427201
loss: 1.0223015546798706,grad_norm: 0.9745212229188526, iteration: 427202
loss: 1.0242427587509155,grad_norm: 0.8493708473073003, iteration: 427203
loss: 0.9909433126449585,grad_norm: 0.7193135081383314, iteration: 427204
loss: 1.0174108743667603,grad_norm: 0.8708031590522083, iteration: 427205
loss: 0.9846925735473633,grad_norm: 0.9999990962484967, iteration: 427206
loss: 1.0058262348175049,grad_norm: 0.7140928503161306, iteration: 427207
loss: 1.0357666015625,grad_norm: 0.9442131467670541, iteration: 427208
loss: 0.9662678837776184,grad_norm: 0.643462240896753, iteration: 427209
loss: 1.01978600025177,grad_norm: 0.9999993890078085, iteration: 427210
loss: 0.9550342559814453,grad_norm: 0.7953992340137699, iteration: 427211
loss: 1.0107579231262207,grad_norm: 0.8151093710039693, iteration: 427212
loss: 1.0807483196258545,grad_norm: 0.9999994214788431, iteration: 427213
loss: 1.0007706880569458,grad_norm: 0.8627383363836375, iteration: 427214
loss: 0.9903262853622437,grad_norm: 0.8488104207376184, iteration: 427215
loss: 0.9944813847541809,grad_norm: 0.8774222295071428, iteration: 427216
loss: 0.988611102104187,grad_norm: 0.7115928290082539, iteration: 427217
loss: 1.0301129817962646,grad_norm: 0.6311075810769121, iteration: 427218
loss: 1.00745689868927,grad_norm: 0.7507279950208212, iteration: 427219
loss: 0.9911401271820068,grad_norm: 0.7695269672233138, iteration: 427220
loss: 0.9759393334388733,grad_norm: 0.9125724145307357, iteration: 427221
loss: 0.9885738492012024,grad_norm: 0.9864743505592158, iteration: 427222
loss: 1.0159317255020142,grad_norm: 0.9999991915732102, iteration: 427223
loss: 0.9926023483276367,grad_norm: 0.8654467769841385, iteration: 427224
loss: 1.0658154487609863,grad_norm: 0.9999998679874569, iteration: 427225
loss: 1.0025231838226318,grad_norm: 0.7867913692098418, iteration: 427226
loss: 0.9956178069114685,grad_norm: 0.9572593468145626, iteration: 427227
loss: 0.9906954169273376,grad_norm: 0.7103681809717177, iteration: 427228
loss: 1.0211957693099976,grad_norm: 0.6159625864539751, iteration: 427229
loss: 1.0594780445098877,grad_norm: 0.9999991894442004, iteration: 427230
loss: 0.984306275844574,grad_norm: 0.665522724000797, iteration: 427231
loss: 0.9971894025802612,grad_norm: 0.7960204626007705, iteration: 427232
loss: 1.0192633867263794,grad_norm: 0.9886220788482944, iteration: 427233
loss: 1.126247525215149,grad_norm: 0.9999997035848323, iteration: 427234
loss: 1.0018349885940552,grad_norm: 0.9447446059926031, iteration: 427235
loss: 1.0343817472457886,grad_norm: 0.7781479474171664, iteration: 427236
loss: 1.017701506614685,grad_norm: 0.710413146751774, iteration: 427237
loss: 1.0504122972488403,grad_norm: 0.9999990043496774, iteration: 427238
loss: 1.0547550916671753,grad_norm: 0.9999992211981517, iteration: 427239
loss: 1.0563973188400269,grad_norm: 0.9999995046240963, iteration: 427240
loss: 1.0243473052978516,grad_norm: 0.9999992107691452, iteration: 427241
loss: 1.0172555446624756,grad_norm: 0.8477349954533179, iteration: 427242
loss: 1.0010641813278198,grad_norm: 0.7508754488039013, iteration: 427243
loss: 1.0226198434829712,grad_norm: 0.7704417569218833, iteration: 427244
loss: 0.981913685798645,grad_norm: 0.8051076516515975, iteration: 427245
loss: 1.077727198600769,grad_norm: 0.9647024925560372, iteration: 427246
loss: 1.0005370378494263,grad_norm: 0.9999993363665775, iteration: 427247
loss: 1.1952937841415405,grad_norm: 0.9999994024466328, iteration: 427248
loss: 1.0128753185272217,grad_norm: 0.7740438029342906, iteration: 427249
loss: 0.9658620357513428,grad_norm: 0.7664303819320641, iteration: 427250
loss: 0.9853817224502563,grad_norm: 0.7515119282480217, iteration: 427251
loss: 0.9935091733932495,grad_norm: 0.825088106997026, iteration: 427252
loss: 1.0177682638168335,grad_norm: 0.7679425850542105, iteration: 427253
loss: 0.9798070788383484,grad_norm: 0.7621268964319406, iteration: 427254
loss: 0.9611018300056458,grad_norm: 0.9999991708229286, iteration: 427255
loss: 1.0433834791183472,grad_norm: 0.8504694596066095, iteration: 427256
loss: 0.9686798453330994,grad_norm: 0.8311429902675759, iteration: 427257
loss: 0.970815122127533,grad_norm: 0.7070679875139657, iteration: 427258
loss: 0.9597026109695435,grad_norm: 0.7696362737666143, iteration: 427259
loss: 1.0083545446395874,grad_norm: 0.9999996247001016, iteration: 427260
loss: 0.9839694499969482,grad_norm: 0.8662359444312218, iteration: 427261
loss: 0.9914020895957947,grad_norm: 0.8162641362612011, iteration: 427262
loss: 1.0347589254379272,grad_norm: 0.8566059917072355, iteration: 427263
loss: 0.9798451662063599,grad_norm: 0.8145587941743831, iteration: 427264
loss: 1.0273205041885376,grad_norm: 0.8197391857515272, iteration: 427265
loss: 0.9885122776031494,grad_norm: 0.7539335286465298, iteration: 427266
loss: 0.9874122142791748,grad_norm: 0.7378734300597068, iteration: 427267
loss: 1.084346890449524,grad_norm: 0.9147756577755545, iteration: 427268
loss: 1.149845838546753,grad_norm: 0.9999998972849875, iteration: 427269
loss: 0.9851526618003845,grad_norm: 0.8172335034871417, iteration: 427270
loss: 1.0107736587524414,grad_norm: 0.7753015913451693, iteration: 427271
loss: 0.9892747402191162,grad_norm: 0.7164579278863891, iteration: 427272
loss: 0.994429886341095,grad_norm: 0.8172389481445416, iteration: 427273
loss: 1.0046842098236084,grad_norm: 0.7768542750885444, iteration: 427274
loss: 0.9992852807044983,grad_norm: 0.690092733609397, iteration: 427275
loss: 1.0055135488510132,grad_norm: 0.7404404089293677, iteration: 427276
loss: 1.0021387338638306,grad_norm: 0.7468602156058062, iteration: 427277
loss: 0.9997866749763489,grad_norm: 0.6960467510864962, iteration: 427278
loss: 1.0334680080413818,grad_norm: 0.7281782154874679, iteration: 427279
loss: 1.0260803699493408,grad_norm: 0.828208722485947, iteration: 427280
loss: 1.045859456062317,grad_norm: 0.8463903480000404, iteration: 427281
loss: 1.0369240045547485,grad_norm: 0.9773740628341471, iteration: 427282
loss: 0.9750336408615112,grad_norm: 0.6626046018419601, iteration: 427283
loss: 0.9909438490867615,grad_norm: 0.8186414012729122, iteration: 427284
loss: 0.9868186712265015,grad_norm: 0.830592690344682, iteration: 427285
loss: 0.9903799295425415,grad_norm: 0.9385511905327784, iteration: 427286
loss: 1.021554708480835,grad_norm: 0.9999991199856313, iteration: 427287
loss: 1.021446943283081,grad_norm: 0.9195336374151785, iteration: 427288
loss: 1.0328171253204346,grad_norm: 0.7345074350427916, iteration: 427289
loss: 0.9884564876556396,grad_norm: 0.7779863652714017, iteration: 427290
loss: 1.0152415037155151,grad_norm: 0.6792804753641299, iteration: 427291
loss: 1.0335949659347534,grad_norm: 0.8105498266517628, iteration: 427292
loss: 1.0172396898269653,grad_norm: 0.7614188596580378, iteration: 427293
loss: 1.0303142070770264,grad_norm: 0.7153807033923159, iteration: 427294
loss: 1.0288679599761963,grad_norm: 0.820291803445972, iteration: 427295
loss: 0.9680903553962708,grad_norm: 0.9850661410769675, iteration: 427296
loss: 1.0814988613128662,grad_norm: 0.789611873779812, iteration: 427297
loss: 1.0094691514968872,grad_norm: 0.9999991967039477, iteration: 427298
loss: 1.0230761766433716,grad_norm: 0.7935706000956585, iteration: 427299
loss: 0.9489256143569946,grad_norm: 0.7828185655119716, iteration: 427300
loss: 0.972804605960846,grad_norm: 0.7188901385750445, iteration: 427301
loss: 1.0294948816299438,grad_norm: 0.8833719415563838, iteration: 427302
loss: 1.0095146894454956,grad_norm: 0.9999998336362754, iteration: 427303
loss: 1.0100632905960083,grad_norm: 0.7093696539024972, iteration: 427304
loss: 1.0115591287612915,grad_norm: 0.9044931587524118, iteration: 427305
loss: 0.9888486862182617,grad_norm: 0.8616093500623832, iteration: 427306
loss: 1.0355219841003418,grad_norm: 0.8987786362906852, iteration: 427307
loss: 1.0119764804840088,grad_norm: 0.9999990264895664, iteration: 427308
loss: 0.9984021782875061,grad_norm: 0.6845361165071018, iteration: 427309
loss: 0.9821770787239075,grad_norm: 0.8059396416091077, iteration: 427310
loss: 1.0204856395721436,grad_norm: 0.7759629247104317, iteration: 427311
loss: 1.0327203273773193,grad_norm: 0.7562046555059271, iteration: 427312
loss: 0.9888664484024048,grad_norm: 0.874845629567737, iteration: 427313
loss: 1.0204930305480957,grad_norm: 0.803935177842009, iteration: 427314
loss: 0.9813944697380066,grad_norm: 0.9033574811569675, iteration: 427315
loss: 0.9930391907691956,grad_norm: 0.7715432952048655, iteration: 427316
loss: 0.9789671301841736,grad_norm: 0.728650929835576, iteration: 427317
loss: 0.9835322499275208,grad_norm: 0.9058609500676229, iteration: 427318
loss: 0.9900140166282654,grad_norm: 0.8538222058402384, iteration: 427319
loss: 0.973278820514679,grad_norm: 0.7051700928580665, iteration: 427320
loss: 1.059729814529419,grad_norm: 0.9999991880985714, iteration: 427321
loss: 0.9566174149513245,grad_norm: 0.7595148829205033, iteration: 427322
loss: 1.0021684169769287,grad_norm: 0.7058890672797217, iteration: 427323
loss: 1.0377893447875977,grad_norm: 0.7209971902575163, iteration: 427324
loss: 0.9929011464118958,grad_norm: 0.6724220741158482, iteration: 427325
loss: 0.9534710049629211,grad_norm: 0.896007649622283, iteration: 427326
loss: 1.0251134634017944,grad_norm: 0.6686967290673966, iteration: 427327
loss: 1.0765013694763184,grad_norm: 0.9999993180517928, iteration: 427328
loss: 1.0618407726287842,grad_norm: 0.9999994776378299, iteration: 427329
loss: 1.024418592453003,grad_norm: 0.8619125000603619, iteration: 427330
loss: 1.0088762044906616,grad_norm: 0.854040521018256, iteration: 427331
loss: 1.1151795387268066,grad_norm: 0.9999994771582167, iteration: 427332
loss: 1.010809063911438,grad_norm: 0.7928328875637476, iteration: 427333
loss: 0.9748770594596863,grad_norm: 0.7577843670588573, iteration: 427334
loss: 1.0040045976638794,grad_norm: 0.9822176810825395, iteration: 427335
loss: 0.9854318499565125,grad_norm: 0.681595738542541, iteration: 427336
loss: 1.0664137601852417,grad_norm: 0.8916244522352549, iteration: 427337
loss: 0.9811420440673828,grad_norm: 0.8275691520382873, iteration: 427338
loss: 0.9894317984580994,grad_norm: 0.8067597037626402, iteration: 427339
loss: 0.9982457756996155,grad_norm: 0.7313027131204417, iteration: 427340
loss: 0.979936420917511,grad_norm: 0.763787653093851, iteration: 427341
loss: 1.0038552284240723,grad_norm: 0.767871560214676, iteration: 427342
loss: 0.9974896907806396,grad_norm: 0.7907288658602984, iteration: 427343
loss: 0.9838650822639465,grad_norm: 0.6574390859229727, iteration: 427344
loss: 0.9886006116867065,grad_norm: 0.7267014044753939, iteration: 427345
loss: 1.0296028852462769,grad_norm: 0.9119475713944766, iteration: 427346
loss: 0.9668736457824707,grad_norm: 0.7795746420151637, iteration: 427347
loss: 1.0200716257095337,grad_norm: 0.6789410563848008, iteration: 427348
loss: 0.9636728167533875,grad_norm: 0.9999991409366302, iteration: 427349
loss: 0.9963897466659546,grad_norm: 0.9999992164838405, iteration: 427350
loss: 1.018602967262268,grad_norm: 0.8508425826640269, iteration: 427351
loss: 1.0089362859725952,grad_norm: 0.6458093583016193, iteration: 427352
loss: 0.9723649024963379,grad_norm: 0.9310447266904591, iteration: 427353
loss: 1.006091833114624,grad_norm: 0.8708752228306882, iteration: 427354
loss: 0.9776293039321899,grad_norm: 0.9688268373190864, iteration: 427355
loss: 1.0961012840270996,grad_norm: 0.8253750485376001, iteration: 427356
loss: 0.9958533048629761,grad_norm: 0.8494181994082178, iteration: 427357
loss: 1.0097346305847168,grad_norm: 0.7961636254693656, iteration: 427358
loss: 1.0309473276138306,grad_norm: 0.9999989897279132, iteration: 427359
loss: 1.0412123203277588,grad_norm: 0.7826904067509441, iteration: 427360
loss: 1.009006142616272,grad_norm: 0.7221750910769412, iteration: 427361
loss: 0.9885224103927612,grad_norm: 0.7199029531005059, iteration: 427362
loss: 1.0000070333480835,grad_norm: 0.7511921954267285, iteration: 427363
loss: 0.9876089692115784,grad_norm: 0.7111008180775304, iteration: 427364
loss: 1.0202932357788086,grad_norm: 0.8835675006665639, iteration: 427365
loss: 0.9762080311775208,grad_norm: 0.6542673715926945, iteration: 427366
loss: 0.9815162420272827,grad_norm: 0.7571942754533574, iteration: 427367
loss: 0.996293842792511,grad_norm: 0.8559388016420667, iteration: 427368
loss: 0.979997456073761,grad_norm: 0.9999990549148962, iteration: 427369
loss: 1.0432608127593994,grad_norm: 0.7784830854640586, iteration: 427370
loss: 1.0204312801361084,grad_norm: 0.793000567661575, iteration: 427371
loss: 1.0041157007217407,grad_norm: 0.8449616055301876, iteration: 427372
loss: 0.9972953796386719,grad_norm: 0.7549309329773701, iteration: 427373
loss: 0.9844700694084167,grad_norm: 0.8828729197420141, iteration: 427374
loss: 1.009827733039856,grad_norm: 0.999999259866474, iteration: 427375
loss: 1.0246803760528564,grad_norm: 0.7526392240462094, iteration: 427376
loss: 0.9853981137275696,grad_norm: 0.7767388449918333, iteration: 427377
loss: 0.9600111246109009,grad_norm: 0.9999990579040111, iteration: 427378
loss: 0.964845597743988,grad_norm: 0.7399105637007992, iteration: 427379
loss: 0.9785299301147461,grad_norm: 0.7224949935080679, iteration: 427380
loss: 1.0038695335388184,grad_norm: 0.8174197721760506, iteration: 427381
loss: 1.0231002569198608,grad_norm: 0.7851117470653447, iteration: 427382
loss: 1.0035055875778198,grad_norm: 0.9031854083211885, iteration: 427383
loss: 1.226749062538147,grad_norm: 0.7900097335931805, iteration: 427384
loss: 1.0099163055419922,grad_norm: 0.7085984301354814, iteration: 427385
loss: 0.9820035696029663,grad_norm: 0.7119794745373618, iteration: 427386
loss: 0.9914103746414185,grad_norm: 0.7901275846492805, iteration: 427387
loss: 0.9673799276351929,grad_norm: 0.8308430535125765, iteration: 427388
loss: 0.9931605458259583,grad_norm: 0.7394046179332395, iteration: 427389
loss: 1.0077450275421143,grad_norm: 0.5710055202476148, iteration: 427390
loss: 1.0120052099227905,grad_norm: 0.7202625678759668, iteration: 427391
loss: 1.0080205202102661,grad_norm: 0.7971264032971984, iteration: 427392
loss: 1.0438400506973267,grad_norm: 0.8528024541785749, iteration: 427393
loss: 1.0283273458480835,grad_norm: 0.8961510100127994, iteration: 427394
loss: 1.0012563467025757,grad_norm: 0.7181864249206917, iteration: 427395
loss: 1.0165810585021973,grad_norm: 0.9013794235791142, iteration: 427396
loss: 0.9690890312194824,grad_norm: 0.7810132137600537, iteration: 427397
loss: 0.9483957886695862,grad_norm: 0.6037522432669382, iteration: 427398
loss: 1.0113015174865723,grad_norm: 0.9159888529891557, iteration: 427399
loss: 1.039770245552063,grad_norm: 0.8849465878664395, iteration: 427400
loss: 1.0053225755691528,grad_norm: 0.8413446610105639, iteration: 427401
loss: 0.9844819903373718,grad_norm: 0.6272961285474115, iteration: 427402
loss: 0.9969596266746521,grad_norm: 0.7773031429981823, iteration: 427403
loss: 0.9950889945030212,grad_norm: 0.7544665646964912, iteration: 427404
loss: 1.000730276107788,grad_norm: 0.8640123127707313, iteration: 427405
loss: 0.9974396228790283,grad_norm: 0.6698302784212294, iteration: 427406
loss: 1.0141321420669556,grad_norm: 0.7529492221797922, iteration: 427407
loss: 1.0048705339431763,grad_norm: 0.8942532908483417, iteration: 427408
loss: 0.9763347506523132,grad_norm: 0.6947394153530899, iteration: 427409
loss: 1.0056642293930054,grad_norm: 0.9074073661566774, iteration: 427410
loss: 0.9752436280250549,grad_norm: 0.7162011431263237, iteration: 427411
loss: 1.002305030822754,grad_norm: 0.7036111189138233, iteration: 427412
loss: 1.0547082424163818,grad_norm: 0.9999994329924097, iteration: 427413
loss: 1.0322701930999756,grad_norm: 0.896155449204017, iteration: 427414
loss: 1.039570689201355,grad_norm: 0.9999997450317526, iteration: 427415
loss: 1.020581603050232,grad_norm: 0.6768520909992909, iteration: 427416
loss: 1.0340994596481323,grad_norm: 0.8396189180303882, iteration: 427417
loss: 0.9738724827766418,grad_norm: 0.7108174096710105, iteration: 427418
loss: 0.9555536508560181,grad_norm: 0.693378849824252, iteration: 427419
loss: 1.0298256874084473,grad_norm: 0.7752894547928164, iteration: 427420
loss: 1.016645908355713,grad_norm: 0.7006745545109617, iteration: 427421
loss: 0.9845304489135742,grad_norm: 0.7699261559793181, iteration: 427422
loss: 0.9383397102355957,grad_norm: 0.8179815023245846, iteration: 427423
loss: 0.9773063659667969,grad_norm: 0.7778644820930584, iteration: 427424
loss: 0.9936578869819641,grad_norm: 0.8838459744900057, iteration: 427425
loss: 1.0298165082931519,grad_norm: 0.6683183000185748, iteration: 427426
loss: 0.989946722984314,grad_norm: 0.7838322133390118, iteration: 427427
loss: 0.9854845404624939,grad_norm: 0.9613543139650919, iteration: 427428
loss: 1.0040842294692993,grad_norm: 0.7804753322123592, iteration: 427429
loss: 1.0022321939468384,grad_norm: 0.999999027114582, iteration: 427430
loss: 0.9802576899528503,grad_norm: 0.7694596319321246, iteration: 427431
loss: 1.0112056732177734,grad_norm: 0.6759846065273075, iteration: 427432
loss: 1.048754334449768,grad_norm: 0.8904326554169797, iteration: 427433
loss: 0.9893596768379211,grad_norm: 0.8229552558383767, iteration: 427434
loss: 1.0357478857040405,grad_norm: 0.7681821055590039, iteration: 427435
loss: 0.9615475535392761,grad_norm: 0.7442373065078183, iteration: 427436
loss: 1.0173720121383667,grad_norm: 0.8587672322307222, iteration: 427437
loss: 1.0085639953613281,grad_norm: 0.667186217000901, iteration: 427438
loss: 0.952853262424469,grad_norm: 0.9999995981385466, iteration: 427439
loss: 1.0128028392791748,grad_norm: 0.999999145339902, iteration: 427440
loss: 1.0819474458694458,grad_norm: 0.9999992353020528, iteration: 427441
loss: 1.0038982629776,grad_norm: 0.6908900689175786, iteration: 427442
loss: 0.9990625977516174,grad_norm: 0.8078965605352902, iteration: 427443
loss: 1.1177321672439575,grad_norm: 0.9999991117783421, iteration: 427444
loss: 0.9478201270103455,grad_norm: 0.8288478277474078, iteration: 427445
loss: 0.9866222739219666,grad_norm: 0.7845673703861901, iteration: 427446
loss: 0.9712807536125183,grad_norm: 0.7402140353451924, iteration: 427447
loss: 1.0166808366775513,grad_norm: 0.7383728724679438, iteration: 427448
loss: 0.9996561408042908,grad_norm: 0.8525230929489557, iteration: 427449
loss: 0.9857637882232666,grad_norm: 0.5917237527865498, iteration: 427450
loss: 1.0258914232254028,grad_norm: 0.886505525005751, iteration: 427451
loss: 1.035469651222229,grad_norm: 0.9386976796670745, iteration: 427452
loss: 0.9782931208610535,grad_norm: 0.6965783490125375, iteration: 427453
loss: 1.0222868919372559,grad_norm: 0.7826720246471308, iteration: 427454
loss: 0.9983245134353638,grad_norm: 0.7580961221222821, iteration: 427455
loss: 1.0568314790725708,grad_norm: 0.8472093489455366, iteration: 427456
loss: 0.9761197566986084,grad_norm: 0.7041415802330291, iteration: 427457
loss: 1.0128496885299683,grad_norm: 0.9999991501760804, iteration: 427458
loss: 1.0248380899429321,grad_norm: 0.6679441151419802, iteration: 427459
loss: 0.962249219417572,grad_norm: 0.751610653043627, iteration: 427460
loss: 1.0311726331710815,grad_norm: 0.7854854143654276, iteration: 427461
loss: 1.0215235948562622,grad_norm: 0.8079988631966921, iteration: 427462
loss: 1.0090996026992798,grad_norm: 0.7123292084380555, iteration: 427463
loss: 1.0142035484313965,grad_norm: 0.7390012452146638, iteration: 427464
loss: 1.0263657569885254,grad_norm: 0.8471127784692546, iteration: 427465
loss: 0.9816223978996277,grad_norm: 0.8140746899098446, iteration: 427466
loss: 0.9948627948760986,grad_norm: 0.999999976225717, iteration: 427467
loss: 0.9932814836502075,grad_norm: 0.8470850243594967, iteration: 427468
loss: 1.015217661857605,grad_norm: 0.886106573238506, iteration: 427469
loss: 0.990693986415863,grad_norm: 0.8541292102132216, iteration: 427470
loss: 0.9743978381156921,grad_norm: 0.6491420342381544, iteration: 427471
loss: 1.0012582540512085,grad_norm: 0.7750634299459643, iteration: 427472
loss: 0.9875887036323547,grad_norm: 0.6995333740939806, iteration: 427473
loss: 1.0017136335372925,grad_norm: 0.6221240231431358, iteration: 427474
loss: 1.084878921508789,grad_norm: 0.9999990356823263, iteration: 427475
loss: 0.9801220893859863,grad_norm: 0.7980877178662218, iteration: 427476
loss: 0.975609540939331,grad_norm: 0.8246003634239385, iteration: 427477
loss: 1.0010510683059692,grad_norm: 0.7630195844414833, iteration: 427478
loss: 0.986044704914093,grad_norm: 0.7598467168986046, iteration: 427479
loss: 1.0181825160980225,grad_norm: 0.7155447682491844, iteration: 427480
loss: 1.0050675868988037,grad_norm: 0.7440875478223816, iteration: 427481
loss: 1.0007336139678955,grad_norm: 0.6862308961756899, iteration: 427482
loss: 0.9868599772453308,grad_norm: 0.7922518176694272, iteration: 427483
loss: 1.051919937133789,grad_norm: 0.9999998310062607, iteration: 427484
loss: 1.0374552011489868,grad_norm: 0.7835079668061057, iteration: 427485
loss: 1.0392999649047852,grad_norm: 0.757062790945999, iteration: 427486
loss: 0.9913380146026611,grad_norm: 0.6874929611971148, iteration: 427487
loss: 1.0223827362060547,grad_norm: 0.9999991612650098, iteration: 427488
loss: 1.0449192523956299,grad_norm: 0.8915604222505134, iteration: 427489
loss: 0.9838271141052246,grad_norm: 0.6757882942560008, iteration: 427490
loss: 0.9791532754898071,grad_norm: 0.8022792253259888, iteration: 427491
loss: 0.977658212184906,grad_norm: 0.8153664660288432, iteration: 427492
loss: 1.0074676275253296,grad_norm: 0.8698880432786144, iteration: 427493
loss: 1.0118722915649414,grad_norm: 0.8615703659150038, iteration: 427494
loss: 0.973612904548645,grad_norm: 0.7453109631194751, iteration: 427495
loss: 1.0053592920303345,grad_norm: 0.8543553247500991, iteration: 427496
loss: 0.9791710376739502,grad_norm: 0.7994014771849663, iteration: 427497
loss: 1.0264562368392944,grad_norm: 0.6671131626477901, iteration: 427498
loss: 1.0179612636566162,grad_norm: 0.671685114582306, iteration: 427499
loss: 1.0050479173660278,grad_norm: 0.7294683435313009, iteration: 427500
loss: 1.013373613357544,grad_norm: 0.7905888082947846, iteration: 427501
loss: 1.0504488945007324,grad_norm: 0.9999992770374685, iteration: 427502
loss: 0.961096465587616,grad_norm: 0.7162040697961466, iteration: 427503
loss: 1.0160109996795654,grad_norm: 0.8234132009866273, iteration: 427504
loss: 0.9873475432395935,grad_norm: 0.9999992488594418, iteration: 427505
loss: 1.03289794921875,grad_norm: 0.7697424817642708, iteration: 427506
loss: 1.0069360733032227,grad_norm: 0.7565075932535633, iteration: 427507
loss: 1.012441873550415,grad_norm: 0.720540046504418, iteration: 427508
loss: 1.0410720109939575,grad_norm: 0.999999169970092, iteration: 427509
loss: 1.003277063369751,grad_norm: 0.6440946754294228, iteration: 427510
loss: 1.0544739961624146,grad_norm: 0.7708702307967519, iteration: 427511
loss: 1.0588563680648804,grad_norm: 0.9823939526266873, iteration: 427512
loss: 0.9648550748825073,grad_norm: 0.7251487898000756, iteration: 427513
loss: 0.9741058349609375,grad_norm: 0.8418866140973379, iteration: 427514
loss: 0.995461642742157,grad_norm: 0.637626828632975, iteration: 427515
loss: 0.9815331697463989,grad_norm: 0.731282334862544, iteration: 427516
loss: 1.0009769201278687,grad_norm: 0.5968823024207321, iteration: 427517
loss: 0.9861705899238586,grad_norm: 0.7471577919435577, iteration: 427518
loss: 1.0880579948425293,grad_norm: 0.8657534097823207, iteration: 427519
loss: 0.9920253157615662,grad_norm: 0.6792492057958042, iteration: 427520
loss: 1.0075544118881226,grad_norm: 0.9042839783426436, iteration: 427521
loss: 0.9925717115402222,grad_norm: 0.6667255998119919, iteration: 427522
loss: 1.0118656158447266,grad_norm: 0.9110440347710714, iteration: 427523
loss: 1.004675269126892,grad_norm: 0.9999991639871301, iteration: 427524
loss: 0.9900684356689453,grad_norm: 0.8916091765565269, iteration: 427525
loss: 0.9924502968788147,grad_norm: 0.910262898701914, iteration: 427526
loss: 1.088741660118103,grad_norm: 0.8071161926706049, iteration: 427527
loss: 1.0185025930404663,grad_norm: 0.8409428511450839, iteration: 427528
loss: 1.0084339380264282,grad_norm: 0.9076280184750093, iteration: 427529
loss: 1.02877938747406,grad_norm: 0.6915256309612517, iteration: 427530
loss: 0.9939780235290527,grad_norm: 0.8500608227141759, iteration: 427531
loss: 1.0177624225616455,grad_norm: 0.7628941696444996, iteration: 427532
loss: 1.009404182434082,grad_norm: 0.7058542413995424, iteration: 427533
loss: 0.9975541830062866,grad_norm: 0.6754593046370793, iteration: 427534
loss: 1.0567623376846313,grad_norm: 0.9264472096362657, iteration: 427535
loss: 1.0363531112670898,grad_norm: 0.932658106427254, iteration: 427536
loss: 0.9924761056900024,grad_norm: 0.77040413863099, iteration: 427537
loss: 1.0011522769927979,grad_norm: 0.7226448604519526, iteration: 427538
loss: 0.9891386032104492,grad_norm: 0.9999991088819792, iteration: 427539
loss: 1.03824782371521,grad_norm: 0.8844556165741579, iteration: 427540
loss: 0.9962817430496216,grad_norm: 0.6472348560462868, iteration: 427541
loss: 0.9716299176216125,grad_norm: 0.8993166552292192, iteration: 427542
loss: 1.0420583486557007,grad_norm: 0.8705591436456213, iteration: 427543
loss: 1.074373722076416,grad_norm: 0.7934593396582337, iteration: 427544
loss: 1.1220475435256958,grad_norm: 0.7940763902719822, iteration: 427545
loss: 0.992107093334198,grad_norm: 0.7374979342584383, iteration: 427546
loss: 0.9914659857749939,grad_norm: 0.8457622505268143, iteration: 427547
loss: 1.0366288423538208,grad_norm: 0.692449671905545, iteration: 427548
loss: 1.014890193939209,grad_norm: 0.708152496810839, iteration: 427549
loss: 1.0321722030639648,grad_norm: 0.9582217752872714, iteration: 427550
loss: 1.0341322422027588,grad_norm: 0.9940884560248534, iteration: 427551
loss: 0.9948130249977112,grad_norm: 0.7531030977097102, iteration: 427552
loss: 1.0052297115325928,grad_norm: 0.8282121864453394, iteration: 427553
loss: 1.047406792640686,grad_norm: 0.7710745075310064, iteration: 427554
loss: 1.0162253379821777,grad_norm: 0.6851404563742771, iteration: 427555
loss: 0.996349573135376,grad_norm: 0.8386908987011849, iteration: 427556
loss: 1.0767656564712524,grad_norm: 0.699231860321686, iteration: 427557
loss: 0.951076328754425,grad_norm: 0.7264915086062683, iteration: 427558
loss: 1.0418294668197632,grad_norm: 0.9999999349285268, iteration: 427559
loss: 1.0136399269104004,grad_norm: 0.7862764658436306, iteration: 427560
loss: 1.0355719327926636,grad_norm: 0.7145560990639146, iteration: 427561
loss: 1.011128306388855,grad_norm: 0.678083866365385, iteration: 427562
loss: 1.054051160812378,grad_norm: 0.6444708878409307, iteration: 427563
loss: 1.0186653137207031,grad_norm: 0.9999990933168017, iteration: 427564
loss: 1.0033785104751587,grad_norm: 0.8514392114221971, iteration: 427565
loss: 0.9929026365280151,grad_norm: 0.7177255560043097, iteration: 427566
loss: 0.9624322652816772,grad_norm: 0.833200463246611, iteration: 427567
loss: 0.9743589758872986,grad_norm: 0.6806459983432277, iteration: 427568
loss: 1.0168776512145996,grad_norm: 0.7053365188541746, iteration: 427569
loss: 1.008449673652649,grad_norm: 0.7485678507127087, iteration: 427570
loss: 0.9874994158744812,grad_norm: 0.8350024915773867, iteration: 427571
loss: 1.0555577278137207,grad_norm: 0.8431985209569692, iteration: 427572
loss: 0.9668020606040955,grad_norm: 0.6986237585816834, iteration: 427573
loss: 1.043177843093872,grad_norm: 0.7887875425441774, iteration: 427574
loss: 1.0209907293319702,grad_norm: 0.747964610532074, iteration: 427575
loss: 0.9667381644248962,grad_norm: 0.7740257436152722, iteration: 427576
loss: 0.99909907579422,grad_norm: 0.7493002720556163, iteration: 427577
loss: 0.9896800518035889,grad_norm: 0.8244362496176559, iteration: 427578
loss: 0.976285994052887,grad_norm: 0.7172897925033425, iteration: 427579
loss: 1.0289496183395386,grad_norm: 0.8658435846589693, iteration: 427580
loss: 0.9725426435470581,grad_norm: 0.8900761068641364, iteration: 427581
loss: 1.0061767101287842,grad_norm: 0.7861412333839758, iteration: 427582
loss: 0.9739917516708374,grad_norm: 0.7043716914164609, iteration: 427583
loss: 0.9526808857917786,grad_norm: 0.7570447326273757, iteration: 427584
loss: 1.0598820447921753,grad_norm: 0.999999160283291, iteration: 427585
loss: 1.0003563165664673,grad_norm: 0.7834821473003946, iteration: 427586
loss: 0.9880505800247192,grad_norm: 0.7190145681628487, iteration: 427587
loss: 1.0184437036514282,grad_norm: 0.8979731150306194, iteration: 427588
loss: 0.9824981689453125,grad_norm: 0.6666860838187512, iteration: 427589
loss: 1.0049524307250977,grad_norm: 0.8035890550320737, iteration: 427590
loss: 0.9789882898330688,grad_norm: 0.8054329760692482, iteration: 427591
loss: 1.0010840892791748,grad_norm: 0.7446602106835848, iteration: 427592
loss: 0.9936957359313965,grad_norm: 0.994767457730788, iteration: 427593
loss: 1.0089218616485596,grad_norm: 0.6691777862973702, iteration: 427594
loss: 1.0156807899475098,grad_norm: 0.6419628139689287, iteration: 427595
loss: 1.0007739067077637,grad_norm: 0.9999990106668677, iteration: 427596
loss: 1.0076024532318115,grad_norm: 0.7547160788258432, iteration: 427597
loss: 1.0055123567581177,grad_norm: 0.922404402674264, iteration: 427598
loss: 1.0367833375930786,grad_norm: 0.8939501745120095, iteration: 427599
loss: 1.009895920753479,grad_norm: 0.7937313247710476, iteration: 427600
loss: 0.972414493560791,grad_norm: 0.6845850186496683, iteration: 427601
loss: 0.9756031036376953,grad_norm: 0.6509874077103346, iteration: 427602
loss: 1.0498487949371338,grad_norm: 0.8460159664563378, iteration: 427603
loss: 0.9840870499610901,grad_norm: 0.7290678098804766, iteration: 427604
loss: 0.990260660648346,grad_norm: 0.7451672478111235, iteration: 427605
loss: 0.9617457389831543,grad_norm: 0.779643099285506, iteration: 427606
loss: 0.9798967838287354,grad_norm: 0.6929623348553317, iteration: 427607
loss: 1.1199328899383545,grad_norm: 0.9999998360235773, iteration: 427608
loss: 1.0116609334945679,grad_norm: 0.75627969702048, iteration: 427609
loss: 0.9891905188560486,grad_norm: 0.575636188063296, iteration: 427610
loss: 0.9817094802856445,grad_norm: 0.8943863518951556, iteration: 427611
loss: 0.9908837080001831,grad_norm: 0.9999992434313804, iteration: 427612
loss: 0.9954430460929871,grad_norm: 0.9185327183432962, iteration: 427613
loss: 1.0220950841903687,grad_norm: 0.7791931300664613, iteration: 427614
loss: 1.0214511156082153,grad_norm: 0.7864037402412323, iteration: 427615
loss: 0.9569486975669861,grad_norm: 0.9262917776415882, iteration: 427616
loss: 1.0046398639678955,grad_norm: 0.7626226295707046, iteration: 427617
loss: 1.0349806547164917,grad_norm: 0.8036713963292674, iteration: 427618
loss: 1.021728277206421,grad_norm: 0.7074392500814726, iteration: 427619
loss: 1.004119634628296,grad_norm: 0.8694533607694614, iteration: 427620
loss: 1.010210633277893,grad_norm: 0.9991727550712404, iteration: 427621
loss: 1.0094908475875854,grad_norm: 0.7746901398707929, iteration: 427622
loss: 0.9775875210762024,grad_norm: 0.8033495667547612, iteration: 427623
loss: 0.9943065643310547,grad_norm: 0.9548764387355212, iteration: 427624
loss: 1.0010417699813843,grad_norm: 0.7232023303291827, iteration: 427625
loss: 0.9870796799659729,grad_norm: 0.8254580188305448, iteration: 427626
loss: 0.9902427792549133,grad_norm: 0.8736390724326009, iteration: 427627
loss: 1.0093327760696411,grad_norm: 0.9329520259993249, iteration: 427628
loss: 1.019480586051941,grad_norm: 0.737276447740957, iteration: 427629
loss: 1.0037463903427124,grad_norm: 0.9472834834961922, iteration: 427630
loss: 1.0097306966781616,grad_norm: 0.818227837367213, iteration: 427631
loss: 0.9486083984375,grad_norm: 0.9848462365248749, iteration: 427632
loss: 1.006330132484436,grad_norm: 0.6917390092151439, iteration: 427633
loss: 1.009428858757019,grad_norm: 0.8095206639599738, iteration: 427634
loss: 1.0411262512207031,grad_norm: 0.9467259682429352, iteration: 427635
loss: 0.9912886023521423,grad_norm: 0.8829234877124699, iteration: 427636
loss: 1.0200598239898682,grad_norm: 0.7548857242062069, iteration: 427637
loss: 0.9861131906509399,grad_norm: 0.7369102176945056, iteration: 427638
loss: 0.978111207485199,grad_norm: 0.6500154620101088, iteration: 427639
loss: 1.0176364183425903,grad_norm: 0.7650979185432336, iteration: 427640
loss: 0.958038330078125,grad_norm: 0.872390959567401, iteration: 427641
loss: 1.011028528213501,grad_norm: 0.7494755663078848, iteration: 427642
loss: 1.0175282955169678,grad_norm: 0.7894352692195403, iteration: 427643
loss: 1.0314512252807617,grad_norm: 0.7836889276466735, iteration: 427644
loss: 1.0016158819198608,grad_norm: 0.9999990731005771, iteration: 427645
loss: 1.0173461437225342,grad_norm: 0.7519830311316789, iteration: 427646
loss: 0.9844262599945068,grad_norm: 0.833965311942827, iteration: 427647
loss: 0.9448714256286621,grad_norm: 0.82099936907552, iteration: 427648
loss: 1.020460605621338,grad_norm: 0.8535454602663098, iteration: 427649
loss: 1.0000838041305542,grad_norm: 0.7228197573366955, iteration: 427650
loss: 0.976024329662323,grad_norm: 0.6949297926491999, iteration: 427651
loss: 0.96877121925354,grad_norm: 0.8336588815541195, iteration: 427652
loss: 0.977340042591095,grad_norm: 0.8883965866103716, iteration: 427653
loss: 1.0309404134750366,grad_norm: 0.7960300413504053, iteration: 427654
loss: 1.0049718618392944,grad_norm: 0.7103036113680762, iteration: 427655
loss: 1.0058341026306152,grad_norm: 0.8134689030150861, iteration: 427656
loss: 1.0469021797180176,grad_norm: 0.9999991477126621, iteration: 427657
loss: 0.9862001538276672,grad_norm: 0.7592880723156815, iteration: 427658
loss: 0.9913929104804993,grad_norm: 0.9116025860390786, iteration: 427659
loss: 0.9588642120361328,grad_norm: 0.7315201926755055, iteration: 427660
loss: 1.0287483930587769,grad_norm: 0.717968191719258, iteration: 427661
loss: 0.9878122210502625,grad_norm: 0.7285642864655204, iteration: 427662
loss: 1.003822922706604,grad_norm: 0.9109318957596818, iteration: 427663
loss: 0.9902738332748413,grad_norm: 0.7334845322134833, iteration: 427664
loss: 0.9962002038955688,grad_norm: 0.8247586531290876, iteration: 427665
loss: 0.9757236242294312,grad_norm: 0.730612409313801, iteration: 427666
loss: 1.0047086477279663,grad_norm: 0.8586581027563461, iteration: 427667
loss: 0.9768595099449158,grad_norm: 0.7242080832052119, iteration: 427668
loss: 1.0071135759353638,grad_norm: 0.6791978785121606, iteration: 427669
loss: 1.034544825553894,grad_norm: 0.7717794524777163, iteration: 427670
loss: 1.0176118612289429,grad_norm: 0.8459761905492916, iteration: 427671
loss: 1.0756585597991943,grad_norm: 0.9999995793006411, iteration: 427672
loss: 1.0370486974716187,grad_norm: 0.8591664599442219, iteration: 427673
loss: 1.0023199319839478,grad_norm: 0.8641739195251794, iteration: 427674
loss: 0.9787366390228271,grad_norm: 0.709025155682161, iteration: 427675
loss: 1.0189731121063232,grad_norm: 0.9999998765136524, iteration: 427676
loss: 1.017316222190857,grad_norm: 0.7424991537696322, iteration: 427677
loss: 1.0014728307724,grad_norm: 0.8226711963142953, iteration: 427678
loss: 1.003100037574768,grad_norm: 0.8641392931442071, iteration: 427679
loss: 0.9608145356178284,grad_norm: 0.7703248270572616, iteration: 427680
loss: 1.0078023672103882,grad_norm: 0.7296681927982944, iteration: 427681
loss: 1.0085252523422241,grad_norm: 0.6770266276294635, iteration: 427682
loss: 1.013788104057312,grad_norm: 0.7742375762961032, iteration: 427683
loss: 1.046114206314087,grad_norm: 0.9026808464138956, iteration: 427684
loss: 0.9798774719238281,grad_norm: 0.6459634790787844, iteration: 427685
loss: 1.006824254989624,grad_norm: 0.795047912033854, iteration: 427686
loss: 0.9889790415763855,grad_norm: 0.9999998253139595, iteration: 427687
loss: 1.0201363563537598,grad_norm: 0.8913400283734618, iteration: 427688
loss: 1.0547155141830444,grad_norm: 0.7852567208757986, iteration: 427689
loss: 0.9949918389320374,grad_norm: 0.7313140896016314, iteration: 427690
loss: 1.0085514783859253,grad_norm: 0.7646391518149214, iteration: 427691
loss: 1.0035134553909302,grad_norm: 0.7775054903883265, iteration: 427692
loss: 1.0005511045455933,grad_norm: 0.9999992268173258, iteration: 427693
loss: 1.00265634059906,grad_norm: 0.7030258816290011, iteration: 427694
loss: 1.028115153312683,grad_norm: 0.7920668127661479, iteration: 427695
loss: 1.012635350227356,grad_norm: 0.6647369663549978, iteration: 427696
loss: 0.9748175144195557,grad_norm: 0.7983464458222876, iteration: 427697
loss: 1.0231494903564453,grad_norm: 0.8125086990961977, iteration: 427698
loss: 1.0027565956115723,grad_norm: 0.8171121319889397, iteration: 427699
loss: 0.9709722399711609,grad_norm: 0.7156586406003181, iteration: 427700
loss: 0.9582350850105286,grad_norm: 0.7407149759639661, iteration: 427701
loss: 1.001757025718689,grad_norm: 0.6399569328581388, iteration: 427702
loss: 1.0239720344543457,grad_norm: 0.7753419011717714, iteration: 427703
loss: 0.9824230670928955,grad_norm: 0.7544153101159554, iteration: 427704
loss: 0.9562976360321045,grad_norm: 0.7697383246936136, iteration: 427705
loss: 0.9961953163146973,grad_norm: 0.7660969696252578, iteration: 427706
loss: 1.0338966846466064,grad_norm: 0.7941142577024484, iteration: 427707
loss: 1.0205026865005493,grad_norm: 0.6925430046239687, iteration: 427708
loss: 1.0059622526168823,grad_norm: 0.9999992260846744, iteration: 427709
loss: 1.0582906007766724,grad_norm: 0.8064986068436737, iteration: 427710
loss: 0.9551621675491333,grad_norm: 0.7965873912034674, iteration: 427711
loss: 0.9624879956245422,grad_norm: 0.8079999027100084, iteration: 427712
loss: 0.9902867674827576,grad_norm: 0.9776709270400855, iteration: 427713
loss: 1.0005098581314087,grad_norm: 0.7285398480953856, iteration: 427714
loss: 0.9777647256851196,grad_norm: 0.7376735627993243, iteration: 427715
loss: 1.0005253553390503,grad_norm: 0.8173776822495221, iteration: 427716
loss: 1.0161285400390625,grad_norm: 0.8052159128743767, iteration: 427717
loss: 1.040015459060669,grad_norm: 0.7146616202998053, iteration: 427718
loss: 1.0276947021484375,grad_norm: 0.9331749931827292, iteration: 427719
loss: 0.9698832035064697,grad_norm: 0.9456015991399274, iteration: 427720
loss: 1.0255357027053833,grad_norm: 0.811292154872696, iteration: 427721
loss: 0.9991531372070312,grad_norm: 0.8582502060675266, iteration: 427722
loss: 1.0164296627044678,grad_norm: 0.7504466187978036, iteration: 427723
loss: 0.9612916111946106,grad_norm: 0.843238876254536, iteration: 427724
loss: 0.9974511861801147,grad_norm: 0.8621597006269193, iteration: 427725
loss: 1.0242927074432373,grad_norm: 0.9577348077038954, iteration: 427726
loss: 0.95847487449646,grad_norm: 0.6767710344494481, iteration: 427727
loss: 0.988478422164917,grad_norm: 0.7079169257752738, iteration: 427728
loss: 1.0318291187286377,grad_norm: 0.8901792374198542, iteration: 427729
loss: 1.0096794366836548,grad_norm: 0.7550026590662323, iteration: 427730
loss: 0.9935283064842224,grad_norm: 0.7838614555344456, iteration: 427731
loss: 0.9761280417442322,grad_norm: 0.7883195547318301, iteration: 427732
loss: 0.995646059513092,grad_norm: 0.7598850439982695, iteration: 427733
loss: 0.9986856579780579,grad_norm: 0.8758638143569601, iteration: 427734
loss: 1.000468373298645,grad_norm: 0.9999999099071485, iteration: 427735
loss: 0.9832729697227478,grad_norm: 0.7463413786489161, iteration: 427736
loss: 0.9684275984764099,grad_norm: 0.9229261385754186, iteration: 427737
loss: 0.9957828521728516,grad_norm: 0.7807530683856918, iteration: 427738
loss: 1.0031747817993164,grad_norm: 0.842522613652843, iteration: 427739
loss: 0.9986343383789062,grad_norm: 0.7561805794942897, iteration: 427740
loss: 1.068381667137146,grad_norm: 0.9999990836974519, iteration: 427741
loss: 0.9932687282562256,grad_norm: 0.884894390398159, iteration: 427742
loss: 1.0634328126907349,grad_norm: 0.9999992000273265, iteration: 427743
loss: 0.9917149543762207,grad_norm: 0.9564739185915091, iteration: 427744
loss: 1.003377914428711,grad_norm: 0.741926146357513, iteration: 427745
loss: 1.0162986516952515,grad_norm: 0.704930421696992, iteration: 427746
loss: 1.004787564277649,grad_norm: 0.759898237048304, iteration: 427747
loss: 0.9982085824012756,grad_norm: 0.9999995128475994, iteration: 427748
loss: 0.9766049385070801,grad_norm: 0.775407453775881, iteration: 427749
loss: 0.9757047891616821,grad_norm: 0.6866275725394728, iteration: 427750
loss: 0.9570859670639038,grad_norm: 0.7867393532676242, iteration: 427751
loss: 0.9951834678649902,grad_norm: 0.8708204884976636, iteration: 427752
loss: 0.9660487771034241,grad_norm: 0.6733763360293198, iteration: 427753
loss: 0.9947213530540466,grad_norm: 0.7995377964292639, iteration: 427754
loss: 1.0315170288085938,grad_norm: 0.6733370599495822, iteration: 427755
loss: 0.9645028710365295,grad_norm: 0.8033046808018287, iteration: 427756
loss: 0.9771663546562195,grad_norm: 0.8528136170963763, iteration: 427757
loss: 1.002505898475647,grad_norm: 0.7251599307556893, iteration: 427758
loss: 1.0025309324264526,grad_norm: 0.8812029976702344, iteration: 427759
loss: 0.9859547019004822,grad_norm: 0.8616824844529202, iteration: 427760
loss: 1.008317470550537,grad_norm: 0.8179849222893295, iteration: 427761
loss: 1.0124698877334595,grad_norm: 0.7702295877938476, iteration: 427762
loss: 1.0110327005386353,grad_norm: 0.625379684588984, iteration: 427763
loss: 0.9879502654075623,grad_norm: 0.9623272334119285, iteration: 427764
loss: 0.9523146748542786,grad_norm: 0.8124539456394237, iteration: 427765
loss: 1.0158277750015259,grad_norm: 0.9999991218258463, iteration: 427766
loss: 1.0306363105773926,grad_norm: 0.8642456329473365, iteration: 427767
loss: 0.9926075339317322,grad_norm: 0.7616676986693509, iteration: 427768
loss: 1.0026315450668335,grad_norm: 0.8540311659347035, iteration: 427769
loss: 1.208726167678833,grad_norm: 0.9999999395440979, iteration: 427770
loss: 1.030213475227356,grad_norm: 0.6708797507195684, iteration: 427771
loss: 0.9956541061401367,grad_norm: 0.7467377028858497, iteration: 427772
loss: 1.0137165784835815,grad_norm: 0.7355985727361652, iteration: 427773
loss: 1.0233447551727295,grad_norm: 0.7267997803822943, iteration: 427774
loss: 1.0217434167861938,grad_norm: 0.7924905771435358, iteration: 427775
loss: 0.9320979118347168,grad_norm: 0.999999354093234, iteration: 427776
loss: 1.0094916820526123,grad_norm: 0.8556680625849304, iteration: 427777
loss: 1.0319082736968994,grad_norm: 0.76635735330833, iteration: 427778
loss: 0.9657867550849915,grad_norm: 0.737475819819741, iteration: 427779
loss: 0.9568195343017578,grad_norm: 0.7572395644733024, iteration: 427780
loss: 1.036792278289795,grad_norm: 0.753506240860221, iteration: 427781
loss: 1.0239657163619995,grad_norm: 0.8288672102114574, iteration: 427782
loss: 0.9572237730026245,grad_norm: 0.6846749530495112, iteration: 427783
loss: 0.9362184405326843,grad_norm: 0.6603265438030735, iteration: 427784
loss: 1.01163911819458,grad_norm: 0.8345480292610296, iteration: 427785
loss: 0.9685348272323608,grad_norm: 0.9630032620293145, iteration: 427786
loss: 0.9831520915031433,grad_norm: 0.8984654828435378, iteration: 427787
loss: 0.9662919640541077,grad_norm: 0.8268026595640895, iteration: 427788
loss: 0.9779196977615356,grad_norm: 0.7658344542783732, iteration: 427789
loss: 1.0305570363998413,grad_norm: 0.765630480480569, iteration: 427790
loss: 1.080609679222107,grad_norm: 0.9999997330781616, iteration: 427791
loss: 1.0068399906158447,grad_norm: 0.8567411834212507, iteration: 427792
loss: 1.0218417644500732,grad_norm: 0.7816963327956444, iteration: 427793
loss: 0.9943399429321289,grad_norm: 0.7523559848524403, iteration: 427794
loss: 1.0218623876571655,grad_norm: 0.7212949732091846, iteration: 427795
loss: 1.1481724977493286,grad_norm: 0.9999992740063943, iteration: 427796
loss: 1.0065206289291382,grad_norm: 0.737182487641968, iteration: 427797
loss: 1.0077691078186035,grad_norm: 0.9999993978294199, iteration: 427798
loss: 1.001794457435608,grad_norm: 0.9999994618725405, iteration: 427799
loss: 1.027652621269226,grad_norm: 0.7246242348043708, iteration: 427800
loss: 1.0039926767349243,grad_norm: 0.7889437493095971, iteration: 427801
loss: 1.0036561489105225,grad_norm: 0.7781967607521773, iteration: 427802
loss: 1.0272365808486938,grad_norm: 0.8249846774444448, iteration: 427803
loss: 1.0082341432571411,grad_norm: 0.6499157258934525, iteration: 427804
loss: 1.000065565109253,grad_norm: 0.8538417940306213, iteration: 427805
loss: 1.0003877878189087,grad_norm: 0.863685613465218, iteration: 427806
loss: 1.011547327041626,grad_norm: 0.8403136228348196, iteration: 427807
loss: 0.998485267162323,grad_norm: 0.8683380882063074, iteration: 427808
loss: 1.031760573387146,grad_norm: 0.7315170143715992, iteration: 427809
loss: 1.1364158391952515,grad_norm: 0.9999994318584735, iteration: 427810
loss: 1.0689764022827148,grad_norm: 0.9999994073620666, iteration: 427811
loss: 0.9995568990707397,grad_norm: 0.9999992493847548, iteration: 427812
loss: 0.9987854957580566,grad_norm: 0.6843515213882779, iteration: 427813
loss: 0.9481661319732666,grad_norm: 0.7085132405416726, iteration: 427814
loss: 0.9754040837287903,grad_norm: 0.9999991960722941, iteration: 427815
loss: 1.0057533979415894,grad_norm: 0.7706303125379607, iteration: 427816
loss: 0.9523743987083435,grad_norm: 0.6885884519865357, iteration: 427817
loss: 0.9892337918281555,grad_norm: 0.7509932291850965, iteration: 427818
loss: 0.9892165660858154,grad_norm: 0.7601278757296956, iteration: 427819
loss: 1.0009880065917969,grad_norm: 0.7889979160527331, iteration: 427820
loss: 0.9959450364112854,grad_norm: 0.7733480181990557, iteration: 427821
loss: 0.9992290735244751,grad_norm: 0.8487828251389571, iteration: 427822
loss: 1.0275593996047974,grad_norm: 0.653148867206236, iteration: 427823
loss: 1.0606038570404053,grad_norm: 0.9999994034311466, iteration: 427824
loss: 0.9805044531822205,grad_norm: 0.708097956067578, iteration: 427825
loss: 1.046066164970398,grad_norm: 0.7774693622245396, iteration: 427826
loss: 0.9967415928840637,grad_norm: 0.8332974699016455, iteration: 427827
loss: 1.027892827987671,grad_norm: 0.8169245326101126, iteration: 427828
loss: 1.0076427459716797,grad_norm: 0.7893088375398035, iteration: 427829
loss: 1.0706499814987183,grad_norm: 0.9999991658396453, iteration: 427830
loss: 0.977430522441864,grad_norm: 0.9999990119845263, iteration: 427831
loss: 1.0287058353424072,grad_norm: 0.999999033544914, iteration: 427832
loss: 1.0156288146972656,grad_norm: 0.6938331362115622, iteration: 427833
loss: 1.0231515169143677,grad_norm: 0.8757726261587071, iteration: 427834
loss: 1.0420700311660767,grad_norm: 0.8215536268747646, iteration: 427835
loss: 0.9833604693412781,grad_norm: 0.7601728491985513, iteration: 427836
loss: 0.9454718232154846,grad_norm: 0.7539123249459855, iteration: 427837
loss: 1.023062825202942,grad_norm: 0.6195379484481615, iteration: 427838
loss: 1.0179640054702759,grad_norm: 0.8167749763711445, iteration: 427839
loss: 1.0339809656143188,grad_norm: 0.999999891210526, iteration: 427840
loss: 1.001368522644043,grad_norm: 0.9999998614885665, iteration: 427841
loss: 0.9453039765357971,grad_norm: 0.610102767467765, iteration: 427842
loss: 1.1064233779907227,grad_norm: 0.9999999673928243, iteration: 427843
loss: 1.0095462799072266,grad_norm: 0.819681138478529, iteration: 427844
loss: 1.009813666343689,grad_norm: 0.6742574221858622, iteration: 427845
loss: 0.9924022555351257,grad_norm: 0.8486905782930075, iteration: 427846
loss: 1.002744436264038,grad_norm: 0.7780961710287909, iteration: 427847
loss: 0.993929386138916,grad_norm: 0.9999992623866433, iteration: 427848
loss: 1.0255922079086304,grad_norm: 0.7928957626468244, iteration: 427849
loss: 1.0038765668869019,grad_norm: 0.8728685664435654, iteration: 427850
loss: 1.0029951333999634,grad_norm: 0.7477580558219183, iteration: 427851
loss: 0.9886158108711243,grad_norm: 0.7838946231514845, iteration: 427852
loss: 0.9973676204681396,grad_norm: 0.8115381423971565, iteration: 427853
loss: 1.0644422769546509,grad_norm: 0.9999993851014157, iteration: 427854
loss: 0.9584864974021912,grad_norm: 0.7083781237068815, iteration: 427855
loss: 0.9744365215301514,grad_norm: 0.7294526980541788, iteration: 427856
loss: 1.0005687475204468,grad_norm: 0.9929097546972291, iteration: 427857
loss: 0.9917094707489014,grad_norm: 0.714156232014039, iteration: 427858
loss: 1.124358892440796,grad_norm: 0.9999997885138073, iteration: 427859
loss: 0.976699948310852,grad_norm: 0.8465053760804117, iteration: 427860
loss: 0.9905079007148743,grad_norm: 0.7988507826014958, iteration: 427861
loss: 0.9793726205825806,grad_norm: 0.7640921127382155, iteration: 427862
loss: 1.0092556476593018,grad_norm: 0.8342895135610234, iteration: 427863
loss: 0.9737892746925354,grad_norm: 0.8038947734319428, iteration: 427864
loss: 1.0141897201538086,grad_norm: 0.6808470324868864, iteration: 427865
loss: 1.0241376161575317,grad_norm: 0.8591565022633837, iteration: 427866
loss: 0.9967309236526489,grad_norm: 0.7289141399733682, iteration: 427867
loss: 0.9773020148277283,grad_norm: 0.7133306298074574, iteration: 427868
loss: 0.985717236995697,grad_norm: 0.8013952990021074, iteration: 427869
loss: 1.0126854181289673,grad_norm: 0.6186523490759582, iteration: 427870
loss: 0.9683061838150024,grad_norm: 0.8421346208287385, iteration: 427871
loss: 1.0282862186431885,grad_norm: 0.8448803981510365, iteration: 427872
loss: 0.9693279266357422,grad_norm: 0.7926589861972365, iteration: 427873
loss: 1.0287400484085083,grad_norm: 0.7196664999687776, iteration: 427874
loss: 0.9806665778160095,grad_norm: 0.9741107191481687, iteration: 427875
loss: 1.0033947229385376,grad_norm: 0.8998238650111622, iteration: 427876
loss: 1.0166443586349487,grad_norm: 0.7708280012060835, iteration: 427877
loss: 0.9764742851257324,grad_norm: 0.9880357458587433, iteration: 427878
loss: 1.0361227989196777,grad_norm: 0.9679616655097183, iteration: 427879
loss: 0.9980098605155945,grad_norm: 0.7208549789899255, iteration: 427880
loss: 1.0074598789215088,grad_norm: 0.7890513557477603, iteration: 427881
loss: 0.9662145376205444,grad_norm: 0.7292543127250448, iteration: 427882
loss: 1.0763804912567139,grad_norm: 0.999999458400188, iteration: 427883
loss: 1.0163910388946533,grad_norm: 0.7433610178872235, iteration: 427884
loss: 0.9912658333778381,grad_norm: 0.76931851598445, iteration: 427885
loss: 1.060471534729004,grad_norm: 0.9999995959880309, iteration: 427886
loss: 1.0020867586135864,grad_norm: 0.7842004367862696, iteration: 427887
loss: 0.9974498748779297,grad_norm: 0.8313006775334447, iteration: 427888
loss: 1.0179353952407837,grad_norm: 0.7876092186832635, iteration: 427889
loss: 0.9727478623390198,grad_norm: 0.6877979412198023, iteration: 427890
loss: 1.0514416694641113,grad_norm: 0.9255657561779367, iteration: 427891
loss: 0.9949968457221985,grad_norm: 0.951679083168277, iteration: 427892
loss: 1.00983464717865,grad_norm: 0.7477346765931717, iteration: 427893
loss: 1.00607168674469,grad_norm: 0.706126643587875, iteration: 427894
loss: 0.9398269057273865,grad_norm: 0.6701657814193865, iteration: 427895
loss: 1.0027912855148315,grad_norm: 0.7254831135099457, iteration: 427896
loss: 0.9917873740196228,grad_norm: 0.7951828805773874, iteration: 427897
loss: 1.0051788091659546,grad_norm: 0.7437515793813553, iteration: 427898
loss: 0.9946725964546204,grad_norm: 0.7244211561120675, iteration: 427899
loss: 0.9934636950492859,grad_norm: 0.8383901164173386, iteration: 427900
loss: 1.023908257484436,grad_norm: 0.7897789605046751, iteration: 427901
loss: 0.9765153527259827,grad_norm: 0.7389250299761407, iteration: 427902
loss: 0.9991155862808228,grad_norm: 0.7185601414821677, iteration: 427903
loss: 0.9920251369476318,grad_norm: 0.9480682761099485, iteration: 427904
loss: 1.0152051448822021,grad_norm: 0.8243655234375948, iteration: 427905
loss: 0.9936246275901794,grad_norm: 0.8043853436424218, iteration: 427906
loss: 0.9944841265678406,grad_norm: 0.8109637386393163, iteration: 427907
loss: 1.0172853469848633,grad_norm: 0.927810659678792, iteration: 427908
loss: 0.9827245473861694,grad_norm: 0.7748739547430358, iteration: 427909
loss: 0.9775336384773254,grad_norm: 0.8274006270531943, iteration: 427910
loss: 1.0019781589508057,grad_norm: 0.7389109591552327, iteration: 427911
loss: 0.9997535943984985,grad_norm: 0.9999999217481651, iteration: 427912
loss: 1.0425050258636475,grad_norm: 0.8313773100290905, iteration: 427913
loss: 1.0096657276153564,grad_norm: 0.7662073069718963, iteration: 427914
loss: 0.9902613162994385,grad_norm: 0.8522370210933329, iteration: 427915
loss: 0.9942175149917603,grad_norm: 0.7977161701149748, iteration: 427916
loss: 0.9937260746955872,grad_norm: 0.6532616794814565, iteration: 427917
loss: 1.009002447128296,grad_norm: 0.7035169721277127, iteration: 427918
loss: 1.059679627418518,grad_norm: 0.7659266176782235, iteration: 427919
loss: 1.0013092756271362,grad_norm: 0.8345510657983926, iteration: 427920
loss: 1.025691032409668,grad_norm: 0.8205694653893378, iteration: 427921
loss: 0.9921551942825317,grad_norm: 0.7223135062346979, iteration: 427922
loss: 0.9761536121368408,grad_norm: 0.8165789102611111, iteration: 427923
loss: 0.9505261778831482,grad_norm: 0.7102538516867395, iteration: 427924
loss: 1.0128055810928345,grad_norm: 0.9067584920742607, iteration: 427925
loss: 0.9849478006362915,grad_norm: 0.9895342936872396, iteration: 427926
loss: 0.9708740711212158,grad_norm: 0.7215060352168092, iteration: 427927
loss: 1.054924726486206,grad_norm: 0.9999991253328072, iteration: 427928
loss: 1.0058519840240479,grad_norm: 0.6868609081736422, iteration: 427929
loss: 1.007515549659729,grad_norm: 0.8266487761636084, iteration: 427930
loss: 1.0138123035430908,grad_norm: 0.8570652825223993, iteration: 427931
loss: 0.990059494972229,grad_norm: 0.8521186638265443, iteration: 427932
loss: 0.98769211769104,grad_norm: 0.8635661774677171, iteration: 427933
loss: 0.9776439666748047,grad_norm: 0.7220894799202138, iteration: 427934
loss: 0.9877656698226929,grad_norm: 0.9999991884641196, iteration: 427935
loss: 1.0229583978652954,grad_norm: 0.9999990670446341, iteration: 427936
loss: 1.0236262083053589,grad_norm: 0.902839248733988, iteration: 427937
loss: 1.0190733671188354,grad_norm: 0.6557681359889348, iteration: 427938
loss: 1.028355598449707,grad_norm: 0.9442951467424788, iteration: 427939
loss: 1.0275516510009766,grad_norm: 0.9158591478400665, iteration: 427940
loss: 1.0780537128448486,grad_norm: 0.7661251907433729, iteration: 427941
loss: 1.014158010482788,grad_norm: 0.6861284458479414, iteration: 427942
loss: 0.996676504611969,grad_norm: 0.845907577995019, iteration: 427943
loss: 1.0078736543655396,grad_norm: 0.9179478494311656, iteration: 427944
loss: 1.0434468984603882,grad_norm: 0.7777211827312795, iteration: 427945
loss: 0.9895402193069458,grad_norm: 0.8551614533260794, iteration: 427946
loss: 0.9586324691772461,grad_norm: 0.7438159772114401, iteration: 427947
loss: 0.9737768769264221,grad_norm: 0.7133518901563793, iteration: 427948
loss: 0.9866408705711365,grad_norm: 0.77099609666547, iteration: 427949
loss: 0.9694486856460571,grad_norm: 0.8077033141790149, iteration: 427950
loss: 1.0151557922363281,grad_norm: 0.8062116792590641, iteration: 427951
loss: 1.0073341131210327,grad_norm: 0.8128746027358603, iteration: 427952
loss: 1.0131031274795532,grad_norm: 0.8745782508449492, iteration: 427953
loss: 0.995147705078125,grad_norm: 0.9786237142132906, iteration: 427954
loss: 1.000546932220459,grad_norm: 0.7813667073618805, iteration: 427955
loss: 1.0373824834823608,grad_norm: 0.9238692325374512, iteration: 427956
loss: 0.9884011149406433,grad_norm: 0.8330465252715273, iteration: 427957
loss: 1.0002347230911255,grad_norm: 0.7749897223016115, iteration: 427958
loss: 0.99608314037323,grad_norm: 0.9013835369442714, iteration: 427959
loss: 1.0120668411254883,grad_norm: 0.6510864553945864, iteration: 427960
loss: 0.9961184859275818,grad_norm: 0.8087102227967139, iteration: 427961
loss: 1.0059791803359985,grad_norm: 0.7708355447865312, iteration: 427962
loss: 0.9811347723007202,grad_norm: 0.7339312231761892, iteration: 427963
loss: 1.099106788635254,grad_norm: 0.8459328923296612, iteration: 427964
loss: 0.9810318350791931,grad_norm: 0.7846389435755928, iteration: 427965
loss: 1.0639889240264893,grad_norm: 0.9999991947421858, iteration: 427966
loss: 1.0182304382324219,grad_norm: 0.9999991492601853, iteration: 427967
loss: 0.9860280156135559,grad_norm: 0.7820429447670673, iteration: 427968
loss: 0.9949391484260559,grad_norm: 0.8055804607018919, iteration: 427969
loss: 0.988156795501709,grad_norm: 0.7277185033507745, iteration: 427970
loss: 0.985087513923645,grad_norm: 0.781560030314656, iteration: 427971
loss: 0.9776832461357117,grad_norm: 0.6542529037639803, iteration: 427972
loss: 0.9881010055541992,grad_norm: 0.9323913496148952, iteration: 427973
loss: 0.9640095233917236,grad_norm: 0.767314784249578, iteration: 427974
loss: 0.9894939661026001,grad_norm: 0.8048783842362973, iteration: 427975
loss: 0.962583601474762,grad_norm: 0.8632569075989853, iteration: 427976
loss: 0.9946741461753845,grad_norm: 0.7078664183168168, iteration: 427977
loss: 1.0077273845672607,grad_norm: 0.6282068392778559, iteration: 427978
loss: 0.9678784608840942,grad_norm: 0.6981312202691178, iteration: 427979
loss: 0.9659492373466492,grad_norm: 0.8020549063047187, iteration: 427980
loss: 0.9946265816688538,grad_norm: 0.7368190019451103, iteration: 427981
loss: 1.0669606924057007,grad_norm: 0.9999996096205239, iteration: 427982
loss: 1.0083420276641846,grad_norm: 0.7282367748288068, iteration: 427983
loss: 0.9719936847686768,grad_norm: 0.7707434200439295, iteration: 427984
loss: 0.9874621033668518,grad_norm: 0.7065445476588815, iteration: 427985
loss: 1.0080479383468628,grad_norm: 0.7994949272618853, iteration: 427986
loss: 1.0150628089904785,grad_norm: 0.8856993871257081, iteration: 427987
loss: 0.9914053082466125,grad_norm: 0.8396795236079918, iteration: 427988
loss: 1.042407512664795,grad_norm: 0.7455666540159125, iteration: 427989
loss: 0.9819819927215576,grad_norm: 0.8431314196654521, iteration: 427990
loss: 1.0071618556976318,grad_norm: 0.6964707891598482, iteration: 427991
loss: 1.01534104347229,grad_norm: 0.9999990446421319, iteration: 427992
loss: 1.031579613685608,grad_norm: 0.7542406225592512, iteration: 427993
loss: 1.0112665891647339,grad_norm: 0.9448697384738857, iteration: 427994
loss: 0.9765625,grad_norm: 0.8237438105122303, iteration: 427995
loss: 0.9765036702156067,grad_norm: 0.7463790045869872, iteration: 427996
loss: 0.9905694127082825,grad_norm: 0.8431572150711741, iteration: 427997
loss: 1.0225389003753662,grad_norm: 0.8816453795210061, iteration: 427998
loss: 0.990687370300293,grad_norm: 0.7021427924146373, iteration: 427999
loss: 1.0139602422714233,grad_norm: 0.7524139390250144, iteration: 428000
loss: 1.0086768865585327,grad_norm: 0.9930602350824097, iteration: 428001
loss: 0.9967002272605896,grad_norm: 0.7592016244449898, iteration: 428002
loss: 0.9800568222999573,grad_norm: 0.7348025536582967, iteration: 428003
loss: 1.0354313850402832,grad_norm: 0.7710655203532684, iteration: 428004
loss: 0.9790897369384766,grad_norm: 0.949787239012628, iteration: 428005
loss: 0.9709457159042358,grad_norm: 0.8207154571992402, iteration: 428006
loss: 0.9927440881729126,grad_norm: 0.8120572037273678, iteration: 428007
loss: 0.9767176508903503,grad_norm: 0.7057720629756847, iteration: 428008
loss: 1.0097323656082153,grad_norm: 0.8065060216594157, iteration: 428009
loss: 0.9924595952033997,grad_norm: 0.6711566600171611, iteration: 428010
loss: 1.0059833526611328,grad_norm: 0.7254933455310961, iteration: 428011
loss: 1.0074325799942017,grad_norm: 0.7764560416102808, iteration: 428012
loss: 0.9449607729911804,grad_norm: 0.7605060539843351, iteration: 428013
loss: 0.9977625012397766,grad_norm: 0.7472205618885494, iteration: 428014
loss: 1.0257374048233032,grad_norm: 0.7591718790943124, iteration: 428015
loss: 1.0044301748275757,grad_norm: 0.8311385352509363, iteration: 428016
loss: 1.0309405326843262,grad_norm: 0.8446315324448145, iteration: 428017
loss: 0.9895076751708984,grad_norm: 0.7911884707428231, iteration: 428018
loss: 0.9647548198699951,grad_norm: 0.6544791817588894, iteration: 428019
loss: 1.013932466506958,grad_norm: 0.7467979667148349, iteration: 428020
loss: 1.0054552555084229,grad_norm: 0.7089756239565906, iteration: 428021
loss: 0.9899171590805054,grad_norm: 0.7910415856807305, iteration: 428022
loss: 1.0063180923461914,grad_norm: 0.7834140946631444, iteration: 428023
loss: 0.9996093511581421,grad_norm: 0.6672820570945268, iteration: 428024
loss: 0.986221194267273,grad_norm: 0.8259384882287999, iteration: 428025
loss: 1.0728611946105957,grad_norm: 0.9153045630374964, iteration: 428026
loss: 0.9710162281990051,grad_norm: 0.7742875583251887, iteration: 428027
loss: 1.0475531816482544,grad_norm: 0.8546043595681476, iteration: 428028
loss: 0.9889376759529114,grad_norm: 0.7593029571276385, iteration: 428029
loss: 0.9905381202697754,grad_norm: 0.7196038066066294, iteration: 428030
loss: 1.0078184604644775,grad_norm: 0.7909752691139651, iteration: 428031
loss: 1.0162429809570312,grad_norm: 0.8970146259363434, iteration: 428032
loss: 0.9999788999557495,grad_norm: 0.9103291026054056, iteration: 428033
loss: 0.9854000210762024,grad_norm: 0.7274791667015859, iteration: 428034
loss: 0.9773713946342468,grad_norm: 0.7877285392670518, iteration: 428035
loss: 1.0893430709838867,grad_norm: 0.9125880863492137, iteration: 428036
loss: 1.0382795333862305,grad_norm: 0.8198643495234811, iteration: 428037
loss: 1.0049220323562622,grad_norm: 0.7136871947171503, iteration: 428038
loss: 1.1485028266906738,grad_norm: 0.9999996156879264, iteration: 428039
loss: 1.0347764492034912,grad_norm: 0.8843943338065291, iteration: 428040
loss: 1.0230157375335693,grad_norm: 0.789170520111237, iteration: 428041
loss: 0.9990645051002502,grad_norm: 0.8467082093577523, iteration: 428042
loss: 0.9851881265640259,grad_norm: 0.6652614183799314, iteration: 428043
loss: 1.0222727060317993,grad_norm: 0.6617729696189844, iteration: 428044
loss: 1.0197882652282715,grad_norm: 0.7273086770213278, iteration: 428045
loss: 1.0314686298370361,grad_norm: 0.6758877342750209, iteration: 428046
loss: 1.0558820962905884,grad_norm: 0.9999990805217631, iteration: 428047
loss: 1.0093010663986206,grad_norm: 0.7306244853914448, iteration: 428048
loss: 0.9724425673484802,grad_norm: 0.7531698742725629, iteration: 428049
loss: 0.9865725040435791,grad_norm: 0.8150873675396781, iteration: 428050
loss: 1.000855565071106,grad_norm: 0.7625935410749847, iteration: 428051
loss: 1.0039113759994507,grad_norm: 0.9519921730094921, iteration: 428052
loss: 1.0073271989822388,grad_norm: 0.7048772050004986, iteration: 428053
loss: 0.9829739928245544,grad_norm: 0.7224204798401582, iteration: 428054
loss: 1.03178071975708,grad_norm: 0.8897132883461819, iteration: 428055
loss: 0.9997567534446716,grad_norm: 0.7798285619033292, iteration: 428056
loss: 0.9997923970222473,grad_norm: 0.9999991990256112, iteration: 428057
loss: 1.0056347846984863,grad_norm: 0.9790962235675872, iteration: 428058
loss: 0.9904359579086304,grad_norm: 0.8094360143544886, iteration: 428059
loss: 0.9806244373321533,grad_norm: 0.757723559318678, iteration: 428060
loss: 1.0061142444610596,grad_norm: 0.7108032934920685, iteration: 428061
loss: 0.976352870464325,grad_norm: 0.999999583041747, iteration: 428062
loss: 1.0026013851165771,grad_norm: 0.8080092537341089, iteration: 428063
loss: 0.9518412947654724,grad_norm: 0.9690317167184513, iteration: 428064
loss: 0.9937329888343811,grad_norm: 0.8427116858719609, iteration: 428065
loss: 1.0088931322097778,grad_norm: 0.9139706015318175, iteration: 428066
loss: 1.0009335279464722,grad_norm: 0.6554705955371867, iteration: 428067
loss: 1.0025415420532227,grad_norm: 0.7302149705360588, iteration: 428068
loss: 0.9865384101867676,grad_norm: 0.7093333274303668, iteration: 428069
loss: 0.9852460622787476,grad_norm: 0.7036016158691248, iteration: 428070
loss: 1.0262470245361328,grad_norm: 0.6999964896697307, iteration: 428071
loss: 1.0115721225738525,grad_norm: 0.7381097965093423, iteration: 428072
loss: 1.0075572729110718,grad_norm: 0.6695005472444676, iteration: 428073
loss: 1.0065593719482422,grad_norm: 0.7366265689150392, iteration: 428074
loss: 0.9717575311660767,grad_norm: 0.7355309403209513, iteration: 428075
loss: 1.0225125551223755,grad_norm: 0.7858352497646113, iteration: 428076
loss: 1.0244921445846558,grad_norm: 0.909197636247338, iteration: 428077
loss: 1.0036500692367554,grad_norm: 0.8540910958240079, iteration: 428078
loss: 1.0169166326522827,grad_norm: 0.9017857882675163, iteration: 428079
loss: 1.1852800846099854,grad_norm: 0.999999419732891, iteration: 428080
loss: 0.9987026453018188,grad_norm: 0.8585282923526378, iteration: 428081
loss: 1.0035804510116577,grad_norm: 0.7503527165718658, iteration: 428082
loss: 1.0270037651062012,grad_norm: 0.7228938013924182, iteration: 428083
loss: 1.0024893283843994,grad_norm: 0.7222809888890382, iteration: 428084
loss: 0.9952695369720459,grad_norm: 0.99999941936906, iteration: 428085
loss: 1.0357738733291626,grad_norm: 0.6920515948598911, iteration: 428086
loss: 1.0542402267456055,grad_norm: 0.810023981944142, iteration: 428087
loss: 0.99368816614151,grad_norm: 0.6874297204701038, iteration: 428088
loss: 1.0300004482269287,grad_norm: 0.9999991657545793, iteration: 428089
loss: 0.9919852018356323,grad_norm: 0.8410946795816534, iteration: 428090
loss: 0.9780789017677307,grad_norm: 0.6673225375898637, iteration: 428091
loss: 0.963274359703064,grad_norm: 0.7083974782845945, iteration: 428092
loss: 0.9739859104156494,grad_norm: 0.8033975908336835, iteration: 428093
loss: 1.0502928495407104,grad_norm: 0.999999101610365, iteration: 428094
loss: 0.9656739234924316,grad_norm: 0.9648347223834987, iteration: 428095
loss: 1.045020341873169,grad_norm: 0.9070138779258665, iteration: 428096
loss: 1.0055806636810303,grad_norm: 0.6618638623998212, iteration: 428097
loss: 1.0824874639511108,grad_norm: 0.9167872547874597, iteration: 428098
loss: 0.9971152544021606,grad_norm: 0.8512181615918767, iteration: 428099
loss: 1.1946310997009277,grad_norm: 0.9999998588151746, iteration: 428100
loss: 1.012252926826477,grad_norm: 0.8285278196638879, iteration: 428101
loss: 1.0239726305007935,grad_norm: 0.8103263756689295, iteration: 428102
loss: 1.007278323173523,grad_norm: 0.7667441647726521, iteration: 428103
loss: 1.0121803283691406,grad_norm: 0.9007353453476951, iteration: 428104
loss: 1.0135815143585205,grad_norm: 0.7076716088931303, iteration: 428105
loss: 1.0361297130584717,grad_norm: 0.8065334416099126, iteration: 428106
loss: 1.0079948902130127,grad_norm: 0.799472695688699, iteration: 428107
loss: 1.0235819816589355,grad_norm: 0.8054092856239602, iteration: 428108
loss: 1.0051093101501465,grad_norm: 0.7867827678077507, iteration: 428109
loss: 1.0503642559051514,grad_norm: 0.8906401061824077, iteration: 428110
loss: 1.0125459432601929,grad_norm: 0.7476025263823509, iteration: 428111
loss: 0.9944950342178345,grad_norm: 0.8258184419013842, iteration: 428112
loss: 0.9959086775779724,grad_norm: 0.9999998673717714, iteration: 428113
loss: 0.9923280477523804,grad_norm: 0.7317878489585719, iteration: 428114
loss: 1.0082721710205078,grad_norm: 0.709265783177494, iteration: 428115
loss: 1.0615848302841187,grad_norm: 0.7156999139295989, iteration: 428116
loss: 0.9688671231269836,grad_norm: 0.650205438919448, iteration: 428117
loss: 1.031654715538025,grad_norm: 0.9999998947339709, iteration: 428118
loss: 1.0099848508834839,grad_norm: 0.6803599926422977, iteration: 428119
loss: 0.9985328912734985,grad_norm: 0.7603372665359984, iteration: 428120
loss: 0.993869423866272,grad_norm: 0.7157455245657642, iteration: 428121
loss: 0.9897561073303223,grad_norm: 0.7098749124530946, iteration: 428122
loss: 1.0058811902999878,grad_norm: 0.702860530862432, iteration: 428123
loss: 1.0042344331741333,grad_norm: 0.8022497053077922, iteration: 428124
loss: 0.966344952583313,grad_norm: 0.7336509301399886, iteration: 428125
loss: 1.0171480178833008,grad_norm: 0.7833075981173022, iteration: 428126
loss: 0.9872753024101257,grad_norm: 0.7217799056599942, iteration: 428127
loss: 0.9583715200424194,grad_norm: 0.7635060684016846, iteration: 428128
loss: 0.9709194898605347,grad_norm: 0.6840253267644608, iteration: 428129
loss: 0.9829688668251038,grad_norm: 0.8733655205600589, iteration: 428130
loss: 1.0061684846878052,grad_norm: 0.6652433462252193, iteration: 428131
loss: 1.0001999139785767,grad_norm: 0.7701502428554279, iteration: 428132
loss: 0.9947482943534851,grad_norm: 0.8884690379297994, iteration: 428133
loss: 0.9697953462600708,grad_norm: 0.7095288498037704, iteration: 428134
loss: 1.130528211593628,grad_norm: 0.9999993574146907, iteration: 428135
loss: 0.9995470643043518,grad_norm: 0.7862086994964386, iteration: 428136
loss: 0.990263044834137,grad_norm: 0.9346628703417619, iteration: 428137
loss: 0.9829057455062866,grad_norm: 0.8412546874178625, iteration: 428138
loss: 0.9551706314086914,grad_norm: 0.6388859116374122, iteration: 428139
loss: 1.0305413007736206,grad_norm: 0.8140872410984555, iteration: 428140
loss: 1.0087281465530396,grad_norm: 0.7064135140135632, iteration: 428141
loss: 0.9660771489143372,grad_norm: 0.7568822485710068, iteration: 428142
loss: 0.9978212118148804,grad_norm: 0.7909366961165485, iteration: 428143
loss: 1.0445752143859863,grad_norm: 0.689852599523109, iteration: 428144
loss: 0.9901250600814819,grad_norm: 0.7191319698242307, iteration: 428145
loss: 1.0036641359329224,grad_norm: 0.9999995991480964, iteration: 428146
loss: 1.001405954360962,grad_norm: 0.9244774094441971, iteration: 428147
loss: 1.0494685173034668,grad_norm: 0.819310072957054, iteration: 428148
loss: 0.9653086066246033,grad_norm: 0.7460806821193442, iteration: 428149
loss: 0.9741945266723633,grad_norm: 0.7922758707317097, iteration: 428150
loss: 0.9738974571228027,grad_norm: 0.7606621373149045, iteration: 428151
loss: 0.998347282409668,grad_norm: 0.6441612059455576, iteration: 428152
loss: 1.0066710710525513,grad_norm: 0.8314508376249765, iteration: 428153
loss: 0.9633225202560425,grad_norm: 0.7297837719305341, iteration: 428154
loss: 1.003242015838623,grad_norm: 0.7993219132497098, iteration: 428155
loss: 1.0372366905212402,grad_norm: 0.8452608214440486, iteration: 428156
loss: 1.002720594406128,grad_norm: 0.8794280425290646, iteration: 428157
loss: 1.0546139478683472,grad_norm: 0.999999235931507, iteration: 428158
loss: 1.0271764993667603,grad_norm: 0.8932173721253304, iteration: 428159
loss: 1.0223711729049683,grad_norm: 0.9999998186063668, iteration: 428160
loss: 0.9827407598495483,grad_norm: 0.707385971331529, iteration: 428161
loss: 1.0135160684585571,grad_norm: 0.9999990509310455, iteration: 428162
loss: 1.0078952312469482,grad_norm: 0.8230211937033175, iteration: 428163
loss: 0.9463738799095154,grad_norm: 0.7333334911371385, iteration: 428164
loss: 1.0018311738967896,grad_norm: 0.8707117138681013, iteration: 428165
loss: 0.9922490119934082,grad_norm: 0.8298489776411403, iteration: 428166
loss: 1.0221731662750244,grad_norm: 0.8681366527294532, iteration: 428167
loss: 1.0321722030639648,grad_norm: 0.7146768109591548, iteration: 428168
loss: 1.0210694074630737,grad_norm: 0.7367192418008766, iteration: 428169
loss: 1.0188877582550049,grad_norm: 0.9378547665732743, iteration: 428170
loss: 0.999093234539032,grad_norm: 0.7962714766852169, iteration: 428171
loss: 0.9984117746353149,grad_norm: 0.9052047535520217, iteration: 428172
loss: 1.0398756265640259,grad_norm: 0.7760770210266884, iteration: 428173
loss: 1.0065346956253052,grad_norm: 0.7536731809855528, iteration: 428174
loss: 1.0200791358947754,grad_norm: 0.8863970163552869, iteration: 428175
loss: 0.9748453497886658,grad_norm: 0.7477666768804311, iteration: 428176
loss: 1.032646656036377,grad_norm: 0.6732537275670243, iteration: 428177
loss: 1.0145444869995117,grad_norm: 0.7588480503560018, iteration: 428178
loss: 1.0030512809753418,grad_norm: 0.8358977814594815, iteration: 428179
loss: 0.9994138479232788,grad_norm: 0.8057905304472082, iteration: 428180
loss: 1.0252622365951538,grad_norm: 0.7958985211042268, iteration: 428181
loss: 1.0112175941467285,grad_norm: 0.7509292793289521, iteration: 428182
loss: 0.9774878025054932,grad_norm: 0.729623632257443, iteration: 428183
loss: 0.985612690448761,grad_norm: 0.7873950092227712, iteration: 428184
loss: 0.9716536402702332,grad_norm: 0.7094106168043602, iteration: 428185
loss: 0.9815837144851685,grad_norm: 0.7290879555828322, iteration: 428186
loss: 1.0056774616241455,grad_norm: 0.7898323621235497, iteration: 428187
loss: 1.0179331302642822,grad_norm: 0.6365144357305049, iteration: 428188
loss: 1.0274385213851929,grad_norm: 0.9999998411961548, iteration: 428189
loss: 0.966601550579071,grad_norm: 0.7925651695577891, iteration: 428190
loss: 1.0129228830337524,grad_norm: 0.786371310440484, iteration: 428191
loss: 0.9785578846931458,grad_norm: 0.7006813510228626, iteration: 428192
loss: 1.0146749019622803,grad_norm: 0.8239577777822404, iteration: 428193
loss: 0.99102783203125,grad_norm: 0.8330619066257705, iteration: 428194
loss: 0.9719006419181824,grad_norm: 0.8729622207096996, iteration: 428195
loss: 0.9890838861465454,grad_norm: 0.6999940741596388, iteration: 428196
loss: 1.0144646167755127,grad_norm: 0.999999317772623, iteration: 428197
loss: 0.9929930567741394,grad_norm: 0.789451680581565, iteration: 428198
loss: 0.9759610295295715,grad_norm: 0.7224178067122023, iteration: 428199
loss: 0.9912797212600708,grad_norm: 0.8091574796476915, iteration: 428200
loss: 1.0238316059112549,grad_norm: 0.7575132935353113, iteration: 428201
loss: 0.9902278780937195,grad_norm: 0.7591874513624757, iteration: 428202
loss: 1.0220304727554321,grad_norm: 0.8379806840141466, iteration: 428203
loss: 1.0103237628936768,grad_norm: 0.819983747555186, iteration: 428204
loss: 0.9736689925193787,grad_norm: 0.9242056935576037, iteration: 428205
loss: 1.0195112228393555,grad_norm: 0.8661853733192215, iteration: 428206
loss: 0.9820858836174011,grad_norm: 0.6687421671880431, iteration: 428207
loss: 0.9936730861663818,grad_norm: 0.8331895639711097, iteration: 428208
loss: 0.9782704710960388,grad_norm: 0.7813632302855001, iteration: 428209
loss: 1.007175087928772,grad_norm: 0.7063252581016197, iteration: 428210
loss: 1.0089067220687866,grad_norm: 0.7581035399944094, iteration: 428211
loss: 0.9713780283927917,grad_norm: 0.6983869338441946, iteration: 428212
loss: 0.9702476263046265,grad_norm: 0.774399529609147, iteration: 428213
loss: 0.9990187883377075,grad_norm: 0.7889887014621391, iteration: 428214
loss: 1.0249251127243042,grad_norm: 0.9034694219036247, iteration: 428215
loss: 1.0138038396835327,grad_norm: 0.8513619529595085, iteration: 428216
loss: 1.056247591972351,grad_norm: 0.8564471588414381, iteration: 428217
loss: 1.0033948421478271,grad_norm: 0.845464488401837, iteration: 428218
loss: 1.0025156736373901,grad_norm: 0.8375385871754941, iteration: 428219
loss: 1.0241293907165527,grad_norm: 0.829675794666132, iteration: 428220
loss: 1.0162670612335205,grad_norm: 0.8355096520970852, iteration: 428221
loss: 0.989178478717804,grad_norm: 0.8495904721177084, iteration: 428222
loss: 1.0145201683044434,grad_norm: 0.8704587571903459, iteration: 428223
loss: 1.0274856090545654,grad_norm: 0.724423936888578, iteration: 428224
loss: 1.020789384841919,grad_norm: 0.9999993354027686, iteration: 428225
loss: 0.9623586535453796,grad_norm: 0.7637554711827997, iteration: 428226
loss: 0.9938984513282776,grad_norm: 0.7945274361198955, iteration: 428227
loss: 0.9746834635734558,grad_norm: 0.6416563976665228, iteration: 428228
loss: 0.9761874675750732,grad_norm: 0.7762962252187183, iteration: 428229
loss: 0.9922465682029724,grad_norm: 0.9999992003330896, iteration: 428230
loss: 1.0151907205581665,grad_norm: 0.7898782421659867, iteration: 428231
loss: 0.9984142780303955,grad_norm: 0.6810303834738936, iteration: 428232
loss: 1.059059500694275,grad_norm: 0.8065235195669178, iteration: 428233
loss: 1.0737518072128296,grad_norm: 0.9876649512248447, iteration: 428234
loss: 1.023248553276062,grad_norm: 0.705991045436572, iteration: 428235
loss: 0.9698049426078796,grad_norm: 0.9999991670249125, iteration: 428236
loss: 0.9937259554862976,grad_norm: 0.8020038487654128, iteration: 428237
loss: 0.9901784062385559,grad_norm: 0.7512024500437541, iteration: 428238
loss: 0.9752672910690308,grad_norm: 0.7733190264345939, iteration: 428239
loss: 1.0093791484832764,grad_norm: 0.6518546571123508, iteration: 428240
loss: 1.0101157426834106,grad_norm: 0.7645186555034759, iteration: 428241
loss: 0.9982901215553284,grad_norm: 0.6427005318689069, iteration: 428242
loss: 1.0084069967269897,grad_norm: 0.7795724737518862, iteration: 428243
loss: 0.9810886979103088,grad_norm: 0.9999991494832918, iteration: 428244
loss: 0.977252721786499,grad_norm: 0.8228696034978074, iteration: 428245
loss: 1.0023577213287354,grad_norm: 0.8531055820338515, iteration: 428246
loss: 0.9905899167060852,grad_norm: 0.684133890009271, iteration: 428247
loss: 0.9921534657478333,grad_norm: 0.7356986612038731, iteration: 428248
loss: 1.0406396389007568,grad_norm: 0.9999997830709298, iteration: 428249
loss: 1.002312183380127,grad_norm: 0.8430477250729353, iteration: 428250
loss: 1.0125541687011719,grad_norm: 0.9999991140732546, iteration: 428251
loss: 0.987543523311615,grad_norm: 0.7524447322663345, iteration: 428252
loss: 1.012396216392517,grad_norm: 0.7135878230532041, iteration: 428253
loss: 1.041089415550232,grad_norm: 0.7535542005532538, iteration: 428254
loss: 1.0414317846298218,grad_norm: 0.9999999990852927, iteration: 428255
loss: 0.9899830222129822,grad_norm: 0.6822576841372161, iteration: 428256
loss: 0.9846734404563904,grad_norm: 0.8071754752433443, iteration: 428257
loss: 0.9803394079208374,grad_norm: 0.9125052713637123, iteration: 428258
loss: 1.0788676738739014,grad_norm: 0.8700262726266661, iteration: 428259
loss: 0.9769155979156494,grad_norm: 0.7219138124255622, iteration: 428260
loss: 1.011718511581421,grad_norm: 0.7980428035649569, iteration: 428261
loss: 1.00044584274292,grad_norm: 0.7829236959359146, iteration: 428262
loss: 1.0131930112838745,grad_norm: 0.9524261385417415, iteration: 428263
loss: 0.978850245475769,grad_norm: 0.7356733079084816, iteration: 428264
loss: 1.0112546682357788,grad_norm: 0.8016486481398962, iteration: 428265
loss: 0.9990548491477966,grad_norm: 0.7455080806505335, iteration: 428266
loss: 1.0278751850128174,grad_norm: 0.8972500568246672, iteration: 428267
loss: 0.9682530760765076,grad_norm: 0.9999993727496926, iteration: 428268
loss: 0.9746993780136108,grad_norm: 0.83000001457181, iteration: 428269
loss: 1.0115399360656738,grad_norm: 0.9063018461195832, iteration: 428270
loss: 0.9998922944068909,grad_norm: 0.7528836904099878, iteration: 428271
loss: 0.9645527005195618,grad_norm: 0.7157733296241242, iteration: 428272
loss: 1.039035439491272,grad_norm: 0.9999997533334143, iteration: 428273
loss: 1.0247979164123535,grad_norm: 0.735338028847606, iteration: 428274
loss: 1.0185997486114502,grad_norm: 0.7883215525541444, iteration: 428275
loss: 0.9889838695526123,grad_norm: 0.7632768923692382, iteration: 428276
loss: 1.027108907699585,grad_norm: 0.6931168444323497, iteration: 428277
loss: 0.9757583737373352,grad_norm: 0.7386278995998323, iteration: 428278
loss: 0.9555280804634094,grad_norm: 0.7267319268033849, iteration: 428279
loss: 0.9434363842010498,grad_norm: 0.8820384793981344, iteration: 428280
loss: 0.9776604175567627,grad_norm: 0.6833515129066162, iteration: 428281
loss: 1.0405433177947998,grad_norm: 0.9999994047820558, iteration: 428282
loss: 0.9577496647834778,grad_norm: 0.9039795878833375, iteration: 428283
loss: 0.9683824181556702,grad_norm: 0.818145436520738, iteration: 428284
loss: 1.0086774826049805,grad_norm: 0.9999991298956855, iteration: 428285
loss: 0.9855855107307434,grad_norm: 0.7824445348957828, iteration: 428286
loss: 0.9989721179008484,grad_norm: 0.7997475563850727, iteration: 428287
loss: 1.0294203758239746,grad_norm: 0.9714987469214691, iteration: 428288
loss: 1.00799560546875,grad_norm: 0.7668232349730023, iteration: 428289
loss: 1.049407958984375,grad_norm: 0.7754730457000798, iteration: 428290
loss: 1.0272963047027588,grad_norm: 0.725137964530633, iteration: 428291
loss: 0.9791432023048401,grad_norm: 0.8236607849157918, iteration: 428292
loss: 0.991878867149353,grad_norm: 0.7603183096772184, iteration: 428293
loss: 1.0529725551605225,grad_norm: 0.8569912976372097, iteration: 428294
loss: 0.9900498986244202,grad_norm: 0.6996586849657384, iteration: 428295
loss: 0.9780347943305969,grad_norm: 0.7232740551472898, iteration: 428296
loss: 1.0453165769577026,grad_norm: 0.809893674359548, iteration: 428297
loss: 0.998613178730011,grad_norm: 0.7130901230026864, iteration: 428298
loss: 1.0219454765319824,grad_norm: 0.7615939320203899, iteration: 428299
loss: 1.0321557521820068,grad_norm: 0.9999990462706703, iteration: 428300
loss: 0.9762625694274902,grad_norm: 0.6919398330582236, iteration: 428301
loss: 1.028096079826355,grad_norm: 0.8313448351027246, iteration: 428302
loss: 0.9990471005439758,grad_norm: 0.8029005575001504, iteration: 428303
loss: 1.0190707445144653,grad_norm: 0.7928414404528253, iteration: 428304
loss: 1.0020204782485962,grad_norm: 0.7995669979772941, iteration: 428305
loss: 1.0017428398132324,grad_norm: 0.8241327345198866, iteration: 428306
loss: 0.9890840649604797,grad_norm: 0.8398490314789514, iteration: 428307
loss: 1.0452598333358765,grad_norm: 0.7722539052091135, iteration: 428308
loss: 0.9987431168556213,grad_norm: 0.6575756839555659, iteration: 428309
loss: 1.0251864194869995,grad_norm: 0.7763828116785481, iteration: 428310
loss: 0.9796637892723083,grad_norm: 0.6757609597244035, iteration: 428311
loss: 1.0180392265319824,grad_norm: 0.7338305954839867, iteration: 428312
loss: 1.0667303800582886,grad_norm: 0.7004466244101671, iteration: 428313
loss: 1.0162694454193115,grad_norm: 0.908456829503359, iteration: 428314
loss: 1.0440864562988281,grad_norm: 0.8104120164629962, iteration: 428315
loss: 0.9577708840370178,grad_norm: 0.7330270962798289, iteration: 428316
loss: 1.003696084022522,grad_norm: 0.9999992289259203, iteration: 428317
loss: 1.028711199760437,grad_norm: 0.994586168995028, iteration: 428318
loss: 0.9864457845687866,grad_norm: 0.7717562565423065, iteration: 428319
loss: 0.9926977157592773,grad_norm: 0.7952307471704122, iteration: 428320
loss: 0.9844765663146973,grad_norm: 0.7870875254973554, iteration: 428321
loss: 1.0201539993286133,grad_norm: 0.7707910136094481, iteration: 428322
loss: 1.0216238498687744,grad_norm: 0.6522714482696695, iteration: 428323
loss: 1.0003024339675903,grad_norm: 0.9999989980486644, iteration: 428324
loss: 1.0430846214294434,grad_norm: 0.7967240701603001, iteration: 428325
loss: 0.9868515729904175,grad_norm: 0.7255164271891632, iteration: 428326
loss: 0.98250812292099,grad_norm: 0.6889867771390699, iteration: 428327
loss: 0.9804897308349609,grad_norm: 0.8692692595006445, iteration: 428328
loss: 0.9929855465888977,grad_norm: 0.803090438965758, iteration: 428329
loss: 1.0863006114959717,grad_norm: 0.9999994726493435, iteration: 428330
loss: 1.0059152841567993,grad_norm: 0.9110598643896519, iteration: 428331
loss: 0.9613364934921265,grad_norm: 0.8019182165056331, iteration: 428332
loss: 1.105277180671692,grad_norm: 0.9999996336459627, iteration: 428333
loss: 1.179464340209961,grad_norm: 0.9999993964217596, iteration: 428334
loss: 1.0149532556533813,grad_norm: 0.719599449344725, iteration: 428335
loss: 0.9626607298851013,grad_norm: 0.9999991211337893, iteration: 428336
loss: 1.0042822360992432,grad_norm: 0.7563268806213761, iteration: 428337
loss: 0.9857084155082703,grad_norm: 0.9264576415285597, iteration: 428338
loss: 0.9841071367263794,grad_norm: 0.8229871957969789, iteration: 428339
loss: 0.9772698283195496,grad_norm: 0.7288039885970711, iteration: 428340
loss: 1.0063908100128174,grad_norm: 0.758490679275947, iteration: 428341
loss: 0.9678488373756409,grad_norm: 0.8581834806676171, iteration: 428342
loss: 1.0072083473205566,grad_norm: 0.697651433204928, iteration: 428343
loss: 0.980614185333252,grad_norm: 0.7512804309778338, iteration: 428344
loss: 0.9947240948677063,grad_norm: 0.8832407447437409, iteration: 428345
loss: 0.9996742606163025,grad_norm: 0.795811985360691, iteration: 428346
loss: 0.9607962369918823,grad_norm: 0.7520938404782528, iteration: 428347
loss: 1.0076992511749268,grad_norm: 0.6894037414940595, iteration: 428348
loss: 1.002364993095398,grad_norm: 0.9393961748192733, iteration: 428349
loss: 1.0225683450698853,grad_norm: 0.8263001632439367, iteration: 428350
loss: 1.016567349433899,grad_norm: 0.999999176020813, iteration: 428351
loss: 1.020013451576233,grad_norm: 0.9999993138318672, iteration: 428352
loss: 1.033060908317566,grad_norm: 0.7768238683845053, iteration: 428353
loss: 1.0014904737472534,grad_norm: 0.813741422328334, iteration: 428354
loss: 1.0105605125427246,grad_norm: 0.7471937160319271, iteration: 428355
loss: 1.0136120319366455,grad_norm: 0.7206289432127416, iteration: 428356
loss: 1.0266196727752686,grad_norm: 0.710317732096755, iteration: 428357
loss: 0.990682065486908,grad_norm: 0.7495761606770167, iteration: 428358
loss: 0.963538408279419,grad_norm: 0.9969778575070397, iteration: 428359
loss: 0.9700537919998169,grad_norm: 0.7037025616379344, iteration: 428360
loss: 1.0082651376724243,grad_norm: 0.7279390574105363, iteration: 428361
loss: 0.9970616102218628,grad_norm: 0.7508038077641548, iteration: 428362
loss: 0.9901478886604309,grad_norm: 0.7027101578875717, iteration: 428363
loss: 1.0005842447280884,grad_norm: 0.7301763901695788, iteration: 428364
loss: 0.9893910884857178,grad_norm: 0.7544975205801059, iteration: 428365
loss: 1.0391287803649902,grad_norm: 0.9999998193790295, iteration: 428366
loss: 1.0183998346328735,grad_norm: 0.8862335123326255, iteration: 428367
loss: 0.9710667729377747,grad_norm: 0.8069420757575047, iteration: 428368
loss: 1.0147571563720703,grad_norm: 0.7507005598578559, iteration: 428369
loss: 0.9877949357032776,grad_norm: 0.9999989993661592, iteration: 428370
loss: 1.0193718671798706,grad_norm: 0.9921357932809918, iteration: 428371
loss: 1.011895775794983,grad_norm: 0.637597527396438, iteration: 428372
loss: 0.9800025820732117,grad_norm: 0.7196379636783368, iteration: 428373
loss: 0.9717009663581848,grad_norm: 0.9999995135759071, iteration: 428374
loss: 0.99093097448349,grad_norm: 0.6207282642632536, iteration: 428375
loss: 0.9984718561172485,grad_norm: 0.6991518956654343, iteration: 428376
loss: 1.0158687829971313,grad_norm: 0.7316889848735888, iteration: 428377
loss: 0.9973491430282593,grad_norm: 0.6171009340767436, iteration: 428378
loss: 1.0192461013793945,grad_norm: 0.7142552903963275, iteration: 428379
loss: 1.049041509628296,grad_norm: 0.8942307868427486, iteration: 428380
loss: 1.0152840614318848,grad_norm: 0.9999993562495167, iteration: 428381
loss: 1.0138146877288818,grad_norm: 0.9999998903518459, iteration: 428382
loss: 0.9898423552513123,grad_norm: 0.7497246348193133, iteration: 428383
loss: 0.988506019115448,grad_norm: 0.7551704874976036, iteration: 428384
loss: 0.9953480958938599,grad_norm: 0.8171968605273483, iteration: 428385
loss: 1.0013601779937744,grad_norm: 0.8680296949032091, iteration: 428386
loss: 1.0083900690078735,grad_norm: 0.7244158614556129, iteration: 428387
loss: 1.009896159172058,grad_norm: 0.7982149222960558, iteration: 428388
loss: 1.0158004760742188,grad_norm: 0.8775677084137261, iteration: 428389
loss: 1.0155153274536133,grad_norm: 0.8149906070268329, iteration: 428390
loss: 0.9979556202888489,grad_norm: 0.7361153589515006, iteration: 428391
loss: 1.0226233005523682,grad_norm: 0.7111202090023477, iteration: 428392
loss: 0.9883731603622437,grad_norm: 0.6928984084534558, iteration: 428393
loss: 1.0103245973587036,grad_norm: 0.7300505929694049, iteration: 428394
loss: 1.0042492151260376,grad_norm: 0.7855098665516931, iteration: 428395
loss: 0.9930809736251831,grad_norm: 0.8475450957249575, iteration: 428396
loss: 1.0636168718338013,grad_norm: 0.8377230940664718, iteration: 428397
loss: 1.0045483112335205,grad_norm: 0.9999991047230683, iteration: 428398
loss: 0.9594310522079468,grad_norm: 0.8456516521532373, iteration: 428399
loss: 0.9755558371543884,grad_norm: 0.8173856021948803, iteration: 428400
loss: 0.9869216084480286,grad_norm: 0.7891302390677541, iteration: 428401
loss: 1.0256681442260742,grad_norm: 0.8166991275154631, iteration: 428402
loss: 1.0000977516174316,grad_norm: 0.7016256868103231, iteration: 428403
loss: 1.012468934059143,grad_norm: 0.6571985533305094, iteration: 428404
loss: 1.0146147012710571,grad_norm: 0.6969688761857087, iteration: 428405
loss: 0.9785326719284058,grad_norm: 0.8229165929149811, iteration: 428406
loss: 1.0092089176177979,grad_norm: 0.7176321321822707, iteration: 428407
loss: 0.9675796031951904,grad_norm: 0.8222198462894885, iteration: 428408
loss: 1.0076696872711182,grad_norm: 0.9358832752518524, iteration: 428409
loss: 1.0226614475250244,grad_norm: 0.7800224804262887, iteration: 428410
loss: 0.9688792824745178,grad_norm: 0.7076209598069316, iteration: 428411
loss: 0.9683595895767212,grad_norm: 0.7998749803964863, iteration: 428412
loss: 1.0692603588104248,grad_norm: 0.9693546154138412, iteration: 428413
loss: 0.9919078946113586,grad_norm: 0.7806387323117858, iteration: 428414
loss: 0.9991363286972046,grad_norm: 0.6709402046674323, iteration: 428415
loss: 0.9652299880981445,grad_norm: 0.7900277775314389, iteration: 428416
loss: 0.9522367715835571,grad_norm: 0.8543855649715814, iteration: 428417
loss: 0.9864610433578491,grad_norm: 0.8176096570580448, iteration: 428418
loss: 1.0120161771774292,grad_norm: 0.7430785657559297, iteration: 428419
loss: 1.0029467344284058,grad_norm: 0.7397761172534757, iteration: 428420
loss: 1.0411193370819092,grad_norm: 0.7529506079561327, iteration: 428421
loss: 0.9985122680664062,grad_norm: 0.7496180624602479, iteration: 428422
loss: 1.004782795906067,grad_norm: 0.7945125656802823, iteration: 428423
loss: 0.9883809685707092,grad_norm: 0.9999998360559977, iteration: 428424
loss: 0.9838193655014038,grad_norm: 0.7914965307036996, iteration: 428425
loss: 0.9823669791221619,grad_norm: 0.757182392052043, iteration: 428426
loss: 1.0194836854934692,grad_norm: 0.7593349247194444, iteration: 428427
loss: 0.9658119678497314,grad_norm: 0.7150573906213525, iteration: 428428
loss: 0.9894278645515442,grad_norm: 0.892531109358629, iteration: 428429
loss: 0.9848507046699524,grad_norm: 0.8830314515845029, iteration: 428430
loss: 1.0087621212005615,grad_norm: 0.7074712641258644, iteration: 428431
loss: 0.9871729016304016,grad_norm: 0.7558402093150162, iteration: 428432
loss: 1.0029163360595703,grad_norm: 0.671636497384764, iteration: 428433
loss: 1.0567314624786377,grad_norm: 0.9356927409051481, iteration: 428434
loss: 1.0023193359375,grad_norm: 0.9384599234616394, iteration: 428435
loss: 1.1111048460006714,grad_norm: 0.9999993670777892, iteration: 428436
loss: 1.1167020797729492,grad_norm: 0.9999997612585777, iteration: 428437
loss: 0.9747720956802368,grad_norm: 0.6980573877180811, iteration: 428438
loss: 0.9968016147613525,grad_norm: 0.8716411551583702, iteration: 428439
loss: 0.9904249310493469,grad_norm: 0.8994724745551735, iteration: 428440
loss: 1.0104552507400513,grad_norm: 0.8135453970900194, iteration: 428441
loss: 1.0013213157653809,grad_norm: 0.882227093616052, iteration: 428442
loss: 0.9964324831962585,grad_norm: 0.7426175309668108, iteration: 428443
loss: 1.0013469457626343,grad_norm: 0.748866873047647, iteration: 428444
loss: 0.9984900951385498,grad_norm: 0.8074079447875133, iteration: 428445
loss: 0.9589313268661499,grad_norm: 0.7457589384689126, iteration: 428446
loss: 1.0121725797653198,grad_norm: 0.704547162541321, iteration: 428447
loss: 1.1173701286315918,grad_norm: 0.9490271405786578, iteration: 428448
loss: 0.9677715301513672,grad_norm: 0.886347034520938, iteration: 428449
loss: 0.933465301990509,grad_norm: 0.9717961675336465, iteration: 428450
loss: 1.0043498277664185,grad_norm: 0.7812242228716135, iteration: 428451
loss: 0.9756785035133362,grad_norm: 0.7629532113229347, iteration: 428452
loss: 0.9980702996253967,grad_norm: 0.6639486640606294, iteration: 428453
loss: 0.9864596128463745,grad_norm: 0.7850939092558598, iteration: 428454
loss: 1.0166549682617188,grad_norm: 0.8706126182551137, iteration: 428455
loss: 0.9818289279937744,grad_norm: 0.8456474520009728, iteration: 428456
loss: 1.1050851345062256,grad_norm: 0.9999991864896455, iteration: 428457
loss: 1.0593159198760986,grad_norm: 0.9681420904605894, iteration: 428458
loss: 0.9701142907142639,grad_norm: 0.7959206987907724, iteration: 428459
loss: 1.0122740268707275,grad_norm: 0.6831577989526642, iteration: 428460
loss: 0.9864398837089539,grad_norm: 0.7050049807324309, iteration: 428461
loss: 1.031206488609314,grad_norm: 0.8324999470294612, iteration: 428462
loss: 1.0303198099136353,grad_norm: 0.8028325484606447, iteration: 428463
loss: 1.0071839094161987,grad_norm: 0.876545313558909, iteration: 428464
loss: 0.970460832118988,grad_norm: 0.7637863523585295, iteration: 428465
loss: 1.0033224821090698,grad_norm: 0.7294244475698461, iteration: 428466
loss: 1.0031797885894775,grad_norm: 0.8369876631393987, iteration: 428467
loss: 0.9915051460266113,grad_norm: 0.8332819453616197, iteration: 428468
loss: 1.0085736513137817,grad_norm: 0.6978009636557033, iteration: 428469
loss: 1.0349088907241821,grad_norm: 0.7734360217497415, iteration: 428470
loss: 0.9688500761985779,grad_norm: 0.8895524463284571, iteration: 428471
loss: 0.9853423833847046,grad_norm: 0.8080832157246156, iteration: 428472
loss: 1.0065890550613403,grad_norm: 0.7639616968012092, iteration: 428473
loss: 0.9795476198196411,grad_norm: 0.8411461206372207, iteration: 428474
loss: 1.085876703262329,grad_norm: 0.9910171553996723, iteration: 428475
loss: 1.0599143505096436,grad_norm: 0.7667620048422555, iteration: 428476
loss: 0.9648204445838928,grad_norm: 0.7356793444165406, iteration: 428477
loss: 1.0070738792419434,grad_norm: 0.7415804046468739, iteration: 428478
loss: 0.9752053022384644,grad_norm: 0.916487926535361, iteration: 428479
loss: 1.0536620616912842,grad_norm: 0.800456066293377, iteration: 428480
loss: 1.108681559562683,grad_norm: 0.8535607790599471, iteration: 428481
loss: 1.008988380432129,grad_norm: 0.6892548237811157, iteration: 428482
loss: 0.9795764088630676,grad_norm: 0.7754871444964272, iteration: 428483
loss: 0.9825628399848938,grad_norm: 0.7533069395291414, iteration: 428484
loss: 1.0532633066177368,grad_norm: 0.7575673573628711, iteration: 428485
loss: 0.9771823287010193,grad_norm: 0.7615024558101974, iteration: 428486
loss: 0.9976047873497009,grad_norm: 0.879227414802977, iteration: 428487
loss: 1.0136282444000244,grad_norm: 0.7803723386494698, iteration: 428488
loss: 1.014278769493103,grad_norm: 0.7940914001642022, iteration: 428489
loss: 0.9499208331108093,grad_norm: 0.8054899374159048, iteration: 428490
loss: 0.981519341468811,grad_norm: 0.7704980637996616, iteration: 428491
loss: 0.9284050464630127,grad_norm: 0.8836643509094532, iteration: 428492
loss: 1.008435845375061,grad_norm: 0.7989617024424436, iteration: 428493
loss: 0.9832009077072144,grad_norm: 0.6682376679713012, iteration: 428494
loss: 1.0160603523254395,grad_norm: 0.6045000677343061, iteration: 428495
loss: 0.9917033910751343,grad_norm: 0.999999577167935, iteration: 428496
loss: 1.0351674556732178,grad_norm: 0.8352827412262249, iteration: 428497
loss: 1.0233570337295532,grad_norm: 0.9991780471352503, iteration: 428498
loss: 1.0930333137512207,grad_norm: 0.9999992929934401, iteration: 428499
loss: 1.0084331035614014,grad_norm: 0.8187522528838191, iteration: 428500
loss: 1.0019643306732178,grad_norm: 0.7705872394121631, iteration: 428501
loss: 1.002611756324768,grad_norm: 0.8530381600625375, iteration: 428502
loss: 1.0229724645614624,grad_norm: 0.8052649999499283, iteration: 428503
loss: 1.0793489217758179,grad_norm: 0.9999991891294998, iteration: 428504
loss: 0.9936085343360901,grad_norm: 0.7318924937066895, iteration: 428505
loss: 1.0076520442962646,grad_norm: 0.7016425296151851, iteration: 428506
loss: 0.9983952641487122,grad_norm: 0.8585907401107693, iteration: 428507
loss: 0.9925122857093811,grad_norm: 0.8667540395740486, iteration: 428508
loss: 1.016461968421936,grad_norm: 0.8601054185787163, iteration: 428509
loss: 1.0205408334732056,grad_norm: 0.6982955014273876, iteration: 428510
loss: 0.9842816591262817,grad_norm: 0.7176621347065386, iteration: 428511
loss: 0.970115602016449,grad_norm: 0.7142560517478755, iteration: 428512
loss: 0.9999917149543762,grad_norm: 0.9143660624679991, iteration: 428513
loss: 1.0274109840393066,grad_norm: 0.9999995744493747, iteration: 428514
loss: 1.0247701406478882,grad_norm: 0.9684918900732384, iteration: 428515
loss: 1.0085867643356323,grad_norm: 0.9303396164471509, iteration: 428516
loss: 1.0454424619674683,grad_norm: 0.7532105938013359, iteration: 428517
loss: 1.0967092514038086,grad_norm: 0.9999990778889815, iteration: 428518
loss: 1.1221859455108643,grad_norm: 0.9999991311777947, iteration: 428519
loss: 0.9723236560821533,grad_norm: 0.7073768311278319, iteration: 428520
loss: 1.0147371292114258,grad_norm: 0.7096812801320422, iteration: 428521
loss: 0.9875416159629822,grad_norm: 0.7960024040145192, iteration: 428522
loss: 1.0133625268936157,grad_norm: 0.8949495789961062, iteration: 428523
loss: 0.9888216853141785,grad_norm: 0.714134796502085, iteration: 428524
loss: 1.0116784572601318,grad_norm: 0.962680825943535, iteration: 428525
loss: 1.0030924081802368,grad_norm: 0.999999879634813, iteration: 428526
loss: 1.042731523513794,grad_norm: 0.7871310687755086, iteration: 428527
loss: 0.991642415523529,grad_norm: 0.722517296347971, iteration: 428528
loss: 1.0247657299041748,grad_norm: 0.7689929432964193, iteration: 428529
loss: 1.0221505165100098,grad_norm: 0.8038147367786282, iteration: 428530
loss: 1.1082561016082764,grad_norm: 0.7989436394880037, iteration: 428531
loss: 1.0533922910690308,grad_norm: 0.9999990574811657, iteration: 428532
loss: 1.0181821584701538,grad_norm: 0.9936037673566713, iteration: 428533
loss: 1.0154824256896973,grad_norm: 0.777746316838641, iteration: 428534
loss: 1.0712047815322876,grad_norm: 0.9999999555379464, iteration: 428535
loss: 0.9907165765762329,grad_norm: 0.7452734443325137, iteration: 428536
loss: 1.032623291015625,grad_norm: 0.9999993024012768, iteration: 428537
loss: 0.9721956849098206,grad_norm: 0.7288958204060719, iteration: 428538
loss: 0.9896757006645203,grad_norm: 0.7638188249384049, iteration: 428539
loss: 1.0280112028121948,grad_norm: 0.7004024335830032, iteration: 428540
loss: 1.0157750844955444,grad_norm: 0.6802012808668542, iteration: 428541
loss: 1.006817102432251,grad_norm: 0.7717040262893263, iteration: 428542
loss: 1.0420578718185425,grad_norm: 0.8803269705902699, iteration: 428543
loss: 0.9997089505195618,grad_norm: 0.9845424857343822, iteration: 428544
loss: 1.0154764652252197,grad_norm: 0.77042902935298, iteration: 428545
loss: 0.9717581868171692,grad_norm: 0.9999999393283473, iteration: 428546
loss: 1.0083087682724,grad_norm: 0.76868398158441, iteration: 428547
loss: 0.9921678304672241,grad_norm: 0.6482434065964758, iteration: 428548
loss: 1.0122621059417725,grad_norm: 0.7007670662662037, iteration: 428549
loss: 0.98676997423172,grad_norm: 0.7846139116117904, iteration: 428550
loss: 1.009325623512268,grad_norm: 0.8171623829847618, iteration: 428551
loss: 1.0843662023544312,grad_norm: 0.8949050932696424, iteration: 428552
loss: 0.9944756031036377,grad_norm: 0.8056459198321818, iteration: 428553
loss: 1.0003299713134766,grad_norm: 0.747058769973635, iteration: 428554
loss: 0.9842358231544495,grad_norm: 0.835348973951363, iteration: 428555
loss: 1.0312831401824951,grad_norm: 0.6555817787138218, iteration: 428556
loss: 1.009570837020874,grad_norm: 0.8884077621686469, iteration: 428557
loss: 1.0592336654663086,grad_norm: 0.6257088900393784, iteration: 428558
loss: 1.0003712177276611,grad_norm: 0.9999998108625877, iteration: 428559
loss: 0.9859824776649475,grad_norm: 0.7469817045809127, iteration: 428560
loss: 0.9711176156997681,grad_norm: 0.712725162582258, iteration: 428561
loss: 0.9961371421813965,grad_norm: 0.7953278480280267, iteration: 428562
loss: 0.9936463236808777,grad_norm: 0.7391901422764228, iteration: 428563
loss: 0.956761360168457,grad_norm: 0.8738814873458471, iteration: 428564
loss: 0.9777014851570129,grad_norm: 0.9119621620604318, iteration: 428565
loss: 0.9943214058876038,grad_norm: 0.9509828819448704, iteration: 428566
loss: 1.0361758470535278,grad_norm: 0.7173916609847077, iteration: 428567
loss: 0.9995442628860474,grad_norm: 0.7331081985683267, iteration: 428568
loss: 1.0021365880966187,grad_norm: 0.9000622791464412, iteration: 428569
loss: 1.033675193786621,grad_norm: 0.7243000619037215, iteration: 428570
loss: 1.0095309019088745,grad_norm: 0.8717616088960214, iteration: 428571
loss: 1.0053341388702393,grad_norm: 0.7835949549862243, iteration: 428572
loss: 0.9938763976097107,grad_norm: 0.7750239139940988, iteration: 428573
loss: 0.974709689617157,grad_norm: 0.7706397560731166, iteration: 428574
loss: 1.0477014780044556,grad_norm: 0.9999992196973486, iteration: 428575
loss: 1.012298583984375,grad_norm: 0.8721682245076594, iteration: 428576
loss: 0.9879897832870483,grad_norm: 0.875157388916921, iteration: 428577
loss: 1.019403338432312,grad_norm: 0.6058879721953674, iteration: 428578
loss: 0.9820664525032043,grad_norm: 0.8029945061650123, iteration: 428579
loss: 0.968044102191925,grad_norm: 0.5985647961808257, iteration: 428580
loss: 1.0366380214691162,grad_norm: 0.7520107371155482, iteration: 428581
loss: 0.9584347009658813,grad_norm: 0.854508478710648, iteration: 428582
loss: 0.9733370542526245,grad_norm: 0.6969775262589007, iteration: 428583
loss: 0.9989574551582336,grad_norm: 0.7872818269687553, iteration: 428584
loss: 1.0193628072738647,grad_norm: 0.868617450861895, iteration: 428585
loss: 0.9871766567230225,grad_norm: 0.7493038830029499, iteration: 428586
loss: 0.9566444754600525,grad_norm: 0.9999992974084532, iteration: 428587
loss: 1.0058196783065796,grad_norm: 0.6811402719917531, iteration: 428588
loss: 0.9939579963684082,grad_norm: 0.8264180630956651, iteration: 428589
loss: 1.00581693649292,grad_norm: 0.8714683887409933, iteration: 428590
loss: 1.024410367012024,grad_norm: 0.7041473136850097, iteration: 428591
loss: 0.9832858443260193,grad_norm: 0.879440841384057, iteration: 428592
loss: 1.0462646484375,grad_norm: 0.8799974093310089, iteration: 428593
loss: 1.0444355010986328,grad_norm: 0.9397854877013783, iteration: 428594
loss: 1.0015813112258911,grad_norm: 0.7350352969160511, iteration: 428595
loss: 0.9691066145896912,grad_norm: 0.7721722733628622, iteration: 428596
loss: 1.0101568698883057,grad_norm: 0.8253757447150186, iteration: 428597
loss: 0.9622346758842468,grad_norm: 0.8981655615996522, iteration: 428598
loss: 0.9806778430938721,grad_norm: 0.7454874835319982, iteration: 428599
loss: 0.9981289505958557,grad_norm: 0.8985895855199897, iteration: 428600
loss: 1.0111651420593262,grad_norm: 0.8552557121638702, iteration: 428601
loss: 1.020742654800415,grad_norm: 0.9134711609582087, iteration: 428602
loss: 1.0126545429229736,grad_norm: 0.7817850121476172, iteration: 428603
loss: 1.0535852909088135,grad_norm: 0.9530182264930898, iteration: 428604
loss: 0.9660473465919495,grad_norm: 0.7250074940707232, iteration: 428605
loss: 1.0257277488708496,grad_norm: 0.826231655390666, iteration: 428606
loss: 1.0262726545333862,grad_norm: 0.7089906026281824, iteration: 428607
loss: 0.9882189631462097,grad_norm: 0.694390066862945, iteration: 428608
loss: 0.9949813485145569,grad_norm: 0.9999992455884842, iteration: 428609
loss: 0.9796869158744812,grad_norm: 0.8699717324930527, iteration: 428610
loss: 1.0043082237243652,grad_norm: 0.7682302074015428, iteration: 428611
loss: 0.973763644695282,grad_norm: 0.700732004175493, iteration: 428612
loss: 0.9902974963188171,grad_norm: 0.8941513154194036, iteration: 428613
loss: 1.036847710609436,grad_norm: 0.8772279251227441, iteration: 428614
loss: 1.0123380422592163,grad_norm: 0.7435146474777532, iteration: 428615
loss: 0.9739227294921875,grad_norm: 0.669340141560798, iteration: 428616
loss: 0.9970253705978394,grad_norm: 0.7115336673365292, iteration: 428617
loss: 1.0135905742645264,grad_norm: 0.7782277824049173, iteration: 428618
loss: 1.0479024648666382,grad_norm: 0.9398135162691154, iteration: 428619
loss: 0.9934080839157104,grad_norm: 0.7984380538857007, iteration: 428620
loss: 0.9590153098106384,grad_norm: 0.8753542532566126, iteration: 428621
loss: 1.0083025693893433,grad_norm: 0.9999997445735388, iteration: 428622
loss: 1.0347844362258911,grad_norm: 0.8537861879823226, iteration: 428623
loss: 1.032469391822815,grad_norm: 0.9332619815589909, iteration: 428624
loss: 0.97319096326828,grad_norm: 0.6755336634992526, iteration: 428625
loss: 1.067083716392517,grad_norm: 0.9999993776791387, iteration: 428626
loss: 0.9938242435455322,grad_norm: 0.8191542703401503, iteration: 428627
loss: 1.0181328058242798,grad_norm: 0.9775442280458978, iteration: 428628
loss: 0.9769986271858215,grad_norm: 0.9213951980087347, iteration: 428629
loss: 1.0242878198623657,grad_norm: 0.8389873239850262, iteration: 428630
loss: 1.0504424571990967,grad_norm: 0.7915646196820275, iteration: 428631
loss: 1.0042229890823364,grad_norm: 0.9313705896416621, iteration: 428632
loss: 0.9644948840141296,grad_norm: 0.7409452925882383, iteration: 428633
loss: 0.9669124484062195,grad_norm: 0.6897081528256789, iteration: 428634
loss: 0.9656346440315247,grad_norm: 0.9999991558164776, iteration: 428635
loss: 0.9981771111488342,grad_norm: 0.8684686650947272, iteration: 428636
loss: 0.955183744430542,grad_norm: 0.8109782451746979, iteration: 428637
loss: 1.0701922178268433,grad_norm: 0.9999999260453846, iteration: 428638
loss: 0.9521830677986145,grad_norm: 0.7142324825009704, iteration: 428639
loss: 0.9890435338020325,grad_norm: 0.8373644257927354, iteration: 428640
loss: 0.9733718037605286,grad_norm: 0.6980575214911998, iteration: 428641
loss: 1.138433575630188,grad_norm: 0.9999993969661589, iteration: 428642
loss: 1.046242117881775,grad_norm: 0.717221357445141, iteration: 428643
loss: 1.040515422821045,grad_norm: 0.6953140099567835, iteration: 428644
loss: 0.9702693223953247,grad_norm: 0.6645622564783378, iteration: 428645
loss: 1.0101872682571411,grad_norm: 0.7541964284053211, iteration: 428646
loss: 1.0653376579284668,grad_norm: 0.8440252700806458, iteration: 428647
loss: 0.9729688763618469,grad_norm: 0.7158104822672415, iteration: 428648
loss: 0.9622321724891663,grad_norm: 0.7155711900137248, iteration: 428649
loss: 1.0073013305664062,grad_norm: 0.6267620655469291, iteration: 428650
loss: 1.0044403076171875,grad_norm: 0.7480806880696872, iteration: 428651
loss: 1.004859447479248,grad_norm: 0.743836319929002, iteration: 428652
loss: 1.0162684917449951,grad_norm: 0.7335042899295375, iteration: 428653
loss: 0.9926636219024658,grad_norm: 0.9167566608378815, iteration: 428654
loss: 1.0193308591842651,grad_norm: 0.6516469670514151, iteration: 428655
loss: 1.0176031589508057,grad_norm: 0.9270754166568649, iteration: 428656
loss: 1.0086082220077515,grad_norm: 0.762506678132787, iteration: 428657
loss: 0.9806848764419556,grad_norm: 0.9999994190576635, iteration: 428658
loss: 1.0818982124328613,grad_norm: 0.8274716842744461, iteration: 428659
loss: 0.99521803855896,grad_norm: 0.9119819674058843, iteration: 428660
loss: 0.9732751846313477,grad_norm: 0.8434099905822163, iteration: 428661
loss: 1.0727245807647705,grad_norm: 0.9792070888435876, iteration: 428662
loss: 0.9891869425773621,grad_norm: 0.9235909824750403, iteration: 428663
loss: 0.9959974884986877,grad_norm: 0.687069492372421, iteration: 428664
loss: 0.9745664596557617,grad_norm: 0.8065041746364606, iteration: 428665
loss: 0.9760650396347046,grad_norm: 0.7158237921585351, iteration: 428666
loss: 0.988963782787323,grad_norm: 0.8361888482666422, iteration: 428667
loss: 0.9885331392288208,grad_norm: 0.8911807837527738, iteration: 428668
loss: 1.0388445854187012,grad_norm: 0.9999995615337458, iteration: 428669
loss: 1.1200475692749023,grad_norm: 0.9999998286297778, iteration: 428670
loss: 1.0383762121200562,grad_norm: 0.7185236754659482, iteration: 428671
loss: 0.968563973903656,grad_norm: 0.6828080289726866, iteration: 428672
loss: 0.9972527027130127,grad_norm: 0.7516119756235464, iteration: 428673
loss: 0.9926525950431824,grad_norm: 0.7213215056344168, iteration: 428674
loss: 1.0042697191238403,grad_norm: 0.8907531126655654, iteration: 428675
loss: 1.0172466039657593,grad_norm: 0.8611832323245339, iteration: 428676
loss: 1.0259956121444702,grad_norm: 0.9999991530985407, iteration: 428677
loss: 0.985248327255249,grad_norm: 0.7537444040301364, iteration: 428678
loss: 0.9632120728492737,grad_norm: 0.7018772500632694, iteration: 428679
loss: 1.0230337381362915,grad_norm: 0.761447646981952, iteration: 428680
loss: 1.023500680923462,grad_norm: 0.9496758792182377, iteration: 428681
loss: 0.9953107833862305,grad_norm: 0.8038251698985508, iteration: 428682
loss: 0.9996705055236816,grad_norm: 0.6925590416760863, iteration: 428683
loss: 0.9945553541183472,grad_norm: 0.8170683476914993, iteration: 428684
loss: 1.1036523580551147,grad_norm: 0.7653537859286118, iteration: 428685
loss: 1.0021247863769531,grad_norm: 0.6825610838759218, iteration: 428686
loss: 1.0135632753372192,grad_norm: 0.6518435837083161, iteration: 428687
loss: 0.9921678304672241,grad_norm: 0.7425609380972764, iteration: 428688
loss: 1.1058014631271362,grad_norm: 1.0000000012513228, iteration: 428689
loss: 1.0039607286453247,grad_norm: 1.00000004894138, iteration: 428690
loss: 0.9597887992858887,grad_norm: 0.9545085182950587, iteration: 428691
loss: 0.9949377775192261,grad_norm: 0.7242745139903842, iteration: 428692
loss: 1.0083914995193481,grad_norm: 0.6901696039175854, iteration: 428693
loss: 0.9951764941215515,grad_norm: 0.8896663584347605, iteration: 428694
loss: 1.0782647132873535,grad_norm: 0.7539203391462398, iteration: 428695
loss: 0.9797395467758179,grad_norm: 0.7713733824071998, iteration: 428696
loss: 1.0560108423233032,grad_norm: 0.9999989412359093, iteration: 428697
loss: 1.001526951789856,grad_norm: 0.7519822288139557, iteration: 428698
loss: 1.0412750244140625,grad_norm: 0.9226307059014839, iteration: 428699
loss: 0.990822434425354,grad_norm: 0.8148596204234538, iteration: 428700
loss: 1.0011754035949707,grad_norm: 0.9197589883137907, iteration: 428701
loss: 1.0205073356628418,grad_norm: 0.6433350407910816, iteration: 428702
loss: 1.0077805519104004,grad_norm: 0.9804316119933559, iteration: 428703
loss: 0.9916207790374756,grad_norm: 0.8593254729713568, iteration: 428704
loss: 1.0323837995529175,grad_norm: 0.7116449146865965, iteration: 428705
loss: 0.9858774542808533,grad_norm: 0.67792114092524, iteration: 428706
loss: 1.001057505607605,grad_norm: 0.9647177468874994, iteration: 428707
loss: 1.0065650939941406,grad_norm: 0.8229131381990348, iteration: 428708
loss: 1.0021858215332031,grad_norm: 0.7553833697270329, iteration: 428709
loss: 0.9969168305397034,grad_norm: 0.6733754191994916, iteration: 428710
loss: 0.9694737792015076,grad_norm: 0.9488544946832889, iteration: 428711
loss: 1.0449923276901245,grad_norm: 0.6679171890349125, iteration: 428712
loss: 1.0010701417922974,grad_norm: 0.708968581441685, iteration: 428713
loss: 1.0153130292892456,grad_norm: 0.701745623748007, iteration: 428714
loss: 0.9889457821846008,grad_norm: 0.7808097163030585, iteration: 428715
loss: 0.9626213908195496,grad_norm: 0.8642854003771848, iteration: 428716
loss: 1.0428730249404907,grad_norm: 0.9999994172380335, iteration: 428717
loss: 1.0008127689361572,grad_norm: 0.7003137813473831, iteration: 428718
loss: 1.1713662147521973,grad_norm: 0.9999998503786437, iteration: 428719
loss: 1.255929708480835,grad_norm: 0.9999997558627175, iteration: 428720
loss: 1.014205813407898,grad_norm: 0.7576314432471888, iteration: 428721
loss: 0.9882895946502686,grad_norm: 0.7223186335459356, iteration: 428722
loss: 0.9844658374786377,grad_norm: 0.8344758039684934, iteration: 428723
loss: 0.9922118186950684,grad_norm: 0.9073312757344505, iteration: 428724
loss: 0.9786627292633057,grad_norm: 0.9159321771999753, iteration: 428725
loss: 1.0010100603103638,grad_norm: 0.7989084412414685, iteration: 428726
loss: 1.001325011253357,grad_norm: 0.8663513020583503, iteration: 428727
loss: 0.9796625971794128,grad_norm: 0.6907485641091593, iteration: 428728
loss: 1.007044792175293,grad_norm: 0.7027491603079846, iteration: 428729
loss: 1.015421748161316,grad_norm: 0.9027370835622608, iteration: 428730
loss: 1.012096643447876,grad_norm: 0.7722328981121697, iteration: 428731
loss: 1.0003145933151245,grad_norm: 0.7492691908513077, iteration: 428732
loss: 0.994163990020752,grad_norm: 0.8174023189875331, iteration: 428733
loss: 1.0122889280319214,grad_norm: 0.7808140020495884, iteration: 428734
loss: 1.0751184225082397,grad_norm: 0.7130016774251556, iteration: 428735
loss: 1.0194406509399414,grad_norm: 0.9999995418544873, iteration: 428736
loss: 1.0033801794052124,grad_norm: 0.7812143884049371, iteration: 428737
loss: 1.0068696737289429,grad_norm: 0.9817573877890314, iteration: 428738
loss: 0.9796597361564636,grad_norm: 0.8687872556818443, iteration: 428739
loss: 0.9946909546852112,grad_norm: 0.6746904565466486, iteration: 428740
loss: 0.9723565578460693,grad_norm: 0.7632101699122942, iteration: 428741
loss: 1.1511915922164917,grad_norm: 0.9999999098279962, iteration: 428742
loss: 0.9660566449165344,grad_norm: 0.8213483562659739, iteration: 428743
loss: 0.9881619811058044,grad_norm: 0.7874283256828787, iteration: 428744
loss: 0.9945387840270996,grad_norm: 0.7259041692231584, iteration: 428745
loss: 1.0274386405944824,grad_norm: 0.9785246298322897, iteration: 428746
loss: 1.003643274307251,grad_norm: 0.6337887461542652, iteration: 428747
loss: 1.0642696619033813,grad_norm: 0.90837848088858, iteration: 428748
loss: 1.0044090747833252,grad_norm: 0.738904115226529, iteration: 428749
loss: 1.0431625843048096,grad_norm: 0.9999996108487269, iteration: 428750
loss: 0.9826493859291077,grad_norm: 0.7277384971583652, iteration: 428751
loss: 1.001511573791504,grad_norm: 0.850155093797035, iteration: 428752
loss: 0.9967042803764343,grad_norm: 0.8076430269507692, iteration: 428753
loss: 0.9843951463699341,grad_norm: 0.8095473038657509, iteration: 428754
loss: 0.9760528802871704,grad_norm: 0.7879474600084092, iteration: 428755
loss: 0.9700873494148254,grad_norm: 0.7464504626177546, iteration: 428756
loss: 1.0024393796920776,grad_norm: 0.999999424597398, iteration: 428757
loss: 1.0184955596923828,grad_norm: 0.8490764328833028, iteration: 428758
loss: 0.9845701456069946,grad_norm: 0.9999992304086099, iteration: 428759
loss: 0.9942463040351868,grad_norm: 0.806141267948769, iteration: 428760
loss: 1.0808876752853394,grad_norm: 0.9999991310194006, iteration: 428761
loss: 1.02873694896698,grad_norm: 0.8076400389242735, iteration: 428762
loss: 0.9890764355659485,grad_norm: 0.6252718280405083, iteration: 428763
loss: 0.9748866558074951,grad_norm: 0.8115364841275345, iteration: 428764
loss: 1.0226383209228516,grad_norm: 0.7669352956612272, iteration: 428765
loss: 1.045207142829895,grad_norm: 0.9059321174531413, iteration: 428766
loss: 1.004692792892456,grad_norm: 0.8589010196921558, iteration: 428767
loss: 0.9932826161384583,grad_norm: 0.6713187785629718, iteration: 428768
loss: 0.990973174571991,grad_norm: 0.8333075231645225, iteration: 428769
loss: 0.996387779712677,grad_norm: 0.775442384501995, iteration: 428770
loss: 1.0347267389297485,grad_norm: 0.7815410031506522, iteration: 428771
loss: 0.9811365008354187,grad_norm: 0.8682845884429198, iteration: 428772
loss: 1.1105443239212036,grad_norm: 0.9999991243071049, iteration: 428773
loss: 1.009418249130249,grad_norm: 0.802981030839862, iteration: 428774
loss: 0.9608175754547119,grad_norm: 0.8759090759170365, iteration: 428775
loss: 1.0208746194839478,grad_norm: 0.8648424231101068, iteration: 428776
loss: 0.9884704947471619,grad_norm: 0.7450104350068945, iteration: 428777
loss: 0.9543763399124146,grad_norm: 0.8536198242143913, iteration: 428778
loss: 0.9604166746139526,grad_norm: 0.8176179701905238, iteration: 428779
loss: 0.9902040958404541,grad_norm: 0.8393383959914701, iteration: 428780
loss: 1.009002447128296,grad_norm: 0.7933771084470095, iteration: 428781
loss: 0.9788538217544556,grad_norm: 0.8109413027219007, iteration: 428782
loss: 0.9788187146186829,grad_norm: 0.6275186191886615, iteration: 428783
loss: 0.9931999444961548,grad_norm: 0.9348861011293819, iteration: 428784
loss: 1.014536738395691,grad_norm: 0.7038259384437938, iteration: 428785
loss: 0.9755131006240845,grad_norm: 0.7848793513071101, iteration: 428786
loss: 1.0279563665390015,grad_norm: 0.7000915731754176, iteration: 428787
loss: 1.101492166519165,grad_norm: 0.9999992651460357, iteration: 428788
loss: 0.9741422533988953,grad_norm: 0.7985203549321664, iteration: 428789
loss: 0.9611894488334656,grad_norm: 0.7851597271030898, iteration: 428790
loss: 1.093364953994751,grad_norm: 0.9135370189037701, iteration: 428791
loss: 0.9818061590194702,grad_norm: 0.8410314674294838, iteration: 428792
loss: 0.9881712198257446,grad_norm: 0.70908228525831, iteration: 428793
loss: 0.9978824853897095,grad_norm: 0.8660002915107903, iteration: 428794
loss: 1.0050510168075562,grad_norm: 0.7588841189591914, iteration: 428795
loss: 0.9746397733688354,grad_norm: 0.82923884438088, iteration: 428796
loss: 1.0049118995666504,grad_norm: 0.651147483961549, iteration: 428797
loss: 1.0077667236328125,grad_norm: 0.8202882335836054, iteration: 428798
loss: 0.9890682697296143,grad_norm: 0.7704360668398178, iteration: 428799
loss: 0.9923220276832581,grad_norm: 0.7223278220445195, iteration: 428800
loss: 1.022498607635498,grad_norm: 0.7654445936473092, iteration: 428801
loss: 0.9794018864631653,grad_norm: 0.9875743784553392, iteration: 428802
loss: 1.0093214511871338,grad_norm: 0.8856312101935148, iteration: 428803
loss: 1.0064624547958374,grad_norm: 0.6186515988655238, iteration: 428804
loss: 1.0273877382278442,grad_norm: 0.7004643000921504, iteration: 428805
loss: 1.0506186485290527,grad_norm: 0.9999999477853091, iteration: 428806
loss: 1.0109862089157104,grad_norm: 0.7435343504133857, iteration: 428807
loss: 1.0663342475891113,grad_norm: 0.9999990666144013, iteration: 428808
loss: 0.9699223637580872,grad_norm: 0.6257007444093329, iteration: 428809
loss: 0.9873291254043579,grad_norm: 0.7430970863012589, iteration: 428810
loss: 0.9989496469497681,grad_norm: 0.6807369233090537, iteration: 428811
loss: 0.984406590461731,grad_norm: 0.757011396660625, iteration: 428812
loss: 1.0082433223724365,grad_norm: 0.7734693823314581, iteration: 428813
loss: 1.016980767250061,grad_norm: 0.8805383791075934, iteration: 428814
loss: 0.990263819694519,grad_norm: 0.7787677112997362, iteration: 428815
loss: 1.0171090364456177,grad_norm: 0.999999863408511, iteration: 428816
loss: 0.9800541400909424,grad_norm: 0.807031537997771, iteration: 428817
loss: 1.029531717300415,grad_norm: 0.8209925655118273, iteration: 428818
loss: 1.0037363767623901,grad_norm: 0.844069800814676, iteration: 428819
loss: 0.997717022895813,grad_norm: 0.7724891133976715, iteration: 428820
loss: 0.9549605250358582,grad_norm: 0.9052361953469173, iteration: 428821
loss: 1.0009405612945557,grad_norm: 0.8833889639531001, iteration: 428822
loss: 1.047921061515808,grad_norm: 0.9041619869667887, iteration: 428823
loss: 0.9883455038070679,grad_norm: 0.699335630836234, iteration: 428824
loss: 0.9974355101585388,grad_norm: 0.7655351958888806, iteration: 428825
loss: 1.0417301654815674,grad_norm: 0.9127513480627076, iteration: 428826
loss: 0.993403971195221,grad_norm: 0.7850195128656624, iteration: 428827
loss: 1.0209606885910034,grad_norm: 0.7314660991463103, iteration: 428828
loss: 1.0635558366775513,grad_norm: 0.7139550622597005, iteration: 428829
loss: 1.0189464092254639,grad_norm: 0.8015060724399883, iteration: 428830
loss: 1.0662553310394287,grad_norm: 0.8245910773295897, iteration: 428831
loss: 1.0237370729446411,grad_norm: 0.7556481626654429, iteration: 428832
loss: 1.0177855491638184,grad_norm: 0.7703120454821863, iteration: 428833
loss: 0.9645141959190369,grad_norm: 0.7544999728344723, iteration: 428834
loss: 1.0030485391616821,grad_norm: 0.8730322584112638, iteration: 428835
loss: 1.0037826299667358,grad_norm: 0.6948588749881113, iteration: 428836
loss: 1.018144130706787,grad_norm: 0.7632034668906501, iteration: 428837
loss: 1.022294282913208,grad_norm: 0.8277872917508707, iteration: 428838
loss: 0.9915359020233154,grad_norm: 0.7945210604195543, iteration: 428839
loss: 1.0110000371932983,grad_norm: 0.999999990223709, iteration: 428840
loss: 0.9444908499717712,grad_norm: 0.7923109040123176, iteration: 428841
loss: 0.9608817100524902,grad_norm: 0.810940532275216, iteration: 428842
loss: 0.9978054165840149,grad_norm: 0.9750112687523466, iteration: 428843
loss: 0.9975126385688782,grad_norm: 0.7311058976823942, iteration: 428844
loss: 1.0152860879898071,grad_norm: 0.8762618097376131, iteration: 428845
loss: 1.0380626916885376,grad_norm: 0.8000317782642604, iteration: 428846
loss: 0.9904643297195435,grad_norm: 0.7805353934688785, iteration: 428847
loss: 0.9856244921684265,grad_norm: 0.8726457320730207, iteration: 428848
loss: 1.0157276391983032,grad_norm: 0.7358868177629452, iteration: 428849
loss: 1.0130058526992798,grad_norm: 0.8623695331153799, iteration: 428850
loss: 1.0294008255004883,grad_norm: 0.6647362857297697, iteration: 428851
loss: 0.9921602606773376,grad_norm: 0.6620608404855587, iteration: 428852
loss: 1.018574595451355,grad_norm: 0.7902310284058505, iteration: 428853
loss: 0.9502736926078796,grad_norm: 0.7545741385227102, iteration: 428854
loss: 0.9889995455741882,grad_norm: 0.8401775924634712, iteration: 428855
loss: 1.012783169746399,grad_norm: 0.6794940959520911, iteration: 428856
loss: 0.9826899170875549,grad_norm: 0.7103442620849074, iteration: 428857
loss: 0.9777833223342896,grad_norm: 0.7658718352042474, iteration: 428858
loss: 0.9983484745025635,grad_norm: 0.9169461800437718, iteration: 428859
loss: 0.9801860451698303,grad_norm: 0.6478696536673595, iteration: 428860
loss: 0.9845794439315796,grad_norm: 0.741107632197697, iteration: 428861
loss: 0.9826438426971436,grad_norm: 0.7913244493797019, iteration: 428862
loss: 0.9799231290817261,grad_norm: 0.6839903100569145, iteration: 428863
loss: 0.9823470115661621,grad_norm: 0.7260506616903355, iteration: 428864
loss: 0.9923335909843445,grad_norm: 0.9999994376635232, iteration: 428865
loss: 0.9887400269508362,grad_norm: 0.7303729440102372, iteration: 428866
loss: 0.9715811014175415,grad_norm: 0.7238743123669147, iteration: 428867
loss: 0.984636127948761,grad_norm: 0.8855869556233746, iteration: 428868
loss: 0.9938713312149048,grad_norm: 0.7566626316959257, iteration: 428869
loss: 1.0025631189346313,grad_norm: 0.6910733568403195, iteration: 428870
loss: 1.0174957513809204,grad_norm: 0.7456531798176252, iteration: 428871
loss: 0.9813838601112366,grad_norm: 0.7966748788382148, iteration: 428872
loss: 0.9950857162475586,grad_norm: 0.8174398830169854, iteration: 428873
loss: 1.04339599609375,grad_norm: 0.9736354740867637, iteration: 428874
loss: 0.979379415512085,grad_norm: 0.7955714782589786, iteration: 428875
loss: 1.0352296829223633,grad_norm: 0.875657319160658, iteration: 428876
loss: 1.0028738975524902,grad_norm: 0.6333355014121019, iteration: 428877
loss: 1.0180871486663818,grad_norm: 0.8471845898720737, iteration: 428878
loss: 0.9849955439567566,grad_norm: 0.8696285291350426, iteration: 428879
loss: 1.0114538669586182,grad_norm: 0.8543247402808756, iteration: 428880
loss: 0.9812573790550232,grad_norm: 0.7356954573461026, iteration: 428881
loss: 0.9834842085838318,grad_norm: 0.8100229357623815, iteration: 428882
loss: 0.9711465239524841,grad_norm: 0.8720825776895086, iteration: 428883
loss: 1.0212105512619019,grad_norm: 0.9599924446117565, iteration: 428884
loss: 0.9946257472038269,grad_norm: 0.8961030356225167, iteration: 428885
loss: 1.0233912467956543,grad_norm: 0.7526376218870738, iteration: 428886
loss: 0.9858320355415344,grad_norm: 0.7827015467081281, iteration: 428887
loss: 1.0392498970031738,grad_norm: 0.9999996582377051, iteration: 428888
loss: 1.0131431818008423,grad_norm: 0.7863130962709539, iteration: 428889
loss: 0.9636961817741394,grad_norm: 0.7682421055097798, iteration: 428890
loss: 1.0248445272445679,grad_norm: 0.6652322458189391, iteration: 428891
loss: 1.049627423286438,grad_norm: 0.7338131491736626, iteration: 428892
loss: 1.0265730619430542,grad_norm: 0.8658309295355997, iteration: 428893
loss: 1.00165593624115,grad_norm: 0.6911984026789926, iteration: 428894
loss: 0.9990643262863159,grad_norm: 0.8291702539641493, iteration: 428895
loss: 0.968451201915741,grad_norm: 0.7724324846333345, iteration: 428896
loss: 0.9667173624038696,grad_norm: 0.9690154034615798, iteration: 428897
loss: 1.0380855798721313,grad_norm: 0.8937302725037671, iteration: 428898
loss: 0.9863148927688599,grad_norm: 0.794869392893252, iteration: 428899
loss: 1.023600459098816,grad_norm: 0.6544660157012911, iteration: 428900
loss: 0.986093282699585,grad_norm: 0.9028625707686208, iteration: 428901
loss: 1.0141053199768066,grad_norm: 0.9581512314723801, iteration: 428902
loss: 0.9981666803359985,grad_norm: 0.7681275376734491, iteration: 428903
loss: 1.008345603942871,grad_norm: 0.9999993991839081, iteration: 428904
loss: 0.9697567820549011,grad_norm: 0.8568784640890218, iteration: 428905
loss: 1.0201833248138428,grad_norm: 0.7538065306819898, iteration: 428906
loss: 0.9679130911827087,grad_norm: 0.7347018861211282, iteration: 428907
loss: 0.951303243637085,grad_norm: 0.7975005164137593, iteration: 428908
loss: 0.9852089285850525,grad_norm: 0.7675270374627597, iteration: 428909
loss: 1.026171326637268,grad_norm: 0.9358614585806373, iteration: 428910
loss: 0.9730420708656311,grad_norm: 0.8323838484231977, iteration: 428911
loss: 1.0025453567504883,grad_norm: 0.9999994146099107, iteration: 428912
loss: 1.0142152309417725,grad_norm: 0.6923643001905498, iteration: 428913
loss: 1.0267339944839478,grad_norm: 0.9999998958546585, iteration: 428914
loss: 1.0247597694396973,grad_norm: 0.9999996622001138, iteration: 428915
loss: 1.0476570129394531,grad_norm: 0.9999993055380243, iteration: 428916
loss: 1.029935598373413,grad_norm: 0.7625838568937028, iteration: 428917
loss: 0.9904166460037231,grad_norm: 0.76194484992418, iteration: 428918
loss: 1.0167357921600342,grad_norm: 0.9999989724720817, iteration: 428919
loss: 1.0274392366409302,grad_norm: 0.9999991624382196, iteration: 428920
loss: 0.9925649762153625,grad_norm: 0.7590931068902025, iteration: 428921
loss: 1.0129518508911133,grad_norm: 0.7734067072330426, iteration: 428922
loss: 1.0130599737167358,grad_norm: 0.9999992020083878, iteration: 428923
loss: 0.9799043536186218,grad_norm: 0.7027566467115199, iteration: 428924
loss: 1.0083175897598267,grad_norm: 0.7305540558908018, iteration: 428925
loss: 1.0344700813293457,grad_norm: 0.8064623395406756, iteration: 428926
loss: 0.995941162109375,grad_norm: 0.9228465405588477, iteration: 428927
loss: 1.010468602180481,grad_norm: 0.9999989806828412, iteration: 428928
loss: 0.9950177073478699,grad_norm: 0.6892370720126776, iteration: 428929
loss: 0.9866183996200562,grad_norm: 0.9285777923352174, iteration: 428930
loss: 0.9826686382293701,grad_norm: 0.8248496366163452, iteration: 428931
loss: 1.019843578338623,grad_norm: 0.9060400887454104, iteration: 428932
loss: 0.9776986241340637,grad_norm: 0.7875020706410518, iteration: 428933
loss: 0.9686753749847412,grad_norm: 0.926033036634743, iteration: 428934
loss: 1.024678349494934,grad_norm: 0.8399170951008416, iteration: 428935
loss: 1.0215036869049072,grad_norm: 0.9945105036424214, iteration: 428936
loss: 1.0105541944503784,grad_norm: 0.7170639319063845, iteration: 428937
loss: 1.0710266828536987,grad_norm: 0.9999995436376268, iteration: 428938
loss: 1.0006566047668457,grad_norm: 0.8083717735775889, iteration: 428939
loss: 1.0050820112228394,grad_norm: 0.9824154685154356, iteration: 428940
loss: 0.9754135608673096,grad_norm: 0.9791265315912884, iteration: 428941
loss: 0.9908827543258667,grad_norm: 0.7549983206382981, iteration: 428942
loss: 1.0112926959991455,grad_norm: 0.9999996322621616, iteration: 428943
loss: 0.9821736216545105,grad_norm: 0.8698099734267138, iteration: 428944
loss: 1.0412709712982178,grad_norm: 0.8394181533883563, iteration: 428945
loss: 0.9844017624855042,grad_norm: 0.6928810457275442, iteration: 428946
loss: 1.0192354917526245,grad_norm: 0.9999992551613465, iteration: 428947
loss: 1.020082950592041,grad_norm: 0.6450524858293444, iteration: 428948
loss: 0.9944106936454773,grad_norm: 0.8690321975471561, iteration: 428949
loss: 0.9870666861534119,grad_norm: 0.7678043997186007, iteration: 428950
loss: 0.9945066571235657,grad_norm: 0.786130659598969, iteration: 428951
loss: 0.9363264441490173,grad_norm: 0.6972479338859264, iteration: 428952
loss: 0.9608224034309387,grad_norm: 0.7333196838175179, iteration: 428953
loss: 1.0022616386413574,grad_norm: 0.7325697727035506, iteration: 428954
loss: 1.020788550376892,grad_norm: 0.8260441166010495, iteration: 428955
loss: 0.9889458417892456,grad_norm: 0.7191026872094944, iteration: 428956
loss: 1.0038970708847046,grad_norm: 0.6137365719862858, iteration: 428957
loss: 1.0516248941421509,grad_norm: 0.8422027592306506, iteration: 428958
loss: 0.9834696054458618,grad_norm: 0.7321489670411548, iteration: 428959
loss: 0.9660977721214294,grad_norm: 0.7336451297201757, iteration: 428960
loss: 1.0346165895462036,grad_norm: 0.6856552772969341, iteration: 428961
loss: 1.091354250907898,grad_norm: 0.762374748930471, iteration: 428962
loss: 0.9796457290649414,grad_norm: 0.8593021898895378, iteration: 428963
loss: 1.0034213066101074,grad_norm: 0.7053196134700733, iteration: 428964
loss: 0.9808651804924011,grad_norm: 0.7976410328563204, iteration: 428965
loss: 1.0046343803405762,grad_norm: 0.9999995597079309, iteration: 428966
loss: 1.0062360763549805,grad_norm: 0.9848080768732638, iteration: 428967
loss: 0.9816017746925354,grad_norm: 0.9999990238126574, iteration: 428968
loss: 1.0410475730895996,grad_norm: 0.8185431168127167, iteration: 428969
loss: 1.0239425897598267,grad_norm: 0.7293434523297043, iteration: 428970
loss: 0.9854200482368469,grad_norm: 0.7631409625692602, iteration: 428971
loss: 1.0226266384124756,grad_norm: 0.8756722317021632, iteration: 428972
loss: 1.003270149230957,grad_norm: 0.5964739177658429, iteration: 428973
loss: 0.9938512444496155,grad_norm: 0.7544532827104541, iteration: 428974
loss: 0.9954807162284851,grad_norm: 0.8503106241238753, iteration: 428975
loss: 0.9543715119361877,grad_norm: 0.798080160234258, iteration: 428976
loss: 0.9765943288803101,grad_norm: 0.7518029844549635, iteration: 428977
loss: 0.9914102554321289,grad_norm: 0.6897605430542004, iteration: 428978
loss: 1.078122615814209,grad_norm: 0.811858074575568, iteration: 428979
loss: 0.9764888882637024,grad_norm: 0.7403355854387874, iteration: 428980
loss: 1.0350825786590576,grad_norm: 0.9999998915279091, iteration: 428981
loss: 1.010085940361023,grad_norm: 0.8325405994181126, iteration: 428982
loss: 0.9582370519638062,grad_norm: 0.9588074434541782, iteration: 428983
loss: 1.0438015460968018,grad_norm: 0.8115788212191416, iteration: 428984
loss: 1.0217467546463013,grad_norm: 0.8762727068587536, iteration: 428985
loss: 1.0874305963516235,grad_norm: 0.8096935643200346, iteration: 428986
loss: 1.0262888669967651,grad_norm: 0.99999921768372, iteration: 428987
loss: 0.9857779741287231,grad_norm: 0.9211038606018727, iteration: 428988
loss: 0.995790421962738,grad_norm: 0.766839486069439, iteration: 428989
loss: 0.9959719777107239,grad_norm: 0.9999994635088381, iteration: 428990
loss: 1.0145193338394165,grad_norm: 0.735911860277926, iteration: 428991
loss: 0.9565414786338806,grad_norm: 0.7181184512807711, iteration: 428992
loss: 0.9921998977661133,grad_norm: 0.8013301206601239, iteration: 428993
loss: 1.0019845962524414,grad_norm: 0.6486380568876412, iteration: 428994
loss: 0.9937591552734375,grad_norm: 0.8433483533735578, iteration: 428995
loss: 0.9749563932418823,grad_norm: 0.8042287302039824, iteration: 428996
loss: 0.9890525937080383,grad_norm: 0.9601069943052422, iteration: 428997
loss: 0.995341420173645,grad_norm: 0.7047636200207036, iteration: 428998
loss: 0.9842243790626526,grad_norm: 0.825841995778429, iteration: 428999
loss: 1.0392545461654663,grad_norm: 0.6754703386856086, iteration: 429000
loss: 1.020768165588379,grad_norm: 0.7861223285226857, iteration: 429001
loss: 0.9538459181785583,grad_norm: 0.7564362621542154, iteration: 429002
loss: 0.9980566501617432,grad_norm: 0.7374546183174104, iteration: 429003
loss: 1.0162317752838135,grad_norm: 0.9999993456604789, iteration: 429004
loss: 1.0113708972930908,grad_norm: 0.7753008956101335, iteration: 429005
loss: 0.9861176013946533,grad_norm: 0.6971562332249785, iteration: 429006
loss: 1.0115834474563599,grad_norm: 0.8974315617326072, iteration: 429007
loss: 0.9768414497375488,grad_norm: 0.8209060764364796, iteration: 429008
loss: 1.0630062818527222,grad_norm: 0.8818632287989011, iteration: 429009
loss: 1.0793944597244263,grad_norm: 0.9999999201232078, iteration: 429010
loss: 1.0050910711288452,grad_norm: 0.8194151252940393, iteration: 429011
loss: 0.9830272197723389,grad_norm: 0.7696978599144594, iteration: 429012
loss: 0.9840754270553589,grad_norm: 0.8008073762714333, iteration: 429013
loss: 0.9723904728889465,grad_norm: 0.6826100772571044, iteration: 429014
loss: 0.9894287586212158,grad_norm: 0.9063753010543533, iteration: 429015
loss: 1.0671783685684204,grad_norm: 0.8893462842941935, iteration: 429016
loss: 1.012695550918579,grad_norm: 0.8089029690906782, iteration: 429017
loss: 0.9771998524665833,grad_norm: 0.7613947836882932, iteration: 429018
loss: 0.9990672469139099,grad_norm: 0.8557557463301445, iteration: 429019
loss: 0.9817990660667419,grad_norm: 0.7037145403282743, iteration: 429020
loss: 1.0069599151611328,grad_norm: 0.7376415950298791, iteration: 429021
loss: 0.9849609732627869,grad_norm: 0.7528169843969067, iteration: 429022
loss: 0.9944257140159607,grad_norm: 0.7737634680401807, iteration: 429023
loss: 1.0115971565246582,grad_norm: 0.9678320675663801, iteration: 429024
loss: 1.001562476158142,grad_norm: 0.8793266070876065, iteration: 429025
loss: 1.0418686866760254,grad_norm: 0.6944110667813151, iteration: 429026
loss: 1.0033901929855347,grad_norm: 0.9999993005538262, iteration: 429027
loss: 1.0320440530776978,grad_norm: 0.7072583287259494, iteration: 429028
loss: 0.9961902499198914,grad_norm: 0.6694308871458491, iteration: 429029
loss: 0.9939981698989868,grad_norm: 0.8279146506253328, iteration: 429030
loss: 0.9948312044143677,grad_norm: 0.9835297387837818, iteration: 429031
loss: 1.0147180557250977,grad_norm: 0.6927234912107557, iteration: 429032
loss: 1.0064254999160767,grad_norm: 0.8843090625608907, iteration: 429033
loss: 1.0707521438598633,grad_norm: 0.9999991755567775, iteration: 429034
loss: 1.0171533823013306,grad_norm: 0.7070830202256607, iteration: 429035
loss: 0.9824391007423401,grad_norm: 0.7864153125790022, iteration: 429036
loss: 1.0196467638015747,grad_norm: 0.891762440885412, iteration: 429037
loss: 0.995693564414978,grad_norm: 0.7392922459054037, iteration: 429038
loss: 0.9947962164878845,grad_norm: 0.713538319115128, iteration: 429039
loss: 1.0304102897644043,grad_norm: 0.6750141899415624, iteration: 429040
loss: 0.9667485356330872,grad_norm: 0.8031205671913858, iteration: 429041
loss: 0.9696515798568726,grad_norm: 0.7072580628256948, iteration: 429042
loss: 0.9657781720161438,grad_norm: 0.724057106631479, iteration: 429043
loss: 0.9960490465164185,grad_norm: 0.7856852029949651, iteration: 429044
loss: 0.9861122965812683,grad_norm: 0.9999992846487793, iteration: 429045
loss: 0.9880857467651367,grad_norm: 0.8815474648393177, iteration: 429046
loss: 0.9917357563972473,grad_norm: 0.7277195391459014, iteration: 429047
loss: 1.0439542531967163,grad_norm: 0.9509285016118157, iteration: 429048
loss: 0.964664101600647,grad_norm: 0.7295851917119252, iteration: 429049
loss: 1.019107699394226,grad_norm: 0.8226680827130534, iteration: 429050
loss: 0.9881872534751892,grad_norm: 0.7325357963194631, iteration: 429051
loss: 0.987356960773468,grad_norm: 0.751048875997547, iteration: 429052
loss: 0.9588009715080261,grad_norm: 0.709177097420381, iteration: 429053
loss: 1.0211856365203857,grad_norm: 0.6225484407492561, iteration: 429054
loss: 1.0226515531539917,grad_norm: 0.6976829186995801, iteration: 429055
loss: 1.010120153427124,grad_norm: 0.9999995220115906, iteration: 429056
loss: 0.9826538562774658,grad_norm: 0.7284395171493666, iteration: 429057
loss: 0.9823158383369446,grad_norm: 0.6658953293028445, iteration: 429058
loss: 1.0272481441497803,grad_norm: 0.9096153692184412, iteration: 429059
loss: 0.9927759766578674,grad_norm: 0.9999997905850279, iteration: 429060
loss: 0.964677631855011,grad_norm: 0.8462630463352578, iteration: 429061
loss: 0.991652250289917,grad_norm: 0.7489168576581806, iteration: 429062
loss: 0.9973663091659546,grad_norm: 0.8002378170997312, iteration: 429063
loss: 1.016403317451477,grad_norm: 0.8430486444388028, iteration: 429064
loss: 0.9832332730293274,grad_norm: 0.7877444485761264, iteration: 429065
loss: 1.0107389688491821,grad_norm: 0.7547431747862351, iteration: 429066
loss: 0.9762784838676453,grad_norm: 0.7001733718273497, iteration: 429067
loss: 1.012539029121399,grad_norm: 0.8006128934583927, iteration: 429068
loss: 1.0266377925872803,grad_norm: 0.9999999524673897, iteration: 429069
loss: 1.0224647521972656,grad_norm: 0.8019941429151088, iteration: 429070
loss: 1.038276195526123,grad_norm: 0.9285459335010253, iteration: 429071
loss: 1.0179601907730103,grad_norm: 0.7866557997580743, iteration: 429072
loss: 0.9807290434837341,grad_norm: 0.8099585944171801, iteration: 429073
loss: 0.9985163807868958,grad_norm: 0.7879812273000958, iteration: 429074
loss: 1.024369478225708,grad_norm: 0.9479441599672067, iteration: 429075
loss: 0.9920306205749512,grad_norm: 0.7351563099744459, iteration: 429076
loss: 1.010650396347046,grad_norm: 0.7882790201519086, iteration: 429077
loss: 0.9954959154129028,grad_norm: 0.9008786663597907, iteration: 429078
loss: 1.0244593620300293,grad_norm: 0.6905046685174729, iteration: 429079
loss: 0.966973602771759,grad_norm: 0.7873683192303256, iteration: 429080
loss: 0.9930094480514526,grad_norm: 0.9999991031719122, iteration: 429081
loss: 1.0078158378601074,grad_norm: 0.9999998513050862, iteration: 429082
loss: 0.9744491577148438,grad_norm: 0.8722286992852331, iteration: 429083
loss: 0.986527144908905,grad_norm: 0.9065377116612275, iteration: 429084
loss: 1.009256362915039,grad_norm: 0.8097391120118798, iteration: 429085
loss: 1.0806316137313843,grad_norm: 0.9572874527684428, iteration: 429086
loss: 1.0058549642562866,grad_norm: 0.7956275293958216, iteration: 429087
loss: 1.0179673433303833,grad_norm: 0.8584256117890623, iteration: 429088
loss: 0.993725061416626,grad_norm: 0.7386677714699924, iteration: 429089
loss: 1.024660348892212,grad_norm: 0.9452045202179672, iteration: 429090
loss: 1.025871992111206,grad_norm: 0.9999998279545876, iteration: 429091
loss: 0.965326189994812,grad_norm: 0.717840884401344, iteration: 429092
loss: 0.9968650937080383,grad_norm: 0.9999992923781924, iteration: 429093
loss: 1.0058021545410156,grad_norm: 0.7250174344961738, iteration: 429094
loss: 0.9828224778175354,grad_norm: 0.8109567314825404, iteration: 429095
loss: 1.11152982711792,grad_norm: 0.7585155020475881, iteration: 429096
loss: 0.9708616137504578,grad_norm: 0.649970087790894, iteration: 429097
loss: 0.9764689803123474,grad_norm: 0.8185022554162967, iteration: 429098
loss: 1.0017801523208618,grad_norm: 0.8184366427229159, iteration: 429099
loss: 0.9907988905906677,grad_norm: 0.8456196726114582, iteration: 429100
loss: 0.9827397465705872,grad_norm: 0.8339450256057538, iteration: 429101
loss: 0.9737439751625061,grad_norm: 0.8862144481481187, iteration: 429102
loss: 1.016546368598938,grad_norm: 0.9999990901678968, iteration: 429103
loss: 0.9813902378082275,grad_norm: 0.7672824627750267, iteration: 429104
loss: 1.000518560409546,grad_norm: 0.8946234652259083, iteration: 429105
loss: 1.0755136013031006,grad_norm: 0.9708490511141277, iteration: 429106
loss: 0.9761945605278015,grad_norm: 0.8870807236884237, iteration: 429107
loss: 1.0049036741256714,grad_norm: 0.9999994050756589, iteration: 429108
loss: 0.9451680183410645,grad_norm: 0.9507176205930736, iteration: 429109
loss: 1.0181753635406494,grad_norm: 0.7885338873148352, iteration: 429110
loss: 0.9973934292793274,grad_norm: 0.8102470034759481, iteration: 429111
loss: 1.1678781509399414,grad_norm: 0.9999992755512703, iteration: 429112
loss: 0.990481972694397,grad_norm: 0.9681899308123273, iteration: 429113
loss: 1.0168311595916748,grad_norm: 0.9999990284998753, iteration: 429114
loss: 1.0019385814666748,grad_norm: 0.834763847356112, iteration: 429115
loss: 1.0129315853118896,grad_norm: 0.9999992711099202, iteration: 429116
loss: 1.0019304752349854,grad_norm: 0.7024658992177534, iteration: 429117
loss: 0.9569427967071533,grad_norm: 0.912723180285532, iteration: 429118
loss: 0.983778178691864,grad_norm: 0.6373289773376671, iteration: 429119
loss: 1.0335873365402222,grad_norm: 0.8554490267235624, iteration: 429120
loss: 0.9866036772727966,grad_norm: 0.6684808705436711, iteration: 429121
loss: 1.0245249271392822,grad_norm: 0.7715193671657544, iteration: 429122
loss: 1.0345485210418701,grad_norm: 0.9548644393748364, iteration: 429123
loss: 1.0178465843200684,grad_norm: 0.7963002832937269, iteration: 429124
loss: 1.0261883735656738,grad_norm: 0.8035747841194837, iteration: 429125
loss: 1.0044151544570923,grad_norm: 0.7833300284228527, iteration: 429126
loss: 1.1609737873077393,grad_norm: 0.9999995467825317, iteration: 429127
loss: 0.982275664806366,grad_norm: 0.8184133205449104, iteration: 429128
loss: 1.0391101837158203,grad_norm: 0.9999991508908449, iteration: 429129
loss: 0.9775391817092896,grad_norm: 0.9208792210879297, iteration: 429130
loss: 1.0370023250579834,grad_norm: 0.8423929726001601, iteration: 429131
loss: 0.9691455364227295,grad_norm: 0.6770480081538122, iteration: 429132
loss: 1.0356807708740234,grad_norm: 0.7586907620711224, iteration: 429133
loss: 0.9784089922904968,grad_norm: 0.7547403145752817, iteration: 429134
loss: 0.9994938969612122,grad_norm: 0.9139835354761336, iteration: 429135
loss: 0.9918012619018555,grad_norm: 0.796098467395349, iteration: 429136
loss: 0.9728301763534546,grad_norm: 0.7594270894713631, iteration: 429137
loss: 0.9760765433311462,grad_norm: 0.8711329396288282, iteration: 429138
loss: 1.0865634679794312,grad_norm: 0.8271717919709963, iteration: 429139
loss: 1.0286126136779785,grad_norm: 0.8070551946037047, iteration: 429140
loss: 1.025025486946106,grad_norm: 0.775101829350897, iteration: 429141
loss: 1.001636028289795,grad_norm: 0.8103228015729657, iteration: 429142
loss: 1.0086615085601807,grad_norm: 0.6505913348130826, iteration: 429143
loss: 0.9739720821380615,grad_norm: 0.8065589287871393, iteration: 429144
loss: 1.0248743295669556,grad_norm: 0.999999572205467, iteration: 429145
loss: 0.9922896027565002,grad_norm: 0.7645423232173796, iteration: 429146
loss: 1.0039950609207153,grad_norm: 0.9315538790322646, iteration: 429147
loss: 1.027565598487854,grad_norm: 0.8811964935849282, iteration: 429148
loss: 0.9884502291679382,grad_norm: 0.8165291640546263, iteration: 429149
loss: 0.9981435537338257,grad_norm: 0.9044286796972721, iteration: 429150
loss: 1.0135611295700073,grad_norm: 0.9999991390178965, iteration: 429151
loss: 0.9873624444007874,grad_norm: 0.7933729538615515, iteration: 429152
loss: 1.0244020223617554,grad_norm: 0.8868664474213573, iteration: 429153
loss: 0.9734406471252441,grad_norm: 0.8644951899201029, iteration: 429154
loss: 1.015043020248413,grad_norm: 0.7357902350331209, iteration: 429155
loss: 0.972150981426239,grad_norm: 0.675243117279631, iteration: 429156
loss: 1.0128508806228638,grad_norm: 0.711225632895261, iteration: 429157
loss: 0.9756375551223755,grad_norm: 0.8389715846892906, iteration: 429158
loss: 1.0139869451522827,grad_norm: 0.7401518134667296, iteration: 429159
loss: 0.9921995401382446,grad_norm: 0.8709614241486543, iteration: 429160
loss: 1.0303786993026733,grad_norm: 0.8830612876466437, iteration: 429161
loss: 0.9693208932876587,grad_norm: 0.7798732688600266, iteration: 429162
loss: 1.0095783472061157,grad_norm: 0.7956056086383002, iteration: 429163
loss: 0.9809728860855103,grad_norm: 0.6798246804540722, iteration: 429164
loss: 1.0188509225845337,grad_norm: 0.6156940960092061, iteration: 429165
loss: 1.0371310710906982,grad_norm: 0.867504243463343, iteration: 429166
loss: 1.0894266366958618,grad_norm: 0.9999993182487749, iteration: 429167
loss: 0.9731850028038025,grad_norm: 0.8067287731430939, iteration: 429168
loss: 1.016858696937561,grad_norm: 0.7900694638566402, iteration: 429169
loss: 0.9535355567932129,grad_norm: 0.7359326062024188, iteration: 429170
loss: 1.0361493825912476,grad_norm: 0.9999993813272435, iteration: 429171
loss: 1.0361708402633667,grad_norm: 0.9999998486601763, iteration: 429172
loss: 1.0224242210388184,grad_norm: 0.7167882745713218, iteration: 429173
loss: 0.9753319621086121,grad_norm: 0.7042530077744549, iteration: 429174
loss: 1.0218919515609741,grad_norm: 0.9999998626977916, iteration: 429175
loss: 1.0226984024047852,grad_norm: 0.9150929017634404, iteration: 429176
loss: 0.9650445580482483,grad_norm: 0.7723283908620333, iteration: 429177
loss: 1.0373119115829468,grad_norm: 0.772181616731009, iteration: 429178
loss: 0.9752522706985474,grad_norm: 0.8472767611757829, iteration: 429179
loss: 1.033994197845459,grad_norm: 0.8323193907744726, iteration: 429180
loss: 1.012794852256775,grad_norm: 0.9999990975453296, iteration: 429181
loss: 0.9987518191337585,grad_norm: 0.8940420724186386, iteration: 429182
loss: 1.007928729057312,grad_norm: 0.8014168619670965, iteration: 429183
loss: 1.026012897491455,grad_norm: 0.8037323513086608, iteration: 429184
loss: 1.0043609142303467,grad_norm: 0.8477381783434756, iteration: 429185
loss: 0.9798253178596497,grad_norm: 0.7844459030038914, iteration: 429186
loss: 1.0171505212783813,grad_norm: 0.9370262165444694, iteration: 429187
loss: 0.9862001538276672,grad_norm: 0.6811729905911345, iteration: 429188
loss: 1.023897647857666,grad_norm: 0.9999992959029235, iteration: 429189
loss: 0.9922794103622437,grad_norm: 0.76754304741117, iteration: 429190
loss: 1.0051010847091675,grad_norm: 0.822160245061017, iteration: 429191
loss: 0.9818787574768066,grad_norm: 0.6710977277510665, iteration: 429192
loss: 1.0124815702438354,grad_norm: 0.7697799894904239, iteration: 429193
loss: 1.0670963525772095,grad_norm: 0.9924290342678503, iteration: 429194
loss: 0.9697629809379578,grad_norm: 0.8570391599152175, iteration: 429195
loss: 1.0148985385894775,grad_norm: 0.7690879473753579, iteration: 429196
loss: 1.0691033601760864,grad_norm: 0.7791667637695757, iteration: 429197
loss: 1.020820140838623,grad_norm: 0.9999992693866072, iteration: 429198
loss: 1.06174635887146,grad_norm: 0.8435785705640043, iteration: 429199
loss: 1.0027023553848267,grad_norm: 0.8317639100028755, iteration: 429200
loss: 1.0116584300994873,grad_norm: 0.8663159687555628, iteration: 429201
loss: 1.0550919771194458,grad_norm: 0.9999991728598644, iteration: 429202
loss: 1.0260491371154785,grad_norm: 0.7759708624002769, iteration: 429203
loss: 1.0238232612609863,grad_norm: 0.9999990739192035, iteration: 429204
loss: 1.0783170461654663,grad_norm: 0.9999997747457791, iteration: 429205
loss: 1.0167455673217773,grad_norm: 0.8645019761523195, iteration: 429206
loss: 1.0138739347457886,grad_norm: 0.7864590980473375, iteration: 429207
loss: 1.0131253004074097,grad_norm: 0.9999991453839242, iteration: 429208
loss: 0.9925888180732727,grad_norm: 0.9999990549568247, iteration: 429209
loss: 0.9887251853942871,grad_norm: 0.9713738186584623, iteration: 429210
loss: 1.0405126810073853,grad_norm: 0.7590988699866469, iteration: 429211
loss: 0.9947912693023682,grad_norm: 0.6742812810415102, iteration: 429212
loss: 1.0401198863983154,grad_norm: 0.9999991338544144, iteration: 429213
loss: 1.04374098777771,grad_norm: 0.9999995030521296, iteration: 429214
loss: 1.000296711921692,grad_norm: 0.7494217244849747, iteration: 429215
loss: 1.0426263809204102,grad_norm: 0.6896386664438827, iteration: 429216
loss: 0.9978014230728149,grad_norm: 0.7802121997844398, iteration: 429217
loss: 1.0236809253692627,grad_norm: 0.9719231974761499, iteration: 429218
loss: 0.9915832281112671,grad_norm: 0.7747769638781036, iteration: 429219
loss: 1.012663722038269,grad_norm: 0.7037926705812431, iteration: 429220
loss: 1.0249866247177124,grad_norm: 0.7872041682949306, iteration: 429221
loss: 0.9651451110839844,grad_norm: 0.709155221827907, iteration: 429222
loss: 1.0096765756607056,grad_norm: 0.860566287915812, iteration: 429223
loss: 0.9832677245140076,grad_norm: 0.7889200379290927, iteration: 429224
loss: 1.245118260383606,grad_norm: 0.9999997435639599, iteration: 429225
loss: 1.0004019737243652,grad_norm: 0.8161162937765887, iteration: 429226
loss: 0.9910012483596802,grad_norm: 0.7299848804921077, iteration: 429227
loss: 0.9504033327102661,grad_norm: 0.958852270974058, iteration: 429228
loss: 1.0183384418487549,grad_norm: 0.8337483036280542, iteration: 429229
loss: 0.9652336239814758,grad_norm: 0.7302744105429353, iteration: 429230
loss: 1.0430617332458496,grad_norm: 0.6802469993519898, iteration: 429231
loss: 1.1091985702514648,grad_norm: 0.9999991899858864, iteration: 429232
loss: 0.9840575456619263,grad_norm: 0.7679109262686289, iteration: 429233
loss: 1.1259814500808716,grad_norm: 0.868053830199371, iteration: 429234
loss: 1.0091782808303833,grad_norm: 0.6825947112649029, iteration: 429235
loss: 0.9977815747261047,grad_norm: 0.9999993447284536, iteration: 429236
loss: 0.9797326922416687,grad_norm: 0.8546668986589182, iteration: 429237
loss: 0.9646475911140442,grad_norm: 0.8836427223880711, iteration: 429238
loss: 0.9781211018562317,grad_norm: 0.8759853814027303, iteration: 429239
loss: 0.993382453918457,grad_norm: 0.9190278105305105, iteration: 429240
loss: 1.0132323503494263,grad_norm: 0.9999992299432373, iteration: 429241
loss: 1.006678581237793,grad_norm: 0.90567378715788, iteration: 429242
loss: 0.9905404448509216,grad_norm: 0.9999991434140801, iteration: 429243
loss: 1.0299053192138672,grad_norm: 0.7794384028967124, iteration: 429244
loss: 0.9942220449447632,grad_norm: 0.773507239607946, iteration: 429245
loss: 0.9821488857269287,grad_norm: 0.7424313116074545, iteration: 429246
loss: 1.092966079711914,grad_norm: 0.8341475634517124, iteration: 429247
loss: 0.9928898811340332,grad_norm: 0.78462179356776, iteration: 429248
loss: 1.0191916227340698,grad_norm: 0.66933908055562, iteration: 429249
loss: 0.9741004705429077,grad_norm: 0.7084072524341788, iteration: 429250
loss: 1.0814428329467773,grad_norm: 0.940225780624768, iteration: 429251
loss: 0.9885901808738708,grad_norm: 0.7500835728428289, iteration: 429252
loss: 1.0081522464752197,grad_norm: 0.8414190494874603, iteration: 429253
loss: 0.9838038682937622,grad_norm: 0.8057998307572444, iteration: 429254
loss: 1.0668879747390747,grad_norm: 0.9999997533684267, iteration: 429255
loss: 0.997158408164978,grad_norm: 0.6918209057624174, iteration: 429256
loss: 1.0302486419677734,grad_norm: 0.7769531236673584, iteration: 429257
loss: 1.0715484619140625,grad_norm: 0.6936779674573635, iteration: 429258
loss: 1.0080623626708984,grad_norm: 0.8603316504419057, iteration: 429259
loss: 1.036523461341858,grad_norm: 0.8727212315543935, iteration: 429260
loss: 0.9626976847648621,grad_norm: 0.8119474877975917, iteration: 429261
loss: 0.985201895236969,grad_norm: 0.7049408958440239, iteration: 429262
loss: 0.9701054096221924,grad_norm: 0.7975844280066441, iteration: 429263
loss: 0.9794443845748901,grad_norm: 0.8981325538743954, iteration: 429264
loss: 1.0221550464630127,grad_norm: 0.9999994029322635, iteration: 429265
loss: 1.1155588626861572,grad_norm: 0.9999990651604821, iteration: 429266
loss: 1.0428953170776367,grad_norm: 0.6825103427174075, iteration: 429267
loss: 0.9632914662361145,grad_norm: 0.7736164776677802, iteration: 429268
loss: 1.006819248199463,grad_norm: 0.723953335146638, iteration: 429269
loss: 1.0550100803375244,grad_norm: 0.7092822481878808, iteration: 429270
loss: 0.9711206555366516,grad_norm: 0.9999989980595049, iteration: 429271
loss: 1.017432451248169,grad_norm: 0.9078533209751146, iteration: 429272
loss: 0.9938079118728638,grad_norm: 0.6975598786918161, iteration: 429273
loss: 1.0513807535171509,grad_norm: 0.9999999533058737, iteration: 429274
loss: 0.9791533946990967,grad_norm: 0.7166216192996752, iteration: 429275
loss: 0.9775221943855286,grad_norm: 0.7725943793129856, iteration: 429276
loss: 0.966992974281311,grad_norm: 0.7924445373912928, iteration: 429277
loss: 1.0509716272354126,grad_norm: 0.9595037803164211, iteration: 429278
loss: 1.0133028030395508,grad_norm: 0.7062779232043321, iteration: 429279
loss: 0.9701477885246277,grad_norm: 0.7908885134255137, iteration: 429280
loss: 0.9944791197776794,grad_norm: 0.9999992450064102, iteration: 429281
loss: 1.0252221822738647,grad_norm: 0.834689694293913, iteration: 429282
loss: 1.0604876279830933,grad_norm: 0.9197466002718336, iteration: 429283
loss: 1.0257627964019775,grad_norm: 0.7681421407247991, iteration: 429284
loss: 0.9875829815864563,grad_norm: 0.8214479510912878, iteration: 429285
loss: 0.9837679862976074,grad_norm: 0.7278683674669254, iteration: 429286
loss: 1.0112364292144775,grad_norm: 0.6347998714210571, iteration: 429287
loss: 1.057044506072998,grad_norm: 0.8194989824183458, iteration: 429288
loss: 1.0095736980438232,grad_norm: 0.9999993323484159, iteration: 429289
loss: 0.9464776515960693,grad_norm: 0.8554137325865595, iteration: 429290
loss: 0.9635291695594788,grad_norm: 0.7658305636036606, iteration: 429291
loss: 1.0436018705368042,grad_norm: 0.7329789953640141, iteration: 429292
loss: 0.9851880073547363,grad_norm: 0.6885076844934973, iteration: 429293
loss: 1.0119171142578125,grad_norm: 0.7565988102132466, iteration: 429294
loss: 1.012479305267334,grad_norm: 0.8684180508343904, iteration: 429295
loss: 1.0088515281677246,grad_norm: 0.8583995668162684, iteration: 429296
loss: 1.0387961864471436,grad_norm: 0.8941510323050462, iteration: 429297
loss: 1.0105763673782349,grad_norm: 0.8240938421412458, iteration: 429298
loss: 1.0084631443023682,grad_norm: 0.7088482803116186, iteration: 429299
loss: 1.006042242050171,grad_norm: 0.7291150253371587, iteration: 429300
loss: 0.9751420021057129,grad_norm: 0.7508062727325768, iteration: 429301
loss: 0.9824284315109253,grad_norm: 0.9999990550462738, iteration: 429302
loss: 0.9807043075561523,grad_norm: 0.6698815393854493, iteration: 429303
loss: 1.0184656381607056,grad_norm: 0.8028870082548384, iteration: 429304
loss: 0.997868001461029,grad_norm: 0.7263572387292205, iteration: 429305
loss: 1.0182290077209473,grad_norm: 0.7519308553484206, iteration: 429306
loss: 0.9710447192192078,grad_norm: 0.9199077957532604, iteration: 429307
loss: 1.135984182357788,grad_norm: 0.9999996073242354, iteration: 429308
loss: 1.0005028247833252,grad_norm: 0.7603182477252663, iteration: 429309
loss: 0.9895111918449402,grad_norm: 0.8576069476544164, iteration: 429310
loss: 1.0514159202575684,grad_norm: 0.9553590434262619, iteration: 429311
loss: 1.0395548343658447,grad_norm: 0.6937884777466319, iteration: 429312
loss: 0.9773223400115967,grad_norm: 0.6420432600201145, iteration: 429313
loss: 1.0325814485549927,grad_norm: 0.7780220068990817, iteration: 429314
loss: 1.0128960609436035,grad_norm: 0.7972444248063106, iteration: 429315
loss: 0.9966538548469543,grad_norm: 0.7287087579938994, iteration: 429316
loss: 1.0448696613311768,grad_norm: 0.999999623220492, iteration: 429317
loss: 1.0005978345870972,grad_norm: 0.9179398277047716, iteration: 429318
loss: 1.0185025930404663,grad_norm: 0.8506590839827042, iteration: 429319
loss: 1.0212754011154175,grad_norm: 0.9999996047740859, iteration: 429320
loss: 0.9889285564422607,grad_norm: 0.9394984971202737, iteration: 429321
loss: 0.9721233248710632,grad_norm: 0.744676949628356, iteration: 429322
loss: 0.9918403029441833,grad_norm: 0.9150731202194061, iteration: 429323
loss: 0.9939674139022827,grad_norm: 0.7561246513476931, iteration: 429324
loss: 1.010807991027832,grad_norm: 0.8396392000311059, iteration: 429325
loss: 0.9824814796447754,grad_norm: 0.771830583699125, iteration: 429326
loss: 1.0156304836273193,grad_norm: 0.8554145322799169, iteration: 429327
loss: 1.03360915184021,grad_norm: 0.6290591860045451, iteration: 429328
loss: 1.0532658100128174,grad_norm: 0.9549117645029931, iteration: 429329
loss: 1.0360084772109985,grad_norm: 0.6852267443306994, iteration: 429330
loss: 1.028177261352539,grad_norm: 0.9999995459200037, iteration: 429331
loss: 1.0325894355773926,grad_norm: 0.7153075527170355, iteration: 429332
loss: 1.0637450218200684,grad_norm: 0.9839086194136709, iteration: 429333
loss: 1.000465989112854,grad_norm: 0.8282626705075031, iteration: 429334
loss: 0.9892001748085022,grad_norm: 0.7589868155625894, iteration: 429335
loss: 1.005194067955017,grad_norm: 0.6717202718184924, iteration: 429336
loss: 0.9651371240615845,grad_norm: 0.7629102400324352, iteration: 429337
loss: 1.0226075649261475,grad_norm: 0.7356930058185367, iteration: 429338
loss: 1.0143091678619385,grad_norm: 0.6839859577866603, iteration: 429339
loss: 1.0186535120010376,grad_norm: 0.8473900177631966, iteration: 429340
loss: 0.9892176985740662,grad_norm: 0.7026772801862865, iteration: 429341
loss: 1.0250937938690186,grad_norm: 0.8406709948645389, iteration: 429342
loss: 1.0296536684036255,grad_norm: 0.7146008695591017, iteration: 429343
loss: 0.9745301604270935,grad_norm: 0.8279688257972931, iteration: 429344
loss: 0.9965933561325073,grad_norm: 0.7464185890249062, iteration: 429345
loss: 1.0131951570510864,grad_norm: 0.6917545563201204, iteration: 429346
loss: 1.0011255741119385,grad_norm: 0.7182007833266104, iteration: 429347
loss: 0.9683073163032532,grad_norm: 0.8362945943603067, iteration: 429348
loss: 1.0137025117874146,grad_norm: 0.676407958323596, iteration: 429349
loss: 0.9837757349014282,grad_norm: 0.8564346484898934, iteration: 429350
loss: 1.0797580480575562,grad_norm: 0.8264964699860398, iteration: 429351
loss: 1.0669103860855103,grad_norm: 0.9999996145726563, iteration: 429352
loss: 0.9986384510993958,grad_norm: 0.9999990985391977, iteration: 429353
loss: 0.9425172209739685,grad_norm: 0.8074826324002666, iteration: 429354
loss: 1.0095796585083008,grad_norm: 0.7837824020218089, iteration: 429355
loss: 1.017686128616333,grad_norm: 0.7975799969838913, iteration: 429356
loss: 0.9978131651878357,grad_norm: 0.7055909506317373, iteration: 429357
loss: 1.002270221710205,grad_norm: 0.9575967685869095, iteration: 429358
loss: 1.0467491149902344,grad_norm: 0.770831791598272, iteration: 429359
loss: 0.9703902006149292,grad_norm: 0.8181176301232709, iteration: 429360
loss: 1.017484426498413,grad_norm: 0.9999992801739759, iteration: 429361
loss: 0.9834489226341248,grad_norm: 0.6899569949946963, iteration: 429362
loss: 0.9941869974136353,grad_norm: 0.8073972175405387, iteration: 429363
loss: 0.990979015827179,grad_norm: 0.8390729056549234, iteration: 429364
loss: 1.0107464790344238,grad_norm: 0.9999989584039328, iteration: 429365
loss: 0.9944432973861694,grad_norm: 0.719806764355261, iteration: 429366
loss: 1.001689076423645,grad_norm: 0.7673113820564647, iteration: 429367
loss: 0.9882580637931824,grad_norm: 0.8373495049410357, iteration: 429368
loss: 0.983227014541626,grad_norm: 0.9999996474039818, iteration: 429369
loss: 0.9864100813865662,grad_norm: 0.6757337513235014, iteration: 429370
loss: 0.9640231132507324,grad_norm: 0.7946084090101295, iteration: 429371
loss: 1.0212911367416382,grad_norm: 0.6335443825653826, iteration: 429372
loss: 1.0255173444747925,grad_norm: 0.7609145427301838, iteration: 429373
loss: 0.9747149348258972,grad_norm: 0.6906777559771506, iteration: 429374
loss: 0.9995179772377014,grad_norm: 0.7266710674547668, iteration: 429375
loss: 0.9986640214920044,grad_norm: 0.805669273292222, iteration: 429376
loss: 1.0216566324234009,grad_norm: 0.999999959638175, iteration: 429377
loss: 1.0125175714492798,grad_norm: 0.7521760328458268, iteration: 429378
loss: 1.1270685195922852,grad_norm: 0.9999999726305446, iteration: 429379
loss: 0.9865847229957581,grad_norm: 0.8997374870522717, iteration: 429380
loss: 1.0143460035324097,grad_norm: 0.6104616451637936, iteration: 429381
loss: 1.0073095560073853,grad_norm: 0.6623957106071157, iteration: 429382
loss: 0.9840666651725769,grad_norm: 0.8018380651046106, iteration: 429383
loss: 0.9828405380249023,grad_norm: 0.6458125437397393, iteration: 429384
loss: 1.0073026418685913,grad_norm: 0.6934255680354506, iteration: 429385
loss: 0.99101722240448,grad_norm: 0.8465799617815294, iteration: 429386
loss: 1.0376800298690796,grad_norm: 0.9999994138203488, iteration: 429387
loss: 0.9914324283599854,grad_norm: 0.7126047773402346, iteration: 429388
loss: 0.9829539656639099,grad_norm: 0.8521213490803534, iteration: 429389
loss: 0.9935949444770813,grad_norm: 0.7653856816603973, iteration: 429390
loss: 1.0053119659423828,grad_norm: 0.7171405910815568, iteration: 429391
loss: 1.0060330629348755,grad_norm: 0.7919392458722649, iteration: 429392
loss: 1.0057282447814941,grad_norm: 0.7617413680124637, iteration: 429393
loss: 1.0079799890518188,grad_norm: 0.8012709389741711, iteration: 429394
loss: 0.9794288277626038,grad_norm: 0.90324561308146, iteration: 429395
loss: 0.9762481451034546,grad_norm: 0.7644477099499238, iteration: 429396
loss: 0.9871240258216858,grad_norm: 0.8087405521689416, iteration: 429397
loss: 1.013025164604187,grad_norm: 0.7868856518947657, iteration: 429398
loss: 1.03743314743042,grad_norm: 0.7565434651994317, iteration: 429399
loss: 0.9963751435279846,grad_norm: 0.8415887795268562, iteration: 429400
loss: 1.036616563796997,grad_norm: 0.8687117790889882, iteration: 429401
loss: 1.0113818645477295,grad_norm: 0.886245573304071, iteration: 429402
loss: 1.0311115980148315,grad_norm: 0.8235345214731132, iteration: 429403
loss: 1.033595085144043,grad_norm: 0.9999994755047332, iteration: 429404
loss: 0.9980338215827942,grad_norm: 0.9999994978983957, iteration: 429405
loss: 1.0738426446914673,grad_norm: 0.8995644823858647, iteration: 429406
loss: 1.0143390893936157,grad_norm: 0.7280849112333472, iteration: 429407
loss: 1.0057313442230225,grad_norm: 0.8456484179600863, iteration: 429408
loss: 0.9436612129211426,grad_norm: 0.9959118409104065, iteration: 429409
loss: 0.991539716720581,grad_norm: 0.7251425034135184, iteration: 429410
loss: 0.9903437495231628,grad_norm: 0.6946883816564786, iteration: 429411
loss: 0.9602924585342407,grad_norm: 0.823625555972491, iteration: 429412
loss: 1.0200669765472412,grad_norm: 0.7461311265037246, iteration: 429413
loss: 0.9908161759376526,grad_norm: 0.7660408856234401, iteration: 429414
loss: 1.0159945487976074,grad_norm: 0.7849550158743673, iteration: 429415
loss: 1.115485429763794,grad_norm: 0.9291879832287019, iteration: 429416
loss: 1.0288327932357788,grad_norm: 0.7316136358164015, iteration: 429417
loss: 1.0482484102249146,grad_norm: 0.8357481420867615, iteration: 429418
loss: 1.005331039428711,grad_norm: 0.8833777785043695, iteration: 429419
loss: 0.9488660097122192,grad_norm: 0.7944868005129916, iteration: 429420
loss: 0.9925113916397095,grad_norm: 0.8268951861934324, iteration: 429421
loss: 1.0044472217559814,grad_norm: 0.8085942526257305, iteration: 429422
loss: 0.995927631855011,grad_norm: 0.6934396438999993, iteration: 429423
loss: 0.9478265047073364,grad_norm: 0.7679969992029382, iteration: 429424
loss: 0.9880469441413879,grad_norm: 0.8752800851630445, iteration: 429425
loss: 0.9932199716567993,grad_norm: 0.7022185123587583, iteration: 429426
loss: 1.0343691110610962,grad_norm: 0.7196807837004423, iteration: 429427
loss: 0.9954149127006531,grad_norm: 0.9999998484640608, iteration: 429428
loss: 0.9965914487838745,grad_norm: 0.7984556988311258, iteration: 429429
loss: 0.9854276776313782,grad_norm: 0.9999997470308083, iteration: 429430
loss: 1.0803148746490479,grad_norm: 0.9412316826100453, iteration: 429431
loss: 1.0785809755325317,grad_norm: 0.999999357704457, iteration: 429432
loss: 0.9970179796218872,grad_norm: 0.6924145297524521, iteration: 429433
loss: 1.0037363767623901,grad_norm: 0.9999994947349412, iteration: 429434
loss: 1.0303412675857544,grad_norm: 0.8294002864146488, iteration: 429435
loss: 0.9844281077384949,grad_norm: 0.7485203115480266, iteration: 429436
loss: 1.009762167930603,grad_norm: 0.7561788838248344, iteration: 429437
loss: 0.9864025712013245,grad_norm: 0.7240949247610382, iteration: 429438
loss: 0.9796315431594849,grad_norm: 0.7040089861334508, iteration: 429439
loss: 0.98368901014328,grad_norm: 0.6651651983635665, iteration: 429440
loss: 1.0351955890655518,grad_norm: 0.76477106474186, iteration: 429441
loss: 0.9788818359375,grad_norm: 0.6797586850693867, iteration: 429442
loss: 0.9933591485023499,grad_norm: 0.768352716772523, iteration: 429443
loss: 0.9944881200790405,grad_norm: 0.7528584370126195, iteration: 429444
loss: 0.9891647696495056,grad_norm: 0.6468737737965614, iteration: 429445
loss: 0.9411181807518005,grad_norm: 0.8372107495676594, iteration: 429446
loss: 1.072737216949463,grad_norm: 0.9806024687224547, iteration: 429447
loss: 1.0842108726501465,grad_norm: 0.9999992091100078, iteration: 429448
loss: 0.9977238774299622,grad_norm: 0.7652539472455147, iteration: 429449
loss: 0.9962771534919739,grad_norm: 0.7621129674728547, iteration: 429450
loss: 0.9982702732086182,grad_norm: 0.88611563390865, iteration: 429451
loss: 0.9996762275695801,grad_norm: 0.9005612258741519, iteration: 429452
loss: 1.0512062311172485,grad_norm: 0.6980298852398551, iteration: 429453
loss: 1.0170010328292847,grad_norm: 0.9999990100362612, iteration: 429454
loss: 0.9853230714797974,grad_norm: 0.6968821035784775, iteration: 429455
loss: 0.9917718768119812,grad_norm: 0.6539680472623526, iteration: 429456
loss: 0.9692326188087463,grad_norm: 0.7482803563610785, iteration: 429457
loss: 1.002625823020935,grad_norm: 0.7224909675576007, iteration: 429458
loss: 1.017496109008789,grad_norm: 0.9483590889377879, iteration: 429459
loss: 0.9737679362297058,grad_norm: 0.8717342691121708, iteration: 429460
loss: 1.0857887268066406,grad_norm: 0.8507969112121507, iteration: 429461
loss: 1.0157432556152344,grad_norm: 0.7507053602716552, iteration: 429462
loss: 1.046069622039795,grad_norm: 0.852431857575745, iteration: 429463
loss: 0.9885197877883911,grad_norm: 0.6559357546200223, iteration: 429464
loss: 0.9842124581336975,grad_norm: 0.7781826409883983, iteration: 429465
loss: 1.0237083435058594,grad_norm: 0.9999994163728262, iteration: 429466
loss: 1.0159552097320557,grad_norm: 0.8713824024013351, iteration: 429467
loss: 1.012764811515808,grad_norm: 0.7710581439601435, iteration: 429468
loss: 1.0568796396255493,grad_norm: 0.9999992945388139, iteration: 429469
loss: 1.0234266519546509,grad_norm: 0.8571968562420997, iteration: 429470
loss: 1.0168471336364746,grad_norm: 0.9999994756243817, iteration: 429471
loss: 1.0138907432556152,grad_norm: 0.8552030646287523, iteration: 429472
loss: 1.0318130254745483,grad_norm: 0.7920651514239518, iteration: 429473
loss: 1.0254993438720703,grad_norm: 0.7685448539187492, iteration: 429474
loss: 0.9769458770751953,grad_norm: 0.7441342508725155, iteration: 429475
loss: 1.0116078853607178,grad_norm: 0.7768043423306374, iteration: 429476
loss: 1.046604871749878,grad_norm: 0.885899581088593, iteration: 429477
loss: 1.032457709312439,grad_norm: 0.6755287417259703, iteration: 429478
loss: 1.0216318368911743,grad_norm: 0.895168771678533, iteration: 429479
loss: 0.9971015453338623,grad_norm: 0.6961061692279329, iteration: 429480
loss: 0.9671550989151001,grad_norm: 0.7706389755697868, iteration: 429481
loss: 1.042479395866394,grad_norm: 0.9999991345943042, iteration: 429482
loss: 0.9944249391555786,grad_norm: 0.8075293368530788, iteration: 429483
loss: 1.011034369468689,grad_norm: 0.7732843600684609, iteration: 429484
loss: 1.0496230125427246,grad_norm: 0.7312404092725496, iteration: 429485
loss: 0.997279942035675,grad_norm: 0.6620806763388871, iteration: 429486
loss: 0.992737352848053,grad_norm: 0.8161185862917878, iteration: 429487
loss: 0.9971789121627808,grad_norm: 0.7967913255044992, iteration: 429488
loss: 0.9759252071380615,grad_norm: 0.7833861628849064, iteration: 429489
loss: 1.06743323802948,grad_norm: 1.0000000892691734, iteration: 429490
loss: 1.012408971786499,grad_norm: 0.8543455811008835, iteration: 429491
loss: 0.9854286909103394,grad_norm: 0.9999990848598167, iteration: 429492
loss: 1.0448119640350342,grad_norm: 0.9999993840352268, iteration: 429493
loss: 1.0059503316879272,grad_norm: 0.937756277254977, iteration: 429494
loss: 0.9837200045585632,grad_norm: 0.6981650464416492, iteration: 429495
loss: 0.9774065017700195,grad_norm: 0.7052560139067714, iteration: 429496
loss: 0.9922881722450256,grad_norm: 0.8023620039134328, iteration: 429497
loss: 0.9855836629867554,grad_norm: 0.7574040203466326, iteration: 429498
loss: 1.0199007987976074,grad_norm: 0.999999132097395, iteration: 429499
loss: 1.0218318700790405,grad_norm: 0.9608343064633634, iteration: 429500
loss: 0.9905451536178589,grad_norm: 0.8910856663734442, iteration: 429501
loss: 1.0250598192214966,grad_norm: 0.7823151747847396, iteration: 429502
loss: 0.9630193710327148,grad_norm: 0.7738038927483534, iteration: 429503
loss: 0.9994510412216187,grad_norm: 0.797281844513814, iteration: 429504
loss: 1.0332555770874023,grad_norm: 0.9255016452557773, iteration: 429505
loss: 0.9767012000083923,grad_norm: 0.7532646725107307, iteration: 429506
loss: 0.9875730872154236,grad_norm: 0.6927381588374103, iteration: 429507
loss: 1.0047333240509033,grad_norm: 0.9999999057279203, iteration: 429508
loss: 0.9921053051948547,grad_norm: 0.7283603870451384, iteration: 429509
loss: 0.9988061189651489,grad_norm: 0.9529144419221102, iteration: 429510
loss: 0.9735428690910339,grad_norm: 0.9999997774352243, iteration: 429511
loss: 1.0018054246902466,grad_norm: 0.7180291479643698, iteration: 429512
loss: 0.9735597372055054,grad_norm: 0.7820746022884087, iteration: 429513
loss: 1.0411626100540161,grad_norm: 0.8502819731588773, iteration: 429514
loss: 1.0866059064865112,grad_norm: 0.8999401322426894, iteration: 429515
loss: 0.9891054034233093,grad_norm: 0.7406076143218181, iteration: 429516
loss: 1.0414283275604248,grad_norm: 1.0000000058531056, iteration: 429517
loss: 0.967003583908081,grad_norm: 0.8289260129044494, iteration: 429518
loss: 1.0130547285079956,grad_norm: 0.6861490759215174, iteration: 429519
loss: 1.081997036933899,grad_norm: 1.000000012224663, iteration: 429520
loss: 1.062861442565918,grad_norm: 0.9999991385835081, iteration: 429521
loss: 0.9942257404327393,grad_norm: 0.8872418305677311, iteration: 429522
loss: 0.9789912700653076,grad_norm: 0.8284237235992155, iteration: 429523
loss: 0.9830830097198486,grad_norm: 0.9999990822672664, iteration: 429524
loss: 1.076661467552185,grad_norm: 0.99999921451655, iteration: 429525
loss: 0.9716182351112366,grad_norm: 0.734617679308766, iteration: 429526
loss: 1.1129075288772583,grad_norm: 0.9999996613358139, iteration: 429527
loss: 1.033201813697815,grad_norm: 0.9999999747483868, iteration: 429528
loss: 1.0102946758270264,grad_norm: 0.9999991477421766, iteration: 429529
loss: 0.9988289475440979,grad_norm: 0.8538344537702043, iteration: 429530
loss: 1.1256537437438965,grad_norm: 0.999999475342412, iteration: 429531
loss: 0.9456548690795898,grad_norm: 0.8072841799342833, iteration: 429532
loss: 0.9956403970718384,grad_norm: 0.7662928566168831, iteration: 429533
loss: 1.1463626623153687,grad_norm: 0.9999996432262535, iteration: 429534
loss: 0.9847519993782043,grad_norm: 0.7940737753966742, iteration: 429535
loss: 0.9864466786384583,grad_norm: 0.7505020452436452, iteration: 429536
loss: 0.9765861630439758,grad_norm: 0.9780150257741205, iteration: 429537
loss: 0.9891761541366577,grad_norm: 0.9018053723003002, iteration: 429538
loss: 1.027111530303955,grad_norm: 0.7018649869934603, iteration: 429539
loss: 0.9959257245063782,grad_norm: 0.8585847036460108, iteration: 429540
loss: 0.9558217525482178,grad_norm: 0.8546664024319996, iteration: 429541
loss: 0.990718424320221,grad_norm: 0.8729113859135312, iteration: 429542
loss: 1.0353084802627563,grad_norm: 0.7272501669660973, iteration: 429543
loss: 0.9504380226135254,grad_norm: 0.8798537144652708, iteration: 429544
loss: 1.0370384454727173,grad_norm: 0.8361471082267706, iteration: 429545
loss: 1.057183027267456,grad_norm: 0.999999573932686, iteration: 429546
loss: 1.01839280128479,grad_norm: 0.6462406638469753, iteration: 429547
loss: 0.9899327754974365,grad_norm: 0.9999994034259679, iteration: 429548
loss: 0.9886723160743713,grad_norm: 0.7505716198032083, iteration: 429549
loss: 1.0012153387069702,grad_norm: 0.9241879904853629, iteration: 429550
loss: 0.9624571204185486,grad_norm: 0.7969622901955552, iteration: 429551
loss: 0.9853233695030212,grad_norm: 0.75233087429306, iteration: 429552
loss: 0.9678375720977783,grad_norm: 0.7518632409648224, iteration: 429553
loss: 0.9977722764015198,grad_norm: 0.8336813793617991, iteration: 429554
loss: 1.0226590633392334,grad_norm: 0.7698965480846515, iteration: 429555
loss: 1.013621211051941,grad_norm: 0.8475321183817986, iteration: 429556
loss: 0.9719005227088928,grad_norm: 0.8553539541422918, iteration: 429557
loss: 0.9903959631919861,grad_norm: 0.8488208483052937, iteration: 429558
loss: 1.0103145837783813,grad_norm: 0.8531669085080893, iteration: 429559
loss: 1.0096197128295898,grad_norm: 0.8829969701335665, iteration: 429560
loss: 0.9892674088478088,grad_norm: 0.8931150678129268, iteration: 429561
loss: 1.0103821754455566,grad_norm: 0.9791482933878963, iteration: 429562
loss: 1.0161608457565308,grad_norm: 0.718018590320602, iteration: 429563
loss: 1.0388303995132446,grad_norm: 0.9073650825541957, iteration: 429564
loss: 1.0186021327972412,grad_norm: 0.9067079442358927, iteration: 429565
loss: 1.0372425317764282,grad_norm: 0.9999997253967244, iteration: 429566
loss: 0.9994813799858093,grad_norm: 0.999999084215297, iteration: 429567
loss: 0.9594631791114807,grad_norm: 0.8200583295525046, iteration: 429568
loss: 1.014094352722168,grad_norm: 0.9187907078727916, iteration: 429569
loss: 0.9966322779655457,grad_norm: 0.8556054679404983, iteration: 429570
loss: 1.0738986730575562,grad_norm: 0.9999997190340326, iteration: 429571
loss: 1.0781432390213013,grad_norm: 0.9476113112112121, iteration: 429572
loss: 1.006783366203308,grad_norm: 0.8223166097042087, iteration: 429573
loss: 0.9937622547149658,grad_norm: 0.7505622512098232, iteration: 429574
loss: 1.0588465929031372,grad_norm: 0.8116933308529369, iteration: 429575
loss: 1.0328994989395142,grad_norm: 0.9999997280421825, iteration: 429576
loss: 0.995696485042572,grad_norm: 0.8329545778402861, iteration: 429577
loss: 0.9692738652229309,grad_norm: 0.898588884280096, iteration: 429578
loss: 0.9992766380310059,grad_norm: 0.9645039604502591, iteration: 429579
loss: 1.057205080986023,grad_norm: 0.9505729543667276, iteration: 429580
loss: 1.0246071815490723,grad_norm: 0.7468605665311866, iteration: 429581
loss: 0.9687309265136719,grad_norm: 0.8887602006891652, iteration: 429582
loss: 1.1405149698257446,grad_norm: 0.9999996782121106, iteration: 429583
loss: 1.006117582321167,grad_norm: 0.8265006776125714, iteration: 429584
loss: 1.084309697151184,grad_norm: 0.9999991677134945, iteration: 429585
loss: 0.9923901557922363,grad_norm: 0.7917109212825065, iteration: 429586
loss: 1.0184353590011597,grad_norm: 0.761069089549702, iteration: 429587
loss: 1.0159821510314941,grad_norm: 0.6986424230163546, iteration: 429588
loss: 0.9554638266563416,grad_norm: 0.8280270168919892, iteration: 429589
loss: 1.034629464149475,grad_norm: 0.8508276667217471, iteration: 429590
loss: 0.9999378323554993,grad_norm: 0.7903223415539218, iteration: 429591
loss: 0.9968039989471436,grad_norm: 0.6844726547270576, iteration: 429592
loss: 1.0127102136611938,grad_norm: 0.9999999154694527, iteration: 429593
loss: 1.0329257249832153,grad_norm: 0.9828063396799984, iteration: 429594
loss: 1.021590232849121,grad_norm: 0.9999994777847235, iteration: 429595
loss: 1.0645331144332886,grad_norm: 0.9206526853292806, iteration: 429596
loss: 0.9657644629478455,grad_norm: 0.8007756413135679, iteration: 429597
loss: 0.9865943193435669,grad_norm: 0.742682499556732, iteration: 429598
loss: 1.028924584388733,grad_norm: 0.9999992647017549, iteration: 429599
loss: 0.9929385781288147,grad_norm: 0.8244386426025674, iteration: 429600
loss: 1.0285778045654297,grad_norm: 0.9999997458122694, iteration: 429601
loss: 1.0253928899765015,grad_norm: 0.8316285065337938, iteration: 429602
loss: 0.9998184442520142,grad_norm: 0.837153988185734, iteration: 429603
loss: 0.977988600730896,grad_norm: 0.6420783847740423, iteration: 429604
loss: 0.9757335186004639,grad_norm: 0.8300097414777213, iteration: 429605
loss: 1.039310336112976,grad_norm: 0.8964189958885217, iteration: 429606
loss: 0.9758224487304688,grad_norm: 0.8365650770818839, iteration: 429607
loss: 1.2163218259811401,grad_norm: 0.779158435391115, iteration: 429608
loss: 1.0048929452896118,grad_norm: 0.7801667698665551, iteration: 429609
loss: 0.9730086922645569,grad_norm: 0.9999991646814885, iteration: 429610
loss: 0.9978529214859009,grad_norm: 0.7645216567329733, iteration: 429611
loss: 1.0316108465194702,grad_norm: 0.598825332970409, iteration: 429612
loss: 1.0149067640304565,grad_norm: 0.778116436109187, iteration: 429613
loss: 1.0572823286056519,grad_norm: 0.799597687219421, iteration: 429614
loss: 1.0268418788909912,grad_norm: 0.5961564035227677, iteration: 429615
loss: 0.9882714152336121,grad_norm: 0.7468732581304764, iteration: 429616
loss: 0.9759910702705383,grad_norm: 0.8839079893211219, iteration: 429617
loss: 1.0269986391067505,grad_norm: 0.9652932624526598, iteration: 429618
loss: 1.0165213346481323,grad_norm: 0.8114715350112561, iteration: 429619
loss: 1.010581612586975,grad_norm: 0.7985726980807272, iteration: 429620
loss: 1.001937747001648,grad_norm: 0.9999991768618337, iteration: 429621
loss: 0.9885526895523071,grad_norm: 0.7441357206313233, iteration: 429622
loss: 0.9756311178207397,grad_norm: 0.7775281722812268, iteration: 429623
loss: 0.9897869229316711,grad_norm: 0.7431659253726572, iteration: 429624
loss: 1.020155906677246,grad_norm: 0.9999997669626898, iteration: 429625
loss: 0.9782865643501282,grad_norm: 0.999999504172878, iteration: 429626
loss: 0.9955673217773438,grad_norm: 0.8139724123335008, iteration: 429627
loss: 1.002886414527893,grad_norm: 0.7442217495364671, iteration: 429628
loss: 0.9806030988693237,grad_norm: 0.7927091254618963, iteration: 429629
loss: 0.9637616276741028,grad_norm: 0.6921418933567263, iteration: 429630
loss: 1.0061907768249512,grad_norm: 0.8660600399586444, iteration: 429631
loss: 1.0226203203201294,grad_norm: 0.7592618561859399, iteration: 429632
loss: 0.9816015958786011,grad_norm: 0.720513236734072, iteration: 429633
loss: 1.0674680471420288,grad_norm: 0.9999997485766056, iteration: 429634
loss: 1.0059828758239746,grad_norm: 0.9999995276759462, iteration: 429635
loss: 1.015308141708374,grad_norm: 0.9142321681808191, iteration: 429636
loss: 1.0481470823287964,grad_norm: 0.9999994093751464, iteration: 429637
loss: 1.007438063621521,grad_norm: 0.8460462339496195, iteration: 429638
loss: 0.9761713147163391,grad_norm: 0.696376511093889, iteration: 429639
loss: 1.0856379270553589,grad_norm: 0.9999991449608076, iteration: 429640
loss: 1.0327651500701904,grad_norm: 0.8515543805155842, iteration: 429641
loss: 0.9975282549858093,grad_norm: 0.8527070936070001, iteration: 429642
loss: 0.9766891598701477,grad_norm: 0.7987941038062587, iteration: 429643
loss: 1.0149712562561035,grad_norm: 0.8446999276667008, iteration: 429644
loss: 1.0000416040420532,grad_norm: 0.7637066537723909, iteration: 429645
loss: 1.0029784440994263,grad_norm: 0.9999998637813037, iteration: 429646
loss: 0.9983879923820496,grad_norm: 0.71302596480414, iteration: 429647
loss: 1.0507720708847046,grad_norm: 0.8241908778438995, iteration: 429648
loss: 1.0254909992218018,grad_norm: 0.8592429771955981, iteration: 429649
loss: 1.0242868661880493,grad_norm: 0.7515577116745947, iteration: 429650
loss: 0.9928642511367798,grad_norm: 0.7552690130310965, iteration: 429651
loss: 1.0132508277893066,grad_norm: 0.9999995676519913, iteration: 429652
loss: 1.008224606513977,grad_norm: 0.7661069659920771, iteration: 429653
loss: 1.0239877700805664,grad_norm: 0.8255056356200591, iteration: 429654
loss: 0.997144877910614,grad_norm: 0.8329874628673463, iteration: 429655
loss: 0.9945695400238037,grad_norm: 0.7607778101565685, iteration: 429656
loss: 0.9949331879615784,grad_norm: 0.893964539887909, iteration: 429657
loss: 1.0123107433319092,grad_norm: 0.8617168903755598, iteration: 429658
loss: 1.0243418216705322,grad_norm: 0.7142038512855493, iteration: 429659
loss: 0.9983960390090942,grad_norm: 0.8526669191232953, iteration: 429660
loss: 0.9777671694755554,grad_norm: 0.904422821886569, iteration: 429661
loss: 0.9951719641685486,grad_norm: 0.8524122565808627, iteration: 429662
loss: 1.0400854349136353,grad_norm: 0.7557916573336441, iteration: 429663
loss: 0.9857929348945618,grad_norm: 0.8658354505019626, iteration: 429664
loss: 1.0175710916519165,grad_norm: 0.6622465560847856, iteration: 429665
loss: 1.0412721633911133,grad_norm: 0.9999997823774042, iteration: 429666
loss: 0.9909308552742004,grad_norm: 0.7415292339180385, iteration: 429667
loss: 0.9958387613296509,grad_norm: 0.827133384633178, iteration: 429668
loss: 0.980413556098938,grad_norm: 0.8314535640392199, iteration: 429669
loss: 0.985351026058197,grad_norm: 0.7481107898550151, iteration: 429670
loss: 1.0035889148712158,grad_norm: 0.8106204875635241, iteration: 429671
loss: 1.062650442123413,grad_norm: 0.6867844166173517, iteration: 429672
loss: 1.181292176246643,grad_norm: 0.9999992254132261, iteration: 429673
loss: 0.9628701210021973,grad_norm: 0.8533391075902065, iteration: 429674
loss: 0.9903913140296936,grad_norm: 0.8906010306858048, iteration: 429675
loss: 1.0272321701049805,grad_norm: 0.9999990020998113, iteration: 429676
loss: 1.0020911693572998,grad_norm: 0.9064591062395004, iteration: 429677
loss: 0.9844058156013489,grad_norm: 0.7861190170397667, iteration: 429678
loss: 0.9645612835884094,grad_norm: 0.8180682878067584, iteration: 429679
loss: 0.9946901202201843,grad_norm: 0.7440995946067203, iteration: 429680
loss: 1.0043448209762573,grad_norm: 0.729967238011084, iteration: 429681
loss: 1.0429959297180176,grad_norm: 0.8675935635612969, iteration: 429682
loss: 0.988609254360199,grad_norm: 0.851555698305729, iteration: 429683
loss: 1.025945782661438,grad_norm: 0.9999999277712673, iteration: 429684
loss: 0.9966081380844116,grad_norm: 0.7170872211229046, iteration: 429685
loss: 0.9752505421638489,grad_norm: 0.6575997583652305, iteration: 429686
loss: 1.0086967945098877,grad_norm: 0.8852841211730159, iteration: 429687
loss: 1.01248300075531,grad_norm: 0.9215843545402166, iteration: 429688
loss: 0.9870457649230957,grad_norm: 0.7242217314746651, iteration: 429689
loss: 1.0360891819000244,grad_norm: 0.9999998297706074, iteration: 429690
loss: 1.0060598850250244,grad_norm: 0.7087201164445619, iteration: 429691
loss: 1.0256538391113281,grad_norm: 0.7016828079717248, iteration: 429692
loss: 1.031349778175354,grad_norm: 0.8457878311707475, iteration: 429693
loss: 1.013018012046814,grad_norm: 0.8507395731839685, iteration: 429694
loss: 1.0056912899017334,grad_norm: 0.788491398369868, iteration: 429695
loss: 1.0211732387542725,grad_norm: 0.7687540464335418, iteration: 429696
loss: 1.0507595539093018,grad_norm: 0.8336687944607618, iteration: 429697
loss: 1.0260473489761353,grad_norm: 0.9999994182829757, iteration: 429698
loss: 1.0258177518844604,grad_norm: 0.9999996992369803, iteration: 429699
loss: 0.984406054019928,grad_norm: 0.8326657582021183, iteration: 429700
loss: 1.0175343751907349,grad_norm: 0.8887916489701136, iteration: 429701
loss: 1.052511215209961,grad_norm: 0.8209332580649059, iteration: 429702
loss: 1.0205190181732178,grad_norm: 0.8346200275685313, iteration: 429703
loss: 1.026985764503479,grad_norm: 0.688711809390339, iteration: 429704
loss: 1.0106315612792969,grad_norm: 0.784389196320167, iteration: 429705
loss: 0.9626542329788208,grad_norm: 0.9140759196333978, iteration: 429706
loss: 1.0342026948928833,grad_norm: 0.9035822361926376, iteration: 429707
loss: 1.0379961729049683,grad_norm: 0.999999977754371, iteration: 429708
loss: 1.0156679153442383,grad_norm: 0.762290437734196, iteration: 429709
loss: 1.02460515499115,grad_norm: 0.9999995530871609, iteration: 429710
loss: 1.0018295049667358,grad_norm: 0.7270321971119034, iteration: 429711
loss: 1.0370292663574219,grad_norm: 0.9999996093398931, iteration: 429712
loss: 1.0612590312957764,grad_norm: 0.9965325661659546, iteration: 429713
loss: 0.9881090521812439,grad_norm: 0.9035338542657928, iteration: 429714
loss: 1.0232192277908325,grad_norm: 0.7638886722079908, iteration: 429715
loss: 1.002593994140625,grad_norm: 0.7962755497683917, iteration: 429716
loss: 1.0463216304779053,grad_norm: 0.8437138338726712, iteration: 429717
loss: 1.0309644937515259,grad_norm: 0.7911809432376817, iteration: 429718
loss: 1.0942223072052002,grad_norm: 1.0000000449529245, iteration: 429719
loss: 0.9647481441497803,grad_norm: 0.7946904083366172, iteration: 429720
loss: 0.9750685095787048,grad_norm: 0.9447917255336109, iteration: 429721
loss: 1.0324755907058716,grad_norm: 0.999999304326332, iteration: 429722
loss: 1.014137625694275,grad_norm: 0.7467397058181134, iteration: 429723
loss: 1.0735501050949097,grad_norm: 0.9610518026753023, iteration: 429724
loss: 1.1007540225982666,grad_norm: 0.9999990514686005, iteration: 429725
loss: 0.9956708550453186,grad_norm: 0.9119948948296296, iteration: 429726
loss: 1.273383378982544,grad_norm: 0.9999997691711003, iteration: 429727
loss: 1.0365608930587769,grad_norm: 0.7537832796055319, iteration: 429728
loss: 1.0305184125900269,grad_norm: 0.9999991079960056, iteration: 429729
loss: 0.995358407497406,grad_norm: 0.819673716246205, iteration: 429730
loss: 1.0168704986572266,grad_norm: 0.7885819064806855, iteration: 429731
loss: 0.9996767640113831,grad_norm: 0.9251115618917767, iteration: 429732
loss: 1.1228716373443604,grad_norm: 0.8086278458800571, iteration: 429733
loss: 0.9718112349510193,grad_norm: 0.9553689533367248, iteration: 429734
loss: 1.0031960010528564,grad_norm: 0.8086988555689394, iteration: 429735
loss: 0.9886311888694763,grad_norm: 0.7061643479922894, iteration: 429736
loss: 0.986140251159668,grad_norm: 0.7948751034152943, iteration: 429737
loss: 0.9991422295570374,grad_norm: 0.7512619459624931, iteration: 429738
loss: 1.0211777687072754,grad_norm: 0.9439402476044307, iteration: 429739
loss: 0.9899927377700806,grad_norm: 0.7345413555638651, iteration: 429740
loss: 1.0273222923278809,grad_norm: 0.9999997699090113, iteration: 429741
loss: 0.9874646067619324,grad_norm: 0.9118573442186936, iteration: 429742
loss: 0.9954484701156616,grad_norm: 0.9562557725415537, iteration: 429743
loss: 0.985340416431427,grad_norm: 0.6677529526444655, iteration: 429744
loss: 1.0050251483917236,grad_norm: 0.7804453658765595, iteration: 429745
loss: 1.0015273094177246,grad_norm: 0.8512316696740071, iteration: 429746
loss: 1.0091602802276611,grad_norm: 0.9761903446910635, iteration: 429747
loss: 1.005470871925354,grad_norm: 0.673862820234772, iteration: 429748
loss: 1.0244758129119873,grad_norm: 0.8978714505037707, iteration: 429749
loss: 0.9679306745529175,grad_norm: 0.7990889347934957, iteration: 429750
loss: 1.0135016441345215,grad_norm: 0.9112272267928562, iteration: 429751
loss: 0.9891241788864136,grad_norm: 0.7871315633033401, iteration: 429752
loss: 0.964946985244751,grad_norm: 0.7910831502522435, iteration: 429753
loss: 1.025612711906433,grad_norm: 0.8468725543289383, iteration: 429754
loss: 1.0392487049102783,grad_norm: 0.7799500488896896, iteration: 429755
loss: 1.0583198070526123,grad_norm: 0.8898286215021921, iteration: 429756
loss: 1.0356731414794922,grad_norm: 0.9999993472622288, iteration: 429757
loss: 1.035129189491272,grad_norm: 0.7095409377116906, iteration: 429758
loss: 1.0331017971038818,grad_norm: 0.9999989958138132, iteration: 429759
loss: 1.033443570137024,grad_norm: 0.9765907796766717, iteration: 429760
loss: 1.1983851194381714,grad_norm: 0.9999992039240139, iteration: 429761
loss: 0.9851874709129333,grad_norm: 0.8903329076220066, iteration: 429762
loss: 1.005382776260376,grad_norm: 0.775304454106787, iteration: 429763
loss: 1.023833990097046,grad_norm: 0.8636972082815331, iteration: 429764
loss: 0.9665051698684692,grad_norm: 0.8144021862328881, iteration: 429765
loss: 0.997448205947876,grad_norm: 0.7258005253931974, iteration: 429766
loss: 1.0067825317382812,grad_norm: 0.8369592267506177, iteration: 429767
loss: 1.0164419412612915,grad_norm: 0.5841274872605521, iteration: 429768
loss: 0.9998456239700317,grad_norm: 0.8304262494691848, iteration: 429769
loss: 0.9928403496742249,grad_norm: 0.9878546485801107, iteration: 429770
loss: 1.054927110671997,grad_norm: 0.7851355182393164, iteration: 429771
loss: 1.0601766109466553,grad_norm: 0.9999998905506154, iteration: 429772
loss: 1.039379358291626,grad_norm: 0.9999991427259339, iteration: 429773
loss: 0.992481529712677,grad_norm: 0.6994756030804151, iteration: 429774
loss: 1.0031789541244507,grad_norm: 0.8265124711274494, iteration: 429775
loss: 1.0060712099075317,grad_norm: 0.8447743718411538, iteration: 429776
loss: 1.0073320865631104,grad_norm: 0.788511166092061, iteration: 429777
loss: 1.024679183959961,grad_norm: 0.8339980348306639, iteration: 429778
loss: 1.001639723777771,grad_norm: 0.9216786752076325, iteration: 429779
loss: 1.007086157798767,grad_norm: 0.8112641687872746, iteration: 429780
loss: 1.0340402126312256,grad_norm: 0.9999996567652066, iteration: 429781
loss: 1.0600836277008057,grad_norm: 0.9999999483951069, iteration: 429782
loss: 0.9747717976570129,grad_norm: 0.7978646019732925, iteration: 429783
loss: 1.0974586009979248,grad_norm: 0.848806074482516, iteration: 429784
loss: 1.1352910995483398,grad_norm: 0.9268715017495058, iteration: 429785
loss: 1.0382312536239624,grad_norm: 0.999999596928619, iteration: 429786
loss: 1.0112837553024292,grad_norm: 0.702211866713534, iteration: 429787
loss: 0.980292558670044,grad_norm: 0.8708454050300879, iteration: 429788
loss: 1.0021758079528809,grad_norm: 0.8621213221692798, iteration: 429789
loss: 0.999565839767456,grad_norm: 0.8165959093055734, iteration: 429790
loss: 1.0283409357070923,grad_norm: 0.7801491582497911, iteration: 429791
loss: 1.0023022890090942,grad_norm: 0.9999993460955046, iteration: 429792
loss: 1.037225365638733,grad_norm: 0.9793334571573982, iteration: 429793
loss: 1.0102392435073853,grad_norm: 1.0000000664660158, iteration: 429794
loss: 1.0153065919876099,grad_norm: 0.8445828807928644, iteration: 429795
loss: 1.0214204788208008,grad_norm: 0.9765301500332513, iteration: 429796
loss: 1.123528003692627,grad_norm: 0.884931951370864, iteration: 429797
loss: 0.9796888828277588,grad_norm: 0.8469538263053312, iteration: 429798
loss: 1.0034306049346924,grad_norm: 0.8619410301514019, iteration: 429799
loss: 1.1085684299468994,grad_norm: 0.9777871527857945, iteration: 429800
loss: 1.059902548789978,grad_norm: 0.999999107454894, iteration: 429801
loss: 0.9737260341644287,grad_norm: 0.7454337530640852, iteration: 429802
loss: 0.9928839802742004,grad_norm: 0.7335091950416882, iteration: 429803
loss: 1.025080919265747,grad_norm: 0.6615180024529457, iteration: 429804
loss: 0.9770218133926392,grad_norm: 0.7755786418425072, iteration: 429805
loss: 1.0034302473068237,grad_norm: 0.7366917617264201, iteration: 429806
loss: 1.0607726573944092,grad_norm: 0.999999115244598, iteration: 429807
loss: 1.0417505502700806,grad_norm: 0.9999997514980632, iteration: 429808
loss: 1.0184378623962402,grad_norm: 0.8820168579700228, iteration: 429809
loss: 1.0430535078048706,grad_norm: 0.9999992765954108, iteration: 429810
loss: 1.0567411184310913,grad_norm: 0.7829958449059075, iteration: 429811
loss: 0.9607911705970764,grad_norm: 0.7833752649837121, iteration: 429812
loss: 1.0311089754104614,grad_norm: 0.94574571907873, iteration: 429813
loss: 1.0245909690856934,grad_norm: 0.7848299804403192, iteration: 429814
loss: 1.006789207458496,grad_norm: 0.7777534295661879, iteration: 429815
loss: 1.0057955980300903,grad_norm: 0.7229266201730699, iteration: 429816
loss: 0.9728983044624329,grad_norm: 0.8083923951645864, iteration: 429817
loss: 1.026599645614624,grad_norm: 0.9077008591743668, iteration: 429818
loss: 1.059427261352539,grad_norm: 0.9999990605527876, iteration: 429819
loss: 0.9987488389015198,grad_norm: 0.7645722375680483, iteration: 429820
loss: 0.986976683139801,grad_norm: 0.6992646930633112, iteration: 429821
loss: 1.1626828908920288,grad_norm: 0.9999997934678272, iteration: 429822
loss: 0.9795095920562744,grad_norm: 0.7770506250171445, iteration: 429823
loss: 0.998374342918396,grad_norm: 0.8655062444015079, iteration: 429824
loss: 1.0247291326522827,grad_norm: 0.7852769748374825, iteration: 429825
loss: 1.004897952079773,grad_norm: 0.9999996340764911, iteration: 429826
loss: 1.1930755376815796,grad_norm: 0.999999450285075, iteration: 429827
loss: 1.108606219291687,grad_norm: 0.8278257848173979, iteration: 429828
loss: 1.0271916389465332,grad_norm: 0.999999879043818, iteration: 429829
loss: 1.00864577293396,grad_norm: 0.8257109305714946, iteration: 429830
loss: 1.0297579765319824,grad_norm: 0.8697050390616176, iteration: 429831
loss: 1.0223466157913208,grad_norm: 0.9999990722261863, iteration: 429832
loss: 0.9653134942054749,grad_norm: 0.999999012872302, iteration: 429833
loss: 1.0300647020339966,grad_norm: 0.9999992147684004, iteration: 429834
loss: 1.0008647441864014,grad_norm: 0.7975731957275016, iteration: 429835
loss: 1.040846824645996,grad_norm: 0.9719447963462041, iteration: 429836
loss: 1.0275198221206665,grad_norm: 0.7194115766372948, iteration: 429837
loss: 0.9991266131401062,grad_norm: 0.9999992990526573, iteration: 429838
loss: 1.0463495254516602,grad_norm: 0.7316335025824987, iteration: 429839
loss: 0.9994525909423828,grad_norm: 0.9999994181478195, iteration: 429840
loss: 1.015753984451294,grad_norm: 0.9999993949714405, iteration: 429841
loss: 1.0757453441619873,grad_norm: 0.9999992108780736, iteration: 429842
loss: 0.989878237247467,grad_norm: 0.7519350577268309, iteration: 429843
loss: 0.9852527976036072,grad_norm: 0.8460882622951196, iteration: 429844
loss: 1.0257189273834229,grad_norm: 0.9624037341877497, iteration: 429845
loss: 1.03567636013031,grad_norm: 0.9999997885728892, iteration: 429846
loss: 0.9880508780479431,grad_norm: 0.9935842483383756, iteration: 429847
loss: 0.9876556992530823,grad_norm: 0.7396434058067649, iteration: 429848
loss: 1.010050892829895,grad_norm: 0.8092859952411274, iteration: 429849
loss: 0.9961592555046082,grad_norm: 0.8524027982570219, iteration: 429850
loss: 1.0153526067733765,grad_norm: 0.6941750749339688, iteration: 429851
loss: 1.0553398132324219,grad_norm: 0.7644886076018872, iteration: 429852
loss: 0.9670887589454651,grad_norm: 0.7617580929263764, iteration: 429853
loss: 1.022644281387329,grad_norm: 0.9999995153107063, iteration: 429854
loss: 0.9925356507301331,grad_norm: 0.8373132339927596, iteration: 429855
loss: 0.9919677376747131,grad_norm: 0.9999996474083775, iteration: 429856
loss: 1.0402065515518188,grad_norm: 0.9999998864488531, iteration: 429857
loss: 1.027465581893921,grad_norm: 0.7180089088310843, iteration: 429858
loss: 0.9893804788589478,grad_norm: 0.7520855215517767, iteration: 429859
loss: 0.99644535779953,grad_norm: 0.9894298854589352, iteration: 429860
loss: 1.0501033067703247,grad_norm: 0.8098880149332771, iteration: 429861
loss: 0.9945186972618103,grad_norm: 0.8523220323505479, iteration: 429862
loss: 1.0160493850708008,grad_norm: 0.9999994381178258, iteration: 429863
loss: 0.9933893084526062,grad_norm: 0.9999989809630391, iteration: 429864
loss: 1.014894723892212,grad_norm: 0.7782164371506535, iteration: 429865
loss: 1.0194164514541626,grad_norm: 0.8514695135626151, iteration: 429866
loss: 1.006323218345642,grad_norm: 0.690866293165521, iteration: 429867
loss: 1.0015146732330322,grad_norm: 0.7972977238616253, iteration: 429868
loss: 1.0248892307281494,grad_norm: 0.8316999692111259, iteration: 429869
loss: 1.0227675437927246,grad_norm: 0.773473184813652, iteration: 429870
loss: 1.0141597986221313,grad_norm: 0.7825817910535364, iteration: 429871
loss: 1.0112022161483765,grad_norm: 0.999999437477882, iteration: 429872
loss: 0.9878551959991455,grad_norm: 0.9732074851497254, iteration: 429873
loss: 0.9914507865905762,grad_norm: 0.8074601200088342, iteration: 429874
loss: 1.0406460762023926,grad_norm: 0.9999993819388752, iteration: 429875
loss: 1.0836516618728638,grad_norm: 0.9840926526126522, iteration: 429876
loss: 1.020514965057373,grad_norm: 0.7531637351119959, iteration: 429877
loss: 1.1346641778945923,grad_norm: 0.9999995696795387, iteration: 429878
loss: 1.0092456340789795,grad_norm: 0.7087695190108333, iteration: 429879
loss: 1.0259120464324951,grad_norm: 0.9185443748682256, iteration: 429880
loss: 0.9930490255355835,grad_norm: 0.6522373573480154, iteration: 429881
loss: 1.0333154201507568,grad_norm: 0.7752836694664091, iteration: 429882
loss: 1.0197031497955322,grad_norm: 0.841090672692433, iteration: 429883
loss: 0.9807769656181335,grad_norm: 0.8068269271788912, iteration: 429884
loss: 0.9812204837799072,grad_norm: 0.8815896355655362, iteration: 429885
loss: 0.9824485182762146,grad_norm: 0.7512412869527774, iteration: 429886
loss: 0.9844673871994019,grad_norm: 0.9999995210461191, iteration: 429887
loss: 0.9845741391181946,grad_norm: 0.828680888117001, iteration: 429888
loss: 1.0227819681167603,grad_norm: 0.8496369065414529, iteration: 429889
loss: 1.0207245349884033,grad_norm: 0.7694059671858455, iteration: 429890
loss: 1.0396784543991089,grad_norm: 0.8638404224739086, iteration: 429891
loss: 1.0598622560501099,grad_norm: 0.739487238831618, iteration: 429892
loss: 1.020125150680542,grad_norm: 0.8683093808578035, iteration: 429893
loss: 0.9841321110725403,grad_norm: 0.7140567020441635, iteration: 429894
loss: 0.983115017414093,grad_norm: 0.774075369833268, iteration: 429895
loss: 0.9769948124885559,grad_norm: 0.6952602846196423, iteration: 429896
loss: 1.036983609199524,grad_norm: 0.772602875345261, iteration: 429897
loss: 1.0185579061508179,grad_norm: 0.5724840865037055, iteration: 429898
loss: 1.164873719215393,grad_norm: 0.99999978195297, iteration: 429899
loss: 1.0271964073181152,grad_norm: 0.8779989791524054, iteration: 429900
loss: 0.9766198396682739,grad_norm: 0.7477427390902248, iteration: 429901
loss: 0.9958726763725281,grad_norm: 0.8566354081616294, iteration: 429902
loss: 1.138633370399475,grad_norm: 0.9999994950514192, iteration: 429903
loss: 1.033113956451416,grad_norm: 0.9999997475694845, iteration: 429904
loss: 1.0981746912002563,grad_norm: 0.9999999242142188, iteration: 429905
loss: 1.1111029386520386,grad_norm: 0.9999994899729518, iteration: 429906
loss: 1.028254747390747,grad_norm: 0.9999997078278319, iteration: 429907
loss: 1.0119481086730957,grad_norm: 0.7805124777503389, iteration: 429908
loss: 1.037042260169983,grad_norm: 0.6955409405039142, iteration: 429909
loss: 0.9959906935691833,grad_norm: 0.7705093750546584, iteration: 429910
loss: 1.0050315856933594,grad_norm: 0.6614583313906456, iteration: 429911
loss: 0.9929139018058777,grad_norm: 0.7573260051953239, iteration: 429912
loss: 1.0285664796829224,grad_norm: 0.9421089994854246, iteration: 429913
loss: 0.9878146648406982,grad_norm: 0.8040809093265402, iteration: 429914
loss: 1.0804023742675781,grad_norm: 0.8529248232380918, iteration: 429915
loss: 0.9702427387237549,grad_norm: 0.9589073415882013, iteration: 429916
loss: 0.9906120896339417,grad_norm: 0.9536724333704683, iteration: 429917
loss: 1.0322598218917847,grad_norm: 0.9999998916307963, iteration: 429918
loss: 0.9929238557815552,grad_norm: 0.8226912988399282, iteration: 429919
loss: 1.0403653383255005,grad_norm: 0.9258028485122549, iteration: 429920
loss: 0.9778614640235901,grad_norm: 0.8271451483766303, iteration: 429921
loss: 0.9833273887634277,grad_norm: 0.7794759613937684, iteration: 429922
loss: 1.0381464958190918,grad_norm: 0.8441674297807951, iteration: 429923
loss: 1.0036399364471436,grad_norm: 0.7895139714709397, iteration: 429924
loss: 1.0059549808502197,grad_norm: 0.722673206744049, iteration: 429925
loss: 1.0204335451126099,grad_norm: 0.8231492445256821, iteration: 429926
loss: 1.0447973012924194,grad_norm: 0.8179867743345787, iteration: 429927
loss: 0.9990667700767517,grad_norm: 0.8023866011357118, iteration: 429928
loss: 1.017754077911377,grad_norm: 0.723552300534516, iteration: 429929
loss: 0.9979169964790344,grad_norm: 0.9840572698110537, iteration: 429930
loss: 0.9935246706008911,grad_norm: 0.6954450645389385, iteration: 429931
loss: 0.9752294421195984,grad_norm: 0.7776012238334359, iteration: 429932
loss: 1.013177514076233,grad_norm: 0.846115989016647, iteration: 429933
loss: 0.9776942729949951,grad_norm: 0.9999993769159239, iteration: 429934
loss: 1.0431538820266724,grad_norm: 0.9372258238903699, iteration: 429935
loss: 0.9672694802284241,grad_norm: 0.6500422767598291, iteration: 429936
loss: 1.0298268795013428,grad_norm: 0.7342944664614593, iteration: 429937
loss: 1.0130144357681274,grad_norm: 0.9358078522905877, iteration: 429938
loss: 0.9772221446037292,grad_norm: 0.8167574456420421, iteration: 429939
loss: 1.0189374685287476,grad_norm: 0.8953553444688233, iteration: 429940
loss: 0.9632688760757446,grad_norm: 0.6497718949983282, iteration: 429941
loss: 0.975095272064209,grad_norm: 0.8695485596349605, iteration: 429942
loss: 1.001832127571106,grad_norm: 0.6922311049575257, iteration: 429943
loss: 0.9790080785751343,grad_norm: 0.7927055423233607, iteration: 429944
loss: 0.9873327612876892,grad_norm: 0.9493769976113596, iteration: 429945
loss: 0.9751343131065369,grad_norm: 0.8297258189353578, iteration: 429946
loss: 0.9832808971405029,grad_norm: 0.7293216749539725, iteration: 429947
loss: 1.0128583908081055,grad_norm: 0.8069526063871155, iteration: 429948
loss: 0.9913588166236877,grad_norm: 0.6173598377411429, iteration: 429949
loss: 0.9883509874343872,grad_norm: 0.8416667347152078, iteration: 429950
loss: 1.006596565246582,grad_norm: 0.6358035367405719, iteration: 429951
loss: 0.9818406105041504,grad_norm: 0.870117914990324, iteration: 429952
loss: 1.0028564929962158,grad_norm: 0.6960216762009452, iteration: 429953
loss: 1.015053629875183,grad_norm: 0.7237880408011063, iteration: 429954
loss: 0.9754857420921326,grad_norm: 0.6810528781235891, iteration: 429955
loss: 1.049201488494873,grad_norm: 0.999999164144462, iteration: 429956
loss: 1.0279053449630737,grad_norm: 0.7461411789283213, iteration: 429957
loss: 1.0051568746566772,grad_norm: 0.9756063250407409, iteration: 429958
loss: 1.0276106595993042,grad_norm: 0.9625420663801801, iteration: 429959
loss: 0.93342125415802,grad_norm: 0.870216248039219, iteration: 429960
loss: 1.0276544094085693,grad_norm: 0.8446208129482249, iteration: 429961
loss: 1.0605177879333496,grad_norm: 0.9493379306409894, iteration: 429962
loss: 1.0076086521148682,grad_norm: 0.6957545344585919, iteration: 429963
loss: 1.0497044324874878,grad_norm: 0.7315062507153356, iteration: 429964
loss: 1.0027132034301758,grad_norm: 0.7410823505683845, iteration: 429965
loss: 1.0216294527053833,grad_norm: 0.804970818598932, iteration: 429966
loss: 0.9872114658355713,grad_norm: 0.7496418028971066, iteration: 429967
loss: 1.0013909339904785,grad_norm: 0.9554632773178998, iteration: 429968
loss: 0.9541688561439514,grad_norm: 0.6592705234573933, iteration: 429969
loss: 1.0150654315948486,grad_norm: 0.7698418173373294, iteration: 429970
loss: 0.9830248355865479,grad_norm: 0.7427950772403662, iteration: 429971
loss: 1.0202279090881348,grad_norm: 0.7765536692567188, iteration: 429972
loss: 1.0157594680786133,grad_norm: 0.6795609375695656, iteration: 429973
loss: 0.9516760110855103,grad_norm: 0.7214984490873203, iteration: 429974
loss: 0.993657648563385,grad_norm: 0.6878914797317865, iteration: 429975
loss: 0.9888049960136414,grad_norm: 0.7358849507448428, iteration: 429976
loss: 1.0533052682876587,grad_norm: 0.7021593213402657, iteration: 429977
loss: 1.0324926376342773,grad_norm: 0.9999992630948795, iteration: 429978
loss: 1.033765435218811,grad_norm: 0.9999996622719132, iteration: 429979
loss: 0.9896909594535828,grad_norm: 0.9999989480585821, iteration: 429980
loss: 0.979394793510437,grad_norm: 0.7264812198077584, iteration: 429981
loss: 1.02106773853302,grad_norm: 0.9999999954668483, iteration: 429982
loss: 1.0156394243240356,grad_norm: 0.8430202155871489, iteration: 429983
loss: 1.0680190324783325,grad_norm: 0.9999996449987296, iteration: 429984
loss: 1.0789821147918701,grad_norm: 0.9225343166303095, iteration: 429985
loss: 0.9958297610282898,grad_norm: 0.9982684406552654, iteration: 429986
loss: 1.0461779832839966,grad_norm: 0.7162182308550444, iteration: 429987
loss: 0.9981276392936707,grad_norm: 0.6681519660985313, iteration: 429988
loss: 0.9870067834854126,grad_norm: 0.8561422247594239, iteration: 429989
loss: 1.0245004892349243,grad_norm: 0.8539979205586749, iteration: 429990
loss: 1.0126516819000244,grad_norm: 0.7018401330490348, iteration: 429991
loss: 0.9684579968452454,grad_norm: 0.789122054255712, iteration: 429992
loss: 1.0009821653366089,grad_norm: 0.9307685297106828, iteration: 429993
loss: 0.999605655670166,grad_norm: 0.9999990668572635, iteration: 429994
loss: 1.0555105209350586,grad_norm: 0.7566852268751493, iteration: 429995
loss: 1.0945745706558228,grad_norm: 0.9999992442273621, iteration: 429996
loss: 1.017411708831787,grad_norm: 0.6479505689999696, iteration: 429997
loss: 0.9864872694015503,grad_norm: 0.6587360953379386, iteration: 429998
loss: 0.9940148591995239,grad_norm: 0.6835828365318759, iteration: 429999
loss: 0.999739408493042,grad_norm: 0.7366821890759172, iteration: 430000
Evaluating at step 430000
{'val': 0.9942834805697203, 'test': 1.9185886927686941}
loss: 0.9845962524414062,grad_norm: 0.8143413771499421, iteration: 430001
loss: 1.033301591873169,grad_norm: 0.7718085661818629, iteration: 430002
loss: 1.002898097038269,grad_norm: 0.7075050193953036, iteration: 430003
loss: 1.014878749847412,grad_norm: 0.7128945260295312, iteration: 430004
loss: 0.9993546009063721,grad_norm: 0.7616336851242828, iteration: 430005
loss: 0.9818750023841858,grad_norm: 0.7192099079480851, iteration: 430006
loss: 1.0564996004104614,grad_norm: 0.7556274470059617, iteration: 430007
loss: 0.9956634044647217,grad_norm: 0.8478620770475177, iteration: 430008
loss: 0.978022038936615,grad_norm: 0.6529416154216601, iteration: 430009
loss: 1.0493800640106201,grad_norm: 0.8242074131386834, iteration: 430010
loss: 0.9984611868858337,grad_norm: 0.9999997931400149, iteration: 430011
loss: 0.9892741441726685,grad_norm: 0.8405582747272257, iteration: 430012
loss: 1.0555318593978882,grad_norm: 0.9999997684993565, iteration: 430013
loss: 1.0102726221084595,grad_norm: 0.7855723920635349, iteration: 430014
loss: 1.0160605907440186,grad_norm: 0.9786709153986004, iteration: 430015
loss: 1.035185694694519,grad_norm: 0.8275209506847809, iteration: 430016
loss: 0.9420572519302368,grad_norm: 0.8655958242750136, iteration: 430017
loss: 0.9938076734542847,grad_norm: 0.8242693065667259, iteration: 430018
loss: 1.0238182544708252,grad_norm: 0.8328805360445947, iteration: 430019
loss: 1.2752765417099,grad_norm: 0.9999991964941276, iteration: 430020
loss: 0.974847674369812,grad_norm: 0.7801761235158986, iteration: 430021
loss: 1.0448148250579834,grad_norm: 0.9999997490724569, iteration: 430022
loss: 0.9718173146247864,grad_norm: 0.7300915781229008, iteration: 430023
loss: 1.0420987606048584,grad_norm: 0.9999990721624561, iteration: 430024
loss: 0.9980466961860657,grad_norm: 0.8873296063446722, iteration: 430025
loss: 0.9561915993690491,grad_norm: 0.783591828363407, iteration: 430026
loss: 1.1190663576126099,grad_norm: 0.9999996470194111, iteration: 430027
loss: 0.9999702572822571,grad_norm: 0.9412464080337366, iteration: 430028
loss: 0.9683700203895569,grad_norm: 0.7754700315975411, iteration: 430029
loss: 1.0115630626678467,grad_norm: 0.7555512058478838, iteration: 430030
loss: 0.9902582168579102,grad_norm: 0.7049349022877989, iteration: 430031
loss: 0.9699023962020874,grad_norm: 0.7550725456526592, iteration: 430032
loss: 1.0099691152572632,grad_norm: 0.6892595175094017, iteration: 430033
loss: 0.9838508367538452,grad_norm: 0.7650268986548194, iteration: 430034
loss: 0.991496205329895,grad_norm: 0.8184273792611325, iteration: 430035
loss: 0.981945276260376,grad_norm: 0.749161611741843, iteration: 430036
loss: 0.9842069745063782,grad_norm: 0.718321121888354, iteration: 430037
loss: 0.9931498169898987,grad_norm: 0.747510967741632, iteration: 430038
loss: 1.0092322826385498,grad_norm: 0.7673394401341235, iteration: 430039
loss: 1.0108413696289062,grad_norm: 0.8910174044529157, iteration: 430040
loss: 1.0250205993652344,grad_norm: 0.8160105564866119, iteration: 430041
loss: 1.0377429723739624,grad_norm: 0.9999990053323832, iteration: 430042
loss: 0.9728236794471741,grad_norm: 0.8361396587648899, iteration: 430043
loss: 1.0016319751739502,grad_norm: 0.9976433917689685, iteration: 430044
loss: 0.991251528263092,grad_norm: 0.9432842548652048, iteration: 430045
loss: 1.0127063989639282,grad_norm: 0.9999990112231985, iteration: 430046
loss: 0.9686933755874634,grad_norm: 0.9999991177247689, iteration: 430047
loss: 1.0862611532211304,grad_norm: 0.9120566196711042, iteration: 430048
loss: 1.0157393217086792,grad_norm: 0.8452780658839867, iteration: 430049
loss: 1.041920781135559,grad_norm: 0.9256690429455233, iteration: 430050
loss: 1.000606894493103,grad_norm: 0.7525353066474483, iteration: 430051
loss: 0.9950284957885742,grad_norm: 0.696537611063211, iteration: 430052
loss: 0.9710190296173096,grad_norm: 0.8038157221260247, iteration: 430053
loss: 0.9703507423400879,grad_norm: 0.8145972791860553, iteration: 430054
loss: 0.9917561411857605,grad_norm: 0.7265014964576099, iteration: 430055
loss: 1.0156747102737427,grad_norm: 0.8103947292205896, iteration: 430056
loss: 1.0086121559143066,grad_norm: 0.948272414239205, iteration: 430057
loss: 1.0570982694625854,grad_norm: 0.9999998274149142, iteration: 430058
loss: 0.9880523681640625,grad_norm: 0.7952878439066008, iteration: 430059
loss: 1.0648794174194336,grad_norm: 0.6952742254980996, iteration: 430060
loss: 0.9952849745750427,grad_norm: 0.7709996995079325, iteration: 430061
loss: 1.018100619316101,grad_norm: 0.9369802933999064, iteration: 430062
loss: 0.9894335269927979,grad_norm: 0.8129498803809331, iteration: 430063
loss: 0.970836341381073,grad_norm: 0.7904520602632311, iteration: 430064
loss: 0.9510318636894226,grad_norm: 0.7012628631150365, iteration: 430065
loss: 0.9701570868492126,grad_norm: 0.7555063699606412, iteration: 430066
loss: 0.9844510555267334,grad_norm: 0.8027718231998604, iteration: 430067
loss: 1.0316321849822998,grad_norm: 0.7585575896592052, iteration: 430068
loss: 1.0002732276916504,grad_norm: 0.8072518188123798, iteration: 430069
loss: 1.0073068141937256,grad_norm: 0.9999991493483771, iteration: 430070
loss: 0.9870649576187134,grad_norm: 0.8275683556751753, iteration: 430071
loss: 0.993537187576294,grad_norm: 0.9999992180877516, iteration: 430072
loss: 1.0023595094680786,grad_norm: 0.8188828144879752, iteration: 430073
loss: 1.0058093070983887,grad_norm: 0.9999995372368264, iteration: 430074
loss: 0.9962105751037598,grad_norm: 0.6313430838308836, iteration: 430075
loss: 1.0058057308197021,grad_norm: 0.697014976889934, iteration: 430076
loss: 1.1014207601547241,grad_norm: 0.9999992222942184, iteration: 430077
loss: 1.0246169567108154,grad_norm: 0.8654933567705653, iteration: 430078
loss: 1.0014631748199463,grad_norm: 0.8329784261777737, iteration: 430079
loss: 1.0217739343643188,grad_norm: 0.9949640117084114, iteration: 430080
loss: 1.0322985649108887,grad_norm: 0.701596569746866, iteration: 430081
loss: 1.0359077453613281,grad_norm: 0.8018875376650829, iteration: 430082
loss: 1.0088499784469604,grad_norm: 0.7511120237911194, iteration: 430083
loss: 1.0003795623779297,grad_norm: 0.8360372408197241, iteration: 430084
loss: 0.9997316598892212,grad_norm: 0.7304549372178887, iteration: 430085
loss: 0.9903337359428406,grad_norm: 0.8461838161360193, iteration: 430086
loss: 0.9842413067817688,grad_norm: 0.7875174096652421, iteration: 430087
loss: 0.9832957983016968,grad_norm: 0.7152718621802086, iteration: 430088
loss: 0.9659178256988525,grad_norm: 0.6345352703724786, iteration: 430089
loss: 0.9985119700431824,grad_norm: 0.709485555073763, iteration: 430090
loss: 0.9832375049591064,grad_norm: 0.9999993006018514, iteration: 430091
loss: 0.9636112451553345,grad_norm: 0.910283397930504, iteration: 430092
loss: 1.0164918899536133,grad_norm: 0.8305975639944916, iteration: 430093
loss: 1.06300950050354,grad_norm: 0.7636740630203697, iteration: 430094
loss: 0.9859023094177246,grad_norm: 0.9999992113911006, iteration: 430095
loss: 1.0306119918823242,grad_norm: 0.676129981985269, iteration: 430096
loss: 0.9668912887573242,grad_norm: 0.6813184534082217, iteration: 430097
loss: 1.0260037183761597,grad_norm: 0.8006365142310522, iteration: 430098
loss: 1.016697883605957,grad_norm: 0.8589046300614012, iteration: 430099
loss: 0.9915055632591248,grad_norm: 0.9992068355633817, iteration: 430100
loss: 0.9944810271263123,grad_norm: 0.8310181471411019, iteration: 430101
loss: 0.9767142534255981,grad_norm: 0.9999992774762925, iteration: 430102
loss: 0.9938641786575317,grad_norm: 0.7765492762007361, iteration: 430103
loss: 1.0090196132659912,grad_norm: 0.7665674018468496, iteration: 430104
loss: 0.9923160672187805,grad_norm: 0.9072733950067023, iteration: 430105
loss: 0.9873881936073303,grad_norm: 0.7194508904437361, iteration: 430106
loss: 1.021621584892273,grad_norm: 0.8025556011872409, iteration: 430107
loss: 1.0172557830810547,grad_norm: 0.7840691868407605, iteration: 430108
loss: 0.9640635848045349,grad_norm: 0.7256418814578711, iteration: 430109
loss: 1.0248526334762573,grad_norm: 0.7553697236098099, iteration: 430110
loss: 1.0332186222076416,grad_norm: 0.7702498144803676, iteration: 430111
loss: 1.0024973154067993,grad_norm: 0.7385800493826011, iteration: 430112
loss: 1.059219241142273,grad_norm: 0.74885410211415, iteration: 430113
loss: 0.9880867600440979,grad_norm: 0.692625904044236, iteration: 430114
loss: 0.9705815315246582,grad_norm: 0.8001232533598395, iteration: 430115
loss: 1.0050952434539795,grad_norm: 0.7980567172112227, iteration: 430116
loss: 1.0439878702163696,grad_norm: 0.9999992661864192, iteration: 430117
loss: 1.0354996919631958,grad_norm: 0.9999992356756031, iteration: 430118
loss: 0.995462954044342,grad_norm: 0.663310326157342, iteration: 430119
loss: 1.0511685609817505,grad_norm: 0.9999990596540345, iteration: 430120
loss: 1.0097095966339111,grad_norm: 0.78364773063333, iteration: 430121
loss: 1.0061590671539307,grad_norm: 0.9999990693825478, iteration: 430122
loss: 0.9848468899726868,grad_norm: 0.7051950561683243, iteration: 430123
loss: 0.9913479685783386,grad_norm: 0.6990133303223193, iteration: 430124
loss: 1.0121856927871704,grad_norm: 0.7658517295950683, iteration: 430125
loss: 0.993582010269165,grad_norm: 0.9724999480412471, iteration: 430126
loss: 0.9992625117301941,grad_norm: 0.7948186053308741, iteration: 430127
loss: 1.0902202129364014,grad_norm: 0.9999999433714628, iteration: 430128
loss: 0.9850201606750488,grad_norm: 0.7697068523301294, iteration: 430129
loss: 1.1071698665618896,grad_norm: 0.9999990879549973, iteration: 430130
loss: 0.9800387024879456,grad_norm: 0.7673891194805068, iteration: 430131
loss: 1.046134352684021,grad_norm: 0.999999419878093, iteration: 430132
loss: 1.0932176113128662,grad_norm: 0.9999994794393785, iteration: 430133
loss: 1.006003975868225,grad_norm: 0.9412814875956962, iteration: 430134
loss: 0.9972166419029236,grad_norm: 0.7239789190734189, iteration: 430135
loss: 0.9988757967948914,grad_norm: 0.8204707443420272, iteration: 430136
loss: 1.0012917518615723,grad_norm: 0.6550215973834947, iteration: 430137
loss: 1.0089565515518188,grad_norm: 0.7532042781885819, iteration: 430138
loss: 1.0282394886016846,grad_norm: 0.7783430316366939, iteration: 430139
loss: 0.9981480240821838,grad_norm: 0.8469798629973196, iteration: 430140
loss: 0.9728715419769287,grad_norm: 0.7013646028474398, iteration: 430141
loss: 0.9420274496078491,grad_norm: 0.7737455806192541, iteration: 430142
loss: 0.970794677734375,grad_norm: 0.770023942885409, iteration: 430143
loss: 1.0010759830474854,grad_norm: 0.6869856755346655, iteration: 430144
loss: 1.012703776359558,grad_norm: 0.9213357011928984, iteration: 430145
loss: 0.994536280632019,grad_norm: 0.8276962316439459, iteration: 430146
loss: 1.0150119066238403,grad_norm: 0.99125823451811, iteration: 430147
loss: 1.032160758972168,grad_norm: 0.8240592268069661, iteration: 430148
loss: 0.9725869297981262,grad_norm: 0.8053694173195964, iteration: 430149
loss: 1.0420619249343872,grad_norm: 0.9218042994331578, iteration: 430150
loss: 1.0790202617645264,grad_norm: 0.8108307403320987, iteration: 430151
loss: 1.001466989517212,grad_norm: 0.8183326644922916, iteration: 430152
loss: 0.9809235334396362,grad_norm: 0.7584008407870174, iteration: 430153
loss: 1.0251452922821045,grad_norm: 0.684571422950876, iteration: 430154
loss: 0.9965084195137024,grad_norm: 0.8402670828357115, iteration: 430155
loss: 1.0022904872894287,grad_norm: 0.7644926454497499, iteration: 430156
loss: 0.9912198781967163,grad_norm: 0.7303844315164085, iteration: 430157
loss: 1.0206729173660278,grad_norm: 0.7909461568306299, iteration: 430158
loss: 1.0023664236068726,grad_norm: 0.9999996781586024, iteration: 430159
loss: 0.9819390177726746,grad_norm: 0.7259191543293186, iteration: 430160
loss: 0.9946834444999695,grad_norm: 0.7038440392424524, iteration: 430161
loss: 1.0673898458480835,grad_norm: 0.7140503859227862, iteration: 430162
loss: 1.1178475618362427,grad_norm: 0.7997181379657323, iteration: 430163
loss: 0.9999290108680725,grad_norm: 0.9449484673370877, iteration: 430164
loss: 1.0222234725952148,grad_norm: 0.8320874857331253, iteration: 430165
loss: 1.0764223337173462,grad_norm: 0.7238321562452619, iteration: 430166
loss: 0.9980800151824951,grad_norm: 0.849753245444785, iteration: 430167
loss: 1.040558934211731,grad_norm: 0.9955952302958442, iteration: 430168
loss: 0.9982007741928101,grad_norm: 0.8385288982014015, iteration: 430169
loss: 1.0222209692001343,grad_norm: 0.6885022774530395, iteration: 430170
loss: 1.0653780698776245,grad_norm: 0.7311573097233482, iteration: 430171
loss: 1.1179628372192383,grad_norm: 0.987571609223744, iteration: 430172
loss: 1.0294594764709473,grad_norm: 0.8239128645764379, iteration: 430173
loss: 1.0016181468963623,grad_norm: 0.8576511303987032, iteration: 430174
loss: 1.0101276636123657,grad_norm: 0.8908035642833538, iteration: 430175
loss: 0.9919438362121582,grad_norm: 0.7706418340886317, iteration: 430176
loss: 1.0406476259231567,grad_norm: 0.9999995875675053, iteration: 430177
loss: 1.029411792755127,grad_norm: 0.8432028592309597, iteration: 430178
loss: 1.0812532901763916,grad_norm: 0.9999999005053833, iteration: 430179
loss: 0.9884570837020874,grad_norm: 0.8037232946099666, iteration: 430180
loss: 0.999527096748352,grad_norm: 0.8758656906428681, iteration: 430181
loss: 0.9947596192359924,grad_norm: 0.6782523395818452, iteration: 430182
loss: 1.0450116395950317,grad_norm: 0.8395358359417738, iteration: 430183
loss: 1.0225080251693726,grad_norm: 0.8965245288448748, iteration: 430184
loss: 1.0281741619110107,grad_norm: 0.763384281503558, iteration: 430185
loss: 1.009468674659729,grad_norm: 0.7849057536410958, iteration: 430186
loss: 0.9878629446029663,grad_norm: 0.820690679054905, iteration: 430187
loss: 1.0366917848587036,grad_norm: 0.8017619911677628, iteration: 430188
loss: 1.00314199924469,grad_norm: 0.8239149459484376, iteration: 430189
loss: 1.0601348876953125,grad_norm: 0.8064125567971823, iteration: 430190
loss: 0.980812132358551,grad_norm: 0.8199647763399951, iteration: 430191
loss: 1.032529592514038,grad_norm: 0.763455863626902, iteration: 430192
loss: 1.060530662536621,grad_norm: 0.7507087336245344, iteration: 430193
loss: 1.0534887313842773,grad_norm: 0.9999997988147413, iteration: 430194
loss: 1.0044039487838745,grad_norm: 0.8704469831588406, iteration: 430195
loss: 0.9618293046951294,grad_norm: 0.9371888089262992, iteration: 430196
loss: 1.0741325616836548,grad_norm: 0.7965491444446158, iteration: 430197
loss: 0.9926897287368774,grad_norm: 0.7474178819261749, iteration: 430198
loss: 1.0271679162979126,grad_norm: 0.7363156937690498, iteration: 430199
loss: 1.0154365301132202,grad_norm: 0.7264978052794041, iteration: 430200
loss: 1.0529643297195435,grad_norm: 0.9999991972002337, iteration: 430201
loss: 0.9868434071540833,grad_norm: 0.8897426615365948, iteration: 430202
loss: 1.096960425376892,grad_norm: 0.7341769462295324, iteration: 430203
loss: 1.0082483291625977,grad_norm: 0.8817406565849046, iteration: 430204
loss: 0.9781721234321594,grad_norm: 0.8784206559940346, iteration: 430205
loss: 1.0207716226577759,grad_norm: 0.7545684228085197, iteration: 430206
loss: 0.9706085324287415,grad_norm: 0.7740849683754844, iteration: 430207
loss: 0.9882817268371582,grad_norm: 0.7743329897091131, iteration: 430208
loss: 1.0981392860412598,grad_norm: 0.9999992561593921, iteration: 430209
loss: 1.0314645767211914,grad_norm: 0.8093031276226328, iteration: 430210
loss: 0.9934939742088318,grad_norm: 0.944837981169421, iteration: 430211
loss: 1.0133877992630005,grad_norm: 0.8349420565155703, iteration: 430212
loss: 1.0617784261703491,grad_norm: 0.9999993161394437, iteration: 430213
loss: 0.9969630837440491,grad_norm: 0.7597781215959698, iteration: 430214
loss: 0.9926412105560303,grad_norm: 0.9999997928668667, iteration: 430215
loss: 1.0301787853240967,grad_norm: 0.8820966835459644, iteration: 430216
loss: 1.0027892589569092,grad_norm: 0.8049775384524231, iteration: 430217
loss: 1.0446068048477173,grad_norm: 0.7642305807357184, iteration: 430218
loss: 1.0419642925262451,grad_norm: 0.7083426184873342, iteration: 430219
loss: 0.9586052298545837,grad_norm: 0.7734503365127539, iteration: 430220
loss: 1.0139801502227783,grad_norm: 0.9828055277809801, iteration: 430221
loss: 1.0321035385131836,grad_norm: 0.9132114801094157, iteration: 430222
loss: 0.996481716632843,grad_norm: 0.8503431601251292, iteration: 430223
loss: 0.9685025215148926,grad_norm: 0.7481546040346754, iteration: 430224
loss: 0.9912816882133484,grad_norm: 0.7862868925491484, iteration: 430225
loss: 1.057551622390747,grad_norm: 0.9999994816521922, iteration: 430226
loss: 1.0105624198913574,grad_norm: 0.719589565265697, iteration: 430227
loss: 0.9848636388778687,grad_norm: 0.8318278548547832, iteration: 430228
loss: 1.0178544521331787,grad_norm: 0.6483362826600185, iteration: 430229
loss: 1.0497727394104004,grad_norm: 0.9999996811263016, iteration: 430230
loss: 0.9824812412261963,grad_norm: 0.866880857486304, iteration: 430231
loss: 0.9849216938018799,grad_norm: 0.8678782469035755, iteration: 430232
loss: 1.0139319896697998,grad_norm: 0.8243662944882124, iteration: 430233
loss: 0.9866705536842346,grad_norm: 0.8853958960850755, iteration: 430234
loss: 1.0073457956314087,grad_norm: 0.9517128662986368, iteration: 430235
loss: 1.027854084968567,grad_norm: 0.7373829133719955, iteration: 430236
loss: 1.0387163162231445,grad_norm: 0.7120221923696848, iteration: 430237
loss: 0.9777493476867676,grad_norm: 0.6998632032405615, iteration: 430238
loss: 1.0019729137420654,grad_norm: 0.8655835023270159, iteration: 430239
loss: 0.9847145080566406,grad_norm: 0.6459623012273515, iteration: 430240
loss: 1.00925612449646,grad_norm: 0.8110446520782152, iteration: 430241
loss: 1.0845088958740234,grad_norm: 0.8681207242623499, iteration: 430242
loss: 0.9902364611625671,grad_norm: 0.8440941436167435, iteration: 430243
loss: 1.000144600868225,grad_norm: 0.7744879076087756, iteration: 430244
loss: 1.0204172134399414,grad_norm: 0.9999996246149869, iteration: 430245
loss: 1.0309680700302124,grad_norm: 0.9335247012431136, iteration: 430246
loss: 1.0726323127746582,grad_norm: 0.9415416025520955, iteration: 430247
loss: 0.9932991862297058,grad_norm: 0.7615385119516372, iteration: 430248
loss: 1.017883539199829,grad_norm: 0.8150821299869535, iteration: 430249
loss: 0.9849570393562317,grad_norm: 0.7463578217314247, iteration: 430250
loss: 1.0083508491516113,grad_norm: 0.9540575463974159, iteration: 430251
loss: 1.0363667011260986,grad_norm: 0.7382317922145234, iteration: 430252
loss: 0.9490735530853271,grad_norm: 0.8955980375420075, iteration: 430253
loss: 0.9913062453269958,grad_norm: 0.7188237286982893, iteration: 430254
loss: 1.0314639806747437,grad_norm: 0.9748183698557187, iteration: 430255
loss: 0.9497281312942505,grad_norm: 0.7700661040529241, iteration: 430256
loss: 1.0629510879516602,grad_norm: 0.7827420624297958, iteration: 430257
loss: 0.9659101963043213,grad_norm: 0.8912028955801274, iteration: 430258
loss: 0.9833686351776123,grad_norm: 0.8243876719328928, iteration: 430259
loss: 1.0476469993591309,grad_norm: 0.7407988227964261, iteration: 430260
loss: 1.010655164718628,grad_norm: 0.846210757081335, iteration: 430261
loss: 0.9606887698173523,grad_norm: 0.7124442031547296, iteration: 430262
loss: 1.016045093536377,grad_norm: 0.8613003778692796, iteration: 430263
loss: 0.9882134199142456,grad_norm: 0.999999756075034, iteration: 430264
loss: 0.9944114089012146,grad_norm: 0.692085121706565, iteration: 430265
loss: 1.0123729705810547,grad_norm: 0.8561678707235058, iteration: 430266
loss: 0.9817584156990051,grad_norm: 0.7546137018492788, iteration: 430267
loss: 0.9780720472335815,grad_norm: 0.7095471459456909, iteration: 430268
loss: 1.0259093046188354,grad_norm: 0.7701020954431503, iteration: 430269
loss: 1.1156952381134033,grad_norm: 0.9897608371280127, iteration: 430270
loss: 1.005844235420227,grad_norm: 0.8274398780139504, iteration: 430271
loss: 0.9894784092903137,grad_norm: 0.619376418365325, iteration: 430272
loss: 1.0226263999938965,grad_norm: 0.8864680557262196, iteration: 430273
loss: 1.0171164274215698,grad_norm: 0.9999990133016035, iteration: 430274
loss: 0.9936994910240173,grad_norm: 0.8152928046537607, iteration: 430275
loss: 0.9878525137901306,grad_norm: 0.8370821917262102, iteration: 430276
loss: 0.9900504946708679,grad_norm: 0.9999990133985853, iteration: 430277
loss: 1.0070476531982422,grad_norm: 0.763595712525337, iteration: 430278
loss: 1.02095627784729,grad_norm: 0.7389305006324005, iteration: 430279
loss: 1.0306435823440552,grad_norm: 0.7745468290165781, iteration: 430280
loss: 1.0128873586654663,grad_norm: 0.659188685306127, iteration: 430281
loss: 0.9988545775413513,grad_norm: 0.901250046997045, iteration: 430282
loss: 1.0558247566223145,grad_norm: 0.8585436196830517, iteration: 430283
loss: 1.1346451044082642,grad_norm: 0.9999992244539271, iteration: 430284
loss: 1.0021867752075195,grad_norm: 0.999999738891304, iteration: 430285
loss: 1.0057578086853027,grad_norm: 0.7124726397759537, iteration: 430286
loss: 1.0423966646194458,grad_norm: 0.7432578778768663, iteration: 430287
loss: 1.0223833322525024,grad_norm: 0.6315140026487991, iteration: 430288
loss: 0.9975308775901794,grad_norm: 0.7483157095184193, iteration: 430289
loss: 1.0205645561218262,grad_norm: 0.9999991100714379, iteration: 430290
loss: 0.9931055903434753,grad_norm: 0.6740215018559311, iteration: 430291
loss: 0.9886307716369629,grad_norm: 0.8261265562554517, iteration: 430292
loss: 0.9981426000595093,grad_norm: 0.8046894437161359, iteration: 430293
loss: 1.0174221992492676,grad_norm: 0.8090594367790689, iteration: 430294
loss: 0.9906888008117676,grad_norm: 0.9054076352495452, iteration: 430295
loss: 0.9799777865409851,grad_norm: 0.9999992316795453, iteration: 430296
loss: 1.015048623085022,grad_norm: 0.7457948925963717, iteration: 430297
loss: 0.9793267846107483,grad_norm: 0.6995307042200012, iteration: 430298
loss: 0.972881555557251,grad_norm: 0.7236996745666651, iteration: 430299
loss: 1.006481409072876,grad_norm: 0.9398161746659166, iteration: 430300
loss: 0.9878197908401489,grad_norm: 0.797004999713189, iteration: 430301
loss: 0.9931454062461853,grad_norm: 0.818125540501213, iteration: 430302
loss: 1.0001314878463745,grad_norm: 0.8391238902654938, iteration: 430303
loss: 1.012980341911316,grad_norm: 0.7299249472846615, iteration: 430304
loss: 1.0061924457550049,grad_norm: 0.7777704908752278, iteration: 430305
loss: 1.0089572668075562,grad_norm: 0.8061711024742685, iteration: 430306
loss: 0.9787921905517578,grad_norm: 0.8938175135767857, iteration: 430307
loss: 0.9832387566566467,grad_norm: 0.6496879750147353, iteration: 430308
loss: 1.011385202407837,grad_norm: 0.7282965380126671, iteration: 430309
loss: 0.9804409742355347,grad_norm: 0.801132638740264, iteration: 430310
loss: 0.9921560883522034,grad_norm: 0.7508645941427801, iteration: 430311
loss: 1.0100998878479004,grad_norm: 0.999999149089203, iteration: 430312
loss: 1.1117489337921143,grad_norm: 0.9999990268824638, iteration: 430313
loss: 0.9897026419639587,grad_norm: 0.7428601377076581, iteration: 430314
loss: 1.0211529731750488,grad_norm: 0.8624912437946592, iteration: 430315
loss: 0.9874732494354248,grad_norm: 0.9999998398885388, iteration: 430316
loss: 1.220912218093872,grad_norm: 0.9999995487323596, iteration: 430317
loss: 1.0602444410324097,grad_norm: 0.8102187393814034, iteration: 430318
loss: 1.1667479276657104,grad_norm: 0.9945418395402736, iteration: 430319
loss: 1.6791177988052368,grad_norm: 0.9999998594369339, iteration: 430320
loss: 0.9566932916641235,grad_norm: 0.999999231965788, iteration: 430321
loss: 1.0198283195495605,grad_norm: 0.9999999456391137, iteration: 430322
loss: 0.977142333984375,grad_norm: 0.8189999669003473, iteration: 430323
loss: 1.0360196828842163,grad_norm: 0.7492662734907221, iteration: 430324
loss: 1.0334933996200562,grad_norm: 0.6712825659179619, iteration: 430325
loss: 1.3045084476470947,grad_norm: 0.9999995098579416, iteration: 430326
loss: 1.09417724609375,grad_norm: 0.9999999640378987, iteration: 430327
loss: 0.999911904335022,grad_norm: 0.9765267835743348, iteration: 430328
loss: 1.0787029266357422,grad_norm: 1.0000000678369039, iteration: 430329
loss: 1.0075194835662842,grad_norm: 0.6890776630406101, iteration: 430330
loss: 0.9612893462181091,grad_norm: 0.7335659556534299, iteration: 430331
loss: 1.1579874753952026,grad_norm: 0.9999997332583124, iteration: 430332
loss: 0.9792082905769348,grad_norm: 0.7375539041610192, iteration: 430333
loss: 1.0919981002807617,grad_norm: 0.9999992451015839, iteration: 430334
loss: 0.9561981558799744,grad_norm: 0.8099826032551026, iteration: 430335
loss: 0.9496731758117676,grad_norm: 0.6967166517305284, iteration: 430336
loss: 1.0043913125991821,grad_norm: 0.8664723198218315, iteration: 430337
loss: 1.0351954698562622,grad_norm: 0.8444050228885649, iteration: 430338
loss: 0.9723129272460938,grad_norm: 0.8288020600727914, iteration: 430339
loss: 0.9874337315559387,grad_norm: 0.7634499141819354, iteration: 430340
loss: 0.98237144947052,grad_norm: 0.7486126643742751, iteration: 430341
loss: 0.9815076589584351,grad_norm: 0.782587419741658, iteration: 430342
loss: 1.0149377584457397,grad_norm: 0.7511682830125438, iteration: 430343
loss: 1.005967140197754,grad_norm: 0.7723321789465932, iteration: 430344
loss: 0.9818318486213684,grad_norm: 0.7086474649767806, iteration: 430345
loss: 0.9980157017707825,grad_norm: 0.999999995548953, iteration: 430346
loss: 1.0335513353347778,grad_norm: 0.9541822143186759, iteration: 430347
loss: 0.9739691615104675,grad_norm: 0.7592615548163297, iteration: 430348
loss: 0.9779032468795776,grad_norm: 0.8006120598337948, iteration: 430349
loss: 1.0164076089859009,grad_norm: 0.8451881570329798, iteration: 430350
loss: 0.9622500538825989,grad_norm: 0.8825799148463761, iteration: 430351
loss: 0.9847189784049988,grad_norm: 0.7975735874791642, iteration: 430352
loss: 1.015907883644104,grad_norm: 0.999999988429084, iteration: 430353
loss: 0.9901875257492065,grad_norm: 0.7600210515305846, iteration: 430354
loss: 1.0064548254013062,grad_norm: 0.82314212771482, iteration: 430355
loss: 1.019734501838684,grad_norm: 0.8030106896345381, iteration: 430356
loss: 0.9386324286460876,grad_norm: 0.9999989126442222, iteration: 430357
loss: 0.9957374930381775,grad_norm: 0.6934943988155056, iteration: 430358
loss: 0.9798768162727356,grad_norm: 0.7007465502883627, iteration: 430359
loss: 0.968247652053833,grad_norm: 0.984012526890633, iteration: 430360
loss: 1.008280873298645,grad_norm: 0.9999991687392981, iteration: 430361
loss: 1.0402076244354248,grad_norm: 0.9577276415000795, iteration: 430362
loss: 0.9733439683914185,grad_norm: 0.7722415083253681, iteration: 430363
loss: 0.9697823524475098,grad_norm: 0.8192334052836755, iteration: 430364
loss: 0.947593629360199,grad_norm: 0.6981708635132147, iteration: 430365
loss: 0.9753933548927307,grad_norm: 0.8093313558524305, iteration: 430366
loss: 0.965074896812439,grad_norm: 0.7828447481420442, iteration: 430367
loss: 1.0079560279846191,grad_norm: 0.7531629031528186, iteration: 430368
loss: 1.034631371498108,grad_norm: 0.9999995422458797, iteration: 430369
loss: 0.9975126385688782,grad_norm: 0.9929630256751172, iteration: 430370
loss: 0.9711320996284485,grad_norm: 0.8081337670747111, iteration: 430371
loss: 1.0322123765945435,grad_norm: 0.9011315617321677, iteration: 430372
loss: 0.9867954254150391,grad_norm: 0.6907922710026083, iteration: 430373
loss: 1.0910032987594604,grad_norm: 0.9999993671048436, iteration: 430374
loss: 1.0228512287139893,grad_norm: 0.75585879905546, iteration: 430375
loss: 1.0008163452148438,grad_norm: 0.8374666664016849, iteration: 430376
loss: 0.9938058853149414,grad_norm: 0.765321391116666, iteration: 430377
loss: 1.0199202299118042,grad_norm: 0.9999997595538646, iteration: 430378
loss: 0.9995869994163513,grad_norm: 0.772303524230527, iteration: 430379
loss: 1.0786845684051514,grad_norm: 0.9999996313378381, iteration: 430380
loss: 0.9745567440986633,grad_norm: 0.7187491858368533, iteration: 430381
loss: 1.0692263841629028,grad_norm: 0.9999996545492555, iteration: 430382
loss: 0.9953511357307434,grad_norm: 0.7694975204163395, iteration: 430383
loss: 0.9717422723770142,grad_norm: 0.8934575740277039, iteration: 430384
loss: 1.0223064422607422,grad_norm: 0.9999996750109315, iteration: 430385
loss: 1.0132945775985718,grad_norm: 0.7284277745617281, iteration: 430386
loss: 0.9823834300041199,grad_norm: 0.7346078679347608, iteration: 430387
loss: 0.9764187932014465,grad_norm: 0.9365316169907505, iteration: 430388
loss: 1.0068156719207764,grad_norm: 0.8439326395576622, iteration: 430389
loss: 0.9867932796478271,grad_norm: 0.7233714696319946, iteration: 430390
loss: 1.0027137994766235,grad_norm: 0.7837196886057591, iteration: 430391
loss: 1.0072702169418335,grad_norm: 0.9083252734574797, iteration: 430392
loss: 0.9961897730827332,grad_norm: 0.9905849813206964, iteration: 430393
loss: 0.980782151222229,grad_norm: 0.8702120444361472, iteration: 430394
loss: 1.0080162286758423,grad_norm: 0.9007175775466115, iteration: 430395
loss: 1.1643211841583252,grad_norm: 0.9999996038312945, iteration: 430396
loss: 0.9893996715545654,grad_norm: 0.6687189048055499, iteration: 430397
loss: 0.9397211670875549,grad_norm: 0.779199021440739, iteration: 430398
loss: 0.98661869764328,grad_norm: 0.7061386382828874, iteration: 430399
loss: 0.9658137559890747,grad_norm: 0.6834640367881112, iteration: 430400
loss: 1.0387651920318604,grad_norm: 0.683178947877818, iteration: 430401
loss: 1.0663913488388062,grad_norm: 0.9999999685595732, iteration: 430402
loss: 0.9914448261260986,grad_norm: 0.6742632909034968, iteration: 430403
loss: 1.1173584461212158,grad_norm: 0.9999991408176813, iteration: 430404
loss: 0.9675264954566956,grad_norm: 0.7274806940373894, iteration: 430405
loss: 1.0179756879806519,grad_norm: 0.807955515432242, iteration: 430406
loss: 1.028529167175293,grad_norm: 0.9999992730776185, iteration: 430407
loss: 1.0062272548675537,grad_norm: 0.8182426096351, iteration: 430408
loss: 1.0015345811843872,grad_norm: 0.7211636082396313, iteration: 430409
loss: 0.9936099052429199,grad_norm: 0.6957866999132372, iteration: 430410
loss: 0.9828686118125916,grad_norm: 0.6997031969850396, iteration: 430411
loss: 0.9411585927009583,grad_norm: 0.7863155444125464, iteration: 430412
loss: 0.9716340899467468,grad_norm: 0.762496023686938, iteration: 430413
loss: 0.9816556572914124,grad_norm: 0.7480704576002749, iteration: 430414
loss: 0.9935384392738342,grad_norm: 0.7048221351487565, iteration: 430415
loss: 0.9555769562721252,grad_norm: 0.7324979173437418, iteration: 430416
loss: 0.9764934182167053,grad_norm: 0.6707040415398706, iteration: 430417
loss: 0.9736420512199402,grad_norm: 0.9274019727768874, iteration: 430418
loss: 1.000178575515747,grad_norm: 0.8296099376767346, iteration: 430419
loss: 0.9883984923362732,grad_norm: 0.7934541883364231, iteration: 430420
loss: 1.0101431608200073,grad_norm: 0.6908268520194693, iteration: 430421
loss: 1.0053446292877197,grad_norm: 0.9394145007739858, iteration: 430422
loss: 1.0086363554000854,grad_norm: 0.7654737049112483, iteration: 430423
loss: 1.0346269607543945,grad_norm: 0.9915138914463972, iteration: 430424
loss: 1.0498543977737427,grad_norm: 0.7516420456464072, iteration: 430425
loss: 1.014294147491455,grad_norm: 0.7683935250610476, iteration: 430426
loss: 0.9954113960266113,grad_norm: 0.9999996260617933, iteration: 430427
loss: 0.9825195670127869,grad_norm: 0.7313648768913219, iteration: 430428
loss: 1.0955398082733154,grad_norm: 0.9480771771143847, iteration: 430429
loss: 1.0186221599578857,grad_norm: 0.8702765135641611, iteration: 430430
loss: 1.0011837482452393,grad_norm: 0.9044261414277102, iteration: 430431
loss: 1.0382145643234253,grad_norm: 0.6872463423576639, iteration: 430432
loss: 0.9627206325531006,grad_norm: 0.7891730754058469, iteration: 430433
loss: 1.0129778385162354,grad_norm: 0.6690124173339874, iteration: 430434
loss: 1.0077348947525024,grad_norm: 0.6694372312580671, iteration: 430435
loss: 1.0206166505813599,grad_norm: 0.8714312830765683, iteration: 430436
loss: 1.0044931173324585,grad_norm: 0.7146626579152158, iteration: 430437
loss: 1.0282026529312134,grad_norm: 0.6955077196794366, iteration: 430438
loss: 0.9944106340408325,grad_norm: 0.7419657381604486, iteration: 430439
loss: 1.002012014389038,grad_norm: 0.8044883271772214, iteration: 430440
loss: 0.9818046689033508,grad_norm: 0.7275021498522966, iteration: 430441
loss: 1.0023285150527954,grad_norm: 0.6660877160869187, iteration: 430442
loss: 1.0023670196533203,grad_norm: 0.8179895139121978, iteration: 430443
loss: 1.0191702842712402,grad_norm: 0.8547243936432998, iteration: 430444
loss: 1.1094807386398315,grad_norm: 0.9150139274699871, iteration: 430445
loss: 0.9717499613761902,grad_norm: 0.7110513398464338, iteration: 430446
loss: 0.9774841666221619,grad_norm: 0.8632194032309353, iteration: 430447
loss: 0.9889911413192749,grad_norm: 0.7729451499620317, iteration: 430448
loss: 1.0240081548690796,grad_norm: 0.9170238257566239, iteration: 430449
loss: 0.9653090834617615,grad_norm: 0.958895923369448, iteration: 430450
loss: 0.9918084144592285,grad_norm: 0.7809491325515969, iteration: 430451
loss: 0.991525411605835,grad_norm: 0.7695882440873011, iteration: 430452
loss: 0.9677993655204773,grad_norm: 0.7702566672158768, iteration: 430453
loss: 0.9390140175819397,grad_norm: 0.907497367489484, iteration: 430454
loss: 0.9990624785423279,grad_norm: 0.8152386353776367, iteration: 430455
loss: 1.0148952007293701,grad_norm: 0.8282123104020508, iteration: 430456
loss: 0.9735609292984009,grad_norm: 0.7881632608349819, iteration: 430457
loss: 1.0171329975128174,grad_norm: 0.7654124187253022, iteration: 430458
loss: 0.9866626262664795,grad_norm: 0.7619385283869948, iteration: 430459
loss: 1.025452733039856,grad_norm: 0.818854048556715, iteration: 430460
loss: 1.030590295791626,grad_norm: 0.7140122775409575, iteration: 430461
loss: 0.9638358950614929,grad_norm: 0.726801189299565, iteration: 430462
loss: 1.0305997133255005,grad_norm: 0.7400302058991585, iteration: 430463
loss: 0.9685484170913696,grad_norm: 0.6710185762758337, iteration: 430464
loss: 1.0065863132476807,grad_norm: 0.895214622981783, iteration: 430465
loss: 1.000991702079773,grad_norm: 0.8162544982295344, iteration: 430466
loss: 1.0119339227676392,grad_norm: 0.9999996294928883, iteration: 430467
loss: 1.0294338464736938,grad_norm: 0.8062355397456777, iteration: 430468
loss: 0.9771865010261536,grad_norm: 0.8006202741169254, iteration: 430469
loss: 1.010960578918457,grad_norm: 0.7070713916713244, iteration: 430470
loss: 1.0553412437438965,grad_norm: 0.7694918892789657, iteration: 430471
loss: 1.0482428073883057,grad_norm: 0.8417114586458747, iteration: 430472
loss: 1.0263466835021973,grad_norm: 0.665548661192774, iteration: 430473
loss: 1.0364423990249634,grad_norm: 0.9999999640059082, iteration: 430474
loss: 1.0399338006973267,grad_norm: 0.9012255058126845, iteration: 430475
loss: 0.9999476075172424,grad_norm: 0.8050747529071003, iteration: 430476
loss: 0.9730708003044128,grad_norm: 0.7150170749366734, iteration: 430477
loss: 0.9629929661750793,grad_norm: 0.841993583352736, iteration: 430478
loss: 0.9887034893035889,grad_norm: 0.7814848641167199, iteration: 430479
loss: 1.0154095888137817,grad_norm: 0.8781078352013938, iteration: 430480
loss: 0.9934518933296204,grad_norm: 0.8708080518426852, iteration: 430481
loss: 1.0571922063827515,grad_norm: 0.9999992013590857, iteration: 430482
loss: 1.0343520641326904,grad_norm: 0.9229344181724568, iteration: 430483
loss: 1.0672394037246704,grad_norm: 0.9741691463848284, iteration: 430484
loss: 0.9830443263053894,grad_norm: 0.6802994536063389, iteration: 430485
loss: 1.0171664953231812,grad_norm: 0.7723384641190028, iteration: 430486
loss: 1.0029208660125732,grad_norm: 0.9344662807821612, iteration: 430487
loss: 1.024303913116455,grad_norm: 0.7984759871238147, iteration: 430488
loss: 1.007435917854309,grad_norm: 0.8367977823753354, iteration: 430489
loss: 0.9957500696182251,grad_norm: 0.7867288864759403, iteration: 430490
loss: 0.9809290766716003,grad_norm: 0.6828337559808807, iteration: 430491
loss: 0.9875994324684143,grad_norm: 0.8339591838720151, iteration: 430492
loss: 0.9906471371650696,grad_norm: 0.8689189130092448, iteration: 430493
loss: 1.0171607732772827,grad_norm: 0.6225588166415277, iteration: 430494
loss: 0.9838463068008423,grad_norm: 0.7005324845267402, iteration: 430495
loss: 1.0225582122802734,grad_norm: 0.864585527582363, iteration: 430496
loss: 0.9650776386260986,grad_norm: 0.837131137625396, iteration: 430497
loss: 0.9748767614364624,grad_norm: 0.7593422960547018, iteration: 430498
loss: 0.981320321559906,grad_norm: 0.7765352917082198, iteration: 430499
loss: 0.9486538171768188,grad_norm: 0.6702557477131651, iteration: 430500
loss: 1.0067464113235474,grad_norm: 0.939659005878414, iteration: 430501
loss: 1.037366271018982,grad_norm: 0.7234318620614444, iteration: 430502
loss: 1.0030823945999146,grad_norm: 0.9238819659749103, iteration: 430503
loss: 1.021532654762268,grad_norm: 0.911715323781699, iteration: 430504
loss: 0.972716212272644,grad_norm: 0.7684492117778808, iteration: 430505
loss: 1.0228043794631958,grad_norm: 0.9999994456606772, iteration: 430506
loss: 1.0196106433868408,grad_norm: 0.9999992574070732, iteration: 430507
loss: 0.962374746799469,grad_norm: 0.7135349315203451, iteration: 430508
loss: 1.002322793006897,grad_norm: 0.8071105585874655, iteration: 430509
loss: 1.105053424835205,grad_norm: 0.99999955192551, iteration: 430510
loss: 1.0325144529342651,grad_norm: 0.8075363619455913, iteration: 430511
loss: 0.9908543229103088,grad_norm: 0.6820357229320312, iteration: 430512
loss: 1.1127612590789795,grad_norm: 0.9323470275074593, iteration: 430513
loss: 0.9961944818496704,grad_norm: 0.6931092198636192, iteration: 430514
loss: 0.9795919060707092,grad_norm: 0.7944966702640838, iteration: 430515
loss: 0.9767066836357117,grad_norm: 0.7589047297964311, iteration: 430516
loss: 1.034890055656433,grad_norm: 0.7524155726761297, iteration: 430517
loss: 0.9787203073501587,grad_norm: 0.9313115780391794, iteration: 430518
loss: 1.0802217721939087,grad_norm: 0.9999992242628215, iteration: 430519
loss: 0.9618946313858032,grad_norm: 0.7013703136864222, iteration: 430520
loss: 0.9803066253662109,grad_norm: 0.7336320605870753, iteration: 430521
loss: 1.029141902923584,grad_norm: 0.8166026139995812, iteration: 430522
loss: 1.0145045518875122,grad_norm: 0.9005681458682046, iteration: 430523
loss: 0.9983882904052734,grad_norm: 0.7362812433074349, iteration: 430524
loss: 1.0216574668884277,grad_norm: 0.8869735647430607, iteration: 430525
loss: 1.125019907951355,grad_norm: 0.9999994866827803, iteration: 430526
loss: 0.9939619302749634,grad_norm: 0.749072334410965, iteration: 430527
loss: 1.0064599514007568,grad_norm: 0.8953246189630815, iteration: 430528
loss: 1.00357186794281,grad_norm: 0.7681939086328744, iteration: 430529
loss: 1.0580329895019531,grad_norm: 0.9406546139903613, iteration: 430530
loss: 0.9888402223587036,grad_norm: 0.6923992969048287, iteration: 430531
loss: 1.039871335029602,grad_norm: 0.794126100486761, iteration: 430532
loss: 1.0159194469451904,grad_norm: 0.8813383069787766, iteration: 430533
loss: 1.004285454750061,grad_norm: 0.8530857485932652, iteration: 430534
loss: 1.001861333847046,grad_norm: 0.6234500562993077, iteration: 430535
loss: 0.9759242534637451,grad_norm: 0.7227114178061095, iteration: 430536
loss: 1.0292049646377563,grad_norm: 0.7978363782504204, iteration: 430537
loss: 1.0303503274917603,grad_norm: 0.7444340117869325, iteration: 430538
loss: 1.0330283641815186,grad_norm: 0.7778936846254391, iteration: 430539
loss: 0.980408251285553,grad_norm: 0.8313443771499841, iteration: 430540
loss: 1.0008512735366821,grad_norm: 0.9999998669475396, iteration: 430541
loss: 0.9967904686927795,grad_norm: 0.7894571200017393, iteration: 430542
loss: 0.989165186882019,grad_norm: 0.8336850209305393, iteration: 430543
loss: 0.9864128232002258,grad_norm: 0.7315390485948702, iteration: 430544
loss: 1.0023409128189087,grad_norm: 0.7345452132794086, iteration: 430545
loss: 1.017599105834961,grad_norm: 0.8249801414681598, iteration: 430546
loss: 1.0317862033843994,grad_norm: 0.7111944520167615, iteration: 430547
loss: 0.9858241081237793,grad_norm: 0.8639735297908065, iteration: 430548
loss: 1.0229601860046387,grad_norm: 0.7554383257730622, iteration: 430549
loss: 1.0280756950378418,grad_norm: 0.8045161726875852, iteration: 430550
loss: 1.0225955247879028,grad_norm: 0.9999993129434173, iteration: 430551
loss: 0.9600342512130737,grad_norm: 0.6949081322306709, iteration: 430552
loss: 0.9827051162719727,grad_norm: 0.8894907918636056, iteration: 430553
loss: 0.9738643169403076,grad_norm: 0.768287331057047, iteration: 430554
loss: 1.0340393781661987,grad_norm: 0.8293849465930214, iteration: 430555
loss: 0.9929145574569702,grad_norm: 0.6705240523782339, iteration: 430556
loss: 0.9586551785469055,grad_norm: 0.7797173175089268, iteration: 430557
loss: 1.009516954421997,grad_norm: 0.7521905238938411, iteration: 430558
loss: 1.0055330991744995,grad_norm: 0.8107943092688352, iteration: 430559
loss: 0.9805064797401428,grad_norm: 0.9363214878167345, iteration: 430560
loss: 0.9786547422409058,grad_norm: 0.7842070228791784, iteration: 430561
loss: 0.9747783541679382,grad_norm: 0.704237000409938, iteration: 430562
loss: 1.0353493690490723,grad_norm: 0.9737693338770448, iteration: 430563
loss: 0.9921268224716187,grad_norm: 0.6552139020131061, iteration: 430564
loss: 1.016839861869812,grad_norm: 0.7101640201584134, iteration: 430565
loss: 1.067495346069336,grad_norm: 0.9999997043674053, iteration: 430566
loss: 1.048052430152893,grad_norm: 0.8381506661444222, iteration: 430567
loss: 0.9767698645591736,grad_norm: 0.8753906787131339, iteration: 430568
loss: 1.0020179748535156,grad_norm: 0.7762130140336063, iteration: 430569
loss: 0.9944232106208801,grad_norm: 0.9914347657968275, iteration: 430570
loss: 1.1325920820236206,grad_norm: 0.938481022537023, iteration: 430571
loss: 1.0563091039657593,grad_norm: 0.7690931011162223, iteration: 430572
loss: 1.0346500873565674,grad_norm: 0.8694083223868393, iteration: 430573
loss: 1.063090443611145,grad_norm: 0.8009028371696626, iteration: 430574
loss: 1.0100637674331665,grad_norm: 0.7387419157867515, iteration: 430575
loss: 0.9811419248580933,grad_norm: 0.8976479755182407, iteration: 430576
loss: 1.0346734523773193,grad_norm: 0.8158251506452782, iteration: 430577
loss: 1.032904863357544,grad_norm: 0.7327054275447417, iteration: 430578
loss: 1.0074397325515747,grad_norm: 0.878591549940217, iteration: 430579
loss: 1.1014946699142456,grad_norm: 0.9999992635653535, iteration: 430580
loss: 1.0317115783691406,grad_norm: 0.9999991196953216, iteration: 430581
loss: 0.9754567742347717,grad_norm: 0.9722717578807505, iteration: 430582
loss: 1.0140198469161987,grad_norm: 0.851233882688685, iteration: 430583
loss: 0.9692670106887817,grad_norm: 0.7982391730607045, iteration: 430584
loss: 1.022566556930542,grad_norm: 0.7571033567888485, iteration: 430585
loss: 1.0166423320770264,grad_norm: 0.8175331963532751, iteration: 430586
loss: 1.0166046619415283,grad_norm: 0.816226917385136, iteration: 430587
loss: 0.9894932508468628,grad_norm: 0.8680095926843069, iteration: 430588
loss: 0.9633262157440186,grad_norm: 0.7637743468217624, iteration: 430589
loss: 0.9987578988075256,grad_norm: 0.7433234432759691, iteration: 430590
loss: 0.948045551776886,grad_norm: 0.8008644756079865, iteration: 430591
loss: 0.9850477576255798,grad_norm: 0.7526138653537969, iteration: 430592
loss: 0.9961734414100647,grad_norm: 0.9560234930208531, iteration: 430593
loss: 0.997193455696106,grad_norm: 0.8981148132168338, iteration: 430594
loss: 1.006446123123169,grad_norm: 0.7046849866636776, iteration: 430595
loss: 0.9998977780342102,grad_norm: 0.9999995019639293, iteration: 430596
loss: 0.972500741481781,grad_norm: 0.8630128798732247, iteration: 430597
loss: 0.9451627135276794,grad_norm: 0.9999997706422481, iteration: 430598
loss: 0.9970624446868896,grad_norm: 0.8528122956633131, iteration: 430599
loss: 0.9712103009223938,grad_norm: 0.8759757295575288, iteration: 430600
loss: 1.0233443975448608,grad_norm: 0.9270546912077938, iteration: 430601
loss: 1.0117712020874023,grad_norm: 0.7627843932495701, iteration: 430602
loss: 1.0738362073898315,grad_norm: 0.9999997681011187, iteration: 430603
loss: 0.9599564075469971,grad_norm: 0.8089832386569796, iteration: 430604
loss: 1.0140527486801147,grad_norm: 0.7478654580777845, iteration: 430605
loss: 1.0026592016220093,grad_norm: 0.7595587202029117, iteration: 430606
loss: 0.991199791431427,grad_norm: 0.9999991272259446, iteration: 430607
loss: 1.0038782358169556,grad_norm: 0.7959901053474899, iteration: 430608
loss: 0.9698440432548523,grad_norm: 0.9389518239077823, iteration: 430609
loss: 1.0039058923721313,grad_norm: 0.7328729550585764, iteration: 430610
loss: 1.0487794876098633,grad_norm: 0.7998528066253247, iteration: 430611
loss: 0.9956548810005188,grad_norm: 0.6283581551302387, iteration: 430612
loss: 0.949799120426178,grad_norm: 0.7326071160831015, iteration: 430613
loss: 1.0534881353378296,grad_norm: 0.9999990232877872, iteration: 430614
loss: 1.0075105428695679,grad_norm: 0.6988604699765565, iteration: 430615
loss: 1.0311695337295532,grad_norm: 0.7504478170722584, iteration: 430616
loss: 0.9988887906074524,grad_norm: 0.7761079796904178, iteration: 430617
loss: 1.0217492580413818,grad_norm: 0.9999990601151848, iteration: 430618
loss: 0.9583417177200317,grad_norm: 0.6897731080041883, iteration: 430619
loss: 0.9767654538154602,grad_norm: 0.9125933211959899, iteration: 430620
loss: 0.9746614694595337,grad_norm: 0.7529745248192686, iteration: 430621
loss: 1.000626802444458,grad_norm: 0.7963107693452647, iteration: 430622
loss: 1.0677530765533447,grad_norm: 0.8614702361597578, iteration: 430623
loss: 0.977104663848877,grad_norm: 0.7237085809980313, iteration: 430624
loss: 1.0264840126037598,grad_norm: 0.9999992710340638, iteration: 430625
loss: 0.9763761162757874,grad_norm: 0.6406597247685373, iteration: 430626
loss: 1.0634502172470093,grad_norm: 0.9999995365644575, iteration: 430627
loss: 1.0564301013946533,grad_norm: 0.9999990504759412, iteration: 430628
loss: 0.9925930500030518,grad_norm: 0.7844285184803366, iteration: 430629
loss: 1.0219024419784546,grad_norm: 0.8036747451566111, iteration: 430630
loss: 1.0299644470214844,grad_norm: 0.6893377108382279, iteration: 430631
loss: 1.017708659172058,grad_norm: 0.6582239650146006, iteration: 430632
loss: 1.0360294580459595,grad_norm: 0.999999119893885, iteration: 430633
loss: 1.0100042819976807,grad_norm: 0.7114172872951536, iteration: 430634
loss: 1.0010292530059814,grad_norm: 0.7252888565921859, iteration: 430635
loss: 1.0183606147766113,grad_norm: 0.8835605856002264, iteration: 430636
loss: 1.0538105964660645,grad_norm: 0.7048459913246067, iteration: 430637
loss: 1.030800700187683,grad_norm: 0.6514329285664682, iteration: 430638
loss: 0.9934913516044617,grad_norm: 0.8418287699655799, iteration: 430639
loss: 1.0613685846328735,grad_norm: 0.9999995727692816, iteration: 430640
loss: 1.0626676082611084,grad_norm: 0.8224384329597227, iteration: 430641
loss: 1.0139522552490234,grad_norm: 0.9999991880623134, iteration: 430642
loss: 1.009827733039856,grad_norm: 0.7648062007536816, iteration: 430643
loss: 0.9970172643661499,grad_norm: 0.9397644825619187, iteration: 430644
loss: 1.0078948736190796,grad_norm: 0.7040605657668612, iteration: 430645
loss: 0.9477812647819519,grad_norm: 0.765705294620678, iteration: 430646
loss: 1.0252474546432495,grad_norm: 0.9999992408040713, iteration: 430647
loss: 0.9707475900650024,grad_norm: 0.9086391007109708, iteration: 430648
loss: 0.999535858631134,grad_norm: 0.8387624226159848, iteration: 430649
loss: 1.0046542882919312,grad_norm: 0.9999999934876947, iteration: 430650
loss: 0.9648312926292419,grad_norm: 0.8474054995805729, iteration: 430651
loss: 0.9678444266319275,grad_norm: 0.9999999437068581, iteration: 430652
loss: 1.0036977529525757,grad_norm: 0.9999990633680282, iteration: 430653
loss: 0.9793291687965393,grad_norm: 0.809941819925835, iteration: 430654
loss: 1.0090912580490112,grad_norm: 0.7370424845267299, iteration: 430655
loss: 1.0310721397399902,grad_norm: 0.9999991876445238, iteration: 430656
loss: 0.9967263340950012,grad_norm: 0.7304666255623975, iteration: 430657
loss: 1.004195213317871,grad_norm: 0.7462733794307932, iteration: 430658
loss: 0.9946309924125671,grad_norm: 0.8182146503193926, iteration: 430659
loss: 1.0635980367660522,grad_norm: 0.9999992765876504, iteration: 430660
loss: 1.0069761276245117,grad_norm: 0.7994293238809894, iteration: 430661
loss: 1.0096293687820435,grad_norm: 0.7809983869326843, iteration: 430662
loss: 0.9893545508384705,grad_norm: 0.7903600298091843, iteration: 430663
loss: 1.0194332599639893,grad_norm: 0.9999996981040716, iteration: 430664
loss: 0.963150680065155,grad_norm: 0.7458558523966946, iteration: 430665
loss: 0.9717826247215271,grad_norm: 0.6327789688256552, iteration: 430666
loss: 1.0319485664367676,grad_norm: 0.9999990542768085, iteration: 430667
loss: 0.9827713966369629,grad_norm: 0.9999992830113303, iteration: 430668
loss: 1.001214623451233,grad_norm: 0.7183681965570836, iteration: 430669
loss: 1.0776807069778442,grad_norm: 0.9999990596719075, iteration: 430670
loss: 0.9824227094650269,grad_norm: 0.7411914394632284, iteration: 430671
loss: 1.0760910511016846,grad_norm: 0.9999999054668147, iteration: 430672
loss: 1.0094325542449951,grad_norm: 0.7935951009293504, iteration: 430673
loss: 1.0594359636306763,grad_norm: 0.9999998327910192, iteration: 430674
loss: 1.0322571992874146,grad_norm: 0.7946349278441833, iteration: 430675
loss: 1.0341421365737915,grad_norm: 0.7510897490351601, iteration: 430676
loss: 1.0432310104370117,grad_norm: 0.6983459418879582, iteration: 430677
loss: 1.0720162391662598,grad_norm: 1.0000000169909644, iteration: 430678
loss: 0.9904652237892151,grad_norm: 0.7071991516232718, iteration: 430679
loss: 1.0632914304733276,grad_norm: 0.8998072702433411, iteration: 430680
loss: 1.0954725742340088,grad_norm: 0.8509313840139452, iteration: 430681
loss: 1.0232069492340088,grad_norm: 0.9384731530110465, iteration: 430682
loss: 1.0074119567871094,grad_norm: 0.7353739795011561, iteration: 430683
loss: 1.0339690446853638,grad_norm: 0.9156720182999506, iteration: 430684
loss: 0.9931036829948425,grad_norm: 0.8842900103471348, iteration: 430685
loss: 0.9879962205886841,grad_norm: 0.8062706465364974, iteration: 430686
loss: 0.9749647378921509,grad_norm: 0.8299022232523543, iteration: 430687
loss: 1.0232576131820679,grad_norm: 0.717348869944559, iteration: 430688
loss: 0.9763255715370178,grad_norm: 0.9141901574655177, iteration: 430689
loss: 0.9610202312469482,grad_norm: 0.8679926748545349, iteration: 430690
loss: 0.9899857640266418,grad_norm: 0.9083633829220302, iteration: 430691
loss: 1.0025033950805664,grad_norm: 0.8100012754411432, iteration: 430692
loss: 0.9994741082191467,grad_norm: 0.7403637213582562, iteration: 430693
loss: 0.9949040412902832,grad_norm: 0.851026345780462, iteration: 430694
loss: 1.0392494201660156,grad_norm: 0.6832475118720871, iteration: 430695
loss: 1.0119214057922363,grad_norm: 0.8937978896017051, iteration: 430696
loss: 0.9548617005348206,grad_norm: 0.7670279580095406, iteration: 430697
loss: 1.0526756048202515,grad_norm: 0.9999990330548821, iteration: 430698
loss: 1.00091552734375,grad_norm: 0.7905013744966234, iteration: 430699
loss: 1.0103117227554321,grad_norm: 0.695634538219351, iteration: 430700
loss: 0.987099289894104,grad_norm: 0.9999990373150776, iteration: 430701
loss: 1.0482277870178223,grad_norm: 0.9999999376137797, iteration: 430702
loss: 1.0089168548583984,grad_norm: 0.7528677000957578, iteration: 430703
loss: 1.001453161239624,grad_norm: 0.8205521139418852, iteration: 430704
loss: 0.9744693040847778,grad_norm: 0.8748999742768029, iteration: 430705
loss: 0.9797718524932861,grad_norm: 0.8571928025211962, iteration: 430706
loss: 0.9962325096130371,grad_norm: 0.8518579314295422, iteration: 430707
loss: 0.9832268357276917,grad_norm: 0.7282009135491178, iteration: 430708
loss: 0.9722375273704529,grad_norm: 0.8456274775610771, iteration: 430709
loss: 1.0387176275253296,grad_norm: 0.9999992338433996, iteration: 430710
loss: 0.9963652491569519,grad_norm: 0.7910487314540916, iteration: 430711
loss: 0.9883689880371094,grad_norm: 0.6977194335647643, iteration: 430712
loss: 1.038427472114563,grad_norm: 0.8009461685966217, iteration: 430713
loss: 1.0538146495819092,grad_norm: 0.8706312068706552, iteration: 430714
loss: 1.0204001665115356,grad_norm: 0.7771638343075827, iteration: 430715
loss: 0.9971531629562378,grad_norm: 0.6642580498496361, iteration: 430716
loss: 1.0341811180114746,grad_norm: 0.9226911007768628, iteration: 430717
loss: 0.985470175743103,grad_norm: 0.7611327822623547, iteration: 430718
loss: 1.026676058769226,grad_norm: 0.6363920358310287, iteration: 430719
loss: 0.9890564680099487,grad_norm: 0.7034338322431465, iteration: 430720
loss: 0.9686456918716431,grad_norm: 0.794873994852868, iteration: 430721
loss: 1.0122272968292236,grad_norm: 0.793551046452975, iteration: 430722
loss: 0.9360952973365784,grad_norm: 0.757012743293787, iteration: 430723
loss: 1.0038719177246094,grad_norm: 0.7912409975152193, iteration: 430724
loss: 1.015860915184021,grad_norm: 0.7047315515062172, iteration: 430725
loss: 0.9909806251525879,grad_norm: 0.7336958418220754, iteration: 430726
loss: 0.9864017963409424,grad_norm: 0.8190203371591861, iteration: 430727
loss: 1.0108616352081299,grad_norm: 0.9999990251302511, iteration: 430728
loss: 0.9721891283988953,grad_norm: 0.8740869685345343, iteration: 430729
loss: 1.0032637119293213,grad_norm: 0.7630991530348145, iteration: 430730
loss: 1.036716103553772,grad_norm: 0.8447660621632993, iteration: 430731
loss: 0.9810923933982849,grad_norm: 0.6315176368271528, iteration: 430732
loss: 1.031434416770935,grad_norm: 0.7248383824533143, iteration: 430733
loss: 1.0032356977462769,grad_norm: 0.7925397475622699, iteration: 430734
loss: 1.0639195442199707,grad_norm: 0.9999998347344446, iteration: 430735
loss: 1.005463719367981,grad_norm: 0.9999993060896881, iteration: 430736
loss: 1.0522791147232056,grad_norm: 0.9999996121267587, iteration: 430737
loss: 0.9653980135917664,grad_norm: 0.7153821237794873, iteration: 430738
loss: 1.0571998357772827,grad_norm: 0.6734404015106942, iteration: 430739
loss: 1.1395705938339233,grad_norm: 0.9999998794550361, iteration: 430740
loss: 0.9688180088996887,grad_norm: 0.6613979591344583, iteration: 430741
loss: 1.009827971458435,grad_norm: 0.8754342110290823, iteration: 430742
loss: 1.0021584033966064,grad_norm: 0.9798391133135701, iteration: 430743
loss: 0.9646770358085632,grad_norm: 0.7505234965336245, iteration: 430744
loss: 1.0632789134979248,grad_norm: 0.6987212338740545, iteration: 430745
loss: 0.9630264043807983,grad_norm: 0.7544113110308147, iteration: 430746
loss: 0.997994065284729,grad_norm: 0.9999997318080039, iteration: 430747
loss: 0.9987561702728271,grad_norm: 0.8588873867108033, iteration: 430748
loss: 0.9633914232254028,grad_norm: 0.9110728513069175, iteration: 430749
loss: 0.9901500940322876,grad_norm: 0.9999992355237238, iteration: 430750
loss: 0.978918194770813,grad_norm: 0.8293445796360218, iteration: 430751
loss: 1.0054632425308228,grad_norm: 0.935012082392011, iteration: 430752
loss: 1.0011656284332275,grad_norm: 0.9999992048547823, iteration: 430753
loss: 0.9869762063026428,grad_norm: 0.8117020784524382, iteration: 430754
loss: 0.969857394695282,grad_norm: 0.7010311262945248, iteration: 430755
loss: 0.9999129772186279,grad_norm: 0.666763155907712, iteration: 430756
loss: 0.998124897480011,grad_norm: 0.673378500911064, iteration: 430757
loss: 0.9971992373466492,grad_norm: 0.859784416647156, iteration: 430758
loss: 0.9620503187179565,grad_norm: 0.776466822696558, iteration: 430759
loss: 0.9716769456863403,grad_norm: 0.6844722328144197, iteration: 430760
loss: 0.9737051725387573,grad_norm: 0.8059437836159652, iteration: 430761
loss: 1.0290400981903076,grad_norm: 0.9999989976038851, iteration: 430762
loss: 1.0034501552581787,grad_norm: 0.9362586176828244, iteration: 430763
loss: 0.9811369776725769,grad_norm: 0.8725484645027943, iteration: 430764
loss: 0.9886482357978821,grad_norm: 0.9845076370944253, iteration: 430765
loss: 0.9966943860054016,grad_norm: 0.8228287691043409, iteration: 430766
loss: 0.9829970598220825,grad_norm: 0.8367604787593764, iteration: 430767
loss: 0.9977809190750122,grad_norm: 0.773478893475924, iteration: 430768
loss: 0.9901325106620789,grad_norm: 0.999999072322823, iteration: 430769
loss: 1.0202796459197998,grad_norm: 0.8316603132307783, iteration: 430770
loss: 1.0694111585617065,grad_norm: 0.7770936581352317, iteration: 430771
loss: 0.995366632938385,grad_norm: 0.7596179519375137, iteration: 430772
loss: 1.0027048587799072,grad_norm: 0.7864311635145135, iteration: 430773
loss: 0.9941130876541138,grad_norm: 0.8392264989157938, iteration: 430774
loss: 1.078857183456421,grad_norm: 0.9999998244657455, iteration: 430775
loss: 1.0904561281204224,grad_norm: 0.936911835016879, iteration: 430776
loss: 0.9923431277275085,grad_norm: 0.9999995390645006, iteration: 430777
loss: 1.0161710977554321,grad_norm: 0.9999991637629821, iteration: 430778
loss: 0.9938633441925049,grad_norm: 0.8690576467610656, iteration: 430779
loss: 0.9816181063652039,grad_norm: 0.8196014922621235, iteration: 430780
loss: 1.0067496299743652,grad_norm: 0.6795845732985201, iteration: 430781
loss: 0.9761719107627869,grad_norm: 0.801076940822618, iteration: 430782
loss: 0.9998480081558228,grad_norm: 0.7653305893514826, iteration: 430783
loss: 1.0185654163360596,grad_norm: 0.7575646824507072, iteration: 430784
loss: 1.007145643234253,grad_norm: 0.6814688738608603, iteration: 430785
loss: 1.016764521598816,grad_norm: 0.7700303125020003, iteration: 430786
loss: 0.9922720789909363,grad_norm: 0.898145617914123, iteration: 430787
loss: 0.9759085774421692,grad_norm: 0.7888014949314246, iteration: 430788
loss: 1.026377558708191,grad_norm: 0.9999991589248074, iteration: 430789
loss: 0.9658263325691223,grad_norm: 0.8016392883751448, iteration: 430790
loss: 1.0043103694915771,grad_norm: 0.9999993042100469, iteration: 430791
loss: 1.0776240825653076,grad_norm: 0.8950754729176783, iteration: 430792
loss: 1.0222313404083252,grad_norm: 0.9999992724791487, iteration: 430793
loss: 1.157548427581787,grad_norm: 0.9998267659822383, iteration: 430794
loss: 1.0201737880706787,grad_norm: 0.8141065744818526, iteration: 430795
loss: 1.0470187664031982,grad_norm: 0.9999996786837985, iteration: 430796
loss: 0.9975924491882324,grad_norm: 0.7389598171390217, iteration: 430797
loss: 1.0711987018585205,grad_norm: 0.7549407102344547, iteration: 430798
loss: 1.1127413511276245,grad_norm: 0.9999997919282058, iteration: 430799
loss: 1.0280545949935913,grad_norm: 0.7123569482737343, iteration: 430800
loss: 1.0093244314193726,grad_norm: 0.6242276963853689, iteration: 430801
loss: 1.0302258729934692,grad_norm: 0.9999991493034611, iteration: 430802
loss: 0.9782187938690186,grad_norm: 0.9291842788372344, iteration: 430803
loss: 1.1990445852279663,grad_norm: 0.9999995284676314, iteration: 430804
loss: 0.9561306238174438,grad_norm: 0.7780506618807076, iteration: 430805
loss: 0.9885883331298828,grad_norm: 0.8802388692096181, iteration: 430806
loss: 1.0218642950057983,grad_norm: 0.7992100410858194, iteration: 430807
loss: 1.016050100326538,grad_norm: 0.9999994924833537, iteration: 430808
loss: 1.019321084022522,grad_norm: 0.999999726590304, iteration: 430809
loss: 1.0417437553405762,grad_norm: 0.8462946932116495, iteration: 430810
loss: 1.0011054277420044,grad_norm: 0.7629819114841394, iteration: 430811
loss: 1.0096752643585205,grad_norm: 0.7929386660329047, iteration: 430812
loss: 0.9830952882766724,grad_norm: 0.7938887892091108, iteration: 430813
loss: 1.047423243522644,grad_norm: 0.9999999244439914, iteration: 430814
loss: 1.1264021396636963,grad_norm: 0.9999997721125544, iteration: 430815
loss: 0.9971148371696472,grad_norm: 0.8272911209450975, iteration: 430816
loss: 1.0042215585708618,grad_norm: 0.9999997917099648, iteration: 430817
loss: 0.9916889071464539,grad_norm: 0.7116750643696802, iteration: 430818
loss: 1.0039907693862915,grad_norm: 0.8765450280909262, iteration: 430819
loss: 1.0009323358535767,grad_norm: 0.9999991420635261, iteration: 430820
loss: 1.0298224687576294,grad_norm: 0.9999993190043972, iteration: 430821
loss: 1.0103819370269775,grad_norm: 0.8204628925498687, iteration: 430822
loss: 1.042506456375122,grad_norm: 0.9999991823885508, iteration: 430823
loss: 0.9890552759170532,grad_norm: 0.6211703937079077, iteration: 430824
loss: 1.0074958801269531,grad_norm: 0.8455648213252641, iteration: 430825
loss: 1.0644878149032593,grad_norm: 0.9999994817933338, iteration: 430826
loss: 0.9910760521888733,grad_norm: 0.8355131745214576, iteration: 430827
loss: 0.994322657585144,grad_norm: 0.7055154905853682, iteration: 430828
loss: 0.9833822846412659,grad_norm: 0.7961215778559537, iteration: 430829
loss: 0.9720393419265747,grad_norm: 0.7898005739916479, iteration: 430830
loss: 1.0469557046890259,grad_norm: 0.9999997042027874, iteration: 430831
loss: 0.9850296974182129,grad_norm: 0.8273623579732089, iteration: 430832
loss: 0.9704104065895081,grad_norm: 0.8075192617244288, iteration: 430833
loss: 1.008683443069458,grad_norm: 0.8711505357198248, iteration: 430834
loss: 1.1261812448501587,grad_norm: 0.9999997978189594, iteration: 430835
loss: 0.9867106676101685,grad_norm: 0.766119201353776, iteration: 430836
loss: 0.9489260911941528,grad_norm: 0.8080129424776432, iteration: 430837
loss: 1.0039688348770142,grad_norm: 0.6854119255331685, iteration: 430838
loss: 0.9936599135398865,grad_norm: 0.9999989765872889, iteration: 430839
loss: 0.9626038670539856,grad_norm: 0.9999990619057815, iteration: 430840
loss: 0.9943554401397705,grad_norm: 0.8458538934492718, iteration: 430841
loss: 0.9834776520729065,grad_norm: 0.7013856968402092, iteration: 430842
loss: 1.042635202407837,grad_norm: 0.9999999210892625, iteration: 430843
loss: 1.0293383598327637,grad_norm: 0.9218735195912805, iteration: 430844
loss: 0.9751443862915039,grad_norm: 0.8952601472090058, iteration: 430845
loss: 1.0436904430389404,grad_norm: 0.7622888012033898, iteration: 430846
loss: 0.978741466999054,grad_norm: 0.7358179976886279, iteration: 430847
loss: 1.0189727544784546,grad_norm: 0.7324738816672081, iteration: 430848
loss: 1.0477265119552612,grad_norm: 0.7216487195459848, iteration: 430849
loss: 0.9758322238922119,grad_norm: 0.802380180713017, iteration: 430850
loss: 0.9716973900794983,grad_norm: 0.9999999886625419, iteration: 430851
loss: 1.0674235820770264,grad_norm: 0.9999998438259331, iteration: 430852
loss: 1.0554289817810059,grad_norm: 0.9999997346881477, iteration: 430853
loss: 1.035398006439209,grad_norm: 0.9999996232932306, iteration: 430854
loss: 1.0014023780822754,grad_norm: 0.5598971822897986, iteration: 430855
loss: 1.0142964124679565,grad_norm: 0.799323212608616, iteration: 430856
loss: 1.040485143661499,grad_norm: 0.9041310627530754, iteration: 430857
loss: 1.0174052715301514,grad_norm: 0.7404358702351564, iteration: 430858
loss: 0.9816346168518066,grad_norm: 0.9999992646094166, iteration: 430859
loss: 1.009122371673584,grad_norm: 0.8181014232592082, iteration: 430860
loss: 0.9939714074134827,grad_norm: 0.8169632448646319, iteration: 430861
loss: 0.991375744342804,grad_norm: 0.9999994512651966, iteration: 430862
loss: 0.9939699172973633,grad_norm: 0.6746576477549032, iteration: 430863
loss: 1.0019609928131104,grad_norm: 0.7310133254409249, iteration: 430864
loss: 0.9621918797492981,grad_norm: 0.7662900696082574, iteration: 430865
loss: 1.0806156396865845,grad_norm: 0.6943341715223498, iteration: 430866
loss: 1.0584760904312134,grad_norm: 0.754321877684689, iteration: 430867
loss: 1.0178104639053345,grad_norm: 0.7270350967150622, iteration: 430868
loss: 1.1342899799346924,grad_norm: 0.9999999513316881, iteration: 430869
loss: 1.049813151359558,grad_norm: 0.9999995514514967, iteration: 430870
loss: 0.975615382194519,grad_norm: 0.6527464076127666, iteration: 430871
loss: 1.1217120885849,grad_norm: 0.9999997724844158, iteration: 430872
loss: 1.0688562393188477,grad_norm: 0.9656022263844833, iteration: 430873
loss: 1.0258305072784424,grad_norm: 0.9999999673421323, iteration: 430874
loss: 1.0378679037094116,grad_norm: 0.9381228549242228, iteration: 430875
loss: 1.0086332559585571,grad_norm: 0.811625684172989, iteration: 430876
loss: 1.017189860343933,grad_norm: 0.9999992175644767, iteration: 430877
loss: 1.1224565505981445,grad_norm: 0.9999993894416245, iteration: 430878
loss: 1.0002399682998657,grad_norm: 0.6766894711996362, iteration: 430879
loss: 1.0382442474365234,grad_norm: 0.9362377400077174, iteration: 430880
loss: 1.0339733362197876,grad_norm: 0.8310498446045645, iteration: 430881
loss: 1.00241219997406,grad_norm: 0.8478152971009808, iteration: 430882
loss: 1.0033481121063232,grad_norm: 0.9150055518917716, iteration: 430883
loss: 0.9700223207473755,grad_norm: 0.9999990491134828, iteration: 430884
loss: 1.0226751565933228,grad_norm: 0.9999998590812208, iteration: 430885
loss: 1.0169366598129272,grad_norm: 0.9135872763213237, iteration: 430886
loss: 1.0039716958999634,grad_norm: 0.7832161655814436, iteration: 430887
loss: 1.1359212398529053,grad_norm: 0.7775143037905037, iteration: 430888
loss: 1.1655168533325195,grad_norm: 0.9999998263541551, iteration: 430889
loss: 1.0504429340362549,grad_norm: 0.7802979250125223, iteration: 430890
loss: 1.002378225326538,grad_norm: 0.9294820151662269, iteration: 430891
loss: 0.9823580384254456,grad_norm: 0.7929893687347581, iteration: 430892
loss: 1.0972036123275757,grad_norm: 0.8964449344599366, iteration: 430893
loss: 1.0362157821655273,grad_norm: 0.8208788286353974, iteration: 430894
loss: 1.0096848011016846,grad_norm: 0.8160605243385205, iteration: 430895
loss: 1.0254069566726685,grad_norm: 0.9999998318842128, iteration: 430896
loss: 1.162129521369934,grad_norm: 0.9999995769037735, iteration: 430897
loss: 1.0094503164291382,grad_norm: 0.8260740513125574, iteration: 430898
loss: 1.039466142654419,grad_norm: 0.9999989934860398, iteration: 430899
loss: 0.9554975628852844,grad_norm: 0.9999990319695748, iteration: 430900
loss: 1.0382449626922607,grad_norm: 0.7790015511586381, iteration: 430901
loss: 1.0127476453781128,grad_norm: 0.8844158552705913, iteration: 430902
loss: 0.9939108490943909,grad_norm: 0.7418053125840427, iteration: 430903
loss: 0.9967958331108093,grad_norm: 0.7361620484260123, iteration: 430904
loss: 1.00166654586792,grad_norm: 0.7306210727357586, iteration: 430905
loss: 1.1396056413650513,grad_norm: 0.9999997698276782, iteration: 430906
loss: 1.0570625066757202,grad_norm: 0.9999991494026441, iteration: 430907
loss: 0.9918738603591919,grad_norm: 0.9408241659311611, iteration: 430908
loss: 1.012467384338379,grad_norm: 0.7636200807081764, iteration: 430909
loss: 1.0348254442214966,grad_norm: 0.8057890024859843, iteration: 430910
loss: 1.0539913177490234,grad_norm: 0.7935721848923342, iteration: 430911
loss: 1.01041841506958,grad_norm: 0.9999996433003182, iteration: 430912
loss: 1.1633803844451904,grad_norm: 0.999999755514562, iteration: 430913
loss: 0.9968864321708679,grad_norm: 0.7520630631967973, iteration: 430914
loss: 1.034521222114563,grad_norm: 0.9330807130204851, iteration: 430915
loss: 0.9834598302841187,grad_norm: 0.8404247682578002, iteration: 430916
loss: 0.9871006011962891,grad_norm: 0.7837249930701343, iteration: 430917
loss: 1.2526123523712158,grad_norm: 0.9999998750812648, iteration: 430918
loss: 1.0873171091079712,grad_norm: 0.9999998360678668, iteration: 430919
loss: 1.1961456537246704,grad_norm: 0.9999998955926317, iteration: 430920
loss: 0.9616534113883972,grad_norm: 0.7690491410996361, iteration: 430921
loss: 1.1468464136123657,grad_norm: 0.9999991388878736, iteration: 430922
loss: 1.145546793937683,grad_norm: 0.9999993667712018, iteration: 430923
loss: 1.0700510740280151,grad_norm: 0.9529433107223869, iteration: 430924
loss: 0.9922013282775879,grad_norm: 0.8951149995152581, iteration: 430925
loss: 1.0187551975250244,grad_norm: 0.9093204729279963, iteration: 430926
loss: 1.0801043510437012,grad_norm: 0.8737996642206777, iteration: 430927
loss: 1.1698668003082275,grad_norm: 0.9999993344024569, iteration: 430928
loss: 1.0348947048187256,grad_norm: 0.6894244696705554, iteration: 430929
loss: 0.9955819845199585,grad_norm: 0.8418613458162285, iteration: 430930
loss: 1.0638400316238403,grad_norm: 0.8430924975488666, iteration: 430931
loss: 1.111295223236084,grad_norm: 0.9999994978984401, iteration: 430932
loss: 1.0871134996414185,grad_norm: 0.9999993629908294, iteration: 430933
loss: 1.1323049068450928,grad_norm: 0.9999992710326979, iteration: 430934
loss: 0.9883242845535278,grad_norm: 0.6385914218328422, iteration: 430935
loss: 1.0459085702896118,grad_norm: 0.9999993436408527, iteration: 430936
loss: 1.1366561651229858,grad_norm: 0.8553736305433379, iteration: 430937
loss: 1.2989898920059204,grad_norm: 0.9999994730474511, iteration: 430938
loss: 1.110064148902893,grad_norm: 0.9999994743642593, iteration: 430939
loss: 1.2204151153564453,grad_norm: 0.9999993863484263, iteration: 430940
loss: 1.2853845357894897,grad_norm: 0.9999996121444005, iteration: 430941
loss: 1.2963801622390747,grad_norm: 0.9512031343193157, iteration: 430942
loss: 1.0968527793884277,grad_norm: 0.9999998588798612, iteration: 430943
loss: 1.1202495098114014,grad_norm: 0.9999991789510775, iteration: 430944
loss: 1.2447501420974731,grad_norm: 0.9999999683913596, iteration: 430945
loss: 1.1103547811508179,grad_norm: 0.9999996348075129, iteration: 430946
loss: 1.3038227558135986,grad_norm: 0.9999998636011646, iteration: 430947
loss: 1.254652738571167,grad_norm: 1.0000000025410842, iteration: 430948
loss: 1.333715796470642,grad_norm: 0.9999999965377108, iteration: 430949
loss: 1.170313835144043,grad_norm: 0.9999992667334283, iteration: 430950
loss: 1.1142618656158447,grad_norm: 0.9999990050948601, iteration: 430951
loss: 1.2275424003601074,grad_norm: 0.9999995107574862, iteration: 430952
loss: 1.379496455192566,grad_norm: 0.9999997128693204, iteration: 430953
loss: 1.3369040489196777,grad_norm: 0.9999996207269227, iteration: 430954
loss: 1.3106110095977783,grad_norm: 0.9999997509087246, iteration: 430955
loss: 1.1431703567504883,grad_norm: 0.9999997787746259, iteration: 430956
loss: 1.101969838142395,grad_norm: 0.9999995657501145, iteration: 430957
loss: 1.0514365434646606,grad_norm: 0.8004457108336903, iteration: 430958
loss: 1.116050124168396,grad_norm: 0.9999996896015406, iteration: 430959
loss: 1.2423394918441772,grad_norm: 0.9999992664293001, iteration: 430960
loss: 1.1034096479415894,grad_norm: 0.9999992576522171, iteration: 430961
loss: 1.1245781183242798,grad_norm: 0.9999992116487937, iteration: 430962
loss: 1.2902898788452148,grad_norm: 0.9999992397670816, iteration: 430963
loss: 1.270432472229004,grad_norm: 0.9999997706203947, iteration: 430964
loss: 1.1133517026901245,grad_norm: 0.9999994301317322, iteration: 430965
loss: 1.178202509880066,grad_norm: 0.999999255274925, iteration: 430966
loss: 1.3347547054290771,grad_norm: 0.999999975291228, iteration: 430967
loss: 1.1771259307861328,grad_norm: 0.9999996337764329, iteration: 430968
loss: 1.391608715057373,grad_norm: 0.9999996716612571, iteration: 430969
loss: 1.184971570968628,grad_norm: 0.9999992244681788, iteration: 430970
loss: 1.2195161581039429,grad_norm: 0.9999995528120746, iteration: 430971
loss: 1.0740516185760498,grad_norm: 0.9999991189204018, iteration: 430972
loss: 1.1307706832885742,grad_norm: 0.9999990629554779, iteration: 430973
loss: 1.1339912414550781,grad_norm: 0.9999998384112447, iteration: 430974
loss: 1.0551130771636963,grad_norm: 0.9999996658145289, iteration: 430975
loss: 1.294952392578125,grad_norm: 0.999999893829704, iteration: 430976
loss: 1.2349467277526855,grad_norm: 0.999999724092624, iteration: 430977
loss: 1.1125363111495972,grad_norm: 0.9999994139768765, iteration: 430978
loss: 1.1980164051055908,grad_norm: 0.9999997727614948, iteration: 430979
loss: 1.1486843824386597,grad_norm: 0.9999994704368961, iteration: 430980
loss: 1.348170518875122,grad_norm: 0.9999999373473232, iteration: 430981
loss: 1.395756483078003,grad_norm: 0.999999869090338, iteration: 430982
loss: 1.2638256549835205,grad_norm: 0.999999469875837, iteration: 430983
loss: 1.1478708982467651,grad_norm: 0.9999992796049387, iteration: 430984
loss: 1.0142353773117065,grad_norm: 0.999999270752426, iteration: 430985
loss: 1.070093035697937,grad_norm: 0.9509508485436577, iteration: 430986
loss: 1.0056136846542358,grad_norm: 0.9999993300116156, iteration: 430987
loss: 1.280128002166748,grad_norm: 0.9999994458708609, iteration: 430988
loss: 1.1868504285812378,grad_norm: 0.999999412367929, iteration: 430989
loss: 1.079881191253662,grad_norm: 0.9999999952948904, iteration: 430990
loss: 1.4489389657974243,grad_norm: 0.9999998153808015, iteration: 430991
loss: 1.2313284873962402,grad_norm: 0.9999998874241496, iteration: 430992
loss: 1.0838011503219604,grad_norm: 0.8851582799334219, iteration: 430993
loss: 1.0369824171066284,grad_norm: 0.9999998070118166, iteration: 430994
loss: 1.461719274520874,grad_norm: 0.9999997433964538, iteration: 430995
loss: 1.1309125423431396,grad_norm: 0.999999959344626, iteration: 430996
loss: 1.193688988685608,grad_norm: 0.9747610023359468, iteration: 430997
loss: 1.0436925888061523,grad_norm: 0.9999993116899694, iteration: 430998
loss: 1.3742196559906006,grad_norm: 0.9999993313344709, iteration: 430999
loss: 0.9868888258934021,grad_norm: 0.6994446290912953, iteration: 431000
loss: 1.1423207521438599,grad_norm: 0.9999991021623228, iteration: 431001
loss: 1.0837708711624146,grad_norm: 0.9555542444016676, iteration: 431002
loss: 1.2105906009674072,grad_norm: 0.9999992776793857, iteration: 431003
loss: 1.004875659942627,grad_norm: 0.8523678928415395, iteration: 431004
loss: 1.0347704887390137,grad_norm: 0.9706365090276546, iteration: 431005
loss: 1.0899465084075928,grad_norm: 0.9378537838214707, iteration: 431006
loss: 1.2408593893051147,grad_norm: 0.9999997357777075, iteration: 431007
loss: 1.0773712396621704,grad_norm: 0.9999991056439461, iteration: 431008
loss: 1.0129387378692627,grad_norm: 0.7647273899182682, iteration: 431009
loss: 1.0330142974853516,grad_norm: 0.8972373077588476, iteration: 431010
loss: 1.0688432455062866,grad_norm: 0.7398846862350299, iteration: 431011
loss: 0.9524185061454773,grad_norm: 0.7636625486735138, iteration: 431012
loss: 1.0203231573104858,grad_norm: 0.8791514788737061, iteration: 431013
loss: 0.9776114821434021,grad_norm: 0.766063163107692, iteration: 431014
loss: 1.1303789615631104,grad_norm: 0.9999992280665371, iteration: 431015
loss: 0.9871761798858643,grad_norm: 0.8290582517151831, iteration: 431016
loss: 0.986488938331604,grad_norm: 0.8555628499926018, iteration: 431017
loss: 0.9724454283714294,grad_norm: 0.8858873595036766, iteration: 431018
loss: 0.9906830191612244,grad_norm: 0.9020375109554659, iteration: 431019
loss: 1.0548532009124756,grad_norm: 0.9999999078782302, iteration: 431020
loss: 1.0382314920425415,grad_norm: 0.8172910564170346, iteration: 431021
loss: 1.0376676321029663,grad_norm: 0.9999996199688587, iteration: 431022
loss: 1.0483394861221313,grad_norm: 0.7379781804231912, iteration: 431023
loss: 0.976958692073822,grad_norm: 0.879495051321203, iteration: 431024
loss: 1.0721548795700073,grad_norm: 0.999999271153582, iteration: 431025
loss: 0.9986341595649719,grad_norm: 0.7514546672191045, iteration: 431026
loss: 1.050331950187683,grad_norm: 0.9999992196618044, iteration: 431027
loss: 1.0069271326065063,grad_norm: 0.7405127051495206, iteration: 431028
loss: 1.0315983295440674,grad_norm: 0.9999995837439128, iteration: 431029
loss: 1.0269196033477783,grad_norm: 0.9999993145183232, iteration: 431030
loss: 0.9350879788398743,grad_norm: 0.9229193722115973, iteration: 431031
loss: 1.1085739135742188,grad_norm: 0.9999996150504437, iteration: 431032
loss: 1.2159730195999146,grad_norm: 0.8852531355633695, iteration: 431033
loss: 1.0502099990844727,grad_norm: 0.8537066094140183, iteration: 431034
loss: 1.0098323822021484,grad_norm: 0.7179806744755042, iteration: 431035
loss: 1.0140676498413086,grad_norm: 0.7437214743070173, iteration: 431036
loss: 1.0278589725494385,grad_norm: 0.8726074067004755, iteration: 431037
loss: 1.0030732154846191,grad_norm: 0.7841867255949018, iteration: 431038
loss: 1.0093573331832886,grad_norm: 0.7780088950816394, iteration: 431039
loss: 0.9862490892410278,grad_norm: 0.8615131062303942, iteration: 431040
loss: 1.0506089925765991,grad_norm: 0.9999994132667297, iteration: 431041
loss: 1.0243655443191528,grad_norm: 0.7585164151295869, iteration: 431042
loss: 1.0118422508239746,grad_norm: 0.7498011437660493, iteration: 431043
loss: 1.0307894945144653,grad_norm: 0.9999992570743985, iteration: 431044
loss: 1.1496126651763916,grad_norm: 0.9999991858333254, iteration: 431045
loss: 1.0084834098815918,grad_norm: 0.976922648729666, iteration: 431046
loss: 1.0495550632476807,grad_norm: 0.845715915515144, iteration: 431047
loss: 0.995895266532898,grad_norm: 0.9764566639300639, iteration: 431048
loss: 1.0035208463668823,grad_norm: 0.8432030303904104, iteration: 431049
loss: 1.1567257642745972,grad_norm: 0.9999997206132923, iteration: 431050
loss: 1.0006800889968872,grad_norm: 0.7682277412455474, iteration: 431051
loss: 1.0402610301971436,grad_norm: 0.9999994574794512, iteration: 431052
loss: 1.0967947244644165,grad_norm: 0.974138231010665, iteration: 431053
loss: 0.9750527739524841,grad_norm: 0.7421150977841637, iteration: 431054
loss: 1.2026114463806152,grad_norm: 0.9999994475471606, iteration: 431055
loss: 1.1909339427947998,grad_norm: 0.9999989741998785, iteration: 431056
loss: 1.0172781944274902,grad_norm: 0.9999990308457954, iteration: 431057
loss: 1.0191878080368042,grad_norm: 0.9999990156913974, iteration: 431058
loss: 1.0480434894561768,grad_norm: 0.9999996735971769, iteration: 431059
loss: 1.0180882215499878,grad_norm: 0.8623786662911548, iteration: 431060
loss: 1.1334220170974731,grad_norm: 0.999999305671707, iteration: 431061
loss: 1.0423208475112915,grad_norm: 0.9999990665467484, iteration: 431062
loss: 1.0700430870056152,grad_norm: 0.8853621296997577, iteration: 431063
loss: 0.996263325214386,grad_norm: 0.7230684018927042, iteration: 431064
loss: 1.0942553281784058,grad_norm: 0.9999991132040215, iteration: 431065
loss: 1.0386979579925537,grad_norm: 0.8502810312029565, iteration: 431066
loss: 1.0031315088272095,grad_norm: 0.8407379160844966, iteration: 431067
loss: 1.0157777070999146,grad_norm: 0.8103730345885193, iteration: 431068
loss: 0.9993017911911011,grad_norm: 0.8603835050459724, iteration: 431069
loss: 0.9684175252914429,grad_norm: 0.6318121661334681, iteration: 431070
loss: 1.018757939338684,grad_norm: 0.9237480431119944, iteration: 431071
loss: 1.0038033723831177,grad_norm: 0.8335130645169152, iteration: 431072
loss: 1.2051951885223389,grad_norm: 0.9999993645861428, iteration: 431073
loss: 1.0824254751205444,grad_norm: 0.9999990367508406, iteration: 431074
loss: 1.024573802947998,grad_norm: 0.9999993681515082, iteration: 431075
loss: 1.0464274883270264,grad_norm: 0.9999998854068, iteration: 431076
loss: 1.0665940046310425,grad_norm: 0.9999999736133098, iteration: 431077
loss: 1.120081901550293,grad_norm: 0.9999991230276568, iteration: 431078
loss: 1.082823395729065,grad_norm: 0.9999998313757185, iteration: 431079
loss: 0.984591543674469,grad_norm: 0.9999993908107587, iteration: 431080
loss: 1.0874472856521606,grad_norm: 0.9999999113170801, iteration: 431081
loss: 1.0193915367126465,grad_norm: 0.6669394793900706, iteration: 431082
loss: 0.9815311431884766,grad_norm: 0.8089577894782205, iteration: 431083
loss: 1.0288313627243042,grad_norm: 0.8926927516959373, iteration: 431084
loss: 1.1418628692626953,grad_norm: 0.9999994985814946, iteration: 431085
loss: 0.9795944094657898,grad_norm: 0.8349007806539798, iteration: 431086
loss: 1.007485270500183,grad_norm: 0.8459501815365162, iteration: 431087
loss: 1.029502272605896,grad_norm: 0.7656780163774927, iteration: 431088
loss: 0.9826991558074951,grad_norm: 0.8468208975661053, iteration: 431089
loss: 0.988397479057312,grad_norm: 0.9999991264880007, iteration: 431090
loss: 1.028303623199463,grad_norm: 0.9999995561971757, iteration: 431091
loss: 1.0222041606903076,grad_norm: 1.00000002318792, iteration: 431092
loss: 0.9689754843711853,grad_norm: 0.7742306647287596, iteration: 431093
loss: 1.0095298290252686,grad_norm: 0.8325448516681375, iteration: 431094
loss: 0.995345413684845,grad_norm: 0.9999999107825095, iteration: 431095
loss: 1.0371358394622803,grad_norm: 0.8772632366924259, iteration: 431096
loss: 1.0324251651763916,grad_norm: 0.6297306104537981, iteration: 431097
loss: 0.97493976354599,grad_norm: 0.7412230412437696, iteration: 431098
loss: 1.0506935119628906,grad_norm: 0.999999436355619, iteration: 431099
loss: 0.9719841480255127,grad_norm: 0.7579743267458087, iteration: 431100
loss: 0.9814187288284302,grad_norm: 0.7175083027854828, iteration: 431101
loss: 1.0380278825759888,grad_norm: 0.9999993410002385, iteration: 431102
loss: 1.0209733247756958,grad_norm: 0.9476002972752867, iteration: 431103
loss: 1.0405718088150024,grad_norm: 0.8348336442002472, iteration: 431104
loss: 1.18215811252594,grad_norm: 0.9999993381181689, iteration: 431105
loss: 1.019667148590088,grad_norm: 0.6880675554130615, iteration: 431106
loss: 0.984532356262207,grad_norm: 0.6525390029715248, iteration: 431107
loss: 0.9936568737030029,grad_norm: 0.9093003019921508, iteration: 431108
loss: 1.0876834392547607,grad_norm: 0.9999997264276506, iteration: 431109
loss: 0.9824535846710205,grad_norm: 0.7957144905976603, iteration: 431110
loss: 0.9669798016548157,grad_norm: 0.7096955225125472, iteration: 431111
loss: 1.0845146179199219,grad_norm: 0.9999992539079237, iteration: 431112
loss: 1.0441492795944214,grad_norm: 0.9999992248660278, iteration: 431113
loss: 1.0083768367767334,grad_norm: 0.8488313249474289, iteration: 431114
loss: 1.0079783201217651,grad_norm: 0.6948374145367817, iteration: 431115
loss: 1.0720982551574707,grad_norm: 0.9999994148518949, iteration: 431116
loss: 1.1747796535491943,grad_norm: 0.9764506972602719, iteration: 431117
loss: 1.0143007040023804,grad_norm: 0.8502156386701598, iteration: 431118
loss: 0.993340253829956,grad_norm: 0.8091116654783148, iteration: 431119
loss: 1.0016225576400757,grad_norm: 0.6570274132964765, iteration: 431120
loss: 1.0057557821273804,grad_norm: 0.7915219790208692, iteration: 431121
loss: 1.0761646032333374,grad_norm: 0.8954393804443869, iteration: 431122
loss: 1.0340921878814697,grad_norm: 0.9640757608816465, iteration: 431123
loss: 0.9807448983192444,grad_norm: 0.8088537385871009, iteration: 431124
loss: 1.0320816040039062,grad_norm: 0.8536053964778277, iteration: 431125
loss: 1.0747251510620117,grad_norm: 0.9999992987209018, iteration: 431126
loss: 1.006150722503662,grad_norm: 0.8338317961455916, iteration: 431127
loss: 0.9900553822517395,grad_norm: 0.6926961047100052, iteration: 431128
loss: 1.0000782012939453,grad_norm: 0.7683409582422216, iteration: 431129
loss: 0.967673659324646,grad_norm: 0.7619593607707336, iteration: 431130
loss: 1.0351115465164185,grad_norm: 0.8384057168880633, iteration: 431131
loss: 1.0377064943313599,grad_norm: 0.9999991474512603, iteration: 431132
loss: 1.031946063041687,grad_norm: 0.9142214237580234, iteration: 431133
loss: 1.0207046270370483,grad_norm: 0.9999997398118243, iteration: 431134
loss: 1.0315660238265991,grad_norm: 0.9999991957234361, iteration: 431135
loss: 1.0437748432159424,grad_norm: 0.9999990362140407, iteration: 431136
loss: 1.0083045959472656,grad_norm: 0.9999999015896116, iteration: 431137
loss: 1.0017240047454834,grad_norm: 0.9999993586592477, iteration: 431138
loss: 1.0499032735824585,grad_norm: 0.9999996230759113, iteration: 431139
loss: 1.0176199674606323,grad_norm: 0.8332407853149633, iteration: 431140
loss: 0.9686095118522644,grad_norm: 0.9812501814144731, iteration: 431141
loss: 1.0025163888931274,grad_norm: 0.9999991256191909, iteration: 431142
loss: 0.9738727807998657,grad_norm: 0.764273935453169, iteration: 431143
loss: 0.9870274066925049,grad_norm: 0.999998932942605, iteration: 431144
loss: 1.0031529664993286,grad_norm: 0.7913027778293563, iteration: 431145
loss: 1.0105468034744263,grad_norm: 0.9999991191360693, iteration: 431146
loss: 1.005670189857483,grad_norm: 0.8758427329918148, iteration: 431147
loss: 0.9771027565002441,grad_norm: 0.782481334240412, iteration: 431148
loss: 0.9721413850784302,grad_norm: 0.8858270982318738, iteration: 431149
loss: 1.0268508195877075,grad_norm: 0.6731075411605223, iteration: 431150
loss: 1.0494086742401123,grad_norm: 1.0000001168691612, iteration: 431151
loss: 0.9960809350013733,grad_norm: 0.7194651771618255, iteration: 431152
loss: 0.9940749406814575,grad_norm: 0.7391149389874471, iteration: 431153
loss: 1.06411874294281,grad_norm: 0.8661795958128161, iteration: 431154
loss: 1.0430152416229248,grad_norm: 0.9999992059271536, iteration: 431155
loss: 1.0087814331054688,grad_norm: 0.8197082445292577, iteration: 431156
loss: 1.037295937538147,grad_norm: 0.9999990905665441, iteration: 431157
loss: 1.015269160270691,grad_norm: 0.7926745061271356, iteration: 431158
loss: 0.9190764427185059,grad_norm: 0.8101211367391101, iteration: 431159
loss: 0.9795135259628296,grad_norm: 0.6675062463802202, iteration: 431160
loss: 1.012865424156189,grad_norm: 0.8237010285036527, iteration: 431161
loss: 0.9889187216758728,grad_norm: 0.7547572382309162, iteration: 431162
loss: 1.0346570014953613,grad_norm: 0.999999151328528, iteration: 431163
loss: 1.051193118095398,grad_norm: 0.9999993416031178, iteration: 431164
loss: 0.9850564002990723,grad_norm: 0.9725173829590789, iteration: 431165
loss: 1.0099745988845825,grad_norm: 0.7384084958024968, iteration: 431166
loss: 0.9698212146759033,grad_norm: 0.7603265364589314, iteration: 431167
loss: 0.9707760214805603,grad_norm: 0.9474751571917871, iteration: 431168
loss: 0.9904957413673401,grad_norm: 0.8864609532835421, iteration: 431169
loss: 0.9710783362388611,grad_norm: 0.749153136062297, iteration: 431170
loss: 0.9786534309387207,grad_norm: 0.7330344449494572, iteration: 431171
loss: 1.038230299949646,grad_norm: 0.9999991755196964, iteration: 431172
loss: 1.0728728771209717,grad_norm: 0.9999996587591623, iteration: 431173
loss: 1.0311955213546753,grad_norm: 0.7287216140656751, iteration: 431174
loss: 1.0357824563980103,grad_norm: 0.7809955629095776, iteration: 431175
loss: 1.0361738204956055,grad_norm: 0.8744247045278454, iteration: 431176
loss: 1.0064629316329956,grad_norm: 0.9897714501347635, iteration: 431177
loss: 1.0105929374694824,grad_norm: 0.8396220649224337, iteration: 431178
loss: 1.0684548616409302,grad_norm: 0.81223683769042, iteration: 431179
loss: 0.9792411923408508,grad_norm: 0.6943706163552601, iteration: 431180
loss: 0.9594644904136658,grad_norm: 0.792502749298948, iteration: 431181
loss: 1.0442191362380981,grad_norm: 0.9999991868027928, iteration: 431182
loss: 0.9682526588439941,grad_norm: 0.8806508233536001, iteration: 431183
loss: 1.0206749439239502,grad_norm: 0.8263078909165661, iteration: 431184
loss: 1.0047591924667358,grad_norm: 0.9999998369694308, iteration: 431185
loss: 1.0119566917419434,grad_norm: 0.8198354211252425, iteration: 431186
loss: 0.9874988794326782,grad_norm: 0.9999990821492368, iteration: 431187
loss: 1.0018408298492432,grad_norm: 0.751062218235916, iteration: 431188
loss: 0.9650535583496094,grad_norm: 0.7439171965296613, iteration: 431189
loss: 1.0256142616271973,grad_norm: 0.9999992256094098, iteration: 431190
loss: 1.0019516944885254,grad_norm: 0.7010477647708611, iteration: 431191
loss: 1.0029062032699585,grad_norm: 0.7549280137510253, iteration: 431192
loss: 1.0632644891738892,grad_norm: 0.9999990624017693, iteration: 431193
loss: 1.0000191926956177,grad_norm: 0.7207097482838921, iteration: 431194
loss: 1.0484553575515747,grad_norm: 0.7715123970113378, iteration: 431195
loss: 1.026044487953186,grad_norm: 0.9836487981348878, iteration: 431196
loss: 1.0304102897644043,grad_norm: 0.7370735081910188, iteration: 431197
loss: 1.0095503330230713,grad_norm: 0.8812685350658261, iteration: 431198
loss: 1.0874764919281006,grad_norm: 0.9999998932397987, iteration: 431199
loss: 1.039259672164917,grad_norm: 0.9999995195873514, iteration: 431200
loss: 1.0010852813720703,grad_norm: 0.9999999509371842, iteration: 431201
loss: 1.0212643146514893,grad_norm: 0.9204605612144146, iteration: 431202
loss: 1.065022587776184,grad_norm: 0.7489619255098926, iteration: 431203
loss: 0.9909952282905579,grad_norm: 0.6977043194422822, iteration: 431204
loss: 1.0045490264892578,grad_norm: 0.6955895851883479, iteration: 431205
loss: 0.9904396533966064,grad_norm: 0.6602466816866297, iteration: 431206
loss: 1.0288416147232056,grad_norm: 0.8977721004903269, iteration: 431207
loss: 0.9891822338104248,grad_norm: 0.7525089692812174, iteration: 431208
loss: 1.1561782360076904,grad_norm: 1.000000000929153, iteration: 431209
loss: 1.0083123445510864,grad_norm: 0.68141745908837, iteration: 431210
loss: 0.989748477935791,grad_norm: 0.7191911504944574, iteration: 431211
loss: 0.9727305173873901,grad_norm: 0.7586110833521633, iteration: 431212
loss: 0.991126298904419,grad_norm: 0.9999998120104141, iteration: 431213
loss: 0.9717203974723816,grad_norm: 0.8665542422248477, iteration: 431214
loss: 1.0572341680526733,grad_norm: 0.8227821543100808, iteration: 431215
loss: 0.9852232933044434,grad_norm: 0.9043638053922459, iteration: 431216
loss: 1.0145807266235352,grad_norm: 0.7720287758396039, iteration: 431217
loss: 1.002241611480713,grad_norm: 0.7750410984265572, iteration: 431218
loss: 1.0611462593078613,grad_norm: 0.9999990562825344, iteration: 431219
loss: 0.9981991648674011,grad_norm: 0.7281348892018181, iteration: 431220
loss: 1.0254967212677002,grad_norm: 0.941282390701296, iteration: 431221
loss: 0.9853106737136841,grad_norm: 0.8986109340389274, iteration: 431222
loss: 0.9860379695892334,grad_norm: 0.8738776900228048, iteration: 431223
loss: 0.9808646440505981,grad_norm: 0.716825413045089, iteration: 431224
loss: 0.9915805459022522,grad_norm: 0.7998575124995025, iteration: 431225
loss: 1.0011695623397827,grad_norm: 0.8067606833512007, iteration: 431226
loss: 1.002822995185852,grad_norm: 0.7074001417908596, iteration: 431227
loss: 1.0640677213668823,grad_norm: 0.8044047990514053, iteration: 431228
loss: 0.9927926659584045,grad_norm: 0.7575626511145547, iteration: 431229
loss: 0.9768063426017761,grad_norm: 0.775245148233167, iteration: 431230
loss: 0.9981258511543274,grad_norm: 0.7374250777753673, iteration: 431231
loss: 1.0292160511016846,grad_norm: 0.799316479956329, iteration: 431232
loss: 0.9686772227287292,grad_norm: 0.8130628936440868, iteration: 431233
loss: 1.0213361978530884,grad_norm: 0.8253019806837522, iteration: 431234
loss: 0.9950748682022095,grad_norm: 0.6733748474114095, iteration: 431235
loss: 0.9792946577072144,grad_norm: 0.7519397083339365, iteration: 431236
loss: 0.9899183511734009,grad_norm: 0.9602219921210917, iteration: 431237
loss: 1.0377579927444458,grad_norm: 0.8733232343880654, iteration: 431238
loss: 0.9989499449729919,grad_norm: 0.7060873118311897, iteration: 431239
loss: 1.0258753299713135,grad_norm: 0.9999990785895324, iteration: 431240
loss: 1.0303014516830444,grad_norm: 0.9999991657828557, iteration: 431241
loss: 1.0483601093292236,grad_norm: 0.9999990207720362, iteration: 431242
loss: 1.0448753833770752,grad_norm: 0.8475356361120837, iteration: 431243
loss: 1.0113165378570557,grad_norm: 0.8228794157958309, iteration: 431244
loss: 1.0181330442428589,grad_norm: 0.8374747535390137, iteration: 431245
loss: 1.000340223312378,grad_norm: 0.8722356354464113, iteration: 431246
loss: 0.9671611189842224,grad_norm: 0.8418690535880571, iteration: 431247
loss: 0.9704110026359558,grad_norm: 0.6687007010938195, iteration: 431248
loss: 0.9935949444770813,grad_norm: 0.8447287421461881, iteration: 431249
loss: 1.0940179824829102,grad_norm: 0.9134996300040448, iteration: 431250
loss: 1.0549849271774292,grad_norm: 0.9998069578314451, iteration: 431251
loss: 1.1759716272354126,grad_norm: 0.9999993726357109, iteration: 431252
loss: 1.0511354207992554,grad_norm: 0.9999993883474023, iteration: 431253
loss: 0.9792333245277405,grad_norm: 0.7748450067634668, iteration: 431254
loss: 0.9783989191055298,grad_norm: 0.8651440969659142, iteration: 431255
loss: 0.9862590432167053,grad_norm: 0.7185531916132332, iteration: 431256
loss: 1.1363693475723267,grad_norm: 0.9999991611657761, iteration: 431257
loss: 0.9777060747146606,grad_norm: 0.726174347580005, iteration: 431258
loss: 0.9737671613693237,grad_norm: 0.8143584124238544, iteration: 431259
loss: 1.060942530632019,grad_norm: 0.9999992589458826, iteration: 431260
loss: 1.1049673557281494,grad_norm: 0.9999990289191981, iteration: 431261
loss: 1.0225087404251099,grad_norm: 0.8239229338387901, iteration: 431262
loss: 1.0027343034744263,grad_norm: 0.9999989568868073, iteration: 431263
loss: 0.9636742472648621,grad_norm: 0.8677474093152575, iteration: 431264
loss: 1.0439993143081665,grad_norm: 0.9999995634151361, iteration: 431265
loss: 0.9711376428604126,grad_norm: 0.8613368934080695, iteration: 431266
loss: 0.9852648973464966,grad_norm: 0.8371080915493618, iteration: 431267
loss: 0.9881212711334229,grad_norm: 0.8795329515294431, iteration: 431268
loss: 1.0184160470962524,grad_norm: 0.9999991602840256, iteration: 431269
loss: 1.0151910781860352,grad_norm: 0.8013521509312477, iteration: 431270
loss: 0.9672729969024658,grad_norm: 0.9999993009661494, iteration: 431271
loss: 1.0360873937606812,grad_norm: 0.89156009985966, iteration: 431272
loss: 1.1086798906326294,grad_norm: 0.9999996401512498, iteration: 431273
loss: 0.9993435144424438,grad_norm: 0.757119420595861, iteration: 431274
loss: 1.0276669263839722,grad_norm: 0.7175531714252444, iteration: 431275
loss: 0.9982941150665283,grad_norm: 0.8273910134652185, iteration: 431276
loss: 1.041870355606079,grad_norm: 0.9999994026300096, iteration: 431277
loss: 0.9876825213432312,grad_norm: 0.7974546445244747, iteration: 431278
loss: 1.1796519756317139,grad_norm: 0.9999993048420088, iteration: 431279
loss: 0.9561433792114258,grad_norm: 0.7152120066846221, iteration: 431280
loss: 1.0038901567459106,grad_norm: 0.7647149682211636, iteration: 431281
loss: 0.9820769429206848,grad_norm: 0.8198984514309199, iteration: 431282
loss: 1.0339393615722656,grad_norm: 0.7841074772825078, iteration: 431283
loss: 0.9572392106056213,grad_norm: 0.7281836528728867, iteration: 431284
loss: 0.9871509671211243,grad_norm: 0.7217976773550713, iteration: 431285
loss: 1.2264589071273804,grad_norm: 0.9999999026649815, iteration: 431286
loss: 1.0486729145050049,grad_norm: 0.8557044104812292, iteration: 431287
loss: 1.0200541019439697,grad_norm: 0.9999997810605645, iteration: 431288
loss: 1.0085043907165527,grad_norm: 0.8728751650708972, iteration: 431289
loss: 1.0002402067184448,grad_norm: 0.9999990924341656, iteration: 431290
loss: 1.0329805612564087,grad_norm: 0.8590282423976717, iteration: 431291
loss: 1.0192320346832275,grad_norm: 0.8228873846285145, iteration: 431292
loss: 1.0249040126800537,grad_norm: 0.9999999304005661, iteration: 431293
loss: 0.9896527528762817,grad_norm: 0.9526341338562009, iteration: 431294
loss: 1.022346019744873,grad_norm: 0.7686417090129796, iteration: 431295
loss: 1.0212079286575317,grad_norm: 0.6990899331235775, iteration: 431296
loss: 0.9900376200675964,grad_norm: 0.7745189341787794, iteration: 431297
loss: 1.0043448209762573,grad_norm: 0.6904592126071608, iteration: 431298
loss: 0.9969331622123718,grad_norm: 0.8823737055363718, iteration: 431299
loss: 0.996614933013916,grad_norm: 0.8758799083322569, iteration: 431300
loss: 1.0012692213058472,grad_norm: 0.6842989209773591, iteration: 431301
loss: 0.9967932105064392,grad_norm: 0.8570991328716722, iteration: 431302
loss: 1.0090656280517578,grad_norm: 0.7982417406469368, iteration: 431303
loss: 0.9561712741851807,grad_norm: 0.6840842740937847, iteration: 431304
loss: 1.0358967781066895,grad_norm: 0.8308474883222158, iteration: 431305
loss: 0.9970903992652893,grad_norm: 0.744766221189123, iteration: 431306
loss: 0.9679537415504456,grad_norm: 0.8850889185165259, iteration: 431307
loss: 1.0181543827056885,grad_norm: 0.9999991529544545, iteration: 431308
loss: 1.0499399900436401,grad_norm: 0.9999995229279146, iteration: 431309
loss: 1.0026016235351562,grad_norm: 0.9539647070853324, iteration: 431310
loss: 1.0197875499725342,grad_norm: 0.8085742034905166, iteration: 431311
loss: 1.027099370956421,grad_norm: 0.9999996159269382, iteration: 431312
loss: 0.9868564605712891,grad_norm: 0.6741434788139686, iteration: 431313
loss: 0.9877616167068481,grad_norm: 0.8361380196928215, iteration: 431314
loss: 0.93853759765625,grad_norm: 0.7434954518485468, iteration: 431315
loss: 1.0066320896148682,grad_norm: 0.910883599370803, iteration: 431316
loss: 1.0309665203094482,grad_norm: 0.8162123145511964, iteration: 431317
loss: 0.9915252327919006,grad_norm: 0.7854567911118661, iteration: 431318
loss: 0.9430685639381409,grad_norm: 0.9999999146255262, iteration: 431319
loss: 1.0136735439300537,grad_norm: 0.7957723543005017, iteration: 431320
loss: 0.996677577495575,grad_norm: 0.8213339856855424, iteration: 431321
loss: 1.0120255947113037,grad_norm: 0.71526205024419, iteration: 431322
loss: 0.9771031141281128,grad_norm: 0.6924863573317007, iteration: 431323
loss: 0.9776840806007385,grad_norm: 0.7630559186187318, iteration: 431324
loss: 0.992405891418457,grad_norm: 0.850354622499086, iteration: 431325
loss: 0.9802507162094116,grad_norm: 0.7409229803317728, iteration: 431326
loss: 1.0220764875411987,grad_norm: 0.6660554697458386, iteration: 431327
loss: 1.0765647888183594,grad_norm: 0.9999990557065714, iteration: 431328
loss: 1.0402814149856567,grad_norm: 0.7329385032325938, iteration: 431329
loss: 0.9721419215202332,grad_norm: 0.7458857983175398, iteration: 431330
loss: 0.9757626056671143,grad_norm: 0.7309882293368659, iteration: 431331
loss: 1.0025646686553955,grad_norm: 0.9999994991009327, iteration: 431332
loss: 1.0216381549835205,grad_norm: 0.7927285779030627, iteration: 431333
loss: 0.9612632393836975,grad_norm: 0.9180324052940566, iteration: 431334
loss: 1.03143310546875,grad_norm: 0.999999042517206, iteration: 431335
loss: 1.018345832824707,grad_norm: 0.7644858270460136, iteration: 431336
loss: 1.0117932558059692,grad_norm: 0.6983191250374106, iteration: 431337
loss: 1.0110621452331543,grad_norm: 0.8059562762940107, iteration: 431338
loss: 1.0267785787582397,grad_norm: 0.9975615263232328, iteration: 431339
loss: 0.9785262942314148,grad_norm: 0.6866721640472423, iteration: 431340
loss: 0.9659699201583862,grad_norm: 0.8090432857209006, iteration: 431341
loss: 0.9925379157066345,grad_norm: 0.5919707483327958, iteration: 431342
loss: 1.0028879642486572,grad_norm: 0.883529530046568, iteration: 431343
loss: 0.9860620498657227,grad_norm: 0.7986691388894058, iteration: 431344
loss: 0.9970807433128357,grad_norm: 0.8213524378989195, iteration: 431345
loss: 0.9923901557922363,grad_norm: 0.8276824364925236, iteration: 431346
loss: 1.0246952772140503,grad_norm: 0.9750875725938324, iteration: 431347
loss: 1.0265823602676392,grad_norm: 0.7627715316143945, iteration: 431348
loss: 1.0217748880386353,grad_norm: 0.999999510642305, iteration: 431349
loss: 0.9786781072616577,grad_norm: 0.8517899746309833, iteration: 431350
loss: 1.0031646490097046,grad_norm: 0.790377862594048, iteration: 431351
loss: 0.9836775064468384,grad_norm: 0.9445126823381206, iteration: 431352
loss: 0.9838055372238159,grad_norm: 0.7936084735758544, iteration: 431353
loss: 1.0147905349731445,grad_norm: 0.9999993103515403, iteration: 431354
loss: 1.0484493970870972,grad_norm: 0.7707633442645482, iteration: 431355
loss: 0.9942606687545776,grad_norm: 0.7981225044412057, iteration: 431356
loss: 0.9975111484527588,grad_norm: 0.7699810621302752, iteration: 431357
loss: 1.001549243927002,grad_norm: 0.9999997683849485, iteration: 431358
loss: 0.9846038222312927,grad_norm: 0.99999927141931, iteration: 431359
loss: 1.0489978790283203,grad_norm: 0.813411376606838, iteration: 431360
loss: 0.9819628596305847,grad_norm: 0.8797464779510344, iteration: 431361
loss: 0.980846643447876,grad_norm: 0.9212627699532688, iteration: 431362
loss: 1.0206327438354492,grad_norm: 0.697528345163803, iteration: 431363
loss: 1.1678988933563232,grad_norm: 0.9999997142541743, iteration: 431364
loss: 1.0244519710540771,grad_norm: 0.9999996222695106, iteration: 431365
loss: 1.0225776433944702,grad_norm: 0.8255634741333214, iteration: 431366
loss: 1.046363115310669,grad_norm: 0.787572241746383, iteration: 431367
loss: 1.0698587894439697,grad_norm: 0.9999991934959018, iteration: 431368
loss: 1.0101613998413086,grad_norm: 0.7729471064341538, iteration: 431369
loss: 1.043045163154602,grad_norm: 0.8597199336402425, iteration: 431370
loss: 1.1660789251327515,grad_norm: 0.9999998107218374, iteration: 431371
loss: 1.1037116050720215,grad_norm: 0.9521845058829287, iteration: 431372
loss: 1.0296038389205933,grad_norm: 0.9999991377641183, iteration: 431373
loss: 0.983526349067688,grad_norm: 0.6955985092154925, iteration: 431374
loss: 1.017540454864502,grad_norm: 0.9812992967646572, iteration: 431375
loss: 0.9942985773086548,grad_norm: 0.9999992044826177, iteration: 431376
loss: 0.9818278551101685,grad_norm: 0.7458906891617664, iteration: 431377
loss: 1.018728494644165,grad_norm: 0.8901472074043021, iteration: 431378
loss: 0.9908266067504883,grad_norm: 0.636907366044631, iteration: 431379
loss: 0.9758875370025635,grad_norm: 0.8381513190427826, iteration: 431380
loss: 1.005021572113037,grad_norm: 0.9023857612980545, iteration: 431381
loss: 0.9889641404151917,grad_norm: 0.6360081137686586, iteration: 431382
loss: 0.9900715947151184,grad_norm: 0.6987042513581946, iteration: 431383
loss: 1.0291532278060913,grad_norm: 0.8931981740771181, iteration: 431384
loss: 1.0197378396987915,grad_norm: 0.9999995902751718, iteration: 431385
loss: 0.9508838057518005,grad_norm: 0.6389283247582085, iteration: 431386
loss: 0.9899107813835144,grad_norm: 0.7941406634313556, iteration: 431387
loss: 1.0006253719329834,grad_norm: 0.8659213280767858, iteration: 431388
loss: 0.9927436709403992,grad_norm: 0.7199811404698213, iteration: 431389
loss: 0.993948221206665,grad_norm: 0.9717440370283016, iteration: 431390
loss: 1.0347726345062256,grad_norm: 0.8133318196361393, iteration: 431391
loss: 0.9837918281555176,grad_norm: 0.8542722419581101, iteration: 431392
loss: 0.9995083808898926,grad_norm: 0.8053663933355061, iteration: 431393
loss: 1.2225236892700195,grad_norm: 0.9999995060226744, iteration: 431394
loss: 0.9923200011253357,grad_norm: 0.7411391667359649, iteration: 431395
loss: 0.9861918687820435,grad_norm: 0.7074487139483666, iteration: 431396
loss: 0.9608389139175415,grad_norm: 0.8026104273179225, iteration: 431397
loss: 1.0460718870162964,grad_norm: 0.9999995250429516, iteration: 431398
loss: 0.9956285357475281,grad_norm: 0.8339455978787734, iteration: 431399
loss: 1.0234187841415405,grad_norm: 0.7405705833267663, iteration: 431400
loss: 0.9911802411079407,grad_norm: 0.9999990707307983, iteration: 431401
loss: 1.0240790843963623,grad_norm: 0.821302834984402, iteration: 431402
loss: 0.9797122478485107,grad_norm: 0.9809488718786282, iteration: 431403
loss: 1.0113457441329956,grad_norm: 0.789468502170437, iteration: 431404
loss: 0.9712367057800293,grad_norm: 0.8133507955276881, iteration: 431405
loss: 0.9876447319984436,grad_norm: 0.733995421009162, iteration: 431406
loss: 0.9918001294136047,grad_norm: 0.8512305332238, iteration: 431407
loss: 1.0147984027862549,grad_norm: 0.7129587358917493, iteration: 431408
loss: 0.9820835590362549,grad_norm: 0.9407391616098871, iteration: 431409
loss: 1.04082453250885,grad_norm: 0.7695173528298004, iteration: 431410
loss: 0.9705360531806946,grad_norm: 0.6421962035608236, iteration: 431411
loss: 1.0518876314163208,grad_norm: 0.890482050491005, iteration: 431412
loss: 0.9834204316139221,grad_norm: 0.6908826951419403, iteration: 431413
loss: 0.9937202334403992,grad_norm: 0.7982059810567612, iteration: 431414
loss: 1.0138003826141357,grad_norm: 0.6546581600715464, iteration: 431415
loss: 0.9985513091087341,grad_norm: 0.8135825086490378, iteration: 431416
loss: 1.0306602716445923,grad_norm: 0.7914550543604285, iteration: 431417
loss: 1.0444029569625854,grad_norm: 0.7710558453819493, iteration: 431418
loss: 1.0167332887649536,grad_norm: 0.9999990844021703, iteration: 431419
loss: 0.9733088612556458,grad_norm: 0.8173806297598776, iteration: 431420
loss: 1.0164555311203003,grad_norm: 0.9051019586283517, iteration: 431421
loss: 1.046265959739685,grad_norm: 0.7805758363388834, iteration: 431422
loss: 1.014739751815796,grad_norm: 0.9261733265470484, iteration: 431423
loss: 1.0102252960205078,grad_norm: 0.7734716451235799, iteration: 431424
loss: 1.0298309326171875,grad_norm: 0.8113908910425922, iteration: 431425
loss: 1.0510380268096924,grad_norm: 0.9208710263809147, iteration: 431426
loss: 1.0078041553497314,grad_norm: 0.8842251393358194, iteration: 431427
loss: 1.0068671703338623,grad_norm: 0.9999990980055279, iteration: 431428
loss: 0.9893782138824463,grad_norm: 0.7041299119858572, iteration: 431429
loss: 1.0593186616897583,grad_norm: 0.7902855812281819, iteration: 431430
loss: 0.9765252470970154,grad_norm: 0.696116750021272, iteration: 431431
loss: 1.055436134338379,grad_norm: 0.9606955308453013, iteration: 431432
loss: 1.0072773694992065,grad_norm: 0.7410979368560506, iteration: 431433
loss: 0.9812443852424622,grad_norm: 0.6796245675650481, iteration: 431434
loss: 1.0047842264175415,grad_norm: 0.8572173818356934, iteration: 431435
loss: 1.0157747268676758,grad_norm: 0.785111769211369, iteration: 431436
loss: 0.9799605011940002,grad_norm: 0.800615679287187, iteration: 431437
loss: 0.981390118598938,grad_norm: 0.6433073483374745, iteration: 431438
loss: 1.0003618001937866,grad_norm: 0.7931028879634331, iteration: 431439
loss: 0.9743607044219971,grad_norm: 0.7369433105159706, iteration: 431440
loss: 0.9982207417488098,grad_norm: 0.7971142762886994, iteration: 431441
loss: 0.9969502687454224,grad_norm: 0.7620236086090341, iteration: 431442
loss: 0.9927729964256287,grad_norm: 0.7672912272910479, iteration: 431443
loss: 1.0091086626052856,grad_norm: 0.7120567609742466, iteration: 431444
loss: 0.9924327731132507,grad_norm: 0.8558800981331698, iteration: 431445
loss: 1.0132397413253784,grad_norm: 0.8466485100264141, iteration: 431446
loss: 0.9799391031265259,grad_norm: 0.7459425568636269, iteration: 431447
loss: 1.0468798875808716,grad_norm: 0.9740330876636492, iteration: 431448
loss: 0.9724069237709045,grad_norm: 0.9999999604645445, iteration: 431449
loss: 0.9977071285247803,grad_norm: 0.724941839199138, iteration: 431450
loss: 0.98263019323349,grad_norm: 0.7675138743690566, iteration: 431451
loss: 1.019388198852539,grad_norm: 0.874062531078447, iteration: 431452
loss: 1.045627236366272,grad_norm: 0.9212093488282257, iteration: 431453
loss: 1.0382825136184692,grad_norm: 0.8384062722457366, iteration: 431454
loss: 0.9717376828193665,grad_norm: 0.9999991045149436, iteration: 431455
loss: 1.0073848962783813,grad_norm: 0.8010340151542082, iteration: 431456
loss: 0.9939833283424377,grad_norm: 0.8767307632974368, iteration: 431457
loss: 1.007515549659729,grad_norm: 0.9999996431563423, iteration: 431458
loss: 1.0293992757797241,grad_norm: 0.811910623106977, iteration: 431459
loss: 1.0174760818481445,grad_norm: 0.7411318800296594, iteration: 431460
loss: 1.0265308618545532,grad_norm: 0.8587416609151365, iteration: 431461
loss: 0.9963892698287964,grad_norm: 0.8211957673290354, iteration: 431462
loss: 1.0373526811599731,grad_norm: 0.7460040477743939, iteration: 431463
loss: 1.0207511186599731,grad_norm: 0.9999992601480336, iteration: 431464
loss: 0.9965453147888184,grad_norm: 0.9999995062436788, iteration: 431465
loss: 1.0800868272781372,grad_norm: 0.9999997022387815, iteration: 431466
loss: 0.9839425683021545,grad_norm: 0.9492653880433563, iteration: 431467
loss: 0.9972591996192932,grad_norm: 0.7904225086140778, iteration: 431468
loss: 1.0253732204437256,grad_norm: 0.8129446015289983, iteration: 431469
loss: 1.0344465970993042,grad_norm: 0.8782805238081587, iteration: 431470
loss: 1.0380138158798218,grad_norm: 0.9999992093663795, iteration: 431471
loss: 0.9992669820785522,grad_norm: 0.7208220316841245, iteration: 431472
loss: 0.974892258644104,grad_norm: 0.9999994895209285, iteration: 431473
loss: 1.0052214860916138,grad_norm: 0.9045625895068491, iteration: 431474
loss: 1.056541919708252,grad_norm: 0.9999998389834842, iteration: 431475
loss: 0.9584314227104187,grad_norm: 0.8216242184453509, iteration: 431476
loss: 1.0412685871124268,grad_norm: 0.999999129187209, iteration: 431477
loss: 1.0002648830413818,grad_norm: 0.8556943723327816, iteration: 431478
loss: 0.979531466960907,grad_norm: 0.7218993793776742, iteration: 431479
loss: 1.016955018043518,grad_norm: 0.8852695555302, iteration: 431480
loss: 1.002378225326538,grad_norm: 0.8257200801538903, iteration: 431481
loss: 1.0377963781356812,grad_norm: 0.9044984462794846, iteration: 431482
loss: 1.0315202474594116,grad_norm: 0.7754627243779486, iteration: 431483
loss: 1.0961767435073853,grad_norm: 0.9999995182264451, iteration: 431484
loss: 1.0324102640151978,grad_norm: 0.8886821873740259, iteration: 431485
loss: 0.9799227118492126,grad_norm: 0.7057643453666598, iteration: 431486
loss: 0.9757423400878906,grad_norm: 0.7361688621677311, iteration: 431487
loss: 1.0972706079483032,grad_norm: 0.9151072100698167, iteration: 431488
loss: 1.044951319694519,grad_norm: 0.720957923825167, iteration: 431489
loss: 1.0013842582702637,grad_norm: 0.8061469120108231, iteration: 431490
loss: 0.9803741574287415,grad_norm: 0.765752517668729, iteration: 431491
loss: 0.982132613658905,grad_norm: 0.8939243011625483, iteration: 431492
loss: 1.0176211595535278,grad_norm: 0.7910592916368905, iteration: 431493
loss: 1.0363584756851196,grad_norm: 0.7270069320367221, iteration: 431494
loss: 1.038859248161316,grad_norm: 0.9999995148097917, iteration: 431495
loss: 1.0016002655029297,grad_norm: 0.7729739036373467, iteration: 431496
loss: 0.9937617778778076,grad_norm: 0.8253906288965047, iteration: 431497
loss: 0.9974204301834106,grad_norm: 0.7366348840165141, iteration: 431498
loss: 0.9748929738998413,grad_norm: 0.7497689440265456, iteration: 431499
loss: 0.9880005717277527,grad_norm: 0.9813396429595659, iteration: 431500
loss: 0.985611617565155,grad_norm: 0.8327814711277428, iteration: 431501
loss: 1.0207104682922363,grad_norm: 0.715631641335826, iteration: 431502
loss: 1.1060177087783813,grad_norm: 0.9999991997086104, iteration: 431503
loss: 1.0079647302627563,grad_norm: 0.8253929053694283, iteration: 431504
loss: 1.0007202625274658,grad_norm: 0.7610450280742087, iteration: 431505
loss: 0.9990931153297424,grad_norm: 0.6403345178975869, iteration: 431506
loss: 1.0189071893692017,grad_norm: 0.7691087776933996, iteration: 431507
loss: 1.0110340118408203,grad_norm: 0.8077500483047745, iteration: 431508
loss: 1.0293872356414795,grad_norm: 0.9999991984806328, iteration: 431509
loss: 1.060772180557251,grad_norm: 0.9999999921758757, iteration: 431510
loss: 0.9898362159729004,grad_norm: 0.8243383165469895, iteration: 431511
loss: 0.9831255674362183,grad_norm: 0.820444400924483, iteration: 431512
loss: 1.0275239944458008,grad_norm: 0.6564870895379317, iteration: 431513
loss: 1.0612807273864746,grad_norm: 0.7518968295165197, iteration: 431514
loss: 1.0199027061462402,grad_norm: 0.7289873011260827, iteration: 431515
loss: 1.0669794082641602,grad_norm: 0.9999991379949211, iteration: 431516
loss: 1.001022219657898,grad_norm: 0.8056123655505741, iteration: 431517
loss: 0.9838051199913025,grad_norm: 0.8377744159850231, iteration: 431518
loss: 1.0040762424468994,grad_norm: 0.7944876003015245, iteration: 431519
loss: 0.9981334805488586,grad_norm: 0.8174687069379974, iteration: 431520
loss: 1.0380058288574219,grad_norm: 0.9067782607635312, iteration: 431521
loss: 0.9874128699302673,grad_norm: 0.6618051111401597, iteration: 431522
loss: 0.9870417714118958,grad_norm: 0.7847976120841279, iteration: 431523
loss: 1.0361266136169434,grad_norm: 0.8656899503175278, iteration: 431524
loss: 1.0703988075256348,grad_norm: 0.8130181118189359, iteration: 431525
loss: 1.068841576576233,grad_norm: 0.9999991574028159, iteration: 431526
loss: 0.9662925601005554,grad_norm: 0.7841252585760149, iteration: 431527
loss: 1.0086045265197754,grad_norm: 0.7807112561871903, iteration: 431528
loss: 1.0237932205200195,grad_norm: 0.8159916537491609, iteration: 431529
loss: 0.9958188533782959,grad_norm: 0.780750560604541, iteration: 431530
loss: 0.9386418461799622,grad_norm: 0.8552709018090184, iteration: 431531
loss: 0.9974285364151001,grad_norm: 0.9427623778747998, iteration: 431532
loss: 1.0327935218811035,grad_norm: 0.7139198502598622, iteration: 431533
loss: 1.0079398155212402,grad_norm: 0.7851528575745673, iteration: 431534
loss: 1.0118991136550903,grad_norm: 0.7933029411735406, iteration: 431535
loss: 0.9821058511734009,grad_norm: 0.9184256896788034, iteration: 431536
loss: 0.9972425699234009,grad_norm: 0.7165097447980795, iteration: 431537
loss: 0.9898577332496643,grad_norm: 0.7991446546780614, iteration: 431538
loss: 1.0136187076568604,grad_norm: 0.7830381632048697, iteration: 431539
loss: 1.0727193355560303,grad_norm: 0.9999998634688946, iteration: 431540
loss: 1.0340107679367065,grad_norm: 0.797399466937944, iteration: 431541
loss: 1.0022997856140137,grad_norm: 0.953131130924897, iteration: 431542
loss: 1.0030996799468994,grad_norm: 0.6632348546312322, iteration: 431543
loss: 1.013709545135498,grad_norm: 0.803804767814079, iteration: 431544
loss: 1.000407099723816,grad_norm: 0.7879677182916908, iteration: 431545
loss: 0.9936953186988831,grad_norm: 0.7874392739385715, iteration: 431546
loss: 0.9981852173805237,grad_norm: 0.8844851400260132, iteration: 431547
loss: 1.0031630992889404,grad_norm: 0.7863625586821044, iteration: 431548
loss: 1.0003788471221924,grad_norm: 0.7016699074394184, iteration: 431549
loss: 1.0006587505340576,grad_norm: 0.6537541132349054, iteration: 431550
loss: 0.9927813410758972,grad_norm: 0.9928018386364995, iteration: 431551
loss: 0.9713008999824524,grad_norm: 0.8393343002418943, iteration: 431552
loss: 0.9897653460502625,grad_norm: 0.6680472357807652, iteration: 431553
loss: 1.0020509958267212,grad_norm: 0.9999991617324462, iteration: 431554
loss: 1.0538063049316406,grad_norm: 0.999999817849427, iteration: 431555
loss: 0.9640841484069824,grad_norm: 0.8543743988909034, iteration: 431556
loss: 0.9935983419418335,grad_norm: 0.7095550918521973, iteration: 431557
loss: 1.0120363235473633,grad_norm: 0.8357696118297147, iteration: 431558
loss: 0.9943121671676636,grad_norm: 0.6828015059746794, iteration: 431559
loss: 0.9784551858901978,grad_norm: 0.6722058432160181, iteration: 431560
loss: 1.0303152799606323,grad_norm: 0.7057891077626245, iteration: 431561
loss: 0.9828458428382874,grad_norm: 0.7954177879498601, iteration: 431562
loss: 1.023024320602417,grad_norm: 0.7718255018094672, iteration: 431563
loss: 1.0341209173202515,grad_norm: 0.7826630855340247, iteration: 431564
loss: 1.0053600072860718,grad_norm: 0.7221172066790906, iteration: 431565
loss: 0.9947172999382019,grad_norm: 0.8652087507025876, iteration: 431566
loss: 1.017653465270996,grad_norm: 0.9782916649963517, iteration: 431567
loss: 1.0161857604980469,grad_norm: 0.6836006748527731, iteration: 431568
loss: 0.9992530345916748,grad_norm: 0.6932981983058362, iteration: 431569
loss: 0.9890615344047546,grad_norm: 0.8997766713800283, iteration: 431570
loss: 0.9926937222480774,grad_norm: 0.8245927686538271, iteration: 431571
loss: 1.034886360168457,grad_norm: 0.819116326910681, iteration: 431572
loss: 0.9937077760696411,grad_norm: 0.9999998571918918, iteration: 431573
loss: 0.9710363149642944,grad_norm: 0.9999997293274088, iteration: 431574
loss: 0.9919230341911316,grad_norm: 0.7280650757709234, iteration: 431575
loss: 0.9997494220733643,grad_norm: 0.7426361789789085, iteration: 431576
loss: 1.0385327339172363,grad_norm: 0.8628049950597048, iteration: 431577
loss: 1.013047456741333,grad_norm: 0.883347910615024, iteration: 431578
loss: 1.0029717683792114,grad_norm: 0.7586419308178736, iteration: 431579
loss: 1.0219446420669556,grad_norm: 0.630611557377651, iteration: 431580
loss: 1.0128767490386963,grad_norm: 0.8634227134333199, iteration: 431581
loss: 1.0111136436462402,grad_norm: 1.0000000256847614, iteration: 431582
loss: 1.0751112699508667,grad_norm: 0.999999089173625, iteration: 431583
loss: 0.9622222185134888,grad_norm: 0.7314842466595002, iteration: 431584
loss: 0.9942218661308289,grad_norm: 0.6968942912583364, iteration: 431585
loss: 1.0048353672027588,grad_norm: 0.9064149085781597, iteration: 431586
loss: 1.0078685283660889,grad_norm: 0.8616292256831529, iteration: 431587
loss: 1.0359644889831543,grad_norm: 0.9488248359703433, iteration: 431588
loss: 1.000903844833374,grad_norm: 0.8337794294095809, iteration: 431589
loss: 0.9797492027282715,grad_norm: 0.7060033007745894, iteration: 431590
loss: 1.0082635879516602,grad_norm: 0.7363764629823966, iteration: 431591
loss: 1.0076922178268433,grad_norm: 0.6723826925837113, iteration: 431592
loss: 1.050091028213501,grad_norm: 0.9999997334580897, iteration: 431593
loss: 1.1301454305648804,grad_norm: 0.9999997931179018, iteration: 431594
loss: 1.0696210861206055,grad_norm: 0.7599789567079869, iteration: 431595
loss: 0.9896852374076843,grad_norm: 0.7055214095132623, iteration: 431596
loss: 0.9558689594268799,grad_norm: 0.790335056984596, iteration: 431597
loss: 1.095467448234558,grad_norm: 0.9999999760889795, iteration: 431598
loss: 0.9750845432281494,grad_norm: 0.8157885498323784, iteration: 431599
loss: 0.9954221844673157,grad_norm: 0.7123457712529232, iteration: 431600
loss: 0.9651095271110535,grad_norm: 0.8230385615107982, iteration: 431601
loss: 0.9926677346229553,grad_norm: 0.8818906844792417, iteration: 431602
loss: 1.074970006942749,grad_norm: 0.9999991709592084, iteration: 431603
loss: 1.0191389322280884,grad_norm: 0.7671135678905259, iteration: 431604
loss: 1.0264242887496948,grad_norm: 0.8225441622526506, iteration: 431605
loss: 1.0033771991729736,grad_norm: 0.8882421230469839, iteration: 431606
loss: 1.027880311012268,grad_norm: 0.9999999911775381, iteration: 431607
loss: 1.0038399696350098,grad_norm: 0.804997058796712, iteration: 431608
loss: 1.0041289329528809,grad_norm: 0.8554877967821001, iteration: 431609
loss: 1.0068104267120361,grad_norm: 0.7710279146914913, iteration: 431610
loss: 1.0098439455032349,grad_norm: 0.6883105514800294, iteration: 431611
loss: 1.011979579925537,grad_norm: 0.743991559534307, iteration: 431612
loss: 0.986423909664154,grad_norm: 0.6702824845405603, iteration: 431613
loss: 0.9917459487915039,grad_norm: 0.6856402307929209, iteration: 431614
loss: 0.9984047412872314,grad_norm: 0.8839044167331008, iteration: 431615
loss: 1.0409973859786987,grad_norm: 0.9999997211319237, iteration: 431616
loss: 0.972087562084198,grad_norm: 0.8221920323484915, iteration: 431617
loss: 0.9734891057014465,grad_norm: 0.7941849021177348, iteration: 431618
loss: 0.9855392575263977,grad_norm: 0.9999990134315506, iteration: 431619
loss: 1.0117980241775513,grad_norm: 0.6521587084277787, iteration: 431620
loss: 1.033233880996704,grad_norm: 0.7311024942515708, iteration: 431621
loss: 1.0588115453720093,grad_norm: 0.9999990796329273, iteration: 431622
loss: 0.9657864570617676,grad_norm: 0.8454889250335053, iteration: 431623
loss: 0.9883362650871277,grad_norm: 0.915681754953853, iteration: 431624
loss: 1.0530515909194946,grad_norm: 0.9225942696563263, iteration: 431625
loss: 1.0018492937088013,grad_norm: 0.7526881622245354, iteration: 431626
loss: 1.0138635635375977,grad_norm: 0.7933561395903933, iteration: 431627
loss: 0.9741616249084473,grad_norm: 0.7848910345768939, iteration: 431628
loss: 1.0792739391326904,grad_norm: 0.9999991202758906, iteration: 431629
loss: 0.9907215237617493,grad_norm: 0.9714971771057911, iteration: 431630
loss: 0.9730759263038635,grad_norm: 0.7284126112464226, iteration: 431631
loss: 1.048126220703125,grad_norm: 0.818395713380985, iteration: 431632
loss: 1.0263440608978271,grad_norm: 0.891690040888631, iteration: 431633
loss: 1.0082606077194214,grad_norm: 0.9999996811468135, iteration: 431634
loss: 1.0039225816726685,grad_norm: 0.8479624388413515, iteration: 431635
loss: 1.0296615362167358,grad_norm: 0.7630930409235804, iteration: 431636
loss: 1.0883806943893433,grad_norm: 0.9999993791568844, iteration: 431637
loss: 0.9997751712799072,grad_norm: 0.8483929582645968, iteration: 431638
loss: 1.0472389459609985,grad_norm: 0.8400952189444577, iteration: 431639
loss: 1.0012990236282349,grad_norm: 0.7317057399781036, iteration: 431640
loss: 0.967457115650177,grad_norm: 0.8918087335629737, iteration: 431641
loss: 0.9950727224349976,grad_norm: 0.8121286049058608, iteration: 431642
loss: 0.9846063256263733,grad_norm: 0.9399724186068139, iteration: 431643
loss: 1.0069864988327026,grad_norm: 0.8616679399493052, iteration: 431644
loss: 0.9630318284034729,grad_norm: 0.7542537238298836, iteration: 431645
loss: 1.0203793048858643,grad_norm: 0.7759537011193617, iteration: 431646
loss: 0.9868990182876587,grad_norm: 0.8432841488847937, iteration: 431647
loss: 1.020670771598816,grad_norm: 0.8672645466304546, iteration: 431648
loss: 0.9833849668502808,grad_norm: 0.7735552545471515, iteration: 431649
loss: 0.9876280426979065,grad_norm: 0.8268382690400865, iteration: 431650
loss: 0.965078592300415,grad_norm: 0.7636943706766879, iteration: 431651
loss: 0.992209792137146,grad_norm: 0.7150679241146602, iteration: 431652
loss: 1.0784021615982056,grad_norm: 1.0000000202357984, iteration: 431653
loss: 0.9843094348907471,grad_norm: 0.7914190723142419, iteration: 431654
loss: 0.9998584985733032,grad_norm: 0.8392239340177501, iteration: 431655
loss: 1.003701090812683,grad_norm: 0.729960536195914, iteration: 431656
loss: 0.9820476770401001,grad_norm: 0.7580173912988414, iteration: 431657
loss: 1.0007967948913574,grad_norm: 0.8647663939548267, iteration: 431658
loss: 0.9420769214630127,grad_norm: 0.7630511512060716, iteration: 431659
loss: 1.0204695463180542,grad_norm: 0.7285162262973074, iteration: 431660
loss: 0.9864323139190674,grad_norm: 0.9999992374317866, iteration: 431661
loss: 1.039657473564148,grad_norm: 0.7477799070150924, iteration: 431662
loss: 1.0583058595657349,grad_norm: 0.7786319986909337, iteration: 431663
loss: 0.9531769156455994,grad_norm: 0.7511082186074756, iteration: 431664
loss: 0.9566614627838135,grad_norm: 0.6642240579014881, iteration: 431665
loss: 0.9740051627159119,grad_norm: 0.7314840983509519, iteration: 431666
loss: 0.9988387823104858,grad_norm: 0.6662666903996105, iteration: 431667
loss: 1.0372496843338013,grad_norm: 1.000000017902796, iteration: 431668
loss: 1.0000454187393188,grad_norm: 0.8721511039043225, iteration: 431669
loss: 0.9877812266349792,grad_norm: 0.7906017679439618, iteration: 431670
loss: 1.2533303499221802,grad_norm: 1.000000026420912, iteration: 431671
loss: 0.9966134428977966,grad_norm: 0.999999567454649, iteration: 431672
loss: 1.0829771757125854,grad_norm: 0.9999995702865091, iteration: 431673
loss: 1.0703703165054321,grad_norm: 0.8566892982621263, iteration: 431674
loss: 1.039718747138977,grad_norm: 0.9999990642370501, iteration: 431675
loss: 1.097191572189331,grad_norm: 0.99999948005861, iteration: 431676
loss: 1.0465266704559326,grad_norm: 0.9518390657145135, iteration: 431677
loss: 1.0393964052200317,grad_norm: 0.8407006394297437, iteration: 431678
loss: 0.9973834156990051,grad_norm: 0.9999999389616759, iteration: 431679
loss: 0.98822021484375,grad_norm: 0.8251209280012504, iteration: 431680
loss: 0.9887043237686157,grad_norm: 0.7749113001899401, iteration: 431681
loss: 1.0407755374908447,grad_norm: 0.8308724910509301, iteration: 431682
loss: 1.0639704465866089,grad_norm: 0.789659042467088, iteration: 431683
loss: 1.02095365524292,grad_norm: 0.7271888762756639, iteration: 431684
loss: 0.9977015256881714,grad_norm: 0.7706802688273829, iteration: 431685
loss: 1.021873116493225,grad_norm: 0.803273151458503, iteration: 431686
loss: 1.0135289430618286,grad_norm: 0.8190817870963913, iteration: 431687
loss: 1.017683744430542,grad_norm: 0.8646437503713903, iteration: 431688
loss: 1.0654163360595703,grad_norm: 0.9999991453255458, iteration: 431689
loss: 0.9999417066574097,grad_norm: 0.6344370498903046, iteration: 431690
loss: 0.9759868383407593,grad_norm: 0.7818496763882076, iteration: 431691
loss: 1.0336254835128784,grad_norm: 0.8242786995601958, iteration: 431692
loss: 0.9841252565383911,grad_norm: 0.8042677990449316, iteration: 431693
loss: 1.023034930229187,grad_norm: 0.9999994113552472, iteration: 431694
loss: 1.014952540397644,grad_norm: 0.7819375448507735, iteration: 431695
loss: 1.0243821144104004,grad_norm: 0.8486726027466032, iteration: 431696
loss: 0.985046923160553,grad_norm: 0.6028929105192629, iteration: 431697
loss: 1.0005757808685303,grad_norm: 0.8748525882342563, iteration: 431698
loss: 1.0205090045928955,grad_norm: 0.7944798494826546, iteration: 431699
loss: 1.0162525177001953,grad_norm: 0.8570556886306399, iteration: 431700
loss: 1.0125678777694702,grad_norm: 0.8174548788271749, iteration: 431701
loss: 1.0025532245635986,grad_norm: 0.9999991136257015, iteration: 431702
loss: 1.0318461656570435,grad_norm: 0.9126940434322045, iteration: 431703
loss: 0.991479218006134,grad_norm: 0.9999993391016594, iteration: 431704
loss: 0.9874464273452759,grad_norm: 0.8482479036508931, iteration: 431705
loss: 1.022573471069336,grad_norm: 0.9062338731332135, iteration: 431706
loss: 0.9748760461807251,grad_norm: 0.738388907061457, iteration: 431707
loss: 1.0221612453460693,grad_norm: 0.6308385221470537, iteration: 431708
loss: 1.0506157875061035,grad_norm: 0.8262468647857129, iteration: 431709
loss: 1.0922454595565796,grad_norm: 0.8941787358076672, iteration: 431710
loss: 1.051755666732788,grad_norm: 0.8452662291916698, iteration: 431711
loss: 0.9547339677810669,grad_norm: 0.8097425804288263, iteration: 431712
loss: 0.9855338931083679,grad_norm: 0.7998291155693488, iteration: 431713
loss: 1.0169607400894165,grad_norm: 0.7910706172164276, iteration: 431714
loss: 1.0035834312438965,grad_norm: 0.8282698951647721, iteration: 431715
loss: 1.080137014389038,grad_norm: 0.9999995727749907, iteration: 431716
loss: 1.0060062408447266,grad_norm: 0.712046097096796, iteration: 431717
loss: 0.9871490597724915,grad_norm: 0.6930956068831665, iteration: 431718
loss: 1.0340523719787598,grad_norm: 0.8908298891603779, iteration: 431719
loss: 1.0048792362213135,grad_norm: 0.6339241845980658, iteration: 431720
loss: 1.158523678779602,grad_norm: 0.9999990932926133, iteration: 431721
loss: 1.0270462036132812,grad_norm: 0.8858425034692803, iteration: 431722
loss: 1.0250049829483032,grad_norm: 0.8235595232874539, iteration: 431723
loss: 0.9913976788520813,grad_norm: 0.8180893576655482, iteration: 431724
loss: 1.024490237236023,grad_norm: 0.6169733801990586, iteration: 431725
loss: 1.016465187072754,grad_norm: 0.9999996103770228, iteration: 431726
loss: 0.9825217723846436,grad_norm: 0.9011630063856486, iteration: 431727
loss: 1.0007507801055908,grad_norm: 0.7875347083009744, iteration: 431728
loss: 0.9968275427818298,grad_norm: 0.9103919314660021, iteration: 431729
loss: 0.9920496940612793,grad_norm: 0.8834843303839613, iteration: 431730
loss: 1.0891447067260742,grad_norm: 0.8429536962659632, iteration: 431731
loss: 0.9837364554405212,grad_norm: 0.9999990390304997, iteration: 431732
loss: 1.033837914466858,grad_norm: 0.9238935965444707, iteration: 431733
loss: 1.0090574026107788,grad_norm: 0.6560052323821078, iteration: 431734
loss: 1.0089447498321533,grad_norm: 0.8490003419194151, iteration: 431735
loss: 1.0159893035888672,grad_norm: 0.7250703366607715, iteration: 431736
loss: 1.0026577711105347,grad_norm: 0.999999169056048, iteration: 431737
loss: 0.9590457081794739,grad_norm: 0.6741687287101162, iteration: 431738
loss: 1.0344666242599487,grad_norm: 0.7473965604263902, iteration: 431739
loss: 1.0436817407608032,grad_norm: 0.7435503934694725, iteration: 431740
loss: 1.010190725326538,grad_norm: 0.707742964599659, iteration: 431741
loss: 0.9994078874588013,grad_norm: 0.9999993890150086, iteration: 431742
loss: 1.022279977798462,grad_norm: 0.9832980356971683, iteration: 431743
loss: 0.9950774908065796,grad_norm: 0.8471188283385492, iteration: 431744
loss: 1.0496107339859009,grad_norm: 0.9458607615354637, iteration: 431745
loss: 0.9735486507415771,grad_norm: 0.8820666440999825, iteration: 431746
loss: 1.0159814357757568,grad_norm: 0.702110067139421, iteration: 431747
loss: 0.9973372220993042,grad_norm: 0.8257920266600728, iteration: 431748
loss: 1.0161200761795044,grad_norm: 0.8948779174843058, iteration: 431749
loss: 1.2121556997299194,grad_norm: 0.9999999486552292, iteration: 431750
loss: 1.0201865434646606,grad_norm: 0.6959965884216335, iteration: 431751
loss: 1.024612307548523,grad_norm: 0.9999993181259973, iteration: 431752
loss: 1.0442875623703003,grad_norm: 0.9999996905924619, iteration: 431753
loss: 0.9642805457115173,grad_norm: 0.9999996205972386, iteration: 431754
loss: 0.9897186756134033,grad_norm: 0.770533271699335, iteration: 431755
loss: 1.016416311264038,grad_norm: 0.9553968202110777, iteration: 431756
loss: 1.006435751914978,grad_norm: 0.7774426054829776, iteration: 431757
loss: 0.9892875552177429,grad_norm: 0.7955302732306723, iteration: 431758
loss: 1.0577996969223022,grad_norm: 0.6602928772011691, iteration: 431759
loss: 1.143019676208496,grad_norm: 0.9450933092872649, iteration: 431760
loss: 1.044047474861145,grad_norm: 0.999999321017799, iteration: 431761
loss: 1.042303442955017,grad_norm: 0.7668314412246912, iteration: 431762
loss: 1.0361263751983643,grad_norm: 0.9583025888715765, iteration: 431763
loss: 1.0357378721237183,grad_norm: 0.9741871859633651, iteration: 431764
loss: 1.0229532718658447,grad_norm: 0.9085025127356405, iteration: 431765
loss: 1.0325818061828613,grad_norm: 0.7899579977682009, iteration: 431766
loss: 1.0461716651916504,grad_norm: 0.8150479745080154, iteration: 431767
loss: 1.0106180906295776,grad_norm: 0.8454839641330467, iteration: 431768
loss: 0.9956013560295105,grad_norm: 0.6916672160604763, iteration: 431769
loss: 0.9760339856147766,grad_norm: 0.7235857295887028, iteration: 431770
loss: 1.0463658571243286,grad_norm: 0.9999995708069618, iteration: 431771
loss: 1.0275312662124634,grad_norm: 0.8847857193043818, iteration: 431772
loss: 1.0030306577682495,grad_norm: 0.7607716710838299, iteration: 431773
loss: 1.0079399347305298,grad_norm: 0.9999994945578234, iteration: 431774
loss: 1.002192735671997,grad_norm: 0.8374980017871603, iteration: 431775
loss: 0.9866847991943359,grad_norm: 0.8635358806313533, iteration: 431776
loss: 1.0387991666793823,grad_norm: 0.7078840489972833, iteration: 431777
loss: 0.9789873957633972,grad_norm: 0.6104331940794783, iteration: 431778
loss: 1.0185073614120483,grad_norm: 0.7363940369337585, iteration: 431779
loss: 0.975664496421814,grad_norm: 0.7809983985164258, iteration: 431780
loss: 1.0061864852905273,grad_norm: 0.8230637081021427, iteration: 431781
loss: 1.008652687072754,grad_norm: 0.6997398729421027, iteration: 431782
loss: 1.0441519021987915,grad_norm: 0.8121649462628299, iteration: 431783
loss: 1.1175413131713867,grad_norm: 0.9792355441757084, iteration: 431784
loss: 0.9863129258155823,grad_norm: 0.7652610409307927, iteration: 431785
loss: 1.0067909955978394,grad_norm: 0.8090335805895736, iteration: 431786
loss: 1.020401954650879,grad_norm: 0.7498341882817406, iteration: 431787
loss: 0.9941127896308899,grad_norm: 0.9999993317135293, iteration: 431788
loss: 1.0572307109832764,grad_norm: 0.8485659113653038, iteration: 431789
loss: 1.0290288925170898,grad_norm: 0.9999999349608414, iteration: 431790
loss: 1.02166748046875,grad_norm: 0.9218231427477919, iteration: 431791
loss: 0.9924615621566772,grad_norm: 0.8467763504859288, iteration: 431792
loss: 0.9404686689376831,grad_norm: 0.7133408742331719, iteration: 431793
loss: 1.0633232593536377,grad_norm: 0.9604171234159543, iteration: 431794
loss: 0.999207615852356,grad_norm: 0.9999993096666694, iteration: 431795
loss: 1.038418173789978,grad_norm: 0.6632511811356555, iteration: 431796
loss: 0.9865034818649292,grad_norm: 0.8453751912720414, iteration: 431797
loss: 1.0021876096725464,grad_norm: 0.9588627098398448, iteration: 431798
loss: 0.9908743500709534,grad_norm: 0.8415396838661314, iteration: 431799
loss: 1.0264322757720947,grad_norm: 0.7656021869219134, iteration: 431800
loss: 0.988183319568634,grad_norm: 0.7504538413532579, iteration: 431801
loss: 0.9991491436958313,grad_norm: 0.8765156640049918, iteration: 431802
loss: 1.0103834867477417,grad_norm: 0.816521277162962, iteration: 431803
loss: 1.2285126447677612,grad_norm: 0.9999997317521362, iteration: 431804
loss: 0.9992280006408691,grad_norm: 0.9999991368079043, iteration: 431805
loss: 0.9710532426834106,grad_norm: 0.746668841325058, iteration: 431806
loss: 0.9947472810745239,grad_norm: 0.7575033971501118, iteration: 431807
loss: 1.127601981163025,grad_norm: 0.9999992135382281, iteration: 431808
loss: 1.017330527305603,grad_norm: 0.999999116864262, iteration: 431809
loss: 1.0412236452102661,grad_norm: 0.9999993571270425, iteration: 431810
loss: 0.9976750016212463,grad_norm: 0.7283240999226744, iteration: 431811
loss: 0.9694196581840515,grad_norm: 0.7488864240335589, iteration: 431812
loss: 0.9852573275566101,grad_norm: 0.9999997890284331, iteration: 431813
loss: 0.9821575880050659,grad_norm: 0.9494611404295017, iteration: 431814
loss: 0.9734017252922058,grad_norm: 0.7724543581188428, iteration: 431815
loss: 1.1148247718811035,grad_norm: 0.9356083485643368, iteration: 431816
loss: 0.9789272546768188,grad_norm: 0.7690019874914494, iteration: 431817
loss: 0.988849937915802,grad_norm: 0.7001186046387814, iteration: 431818
loss: 1.0747392177581787,grad_norm: 0.9757648218645145, iteration: 431819
loss: 1.093721866607666,grad_norm: 0.9999998243737934, iteration: 431820
loss: 0.9779022932052612,grad_norm: 0.9999996709616664, iteration: 431821
loss: 1.0242012739181519,grad_norm: 0.7950004810686051, iteration: 431822
loss: 1.0043601989746094,grad_norm: 0.8598464760346409, iteration: 431823
loss: 1.00434148311615,grad_norm: 0.7496879408467251, iteration: 431824
loss: 1.0009040832519531,grad_norm: 0.7888266872195265, iteration: 431825
loss: 1.0580718517303467,grad_norm: 0.9999995165434163, iteration: 431826
loss: 0.9882925748825073,grad_norm: 0.7688290961357778, iteration: 431827
loss: 1.0063234567642212,grad_norm: 0.7625112208272078, iteration: 431828
loss: 1.0287917852401733,grad_norm: 0.9999994744247765, iteration: 431829
loss: 0.9821798205375671,grad_norm: 0.8050325977489962, iteration: 431830
loss: 1.012579083442688,grad_norm: 0.6957355879630543, iteration: 431831
loss: 1.0487892627716064,grad_norm: 0.6756349838070072, iteration: 431832
loss: 1.0080081224441528,grad_norm: 0.7917257323214877, iteration: 431833
loss: 0.994968056678772,grad_norm: 0.7434509796487465, iteration: 431834
loss: 1.0176571607589722,grad_norm: 0.6801333841646602, iteration: 431835
loss: 1.0984086990356445,grad_norm: 0.9999991825810984, iteration: 431836
loss: 1.0119197368621826,grad_norm: 0.730191653288373, iteration: 431837
loss: 1.0311400890350342,grad_norm: 0.7673133105421992, iteration: 431838
loss: 1.056771993637085,grad_norm: 0.7865408618464514, iteration: 431839
loss: 1.0516457557678223,grad_norm: 0.7477750111060177, iteration: 431840
loss: 0.9657447934150696,grad_norm: 0.9999998562101187, iteration: 431841
loss: 1.030908226966858,grad_norm: 0.8529913295937654, iteration: 431842
loss: 1.021950602531433,grad_norm: 0.7529384209813696, iteration: 431843
loss: 0.9943414926528931,grad_norm: 0.9137986865856224, iteration: 431844
loss: 0.9923413991928101,grad_norm: 0.8859486690962228, iteration: 431845
loss: 1.017789363861084,grad_norm: 0.8156046927172004, iteration: 431846
loss: 1.0133804082870483,grad_norm: 0.6432130173256108, iteration: 431847
loss: 1.1606688499450684,grad_norm: 0.9999997581372989, iteration: 431848
loss: 1.0017582178115845,grad_norm: 0.8534124310487967, iteration: 431849
loss: 0.9763544201850891,grad_norm: 0.9999999032209002, iteration: 431850
loss: 1.0073107481002808,grad_norm: 0.6512899527607655, iteration: 431851
loss: 1.0769394636154175,grad_norm: 0.9999994912053897, iteration: 431852
loss: 0.9789226055145264,grad_norm: 0.6585204780805721, iteration: 431853
loss: 1.039660096168518,grad_norm: 0.7866793944969315, iteration: 431854
loss: 1.0470247268676758,grad_norm: 0.9999991071562786, iteration: 431855
loss: 0.9831411242485046,grad_norm: 0.8680052518454993, iteration: 431856
loss: 1.0095834732055664,grad_norm: 0.7944459015045998, iteration: 431857
loss: 1.0274076461791992,grad_norm: 0.8530720577115374, iteration: 431858
loss: 1.0213714838027954,grad_norm: 0.8260119773662044, iteration: 431859
loss: 1.1143615245819092,grad_norm: 0.8667463992530641, iteration: 431860
loss: 0.9945666193962097,grad_norm: 0.7203283032646335, iteration: 431861
loss: 0.9935640096664429,grad_norm: 0.7259366687809411, iteration: 431862
loss: 0.9875454306602478,grad_norm: 0.9128320012345367, iteration: 431863
loss: 1.0537337064743042,grad_norm: 0.9999990961468185, iteration: 431864
loss: 0.9994349479675293,grad_norm: 0.8311892771351642, iteration: 431865
loss: 0.9907200336456299,grad_norm: 0.7914091618788289, iteration: 431866
loss: 0.9646376967430115,grad_norm: 0.7395276388595963, iteration: 431867
loss: 1.0988106727600098,grad_norm: 0.9999991283261301, iteration: 431868
loss: 0.9683094024658203,grad_norm: 0.7347944847510207, iteration: 431869
loss: 0.9701381325721741,grad_norm: 0.7263223982622071, iteration: 431870
loss: 1.0528137683868408,grad_norm: 0.9999993505871164, iteration: 431871
loss: 1.1006697416305542,grad_norm: 0.9999998453313499, iteration: 431872
loss: 1.0034117698669434,grad_norm: 0.7293894146387615, iteration: 431873
loss: 1.0186365842819214,grad_norm: 0.9999991097720271, iteration: 431874
loss: 1.0282617807388306,grad_norm: 0.6819547680961554, iteration: 431875
loss: 1.0194193124771118,grad_norm: 0.8050272892982924, iteration: 431876
loss: 1.000910997390747,grad_norm: 0.8229429761968875, iteration: 431877
loss: 1.0602420568466187,grad_norm: 0.9999997212188382, iteration: 431878
loss: 0.9839001893997192,grad_norm: 0.867939551241495, iteration: 431879
loss: 0.9746032953262329,grad_norm: 0.9999995578410675, iteration: 431880
loss: 1.0251414775848389,grad_norm: 0.9480641311776555, iteration: 431881
loss: 1.0383715629577637,grad_norm: 0.999999746856237, iteration: 431882
loss: 1.1988093852996826,grad_norm: 0.9999994448669887, iteration: 431883
loss: 1.0021216869354248,grad_norm: 0.7739898250505125, iteration: 431884
loss: 0.9963032603263855,grad_norm: 0.7722362703251302, iteration: 431885
loss: 1.0690901279449463,grad_norm: 0.9999991121162358, iteration: 431886
loss: 0.9967000484466553,grad_norm: 0.7660244921813463, iteration: 431887
loss: 1.099700927734375,grad_norm: 1.0000000623607272, iteration: 431888
loss: 0.9810906648635864,grad_norm: 0.8271774210356803, iteration: 431889
loss: 1.0031111240386963,grad_norm: 0.8296036447969884, iteration: 431890
loss: 1.0025311708450317,grad_norm: 0.7723505506983906, iteration: 431891
loss: 0.9644113183021545,grad_norm: 0.7497887406148896, iteration: 431892
loss: 0.9934473037719727,grad_norm: 0.8769867468957769, iteration: 431893
loss: 0.9965044856071472,grad_norm: 0.6628393599515328, iteration: 431894
loss: 0.9850645661354065,grad_norm: 0.7574342718175873, iteration: 431895
loss: 0.9884598851203918,grad_norm: 0.7931233694494234, iteration: 431896
loss: 0.9580128192901611,grad_norm: 0.8525708684089026, iteration: 431897
loss: 0.9495984315872192,grad_norm: 0.8033689231588534, iteration: 431898
loss: 1.0041579008102417,grad_norm: 0.8400367997528674, iteration: 431899
loss: 1.0200002193450928,grad_norm: 0.744631227146142, iteration: 431900
loss: 1.0159622430801392,grad_norm: 0.8885982442606719, iteration: 431901
loss: 1.0179389715194702,grad_norm: 0.7491232219208364, iteration: 431902
loss: 1.0064340829849243,grad_norm: 0.856950091331838, iteration: 431903
loss: 0.9973005056381226,grad_norm: 0.7399561714921613, iteration: 431904
loss: 0.9960165023803711,grad_norm: 0.8007395779837946, iteration: 431905
loss: 1.0370129346847534,grad_norm: 0.8031834064065364, iteration: 431906
loss: 0.9722405672073364,grad_norm: 0.7427726873301498, iteration: 431907
loss: 0.9704387187957764,grad_norm: 0.6665769560902417, iteration: 431908
loss: 1.0016326904296875,grad_norm: 0.99999967452037, iteration: 431909
loss: 0.991197943687439,grad_norm: 0.7411256325649952, iteration: 431910
loss: 0.9886470437049866,grad_norm: 0.7049819874352696, iteration: 431911
loss: 0.9945997595787048,grad_norm: 0.79819603767439, iteration: 431912
loss: 1.0467551946640015,grad_norm: 0.7888465798576231, iteration: 431913
loss: 1.0073490142822266,grad_norm: 0.725442442272873, iteration: 431914
loss: 0.9964858889579773,grad_norm: 0.8043355138608634, iteration: 431915
loss: 1.0077861547470093,grad_norm: 0.9999991187985765, iteration: 431916
loss: 1.0335108041763306,grad_norm: 0.9999994408338461, iteration: 431917
loss: 0.9834776520729065,grad_norm: 0.7728730275976116, iteration: 431918
loss: 0.9566584825515747,grad_norm: 0.9316878403758883, iteration: 431919
loss: 0.972637414932251,grad_norm: 0.8790137670301861, iteration: 431920
loss: 0.9778863191604614,grad_norm: 0.9273028121585983, iteration: 431921
loss: 0.9690186381340027,grad_norm: 0.7009964594976887, iteration: 431922
loss: 1.008251667022705,grad_norm: 0.6997276068874315, iteration: 431923
loss: 1.029416561126709,grad_norm: 0.7856626082233276, iteration: 431924
loss: 1.0752042531967163,grad_norm: 0.8846326490881848, iteration: 431925
loss: 1.0096566677093506,grad_norm: 0.8946312561081656, iteration: 431926
loss: 0.9941772818565369,grad_norm: 0.7112647856402174, iteration: 431927
loss: 1.0037126541137695,grad_norm: 0.9999990873942132, iteration: 431928
loss: 1.0166904926300049,grad_norm: 0.8626855903077975, iteration: 431929
loss: 0.9989027380943298,grad_norm: 0.6412247830842418, iteration: 431930
loss: 1.0187969207763672,grad_norm: 0.838678972606842, iteration: 431931
loss: 0.9972637295722961,grad_norm: 0.6612217442008389, iteration: 431932
loss: 1.0070886611938477,grad_norm: 0.7992247536877252, iteration: 431933
loss: 0.9625608921051025,grad_norm: 0.7413891081955719, iteration: 431934
loss: 0.9829486012458801,grad_norm: 0.7755158239494836, iteration: 431935
loss: 1.0484791994094849,grad_norm: 0.9999993321864434, iteration: 431936
loss: 0.974013090133667,grad_norm: 0.8064052337378925, iteration: 431937
loss: 0.9575854539871216,grad_norm: 0.7342084353990934, iteration: 431938
loss: 1.0090471506118774,grad_norm: 0.7964763670848011, iteration: 431939
loss: 1.017859697341919,grad_norm: 0.9459187817534236, iteration: 431940
loss: 0.983411967754364,grad_norm: 0.9535481459652104, iteration: 431941
loss: 0.9731348752975464,grad_norm: 0.6521800811640883, iteration: 431942
loss: 0.9797127842903137,grad_norm: 0.8244176951789624, iteration: 431943
loss: 1.010737657546997,grad_norm: 0.6452551556510995, iteration: 431944
loss: 0.9544745087623596,grad_norm: 0.9073308362111326, iteration: 431945
loss: 0.9628520011901855,grad_norm: 0.8968390919582712, iteration: 431946
loss: 1.0009115934371948,grad_norm: 0.7242425670883091, iteration: 431947
loss: 1.0242975950241089,grad_norm: 0.6688140708917378, iteration: 431948
loss: 1.0237168073654175,grad_norm: 0.7479087044106071, iteration: 431949
loss: 0.9904561638832092,grad_norm: 0.6942457718587585, iteration: 431950
loss: 1.0205916166305542,grad_norm: 0.8561882812883029, iteration: 431951
loss: 1.0058783292770386,grad_norm: 0.7695475006781232, iteration: 431952
loss: 1.05348539352417,grad_norm: 0.9999995435899507, iteration: 431953
loss: 1.0743764638900757,grad_norm: 0.999999209287495, iteration: 431954
loss: 0.9871046543121338,grad_norm: 0.7194296022288088, iteration: 431955
loss: 1.0464822053909302,grad_norm: 0.9999998042055712, iteration: 431956
loss: 1.0150939226150513,grad_norm: 0.7761854426004577, iteration: 431957
loss: 1.0052675008773804,grad_norm: 0.8453167287877473, iteration: 431958
loss: 1.0101964473724365,grad_norm: 0.7701015336551379, iteration: 431959
loss: 0.9734148979187012,grad_norm: 0.8292709513607809, iteration: 431960
loss: 1.0526126623153687,grad_norm: 0.8386733251667089, iteration: 431961
loss: 1.1211397647857666,grad_norm: 0.9503204978101231, iteration: 431962
loss: 1.007900357246399,grad_norm: 0.7641960819993571, iteration: 431963
loss: 1.0113396644592285,grad_norm: 0.9999991097994656, iteration: 431964
loss: 1.0431920289993286,grad_norm: 0.8864154521233198, iteration: 431965
loss: 0.9489977359771729,grad_norm: 0.7411042438839451, iteration: 431966
loss: 1.0001939535140991,grad_norm: 0.6535776553863222, iteration: 431967
loss: 1.0068023204803467,grad_norm: 0.791601143671116, iteration: 431968
loss: 0.9688140153884888,grad_norm: 0.830230318661704, iteration: 431969
loss: 1.0202492475509644,grad_norm: 0.6924559967040563, iteration: 431970
loss: 0.9940903782844543,grad_norm: 0.8607627539319032, iteration: 431971
loss: 1.0108178853988647,grad_norm: 0.8071057332060525, iteration: 431972
loss: 1.0119643211364746,grad_norm: 0.8673716790664744, iteration: 431973
loss: 1.0258623361587524,grad_norm: 0.9166956427151824, iteration: 431974
loss: 0.9833201169967651,grad_norm: 0.8049455130585794, iteration: 431975
loss: 0.9899098873138428,grad_norm: 0.8857417314655832, iteration: 431976
loss: 1.0139819383621216,grad_norm: 0.8841280436776086, iteration: 431977
loss: 1.0160025358200073,grad_norm: 0.9032337791453084, iteration: 431978
loss: 1.0131757259368896,grad_norm: 0.6991240637194327, iteration: 431979
loss: 1.0222867727279663,grad_norm: 0.6993965404466547, iteration: 431980
loss: 0.9960266351699829,grad_norm: 0.8072171151589155, iteration: 431981
loss: 0.9895232915878296,grad_norm: 0.9176161552825476, iteration: 431982
loss: 0.9532803893089294,grad_norm: 0.7124261675681839, iteration: 431983
loss: 0.993435263633728,grad_norm: 0.70219824199131, iteration: 431984
loss: 0.9934876561164856,grad_norm: 0.8080618811480136, iteration: 431985
loss: 0.9790224432945251,grad_norm: 0.6859369751323373, iteration: 431986
loss: 1.0426496267318726,grad_norm: 0.8404617034232668, iteration: 431987
loss: 0.9937812089920044,grad_norm: 0.7787518624623175, iteration: 431988
loss: 1.0176613330841064,grad_norm: 0.7066840826070048, iteration: 431989
loss: 1.0493546724319458,grad_norm: 0.999999725442895, iteration: 431990
loss: 0.9736797213554382,grad_norm: 0.7168039956732133, iteration: 431991
loss: 0.9966440200805664,grad_norm: 0.9999992079515702, iteration: 431992
loss: 1.0416141748428345,grad_norm: 0.842775318865252, iteration: 431993
loss: 1.0470552444458008,grad_norm: 0.8255508106565259, iteration: 431994
loss: 1.0026545524597168,grad_norm: 0.7399706985936368, iteration: 431995
loss: 1.023329734802246,grad_norm: 0.8067344116651947, iteration: 431996
loss: 1.012628436088562,grad_norm: 0.7559332205250183, iteration: 431997
loss: 1.0608799457550049,grad_norm: 0.9999993423709499, iteration: 431998
loss: 0.9847233295440674,grad_norm: 0.9999992784807124, iteration: 431999
loss: 1.009428858757019,grad_norm: 0.7277599432558436, iteration: 432000
loss: 1.0069735050201416,grad_norm: 0.8583341277704224, iteration: 432001
loss: 0.9697479009628296,grad_norm: 0.9804314806698499, iteration: 432002
loss: 1.005557656288147,grad_norm: 0.7980954279863243, iteration: 432003
loss: 0.9689449071884155,grad_norm: 0.69628604823453, iteration: 432004
loss: 0.9837808609008789,grad_norm: 0.7666867315247122, iteration: 432005
loss: 0.9907575845718384,grad_norm: 0.757000330347605, iteration: 432006
loss: 0.9693959951400757,grad_norm: 0.9068363432033735, iteration: 432007
loss: 1.0353683233261108,grad_norm: 0.864879610744634, iteration: 432008
loss: 0.9931589365005493,grad_norm: 0.9526053669418186, iteration: 432009
loss: 0.9969902038574219,grad_norm: 0.9999992639069547, iteration: 432010
loss: 1.0105537176132202,grad_norm: 0.8498448182147398, iteration: 432011
loss: 1.0120010375976562,grad_norm: 0.9999998745682788, iteration: 432012
loss: 0.9652686715126038,grad_norm: 0.7138680372955893, iteration: 432013
loss: 0.9734985828399658,grad_norm: 0.6862760363457383, iteration: 432014
loss: 1.0094462633132935,grad_norm: 0.8083341125304913, iteration: 432015
loss: 0.9931202530860901,grad_norm: 0.7538550379749528, iteration: 432016
loss: 0.9656295776367188,grad_norm: 0.7564937932314292, iteration: 432017
loss: 0.9786946773529053,grad_norm: 0.9023254310035139, iteration: 432018
loss: 0.9733449220657349,grad_norm: 0.7710846178310731, iteration: 432019
loss: 0.9728745222091675,grad_norm: 0.7227921351404736, iteration: 432020
loss: 0.9527276754379272,grad_norm: 0.8679381942372522, iteration: 432021
loss: 1.0261445045471191,grad_norm: 0.7123836114897363, iteration: 432022
loss: 1.0195250511169434,grad_norm: 0.7855191496616979, iteration: 432023
loss: 0.9917693138122559,grad_norm: 0.7562380884602888, iteration: 432024
loss: 1.005894660949707,grad_norm: 0.8601384749319116, iteration: 432025
loss: 1.0266082286834717,grad_norm: 0.9999995064788234, iteration: 432026
loss: 0.9964484572410583,grad_norm: 0.9388007208310836, iteration: 432027
loss: 0.9889515042304993,grad_norm: 0.8688516943067421, iteration: 432028
loss: 0.9975112080574036,grad_norm: 0.7072317125814922, iteration: 432029
loss: 1.003187656402588,grad_norm: 0.74900576969905, iteration: 432030
loss: 1.0037298202514648,grad_norm: 0.8074433701395239, iteration: 432031
loss: 0.9718345999717712,grad_norm: 0.7911902112307073, iteration: 432032
loss: 0.9884406924247742,grad_norm: 0.8326449622018987, iteration: 432033
loss: 0.9996548295021057,grad_norm: 0.8196665394826701, iteration: 432034
loss: 1.0245683193206787,grad_norm: 0.8685851793731643, iteration: 432035
loss: 0.9860360026359558,grad_norm: 0.8926350807680183, iteration: 432036
loss: 1.0254180431365967,grad_norm: 0.9001269147439046, iteration: 432037
loss: 1.0004984140396118,grad_norm: 0.893342641445207, iteration: 432038
loss: 1.0271028280258179,grad_norm: 0.9999996190615054, iteration: 432039
loss: 0.9997543096542358,grad_norm: 0.6785133404685153, iteration: 432040
loss: 1.0089244842529297,grad_norm: 0.6999078658864744, iteration: 432041
loss: 1.0446985960006714,grad_norm: 0.8275953430808645, iteration: 432042
loss: 1.0222747325897217,grad_norm: 0.8999965341026619, iteration: 432043
loss: 1.0716899633407593,grad_norm: 0.9264983670605511, iteration: 432044
loss: 0.9776453375816345,grad_norm: 0.8011993812585739, iteration: 432045
loss: 0.9576225280761719,grad_norm: 0.7502885873605523, iteration: 432046
loss: 0.9471338987350464,grad_norm: 0.8513442430111695, iteration: 432047
loss: 0.9957819581031799,grad_norm: 0.9999999234994125, iteration: 432048
loss: 1.0126703977584839,grad_norm: 0.7309915421516634, iteration: 432049
loss: 1.003092885017395,grad_norm: 0.7365242753988349, iteration: 432050
loss: 0.985303521156311,grad_norm: 0.7210572271198386, iteration: 432051
loss: 1.004729986190796,grad_norm: 0.859523271549862, iteration: 432052
loss: 1.0106661319732666,grad_norm: 0.7329026618021969, iteration: 432053
loss: 0.9891341328620911,grad_norm: 0.8418107771176893, iteration: 432054
loss: 0.9647167921066284,grad_norm: 0.7675720723348568, iteration: 432055
loss: 0.9855849742889404,grad_norm: 0.7089346854089696, iteration: 432056
loss: 0.9992290735244751,grad_norm: 0.7780966795151074, iteration: 432057
loss: 0.9657627940177917,grad_norm: 0.7905740525080529, iteration: 432058
loss: 0.9721421599388123,grad_norm: 0.7368702919919727, iteration: 432059
loss: 0.9800654053688049,grad_norm: 0.8509173403502199, iteration: 432060
loss: 0.9829382300376892,grad_norm: 0.7218489592504778, iteration: 432061
loss: 0.9876067042350769,grad_norm: 0.7459651740325595, iteration: 432062
loss: 0.9995068311691284,grad_norm: 0.8074515352482501, iteration: 432063
loss: 0.9731171727180481,grad_norm: 0.8685313313781124, iteration: 432064
loss: 0.9925356507301331,grad_norm: 0.9999998635907337, iteration: 432065
loss: 1.0036673545837402,grad_norm: 0.7625985648608574, iteration: 432066
loss: 1.0060635805130005,grad_norm: 0.8545304321766323, iteration: 432067
loss: 1.0274484157562256,grad_norm: 0.6894046842107925, iteration: 432068
loss: 0.9981221556663513,grad_norm: 0.8025617468904416, iteration: 432069
loss: 1.0592068433761597,grad_norm: 0.9469877773055929, iteration: 432070
loss: 0.9904625415802002,grad_norm: 0.6679087107207526, iteration: 432071
loss: 1.0041451454162598,grad_norm: 0.739086303742878, iteration: 432072
loss: 0.9883525371551514,grad_norm: 0.7754326160535135, iteration: 432073
loss: 0.9690566658973694,grad_norm: 0.8167610303754521, iteration: 432074
loss: 0.9836254715919495,grad_norm: 0.7933349590640408, iteration: 432075
loss: 0.996964156627655,grad_norm: 0.8691860050782142, iteration: 432076
loss: 0.9740311503410339,grad_norm: 0.7259288837027151, iteration: 432077
loss: 0.9662096500396729,grad_norm: 0.7041042341056166, iteration: 432078
loss: 1.0491299629211426,grad_norm: 0.9999998876638295, iteration: 432079
loss: 0.9808092713356018,grad_norm: 0.8114296118825671, iteration: 432080
loss: 0.9892892241477966,grad_norm: 0.7646447635813863, iteration: 432081
loss: 1.0018930435180664,grad_norm: 0.7922124834969055, iteration: 432082
loss: 0.9812250733375549,grad_norm: 0.725017242175761, iteration: 432083
loss: 1.0346553325653076,grad_norm: 0.9953292815188405, iteration: 432084
loss: 1.0622073411941528,grad_norm: 0.7395479110306332, iteration: 432085
loss: 0.990473210811615,grad_norm: 0.9999991625939375, iteration: 432086
loss: 1.0065916776657104,grad_norm: 0.7233265925546465, iteration: 432087
loss: 0.9838437438011169,grad_norm: 0.6948108180922481, iteration: 432088
loss: 0.9865541458129883,grad_norm: 0.8080873935733801, iteration: 432089
loss: 1.0207746028900146,grad_norm: 0.7644084521309689, iteration: 432090
loss: 1.0236543416976929,grad_norm: 0.9999998867933199, iteration: 432091
loss: 1.0074087381362915,grad_norm: 0.7042714956338928, iteration: 432092
loss: 1.0225014686584473,grad_norm: 0.8577270832616587, iteration: 432093
loss: 1.018918514251709,grad_norm: 0.8470469363789741, iteration: 432094
loss: 1.029829740524292,grad_norm: 0.8979512451321403, iteration: 432095
loss: 0.9990113973617554,grad_norm: 0.6676166208957628, iteration: 432096
loss: 0.9775972366333008,grad_norm: 0.7806725692229457, iteration: 432097
loss: 1.0151968002319336,grad_norm: 0.7723961994254875, iteration: 432098
loss: 1.0231388807296753,grad_norm: 0.8563053187189933, iteration: 432099
loss: 0.9488376975059509,grad_norm: 0.8508536859602623, iteration: 432100
loss: 1.006748080253601,grad_norm: 0.7853725418870421, iteration: 432101
loss: 0.9925566911697388,grad_norm: 0.9165725281843244, iteration: 432102
loss: 1.047921895980835,grad_norm: 0.6661736416288739, iteration: 432103
loss: 0.9805092215538025,grad_norm: 0.6891597861655587, iteration: 432104
loss: 1.015289545059204,grad_norm: 0.9999999676396556, iteration: 432105
loss: 0.9638155698776245,grad_norm: 0.8665020531163865, iteration: 432106
loss: 1.0147850513458252,grad_norm: 0.8496734446454761, iteration: 432107
loss: 0.9931009411811829,grad_norm: 0.8124387934898135, iteration: 432108
loss: 1.0122958421707153,grad_norm: 0.8461032365569038, iteration: 432109
loss: 0.9691300392150879,grad_norm: 0.7807970627553632, iteration: 432110
loss: 1.0301300287246704,grad_norm: 0.6262979641206595, iteration: 432111
loss: 1.002469778060913,grad_norm: 0.9999989692977084, iteration: 432112
loss: 0.9990271329879761,grad_norm: 0.8859827913378483, iteration: 432113
loss: 0.9923804402351379,grad_norm: 0.7382445446610507, iteration: 432114
loss: 1.0088108777999878,grad_norm: 0.8435028112505425, iteration: 432115
loss: 1.0178148746490479,grad_norm: 0.8055198967147688, iteration: 432116
loss: 0.97902512550354,grad_norm: 0.7156341929048219, iteration: 432117
loss: 1.0263563394546509,grad_norm: 0.7185145359029708, iteration: 432118
loss: 1.0884144306182861,grad_norm: 0.7889675214250775, iteration: 432119
loss: 0.9654905796051025,grad_norm: 0.7396404032079785, iteration: 432120
loss: 0.9861985445022583,grad_norm: 0.8295634045556263, iteration: 432121
loss: 1.009903907775879,grad_norm: 0.9999999577871314, iteration: 432122
loss: 0.987402081489563,grad_norm: 0.9999991609544322, iteration: 432123
loss: 1.0174654722213745,grad_norm: 0.87446693267521, iteration: 432124
loss: 0.989670991897583,grad_norm: 0.9999995798369818, iteration: 432125
loss: 0.9961189031600952,grad_norm: 0.6796075112931736, iteration: 432126
loss: 0.985943615436554,grad_norm: 0.7286175599005288, iteration: 432127
loss: 0.981524646282196,grad_norm: 0.8581000264230418, iteration: 432128
loss: 1.019843578338623,grad_norm: 0.883599688948549, iteration: 432129
loss: 0.9752578735351562,grad_norm: 0.7344597665414926, iteration: 432130
loss: 0.9916722178459167,grad_norm: 0.808229111214203, iteration: 432131
loss: 1.011091947555542,grad_norm: 0.6478346329480839, iteration: 432132
loss: 1.0311402082443237,grad_norm: 0.9999996364828426, iteration: 432133
loss: 0.9483329057693481,grad_norm: 0.7054581564782229, iteration: 432134
loss: 1.0555824041366577,grad_norm: 0.9999991521803098, iteration: 432135
loss: 1.0843323469161987,grad_norm: 0.9999999242237785, iteration: 432136
loss: 1.0049570798873901,grad_norm: 0.7469929196625127, iteration: 432137
loss: 0.9895222187042236,grad_norm: 0.8008042365832662, iteration: 432138
loss: 1.0535948276519775,grad_norm: 0.9999995791538124, iteration: 432139
loss: 1.0070281028747559,grad_norm: 0.9999995048177697, iteration: 432140
loss: 1.0040395259857178,grad_norm: 0.9880751458228901, iteration: 432141
loss: 0.9792671203613281,grad_norm: 0.6988718803619781, iteration: 432142
loss: 1.0038154125213623,grad_norm: 0.8678508244864097, iteration: 432143
loss: 1.0135630369186401,grad_norm: 0.8095999936722444, iteration: 432144
loss: 0.9972579479217529,grad_norm: 0.7622616288478605, iteration: 432145
loss: 0.9966700077056885,grad_norm: 0.6799924628847928, iteration: 432146
loss: 1.002463698387146,grad_norm: 0.806871001368181, iteration: 432147
loss: 1.0199058055877686,grad_norm: 0.8721767791829356, iteration: 432148
loss: 1.0563527345657349,grad_norm: 0.7907980584739345, iteration: 432149
loss: 0.9639405608177185,grad_norm: 0.9881199612634556, iteration: 432150
loss: 1.0797792673110962,grad_norm: 0.9106453401434571, iteration: 432151
loss: 0.9972239136695862,grad_norm: 0.7646948207221613, iteration: 432152
loss: 0.9807557463645935,grad_norm: 0.8201917424644175, iteration: 432153
loss: 0.997684895992279,grad_norm: 0.9999994459907783, iteration: 432154
loss: 0.9757348299026489,grad_norm: 0.7697268866782432, iteration: 432155
loss: 1.0048606395721436,grad_norm: 0.7012022742958859, iteration: 432156
loss: 0.9869064688682556,grad_norm: 0.7278506589297863, iteration: 432157
loss: 0.9997360706329346,grad_norm: 0.7883727425615413, iteration: 432158
loss: 0.9618939757347107,grad_norm: 0.6824205665396648, iteration: 432159
loss: 1.0051418542861938,grad_norm: 0.7665929651213798, iteration: 432160
loss: 1.0375950336456299,grad_norm: 0.9030214442347214, iteration: 432161
loss: 0.9832906723022461,grad_norm: 0.8056785205209365, iteration: 432162
loss: 1.0275546312332153,grad_norm: 0.6849294307398345, iteration: 432163
loss: 1.0047286748886108,grad_norm: 0.7567835408825687, iteration: 432164
loss: 0.9929701089859009,grad_norm: 0.8148001428565848, iteration: 432165
loss: 0.9764416813850403,grad_norm: 0.8012176723598954, iteration: 432166
loss: 1.0091360807418823,grad_norm: 0.7281194513832169, iteration: 432167
loss: 0.9960631132125854,grad_norm: 0.8188220160901221, iteration: 432168
loss: 1.004780650138855,grad_norm: 0.9031550921437627, iteration: 432169
loss: 1.0100027322769165,grad_norm: 0.9999990171965326, iteration: 432170
loss: 0.9443062543869019,grad_norm: 0.9999996021735815, iteration: 432171
loss: 1.007887840270996,grad_norm: 0.8263465042988805, iteration: 432172
loss: 1.0062192678451538,grad_norm: 0.7264639644885988, iteration: 432173
loss: 1.0215380191802979,grad_norm: 0.9421847580437608, iteration: 432174
loss: 1.0286204814910889,grad_norm: 0.9999992025167844, iteration: 432175
loss: 0.9967979192733765,grad_norm: 0.7336329033564677, iteration: 432176
loss: 0.9817549586296082,grad_norm: 0.7521146934538727, iteration: 432177
loss: 1.0339806079864502,grad_norm: 0.8235446346968558, iteration: 432178
loss: 1.031435251235962,grad_norm: 0.793034080587894, iteration: 432179
loss: 0.9991023540496826,grad_norm: 0.7887166110719941, iteration: 432180
loss: 1.025974154472351,grad_norm: 0.9999992659863226, iteration: 432181
loss: 0.9988923072814941,grad_norm: 0.7023783232856232, iteration: 432182
loss: 1.0207035541534424,grad_norm: 0.85196786598652, iteration: 432183
loss: 1.0233012437820435,grad_norm: 0.9999997580874498, iteration: 432184
loss: 1.0233395099639893,grad_norm: 0.7801545808780636, iteration: 432185
loss: 0.9988561868667603,grad_norm: 0.7580612091237209, iteration: 432186
loss: 1.0249470472335815,grad_norm: 0.7203251682387863, iteration: 432187
loss: 1.050717830657959,grad_norm: 0.9185516558177651, iteration: 432188
loss: 0.9841490983963013,grad_norm: 0.7355463873748774, iteration: 432189
loss: 0.9843342900276184,grad_norm: 0.9697728704555542, iteration: 432190
loss: 1.017906904220581,grad_norm: 0.999999126491374, iteration: 432191
loss: 0.9493858218193054,grad_norm: 0.9999995737641235, iteration: 432192
loss: 0.9683890342712402,grad_norm: 0.9801813462965073, iteration: 432193
loss: 1.011622428894043,grad_norm: 0.8501030881646308, iteration: 432194
loss: 0.9848195910453796,grad_norm: 0.7690828961420111, iteration: 432195
loss: 0.9948421120643616,grad_norm: 0.7690499864235592, iteration: 432196
loss: 0.9858816862106323,grad_norm: 0.8086283126642136, iteration: 432197
loss: 0.9738675355911255,grad_norm: 0.9999991013159747, iteration: 432198
loss: 0.999602735042572,grad_norm: 0.8648259709586958, iteration: 432199
loss: 0.9657554030418396,grad_norm: 0.8904033112046091, iteration: 432200
loss: 1.0254740715026855,grad_norm: 0.8869701516843338, iteration: 432201
loss: 1.0012445449829102,grad_norm: 0.718860851998206, iteration: 432202
loss: 1.0138083696365356,grad_norm: 0.7352993977039188, iteration: 432203
loss: 1.1335880756378174,grad_norm: 0.6776776948438382, iteration: 432204
loss: 0.9959548711776733,grad_norm: 0.7428828594083647, iteration: 432205
loss: 1.0261653661727905,grad_norm: 0.71649229854825, iteration: 432206
loss: 1.0188095569610596,grad_norm: 1.0000000499083117, iteration: 432207
loss: 1.0056959390640259,grad_norm: 0.8709901832614158, iteration: 432208
loss: 1.0270172357559204,grad_norm: 0.8429443754958714, iteration: 432209
loss: 0.9806643128395081,grad_norm: 0.7713877986419395, iteration: 432210
loss: 1.0221298933029175,grad_norm: 0.7848442120842383, iteration: 432211
loss: 0.9931236505508423,grad_norm: 0.9999998163697509, iteration: 432212
loss: 0.9751919507980347,grad_norm: 0.8232257184261198, iteration: 432213
loss: 0.9718603491783142,grad_norm: 0.7183604847111238, iteration: 432214
loss: 1.0153117179870605,grad_norm: 0.8191446589997877, iteration: 432215
loss: 1.0271430015563965,grad_norm: 0.8191070920610025, iteration: 432216
loss: 1.0083580017089844,grad_norm: 0.7680152262637785, iteration: 432217
loss: 0.9978367686271667,grad_norm: 0.7704425276165753, iteration: 432218
loss: 0.9926166534423828,grad_norm: 0.7213365992769354, iteration: 432219
loss: 1.0098083019256592,grad_norm: 0.7041024945107455, iteration: 432220
loss: 1.0512874126434326,grad_norm: 0.8824880756215585, iteration: 432221
loss: 0.96989905834198,grad_norm: 0.6450527509210828, iteration: 432222
loss: 0.9799320101737976,grad_norm: 0.785537152178959, iteration: 432223
loss: 0.9858569502830505,grad_norm: 0.915095998351952, iteration: 432224
loss: 1.0042754411697388,grad_norm: 0.912762497856526, iteration: 432225
loss: 1.2077285051345825,grad_norm: 0.9999990425905978, iteration: 432226
loss: 0.9925677180290222,grad_norm: 0.613616456404313, iteration: 432227
loss: 0.9647213220596313,grad_norm: 0.6958616779274713, iteration: 432228
loss: 0.9991546273231506,grad_norm: 0.7708308454542583, iteration: 432229
loss: 1.0069234371185303,grad_norm: 0.9999989521495583, iteration: 432230
loss: 1.0019309520721436,grad_norm: 0.7920138884347604, iteration: 432231
loss: 0.99070143699646,grad_norm: 0.9999991312750552, iteration: 432232
loss: 1.0040252208709717,grad_norm: 0.7487427241455019, iteration: 432233
loss: 0.9969286918640137,grad_norm: 0.8105724660981531, iteration: 432234
loss: 1.0160596370697021,grad_norm: 0.6513858620955282, iteration: 432235
loss: 0.9580253958702087,grad_norm: 0.7955483467781393, iteration: 432236
loss: 0.9943374395370483,grad_norm: 0.8711653617891606, iteration: 432237
loss: 0.9926834106445312,grad_norm: 0.8309476949013825, iteration: 432238
loss: 1.0355168581008911,grad_norm: 0.9999991797641355, iteration: 432239
loss: 0.9856600165367126,grad_norm: 0.778920080662249, iteration: 432240
loss: 1.0449936389923096,grad_norm: 0.7514578824825351, iteration: 432241
loss: 0.9952486753463745,grad_norm: 0.77375175620369, iteration: 432242
loss: 1.0219871997833252,grad_norm: 0.999999635819764, iteration: 432243
loss: 1.004948616027832,grad_norm: 0.9999997759740985, iteration: 432244
loss: 1.003104567527771,grad_norm: 0.9431519425986366, iteration: 432245
loss: 1.0311665534973145,grad_norm: 0.8183874347435536, iteration: 432246
loss: 1.0598880052566528,grad_norm: 0.8124615314810941, iteration: 432247
loss: 0.9593913555145264,grad_norm: 0.7077902639154312, iteration: 432248
loss: 0.9970777630805969,grad_norm: 0.8392598404956173, iteration: 432249
loss: 1.0083823204040527,grad_norm: 0.7363615940204628, iteration: 432250
loss: 1.222219467163086,grad_norm: 0.9999997317284895, iteration: 432251
loss: 0.976795494556427,grad_norm: 0.9226735621138217, iteration: 432252
loss: 0.9838700294494629,grad_norm: 0.6948705010629862, iteration: 432253
loss: 0.9876636266708374,grad_norm: 0.7223394042266952, iteration: 432254
loss: 0.9532663822174072,grad_norm: 0.7718104065855285, iteration: 432255
loss: 0.9741268157958984,grad_norm: 0.8776694671958891, iteration: 432256
loss: 1.0140807628631592,grad_norm: 0.6504128202690239, iteration: 432257
loss: 1.1187840700149536,grad_norm: 0.9664988862736605, iteration: 432258
loss: 0.977853536605835,grad_norm: 0.9999995323887034, iteration: 432259
loss: 1.3155157566070557,grad_norm: 0.9999998226564901, iteration: 432260
loss: 1.0116525888442993,grad_norm: 0.7904409435344562, iteration: 432261
loss: 0.9686232805252075,grad_norm: 0.7351060495634305, iteration: 432262
loss: 0.9848148822784424,grad_norm: 0.8851491954805573, iteration: 432263
loss: 1.0746382474899292,grad_norm: 0.9999991333004944, iteration: 432264
loss: 0.9752165079116821,grad_norm: 0.7408072910106483, iteration: 432265
loss: 1.0164960622787476,grad_norm: 0.9366320532830961, iteration: 432266
loss: 0.9868770241737366,grad_norm: 0.6798499839155719, iteration: 432267
loss: 0.9988431334495544,grad_norm: 0.7529037305335358, iteration: 432268
loss: 1.0243747234344482,grad_norm: 0.777574572422705, iteration: 432269
loss: 1.0056705474853516,grad_norm: 0.8353639408476274, iteration: 432270
loss: 0.983171820640564,grad_norm: 0.6937223316403263, iteration: 432271
loss: 0.98396897315979,grad_norm: 0.7933178220764033, iteration: 432272
loss: 1.0670949220657349,grad_norm: 0.9999991647923818, iteration: 432273
loss: 1.0215874910354614,grad_norm: 0.9999998395221914, iteration: 432274
loss: 0.9817855358123779,grad_norm: 0.7127067405568076, iteration: 432275
loss: 0.9890566468238831,grad_norm: 0.6599993008242642, iteration: 432276
loss: 1.0498650074005127,grad_norm: 0.8261259463135131, iteration: 432277
loss: 0.9912307858467102,grad_norm: 0.9856932431503463, iteration: 432278
loss: 1.0137031078338623,grad_norm: 0.9999994345401355, iteration: 432279
loss: 1.0858567953109741,grad_norm: 0.9276772399388501, iteration: 432280
loss: 1.0746575593948364,grad_norm: 0.9999992563053024, iteration: 432281
loss: 1.0082743167877197,grad_norm: 0.8544035133345712, iteration: 432282
loss: 0.9619126915931702,grad_norm: 0.9540278544884727, iteration: 432283
loss: 0.9977598190307617,grad_norm: 0.7456450963803339, iteration: 432284
loss: 0.9876198172569275,grad_norm: 0.7135111043507655, iteration: 432285
loss: 1.0315202474594116,grad_norm: 0.712796817882638, iteration: 432286
loss: 0.9741007089614868,grad_norm: 0.7458449558255771, iteration: 432287
loss: 1.012846827507019,grad_norm: 0.9126628585839516, iteration: 432288
loss: 1.1091029644012451,grad_norm: 0.999999776344587, iteration: 432289
loss: 0.9792324304580688,grad_norm: 0.6966223004002944, iteration: 432290
loss: 1.0242196321487427,grad_norm: 0.7844127810191268, iteration: 432291
loss: 0.9770290851593018,grad_norm: 0.8053154189652229, iteration: 432292
loss: 1.0038955211639404,grad_norm: 0.7702784014090602, iteration: 432293
loss: 0.9819615483283997,grad_norm: 0.6775787349075468, iteration: 432294
loss: 1.0200355052947998,grad_norm: 0.7522443527965262, iteration: 432295
loss: 1.0002689361572266,grad_norm: 0.9999990764861502, iteration: 432296
loss: 0.9860499501228333,grad_norm: 0.7838417302225985, iteration: 432297
loss: 0.9765777587890625,grad_norm: 0.8388060674811791, iteration: 432298
loss: 1.0119500160217285,grad_norm: 0.6597296107878301, iteration: 432299
loss: 1.018162727355957,grad_norm: 0.834330736489545, iteration: 432300
loss: 1.0037686824798584,grad_norm: 0.8245098185152313, iteration: 432301
loss: 1.0063096284866333,grad_norm: 0.9999990694558605, iteration: 432302
loss: 1.0321439504623413,grad_norm: 0.784670333961053, iteration: 432303
loss: 1.0236790180206299,grad_norm: 0.769592694530616, iteration: 432304
loss: 0.9694957137107849,grad_norm: 0.7500272857539894, iteration: 432305
loss: 1.0102730989456177,grad_norm: 0.85830043007592, iteration: 432306
loss: 0.9904126524925232,grad_norm: 0.7418492116624923, iteration: 432307
loss: 0.9833077192306519,grad_norm: 0.7512490003721272, iteration: 432308
loss: 1.035735011100769,grad_norm: 0.9999990370730116, iteration: 432309
loss: 1.0087260007858276,grad_norm: 0.6781480679702934, iteration: 432310
loss: 0.9699432849884033,grad_norm: 0.6954436843370477, iteration: 432311
loss: 0.9957674145698547,grad_norm: 0.7232067964871769, iteration: 432312
loss: 1.0860692262649536,grad_norm: 0.8823213121386337, iteration: 432313
loss: 0.9468719959259033,grad_norm: 0.8621646767162583, iteration: 432314
loss: 1.015900731086731,grad_norm: 0.6752818909635946, iteration: 432315
loss: 1.0144453048706055,grad_norm: 0.8440773554841045, iteration: 432316
loss: 0.9926238059997559,grad_norm: 0.8316947027047651, iteration: 432317
loss: 0.9935606122016907,grad_norm: 0.7076938367738644, iteration: 432318
loss: 0.9888841509819031,grad_norm: 0.7859478594258426, iteration: 432319
loss: 0.9929085373878479,grad_norm: 0.7046825804613092, iteration: 432320
loss: 0.9899308681488037,grad_norm: 0.9086847194091939, iteration: 432321
loss: 0.9546141028404236,grad_norm: 0.7181547686148007, iteration: 432322
loss: 1.0004886388778687,grad_norm: 0.803155411542412, iteration: 432323
loss: 0.9841980934143066,grad_norm: 0.793158243892333, iteration: 432324
loss: 0.9777354001998901,grad_norm: 0.9999993344042302, iteration: 432325
loss: 1.0106920003890991,grad_norm: 0.7856163149739672, iteration: 432326
loss: 0.973908543586731,grad_norm: 0.7334656839069245, iteration: 432327
loss: 0.9992820620536804,grad_norm: 0.8103224786985315, iteration: 432328
loss: 1.0093165636062622,grad_norm: 0.7465555217997546, iteration: 432329
loss: 0.96051025390625,grad_norm: 0.7712093444389032, iteration: 432330
loss: 1.0262503623962402,grad_norm: 0.9999992726629119, iteration: 432331
loss: 0.9765051603317261,grad_norm: 0.9282660412407714, iteration: 432332
loss: 0.9899121522903442,grad_norm: 0.6170085662397861, iteration: 432333
loss: 0.9665462970733643,grad_norm: 0.910995485468734, iteration: 432334
loss: 1.0137630701065063,grad_norm: 0.999999321302689, iteration: 432335
loss: 1.039747953414917,grad_norm: 0.999999370623967, iteration: 432336
loss: 0.9861540198326111,grad_norm: 0.811386909888289, iteration: 432337
loss: 0.9702047109603882,grad_norm: 0.8637589280254275, iteration: 432338
loss: 1.0446785688400269,grad_norm: 0.9999990104059328, iteration: 432339
loss: 0.9974837899208069,grad_norm: 0.9999992790468466, iteration: 432340
loss: 0.9818546175956726,grad_norm: 0.8033147563961255, iteration: 432341
loss: 1.0034070014953613,grad_norm: 0.7008063253238374, iteration: 432342
loss: 1.0431954860687256,grad_norm: 0.9999991868354323, iteration: 432343
loss: 1.0226298570632935,grad_norm: 0.8217305724656263, iteration: 432344
loss: 1.0054646730422974,grad_norm: 0.9471063334882963, iteration: 432345
loss: 1.008377194404602,grad_norm: 0.696448353826244, iteration: 432346
loss: 1.0488066673278809,grad_norm: 0.7219210035364513, iteration: 432347
loss: 0.9946088790893555,grad_norm: 0.9413700605697961, iteration: 432348
loss: 1.0057041645050049,grad_norm: 0.8409285271253188, iteration: 432349
loss: 1.0001152753829956,grad_norm: 0.7863131373675696, iteration: 432350
loss: 0.982304036617279,grad_norm: 0.7679920482128831, iteration: 432351
loss: 0.9969438314437866,grad_norm: 0.7812883584644953, iteration: 432352
loss: 1.0489811897277832,grad_norm: 0.9999997627377336, iteration: 432353
loss: 0.9865070581436157,grad_norm: 0.9999992171042907, iteration: 432354
loss: 1.0169295072555542,grad_norm: 0.8182733079936271, iteration: 432355
loss: 1.0064430236816406,grad_norm: 0.7678603254381262, iteration: 432356
loss: 1.0083235502243042,grad_norm: 0.8617193356435245, iteration: 432357
loss: 1.0291367769241333,grad_norm: 0.821144640508662, iteration: 432358
loss: 0.984093427658081,grad_norm: 0.793449380232632, iteration: 432359
loss: 1.0042861700057983,grad_norm: 0.8939037972679562, iteration: 432360
loss: 1.008388638496399,grad_norm: 0.7805303201535644, iteration: 432361
loss: 0.9849988222122192,grad_norm: 0.8188267561500457, iteration: 432362
loss: 0.993477463722229,grad_norm: 0.7451364192186171, iteration: 432363
loss: 0.988269567489624,grad_norm: 0.9421634075964814, iteration: 432364
loss: 0.9862351417541504,grad_norm: 0.6285953886909182, iteration: 432365
loss: 1.0134761333465576,grad_norm: 0.9999991355315133, iteration: 432366
loss: 1.0000660419464111,grad_norm: 0.9013763233944636, iteration: 432367
loss: 1.0037297010421753,grad_norm: 0.7349078199227397, iteration: 432368
loss: 1.0026051998138428,grad_norm: 0.8216234221460306, iteration: 432369
loss: 0.9725860357284546,grad_norm: 0.7345962688595727, iteration: 432370
loss: 1.0163660049438477,grad_norm: 0.7359111114862834, iteration: 432371
loss: 1.0053901672363281,grad_norm: 0.8341063777472801, iteration: 432372
loss: 0.9916753768920898,grad_norm: 0.7406532136169015, iteration: 432373
loss: 1.1829107999801636,grad_norm: 0.796468799743092, iteration: 432374
loss: 1.1194977760314941,grad_norm: 0.9999997811973219, iteration: 432375
loss: 0.9970138072967529,grad_norm: 0.6954730247350127, iteration: 432376
loss: 1.009177803993225,grad_norm: 0.6067482719320276, iteration: 432377
loss: 0.9888085126876831,grad_norm: 0.7543871360257529, iteration: 432378
loss: 1.0071402788162231,grad_norm: 0.8132903485776566, iteration: 432379
loss: 1.050041675567627,grad_norm: 0.8139232108987576, iteration: 432380
loss: 0.9569529294967651,grad_norm: 0.7438387555659425, iteration: 432381
loss: 1.0022993087768555,grad_norm: 0.7531839683090358, iteration: 432382
loss: 1.0158705711364746,grad_norm: 0.709401058362687, iteration: 432383
loss: 0.9812642335891724,grad_norm: 0.7658833716521735, iteration: 432384
loss: 1.0023272037506104,grad_norm: 0.6621407557087191, iteration: 432385
loss: 0.979866623878479,grad_norm: 0.7198517058602186, iteration: 432386
loss: 0.9813660979270935,grad_norm: 0.8144519571627783, iteration: 432387
loss: 0.9903975129127502,grad_norm: 0.7458767738984982, iteration: 432388
loss: 1.0423706769943237,grad_norm: 0.9999989835209675, iteration: 432389
loss: 0.9903846979141235,grad_norm: 0.7223675438408581, iteration: 432390
loss: 1.0001568794250488,grad_norm: 0.6373701687500711, iteration: 432391
loss: 0.9738360643386841,grad_norm: 0.7121649806576681, iteration: 432392
loss: 1.0267034769058228,grad_norm: 0.757308426436642, iteration: 432393
loss: 0.9896292686462402,grad_norm: 0.8171789391261595, iteration: 432394
loss: 0.9791340827941895,grad_norm: 0.6743619951926259, iteration: 432395
loss: 0.9548363089561462,grad_norm: 0.9039260471761816, iteration: 432396
loss: 0.9811651110649109,grad_norm: 0.8632468271315248, iteration: 432397
loss: 0.9775261878967285,grad_norm: 0.7620548989104291, iteration: 432398
loss: 1.0104334354400635,grad_norm: 0.8217363050283851, iteration: 432399
loss: 1.0137113332748413,grad_norm: 0.7827774717438235, iteration: 432400
loss: 0.9900836944580078,grad_norm: 0.9111495736953751, iteration: 432401
loss: 1.0019829273223877,grad_norm: 0.7685793087425261, iteration: 432402
loss: 1.0140068531036377,grad_norm: 0.7761057429851972, iteration: 432403
loss: 1.0107665061950684,grad_norm: 0.7185099743526471, iteration: 432404
loss: 1.0386477708816528,grad_norm: 0.7003589659308899, iteration: 432405
loss: 0.9649913907051086,grad_norm: 0.7327852643798414, iteration: 432406
loss: 0.9997014403343201,grad_norm: 0.6786649934669443, iteration: 432407
loss: 1.005954623222351,grad_norm: 0.8426141491484942, iteration: 432408
loss: 1.018262267112732,grad_norm: 0.7774508578020092, iteration: 432409
loss: 0.9832184910774231,grad_norm: 0.804278071325641, iteration: 432410
loss: 0.9630131125450134,grad_norm: 0.8182088925850105, iteration: 432411
loss: 0.9944456219673157,grad_norm: 0.7545102393220372, iteration: 432412
loss: 1.0023504495620728,grad_norm: 0.9734557043203108, iteration: 432413
loss: 0.9857168197631836,grad_norm: 0.6106468627897782, iteration: 432414
loss: 1.0110139846801758,grad_norm: 0.7708944296503267, iteration: 432415
loss: 0.9687104225158691,grad_norm: 0.7430885331761623, iteration: 432416
loss: 1.0102440118789673,grad_norm: 0.7774726204146041, iteration: 432417
loss: 0.9877459406852722,grad_norm: 0.6279365484714382, iteration: 432418
loss: 1.0193507671356201,grad_norm: 0.8855246054219054, iteration: 432419
loss: 1.0064899921417236,grad_norm: 0.8963543039888644, iteration: 432420
loss: 1.0245966911315918,grad_norm: 0.9999995594512311, iteration: 432421
loss: 1.025901436805725,grad_norm: 0.9943678478799689, iteration: 432422
loss: 1.0063139200210571,grad_norm: 0.783639854212967, iteration: 432423
loss: 1.0236196517944336,grad_norm: 0.8203295474088959, iteration: 432424
loss: 1.0065749883651733,grad_norm: 0.7502635962289914, iteration: 432425
loss: 0.9948819279670715,grad_norm: 0.7018849857851402, iteration: 432426
loss: 1.037996768951416,grad_norm: 0.7153100414934754, iteration: 432427
loss: 0.9664671421051025,grad_norm: 0.8150274141828029, iteration: 432428
loss: 0.9997967481613159,grad_norm: 0.7204006827353981, iteration: 432429
loss: 1.0252015590667725,grad_norm: 0.779751962700124, iteration: 432430
loss: 0.9922734498977661,grad_norm: 0.8007544095545275, iteration: 432431
loss: 0.9794076681137085,grad_norm: 0.7421185049543958, iteration: 432432
loss: 1.0553616285324097,grad_norm: 0.8185967136891156, iteration: 432433
loss: 0.9754291772842407,grad_norm: 0.7036148517110848, iteration: 432434
loss: 0.9838465452194214,grad_norm: 0.7751935402724048, iteration: 432435
loss: 0.9792510867118835,grad_norm: 0.835602791316947, iteration: 432436
loss: 0.98887699842453,grad_norm: 0.7234792327772169, iteration: 432437
loss: 1.0076833963394165,grad_norm: 0.7551868017452026, iteration: 432438
loss: 0.9741793274879456,grad_norm: 0.6996137050005959, iteration: 432439
loss: 0.9437821507453918,grad_norm: 0.7982021825952256, iteration: 432440
loss: 1.0184584856033325,grad_norm: 0.8375370415700595, iteration: 432441
loss: 1.001392126083374,grad_norm: 0.6724885081649618, iteration: 432442
loss: 0.9925358295440674,grad_norm: 0.8398748452244988, iteration: 432443
loss: 1.0136926174163818,grad_norm: 0.8104912013775989, iteration: 432444
loss: 1.0258926153182983,grad_norm: 0.712676565125736, iteration: 432445
loss: 0.9971638321876526,grad_norm: 0.7541916287342244, iteration: 432446
loss: 1.0209670066833496,grad_norm: 0.9020460715705129, iteration: 432447
loss: 1.0848733186721802,grad_norm: 0.9999994345333574, iteration: 432448
loss: 0.983787477016449,grad_norm: 0.700206893880252, iteration: 432449
loss: 1.0010830163955688,grad_norm: 0.6711481962841201, iteration: 432450
loss: 1.0557467937469482,grad_norm: 0.9999990909238089, iteration: 432451
loss: 1.0260580778121948,grad_norm: 0.7706468280028279, iteration: 432452
loss: 1.0734187364578247,grad_norm: 0.9999994735688853, iteration: 432453
loss: 1.0024946928024292,grad_norm: 0.780673469252514, iteration: 432454
loss: 1.009643316268921,grad_norm: 0.7339658772393597, iteration: 432455
loss: 1.0234193801879883,grad_norm: 0.9999991168408978, iteration: 432456
loss: 1.007919192314148,grad_norm: 0.9842383481390843, iteration: 432457
loss: 1.0039737224578857,grad_norm: 0.645182497298765, iteration: 432458
loss: 0.9483939409255981,grad_norm: 0.7604235949605359, iteration: 432459
loss: 1.0335737466812134,grad_norm: 0.7266460860474179, iteration: 432460
loss: 0.9999105334281921,grad_norm: 0.7309141389319496, iteration: 432461
loss: 0.9863294959068298,grad_norm: 0.8268506235183432, iteration: 432462
loss: 1.008363962173462,grad_norm: 0.742564689951227, iteration: 432463
loss: 0.9827715158462524,grad_norm: 0.8319005237831079, iteration: 432464
loss: 0.9864827990531921,grad_norm: 0.711990434991663, iteration: 432465
loss: 0.9647552967071533,grad_norm: 0.7092538072553985, iteration: 432466
loss: 1.028752326965332,grad_norm: 0.844001518598394, iteration: 432467
loss: 0.9837643504142761,grad_norm: 0.9338205414924394, iteration: 432468
loss: 0.9974679350852966,grad_norm: 0.9698920617251293, iteration: 432469
loss: 1.0161372423171997,grad_norm: 0.8815451578309376, iteration: 432470
loss: 1.0198943614959717,grad_norm: 0.7732770543911804, iteration: 432471
loss: 1.0639921426773071,grad_norm: 0.9198624713516317, iteration: 432472
loss: 1.0176905393600464,grad_norm: 0.668028724249381, iteration: 432473
loss: 1.0089666843414307,grad_norm: 0.8205278741397102, iteration: 432474
loss: 1.0332616567611694,grad_norm: 0.7355202130562204, iteration: 432475
loss: 1.00863778591156,grad_norm: 0.8916125588312266, iteration: 432476
loss: 0.9778007864952087,grad_norm: 0.7821570619881574, iteration: 432477
loss: 1.0145450830459595,grad_norm: 0.7580714353646564, iteration: 432478
loss: 1.00466787815094,grad_norm: 0.6037577475093038, iteration: 432479
loss: 1.0096396207809448,grad_norm: 0.7207004957976061, iteration: 432480
loss: 1.023397445678711,grad_norm: 0.8613484273606824, iteration: 432481
loss: 1.0153597593307495,grad_norm: 0.9999991410601804, iteration: 432482
loss: 0.9930899143218994,grad_norm: 0.8153829215245864, iteration: 432483
loss: 1.009185791015625,grad_norm: 0.891808759439792, iteration: 432484
loss: 1.18734610080719,grad_norm: 0.9999999834458201, iteration: 432485
loss: 0.9844624400138855,grad_norm: 0.8915594238250331, iteration: 432486
loss: 0.9917874336242676,grad_norm: 0.6553848382786986, iteration: 432487
loss: 0.9811835289001465,grad_norm: 0.7698750477401353, iteration: 432488
loss: 0.976155161857605,grad_norm: 0.6486256476158452, iteration: 432489
loss: 1.014757513999939,grad_norm: 0.8396187370407997, iteration: 432490
loss: 0.9752945899963379,grad_norm: 0.9020929218584537, iteration: 432491
loss: 1.0000193119049072,grad_norm: 0.7305970277127618, iteration: 432492
loss: 0.9898176789283752,grad_norm: 0.7748112103784481, iteration: 432493
loss: 0.965944230556488,grad_norm: 0.7875122511646975, iteration: 432494
loss: 0.9740506410598755,grad_norm: 0.7377850475523303, iteration: 432495
loss: 1.0270743370056152,grad_norm: 0.7826881031308057, iteration: 432496
loss: 1.017877221107483,grad_norm: 0.8845937367797756, iteration: 432497
loss: 0.9902146458625793,grad_norm: 0.7531766358189134, iteration: 432498
loss: 1.0170034170150757,grad_norm: 0.7735832334158956, iteration: 432499
loss: 1.0122166872024536,grad_norm: 0.7697049724822066, iteration: 432500
loss: 1.017771601676941,grad_norm: 0.7034218208950749, iteration: 432501
loss: 0.9904081225395203,grad_norm: 0.7921194519993954, iteration: 432502
loss: 0.9991937875747681,grad_norm: 0.6867336687534743, iteration: 432503
loss: 0.975333034992218,grad_norm: 0.8102352371568886, iteration: 432504
loss: 0.9868648648262024,grad_norm: 0.8062033888689617, iteration: 432505
loss: 1.0068330764770508,grad_norm: 0.7512131753722813, iteration: 432506
loss: 1.0261608362197876,grad_norm: 0.7067829580995509, iteration: 432507
loss: 1.0115817785263062,grad_norm: 0.7757815708315532, iteration: 432508
loss: 0.998199999332428,grad_norm: 0.9997018148060376, iteration: 432509
loss: 1.0599617958068848,grad_norm: 0.8891716944475647, iteration: 432510
loss: 0.9907181859016418,grad_norm: 0.7537537965458136, iteration: 432511
loss: 0.9951961040496826,grad_norm: 0.6586571017403057, iteration: 432512
loss: 1.0127840042114258,grad_norm: 0.8216440753719387, iteration: 432513
loss: 0.9669811725616455,grad_norm: 0.7947970358237234, iteration: 432514
loss: 1.048439383506775,grad_norm: 0.9999990574041432, iteration: 432515
loss: 0.9867123365402222,grad_norm: 0.7824411630798189, iteration: 432516
loss: 1.0547152757644653,grad_norm: 0.7210718471958346, iteration: 432517
loss: 1.013122797012329,grad_norm: 0.7909142215018516, iteration: 432518
loss: 0.9753301739692688,grad_norm: 0.6495114385092098, iteration: 432519
loss: 1.0008877515792847,grad_norm: 0.9635993639653337, iteration: 432520
loss: 0.9916254878044128,grad_norm: 0.999999131343923, iteration: 432521
loss: 0.9988402724266052,grad_norm: 0.7487100200363698, iteration: 432522
loss: 1.0015201568603516,grad_norm: 0.7510105068612567, iteration: 432523
loss: 0.9930912852287292,grad_norm: 0.9187190144142148, iteration: 432524
loss: 1.0483784675598145,grad_norm: 0.7097852520552063, iteration: 432525
loss: 0.9494989514350891,grad_norm: 0.7952938242061589, iteration: 432526
loss: 1.0473763942718506,grad_norm: 0.9999990529852052, iteration: 432527
loss: 1.0080658197402954,grad_norm: 0.8066792117531588, iteration: 432528
loss: 1.076075792312622,grad_norm: 0.904808908216767, iteration: 432529
loss: 0.9690195322036743,grad_norm: 0.7268637560363237, iteration: 432530
loss: 1.0184061527252197,grad_norm: 0.9254773319502296, iteration: 432531
loss: 1.0247093439102173,grad_norm: 0.7850013703160883, iteration: 432532
loss: 0.995408296585083,grad_norm: 0.8705677480404211, iteration: 432533
loss: 1.0302987098693848,grad_norm: 0.99999914836679, iteration: 432534
loss: 1.0222376585006714,grad_norm: 0.9646816801769083, iteration: 432535
loss: 1.0135290622711182,grad_norm: 0.7360051297312954, iteration: 432536
loss: 0.9520536065101624,grad_norm: 0.7602940642616414, iteration: 432537
loss: 0.972508430480957,grad_norm: 0.8022395716927577, iteration: 432538
loss: 1.0175925493240356,grad_norm: 0.6923241020256633, iteration: 432539
loss: 0.9749875664710999,grad_norm: 0.8664160862031687, iteration: 432540
loss: 0.9910409450531006,grad_norm: 0.97490987980158, iteration: 432541
loss: 0.9973634481430054,grad_norm: 0.6914610295129898, iteration: 432542
loss: 0.9870008826255798,grad_norm: 0.8884404625887254, iteration: 432543
loss: 0.9741814136505127,grad_norm: 0.7623247389295486, iteration: 432544
loss: 0.9574807286262512,grad_norm: 0.7646518450795389, iteration: 432545
loss: 1.0524020195007324,grad_norm: 0.9999990634167639, iteration: 432546
loss: 0.9817026257514954,grad_norm: 0.704582069537091, iteration: 432547
loss: 0.9976224303245544,grad_norm: 0.7327682074635444, iteration: 432548
loss: 1.0236586332321167,grad_norm: 0.9974432814539971, iteration: 432549
loss: 1.0276668071746826,grad_norm: 0.7613261995329903, iteration: 432550
loss: 0.9992361068725586,grad_norm: 0.8103635296269442, iteration: 432551
loss: 1.0477250814437866,grad_norm: 0.7669349803995873, iteration: 432552
loss: 0.9732194542884827,grad_norm: 0.779128270458552, iteration: 432553
loss: 0.9757780432701111,grad_norm: 0.7898116346779903, iteration: 432554
loss: 0.9873540997505188,grad_norm: 0.8850886149855627, iteration: 432555
loss: 1.0151901245117188,grad_norm: 0.8078755313611312, iteration: 432556
loss: 0.9846477508544922,grad_norm: 0.854283867024709, iteration: 432557
loss: 1.0069957971572876,grad_norm: 0.9065673012440778, iteration: 432558
loss: 0.9549404382705688,grad_norm: 0.6740496781198334, iteration: 432559
loss: 0.9354147911071777,grad_norm: 0.8461370861094522, iteration: 432560
loss: 0.9586185216903687,grad_norm: 0.793139753498499, iteration: 432561
loss: 1.0140689611434937,grad_norm: 0.8080905545374699, iteration: 432562
loss: 0.987296462059021,grad_norm: 0.7832316257873518, iteration: 432563
loss: 0.988239049911499,grad_norm: 0.7356650706320924, iteration: 432564
loss: 0.9632056355476379,grad_norm: 0.7468542237525285, iteration: 432565
loss: 0.9932798743247986,grad_norm: 0.7878012054319575, iteration: 432566
loss: 1.2109532356262207,grad_norm: 0.9999992649747216, iteration: 432567
loss: 0.9944540858268738,grad_norm: 0.8171267399117238, iteration: 432568
loss: 0.9853794574737549,grad_norm: 0.8018354125049397, iteration: 432569
loss: 0.9986340403556824,grad_norm: 0.6843601586565217, iteration: 432570
loss: 1.0254467725753784,grad_norm: 0.8719890609996657, iteration: 432571
loss: 0.9795046448707581,grad_norm: 0.8730698187151191, iteration: 432572
loss: 0.9928638339042664,grad_norm: 0.8097255858849052, iteration: 432573
loss: 0.9961602687835693,grad_norm: 0.7749553002356617, iteration: 432574
loss: 0.9763655066490173,grad_norm: 0.7074894397138787, iteration: 432575
loss: 0.9980694055557251,grad_norm: 0.7982864515037162, iteration: 432576
loss: 0.9952453970909119,grad_norm: 0.773900477500638, iteration: 432577
loss: 1.0150686502456665,grad_norm: 0.7287019009193605, iteration: 432578
loss: 1.0746444463729858,grad_norm: 0.9999992034509481, iteration: 432579
loss: 0.984423816204071,grad_norm: 0.7705005243072608, iteration: 432580
loss: 0.97493577003479,grad_norm: 0.999999697295764, iteration: 432581
loss: 1.0331906080245972,grad_norm: 0.7585573998615283, iteration: 432582
loss: 1.0037322044372559,grad_norm: 0.6308665613312544, iteration: 432583
loss: 0.9992915987968445,grad_norm: 0.6813207559091928, iteration: 432584
loss: 0.992730975151062,grad_norm: 0.7636974122389977, iteration: 432585
loss: 0.9893837571144104,grad_norm: 0.6491837487329356, iteration: 432586
loss: 0.9902222156524658,grad_norm: 0.6849747403044149, iteration: 432587
loss: 0.9922119379043579,grad_norm: 0.637304834276697, iteration: 432588
loss: 0.9980424642562866,grad_norm: 0.9999995886713613, iteration: 432589
loss: 1.047219157218933,grad_norm: 0.8263113620592384, iteration: 432590
loss: 1.0139271020889282,grad_norm: 0.8341329927129902, iteration: 432591
loss: 1.0497831106185913,grad_norm: 0.7274100477372667, iteration: 432592
loss: 0.9822696447372437,grad_norm: 0.7567464218218941, iteration: 432593
loss: 1.0083787441253662,grad_norm: 0.7308330554965624, iteration: 432594
loss: 0.9699313640594482,grad_norm: 0.9777176468493052, iteration: 432595
loss: 1.0077663660049438,grad_norm: 0.7338786365097835, iteration: 432596
loss: 1.0035626888275146,grad_norm: 0.6986443507877156, iteration: 432597
loss: 1.0152839422225952,grad_norm: 0.8154323024917225, iteration: 432598
loss: 1.014194369316101,grad_norm: 0.999998986044542, iteration: 432599
loss: 1.0138695240020752,grad_norm: 0.8128541704916828, iteration: 432600
loss: 1.0249332189559937,grad_norm: 0.8310436999206203, iteration: 432601
loss: 0.9631638526916504,grad_norm: 0.9184244348426169, iteration: 432602
loss: 0.9729816913604736,grad_norm: 0.8208649634147586, iteration: 432603
loss: 0.9970535635948181,grad_norm: 0.859485999179839, iteration: 432604
loss: 1.0213173627853394,grad_norm: 0.7486813691242931, iteration: 432605
loss: 0.9874469041824341,grad_norm: 0.9756831431843972, iteration: 432606
loss: 0.9993905425071716,grad_norm: 0.728386235114202, iteration: 432607
loss: 1.1514387130737305,grad_norm: 0.9999999162914844, iteration: 432608
loss: 1.0186432600021362,grad_norm: 0.8274180221475779, iteration: 432609
loss: 0.9846593737602234,grad_norm: 0.7987507692413803, iteration: 432610
loss: 1.0239167213439941,grad_norm: 0.90349324993519, iteration: 432611
loss: 0.9273320436477661,grad_norm: 0.7081657573398124, iteration: 432612
loss: 0.9692933559417725,grad_norm: 0.8424330199405676, iteration: 432613
loss: 1.0027198791503906,grad_norm: 0.886998366532987, iteration: 432614
loss: 0.9304445385932922,grad_norm: 0.7922968990811715, iteration: 432615
loss: 1.0315579175949097,grad_norm: 0.9415760017893375, iteration: 432616
loss: 0.998285710811615,grad_norm: 0.7787077163908152, iteration: 432617
loss: 1.0255366563796997,grad_norm: 0.985021219599337, iteration: 432618
loss: 1.0078660249710083,grad_norm: 0.7379996558079219, iteration: 432619
loss: 1.0165425539016724,grad_norm: 0.7220541002707136, iteration: 432620
loss: 1.0110708475112915,grad_norm: 0.8074018703353826, iteration: 432621
loss: 0.9968513250350952,grad_norm: 0.7679955183230519, iteration: 432622
loss: 1.057533621788025,grad_norm: 0.9020855659852429, iteration: 432623
loss: 0.9768279194831848,grad_norm: 0.7716581571774837, iteration: 432624
loss: 1.0082085132598877,grad_norm: 0.7462460723128886, iteration: 432625
loss: 1.068374514579773,grad_norm: 0.9999996760718325, iteration: 432626
loss: 0.9810708165168762,grad_norm: 0.854253732602448, iteration: 432627
loss: 1.024779200553894,grad_norm: 0.6616867331987949, iteration: 432628
loss: 1.0060434341430664,grad_norm: 0.8053828992241727, iteration: 432629
loss: 1.0041450262069702,grad_norm: 0.5918048556261469, iteration: 432630
loss: 0.9884858727455139,grad_norm: 0.8438992614664158, iteration: 432631
loss: 1.0075621604919434,grad_norm: 0.7607057886913091, iteration: 432632
loss: 1.2027490139007568,grad_norm: 0.9999993425366303, iteration: 432633
loss: 1.025931477546692,grad_norm: 0.7298215626098495, iteration: 432634
loss: 1.0488554239273071,grad_norm: 0.999999868780243, iteration: 432635
loss: 0.9929568767547607,grad_norm: 0.9708552327721119, iteration: 432636
loss: 0.9989607334136963,grad_norm: 0.8881984208355941, iteration: 432637
loss: 1.00456702709198,grad_norm: 0.6510314549305373, iteration: 432638
loss: 1.0081580877304077,grad_norm: 0.7798173066000141, iteration: 432639
loss: 0.9752532243728638,grad_norm: 0.8865337401090015, iteration: 432640
loss: 0.9276552796363831,grad_norm: 0.8212008049954774, iteration: 432641
loss: 1.0203198194503784,grad_norm: 0.7209706920024557, iteration: 432642
loss: 1.0090968608856201,grad_norm: 0.7492023237921626, iteration: 432643
loss: 0.9924820065498352,grad_norm: 0.635600663889648, iteration: 432644
loss: 0.9882771372795105,grad_norm: 0.7350700765614578, iteration: 432645
loss: 1.0731948614120483,grad_norm: 0.9999997813732109, iteration: 432646
loss: 1.0484200716018677,grad_norm: 0.6905781491289552, iteration: 432647
loss: 1.0191912651062012,grad_norm: 0.746964466504706, iteration: 432648
loss: 0.9478027820587158,grad_norm: 0.8526982366753861, iteration: 432649
loss: 0.9701811671257019,grad_norm: 0.6808346916399126, iteration: 432650
loss: 1.000424861907959,grad_norm: 0.7676427938487371, iteration: 432651
loss: 0.9646055698394775,grad_norm: 0.7847026202396503, iteration: 432652
loss: 1.0034369230270386,grad_norm: 0.7732452733756882, iteration: 432653
loss: 1.0017642974853516,grad_norm: 0.7956812481657408, iteration: 432654
loss: 1.0084116458892822,grad_norm: 0.784239912515561, iteration: 432655
loss: 1.0670676231384277,grad_norm: 0.9999994333169849, iteration: 432656
loss: 0.9662275314331055,grad_norm: 0.7677210630181217, iteration: 432657
loss: 0.9667741656303406,grad_norm: 0.7527203256603537, iteration: 432658
loss: 0.9612130522727966,grad_norm: 0.6706399265493383, iteration: 432659
loss: 1.0382752418518066,grad_norm: 0.9999991572573446, iteration: 432660
loss: 1.0179784297943115,grad_norm: 0.7982070895754627, iteration: 432661
loss: 1.1244841814041138,grad_norm: 0.9999992938004811, iteration: 432662
loss: 1.1714966297149658,grad_norm: 0.9999997516814243, iteration: 432663
loss: 0.9947196841239929,grad_norm: 0.8900617788098606, iteration: 432664
loss: 0.999843180179596,grad_norm: 0.764439534795129, iteration: 432665
loss: 0.9919142723083496,grad_norm: 0.5951112080689545, iteration: 432666
loss: 0.9809345006942749,grad_norm: 0.7960606845077854, iteration: 432667
loss: 0.9849696159362793,grad_norm: 0.6545765292244655, iteration: 432668
loss: 1.0098059177398682,grad_norm: 0.834845127966438, iteration: 432669
loss: 0.9881856441497803,grad_norm: 0.7913851260017798, iteration: 432670
loss: 1.021052360534668,grad_norm: 0.7420527500979957, iteration: 432671
loss: 1.0515401363372803,grad_norm: 0.8199397842696217, iteration: 432672
loss: 0.9998121857643127,grad_norm: 0.7163019133577787, iteration: 432673
loss: 0.9930717945098877,grad_norm: 0.7246894491594023, iteration: 432674
loss: 1.035645842552185,grad_norm: 0.789685465547425, iteration: 432675
loss: 0.9632683992385864,grad_norm: 0.8583224214226373, iteration: 432676
loss: 1.0765105485916138,grad_norm: 1.000000007707095, iteration: 432677
loss: 0.974270224571228,grad_norm: 0.8354719680556478, iteration: 432678
loss: 1.0196834802627563,grad_norm: 0.7105816391105296, iteration: 432679
loss: 0.9784716367721558,grad_norm: 0.8143846538861279, iteration: 432680
loss: 1.0532891750335693,grad_norm: 0.9999991726674244, iteration: 432681
loss: 1.0045974254608154,grad_norm: 0.6760094042509859, iteration: 432682
loss: 0.9821241497993469,grad_norm: 0.7618407616168219, iteration: 432683
loss: 1.0156307220458984,grad_norm: 0.7382571460952225, iteration: 432684
loss: 0.9516534209251404,grad_norm: 0.8186388260588355, iteration: 432685
loss: 0.9707949161529541,grad_norm: 0.9449434573329865, iteration: 432686
loss: 0.983529806137085,grad_norm: 0.8414385154916532, iteration: 432687
loss: 1.009969711303711,grad_norm: 0.7184553481148876, iteration: 432688
loss: 1.0140435695648193,grad_norm: 0.7366853479100828, iteration: 432689
loss: 0.9562539458274841,grad_norm: 0.9745836963117886, iteration: 432690
loss: 1.0100352764129639,grad_norm: 0.9417799923139099, iteration: 432691
loss: 1.0284274816513062,grad_norm: 0.8564576437422305, iteration: 432692
loss: 0.9992082715034485,grad_norm: 0.8765708059036703, iteration: 432693
loss: 0.9848368167877197,grad_norm: 0.8119935054763305, iteration: 432694
loss: 0.9936655163764954,grad_norm: 0.7771521176976266, iteration: 432695
loss: 1.0674587488174438,grad_norm: 0.9999997520366467, iteration: 432696
loss: 0.9883712530136108,grad_norm: 0.6774740871372752, iteration: 432697
loss: 0.9972941875457764,grad_norm: 0.8738266681008119, iteration: 432698
loss: 0.9956879615783691,grad_norm: 0.8407222144039171, iteration: 432699
loss: 0.9924911260604858,grad_norm: 0.7752440011638235, iteration: 432700
loss: 0.9983670711517334,grad_norm: 0.8061177866719758, iteration: 432701
loss: 1.0054006576538086,grad_norm: 0.7141885718491068, iteration: 432702
loss: 1.008017659187317,grad_norm: 0.7126645515858852, iteration: 432703
loss: 0.9996481537818909,grad_norm: 0.7229435310281901, iteration: 432704
loss: 0.9779154658317566,grad_norm: 0.6010794112532518, iteration: 432705
loss: 0.9866840839385986,grad_norm: 0.7027763776751335, iteration: 432706
loss: 0.9794601798057556,grad_norm: 0.9298124365854737, iteration: 432707
loss: 0.9506250023841858,grad_norm: 0.7339928615940043, iteration: 432708
loss: 1.0098565816879272,grad_norm: 0.7429095702615406, iteration: 432709
loss: 0.9818964600563049,grad_norm: 0.607361804914229, iteration: 432710
loss: 1.0261627435684204,grad_norm: 0.8318369867472484, iteration: 432711
loss: 0.9912503361701965,grad_norm: 0.8129668039017147, iteration: 432712
loss: 1.0194263458251953,grad_norm: 0.7927955712569791, iteration: 432713
loss: 1.0234787464141846,grad_norm: 0.9185819169139537, iteration: 432714
loss: 1.060572862625122,grad_norm: 0.999999826049368, iteration: 432715
loss: 1.0000509023666382,grad_norm: 0.7745387343595428, iteration: 432716
loss: 1.0185482501983643,grad_norm: 0.9999999220956222, iteration: 432717
loss: 1.0406274795532227,grad_norm: 0.7993396312468568, iteration: 432718
loss: 0.9816628694534302,grad_norm: 0.8092596136883797, iteration: 432719
loss: 1.2360923290252686,grad_norm: 0.9999996036286746, iteration: 432720
loss: 1.004494547843933,grad_norm: 0.7460679915378624, iteration: 432721
loss: 0.9847730994224548,grad_norm: 0.8111701200551394, iteration: 432722
loss: 0.9964901208877563,grad_norm: 0.7211489488019329, iteration: 432723
loss: 0.9761518836021423,grad_norm: 0.7291513498202382, iteration: 432724
loss: 0.9676955342292786,grad_norm: 0.7494417289392621, iteration: 432725
loss: 1.044122576713562,grad_norm: 0.999999866116164, iteration: 432726
loss: 0.9657484292984009,grad_norm: 0.7485534379215456, iteration: 432727
loss: 1.035843014717102,grad_norm: 0.9999992550202279, iteration: 432728
loss: 1.0008577108383179,grad_norm: 0.9999994466226672, iteration: 432729
loss: 1.0322983264923096,grad_norm: 0.7745209143574857, iteration: 432730
loss: 0.9781852960586548,grad_norm: 0.6703934262905723, iteration: 432731
loss: 0.9865922331809998,grad_norm: 0.8459220050911627, iteration: 432732
loss: 0.9849238991737366,grad_norm: 0.8891060718047643, iteration: 432733
loss: 0.9769527316093445,grad_norm: 0.7337514633187573, iteration: 432734
loss: 1.017490267753601,grad_norm: 0.9536022990692335, iteration: 432735
loss: 1.0286009311676025,grad_norm: 0.86654265151958, iteration: 432736
loss: 1.002734661102295,grad_norm: 0.6825310510927335, iteration: 432737
loss: 0.9852991104125977,grad_norm: 0.6973505827437525, iteration: 432738
loss: 0.984282910823822,grad_norm: 0.7038835102343974, iteration: 432739
loss: 1.0479016304016113,grad_norm: 0.7128306083950389, iteration: 432740
loss: 1.0011590719223022,grad_norm: 0.8109076655022284, iteration: 432741
loss: 0.9746119379997253,grad_norm: 0.7283656656478272, iteration: 432742
loss: 0.9934465289115906,grad_norm: 0.7660497803780558, iteration: 432743
loss: 1.022493600845337,grad_norm: 0.8255538853161228, iteration: 432744
loss: 0.9721936583518982,grad_norm: 0.821411127365862, iteration: 432745
loss: 1.0419971942901611,grad_norm: 0.7498566026104612, iteration: 432746
loss: 1.0302904844284058,grad_norm: 0.7094309570903132, iteration: 432747
loss: 1.0060884952545166,grad_norm: 0.9140568623160185, iteration: 432748
loss: 1.0815715789794922,grad_norm: 0.9549326460451611, iteration: 432749
loss: 0.9849094152450562,grad_norm: 0.8851317729256021, iteration: 432750
loss: 1.0307255983352661,grad_norm: 0.9999999889189883, iteration: 432751
loss: 1.0066022872924805,grad_norm: 0.9331907966938909, iteration: 432752
loss: 0.9944767951965332,grad_norm: 0.8009365747148062, iteration: 432753
loss: 1.0566127300262451,grad_norm: 0.9061178022788715, iteration: 432754
loss: 1.012935757637024,grad_norm: 0.8884331725225025, iteration: 432755
loss: 1.0406267642974854,grad_norm: 0.7390683369378114, iteration: 432756
loss: 0.9752905368804932,grad_norm: 0.6885549380191861, iteration: 432757
loss: 1.0023627281188965,grad_norm: 0.9213502249812201, iteration: 432758
loss: 0.9983068704605103,grad_norm: 0.7561367925636164, iteration: 432759
loss: 1.0050534009933472,grad_norm: 0.7220458435532624, iteration: 432760
loss: 0.9715304374694824,grad_norm: 0.8714944779195678, iteration: 432761
loss: 1.019481897354126,grad_norm: 0.7897709410045537, iteration: 432762
loss: 0.9916281700134277,grad_norm: 0.7283486696374952, iteration: 432763
loss: 0.9779258966445923,grad_norm: 0.7450845578614952, iteration: 432764
loss: 1.0062016248703003,grad_norm: 0.7593990388557289, iteration: 432765
loss: 0.996004045009613,grad_norm: 0.7756846505172152, iteration: 432766
loss: 0.9673911929130554,grad_norm: 0.7604863909504794, iteration: 432767
loss: 1.017979383468628,grad_norm: 0.8039234112359317, iteration: 432768
loss: 0.9722000956535339,grad_norm: 0.8370805567600074, iteration: 432769
loss: 1.0283701419830322,grad_norm: 0.7701471522232815, iteration: 432770
loss: 0.9660599231719971,grad_norm: 0.8859705830426412, iteration: 432771
loss: 1.0063707828521729,grad_norm: 0.8407107566104325, iteration: 432772
loss: 1.017303705215454,grad_norm: 0.7844024555508504, iteration: 432773
loss: 0.957568347454071,grad_norm: 0.9237378395700467, iteration: 432774
loss: 1.0115772485733032,grad_norm: 0.8266743499830483, iteration: 432775
loss: 1.0374835729599,grad_norm: 0.7813962134969127, iteration: 432776
loss: 1.0266674757003784,grad_norm: 0.9999993454572378, iteration: 432777
loss: 0.9754019379615784,grad_norm: 0.7578549253306522, iteration: 432778
loss: 1.024217128753662,grad_norm: 0.9441609450000171, iteration: 432779
loss: 0.9974516034126282,grad_norm: 0.7204447940421398, iteration: 432780
loss: 0.9848097562789917,grad_norm: 0.6378270658862217, iteration: 432781
loss: 1.0010473728179932,grad_norm: 0.8089021829508193, iteration: 432782
loss: 1.0069481134414673,grad_norm: 0.8306021374517637, iteration: 432783
loss: 0.9822179079055786,grad_norm: 0.8300722931157068, iteration: 432784
loss: 1.002685785293579,grad_norm: 0.6704808454137273, iteration: 432785
loss: 1.003095269203186,grad_norm: 0.7494301142141462, iteration: 432786
loss: 1.0121738910675049,grad_norm: 0.8249199735930186, iteration: 432787
loss: 1.0168875455856323,grad_norm: 0.7976242537060436, iteration: 432788
loss: 1.0211153030395508,grad_norm: 0.7763740512532477, iteration: 432789
loss: 0.9876253008842468,grad_norm: 0.7420271213274279, iteration: 432790
loss: 1.0102959871292114,grad_norm: 0.8063657413996582, iteration: 432791
loss: 0.9719011187553406,grad_norm: 0.7774833260627537, iteration: 432792
loss: 0.9831348061561584,grad_norm: 0.8456108178767046, iteration: 432793
loss: 1.023129940032959,grad_norm: 0.8004932700587001, iteration: 432794
loss: 1.0240530967712402,grad_norm: 0.6184368186276166, iteration: 432795
loss: 0.9901562333106995,grad_norm: 0.7948697218905632, iteration: 432796
loss: 0.9738808274269104,grad_norm: 0.8636558597944972, iteration: 432797
loss: 0.9979680180549622,grad_norm: 0.809940785288293, iteration: 432798
loss: 0.9997361898422241,grad_norm: 0.8770200949397831, iteration: 432799
loss: 0.9530437588691711,grad_norm: 0.7640081526843023, iteration: 432800
loss: 0.9747884273529053,grad_norm: 0.8672331284364226, iteration: 432801
loss: 0.9861970543861389,grad_norm: 0.6770820797538774, iteration: 432802
loss: 0.9865223169326782,grad_norm: 0.8026554895742033, iteration: 432803
loss: 0.9642539024353027,grad_norm: 0.661853998709803, iteration: 432804
loss: 1.002742886543274,grad_norm: 0.9999998020962297, iteration: 432805
loss: 0.9571959972381592,grad_norm: 0.9999992648751509, iteration: 432806
loss: 1.0309969186782837,grad_norm: 0.9999991488957889, iteration: 432807
loss: 1.024744987487793,grad_norm: 0.999999144526238, iteration: 432808
loss: 1.0202322006225586,grad_norm: 0.9999993331270095, iteration: 432809
loss: 0.9741706252098083,grad_norm: 0.868058603461333, iteration: 432810
loss: 0.9879950284957886,grad_norm: 0.7891001772284418, iteration: 432811
loss: 1.0090506076812744,grad_norm: 0.8895382188962102, iteration: 432812
loss: 0.9990437030792236,grad_norm: 0.77936983958816, iteration: 432813
loss: 0.9903801083564758,grad_norm: 0.7348530291891407, iteration: 432814
loss: 0.9834901690483093,grad_norm: 0.663354421091769, iteration: 432815
loss: 1.0167137384414673,grad_norm: 0.9696527130142794, iteration: 432816
loss: 1.015937328338623,grad_norm: 0.8416433655767274, iteration: 432817
loss: 1.011507272720337,grad_norm: 0.7287973822607461, iteration: 432818
loss: 0.996735692024231,grad_norm: 0.803004215083531, iteration: 432819
loss: 0.989406406879425,grad_norm: 0.8259537643550396, iteration: 432820
loss: 0.9661137461662292,grad_norm: 0.9849228179705644, iteration: 432821
loss: 1.0154249668121338,grad_norm: 0.8182658539544264, iteration: 432822
loss: 0.9742796421051025,grad_norm: 0.920896557239812, iteration: 432823
loss: 1.013878345489502,grad_norm: 0.685880821734279, iteration: 432824
loss: 0.9838976263999939,grad_norm: 0.7714018006569715, iteration: 432825
loss: 0.9778457283973694,grad_norm: 0.7984694615793348, iteration: 432826
loss: 1.008793830871582,grad_norm: 0.9835732160411131, iteration: 432827
loss: 0.9928929209709167,grad_norm: 0.883574880818774, iteration: 432828
loss: 1.0123294591903687,grad_norm: 0.7893463872609116, iteration: 432829
loss: 1.005942463874817,grad_norm: 0.858629252665189, iteration: 432830
loss: 1.0148518085479736,grad_norm: 0.6792622286182268, iteration: 432831
loss: 1.0337085723876953,grad_norm: 0.7835891612708631, iteration: 432832
loss: 0.9654551148414612,grad_norm: 0.8419461332288736, iteration: 432833
loss: 0.9799885749816895,grad_norm: 0.7602758086319916, iteration: 432834
loss: 1.0766985416412354,grad_norm: 0.9999990766056731, iteration: 432835
loss: 0.9921139478683472,grad_norm: 0.7056646921596739, iteration: 432836
loss: 1.0095070600509644,grad_norm: 0.9999994703136551, iteration: 432837
loss: 0.9817265272140503,grad_norm: 0.8751035548985721, iteration: 432838
loss: 1.039753794670105,grad_norm: 0.7739006868884747, iteration: 432839
loss: 0.9644707441329956,grad_norm: 0.7632504996993477, iteration: 432840
loss: 1.034698486328125,grad_norm: 0.7705183747573885, iteration: 432841
loss: 1.012769341468811,grad_norm: 0.7484369475734195, iteration: 432842
loss: 1.0182178020477295,grad_norm: 0.7921829454682906, iteration: 432843
loss: 0.979406476020813,grad_norm: 0.7245186130921634, iteration: 432844
loss: 0.9747791290283203,grad_norm: 0.8370426385961376, iteration: 432845
loss: 0.9908604621887207,grad_norm: 0.8390883451831329, iteration: 432846
loss: 1.0120247602462769,grad_norm: 0.7060979666378171, iteration: 432847
loss: 0.9959127902984619,grad_norm: 0.7934827929509782, iteration: 432848
loss: 0.9845414757728577,grad_norm: 0.9999991827948336, iteration: 432849
loss: 1.012178897857666,grad_norm: 0.8304730983163815, iteration: 432850
loss: 1.0087080001831055,grad_norm: 0.9276477422731679, iteration: 432851
loss: 0.9721716642379761,grad_norm: 0.7998760754271166, iteration: 432852
loss: 1.022139072418213,grad_norm: 0.8978062377724246, iteration: 432853
loss: 1.0603314638137817,grad_norm: 0.9228463480571829, iteration: 432854
loss: 1.0359975099563599,grad_norm: 0.7797977644001322, iteration: 432855
loss: 1.023845911026001,grad_norm: 0.7356990777043569, iteration: 432856
loss: 1.0171735286712646,grad_norm: 0.9153712180333808, iteration: 432857
loss: 0.9994125366210938,grad_norm: 1.0000000037032701, iteration: 432858
loss: 0.9988468885421753,grad_norm: 0.7903368384427625, iteration: 432859
loss: 1.0146377086639404,grad_norm: 0.9718211855930929, iteration: 432860
loss: 1.028511881828308,grad_norm: 0.9999999350893823, iteration: 432861
loss: 1.0276095867156982,grad_norm: 0.7296562132355568, iteration: 432862
loss: 0.9971309304237366,grad_norm: 0.7225788714417253, iteration: 432863
loss: 0.9931779503822327,grad_norm: 0.8462925371366015, iteration: 432864
loss: 1.0451585054397583,grad_norm: 0.8316210293210459, iteration: 432865
loss: 0.9782189726829529,grad_norm: 0.7950408415162389, iteration: 432866
loss: 0.9990440607070923,grad_norm: 0.862255211414122, iteration: 432867
loss: 1.0099104642868042,grad_norm: 0.9999990987060359, iteration: 432868
loss: 0.9948251843452454,grad_norm: 0.7226993762920967, iteration: 432869
loss: 1.0178769826889038,grad_norm: 0.8052661539245551, iteration: 432870
loss: 0.9788831472396851,grad_norm: 0.8566117979401131, iteration: 432871
loss: 1.0127631425857544,grad_norm: 0.9174368567667768, iteration: 432872
loss: 0.9849076271057129,grad_norm: 0.9283752495770222, iteration: 432873
loss: 0.9799633622169495,grad_norm: 0.6795283487222097, iteration: 432874
loss: 0.9807058572769165,grad_norm: 0.7898492040463851, iteration: 432875
loss: 0.9716505408287048,grad_norm: 0.7421253566053819, iteration: 432876
loss: 1.0792019367218018,grad_norm: 0.999999412764733, iteration: 432877
loss: 1.0110057592391968,grad_norm: 0.6410781830822843, iteration: 432878
loss: 1.046085000038147,grad_norm: 0.7575370796979793, iteration: 432879
loss: 1.0081292390823364,grad_norm: 0.8526240511453387, iteration: 432880
loss: 0.9730777740478516,grad_norm: 0.8291729959889993, iteration: 432881
loss: 1.035827875137329,grad_norm: 0.8316167712537096, iteration: 432882
loss: 0.9714555144309998,grad_norm: 0.8600141639767, iteration: 432883
loss: 0.9531235098838806,grad_norm: 0.8452113811698376, iteration: 432884
loss: 1.031972885131836,grad_norm: 0.7387673279486312, iteration: 432885
loss: 1.0480304956436157,grad_norm: 0.8605953287604728, iteration: 432886
loss: 1.006016731262207,grad_norm: 0.999999466009743, iteration: 432887
loss: 0.9986066222190857,grad_norm: 0.788484180712797, iteration: 432888
loss: 0.9924672842025757,grad_norm: 0.9616787084013485, iteration: 432889
loss: 1.0366276502609253,grad_norm: 0.9832424788953105, iteration: 432890
loss: 1.0098011493682861,grad_norm: 0.7886878450002496, iteration: 432891
loss: 1.0275062322616577,grad_norm: 0.7420310680046955, iteration: 432892
loss: 0.9998990297317505,grad_norm: 0.8236804790533737, iteration: 432893
loss: 1.0238016843795776,grad_norm: 0.8861805859343849, iteration: 432894
loss: 1.0081076622009277,grad_norm: 0.7003136528649692, iteration: 432895
loss: 1.0405360460281372,grad_norm: 0.7813685945274277, iteration: 432896
loss: 1.0078647136688232,grad_norm: 0.8051133835870341, iteration: 432897
loss: 1.0911036729812622,grad_norm: 0.8224968760978579, iteration: 432898
loss: 1.01187264919281,grad_norm: 0.7514795006123289, iteration: 432899
loss: 0.9839407801628113,grad_norm: 0.860811031366318, iteration: 432900
loss: 1.0011980533599854,grad_norm: 0.727222234857982, iteration: 432901
loss: 1.0012636184692383,grad_norm: 0.7540758165784511, iteration: 432902
loss: 1.0277327299118042,grad_norm: 0.856610075674027, iteration: 432903
loss: 0.9668257832527161,grad_norm: 0.8886432625892997, iteration: 432904
loss: 1.0381609201431274,grad_norm: 0.7734814514431607, iteration: 432905
loss: 0.9740784168243408,grad_norm: 0.6230255909996827, iteration: 432906
loss: 1.0016107559204102,grad_norm: 0.9492343206032788, iteration: 432907
loss: 0.9806874394416809,grad_norm: 0.8463902185064519, iteration: 432908
loss: 1.0375577211380005,grad_norm: 0.9999992104848568, iteration: 432909
loss: 1.0278685092926025,grad_norm: 0.917858017419377, iteration: 432910
loss: 1.0665676593780518,grad_norm: 0.8352066588815032, iteration: 432911
loss: 1.0162906646728516,grad_norm: 0.8240871806887419, iteration: 432912
loss: 0.9716901779174805,grad_norm: 0.7568321646593573, iteration: 432913
loss: 1.0709917545318604,grad_norm: 0.9583056403404325, iteration: 432914
loss: 1.01692533493042,grad_norm: 0.9019757660841146, iteration: 432915
loss: 1.0517271757125854,grad_norm: 0.8856145047344105, iteration: 432916
loss: 1.0872920751571655,grad_norm: 0.9999998547852338, iteration: 432917
loss: 1.0218602418899536,grad_norm: 0.857298087628509, iteration: 432918
loss: 0.9923054575920105,grad_norm: 0.7532579969844261, iteration: 432919
loss: 0.9795282483100891,grad_norm: 0.7285334031299978, iteration: 432920
loss: 1.037218689918518,grad_norm: 0.8851698119952502, iteration: 432921
loss: 0.9526883363723755,grad_norm: 0.7509949107774688, iteration: 432922
loss: 1.0359604358673096,grad_norm: 0.8129562527406043, iteration: 432923
loss: 0.9725514650344849,grad_norm: 0.7183675445118208, iteration: 432924
loss: 1.0076749324798584,grad_norm: 0.8199244173958188, iteration: 432925
loss: 1.0253475904464722,grad_norm: 0.8111210202848014, iteration: 432926
loss: 1.0514951944351196,grad_norm: 0.9999990848563578, iteration: 432927
loss: 1.040724754333496,grad_norm: 0.7434278459694683, iteration: 432928
loss: 0.9860166311264038,grad_norm: 0.7860227049005192, iteration: 432929
loss: 1.016086459159851,grad_norm: 0.651275716209642, iteration: 432930
loss: 0.9707658886909485,grad_norm: 0.7389172394060052, iteration: 432931
loss: 1.033611536026001,grad_norm: 0.9162475945856801, iteration: 432932
loss: 1.0727161169052124,grad_norm: 0.9999999006772029, iteration: 432933
loss: 1.1033077239990234,grad_norm: 0.999999413753116, iteration: 432934
loss: 0.9843615293502808,grad_norm: 0.7321907450133218, iteration: 432935
loss: 0.9415127635002136,grad_norm: 0.7246718080501581, iteration: 432936
loss: 1.020403265953064,grad_norm: 0.9999990938498067, iteration: 432937
loss: 0.98225998878479,grad_norm: 0.7872589756829218, iteration: 432938
loss: 1.0081582069396973,grad_norm: 0.9999990244760506, iteration: 432939
loss: 1.0082018375396729,grad_norm: 0.8465458105194187, iteration: 432940
loss: 1.0282344818115234,grad_norm: 0.7243966830899975, iteration: 432941
loss: 1.0033419132232666,grad_norm: 0.8524161628574606, iteration: 432942
loss: 0.9596641063690186,grad_norm: 0.815075474106574, iteration: 432943
loss: 0.9977970123291016,grad_norm: 0.8648724939066786, iteration: 432944
loss: 1.0119074583053589,grad_norm: 0.7698667003579938, iteration: 432945
loss: 0.98799729347229,grad_norm: 0.999999558354734, iteration: 432946
loss: 0.9966412782669067,grad_norm: 0.690957660975227, iteration: 432947
loss: 1.0063616037368774,grad_norm: 0.682618025595677, iteration: 432948
loss: 0.9920148849487305,grad_norm: 0.7519223200596973, iteration: 432949
loss: 1.0105372667312622,grad_norm: 0.8578699950695394, iteration: 432950
loss: 0.9376676678657532,grad_norm: 0.7572601082272523, iteration: 432951
loss: 1.0032724142074585,grad_norm: 0.9953782426473125, iteration: 432952
loss: 0.980378270149231,grad_norm: 0.7913861921788964, iteration: 432953
loss: 1.0298463106155396,grad_norm: 0.9999997546738353, iteration: 432954
loss: 0.9724324345588684,grad_norm: 0.7714779145114188, iteration: 432955
loss: 1.051385521888733,grad_norm: 0.829018421047738, iteration: 432956
loss: 0.9935628771781921,grad_norm: 0.8636092216894905, iteration: 432957
loss: 0.9933094382286072,grad_norm: 0.816645479284406, iteration: 432958
loss: 1.0418877601623535,grad_norm: 0.8668530788462423, iteration: 432959
loss: 1.0380743741989136,grad_norm: 0.73251116231892, iteration: 432960
loss: 0.9675663709640503,grad_norm: 0.6306913057553417, iteration: 432961
loss: 1.012171983718872,grad_norm: 0.7358013471334393, iteration: 432962
loss: 1.0182528495788574,grad_norm: 0.9999998742761385, iteration: 432963
loss: 1.0519778728485107,grad_norm: 0.8314135019299573, iteration: 432964
loss: 0.9814813137054443,grad_norm: 0.7366796366554141, iteration: 432965
loss: 1.0078954696655273,grad_norm: 0.9550145514506703, iteration: 432966
loss: 1.0346678495407104,grad_norm: 0.8240306439751165, iteration: 432967
loss: 1.0024553537368774,grad_norm: 0.9999990392940681, iteration: 432968
loss: 0.9814196228981018,grad_norm: 0.8904475323894729, iteration: 432969
loss: 0.9652789831161499,grad_norm: 0.8480533689529722, iteration: 432970
loss: 0.967954695224762,grad_norm: 0.6805776777283293, iteration: 432971
loss: 1.0053844451904297,grad_norm: 0.7676836828270193, iteration: 432972
loss: 0.9759101271629333,grad_norm: 0.7671248085211188, iteration: 432973
loss: 0.9740357995033264,grad_norm: 0.678101984244159, iteration: 432974
loss: 1.1574145555496216,grad_norm: 0.9999991384601229, iteration: 432975
loss: 1.0388336181640625,grad_norm: 0.8343122516769509, iteration: 432976
loss: 0.9527906775474548,grad_norm: 0.6742979552285618, iteration: 432977
loss: 1.015068769454956,grad_norm: 0.667527591247787, iteration: 432978
loss: 1.0577380657196045,grad_norm: 0.9999990560788751, iteration: 432979
loss: 1.0132412910461426,grad_norm: 0.8725609651702124, iteration: 432980
loss: 0.9918825030326843,grad_norm: 0.9999991203170123, iteration: 432981
loss: 1.0315662622451782,grad_norm: 0.8443990430682605, iteration: 432982
loss: 1.0109814405441284,grad_norm: 0.814953558927761, iteration: 432983
loss: 1.038741111755371,grad_norm: 0.8050044719553536, iteration: 432984
loss: 1.0375643968582153,grad_norm: 0.8853706928718582, iteration: 432985
loss: 0.9887089729309082,grad_norm: 0.8815262837854856, iteration: 432986
loss: 1.0156497955322266,grad_norm: 0.7787795311260679, iteration: 432987
loss: 0.9975138902664185,grad_norm: 0.8994296233584123, iteration: 432988
loss: 0.9891420006752014,grad_norm: 0.9999990680948896, iteration: 432989
loss: 1.0265413522720337,grad_norm: 0.7732046078218792, iteration: 432990
loss: 0.9783686399459839,grad_norm: 0.8077972916473923, iteration: 432991
loss: 1.0173288583755493,grad_norm: 0.8065661245797071, iteration: 432992
loss: 0.9706871509552002,grad_norm: 0.7645870118612472, iteration: 432993
loss: 1.009827971458435,grad_norm: 0.8900483250629176, iteration: 432994
loss: 0.9902430176734924,grad_norm: 0.9650819795250435, iteration: 432995
loss: 0.973868727684021,grad_norm: 0.7531927654576362, iteration: 432996
loss: 1.0144245624542236,grad_norm: 0.7382486982350617, iteration: 432997
loss: 0.9809018969535828,grad_norm: 0.8539247324276162, iteration: 432998
loss: 0.9876821041107178,grad_norm: 0.8193971027096447, iteration: 432999
loss: 0.9910112619400024,grad_norm: 0.8706563717755493, iteration: 433000
loss: 0.995535671710968,grad_norm: 0.9999992555129391, iteration: 433001
loss: 0.9973207712173462,grad_norm: 0.8444010246071991, iteration: 433002
loss: 0.9851399064064026,grad_norm: 0.6565432257137254, iteration: 433003
loss: 1.0053350925445557,grad_norm: 0.7157133877352173, iteration: 433004
loss: 1.124705195426941,grad_norm: 0.8359537701924279, iteration: 433005
loss: 1.013351559638977,grad_norm: 0.999999324965483, iteration: 433006
loss: 1.0353347063064575,grad_norm: 0.9124452593767512, iteration: 433007
loss: 0.9958968758583069,grad_norm: 0.8976774729497257, iteration: 433008
loss: 1.0206494331359863,grad_norm: 0.8822003902968728, iteration: 433009
loss: 1.1005611419677734,grad_norm: 0.8010510434590915, iteration: 433010
loss: 1.033555030822754,grad_norm: 0.7576474800706686, iteration: 433011
loss: 0.9896470904350281,grad_norm: 0.7641155571296339, iteration: 433012
loss: 0.9883019328117371,grad_norm: 0.7007923282960877, iteration: 433013
loss: 1.0076321363449097,grad_norm: 0.7861192777285992, iteration: 433014
loss: 0.9394437074661255,grad_norm: 0.8365027890234873, iteration: 433015
loss: 1.0128263235092163,grad_norm: 0.7158619152076443, iteration: 433016
loss: 1.0428109169006348,grad_norm: 0.999998998552957, iteration: 433017
loss: 0.9937021136283875,grad_norm: 0.6372148791647058, iteration: 433018
loss: 1.0125916004180908,grad_norm: 0.8791160042386332, iteration: 433019
loss: 0.9844195246696472,grad_norm: 0.7517383100497516, iteration: 433020
loss: 1.0142186880111694,grad_norm: 0.8049999325054674, iteration: 433021
loss: 0.9694051146507263,grad_norm: 0.7330591006932664, iteration: 433022
loss: 1.018410563468933,grad_norm: 0.8813753681037816, iteration: 433023
loss: 0.9630350470542908,grad_norm: 0.8072905758569939, iteration: 433024
loss: 1.0126749277114868,grad_norm: 0.7770241339105723, iteration: 433025
loss: 0.9825649261474609,grad_norm: 0.9089415886387007, iteration: 433026
loss: 0.9806204438209534,grad_norm: 0.783647722106742, iteration: 433027
loss: 1.0222208499908447,grad_norm: 0.712870714564967, iteration: 433028
loss: 1.0362310409545898,grad_norm: 0.8871794410007613, iteration: 433029
loss: 0.9777346253395081,grad_norm: 0.6951413361754791, iteration: 433030
loss: 0.981356143951416,grad_norm: 0.944084860973454, iteration: 433031
loss: 0.9780480861663818,grad_norm: 0.8946458371363973, iteration: 433032
loss: 0.9989508986473083,grad_norm: 0.9999999258338172, iteration: 433033
loss: 1.0560736656188965,grad_norm: 0.8351320142384998, iteration: 433034
loss: 1.0886653661727905,grad_norm: 0.7671350977421345, iteration: 433035
loss: 1.0375090837478638,grad_norm: 0.6965785923662754, iteration: 433036
loss: 0.9963927268981934,grad_norm: 0.9999990996544257, iteration: 433037
loss: 1.0227088928222656,grad_norm: 0.8518974796383894, iteration: 433038
loss: 0.9854280352592468,grad_norm: 0.7882078107831798, iteration: 433039
loss: 1.2074319124221802,grad_norm: 0.9999996914673833, iteration: 433040
loss: 0.9671809077262878,grad_norm: 0.7905174165867436, iteration: 433041
loss: 1.0402789115905762,grad_norm: 0.7913737868859771, iteration: 433042
loss: 1.0350968837738037,grad_norm: 0.8001431808532714, iteration: 433043
loss: 1.0084525346755981,grad_norm: 0.7541277810261131, iteration: 433044
loss: 0.9788654446601868,grad_norm: 0.7858124629575651, iteration: 433045
loss: 1.0213993787765503,grad_norm: 0.9999994082787329, iteration: 433046
loss: 0.9890857934951782,grad_norm: 0.773433946027733, iteration: 433047
loss: 0.9588263034820557,grad_norm: 0.7610432918337814, iteration: 433048
loss: 0.9851296544075012,grad_norm: 0.6275090877937928, iteration: 433049
loss: 1.0031285285949707,grad_norm: 0.7817490478417417, iteration: 433050
loss: 1.0693869590759277,grad_norm: 0.9999994999964199, iteration: 433051
loss: 0.9587498903274536,grad_norm: 0.9988692864374795, iteration: 433052
loss: 0.9618641138076782,grad_norm: 0.6798407423919147, iteration: 433053
loss: 1.0480934381484985,grad_norm: 0.9380133915471197, iteration: 433054
loss: 1.020063042640686,grad_norm: 0.729927380029623, iteration: 433055
loss: 1.0067273378372192,grad_norm: 0.8092949563542283, iteration: 433056
loss: 1.0081202983856201,grad_norm: 0.6520480625687428, iteration: 433057
loss: 1.0149434804916382,grad_norm: 0.7395362105635488, iteration: 433058
loss: 0.9551596641540527,grad_norm: 0.7290048288862789, iteration: 433059
loss: 1.0265734195709229,grad_norm: 0.9999991473540083, iteration: 433060
loss: 1.0046017169952393,grad_norm: 0.8191858225094211, iteration: 433061
loss: 0.9748652577400208,grad_norm: 0.9006945674850487, iteration: 433062
loss: 1.0341107845306396,grad_norm: 0.789716781664903, iteration: 433063
loss: 0.9619659185409546,grad_norm: 0.7481804649497007, iteration: 433064
loss: 1.0298938751220703,grad_norm: 0.7205730471730977, iteration: 433065
loss: 0.9957101345062256,grad_norm: 0.8005407589679068, iteration: 433066
loss: 0.9819849729537964,grad_norm: 0.7398651247912542, iteration: 433067
loss: 1.028270959854126,grad_norm: 0.9151615032734036, iteration: 433068
loss: 1.06923246383667,grad_norm: 0.9999990903741125, iteration: 433069
loss: 1.03849196434021,grad_norm: 0.9999991045998753, iteration: 433070
loss: 1.0078274011611938,grad_norm: 0.9999994730083068, iteration: 433071
loss: 1.0254952907562256,grad_norm: 0.7927179905224444, iteration: 433072
loss: 0.9825364947319031,grad_norm: 0.6476281833985225, iteration: 433073
loss: 0.9822894334793091,grad_norm: 0.7784799388779059, iteration: 433074
loss: 0.9754572510719299,grad_norm: 0.7998251602706192, iteration: 433075
loss: 0.9734976887702942,grad_norm: 0.8811026844463749, iteration: 433076
loss: 1.0194545984268188,grad_norm: 0.9235596702448138, iteration: 433077
loss: 0.9977595210075378,grad_norm: 0.5859763234034737, iteration: 433078
loss: 0.9687565565109253,grad_norm: 0.846936515240268, iteration: 433079
loss: 0.9789260029792786,grad_norm: 0.6150975809988951, iteration: 433080
loss: 0.9923555254936218,grad_norm: 0.6917432509697314, iteration: 433081
loss: 0.965615451335907,grad_norm: 0.9999991980224872, iteration: 433082
loss: 1.0026392936706543,grad_norm: 0.7451896860842266, iteration: 433083
loss: 1.0463454723358154,grad_norm: 0.9999996791616065, iteration: 433084
loss: 1.0653482675552368,grad_norm: 0.9287369567699609, iteration: 433085
loss: 0.9985542893409729,grad_norm: 0.9760964272921557, iteration: 433086
loss: 0.9636098146438599,grad_norm: 0.8013441607664218, iteration: 433087
loss: 0.9892733693122864,grad_norm: 0.626831607858408, iteration: 433088
loss: 0.9773744940757751,grad_norm: 0.8256994482828383, iteration: 433089
loss: 1.0245001316070557,grad_norm: 0.8688913145875539, iteration: 433090
loss: 1.0177273750305176,grad_norm: 0.7889248310982012, iteration: 433091
loss: 0.9898030757904053,grad_norm: 0.7333355558639845, iteration: 433092
loss: 0.9805852770805359,grad_norm: 0.6816156808182641, iteration: 433093
loss: 1.0609277486801147,grad_norm: 0.9484489742978579, iteration: 433094
loss: 0.9980394840240479,grad_norm: 0.8994754144024745, iteration: 433095
loss: 1.0057286024093628,grad_norm: 0.7006797723422971, iteration: 433096
loss: 0.9848465323448181,grad_norm: 0.8587063432268927, iteration: 433097
loss: 0.973889172077179,grad_norm: 0.8859895823624092, iteration: 433098
loss: 0.9893128275871277,grad_norm: 0.8439290831378051, iteration: 433099
loss: 1.0440598726272583,grad_norm: 0.7789383872607133, iteration: 433100
loss: 0.9876652359962463,grad_norm: 0.8524815487038826, iteration: 433101
loss: 1.0177329778671265,grad_norm: 0.8881551723871066, iteration: 433102
loss: 0.9838553071022034,grad_norm: 0.8450413644688642, iteration: 433103
loss: 1.0316344499588013,grad_norm: 0.7914142075239475, iteration: 433104
loss: 1.005083680152893,grad_norm: 0.5972300210173282, iteration: 433105
loss: 1.0125412940979004,grad_norm: 0.8096364795550614, iteration: 433106
loss: 0.9848383665084839,grad_norm: 0.71795332124925, iteration: 433107
loss: 1.0242863893508911,grad_norm: 0.6522871844115076, iteration: 433108
loss: 0.9911099672317505,grad_norm: 0.8580362982657965, iteration: 433109
loss: 1.0763089656829834,grad_norm: 0.9999995645146216, iteration: 433110
loss: 1.0130164623260498,grad_norm: 0.8871138745022754, iteration: 433111
loss: 1.0917032957077026,grad_norm: 0.9999999057874752, iteration: 433112
loss: 1.057877540588379,grad_norm: 0.9999996860845458, iteration: 433113
loss: 1.0054600238800049,grad_norm: 0.7403978765634726, iteration: 433114
loss: 1.0271258354187012,grad_norm: 0.9861058041352977, iteration: 433115
loss: 0.9732881784439087,grad_norm: 0.8370067575722776, iteration: 433116
loss: 0.9983143210411072,grad_norm: 0.7659075803588165, iteration: 433117
loss: 1.0163236856460571,grad_norm: 0.9749120359769841, iteration: 433118
loss: 1.0898817777633667,grad_norm: 0.8458557482696235, iteration: 433119
loss: 0.9563385248184204,grad_norm: 0.6977589768988411, iteration: 433120
loss: 0.9854887127876282,grad_norm: 0.7736677522338767, iteration: 433121
loss: 0.9814057350158691,grad_norm: 0.6975731669154773, iteration: 433122
loss: 0.9854333400726318,grad_norm: 0.6857305358937529, iteration: 433123
loss: 0.9972028136253357,grad_norm: 0.7229478990799879, iteration: 433124
loss: 1.0205751657485962,grad_norm: 0.9764293554567717, iteration: 433125
loss: 0.9841265082359314,grad_norm: 0.7274343968926893, iteration: 433126
loss: 0.9966754913330078,grad_norm: 0.7513594918532824, iteration: 433127
loss: 1.003147006034851,grad_norm: 0.6591492260987786, iteration: 433128
loss: 0.9490630626678467,grad_norm: 0.6773234437132291, iteration: 433129
loss: 1.0151658058166504,grad_norm: 0.8662716625398029, iteration: 433130
loss: 1.0246201753616333,grad_norm: 0.9028897517762654, iteration: 433131
loss: 1.0065815448760986,grad_norm: 0.8907015861285794, iteration: 433132
loss: 1.0018641948699951,grad_norm: 0.6689039923136637, iteration: 433133
loss: 1.0201539993286133,grad_norm: 0.6650867928879354, iteration: 433134
loss: 0.994166910648346,grad_norm: 0.8248194999603026, iteration: 433135
loss: 1.0536381006240845,grad_norm: 0.9999991652871913, iteration: 433136
loss: 1.0294756889343262,grad_norm: 0.8118291899787483, iteration: 433137
loss: 1.0054339170455933,grad_norm: 0.8968676517812639, iteration: 433138
loss: 1.0168864727020264,grad_norm: 0.9246284039558442, iteration: 433139
loss: 1.0365474224090576,grad_norm: 0.8496278411832018, iteration: 433140
loss: 1.0138300657272339,grad_norm: 0.7595206642032446, iteration: 433141
loss: 0.9867922067642212,grad_norm: 0.6917396640095295, iteration: 433142
loss: 1.012629747390747,grad_norm: 0.99999968821254, iteration: 433143
loss: 0.9841710925102234,grad_norm: 0.7870396999013805, iteration: 433144
loss: 0.9855840802192688,grad_norm: 0.6786442480753205, iteration: 433145
loss: 1.0699902772903442,grad_norm: 0.7343920279143971, iteration: 433146
loss: 0.9981333613395691,grad_norm: 0.6709024388861403, iteration: 433147
loss: 0.9796807169914246,grad_norm: 0.7440576061534311, iteration: 433148
loss: 1.0001729726791382,grad_norm: 0.8636714351900009, iteration: 433149
loss: 1.0159223079681396,grad_norm: 0.8279296901017168, iteration: 433150
loss: 1.0230978727340698,grad_norm: 0.8307867089336094, iteration: 433151
loss: 1.0140680074691772,grad_norm: 0.9999990694867834, iteration: 433152
loss: 0.979820966720581,grad_norm: 0.8660331139986196, iteration: 433153
loss: 0.9693046808242798,grad_norm: 0.661192065086211, iteration: 433154
loss: 1.0206626653671265,grad_norm: 0.7378492752793213, iteration: 433155
loss: 0.9775486588478088,grad_norm: 0.893525785025436, iteration: 433156
loss: 1.0261032581329346,grad_norm: 0.798705327145393, iteration: 433157
loss: 1.0259239673614502,grad_norm: 0.983994057878002, iteration: 433158
loss: 1.0321511030197144,grad_norm: 0.7936257052357271, iteration: 433159
loss: 1.0016229152679443,grad_norm: 0.6401085845591407, iteration: 433160
loss: 1.0175039768218994,grad_norm: 0.643826901697596, iteration: 433161
loss: 1.0195785760879517,grad_norm: 0.9761222819813923, iteration: 433162
loss: 1.0244767665863037,grad_norm: 0.8540582375778204, iteration: 433163
loss: 1.0648914575576782,grad_norm: 0.7614898939638747, iteration: 433164
loss: 0.9987826347351074,grad_norm: 0.7079097076413784, iteration: 433165
loss: 1.0276484489440918,grad_norm: 0.8845359591644845, iteration: 433166
loss: 1.0381706953048706,grad_norm: 0.9999996504836711, iteration: 433167
loss: 0.9852339625358582,grad_norm: 0.8515878242045257, iteration: 433168
loss: 1.0039366483688354,grad_norm: 0.877440117314998, iteration: 433169
loss: 1.0403295755386353,grad_norm: 0.9999991318744104, iteration: 433170
loss: 0.9917008876800537,grad_norm: 0.9236388226284982, iteration: 433171
loss: 1.0453779697418213,grad_norm: 0.8720545425463173, iteration: 433172
loss: 1.0480419397354126,grad_norm: 0.8737632894262197, iteration: 433173
loss: 0.9929348230361938,grad_norm: 0.769807006519316, iteration: 433174
loss: 1.0075013637542725,grad_norm: 0.9837040953081975, iteration: 433175
loss: 0.9719846844673157,grad_norm: 0.7080501076700578, iteration: 433176
loss: 0.9524580240249634,grad_norm: 0.7821102469530101, iteration: 433177
loss: 1.0276328325271606,grad_norm: 0.653156006622263, iteration: 433178
loss: 1.0339617729187012,grad_norm: 0.8572727905547365, iteration: 433179
loss: 0.9936814308166504,grad_norm: 0.7002137468150761, iteration: 433180
loss: 0.9834752082824707,grad_norm: 0.6501722985277324, iteration: 433181
loss: 0.9734514355659485,grad_norm: 0.8305688327468582, iteration: 433182
loss: 1.0020397901535034,grad_norm: 0.8689671537912621, iteration: 433183
loss: 1.0043717622756958,grad_norm: 0.9999999332765688, iteration: 433184
loss: 0.9434354901313782,grad_norm: 0.8359554665925178, iteration: 433185
loss: 0.9902567267417908,grad_norm: 0.8085032968848234, iteration: 433186
loss: 0.99430251121521,grad_norm: 0.6845799905885938, iteration: 433187
loss: 1.013736605644226,grad_norm: 0.7994131847750426, iteration: 433188
loss: 1.0157397985458374,grad_norm: 0.6386271623772977, iteration: 433189
loss: 0.9607958197593689,grad_norm: 0.6854778466108042, iteration: 433190
loss: 1.01119863986969,grad_norm: 0.8398765768044961, iteration: 433191
loss: 1.0249590873718262,grad_norm: 0.689382141841965, iteration: 433192
loss: 1.0022622346878052,grad_norm: 0.8857928570771809, iteration: 433193
loss: 1.0255461931228638,grad_norm: 0.715068921779121, iteration: 433194
loss: 1.0105088949203491,grad_norm: 0.7641535369805948, iteration: 433195
loss: 0.9935029149055481,grad_norm: 0.7182631618097642, iteration: 433196
loss: 1.013948917388916,grad_norm: 0.9021400883920992, iteration: 433197
loss: 1.0185881853103638,grad_norm: 0.6652876302995794, iteration: 433198
loss: 1.0501363277435303,grad_norm: 0.8215935888978645, iteration: 433199
loss: 0.9946432113647461,grad_norm: 0.7155771225058722, iteration: 433200
loss: 1.0154072046279907,grad_norm: 0.7033030658901944, iteration: 433201
loss: 0.9827262759208679,grad_norm: 0.8942570032037627, iteration: 433202
loss: 1.0125828981399536,grad_norm: 0.7658273734456899, iteration: 433203
loss: 1.027560830116272,grad_norm: 0.7324775459366999, iteration: 433204
loss: 0.9956420063972473,grad_norm: 0.9237558366831433, iteration: 433205
loss: 0.9615820050239563,grad_norm: 0.9999990779278886, iteration: 433206
loss: 0.9895084500312805,grad_norm: 0.6593888965314192, iteration: 433207
loss: 1.0118520259857178,grad_norm: 0.6703004953683982, iteration: 433208
loss: 1.000906229019165,grad_norm: 0.7849172780010604, iteration: 433209
loss: 1.0210497379302979,grad_norm: 0.9999991677235555, iteration: 433210
loss: 0.9824414253234863,grad_norm: 0.8633162569067859, iteration: 433211
loss: 1.0261906385421753,grad_norm: 0.784697823189607, iteration: 433212
loss: 1.0165612697601318,grad_norm: 0.787063004766085, iteration: 433213
loss: 0.9991758465766907,grad_norm: 0.7997876777378656, iteration: 433214
loss: 1.0656702518463135,grad_norm: 0.9999993177606387, iteration: 433215
loss: 1.0238597393035889,grad_norm: 0.7670024217653745, iteration: 433216
loss: 1.0102766752243042,grad_norm: 0.873476800722773, iteration: 433217
loss: 0.9735410809516907,grad_norm: 0.7824246097436067, iteration: 433218
loss: 1.0221143960952759,grad_norm: 0.6750334718346195, iteration: 433219
loss: 1.0009078979492188,grad_norm: 0.8834298959006962, iteration: 433220
loss: 1.0176544189453125,grad_norm: 0.6951605027711713, iteration: 433221
loss: 1.0279184579849243,grad_norm: 0.8230273931709948, iteration: 433222
loss: 0.9974705576896667,grad_norm: 0.8353164434184748, iteration: 433223
loss: 1.0080962181091309,grad_norm: 0.7172220756068428, iteration: 433224
loss: 0.9873542785644531,grad_norm: 0.8357166347055326, iteration: 433225
loss: 1.0616724491119385,grad_norm: 0.9999994487774779, iteration: 433226
loss: 1.015550971031189,grad_norm: 0.9999997973445034, iteration: 433227
loss: 0.9969527125358582,grad_norm: 0.7504244289640148, iteration: 433228
loss: 0.9993963241577148,grad_norm: 0.9999992396496846, iteration: 433229
loss: 1.045701265335083,grad_norm: 0.8091552598445949, iteration: 433230
loss: 1.0156010389328003,grad_norm: 0.7504718206431304, iteration: 433231
loss: 0.988090991973877,grad_norm: 0.7308103749185133, iteration: 433232
loss: 0.9896245002746582,grad_norm: 0.8901227599519952, iteration: 433233
loss: 1.0251013040542603,grad_norm: 0.8766938478405727, iteration: 433234
loss: 1.0136406421661377,grad_norm: 0.6831916147955056, iteration: 433235
loss: 0.9956814050674438,grad_norm: 0.7191320182788288, iteration: 433236
loss: 0.971798300743103,grad_norm: 0.9415114633569988, iteration: 433237
loss: 1.0026671886444092,grad_norm: 0.7955664435828619, iteration: 433238
loss: 1.0005388259887695,grad_norm: 0.824889573395052, iteration: 433239
loss: 0.9920927882194519,grad_norm: 0.6897343793347571, iteration: 433240
loss: 1.0220446586608887,grad_norm: 0.9267096483959407, iteration: 433241
loss: 0.9842415452003479,grad_norm: 0.6837971576176352, iteration: 433242
loss: 0.9915874004364014,grad_norm: 0.999999881292886, iteration: 433243
loss: 0.9589648842811584,grad_norm: 0.8356077783296634, iteration: 433244
loss: 0.9918951988220215,grad_norm: 0.9999991048199935, iteration: 433245
loss: 1.0031883716583252,grad_norm: 0.7967853223101775, iteration: 433246
loss: 0.9881501793861389,grad_norm: 0.7763882157064823, iteration: 433247
loss: 1.009411334991455,grad_norm: 0.7417748346061936, iteration: 433248
loss: 1.012352466583252,grad_norm: 0.7423943013821401, iteration: 433249
loss: 0.9988463521003723,grad_norm: 0.8473271911272492, iteration: 433250
loss: 1.0056605339050293,grad_norm: 0.6864520957846767, iteration: 433251
loss: 0.9634175300598145,grad_norm: 0.6799178745854734, iteration: 433252
loss: 1.003883719444275,grad_norm: 0.6779743023627911, iteration: 433253
loss: 1.0204354524612427,grad_norm: 0.893284948482462, iteration: 433254
loss: 0.975193202495575,grad_norm: 0.9999991294926993, iteration: 433255
loss: 1.0270909070968628,grad_norm: 0.7956941784719724, iteration: 433256
loss: 1.0569039583206177,grad_norm: 0.9999994686490739, iteration: 433257
loss: 0.9927964210510254,grad_norm: 0.7310213750033335, iteration: 433258
loss: 0.9826563000679016,grad_norm: 0.8261095233211497, iteration: 433259
loss: 0.9739928245544434,grad_norm: 0.7915417545899188, iteration: 433260
loss: 0.9926314353942871,grad_norm: 0.6777488779357123, iteration: 433261
loss: 1.0269992351531982,grad_norm: 0.9999992894411306, iteration: 433262
loss: 0.9720532894134521,grad_norm: 0.799048724354623, iteration: 433263
loss: 1.022102952003479,grad_norm: 0.7129691994600381, iteration: 433264
loss: 0.9848197102546692,grad_norm: 0.7159847041319092, iteration: 433265
loss: 1.001412272453308,grad_norm: 0.7095190565861959, iteration: 433266
loss: 1.0000489950180054,grad_norm: 0.7325769231003675, iteration: 433267
loss: 1.02102792263031,grad_norm: 0.8155258698019239, iteration: 433268
loss: 1.00542151927948,grad_norm: 0.7250522313623787, iteration: 433269
loss: 0.9928035736083984,grad_norm: 0.747270492669831, iteration: 433270
loss: 0.9841049313545227,grad_norm: 0.8789194885118775, iteration: 433271
loss: 1.0183765888214111,grad_norm: 0.9999998543813855, iteration: 433272
loss: 0.9923728704452515,grad_norm: 0.9999991212459104, iteration: 433273
loss: 0.9957489371299744,grad_norm: 0.7695687253254404, iteration: 433274
loss: 1.002170205116272,grad_norm: 0.7956907187633044, iteration: 433275
loss: 0.9987865686416626,grad_norm: 0.8704197121709863, iteration: 433276
loss: 1.0059973001480103,grad_norm: 0.9757245671408327, iteration: 433277
loss: 0.9664806127548218,grad_norm: 0.7082872476705925, iteration: 433278
loss: 0.9324754476547241,grad_norm: 0.7976934426604207, iteration: 433279
loss: 1.0210673809051514,grad_norm: 0.7567136420562233, iteration: 433280
loss: 1.0083860158920288,grad_norm: 0.7113401664712823, iteration: 433281
loss: 1.0313225984573364,grad_norm: 0.7673568436876924, iteration: 433282
loss: 0.9807045459747314,grad_norm: 0.7628996772798338, iteration: 433283
loss: 1.0209628343582153,grad_norm: 0.7467800991986177, iteration: 433284
loss: 1.0056720972061157,grad_norm: 0.8245037092183147, iteration: 433285
loss: 0.9987084269523621,grad_norm: 0.7897131042427261, iteration: 433286
loss: 0.9901612401008606,grad_norm: 0.7826205519940796, iteration: 433287
loss: 0.999745786190033,grad_norm: 0.9999997025134304, iteration: 433288
loss: 0.9658801555633545,grad_norm: 0.7876546373623415, iteration: 433289
loss: 1.0194555521011353,grad_norm: 0.7495032351173221, iteration: 433290
loss: 1.0617488622665405,grad_norm: 0.9999994562567237, iteration: 433291
loss: 0.9907481074333191,grad_norm: 0.8217385573710592, iteration: 433292
loss: 1.0063234567642212,grad_norm: 0.7678117274441533, iteration: 433293
loss: 1.0098400115966797,grad_norm: 0.8338156930115584, iteration: 433294
loss: 1.0025684833526611,grad_norm: 0.7350993403031669, iteration: 433295
loss: 0.9560550451278687,grad_norm: 0.9999991524576928, iteration: 433296
loss: 0.9722413420677185,grad_norm: 0.810867003469408, iteration: 433297
loss: 0.9799545407295227,grad_norm: 0.8326593768410453, iteration: 433298
loss: 0.9709610939025879,grad_norm: 0.7572760303863153, iteration: 433299
loss: 1.0094553232192993,grad_norm: 0.9999998110144617, iteration: 433300
loss: 0.9778122305870056,grad_norm: 0.8180802394667677, iteration: 433301
loss: 1.027925729751587,grad_norm: 0.6253927554690323, iteration: 433302
loss: 0.9732013940811157,grad_norm: 0.999999449272161, iteration: 433303
loss: 0.9916650056838989,grad_norm: 0.8016568517815157, iteration: 433304
loss: 0.9841759204864502,grad_norm: 0.736153038376789, iteration: 433305
loss: 0.9737229943275452,grad_norm: 0.8549278873476255, iteration: 433306
loss: 0.9830509424209595,grad_norm: 0.8069626862584586, iteration: 433307
loss: 0.9992855787277222,grad_norm: 0.8313512005077933, iteration: 433308
loss: 1.0636508464813232,grad_norm: 0.9999997751783442, iteration: 433309
loss: 1.0021518468856812,grad_norm: 0.706367789742127, iteration: 433310
loss: 1.009069800376892,grad_norm: 0.8283152190120945, iteration: 433311
loss: 1.0148905515670776,grad_norm: 0.8844872836565677, iteration: 433312
loss: 0.997321367263794,grad_norm: 0.9700910598270617, iteration: 433313
loss: 1.0005813837051392,grad_norm: 0.662457526261006, iteration: 433314
loss: 0.976564347743988,grad_norm: 0.8569664763276162, iteration: 433315
loss: 1.025031328201294,grad_norm: 0.7095855530524336, iteration: 433316
loss: 0.9866809844970703,grad_norm: 0.9199231745166858, iteration: 433317
loss: 1.0093010663986206,grad_norm: 0.657733541678398, iteration: 433318
loss: 0.9886559844017029,grad_norm: 0.6620819150172055, iteration: 433319
loss: 1.0303884744644165,grad_norm: 0.9095525383046403, iteration: 433320
loss: 1.0272520780563354,grad_norm: 0.8112954571842715, iteration: 433321
loss: 1.0247621536254883,grad_norm: 0.7876313781200531, iteration: 433322
loss: 0.980857789516449,grad_norm: 0.6530486128409182, iteration: 433323
loss: 1.0049083232879639,grad_norm: 0.819534365472057, iteration: 433324
loss: 1.003828763961792,grad_norm: 0.8153589831532376, iteration: 433325
loss: 0.9692366719245911,grad_norm: 0.8296187893113841, iteration: 433326
loss: 1.003241777420044,grad_norm: 0.9999994346341924, iteration: 433327
loss: 0.9728432893753052,grad_norm: 0.6937748402230569, iteration: 433328
loss: 0.994808554649353,grad_norm: 0.7184592930571683, iteration: 433329
loss: 1.0728321075439453,grad_norm: 0.7214114669231403, iteration: 433330
loss: 1.013782262802124,grad_norm: 0.9999991136348679, iteration: 433331
loss: 1.0008785724639893,grad_norm: 0.7713296333625, iteration: 433332
loss: 0.9912452101707458,grad_norm: 0.724817931219116, iteration: 433333
loss: 1.0003026723861694,grad_norm: 0.7604401110054491, iteration: 433334
loss: 1.0179829597473145,grad_norm: 0.9999990766577784, iteration: 433335
loss: 0.972889244556427,grad_norm: 0.8564862246380812, iteration: 433336
loss: 1.005094289779663,grad_norm: 0.689308562666603, iteration: 433337
loss: 1.058401107788086,grad_norm: 1.0000000137024654, iteration: 433338
loss: 0.993624746799469,grad_norm: 0.7853407584338119, iteration: 433339
loss: 0.9975212812423706,grad_norm: 0.9556203485448271, iteration: 433340
loss: 1.008862853050232,grad_norm: 0.9999991300147681, iteration: 433341
loss: 1.0252883434295654,grad_norm: 0.7932197787463346, iteration: 433342
loss: 1.0167417526245117,grad_norm: 0.6973177092823353, iteration: 433343
loss: 1.1215630769729614,grad_norm: 0.9999994671093461, iteration: 433344
loss: 0.9785356521606445,grad_norm: 0.7861434442177486, iteration: 433345
loss: 1.030664086341858,grad_norm: 0.7821412898377689, iteration: 433346
loss: 0.9880613088607788,grad_norm: 0.7609766258664533, iteration: 433347
loss: 0.9932294487953186,grad_norm: 0.9999995794401756, iteration: 433348
loss: 1.0174866914749146,grad_norm: 0.7786115936837817, iteration: 433349
loss: 1.0546082258224487,grad_norm: 0.9999995574602794, iteration: 433350
loss: 1.0165536403656006,grad_norm: 0.7101889647053656, iteration: 433351
loss: 1.0149117708206177,grad_norm: 0.7946951325276246, iteration: 433352
loss: 0.9951949715614319,grad_norm: 0.7449628428114934, iteration: 433353
loss: 1.035500407218933,grad_norm: 0.8415689674879517, iteration: 433354
loss: 0.9896458387374878,grad_norm: 0.7266130017001496, iteration: 433355
loss: 0.9736320972442627,grad_norm: 0.6286848907138921, iteration: 433356
loss: 1.0014594793319702,grad_norm: 0.8412253466846519, iteration: 433357
loss: 0.9839193820953369,grad_norm: 0.7327955822889249, iteration: 433358
loss: 1.0509593486785889,grad_norm: 0.9999995530052896, iteration: 433359
loss: 1.023207664489746,grad_norm: 0.9999993032476648, iteration: 433360
loss: 0.9640100002288818,grad_norm: 0.6715695855483453, iteration: 433361
loss: 0.9915825128555298,grad_norm: 0.8487965950600237, iteration: 433362
loss: 1.0164401531219482,grad_norm: 0.7490058853420594, iteration: 433363
loss: 0.9888426065444946,grad_norm: 0.5987289168724301, iteration: 433364
loss: 0.971361517906189,grad_norm: 0.84158206800935, iteration: 433365
loss: 0.9921470880508423,grad_norm: 0.8133166842712639, iteration: 433366
loss: 1.0000656843185425,grad_norm: 0.7175417065411622, iteration: 433367
loss: 0.9567074179649353,grad_norm: 0.790465025646806, iteration: 433368
loss: 0.9866867661476135,grad_norm: 0.8866507391527547, iteration: 433369
loss: 1.0119140148162842,grad_norm: 0.8595134182241985, iteration: 433370
loss: 1.016373872756958,grad_norm: 0.8610337330317426, iteration: 433371
loss: 0.9756543040275574,grad_norm: 0.6822124002271153, iteration: 433372
loss: 0.95442134141922,grad_norm: 0.8361873316332108, iteration: 433373
loss: 1.045387864112854,grad_norm: 0.8302040918518206, iteration: 433374
loss: 1.0065280199050903,grad_norm: 0.7294345271864925, iteration: 433375
loss: 0.9759805798530579,grad_norm: 0.906650845229768, iteration: 433376
loss: 1.008726716041565,grad_norm: 0.814292511030241, iteration: 433377
loss: 1.001734733581543,grad_norm: 0.699409613238177, iteration: 433378
loss: 0.9613718390464783,grad_norm: 0.7300683904411157, iteration: 433379
loss: 0.9486011862754822,grad_norm: 0.7830902301988268, iteration: 433380
loss: 1.0225651264190674,grad_norm: 0.92359834097069, iteration: 433381
loss: 1.0266315937042236,grad_norm: 0.8535822411585253, iteration: 433382
loss: 1.0157721042633057,grad_norm: 0.7797147338699009, iteration: 433383
loss: 1.0489606857299805,grad_norm: 0.7339174854253758, iteration: 433384
loss: 1.0018584728240967,grad_norm: 0.6857464663694554, iteration: 433385
loss: 1.0036429166793823,grad_norm: 0.8183695140277454, iteration: 433386
loss: 1.0281116962432861,grad_norm: 0.9999992257370219, iteration: 433387
loss: 1.0122132301330566,grad_norm: 0.9999998033501981, iteration: 433388
loss: 0.9604709148406982,grad_norm: 0.7671964544554383, iteration: 433389
loss: 1.0003154277801514,grad_norm: 0.8435282715867388, iteration: 433390
loss: 1.015170931816101,grad_norm: 0.77713223786209, iteration: 433391
loss: 1.0047887563705444,grad_norm: 0.803991908733483, iteration: 433392
loss: 1.0179822444915771,grad_norm: 0.8127642448349356, iteration: 433393
loss: 0.984221875667572,grad_norm: 0.8178619602721825, iteration: 433394
loss: 0.9543114304542542,grad_norm: 0.8099607203420608, iteration: 433395
loss: 1.055352807044983,grad_norm: 0.9999997810902959, iteration: 433396
loss: 1.0096369981765747,grad_norm: 0.8780293980094773, iteration: 433397
loss: 0.9984516501426697,grad_norm: 0.6656769526913702, iteration: 433398
loss: 0.9781804084777832,grad_norm: 0.826048120076653, iteration: 433399
loss: 0.9762001633644104,grad_norm: 0.8376371071643064, iteration: 433400
loss: 1.0210270881652832,grad_norm: 0.8165701473259134, iteration: 433401
loss: 0.9595266580581665,grad_norm: 0.837246401280369, iteration: 433402
loss: 1.006726622581482,grad_norm: 0.7566072337428015, iteration: 433403
loss: 1.105641484260559,grad_norm: 0.9231379801148196, iteration: 433404
loss: 1.0210026502609253,grad_norm: 0.7364409222258153, iteration: 433405
loss: 1.0079580545425415,grad_norm: 0.9999993498432417, iteration: 433406
loss: 1.009766936302185,grad_norm: 0.9089487412151005, iteration: 433407
loss: 0.9893868565559387,grad_norm: 0.6841304773568839, iteration: 433408
loss: 0.9635759592056274,grad_norm: 0.936123554021679, iteration: 433409
loss: 0.992451012134552,grad_norm: 0.9999992840224174, iteration: 433410
loss: 0.9896330237388611,grad_norm: 0.7904977092800917, iteration: 433411
loss: 1.0005468130111694,grad_norm: 0.7471989586746959, iteration: 433412
loss: 1.0184053182601929,grad_norm: 0.7152522615422804, iteration: 433413
loss: 1.0074915885925293,grad_norm: 0.8421676678304861, iteration: 433414
loss: 1.016477108001709,grad_norm: 0.6877017174440442, iteration: 433415
loss: 0.9864003658294678,grad_norm: 0.6751448586866616, iteration: 433416
loss: 0.9938808679580688,grad_norm: 0.7857408465282197, iteration: 433417
loss: 0.9957588911056519,grad_norm: 0.7634957734661286, iteration: 433418
loss: 1.0049266815185547,grad_norm: 0.9999997150755323, iteration: 433419
loss: 1.000672459602356,grad_norm: 0.7868028209804235, iteration: 433420
loss: 0.9921201467514038,grad_norm: 0.7688403562925699, iteration: 433421
loss: 1.021474003791809,grad_norm: 0.7249897220910541, iteration: 433422
loss: 1.0315982103347778,grad_norm: 0.7596895881226348, iteration: 433423
loss: 1.026620864868164,grad_norm: 0.9999995907264566, iteration: 433424
loss: 1.036628007888794,grad_norm: 0.9999993608411691, iteration: 433425
loss: 0.996067225933075,grad_norm: 0.8253207674382028, iteration: 433426
loss: 1.0125641822814941,grad_norm: 0.7653133727164797, iteration: 433427
loss: 0.9805604815483093,grad_norm: 0.6608034332912317, iteration: 433428
loss: 0.9804847836494446,grad_norm: 0.6995401830331398, iteration: 433429
loss: 1.022984266281128,grad_norm: 0.8607076390651704, iteration: 433430
loss: 1.0241880416870117,grad_norm: 0.6913578440196838, iteration: 433431
loss: 1.0151835680007935,grad_norm: 0.634158690382473, iteration: 433432
loss: 0.9829919338226318,grad_norm: 0.7010737703878215, iteration: 433433
loss: 0.988397479057312,grad_norm: 0.9453929164788977, iteration: 433434
loss: 0.9965636134147644,grad_norm: 0.8442247201463464, iteration: 433435
loss: 1.0368752479553223,grad_norm: 0.960014215892656, iteration: 433436
loss: 0.9879695773124695,grad_norm: 0.7433006233683379, iteration: 433437
loss: 0.99496990442276,grad_norm: 0.7463816525968934, iteration: 433438
loss: 1.0037308931350708,grad_norm: 0.6882568412902182, iteration: 433439
loss: 1.0060962438583374,grad_norm: 0.8834059107123323, iteration: 433440
loss: 1.0672013759613037,grad_norm: 0.9914964288444951, iteration: 433441
loss: 0.9868764281272888,grad_norm: 0.6313343901167481, iteration: 433442
loss: 0.9726786017417908,grad_norm: 0.6889637842614665, iteration: 433443
loss: 1.1691869497299194,grad_norm: 0.9999997005608152, iteration: 433444
loss: 1.0112570524215698,grad_norm: 0.9999993325439241, iteration: 433445
loss: 1.0289109945297241,grad_norm: 0.9011263634739567, iteration: 433446
loss: 1.033074975013733,grad_norm: 0.84958454285141, iteration: 433447
loss: 1.0055921077728271,grad_norm: 0.8309711944675924, iteration: 433448
loss: 1.018860936164856,grad_norm: 0.7297996305025828, iteration: 433449
loss: 1.0033432245254517,grad_norm: 0.7718573764469688, iteration: 433450
loss: 1.004555583000183,grad_norm: 0.8751577124600073, iteration: 433451
loss: 0.993280827999115,grad_norm: 0.8989053083722952, iteration: 433452
loss: 0.9814985990524292,grad_norm: 0.6594857366837243, iteration: 433453
loss: 0.9554468989372253,grad_norm: 0.8398210110435785, iteration: 433454
loss: 0.9894264936447144,grad_norm: 0.9999999529283771, iteration: 433455
loss: 0.9922506213188171,grad_norm: 0.9999991924283661, iteration: 433456
loss: 0.9915294051170349,grad_norm: 0.8329112885254886, iteration: 433457
loss: 0.9945293068885803,grad_norm: 0.799646280446703, iteration: 433458
loss: 1.0259932279586792,grad_norm: 0.7747929338980717, iteration: 433459
loss: 0.9790117740631104,grad_norm: 0.6504281020782842, iteration: 433460
loss: 1.0114697217941284,grad_norm: 0.7811109321842695, iteration: 433461
loss: 0.9935229420661926,grad_norm: 0.9999998083034661, iteration: 433462
loss: 0.9687398076057434,grad_norm: 0.8734887527027704, iteration: 433463
loss: 1.0234256982803345,grad_norm: 0.8779320080704125, iteration: 433464
loss: 0.995361328125,grad_norm: 0.7210393058778639, iteration: 433465
loss: 1.005587100982666,grad_norm: 0.7910007101314245, iteration: 433466
loss: 1.0443943738937378,grad_norm: 0.7912462577669304, iteration: 433467
loss: 0.9802823662757874,grad_norm: 0.6735897295825936, iteration: 433468
loss: 1.0187325477600098,grad_norm: 0.6794101388655696, iteration: 433469
loss: 1.0205399990081787,grad_norm: 0.7413694504049254, iteration: 433470
loss: 0.9782163500785828,grad_norm: 0.8243021672378485, iteration: 433471
loss: 0.9725425839424133,grad_norm: 0.719525591112597, iteration: 433472
loss: 0.9802045226097107,grad_norm: 0.7640123642878139, iteration: 433473
loss: 0.991290271282196,grad_norm: 0.6514250378019357, iteration: 433474
loss: 1.0369913578033447,grad_norm: 0.8668878733969145, iteration: 433475
loss: 1.0273854732513428,grad_norm: 0.6854855825498797, iteration: 433476
loss: 0.9626646041870117,grad_norm: 0.7413731582193103, iteration: 433477
loss: 0.9511200785636902,grad_norm: 0.6972588371168025, iteration: 433478
loss: 0.9868839979171753,grad_norm: 0.9374114478161991, iteration: 433479
loss: 0.9844787120819092,grad_norm: 0.7768978453065043, iteration: 433480
loss: 1.001190185546875,grad_norm: 0.8403677348879204, iteration: 433481
loss: 1.0018912553787231,grad_norm: 0.8034290159293523, iteration: 433482
loss: 0.9727064967155457,grad_norm: 0.900757712780354, iteration: 433483
loss: 0.9988177418708801,grad_norm: 0.786535220841318, iteration: 433484
loss: 1.0073195695877075,grad_norm: 0.7620091452412661, iteration: 433485
loss: 0.9735309481620789,grad_norm: 0.76199480135333, iteration: 433486
loss: 0.9665055274963379,grad_norm: 0.6628809213784457, iteration: 433487
loss: 1.0107159614562988,grad_norm: 0.8548835353811083, iteration: 433488
loss: 1.0405384302139282,grad_norm: 0.7807550474872643, iteration: 433489
loss: 0.9755761027336121,grad_norm: 0.7885040935735442, iteration: 433490
loss: 0.9789594411849976,grad_norm: 0.7023970337965717, iteration: 433491
loss: 1.0073682069778442,grad_norm: 0.7603930411872888, iteration: 433492
loss: 0.993844211101532,grad_norm: 0.999999824283121, iteration: 433493
loss: 0.9882534742355347,grad_norm: 0.7706404210896324, iteration: 433494
loss: 1.0277999639511108,grad_norm: 0.7905929745218049, iteration: 433495
loss: 0.9824365973472595,grad_norm: 0.8834014767917427, iteration: 433496
loss: 1.0101892948150635,grad_norm: 0.632041905875912, iteration: 433497
loss: 1.0011929273605347,grad_norm: 0.7490266700815024, iteration: 433498
loss: 0.9940174221992493,grad_norm: 0.7687504339879768, iteration: 433499
loss: 0.9970539808273315,grad_norm: 0.657046508851067, iteration: 433500
loss: 0.9803556203842163,grad_norm: 0.9101419849504845, iteration: 433501
loss: 1.0205841064453125,grad_norm: 0.7670118831859174, iteration: 433502
loss: 1.008023738861084,grad_norm: 0.9999992199144466, iteration: 433503
loss: 1.0660879611968994,grad_norm: 0.8199916948285663, iteration: 433504
loss: 0.9957723021507263,grad_norm: 0.6994844659849949, iteration: 433505
loss: 0.9831550121307373,grad_norm: 0.888940264615887, iteration: 433506
loss: 1.0618269443511963,grad_norm: 0.9999999103458374, iteration: 433507
loss: 1.0272235870361328,grad_norm: 0.9999995918258126, iteration: 433508
loss: 1.029218077659607,grad_norm: 0.7402054559630052, iteration: 433509
loss: 0.996465265750885,grad_norm: 0.845690421090776, iteration: 433510
loss: 0.9662973284721375,grad_norm: 0.7482919792135043, iteration: 433511
loss: 0.9573567509651184,grad_norm: 0.6601465756828646, iteration: 433512
loss: 1.005411982536316,grad_norm: 0.7391213571449897, iteration: 433513
loss: 1.0003938674926758,grad_norm: 0.9999991619493218, iteration: 433514
loss: 0.9714223742485046,grad_norm: 0.8804906778759702, iteration: 433515
loss: 1.0208582878112793,grad_norm: 0.9999999211073262, iteration: 433516
loss: 0.9884409308433533,grad_norm: 0.7523964900838715, iteration: 433517
loss: 1.0086370706558228,grad_norm: 1.0000000252575647, iteration: 433518
loss: 0.983680784702301,grad_norm: 0.7629502539979651, iteration: 433519
loss: 1.22265625,grad_norm: 0.9999997012803997, iteration: 433520
loss: 1.0057921409606934,grad_norm: 0.8181518951512674, iteration: 433521
loss: 0.9784698486328125,grad_norm: 0.9850197880311233, iteration: 433522
loss: 0.9910023808479309,grad_norm: 0.690322542612984, iteration: 433523
loss: 0.994589626789093,grad_norm: 0.8274908701197459, iteration: 433524
loss: 0.9769108295440674,grad_norm: 0.6573004774655316, iteration: 433525
loss: 1.0359976291656494,grad_norm: 0.7479417304983204, iteration: 433526
loss: 1.039797067642212,grad_norm: 0.9681947006000766, iteration: 433527
loss: 0.9595432281494141,grad_norm: 0.7399711968419114, iteration: 433528
loss: 0.9636212587356567,grad_norm: 0.7484265733652721, iteration: 433529
loss: 1.201311469078064,grad_norm: 0.9999996156647518, iteration: 433530
loss: 1.03416907787323,grad_norm: 0.9999999295352884, iteration: 433531
loss: 0.9961129426956177,grad_norm: 0.674924497185737, iteration: 433532
loss: 1.0412001609802246,grad_norm: 0.9999995414712861, iteration: 433533
loss: 0.9903334975242615,grad_norm: 0.804889241038165, iteration: 433534
loss: 1.0162216424942017,grad_norm: 0.8438227306434836, iteration: 433535
loss: 1.012929081916809,grad_norm: 0.893375408624802, iteration: 433536
loss: 0.9675243496894836,grad_norm: 0.8143078091729178, iteration: 433537
loss: 1.0008444786071777,grad_norm: 0.673022908485072, iteration: 433538
loss: 0.9777234196662903,grad_norm: 0.7740894694304923, iteration: 433539
loss: 1.0250998735427856,grad_norm: 0.8457515389538216, iteration: 433540
loss: 1.0263715982437134,grad_norm: 0.6894597036700203, iteration: 433541
loss: 0.9749513864517212,grad_norm: 0.8249267576581896, iteration: 433542
loss: 1.0387555360794067,grad_norm: 0.7343641505386582, iteration: 433543
loss: 1.0248271226882935,grad_norm: 0.7983279066625298, iteration: 433544
loss: 1.0794609785079956,grad_norm: 0.9999993688641361, iteration: 433545
loss: 1.02240788936615,grad_norm: 0.8131619341919007, iteration: 433546
loss: 1.0030038356781006,grad_norm: 0.7580621575038882, iteration: 433547
loss: 0.989402174949646,grad_norm: 0.6769571913183988, iteration: 433548
loss: 0.9744393825531006,grad_norm: 0.837970121846945, iteration: 433549
loss: 1.000863790512085,grad_norm: 0.82113290389358, iteration: 433550
loss: 1.0074034929275513,grad_norm: 0.8172367091410232, iteration: 433551
loss: 1.0234297513961792,grad_norm: 0.7664611126880205, iteration: 433552
loss: 1.0211620330810547,grad_norm: 0.9086111235820477, iteration: 433553
loss: 1.0012530088424683,grad_norm: 0.713652239167676, iteration: 433554
loss: 0.9905887246131897,grad_norm: 0.8547841906298702, iteration: 433555
loss: 1.0083792209625244,grad_norm: 0.7874099025834932, iteration: 433556
loss: 0.9836539030075073,grad_norm: 0.9999989298997408, iteration: 433557
loss: 0.9993149042129517,grad_norm: 0.9999989805721561, iteration: 433558
loss: 0.995495617389679,grad_norm: 0.7967491311264028, iteration: 433559
loss: 1.0482481718063354,grad_norm: 0.999999168199017, iteration: 433560
loss: 0.9746010303497314,grad_norm: 0.6569794761272776, iteration: 433561
loss: 0.948201060295105,grad_norm: 0.8909107319004899, iteration: 433562
loss: 1.0120253562927246,grad_norm: 0.9491392085444254, iteration: 433563
loss: 0.9578056931495667,grad_norm: 0.6076661756107309, iteration: 433564
loss: 0.9843083024024963,grad_norm: 0.7108868099127101, iteration: 433565
loss: 1.0344953536987305,grad_norm: 0.7222411769282975, iteration: 433566
loss: 0.9777489304542542,grad_norm: 0.7550101215145937, iteration: 433567
loss: 1.0102614164352417,grad_norm: 0.867515531360604, iteration: 433568
loss: 1.035176396369934,grad_norm: 0.7850827660829481, iteration: 433569
loss: 1.0004297494888306,grad_norm: 0.9927739547182625, iteration: 433570
loss: 0.9752969741821289,grad_norm: 0.7858408632595204, iteration: 433571
loss: 1.0091183185577393,grad_norm: 0.9805055220043332, iteration: 433572
loss: 0.9998842477798462,grad_norm: 0.9999991020092047, iteration: 433573
loss: 0.9657264351844788,grad_norm: 0.8852752286680036, iteration: 433574
loss: 1.0389220714569092,grad_norm: 0.8351760844103203, iteration: 433575
loss: 1.0116502046585083,grad_norm: 0.7612470102647256, iteration: 433576
loss: 0.9947913885116577,grad_norm: 0.7492119189786022, iteration: 433577
loss: 0.9823902249336243,grad_norm: 0.801141314648778, iteration: 433578
loss: 1.036134123802185,grad_norm: 0.8138423088237484, iteration: 433579
loss: 0.9523033499717712,grad_norm: 0.8052047735584782, iteration: 433580
loss: 0.9977028369903564,grad_norm: 0.7987371825850323, iteration: 433581
loss: 0.98382169008255,grad_norm: 0.7354750860401212, iteration: 433582
loss: 0.9922069907188416,grad_norm: 0.753797917313449, iteration: 433583
loss: 1.0123634338378906,grad_norm: 0.7021346951247284, iteration: 433584
loss: 1.0155839920043945,grad_norm: 0.9999998185409823, iteration: 433585
loss: 1.3066047430038452,grad_norm: 0.9999998659670581, iteration: 433586
loss: 1.0293500423431396,grad_norm: 0.909123121150472, iteration: 433587
loss: 0.9612962007522583,grad_norm: 0.9999990849996185, iteration: 433588
loss: 0.9915953874588013,grad_norm: 0.6774983905124935, iteration: 433589
loss: 0.9583001136779785,grad_norm: 0.7101943773603806, iteration: 433590
loss: 1.0077502727508545,grad_norm: 0.9504243716000876, iteration: 433591
loss: 1.0024774074554443,grad_norm: 0.7931200731509879, iteration: 433592
loss: 1.001287817955017,grad_norm: 0.9999995918004543, iteration: 433593
loss: 1.04781973361969,grad_norm: 0.9999990483827237, iteration: 433594
loss: 0.972141444683075,grad_norm: 0.7639016311688391, iteration: 433595
loss: 0.9720057249069214,grad_norm: 0.9999995511187799, iteration: 433596
loss: 1.0061132907867432,grad_norm: 0.989682052077168, iteration: 433597
loss: 1.0170040130615234,grad_norm: 0.842986530058769, iteration: 433598
loss: 1.024522066116333,grad_norm: 0.7776242862270208, iteration: 433599
loss: 0.9878735542297363,grad_norm: 0.8513525326868646, iteration: 433600
loss: 1.0066803693771362,grad_norm: 0.9999991898282004, iteration: 433601
loss: 0.9622926115989685,grad_norm: 0.8176179451736063, iteration: 433602
loss: 1.0230598449707031,grad_norm: 0.8541717235637509, iteration: 433603
loss: 1.0147660970687866,grad_norm: 0.9999997219554306, iteration: 433604
loss: 1.1211769580841064,grad_norm: 0.8347551666938974, iteration: 433605
loss: 0.9865009188652039,grad_norm: 0.787967702728057, iteration: 433606
loss: 1.0080336332321167,grad_norm: 0.999999749436054, iteration: 433607
loss: 1.1952741146087646,grad_norm: 0.878737156209278, iteration: 433608
loss: 1.0240854024887085,grad_norm: 0.8495489912955413, iteration: 433609
loss: 0.9872660040855408,grad_norm: 0.6731124893568315, iteration: 433610
loss: 1.0133010149002075,grad_norm: 0.6731001135695993, iteration: 433611
loss: 1.010746717453003,grad_norm: 0.7450346586051856, iteration: 433612
loss: 1.04629385471344,grad_norm: 0.8120686410160125, iteration: 433613
loss: 0.9906740188598633,grad_norm: 0.7664516628892479, iteration: 433614
loss: 0.9529103636741638,grad_norm: 0.7514764063393843, iteration: 433615
loss: 1.0683331489562988,grad_norm: 0.9999996330245888, iteration: 433616
loss: 1.007357120513916,grad_norm: 0.7867262751307064, iteration: 433617
loss: 0.9677082300186157,grad_norm: 0.652578437462782, iteration: 433618
loss: 0.9889280200004578,grad_norm: 0.6305501954954715, iteration: 433619
loss: 1.0106080770492554,grad_norm: 0.9999991196944047, iteration: 433620
loss: 1.0145481824874878,grad_norm: 0.9941930254280937, iteration: 433621
loss: 1.0468733310699463,grad_norm: 0.9442637192445636, iteration: 433622
loss: 0.9992527961730957,grad_norm: 0.9035386019820479, iteration: 433623
loss: 1.0277811288833618,grad_norm: 0.8316723997112458, iteration: 433624
loss: 0.9811657667160034,grad_norm: 0.6565079667845952, iteration: 433625
loss: 1.0130366086959839,grad_norm: 0.7405028373238313, iteration: 433626
loss: 0.970859706401825,grad_norm: 0.8012833196707128, iteration: 433627
loss: 0.9822210669517517,grad_norm: 0.8205565769944774, iteration: 433628
loss: 0.9922021627426147,grad_norm: 0.7980251321785079, iteration: 433629
loss: 0.9789177775382996,grad_norm: 0.7768243990000625, iteration: 433630
loss: 0.9979571104049683,grad_norm: 0.9999993807702386, iteration: 433631
loss: 1.0456383228302002,grad_norm: 0.9999991502340025, iteration: 433632
loss: 1.0245866775512695,grad_norm: 0.8090938362034824, iteration: 433633
loss: 1.029727578163147,grad_norm: 0.9188377133036157, iteration: 433634
loss: 1.01631498336792,grad_norm: 0.7161739087591735, iteration: 433635
loss: 0.9725897312164307,grad_norm: 0.7607330881332506, iteration: 433636
loss: 0.9835212826728821,grad_norm: 0.7429298486385656, iteration: 433637
loss: 1.003593921661377,grad_norm: 0.7735359058861243, iteration: 433638
loss: 0.9965333342552185,grad_norm: 0.831395258307224, iteration: 433639
loss: 1.0393130779266357,grad_norm: 0.8178084476340289, iteration: 433640
loss: 0.9741097688674927,grad_norm: 0.7803212796234487, iteration: 433641
loss: 0.994438111782074,grad_norm: 0.7831814836414401, iteration: 433642
loss: 0.9744249582290649,grad_norm: 0.7506253400354822, iteration: 433643
loss: 1.0245519876480103,grad_norm: 0.7805525806491281, iteration: 433644
loss: 1.0460319519042969,grad_norm: 0.9999990237169396, iteration: 433645
loss: 1.1393659114837646,grad_norm: 0.9999994682505401, iteration: 433646
loss: 1.0116695165634155,grad_norm: 0.8251326409397075, iteration: 433647
loss: 0.9860338568687439,grad_norm: 0.8493503319393384, iteration: 433648
loss: 1.0018112659454346,grad_norm: 0.6428689726802239, iteration: 433649
loss: 0.9787296652793884,grad_norm: 0.7000721735816294, iteration: 433650
loss: 1.0260000228881836,grad_norm: 0.7919752424871425, iteration: 433651
loss: 1.008837103843689,grad_norm: 0.8829832129288391, iteration: 433652
loss: 0.9922199249267578,grad_norm: 0.8341748497841754, iteration: 433653
loss: 0.9683390855789185,grad_norm: 0.7731156443804432, iteration: 433654
loss: 1.0111515522003174,grad_norm: 0.6461729946213267, iteration: 433655
loss: 0.9987280368804932,grad_norm: 0.6343009578273417, iteration: 433656
loss: 0.9986293315887451,grad_norm: 0.6560766920176428, iteration: 433657
loss: 1.0562312602996826,grad_norm: 0.9878531558019191, iteration: 433658
loss: 0.9927692413330078,grad_norm: 0.7338572025812294, iteration: 433659
loss: 0.9701809287071228,grad_norm: 0.8508164743891876, iteration: 433660
loss: 0.9871610999107361,grad_norm: 0.8491598977940108, iteration: 433661
loss: 1.1503403186798096,grad_norm: 0.9999990315931138, iteration: 433662
loss: 1.037026286125183,grad_norm: 0.8540849201576018, iteration: 433663
loss: 0.9774206280708313,grad_norm: 0.9081220601986403, iteration: 433664
loss: 1.0133880376815796,grad_norm: 0.7593617714549614, iteration: 433665
loss: 1.0318892002105713,grad_norm: 0.8340818562044463, iteration: 433666
loss: 1.028300404548645,grad_norm: 0.7452069862121189, iteration: 433667
loss: 0.9807173013687134,grad_norm: 0.8041320675463801, iteration: 433668
loss: 1.0151652097702026,grad_norm: 0.7490715352491508, iteration: 433669
loss: 0.951565146446228,grad_norm: 0.7229521558214325, iteration: 433670
loss: 1.0036038160324097,grad_norm: 0.8355052330645235, iteration: 433671
loss: 0.984011709690094,grad_norm: 0.7353003298526322, iteration: 433672
loss: 1.0257164239883423,grad_norm: 0.7255042937881814, iteration: 433673
loss: 1.0123436450958252,grad_norm: 0.9690579357162447, iteration: 433674
loss: 1.0211328268051147,grad_norm: 0.6992824454482719, iteration: 433675
loss: 1.040915608406067,grad_norm: 0.7501504249600789, iteration: 433676
loss: 0.9999350905418396,grad_norm: 0.8352107549783616, iteration: 433677
loss: 1.0158952474594116,grad_norm: 0.890518545030243, iteration: 433678
loss: 0.961564838886261,grad_norm: 0.7509106774640579, iteration: 433679
loss: 1.0291178226470947,grad_norm: 0.7876157281304373, iteration: 433680
loss: 1.0110254287719727,grad_norm: 0.7939411013917108, iteration: 433681
loss: 1.0255887508392334,grad_norm: 0.8225615140882877, iteration: 433682
loss: 0.9936949014663696,grad_norm: 0.8096885019870919, iteration: 433683
loss: 0.979588508605957,grad_norm: 0.8489131829016346, iteration: 433684
loss: 0.9970425367355347,grad_norm: 0.8131827143224375, iteration: 433685
loss: 1.0074251890182495,grad_norm: 0.9999994294361806, iteration: 433686
loss: 0.9863469004631042,grad_norm: 0.7954501720418984, iteration: 433687
loss: 1.0260308980941772,grad_norm: 0.7684165777402143, iteration: 433688
loss: 0.9772037863731384,grad_norm: 0.8079269887467891, iteration: 433689
loss: 1.0170602798461914,grad_norm: 0.9329424449457348, iteration: 433690
loss: 0.9903576374053955,grad_norm: 0.669395818124892, iteration: 433691
loss: 0.9857572913169861,grad_norm: 0.7966901729403824, iteration: 433692
loss: 1.0212035179138184,grad_norm: 0.7377622684147397, iteration: 433693
loss: 1.025593876838684,grad_norm: 0.891464829648459, iteration: 433694
loss: 1.0018622875213623,grad_norm: 0.6918465638956842, iteration: 433695
loss: 1.019459843635559,grad_norm: 0.8139251427177587, iteration: 433696
loss: 1.0319159030914307,grad_norm: 0.9847003874803427, iteration: 433697
loss: 0.9869817495346069,grad_norm: 0.8768078976626449, iteration: 433698
loss: 0.9948842525482178,grad_norm: 0.9970042529835571, iteration: 433699
loss: 0.9742118120193481,grad_norm: 0.7399143667155048, iteration: 433700
loss: 0.9962021708488464,grad_norm: 0.7130514396013545, iteration: 433701
loss: 1.0064932107925415,grad_norm: 0.7390713692275528, iteration: 433702
loss: 1.0313122272491455,grad_norm: 0.9723324105001859, iteration: 433703
loss: 1.0007457733154297,grad_norm: 0.999999887898932, iteration: 433704
loss: 1.0101983547210693,grad_norm: 0.790269025049028, iteration: 433705
loss: 1.015121340751648,grad_norm: 0.6886969976065577, iteration: 433706
loss: 0.978915274143219,grad_norm: 0.9259128170417427, iteration: 433707
loss: 1.0210341215133667,grad_norm: 0.76341733108218, iteration: 433708
loss: 0.9763234257698059,grad_norm: 0.773292356699068, iteration: 433709
loss: 0.9720262885093689,grad_norm: 0.683223734380561, iteration: 433710
loss: 1.092629313468933,grad_norm: 0.7511153085464346, iteration: 433711
loss: 0.9893336296081543,grad_norm: 0.8349310173307989, iteration: 433712
loss: 1.0040079355239868,grad_norm: 0.8517786065309859, iteration: 433713
loss: 1.0212172269821167,grad_norm: 0.8315909273379468, iteration: 433714
loss: 0.9898144006729126,grad_norm: 0.7231439693017331, iteration: 433715
loss: 0.9480060935020447,grad_norm: 0.8570660853466179, iteration: 433716
loss: 1.0087522268295288,grad_norm: 0.7534544604183742, iteration: 433717
loss: 1.0173999071121216,grad_norm: 0.6980163144595651, iteration: 433718
loss: 1.0052238702774048,grad_norm: 0.7361964842487685, iteration: 433719
loss: 1.0105150938034058,grad_norm: 0.8067962852900372, iteration: 433720
loss: 1.0412180423736572,grad_norm: 0.7941931531335933, iteration: 433721
loss: 0.9902781844139099,grad_norm: 0.9816125884539867, iteration: 433722
loss: 0.9954100251197815,grad_norm: 0.7498840459537758, iteration: 433723
loss: 1.0104503631591797,grad_norm: 0.8736929195643648, iteration: 433724
loss: 1.2583798170089722,grad_norm: 0.999999377953214, iteration: 433725
loss: 1.0042318105697632,grad_norm: 0.8896579942058138, iteration: 433726
loss: 0.9903908967971802,grad_norm: 0.8703793153695281, iteration: 433727
loss: 0.9978280067443848,grad_norm: 0.8339214283090052, iteration: 433728
loss: 1.010169506072998,grad_norm: 0.876279825643491, iteration: 433729
loss: 1.0181553363800049,grad_norm: 0.7270769937672068, iteration: 433730
loss: 0.9903513789176941,grad_norm: 0.6194962032083687, iteration: 433731
loss: 0.9778501391410828,grad_norm: 0.7983960462229968, iteration: 433732
loss: 1.0023623704910278,grad_norm: 0.9024680432566868, iteration: 433733
loss: 1.000986933708191,grad_norm: 0.7162928064521464, iteration: 433734
loss: 0.9917383193969727,grad_norm: 0.8453765459220229, iteration: 433735
loss: 1.0299254655838013,grad_norm: 0.7687835312408605, iteration: 433736
loss: 0.9947509765625,grad_norm: 0.9736477478638317, iteration: 433737
loss: 1.0342646837234497,grad_norm: 0.9322486081056509, iteration: 433738
loss: 1.075463891029358,grad_norm: 0.7964607145896011, iteration: 433739
loss: 0.9795975089073181,grad_norm: 0.7619278852272092, iteration: 433740
loss: 1.1184186935424805,grad_norm: 0.8530144320003854, iteration: 433741
loss: 1.0140800476074219,grad_norm: 0.8787042233462241, iteration: 433742
loss: 0.9981467723846436,grad_norm: 0.8379920199575218, iteration: 433743
loss: 1.0430150032043457,grad_norm: 0.910630249534216, iteration: 433744
loss: 1.0112498998641968,grad_norm: 0.9823456561189952, iteration: 433745
loss: 1.0044225454330444,grad_norm: 0.7100140265222096, iteration: 433746
loss: 0.9573442339897156,grad_norm: 0.8321963357726364, iteration: 433747
loss: 0.9923697710037231,grad_norm: 0.8088756317564204, iteration: 433748
loss: 1.030759572982788,grad_norm: 0.6704403477208188, iteration: 433749
loss: 0.9969698190689087,grad_norm: 0.794367429790829, iteration: 433750
loss: 0.9841421246528625,grad_norm: 0.6782719021882054, iteration: 433751
loss: 1.0118428468704224,grad_norm: 0.8400359281140958, iteration: 433752
loss: 0.963792085647583,grad_norm: 0.7779282529193866, iteration: 433753
loss: 0.967841386795044,grad_norm: 0.8497991375179348, iteration: 433754
loss: 0.9654085636138916,grad_norm: 0.6749644349855224, iteration: 433755
loss: 0.9954372644424438,grad_norm: 0.7405811518615723, iteration: 433756
loss: 1.035261869430542,grad_norm: 0.7329158528128054, iteration: 433757
loss: 1.0407459735870361,grad_norm: 0.7732312438972485, iteration: 433758
loss: 0.9774735569953918,grad_norm: 0.7362188145407498, iteration: 433759
loss: 1.0071353912353516,grad_norm: 0.6934929673878464, iteration: 433760
loss: 0.9815782308578491,grad_norm: 0.6761472008825201, iteration: 433761
loss: 1.0196709632873535,grad_norm: 0.7410210652514065, iteration: 433762
loss: 0.9980173707008362,grad_norm: 0.8646689843492246, iteration: 433763
loss: 0.9577336311340332,grad_norm: 0.7565901883854913, iteration: 433764
loss: 0.9959193468093872,grad_norm: 0.8586412682939056, iteration: 433765
loss: 0.9675382375717163,grad_norm: 0.8797249903324008, iteration: 433766
loss: 0.9796381592750549,grad_norm: 0.6962468272103366, iteration: 433767
loss: 1.0281163454055786,grad_norm: 0.7609620652574237, iteration: 433768
loss: 1.008042812347412,grad_norm: 0.7709031699593001, iteration: 433769
loss: 1.0194628238677979,grad_norm: 0.7124877721374555, iteration: 433770
loss: 1.0104749202728271,grad_norm: 0.6200991231220828, iteration: 433771
loss: 0.9708132147789001,grad_norm: 0.9999991832453219, iteration: 433772
loss: 1.0292166471481323,grad_norm: 0.9999997720753092, iteration: 433773
loss: 1.0219000577926636,grad_norm: 0.8475416610404247, iteration: 433774
loss: 0.9972208142280579,grad_norm: 0.8662960677101772, iteration: 433775
loss: 0.9831227660179138,grad_norm: 0.755066439224693, iteration: 433776
loss: 0.9730048775672913,grad_norm: 0.7737820939379173, iteration: 433777
loss: 1.0245088338851929,grad_norm: 0.7751594134956792, iteration: 433778
loss: 1.0189248323440552,grad_norm: 0.6979647008757629, iteration: 433779
loss: 1.0096765756607056,grad_norm: 0.6778848903531568, iteration: 433780
loss: 1.0128897428512573,grad_norm: 0.7027393880306779, iteration: 433781
loss: 0.9833746552467346,grad_norm: 0.7774790248272885, iteration: 433782
loss: 1.0265048742294312,grad_norm: 0.9999998043523418, iteration: 433783
loss: 1.0213451385498047,grad_norm: 0.8579926726870651, iteration: 433784
loss: 0.9426932334899902,grad_norm: 0.7172127456974705, iteration: 433785
loss: 0.9645670652389526,grad_norm: 0.8605913377640629, iteration: 433786
loss: 1.001771330833435,grad_norm: 0.744373172155948, iteration: 433787
loss: 1.0147029161453247,grad_norm: 0.9026457534108959, iteration: 433788
loss: 1.0464260578155518,grad_norm: 0.7471980512435442, iteration: 433789
loss: 0.9779807329177856,grad_norm: 0.7512888095648221, iteration: 433790
loss: 1.0270024538040161,grad_norm: 0.8669771626521341, iteration: 433791
loss: 1.0370006561279297,grad_norm: 0.870074122785586, iteration: 433792
loss: 1.0102320909500122,grad_norm: 0.8188342828339807, iteration: 433793
loss: 1.009757399559021,grad_norm: 0.6196048405552139, iteration: 433794
loss: 1.0188454389572144,grad_norm: 0.683866855326444, iteration: 433795
loss: 0.968653678894043,grad_norm: 0.8445242592589045, iteration: 433796
loss: 1.0034338235855103,grad_norm: 0.6397396362372083, iteration: 433797
loss: 0.9883086085319519,grad_norm: 0.8970390283928652, iteration: 433798
loss: 0.9478737711906433,grad_norm: 0.7555199399705188, iteration: 433799
loss: 1.0179167985916138,grad_norm: 0.9999991937064207, iteration: 433800
loss: 1.0151607990264893,grad_norm: 0.9999996161257008, iteration: 433801
loss: 0.9858583211898804,grad_norm: 0.9981174023970112, iteration: 433802
loss: 0.9993733763694763,grad_norm: 0.6811950082545277, iteration: 433803
loss: 0.9660888910293579,grad_norm: 0.7670498321147933, iteration: 433804
loss: 1.0063608884811401,grad_norm: 0.8249877674728716, iteration: 433805
loss: 1.037283182144165,grad_norm: 0.9999999935486215, iteration: 433806
loss: 1.047502040863037,grad_norm: 0.9999993160153837, iteration: 433807
loss: 1.0013865232467651,grad_norm: 0.7653668383095618, iteration: 433808
loss: 1.001541018486023,grad_norm: 0.7895054624616308, iteration: 433809
loss: 0.9880421757698059,grad_norm: 0.879065801223416, iteration: 433810
loss: 1.0070536136627197,grad_norm: 0.9408291277375975, iteration: 433811
loss: 0.9803150296211243,grad_norm: 0.8239893068913317, iteration: 433812
loss: 1.0338165760040283,grad_norm: 0.9999991480732326, iteration: 433813
loss: 0.9888860583305359,grad_norm: 0.9999992895738363, iteration: 433814
loss: 0.9892164468765259,grad_norm: 0.9468160121856982, iteration: 433815
loss: 1.001990556716919,grad_norm: 0.7771708960473481, iteration: 433816
loss: 1.01129150390625,grad_norm: 0.8215037948058846, iteration: 433817
loss: 0.9462487101554871,grad_norm: 0.7917161641319755, iteration: 433818
loss: 1.015455961227417,grad_norm: 0.7075602160837521, iteration: 433819
loss: 0.9680741429328918,grad_norm: 0.69757056883033, iteration: 433820
loss: 0.9820188879966736,grad_norm: 0.8556707774599105, iteration: 433821
loss: 1.0386990308761597,grad_norm: 0.7935906126322673, iteration: 433822
loss: 1.0478228330612183,grad_norm: 0.8690508271354361, iteration: 433823
loss: 0.9959981441497803,grad_norm: 0.6597392623054781, iteration: 433824
loss: 0.9628058075904846,grad_norm: 0.6966979475124069, iteration: 433825
loss: 0.989535391330719,grad_norm: 0.8342439012603251, iteration: 433826
loss: 0.9804537892341614,grad_norm: 0.6937173999949476, iteration: 433827
loss: 1.023598313331604,grad_norm: 0.6852957073845414, iteration: 433828
loss: 0.9839397072792053,grad_norm: 0.8623690165157228, iteration: 433829
loss: 0.9810212850570679,grad_norm: 0.6772500765436998, iteration: 433830
loss: 0.9773484468460083,grad_norm: 0.6974718528420218, iteration: 433831
loss: 1.011072039604187,grad_norm: 0.8064341152509585, iteration: 433832
loss: 0.9925009608268738,grad_norm: 0.7886188100064507, iteration: 433833
loss: 1.0061407089233398,grad_norm: 0.7426925516000997, iteration: 433834
loss: 0.981514036655426,grad_norm: 0.7457442040074692, iteration: 433835
loss: 1.0006712675094604,grad_norm: 0.7635321971068152, iteration: 433836
loss: 1.0907642841339111,grad_norm: 0.9999991752330422, iteration: 433837
loss: 1.0024538040161133,grad_norm: 0.9999991182158672, iteration: 433838
loss: 1.001408338546753,grad_norm: 0.7090220554755103, iteration: 433839
loss: 0.963171124458313,grad_norm: 0.928911277820785, iteration: 433840
loss: 0.9852203130722046,grad_norm: 0.7956813698254126, iteration: 433841
loss: 1.0255252122879028,grad_norm: 0.9999991095675929, iteration: 433842
loss: 0.9906424283981323,grad_norm: 0.9410645048156141, iteration: 433843
loss: 1.0079617500305176,grad_norm: 0.8374993223127939, iteration: 433844
loss: 0.989315390586853,grad_norm: 0.7243679402861708, iteration: 433845
loss: 1.0044305324554443,grad_norm: 0.7767044382264675, iteration: 433846
loss: 1.0086435079574585,grad_norm: 0.7544658432638672, iteration: 433847
loss: 1.0129631757736206,grad_norm: 0.7338306940523488, iteration: 433848
loss: 0.9823535680770874,grad_norm: 0.7468830348871341, iteration: 433849
loss: 1.0187782049179077,grad_norm: 0.7801193878521212, iteration: 433850
loss: 0.9829661250114441,grad_norm: 0.7441295664512331, iteration: 433851
loss: 1.0115511417388916,grad_norm: 0.8301609657662823, iteration: 433852
loss: 0.9943966269493103,grad_norm: 0.9999991312868699, iteration: 433853
loss: 1.0246379375457764,grad_norm: 0.8246134432862011, iteration: 433854
loss: 0.9953206777572632,grad_norm: 0.873887828998538, iteration: 433855
loss: 1.0104221105575562,grad_norm: 0.6330051424136017, iteration: 433856
loss: 0.9844996929168701,grad_norm: 0.7113794400295974, iteration: 433857
loss: 0.984637439250946,grad_norm: 0.7442771280110188, iteration: 433858
loss: 1.0238748788833618,grad_norm: 0.8995524556736583, iteration: 433859
loss: 1.019150972366333,grad_norm: 0.9298072462364969, iteration: 433860
loss: 1.0009472370147705,grad_norm: 0.6421121065884335, iteration: 433861
loss: 0.9961913228034973,grad_norm: 0.9796884803230868, iteration: 433862
loss: 1.0284614562988281,grad_norm: 0.8755729492146559, iteration: 433863
loss: 0.9796003103256226,grad_norm: 0.7234534694963848, iteration: 433864
loss: 1.0151368379592896,grad_norm: 0.6734266336831418, iteration: 433865
loss: 1.0596007108688354,grad_norm: 0.8027611532731197, iteration: 433866
loss: 1.0829284191131592,grad_norm: 0.9999995612499339, iteration: 433867
loss: 0.9774773716926575,grad_norm: 0.7001280905266197, iteration: 433868
loss: 1.0337010622024536,grad_norm: 0.7548554057902499, iteration: 433869
loss: 1.033934235572815,grad_norm: 0.8113827936699961, iteration: 433870
loss: 1.0127687454223633,grad_norm: 0.7914227368475419, iteration: 433871
loss: 1.00083589553833,grad_norm: 0.7970111760018249, iteration: 433872
loss: 0.9875489473342896,grad_norm: 0.9158588790283552, iteration: 433873
loss: 0.9738210439682007,grad_norm: 0.8332033448898818, iteration: 433874
loss: 1.0055067539215088,grad_norm: 0.945301212799966, iteration: 433875
loss: 0.9831643104553223,grad_norm: 0.7282463191231157, iteration: 433876
loss: 1.0334722995758057,grad_norm: 0.8263690501805365, iteration: 433877
loss: 1.0157676935195923,grad_norm: 0.7728533732196918, iteration: 433878
loss: 0.9839040637016296,grad_norm: 0.999999722265714, iteration: 433879
loss: 0.9996128082275391,grad_norm: 0.8698132812140253, iteration: 433880
loss: 0.9930029511451721,grad_norm: 0.6929967863616415, iteration: 433881
loss: 1.0169181823730469,grad_norm: 0.7760367498338324, iteration: 433882
loss: 1.0407602787017822,grad_norm: 0.8137155634144971, iteration: 433883
loss: 0.9775760769844055,grad_norm: 0.7666205922460855, iteration: 433884
loss: 1.0889124870300293,grad_norm: 0.6861151845405892, iteration: 433885
loss: 1.0259977579116821,grad_norm: 0.9999993641042426, iteration: 433886
loss: 0.9887352585792542,grad_norm: 0.8036587255785632, iteration: 433887
loss: 0.9870806336402893,grad_norm: 0.9122928404961349, iteration: 433888
loss: 0.9973255395889282,grad_norm: 0.6766091925265629, iteration: 433889
loss: 0.9209426045417786,grad_norm: 0.9735929055456732, iteration: 433890
loss: 0.9979286789894104,grad_norm: 0.6936266452722596, iteration: 433891
loss: 0.9788099527359009,grad_norm: 0.8091577455275274, iteration: 433892
loss: 0.9655553698539734,grad_norm: 0.862434261126741, iteration: 433893
loss: 1.0098083019256592,grad_norm: 0.8797487263873285, iteration: 433894
loss: 1.0222247838974,grad_norm: 0.741995724134217, iteration: 433895
loss: 0.9822633266448975,grad_norm: 0.7144298847927125, iteration: 433896
loss: 1.0129497051239014,grad_norm: 0.7518097107936439, iteration: 433897
loss: 1.0096163749694824,grad_norm: 0.999999361606954, iteration: 433898
loss: 1.0572041273117065,grad_norm: 0.9999989845047478, iteration: 433899
loss: 1.0118643045425415,grad_norm: 0.7026165339623067, iteration: 433900
loss: 1.0120139122009277,grad_norm: 0.821328625698049, iteration: 433901
loss: 1.0250507593154907,grad_norm: 0.8917454125054672, iteration: 433902
loss: 0.9388764500617981,grad_norm: 0.7847355034778305, iteration: 433903
loss: 1.0112495422363281,grad_norm: 0.9008236171628328, iteration: 433904
loss: 1.0278973579406738,grad_norm: 0.6957228528103965, iteration: 433905
loss: 1.0028183460235596,grad_norm: 0.7535724880607962, iteration: 433906
loss: 0.9665623307228088,grad_norm: 0.6878285690048931, iteration: 433907
loss: 1.001336932182312,grad_norm: 0.7272778661516527, iteration: 433908
loss: 0.9611478447914124,grad_norm: 0.7936846733768099, iteration: 433909
loss: 0.9821802973747253,grad_norm: 0.7206227794406925, iteration: 433910
loss: 0.9938563704490662,grad_norm: 0.6745167254442161, iteration: 433911
loss: 1.0034208297729492,grad_norm: 0.9102329489599517, iteration: 433912
loss: 0.9689153432846069,grad_norm: 0.9330909244043973, iteration: 433913
loss: 1.0349595546722412,grad_norm: 0.8201102683879544, iteration: 433914
loss: 1.0163551568984985,grad_norm: 0.8613295221655684, iteration: 433915
loss: 0.9865297079086304,grad_norm: 0.9132034022072882, iteration: 433916
loss: 1.0254197120666504,grad_norm: 0.7125337749455785, iteration: 433917
loss: 0.9797372817993164,grad_norm: 0.8305634411108737, iteration: 433918
loss: 1.0291054248809814,grad_norm: 0.827523225436534, iteration: 433919
loss: 0.9766216278076172,grad_norm: 0.7922750987407663, iteration: 433920
loss: 0.9964333772659302,grad_norm: 0.6303811819469933, iteration: 433921
loss: 1.0000765323638916,grad_norm: 0.7135772777784387, iteration: 433922
loss: 1.0223042964935303,grad_norm: 0.8455719574049526, iteration: 433923
loss: 1.0107934474945068,grad_norm: 0.8653143534653869, iteration: 433924
loss: 1.0046441555023193,grad_norm: 0.7283774053515785, iteration: 433925
loss: 1.007447361946106,grad_norm: 0.7377357084130116, iteration: 433926
loss: 1.0333757400512695,grad_norm: 0.8532470849087392, iteration: 433927
loss: 0.9926088452339172,grad_norm: 0.8183875778925122, iteration: 433928
loss: 1.0016311407089233,grad_norm: 0.7728961915869906, iteration: 433929
loss: 1.0023764371871948,grad_norm: 0.9999992767473145, iteration: 433930
loss: 1.032291054725647,grad_norm: 0.9999992754501849, iteration: 433931
loss: 0.9668154716491699,grad_norm: 0.7704003562585483, iteration: 433932
loss: 0.9914987683296204,grad_norm: 0.8389299756555001, iteration: 433933
loss: 0.9862073063850403,grad_norm: 0.7469778316613245, iteration: 433934
loss: 0.987765371799469,grad_norm: 0.999999159635404, iteration: 433935
loss: 1.0193703174591064,grad_norm: 0.9259802294752123, iteration: 433936
loss: 1.024574637413025,grad_norm: 0.686015876592597, iteration: 433937
loss: 0.9851429462432861,grad_norm: 0.9966123943318234, iteration: 433938
loss: 1.0191915035247803,grad_norm: 0.785395396646912, iteration: 433939
loss: 1.016756296157837,grad_norm: 0.7972803701364893, iteration: 433940
loss: 0.9957891702651978,grad_norm: 0.9081513773145494, iteration: 433941
loss: 0.9906702637672424,grad_norm: 0.734930082009965, iteration: 433942
loss: 1.0328357219696045,grad_norm: 0.7417552802893418, iteration: 433943
loss: 1.0214815139770508,grad_norm: 0.6602010641107278, iteration: 433944
loss: 1.008151650428772,grad_norm: 0.795220493918341, iteration: 433945
loss: 0.9979909658432007,grad_norm: 0.8835969987155549, iteration: 433946
loss: 1.0412025451660156,grad_norm: 0.7026231705508492, iteration: 433947
loss: 1.0958491563796997,grad_norm: 0.9999998779901634, iteration: 433948
loss: 0.9849532842636108,grad_norm: 0.7670115504325424, iteration: 433949
loss: 1.185070276260376,grad_norm: 0.9999996421781328, iteration: 433950
loss: 0.9729582667350769,grad_norm: 0.7550669348537561, iteration: 433951
loss: 1.006876826286316,grad_norm: 0.8154095954570154, iteration: 433952
loss: 0.9592993259429932,grad_norm: 0.7841272117768667, iteration: 433953
loss: 1.0190730094909668,grad_norm: 0.8560021113830735, iteration: 433954
loss: 0.9528648257255554,grad_norm: 0.8368572040036645, iteration: 433955
loss: 1.0232415199279785,grad_norm: 0.8811055767878375, iteration: 433956
loss: 1.0053495168685913,grad_norm: 0.9999990693873639, iteration: 433957
loss: 0.9821150898933411,grad_norm: 0.8026143672331563, iteration: 433958
loss: 1.0172184705734253,grad_norm: 0.8013517119785346, iteration: 433959
loss: 1.0097103118896484,grad_norm: 0.6853018385694727, iteration: 433960
loss: 1.0201958417892456,grad_norm: 0.8749159710301532, iteration: 433961
loss: 0.9992040395736694,grad_norm: 0.745749471742541, iteration: 433962
loss: 0.9680130481719971,grad_norm: 0.9999990484448782, iteration: 433963
loss: 0.9828549027442932,grad_norm: 0.8584299773923858, iteration: 433964
loss: 1.0098285675048828,grad_norm: 0.8503963583394086, iteration: 433965
loss: 1.0088074207305908,grad_norm: 0.8290627456859482, iteration: 433966
loss: 1.0117082595825195,grad_norm: 0.7852809991808142, iteration: 433967
loss: 1.0356090068817139,grad_norm: 0.7100698756238727, iteration: 433968
loss: 1.0281273126602173,grad_norm: 0.9999991088998785, iteration: 433969
loss: 1.0101300477981567,grad_norm: 0.8685030211834277, iteration: 433970
loss: 0.9926972389221191,grad_norm: 0.9451094588758221, iteration: 433971
loss: 1.0204845666885376,grad_norm: 0.731192927018853, iteration: 433972
loss: 0.9906010031700134,grad_norm: 0.6932252290954032, iteration: 433973
loss: 0.9598000645637512,grad_norm: 0.7185028744081537, iteration: 433974
loss: 0.9978386759757996,grad_norm: 0.7562556802281617, iteration: 433975
loss: 0.9992758631706238,grad_norm: 0.7653293847926224, iteration: 433976
loss: 1.0001903772354126,grad_norm: 0.9999999068358866, iteration: 433977
loss: 1.002512812614441,grad_norm: 0.7266155048621825, iteration: 433978
loss: 0.9745142459869385,grad_norm: 0.9999992539505207, iteration: 433979
loss: 1.2149429321289062,grad_norm: 0.9999997868682018, iteration: 433980
loss: 1.0196200609207153,grad_norm: 0.7279817327546436, iteration: 433981
loss: 0.9482868909835815,grad_norm: 0.8213627408716692, iteration: 433982
loss: 0.9782370924949646,grad_norm: 0.860430580566742, iteration: 433983
loss: 1.0295565128326416,grad_norm: 0.9067031661649572, iteration: 433984
loss: 1.0118844509124756,grad_norm: 0.7573866225231953, iteration: 433985
loss: 1.008527159690857,grad_norm: 0.8403845147086599, iteration: 433986
loss: 0.9733456969261169,grad_norm: 0.9457694833699347, iteration: 433987
loss: 1.0243560075759888,grad_norm: 0.8207726111196323, iteration: 433988
loss: 1.02018141746521,grad_norm: 0.7162923994888479, iteration: 433989
loss: 1.0978277921676636,grad_norm: 0.9999999189549093, iteration: 433990
loss: 0.9997031092643738,grad_norm: 0.6215705927386257, iteration: 433991
loss: 0.9934203028678894,grad_norm: 0.8202621860015878, iteration: 433992
loss: 0.9720743298530579,grad_norm: 0.8470477669059783, iteration: 433993
loss: 1.027180790901184,grad_norm: 0.9184271635464639, iteration: 433994
loss: 0.9849771857261658,grad_norm: 0.789035177376452, iteration: 433995
loss: 1.012203574180603,grad_norm: 0.7821201984715372, iteration: 433996
loss: 1.0203442573547363,grad_norm: 0.7687124120134551, iteration: 433997
loss: 0.9927220940589905,grad_norm: 0.7614768017905732, iteration: 433998
loss: 0.9419742822647095,grad_norm: 0.8121169446162526, iteration: 433999
loss: 1.0013799667358398,grad_norm: 0.8056516026451581, iteration: 434000
loss: 0.9946376085281372,grad_norm: 0.6710000178545964, iteration: 434001
loss: 0.9953250885009766,grad_norm: 0.8772186101063131, iteration: 434002
loss: 0.9871351718902588,grad_norm: 0.8297479952625818, iteration: 434003
loss: 1.0183355808258057,grad_norm: 0.667743213531637, iteration: 434004
loss: 0.9834389686584473,grad_norm: 0.7496774041609439, iteration: 434005
loss: 0.9731329679489136,grad_norm: 0.7299424985716109, iteration: 434006
loss: 1.0064988136291504,grad_norm: 0.6343227622003905, iteration: 434007
loss: 0.9672324657440186,grad_norm: 0.7936429880563726, iteration: 434008
loss: 0.9746172428131104,grad_norm: 0.69185220700617, iteration: 434009
loss: 1.010079026222229,grad_norm: 0.7515005891694382, iteration: 434010
loss: 0.9572985768318176,grad_norm: 0.8618536030913116, iteration: 434011
loss: 1.0197782516479492,grad_norm: 0.8324711088890594, iteration: 434012
loss: 1.0995252132415771,grad_norm: 0.9999999168345233, iteration: 434013
loss: 1.0045512914657593,grad_norm: 0.9999990507419315, iteration: 434014
loss: 1.0115015506744385,grad_norm: 0.7213659383025628, iteration: 434015
loss: 0.9996711015701294,grad_norm: 0.5885537183051748, iteration: 434016
loss: 0.9782286286354065,grad_norm: 0.7471957364051921, iteration: 434017
loss: 1.0173945426940918,grad_norm: 0.860157701900926, iteration: 434018
loss: 1.030802845954895,grad_norm: 0.7877941911584022, iteration: 434019
loss: 1.011690616607666,grad_norm: 0.7985381216525386, iteration: 434020
loss: 0.9901043176651001,grad_norm: 0.7989505057936863, iteration: 434021
loss: 0.9730724692344666,grad_norm: 0.6829593164507946, iteration: 434022
loss: 1.0440675020217896,grad_norm: 0.9999990765486484, iteration: 434023
loss: 1.0292164087295532,grad_norm: 0.699124157149869, iteration: 434024
loss: 0.9867225289344788,grad_norm: 0.6686048406865066, iteration: 434025
loss: 1.0122859477996826,grad_norm: 0.7820747441893788, iteration: 434026
loss: 0.9923168420791626,grad_norm: 0.9121677446805775, iteration: 434027
loss: 1.0013048648834229,grad_norm: 0.8881870263397208, iteration: 434028
loss: 0.9882252812385559,grad_norm: 0.7301321082619323, iteration: 434029
loss: 1.0176538228988647,grad_norm: 0.9999990101123017, iteration: 434030
loss: 1.011398434638977,grad_norm: 0.9999995145372791, iteration: 434031
loss: 0.9529013633728027,grad_norm: 0.9999989899265203, iteration: 434032
loss: 0.9951924681663513,grad_norm: 0.8170609592871984, iteration: 434033
loss: 1.0308295488357544,grad_norm: 0.7926678815179461, iteration: 434034
loss: 0.998814582824707,grad_norm: 0.9999996096312419, iteration: 434035
loss: 1.0212979316711426,grad_norm: 0.6804900534435847, iteration: 434036
loss: 1.021867036819458,grad_norm: 0.8544370712111083, iteration: 434037
loss: 1.0080183744430542,grad_norm: 0.7127936344635546, iteration: 434038
loss: 1.00399649143219,grad_norm: 0.9999999038665867, iteration: 434039
loss: 1.0263069868087769,grad_norm: 0.9999992944628279, iteration: 434040
loss: 0.9859802722930908,grad_norm: 0.7552445958985405, iteration: 434041
loss: 1.044086217880249,grad_norm: 0.9653172364376313, iteration: 434042
loss: 1.0008176565170288,grad_norm: 0.9999996010581461, iteration: 434043
loss: 1.0112324953079224,grad_norm: 0.7631622378112163, iteration: 434044
loss: 1.016573190689087,grad_norm: 0.8168883304971482, iteration: 434045
loss: 1.0276424884796143,grad_norm: 0.6366039734517148, iteration: 434046
loss: 1.0183707475662231,grad_norm: 0.7634124408200628, iteration: 434047
loss: 1.0047765970230103,grad_norm: 0.9725043651910777, iteration: 434048
loss: 1.0213829278945923,grad_norm: 0.9666852064617243, iteration: 434049
loss: 1.019668459892273,grad_norm: 0.799706372450575, iteration: 434050
loss: 0.983747124671936,grad_norm: 0.859116728144164, iteration: 434051
loss: 1.0224896669387817,grad_norm: 0.7947705395650154, iteration: 434052
loss: 1.0010442733764648,grad_norm: 0.7877775608455707, iteration: 434053
loss: 0.9694597721099854,grad_norm: 0.9999991724759467, iteration: 434054
loss: 1.0997910499572754,grad_norm: 0.9999992128119592, iteration: 434055
loss: 1.0001404285430908,grad_norm: 0.7209295694960878, iteration: 434056
loss: 1.0855294466018677,grad_norm: 0.999999947144549, iteration: 434057
loss: 1.0353670120239258,grad_norm: 0.7563813151933818, iteration: 434058
loss: 1.009078025817871,grad_norm: 0.800567162902563, iteration: 434059
loss: 1.0190356969833374,grad_norm: 0.7375493431736079, iteration: 434060
loss: 1.0406513214111328,grad_norm: 0.9901310082952768, iteration: 434061
loss: 1.0207003355026245,grad_norm: 0.7812524089683134, iteration: 434062
loss: 0.9783983826637268,grad_norm: 0.7066978443410867, iteration: 434063
loss: 0.9922977089881897,grad_norm: 0.7973741502088129, iteration: 434064
loss: 1.3503165245056152,grad_norm: 0.9999997756433956, iteration: 434065
loss: 1.0015621185302734,grad_norm: 0.7964522477205096, iteration: 434066
loss: 1.0090138912200928,grad_norm: 0.8935060190718, iteration: 434067
loss: 1.0351932048797607,grad_norm: 0.6849119362440801, iteration: 434068
loss: 1.001075267791748,grad_norm: 0.8399996347450432, iteration: 434069
loss: 0.9905045032501221,grad_norm: 0.7447974583505196, iteration: 434070
loss: 1.0783185958862305,grad_norm: 0.9999999349769675, iteration: 434071
loss: 0.9904941916465759,grad_norm: 0.864341833930893, iteration: 434072
loss: 0.9612943530082703,grad_norm: 0.8425825313871736, iteration: 434073
loss: 1.0729519128799438,grad_norm: 0.9999994478022693, iteration: 434074
loss: 1.0442214012145996,grad_norm: 0.9999990237091736, iteration: 434075
loss: 1.043116569519043,grad_norm: 0.7036966977011903, iteration: 434076
loss: 0.9749981164932251,grad_norm: 0.6780520611326692, iteration: 434077
loss: 0.982487142086029,grad_norm: 0.6267130256594775, iteration: 434078
loss: 1.0135385990142822,grad_norm: 0.7788372614425663, iteration: 434079
loss: 1.0059669017791748,grad_norm: 0.7022128641490026, iteration: 434080
loss: 1.0054714679718018,grad_norm: 0.8487051253173633, iteration: 434081
loss: 1.0189048051834106,grad_norm: 0.6245457006077504, iteration: 434082
loss: 1.0263200998306274,grad_norm: 0.999999115563777, iteration: 434083
loss: 0.9701695442199707,grad_norm: 0.8290323809290157, iteration: 434084
loss: 0.9759894609451294,grad_norm: 0.8459503613979217, iteration: 434085
loss: 0.9750202298164368,grad_norm: 0.7158384851247582, iteration: 434086
loss: 0.9953163266181946,grad_norm: 0.6001593684504944, iteration: 434087
loss: 1.0208810567855835,grad_norm: 0.7060719827684928, iteration: 434088
loss: 1.0141757726669312,grad_norm: 0.820971493461135, iteration: 434089
loss: 1.0441616773605347,grad_norm: 0.9893230036014153, iteration: 434090
loss: 1.1606370210647583,grad_norm: 0.9410046603097066, iteration: 434091
loss: 0.9870437979698181,grad_norm: 0.8025887333814753, iteration: 434092
loss: 1.0387686491012573,grad_norm: 0.842649240621526, iteration: 434093
loss: 1.0066289901733398,grad_norm: 0.8071283651198742, iteration: 434094
loss: 0.9786746501922607,grad_norm: 0.72680615383999, iteration: 434095
loss: 0.9998899102210999,grad_norm: 0.7866899044752399, iteration: 434096
loss: 1.155004620552063,grad_norm: 0.9999994355362966, iteration: 434097
loss: 1.0139658451080322,grad_norm: 0.7327950956420469, iteration: 434098
loss: 0.9877849221229553,grad_norm: 0.8978770489684357, iteration: 434099
loss: 1.002472162246704,grad_norm: 0.6970141930624533, iteration: 434100
loss: 1.025669813156128,grad_norm: 0.8247682201307939, iteration: 434101
loss: 0.9818010926246643,grad_norm: 0.7283079820686963, iteration: 434102
loss: 0.9712246656417847,grad_norm: 0.7686053164811125, iteration: 434103
loss: 0.9769099950790405,grad_norm: 0.9999992494175505, iteration: 434104
loss: 0.9970054030418396,grad_norm: 0.592318493523132, iteration: 434105
loss: 0.9720954298973083,grad_norm: 0.6908532272132112, iteration: 434106
loss: 1.0056544542312622,grad_norm: 0.6638177447328423, iteration: 434107
loss: 1.022979736328125,grad_norm: 0.9100488095423701, iteration: 434108
loss: 0.9768357872962952,grad_norm: 0.6726261997738586, iteration: 434109
loss: 0.9913469552993774,grad_norm: 0.8155251733984581, iteration: 434110
loss: 1.011066198348999,grad_norm: 0.7896916367938196, iteration: 434111
loss: 1.0187712907791138,grad_norm: 0.7254010599304561, iteration: 434112
loss: 1.0061320066452026,grad_norm: 0.8663171670924396, iteration: 434113
loss: 0.9930564165115356,grad_norm: 0.7379165819116276, iteration: 434114
loss: 1.012794017791748,grad_norm: 0.8286502061939096, iteration: 434115
loss: 0.9693025350570679,grad_norm: 0.794434397106712, iteration: 434116
loss: 0.9977241158485413,grad_norm: 0.9999998505057454, iteration: 434117
loss: 0.9890257716178894,grad_norm: 0.9999994642574643, iteration: 434118
loss: 1.0286169052124023,grad_norm: 0.9999991198600194, iteration: 434119
loss: 0.9736688733100891,grad_norm: 0.7141153285636006, iteration: 434120
loss: 0.9809384942054749,grad_norm: 0.7909603686748475, iteration: 434121
loss: 0.9788076281547546,grad_norm: 0.9543710932452248, iteration: 434122
loss: 1.0224435329437256,grad_norm: 0.9398082357704999, iteration: 434123
loss: 0.9771611094474792,grad_norm: 0.8636560692787769, iteration: 434124
loss: 0.9993191957473755,grad_norm: 0.7692463836602403, iteration: 434125
loss: 1.0129297971725464,grad_norm: 0.7503815403882268, iteration: 434126
loss: 1.0084997415542603,grad_norm: 0.9999989527738978, iteration: 434127
loss: 1.0850257873535156,grad_norm: 0.866300874412511, iteration: 434128
loss: 0.9735448956489563,grad_norm: 0.798293335830544, iteration: 434129
loss: 0.9931164383888245,grad_norm: 0.7232155499936768, iteration: 434130
loss: 0.9685345888137817,grad_norm: 0.7202099987011953, iteration: 434131
loss: 1.015436053276062,grad_norm: 0.9759748687233564, iteration: 434132
loss: 0.9361939430236816,grad_norm: 0.7195393947062526, iteration: 434133
loss: 1.0500222444534302,grad_norm: 0.7568611893389297, iteration: 434134
loss: 1.0076754093170166,grad_norm: 0.9999990410951178, iteration: 434135
loss: 0.9881290197372437,grad_norm: 0.9415903842613795, iteration: 434136
loss: 1.018459439277649,grad_norm: 0.9999993355922778, iteration: 434137
loss: 0.9638261198997498,grad_norm: 0.7620453869340332, iteration: 434138
loss: 0.9935414791107178,grad_norm: 0.6674283344593721, iteration: 434139
loss: 1.0019041299819946,grad_norm: 0.7313673139572958, iteration: 434140
loss: 1.0199764966964722,grad_norm: 0.8149809197258471, iteration: 434141
loss: 0.9997835755348206,grad_norm: 0.7360658061968629, iteration: 434142
loss: 1.0032991170883179,grad_norm: 0.7505046963938847, iteration: 434143
loss: 0.9677025675773621,grad_norm: 0.8573296248612192, iteration: 434144
loss: 1.0298165082931519,grad_norm: 0.76521847546688, iteration: 434145
loss: 1.0356872081756592,grad_norm: 0.9999992388311281, iteration: 434146
loss: 0.9849250912666321,grad_norm: 0.7818761412359774, iteration: 434147
loss: 1.1134110689163208,grad_norm: 0.9999998174961146, iteration: 434148
loss: 1.0036509037017822,grad_norm: 0.7042756832588617, iteration: 434149
loss: 1.0096067190170288,grad_norm: 0.9756425200530888, iteration: 434150
loss: 0.9953915476799011,grad_norm: 0.710034095660278, iteration: 434151
loss: 0.9696928858757019,grad_norm: 0.7559448689955851, iteration: 434152
loss: 0.976927638053894,grad_norm: 0.6999771899716387, iteration: 434153
loss: 0.9643096327781677,grad_norm: 0.7826845644140847, iteration: 434154
loss: 1.0001720190048218,grad_norm: 0.8193592888250453, iteration: 434155
loss: 1.0128123760223389,grad_norm: 0.8599908078643047, iteration: 434156
loss: 0.9763497710227966,grad_norm: 0.7717557125914792, iteration: 434157
loss: 0.9853794574737549,grad_norm: 0.8085612410523265, iteration: 434158
loss: 0.9943361282348633,grad_norm: 0.6962581105669127, iteration: 434159
loss: 0.9916630387306213,grad_norm: 0.7550493519494241, iteration: 434160
loss: 0.9985687732696533,grad_norm: 0.7777579872405609, iteration: 434161
loss: 1.0288468599319458,grad_norm: 0.9389965063272424, iteration: 434162
loss: 0.998296856880188,grad_norm: 0.9152117893142386, iteration: 434163
loss: 1.081691026687622,grad_norm: 0.9999998281513037, iteration: 434164
loss: 1.0112574100494385,grad_norm: 0.7356091089867012, iteration: 434165
loss: 1.0243003368377686,grad_norm: 0.9999996865979573, iteration: 434166
loss: 0.9784142374992371,grad_norm: 0.9028209654423628, iteration: 434167
loss: 0.9873822331428528,grad_norm: 0.7894586944625831, iteration: 434168
loss: 1.0059376955032349,grad_norm: 0.916131727285483, iteration: 434169
loss: 1.012994647026062,grad_norm: 0.8756211397028802, iteration: 434170
loss: 1.0068308115005493,grad_norm: 0.9999989833506998, iteration: 434171
loss: 1.1959238052368164,grad_norm: 0.9999999676954449, iteration: 434172
loss: 0.9589142799377441,grad_norm: 0.8026807031004748, iteration: 434173
loss: 1.0410511493682861,grad_norm: 0.7602825731258319, iteration: 434174
loss: 0.9705024361610413,grad_norm: 0.9959929418847722, iteration: 434175
loss: 1.0045807361602783,grad_norm: 0.7674535770138224, iteration: 434176
loss: 0.9950791597366333,grad_norm: 0.9722099875863955, iteration: 434177
loss: 0.9818817973136902,grad_norm: 0.6077533955987398, iteration: 434178
loss: 0.9420179724693298,grad_norm: 0.7845626221877985, iteration: 434179
loss: 0.9730514287948608,grad_norm: 0.6605346734092028, iteration: 434180
loss: 1.0428882837295532,grad_norm: 0.8320412763803937, iteration: 434181
loss: 1.0080262422561646,grad_norm: 0.9999994427544877, iteration: 434182
loss: 1.08188796043396,grad_norm: 0.7376552161130125, iteration: 434183
loss: 1.077927589416504,grad_norm: 0.9999991377245041, iteration: 434184
loss: 1.0218892097473145,grad_norm: 0.7472201570474301, iteration: 434185
loss: 0.9837507605552673,grad_norm: 0.7152788347587322, iteration: 434186
loss: 0.9922528266906738,grad_norm: 0.7369851138180228, iteration: 434187
loss: 1.0273975133895874,grad_norm: 0.9933349901930439, iteration: 434188
loss: 1.0506449937820435,grad_norm: 0.9856527821055151, iteration: 434189
loss: 1.0132412910461426,grad_norm: 0.7959969741819918, iteration: 434190
loss: 0.9939574003219604,grad_norm: 0.7994680040965055, iteration: 434191
loss: 1.0174524784088135,grad_norm: 0.9999996535255425, iteration: 434192
loss: 1.0001658201217651,grad_norm: 0.5984741024315237, iteration: 434193
loss: 1.0070291757583618,grad_norm: 0.9332132076960057, iteration: 434194
loss: 1.0090147256851196,grad_norm: 0.7992544074355367, iteration: 434195
loss: 1.0113495588302612,grad_norm: 0.7738670038205445, iteration: 434196
loss: 0.9885648488998413,grad_norm: 0.6961697119343686, iteration: 434197
loss: 1.0067007541656494,grad_norm: 0.9999993753164457, iteration: 434198
loss: 1.01091468334198,grad_norm: 0.8564227434261203, iteration: 434199
loss: 1.0370454788208008,grad_norm: 0.9999990404630598, iteration: 434200
loss: 1.0118399858474731,grad_norm: 0.7977335164448858, iteration: 434201
loss: 0.9580261707305908,grad_norm: 0.7322099775209953, iteration: 434202
loss: 1.0071786642074585,grad_norm: 0.785531141991724, iteration: 434203
loss: 1.0017495155334473,grad_norm: 0.859410600728121, iteration: 434204
loss: 0.99452805519104,grad_norm: 0.7045474931485022, iteration: 434205
loss: 1.024597406387329,grad_norm: 0.9999995594681435, iteration: 434206
loss: 1.0287965536117554,grad_norm: 0.7914783547269397, iteration: 434207
loss: 1.0053833723068237,grad_norm: 0.714715338465479, iteration: 434208
loss: 0.9978187680244446,grad_norm: 0.7379946854126123, iteration: 434209
loss: 0.9822319746017456,grad_norm: 0.7912260744092157, iteration: 434210
loss: 1.0039492845535278,grad_norm: 0.8307598747782954, iteration: 434211
loss: 1.0071630477905273,grad_norm: 0.8391009214732744, iteration: 434212
loss: 1.0144519805908203,grad_norm: 0.8719647039666495, iteration: 434213
loss: 0.9711704850196838,grad_norm: 0.7304874666909933, iteration: 434214
loss: 0.9864153265953064,grad_norm: 0.65554732306969, iteration: 434215
loss: 1.0059415102005005,grad_norm: 0.7712905521221699, iteration: 434216
loss: 0.9979696869850159,grad_norm: 0.9608114366766061, iteration: 434217
loss: 0.9994489550590515,grad_norm: 0.7470707127303623, iteration: 434218
loss: 1.0220733880996704,grad_norm: 0.7796046059515306, iteration: 434219
loss: 1.0569320917129517,grad_norm: 0.9999993575040609, iteration: 434220
loss: 0.9818624258041382,grad_norm: 0.8755640693518593, iteration: 434221
loss: 1.0798808336257935,grad_norm: 0.9540342537419587, iteration: 434222
loss: 0.9858382940292358,grad_norm: 0.7369632810626765, iteration: 434223
loss: 0.9521536231040955,grad_norm: 0.9999991806790314, iteration: 434224
loss: 1.0983564853668213,grad_norm: 0.7858913171544727, iteration: 434225
loss: 0.9802786111831665,grad_norm: 0.8857024468634732, iteration: 434226
loss: 0.9937743544578552,grad_norm: 0.6656970614903626, iteration: 434227
loss: 0.9730333089828491,grad_norm: 0.740837360044267, iteration: 434228
loss: 0.9787595868110657,grad_norm: 0.9039563808563296, iteration: 434229
loss: 1.0178381204605103,grad_norm: 0.9999992144362918, iteration: 434230
loss: 1.0106606483459473,grad_norm: 0.7878424853361902, iteration: 434231
loss: 1.009070873260498,grad_norm: 0.821790994199917, iteration: 434232
loss: 0.9821886420249939,grad_norm: 0.7436153866846491, iteration: 434233
loss: 0.9576579928398132,grad_norm: 0.9226292164207296, iteration: 434234
loss: 0.9761381149291992,grad_norm: 0.6921782872753961, iteration: 434235
loss: 1.0328809022903442,grad_norm: 0.9999994472386398, iteration: 434236
loss: 0.9979110360145569,grad_norm: 0.6656466705618465, iteration: 434237
loss: 0.9721508622169495,grad_norm: 0.8104674114377374, iteration: 434238
loss: 0.9932620525360107,grad_norm: 0.8394077852337193, iteration: 434239
loss: 0.9922707676887512,grad_norm: 0.8310708888861912, iteration: 434240
loss: 0.9808894991874695,grad_norm: 0.6175801757942829, iteration: 434241
loss: 0.9930102229118347,grad_norm: 0.7817415111606857, iteration: 434242
loss: 0.9861112833023071,grad_norm: 0.765623128255244, iteration: 434243
loss: 1.0087929964065552,grad_norm: 0.7025332325072478, iteration: 434244
loss: 0.9802933931350708,grad_norm: 0.9665848982646987, iteration: 434245
loss: 1.0643099546432495,grad_norm: 0.9999996981295606, iteration: 434246
loss: 1.028664469718933,grad_norm: 0.7939876423350323, iteration: 434247
loss: 1.0181386470794678,grad_norm: 0.8894299116115538, iteration: 434248
loss: 0.9876687526702881,grad_norm: 0.8084022559648427, iteration: 434249
loss: 0.9755042791366577,grad_norm: 0.7270219142638846, iteration: 434250
loss: 1.0200763940811157,grad_norm: 0.722762786205706, iteration: 434251
loss: 1.0002456903457642,grad_norm: 0.7341628561455344, iteration: 434252
loss: 0.9954538345336914,grad_norm: 0.7867328715093259, iteration: 434253
loss: 0.9747334718704224,grad_norm: 0.6614383625033092, iteration: 434254
loss: 0.9820594191551208,grad_norm: 0.8450777070082841, iteration: 434255
loss: 0.9893084764480591,grad_norm: 0.7096292464969993, iteration: 434256
loss: 0.9687038064002991,grad_norm: 0.6887621337382807, iteration: 434257
loss: 0.9796882271766663,grad_norm: 0.7502929926922943, iteration: 434258
loss: 1.0075116157531738,grad_norm: 0.743135901922322, iteration: 434259
loss: 1.0471668243408203,grad_norm: 0.9999991125883915, iteration: 434260
loss: 1.023451805114746,grad_norm: 0.7904070910233757, iteration: 434261
loss: 1.0312021970748901,grad_norm: 0.9999996165806749, iteration: 434262
loss: 1.002332091331482,grad_norm: 0.8404851866842484, iteration: 434263
loss: 0.9663057327270508,grad_norm: 0.7945965603121369, iteration: 434264
loss: 1.0208594799041748,grad_norm: 0.8408522305498514, iteration: 434265
loss: 1.0137290954589844,grad_norm: 0.873516620699401, iteration: 434266
loss: 1.0392881631851196,grad_norm: 0.9999999232434205, iteration: 434267
loss: 0.9815260767936707,grad_norm: 0.6754730616849176, iteration: 434268
loss: 1.001380443572998,grad_norm: 0.6072967075089837, iteration: 434269
loss: 1.015038251876831,grad_norm: 0.8469709751874023, iteration: 434270
loss: 0.9796707034111023,grad_norm: 0.6699544435608069, iteration: 434271
loss: 1.0201680660247803,grad_norm: 0.8282071152328344, iteration: 434272
loss: 1.0250263214111328,grad_norm: 0.9999998950287003, iteration: 434273
loss: 0.9869616031646729,grad_norm: 0.6872387420535182, iteration: 434274
loss: 1.035520076751709,grad_norm: 0.7823159812263875, iteration: 434275
loss: 0.9751556515693665,grad_norm: 0.7373981422981573, iteration: 434276
loss: 0.9845842123031616,grad_norm: 0.8823934055993935, iteration: 434277
loss: 0.9536102414131165,grad_norm: 0.9580184989993866, iteration: 434278
loss: 1.02778160572052,grad_norm: 0.7417765928821511, iteration: 434279
loss: 1.007391095161438,grad_norm: 0.7028117184804527, iteration: 434280
loss: 1.0304239988327026,grad_norm: 0.8112761432219692, iteration: 434281
loss: 1.003960371017456,grad_norm: 0.7571734433309305, iteration: 434282
loss: 0.9870039224624634,grad_norm: 0.7168683242755944, iteration: 434283
loss: 1.011759638786316,grad_norm: 0.7320201759095893, iteration: 434284
loss: 1.0756779909133911,grad_norm: 0.9722151826026016, iteration: 434285
loss: 1.0257071256637573,grad_norm: 0.773165476081793, iteration: 434286
loss: 0.999584972858429,grad_norm: 0.7504485857530921, iteration: 434287
loss: 0.9802875518798828,grad_norm: 0.6903826082519702, iteration: 434288
loss: 0.9888221621513367,grad_norm: 0.9999992588375434, iteration: 434289
loss: 1.0199599266052246,grad_norm: 0.9999992451494552, iteration: 434290
loss: 1.0095282793045044,grad_norm: 0.8031085899720758, iteration: 434291
loss: 1.0276545286178589,grad_norm: 0.7406237586594688, iteration: 434292
loss: 0.9921745657920837,grad_norm: 0.8325694171452886, iteration: 434293
loss: 0.977576732635498,grad_norm: 0.682695534423308, iteration: 434294
loss: 0.9818843007087708,grad_norm: 0.7384330777676236, iteration: 434295
loss: 0.9783465266227722,grad_norm: 0.8088547657536791, iteration: 434296
loss: 1.0199626684188843,grad_norm: 0.8345416401863163, iteration: 434297
loss: 0.9904443025588989,grad_norm: 0.7998297369732077, iteration: 434298
loss: 1.0013535022735596,grad_norm: 0.787245204872971, iteration: 434299
loss: 0.972413957118988,grad_norm: 0.999999089373241, iteration: 434300
loss: 1.0262560844421387,grad_norm: 0.7365729780674083, iteration: 434301
loss: 1.0120370388031006,grad_norm: 0.7666239037516209, iteration: 434302
loss: 1.0044567584991455,grad_norm: 0.6885526921955666, iteration: 434303
loss: 0.9873082041740417,grad_norm: 0.8701801727528647, iteration: 434304
loss: 0.9975973963737488,grad_norm: 0.9172598001890996, iteration: 434305
loss: 1.0175729990005493,grad_norm: 0.9459912764995356, iteration: 434306
loss: 0.9648544192314148,grad_norm: 0.6547107752587369, iteration: 434307
loss: 1.0127116441726685,grad_norm: 0.7717155896167481, iteration: 434308
loss: 1.0455530881881714,grad_norm: 0.8022518242457733, iteration: 434309
loss: 0.971882700920105,grad_norm: 0.8052967091102721, iteration: 434310
loss: 0.9589410424232483,grad_norm: 0.9238035360243159, iteration: 434311
loss: 0.9970073699951172,grad_norm: 0.6947964452344213, iteration: 434312
loss: 1.0008498430252075,grad_norm: 0.7625211371955858, iteration: 434313
loss: 1.0028729438781738,grad_norm: 0.7982259151777291, iteration: 434314
loss: 0.9956651926040649,grad_norm: 0.8712842205179806, iteration: 434315
loss: 1.0330350399017334,grad_norm: 0.9999994194406344, iteration: 434316
loss: 0.9948675036430359,grad_norm: 0.9153997598762654, iteration: 434317
loss: 1.0045843124389648,grad_norm: 0.7525856406410809, iteration: 434318
loss: 0.9953617453575134,grad_norm: 0.7420928788898634, iteration: 434319
loss: 0.9626542925834656,grad_norm: 0.9616110025687505, iteration: 434320
loss: 1.016715168952942,grad_norm: 0.7406311930529975, iteration: 434321
loss: 1.0110924243927002,grad_norm: 0.6712393313719554, iteration: 434322
loss: 0.9772332310676575,grad_norm: 0.7271543460781461, iteration: 434323
loss: 0.9978852272033691,grad_norm: 0.6818731765835186, iteration: 434324
loss: 0.9886302351951599,grad_norm: 0.7938427984249706, iteration: 434325
loss: 0.9825291037559509,grad_norm: 0.8026559002444338, iteration: 434326
loss: 1.0016571283340454,grad_norm: 0.7997342083650005, iteration: 434327
loss: 1.0026201009750366,grad_norm: 0.7398668684873948, iteration: 434328
loss: 1.0238066911697388,grad_norm: 0.7742027767507719, iteration: 434329
loss: 0.9644724130630493,grad_norm: 0.8353740167285786, iteration: 434330
loss: 1.00325345993042,grad_norm: 0.800858540184201, iteration: 434331
loss: 1.0116363763809204,grad_norm: 0.7968722193804603, iteration: 434332
loss: 1.003900408744812,grad_norm: 0.8005906296230312, iteration: 434333
loss: 0.959606945514679,grad_norm: 0.961243793549846, iteration: 434334
loss: 1.0156642198562622,grad_norm: 0.6438784133624857, iteration: 434335
loss: 0.981959879398346,grad_norm: 0.6920899818671522, iteration: 434336
loss: 1.0268326997756958,grad_norm: 0.892957826488876, iteration: 434337
loss: 0.9743838310241699,grad_norm: 0.879558684072428, iteration: 434338
loss: 1.0519578456878662,grad_norm: 0.9608209348088338, iteration: 434339
loss: 1.0059927701950073,grad_norm: 0.7493564758762545, iteration: 434340
loss: 1.0762836933135986,grad_norm: 0.9999993293019183, iteration: 434341
loss: 1.032610297203064,grad_norm: 0.7518048817469055, iteration: 434342
loss: 1.020980715751648,grad_norm: 0.6596623125460988, iteration: 434343
loss: 0.9890305399894714,grad_norm: 0.8258555432436364, iteration: 434344
loss: 0.9891195893287659,grad_norm: 0.8268743127632016, iteration: 434345
loss: 0.9803272485733032,grad_norm: 0.9013516595685144, iteration: 434346
loss: 1.0055911540985107,grad_norm: 0.6671915406085771, iteration: 434347
loss: 0.9864373207092285,grad_norm: 0.9275969117951217, iteration: 434348
loss: 0.9756366610527039,grad_norm: 0.7769221848403264, iteration: 434349
loss: 0.9509214162826538,grad_norm: 0.7990384515366729, iteration: 434350
loss: 1.0219142436981201,grad_norm: 0.6874743355332391, iteration: 434351
loss: 1.0026960372924805,grad_norm: 0.6598908248817719, iteration: 434352
loss: 0.9853124022483826,grad_norm: 0.7876946231072379, iteration: 434353
loss: 0.9750698804855347,grad_norm: 0.7372678146489108, iteration: 434354
loss: 1.0014914274215698,grad_norm: 0.7887266735467264, iteration: 434355
loss: 1.0362945795059204,grad_norm: 0.8454013182105943, iteration: 434356
loss: 0.9940810203552246,grad_norm: 0.8772750846229511, iteration: 434357
loss: 0.9678779244422913,grad_norm: 0.7851969181920705, iteration: 434358
loss: 1.008515477180481,grad_norm: 0.7965543940179567, iteration: 434359
loss: 1.019112229347229,grad_norm: 0.7529555476915466, iteration: 434360
loss: 1.0158360004425049,grad_norm: 0.8924622020744901, iteration: 434361
loss: 1.0208760499954224,grad_norm: 0.8805704839680445, iteration: 434362
loss: 1.0210422277450562,grad_norm: 0.8643937071301526, iteration: 434363
loss: 1.0159133672714233,grad_norm: 0.999999902352679, iteration: 434364
loss: 1.028906226158142,grad_norm: 0.6797794477263696, iteration: 434365
loss: 1.008097529411316,grad_norm: 0.7781466721765197, iteration: 434366
loss: 1.0016136169433594,grad_norm: 0.7273419617879227, iteration: 434367
loss: 0.9787804484367371,grad_norm: 0.793815806363312, iteration: 434368
loss: 1.047270655632019,grad_norm: 0.9999999493267085, iteration: 434369
loss: 0.9987529516220093,grad_norm: 0.7123881322871364, iteration: 434370
loss: 1.0173745155334473,grad_norm: 0.746956809465819, iteration: 434371
loss: 0.9874088764190674,grad_norm: 0.7522212750364815, iteration: 434372
loss: 0.9849994778633118,grad_norm: 0.7115624200225374, iteration: 434373
loss: 1.0535831451416016,grad_norm: 0.8567089771921627, iteration: 434374
loss: 1.0182042121887207,grad_norm: 0.8625994049113856, iteration: 434375
loss: 1.0092973709106445,grad_norm: 0.7681150534684328, iteration: 434376
loss: 1.0147291421890259,grad_norm: 0.7312291194746792, iteration: 434377
loss: 0.96791672706604,grad_norm: 0.9288111642577301, iteration: 434378
loss: 1.0007109642028809,grad_norm: 0.7441809615978638, iteration: 434379
loss: 1.045786738395691,grad_norm: 0.9999993574270633, iteration: 434380
loss: 1.0365537405014038,grad_norm: 0.8913422703452961, iteration: 434381
loss: 1.0129177570343018,grad_norm: 0.6851057558120952, iteration: 434382
loss: 0.9887439608573914,grad_norm: 0.7329828224698572, iteration: 434383
loss: 0.9920943379402161,grad_norm: 0.7376426556362129, iteration: 434384
loss: 0.9641044735908508,grad_norm: 0.9999992535439227, iteration: 434385
loss: 1.0177397727966309,grad_norm: 0.8458445308316814, iteration: 434386
loss: 1.0164240598678589,grad_norm: 0.6934887777652722, iteration: 434387
loss: 0.9872362017631531,grad_norm: 0.7182747998476434, iteration: 434388
loss: 1.0091960430145264,grad_norm: 0.6471130178222878, iteration: 434389
loss: 1.0765044689178467,grad_norm: 0.9999994214313658, iteration: 434390
loss: 1.049466609954834,grad_norm: 0.9999999788726098, iteration: 434391
loss: 0.9880393147468567,grad_norm: 0.7541845967418546, iteration: 434392
loss: 0.9618042707443237,grad_norm: 0.691486390914263, iteration: 434393
loss: 1.0002610683441162,grad_norm: 0.8164753771217373, iteration: 434394
loss: 0.9842380881309509,grad_norm: 0.9999990997739704, iteration: 434395
loss: 1.015016794204712,grad_norm: 0.7000362926041834, iteration: 434396
loss: 1.030160903930664,grad_norm: 0.9999991807115489, iteration: 434397
loss: 0.9982806444168091,grad_norm: 0.776708600066035, iteration: 434398
loss: 0.9813700914382935,grad_norm: 0.725893114176695, iteration: 434399
loss: 0.9706882238388062,grad_norm: 0.7722758208554026, iteration: 434400
loss: 1.0140318870544434,grad_norm: 0.9113856567777173, iteration: 434401
loss: 1.0327110290527344,grad_norm: 0.7977281539665563, iteration: 434402
loss: 1.0280463695526123,grad_norm: 0.8271502598023801, iteration: 434403
loss: 0.9595615267753601,grad_norm: 0.7255373493267885, iteration: 434404
loss: 1.00701904296875,grad_norm: 0.767717487118024, iteration: 434405
loss: 0.9861761331558228,grad_norm: 0.9999995018648279, iteration: 434406
loss: 1.009433627128601,grad_norm: 0.9214913333387259, iteration: 434407
loss: 1.0124865770339966,grad_norm: 0.9999991112962826, iteration: 434408
loss: 1.1843750476837158,grad_norm: 0.9999998262974615, iteration: 434409
loss: 1.029201626777649,grad_norm: 0.8833549520843486, iteration: 434410
loss: 1.0013344287872314,grad_norm: 0.8223996869697969, iteration: 434411
loss: 0.9729731678962708,grad_norm: 0.7779125980772703, iteration: 434412
loss: 0.9959588646888733,grad_norm: 0.884847475532801, iteration: 434413
loss: 1.0267751216888428,grad_norm: 0.8280987761443395, iteration: 434414
loss: 1.0144600868225098,grad_norm: 0.6594059132503143, iteration: 434415
loss: 1.0765353441238403,grad_norm: 0.9059120346101507, iteration: 434416
loss: 1.0332262516021729,grad_norm: 0.8505662429599964, iteration: 434417
loss: 1.030965805053711,grad_norm: 0.6703417392590146, iteration: 434418
loss: 0.9722987413406372,grad_norm: 0.747249484422119, iteration: 434419
loss: 1.054875135421753,grad_norm: 0.9999997613917211, iteration: 434420
loss: 0.9861297607421875,grad_norm: 0.8115071770692337, iteration: 434421
loss: 1.0304605960845947,grad_norm: 0.8436699917311028, iteration: 434422
loss: 1.0514460802078247,grad_norm: 0.9999998853010548, iteration: 434423
loss: 1.0063908100128174,grad_norm: 0.8140987373619003, iteration: 434424
loss: 0.9818951487541199,grad_norm: 0.7287542266266367, iteration: 434425
loss: 0.9809564352035522,grad_norm: 0.7658228920088622, iteration: 434426
loss: 0.9764391779899597,grad_norm: 0.7877063459920131, iteration: 434427
loss: 1.006619930267334,grad_norm: 0.8495294221208939, iteration: 434428
loss: 0.9973554611206055,grad_norm: 0.8626225374366447, iteration: 434429
loss: 1.0143942832946777,grad_norm: 0.6726936189966569, iteration: 434430
loss: 0.9875406622886658,grad_norm: 0.8491652984476429, iteration: 434431
loss: 1.0125041007995605,grad_norm: 0.7985757404527634, iteration: 434432
loss: 0.9817415475845337,grad_norm: 0.8304712584806295, iteration: 434433
loss: 0.9720813632011414,grad_norm: 0.7087871793597247, iteration: 434434
loss: 0.9926819801330566,grad_norm: 0.8321753704509541, iteration: 434435
loss: 1.0160436630249023,grad_norm: 0.7283297978151946, iteration: 434436
loss: 1.0429290533065796,grad_norm: 0.8984486303742458, iteration: 434437
loss: 1.01581609249115,grad_norm: 0.7193372849351952, iteration: 434438
loss: 1.041253924369812,grad_norm: 0.999999265168862, iteration: 434439
loss: 1.0121828317642212,grad_norm: 0.8186062928397968, iteration: 434440
loss: 0.9817827343940735,grad_norm: 0.8420056009040526, iteration: 434441
loss: 0.9711143970489502,grad_norm: 0.8443928519910752, iteration: 434442
loss: 1.0132248401641846,grad_norm: 0.8130677311418868, iteration: 434443
loss: 0.9975318312644958,grad_norm: 0.8940163410589319, iteration: 434444
loss: 1.0133095979690552,grad_norm: 0.6433434548845332, iteration: 434445
loss: 0.9827116131782532,grad_norm: 0.80111509959103, iteration: 434446
loss: 0.961335301399231,grad_norm: 0.7904439826084851, iteration: 434447
loss: 0.973748505115509,grad_norm: 0.844945747617968, iteration: 434448
loss: 1.0037416219711304,grad_norm: 0.7131024447231734, iteration: 434449
loss: 1.0203750133514404,grad_norm: 0.6936616564747973, iteration: 434450
loss: 0.9744747877120972,grad_norm: 0.7051135841887496, iteration: 434451
loss: 1.0005759000778198,grad_norm: 0.8290013824498853, iteration: 434452
loss: 1.0221951007843018,grad_norm: 0.9999994566672247, iteration: 434453
loss: 0.9724060297012329,grad_norm: 0.977665462875107, iteration: 434454
loss: 1.0516042709350586,grad_norm: 0.8340486080345146, iteration: 434455
loss: 1.0159716606140137,grad_norm: 0.6730103131087519, iteration: 434456
loss: 1.0034112930297852,grad_norm: 0.7376704815565556, iteration: 434457
loss: 0.990874707698822,grad_norm: 0.731402060990638, iteration: 434458
loss: 1.02293062210083,grad_norm: 0.6963508973474871, iteration: 434459
loss: 0.9702281355857849,grad_norm: 0.7124609445321586, iteration: 434460
loss: 1.0080219507217407,grad_norm: 0.683339990873464, iteration: 434461
loss: 0.9977595210075378,grad_norm: 0.7726388541890201, iteration: 434462
loss: 0.997619092464447,grad_norm: 0.7249961061074864, iteration: 434463
loss: 1.0261542797088623,grad_norm: 0.8007580532065012, iteration: 434464
loss: 1.007411003112793,grad_norm: 0.8011266753983567, iteration: 434465
loss: 0.9832793474197388,grad_norm: 0.6650098380508351, iteration: 434466
loss: 0.9801613092422485,grad_norm: 0.8531638278779591, iteration: 434467
loss: 1.0127333402633667,grad_norm: 0.7233588562137175, iteration: 434468
loss: 1.0415148735046387,grad_norm: 0.9999992561337187, iteration: 434469
loss: 1.0760575532913208,grad_norm: 0.9999996116316007, iteration: 434470
loss: 0.9493184089660645,grad_norm: 0.8498966486540492, iteration: 434471
loss: 1.0185264348983765,grad_norm: 0.7989207904430765, iteration: 434472
loss: 1.0011978149414062,grad_norm: 0.6362828253252674, iteration: 434473
loss: 0.9955388307571411,grad_norm: 0.7214058526964392, iteration: 434474
loss: 0.9683873653411865,grad_norm: 0.927278112484227, iteration: 434475
loss: 0.9966042041778564,grad_norm: 0.7522748595438724, iteration: 434476
loss: 0.9830242395401001,grad_norm: 0.7060687071798327, iteration: 434477
loss: 1.0372042655944824,grad_norm: 0.9048193313504773, iteration: 434478
loss: 1.0189467668533325,grad_norm: 0.8468253136629408, iteration: 434479
loss: 0.9973740577697754,grad_norm: 0.8366929095550225, iteration: 434480
loss: 0.9999032616615295,grad_norm: 0.7109120506406232, iteration: 434481
loss: 1.0015277862548828,grad_norm: 0.9847771950293873, iteration: 434482
loss: 1.0282001495361328,grad_norm: 0.9999999256581623, iteration: 434483
loss: 1.1187520027160645,grad_norm: 0.99999914165391, iteration: 434484
loss: 0.9756360650062561,grad_norm: 0.7104798023563289, iteration: 434485
loss: 0.9787260890007019,grad_norm: 0.6202819746099074, iteration: 434486
loss: 1.0167343616485596,grad_norm: 0.7836795670747511, iteration: 434487
loss: 1.0404837131500244,grad_norm: 0.8697858574123952, iteration: 434488
loss: 0.983678936958313,grad_norm: 0.7819731180136877, iteration: 434489
loss: 0.9694632291793823,grad_norm: 0.8963706240071613, iteration: 434490
loss: 0.9833750128746033,grad_norm: 0.691522592081432, iteration: 434491
loss: 0.9777225852012634,grad_norm: 0.7903142023838389, iteration: 434492
loss: 0.9867821335792542,grad_norm: 0.8070482006755337, iteration: 434493
loss: 0.9628951549530029,grad_norm: 0.8373202612741891, iteration: 434494
loss: 1.0133845806121826,grad_norm: 0.8085153761588034, iteration: 434495
loss: 0.971480667591095,grad_norm: 0.8303116558258913, iteration: 434496
loss: 1.0084402561187744,grad_norm: 0.7314433884085324, iteration: 434497
loss: 1.020758032798767,grad_norm: 0.8877437035420098, iteration: 434498
loss: 1.0371863842010498,grad_norm: 0.6751636094413561, iteration: 434499
loss: 1.0326178073883057,grad_norm: 0.8278137505135017, iteration: 434500
loss: 0.9971873164176941,grad_norm: 0.7284485060795144, iteration: 434501
loss: 1.0194264650344849,grad_norm: 0.8221563513668416, iteration: 434502
loss: 0.984826922416687,grad_norm: 0.7995277457344573, iteration: 434503
loss: 0.9919024705886841,grad_norm: 0.7396340163145367, iteration: 434504
loss: 1.0022854804992676,grad_norm: 0.7076178715157107, iteration: 434505
loss: 1.0040837526321411,grad_norm: 0.826353266025518, iteration: 434506
loss: 0.9808578491210938,grad_norm: 0.7636104664733669, iteration: 434507
loss: 0.9913718700408936,grad_norm: 0.7988303805434664, iteration: 434508
loss: 1.012109398841858,grad_norm: 0.9997554814239111, iteration: 434509
loss: 0.9711561799049377,grad_norm: 0.8254213821307016, iteration: 434510
loss: 0.988688051700592,grad_norm: 0.8969224201172687, iteration: 434511
loss: 0.999648928642273,grad_norm: 0.8270691468278685, iteration: 434512
loss: 1.0054982900619507,grad_norm: 0.8110917545417088, iteration: 434513
loss: 1.0061832666397095,grad_norm: 0.6945413862032702, iteration: 434514
loss: 1.078696846961975,grad_norm: 0.828959141216748, iteration: 434515
loss: 0.9883010387420654,grad_norm: 0.9999994933729184, iteration: 434516
loss: 1.0545532703399658,grad_norm: 0.954552949363446, iteration: 434517
loss: 1.0643023252487183,grad_norm: 0.7379281640892801, iteration: 434518
loss: 1.0425471067428589,grad_norm: 0.698268368738846, iteration: 434519
loss: 1.0072717666625977,grad_norm: 0.8519806941023427, iteration: 434520
loss: 1.0129661560058594,grad_norm: 0.7357567421745603, iteration: 434521
loss: 1.0027498006820679,grad_norm: 0.8376502250491018, iteration: 434522
loss: 0.9731562733650208,grad_norm: 0.7540758015936434, iteration: 434523
loss: 0.9523743987083435,grad_norm: 0.8246383973834691, iteration: 434524
loss: 0.9808465242385864,grad_norm: 0.8571669110246888, iteration: 434525
loss: 0.9618026614189148,grad_norm: 0.764413051480185, iteration: 434526
loss: 0.9976361989974976,grad_norm: 0.8244349665813989, iteration: 434527
loss: 0.9853410720825195,grad_norm: 0.627788363354965, iteration: 434528
loss: 0.97260981798172,grad_norm: 0.7463294052851844, iteration: 434529
loss: 1.0188790559768677,grad_norm: 0.8137777166779548, iteration: 434530
loss: 1.0027779340744019,grad_norm: 0.8709764826305387, iteration: 434531
loss: 1.0306262969970703,grad_norm: 0.7822602882134347, iteration: 434532
loss: 1.0117801427841187,grad_norm: 0.7107148140629673, iteration: 434533
loss: 0.9805319905281067,grad_norm: 0.8913723322024433, iteration: 434534
loss: 1.035369873046875,grad_norm: 0.8435752323380482, iteration: 434535
loss: 1.0123368501663208,grad_norm: 0.7704675823935968, iteration: 434536
loss: 1.0217863321304321,grad_norm: 0.8127262932216622, iteration: 434537
loss: 0.9674025774002075,grad_norm: 0.999999142985179, iteration: 434538
loss: 0.9916555285453796,grad_norm: 0.6683819970458199, iteration: 434539
loss: 1.0169258117675781,grad_norm: 0.6401974378143398, iteration: 434540
loss: 1.084432601928711,grad_norm: 0.8953214679777105, iteration: 434541
loss: 1.0162477493286133,grad_norm: 0.7422298541849996, iteration: 434542
loss: 1.004502534866333,grad_norm: 0.9999993733067952, iteration: 434543
loss: 1.0147836208343506,grad_norm: 0.8446788432712413, iteration: 434544
loss: 1.0023614168167114,grad_norm: 0.7139025152015138, iteration: 434545
loss: 0.9926846623420715,grad_norm: 0.7236504980671006, iteration: 434546
loss: 0.998120903968811,grad_norm: 0.6228027827597058, iteration: 434547
loss: 0.9994021654129028,grad_norm: 0.658020820612285, iteration: 434548
loss: 0.9915565848350525,grad_norm: 0.8570755049462121, iteration: 434549
loss: 1.0061908960342407,grad_norm: 0.7225170710824913, iteration: 434550
loss: 0.9962356090545654,grad_norm: 0.7970335233946647, iteration: 434551
loss: 0.988029420375824,grad_norm: 0.802480267160823, iteration: 434552
loss: 0.9895045757293701,grad_norm: 0.8084155806806331, iteration: 434553
loss: 0.9838461875915527,grad_norm: 0.8089724058126939, iteration: 434554
loss: 0.9761044383049011,grad_norm: 0.7526514601441742, iteration: 434555
loss: 1.0049768686294556,grad_norm: 0.84065063904915, iteration: 434556
loss: 1.0033714771270752,grad_norm: 0.9999990391382698, iteration: 434557
loss: 1.0181602239608765,grad_norm: 0.7358453426090519, iteration: 434558
loss: 0.9858893752098083,grad_norm: 0.629537071137308, iteration: 434559
loss: 1.010146141052246,grad_norm: 0.7968256918296636, iteration: 434560
loss: 0.9851658940315247,grad_norm: 0.6798674919434988, iteration: 434561
loss: 0.9783872365951538,grad_norm: 0.6852845220463696, iteration: 434562
loss: 1.0203487873077393,grad_norm: 0.7936928391123098, iteration: 434563
loss: 0.9901723861694336,grad_norm: 0.9988665991155736, iteration: 434564
loss: 1.0066637992858887,grad_norm: 0.9392895586117149, iteration: 434565
loss: 1.005484700202942,grad_norm: 0.9014209386227559, iteration: 434566
loss: 1.0603190660476685,grad_norm: 0.869125630728493, iteration: 434567
loss: 1.0079200267791748,grad_norm: 0.9999990321752025, iteration: 434568
loss: 1.017906665802002,grad_norm: 0.7597568436913339, iteration: 434569
loss: 1.0269283056259155,grad_norm: 0.6887778470180136, iteration: 434570
loss: 1.0110684633255005,grad_norm: 0.8348362371863226, iteration: 434571
loss: 0.993240475654602,grad_norm: 0.73234509386548, iteration: 434572
loss: 1.00603449344635,grad_norm: 0.7707243999666699, iteration: 434573
loss: 0.9854118227958679,grad_norm: 0.6335494709780134, iteration: 434574
loss: 1.0275932550430298,grad_norm: 0.8439465872453645, iteration: 434575
loss: 0.9800066947937012,grad_norm: 0.8288575484873204, iteration: 434576
loss: 1.1177654266357422,grad_norm: 0.9999992768547641, iteration: 434577
loss: 0.99604731798172,grad_norm: 0.7708557701671821, iteration: 434578
loss: 0.9883858561515808,grad_norm: 0.8924136354506633, iteration: 434579
loss: 0.9984742403030396,grad_norm: 0.7107939942620175, iteration: 434580
loss: 0.9725968241691589,grad_norm: 0.7009902071141859, iteration: 434581
loss: 0.99237459897995,grad_norm: 0.8077771432981111, iteration: 434582
loss: 0.9970099329948425,grad_norm: 0.6850818304591341, iteration: 434583
loss: 1.034668207168579,grad_norm: 0.734529863931656, iteration: 434584
loss: 1.0141814947128296,grad_norm: 0.7832403616229153, iteration: 434585
loss: 0.9981444478034973,grad_norm: 0.7059649749665957, iteration: 434586
loss: 0.9797883033752441,grad_norm: 0.7045495776221176, iteration: 434587
loss: 1.0020595788955688,grad_norm: 0.7134897480021276, iteration: 434588
loss: 1.0009384155273438,grad_norm: 0.7920908357665486, iteration: 434589
loss: 0.9939879775047302,grad_norm: 0.8257959124146952, iteration: 434590
loss: 1.0233063697814941,grad_norm: 0.770662957165993, iteration: 434591
loss: 0.9843006730079651,grad_norm: 0.7083291788834952, iteration: 434592
loss: 1.0126962661743164,grad_norm: 0.6433863085107173, iteration: 434593
loss: 1.0076733827590942,grad_norm: 0.717790175927108, iteration: 434594
loss: 0.9909714460372925,grad_norm: 0.8188444514027793, iteration: 434595
loss: 0.9962554574012756,grad_norm: 0.8262386769851788, iteration: 434596
loss: 0.980671226978302,grad_norm: 0.818492420018446, iteration: 434597
loss: 0.9763838648796082,grad_norm: 0.9999995907104405, iteration: 434598
loss: 1.011292815208435,grad_norm: 0.9085797169667184, iteration: 434599
loss: 1.0008600950241089,grad_norm: 0.7787001997274995, iteration: 434600
loss: 1.0446828603744507,grad_norm: 0.9999993169172572, iteration: 434601
loss: 0.9892646670341492,grad_norm: 0.8336688557788232, iteration: 434602
loss: 0.9971045255661011,grad_norm: 0.8352964426593584, iteration: 434603
loss: 1.0001119375228882,grad_norm: 0.8506546077767804, iteration: 434604
loss: 0.9886372089385986,grad_norm: 0.9304181550060365, iteration: 434605
loss: 1.0118485689163208,grad_norm: 0.7387087497431329, iteration: 434606
loss: 1.0115488767623901,grad_norm: 0.7062144307994438, iteration: 434607
loss: 1.0044457912445068,grad_norm: 0.6695939668995379, iteration: 434608
loss: 0.9832627177238464,grad_norm: 0.9189541300672419, iteration: 434609
loss: 0.9622901082038879,grad_norm: 0.6871920003482166, iteration: 434610
loss: 1.0602540969848633,grad_norm: 0.9719239787936738, iteration: 434611
loss: 0.9923803210258484,grad_norm: 0.6945999024134704, iteration: 434612
loss: 1.0061153173446655,grad_norm: 0.7531589255210445, iteration: 434613
loss: 0.9643520712852478,grad_norm: 0.7388335436344066, iteration: 434614
loss: 1.0141242742538452,grad_norm: 0.9817444399613522, iteration: 434615
loss: 0.9984610080718994,grad_norm: 0.8861747492126155, iteration: 434616
loss: 0.9876556396484375,grad_norm: 0.9999993106613986, iteration: 434617
loss: 0.9751230478286743,grad_norm: 0.7436093935436917, iteration: 434618
loss: 0.9900779128074646,grad_norm: 0.7833475113092627, iteration: 434619
loss: 1.0068321228027344,grad_norm: 0.9999990620587794, iteration: 434620
loss: 1.024635672569275,grad_norm: 0.681388206315793, iteration: 434621
loss: 1.0026204586029053,grad_norm: 0.7602761949401815, iteration: 434622
loss: 0.9707476496696472,grad_norm: 0.8112295454659941, iteration: 434623
loss: 1.017625093460083,grad_norm: 0.6975070351757969, iteration: 434624
loss: 1.0023279190063477,grad_norm: 0.7685890180773316, iteration: 434625
loss: 1.0066081285476685,grad_norm: 0.8541415770155292, iteration: 434626
loss: 0.975928544998169,grad_norm: 0.7514806665816844, iteration: 434627
loss: 1.0063811540603638,grad_norm: 0.7463848511177139, iteration: 434628
loss: 1.0173449516296387,grad_norm: 0.8486392752881039, iteration: 434629
loss: 0.9482141733169556,grad_norm: 0.79145005430843, iteration: 434630
loss: 0.9785870909690857,grad_norm: 0.7557514217827189, iteration: 434631
loss: 0.9902423024177551,grad_norm: 0.7270187321131547, iteration: 434632
loss: 0.9960488080978394,grad_norm: 0.9464546543057841, iteration: 434633
loss: 1.025115728378296,grad_norm: 0.9999992794948613, iteration: 434634
loss: 1.0091850757598877,grad_norm: 0.9999993231869435, iteration: 434635
loss: 0.9834484457969666,grad_norm: 0.7412195763993181, iteration: 434636
loss: 1.0202200412750244,grad_norm: 0.7316256711585256, iteration: 434637
loss: 0.9887897968292236,grad_norm: 0.6993454028700659, iteration: 434638
loss: 1.0055261850357056,grad_norm: 0.6666782417452376, iteration: 434639
loss: 1.029556155204773,grad_norm: 0.7311671097723785, iteration: 434640
loss: 0.9987805485725403,grad_norm: 0.7543231833766091, iteration: 434641
loss: 1.0010837316513062,grad_norm: 0.706162332912155, iteration: 434642
loss: 0.9682502746582031,grad_norm: 0.8054365926701275, iteration: 434643
loss: 0.9953413605690002,grad_norm: 0.7372449763370951, iteration: 434644
loss: 0.9842172861099243,grad_norm: 0.8785020406282869, iteration: 434645
loss: 1.0006152391433716,grad_norm: 0.8442459160996746, iteration: 434646
loss: 1.0079538822174072,grad_norm: 0.7005252742009301, iteration: 434647
loss: 0.9968658089637756,grad_norm: 0.8883864451008953, iteration: 434648
loss: 1.031388282775879,grad_norm: 0.8176951525502641, iteration: 434649
loss: 0.9462286829948425,grad_norm: 0.829476291060726, iteration: 434650
loss: 1.0158473253250122,grad_norm: 0.6785473814373709, iteration: 434651
loss: 0.973423182964325,grad_norm: 0.7389717763411556, iteration: 434652
loss: 0.9985531568527222,grad_norm: 0.6753332966409257, iteration: 434653
loss: 1.0253043174743652,grad_norm: 0.8227982522329247, iteration: 434654
loss: 1.0228960514068604,grad_norm: 0.8411551401456167, iteration: 434655
loss: 0.9787333607673645,grad_norm: 0.8914023564415223, iteration: 434656
loss: 1.0101208686828613,grad_norm: 0.8128272505871862, iteration: 434657
loss: 0.9996669292449951,grad_norm: 0.8760183714717996, iteration: 434658
loss: 1.0315691232681274,grad_norm: 0.7934617335502523, iteration: 434659
loss: 0.9831346869468689,grad_norm: 0.7040753738255173, iteration: 434660
loss: 1.0332233905792236,grad_norm: 0.7564932996661146, iteration: 434661
loss: 1.0147318840026855,grad_norm: 0.8373885057265199, iteration: 434662
loss: 0.9771731495857239,grad_norm: 0.7501054380201709, iteration: 434663
loss: 0.9982666373252869,grad_norm: 0.8873822258327844, iteration: 434664
loss: 0.9920796155929565,grad_norm: 0.7107463815499969, iteration: 434665
loss: 1.0057481527328491,grad_norm: 0.7276458459676851, iteration: 434666
loss: 0.9947670102119446,grad_norm: 0.9999994591848349, iteration: 434667
loss: 1.009886622428894,grad_norm: 0.787698610113353, iteration: 434668
loss: 0.993488073348999,grad_norm: 0.7415771998530962, iteration: 434669
loss: 1.0200881958007812,grad_norm: 0.7738926070062428, iteration: 434670
loss: 0.9842478036880493,grad_norm: 0.7865418573571678, iteration: 434671
loss: 1.0097068548202515,grad_norm: 0.8101387731871106, iteration: 434672
loss: 0.9830751419067383,grad_norm: 0.753790773091776, iteration: 434673
loss: 0.9997244477272034,grad_norm: 0.7423163891702359, iteration: 434674
loss: 1.0132005214691162,grad_norm: 0.6535278948951506, iteration: 434675
loss: 1.020029067993164,grad_norm: 0.7601562404311407, iteration: 434676
loss: 1.0068973302841187,grad_norm: 0.8248425812575323, iteration: 434677
loss: 0.98137366771698,grad_norm: 0.8053044996895814, iteration: 434678
loss: 1.0106076002120972,grad_norm: 0.6887797582148112, iteration: 434679
loss: 0.9698579907417297,grad_norm: 0.8910262757129047, iteration: 434680
loss: 0.9903444051742554,grad_norm: 0.7307083051402493, iteration: 434681
loss: 0.9817761778831482,grad_norm: 0.7791008611295884, iteration: 434682
loss: 1.0058878660202026,grad_norm: 0.7092044095572856, iteration: 434683
loss: 1.0211893320083618,grad_norm: 0.7753548000740985, iteration: 434684
loss: 0.9880074262619019,grad_norm: 0.5964471390999105, iteration: 434685
loss: 1.0163161754608154,grad_norm: 0.7508750945372741, iteration: 434686
loss: 1.0025197267532349,grad_norm: 0.9999994708922831, iteration: 434687
loss: 1.022377371788025,grad_norm: 0.6550491036480605, iteration: 434688
loss: 0.9577336311340332,grad_norm: 0.7614108677244608, iteration: 434689
loss: 0.9780645370483398,grad_norm: 0.9997840617378427, iteration: 434690
loss: 0.9726057052612305,grad_norm: 0.9999989133793501, iteration: 434691
loss: 1.031489610671997,grad_norm: 0.8597589924159977, iteration: 434692
loss: 1.0504226684570312,grad_norm: 0.9999990989287336, iteration: 434693
loss: 0.9707891345024109,grad_norm: 0.9150551992701927, iteration: 434694
loss: 1.0154099464416504,grad_norm: 0.7418509103312712, iteration: 434695
loss: 1.003095269203186,grad_norm: 0.7047668756980139, iteration: 434696
loss: 0.9876521825790405,grad_norm: 0.9999998340706433, iteration: 434697
loss: 1.0480083227157593,grad_norm: 0.7709400517529217, iteration: 434698
loss: 0.9958540201187134,grad_norm: 0.8096249882129046, iteration: 434699
loss: 1.0187746286392212,grad_norm: 0.7156636075090385, iteration: 434700
loss: 0.9753578901290894,grad_norm: 0.8025849946125316, iteration: 434701
loss: 1.0086259841918945,grad_norm: 0.9072555160655857, iteration: 434702
loss: 0.9776180982589722,grad_norm: 0.714137113234469, iteration: 434703
loss: 0.9625021815299988,grad_norm: 0.8667053275354016, iteration: 434704
loss: 0.9894198775291443,grad_norm: 0.7577384016462188, iteration: 434705
loss: 0.9771977663040161,grad_norm: 0.8997826134326743, iteration: 434706
loss: 1.0165826082229614,grad_norm: 0.7449428510305954, iteration: 434707
loss: 0.9614041447639465,grad_norm: 0.7785435748619662, iteration: 434708
loss: 0.9851664304733276,grad_norm: 0.6646569115227438, iteration: 434709
loss: 0.9947887659072876,grad_norm: 0.92707953521237, iteration: 434710
loss: 0.9711084961891174,grad_norm: 0.9999991667122622, iteration: 434711
loss: 0.9595910310745239,grad_norm: 0.9999995051706178, iteration: 434712
loss: 1.127732753753662,grad_norm: 0.7820049369920767, iteration: 434713
loss: 1.0075700283050537,grad_norm: 0.9999993410978588, iteration: 434714
loss: 1.010515809059143,grad_norm: 0.8254877452319771, iteration: 434715
loss: 0.9651877284049988,grad_norm: 0.7002303097759522, iteration: 434716
loss: 0.956319272518158,grad_norm: 0.7381139986298768, iteration: 434717
loss: 1.0064888000488281,grad_norm: 0.7411425227370576, iteration: 434718
loss: 1.0282957553863525,grad_norm: 0.7744214920619817, iteration: 434719
loss: 1.108370065689087,grad_norm: 0.9999995631502445, iteration: 434720
loss: 0.9844607710838318,grad_norm: 0.7618312055232163, iteration: 434721
loss: 0.971991240978241,grad_norm: 0.8058760620646135, iteration: 434722
loss: 0.9946296215057373,grad_norm: 0.8534433321490585, iteration: 434723
loss: 0.9948652386665344,grad_norm: 0.7571195679630458, iteration: 434724
loss: 1.0012261867523193,grad_norm: 0.9039002539275641, iteration: 434725
loss: 1.0015314817428589,grad_norm: 0.7443766129493716, iteration: 434726
loss: 1.020341157913208,grad_norm: 0.7488569995152281, iteration: 434727
loss: 1.0031429529190063,grad_norm: 0.9056134859012306, iteration: 434728
loss: 1.0427767038345337,grad_norm: 0.9259908704484144, iteration: 434729
loss: 1.0268487930297852,grad_norm: 0.99999915568908, iteration: 434730
loss: 0.9720110893249512,grad_norm: 0.7512172391667152, iteration: 434731
loss: 1.0056328773498535,grad_norm: 0.7271657426532114, iteration: 434732
loss: 1.0406543016433716,grad_norm: 0.7626335640425487, iteration: 434733
loss: 0.9936088919639587,grad_norm: 0.8089476676585757, iteration: 434734
loss: 1.0158082246780396,grad_norm: 0.7739611035874419, iteration: 434735
loss: 1.0070667266845703,grad_norm: 0.6722664218639823, iteration: 434736
loss: 1.0032236576080322,grad_norm: 0.7959085084614382, iteration: 434737
loss: 0.994240403175354,grad_norm: 0.7549582315610749, iteration: 434738
loss: 1.008651614189148,grad_norm: 0.8191188684534604, iteration: 434739
loss: 0.991927981376648,grad_norm: 0.7426445905335609, iteration: 434740
loss: 1.0093433856964111,grad_norm: 0.8285280378269501, iteration: 434741
loss: 1.0165902376174927,grad_norm: 0.6923250721046457, iteration: 434742
loss: 0.9470388293266296,grad_norm: 0.7292060386399214, iteration: 434743
loss: 1.1552097797393799,grad_norm: 0.9999994787102812, iteration: 434744
loss: 1.0003076791763306,grad_norm: 0.74875777838616, iteration: 434745
loss: 0.969778299331665,grad_norm: 0.8691696569712493, iteration: 434746
loss: 1.0676779747009277,grad_norm: 0.734151960611195, iteration: 434747
loss: 0.9987119436264038,grad_norm: 0.9418610236864202, iteration: 434748
loss: 0.9707549214363098,grad_norm: 0.7483978010114944, iteration: 434749
loss: 1.017011284828186,grad_norm: 0.7517968077259471, iteration: 434750
loss: 0.9850526452064514,grad_norm: 0.7482926214307021, iteration: 434751
loss: 0.9719049334526062,grad_norm: 0.8043760789110324, iteration: 434752
loss: 0.9805848002433777,grad_norm: 0.8042676652728791, iteration: 434753
loss: 1.0127209424972534,grad_norm: 0.8505327739777937, iteration: 434754
loss: 0.9899710416793823,grad_norm: 0.8185282159407936, iteration: 434755
loss: 1.010915994644165,grad_norm: 0.7042806072564374, iteration: 434756
loss: 1.0002553462982178,grad_norm: 0.8787671475898291, iteration: 434757
loss: 0.9993334412574768,grad_norm: 0.7484056813398958, iteration: 434758
loss: 1.0258184671401978,grad_norm: 0.8137276315444918, iteration: 434759
loss: 1.0095546245574951,grad_norm: 0.999999289384245, iteration: 434760
loss: 0.9397327899932861,grad_norm: 0.7928480656149595, iteration: 434761
loss: 0.993457555770874,grad_norm: 0.7468501703169231, iteration: 434762
loss: 1.075814127922058,grad_norm: 0.8590223879459654, iteration: 434763
loss: 0.988244891166687,grad_norm: 0.7240106508118654, iteration: 434764
loss: 1.008442997932434,grad_norm: 0.7129890298240337, iteration: 434765
loss: 0.997863233089447,grad_norm: 0.9999991047729335, iteration: 434766
loss: 0.9798291921615601,grad_norm: 0.717451939133315, iteration: 434767
loss: 1.0058248043060303,grad_norm: 0.8387126751952838, iteration: 434768
loss: 0.9863805174827576,grad_norm: 0.6533363367838233, iteration: 434769
loss: 0.9858786463737488,grad_norm: 0.7806736744248254, iteration: 434770
loss: 1.0090522766113281,grad_norm: 0.8359223736328195, iteration: 434771
loss: 0.9775621294975281,grad_norm: 0.9475421226672616, iteration: 434772
loss: 1.003328561782837,grad_norm: 0.6947999036430293, iteration: 434773
loss: 0.9793156385421753,grad_norm: 0.9999996609069662, iteration: 434774
loss: 1.000854730606079,grad_norm: 0.7604330537246811, iteration: 434775
loss: 1.0188239812850952,grad_norm: 0.7249390694014841, iteration: 434776
loss: 0.9900795817375183,grad_norm: 0.7217181037352146, iteration: 434777
loss: 1.008839726448059,grad_norm: 0.733677048158791, iteration: 434778
loss: 0.9813359379768372,grad_norm: 0.8352552387157159, iteration: 434779
loss: 1.0501420497894287,grad_norm: 0.9999991221567516, iteration: 434780
loss: 0.9801360368728638,grad_norm: 0.9999992453051795, iteration: 434781
loss: 1.0351837873458862,grad_norm: 0.9086540021783708, iteration: 434782
loss: 1.0163830518722534,grad_norm: 0.7021490954830926, iteration: 434783
loss: 0.9710264801979065,grad_norm: 0.6673153472498955, iteration: 434784
loss: 0.9659444093704224,grad_norm: 0.7854070656749849, iteration: 434785
loss: 1.0062980651855469,grad_norm: 0.7621285150775502, iteration: 434786
loss: 1.0027894973754883,grad_norm: 0.6901526690067911, iteration: 434787
loss: 1.0658481121063232,grad_norm: 0.770807874961438, iteration: 434788
loss: 1.0054482221603394,grad_norm: 0.8798193834685689, iteration: 434789
loss: 0.9929425716400146,grad_norm: 0.7603114437533679, iteration: 434790
loss: 0.991704523563385,grad_norm: 0.693075318837324, iteration: 434791
loss: 1.04314124584198,grad_norm: 0.7296062528670882, iteration: 434792
loss: 1.0047820806503296,grad_norm: 0.6695045327741218, iteration: 434793
loss: 0.9881442189216614,grad_norm: 0.7707298381966172, iteration: 434794
loss: 0.9827682971954346,grad_norm: 0.9282957854870669, iteration: 434795
loss: 0.9973841905593872,grad_norm: 0.7446852343294983, iteration: 434796
loss: 0.9770154356956482,grad_norm: 0.5998848295013892, iteration: 434797
loss: 1.025598406791687,grad_norm: 0.7764095627229212, iteration: 434798
loss: 0.9633023142814636,grad_norm: 0.8565619161240464, iteration: 434799
loss: 1.0172796249389648,grad_norm: 0.7688554963659087, iteration: 434800
loss: 0.9836339354515076,grad_norm: 0.6855274456351587, iteration: 434801
loss: 1.034773349761963,grad_norm: 0.7464927835947703, iteration: 434802
loss: 1.0329432487487793,grad_norm: 0.9527461307739945, iteration: 434803
loss: 1.0743910074234009,grad_norm: 0.9325339908850908, iteration: 434804
loss: 1.0056685209274292,grad_norm: 0.8614099588515566, iteration: 434805
loss: 1.015151858329773,grad_norm: 0.7767404513549007, iteration: 434806
loss: 1.1715888977050781,grad_norm: 0.9999997477785495, iteration: 434807
loss: 0.9910726547241211,grad_norm: 0.7509603298011089, iteration: 434808
loss: 0.9503386616706848,grad_norm: 0.7118927540206209, iteration: 434809
loss: 1.0719481706619263,grad_norm: 0.9999995272459187, iteration: 434810
loss: 1.024414300918579,grad_norm: 0.8988134389162651, iteration: 434811
loss: 0.9716008901596069,grad_norm: 0.8356231878638276, iteration: 434812
loss: 1.004431128501892,grad_norm: 0.8080360271994337, iteration: 434813
loss: 0.9885255694389343,grad_norm: 0.7065901487565518, iteration: 434814
loss: 0.979573130607605,grad_norm: 0.6526765154379008, iteration: 434815
loss: 0.9898794293403625,grad_norm: 0.999999406475992, iteration: 434816
loss: 1.0411713123321533,grad_norm: 0.9999990293771168, iteration: 434817
loss: 1.151997685432434,grad_norm: 0.9999998213587504, iteration: 434818
loss: 1.273260235786438,grad_norm: 0.9999998997889631, iteration: 434819
loss: 1.2095898389816284,grad_norm: 0.999999258149507, iteration: 434820
loss: 0.9613367319107056,grad_norm: 0.8271993954078745, iteration: 434821
loss: 0.991901159286499,grad_norm: 0.6596816826505704, iteration: 434822
loss: 1.048597812652588,grad_norm: 0.9999998664879676, iteration: 434823
loss: 0.9952344298362732,grad_norm: 0.8462130735397169, iteration: 434824
loss: 1.0294255018234253,grad_norm: 0.9089887420549203, iteration: 434825
loss: 0.988013505935669,grad_norm: 0.7568325843521597, iteration: 434826
loss: 1.0814865827560425,grad_norm: 0.9999995999879886, iteration: 434827
loss: 1.0031806230545044,grad_norm: 0.8508958119298003, iteration: 434828
loss: 0.9776325821876526,grad_norm: 0.750816944004149, iteration: 434829
loss: 1.0154552459716797,grad_norm: 0.7793102677682758, iteration: 434830
loss: 0.9932336211204529,grad_norm: 0.8740542233701291, iteration: 434831
loss: 1.091455101966858,grad_norm: 0.99999954221105, iteration: 434832
loss: 0.9773334264755249,grad_norm: 0.8133964253797435, iteration: 434833
loss: 0.9750950336456299,grad_norm: 0.8491352165116185, iteration: 434834
loss: 1.007171392440796,grad_norm: 0.9132423475486747, iteration: 434835
loss: 1.0027412176132202,grad_norm: 0.9999991495806014, iteration: 434836
loss: 1.0409915447235107,grad_norm: 0.8373442696321288, iteration: 434837
loss: 0.991794764995575,grad_norm: 0.7979707619927391, iteration: 434838
loss: 1.0292596817016602,grad_norm: 0.8666253077964395, iteration: 434839
loss: 1.0910674333572388,grad_norm: 0.999999137378002, iteration: 434840
loss: 0.9903466701507568,grad_norm: 0.9915274049254038, iteration: 434841
loss: 1.0110653638839722,grad_norm: 0.7451125081746114, iteration: 434842
loss: 1.0354541540145874,grad_norm: 0.8336792825033261, iteration: 434843
loss: 0.999195396900177,grad_norm: 0.8232464561323266, iteration: 434844
loss: 0.9688917994499207,grad_norm: 0.7879658496313009, iteration: 434845
loss: 1.0157865285873413,grad_norm: 0.8233929514587808, iteration: 434846
loss: 0.9775092601776123,grad_norm: 0.7229644091307817, iteration: 434847
loss: 0.9586400389671326,grad_norm: 0.9999992113931603, iteration: 434848
loss: 1.011762022972107,grad_norm: 0.5893608857001678, iteration: 434849
loss: 1.0001460313796997,grad_norm: 0.9824839537498351, iteration: 434850
loss: 1.0268006324768066,grad_norm: 0.8163153280933982, iteration: 434851
loss: 0.9578520655632019,grad_norm: 0.8346877261036231, iteration: 434852
loss: 1.0543707609176636,grad_norm: 0.8154281020175307, iteration: 434853
loss: 1.0146801471710205,grad_norm: 0.8133008584236132, iteration: 434854
loss: 1.0105801820755005,grad_norm: 0.8351404643307264, iteration: 434855
loss: 0.9985335469245911,grad_norm: 0.6662339091626667, iteration: 434856
loss: 1.0380139350891113,grad_norm: 0.7164561773355179, iteration: 434857
loss: 0.9818950295448303,grad_norm: 0.7742209476084201, iteration: 434858
loss: 1.0863100290298462,grad_norm: 0.9999997029363102, iteration: 434859
loss: 0.9813839197158813,grad_norm: 0.9999990976068702, iteration: 434860
loss: 1.0835113525390625,grad_norm: 0.9999993318956928, iteration: 434861
loss: 0.9996079802513123,grad_norm: 0.7860174819513749, iteration: 434862
loss: 1.0169095993041992,grad_norm: 0.8286423473638972, iteration: 434863
loss: 1.0263683795928955,grad_norm: 0.8517169302394377, iteration: 434864
loss: 0.987650990486145,grad_norm: 0.9521151571026987, iteration: 434865
loss: 0.9691019058227539,grad_norm: 0.6930044348681687, iteration: 434866
loss: 1.0283132791519165,grad_norm: 0.9999994835943448, iteration: 434867
loss: 1.0103894472122192,grad_norm: 0.8423394248711781, iteration: 434868
loss: 0.9746772050857544,grad_norm: 0.7903651849851786, iteration: 434869
loss: 1.005460262298584,grad_norm: 0.9092882008291726, iteration: 434870
loss: 0.9617403149604797,grad_norm: 0.7817882816003314, iteration: 434871
loss: 1.0290000438690186,grad_norm: 0.999999041404114, iteration: 434872
loss: 0.9894628524780273,grad_norm: 0.7737349021331779, iteration: 434873
loss: 1.0064581632614136,grad_norm: 0.7638630705653303, iteration: 434874
loss: 1.0358144044876099,grad_norm: 0.7852764439557189, iteration: 434875
loss: 1.0470130443572998,grad_norm: 0.8485504539694516, iteration: 434876
loss: 0.9812473654747009,grad_norm: 0.7563441602170945, iteration: 434877
loss: 1.025547742843628,grad_norm: 0.8645290594142615, iteration: 434878
loss: 1.0007520914077759,grad_norm: 0.7601041270025742, iteration: 434879
loss: 0.9881201982498169,grad_norm: 0.8038144096440154, iteration: 434880
loss: 1.0319807529449463,grad_norm: 0.8536648555013624, iteration: 434881
loss: 1.0028737783432007,grad_norm: 0.7518718857473213, iteration: 434882
loss: 0.9972900152206421,grad_norm: 0.9187729691779073, iteration: 434883
loss: 1.0564903020858765,grad_norm: 0.999999066758981, iteration: 434884
loss: 0.9942219853401184,grad_norm: 0.6774725381745572, iteration: 434885
loss: 1.0112625360488892,grad_norm: 0.8319619561683943, iteration: 434886
loss: 1.0097343921661377,grad_norm: 0.8730787210526905, iteration: 434887
loss: 1.0356007814407349,grad_norm: 0.6291563444074918, iteration: 434888
loss: 0.9978477358818054,grad_norm: 0.8116461245950288, iteration: 434889
loss: 1.0514519214630127,grad_norm: 0.9999992928766633, iteration: 434890
loss: 1.0692607164382935,grad_norm: 0.7780875627519489, iteration: 434891
loss: 0.9932609796524048,grad_norm: 0.7416409300525197, iteration: 434892
loss: 1.0304056406021118,grad_norm: 0.8539653984233372, iteration: 434893
loss: 1.0275835990905762,grad_norm: 0.9389744337996881, iteration: 434894
loss: 1.0056957006454468,grad_norm: 0.7890713399510826, iteration: 434895
loss: 0.9808677434921265,grad_norm: 0.7271143040284026, iteration: 434896
loss: 0.9812934398651123,grad_norm: 0.8444607006033185, iteration: 434897
loss: 0.9866287112236023,grad_norm: 0.7244391937273829, iteration: 434898
loss: 0.9823216199874878,grad_norm: 0.7852087766608346, iteration: 434899
loss: 1.0656758546829224,grad_norm: 0.6800136408275573, iteration: 434900
loss: 0.9920811653137207,grad_norm: 0.9999991584573275, iteration: 434901
loss: 0.9762654304504395,grad_norm: 0.7455962993544701, iteration: 434902
loss: 1.002811074256897,grad_norm: 0.7663935353484925, iteration: 434903
loss: 0.9650739431381226,grad_norm: 0.6186740502723592, iteration: 434904
loss: 0.9911028146743774,grad_norm: 0.8139587754068847, iteration: 434905
loss: 1.0132551193237305,grad_norm: 0.7428420277439568, iteration: 434906
loss: 1.0222736597061157,grad_norm: 0.6747781373352717, iteration: 434907
loss: 1.0304617881774902,grad_norm: 0.9999992667235006, iteration: 434908
loss: 1.0033292770385742,grad_norm: 0.8665254359898679, iteration: 434909
loss: 0.9750370383262634,grad_norm: 0.9999991599768878, iteration: 434910
loss: 0.9850478172302246,grad_norm: 0.7836840768268561, iteration: 434911
loss: 1.018697738647461,grad_norm: 0.7162987989790066, iteration: 434912
loss: 1.0186113119125366,grad_norm: 0.8161187862491968, iteration: 434913
loss: 0.9442891478538513,grad_norm: 0.7775066256376084, iteration: 434914
loss: 0.9857209324836731,grad_norm: 0.7339496366336131, iteration: 434915
loss: 1.0132713317871094,grad_norm: 0.800496017015394, iteration: 434916
loss: 0.9901714324951172,grad_norm: 0.7364460750797411, iteration: 434917
loss: 1.0699353218078613,grad_norm: 0.8168193502122364, iteration: 434918
loss: 0.9986671805381775,grad_norm: 0.9177430756557144, iteration: 434919
loss: 0.9930773377418518,grad_norm: 0.6419903706555787, iteration: 434920
loss: 1.024321436882019,grad_norm: 0.787097022029543, iteration: 434921
loss: 1.032016634941101,grad_norm: 0.7529237220448197, iteration: 434922
loss: 1.0189998149871826,grad_norm: 0.6891667513470933, iteration: 434923
loss: 1.0015572309494019,grad_norm: 0.8578884371406965, iteration: 434924
loss: 1.003506064414978,grad_norm: 0.786973136237711, iteration: 434925
loss: 1.0101264715194702,grad_norm: 0.7890189460514797, iteration: 434926
loss: 1.002261757850647,grad_norm: 0.6290949527213584, iteration: 434927
loss: 0.9739001393318176,grad_norm: 0.7039092401804946, iteration: 434928
loss: 0.9402744770050049,grad_norm: 0.9103227260838499, iteration: 434929
loss: 1.0102150440216064,grad_norm: 0.7140021171641434, iteration: 434930
loss: 0.9661742448806763,grad_norm: 0.6961668139371199, iteration: 434931
loss: 1.0538151264190674,grad_norm: 0.9999992019569629, iteration: 434932
loss: 0.9835572838783264,grad_norm: 0.7845458934443015, iteration: 434933
loss: 0.9949617385864258,grad_norm: 0.773690010279641, iteration: 434934
loss: 1.00053071975708,grad_norm: 0.8017886014489434, iteration: 434935
loss: 1.028420090675354,grad_norm: 0.8174057867202748, iteration: 434936
loss: 1.0220928192138672,grad_norm: 0.7678900597585523, iteration: 434937
loss: 1.0103371143341064,grad_norm: 0.5944968686555921, iteration: 434938
loss: 1.005562663078308,grad_norm: 0.8839586572911793, iteration: 434939
loss: 1.0130114555358887,grad_norm: 0.9216135151531036, iteration: 434940
loss: 1.0056207180023193,grad_norm: 0.9209552487116346, iteration: 434941
loss: 0.980074942111969,grad_norm: 0.7656239451399953, iteration: 434942
loss: 0.9980325698852539,grad_norm: 0.7780147342228486, iteration: 434943
loss: 1.0316163301467896,grad_norm: 0.8100934621269053, iteration: 434944
loss: 1.0145673751831055,grad_norm: 0.7062249516720531, iteration: 434945
loss: 1.0268174409866333,grad_norm: 0.9999991142076057, iteration: 434946
loss: 0.9986720085144043,grad_norm: 0.7910218127573867, iteration: 434947
loss: 0.9773101806640625,grad_norm: 0.8312103735738694, iteration: 434948
loss: 1.015257477760315,grad_norm: 0.7711996828497564, iteration: 434949
loss: 1.0030205249786377,grad_norm: 0.9332275516303968, iteration: 434950
loss: 1.0328329801559448,grad_norm: 0.7136660724623539, iteration: 434951
loss: 1.0270394086837769,grad_norm: 1.000000078406619, iteration: 434952
loss: 1.0245364904403687,grad_norm: 0.8794577026299396, iteration: 434953
loss: 0.9932025671005249,grad_norm: 0.7433110790505788, iteration: 434954
loss: 1.0273901224136353,grad_norm: 0.7455649223634997, iteration: 434955
loss: 1.012864112854004,grad_norm: 0.7860972361318266, iteration: 434956
loss: 1.228310227394104,grad_norm: 0.9999997203916762, iteration: 434957
loss: 1.020267128944397,grad_norm: 0.8623301252911325, iteration: 434958
loss: 1.0035779476165771,grad_norm: 0.8285630698551221, iteration: 434959
loss: 1.0171018838882446,grad_norm: 0.9999990166033467, iteration: 434960
loss: 0.9842132329940796,grad_norm: 0.8493072363517136, iteration: 434961
loss: 0.986516535282135,grad_norm: 0.8974898961244061, iteration: 434962
loss: 0.9920654296875,grad_norm: 0.7524967005882929, iteration: 434963
loss: 0.9966697096824646,grad_norm: 0.9999990908484774, iteration: 434964
loss: 1.0177289247512817,grad_norm: 0.734229767234574, iteration: 434965
loss: 0.9690743088722229,grad_norm: 0.8903493569192931, iteration: 434966
loss: 0.9773809313774109,grad_norm: 0.7108426994551539, iteration: 434967
loss: 1.025865912437439,grad_norm: 0.7204834251965091, iteration: 434968
loss: 1.0312597751617432,grad_norm: 0.8729775650498168, iteration: 434969
loss: 1.015008807182312,grad_norm: 0.712136256943517, iteration: 434970
loss: 1.0026378631591797,grad_norm: 0.7426803379365846, iteration: 434971
loss: 1.0185834169387817,grad_norm: 0.9139793033300637, iteration: 434972
loss: 1.0042060613632202,grad_norm: 0.8319175708538702, iteration: 434973
loss: 0.9768936038017273,grad_norm: 0.6902668198219186, iteration: 434974
loss: 1.0053359270095825,grad_norm: 0.71065471929371, iteration: 434975
loss: 0.9814617037773132,grad_norm: 0.703549708703651, iteration: 434976
loss: 0.9534690380096436,grad_norm: 0.6444776864262449, iteration: 434977
loss: 0.9593344330787659,grad_norm: 0.7296194187349077, iteration: 434978
loss: 0.9999772310256958,grad_norm: 0.9999991269249083, iteration: 434979
loss: 0.9839080572128296,grad_norm: 0.6951343372384606, iteration: 434980
loss: 0.9710031747817993,grad_norm: 0.7270670403001803, iteration: 434981
loss: 1.03391695022583,grad_norm: 0.6890513596784935, iteration: 434982
loss: 0.9865620136260986,grad_norm: 0.8079078523632697, iteration: 434983
loss: 1.1414185762405396,grad_norm: 0.9999995822308055, iteration: 434984
loss: 1.0039482116699219,grad_norm: 0.9023794769321347, iteration: 434985
loss: 1.032273769378662,grad_norm: 0.7763461496717363, iteration: 434986
loss: 0.9851364493370056,grad_norm: 0.7078190272032205, iteration: 434987
loss: 1.0132449865341187,grad_norm: 0.8552350362804887, iteration: 434988
loss: 0.9457873702049255,grad_norm: 0.728464230945466, iteration: 434989
loss: 1.0067659616470337,grad_norm: 0.7301453943442425, iteration: 434990
loss: 1.0196017026901245,grad_norm: 0.7195369883077035, iteration: 434991
loss: 1.0038787126541138,grad_norm: 0.6851473843508272, iteration: 434992
loss: 1.022660255432129,grad_norm: 0.7219365431497767, iteration: 434993
loss: 1.0200659036636353,grad_norm: 0.9999990794005468, iteration: 434994
loss: 1.025144338607788,grad_norm: 0.7087945196114422, iteration: 434995
loss: 1.000898838043213,grad_norm: 0.7221565503797427, iteration: 434996
loss: 0.9974381923675537,grad_norm: 0.8341182446369648, iteration: 434997
loss: 1.0198147296905518,grad_norm: 0.7890406021324263, iteration: 434998
loss: 1.0264720916748047,grad_norm: 0.7247651542407838, iteration: 434999
loss: 1.009169578552246,grad_norm: 0.8862499866519707, iteration: 435000
loss: 0.9885981678962708,grad_norm: 0.724960469473685, iteration: 435001
loss: 0.9959027171134949,grad_norm: 0.6988725172347524, iteration: 435002
loss: 0.987980306148529,grad_norm: 0.7750835276707303, iteration: 435003
loss: 0.9634278416633606,grad_norm: 0.7417314829391196, iteration: 435004
loss: 0.9469716548919678,grad_norm: 0.7583351894445506, iteration: 435005
loss: 0.9904941916465759,grad_norm: 0.7020736144147507, iteration: 435006
loss: 0.9824460744857788,grad_norm: 0.8150132532710468, iteration: 435007
loss: 1.0123175382614136,grad_norm: 0.6843575199129337, iteration: 435008
loss: 0.9895888566970825,grad_norm: 0.750638154280649, iteration: 435009
loss: 1.018925428390503,grad_norm: 0.999999003661595, iteration: 435010
loss: 1.012603521347046,grad_norm: 0.80703314706981, iteration: 435011
loss: 0.9574906229972839,grad_norm: 0.6960935975100148, iteration: 435012
loss: 1.0072494745254517,grad_norm: 0.8745298855319849, iteration: 435013
loss: 1.0219625234603882,grad_norm: 0.9999995897721244, iteration: 435014
loss: 0.9947384595870972,grad_norm: 0.6466041979929381, iteration: 435015
loss: 0.9873933792114258,grad_norm: 0.6504567712466824, iteration: 435016
loss: 1.004958152770996,grad_norm: 0.7592359550713917, iteration: 435017
loss: 1.0008108615875244,grad_norm: 0.7785277083605159, iteration: 435018
loss: 1.0354852676391602,grad_norm: 0.9999998477162492, iteration: 435019
loss: 1.0914885997772217,grad_norm: 0.9999990512366153, iteration: 435020
loss: 0.979663610458374,grad_norm: 0.6239113989036381, iteration: 435021
loss: 1.0016415119171143,grad_norm: 0.7967175001453535, iteration: 435022
loss: 1.0223222970962524,grad_norm: 0.9999997910653771, iteration: 435023
loss: 1.0312505960464478,grad_norm: 0.8444440131515193, iteration: 435024
loss: 0.9917432069778442,grad_norm: 0.7907563997126246, iteration: 435025
loss: 0.9907741546630859,grad_norm: 0.815829179768422, iteration: 435026
loss: 0.9901784658432007,grad_norm: 0.7116174125134866, iteration: 435027
loss: 0.9776942133903503,grad_norm: 0.8173490716137052, iteration: 435028
loss: 0.9730170965194702,grad_norm: 0.6974665480385424, iteration: 435029
loss: 1.0022716522216797,grad_norm: 0.828352838688645, iteration: 435030
loss: 0.985681414604187,grad_norm: 0.6912672801223363, iteration: 435031
loss: 1.0071274042129517,grad_norm: 0.6773832933471651, iteration: 435032
loss: 1.0007723569869995,grad_norm: 0.7978510746006809, iteration: 435033
loss: 1.0099070072174072,grad_norm: 0.7660310376369408, iteration: 435034
loss: 0.9871619939804077,grad_norm: 0.7847431731363099, iteration: 435035
loss: 1.001901626586914,grad_norm: 0.9999991191413254, iteration: 435036
loss: 1.0041218996047974,grad_norm: 0.8268022556399157, iteration: 435037
loss: 1.0262185335159302,grad_norm: 0.6747502707168364, iteration: 435038
loss: 1.0192723274230957,grad_norm: 0.9073224042069634, iteration: 435039
loss: 0.9949820041656494,grad_norm: 0.9999991078859926, iteration: 435040
loss: 1.0221530199050903,grad_norm: 0.8318219113247383, iteration: 435041
loss: 0.9974151849746704,grad_norm: 0.7935902888259897, iteration: 435042
loss: 0.9864851832389832,grad_norm: 0.724257533376923, iteration: 435043
loss: 1.0169063806533813,grad_norm: 0.733512421207817, iteration: 435044
loss: 1.00307035446167,grad_norm: 0.6462489044238566, iteration: 435045
loss: 1.0180689096450806,grad_norm: 0.9999998619169256, iteration: 435046
loss: 1.0256870985031128,grad_norm: 0.9554859730515944, iteration: 435047
loss: 0.9988645911216736,grad_norm: 0.8025680231245665, iteration: 435048
loss: 0.9874014854431152,grad_norm: 0.7160564082432267, iteration: 435049
loss: 0.9861146807670593,grad_norm: 0.7687572844813383, iteration: 435050
loss: 0.9767756462097168,grad_norm: 0.8564251053238605, iteration: 435051
loss: 1.0088571310043335,grad_norm: 0.8274624494061728, iteration: 435052
loss: 0.9819249510765076,grad_norm: 0.7005231541956273, iteration: 435053
loss: 0.9735247492790222,grad_norm: 0.831825314310608, iteration: 435054
loss: 1.0364450216293335,grad_norm: 0.9999997401800516, iteration: 435055
loss: 0.9891787171363831,grad_norm: 0.7852722584757057, iteration: 435056
loss: 0.9714460968971252,grad_norm: 0.6909246453305745, iteration: 435057
loss: 0.9914297461509705,grad_norm: 0.7037108117825881, iteration: 435058
loss: 0.9923983812332153,grad_norm: 0.8221105517243387, iteration: 435059
loss: 0.9738186597824097,grad_norm: 0.9999997792470932, iteration: 435060
loss: 0.9919745922088623,grad_norm: 0.712658898231502, iteration: 435061
loss: 1.0073747634887695,grad_norm: 0.8242026825279186, iteration: 435062
loss: 1.014880657196045,grad_norm: 0.8641838528227743, iteration: 435063
loss: 0.9306548833847046,grad_norm: 0.7332747400062695, iteration: 435064
loss: 1.102972388267517,grad_norm: 0.7278677235813019, iteration: 435065
loss: 1.0198231935501099,grad_norm: 0.9375118741324169, iteration: 435066
loss: 1.012116551399231,grad_norm: 0.9999996415672471, iteration: 435067
loss: 0.9865835905075073,grad_norm: 0.712808703050813, iteration: 435068
loss: 0.9959103465080261,grad_norm: 0.7217656208668095, iteration: 435069
loss: 0.9894240498542786,grad_norm: 0.8750580028782228, iteration: 435070
loss: 1.0075103044509888,grad_norm: 0.9517694340460882, iteration: 435071
loss: 0.9757452011108398,grad_norm: 0.7591703550337868, iteration: 435072
loss: 0.9737453460693359,grad_norm: 0.6371986152225364, iteration: 435073
loss: 0.9741503000259399,grad_norm: 0.9108076332275669, iteration: 435074
loss: 0.9902287125587463,grad_norm: 0.9053021952705578, iteration: 435075
loss: 1.0639017820358276,grad_norm: 0.8832190021914582, iteration: 435076
loss: 1.0166634321212769,grad_norm: 0.810082926907864, iteration: 435077
loss: 1.0085214376449585,grad_norm: 0.7486181891518667, iteration: 435078
loss: 0.9898203015327454,grad_norm: 0.6344966121281052, iteration: 435079
loss: 1.034605622291565,grad_norm: 0.9999990552084722, iteration: 435080
loss: 1.0363636016845703,grad_norm: 0.999999132496371, iteration: 435081
loss: 1.01517653465271,grad_norm: 0.7209157417941199, iteration: 435082
loss: 1.0249865055084229,grad_norm: 0.9999989907770281, iteration: 435083
loss: 1.0119649171829224,grad_norm: 0.7127157386138795, iteration: 435084
loss: 1.015825629234314,grad_norm: 0.9866926453815944, iteration: 435085
loss: 1.0276687145233154,grad_norm: 0.9999992308004078, iteration: 435086
loss: 0.9719454050064087,grad_norm: 0.7382533742892315, iteration: 435087
loss: 1.0158438682556152,grad_norm: 0.8443465625371767, iteration: 435088
loss: 0.9639278650283813,grad_norm: 0.8223969052278188, iteration: 435089
loss: 1.0052582025527954,grad_norm: 0.7703567782787958, iteration: 435090
loss: 1.0334571599960327,grad_norm: 0.9999999375103615, iteration: 435091
loss: 1.1268951892852783,grad_norm: 0.9999997965054136, iteration: 435092
loss: 0.9925881624221802,grad_norm: 0.858906684016774, iteration: 435093
loss: 1.045093297958374,grad_norm: 0.9462788736116059, iteration: 435094
loss: 0.9893412590026855,grad_norm: 0.7599679344423533, iteration: 435095
loss: 1.0519593954086304,grad_norm: 0.9265965203771416, iteration: 435096
loss: 0.9952275156974792,grad_norm: 0.8054103086022564, iteration: 435097
loss: 1.0385175943374634,grad_norm: 0.8864981942577731, iteration: 435098
loss: 1.0081348419189453,grad_norm: 0.8200547022209613, iteration: 435099
loss: 0.9701820611953735,grad_norm: 0.6923820115832651, iteration: 435100
loss: 1.0125229358673096,grad_norm: 0.680262061829876, iteration: 435101
loss: 0.9991832971572876,grad_norm: 0.7246937989335835, iteration: 435102
loss: 0.9916843771934509,grad_norm: 0.8049317235794102, iteration: 435103
loss: 0.9964061379432678,grad_norm: 0.8106999474303492, iteration: 435104
loss: 0.9744773507118225,grad_norm: 0.8999679068460357, iteration: 435105
loss: 0.9723746180534363,grad_norm: 0.8418617932995105, iteration: 435106
loss: 1.0416593551635742,grad_norm: 0.999999342978008, iteration: 435107
loss: 1.027145504951477,grad_norm: 0.6950012691845537, iteration: 435108
loss: 1.0021165609359741,grad_norm: 0.747292839773036, iteration: 435109
loss: 1.084532380104065,grad_norm: 0.9999999243541876, iteration: 435110
loss: 1.0003772974014282,grad_norm: 0.7229725697154872, iteration: 435111
loss: 1.0241267681121826,grad_norm: 0.9675812672061486, iteration: 435112
loss: 1.041557788848877,grad_norm: 0.7807795022174019, iteration: 435113
loss: 1.0226004123687744,grad_norm: 0.8710193423997211, iteration: 435114
loss: 1.0058605670928955,grad_norm: 0.9976839182084579, iteration: 435115
loss: 0.990422248840332,grad_norm: 0.7055152942403604, iteration: 435116
loss: 1.048401117324829,grad_norm: 0.9999998586046215, iteration: 435117
loss: 0.9847602248191833,grad_norm: 0.6748481173307106, iteration: 435118
loss: 1.0104728937149048,grad_norm: 0.7674800753966854, iteration: 435119
loss: 0.9714859127998352,grad_norm: 0.7751253133058261, iteration: 435120
loss: 0.9935745596885681,grad_norm: 0.8176829888796484, iteration: 435121
loss: 1.0130343437194824,grad_norm: 0.6787316973404621, iteration: 435122
loss: 0.9957741498947144,grad_norm: 0.7373825774290464, iteration: 435123
loss: 0.977824330329895,grad_norm: 0.6269588377461222, iteration: 435124
loss: 1.002103567123413,grad_norm: 0.6902079969122183, iteration: 435125
loss: 1.0239516496658325,grad_norm: 0.8677453316776909, iteration: 435126
loss: 0.9744187593460083,grad_norm: 0.8916875910702246, iteration: 435127
loss: 1.017130970954895,grad_norm: 0.7982981617772804, iteration: 435128
loss: 1.0172733068466187,grad_norm: 0.6848827181082695, iteration: 435129
loss: 0.9849598407745361,grad_norm: 0.7194600655077705, iteration: 435130
loss: 0.9675796627998352,grad_norm: 0.7009659168193032, iteration: 435131
loss: 0.9993778467178345,grad_norm: 0.833610109455829, iteration: 435132
loss: 1.008651852607727,grad_norm: 0.9999992086540131, iteration: 435133
loss: 0.9528874158859253,grad_norm: 0.8691120473969913, iteration: 435134
loss: 0.9935368299484253,grad_norm: 0.8013313919052794, iteration: 435135
loss: 0.9893075227737427,grad_norm: 0.826111028332612, iteration: 435136
loss: 0.963778018951416,grad_norm: 0.7838279317182799, iteration: 435137
loss: 1.0077320337295532,grad_norm: 0.8220241315866658, iteration: 435138
loss: 1.028624176979065,grad_norm: 0.8435305212877291, iteration: 435139
loss: 1.0061366558074951,grad_norm: 0.8515724316949136, iteration: 435140
loss: 1.0104598999023438,grad_norm: 0.8421513091248968, iteration: 435141
loss: 0.9493255615234375,grad_norm: 0.7644724195270883, iteration: 435142
loss: 0.9787904620170593,grad_norm: 0.7101241923089997, iteration: 435143
loss: 1.015993356704712,grad_norm: 0.9999991006853851, iteration: 435144
loss: 0.989546537399292,grad_norm: 0.841381284055456, iteration: 435145
loss: 0.9708853960037231,grad_norm: 0.6732748379452966, iteration: 435146
loss: 1.0104939937591553,grad_norm: 0.7590890845818979, iteration: 435147
loss: 0.9867335557937622,grad_norm: 0.7680865833374827, iteration: 435148
loss: 0.9869435429573059,grad_norm: 0.7330859252282107, iteration: 435149
loss: 0.9603484272956848,grad_norm: 0.8852643524654404, iteration: 435150
loss: 1.0145184993743896,grad_norm: 0.9999991631089662, iteration: 435151
loss: 0.9783020615577698,grad_norm: 0.6824237285092833, iteration: 435152
loss: 0.9948983192443848,grad_norm: 0.6833202153701278, iteration: 435153
loss: 0.9639477729797363,grad_norm: 0.7764058614217069, iteration: 435154
loss: 1.0443764925003052,grad_norm: 0.9999991534242537, iteration: 435155
loss: 0.9832531809806824,grad_norm: 0.7570758471226555, iteration: 435156
loss: 1.005545735359192,grad_norm: 0.8146003732636311, iteration: 435157
loss: 1.0254653692245483,grad_norm: 0.9999991333979065, iteration: 435158
loss: 1.0003957748413086,grad_norm: 0.7989470005490432, iteration: 435159
loss: 0.9995400309562683,grad_norm: 0.8770712733022605, iteration: 435160
loss: 1.013026475906372,grad_norm: 0.7726979348715222, iteration: 435161
loss: 1.0238851308822632,grad_norm: 0.9109225954356859, iteration: 435162
loss: 1.065760612487793,grad_norm: 0.7828863836066733, iteration: 435163
loss: 0.9983810782432556,grad_norm: 0.7705997448314811, iteration: 435164
loss: 1.0044025182724,grad_norm: 0.7657218170932164, iteration: 435165
loss: 0.9738408923149109,grad_norm: 0.782389977964694, iteration: 435166
loss: 1.0712549686431885,grad_norm: 0.9999996512199361, iteration: 435167
loss: 0.9997484683990479,grad_norm: 0.7917300861720101, iteration: 435168
loss: 1.1128954887390137,grad_norm: 0.9999998642516572, iteration: 435169
loss: 1.0738775730133057,grad_norm: 0.9999995049155589, iteration: 435170
loss: 0.9947346448898315,grad_norm: 0.7115767070764604, iteration: 435171
loss: 0.9462928175926208,grad_norm: 0.8520765083745778, iteration: 435172
loss: 1.0266196727752686,grad_norm: 0.698319045488379, iteration: 435173
loss: 1.0120935440063477,grad_norm: 0.8085921673592735, iteration: 435174
loss: 1.0048290491104126,grad_norm: 0.8455957827301698, iteration: 435175
loss: 1.0101547241210938,grad_norm: 0.8230361085247279, iteration: 435176
loss: 0.9906290173530579,grad_norm: 0.6732688557347851, iteration: 435177
loss: 0.9974297881126404,grad_norm: 0.9357332961642398, iteration: 435178
loss: 1.0059363842010498,grad_norm: 0.686527456069418, iteration: 435179
loss: 0.982736349105835,grad_norm: 0.6822636711824879, iteration: 435180
loss: 1.0581952333450317,grad_norm: 0.9999996088359651, iteration: 435181
loss: 0.9781882762908936,grad_norm: 0.9024948866407031, iteration: 435182
loss: 1.0210323333740234,grad_norm: 0.8762504475753967, iteration: 435183
loss: 1.026412844657898,grad_norm: 0.826814783716834, iteration: 435184
loss: 1.0163134336471558,grad_norm: 0.8756715154729707, iteration: 435185
loss: 1.0192711353302002,grad_norm: 0.774882132626671, iteration: 435186
loss: 0.9876951575279236,grad_norm: 0.7307177730810777, iteration: 435187
loss: 0.991638720035553,grad_norm: 0.9447093637672741, iteration: 435188
loss: 1.0014357566833496,grad_norm: 0.7570814854548235, iteration: 435189
loss: 0.9968414902687073,grad_norm: 0.7992439541386056, iteration: 435190
loss: 0.9839764833450317,grad_norm: 0.82086729987286, iteration: 435191
loss: 1.0328474044799805,grad_norm: 0.9999991088647228, iteration: 435192
loss: 0.9896280765533447,grad_norm: 0.8223379321775369, iteration: 435193
loss: 0.9980592131614685,grad_norm: 0.7652502740127802, iteration: 435194
loss: 1.0210351943969727,grad_norm: 0.8569861347756488, iteration: 435195
loss: 0.9931990504264832,grad_norm: 0.9999992217852175, iteration: 435196
loss: 0.9766470789909363,grad_norm: 0.7870474067275844, iteration: 435197
loss: 0.9838862419128418,grad_norm: 0.8624317653261987, iteration: 435198
loss: 0.9784796237945557,grad_norm: 0.7459330597229191, iteration: 435199
loss: 1.0117418766021729,grad_norm: 0.9990592821928221, iteration: 435200
loss: 0.9934465289115906,grad_norm: 0.6747875062093469, iteration: 435201
loss: 1.0114095211029053,grad_norm: 0.7489327264355664, iteration: 435202
loss: 0.9654188752174377,grad_norm: 0.7940811918597328, iteration: 435203
loss: 1.0337294340133667,grad_norm: 0.9999991231441242, iteration: 435204
loss: 0.9945467114448547,grad_norm: 0.7426808375965698, iteration: 435205
loss: 1.0096036195755005,grad_norm: 0.9999997495607162, iteration: 435206
loss: 1.0008232593536377,grad_norm: 0.7453108643520506, iteration: 435207
loss: 1.0094481706619263,grad_norm: 0.6928592221748461, iteration: 435208
loss: 0.9636633396148682,grad_norm: 0.8966620908118051, iteration: 435209
loss: 0.983151376247406,grad_norm: 0.8292521987275573, iteration: 435210
loss: 0.9764299988746643,grad_norm: 0.8128118214612037, iteration: 435211
loss: 0.9954337477684021,grad_norm: 0.7293979770795417, iteration: 435212
loss: 1.0156886577606201,grad_norm: 0.6880975016540003, iteration: 435213
loss: 0.9886741638183594,grad_norm: 0.6344623149893127, iteration: 435214
loss: 0.9587426781654358,grad_norm: 0.864508508606525, iteration: 435215
loss: 1.0503653287887573,grad_norm: 0.890942244034042, iteration: 435216
loss: 1.0053200721740723,grad_norm: 0.900918212740101, iteration: 435217
loss: 1.0149123668670654,grad_norm: 0.819886247367056, iteration: 435218
loss: 0.9825007319450378,grad_norm: 0.9060688225012578, iteration: 435219
loss: 0.9839674234390259,grad_norm: 0.702568749471187, iteration: 435220
loss: 0.9930887818336487,grad_norm: 0.970841297102461, iteration: 435221
loss: 1.012037754058838,grad_norm: 0.8067088683980639, iteration: 435222
loss: 1.0096598863601685,grad_norm: 0.9059077371646157, iteration: 435223
loss: 1.0313324928283691,grad_norm: 0.999999865855954, iteration: 435224
loss: 1.048092246055603,grad_norm: 0.9999993065117756, iteration: 435225
loss: 1.056389331817627,grad_norm: 0.7803143123476732, iteration: 435226
loss: 0.9839901328086853,grad_norm: 0.9999991986915976, iteration: 435227
loss: 1.0261189937591553,grad_norm: 0.922064045014835, iteration: 435228
loss: 0.9891729950904846,grad_norm: 0.7818219235070282, iteration: 435229
loss: 0.9932183623313904,grad_norm: 0.6695382630324398, iteration: 435230
loss: 0.995136559009552,grad_norm: 0.8509625387488897, iteration: 435231
loss: 0.9750338792800903,grad_norm: 0.7490337314590869, iteration: 435232
loss: 0.9964637756347656,grad_norm: 0.729511507070949, iteration: 435233
loss: 1.0102193355560303,grad_norm: 0.7341287448049519, iteration: 435234
loss: 0.999514102935791,grad_norm: 0.999999140419092, iteration: 435235
loss: 1.0180244445800781,grad_norm: 0.8922505383413731, iteration: 435236
loss: 0.9894945621490479,grad_norm: 0.6791852927584875, iteration: 435237
loss: 0.99095219373703,grad_norm: 0.6713814767043862, iteration: 435238
loss: 0.9727551937103271,grad_norm: 0.7357152285054641, iteration: 435239
loss: 1.0039141178131104,grad_norm: 0.9999996652729168, iteration: 435240
loss: 1.0015424489974976,grad_norm: 0.6344321551454296, iteration: 435241
loss: 1.0005961656570435,grad_norm: 0.7358542022060203, iteration: 435242
loss: 0.9668571352958679,grad_norm: 0.7771476691715408, iteration: 435243
loss: 0.9839301705360413,grad_norm: 0.7350433629034951, iteration: 435244
loss: 0.9820709228515625,grad_norm: 0.7950410143435059, iteration: 435245
loss: 0.9938593506813049,grad_norm: 0.8232869547232342, iteration: 435246
loss: 1.083695888519287,grad_norm: 0.9999997048450554, iteration: 435247
loss: 1.0160690546035767,grad_norm: 0.9632576982420552, iteration: 435248
loss: 1.0209592580795288,grad_norm: 0.9421831688467414, iteration: 435249
loss: 0.9748260378837585,grad_norm: 0.7510458948707035, iteration: 435250
loss: 0.9861066937446594,grad_norm: 0.6906892637974791, iteration: 435251
loss: 1.0077083110809326,grad_norm: 0.9999993408990193, iteration: 435252
loss: 0.9595479965209961,grad_norm: 0.7435983552115668, iteration: 435253
loss: 0.9853166937828064,grad_norm: 0.7328311918959696, iteration: 435254
loss: 1.0332274436950684,grad_norm: 0.6993854485267166, iteration: 435255
loss: 0.9807170629501343,grad_norm: 0.8173889325403109, iteration: 435256
loss: 0.9718141555786133,grad_norm: 0.7067809053239124, iteration: 435257
loss: 1.0632472038269043,grad_norm: 0.9778567668381379, iteration: 435258
loss: 0.9873436093330383,grad_norm: 0.8055678127837149, iteration: 435259
loss: 0.9878706336021423,grad_norm: 0.855498535626237, iteration: 435260
loss: 0.9878293871879578,grad_norm: 0.7295569913905933, iteration: 435261
loss: 1.0297966003417969,grad_norm: 0.9999993993140236, iteration: 435262
loss: 1.029645323753357,grad_norm: 0.7955229407934323, iteration: 435263
loss: 1.0068773031234741,grad_norm: 0.6849080909764272, iteration: 435264
loss: 1.0096518993377686,grad_norm: 0.9999999213425227, iteration: 435265
loss: 0.9539619088172913,grad_norm: 0.8216236201271578, iteration: 435266
loss: 1.025886058807373,grad_norm: 0.8847837419742962, iteration: 435267
loss: 1.033544659614563,grad_norm: 0.9999997529807274, iteration: 435268
loss: 1.0017335414886475,grad_norm: 0.73169296141415, iteration: 435269
loss: 1.0094802379608154,grad_norm: 0.7597834852145665, iteration: 435270
loss: 0.9985634684562683,grad_norm: 0.8344421875097031, iteration: 435271
loss: 1.1020253896713257,grad_norm: 0.9053679618016393, iteration: 435272
loss: 0.9670657515525818,grad_norm: 0.9119670994895193, iteration: 435273
loss: 0.9677003026008606,grad_norm: 0.7477481149160944, iteration: 435274
loss: 0.9636993408203125,grad_norm: 0.8935195855958251, iteration: 435275
loss: 0.9600133895874023,grad_norm: 0.8520496304139877, iteration: 435276
loss: 1.0110456943511963,grad_norm: 0.647505217306309, iteration: 435277
loss: 0.9786442518234253,grad_norm: 0.9516292876479858, iteration: 435278
loss: 0.9650927186012268,grad_norm: 0.9999997425774474, iteration: 435279
loss: 1.0223548412322998,grad_norm: 0.9999992031425005, iteration: 435280
loss: 0.9842761754989624,grad_norm: 0.9999994245562103, iteration: 435281
loss: 0.9498106837272644,grad_norm: 0.6633778499818576, iteration: 435282
loss: 0.9719574451446533,grad_norm: 0.748845640449491, iteration: 435283
loss: 0.9707227945327759,grad_norm: 0.6928241894997105, iteration: 435284
loss: 0.9786731600761414,grad_norm: 0.6536100827955281, iteration: 435285
loss: 0.9611731767654419,grad_norm: 0.7378562791038711, iteration: 435286
loss: 1.005002737045288,grad_norm: 0.9999995944864686, iteration: 435287
loss: 0.994873046875,grad_norm: 0.8480088471298274, iteration: 435288
loss: 0.996886670589447,grad_norm: 0.7042759073592261, iteration: 435289
loss: 1.008216381072998,grad_norm: 0.7882385720480078, iteration: 435290
loss: 1.0164551734924316,grad_norm: 0.9999993347995305, iteration: 435291
loss: 1.0089077949523926,grad_norm: 0.7798872861640821, iteration: 435292
loss: 1.0510660409927368,grad_norm: 0.9041889400528339, iteration: 435293
loss: 0.9688929915428162,grad_norm: 0.698775473034053, iteration: 435294
loss: 1.061409592628479,grad_norm: 0.9999994051468476, iteration: 435295
loss: 1.042362093925476,grad_norm: 0.8542649872637679, iteration: 435296
loss: 1.0168261528015137,grad_norm: 0.6536856725796193, iteration: 435297
loss: 0.9847149848937988,grad_norm: 0.8445395019179189, iteration: 435298
loss: 0.9805238842964172,grad_norm: 0.8227812308026395, iteration: 435299
loss: 1.030393362045288,grad_norm: 0.9999996956161462, iteration: 435300
loss: 0.9746960997581482,grad_norm: 0.774806872846096, iteration: 435301
loss: 1.0863829851150513,grad_norm: 0.9999996309411192, iteration: 435302
loss: 0.9977566599845886,grad_norm: 0.6936176903516902, iteration: 435303
loss: 0.9696804881095886,grad_norm: 0.8152419965127163, iteration: 435304
loss: 1.0057117938995361,grad_norm: 0.830019797575929, iteration: 435305
loss: 0.9980199337005615,grad_norm: 0.7253513169224332, iteration: 435306
loss: 0.9657528400421143,grad_norm: 0.7284261775744364, iteration: 435307
loss: 0.9788376092910767,grad_norm: 0.7574140561790635, iteration: 435308
loss: 1.0196735858917236,grad_norm: 0.8413723156436028, iteration: 435309
loss: 1.0120630264282227,grad_norm: 0.7199867123123693, iteration: 435310
loss: 1.0807822942733765,grad_norm: 0.923668546120653, iteration: 435311
loss: 1.0503506660461426,grad_norm: 0.9122956906087648, iteration: 435312
loss: 0.9857722520828247,grad_norm: 0.8212156061023821, iteration: 435313
loss: 1.0128403902053833,grad_norm: 0.6821387188814731, iteration: 435314
loss: 1.069704532623291,grad_norm: 0.983057066823994, iteration: 435315
loss: 0.9770549535751343,grad_norm: 0.8311414769789032, iteration: 435316
loss: 1.0747500658035278,grad_norm: 0.9999998006164424, iteration: 435317
loss: 1.0074255466461182,grad_norm: 0.8735342444364025, iteration: 435318
loss: 1.0104008913040161,grad_norm: 0.69797742435383, iteration: 435319
loss: 1.011659026145935,grad_norm: 0.9035946674199827, iteration: 435320
loss: 1.0176211595535278,grad_norm: 0.8377843310263177, iteration: 435321
loss: 0.975902795791626,grad_norm: 0.7363807004123063, iteration: 435322
loss: 1.1350878477096558,grad_norm: 0.9999994510471929, iteration: 435323
loss: 0.9982790350914001,grad_norm: 0.8643828822245561, iteration: 435324
loss: 1.0086346864700317,grad_norm: 0.8124006338262609, iteration: 435325
loss: 0.9824011921882629,grad_norm: 0.9999991062992741, iteration: 435326
loss: 1.0065345764160156,grad_norm: 0.8048959072186832, iteration: 435327
loss: 1.0607160329818726,grad_norm: 0.9999993764227401, iteration: 435328
loss: 0.9812086820602417,grad_norm: 0.8720515299706935, iteration: 435329
loss: 1.01873779296875,grad_norm: 0.6727635639423815, iteration: 435330
loss: 1.0142098665237427,grad_norm: 0.7979631061802899, iteration: 435331
loss: 1.048531413078308,grad_norm: 0.999999319158079, iteration: 435332
loss: 1.0141314268112183,grad_norm: 0.6612166327057731, iteration: 435333
loss: 1.0087757110595703,grad_norm: 0.854687059132481, iteration: 435334
loss: 1.0110677480697632,grad_norm: 0.8030671619912413, iteration: 435335
loss: 0.986429750919342,grad_norm: 0.8496546939223456, iteration: 435336
loss: 1.1174894571304321,grad_norm: 0.999999886537266, iteration: 435337
loss: 1.019537329673767,grad_norm: 0.9612755896926027, iteration: 435338
loss: 0.9872572422027588,grad_norm: 0.7253125372374807, iteration: 435339
loss: 0.9843844771385193,grad_norm: 0.8407443846200274, iteration: 435340
loss: 1.1101988554000854,grad_norm: 0.9681397286644443, iteration: 435341
loss: 1.0077141523361206,grad_norm: 0.9999992881180378, iteration: 435342
loss: 1.0015900135040283,grad_norm: 0.7824990094201116, iteration: 435343
loss: 0.9776701927185059,grad_norm: 0.999999103374134, iteration: 435344
loss: 1.0045044422149658,grad_norm: 0.6975205755415385, iteration: 435345
loss: 0.9921969175338745,grad_norm: 0.7429497466799168, iteration: 435346
loss: 0.9946041107177734,grad_norm: 0.7200947451782659, iteration: 435347
loss: 0.9771814942359924,grad_norm: 0.7662413917040622, iteration: 435348
loss: 1.0233261585235596,grad_norm: 0.7593193326223394, iteration: 435349
loss: 0.9756901264190674,grad_norm: 0.7243772617187051, iteration: 435350
loss: 1.0032778978347778,grad_norm: 0.8105949877540184, iteration: 435351
loss: 1.0187386274337769,grad_norm: 0.796398993401604, iteration: 435352
loss: 0.9432364106178284,grad_norm: 0.7212013142317926, iteration: 435353
loss: 1.0098267793655396,grad_norm: 0.8605323097863983, iteration: 435354
loss: 1.0453637838363647,grad_norm: 0.9999991768163601, iteration: 435355
loss: 0.9853493571281433,grad_norm: 0.9780769228849746, iteration: 435356
loss: 0.9994277954101562,grad_norm: 0.6453065160628533, iteration: 435357
loss: 0.9742094874382019,grad_norm: 0.6947962304805251, iteration: 435358
loss: 1.030856966972351,grad_norm: 0.7843729531724627, iteration: 435359
loss: 1.0047767162322998,grad_norm: 0.8990648374263596, iteration: 435360
loss: 0.9585859775543213,grad_norm: 0.8072963398895165, iteration: 435361
loss: 0.9976398944854736,grad_norm: 0.9999993956112262, iteration: 435362
loss: 1.050378680229187,grad_norm: 0.9217569328065566, iteration: 435363
loss: 1.0786536931991577,grad_norm: 0.8693794296079637, iteration: 435364
loss: 0.9947503209114075,grad_norm: 0.8960287022545068, iteration: 435365
loss: 0.9931113719940186,grad_norm: 0.784495482426383, iteration: 435366
loss: 0.9941450357437134,grad_norm: 0.7879166115726591, iteration: 435367
loss: 1.0088192224502563,grad_norm: 0.7311698813132638, iteration: 435368
loss: 1.0237669944763184,grad_norm: 0.8422188930266346, iteration: 435369
loss: 1.0400402545928955,grad_norm: 0.8248588948731642, iteration: 435370
loss: 1.0093127489089966,grad_norm: 0.7520365587665968, iteration: 435371
loss: 1.0378905534744263,grad_norm: 0.9233487307103738, iteration: 435372
loss: 1.0734996795654297,grad_norm: 0.9999990714906448, iteration: 435373
loss: 1.027075171470642,grad_norm: 0.8390774892764447, iteration: 435374
loss: 1.0410783290863037,grad_norm: 0.6951405615553304, iteration: 435375
loss: 1.002892017364502,grad_norm: 0.8639877633749176, iteration: 435376
loss: 1.047863483428955,grad_norm: 0.731689275240282, iteration: 435377
loss: 1.0022997856140137,grad_norm: 0.9062413999227554, iteration: 435378
loss: 0.9934858083724976,grad_norm: 0.7341815080888403, iteration: 435379
loss: 0.9812911152839661,grad_norm: 0.9465859930228117, iteration: 435380
loss: 1.0357577800750732,grad_norm: 0.9999993677239585, iteration: 435381
loss: 0.9979637265205383,grad_norm: 0.7425740623050909, iteration: 435382
loss: 1.0403075218200684,grad_norm: 0.6679211647586503, iteration: 435383
loss: 0.9986927509307861,grad_norm: 0.9366983320043847, iteration: 435384
loss: 1.0016721487045288,grad_norm: 0.7294818734057307, iteration: 435385
loss: 1.0268995761871338,grad_norm: 0.6865009552557921, iteration: 435386
loss: 1.0056300163269043,grad_norm: 0.7757812084735349, iteration: 435387
loss: 1.0402683019638062,grad_norm: 0.8940079889731124, iteration: 435388
loss: 1.0015337467193604,grad_norm: 0.8198688079650984, iteration: 435389
loss: 1.0212862491607666,grad_norm: 0.7731827980941565, iteration: 435390
loss: 0.9543510675430298,grad_norm: 0.6945562461409557, iteration: 435391
loss: 0.9710380434989929,grad_norm: 0.8974371371426051, iteration: 435392
loss: 0.9884769916534424,grad_norm: 0.6973970635966191, iteration: 435393
loss: 1.0202739238739014,grad_norm: 0.7639706637724355, iteration: 435394
loss: 1.0053101778030396,grad_norm: 0.7242149995259554, iteration: 435395
loss: 1.1023327112197876,grad_norm: 0.9999995527521326, iteration: 435396
loss: 0.979734480381012,grad_norm: 0.7545143665213916, iteration: 435397
loss: 1.0063592195510864,grad_norm: 0.8695865383962764, iteration: 435398
loss: 0.9998081922531128,grad_norm: 0.7502780174019615, iteration: 435399
loss: 1.0365338325500488,grad_norm: 0.7980553744350997, iteration: 435400
loss: 1.0121179819107056,grad_norm: 0.8924473187923824, iteration: 435401
loss: 1.024147629737854,grad_norm: 0.6997232195976226, iteration: 435402
loss: 0.9937226176261902,grad_norm: 0.8015718545468676, iteration: 435403
loss: 1.0643820762634277,grad_norm: 0.8892739203097634, iteration: 435404
loss: 1.0134714841842651,grad_norm: 0.7415273442951784, iteration: 435405
loss: 0.9971990585327148,grad_norm: 0.7539804838655294, iteration: 435406
loss: 0.9677924513816833,grad_norm: 0.7527430671753896, iteration: 435407
loss: 0.9593534469604492,grad_norm: 0.9552118944867449, iteration: 435408
loss: 1.0233217477798462,grad_norm: 0.7365078245924506, iteration: 435409
loss: 0.9588569402694702,grad_norm: 0.7523433830980828, iteration: 435410
loss: 1.0204787254333496,grad_norm: 0.8631196379196583, iteration: 435411
loss: 0.9990208745002747,grad_norm: 0.7190058820755418, iteration: 435412
loss: 1.0183409452438354,grad_norm: 0.8961316016032933, iteration: 435413
loss: 1.0230590105056763,grad_norm: 0.7817467327386788, iteration: 435414
loss: 1.0418741703033447,grad_norm: 0.9407404806757862, iteration: 435415
loss: 1.0303568840026855,grad_norm: 0.7841997144642033, iteration: 435416
loss: 0.9876611828804016,grad_norm: 0.7168397779709106, iteration: 435417
loss: 1.0720455646514893,grad_norm: 0.8729099804251188, iteration: 435418
loss: 1.0395634174346924,grad_norm: 0.6722239058165966, iteration: 435419
loss: 0.9665038585662842,grad_norm: 0.996423089073821, iteration: 435420
loss: 0.9952185750007629,grad_norm: 0.7549635130095176, iteration: 435421
loss: 0.9367574453353882,grad_norm: 0.950330395550544, iteration: 435422
loss: 1.014589548110962,grad_norm: 0.9650783604573936, iteration: 435423
loss: 0.9925146698951721,grad_norm: 0.9033509046460481, iteration: 435424
loss: 1.014613389968872,grad_norm: 0.6973796626796089, iteration: 435425
loss: 1.003785490989685,grad_norm: 0.9415081817409923, iteration: 435426
loss: 1.005444049835205,grad_norm: 0.8017609737933498, iteration: 435427
loss: 1.0247222185134888,grad_norm: 0.7421635225778755, iteration: 435428
loss: 1.0273528099060059,grad_norm: 0.730192715202929, iteration: 435429
loss: 0.9907432198524475,grad_norm: 0.7114202193702449, iteration: 435430
loss: 1.0313276052474976,grad_norm: 0.6932903921051307, iteration: 435431
loss: 0.9813706874847412,grad_norm: 0.7921879178509612, iteration: 435432
loss: 1.0014539957046509,grad_norm: 0.6903841579302334, iteration: 435433
loss: 1.0168917179107666,grad_norm: 0.7518554616418897, iteration: 435434
loss: 1.0016053915023804,grad_norm: 0.8677459050872779, iteration: 435435
loss: 0.9920680522918701,grad_norm: 0.9089972429123315, iteration: 435436
loss: 1.017714023590088,grad_norm: 0.6968010697690429, iteration: 435437
loss: 1.0230019092559814,grad_norm: 0.9999996375465047, iteration: 435438
loss: 0.9798540472984314,grad_norm: 0.9012831654316261, iteration: 435439
loss: 1.0300376415252686,grad_norm: 0.9999991899536577, iteration: 435440
loss: 1.0057355165481567,grad_norm: 0.7619316420902137, iteration: 435441
loss: 0.9710000157356262,grad_norm: 0.8966128400928103, iteration: 435442
loss: 1.0211387872695923,grad_norm: 0.7880169973700515, iteration: 435443
loss: 1.0183968544006348,grad_norm: 0.7511972115937016, iteration: 435444
loss: 0.9858238101005554,grad_norm: 0.8218231752448737, iteration: 435445
loss: 1.0040675401687622,grad_norm: 0.7524146107733374, iteration: 435446
loss: 0.9304697513580322,grad_norm: 0.7374834911171105, iteration: 435447
loss: 0.9729174971580505,grad_norm: 0.8361075393506803, iteration: 435448
loss: 0.9742421507835388,grad_norm: 0.7276573356679249, iteration: 435449
loss: 1.057318925857544,grad_norm: 0.9999998779705873, iteration: 435450
loss: 0.9814726710319519,grad_norm: 0.7777438999474554, iteration: 435451
loss: 0.9894727468490601,grad_norm: 0.9999995606488594, iteration: 435452
loss: 1.0250118970870972,grad_norm: 0.8782534861847596, iteration: 435453
loss: 1.0119997262954712,grad_norm: 0.842335862123001, iteration: 435454
loss: 1.0572482347488403,grad_norm: 0.8791870143703483, iteration: 435455
loss: 0.9580537676811218,grad_norm: 0.8891078599860659, iteration: 435456
loss: 0.9946480393409729,grad_norm: 0.8663356264784258, iteration: 435457
loss: 0.9627189636230469,grad_norm: 0.7043609207380741, iteration: 435458
loss: 0.971517026424408,grad_norm: 0.8204464410008118, iteration: 435459
loss: 0.9906287789344788,grad_norm: 0.8881244164479313, iteration: 435460
loss: 1.0469934940338135,grad_norm: 0.999999243053959, iteration: 435461
loss: 1.0109423398971558,grad_norm: 0.77673793593316, iteration: 435462
loss: 1.0356708765029907,grad_norm: 0.8822134500027758, iteration: 435463
loss: 0.9818898439407349,grad_norm: 0.6655106571608529, iteration: 435464
loss: 1.0069656372070312,grad_norm: 0.777207653409485, iteration: 435465
loss: 1.032515525817871,grad_norm: 0.9202733358861406, iteration: 435466
loss: 0.9714593887329102,grad_norm: 0.8094080617578642, iteration: 435467
loss: 1.0181150436401367,grad_norm: 0.9999993820799894, iteration: 435468
loss: 1.0232291221618652,grad_norm: 0.8718137599891833, iteration: 435469
loss: 1.003934621810913,grad_norm: 0.6009755335072829, iteration: 435470
loss: 1.0486400127410889,grad_norm: 0.99999903564279, iteration: 435471
loss: 1.0213485956192017,grad_norm: 0.8112788311473252, iteration: 435472
loss: 1.0136096477508545,grad_norm: 0.8609546528760206, iteration: 435473
loss: 1.0098546743392944,grad_norm: 0.8512687404933501, iteration: 435474
loss: 1.0179505348205566,grad_norm: 0.7194807755829599, iteration: 435475
loss: 0.993015468120575,grad_norm: 0.7998531021461245, iteration: 435476
loss: 1.0104343891143799,grad_norm: 0.7350347434375686, iteration: 435477
loss: 1.013160228729248,grad_norm: 0.747114588680262, iteration: 435478
loss: 0.9910590052604675,grad_norm: 0.7713993236326745, iteration: 435479
loss: 0.9938258528709412,grad_norm: 0.7606879293812474, iteration: 435480
loss: 1.0326563119888306,grad_norm: 0.7155663981320455, iteration: 435481
loss: 0.9957618117332458,grad_norm: 0.8868243432451369, iteration: 435482
loss: 1.0024677515029907,grad_norm: 0.8826954827577107, iteration: 435483
loss: 0.988493025302887,grad_norm: 0.7375266518694352, iteration: 435484
loss: 0.9713544249534607,grad_norm: 0.7596909197359348, iteration: 435485
loss: 0.991344153881073,grad_norm: 0.8895356467975478, iteration: 435486
loss: 0.9950740337371826,grad_norm: 0.753279260997294, iteration: 435487
loss: 0.970394492149353,grad_norm: 0.8002188545294893, iteration: 435488
loss: 1.0066298246383667,grad_norm: 0.8922432797496297, iteration: 435489
loss: 1.0122281312942505,grad_norm: 0.7758863787487312, iteration: 435490
loss: 0.9748572111129761,grad_norm: 0.867886350066448, iteration: 435491
loss: 0.9939699172973633,grad_norm: 0.7343265140861396, iteration: 435492
loss: 0.9993181228637695,grad_norm: 0.8565532282890195, iteration: 435493
loss: 1.0269784927368164,grad_norm: 0.9931305446933717, iteration: 435494
loss: 0.9941018223762512,grad_norm: 0.821553318917914, iteration: 435495
loss: 1.0053879022598267,grad_norm: 0.8450507420143281, iteration: 435496
loss: 1.0401055812835693,grad_norm: 0.7407864729943047, iteration: 435497
loss: 0.9945001602172852,grad_norm: 0.793883950470181, iteration: 435498
loss: 0.9943101406097412,grad_norm: 0.8775080013968791, iteration: 435499
loss: 1.0093215703964233,grad_norm: 0.7579383581955303, iteration: 435500
loss: 1.0151885747909546,grad_norm: 0.8602031503482062, iteration: 435501
loss: 0.9999074935913086,grad_norm: 0.7938989387127174, iteration: 435502
loss: 1.014062523841858,grad_norm: 0.8094199549996259, iteration: 435503
loss: 0.9915544986724854,grad_norm: 0.8252942042403485, iteration: 435504
loss: 0.9864845871925354,grad_norm: 0.7343680207836473, iteration: 435505
loss: 0.9896056056022644,grad_norm: 0.900105079737456, iteration: 435506
loss: 0.989742636680603,grad_norm: 0.8544405847819483, iteration: 435507
loss: 0.9095097780227661,grad_norm: 0.7420470445776634, iteration: 435508
loss: 1.0294538736343384,grad_norm: 0.9999991678806505, iteration: 435509
loss: 0.9870442748069763,grad_norm: 0.7661911038278791, iteration: 435510
loss: 1.011743187904358,grad_norm: 0.6975529012389777, iteration: 435511
loss: 1.0834949016571045,grad_norm: 0.9999994816304058, iteration: 435512
loss: 1.0174129009246826,grad_norm: 0.7923533054526716, iteration: 435513
loss: 0.9684869647026062,grad_norm: 0.7343994581478307, iteration: 435514
loss: 0.998654305934906,grad_norm: 0.8225180228514521, iteration: 435515
loss: 1.0188026428222656,grad_norm: 0.7287641778120385, iteration: 435516
loss: 1.017303228378296,grad_norm: 0.8001355882551667, iteration: 435517
loss: 0.977275013923645,grad_norm: 0.8456691617180893, iteration: 435518
loss: 0.991251528263092,grad_norm: 0.7776582694181329, iteration: 435519
loss: 0.9728136658668518,grad_norm: 0.9201669451132902, iteration: 435520
loss: 1.0007230043411255,grad_norm: 0.7193598940644018, iteration: 435521
loss: 1.0208123922348022,grad_norm: 0.6995470748159162, iteration: 435522
loss: 0.9946805238723755,grad_norm: 0.82583555021034, iteration: 435523
loss: 1.0086312294006348,grad_norm: 0.8346913589233961, iteration: 435524
loss: 0.9830830097198486,grad_norm: 0.8116227939108365, iteration: 435525
loss: 1.009324312210083,grad_norm: 0.8348288103689042, iteration: 435526
loss: 1.0162876844406128,grad_norm: 0.8240342724264147, iteration: 435527
loss: 1.0080591440200806,grad_norm: 0.7032848830338849, iteration: 435528
loss: 0.9946765303611755,grad_norm: 0.8578522774933383, iteration: 435529
loss: 0.987093448638916,grad_norm: 0.6939462274155078, iteration: 435530
loss: 1.003387451171875,grad_norm: 0.85756851768298, iteration: 435531
loss: 0.9639115333557129,grad_norm: 0.7985767727594704, iteration: 435532
loss: 0.9864522814750671,grad_norm: 0.8465555041959503, iteration: 435533
loss: 1.0256116390228271,grad_norm: 0.8428672419029675, iteration: 435534
loss: 1.0225781202316284,grad_norm: 0.7948046955369483, iteration: 435535
loss: 0.973396360874176,grad_norm: 0.6657934396435982, iteration: 435536
loss: 1.011644721031189,grad_norm: 0.7788632506220768, iteration: 435537
loss: 0.9996594190597534,grad_norm: 0.7706129879599678, iteration: 435538
loss: 1.0272538661956787,grad_norm: 0.7393276449973212, iteration: 435539
loss: 0.9893103241920471,grad_norm: 0.8494489704633243, iteration: 435540
loss: 1.0412753820419312,grad_norm: 0.8327888794837734, iteration: 435541
loss: 1.0035765171051025,grad_norm: 0.778042914028401, iteration: 435542
loss: 1.0113654136657715,grad_norm: 0.9070021671335259, iteration: 435543
loss: 0.9629140496253967,grad_norm: 0.7043130572747854, iteration: 435544
loss: 0.9883966445922852,grad_norm: 0.6505850587004818, iteration: 435545
loss: 0.9716457724571228,grad_norm: 0.7148008153917979, iteration: 435546
loss: 1.0218193531036377,grad_norm: 0.7552587655009535, iteration: 435547
loss: 1.0003632307052612,grad_norm: 0.7269933166067832, iteration: 435548
loss: 1.0020759105682373,grad_norm: 0.8646972263336047, iteration: 435549
loss: 0.9925810694694519,grad_norm: 0.7978288831776305, iteration: 435550
loss: 0.9496669173240662,grad_norm: 0.7782540501116681, iteration: 435551
loss: 0.9907003045082092,grad_norm: 0.7061489092969399, iteration: 435552
loss: 1.0117298364639282,grad_norm: 0.7993590813774543, iteration: 435553
loss: 1.022742748260498,grad_norm: 0.9307538622205174, iteration: 435554
loss: 1.1062790155410767,grad_norm: 0.9999994047114438, iteration: 435555
loss: 1.0274219512939453,grad_norm: 0.8968893582874703, iteration: 435556
loss: 0.9759687781333923,grad_norm: 0.9999995751086415, iteration: 435557
loss: 1.0063027143478394,grad_norm: 0.7339923556336164, iteration: 435558
loss: 0.971518874168396,grad_norm: 0.7534449998629044, iteration: 435559
loss: 1.0554250478744507,grad_norm: 0.9999998382163119, iteration: 435560
loss: 0.9811782240867615,grad_norm: 0.7499595303714376, iteration: 435561
loss: 1.0538877248764038,grad_norm: 0.6418164108808817, iteration: 435562
loss: 1.0547784566879272,grad_norm: 0.9999992174204405, iteration: 435563
loss: 1.006630778312683,grad_norm: 0.619684117348442, iteration: 435564
loss: 0.9902447462081909,grad_norm: 0.7574103073085695, iteration: 435565
loss: 0.989904522895813,grad_norm: 0.7226414766907996, iteration: 435566
loss: 0.967985987663269,grad_norm: 0.8979184059954404, iteration: 435567
loss: 1.0291721820831299,grad_norm: 0.7820198220260273, iteration: 435568
loss: 0.980052649974823,grad_norm: 0.8239604851217012, iteration: 435569
loss: 1.0334692001342773,grad_norm: 0.8008390250001317, iteration: 435570
loss: 0.9881219863891602,grad_norm: 0.761260045771075, iteration: 435571
loss: 0.976138174533844,grad_norm: 0.6687559638757622, iteration: 435572
loss: 0.9784144163131714,grad_norm: 0.8701917724569821, iteration: 435573
loss: 0.9593348503112793,grad_norm: 0.6317771515901927, iteration: 435574
loss: 1.0131211280822754,grad_norm: 0.8609916764379082, iteration: 435575
loss: 1.0023576021194458,grad_norm: 0.7829891349724617, iteration: 435576
loss: 0.9857454895973206,grad_norm: 0.7077539640843904, iteration: 435577
loss: 0.9946374297142029,grad_norm: 0.7627716359328371, iteration: 435578
loss: 1.0112273693084717,grad_norm: 0.9999996355750311, iteration: 435579
loss: 0.9993083477020264,grad_norm: 0.8792907670775594, iteration: 435580
loss: 0.9987714886665344,grad_norm: 0.84551293326062, iteration: 435581
loss: 0.9958972930908203,grad_norm: 0.7294862898768353, iteration: 435582
loss: 1.014204502105713,grad_norm: 0.8872232613122546, iteration: 435583
loss: 1.0173554420471191,grad_norm: 0.819329314048633, iteration: 435584
loss: 1.0421055555343628,grad_norm: 0.8716319170466915, iteration: 435585
loss: 0.9809837341308594,grad_norm: 0.999999007917741, iteration: 435586
loss: 0.9812111854553223,grad_norm: 0.923261740023256, iteration: 435587
loss: 0.930172324180603,grad_norm: 0.7895218919459431, iteration: 435588
loss: 0.985556423664093,grad_norm: 0.7898439529282327, iteration: 435589
loss: 0.986266016960144,grad_norm: 0.9999995054783654, iteration: 435590
loss: 0.998677134513855,grad_norm: 0.8240015534598601, iteration: 435591
loss: 0.9893800616264343,grad_norm: 0.8556984884488055, iteration: 435592
loss: 1.0199133157730103,grad_norm: 0.7050738071157742, iteration: 435593
loss: 0.964294970035553,grad_norm: 0.9117187715704403, iteration: 435594
loss: 1.0080152750015259,grad_norm: 0.7462443414757141, iteration: 435595
loss: 1.019788384437561,grad_norm: 0.9999995562681899, iteration: 435596
loss: 1.0212278366088867,grad_norm: 0.7002804445229232, iteration: 435597
loss: 0.9811620712280273,grad_norm: 0.7246294014904107, iteration: 435598
loss: 0.9945611357688904,grad_norm: 0.6490917847626901, iteration: 435599
loss: 1.0232936143875122,grad_norm: 0.760938919078822, iteration: 435600
loss: 1.0550419092178345,grad_norm: 0.8442304858284069, iteration: 435601
loss: 1.0382232666015625,grad_norm: 0.7251110605841111, iteration: 435602
loss: 1.0107566118240356,grad_norm: 0.754026799172076, iteration: 435603
loss: 1.048295497894287,grad_norm: 0.8660858058501051, iteration: 435604
loss: 1.0032862424850464,grad_norm: 0.7715564350642823, iteration: 435605
loss: 0.9700003862380981,grad_norm: 0.709821115459929, iteration: 435606
loss: 0.9490334391593933,grad_norm: 0.8098014506474177, iteration: 435607
loss: 1.0027885437011719,grad_norm: 0.9999997974154482, iteration: 435608
loss: 0.970050036907196,grad_norm: 0.6911457287716382, iteration: 435609
loss: 0.9871401786804199,grad_norm: 0.8286888635745826, iteration: 435610
loss: 1.081451416015625,grad_norm: 0.856666969332662, iteration: 435611
loss: 0.9500389695167542,grad_norm: 0.9447939921034983, iteration: 435612
loss: 1.005037784576416,grad_norm: 0.8090009667898144, iteration: 435613
loss: 1.0064328908920288,grad_norm: 0.9944046453775863, iteration: 435614
loss: 0.9726681709289551,grad_norm: 0.776849226265578, iteration: 435615
loss: 1.0462661981582642,grad_norm: 0.6836523876219812, iteration: 435616
loss: 1.0367567539215088,grad_norm: 0.8828693102874233, iteration: 435617
loss: 1.0075033903121948,grad_norm: 0.6684152100049605, iteration: 435618
loss: 0.9977111220359802,grad_norm: 0.6750775177285744, iteration: 435619
loss: 1.0132025480270386,grad_norm: 0.7249120438672073, iteration: 435620
loss: 0.9993487000465393,grad_norm: 0.6192913179564705, iteration: 435621
loss: 0.9924058318138123,grad_norm: 0.9358016639760801, iteration: 435622
loss: 0.9911623597145081,grad_norm: 0.9999994780557103, iteration: 435623
loss: 1.0044893026351929,grad_norm: 0.8128108496840922, iteration: 435624
loss: 1.0059715509414673,grad_norm: 0.8408155245608018, iteration: 435625
loss: 1.0288305282592773,grad_norm: 0.9509262562529712, iteration: 435626
loss: 0.9804678559303284,grad_norm: 0.8120245986365522, iteration: 435627
loss: 0.9609215259552002,grad_norm: 0.8352796128100349, iteration: 435628
loss: 1.014430284500122,grad_norm: 0.8452940719451197, iteration: 435629
loss: 0.998725175857544,grad_norm: 0.8097248164144729, iteration: 435630
loss: 1.0049196481704712,grad_norm: 0.7413169593437161, iteration: 435631
loss: 1.0341609716415405,grad_norm: 0.8969291960818531, iteration: 435632
loss: 1.0080137252807617,grad_norm: 0.834014801694935, iteration: 435633
loss: 1.0105905532836914,grad_norm: 0.9337586101321158, iteration: 435634
loss: 0.9940246343612671,grad_norm: 0.7978770200455698, iteration: 435635
loss: 1.0135375261306763,grad_norm: 0.7101104591083253, iteration: 435636
loss: 0.9902801513671875,grad_norm: 0.7236029793853263, iteration: 435637
loss: 1.0578807592391968,grad_norm: 0.8676369122073062, iteration: 435638
loss: 0.9712899327278137,grad_norm: 0.6795174237813683, iteration: 435639
loss: 1.0066516399383545,grad_norm: 0.8831147034712447, iteration: 435640
loss: 1.046952247619629,grad_norm: 0.9999995156547067, iteration: 435641
loss: 1.037802815437317,grad_norm: 0.9999991647426405, iteration: 435642
loss: 0.9929818511009216,grad_norm: 0.9999997536192873, iteration: 435643
loss: 0.9816812872886658,grad_norm: 0.9033017670082368, iteration: 435644
loss: 1.0068081617355347,grad_norm: 0.806084893761411, iteration: 435645
loss: 0.9770098924636841,grad_norm: 0.7525023082280405, iteration: 435646
loss: 0.9751604795455933,grad_norm: 0.7519860319881013, iteration: 435647
loss: 0.980762243270874,grad_norm: 0.655989091307446, iteration: 435648
loss: 0.9691238403320312,grad_norm: 0.7070173499267435, iteration: 435649
loss: 0.9969931840896606,grad_norm: 0.7040775821045678, iteration: 435650
loss: 0.9876512289047241,grad_norm: 0.8730197091504369, iteration: 435651
loss: 1.0294307470321655,grad_norm: 0.881541197459604, iteration: 435652
loss: 1.0041495561599731,grad_norm: 0.9419244584746906, iteration: 435653
loss: 1.112929105758667,grad_norm: 0.999999483187342, iteration: 435654
loss: 1.036778450012207,grad_norm: 0.9999995678909854, iteration: 435655
loss: 1.012808918952942,grad_norm: 0.9077203003288676, iteration: 435656
loss: 0.9780323505401611,grad_norm: 0.8518755744644622, iteration: 435657
loss: 0.979962170124054,grad_norm: 0.7342968660111855, iteration: 435658
loss: 0.9774475693702698,grad_norm: 0.9004048195032852, iteration: 435659
loss: 1.0098060369491577,grad_norm: 0.8850120290350488, iteration: 435660
loss: 1.016844391822815,grad_norm: 0.744341585755378, iteration: 435661
loss: 1.000135898590088,grad_norm: 0.692672940453391, iteration: 435662
loss: 1.0130919218063354,grad_norm: 0.9112530886170597, iteration: 435663
loss: 0.9664429426193237,grad_norm: 0.7793205727766862, iteration: 435664
loss: 1.0258809328079224,grad_norm: 0.9106302084016041, iteration: 435665
loss: 1.012520432472229,grad_norm: 0.6557419225654726, iteration: 435666
loss: 0.9771866798400879,grad_norm: 0.9610479857782329, iteration: 435667
loss: 0.9888076782226562,grad_norm: 0.7433615130688023, iteration: 435668
loss: 1.010731816291809,grad_norm: 0.8519644697552741, iteration: 435669
loss: 1.013068437576294,grad_norm: 0.999999526141956, iteration: 435670
loss: 1.0178946256637573,grad_norm: 0.8100145792970384, iteration: 435671
loss: 0.9913533329963684,grad_norm: 0.8147687348483761, iteration: 435672
loss: 0.9754456281661987,grad_norm: 0.6540036211179757, iteration: 435673
loss: 1.0006321668624878,grad_norm: 0.8165006197358492, iteration: 435674
loss: 1.0371226072311401,grad_norm: 0.9999992082230569, iteration: 435675
loss: 0.9936369061470032,grad_norm: 0.6832446595301765, iteration: 435676
loss: 0.9904390573501587,grad_norm: 0.9999993017045805, iteration: 435677
loss: 1.0243381261825562,grad_norm: 0.7772132202829417, iteration: 435678
loss: 0.9971621632575989,grad_norm: 0.9999990104096627, iteration: 435679
loss: 0.9963873028755188,grad_norm: 0.6947113699427037, iteration: 435680
loss: 0.9875522255897522,grad_norm: 0.7956763230595933, iteration: 435681
loss: 1.018768548965454,grad_norm: 0.7690720970417434, iteration: 435682
loss: 1.0157932043075562,grad_norm: 0.9999990914186343, iteration: 435683
loss: 0.9693369269371033,grad_norm: 0.8210985373978734, iteration: 435684
loss: 0.9845353960990906,grad_norm: 0.9290523752521702, iteration: 435685
loss: 1.0886973142623901,grad_norm: 0.7621610706834488, iteration: 435686
loss: 0.9918489456176758,grad_norm: 0.7452647289317185, iteration: 435687
loss: 1.0160859823226929,grad_norm: 0.7260840131509784, iteration: 435688
loss: 1.110249400138855,grad_norm: 0.9999995850009961, iteration: 435689
loss: 0.9754390120506287,grad_norm: 0.7188925131733892, iteration: 435690
loss: 0.9983729124069214,grad_norm: 0.7764150217199317, iteration: 435691
loss: 0.9678524136543274,grad_norm: 0.9299587076449892, iteration: 435692
loss: 1.0084739923477173,grad_norm: 0.7048158827109627, iteration: 435693
loss: 1.012544870376587,grad_norm: 0.6852466899110963, iteration: 435694
loss: 0.9943554997444153,grad_norm: 0.8427993164436398, iteration: 435695
loss: 0.9954103827476501,grad_norm: 0.9364823652432906, iteration: 435696
loss: 1.005983591079712,grad_norm: 0.768721088253156, iteration: 435697
loss: 0.9614084362983704,grad_norm: 0.8483732716427872, iteration: 435698
loss: 0.9829676151275635,grad_norm: 0.7804767999983613, iteration: 435699
loss: 0.9877604842185974,grad_norm: 0.7685677262740533, iteration: 435700
loss: 0.9965328574180603,grad_norm: 0.7828421753136262, iteration: 435701
loss: 1.0113834142684937,grad_norm: 0.6939160907888751, iteration: 435702
loss: 1.0531283617019653,grad_norm: 0.8049099033676685, iteration: 435703
loss: 0.9898947477340698,grad_norm: 0.9999998840767523, iteration: 435704
loss: 0.9556201100349426,grad_norm: 0.8591969201632011, iteration: 435705
loss: 1.0464046001434326,grad_norm: 0.7282063751683328, iteration: 435706
loss: 1.0294334888458252,grad_norm: 0.9999990946841604, iteration: 435707
loss: 1.0208629369735718,grad_norm: 0.9999991539800768, iteration: 435708
loss: 1.0091028213500977,grad_norm: 0.7602208875882059, iteration: 435709
loss: 0.9920557141304016,grad_norm: 0.8472477640438856, iteration: 435710
loss: 0.9812540411949158,grad_norm: 0.712218875027223, iteration: 435711
loss: 1.0170090198516846,grad_norm: 0.8659152630417541, iteration: 435712
loss: 1.0252715349197388,grad_norm: 0.8814227920716933, iteration: 435713
loss: 0.9958181977272034,grad_norm: 0.7674092707308652, iteration: 435714
loss: 0.9800062775611877,grad_norm: 0.7312620831400612, iteration: 435715
loss: 1.0206849575042725,grad_norm: 0.7323719213524499, iteration: 435716
loss: 0.9687354564666748,grad_norm: 0.9011019082203713, iteration: 435717
loss: 0.975337564945221,grad_norm: 0.7884869170737033, iteration: 435718
loss: 1.0158170461654663,grad_norm: 0.7188095866165407, iteration: 435719
loss: 1.0235776901245117,grad_norm: 0.8851791715978349, iteration: 435720
loss: 0.9903783202171326,grad_norm: 0.8537499228276048, iteration: 435721
loss: 1.0178486108779907,grad_norm: 0.9999990499777158, iteration: 435722
loss: 1.005361557006836,grad_norm: 0.9584950002986259, iteration: 435723
loss: 1.1353659629821777,grad_norm: 0.9999999770111234, iteration: 435724
loss: 0.9959774017333984,grad_norm: 0.7187462793446803, iteration: 435725
loss: 0.9582256078720093,grad_norm: 0.7873401266360817, iteration: 435726
loss: 0.9892240762710571,grad_norm: 0.7538262753614358, iteration: 435727
loss: 0.9963149428367615,grad_norm: 0.7819659846046386, iteration: 435728
loss: 1.0150887966156006,grad_norm: 0.7386721874500558, iteration: 435729
loss: 0.9747314453125,grad_norm: 0.7303151055968182, iteration: 435730
loss: 1.0090504884719849,grad_norm: 0.9474590573820805, iteration: 435731
loss: 1.0101491212844849,grad_norm: 0.8610490150833435, iteration: 435732
loss: 1.017872929573059,grad_norm: 0.8226138078035911, iteration: 435733
loss: 1.0023356676101685,grad_norm: 0.7805494452204997, iteration: 435734
loss: 1.0206432342529297,grad_norm: 0.724922969691774, iteration: 435735
loss: 1.0273470878601074,grad_norm: 0.8369701705895598, iteration: 435736
loss: 1.0260149240493774,grad_norm: 0.9999993858895456, iteration: 435737
loss: 1.0227738618850708,grad_norm: 0.8822342349589527, iteration: 435738
loss: 1.0150694847106934,grad_norm: 0.6584991513310096, iteration: 435739
loss: 1.028394103050232,grad_norm: 0.7759785715587594, iteration: 435740
loss: 1.0126696825027466,grad_norm: 0.8603335296152437, iteration: 435741
loss: 0.9914302825927734,grad_norm: 0.7625229055637349, iteration: 435742
loss: 1.005500316619873,grad_norm: 0.9999992262563036, iteration: 435743
loss: 0.9828556776046753,grad_norm: 0.9999998931482056, iteration: 435744
loss: 0.9972328543663025,grad_norm: 0.9169410475617745, iteration: 435745
loss: 1.0348588228225708,grad_norm: 0.7157646534844254, iteration: 435746
loss: 1.0238964557647705,grad_norm: 0.7291813551093961, iteration: 435747
loss: 1.0557234287261963,grad_norm: 0.9999992402059413, iteration: 435748
loss: 0.995380699634552,grad_norm: 0.8004421166025976, iteration: 435749
loss: 1.1863617897033691,grad_norm: 1.0000000106441174, iteration: 435750
loss: 1.013737440109253,grad_norm: 0.8799090634220708, iteration: 435751
loss: 1.0072617530822754,grad_norm: 0.8690622338679697, iteration: 435752
loss: 0.9985871315002441,grad_norm: 0.7390916853240427, iteration: 435753
loss: 1.085797905921936,grad_norm: 0.9950320487734137, iteration: 435754
loss: 1.0330328941345215,grad_norm: 0.624493101356831, iteration: 435755
loss: 1.1144696474075317,grad_norm: 0.8317468730256397, iteration: 435756
loss: 0.9757473468780518,grad_norm: 0.749801611900203, iteration: 435757
loss: 0.9650142788887024,grad_norm: 0.7342356367946266, iteration: 435758
loss: 1.0405077934265137,grad_norm: 0.9999998284546536, iteration: 435759
loss: 1.0129427909851074,grad_norm: 0.7440921215776864, iteration: 435760
loss: 0.9733963012695312,grad_norm: 0.7169152688627556, iteration: 435761
loss: 1.000283122062683,grad_norm: 0.7885762588502789, iteration: 435762
loss: 1.0119887590408325,grad_norm: 0.8404490423044543, iteration: 435763
loss: 1.0482122898101807,grad_norm: 0.7200332322535032, iteration: 435764
loss: 0.9904842972755432,grad_norm: 0.8848186856212834, iteration: 435765
loss: 1.0632679462432861,grad_norm: 0.9999997554231211, iteration: 435766
loss: 1.092757225036621,grad_norm: 0.8467468511582806, iteration: 435767
loss: 1.0009982585906982,grad_norm: 0.9999997286836565, iteration: 435768
loss: 1.148776888847351,grad_norm: 0.9999996901322867, iteration: 435769
loss: 0.9985719323158264,grad_norm: 0.698195714005671, iteration: 435770
loss: 1.0154473781585693,grad_norm: 0.658109065248575, iteration: 435771
loss: 0.9961979389190674,grad_norm: 0.6971491887250348, iteration: 435772
loss: 1.00747811794281,grad_norm: 0.8742187263704774, iteration: 435773
loss: 0.9552285671234131,grad_norm: 0.7188397531491778, iteration: 435774
loss: 0.9835426211357117,grad_norm: 0.7629476785789766, iteration: 435775
loss: 1.0063151121139526,grad_norm: 0.9865437095829965, iteration: 435776
loss: 0.9725637435913086,grad_norm: 0.7847728384907097, iteration: 435777
loss: 0.9560054540634155,grad_norm: 0.8070991135341299, iteration: 435778
loss: 0.9926462173461914,grad_norm: 0.7666464319990579, iteration: 435779
loss: 1.0018638372421265,grad_norm: 0.7974336581944733, iteration: 435780
loss: 1.0126162767410278,grad_norm: 0.7494486399680544, iteration: 435781
loss: 1.143362283706665,grad_norm: 0.9999994678292051, iteration: 435782
loss: 0.9988284111022949,grad_norm: 0.7593706299709195, iteration: 435783
loss: 1.0008903741836548,grad_norm: 0.781309683866905, iteration: 435784
loss: 0.9963584542274475,grad_norm: 0.7621984000547724, iteration: 435785
loss: 1.0582677125930786,grad_norm: 0.999999758462658, iteration: 435786
loss: 0.98139888048172,grad_norm: 0.7141036626628061, iteration: 435787
loss: 1.1429684162139893,grad_norm: 0.9999999009957277, iteration: 435788
loss: 1.0767050981521606,grad_norm: 0.7405050245713073, iteration: 435789
loss: 1.1034824848175049,grad_norm: 0.9999998416673105, iteration: 435790
loss: 1.0009820461273193,grad_norm: 0.7448279904286258, iteration: 435791
loss: 1.0122519731521606,grad_norm: 0.875509851222771, iteration: 435792
loss: 1.0570687055587769,grad_norm: 0.6876386247826903, iteration: 435793
loss: 0.9816886782646179,grad_norm: 0.7717966118022521, iteration: 435794
loss: 1.0065195560455322,grad_norm: 0.8435375671517077, iteration: 435795
loss: 1.0316143035888672,grad_norm: 0.7691191620100502, iteration: 435796
loss: 1.0205413103103638,grad_norm: 0.8627171367642519, iteration: 435797
loss: 1.0386223793029785,grad_norm: 0.9999992140256839, iteration: 435798
loss: 1.0046429634094238,grad_norm: 0.6345136302241878, iteration: 435799
loss: 0.9780486226081848,grad_norm: 0.708792309057654, iteration: 435800
loss: 0.9953516125679016,grad_norm: 0.945523605889768, iteration: 435801
loss: 1.010227918624878,grad_norm: 0.8722291194506968, iteration: 435802
loss: 0.9942628741264343,grad_norm: 0.8338614151819284, iteration: 435803
loss: 1.0656781196594238,grad_norm: 0.7801282067509198, iteration: 435804
loss: 1.0796635150909424,grad_norm: 0.768816806056215, iteration: 435805
loss: 1.0543453693389893,grad_norm: 0.9999998603486971, iteration: 435806
loss: 1.0113930702209473,grad_norm: 0.8405419859622456, iteration: 435807
loss: 1.050520896911621,grad_norm: 0.878392688626727, iteration: 435808
loss: 1.032964825630188,grad_norm: 0.7456853858897436, iteration: 435809
loss: 0.9954787492752075,grad_norm: 0.8365225666463784, iteration: 435810
loss: 0.9941690564155579,grad_norm: 0.6911960714477849, iteration: 435811
loss: 1.0198410749435425,grad_norm: 0.9999994202627212, iteration: 435812
loss: 0.9750317931175232,grad_norm: 0.720709461400885, iteration: 435813
loss: 0.9735096096992493,grad_norm: 0.6741638530416081, iteration: 435814
loss: 1.000409483909607,grad_norm: 0.7740898232908303, iteration: 435815
loss: 1.0018306970596313,grad_norm: 0.6878361846501714, iteration: 435816
loss: 1.0552074909210205,grad_norm: 0.9999993630205324, iteration: 435817
loss: 1.0320388078689575,grad_norm: 0.781471108455528, iteration: 435818
loss: 0.9972171783447266,grad_norm: 0.6980122505372638, iteration: 435819
loss: 1.0180115699768066,grad_norm: 0.9000753423470264, iteration: 435820
loss: 0.9916807413101196,grad_norm: 0.7990243839773843, iteration: 435821
loss: 1.129746913909912,grad_norm: 0.9999993757869597, iteration: 435822
loss: 0.949251115322113,grad_norm: 0.6111739310363699, iteration: 435823
loss: 1.0045747756958008,grad_norm: 0.7473241513931055, iteration: 435824
loss: 1.0110112428665161,grad_norm: 0.7761808706288494, iteration: 435825
loss: 1.031506061553955,grad_norm: 0.8061340538256656, iteration: 435826
loss: 0.9743170142173767,grad_norm: 0.7534654638894229, iteration: 435827
loss: 1.0301339626312256,grad_norm: 0.999999537613195, iteration: 435828
loss: 1.0059452056884766,grad_norm: 0.8730933302978002, iteration: 435829
loss: 1.0224310159683228,grad_norm: 0.8517870142837151, iteration: 435830
loss: 1.0138810873031616,grad_norm: 0.8452738131442267, iteration: 435831
loss: 0.9991976618766785,grad_norm: 0.8481541946404488, iteration: 435832
loss: 1.0291314125061035,grad_norm: 0.8047019313918329, iteration: 435833
loss: 0.9845000505447388,grad_norm: 0.907774435691983, iteration: 435834
loss: 0.9966617822647095,grad_norm: 0.7635005011044197, iteration: 435835
loss: 1.0153114795684814,grad_norm: 0.816842650177693, iteration: 435836
loss: 1.0100009441375732,grad_norm: 0.9999990645782663, iteration: 435837
loss: 1.068395972251892,grad_norm: 0.9999992380836458, iteration: 435838
loss: 1.0436581373214722,grad_norm: 0.9999998300499088, iteration: 435839
loss: 0.9840735793113708,grad_norm: 0.7673592316336892, iteration: 435840
loss: 0.9692426919937134,grad_norm: 0.7770178839421669, iteration: 435841
loss: 0.9949752688407898,grad_norm: 0.8057588540590662, iteration: 435842
loss: 0.9567570090293884,grad_norm: 0.7971613469895137, iteration: 435843
loss: 0.9663469195365906,grad_norm: 0.827423448585443, iteration: 435844
loss: 0.9713561534881592,grad_norm: 0.8162732135802161, iteration: 435845
loss: 0.9778775572776794,grad_norm: 0.7262211196918266, iteration: 435846
loss: 1.0889240503311157,grad_norm: 0.999999669722844, iteration: 435847
loss: 0.9992980360984802,grad_norm: 0.8092645212489209, iteration: 435848
loss: 1.021651029586792,grad_norm: 0.6866676011840708, iteration: 435849
loss: 1.0081061124801636,grad_norm: 0.9999993719566922, iteration: 435850
loss: 1.0460948944091797,grad_norm: 0.8192023766005084, iteration: 435851
loss: 1.0367134809494019,grad_norm: 0.8026938975008495, iteration: 435852
loss: 0.9898868203163147,grad_norm: 0.6941579411794332, iteration: 435853
loss: 0.9931908249855042,grad_norm: 0.8249312110692697, iteration: 435854
loss: 1.0061272382736206,grad_norm: 0.5839760556949602, iteration: 435855
loss: 0.994286298751831,grad_norm: 0.7265597177545526, iteration: 435856
loss: 0.9700785279273987,grad_norm: 0.7260026015022377, iteration: 435857
loss: 1.1161752939224243,grad_norm: 0.9999993518617237, iteration: 435858
loss: 0.9963164925575256,grad_norm: 0.7312049733297971, iteration: 435859
loss: 1.0631077289581299,grad_norm: 0.9999992911904176, iteration: 435860
loss: 0.9905973076820374,grad_norm: 0.7359695371899401, iteration: 435861
loss: 1.0171984434127808,grad_norm: 0.999999174739915, iteration: 435862
loss: 1.0022610425949097,grad_norm: 0.7837216720439641, iteration: 435863
loss: 0.9953171014785767,grad_norm: 0.8161341553187927, iteration: 435864
loss: 1.041576623916626,grad_norm: 0.7913311679970152, iteration: 435865
loss: 1.0091500282287598,grad_norm: 0.7489050080587575, iteration: 435866
loss: 1.0262916088104248,grad_norm: 0.7155958249429462, iteration: 435867
loss: 0.9603736996650696,grad_norm: 0.7628377333777682, iteration: 435868
loss: 0.9929047226905823,grad_norm: 0.9622120518364435, iteration: 435869
loss: 0.9744839668273926,grad_norm: 0.9999990493071608, iteration: 435870
loss: 1.1043009757995605,grad_norm: 0.7417995551243329, iteration: 435871
loss: 1.031600832939148,grad_norm: 0.7694655752038202, iteration: 435872
loss: 1.0006458759307861,grad_norm: 0.8849464983913244, iteration: 435873
loss: 1.0005226135253906,grad_norm: 0.7712367200198382, iteration: 435874
loss: 0.9730761647224426,grad_norm: 0.9400197363619026, iteration: 435875
loss: 0.9800903797149658,grad_norm: 0.8114715538347498, iteration: 435876
loss: 1.0245232582092285,grad_norm: 0.9999991525612189, iteration: 435877
loss: 0.9704611301422119,grad_norm: 0.7223891148560334, iteration: 435878
loss: 0.9783582091331482,grad_norm: 0.8345057492310599, iteration: 435879
loss: 1.0207966566085815,grad_norm: 0.999999084067751, iteration: 435880
loss: 0.9934321045875549,grad_norm: 0.8337575964687305, iteration: 435881
loss: 1.0021896362304688,grad_norm: 0.7974800738679197, iteration: 435882
loss: 0.9717097878456116,grad_norm: 0.6916012587941676, iteration: 435883
loss: 1.0209612846374512,grad_norm: 0.7675543810926938, iteration: 435884
loss: 0.9707702994346619,grad_norm: 0.9320068340403964, iteration: 435885
loss: 0.9951928853988647,grad_norm: 0.727582092871973, iteration: 435886
loss: 1.000374436378479,grad_norm: 0.9999998753735936, iteration: 435887
loss: 0.9995080232620239,grad_norm: 0.858524098856356, iteration: 435888
loss: 1.0104506015777588,grad_norm: 0.7019840602197346, iteration: 435889
loss: 0.9682535529136658,grad_norm: 0.8218388316765168, iteration: 435890
loss: 1.030984878540039,grad_norm: 0.9999993466050167, iteration: 435891
loss: 1.0014926195144653,grad_norm: 0.8274592612438384, iteration: 435892
loss: 1.0005022287368774,grad_norm: 0.9032151252917947, iteration: 435893
loss: 1.0346794128417969,grad_norm: 0.6750963501886662, iteration: 435894
loss: 1.0065298080444336,grad_norm: 0.7779112524237721, iteration: 435895
loss: 1.0068954229354858,grad_norm: 0.7236490114487641, iteration: 435896
loss: 1.0229218006134033,grad_norm: 0.7791846966266464, iteration: 435897
loss: 1.0101358890533447,grad_norm: 0.7635963706359051, iteration: 435898
loss: 1.0404846668243408,grad_norm: 0.7947489606883926, iteration: 435899
loss: 0.9859138131141663,grad_norm: 0.9999998455912849, iteration: 435900
loss: 0.9753465056419373,grad_norm: 0.7750998647853516, iteration: 435901
loss: 0.9933068156242371,grad_norm: 0.7066810860924829, iteration: 435902
loss: 0.9746274948120117,grad_norm: 0.7586423284123057, iteration: 435903
loss: 0.9908056855201721,grad_norm: 0.7970176977215411, iteration: 435904
loss: 1.0563968420028687,grad_norm: 0.9999997072597426, iteration: 435905
loss: 0.9623397588729858,grad_norm: 0.8081398010322197, iteration: 435906
loss: 1.1060543060302734,grad_norm: 0.8640874806283597, iteration: 435907
loss: 1.0125367641448975,grad_norm: 0.677508360211427, iteration: 435908
loss: 1.0369309186935425,grad_norm: 0.9999998898233374, iteration: 435909
loss: 1.0023367404937744,grad_norm: 0.9760812172132745, iteration: 435910
loss: 1.0215150117874146,grad_norm: 0.7398026513791817, iteration: 435911
loss: 0.9902401566505432,grad_norm: 0.8400869475007946, iteration: 435912
loss: 0.9982936382293701,grad_norm: 0.8998919786012856, iteration: 435913
loss: 0.989722490310669,grad_norm: 0.7777012686230443, iteration: 435914
loss: 0.9908309578895569,grad_norm: 0.8133119122798345, iteration: 435915
loss: 0.9643486738204956,grad_norm: 0.770558421239734, iteration: 435916
loss: 0.9974822998046875,grad_norm: 0.7201215900428198, iteration: 435917
loss: 0.991371214389801,grad_norm: 0.6751712258454361, iteration: 435918
loss: 0.9894266724586487,grad_norm: 0.8240609700301719, iteration: 435919
loss: 0.9579325318336487,grad_norm: 0.8463327366776879, iteration: 435920
loss: 0.9926302433013916,grad_norm: 0.843892659425474, iteration: 435921
loss: 1.0034267902374268,grad_norm: 0.7319455097265685, iteration: 435922
loss: 0.9828332662582397,grad_norm: 0.7708620973702677, iteration: 435923
loss: 1.0162433385849,grad_norm: 0.7445965565630224, iteration: 435924
loss: 0.9825553894042969,grad_norm: 0.8311749398683758, iteration: 435925
loss: 0.972394585609436,grad_norm: 0.8177207516446526, iteration: 435926
loss: 1.0233241319656372,grad_norm: 0.7149766511012603, iteration: 435927
loss: 0.9780468940734863,grad_norm: 0.9999999406709847, iteration: 435928
loss: 1.0146678686141968,grad_norm: 0.6540350687144808, iteration: 435929
loss: 1.0054243803024292,grad_norm: 0.7547617896682302, iteration: 435930
loss: 1.0054965019226074,grad_norm: 0.8599256991733377, iteration: 435931
loss: 0.9740898013114929,grad_norm: 0.9280697166019002, iteration: 435932
loss: 1.076882004737854,grad_norm: 0.8216214605686356, iteration: 435933
loss: 0.9797928929328918,grad_norm: 0.8784543774424678, iteration: 435934
loss: 1.044481635093689,grad_norm: 0.9667791310493168, iteration: 435935
loss: 1.0163203477859497,grad_norm: 0.6996124969507269, iteration: 435936
loss: 0.9699805378913879,grad_norm: 0.6705941561370048, iteration: 435937
loss: 1.1175706386566162,grad_norm: 0.8321751679844016, iteration: 435938
loss: 0.9598212242126465,grad_norm: 0.8009978379478099, iteration: 435939
loss: 0.9910003542900085,grad_norm: 0.9843179880202455, iteration: 435940
loss: 0.9942041635513306,grad_norm: 0.8291522978828461, iteration: 435941
loss: 1.048933744430542,grad_norm: 0.7705460922496816, iteration: 435942
loss: 0.967948317527771,grad_norm: 0.958278072722138, iteration: 435943
loss: 1.0228146314620972,grad_norm: 0.9999994720333001, iteration: 435944
loss: 1.0062941312789917,grad_norm: 0.9999992027693317, iteration: 435945
loss: 0.9614166021347046,grad_norm: 0.6793711734941806, iteration: 435946
loss: 0.9997084736824036,grad_norm: 0.6767679840467146, iteration: 435947
loss: 1.0053383111953735,grad_norm: 0.7627265882891165, iteration: 435948
loss: 1.0222691297531128,grad_norm: 0.7791249558988869, iteration: 435949
loss: 0.9907283782958984,grad_norm: 0.6470925736180476, iteration: 435950
loss: 0.9892745614051819,grad_norm: 0.760182293659144, iteration: 435951
loss: 1.0239936113357544,grad_norm: 0.9999990462445592, iteration: 435952
loss: 1.0060527324676514,grad_norm: 0.7761993514837107, iteration: 435953
loss: 0.9768751263618469,grad_norm: 0.9194089235762978, iteration: 435954
loss: 1.0176057815551758,grad_norm: 0.7588939074652024, iteration: 435955
loss: 0.9738776087760925,grad_norm: 0.8160959417620529, iteration: 435956
loss: 1.0443658828735352,grad_norm: 0.999999548035207, iteration: 435957
loss: 1.0142462253570557,grad_norm: 0.6855273981207346, iteration: 435958
loss: 1.0092787742614746,grad_norm: 0.598310111402384, iteration: 435959
loss: 0.9686037302017212,grad_norm: 0.7820507175630407, iteration: 435960
loss: 1.0268528461456299,grad_norm: 0.7528187198796434, iteration: 435961
loss: 1.0024479627609253,grad_norm: 0.7057712616835897, iteration: 435962
loss: 1.0127925872802734,grad_norm: 0.8265377376182055, iteration: 435963
loss: 0.9890332221984863,grad_norm: 0.8314031518647782, iteration: 435964
loss: 1.0322551727294922,grad_norm: 0.7708720102989445, iteration: 435965
loss: 0.9980635643005371,grad_norm: 0.6094285941175319, iteration: 435966
loss: 1.0475738048553467,grad_norm: 0.7933418418686884, iteration: 435967
loss: 0.9788154363632202,grad_norm: 0.8366609000170795, iteration: 435968
loss: 1.0054287910461426,grad_norm: 0.7268353656462719, iteration: 435969
loss: 0.9930251836776733,grad_norm: 0.8439535951768662, iteration: 435970
loss: 1.0481246709823608,grad_norm: 0.876662413904995, iteration: 435971
loss: 0.9916794896125793,grad_norm: 0.7046626935349597, iteration: 435972
loss: 1.0243077278137207,grad_norm: 0.8471360991731992, iteration: 435973
loss: 1.0263020992279053,grad_norm: 0.7595126910788293, iteration: 435974
loss: 1.0100513696670532,grad_norm: 0.9999999501882049, iteration: 435975
loss: 0.986434280872345,grad_norm: 0.7785363898746139, iteration: 435976
loss: 1.0011403560638428,grad_norm: 0.9060347602484753, iteration: 435977
loss: 0.9884881973266602,grad_norm: 0.6517132498741963, iteration: 435978
loss: 1.0424656867980957,grad_norm: 0.8106974505728154, iteration: 435979
loss: 0.9926562905311584,grad_norm: 0.7804600251683617, iteration: 435980
loss: 0.9845858216285706,grad_norm: 0.6861885007149501, iteration: 435981
loss: 0.9983755946159363,grad_norm: 0.7814781510263036, iteration: 435982
loss: 1.0780442953109741,grad_norm: 0.8635292420949843, iteration: 435983
loss: 0.9808736443519592,grad_norm: 0.8688428124757477, iteration: 435984
loss: 0.9751293063163757,grad_norm: 0.7767485657177972, iteration: 435985
loss: 0.9894618988037109,grad_norm: 0.7542813124320156, iteration: 435986
loss: 0.9979712963104248,grad_norm: 0.7300766359836188, iteration: 435987
loss: 0.9929127097129822,grad_norm: 0.8132058490148419, iteration: 435988
loss: 0.9773775339126587,grad_norm: 0.8602288624081608, iteration: 435989
loss: 0.9872649312019348,grad_norm: 0.7545726113515919, iteration: 435990
loss: 0.9905553460121155,grad_norm: 1.0000000353804406, iteration: 435991
loss: 0.9867396950721741,grad_norm: 0.88084146030827, iteration: 435992
loss: 1.0493773221969604,grad_norm: 0.9999992934520046, iteration: 435993
loss: 0.9819491505622864,grad_norm: 0.932338465586771, iteration: 435994
loss: 0.9668864607810974,grad_norm: 0.7840857759990051, iteration: 435995
loss: 0.9622007012367249,grad_norm: 0.6572141586102938, iteration: 435996
loss: 1.031680941581726,grad_norm: 0.811569890457517, iteration: 435997
loss: 1.0371192693710327,grad_norm: 0.7454799164525935, iteration: 435998
loss: 0.9570796489715576,grad_norm: 0.7719300917016769, iteration: 435999
loss: 0.9892297387123108,grad_norm: 0.8102752283837099, iteration: 436000
loss: 0.9501360058784485,grad_norm: 0.9996373931769181, iteration: 436001
loss: 1.0699752569198608,grad_norm: 0.9215929953951417, iteration: 436002
loss: 1.0547689199447632,grad_norm: 0.951412514922228, iteration: 436003
loss: 0.9886646866798401,grad_norm: 0.8038880217508462, iteration: 436004
loss: 1.0065977573394775,grad_norm: 0.7805677750533494, iteration: 436005
loss: 0.9753378033638,grad_norm: 0.7672236190843047, iteration: 436006
loss: 1.010180115699768,grad_norm: 0.7515723880522578, iteration: 436007
loss: 1.0052969455718994,grad_norm: 0.7775406527541248, iteration: 436008
loss: 0.9873908162117004,grad_norm: 0.8416316339622822, iteration: 436009
loss: 1.0308709144592285,grad_norm: 0.7876062059032162, iteration: 436010
loss: 0.9839135408401489,grad_norm: 0.7415919693758081, iteration: 436011
loss: 1.0087984800338745,grad_norm: 0.6304659559004617, iteration: 436012
loss: 0.9733536243438721,grad_norm: 0.8448540711003598, iteration: 436013
loss: 0.9655027985572815,grad_norm: 0.919685947799082, iteration: 436014
loss: 0.9719575643539429,grad_norm: 0.8490953422924151, iteration: 436015
loss: 1.0798559188842773,grad_norm: 0.9928071053212049, iteration: 436016
loss: 1.0008985996246338,grad_norm: 0.6250270009735032, iteration: 436017
loss: 0.9880380630493164,grad_norm: 0.8120977678474064, iteration: 436018
loss: 1.0264767408370972,grad_norm: 0.902296304356957, iteration: 436019
loss: 0.9871165156364441,grad_norm: 0.7138064671439797, iteration: 436020
loss: 0.9797759056091309,grad_norm: 0.8039392577860403, iteration: 436021
loss: 0.9592528343200684,grad_norm: 0.9488511798773396, iteration: 436022
loss: 0.9879283905029297,grad_norm: 0.9999990173229463, iteration: 436023
loss: 1.0287657976150513,grad_norm: 0.9436614448284625, iteration: 436024
loss: 1.0600829124450684,grad_norm: 0.8589072436994636, iteration: 436025
loss: 1.0162278413772583,grad_norm: 0.8285740523133615, iteration: 436026
loss: 0.979742169380188,grad_norm: 0.9999991603156791, iteration: 436027
loss: 1.0336872339248657,grad_norm: 0.999999256531987, iteration: 436028
loss: 0.9692857265472412,grad_norm: 0.7326008684480129, iteration: 436029
loss: 0.9960712194442749,grad_norm: 0.9999998207457202, iteration: 436030
loss: 1.0041693449020386,grad_norm: 0.7882785114453787, iteration: 436031
loss: 1.0140684843063354,grad_norm: 0.859829244582436, iteration: 436032
loss: 0.9916638731956482,grad_norm: 0.941708542467935, iteration: 436033
loss: 0.9453845024108887,grad_norm: 0.8742674121543811, iteration: 436034
loss: 1.029599905014038,grad_norm: 0.9369164907894235, iteration: 436035
loss: 1.016542673110962,grad_norm: 0.7801552696438656, iteration: 436036
loss: 1.015769600868225,grad_norm: 0.9035983022550284, iteration: 436037
loss: 0.9678158164024353,grad_norm: 0.7880803655022939, iteration: 436038
loss: 0.9915674328804016,grad_norm: 0.899281061558396, iteration: 436039
loss: 1.0195153951644897,grad_norm: 0.7154315994895659, iteration: 436040
loss: 0.9679001569747925,grad_norm: 0.8002722902745432, iteration: 436041
loss: 1.0160188674926758,grad_norm: 0.757548560272941, iteration: 436042
loss: 1.029273509979248,grad_norm: 0.8205477051326207, iteration: 436043
loss: 0.9911928772926331,grad_norm: 0.7852808264347912, iteration: 436044
loss: 0.984821617603302,grad_norm: 0.7665489941094726, iteration: 436045
loss: 0.9771889448165894,grad_norm: 0.6360250633684239, iteration: 436046
loss: 0.9828569293022156,grad_norm: 0.7714121509719274, iteration: 436047
loss: 1.0348591804504395,grad_norm: 0.8233994561810732, iteration: 436048
loss: 1.066295862197876,grad_norm: 0.8342106723717021, iteration: 436049
loss: 0.9873478412628174,grad_norm: 0.8716861745201346, iteration: 436050
loss: 0.992343008518219,grad_norm: 0.6743337113247152, iteration: 436051
loss: 1.0153366327285767,grad_norm: 0.7168348553538955, iteration: 436052
loss: 1.0219355821609497,grad_norm: 0.6833142434645998, iteration: 436053
loss: 0.9988436102867126,grad_norm: 0.7415026996841215, iteration: 436054
loss: 0.9757927060127258,grad_norm: 0.7973417085657054, iteration: 436055
loss: 1.0769084692001343,grad_norm: 0.9999996642165667, iteration: 436056
loss: 0.9936743974685669,grad_norm: 0.8258985053421725, iteration: 436057
loss: 1.0428228378295898,grad_norm: 0.9177242579845043, iteration: 436058
loss: 1.0016709566116333,grad_norm: 0.9128642956272447, iteration: 436059
loss: 1.0390369892120361,grad_norm: 0.6538520076560517, iteration: 436060
loss: 1.0201784372329712,grad_norm: 0.8821407257024069, iteration: 436061
loss: 0.9923297166824341,grad_norm: 0.7885193420945101, iteration: 436062
loss: 0.9568672180175781,grad_norm: 0.724882591969257, iteration: 436063
loss: 1.0122841596603394,grad_norm: 0.7492237688958345, iteration: 436064
loss: 1.0174113512039185,grad_norm: 0.8808554403256472, iteration: 436065
loss: 0.9965117573738098,grad_norm: 0.7288073118483975, iteration: 436066
loss: 0.9953173398971558,grad_norm: 0.8898777547143469, iteration: 436067
loss: 1.0024999380111694,grad_norm: 0.89499147223463, iteration: 436068
loss: 0.9841718077659607,grad_norm: 0.7878736107469588, iteration: 436069
loss: 0.9790582656860352,grad_norm: 0.6327461445063578, iteration: 436070
loss: 0.9822412133216858,grad_norm: 0.7417681098603798, iteration: 436071
loss: 0.9875468015670776,grad_norm: 0.830935737951064, iteration: 436072
loss: 1.0820400714874268,grad_norm: 0.8185979732531371, iteration: 436073
loss: 0.9938300848007202,grad_norm: 0.6702322995869987, iteration: 436074
loss: 1.0069615840911865,grad_norm: 0.7746259844273811, iteration: 436075
loss: 0.9685501456260681,grad_norm: 0.7881602978446205, iteration: 436076
loss: 1.0466030836105347,grad_norm: 0.8447769706755591, iteration: 436077
loss: 1.0132993459701538,grad_norm: 0.8141949967727924, iteration: 436078
loss: 1.021866798400879,grad_norm: 0.8564662981564529, iteration: 436079
loss: 0.9942706823348999,grad_norm: 0.8486209684746275, iteration: 436080
loss: 0.9865039587020874,grad_norm: 0.7583919721876679, iteration: 436081
loss: 1.0315414667129517,grad_norm: 0.7307025537872436, iteration: 436082
loss: 0.9932146072387695,grad_norm: 0.7624867703133357, iteration: 436083
loss: 0.9918011426925659,grad_norm: 0.8219004302753348, iteration: 436084
loss: 0.974796712398529,grad_norm: 0.9335600270180846, iteration: 436085
loss: 0.9990972280502319,grad_norm: 0.8379359349284131, iteration: 436086
loss: 0.9579918384552002,grad_norm: 0.7740718156408207, iteration: 436087
loss: 1.0372929573059082,grad_norm: 0.7551941333734777, iteration: 436088
loss: 1.0239579677581787,grad_norm: 0.7790121834931497, iteration: 436089
loss: 0.9570611715316772,grad_norm: 0.7023573022953323, iteration: 436090
loss: 0.9830116629600525,grad_norm: 0.683740070391186, iteration: 436091
loss: 1.015913724899292,grad_norm: 0.7762596012991642, iteration: 436092
loss: 0.9764890670776367,grad_norm: 0.6532698979512819, iteration: 436093
loss: 0.9947271943092346,grad_norm: 0.7888375745827486, iteration: 436094
loss: 0.9846681356430054,grad_norm: 0.7194002466700057, iteration: 436095
loss: 0.9961310029029846,grad_norm: 0.9999991968963156, iteration: 436096
loss: 0.9748398065567017,grad_norm: 0.7646699088304649, iteration: 436097
loss: 1.0070409774780273,grad_norm: 0.7458312567734611, iteration: 436098
loss: 1.0063966512680054,grad_norm: 0.8592301612850071, iteration: 436099
loss: 0.970212996006012,grad_norm: 0.7986873463126266, iteration: 436100
loss: 0.9772424101829529,grad_norm: 0.8821652388215834, iteration: 436101
loss: 0.9954399466514587,grad_norm: 0.8270045705351698, iteration: 436102
loss: 0.9967386722564697,grad_norm: 0.999999798530453, iteration: 436103
loss: 1.0021781921386719,grad_norm: 0.9999990573100762, iteration: 436104
loss: 1.0478990077972412,grad_norm: 0.9999994273391614, iteration: 436105
loss: 0.9568129181861877,grad_norm: 0.8205908240685494, iteration: 436106
loss: 0.9854199290275574,grad_norm: 0.746918533073689, iteration: 436107
loss: 0.9996024966239929,grad_norm: 0.8081132521168581, iteration: 436108
loss: 0.9513881206512451,grad_norm: 0.7855934100004975, iteration: 436109
loss: 1.004734754562378,grad_norm: 0.7598372986370435, iteration: 436110
loss: 1.0112842321395874,grad_norm: 0.7906213258199001, iteration: 436111
loss: 1.004117488861084,grad_norm: 0.813917923549005, iteration: 436112
loss: 0.9936158657073975,grad_norm: 0.831930464317605, iteration: 436113
loss: 0.9767734408378601,grad_norm: 0.7593387839774638, iteration: 436114
loss: 0.9775174856185913,grad_norm: 0.9022706806855894, iteration: 436115
loss: 1.0378810167312622,grad_norm: 0.9392839332514216, iteration: 436116
loss: 0.982446551322937,grad_norm: 0.8169885544794967, iteration: 436117
loss: 0.9957143068313599,grad_norm: 0.8912792336946987, iteration: 436118
loss: 1.0052728652954102,grad_norm: 0.8177676499236337, iteration: 436119
loss: 0.9629473090171814,grad_norm: 0.9224365104187364, iteration: 436120
loss: 1.035537838935852,grad_norm: 0.8623232950925419, iteration: 436121
loss: 1.021735429763794,grad_norm: 0.7733408783214804, iteration: 436122
loss: 0.987653374671936,grad_norm: 0.6319338895089517, iteration: 436123
loss: 1.0021036863327026,grad_norm: 0.8623104204928363, iteration: 436124
loss: 0.9955379366874695,grad_norm: 0.9254517497154315, iteration: 436125
loss: 0.9912436604499817,grad_norm: 0.8126670623280652, iteration: 436126
loss: 0.9880270957946777,grad_norm: 0.9224709640691686, iteration: 436127
loss: 1.0791034698486328,grad_norm: 0.7585466177709219, iteration: 436128
loss: 1.0060710906982422,grad_norm: 0.8580116039031069, iteration: 436129
loss: 0.9892223477363586,grad_norm: 0.7840408551239695, iteration: 436130
loss: 1.0011588335037231,grad_norm: 0.6967934793500232, iteration: 436131
loss: 0.9714545011520386,grad_norm: 0.8246521556369125, iteration: 436132
loss: 0.967045783996582,grad_norm: 0.7823012431878729, iteration: 436133
loss: 0.9929513335227966,grad_norm: 0.7669034739578751, iteration: 436134
loss: 1.0151766538619995,grad_norm: 0.8898070964948169, iteration: 436135
loss: 1.022131323814392,grad_norm: 0.7153147805326237, iteration: 436136
loss: 1.0164965391159058,grad_norm: 0.845771024478279, iteration: 436137
loss: 0.9828212857246399,grad_norm: 0.7703367318309815, iteration: 436138
loss: 1.0078413486480713,grad_norm: 0.9999994166378896, iteration: 436139
loss: 1.0144643783569336,grad_norm: 0.7675283097165839, iteration: 436140
loss: 0.9968506693840027,grad_norm: 0.8576625971541842, iteration: 436141
loss: 0.9901179671287537,grad_norm: 0.7692341130619751, iteration: 436142
loss: 1.048340916633606,grad_norm: 0.8291466107253286, iteration: 436143
loss: 1.0274288654327393,grad_norm: 0.6302974047087604, iteration: 436144
loss: 1.0027397871017456,grad_norm: 0.742184677817223, iteration: 436145
loss: 0.9837964177131653,grad_norm: 0.7625574048873365, iteration: 436146
loss: 0.9995042681694031,grad_norm: 0.8387952778040363, iteration: 436147
loss: 0.9541852474212646,grad_norm: 0.7080490571636271, iteration: 436148
loss: 0.9822530746459961,grad_norm: 0.6794788173366749, iteration: 436149
loss: 1.1135973930358887,grad_norm: 0.999999843099062, iteration: 436150
loss: 0.9836870431900024,grad_norm: 0.7257910076471442, iteration: 436151
loss: 0.9644728899002075,grad_norm: 0.7650541454554033, iteration: 436152
loss: 1.0003787279129028,grad_norm: 0.9999999801770737, iteration: 436153
loss: 1.0077451467514038,grad_norm: 0.7117081491871792, iteration: 436154
loss: 0.9774675965309143,grad_norm: 0.919684121070067, iteration: 436155
loss: 0.9552618265151978,grad_norm: 0.7747585484127688, iteration: 436156
loss: 1.0347868204116821,grad_norm: 0.7001140035941139, iteration: 436157
loss: 1.0961878299713135,grad_norm: 0.999999686191769, iteration: 436158
loss: 1.0089792013168335,grad_norm: 0.8493997958286436, iteration: 436159
loss: 0.9671759605407715,grad_norm: 0.7915048837265855, iteration: 436160
loss: 1.0001221895217896,grad_norm: 0.7528496480403674, iteration: 436161
loss: 0.9909983277320862,grad_norm: 0.9879895179379068, iteration: 436162
loss: 0.9926097393035889,grad_norm: 0.6814459489877925, iteration: 436163
loss: 0.9881305694580078,grad_norm: 0.9152149629168556, iteration: 436164
loss: 1.007859468460083,grad_norm: 0.7184324254634792, iteration: 436165
loss: 1.0638110637664795,grad_norm: 0.7985303550657988, iteration: 436166
loss: 1.059081792831421,grad_norm: 0.9999999359423732, iteration: 436167
loss: 1.0253345966339111,grad_norm: 0.8755025901878092, iteration: 436168
loss: 1.0301103591918945,grad_norm: 0.7202230847652961, iteration: 436169
loss: 0.9579696655273438,grad_norm: 0.9562379056020073, iteration: 436170
loss: 0.967832088470459,grad_norm: 0.8543214439784017, iteration: 436171
loss: 1.050533413887024,grad_norm: 0.8285714861487604, iteration: 436172
loss: 0.960040807723999,grad_norm: 0.779980112476155, iteration: 436173
loss: 1.0350191593170166,grad_norm: 0.823001310442121, iteration: 436174
loss: 1.0584033727645874,grad_norm: 0.999999276344207, iteration: 436175
loss: 0.9895855784416199,grad_norm: 0.9037215961661982, iteration: 436176
loss: 0.987056314945221,grad_norm: 0.9999990775730804, iteration: 436177
loss: 1.0176427364349365,grad_norm: 0.7400227547767694, iteration: 436178
loss: 0.9743773341178894,grad_norm: 0.9999998455795968, iteration: 436179
loss: 1.060535192489624,grad_norm: 0.6672937898076091, iteration: 436180
loss: 1.0015687942504883,grad_norm: 0.8626024029987579, iteration: 436181
loss: 1.0252193212509155,grad_norm: 0.8182933234686749, iteration: 436182
loss: 0.9341132640838623,grad_norm: 0.7971943734796555, iteration: 436183
loss: 1.0064092874526978,grad_norm: 0.7877706406832353, iteration: 436184
loss: 1.0716077089309692,grad_norm: 0.9999997259993778, iteration: 436185
loss: 1.0456067323684692,grad_norm: 0.7781313077332837, iteration: 436186
loss: 0.9820327758789062,grad_norm: 0.7704554751856708, iteration: 436187
loss: 0.9988741278648376,grad_norm: 0.8920854210439857, iteration: 436188
loss: 1.0311578512191772,grad_norm: 0.7363485667926917, iteration: 436189
loss: 0.9958937764167786,grad_norm: 0.6778724032959652, iteration: 436190
loss: 1.0223149061203003,grad_norm: 0.9284953327685164, iteration: 436191
loss: 1.006312370300293,grad_norm: 0.7789353487811561, iteration: 436192
loss: 1.0017449855804443,grad_norm: 0.8449343391308372, iteration: 436193
loss: 1.009190320968628,grad_norm: 0.9355933862683399, iteration: 436194
loss: 1.0312730073928833,grad_norm: 0.7128058524208183, iteration: 436195
loss: 1.045803427696228,grad_norm: 0.9999989852940123, iteration: 436196
loss: 1.0125830173492432,grad_norm: 0.9999992470387251, iteration: 436197
loss: 0.987914502620697,grad_norm: 0.7200916602127762, iteration: 436198
loss: 1.145128607749939,grad_norm: 0.8241375350892546, iteration: 436199
loss: 1.0239999294281006,grad_norm: 0.9999997466305367, iteration: 436200
loss: 0.9393559098243713,grad_norm: 0.6924652757229914, iteration: 436201
loss: 1.061983585357666,grad_norm: 0.9999990547026433, iteration: 436202
loss: 0.9988784193992615,grad_norm: 0.6339964212657364, iteration: 436203
loss: 1.0153526067733765,grad_norm: 0.6286736061165513, iteration: 436204
loss: 1.0148282051086426,grad_norm: 0.6346033449131071, iteration: 436205
loss: 1.044399619102478,grad_norm: 0.8490756414436746, iteration: 436206
loss: 1.0048233270645142,grad_norm: 0.7491838560990355, iteration: 436207
loss: 1.0131198167800903,grad_norm: 0.9351121214310011, iteration: 436208
loss: 1.0574723482131958,grad_norm: 0.7481578469998817, iteration: 436209
loss: 1.0200375318527222,grad_norm: 0.8588683411925154, iteration: 436210
loss: 1.0167031288146973,grad_norm: 0.9999988586999421, iteration: 436211
loss: 0.9748203754425049,grad_norm: 0.6966536754759132, iteration: 436212
loss: 1.0209038257598877,grad_norm: 0.9999990862679492, iteration: 436213
loss: 0.9947133660316467,grad_norm: 0.9004180321237646, iteration: 436214
loss: 1.0013755559921265,grad_norm: 0.9076354329449939, iteration: 436215
loss: 0.9840367436408997,grad_norm: 0.9216171934704821, iteration: 436216
loss: 0.9477430582046509,grad_norm: 0.8314124739053089, iteration: 436217
loss: 0.9906920194625854,grad_norm: 0.735442879913406, iteration: 436218
loss: 1.0004855394363403,grad_norm: 0.7897018483759652, iteration: 436219
loss: 1.0580544471740723,grad_norm: 0.836085440636626, iteration: 436220
loss: 0.9644080996513367,grad_norm: 0.8911567846170779, iteration: 436221
loss: 1.031602382659912,grad_norm: 0.9999996477089337, iteration: 436222
loss: 1.0709115266799927,grad_norm: 0.8006495285159022, iteration: 436223
loss: 0.9825981855392456,grad_norm: 0.7919458345800844, iteration: 436224
loss: 1.0452333688735962,grad_norm: 0.8252080758397209, iteration: 436225
loss: 1.027276635169983,grad_norm: 0.9999991232915878, iteration: 436226
loss: 1.0134022235870361,grad_norm: 0.8434658966283994, iteration: 436227
loss: 1.2127597332000732,grad_norm: 0.9999997436684978, iteration: 436228
loss: 0.9828140139579773,grad_norm: 0.7718493774680958, iteration: 436229
loss: 0.9773998856544495,grad_norm: 0.8131879048854869, iteration: 436230
loss: 0.9995452165603638,grad_norm: 0.8941149190002704, iteration: 436231
loss: 1.0358999967575073,grad_norm: 0.7451831069402637, iteration: 436232
loss: 0.9855037927627563,grad_norm: 0.7952222107389396, iteration: 436233
loss: 1.0939239263534546,grad_norm: 0.8651846243582104, iteration: 436234
loss: 0.9824451208114624,grad_norm: 0.7830239468830102, iteration: 436235
loss: 1.0039329528808594,grad_norm: 0.6801828102608214, iteration: 436236
loss: 1.0786408185958862,grad_norm: 0.9999999574196153, iteration: 436237
loss: 1.0208401679992676,grad_norm: 0.780721478094029, iteration: 436238
loss: 1.0472244024276733,grad_norm: 0.8291634168842852, iteration: 436239
loss: 0.9976963400840759,grad_norm: 0.6902995502678079, iteration: 436240
loss: 1.0278083086013794,grad_norm: 0.6761877925522429, iteration: 436241
loss: 1.017634630203247,grad_norm: 0.9999993652600302, iteration: 436242
loss: 1.008866548538208,grad_norm: 0.6770820702861489, iteration: 436243
loss: 1.0462898015975952,grad_norm: 0.8405585987391669, iteration: 436244
loss: 0.9864123463630676,grad_norm: 0.8225582822122001, iteration: 436245
loss: 1.0069972276687622,grad_norm: 0.8906030689023524, iteration: 436246
loss: 1.0735257863998413,grad_norm: 0.9999994129043418, iteration: 436247
loss: 1.0153617858886719,grad_norm: 0.9999991236299131, iteration: 436248
loss: 1.0032248497009277,grad_norm: 0.9999989660628206, iteration: 436249
loss: 1.0432034730911255,grad_norm: 0.7643151648130245, iteration: 436250
loss: 1.0583953857421875,grad_norm: 0.7981705617371556, iteration: 436251
loss: 1.0636776685714722,grad_norm: 0.7630803352354222, iteration: 436252
loss: 1.1158047914505005,grad_norm: 0.9935264703818808, iteration: 436253
loss: 1.073357105255127,grad_norm: 0.9999996424650315, iteration: 436254
loss: 1.0122897624969482,grad_norm: 0.7098077055067349, iteration: 436255
loss: 0.9938241839408875,grad_norm: 0.783625137861938, iteration: 436256
loss: 1.0353187322616577,grad_norm: 0.96903416925802, iteration: 436257
loss: 1.0632351636886597,grad_norm: 0.9999995402264128, iteration: 436258
loss: 1.01461923122406,grad_norm: 0.7056903792661536, iteration: 436259
loss: 1.0440088510513306,grad_norm: 0.8100508632614644, iteration: 436260
loss: 1.087583065032959,grad_norm: 0.9999992906615606, iteration: 436261
loss: 1.0111877918243408,grad_norm: 0.790195451892698, iteration: 436262
loss: 1.0561165809631348,grad_norm: 0.8663394437916457, iteration: 436263
loss: 1.0793803930282593,grad_norm: 0.7582043934990738, iteration: 436264
loss: 1.020148515701294,grad_norm: 0.8003960190652945, iteration: 436265
loss: 0.971619725227356,grad_norm: 0.7775751182844517, iteration: 436266
loss: 1.020577311515808,grad_norm: 0.693248971155564, iteration: 436267
loss: 1.0452172756195068,grad_norm: 0.8329273468714673, iteration: 436268
loss: 1.0056997537612915,grad_norm: 0.9600762598871594, iteration: 436269
loss: 1.0232068300247192,grad_norm: 0.9999994098459012, iteration: 436270
loss: 1.0415370464324951,grad_norm: 0.9999993211010784, iteration: 436271
loss: 1.0212146043777466,grad_norm: 0.8090854958488155, iteration: 436272
loss: 1.0256279706954956,grad_norm: 0.8369444190446619, iteration: 436273
loss: 0.9770938754081726,grad_norm: 0.6036247158764791, iteration: 436274
loss: 1.052355170249939,grad_norm: 0.8371741098784548, iteration: 436275
loss: 0.9613469243049622,grad_norm: 0.7541532750118297, iteration: 436276
loss: 1.0071306228637695,grad_norm: 0.7227345566669569, iteration: 436277
loss: 0.9663011431694031,grad_norm: 0.7835205931220633, iteration: 436278
loss: 1.3106733560562134,grad_norm: 0.9999994395238145, iteration: 436279
loss: 0.9894975423812866,grad_norm: 0.7163883110189516, iteration: 436280
loss: 1.0240520238876343,grad_norm: 0.6622228611671633, iteration: 436281
loss: 0.9911372065544128,grad_norm: 0.6295703587282472, iteration: 436282
loss: 0.99920254945755,grad_norm: 0.8744877954452962, iteration: 436283
loss: 1.0091216564178467,grad_norm: 0.7567543526517083, iteration: 436284
loss: 0.9875651597976685,grad_norm: 0.9155534851629786, iteration: 436285
loss: 0.9940905570983887,grad_norm: 0.9999996289610492, iteration: 436286
loss: 0.9998560547828674,grad_norm: 0.8487154191787868, iteration: 436287
loss: 0.986945390701294,grad_norm: 0.7614817029516705, iteration: 436288
loss: 1.0102410316467285,grad_norm: 0.9999998282576927, iteration: 436289
loss: 1.0505239963531494,grad_norm: 0.6531929080261893, iteration: 436290
loss: 0.9812381863594055,grad_norm: 0.6892994896858663, iteration: 436291
loss: 1.012384295463562,grad_norm: 0.9515519885007274, iteration: 436292
loss: 0.9843544960021973,grad_norm: 0.7445754790509511, iteration: 436293
loss: 0.9761384725570679,grad_norm: 0.904241844158647, iteration: 436294
loss: 1.0423345565795898,grad_norm: 0.981862442954214, iteration: 436295
loss: 0.9596045017242432,grad_norm: 0.9999995205050257, iteration: 436296
loss: 0.9638475775718689,grad_norm: 0.7706819451605655, iteration: 436297
loss: 0.9766349196434021,grad_norm: 0.6643252634086887, iteration: 436298
loss: 1.0123292207717896,grad_norm: 0.9865101946846595, iteration: 436299
loss: 0.9701529741287231,grad_norm: 0.8342435888994452, iteration: 436300
loss: 1.014272689819336,grad_norm: 0.8037306211968767, iteration: 436301
loss: 0.9489534497261047,grad_norm: 0.8092379800169262, iteration: 436302
loss: 1.0138864517211914,grad_norm: 0.9999991443144047, iteration: 436303
loss: 0.9710062742233276,grad_norm: 0.8055375001860317, iteration: 436304
loss: 0.9706069827079773,grad_norm: 0.7537960373726292, iteration: 436305
loss: 0.9897415637969971,grad_norm: 0.8723581440427295, iteration: 436306
loss: 1.009352207183838,grad_norm: 0.9972954602447168, iteration: 436307
loss: 0.9817518591880798,grad_norm: 0.7858221488793247, iteration: 436308
loss: 1.0848381519317627,grad_norm: 0.6214000251447064, iteration: 436309
loss: 1.053963303565979,grad_norm: 0.8124983640378687, iteration: 436310
loss: 0.9831188321113586,grad_norm: 0.8223125526311796, iteration: 436311
loss: 0.9894192814826965,grad_norm: 0.8163471146807331, iteration: 436312
loss: 1.0361467599868774,grad_norm: 0.7423738923574978, iteration: 436313
loss: 1.0907145738601685,grad_norm: 0.9999996161261772, iteration: 436314
loss: 1.0080559253692627,grad_norm: 0.606019445269622, iteration: 436315
loss: 1.015136480331421,grad_norm: 0.9999999528435335, iteration: 436316
loss: 0.978945255279541,grad_norm: 0.7836368127549382, iteration: 436317
loss: 1.0664088726043701,grad_norm: 0.9999992195464381, iteration: 436318
loss: 1.0815232992172241,grad_norm: 0.7825181918315794, iteration: 436319
loss: 0.9841246008872986,grad_norm: 0.681318655797008, iteration: 436320
loss: 1.0003407001495361,grad_norm: 0.8306268127121248, iteration: 436321
loss: 1.0400792360305786,grad_norm: 0.9999992179833774, iteration: 436322
loss: 1.02701997756958,grad_norm: 0.7604539941913779, iteration: 436323
loss: 0.9422626495361328,grad_norm: 0.8689431535679557, iteration: 436324
loss: 1.0318593978881836,grad_norm: 0.7203301042497411, iteration: 436325
loss: 1.112622618675232,grad_norm: 0.9999991538197868, iteration: 436326
loss: 0.9721373319625854,grad_norm: 0.7174277830143403, iteration: 436327
loss: 1.0482085943222046,grad_norm: 0.8707902541756997, iteration: 436328
loss: 1.0161664485931396,grad_norm: 0.7779103281501708, iteration: 436329
loss: 1.0415217876434326,grad_norm: 0.9528844085389999, iteration: 436330
loss: 1.016031265258789,grad_norm: 0.7437558524979614, iteration: 436331
loss: 1.080881953239441,grad_norm: 0.8784451614880754, iteration: 436332
loss: 0.9989916086196899,grad_norm: 0.729572607980613, iteration: 436333
loss: 1.0675572156906128,grad_norm: 0.9999994227896534, iteration: 436334
loss: 1.0044704675674438,grad_norm: 0.7288616742354646, iteration: 436335
loss: 1.0481317043304443,grad_norm: 0.9999990887792948, iteration: 436336
loss: 1.005478858947754,grad_norm: 0.7724252626021016, iteration: 436337
loss: 1.0098822116851807,grad_norm: 0.8286460689992856, iteration: 436338
loss: 0.9844047427177429,grad_norm: 0.70663100362858, iteration: 436339
loss: 1.1566005945205688,grad_norm: 0.9999996965233303, iteration: 436340
loss: 1.1844781637191772,grad_norm: 0.9999989572479367, iteration: 436341
loss: 1.0141916275024414,grad_norm: 0.7717346052278851, iteration: 436342
loss: 1.0845102071762085,grad_norm: 0.9999999850524093, iteration: 436343
loss: 0.9900106191635132,grad_norm: 0.655741370640596, iteration: 436344
loss: 1.0277938842773438,grad_norm: 0.9999990782684565, iteration: 436345
loss: 1.0002835988998413,grad_norm: 0.8351840920541115, iteration: 436346
loss: 0.9869230389595032,grad_norm: 0.9999990181285495, iteration: 436347
loss: 1.058609962463379,grad_norm: 0.8667127667209605, iteration: 436348
loss: 1.006346583366394,grad_norm: 0.9596524471410441, iteration: 436349
loss: 1.0284565687179565,grad_norm: 0.9402641723174984, iteration: 436350
loss: 1.0589842796325684,grad_norm: 0.9999995630926017, iteration: 436351
loss: 1.047477126121521,grad_norm: 0.8007605517475249, iteration: 436352
loss: 1.0602049827575684,grad_norm: 0.982812668339615, iteration: 436353
loss: 0.9669902324676514,grad_norm: 0.8713496511538894, iteration: 436354
loss: 0.9826251268386841,grad_norm: 0.6998584724099597, iteration: 436355
loss: 1.0240843296051025,grad_norm: 0.7271828956890606, iteration: 436356
loss: 1.0767771005630493,grad_norm: 0.9999993561732401, iteration: 436357
loss: 1.0864745378494263,grad_norm: 0.7894437428828861, iteration: 436358
loss: 1.0766315460205078,grad_norm: 0.9999995044698642, iteration: 436359
loss: 1.0337618589401245,grad_norm: 0.7837934624973777, iteration: 436360
loss: 1.0325438976287842,grad_norm: 0.7386425361183742, iteration: 436361
loss: 0.9704744219779968,grad_norm: 0.8459846189377805, iteration: 436362
loss: 0.999825656414032,grad_norm: 0.7411029008181208, iteration: 436363
loss: 1.0088759660720825,grad_norm: 0.7154533468785131, iteration: 436364
loss: 1.0545307397842407,grad_norm: 0.9827621691390981, iteration: 436365
loss: 1.1130521297454834,grad_norm: 0.7776315515495409, iteration: 436366
loss: 1.0265673398971558,grad_norm: 0.7157042631745191, iteration: 436367
loss: 1.0704036951065063,grad_norm: 0.9999990276491086, iteration: 436368
loss: 1.0308188199996948,grad_norm: 0.7986485653868401, iteration: 436369
loss: 1.0818582773208618,grad_norm: 0.9021084663300653, iteration: 436370
loss: 1.0192078351974487,grad_norm: 0.9999991271354328, iteration: 436371
loss: 1.0223655700683594,grad_norm: 0.7941983103848566, iteration: 436372
loss: 1.0231602191925049,grad_norm: 0.8519885548232822, iteration: 436373
loss: 0.9933309555053711,grad_norm: 0.8959741370733365, iteration: 436374
loss: 1.1397154331207275,grad_norm: 0.9999996761608352, iteration: 436375
loss: 0.9765651226043701,grad_norm: 0.7478009073225309, iteration: 436376
loss: 1.115472674369812,grad_norm: 0.8316587095583228, iteration: 436377
loss: 1.032553791999817,grad_norm: 0.775920032772906, iteration: 436378
loss: 0.9939175248146057,grad_norm: 0.8574947988636534, iteration: 436379
loss: 1.0327483415603638,grad_norm: 0.9999991181913899, iteration: 436380
loss: 1.0951316356658936,grad_norm: 0.936966209467712, iteration: 436381
loss: 1.0932973623275757,grad_norm: 0.9999991427106869, iteration: 436382
loss: 1.0324965715408325,grad_norm: 0.8103373183177921, iteration: 436383
loss: 0.985510528087616,grad_norm: 0.790339167083226, iteration: 436384
loss: 1.141478419303894,grad_norm: 0.9999995753007375, iteration: 436385
loss: 1.0054173469543457,grad_norm: 0.8254811911630175, iteration: 436386
loss: 1.0480526685714722,grad_norm: 0.8072273313383257, iteration: 436387
loss: 0.9646425843238831,grad_norm: 0.6808727779959401, iteration: 436388
loss: 1.041872501373291,grad_norm: 0.999999207915828, iteration: 436389
loss: 0.9631283283233643,grad_norm: 0.9899795117555658, iteration: 436390
loss: 1.036482810974121,grad_norm: 0.9864976644008814, iteration: 436391
loss: 0.9792395234107971,grad_norm: 0.9187006528662393, iteration: 436392
loss: 1.0411328077316284,grad_norm: 0.8396840766838548, iteration: 436393
loss: 1.0717878341674805,grad_norm: 0.9090905294520593, iteration: 436394
loss: 1.0487878322601318,grad_norm: 0.8596987599166291, iteration: 436395
loss: 1.0631364583969116,grad_norm: 0.8924307603242977, iteration: 436396
loss: 1.0179275274276733,grad_norm: 0.7912934113852443, iteration: 436397
loss: 0.9702091217041016,grad_norm: 0.8205583749916017, iteration: 436398
loss: 1.0072115659713745,grad_norm: 0.8111400677879959, iteration: 436399
loss: 1.0288949012756348,grad_norm: 0.8436949046860166, iteration: 436400
loss: 1.0041849613189697,grad_norm: 0.6691742657456804, iteration: 436401
loss: 1.0890015363693237,grad_norm: 0.8900821287017304, iteration: 436402
loss: 1.2045499086380005,grad_norm: 0.8727345886168123, iteration: 436403
loss: 1.0060151815414429,grad_norm: 0.9106695649487319, iteration: 436404
loss: 0.9771963357925415,grad_norm: 0.9999991669952384, iteration: 436405
loss: 1.0151299238204956,grad_norm: 0.9999999329084678, iteration: 436406
loss: 1.0738072395324707,grad_norm: 0.9999998318026216, iteration: 436407
loss: 0.9936974048614502,grad_norm: 0.7617963214959521, iteration: 436408
loss: 0.985664427280426,grad_norm: 0.7958708977856762, iteration: 436409
loss: 1.0620019435882568,grad_norm: 0.9999998225933866, iteration: 436410
loss: 1.0377559661865234,grad_norm: 0.9999993335176474, iteration: 436411
loss: 1.1873114109039307,grad_norm: 0.9999991386851292, iteration: 436412
loss: 1.0058444738388062,grad_norm: 0.9999990619359226, iteration: 436413
loss: 0.9525254368782043,grad_norm: 0.7375606416429926, iteration: 436414
loss: 1.0287673473358154,grad_norm: 0.9059973094673993, iteration: 436415
loss: 1.0259158611297607,grad_norm: 0.9246993834965324, iteration: 436416
loss: 0.974997878074646,grad_norm: 0.587788608003254, iteration: 436417
loss: 1.0136449337005615,grad_norm: 0.823260462543756, iteration: 436418
loss: 1.0758264064788818,grad_norm: 0.7568323899308007, iteration: 436419
loss: 1.0110745429992676,grad_norm: 0.7627261268850805, iteration: 436420
loss: 0.9893160462379456,grad_norm: 0.7762415797762162, iteration: 436421
loss: 1.051983118057251,grad_norm: 0.9012996080953688, iteration: 436422
loss: 1.1396101713180542,grad_norm: 0.9999992170141653, iteration: 436423
loss: 1.0661970376968384,grad_norm: 0.8522159346105627, iteration: 436424
loss: 1.0080599784851074,grad_norm: 0.7612015560156133, iteration: 436425
loss: 0.9922557473182678,grad_norm: 0.9999990938883812, iteration: 436426
loss: 1.0087882280349731,grad_norm: 0.9999992093006131, iteration: 436427
loss: 1.0596867799758911,grad_norm: 0.849962555267544, iteration: 436428
loss: 1.043778657913208,grad_norm: 0.927905450701453, iteration: 436429
loss: 1.0198172330856323,grad_norm: 0.7696132862613927, iteration: 436430
loss: 0.9787101149559021,grad_norm: 0.9999999104192561, iteration: 436431
loss: 1.021799087524414,grad_norm: 0.7595741120361171, iteration: 436432
loss: 1.0075870752334595,grad_norm: 0.6982142005895173, iteration: 436433
loss: 0.9868811964988708,grad_norm: 0.7682559820467889, iteration: 436434
loss: 1.0656734704971313,grad_norm: 0.9999996191730336, iteration: 436435
loss: 1.005903720855713,grad_norm: 0.782943044483159, iteration: 436436
loss: 0.997189998626709,grad_norm: 0.7798922116732667, iteration: 436437
loss: 1.011983036994934,grad_norm: 0.9999993398177814, iteration: 436438
loss: 1.0294479131698608,grad_norm: 0.8412792701065158, iteration: 436439
loss: 1.0132092237472534,grad_norm: 0.7397822984721839, iteration: 436440
loss: 0.9955813884735107,grad_norm: 0.8084285147519301, iteration: 436441
loss: 1.0470112562179565,grad_norm: 0.9999994152893223, iteration: 436442
loss: 1.051842451095581,grad_norm: 0.8101327431727067, iteration: 436443
loss: 1.0121515989303589,grad_norm: 0.9732355557240449, iteration: 436444
loss: 0.9980093836784363,grad_norm: 0.7188835763531327, iteration: 436445
loss: 1.1346858739852905,grad_norm: 0.9999995459997175, iteration: 436446
loss: 1.023823618888855,grad_norm: 0.7393407700017176, iteration: 436447
loss: 0.9849368333816528,grad_norm: 0.9999991108221522, iteration: 436448
loss: 0.9973235726356506,grad_norm: 0.8133840886333691, iteration: 436449
loss: 0.9921460151672363,grad_norm: 0.9999991204253549, iteration: 436450
loss: 1.0636526346206665,grad_norm: 0.7679807289555093, iteration: 436451
loss: 1.034069299697876,grad_norm: 0.718154631620545, iteration: 436452
loss: 1.0383442640304565,grad_norm: 0.9999995952945834, iteration: 436453
loss: 0.99343341588974,grad_norm: 0.6552750328457321, iteration: 436454
loss: 1.0031613111495972,grad_norm: 0.7882625765439253, iteration: 436455
loss: 0.9678052067756653,grad_norm: 0.9240821519040008, iteration: 436456
loss: 1.0630853176116943,grad_norm: 0.8080513032479203, iteration: 436457
loss: 1.00916588306427,grad_norm: 0.670791006633213, iteration: 436458
loss: 1.013841152191162,grad_norm: 0.949021896272451, iteration: 436459
loss: 1.0277196168899536,grad_norm: 0.9999998821565211, iteration: 436460
loss: 0.9921629428863525,grad_norm: 0.7925010797771459, iteration: 436461
loss: 1.0027718544006348,grad_norm: 0.8436060023384869, iteration: 436462
loss: 1.1081717014312744,grad_norm: 0.9999993898480019, iteration: 436463
loss: 0.9375991225242615,grad_norm: 0.8027383130303136, iteration: 436464
loss: 0.9918233156204224,grad_norm: 0.7288124991109357, iteration: 436465
loss: 1.0318282842636108,grad_norm: 0.7961566159592592, iteration: 436466
loss: 0.998466968536377,grad_norm: 0.7799366637202375, iteration: 436467
loss: 1.0185059309005737,grad_norm: 0.8832325974869683, iteration: 436468
loss: 0.9738979935646057,grad_norm: 0.7391776084928479, iteration: 436469
loss: 0.9800294041633606,grad_norm: 0.9999991191406279, iteration: 436470
loss: 0.9923868179321289,grad_norm: 0.775173305881619, iteration: 436471
loss: 1.0544404983520508,grad_norm: 0.8934347455605387, iteration: 436472
loss: 1.0034739971160889,grad_norm: 0.9999998317219554, iteration: 436473
loss: 1.0770394802093506,grad_norm: 0.9999994612829605, iteration: 436474
loss: 1.0045945644378662,grad_norm: 0.7309700545407061, iteration: 436475
loss: 1.0260746479034424,grad_norm: 0.8763746092994724, iteration: 436476
loss: 1.0696924924850464,grad_norm: 0.7392001349500698, iteration: 436477
loss: 0.9891451597213745,grad_norm: 0.7344208725873614, iteration: 436478
loss: 0.9748584628105164,grad_norm: 0.9999999181029807, iteration: 436479
loss: 1.0450685024261475,grad_norm: 0.9082554750850468, iteration: 436480
loss: 1.0089614391326904,grad_norm: 0.7597695310546827, iteration: 436481
loss: 1.017025351524353,grad_norm: 0.7964869238680586, iteration: 436482
loss: 1.0764049291610718,grad_norm: 0.999999040552179, iteration: 436483
loss: 1.0039749145507812,grad_norm: 0.7504696301344891, iteration: 436484
loss: 0.9972420334815979,grad_norm: 0.8063038243573775, iteration: 436485
loss: 1.003052830696106,grad_norm: 0.6626279407005553, iteration: 436486
loss: 1.0237922668457031,grad_norm: 0.8633246117270006, iteration: 436487
loss: 0.9979400634765625,grad_norm: 0.6909112149119103, iteration: 436488
loss: 1.0199860334396362,grad_norm: 0.9102097348322803, iteration: 436489
loss: 1.018384575843811,grad_norm: 0.7520528014889623, iteration: 436490
loss: 1.0132230520248413,grad_norm: 0.8189829647423326, iteration: 436491
loss: 1.0067368745803833,grad_norm: 0.7728453222742148, iteration: 436492
loss: 1.0767571926116943,grad_norm: 0.9999999778623782, iteration: 436493
loss: 0.9879656434059143,grad_norm: 0.8046472701578555, iteration: 436494
loss: 1.0093456506729126,grad_norm: 0.7947055419385064, iteration: 436495
loss: 1.0684537887573242,grad_norm: 0.8756352101961354, iteration: 436496
loss: 1.1337158679962158,grad_norm: 0.9999997226819526, iteration: 436497
loss: 1.0047487020492554,grad_norm: 0.75323398731767, iteration: 436498
loss: 0.9839545488357544,grad_norm: 0.9999997740611956, iteration: 436499
loss: 1.0040948390960693,grad_norm: 0.7430380785737871, iteration: 436500
loss: 0.9312871098518372,grad_norm: 0.7753445847465428, iteration: 436501
loss: 1.0150084495544434,grad_norm: 0.784426848531947, iteration: 436502
loss: 1.0134825706481934,grad_norm: 0.744313913214486, iteration: 436503
loss: 1.0135188102722168,grad_norm: 0.9773445299475254, iteration: 436504
loss: 0.9817023277282715,grad_norm: 0.8398402647671708, iteration: 436505
loss: 1.0042346715927124,grad_norm: 0.8504588978103056, iteration: 436506
loss: 0.9948026537895203,grad_norm: 0.7769257464547755, iteration: 436507
loss: 0.9973966479301453,grad_norm: 0.7964285582773318, iteration: 436508
loss: 1.1701714992523193,grad_norm: 0.9999997380800392, iteration: 436509
loss: 1.0396865606307983,grad_norm: 0.9999992816419591, iteration: 436510
loss: 1.0823466777801514,grad_norm: 0.9999992214469142, iteration: 436511
loss: 0.972135066986084,grad_norm: 0.6897245586142101, iteration: 436512
loss: 1.0446845293045044,grad_norm: 0.8317116229846861, iteration: 436513
loss: 1.0421916246414185,grad_norm: 0.9999996990301167, iteration: 436514
loss: 1.0032556056976318,grad_norm: 0.7652153514190053, iteration: 436515
loss: 1.105795979499817,grad_norm: 0.9999999963831568, iteration: 436516
loss: 0.9887217283248901,grad_norm: 0.9999998176301084, iteration: 436517
loss: 1.0155346393585205,grad_norm: 0.9999992327966306, iteration: 436518
loss: 0.9785109758377075,grad_norm: 0.7757512122182928, iteration: 436519
loss: 0.9888010621070862,grad_norm: 0.9594996950069807, iteration: 436520
loss: 1.2192307710647583,grad_norm: 0.9999997802069233, iteration: 436521
loss: 1.0768800973892212,grad_norm: 0.9999991857088172, iteration: 436522
loss: 0.9990631341934204,grad_norm: 0.8430489484077801, iteration: 436523
loss: 1.0036207437515259,grad_norm: 0.7927629485264867, iteration: 436524
loss: 0.9980324506759644,grad_norm: 0.9161673895763799, iteration: 436525
loss: 1.0771803855895996,grad_norm: 0.6945871248214706, iteration: 436526
loss: 1.042698621749878,grad_norm: 0.9999998837051919, iteration: 436527
loss: 1.177310824394226,grad_norm: 0.9999991431317667, iteration: 436528
loss: 1.0410091876983643,grad_norm: 0.8637525758342244, iteration: 436529
loss: 0.9793344140052795,grad_norm: 0.7359161776655281, iteration: 436530
loss: 1.0104470252990723,grad_norm: 0.8851327223904546, iteration: 436531
loss: 0.9966222047805786,grad_norm: 0.8171848844171082, iteration: 436532
loss: 1.0485671758651733,grad_norm: 0.8190531415558665, iteration: 436533
loss: 0.9972056746482849,grad_norm: 0.7381128333586943, iteration: 436534
loss: 1.0221298933029175,grad_norm: 0.7134498196950686, iteration: 436535
loss: 0.9959113597869873,grad_norm: 0.8864710378612026, iteration: 436536
loss: 1.0172178745269775,grad_norm: 0.7521186901504486, iteration: 436537
loss: 1.1122721433639526,grad_norm: 0.9999996716631618, iteration: 436538
loss: 1.0081292390823364,grad_norm: 0.8837198493380811, iteration: 436539
loss: 0.9927093982696533,grad_norm: 0.8067272595929684, iteration: 436540
loss: 1.0042064189910889,grad_norm: 0.9060259845081118, iteration: 436541
loss: 0.9931890964508057,grad_norm: 0.8048606073632897, iteration: 436542
loss: 1.011893630027771,grad_norm: 0.8850423622508528, iteration: 436543
loss: 1.0078125,grad_norm: 0.883241802487537, iteration: 436544
loss: 1.080582857131958,grad_norm: 0.9999998002020974, iteration: 436545
loss: 1.0260231494903564,grad_norm: 0.9316681480850721, iteration: 436546
loss: 0.9668180346488953,grad_norm: 0.752038577927596, iteration: 436547
loss: 1.00968599319458,grad_norm: 0.8223015921938573, iteration: 436548
loss: 1.0288808345794678,grad_norm: 0.739075614177391, iteration: 436549
loss: 0.9762008190155029,grad_norm: 0.7696549251866446, iteration: 436550
loss: 0.9989864230155945,grad_norm: 0.9999991740981796, iteration: 436551
loss: 0.9932090044021606,grad_norm: 0.7358751175054289, iteration: 436552
loss: 1.0013667345046997,grad_norm: 0.7693462291141719, iteration: 436553
loss: 1.0023716688156128,grad_norm: 0.8038436183040758, iteration: 436554
loss: 0.9856076836585999,grad_norm: 0.7796973327449804, iteration: 436555
loss: 1.0296307802200317,grad_norm: 0.9325761791621465, iteration: 436556
loss: 1.0203931331634521,grad_norm: 0.9226092898831266, iteration: 436557
loss: 0.9872657060623169,grad_norm: 0.6292419702045661, iteration: 436558
loss: 1.0044536590576172,grad_norm: 0.9999995236933079, iteration: 436559
loss: 0.9940352439880371,grad_norm: 0.6897113071690911, iteration: 436560
loss: 1.067116379737854,grad_norm: 0.9431801903311863, iteration: 436561
loss: 1.0554537773132324,grad_norm: 0.9999992923564452, iteration: 436562
loss: 1.0101561546325684,grad_norm: 0.9999995388986885, iteration: 436563
loss: 0.982852041721344,grad_norm: 0.6902270272794024, iteration: 436564
loss: 0.9914455413818359,grad_norm: 0.9154127463680579, iteration: 436565
loss: 1.0617501735687256,grad_norm: 0.9999995247241678, iteration: 436566
loss: 1.004603624343872,grad_norm: 0.650316067125419, iteration: 436567
loss: 0.9991158246994019,grad_norm: 0.9999998578875358, iteration: 436568
loss: 1.0025031566619873,grad_norm: 0.853543114857007, iteration: 436569
loss: 1.031813383102417,grad_norm: 0.9999990892266654, iteration: 436570
loss: 0.9828206896781921,grad_norm: 0.9120726582349081, iteration: 436571
loss: 1.024741768836975,grad_norm: 0.9506914254843576, iteration: 436572
loss: 0.9913182854652405,grad_norm: 0.6371499001204562, iteration: 436573
loss: 0.9660280346870422,grad_norm: 0.8465713108317743, iteration: 436574
loss: 1.00813627243042,grad_norm: 0.9033732463736728, iteration: 436575
loss: 1.0047446489334106,grad_norm: 0.8028321997741866, iteration: 436576
loss: 1.0041108131408691,grad_norm: 0.7508487873804135, iteration: 436577
loss: 1.0828931331634521,grad_norm: 0.9999998754186677, iteration: 436578
loss: 1.0120434761047363,grad_norm: 0.8225426670040608, iteration: 436579
loss: 1.183390736579895,grad_norm: 0.9999993195273061, iteration: 436580
loss: 1.024282693862915,grad_norm: 0.8498811698635477, iteration: 436581
loss: 0.988694429397583,grad_norm: 0.8164659124232425, iteration: 436582
loss: 1.0225249528884888,grad_norm: 0.6585321810964461, iteration: 436583
loss: 1.0039561986923218,grad_norm: 0.7138364620353223, iteration: 436584
loss: 0.9835672378540039,grad_norm: 0.7465706371474559, iteration: 436585
loss: 1.023288369178772,grad_norm: 0.8514472150036534, iteration: 436586
loss: 0.9792156219482422,grad_norm: 0.7981144783395308, iteration: 436587
loss: 1.0358006954193115,grad_norm: 0.9999994412706349, iteration: 436588
loss: 0.9732645153999329,grad_norm: 0.7725573364508171, iteration: 436589
loss: 1.0189212560653687,grad_norm: 0.6251190255115318, iteration: 436590
loss: 1.0159950256347656,grad_norm: 0.9999988866295665, iteration: 436591
loss: 1.0196208953857422,grad_norm: 0.9999992422332957, iteration: 436592
loss: 1.0732700824737549,grad_norm: 0.9999994052709842, iteration: 436593
loss: 0.9779633283615112,grad_norm: 0.688220900097642, iteration: 436594
loss: 1.0109120607376099,grad_norm: 0.832468110658044, iteration: 436595
loss: 0.9779661297798157,grad_norm: 0.8157663013068732, iteration: 436596
loss: 0.9728742241859436,grad_norm: 0.9217547730906507, iteration: 436597
loss: 1.0317621231079102,grad_norm: 0.9999998044004208, iteration: 436598
loss: 0.9959849119186401,grad_norm: 0.9853085038077704, iteration: 436599
loss: 0.9924473166465759,grad_norm: 0.9999990178524855, iteration: 436600
loss: 1.0795931816101074,grad_norm: 0.9999999558763218, iteration: 436601
loss: 0.9609934091567993,grad_norm: 0.9203888747365361, iteration: 436602
loss: 1.034305453300476,grad_norm: 0.9999996491114774, iteration: 436603
loss: 0.9949787855148315,grad_norm: 0.8296436767877099, iteration: 436604
loss: 0.9812566041946411,grad_norm: 0.7940974026595086, iteration: 436605
loss: 1.0111955404281616,grad_norm: 0.685621531441862, iteration: 436606
loss: 0.9653387069702148,grad_norm: 0.7033699761973149, iteration: 436607
loss: 1.0273919105529785,grad_norm: 0.8351728267474947, iteration: 436608
loss: 1.0335828065872192,grad_norm: 0.9999993716710472, iteration: 436609
loss: 1.063254952430725,grad_norm: 0.7767313343458407, iteration: 436610
loss: 1.0675361156463623,grad_norm: 0.9999997268271122, iteration: 436611
loss: 1.0874041318893433,grad_norm: 0.8228589679507649, iteration: 436612
loss: 0.9897943735122681,grad_norm: 0.7458471386350835, iteration: 436613
loss: 1.0125706195831299,grad_norm: 0.8561198392081125, iteration: 436614
loss: 0.972088098526001,grad_norm: 0.9999991510664687, iteration: 436615
loss: 1.0074814558029175,grad_norm: 0.727939472568468, iteration: 436616
loss: 1.090444564819336,grad_norm: 0.9999996346296887, iteration: 436617
loss: 1.0630313158035278,grad_norm: 1.0000000107892653, iteration: 436618
loss: 0.9937083125114441,grad_norm: 0.7282690909630662, iteration: 436619
loss: 1.0398834943771362,grad_norm: 0.9999994717290568, iteration: 436620
loss: 1.0064972639083862,grad_norm: 0.7669148857708531, iteration: 436621
loss: 1.0513485670089722,grad_norm: 0.7343945158424874, iteration: 436622
loss: 1.0055921077728271,grad_norm: 0.9999998240936593, iteration: 436623
loss: 0.9853810667991638,grad_norm: 0.7377139442891892, iteration: 436624
loss: 0.970839262008667,grad_norm: 0.7767559954808925, iteration: 436625
loss: 1.0681110620498657,grad_norm: 1.000000103952749, iteration: 436626
loss: 1.0018140077590942,grad_norm: 0.8401164387234391, iteration: 436627
loss: 1.0052640438079834,grad_norm: 0.8010369406942687, iteration: 436628
loss: 1.0087581872940063,grad_norm: 0.9999998811443611, iteration: 436629
loss: 1.0211060047149658,grad_norm: 0.7826426041052645, iteration: 436630
loss: 1.0214135646820068,grad_norm: 0.7347307225111324, iteration: 436631
loss: 0.9540478587150574,grad_norm: 0.7864838922986415, iteration: 436632
loss: 0.9833820462226868,grad_norm: 0.7161144142003373, iteration: 436633
loss: 1.0192792415618896,grad_norm: 0.87954992285594, iteration: 436634
loss: 0.9712233543395996,grad_norm: 0.7979925751947566, iteration: 436635
loss: 1.0169625282287598,grad_norm: 0.999999469781886, iteration: 436636
loss: 1.02203369140625,grad_norm: 0.9787093082654769, iteration: 436637
loss: 1.0096008777618408,grad_norm: 0.9171489453731891, iteration: 436638
loss: 1.0116384029388428,grad_norm: 0.7144083099520093, iteration: 436639
loss: 0.9630692005157471,grad_norm: 0.7991276810126419, iteration: 436640
loss: 0.9914304614067078,grad_norm: 0.7734086056995946, iteration: 436641
loss: 1.009033441543579,grad_norm: 0.7598084212252375, iteration: 436642
loss: 1.0055793523788452,grad_norm: 0.7383063136384431, iteration: 436643
loss: 0.9948754906654358,grad_norm: 0.730341759206923, iteration: 436644
loss: 1.0301460027694702,grad_norm: 0.9643012168592361, iteration: 436645
loss: 0.973200261592865,grad_norm: 0.805315162355532, iteration: 436646
loss: 1.0334633588790894,grad_norm: 0.7793912498800325, iteration: 436647
loss: 1.0125881433486938,grad_norm: 0.9999990519141546, iteration: 436648
loss: 0.993926465511322,grad_norm: 0.7954419032350311, iteration: 436649
loss: 1.0088310241699219,grad_norm: 0.6690002588295664, iteration: 436650
loss: 1.0616183280944824,grad_norm: 0.7511973289884948, iteration: 436651
loss: 1.0612027645111084,grad_norm: 0.9999991971472932, iteration: 436652
loss: 0.9910690188407898,grad_norm: 0.6915975501323192, iteration: 436653
loss: 1.0063507556915283,grad_norm: 0.7660299617070285, iteration: 436654
loss: 0.9961354732513428,grad_norm: 0.8116441218401698, iteration: 436655
loss: 0.9838719367980957,grad_norm: 0.7623971133349233, iteration: 436656
loss: 0.9691416621208191,grad_norm: 0.743760458641021, iteration: 436657
loss: 0.9823969602584839,grad_norm: 0.6395690742605736, iteration: 436658
loss: 0.9911519885063171,grad_norm: 0.8539017928079631, iteration: 436659
loss: 1.0373209714889526,grad_norm: 0.6167977842279212, iteration: 436660
loss: 0.9713310599327087,grad_norm: 0.7812314567105691, iteration: 436661
loss: 0.9507904052734375,grad_norm: 0.8553818651036051, iteration: 436662
loss: 0.997535765171051,grad_norm: 0.9999996284276768, iteration: 436663
loss: 0.9747360944747925,grad_norm: 0.8358273138519289, iteration: 436664
loss: 0.9788973927497864,grad_norm: 0.6859815321758329, iteration: 436665
loss: 0.9744436740875244,grad_norm: 0.866906503033902, iteration: 436666
loss: 1.0036779642105103,grad_norm: 0.707145217759025, iteration: 436667
loss: 1.0059572458267212,grad_norm: 0.6991607138345072, iteration: 436668
loss: 1.0597909688949585,grad_norm: 0.9314010668406281, iteration: 436669
loss: 0.9429591298103333,grad_norm: 0.7508141363665036, iteration: 436670
loss: 0.9697232246398926,grad_norm: 0.7008031100115132, iteration: 436671
loss: 1.0206955671310425,grad_norm: 0.999999115029178, iteration: 436672
loss: 0.9948801398277283,grad_norm: 0.6208645101497976, iteration: 436673
loss: 0.9859600067138672,grad_norm: 0.9363376013357813, iteration: 436674
loss: 0.9947905540466309,grad_norm: 0.9154110929267218, iteration: 436675
loss: 1.0160698890686035,grad_norm: 0.651266944852906, iteration: 436676
loss: 0.9825320839881897,grad_norm: 0.9999990417381648, iteration: 436677
loss: 0.9989579916000366,grad_norm: 0.9559954164497075, iteration: 436678
loss: 1.0290921926498413,grad_norm: 0.9999999182789197, iteration: 436679
loss: 0.9702380299568176,grad_norm: 0.9531078508699113, iteration: 436680
loss: 0.9852040410041809,grad_norm: 0.7138804322398626, iteration: 436681
loss: 0.9294911623001099,grad_norm: 0.8886201565402236, iteration: 436682
loss: 0.9939716458320618,grad_norm: 0.7923878450757936, iteration: 436683
loss: 0.9920951724052429,grad_norm: 0.8262630228434215, iteration: 436684
loss: 0.9802727699279785,grad_norm: 0.9007604307043181, iteration: 436685
loss: 0.9866759777069092,grad_norm: 0.764504286434308, iteration: 436686
loss: 1.0213457345962524,grad_norm: 0.9999998889117152, iteration: 436687
loss: 1.007218360900879,grad_norm: 0.7809306858198436, iteration: 436688
loss: 1.0292249917984009,grad_norm: 0.6945631010349769, iteration: 436689
loss: 0.9895904660224915,grad_norm: 0.723394872320694, iteration: 436690
loss: 1.0369266271591187,grad_norm: 0.8989778981086666, iteration: 436691
loss: 0.9864174127578735,grad_norm: 0.8050277654902979, iteration: 436692
loss: 0.9911947250366211,grad_norm: 0.771462501799587, iteration: 436693
loss: 0.9853051900863647,grad_norm: 0.7716470866805468, iteration: 436694
loss: 0.9754607677459717,grad_norm: 0.7509604674044296, iteration: 436695
loss: 1.0443049669265747,grad_norm: 0.9999998511585512, iteration: 436696
loss: 0.9740579724311829,grad_norm: 0.7739828196178143, iteration: 436697
loss: 0.9822067022323608,grad_norm: 0.7583523059704174, iteration: 436698
loss: 0.9761868715286255,grad_norm: 0.7633242320214184, iteration: 436699
loss: 0.9888362884521484,grad_norm: 0.7011165475600509, iteration: 436700
loss: 1.0123581886291504,grad_norm: 0.9860934125180313, iteration: 436701
loss: 0.9693856239318848,grad_norm: 0.6970083477662519, iteration: 436702
loss: 1.0044937133789062,grad_norm: 0.8348445402252861, iteration: 436703
loss: 0.9843260049819946,grad_norm: 0.6721741765128161, iteration: 436704
loss: 1.0101265907287598,grad_norm: 0.8842979743107227, iteration: 436705
loss: 0.9724448919296265,grad_norm: 0.7383150094190726, iteration: 436706
loss: 1.036891222000122,grad_norm: 0.8240555971106185, iteration: 436707
loss: 1.001993179321289,grad_norm: 0.7782657136436159, iteration: 436708
loss: 0.9691147208213806,grad_norm: 0.9999992048302364, iteration: 436709
loss: 1.0141633749008179,grad_norm: 0.665355610754212, iteration: 436710
loss: 0.9971603155136108,grad_norm: 0.8733047343405801, iteration: 436711
loss: 0.9848147630691528,grad_norm: 0.8508860484110767, iteration: 436712
loss: 0.994411826133728,grad_norm: 0.7697639717352532, iteration: 436713
loss: 1.02299964427948,grad_norm: 0.8783493141712164, iteration: 436714
loss: 0.9985909461975098,grad_norm: 0.9222342721525119, iteration: 436715
loss: 1.0431559085845947,grad_norm: 0.8342719175359288, iteration: 436716
loss: 0.9693881869316101,grad_norm: 0.9800757334282951, iteration: 436717
loss: 1.1049134731292725,grad_norm: 0.9282226134458229, iteration: 436718
loss: 1.037674069404602,grad_norm: 0.807595531623416, iteration: 436719
loss: 0.9927574992179871,grad_norm: 0.9999996691636271, iteration: 436720
loss: 0.9699342250823975,grad_norm: 0.80416435528754, iteration: 436721
loss: 1.0186409950256348,grad_norm: 0.9394025584242254, iteration: 436722
loss: 0.9792660474777222,grad_norm: 0.6810761220384765, iteration: 436723
loss: 1.1361182928085327,grad_norm: 0.999999707475664, iteration: 436724
loss: 1.0094715356826782,grad_norm: 0.7738545613861373, iteration: 436725
loss: 0.9858875870704651,grad_norm: 0.7930196912328165, iteration: 436726
loss: 1.001665711402893,grad_norm: 0.7205027766168794, iteration: 436727
loss: 0.9964096546173096,grad_norm: 0.7635214443569998, iteration: 436728
loss: 1.021254062652588,grad_norm: 0.6572953273917508, iteration: 436729
loss: 1.040416955947876,grad_norm: 0.721083360199387, iteration: 436730
loss: 1.0015003681182861,grad_norm: 0.760178847587711, iteration: 436731
loss: 1.0103663206100464,grad_norm: 0.7943850225586064, iteration: 436732
loss: 1.0025266408920288,grad_norm: 0.9999993035827736, iteration: 436733
loss: 1.0057199001312256,grad_norm: 0.8271756632283158, iteration: 436734
loss: 0.9677797555923462,grad_norm: 0.9038292621257152, iteration: 436735
loss: 1.0132734775543213,grad_norm: 0.8132927575672978, iteration: 436736
loss: 1.0011316537857056,grad_norm: 0.857151522092758, iteration: 436737
loss: 0.9473807215690613,grad_norm: 0.8065824879788303, iteration: 436738
loss: 1.0023844242095947,grad_norm: 0.656909016093991, iteration: 436739
loss: 1.0091572999954224,grad_norm: 0.9553840580603865, iteration: 436740
loss: 1.0034178495407104,grad_norm: 0.9999990115677597, iteration: 436741
loss: 1.0020900964736938,grad_norm: 0.9868009443713409, iteration: 436742
loss: 0.9523844718933105,grad_norm: 0.7216852931614431, iteration: 436743
loss: 0.9734837412834167,grad_norm: 0.8193712066855918, iteration: 436744
loss: 0.9733003377914429,grad_norm: 0.7400489918550386, iteration: 436745
loss: 1.000366449356079,grad_norm: 0.7320113158960042, iteration: 436746
loss: 0.9951198101043701,grad_norm: 0.7559792043244132, iteration: 436747
loss: 1.0378413200378418,grad_norm: 0.7458601845817222, iteration: 436748
loss: 0.9898169636726379,grad_norm: 0.735870713013145, iteration: 436749
loss: 0.9890559315681458,grad_norm: 0.7324180134210587, iteration: 436750
loss: 1.0103040933609009,grad_norm: 0.8170370135393902, iteration: 436751
loss: 1.008107304573059,grad_norm: 0.6732787662867648, iteration: 436752
loss: 0.989997923374176,grad_norm: 0.8178419063947124, iteration: 436753
loss: 1.0110810995101929,grad_norm: 0.777005261450013, iteration: 436754
loss: 1.0235964059829712,grad_norm: 0.8294029147323068, iteration: 436755
loss: 0.9254193902015686,grad_norm: 0.7770928100855558, iteration: 436756
loss: 1.0077685117721558,grad_norm: 0.6597025997621839, iteration: 436757
loss: 0.9706140756607056,grad_norm: 0.8054654500818134, iteration: 436758
loss: 0.9842393398284912,grad_norm: 0.8944925129214258, iteration: 436759
loss: 0.9683700799942017,grad_norm: 0.8808177062326501, iteration: 436760
loss: 1.0192458629608154,grad_norm: 0.7189318330460551, iteration: 436761
loss: 0.9880561828613281,grad_norm: 0.8012030593409629, iteration: 436762
loss: 1.0122815370559692,grad_norm: 0.7899354230093277, iteration: 436763
loss: 0.9572867751121521,grad_norm: 0.8300215372046551, iteration: 436764
loss: 0.9696016907691956,grad_norm: 0.7952086110922281, iteration: 436765
loss: 0.9376525282859802,grad_norm: 0.7789196135435721, iteration: 436766
loss: 0.9917563796043396,grad_norm: 0.734587837751175, iteration: 436767
loss: 1.001295804977417,grad_norm: 0.6962950281321421, iteration: 436768
loss: 0.976905882358551,grad_norm: 0.7774820262368864, iteration: 436769
loss: 0.977851152420044,grad_norm: 0.7310042946334164, iteration: 436770
loss: 1.021287441253662,grad_norm: 0.7759193236806914, iteration: 436771
loss: 0.9926413297653198,grad_norm: 0.8741024430323886, iteration: 436772
loss: 0.9978240728378296,grad_norm: 0.7430150363021816, iteration: 436773
loss: 1.0629180669784546,grad_norm: 0.9999998536536203, iteration: 436774
loss: 1.0076279640197754,grad_norm: 0.8733965099273086, iteration: 436775
loss: 0.9850610494613647,grad_norm: 0.7733838873431539, iteration: 436776
loss: 0.9875985980033875,grad_norm: 0.9205787693319432, iteration: 436777
loss: 1.0899065732955933,grad_norm: 0.9999999917283477, iteration: 436778
loss: 0.9619198441505432,grad_norm: 0.9914694210841036, iteration: 436779
loss: 0.9647408723831177,grad_norm: 0.9980082827770127, iteration: 436780
loss: 0.9800505042076111,grad_norm: 0.765471799026223, iteration: 436781
loss: 1.007252812385559,grad_norm: 0.999999989663204, iteration: 436782
loss: 1.0142602920532227,grad_norm: 0.7743074096354017, iteration: 436783
loss: 1.0219933986663818,grad_norm: 0.976764447580713, iteration: 436784
loss: 1.0188210010528564,grad_norm: 0.8684347101689486, iteration: 436785
loss: 1.0435585975646973,grad_norm: 0.8313158349951953, iteration: 436786
loss: 1.134350061416626,grad_norm: 0.9999996178369247, iteration: 436787
loss: 1.082753300666809,grad_norm: 0.9999995786661353, iteration: 436788
loss: 1.0275142192840576,grad_norm: 0.731814500017221, iteration: 436789
loss: 0.9972619414329529,grad_norm: 0.7648261817454884, iteration: 436790
loss: 0.9939889311790466,grad_norm: 0.6561764583084725, iteration: 436791
loss: 0.986422598361969,grad_norm: 0.8712503215509685, iteration: 436792
loss: 0.9993317127227783,grad_norm: 0.7340144630552102, iteration: 436793
loss: 0.985668420791626,grad_norm: 0.8318192152792687, iteration: 436794
loss: 0.9997533559799194,grad_norm: 0.8284706391157048, iteration: 436795
loss: 1.0075572729110718,grad_norm: 0.7683540119973956, iteration: 436796
loss: 1.0070061683654785,grad_norm: 0.7105268619088709, iteration: 436797
loss: 0.9906126856803894,grad_norm: 0.6718779105216934, iteration: 436798
loss: 0.9903567433357239,grad_norm: 0.7282282825123538, iteration: 436799
loss: 1.0342427492141724,grad_norm: 0.9007991246362927, iteration: 436800
loss: 0.9275640845298767,grad_norm: 0.8662288846275752, iteration: 436801
loss: 1.0510624647140503,grad_norm: 0.9999994722542762, iteration: 436802
loss: 0.9534426927566528,grad_norm: 0.9999991056556339, iteration: 436803
loss: 1.0395853519439697,grad_norm: 0.8165271614855355, iteration: 436804
loss: 1.025541067123413,grad_norm: 0.9999990183412415, iteration: 436805
loss: 1.2595409154891968,grad_norm: 0.9999995439879446, iteration: 436806
loss: 1.017688274383545,grad_norm: 0.8652692931770997, iteration: 436807
loss: 1.0102038383483887,grad_norm: 0.7521872963015037, iteration: 436808
loss: 1.0148406028747559,grad_norm: 0.7406297171437815, iteration: 436809
loss: 0.985294759273529,grad_norm: 0.6885371429552214, iteration: 436810
loss: 1.0196048021316528,grad_norm: 0.8433011944741664, iteration: 436811
loss: 1.017106294631958,grad_norm: 0.7065580720699531, iteration: 436812
loss: 0.9949500560760498,grad_norm: 0.7834031041354784, iteration: 436813
loss: 0.9867767691612244,grad_norm: 0.7840430202031927, iteration: 436814
loss: 0.997227668762207,grad_norm: 0.8856170492742395, iteration: 436815
loss: 0.9846494793891907,grad_norm: 0.9999995880455103, iteration: 436816
loss: 0.9969875812530518,grad_norm: 0.8473044451123469, iteration: 436817
loss: 1.0141584873199463,grad_norm: 0.7850829304088704, iteration: 436818
loss: 0.9717597961425781,grad_norm: 0.9645678190216714, iteration: 436819
loss: 0.9736377596855164,grad_norm: 0.7099181983834599, iteration: 436820
loss: 1.0315961837768555,grad_norm: 0.9700057508884681, iteration: 436821
loss: 1.0379689931869507,grad_norm: 0.9999997214193458, iteration: 436822
loss: 1.0319734811782837,grad_norm: 0.7527675408827185, iteration: 436823
loss: 0.9537264704704285,grad_norm: 0.7711461739010868, iteration: 436824
loss: 1.0192019939422607,grad_norm: 0.9999991847600959, iteration: 436825
loss: 0.9979243278503418,grad_norm: 0.7592240233511786, iteration: 436826
loss: 0.9815933108329773,grad_norm: 0.7129567515934279, iteration: 436827
loss: 1.0396785736083984,grad_norm: 0.8376542872580611, iteration: 436828
loss: 0.974587619304657,grad_norm: 0.8448196536923868, iteration: 436829
loss: 0.9994969367980957,grad_norm: 0.7384795749926276, iteration: 436830
loss: 1.003793478012085,grad_norm: 0.7639206217136953, iteration: 436831
loss: 0.9784321188926697,grad_norm: 0.6905309811339294, iteration: 436832
loss: 0.9600791335105896,grad_norm: 0.8967948625084035, iteration: 436833
loss: 0.9771190881729126,grad_norm: 0.862152039415201, iteration: 436834
loss: 1.0104734897613525,grad_norm: 0.90856500015826, iteration: 436835
loss: 0.9821349382400513,grad_norm: 0.7530324190879087, iteration: 436836
loss: 1.0178351402282715,grad_norm: 0.6864952091262845, iteration: 436837
loss: 1.0081970691680908,grad_norm: 0.8328165775748076, iteration: 436838
loss: 1.0193110704421997,grad_norm: 0.8361137868514135, iteration: 436839
loss: 1.015065312385559,grad_norm: 0.748343498388077, iteration: 436840
loss: 0.9830917716026306,grad_norm: 0.7190581770899129, iteration: 436841
loss: 0.9608067870140076,grad_norm: 0.7084414735658272, iteration: 436842
loss: 0.991198718547821,grad_norm: 0.7021037997200865, iteration: 436843
loss: 0.9987838864326477,grad_norm: 0.7716814397532578, iteration: 436844
loss: 0.9760636687278748,grad_norm: 0.8832489670615751, iteration: 436845
loss: 0.9776150584220886,grad_norm: 0.7228284981318094, iteration: 436846
loss: 1.0190433263778687,grad_norm: 0.7100473943171436, iteration: 436847
loss: 1.1714493036270142,grad_norm: 0.9999995979221673, iteration: 436848
loss: 0.9850977063179016,grad_norm: 0.9999996125373215, iteration: 436849
loss: 1.0128943920135498,grad_norm: 0.7625912483593869, iteration: 436850
loss: 1.0415009260177612,grad_norm: 0.8082147875968049, iteration: 436851
loss: 0.9902545213699341,grad_norm: 0.8594676279836961, iteration: 436852
loss: 1.0517960786819458,grad_norm: 0.6856296145177876, iteration: 436853
loss: 0.9766892194747925,grad_norm: 0.657250477566211, iteration: 436854
loss: 1.0029313564300537,grad_norm: 0.6259520082195937, iteration: 436855
loss: 0.9637786149978638,grad_norm: 0.8501468382992033, iteration: 436856
loss: 0.969748318195343,grad_norm: 0.7797609463517906, iteration: 436857
loss: 0.9929025173187256,grad_norm: 0.8695720325606798, iteration: 436858
loss: 0.9350721836090088,grad_norm: 0.6315396642269544, iteration: 436859
loss: 0.9657620787620544,grad_norm: 0.9999998533301154, iteration: 436860
loss: 1.0284881591796875,grad_norm: 0.7603970505974886, iteration: 436861
loss: 1.031145691871643,grad_norm: 0.8159832606143839, iteration: 436862
loss: 1.0207730531692505,grad_norm: 0.7460166921567325, iteration: 436863
loss: 1.0217424631118774,grad_norm: 0.8139245827430251, iteration: 436864
loss: 0.9937111735343933,grad_norm: 0.7309988705387807, iteration: 436865
loss: 0.988662838935852,grad_norm: 0.8527713786103017, iteration: 436866
loss: 1.0816494226455688,grad_norm: 0.7440708301202281, iteration: 436867
loss: 1.0027059316635132,grad_norm: 0.7807565810389001, iteration: 436868
loss: 0.9865633845329285,grad_norm: 0.7791285426386596, iteration: 436869
loss: 0.9670536518096924,grad_norm: 0.8207093723396609, iteration: 436870
loss: 1.010560393333435,grad_norm: 0.9065813500852105, iteration: 436871
loss: 0.9434399008750916,grad_norm: 0.9326371479526139, iteration: 436872
loss: 0.9946264028549194,grad_norm: 0.7235621621279553, iteration: 436873
loss: 0.9510257244110107,grad_norm: 0.7296727841076679, iteration: 436874
loss: 0.9956545829772949,grad_norm: 0.8925251241584724, iteration: 436875
loss: 1.0242881774902344,grad_norm: 0.7262832694645639, iteration: 436876
loss: 1.0567848682403564,grad_norm: 0.7957607067351642, iteration: 436877
loss: 0.9883612394332886,grad_norm: 0.9999998421802269, iteration: 436878
loss: 1.0358027219772339,grad_norm: 0.9173126968661646, iteration: 436879
loss: 1.0125678777694702,grad_norm: 0.8150893906327984, iteration: 436880
loss: 1.0199882984161377,grad_norm: 0.7858242154926708, iteration: 436881
loss: 0.9675369262695312,grad_norm: 0.7747866129345642, iteration: 436882
loss: 0.974315345287323,grad_norm: 0.7558434389205414, iteration: 436883
loss: 1.1146870851516724,grad_norm: 0.9480393502977825, iteration: 436884
loss: 1.035550594329834,grad_norm: 0.7029131373495563, iteration: 436885
loss: 1.0731626749038696,grad_norm: 0.9999994844739857, iteration: 436886
loss: 1.0123592615127563,grad_norm: 0.792360346722334, iteration: 436887
loss: 1.0074529647827148,grad_norm: 0.7780642631138136, iteration: 436888
loss: 0.9991708397865295,grad_norm: 0.706325549481161, iteration: 436889
loss: 1.0074795484542847,grad_norm: 0.7586552290957245, iteration: 436890
loss: 0.9658927321434021,grad_norm: 0.8856839153566072, iteration: 436891
loss: 0.9782735109329224,grad_norm: 0.7987129395291492, iteration: 436892
loss: 1.0175427198410034,grad_norm: 0.9999996249655163, iteration: 436893
loss: 1.0031118392944336,grad_norm: 0.8705100196151742, iteration: 436894
loss: 1.0236376523971558,grad_norm: 0.7370956573512963, iteration: 436895
loss: 0.9691829085350037,grad_norm: 0.6670179013016644, iteration: 436896
loss: 1.0572364330291748,grad_norm: 0.9999993699187897, iteration: 436897
loss: 0.9784088134765625,grad_norm: 0.7072121736300315, iteration: 436898
loss: 0.993028998374939,grad_norm: 0.7155351544965999, iteration: 436899
loss: 1.0085643529891968,grad_norm: 0.7992552708203308, iteration: 436900
loss: 1.0240256786346436,grad_norm: 0.9999996972383356, iteration: 436901
loss: 1.0216151475906372,grad_norm: 0.9999995758725223, iteration: 436902
loss: 0.9969398975372314,grad_norm: 0.8598271878631458, iteration: 436903
loss: 1.0024224519729614,grad_norm: 0.8538196968742633, iteration: 436904
loss: 1.0210779905319214,grad_norm: 0.8711515217125049, iteration: 436905
loss: 1.0157562494277954,grad_norm: 0.6371949109175278, iteration: 436906
loss: 1.0247660875320435,grad_norm: 0.8681279799450814, iteration: 436907
loss: 1.0204071998596191,grad_norm: 0.8423936917242931, iteration: 436908
loss: 0.9897886514663696,grad_norm: 0.8931253596235571, iteration: 436909
loss: 1.0589115619659424,grad_norm: 0.9999993990196442, iteration: 436910
loss: 1.0024369955062866,grad_norm: 0.7323724146024081, iteration: 436911
loss: 1.004188895225525,grad_norm: 0.6427655370586803, iteration: 436912
loss: 0.9990609884262085,grad_norm: 0.9999998495502427, iteration: 436913
loss: 1.01443612575531,grad_norm: 0.8277013483467855, iteration: 436914
loss: 1.2742489576339722,grad_norm: 0.9999990978514749, iteration: 436915
loss: 1.0359811782836914,grad_norm: 0.9999996662901874, iteration: 436916
loss: 0.9826950430870056,grad_norm: 0.7396690241570386, iteration: 436917
loss: 0.983731746673584,grad_norm: 0.7886162780560537, iteration: 436918
loss: 1.0039434432983398,grad_norm: 0.7172571544208837, iteration: 436919
loss: 1.024647831916809,grad_norm: 0.7431274472130759, iteration: 436920
loss: 1.0086127519607544,grad_norm: 0.694021512092264, iteration: 436921
loss: 0.9780044555664062,grad_norm: 0.9736201739932636, iteration: 436922
loss: 1.0097788572311401,grad_norm: 0.9999991509696219, iteration: 436923
loss: 0.9850043058395386,grad_norm: 0.738396086160567, iteration: 436924
loss: 0.9885154962539673,grad_norm: 0.9324246534476501, iteration: 436925
loss: 0.9773485064506531,grad_norm: 0.9100620490941066, iteration: 436926
loss: 1.0199843645095825,grad_norm: 0.8072202607265824, iteration: 436927
loss: 0.993772029876709,grad_norm: 0.6970187419198288, iteration: 436928
loss: 0.9865448474884033,grad_norm: 0.7777789050375492, iteration: 436929
loss: 1.0281645059585571,grad_norm: 0.6648857821835157, iteration: 436930
loss: 0.9911507368087769,grad_norm: 0.6686282007671538, iteration: 436931
loss: 1.0155636072158813,grad_norm: 0.885096185479886, iteration: 436932
loss: 0.998396635055542,grad_norm: 0.8850235907478549, iteration: 436933
loss: 1.0971930027008057,grad_norm: 0.8505876595159871, iteration: 436934
loss: 0.9831200242042542,grad_norm: 0.7518337968264167, iteration: 436935
loss: 0.995614767074585,grad_norm: 0.7675469471055141, iteration: 436936
loss: 0.9978892207145691,grad_norm: 0.8551953276589394, iteration: 436937
loss: 1.0215413570404053,grad_norm: 0.8177543111867044, iteration: 436938
loss: 1.0494989156723022,grad_norm: 0.6908368099137963, iteration: 436939
loss: 0.9835003018379211,grad_norm: 0.8780096884980091, iteration: 436940
loss: 1.0554360151290894,grad_norm: 0.9999435228335771, iteration: 436941
loss: 0.9638969302177429,grad_norm: 0.8952185907013193, iteration: 436942
loss: 1.0724340677261353,grad_norm: 0.9262510056717741, iteration: 436943
loss: 1.0153123140335083,grad_norm: 0.984618865011057, iteration: 436944
loss: 0.954491913318634,grad_norm: 0.7822567695398006, iteration: 436945
loss: 0.9970739483833313,grad_norm: 0.705353951151792, iteration: 436946
loss: 0.9819561243057251,grad_norm: 0.9834273752977417, iteration: 436947
loss: 1.0503407716751099,grad_norm: 0.9810181106771718, iteration: 436948
loss: 0.9923313856124878,grad_norm: 0.8364900210209962, iteration: 436949
loss: 0.9782047271728516,grad_norm: 0.782021469504515, iteration: 436950
loss: 0.9859678745269775,grad_norm: 0.66663305190295, iteration: 436951
loss: 1.0066841840744019,grad_norm: 0.7887800536697304, iteration: 436952
loss: 1.0066685676574707,grad_norm: 0.9999991687020006, iteration: 436953
loss: 1.009691834449768,grad_norm: 0.7584189761139096, iteration: 436954
loss: 1.0036876201629639,grad_norm: 0.8008519326983935, iteration: 436955
loss: 1.017638087272644,grad_norm: 0.6599560128976133, iteration: 436956
loss: 1.0077911615371704,grad_norm: 0.6884614876614209, iteration: 436957
loss: 0.9792822003364563,grad_norm: 0.9999991977552748, iteration: 436958
loss: 0.9695027470588684,grad_norm: 0.7446686477322589, iteration: 436959
loss: 0.9967842698097229,grad_norm: 0.7546688323503999, iteration: 436960
loss: 1.0132192373275757,grad_norm: 0.707851255343528, iteration: 436961
loss: 0.9952691793441772,grad_norm: 0.7283171469162593, iteration: 436962
loss: 1.028287410736084,grad_norm: 0.8257674362648149, iteration: 436963
loss: 1.0390607118606567,grad_norm: 0.7098896884262509, iteration: 436964
loss: 1.0294348001480103,grad_norm: 0.8693534345307258, iteration: 436965
loss: 0.9823355078697205,grad_norm: 0.7134691852421167, iteration: 436966
loss: 1.0179636478424072,grad_norm: 0.7107034989592478, iteration: 436967
loss: 1.002013087272644,grad_norm: 0.7505962998462278, iteration: 436968
loss: 0.989425003528595,grad_norm: 0.765239016026938, iteration: 436969
loss: 1.0507419109344482,grad_norm: 0.9248728157249951, iteration: 436970
loss: 1.0079642534255981,grad_norm: 0.8935946314102512, iteration: 436971
loss: 0.9885821342468262,grad_norm: 0.8335681238624598, iteration: 436972
loss: 1.0039222240447998,grad_norm: 0.7012021689140218, iteration: 436973
loss: 1.033429741859436,grad_norm: 0.6947458140561165, iteration: 436974
loss: 1.0105383396148682,grad_norm: 0.8107102886535892, iteration: 436975
loss: 0.9983243346214294,grad_norm: 0.8188958920165799, iteration: 436976
loss: 1.0152260065078735,grad_norm: 0.9382560062741645, iteration: 436977
loss: 1.006536841392517,grad_norm: 0.7935060675156896, iteration: 436978
loss: 0.9963705539703369,grad_norm: 0.8285804276529444, iteration: 436979
loss: 0.96366947889328,grad_norm: 0.7956822756083252, iteration: 436980
loss: 1.0136746168136597,grad_norm: 0.6979785830425039, iteration: 436981
loss: 1.0008255243301392,grad_norm: 0.7814586624334107, iteration: 436982
loss: 1.003684163093567,grad_norm: 0.6651997745440574, iteration: 436983
loss: 1.0355480909347534,grad_norm: 0.9999996607098247, iteration: 436984
loss: 0.9807559847831726,grad_norm: 0.7434796986521769, iteration: 436985
loss: 1.0127816200256348,grad_norm: 0.8643320884791447, iteration: 436986
loss: 1.0044227838516235,grad_norm: 0.8659801196709587, iteration: 436987
loss: 1.003838300704956,grad_norm: 0.9110141732987302, iteration: 436988
loss: 1.021133303642273,grad_norm: 0.9140540409515442, iteration: 436989
loss: 1.021912693977356,grad_norm: 0.7077244512921126, iteration: 436990
loss: 1.000382900238037,grad_norm: 0.9999992136411985, iteration: 436991
loss: 1.0106605291366577,grad_norm: 0.7558380162727244, iteration: 436992
loss: 0.9792200922966003,grad_norm: 0.6816452114820365, iteration: 436993
loss: 1.0306333303451538,grad_norm: 0.8826774764248871, iteration: 436994
loss: 0.9853103756904602,grad_norm: 0.6940093353977768, iteration: 436995
loss: 0.9917123317718506,grad_norm: 0.702937382222666, iteration: 436996
loss: 0.9662101864814758,grad_norm: 0.8051287312843167, iteration: 436997
loss: 0.9995719790458679,grad_norm: 0.7810986604047165, iteration: 436998
loss: 0.9922911524772644,grad_norm: 0.999999982891327, iteration: 436999
loss: 0.9940306544303894,grad_norm: 0.8444927936812919, iteration: 437000
loss: 0.9816957712173462,grad_norm: 0.968741887208744, iteration: 437001
loss: 0.9967764019966125,grad_norm: 0.6913986280136234, iteration: 437002
loss: 1.0084998607635498,grad_norm: 0.7773275425733067, iteration: 437003
loss: 0.9876917600631714,grad_norm: 0.7912944606347058, iteration: 437004
loss: 1.0131769180297852,grad_norm: 0.7639204687750794, iteration: 437005
loss: 1.008458137512207,grad_norm: 0.9999989737043805, iteration: 437006
loss: 1.007641077041626,grad_norm: 0.769428095536168, iteration: 437007
loss: 0.9571433067321777,grad_norm: 0.6852774328866503, iteration: 437008
loss: 1.0028272867202759,grad_norm: 0.9233458893186268, iteration: 437009
loss: 1.1034573316574097,grad_norm: 0.9999998611106474, iteration: 437010
loss: 1.0100141763687134,grad_norm: 0.864255796655154, iteration: 437011
loss: 0.9912229180335999,grad_norm: 0.7244827494193657, iteration: 437012
loss: 0.9756534099578857,grad_norm: 0.76106071233374, iteration: 437013
loss: 0.9864509701728821,grad_norm: 0.715760541089564, iteration: 437014
loss: 1.005937099456787,grad_norm: 0.8130062329981561, iteration: 437015
loss: 1.0461747646331787,grad_norm: 0.999999365648461, iteration: 437016
loss: 1.0288573503494263,grad_norm: 0.9999993405385738, iteration: 437017
loss: 0.9885699152946472,grad_norm: 0.7350472906590383, iteration: 437018
loss: 1.0091757774353027,grad_norm: 0.7884205142174133, iteration: 437019
loss: 0.9910159111022949,grad_norm: 0.8308776835721591, iteration: 437020
loss: 0.994306743144989,grad_norm: 0.7479455820056058, iteration: 437021
loss: 0.9828475713729858,grad_norm: 0.6811996112142531, iteration: 437022
loss: 0.9869185090065002,grad_norm: 0.9540208578283584, iteration: 437023
loss: 0.9815847277641296,grad_norm: 0.901581948790233, iteration: 437024
loss: 1.007725715637207,grad_norm: 0.9999992143820705, iteration: 437025
loss: 0.9912130236625671,grad_norm: 0.8212099054184475, iteration: 437026
loss: 0.9477434158325195,grad_norm: 0.848243161517779, iteration: 437027
loss: 0.9930374622344971,grad_norm: 0.7073177050055631, iteration: 437028
loss: 1.0055952072143555,grad_norm: 0.5834876071279991, iteration: 437029
loss: 1.0261973142623901,grad_norm: 0.9706767622015956, iteration: 437030
loss: 0.9945652484893799,grad_norm: 0.83365480615988, iteration: 437031
loss: 0.9802100658416748,grad_norm: 0.8124447291129807, iteration: 437032
loss: 1.0298701524734497,grad_norm: 0.917767422363797, iteration: 437033
loss: 1.0083584785461426,grad_norm: 0.7807957667341441, iteration: 437034
loss: 1.0500543117523193,grad_norm: 0.9999995382092082, iteration: 437035
loss: 1.0025359392166138,grad_norm: 0.6784475583862759, iteration: 437036
loss: 1.0248996019363403,grad_norm: 0.9999993710827539, iteration: 437037
loss: 1.0018112659454346,grad_norm: 0.784173486397176, iteration: 437038
loss: 0.9917000532150269,grad_norm: 0.7400934826926409, iteration: 437039
loss: 1.006905198097229,grad_norm: 0.5618311690268385, iteration: 437040
loss: 1.0008569955825806,grad_norm: 0.8223490355671785, iteration: 437041
loss: 1.039779782295227,grad_norm: 0.9999997417665873, iteration: 437042
loss: 0.9934695959091187,grad_norm: 0.7077673493671838, iteration: 437043
loss: 1.0969229936599731,grad_norm: 0.881161410509179, iteration: 437044
loss: 0.9788099527359009,grad_norm: 0.9214017409438531, iteration: 437045
loss: 0.9723722338676453,grad_norm: 0.6853364981606862, iteration: 437046
loss: 1.0214476585388184,grad_norm: 0.7779762507986883, iteration: 437047
loss: 1.0069644451141357,grad_norm: 0.8414941106846754, iteration: 437048
loss: 1.0010875463485718,grad_norm: 0.784180793681817, iteration: 437049
loss: 0.9880539178848267,grad_norm: 0.7739597663235986, iteration: 437050
loss: 0.985114574432373,grad_norm: 0.7670822378010043, iteration: 437051
loss: 1.1026750802993774,grad_norm: 0.953103450103755, iteration: 437052
loss: 1.0141663551330566,grad_norm: 0.865913844643485, iteration: 437053
loss: 1.002756953239441,grad_norm: 0.7927477461256202, iteration: 437054
loss: 1.0321667194366455,grad_norm: 0.7742390886047502, iteration: 437055
loss: 1.0324819087982178,grad_norm: 0.6452091438627588, iteration: 437056
loss: 1.0365955829620361,grad_norm: 0.8415986195153534, iteration: 437057
loss: 0.9885274767875671,grad_norm: 0.6811705117999723, iteration: 437058
loss: 0.9849643111228943,grad_norm: 0.8255347134060975, iteration: 437059
loss: 0.9882425665855408,grad_norm: 0.8193898875887966, iteration: 437060
loss: 0.982291579246521,grad_norm: 0.9999998050419873, iteration: 437061
loss: 0.969353973865509,grad_norm: 0.719347141405477, iteration: 437062
loss: 0.9990699291229248,grad_norm: 0.9999993230749338, iteration: 437063
loss: 1.032454490661621,grad_norm: 0.9999995709630867, iteration: 437064
loss: 1.0494650602340698,grad_norm: 0.7491123485859876, iteration: 437065
loss: 0.9874706268310547,grad_norm: 0.7422083756849546, iteration: 437066
loss: 1.0033406019210815,grad_norm: 0.9498453824792895, iteration: 437067
loss: 1.0531666278839111,grad_norm: 0.9925113248237114, iteration: 437068
loss: 1.0117124319076538,grad_norm: 0.9999990480040938, iteration: 437069
loss: 1.0546892881393433,grad_norm: 0.9999998501592309, iteration: 437070
loss: 0.9906090497970581,grad_norm: 0.7709475757012317, iteration: 437071
loss: 1.0258334875106812,grad_norm: 0.9126660976037299, iteration: 437072
loss: 0.984560489654541,grad_norm: 0.723351436515779, iteration: 437073
loss: 0.9863592982292175,grad_norm: 0.7505974165435845, iteration: 437074
loss: 1.0068217515945435,grad_norm: 0.7075693266994535, iteration: 437075
loss: 0.9989563226699829,grad_norm: 0.8674891096028269, iteration: 437076
loss: 0.9922526478767395,grad_norm: 0.9586080877742663, iteration: 437077
loss: 1.0187690258026123,grad_norm: 0.8460449317911045, iteration: 437078
loss: 1.0238198041915894,grad_norm: 0.8619790868872326, iteration: 437079
loss: 1.2829649448394775,grad_norm: 0.9999997731467326, iteration: 437080
loss: 1.0027801990509033,grad_norm: 0.7102381726321891, iteration: 437081
loss: 0.9951137900352478,grad_norm: 0.8697555527821784, iteration: 437082
loss: 0.9733999371528625,grad_norm: 0.7773211685504506, iteration: 437083
loss: 1.027297019958496,grad_norm: 0.7743332947367696, iteration: 437084
loss: 0.9769107699394226,grad_norm: 0.7492920536206108, iteration: 437085
loss: 0.9699886441230774,grad_norm: 0.8935637853337783, iteration: 437086
loss: 1.0276657342910767,grad_norm: 0.9999991434147085, iteration: 437087
loss: 1.010079026222229,grad_norm: 0.73305543300641, iteration: 437088
loss: 1.0041664838790894,grad_norm: 0.9941315078515847, iteration: 437089
loss: 0.973201334476471,grad_norm: 0.9999997158222644, iteration: 437090
loss: 1.0022119283676147,grad_norm: 0.7272872241535264, iteration: 437091
loss: 1.0211751461029053,grad_norm: 0.7437307983299669, iteration: 437092
loss: 0.9935706853866577,grad_norm: 0.7625395682615311, iteration: 437093
loss: 1.01371431350708,grad_norm: 0.8654647192305482, iteration: 437094
loss: 1.0348368883132935,grad_norm: 0.9999991289270579, iteration: 437095
loss: 1.026848316192627,grad_norm: 0.8973256630601703, iteration: 437096
loss: 1.0235828161239624,grad_norm: 0.7614533531753778, iteration: 437097
loss: 1.007516622543335,grad_norm: 0.8195940767127555, iteration: 437098
loss: 0.9794809818267822,grad_norm: 0.766775612141999, iteration: 437099
loss: 0.993408739566803,grad_norm: 0.999999812760979, iteration: 437100
loss: 1.0143494606018066,grad_norm: 0.6762989777441016, iteration: 437101
loss: 0.999476969242096,grad_norm: 0.7708557020838509, iteration: 437102
loss: 1.0494916439056396,grad_norm: 0.7395007328897455, iteration: 437103
loss: 1.0080760717391968,grad_norm: 0.7665371369313942, iteration: 437104
loss: 0.9898157715797424,grad_norm: 0.8829929775620831, iteration: 437105
loss: 1.0840487480163574,grad_norm: 0.9999993926632563, iteration: 437106
loss: 1.0066235065460205,grad_norm: 0.9999991395543496, iteration: 437107
loss: 1.0082584619522095,grad_norm: 0.7199126967406476, iteration: 437108
loss: 1.0101205110549927,grad_norm: 0.9999989715940504, iteration: 437109
loss: 0.9804672598838806,grad_norm: 0.9167618428596181, iteration: 437110
loss: 1.0084881782531738,grad_norm: 0.845014774773715, iteration: 437111
loss: 0.9502888917922974,grad_norm: 0.8274074676353966, iteration: 437112
loss: 0.9632636308670044,grad_norm: 0.7672651703327613, iteration: 437113
loss: 0.9960023760795593,grad_norm: 0.7426200174560879, iteration: 437114
loss: 1.0310665369033813,grad_norm: 0.8729203886387837, iteration: 437115
loss: 0.996644914150238,grad_norm: 0.7754694409977957, iteration: 437116
loss: 1.017075538635254,grad_norm: 0.7297487581543024, iteration: 437117
loss: 1.0212903022766113,grad_norm: 0.8367389617872161, iteration: 437118
loss: 1.0169342756271362,grad_norm: 0.9999992177099477, iteration: 437119
loss: 0.9861595630645752,grad_norm: 0.6765927479399778, iteration: 437120
loss: 1.0018364191055298,grad_norm: 0.7867364634195857, iteration: 437121
loss: 0.9527419209480286,grad_norm: 0.7433493075906702, iteration: 437122
loss: 0.9892086982727051,grad_norm: 0.8025777803747011, iteration: 437123
loss: 1.0007985830307007,grad_norm: 0.9999990942400446, iteration: 437124
loss: 0.987893283367157,grad_norm: 0.8777227237871025, iteration: 437125
loss: 0.9835544228553772,grad_norm: 0.8303987448266231, iteration: 437126
loss: 1.0903286933898926,grad_norm: 0.7313854603136871, iteration: 437127
loss: 0.9665145874023438,grad_norm: 0.8997364978221453, iteration: 437128
loss: 0.9662758111953735,grad_norm: 0.783362884543947, iteration: 437129
loss: 0.9716674089431763,grad_norm: 0.7158924835724362, iteration: 437130
loss: 0.960696816444397,grad_norm: 0.7225202796927924, iteration: 437131
loss: 1.03447425365448,grad_norm: 0.9999995127952783, iteration: 437132
loss: 1.1085327863693237,grad_norm: 0.8727020659507444, iteration: 437133
loss: 1.0685847997665405,grad_norm: 0.9999990007888244, iteration: 437134
loss: 0.966623067855835,grad_norm: 0.7216762937101536, iteration: 437135
loss: 0.9376445412635803,grad_norm: 0.7434190253257589, iteration: 437136
loss: 1.0045217275619507,grad_norm: 0.8054078505055056, iteration: 437137
loss: 0.9929489493370056,grad_norm: 0.9999991056764057, iteration: 437138
loss: 1.0544562339782715,grad_norm: 0.999999730476076, iteration: 437139
loss: 1.0348992347717285,grad_norm: 0.9467442867001218, iteration: 437140
loss: 0.9947088956832886,grad_norm: 0.8850984663969653, iteration: 437141
loss: 0.9628730416297913,grad_norm: 0.8295731510649561, iteration: 437142
loss: 0.9972867369651794,grad_norm: 0.8134190977344965, iteration: 437143
loss: 1.0065029859542847,grad_norm: 0.7088651791362411, iteration: 437144
loss: 1.0816220045089722,grad_norm: 0.9999997609561567, iteration: 437145
loss: 1.0276453495025635,grad_norm: 0.9999990491493224, iteration: 437146
loss: 1.0315611362457275,grad_norm: 0.9999998063355104, iteration: 437147
loss: 0.9990588426589966,grad_norm: 0.7406724859148155, iteration: 437148
loss: 1.0261276960372925,grad_norm: 0.8839964534215841, iteration: 437149
loss: 0.9575528502464294,grad_norm: 0.6524576690385365, iteration: 437150
loss: 0.9989987015724182,grad_norm: 0.7490471572349992, iteration: 437151
loss: 0.9703156352043152,grad_norm: 0.8804379535131505, iteration: 437152
loss: 1.0265017747879028,grad_norm: 0.819036182293822, iteration: 437153
loss: 0.9720125794410706,grad_norm: 0.8029155653188932, iteration: 437154
loss: 1.001157283782959,grad_norm: 0.7740454472515994, iteration: 437155
loss: 0.9976108074188232,grad_norm: 0.8547644480849584, iteration: 437156
loss: 1.0054341554641724,grad_norm: 0.7912230741988545, iteration: 437157
loss: 0.983273446559906,grad_norm: 0.6322642150560575, iteration: 437158
loss: 1.1061550378799438,grad_norm: 0.9999993973343506, iteration: 437159
loss: 0.9999772906303406,grad_norm: 0.7147930847178816, iteration: 437160
loss: 0.9935044646263123,grad_norm: 0.999999105682387, iteration: 437161
loss: 0.9946867823600769,grad_norm: 0.8547422290162829, iteration: 437162
loss: 1.0181703567504883,grad_norm: 0.6906432616503135, iteration: 437163
loss: 1.0087900161743164,grad_norm: 0.8736692276567353, iteration: 437164
loss: 1.0453228950500488,grad_norm: 0.9655199239355493, iteration: 437165
loss: 1.0169658660888672,grad_norm: 0.9999993921496554, iteration: 437166
loss: 0.9894955158233643,grad_norm: 0.7621122443303028, iteration: 437167
loss: 0.9865766167640686,grad_norm: 0.8343827761052818, iteration: 437168
loss: 1.0223639011383057,grad_norm: 0.7950705209213227, iteration: 437169
loss: 1.025829792022705,grad_norm: 0.9045126107654776, iteration: 437170
loss: 1.0193957090377808,grad_norm: 0.6967290955038848, iteration: 437171
loss: 0.943138837814331,grad_norm: 0.7436548185223625, iteration: 437172
loss: 1.0503805875778198,grad_norm: 0.9999994934566986, iteration: 437173
loss: 1.0346908569335938,grad_norm: 0.823004364686301, iteration: 437174
loss: 1.0026112794876099,grad_norm: 0.8143318589200231, iteration: 437175
loss: 1.0592811107635498,grad_norm: 0.8312559004455363, iteration: 437176
loss: 1.0020450353622437,grad_norm: 0.8243293607347075, iteration: 437177
loss: 1.0228461027145386,grad_norm: 0.7648492366649331, iteration: 437178
loss: 0.9834581017494202,grad_norm: 0.760712057814366, iteration: 437179
loss: 0.9967553019523621,grad_norm: 0.7183241837569835, iteration: 437180
loss: 1.0048437118530273,grad_norm: 0.7580163898403344, iteration: 437181
loss: 1.022220253944397,grad_norm: 0.8227449197623343, iteration: 437182
loss: 0.9800975322723389,grad_norm: 0.8058016322307148, iteration: 437183
loss: 1.0474351644515991,grad_norm: 0.9016538648982633, iteration: 437184
loss: 1.0150701999664307,grad_norm: 0.9999991336345668, iteration: 437185
loss: 1.0010915994644165,grad_norm: 0.7431695118145021, iteration: 437186
loss: 0.968572199344635,grad_norm: 0.9999993566691096, iteration: 437187
loss: 0.9952507019042969,grad_norm: 0.8438938777123495, iteration: 437188
loss: 0.9891648292541504,grad_norm: 0.8245239474081213, iteration: 437189
loss: 0.9778199195861816,grad_norm: 0.7877998783904822, iteration: 437190
loss: 1.1618670225143433,grad_norm: 0.9999996390983894, iteration: 437191
loss: 0.9706201553344727,grad_norm: 0.912731567024374, iteration: 437192
loss: 0.9730458855628967,grad_norm: 0.7684196068043955, iteration: 437193
loss: 1.0352721214294434,grad_norm: 0.8760423046429598, iteration: 437194
loss: 0.9820324182510376,grad_norm: 0.8450774093741689, iteration: 437195
loss: 0.9923065900802612,grad_norm: 0.838564796115767, iteration: 437196
loss: 0.9615445733070374,grad_norm: 0.7514075428271679, iteration: 437197
loss: 1.0156056880950928,grad_norm: 0.7857246316356291, iteration: 437198
loss: 1.002941370010376,grad_norm: 0.6869849916489739, iteration: 437199
loss: 0.9921227097511292,grad_norm: 0.6627776060296883, iteration: 437200
loss: 1.0351810455322266,grad_norm: 1.00000004539147, iteration: 437201
loss: 1.0223207473754883,grad_norm: 0.6835760367705227, iteration: 437202
loss: 0.9846309423446655,grad_norm: 0.6769343187839345, iteration: 437203
loss: 1.018296480178833,grad_norm: 0.8463910034327201, iteration: 437204
loss: 1.040134072303772,grad_norm: 0.9999996874155598, iteration: 437205
loss: 0.9893330931663513,grad_norm: 0.9067740969499587, iteration: 437206
loss: 0.9602755904197693,grad_norm: 0.7275311667839254, iteration: 437207
loss: 1.021703839302063,grad_norm: 0.7539399320652844, iteration: 437208
loss: 0.9875860214233398,grad_norm: 0.963171278320846, iteration: 437209
loss: 0.9618948101997375,grad_norm: 0.9144944398462492, iteration: 437210
loss: 1.0045584440231323,grad_norm: 0.7665108555157714, iteration: 437211
loss: 0.9830217957496643,grad_norm: 0.7857829652864744, iteration: 437212
loss: 0.9941935539245605,grad_norm: 0.6013850504357112, iteration: 437213
loss: 1.007749080657959,grad_norm: 0.9999996157474939, iteration: 437214
loss: 1.0086212158203125,grad_norm: 0.8987467332774427, iteration: 437215
loss: 1.0072842836380005,grad_norm: 0.7637396471942441, iteration: 437216
loss: 1.0105432271957397,grad_norm: 0.7069619477078555, iteration: 437217
loss: 1.0023058652877808,grad_norm: 0.7176097433398987, iteration: 437218
loss: 0.9854158163070679,grad_norm: 0.9999990062967918, iteration: 437219
loss: 1.0210520029067993,grad_norm: 0.7765423790941535, iteration: 437220
loss: 0.9657241702079773,grad_norm: 0.8987548703320433, iteration: 437221
loss: 0.99669349193573,grad_norm: 0.8217311908700099, iteration: 437222
loss: 0.9821987748146057,grad_norm: 0.9906098270253182, iteration: 437223
loss: 0.9703054428100586,grad_norm: 0.8307665604231173, iteration: 437224
loss: 1.025698184967041,grad_norm: 0.8664064498589129, iteration: 437225
loss: 1.0979130268096924,grad_norm: 0.9999998570875934, iteration: 437226
loss: 1.1406017541885376,grad_norm: 0.9770823490585285, iteration: 437227
loss: 1.0570635795593262,grad_norm: 0.725602058032796, iteration: 437228
loss: 0.956564724445343,grad_norm: 0.780152182523618, iteration: 437229
loss: 0.993335485458374,grad_norm: 0.9259914078866475, iteration: 437230
loss: 1.0059648752212524,grad_norm: 0.9999990238107548, iteration: 437231
loss: 0.9618141651153564,grad_norm: 0.7413023477635743, iteration: 437232
loss: 0.9788230657577515,grad_norm: 0.7994296375942218, iteration: 437233
loss: 0.9683286547660828,grad_norm: 0.6532860183534593, iteration: 437234
loss: 1.0350964069366455,grad_norm: 0.7682057460531344, iteration: 437235
loss: 1.0080678462982178,grad_norm: 0.9999993436213968, iteration: 437236
loss: 0.9962524771690369,grad_norm: 0.7916228006986175, iteration: 437237
loss: 0.9939674139022827,grad_norm: 0.7662740266693778, iteration: 437238
loss: 0.9871528148651123,grad_norm: 0.7082498068231224, iteration: 437239
loss: 1.003162145614624,grad_norm: 0.9999998531148235, iteration: 437240
loss: 0.9824843406677246,grad_norm: 0.8102879964571266, iteration: 437241
loss: 0.9969590902328491,grad_norm: 0.708367572298564, iteration: 437242
loss: 1.0318920612335205,grad_norm: 0.6890543758386709, iteration: 437243
loss: 0.9496254324913025,grad_norm: 0.9999997045653924, iteration: 437244
loss: 1.0316963195800781,grad_norm: 0.7691173563760335, iteration: 437245
loss: 0.9904149770736694,grad_norm: 0.6588073613878247, iteration: 437246
loss: 0.9540630578994751,grad_norm: 0.7829110161712718, iteration: 437247
loss: 1.0006359815597534,grad_norm: 1.0000000550089245, iteration: 437248
loss: 1.009721040725708,grad_norm: 0.8443431915383883, iteration: 437249
loss: 1.0015515089035034,grad_norm: 0.6311909423635558, iteration: 437250
loss: 1.0099090337753296,grad_norm: 0.9999994664026632, iteration: 437251
loss: 1.012487530708313,grad_norm: 0.8545612380547767, iteration: 437252
loss: 1.0159423351287842,grad_norm: 0.912090092798595, iteration: 437253
loss: 0.9912144541740417,grad_norm: 0.8393456365985982, iteration: 437254
loss: 0.9636815190315247,grad_norm: 0.7304502905923163, iteration: 437255
loss: 1.0295714139938354,grad_norm: 0.8284589362902691, iteration: 437256
loss: 1.024634599685669,grad_norm: 0.8235269853912887, iteration: 437257
loss: 1.0051347017288208,grad_norm: 0.7534238127352945, iteration: 437258
loss: 0.9646438360214233,grad_norm: 0.8610568949768429, iteration: 437259
loss: 0.9996008276939392,grad_norm: 0.7138639523494227, iteration: 437260
loss: 0.9821526408195496,grad_norm: 0.8045972365520555, iteration: 437261
loss: 1.006154179573059,grad_norm: 0.7432719318340075, iteration: 437262
loss: 0.9877229928970337,grad_norm: 0.8650306070461015, iteration: 437263
loss: 1.0146770477294922,grad_norm: 0.8542932352043064, iteration: 437264
loss: 1.0684055089950562,grad_norm: 0.7646645042803722, iteration: 437265
loss: 1.0013192892074585,grad_norm: 0.8220559225486657, iteration: 437266
loss: 1.0171040296554565,grad_norm: 0.8820009328625164, iteration: 437267
loss: 1.0194494724273682,grad_norm: 0.7250949038595251, iteration: 437268
loss: 0.9984148740768433,grad_norm: 0.9899645665573091, iteration: 437269
loss: 1.2490400075912476,grad_norm: 0.9999997704001751, iteration: 437270
loss: 1.007200837135315,grad_norm: 0.8071957067599519, iteration: 437271
loss: 0.9356595277786255,grad_norm: 0.8134461728351474, iteration: 437272
loss: 1.0171847343444824,grad_norm: 0.928496202938092, iteration: 437273
loss: 0.97014319896698,grad_norm: 0.69441280237299, iteration: 437274
loss: 1.0029739141464233,grad_norm: 0.9528962154362232, iteration: 437275
loss: 1.0115423202514648,grad_norm: 0.6760119773460439, iteration: 437276
loss: 1.0197257995605469,grad_norm: 0.8101158403700377, iteration: 437277
loss: 1.0227655172348022,grad_norm: 0.7356281019324437, iteration: 437278
loss: 1.0001790523529053,grad_norm: 0.7479535230977776, iteration: 437279
loss: 1.0262328386306763,grad_norm: 0.7627917366100145, iteration: 437280
loss: 1.0085800886154175,grad_norm: 0.6298438445959712, iteration: 437281
loss: 0.9951210021972656,grad_norm: 0.7533745825732965, iteration: 437282
loss: 0.9796817898750305,grad_norm: 0.7681323608170041, iteration: 437283
loss: 1.056594967842102,grad_norm: 0.8705675329454747, iteration: 437284
loss: 0.9815222024917603,grad_norm: 0.7658991803017506, iteration: 437285
loss: 0.9903060793876648,grad_norm: 0.694996570913826, iteration: 437286
loss: 0.9945412874221802,grad_norm: 0.7310048804142762, iteration: 437287
loss: 1.0219043493270874,grad_norm: 0.8245707681934725, iteration: 437288
loss: 0.9761569499969482,grad_norm: 0.8321937578905928, iteration: 437289
loss: 1.0146178007125854,grad_norm: 0.6417030991495486, iteration: 437290
loss: 1.003177285194397,grad_norm: 0.7230040779583651, iteration: 437291
loss: 0.9792211651802063,grad_norm: 0.9999989693473886, iteration: 437292
loss: 0.985979437828064,grad_norm: 0.858625541392754, iteration: 437293
loss: 1.0036354064941406,grad_norm: 0.8807316867401994, iteration: 437294
loss: 1.0043014287948608,grad_norm: 0.7176758475556816, iteration: 437295
loss: 1.0195753574371338,grad_norm: 0.7518391691070406, iteration: 437296
loss: 1.0174180269241333,grad_norm: 0.8839172133609406, iteration: 437297
loss: 1.0011848211288452,grad_norm: 0.9190086746963091, iteration: 437298
loss: 1.1857364177703857,grad_norm: 0.9999999706407764, iteration: 437299
loss: 1.025667667388916,grad_norm: 0.9575093170342679, iteration: 437300
loss: 1.0371460914611816,grad_norm: 0.7597012266458527, iteration: 437301
loss: 1.0218157768249512,grad_norm: 0.9999992942624059, iteration: 437302
loss: 0.9781701564788818,grad_norm: 0.687673189561191, iteration: 437303
loss: 1.0542969703674316,grad_norm: 0.7409046681027576, iteration: 437304
loss: 1.0093709230422974,grad_norm: 0.702861116011963, iteration: 437305
loss: 0.9922251105308533,grad_norm: 0.8738404896670019, iteration: 437306
loss: 0.994108259677887,grad_norm: 0.815473646825805, iteration: 437307
loss: 0.9958733916282654,grad_norm: 0.7938449515330931, iteration: 437308
loss: 1.0932726860046387,grad_norm: 0.8163522899180231, iteration: 437309
loss: 1.0232393741607666,grad_norm: 0.8255730109419014, iteration: 437310
loss: 1.0236399173736572,grad_norm: 0.7656282108347822, iteration: 437311
loss: 1.0178885459899902,grad_norm: 0.7910504142217906, iteration: 437312
loss: 1.000847578048706,grad_norm: 0.8275746652803389, iteration: 437313
loss: 1.0340971946716309,grad_norm: 0.7372784499415086, iteration: 437314
loss: 1.0155882835388184,grad_norm: 0.9110784144867963, iteration: 437315
loss: 0.9950170516967773,grad_norm: 0.8304148075860737, iteration: 437316
loss: 1.001400351524353,grad_norm: 0.7935080134646793, iteration: 437317
loss: 0.9977304935455322,grad_norm: 0.6934068821161747, iteration: 437318
loss: 1.0273849964141846,grad_norm: 0.7913314333065105, iteration: 437319
loss: 1.0202348232269287,grad_norm: 0.7682788106395719, iteration: 437320
loss: 0.9804224371910095,grad_norm: 0.7235757377077867, iteration: 437321
loss: 1.0062801837921143,grad_norm: 0.7100578041010117, iteration: 437322
loss: 1.0590593814849854,grad_norm: 0.999999130356545, iteration: 437323
loss: 1.0218937397003174,grad_norm: 0.8379263367637038, iteration: 437324
loss: 1.0137099027633667,grad_norm: 0.6883375134234433, iteration: 437325
loss: 0.9453750252723694,grad_norm: 0.988640110431411, iteration: 437326
loss: 1.0096670389175415,grad_norm: 0.6959144088651094, iteration: 437327
loss: 1.0054875612258911,grad_norm: 0.9999999333840581, iteration: 437328
loss: 1.014159083366394,grad_norm: 0.6792801065951364, iteration: 437329
loss: 1.0878808498382568,grad_norm: 0.9999996704173458, iteration: 437330
loss: 1.020024299621582,grad_norm: 0.951765641038952, iteration: 437331
loss: 0.9851108193397522,grad_norm: 0.6061155136187059, iteration: 437332
loss: 0.9909176230430603,grad_norm: 0.6176714310255457, iteration: 437333
loss: 0.9488990902900696,grad_norm: 0.6738225802645214, iteration: 437334
loss: 1.00386381149292,grad_norm: 0.7445725639850355, iteration: 437335
loss: 0.9905145168304443,grad_norm: 0.8490852135123819, iteration: 437336
loss: 1.0342388153076172,grad_norm: 0.683717443463834, iteration: 437337
loss: 0.967030942440033,grad_norm: 0.7534669032396174, iteration: 437338
loss: 0.9739208817481995,grad_norm: 0.7670205061763006, iteration: 437339
loss: 0.990285336971283,grad_norm: 0.8634260572534022, iteration: 437340
loss: 1.0279943943023682,grad_norm: 0.734575988048594, iteration: 437341
loss: 1.048205852508545,grad_norm: 0.9965700910072285, iteration: 437342
loss: 0.9781036972999573,grad_norm: 0.7504610095044966, iteration: 437343
loss: 1.0228362083435059,grad_norm: 0.8679560641750644, iteration: 437344
loss: 0.9733760356903076,grad_norm: 0.8170761925425956, iteration: 437345
loss: 1.0025097131729126,grad_norm: 0.7617619293806764, iteration: 437346
loss: 0.9861674308776855,grad_norm: 0.8807460640554786, iteration: 437347
loss: 1.0193390846252441,grad_norm: 0.6729717405540995, iteration: 437348
loss: 1.0997258424758911,grad_norm: 0.9999992023459481, iteration: 437349
loss: 1.0046297311782837,grad_norm: 0.77331545517944, iteration: 437350
loss: 0.9856830835342407,grad_norm: 0.6251187005014737, iteration: 437351
loss: 0.9961753487586975,grad_norm: 0.9999989423484976, iteration: 437352
loss: 0.978882908821106,grad_norm: 0.8499256391274281, iteration: 437353
loss: 1.0412068367004395,grad_norm: 0.7616299588325746, iteration: 437354
loss: 1.0014634132385254,grad_norm: 0.7671059998995541, iteration: 437355
loss: 1.004337191581726,grad_norm: 0.6838300419230328, iteration: 437356
loss: 1.0215520858764648,grad_norm: 0.82050154569727, iteration: 437357
loss: 0.9507789611816406,grad_norm: 0.8584217603593037, iteration: 437358
loss: 1.0065232515335083,grad_norm: 0.8708635895155068, iteration: 437359
loss: 0.9878676533699036,grad_norm: 0.9999998292902061, iteration: 437360
loss: 1.0567541122436523,grad_norm: 0.8272758777902776, iteration: 437361
loss: 1.0446605682373047,grad_norm: 0.8305291206446308, iteration: 437362
loss: 0.9918610453605652,grad_norm: 0.7427592847849561, iteration: 437363
loss: 1.0178406238555908,grad_norm: 0.7605981614111069, iteration: 437364
loss: 0.9965435862541199,grad_norm: 0.7843373552597527, iteration: 437365
loss: 0.985913097858429,grad_norm: 0.8893428604914565, iteration: 437366
loss: 1.0090445280075073,grad_norm: 0.7819709943194033, iteration: 437367
loss: 1.0651538372039795,grad_norm: 0.9873774731179494, iteration: 437368
loss: 0.9851362705230713,grad_norm: 0.7351523656767635, iteration: 437369
loss: 0.9989403486251831,grad_norm: 0.9999550352175273, iteration: 437370
loss: 0.9900866150856018,grad_norm: 0.7153670316820561, iteration: 437371
loss: 1.0257329940795898,grad_norm: 0.7463193730034803, iteration: 437372
loss: 1.0015455484390259,grad_norm: 0.8086349857823631, iteration: 437373
loss: 0.9591807723045349,grad_norm: 0.8574702752097401, iteration: 437374
loss: 1.0444307327270508,grad_norm: 0.7734473164684591, iteration: 437375
loss: 1.0835959911346436,grad_norm: 0.9999995138743429, iteration: 437376
loss: 0.97759610414505,grad_norm: 0.6284539414365184, iteration: 437377
loss: 0.9875666499137878,grad_norm: 0.6423500867806727, iteration: 437378
loss: 1.0339958667755127,grad_norm: 0.858944103602518, iteration: 437379
loss: 0.9661061763763428,grad_norm: 0.7860061264774785, iteration: 437380
loss: 0.9982161521911621,grad_norm: 0.7516508966162279, iteration: 437381
loss: 0.9790447354316711,grad_norm: 0.7175990778167314, iteration: 437382
loss: 1.0030739307403564,grad_norm: 0.6370549206118458, iteration: 437383
loss: 1.039056658744812,grad_norm: 0.9116886484981002, iteration: 437384
loss: 0.9903354048728943,grad_norm: 0.874705641912997, iteration: 437385
loss: 0.9677866697311401,grad_norm: 0.6819017642150219, iteration: 437386
loss: 1.0305397510528564,grad_norm: 0.9073339485684541, iteration: 437387
loss: 1.0256069898605347,grad_norm: 0.8473128125978777, iteration: 437388
loss: 0.9975200295448303,grad_norm: 0.8376058493817006, iteration: 437389
loss: 1.0126328468322754,grad_norm: 0.856867018105253, iteration: 437390
loss: 0.9920293688774109,grad_norm: 0.931974264634632, iteration: 437391
loss: 1.024605631828308,grad_norm: 0.8164670884768279, iteration: 437392
loss: 1.0235986709594727,grad_norm: 0.5899447914623166, iteration: 437393
loss: 0.9856033325195312,grad_norm: 0.7642697525784836, iteration: 437394
loss: 0.9894329905509949,grad_norm: 0.8600483205441153, iteration: 437395
loss: 1.0748941898345947,grad_norm: 0.9999999803455905, iteration: 437396
loss: 0.9915769696235657,grad_norm: 0.805526260293256, iteration: 437397
loss: 1.011953592300415,grad_norm: 0.8277631646503623, iteration: 437398
loss: 0.9769421815872192,grad_norm: 0.8641736791005566, iteration: 437399
loss: 1.0812641382217407,grad_norm: 1.0000000023284685, iteration: 437400
loss: 1.0097951889038086,grad_norm: 0.8717210963130612, iteration: 437401
loss: 1.0033053159713745,grad_norm: 0.9156784878933478, iteration: 437402
loss: 0.9912341833114624,grad_norm: 0.7948348477631526, iteration: 437403
loss: 1.0089759826660156,grad_norm: 0.8198855324773089, iteration: 437404
loss: 1.0103497505187988,grad_norm: 0.7759846021685397, iteration: 437405
loss: 1.0169503688812256,grad_norm: 0.8701967471297168, iteration: 437406
loss: 1.0016065835952759,grad_norm: 0.9999991379483475, iteration: 437407
loss: 1.0147117376327515,grad_norm: 0.7170478970463435, iteration: 437408
loss: 1.08320951461792,grad_norm: 0.9999994283823506, iteration: 437409
loss: 0.9953233599662781,grad_norm: 0.6425817618219074, iteration: 437410
loss: 1.0125595331192017,grad_norm: 0.7913813450445477, iteration: 437411
loss: 0.9990777969360352,grad_norm: 0.8932069183443198, iteration: 437412
loss: 1.0106126070022583,grad_norm: 0.8308355281594346, iteration: 437413
loss: 0.937181830406189,grad_norm: 0.9107188950874007, iteration: 437414
loss: 0.9946013689041138,grad_norm: 0.8335580391140793, iteration: 437415
loss: 1.007305383682251,grad_norm: 0.8763838267779074, iteration: 437416
loss: 0.986567497253418,grad_norm: 0.7856795424462732, iteration: 437417
loss: 1.0424609184265137,grad_norm: 0.7960422775061443, iteration: 437418
loss: 1.0608445405960083,grad_norm: 0.8763636439597724, iteration: 437419
loss: 1.0390630960464478,grad_norm: 0.7093077390059351, iteration: 437420
loss: 1.0018479824066162,grad_norm: 0.9999996406328171, iteration: 437421
loss: 0.9869666695594788,grad_norm: 0.7566882532603675, iteration: 437422
loss: 0.9932855367660522,grad_norm: 0.9046866199182646, iteration: 437423
loss: 1.2066980600357056,grad_norm: 0.9999998989112586, iteration: 437424
loss: 1.029595971107483,grad_norm: 0.7821768233054999, iteration: 437425
loss: 1.0617036819458008,grad_norm: 0.8224264845782224, iteration: 437426
loss: 1.0026181936264038,grad_norm: 0.8212359363905435, iteration: 437427
loss: 0.9859426617622375,grad_norm: 0.6750309347905732, iteration: 437428
loss: 0.9922297596931458,grad_norm: 0.742452769727622, iteration: 437429
loss: 0.9930457472801208,grad_norm: 0.7272749802091715, iteration: 437430
loss: 1.021146535873413,grad_norm: 0.9604941654499015, iteration: 437431
loss: 0.9856355786323547,grad_norm: 0.7835733633738726, iteration: 437432
loss: 0.9913560748100281,grad_norm: 0.8141697209240333, iteration: 437433
loss: 1.0055651664733887,grad_norm: 0.999999135056145, iteration: 437434
loss: 1.0380890369415283,grad_norm: 0.9147336704883512, iteration: 437435
loss: 0.9983879327774048,grad_norm: 0.7404993333477219, iteration: 437436
loss: 0.952204704284668,grad_norm: 0.6532216589352472, iteration: 437437
loss: 1.0629279613494873,grad_norm: 0.9999991002688172, iteration: 437438
loss: 0.9706858992576599,grad_norm: 0.7152214108225521, iteration: 437439
loss: 1.0106525421142578,grad_norm: 0.9999989848800388, iteration: 437440
loss: 1.0155625343322754,grad_norm: 0.7376729342953561, iteration: 437441
loss: 0.980597198009491,grad_norm: 0.9132398410361692, iteration: 437442
loss: 1.0410257577896118,grad_norm: 0.9140477594304651, iteration: 437443
loss: 1.003339171409607,grad_norm: 0.7615847289432612, iteration: 437444
loss: 1.0155713558197021,grad_norm: 0.6754509080625005, iteration: 437445
loss: 0.966663658618927,grad_norm: 0.8587683038234749, iteration: 437446
loss: 1.010462999343872,grad_norm: 0.8022691484066852, iteration: 437447
loss: 0.9998864531517029,grad_norm: 0.7675092195866955, iteration: 437448
loss: 0.9471957683563232,grad_norm: 0.848968810609086, iteration: 437449
loss: 0.975432276725769,grad_norm: 0.9999990919659716, iteration: 437450
loss: 1.0198529958724976,grad_norm: 0.7709009771435972, iteration: 437451
loss: 0.9763073921203613,grad_norm: 0.9999997070423061, iteration: 437452
loss: 1.012496829032898,grad_norm: 1.00000006414191, iteration: 437453
loss: 0.990534245967865,grad_norm: 0.8880128142897497, iteration: 437454
loss: 0.9824718832969666,grad_norm: 0.7438630481442547, iteration: 437455
loss: 0.9935219883918762,grad_norm: 0.7784828461049256, iteration: 437456
loss: 0.9891065955162048,grad_norm: 0.7290879614403579, iteration: 437457
loss: 0.9718677401542664,grad_norm: 0.8003795263727925, iteration: 437458
loss: 1.0333006381988525,grad_norm: 0.7248893062196479, iteration: 437459
loss: 1.0238314867019653,grad_norm: 0.7834712857803025, iteration: 437460
loss: 0.9807683229446411,grad_norm: 0.6712871588286295, iteration: 437461
loss: 0.9891234636306763,grad_norm: 0.6936150233872759, iteration: 437462
loss: 1.0359426736831665,grad_norm: 0.8150108613025865, iteration: 437463
loss: 0.9782573580741882,grad_norm: 0.867769716589046, iteration: 437464
loss: 0.9925490021705627,grad_norm: 0.7907373852949005, iteration: 437465
loss: 1.0376092195510864,grad_norm: 0.8579393734640294, iteration: 437466
loss: 1.0119365453720093,grad_norm: 0.6697863387753078, iteration: 437467
loss: 0.997499406337738,grad_norm: 0.6511660376931128, iteration: 437468
loss: 1.0030533075332642,grad_norm: 0.9918879497672287, iteration: 437469
loss: 1.0285142660140991,grad_norm: 0.7598399554771162, iteration: 437470
loss: 0.9935147762298584,grad_norm: 0.8458554607809902, iteration: 437471
loss: 1.0093716382980347,grad_norm: 0.8237984163829888, iteration: 437472
loss: 0.9868077039718628,grad_norm: 0.7455030592690461, iteration: 437473
loss: 1.017242670059204,grad_norm: 0.9536821107803388, iteration: 437474
loss: 0.9775077104568481,grad_norm: 0.7915205678439807, iteration: 437475
loss: 1.127760410308838,grad_norm: 0.9999997576537133, iteration: 437476
loss: 0.960154116153717,grad_norm: 0.7785839622043531, iteration: 437477
loss: 0.9878496527671814,grad_norm: 0.7374485447658815, iteration: 437478
loss: 1.0143682956695557,grad_norm: 0.6024077196266824, iteration: 437479
loss: 1.0169601440429688,grad_norm: 0.6684433612658005, iteration: 437480
loss: 0.9960055351257324,grad_norm: 0.9122868851430653, iteration: 437481
loss: 1.0319876670837402,grad_norm: 0.7884973051270813, iteration: 437482
loss: 1.0453850030899048,grad_norm: 0.8898624993820717, iteration: 437483
loss: 1.0088850259780884,grad_norm: 0.805739126877917, iteration: 437484
loss: 1.034458875656128,grad_norm: 0.9207787241708302, iteration: 437485
loss: 0.9916642904281616,grad_norm: 0.8802131517114457, iteration: 437486
loss: 0.9726582765579224,grad_norm: 0.875883523071101, iteration: 437487
loss: 1.0157166719436646,grad_norm: 0.6576937671634405, iteration: 437488
loss: 0.9453603625297546,grad_norm: 0.7949306926404376, iteration: 437489
loss: 0.9754934310913086,grad_norm: 0.8569818663536934, iteration: 437490
loss: 0.9746462106704712,grad_norm: 0.8229684040025967, iteration: 437491
loss: 1.048484206199646,grad_norm: 0.7217131535157358, iteration: 437492
loss: 1.0163050889968872,grad_norm: 0.8954364794791675, iteration: 437493
loss: 1.0428494215011597,grad_norm: 0.7649562601113267, iteration: 437494
loss: 1.013800024986267,grad_norm: 0.9999999551687697, iteration: 437495
loss: 1.0082863569259644,grad_norm: 0.9320974884853969, iteration: 437496
loss: 0.9955864548683167,grad_norm: 0.8190440654701344, iteration: 437497
loss: 1.0087008476257324,grad_norm: 0.9204946199517899, iteration: 437498
loss: 0.9974614977836609,grad_norm: 0.7353964709031248, iteration: 437499
loss: 1.016278624534607,grad_norm: 0.7085665050375607, iteration: 437500
loss: 0.9905614852905273,grad_norm: 0.7295099766443125, iteration: 437501
loss: 1.022310733795166,grad_norm: 0.993281560648657, iteration: 437502
loss: 1.0034489631652832,grad_norm: 0.9999994740799466, iteration: 437503
loss: 0.9994223713874817,grad_norm: 0.7992486571217784, iteration: 437504
loss: 1.016597867012024,grad_norm: 0.9999990635947404, iteration: 437505
loss: 1.0327813625335693,grad_norm: 0.8437830660613638, iteration: 437506
loss: 1.0261789560317993,grad_norm: 0.6341012465710142, iteration: 437507
loss: 1.049049973487854,grad_norm: 0.6723637491078897, iteration: 437508
loss: 0.9926988482475281,grad_norm: 0.7663608025773261, iteration: 437509
loss: 1.0095171928405762,grad_norm: 0.7365653831196746, iteration: 437510
loss: 1.0123710632324219,grad_norm: 0.7861019440156486, iteration: 437511
loss: 0.9800549745559692,grad_norm: 0.7985912609451115, iteration: 437512
loss: 0.9968788623809814,grad_norm: 0.8830547972647717, iteration: 437513
loss: 0.9702088236808777,grad_norm: 0.6337307807598247, iteration: 437514
loss: 1.0187687873840332,grad_norm: 0.727143013808705, iteration: 437515
loss: 1.1008318662643433,grad_norm: 0.9999995410146576, iteration: 437516
loss: 0.9849847555160522,grad_norm: 0.9999990652942644, iteration: 437517
loss: 1.003745198249817,grad_norm: 0.7843374990027736, iteration: 437518
loss: 0.9933328032493591,grad_norm: 0.9000459387484446, iteration: 437519
loss: 1.037335753440857,grad_norm: 0.775694255932167, iteration: 437520
loss: 1.0131086111068726,grad_norm: 0.7548401233255165, iteration: 437521
loss: 0.9923725128173828,grad_norm: 0.7717045293555809, iteration: 437522
loss: 0.9842397570610046,grad_norm: 0.7077925245130012, iteration: 437523
loss: 1.0110526084899902,grad_norm: 0.9999998241016567, iteration: 437524
loss: 1.0293818712234497,grad_norm: 0.657546694127362, iteration: 437525
loss: 1.0253061056137085,grad_norm: 0.9999990186808453, iteration: 437526
loss: 1.0377610921859741,grad_norm: 0.9999996644986401, iteration: 437527
loss: 1.01933753490448,grad_norm: 0.8815544980629655, iteration: 437528
loss: 1.0664714574813843,grad_norm: 0.9999991693327738, iteration: 437529
loss: 1.0186179876327515,grad_norm: 0.8381645310345806, iteration: 437530
loss: 0.9892085790634155,grad_norm: 0.6964265047244448, iteration: 437531
loss: 1.0531682968139648,grad_norm: 0.7869185932148091, iteration: 437532
loss: 1.0037425756454468,grad_norm: 0.6512093254739464, iteration: 437533
loss: 1.085383653640747,grad_norm: 0.9999995385750643, iteration: 437534
loss: 1.021691918373108,grad_norm: 0.736085515139356, iteration: 437535
loss: 1.0082170963287354,grad_norm: 0.8415967275070323, iteration: 437536
loss: 1.042708158493042,grad_norm: 0.7660577372482835, iteration: 437537
loss: 0.9711101651191711,grad_norm: 0.7401499156259318, iteration: 437538
loss: 1.0022763013839722,grad_norm: 0.7926792259661073, iteration: 437539
loss: 1.066559076309204,grad_norm: 0.9999991468104965, iteration: 437540
loss: 1.077023983001709,grad_norm: 0.9999991386124105, iteration: 437541
loss: 1.0548410415649414,grad_norm: 0.8440607523341875, iteration: 437542
loss: 1.0898699760437012,grad_norm: 1.0000000806388412, iteration: 437543
loss: 1.0260975360870361,grad_norm: 0.8165798079477502, iteration: 437544
loss: 1.0250821113586426,grad_norm: 0.8697337615159899, iteration: 437545
loss: 0.9828880429267883,grad_norm: 0.8171528879604376, iteration: 437546
loss: 1.0071486234664917,grad_norm: 0.9999991462269586, iteration: 437547
loss: 1.0241037607192993,grad_norm: 0.9999998156394349, iteration: 437548
loss: 0.9933584332466125,grad_norm: 0.7329501784867221, iteration: 437549
loss: 1.0057374238967896,grad_norm: 0.7765790599422494, iteration: 437550
loss: 1.0111171007156372,grad_norm: 0.7288049455483432, iteration: 437551
loss: 0.9933747053146362,grad_norm: 0.7864268785610584, iteration: 437552
loss: 1.0105302333831787,grad_norm: 0.8001747900971103, iteration: 437553
loss: 0.9942163825035095,grad_norm: 0.8619100000757962, iteration: 437554
loss: 0.9806658625602722,grad_norm: 0.8638449365382914, iteration: 437555
loss: 1.0022861957550049,grad_norm: 0.835243847250594, iteration: 437556
loss: 1.0304081439971924,grad_norm: 0.7196172318247132, iteration: 437557
loss: 1.0037245750427246,grad_norm: 0.7262164811476988, iteration: 437558
loss: 1.0089197158813477,grad_norm: 0.7599172429874131, iteration: 437559
loss: 1.0362632274627686,grad_norm: 0.9157081666279238, iteration: 437560
loss: 1.0039844512939453,grad_norm: 0.7110584643838853, iteration: 437561
loss: 1.0198779106140137,grad_norm: 0.9001529220858999, iteration: 437562
loss: 1.028342604637146,grad_norm: 0.7045169083016258, iteration: 437563
loss: 1.040719747543335,grad_norm: 0.9366111625105938, iteration: 437564
loss: 1.0112264156341553,grad_norm: 0.8119022562240992, iteration: 437565
loss: 1.0412790775299072,grad_norm: 0.9999996620666542, iteration: 437566
loss: 0.9862303733825684,grad_norm: 0.8133730106498642, iteration: 437567
loss: 0.9742535352706909,grad_norm: 0.837399276661901, iteration: 437568
loss: 1.0250799655914307,grad_norm: 0.692378883319306, iteration: 437569
loss: 0.9840207099914551,grad_norm: 0.8686531662826967, iteration: 437570
loss: 0.9832541346549988,grad_norm: 0.7461098556137424, iteration: 437571
loss: 0.9988587498664856,grad_norm: 0.9365830206881632, iteration: 437572
loss: 0.9900692701339722,grad_norm: 0.7831222840936439, iteration: 437573
loss: 1.0057626962661743,grad_norm: 0.6844543945045908, iteration: 437574
loss: 1.003749132156372,grad_norm: 0.6323755576044189, iteration: 437575
loss: 1.042986512184143,grad_norm: 0.6616262563860027, iteration: 437576
loss: 0.9962392449378967,grad_norm: 0.8605934690784323, iteration: 437577
loss: 1.097324252128601,grad_norm: 0.9999999302672107, iteration: 437578
loss: 1.080342173576355,grad_norm: 0.9999990819731649, iteration: 437579
loss: 0.9693756103515625,grad_norm: 0.6952636450395873, iteration: 437580
loss: 1.0118765830993652,grad_norm: 0.9486152385733347, iteration: 437581
loss: 1.071889877319336,grad_norm: 0.9157786899959338, iteration: 437582
loss: 0.9923486113548279,grad_norm: 0.7244372620746691, iteration: 437583
loss: 0.9845698475837708,grad_norm: 0.7953840210519665, iteration: 437584
loss: 0.9928314089775085,grad_norm: 0.8624678883423782, iteration: 437585
loss: 0.9892165064811707,grad_norm: 0.8437337772243478, iteration: 437586
loss: 1.0159580707550049,grad_norm: 0.7349699669399811, iteration: 437587
loss: 0.9579054117202759,grad_norm: 0.9878805907501861, iteration: 437588
loss: 1.0738264322280884,grad_norm: 0.8375090704545947, iteration: 437589
loss: 0.9972829818725586,grad_norm: 0.8457646798260228, iteration: 437590
loss: 1.0440410375595093,grad_norm: 0.9999993591614897, iteration: 437591
loss: 0.9556037187576294,grad_norm: 0.8806932460014275, iteration: 437592
loss: 0.990047037601471,grad_norm: 0.6628900514270776, iteration: 437593
loss: 0.9860033392906189,grad_norm: 0.8060809244312894, iteration: 437594
loss: 1.067317008972168,grad_norm: 0.8015010708316274, iteration: 437595
loss: 1.0158424377441406,grad_norm: 0.6541053205705385, iteration: 437596
loss: 1.041025996208191,grad_norm: 0.8022708674169633, iteration: 437597
loss: 0.9891940355300903,grad_norm: 0.8253186935264138, iteration: 437598
loss: 1.0493309497833252,grad_norm: 0.7652672941407348, iteration: 437599
loss: 1.0056545734405518,grad_norm: 0.8059094803217405, iteration: 437600
loss: 0.9843858480453491,grad_norm: 0.8517314094709424, iteration: 437601
loss: 1.0033304691314697,grad_norm: 0.9402648730985543, iteration: 437602
loss: 1.069288969039917,grad_norm: 0.8657225203876757, iteration: 437603
loss: 1.0555105209350586,grad_norm: 0.9999991086158443, iteration: 437604
loss: 0.9797067046165466,grad_norm: 0.7954939787938013, iteration: 437605
loss: 0.9977709650993347,grad_norm: 0.6667322414547894, iteration: 437606
loss: 0.9804844260215759,grad_norm: 0.7899897028210864, iteration: 437607
loss: 1.0055408477783203,grad_norm: 0.8224832388385244, iteration: 437608
loss: 0.9604952335357666,grad_norm: 0.6745972870879519, iteration: 437609
loss: 1.0102829933166504,grad_norm: 0.8691923641995074, iteration: 437610
loss: 0.974532425403595,grad_norm: 0.8174308312903489, iteration: 437611
loss: 0.9812304973602295,grad_norm: 0.7345898163811546, iteration: 437612
loss: 0.9894486665725708,grad_norm: 0.7372014861550727, iteration: 437613
loss: 1.0000622272491455,grad_norm: 0.6267883425410655, iteration: 437614
loss: 0.9934932589530945,grad_norm: 0.7976816451166194, iteration: 437615
loss: 1.0302493572235107,grad_norm: 0.8729893907108908, iteration: 437616
loss: 1.0285496711730957,grad_norm: 0.7307869414589164, iteration: 437617
loss: 0.9435129761695862,grad_norm: 0.8450609652774986, iteration: 437618
loss: 1.0172619819641113,grad_norm: 0.7694247472289735, iteration: 437619
loss: 0.9724003672599792,grad_norm: 0.7122577288761102, iteration: 437620
loss: 0.9850476980209351,grad_norm: 0.8548000483842118, iteration: 437621
loss: 0.9858677983283997,grad_norm: 0.9771730760829962, iteration: 437622
loss: 0.9723837971687317,grad_norm: 0.7274040961310367, iteration: 437623
loss: 0.9598063826560974,grad_norm: 0.7644198040634982, iteration: 437624
loss: 0.993331789970398,grad_norm: 0.6951777319733764, iteration: 437625
loss: 0.9914343953132629,grad_norm: 0.6342650400814537, iteration: 437626
loss: 1.0267504453659058,grad_norm: 0.7828442382140257, iteration: 437627
loss: 0.999774158000946,grad_norm: 0.7339941071643523, iteration: 437628
loss: 0.9520882964134216,grad_norm: 0.834774878111664, iteration: 437629
loss: 0.9950709342956543,grad_norm: 0.7533879966039864, iteration: 437630
loss: 1.020493984222412,grad_norm: 0.7515317361704009, iteration: 437631
loss: 0.9877405762672424,grad_norm: 0.719393495630075, iteration: 437632
loss: 1.0191105604171753,grad_norm: 0.9999997515385527, iteration: 437633
loss: 0.9590935111045837,grad_norm: 0.8745430764811791, iteration: 437634
loss: 0.9720063805580139,grad_norm: 0.7368515854607237, iteration: 437635
loss: 1.0196956396102905,grad_norm: 0.6964065921676399, iteration: 437636
loss: 0.9969808459281921,grad_norm: 0.591196171437332, iteration: 437637
loss: 0.9845179915428162,grad_norm: 0.6182120958774218, iteration: 437638
loss: 1.0155680179595947,grad_norm: 0.8366591960374115, iteration: 437639
loss: 1.0165212154388428,grad_norm: 0.7192199086185215, iteration: 437640
loss: 1.0618637800216675,grad_norm: 0.7221668422084065, iteration: 437641
loss: 0.9738731980323792,grad_norm: 0.7500654824648224, iteration: 437642
loss: 1.0164128541946411,grad_norm: 0.7904293286712387, iteration: 437643
loss: 0.9906591773033142,grad_norm: 0.8190530720865214, iteration: 437644
loss: 0.9854809045791626,grad_norm: 0.789199824200687, iteration: 437645
loss: 1.0070818662643433,grad_norm: 0.9207697692628212, iteration: 437646
loss: 1.064659595489502,grad_norm: 0.8893964631990658, iteration: 437647
loss: 0.9691725373268127,grad_norm: 0.8286818349443702, iteration: 437648
loss: 0.9771105647087097,grad_norm: 0.6175553674917492, iteration: 437649
loss: 1.00625479221344,grad_norm: 0.8209239621263039, iteration: 437650
loss: 1.0052754878997803,grad_norm: 0.917208140897118, iteration: 437651
loss: 0.9932441711425781,grad_norm: 0.7641649512039758, iteration: 437652
loss: 1.0212074518203735,grad_norm: 0.9999998061201907, iteration: 437653
loss: 1.0033881664276123,grad_norm: 0.7355213968049414, iteration: 437654
loss: 1.0035405158996582,grad_norm: 0.9999998494041418, iteration: 437655
loss: 1.0032225847244263,grad_norm: 0.7546143077318167, iteration: 437656
loss: 0.9896033406257629,grad_norm: 0.6580307611661802, iteration: 437657
loss: 1.0063148736953735,grad_norm: 0.6647644120715652, iteration: 437658
loss: 1.0007508993148804,grad_norm: 0.7538748060347913, iteration: 437659
loss: 0.9606465697288513,grad_norm: 0.8432547290108799, iteration: 437660
loss: 1.0315154790878296,grad_norm: 0.841579828985002, iteration: 437661
loss: 0.9958096146583557,grad_norm: 0.6910458835958826, iteration: 437662
loss: 0.989351749420166,grad_norm: 0.9999999120998393, iteration: 437663
loss: 1.0452148914337158,grad_norm: 0.8406250490695433, iteration: 437664
loss: 1.0028778314590454,grad_norm: 0.8821548832538576, iteration: 437665
loss: 0.977415144443512,grad_norm: 0.8057066639897664, iteration: 437666
loss: 1.006948471069336,grad_norm: 0.81032041834502, iteration: 437667
loss: 1.0458561182022095,grad_norm: 0.6998126695236551, iteration: 437668
loss: 0.9982606768608093,grad_norm: 0.7654884967698793, iteration: 437669
loss: 1.0395877361297607,grad_norm: 0.6506090187033365, iteration: 437670
loss: 0.9873877167701721,grad_norm: 0.6931221715809625, iteration: 437671
loss: 0.9727370738983154,grad_norm: 0.800891601682368, iteration: 437672
loss: 0.9897092580795288,grad_norm: 0.7332917887634525, iteration: 437673
loss: 0.9924866557121277,grad_norm: 0.8579875590603292, iteration: 437674
loss: 1.0798314809799194,grad_norm: 0.9999998521787223, iteration: 437675
loss: 0.9817780256271362,grad_norm: 0.8230169358629817, iteration: 437676
loss: 0.9844800233840942,grad_norm: 0.834231297040926, iteration: 437677
loss: 1.0040819644927979,grad_norm: 0.6314822651231518, iteration: 437678
loss: 1.0283238887786865,grad_norm: 0.894116421952174, iteration: 437679
loss: 0.9598820209503174,grad_norm: 0.8978662843876625, iteration: 437680
loss: 0.9411470890045166,grad_norm: 0.7187093485122592, iteration: 437681
loss: 0.9792167544364929,grad_norm: 0.7938074373357141, iteration: 437682
loss: 0.9925280213356018,grad_norm: 0.777506060538318, iteration: 437683
loss: 0.976120114326477,grad_norm: 0.9157577967995529, iteration: 437684
loss: 0.9917788505554199,grad_norm: 0.7967938589846488, iteration: 437685
loss: 0.9800469875335693,grad_norm: 0.8067886811318017, iteration: 437686
loss: 0.9982986450195312,grad_norm: 0.6888828653706704, iteration: 437687
loss: 1.0488337278366089,grad_norm: 0.9267880800497116, iteration: 437688
loss: 1.0028795003890991,grad_norm: 0.7407296954864779, iteration: 437689
loss: 0.9716677665710449,grad_norm: 0.700945025335744, iteration: 437690
loss: 1.0942758321762085,grad_norm: 0.9999992106739723, iteration: 437691
loss: 1.0247793197631836,grad_norm: 0.6998842002484953, iteration: 437692
loss: 1.0202676057815552,grad_norm: 0.7449296293596065, iteration: 437693
loss: 0.9644273519515991,grad_norm: 0.815276245675255, iteration: 437694
loss: 0.9572739005088806,grad_norm: 0.7388730464799546, iteration: 437695
loss: 0.9861636757850647,grad_norm: 0.9040811135915287, iteration: 437696
loss: 0.9766407608985901,grad_norm: 0.7908834835286195, iteration: 437697
loss: 1.0533490180969238,grad_norm: 0.7746687392200563, iteration: 437698
loss: 0.9878383874893188,grad_norm: 0.6975898689629646, iteration: 437699
loss: 0.9759135842323303,grad_norm: 0.8331215821225686, iteration: 437700
loss: 0.9830716848373413,grad_norm: 0.6776633294207417, iteration: 437701
loss: 1.0343785285949707,grad_norm: 0.999999097780588, iteration: 437702
loss: 0.9658074975013733,grad_norm: 0.9019404962660907, iteration: 437703
loss: 1.0443370342254639,grad_norm: 0.9999998700119334, iteration: 437704
loss: 1.0121208429336548,grad_norm: 0.8752369532129649, iteration: 437705
loss: 0.9914224147796631,grad_norm: 0.6993855723291215, iteration: 437706
loss: 0.9841731786727905,grad_norm: 0.7599439765892917, iteration: 437707
loss: 0.9547863006591797,grad_norm: 0.8118873789250327, iteration: 437708
loss: 0.9693139791488647,grad_norm: 0.761927570538484, iteration: 437709
loss: 0.9796112179756165,grad_norm: 0.7492204795209142, iteration: 437710
loss: 1.0378811359405518,grad_norm: 0.7241277378272007, iteration: 437711
loss: 0.9513766169548035,grad_norm: 0.7286494098892468, iteration: 437712
loss: 1.0608112812042236,grad_norm: 0.8779990855465106, iteration: 437713
loss: 1.0378018617630005,grad_norm: 0.9999991386101593, iteration: 437714
loss: 1.0042496919631958,grad_norm: 0.6976534466890769, iteration: 437715
loss: 1.0160880088806152,grad_norm: 0.7045340491572168, iteration: 437716
loss: 1.0062435865402222,grad_norm: 0.715800649595814, iteration: 437717
loss: 1.0431222915649414,grad_norm: 0.8111191248670421, iteration: 437718
loss: 0.9818326234817505,grad_norm: 0.8687952702478307, iteration: 437719
loss: 1.0111278295516968,grad_norm: 0.7407801597101951, iteration: 437720
loss: 1.0733256340026855,grad_norm: 0.9999989791833679, iteration: 437721
loss: 1.0100164413452148,grad_norm: 0.9590522137116769, iteration: 437722
loss: 1.0075737237930298,grad_norm: 0.7268229126145831, iteration: 437723
loss: 1.0034832954406738,grad_norm: 0.8639123289525837, iteration: 437724
loss: 1.05976402759552,grad_norm: 0.9999992836692343, iteration: 437725
loss: 1.0478378534317017,grad_norm: 0.86426795852121, iteration: 437726
loss: 0.9972758293151855,grad_norm: 0.8663698611592213, iteration: 437727
loss: 1.1135716438293457,grad_norm: 0.9999996028324796, iteration: 437728
loss: 1.140470266342163,grad_norm: 0.9999989975307114, iteration: 437729
loss: 0.9808928370475769,grad_norm: 0.7228331276278935, iteration: 437730
loss: 0.9722371697425842,grad_norm: 0.8057403304259975, iteration: 437731
loss: 1.015525221824646,grad_norm: 0.8178437409882502, iteration: 437732
loss: 1.0019302368164062,grad_norm: 0.6928353431638032, iteration: 437733
loss: 0.9636129140853882,grad_norm: 0.7897555348082127, iteration: 437734
loss: 0.9957926869392395,grad_norm: 0.9272036826285689, iteration: 437735
loss: 0.9923079609870911,grad_norm: 0.999999711915968, iteration: 437736
loss: 1.012542963027954,grad_norm: 0.8508598020822982, iteration: 437737
loss: 1.0536319017410278,grad_norm: 0.7007751103589103, iteration: 437738
loss: 1.0275341272354126,grad_norm: 0.6969772902935657, iteration: 437739
loss: 1.0016322135925293,grad_norm: 0.8456033621937272, iteration: 437740
loss: 1.0895966291427612,grad_norm: 0.9999994653356801, iteration: 437741
loss: 0.983512818813324,grad_norm: 0.8699291975609205, iteration: 437742
loss: 0.9935145378112793,grad_norm: 0.7371463221282175, iteration: 437743
loss: 1.0998284816741943,grad_norm: 0.9999996152017919, iteration: 437744
loss: 0.9922692179679871,grad_norm: 0.9628017117020935, iteration: 437745
loss: 1.0036468505859375,grad_norm: 0.9467094147317613, iteration: 437746
loss: 1.0305869579315186,grad_norm: 0.846475200301869, iteration: 437747
loss: 1.0301936864852905,grad_norm: 0.9999992005868059, iteration: 437748
loss: 1.0993770360946655,grad_norm: 0.8652779798576228, iteration: 437749
loss: 1.0412611961364746,grad_norm: 0.9999998329297035, iteration: 437750
loss: 1.0232938528060913,grad_norm: 0.8509655312006332, iteration: 437751
loss: 1.0293996334075928,grad_norm: 0.8038065651169257, iteration: 437752
loss: 1.0011770725250244,grad_norm: 0.7620362001489555, iteration: 437753
loss: 0.96828293800354,grad_norm: 0.8521829486846181, iteration: 437754
loss: 1.0591192245483398,grad_norm: 0.9999996242708062, iteration: 437755
loss: 1.0359737873077393,grad_norm: 0.9999998071014453, iteration: 437756
loss: 1.017775535583496,grad_norm: 0.9085163595613919, iteration: 437757
loss: 0.9994223713874817,grad_norm: 0.8840883565166386, iteration: 437758
loss: 1.0508337020874023,grad_norm: 0.9999997643356682, iteration: 437759
loss: 1.0146334171295166,grad_norm: 0.7365430027844966, iteration: 437760
loss: 1.0880835056304932,grad_norm: 0.8994790505895216, iteration: 437761
loss: 1.0754029750823975,grad_norm: 0.9999995841859122, iteration: 437762
loss: 1.02495276927948,grad_norm: 0.7840433736959144, iteration: 437763
loss: 1.125008225440979,grad_norm: 1.0000000432656304, iteration: 437764
loss: 1.0058038234710693,grad_norm: 0.8185670507145699, iteration: 437765
loss: 1.1054356098175049,grad_norm: 0.999999775172596, iteration: 437766
loss: 0.992443323135376,grad_norm: 0.7469450910194781, iteration: 437767
loss: 1.1062028408050537,grad_norm: 1.0000000088829075, iteration: 437768
loss: 1.0725356340408325,grad_norm: 0.9999993375452761, iteration: 437769
loss: 1.0379359722137451,grad_norm: 0.9999991234487524, iteration: 437770
loss: 0.9980840682983398,grad_norm: 0.6926164454713306, iteration: 437771
loss: 0.9889070391654968,grad_norm: 0.8473668282574404, iteration: 437772
loss: 1.101967453956604,grad_norm: 0.9334206196177219, iteration: 437773
loss: 1.0899730920791626,grad_norm: 0.9999993853910074, iteration: 437774
loss: 1.1359524726867676,grad_norm: 0.9476331176872695, iteration: 437775
loss: 1.0664761066436768,grad_norm: 0.8607891970746201, iteration: 437776
loss: 1.0120291709899902,grad_norm: 0.999999111843972, iteration: 437777
loss: 1.079858422279358,grad_norm: 0.9999990783074978, iteration: 437778
loss: 0.9274412989616394,grad_norm: 0.8194520887686556, iteration: 437779
loss: 0.9974393844604492,grad_norm: 0.766238792451238, iteration: 437780
loss: 1.0115129947662354,grad_norm: 0.7874492414621227, iteration: 437781
loss: 1.0533947944641113,grad_norm: 0.9999991104964066, iteration: 437782
loss: 1.0945478677749634,grad_norm: 0.9999991299611812, iteration: 437783
loss: 1.0110429525375366,grad_norm: 0.8349436151379143, iteration: 437784
loss: 1.1176494359970093,grad_norm: 0.999999897655096, iteration: 437785
loss: 1.0099958181381226,grad_norm: 0.7707957263312687, iteration: 437786
loss: 1.04840886592865,grad_norm: 0.9411992875723029, iteration: 437787
loss: 1.0196934938430786,grad_norm: 0.788811879990289, iteration: 437788
loss: 1.0050926208496094,grad_norm: 0.9787698275056258, iteration: 437789
loss: 1.0172673463821411,grad_norm: 0.7189477711442229, iteration: 437790
loss: 1.0411536693572998,grad_norm: 0.9999990717581566, iteration: 437791
loss: 1.0283721685409546,grad_norm: 0.8032473624067933, iteration: 437792
loss: 0.9790051579475403,grad_norm: 0.7920503771615641, iteration: 437793
loss: 0.9944908618927002,grad_norm: 0.828625386029433, iteration: 437794
loss: 1.2004377841949463,grad_norm: 0.9999994075206199, iteration: 437795
loss: 1.027612328529358,grad_norm: 0.9716432080488338, iteration: 437796
loss: 1.0741130113601685,grad_norm: 0.999999101003372, iteration: 437797
loss: 1.083950400352478,grad_norm: 0.9999999963891928, iteration: 437798
loss: 0.9689850211143494,grad_norm: 0.7254093073722511, iteration: 437799
loss: 1.0081521272659302,grad_norm: 0.819076684494577, iteration: 437800
loss: 1.0283348560333252,grad_norm: 0.8312409409958316, iteration: 437801
loss: 1.008305311203003,grad_norm: 0.8625635415810655, iteration: 437802
loss: 1.0050662755966187,grad_norm: 0.8395404214213756, iteration: 437803
loss: 1.0950044393539429,grad_norm: 0.9999990893938955, iteration: 437804
loss: 1.0580930709838867,grad_norm: 0.9999991428736039, iteration: 437805
loss: 1.0200530290603638,grad_norm: 0.7438364608603583, iteration: 437806
loss: 1.199561595916748,grad_norm: 0.9999998052293846, iteration: 437807
loss: 1.0097730159759521,grad_norm: 0.8149517658738268, iteration: 437808
loss: 1.054905891418457,grad_norm: 0.9999991359489494, iteration: 437809
loss: 1.0734096765518188,grad_norm: 0.9009670574721712, iteration: 437810
loss: 1.0139161348342896,grad_norm: 0.8942147733428556, iteration: 437811
loss: 0.9923518896102905,grad_norm: 0.8413598901558038, iteration: 437812
loss: 1.0979782342910767,grad_norm: 0.9999994914817056, iteration: 437813
loss: 1.00630784034729,grad_norm: 0.7787596336831757, iteration: 437814
loss: 1.0189003944396973,grad_norm: 0.774900650104105, iteration: 437815
loss: 1.0136798620224,grad_norm: 0.862681249781178, iteration: 437816
loss: 0.9928860068321228,grad_norm: 0.7286762770717348, iteration: 437817
loss: 1.0028152465820312,grad_norm: 0.8775306543148053, iteration: 437818
loss: 1.0199891328811646,grad_norm: 0.9400196035969717, iteration: 437819
loss: 1.0343632698059082,grad_norm: 0.7802970998745037, iteration: 437820
loss: 0.9738420248031616,grad_norm: 0.8148911320942236, iteration: 437821
loss: 0.9828848838806152,grad_norm: 0.7323709247290269, iteration: 437822
loss: 0.9889699816703796,grad_norm: 0.8503881344792701, iteration: 437823
loss: 1.0039029121398926,grad_norm: 0.9438414088130229, iteration: 437824
loss: 0.9957066178321838,grad_norm: 0.7864751381642389, iteration: 437825
loss: 1.0102059841156006,grad_norm: 0.9349093827910173, iteration: 437826
loss: 0.9797206521034241,grad_norm: 0.9999990355457582, iteration: 437827
loss: 1.0112614631652832,grad_norm: 0.7666215197788813, iteration: 437828
loss: 1.0009210109710693,grad_norm: 0.7668724127806696, iteration: 437829
loss: 1.1094154119491577,grad_norm: 0.9463813472011392, iteration: 437830
loss: 0.9668077230453491,grad_norm: 0.7768891951537036, iteration: 437831
loss: 1.009390115737915,grad_norm: 0.6566645277929753, iteration: 437832
loss: 0.9793626666069031,grad_norm: 0.7590216653744953, iteration: 437833
loss: 1.0066226720809937,grad_norm: 0.774205465269974, iteration: 437834
loss: 0.9889700412750244,grad_norm: 0.9661926516679227, iteration: 437835
loss: 1.0428524017333984,grad_norm: 0.9044331385148295, iteration: 437836
loss: 1.0646339654922485,grad_norm: 0.8530985595683692, iteration: 437837
loss: 0.9712751507759094,grad_norm: 0.799874421457993, iteration: 437838
loss: 0.9761127233505249,grad_norm: 0.765987442016928, iteration: 437839
loss: 1.0327311754226685,grad_norm: 0.747254201013317, iteration: 437840
loss: 1.013202428817749,grad_norm: 0.8767342513208074, iteration: 437841
loss: 1.001240611076355,grad_norm: 0.7221543495075566, iteration: 437842
loss: 0.9729788899421692,grad_norm: 0.7311981768207896, iteration: 437843
loss: 1.0686100721359253,grad_norm: 0.7275826653641493, iteration: 437844
loss: 1.027078628540039,grad_norm: 0.9999991645097994, iteration: 437845
loss: 0.9456043243408203,grad_norm: 0.8017464017841087, iteration: 437846
loss: 1.0656648874282837,grad_norm: 0.8776734647306635, iteration: 437847
loss: 0.9976406097412109,grad_norm: 0.7936392928192507, iteration: 437848
loss: 1.013772964477539,grad_norm: 0.7413640740368389, iteration: 437849
loss: 1.0047523975372314,grad_norm: 0.7693077168183937, iteration: 437850
loss: 0.9910517930984497,grad_norm: 0.8772945887148667, iteration: 437851
loss: 1.0171290636062622,grad_norm: 0.6966954616723884, iteration: 437852
loss: 0.9833641648292542,grad_norm: 0.7164742194742074, iteration: 437853
loss: 1.0007926225662231,grad_norm: 0.8889398739946273, iteration: 437854
loss: 1.022422194480896,grad_norm: 0.9999990969670277, iteration: 437855
loss: 0.9712873101234436,grad_norm: 0.7823244507900703, iteration: 437856
loss: 1.0042991638183594,grad_norm: 0.7024527964701478, iteration: 437857
loss: 0.9925485849380493,grad_norm: 0.7038866866105519, iteration: 437858
loss: 0.9846398234367371,grad_norm: 0.8388312599524967, iteration: 437859
loss: 1.0464074611663818,grad_norm: 0.679740075748531, iteration: 437860
loss: 1.0633243322372437,grad_norm: 0.9999996344563484, iteration: 437861
loss: 1.0333778858184814,grad_norm: 0.6966856067487455, iteration: 437862
loss: 0.9852692484855652,grad_norm: 0.8866554750647946, iteration: 437863
loss: 1.0392711162567139,grad_norm: 0.7962000473030074, iteration: 437864
loss: 1.1017718315124512,grad_norm: 0.9999996394021098, iteration: 437865
loss: 1.0373722314834595,grad_norm: 0.6393512333839033, iteration: 437866
loss: 1.0180503129959106,grad_norm: 0.9999992520875959, iteration: 437867
loss: 0.9970813989639282,grad_norm: 0.76998586856466, iteration: 437868
loss: 1.0061038732528687,grad_norm: 0.6387891812405081, iteration: 437869
loss: 1.0066180229187012,grad_norm: 0.7624837255860646, iteration: 437870
loss: 0.9814127683639526,grad_norm: 0.8263443062926248, iteration: 437871
loss: 1.0489684343338013,grad_norm: 0.9254427351668585, iteration: 437872
loss: 1.0118159055709839,grad_norm: 0.8044746201284553, iteration: 437873
loss: 1.112410306930542,grad_norm: 0.833059129512297, iteration: 437874
loss: 1.0021225214004517,grad_norm: 0.7842276733610648, iteration: 437875
loss: 0.9904534220695496,grad_norm: 0.8359973734717113, iteration: 437876
loss: 1.0322750806808472,grad_norm: 0.7264538992181645, iteration: 437877
loss: 1.0638200044631958,grad_norm: 0.8807135244024543, iteration: 437878
loss: 0.980610728263855,grad_norm: 0.7192802986063135, iteration: 437879
loss: 1.0211496353149414,grad_norm: 0.8577621354683679, iteration: 437880
loss: 0.9891157150268555,grad_norm: 0.7070689508474423, iteration: 437881
loss: 1.031866192817688,grad_norm: 0.7843636343313795, iteration: 437882
loss: 1.0076165199279785,grad_norm: 0.6750835776069282, iteration: 437883
loss: 0.9804769158363342,grad_norm: 0.7945484029921486, iteration: 437884
loss: 1.011025071144104,grad_norm: 0.7454234365314578, iteration: 437885
loss: 1.058441162109375,grad_norm: 0.9999993627923182, iteration: 437886
loss: 1.0862712860107422,grad_norm: 0.8294518429102345, iteration: 437887
loss: 1.0095101594924927,grad_norm: 0.8339121782350152, iteration: 437888
loss: 1.0296956300735474,grad_norm: 0.7977113349000423, iteration: 437889
loss: 1.0107474327087402,grad_norm: 0.7691239445957626, iteration: 437890
loss: 0.9856696724891663,grad_norm: 0.757830451946704, iteration: 437891
loss: 0.9735362529754639,grad_norm: 0.8070863355883924, iteration: 437892
loss: 1.0500199794769287,grad_norm: 0.783232966568657, iteration: 437893
loss: 1.0410354137420654,grad_norm: 0.9999991489602794, iteration: 437894
loss: 1.0558820962905884,grad_norm: 0.9999991118300744, iteration: 437895
loss: 0.9937589168548584,grad_norm: 0.7245797719643211, iteration: 437896
loss: 0.9620378017425537,grad_norm: 0.7131588039894121, iteration: 437897
loss: 1.0738708972930908,grad_norm: 0.9999991064170812, iteration: 437898
loss: 0.9753819108009338,grad_norm: 0.7043981746373617, iteration: 437899
loss: 1.0067822933197021,grad_norm: 0.9999993232407199, iteration: 437900
loss: 1.1707899570465088,grad_norm: 0.9999997886670223, iteration: 437901
loss: 0.9985775351524353,grad_norm: 0.8869411463759413, iteration: 437902
loss: 1.0176712274551392,grad_norm: 0.9999990015812957, iteration: 437903
loss: 0.9940550327301025,grad_norm: 0.7095075616418757, iteration: 437904
loss: 1.0248197317123413,grad_norm: 0.9527244853635806, iteration: 437905
loss: 1.059131145477295,grad_norm: 0.9999993909493756, iteration: 437906
loss: 1.1252145767211914,grad_norm: 0.9999993159436158, iteration: 437907
loss: 1.0639721155166626,grad_norm: 0.9999994256045662, iteration: 437908
loss: 0.9681002497673035,grad_norm: 0.8278203365156573, iteration: 437909
loss: 1.0091056823730469,grad_norm: 0.8592247030946675, iteration: 437910
loss: 1.0509916543960571,grad_norm: 0.9446096305834679, iteration: 437911
loss: 0.9984879493713379,grad_norm: 0.8661098306678742, iteration: 437912
loss: 1.0018985271453857,grad_norm: 0.8049108634109534, iteration: 437913
loss: 1.0264075994491577,grad_norm: 0.9685535394654705, iteration: 437914
loss: 1.0896250009536743,grad_norm: 0.96656035086503, iteration: 437915
loss: 0.9980937838554382,grad_norm: 0.8882418069804553, iteration: 437916
loss: 1.1786134243011475,grad_norm: 0.9376598472835229, iteration: 437917
loss: 1.0152137279510498,grad_norm: 0.7890699481340181, iteration: 437918
loss: 0.9818628430366516,grad_norm: 0.9597223842246793, iteration: 437919
loss: 1.0657858848571777,grad_norm: 0.9999992108456213, iteration: 437920
loss: 1.0068788528442383,grad_norm: 0.9999990041998934, iteration: 437921
loss: 1.1218544244766235,grad_norm: 0.9999993430183334, iteration: 437922
loss: 1.0559678077697754,grad_norm: 0.9243966592207568, iteration: 437923
loss: 1.0314162969589233,grad_norm: 0.7939243574900365, iteration: 437924
loss: 1.0326646566390991,grad_norm: 0.9999996516814255, iteration: 437925
loss: 1.0357389450073242,grad_norm: 0.9999992918170012, iteration: 437926
loss: 0.9679297208786011,grad_norm: 0.8772623075171039, iteration: 437927
loss: 1.165234923362732,grad_norm: 0.999999743425662, iteration: 437928
loss: 1.1591558456420898,grad_norm: 0.9999991971315539, iteration: 437929
loss: 1.1100302934646606,grad_norm: 0.9999996329211229, iteration: 437930
loss: 1.004681944847107,grad_norm: 0.7926815421338218, iteration: 437931
loss: 1.0599974393844604,grad_norm: 0.9999992235607182, iteration: 437932
loss: 1.0295917987823486,grad_norm: 0.8339883001595036, iteration: 437933
loss: 1.0118643045425415,grad_norm: 0.8056977892604662, iteration: 437934
loss: 1.0513789653778076,grad_norm: 0.9999993307979871, iteration: 437935
loss: 1.033359169960022,grad_norm: 0.9999996911863683, iteration: 437936
loss: 1.0454013347625732,grad_norm: 0.9098420133330208, iteration: 437937
loss: 1.0914186239242554,grad_norm: 0.9271685328752532, iteration: 437938
loss: 1.0006444454193115,grad_norm: 0.7367731922047697, iteration: 437939
loss: 1.0021506547927856,grad_norm: 0.6877091263121764, iteration: 437940
loss: 1.0176005363464355,grad_norm: 0.8352014990326023, iteration: 437941
loss: 1.128340244293213,grad_norm: 0.9999991065521925, iteration: 437942
loss: 1.079006314277649,grad_norm: 0.9999999155658403, iteration: 437943
loss: 1.0441466569900513,grad_norm: 0.9999996952139135, iteration: 437944
loss: 1.0013058185577393,grad_norm: 0.7869689369526316, iteration: 437945
loss: 1.1285369396209717,grad_norm: 0.9999992559803881, iteration: 437946
loss: 1.0618125200271606,grad_norm: 0.9399140267755561, iteration: 437947
loss: 1.0328097343444824,grad_norm: 0.999999603728412, iteration: 437948
loss: 1.0752321481704712,grad_norm: 0.9999995537282446, iteration: 437949
loss: 0.9773398041725159,grad_norm: 0.7973306530030851, iteration: 437950
loss: 1.003507375717163,grad_norm: 0.9999994427002818, iteration: 437951
loss: 1.0222457647323608,grad_norm: 0.7014954177769726, iteration: 437952
loss: 1.0480053424835205,grad_norm: 0.9439604790964841, iteration: 437953
loss: 1.0080311298370361,grad_norm: 0.7615806476104943, iteration: 437954
loss: 0.9986811280250549,grad_norm: 0.748439071263085, iteration: 437955
loss: 1.0572794675827026,grad_norm: 0.965643101593935, iteration: 437956
loss: 1.0022821426391602,grad_norm: 0.8251330264579221, iteration: 437957
loss: 1.0226494073867798,grad_norm: 0.8269215267356779, iteration: 437958
loss: 1.0145740509033203,grad_norm: 0.7579780633001456, iteration: 437959
loss: 0.9864235520362854,grad_norm: 0.7151401464140837, iteration: 437960
loss: 1.0025897026062012,grad_norm: 0.7569447807528815, iteration: 437961
loss: 1.0090384483337402,grad_norm: 0.8780325517410639, iteration: 437962
loss: 1.0271681547164917,grad_norm: 0.8579863937355192, iteration: 437963
loss: 1.043870210647583,grad_norm: 0.7870798659883653, iteration: 437964
loss: 0.9771642088890076,grad_norm: 0.8107740050652206, iteration: 437965
loss: 1.0183521509170532,grad_norm: 0.6386292800446544, iteration: 437966
loss: 0.9703941345214844,grad_norm: 0.9999998722360173, iteration: 437967
loss: 1.3024861812591553,grad_norm: 0.9999997803404409, iteration: 437968
loss: 1.063615083694458,grad_norm: 0.9999995122812467, iteration: 437969
loss: 1.0693085193634033,grad_norm: 0.970828437736368, iteration: 437970
loss: 0.9886744022369385,grad_norm: 0.8429874842938152, iteration: 437971
loss: 1.0809803009033203,grad_norm: 0.9451521867655847, iteration: 437972
loss: 1.0567413568496704,grad_norm: 0.8472545712290374, iteration: 437973
loss: 1.061005711555481,grad_norm: 0.9267360006737331, iteration: 437974
loss: 1.0561816692352295,grad_norm: 0.9999995426204911, iteration: 437975
loss: 1.0546435117721558,grad_norm: 0.9605962764447611, iteration: 437976
loss: 1.0050114393234253,grad_norm: 0.7270070809480144, iteration: 437977
loss: 1.021600365638733,grad_norm: 0.8146991947984636, iteration: 437978
loss: 1.0202805995941162,grad_norm: 0.7767701497342706, iteration: 437979
loss: 0.9939233660697937,grad_norm: 0.7451413610182772, iteration: 437980
loss: 1.030584692955017,grad_norm: 0.7541580668236136, iteration: 437981
loss: 0.9523730874061584,grad_norm: 0.916798536456563, iteration: 437982
loss: 1.0044504404067993,grad_norm: 0.7557929134816783, iteration: 437983
loss: 1.0078699588775635,grad_norm: 0.7911657062490952, iteration: 437984
loss: 0.9982918500900269,grad_norm: 0.9829914293109796, iteration: 437985
loss: 1.1571060419082642,grad_norm: 0.9999998711973137, iteration: 437986
loss: 1.147680401802063,grad_norm: 0.9999994164731605, iteration: 437987
loss: 1.2348986864089966,grad_norm: 0.9999998186882457, iteration: 437988
loss: 1.017246127128601,grad_norm: 0.9380745807814922, iteration: 437989
loss: 1.0939394235610962,grad_norm: 0.9999998212982661, iteration: 437990
loss: 1.0298807621002197,grad_norm: 0.9999990965289323, iteration: 437991
loss: 1.0297977924346924,grad_norm: 0.9999990072536702, iteration: 437992
loss: 1.102833867073059,grad_norm: 0.9999995075031588, iteration: 437993
loss: 1.0134731531143188,grad_norm: 0.8080132456622416, iteration: 437994
loss: 1.005698323249817,grad_norm: 0.999999332012136, iteration: 437995
loss: 0.9757593274116516,grad_norm: 0.8146817912462514, iteration: 437996
loss: 1.0156266689300537,grad_norm: 0.8609560048306826, iteration: 437997
loss: 1.0190660953521729,grad_norm: 0.945406430559486, iteration: 437998
loss: 1.006248116493225,grad_norm: 0.7487193883859582, iteration: 437999
loss: 1.0152058601379395,grad_norm: 0.8430876195813048, iteration: 438000
loss: 0.9991531372070312,grad_norm: 0.8249853843518954, iteration: 438001
loss: 1.0123997926712036,grad_norm: 0.9999999090913712, iteration: 438002
loss: 1.008900761604309,grad_norm: 0.9400462862360245, iteration: 438003
loss: 1.0119671821594238,grad_norm: 0.7492262857154266, iteration: 438004
loss: 0.9895896315574646,grad_norm: 0.8283538201933981, iteration: 438005
loss: 0.9959222674369812,grad_norm: 0.717480842210713, iteration: 438006
loss: 1.0130047798156738,grad_norm: 0.892577748712487, iteration: 438007
loss: 1.0337320566177368,grad_norm: 0.9999998479322969, iteration: 438008
loss: 1.0097476243972778,grad_norm: 0.6400759058048765, iteration: 438009
loss: 1.0372332334518433,grad_norm: 0.818531587061738, iteration: 438010
loss: 1.032585620880127,grad_norm: 0.7734222497528551, iteration: 438011
loss: 1.0607519149780273,grad_norm: 0.999999057364161, iteration: 438012
loss: 0.987630307674408,grad_norm: 0.8981044180727178, iteration: 438013
loss: 1.047650694847107,grad_norm: 0.771175703392394, iteration: 438014
loss: 1.0281603336334229,grad_norm: 0.9999991998855504, iteration: 438015
loss: 1.0232868194580078,grad_norm: 0.9147830075782728, iteration: 438016
loss: 1.0875777006149292,grad_norm: 0.9036813519447476, iteration: 438017
loss: 0.9942317605018616,grad_norm: 0.8774887305405209, iteration: 438018
loss: 1.0292150974273682,grad_norm: 0.8061489366854344, iteration: 438019
loss: 1.0085574388504028,grad_norm: 0.9999997453010848, iteration: 438020
loss: 0.9896716475486755,grad_norm: 0.8128711090914396, iteration: 438021
loss: 0.9640082716941833,grad_norm: 0.8063269062934749, iteration: 438022
loss: 1.028876543045044,grad_norm: 0.9999991754014014, iteration: 438023
loss: 0.9936992526054382,grad_norm: 0.8905111136320746, iteration: 438024
loss: 0.995274543762207,grad_norm: 0.8401310696411347, iteration: 438025
loss: 1.0248687267303467,grad_norm: 0.6885449437237284, iteration: 438026
loss: 1.0137653350830078,grad_norm: 0.6533767110584023, iteration: 438027
loss: 1.040621042251587,grad_norm: 0.828377946159669, iteration: 438028
loss: 0.9984795451164246,grad_norm: 0.8653626908301908, iteration: 438029
loss: 1.1947078704833984,grad_norm: 0.9999991085338161, iteration: 438030
loss: 0.9807953834533691,grad_norm: 0.9999994922646797, iteration: 438031
loss: 1.0230121612548828,grad_norm: 0.759636279218964, iteration: 438032
loss: 0.9920839071273804,grad_norm: 0.74671735021336, iteration: 438033
loss: 1.0418498516082764,grad_norm: 0.9107668278802694, iteration: 438034
loss: 0.9924034476280212,grad_norm: 0.7687601376712151, iteration: 438035
loss: 1.0735584497451782,grad_norm: 0.999999844729596, iteration: 438036
loss: 1.0220836400985718,grad_norm: 0.8062888383668271, iteration: 438037
loss: 1.029659628868103,grad_norm: 0.9999994323993227, iteration: 438038
loss: 0.9949981570243835,grad_norm: 0.7711611334784597, iteration: 438039
loss: 1.0134364366531372,grad_norm: 0.8240263608312461, iteration: 438040
loss: 0.974754810333252,grad_norm: 0.8866036697243572, iteration: 438041
loss: 1.1548643112182617,grad_norm: 0.999999597088265, iteration: 438042
loss: 1.068336844444275,grad_norm: 0.999999875376306, iteration: 438043
loss: 1.0215508937835693,grad_norm: 0.9999993738811395, iteration: 438044
loss: 1.0370091199874878,grad_norm: 0.9999992252258307, iteration: 438045
loss: 1.0920602083206177,grad_norm: 0.9999990760048569, iteration: 438046
loss: 1.0245351791381836,grad_norm: 0.8252317462355273, iteration: 438047
loss: 0.9852992296218872,grad_norm: 0.7876847568130539, iteration: 438048
loss: 0.9986175894737244,grad_norm: 0.8614899581680451, iteration: 438049
loss: 1.0348196029663086,grad_norm: 0.8591094656349153, iteration: 438050
loss: 1.092610239982605,grad_norm: 0.9999990484181278, iteration: 438051
loss: 0.9917847514152527,grad_norm: 0.8783845249644325, iteration: 438052
loss: 1.0129153728485107,grad_norm: 0.90890740598898, iteration: 438053
loss: 1.021124005317688,grad_norm: 0.999999124567716, iteration: 438054
loss: 1.0560470819473267,grad_norm: 0.9999995733420254, iteration: 438055
loss: 1.007051706314087,grad_norm: 0.8067422414499646, iteration: 438056
loss: 1.1131283044815063,grad_norm: 0.9999990289054642, iteration: 438057
loss: 1.0256887674331665,grad_norm: 0.7212166682734233, iteration: 438058
loss: 1.0341187715530396,grad_norm: 0.7792247903354357, iteration: 438059
loss: 1.0561480522155762,grad_norm: 0.9999995962310423, iteration: 438060
loss: 1.0088462829589844,grad_norm: 0.9074220864234461, iteration: 438061
loss: 1.0051263570785522,grad_norm: 0.7474447557596624, iteration: 438062
loss: 1.045978307723999,grad_norm: 0.9999992613228661, iteration: 438063
loss: 1.0183182954788208,grad_norm: 0.9999993138698707, iteration: 438064
loss: 0.9846732020378113,grad_norm: 0.661663230863858, iteration: 438065
loss: 1.0573521852493286,grad_norm: 0.9999990966330798, iteration: 438066
loss: 1.0971986055374146,grad_norm: 0.9999993261889789, iteration: 438067
loss: 1.155111312866211,grad_norm: 0.9999999295679856, iteration: 438068
loss: 1.0569509267807007,grad_norm: 0.9999997183519426, iteration: 438069
loss: 1.0421171188354492,grad_norm: 0.9999999328942019, iteration: 438070
loss: 1.1840561628341675,grad_norm: 0.9999996886202558, iteration: 438071
loss: 1.0775847434997559,grad_norm: 0.9999993678614323, iteration: 438072
loss: 1.0029664039611816,grad_norm: 0.9999995376389039, iteration: 438073
loss: 0.9632552862167358,grad_norm: 0.9999998940058406, iteration: 438074
loss: 0.9832770228385925,grad_norm: 0.7684081421737645, iteration: 438075
loss: 1.0014894008636475,grad_norm: 0.8858674156522125, iteration: 438076
loss: 1.0078063011169434,grad_norm: 0.7109530395898485, iteration: 438077
loss: 0.988665759563446,grad_norm: 0.9151352725661112, iteration: 438078
loss: 1.0287319421768188,grad_norm: 0.9999991192971246, iteration: 438079
loss: 0.9883333444595337,grad_norm: 0.9999991492085263, iteration: 438080
loss: 1.0047025680541992,grad_norm: 0.7483238871135233, iteration: 438081
loss: 0.9930599927902222,grad_norm: 0.9481488730557389, iteration: 438082
loss: 1.0210379362106323,grad_norm: 0.6710466447530035, iteration: 438083
loss: 0.9823736548423767,grad_norm: 0.865211525519854, iteration: 438084
loss: 1.0279327630996704,grad_norm: 0.8038104911250437, iteration: 438085
loss: 0.981783390045166,grad_norm: 0.8901953146718408, iteration: 438086
loss: 0.9724960327148438,grad_norm: 0.9582844557124646, iteration: 438087
loss: 1.0519447326660156,grad_norm: 0.9999994110562919, iteration: 438088
loss: 1.0106359720230103,grad_norm: 0.7787693748544251, iteration: 438089
loss: 0.9942182898521423,grad_norm: 0.709394769242951, iteration: 438090
loss: 1.0915789604187012,grad_norm: 0.9999991914617895, iteration: 438091
loss: 1.0097641944885254,grad_norm: 0.7636240023042884, iteration: 438092
loss: 1.04324209690094,grad_norm: 0.999999075148136, iteration: 438093
loss: 0.9853608012199402,grad_norm: 0.8159233707654913, iteration: 438094
loss: 1.1566122770309448,grad_norm: 0.9999993630460647, iteration: 438095
loss: 1.021880030632019,grad_norm: 0.7181015731410741, iteration: 438096
loss: 1.0382887125015259,grad_norm: 0.9999994271926781, iteration: 438097
loss: 1.0038728713989258,grad_norm: 0.7512779864965243, iteration: 438098
loss: 1.0894633531570435,grad_norm: 0.915978338405113, iteration: 438099
loss: 1.007125973701477,grad_norm: 0.8179941556852532, iteration: 438100
loss: 1.0306432247161865,grad_norm: 0.8282894536061755, iteration: 438101
loss: 0.9825655817985535,grad_norm: 0.9170670908070119, iteration: 438102
loss: 0.9672334790229797,grad_norm: 0.9296700621911963, iteration: 438103
loss: 1.0512713193893433,grad_norm: 0.7786880199417026, iteration: 438104
loss: 1.0476089715957642,grad_norm: 0.9999991724679931, iteration: 438105
loss: 1.0060158967971802,grad_norm: 0.7895716231024471, iteration: 438106
loss: 1.0012354850769043,grad_norm: 0.9999990809861711, iteration: 438107
loss: 1.0324667692184448,grad_norm: 0.8380267972960971, iteration: 438108
loss: 1.073099136352539,grad_norm: 0.8198294944300459, iteration: 438109
loss: 1.0067346096038818,grad_norm: 0.7932946708499826, iteration: 438110
loss: 1.0409581661224365,grad_norm: 0.8845439742488053, iteration: 438111
loss: 1.0000295639038086,grad_norm: 0.760793474065298, iteration: 438112
loss: 0.9850888252258301,grad_norm: 0.6936100267945005, iteration: 438113
loss: 0.9848884344100952,grad_norm: 0.8953830576356844, iteration: 438114
loss: 1.0220757722854614,grad_norm: 0.7465564205692686, iteration: 438115
loss: 1.0570367574691772,grad_norm: 0.9999994107277135, iteration: 438116
loss: 0.9859777092933655,grad_norm: 0.9178202800249978, iteration: 438117
loss: 0.9925618171691895,grad_norm: 0.7106254292902875, iteration: 438118
loss: 0.9887293577194214,grad_norm: 0.7624553703771387, iteration: 438119
loss: 1.0267603397369385,grad_norm: 0.8690305801004672, iteration: 438120
loss: 1.0011544227600098,grad_norm: 0.7586765110281974, iteration: 438121
loss: 1.0031906366348267,grad_norm: 0.6747052904192672, iteration: 438122
loss: 1.0009410381317139,grad_norm: 0.8218603247962712, iteration: 438123
loss: 1.0062731504440308,grad_norm: 0.9724875985408368, iteration: 438124
loss: 1.0119365453720093,grad_norm: 0.761325006118396, iteration: 438125
loss: 1.0062589645385742,grad_norm: 0.7974233244187374, iteration: 438126
loss: 0.9897180199623108,grad_norm: 0.9279179206573847, iteration: 438127
loss: 1.0370548963546753,grad_norm: 0.9999989709970968, iteration: 438128
loss: 0.9744563102722168,grad_norm: 0.773644578652265, iteration: 438129
loss: 0.9943404197692871,grad_norm: 0.6798763528185858, iteration: 438130
loss: 1.052685260772705,grad_norm: 0.716704761643841, iteration: 438131
loss: 1.0020383596420288,grad_norm: 0.9999997188574937, iteration: 438132
loss: 1.0641894340515137,grad_norm: 0.9999992405028185, iteration: 438133
loss: 0.9918550848960876,grad_norm: 0.8674265322243214, iteration: 438134
loss: 0.988862156867981,grad_norm: 0.7767062878282414, iteration: 438135
loss: 0.9941606521606445,grad_norm: 0.7622704041844595, iteration: 438136
loss: 0.9942595958709717,grad_norm: 0.7564197795832327, iteration: 438137
loss: 1.0287902355194092,grad_norm: 0.9999993083828985, iteration: 438138
loss: 0.9998673796653748,grad_norm: 0.7708317821900235, iteration: 438139
loss: 1.024559736251831,grad_norm: 0.8566132511328196, iteration: 438140
loss: 1.0204910039901733,grad_norm: 0.9999991062454382, iteration: 438141
loss: 1.004302978515625,grad_norm: 0.7770585268407656, iteration: 438142
loss: 1.0103389024734497,grad_norm: 0.6207965005294412, iteration: 438143
loss: 1.0115431547164917,grad_norm: 0.815045217212715, iteration: 438144
loss: 1.0330145359039307,grad_norm: 0.7632006782129372, iteration: 438145
loss: 1.0024232864379883,grad_norm: 0.7142344401451083, iteration: 438146
loss: 0.9986076354980469,grad_norm: 0.939394460227822, iteration: 438147
loss: 1.0254188776016235,grad_norm: 0.8165131808251004, iteration: 438148
loss: 1.0216443538665771,grad_norm: 0.6688281242176651, iteration: 438149
loss: 1.0543580055236816,grad_norm: 0.9999992701765318, iteration: 438150
loss: 1.0452892780303955,grad_norm: 0.8950617891902893, iteration: 438151
loss: 0.9673410654067993,grad_norm: 0.8121410461080024, iteration: 438152
loss: 1.000220537185669,grad_norm: 0.7889947514653104, iteration: 438153
loss: 1.0173829793930054,grad_norm: 0.9999992276615634, iteration: 438154
loss: 1.013852596282959,grad_norm: 0.6941259362615818, iteration: 438155
loss: 1.029291033744812,grad_norm: 0.8689643288103084, iteration: 438156
loss: 1.0117599964141846,grad_norm: 0.9999998544763179, iteration: 438157
loss: 1.052726149559021,grad_norm: 0.8110695586189035, iteration: 438158
loss: 0.9638533592224121,grad_norm: 0.7443019020536159, iteration: 438159
loss: 1.024010419845581,grad_norm: 0.8254526774239319, iteration: 438160
loss: 0.9915556907653809,grad_norm: 0.6503730294396675, iteration: 438161
loss: 0.996632993221283,grad_norm: 0.9999993968130293, iteration: 438162
loss: 1.0321147441864014,grad_norm: 0.9848379462211488, iteration: 438163
loss: 1.013606071472168,grad_norm: 0.9388789443188912, iteration: 438164
loss: 1.03029203414917,grad_norm: 0.8738871876521478, iteration: 438165
loss: 1.021878719329834,grad_norm: 0.7677490351184385, iteration: 438166
loss: 1.038240671157837,grad_norm: 0.9002030322651392, iteration: 438167
loss: 0.9887752532958984,grad_norm: 0.9999998438339657, iteration: 438168
loss: 1.00227952003479,grad_norm: 0.7964585251283451, iteration: 438169
loss: 1.0030354261398315,grad_norm: 0.8762620502715398, iteration: 438170
loss: 1.0286824703216553,grad_norm: 0.9614616844897829, iteration: 438171
loss: 1.0694116353988647,grad_norm: 0.9999998380821673, iteration: 438172
loss: 1.0058048963546753,grad_norm: 0.7421608076677619, iteration: 438173
loss: 1.0066014528274536,grad_norm: 0.6325989149437664, iteration: 438174
loss: 1.029349684715271,grad_norm: 0.9294297039899515, iteration: 438175
loss: 1.0360126495361328,grad_norm: 0.9999991347344315, iteration: 438176
loss: 1.0687811374664307,grad_norm: 0.9999998000715912, iteration: 438177
loss: 0.9840829968452454,grad_norm: 0.95560381163685, iteration: 438178
loss: 1.0036851167678833,grad_norm: 0.698582653877778, iteration: 438179
loss: 1.0014203786849976,grad_norm: 0.80975981135465, iteration: 438180
loss: 0.9837512969970703,grad_norm: 0.6623257083426765, iteration: 438181
loss: 1.16737961769104,grad_norm: 0.9999998500755031, iteration: 438182
loss: 0.9986757040023804,grad_norm: 0.8381485846779642, iteration: 438183
loss: 0.9985238313674927,grad_norm: 0.8266855734044952, iteration: 438184
loss: 1.040022611618042,grad_norm: 0.9999994558458326, iteration: 438185
loss: 1.0033073425292969,grad_norm: 0.7299617012935549, iteration: 438186
loss: 1.0089253187179565,grad_norm: 0.793074126168734, iteration: 438187
loss: 1.0115694999694824,grad_norm: 0.8326097873425199, iteration: 438188
loss: 1.0170915126800537,grad_norm: 0.8930281051115113, iteration: 438189
loss: 1.0287854671478271,grad_norm: 0.7103151999814862, iteration: 438190
loss: 1.0131224393844604,grad_norm: 0.9023559338636002, iteration: 438191
loss: 1.0262541770935059,grad_norm: 0.7847555667409788, iteration: 438192
loss: 1.0114526748657227,grad_norm: 0.844816872720266, iteration: 438193
loss: 1.0329070091247559,grad_norm: 0.6807244995443624, iteration: 438194
loss: 0.977169394493103,grad_norm: 0.7485119661900449, iteration: 438195
loss: 1.0153326988220215,grad_norm: 0.8711810546474812, iteration: 438196
loss: 0.9955016374588013,grad_norm: 0.9945592143809875, iteration: 438197
loss: 1.0094387531280518,grad_norm: 0.7684425705048701, iteration: 438198
loss: 0.9712600708007812,grad_norm: 0.910328542304226, iteration: 438199
loss: 1.021038293838501,grad_norm: 0.9999997406135943, iteration: 438200
loss: 0.9861222505569458,grad_norm: 0.9999990118293883, iteration: 438201
loss: 0.9986517429351807,grad_norm: 0.6125572850306875, iteration: 438202
loss: 1.0179405212402344,grad_norm: 0.8716204572165975, iteration: 438203
loss: 1.043508529663086,grad_norm: 0.9813428253933919, iteration: 438204
loss: 1.005681037902832,grad_norm: 0.8845593232251425, iteration: 438205
loss: 1.0592970848083496,grad_norm: 0.9999992328018915, iteration: 438206
loss: 1.0104244947433472,grad_norm: 0.8628365039737522, iteration: 438207
loss: 0.9919880628585815,grad_norm: 0.6957425884134482, iteration: 438208
loss: 1.0206729173660278,grad_norm: 0.9999993102986953, iteration: 438209
loss: 0.9882047772407532,grad_norm: 0.8843561370236308, iteration: 438210
loss: 0.9800068736076355,grad_norm: 0.9999991284310604, iteration: 438211
loss: 0.9951457381248474,grad_norm: 0.8619563783686713, iteration: 438212
loss: 0.9799943566322327,grad_norm: 0.6671635357929193, iteration: 438213
loss: 0.988998293876648,grad_norm: 0.6996963701931279, iteration: 438214
loss: 0.9504873752593994,grad_norm: 0.7389679730749544, iteration: 438215
loss: 1.0060843229293823,grad_norm: 0.9535759890312828, iteration: 438216
loss: 0.9634533524513245,grad_norm: 0.9999993785430491, iteration: 438217
loss: 0.9858264327049255,grad_norm: 0.8739773404106288, iteration: 438218
loss: 1.058686375617981,grad_norm: 0.8476132002235879, iteration: 438219
loss: 0.9886006116867065,grad_norm: 0.999999899431011, iteration: 438220
loss: 0.9625705480575562,grad_norm: 0.8404678372200739, iteration: 438221
loss: 1.0046324729919434,grad_norm: 0.7775207507389322, iteration: 438222
loss: 1.0141116380691528,grad_norm: 0.8177475113064601, iteration: 438223
loss: 1.0162341594696045,grad_norm: 0.9299788544474868, iteration: 438224
loss: 1.047378659248352,grad_norm: 0.7087944625742287, iteration: 438225
loss: 1.0093040466308594,grad_norm: 0.929525531470578, iteration: 438226
loss: 0.9861001372337341,grad_norm: 0.6475874259847827, iteration: 438227
loss: 1.0056604146957397,grad_norm: 0.7682493114038641, iteration: 438228
loss: 0.9786849021911621,grad_norm: 0.8438069200936695, iteration: 438229
loss: 1.0303603410720825,grad_norm: 0.9999997911313631, iteration: 438230
loss: 0.9573519229888916,grad_norm: 0.7097497610963274, iteration: 438231
loss: 1.0911329984664917,grad_norm: 0.9999995710571495, iteration: 438232
loss: 1.2092231512069702,grad_norm: 0.9999999807520957, iteration: 438233
loss: 1.055424451828003,grad_norm: 0.9999999555581083, iteration: 438234
loss: 1.1552757024765015,grad_norm: 0.9999999084335044, iteration: 438235
loss: 0.9850824475288391,grad_norm: 0.7001801141922438, iteration: 438236
loss: 1.0897046327590942,grad_norm: 0.999999498273745, iteration: 438237
loss: 0.9980531930923462,grad_norm: 0.8964412158522483, iteration: 438238
loss: 1.0263340473175049,grad_norm: 0.9999992230152496, iteration: 438239
loss: 0.9878624081611633,grad_norm: 0.732849097218969, iteration: 438240
loss: 0.9871416687965393,grad_norm: 0.7541374952182537, iteration: 438241
loss: 1.005284309387207,grad_norm: 0.8492588571241482, iteration: 438242
loss: 0.9863545298576355,grad_norm: 0.5688709065616943, iteration: 438243
loss: 1.0332839488983154,grad_norm: 0.8415220437708787, iteration: 438244
loss: 0.9932122230529785,grad_norm: 0.9999992636390002, iteration: 438245
loss: 1.0094155073165894,grad_norm: 0.8588468052251325, iteration: 438246
loss: 0.995199978351593,grad_norm: 0.885102796717193, iteration: 438247
loss: 1.0289257764816284,grad_norm: 0.9999997443735393, iteration: 438248
loss: 1.0043102502822876,grad_norm: 0.9459161243295187, iteration: 438249
loss: 0.9870778918266296,grad_norm: 0.744772480227031, iteration: 438250
loss: 0.9627266526222229,grad_norm: 0.7219266103514465, iteration: 438251
loss: 0.9838067889213562,grad_norm: 0.7466535252147716, iteration: 438252
loss: 1.0245310068130493,grad_norm: 0.8095352780270307, iteration: 438253
loss: 1.0031826496124268,grad_norm: 0.7950158977923424, iteration: 438254
loss: 1.0111711025238037,grad_norm: 0.6461009910579187, iteration: 438255
loss: 0.9640438556671143,grad_norm: 0.8083095375929692, iteration: 438256
loss: 1.0476531982421875,grad_norm: 0.9999999366387284, iteration: 438257
loss: 1.01682710647583,grad_norm: 0.7862096325663178, iteration: 438258
loss: 0.9843912124633789,grad_norm: 0.6837080391745264, iteration: 438259
loss: 0.9771114587783813,grad_norm: 0.7802964927748672, iteration: 438260
loss: 0.9975499510765076,grad_norm: 0.8334684024316164, iteration: 438261
loss: 1.0209017992019653,grad_norm: 0.6964945265792486, iteration: 438262
loss: 0.9926578402519226,grad_norm: 0.8059261060873112, iteration: 438263
loss: 0.9656487703323364,grad_norm: 0.7758380087616488, iteration: 438264
loss: 1.0416897535324097,grad_norm: 0.9920383445089802, iteration: 438265
loss: 1.0133622884750366,grad_norm: 0.851756228632492, iteration: 438266
loss: 1.0126551389694214,grad_norm: 0.8535874324234235, iteration: 438267
loss: 1.0043084621429443,grad_norm: 0.9056032869242306, iteration: 438268
loss: 0.994459867477417,grad_norm: 0.9999991048337019, iteration: 438269
loss: 1.0074024200439453,grad_norm: 0.7976286358014276, iteration: 438270
loss: 1.04004967212677,grad_norm: 0.8229629999494648, iteration: 438271
loss: 1.0114614963531494,grad_norm: 0.6240758131827396, iteration: 438272
loss: 1.026969075202942,grad_norm: 0.7918888529008975, iteration: 438273
loss: 0.9893717169761658,grad_norm: 0.821377863313391, iteration: 438274
loss: 0.9817972183227539,grad_norm: 0.9684532323918591, iteration: 438275
loss: 1.0163288116455078,grad_norm: 0.9287968292286477, iteration: 438276
loss: 0.9945402145385742,grad_norm: 0.7309589246198603, iteration: 438277
loss: 1.1199538707733154,grad_norm: 0.9999995705588689, iteration: 438278
loss: 1.0186359882354736,grad_norm: 0.7793487990945048, iteration: 438279
loss: 1.0103249549865723,grad_norm: 0.9999999672563327, iteration: 438280
loss: 1.0360383987426758,grad_norm: 0.9394206411130449, iteration: 438281
loss: 0.9884005784988403,grad_norm: 0.7536752362456216, iteration: 438282
loss: 1.0441874265670776,grad_norm: 0.9999991201139633, iteration: 438283
loss: 1.023099422454834,grad_norm: 0.7613827085410023, iteration: 438284
loss: 1.0118522644042969,grad_norm: 0.82077523471643, iteration: 438285
loss: 1.016088604927063,grad_norm: 0.8016013921847011, iteration: 438286
loss: 1.0111936330795288,grad_norm: 0.8020548818453741, iteration: 438287
loss: 0.9835074543952942,grad_norm: 0.7605111853443225, iteration: 438288
loss: 0.9996023178100586,grad_norm: 0.6223575194373009, iteration: 438289
loss: 1.0391777753829956,grad_norm: 0.8664576134449553, iteration: 438290
loss: 1.001747727394104,grad_norm: 0.819698365406478, iteration: 438291
loss: 0.9984381198883057,grad_norm: 0.7569295845475807, iteration: 438292
loss: 0.9889532923698425,grad_norm: 0.8230722264606114, iteration: 438293
loss: 0.9827283024787903,grad_norm: 0.7065161735552227, iteration: 438294
loss: 1.0135822296142578,grad_norm: 0.8795960058259323, iteration: 438295
loss: 1.04850172996521,grad_norm: 0.7173765785040765, iteration: 438296
loss: 1.0521107912063599,grad_norm: 0.999999878475616, iteration: 438297
loss: 1.0554763078689575,grad_norm: 0.7080509078646386, iteration: 438298
loss: 1.01707923412323,grad_norm: 0.7758910549168614, iteration: 438299
loss: 0.9773504734039307,grad_norm: 0.9999992013104314, iteration: 438300
loss: 1.0344841480255127,grad_norm: 0.6772513268645297, iteration: 438301
loss: 0.9766960740089417,grad_norm: 0.7594113529974269, iteration: 438302
loss: 0.9880364537239075,grad_norm: 0.7145524161495732, iteration: 438303
loss: 1.0041621923446655,grad_norm: 0.8294105801342183, iteration: 438304
loss: 1.0158665180206299,grad_norm: 0.8924391617247506, iteration: 438305
loss: 1.0004889965057373,grad_norm: 0.6760964551001823, iteration: 438306
loss: 1.0272860527038574,grad_norm: 0.7429776586990788, iteration: 438307
loss: 1.0052709579467773,grad_norm: 0.6628946124680526, iteration: 438308
loss: 1.0041859149932861,grad_norm: 0.8462600902573909, iteration: 438309
loss: 1.04957914352417,grad_norm: 0.8637653069096259, iteration: 438310
loss: 0.9981663227081299,grad_norm: 0.8743291206039276, iteration: 438311
loss: 1.0069482326507568,grad_norm: 0.8779124033021849, iteration: 438312
loss: 1.0258952379226685,grad_norm: 0.6760596941544744, iteration: 438313
loss: 0.9632935523986816,grad_norm: 0.9999990012905744, iteration: 438314
loss: 1.0019826889038086,grad_norm: 0.8741867176092062, iteration: 438315
loss: 1.0163965225219727,grad_norm: 0.6920500891381614, iteration: 438316
loss: 1.0320111513137817,grad_norm: 0.7519800400572585, iteration: 438317
loss: 1.0754821300506592,grad_norm: 0.9999999501787552, iteration: 438318
loss: 1.0604227781295776,grad_norm: 0.9999993394293113, iteration: 438319
loss: 0.98410964012146,grad_norm: 0.6718357003069356, iteration: 438320
loss: 1.0041478872299194,grad_norm: 0.9999993497253113, iteration: 438321
loss: 1.0056606531143188,grad_norm: 0.7374619577289209, iteration: 438322
loss: 0.9912374019622803,grad_norm: 0.9604422365296746, iteration: 438323
loss: 0.9991480708122253,grad_norm: 0.9243061670386327, iteration: 438324
loss: 1.014878749847412,grad_norm: 0.6720063176725836, iteration: 438325
loss: 0.9988391399383545,grad_norm: 0.7629622228775379, iteration: 438326
loss: 1.0219522714614868,grad_norm: 0.9999990786997853, iteration: 438327
loss: 1.0162211656570435,grad_norm: 0.9999991664743269, iteration: 438328
loss: 0.9995167255401611,grad_norm: 0.75282513497447, iteration: 438329
loss: 1.040784239768982,grad_norm: 0.8819018279525586, iteration: 438330
loss: 1.024525761604309,grad_norm: 0.8500895552060279, iteration: 438331
loss: 0.9668450951576233,grad_norm: 0.7863686617479014, iteration: 438332
loss: 0.9962783455848694,grad_norm: 0.8392188443053635, iteration: 438333
loss: 0.9839194416999817,grad_norm: 0.9474689156033832, iteration: 438334
loss: 1.02873957157135,grad_norm: 0.8410455291066303, iteration: 438335
loss: 0.9649961590766907,grad_norm: 0.836693071845378, iteration: 438336
loss: 1.0174429416656494,grad_norm: 0.7620612735243323, iteration: 438337
loss: 0.9449833035469055,grad_norm: 0.664861911180238, iteration: 438338
loss: 1.0163925886154175,grad_norm: 0.7182087761535914, iteration: 438339
loss: 0.9945145845413208,grad_norm: 0.9999989974616557, iteration: 438340
loss: 1.0245739221572876,grad_norm: 0.9926504385047272, iteration: 438341
loss: 1.0104671716690063,grad_norm: 0.7537897855268051, iteration: 438342
loss: 1.0097976922988892,grad_norm: 0.818863676587984, iteration: 438343
loss: 0.9577533006668091,grad_norm: 0.9423874831207507, iteration: 438344
loss: 0.9797313809394836,grad_norm: 0.8118772991744875, iteration: 438345
loss: 1.0165290832519531,grad_norm: 0.7247483813835799, iteration: 438346
loss: 1.001076102256775,grad_norm: 0.6018209377567578, iteration: 438347
loss: 1.0611014366149902,grad_norm: 0.9999991613587447, iteration: 438348
loss: 0.9853218793869019,grad_norm: 0.8501458117270004, iteration: 438349
loss: 0.983220100402832,grad_norm: 0.771578709606002, iteration: 438350
loss: 0.9868820905685425,grad_norm: 0.9124238215360916, iteration: 438351
loss: 1.0526461601257324,grad_norm: 0.9999998870868917, iteration: 438352
loss: 1.0117865800857544,grad_norm: 0.7926414218710341, iteration: 438353
loss: 0.9905316233634949,grad_norm: 0.7866995811919231, iteration: 438354
loss: 1.016121745109558,grad_norm: 0.7019286925328918, iteration: 438355
loss: 1.0388215780258179,grad_norm: 0.9999992869564539, iteration: 438356
loss: 1.0042822360992432,grad_norm: 0.8247436595502892, iteration: 438357
loss: 1.0159738063812256,grad_norm: 0.8022293765322397, iteration: 438358
loss: 1.1045254468917847,grad_norm: 0.9999997020076921, iteration: 438359
loss: 1.0066595077514648,grad_norm: 0.7168920183371147, iteration: 438360
loss: 0.9995321035385132,grad_norm: 0.9669705375660866, iteration: 438361
loss: 1.0139580965042114,grad_norm: 0.678199959835791, iteration: 438362
loss: 0.9929293990135193,grad_norm: 0.832908642775974, iteration: 438363
loss: 0.9729626178741455,grad_norm: 0.7646515052830701, iteration: 438364
loss: 0.9869794249534607,grad_norm: 0.9999994183742643, iteration: 438365
loss: 0.9862135052680969,grad_norm: 0.8315700812336061, iteration: 438366
loss: 0.9566038250923157,grad_norm: 0.9160218962385994, iteration: 438367
loss: 1.0172336101531982,grad_norm: 0.7130655735644585, iteration: 438368
loss: 0.9937700629234314,grad_norm: 0.7625590127947347, iteration: 438369
loss: 1.0131068229675293,grad_norm: 0.8927377096046581, iteration: 438370
loss: 1.0175580978393555,grad_norm: 0.7921616534853453, iteration: 438371
loss: 0.9974729418754578,grad_norm: 0.8459054484258725, iteration: 438372
loss: 0.9682912826538086,grad_norm: 0.9189454985135064, iteration: 438373
loss: 0.9546513557434082,grad_norm: 0.8372102077339918, iteration: 438374
loss: 0.9691681861877441,grad_norm: 0.7639476833860699, iteration: 438375
loss: 0.9624122381210327,grad_norm: 0.7941811047230786, iteration: 438376
loss: 0.9848037362098694,grad_norm: 0.7850802047295735, iteration: 438377
loss: 0.9749709367752075,grad_norm: 0.7422628674543205, iteration: 438378
loss: 0.9879412651062012,grad_norm: 0.7536281457813538, iteration: 438379
loss: 1.0276247262954712,grad_norm: 0.8090052523379075, iteration: 438380
loss: 1.034474492073059,grad_norm: 0.9348060173291123, iteration: 438381
loss: 0.9979081749916077,grad_norm: 0.9483469810242525, iteration: 438382
loss: 1.0038740634918213,grad_norm: 0.71365202724163, iteration: 438383
loss: 0.979310154914856,grad_norm: 0.9026604957395387, iteration: 438384
loss: 0.9824907779693604,grad_norm: 0.7039590071930961, iteration: 438385
loss: 1.080597162246704,grad_norm: 0.9999993079713639, iteration: 438386
loss: 1.023602843284607,grad_norm: 0.8638193230772857, iteration: 438387
loss: 1.0283252000808716,grad_norm: 0.7160553361444789, iteration: 438388
loss: 0.986811101436615,grad_norm: 0.8407833689417946, iteration: 438389
loss: 1.019443392753601,grad_norm: 0.6503642047264162, iteration: 438390
loss: 1.0434962511062622,grad_norm: 0.7320644899251763, iteration: 438391
loss: 1.0438270568847656,grad_norm: 0.9999990907086708, iteration: 438392
loss: 1.0198218822479248,grad_norm: 0.9129675620320459, iteration: 438393
loss: 1.0021533966064453,grad_norm: 0.7779658920363905, iteration: 438394
loss: 0.9885936379432678,grad_norm: 0.8922148248974151, iteration: 438395
loss: 0.9942335486412048,grad_norm: 0.8059907385518935, iteration: 438396
loss: 1.0177465677261353,grad_norm: 0.8099425810673744, iteration: 438397
loss: 0.9904786944389343,grad_norm: 0.7774350651560678, iteration: 438398
loss: 0.990906834602356,grad_norm: 0.9999991888717439, iteration: 438399
loss: 1.017004370689392,grad_norm: 0.9999997603949, iteration: 438400
loss: 0.9858095645904541,grad_norm: 0.821233589092817, iteration: 438401
loss: 1.004049301147461,grad_norm: 0.9999994054928014, iteration: 438402
loss: 0.9490547180175781,grad_norm: 0.7610053414542636, iteration: 438403
loss: 1.000020146369934,grad_norm: 0.764663673470972, iteration: 438404
loss: 0.977199375629425,grad_norm: 0.999999108755402, iteration: 438405
loss: 1.0041310787200928,grad_norm: 0.7219407667495849, iteration: 438406
loss: 1.0352212190628052,grad_norm: 0.8286496539695054, iteration: 438407
loss: 0.9785729646682739,grad_norm: 0.8612485622887264, iteration: 438408
loss: 1.0133835077285767,grad_norm: 0.784674474715371, iteration: 438409
loss: 1.0051020383834839,grad_norm: 0.6557177189582813, iteration: 438410
loss: 1.0431238412857056,grad_norm: 0.7439522377339107, iteration: 438411
loss: 1.0124295949935913,grad_norm: 0.8576870156059075, iteration: 438412
loss: 0.9963791370391846,grad_norm: 0.6831998744870413, iteration: 438413
loss: 0.986461877822876,grad_norm: 0.7648504811492949, iteration: 438414
loss: 0.979427695274353,grad_norm: 0.8929365442895318, iteration: 438415
loss: 1.0296036005020142,grad_norm: 0.8312052341352645, iteration: 438416
loss: 1.0579513311386108,grad_norm: 0.9999998999060161, iteration: 438417
loss: 0.9864053726196289,grad_norm: 0.9999993889709132, iteration: 438418
loss: 0.9745147824287415,grad_norm: 0.7692415062407842, iteration: 438419
loss: 0.9875048995018005,grad_norm: 0.7605368510406685, iteration: 438420
loss: 1.0175926685333252,grad_norm: 0.7286922944077114, iteration: 438421
loss: 1.0363550186157227,grad_norm: 0.7980123106664473, iteration: 438422
loss: 0.9719024896621704,grad_norm: 0.730315322546519, iteration: 438423
loss: 1.0172520875930786,grad_norm: 0.7499781809471546, iteration: 438424
loss: 1.0019149780273438,grad_norm: 0.7433803354463542, iteration: 438425
loss: 1.0199215412139893,grad_norm: 0.6488585178603135, iteration: 438426
loss: 1.041609287261963,grad_norm: 0.9483292249532654, iteration: 438427
loss: 1.0287953615188599,grad_norm: 0.9999999221540982, iteration: 438428
loss: 0.9660616517066956,grad_norm: 0.8217390288237403, iteration: 438429
loss: 1.011252999305725,grad_norm: 0.9999999629435858, iteration: 438430
loss: 0.9880542159080505,grad_norm: 0.8658344103260807, iteration: 438431
loss: 1.0699489116668701,grad_norm: 0.9999995878417414, iteration: 438432
loss: 0.993588924407959,grad_norm: 0.8187183276478817, iteration: 438433
loss: 1.022546648979187,grad_norm: 0.9999996427785117, iteration: 438434
loss: 1.0349088907241821,grad_norm: 0.828982643896605, iteration: 438435
loss: 1.1136751174926758,grad_norm: 0.9999998685509173, iteration: 438436
loss: 0.9974541664123535,grad_norm: 0.7278288605923595, iteration: 438437
loss: 0.9740813970565796,grad_norm: 0.7725928930582272, iteration: 438438
loss: 1.023167371749878,grad_norm: 0.8845051754074881, iteration: 438439
loss: 1.0041080713272095,grad_norm: 0.9800245887333628, iteration: 438440
loss: 1.026319146156311,grad_norm: 0.7164504773065349, iteration: 438441
loss: 1.0086582899093628,grad_norm: 0.9047277981259786, iteration: 438442
loss: 1.0085513591766357,grad_norm: 0.5807047305539017, iteration: 438443
loss: 0.9804549217224121,grad_norm: 0.7174406124548781, iteration: 438444
loss: 0.9793441891670227,grad_norm: 0.7008485409563884, iteration: 438445
loss: 1.0132206678390503,grad_norm: 0.8078480506828517, iteration: 438446
loss: 0.9953886866569519,grad_norm: 0.6679644573998047, iteration: 438447
loss: 0.9689430594444275,grad_norm: 0.9999995018347003, iteration: 438448
loss: 0.9745781421661377,grad_norm: 0.8033415921566912, iteration: 438449
loss: 0.9955730438232422,grad_norm: 0.9329039217092437, iteration: 438450
loss: 1.045179009437561,grad_norm: 0.7374773652697336, iteration: 438451
loss: 1.0228095054626465,grad_norm: 0.926246762468297, iteration: 438452
loss: 1.0815603733062744,grad_norm: 0.999999753573402, iteration: 438453
loss: 1.0422451496124268,grad_norm: 0.9633188309806354, iteration: 438454
loss: 1.0216858386993408,grad_norm: 0.9999999986310547, iteration: 438455
loss: 0.9942012429237366,grad_norm: 0.7329299574366771, iteration: 438456
loss: 1.0092694759368896,grad_norm: 0.8096545172069406, iteration: 438457
loss: 0.9818074107170105,grad_norm: 0.6773863104898373, iteration: 438458
loss: 1.075963020324707,grad_norm: 0.8465197899030537, iteration: 438459
loss: 1.0175548791885376,grad_norm: 0.7443889176648444, iteration: 438460
loss: 1.0020005702972412,grad_norm: 0.8694550857035916, iteration: 438461
loss: 1.112396001815796,grad_norm: 0.9999991430177275, iteration: 438462
loss: 0.9866748452186584,grad_norm: 0.749155075324601, iteration: 438463
loss: 1.0131571292877197,grad_norm: 0.9999997937308132, iteration: 438464
loss: 0.9958206415176392,grad_norm: 0.9999991662119361, iteration: 438465
loss: 1.0062613487243652,grad_norm: 0.6488258140156081, iteration: 438466
loss: 0.9911237359046936,grad_norm: 0.8149980066954072, iteration: 438467
loss: 1.0484238862991333,grad_norm: 0.8939892227860703, iteration: 438468
loss: 0.9964843988418579,grad_norm: 0.8016149587300996, iteration: 438469
loss: 1.0137848854064941,grad_norm: 0.713502006247073, iteration: 438470
loss: 1.0279442071914673,grad_norm: 0.7541010169784842, iteration: 438471
loss: 0.9660521745681763,grad_norm: 0.9633433478302802, iteration: 438472
loss: 0.9552592635154724,grad_norm: 0.9079981876280229, iteration: 438473
loss: 1.0287728309631348,grad_norm: 0.7780319457563873, iteration: 438474
loss: 1.0138286352157593,grad_norm: 0.9333458735671368, iteration: 438475
loss: 1.018511414527893,grad_norm: 0.9100944194844209, iteration: 438476
loss: 1.0006730556488037,grad_norm: 0.8937937110626436, iteration: 438477
loss: 1.0673242807388306,grad_norm: 0.9999996827254317, iteration: 438478
loss: 1.113508701324463,grad_norm: 0.9999997864381066, iteration: 438479
loss: 1.0221812725067139,grad_norm: 0.7427918711471426, iteration: 438480
loss: 1.0066126585006714,grad_norm: 0.8186897446207371, iteration: 438481
loss: 1.0183560848236084,grad_norm: 0.9999998288275178, iteration: 438482
loss: 0.9765802025794983,grad_norm: 0.7782729325340786, iteration: 438483
loss: 0.9661573767662048,grad_norm: 0.7896833130996395, iteration: 438484
loss: 0.9993663430213928,grad_norm: 0.9445458042221484, iteration: 438485
loss: 1.0487005710601807,grad_norm: 0.9515553345557205, iteration: 438486
loss: 0.9763544797897339,grad_norm: 0.8160063014071858, iteration: 438487
loss: 0.9940071702003479,grad_norm: 0.6707838145920405, iteration: 438488
loss: 1.0097755193710327,grad_norm: 0.9999993611463072, iteration: 438489
loss: 1.0348740816116333,grad_norm: 0.8814786627843576, iteration: 438490
loss: 0.9959227442741394,grad_norm: 0.6734725567808413, iteration: 438491
loss: 1.007754921913147,grad_norm: 0.999999908149667, iteration: 438492
loss: 0.9771386981010437,grad_norm: 0.6759761403056374, iteration: 438493
loss: 1.0094295740127563,grad_norm: 0.7638230424232348, iteration: 438494
loss: 1.0523841381072998,grad_norm: 0.9999992213051228, iteration: 438495
loss: 0.9943292140960693,grad_norm: 0.8255315352138664, iteration: 438496
loss: 0.9512622356414795,grad_norm: 0.9029548347276778, iteration: 438497
loss: 1.0324949026107788,grad_norm: 0.6941068507108431, iteration: 438498
loss: 1.0270105600357056,grad_norm: 0.9999998895928105, iteration: 438499
loss: 0.9650861620903015,grad_norm: 0.9389399429205465, iteration: 438500
loss: 1.024847149848938,grad_norm: 0.8484205869170415, iteration: 438501
loss: 1.0186858177185059,grad_norm: 0.8348505412580196, iteration: 438502
loss: 0.9951924085617065,grad_norm: 0.6332898290910746, iteration: 438503
loss: 1.0158228874206543,grad_norm: 0.9999989419805864, iteration: 438504
loss: 1.0772119760513306,grad_norm: 0.9999995060731484, iteration: 438505
loss: 0.9673102498054504,grad_norm: 0.7876599395221847, iteration: 438506
loss: 0.9919427633285522,grad_norm: 0.7576813180063573, iteration: 438507
loss: 1.0021543502807617,grad_norm: 0.9999995730952008, iteration: 438508
loss: 0.9992520213127136,grad_norm: 0.9999993983078951, iteration: 438509
loss: 1.0342504978179932,grad_norm: 0.8559582086886904, iteration: 438510
loss: 0.9745854735374451,grad_norm: 0.7331653429401712, iteration: 438511
loss: 1.0063296556472778,grad_norm: 0.9999999023751006, iteration: 438512
loss: 1.0168862342834473,grad_norm: 0.8454722997118056, iteration: 438513
loss: 1.0234383344650269,grad_norm: 0.6325542563432969, iteration: 438514
loss: 1.0213267803192139,grad_norm: 0.9770933921563555, iteration: 438515
loss: 1.001206636428833,grad_norm: 0.6803853588086278, iteration: 438516
loss: 0.988605797290802,grad_norm: 0.8418680375276194, iteration: 438517
loss: 0.9830166101455688,grad_norm: 0.7283676676886811, iteration: 438518
loss: 1.0041686296463013,grad_norm: 0.6290604752688449, iteration: 438519
loss: 1.0126824378967285,grad_norm: 0.9843523912702477, iteration: 438520
loss: 1.0754481554031372,grad_norm: 0.9462450879160985, iteration: 438521
loss: 0.998671293258667,grad_norm: 0.9999993605649459, iteration: 438522
loss: 0.9967982172966003,grad_norm: 0.691827047779956, iteration: 438523
loss: 1.029834270477295,grad_norm: 0.8227755189303135, iteration: 438524
loss: 1.016603946685791,grad_norm: 0.7341236063640357, iteration: 438525
loss: 1.005033016204834,grad_norm: 0.7026215369335872, iteration: 438526
loss: 0.9924712777137756,grad_norm: 0.7754980651444634, iteration: 438527
loss: 0.9898116588592529,grad_norm: 0.8229020366195092, iteration: 438528
loss: 1.0105973482131958,grad_norm: 0.6881646172253424, iteration: 438529
loss: 0.9918114542961121,grad_norm: 0.7561524210713383, iteration: 438530
loss: 1.0175191164016724,grad_norm: 0.9999994416701101, iteration: 438531
loss: 1.0126895904541016,grad_norm: 0.6986430166118006, iteration: 438532
loss: 1.0447367429733276,grad_norm: 0.7889689099363175, iteration: 438533
loss: 1.3741583824157715,grad_norm: 0.9999999126382183, iteration: 438534
loss: 1.0024398565292358,grad_norm: 0.7824823439968923, iteration: 438535
loss: 1.0220311880111694,grad_norm: 0.6960950257019614, iteration: 438536
loss: 1.1110455989837646,grad_norm: 0.999999806543992, iteration: 438537
loss: 0.9913884997367859,grad_norm: 0.7550652105558157, iteration: 438538
loss: 0.9825765490531921,grad_norm: 0.7727427567777574, iteration: 438539
loss: 0.9896243214607239,grad_norm: 0.7274237882895008, iteration: 438540
loss: 0.9798306822776794,grad_norm: 0.6426124376499762, iteration: 438541
loss: 0.9896472692489624,grad_norm: 0.8790071892070209, iteration: 438542
loss: 0.9847294092178345,grad_norm: 0.7593225906827196, iteration: 438543
loss: 1.0003914833068848,grad_norm: 0.7843386592923248, iteration: 438544
loss: 1.0684751272201538,grad_norm: 0.894525359805431, iteration: 438545
loss: 1.026995301246643,grad_norm: 0.7994760806412133, iteration: 438546
loss: 1.0075570344924927,grad_norm: 0.8671852059247741, iteration: 438547
loss: 0.9979590773582458,grad_norm: 0.809025350665219, iteration: 438548
loss: 0.9956815838813782,grad_norm: 0.8226779175934027, iteration: 438549
loss: 1.0040494203567505,grad_norm: 0.831272827107092, iteration: 438550
loss: 1.0205793380737305,grad_norm: 0.8027332684865567, iteration: 438551
loss: 1.008373498916626,grad_norm: 0.8442094281915817, iteration: 438552
loss: 1.0032507181167603,grad_norm: 0.8099541173030634, iteration: 438553
loss: 1.0237170457839966,grad_norm: 0.8177439312718247, iteration: 438554
loss: 0.9944074749946594,grad_norm: 0.8576359303510066, iteration: 438555
loss: 0.9886963367462158,grad_norm: 0.7373806340006356, iteration: 438556
loss: 0.9934784770011902,grad_norm: 0.7921699474068088, iteration: 438557
loss: 1.0403144359588623,grad_norm: 0.859103324024647, iteration: 438558
loss: 1.0633721351623535,grad_norm: 0.9999992633559405, iteration: 438559
loss: 0.9745001792907715,grad_norm: 0.7657089297066338, iteration: 438560
loss: 1.0051133632659912,grad_norm: 0.8443272500249807, iteration: 438561
loss: 0.9949082732200623,grad_norm: 0.7960454794926005, iteration: 438562
loss: 1.0196821689605713,grad_norm: 0.8347148306847811, iteration: 438563
loss: 1.0055955648422241,grad_norm: 0.7097987262579877, iteration: 438564
loss: 1.0036503076553345,grad_norm: 0.7092998426390941, iteration: 438565
loss: 0.9907426834106445,grad_norm: 0.7136722777420417, iteration: 438566
loss: 1.008143424987793,grad_norm: 0.921524799498141, iteration: 438567
loss: 1.034835696220398,grad_norm: 0.9328718213241778, iteration: 438568
loss: 1.0005223751068115,grad_norm: 0.7581226571206484, iteration: 438569
loss: 1.0274444818496704,grad_norm: 0.8672545214479596, iteration: 438570
loss: 1.0001062154769897,grad_norm: 0.6963199183210992, iteration: 438571
loss: 0.9824548363685608,grad_norm: 0.7848757317615548, iteration: 438572
loss: 1.0375089645385742,grad_norm: 0.9999991141408898, iteration: 438573
loss: 0.9543441534042358,grad_norm: 0.7487091752562679, iteration: 438574
loss: 0.9600534439086914,grad_norm: 0.7962345548029267, iteration: 438575
loss: 1.0631251335144043,grad_norm: 0.999999677004917, iteration: 438576
loss: 1.0119500160217285,grad_norm: 0.8360167691692486, iteration: 438577
loss: 1.001959204673767,grad_norm: 0.8967420680437105, iteration: 438578
loss: 0.9952491521835327,grad_norm: 0.6838015240779604, iteration: 438579
loss: 0.9883594512939453,grad_norm: 0.9232271946496781, iteration: 438580
loss: 1.1196191310882568,grad_norm: 0.7806472404728247, iteration: 438581
loss: 0.9995455741882324,grad_norm: 0.8867445515762973, iteration: 438582
loss: 1.0771019458770752,grad_norm: 0.9999994540018556, iteration: 438583
loss: 0.9715835452079773,grad_norm: 0.780619280908866, iteration: 438584
loss: 1.016006588935852,grad_norm: 0.7378111208411802, iteration: 438585
loss: 0.9753767848014832,grad_norm: 0.8301261130226246, iteration: 438586
loss: 1.0010098218917847,grad_norm: 0.7345064362467525, iteration: 438587
loss: 0.9536028504371643,grad_norm: 0.774651860842684, iteration: 438588
loss: 0.9915888905525208,grad_norm: 0.8193433843864436, iteration: 438589
loss: 1.0146681070327759,grad_norm: 0.6630388215420014, iteration: 438590
loss: 0.9847658276557922,grad_norm: 0.6455365944677772, iteration: 438591
loss: 0.9855355620384216,grad_norm: 0.8191689403877549, iteration: 438592
loss: 0.9769670367240906,grad_norm: 0.7968994660037962, iteration: 438593
loss: 0.9519843459129333,grad_norm: 0.79757386277775, iteration: 438594
loss: 0.9983178377151489,grad_norm: 0.792306184225281, iteration: 438595
loss: 0.9799866080284119,grad_norm: 0.6935058032675657, iteration: 438596
loss: 0.9846960306167603,grad_norm: 0.8109968395534715, iteration: 438597
loss: 1.0132611989974976,grad_norm: 0.7299525659811169, iteration: 438598
loss: 0.9687488079071045,grad_norm: 0.7836313847766889, iteration: 438599
loss: 0.9957216382026672,grad_norm: 0.8426113466110438, iteration: 438600
loss: 1.0006237030029297,grad_norm: 0.9570381400647168, iteration: 438601
loss: 0.9431922435760498,grad_norm: 0.8627319365322179, iteration: 438602
loss: 0.9871979355812073,grad_norm: 0.730638530241537, iteration: 438603
loss: 1.0108680725097656,grad_norm: 0.7531440985841752, iteration: 438604
loss: 1.1663905382156372,grad_norm: 0.9999992307699682, iteration: 438605
loss: 1.083771824836731,grad_norm: 0.9999991054862031, iteration: 438606
loss: 0.9922143816947937,grad_norm: 0.7826935837080491, iteration: 438607
loss: 1.0016858577728271,grad_norm: 0.7003744031756607, iteration: 438608
loss: 0.9785559177398682,grad_norm: 0.7298211268794376, iteration: 438609
loss: 1.0065622329711914,grad_norm: 0.8580327380163151, iteration: 438610
loss: 1.0188007354736328,grad_norm: 0.8874918789789438, iteration: 438611
loss: 0.9934554696083069,grad_norm: 0.7631131268465883, iteration: 438612
loss: 1.0100473165512085,grad_norm: 0.7888316198938479, iteration: 438613
loss: 1.018453598022461,grad_norm: 0.7850606604234129, iteration: 438614
loss: 1.02450430393219,grad_norm: 0.7911079263667095, iteration: 438615
loss: 1.043563723564148,grad_norm: 0.6857189848661209, iteration: 438616
loss: 0.9968942403793335,grad_norm: 0.7667622488049769, iteration: 438617
loss: 1.0456970930099487,grad_norm: 0.9999994786830612, iteration: 438618
loss: 1.022028923034668,grad_norm: 0.829354400668141, iteration: 438619
loss: 1.0192517042160034,grad_norm: 0.9284169774785683, iteration: 438620
loss: 0.9615648984909058,grad_norm: 0.8688008645995131, iteration: 438621
loss: 1.0173739194869995,grad_norm: 0.7374782503271049, iteration: 438622
loss: 1.0158672332763672,grad_norm: 0.8647818121450038, iteration: 438623
loss: 1.0035173892974854,grad_norm: 0.7892007035644444, iteration: 438624
loss: 1.0598814487457275,grad_norm: 0.812397760683468, iteration: 438625
loss: 1.0416526794433594,grad_norm: 0.76664701381602, iteration: 438626
loss: 0.9745290875434875,grad_norm: 0.79499264086626, iteration: 438627
loss: 1.008455514907837,grad_norm: 0.6774934032329376, iteration: 438628
loss: 0.9871732592582703,grad_norm: 0.6629148213530783, iteration: 438629
loss: 1.0086241960525513,grad_norm: 0.7156940922393721, iteration: 438630
loss: 1.2040044069290161,grad_norm: 0.9999998045131965, iteration: 438631
loss: 1.0088907480239868,grad_norm: 0.9999990385004832, iteration: 438632
loss: 0.9943457245826721,grad_norm: 0.7171728645965, iteration: 438633
loss: 1.0066897869110107,grad_norm: 0.720953014477701, iteration: 438634
loss: 1.039305567741394,grad_norm: 0.9999998101393988, iteration: 438635
loss: 1.0167356729507446,grad_norm: 0.7587708145517308, iteration: 438636
loss: 1.2069441080093384,grad_norm: 0.9999999528262726, iteration: 438637
loss: 0.9758626222610474,grad_norm: 0.773273091175695, iteration: 438638
loss: 1.087327241897583,grad_norm: 0.9183738701761792, iteration: 438639
loss: 1.024412751197815,grad_norm: 0.9999997050397469, iteration: 438640
loss: 1.005725383758545,grad_norm: 0.8793534398151948, iteration: 438641
loss: 1.0013049840927124,grad_norm: 0.7312868076102127, iteration: 438642
loss: 0.9861911535263062,grad_norm: 0.999999802991582, iteration: 438643
loss: 0.9440553188323975,grad_norm: 0.8368093624767604, iteration: 438644
loss: 1.0066925287246704,grad_norm: 0.9999995018474607, iteration: 438645
loss: 0.9887177348136902,grad_norm: 0.7175477962614918, iteration: 438646
loss: 1.021457552909851,grad_norm: 0.8492848088100605, iteration: 438647
loss: 1.2620785236358643,grad_norm: 0.999999646221146, iteration: 438648
loss: 1.0335320234298706,grad_norm: 0.9999999919854176, iteration: 438649
loss: 1.0972896814346313,grad_norm: 1.0000000777174498, iteration: 438650
loss: 1.0027642250061035,grad_norm: 0.9002531527675636, iteration: 438651
loss: 1.0033434629440308,grad_norm: 0.7529912566605934, iteration: 438652
loss: 1.0052893161773682,grad_norm: 0.6965590016751401, iteration: 438653
loss: 1.0220869779586792,grad_norm: 0.9999991794006194, iteration: 438654
loss: 1.0366674661636353,grad_norm: 0.823457050904523, iteration: 438655
loss: 1.0372059345245361,grad_norm: 0.9416056856697284, iteration: 438656
loss: 1.014661192893982,grad_norm: 0.6955432655671773, iteration: 438657
loss: 1.0094566345214844,grad_norm: 0.7431001527813182, iteration: 438658
loss: 1.036488652229309,grad_norm: 0.8847033691479356, iteration: 438659
loss: 0.9872665405273438,grad_norm: 0.7727963778680756, iteration: 438660
loss: 1.147298812866211,grad_norm: 0.9999998545985577, iteration: 438661
loss: 1.0283164978027344,grad_norm: 0.9055835740442352, iteration: 438662
loss: 1.0313111543655396,grad_norm: 0.8296566783666757, iteration: 438663
loss: 1.02825129032135,grad_norm: 0.8583278441977444, iteration: 438664
loss: 1.0087792873382568,grad_norm: 0.6995214260361625, iteration: 438665
loss: 1.0589408874511719,grad_norm: 0.7018762216730204, iteration: 438666
loss: 0.9911452531814575,grad_norm: 0.8134374434607692, iteration: 438667
loss: 1.0030947923660278,grad_norm: 0.7868352443673404, iteration: 438668
loss: 1.0317763090133667,grad_norm: 0.9999990985419439, iteration: 438669
loss: 1.0782945156097412,grad_norm: 0.999999559355663, iteration: 438670
loss: 0.9958070516586304,grad_norm: 0.6745547300138579, iteration: 438671
loss: 1.0635395050048828,grad_norm: 0.9999989846356556, iteration: 438672
loss: 1.0573171377182007,grad_norm: 1.0000000181976267, iteration: 438673
loss: 1.0398584604263306,grad_norm: 0.9218089254918608, iteration: 438674
loss: 0.9929152727127075,grad_norm: 0.8938852529005883, iteration: 438675
loss: 0.9915475249290466,grad_norm: 0.7209409805915753, iteration: 438676
loss: 0.9839869141578674,grad_norm: 0.8160352199044701, iteration: 438677
loss: 1.0520530939102173,grad_norm: 0.999999222474632, iteration: 438678
loss: 1.0139268636703491,grad_norm: 0.6919027199134101, iteration: 438679
loss: 1.0811306238174438,grad_norm: 0.7450039274553353, iteration: 438680
loss: 1.0164687633514404,grad_norm: 0.9152835993500101, iteration: 438681
loss: 1.027843713760376,grad_norm: 0.7785053556365721, iteration: 438682
loss: 0.9930403828620911,grad_norm: 0.8757776841956272, iteration: 438683
loss: 1.0645272731781006,grad_norm: 0.7778393383627343, iteration: 438684
loss: 0.977660059928894,grad_norm: 0.6240366511095833, iteration: 438685
loss: 1.0238947868347168,grad_norm: 0.827249056456683, iteration: 438686
loss: 0.988269567489624,grad_norm: 0.8558576266280631, iteration: 438687
loss: 1.0000951290130615,grad_norm: 0.953942719505026, iteration: 438688
loss: 1.0470057725906372,grad_norm: 0.9999994379091035, iteration: 438689
loss: 0.9744976162910461,grad_norm: 0.76937363022234, iteration: 438690
loss: 1.0511066913604736,grad_norm: 0.7294604259479527, iteration: 438691
loss: 1.3372141122817993,grad_norm: 0.9999995816029362, iteration: 438692
loss: 1.0507545471191406,grad_norm: 0.9999994455293836, iteration: 438693
loss: 1.018964409828186,grad_norm: 0.999999069467323, iteration: 438694
loss: 1.0971485376358032,grad_norm: 0.9690537066665882, iteration: 438695
loss: 1.0066016912460327,grad_norm: 0.9276739060152244, iteration: 438696
loss: 1.0427314043045044,grad_norm: 0.7174702268173335, iteration: 438697
loss: 0.9865224957466125,grad_norm: 0.8245236927451306, iteration: 438698
loss: 1.0007447004318237,grad_norm: 0.8793668710894275, iteration: 438699
loss: 0.9768422245979309,grad_norm: 0.999999597391743, iteration: 438700
loss: 0.9995889663696289,grad_norm: 0.999999109271776, iteration: 438701
loss: 1.0603996515274048,grad_norm: 0.9999998240662122, iteration: 438702
loss: 0.9842879176139832,grad_norm: 0.7715805283011928, iteration: 438703
loss: 1.0164152383804321,grad_norm: 0.9999995831925607, iteration: 438704
loss: 0.9849735498428345,grad_norm: 0.7083470196902707, iteration: 438705
loss: 1.2057634592056274,grad_norm: 0.9999995447141832, iteration: 438706
loss: 1.0368232727050781,grad_norm: 0.6774004240870443, iteration: 438707
loss: 0.9932352900505066,grad_norm: 0.738701379613203, iteration: 438708
loss: 0.9925060868263245,grad_norm: 0.9999993830481506, iteration: 438709
loss: 0.9909369349479675,grad_norm: 0.9284882295962565, iteration: 438710
loss: 1.0176341533660889,grad_norm: 0.9999996201030672, iteration: 438711
loss: 0.9630258083343506,grad_norm: 0.7687207322353022, iteration: 438712
loss: 1.004514217376709,grad_norm: 0.6962084480638906, iteration: 438713
loss: 0.9825147986412048,grad_norm: 0.7245945932905883, iteration: 438714
loss: 1.002323865890503,grad_norm: 0.8719588105724627, iteration: 438715
loss: 0.9649955630302429,grad_norm: 0.8799470654181824, iteration: 438716
loss: 1.0537675619125366,grad_norm: 0.9999999371553744, iteration: 438717
loss: 0.9948875308036804,grad_norm: 0.8124837012217445, iteration: 438718
loss: 1.0047857761383057,grad_norm: 0.720694019735773, iteration: 438719
loss: 1.000133991241455,grad_norm: 0.893244218665535, iteration: 438720
loss: 1.0349498987197876,grad_norm: 0.6704977246158288, iteration: 438721
loss: 0.9757770895957947,grad_norm: 0.7691941637188676, iteration: 438722
loss: 1.0044180154800415,grad_norm: 0.7176840769608542, iteration: 438723
loss: 1.000654935836792,grad_norm: 0.8805160024719565, iteration: 438724
loss: 1.0131794214248657,grad_norm: 0.7918565765067731, iteration: 438725
loss: 0.9945885539054871,grad_norm: 0.7862505137243401, iteration: 438726
loss: 1.046582818031311,grad_norm: 0.8312155450460824, iteration: 438727
loss: 1.0205024480819702,grad_norm: 0.8657076740945095, iteration: 438728
loss: 0.9981476664543152,grad_norm: 0.8699730402680008, iteration: 438729
loss: 1.023856520652771,grad_norm: 0.9999993723983442, iteration: 438730
loss: 0.9950875639915466,grad_norm: 0.9999998261558625, iteration: 438731
loss: 1.0224690437316895,grad_norm: 0.7157391144336949, iteration: 438732
loss: 1.0251679420471191,grad_norm: 0.9067083055436554, iteration: 438733
loss: 0.9909719824790955,grad_norm: 0.8550444464993403, iteration: 438734
loss: 1.004866361618042,grad_norm: 0.6910311718245101, iteration: 438735
loss: 1.035886526107788,grad_norm: 0.8301853852729402, iteration: 438736
loss: 0.9878093004226685,grad_norm: 0.8381926206040962, iteration: 438737
loss: 0.9927408695220947,grad_norm: 0.7466027796408412, iteration: 438738
loss: 1.0713016986846924,grad_norm: 0.999999380208203, iteration: 438739
loss: 1.0132213830947876,grad_norm: 0.7765063654840517, iteration: 438740
loss: 0.9926808476448059,grad_norm: 0.9999995106464615, iteration: 438741
loss: 1.006940484046936,grad_norm: 0.96207345833565, iteration: 438742
loss: 0.9804022908210754,grad_norm: 0.7282531670537785, iteration: 438743
loss: 1.0058518648147583,grad_norm: 0.9999991892713774, iteration: 438744
loss: 1.0060217380523682,grad_norm: 0.7651703160455671, iteration: 438745
loss: 1.0231060981750488,grad_norm: 0.9999994896843554, iteration: 438746
loss: 1.0218323469161987,grad_norm: 0.9134458652525813, iteration: 438747
loss: 0.9999969005584717,grad_norm: 0.7687555250965921, iteration: 438748
loss: 0.9971321821212769,grad_norm: 0.761458422938191, iteration: 438749
loss: 0.9661096334457397,grad_norm: 0.7596829292293991, iteration: 438750
loss: 1.0945570468902588,grad_norm: 0.9999993063773448, iteration: 438751
loss: 0.9670185446739197,grad_norm: 0.7502202975462741, iteration: 438752
loss: 1.0631452798843384,grad_norm: 0.9999993724891632, iteration: 438753
loss: 1.0002131462097168,grad_norm: 0.9999991396829491, iteration: 438754
loss: 0.9832063317298889,grad_norm: 0.8147234526829642, iteration: 438755
loss: 1.0163713693618774,grad_norm: 0.6991379643418902, iteration: 438756
loss: 1.01457679271698,grad_norm: 0.8427154957968085, iteration: 438757
loss: 1.0089046955108643,grad_norm: 0.9629057297767896, iteration: 438758
loss: 1.0046337842941284,grad_norm: 0.999999037939845, iteration: 438759
loss: 0.9784679412841797,grad_norm: 0.851979409219898, iteration: 438760
loss: 1.010418176651001,grad_norm: 0.8445871220567336, iteration: 438761
loss: 0.9914618134498596,grad_norm: 0.7145484997672246, iteration: 438762
loss: 1.0379053354263306,grad_norm: 0.7429224469523629, iteration: 438763
loss: 1.029623031616211,grad_norm: 0.9999991053531971, iteration: 438764
loss: 0.9893608689308167,grad_norm: 0.7254323096262765, iteration: 438765
loss: 0.9618673920631409,grad_norm: 0.9081466488273052, iteration: 438766
loss: 1.0021770000457764,grad_norm: 0.8103034694266591, iteration: 438767
loss: 0.9949345588684082,grad_norm: 0.7161407235127881, iteration: 438768
loss: 0.9798887372016907,grad_norm: 0.7531995913797687, iteration: 438769
loss: 1.0379899740219116,grad_norm: 0.8858646527584093, iteration: 438770
loss: 0.9738253951072693,grad_norm: 0.7571243925925762, iteration: 438771
loss: 0.9715926051139832,grad_norm: 0.7029501530451434, iteration: 438772
loss: 1.0350117683410645,grad_norm: 0.8823249551797441, iteration: 438773
loss: 1.010280966758728,grad_norm: 0.9999999658789797, iteration: 438774
loss: 1.002137541770935,grad_norm: 0.7260545389839408, iteration: 438775
loss: 1.0195742845535278,grad_norm: 0.6946296840984902, iteration: 438776
loss: 0.9974287748336792,grad_norm: 0.7242121047743135, iteration: 438777
loss: 1.0162166357040405,grad_norm: 0.7531059836447355, iteration: 438778
loss: 0.9868950843811035,grad_norm: 0.7900305581766803, iteration: 438779
loss: 0.9744328856468201,grad_norm: 0.7028147211354714, iteration: 438780
loss: 1.0006318092346191,grad_norm: 0.8130145043585086, iteration: 438781
loss: 0.9810191988945007,grad_norm: 0.7231936423483639, iteration: 438782
loss: 1.0269602537155151,grad_norm: 0.7897853194224523, iteration: 438783
loss: 1.021920919418335,grad_norm: 0.8474246550868314, iteration: 438784
loss: 1.0121781826019287,grad_norm: 0.7497705628967651, iteration: 438785
loss: 1.0692682266235352,grad_norm: 0.9039970127353001, iteration: 438786
loss: 0.9850359559059143,grad_norm: 0.7112967121419687, iteration: 438787
loss: 0.9738843441009521,grad_norm: 0.7379319220319227, iteration: 438788
loss: 1.0655533075332642,grad_norm: 0.9999992117381904, iteration: 438789
loss: 1.0174686908721924,grad_norm: 0.9999992229330242, iteration: 438790
loss: 1.0111031532287598,grad_norm: 0.6870963665190116, iteration: 438791
loss: 0.9909077882766724,grad_norm: 0.9187164149145136, iteration: 438792
loss: 1.0388267040252686,grad_norm: 0.888960362165156, iteration: 438793
loss: 0.9942653179168701,grad_norm: 0.7055535365048663, iteration: 438794
loss: 1.016375184059143,grad_norm: 0.6669109086369641, iteration: 438795
loss: 0.9872106313705444,grad_norm: 0.8178382787194939, iteration: 438796
loss: 0.9590622782707214,grad_norm: 0.8881625257003719, iteration: 438797
loss: 1.047680377960205,grad_norm: 0.9999993465404424, iteration: 438798
loss: 1.025317668914795,grad_norm: 0.8497031213849732, iteration: 438799
loss: 0.9976550936698914,grad_norm: 0.8019987535898402, iteration: 438800
loss: 1.0399056673049927,grad_norm: 1.0000001446568647, iteration: 438801
loss: 1.0253610610961914,grad_norm: 0.7426937872379965, iteration: 438802
loss: 1.0280596017837524,grad_norm: 0.7502621337649934, iteration: 438803
loss: 1.0191612243652344,grad_norm: 0.9999997042737171, iteration: 438804
loss: 1.148414134979248,grad_norm: 0.9999998565943634, iteration: 438805
loss: 1.003103494644165,grad_norm: 0.7438517969732359, iteration: 438806
loss: 1.0486778020858765,grad_norm: 0.7100545410340868, iteration: 438807
loss: 0.9873935580253601,grad_norm: 0.9255596023661848, iteration: 438808
loss: 0.9912139177322388,grad_norm: 0.8023951091950718, iteration: 438809
loss: 1.0061748027801514,grad_norm: 0.7789066521317315, iteration: 438810
loss: 1.0180772542953491,grad_norm: 0.7797502769101649, iteration: 438811
loss: 1.0134683847427368,grad_norm: 0.9379612997198878, iteration: 438812
loss: 1.0743224620819092,grad_norm: 0.9999997897321526, iteration: 438813
loss: 1.032130241394043,grad_norm: 0.9999997782210348, iteration: 438814
loss: 0.9934290647506714,grad_norm: 0.7845816602195784, iteration: 438815
loss: 0.984281063079834,grad_norm: 0.7909283953374233, iteration: 438816
loss: 1.0130360126495361,grad_norm: 0.7860955828779564, iteration: 438817
loss: 1.0230927467346191,grad_norm: 0.7835305358959684, iteration: 438818
loss: 0.9650093913078308,grad_norm: 0.7654178119556558, iteration: 438819
loss: 1.0060029029846191,grad_norm: 0.7904114533102246, iteration: 438820
loss: 1.019095540046692,grad_norm: 0.7119511692784499, iteration: 438821
loss: 1.0151338577270508,grad_norm: 0.852526144099735, iteration: 438822
loss: 0.978914201259613,grad_norm: 0.8362618802754098, iteration: 438823
loss: 1.0003255605697632,grad_norm: 0.6984643626323738, iteration: 438824
loss: 0.9678282737731934,grad_norm: 0.8101427804013204, iteration: 438825
loss: 0.9975545406341553,grad_norm: 0.8344746669916918, iteration: 438826
loss: 0.9769772887229919,grad_norm: 0.8283166537417006, iteration: 438827
loss: 1.0042372941970825,grad_norm: 0.99999907714539, iteration: 438828
loss: 1.0199544429779053,grad_norm: 0.9999997758975572, iteration: 438829
loss: 1.0631425380706787,grad_norm: 0.8390711243414346, iteration: 438830
loss: 1.0611414909362793,grad_norm: 0.9276463201363871, iteration: 438831
loss: 1.0688122510910034,grad_norm: 0.6743907929081259, iteration: 438832
loss: 0.9962471723556519,grad_norm: 0.9999993707336071, iteration: 438833
loss: 1.0012441873550415,grad_norm: 0.9999991654283362, iteration: 438834
loss: 0.9994999170303345,grad_norm: 0.6656977316468394, iteration: 438835
loss: 0.9987495541572571,grad_norm: 0.7425072988708986, iteration: 438836
loss: 0.9849135875701904,grad_norm: 0.8057186214686785, iteration: 438837
loss: 1.0085538625717163,grad_norm: 0.6727578868042047, iteration: 438838
loss: 1.0195744037628174,grad_norm: 0.7147569965655498, iteration: 438839
loss: 1.0045992136001587,grad_norm: 0.6800548794214507, iteration: 438840
loss: 1.0415241718292236,grad_norm: 0.8959174728575819, iteration: 438841
loss: 0.9850808382034302,grad_norm: 0.8714499108575257, iteration: 438842
loss: 0.996856689453125,grad_norm: 0.7603140509754722, iteration: 438843
loss: 1.0292402505874634,grad_norm: 0.9999998225945792, iteration: 438844
loss: 0.9851991534233093,grad_norm: 0.9745473294706305, iteration: 438845
loss: 1.0191137790679932,grad_norm: 0.8086977434203393, iteration: 438846
loss: 0.9830061793327332,grad_norm: 0.7488152430156608, iteration: 438847
loss: 1.0127562284469604,grad_norm: 0.9999991764444883, iteration: 438848
loss: 0.9992597699165344,grad_norm: 0.6769100182526128, iteration: 438849
loss: 0.9820107221603394,grad_norm: 0.8754513362762393, iteration: 438850
loss: 0.992475152015686,grad_norm: 0.754200552346017, iteration: 438851
loss: 1.0174189805984497,grad_norm: 0.7247195347045106, iteration: 438852
loss: 1.0069504976272583,grad_norm: 0.8026851629121161, iteration: 438853
loss: 1.001987099647522,grad_norm: 0.8182242408416338, iteration: 438854
loss: 1.021925926208496,grad_norm: 0.7911639819794332, iteration: 438855
loss: 1.0037964582443237,grad_norm: 0.8763747273392463, iteration: 438856
loss: 0.9859582781791687,grad_norm: 0.9999999017132721, iteration: 438857
loss: 1.0186951160430908,grad_norm: 0.8094879572877026, iteration: 438858
loss: 0.9567480683326721,grad_norm: 0.821258165745253, iteration: 438859
loss: 1.0241485834121704,grad_norm: 0.7044814286424347, iteration: 438860
loss: 1.006496787071228,grad_norm: 0.9999991706941965, iteration: 438861
loss: 0.9825133681297302,grad_norm: 0.7801333937703084, iteration: 438862
loss: 1.0508413314819336,grad_norm: 0.8230717192314609, iteration: 438863
loss: 1.00147545337677,grad_norm: 0.999999189992003, iteration: 438864
loss: 0.9862017631530762,grad_norm: 0.871133539029694, iteration: 438865
loss: 1.0476503372192383,grad_norm: 0.9999994359323914, iteration: 438866
loss: 1.0016388893127441,grad_norm: 0.9999990945241439, iteration: 438867
loss: 1.0473905801773071,grad_norm: 0.9999991783917461, iteration: 438868
loss: 1.0134835243225098,grad_norm: 0.675874994697245, iteration: 438869
loss: 0.9937030673027039,grad_norm: 0.8209403555904681, iteration: 438870
loss: 0.9842197895050049,grad_norm: 0.9621149439826888, iteration: 438871
loss: 1.024815320968628,grad_norm: 0.586719219466669, iteration: 438872
loss: 0.9639038443565369,grad_norm: 0.7117887544524845, iteration: 438873
loss: 0.9911876320838928,grad_norm: 0.7204100657813828, iteration: 438874
loss: 1.0202172994613647,grad_norm: 0.6649730923152808, iteration: 438875
loss: 0.9825690984725952,grad_norm: 0.7050508880296947, iteration: 438876
loss: 0.990160346031189,grad_norm: 0.755733576418017, iteration: 438877
loss: 0.9541046619415283,grad_norm: 0.6944485811833158, iteration: 438878
loss: 0.9831361770629883,grad_norm: 0.8289275114695782, iteration: 438879
loss: 1.0098462104797363,grad_norm: 0.8328496224532024, iteration: 438880
loss: 1.0219378471374512,grad_norm: 0.7380739945616653, iteration: 438881
loss: 1.0196294784545898,grad_norm: 0.7713515124524379, iteration: 438882
loss: 0.9883384108543396,grad_norm: 0.7584511741999976, iteration: 438883
loss: 0.9989911317825317,grad_norm: 0.8955296252896443, iteration: 438884
loss: 1.0303242206573486,grad_norm: 0.7880250899956418, iteration: 438885
loss: 0.9796331524848938,grad_norm: 0.8279530733916438, iteration: 438886
loss: 1.0302493572235107,grad_norm: 0.9772774619793735, iteration: 438887
loss: 1.0075136423110962,grad_norm: 0.9999995880294121, iteration: 438888
loss: 1.0744199752807617,grad_norm: 0.9999998492535038, iteration: 438889
loss: 1.0422852039337158,grad_norm: 0.7536287604478425, iteration: 438890
loss: 1.001347541809082,grad_norm: 0.672295893749017, iteration: 438891
loss: 0.9914748668670654,grad_norm: 0.7904459910600826, iteration: 438892
loss: 0.9909083843231201,grad_norm: 0.8216599818634652, iteration: 438893
loss: 1.010162353515625,grad_norm: 0.8807723677254599, iteration: 438894
loss: 1.0326145887374878,grad_norm: 0.7701399700298587, iteration: 438895
loss: 1.000353455543518,grad_norm: 0.889324927627231, iteration: 438896
loss: 1.0217185020446777,grad_norm: 0.765664370156954, iteration: 438897
loss: 1.0146721601486206,grad_norm: 0.7229617836874654, iteration: 438898
loss: 1.002647042274475,grad_norm: 0.7040199544396805, iteration: 438899
loss: 1.0436633825302124,grad_norm: 0.8053752371869148, iteration: 438900
loss: 0.9670448899269104,grad_norm: 0.6969576775350909, iteration: 438901
loss: 0.9867967367172241,grad_norm: 0.6969942564498988, iteration: 438902
loss: 1.050146460533142,grad_norm: 0.7468063670791308, iteration: 438903
loss: 1.0800979137420654,grad_norm: 0.8306064863039524, iteration: 438904
loss: 1.0469701290130615,grad_norm: 0.9999999935516585, iteration: 438905
loss: 1.1876853704452515,grad_norm: 0.9999995988653603, iteration: 438906
loss: 0.9835533499717712,grad_norm: 0.7941802728740882, iteration: 438907
loss: 1.001259684562683,grad_norm: 0.6741364553013816, iteration: 438908
loss: 0.9460054636001587,grad_norm: 0.7419955104974404, iteration: 438909
loss: 0.9670231342315674,grad_norm: 0.8351598137213255, iteration: 438910
loss: 1.0256482362747192,grad_norm: 0.577845012067432, iteration: 438911
loss: 0.9882683157920837,grad_norm: 0.8013665842284355, iteration: 438912
loss: 0.9899660348892212,grad_norm: 0.8563561538234078, iteration: 438913
loss: 0.9962239265441895,grad_norm: 0.8462608168307977, iteration: 438914
loss: 1.0240514278411865,grad_norm: 0.8442381598833986, iteration: 438915
loss: 1.0008511543273926,grad_norm: 0.7161597668834045, iteration: 438916
loss: 0.9951424598693848,grad_norm: 0.9594082700626434, iteration: 438917
loss: 1.0156887769699097,grad_norm: 0.8217976523267612, iteration: 438918
loss: 1.0181853771209717,grad_norm: 0.7807909059892225, iteration: 438919
loss: 1.005225658416748,grad_norm: 0.8508264078779533, iteration: 438920
loss: 0.971093475818634,grad_norm: 0.7655986464093829, iteration: 438921
loss: 1.011794924736023,grad_norm: 0.6991847481866721, iteration: 438922
loss: 0.9642030596733093,grad_norm: 0.8283963299614419, iteration: 438923
loss: 1.0296401977539062,grad_norm: 0.7882737965557528, iteration: 438924
loss: 0.9529473185539246,grad_norm: 0.7107179178857687, iteration: 438925
loss: 1.0076038837432861,grad_norm: 0.830575906306121, iteration: 438926
loss: 1.0063445568084717,grad_norm: 0.9438428352706202, iteration: 438927
loss: 1.0138144493103027,grad_norm: 0.7215988686472805, iteration: 438928
loss: 1.1746479272842407,grad_norm: 0.9999999359730865, iteration: 438929
loss: 0.974694550037384,grad_norm: 0.8061445291946099, iteration: 438930
loss: 1.0119868516921997,grad_norm: 0.7108820509547406, iteration: 438931
loss: 1.0119670629501343,grad_norm: 0.7387092175730057, iteration: 438932
loss: 1.0380439758300781,grad_norm: 0.999999485899086, iteration: 438933
loss: 0.9798141121864319,grad_norm: 0.7687809658567444, iteration: 438934
loss: 1.1662588119506836,grad_norm: 0.9999995785566551, iteration: 438935
loss: 1.0023161172866821,grad_norm: 0.6959395204337846, iteration: 438936
loss: 0.9745382070541382,grad_norm: 0.9108017756885753, iteration: 438937
loss: 1.0121606588363647,grad_norm: 0.8742237015340071, iteration: 438938
loss: 1.0352094173431396,grad_norm: 0.6913525705163219, iteration: 438939
loss: 1.0186738967895508,grad_norm: 0.7967836103028061, iteration: 438940
loss: 0.9865357875823975,grad_norm: 0.6770806807894059, iteration: 438941
loss: 1.0186548233032227,grad_norm: 0.8864686989136608, iteration: 438942
loss: 0.9775686860084534,grad_norm: 0.7890072256340529, iteration: 438943
loss: 1.0251350402832031,grad_norm: 0.9193014589768099, iteration: 438944
loss: 1.021639108657837,grad_norm: 0.8944024179291774, iteration: 438945
loss: 0.9981903433799744,grad_norm: 0.6683869055690248, iteration: 438946
loss: 0.992896318435669,grad_norm: 0.6933206748092412, iteration: 438947
loss: 1.0189805030822754,grad_norm: 0.8214663726099127, iteration: 438948
loss: 1.0062127113342285,grad_norm: 0.799035168370696, iteration: 438949
loss: 0.9580159187316895,grad_norm: 0.7592174910240647, iteration: 438950
loss: 1.0144619941711426,grad_norm: 0.7490616004622904, iteration: 438951
loss: 0.9702086448669434,grad_norm: 0.8820646921042589, iteration: 438952
loss: 0.9922835230827332,grad_norm: 0.999999016015718, iteration: 438953
loss: 1.0137203931808472,grad_norm: 0.7434469572187901, iteration: 438954
loss: 0.9756983518600464,grad_norm: 0.9999991270412878, iteration: 438955
loss: 1.0024011135101318,grad_norm: 0.7927674966753395, iteration: 438956
loss: 0.9981741905212402,grad_norm: 0.9490604502659263, iteration: 438957
loss: 0.9796535968780518,grad_norm: 0.8180865114156207, iteration: 438958
loss: 0.9722241759300232,grad_norm: 0.6718907991269163, iteration: 438959
loss: 0.9703059196472168,grad_norm: 0.8060025296324693, iteration: 438960
loss: 0.9841161966323853,grad_norm: 0.9586083075382821, iteration: 438961
loss: 0.9993935227394104,grad_norm: 0.6743334766434267, iteration: 438962
loss: 1.0115971565246582,grad_norm: 0.7436589205526205, iteration: 438963
loss: 1.00538969039917,grad_norm: 0.8438404380389914, iteration: 438964
loss: 1.0001499652862549,grad_norm: 0.6873445601037422, iteration: 438965
loss: 0.9824049472808838,grad_norm: 0.8784573217691416, iteration: 438966
loss: 0.9823213219642639,grad_norm: 0.8225901566252837, iteration: 438967
loss: 1.0276410579681396,grad_norm: 0.7586787069512798, iteration: 438968
loss: 0.9569156765937805,grad_norm: 0.8027682602988364, iteration: 438969
loss: 1.0121362209320068,grad_norm: 0.6845001866701201, iteration: 438970
loss: 0.9700599312782288,grad_norm: 0.6632429822856416, iteration: 438971
loss: 0.9588409662246704,grad_norm: 0.7578188374891885, iteration: 438972
loss: 0.9520678520202637,grad_norm: 0.6782124802634587, iteration: 438973
loss: 0.9922723770141602,grad_norm: 0.6392767643079047, iteration: 438974
loss: 1.0015925168991089,grad_norm: 0.9033634104513749, iteration: 438975
loss: 1.009253978729248,grad_norm: 0.9783984422381402, iteration: 438976
loss: 0.9955114126205444,grad_norm: 0.8362061626035231, iteration: 438977
loss: 0.9937107563018799,grad_norm: 0.6978945867377764, iteration: 438978
loss: 1.0131038427352905,grad_norm: 0.9999992375892113, iteration: 438979
loss: 1.0104776620864868,grad_norm: 0.6642221948779216, iteration: 438980
loss: 1.0033038854599,grad_norm: 0.6684940092243761, iteration: 438981
loss: 0.9693163633346558,grad_norm: 0.7187458070461586, iteration: 438982
loss: 1.1887084245681763,grad_norm: 0.9999991203717363, iteration: 438983
loss: 0.9830952882766724,grad_norm: 0.8444580045222171, iteration: 438984
loss: 1.0336885452270508,grad_norm: 0.6966924437093411, iteration: 438985
loss: 0.9938449859619141,grad_norm: 0.9435473617983599, iteration: 438986
loss: 1.023026704788208,grad_norm: 0.9007516319753935, iteration: 438987
loss: 1.0456405878067017,grad_norm: 0.7981929027207725, iteration: 438988
loss: 0.9919873476028442,grad_norm: 0.7577493082421924, iteration: 438989
loss: 1.0069971084594727,grad_norm: 0.9999992212753986, iteration: 438990
loss: 0.9927324652671814,grad_norm: 0.9116044145112202, iteration: 438991
loss: 0.9867284297943115,grad_norm: 0.8170097377249487, iteration: 438992
loss: 1.0016802549362183,grad_norm: 0.9999993683250085, iteration: 438993
loss: 0.9785062074661255,grad_norm: 0.6625169487209349, iteration: 438994
loss: 0.9824329018592834,grad_norm: 0.6804757236797611, iteration: 438995
loss: 0.9963454604148865,grad_norm: 0.6908009838195083, iteration: 438996
loss: 1.062947154045105,grad_norm: 0.9999998069476908, iteration: 438997
loss: 1.0311710834503174,grad_norm: 0.6924862922995769, iteration: 438998
loss: 1.1070177555084229,grad_norm: 0.9999998130032076, iteration: 438999
loss: 0.9835367202758789,grad_norm: 0.8925305807759233, iteration: 439000
loss: 0.9845626950263977,grad_norm: 0.7720162247015753, iteration: 439001
loss: 1.0749568939208984,grad_norm: 0.9999996057899764, iteration: 439002
loss: 1.0032176971435547,grad_norm: 0.6799644726402861, iteration: 439003
loss: 1.09468674659729,grad_norm: 0.9999991534122872, iteration: 439004
loss: 1.0212082862854004,grad_norm: 0.9999993912981298, iteration: 439005
loss: 1.0275768041610718,grad_norm: 0.6868280325729019, iteration: 439006
loss: 0.9877843260765076,grad_norm: 0.7094108847266225, iteration: 439007
loss: 1.0269242525100708,grad_norm: 0.9999992801260791, iteration: 439008
loss: 1.0148826837539673,grad_norm: 0.9344545532144999, iteration: 439009
loss: 1.0812711715698242,grad_norm: 0.9999998353429055, iteration: 439010
loss: 1.0058337450027466,grad_norm: 0.7474298035548455, iteration: 439011
loss: 1.004309892654419,grad_norm: 0.9999999359397128, iteration: 439012
loss: 0.9632605910301208,grad_norm: 0.6265592665093626, iteration: 439013
loss: 0.9969334602355957,grad_norm: 0.7910843756011091, iteration: 439014
loss: 1.0127568244934082,grad_norm: 0.7439371025971848, iteration: 439015
loss: 1.0153676271438599,grad_norm: 0.8906610620580158, iteration: 439016
loss: 1.059978723526001,grad_norm: 0.9999995780444859, iteration: 439017
loss: 0.9833645820617676,grad_norm: 0.704333154420616, iteration: 439018
loss: 0.9921082854270935,grad_norm: 0.9999990734303693, iteration: 439019
loss: 1.0357577800750732,grad_norm: 0.7137316403478108, iteration: 439020
loss: 0.9823400974273682,grad_norm: 0.9999993320302589, iteration: 439021
loss: 1.0424069166183472,grad_norm: 0.8165558036797945, iteration: 439022
loss: 0.9685004353523254,grad_norm: 0.7552347921878685, iteration: 439023
loss: 1.0410792827606201,grad_norm: 0.8194633231976718, iteration: 439024
loss: 0.9684748649597168,grad_norm: 0.7487314207379694, iteration: 439025
loss: 1.0015885829925537,grad_norm: 0.6899457836735174, iteration: 439026
loss: 0.9790402054786682,grad_norm: 0.90747272660551, iteration: 439027
loss: 1.0185015201568604,grad_norm: 0.6960702911939797, iteration: 439028
loss: 0.9817381501197815,grad_norm: 0.7866256643073228, iteration: 439029
loss: 1.0369819402694702,grad_norm: 0.7931181282250992, iteration: 439030
loss: 0.985683023929596,grad_norm: 0.8000129918141347, iteration: 439031
loss: 0.9992555379867554,grad_norm: 0.9999991811440078, iteration: 439032
loss: 0.9629333019256592,grad_norm: 0.793320703507739, iteration: 439033
loss: 0.984985888004303,grad_norm: 0.8993493453756999, iteration: 439034
loss: 0.9736756682395935,grad_norm: 0.7782046325387064, iteration: 439035
loss: 0.9916759729385376,grad_norm: 0.6729577558890223, iteration: 439036
loss: 1.0107063055038452,grad_norm: 0.7781712373680882, iteration: 439037
loss: 1.0861711502075195,grad_norm: 0.9999991987962127, iteration: 439038
loss: 1.0042966604232788,grad_norm: 0.772049753337312, iteration: 439039
loss: 0.979988694190979,grad_norm: 0.9359776096791328, iteration: 439040
loss: 0.957457423210144,grad_norm: 0.6828713985883024, iteration: 439041
loss: 1.050874948501587,grad_norm: 0.723732014323038, iteration: 439042
loss: 0.9721165895462036,grad_norm: 0.7338666581181059, iteration: 439043
loss: 1.003309965133667,grad_norm: 0.9999989776406998, iteration: 439044
loss: 1.0706300735473633,grad_norm: 0.7694801117111365, iteration: 439045
loss: 1.003134846687317,grad_norm: 0.8114005034665269, iteration: 439046
loss: 0.9858061075210571,grad_norm: 0.9999995734407542, iteration: 439047
loss: 0.9943295121192932,grad_norm: 0.7221087370450071, iteration: 439048
loss: 1.0123720169067383,grad_norm: 0.832087225371395, iteration: 439049
loss: 0.9807843565940857,grad_norm: 0.779028103093002, iteration: 439050
loss: 1.0095503330230713,grad_norm: 0.8947147322973049, iteration: 439051
loss: 1.0162872076034546,grad_norm: 0.7851748300367715, iteration: 439052
loss: 1.0012922286987305,grad_norm: 0.8368850102087841, iteration: 439053
loss: 0.9910500645637512,grad_norm: 0.7601998277211958, iteration: 439054
loss: 1.0138702392578125,grad_norm: 0.7603998667228353, iteration: 439055
loss: 1.0119277238845825,grad_norm: 0.7420402291091199, iteration: 439056
loss: 0.9680737853050232,grad_norm: 0.7762714536270559, iteration: 439057
loss: 1.0065146684646606,grad_norm: 0.7540027031397152, iteration: 439058
loss: 1.0278656482696533,grad_norm: 0.7460491225818788, iteration: 439059
loss: 0.9831598401069641,grad_norm: 0.7624237195314215, iteration: 439060
loss: 0.9801837801933289,grad_norm: 0.7081877266625879, iteration: 439061
loss: 0.9820568561553955,grad_norm: 0.655880418215811, iteration: 439062
loss: 0.9915406107902527,grad_norm: 0.8383760634985744, iteration: 439063
loss: 1.0178234577178955,grad_norm: 0.9999993661806698, iteration: 439064
loss: 1.01792311668396,grad_norm: 0.7085652753140618, iteration: 439065
loss: 0.9753435850143433,grad_norm: 0.8169370091652348, iteration: 439066
loss: 0.9855968356132507,grad_norm: 0.770541792232315, iteration: 439067
loss: 0.9900936484336853,grad_norm: 0.8494899026647313, iteration: 439068
loss: 1.0212347507476807,grad_norm: 0.9967518576578643, iteration: 439069
loss: 1.0428298711776733,grad_norm: 0.7936105948865113, iteration: 439070
loss: 1.0317974090576172,grad_norm: 0.6073296860563607, iteration: 439071
loss: 0.989849865436554,grad_norm: 0.8982966152712727, iteration: 439072
loss: 0.9995316863059998,grad_norm: 0.7096950454442467, iteration: 439073
loss: 1.0217177867889404,grad_norm: 0.7602122669566007, iteration: 439074
loss: 0.9745193123817444,grad_norm: 0.6976318344746919, iteration: 439075
loss: 0.9666732549667358,grad_norm: 0.6961236429003722, iteration: 439076
loss: 1.0388126373291016,grad_norm: 0.7583254242892659, iteration: 439077
loss: 0.9828130006790161,grad_norm: 0.7654558328540287, iteration: 439078
loss: 0.9627119898796082,grad_norm: 0.8442315778811037, iteration: 439079
loss: 0.9519509077072144,grad_norm: 0.7756502884010834, iteration: 439080
loss: 1.0049961805343628,grad_norm: 0.6889515126008638, iteration: 439081
loss: 1.0081737041473389,grad_norm: 0.6792677410719193, iteration: 439082
loss: 1.0065245628356934,grad_norm: 0.9999992814084345, iteration: 439083
loss: 1.0258030891418457,grad_norm: 0.6075117746256038, iteration: 439084
loss: 0.9537447094917297,grad_norm: 0.7564337536198262, iteration: 439085
loss: 1.0162736177444458,grad_norm: 0.8956252305030391, iteration: 439086
loss: 1.048322319984436,grad_norm: 0.7747088692964167, iteration: 439087
loss: 0.9817399978637695,grad_norm: 0.7731566973195864, iteration: 439088
loss: 1.209163784980774,grad_norm: 0.9999992581673743, iteration: 439089
loss: 1.0449485778808594,grad_norm: 0.7480419086548158, iteration: 439090
loss: 0.9907594323158264,grad_norm: 0.7790667895029505, iteration: 439091
loss: 1.0171552896499634,grad_norm: 0.9999990014302046, iteration: 439092
loss: 1.0171314477920532,grad_norm: 0.8626581408669389, iteration: 439093
loss: 1.0510445833206177,grad_norm: 0.8416244443069253, iteration: 439094
loss: 1.005692720413208,grad_norm: 0.9224145479093047, iteration: 439095
loss: 1.0488835573196411,grad_norm: 0.9999995533877158, iteration: 439096
loss: 0.9948459267616272,grad_norm: 0.9295133303397434, iteration: 439097
loss: 0.9940897226333618,grad_norm: 0.8721331819833166, iteration: 439098
loss: 0.9934011101722717,grad_norm: 0.737211617282592, iteration: 439099
loss: 1.0280628204345703,grad_norm: 0.9999995381378213, iteration: 439100
loss: 0.9962087869644165,grad_norm: 0.831123038015273, iteration: 439101
loss: 1.006820797920227,grad_norm: 0.7275516265945235, iteration: 439102
loss: 0.9841986894607544,grad_norm: 0.6343637717099704, iteration: 439103
loss: 1.0107460021972656,grad_norm: 0.7654176396342753, iteration: 439104
loss: 0.9705031514167786,grad_norm: 0.8279404419872453, iteration: 439105
loss: 1.0172383785247803,grad_norm: 0.6570511458857327, iteration: 439106
loss: 0.9939671158790588,grad_norm: 0.8642319891619172, iteration: 439107
loss: 0.9617547988891602,grad_norm: 0.7727752245026747, iteration: 439108
loss: 0.9809353351593018,grad_norm: 0.7544848628116791, iteration: 439109
loss: 1.0012776851654053,grad_norm: 0.8225382585252793, iteration: 439110
loss: 1.0127904415130615,grad_norm: 0.6350386170713238, iteration: 439111
loss: 1.0058122873306274,grad_norm: 0.7359078316648993, iteration: 439112
loss: 1.0058622360229492,grad_norm: 0.8541319966236969, iteration: 439113
loss: 1.0077134370803833,grad_norm: 0.7844227407055387, iteration: 439114
loss: 0.9925216436386108,grad_norm: 0.7579747752044721, iteration: 439115
loss: 1.0129919052124023,grad_norm: 0.6953265175334346, iteration: 439116
loss: 0.9613621830940247,grad_norm: 0.8557558005018089, iteration: 439117
loss: 0.9858329892158508,grad_norm: 0.7288781856542318, iteration: 439118
loss: 1.0092648267745972,grad_norm: 0.7943099103328218, iteration: 439119
loss: 1.0038033723831177,grad_norm: 0.6756106919745695, iteration: 439120
loss: 0.9872394800186157,grad_norm: 0.8763404126033505, iteration: 439121
loss: 1.0190438032150269,grad_norm: 0.8254648518424387, iteration: 439122
loss: 0.9910558462142944,grad_norm: 0.8934811379308243, iteration: 439123
loss: 1.0010682344436646,grad_norm: 0.8889430427888785, iteration: 439124
loss: 0.9885692596435547,grad_norm: 0.7712319973265443, iteration: 439125
loss: 1.016579031944275,grad_norm: 0.8769127913006198, iteration: 439126
loss: 1.0422673225402832,grad_norm: 0.9999993285763542, iteration: 439127
loss: 1.0017341375350952,grad_norm: 0.6554373019576208, iteration: 439128
loss: 0.961800754070282,grad_norm: 0.7344962401855589, iteration: 439129
loss: 0.993755042552948,grad_norm: 0.9645562212764135, iteration: 439130
loss: 1.063876986503601,grad_norm: 0.9999996345974981, iteration: 439131
loss: 0.9911146759986877,grad_norm: 0.6552691733967644, iteration: 439132
loss: 1.0124865770339966,grad_norm: 0.9966040592701566, iteration: 439133
loss: 1.0810096263885498,grad_norm: 0.7748301105811124, iteration: 439134
loss: 0.9887441992759705,grad_norm: 0.6480504728265586, iteration: 439135
loss: 1.0129501819610596,grad_norm: 0.8367992973555213, iteration: 439136
loss: 0.9809653759002686,grad_norm: 0.7406108944192499, iteration: 439137
loss: 0.9507638216018677,grad_norm: 0.8392194576122096, iteration: 439138
loss: 0.9698913097381592,grad_norm: 0.8411125492950728, iteration: 439139
loss: 0.9986622929573059,grad_norm: 0.6260759455404195, iteration: 439140
loss: 0.9784948825836182,grad_norm: 0.7643283074368221, iteration: 439141
loss: 0.9999845623970032,grad_norm: 0.8695246349894565, iteration: 439142
loss: 1.003995656967163,grad_norm: 0.7213387982263364, iteration: 439143
loss: 1.0092366933822632,grad_norm: 0.9172953422162826, iteration: 439144
loss: 0.9784614443778992,grad_norm: 0.8412701641725674, iteration: 439145
loss: 1.0061448812484741,grad_norm: 0.7240847322759982, iteration: 439146
loss: 1.0493603944778442,grad_norm: 0.8146563209918289, iteration: 439147
loss: 1.0170398950576782,grad_norm: 0.6649462383220294, iteration: 439148
loss: 0.9780613780021667,grad_norm: 0.7038488492705748, iteration: 439149
loss: 1.0019925832748413,grad_norm: 0.9590687699504634, iteration: 439150
loss: 1.0284883975982666,grad_norm: 0.7638229534025747, iteration: 439151
loss: 0.9837767481803894,grad_norm: 0.7054510882789297, iteration: 439152
loss: 1.0667153596878052,grad_norm: 0.7279772251289252, iteration: 439153
loss: 0.9885172843933105,grad_norm: 0.8167520801420849, iteration: 439154
loss: 0.9812477231025696,grad_norm: 0.7521985533797295, iteration: 439155
loss: 0.9793776869773865,grad_norm: 0.812991198884384, iteration: 439156
loss: 0.9959560632705688,grad_norm: 0.814912034297988, iteration: 439157
loss: 0.9810654520988464,grad_norm: 0.6570727797312097, iteration: 439158
loss: 0.9942989945411682,grad_norm: 0.8147696068006645, iteration: 439159
loss: 1.008790135383606,grad_norm: 0.7592882953771055, iteration: 439160
loss: 1.0241175889968872,grad_norm: 0.7296088303579975, iteration: 439161
loss: 0.9831586480140686,grad_norm: 0.7524176441609459, iteration: 439162
loss: 1.0366249084472656,grad_norm: 0.6593279483338148, iteration: 439163
loss: 1.0198394060134888,grad_norm: 0.8480720260058553, iteration: 439164
loss: 0.998813807964325,grad_norm: 0.7884135766684894, iteration: 439165
loss: 1.038360357284546,grad_norm: 0.6757710133668448, iteration: 439166
loss: 0.9755569100379944,grad_norm: 0.8227700911421237, iteration: 439167
loss: 0.9991330504417419,grad_norm: 0.6853611325137674, iteration: 439168
loss: 1.008428692817688,grad_norm: 0.9999998705331147, iteration: 439169
loss: 0.977811336517334,grad_norm: 0.7459237175662884, iteration: 439170
loss: 0.9568063616752625,grad_norm: 0.811017520254533, iteration: 439171
loss: 0.9912016987800598,grad_norm: 0.7949831953325086, iteration: 439172
loss: 0.9684389233589172,grad_norm: 0.7921984356455292, iteration: 439173
loss: 0.9941431283950806,grad_norm: 0.7841851075191505, iteration: 439174
loss: 1.0109225511550903,grad_norm: 0.7415365559668359, iteration: 439175
loss: 0.978373646736145,grad_norm: 0.8681574685301195, iteration: 439176
loss: 0.9728192687034607,grad_norm: 0.7946383488257541, iteration: 439177
loss: 1.0231938362121582,grad_norm: 0.7456785880396914, iteration: 439178
loss: 0.9592296481132507,grad_norm: 0.8174341057789045, iteration: 439179
loss: 0.9957557320594788,grad_norm: 0.7394940016205815, iteration: 439180
loss: 1.0175522565841675,grad_norm: 0.8650286873538375, iteration: 439181
loss: 1.0177112817764282,grad_norm: 0.8320657355959135, iteration: 439182
loss: 0.993375301361084,grad_norm: 0.7235122431259425, iteration: 439183
loss: 0.9974854588508606,grad_norm: 0.8310437994411136, iteration: 439184
loss: 1.01496422290802,grad_norm: 0.7547901978825767, iteration: 439185
loss: 1.0105915069580078,grad_norm: 0.7222546924125431, iteration: 439186
loss: 0.9883357882499695,grad_norm: 0.6928122354691443, iteration: 439187
loss: 0.9678921699523926,grad_norm: 0.9081713445568657, iteration: 439188
loss: 1.030064344406128,grad_norm: 0.6898825039475284, iteration: 439189
loss: 1.1345716714859009,grad_norm: 0.9999994477362183, iteration: 439190
loss: 0.9657614231109619,grad_norm: 0.8358901370408508, iteration: 439191
loss: 0.965019166469574,grad_norm: 0.7762571508758878, iteration: 439192
loss: 1.0086591243743896,grad_norm: 0.6813016757113587, iteration: 439193
loss: 1.0104190111160278,grad_norm: 0.815094814731805, iteration: 439194
loss: 1.0352489948272705,grad_norm: 0.8135542297664944, iteration: 439195
loss: 0.9828340411186218,grad_norm: 0.8646609909414704, iteration: 439196
loss: 1.0577870607376099,grad_norm: 0.7991438176120296, iteration: 439197
loss: 0.9764402508735657,grad_norm: 0.7974361316842719, iteration: 439198
loss: 1.0159826278686523,grad_norm: 0.9999990521357122, iteration: 439199
loss: 0.9977191090583801,grad_norm: 0.7285273734223022, iteration: 439200
loss: 1.0015301704406738,grad_norm: 0.6535251000934537, iteration: 439201
loss: 1.046738624572754,grad_norm: 0.9905537153568158, iteration: 439202
loss: 0.9949256181716919,grad_norm: 0.7312015625945194, iteration: 439203
loss: 1.000319004058838,grad_norm: 0.7561064064320545, iteration: 439204
loss: 0.9986114501953125,grad_norm: 0.7092230833501983, iteration: 439205
loss: 0.9856280088424683,grad_norm: 0.8627744774496955, iteration: 439206
loss: 0.9890881776809692,grad_norm: 0.8317059903973559, iteration: 439207
loss: 0.9948193430900574,grad_norm: 0.7771553032527593, iteration: 439208
loss: 0.9970772862434387,grad_norm: 0.9962487156966514, iteration: 439209
loss: 1.0143297910690308,grad_norm: 0.8933318206252989, iteration: 439210
loss: 0.9988356232643127,grad_norm: 0.8317123383491959, iteration: 439211
loss: 1.0602647066116333,grad_norm: 0.7154179603425023, iteration: 439212
loss: 0.9862352013587952,grad_norm: 0.7284283159795539, iteration: 439213
loss: 0.9823341965675354,grad_norm: 0.7539233502700526, iteration: 439214
loss: 1.0245689153671265,grad_norm: 0.8694701842735725, iteration: 439215
loss: 1.2722784280776978,grad_norm: 0.9999997606462238, iteration: 439216
loss: 1.0366489887237549,grad_norm: 0.8268500420356607, iteration: 439217
loss: 0.9867528080940247,grad_norm: 0.811666360707128, iteration: 439218
loss: 1.099804162979126,grad_norm: 0.9999999368435764, iteration: 439219
loss: 1.0073131322860718,grad_norm: 0.5938189076700673, iteration: 439220
loss: 1.0010172128677368,grad_norm: 0.7063729030973517, iteration: 439221
loss: 0.985715389251709,grad_norm: 0.9068542548494256, iteration: 439222
loss: 1.0028433799743652,grad_norm: 0.7474612802031451, iteration: 439223
loss: 1.0220245122909546,grad_norm: 0.7810820744829792, iteration: 439224
loss: 1.0257858037948608,grad_norm: 0.7145825419170034, iteration: 439225
loss: 1.0003708600997925,grad_norm: 0.8034819436430288, iteration: 439226
loss: 1.0108461380004883,grad_norm: 0.8605388409789632, iteration: 439227
loss: 1.0159777402877808,grad_norm: 0.7575051568546995, iteration: 439228
loss: 0.9913903474807739,grad_norm: 0.66655361379072, iteration: 439229
loss: 0.9879316091537476,grad_norm: 0.6973013085422126, iteration: 439230
loss: 1.0088213682174683,grad_norm: 0.7058342931958215, iteration: 439231
loss: 1.0041003227233887,grad_norm: 0.7516676483457406, iteration: 439232
loss: 1.0037410259246826,grad_norm: 0.7549350669990886, iteration: 439233
loss: 1.0003679990768433,grad_norm: 0.7623829492787676, iteration: 439234
loss: 1.0116374492645264,grad_norm: 0.9159399098596912, iteration: 439235
loss: 0.9872846603393555,grad_norm: 0.7388723297873737, iteration: 439236
loss: 0.9974073171615601,grad_norm: 0.7687918174234809, iteration: 439237
loss: 0.9787702560424805,grad_norm: 0.7580517438557157, iteration: 439238
loss: 0.9966505765914917,grad_norm: 0.9456245778076824, iteration: 439239
loss: 1.011852502822876,grad_norm: 0.6825947225305471, iteration: 439240
loss: 0.9989374279975891,grad_norm: 0.9227116820798572, iteration: 439241
loss: 0.9680854082107544,grad_norm: 0.7833951514701186, iteration: 439242
loss: 1.0182619094848633,grad_norm: 0.8815402399987808, iteration: 439243
loss: 0.9646709561347961,grad_norm: 0.7633450872990484, iteration: 439244
loss: 1.0263386964797974,grad_norm: 0.8303226069637998, iteration: 439245
loss: 1.1019057035446167,grad_norm: 0.8898090280017789, iteration: 439246
loss: 1.0202699899673462,grad_norm: 0.8922441015804099, iteration: 439247
loss: 1.1410397291183472,grad_norm: 0.8890374900426822, iteration: 439248
loss: 1.0453370809555054,grad_norm: 0.9999993796452602, iteration: 439249
loss: 0.9920417666435242,grad_norm: 0.8766720382354385, iteration: 439250
loss: 1.0153257846832275,grad_norm: 0.8991812651280132, iteration: 439251
loss: 1.0115565061569214,grad_norm: 0.8590825603526909, iteration: 439252
loss: 0.960456132888794,grad_norm: 0.7936627479141412, iteration: 439253
loss: 0.9975172877311707,grad_norm: 0.69190430983853, iteration: 439254
loss: 0.9784359335899353,grad_norm: 0.7639669182653127, iteration: 439255
loss: 1.0083664655685425,grad_norm: 0.7216015654357305, iteration: 439256
loss: 1.0081868171691895,grad_norm: 0.697328348704003, iteration: 439257
loss: 0.9776557087898254,grad_norm: 0.9556718114207864, iteration: 439258
loss: 1.0055190324783325,grad_norm: 0.8251796244616927, iteration: 439259
loss: 1.0239534378051758,grad_norm: 0.7499175864415732, iteration: 439260
loss: 0.983421802520752,grad_norm: 0.7501539990997205, iteration: 439261
loss: 0.9957935810089111,grad_norm: 0.8087457933843888, iteration: 439262
loss: 1.0813519954681396,grad_norm: 0.9999990662498662, iteration: 439263
loss: 0.9747711420059204,grad_norm: 0.7936736744081262, iteration: 439264
loss: 1.0113376379013062,grad_norm: 0.8358139062124265, iteration: 439265
loss: 0.9838286638259888,grad_norm: 0.5903189285930416, iteration: 439266
loss: 0.9965642094612122,grad_norm: 0.8810797891073454, iteration: 439267
loss: 1.0123714208602905,grad_norm: 0.860911452274615, iteration: 439268
loss: 1.017455816268921,grad_norm: 0.7562071863685746, iteration: 439269
loss: 1.009317398071289,grad_norm: 0.7944300220203918, iteration: 439270
loss: 0.9999872446060181,grad_norm: 0.5800663503629417, iteration: 439271
loss: 0.9890081882476807,grad_norm: 0.6763658606704869, iteration: 439272
loss: 0.9956701397895813,grad_norm: 0.7666392636155076, iteration: 439273
loss: 0.9864934682846069,grad_norm: 0.7198713133330072, iteration: 439274
loss: 0.9807756543159485,grad_norm: 0.6925328370789372, iteration: 439275
loss: 1.0213441848754883,grad_norm: 0.7850029493085128, iteration: 439276
loss: 0.9930492043495178,grad_norm: 0.8490202862199173, iteration: 439277
loss: 1.0065507888793945,grad_norm: 0.8334389959940274, iteration: 439278
loss: 1.0023090839385986,grad_norm: 0.8958994413928193, iteration: 439279
loss: 1.012588620185852,grad_norm: 0.834852547742106, iteration: 439280
loss: 1.042681336402893,grad_norm: 0.846603582623518, iteration: 439281
loss: 1.0367363691329956,grad_norm: 0.8292719856106731, iteration: 439282
loss: 1.0200570821762085,grad_norm: 0.9874853264422024, iteration: 439283
loss: 1.0308196544647217,grad_norm: 1.0000000083621583, iteration: 439284
loss: 0.9854535460472107,grad_norm: 0.764300780044497, iteration: 439285
loss: 0.9770407676696777,grad_norm: 0.826883021475637, iteration: 439286
loss: 0.9948042631149292,grad_norm: 0.7530070367983944, iteration: 439287
loss: 0.9868312478065491,grad_norm: 0.7245430827888951, iteration: 439288
loss: 1.1756800413131714,grad_norm: 0.999999423937847, iteration: 439289
loss: 0.9814384579658508,grad_norm: 0.730464657437184, iteration: 439290
loss: 1.0056121349334717,grad_norm: 0.9098227016058609, iteration: 439291
loss: 1.0158791542053223,grad_norm: 0.9579040709113106, iteration: 439292
loss: 0.95158451795578,grad_norm: 0.7699330573232663, iteration: 439293
loss: 0.9886905550956726,grad_norm: 0.8034667361229489, iteration: 439294
loss: 1.0610445737838745,grad_norm: 0.8738276021685073, iteration: 439295
loss: 1.322108507156372,grad_norm: 0.9999990559904407, iteration: 439296
loss: 1.217482328414917,grad_norm: 0.9999997830002134, iteration: 439297
loss: 1.0282458066940308,grad_norm: 0.9999991682096199, iteration: 439298
loss: 1.024420976638794,grad_norm: 0.9999995854314364, iteration: 439299
loss: 0.9817456007003784,grad_norm: 0.8486522770279504, iteration: 439300
loss: 0.9891899824142456,grad_norm: 0.8348672251668777, iteration: 439301
loss: 0.9542196989059448,grad_norm: 0.8314644076132152, iteration: 439302
loss: 0.9989630579948425,grad_norm: 0.8126473036513969, iteration: 439303
loss: 0.9871264696121216,grad_norm: 0.7681719543352845, iteration: 439304
loss: 1.0694829225540161,grad_norm: 0.9999995458758687, iteration: 439305
loss: 1.0679373741149902,grad_norm: 0.7541454953217004, iteration: 439306
loss: 1.0444626808166504,grad_norm: 0.7769312518557777, iteration: 439307
loss: 0.9956249594688416,grad_norm: 0.9999990765881279, iteration: 439308
loss: 1.1425329446792603,grad_norm: 0.9999995839498574, iteration: 439309
loss: 1.006555438041687,grad_norm: 0.7961129096408385, iteration: 439310
loss: 0.9625489711761475,grad_norm: 0.8851220636566894, iteration: 439311
loss: 0.9889630675315857,grad_norm: 0.6981120410624411, iteration: 439312
loss: 1.0036582946777344,grad_norm: 0.679856323915266, iteration: 439313
loss: 1.015760898590088,grad_norm: 0.8267103103699308, iteration: 439314
loss: 1.00405752658844,grad_norm: 0.7893126388921081, iteration: 439315
loss: 0.9808899164199829,grad_norm: 0.7001942793071924, iteration: 439316
loss: 0.9814207553863525,grad_norm: 0.7482154986631664, iteration: 439317
loss: 1.0435341596603394,grad_norm: 0.8785220098144758, iteration: 439318
loss: 1.0061452388763428,grad_norm: 0.7911730838661735, iteration: 439319
loss: 0.9951924085617065,grad_norm: 0.6148001070606367, iteration: 439320
loss: 1.0139740705490112,grad_norm: 0.9962351819747682, iteration: 439321
loss: 1.0063947439193726,grad_norm: 0.7393105668012964, iteration: 439322
loss: 1.0015963315963745,grad_norm: 0.6482053166714838, iteration: 439323
loss: 1.0805163383483887,grad_norm: 0.8438820879929226, iteration: 439324
loss: 1.0115348100662231,grad_norm: 0.7164914172727441, iteration: 439325
loss: 1.000905990600586,grad_norm: 0.8140848593962067, iteration: 439326
loss: 0.9942148327827454,grad_norm: 0.857527266120019, iteration: 439327
loss: 0.977935254573822,grad_norm: 0.7251124571495832, iteration: 439328
loss: 1.0257090330123901,grad_norm: 0.8638539664078568, iteration: 439329
loss: 0.9994470477104187,grad_norm: 0.8678771132550284, iteration: 439330
loss: 0.9836737513542175,grad_norm: 0.7374156342267214, iteration: 439331
loss: 1.205649733543396,grad_norm: 0.9999992117966329, iteration: 439332
loss: 1.0304542779922485,grad_norm: 0.7223896210402915, iteration: 439333
loss: 0.9778972268104553,grad_norm: 0.7145197591295439, iteration: 439334
loss: 1.0014622211456299,grad_norm: 0.7700521135819544, iteration: 439335
loss: 1.0530691146850586,grad_norm: 0.7021257123176284, iteration: 439336
loss: 0.9937981367111206,grad_norm: 0.7566132955831506, iteration: 439337
loss: 1.105394959449768,grad_norm: 0.9999998382410182, iteration: 439338
loss: 0.9684083461761475,grad_norm: 0.827019779628332, iteration: 439339
loss: 1.0125728845596313,grad_norm: 0.7378389618336286, iteration: 439340
loss: 0.9971702098846436,grad_norm: 0.7664786866396287, iteration: 439341
loss: 0.9667237997055054,grad_norm: 0.7003951413596224, iteration: 439342
loss: 1.0035598278045654,grad_norm: 0.6734426038231195, iteration: 439343
loss: 1.004525065422058,grad_norm: 0.7965723961397013, iteration: 439344
loss: 1.1160675287246704,grad_norm: 0.9999992857964982, iteration: 439345
loss: 0.957187294960022,grad_norm: 0.7968417793229363, iteration: 439346
loss: 0.9946039319038391,grad_norm: 0.8057301608190199, iteration: 439347
loss: 0.9518938660621643,grad_norm: 0.7682860026401617, iteration: 439348
loss: 0.9624462723731995,grad_norm: 0.7929754545399331, iteration: 439349
loss: 1.0017536878585815,grad_norm: 0.7395199405155951, iteration: 439350
loss: 0.9898988008499146,grad_norm: 0.8971437835551983, iteration: 439351
loss: 0.9958749413490295,grad_norm: 0.7599236447109268, iteration: 439352
loss: 1.019304633140564,grad_norm: 0.5445824186418511, iteration: 439353
loss: 1.0132139921188354,grad_norm: 0.8239050239253192, iteration: 439354
loss: 1.0052828788757324,grad_norm: 0.8723528761366292, iteration: 439355
loss: 1.0104639530181885,grad_norm: 0.7505954966297879, iteration: 439356
loss: 0.9864827990531921,grad_norm: 0.8647387544659008, iteration: 439357
loss: 1.023198127746582,grad_norm: 0.7320829400702825, iteration: 439358
loss: 1.024156093597412,grad_norm: 0.9193494831062878, iteration: 439359
loss: 0.9973092675209045,grad_norm: 0.7198435343891072, iteration: 439360
loss: 1.0357379913330078,grad_norm: 0.8897562786143949, iteration: 439361
loss: 0.9744107127189636,grad_norm: 0.9133620442127005, iteration: 439362
loss: 1.0010473728179932,grad_norm: 0.7703863085247153, iteration: 439363
loss: 1.024150013923645,grad_norm: 0.958951699285175, iteration: 439364
loss: 1.0400333404541016,grad_norm: 0.7279695889837898, iteration: 439365
loss: 1.0059175491333008,grad_norm: 0.9243251526473969, iteration: 439366
loss: 1.0122096538543701,grad_norm: 0.7988511972752148, iteration: 439367
loss: 1.0052554607391357,grad_norm: 0.7507845519180826, iteration: 439368
loss: 0.9851733446121216,grad_norm: 0.8095669534110075, iteration: 439369
loss: 1.0288695096969604,grad_norm: 0.6622503560704694, iteration: 439370
loss: 0.9650170803070068,grad_norm: 0.9344163892167263, iteration: 439371
loss: 1.0346258878707886,grad_norm: 0.7087490726311144, iteration: 439372
loss: 1.0903265476226807,grad_norm: 0.7475921650509915, iteration: 439373
loss: 0.9647990465164185,grad_norm: 0.84499315289964, iteration: 439374
loss: 1.0195010900497437,grad_norm: 0.7229291277137797, iteration: 439375
loss: 1.0010788440704346,grad_norm: 0.6757831124450738, iteration: 439376
loss: 0.9939104914665222,grad_norm: 0.9999996117927261, iteration: 439377
loss: 0.9815599322319031,grad_norm: 0.8049553029495047, iteration: 439378
loss: 1.032826542854309,grad_norm: 0.7650863532670275, iteration: 439379
loss: 0.9763660430908203,grad_norm: 0.6948004810931824, iteration: 439380
loss: 1.019753336906433,grad_norm: 0.8510978980928825, iteration: 439381
loss: 1.0117554664611816,grad_norm: 0.7997109136201322, iteration: 439382
loss: 0.9818311333656311,grad_norm: 0.9999991230801644, iteration: 439383
loss: 1.0069609880447388,grad_norm: 0.7041342710150775, iteration: 439384
loss: 0.977580726146698,grad_norm: 0.9987829722546339, iteration: 439385
loss: 0.9879102110862732,grad_norm: 0.7382790692385153, iteration: 439386
loss: 0.9702714085578918,grad_norm: 0.6788106732470421, iteration: 439387
loss: 0.9754533767700195,grad_norm: 0.7393947019070549, iteration: 439388
loss: 1.0077277421951294,grad_norm: 0.7922093175552151, iteration: 439389
loss: 1.012906789779663,grad_norm: 0.6817689534576353, iteration: 439390
loss: 0.9584968686103821,grad_norm: 0.9093558732257807, iteration: 439391
loss: 1.0626826286315918,grad_norm: 0.999999374963635, iteration: 439392
loss: 0.9961178302764893,grad_norm: 0.7982589951653993, iteration: 439393
loss: 1.011204481124878,grad_norm: 0.851842389465694, iteration: 439394
loss: 1.033418893814087,grad_norm: 0.9999991778563099, iteration: 439395
loss: 1.0046696662902832,grad_norm: 0.8426584214736972, iteration: 439396
loss: 0.9872915148735046,grad_norm: 0.8246874416476112, iteration: 439397
loss: 0.9982309341430664,grad_norm: 0.8036425017102617, iteration: 439398
loss: 0.9762978553771973,grad_norm: 0.9999993221805662, iteration: 439399
loss: 1.0032742023468018,grad_norm: 0.7068816605248786, iteration: 439400
loss: 0.9892670512199402,grad_norm: 0.7778282337482005, iteration: 439401
loss: 1.0013936758041382,grad_norm: 0.7005577875680031, iteration: 439402
loss: 1.0182483196258545,grad_norm: 0.6875454552427075, iteration: 439403
loss: 1.0250548124313354,grad_norm: 0.7159264175564066, iteration: 439404
loss: 0.9553771018981934,grad_norm: 0.7636459277979527, iteration: 439405
loss: 0.9934999942779541,grad_norm: 0.7301337892150858, iteration: 439406
loss: 0.9902001023292542,grad_norm: 0.670447568368623, iteration: 439407
loss: 0.9761052131652832,grad_norm: 0.7122185589796487, iteration: 439408
loss: 1.0085170269012451,grad_norm: 0.7386837959351621, iteration: 439409
loss: 1.0001649856567383,grad_norm: 0.7661205671882046, iteration: 439410
loss: 1.0095371007919312,grad_norm: 0.8924954020555027, iteration: 439411
loss: 0.9961698055267334,grad_norm: 0.844660699176301, iteration: 439412
loss: 1.0227149724960327,grad_norm: 0.737831286465659, iteration: 439413
loss: 1.028976559638977,grad_norm: 0.7911916222378749, iteration: 439414
loss: 0.9846577048301697,grad_norm: 0.7911796478463462, iteration: 439415
loss: 0.986014187335968,grad_norm: 0.8594535144811641, iteration: 439416
loss: 1.0308802127838135,grad_norm: 0.6421109600519868, iteration: 439417
loss: 0.9926457405090332,grad_norm: 0.8907450189487213, iteration: 439418
loss: 0.995177149772644,grad_norm: 0.8068073707801461, iteration: 439419
loss: 0.9745112061500549,grad_norm: 0.6636141032713746, iteration: 439420
loss: 1.014402985572815,grad_norm: 0.9999990448732051, iteration: 439421
loss: 1.0087424516677856,grad_norm: 0.8149840310006096, iteration: 439422
loss: 1.0340678691864014,grad_norm: 0.7503902289937526, iteration: 439423
loss: 0.9606306552886963,grad_norm: 0.9542609213017329, iteration: 439424
loss: 0.9747217297554016,grad_norm: 0.6954584849467529, iteration: 439425
loss: 1.0169830322265625,grad_norm: 0.7131053784712651, iteration: 439426
loss: 0.9976142048835754,grad_norm: 0.9999990762950297, iteration: 439427
loss: 1.0040614604949951,grad_norm: 0.7055027289475047, iteration: 439428
loss: 0.9982250332832336,grad_norm: 0.6819853696873188, iteration: 439429
loss: 1.0218937397003174,grad_norm: 0.9889608511605065, iteration: 439430
loss: 1.0464943647384644,grad_norm: 0.8234485899580322, iteration: 439431
loss: 0.9770569801330566,grad_norm: 0.7459905909672234, iteration: 439432
loss: 1.0124802589416504,grad_norm: 0.7717592662185454, iteration: 439433
loss: 1.0026276111602783,grad_norm: 0.7399529390916114, iteration: 439434
loss: 1.013896107673645,grad_norm: 0.827780744721129, iteration: 439435
loss: 0.9647084474563599,grad_norm: 0.809774955223094, iteration: 439436
loss: 1.0189208984375,grad_norm: 0.6912646355577247, iteration: 439437
loss: 0.9797972440719604,grad_norm: 0.8761488845695676, iteration: 439438
loss: 0.9610247015953064,grad_norm: 0.6793153859360587, iteration: 439439
loss: 1.015461802482605,grad_norm: 0.7999241274244592, iteration: 439440
loss: 1.0133891105651855,grad_norm: 0.8372200653541467, iteration: 439441
loss: 1.0372523069381714,grad_norm: 0.7875970685005296, iteration: 439442
loss: 1.0043892860412598,grad_norm: 0.6368691017408974, iteration: 439443
loss: 0.9790158867835999,grad_norm: 0.7661830368305977, iteration: 439444
loss: 1.0725127458572388,grad_norm: 0.9962112054420724, iteration: 439445
loss: 0.9907203912734985,grad_norm: 0.7557765026790728, iteration: 439446
loss: 0.974470853805542,grad_norm: 0.8199412751106688, iteration: 439447
loss: 1.0107042789459229,grad_norm: 0.725011728504825, iteration: 439448
loss: 0.9868180155754089,grad_norm: 0.775189951424958, iteration: 439449
loss: 0.9840573072433472,grad_norm: 0.7475320320419891, iteration: 439450
loss: 1.033757209777832,grad_norm: 0.6730654912933371, iteration: 439451
loss: 0.9906755685806274,grad_norm: 0.820662759524223, iteration: 439452
loss: 0.9977718591690063,grad_norm: 0.9999992866061498, iteration: 439453
loss: 0.9481738209724426,grad_norm: 0.7510594385190206, iteration: 439454
loss: 0.9954747557640076,grad_norm: 0.8434578066471449, iteration: 439455
loss: 0.9827551245689392,grad_norm: 0.6652846509156035, iteration: 439456
loss: 1.0057004690170288,grad_norm: 0.7754078922386888, iteration: 439457
loss: 1.0076881647109985,grad_norm: 0.7388516750974511, iteration: 439458
loss: 1.028053641319275,grad_norm: 0.8300441097627286, iteration: 439459
loss: 1.0010228157043457,grad_norm: 0.6582987254605855, iteration: 439460
loss: 0.9734310507774353,grad_norm: 0.8219000110515972, iteration: 439461
loss: 1.0062079429626465,grad_norm: 0.7306235794299463, iteration: 439462
loss: 0.9757822155952454,grad_norm: 0.7267141870318804, iteration: 439463
loss: 0.980245053768158,grad_norm: 0.861130897002745, iteration: 439464
loss: 0.9867962002754211,grad_norm: 0.8206637399001631, iteration: 439465
loss: 0.9523659944534302,grad_norm: 0.680307241663888, iteration: 439466
loss: 0.9856963753700256,grad_norm: 0.7057863079915374, iteration: 439467
loss: 0.9676246643066406,grad_norm: 0.8280737574031869, iteration: 439468
loss: 1.0050239562988281,grad_norm: 0.6388498541332731, iteration: 439469
loss: 1.0050451755523682,grad_norm: 0.7033224134498759, iteration: 439470
loss: 0.9972819685935974,grad_norm: 0.8896768705050639, iteration: 439471
loss: 1.0026745796203613,grad_norm: 0.9154850754064441, iteration: 439472
loss: 1.0182149410247803,grad_norm: 0.8332087587866635, iteration: 439473
loss: 0.9419342279434204,grad_norm: 0.6818515721949062, iteration: 439474
loss: 1.0109572410583496,grad_norm: 0.7169274939770398, iteration: 439475
loss: 1.024837613105774,grad_norm: 0.7669932690268619, iteration: 439476
loss: 1.0191556215286255,grad_norm: 0.7549759744995643, iteration: 439477
loss: 1.0034836530685425,grad_norm: 0.8128110381771565, iteration: 439478
loss: 1.0098326206207275,grad_norm: 0.6764397924048094, iteration: 439479
loss: 0.9761870503425598,grad_norm: 0.8039434204491456, iteration: 439480
loss: 1.0331228971481323,grad_norm: 0.9638654469582177, iteration: 439481
loss: 1.0219221115112305,grad_norm: 0.6505449544115364, iteration: 439482
loss: 0.9966838955879211,grad_norm: 0.6989180803511286, iteration: 439483
loss: 1.0307250022888184,grad_norm: 0.867030598050633, iteration: 439484
loss: 1.0322532653808594,grad_norm: 0.8762316565975917, iteration: 439485
loss: 1.0263999700546265,grad_norm: 0.9999996850650928, iteration: 439486
loss: 0.9825053215026855,grad_norm: 0.7130506829789894, iteration: 439487
loss: 1.0273611545562744,grad_norm: 0.8045652685174773, iteration: 439488
loss: 0.9962379336357117,grad_norm: 0.7795341683396283, iteration: 439489
loss: 1.0329928398132324,grad_norm: 0.9999991348622758, iteration: 439490
loss: 0.9834081530570984,grad_norm: 0.9939967176746356, iteration: 439491
loss: 0.9874602556228638,grad_norm: 0.9999990194241493, iteration: 439492
loss: 1.0299543142318726,grad_norm: 0.914611767306257, iteration: 439493
loss: 1.0201804637908936,grad_norm: 0.7107178619391025, iteration: 439494
loss: 1.0045912265777588,grad_norm: 0.7867140516774627, iteration: 439495
loss: 1.0412697792053223,grad_norm: 0.999999046878234, iteration: 439496
loss: 0.9654106497764587,grad_norm: 0.7535354429858163, iteration: 439497
loss: 1.007459044456482,grad_norm: 0.8551536017662241, iteration: 439498
loss: 1.0275007486343384,grad_norm: 0.9735160724087014, iteration: 439499
loss: 1.0484768152236938,grad_norm: 0.9428637457322752, iteration: 439500
loss: 1.0184916257858276,grad_norm: 0.7656151031179561, iteration: 439501
loss: 0.9717618227005005,grad_norm: 0.8418885391860113, iteration: 439502
loss: 0.9791513085365295,grad_norm: 0.7473245954113987, iteration: 439503
loss: 1.0102341175079346,grad_norm: 0.6773194262743617, iteration: 439504
loss: 1.004281759262085,grad_norm: 0.8974785525094163, iteration: 439505
loss: 1.1921544075012207,grad_norm: 0.9919427363250248, iteration: 439506
loss: 1.0081719160079956,grad_norm: 0.7651580078764496, iteration: 439507
loss: 1.016054391860962,grad_norm: 0.8285569294375708, iteration: 439508
loss: 1.0257536172866821,grad_norm: 0.8507395850844622, iteration: 439509
loss: 1.0083024501800537,grad_norm: 0.8743687612490945, iteration: 439510
loss: 0.9714778065681458,grad_norm: 0.8605672424022911, iteration: 439511
loss: 0.9998599886894226,grad_norm: 0.7774488012959452, iteration: 439512
loss: 0.9981471300125122,grad_norm: 0.8263947514168432, iteration: 439513
loss: 1.0202733278274536,grad_norm: 0.7224139704205262, iteration: 439514
loss: 0.9755001068115234,grad_norm: 0.867949994985004, iteration: 439515
loss: 1.0068830251693726,grad_norm: 0.9999992732887366, iteration: 439516
loss: 0.9887388944625854,grad_norm: 0.7068148940954438, iteration: 439517
loss: 0.9933305382728577,grad_norm: 0.9999991224993328, iteration: 439518
loss: 1.016547679901123,grad_norm: 0.6907226460643978, iteration: 439519
loss: 1.0325021743774414,grad_norm: 0.8302840877663601, iteration: 439520
loss: 1.016883373260498,grad_norm: 0.6860816781655854, iteration: 439521
loss: 1.0205339193344116,grad_norm: 0.8325678843393753, iteration: 439522
loss: 0.9927918910980225,grad_norm: 0.728519391818506, iteration: 439523
loss: 1.062151551246643,grad_norm: 0.999998989831686, iteration: 439524
loss: 1.0208837985992432,grad_norm: 0.7224939825774686, iteration: 439525
loss: 1.0121850967407227,grad_norm: 0.801507151514604, iteration: 439526
loss: 1.0134505033493042,grad_norm: 0.881822858547974, iteration: 439527
loss: 0.9890351295471191,grad_norm: 0.738173059056014, iteration: 439528
loss: 0.9669085144996643,grad_norm: 0.7267113571520212, iteration: 439529
loss: 1.04502534866333,grad_norm: 0.9999998677383901, iteration: 439530
loss: 1.002626657485962,grad_norm: 0.9999991597040172, iteration: 439531
loss: 1.0172927379608154,grad_norm: 0.7590270638737716, iteration: 439532
loss: 0.9746673107147217,grad_norm: 0.6877685317350356, iteration: 439533
loss: 0.9895962476730347,grad_norm: 0.9128834605376358, iteration: 439534
loss: 1.0094666481018066,grad_norm: 0.8297234818963676, iteration: 439535
loss: 0.9705356359481812,grad_norm: 0.8390355736521848, iteration: 439536
loss: 1.0267878770828247,grad_norm: 0.7424894545698052, iteration: 439537
loss: 1.0067130327224731,grad_norm: 0.8975850926984589, iteration: 439538
loss: 1.0178438425064087,grad_norm: 0.7761345604485929, iteration: 439539
loss: 1.0066165924072266,grad_norm: 0.8880853231352913, iteration: 439540
loss: 0.9727320671081543,grad_norm: 0.9133414903893847, iteration: 439541
loss: 1.0132702589035034,grad_norm: 0.7236087738274766, iteration: 439542
loss: 1.028190016746521,grad_norm: 0.7788953010550269, iteration: 439543
loss: 1.0125935077667236,grad_norm: 0.6915988374001327, iteration: 439544
loss: 1.0220870971679688,grad_norm: 0.749724406228881, iteration: 439545
loss: 1.0145989656448364,grad_norm: 0.8286377090970247, iteration: 439546
loss: 1.0001380443572998,grad_norm: 0.7751088476370069, iteration: 439547
loss: 0.977183997631073,grad_norm: 0.7041435501331113, iteration: 439548
loss: 0.9912731051445007,grad_norm: 0.7255379274941424, iteration: 439549
loss: 0.980955958366394,grad_norm: 0.7606909311071007, iteration: 439550
loss: 0.9844050407409668,grad_norm: 0.7563810829413667, iteration: 439551
loss: 0.989388108253479,grad_norm: 0.668957859830248, iteration: 439552
loss: 0.9755260348320007,grad_norm: 0.7527099089793228, iteration: 439553
loss: 1.0657058954238892,grad_norm: 0.9999991415987749, iteration: 439554
loss: 1.0225335359573364,grad_norm: 0.7555078036002522, iteration: 439555
loss: 1.056544303894043,grad_norm: 0.9999997529110328, iteration: 439556
loss: 0.9977622032165527,grad_norm: 0.7609383611858707, iteration: 439557
loss: 0.9641216397285461,grad_norm: 0.793495070775228, iteration: 439558
loss: 1.0153558254241943,grad_norm: 0.9999990369703915, iteration: 439559
loss: 0.9979273676872253,grad_norm: 0.6645328530579732, iteration: 439560
loss: 0.9635004997253418,grad_norm: 0.8499988862209958, iteration: 439561
loss: 0.993721067905426,grad_norm: 0.891760305828049, iteration: 439562
loss: 0.9924604892730713,grad_norm: 0.7846258737947069, iteration: 439563
loss: 0.9906150698661804,grad_norm: 0.7346946672967003, iteration: 439564
loss: 0.9908918738365173,grad_norm: 0.7087466886106286, iteration: 439565
loss: 1.0044400691986084,grad_norm: 0.7556381491064145, iteration: 439566
loss: 1.0177057981491089,grad_norm: 0.7922420598718694, iteration: 439567
loss: 0.9731150269508362,grad_norm: 0.6384325697443138, iteration: 439568
loss: 1.0359020233154297,grad_norm: 0.9474529967559225, iteration: 439569
loss: 0.9997658133506775,grad_norm: 0.8594478796993824, iteration: 439570
loss: 1.0339887142181396,grad_norm: 0.8055286238782993, iteration: 439571
loss: 0.9788286685943604,grad_norm: 0.7263669470649557, iteration: 439572
loss: 0.9735478162765503,grad_norm: 0.8081372603783349, iteration: 439573
loss: 0.9823505878448486,grad_norm: 0.6948310276474171, iteration: 439574
loss: 1.0348727703094482,grad_norm: 0.7964046770919374, iteration: 439575
loss: 1.0182512998580933,grad_norm: 0.7014459794081968, iteration: 439576
loss: 0.9801315069198608,grad_norm: 0.7297634536558678, iteration: 439577
loss: 1.0069997310638428,grad_norm: 0.6264561070180816, iteration: 439578
loss: 1.0286704301834106,grad_norm: 0.6705810231123988, iteration: 439579
loss: 1.0312997102737427,grad_norm: 0.6928565102338217, iteration: 439580
loss: 0.9810884594917297,grad_norm: 0.6573203008568631, iteration: 439581
loss: 1.020310878753662,grad_norm: 0.7035269790653144, iteration: 439582
loss: 1.00039541721344,grad_norm: 0.9999992499086588, iteration: 439583
loss: 1.0444692373275757,grad_norm: 0.8274268718845362, iteration: 439584
loss: 1.0066442489624023,grad_norm: 0.7107629635502855, iteration: 439585
loss: 0.9893635511398315,grad_norm: 0.6972945641519357, iteration: 439586
loss: 0.9904730916023254,grad_norm: 0.7344414978560715, iteration: 439587
loss: 0.9980930089950562,grad_norm: 0.7781869552284905, iteration: 439588
loss: 0.9948880672454834,grad_norm: 0.7471688603613282, iteration: 439589
loss: 1.037788987159729,grad_norm: 0.7490827433580466, iteration: 439590
loss: 0.9748450517654419,grad_norm: 0.7750877016173501, iteration: 439591
loss: 1.0337281227111816,grad_norm: 0.86014965416392, iteration: 439592
loss: 0.9493855237960815,grad_norm: 0.7905822090578906, iteration: 439593
loss: 0.9955993890762329,grad_norm: 0.7168462306965805, iteration: 439594
loss: 0.9706387519836426,grad_norm: 0.8035098935629686, iteration: 439595
loss: 1.0452179908752441,grad_norm: 0.7694827527618567, iteration: 439596
loss: 0.9884782433509827,grad_norm: 0.6865337378460213, iteration: 439597
loss: 0.9811861515045166,grad_norm: 0.9999990055577628, iteration: 439598
loss: 0.952086329460144,grad_norm: 0.751581729231748, iteration: 439599
loss: 1.0066859722137451,grad_norm: 0.7921813078395975, iteration: 439600
loss: 1.0087765455245972,grad_norm: 0.7970940312085638, iteration: 439601
loss: 0.9840316772460938,grad_norm: 0.7994063383225593, iteration: 439602
loss: 1.158971905708313,grad_norm: 0.9999997987165955, iteration: 439603
loss: 1.115777850151062,grad_norm: 0.9999996278907721, iteration: 439604
loss: 1.042391300201416,grad_norm: 0.8301864521569424, iteration: 439605
loss: 1.0256339311599731,grad_norm: 0.8850805598655549, iteration: 439606
loss: 1.0558103322982788,grad_norm: 0.9999992252103433, iteration: 439607
loss: 0.9570445418357849,grad_norm: 0.7007654399237025, iteration: 439608
loss: 1.0286246538162231,grad_norm: 0.7200280884119631, iteration: 439609
loss: 1.0285640954971313,grad_norm: 0.8359410506604755, iteration: 439610
loss: 0.9787462949752808,grad_norm: 0.7762084961484146, iteration: 439611
loss: 1.0076309442520142,grad_norm: 0.8066508377572581, iteration: 439612
loss: 1.0831626653671265,grad_norm: 0.8897263056455017, iteration: 439613
loss: 0.989281177520752,grad_norm: 0.8059825556129664, iteration: 439614
loss: 1.003091812133789,grad_norm: 0.6977379074825766, iteration: 439615
loss: 1.0241622924804688,grad_norm: 0.9999997743712845, iteration: 439616
loss: 0.9913309216499329,grad_norm: 0.778509459858058, iteration: 439617
loss: 1.0025670528411865,grad_norm: 0.7417055062816477, iteration: 439618
loss: 1.0214972496032715,grad_norm: 0.6847620965783577, iteration: 439619
loss: 1.064879298210144,grad_norm: 0.9999991006217478, iteration: 439620
loss: 1.0159664154052734,grad_norm: 0.9999996340999865, iteration: 439621
loss: 0.9621753692626953,grad_norm: 0.7713079455266901, iteration: 439622
loss: 1.0124046802520752,grad_norm: 0.800269450090984, iteration: 439623
loss: 0.9908527135848999,grad_norm: 0.7145220532184462, iteration: 439624
loss: 0.979412853717804,grad_norm: 0.7265780160772423, iteration: 439625
loss: 0.9875655174255371,grad_norm: 0.7214491554479616, iteration: 439626
loss: 1.0151325464248657,grad_norm: 0.7807526670258139, iteration: 439627
loss: 0.9828758835792542,grad_norm: 0.7913999869088392, iteration: 439628
loss: 1.0607203245162964,grad_norm: 0.9999994860879533, iteration: 439629
loss: 0.9865854382514954,grad_norm: 0.8835170161692288, iteration: 439630
loss: 0.9921991229057312,grad_norm: 0.7186175439373474, iteration: 439631
loss: 1.0076215267181396,grad_norm: 0.7333348609077096, iteration: 439632
loss: 1.0096099376678467,grad_norm: 0.7466030748880655, iteration: 439633
loss: 1.055424690246582,grad_norm: 0.7646127486882467, iteration: 439634
loss: 1.0220035314559937,grad_norm: 0.803903015213287, iteration: 439635
loss: 1.0155911445617676,grad_norm: 0.7211587682993069, iteration: 439636
loss: 1.1344574689865112,grad_norm: 0.9374200777514261, iteration: 439637
loss: 1.0762180089950562,grad_norm: 0.7818570266117982, iteration: 439638
loss: 1.0215439796447754,grad_norm: 0.9999999350106538, iteration: 439639
loss: 1.0027384757995605,grad_norm: 0.7735359181992174, iteration: 439640
loss: 0.9456207156181335,grad_norm: 0.912847300689575, iteration: 439641
loss: 1.1000971794128418,grad_norm: 0.6764482799727433, iteration: 439642
loss: 1.038499116897583,grad_norm: 0.7114524078955872, iteration: 439643
loss: 1.018714189529419,grad_norm: 0.8866383079117529, iteration: 439644
loss: 0.9649882912635803,grad_norm: 0.6465794109164285, iteration: 439645
loss: 0.9762831330299377,grad_norm: 0.7085196007216, iteration: 439646
loss: 0.9905295372009277,grad_norm: 0.7073810979616425, iteration: 439647
loss: 1.0097886323928833,grad_norm: 0.9999991741749915, iteration: 439648
loss: 1.0520495176315308,grad_norm: 0.7132301510822326, iteration: 439649
loss: 1.008857250213623,grad_norm: 0.915643474000287, iteration: 439650
loss: 0.9952026605606079,grad_norm: 0.6406958894331493, iteration: 439651
loss: 1.016422986984253,grad_norm: 0.7925988000850915, iteration: 439652
loss: 0.9991123080253601,grad_norm: 0.7637295594975381, iteration: 439653
loss: 0.9852057099342346,grad_norm: 0.7215102099746449, iteration: 439654
loss: 1.07506263256073,grad_norm: 0.7932254328491006, iteration: 439655
loss: 0.9911577105522156,grad_norm: 0.732336409599309, iteration: 439656
loss: 1.013032078742981,grad_norm: 0.8003872011706354, iteration: 439657
loss: 0.9936889410018921,grad_norm: 0.8050401877018422, iteration: 439658
loss: 1.0129212141036987,grad_norm: 0.8880106526439622, iteration: 439659
loss: 1.0313472747802734,grad_norm: 0.74563383767764, iteration: 439660
loss: 1.023677945137024,grad_norm: 0.7841837150759993, iteration: 439661
loss: 1.008239507675171,grad_norm: 0.7778597125537423, iteration: 439662
loss: 0.9478791356086731,grad_norm: 0.6650111027539606, iteration: 439663
loss: 1.0255134105682373,grad_norm: 0.9163746296865504, iteration: 439664
loss: 0.9488826990127563,grad_norm: 0.9096408040938758, iteration: 439665
loss: 1.0004901885986328,grad_norm: 0.8522359711065665, iteration: 439666
loss: 1.0725313425064087,grad_norm: 0.8640074348598427, iteration: 439667
loss: 0.9907850027084351,grad_norm: 0.8309133910423416, iteration: 439668
loss: 1.0403937101364136,grad_norm: 0.9999998562322675, iteration: 439669
loss: 1.0152286291122437,grad_norm: 0.858093548649967, iteration: 439670
loss: 1.020394206047058,grad_norm: 0.9529355619540066, iteration: 439671
loss: 0.996482253074646,grad_norm: 0.791376856439334, iteration: 439672
loss: 1.0148439407348633,grad_norm: 0.9916066006277671, iteration: 439673
loss: 0.9938255548477173,grad_norm: 0.9999999950462991, iteration: 439674
loss: 1.023075819015503,grad_norm: 0.7926216454698906, iteration: 439675
loss: 1.024540662765503,grad_norm: 0.6947506437764607, iteration: 439676
loss: 0.9839008450508118,grad_norm: 0.9068547504537164, iteration: 439677
loss: 0.9743207097053528,grad_norm: 0.7630790610377186, iteration: 439678
loss: 0.9754250645637512,grad_norm: 0.7573989020432675, iteration: 439679
loss: 1.0122630596160889,grad_norm: 0.7778778835932746, iteration: 439680
loss: 1.0068061351776123,grad_norm: 0.6775713739134449, iteration: 439681
loss: 1.0544337034225464,grad_norm: 0.8251838747816489, iteration: 439682
loss: 1.0128427743911743,grad_norm: 0.8891539498135329, iteration: 439683
loss: 0.9757649898529053,grad_norm: 0.9257623919812268, iteration: 439684
loss: 1.0084439516067505,grad_norm: 0.7101281514820093, iteration: 439685
loss: 1.0464946031570435,grad_norm: 0.9288019068772921, iteration: 439686
loss: 1.0211347341537476,grad_norm: 0.7537498384203277, iteration: 439687
loss: 0.9908241033554077,grad_norm: 0.7272830794293493, iteration: 439688
loss: 1.0519742965698242,grad_norm: 0.9999999288177019, iteration: 439689
loss: 1.0075722932815552,grad_norm: 0.7826182887062154, iteration: 439690
loss: 1.0081281661987305,grad_norm: 0.8170778789967876, iteration: 439691
loss: 1.0078186988830566,grad_norm: 0.7766334731814615, iteration: 439692
loss: 1.00812828540802,grad_norm: 0.9781058583544568, iteration: 439693
loss: 0.9728097915649414,grad_norm: 0.8315445433934299, iteration: 439694
loss: 0.9614020586013794,grad_norm: 0.713886499163316, iteration: 439695
loss: 1.0702118873596191,grad_norm: 0.8325915094171706, iteration: 439696
loss: 1.0478123426437378,grad_norm: 0.9651703893858657, iteration: 439697
loss: 0.9756698608398438,grad_norm: 0.8537041036590455, iteration: 439698
loss: 1.0037957429885864,grad_norm: 0.8137712505587954, iteration: 439699
loss: 1.0690995454788208,grad_norm: 0.6930036348484625, iteration: 439700
loss: 1.0349702835083008,grad_norm: 0.89344802437323, iteration: 439701
loss: 0.9424468278884888,grad_norm: 0.7321682947793178, iteration: 439702
loss: 1.1420141458511353,grad_norm: 0.9669630115111975, iteration: 439703
loss: 1.0071901082992554,grad_norm: 0.9999992332913423, iteration: 439704
loss: 1.006762146949768,grad_norm: 0.9999993800931899, iteration: 439705
loss: 0.9954822659492493,grad_norm: 0.7700449628358266, iteration: 439706
loss: 1.0099323987960815,grad_norm: 0.9999994457154909, iteration: 439707
loss: 1.00011146068573,grad_norm: 0.9999996370054882, iteration: 439708
loss: 1.0625879764556885,grad_norm: 0.8561505306790584, iteration: 439709
loss: 1.0309211015701294,grad_norm: 0.9999992141783844, iteration: 439710
loss: 0.9982426762580872,grad_norm: 0.9691611129887759, iteration: 439711
loss: 0.9934775233268738,grad_norm: 0.8510852790190147, iteration: 439712
loss: 1.0624386072158813,grad_norm: 0.8169127294782333, iteration: 439713
loss: 0.9858614206314087,grad_norm: 0.6758094569925954, iteration: 439714
loss: 0.9494900107383728,grad_norm: 0.8066595695112684, iteration: 439715
loss: 1.049967885017395,grad_norm: 0.9999990489346249, iteration: 439716
loss: 1.0009104013442993,grad_norm: 0.823966037452957, iteration: 439717
loss: 1.017136573791504,grad_norm: 0.9790696055526982, iteration: 439718
loss: 1.0092356204986572,grad_norm: 0.9999994019419032, iteration: 439719
loss: 0.9975168704986572,grad_norm: 0.8134206983941134, iteration: 439720
loss: 0.9873601198196411,grad_norm: 0.9999989797450014, iteration: 439721
loss: 1.015010118484497,grad_norm: 0.6692213219678246, iteration: 439722
loss: 1.0298744440078735,grad_norm: 0.8579135097219249, iteration: 439723
loss: 0.9993102550506592,grad_norm: 0.7247000490587695, iteration: 439724
loss: 0.9812001585960388,grad_norm: 0.921105811253243, iteration: 439725
loss: 1.000406265258789,grad_norm: 0.8094907230668847, iteration: 439726
loss: 1.0131946802139282,grad_norm: 0.9999991484079864, iteration: 439727
loss: 0.9751637578010559,grad_norm: 0.9999991482442946, iteration: 439728
loss: 1.0092992782592773,grad_norm: 0.8319509118969041, iteration: 439729
loss: 1.0070966482162476,grad_norm: 0.8902871790014479, iteration: 439730
loss: 1.0017017126083374,grad_norm: 0.7183197569174529, iteration: 439731
loss: 1.001611351966858,grad_norm: 0.7699364266604553, iteration: 439732
loss: 0.9911147356033325,grad_norm: 0.6558070751644558, iteration: 439733
loss: 1.005959153175354,grad_norm: 0.8379867244258935, iteration: 439734
loss: 0.9999685883522034,grad_norm: 0.7713775958999411, iteration: 439735
loss: 1.090009331703186,grad_norm: 0.7657047160437989, iteration: 439736
loss: 0.9584614038467407,grad_norm: 0.7465184398910181, iteration: 439737
loss: 0.979418933391571,grad_norm: 0.8289582128892699, iteration: 439738
loss: 1.047985553741455,grad_norm: 0.9999998682128051, iteration: 439739
loss: 0.9978356957435608,grad_norm: 0.8014576846573161, iteration: 439740
loss: 0.9940036535263062,grad_norm: 1.0000000132339495, iteration: 439741
loss: 1.022727608680725,grad_norm: 0.879410194273772, iteration: 439742
loss: 1.0115244388580322,grad_norm: 0.7590046506102607, iteration: 439743
loss: 1.0491942167282104,grad_norm: 0.8321600532817258, iteration: 439744
loss: 0.9934475421905518,grad_norm: 0.715300644912672, iteration: 439745
loss: 1.015595555305481,grad_norm: 0.8664432356941453, iteration: 439746
loss: 1.0013760328292847,grad_norm: 0.7015409701639412, iteration: 439747
loss: 1.0577439069747925,grad_norm: 0.9999992537197123, iteration: 439748
loss: 0.9825752377510071,grad_norm: 0.8990829481473465, iteration: 439749
loss: 1.0123573541641235,grad_norm: 0.8440180714286734, iteration: 439750
loss: 1.0000370740890503,grad_norm: 0.8071441205500695, iteration: 439751
loss: 1.1089428663253784,grad_norm: 0.9999996137142745, iteration: 439752
loss: 0.9967430233955383,grad_norm: 0.7976760853499231, iteration: 439753
loss: 1.0389351844787598,grad_norm: 0.8133220121217638, iteration: 439754
loss: 1.0412222146987915,grad_norm: 0.9999996955966962, iteration: 439755
loss: 0.9730615019798279,grad_norm: 0.8544872253541245, iteration: 439756
loss: 1.0038813352584839,grad_norm: 0.7874605624443648, iteration: 439757
loss: 0.9796515703201294,grad_norm: 0.607682523424384, iteration: 439758
loss: 0.9686034917831421,grad_norm: 0.7626864756898153, iteration: 439759
loss: 0.9958842992782593,grad_norm: 0.8152912453403955, iteration: 439760
loss: 1.0126197338104248,grad_norm: 0.9218281483465123, iteration: 439761
loss: 1.006934642791748,grad_norm: 0.7030305027165404, iteration: 439762
loss: 1.1926310062408447,grad_norm: 0.9999996835548889, iteration: 439763
loss: 1.0216094255447388,grad_norm: 0.8184010962303085, iteration: 439764
loss: 0.9639602899551392,grad_norm: 0.6944176772424449, iteration: 439765
loss: 0.998929500579834,grad_norm: 0.9104618508327127, iteration: 439766
loss: 0.9875784516334534,grad_norm: 0.8521588038346112, iteration: 439767
loss: 1.0255763530731201,grad_norm: 0.9999998348288849, iteration: 439768
loss: 0.9577359557151794,grad_norm: 0.7696541473392576, iteration: 439769
loss: 1.034731149673462,grad_norm: 0.7194920950710894, iteration: 439770
loss: 1.0138003826141357,grad_norm: 0.9633450509560234, iteration: 439771
loss: 0.984675407409668,grad_norm: 0.7624614372305402, iteration: 439772
loss: 1.0742937326431274,grad_norm: 0.7827890117734141, iteration: 439773
loss: 1.082180380821228,grad_norm: 0.9999995891429497, iteration: 439774
loss: 1.0179542303085327,grad_norm: 0.8867406963349616, iteration: 439775
loss: 0.9644138216972351,grad_norm: 0.6576810113467362, iteration: 439776
loss: 1.0326730012893677,grad_norm: 0.7681803810961395, iteration: 439777
loss: 0.9850417971611023,grad_norm: 0.7789098709065092, iteration: 439778
loss: 1.0058302879333496,grad_norm: 0.6774619913147037, iteration: 439779
loss: 0.9760658144950867,grad_norm: 0.8499697036528008, iteration: 439780
loss: 1.0131150484085083,grad_norm: 0.8320440176470281, iteration: 439781
loss: 1.0181374549865723,grad_norm: 0.8173568945151335, iteration: 439782
loss: 1.0453760623931885,grad_norm: 0.859348034772664, iteration: 439783
loss: 1.0798252820968628,grad_norm: 0.7490879766195061, iteration: 439784
loss: 0.9937111139297485,grad_norm: 0.621153812465735, iteration: 439785
loss: 0.9979035258293152,grad_norm: 0.769922778664433, iteration: 439786
loss: 0.9860284924507141,grad_norm: 0.6354336256662585, iteration: 439787
loss: 1.0074182748794556,grad_norm: 0.9999991650500698, iteration: 439788
loss: 1.0038938522338867,grad_norm: 0.8972495712774158, iteration: 439789
loss: 0.970981776714325,grad_norm: 0.6638707379372767, iteration: 439790
loss: 1.0147879123687744,grad_norm: 0.7615666746240184, iteration: 439791
loss: 1.3939182758331299,grad_norm: 0.9999991388777695, iteration: 439792
loss: 1.1240276098251343,grad_norm: 0.999999962335901, iteration: 439793
loss: 0.9890686869621277,grad_norm: 0.7947955259884091, iteration: 439794
loss: 0.9687772989273071,grad_norm: 0.7811338924197524, iteration: 439795
loss: 1.119156837463379,grad_norm: 1.000000018361379, iteration: 439796
loss: 0.9851936101913452,grad_norm: 0.8563839532123256, iteration: 439797
loss: 0.9736160635948181,grad_norm: 0.6942928587912319, iteration: 439798
loss: 0.9696111083030701,grad_norm: 0.8117251073095743, iteration: 439799
loss: 0.9858954548835754,grad_norm: 0.915706556485566, iteration: 439800
loss: 0.9902137517929077,grad_norm: 0.8331041663983106, iteration: 439801
loss: 0.9898017644882202,grad_norm: 0.7283313155173684, iteration: 439802
loss: 0.9970614910125732,grad_norm: 0.9209713170442319, iteration: 439803
loss: 1.0078611373901367,grad_norm: 0.780037718183587, iteration: 439804
loss: 0.9740669131278992,grad_norm: 0.9491690224797097, iteration: 439805
loss: 1.0596964359283447,grad_norm: 0.9999997743472078, iteration: 439806
loss: 0.9892983436584473,grad_norm: 0.8808794407852375, iteration: 439807
loss: 0.9659026861190796,grad_norm: 0.7516294413856579, iteration: 439808
loss: 1.008957028388977,grad_norm: 0.999999073526044, iteration: 439809
loss: 0.9654289484024048,grad_norm: 0.7453544497362367, iteration: 439810
loss: 0.9540968537330627,grad_norm: 0.7217509100046695, iteration: 439811
loss: 0.9780047535896301,grad_norm: 0.7101241456958264, iteration: 439812
loss: 0.9820390343666077,grad_norm: 0.7360784454367268, iteration: 439813
loss: 0.9701231122016907,grad_norm: 0.754023987941546, iteration: 439814
loss: 1.023978590965271,grad_norm: 0.9999993632919086, iteration: 439815
loss: 1.0265710353851318,grad_norm: 0.7728333817555403, iteration: 439816
loss: 1.048682689666748,grad_norm: 0.9999992887120303, iteration: 439817
loss: 0.9898938536643982,grad_norm: 0.8645391460008018, iteration: 439818
loss: 0.9912976026535034,grad_norm: 0.7064208294677284, iteration: 439819
loss: 0.9966896176338196,grad_norm: 0.8507294619907964, iteration: 439820
loss: 1.0017478466033936,grad_norm: 0.9999997092135025, iteration: 439821
loss: 1.0086724758148193,grad_norm: 0.8356871694633574, iteration: 439822
loss: 1.0415854454040527,grad_norm: 0.7128563988899029, iteration: 439823
loss: 1.0239211320877075,grad_norm: 0.7957344947429367, iteration: 439824
loss: 1.0293961763381958,grad_norm: 0.9999994036177301, iteration: 439825
loss: 1.0597223043441772,grad_norm: 0.8974428006663667, iteration: 439826
loss: 0.9910941123962402,grad_norm: 0.763488436919607, iteration: 439827
loss: 1.0253478288650513,grad_norm: 0.778830820037551, iteration: 439828
loss: 1.0127336978912354,grad_norm: 0.7459592517216084, iteration: 439829
loss: 0.9943640232086182,grad_norm: 0.7695028208344441, iteration: 439830
loss: 0.9845234751701355,grad_norm: 0.9999995277605251, iteration: 439831
loss: 0.9528197050094604,grad_norm: 0.7948694691117683, iteration: 439832
loss: 1.0021076202392578,grad_norm: 0.999999401771827, iteration: 439833
loss: 0.9607105851173401,grad_norm: 0.751341766596805, iteration: 439834
loss: 0.9920862317085266,grad_norm: 0.7319937015341405, iteration: 439835
loss: 0.9885005950927734,grad_norm: 0.8337378134439061, iteration: 439836
loss: 1.0545378923416138,grad_norm: 0.8469202136507376, iteration: 439837
loss: 0.9550685882568359,grad_norm: 0.8236766893676649, iteration: 439838
loss: 1.043637990951538,grad_norm: 0.9712688526778449, iteration: 439839
loss: 0.9860331416130066,grad_norm: 0.9375033424981045, iteration: 439840
loss: 1.0068433284759521,grad_norm: 0.7716097579369144, iteration: 439841
loss: 0.9922427535057068,grad_norm: 0.81206695492611, iteration: 439842
loss: 1.013065218925476,grad_norm: 0.8331349533783801, iteration: 439843
loss: 0.9755658507347107,grad_norm: 0.8888644681031124, iteration: 439844
loss: 1.0813900232315063,grad_norm: 0.8004272818994984, iteration: 439845
loss: 0.9766794443130493,grad_norm: 0.8345038232479708, iteration: 439846
loss: 1.0193556547164917,grad_norm: 0.9999993071769077, iteration: 439847
loss: 1.010550618171692,grad_norm: 0.8753235105771656, iteration: 439848
loss: 0.9743552207946777,grad_norm: 0.7721989703747969, iteration: 439849
loss: 1.0026873350143433,grad_norm: 0.6854707483538528, iteration: 439850
loss: 0.9796651601791382,grad_norm: 0.8109574136471793, iteration: 439851
loss: 1.0178292989730835,grad_norm: 0.8793129929249438, iteration: 439852
loss: 1.0128506422042847,grad_norm: 0.7868419597443346, iteration: 439853
loss: 0.9845849275588989,grad_norm: 0.7490936920655457, iteration: 439854
loss: 0.9557826519012451,grad_norm: 0.8027454295306249, iteration: 439855
loss: 1.0064189434051514,grad_norm: 0.6797578698236769, iteration: 439856
loss: 0.9913640022277832,grad_norm: 0.6964037456043313, iteration: 439857
loss: 0.9971174001693726,grad_norm: 0.796654048592514, iteration: 439858
loss: 0.9914409518241882,grad_norm: 0.8869540215560532, iteration: 439859
loss: 1.2347553968429565,grad_norm: 0.9999997981150582, iteration: 439860
loss: 0.998714804649353,grad_norm: 0.8664860844048536, iteration: 439861
loss: 1.025414228439331,grad_norm: 0.7021870412228124, iteration: 439862
loss: 1.0007085800170898,grad_norm: 0.6706364797116046, iteration: 439863
loss: 0.9984627962112427,grad_norm: 0.9003730679627263, iteration: 439864
loss: 0.988385021686554,grad_norm: 0.7326833190986579, iteration: 439865
loss: 1.0159567594528198,grad_norm: 0.7153361259841707, iteration: 439866
loss: 1.0404465198516846,grad_norm: 0.8322035656443184, iteration: 439867
loss: 0.9838272929191589,grad_norm: 0.6619703194938509, iteration: 439868
loss: 0.9868163466453552,grad_norm: 0.6896215027941418, iteration: 439869
loss: 0.9966753125190735,grad_norm: 0.6348307718278406, iteration: 439870
loss: 0.9506253600120544,grad_norm: 0.8187427497196117, iteration: 439871
loss: 1.0038261413574219,grad_norm: 0.7907602029831502, iteration: 439872
loss: 1.0293056964874268,grad_norm: 0.7597332924665205, iteration: 439873
loss: 1.0068470239639282,grad_norm: 0.9999990683902871, iteration: 439874
loss: 1.1900551319122314,grad_norm: 0.9999994880705971, iteration: 439875
loss: 1.1943655014038086,grad_norm: 0.9999995194809849, iteration: 439876
loss: 0.9749577045440674,grad_norm: 0.6663059644998697, iteration: 439877
loss: 1.3277802467346191,grad_norm: 0.999999120691161, iteration: 439878
loss: 0.9927165508270264,grad_norm: 0.7716741394920359, iteration: 439879
loss: 0.9902891516685486,grad_norm: 0.999999524344494, iteration: 439880
loss: 0.967438817024231,grad_norm: 0.6824391370759854, iteration: 439881
loss: 1.036196231842041,grad_norm: 0.9030756085844902, iteration: 439882
loss: 0.9953041076660156,grad_norm: 0.7609709854172118, iteration: 439883
loss: 1.0228688716888428,grad_norm: 0.6284099331962104, iteration: 439884
loss: 0.987656831741333,grad_norm: 0.774106873198454, iteration: 439885
loss: 1.0402557849884033,grad_norm: 0.7556546238694702, iteration: 439886
loss: 0.9997808337211609,grad_norm: 0.8491112132006811, iteration: 439887
loss: 1.0004979372024536,grad_norm: 0.7678790091496978, iteration: 439888
loss: 1.032074213027954,grad_norm: 0.9999998308779027, iteration: 439889
loss: 1.0211381912231445,grad_norm: 0.7812927123543282, iteration: 439890
loss: 1.054895043373108,grad_norm: 0.99999959607524, iteration: 439891
loss: 1.0017513036727905,grad_norm: 0.8164626898293353, iteration: 439892
loss: 0.9856622219085693,grad_norm: 0.7278864444965923, iteration: 439893
loss: 0.9473537802696228,grad_norm: 0.6765997734981034, iteration: 439894
loss: 0.989737331867218,grad_norm: 0.7114219519963872, iteration: 439895
loss: 1.001002311706543,grad_norm: 0.8140924988708484, iteration: 439896
loss: 0.9798723459243774,grad_norm: 0.6859466307052567, iteration: 439897
loss: 1.028226375579834,grad_norm: 0.8842256311285284, iteration: 439898
loss: 0.9995482563972473,grad_norm: 0.866227411364614, iteration: 439899
loss: 0.9794260263442993,grad_norm: 0.8553206998980905, iteration: 439900
loss: 0.9933732748031616,grad_norm: 0.7663096184170518, iteration: 439901
loss: 1.0371850728988647,grad_norm: 0.763344287408147, iteration: 439902
loss: 1.0360807180404663,grad_norm: 0.9132379820928642, iteration: 439903
loss: 1.0239653587341309,grad_norm: 0.8024164765135136, iteration: 439904
loss: 1.0892914533615112,grad_norm: 0.9999998079133473, iteration: 439905
loss: 0.9895144701004028,grad_norm: 0.7371125346045175, iteration: 439906
loss: 1.1237913370132446,grad_norm: 0.9999992885681879, iteration: 439907
loss: 1.0141234397888184,grad_norm: 0.782877631071499, iteration: 439908
loss: 1.0083956718444824,grad_norm: 0.6672437101327761, iteration: 439909
loss: 1.0177819728851318,grad_norm: 0.9999991947904976, iteration: 439910
loss: 1.0386186838150024,grad_norm: 0.8432599406652465, iteration: 439911
loss: 1.00497567653656,grad_norm: 0.7330951491194022, iteration: 439912
loss: 1.1544251441955566,grad_norm: 1.000000037626155, iteration: 439913
loss: 1.098448395729065,grad_norm: 0.845288541354727, iteration: 439914
loss: 1.3069875240325928,grad_norm: 0.9999997721502938, iteration: 439915
loss: 0.9899848699569702,grad_norm: 0.6627206790140628, iteration: 439916
loss: 1.2577310800552368,grad_norm: 0.999999142668091, iteration: 439917
loss: 0.9818500280380249,grad_norm: 0.7353253272039502, iteration: 439918
loss: 1.1576391458511353,grad_norm: 0.9999997144124029, iteration: 439919
loss: 0.989651083946228,grad_norm: 0.7263749942992235, iteration: 439920
loss: 1.0370521545410156,grad_norm: 0.9999993146263556, iteration: 439921
loss: 1.3415286540985107,grad_norm: 0.9999997016237039, iteration: 439922
loss: 1.022232174873352,grad_norm: 0.759517302932973, iteration: 439923
loss: 0.9837177395820618,grad_norm: 0.8428505818689122, iteration: 439924
loss: 0.9921242594718933,grad_norm: 0.7847355058559904, iteration: 439925
loss: 1.046504020690918,grad_norm: 0.732145630443018, iteration: 439926
loss: 0.9792709946632385,grad_norm: 0.6993516994880233, iteration: 439927
loss: 0.9622851610183716,grad_norm: 0.8105341174178057, iteration: 439928
loss: 0.9835196137428284,grad_norm: 0.7498581516718253, iteration: 439929
loss: 0.9928632974624634,grad_norm: 0.6948775928114137, iteration: 439930
loss: 0.9566494822502136,grad_norm: 0.8268707441334064, iteration: 439931
loss: 1.011000394821167,grad_norm: 0.6747353711231078, iteration: 439932
loss: 0.9925989508628845,grad_norm: 0.8769274300023041, iteration: 439933
loss: 1.035950779914856,grad_norm: 0.999999184101101, iteration: 439934
loss: 1.026360273361206,grad_norm: 0.9999997026127998, iteration: 439935
loss: 0.9948261380195618,grad_norm: 0.8717171939839189, iteration: 439936
loss: 1.0099159479141235,grad_norm: 0.8343002212513821, iteration: 439937
loss: 1.0602644681930542,grad_norm: 0.7994206375246818, iteration: 439938
loss: 0.98891282081604,grad_norm: 0.7390522092398805, iteration: 439939
loss: 0.9911133646965027,grad_norm: 0.8064213575056518, iteration: 439940
loss: 1.0194900035858154,grad_norm: 0.6329841499457182, iteration: 439941
loss: 1.002718210220337,grad_norm: 0.9598592895643411, iteration: 439942
loss: 1.0052964687347412,grad_norm: 0.6612987687543657, iteration: 439943
loss: 1.0254557132720947,grad_norm: 0.8038453612352598, iteration: 439944
loss: 1.0082406997680664,grad_norm: 0.5982976116231964, iteration: 439945
loss: 0.9887515902519226,grad_norm: 0.9840524975131307, iteration: 439946
loss: 0.9447943568229675,grad_norm: 0.7162651994987929, iteration: 439947
loss: 1.0110777616500854,grad_norm: 0.7011957870975094, iteration: 439948
loss: 0.9944837689399719,grad_norm: 0.7346621205179921, iteration: 439949
loss: 0.9828499555587769,grad_norm: 0.8637997058452587, iteration: 439950
loss: 0.9772644639015198,grad_norm: 0.7347631682349847, iteration: 439951
loss: 1.0116020441055298,grad_norm: 0.7235221075208622, iteration: 439952
loss: 1.0626616477966309,grad_norm: 0.7492323597699968, iteration: 439953
loss: 0.9716087579727173,grad_norm: 0.8087288287576853, iteration: 439954
loss: 0.9809825420379639,grad_norm: 0.8355462818470654, iteration: 439955
loss: 0.9725706577301025,grad_norm: 0.9685387594095763, iteration: 439956
loss: 1.0641034841537476,grad_norm: 0.8140287373401096, iteration: 439957
loss: 1.0080451965332031,grad_norm: 0.9999992411716833, iteration: 439958
loss: 1.0273727178573608,grad_norm: 0.9685782609406962, iteration: 439959
loss: 0.9847456216812134,grad_norm: 0.7478349874533265, iteration: 439960
loss: 1.013752818107605,grad_norm: 0.8779890584249755, iteration: 439961
loss: 1.4135106801986694,grad_norm: 0.9999999917512561, iteration: 439962
loss: 0.990519642829895,grad_norm: 0.70100498311595, iteration: 439963
loss: 1.0318487882614136,grad_norm: 0.7783475424206281, iteration: 439964
loss: 0.9886637330055237,grad_norm: 0.9444123775150168, iteration: 439965
loss: 1.0204945802688599,grad_norm: 0.8005599913049637, iteration: 439966
loss: 1.0337847471237183,grad_norm: 0.8003628747289175, iteration: 439967
loss: 0.9986677765846252,grad_norm: 0.9999991330912495, iteration: 439968
loss: 0.9808316826820374,grad_norm: 0.8367355879939858, iteration: 439969
loss: 1.0207254886627197,grad_norm: 0.7245714111441061, iteration: 439970
loss: 1.0017826557159424,grad_norm: 0.8190057137753589, iteration: 439971
loss: 1.0530627965927124,grad_norm: 0.9999995578158876, iteration: 439972
loss: 0.9879774451255798,grad_norm: 0.8528450125141189, iteration: 439973
loss: 1.0371804237365723,grad_norm: 0.8268038999636298, iteration: 439974
loss: 1.0178536176681519,grad_norm: 0.6992672324627046, iteration: 439975
loss: 1.0060456991195679,grad_norm: 0.8560220976263567, iteration: 439976
loss: 0.9675517678260803,grad_norm: 0.889973354110748, iteration: 439977
loss: 0.9821301698684692,grad_norm: 0.7645098241255474, iteration: 439978
loss: 1.027185320854187,grad_norm: 0.6631102887914937, iteration: 439979
loss: 0.9699808955192566,grad_norm: 0.7604630639635126, iteration: 439980
loss: 0.9702407717704773,grad_norm: 0.7303977156697452, iteration: 439981
loss: 1.0072203874588013,grad_norm: 0.7570561682530815, iteration: 439982
loss: 1.0060677528381348,grad_norm: 0.764770636724602, iteration: 439983
loss: 1.0124397277832031,grad_norm: 0.8884196436689641, iteration: 439984
loss: 0.9718725085258484,grad_norm: 0.6969702090485808, iteration: 439985
loss: 0.9847707152366638,grad_norm: 0.9237680215964814, iteration: 439986
loss: 1.025234341621399,grad_norm: 0.7075654999593861, iteration: 439987
loss: 1.0576987266540527,grad_norm: 0.9999992289654217, iteration: 439988
loss: 1.0885282754898071,grad_norm: 0.9999994898486063, iteration: 439989
loss: 1.0298988819122314,grad_norm: 1.000000027147037, iteration: 439990
loss: 1.0015696287155151,grad_norm: 0.9999996371687307, iteration: 439991
loss: 1.0146682262420654,grad_norm: 0.7586572156199225, iteration: 439992
loss: 0.9714663028717041,grad_norm: 0.7585361676252508, iteration: 439993
loss: 0.9962927103042603,grad_norm: 0.9302740816789258, iteration: 439994
loss: 1.0040737390518188,grad_norm: 0.7745126211978925, iteration: 439995
loss: 1.0192078351974487,grad_norm: 0.7770408712351032, iteration: 439996
loss: 0.9388957619667053,grad_norm: 0.7932241732771751, iteration: 439997
loss: 0.981171190738678,grad_norm: 0.7605616977832003, iteration: 439998
loss: 1.004051923751831,grad_norm: 0.9999995998015444, iteration: 439999
loss: 0.9446647763252258,grad_norm: 0.9056757757483801, iteration: 440000
Evaluating at step 440000
{'val': 0.9941111579537392, 'test': 2.0965424280958818}
loss: 1.0254592895507812,grad_norm: 0.9183963235069077, iteration: 440001
loss: 0.9752548933029175,grad_norm: 0.7402126588080287, iteration: 440002
loss: 0.9985401034355164,grad_norm: 0.9713763852006417, iteration: 440003
loss: 1.0096269845962524,grad_norm: 0.9999990242662857, iteration: 440004
loss: 1.047239899635315,grad_norm: 0.9999995583155463, iteration: 440005
loss: 1.0370198488235474,grad_norm: 0.835331876755185, iteration: 440006
loss: 0.9619606137275696,grad_norm: 0.7396027350482944, iteration: 440007
loss: 1.0392632484436035,grad_norm: 0.9035290340482999, iteration: 440008
loss: 0.9909896850585938,grad_norm: 0.6895922722819428, iteration: 440009
loss: 1.013234257698059,grad_norm: 0.691559097958239, iteration: 440010
loss: 0.9899691343307495,grad_norm: 0.7055231172396715, iteration: 440011
loss: 0.983900785446167,grad_norm: 0.7700901623024387, iteration: 440012
loss: 1.0028166770935059,grad_norm: 0.7371920477369788, iteration: 440013
loss: 0.9685206413269043,grad_norm: 0.7564225782649038, iteration: 440014
loss: 1.0224418640136719,grad_norm: 0.9999997056480024, iteration: 440015
loss: 0.9833516478538513,grad_norm: 0.7007642197304415, iteration: 440016
loss: 0.953954815864563,grad_norm: 0.6998304133097448, iteration: 440017
loss: 0.9757621884346008,grad_norm: 0.6259604576905787, iteration: 440018
loss: 0.9833987951278687,grad_norm: 0.7267342424258807, iteration: 440019
loss: 0.9698789715766907,grad_norm: 0.9954701988986839, iteration: 440020
loss: 1.0109946727752686,grad_norm: 0.9860337265848359, iteration: 440021
loss: 1.058925747871399,grad_norm: 0.9999993254926198, iteration: 440022
loss: 1.0072569847106934,grad_norm: 0.8664414557006629, iteration: 440023
loss: 1.0293681621551514,grad_norm: 0.9999991886769757, iteration: 440024
loss: 1.0409483909606934,grad_norm: 0.9999991197284739, iteration: 440025
loss: 0.9704357385635376,grad_norm: 0.8584371477164265, iteration: 440026
loss: 0.9862335324287415,grad_norm: 0.7802777085492834, iteration: 440027
loss: 0.9594749212265015,grad_norm: 0.7779658032707224, iteration: 440028
loss: 1.075855016708374,grad_norm: 0.7666449535026673, iteration: 440029
loss: 1.002548098564148,grad_norm: 0.8443922001760314, iteration: 440030
loss: 1.0220061540603638,grad_norm: 0.706951681745277, iteration: 440031
loss: 0.9965969324111938,grad_norm: 0.8173331291444119, iteration: 440032
loss: 1.0000122785568237,grad_norm: 0.7969773224043737, iteration: 440033
loss: 1.0173977613449097,grad_norm: 0.7392301993337208, iteration: 440034
loss: 1.0138455629348755,grad_norm: 0.7056868143098668, iteration: 440035
loss: 1.0250686407089233,grad_norm: 0.9999998093275466, iteration: 440036
loss: 0.9594248533248901,grad_norm: 0.7621409554348483, iteration: 440037
loss: 0.9762660264968872,grad_norm: 0.8037287254018644, iteration: 440038
loss: 0.9709292650222778,grad_norm: 0.7294350756733761, iteration: 440039
loss: 1.0353553295135498,grad_norm: 0.7110593887259138, iteration: 440040
loss: 1.0063767433166504,grad_norm: 0.7809702007664766, iteration: 440041
loss: 0.9985724687576294,grad_norm: 0.9999991075046443, iteration: 440042
loss: 1.0077353715896606,grad_norm: 0.8859248087395079, iteration: 440043
loss: 0.997449517250061,grad_norm: 0.728090339201482, iteration: 440044
loss: 0.9999451637268066,grad_norm: 0.8565361180301986, iteration: 440045
loss: 1.0438780784606934,grad_norm: 0.8540376432938371, iteration: 440046
loss: 0.9866895079612732,grad_norm: 0.9999990859078984, iteration: 440047
loss: 1.0791940689086914,grad_norm: 0.7793543758774166, iteration: 440048
loss: 1.0140565633773804,grad_norm: 0.8731606606341076, iteration: 440049
loss: 0.9468154907226562,grad_norm: 0.8395460978911239, iteration: 440050
loss: 1.0062029361724854,grad_norm: 0.8223403919718485, iteration: 440051
loss: 1.0056661367416382,grad_norm: 0.7620591617127475, iteration: 440052
loss: 1.0210546255111694,grad_norm: 0.7600292656782274, iteration: 440053
loss: 0.9667350053787231,grad_norm: 0.6706354350126492, iteration: 440054
loss: 1.0111879110336304,grad_norm: 0.7582647968829432, iteration: 440055
loss: 1.0254228115081787,grad_norm: 0.7035177488332064, iteration: 440056
loss: 1.0071430206298828,grad_norm: 0.9999999255363756, iteration: 440057
loss: 0.9690162539482117,grad_norm: 0.8547266757324895, iteration: 440058
loss: 1.0002636909484863,grad_norm: 0.7915718045492097, iteration: 440059
loss: 1.0110087394714355,grad_norm: 0.7618837187182272, iteration: 440060
loss: 0.9928828477859497,grad_norm: 0.7773082489146671, iteration: 440061
loss: 1.0044704675674438,grad_norm: 0.7333792445626941, iteration: 440062
loss: 1.057818055152893,grad_norm: 0.9999991076522619, iteration: 440063
loss: 0.9913796782493591,grad_norm: 0.7473191939015887, iteration: 440064
loss: 1.0336686372756958,grad_norm: 0.929100940983852, iteration: 440065
loss: 1.0176784992218018,grad_norm: 0.999999202957017, iteration: 440066
loss: 1.0438323020935059,grad_norm: 0.999999145141879, iteration: 440067
loss: 0.9879578351974487,grad_norm: 0.8618424847098771, iteration: 440068
loss: 1.0749924182891846,grad_norm: 0.9999994732954612, iteration: 440069
loss: 0.9723677039146423,grad_norm: 0.8616416040538003, iteration: 440070
loss: 1.0502605438232422,grad_norm: 0.9210661239960629, iteration: 440071
loss: 0.9482021927833557,grad_norm: 0.8846108327847952, iteration: 440072
loss: 1.0117905139923096,grad_norm: 0.8287074792224456, iteration: 440073
loss: 1.0449992418289185,grad_norm: 0.680039778645919, iteration: 440074
loss: 1.015649437904358,grad_norm: 0.8811122465196418, iteration: 440075
loss: 1.0144169330596924,grad_norm: 0.708118846795295, iteration: 440076
loss: 0.9860357046127319,grad_norm: 0.999999648058509, iteration: 440077
loss: 0.9993482828140259,grad_norm: 0.8139729211671166, iteration: 440078
loss: 0.9752295613288879,grad_norm: 0.7795524289936263, iteration: 440079
loss: 1.0089905261993408,grad_norm: 0.686482649539623, iteration: 440080
loss: 1.0314414501190186,grad_norm: 0.9236647235112951, iteration: 440081
loss: 0.9400702714920044,grad_norm: 0.8622692061605718, iteration: 440082
loss: 1.010933756828308,grad_norm: 0.6554262707083345, iteration: 440083
loss: 1.0552784204483032,grad_norm: 0.9999998055961704, iteration: 440084
loss: 0.9810459017753601,grad_norm: 0.7298652302676907, iteration: 440085
loss: 1.0404677391052246,grad_norm: 0.9999992768893181, iteration: 440086
loss: 1.026794195175171,grad_norm: 0.9999997160222198, iteration: 440087
loss: 1.0397356748580933,grad_norm: 0.7481255841303043, iteration: 440088
loss: 1.0015699863433838,grad_norm: 0.7970192322233379, iteration: 440089
loss: 0.9907330870628357,grad_norm: 0.9931117040246337, iteration: 440090
loss: 1.0023449659347534,grad_norm: 0.7723815850937263, iteration: 440091
loss: 1.0233174562454224,grad_norm: 0.8465751204345049, iteration: 440092
loss: 1.0261361598968506,grad_norm: 0.887605571383434, iteration: 440093
loss: 0.9690393209457397,grad_norm: 0.7361496428811448, iteration: 440094
loss: 1.0818703174591064,grad_norm: 0.8467975625096714, iteration: 440095
loss: 0.9873045086860657,grad_norm: 0.8212924625190628, iteration: 440096
loss: 0.9871000051498413,grad_norm: 0.7642548979822381, iteration: 440097
loss: 1.0220779180526733,grad_norm: 0.8242895542388279, iteration: 440098
loss: 0.9764748215675354,grad_norm: 0.7801374723850111, iteration: 440099
loss: 1.027843952178955,grad_norm: 0.7773261192325731, iteration: 440100
loss: 1.0197969675064087,grad_norm: 0.7241999405739967, iteration: 440101
loss: 1.0122772455215454,grad_norm: 0.8477750153875672, iteration: 440102
loss: 0.9485387206077576,grad_norm: 0.7463112150175293, iteration: 440103
loss: 1.0061241388320923,grad_norm: 0.661236829870137, iteration: 440104
loss: 1.0359193086624146,grad_norm: 0.8401556168903148, iteration: 440105
loss: 1.0030896663665771,grad_norm: 0.5999559052314295, iteration: 440106
loss: 1.0711750984191895,grad_norm: 0.9999999093372188, iteration: 440107
loss: 0.9886423349380493,grad_norm: 0.651825206115219, iteration: 440108
loss: 0.9834755063056946,grad_norm: 0.7128551254540458, iteration: 440109
loss: 0.990878701210022,grad_norm: 0.9999990317637001, iteration: 440110
loss: 1.0092393159866333,grad_norm: 0.7460624409690039, iteration: 440111
loss: 1.0023367404937744,grad_norm: 0.7139719807119471, iteration: 440112
loss: 1.0272512435913086,grad_norm: 0.6646545774872984, iteration: 440113
loss: 0.9946910738945007,grad_norm: 0.7747496525365145, iteration: 440114
loss: 0.9456368684768677,grad_norm: 0.9999990498047379, iteration: 440115
loss: 1.015163779258728,grad_norm: 0.812267396615455, iteration: 440116
loss: 0.9820041656494141,grad_norm: 0.7575373075357164, iteration: 440117
loss: 1.0073881149291992,grad_norm: 0.9999999512282476, iteration: 440118
loss: 0.9937658309936523,grad_norm: 0.8581276118518901, iteration: 440119
loss: 1.0388100147247314,grad_norm: 0.6797050261983526, iteration: 440120
loss: 0.9788154363632202,grad_norm: 0.8625470545883798, iteration: 440121
loss: 1.008309245109558,grad_norm: 0.7446139226541071, iteration: 440122
loss: 0.9986114501953125,grad_norm: 0.7496304432849661, iteration: 440123
loss: 1.0104949474334717,grad_norm: 0.9098628563229063, iteration: 440124
loss: 1.0905653238296509,grad_norm: 0.9999991116373932, iteration: 440125
loss: 1.0075106620788574,grad_norm: 0.9359869085747367, iteration: 440126
loss: 1.0197266340255737,grad_norm: 0.8515045287106494, iteration: 440127
loss: 0.9868585467338562,grad_norm: 0.9999991040875948, iteration: 440128
loss: 1.0124180316925049,grad_norm: 0.8484279746701221, iteration: 440129
loss: 1.0067722797393799,grad_norm: 0.7191745441712059, iteration: 440130
loss: 0.9787243604660034,grad_norm: 0.7258709621179158, iteration: 440131
loss: 0.9604671001434326,grad_norm: 0.72680937784952, iteration: 440132
loss: 1.0026788711547852,grad_norm: 0.9999992105973755, iteration: 440133
loss: 1.0070937871932983,grad_norm: 0.9999990605007837, iteration: 440134
loss: 0.9377575516700745,grad_norm: 0.6820929443203875, iteration: 440135
loss: 0.9755001068115234,grad_norm: 0.9051588991878144, iteration: 440136
loss: 0.9916141629219055,grad_norm: 0.866067756572967, iteration: 440137
loss: 0.9789056181907654,grad_norm: 0.7324426182809021, iteration: 440138
loss: 1.0811253786087036,grad_norm: 0.9999994843915095, iteration: 440139
loss: 0.9996699094772339,grad_norm: 0.9999990594941328, iteration: 440140
loss: 0.9745040535926819,grad_norm: 0.6678506136707315, iteration: 440141
loss: 0.9777698516845703,grad_norm: 0.7838373147629896, iteration: 440142
loss: 0.995424211025238,grad_norm: 0.8119155340246058, iteration: 440143
loss: 0.987129271030426,grad_norm: 0.6820368340267664, iteration: 440144
loss: 1.0265463590621948,grad_norm: 0.6989734835671633, iteration: 440145
loss: 0.9594770669937134,grad_norm: 0.8110174878727846, iteration: 440146
loss: 0.9737740755081177,grad_norm: 0.9283862343404218, iteration: 440147
loss: 1.0058168172836304,grad_norm: 0.6748637876927704, iteration: 440148
loss: 0.9943670034408569,grad_norm: 0.7211057400170885, iteration: 440149
loss: 0.9537901878356934,grad_norm: 0.7716612699968887, iteration: 440150
loss: 1.0023925304412842,grad_norm: 0.7732783173554222, iteration: 440151
loss: 1.0380964279174805,grad_norm: 0.9999990358262837, iteration: 440152
loss: 0.9744083285331726,grad_norm: 0.8967728888767181, iteration: 440153
loss: 1.067672848701477,grad_norm: 0.727545369589751, iteration: 440154
loss: 1.0871145725250244,grad_norm: 0.9999994351870463, iteration: 440155
loss: 1.0006335973739624,grad_norm: 0.7450877025762604, iteration: 440156
loss: 0.9935064911842346,grad_norm: 0.7760867407313998, iteration: 440157
loss: 1.0901440382003784,grad_norm: 0.9999992941968011, iteration: 440158
loss: 1.010096788406372,grad_norm: 0.83081675561566, iteration: 440159
loss: 1.0283334255218506,grad_norm: 0.7297326477394487, iteration: 440160
loss: 1.032731294631958,grad_norm: 0.9480942272581039, iteration: 440161
loss: 1.008094310760498,grad_norm: 0.6861721358362788, iteration: 440162
loss: 0.9972003102302551,grad_norm: 0.6534595813790045, iteration: 440163
loss: 0.9847038388252258,grad_norm: 0.999999540254986, iteration: 440164
loss: 1.012220025062561,grad_norm: 0.7363572874895641, iteration: 440165
loss: 0.9932368993759155,grad_norm: 0.7345602506423093, iteration: 440166
loss: 0.9984970688819885,grad_norm: 0.7280110742757987, iteration: 440167
loss: 1.0506254434585571,grad_norm: 0.9038594312795715, iteration: 440168
loss: 0.9715790748596191,grad_norm: 0.7376155685111495, iteration: 440169
loss: 1.0238089561462402,grad_norm: 0.8059308801884874, iteration: 440170
loss: 1.0337800979614258,grad_norm: 0.8867563667636464, iteration: 440171
loss: 1.0675115585327148,grad_norm: 0.9999996961930242, iteration: 440172
loss: 1.0754382610321045,grad_norm: 0.9999995708568098, iteration: 440173
loss: 1.002324104309082,grad_norm: 0.6108705223736979, iteration: 440174
loss: 0.9934701919555664,grad_norm: 0.7417671463823642, iteration: 440175
loss: 0.9875993132591248,grad_norm: 0.6750749231790037, iteration: 440176
loss: 0.9773184657096863,grad_norm: 0.7609849113041396, iteration: 440177
loss: 0.9796789884567261,grad_norm: 0.7184381754011709, iteration: 440178
loss: 1.0345097780227661,grad_norm: 0.9999992715051846, iteration: 440179
loss: 1.0350432395935059,grad_norm: 0.8242390370053816, iteration: 440180
loss: 1.0244170427322388,grad_norm: 0.9430369840238754, iteration: 440181
loss: 1.0384080410003662,grad_norm: 0.8041000614085054, iteration: 440182
loss: 1.039272665977478,grad_norm: 0.9999990289461802, iteration: 440183
loss: 1.0039031505584717,grad_norm: 0.8577243070037928, iteration: 440184
loss: 1.0420756340026855,grad_norm: 0.99999952574131, iteration: 440185
loss: 0.9734006524085999,grad_norm: 0.8398511573592095, iteration: 440186
loss: 0.9909672737121582,grad_norm: 0.7519230871829411, iteration: 440187
loss: 0.9623202085494995,grad_norm: 0.7304273913129314, iteration: 440188
loss: 1.0007466077804565,grad_norm: 0.7143842508554299, iteration: 440189
loss: 0.9844976663589478,grad_norm: 0.8977577174234953, iteration: 440190
loss: 1.0200817584991455,grad_norm: 0.7429603112245293, iteration: 440191
loss: 1.0057350397109985,grad_norm: 0.8943279858324901, iteration: 440192
loss: 0.9905173182487488,grad_norm: 0.9761687138519725, iteration: 440193
loss: 0.978107213973999,grad_norm: 0.8651830425218818, iteration: 440194
loss: 0.9888178110122681,grad_norm: 0.8004492461577813, iteration: 440195
loss: 1.0173355340957642,grad_norm: 0.9181296883968538, iteration: 440196
loss: 1.0008660554885864,grad_norm: 0.716502694564155, iteration: 440197
loss: 1.0258097648620605,grad_norm: 0.8141723825905828, iteration: 440198
loss: 0.9948834776878357,grad_norm: 0.7275844286562664, iteration: 440199
loss: 0.970287561416626,grad_norm: 0.8195543514987418, iteration: 440200
loss: 0.981337308883667,grad_norm: 0.6437058600774552, iteration: 440201
loss: 0.9852476119995117,grad_norm: 0.9999990980700991, iteration: 440202
loss: 0.9853551387786865,grad_norm: 0.7830946126969466, iteration: 440203
loss: 1.0205098390579224,grad_norm: 0.9164113641506442, iteration: 440204
loss: 1.030005931854248,grad_norm: 0.8376134596926258, iteration: 440205
loss: 1.017264485359192,grad_norm: 0.7169318312898764, iteration: 440206
loss: 0.9910614490509033,grad_norm: 0.8341238821551296, iteration: 440207
loss: 1.024802327156067,grad_norm: 0.582978864725341, iteration: 440208
loss: 0.9905180931091309,grad_norm: 0.9999993199281548, iteration: 440209
loss: 1.012985110282898,grad_norm: 0.7375284578422555, iteration: 440210
loss: 0.9880098104476929,grad_norm: 0.6633388705570009, iteration: 440211
loss: 0.9663026332855225,grad_norm: 0.6562002700263359, iteration: 440212
loss: 0.9994595646858215,grad_norm: 0.8111991807606643, iteration: 440213
loss: 0.9768744111061096,grad_norm: 0.7186811398335917, iteration: 440214
loss: 0.9689404368400574,grad_norm: 0.8647031461468282, iteration: 440215
loss: 0.9880624413490295,grad_norm: 0.776627664963045, iteration: 440216
loss: 1.0080878734588623,grad_norm: 0.9999994204744536, iteration: 440217
loss: 1.096856713294983,grad_norm: 0.9999998019982596, iteration: 440218
loss: 0.992415726184845,grad_norm: 0.7009717606804724, iteration: 440219
loss: 0.9625788927078247,grad_norm: 0.7403529728454828, iteration: 440220
loss: 1.0410130023956299,grad_norm: 0.6907665656988679, iteration: 440221
loss: 0.9880300164222717,grad_norm: 0.6723412185745358, iteration: 440222
loss: 1.0349642038345337,grad_norm: 0.8188281873333698, iteration: 440223
loss: 1.0061498880386353,grad_norm: 0.8514634597450034, iteration: 440224
loss: 1.0399354696273804,grad_norm: 0.8351734849002488, iteration: 440225
loss: 0.9762959480285645,grad_norm: 0.7856043120043884, iteration: 440226
loss: 0.980424702167511,grad_norm: 0.7929515888706461, iteration: 440227
loss: 0.9773425459861755,grad_norm: 0.8695867070070822, iteration: 440228
loss: 0.9342542886734009,grad_norm: 0.7500395214131705, iteration: 440229
loss: 1.033134937286377,grad_norm: 0.9999992669357253, iteration: 440230
loss: 1.0085833072662354,grad_norm: 0.7701436321769068, iteration: 440231
loss: 1.0124561786651611,grad_norm: 0.8112447762783107, iteration: 440232
loss: 0.9852122664451599,grad_norm: 0.8667306239287366, iteration: 440233
loss: 1.0035438537597656,grad_norm: 0.6666475563683937, iteration: 440234
loss: 1.0188268423080444,grad_norm: 0.8215431264469953, iteration: 440235
loss: 1.0081151723861694,grad_norm: 0.692548661255986, iteration: 440236
loss: 0.9688449501991272,grad_norm: 0.7206263593328692, iteration: 440237
loss: 1.0022454261779785,grad_norm: 0.9467507172149187, iteration: 440238
loss: 1.0087497234344482,grad_norm: 0.8263643113897312, iteration: 440239
loss: 1.00340735912323,grad_norm: 0.7122954239638808, iteration: 440240
loss: 0.9479050040245056,grad_norm: 0.8167572472420485, iteration: 440241
loss: 0.99092036485672,grad_norm: 0.6014028742250203, iteration: 440242
loss: 1.0136348009109497,grad_norm: 0.9999999117614635, iteration: 440243
loss: 0.9723617434501648,grad_norm: 0.7785884587846081, iteration: 440244
loss: 0.980341911315918,grad_norm: 0.7575095756201665, iteration: 440245
loss: 1.011162281036377,grad_norm: 0.7990210196836437, iteration: 440246
loss: 0.9884272217750549,grad_norm: 0.9999991193216671, iteration: 440247
loss: 1.044476866722107,grad_norm: 0.7994329196281268, iteration: 440248
loss: 0.9837111234664917,grad_norm: 0.9816409803031916, iteration: 440249
loss: 0.9887316226959229,grad_norm: 0.9999995931663186, iteration: 440250
loss: 0.9728856086730957,grad_norm: 0.7393604961774672, iteration: 440251
loss: 1.0918524265289307,grad_norm: 0.8165796988912722, iteration: 440252
loss: 1.019466757774353,grad_norm: 0.8220783619898259, iteration: 440253
loss: 0.9859129190444946,grad_norm: 0.9159892593304205, iteration: 440254
loss: 1.012971043586731,grad_norm: 0.7535676082378677, iteration: 440255
loss: 1.0108293294906616,grad_norm: 0.8816766699062989, iteration: 440256
loss: 0.9968860149383545,grad_norm: 0.7226240747559638, iteration: 440257
loss: 0.9718126058578491,grad_norm: 0.7108451148189506, iteration: 440258
loss: 1.0240534543991089,grad_norm: 0.7765019967256387, iteration: 440259
loss: 0.9531674385070801,grad_norm: 0.9999991260735835, iteration: 440260
loss: 1.028174638748169,grad_norm: 0.7717837624197398, iteration: 440261
loss: 0.989564836025238,grad_norm: 0.8378204444852896, iteration: 440262
loss: 0.9771507382392883,grad_norm: 0.9999993262702872, iteration: 440263
loss: 0.9839915633201599,grad_norm: 0.8998887776314143, iteration: 440264
loss: 0.9796858429908752,grad_norm: 0.7816945156882771, iteration: 440265
loss: 0.9847630262374878,grad_norm: 0.6643163341923687, iteration: 440266
loss: 0.9953055381774902,grad_norm: 0.7916551335674239, iteration: 440267
loss: 0.9613582491874695,grad_norm: 0.8337473848357054, iteration: 440268
loss: 1.0381038188934326,grad_norm: 0.8427236360329277, iteration: 440269
loss: 1.0211002826690674,grad_norm: 0.8088917741023597, iteration: 440270
loss: 0.9845259785652161,grad_norm: 0.6768091474333316, iteration: 440271
loss: 1.0022832155227661,grad_norm: 0.873268600066762, iteration: 440272
loss: 0.9719277024269104,grad_norm: 0.8006031690921587, iteration: 440273
loss: 0.9676715135574341,grad_norm: 0.6885630858182632, iteration: 440274
loss: 1.0952565670013428,grad_norm: 0.9999990060107602, iteration: 440275
loss: 0.9945970177650452,grad_norm: 0.703149478964202, iteration: 440276
loss: 0.9847314953804016,grad_norm: 0.9999992160541679, iteration: 440277
loss: 1.0333061218261719,grad_norm: 0.9999992624063361, iteration: 440278
loss: 1.0428051948547363,grad_norm: 0.9433144115507588, iteration: 440279
loss: 1.0150456428527832,grad_norm: 0.7311156874946193, iteration: 440280
loss: 0.989841639995575,grad_norm: 0.9416396903328546, iteration: 440281
loss: 0.9823806285858154,grad_norm: 0.7962414664345473, iteration: 440282
loss: 0.9795008301734924,grad_norm: 0.7311502564020766, iteration: 440283
loss: 0.9912444949150085,grad_norm: 0.8113286982284642, iteration: 440284
loss: 0.992637574672699,grad_norm: 0.6577382033483692, iteration: 440285
loss: 1.0200393199920654,grad_norm: 0.8986484100957888, iteration: 440286
loss: 1.0007601976394653,grad_norm: 0.6881999145723793, iteration: 440287
loss: 1.0320329666137695,grad_norm: 0.8114584048608416, iteration: 440288
loss: 1.0596725940704346,grad_norm: 0.9999996110683075, iteration: 440289
loss: 1.0367664098739624,grad_norm: 0.9999995516098582, iteration: 440290
loss: 0.9169744253158569,grad_norm: 0.7382481567121698, iteration: 440291
loss: 0.953029453754425,grad_norm: 0.8540589867542425, iteration: 440292
loss: 0.9982184171676636,grad_norm: 0.6271633449976286, iteration: 440293
loss: 0.9986636638641357,grad_norm: 0.999777007876185, iteration: 440294
loss: 1.0280884504318237,grad_norm: 0.9999995935805542, iteration: 440295
loss: 1.0416669845581055,grad_norm: 0.9676235613163588, iteration: 440296
loss: 0.9961186647415161,grad_norm: 0.8262811832187396, iteration: 440297
loss: 1.0070656538009644,grad_norm: 0.8233883280921349, iteration: 440298
loss: 0.9635066986083984,grad_norm: 0.7384744262910565, iteration: 440299
loss: 0.9844383001327515,grad_norm: 0.8096319989984153, iteration: 440300
loss: 0.9679675698280334,grad_norm: 0.7638394842751979, iteration: 440301
loss: 1.008446216583252,grad_norm: 0.6414228868497235, iteration: 440302
loss: 0.9599147439002991,grad_norm: 0.6816446343649946, iteration: 440303
loss: 0.9836071729660034,grad_norm: 0.7445130939814982, iteration: 440304
loss: 1.0105865001678467,grad_norm: 0.8264877121866269, iteration: 440305
loss: 1.0512586832046509,grad_norm: 0.8117722762229514, iteration: 440306
loss: 1.0110963582992554,grad_norm: 0.6660610345424329, iteration: 440307
loss: 0.9840729236602783,grad_norm: 0.7607774087624861, iteration: 440308
loss: 0.9781145453453064,grad_norm: 0.8257696946696197, iteration: 440309
loss: 0.9879513382911682,grad_norm: 0.7904393945767779, iteration: 440310
loss: 0.9918426871299744,grad_norm: 0.7492384530407146, iteration: 440311
loss: 0.9864166975021362,grad_norm: 0.8809754947892076, iteration: 440312
loss: 1.0329995155334473,grad_norm: 0.9598448607814977, iteration: 440313
loss: 0.9955523014068604,grad_norm: 0.8427310856182221, iteration: 440314
loss: 1.0053068399429321,grad_norm: 0.7615335253004881, iteration: 440315
loss: 1.0531165599822998,grad_norm: 0.8482764367130534, iteration: 440316
loss: 0.970596969127655,grad_norm: 0.7439189491266648, iteration: 440317
loss: 0.9955248236656189,grad_norm: 0.9512599711482354, iteration: 440318
loss: 1.0602214336395264,grad_norm: 0.9999992948308579, iteration: 440319
loss: 0.9956293702125549,grad_norm: 0.944672683313754, iteration: 440320
loss: 0.9619867205619812,grad_norm: 0.8167640135763714, iteration: 440321
loss: 1.0490391254425049,grad_norm: 0.9999999024268974, iteration: 440322
loss: 0.9673235416412354,grad_norm: 0.9999991504461756, iteration: 440323
loss: 1.0425342321395874,grad_norm: 0.9999991915302995, iteration: 440324
loss: 1.0307645797729492,grad_norm: 0.9514081239066686, iteration: 440325
loss: 1.0150763988494873,grad_norm: 0.9472834842340667, iteration: 440326
loss: 1.0064681768417358,grad_norm: 0.7472439570611289, iteration: 440327
loss: 1.0041792392730713,grad_norm: 0.7874432302653706, iteration: 440328
loss: 1.0011794567108154,grad_norm: 0.7999489146648879, iteration: 440329
loss: 0.9356549382209778,grad_norm: 0.7538738789229359, iteration: 440330
loss: 1.0336871147155762,grad_norm: 0.9999993602257338, iteration: 440331
loss: 0.9814376831054688,grad_norm: 0.9219799644284584, iteration: 440332
loss: 1.0084996223449707,grad_norm: 0.6964678770830762, iteration: 440333
loss: 0.9671365022659302,grad_norm: 0.7774690779175834, iteration: 440334
loss: 0.975628137588501,grad_norm: 0.7291936902076869, iteration: 440335
loss: 0.9700347185134888,grad_norm: 0.765167583651415, iteration: 440336
loss: 0.987942099571228,grad_norm: 0.7864404724024741, iteration: 440337
loss: 0.9945799112319946,grad_norm: 0.8559188734276248, iteration: 440338
loss: 1.0241061449050903,grad_norm: 0.870486674542704, iteration: 440339
loss: 1.005110263824463,grad_norm: 0.8099199196845976, iteration: 440340
loss: 1.0080524682998657,grad_norm: 0.780323767597097, iteration: 440341
loss: 1.0141291618347168,grad_norm: 0.9999995905890446, iteration: 440342
loss: 1.0243724584579468,grad_norm: 0.8045807169445881, iteration: 440343
loss: 1.0230753421783447,grad_norm: 0.999999025832708, iteration: 440344
loss: 0.9926859140396118,grad_norm: 0.77655397780251, iteration: 440345
loss: 0.9866151809692383,grad_norm: 0.7991830363356349, iteration: 440346
loss: 0.9876725077629089,grad_norm: 0.749762593531435, iteration: 440347
loss: 1.0167142152786255,grad_norm: 0.779648733888033, iteration: 440348
loss: 1.0093916654586792,grad_norm: 0.7207015466727017, iteration: 440349
loss: 0.9905433654785156,grad_norm: 0.739948543979027, iteration: 440350
loss: 1.0345054864883423,grad_norm: 0.8312997060299334, iteration: 440351
loss: 1.0667189359664917,grad_norm: 0.8918262582561339, iteration: 440352
loss: 1.0018333196640015,grad_norm: 0.5833783271807925, iteration: 440353
loss: 1.0103228092193604,grad_norm: 0.7054465237989879, iteration: 440354
loss: 1.0317288637161255,grad_norm: 0.6926418998942079, iteration: 440355
loss: 0.9926550388336182,grad_norm: 0.7286275264483976, iteration: 440356
loss: 0.9940576553344727,grad_norm: 0.8177869528333762, iteration: 440357
loss: 1.0102638006210327,grad_norm: 0.75527967285144, iteration: 440358
loss: 1.0027785301208496,grad_norm: 0.8749267251468599, iteration: 440359
loss: 0.9752804636955261,grad_norm: 0.8926921626514063, iteration: 440360
loss: 1.0269660949707031,grad_norm: 0.7838251700652042, iteration: 440361
loss: 0.9955697655677795,grad_norm: 0.7904180434314987, iteration: 440362
loss: 1.001412272453308,grad_norm: 0.8844415771159109, iteration: 440363
loss: 1.0129950046539307,grad_norm: 0.8579848300929048, iteration: 440364
loss: 1.0060139894485474,grad_norm: 0.7449787509044042, iteration: 440365
loss: 0.9800205230712891,grad_norm: 0.8204415410392282, iteration: 440366
loss: 0.9952086210250854,grad_norm: 0.9226204610208901, iteration: 440367
loss: 1.0125203132629395,grad_norm: 0.7699736265083732, iteration: 440368
loss: 1.0184972286224365,grad_norm: 0.74498961492973, iteration: 440369
loss: 1.0053582191467285,grad_norm: 0.8336719311229865, iteration: 440370
loss: 0.9857767224311829,grad_norm: 0.7364163229020989, iteration: 440371
loss: 0.9660124778747559,grad_norm: 0.6762996818381292, iteration: 440372
loss: 1.018560528755188,grad_norm: 0.902078582166094, iteration: 440373
loss: 1.0224401950836182,grad_norm: 0.8643773915572529, iteration: 440374
loss: 0.9728106260299683,grad_norm: 0.6847913944644168, iteration: 440375
loss: 1.0027716159820557,grad_norm: 0.7536670983669823, iteration: 440376
loss: 0.9782882928848267,grad_norm: 0.682146448691991, iteration: 440377
loss: 1.0210421085357666,grad_norm: 0.8403600531268353, iteration: 440378
loss: 0.9787622690200806,grad_norm: 0.8101664451064048, iteration: 440379
loss: 1.0272502899169922,grad_norm: 0.790389467788185, iteration: 440380
loss: 1.0001014471054077,grad_norm: 0.7065070658471273, iteration: 440381
loss: 1.0185045003890991,grad_norm: 0.9277629957774638, iteration: 440382
loss: 1.0219594240188599,grad_norm: 0.8015187248480249, iteration: 440383
loss: 1.0057851076126099,grad_norm: 0.877541340743141, iteration: 440384
loss: 1.014150857925415,grad_norm: 0.8436526139897857, iteration: 440385
loss: 1.0250202417373657,grad_norm: 0.6356890237359821, iteration: 440386
loss: 0.9815776944160461,grad_norm: 0.838374828871663, iteration: 440387
loss: 0.950158953666687,grad_norm: 0.7485130903907184, iteration: 440388
loss: 1.0365182161331177,grad_norm: 0.9999998440515303, iteration: 440389
loss: 1.006268858909607,grad_norm: 0.7344739883847056, iteration: 440390
loss: 0.9744573831558228,grad_norm: 0.8396483182765393, iteration: 440391
loss: 1.0274949073791504,grad_norm: 0.9999997273072028, iteration: 440392
loss: 1.0026540756225586,grad_norm: 0.8518333943164713, iteration: 440393
loss: 1.0086945295333862,grad_norm: 0.6664836985546156, iteration: 440394
loss: 0.9795105457305908,grad_norm: 0.7625132932594707, iteration: 440395
loss: 1.0076360702514648,grad_norm: 0.8568626277147198, iteration: 440396
loss: 1.0338321924209595,grad_norm: 0.90382462614229, iteration: 440397
loss: 0.975440502166748,grad_norm: 0.8023788129070826, iteration: 440398
loss: 1.0259941816329956,grad_norm: 0.8867023401186868, iteration: 440399
loss: 1.0242643356323242,grad_norm: 0.9174933843034174, iteration: 440400
loss: 0.9945176243782043,grad_norm: 0.6930549591603734, iteration: 440401
loss: 0.986412763595581,grad_norm: 0.7335953630493702, iteration: 440402
loss: 0.973038375377655,grad_norm: 0.9999991636860663, iteration: 440403
loss: 1.0326236486434937,grad_norm: 0.9999991837909873, iteration: 440404
loss: 0.9516531825065613,grad_norm: 0.977590224318423, iteration: 440405
loss: 1.0053136348724365,grad_norm: 0.7180440303053669, iteration: 440406
loss: 1.0354208946228027,grad_norm: 0.8132559794083449, iteration: 440407
loss: 0.9831629991531372,grad_norm: 0.8171414509438233, iteration: 440408
loss: 0.9803630709648132,grad_norm: 0.9999994826214482, iteration: 440409
loss: 0.9895403981208801,grad_norm: 0.6848233041379189, iteration: 440410
loss: 0.9969727396965027,grad_norm: 0.9390015281255533, iteration: 440411
loss: 0.986530065536499,grad_norm: 0.7907780873467646, iteration: 440412
loss: 0.9927883744239807,grad_norm: 0.7341408527638233, iteration: 440413
loss: 1.0200599431991577,grad_norm: 0.7483185496997722, iteration: 440414
loss: 0.9653272032737732,grad_norm: 0.7044542723066417, iteration: 440415
loss: 0.9957817792892456,grad_norm: 0.6857054649138505, iteration: 440416
loss: 1.0197595357894897,grad_norm: 0.7674390955766638, iteration: 440417
loss: 1.0070321559906006,grad_norm: 0.7190399625655367, iteration: 440418
loss: 1.0230321884155273,grad_norm: 0.8258152312297298, iteration: 440419
loss: 0.9856842756271362,grad_norm: 0.859231090727258, iteration: 440420
loss: 0.99751216173172,grad_norm: 0.7339372841620754, iteration: 440421
loss: 0.98966383934021,grad_norm: 0.9999989947214145, iteration: 440422
loss: 1.0328302383422852,grad_norm: 0.8691935975744904, iteration: 440423
loss: 0.993650496006012,grad_norm: 0.6637377470874382, iteration: 440424
loss: 1.0015336275100708,grad_norm: 0.7601687207841491, iteration: 440425
loss: 0.9861795902252197,grad_norm: 0.8339459096428659, iteration: 440426
loss: 0.9985248446464539,grad_norm: 0.9985405931097496, iteration: 440427
loss: 1.0122467279434204,grad_norm: 0.7949069291923779, iteration: 440428
loss: 1.0379798412322998,grad_norm: 0.7820484506671744, iteration: 440429
loss: 0.9880916476249695,grad_norm: 0.6918102311083868, iteration: 440430
loss: 0.9897551536560059,grad_norm: 0.791696188634275, iteration: 440431
loss: 1.0223267078399658,grad_norm: 0.6631824385189773, iteration: 440432
loss: 1.0125244855880737,grad_norm: 0.7246084267103791, iteration: 440433
loss: 1.0080291032791138,grad_norm: 0.695759604493306, iteration: 440434
loss: 1.0210407972335815,grad_norm: 0.7354663809590255, iteration: 440435
loss: 0.9986916780471802,grad_norm: 0.7023989506722772, iteration: 440436
loss: 1.0005172491073608,grad_norm: 0.8127065997029673, iteration: 440437
loss: 0.9888964891433716,grad_norm: 0.999999055461551, iteration: 440438
loss: 0.9988818168640137,grad_norm: 0.7386772981755177, iteration: 440439
loss: 1.0059772729873657,grad_norm: 0.9999991719565057, iteration: 440440
loss: 0.9878847002983093,grad_norm: 0.7418365734880793, iteration: 440441
loss: 0.9793264269828796,grad_norm: 0.7962508930670374, iteration: 440442
loss: 1.0069886445999146,grad_norm: 0.756774467622491, iteration: 440443
loss: 0.9966220259666443,grad_norm: 0.7725640251530239, iteration: 440444
loss: 1.0214680433273315,grad_norm: 0.8953333366646579, iteration: 440445
loss: 0.9988503456115723,grad_norm: 0.7159567048325309, iteration: 440446
loss: 1.0003633499145508,grad_norm: 0.6850288809236695, iteration: 440447
loss: 1.005611538887024,grad_norm: 0.8451079693837803, iteration: 440448
loss: 1.0075980424880981,grad_norm: 0.8931888795220724, iteration: 440449
loss: 0.9691474437713623,grad_norm: 0.9069193646865292, iteration: 440450
loss: 0.9869967103004456,grad_norm: 0.6600850488531356, iteration: 440451
loss: 0.9960641860961914,grad_norm: 0.9604174779982312, iteration: 440452
loss: 0.9894742369651794,grad_norm: 0.6532514577043835, iteration: 440453
loss: 0.9907947182655334,grad_norm: 0.7032416520307228, iteration: 440454
loss: 1.0192468166351318,grad_norm: 0.8267859833003833, iteration: 440455
loss: 0.991661548614502,grad_norm: 0.8768489039962757, iteration: 440456
loss: 1.014015555381775,grad_norm: 0.7472362767784588, iteration: 440457
loss: 0.9801146984100342,grad_norm: 0.9009728092126252, iteration: 440458
loss: 1.0061354637145996,grad_norm: 0.715210252963438, iteration: 440459
loss: 1.0025655031204224,grad_norm: 0.8582818602199943, iteration: 440460
loss: 0.992280900478363,grad_norm: 0.8179558192613631, iteration: 440461
loss: 1.1587682962417603,grad_norm: 0.9381937856328176, iteration: 440462
loss: 0.9892383813858032,grad_norm: 0.7751279830054444, iteration: 440463
loss: 0.9712697863578796,grad_norm: 0.6513774386155233, iteration: 440464
loss: 1.0425904989242554,grad_norm: 0.999999811246102, iteration: 440465
loss: 0.9592525362968445,grad_norm: 0.7563162391457844, iteration: 440466
loss: 1.0188014507293701,grad_norm: 0.9999991137923867, iteration: 440467
loss: 1.0245317220687866,grad_norm: 0.7172117914162821, iteration: 440468
loss: 1.032137393951416,grad_norm: 0.7757452744296007, iteration: 440469
loss: 0.9983185529708862,grad_norm: 0.96398546730866, iteration: 440470
loss: 1.0240540504455566,grad_norm: 0.8625795749040608, iteration: 440471
loss: 1.0194156169891357,grad_norm: 0.7611989096407309, iteration: 440472
loss: 1.002814531326294,grad_norm: 0.6524083753878348, iteration: 440473
loss: 0.9994468092918396,grad_norm: 0.7278867353359665, iteration: 440474
loss: 1.0020177364349365,grad_norm: 0.663240546313119, iteration: 440475
loss: 1.0210509300231934,grad_norm: 0.8151942048316125, iteration: 440476
loss: 1.0063024759292603,grad_norm: 0.811403993790913, iteration: 440477
loss: 1.0097559690475464,grad_norm: 0.7180346982235056, iteration: 440478
loss: 0.9953533411026001,grad_norm: 0.7612019805726966, iteration: 440479
loss: 1.0132901668548584,grad_norm: 0.7764681105587982, iteration: 440480
loss: 1.001932144165039,grad_norm: 0.7962367420431018, iteration: 440481
loss: 0.982208251953125,grad_norm: 0.9668684172545249, iteration: 440482
loss: 0.9690834283828735,grad_norm: 0.8152032188254357, iteration: 440483
loss: 0.9944887757301331,grad_norm: 0.8707496623326245, iteration: 440484
loss: 0.9697229862213135,grad_norm: 0.7885608757685976, iteration: 440485
loss: 1.0323185920715332,grad_norm: 0.7000767171707561, iteration: 440486
loss: 0.9829522371292114,grad_norm: 0.9366269471960037, iteration: 440487
loss: 1.0113270282745361,grad_norm: 0.8248787325963245, iteration: 440488
loss: 1.0196229219436646,grad_norm: 0.8198967171870811, iteration: 440489
loss: 1.0426157712936401,grad_norm: 0.7370980457573391, iteration: 440490
loss: 1.0066897869110107,grad_norm: 0.7912412358468526, iteration: 440491
loss: 1.0007741451263428,grad_norm: 0.7288103503617627, iteration: 440492
loss: 1.117239236831665,grad_norm: 0.784289319702934, iteration: 440493
loss: 1.006085753440857,grad_norm: 0.6722529197557278, iteration: 440494
loss: 1.042319893836975,grad_norm: 0.8027270232441992, iteration: 440495
loss: 0.9802236557006836,grad_norm: 0.7558131938429038, iteration: 440496
loss: 0.9796087741851807,grad_norm: 0.6432757495887322, iteration: 440497
loss: 1.0087887048721313,grad_norm: 0.7088797662463668, iteration: 440498
loss: 0.9886474609375,grad_norm: 0.779975068012016, iteration: 440499
loss: 0.9916055798530579,grad_norm: 0.7735687759373785, iteration: 440500
loss: 0.9970159530639648,grad_norm: 0.6808787176227463, iteration: 440501
loss: 0.9946357607841492,grad_norm: 0.7441674531879765, iteration: 440502
loss: 0.9638431668281555,grad_norm: 0.8256770734278128, iteration: 440503
loss: 1.0012285709381104,grad_norm: 0.9297715492618019, iteration: 440504
loss: 1.0061957836151123,grad_norm: 0.8134697641580737, iteration: 440505
loss: 1.012020468711853,grad_norm: 0.7300400576029474, iteration: 440506
loss: 1.00431227684021,grad_norm: 0.9999991743395322, iteration: 440507
loss: 0.9999486207962036,grad_norm: 0.7373692283857355, iteration: 440508
loss: 0.9965390563011169,grad_norm: 0.6925445887147793, iteration: 440509
loss: 0.9747475385665894,grad_norm: 0.798956755214738, iteration: 440510
loss: 0.9913989305496216,grad_norm: 0.7230519690625575, iteration: 440511
loss: 1.0210001468658447,grad_norm: 0.7000110934883541, iteration: 440512
loss: 0.9806700348854065,grad_norm: 0.8948185378173739, iteration: 440513
loss: 1.029369592666626,grad_norm: 0.7315864364556518, iteration: 440514
loss: 0.9937015771865845,grad_norm: 0.8205683830176601, iteration: 440515
loss: 0.9971040487289429,grad_norm: 0.750095565690175, iteration: 440516
loss: 1.0034897327423096,grad_norm: 0.7666546781880899, iteration: 440517
loss: 0.9790061712265015,grad_norm: 0.9999990134258369, iteration: 440518
loss: 0.9844033718109131,grad_norm: 0.63266445021355, iteration: 440519
loss: 1.050034523010254,grad_norm: 0.8289249086528782, iteration: 440520
loss: 1.0069266557693481,grad_norm: 0.7627845756748237, iteration: 440521
loss: 0.9723968505859375,grad_norm: 0.7257666790140275, iteration: 440522
loss: 0.9944050312042236,grad_norm: 0.8950365610931663, iteration: 440523
loss: 0.9561488628387451,grad_norm: 0.8248510984720353, iteration: 440524
loss: 1.0132062435150146,grad_norm: 0.746163079246792, iteration: 440525
loss: 1.002004861831665,grad_norm: 0.8536289622326618, iteration: 440526
loss: 1.0301929712295532,grad_norm: 0.7420686670400004, iteration: 440527
loss: 0.9688307046890259,grad_norm: 0.8782415427108275, iteration: 440528
loss: 1.0103009939193726,grad_norm: 0.7508150364017098, iteration: 440529
loss: 0.9910075664520264,grad_norm: 0.8164074438953508, iteration: 440530
loss: 0.9871551990509033,grad_norm: 0.9999990322835417, iteration: 440531
loss: 1.0139979124069214,grad_norm: 0.7631563226377246, iteration: 440532
loss: 1.0477150678634644,grad_norm: 0.9999991496398508, iteration: 440533
loss: 1.0368335247039795,grad_norm: 0.7918050940403039, iteration: 440534
loss: 1.025051236152649,grad_norm: 0.999999902973908, iteration: 440535
loss: 1.0132697820663452,grad_norm: 0.9999993141297013, iteration: 440536
loss: 0.9617998600006104,grad_norm: 0.9430651898335418, iteration: 440537
loss: 0.9953888654708862,grad_norm: 0.871044930088503, iteration: 440538
loss: 0.9920256733894348,grad_norm: 0.6850172655278338, iteration: 440539
loss: 0.9720519781112671,grad_norm: 0.8242293695755156, iteration: 440540
loss: 0.9471916556358337,grad_norm: 0.7630360247904453, iteration: 440541
loss: 1.0237834453582764,grad_norm: 0.8202993099524055, iteration: 440542
loss: 0.9882805347442627,grad_norm: 0.9190888264142734, iteration: 440543
loss: 0.975488543510437,grad_norm: 0.9484716996897354, iteration: 440544
loss: 1.0353378057479858,grad_norm: 0.9999997973961883, iteration: 440545
loss: 0.9802420139312744,grad_norm: 0.8365465271009135, iteration: 440546
loss: 1.0342963933944702,grad_norm: 0.7138244564571271, iteration: 440547
loss: 0.9717656373977661,grad_norm: 0.9461114391063268, iteration: 440548
loss: 1.011688470840454,grad_norm: 0.7533467538319917, iteration: 440549
loss: 0.9930600523948669,grad_norm: 0.7646960918991359, iteration: 440550
loss: 0.9939392805099487,grad_norm: 0.9999997597524194, iteration: 440551
loss: 1.036253809928894,grad_norm: 0.7786917041658485, iteration: 440552
loss: 0.9702605605125427,grad_norm: 0.6682412928970659, iteration: 440553
loss: 1.0588752031326294,grad_norm: 0.764823306369397, iteration: 440554
loss: 1.0825196504592896,grad_norm: 0.9999998570365318, iteration: 440555
loss: 1.0224082469940186,grad_norm: 0.671623579162049, iteration: 440556
loss: 1.0033334493637085,grad_norm: 0.8579412097678814, iteration: 440557
loss: 0.9805909991264343,grad_norm: 0.8001488898565476, iteration: 440558
loss: 0.9845908284187317,grad_norm: 0.7356395840742523, iteration: 440559
loss: 0.9734101295471191,grad_norm: 0.595921662228877, iteration: 440560
loss: 1.0001033544540405,grad_norm: 0.7375096358807596, iteration: 440561
loss: 1.0123180150985718,grad_norm: 0.7877454014206642, iteration: 440562
loss: 0.9922789931297302,grad_norm: 0.8472898108777435, iteration: 440563
loss: 0.9722129702568054,grad_norm: 0.7073515801638124, iteration: 440564
loss: 1.036119818687439,grad_norm: 0.9999997895845036, iteration: 440565
loss: 1.016871690750122,grad_norm: 0.8280224584575215, iteration: 440566
loss: 0.9504813551902771,grad_norm: 0.7509201100548956, iteration: 440567
loss: 0.9662273526191711,grad_norm: 0.7125880332064979, iteration: 440568
loss: 0.9555930495262146,grad_norm: 0.7445612825945617, iteration: 440569
loss: 1.015177607536316,grad_norm: 0.9649235898283994, iteration: 440570
loss: 1.0593645572662354,grad_norm: 0.7265674520317775, iteration: 440571
loss: 0.9893243908882141,grad_norm: 0.8003863807929479, iteration: 440572
loss: 1.0062974691390991,grad_norm: 0.724124113747089, iteration: 440573
loss: 1.0432676076889038,grad_norm: 0.7615025544705254, iteration: 440574
loss: 0.9960564374923706,grad_norm: 0.8209890269732716, iteration: 440575
loss: 0.9957171678543091,grad_norm: 0.7358039738613085, iteration: 440576
loss: 1.0293495655059814,grad_norm: 0.7002572590647099, iteration: 440577
loss: 1.0057427883148193,grad_norm: 0.7777019739282505, iteration: 440578
loss: 1.061346173286438,grad_norm: 0.9999992938152232, iteration: 440579
loss: 1.004457950592041,grad_norm: 0.8701097923170845, iteration: 440580
loss: 1.0674465894699097,grad_norm: 0.7377188240643386, iteration: 440581
loss: 1.0144288539886475,grad_norm: 0.7276631111341164, iteration: 440582
loss: 1.0148364305496216,grad_norm: 0.8336665982135208, iteration: 440583
loss: 1.0551141500473022,grad_norm: 0.9999991743926011, iteration: 440584
loss: 0.9721928834915161,grad_norm: 0.7656301668356319, iteration: 440585
loss: 0.9736126661300659,grad_norm: 0.7755995294585971, iteration: 440586
loss: 0.9835166335105896,grad_norm: 0.7281573199685786, iteration: 440587
loss: 1.0075056552886963,grad_norm: 0.6576519455191557, iteration: 440588
loss: 1.0309245586395264,grad_norm: 0.8752012489435954, iteration: 440589
loss: 1.0321167707443237,grad_norm: 0.9958613568162308, iteration: 440590
loss: 1.021981954574585,grad_norm: 0.7321758321495923, iteration: 440591
loss: 1.0192523002624512,grad_norm: 0.6278972892945386, iteration: 440592
loss: 1.0480926036834717,grad_norm: 0.9999995568537522, iteration: 440593
loss: 1.001956582069397,grad_norm: 0.9231905610345971, iteration: 440594
loss: 0.9847887754440308,grad_norm: 0.8559994297929968, iteration: 440595
loss: 0.966162919998169,grad_norm: 0.8122964189957107, iteration: 440596
loss: 1.04286527633667,grad_norm: 0.9999994569721142, iteration: 440597
loss: 1.0077570676803589,grad_norm: 0.8144641614793641, iteration: 440598
loss: 1.0116556882858276,grad_norm: 0.7299156042388777, iteration: 440599
loss: 1.0162522792816162,grad_norm: 0.9026339385361487, iteration: 440600
loss: 0.9742117524147034,grad_norm: 0.8825894392511208, iteration: 440601
loss: 0.9971578121185303,grad_norm: 0.784440088566839, iteration: 440602
loss: 1.063169002532959,grad_norm: 0.9999997229546917, iteration: 440603
loss: 1.0065805912017822,grad_norm: 0.9220001654189159, iteration: 440604
loss: 1.0229113101959229,grad_norm: 0.7631486291487685, iteration: 440605
loss: 0.9709790349006653,grad_norm: 0.763519791488256, iteration: 440606
loss: 1.0045865774154663,grad_norm: 0.8180377413678345, iteration: 440607
loss: 1.0113005638122559,grad_norm: 0.6223822113437537, iteration: 440608
loss: 1.0023541450500488,grad_norm: 0.8561272521600375, iteration: 440609
loss: 0.9871233105659485,grad_norm: 0.6738766975852297, iteration: 440610
loss: 1.0193004608154297,grad_norm: 0.736432740859693, iteration: 440611
loss: 1.018905758857727,grad_norm: 0.7209387979285383, iteration: 440612
loss: 1.0141627788543701,grad_norm: 0.7362978125758528, iteration: 440613
loss: 0.9907940030097961,grad_norm: 0.709075699876868, iteration: 440614
loss: 1.0746058225631714,grad_norm: 0.9999997672359277, iteration: 440615
loss: 1.0367059707641602,grad_norm: 0.7648738377148362, iteration: 440616
loss: 0.9828207492828369,grad_norm: 0.7952308501033862, iteration: 440617
loss: 1.0570321083068848,grad_norm: 0.9629707734765605, iteration: 440618
loss: 1.0185633897781372,grad_norm: 0.7891424331693522, iteration: 440619
loss: 0.9884829521179199,grad_norm: 0.6895075463234626, iteration: 440620
loss: 1.02614426612854,grad_norm: 0.8072413353498745, iteration: 440621
loss: 0.989824116230011,grad_norm: 0.7563709469721808, iteration: 440622
loss: 0.9974075555801392,grad_norm: 0.8875175982812713, iteration: 440623
loss: 1.0254815816879272,grad_norm: 0.7275612706062845, iteration: 440624
loss: 1.0315285921096802,grad_norm: 0.6273881099280558, iteration: 440625
loss: 1.0007234811782837,grad_norm: 0.8843312588216269, iteration: 440626
loss: 1.037702202796936,grad_norm: 0.7935174572299006, iteration: 440627
loss: 0.9959868788719177,grad_norm: 0.6333001891421985, iteration: 440628
loss: 0.9572386145591736,grad_norm: 0.8393120908602831, iteration: 440629
loss: 0.993770182132721,grad_norm: 0.7436356987514441, iteration: 440630
loss: 1.0422207117080688,grad_norm: 0.7483879086724835, iteration: 440631
loss: 1.0215483903884888,grad_norm: 0.7378143753816956, iteration: 440632
loss: 1.0089184045791626,grad_norm: 0.6659527304386128, iteration: 440633
loss: 0.996790885925293,grad_norm: 0.808950528748683, iteration: 440634
loss: 0.9995729327201843,grad_norm: 0.8674587353250297, iteration: 440635
loss: 0.9571932554244995,grad_norm: 0.9999991128987142, iteration: 440636
loss: 1.0002285242080688,grad_norm: 0.9419341843070922, iteration: 440637
loss: 1.0167298316955566,grad_norm: 0.9999993524207171, iteration: 440638
loss: 0.9931068420410156,grad_norm: 0.7355645622946949, iteration: 440639
loss: 1.0087697505950928,grad_norm: 0.8631562892347667, iteration: 440640
loss: 0.9820573925971985,grad_norm: 0.7615083980918121, iteration: 440641
loss: 1.019300103187561,grad_norm: 0.8217306239399067, iteration: 440642
loss: 1.1070588827133179,grad_norm: 0.9502850570644042, iteration: 440643
loss: 0.9865121245384216,grad_norm: 0.8248406890062889, iteration: 440644
loss: 1.0579878091812134,grad_norm: 0.9999990850993236, iteration: 440645
loss: 1.0131553411483765,grad_norm: 0.7869571102227839, iteration: 440646
loss: 1.0501172542572021,grad_norm: 0.9999996755812758, iteration: 440647
loss: 0.9610792994499207,grad_norm: 0.7168386896266056, iteration: 440648
loss: 0.9727033376693726,grad_norm: 0.9999991803274825, iteration: 440649
loss: 1.008623719215393,grad_norm: 0.8161648873888538, iteration: 440650
loss: 0.9620972871780396,grad_norm: 0.7246527473574313, iteration: 440651
loss: 1.0124690532684326,grad_norm: 0.6576505926783824, iteration: 440652
loss: 1.008585810661316,grad_norm: 0.709389358560637, iteration: 440653
loss: 1.0166162252426147,grad_norm: 0.749195516080848, iteration: 440654
loss: 0.969282329082489,grad_norm: 0.914867141537338, iteration: 440655
loss: 1.0006513595581055,grad_norm: 0.9754650860616373, iteration: 440656
loss: 0.9777414798736572,grad_norm: 0.8198598320488527, iteration: 440657
loss: 1.0339587926864624,grad_norm: 0.9999997160206164, iteration: 440658
loss: 1.0024170875549316,grad_norm: 0.6464631255554183, iteration: 440659
loss: 1.0038630962371826,grad_norm: 0.7919203672820235, iteration: 440660
loss: 1.0179100036621094,grad_norm: 0.8667745794375444, iteration: 440661
loss: 0.9472192525863647,grad_norm: 0.7645743455173402, iteration: 440662
loss: 0.9864664077758789,grad_norm: 0.7634276949966103, iteration: 440663
loss: 1.003157138824463,grad_norm: 0.9999997628769162, iteration: 440664
loss: 0.9749149084091187,grad_norm: 0.9999996387759506, iteration: 440665
loss: 0.975991427898407,grad_norm: 0.8536261248301655, iteration: 440666
loss: 0.9946275353431702,grad_norm: 0.7737899214031152, iteration: 440667
loss: 0.9993184804916382,grad_norm: 0.7135210937266643, iteration: 440668
loss: 1.0169581174850464,grad_norm: 0.9999995363725872, iteration: 440669
loss: 1.0192368030548096,grad_norm: 0.8105488402161825, iteration: 440670
loss: 1.0306220054626465,grad_norm: 0.8661116956581277, iteration: 440671
loss: 1.0088618993759155,grad_norm: 0.8448281383983965, iteration: 440672
loss: 0.9714297652244568,grad_norm: 0.8298181051901984, iteration: 440673
loss: 1.0057166814804077,grad_norm: 0.8213038197671302, iteration: 440674
loss: 1.0178282260894775,grad_norm: 0.638506918074828, iteration: 440675
loss: 1.0213136672973633,grad_norm: 0.7239008539176003, iteration: 440676
loss: 1.03519868850708,grad_norm: 0.9999993172383749, iteration: 440677
loss: 1.00923490524292,grad_norm: 0.6759482751294237, iteration: 440678
loss: 1.004516839981079,grad_norm: 0.7768563570961771, iteration: 440679
loss: 1.018180251121521,grad_norm: 0.9999989980043243, iteration: 440680
loss: 1.0144033432006836,grad_norm: 0.7251082771985323, iteration: 440681
loss: 1.0071393251419067,grad_norm: 0.9473432126216169, iteration: 440682
loss: 1.031341314315796,grad_norm: 0.9776924831742383, iteration: 440683
loss: 1.00166654586792,grad_norm: 0.8513972412167097, iteration: 440684
loss: 0.9741135835647583,grad_norm: 0.8879534994083853, iteration: 440685
loss: 1.0059274435043335,grad_norm: 0.953981833932473, iteration: 440686
loss: 1.0005313158035278,grad_norm: 0.8013978889812852, iteration: 440687
loss: 0.9891799092292786,grad_norm: 0.7849095625742946, iteration: 440688
loss: 1.0064152479171753,grad_norm: 0.8998933485894414, iteration: 440689
loss: 0.9972670078277588,grad_norm: 0.6750478714427374, iteration: 440690
loss: 1.0046557188034058,grad_norm: 0.7813203695852139, iteration: 440691
loss: 1.1207835674285889,grad_norm: 0.9124804958511273, iteration: 440692
loss: 0.9855312705039978,grad_norm: 0.7892184135297496, iteration: 440693
loss: 1.0618011951446533,grad_norm: 0.8306876929035036, iteration: 440694
loss: 1.0069516897201538,grad_norm: 0.6656214016208597, iteration: 440695
loss: 1.0254056453704834,grad_norm: 0.7162119738182968, iteration: 440696
loss: 0.9990489482879639,grad_norm: 0.7997003835845601, iteration: 440697
loss: 0.9905703067779541,grad_norm: 0.9999992026675619, iteration: 440698
loss: 0.9700050950050354,grad_norm: 0.7382212358051495, iteration: 440699
loss: 0.9634654521942139,grad_norm: 0.8409848770857056, iteration: 440700
loss: 0.9980552196502686,grad_norm: 0.7883287056474316, iteration: 440701
loss: 1.0332733392715454,grad_norm: 0.79147034037659, iteration: 440702
loss: 1.075176477432251,grad_norm: 0.8691154238751092, iteration: 440703
loss: 1.0130469799041748,grad_norm: 0.7968366550418776, iteration: 440704
loss: 0.9803408980369568,grad_norm: 0.7029733344829544, iteration: 440705
loss: 1.0332212448120117,grad_norm: 0.8107223104227544, iteration: 440706
loss: 0.9304918646812439,grad_norm: 0.729601217536522, iteration: 440707
loss: 1.024747610092163,grad_norm: 0.844741067233019, iteration: 440708
loss: 1.0089468955993652,grad_norm: 0.7449599542071891, iteration: 440709
loss: 0.9983544945716858,grad_norm: 0.8254618394009868, iteration: 440710
loss: 0.9672242403030396,grad_norm: 0.7399939281060017, iteration: 440711
loss: 1.0253808498382568,grad_norm: 0.9999994133086018, iteration: 440712
loss: 0.9942482709884644,grad_norm: 0.8298413263617517, iteration: 440713
loss: 1.01797616481781,grad_norm: 0.8177999455567182, iteration: 440714
loss: 1.0134775638580322,grad_norm: 0.8332380232098997, iteration: 440715
loss: 0.9980002641677856,grad_norm: 0.7063915448364383, iteration: 440716
loss: 0.9986319541931152,grad_norm: 0.7053355465695724, iteration: 440717
loss: 0.9971821904182434,grad_norm: 0.8303606967514848, iteration: 440718
loss: 0.9851102232933044,grad_norm: 0.7898790616521959, iteration: 440719
loss: 0.961600124835968,grad_norm: 0.7039571193005724, iteration: 440720
loss: 0.9801686406135559,grad_norm: 0.6994052932987131, iteration: 440721
loss: 0.9997064471244812,grad_norm: 0.7854987198514741, iteration: 440722
loss: 1.0125432014465332,grad_norm: 0.999999392835002, iteration: 440723
loss: 1.0772173404693604,grad_norm: 0.9196671674730452, iteration: 440724
loss: 1.0315717458724976,grad_norm: 0.9167891978906718, iteration: 440725
loss: 1.1076703071594238,grad_norm: 0.9999992029367896, iteration: 440726
loss: 1.0191372632980347,grad_norm: 0.7168809642121174, iteration: 440727
loss: 0.9671210646629333,grad_norm: 0.8298704307967305, iteration: 440728
loss: 1.056864857673645,grad_norm: 0.930600182145531, iteration: 440729
loss: 1.0497583150863647,grad_norm: 0.9030681299443055, iteration: 440730
loss: 0.9673073887825012,grad_norm: 0.7133302735295074, iteration: 440731
loss: 1.0038847923278809,grad_norm: 0.9318953085270725, iteration: 440732
loss: 0.969240128993988,grad_norm: 0.6598360242009076, iteration: 440733
loss: 1.0447274446487427,grad_norm: 0.7536898707892713, iteration: 440734
loss: 0.9954643249511719,grad_norm: 0.7245414251741271, iteration: 440735
loss: 1.146799087524414,grad_norm: 0.999999901377581, iteration: 440736
loss: 1.0374590158462524,grad_norm: 0.9737211812378891, iteration: 440737
loss: 1.0119324922561646,grad_norm: 0.9393781582509857, iteration: 440738
loss: 0.9902952313423157,grad_norm: 0.7329897916479134, iteration: 440739
loss: 1.0101927518844604,grad_norm: 0.7549364478083733, iteration: 440740
loss: 1.0141973495483398,grad_norm: 0.7829686351374977, iteration: 440741
loss: 1.0397590398788452,grad_norm: 0.9999996813132661, iteration: 440742
loss: 1.0238534212112427,grad_norm: 0.6286134069306069, iteration: 440743
loss: 0.9908733367919922,grad_norm: 0.825167469300441, iteration: 440744
loss: 0.991826057434082,grad_norm: 0.7476237641386586, iteration: 440745
loss: 1.0347868204116821,grad_norm: 0.7958924664081707, iteration: 440746
loss: 1.0170799493789673,grad_norm: 0.7404391647055707, iteration: 440747
loss: 1.0287407636642456,grad_norm: 0.7158311987042679, iteration: 440748
loss: 0.9976034760475159,grad_norm: 0.9015438153818608, iteration: 440749
loss: 1.0492258071899414,grad_norm: 0.999999085610776, iteration: 440750
loss: 1.0560362339019775,grad_norm: 0.999999189344894, iteration: 440751
loss: 0.9872596263885498,grad_norm: 0.8961227353168937, iteration: 440752
loss: 0.9895187616348267,grad_norm: 0.8278287750376259, iteration: 440753
loss: 0.9987049698829651,grad_norm: 0.7481499514751554, iteration: 440754
loss: 1.0190180540084839,grad_norm: 0.9835495369452113, iteration: 440755
loss: 1.011110782623291,grad_norm: 0.7955098867325892, iteration: 440756
loss: 1.013328194618225,grad_norm: 0.9999996694651972, iteration: 440757
loss: 1.0000646114349365,grad_norm: 0.7076195089038021, iteration: 440758
loss: 1.0039470195770264,grad_norm: 0.9999990417562858, iteration: 440759
loss: 0.9983486533164978,grad_norm: 0.6949881705807652, iteration: 440760
loss: 1.0048907995224,grad_norm: 0.7158901058814451, iteration: 440761
loss: 1.0148248672485352,grad_norm: 0.7604006128396483, iteration: 440762
loss: 1.0186049938201904,grad_norm: 0.8525369207483444, iteration: 440763
loss: 0.985683798789978,grad_norm: 0.7431548491935256, iteration: 440764
loss: 0.9525839686393738,grad_norm: 0.9919038638020239, iteration: 440765
loss: 0.9751316905021667,grad_norm: 0.6565356503469022, iteration: 440766
loss: 0.9742710590362549,grad_norm: 0.9316030648261137, iteration: 440767
loss: 1.0862725973129272,grad_norm: 0.999999312004799, iteration: 440768
loss: 1.032437801361084,grad_norm: 0.927058701226795, iteration: 440769
loss: 1.0212397575378418,grad_norm: 0.7554062455338421, iteration: 440770
loss: 1.0282936096191406,grad_norm: 0.6361086193565523, iteration: 440771
loss: 1.0707714557647705,grad_norm: 0.999999913400466, iteration: 440772
loss: 0.9907341003417969,grad_norm: 0.7821798538981884, iteration: 440773
loss: 0.9884024262428284,grad_norm: 0.6571192247883916, iteration: 440774
loss: 0.9965270161628723,grad_norm: 0.9116284590411744, iteration: 440775
loss: 1.0987478494644165,grad_norm: 0.9999992195659406, iteration: 440776
loss: 1.0165029764175415,grad_norm: 0.950119707310846, iteration: 440777
loss: 0.9946619868278503,grad_norm: 0.7867901781258239, iteration: 440778
loss: 1.0049399137496948,grad_norm: 0.7964746956710177, iteration: 440779
loss: 0.9967789053916931,grad_norm: 0.9999993738966968, iteration: 440780
loss: 0.9738935232162476,grad_norm: 0.6977856228131551, iteration: 440781
loss: 0.9565793871879578,grad_norm: 0.7462716506613396, iteration: 440782
loss: 1.0375357866287231,grad_norm: 0.963882790806621, iteration: 440783
loss: 1.0120351314544678,grad_norm: 0.9999990831072759, iteration: 440784
loss: 1.0042948722839355,grad_norm: 0.7095963358639225, iteration: 440785
loss: 1.0310888290405273,grad_norm: 0.7795835978099249, iteration: 440786
loss: 1.0126274824142456,grad_norm: 0.6366408003919971, iteration: 440787
loss: 0.9844461679458618,grad_norm: 0.9999993082886572, iteration: 440788
loss: 0.9987221360206604,grad_norm: 0.8274930960121905, iteration: 440789
loss: 0.9877905249595642,grad_norm: 0.9406497519261955, iteration: 440790
loss: 1.0032259225845337,grad_norm: 0.9999991717427704, iteration: 440791
loss: 1.043778419494629,grad_norm: 0.9323794839929139, iteration: 440792
loss: 0.9974275827407837,grad_norm: 0.7645205163864713, iteration: 440793
loss: 0.9815158843994141,grad_norm: 0.938544933279428, iteration: 440794
loss: 1.0133135318756104,grad_norm: 0.7847918521918577, iteration: 440795
loss: 1.0564754009246826,grad_norm: 0.9834743648674741, iteration: 440796
loss: 0.9907435774803162,grad_norm: 0.7824514414600221, iteration: 440797
loss: 0.9834730625152588,grad_norm: 0.9347500989896049, iteration: 440798
loss: 1.0090751647949219,grad_norm: 0.7250387293627519, iteration: 440799
loss: 0.9753494262695312,grad_norm: 0.7935298593365062, iteration: 440800
loss: 0.992280900478363,grad_norm: 0.7589168104344629, iteration: 440801
loss: 0.9803479909896851,grad_norm: 0.7711641790502428, iteration: 440802
loss: 0.9642060995101929,grad_norm: 0.8917344036031587, iteration: 440803
loss: 0.9974648356437683,grad_norm: 0.8139157934674937, iteration: 440804
loss: 0.9773240089416504,grad_norm: 0.8504818376569288, iteration: 440805
loss: 1.0866873264312744,grad_norm: 0.9999993029829058, iteration: 440806
loss: 0.9967522621154785,grad_norm: 0.8970876214588269, iteration: 440807
loss: 1.0086525678634644,grad_norm: 0.7405585202920606, iteration: 440808
loss: 0.9842629432678223,grad_norm: 0.7300111651149586, iteration: 440809
loss: 0.9854995012283325,grad_norm: 0.8600075852790756, iteration: 440810
loss: 0.9712041616439819,grad_norm: 0.8261652524031414, iteration: 440811
loss: 0.9935742020606995,grad_norm: 0.7830363861295784, iteration: 440812
loss: 1.0151071548461914,grad_norm: 0.7381518320462364, iteration: 440813
loss: 1.006203532218933,grad_norm: 0.5794780405484244, iteration: 440814
loss: 0.9705908894538879,grad_norm: 0.8628050754402481, iteration: 440815
loss: 0.9694972634315491,grad_norm: 0.7418142887169911, iteration: 440816
loss: 0.9938327670097351,grad_norm: 0.7361731553157007, iteration: 440817
loss: 0.9852008819580078,grad_norm: 0.9999992503677038, iteration: 440818
loss: 1.0195765495300293,grad_norm: 0.7013002736902296, iteration: 440819
loss: 0.9912782907485962,grad_norm: 0.7525833239185535, iteration: 440820
loss: 1.0459345579147339,grad_norm: 0.8983342421951941, iteration: 440821
loss: 0.9795065522193909,grad_norm: 0.8936468375573753, iteration: 440822
loss: 1.0664620399475098,grad_norm: 0.9967180845992585, iteration: 440823
loss: 1.0508862733840942,grad_norm: 0.9999993407529734, iteration: 440824
loss: 1.0032926797866821,grad_norm: 0.7111449916377032, iteration: 440825
loss: 1.033913493156433,grad_norm: 0.9999990559982291, iteration: 440826
loss: 0.9883096218109131,grad_norm: 0.9999993612546946, iteration: 440827
loss: 1.0135940313339233,grad_norm: 0.8434710874859007, iteration: 440828
loss: 1.0402579307556152,grad_norm: 0.9406051180407933, iteration: 440829
loss: 0.9842749238014221,grad_norm: 0.7875606733897101, iteration: 440830
loss: 0.9884457588195801,grad_norm: 0.8364356318183557, iteration: 440831
loss: 0.975919246673584,grad_norm: 0.7154263414126945, iteration: 440832
loss: 1.2010177373886108,grad_norm: 0.9353604489359657, iteration: 440833
loss: 0.9854103922843933,grad_norm: 0.8175432772441844, iteration: 440834
loss: 1.0542012453079224,grad_norm: 0.7850585982093362, iteration: 440835
loss: 1.0348507165908813,grad_norm: 0.7629414836797999, iteration: 440836
loss: 0.9632202386856079,grad_norm: 0.7734498552731001, iteration: 440837
loss: 0.9853982925415039,grad_norm: 0.8328088652739651, iteration: 440838
loss: 1.008955478668213,grad_norm: 0.9985018744390226, iteration: 440839
loss: 1.0182439088821411,grad_norm: 0.7641781686302802, iteration: 440840
loss: 0.999144434928894,grad_norm: 0.8632800216146688, iteration: 440841
loss: 1.0139436721801758,grad_norm: 0.7913242994282534, iteration: 440842
loss: 0.9996601939201355,grad_norm: 0.7536682558770607, iteration: 440843
loss: 1.022447109222412,grad_norm: 0.6996994772355714, iteration: 440844
loss: 1.0178905725479126,grad_norm: 0.8946897208992395, iteration: 440845
loss: 1.033461570739746,grad_norm: 0.8420888791370742, iteration: 440846
loss: 1.0135976076126099,grad_norm: 0.6993359856313998, iteration: 440847
loss: 0.9614221453666687,grad_norm: 0.7860441878299181, iteration: 440848
loss: 0.9953828454017639,grad_norm: 0.7998599342771929, iteration: 440849
loss: 0.9973659515380859,grad_norm: 0.8469935811384732, iteration: 440850
loss: 1.0025980472564697,grad_norm: 0.8367622000425512, iteration: 440851
loss: 0.9996508359909058,grad_norm: 0.999999112188892, iteration: 440852
loss: 0.9751041531562805,grad_norm: 0.8030340381436943, iteration: 440853
loss: 1.0079870223999023,grad_norm: 0.8365268358649358, iteration: 440854
loss: 1.0954439640045166,grad_norm: 0.9999992663199835, iteration: 440855
loss: 0.9939621090888977,grad_norm: 0.9053087222989948, iteration: 440856
loss: 1.055476427078247,grad_norm: 0.9999998212119718, iteration: 440857
loss: 0.9937742352485657,grad_norm: 0.8660088869790469, iteration: 440858
loss: 0.9628489017486572,grad_norm: 0.8552535715239555, iteration: 440859
loss: 1.0042963027954102,grad_norm: 0.7424877493581299, iteration: 440860
loss: 0.9924408197402954,grad_norm: 0.9050743310495231, iteration: 440861
loss: 1.0267674922943115,grad_norm: 0.9999993105847708, iteration: 440862
loss: 1.0023831129074097,grad_norm: 0.8765000645097806, iteration: 440863
loss: 1.0126749277114868,grad_norm: 0.6982671290402392, iteration: 440864
loss: 1.0040847063064575,grad_norm: 0.7038289233210668, iteration: 440865
loss: 1.0086878538131714,grad_norm: 0.7557100707359369, iteration: 440866
loss: 1.0014190673828125,grad_norm: 0.9175972198168186, iteration: 440867
loss: 1.0008705854415894,grad_norm: 0.7454902497804534, iteration: 440868
loss: 1.012917399406433,grad_norm: 0.7957732994385932, iteration: 440869
loss: 0.9640596508979797,grad_norm: 0.7748414989032997, iteration: 440870
loss: 0.9967106580734253,grad_norm: 0.7397829709093598, iteration: 440871
loss: 0.9542350172996521,grad_norm: 0.7787447579118308, iteration: 440872
loss: 0.9760639667510986,grad_norm: 0.8151710440523325, iteration: 440873
loss: 1.0011869668960571,grad_norm: 0.7685469596206763, iteration: 440874
loss: 0.9672927856445312,grad_norm: 0.7386976324661709, iteration: 440875
loss: 1.0089560747146606,grad_norm: 0.7927684099879362, iteration: 440876
loss: 1.0347847938537598,grad_norm: 0.6845603134954664, iteration: 440877
loss: 1.002777099609375,grad_norm: 0.672417690131025, iteration: 440878
loss: 0.9676359295845032,grad_norm: 0.8515475306637901, iteration: 440879
loss: 0.9963531494140625,grad_norm: 0.7220127020153823, iteration: 440880
loss: 1.001873254776001,grad_norm: 0.8847278818934244, iteration: 440881
loss: 0.9965502023696899,grad_norm: 0.9999992606407268, iteration: 440882
loss: 0.9580679535865784,grad_norm: 0.7494517974386024, iteration: 440883
loss: 1.0310442447662354,grad_norm: 0.9999996366199918, iteration: 440884
loss: 0.9912405610084534,grad_norm: 0.739613244958746, iteration: 440885
loss: 0.9854018092155457,grad_norm: 0.7443252626184119, iteration: 440886
loss: 1.0032122135162354,grad_norm: 0.844158353471545, iteration: 440887
loss: 1.0131651163101196,grad_norm: 0.7372219197929237, iteration: 440888
loss: 1.0107989311218262,grad_norm: 0.9552525427501354, iteration: 440889
loss: 1.006731629371643,grad_norm: 0.7329845835146436, iteration: 440890
loss: 1.0199182033538818,grad_norm: 0.7098740188142242, iteration: 440891
loss: 0.9726933836936951,grad_norm: 0.722512147870268, iteration: 440892
loss: 1.0077836513519287,grad_norm: 0.8517366500297027, iteration: 440893
loss: 1.0759135484695435,grad_norm: 0.8324677271317729, iteration: 440894
loss: 1.0137531757354736,grad_norm: 0.9999998781041322, iteration: 440895
loss: 1.0114024877548218,grad_norm: 0.8242058506669921, iteration: 440896
loss: 1.0100607872009277,grad_norm: 0.7136252555442439, iteration: 440897
loss: 1.0245496034622192,grad_norm: 0.8006781471958575, iteration: 440898
loss: 0.9848995804786682,grad_norm: 0.8055440800449004, iteration: 440899
loss: 1.0118072032928467,grad_norm: 0.8783923716769364, iteration: 440900
loss: 0.9631805419921875,grad_norm: 0.6381486242146404, iteration: 440901
loss: 1.0299549102783203,grad_norm: 0.7120405894331507, iteration: 440902
loss: 0.9905683398246765,grad_norm: 0.8158158213555646, iteration: 440903
loss: 0.985938310623169,grad_norm: 0.8775104685902357, iteration: 440904
loss: 1.0235326290130615,grad_norm: 0.7847909648080187, iteration: 440905
loss: 1.0083979368209839,grad_norm: 0.8472784332774531, iteration: 440906
loss: 0.9793014526367188,grad_norm: 0.8036029755264411, iteration: 440907
loss: 0.986786425113678,grad_norm: 0.7740383998855088, iteration: 440908
loss: 0.9923616647720337,grad_norm: 0.7315922144499916, iteration: 440909
loss: 0.9792131185531616,grad_norm: 0.8608089988725776, iteration: 440910
loss: 0.9812140464782715,grad_norm: 0.9999991936138091, iteration: 440911
loss: 0.9827862977981567,grad_norm: 0.999999393667209, iteration: 440912
loss: 1.019590139389038,grad_norm: 0.8496508852674465, iteration: 440913
loss: 0.9893876910209656,grad_norm: 0.7183593005580741, iteration: 440914
loss: 1.1331026554107666,grad_norm: 0.999998963464971, iteration: 440915
loss: 0.9897170662879944,grad_norm: 0.8365793095119177, iteration: 440916
loss: 0.9936094880104065,grad_norm: 0.6926697967871807, iteration: 440917
loss: 0.9950212240219116,grad_norm: 0.6801421368512716, iteration: 440918
loss: 0.9715039730072021,grad_norm: 0.7439975323360158, iteration: 440919
loss: 0.9965380430221558,grad_norm: 0.6785357079186848, iteration: 440920
loss: 1.0375839471817017,grad_norm: 0.7534456776434498, iteration: 440921
loss: 0.9680580496788025,grad_norm: 0.8528548570018268, iteration: 440922
loss: 1.0618679523468018,grad_norm: 0.8146549717176595, iteration: 440923
loss: 1.0134148597717285,grad_norm: 0.8145487446323796, iteration: 440924
loss: 0.9703924059867859,grad_norm: 0.7847674094424142, iteration: 440925
loss: 0.9903924465179443,grad_norm: 0.6482971905547649, iteration: 440926
loss: 0.9695557355880737,grad_norm: 0.751087538021504, iteration: 440927
loss: 1.0066012144088745,grad_norm: 0.7345032791878595, iteration: 440928
loss: 1.026167869567871,grad_norm: 0.9999991388185344, iteration: 440929
loss: 0.9767308831214905,grad_norm: 0.8531223806779734, iteration: 440930
loss: 0.9701250791549683,grad_norm: 0.7021784635738669, iteration: 440931
loss: 1.0040274858474731,grad_norm: 0.8039001872707396, iteration: 440932
loss: 0.9904411435127258,grad_norm: 0.7324511460470514, iteration: 440933
loss: 0.9727956652641296,grad_norm: 0.8066051002105649, iteration: 440934
loss: 0.9931397438049316,grad_norm: 0.7561224138130614, iteration: 440935
loss: 1.0894622802734375,grad_norm: 0.9999993338210683, iteration: 440936
loss: 1.016827940940857,grad_norm: 0.7846268117119756, iteration: 440937
loss: 1.0254648923873901,grad_norm: 0.8577619766370898, iteration: 440938
loss: 0.995866596698761,grad_norm: 0.6784970217165881, iteration: 440939
loss: 0.9714277386665344,grad_norm: 0.8766265167604715, iteration: 440940
loss: 1.0655837059020996,grad_norm: 0.9001508106358541, iteration: 440941
loss: 0.9870761036872864,grad_norm: 0.7401141713016275, iteration: 440942
loss: 0.9724496006965637,grad_norm: 0.7324129385531216, iteration: 440943
loss: 1.0136380195617676,grad_norm: 0.7564706816477756, iteration: 440944
loss: 0.9990895986557007,grad_norm: 0.8700258493667363, iteration: 440945
loss: 1.0118378400802612,grad_norm: 0.7090134814489348, iteration: 440946
loss: 1.0199272632598877,grad_norm: 0.7564156351851542, iteration: 440947
loss: 1.0193538665771484,grad_norm: 0.8784042099083769, iteration: 440948
loss: 1.0034359693527222,grad_norm: 0.692861415656713, iteration: 440949
loss: 1.014773964881897,grad_norm: 0.8501561585038572, iteration: 440950
loss: 0.9854294657707214,grad_norm: 0.8682946560489443, iteration: 440951
loss: 0.9548060894012451,grad_norm: 0.9337321002044416, iteration: 440952
loss: 1.0265289545059204,grad_norm: 0.9999998763350775, iteration: 440953
loss: 0.9621060490608215,grad_norm: 0.9776679868903919, iteration: 440954
loss: 0.9858222603797913,grad_norm: 0.7498108611709728, iteration: 440955
loss: 1.019083023071289,grad_norm: 0.7251456844726522, iteration: 440956
loss: 0.9781273603439331,grad_norm: 0.8389652626993247, iteration: 440957
loss: 1.017170786857605,grad_norm: 0.8265272668933892, iteration: 440958
loss: 1.0133042335510254,grad_norm: 0.6363918209101135, iteration: 440959
loss: 1.071986198425293,grad_norm: 0.9999997250664427, iteration: 440960
loss: 0.9749533534049988,grad_norm: 0.7507773335056738, iteration: 440961
loss: 1.0101003646850586,grad_norm: 0.7611556317972942, iteration: 440962
loss: 1.0139936208724976,grad_norm: 0.7566680570642959, iteration: 440963
loss: 0.9769451022148132,grad_norm: 0.8149011238713373, iteration: 440964
loss: 0.977555513381958,grad_norm: 0.7570622872178389, iteration: 440965
loss: 0.9647948741912842,grad_norm: 0.7918592132163741, iteration: 440966
loss: 1.0153659582138062,grad_norm: 0.9543815740932631, iteration: 440967
loss: 1.0191847085952759,grad_norm: 0.763359774233113, iteration: 440968
loss: 1.052987813949585,grad_norm: 0.9477204435982498, iteration: 440969
loss: 1.0022412538528442,grad_norm: 0.8796187667243167, iteration: 440970
loss: 1.0465764999389648,grad_norm: 0.8586683336561296, iteration: 440971
loss: 0.9750784635543823,grad_norm: 0.7079201679569428, iteration: 440972
loss: 0.9925729632377625,grad_norm: 0.8093769860232343, iteration: 440973
loss: 0.9572162628173828,grad_norm: 0.7245821583596426, iteration: 440974
loss: 0.9855989813804626,grad_norm: 0.650537181672837, iteration: 440975
loss: 1.0043505430221558,grad_norm: 0.9999989896299822, iteration: 440976
loss: 1.0412874221801758,grad_norm: 0.9999995776168408, iteration: 440977
loss: 0.9736211895942688,grad_norm: 0.8625294873606807, iteration: 440978
loss: 0.988565981388092,grad_norm: 0.8334874363770217, iteration: 440979
loss: 1.0055618286132812,grad_norm: 0.8293870628063977, iteration: 440980
loss: 0.9838341474533081,grad_norm: 0.706597843282291, iteration: 440981
loss: 1.0598737001419067,grad_norm: 0.9999992702164135, iteration: 440982
loss: 0.9584194421768188,grad_norm: 0.8599451179898363, iteration: 440983
loss: 1.01565420627594,grad_norm: 0.8204641030690313, iteration: 440984
loss: 0.9764328598976135,grad_norm: 0.760032441387948, iteration: 440985
loss: 1.029708743095398,grad_norm: 0.7912815403267501, iteration: 440986
loss: 1.018004298210144,grad_norm: 0.9103697134750597, iteration: 440987
loss: 0.9888607263565063,grad_norm: 0.829661661455157, iteration: 440988
loss: 1.0175968408584595,grad_norm: 0.7490562474105892, iteration: 440989
loss: 0.9941434264183044,grad_norm: 0.9068868324478941, iteration: 440990
loss: 1.0390987396240234,grad_norm: 0.9999999242404305, iteration: 440991
loss: 0.9943391680717468,grad_norm: 0.7035074284952191, iteration: 440992
loss: 1.0100505352020264,grad_norm: 0.7257188953395144, iteration: 440993
loss: 1.0191552639007568,grad_norm: 0.999999519161378, iteration: 440994
loss: 0.9703674912452698,grad_norm: 0.7274657926509834, iteration: 440995
loss: 1.078437328338623,grad_norm: 0.9070605782339892, iteration: 440996
loss: 1.0131183862686157,grad_norm: 0.7309026502696346, iteration: 440997
loss: 0.9985206127166748,grad_norm: 0.6255548681851623, iteration: 440998
loss: 0.9970057010650635,grad_norm: 0.7327097873548634, iteration: 440999
loss: 1.0337159633636475,grad_norm: 0.9999995812214728, iteration: 441000
loss: 1.0055934190750122,grad_norm: 0.8199244312647709, iteration: 441001
loss: 1.0113434791564941,grad_norm: 0.8205828128477621, iteration: 441002
loss: 1.0489798784255981,grad_norm: 0.9999993507178427, iteration: 441003
loss: 1.0143605470657349,grad_norm: 0.6660723828233531, iteration: 441004
loss: 1.0496705770492554,grad_norm: 0.9189812793364612, iteration: 441005
loss: 0.9929640889167786,grad_norm: 0.7443212249261812, iteration: 441006
loss: 1.003778100013733,grad_norm: 0.834050092809253, iteration: 441007
loss: 1.0774388313293457,grad_norm: 0.9863982259689402, iteration: 441008
loss: 0.9880031943321228,grad_norm: 0.727700073138731, iteration: 441009
loss: 1.0904185771942139,grad_norm: 0.9999990457843839, iteration: 441010
loss: 1.0096209049224854,grad_norm: 0.7962463423314652, iteration: 441011
loss: 1.008970856666565,grad_norm: 0.8813256426743129, iteration: 441012
loss: 0.9708060622215271,grad_norm: 0.9999998228745465, iteration: 441013
loss: 0.9837698340415955,grad_norm: 0.7485184081804632, iteration: 441014
loss: 0.9550453424453735,grad_norm: 0.8634449663198763, iteration: 441015
loss: 1.072664737701416,grad_norm: 0.9999993078510573, iteration: 441016
loss: 0.9701717495918274,grad_norm: 0.7779279283916201, iteration: 441017
loss: 1.0187000036239624,grad_norm: 0.8514704437433578, iteration: 441018
loss: 0.9771298170089722,grad_norm: 0.782872282688571, iteration: 441019
loss: 1.0034493207931519,grad_norm: 0.999999145365338, iteration: 441020
loss: 1.012613296508789,grad_norm: 0.697404182949308, iteration: 441021
loss: 1.0035382509231567,grad_norm: 0.9077942383471083, iteration: 441022
loss: 0.9884633421897888,grad_norm: 0.7411231240432444, iteration: 441023
loss: 0.983794629573822,grad_norm: 0.7989537331557799, iteration: 441024
loss: 0.9686050415039062,grad_norm: 0.9090910062980673, iteration: 441025
loss: 1.0113630294799805,grad_norm: 0.691372353119947, iteration: 441026
loss: 0.9918294548988342,grad_norm: 0.7762986773784323, iteration: 441027
loss: 1.0722408294677734,grad_norm: 0.9999992484988499, iteration: 441028
loss: 0.9847568869590759,grad_norm: 0.6822745774207568, iteration: 441029
loss: 0.9640216827392578,grad_norm: 0.7953292235417381, iteration: 441030
loss: 0.9822192788124084,grad_norm: 0.7398475729696823, iteration: 441031
loss: 0.9997348785400391,grad_norm: 0.9460879008678942, iteration: 441032
loss: 1.0018739700317383,grad_norm: 0.7639678400509827, iteration: 441033
loss: 0.9850183725357056,grad_norm: 0.9999991381127766, iteration: 441034
loss: 1.0505245923995972,grad_norm: 0.9999990491625991, iteration: 441035
loss: 0.9751661419868469,grad_norm: 0.9291115664962671, iteration: 441036
loss: 0.9687362909317017,grad_norm: 0.9185061751802751, iteration: 441037
loss: 1.0085415840148926,grad_norm: 0.7688219817806629, iteration: 441038
loss: 1.0244792699813843,grad_norm: 0.8820234375811601, iteration: 441039
loss: 0.999069333076477,grad_norm: 0.805075138277476, iteration: 441040
loss: 1.0237942934036255,grad_norm: 0.8075136738688097, iteration: 441041
loss: 0.9710981845855713,grad_norm: 0.74146448304834, iteration: 441042
loss: 1.002612590789795,grad_norm: 0.7515813317629835, iteration: 441043
loss: 0.9892935752868652,grad_norm: 0.7488773932616609, iteration: 441044
loss: 1.0201261043548584,grad_norm: 0.7541912667659771, iteration: 441045
loss: 0.9727346897125244,grad_norm: 0.8027144333193458, iteration: 441046
loss: 1.0070757865905762,grad_norm: 0.7632918987426569, iteration: 441047
loss: 1.1368659734725952,grad_norm: 0.9999998480478991, iteration: 441048
loss: 1.0136018991470337,grad_norm: 0.718398490711632, iteration: 441049
loss: 1.0193538665771484,grad_norm: 0.7916922450545153, iteration: 441050
loss: 1.018639087677002,grad_norm: 0.7429833890646456, iteration: 441051
loss: 1.0619124174118042,grad_norm: 0.9354293402043691, iteration: 441052
loss: 1.002780795097351,grad_norm: 0.7282661581567403, iteration: 441053
loss: 0.9734229445457458,grad_norm: 0.6228001303335362, iteration: 441054
loss: 1.004773736000061,grad_norm: 0.8636530125746175, iteration: 441055
loss: 0.9817321300506592,grad_norm: 0.7431253309355939, iteration: 441056
loss: 1.1954468488693237,grad_norm: 1.0000000112319958, iteration: 441057
loss: 0.9883372783660889,grad_norm: 0.8981899195198677, iteration: 441058
loss: 1.0005310773849487,grad_norm: 0.6851283415583016, iteration: 441059
loss: 1.001484990119934,grad_norm: 0.9668465589176307, iteration: 441060
loss: 1.026307463645935,grad_norm: 0.9282293792086204, iteration: 441061
loss: 1.007238507270813,grad_norm: 0.924744209146302, iteration: 441062
loss: 1.0325206518173218,grad_norm: 0.9999990313935759, iteration: 441063
loss: 0.99712735414505,grad_norm: 0.6866751409123416, iteration: 441064
loss: 1.0180068016052246,grad_norm: 0.7340886658702674, iteration: 441065
loss: 0.9839456677436829,grad_norm: 0.7750743916362867, iteration: 441066
loss: 1.0307458639144897,grad_norm: 0.8384020550401559, iteration: 441067
loss: 0.9592315554618835,grad_norm: 0.8005501503683471, iteration: 441068
loss: 1.0117186307907104,grad_norm: 0.7175579474464576, iteration: 441069
loss: 1.0190844535827637,grad_norm: 0.8681028426109061, iteration: 441070
loss: 0.9872027635574341,grad_norm: 0.861572622632338, iteration: 441071
loss: 0.977135181427002,grad_norm: 0.8867844112658383, iteration: 441072
loss: 1.0494697093963623,grad_norm: 0.7935513498685518, iteration: 441073
loss: 1.0043220520019531,grad_norm: 0.7134201939404736, iteration: 441074
loss: 0.9772018790245056,grad_norm: 0.8778299559618123, iteration: 441075
loss: 1.0207326412200928,grad_norm: 0.7245177860269081, iteration: 441076
loss: 0.9877965450286865,grad_norm: 0.6906346386831291, iteration: 441077
loss: 1.014212727546692,grad_norm: 0.7747278764870266, iteration: 441078
loss: 1.0072269439697266,grad_norm: 0.7613762847098609, iteration: 441079
loss: 1.0162030458450317,grad_norm: 0.8772179407375231, iteration: 441080
loss: 0.9703088998794556,grad_norm: 0.8737698512254396, iteration: 441081
loss: 1.0139797925949097,grad_norm: 0.740210616913959, iteration: 441082
loss: 1.011942982673645,grad_norm: 0.6587916459412562, iteration: 441083
loss: 1.0202913284301758,grad_norm: 0.7577729492125514, iteration: 441084
loss: 0.9462809562683105,grad_norm: 0.7137977030164466, iteration: 441085
loss: 1.0053614377975464,grad_norm: 0.9362054996262416, iteration: 441086
loss: 1.0396615266799927,grad_norm: 0.7164037497229451, iteration: 441087
loss: 1.0254156589508057,grad_norm: 0.9668424132928307, iteration: 441088
loss: 0.9527164697647095,grad_norm: 0.7046848583983831, iteration: 441089
loss: 1.0752025842666626,grad_norm: 0.9999996052449496, iteration: 441090
loss: 1.0121898651123047,grad_norm: 0.8275571165546185, iteration: 441091
loss: 1.0120774507522583,grad_norm: 0.795464991644487, iteration: 441092
loss: 1.015679955482483,grad_norm: 0.7240257194975155, iteration: 441093
loss: 0.9795851707458496,grad_norm: 0.8459454895295402, iteration: 441094
loss: 1.0021973848342896,grad_norm: 0.821170148696747, iteration: 441095
loss: 0.9594815373420715,grad_norm: 0.8220409611881323, iteration: 441096
loss: 0.9629672765731812,grad_norm: 0.7971919071688178, iteration: 441097
loss: 1.0096476078033447,grad_norm: 0.9999992039370786, iteration: 441098
loss: 1.0348180532455444,grad_norm: 0.8162803498937656, iteration: 441099
loss: 1.044329047203064,grad_norm: 0.8784822696778376, iteration: 441100
loss: 1.012473464012146,grad_norm: 0.7525335952292241, iteration: 441101
loss: 1.0235122442245483,grad_norm: 0.7043270865854454, iteration: 441102
loss: 1.0486550331115723,grad_norm: 0.813115159793914, iteration: 441103
loss: 1.1520476341247559,grad_norm: 0.9999995395912387, iteration: 441104
loss: 0.9990917444229126,grad_norm: 0.6389891111553107, iteration: 441105
loss: 1.0348854064941406,grad_norm: 0.9995069536941703, iteration: 441106
loss: 1.0266848802566528,grad_norm: 0.9999996873008181, iteration: 441107
loss: 1.0172719955444336,grad_norm: 0.8087633856365032, iteration: 441108
loss: 1.0006436109542847,grad_norm: 0.8715093645603472, iteration: 441109
loss: 1.0555901527404785,grad_norm: 0.9999994122185802, iteration: 441110
loss: 1.0287730693817139,grad_norm: 0.9999992840268435, iteration: 441111
loss: 0.9926586151123047,grad_norm: 0.8895627895050647, iteration: 441112
loss: 0.9769968390464783,grad_norm: 0.8211891361143654, iteration: 441113
loss: 1.0008293390274048,grad_norm: 0.8072468022111724, iteration: 441114
loss: 0.9868630766868591,grad_norm: 0.789581768600464, iteration: 441115
loss: 0.9739663004875183,grad_norm: 0.80307276127354, iteration: 441116
loss: 1.024167776107788,grad_norm: 0.9999990795678718, iteration: 441117
loss: 0.996383011341095,grad_norm: 0.6875873044775989, iteration: 441118
loss: 0.9668263792991638,grad_norm: 0.7861581986922112, iteration: 441119
loss: 1.015832781791687,grad_norm: 0.6776559931303439, iteration: 441120
loss: 1.0235909223556519,grad_norm: 0.6892656836655705, iteration: 441121
loss: 1.0078688859939575,grad_norm: 0.8599376593939398, iteration: 441122
loss: 1.0111172199249268,grad_norm: 0.6725344829434114, iteration: 441123
loss: 0.9855226874351501,grad_norm: 0.7538700820609072, iteration: 441124
loss: 0.9900814294815063,grad_norm: 0.9307535250633168, iteration: 441125
loss: 1.0202765464782715,grad_norm: 0.9999995230274747, iteration: 441126
loss: 0.9910839200019836,grad_norm: 0.7661829001104418, iteration: 441127
loss: 1.0229690074920654,grad_norm: 0.6348833897705463, iteration: 441128
loss: 0.9895141124725342,grad_norm: 0.9999996968521844, iteration: 441129
loss: 0.9454814791679382,grad_norm: 0.8029273264057377, iteration: 441130
loss: 0.9753444194793701,grad_norm: 0.696031811951229, iteration: 441131
loss: 0.9543946385383606,grad_norm: 0.7051784680508325, iteration: 441132
loss: 0.9769171476364136,grad_norm: 0.8800679956132706, iteration: 441133
loss: 1.0174155235290527,grad_norm: 0.7863548006620726, iteration: 441134
loss: 1.0167721509933472,grad_norm: 0.7341402759931008, iteration: 441135
loss: 0.9557211995124817,grad_norm: 0.6904216175731405, iteration: 441136
loss: 1.0437350273132324,grad_norm: 0.8849532740196835, iteration: 441137
loss: 1.0042048692703247,grad_norm: 0.844676181360066, iteration: 441138
loss: 0.9951158761978149,grad_norm: 0.9087223859525485, iteration: 441139
loss: 1.0660457611083984,grad_norm: 1.000000005360251, iteration: 441140
loss: 0.9883139133453369,grad_norm: 0.9999994360277515, iteration: 441141
loss: 1.0293290615081787,grad_norm: 0.7540669072907767, iteration: 441142
loss: 1.0027503967285156,grad_norm: 0.8700056809240992, iteration: 441143
loss: 0.994607150554657,grad_norm: 0.7913529741721107, iteration: 441144
loss: 1.0031906366348267,grad_norm: 0.8609971674613448, iteration: 441145
loss: 0.9777594208717346,grad_norm: 0.824486308133585, iteration: 441146
loss: 0.977304995059967,grad_norm: 0.6668300791203039, iteration: 441147
loss: 0.9758532047271729,grad_norm: 0.6834271791042645, iteration: 441148
loss: 0.987233579158783,grad_norm: 0.7723139011913742, iteration: 441149
loss: 0.9908788800239563,grad_norm: 0.9447703967442509, iteration: 441150
loss: 0.9844622015953064,grad_norm: 0.6453050120410396, iteration: 441151
loss: 1.059496521949768,grad_norm: 0.9999996347611259, iteration: 441152
loss: 1.0636866092681885,grad_norm: 0.8773777325850093, iteration: 441153
loss: 1.0234711170196533,grad_norm: 0.9374717688159511, iteration: 441154
loss: 1.002672791481018,grad_norm: 0.5631511712554574, iteration: 441155
loss: 1.0084768533706665,grad_norm: 0.7345078516717577, iteration: 441156
loss: 1.1091469526290894,grad_norm: 0.7976058818823732, iteration: 441157
loss: 1.0217640399932861,grad_norm: 0.7698645122191938, iteration: 441158
loss: 0.9888529181480408,grad_norm: 0.7938991271047935, iteration: 441159
loss: 0.9564501643180847,grad_norm: 0.7766874279338283, iteration: 441160
loss: 1.0618280172348022,grad_norm: 0.9999996467042862, iteration: 441161
loss: 0.9454653859138489,grad_norm: 0.767218173739218, iteration: 441162
loss: 0.9857763051986694,grad_norm: 0.7602481375966588, iteration: 441163
loss: 1.0485477447509766,grad_norm: 0.9444814901884814, iteration: 441164
loss: 1.0250014066696167,grad_norm: 0.6858855161489625, iteration: 441165
loss: 0.9779751896858215,grad_norm: 0.727955474905173, iteration: 441166
loss: 1.0176515579223633,grad_norm: 0.7559111287673624, iteration: 441167
loss: 1.0384328365325928,grad_norm: 0.8193621556531347, iteration: 441168
loss: 0.9634553790092468,grad_norm: 0.8446612377032913, iteration: 441169
loss: 0.9855715036392212,grad_norm: 0.8695555061879141, iteration: 441170
loss: 0.9695813059806824,grad_norm: 0.7486508351188469, iteration: 441171
loss: 1.0078506469726562,grad_norm: 0.9719303476039822, iteration: 441172
loss: 0.9993074536323547,grad_norm: 0.8737422188584576, iteration: 441173
loss: 0.9866029620170593,grad_norm: 0.8995041949767116, iteration: 441174
loss: 1.0062096118927002,grad_norm: 0.7513247549386161, iteration: 441175
loss: 1.0031201839447021,grad_norm: 0.9673307578857869, iteration: 441176
loss: 0.9999551177024841,grad_norm: 0.8336021146862035, iteration: 441177
loss: 1.0138697624206543,grad_norm: 0.9999998418270195, iteration: 441178
loss: 1.0182374715805054,grad_norm: 0.9284478808297828, iteration: 441179
loss: 0.9887453317642212,grad_norm: 0.7886271845696095, iteration: 441180
loss: 0.9885914921760559,grad_norm: 0.6539635272110286, iteration: 441181
loss: 0.9936298727989197,grad_norm: 0.7011180261661305, iteration: 441182
loss: 1.016808032989502,grad_norm: 0.8111004726893459, iteration: 441183
loss: 0.9915072321891785,grad_norm: 0.8010472525263435, iteration: 441184
loss: 0.964897096157074,grad_norm: 0.9999996648097824, iteration: 441185
loss: 0.9596144556999207,grad_norm: 0.8258105364123307, iteration: 441186
loss: 0.9824991226196289,grad_norm: 0.8383726818779308, iteration: 441187
loss: 1.0078576803207397,grad_norm: 0.7343808116109811, iteration: 441188
loss: 0.996268630027771,grad_norm: 0.7444340358415992, iteration: 441189
loss: 0.9864739775657654,grad_norm: 0.792309892213344, iteration: 441190
loss: 1.023328423500061,grad_norm: 0.7677325553100096, iteration: 441191
loss: 0.997185468673706,grad_norm: 0.6641482774446356, iteration: 441192
loss: 1.0001704692840576,grad_norm: 0.7685826576798234, iteration: 441193
loss: 1.0051265954971313,grad_norm: 0.6325727579940644, iteration: 441194
loss: 1.0132360458374023,grad_norm: 0.9999991687587209, iteration: 441195
loss: 0.949362576007843,grad_norm: 0.7361836807864292, iteration: 441196
loss: 1.0430060625076294,grad_norm: 0.8631966257602272, iteration: 441197
loss: 1.0368791818618774,grad_norm: 0.9999991775942914, iteration: 441198
loss: 1.0014147758483887,grad_norm: 0.7954167357582383, iteration: 441199
loss: 0.9749758839607239,grad_norm: 0.6087267348796822, iteration: 441200
loss: 1.0026214122772217,grad_norm: 0.7549784592614857, iteration: 441201
loss: 1.0218696594238281,grad_norm: 0.8026079763321674, iteration: 441202
loss: 0.9584421515464783,grad_norm: 0.7293334199687128, iteration: 441203
loss: 1.0036815404891968,grad_norm: 0.6416018487819933, iteration: 441204
loss: 1.0355795621871948,grad_norm: 0.9119826659911842, iteration: 441205
loss: 1.0258147716522217,grad_norm: 0.8261814792574992, iteration: 441206
loss: 0.9928429126739502,grad_norm: 0.715096357296064, iteration: 441207
loss: 0.9799869656562805,grad_norm: 0.8134491558251224, iteration: 441208
loss: 1.0082862377166748,grad_norm: 0.8658569162669579, iteration: 441209
loss: 1.0474653244018555,grad_norm: 0.9080795528709151, iteration: 441210
loss: 1.1013870239257812,grad_norm: 0.9999994999289016, iteration: 441211
loss: 0.993130087852478,grad_norm: 0.8427570414924217, iteration: 441212
loss: 1.024168848991394,grad_norm: 0.7757573053219408, iteration: 441213
loss: 0.9442396759986877,grad_norm: 0.7205451865197892, iteration: 441214
loss: 1.0011779069900513,grad_norm: 0.9537589338057935, iteration: 441215
loss: 1.0059504508972168,grad_norm: 0.7037983714934564, iteration: 441216
loss: 1.0303255319595337,grad_norm: 0.783774477452156, iteration: 441217
loss: 0.9929623603820801,grad_norm: 0.8289673611982654, iteration: 441218
loss: 1.0840556621551514,grad_norm: 0.7596504516764078, iteration: 441219
loss: 1.0100953578948975,grad_norm: 0.6870984669296734, iteration: 441220
loss: 0.9860616326332092,grad_norm: 0.8167617190046651, iteration: 441221
loss: 0.9746822714805603,grad_norm: 0.8430244825931088, iteration: 441222
loss: 0.9993800520896912,grad_norm: 0.8053773845696204, iteration: 441223
loss: 1.0015088319778442,grad_norm: 0.7977793728623367, iteration: 441224
loss: 0.9985343813896179,grad_norm: 0.6370779824935678, iteration: 441225
loss: 0.9962939023971558,grad_norm: 0.7555341022148895, iteration: 441226
loss: 0.9651005864143372,grad_norm: 0.8466604213425796, iteration: 441227
loss: 1.0173214673995972,grad_norm: 0.8242732442416042, iteration: 441228
loss: 0.9945166707038879,grad_norm: 0.7938932774697173, iteration: 441229
loss: 0.989118218421936,grad_norm: 0.7863124470269008, iteration: 441230
loss: 0.9686644673347473,grad_norm: 0.7120978709538018, iteration: 441231
loss: 0.9739292860031128,grad_norm: 0.6424794021205746, iteration: 441232
loss: 1.0075373649597168,grad_norm: 0.8353894418791744, iteration: 441233
loss: 1.0160093307495117,grad_norm: 0.8655868516263912, iteration: 441234
loss: 0.9966550469398499,grad_norm: 0.9999993357281951, iteration: 441235
loss: 1.0070587396621704,grad_norm: 0.9999991731869694, iteration: 441236
loss: 0.980301558971405,grad_norm: 0.7659545996632087, iteration: 441237
loss: 0.9610764384269714,grad_norm: 0.6380426472791827, iteration: 441238
loss: 1.0024954080581665,grad_norm: 0.6611508175502712, iteration: 441239
loss: 0.9906818270683289,grad_norm: 0.834754976114771, iteration: 441240
loss: 0.9445884823799133,grad_norm: 0.8593421297152658, iteration: 441241
loss: 1.0079100131988525,grad_norm: 0.8276428540597895, iteration: 441242
loss: 1.0331227779388428,grad_norm: 0.7763311421208522, iteration: 441243
loss: 0.9864781498908997,grad_norm: 0.7502960562763346, iteration: 441244
loss: 1.0076162815093994,grad_norm: 0.7262352066083649, iteration: 441245
loss: 1.0072753429412842,grad_norm: 0.763251592964401, iteration: 441246
loss: 1.0344295501708984,grad_norm: 0.9999991274344877, iteration: 441247
loss: 0.9849246740341187,grad_norm: 0.8925318656604878, iteration: 441248
loss: 1.046040654182434,grad_norm: 0.8665487880375744, iteration: 441249
loss: 1.015206217765808,grad_norm: 0.8730391459281819, iteration: 441250
loss: 0.9794971346855164,grad_norm: 0.8585657829657806, iteration: 441251
loss: 1.002571702003479,grad_norm: 0.9999990899406684, iteration: 441252
loss: 0.9922589063644409,grad_norm: 0.9999998823077516, iteration: 441253
loss: 1.0142101049423218,grad_norm: 0.8278238358267248, iteration: 441254
loss: 0.9690939784049988,grad_norm: 0.7177000585665306, iteration: 441255
loss: 1.0738773345947266,grad_norm: 0.9999992234477338, iteration: 441256
loss: 0.9951427578926086,grad_norm: 0.9532992169280798, iteration: 441257
loss: 0.9792327880859375,grad_norm: 0.7343379163359834, iteration: 441258
loss: 1.0060068368911743,grad_norm: 0.7269140248997458, iteration: 441259
loss: 0.9744304418563843,grad_norm: 0.7603826521274824, iteration: 441260
loss: 1.0131845474243164,grad_norm: 0.8033021895628365, iteration: 441261
loss: 0.966536283493042,grad_norm: 0.9232055612898314, iteration: 441262
loss: 1.007625699043274,grad_norm: 0.7282421101724507, iteration: 441263
loss: 1.0432924032211304,grad_norm: 0.8234338081657803, iteration: 441264
loss: 0.9687153697013855,grad_norm: 0.7605324222632506, iteration: 441265
loss: 1.061916708946228,grad_norm: 0.9999992113326704, iteration: 441266
loss: 0.9626775979995728,grad_norm: 0.7956489596295825, iteration: 441267
loss: 0.96650230884552,grad_norm: 0.7620719181154494, iteration: 441268
loss: 0.9918902516365051,grad_norm: 0.7970621040350613, iteration: 441269
loss: 0.9938504695892334,grad_norm: 0.8156637854666527, iteration: 441270
loss: 1.057534098625183,grad_norm: 0.9999993467327339, iteration: 441271
loss: 1.000644326210022,grad_norm: 0.7046981434086421, iteration: 441272
loss: 1.0233516693115234,grad_norm: 0.9999998054733026, iteration: 441273
loss: 1.0081062316894531,grad_norm: 0.7805433016765239, iteration: 441274
loss: 0.9820178747177124,grad_norm: 0.7755241939538097, iteration: 441275
loss: 0.9847173094749451,grad_norm: 0.7395528724491773, iteration: 441276
loss: 0.9483785033226013,grad_norm: 0.9574152059607219, iteration: 441277
loss: 1.026239275932312,grad_norm: 0.8793176929610461, iteration: 441278
loss: 0.9992331266403198,grad_norm: 0.7863814347106443, iteration: 441279
loss: 0.9479022026062012,grad_norm: 0.8323222785874647, iteration: 441280
loss: 0.9527116417884827,grad_norm: 0.9569747595336097, iteration: 441281
loss: 1.005112886428833,grad_norm: 0.6058561112621217, iteration: 441282
loss: 1.0558949708938599,grad_norm: 0.6739402237712745, iteration: 441283
loss: 0.9999962449073792,grad_norm: 0.999999060930205, iteration: 441284
loss: 0.9988996982574463,grad_norm: 0.9999995436317429, iteration: 441285
loss: 0.9625109434127808,grad_norm: 0.6878350586507151, iteration: 441286
loss: 0.9374715089797974,grad_norm: 0.833843579133063, iteration: 441287
loss: 0.9860656261444092,grad_norm: 0.7494537451472374, iteration: 441288
loss: 1.0159555673599243,grad_norm: 0.8816570378505681, iteration: 441289
loss: 0.9659781455993652,grad_norm: 0.7646340005405472, iteration: 441290
loss: 1.047484040260315,grad_norm: 0.8601531639066342, iteration: 441291
loss: 0.9812742471694946,grad_norm: 0.651361742638233, iteration: 441292
loss: 0.9844653010368347,grad_norm: 0.945266210025813, iteration: 441293
loss: 1.0369257926940918,grad_norm: 0.7591368985223407, iteration: 441294
loss: 1.0416885614395142,grad_norm: 0.9771235391638378, iteration: 441295
loss: 1.0349314212799072,grad_norm: 0.832752526582633, iteration: 441296
loss: 1.0186522006988525,grad_norm: 0.7937391154428779, iteration: 441297
loss: 1.0184142589569092,grad_norm: 0.6789467060551488, iteration: 441298
loss: 1.0091747045516968,grad_norm: 0.9999995345110844, iteration: 441299
loss: 0.9823871850967407,grad_norm: 0.8092186185487292, iteration: 441300
loss: 0.9910095930099487,grad_norm: 0.9999996984342554, iteration: 441301
loss: 1.0029351711273193,grad_norm: 0.7716455581658325, iteration: 441302
loss: 0.982513427734375,grad_norm: 0.7831180170034546, iteration: 441303
loss: 1.085803508758545,grad_norm: 0.908559608606979, iteration: 441304
loss: 1.0286349058151245,grad_norm: 0.9999998119962689, iteration: 441305
loss: 1.0308349132537842,grad_norm: 0.7805680499166758, iteration: 441306
loss: 1.080552339553833,grad_norm: 0.9999995984290629, iteration: 441307
loss: 0.9842943549156189,grad_norm: 0.7754488701364036, iteration: 441308
loss: 0.9749994874000549,grad_norm: 0.6772526904520934, iteration: 441309
loss: 0.990544855594635,grad_norm: 0.6726439885374431, iteration: 441310
loss: 0.9809663891792297,grad_norm: 0.7188108101120722, iteration: 441311
loss: 1.045676589012146,grad_norm: 0.9998339496709514, iteration: 441312
loss: 0.9901495575904846,grad_norm: 0.9988379121525777, iteration: 441313
loss: 0.997195839881897,grad_norm: 0.9708367281836806, iteration: 441314
loss: 0.9928101897239685,grad_norm: 0.7708213783105677, iteration: 441315
loss: 0.9985643029212952,grad_norm: 0.6944142693010266, iteration: 441316
loss: 1.0343753099441528,grad_norm: 0.9630423803488839, iteration: 441317
loss: 0.9706905484199524,grad_norm: 0.7321192408425143, iteration: 441318
loss: 0.9603857398033142,grad_norm: 0.7928907048975462, iteration: 441319
loss: 0.9900928735733032,grad_norm: 0.7998450269874138, iteration: 441320
loss: 1.0112709999084473,grad_norm: 0.8830204357124272, iteration: 441321
loss: 1.0358657836914062,grad_norm: 0.7966173588514045, iteration: 441322
loss: 0.9884947538375854,grad_norm: 0.7078615299439257, iteration: 441323
loss: 1.0373941659927368,grad_norm: 0.9999997924942183, iteration: 441324
loss: 1.0259953737258911,grad_norm: 0.7368578920997767, iteration: 441325
loss: 1.0252375602722168,grad_norm: 0.9999993306393021, iteration: 441326
loss: 1.006197214126587,grad_norm: 0.6768996548788967, iteration: 441327
loss: 0.9892953038215637,grad_norm: 0.6968916500420257, iteration: 441328
loss: 1.043080449104309,grad_norm: 0.6570011276642779, iteration: 441329
loss: 1.0002577304840088,grad_norm: 0.7984928260429826, iteration: 441330
loss: 1.012799620628357,grad_norm: 0.6072198752513029, iteration: 441331
loss: 0.9637738466262817,grad_norm: 0.8256579982116864, iteration: 441332
loss: 0.9954042434692383,grad_norm: 0.8552652998357888, iteration: 441333
loss: 1.0210177898406982,grad_norm: 0.7747674402883253, iteration: 441334
loss: 0.9930909276008606,grad_norm: 0.8325506122520082, iteration: 441335
loss: 0.995359480381012,grad_norm: 0.6977467941312058, iteration: 441336
loss: 1.0208754539489746,grad_norm: 0.7679879612487458, iteration: 441337
loss: 1.0387147665023804,grad_norm: 0.8842198945747047, iteration: 441338
loss: 0.9474185705184937,grad_norm: 0.7859518243087507, iteration: 441339
loss: 0.9651958346366882,grad_norm: 0.7829249625067545, iteration: 441340
loss: 0.997948408126831,grad_norm: 0.7322024558302678, iteration: 441341
loss: 0.9704614877700806,grad_norm: 0.7517735246309655, iteration: 441342
loss: 1.0272091627120972,grad_norm: 0.9083551882390659, iteration: 441343
loss: 0.977500855922699,grad_norm: 0.6280546126084123, iteration: 441344
loss: 1.0099396705627441,grad_norm: 0.7862678453417732, iteration: 441345
loss: 1.0055500268936157,grad_norm: 0.8369500394583926, iteration: 441346
loss: 1.0003488063812256,grad_norm: 0.7086196963236617, iteration: 441347
loss: 0.9944301843643188,grad_norm: 0.9999996115645873, iteration: 441348
loss: 0.9778369665145874,grad_norm: 0.8781894112865173, iteration: 441349
loss: 0.9981921911239624,grad_norm: 0.8020773309730763, iteration: 441350
loss: 1.0426042079925537,grad_norm: 0.778968786650344, iteration: 441351
loss: 0.9716895222663879,grad_norm: 0.676316136370849, iteration: 441352
loss: 1.0052136182785034,grad_norm: 0.731165785590146, iteration: 441353
loss: 0.9992173314094543,grad_norm: 0.8712225469820812, iteration: 441354
loss: 1.003799557685852,grad_norm: 0.7110336757422392, iteration: 441355
loss: 1.0328962802886963,grad_norm: 0.8420459184777497, iteration: 441356
loss: 1.031946063041687,grad_norm: 0.7837107333033887, iteration: 441357
loss: 1.006951093673706,grad_norm: 0.7080287033998772, iteration: 441358
loss: 1.0217134952545166,grad_norm: 0.8764243572441763, iteration: 441359
loss: 1.0048748254776,grad_norm: 0.8024081624680975, iteration: 441360
loss: 0.9565738439559937,grad_norm: 0.850912001967953, iteration: 441361
loss: 0.9797723293304443,grad_norm: 0.670137247060044, iteration: 441362
loss: 1.0021483898162842,grad_norm: 0.662621781173338, iteration: 441363
loss: 0.9924911260604858,grad_norm: 0.7647441838066921, iteration: 441364
loss: 1.0382496118545532,grad_norm: 0.6597034390500849, iteration: 441365
loss: 0.9833151698112488,grad_norm: 0.7041521623699281, iteration: 441366
loss: 0.9919688701629639,grad_norm: 0.8191715494672088, iteration: 441367
loss: 1.0296854972839355,grad_norm: 0.9270961423573676, iteration: 441368
loss: 1.0137288570404053,grad_norm: 0.7142229586295271, iteration: 441369
loss: 0.9865685105323792,grad_norm: 0.8209543132215726, iteration: 441370
loss: 1.0434993505477905,grad_norm: 0.7095773273736037, iteration: 441371
loss: 1.0068397521972656,grad_norm: 0.8966651314435212, iteration: 441372
loss: 1.0351923704147339,grad_norm: 0.7956869741955541, iteration: 441373
loss: 1.0765939950942993,grad_norm: 0.9712829250731724, iteration: 441374
loss: 0.9711781144142151,grad_norm: 0.6551826934885299, iteration: 441375
loss: 0.9975546002388,grad_norm: 0.720083007309902, iteration: 441376
loss: 0.9978596568107605,grad_norm: 0.7288810576330758, iteration: 441377
loss: 1.0530400276184082,grad_norm: 0.9131303813276668, iteration: 441378
loss: 0.9937032461166382,grad_norm: 0.7856061286827101, iteration: 441379
loss: 1.0208649635314941,grad_norm: 0.7900334743330274, iteration: 441380
loss: 1.0180898904800415,grad_norm: 0.7386665887261216, iteration: 441381
loss: 1.0146478414535522,grad_norm: 0.7897684261173236, iteration: 441382
loss: 1.0047212839126587,grad_norm: 0.721774857033279, iteration: 441383
loss: 1.0331733226776123,grad_norm: 0.999999618908427, iteration: 441384
loss: 0.986233651638031,grad_norm: 0.8692007084394886, iteration: 441385
loss: 0.9935163855552673,grad_norm: 0.8996163806862443, iteration: 441386
loss: 1.0012315511703491,grad_norm: 0.9003294343176679, iteration: 441387
loss: 0.9826517701148987,grad_norm: 0.7180166658138349, iteration: 441388
loss: 0.9786053895950317,grad_norm: 0.8318184387648012, iteration: 441389
loss: 0.979250431060791,grad_norm: 0.9872371800320204, iteration: 441390
loss: 0.9832165837287903,grad_norm: 0.9999990383592996, iteration: 441391
loss: 0.9750645160675049,grad_norm: 0.7165637002598672, iteration: 441392
loss: 1.0027722120285034,grad_norm: 0.675431902081387, iteration: 441393
loss: 1.0093573331832886,grad_norm: 0.7433594999424733, iteration: 441394
loss: 1.0203408002853394,grad_norm: 0.9999992904496277, iteration: 441395
loss: 1.0064895153045654,grad_norm: 0.7525927447757259, iteration: 441396
loss: 1.0003479719161987,grad_norm: 0.7144375650492084, iteration: 441397
loss: 0.9989328384399414,grad_norm: 0.830396178478207, iteration: 441398
loss: 1.0218281745910645,grad_norm: 0.7759164028681492, iteration: 441399
loss: 0.9731862545013428,grad_norm: 0.7040997681396962, iteration: 441400
loss: 0.9967861771583557,grad_norm: 0.7036723468902245, iteration: 441401
loss: 0.9832317233085632,grad_norm: 0.7435207400945937, iteration: 441402
loss: 1.0142818689346313,grad_norm: 0.7852015510313763, iteration: 441403
loss: 1.020403265953064,grad_norm: 0.9999997347148509, iteration: 441404
loss: 1.0183390378952026,grad_norm: 0.7729626671539807, iteration: 441405
loss: 0.9954912066459656,grad_norm: 0.9722568721351976, iteration: 441406
loss: 1.0322169065475464,grad_norm: 0.798253531738976, iteration: 441407
loss: 0.9915878176689148,grad_norm: 0.7120575013689894, iteration: 441408
loss: 0.9984275698661804,grad_norm: 0.6882514782851843, iteration: 441409
loss: 1.0071033239364624,grad_norm: 0.6883044226357811, iteration: 441410
loss: 0.9834496974945068,grad_norm: 0.7495997591887669, iteration: 441411
loss: 0.9970479607582092,grad_norm: 0.7529995169999832, iteration: 441412
loss: 1.0103192329406738,grad_norm: 0.8854809656236181, iteration: 441413
loss: 0.9856806993484497,grad_norm: 0.839286814442842, iteration: 441414
loss: 1.0199717283248901,grad_norm: 0.8677973466124026, iteration: 441415
loss: 1.0418004989624023,grad_norm: 0.8082813453677099, iteration: 441416
loss: 0.99334716796875,grad_norm: 0.8662570339539727, iteration: 441417
loss: 0.995235800743103,grad_norm: 0.9999997243326934, iteration: 441418
loss: 0.9976606369018555,grad_norm: 0.7212439300273843, iteration: 441419
loss: 1.0095560550689697,grad_norm: 0.7915377320203452, iteration: 441420
loss: 1.0042657852172852,grad_norm: 0.7901752449972459, iteration: 441421
loss: 1.0287870168685913,grad_norm: 0.9999990591248209, iteration: 441422
loss: 1.059946060180664,grad_norm: 0.9999998747678612, iteration: 441423
loss: 0.9950170516967773,grad_norm: 0.822775664162203, iteration: 441424
loss: 1.039414882659912,grad_norm: 0.9233209726143322, iteration: 441425
loss: 1.0068036317825317,grad_norm: 0.8429188816323685, iteration: 441426
loss: 1.0184403657913208,grad_norm: 0.7336229167408916, iteration: 441427
loss: 0.9911038279533386,grad_norm: 0.9497668150338227, iteration: 441428
loss: 1.0869431495666504,grad_norm: 0.9999992317464278, iteration: 441429
loss: 0.9848581552505493,grad_norm: 0.9999992585364808, iteration: 441430
loss: 1.0088711977005005,grad_norm: 0.9999995241643456, iteration: 441431
loss: 1.0406923294067383,grad_norm: 0.7395072115623179, iteration: 441432
loss: 1.0101433992385864,grad_norm: 0.8054456144459298, iteration: 441433
loss: 1.022470235824585,grad_norm: 0.8068801276194584, iteration: 441434
loss: 0.9993245005607605,grad_norm: 0.711655287642628, iteration: 441435
loss: 1.0075167417526245,grad_norm: 0.885148981561982, iteration: 441436
loss: 1.0415127277374268,grad_norm: 0.9999991167305192, iteration: 441437
loss: 1.0698134899139404,grad_norm: 0.823115325774866, iteration: 441438
loss: 0.9848217964172363,grad_norm: 0.8707588650081503, iteration: 441439
loss: 1.0005791187286377,grad_norm: 0.9999999180700386, iteration: 441440
loss: 0.9891967177391052,grad_norm: 0.9458422883515057, iteration: 441441
loss: 0.9724688529968262,grad_norm: 0.9364370240672664, iteration: 441442
loss: 1.0148347616195679,grad_norm: 0.9620747104956323, iteration: 441443
loss: 1.006976842880249,grad_norm: 0.6776979016972835, iteration: 441444
loss: 1.0177905559539795,grad_norm: 0.9206687653441785, iteration: 441445
loss: 1.0076030492782593,grad_norm: 0.8688674816375266, iteration: 441446
loss: 1.0074183940887451,grad_norm: 0.8106960065727516, iteration: 441447
loss: 1.0128390789031982,grad_norm: 0.852428762392036, iteration: 441448
loss: 1.0837607383728027,grad_norm: 0.7001412559578482, iteration: 441449
loss: 1.092807650566101,grad_norm: 0.999999403394398, iteration: 441450
loss: 1.0345981121063232,grad_norm: 0.9999990461120505, iteration: 441451
loss: 1.0048272609710693,grad_norm: 0.7724452460432748, iteration: 441452
loss: 0.9694451689720154,grad_norm: 0.7052185161003841, iteration: 441453
loss: 0.9822435975074768,grad_norm: 0.7483673273636623, iteration: 441454
loss: 1.0432085990905762,grad_norm: 0.9999996876793292, iteration: 441455
loss: 0.9988084435462952,grad_norm: 0.8002737237938278, iteration: 441456
loss: 0.997191309928894,grad_norm: 0.7330858542248532, iteration: 441457
loss: 0.9967441558837891,grad_norm: 0.6982449066838742, iteration: 441458
loss: 0.9911627769470215,grad_norm: 0.9999993311289377, iteration: 441459
loss: 1.0069626569747925,grad_norm: 0.9388650146763079, iteration: 441460
loss: 0.9884200692176819,grad_norm: 0.6787945156736632, iteration: 441461
loss: 0.9911671280860901,grad_norm: 0.7655556168379113, iteration: 441462
loss: 1.0374435186386108,grad_norm: 0.9999990435582748, iteration: 441463
loss: 0.9889712929725647,grad_norm: 0.8471158822612284, iteration: 441464
loss: 1.005111575126648,grad_norm: 0.8195021630764432, iteration: 441465
loss: 1.0131467580795288,grad_norm: 0.671063721581504, iteration: 441466
loss: 1.023159384727478,grad_norm: 1.0000000454660252, iteration: 441467
loss: 1.0057604312896729,grad_norm: 0.8000872683950537, iteration: 441468
loss: 1.0331406593322754,grad_norm: 0.9999993085915488, iteration: 441469
loss: 1.0134015083312988,grad_norm: 0.7047572725819518, iteration: 441470
loss: 1.0509121417999268,grad_norm: 0.9999991305918836, iteration: 441471
loss: 1.0099657773971558,grad_norm: 0.9025112032044168, iteration: 441472
loss: 1.003679633140564,grad_norm: 0.9999992297829247, iteration: 441473
loss: 1.0730929374694824,grad_norm: 0.8660155008936015, iteration: 441474
loss: 0.9715501666069031,grad_norm: 0.8105753531481905, iteration: 441475
loss: 1.0414986610412598,grad_norm: 0.9999995130968011, iteration: 441476
loss: 1.0074217319488525,grad_norm: 0.6521369736034567, iteration: 441477
loss: 1.0018645524978638,grad_norm: 0.8381925040794371, iteration: 441478
loss: 1.0111318826675415,grad_norm: 0.7185243274310298, iteration: 441479
loss: 1.0271697044372559,grad_norm: 0.8509855247896242, iteration: 441480
loss: 0.9975018501281738,grad_norm: 0.7665908503262228, iteration: 441481
loss: 1.0155025720596313,grad_norm: 0.8672483739649521, iteration: 441482
loss: 0.9869799017906189,grad_norm: 0.6262140387036638, iteration: 441483
loss: 1.008882761001587,grad_norm: 0.6026267767048185, iteration: 441484
loss: 0.9642516374588013,grad_norm: 0.8659513639428774, iteration: 441485
loss: 1.0187181234359741,grad_norm: 0.7724108276616137, iteration: 441486
loss: 1.0443211793899536,grad_norm: 0.9087900147653377, iteration: 441487
loss: 1.0269851684570312,grad_norm: 0.7936192264147602, iteration: 441488
loss: 0.9990636110305786,grad_norm: 0.8751412711273098, iteration: 441489
loss: 1.0939626693725586,grad_norm: 0.7926693090898642, iteration: 441490
loss: 0.9940420389175415,grad_norm: 0.7483357453330561, iteration: 441491
loss: 0.996982216835022,grad_norm: 0.7992449434384603, iteration: 441492
loss: 1.0125980377197266,grad_norm: 0.8922603041305016, iteration: 441493
loss: 0.9852619767189026,grad_norm: 0.9758156752969508, iteration: 441494
loss: 1.0135265588760376,grad_norm: 0.7402277762460074, iteration: 441495
loss: 0.9920899868011475,grad_norm: 0.8470013512735148, iteration: 441496
loss: 1.0013612508773804,grad_norm: 0.8413687991480282, iteration: 441497
loss: 0.9985819458961487,grad_norm: 0.8575034368840408, iteration: 441498
loss: 0.9816247820854187,grad_norm: 0.700355536868445, iteration: 441499
loss: 1.0231860876083374,grad_norm: 0.7988398343099676, iteration: 441500
loss: 1.0074877738952637,grad_norm: 0.8578943683488902, iteration: 441501
loss: 1.043790578842163,grad_norm: 0.7757306462495609, iteration: 441502
loss: 1.0422868728637695,grad_norm: 0.9566633066166619, iteration: 441503
loss: 1.0103133916854858,grad_norm: 0.7246624788361489, iteration: 441504
loss: 1.0289305448532104,grad_norm: 0.6641264982774419, iteration: 441505
loss: 1.0061355829238892,grad_norm: 0.9972157516331365, iteration: 441506
loss: 1.0475308895111084,grad_norm: 0.9696983949500594, iteration: 441507
loss: 1.0328665971755981,grad_norm: 0.776408065251393, iteration: 441508
loss: 1.0508259534835815,grad_norm: 0.9999996403302664, iteration: 441509
loss: 0.9969863891601562,grad_norm: 0.839717357127104, iteration: 441510
loss: 1.0191494226455688,grad_norm: 0.7822788602780488, iteration: 441511
loss: 1.0190625190734863,grad_norm: 0.8869352610031499, iteration: 441512
loss: 1.0089683532714844,grad_norm: 0.7922159747270555, iteration: 441513
loss: 1.001543402671814,grad_norm: 0.9319586781655188, iteration: 441514
loss: 1.0036402940750122,grad_norm: 0.8517297895498246, iteration: 441515
loss: 1.0276702642440796,grad_norm: 0.8163482608199385, iteration: 441516
loss: 0.9711782932281494,grad_norm: 0.9172948817457177, iteration: 441517
loss: 1.0060776472091675,grad_norm: 0.8455280907994992, iteration: 441518
loss: 0.9960083365440369,grad_norm: 0.9314893164009208, iteration: 441519
loss: 1.03274667263031,grad_norm: 0.7858774636588137, iteration: 441520
loss: 1.016254186630249,grad_norm: 0.7978478469290796, iteration: 441521
loss: 0.9648497104644775,grad_norm: 0.8396431711664103, iteration: 441522
loss: 1.0124741792678833,grad_norm: 0.8144781884148125, iteration: 441523
loss: 1.0186069011688232,grad_norm: 0.8196449947056371, iteration: 441524
loss: 1.007056713104248,grad_norm: 0.7533911058156849, iteration: 441525
loss: 1.0432679653167725,grad_norm: 0.7088906398931697, iteration: 441526
loss: 0.9851144552230835,grad_norm: 0.6971621924287325, iteration: 441527
loss: 1.0011465549468994,grad_norm: 0.8723377084995441, iteration: 441528
loss: 1.0184824466705322,grad_norm: 0.8635452815150123, iteration: 441529
loss: 0.9930732250213623,grad_norm: 0.8222638118811979, iteration: 441530
loss: 0.9658035635948181,grad_norm: 0.8490851078565831, iteration: 441531
loss: 1.0171513557434082,grad_norm: 0.7085783295346657, iteration: 441532
loss: 0.9824005961418152,grad_norm: 0.7486843534708478, iteration: 441533
loss: 1.0013927221298218,grad_norm: 0.8089599935534186, iteration: 441534
loss: 0.997900664806366,grad_norm: 0.9465037429077158, iteration: 441535
loss: 1.051376461982727,grad_norm: 0.9999990585434676, iteration: 441536
loss: 0.9825536012649536,grad_norm: 0.6760465357842762, iteration: 441537
loss: 1.044115662574768,grad_norm: 0.9999993904527231, iteration: 441538
loss: 0.93434739112854,grad_norm: 0.7780774411187183, iteration: 441539
loss: 0.9931570887565613,grad_norm: 0.9125733679839474, iteration: 441540
loss: 1.0046948194503784,grad_norm: 0.8586445203824957, iteration: 441541
loss: 0.9877914190292358,grad_norm: 0.9240547213782478, iteration: 441542
loss: 0.9927420616149902,grad_norm: 0.9153062674616694, iteration: 441543
loss: 1.015466570854187,grad_norm: 0.8367777824632202, iteration: 441544
loss: 1.0179555416107178,grad_norm: 0.6764868128378803, iteration: 441545
loss: 1.0009058713912964,grad_norm: 0.7643096036910177, iteration: 441546
loss: 1.0307879447937012,grad_norm: 0.8723360231102902, iteration: 441547
loss: 1.0028742551803589,grad_norm: 0.8546801531115564, iteration: 441548
loss: 1.0135107040405273,grad_norm: 0.8450614610000444, iteration: 441549
loss: 1.005176067352295,grad_norm: 0.7870299387754979, iteration: 441550
loss: 1.0359840393066406,grad_norm: 0.7386485459394829, iteration: 441551
loss: 1.023332118988037,grad_norm: 0.7416013796684968, iteration: 441552
loss: 1.000612735748291,grad_norm: 0.9094377996102335, iteration: 441553
loss: 1.0466917753219604,grad_norm: 0.9792148855045635, iteration: 441554
loss: 0.9603328108787537,grad_norm: 0.716807691771882, iteration: 441555
loss: 1.0023167133331299,grad_norm: 0.7042405992253152, iteration: 441556
loss: 0.961164116859436,grad_norm: 0.9730048150849814, iteration: 441557
loss: 0.9853943586349487,grad_norm: 0.7329441112483223, iteration: 441558
loss: 0.9873796701431274,grad_norm: 0.8502825038587234, iteration: 441559
loss: 1.028687596321106,grad_norm: 0.9999998596956721, iteration: 441560
loss: 1.0273478031158447,grad_norm: 0.8439451197491957, iteration: 441561
loss: 0.9814809560775757,grad_norm: 0.6565468016672036, iteration: 441562
loss: 1.0745265483856201,grad_norm: 0.9999991625357186, iteration: 441563
loss: 0.9928951859474182,grad_norm: 0.7579262238939647, iteration: 441564
loss: 1.0216399431228638,grad_norm: 0.999999469185365, iteration: 441565
loss: 1.0012881755828857,grad_norm: 0.7950054198420533, iteration: 441566
loss: 1.1288548707962036,grad_norm: 0.9999999043287983, iteration: 441567
loss: 0.9939266443252563,grad_norm: 0.740105896321, iteration: 441568
loss: 1.010839819908142,grad_norm: 0.9999994887640461, iteration: 441569
loss: 1.0303443670272827,grad_norm: 0.6511887287910076, iteration: 441570
loss: 0.9549834132194519,grad_norm: 0.7225990530984019, iteration: 441571
loss: 1.040140151977539,grad_norm: 0.8025637003864646, iteration: 441572
loss: 0.9924611449241638,grad_norm: 0.799026407605391, iteration: 441573
loss: 1.015022873878479,grad_norm: 0.7861142432345124, iteration: 441574
loss: 0.986426055431366,grad_norm: 0.9999990763242275, iteration: 441575
loss: 1.037448525428772,grad_norm: 0.8239443435806182, iteration: 441576
loss: 1.0983999967575073,grad_norm: 0.777115558992282, iteration: 441577
loss: 0.9880626201629639,grad_norm: 0.7443659273843113, iteration: 441578
loss: 0.9595024585723877,grad_norm: 0.7006694570749976, iteration: 441579
loss: 1.0107454061508179,grad_norm: 0.8045964017911593, iteration: 441580
loss: 1.0002331733703613,grad_norm: 0.7374010119505714, iteration: 441581
loss: 0.9810976982116699,grad_norm: 0.9267274402897994, iteration: 441582
loss: 1.014518141746521,grad_norm: 0.7231894546623748, iteration: 441583
loss: 0.9782651662826538,grad_norm: 0.8079416780419705, iteration: 441584
loss: 1.0088818073272705,grad_norm: 0.9999991780496983, iteration: 441585
loss: 0.9824727177619934,grad_norm: 0.7164751565413497, iteration: 441586
loss: 1.0222408771514893,grad_norm: 0.8525216482390825, iteration: 441587
loss: 1.0010156631469727,grad_norm: 0.9999999414356601, iteration: 441588
loss: 0.9502056837081909,grad_norm: 0.6930801979195337, iteration: 441589
loss: 1.0254359245300293,grad_norm: 0.8401221688997943, iteration: 441590
loss: 1.021723747253418,grad_norm: 0.8184011523721207, iteration: 441591
loss: 1.015805721282959,grad_norm: 0.999998984075122, iteration: 441592
loss: 1.0485543012619019,grad_norm: 0.9999995752145019, iteration: 441593
loss: 1.002900242805481,grad_norm: 0.8972476103262149, iteration: 441594
loss: 1.0000308752059937,grad_norm: 0.8574339142372808, iteration: 441595
loss: 1.0063984394073486,grad_norm: 0.7863212237966537, iteration: 441596
loss: 0.9973015785217285,grad_norm: 0.9999990682707229, iteration: 441597
loss: 1.0233923196792603,grad_norm: 0.7563537788430433, iteration: 441598
loss: 1.0097005367279053,grad_norm: 0.8215511242613508, iteration: 441599
loss: 0.9866315722465515,grad_norm: 0.9438044489989251, iteration: 441600
loss: 0.9662447571754456,grad_norm: 0.8146280196386408, iteration: 441601
loss: 0.9910486936569214,grad_norm: 0.8861410647011745, iteration: 441602
loss: 1.0230404138565063,grad_norm: 0.9178098851060376, iteration: 441603
loss: 0.9977394938468933,grad_norm: 0.809177902224691, iteration: 441604
loss: 1.05821692943573,grad_norm: 0.9569191498921715, iteration: 441605
loss: 0.9866743087768555,grad_norm: 0.8419978486508666, iteration: 441606
loss: 0.9601590633392334,grad_norm: 0.8193061686356335, iteration: 441607
loss: 1.0219943523406982,grad_norm: 0.7809129961961668, iteration: 441608
loss: 0.9942865967750549,grad_norm: 0.9999994437240398, iteration: 441609
loss: 1.01698637008667,grad_norm: 0.7351959460831209, iteration: 441610
loss: 0.9967241287231445,grad_norm: 0.743046823023911, iteration: 441611
loss: 1.0118370056152344,grad_norm: 0.7338550471982764, iteration: 441612
loss: 1.016292929649353,grad_norm: 0.8847706889421267, iteration: 441613
loss: 0.9704437255859375,grad_norm: 0.8007187384781487, iteration: 441614
loss: 0.9604477286338806,grad_norm: 0.8660102073655003, iteration: 441615
loss: 0.9816734194755554,grad_norm: 0.7531671685278966, iteration: 441616
loss: 1.0259783267974854,grad_norm: 0.8782670736566761, iteration: 441617
loss: 0.9717209935188293,grad_norm: 0.7373819282335621, iteration: 441618
loss: 1.0310138463974,grad_norm: 0.871987592960578, iteration: 441619
loss: 0.9838074445724487,grad_norm: 0.6672638856142767, iteration: 441620
loss: 1.0847467184066772,grad_norm: 0.8048673580059998, iteration: 441621
loss: 0.9793943166732788,grad_norm: 0.8686365979498654, iteration: 441622
loss: 0.9950509667396545,grad_norm: 0.7537782693631983, iteration: 441623
loss: 1.0103363990783691,grad_norm: 0.9999993010356726, iteration: 441624
loss: 1.0055838823318481,grad_norm: 0.9999991240416172, iteration: 441625
loss: 0.9975481629371643,grad_norm: 0.7147165808032921, iteration: 441626
loss: 1.015173316001892,grad_norm: 0.9999990036455183, iteration: 441627
loss: 0.9900404214859009,grad_norm: 0.8063634483005145, iteration: 441628
loss: 0.9649255871772766,grad_norm: 0.8757519237738579, iteration: 441629
loss: 1.080456018447876,grad_norm: 0.9999992772914083, iteration: 441630
loss: 1.0114985704421997,grad_norm: 0.8095500820887941, iteration: 441631
loss: 1.0167683362960815,grad_norm: 0.7993012893069943, iteration: 441632
loss: 0.9926925301551819,grad_norm: 0.7587911178609112, iteration: 441633
loss: 0.9929563403129578,grad_norm: 0.794938621133293, iteration: 441634
loss: 0.9964948296546936,grad_norm: 0.8955370029851218, iteration: 441635
loss: 0.9714022278785706,grad_norm: 0.7743013239822718, iteration: 441636
loss: 0.9933114051818848,grad_norm: 0.7580170516207274, iteration: 441637
loss: 1.0018434524536133,grad_norm: 0.5916880527376175, iteration: 441638
loss: 0.966943621635437,grad_norm: 0.8855564712077298, iteration: 441639
loss: 0.9904734492301941,grad_norm: 0.8071690398294776, iteration: 441640
loss: 1.0087047815322876,grad_norm: 0.7390702766915006, iteration: 441641
loss: 1.0411529541015625,grad_norm: 0.9999992042555201, iteration: 441642
loss: 0.9923871159553528,grad_norm: 0.777527261909869, iteration: 441643
loss: 1.031702995300293,grad_norm: 0.7481759805582641, iteration: 441644
loss: 0.9950641393661499,grad_norm: 0.9999998888029532, iteration: 441645
loss: 0.9960718154907227,grad_norm: 0.7031115987520292, iteration: 441646
loss: 0.9745485782623291,grad_norm: 0.7157687720107913, iteration: 441647
loss: 0.9808244109153748,grad_norm: 0.7486741727814586, iteration: 441648
loss: 1.0412721633911133,grad_norm: 0.752287945509831, iteration: 441649
loss: 1.005297303199768,grad_norm: 0.8080563070530036, iteration: 441650
loss: 0.968951404094696,grad_norm: 0.7411990395277951, iteration: 441651
loss: 0.9926261305809021,grad_norm: 0.8332069166865094, iteration: 441652
loss: 1.0391746759414673,grad_norm: 0.9999998884237877, iteration: 441653
loss: 0.9965788125991821,grad_norm: 0.8969190294811722, iteration: 441654
loss: 1.0270404815673828,grad_norm: 0.9136312718411947, iteration: 441655
loss: 0.9929461479187012,grad_norm: 0.9847723851920263, iteration: 441656
loss: 1.0066280364990234,grad_norm: 0.681551642300603, iteration: 441657
loss: 0.9859516024589539,grad_norm: 0.9999998367835221, iteration: 441658
loss: 0.9637370109558105,grad_norm: 0.8467528272141315, iteration: 441659
loss: 0.996406614780426,grad_norm: 0.7827846555048608, iteration: 441660
loss: 1.0452638864517212,grad_norm: 0.9999995664315098, iteration: 441661
loss: 1.0016175508499146,grad_norm: 1.0000000005284497, iteration: 441662
loss: 0.9950748682022095,grad_norm: 0.9999992743238401, iteration: 441663
loss: 0.9933541417121887,grad_norm: 0.9805691751527634, iteration: 441664
loss: 1.0073119401931763,grad_norm: 0.9537784746251862, iteration: 441665
loss: 0.9907731413841248,grad_norm: 0.7915076081207394, iteration: 441666
loss: 0.9871432781219482,grad_norm: 0.7142897192434261, iteration: 441667
loss: 0.9618043899536133,grad_norm: 0.6689205012415363, iteration: 441668
loss: 0.9845862984657288,grad_norm: 0.8239370681364281, iteration: 441669
loss: 0.974763810634613,grad_norm: 0.9439632593616554, iteration: 441670
loss: 0.9927665591239929,grad_norm: 0.7510195590930333, iteration: 441671
loss: 1.0221855640411377,grad_norm: 0.7086994142818928, iteration: 441672
loss: 0.9677642583847046,grad_norm: 0.7318804445248875, iteration: 441673
loss: 1.0035194158554077,grad_norm: 0.8047907693546275, iteration: 441674
loss: 1.065938115119934,grad_norm: 0.782303845998499, iteration: 441675
loss: 1.058712363243103,grad_norm: 0.9999994457040393, iteration: 441676
loss: 1.0333448648452759,grad_norm: 0.9829221397907733, iteration: 441677
loss: 1.025843858718872,grad_norm: 0.7740046017837808, iteration: 441678
loss: 0.9697084426879883,grad_norm: 0.7987467827842978, iteration: 441679
loss: 0.9920295476913452,grad_norm: 0.7310720425432415, iteration: 441680
loss: 0.9864044189453125,grad_norm: 0.8033357462586687, iteration: 441681
loss: 1.0008609294891357,grad_norm: 0.7623515331285929, iteration: 441682
loss: 1.0057142972946167,grad_norm: 0.8654699308113031, iteration: 441683
loss: 0.9596750736236572,grad_norm: 0.8495877785579752, iteration: 441684
loss: 1.0073728561401367,grad_norm: 0.7581148323380089, iteration: 441685
loss: 1.019382357597351,grad_norm: 0.7986628029733156, iteration: 441686
loss: 0.9868599772453308,grad_norm: 0.7802006045656092, iteration: 441687
loss: 1.027979850769043,grad_norm: 0.6855814049633527, iteration: 441688
loss: 0.9874033331871033,grad_norm: 0.7998403985141692, iteration: 441689
loss: 0.9794031381607056,grad_norm: 0.755721250786815, iteration: 441690
loss: 0.9926662445068359,grad_norm: 0.7328559601188578, iteration: 441691
loss: 1.0631163120269775,grad_norm: 0.7927744479351244, iteration: 441692
loss: 1.0061819553375244,grad_norm: 0.9526108819738018, iteration: 441693
loss: 1.0365732908248901,grad_norm: 0.5988237696043578, iteration: 441694
loss: 1.0062954425811768,grad_norm: 0.9357278677105956, iteration: 441695
loss: 0.9649686217308044,grad_norm: 0.9999990488440673, iteration: 441696
loss: 1.0474333763122559,grad_norm: 0.9999998721942891, iteration: 441697
loss: 0.9735300540924072,grad_norm: 0.8028332636472285, iteration: 441698
loss: 0.9699136018753052,grad_norm: 0.8817147385734172, iteration: 441699
loss: 1.0265883207321167,grad_norm: 0.6764972793802284, iteration: 441700
loss: 0.9794150590896606,grad_norm: 0.9253289643729609, iteration: 441701
loss: 0.9859289526939392,grad_norm: 0.6627338028202754, iteration: 441702
loss: 1.1001092195510864,grad_norm: 0.7032918288201386, iteration: 441703
loss: 0.9869730472564697,grad_norm: 0.7751294963650395, iteration: 441704
loss: 1.001381516456604,grad_norm: 0.6275643689322924, iteration: 441705
loss: 0.9707670211791992,grad_norm: 0.8332442012862604, iteration: 441706
loss: 0.9550327658653259,grad_norm: 0.761172916120249, iteration: 441707
loss: 0.9905169606208801,grad_norm: 0.7607521111823298, iteration: 441708
loss: 1.01673424243927,grad_norm: 0.8924865503674386, iteration: 441709
loss: 0.9885882139205933,grad_norm: 0.9999995895411569, iteration: 441710
loss: 1.0243722200393677,grad_norm: 0.7381250610947891, iteration: 441711
loss: 0.9973357319831848,grad_norm: 0.8986198836737057, iteration: 441712
loss: 0.996105968952179,grad_norm: 0.8425039409333698, iteration: 441713
loss: 0.9745137691497803,grad_norm: 0.6770737139500299, iteration: 441714
loss: 1.0285762548446655,grad_norm: 0.8265420104825438, iteration: 441715
loss: 0.9745500683784485,grad_norm: 0.6453432161074418, iteration: 441716
loss: 1.0222201347351074,grad_norm: 0.9334749760539661, iteration: 441717
loss: 0.9913498759269714,grad_norm: 0.9195890375642958, iteration: 441718
loss: 0.9818885922431946,grad_norm: 0.6567517486556442, iteration: 441719
loss: 1.0211480855941772,grad_norm: 0.9764446286881887, iteration: 441720
loss: 1.0082870721817017,grad_norm: 0.6920569236591027, iteration: 441721
loss: 1.035246729850769,grad_norm: 0.9999991164495656, iteration: 441722
loss: 0.9905657172203064,grad_norm: 0.702565537855275, iteration: 441723
loss: 1.1436231136322021,grad_norm: 0.999999938428346, iteration: 441724
loss: 1.0247457027435303,grad_norm: 0.659224377471844, iteration: 441725
loss: 1.0739411115646362,grad_norm: 0.8268969154131178, iteration: 441726
loss: 1.0151419639587402,grad_norm: 0.9999991303259442, iteration: 441727
loss: 1.0307910442352295,grad_norm: 0.8358017900982964, iteration: 441728
loss: 1.0144445896148682,grad_norm: 0.7979104363003469, iteration: 441729
loss: 0.9802135825157166,grad_norm: 0.917047667221919, iteration: 441730
loss: 1.0002747774124146,grad_norm: 0.7861875475650432, iteration: 441731
loss: 0.9718393683433533,grad_norm: 0.9362228202466096, iteration: 441732
loss: 1.009506344795227,grad_norm: 0.8459512044544586, iteration: 441733
loss: 1.0566972494125366,grad_norm: 0.9999998543038346, iteration: 441734
loss: 1.002104640007019,grad_norm: 0.7858537910061164, iteration: 441735
loss: 0.9946303367614746,grad_norm: 0.723728613793622, iteration: 441736
loss: 1.0407726764678955,grad_norm: 0.8608302374010135, iteration: 441737
loss: 1.0118027925491333,grad_norm: 0.8004277238153618, iteration: 441738
loss: 1.033610463142395,grad_norm: 0.9999992601094034, iteration: 441739
loss: 1.0461294651031494,grad_norm: 0.755428195886978, iteration: 441740
loss: 1.0000133514404297,grad_norm: 0.7956501286620102, iteration: 441741
loss: 1.0234934091567993,grad_norm: 0.8428454587107734, iteration: 441742
loss: 1.0109001398086548,grad_norm: 0.9280798200938427, iteration: 441743
loss: 0.978344738483429,grad_norm: 0.7817373086197401, iteration: 441744
loss: 0.9853602051734924,grad_norm: 0.8188820795743877, iteration: 441745
loss: 1.0102758407592773,grad_norm: 0.9551370001727882, iteration: 441746
loss: 1.0369465351104736,grad_norm: 0.9417998135032235, iteration: 441747
loss: 0.9740871787071228,grad_norm: 0.8355041864766429, iteration: 441748
loss: 0.9930935502052307,grad_norm: 0.6407844959681456, iteration: 441749
loss: 1.0592855215072632,grad_norm: 0.9999991868496053, iteration: 441750
loss: 0.9918285608291626,grad_norm: 0.8375124968909045, iteration: 441751
loss: 0.9795373678207397,grad_norm: 0.7686351200315066, iteration: 441752
loss: 1.0013490915298462,grad_norm: 0.99999994194929, iteration: 441753
loss: 1.0004093647003174,grad_norm: 0.8258752608435023, iteration: 441754
loss: 0.993054986000061,grad_norm: 0.7697562585791158, iteration: 441755
loss: 0.9780164957046509,grad_norm: 0.6553662917744336, iteration: 441756
loss: 0.9641394019126892,grad_norm: 0.8126313564697762, iteration: 441757
loss: 1.0006300210952759,grad_norm: 0.7611662967976764, iteration: 441758
loss: 1.0010673999786377,grad_norm: 0.9929319739667333, iteration: 441759
loss: 0.9870876669883728,grad_norm: 0.7570063406387157, iteration: 441760
loss: 0.9770095348358154,grad_norm: 0.7352851840725921, iteration: 441761
loss: 0.9925674796104431,grad_norm: 0.6671980290671944, iteration: 441762
loss: 1.013877272605896,grad_norm: 0.8447654516560216, iteration: 441763
loss: 1.003734827041626,grad_norm: 0.7069541273496893, iteration: 441764
loss: 1.015052318572998,grad_norm: 0.7802837255116452, iteration: 441765
loss: 0.9927455186843872,grad_norm: 0.7667891179002289, iteration: 441766
loss: 1.0136171579360962,grad_norm: 0.7977030658116996, iteration: 441767
loss: 0.9934770464897156,grad_norm: 0.7403885921480249, iteration: 441768
loss: 1.0040637254714966,grad_norm: 0.7192925239033225, iteration: 441769
loss: 0.9908422827720642,grad_norm: 0.7734787696549087, iteration: 441770
loss: 0.9863990545272827,grad_norm: 0.9999991992663947, iteration: 441771
loss: 1.01369047164917,grad_norm: 0.8284295665062972, iteration: 441772
loss: 1.0054872035980225,grad_norm: 0.7174469220894162, iteration: 441773
loss: 0.9860218167304993,grad_norm: 0.7256814047261703, iteration: 441774
loss: 0.9928134679794312,grad_norm: 0.8039140528916089, iteration: 441775
loss: 0.982686460018158,grad_norm: 0.9999999510010434, iteration: 441776
loss: 1.0161088705062866,grad_norm: 0.697066543911953, iteration: 441777
loss: 1.0288560390472412,grad_norm: 0.6956664637213779, iteration: 441778
loss: 0.9995750784873962,grad_norm: 0.8291009733179557, iteration: 441779
loss: 0.9896180033683777,grad_norm: 0.9742998510309243, iteration: 441780
loss: 0.9721993207931519,grad_norm: 0.9807919900209606, iteration: 441781
loss: 0.987615704536438,grad_norm: 0.7564651504106705, iteration: 441782
loss: 0.9941408038139343,grad_norm: 0.9999997021866829, iteration: 441783
loss: 1.0796388387680054,grad_norm: 0.9144957539753934, iteration: 441784
loss: 0.9919545650482178,grad_norm: 0.7919483655335613, iteration: 441785
loss: 1.0077530145645142,grad_norm: 0.8926358875279468, iteration: 441786
loss: 0.9991626143455505,grad_norm: 0.9379230887746968, iteration: 441787
loss: 0.9573004841804504,grad_norm: 0.7211775175082421, iteration: 441788
loss: 0.9917401075363159,grad_norm: 0.8363649271056761, iteration: 441789
loss: 0.9944658279418945,grad_norm: 0.7238543787880086, iteration: 441790
loss: 1.1502035856246948,grad_norm: 0.9999991260035185, iteration: 441791
loss: 0.9888584613800049,grad_norm: 0.7718150997963475, iteration: 441792
loss: 0.9855632781982422,grad_norm: 0.5691658554615554, iteration: 441793
loss: 1.007943034172058,grad_norm: 0.9999990431502039, iteration: 441794
loss: 1.0416326522827148,grad_norm: 0.9999998082870081, iteration: 441795
loss: 1.0500074625015259,grad_norm: 0.9999995654552373, iteration: 441796
loss: 0.9890972375869751,grad_norm: 0.8767121934483167, iteration: 441797
loss: 1.0589356422424316,grad_norm: 0.9999995516278346, iteration: 441798
loss: 1.037693977355957,grad_norm: 0.8189551278474072, iteration: 441799
loss: 0.9746201634407043,grad_norm: 0.7221499416825, iteration: 441800
loss: 0.9711679220199585,grad_norm: 0.99999924295594, iteration: 441801
loss: 1.0236667394638062,grad_norm: 0.8100070214720952, iteration: 441802
loss: 0.99350506067276,grad_norm: 0.7885548608281976, iteration: 441803
loss: 0.9833333492279053,grad_norm: 0.6879809476897621, iteration: 441804
loss: 0.9800181984901428,grad_norm: 0.7223119140249521, iteration: 441805
loss: 1.0192583799362183,grad_norm: 0.8326243514977794, iteration: 441806
loss: 1.0185718536376953,grad_norm: 0.7031264313792185, iteration: 441807
loss: 0.9976118206977844,grad_norm: 0.9074612235904412, iteration: 441808
loss: 0.9782752990722656,grad_norm: 0.7495300785131292, iteration: 441809
loss: 0.9641560912132263,grad_norm: 0.8656439658808519, iteration: 441810
loss: 1.003237009048462,grad_norm: 0.7348394930856224, iteration: 441811
loss: 1.0237747430801392,grad_norm: 0.9999993558378165, iteration: 441812
loss: 0.9430443644523621,grad_norm: 0.6751854926975073, iteration: 441813
loss: 1.031459927558899,grad_norm: 0.8181124308025116, iteration: 441814
loss: 1.0056507587432861,grad_norm: 0.7218679086445946, iteration: 441815
loss: 0.9944795966148376,grad_norm: 0.8499023702465364, iteration: 441816
loss: 1.0014493465423584,grad_norm: 0.9999993503618746, iteration: 441817
loss: 1.0251277685165405,grad_norm: 0.7931111363744288, iteration: 441818
loss: 1.0382574796676636,grad_norm: 0.9999993745985982, iteration: 441819
loss: 0.9928402900695801,grad_norm: 0.9639252271724148, iteration: 441820
loss: 0.9649491906166077,grad_norm: 0.8068289372622752, iteration: 441821
loss: 0.9779425263404846,grad_norm: 0.8423669520363299, iteration: 441822
loss: 1.0052977800369263,grad_norm: 0.7925667583777352, iteration: 441823
loss: 0.9609981775283813,grad_norm: 0.8025954729180407, iteration: 441824
loss: 1.011210322380066,grad_norm: 0.7796100659739265, iteration: 441825
loss: 0.9981613755226135,grad_norm: 0.8039010037147368, iteration: 441826
loss: 0.9687922596931458,grad_norm: 0.7174788791976915, iteration: 441827
loss: 1.0199733972549438,grad_norm: 0.9999993005961084, iteration: 441828
loss: 0.9904935956001282,grad_norm: 0.7320998596307203, iteration: 441829
loss: 0.976509153842926,grad_norm: 0.6864484151618735, iteration: 441830
loss: 0.9932247996330261,grad_norm: 0.6791821533374942, iteration: 441831
loss: 1.0162495374679565,grad_norm: 0.7354876012515972, iteration: 441832
loss: 0.9933910369873047,grad_norm: 0.8667497215160901, iteration: 441833
loss: 0.9626819491386414,grad_norm: 0.796539551369618, iteration: 441834
loss: 1.0323610305786133,grad_norm: 0.7881824002997578, iteration: 441835
loss: 1.0065144300460815,grad_norm: 0.7460648023006697, iteration: 441836
loss: 0.962799072265625,grad_norm: 0.9999994803967903, iteration: 441837
loss: 1.031254768371582,grad_norm: 0.9405208626674679, iteration: 441838
loss: 1.0360885858535767,grad_norm: 0.8068860901527788, iteration: 441839
loss: 0.9820454716682434,grad_norm: 0.8093054194946585, iteration: 441840
loss: 0.995282769203186,grad_norm: 0.6833098187424276, iteration: 441841
loss: 1.024204969406128,grad_norm: 0.8592158439859562, iteration: 441842
loss: 0.9819939136505127,grad_norm: 0.8810496063978777, iteration: 441843
loss: 1.0087008476257324,grad_norm: 0.6640146271844942, iteration: 441844
loss: 1.0253506898880005,grad_norm: 0.6964988351572413, iteration: 441845
loss: 0.9707675576210022,grad_norm: 0.8951718079319825, iteration: 441846
loss: 0.982820451259613,grad_norm: 0.8493982835057025, iteration: 441847
loss: 1.011110544204712,grad_norm: 0.8085089484862898, iteration: 441848
loss: 0.9786001443862915,grad_norm: 0.782762582268516, iteration: 441849
loss: 1.019693374633789,grad_norm: 0.7352888088035411, iteration: 441850
loss: 1.0709444284439087,grad_norm: 0.8787170166477621, iteration: 441851
loss: 1.0049620866775513,grad_norm: 0.7378289573150619, iteration: 441852
loss: 0.9692561626434326,grad_norm: 0.7770687851164058, iteration: 441853
loss: 1.022118091583252,grad_norm: 0.6294497354588074, iteration: 441854
loss: 1.0481756925582886,grad_norm: 0.9999990323392722, iteration: 441855
loss: 1.0140324831008911,grad_norm: 0.713124592989652, iteration: 441856
loss: 0.9886379241943359,grad_norm: 1.0000000125306427, iteration: 441857
loss: 0.9808316230773926,grad_norm: 0.896299180967703, iteration: 441858
loss: 1.0137182474136353,grad_norm: 0.7412895272061066, iteration: 441859
loss: 1.018576741218567,grad_norm: 0.9999992727236423, iteration: 441860
loss: 0.9807333946228027,grad_norm: 0.6906415407519716, iteration: 441861
loss: 0.9871939420700073,grad_norm: 0.7803230023046054, iteration: 441862
loss: 0.9846213459968567,grad_norm: 0.9135496446664034, iteration: 441863
loss: 0.9874328374862671,grad_norm: 0.8082019370549663, iteration: 441864
loss: 1.048401951789856,grad_norm: 0.8048698078454694, iteration: 441865
loss: 1.0546830892562866,grad_norm: 0.9999991125133908, iteration: 441866
loss: 1.0104668140411377,grad_norm: 0.7842334477382578, iteration: 441867
loss: 1.0048497915267944,grad_norm: 0.9999994429926585, iteration: 441868
loss: 0.9633554220199585,grad_norm: 0.8710716341958988, iteration: 441869
loss: 0.9583981037139893,grad_norm: 0.9849351249524457, iteration: 441870
loss: 1.032421350479126,grad_norm: 0.9493985136700877, iteration: 441871
loss: 1.0133079290390015,grad_norm: 0.8325808254641122, iteration: 441872
loss: 1.0013993978500366,grad_norm: 0.887579852257057, iteration: 441873
loss: 1.1690993309020996,grad_norm: 0.9999998906906784, iteration: 441874
loss: 1.0222349166870117,grad_norm: 0.8321742049030729, iteration: 441875
loss: 0.9741578102111816,grad_norm: 0.8077858524338586, iteration: 441876
loss: 0.959727942943573,grad_norm: 0.8528394390845825, iteration: 441877
loss: 1.0238280296325684,grad_norm: 0.7629952256566597, iteration: 441878
loss: 0.9957455992698669,grad_norm: 0.9321685663052803, iteration: 441879
loss: 0.9814603924751282,grad_norm: 0.8614871088324522, iteration: 441880
loss: 1.1594868898391724,grad_norm: 0.7451916788899335, iteration: 441881
loss: 1.0286521911621094,grad_norm: 0.9955813919577764, iteration: 441882
loss: 0.9925724267959595,grad_norm: 0.7258599551099998, iteration: 441883
loss: 0.9870260953903198,grad_norm: 0.8460972813035607, iteration: 441884
loss: 1.0124011039733887,grad_norm: 0.8998779440005031, iteration: 441885
loss: 0.9965923428535461,grad_norm: 0.8809346778503772, iteration: 441886
loss: 0.968455970287323,grad_norm: 0.7984888857952144, iteration: 441887
loss: 1.0444339513778687,grad_norm: 0.7935679615939258, iteration: 441888
loss: 1.0136996507644653,grad_norm: 0.7278101451061927, iteration: 441889
loss: 0.9884074330329895,grad_norm: 0.753099065685121, iteration: 441890
loss: 0.9992315769195557,grad_norm: 0.8214256092520887, iteration: 441891
loss: 0.9945082068443298,grad_norm: 0.8074064587958077, iteration: 441892
loss: 0.9802846312522888,grad_norm: 0.7871983526402256, iteration: 441893
loss: 0.9739214181900024,grad_norm: 0.7906382119017671, iteration: 441894
loss: 1.0423271656036377,grad_norm: 0.7791500840880767, iteration: 441895
loss: 0.9843708276748657,grad_norm: 0.7080385427291868, iteration: 441896
loss: 0.9946105480194092,grad_norm: 0.7633715820534337, iteration: 441897
loss: 0.9993135929107666,grad_norm: 0.7664621100087144, iteration: 441898
loss: 0.9924177527427673,grad_norm: 0.7534099196028391, iteration: 441899
loss: 1.0064246654510498,grad_norm: 0.7464774940551792, iteration: 441900
loss: 1.0189299583435059,grad_norm: 0.7828692371621543, iteration: 441901
loss: 1.0569871664047241,grad_norm: 0.9999993004285168, iteration: 441902
loss: 1.0399423837661743,grad_norm: 0.998812597998617, iteration: 441903
loss: 1.0689469575881958,grad_norm: 0.9999991126283189, iteration: 441904
loss: 0.9859355092048645,grad_norm: 0.8158786823837461, iteration: 441905
loss: 1.1514595746994019,grad_norm: 0.9999993836138755, iteration: 441906
loss: 0.9944528937339783,grad_norm: 0.7403161349739034, iteration: 441907
loss: 0.9835708737373352,grad_norm: 0.7202009338683524, iteration: 441908
loss: 0.9863893389701843,grad_norm: 0.8710194182517274, iteration: 441909
loss: 1.0317432880401611,grad_norm: 0.7232325112565998, iteration: 441910
loss: 1.096349835395813,grad_norm: 0.9782188673477727, iteration: 441911
loss: 0.9733778834342957,grad_norm: 0.7000554399483375, iteration: 441912
loss: 0.9897646903991699,grad_norm: 0.67924487442364, iteration: 441913
loss: 0.9641985297203064,grad_norm: 0.999999523510879, iteration: 441914
loss: 1.0000053644180298,grad_norm: 0.6850264842158811, iteration: 441915
loss: 1.0108768939971924,grad_norm: 0.8450683460156281, iteration: 441916
loss: 1.0997310876846313,grad_norm: 0.9046659037013772, iteration: 441917
loss: 1.0990750789642334,grad_norm: 0.9269762619940232, iteration: 441918
loss: 0.9944130778312683,grad_norm: 0.9999990866391707, iteration: 441919
loss: 0.9795712232589722,grad_norm: 0.9681885174158756, iteration: 441920
loss: 0.9924933314323425,grad_norm: 0.8504434608760116, iteration: 441921
loss: 1.0068639516830444,grad_norm: 0.7523000973645686, iteration: 441922
loss: 0.9560340046882629,grad_norm: 0.737256722173908, iteration: 441923
loss: 1.0011311769485474,grad_norm: 0.9999998655992788, iteration: 441924
loss: 1.007034182548523,grad_norm: 0.9999990933268943, iteration: 441925
loss: 1.068139910697937,grad_norm: 0.9999999605154404, iteration: 441926
loss: 0.9669224619865417,grad_norm: 0.6944473514101984, iteration: 441927
loss: 1.0211169719696045,grad_norm: 0.7231948304734555, iteration: 441928
loss: 1.0094748735427856,grad_norm: 0.9581193435357825, iteration: 441929
loss: 1.1397242546081543,grad_norm: 0.7342396372931341, iteration: 441930
loss: 0.995564877986908,grad_norm: 0.8963934533006598, iteration: 441931
loss: 1.03021240234375,grad_norm: 0.9999989903986375, iteration: 441932
loss: 0.9901519417762756,grad_norm: 0.7941561496928361, iteration: 441933
loss: 1.0121142864227295,grad_norm: 0.6695471450797298, iteration: 441934
loss: 0.9754707217216492,grad_norm: 0.9999991108919878, iteration: 441935
loss: 0.9959425330162048,grad_norm: 0.8041156930398509, iteration: 441936
loss: 1.0053086280822754,grad_norm: 0.8560829014611862, iteration: 441937
loss: 1.0080702304840088,grad_norm: 0.9999991482022319, iteration: 441938
loss: 1.0182077884674072,grad_norm: 0.778126070541171, iteration: 441939
loss: 1.0386055707931519,grad_norm: 0.7718232136532105, iteration: 441940
loss: 0.9957097172737122,grad_norm: 0.9999991750373777, iteration: 441941
loss: 1.0489369630813599,grad_norm: 0.774172017434985, iteration: 441942
loss: 1.0070358514785767,grad_norm: 1.0000000288264126, iteration: 441943
loss: 1.0363242626190186,grad_norm: 0.7716608220356083, iteration: 441944
loss: 1.0358772277832031,grad_norm: 0.8075152611687891, iteration: 441945
loss: 0.9870275259017944,grad_norm: 0.9433078778321048, iteration: 441946
loss: 1.0026017427444458,grad_norm: 0.7185951696208424, iteration: 441947
loss: 1.012740969657898,grad_norm: 0.7828950591740276, iteration: 441948
loss: 1.006438970565796,grad_norm: 0.8266083346670581, iteration: 441949
loss: 1.055686116218567,grad_norm: 0.9999992691950738, iteration: 441950
loss: 1.016774296760559,grad_norm: 0.7563963767738892, iteration: 441951
loss: 1.0517557859420776,grad_norm: 0.9999993012474804, iteration: 441952
loss: 1.0232590436935425,grad_norm: 0.9999997368580504, iteration: 441953
loss: 1.015217661857605,grad_norm: 0.6973692989467501, iteration: 441954
loss: 0.9642851948738098,grad_norm: 0.7368034028749946, iteration: 441955
loss: 1.0185432434082031,grad_norm: 0.7329909400762229, iteration: 441956
loss: 0.9885802865028381,grad_norm: 0.9999990833495228, iteration: 441957
loss: 1.0020123720169067,grad_norm: 0.8368815864766403, iteration: 441958
loss: 0.9862421751022339,grad_norm: 0.7682660596244748, iteration: 441959
loss: 0.9966176748275757,grad_norm: 0.9213114836094957, iteration: 441960
loss: 0.953255832195282,grad_norm: 0.8467240837503451, iteration: 441961
loss: 1.0089143514633179,grad_norm: 0.7236268755832914, iteration: 441962
loss: 0.9574505686759949,grad_norm: 0.7225767495089078, iteration: 441963
loss: 0.9668508768081665,grad_norm: 0.8739241616094998, iteration: 441964
loss: 1.020074725151062,grad_norm: 0.7854215161230188, iteration: 441965
loss: 0.973536491394043,grad_norm: 0.7974201639929979, iteration: 441966
loss: 0.9752684831619263,grad_norm: 0.8034314427211666, iteration: 441967
loss: 1.0545705556869507,grad_norm: 0.9999993742970277, iteration: 441968
loss: 1.054508090019226,grad_norm: 0.820647479706708, iteration: 441969
loss: 1.02569580078125,grad_norm: 0.7387845540068118, iteration: 441970
loss: 1.0611265897750854,grad_norm: 0.8254351642123001, iteration: 441971
loss: 0.9376360774040222,grad_norm: 0.9999992682940999, iteration: 441972
loss: 0.9908095002174377,grad_norm: 0.7521796133045283, iteration: 441973
loss: 1.001503586769104,grad_norm: 0.8279434151359741, iteration: 441974
loss: 0.996614933013916,grad_norm: 0.8843310293894768, iteration: 441975
loss: 1.006329894065857,grad_norm: 0.8172111934343934, iteration: 441976
loss: 0.9848140478134155,grad_norm: 0.9616309224694111, iteration: 441977
loss: 0.9935541749000549,grad_norm: 0.8272085040233115, iteration: 441978
loss: 0.9706697463989258,grad_norm: 0.7070306191540834, iteration: 441979
loss: 1.0058612823486328,grad_norm: 0.9921124639522891, iteration: 441980
loss: 0.9735389351844788,grad_norm: 0.6428711810647333, iteration: 441981
loss: 0.9846085906028748,grad_norm: 0.6802337320549082, iteration: 441982
loss: 0.9877455830574036,grad_norm: 0.7577674005958132, iteration: 441983
loss: 0.9780823588371277,grad_norm: 0.7890868544089394, iteration: 441984
loss: 1.025528907775879,grad_norm: 0.8787728696974869, iteration: 441985
loss: 1.0503393411636353,grad_norm: 0.7809405117242699, iteration: 441986
loss: 1.180894136428833,grad_norm: 0.9999991045544019, iteration: 441987
loss: 1.0667237043380737,grad_norm: 0.9999994415382545, iteration: 441988
loss: 0.9943042993545532,grad_norm: 0.9999995235470995, iteration: 441989
loss: 1.0014339685440063,grad_norm: 0.5838643563132363, iteration: 441990
loss: 1.086081862449646,grad_norm: 0.8427400911982381, iteration: 441991
loss: 1.014718770980835,grad_norm: 0.7075550450886419, iteration: 441992
loss: 1.006211757659912,grad_norm: 0.8138021791286747, iteration: 441993
loss: 1.0707544088363647,grad_norm: 0.9999994699941538, iteration: 441994
loss: 0.9736013412475586,grad_norm: 0.7522345213519004, iteration: 441995
loss: 1.0168811082839966,grad_norm: 0.7789964268780357, iteration: 441996
loss: 0.9715172052383423,grad_norm: 0.6833578765817212, iteration: 441997
loss: 1.0227469205856323,grad_norm: 0.6489361547393196, iteration: 441998
loss: 1.0077837705612183,grad_norm: 0.6680813995810366, iteration: 441999
loss: 0.9664905071258545,grad_norm: 0.8088603243536969, iteration: 442000
loss: 0.9840161204338074,grad_norm: 0.797400169092225, iteration: 442001
loss: 1.0002872943878174,grad_norm: 0.7284655203435664, iteration: 442002
loss: 1.0719479322433472,grad_norm: 0.9999995137539434, iteration: 442003
loss: 0.9904863834381104,grad_norm: 0.6422642850203172, iteration: 442004
loss: 1.0979434251785278,grad_norm: 0.7722314113000543, iteration: 442005
loss: 1.000327706336975,grad_norm: 0.8720891583870042, iteration: 442006
loss: 0.9440909624099731,grad_norm: 0.6949235451813819, iteration: 442007
loss: 1.0084569454193115,grad_norm: 0.8032912459691793, iteration: 442008
loss: 1.011372447013855,grad_norm: 0.7438924709098614, iteration: 442009
loss: 0.9797890782356262,grad_norm: 0.5842562213546654, iteration: 442010
loss: 0.9975785613059998,grad_norm: 0.7588570020496542, iteration: 442011
loss: 1.0057810544967651,grad_norm: 0.930138141102906, iteration: 442012
loss: 0.9849750995635986,grad_norm: 0.8600346270906508, iteration: 442013
loss: 1.0164480209350586,grad_norm: 0.8354549422537205, iteration: 442014
loss: 1.0002093315124512,grad_norm: 0.9999993455815585, iteration: 442015
loss: 1.0081218481063843,grad_norm: 0.9247439725884686, iteration: 442016
loss: 0.977649986743927,grad_norm: 0.8097520994489065, iteration: 442017
loss: 1.0121595859527588,grad_norm: 0.8587110130631728, iteration: 442018
loss: 1.0037184953689575,grad_norm: 0.7798740818499376, iteration: 442019
loss: 1.0031754970550537,grad_norm: 0.8285017769842073, iteration: 442020
loss: 0.9757279753684998,grad_norm: 0.6910621314948446, iteration: 442021
loss: 0.979520857334137,grad_norm: 0.7440217224147442, iteration: 442022
loss: 1.0148179531097412,grad_norm: 0.8017009996921411, iteration: 442023
loss: 1.0065516233444214,grad_norm: 0.9999993473191853, iteration: 442024
loss: 1.0562138557434082,grad_norm: 0.7177407129538692, iteration: 442025
loss: 0.9934861063957214,grad_norm: 0.7167183053872904, iteration: 442026
loss: 0.9853720664978027,grad_norm: 0.7670861761547138, iteration: 442027
loss: 1.0097932815551758,grad_norm: 0.8163647905250878, iteration: 442028
loss: 0.9932969808578491,grad_norm: 0.9999990064278679, iteration: 442029
loss: 1.0412538051605225,grad_norm: 0.7118972711309619, iteration: 442030
loss: 0.9812713265419006,grad_norm: 0.7772314738429967, iteration: 442031
loss: 1.1803719997406006,grad_norm: 0.9999993753523672, iteration: 442032
loss: 1.056978702545166,grad_norm: 0.9999996466737341, iteration: 442033
loss: 0.9761278033256531,grad_norm: 0.7436085197027892, iteration: 442034
loss: 0.9882153272628784,grad_norm: 0.7437958025047341, iteration: 442035
loss: 0.9948386549949646,grad_norm: 0.7583768304922882, iteration: 442036
loss: 1.0000935792922974,grad_norm: 0.759137834030262, iteration: 442037
loss: 0.9695866703987122,grad_norm: 0.7569019194259576, iteration: 442038
loss: 1.0025027990341187,grad_norm: 0.7602753346267824, iteration: 442039
loss: 0.960006058216095,grad_norm: 0.8561998052763776, iteration: 442040
loss: 0.996926486492157,grad_norm: 0.7310122436655175, iteration: 442041
loss: 1.0127851963043213,grad_norm: 0.999999129506907, iteration: 442042
loss: 0.9909444451332092,grad_norm: 0.9999999685397613, iteration: 442043
loss: 1.0016108751296997,grad_norm: 0.9195868210771453, iteration: 442044
loss: 1.0145930051803589,grad_norm: 0.9999996029545214, iteration: 442045
loss: 1.0532492399215698,grad_norm: 0.999998955089116, iteration: 442046
loss: 1.019391417503357,grad_norm: 0.6956451928242509, iteration: 442047
loss: 0.9906389713287354,grad_norm: 0.6289636227516979, iteration: 442048
loss: 0.9898564219474792,grad_norm: 0.767123445968604, iteration: 442049
loss: 0.9758475422859192,grad_norm: 0.8212423849540883, iteration: 442050
loss: 0.9840338826179504,grad_norm: 0.7226394460828574, iteration: 442051
loss: 0.9959782361984253,grad_norm: 0.719528172126078, iteration: 442052
loss: 1.0014718770980835,grad_norm: 0.6666086715812517, iteration: 442053
loss: 0.9833752512931824,grad_norm: 0.7589238969590992, iteration: 442054
loss: 0.9923452138900757,grad_norm: 0.763747277262808, iteration: 442055
loss: 0.9937683343887329,grad_norm: 0.6676174121261907, iteration: 442056
loss: 0.9933197498321533,grad_norm: 0.7891543361637795, iteration: 442057
loss: 1.0544356107711792,grad_norm: 0.9999994900138268, iteration: 442058
loss: 1.007953405380249,grad_norm: 0.8033394009698699, iteration: 442059
loss: 0.9987027049064636,grad_norm: 0.7316341663089256, iteration: 442060
loss: 1.0040392875671387,grad_norm: 0.6929441239445209, iteration: 442061
loss: 0.9993118643760681,grad_norm: 0.7989866006740528, iteration: 442062
loss: 0.9930534958839417,grad_norm: 0.6859335338045991, iteration: 442063
loss: 1.008605718612671,grad_norm: 0.7938286082434882, iteration: 442064
loss: 0.9685216546058655,grad_norm: 0.9298766468198604, iteration: 442065
loss: 1.0082106590270996,grad_norm: 0.6946341852496297, iteration: 442066
loss: 1.0005601644515991,grad_norm: 0.6867273167651927, iteration: 442067
loss: 1.016208291053772,grad_norm: 0.8629625300434817, iteration: 442068
loss: 1.0069575309753418,grad_norm: 0.7692984714013241, iteration: 442069
loss: 1.0003318786621094,grad_norm: 0.6829510851852989, iteration: 442070
loss: 1.0079758167266846,grad_norm: 0.8911538623490112, iteration: 442071
loss: 1.0169405937194824,grad_norm: 0.736438673961315, iteration: 442072
loss: 1.0091862678527832,grad_norm: 0.8902880831946148, iteration: 442073
loss: 1.0022597312927246,grad_norm: 0.8571233123406702, iteration: 442074
loss: 1.0501115322113037,grad_norm: 0.8713539155847562, iteration: 442075
loss: 0.9962239265441895,grad_norm: 0.7919387932644166, iteration: 442076
loss: 1.0036308765411377,grad_norm: 0.7829169801190979, iteration: 442077
loss: 1.025618076324463,grad_norm: 0.814394205521479, iteration: 442078
loss: 0.98946213722229,grad_norm: 0.9999991130939565, iteration: 442079
loss: 0.9965987801551819,grad_norm: 0.8956598382830545, iteration: 442080
loss: 1.0119776725769043,grad_norm: 0.7750261065594409, iteration: 442081
loss: 0.969209611415863,grad_norm: 0.7274256398400119, iteration: 442082
loss: 1.0161460638046265,grad_norm: 0.8865827601927532, iteration: 442083
loss: 0.9895479083061218,grad_norm: 0.7824978147705561, iteration: 442084
loss: 1.0179543495178223,grad_norm: 0.7414483068930815, iteration: 442085
loss: 0.9786960482597351,grad_norm: 0.7189087901581809, iteration: 442086
loss: 1.0167235136032104,grad_norm: 0.8399055992172683, iteration: 442087
loss: 1.0012778043746948,grad_norm: 0.8369438227562305, iteration: 442088
loss: 1.0673547983169556,grad_norm: 0.9999999703779084, iteration: 442089
loss: 1.0017881393432617,grad_norm: 0.7783389011974073, iteration: 442090
loss: 1.029174566268921,grad_norm: 0.9999999385989958, iteration: 442091
loss: 0.9906952381134033,grad_norm: 0.8729573737502256, iteration: 442092
loss: 0.9455853700637817,grad_norm: 0.8954199228731672, iteration: 442093
loss: 0.9998530745506287,grad_norm: 0.7028361271584807, iteration: 442094
loss: 0.9949168562889099,grad_norm: 0.7682059533259255, iteration: 442095
loss: 1.0281070470809937,grad_norm: 0.6947029121180466, iteration: 442096
loss: 0.9813852310180664,grad_norm: 0.9207279983870666, iteration: 442097
loss: 1.0494797229766846,grad_norm: 0.9821697676293544, iteration: 442098
loss: 1.0175588130950928,grad_norm: 0.7614515640658992, iteration: 442099
loss: 0.9561635255813599,grad_norm: 0.7192742145589264, iteration: 442100
loss: 1.022922158241272,grad_norm: 0.8285766776662306, iteration: 442101
loss: 1.003065824508667,grad_norm: 0.654005605121914, iteration: 442102
loss: 0.9922748804092407,grad_norm: 0.700001353460729, iteration: 442103
loss: 1.067963719367981,grad_norm: 0.9999994357560537, iteration: 442104
loss: 0.983205258846283,grad_norm: 0.8030199398131207, iteration: 442105
loss: 1.1086959838867188,grad_norm: 0.7213132053835702, iteration: 442106
loss: 1.0014957189559937,grad_norm: 0.9999998571771676, iteration: 442107
loss: 1.0028154850006104,grad_norm: 0.9471615722200553, iteration: 442108
loss: 1.010194182395935,grad_norm: 1.0000000519335264, iteration: 442109
loss: 1.0354653596878052,grad_norm: 0.8082860011860314, iteration: 442110
loss: 1.012247920036316,grad_norm: 0.8032002045695184, iteration: 442111
loss: 0.9984942078590393,grad_norm: 0.8768277204025566, iteration: 442112
loss: 1.0103355646133423,grad_norm: 0.8067147129666413, iteration: 442113
loss: 1.0029479265213013,grad_norm: 0.9999992123921181, iteration: 442114
loss: 0.9848562479019165,grad_norm: 0.7989138963871942, iteration: 442115
loss: 1.005792260169983,grad_norm: 0.6878891680375301, iteration: 442116
loss: 1.0159763097763062,grad_norm: 0.7280422821063447, iteration: 442117
loss: 0.9925824999809265,grad_norm: 0.8777902224781728, iteration: 442118
loss: 1.0009843111038208,grad_norm: 0.783740458074552, iteration: 442119
loss: 0.9934629797935486,grad_norm: 0.7027775676141476, iteration: 442120
loss: 1.001535177230835,grad_norm: 0.6990721927578533, iteration: 442121
loss: 0.9843912124633789,grad_norm: 0.6929896419803415, iteration: 442122
loss: 1.0345518589019775,grad_norm: 0.9999994046635778, iteration: 442123
loss: 0.9879835844039917,grad_norm: 0.8128143212658415, iteration: 442124
loss: 1.0259298086166382,grad_norm: 0.9999995800237919, iteration: 442125
loss: 1.0189698934555054,grad_norm: 0.7923412144349486, iteration: 442126
loss: 0.9879516959190369,grad_norm: 0.7075323233573728, iteration: 442127
loss: 1.03042733669281,grad_norm: 0.8451786079572938, iteration: 442128
loss: 0.9717966318130493,grad_norm: 0.9999992058820169, iteration: 442129
loss: 0.9851210117340088,grad_norm: 0.9253690561250512, iteration: 442130
loss: 0.986234188079834,grad_norm: 0.7568635847091897, iteration: 442131
loss: 0.956080436706543,grad_norm: 0.6591623955019318, iteration: 442132
loss: 1.0018508434295654,grad_norm: 0.8078302129323366, iteration: 442133
loss: 1.0151888132095337,grad_norm: 0.8117835635952712, iteration: 442134
loss: 0.9800029397010803,grad_norm: 0.7905300022769244, iteration: 442135
loss: 1.0029293298721313,grad_norm: 0.9999989651100788, iteration: 442136
loss: 0.9772480726242065,grad_norm: 0.8997277737869959, iteration: 442137
loss: 1.0344200134277344,grad_norm: 0.9999994581926682, iteration: 442138
loss: 1.0002493858337402,grad_norm: 0.7199952122945329, iteration: 442139
loss: 1.0216261148452759,grad_norm: 0.7988083891478358, iteration: 442140
loss: 1.0223842859268188,grad_norm: 0.8434769100351305, iteration: 442141
loss: 1.0479323863983154,grad_norm: 0.953531135923084, iteration: 442142
loss: 0.9790776968002319,grad_norm: 0.6889167842675419, iteration: 442143
loss: 0.9641485214233398,grad_norm: 0.712944708529906, iteration: 442144
loss: 0.9858724474906921,grad_norm: 0.7935102606221162, iteration: 442145
loss: 1.0049582719802856,grad_norm: 0.8329256777048729, iteration: 442146
loss: 1.0060575008392334,grad_norm: 0.7107143683021644, iteration: 442147
loss: 1.007561445236206,grad_norm: 0.7891718054681038, iteration: 442148
loss: 1.0265140533447266,grad_norm: 0.8245424964703232, iteration: 442149
loss: 1.0020040273666382,grad_norm: 0.7995393711692694, iteration: 442150
loss: 0.9996271133422852,grad_norm: 0.8361586610242482, iteration: 442151
loss: 1.0099549293518066,grad_norm: 0.9184478180322675, iteration: 442152
loss: 0.9866496920585632,grad_norm: 0.780291532371297, iteration: 442153
loss: 0.9987176656723022,grad_norm: 0.6920688400285949, iteration: 442154
loss: 1.0465514659881592,grad_norm: 0.9999997081553768, iteration: 442155
loss: 0.9893174171447754,grad_norm: 0.772676022211263, iteration: 442156
loss: 1.0154552459716797,grad_norm: 0.8441824825662961, iteration: 442157
loss: 0.9994065165519714,grad_norm: 0.8274186102273299, iteration: 442158
loss: 1.016350269317627,grad_norm: 0.7897358473923725, iteration: 442159
loss: 1.1114493608474731,grad_norm: 0.9999989781136814, iteration: 442160
loss: 0.997867226600647,grad_norm: 0.7258730802718723, iteration: 442161
loss: 0.9785292744636536,grad_norm: 0.8290923247594874, iteration: 442162
loss: 0.9939789175987244,grad_norm: 0.6286760481370252, iteration: 442163
loss: 0.9911406636238098,grad_norm: 1.0000000853731918, iteration: 442164
loss: 1.0170323848724365,grad_norm: 0.8113010078531452, iteration: 442165
loss: 1.007555365562439,grad_norm: 0.7851674105808308, iteration: 442166
loss: 0.9962432384490967,grad_norm: 0.7598622597068789, iteration: 442167
loss: 0.9743221402168274,grad_norm: 0.8078976576582789, iteration: 442168
loss: 1.0397289991378784,grad_norm: 0.8341030932212321, iteration: 442169
loss: 1.009295105934143,grad_norm: 0.733833067783973, iteration: 442170
loss: 0.986758828163147,grad_norm: 0.9999992722550695, iteration: 442171
loss: 1.053481101989746,grad_norm: 0.9064565368366224, iteration: 442172
loss: 0.9982548356056213,grad_norm: 0.793282979980452, iteration: 442173
loss: 1.0092384815216064,grad_norm: 0.702823584216005, iteration: 442174
loss: 1.0430067777633667,grad_norm: 0.8311441547732759, iteration: 442175
loss: 0.9778459668159485,grad_norm: 0.7639162822145755, iteration: 442176
loss: 1.0037109851837158,grad_norm: 0.9031514709412077, iteration: 442177
loss: 0.9562668800354004,grad_norm: 0.8107621143970896, iteration: 442178
loss: 1.008003830909729,grad_norm: 0.9999995571374499, iteration: 442179
loss: 1.036401391029358,grad_norm: 0.7589861608045482, iteration: 442180
loss: 1.0714025497436523,grad_norm: 0.9999989999198863, iteration: 442181
loss: 1.009433627128601,grad_norm: 0.7196907304637999, iteration: 442182
loss: 0.9887409806251526,grad_norm: 0.8276556332375145, iteration: 442183
loss: 0.9935369491577148,grad_norm: 0.6581591644096381, iteration: 442184
loss: 1.1288340091705322,grad_norm: 0.9903342902756631, iteration: 442185
loss: 0.9536387324333191,grad_norm: 0.7009137637870256, iteration: 442186
loss: 1.0253633260726929,grad_norm: 0.9165466976765808, iteration: 442187
loss: 0.9979288578033447,grad_norm: 0.999999102133215, iteration: 442188
loss: 1.1654809713363647,grad_norm: 0.9999997329181339, iteration: 442189
loss: 1.0683096647262573,grad_norm: 0.9999991647409733, iteration: 442190
loss: 0.9867988228797913,grad_norm: 0.7556294180190539, iteration: 442191
loss: 1.0094058513641357,grad_norm: 0.8153951687047047, iteration: 442192
loss: 1.0373038053512573,grad_norm: 0.9825084056605033, iteration: 442193
loss: 0.9955899715423584,grad_norm: 0.9218233892952653, iteration: 442194
loss: 1.028879165649414,grad_norm: 0.9999993704876282, iteration: 442195
loss: 1.0962460041046143,grad_norm: 0.9999998907465397, iteration: 442196
loss: 0.9812082052230835,grad_norm: 0.8165074937836153, iteration: 442197
loss: 0.974503755569458,grad_norm: 0.8396639559155961, iteration: 442198
loss: 0.9709774255752563,grad_norm: 0.8395882166298528, iteration: 442199
loss: 1.0252348184585571,grad_norm: 0.6809489416900275, iteration: 442200
loss: 0.987707257270813,grad_norm: 0.7347837943770342, iteration: 442201
loss: 0.9809215664863586,grad_norm: 0.8361307136956226, iteration: 442202
loss: 0.9874978065490723,grad_norm: 0.8568610173981958, iteration: 442203
loss: 1.025879979133606,grad_norm: 0.8305433155422562, iteration: 442204
loss: 0.9607000350952148,grad_norm: 0.7633752162190358, iteration: 442205
loss: 1.0131219625473022,grad_norm: 0.7494152290348663, iteration: 442206
loss: 1.0083346366882324,grad_norm: 0.7552570289986144, iteration: 442207
loss: 1.027374029159546,grad_norm: 0.9662711935382977, iteration: 442208
loss: 0.9801640510559082,grad_norm: 0.6896882653701002, iteration: 442209
loss: 0.981848955154419,grad_norm: 0.915154088284249, iteration: 442210
loss: 1.0149431228637695,grad_norm: 0.867091511348078, iteration: 442211
loss: 1.1982877254486084,grad_norm: 0.999999959609897, iteration: 442212
loss: 1.0727660655975342,grad_norm: 0.9999990933259706, iteration: 442213
loss: 1.1191227436065674,grad_norm: 0.7688334058009768, iteration: 442214
loss: 1.0120799541473389,grad_norm: 0.8927600815454116, iteration: 442215
loss: 0.9836221933364868,grad_norm: 0.7786931324626747, iteration: 442216
loss: 1.0386000871658325,grad_norm: 0.8384753504331198, iteration: 442217
loss: 1.0265840291976929,grad_norm: 0.851702513994796, iteration: 442218
loss: 1.0267302989959717,grad_norm: 0.9999995968434711, iteration: 442219
loss: 1.0366215705871582,grad_norm: 0.741839628335641, iteration: 442220
loss: 0.9557670950889587,grad_norm: 0.7344418541039118, iteration: 442221
loss: 0.945724606513977,grad_norm: 0.8167220838779357, iteration: 442222
loss: 0.963823676109314,grad_norm: 0.744002484668216, iteration: 442223
loss: 0.9766685366630554,grad_norm: 0.7518323135797842, iteration: 442224
loss: 0.9948428869247437,grad_norm: 0.7395453649226313, iteration: 442225
loss: 1.0072834491729736,grad_norm: 0.8896889023837055, iteration: 442226
loss: 1.0254625082015991,grad_norm: 0.9778923477937875, iteration: 442227
loss: 1.1441071033477783,grad_norm: 0.9797491622449654, iteration: 442228
loss: 0.9575168490409851,grad_norm: 0.8951451871874012, iteration: 442229
loss: 1.0024980306625366,grad_norm: 0.8226304619197565, iteration: 442230
loss: 0.9980131983757019,grad_norm: 0.8459543534460408, iteration: 442231
loss: 1.0739593505859375,grad_norm: 0.9547894905993943, iteration: 442232
loss: 1.1657592058181763,grad_norm: 0.9999999048661249, iteration: 442233
loss: 0.9622160196304321,grad_norm: 0.7584772585372823, iteration: 442234
loss: 1.0836421251296997,grad_norm: 0.9999994008998772, iteration: 442235
loss: 1.0288785696029663,grad_norm: 0.9999994756365698, iteration: 442236
loss: 1.0028982162475586,grad_norm: 0.9999992110025424, iteration: 442237
loss: 0.9995204210281372,grad_norm: 0.7839260976378399, iteration: 442238
loss: 1.0874375104904175,grad_norm: 0.7101310045199156, iteration: 442239
loss: 1.0090434551239014,grad_norm: 0.9999991849741935, iteration: 442240
loss: 0.9787362217903137,grad_norm: 0.8095899638896272, iteration: 442241
loss: 1.2353498935699463,grad_norm: 0.9999998441652637, iteration: 442242
loss: 1.0043444633483887,grad_norm: 0.7094875282701362, iteration: 442243
loss: 1.0232322216033936,grad_norm: 0.673269317111393, iteration: 442244
loss: 1.006101369857788,grad_norm: 0.7876356932085515, iteration: 442245
loss: 1.057251214981079,grad_norm: 0.7529759192957373, iteration: 442246
loss: 1.0006144046783447,grad_norm: 0.7364565469138402, iteration: 442247
loss: 1.0131678581237793,grad_norm: 0.9401527893091962, iteration: 442248
loss: 1.0024676322937012,grad_norm: 0.6708731893326271, iteration: 442249
loss: 1.0172786712646484,grad_norm: 0.7193104947494608, iteration: 442250
loss: 1.021999716758728,grad_norm: 0.9442944458350239, iteration: 442251
loss: 0.9915171265602112,grad_norm: 0.9207306655288114, iteration: 442252
loss: 1.0388814210891724,grad_norm: 0.6950480042420792, iteration: 442253
loss: 1.0248163938522339,grad_norm: 0.9999994337689747, iteration: 442254
loss: 1.117777943611145,grad_norm: 0.8814909266805462, iteration: 442255
loss: 0.9810834527015686,grad_norm: 0.9999991602569552, iteration: 442256
loss: 1.1652745008468628,grad_norm: 0.99999914895571, iteration: 442257
loss: 1.0922894477844238,grad_norm: 0.9999992092569022, iteration: 442258
loss: 1.1281177997589111,grad_norm: 0.9999991171220204, iteration: 442259
loss: 1.067899465560913,grad_norm: 0.7941649683811782, iteration: 442260
loss: 1.0112507343292236,grad_norm: 0.760576621315327, iteration: 442261
loss: 0.9935516715049744,grad_norm: 0.7141008203331418, iteration: 442262
loss: 1.0756423473358154,grad_norm: 0.9999997561816122, iteration: 442263
loss: 1.0582921504974365,grad_norm: 0.8722464295418265, iteration: 442264
loss: 1.0485795736312866,grad_norm: 0.8803866507784365, iteration: 442265
loss: 1.008641242980957,grad_norm: 0.9037332106428957, iteration: 442266
loss: 1.0395492315292358,grad_norm: 0.7599832401734313, iteration: 442267
loss: 1.0335227251052856,grad_norm: 0.7457246880729055, iteration: 442268
loss: 1.0078468322753906,grad_norm: 0.7770391509508798, iteration: 442269
loss: 1.0631141662597656,grad_norm: 0.781469900866598, iteration: 442270
loss: 1.0814582109451294,grad_norm: 0.7994680592442669, iteration: 442271
loss: 0.99737548828125,grad_norm: 0.8033395716610291, iteration: 442272
loss: 1.013800859451294,grad_norm: 0.7873837046700578, iteration: 442273
loss: 0.9876514077186584,grad_norm: 0.669582508857802, iteration: 442274
loss: 0.9713366031646729,grad_norm: 0.6933063095431486, iteration: 442275
loss: 1.0393333435058594,grad_norm: 0.6514393164633039, iteration: 442276
loss: 0.9568861722946167,grad_norm: 0.8323915624436251, iteration: 442277
loss: 0.9891050457954407,grad_norm: 0.8679595103780051, iteration: 442278
loss: 1.0247743129730225,grad_norm: 0.9999992278549961, iteration: 442279
loss: 0.9882966876029968,grad_norm: 0.9999991473099527, iteration: 442280
loss: 0.9978683590888977,grad_norm: 0.73207668485572, iteration: 442281
loss: 1.0303019285202026,grad_norm: 0.8617849057572577, iteration: 442282
loss: 1.0008440017700195,grad_norm: 0.9059058068307957, iteration: 442283
loss: 1.0017948150634766,grad_norm: 0.9999994366302086, iteration: 442284
loss: 1.001531958580017,grad_norm: 0.8163844908397565, iteration: 442285
loss: 1.0284559726715088,grad_norm: 0.9999997123730052, iteration: 442286
loss: 0.9903560876846313,grad_norm: 0.7745912344138998, iteration: 442287
loss: 1.1368818283081055,grad_norm: 0.9999995637417873, iteration: 442288
loss: 1.0409976243972778,grad_norm: 0.9999991552883155, iteration: 442289
loss: 0.9959001541137695,grad_norm: 0.9116775900927766, iteration: 442290
loss: 0.9751976728439331,grad_norm: 0.7375917720199954, iteration: 442291
loss: 0.9857089519500732,grad_norm: 0.8701968736539015, iteration: 442292
loss: 0.991323709487915,grad_norm: 0.8800899309307768, iteration: 442293
loss: 1.0314087867736816,grad_norm: 0.7677184923317184, iteration: 442294
loss: 1.0138757228851318,grad_norm: 0.8157555034894242, iteration: 442295
loss: 1.0144522190093994,grad_norm: 0.9999990232568357, iteration: 442296
loss: 0.9840877056121826,grad_norm: 0.8064339134557735, iteration: 442297
loss: 0.9532278180122375,grad_norm: 0.7955645322828111, iteration: 442298
loss: 0.9850814342498779,grad_norm: 0.885488603623028, iteration: 442299
loss: 1.0040010213851929,grad_norm: 0.7396632776798793, iteration: 442300
loss: 1.023589849472046,grad_norm: 0.999999417013541, iteration: 442301
loss: 0.9748687744140625,grad_norm: 0.9621988381591318, iteration: 442302
loss: 1.0125268697738647,grad_norm: 0.8086325817192609, iteration: 442303
loss: 1.0105489492416382,grad_norm: 0.7840553929368405, iteration: 442304
loss: 1.064296007156372,grad_norm: 0.7454085134332953, iteration: 442305
loss: 1.0297919511795044,grad_norm: 0.7073834760597851, iteration: 442306
loss: 1.0350120067596436,grad_norm: 0.779645728401976, iteration: 442307
loss: 0.9716886878013611,grad_norm: 0.5975450845658741, iteration: 442308
loss: 1.1600924730300903,grad_norm: 0.9759065425796172, iteration: 442309
loss: 1.0788908004760742,grad_norm: 0.8990561820462071, iteration: 442310
loss: 0.9476584196090698,grad_norm: 0.7790804337518835, iteration: 442311
loss: 0.9750799536705017,grad_norm: 0.6525828742502681, iteration: 442312
loss: 1.0197350978851318,grad_norm: 0.7461631567081486, iteration: 442313
loss: 1.0854262113571167,grad_norm: 0.9205158481133254, iteration: 442314
loss: 0.9659079313278198,grad_norm: 0.7149553447492323, iteration: 442315
loss: 1.0488568544387817,grad_norm: 0.8088870083862477, iteration: 442316
loss: 1.240006446838379,grad_norm: 0.9187861664997908, iteration: 442317
loss: 1.0486775636672974,grad_norm: 0.7614837171138739, iteration: 442318
loss: 0.9891175031661987,grad_norm: 0.8077323753326011, iteration: 442319
loss: 1.0841128826141357,grad_norm: 0.9999999776054227, iteration: 442320
loss: 0.9896355271339417,grad_norm: 0.8711489389904216, iteration: 442321
loss: 1.0410847663879395,grad_norm: 0.9999991960945926, iteration: 442322
loss: 1.0018339157104492,grad_norm: 0.9999990749678707, iteration: 442323
loss: 1.054017186164856,grad_norm: 0.8377824262973746, iteration: 442324
loss: 1.024889588356018,grad_norm: 0.7651939742377091, iteration: 442325
loss: 1.0102657079696655,grad_norm: 0.8310360809377473, iteration: 442326
loss: 0.9676858186721802,grad_norm: 0.8746706118210501, iteration: 442327
loss: 0.9951237440109253,grad_norm: 0.696062682428969, iteration: 442328
loss: 1.0386021137237549,grad_norm: 0.7670448835048842, iteration: 442329
loss: 1.0704742670059204,grad_norm: 0.8650974252572481, iteration: 442330
loss: 0.994718611240387,grad_norm: 0.6560488419093743, iteration: 442331
loss: 1.0792920589447021,grad_norm: 0.9564087108733096, iteration: 442332
loss: 1.0023080110549927,grad_norm: 0.7165676474006211, iteration: 442333
loss: 1.0169577598571777,grad_norm: 0.856030152056643, iteration: 442334
loss: 1.1126482486724854,grad_norm: 0.9034613500552023, iteration: 442335
loss: 0.9647839069366455,grad_norm: 0.9760585212375716, iteration: 442336
loss: 1.0136510133743286,grad_norm: 0.8973248882158147, iteration: 442337
loss: 0.982947051525116,grad_norm: 0.7069330048234146, iteration: 442338
loss: 1.0197733640670776,grad_norm: 0.8278441979822125, iteration: 442339
loss: 0.9583470821380615,grad_norm: 0.7294118586894186, iteration: 442340
loss: 0.9739278554916382,grad_norm: 0.7226450454301394, iteration: 442341
loss: 1.0121463537216187,grad_norm: 0.8115219820675236, iteration: 442342
loss: 0.9910773634910583,grad_norm: 0.8222803282016514, iteration: 442343
loss: 0.966422438621521,grad_norm: 0.9999992841582013, iteration: 442344
loss: 0.9916877150535583,grad_norm: 0.8837040982865867, iteration: 442345
loss: 1.0209263563156128,grad_norm: 0.8289957554453459, iteration: 442346
loss: 1.0090513229370117,grad_norm: 0.8119780816421595, iteration: 442347
loss: 1.0106801986694336,grad_norm: 0.9999991821967636, iteration: 442348
loss: 1.0498578548431396,grad_norm: 0.99999907426013, iteration: 442349
loss: 1.0028043985366821,grad_norm: 0.7096347205388222, iteration: 442350
loss: 1.0122475624084473,grad_norm: 0.99999908178528, iteration: 442351
loss: 1.0020817518234253,grad_norm: 0.7682564766645089, iteration: 442352
loss: 0.9750702381134033,grad_norm: 0.7439044932070126, iteration: 442353
loss: 1.028533697128296,grad_norm: 0.7441829434789755, iteration: 442354
loss: 1.017500400543213,grad_norm: 0.7001480509395079, iteration: 442355
loss: 1.0035200119018555,grad_norm: 0.713941391707941, iteration: 442356
loss: 1.0135903358459473,grad_norm: 0.8047089005284154, iteration: 442357
loss: 1.0105501413345337,grad_norm: 0.9999998976568548, iteration: 442358
loss: 0.9664530754089355,grad_norm: 0.8121543550535895, iteration: 442359
loss: 1.0209033489227295,grad_norm: 0.8348839908130014, iteration: 442360
loss: 1.0411794185638428,grad_norm: 0.77196909967687, iteration: 442361
loss: 1.0317579507827759,grad_norm: 0.6941898221212348, iteration: 442362
loss: 1.0897815227508545,grad_norm: 0.999999019772495, iteration: 442363
loss: 1.0280592441558838,grad_norm: 0.7714189679960987, iteration: 442364
loss: 0.9941906332969666,grad_norm: 0.7567502824215115, iteration: 442365
loss: 0.9856988787651062,grad_norm: 0.8170402570282517, iteration: 442366
loss: 0.9877987504005432,grad_norm: 0.826982708821518, iteration: 442367
loss: 0.9977537393569946,grad_norm: 0.9999999075445672, iteration: 442368
loss: 0.999640166759491,grad_norm: 0.8723127907290906, iteration: 442369
loss: 1.081898808479309,grad_norm: 0.8461103735640684, iteration: 442370
loss: 1.0593339204788208,grad_norm: 0.7167650522738283, iteration: 442371
loss: 0.9774990677833557,grad_norm: 0.7416141798593768, iteration: 442372
loss: 0.9885631203651428,grad_norm: 0.7180155371316194, iteration: 442373
loss: 1.0142160654067993,grad_norm: 0.8929465739812984, iteration: 442374
loss: 1.001715898513794,grad_norm: 0.7618341266991967, iteration: 442375
loss: 0.956051766872406,grad_norm: 0.7555297079705791, iteration: 442376
loss: 1.011474847793579,grad_norm: 0.9999991434691256, iteration: 442377
loss: 0.982098400592804,grad_norm: 0.6707001291146438, iteration: 442378
loss: 0.9986336827278137,grad_norm: 0.6680642132722368, iteration: 442379
loss: 1.0178282260894775,grad_norm: 0.7610138142772851, iteration: 442380
loss: 1.011274814605713,grad_norm: 0.7247038344155602, iteration: 442381
loss: 0.972683846950531,grad_norm: 0.722857090144393, iteration: 442382
loss: 0.9903302788734436,grad_norm: 0.8452018433256554, iteration: 442383
loss: 1.0399842262268066,grad_norm: 0.9763272715156314, iteration: 442384
loss: 0.9775692224502563,grad_norm: 0.9999999360745272, iteration: 442385
loss: 0.9721055626869202,grad_norm: 0.896323200238292, iteration: 442386
loss: 1.0965828895568848,grad_norm: 0.7779621464190668, iteration: 442387
loss: 1.0178642272949219,grad_norm: 0.6773442959664622, iteration: 442388
loss: 0.9771391749382019,grad_norm: 0.7417145136550471, iteration: 442389
loss: 0.98842853307724,grad_norm: 0.8455046655568675, iteration: 442390
loss: 0.9765135645866394,grad_norm: 0.8311664957070298, iteration: 442391
loss: 1.0161025524139404,grad_norm: 0.9070421133858915, iteration: 442392
loss: 0.9498146176338196,grad_norm: 0.7045302459169563, iteration: 442393
loss: 1.0002256631851196,grad_norm: 0.7034773975624952, iteration: 442394
loss: 0.9960903525352478,grad_norm: 0.838520720423116, iteration: 442395
loss: 1.010986328125,grad_norm: 0.7578788814870373, iteration: 442396
loss: 1.0496149063110352,grad_norm: 0.8322386158837243, iteration: 442397
loss: 1.0473273992538452,grad_norm: 0.7517979719600969, iteration: 442398
loss: 1.0322343111038208,grad_norm: 0.9463167741404833, iteration: 442399
loss: 1.0312823057174683,grad_norm: 0.8290815950402493, iteration: 442400
loss: 0.9660128951072693,grad_norm: 0.6963014732713004, iteration: 442401
loss: 0.9823060035705566,grad_norm: 0.7665527205236804, iteration: 442402
loss: 1.0903998613357544,grad_norm: 0.9974383530835104, iteration: 442403
loss: 1.0137385129928589,grad_norm: 0.8027069217304192, iteration: 442404
loss: 1.0313692092895508,grad_norm: 0.6999935250828225, iteration: 442405
loss: 1.0081793069839478,grad_norm: 0.9406908994395642, iteration: 442406
loss: 0.9989174604415894,grad_norm: 0.8794055601095574, iteration: 442407
loss: 1.0430448055267334,grad_norm: 0.7363885211757528, iteration: 442408
loss: 0.9853695034980774,grad_norm: 0.8088023290022137, iteration: 442409
loss: 0.9842403531074524,grad_norm: 0.8445027802873127, iteration: 442410
loss: 1.0625691413879395,grad_norm: 0.8314881173898748, iteration: 442411
loss: 1.074045181274414,grad_norm: 0.999999284495527, iteration: 442412
loss: 1.008468508720398,grad_norm: 0.7787329773285239, iteration: 442413
loss: 1.0425846576690674,grad_norm: 0.9511208163092549, iteration: 442414
loss: 1.0128885507583618,grad_norm: 0.7457440988093798, iteration: 442415
loss: 0.9873526692390442,grad_norm: 0.8261614903766621, iteration: 442416
loss: 0.9984228610992432,grad_norm: 0.7545138202652344, iteration: 442417
loss: 1.0024542808532715,grad_norm: 0.8041740291784701, iteration: 442418
loss: 0.9581171870231628,grad_norm: 0.7437952496545344, iteration: 442419
loss: 1.0126036405563354,grad_norm: 0.9441787847875187, iteration: 442420
loss: 0.9908496737480164,grad_norm: 0.6784069884120962, iteration: 442421
loss: 1.0262527465820312,grad_norm: 0.7318527937103102, iteration: 442422
loss: 0.9965996742248535,grad_norm: 0.8236795761592507, iteration: 442423
loss: 0.992181658744812,grad_norm: 0.7842518909447506, iteration: 442424
loss: 1.049843430519104,grad_norm: 0.9999999510666997, iteration: 442425
loss: 1.0188754796981812,grad_norm: 0.765878197357181, iteration: 442426
loss: 1.020137906074524,grad_norm: 0.8420802339300644, iteration: 442427
loss: 0.98770672082901,grad_norm: 0.7726614158122245, iteration: 442428
loss: 0.9771595597267151,grad_norm: 0.9050968870099221, iteration: 442429
loss: 0.9648050665855408,grad_norm: 0.6309615480154799, iteration: 442430
loss: 0.9984752535820007,grad_norm: 0.7925212311330594, iteration: 442431
loss: 0.9949246644973755,grad_norm: 0.8748028744240536, iteration: 442432
loss: 1.0005875825881958,grad_norm: 0.8964645897853766, iteration: 442433
loss: 1.034759521484375,grad_norm: 0.8173146032160896, iteration: 442434
loss: 1.0345170497894287,grad_norm: 0.9154155696705057, iteration: 442435
loss: 1.008811116218567,grad_norm: 0.804654473800068, iteration: 442436
loss: 0.9867309927940369,grad_norm: 0.6892857993731637, iteration: 442437
loss: 0.9908387064933777,grad_norm: 0.7233410194764525, iteration: 442438
loss: 0.9995635151863098,grad_norm: 0.786165396192977, iteration: 442439
loss: 1.0234370231628418,grad_norm: 0.9692035444142336, iteration: 442440
loss: 0.9747757911682129,grad_norm: 0.727232546640492, iteration: 442441
loss: 0.9977788925170898,grad_norm: 0.8037056030442936, iteration: 442442
loss: 1.0211632251739502,grad_norm: 0.9145952751729388, iteration: 442443
loss: 1.0241531133651733,grad_norm: 0.7454299426216814, iteration: 442444
loss: 1.0147790908813477,grad_norm: 0.8400645762581822, iteration: 442445
loss: 0.9904767870903015,grad_norm: 0.864834655402405, iteration: 442446
loss: 0.9821305871009827,grad_norm: 0.7303122850053155, iteration: 442447
loss: 1.0282347202301025,grad_norm: 0.8182076611304911, iteration: 442448
loss: 1.0185378789901733,grad_norm: 0.766054169092514, iteration: 442449
loss: 0.9525366425514221,grad_norm: 0.7789522488441335, iteration: 442450
loss: 1.0029258728027344,grad_norm: 0.7150668928647115, iteration: 442451
loss: 0.9928207993507385,grad_norm: 0.7158493603373368, iteration: 442452
loss: 1.0223517417907715,grad_norm: 0.7595493618317739, iteration: 442453
loss: 1.050971508026123,grad_norm: 0.9999998941123692, iteration: 442454
loss: 1.1196374893188477,grad_norm: 0.6959587845674903, iteration: 442455
loss: 1.011852502822876,grad_norm: 0.750176588714795, iteration: 442456
loss: 0.9847722053527832,grad_norm: 0.6811843688859885, iteration: 442457
loss: 1.0436575412750244,grad_norm: 0.7231172863250419, iteration: 442458
loss: 0.9974069595336914,grad_norm: 0.7206180611910551, iteration: 442459
loss: 1.001605749130249,grad_norm: 0.9020432862251243, iteration: 442460
loss: 1.0184611082077026,grad_norm: 0.6688462328806141, iteration: 442461
loss: 1.0690385103225708,grad_norm: 0.9506361914295501, iteration: 442462
loss: 1.0668725967407227,grad_norm: 0.759235567140638, iteration: 442463
loss: 0.9904870986938477,grad_norm: 0.7398010295045955, iteration: 442464
loss: 1.009484052658081,grad_norm: 0.8195106255654718, iteration: 442465
loss: 1.0178093910217285,grad_norm: 0.8949598532257388, iteration: 442466
loss: 1.0161644220352173,grad_norm: 0.8104566500143977, iteration: 442467
loss: 1.0916852951049805,grad_norm: 0.8348594077440485, iteration: 442468
loss: 1.0516905784606934,grad_norm: 0.9433491319107351, iteration: 442469
loss: 1.0118699073791504,grad_norm: 0.740333870868943, iteration: 442470
loss: 0.9662610292434692,grad_norm: 0.8724053392512506, iteration: 442471
loss: 1.0051015615463257,grad_norm: 0.6798409718793785, iteration: 442472
loss: 1.0269514322280884,grad_norm: 0.9999999876785035, iteration: 442473
loss: 1.016821265220642,grad_norm: 0.7506464496934024, iteration: 442474
loss: 1.0210905075073242,grad_norm: 0.957641096422159, iteration: 442475
loss: 1.0187523365020752,grad_norm: 0.6875187663649489, iteration: 442476
loss: 0.9734342694282532,grad_norm: 0.9307858548533531, iteration: 442477
loss: 1.0824769735336304,grad_norm: 0.8535718597701155, iteration: 442478
loss: 1.079310655593872,grad_norm: 0.9999991341632933, iteration: 442479
loss: 1.038631796836853,grad_norm: 0.6065185246969157, iteration: 442480
loss: 0.9752097129821777,grad_norm: 0.6973764098596665, iteration: 442481
loss: 1.068763256072998,grad_norm: 0.8042100616987001, iteration: 442482
loss: 0.9875447154045105,grad_norm: 0.8760075846607371, iteration: 442483
loss: 0.9946536421775818,grad_norm: 0.7542765933468721, iteration: 442484
loss: 1.062867522239685,grad_norm: 0.9910866267602871, iteration: 442485
loss: 1.0582088232040405,grad_norm: 0.7359356886396932, iteration: 442486
loss: 1.0036571025848389,grad_norm: 0.7363147050788171, iteration: 442487
loss: 0.9746319055557251,grad_norm: 0.7341831424707992, iteration: 442488
loss: 1.0039507150650024,grad_norm: 0.7834568778962955, iteration: 442489
loss: 1.0398651361465454,grad_norm: 0.8015542969839682, iteration: 442490
loss: 1.0262811183929443,grad_norm: 0.7764367605370845, iteration: 442491
loss: 1.0647473335266113,grad_norm: 0.7416855096344873, iteration: 442492
loss: 1.004833698272705,grad_norm: 0.8855202195818211, iteration: 442493
loss: 1.0005470514297485,grad_norm: 0.9999990537618624, iteration: 442494
loss: 1.0060614347457886,grad_norm: 0.7814732706705722, iteration: 442495
loss: 1.0000674724578857,grad_norm: 0.8928326859134248, iteration: 442496
loss: 0.9800509810447693,grad_norm: 0.80889785698368, iteration: 442497
loss: 1.0010223388671875,grad_norm: 0.8587748036607795, iteration: 442498
loss: 1.0081013441085815,grad_norm: 0.9999994488458173, iteration: 442499
loss: 1.0580816268920898,grad_norm: 0.9999996855856549, iteration: 442500
loss: 0.9882317185401917,grad_norm: 0.8596878044833691, iteration: 442501
loss: 1.0252580642700195,grad_norm: 0.8577616206972771, iteration: 442502
loss: 0.9817984104156494,grad_norm: 0.7933813349985754, iteration: 442503
loss: 1.0010511875152588,grad_norm: 0.760744048812737, iteration: 442504
loss: 0.9898207187652588,grad_norm: 0.7065749515515563, iteration: 442505
loss: 1.1173592805862427,grad_norm: 1.0000000460099001, iteration: 442506
loss: 0.9649229049682617,grad_norm: 0.7028713801892487, iteration: 442507
loss: 0.9953608512878418,grad_norm: 0.7895986742936207, iteration: 442508
loss: 0.967441976070404,grad_norm: 0.7474386640063905, iteration: 442509
loss: 1.0227959156036377,grad_norm: 0.9125122507553362, iteration: 442510
loss: 1.0261743068695068,grad_norm: 0.999999859942686, iteration: 442511
loss: 0.9888341426849365,grad_norm: 0.7713917328902934, iteration: 442512
loss: 1.047996163368225,grad_norm: 0.9999996329505364, iteration: 442513
loss: 1.0078771114349365,grad_norm: 0.6021795767746232, iteration: 442514
loss: 1.05838942527771,grad_norm: 0.9999999085822471, iteration: 442515
loss: 1.019995093345642,grad_norm: 0.8363918709860044, iteration: 442516
loss: 1.0339481830596924,grad_norm: 0.7990483667436467, iteration: 442517
loss: 1.019797444343567,grad_norm: 0.8695203125074107, iteration: 442518
loss: 1.006730079650879,grad_norm: 0.717322339808817, iteration: 442519
loss: 0.9800520539283752,grad_norm: 0.8022056708701558, iteration: 442520
loss: 1.0213631391525269,grad_norm: 0.8178108278230592, iteration: 442521
loss: 1.0572102069854736,grad_norm: 0.9060273436824077, iteration: 442522
loss: 1.0376571416854858,grad_norm: 0.7542514297709744, iteration: 442523
loss: 1.0598593950271606,grad_norm: 0.9999999232968361, iteration: 442524
loss: 1.0254545211791992,grad_norm: 0.7364420866492939, iteration: 442525
loss: 1.0324300527572632,grad_norm: 0.9999991351649578, iteration: 442526
loss: 0.9892358779907227,grad_norm: 0.7848527219272252, iteration: 442527
loss: 1.0456258058547974,grad_norm: 0.9999989965638041, iteration: 442528
loss: 1.0130832195281982,grad_norm: 0.7571154257335585, iteration: 442529
loss: 1.0077306032180786,grad_norm: 0.6909996402725321, iteration: 442530
loss: 1.01162588596344,grad_norm: 0.7496098671974196, iteration: 442531
loss: 1.0316578149795532,grad_norm: 0.7374603450850337, iteration: 442532
loss: 0.9862582087516785,grad_norm: 0.8185960940617375, iteration: 442533
loss: 0.9868678450584412,grad_norm: 0.6614123157083974, iteration: 442534
loss: 1.0124123096466064,grad_norm: 0.7182826970178368, iteration: 442535
loss: 1.057734489440918,grad_norm: 0.6941322021678928, iteration: 442536
loss: 1.0017303228378296,grad_norm: 0.9243254120364162, iteration: 442537
loss: 1.0430032014846802,grad_norm: 0.9999989487340118, iteration: 442538
loss: 0.9983827471733093,grad_norm: 0.824072001616059, iteration: 442539
loss: 1.1598894596099854,grad_norm: 0.989565621856268, iteration: 442540
loss: 1.0183104276657104,grad_norm: 0.9999998667585658, iteration: 442541
loss: 0.998929500579834,grad_norm: 0.709097423426781, iteration: 442542
loss: 1.0013751983642578,grad_norm: 0.9999996260179096, iteration: 442543
loss: 1.064835548400879,grad_norm: 0.9999995086371258, iteration: 442544
loss: 1.0163942575454712,grad_norm: 0.7997949954477134, iteration: 442545
loss: 1.0068094730377197,grad_norm: 0.8661367474770737, iteration: 442546
loss: 0.9687670469284058,grad_norm: 0.761536923253572, iteration: 442547
loss: 1.0361956357955933,grad_norm: 0.999999086316683, iteration: 442548
loss: 0.9635158181190491,grad_norm: 0.7034530959873989, iteration: 442549
loss: 1.0577389001846313,grad_norm: 0.6973053509145057, iteration: 442550
loss: 1.0069758892059326,grad_norm: 0.9099568312046735, iteration: 442551
loss: 1.042784333229065,grad_norm: 0.7035461330647591, iteration: 442552
loss: 0.9958474040031433,grad_norm: 0.8882905456407995, iteration: 442553
loss: 1.026621699333191,grad_norm: 0.9999991601294748, iteration: 442554
loss: 0.9902464151382446,grad_norm: 0.8648126459168558, iteration: 442555
loss: 1.0219388008117676,grad_norm: 0.9999990431279178, iteration: 442556
loss: 1.0405118465423584,grad_norm: 0.9151301571926027, iteration: 442557
loss: 1.004136323928833,grad_norm: 0.783021992683315, iteration: 442558
loss: 1.0392937660217285,grad_norm: 0.9999998421575973, iteration: 442559
loss: 0.9989496469497681,grad_norm: 0.8911216961060846, iteration: 442560
loss: 1.0405735969543457,grad_norm: 0.8187928617395408, iteration: 442561
loss: 1.030253291130066,grad_norm: 1.0000000238522204, iteration: 442562
loss: 0.957097053527832,grad_norm: 0.7896656655410034, iteration: 442563
loss: 0.9940168261528015,grad_norm: 0.8419779266326348, iteration: 442564
loss: 0.9835534691810608,grad_norm: 0.7870638169835742, iteration: 442565
loss: 1.0587671995162964,grad_norm: 0.9999993239167655, iteration: 442566
loss: 1.003156304359436,grad_norm: 0.6802465715017881, iteration: 442567
loss: 1.0113623142242432,grad_norm: 0.9467630649106306, iteration: 442568
loss: 0.9935746788978577,grad_norm: 0.999999072174739, iteration: 442569
loss: 1.1014583110809326,grad_norm: 0.9999999246735698, iteration: 442570
loss: 0.9576898813247681,grad_norm: 0.8746969730927001, iteration: 442571
loss: 0.9688344597816467,grad_norm: 0.7343136949008666, iteration: 442572
loss: 1.021509051322937,grad_norm: 0.999999057139674, iteration: 442573
loss: 1.01365327835083,grad_norm: 0.6966494275043127, iteration: 442574
loss: 1.0283199548721313,grad_norm: 0.9999994204797161, iteration: 442575
loss: 0.9852719306945801,grad_norm: 0.8263289846788701, iteration: 442576
loss: 1.1438305377960205,grad_norm: 0.9999996533958587, iteration: 442577
loss: 1.0345276594161987,grad_norm: 0.999999286513461, iteration: 442578
loss: 0.9985008239746094,grad_norm: 0.8530383678973529, iteration: 442579
loss: 1.0215134620666504,grad_norm: 0.7898339497947627, iteration: 442580
loss: 1.0395708084106445,grad_norm: 0.899096177000367, iteration: 442581
loss: 1.032248616218567,grad_norm: 0.8680778650833316, iteration: 442582
loss: 1.0311834812164307,grad_norm: 0.9999994393699553, iteration: 442583
loss: 0.9841744303703308,grad_norm: 0.7819972514769729, iteration: 442584
loss: 1.0627926588058472,grad_norm: 0.9999992683311967, iteration: 442585
loss: 0.9767248630523682,grad_norm: 0.8542843817937238, iteration: 442586
loss: 0.9825982451438904,grad_norm: 0.8335799637768739, iteration: 442587
loss: 1.0386791229248047,grad_norm: 0.999999600494802, iteration: 442588
loss: 1.0196106433868408,grad_norm: 0.859776050768146, iteration: 442589
loss: 1.0309693813323975,grad_norm: 0.7734414983190376, iteration: 442590
loss: 0.985514223575592,grad_norm: 0.7090725294286719, iteration: 442591
loss: 1.0069867372512817,grad_norm: 0.6678042785644397, iteration: 442592
loss: 1.027741551399231,grad_norm: 0.8718817374478972, iteration: 442593
loss: 1.0615601539611816,grad_norm: 0.999999954634562, iteration: 442594
loss: 1.0161776542663574,grad_norm: 0.9999990898237929, iteration: 442595
loss: 1.039062738418579,grad_norm: 0.9999997932045931, iteration: 442596
loss: 1.0192497968673706,grad_norm: 0.7605751918414486, iteration: 442597
loss: 0.9848809242248535,grad_norm: 0.7424566539082937, iteration: 442598
loss: 1.0740762948989868,grad_norm: 0.999999793593077, iteration: 442599
loss: 1.028786301612854,grad_norm: 0.9999994241662962, iteration: 442600
loss: 0.9987451434135437,grad_norm: 0.8143828124194344, iteration: 442601
loss: 0.9820721745491028,grad_norm: 0.7401420185071627, iteration: 442602
loss: 0.98432457447052,grad_norm: 0.7663859030017767, iteration: 442603
loss: 1.002619743347168,grad_norm: 0.7809255439269122, iteration: 442604
loss: 1.0600537061691284,grad_norm: 0.8818155960047146, iteration: 442605
loss: 0.9852829575538635,grad_norm: 0.9999991947347101, iteration: 442606
loss: 1.0074132680892944,grad_norm: 0.7376449024466807, iteration: 442607
loss: 1.1229259967803955,grad_norm: 1.000000023740794, iteration: 442608
loss: 1.0129039287567139,grad_norm: 0.9791256556719282, iteration: 442609
loss: 1.0388473272323608,grad_norm: 0.8061720218927658, iteration: 442610
loss: 1.0054492950439453,grad_norm: 0.7198766754428709, iteration: 442611
loss: 1.0086783170700073,grad_norm: 0.8160996869649816, iteration: 442612
loss: 0.9849204421043396,grad_norm: 0.6712709283972496, iteration: 442613
loss: 1.0058434009552002,grad_norm: 0.9196428047902042, iteration: 442614
loss: 0.9981807470321655,grad_norm: 0.6582083839165134, iteration: 442615
loss: 1.0195282697677612,grad_norm: 0.9999998785444641, iteration: 442616
loss: 0.9826042652130127,grad_norm: 0.7024976949192946, iteration: 442617
loss: 1.0127832889556885,grad_norm: 0.7094225650032016, iteration: 442618
loss: 0.9965541362762451,grad_norm: 0.6850616598658754, iteration: 442619
loss: 1.0088696479797363,grad_norm: 0.8004123513674166, iteration: 442620
loss: 1.0026061534881592,grad_norm: 0.7699515550712785, iteration: 442621
loss: 0.9287224411964417,grad_norm: 0.7693938826666136, iteration: 442622
loss: 1.0483087301254272,grad_norm: 0.6645609324431027, iteration: 442623
loss: 1.005468726158142,grad_norm: 0.8320387000379399, iteration: 442624
loss: 0.9935916662216187,grad_norm: 0.9999990421428508, iteration: 442625
loss: 1.0545774698257446,grad_norm: 0.9999992765978534, iteration: 442626
loss: 1.0190186500549316,grad_norm: 0.7697962330424037, iteration: 442627
loss: 0.9819237589836121,grad_norm: 0.7576338833022633, iteration: 442628
loss: 0.9663233160972595,grad_norm: 0.9022854732262591, iteration: 442629
loss: 1.0059115886688232,grad_norm: 0.9973213026568877, iteration: 442630
loss: 0.989913284778595,grad_norm: 0.8099294744217866, iteration: 442631
loss: 1.0252097845077515,grad_norm: 0.7471249588378579, iteration: 442632
loss: 1.009709119796753,grad_norm: 0.7743480645637547, iteration: 442633
loss: 1.0099760293960571,grad_norm: 0.7894388154002182, iteration: 442634
loss: 1.041339635848999,grad_norm: 0.836039079848874, iteration: 442635
loss: 1.0279169082641602,grad_norm: 0.7540955038871764, iteration: 442636
loss: 1.0164129734039307,grad_norm: 0.7163580444558026, iteration: 442637
loss: 1.0351301431655884,grad_norm: 0.6054183984168033, iteration: 442638
loss: 0.9826952815055847,grad_norm: 0.7978323802259343, iteration: 442639
loss: 1.0336772203445435,grad_norm: 0.7612292443558589, iteration: 442640
loss: 1.0149924755096436,grad_norm: 0.7178864448358587, iteration: 442641
loss: 1.0231282711029053,grad_norm: 0.6838988279505411, iteration: 442642
loss: 1.0009487867355347,grad_norm: 0.786258089370574, iteration: 442643
loss: 1.008588433265686,grad_norm: 0.7134811129805765, iteration: 442644
loss: 0.9825300574302673,grad_norm: 0.807167973046678, iteration: 442645
loss: 1.0073182582855225,grad_norm: 0.7795027127065187, iteration: 442646
loss: 1.0018812417984009,grad_norm: 0.7486330632916642, iteration: 442647
loss: 0.9699739813804626,grad_norm: 0.9017269539704817, iteration: 442648
loss: 1.1520025730133057,grad_norm: 0.9999999612469647, iteration: 442649
loss: 1.0220682621002197,grad_norm: 0.9022296060300563, iteration: 442650
loss: 0.9914477467536926,grad_norm: 0.5681381674670901, iteration: 442651
loss: 0.9866230487823486,grad_norm: 0.7503007583570743, iteration: 442652
loss: 0.9823650121688843,grad_norm: 0.6418216318325992, iteration: 442653
loss: 1.0079096555709839,grad_norm: 0.7350468113389943, iteration: 442654
loss: 1.0117299556732178,grad_norm: 0.7682218103469498, iteration: 442655
loss: 1.0260900259017944,grad_norm: 0.6906312517074359, iteration: 442656
loss: 0.9768616557121277,grad_norm: 0.8551138615273552, iteration: 442657
loss: 0.9945979118347168,grad_norm: 0.8699204386606291, iteration: 442658
loss: 1.0632109642028809,grad_norm: 0.8149876322957196, iteration: 442659
loss: 0.9714690446853638,grad_norm: 0.6767263433285607, iteration: 442660
loss: 1.000598430633545,grad_norm: 0.682547462555996, iteration: 442661
loss: 0.9802542328834534,grad_norm: 0.7590788891738964, iteration: 442662
loss: 0.9947431087493896,grad_norm: 0.8322223341888758, iteration: 442663
loss: 1.0325514078140259,grad_norm: 0.9999990271676145, iteration: 442664
loss: 1.0080883502960205,grad_norm: 0.9999992680156484, iteration: 442665
loss: 0.9636656641960144,grad_norm: 0.7512349016575699, iteration: 442666
loss: 0.9911386370658875,grad_norm: 0.8658420837480647, iteration: 442667
loss: 1.000952124595642,grad_norm: 0.8179343932504604, iteration: 442668
loss: 1.018385887145996,grad_norm: 0.6853236348835849, iteration: 442669
loss: 1.04866361618042,grad_norm: 0.8236834769285247, iteration: 442670
loss: 0.9796656370162964,grad_norm: 0.6743387444422713, iteration: 442671
loss: 1.0312623977661133,grad_norm: 1.0000000658558603, iteration: 442672
loss: 0.9937116503715515,grad_norm: 0.6866312039773497, iteration: 442673
loss: 1.0344114303588867,grad_norm: 0.9315892272177388, iteration: 442674
loss: 1.0260493755340576,grad_norm: 0.9342009971512674, iteration: 442675
loss: 0.9840390086174011,grad_norm: 0.9838456096316857, iteration: 442676
loss: 0.9659412503242493,grad_norm: 0.6546968592221019, iteration: 442677
loss: 1.0273538827896118,grad_norm: 0.6791333847745433, iteration: 442678
loss: 1.032118558883667,grad_norm: 0.9261468494737547, iteration: 442679
loss: 0.9757846593856812,grad_norm: 0.751421959599248, iteration: 442680
loss: 0.9821289777755737,grad_norm: 0.7199557502788891, iteration: 442681
loss: 1.0197654962539673,grad_norm: 0.9999990234650283, iteration: 442682
loss: 0.9592371582984924,grad_norm: 0.6996322978940515, iteration: 442683
loss: 1.0056012868881226,grad_norm: 0.7761815692009116, iteration: 442684
loss: 1.0110056400299072,grad_norm: 0.7359197064583054, iteration: 442685
loss: 0.9976272583007812,grad_norm: 0.7329816591597145, iteration: 442686
loss: 0.9893965125083923,grad_norm: 0.8054798374226859, iteration: 442687
loss: 0.9666256904602051,grad_norm: 0.8341532091212668, iteration: 442688
loss: 0.9828246831893921,grad_norm: 0.9039839241449161, iteration: 442689
loss: 0.9981305003166199,grad_norm: 0.8504530583334458, iteration: 442690
loss: 1.0102832317352295,grad_norm: 0.7874321407315662, iteration: 442691
loss: 1.0051628351211548,grad_norm: 0.8295555321877436, iteration: 442692
loss: 1.0244780778884888,grad_norm: 0.6489252474140501, iteration: 442693
loss: 0.9911544919013977,grad_norm: 0.6892735930717063, iteration: 442694
loss: 1.010125994682312,grad_norm: 0.7309439429568809, iteration: 442695
loss: 0.9934855103492737,grad_norm: 0.7710065781747906, iteration: 442696
loss: 1.0209530591964722,grad_norm: 0.9999990011975306, iteration: 442697
loss: 1.0024327039718628,grad_norm: 0.7695650437194786, iteration: 442698
loss: 1.0193018913269043,grad_norm: 0.8235153573865336, iteration: 442699
loss: 0.9771417379379272,grad_norm: 0.9999992544609818, iteration: 442700
loss: 1.0151907205581665,grad_norm: 0.9310866362098866, iteration: 442701
loss: 1.030685544013977,grad_norm: 0.8928083576054724, iteration: 442702
loss: 1.0308616161346436,grad_norm: 0.7841667401079706, iteration: 442703
loss: 1.0088011026382446,grad_norm: 0.7676903740288948, iteration: 442704
loss: 0.9761495590209961,grad_norm: 0.7962663777186486, iteration: 442705
loss: 1.0694150924682617,grad_norm: 0.9999990431931051, iteration: 442706
loss: 1.0076904296875,grad_norm: 0.7809371863109282, iteration: 442707
loss: 1.0071258544921875,grad_norm: 0.6770094171582909, iteration: 442708
loss: 1.0246365070343018,grad_norm: 0.9066250397722576, iteration: 442709
loss: 0.9981393814086914,grad_norm: 0.9145800192058163, iteration: 442710
loss: 1.0350769758224487,grad_norm: 0.9999993749134631, iteration: 442711
loss: 0.9918839931488037,grad_norm: 0.6777066957920987, iteration: 442712
loss: 1.0093822479248047,grad_norm: 0.775303163219221, iteration: 442713
loss: 1.0625512599945068,grad_norm: 0.9999993381320778, iteration: 442714
loss: 0.9969429969787598,grad_norm: 0.8104063318173075, iteration: 442715
loss: 0.975134551525116,grad_norm: 0.7176243304911857, iteration: 442716
loss: 1.0016015768051147,grad_norm: 0.8184066977102123, iteration: 442717
loss: 1.0050863027572632,grad_norm: 0.8891302865406329, iteration: 442718
loss: 1.0496639013290405,grad_norm: 0.7612551425546165, iteration: 442719
loss: 0.9751281142234802,grad_norm: 0.7847466289925186, iteration: 442720
loss: 1.0018389225006104,grad_norm: 0.6963991152158888, iteration: 442721
loss: 1.0120837688446045,grad_norm: 0.7257207351525532, iteration: 442722
loss: 1.066640019416809,grad_norm: 0.9125109099627007, iteration: 442723
loss: 0.9790758490562439,grad_norm: 0.9128111463806232, iteration: 442724
loss: 1.1478228569030762,grad_norm: 0.8733390494538052, iteration: 442725
loss: 0.9481990337371826,grad_norm: 0.7733099377712621, iteration: 442726
loss: 1.0366194248199463,grad_norm: 0.8894539398723729, iteration: 442727
loss: 1.1009629964828491,grad_norm: 0.836453687272944, iteration: 442728
loss: 1.0024021863937378,grad_norm: 0.7312303094665882, iteration: 442729
loss: 1.067251443862915,grad_norm: 0.6948275756687687, iteration: 442730
loss: 0.9620466232299805,grad_norm: 0.8981675389852882, iteration: 442731
loss: 0.9914484620094299,grad_norm: 0.8744217531549113, iteration: 442732
loss: 0.9899441003799438,grad_norm: 0.7375738068209278, iteration: 442733
loss: 0.9952704310417175,grad_norm: 0.8881155710861273, iteration: 442734
loss: 1.0163248777389526,grad_norm: 0.7948027309880716, iteration: 442735
loss: 0.976751446723938,grad_norm: 0.7615673672548091, iteration: 442736
loss: 1.07048761844635,grad_norm: 0.9839717798580091, iteration: 442737
loss: 1.0165568590164185,grad_norm: 0.8354747816214634, iteration: 442738
loss: 0.9988403916358948,grad_norm: 0.999999687796994, iteration: 442739
loss: 1.0076748132705688,grad_norm: 0.7128338141614258, iteration: 442740
loss: 1.0057772397994995,grad_norm: 0.7056282089395574, iteration: 442741
loss: 1.026405930519104,grad_norm: 0.7570786198931867, iteration: 442742
loss: 1.002948522567749,grad_norm: 0.9999994475627609, iteration: 442743
loss: 1.0955140590667725,grad_norm: 0.9999992501123162, iteration: 442744
loss: 1.253502607345581,grad_norm: 0.9999998828249629, iteration: 442745
loss: 0.9643968343734741,grad_norm: 0.7312227972646194, iteration: 442746
loss: 0.951987624168396,grad_norm: 0.8547657289991699, iteration: 442747
loss: 1.0868362188339233,grad_norm: 0.9999993901542445, iteration: 442748
loss: 1.033478021621704,grad_norm: 0.6975287188476105, iteration: 442749
loss: 0.9804758429527283,grad_norm: 0.7564750887540244, iteration: 442750
loss: 1.041892170906067,grad_norm: 0.9121980162610288, iteration: 442751
loss: 1.086624026298523,grad_norm: 0.7265415139833376, iteration: 442752
loss: 0.9881339073181152,grad_norm: 0.9379706397098156, iteration: 442753
loss: 1.028805136680603,grad_norm: 0.9999998596242365, iteration: 442754
loss: 1.0172480344772339,grad_norm: 0.7252140001181464, iteration: 442755
loss: 1.0290828943252563,grad_norm: 0.7009502234475623, iteration: 442756
loss: 0.9666357040405273,grad_norm: 0.664530577768927, iteration: 442757
loss: 0.994326114654541,grad_norm: 0.8765685351275165, iteration: 442758
loss: 1.0258079767227173,grad_norm: 0.8883170850732351, iteration: 442759
loss: 0.9839493632316589,grad_norm: 0.9265093467565375, iteration: 442760
loss: 0.9698144197463989,grad_norm: 0.7319290731011746, iteration: 442761
loss: 0.9986799359321594,grad_norm: 0.8359068103857606, iteration: 442762
loss: 1.0061100721359253,grad_norm: 0.783550770922746, iteration: 442763
loss: 1.0763713121414185,grad_norm: 0.9999991087590873, iteration: 442764
loss: 0.9739844799041748,grad_norm: 0.8086708659842564, iteration: 442765
loss: 1.2462016344070435,grad_norm: 0.9999993761064898, iteration: 442766
loss: 0.9713289141654968,grad_norm: 0.9158114665270252, iteration: 442767
loss: 1.0130776166915894,grad_norm: 0.6439431780760746, iteration: 442768
loss: 1.067460060119629,grad_norm: 0.7360869426878444, iteration: 442769
loss: 1.0180355310440063,grad_norm: 0.8533671440043779, iteration: 442770
loss: 1.0375101566314697,grad_norm: 0.9999992709459754, iteration: 442771
loss: 0.9915153384208679,grad_norm: 0.9999995853199276, iteration: 442772
loss: 0.9874799251556396,grad_norm: 0.728526928733641, iteration: 442773
loss: 0.9672926068305969,grad_norm: 0.8287661044384619, iteration: 442774
loss: 0.9866227507591248,grad_norm: 0.7531252867221424, iteration: 442775
loss: 1.012225866317749,grad_norm: 0.787926190686617, iteration: 442776
loss: 1.003537893295288,grad_norm: 0.7840196858689018, iteration: 442777
loss: 1.0842701196670532,grad_norm: 0.8989537762568794, iteration: 442778
loss: 0.9859616756439209,grad_norm: 0.7438407581544013, iteration: 442779
loss: 1.0157431364059448,grad_norm: 0.7850729921796561, iteration: 442780
loss: 0.9992813467979431,grad_norm: 0.901105954170905, iteration: 442781
loss: 1.023314356803894,grad_norm: 0.8273315988892157, iteration: 442782
loss: 1.0296436548233032,grad_norm: 0.7294873731748008, iteration: 442783
loss: 0.9987857937812805,grad_norm: 0.9999999035356313, iteration: 442784
loss: 0.9411740303039551,grad_norm: 0.7334176919084764, iteration: 442785
loss: 1.1653053760528564,grad_norm: 0.999999221339168, iteration: 442786
loss: 0.9860363006591797,grad_norm: 0.725697111237181, iteration: 442787
loss: 1.010266900062561,grad_norm: 0.8191683705955104, iteration: 442788
loss: 1.017012596130371,grad_norm: 0.7521170229262661, iteration: 442789
loss: 0.9943037033081055,grad_norm: 0.9999995222135862, iteration: 442790
loss: 1.078755259513855,grad_norm: 0.9020986587216442, iteration: 442791
loss: 0.9951817989349365,grad_norm: 0.6781635970859541, iteration: 442792
loss: 1.0142852067947388,grad_norm: 0.7961950080193991, iteration: 442793
loss: 0.968334436416626,grad_norm: 0.7889969924439143, iteration: 442794
loss: 1.0038052797317505,grad_norm: 0.7945240559644327, iteration: 442795
loss: 0.9984252452850342,grad_norm: 0.6907319709963099, iteration: 442796
loss: 1.0266411304473877,grad_norm: 0.9193780054503932, iteration: 442797
loss: 0.971721887588501,grad_norm: 0.84663662194307, iteration: 442798
loss: 0.9894222021102905,grad_norm: 0.8768641436179421, iteration: 442799
loss: 0.9811592102050781,grad_norm: 0.674049566808541, iteration: 442800
loss: 0.993516206741333,grad_norm: 0.8069818919188214, iteration: 442801
loss: 0.9762723445892334,grad_norm: 0.8368917757203567, iteration: 442802
loss: 1.0400195121765137,grad_norm: 0.9999998866136415, iteration: 442803
loss: 1.0232148170471191,grad_norm: 0.798574324294053, iteration: 442804
loss: 0.9989853501319885,grad_norm: 0.6963880289513162, iteration: 442805
loss: 0.9782863855361938,grad_norm: 0.6463223116086324, iteration: 442806
loss: 0.9902941584587097,grad_norm: 0.9334175628697687, iteration: 442807
loss: 1.0881251096725464,grad_norm: 0.9999999240722054, iteration: 442808
loss: 0.9904764294624329,grad_norm: 0.9695852885982739, iteration: 442809
loss: 1.0378117561340332,grad_norm: 0.5837863484911594, iteration: 442810
loss: 1.063032865524292,grad_norm: 0.8932145559817704, iteration: 442811
loss: 1.0425320863723755,grad_norm: 0.9312297811037461, iteration: 442812
loss: 1.0898076295852661,grad_norm: 0.8093971730370518, iteration: 442813
loss: 0.9680269956588745,grad_norm: 0.6623468790540621, iteration: 442814
loss: 1.010711669921875,grad_norm: 0.8860468086790033, iteration: 442815
loss: 1.1206235885620117,grad_norm: 0.9999996631945695, iteration: 442816
loss: 1.0030711889266968,grad_norm: 0.845972822996292, iteration: 442817
loss: 0.9824861288070679,grad_norm: 0.8850948184686896, iteration: 442818
loss: 1.2844547033309937,grad_norm: 0.9999996038314387, iteration: 442819
loss: 1.003368616104126,grad_norm: 0.908322534931399, iteration: 442820
loss: 1.116666316986084,grad_norm: 0.9999994322788014, iteration: 442821
loss: 1.0175883769989014,grad_norm: 0.8094191834054686, iteration: 442822
loss: 0.9848024845123291,grad_norm: 0.9160578085974774, iteration: 442823
loss: 1.0130608081817627,grad_norm: 0.7992541203625961, iteration: 442824
loss: 0.9769590497016907,grad_norm: 0.7134690165338049, iteration: 442825
loss: 0.9859625101089478,grad_norm: 0.6795363291348403, iteration: 442826
loss: 1.0148693323135376,grad_norm: 0.7065943338144852, iteration: 442827
loss: 1.0045125484466553,grad_norm: 0.9515195670236166, iteration: 442828
loss: 1.0490072965621948,grad_norm: 0.9270052552873153, iteration: 442829
loss: 1.004300594329834,grad_norm: 0.7380000050962905, iteration: 442830
loss: 0.9790962338447571,grad_norm: 0.7639616146092915, iteration: 442831
loss: 0.9968388676643372,grad_norm: 0.7732166684710943, iteration: 442832
loss: 0.9829807877540588,grad_norm: 0.9533312870417767, iteration: 442833
loss: 0.9980526566505432,grad_norm: 0.8343233493909861, iteration: 442834
loss: 1.0249907970428467,grad_norm: 0.9023942385154767, iteration: 442835
loss: 1.0156583786010742,grad_norm: 0.8958813741374543, iteration: 442836
loss: 1.0046359300613403,grad_norm: 0.690728469440427, iteration: 442837
loss: 1.0165116786956787,grad_norm: 0.7478178726685843, iteration: 442838
loss: 1.1521021127700806,grad_norm: 0.9999991528766509, iteration: 442839
loss: 0.9791508316993713,grad_norm: 0.7596000376657461, iteration: 442840
loss: 0.9930922389030457,grad_norm: 0.9297121296833982, iteration: 442841
loss: 0.9834321141242981,grad_norm: 0.7765915548203989, iteration: 442842
loss: 1.027579426765442,grad_norm: 0.6881095181140369, iteration: 442843
loss: 1.0138052701950073,grad_norm: 0.8121644782366552, iteration: 442844
loss: 0.9955386519432068,grad_norm: 0.8510191238544235, iteration: 442845
loss: 1.0908344984054565,grad_norm: 0.9999992066023266, iteration: 442846
loss: 0.9921973943710327,grad_norm: 0.7323246652709582, iteration: 442847
loss: 1.0255517959594727,grad_norm: 0.8703223122628556, iteration: 442848
loss: 0.9847726821899414,grad_norm: 0.749654076900654, iteration: 442849
loss: 0.984744668006897,grad_norm: 0.7914780086145506, iteration: 442850
loss: 0.993506908416748,grad_norm: 0.6956986826142136, iteration: 442851
loss: 0.9916325211524963,grad_norm: 0.7320796403895299, iteration: 442852
loss: 1.0120536088943481,grad_norm: 0.7319506842979369, iteration: 442853
loss: 0.9849945306777954,grad_norm: 0.8209308046827456, iteration: 442854
loss: 0.9810908436775208,grad_norm: 0.6940684325545203, iteration: 442855
loss: 0.9854512214660645,grad_norm: 0.8041631639511659, iteration: 442856
loss: 0.9865888357162476,grad_norm: 0.8865858434078697, iteration: 442857
loss: 0.9945016503334045,grad_norm: 0.6796590176344827, iteration: 442858
loss: 0.9973583817481995,grad_norm: 0.7467449942082132, iteration: 442859
loss: 1.0358678102493286,grad_norm: 0.9999991132583581, iteration: 442860
loss: 0.9727215766906738,grad_norm: 0.7166023977351145, iteration: 442861
loss: 1.0116300582885742,grad_norm: 0.8443292611579126, iteration: 442862
loss: 1.0036506652832031,grad_norm: 0.781339340011413, iteration: 442863
loss: 0.9484958052635193,grad_norm: 0.9999989965966578, iteration: 442864
loss: 0.966463029384613,grad_norm: 0.8062113331279459, iteration: 442865
loss: 1.0163757801055908,grad_norm: 0.8076779250555409, iteration: 442866
loss: 0.9798473119735718,grad_norm: 0.8783329505243849, iteration: 442867
loss: 1.0257517099380493,grad_norm: 0.850613599319613, iteration: 442868
loss: 1.0103368759155273,grad_norm: 0.7952481734751364, iteration: 442869
loss: 0.9677940011024475,grad_norm: 0.705849548971114, iteration: 442870
loss: 0.9981720447540283,grad_norm: 0.7311621962087519, iteration: 442871
loss: 1.0550569295883179,grad_norm: 0.9999990440493232, iteration: 442872
loss: 1.0049540996551514,grad_norm: 0.778964761423346, iteration: 442873
loss: 1.019669532775879,grad_norm: 0.8131020860210418, iteration: 442874
loss: 0.9890287518501282,grad_norm: 0.9271575086777639, iteration: 442875
loss: 0.9972882270812988,grad_norm: 0.9999990336316443, iteration: 442876
loss: 1.0082944631576538,grad_norm: 0.8581605537290461, iteration: 442877
loss: 0.964718759059906,grad_norm: 0.9999997786665825, iteration: 442878
loss: 0.957673192024231,grad_norm: 0.8665521446206688, iteration: 442879
loss: 1.0066660642623901,grad_norm: 0.7170718458345773, iteration: 442880
loss: 1.0041770935058594,grad_norm: 0.7857542974342383, iteration: 442881
loss: 1.0211652517318726,grad_norm: 0.9661254640851575, iteration: 442882
loss: 1.035103678703308,grad_norm: 0.7183583353056027, iteration: 442883
loss: 1.0340421199798584,grad_norm: 0.9999996334114079, iteration: 442884
loss: 1.0120290517807007,grad_norm: 0.9197381473015844, iteration: 442885
loss: 1.0121616125106812,grad_norm: 0.7492609859210848, iteration: 442886
loss: 0.9739301204681396,grad_norm: 0.8591086745624755, iteration: 442887
loss: 0.9920576214790344,grad_norm: 0.7914243876787149, iteration: 442888
loss: 0.9746914505958557,grad_norm: 0.8788667074333609, iteration: 442889
loss: 0.9994683861732483,grad_norm: 0.7014888186704883, iteration: 442890
loss: 1.145398497581482,grad_norm: 0.9999997611717376, iteration: 442891
loss: 1.0339207649230957,grad_norm: 0.7139320701086862, iteration: 442892
loss: 0.9818911552429199,grad_norm: 0.8634525110654508, iteration: 442893
loss: 0.9840995669364929,grad_norm: 0.7807127219883699, iteration: 442894
loss: 1.0546025037765503,grad_norm: 0.999999392812624, iteration: 442895
loss: 0.9949550032615662,grad_norm: 0.7421972858247506, iteration: 442896
loss: 0.9933251142501831,grad_norm: 0.9346724360186343, iteration: 442897
loss: 1.0313007831573486,grad_norm: 0.6354284845759081, iteration: 442898
loss: 1.0029489994049072,grad_norm: 0.6888787860410793, iteration: 442899
loss: 1.0228224992752075,grad_norm: 0.9027446994240894, iteration: 442900
loss: 1.0181655883789062,grad_norm: 0.7458298728975616, iteration: 442901
loss: 0.9860526323318481,grad_norm: 0.7066901688436279, iteration: 442902
loss: 0.9616249799728394,grad_norm: 0.8259507009569886, iteration: 442903
loss: 1.018650770187378,grad_norm: 0.8381521026500296, iteration: 442904
loss: 1.0005868673324585,grad_norm: 0.7626110892695089, iteration: 442905
loss: 1.0293118953704834,grad_norm: 0.8242644680742047, iteration: 442906
loss: 0.9809433817863464,grad_norm: 0.7655454952404416, iteration: 442907
loss: 0.9923729300498962,grad_norm: 0.9999991647313673, iteration: 442908
loss: 1.0032265186309814,grad_norm: 0.8587321096880862, iteration: 442909
loss: 1.0107227563858032,grad_norm: 0.7335954857036578, iteration: 442910
loss: 1.0159661769866943,grad_norm: 0.7566337395311916, iteration: 442911
loss: 1.0137569904327393,grad_norm: 0.7374054631949264, iteration: 442912
loss: 1.0155524015426636,grad_norm: 0.7167872826164156, iteration: 442913
loss: 1.0343114137649536,grad_norm: 0.9999998903957814, iteration: 442914
loss: 0.981636643409729,grad_norm: 0.6656249365054597, iteration: 442915
loss: 1.055593490600586,grad_norm: 0.9999998011502782, iteration: 442916
loss: 1.0379934310913086,grad_norm: 0.9999994371522225, iteration: 442917
loss: 1.002694010734558,grad_norm: 0.7102972944997359, iteration: 442918
loss: 1.0028775930404663,grad_norm: 0.698119244892638, iteration: 442919
loss: 1.005828857421875,grad_norm: 0.9999990522745198, iteration: 442920
loss: 1.0011343955993652,grad_norm: 0.6337090020716616, iteration: 442921
loss: 0.9629213213920593,grad_norm: 0.9999991088960993, iteration: 442922
loss: 0.9696901440620422,grad_norm: 0.7785160724093481, iteration: 442923
loss: 1.0162463188171387,grad_norm: 0.7392085086847832, iteration: 442924
loss: 1.110499382019043,grad_norm: 0.9999994913363354, iteration: 442925
loss: 0.9931866526603699,grad_norm: 0.8896190142375965, iteration: 442926
loss: 1.0404706001281738,grad_norm: 0.9999993655836893, iteration: 442927
loss: 1.0699416399002075,grad_norm: 0.9999998676980549, iteration: 442928
loss: 0.974901556968689,grad_norm: 0.9999992720990712, iteration: 442929
loss: 0.9984330534934998,grad_norm: 0.8197568010697169, iteration: 442930
loss: 1.0204744338989258,grad_norm: 0.7349488054401208, iteration: 442931
loss: 0.9920172095298767,grad_norm: 0.7863418022247183, iteration: 442932
loss: 1.096137523651123,grad_norm: 0.9814362269934187, iteration: 442933
loss: 1.0376176834106445,grad_norm: 0.9087017487456174, iteration: 442934
loss: 0.9675043225288391,grad_norm: 0.7057014187002553, iteration: 442935
loss: 1.0437489748001099,grad_norm: 0.6405259720295208, iteration: 442936
loss: 0.9868121147155762,grad_norm: 0.7183853518118904, iteration: 442937
loss: 0.9916266202926636,grad_norm: 0.7719174731210591, iteration: 442938
loss: 1.0624037981033325,grad_norm: 0.87158954082479, iteration: 442939
loss: 1.002110481262207,grad_norm: 0.7736375521084715, iteration: 442940
loss: 0.992637574672699,grad_norm: 0.7363113949993688, iteration: 442941
loss: 0.9646846055984497,grad_norm: 0.7949902404972727, iteration: 442942
loss: 0.9968752861022949,grad_norm: 0.8019376891106785, iteration: 442943
loss: 1.0434718132019043,grad_norm: 0.8702131990722464, iteration: 442944
loss: 1.0337193012237549,grad_norm: 0.9999991639755124, iteration: 442945
loss: 1.0138620138168335,grad_norm: 0.8164904243718443, iteration: 442946
loss: 0.9971449375152588,grad_norm: 0.8651023034962306, iteration: 442947
loss: 1.00007963180542,grad_norm: 0.7522836092472496, iteration: 442948
loss: 0.9415369629859924,grad_norm: 0.7472818879116997, iteration: 442949
loss: 0.9874998331069946,grad_norm: 0.793718955433222, iteration: 442950
loss: 0.989298939704895,grad_norm: 0.9999991021518718, iteration: 442951
loss: 0.9949766397476196,grad_norm: 0.7125134534523392, iteration: 442952
loss: 0.9732400178909302,grad_norm: 0.9312616894838629, iteration: 442953
loss: 1.0233871936798096,grad_norm: 0.845458416719933, iteration: 442954
loss: 0.9997891783714294,grad_norm: 1.0000001092842403, iteration: 442955
loss: 1.0470068454742432,grad_norm: 0.9999999720740423, iteration: 442956
loss: 1.0163968801498413,grad_norm: 0.7025941470333101, iteration: 442957
loss: 1.0124791860580444,grad_norm: 0.8209893369721277, iteration: 442958
loss: 1.0201526880264282,grad_norm: 0.8268412603013667, iteration: 442959
loss: 1.0231915712356567,grad_norm: 0.7825186010140935, iteration: 442960
loss: 1.043428897857666,grad_norm: 0.7106435222902963, iteration: 442961
loss: 0.965846061706543,grad_norm: 0.7796062812637302, iteration: 442962
loss: 1.070609211921692,grad_norm: 0.9999995637132784, iteration: 442963
loss: 1.0379230976104736,grad_norm: 0.8456141398815555, iteration: 442964
loss: 1.0221761465072632,grad_norm: 0.7751073995512737, iteration: 442965
loss: 0.9793763756752014,grad_norm: 0.7810643098737692, iteration: 442966
loss: 0.9964316487312317,grad_norm: 0.6981914896907739, iteration: 442967
loss: 0.9950886964797974,grad_norm: 0.9260950410811293, iteration: 442968
loss: 1.0342789888381958,grad_norm: 0.8853137009468227, iteration: 442969
loss: 1.0022053718566895,grad_norm: 0.886458457799749, iteration: 442970
loss: 0.9922412037849426,grad_norm: 0.9999991021758972, iteration: 442971
loss: 0.9987239837646484,grad_norm: 0.8175897332193525, iteration: 442972
loss: 1.0283029079437256,grad_norm: 0.9809442246871144, iteration: 442973
loss: 1.0010489225387573,grad_norm: 0.9999997011802868, iteration: 442974
loss: 1.010277509689331,grad_norm: 0.9687568870529204, iteration: 442975
loss: 1.0173367261886597,grad_norm: 0.8682157083384243, iteration: 442976
loss: 1.0283222198486328,grad_norm: 0.9999989526996588, iteration: 442977
loss: 1.0154070854187012,grad_norm: 0.6689240933251106, iteration: 442978
loss: 0.9969324469566345,grad_norm: 0.7290868879105831, iteration: 442979
loss: 1.0455905199050903,grad_norm: 0.8465409365317929, iteration: 442980
loss: 0.9856643080711365,grad_norm: 0.6648247798833453, iteration: 442981
loss: 1.0376509428024292,grad_norm: 0.9999994832575188, iteration: 442982
loss: 1.0095146894454956,grad_norm: 0.7727724912844299, iteration: 442983
loss: 1.0029093027114868,grad_norm: 0.7161153830098543, iteration: 442984
loss: 1.0143108367919922,grad_norm: 0.9999991142378539, iteration: 442985
loss: 1.0554834604263306,grad_norm: 0.9999995952814582, iteration: 442986
loss: 1.0031239986419678,grad_norm: 0.8240002103501526, iteration: 442987
loss: 1.0241410732269287,grad_norm: 0.9999994506492695, iteration: 442988
loss: 1.019578218460083,grad_norm: 0.7738070511267892, iteration: 442989
loss: 1.1178333759307861,grad_norm: 0.8600412757214928, iteration: 442990
loss: 0.9991670846939087,grad_norm: 0.7904294446384256, iteration: 442991
loss: 0.98368901014328,grad_norm: 0.8087707169686141, iteration: 442992
loss: 0.983875572681427,grad_norm: 0.9383900356300252, iteration: 442993
loss: 1.013838529586792,grad_norm: 0.9999993465018567, iteration: 442994
loss: 0.9801915287971497,grad_norm: 0.9999990753634251, iteration: 442995
loss: 1.0047110319137573,grad_norm: 0.746472073077431, iteration: 442996
loss: 0.983729898929596,grad_norm: 0.7990281817736428, iteration: 442997
loss: 0.9869905114173889,grad_norm: 0.6591566998850361, iteration: 442998
loss: 0.9919171929359436,grad_norm: 0.8220415246738682, iteration: 442999
loss: 1.0084989070892334,grad_norm: 0.9999991512818925, iteration: 443000
loss: 1.000217318534851,grad_norm: 0.999999863033411, iteration: 443001
loss: 1.0240283012390137,grad_norm: 0.6967741199895237, iteration: 443002
loss: 0.9689753651618958,grad_norm: 0.7622508550847619, iteration: 443003
loss: 1.0217612981796265,grad_norm: 0.7681911222137573, iteration: 443004
loss: 0.9935762286186218,grad_norm: 0.6970448083789709, iteration: 443005
loss: 1.010941505432129,grad_norm: 0.626543034126313, iteration: 443006
loss: 1.0293898582458496,grad_norm: 0.9999990327197226, iteration: 443007
loss: 1.0250272750854492,grad_norm: 0.678349303458529, iteration: 443008
loss: 1.0293039083480835,grad_norm: 0.7033597916504136, iteration: 443009
loss: 1.040220856666565,grad_norm: 0.6805634408479934, iteration: 443010
loss: 1.0468640327453613,grad_norm: 0.9386221978156656, iteration: 443011
loss: 1.0013034343719482,grad_norm: 0.9999989621027724, iteration: 443012
loss: 1.024674654006958,grad_norm: 0.7806343389834725, iteration: 443013
loss: 1.0042566061019897,grad_norm: 0.7548813895975595, iteration: 443014
loss: 0.9787694811820984,grad_norm: 0.7805521944076244, iteration: 443015
loss: 1.0101832151412964,grad_norm: 0.724041536894862, iteration: 443016
loss: 0.9771908521652222,grad_norm: 0.9999992962224803, iteration: 443017
loss: 1.0068495273590088,grad_norm: 0.7879742096126782, iteration: 443018
loss: 1.0144675970077515,grad_norm: 0.7464614926285695, iteration: 443019
loss: 0.9813842177391052,grad_norm: 0.729652471696777, iteration: 443020
loss: 0.997775673866272,grad_norm: 0.744359969707836, iteration: 443021
loss: 1.0079526901245117,grad_norm: 0.6970422664020837, iteration: 443022
loss: 1.0578049421310425,grad_norm: 0.9999998527815772, iteration: 443023
loss: 1.0291800498962402,grad_norm: 0.9999996878676618, iteration: 443024
loss: 0.9851372241973877,grad_norm: 0.6768216141848098, iteration: 443025
loss: 1.000934362411499,grad_norm: 0.8675280073514615, iteration: 443026
loss: 1.003781795501709,grad_norm: 0.8019837639305955, iteration: 443027
loss: 0.9865477085113525,grad_norm: 0.7278632142122458, iteration: 443028
loss: 1.006791114807129,grad_norm: 0.999999909570552, iteration: 443029
loss: 1.1039490699768066,grad_norm: 0.9999991493230779, iteration: 443030
loss: 0.9802306294441223,grad_norm: 0.7386557063825988, iteration: 443031
loss: 1.0238028764724731,grad_norm: 0.9132021393896462, iteration: 443032
loss: 1.0025005340576172,grad_norm: 0.7614354638574603, iteration: 443033
loss: 1.0191106796264648,grad_norm: 0.7269299965943828, iteration: 443034
loss: 0.9943534731864929,grad_norm: 0.7187817278556773, iteration: 443035
loss: 0.992063581943512,grad_norm: 0.867381308802288, iteration: 443036
loss: 0.9598655700683594,grad_norm: 0.8008735936105373, iteration: 443037
loss: 1.006968379020691,grad_norm: 0.8375536368442092, iteration: 443038
loss: 1.061731219291687,grad_norm: 0.7550494963179497, iteration: 443039
loss: 1.010756254196167,grad_norm: 0.7885444784076212, iteration: 443040
loss: 0.9278174042701721,grad_norm: 0.6900538863957842, iteration: 443041
loss: 1.0321508646011353,grad_norm: 0.9999995678613594, iteration: 443042
loss: 1.0008219480514526,grad_norm: 0.735545527695291, iteration: 443043
loss: 1.0043892860412598,grad_norm: 0.6144334293747973, iteration: 443044
loss: 1.0291410684585571,grad_norm: 0.8920645643268833, iteration: 443045
loss: 1.0238856077194214,grad_norm: 0.7160582662507652, iteration: 443046
loss: 1.0350513458251953,grad_norm: 0.8787838072167704, iteration: 443047
loss: 0.9853523373603821,grad_norm: 0.9010220060228348, iteration: 443048
loss: 0.9936840534210205,grad_norm: 0.8644107743254342, iteration: 443049
loss: 0.9845914840698242,grad_norm: 0.709660069296336, iteration: 443050
loss: 1.0250566005706787,grad_norm: 0.7574835090023466, iteration: 443051
loss: 0.988834798336029,grad_norm: 0.8247973399756512, iteration: 443052
loss: 1.0183279514312744,grad_norm: 0.8115952537609488, iteration: 443053
loss: 0.9915663003921509,grad_norm: 0.7259148524267051, iteration: 443054
loss: 0.9930764436721802,grad_norm: 0.7210905175680615, iteration: 443055
loss: 1.0676356554031372,grad_norm: 0.9999992365484311, iteration: 443056
loss: 1.0189015865325928,grad_norm: 0.6441217372381807, iteration: 443057
loss: 0.946373701095581,grad_norm: 0.8238648792284615, iteration: 443058
loss: 0.9992185831069946,grad_norm: 0.9851522918974488, iteration: 443059
loss: 1.0165899991989136,grad_norm: 0.7898067351945106, iteration: 443060
loss: 0.9829680919647217,grad_norm: 0.6642239617506822, iteration: 443061
loss: 1.000242829322815,grad_norm: 0.7458130812916945, iteration: 443062
loss: 0.9915703535079956,grad_norm: 0.8050795038381826, iteration: 443063
loss: 0.9763373732566833,grad_norm: 0.8141823705169546, iteration: 443064
loss: 1.1878594160079956,grad_norm: 0.9999998505669218, iteration: 443065
loss: 0.9866385459899902,grad_norm: 0.9706273503039546, iteration: 443066
loss: 1.0221165418624878,grad_norm: 0.8409032871627105, iteration: 443067
loss: 1.0170925855636597,grad_norm: 0.778924856745787, iteration: 443068
loss: 1.0016934871673584,grad_norm: 0.6177226822964084, iteration: 443069
loss: 1.0532361268997192,grad_norm: 0.9999999594438482, iteration: 443070
loss: 1.0277395248413086,grad_norm: 0.6552618672349139, iteration: 443071
loss: 1.0400689840316772,grad_norm: 0.685270431717896, iteration: 443072
loss: 1.0015054941177368,grad_norm: 0.7411192712473852, iteration: 443073
loss: 1.0334707498550415,grad_norm: 0.8221140804608702, iteration: 443074
loss: 1.002219557762146,grad_norm: 0.8003495233657839, iteration: 443075
loss: 0.990894079208374,grad_norm: 0.8505612125816177, iteration: 443076
loss: 0.9791831374168396,grad_norm: 0.7265971152237214, iteration: 443077
loss: 0.9983651041984558,grad_norm: 0.9999989084147186, iteration: 443078
loss: 0.9739671349525452,grad_norm: 0.7960833626313987, iteration: 443079
loss: 0.9984097480773926,grad_norm: 0.8425018839378638, iteration: 443080
loss: 0.9822317361831665,grad_norm: 0.7489021752480044, iteration: 443081
loss: 1.002602219581604,grad_norm: 0.7926761834953703, iteration: 443082
loss: 0.9718217849731445,grad_norm: 0.6557848111579659, iteration: 443083
loss: 0.9871557950973511,grad_norm: 0.6954404059454046, iteration: 443084
loss: 1.009039282798767,grad_norm: 0.8070675654097106, iteration: 443085
loss: 0.9652069211006165,grad_norm: 0.8099031928787451, iteration: 443086
loss: 0.9858287572860718,grad_norm: 0.9229621704379256, iteration: 443087
loss: 0.9846392273902893,grad_norm: 0.8090588979165474, iteration: 443088
loss: 0.9929758906364441,grad_norm: 0.8207010919273499, iteration: 443089
loss: 1.040164828300476,grad_norm: 0.9999991111811765, iteration: 443090
loss: 0.9904525876045227,grad_norm: 0.767307402390469, iteration: 443091
loss: 1.0566586256027222,grad_norm: 0.9393693448887558, iteration: 443092
loss: 1.013789176940918,grad_norm: 0.8724625746389738, iteration: 443093
loss: 0.9958889484405518,grad_norm: 0.7165590434910353, iteration: 443094
loss: 0.9806681275367737,grad_norm: 0.7142536942396678, iteration: 443095
loss: 0.9880414605140686,grad_norm: 0.7129815868894085, iteration: 443096
loss: 0.9726261496543884,grad_norm: 0.8028919809770065, iteration: 443097
loss: 0.9758462309837341,grad_norm: 0.9665274310528227, iteration: 443098
loss: 0.9892095923423767,grad_norm: 0.8286134682438056, iteration: 443099
loss: 0.9961860775947571,grad_norm: 0.8151413954656886, iteration: 443100
loss: 1.0231996774673462,grad_norm: 0.7507351182522273, iteration: 443101
loss: 1.00657320022583,grad_norm: 0.7371619055008989, iteration: 443102
loss: 0.9880198240280151,grad_norm: 0.6599787963078899, iteration: 443103
loss: 1.024695873260498,grad_norm: 0.7704924459592017, iteration: 443104
loss: 0.994183361530304,grad_norm: 0.8208570249916461, iteration: 443105
loss: 0.9853751063346863,grad_norm: 0.9062696398846947, iteration: 443106
loss: 0.9726161360740662,grad_norm: 0.64174786036976, iteration: 443107
loss: 1.0223784446716309,grad_norm: 0.8370783799686884, iteration: 443108
loss: 0.998169481754303,grad_norm: 0.8611696497528254, iteration: 443109
loss: 1.0065783262252808,grad_norm: 0.9275738521768403, iteration: 443110
loss: 0.981192409992218,grad_norm: 0.6997040712702589, iteration: 443111
loss: 0.9975067377090454,grad_norm: 0.73466751113086, iteration: 443112
loss: 0.982528805732727,grad_norm: 0.9844543943241316, iteration: 443113
loss: 0.9974230527877808,grad_norm: 0.7357906896785107, iteration: 443114
loss: 1.007879376411438,grad_norm: 0.7954108138795837, iteration: 443115
loss: 0.9872012138366699,grad_norm: 0.8665823288649411, iteration: 443116
loss: 0.9742183685302734,grad_norm: 0.7497546522308361, iteration: 443117
loss: 1.0169732570648193,grad_norm: 0.9999999470231958, iteration: 443118
loss: 1.0318704843521118,grad_norm: 0.679608554336714, iteration: 443119
loss: 0.954760730266571,grad_norm: 0.8348067526781064, iteration: 443120
loss: 1.0162179470062256,grad_norm: 0.8052382447819052, iteration: 443121
loss: 0.983532726764679,grad_norm: 0.6274611069888552, iteration: 443122
loss: 0.9642029404640198,grad_norm: 0.7528718180943682, iteration: 443123
loss: 0.9649709463119507,grad_norm: 0.8587971880262522, iteration: 443124
loss: 1.024533987045288,grad_norm: 0.7919636659098884, iteration: 443125
loss: 0.9674574136734009,grad_norm: 0.8507643317703684, iteration: 443126
loss: 1.0176059007644653,grad_norm: 0.8310370927703088, iteration: 443127
loss: 1.004738688468933,grad_norm: 0.9899572151752626, iteration: 443128
loss: 0.974539577960968,grad_norm: 0.7878702450208371, iteration: 443129
loss: 1.00101637840271,grad_norm: 0.8560172557214998, iteration: 443130
loss: 0.9979563355445862,grad_norm: 0.8970285173003505, iteration: 443131
loss: 0.996815025806427,grad_norm: 0.7977029092819362, iteration: 443132
loss: 1.0566118955612183,grad_norm: 0.8224099092605662, iteration: 443133
loss: 0.9918924570083618,grad_norm: 0.6995112472506542, iteration: 443134
loss: 1.025844693183899,grad_norm: 0.7288644381765695, iteration: 443135
loss: 1.074955701828003,grad_norm: 0.9999995371273667, iteration: 443136
loss: 1.005672812461853,grad_norm: 0.843796617908767, iteration: 443137
loss: 0.9552779793739319,grad_norm: 0.818572068780652, iteration: 443138
loss: 0.9850842952728271,grad_norm: 0.7126246629605766, iteration: 443139
loss: 1.0101970434188843,grad_norm: 0.8382091672697811, iteration: 443140
loss: 1.0196120738983154,grad_norm: 0.7399710553086155, iteration: 443141
loss: 0.9848845601081848,grad_norm: 0.9218115991954191, iteration: 443142
loss: 1.0529175996780396,grad_norm: 0.8658655550617105, iteration: 443143
loss: 1.0257983207702637,grad_norm: 0.7233207221984262, iteration: 443144
loss: 1.073527216911316,grad_norm: 0.9999999070770841, iteration: 443145
loss: 1.0038851499557495,grad_norm: 0.9999992201056135, iteration: 443146
loss: 1.0188099145889282,grad_norm: 0.8759268100454976, iteration: 443147
loss: 0.9822073578834534,grad_norm: 0.6760477948752344, iteration: 443148
loss: 0.9699983596801758,grad_norm: 0.9999991390338473, iteration: 443149
loss: 0.9757088422775269,grad_norm: 0.8195927364570823, iteration: 443150
loss: 1.048263430595398,grad_norm: 0.8835673610138243, iteration: 443151
loss: 1.0695120096206665,grad_norm: 0.7208185906024637, iteration: 443152
loss: 0.989081621170044,grad_norm: 0.753439410073531, iteration: 443153
loss: 1.014586329460144,grad_norm: 0.9932704999090232, iteration: 443154
loss: 1.022315502166748,grad_norm: 0.8584463520889629, iteration: 443155
loss: 1.0066230297088623,grad_norm: 0.8786994960024618, iteration: 443156
loss: 0.9891629219055176,grad_norm: 0.7301759330239218, iteration: 443157
loss: 1.0020886659622192,grad_norm: 0.9244950665904899, iteration: 443158
loss: 0.9693167209625244,grad_norm: 0.7576314235484283, iteration: 443159
loss: 1.0517243146896362,grad_norm: 0.9999994889774911, iteration: 443160
loss: 0.971154510974884,grad_norm: 0.973871323884693, iteration: 443161
loss: 0.9997286200523376,grad_norm: 0.9359558814018157, iteration: 443162
loss: 0.9976004958152771,grad_norm: 0.9074489695406031, iteration: 443163
loss: 0.9879317879676819,grad_norm: 0.7367701005160228, iteration: 443164
loss: 0.9717192053794861,grad_norm: 0.6537308180483085, iteration: 443165
loss: 0.9896587133407593,grad_norm: 0.9267860440402118, iteration: 443166
loss: 1.0147700309753418,grad_norm: 0.8439596209846005, iteration: 443167
loss: 1.0016162395477295,grad_norm: 0.7779436595732303, iteration: 443168
loss: 1.0106631517410278,grad_norm: 0.9349525316198732, iteration: 443169
loss: 1.0005356073379517,grad_norm: 0.711395263278469, iteration: 443170
loss: 0.9975296258926392,grad_norm: 0.7850090179287046, iteration: 443171
loss: 0.991718053817749,grad_norm: 0.7984118813870152, iteration: 443172
loss: 0.9846962690353394,grad_norm: 0.9999993814003384, iteration: 443173
loss: 1.0113805532455444,grad_norm: 0.7936343142226066, iteration: 443174
loss: 0.9527345895767212,grad_norm: 0.761715506483989, iteration: 443175
loss: 0.9915346503257751,grad_norm: 0.8485472577681843, iteration: 443176
loss: 1.0036522150039673,grad_norm: 0.8123555766233796, iteration: 443177
loss: 0.9540025591850281,grad_norm: 0.8278595594324376, iteration: 443178
loss: 1.0359569787979126,grad_norm: 0.8315542523492822, iteration: 443179
loss: 1.027247428894043,grad_norm: 0.7560325558472487, iteration: 443180
loss: 1.0312974452972412,grad_norm: 0.9999997323401728, iteration: 443181
loss: 1.0097721815109253,grad_norm: 0.7585479864519034, iteration: 443182
loss: 1.0145456790924072,grad_norm: 0.9999992863330521, iteration: 443183
loss: 1.0268340110778809,grad_norm: 0.8567439266202956, iteration: 443184
loss: 1.0204027891159058,grad_norm: 0.9876473240268843, iteration: 443185
loss: 0.9617291688919067,grad_norm: 0.6310591069322705, iteration: 443186
loss: 0.9987193942070007,grad_norm: 0.9530729995049962, iteration: 443187
loss: 0.9784479737281799,grad_norm: 0.8832257958394636, iteration: 443188
loss: 0.9751778841018677,grad_norm: 0.7838014551131013, iteration: 443189
loss: 1.0074599981307983,grad_norm: 0.8196226661380773, iteration: 443190
loss: 1.0502240657806396,grad_norm: 0.7036751228502112, iteration: 443191
loss: 0.9862633347511292,grad_norm: 0.7469395146190209, iteration: 443192
loss: 0.9977465271949768,grad_norm: 0.6552233215778186, iteration: 443193
loss: 1.011691689491272,grad_norm: 0.902328308429405, iteration: 443194
loss: 1.055504322052002,grad_norm: 0.9934525443658484, iteration: 443195
loss: 1.002298355102539,grad_norm: 0.7840435771426167, iteration: 443196
loss: 1.0024561882019043,grad_norm: 0.9373072695698664, iteration: 443197
loss: 1.0638607740402222,grad_norm: 0.999999621918255, iteration: 443198
loss: 0.9652477502822876,grad_norm: 0.7161359800268372, iteration: 443199
loss: 1.0234086513519287,grad_norm: 0.9575526412993705, iteration: 443200
loss: 0.998188316822052,grad_norm: 0.749077724156229, iteration: 443201
loss: 0.9855172634124756,grad_norm: 0.8524130385446868, iteration: 443202
loss: 1.006547212600708,grad_norm: 0.8675057346157589, iteration: 443203
loss: 0.9730294346809387,grad_norm: 0.8805710890332462, iteration: 443204
loss: 0.9841652512550354,grad_norm: 0.7378511641153703, iteration: 443205
loss: 1.0025246143341064,grad_norm: 0.9650859449441388, iteration: 443206
loss: 0.9741952419281006,grad_norm: 0.7323512969635966, iteration: 443207
loss: 1.0092536211013794,grad_norm: 0.8165190982955519, iteration: 443208
loss: 1.0111750364303589,grad_norm: 0.6727082573613609, iteration: 443209
loss: 0.9856610894203186,grad_norm: 0.6339227566450961, iteration: 443210
loss: 1.0361491441726685,grad_norm: 0.8250133352362437, iteration: 443211
loss: 1.0381981134414673,grad_norm: 0.7279483345880967, iteration: 443212
loss: 0.9947812557220459,grad_norm: 0.6812069388968842, iteration: 443213
loss: 0.9979460835456848,grad_norm: 0.7226492839220983, iteration: 443214
loss: 0.9982829093933105,grad_norm: 0.664238055797669, iteration: 443215
loss: 0.9869809746742249,grad_norm: 0.7539935730282307, iteration: 443216
loss: 1.0086121559143066,grad_norm: 0.9331669168058673, iteration: 443217
loss: 0.9820229411125183,grad_norm: 0.7242945862870205, iteration: 443218
loss: 1.024609088897705,grad_norm: 0.7012991158198089, iteration: 443219
loss: 0.9875917434692383,grad_norm: 0.9073070756745546, iteration: 443220
loss: 0.9907447099685669,grad_norm: 0.9479627003965152, iteration: 443221
loss: 0.9921690225601196,grad_norm: 0.7422363774101697, iteration: 443222
loss: 0.9705288410186768,grad_norm: 0.8923381378647474, iteration: 443223
loss: 0.9665631055831909,grad_norm: 0.6800953867404396, iteration: 443224
loss: 1.0197697877883911,grad_norm: 0.7713007015903547, iteration: 443225
loss: 1.0326192378997803,grad_norm: 0.864430871597896, iteration: 443226
loss: 0.9879489541053772,grad_norm: 0.6099146035302054, iteration: 443227
loss: 0.9772225022315979,grad_norm: 0.860545811608753, iteration: 443228
loss: 0.9894651174545288,grad_norm: 0.959629376393164, iteration: 443229
loss: 1.0214922428131104,grad_norm: 0.7195951461585497, iteration: 443230
loss: 1.0083656311035156,grad_norm: 0.7213577019136921, iteration: 443231
loss: 0.9803656339645386,grad_norm: 0.6728503660383155, iteration: 443232
loss: 1.0307482481002808,grad_norm: 0.9999992457200031, iteration: 443233
loss: 0.9941654801368713,grad_norm: 0.9999991112092116, iteration: 443234
loss: 0.9899083375930786,grad_norm: 0.7590217953464509, iteration: 443235
loss: 1.0791113376617432,grad_norm: 0.9999991119015204, iteration: 443236
loss: 0.9947235584259033,grad_norm: 0.6880327118804364, iteration: 443237
loss: 1.0057264566421509,grad_norm: 0.8573092316939585, iteration: 443238
loss: 0.982461154460907,grad_norm: 0.7924931649415621, iteration: 443239
loss: 0.9720215797424316,grad_norm: 0.5773776997498116, iteration: 443240
loss: 1.012256145477295,grad_norm: 0.7897879936366841, iteration: 443241
loss: 1.0025084018707275,grad_norm: 0.6914940064925341, iteration: 443242
loss: 0.9771257638931274,grad_norm: 0.7811402591965141, iteration: 443243
loss: 1.0095739364624023,grad_norm: 0.9999991989116014, iteration: 443244
loss: 0.9819773435592651,grad_norm: 0.7272777727855791, iteration: 443245
loss: 1.0346046686172485,grad_norm: 0.8610080316106549, iteration: 443246
loss: 0.9563136696815491,grad_norm: 0.8417801323756938, iteration: 443247
loss: 0.9667326211929321,grad_norm: 0.7466889169627601, iteration: 443248
loss: 0.9949160814285278,grad_norm: 0.816718558770155, iteration: 443249
loss: 0.9902039766311646,grad_norm: 0.8244185292468692, iteration: 443250
loss: 0.9936085939407349,grad_norm: 0.6962022150542512, iteration: 443251
loss: 0.9964238405227661,grad_norm: 0.7500179552316607, iteration: 443252
loss: 1.03288996219635,grad_norm: 0.6910602987606774, iteration: 443253
loss: 1.0200259685516357,grad_norm: 0.8953560428993746, iteration: 443254
loss: 1.0545189380645752,grad_norm: 0.9999999080176517, iteration: 443255
loss: 0.9335097670555115,grad_norm: 0.806326178586403, iteration: 443256
loss: 1.0057077407836914,grad_norm: 0.7744204457119079, iteration: 443257
loss: 0.9789654612541199,grad_norm: 0.6331534036875288, iteration: 443258
loss: 0.9975233674049377,grad_norm: 0.793988406753573, iteration: 443259
loss: 1.018452525138855,grad_norm: 0.7351675559349099, iteration: 443260
loss: 0.9954506754875183,grad_norm: 0.7516267339658844, iteration: 443261
loss: 0.9799033999443054,grad_norm: 0.6228724088022884, iteration: 443262
loss: 0.9694800972938538,grad_norm: 0.7437430210412939, iteration: 443263
loss: 0.9542738199234009,grad_norm: 0.9190404744580223, iteration: 443264
loss: 1.0210340023040771,grad_norm: 0.7399733880759697, iteration: 443265
loss: 1.0231363773345947,grad_norm: 0.8273439950851447, iteration: 443266
loss: 0.982953667640686,grad_norm: 0.6519606218819158, iteration: 443267
loss: 1.1122605800628662,grad_norm: 1.0000000420894064, iteration: 443268
loss: 0.9971373677253723,grad_norm: 0.7187567547520075, iteration: 443269
loss: 0.9951980113983154,grad_norm: 0.8525044687398078, iteration: 443270
loss: 1.0031311511993408,grad_norm: 0.815948931843187, iteration: 443271
loss: 0.9858637452125549,grad_norm: 0.7283364475994434, iteration: 443272
loss: 1.0104048252105713,grad_norm: 0.9999994193556325, iteration: 443273
loss: 1.0040035247802734,grad_norm: 0.77501896422507, iteration: 443274
loss: 1.0162162780761719,grad_norm: 0.7697179459886804, iteration: 443275
loss: 0.9437981247901917,grad_norm: 0.8168446758400574, iteration: 443276
loss: 1.0180381536483765,grad_norm: 0.7323562697632239, iteration: 443277
loss: 0.9743945598602295,grad_norm: 0.7819016095244807, iteration: 443278
loss: 0.9769710302352905,grad_norm: 0.7492598015262558, iteration: 443279
loss: 0.9866583347320557,grad_norm: 0.6941600393768363, iteration: 443280
loss: 0.9589277505874634,grad_norm: 0.8036754634702198, iteration: 443281
loss: 0.970238447189331,grad_norm: 0.8310981360330243, iteration: 443282
loss: 1.0136749744415283,grad_norm: 0.8578147119435642, iteration: 443283
loss: 0.9862629175186157,grad_norm: 0.720883517726762, iteration: 443284
loss: 1.0143742561340332,grad_norm: 0.9999998405995724, iteration: 443285
loss: 1.0851755142211914,grad_norm: 0.9999999987693087, iteration: 443286
loss: 0.9923856854438782,grad_norm: 0.8007319647807639, iteration: 443287
loss: 0.9857888221740723,grad_norm: 0.9999994203164098, iteration: 443288
loss: 0.9959473609924316,grad_norm: 0.7928682779963695, iteration: 443289
loss: 0.9841524958610535,grad_norm: 0.8015416392833454, iteration: 443290
loss: 1.0197415351867676,grad_norm: 0.7129770373149819, iteration: 443291
loss: 0.981223464012146,grad_norm: 0.8083541913938062, iteration: 443292
loss: 0.9974286556243896,grad_norm: 0.7413534727534469, iteration: 443293
loss: 0.9673898220062256,grad_norm: 0.5985558759019726, iteration: 443294
loss: 1.0145167112350464,grad_norm: 0.9999999775599906, iteration: 443295
loss: 0.9983717203140259,grad_norm: 0.671784159001389, iteration: 443296
loss: 0.9898601174354553,grad_norm: 0.7830185046453386, iteration: 443297
loss: 0.9832814931869507,grad_norm: 0.818185931806417, iteration: 443298
loss: 1.036081075668335,grad_norm: 0.7560779649939642, iteration: 443299
loss: 1.0286577939987183,grad_norm: 0.9999994061370814, iteration: 443300
loss: 1.0025930404663086,grad_norm: 0.7402226230766595, iteration: 443301
loss: 0.9996405243873596,grad_norm: 0.8403789226578666, iteration: 443302
loss: 1.0096358060836792,grad_norm: 0.7137589434849685, iteration: 443303
loss: 1.02472984790802,grad_norm: 0.9294810255916509, iteration: 443304
loss: 1.0033049583435059,grad_norm: 0.726363564133783, iteration: 443305
loss: 0.9689663052558899,grad_norm: 0.7555674167740691, iteration: 443306
loss: 1.0144474506378174,grad_norm: 0.7122265170696666, iteration: 443307
loss: 1.015976905822754,grad_norm: 0.8301371379769329, iteration: 443308
loss: 0.9974617958068848,grad_norm: 0.6371671766296757, iteration: 443309
loss: 0.9886466860771179,grad_norm: 0.7751824234512692, iteration: 443310
loss: 0.9681868553161621,grad_norm: 0.9999990198343768, iteration: 443311
loss: 1.0097731351852417,grad_norm: 0.7134232888744955, iteration: 443312
loss: 1.2472875118255615,grad_norm: 0.999999171885187, iteration: 443313
loss: 1.0201280117034912,grad_norm: 0.8385232469579901, iteration: 443314
loss: 1.0164493322372437,grad_norm: 0.9837390603691258, iteration: 443315
loss: 1.0167384147644043,grad_norm: 0.747731945540285, iteration: 443316
loss: 1.0116958618164062,grad_norm: 0.7945645605226209, iteration: 443317
loss: 0.9700413942337036,grad_norm: 0.999999234059205, iteration: 443318
loss: 0.9805030226707458,grad_norm: 0.9072631080987333, iteration: 443319
loss: 1.0605340003967285,grad_norm: 0.6874636031734092, iteration: 443320
loss: 0.9689200520515442,grad_norm: 0.9536435864641091, iteration: 443321
loss: 0.9759543538093567,grad_norm: 0.7581810931762608, iteration: 443322
loss: 1.0165935754776,grad_norm: 0.8045443098425202, iteration: 443323
loss: 0.9371929168701172,grad_norm: 0.9999990126053107, iteration: 443324
loss: 1.048865795135498,grad_norm: 0.9448128131057804, iteration: 443325
loss: 1.0086930990219116,grad_norm: 0.6470736925056064, iteration: 443326
loss: 1.0344977378845215,grad_norm: 0.8306018198007213, iteration: 443327
loss: 0.9983144998550415,grad_norm: 0.7145119359132587, iteration: 443328
loss: 1.0063272714614868,grad_norm: 0.6107258177671556, iteration: 443329
loss: 1.0329035520553589,grad_norm: 0.8536128948012993, iteration: 443330
loss: 0.9863669276237488,grad_norm: 0.7323690668087386, iteration: 443331
loss: 0.9793804287910461,grad_norm: 0.9619589177912056, iteration: 443332
loss: 0.9718136787414551,grad_norm: 0.8806168904603459, iteration: 443333
loss: 0.9780016541481018,grad_norm: 0.6746394455408904, iteration: 443334
loss: 1.00467848777771,grad_norm: 0.7360141370653439, iteration: 443335
loss: 1.0245776176452637,grad_norm: 0.9999995219775875, iteration: 443336
loss: 1.0261121988296509,grad_norm: 0.8568159314553527, iteration: 443337
loss: 0.9638451933860779,grad_norm: 0.846691558930217, iteration: 443338
loss: 1.1141138076782227,grad_norm: 0.815538973221106, iteration: 443339
loss: 0.9983886480331421,grad_norm: 0.9999999094429091, iteration: 443340
loss: 0.966641366481781,grad_norm: 0.8077546735387914, iteration: 443341
loss: 0.9764673709869385,grad_norm: 0.8700824466575129, iteration: 443342
loss: 0.9903343915939331,grad_norm: 0.9999998555158732, iteration: 443343
loss: 1.0127581357955933,grad_norm: 0.9999998667071683, iteration: 443344
loss: 1.1526210308074951,grad_norm: 0.8524469772078064, iteration: 443345
loss: 0.9862660765647888,grad_norm: 0.7167234634982061, iteration: 443346
loss: 0.9847764372825623,grad_norm: 0.648867974129644, iteration: 443347
loss: 1.0059906244277954,grad_norm: 0.8777056377384468, iteration: 443348
loss: 1.0039550065994263,grad_norm: 0.7639424791609583, iteration: 443349
loss: 1.0026121139526367,grad_norm: 0.7187172249652387, iteration: 443350
loss: 1.0122597217559814,grad_norm: 0.7105734213001921, iteration: 443351
loss: 0.9983447790145874,grad_norm: 0.7160223956067027, iteration: 443352
loss: 1.0240399837493896,grad_norm: 0.8258372671159547, iteration: 443353
loss: 1.006751298904419,grad_norm: 0.7676175779090156, iteration: 443354
loss: 0.9708642363548279,grad_norm: 0.988901580024974, iteration: 443355
loss: 0.9925341606140137,grad_norm: 0.8013118869785624, iteration: 443356
loss: 1.0242068767547607,grad_norm: 0.8294794666420855, iteration: 443357
loss: 1.0303343534469604,grad_norm: 0.7546028790156798, iteration: 443358
loss: 0.9635935425758362,grad_norm: 0.824312111553917, iteration: 443359
loss: 1.0217969417572021,grad_norm: 0.766398890453805, iteration: 443360
loss: 1.0221099853515625,grad_norm: 0.5988054580123494, iteration: 443361
loss: 0.9958612322807312,grad_norm: 0.7139502954858152, iteration: 443362
loss: 0.980419933795929,grad_norm: 0.6435772315519224, iteration: 443363
loss: 0.9569054245948792,grad_norm: 0.742628270733863, iteration: 443364
loss: 0.9780384302139282,grad_norm: 0.7766975834582527, iteration: 443365
loss: 1.0093493461608887,grad_norm: 0.9165038523019589, iteration: 443366
loss: 0.9545947313308716,grad_norm: 0.8716050640139849, iteration: 443367
loss: 1.0014961957931519,grad_norm: 0.781238902093947, iteration: 443368
loss: 0.9970382452011108,grad_norm: 0.7698461554702648, iteration: 443369
loss: 0.9933861494064331,grad_norm: 0.6637893189891008, iteration: 443370
loss: 1.0114645957946777,grad_norm: 0.7881374983874434, iteration: 443371
loss: 1.0071957111358643,grad_norm: 0.7437660399199812, iteration: 443372
loss: 1.008832335472107,grad_norm: 0.7335111826693591, iteration: 443373
loss: 0.9617078304290771,grad_norm: 0.7768253344678466, iteration: 443374
loss: 1.0078489780426025,grad_norm: 0.899193736112051, iteration: 443375
loss: 0.990200400352478,grad_norm: 0.9999993937821963, iteration: 443376
loss: 1.0051257610321045,grad_norm: 0.844444506784214, iteration: 443377
loss: 1.0013140439987183,grad_norm: 0.8915812790555387, iteration: 443378
loss: 1.025776982307434,grad_norm: 0.9063138163456184, iteration: 443379
loss: 0.9834491014480591,grad_norm: 0.6781290092736155, iteration: 443380
loss: 1.0194627046585083,grad_norm: 0.7184087280337341, iteration: 443381
loss: 0.9967553019523621,grad_norm: 0.691581027294415, iteration: 443382
loss: 1.0040699243545532,grad_norm: 0.999999776161091, iteration: 443383
loss: 1.010337471961975,grad_norm: 0.8540595699921961, iteration: 443384
loss: 1.0005764961242676,grad_norm: 0.8272246336068246, iteration: 443385
loss: 0.9852498173713684,grad_norm: 0.841883796715989, iteration: 443386
loss: 0.9862722754478455,grad_norm: 0.7479036846925643, iteration: 443387
loss: 1.010570764541626,grad_norm: 0.754955641284025, iteration: 443388
loss: 1.0160479545593262,grad_norm: 0.8030755810772278, iteration: 443389
loss: 0.9894322156906128,grad_norm: 0.7326440392310403, iteration: 443390
loss: 1.0737457275390625,grad_norm: 0.9999995563173821, iteration: 443391
loss: 0.972642719745636,grad_norm: 0.7992086480730144, iteration: 443392
loss: 1.0307402610778809,grad_norm: 0.7217871247869785, iteration: 443393
loss: 1.0003539323806763,grad_norm: 0.6785518667880637, iteration: 443394
loss: 1.0062485933303833,grad_norm: 0.7034862065058977, iteration: 443395
loss: 0.9523234367370605,grad_norm: 0.86444563511842, iteration: 443396
loss: 1.0128767490386963,grad_norm: 0.946665758675779, iteration: 443397
loss: 1.05375075340271,grad_norm: 0.9391919592100849, iteration: 443398
loss: 0.9730164408683777,grad_norm: 0.6861941445446958, iteration: 443399
loss: 1.008076548576355,grad_norm: 0.8455903425413814, iteration: 443400
loss: 0.9818431735038757,grad_norm: 0.7094052286617359, iteration: 443401
loss: 1.074954867362976,grad_norm: 1.0000000703698335, iteration: 443402
loss: 1.0045872926712036,grad_norm: 0.7417683035957483, iteration: 443403
loss: 1.014415979385376,grad_norm: 0.8423706732197432, iteration: 443404
loss: 1.019075632095337,grad_norm: 0.8052849804447528, iteration: 443405
loss: 1.0031169652938843,grad_norm: 0.7500483043568356, iteration: 443406
loss: 1.0093495845794678,grad_norm: 0.7741959806129645, iteration: 443407
loss: 0.9829318523406982,grad_norm: 0.8129792454493653, iteration: 443408
loss: 0.9930482506752014,grad_norm: 0.8368149937290986, iteration: 443409
loss: 0.9863795638084412,grad_norm: 0.7495590407584328, iteration: 443410
loss: 1.014679193496704,grad_norm: 0.8765686472968738, iteration: 443411
loss: 1.0386970043182373,grad_norm: 0.9999990224082229, iteration: 443412
loss: 1.0074812173843384,grad_norm: 0.8091014880861653, iteration: 443413
loss: 1.032525897026062,grad_norm: 0.8539025632448812, iteration: 443414
loss: 0.9734373092651367,grad_norm: 0.8618035774824043, iteration: 443415
loss: 1.0885963439941406,grad_norm: 0.9999992031129423, iteration: 443416
loss: 0.9588305354118347,grad_norm: 0.7493749287816349, iteration: 443417
loss: 0.9750874638557434,grad_norm: 0.7861785756150512, iteration: 443418
loss: 1.0377559661865234,grad_norm: 0.9999999293501164, iteration: 443419
loss: 1.0115679502487183,grad_norm: 0.9260314683735777, iteration: 443420
loss: 1.0178143978118896,grad_norm: 0.7705341337009072, iteration: 443421
loss: 1.053348422050476,grad_norm: 0.9999997041312535, iteration: 443422
loss: 1.0130038261413574,grad_norm: 0.7223458619921221, iteration: 443423
loss: 0.9680495858192444,grad_norm: 0.7145520591744291, iteration: 443424
loss: 0.9798240661621094,grad_norm: 0.8374289464576563, iteration: 443425
loss: 1.0269083976745605,grad_norm: 0.9999991413689261, iteration: 443426
loss: 1.004889965057373,grad_norm: 0.8395314721891788, iteration: 443427
loss: 0.9750393629074097,grad_norm: 0.9999991250443523, iteration: 443428
loss: 0.9775587320327759,grad_norm: 0.9999997468340491, iteration: 443429
loss: 0.9915354251861572,grad_norm: 0.8001321463714638, iteration: 443430
loss: 0.9578371644020081,grad_norm: 0.8086766913749068, iteration: 443431
loss: 1.007065773010254,grad_norm: 0.9928451226770667, iteration: 443432
loss: 0.9979798793792725,grad_norm: 0.7669332528901188, iteration: 443433
loss: 1.0274015665054321,grad_norm: 0.7102627986354637, iteration: 443434
loss: 1.0203332901000977,grad_norm: 0.6889427876155119, iteration: 443435
loss: 0.9894422292709351,grad_norm: 0.624689129350868, iteration: 443436
loss: 1.0170985460281372,grad_norm: 0.8518403575866004, iteration: 443437
loss: 0.993019163608551,grad_norm: 0.6960722992407573, iteration: 443438
loss: 0.9499238133430481,grad_norm: 0.7709534583921049, iteration: 443439
loss: 1.0192114114761353,grad_norm: 0.7618905070268586, iteration: 443440
loss: 1.0195552110671997,grad_norm: 0.8319908121412821, iteration: 443441
loss: 1.006555199623108,grad_norm: 0.7133247571328977, iteration: 443442
loss: 1.0307546854019165,grad_norm: 0.7489432133101367, iteration: 443443
loss: 1.0240305662155151,grad_norm: 0.8259400194289375, iteration: 443444
loss: 1.023659110069275,grad_norm: 0.8548061315791138, iteration: 443445
loss: 1.0106455087661743,grad_norm: 0.8520818127483527, iteration: 443446
loss: 0.9968845844268799,grad_norm: 0.8719469540788775, iteration: 443447
loss: 1.0120056867599487,grad_norm: 0.731589597436689, iteration: 443448
loss: 0.9827719926834106,grad_norm: 0.7434646061587303, iteration: 443449
loss: 0.9934230446815491,grad_norm: 0.8499675124093587, iteration: 443450
loss: 1.025855302810669,grad_norm: 0.7182436202271347, iteration: 443451
loss: 1.0004115104675293,grad_norm: 0.7689032921800889, iteration: 443452
loss: 1.0131207704544067,grad_norm: 0.8457393091891281, iteration: 443453
loss: 0.9946163296699524,grad_norm: 0.8282112990195465, iteration: 443454
loss: 0.9840385913848877,grad_norm: 0.9023717410648485, iteration: 443455
loss: 0.987799346446991,grad_norm: 0.7706146971469677, iteration: 443456
loss: 0.9847142100334167,grad_norm: 0.7758357055582835, iteration: 443457
loss: 0.9916868805885315,grad_norm: 0.633899701838632, iteration: 443458
loss: 0.9546240568161011,grad_norm: 0.8818300125506905, iteration: 443459
loss: 0.9963995814323425,grad_norm: 0.7946266737366227, iteration: 443460
loss: 1.0422154664993286,grad_norm: 0.8023426949763085, iteration: 443461
loss: 1.0115861892700195,grad_norm: 0.9999991610240451, iteration: 443462
loss: 0.9924072027206421,grad_norm: 0.7714597421630283, iteration: 443463
loss: 1.00155508518219,grad_norm: 0.9136312927231826, iteration: 443464
loss: 0.9793612957000732,grad_norm: 0.846891062961482, iteration: 443465
loss: 0.9830111861228943,grad_norm: 0.7042028261220694, iteration: 443466
loss: 1.0593228340148926,grad_norm: 0.9999996575454745, iteration: 443467
loss: 1.0171481370925903,grad_norm: 0.9019949573098153, iteration: 443468
loss: 0.9630423188209534,grad_norm: 0.7853810493194567, iteration: 443469
loss: 1.113332986831665,grad_norm: 0.9999999526035641, iteration: 443470
loss: 0.9621760249137878,grad_norm: 0.999999166268277, iteration: 443471
loss: 1.0350396633148193,grad_norm: 0.735973559698628, iteration: 443472
loss: 1.1026015281677246,grad_norm: 0.9999997132459449, iteration: 443473
loss: 0.9847566485404968,grad_norm: 0.8196657521968501, iteration: 443474
loss: 1.02070152759552,grad_norm: 0.632138067665217, iteration: 443475
loss: 1.0167686939239502,grad_norm: 0.7557377522195541, iteration: 443476
loss: 0.967505693435669,grad_norm: 0.8837598831057784, iteration: 443477
loss: 1.023247480392456,grad_norm: 0.8479629882056618, iteration: 443478
loss: 1.0335791110992432,grad_norm: 0.787376870226432, iteration: 443479
loss: 0.9720308780670166,grad_norm: 0.6860759161622066, iteration: 443480
loss: 0.9762899279594421,grad_norm: 0.9767089744850936, iteration: 443481
loss: 1.0111931562423706,grad_norm: 0.8429970622262896, iteration: 443482
loss: 1.010719895362854,grad_norm: 0.9124938150624844, iteration: 443483
loss: 0.9880791902542114,grad_norm: 0.9746072980051751, iteration: 443484
loss: 1.0337293148040771,grad_norm: 0.684075059968257, iteration: 443485
loss: 1.0178425312042236,grad_norm: 0.9999991574707975, iteration: 443486
loss: 0.9827834367752075,grad_norm: 0.9863168578267151, iteration: 443487
loss: 0.9816986918449402,grad_norm: 0.8134600384243396, iteration: 443488
loss: 0.9777674078941345,grad_norm: 0.8627145325171737, iteration: 443489
loss: 0.9836859107017517,grad_norm: 0.8232419580831697, iteration: 443490
loss: 1.0613796710968018,grad_norm: 0.9999991670373928, iteration: 443491
loss: 0.9954384565353394,grad_norm: 0.835763503123861, iteration: 443492
loss: 1.0056264400482178,grad_norm: 0.8895228642272751, iteration: 443493
loss: 1.0076600313186646,grad_norm: 0.7685702577806801, iteration: 443494
loss: 0.9795045852661133,grad_norm: 0.7034856669609141, iteration: 443495
loss: 1.007253885269165,grad_norm: 0.6020575563127751, iteration: 443496
loss: 0.9990902543067932,grad_norm: 0.5855652609701605, iteration: 443497
loss: 0.9651045799255371,grad_norm: 0.8021016136635455, iteration: 443498
loss: 1.0199280977249146,grad_norm: 0.8308648573230268, iteration: 443499
loss: 1.0089921951293945,grad_norm: 0.7585572116437861, iteration: 443500
loss: 1.0044617652893066,grad_norm: 0.8493845211003376, iteration: 443501
loss: 0.9673259258270264,grad_norm: 0.7156362231766495, iteration: 443502
loss: 0.9986932873725891,grad_norm: 0.7572674220390124, iteration: 443503
loss: 1.0154436826705933,grad_norm: 0.8705599594756596, iteration: 443504
loss: 0.9732197523117065,grad_norm: 0.8621136016158756, iteration: 443505
loss: 1.0301566123962402,grad_norm: 0.9999991375852898, iteration: 443506
loss: 1.0063495635986328,grad_norm: 0.7645863419424642, iteration: 443507
loss: 1.000544548034668,grad_norm: 0.8176132894415976, iteration: 443508
loss: 1.034766674041748,grad_norm: 0.9999989522950062, iteration: 443509
loss: 1.0055485963821411,grad_norm: 0.7277772427706455, iteration: 443510
loss: 1.006933331489563,grad_norm: 0.999999425250816, iteration: 443511
loss: 0.958698570728302,grad_norm: 0.770065660079614, iteration: 443512
loss: 1.0676968097686768,grad_norm: 0.7518346482534692, iteration: 443513
loss: 0.9820576906204224,grad_norm: 0.6420612033224208, iteration: 443514
loss: 1.0125514268875122,grad_norm: 0.8095946501901149, iteration: 443515
loss: 0.9837138056755066,grad_norm: 0.6761271254943223, iteration: 443516
loss: 1.0037420988082886,grad_norm: 0.9999992762688626, iteration: 443517
loss: 1.0494331121444702,grad_norm: 0.6862560786405332, iteration: 443518
loss: 0.9775899648666382,grad_norm: 0.8167230939427951, iteration: 443519
loss: 0.9572177529335022,grad_norm: 0.8429876866488214, iteration: 443520
loss: 1.0067615509033203,grad_norm: 0.7578080226986601, iteration: 443521
loss: 0.9821568131446838,grad_norm: 0.7656734994009959, iteration: 443522
loss: 0.9971433877944946,grad_norm: 0.8317996739107816, iteration: 443523
loss: 0.9922471046447754,grad_norm: 0.7901299345662711, iteration: 443524
loss: 1.0207253694534302,grad_norm: 0.826825502711198, iteration: 443525
loss: 1.0229734182357788,grad_norm: 0.9999991889480951, iteration: 443526
loss: 1.0211825370788574,grad_norm: 0.8500835649310882, iteration: 443527
loss: 1.003643274307251,grad_norm: 0.820833812377224, iteration: 443528
loss: 1.0731945037841797,grad_norm: 0.9999991331872753, iteration: 443529
loss: 0.9474753737449646,grad_norm: 0.7647767247970652, iteration: 443530
loss: 0.9967755079269409,grad_norm: 0.7817232728970076, iteration: 443531
loss: 1.0065770149230957,grad_norm: 0.8147016934514941, iteration: 443532
loss: 0.9683243036270142,grad_norm: 0.7901505292229304, iteration: 443533
loss: 0.9856191277503967,grad_norm: 0.8405661719101787, iteration: 443534
loss: 1.001253366470337,grad_norm: 0.9678436982775873, iteration: 443535
loss: 1.016175627708435,grad_norm: 0.7324475926058966, iteration: 443536
loss: 0.9954339265823364,grad_norm: 0.70521295478723, iteration: 443537
loss: 0.9715097546577454,grad_norm: 0.8463900459979541, iteration: 443538
loss: 1.0225125551223755,grad_norm: 0.9999992521119633, iteration: 443539
loss: 0.9511610865592957,grad_norm: 0.8043755893112255, iteration: 443540
loss: 1.0438439846038818,grad_norm: 0.6898513470599525, iteration: 443541
loss: 1.0095765590667725,grad_norm: 0.6660660910210259, iteration: 443542
loss: 1.002867341041565,grad_norm: 0.7935338369623157, iteration: 443543
loss: 1.1396788358688354,grad_norm: 0.9999998655264444, iteration: 443544
loss: 0.9977908730506897,grad_norm: 0.9599977703198431, iteration: 443545
loss: 1.0073127746582031,grad_norm: 0.7444908408544941, iteration: 443546
loss: 1.0060821771621704,grad_norm: 0.679285461837436, iteration: 443547
loss: 0.9847376942634583,grad_norm: 0.7950179534793927, iteration: 443548
loss: 1.018799066543579,grad_norm: 0.6580246542485657, iteration: 443549
loss: 0.9802087545394897,grad_norm: 0.6968839825913241, iteration: 443550
loss: 1.0153627395629883,grad_norm: 0.9999990843513574, iteration: 443551
loss: 1.140812873840332,grad_norm: 0.999999121341089, iteration: 443552
loss: 0.9987401962280273,grad_norm: 0.9467265622579671, iteration: 443553
loss: 0.9901787042617798,grad_norm: 0.7097610486560147, iteration: 443554
loss: 0.9787757396697998,grad_norm: 0.8406386046767004, iteration: 443555
loss: 0.9771194458007812,grad_norm: 0.8813908476165405, iteration: 443556
loss: 0.976203978061676,grad_norm: 0.7857561340534881, iteration: 443557
loss: 0.9785791635513306,grad_norm: 0.6647478341173895, iteration: 443558
loss: 1.0307611227035522,grad_norm: 0.8273457426682411, iteration: 443559
loss: 0.9785236716270447,grad_norm: 0.7995015585918126, iteration: 443560
loss: 0.9948557019233704,grad_norm: 0.8613899827587996, iteration: 443561
loss: 0.9669668078422546,grad_norm: 0.7920885366816877, iteration: 443562
loss: 1.0123679637908936,grad_norm: 0.8828303052385644, iteration: 443563
loss: 1.0123145580291748,grad_norm: 0.7631385945275465, iteration: 443564
loss: 1.018843173980713,grad_norm: 0.7664243691960424, iteration: 443565
loss: 1.0070805549621582,grad_norm: 0.8804298514224639, iteration: 443566
loss: 1.00289785861969,grad_norm: 0.7489992322232146, iteration: 443567
loss: 0.9945148229598999,grad_norm: 0.8094588556879121, iteration: 443568
loss: 0.9702473282814026,grad_norm: 0.9871494223272319, iteration: 443569
loss: 1.0292919874191284,grad_norm: 0.9999998607008371, iteration: 443570
loss: 1.0315051078796387,grad_norm: 0.7748632859840956, iteration: 443571
loss: 0.9731351137161255,grad_norm: 0.8962886524493135, iteration: 443572
loss: 1.007057785987854,grad_norm: 0.9999996527285708, iteration: 443573
loss: 0.9691838622093201,grad_norm: 0.9855927814883678, iteration: 443574
loss: 1.005696415901184,grad_norm: 0.999999140600162, iteration: 443575
loss: 1.009598731994629,grad_norm: 0.7503263163346924, iteration: 443576
loss: 1.0090726613998413,grad_norm: 0.7745607430328271, iteration: 443577
loss: 1.00902259349823,grad_norm: 0.7432236966559524, iteration: 443578
loss: 1.0409600734710693,grad_norm: 0.999999928499526, iteration: 443579
loss: 0.9858352541923523,grad_norm: 0.9111896332260211, iteration: 443580
loss: 0.9997090697288513,grad_norm: 0.7821507300574324, iteration: 443581
loss: 1.0012967586517334,grad_norm: 0.7106005481183608, iteration: 443582
loss: 0.9983740448951721,grad_norm: 0.7733166410477016, iteration: 443583
loss: 1.0122610330581665,grad_norm: 0.8060482461498033, iteration: 443584
loss: 0.9833880066871643,grad_norm: 0.7528710722120898, iteration: 443585
loss: 0.9696633219718933,grad_norm: 0.7495559965829386, iteration: 443586
loss: 1.030490756034851,grad_norm: 0.728882886859285, iteration: 443587
loss: 0.9975115656852722,grad_norm: 0.806330661520898, iteration: 443588
loss: 0.9611371755599976,grad_norm: 0.8011881049209145, iteration: 443589
loss: 0.9680421352386475,grad_norm: 0.649526746802386, iteration: 443590
loss: 0.9856757521629333,grad_norm: 0.753781636146883, iteration: 443591
loss: 0.9917501211166382,grad_norm: 0.7400770714538499, iteration: 443592
loss: 0.9680380821228027,grad_norm: 0.7737874013467385, iteration: 443593
loss: 1.0129393339157104,grad_norm: 0.7248661758545701, iteration: 443594
loss: 1.0089678764343262,grad_norm: 0.7235727213568455, iteration: 443595
loss: 1.0247098207473755,grad_norm: 0.7599141112915346, iteration: 443596
loss: 0.9559304118156433,grad_norm: 0.6454967987426878, iteration: 443597
loss: 0.9989444613456726,grad_norm: 0.7629993159708944, iteration: 443598
loss: 1.008800745010376,grad_norm: 0.8084983354601014, iteration: 443599
loss: 1.004874587059021,grad_norm: 0.9377183739943598, iteration: 443600
loss: 1.0016545057296753,grad_norm: 0.6719775240755913, iteration: 443601
loss: 1.0188671350479126,grad_norm: 0.8468280300350846, iteration: 443602
loss: 1.0411850214004517,grad_norm: 0.9999999431837364, iteration: 443603
loss: 0.9578964114189148,grad_norm: 0.7786780609287433, iteration: 443604
loss: 1.0114201307296753,grad_norm: 0.9851394009398139, iteration: 443605
loss: 1.13092839717865,grad_norm: 0.9999991119966861, iteration: 443606
loss: 0.9659075140953064,grad_norm: 0.7481100971274796, iteration: 443607
loss: 0.9813908338546753,grad_norm: 0.6415690393704614, iteration: 443608
loss: 0.9910756349563599,grad_norm: 0.715984790867533, iteration: 443609
loss: 1.0012212991714478,grad_norm: 0.7517010125512292, iteration: 443610
loss: 1.015876054763794,grad_norm: 0.758345902152825, iteration: 443611
loss: 1.0228805541992188,grad_norm: 0.7145149797260943, iteration: 443612
loss: 1.0580514669418335,grad_norm: 0.779177395131423, iteration: 443613
loss: 1.0008882284164429,grad_norm: 0.91875116702596, iteration: 443614
loss: 1.0750467777252197,grad_norm: 0.951935260592735, iteration: 443615
loss: 1.0204955339431763,grad_norm: 0.750076779203315, iteration: 443616
loss: 1.033280611038208,grad_norm: 0.9702625065291238, iteration: 443617
loss: 1.0276576280593872,grad_norm: 0.7876861749339796, iteration: 443618
loss: 0.9941648840904236,grad_norm: 0.6754254831167912, iteration: 443619
loss: 0.9601249098777771,grad_norm: 0.8150346912634718, iteration: 443620
loss: 0.9865352511405945,grad_norm: 0.6742174568656315, iteration: 443621
loss: 1.0020190477371216,grad_norm: 0.6998140181331899, iteration: 443622
loss: 1.0164048671722412,grad_norm: 0.6875340802473758, iteration: 443623
loss: 1.0145381689071655,grad_norm: 0.6998619832264081, iteration: 443624
loss: 1.0130339860916138,grad_norm: 0.7892467351609619, iteration: 443625
loss: 0.9962019920349121,grad_norm: 0.9156893412149386, iteration: 443626
loss: 1.00960111618042,grad_norm: 0.817688095665882, iteration: 443627
loss: 0.9537192583084106,grad_norm: 0.748819890272405, iteration: 443628
loss: 0.9832412600517273,grad_norm: 0.8122268999589046, iteration: 443629
loss: 1.0109072923660278,grad_norm: 0.9031542000820952, iteration: 443630
loss: 1.032502293586731,grad_norm: 0.8677671903233348, iteration: 443631
loss: 0.9690738320350647,grad_norm: 0.8796838151906329, iteration: 443632
loss: 0.992137610912323,grad_norm: 0.7176557526282045, iteration: 443633
loss: 1.0086580514907837,grad_norm: 0.797568954707335, iteration: 443634
loss: 1.0799986124038696,grad_norm: 0.9999998763028043, iteration: 443635
loss: 1.0677088499069214,grad_norm: 0.8530920157488391, iteration: 443636
loss: 1.003115177154541,grad_norm: 0.973375577230194, iteration: 443637
loss: 1.000253677368164,grad_norm: 0.7687771666732175, iteration: 443638
loss: 0.9683329463005066,grad_norm: 0.7840073806854468, iteration: 443639
loss: 1.0051100254058838,grad_norm: 0.9999999417571912, iteration: 443640
loss: 1.0060126781463623,grad_norm: 0.7516986658131909, iteration: 443641
loss: 1.0109885931015015,grad_norm: 0.9999990664239791, iteration: 443642
loss: 1.016706943511963,grad_norm: 0.9999991530248996, iteration: 443643
loss: 1.0040684938430786,grad_norm: 0.6887794792917618, iteration: 443644
loss: 1.0675251483917236,grad_norm: 0.9999996177632123, iteration: 443645
loss: 0.9780183434486389,grad_norm: 0.6259816336492374, iteration: 443646
loss: 1.0170625448226929,grad_norm: 0.9081373292005885, iteration: 443647
loss: 1.0250287055969238,grad_norm: 0.6681803140807036, iteration: 443648
loss: 1.0180586576461792,grad_norm: 0.9999994055739868, iteration: 443649
loss: 1.1173499822616577,grad_norm: 0.9999998585635226, iteration: 443650
loss: 1.0171881914138794,grad_norm: 0.83460338916234, iteration: 443651
loss: 1.0473833084106445,grad_norm: 0.9999996776911613, iteration: 443652
loss: 1.0067684650421143,grad_norm: 0.664663971638456, iteration: 443653
loss: 0.9912552833557129,grad_norm: 0.8051286804403334, iteration: 443654
loss: 0.9907566905021667,grad_norm: 0.8801266926798977, iteration: 443655
loss: 0.9947312474250793,grad_norm: 0.9800283384600816, iteration: 443656
loss: 0.9793895483016968,grad_norm: 0.715820564796318, iteration: 443657
loss: 0.9965838193893433,grad_norm: 0.7822622733093869, iteration: 443658
loss: 0.9863464832305908,grad_norm: 0.7970779390857305, iteration: 443659
loss: 1.0321271419525146,grad_norm: 0.9999991137216041, iteration: 443660
loss: 0.9862626791000366,grad_norm: 0.8969341271823474, iteration: 443661
loss: 1.01731276512146,grad_norm: 0.9416354842773639, iteration: 443662
loss: 1.0099844932556152,grad_norm: 0.8528317460795036, iteration: 443663
loss: 1.0042861700057983,grad_norm: 0.7800092639669078, iteration: 443664
loss: 0.9855526685714722,grad_norm: 0.869387267853143, iteration: 443665
loss: 0.9806444048881531,grad_norm: 0.626908275411531, iteration: 443666
loss: 1.0164871215820312,grad_norm: 0.9999994671299783, iteration: 443667
loss: 0.982391893863678,grad_norm: 0.7995200576344529, iteration: 443668
loss: 1.0250688791275024,grad_norm: 0.7550727392389126, iteration: 443669
loss: 1.0290933847427368,grad_norm: 0.7504231111375342, iteration: 443670
loss: 0.9852727055549622,grad_norm: 0.6931240525329868, iteration: 443671
loss: 0.97467440366745,grad_norm: 0.8206047127099622, iteration: 443672
loss: 1.0013467073440552,grad_norm: 0.9205701838755606, iteration: 443673
loss: 1.0604417324066162,grad_norm: 0.9999992012122904, iteration: 443674
loss: 0.9643664360046387,grad_norm: 0.7386568182154862, iteration: 443675
loss: 1.010542631149292,grad_norm: 0.9504001131783545, iteration: 443676
loss: 1.0040953159332275,grad_norm: 0.9761917663334414, iteration: 443677
loss: 0.9874535799026489,grad_norm: 0.7923424705644371, iteration: 443678
loss: 0.9688292741775513,grad_norm: 0.8016012818878179, iteration: 443679
loss: 1.0366545915603638,grad_norm: 0.7918089764004013, iteration: 443680
loss: 0.9730790257453918,grad_norm: 0.7535840241182263, iteration: 443681
loss: 1.0098605155944824,grad_norm: 0.7797076481259279, iteration: 443682
loss: 0.9978024959564209,grad_norm: 0.9125463670400603, iteration: 443683
loss: 0.9627727270126343,grad_norm: 0.7837806730349923, iteration: 443684
loss: 1.010934829711914,grad_norm: 0.7673010440515925, iteration: 443685
loss: 0.9764775037765503,grad_norm: 0.7843402961348855, iteration: 443686
loss: 1.0074533224105835,grad_norm: 0.987412353909666, iteration: 443687
loss: 0.9695616960525513,grad_norm: 0.6580847228146184, iteration: 443688
loss: 1.0374361276626587,grad_norm: 0.7032458337996139, iteration: 443689
loss: 0.9849485754966736,grad_norm: 0.9465542261839944, iteration: 443690
loss: 1.0927343368530273,grad_norm: 0.9265283160727846, iteration: 443691
loss: 0.9799003601074219,grad_norm: 0.7283218315284016, iteration: 443692
loss: 0.9700539112091064,grad_norm: 0.7856277716520237, iteration: 443693
loss: 0.9877882599830627,grad_norm: 0.7857170531589213, iteration: 443694
loss: 0.9875916838645935,grad_norm: 0.7834000629509597, iteration: 443695
loss: 0.9602288007736206,grad_norm: 0.8127947794809661, iteration: 443696
loss: 1.0163390636444092,grad_norm: 0.9999997852070662, iteration: 443697
loss: 0.990251898765564,grad_norm: 0.7837564339630457, iteration: 443698
loss: 0.9876260161399841,grad_norm: 0.7952245327820786, iteration: 443699
loss: 0.9896906018257141,grad_norm: 0.8468714224800926, iteration: 443700
loss: 1.0398879051208496,grad_norm: 0.7694230955496006, iteration: 443701
loss: 0.9833924770355225,grad_norm: 0.9782700319267569, iteration: 443702
loss: 0.9824860095977783,grad_norm: 0.6480698024656351, iteration: 443703
loss: 1.016463279724121,grad_norm: 0.8599342205714207, iteration: 443704
loss: 0.9925583600997925,grad_norm: 0.6993795515977391, iteration: 443705
loss: 1.0030381679534912,grad_norm: 0.7492114704051985, iteration: 443706
loss: 1.0009616613388062,grad_norm: 0.8182352846009927, iteration: 443707
loss: 0.986891508102417,grad_norm: 0.837849867913363, iteration: 443708
loss: 1.0061479806900024,grad_norm: 0.9999995657338956, iteration: 443709
loss: 0.9740225672721863,grad_norm: 0.7122894676635132, iteration: 443710
loss: 1.0101124048233032,grad_norm: 0.8429670887577858, iteration: 443711
loss: 0.9835011959075928,grad_norm: 0.8079405622413911, iteration: 443712
loss: 1.0276312828063965,grad_norm: 0.9999991798525938, iteration: 443713
loss: 0.9963071942329407,grad_norm: 0.7626388611268063, iteration: 443714
loss: 1.0247734785079956,grad_norm: 0.7550982378886038, iteration: 443715
loss: 0.9825760722160339,grad_norm: 0.6877429850762541, iteration: 443716
loss: 1.0185060501098633,grad_norm: 0.724085428198609, iteration: 443717
loss: 1.0255299806594849,grad_norm: 0.9999997917682897, iteration: 443718
loss: 0.9826576709747314,grad_norm: 0.794292716075047, iteration: 443719
loss: 0.9982463121414185,grad_norm: 0.833121574682204, iteration: 443720
loss: 1.0316065549850464,grad_norm: 0.8493926008451909, iteration: 443721
loss: 1.0170854330062866,grad_norm: 0.6935138989416858, iteration: 443722
loss: 1.0375512838363647,grad_norm: 0.6565879464853264, iteration: 443723
loss: 0.9603646397590637,grad_norm: 0.7276645841788754, iteration: 443724
loss: 0.9636622667312622,grad_norm: 0.7737721090951438, iteration: 443725
loss: 1.0091429948806763,grad_norm: 0.6708759629384584, iteration: 443726
loss: 1.0366241931915283,grad_norm: 0.8532131808735871, iteration: 443727
loss: 1.0512527227401733,grad_norm: 0.9999998585916478, iteration: 443728
loss: 0.9665807485580444,grad_norm: 0.7040393200182031, iteration: 443729
loss: 0.9971768260002136,grad_norm: 0.7735327384524691, iteration: 443730
loss: 0.9965112209320068,grad_norm: 0.6719693811348659, iteration: 443731
loss: 0.9852396845817566,grad_norm: 0.8799115289751838, iteration: 443732
loss: 0.9912665486335754,grad_norm: 0.9999998637925033, iteration: 443733
loss: 1.0040862560272217,grad_norm: 0.6301899154103622, iteration: 443734
loss: 0.9716976881027222,grad_norm: 0.7860934466759748, iteration: 443735
loss: 0.9445971846580505,grad_norm: 0.7549914054419457, iteration: 443736
loss: 0.9561309218406677,grad_norm: 0.9511053145262799, iteration: 443737
loss: 0.9971135258674622,grad_norm: 0.8794143212425762, iteration: 443738
loss: 1.0149905681610107,grad_norm: 0.8100739531659928, iteration: 443739
loss: 0.9761581420898438,grad_norm: 0.8366938945631317, iteration: 443740
loss: 0.9317542314529419,grad_norm: 0.9004924643852078, iteration: 443741
loss: 0.9868651032447815,grad_norm: 0.7732298743400583, iteration: 443742
loss: 1.089095115661621,grad_norm: 0.9999994744511446, iteration: 443743
loss: 1.0566149950027466,grad_norm: 0.9999999122207486, iteration: 443744
loss: 1.002774953842163,grad_norm: 0.7387821750382104, iteration: 443745
loss: 1.0462220907211304,grad_norm: 0.9999994796635965, iteration: 443746
loss: 1.0072805881500244,grad_norm: 0.950344254026167, iteration: 443747
loss: 0.9876173138618469,grad_norm: 0.7154559158349999, iteration: 443748
loss: 1.011335015296936,grad_norm: 0.7273781759617326, iteration: 443749
loss: 0.963241457939148,grad_norm: 0.8115524069089654, iteration: 443750
loss: 1.0039219856262207,grad_norm: 0.9999990067243845, iteration: 443751
loss: 1.01901113986969,grad_norm: 0.6019471393659277, iteration: 443752
loss: 1.0217430591583252,grad_norm: 0.8353728591643828, iteration: 443753
loss: 0.9339886903762817,grad_norm: 0.7519100044959938, iteration: 443754
loss: 0.9887556433677673,grad_norm: 0.7307876498917304, iteration: 443755
loss: 0.9948442578315735,grad_norm: 0.6920878381840138, iteration: 443756
loss: 1.0232322216033936,grad_norm: 0.9999993735034471, iteration: 443757
loss: 1.0756393671035767,grad_norm: 0.7687378207659091, iteration: 443758
loss: 0.9924853444099426,grad_norm: 0.7636556159836695, iteration: 443759
loss: 0.9926977753639221,grad_norm: 0.9999990622567143, iteration: 443760
loss: 0.995995283126831,grad_norm: 0.9999998326640229, iteration: 443761
loss: 1.0050450563430786,grad_norm: 0.9999995614628621, iteration: 443762
loss: 0.9582506418228149,grad_norm: 0.8836828792826169, iteration: 443763
loss: 0.9828656911849976,grad_norm: 0.7627114430693115, iteration: 443764
loss: 1.0248061418533325,grad_norm: 0.9999992755692733, iteration: 443765
loss: 0.9904700517654419,grad_norm: 0.7943640184084959, iteration: 443766
loss: 0.9810143709182739,grad_norm: 0.84233444802382, iteration: 443767
loss: 1.079635500907898,grad_norm: 0.999999642078788, iteration: 443768
loss: 1.0535222291946411,grad_norm: 0.999999900940758, iteration: 443769
loss: 1.0913323163986206,grad_norm: 0.9999998902251706, iteration: 443770
loss: 0.9983353018760681,grad_norm: 0.8279741044176847, iteration: 443771
loss: 1.0199190378189087,grad_norm: 0.8626892354505398, iteration: 443772
loss: 0.9823703169822693,grad_norm: 0.9065887680888652, iteration: 443773
loss: 1.0213316679000854,grad_norm: 0.7791903990771051, iteration: 443774
loss: 1.065470576286316,grad_norm: 0.9999997412198728, iteration: 443775
loss: 0.9749456644058228,grad_norm: 0.7080997686695121, iteration: 443776
loss: 0.9749506711959839,grad_norm: 0.68105773930533, iteration: 443777
loss: 1.0056084394454956,grad_norm: 0.8709958824669208, iteration: 443778
loss: 1.037982702255249,grad_norm: 0.9446415608828183, iteration: 443779
loss: 0.9927625060081482,grad_norm: 0.8101941000334998, iteration: 443780
loss: 0.9791136980056763,grad_norm: 0.7367297521588686, iteration: 443781
loss: 1.001852035522461,grad_norm: 0.9999991683677548, iteration: 443782
loss: 1.0224372148513794,grad_norm: 0.9999992578022397, iteration: 443783
loss: 1.044769525527954,grad_norm: 0.999999330772361, iteration: 443784
loss: 1.0132328271865845,grad_norm: 0.7782866323609798, iteration: 443785
loss: 1.2234174013137817,grad_norm: 0.999999550384871, iteration: 443786
loss: 0.992946982383728,grad_norm: 0.9999993421601923, iteration: 443787
loss: 0.9769282341003418,grad_norm: 0.7192802418903426, iteration: 443788
loss: 1.0216635465621948,grad_norm: 0.8089427801172443, iteration: 443789
loss: 0.9929668307304382,grad_norm: 0.8385746878436889, iteration: 443790
loss: 1.0187017917633057,grad_norm: 0.8684877762809226, iteration: 443791
loss: 0.9723867774009705,grad_norm: 0.8354639304465616, iteration: 443792
loss: 0.9788066148757935,grad_norm: 0.7885897802380302, iteration: 443793
loss: 1.0395277738571167,grad_norm: 0.880702967520795, iteration: 443794
loss: 0.9805676341056824,grad_norm: 0.7683179930174636, iteration: 443795
loss: 1.0378010272979736,grad_norm: 0.7313598162132404, iteration: 443796
loss: 1.0261000394821167,grad_norm: 0.7404849705626899, iteration: 443797
loss: 1.0343574285507202,grad_norm: 0.6133748548867969, iteration: 443798
loss: 1.0156837701797485,grad_norm: 0.8252794743629547, iteration: 443799
loss: 0.9865768551826477,grad_norm: 0.7949105766230966, iteration: 443800
loss: 1.0217875242233276,grad_norm: 0.7889199679271189, iteration: 443801
loss: 0.9907918572425842,grad_norm: 0.8980371800234521, iteration: 443802
loss: 1.0071252584457397,grad_norm: 0.7889020216078797, iteration: 443803
loss: 0.9953768849372864,grad_norm: 0.74360163595508, iteration: 443804
loss: 1.0206326246261597,grad_norm: 0.7403215601663882, iteration: 443805
loss: 1.041264295578003,grad_norm: 0.8976472858684683, iteration: 443806
loss: 1.0052000284194946,grad_norm: 0.7017649224749116, iteration: 443807
loss: 0.9915833473205566,grad_norm: 0.8435770521226781, iteration: 443808
loss: 1.004830002784729,grad_norm: 0.8670661277694125, iteration: 443809
loss: 1.0000633001327515,grad_norm: 0.8988068663683307, iteration: 443810
loss: 0.9913854598999023,grad_norm: 0.8049762153192732, iteration: 443811
loss: 1.0457797050476074,grad_norm: 0.7584563126714186, iteration: 443812
loss: 1.0041465759277344,grad_norm: 0.664029358498393, iteration: 443813
loss: 1.129096269607544,grad_norm: 0.9999994327559727, iteration: 443814
loss: 0.9614508748054504,grad_norm: 0.7874652596215131, iteration: 443815
loss: 0.9891480803489685,grad_norm: 0.7180974042406013, iteration: 443816
loss: 1.0181002616882324,grad_norm: 0.7036055156898581, iteration: 443817
loss: 0.9850155115127563,grad_norm: 0.6741565143641557, iteration: 443818
loss: 1.027803897857666,grad_norm: 0.6774749923187648, iteration: 443819
loss: 1.0693329572677612,grad_norm: 0.9599610149830248, iteration: 443820
loss: 1.0403399467468262,grad_norm: 0.9999991476283215, iteration: 443821
loss: 1.0230188369750977,grad_norm: 0.7288731429778836, iteration: 443822
loss: 0.9912264943122864,grad_norm: 0.7259835885736348, iteration: 443823
loss: 0.9975568056106567,grad_norm: 0.8104504979535995, iteration: 443824
loss: 0.9977944493293762,grad_norm: 0.9999992321722044, iteration: 443825
loss: 1.017374873161316,grad_norm: 0.8057280005897401, iteration: 443826
loss: 1.0126562118530273,grad_norm: 0.709165339179634, iteration: 443827
loss: 1.0054676532745361,grad_norm: 0.8484900197571666, iteration: 443828
loss: 1.0310800075531006,grad_norm: 0.9999990896468279, iteration: 443829
loss: 0.9841424226760864,grad_norm: 0.9999998015444161, iteration: 443830
loss: 0.9691510200500488,grad_norm: 0.720367359833258, iteration: 443831
loss: 0.9880245923995972,grad_norm: 0.7978626526420616, iteration: 443832
loss: 1.0322754383087158,grad_norm: 0.9319053296443192, iteration: 443833
loss: 0.9553383588790894,grad_norm: 0.7153544130612458, iteration: 443834
loss: 0.9776275157928467,grad_norm: 0.8008416155369289, iteration: 443835
loss: 1.0414202213287354,grad_norm: 0.7190265358688169, iteration: 443836
loss: 1.0506595373153687,grad_norm: 0.7156401363250555, iteration: 443837
loss: 0.954395055770874,grad_norm: 0.818124031778764, iteration: 443838
loss: 1.0069154500961304,grad_norm: 0.999999913075551, iteration: 443839
loss: 1.0099499225616455,grad_norm: 0.846268070045201, iteration: 443840
loss: 0.9777061939239502,grad_norm: 0.8704348081514697, iteration: 443841
loss: 1.008760929107666,grad_norm: 0.8175347236513293, iteration: 443842
loss: 1.0362366437911987,grad_norm: 0.8702049585001059, iteration: 443843
loss: 1.0063085556030273,grad_norm: 0.8197970661548913, iteration: 443844
loss: 0.9912806749343872,grad_norm: 0.8352999265211857, iteration: 443845
loss: 1.0026723146438599,grad_norm: 0.8860687175484031, iteration: 443846
loss: 0.9864028096199036,grad_norm: 0.9999995218127022, iteration: 443847
loss: 1.0723543167114258,grad_norm: 0.9206890535234169, iteration: 443848
loss: 1.0140259265899658,grad_norm: 0.6145792411390752, iteration: 443849
loss: 1.0035405158996582,grad_norm: 0.7335862990239688, iteration: 443850
loss: 1.0229425430297852,grad_norm: 0.9384340069023379, iteration: 443851
loss: 0.9712203145027161,grad_norm: 0.7868169760555203, iteration: 443852
loss: 1.038183331489563,grad_norm: 0.8534523178305001, iteration: 443853
loss: 0.9695581197738647,grad_norm: 0.7080840801099121, iteration: 443854
loss: 1.0145078897476196,grad_norm: 0.6723715480265905, iteration: 443855
loss: 1.0944095849990845,grad_norm: 0.9999993140171687, iteration: 443856
loss: 0.9702844023704529,grad_norm: 0.888847917514889, iteration: 443857
loss: 1.0062118768692017,grad_norm: 0.9202434768816167, iteration: 443858
loss: 1.0677598714828491,grad_norm: 0.784541303479446, iteration: 443859
loss: 0.9835253953933716,grad_norm: 0.6594692871529209, iteration: 443860
loss: 1.0009971857070923,grad_norm: 0.735593585003376, iteration: 443861
loss: 1.0567512512207031,grad_norm: 0.9999998622355205, iteration: 443862
loss: 1.0227516889572144,grad_norm: 0.6760265330743228, iteration: 443863
loss: 0.9887850880622864,grad_norm: 0.9487860918482707, iteration: 443864
loss: 0.9868087768554688,grad_norm: 0.8247496157991714, iteration: 443865
loss: 0.9861703515052795,grad_norm: 0.880991694911952, iteration: 443866
loss: 0.9731137752532959,grad_norm: 0.9134916692658475, iteration: 443867
loss: 0.9837138652801514,grad_norm: 0.7299828533871929, iteration: 443868
loss: 0.9508923292160034,grad_norm: 0.8338061797123378, iteration: 443869
loss: 1.0161947011947632,grad_norm: 0.7859992495792867, iteration: 443870
loss: 0.9973999857902527,grad_norm: 0.6987642316678293, iteration: 443871
loss: 1.042388677597046,grad_norm: 0.702386824416974, iteration: 443872
loss: 1.0139567852020264,grad_norm: 0.9537644975199959, iteration: 443873
loss: 1.024757981300354,grad_norm: 0.890317740433747, iteration: 443874
loss: 0.9834615588188171,grad_norm: 0.7180658888062439, iteration: 443875
loss: 0.9813752174377441,grad_norm: 0.8677428339722467, iteration: 443876
loss: 0.9994625449180603,grad_norm: 0.8576294397619237, iteration: 443877
loss: 1.0687543153762817,grad_norm: 0.9999991382464084, iteration: 443878
loss: 1.0361125469207764,grad_norm: 0.9999994156155569, iteration: 443879
loss: 0.9856789708137512,grad_norm: 0.8121119454479039, iteration: 443880
loss: 0.9998116493225098,grad_norm: 0.846219588553577, iteration: 443881
loss: 1.03485107421875,grad_norm: 0.8959503417418754, iteration: 443882
loss: 0.990057647228241,grad_norm: 0.8322766429474782, iteration: 443883
loss: 1.004531741142273,grad_norm: 0.7648999181875307, iteration: 443884
loss: 0.9586313962936401,grad_norm: 0.5952353819354838, iteration: 443885
loss: 1.0455607175827026,grad_norm: 0.9999994543867313, iteration: 443886
loss: 1.0098235607147217,grad_norm: 0.7164569329666949, iteration: 443887
loss: 1.019104242324829,grad_norm: 0.9579139109208917, iteration: 443888
loss: 0.9944704174995422,grad_norm: 0.9999993707770847, iteration: 443889
loss: 1.043760061264038,grad_norm: 0.9999998891261113, iteration: 443890
loss: 1.0387308597564697,grad_norm: 0.9999999269082697, iteration: 443891
loss: 1.0053917169570923,grad_norm: 0.7217560066877852, iteration: 443892
loss: 1.0317356586456299,grad_norm: 0.999999645308373, iteration: 443893
loss: 1.0001718997955322,grad_norm: 0.6337500568038696, iteration: 443894
loss: 0.9882248044013977,grad_norm: 0.8361121618618625, iteration: 443895
loss: 1.000179648399353,grad_norm: 0.9999990915898094, iteration: 443896
loss: 1.0068824291229248,grad_norm: 0.6526705530864025, iteration: 443897
loss: 0.9890202283859253,grad_norm: 0.802646664077549, iteration: 443898
loss: 0.9727808237075806,grad_norm: 0.7732408456955862, iteration: 443899
loss: 0.9725996255874634,grad_norm: 0.762613997440322, iteration: 443900
loss: 1.0169503688812256,grad_norm: 0.7448522584361387, iteration: 443901
loss: 0.9799160957336426,grad_norm: 0.7230432976370176, iteration: 443902
loss: 0.9855722188949585,grad_norm: 0.7509884691205073, iteration: 443903
loss: 0.9951530694961548,grad_norm: 0.7591349592027149, iteration: 443904
loss: 0.9854071140289307,grad_norm: 0.7230670422453064, iteration: 443905
loss: 1.0066030025482178,grad_norm: 0.8335981016950094, iteration: 443906
loss: 1.0068352222442627,grad_norm: 0.7338137603006515, iteration: 443907
loss: 1.017053246498108,grad_norm: 0.7341536766282749, iteration: 443908
loss: 1.0015947818756104,grad_norm: 0.6622959035826531, iteration: 443909
loss: 0.9692032337188721,grad_norm: 0.6981341681047104, iteration: 443910
loss: 0.9836392998695374,grad_norm: 0.6497431043700003, iteration: 443911
loss: 1.017557978630066,grad_norm: 0.6479343156645271, iteration: 443912
loss: 0.9958463311195374,grad_norm: 0.8234502596099487, iteration: 443913
loss: 1.00606369972229,grad_norm: 0.6406637563487807, iteration: 443914
loss: 0.9494463801383972,grad_norm: 0.9501251771949664, iteration: 443915
loss: 0.9897429347038269,grad_norm: 0.7565818174873774, iteration: 443916
loss: 0.9862565398216248,grad_norm: 0.7745437540657067, iteration: 443917
loss: 1.0403468608856201,grad_norm: 0.979785374912605, iteration: 443918
loss: 1.0190290212631226,grad_norm: 0.7629589655005073, iteration: 443919
loss: 1.0117459297180176,grad_norm: 0.7612592240296692, iteration: 443920
loss: 0.9991949200630188,grad_norm: 0.7954328971368814, iteration: 443921
loss: 0.9917513132095337,grad_norm: 0.9711867721525221, iteration: 443922
loss: 0.9586608409881592,grad_norm: 0.7821834167022771, iteration: 443923
loss: 1.0235577821731567,grad_norm: 0.8503677674736972, iteration: 443924
loss: 1.0227620601654053,grad_norm: 0.7429981336764518, iteration: 443925
loss: 1.0092517137527466,grad_norm: 0.7376931370363301, iteration: 443926
loss: 0.9983980059623718,grad_norm: 0.753705633712809, iteration: 443927
loss: 0.9653803110122681,grad_norm: 0.6511487090970761, iteration: 443928
loss: 1.0228139162063599,grad_norm: 0.7791040882701231, iteration: 443929
loss: 0.9821344017982483,grad_norm: 0.9074770011904435, iteration: 443930
loss: 0.9924243092536926,grad_norm: 0.804787441578683, iteration: 443931
loss: 1.141466736793518,grad_norm: 0.9999997233859033, iteration: 443932
loss: 0.984365701675415,grad_norm: 0.9999993948427292, iteration: 443933
loss: 1.007412075996399,grad_norm: 0.712533388559366, iteration: 443934
loss: 1.0245577096939087,grad_norm: 0.8196126207694114, iteration: 443935
loss: 1.0359045267105103,grad_norm: 0.9999993560856505, iteration: 443936
loss: 0.9903984665870667,grad_norm: 0.9999998956110895, iteration: 443937
loss: 1.0056395530700684,grad_norm: 0.8482529863455799, iteration: 443938
loss: 0.9866443276405334,grad_norm: 0.8085270402010899, iteration: 443939
loss: 0.9891056418418884,grad_norm: 0.8803922940711529, iteration: 443940
loss: 0.9650959968566895,grad_norm: 0.6916093951810129, iteration: 443941
loss: 1.0104377269744873,grad_norm: 0.7775437566062742, iteration: 443942
loss: 1.0708335638046265,grad_norm: 0.9999997126920687, iteration: 443943
loss: 1.0024911165237427,grad_norm: 0.8779369034070014, iteration: 443944
loss: 1.0295921564102173,grad_norm: 0.8578714369508018, iteration: 443945
loss: 1.0195963382720947,grad_norm: 0.7775161186998746, iteration: 443946
loss: 0.9824594855308533,grad_norm: 0.7350638845958706, iteration: 443947
loss: 0.9706908464431763,grad_norm: 0.8663259651705829, iteration: 443948
loss: 0.9901150465011597,grad_norm: 0.8896614805202765, iteration: 443949
loss: 0.9475458860397339,grad_norm: 0.933568264624988, iteration: 443950
loss: 1.017470359802246,grad_norm: 0.7081374820558304, iteration: 443951
loss: 1.005895972251892,grad_norm: 0.8064651559476359, iteration: 443952
loss: 1.0015777349472046,grad_norm: 0.7722131794607985, iteration: 443953
loss: 1.0166553258895874,grad_norm: 0.6867241467514589, iteration: 443954
loss: 1.0203057527542114,grad_norm: 0.999999087404887, iteration: 443955
loss: 1.0928659439086914,grad_norm: 0.8079547462191139, iteration: 443956
loss: 1.0097784996032715,grad_norm: 0.9999994920965983, iteration: 443957
loss: 0.9662944078445435,grad_norm: 0.827493824288072, iteration: 443958
loss: 0.9974642395973206,grad_norm: 0.7436544001786219, iteration: 443959
loss: 1.0255135297775269,grad_norm: 0.8033720108259971, iteration: 443960
loss: 0.9744613170623779,grad_norm: 0.6605960766769919, iteration: 443961
loss: 0.9868304133415222,grad_norm: 0.7429851154901096, iteration: 443962
loss: 0.944797158241272,grad_norm: 0.8948210035071358, iteration: 443963
loss: 0.9821361303329468,grad_norm: 0.7638495189863974, iteration: 443964
loss: 0.9752240777015686,grad_norm: 0.8681093111260326, iteration: 443965
loss: 1.0029126405715942,grad_norm: 0.9999990724110229, iteration: 443966
loss: 1.0437570810317993,grad_norm: 0.9692391263858648, iteration: 443967
loss: 0.981616735458374,grad_norm: 0.9030270406332941, iteration: 443968
loss: 1.134817361831665,grad_norm: 0.841879265905675, iteration: 443969
loss: 1.0079318284988403,grad_norm: 0.7870097537917405, iteration: 443970
loss: 1.0389536619186401,grad_norm: 0.8182603306716913, iteration: 443971
loss: 0.9716917276382446,grad_norm: 0.7425070144792203, iteration: 443972
loss: 0.998072624206543,grad_norm: 0.697634042797634, iteration: 443973
loss: 1.009566307067871,grad_norm: 0.7717967220473755, iteration: 443974
loss: 0.9858893156051636,grad_norm: 0.7806564461586628, iteration: 443975
loss: 0.9746074676513672,grad_norm: 0.7073359625023568, iteration: 443976
loss: 1.0277427434921265,grad_norm: 0.8217479166767111, iteration: 443977
loss: 1.0708593130111694,grad_norm: 0.9999995403680415, iteration: 443978
loss: 0.9964248538017273,grad_norm: 0.6542539192134423, iteration: 443979
loss: 0.9814367294311523,grad_norm: 0.9999990814792705, iteration: 443980
loss: 0.9967882037162781,grad_norm: 0.6896884730405252, iteration: 443981
loss: 1.004028558731079,grad_norm: 0.8913104848217174, iteration: 443982
loss: 0.996701180934906,grad_norm: 0.8801679713028037, iteration: 443983
loss: 1.0150337219238281,grad_norm: 0.7473016683335927, iteration: 443984
loss: 1.014154076576233,grad_norm: 0.9999992373955712, iteration: 443985
loss: 1.0106605291366577,grad_norm: 0.7671020981822071, iteration: 443986
loss: 1.0516316890716553,grad_norm: 0.9831927661927743, iteration: 443987
loss: 1.006659746170044,grad_norm: 0.9999991132446909, iteration: 443988
loss: 1.0068897008895874,grad_norm: 0.8043677080123551, iteration: 443989
loss: 0.9889123439788818,grad_norm: 0.5912942122069569, iteration: 443990
loss: 1.0071872472763062,grad_norm: 0.7808262962562323, iteration: 443991
loss: 1.0195083618164062,grad_norm: 0.705328944752497, iteration: 443992
loss: 1.016369104385376,grad_norm: 0.7737582417003613, iteration: 443993
loss: 1.0902013778686523,grad_norm: 0.8549541321933728, iteration: 443994
loss: 0.9839794635772705,grad_norm: 0.7636548565976253, iteration: 443995
loss: 1.0028895139694214,grad_norm: 0.7491829345768543, iteration: 443996
loss: 1.0674991607666016,grad_norm: 0.9999998581360502, iteration: 443997
loss: 0.9726195931434631,grad_norm: 0.7796681546831181, iteration: 443998
loss: 0.9808371067047119,grad_norm: 0.7013011974388877, iteration: 443999
loss: 1.008530855178833,grad_norm: 0.7068771111679206, iteration: 444000
loss: 1.0231472253799438,grad_norm: 0.7677763534195108, iteration: 444001
loss: 1.0009276866912842,grad_norm: 0.8477695714619572, iteration: 444002
loss: 1.0058138370513916,grad_norm: 0.8511483034000848, iteration: 444003
loss: 1.007354974746704,grad_norm: 0.7228152454461684, iteration: 444004
loss: 1.0238244533538818,grad_norm: 0.9999994222458076, iteration: 444005
loss: 1.0043092966079712,grad_norm: 0.6346379644981189, iteration: 444006
loss: 0.9829468131065369,grad_norm: 0.9960951943122401, iteration: 444007
loss: 1.0096381902694702,grad_norm: 0.8747411553921844, iteration: 444008
loss: 1.0861302614212036,grad_norm: 0.8915625877419796, iteration: 444009
loss: 0.9971535801887512,grad_norm: 0.8592033888515493, iteration: 444010
loss: 0.998662531375885,grad_norm: 0.8536249750536447, iteration: 444011
loss: 0.9714783430099487,grad_norm: 0.7889638591871234, iteration: 444012
loss: 1.066310167312622,grad_norm: 0.8856818548224794, iteration: 444013
loss: 1.07399582862854,grad_norm: 0.7835282947081621, iteration: 444014
loss: 0.9795798063278198,grad_norm: 0.71710956723436, iteration: 444015
loss: 1.0081676244735718,grad_norm: 0.8263381338428566, iteration: 444016
loss: 1.1002006530761719,grad_norm: 0.888664896415825, iteration: 444017
loss: 1.0070271492004395,grad_norm: 0.7704204482648496, iteration: 444018
loss: 1.0717754364013672,grad_norm: 0.9999991600047223, iteration: 444019
loss: 0.998991847038269,grad_norm: 0.6353236889305309, iteration: 444020
loss: 1.0097354650497437,grad_norm: 0.8020183627294496, iteration: 444021
loss: 1.0042461156845093,grad_norm: 0.7419574404043003, iteration: 444022
loss: 1.0459827184677124,grad_norm: 0.9999991960266602, iteration: 444023
loss: 0.9897220134735107,grad_norm: 0.8415900405938004, iteration: 444024
loss: 0.9686253070831299,grad_norm: 0.8317618433989609, iteration: 444025
loss: 0.9850431680679321,grad_norm: 0.7301694707527043, iteration: 444026
loss: 0.9874904155731201,grad_norm: 0.9999999115965004, iteration: 444027
loss: 0.9905220866203308,grad_norm: 0.6971952856271846, iteration: 444028
loss: 1.0372153520584106,grad_norm: 0.9999994004425575, iteration: 444029
loss: 1.0000715255737305,grad_norm: 0.7236013766194752, iteration: 444030
loss: 1.0262682437896729,grad_norm: 0.8546884859739597, iteration: 444031
loss: 1.0070420503616333,grad_norm: 0.8637181682829023, iteration: 444032
loss: 1.0332406759262085,grad_norm: 0.6962576905653276, iteration: 444033
loss: 0.9833618402481079,grad_norm: 0.92659006642661, iteration: 444034
loss: 1.0620818138122559,grad_norm: 0.9596246885062021, iteration: 444035
loss: 0.9757580757141113,grad_norm: 0.6489638680595946, iteration: 444036
loss: 1.0022200345993042,grad_norm: 0.8834607920010961, iteration: 444037
loss: 1.0449838638305664,grad_norm: 0.9999991240932169, iteration: 444038
loss: 1.0273895263671875,grad_norm: 0.7272164943371615, iteration: 444039
loss: 1.0118048191070557,grad_norm: 0.768874384626393, iteration: 444040
loss: 1.018539547920227,grad_norm: 0.8073478888953173, iteration: 444041
loss: 0.9727474451065063,grad_norm: 0.73212270694153, iteration: 444042
loss: 1.1062126159667969,grad_norm: 0.9999990371158949, iteration: 444043
loss: 1.0062730312347412,grad_norm: 0.6822872074248573, iteration: 444044
loss: 1.0098252296447754,grad_norm: 0.6549617301431517, iteration: 444045
loss: 0.9581632018089294,grad_norm: 0.6039559507441579, iteration: 444046
loss: 0.9882064461708069,grad_norm: 0.9191844101377677, iteration: 444047
loss: 1.0343431234359741,grad_norm: 0.7645841465755805, iteration: 444048
loss: 0.9869985580444336,grad_norm: 0.7047086466820743, iteration: 444049
loss: 0.9438057541847229,grad_norm: 0.6865082098811099, iteration: 444050
loss: 1.0243356227874756,grad_norm: 0.9374684961808516, iteration: 444051
loss: 0.9866500496864319,grad_norm: 0.6941884758916551, iteration: 444052
loss: 1.0139784812927246,grad_norm: 0.9999993166776889, iteration: 444053
loss: 1.0004405975341797,grad_norm: 0.8804899578012555, iteration: 444054
loss: 0.9938563704490662,grad_norm: 0.7519665881822818, iteration: 444055
loss: 1.1526883840560913,grad_norm: 0.9999997697123258, iteration: 444056
loss: 1.0236507654190063,grad_norm: 0.8791238875595937, iteration: 444057
loss: 0.9779496788978577,grad_norm: 0.7283148790067485, iteration: 444058
loss: 0.9723586440086365,grad_norm: 0.9999991261660295, iteration: 444059
loss: 0.9953797459602356,grad_norm: 0.8081538435180783, iteration: 444060
loss: 0.971563458442688,grad_norm: 0.7321534169338711, iteration: 444061
loss: 0.9718384742736816,grad_norm: 0.6884588222218988, iteration: 444062
loss: 0.9555980563163757,grad_norm: 0.8495716887965404, iteration: 444063
loss: 1.041501760482788,grad_norm: 0.9489972006499579, iteration: 444064
loss: 1.0141788721084595,grad_norm: 0.8458888415228305, iteration: 444065
loss: 0.9990581274032593,grad_norm: 0.9007291313816028, iteration: 444066
loss: 1.0112953186035156,grad_norm: 0.7297059219127673, iteration: 444067
loss: 0.9911554455757141,grad_norm: 0.7554118927655565, iteration: 444068
loss: 1.076641321182251,grad_norm: 0.9999997105753144, iteration: 444069
loss: 1.0186907052993774,grad_norm: 0.7913224822404382, iteration: 444070
loss: 1.0054714679718018,grad_norm: 0.8787334265608227, iteration: 444071
loss: 1.0089975595474243,grad_norm: 0.7858861281357534, iteration: 444072
loss: 0.9962624311447144,grad_norm: 0.734054983324761, iteration: 444073
loss: 1.023996353149414,grad_norm: 0.9999998287438657, iteration: 444074
loss: 0.9832903146743774,grad_norm: 0.9999994996400803, iteration: 444075
loss: 1.002665400505066,grad_norm: 0.6341813381950132, iteration: 444076
loss: 0.9857156872749329,grad_norm: 0.8201281438947138, iteration: 444077
loss: 1.0350730419158936,grad_norm: 0.8952652721472979, iteration: 444078
loss: 1.0185266733169556,grad_norm: 0.6954177433755576, iteration: 444079
loss: 0.9891613125801086,grad_norm: 0.6690693918747851, iteration: 444080
loss: 1.083085060119629,grad_norm: 0.8783114622679855, iteration: 444081
loss: 0.9572319388389587,grad_norm: 0.8143661684763328, iteration: 444082
loss: 0.9778180122375488,grad_norm: 0.7250853567347689, iteration: 444083
loss: 0.9884462356567383,grad_norm: 0.9999989981291952, iteration: 444084
loss: 0.9757078886032104,grad_norm: 0.9051592005936039, iteration: 444085
loss: 1.2148357629776,grad_norm: 0.999999848556489, iteration: 444086
loss: 1.0115851163864136,grad_norm: 0.898568669296124, iteration: 444087
loss: 1.0272915363311768,grad_norm: 0.9999991098773973, iteration: 444088
loss: 1.0330874919891357,grad_norm: 0.9999994234489237, iteration: 444089
loss: 0.9577946066856384,grad_norm: 0.8634707504788001, iteration: 444090
loss: 0.9686939716339111,grad_norm: 0.7640484901293628, iteration: 444091
loss: 0.9969010353088379,grad_norm: 0.999999404333192, iteration: 444092
loss: 0.9857473969459534,grad_norm: 0.6931631750409569, iteration: 444093
loss: 0.9692859649658203,grad_norm: 0.7319130707913587, iteration: 444094
loss: 0.9928659200668335,grad_norm: 0.7958655502080294, iteration: 444095
loss: 1.0482136011123657,grad_norm: 0.7080504703289288, iteration: 444096
loss: 1.0591490268707275,grad_norm: 0.8703190438575004, iteration: 444097
loss: 1.008858561515808,grad_norm: 0.7135826494120218, iteration: 444098
loss: 0.9978479146957397,grad_norm: 0.8939041104091611, iteration: 444099
loss: 1.0641247034072876,grad_norm: 0.9999993850522644, iteration: 444100
loss: 1.0206477642059326,grad_norm: 0.9999992491776828, iteration: 444101
loss: 0.9880403876304626,grad_norm: 0.7373924301036928, iteration: 444102
loss: 0.9863476753234863,grad_norm: 0.6531939301704405, iteration: 444103
loss: 1.0683809518814087,grad_norm: 0.9999995479073086, iteration: 444104
loss: 1.0690845251083374,grad_norm: 0.9999992446626633, iteration: 444105
loss: 1.0736613273620605,grad_norm: 0.999999760290118, iteration: 444106
loss: 1.154178261756897,grad_norm: 0.9999995895710347, iteration: 444107
loss: 1.1152520179748535,grad_norm: 0.8676374302431916, iteration: 444108
loss: 1.001231074333191,grad_norm: 0.7559120330765963, iteration: 444109
loss: 0.9729993939399719,grad_norm: 0.7680498794042635, iteration: 444110
loss: 0.9628334641456604,grad_norm: 0.999999314076999, iteration: 444111
loss: 0.9956088662147522,grad_norm: 0.883068978734829, iteration: 444112
loss: 0.980768084526062,grad_norm: 0.8516457474653918, iteration: 444113
loss: 0.9914848804473877,grad_norm: 0.8543450588320883, iteration: 444114
loss: 0.9977136850357056,grad_norm: 0.7757129785466187, iteration: 444115
loss: 1.0205646753311157,grad_norm: 0.8341283411246552, iteration: 444116
loss: 0.9931241273880005,grad_norm: 0.6977506454108472, iteration: 444117
loss: 0.9930428862571716,grad_norm: 0.9055094475259349, iteration: 444118
loss: 1.0544902086257935,grad_norm: 0.9999992417261016, iteration: 444119
loss: 0.9642499089241028,grad_norm: 0.8764424185973102, iteration: 444120
loss: 0.9818525910377502,grad_norm: 0.8188089381197073, iteration: 444121
loss: 1.0149893760681152,grad_norm: 0.8669284431899972, iteration: 444122
loss: 0.9701488614082336,grad_norm: 0.7732382709450302, iteration: 444123
loss: 1.010608196258545,grad_norm: 0.7957454195229886, iteration: 444124
loss: 0.9992039799690247,grad_norm: 0.7954718392463097, iteration: 444125
loss: 0.9779970049858093,grad_norm: 0.7151631780460214, iteration: 444126
loss: 0.9781507253646851,grad_norm: 0.8645013855820756, iteration: 444127
loss: 0.9876279830932617,grad_norm: 0.6899884820983987, iteration: 444128
loss: 0.9956780076026917,grad_norm: 0.7518249511459993, iteration: 444129
loss: 1.1443984508514404,grad_norm: 0.9999997207763055, iteration: 444130
loss: 1.027947187423706,grad_norm: 0.9999994853212066, iteration: 444131
loss: 1.2062678337097168,grad_norm: 0.9999992124692474, iteration: 444132
loss: 1.0063616037368774,grad_norm: 0.983599506560667, iteration: 444133
loss: 1.006534457206726,grad_norm: 0.891020590951899, iteration: 444134
loss: 1.064528465270996,grad_norm: 0.9999999103727938, iteration: 444135
loss: 0.9383255839347839,grad_norm: 0.7954106771975494, iteration: 444136
loss: 1.0421332120895386,grad_norm: 0.7251576768851364, iteration: 444137
loss: 0.9514892101287842,grad_norm: 0.8151801341455114, iteration: 444138
loss: 1.0314421653747559,grad_norm: 0.8947894634058373, iteration: 444139
loss: 0.9771929979324341,grad_norm: 0.7514693895449689, iteration: 444140
loss: 1.0069878101348877,grad_norm: 0.8113671126834913, iteration: 444141
loss: 1.0484086275100708,grad_norm: 0.9999992467052626, iteration: 444142
loss: 1.0310035943984985,grad_norm: 0.8518353003521666, iteration: 444143
loss: 0.9991126656532288,grad_norm: 0.9999990650701941, iteration: 444144
loss: 0.9531806111335754,grad_norm: 0.7820335876707153, iteration: 444145
loss: 1.015928864479065,grad_norm: 0.8118487208927982, iteration: 444146
loss: 0.9900181293487549,grad_norm: 0.8314443833557114, iteration: 444147
loss: 0.9991878867149353,grad_norm: 0.8296383674895099, iteration: 444148
loss: 0.9918609857559204,grad_norm: 0.7894518369971233, iteration: 444149
loss: 1.0144948959350586,grad_norm: 0.9055468464207165, iteration: 444150
loss: 1.0283936262130737,grad_norm: 0.9122888839396663, iteration: 444151
loss: 0.9975585341453552,grad_norm: 0.7690304820333568, iteration: 444152
loss: 0.9510540962219238,grad_norm: 0.8390769655099825, iteration: 444153
loss: 1.0083588361740112,grad_norm: 0.6986205724186544, iteration: 444154
loss: 0.9768600463867188,grad_norm: 0.8081322144265896, iteration: 444155
loss: 0.9915984272956848,grad_norm: 0.9215912310226031, iteration: 444156
loss: 0.9858551621437073,grad_norm: 0.8245379015672634, iteration: 444157
loss: 0.9974792003631592,grad_norm: 0.8344994083867352, iteration: 444158
loss: 0.9829655289649963,grad_norm: 0.8599436204107949, iteration: 444159
loss: 0.9735554456710815,grad_norm: 0.9336023522090292, iteration: 444160
loss: 1.003687858581543,grad_norm: 0.7139215903614392, iteration: 444161
loss: 0.9842235445976257,grad_norm: 0.7487290166210788, iteration: 444162
loss: 1.0054270029067993,grad_norm: 0.8869892596848348, iteration: 444163
loss: 0.9970716834068298,grad_norm: 0.7630569539372917, iteration: 444164
loss: 0.984226644039154,grad_norm: 0.9634892242557035, iteration: 444165
loss: 0.9886134266853333,grad_norm: 0.7726823869043419, iteration: 444166
loss: 0.9821475744247437,grad_norm: 0.7585749576418579, iteration: 444167
loss: 1.022615909576416,grad_norm: 0.921523420232859, iteration: 444168
loss: 0.969797670841217,grad_norm: 0.7115761823073072, iteration: 444169
loss: 1.0201025009155273,grad_norm: 0.8319743913229859, iteration: 444170
loss: 1.0107307434082031,grad_norm: 0.7737809922643887, iteration: 444171
loss: 1.0107522010803223,grad_norm: 0.8335023197235119, iteration: 444172
loss: 1.013154149055481,grad_norm: 0.8135994407931214, iteration: 444173
loss: 1.0242199897766113,grad_norm: 0.7634977801470888, iteration: 444174
loss: 0.9979791641235352,grad_norm: 0.9999989711950547, iteration: 444175
loss: 1.02993643283844,grad_norm: 0.9999991170373053, iteration: 444176
loss: 1.0268023014068604,grad_norm: 0.9999994989652243, iteration: 444177
loss: 0.9751887917518616,grad_norm: 0.7863819138969622, iteration: 444178
loss: 0.9913408756256104,grad_norm: 0.9075595243108836, iteration: 444179
loss: 0.9790046811103821,grad_norm: 0.7007965635299769, iteration: 444180
loss: 0.9624090790748596,grad_norm: 0.7608620750152622, iteration: 444181
loss: 0.9919209480285645,grad_norm: 0.9216469909390749, iteration: 444182
loss: 0.9986268877983093,grad_norm: 0.8047936842213291, iteration: 444183
loss: 1.017135500907898,grad_norm: 0.6160070618117307, iteration: 444184
loss: 1.029937744140625,grad_norm: 0.7843936601158331, iteration: 444185
loss: 0.9966214299201965,grad_norm: 0.6475286715971889, iteration: 444186
loss: 0.9929214119911194,grad_norm: 0.8715063922970822, iteration: 444187
loss: 1.0557781457901,grad_norm: 0.9999990880772044, iteration: 444188
loss: 1.008829116821289,grad_norm: 0.74939423759653, iteration: 444189
loss: 1.0164811611175537,grad_norm: 0.8035336857310674, iteration: 444190
loss: 1.0067155361175537,grad_norm: 0.9999994329218715, iteration: 444191
loss: 0.9674045443534851,grad_norm: 0.7402097832591599, iteration: 444192
loss: 1.0586177110671997,grad_norm: 0.9999995496029436, iteration: 444193
loss: 0.9463266730308533,grad_norm: 0.9138697907079868, iteration: 444194
loss: 0.9761021733283997,grad_norm: 0.7253516689001832, iteration: 444195
loss: 1.033864140510559,grad_norm: 0.9999994189609119, iteration: 444196
loss: 0.9771504998207092,grad_norm: 0.7853106232993636, iteration: 444197
loss: 1.0018441677093506,grad_norm: 0.8495653775977815, iteration: 444198
loss: 0.9951948523521423,grad_norm: 0.6311674205841867, iteration: 444199
loss: 0.973150908946991,grad_norm: 0.7774855285474351, iteration: 444200
loss: 1.0083816051483154,grad_norm: 0.9999994863014082, iteration: 444201
loss: 1.0038104057312012,grad_norm: 0.9361487618402108, iteration: 444202
loss: 1.016566514968872,grad_norm: 0.7870294521561101, iteration: 444203
loss: 0.9939359426498413,grad_norm: 0.7956079510579228, iteration: 444204
loss: 1.0189540386199951,grad_norm: 0.740721021553963, iteration: 444205
loss: 0.9726110100746155,grad_norm: 0.7513816615200404, iteration: 444206
loss: 1.0075881481170654,grad_norm: 0.8849196877280967, iteration: 444207
loss: 1.0103185176849365,grad_norm: 0.795629628352596, iteration: 444208
loss: 1.018678069114685,grad_norm: 0.8770832158471812, iteration: 444209
loss: 0.9942740797996521,grad_norm: 0.7877147458898575, iteration: 444210
loss: 1.0433543920516968,grad_norm: 0.9999996744273573, iteration: 444211
loss: 1.0175119638442993,grad_norm: 0.6771275169924326, iteration: 444212
loss: 1.0013959407806396,grad_norm: 0.7724612631489066, iteration: 444213
loss: 1.0916483402252197,grad_norm: 0.8911683565317275, iteration: 444214
loss: 1.027592658996582,grad_norm: 0.8122828519431219, iteration: 444215
loss: 0.9892885684967041,grad_norm: 0.8052801458508339, iteration: 444216
loss: 0.9509379863739014,grad_norm: 0.6921194128583872, iteration: 444217
loss: 1.0381078720092773,grad_norm: 0.6221668296915813, iteration: 444218
loss: 0.949043333530426,grad_norm: 0.70449846199322, iteration: 444219
loss: 0.9952213168144226,grad_norm: 0.99999909920009, iteration: 444220
loss: 0.9951558113098145,grad_norm: 0.7457403347569289, iteration: 444221
loss: 0.9978964924812317,grad_norm: 0.651483593872644, iteration: 444222
loss: 1.0067259073257446,grad_norm: 0.9999994484627814, iteration: 444223
loss: 0.9879433512687683,grad_norm: 0.851347803235992, iteration: 444224
loss: 0.9751890301704407,grad_norm: 0.9999994407593314, iteration: 444225
loss: 1.03650963306427,grad_norm: 0.7200690930002894, iteration: 444226
loss: 1.0014698505401611,grad_norm: 0.7240215483946896, iteration: 444227
loss: 0.9727085828781128,grad_norm: 0.75301130624309, iteration: 444228
loss: 0.9863052368164062,grad_norm: 0.7373209983610621, iteration: 444229
loss: 1.0029466152191162,grad_norm: 0.9999990879922994, iteration: 444230
loss: 1.0310859680175781,grad_norm: 0.6452332806820191, iteration: 444231
loss: 1.0074282884597778,grad_norm: 0.8344995045394628, iteration: 444232
loss: 0.9701172113418579,grad_norm: 0.9587407594811534, iteration: 444233
loss: 0.9962021112442017,grad_norm: 0.9875748205243785, iteration: 444234
loss: 1.1992783546447754,grad_norm: 0.9999995612650558, iteration: 444235
loss: 0.9697583317756653,grad_norm: 0.7699754777031356, iteration: 444236
loss: 1.004075288772583,grad_norm: 0.8379595809690679, iteration: 444237
loss: 1.0152568817138672,grad_norm: 0.8193213170184767, iteration: 444238
loss: 1.0240352153778076,grad_norm: 0.9661408275061816, iteration: 444239
loss: 1.005370855331421,grad_norm: 0.7348979215765997, iteration: 444240
loss: 1.0029211044311523,grad_norm: 0.8331135715450267, iteration: 444241
loss: 0.9662767052650452,grad_norm: 0.8266304074604713, iteration: 444242
loss: 1.026694893836975,grad_norm: 0.7529525798081395, iteration: 444243
loss: 1.053252935409546,grad_norm: 0.7630325807650981, iteration: 444244
loss: 0.9812558889389038,grad_norm: 0.7385105111578855, iteration: 444245
loss: 1.0182191133499146,grad_norm: 0.7888020690428185, iteration: 444246
loss: 1.0001296997070312,grad_norm: 0.7536553096066181, iteration: 444247
loss: 0.9915967583656311,grad_norm: 0.7862316855539657, iteration: 444248
loss: 1.0226863622665405,grad_norm: 0.7923381917991996, iteration: 444249
loss: 0.9942691922187805,grad_norm: 0.7305966808754805, iteration: 444250
loss: 0.9988434910774231,grad_norm: 0.7544435330778987, iteration: 444251
loss: 1.0061415433883667,grad_norm: 0.924992058048931, iteration: 444252
loss: 0.993915319442749,grad_norm: 0.7242257757141878, iteration: 444253
loss: 0.9840210676193237,grad_norm: 0.6846573939772009, iteration: 444254
loss: 1.012416124343872,grad_norm: 0.7576227471255108, iteration: 444255
loss: 1.0199081897735596,grad_norm: 0.9452267036877577, iteration: 444256
loss: 0.9991350769996643,grad_norm: 0.743703245832241, iteration: 444257
loss: 1.0302729606628418,grad_norm: 0.8587171201459174, iteration: 444258
loss: 0.9908335208892822,grad_norm: 0.7863054041960581, iteration: 444259
loss: 1.0118515491485596,grad_norm: 0.8711104433426183, iteration: 444260
loss: 1.0316731929779053,grad_norm: 0.763907469150135, iteration: 444261
loss: 0.9979044198989868,grad_norm: 0.8580531271081857, iteration: 444262
loss: 0.9750253558158875,grad_norm: 0.8776031470785528, iteration: 444263
loss: 0.9979046583175659,grad_norm: 0.8224728597638523, iteration: 444264
loss: 0.988318145275116,grad_norm: 0.7534822721559756, iteration: 444265
loss: 1.018248200416565,grad_norm: 0.7716399098754502, iteration: 444266
loss: 1.0339888334274292,grad_norm: 0.84514292154618, iteration: 444267
loss: 1.0083532333374023,grad_norm: 0.6075709853013398, iteration: 444268
loss: 1.0476888418197632,grad_norm: 0.8123386799873584, iteration: 444269
loss: 0.9726663827896118,grad_norm: 0.8332933970209866, iteration: 444270
loss: 0.988310694694519,grad_norm: 0.7251033782415679, iteration: 444271
loss: 1.0023345947265625,grad_norm: 0.6854744761311561, iteration: 444272
loss: 0.9986481070518494,grad_norm: 0.7882835934323624, iteration: 444273
loss: 0.9849473834037781,grad_norm: 0.8571685823476716, iteration: 444274
loss: 0.9816202521324158,grad_norm: 0.6724183594205639, iteration: 444275
loss: 0.9724898934364319,grad_norm: 0.7913563733619292, iteration: 444276
loss: 1.0232242345809937,grad_norm: 0.9476135840804965, iteration: 444277
loss: 0.971776008605957,grad_norm: 0.8923821400666286, iteration: 444278
loss: 0.9452419281005859,grad_norm: 0.887530629011022, iteration: 444279
loss: 1.02715003490448,grad_norm: 0.8297019020344781, iteration: 444280
loss: 1.0107982158660889,grad_norm: 0.7354736143199512, iteration: 444281
loss: 0.9952738881111145,grad_norm: 0.7546968160827895, iteration: 444282
loss: 1.0143600702285767,grad_norm: 0.780600714958588, iteration: 444283
loss: 1.000212550163269,grad_norm: 0.7745544560014614, iteration: 444284
loss: 0.9943891763687134,grad_norm: 0.6781475683801422, iteration: 444285
loss: 1.017549991607666,grad_norm: 0.7546288093938902, iteration: 444286
loss: 0.9865283966064453,grad_norm: 0.6669393914217469, iteration: 444287
loss: 1.0126103162765503,grad_norm: 0.8289459282712176, iteration: 444288
loss: 0.9977618455886841,grad_norm: 0.7418976064283903, iteration: 444289
loss: 1.0025349855422974,grad_norm: 0.9999991534926612, iteration: 444290
loss: 0.9764156341552734,grad_norm: 0.830798057683888, iteration: 444291
loss: 1.0175973176956177,grad_norm: 0.7956744213975289, iteration: 444292
loss: 1.027211308479309,grad_norm: 0.999999374625029, iteration: 444293
loss: 1.0770963430404663,grad_norm: 0.9999992476056402, iteration: 444294
loss: 1.0418548583984375,grad_norm: 0.8695989950172003, iteration: 444295
loss: 1.0136960744857788,grad_norm: 0.9129942665234253, iteration: 444296
loss: 1.0226733684539795,grad_norm: 0.8011946227929715, iteration: 444297
loss: 0.9944085478782654,grad_norm: 0.7013760637857847, iteration: 444298
loss: 1.000127911567688,grad_norm: 0.7209639539740075, iteration: 444299
loss: 0.9931455254554749,grad_norm: 0.8651370495918632, iteration: 444300
loss: 1.0023871660232544,grad_norm: 0.7107014447397697, iteration: 444301
loss: 0.9992231130599976,grad_norm: 0.7544924785976648, iteration: 444302
loss: 0.9871247410774231,grad_norm: 0.9259258242410826, iteration: 444303
loss: 1.017051100730896,grad_norm: 0.8833206295279366, iteration: 444304
loss: 0.9962690472602844,grad_norm: 0.7559550675580708, iteration: 444305
loss: 0.9655061364173889,grad_norm: 0.7669944519986076, iteration: 444306
loss: 0.9644084572792053,grad_norm: 0.8726392610726161, iteration: 444307
loss: 1.0099440813064575,grad_norm: 0.789568712909016, iteration: 444308
loss: 1.0030040740966797,grad_norm: 0.732105086585298, iteration: 444309
loss: 1.0079917907714844,grad_norm: 0.7937532415310364, iteration: 444310
loss: 1.0281237363815308,grad_norm: 0.757284990064167, iteration: 444311
loss: 0.994853138923645,grad_norm: 0.9335587103090892, iteration: 444312
loss: 1.008333683013916,grad_norm: 0.999999868010514, iteration: 444313
loss: 0.9943179488182068,grad_norm: 0.8941141739744727, iteration: 444314
loss: 0.9968693256378174,grad_norm: 0.8130939093854793, iteration: 444315
loss: 1.0013277530670166,grad_norm: 0.8119271095463835, iteration: 444316
loss: 1.0459855794906616,grad_norm: 0.7251426885200714, iteration: 444317
loss: 1.046555757522583,grad_norm: 0.9378871442150061, iteration: 444318
loss: 0.9756652116775513,grad_norm: 0.999999260810055, iteration: 444319
loss: 1.0005338191986084,grad_norm: 0.8247062238808778, iteration: 444320
loss: 1.0631545782089233,grad_norm: 0.7668803060208949, iteration: 444321
loss: 0.9813037514686584,grad_norm: 0.7440963446158315, iteration: 444322
loss: 1.0270625352859497,grad_norm: 0.885067715255371, iteration: 444323
loss: 1.0007472038269043,grad_norm: 0.8289252282666765, iteration: 444324
loss: 0.9998897910118103,grad_norm: 0.937748513406963, iteration: 444325
loss: 0.9739847779273987,grad_norm: 0.9999993086482396, iteration: 444326
loss: 1.0002254247665405,grad_norm: 0.7673716803499308, iteration: 444327
loss: 0.9992726445198059,grad_norm: 0.6635416024210796, iteration: 444328
loss: 1.0144562721252441,grad_norm: 0.7959479572335361, iteration: 444329
loss: 0.9923588633537292,grad_norm: 0.873536214641682, iteration: 444330
loss: 1.00530207157135,grad_norm: 0.7601160741984349, iteration: 444331
loss: 1.0069950819015503,grad_norm: 0.743512246242051, iteration: 444332
loss: 0.9926958680152893,grad_norm: 0.7042790373950112, iteration: 444333
loss: 1.0379456281661987,grad_norm: 0.819512026257055, iteration: 444334
loss: 0.979526937007904,grad_norm: 0.8291595670510254, iteration: 444335
loss: 0.9763743281364441,grad_norm: 0.7404350364065465, iteration: 444336
loss: 1.0136680603027344,grad_norm: 0.8229070090491086, iteration: 444337
loss: 0.9826511740684509,grad_norm: 0.7366567335323174, iteration: 444338
loss: 1.0861612558364868,grad_norm: 0.9999991390768548, iteration: 444339
loss: 0.9719973802566528,grad_norm: 0.8334651976352148, iteration: 444340
loss: 1.0069926977157593,grad_norm: 0.9999993877556493, iteration: 444341
loss: 1.0155444145202637,grad_norm: 0.7232122204591744, iteration: 444342
loss: 0.9882182478904724,grad_norm: 0.7087674818994191, iteration: 444343
loss: 1.0154485702514648,grad_norm: 0.9999993369769261, iteration: 444344
loss: 0.9693924784660339,grad_norm: 0.6436840449967063, iteration: 444345
loss: 1.0174425840377808,grad_norm: 0.779531292005676, iteration: 444346
loss: 1.0439438819885254,grad_norm: 0.8449866148809348, iteration: 444347
loss: 1.0062928199768066,grad_norm: 0.7270338820650903, iteration: 444348
loss: 0.9925929307937622,grad_norm: 0.8163825160090131, iteration: 444349
loss: 0.9711958169937134,grad_norm: 0.8006973371437053, iteration: 444350
loss: 1.0139158964157104,grad_norm: 0.7010998170404745, iteration: 444351
loss: 1.1741766929626465,grad_norm: 0.9999995322427114, iteration: 444352
loss: 1.017429232597351,grad_norm: 0.6786232932433687, iteration: 444353
loss: 0.9958269000053406,grad_norm: 0.8087029523595313, iteration: 444354
loss: 0.9980933666229248,grad_norm: 0.7326415064298102, iteration: 444355
loss: 0.9851642847061157,grad_norm: 0.8597820595917521, iteration: 444356
loss: 1.0228848457336426,grad_norm: 0.8100164791883512, iteration: 444357
loss: 0.9724559187889099,grad_norm: 0.6724348975686284, iteration: 444358
loss: 1.000057578086853,grad_norm: 0.8595111190884404, iteration: 444359
loss: 1.0075920820236206,grad_norm: 0.7464718088246713, iteration: 444360
loss: 1.0060396194458008,grad_norm: 0.8947924523974455, iteration: 444361
loss: 0.9902420043945312,grad_norm: 0.8987734564232451, iteration: 444362
loss: 0.9719923138618469,grad_norm: 0.7623057079032346, iteration: 444363
loss: 1.0534298419952393,grad_norm: 0.824394987689342, iteration: 444364
loss: 0.9857887029647827,grad_norm: 0.7290156153224518, iteration: 444365
loss: 1.0017012357711792,grad_norm: 0.9999998911392666, iteration: 444366
loss: 0.9799206852912903,grad_norm: 0.823502837819935, iteration: 444367
loss: 1.056522250175476,grad_norm: 0.9999991558513462, iteration: 444368
loss: 0.9853479266166687,grad_norm: 0.9250410765925514, iteration: 444369
loss: 1.0018136501312256,grad_norm: 0.7464995069864105, iteration: 444370
loss: 1.0440924167633057,grad_norm: 0.990401606814649, iteration: 444371
loss: 1.0138249397277832,grad_norm: 0.6756542640561359, iteration: 444372
loss: 1.0209912061691284,grad_norm: 0.8008002086831467, iteration: 444373
loss: 0.9978897571563721,grad_norm: 0.8806071283709327, iteration: 444374
loss: 1.0003585815429688,grad_norm: 0.7077127547594173, iteration: 444375
loss: 1.0133962631225586,grad_norm: 0.8936745509568715, iteration: 444376
loss: 1.010184645652771,grad_norm: 0.9999990680672038, iteration: 444377
loss: 0.9976345896720886,grad_norm: 0.7352607747064053, iteration: 444378
loss: 1.0068835020065308,grad_norm: 0.736355338090194, iteration: 444379
loss: 1.0298794507980347,grad_norm: 0.7775608090938446, iteration: 444380
loss: 0.9895635843276978,grad_norm: 0.7575346747103183, iteration: 444381
loss: 0.9929586052894592,grad_norm: 0.7385886551934499, iteration: 444382
loss: 0.9863469004631042,grad_norm: 0.6895856799408497, iteration: 444383
loss: 1.021891474723816,grad_norm: 0.7798415455442149, iteration: 444384
loss: 1.013378381729126,grad_norm: 0.927181010215787, iteration: 444385
loss: 1.0048967599868774,grad_norm: 0.8851952649991386, iteration: 444386
loss: 0.9821017980575562,grad_norm: 0.7900477975059059, iteration: 444387
loss: 0.9610662460327148,grad_norm: 0.999999804800949, iteration: 444388
loss: 0.9996525049209595,grad_norm: 0.9999999525711596, iteration: 444389
loss: 1.0024957656860352,grad_norm: 0.7967633889238409, iteration: 444390
loss: 0.9624528288841248,grad_norm: 0.7967251802890523, iteration: 444391
loss: 1.0449352264404297,grad_norm: 0.9999998285293805, iteration: 444392
loss: 1.000538945198059,grad_norm: 0.795934563450322, iteration: 444393
loss: 1.0083088874816895,grad_norm: 0.9999998685533664, iteration: 444394
loss: 1.1111400127410889,grad_norm: 0.8698290816410181, iteration: 444395
loss: 0.9588898420333862,grad_norm: 0.7919502786406114, iteration: 444396
loss: 1.0248932838439941,grad_norm: 0.8646840106853665, iteration: 444397
loss: 1.058428406715393,grad_norm: 0.8572884295001313, iteration: 444398
loss: 1.117534875869751,grad_norm: 0.9999999505469855, iteration: 444399
loss: 0.9995942711830139,grad_norm: 0.7960421531121923, iteration: 444400
loss: 1.015976071357727,grad_norm: 0.7852435425395371, iteration: 444401
loss: 1.0368525981903076,grad_norm: 0.8878775803611137, iteration: 444402
loss: 0.9855740666389465,grad_norm: 0.9999998982555746, iteration: 444403
loss: 1.01861572265625,grad_norm: 0.8547676109466522, iteration: 444404
loss: 0.9763612151145935,grad_norm: 0.7782347454473951, iteration: 444405
loss: 1.0079703330993652,grad_norm: 0.7952129959354398, iteration: 444406
loss: 0.9832780957221985,grad_norm: 0.7770480704304165, iteration: 444407
loss: 1.0470788478851318,grad_norm: 0.8529686298974347, iteration: 444408
loss: 0.9937413334846497,grad_norm: 0.7946421702329535, iteration: 444409
loss: 0.990351676940918,grad_norm: 0.7727891704999827, iteration: 444410
loss: 1.0629990100860596,grad_norm: 0.999999469367435, iteration: 444411
loss: 1.058737874031067,grad_norm: 0.9999990898745726, iteration: 444412
loss: 1.0102630853652954,grad_norm: 0.9999990811444729, iteration: 444413
loss: 0.9900112748146057,grad_norm: 0.7744235942731184, iteration: 444414
loss: 0.9792391061782837,grad_norm: 0.9043400389852608, iteration: 444415
loss: 1.0247313976287842,grad_norm: 0.8751984525525495, iteration: 444416
loss: 0.9852631092071533,grad_norm: 0.9640532602166075, iteration: 444417
loss: 1.1037753820419312,grad_norm: 0.8966076373624762, iteration: 444418
loss: 1.2670347690582275,grad_norm: 1.0000000158712004, iteration: 444419
loss: 0.9491840600967407,grad_norm: 0.8263364610364714, iteration: 444420
loss: 1.0095455646514893,grad_norm: 0.6497351016841254, iteration: 444421
loss: 0.9807677268981934,grad_norm: 0.9141778310657667, iteration: 444422
loss: 1.0426889657974243,grad_norm: 0.9166951446792121, iteration: 444423
loss: 0.9780553579330444,grad_norm: 0.8007423535942195, iteration: 444424
loss: 1.1411027908325195,grad_norm: 0.9999992977655009, iteration: 444425
loss: 1.052817463874817,grad_norm: 0.9999993880782934, iteration: 444426
loss: 1.0472211837768555,grad_norm: 0.685683673434704, iteration: 444427
loss: 1.1544562578201294,grad_norm: 0.9999990060837661, iteration: 444428
loss: 1.2580742835998535,grad_norm: 0.9999993702231481, iteration: 444429
loss: 1.1266849040985107,grad_norm: 0.9999991974411994, iteration: 444430
loss: 1.0033857822418213,grad_norm: 0.8542632811591893, iteration: 444431
loss: 1.0088260173797607,grad_norm: 0.7369508508152584, iteration: 444432
loss: 0.9771791100502014,grad_norm: 0.7558003019203073, iteration: 444433
loss: 1.0771002769470215,grad_norm: 0.9999993368691679, iteration: 444434
loss: 1.0023680925369263,grad_norm: 0.6787414292591855, iteration: 444435
loss: 1.225342035293579,grad_norm: 0.9999998392877127, iteration: 444436
loss: 1.0084233283996582,grad_norm: 0.8584067005537187, iteration: 444437
loss: 1.010749340057373,grad_norm: 0.9999991548016809, iteration: 444438
loss: 1.0098603963851929,grad_norm: 0.9999996286015534, iteration: 444439
loss: 1.016853928565979,grad_norm: 0.9999994094683653, iteration: 444440
loss: 0.950559675693512,grad_norm: 0.8005025436180724, iteration: 444441
loss: 0.9576060771942139,grad_norm: 0.8884613028963341, iteration: 444442
loss: 1.0779818296432495,grad_norm: 0.9999995577887276, iteration: 444443
loss: 1.039226770401001,grad_norm: 0.9842627149241696, iteration: 444444
loss: 1.0969372987747192,grad_norm: 0.9999997869007372, iteration: 444445
loss: 0.9899716973304749,grad_norm: 0.7807225473526161, iteration: 444446
loss: 0.9767187833786011,grad_norm: 0.7709840971477047, iteration: 444447
loss: 1.0067756175994873,grad_norm: 0.9999999191298093, iteration: 444448
loss: 0.9687107801437378,grad_norm: 0.6964181871459395, iteration: 444449
loss: 1.0196901559829712,grad_norm: 0.714530065142316, iteration: 444450
loss: 1.0123072862625122,grad_norm: 0.8336524744720777, iteration: 444451
loss: 1.0085350275039673,grad_norm: 0.8225404746495949, iteration: 444452
loss: 0.9551892876625061,grad_norm: 0.9011292298985095, iteration: 444453
loss: 1.0115083456039429,grad_norm: 0.7390090325574227, iteration: 444454
loss: 1.016213059425354,grad_norm: 0.8486083894654185, iteration: 444455
loss: 1.073630452156067,grad_norm: 0.9999991367676415, iteration: 444456
loss: 1.0424284934997559,grad_norm: 0.999999059427922, iteration: 444457
loss: 1.005224585533142,grad_norm: 0.9999993061281723, iteration: 444458
loss: 1.0434588193893433,grad_norm: 0.9999999661525756, iteration: 444459
loss: 1.0403311252593994,grad_norm: 0.8151409642526657, iteration: 444460
loss: 1.0381604433059692,grad_norm: 0.9999995607263987, iteration: 444461
loss: 1.0606058835983276,grad_norm: 0.9999992511776304, iteration: 444462
loss: 0.9934133887290955,grad_norm: 0.69050779253918, iteration: 444463
loss: 1.0119878053665161,grad_norm: 0.7430441191903576, iteration: 444464
loss: 1.0500521659851074,grad_norm: 0.8756165035299907, iteration: 444465
loss: 1.0695980787277222,grad_norm: 0.9999990656711583, iteration: 444466
loss: 0.9714926481246948,grad_norm: 0.7176049201259092, iteration: 444467
loss: 0.9837172031402588,grad_norm: 0.7104649412863513, iteration: 444468
loss: 0.9923945665359497,grad_norm: 0.8171654023157681, iteration: 444469
loss: 0.9674751162528992,grad_norm: 0.7619456076875901, iteration: 444470
loss: 0.9907953143119812,grad_norm: 0.5922929730169717, iteration: 444471
loss: 1.0072494745254517,grad_norm: 0.7881906929355466, iteration: 444472
loss: 0.9745338559150696,grad_norm: 0.8224246076028386, iteration: 444473
loss: 0.9826720952987671,grad_norm: 0.9382880430997182, iteration: 444474
loss: 1.0012799501419067,grad_norm: 0.7418670054253227, iteration: 444475
loss: 0.989183247089386,grad_norm: 0.8354010113013401, iteration: 444476
loss: 1.037706732749939,grad_norm: 0.7436189156474513, iteration: 444477
loss: 0.9723859429359436,grad_norm: 0.8860257122245845, iteration: 444478
loss: 0.9901793003082275,grad_norm: 0.875465243033937, iteration: 444479
loss: 1.0201042890548706,grad_norm: 0.9999999356732004, iteration: 444480
loss: 0.9827598929405212,grad_norm: 0.8344684230714527, iteration: 444481
loss: 1.0010275840759277,grad_norm: 0.7179774524721689, iteration: 444482
loss: 0.9836127161979675,grad_norm: 0.9999996000699917, iteration: 444483
loss: 1.0199774503707886,grad_norm: 0.8407323635556343, iteration: 444484
loss: 0.9871004223823547,grad_norm: 0.8822071074301516, iteration: 444485
loss: 0.9857968091964722,grad_norm: 0.8906233036625041, iteration: 444486
loss: 0.9863839149475098,grad_norm: 0.6389540747037179, iteration: 444487
loss: 1.0018463134765625,grad_norm: 0.6831942997960265, iteration: 444488
loss: 0.9947603344917297,grad_norm: 0.748598463367756, iteration: 444489
loss: 1.06088387966156,grad_norm: 0.7953825000806003, iteration: 444490
loss: 1.0068086385726929,grad_norm: 0.9999997078245904, iteration: 444491
loss: 0.9981834888458252,grad_norm: 0.8857164820051816, iteration: 444492
loss: 1.047894835472107,grad_norm: 0.7881835734001308, iteration: 444493
loss: 0.9662390947341919,grad_norm: 0.716333747497438, iteration: 444494
loss: 0.9786026477813721,grad_norm: 0.7836598059084463, iteration: 444495
loss: 1.0046765804290771,grad_norm: 0.6925539016032397, iteration: 444496
loss: 0.9989258050918579,grad_norm: 0.7271710656102606, iteration: 444497
loss: 1.0007416009902954,grad_norm: 0.9420925194516941, iteration: 444498
loss: 0.9761955738067627,grad_norm: 0.8501183315726082, iteration: 444499
loss: 0.9941045641899109,grad_norm: 0.7011158826093081, iteration: 444500
loss: 0.9621672034263611,grad_norm: 0.7511723183428561, iteration: 444501
loss: 0.9771822690963745,grad_norm: 0.6633383565038148, iteration: 444502
loss: 1.078513741493225,grad_norm: 0.8082163820380711, iteration: 444503
loss: 1.0083345174789429,grad_norm: 0.6951230213531447, iteration: 444504
loss: 1.010980248451233,grad_norm: 0.7683443366899863, iteration: 444505
loss: 1.0481644868850708,grad_norm: 0.9999996341848424, iteration: 444506
loss: 1.0364108085632324,grad_norm: 0.6601683392775146, iteration: 444507
loss: 1.2575898170471191,grad_norm: 0.9999998959552535, iteration: 444508
loss: 1.011673927307129,grad_norm: 0.9999999560098645, iteration: 444509
loss: 0.97614985704422,grad_norm: 0.9697742373160361, iteration: 444510
loss: 0.9948800206184387,grad_norm: 0.6828352770244369, iteration: 444511
loss: 1.0066486597061157,grad_norm: 0.7517989152894352, iteration: 444512
loss: 1.0370243787765503,grad_norm: 0.802346175338532, iteration: 444513
loss: 1.0212219953536987,grad_norm: 0.7938108422365193, iteration: 444514
loss: 1.0247137546539307,grad_norm: 0.7866639977446795, iteration: 444515
loss: 0.9698439240455627,grad_norm: 0.9999995043174791, iteration: 444516
loss: 0.9939868450164795,grad_norm: 0.999999338925071, iteration: 444517
loss: 0.986290454864502,grad_norm: 0.68582371429945, iteration: 444518
loss: 1.0349074602127075,grad_norm: 0.8751135785621796, iteration: 444519
loss: 0.9730786681175232,grad_norm: 0.8757318853148681, iteration: 444520
loss: 1.0367194414138794,grad_norm: 0.9999997131194899, iteration: 444521
loss: 1.009749174118042,grad_norm: 0.7335741538528446, iteration: 444522
loss: 1.0066145658493042,grad_norm: 0.7270301795624324, iteration: 444523
loss: 0.9548924565315247,grad_norm: 0.8235973089372856, iteration: 444524
loss: 1.0837507247924805,grad_norm: 0.9999992624876953, iteration: 444525
loss: 1.0014978647232056,grad_norm: 0.9999990159674846, iteration: 444526
loss: 0.9684547185897827,grad_norm: 0.7579113684060657, iteration: 444527
loss: 0.9935802817344666,grad_norm: 0.6565378217392525, iteration: 444528
loss: 1.0177568197250366,grad_norm: 0.779823537180818, iteration: 444529
loss: 0.9776322245597839,grad_norm: 0.8172328404081495, iteration: 444530
loss: 1.0148179531097412,grad_norm: 0.99999950239582, iteration: 444531
loss: 1.0081219673156738,grad_norm: 0.7186692811721722, iteration: 444532
loss: 1.0043785572052002,grad_norm: 0.8808803889446671, iteration: 444533
loss: 1.0342845916748047,grad_norm: 0.8213994767927368, iteration: 444534
loss: 1.0004830360412598,grad_norm: 0.7199414365667361, iteration: 444535
loss: 1.0292052030563354,grad_norm: 0.764310557978958, iteration: 444536
loss: 1.038204550743103,grad_norm: 0.9999998511103283, iteration: 444537
loss: 0.970085084438324,grad_norm: 0.7455594937688915, iteration: 444538
loss: 1.0362199544906616,grad_norm: 0.8944485269944498, iteration: 444539
loss: 1.0156441926956177,grad_norm: 0.9781527254560501, iteration: 444540
loss: 0.9697776436805725,grad_norm: 0.7849519254383358, iteration: 444541
loss: 1.0115609169006348,grad_norm: 0.8841215560719505, iteration: 444542
loss: 1.0015037059783936,grad_norm: 0.7947974116744243, iteration: 444543
loss: 1.0047502517700195,grad_norm: 0.8208479570610603, iteration: 444544
loss: 0.951201856136322,grad_norm: 0.8018400261654007, iteration: 444545
loss: 1.0123302936553955,grad_norm: 0.9478875142763895, iteration: 444546
loss: 0.9936231970787048,grad_norm: 0.7657441737907839, iteration: 444547
loss: 0.9873312711715698,grad_norm: 0.8157514082723585, iteration: 444548
loss: 0.9940010905265808,grad_norm: 0.6523517834113737, iteration: 444549
loss: 0.9764655232429504,grad_norm: 0.7707082777557316, iteration: 444550
loss: 0.9689388871192932,grad_norm: 0.7169208461444272, iteration: 444551
loss: 0.9972876310348511,grad_norm: 0.8439555147336038, iteration: 444552
loss: 1.0034691095352173,grad_norm: 0.8273342736611123, iteration: 444553
loss: 1.0072002410888672,grad_norm: 0.7162383566076373, iteration: 444554
loss: 1.0339943170547485,grad_norm: 0.9999989764511968, iteration: 444555
loss: 0.9883060455322266,grad_norm: 0.8134638745789335, iteration: 444556
loss: 1.029410481452942,grad_norm: 0.7004588536483921, iteration: 444557
loss: 0.9841423630714417,grad_norm: 0.8581277361284018, iteration: 444558
loss: 0.9833062887191772,grad_norm: 0.814336934198792, iteration: 444559
loss: 1.0356918573379517,grad_norm: 0.9453286871090879, iteration: 444560
loss: 0.9872426986694336,grad_norm: 0.8187259022851319, iteration: 444561
loss: 0.9948570728302002,grad_norm: 0.9300151727732267, iteration: 444562
loss: 1.0057038068771362,grad_norm: 0.9470390496218658, iteration: 444563
loss: 1.0004730224609375,grad_norm: 0.805472946741904, iteration: 444564
loss: 0.9910560250282288,grad_norm: 0.9999991751136271, iteration: 444565
loss: 0.9833130836486816,grad_norm: 0.8958199565488362, iteration: 444566
loss: 1.0209438800811768,grad_norm: 0.7508322641364992, iteration: 444567
loss: 0.9697601199150085,grad_norm: 0.7718633598064025, iteration: 444568
loss: 0.9899707436561584,grad_norm: 0.9122883893801694, iteration: 444569
loss: 0.9863265752792358,grad_norm: 0.9999996635527154, iteration: 444570
loss: 0.9822325706481934,grad_norm: 0.7662726342044887, iteration: 444571
loss: 1.0308722257614136,grad_norm: 0.8750651977002674, iteration: 444572
loss: 0.9831554293632507,grad_norm: 0.9343159324714674, iteration: 444573
loss: 1.1351021528244019,grad_norm: 1.0000000436434464, iteration: 444574
loss: 1.0058619976043701,grad_norm: 0.6905376793634628, iteration: 444575
loss: 1.011807918548584,grad_norm: 0.8098088355820288, iteration: 444576
loss: 0.9703322649002075,grad_norm: 0.8032531015715022, iteration: 444577
loss: 1.035048484802246,grad_norm: 0.9999994453249476, iteration: 444578
loss: 1.0088932514190674,grad_norm: 0.591966594819358, iteration: 444579
loss: 1.0400851964950562,grad_norm: 0.8944123471493441, iteration: 444580
loss: 1.1402528285980225,grad_norm: 0.9999997580024242, iteration: 444581
loss: 1.009270191192627,grad_norm: 0.7194793800876383, iteration: 444582
loss: 1.0185750722885132,grad_norm: 0.7071310554411442, iteration: 444583
loss: 1.2727668285369873,grad_norm: 0.9999994998893984, iteration: 444584
loss: 1.0023247003555298,grad_norm: 0.9744358825590144, iteration: 444585
loss: 0.9920844435691833,grad_norm: 0.8714454061988408, iteration: 444586
loss: 1.0165883302688599,grad_norm: 0.7829903428657092, iteration: 444587
loss: 0.9475253224372864,grad_norm: 0.9495099246111441, iteration: 444588
loss: 1.0041577816009521,grad_norm: 0.9999999493127402, iteration: 444589
loss: 1.0572962760925293,grad_norm: 0.9999990707748164, iteration: 444590
loss: 1.0157521963119507,grad_norm: 0.8289085722645836, iteration: 444591
loss: 0.9892301559448242,grad_norm: 0.6485146211499937, iteration: 444592
loss: 0.9845958352088928,grad_norm: 0.7077708652917196, iteration: 444593
loss: 0.9798822402954102,grad_norm: 0.783269734791193, iteration: 444594
loss: 1.0527470111846924,grad_norm: 0.7992692425104632, iteration: 444595
loss: 1.0016834735870361,grad_norm: 0.8304992029520007, iteration: 444596
loss: 1.1347118616104126,grad_norm: 0.9999999565780452, iteration: 444597
loss: 0.9757879972457886,grad_norm: 0.8540135753113642, iteration: 444598
loss: 1.018812894821167,grad_norm: 0.6352150025728115, iteration: 444599
loss: 1.001251220703125,grad_norm: 0.6762806668740089, iteration: 444600
loss: 1.015884280204773,grad_norm: 0.7490478296030179, iteration: 444601
loss: 1.0083141326904297,grad_norm: 0.6736934532092045, iteration: 444602
loss: 1.0666136741638184,grad_norm: 0.8239326177619454, iteration: 444603
loss: 1.0224941968917847,grad_norm: 0.8312876525037114, iteration: 444604
loss: 1.1814197301864624,grad_norm: 0.8980342672714481, iteration: 444605
loss: 0.9641959071159363,grad_norm: 0.8182962810757919, iteration: 444606
loss: 1.000388741493225,grad_norm: 0.7452477789334867, iteration: 444607
loss: 1.0219546556472778,grad_norm: 0.8373434002602905, iteration: 444608
loss: 0.9777328968048096,grad_norm: 0.7509936045261425, iteration: 444609
loss: 1.0015642642974854,grad_norm: 0.8083552490111674, iteration: 444610
loss: 1.0348007678985596,grad_norm: 0.8712647784784178, iteration: 444611
loss: 1.0056384801864624,grad_norm: 0.9999999531538835, iteration: 444612
loss: 1.0315613746643066,grad_norm: 0.9999998912872277, iteration: 444613
loss: 1.0042673349380493,grad_norm: 0.7486884088966949, iteration: 444614
loss: 1.0407367944717407,grad_norm: 1.0000000276718577, iteration: 444615
loss: 1.0083242654800415,grad_norm: 0.8435280870576402, iteration: 444616
loss: 0.9976347088813782,grad_norm: 0.7699142214360901, iteration: 444617
loss: 0.9686058759689331,grad_norm: 0.9999996827359916, iteration: 444618
loss: 1.0720065832138062,grad_norm: 0.8410925702520634, iteration: 444619
loss: 1.0146287679672241,grad_norm: 0.7609144955590951, iteration: 444620
loss: 1.0163819789886475,grad_norm: 0.5790652816268067, iteration: 444621
loss: 1.1428769826889038,grad_norm: 0.9770825757666387, iteration: 444622
loss: 0.9920397996902466,grad_norm: 0.9821182012503336, iteration: 444623
loss: 1.0148200988769531,grad_norm: 0.8859637548073088, iteration: 444624
loss: 1.004738688468933,grad_norm: 0.999999869925218, iteration: 444625
loss: 0.9915782809257507,grad_norm: 0.9607885378715809, iteration: 444626
loss: 1.0003950595855713,grad_norm: 0.8326394939230861, iteration: 444627
loss: 0.9938682913780212,grad_norm: 0.9999993252801034, iteration: 444628
loss: 1.044229507446289,grad_norm: 0.6987070745444933, iteration: 444629
loss: 1.0076690912246704,grad_norm: 0.7453572113977578, iteration: 444630
loss: 1.0011290311813354,grad_norm: 0.7882979454433756, iteration: 444631
loss: 1.0280137062072754,grad_norm: 0.8620687252991531, iteration: 444632
loss: 0.9884523153305054,grad_norm: 0.7451417285468578, iteration: 444633
loss: 0.995786726474762,grad_norm: 0.7755038848287583, iteration: 444634
loss: 0.982893168926239,grad_norm: 0.8116028436456878, iteration: 444635
loss: 1.029165506362915,grad_norm: 0.9658313226898348, iteration: 444636
loss: 1.0726268291473389,grad_norm: 0.9999993391042256, iteration: 444637
loss: 1.0098638534545898,grad_norm: 0.7666004052496888, iteration: 444638
loss: 1.0115655660629272,grad_norm: 0.8961376377658573, iteration: 444639
loss: 1.0337424278259277,grad_norm: 0.8850230349700481, iteration: 444640
loss: 1.0022988319396973,grad_norm: 0.711276665002463, iteration: 444641
loss: 1.0239015817642212,grad_norm: 0.8532476304026303, iteration: 444642
loss: 0.9866690635681152,grad_norm: 0.7819652920984737, iteration: 444643
loss: 1.0462344884872437,grad_norm: 0.9999995690121805, iteration: 444644
loss: 1.0478332042694092,grad_norm: 0.7744095563633908, iteration: 444645
loss: 1.0350364446640015,grad_norm: 0.9999991432993173, iteration: 444646
loss: 0.9434725046157837,grad_norm: 0.8923485231852597, iteration: 444647
loss: 1.1994590759277344,grad_norm: 0.9999997201671804, iteration: 444648
loss: 1.0324493646621704,grad_norm: 0.7405453126097983, iteration: 444649
loss: 1.0024932622909546,grad_norm: 0.8433255275855172, iteration: 444650
loss: 0.9450774788856506,grad_norm: 0.7724129541593937, iteration: 444651
loss: 1.0130144357681274,grad_norm: 0.9698946232820437, iteration: 444652
loss: 1.0530788898468018,grad_norm: 0.9999991639874287, iteration: 444653
loss: 1.0360888242721558,grad_norm: 0.7438990036700047, iteration: 444654
loss: 0.9821780323982239,grad_norm: 0.8817415659700265, iteration: 444655
loss: 1.0136319398880005,grad_norm: 0.9999996242450971, iteration: 444656
loss: 0.9811561703681946,grad_norm: 0.832350750945169, iteration: 444657
loss: 1.0551869869232178,grad_norm: 0.9999993436859683, iteration: 444658
loss: 0.9825875759124756,grad_norm: 0.7813201733805754, iteration: 444659
loss: 1.0551087856292725,grad_norm: 0.9999992295293659, iteration: 444660
loss: 1.011216640472412,grad_norm: 0.7003174650684905, iteration: 444661
loss: 1.0145751237869263,grad_norm: 0.8209178204788402, iteration: 444662
loss: 1.0287162065505981,grad_norm: 0.6319167104032989, iteration: 444663
loss: 0.9967189431190491,grad_norm: 0.7112755050268502, iteration: 444664
loss: 1.03486967086792,grad_norm: 0.9999993354058258, iteration: 444665
loss: 0.9554767608642578,grad_norm: 0.8596746006291505, iteration: 444666
loss: 1.0324755907058716,grad_norm: 0.8474343273455059, iteration: 444667
loss: 0.9834032654762268,grad_norm: 0.6779796706875519, iteration: 444668
loss: 1.165789008140564,grad_norm: 0.9999998139243147, iteration: 444669
loss: 1.0365328788757324,grad_norm: 0.9999999868202637, iteration: 444670
loss: 1.0387938022613525,grad_norm: 0.9999998782606777, iteration: 444671
loss: 1.039281964302063,grad_norm: 0.9999996461959325, iteration: 444672
loss: 1.0008639097213745,grad_norm: 0.8460502404742448, iteration: 444673
loss: 1.0088040828704834,grad_norm: 0.7793230578162675, iteration: 444674
loss: 1.0001041889190674,grad_norm: 0.9999999683463958, iteration: 444675
loss: 1.1479954719543457,grad_norm: 0.9999997416778852, iteration: 444676
loss: 0.9816964864730835,grad_norm: 0.7827940765552088, iteration: 444677
loss: 0.9793233275413513,grad_norm: 0.6818589564650357, iteration: 444678
loss: 0.9932395219802856,grad_norm: 0.999999124715384, iteration: 444679
loss: 0.9894415736198425,grad_norm: 0.7394258467010483, iteration: 444680
loss: 1.004122018814087,grad_norm: 0.7834298520609246, iteration: 444681
loss: 1.0275402069091797,grad_norm: 0.8653601028870845, iteration: 444682
loss: 1.000659465789795,grad_norm: 0.9026919530247165, iteration: 444683
loss: 0.9914326667785645,grad_norm: 0.7570874561030106, iteration: 444684
loss: 0.9588746428489685,grad_norm: 0.9999999409277276, iteration: 444685
loss: 0.9892410039901733,grad_norm: 0.8402803436326662, iteration: 444686
loss: 0.9574389457702637,grad_norm: 0.6998442112468445, iteration: 444687
loss: 0.9932711720466614,grad_norm: 0.653924686225844, iteration: 444688
loss: 1.0361263751983643,grad_norm: 0.7747116898406771, iteration: 444689
loss: 1.0191577672958374,grad_norm: 0.7921487750908844, iteration: 444690
loss: 0.9849451780319214,grad_norm: 0.9177089531637523, iteration: 444691
loss: 0.9537116289138794,grad_norm: 0.6342668103061403, iteration: 444692
loss: 0.9884018898010254,grad_norm: 0.6456586381423064, iteration: 444693
loss: 1.0354840755462646,grad_norm: 0.8118794856258011, iteration: 444694
loss: 0.9704735279083252,grad_norm: 0.8631286696987913, iteration: 444695
loss: 1.0008829832077026,grad_norm: 0.830482014667286, iteration: 444696
loss: 1.0093834400177002,grad_norm: 0.7803895912457661, iteration: 444697
loss: 1.0160106420516968,grad_norm: 0.9546102414749355, iteration: 444698
loss: 0.9474251866340637,grad_norm: 0.749467685316843, iteration: 444699
loss: 1.0305523872375488,grad_norm: 0.9246466874559122, iteration: 444700
loss: 1.0035055875778198,grad_norm: 0.6979764973612143, iteration: 444701
loss: 0.9938471913337708,grad_norm: 0.6844270770748695, iteration: 444702
loss: 1.0085006952285767,grad_norm: 0.7328297945868628, iteration: 444703
loss: 0.9987470507621765,grad_norm: 0.8691702045076656, iteration: 444704
loss: 0.97952800989151,grad_norm: 0.6700627482474676, iteration: 444705
loss: 1.050071358680725,grad_norm: 0.984501094484912, iteration: 444706
loss: 0.9431571364402771,grad_norm: 0.8179858016736985, iteration: 444707
loss: 1.0418894290924072,grad_norm: 0.8026667985542398, iteration: 444708
loss: 0.9639036059379578,grad_norm: 0.9999991660497518, iteration: 444709
loss: 0.9879891872406006,grad_norm: 0.7788623105890427, iteration: 444710
loss: 1.025320053100586,grad_norm: 0.9999998063265965, iteration: 444711
loss: 0.9873827695846558,grad_norm: 0.602648080972558, iteration: 444712
loss: 0.9999483823776245,grad_norm: 0.8111527516883792, iteration: 444713
loss: 1.0139586925506592,grad_norm: 0.9999996282276702, iteration: 444714
loss: 1.0582219362258911,grad_norm: 0.9999996103256419, iteration: 444715
loss: 1.028490662574768,grad_norm: 0.7219952578714195, iteration: 444716
loss: 0.9309317469596863,grad_norm: 0.8555523602206432, iteration: 444717
loss: 1.0055955648422241,grad_norm: 0.7520304558391456, iteration: 444718
loss: 0.9894601702690125,grad_norm: 0.7661641370089229, iteration: 444719
loss: 1.0098439455032349,grad_norm: 0.6835541575652495, iteration: 444720
loss: 0.984154462814331,grad_norm: 0.790790185172568, iteration: 444721
loss: 0.9878662824630737,grad_norm: 0.744540328204972, iteration: 444722
loss: 0.9851112365722656,grad_norm: 0.7164052430905258, iteration: 444723
loss: 0.9793152809143066,grad_norm: 0.8804418977673675, iteration: 444724
loss: 0.9718966484069824,grad_norm: 1.00000000955236, iteration: 444725
loss: 1.017974615097046,grad_norm: 0.8058438077115423, iteration: 444726
loss: 1.0107555389404297,grad_norm: 0.6245834521135489, iteration: 444727
loss: 0.9941614270210266,grad_norm: 0.7495036674612969, iteration: 444728
loss: 1.035818099975586,grad_norm: 0.7143279820778566, iteration: 444729
loss: 0.998492419719696,grad_norm: 0.8434574677620761, iteration: 444730
loss: 0.9864733219146729,grad_norm: 0.7699996431060497, iteration: 444731
loss: 0.9692370891571045,grad_norm: 0.9702519314456338, iteration: 444732
loss: 1.0097496509552002,grad_norm: 0.8130391052215712, iteration: 444733
loss: 0.9687128067016602,grad_norm: 0.8322886648172322, iteration: 444734
loss: 1.0084567070007324,grad_norm: 0.8263648902951761, iteration: 444735
loss: 1.0526272058486938,grad_norm: 0.7679447838862677, iteration: 444736
loss: 0.9972714185714722,grad_norm: 0.8732083742356203, iteration: 444737
loss: 1.0146417617797852,grad_norm: 0.7821894147458156, iteration: 444738
loss: 1.2305116653442383,grad_norm: 0.9999993064072185, iteration: 444739
loss: 0.9758254289627075,grad_norm: 0.7754834331898831, iteration: 444740
loss: 1.0337809324264526,grad_norm: 0.9999997557216058, iteration: 444741
loss: 1.0313624143600464,grad_norm: 0.8520925163049797, iteration: 444742
loss: 0.977730393409729,grad_norm: 0.8394003981095405, iteration: 444743
loss: 0.9897544384002686,grad_norm: 0.8163286198604494, iteration: 444744
loss: 0.9926254749298096,grad_norm: 0.6839584975189417, iteration: 444745
loss: 0.9744255542755127,grad_norm: 0.6955133780932695, iteration: 444746
loss: 0.9319649934768677,grad_norm: 0.8324452448131779, iteration: 444747
loss: 1.0228275060653687,grad_norm: 0.9999999438112206, iteration: 444748
loss: 0.9924352765083313,grad_norm: 0.6716919563382449, iteration: 444749
loss: 1.0066044330596924,grad_norm: 0.8735130312409107, iteration: 444750
loss: 0.98077791929245,grad_norm: 0.7900861110650507, iteration: 444751
loss: 0.9654386043548584,grad_norm: 0.8371763513673478, iteration: 444752
loss: 1.0196186304092407,grad_norm: 0.7183897541947231, iteration: 444753
loss: 1.1411054134368896,grad_norm: 0.9999997920937356, iteration: 444754
loss: 1.0610054731369019,grad_norm: 0.8883147253376886, iteration: 444755
loss: 1.000506043434143,grad_norm: 0.816780376491328, iteration: 444756
loss: 1.024782419204712,grad_norm: 0.720748195076626, iteration: 444757
loss: 1.0184943675994873,grad_norm: 0.8150858008920093, iteration: 444758
loss: 0.9533188939094543,grad_norm: 0.8513431895306497, iteration: 444759
loss: 1.0691940784454346,grad_norm: 0.7949366177880682, iteration: 444760
loss: 1.0022423267364502,grad_norm: 0.825435565198304, iteration: 444761
loss: 1.0119378566741943,grad_norm: 0.9748033778750343, iteration: 444762
loss: 1.0057395696640015,grad_norm: 0.87149005436564, iteration: 444763
loss: 1.0118352174758911,grad_norm: 0.8629103555239607, iteration: 444764
loss: 1.1137635707855225,grad_norm: 0.9999993633188878, iteration: 444765
loss: 0.9733807444572449,grad_norm: 0.9253694040048335, iteration: 444766
loss: 0.994314432144165,grad_norm: 0.8647056891186378, iteration: 444767
loss: 1.0192776918411255,grad_norm: 0.8484228870709385, iteration: 444768
loss: 1.0459541082382202,grad_norm: 0.8713891485922646, iteration: 444769
loss: 0.9954966902732849,grad_norm: 0.6710152649418482, iteration: 444770
loss: 1.0248950719833374,grad_norm: 0.9999993664535256, iteration: 444771
loss: 1.0267410278320312,grad_norm: 0.7119965173770414, iteration: 444772
loss: 0.9830843210220337,grad_norm: 0.9337388317024977, iteration: 444773
loss: 0.9708834290504456,grad_norm: 0.9010586697170015, iteration: 444774
loss: 1.0163543224334717,grad_norm: 0.9999999005367485, iteration: 444775
loss: 1.03226637840271,grad_norm: 0.8176746352670372, iteration: 444776
loss: 0.9940866827964783,grad_norm: 0.726657877218005, iteration: 444777
loss: 0.9985216856002808,grad_norm: 0.7179412577084163, iteration: 444778
loss: 0.9978506565093994,grad_norm: 0.7652886457905073, iteration: 444779
loss: 1.1139576435089111,grad_norm: 0.9999990527845362, iteration: 444780
loss: 0.9844196438789368,grad_norm: 0.8722579987012632, iteration: 444781
loss: 1.013459324836731,grad_norm: 0.8104501073551077, iteration: 444782
loss: 1.0358721017837524,grad_norm: 0.8375489813158042, iteration: 444783
loss: 1.0376660823822021,grad_norm: 0.9999993640587972, iteration: 444784
loss: 0.9983091354370117,grad_norm: 0.7361094482542087, iteration: 444785
loss: 1.0758881568908691,grad_norm: 0.9999992219955794, iteration: 444786
loss: 0.9928503036499023,grad_norm: 0.8023783119908668, iteration: 444787
loss: 1.0060383081436157,grad_norm: 0.9999993702678889, iteration: 444788
loss: 1.0145546197891235,grad_norm: 0.8232622135806354, iteration: 444789
loss: 0.9564895629882812,grad_norm: 0.6716787593708204, iteration: 444790
loss: 1.1146690845489502,grad_norm: 0.9999998749286939, iteration: 444791
loss: 1.0640064477920532,grad_norm: 0.9999997534266176, iteration: 444792
loss: 1.146287441253662,grad_norm: 0.9999998909567751, iteration: 444793
loss: 0.9914731383323669,grad_norm: 0.761707643097353, iteration: 444794
loss: 1.059896469116211,grad_norm: 0.9831635681859265, iteration: 444795
loss: 1.0283195972442627,grad_norm: 0.9105486535167986, iteration: 444796
loss: 1.013426661491394,grad_norm: 0.9999989778272522, iteration: 444797
loss: 1.033122181892395,grad_norm: 0.9999993850378555, iteration: 444798
loss: 1.055567741394043,grad_norm: 0.9213507455187044, iteration: 444799
loss: 1.0274299383163452,grad_norm: 0.9999996930193182, iteration: 444800
loss: 1.045494794845581,grad_norm: 0.9999992610408699, iteration: 444801
loss: 1.0154790878295898,grad_norm: 0.8095182504142668, iteration: 444802
loss: 1.028649926185608,grad_norm: 0.9999993955212458, iteration: 444803
loss: 0.9752606749534607,grad_norm: 0.7271639207323939, iteration: 444804
loss: 0.9947577714920044,grad_norm: 0.7684309540882635, iteration: 444805
loss: 1.2083066701889038,grad_norm: 0.999999089432154, iteration: 444806
loss: 0.9991633296012878,grad_norm: 0.7174925532507609, iteration: 444807
loss: 1.0336545705795288,grad_norm: 0.8195459420549919, iteration: 444808
loss: 0.99370276927948,grad_norm: 0.9999998939848955, iteration: 444809
loss: 1.0022027492523193,grad_norm: 0.8375249228174293, iteration: 444810
loss: 1.0269681215286255,grad_norm: 0.7207487241072501, iteration: 444811
loss: 1.0706908702850342,grad_norm: 0.8602438787041756, iteration: 444812
loss: 1.0117117166519165,grad_norm: 0.8404656337157889, iteration: 444813
loss: 0.9866948127746582,grad_norm: 0.8227782849937969, iteration: 444814
loss: 1.0519341230392456,grad_norm: 0.9999996647213315, iteration: 444815
loss: 1.0508756637573242,grad_norm: 0.8516529793223997, iteration: 444816
loss: 1.0169563293457031,grad_norm: 0.8319113970168937, iteration: 444817
loss: 0.9790651798248291,grad_norm: 0.6836483715160036, iteration: 444818
loss: 0.9689491391181946,grad_norm: 0.6580345813960621, iteration: 444819
loss: 0.9607465267181396,grad_norm: 0.8173904802463141, iteration: 444820
loss: 1.0000500679016113,grad_norm: 0.7484928040343156, iteration: 444821
loss: 0.9881568551063538,grad_norm: 0.8620752903444718, iteration: 444822
loss: 0.9806313514709473,grad_norm: 0.7989579068567058, iteration: 444823
loss: 1.0236989259719849,grad_norm: 0.9999990094447125, iteration: 444824
loss: 1.0527403354644775,grad_norm: 0.8328409007926959, iteration: 444825
loss: 0.9845942854881287,grad_norm: 0.8360539501194386, iteration: 444826
loss: 1.0618008375167847,grad_norm: 0.999999774837247, iteration: 444827
loss: 1.0263704061508179,grad_norm: 0.794914230257691, iteration: 444828
loss: 0.99424147605896,grad_norm: 0.7506105563990219, iteration: 444829
loss: 1.0239232778549194,grad_norm: 0.7854413740650202, iteration: 444830
loss: 1.0797345638275146,grad_norm: 0.999999150723418, iteration: 444831
loss: 1.0072011947631836,grad_norm: 0.9403332455537227, iteration: 444832
loss: 0.9756865501403809,grad_norm: 0.7629811018704425, iteration: 444833
loss: 1.002369999885559,grad_norm: 0.8821387557069295, iteration: 444834
loss: 0.978579580783844,grad_norm: 0.7208424471774, iteration: 444835
loss: 1.047866702079773,grad_norm: 0.999999675166, iteration: 444836
loss: 1.015838623046875,grad_norm: 0.9999991155158539, iteration: 444837
loss: 0.9804830551147461,grad_norm: 0.8635213020547382, iteration: 444838
loss: 1.0009685754776,grad_norm: 0.7705361014206044, iteration: 444839
loss: 1.010319471359253,grad_norm: 0.6894480552358666, iteration: 444840
loss: 0.9863389134407043,grad_norm: 0.7091827309253584, iteration: 444841
loss: 1.0246703624725342,grad_norm: 0.812694001177509, iteration: 444842
loss: 1.0648601055145264,grad_norm: 0.791163773937236, iteration: 444843
loss: 0.9726808071136475,grad_norm: 0.7483104439518031, iteration: 444844
loss: 0.9854536652565002,grad_norm: 0.7164574941229737, iteration: 444845
loss: 1.0014814138412476,grad_norm: 0.7378126253016075, iteration: 444846
loss: 1.0059704780578613,grad_norm: 0.9749007887475618, iteration: 444847
loss: 1.0262935161590576,grad_norm: 0.8794809308235645, iteration: 444848
loss: 1.0154316425323486,grad_norm: 0.9056662419499304, iteration: 444849
loss: 0.9777922630310059,grad_norm: 0.7767046071739876, iteration: 444850
loss: 0.9697862267494202,grad_norm: 0.7394652258110077, iteration: 444851
loss: 0.9621689915657043,grad_norm: 0.8208787848123026, iteration: 444852
loss: 1.0125614404678345,grad_norm: 0.9999995408925895, iteration: 444853
loss: 1.0284143686294556,grad_norm: 0.7408753586437894, iteration: 444854
loss: 1.0236601829528809,grad_norm: 0.9308502164899857, iteration: 444855
loss: 0.9789350628852844,grad_norm: 0.776193004914515, iteration: 444856
loss: 0.9736741185188293,grad_norm: 0.794380619385664, iteration: 444857
loss: 1.0261800289154053,grad_norm: 0.9999991154748366, iteration: 444858
loss: 1.0831174850463867,grad_norm: 0.842874087927007, iteration: 444859
loss: 1.0458248853683472,grad_norm: 0.8064485181970676, iteration: 444860
loss: 1.0132101774215698,grad_norm: 0.8679024686661302, iteration: 444861
loss: 1.0512467622756958,grad_norm: 0.8026531740059815, iteration: 444862
loss: 0.9995091557502747,grad_norm: 0.8160254725119324, iteration: 444863
loss: 0.966279149055481,grad_norm: 0.7634281501459788, iteration: 444864
loss: 1.015647530555725,grad_norm: 0.7763679414681857, iteration: 444865
loss: 0.9986719489097595,grad_norm: 0.9207993794436387, iteration: 444866
loss: 1.0450161695480347,grad_norm: 0.6960386179058872, iteration: 444867
loss: 1.0071702003479004,grad_norm: 0.7501418473325145, iteration: 444868
loss: 0.9841773509979248,grad_norm: 0.7014956542487838, iteration: 444869
loss: 1.0062464475631714,grad_norm: 0.7409642046495521, iteration: 444870
loss: 1.0082964897155762,grad_norm: 0.7669691497547481, iteration: 444871
loss: 0.9854932427406311,grad_norm: 0.8232649643641183, iteration: 444872
loss: 0.9782686829566956,grad_norm: 0.7196336985192961, iteration: 444873
loss: 1.0630242824554443,grad_norm: 0.7548955583339437, iteration: 444874
loss: 1.0050482749938965,grad_norm: 0.8845233718881348, iteration: 444875
loss: 0.992333173751831,grad_norm: 0.9938212652310832, iteration: 444876
loss: 0.9826883673667908,grad_norm: 0.7272551890739635, iteration: 444877
loss: 1.0951998233795166,grad_norm: 0.9999991783007484, iteration: 444878
loss: 1.0009617805480957,grad_norm: 0.6253354610543641, iteration: 444879
loss: 1.1190723180770874,grad_norm: 0.999999304824322, iteration: 444880
loss: 1.006038784980774,grad_norm: 0.7455612347697307, iteration: 444881
loss: 0.970148503780365,grad_norm: 0.6971903493330985, iteration: 444882
loss: 0.9965105056762695,grad_norm: 0.7764849872140963, iteration: 444883
loss: 1.0011265277862549,grad_norm: 0.8934342860377189, iteration: 444884
loss: 0.9795491695404053,grad_norm: 0.9999992477974919, iteration: 444885
loss: 0.9610165953636169,grad_norm: 0.8324059695253879, iteration: 444886
loss: 1.0122520923614502,grad_norm: 0.703962324943044, iteration: 444887
loss: 1.0435484647750854,grad_norm: 0.8177169262182011, iteration: 444888
loss: 0.9960953593254089,grad_norm: 0.7906506009532945, iteration: 444889
loss: 1.0053205490112305,grad_norm: 0.8760396848565428, iteration: 444890
loss: 1.100886583328247,grad_norm: 0.9999998816764605, iteration: 444891
loss: 0.9902461171150208,grad_norm: 0.8106625787617873, iteration: 444892
loss: 0.98984694480896,grad_norm: 0.7940286122433626, iteration: 444893
loss: 0.9992696046829224,grad_norm: 0.6819586497873581, iteration: 444894
loss: 0.975360095500946,grad_norm: 0.830749775117397, iteration: 444895
loss: 1.023900032043457,grad_norm: 0.9688407831996739, iteration: 444896
loss: 0.9615903496742249,grad_norm: 0.8507278886152226, iteration: 444897
loss: 1.0097789764404297,grad_norm: 0.6793829071247567, iteration: 444898
loss: 0.9986568689346313,grad_norm: 0.6334310180061461, iteration: 444899
loss: 1.002747893333435,grad_norm: 0.8254785152952799, iteration: 444900
loss: 1.0044598579406738,grad_norm: 0.7062678202909057, iteration: 444901
loss: 1.1350995302200317,grad_norm: 0.9999992816338579, iteration: 444902
loss: 0.9908291101455688,grad_norm: 0.7315933970563016, iteration: 444903
loss: 1.0202919244766235,grad_norm: 0.9970788774885241, iteration: 444904
loss: 0.957481861114502,grad_norm: 0.881364692235048, iteration: 444905
loss: 1.0166722536087036,grad_norm: 0.7770412112448867, iteration: 444906
loss: 1.0760610103607178,grad_norm: 0.8672652181031393, iteration: 444907
loss: 1.1034040451049805,grad_norm: 0.9999991341830761, iteration: 444908
loss: 1.0185043811798096,grad_norm: 0.9999997285186332, iteration: 444909
loss: 1.0426241159439087,grad_norm: 0.8688794112241955, iteration: 444910
loss: 0.9670885801315308,grad_norm: 0.6435205546579119, iteration: 444911
loss: 1.0532859563827515,grad_norm: 0.9978234134635582, iteration: 444912
loss: 1.0823206901550293,grad_norm: 0.8339720455263171, iteration: 444913
loss: 1.000780463218689,grad_norm: 0.838387235916287, iteration: 444914
loss: 1.0016453266143799,grad_norm: 0.6602341713894833, iteration: 444915
loss: 1.0130399465560913,grad_norm: 0.7203929285576207, iteration: 444916
loss: 1.0200704336166382,grad_norm: 0.8903700091138793, iteration: 444917
loss: 1.006222128868103,grad_norm: 0.8403774286058572, iteration: 444918
loss: 0.9994162321090698,grad_norm: 0.9999994746140817, iteration: 444919
loss: 1.0262064933776855,grad_norm: 0.9183777799648322, iteration: 444920
loss: 1.0142508745193481,grad_norm: 0.8216670490819344, iteration: 444921
loss: 0.9932115077972412,grad_norm: 0.8591135725906694, iteration: 444922
loss: 0.9791229367256165,grad_norm: 0.6456488129558857, iteration: 444923
loss: 1.028477430343628,grad_norm: 0.7376069535231246, iteration: 444924
loss: 0.9765832424163818,grad_norm: 0.7827938645140589, iteration: 444925
loss: 1.0021926164627075,grad_norm: 0.677598804321222, iteration: 444926
loss: 1.0098121166229248,grad_norm: 0.6544899042227896, iteration: 444927
loss: 1.0533534288406372,grad_norm: 0.7617404789169047, iteration: 444928
loss: 1.076432228088379,grad_norm: 0.9304997613439178, iteration: 444929
loss: 1.024825930595398,grad_norm: 0.999999828811219, iteration: 444930
loss: 0.969470739364624,grad_norm: 0.6961403170416303, iteration: 444931
loss: 1.0071972608566284,grad_norm: 0.7842325740404894, iteration: 444932
loss: 1.0174833536148071,grad_norm: 0.7300365530645061, iteration: 444933
loss: 1.0223969221115112,grad_norm: 0.725638007666687, iteration: 444934
loss: 0.9869939684867859,grad_norm: 0.8092225261166505, iteration: 444935
loss: 1.0231719017028809,grad_norm: 0.9575141948142886, iteration: 444936
loss: 1.1928642988204956,grad_norm: 0.9999995532685989, iteration: 444937
loss: 1.0361875295639038,grad_norm: 0.8935693697514516, iteration: 444938
loss: 0.9826481342315674,grad_norm: 0.7402275372949775, iteration: 444939
loss: 1.029860496520996,grad_norm: 0.6719618586551552, iteration: 444940
loss: 1.35610830783844,grad_norm: 0.9999997245150615, iteration: 444941
loss: 1.0165010690689087,grad_norm: 0.6778941969332102, iteration: 444942
loss: 0.9539028406143188,grad_norm: 0.7069685544686608, iteration: 444943
loss: 1.0314396619796753,grad_norm: 0.7537863362839543, iteration: 444944
loss: 1.0320156812667847,grad_norm: 0.7132201099006501, iteration: 444945
loss: 0.9798478484153748,grad_norm: 0.8190616299633428, iteration: 444946
loss: 0.9718756079673767,grad_norm: 0.6952746298388431, iteration: 444947
loss: 0.9694502353668213,grad_norm: 0.7622113806011142, iteration: 444948
loss: 1.0061343908309937,grad_norm: 0.9999991470258949, iteration: 444949
loss: 0.9846050143241882,grad_norm: 0.8450453303177166, iteration: 444950
loss: 1.02690589427948,grad_norm: 0.9999991473847272, iteration: 444951
loss: 0.9734451174736023,grad_norm: 0.7311175668434727, iteration: 444952
loss: 1.0072650909423828,grad_norm: 0.7911088757759506, iteration: 444953
loss: 1.013163685798645,grad_norm: 0.9999992495791475, iteration: 444954
loss: 1.0067946910858154,grad_norm: 0.8350216877164058, iteration: 444955
loss: 1.0417029857635498,grad_norm: 0.8222396141558137, iteration: 444956
loss: 0.9595041871070862,grad_norm: 0.9625885358799704, iteration: 444957
loss: 0.9854199886322021,grad_norm: 0.7679006552249246, iteration: 444958
loss: 1.0212277173995972,grad_norm: 0.6242873850208833, iteration: 444959
loss: 0.983713686466217,grad_norm: 0.7866317807540092, iteration: 444960
loss: 0.9919589161872864,grad_norm: 0.8254246697889448, iteration: 444961
loss: 1.0262739658355713,grad_norm: 0.612814186437141, iteration: 444962
loss: 0.9948086142539978,grad_norm: 0.771413915200464, iteration: 444963
loss: 1.0077168941497803,grad_norm: 0.6831566191748947, iteration: 444964
loss: 1.025195598602295,grad_norm: 0.9999999601032199, iteration: 444965
loss: 0.9827244281768799,grad_norm: 0.9999991554471431, iteration: 444966
loss: 0.9869628548622131,grad_norm: 0.6602471359797679, iteration: 444967
loss: 0.9677290320396423,grad_norm: 0.9401629340457747, iteration: 444968
loss: 1.0008797645568848,grad_norm: 0.8358809688922223, iteration: 444969
loss: 0.9846459031105042,grad_norm: 0.7604006049771801, iteration: 444970
loss: 1.0274091958999634,grad_norm: 0.999999608980693, iteration: 444971
loss: 1.0472664833068848,grad_norm: 0.8632458558181995, iteration: 444972
loss: 1.0294784307479858,grad_norm: 0.865784706658664, iteration: 444973
loss: 1.1449382305145264,grad_norm: 0.9999991955867601, iteration: 444974
loss: 1.0029746294021606,grad_norm: 0.8168059010088647, iteration: 444975
loss: 0.9995667338371277,grad_norm: 0.7703084997436641, iteration: 444976
loss: 0.9998077750205994,grad_norm: 0.989503537488757, iteration: 444977
loss: 1.0030925273895264,grad_norm: 0.9999994261552356, iteration: 444978
loss: 1.0378235578536987,grad_norm: 0.8721761423867613, iteration: 444979
loss: 1.008913278579712,grad_norm: 0.7845737003467985, iteration: 444980
loss: 0.9985870718955994,grad_norm: 0.7456054879007393, iteration: 444981
loss: 1.01483154296875,grad_norm: 0.8958795455072325, iteration: 444982
loss: 1.0418931245803833,grad_norm: 0.9999991994912375, iteration: 444983
loss: 1.0405306816101074,grad_norm: 0.905293292118208, iteration: 444984
loss: 1.0203779935836792,grad_norm: 0.7515726743766413, iteration: 444985
loss: 1.0134690999984741,grad_norm: 0.7255174729319936, iteration: 444986
loss: 1.0161316394805908,grad_norm: 0.6184867718930234, iteration: 444987
loss: 1.026005744934082,grad_norm: 0.8128998704206168, iteration: 444988
loss: 1.0159832239151,grad_norm: 0.8221079422643011, iteration: 444989
loss: 1.0232701301574707,grad_norm: 0.8013425291536139, iteration: 444990
loss: 0.9952363967895508,grad_norm: 0.9999990429967895, iteration: 444991
loss: 1.0039938688278198,grad_norm: 0.8589170899024404, iteration: 444992
loss: 1.0294809341430664,grad_norm: 0.8721477732505222, iteration: 444993
loss: 0.9721526503562927,grad_norm: 0.6362799766780339, iteration: 444994
loss: 0.9955365657806396,grad_norm: 0.7584426857386665, iteration: 444995
loss: 0.9787935018539429,grad_norm: 0.6515986209153081, iteration: 444996
loss: 0.9931538701057434,grad_norm: 0.7003982313294662, iteration: 444997
loss: 1.0006349086761475,grad_norm: 0.7842272763223631, iteration: 444998
loss: 1.012024998664856,grad_norm: 0.7635046501354058, iteration: 444999
loss: 1.0136748552322388,grad_norm: 0.68000723187014, iteration: 445000
loss: 1.0042152404785156,grad_norm: 0.9163401048633513, iteration: 445001
loss: 1.0360333919525146,grad_norm: 0.999999765779956, iteration: 445002
loss: 1.0291308164596558,grad_norm: 0.981767358201478, iteration: 445003
loss: 1.016088604927063,grad_norm: 0.6983146351886321, iteration: 445004
loss: 0.9828894138336182,grad_norm: 0.604915451161794, iteration: 445005
loss: 1.0285667181015015,grad_norm: 0.7099077694344934, iteration: 445006
loss: 0.9680713415145874,grad_norm: 0.7746723961767811, iteration: 445007
loss: 1.0166441202163696,grad_norm: 0.7600737088701472, iteration: 445008
loss: 1.0204825401306152,grad_norm: 0.9999991392949336, iteration: 445009
loss: 0.9887728095054626,grad_norm: 0.8126304346510717, iteration: 445010
loss: 0.9681809544563293,grad_norm: 0.7957504573371179, iteration: 445011
loss: 0.9971102476119995,grad_norm: 0.7338536725178855, iteration: 445012
loss: 1.0044736862182617,grad_norm: 0.9231543514096985, iteration: 445013
loss: 0.9817208051681519,grad_norm: 0.8754430897033981, iteration: 445014
loss: 0.9839135408401489,grad_norm: 0.8134889944167562, iteration: 445015
loss: 0.9898897409439087,grad_norm: 0.7497814092307874, iteration: 445016
loss: 1.018532156944275,grad_norm: 0.9999990780129749, iteration: 445017
loss: 1.0279442071914673,grad_norm: 0.8229483122777596, iteration: 445018
loss: 1.0139435529708862,grad_norm: 0.8169949180544485, iteration: 445019
loss: 1.0211162567138672,grad_norm: 0.9416734820718056, iteration: 445020
loss: 1.0196161270141602,grad_norm: 0.7417928694989446, iteration: 445021
loss: 1.0301244258880615,grad_norm: 0.706845003187425, iteration: 445022
loss: 1.0236872434616089,grad_norm: 0.8488760393080049, iteration: 445023
loss: 0.9652336239814758,grad_norm: 0.7798093884682609, iteration: 445024
loss: 1.0073039531707764,grad_norm: 0.7950813981817615, iteration: 445025
loss: 1.0168867111206055,grad_norm: 0.6895418481004058, iteration: 445026
loss: 1.0137826204299927,grad_norm: 0.75433833159711, iteration: 445027
loss: 0.9898927211761475,grad_norm: 0.7895983803354101, iteration: 445028
loss: 0.9822801351547241,grad_norm: 0.7016463631933, iteration: 445029
loss: 0.9783559441566467,grad_norm: 0.7767908989199012, iteration: 445030
loss: 1.0087496042251587,grad_norm: 0.6782281842828641, iteration: 445031
loss: 0.9678128361701965,grad_norm: 0.872611665931451, iteration: 445032
loss: 0.9942541122436523,grad_norm: 0.9703116234004286, iteration: 445033
loss: 0.9854612946510315,grad_norm: 0.954005641875982, iteration: 445034
loss: 1.0246007442474365,grad_norm: 0.6705184214465205, iteration: 445035
loss: 0.9910808801651001,grad_norm: 0.7325236196192789, iteration: 445036
loss: 1.0098545551300049,grad_norm: 0.9971072197457578, iteration: 445037
loss: 0.974671483039856,grad_norm: 0.7940927950350006, iteration: 445038
loss: 1.0345755815505981,grad_norm: 0.7718369309901628, iteration: 445039
loss: 1.0416359901428223,grad_norm: 0.9151846450571464, iteration: 445040
loss: 1.0003114938735962,grad_norm: 0.7952062391843692, iteration: 445041
loss: 1.0215681791305542,grad_norm: 0.7152308958028393, iteration: 445042
loss: 1.0225034952163696,grad_norm: 0.8037077958642966, iteration: 445043
loss: 1.0406625270843506,grad_norm: 0.7990238282983121, iteration: 445044
loss: 1.0521680116653442,grad_norm: 0.831138623154103, iteration: 445045
loss: 1.0118303298950195,grad_norm: 0.9121010322767296, iteration: 445046
loss: 1.0019330978393555,grad_norm: 0.7514589112096539, iteration: 445047
loss: 0.9848451614379883,grad_norm: 0.8100510931823048, iteration: 445048
loss: 0.9875146150588989,grad_norm: 0.7580722498823829, iteration: 445049
loss: 1.0285850763320923,grad_norm: 0.9273256221311377, iteration: 445050
loss: 1.030946969985962,grad_norm: 0.8585928584447708, iteration: 445051
loss: 1.0350403785705566,grad_norm: 0.9999997678975973, iteration: 445052
loss: 0.991768479347229,grad_norm: 0.7934007520977412, iteration: 445053
loss: 0.9864100813865662,grad_norm: 0.8202027179498917, iteration: 445054
loss: 0.9606027603149414,grad_norm: 0.8152274901989952, iteration: 445055
loss: 1.0931377410888672,grad_norm: 0.999999147796131, iteration: 445056
loss: 1.0128672122955322,grad_norm: 0.6800068121865739, iteration: 445057
loss: 1.0146552324295044,grad_norm: 0.7942577719018927, iteration: 445058
loss: 1.0271129608154297,grad_norm: 0.7603898729455902, iteration: 445059
loss: 1.0300604104995728,grad_norm: 0.9999991490066287, iteration: 445060
loss: 1.0888786315917969,grad_norm: 0.9999993128755503, iteration: 445061
loss: 0.9805927872657776,grad_norm: 0.8594611434284111, iteration: 445062
loss: 0.9875576496124268,grad_norm: 0.7007171413051472, iteration: 445063
loss: 0.983615517616272,grad_norm: 0.6052500787020926, iteration: 445064
loss: 0.9884596467018127,grad_norm: 0.9999990244203161, iteration: 445065
loss: 0.9879827499389648,grad_norm: 0.6868162723717298, iteration: 445066
loss: 0.9656431078910828,grad_norm: 0.8609381150382492, iteration: 445067
loss: 0.9590230584144592,grad_norm: 0.9999990621176077, iteration: 445068
loss: 0.9884708523750305,grad_norm: 0.6971464971181721, iteration: 445069
loss: 1.0142635107040405,grad_norm: 0.8224263884230731, iteration: 445070
loss: 1.0306761264801025,grad_norm: 0.9999992982121622, iteration: 445071
loss: 0.9755430221557617,grad_norm: 0.8756984429725371, iteration: 445072
loss: 1.0209192037582397,grad_norm: 0.7383691197217988, iteration: 445073
loss: 1.0072124004364014,grad_norm: 0.7228502094364434, iteration: 445074
loss: 1.00196373462677,grad_norm: 0.7509898846261246, iteration: 445075
loss: 1.0189943313598633,grad_norm: 0.7501128617408452, iteration: 445076
loss: 1.0036426782608032,grad_norm: 0.6977597395751837, iteration: 445077
loss: 0.9901513457298279,grad_norm: 0.701467152881144, iteration: 445078
loss: 0.9961461424827576,grad_norm: 0.7669078325178159, iteration: 445079
loss: 0.9878703355789185,grad_norm: 0.6562852879149979, iteration: 445080
loss: 0.9821918606758118,grad_norm: 0.911661116463819, iteration: 445081
loss: 0.9841756820678711,grad_norm: 0.8144277274540311, iteration: 445082
loss: 1.0072427988052368,grad_norm: 0.7701976706976393, iteration: 445083
loss: 1.027098298072815,grad_norm: 0.7110144741793789, iteration: 445084
loss: 0.954451858997345,grad_norm: 0.6715148435770669, iteration: 445085
loss: 0.9974573254585266,grad_norm: 0.7001593697668953, iteration: 445086
loss: 1.0159626007080078,grad_norm: 0.8210076817610891, iteration: 445087
loss: 1.0208160877227783,grad_norm: 0.7142987212325551, iteration: 445088
loss: 1.0393444299697876,grad_norm: 0.7201327685950183, iteration: 445089
loss: 0.9970150589942932,grad_norm: 0.75883491484277, iteration: 445090
loss: 0.9831781387329102,grad_norm: 0.789711133907261, iteration: 445091
loss: 1.0190719366073608,grad_norm: 0.7912556681609916, iteration: 445092
loss: 1.0079656839370728,grad_norm: 0.8410870002730964, iteration: 445093
loss: 1.0445146560668945,grad_norm: 0.9999990787061597, iteration: 445094
loss: 1.013136863708496,grad_norm: 0.7082443507177419, iteration: 445095
loss: 1.0409698486328125,grad_norm: 0.6808325762516307, iteration: 445096
loss: 1.0189390182495117,grad_norm: 0.741748939790056, iteration: 445097
loss: 1.0067616701126099,grad_norm: 0.702021273072274, iteration: 445098
loss: 1.0332592725753784,grad_norm: 0.9573888984415616, iteration: 445099
loss: 1.0282124280929565,grad_norm: 0.8315200076757756, iteration: 445100
loss: 1.0781536102294922,grad_norm: 0.8937148329826781, iteration: 445101
loss: 1.030348300933838,grad_norm: 0.9999997378077069, iteration: 445102
loss: 1.0693737268447876,grad_norm: 0.8270817124871409, iteration: 445103
loss: 1.0189487934112549,grad_norm: 0.7969325014556038, iteration: 445104
loss: 0.9563264846801758,grad_norm: 0.8739150619688676, iteration: 445105
loss: 0.9858500957489014,grad_norm: 0.7544410097415841, iteration: 445106
loss: 1.0193092823028564,grad_norm: 0.9999997675965724, iteration: 445107
loss: 1.0147637128829956,grad_norm: 0.9999989977353649, iteration: 445108
loss: 1.0059958696365356,grad_norm: 0.7767447991281269, iteration: 445109
loss: 0.9830562472343445,grad_norm: 0.9999989896035866, iteration: 445110
loss: 0.9955629706382751,grad_norm: 0.6960474434606622, iteration: 445111
loss: 0.9628856778144836,grad_norm: 0.6991029935950727, iteration: 445112
loss: 1.0081943273544312,grad_norm: 0.7416160920439335, iteration: 445113
loss: 0.9769619703292847,grad_norm: 0.7406101040012486, iteration: 445114
loss: 1.0136061906814575,grad_norm: 0.9013631431228416, iteration: 445115
loss: 1.013121485710144,grad_norm: 0.9302253324231256, iteration: 445116
loss: 1.031912922859192,grad_norm: 0.7558702559280511, iteration: 445117
loss: 0.9855149984359741,grad_norm: 0.9180477978433589, iteration: 445118
loss: 0.9605708122253418,grad_norm: 0.7730274120008882, iteration: 445119
loss: 1.0008151531219482,grad_norm: 0.720740703816463, iteration: 445120
loss: 1.0112550258636475,grad_norm: 0.9999992612571174, iteration: 445121
loss: 0.983795166015625,grad_norm: 0.6420807198385305, iteration: 445122
loss: 0.9561402201652527,grad_norm: 0.8877954807858309, iteration: 445123
loss: 1.0062974691390991,grad_norm: 0.8999260099829847, iteration: 445124
loss: 1.0226404666900635,grad_norm: 0.6424695517250395, iteration: 445125
loss: 0.9812387228012085,grad_norm: 0.999999621236163, iteration: 445126
loss: 0.9919251203536987,grad_norm: 0.758236618779856, iteration: 445127
loss: 1.0155909061431885,grad_norm: 0.8147214773040978, iteration: 445128
loss: 1.0230987071990967,grad_norm: 0.8074908518811658, iteration: 445129
loss: 0.9798168540000916,grad_norm: 0.7653011005054077, iteration: 445130
loss: 0.9984527230262756,grad_norm: 0.651030396037719, iteration: 445131
loss: 0.9905754327774048,grad_norm: 0.6912020675837016, iteration: 445132
loss: 1.0131843090057373,grad_norm: 0.999999584204719, iteration: 445133
loss: 1.0013988018035889,grad_norm: 0.8236658820079089, iteration: 445134
loss: 0.9917871952056885,grad_norm: 0.7379919342683063, iteration: 445135
loss: 0.9803166389465332,grad_norm: 0.5471403994351316, iteration: 445136
loss: 0.9997696280479431,grad_norm: 0.8094063213672048, iteration: 445137
loss: 0.9844085574150085,grad_norm: 0.7237686565111343, iteration: 445138
loss: 0.9874474406242371,grad_norm: 0.8479732656900623, iteration: 445139
loss: 0.9842113256454468,grad_norm: 0.8324176418839501, iteration: 445140
loss: 1.0179381370544434,grad_norm: 0.6786268955008307, iteration: 445141
loss: 0.9710647463798523,grad_norm: 0.8229045646728469, iteration: 445142
loss: 0.9943058490753174,grad_norm: 0.9999991932156835, iteration: 445143
loss: 1.0041261911392212,grad_norm: 0.7610250584646583, iteration: 445144
loss: 1.005733609199524,grad_norm: 0.8612873543792988, iteration: 445145
loss: 1.004422903060913,grad_norm: 0.9590617575928614, iteration: 445146
loss: 0.9788590669631958,grad_norm: 0.7408787948547889, iteration: 445147
loss: 0.9992347955703735,grad_norm: 0.8753170040768431, iteration: 445148
loss: 1.0062108039855957,grad_norm: 0.8124291279849097, iteration: 445149
loss: 0.9877411723136902,grad_norm: 0.7501295956699937, iteration: 445150
loss: 1.0313265323638916,grad_norm: 0.711608959068497, iteration: 445151
loss: 0.9975281357765198,grad_norm: 0.7242207731349496, iteration: 445152
loss: 1.01529860496521,grad_norm: 0.6755427937442917, iteration: 445153
loss: 1.0947092771530151,grad_norm: 0.9999991553485094, iteration: 445154
loss: 0.9899382591247559,grad_norm: 0.8955660971695626, iteration: 445155
loss: 1.0129756927490234,grad_norm: 0.7542129101788078, iteration: 445156
loss: 1.0126607418060303,grad_norm: 0.9421322384228038, iteration: 445157
loss: 1.0105684995651245,grad_norm: 0.7638745661994616, iteration: 445158
loss: 0.9670072197914124,grad_norm: 0.7574492276932683, iteration: 445159
loss: 0.9843643307685852,grad_norm: 0.8491433858825294, iteration: 445160
loss: 0.9734376072883606,grad_norm: 0.7131896845454028, iteration: 445161
loss: 1.0267740488052368,grad_norm: 0.7299302388347554, iteration: 445162
loss: 1.0130642652511597,grad_norm: 0.7110697786545583, iteration: 445163
loss: 1.0028297901153564,grad_norm: 0.8455843804195682, iteration: 445164
loss: 0.9806981086730957,grad_norm: 0.8519188394345947, iteration: 445165
loss: 0.9905490875244141,grad_norm: 0.8173147712028795, iteration: 445166
loss: 1.0079655647277832,grad_norm: 0.7831745487224774, iteration: 445167
loss: 0.9796842336654663,grad_norm: 0.8789408848651824, iteration: 445168
loss: 0.9993047118186951,grad_norm: 0.7060092004832672, iteration: 445169
loss: 0.9781181812286377,grad_norm: 0.7931145789716703, iteration: 445170
loss: 1.0259219408035278,grad_norm: 0.7304222023877536, iteration: 445171
loss: 0.9958451986312866,grad_norm: 0.7298210817607605, iteration: 445172
loss: 1.0273422002792358,grad_norm: 0.7918492396942093, iteration: 445173
loss: 0.9935939908027649,grad_norm: 0.9061868013932914, iteration: 445174
loss: 1.0097626447677612,grad_norm: 0.6810200150093426, iteration: 445175
loss: 0.9801905751228333,grad_norm: 0.7966088303473113, iteration: 445176
loss: 1.1012850999832153,grad_norm: 0.8525880223498041, iteration: 445177
loss: 1.0170843601226807,grad_norm: 0.703344420097919, iteration: 445178
loss: 0.9339504837989807,grad_norm: 0.7509637680487466, iteration: 445179
loss: 0.9873619675636292,grad_norm: 0.7793968015684298, iteration: 445180
loss: 0.9373610615730286,grad_norm: 0.8605474089112334, iteration: 445181
loss: 1.0153888463974,grad_norm: 0.8153232824569306, iteration: 445182
loss: 0.9746763706207275,grad_norm: 0.6879846417630568, iteration: 445183
loss: 1.034834384918213,grad_norm: 0.7405847872907048, iteration: 445184
loss: 0.9933056831359863,grad_norm: 0.785129110953772, iteration: 445185
loss: 0.976200520992279,grad_norm: 0.7992318560450223, iteration: 445186
loss: 1.0258394479751587,grad_norm: 0.8243478241188609, iteration: 445187
loss: 0.9974881410598755,grad_norm: 0.7778894270036076, iteration: 445188
loss: 0.9890820384025574,grad_norm: 0.999999622881588, iteration: 445189
loss: 0.9980957508087158,grad_norm: 0.8128088231503164, iteration: 445190
loss: 1.0026620626449585,grad_norm: 0.6884611659679548, iteration: 445191
loss: 0.9817563891410828,grad_norm: 0.781008494835177, iteration: 445192
loss: 1.022275447845459,grad_norm: 0.9999992170062111, iteration: 445193
loss: 1.0009406805038452,grad_norm: 0.9589779235544822, iteration: 445194
loss: 1.0092833042144775,grad_norm: 0.8130378174670557, iteration: 445195
loss: 1.0080219507217407,grad_norm: 0.698953331167211, iteration: 445196
loss: 0.974591851234436,grad_norm: 0.6917779903475142, iteration: 445197
loss: 1.013087272644043,grad_norm: 0.6135325603818123, iteration: 445198
loss: 1.0390377044677734,grad_norm: 0.7580113746410198, iteration: 445199
loss: 0.9847646355628967,grad_norm: 0.8442507803126866, iteration: 445200
loss: 1.1507259607315063,grad_norm: 0.9999994417000694, iteration: 445201
loss: 0.958284854888916,grad_norm: 0.7501800112521205, iteration: 445202
loss: 1.0170924663543701,grad_norm: 0.7426284804863164, iteration: 445203
loss: 0.9669493436813354,grad_norm: 0.8939675731061669, iteration: 445204
loss: 1.0158601999282837,grad_norm: 0.7979659159093033, iteration: 445205
loss: 0.9819474220275879,grad_norm: 0.8487660351788002, iteration: 445206
loss: 0.9978539943695068,grad_norm: 0.7614064164140558, iteration: 445207
loss: 1.017233967781067,grad_norm: 0.9292119436400493, iteration: 445208
loss: 1.0553773641586304,grad_norm: 0.999999291787819, iteration: 445209
loss: 1.0084075927734375,grad_norm: 0.7379130341364737, iteration: 445210
loss: 1.0052814483642578,grad_norm: 0.801799957069054, iteration: 445211
loss: 0.9710033535957336,grad_norm: 0.8165679058541596, iteration: 445212
loss: 0.9900029897689819,grad_norm: 0.9897590751095326, iteration: 445213
loss: 1.0383321046829224,grad_norm: 0.8471262001993619, iteration: 445214
loss: 1.0241179466247559,grad_norm: 0.6570834086511385, iteration: 445215
loss: 1.0001758337020874,grad_norm: 0.8784030836643784, iteration: 445216
loss: 1.0344778299331665,grad_norm: 0.7479571805536125, iteration: 445217
loss: 1.0096139907836914,grad_norm: 0.7779874818916529, iteration: 445218
loss: 1.0256075859069824,grad_norm: 0.8894695167941341, iteration: 445219
loss: 0.9625388383865356,grad_norm: 0.8266514699434357, iteration: 445220
loss: 1.0639294385910034,grad_norm: 0.999999450077818, iteration: 445221
loss: 0.9839693903923035,grad_norm: 0.802741522113748, iteration: 445222
loss: 1.019850254058838,grad_norm: 0.7899465773877651, iteration: 445223
loss: 0.9936410188674927,grad_norm: 0.8497244933322649, iteration: 445224
loss: 0.9873273968696594,grad_norm: 0.7276974311502263, iteration: 445225
loss: 0.9851575493812561,grad_norm: 0.8239759848960989, iteration: 445226
loss: 0.9922831654548645,grad_norm: 0.7031168938209513, iteration: 445227
loss: 1.1575225591659546,grad_norm: 0.9999997136017817, iteration: 445228
loss: 1.0035916566848755,grad_norm: 0.7877768244207966, iteration: 445229
loss: 0.9817415475845337,grad_norm: 0.8697646570533638, iteration: 445230
loss: 1.0122708082199097,grad_norm: 0.783928514532, iteration: 445231
loss: 1.0130149126052856,grad_norm: 0.7278562895862029, iteration: 445232
loss: 1.0122427940368652,grad_norm: 0.9999996383582446, iteration: 445233
loss: 1.0566117763519287,grad_norm: 0.74016596656452, iteration: 445234
loss: 0.9754815101623535,grad_norm: 0.83400345437952, iteration: 445235
loss: 0.9910793304443359,grad_norm: 0.7472078494246132, iteration: 445236
loss: 0.9936308264732361,grad_norm: 0.8416622306941233, iteration: 445237
loss: 1.0476468801498413,grad_norm: 0.9999998375094243, iteration: 445238
loss: 0.9634642601013184,grad_norm: 0.7842677902052486, iteration: 445239
loss: 1.138429880142212,grad_norm: 0.8933410606820863, iteration: 445240
loss: 1.0212812423706055,grad_norm: 0.6790827983686966, iteration: 445241
loss: 1.0010955333709717,grad_norm: 0.8158947731985581, iteration: 445242
loss: 1.0075510740280151,grad_norm: 0.7644100143428771, iteration: 445243
loss: 0.9728667736053467,grad_norm: 0.9846993484135649, iteration: 445244
loss: 1.006257176399231,grad_norm: 0.921880062685149, iteration: 445245
loss: 1.0087660551071167,grad_norm: 0.7273982430886885, iteration: 445246
loss: 0.9932604432106018,grad_norm: 0.7639985410986696, iteration: 445247
loss: 1.012338638305664,grad_norm: 0.6782035232413173, iteration: 445248
loss: 1.0143399238586426,grad_norm: 0.9565094626602361, iteration: 445249
loss: 0.9895090460777283,grad_norm: 0.9193628533759409, iteration: 445250
loss: 1.05148184299469,grad_norm: 0.879337667132001, iteration: 445251
loss: 1.037049651145935,grad_norm: 0.7151106377086502, iteration: 445252
loss: 1.0077348947525024,grad_norm: 0.6715727275217663, iteration: 445253
loss: 0.9899827837944031,grad_norm: 0.7290576747188159, iteration: 445254
loss: 1.009170413017273,grad_norm: 0.7267966977489196, iteration: 445255
loss: 1.0078372955322266,grad_norm: 0.70586928422544, iteration: 445256
loss: 1.020455241203308,grad_norm: 0.8954060586249409, iteration: 445257
loss: 1.0376067161560059,grad_norm: 0.7455009855413108, iteration: 445258
loss: 1.0248767137527466,grad_norm: 0.6765588627845808, iteration: 445259
loss: 1.0054374933242798,grad_norm: 0.7515907192759175, iteration: 445260
loss: 1.0103861093521118,grad_norm: 0.9999993753518253, iteration: 445261
loss: 1.0512202978134155,grad_norm: 0.783322714254416, iteration: 445262
loss: 1.001679539680481,grad_norm: 0.9780039884370633, iteration: 445263
loss: 0.9902846217155457,grad_norm: 0.7826848274902929, iteration: 445264
loss: 0.9777088761329651,grad_norm: 0.7436521329893375, iteration: 445265
loss: 1.0161644220352173,grad_norm: 0.7050959081336041, iteration: 445266
loss: 1.0099774599075317,grad_norm: 0.9894157007616198, iteration: 445267
loss: 0.9969403147697449,grad_norm: 0.6293425408094542, iteration: 445268
loss: 1.0200023651123047,grad_norm: 0.744025647867022, iteration: 445269
loss: 0.9859968423843384,grad_norm: 0.8537701318200641, iteration: 445270
loss: 1.0161302089691162,grad_norm: 0.7233067185036234, iteration: 445271
loss: 0.9857688546180725,grad_norm: 0.8665271556796862, iteration: 445272
loss: 0.9813829660415649,grad_norm: 0.8719360636940188, iteration: 445273
loss: 0.9918721914291382,grad_norm: 0.7486451041539999, iteration: 445274
loss: 0.9833530187606812,grad_norm: 0.75224433792841, iteration: 445275
loss: 1.0783251523971558,grad_norm: 0.9779891667064096, iteration: 445276
loss: 1.0253182649612427,grad_norm: 0.6930692893151895, iteration: 445277
loss: 1.0125668048858643,grad_norm: 0.7449098902578324, iteration: 445278
loss: 1.0290236473083496,grad_norm: 0.8437482068430993, iteration: 445279
loss: 1.0032304525375366,grad_norm: 0.7996635083788466, iteration: 445280
loss: 1.000536561012268,grad_norm: 0.6516479900460452, iteration: 445281
loss: 1.0616176128387451,grad_norm: 0.751189073352198, iteration: 445282
loss: 1.001508355140686,grad_norm: 0.999999667814713, iteration: 445283
loss: 0.9898183345794678,grad_norm: 0.8466592976595102, iteration: 445284
loss: 0.9802840352058411,grad_norm: 0.8342007189485977, iteration: 445285
loss: 0.9712137579917908,grad_norm: 0.7209015778413541, iteration: 445286
loss: 0.9866432547569275,grad_norm: 0.8397893169335927, iteration: 445287
loss: 0.9825569987297058,grad_norm: 0.9999989874348495, iteration: 445288
loss: 1.0164248943328857,grad_norm: 0.6855032646666784, iteration: 445289
loss: 1.0284154415130615,grad_norm: 0.9715896567862543, iteration: 445290
loss: 1.0055235624313354,grad_norm: 0.699208564837046, iteration: 445291
loss: 0.9883983135223389,grad_norm: 0.8077821768722322, iteration: 445292
loss: 0.9907135963439941,grad_norm: 0.7569907508381666, iteration: 445293
loss: 1.049222707748413,grad_norm: 0.9999990457379921, iteration: 445294
loss: 0.9717894196510315,grad_norm: 0.999999357038029, iteration: 445295
loss: 1.0172760486602783,grad_norm: 0.9999999989699013, iteration: 445296
loss: 0.987815797328949,grad_norm: 0.8335238836684263, iteration: 445297
loss: 0.9846075177192688,grad_norm: 0.7491642311796889, iteration: 445298
loss: 1.081238865852356,grad_norm: 0.9999995159456617, iteration: 445299
loss: 1.0074738264083862,grad_norm: 0.9501046335496953, iteration: 445300
loss: 1.0036453008651733,grad_norm: 0.6770427069482444, iteration: 445301
loss: 1.0232981443405151,grad_norm: 0.7679261297503713, iteration: 445302
loss: 0.9688645005226135,grad_norm: 0.824950292258251, iteration: 445303
loss: 0.990484893321991,grad_norm: 0.7363045471681251, iteration: 445304
loss: 1.0227302312850952,grad_norm: 0.7433479575271814, iteration: 445305
loss: 1.059674620628357,grad_norm: 0.8150082683160196, iteration: 445306
loss: 1.077504277229309,grad_norm: 0.9999992176830195, iteration: 445307
loss: 0.9894156455993652,grad_norm: 0.8042096305720358, iteration: 445308
loss: 0.9910737872123718,grad_norm: 0.7560917018436726, iteration: 445309
loss: 0.978255569934845,grad_norm: 0.8627916366136655, iteration: 445310
loss: 1.0333659648895264,grad_norm: 0.8040655804149046, iteration: 445311
loss: 1.0070801973342896,grad_norm: 0.8437779837177136, iteration: 445312
loss: 1.0367372035980225,grad_norm: 0.6346321427720587, iteration: 445313
loss: 0.999646782875061,grad_norm: 0.9626601411013473, iteration: 445314
loss: 1.0189909934997559,grad_norm: 0.8722164745541364, iteration: 445315
loss: 1.0283478498458862,grad_norm: 0.860714631236387, iteration: 445316
loss: 0.9852962493896484,grad_norm: 0.7906090324488312, iteration: 445317
loss: 0.9908455610275269,grad_norm: 0.7290850853996286, iteration: 445318
loss: 1.022038459777832,grad_norm: 0.7895861611814381, iteration: 445319
loss: 1.0191395282745361,grad_norm: 0.8689383962114391, iteration: 445320
loss: 1.1394046545028687,grad_norm: 0.7756932216124386, iteration: 445321
loss: 1.000664472579956,grad_norm: 0.7543400404680036, iteration: 445322
loss: 1.0316517353057861,grad_norm: 0.9999993509930194, iteration: 445323
loss: 1.0150432586669922,grad_norm: 0.7570157778603694, iteration: 445324
loss: 1.0069270133972168,grad_norm: 0.7247671839691908, iteration: 445325
loss: 1.0103377103805542,grad_norm: 0.8063530967213992, iteration: 445326
loss: 0.9729261994361877,grad_norm: 0.7452729640551653, iteration: 445327
loss: 1.0235673189163208,grad_norm: 0.8301551510618207, iteration: 445328
loss: 1.0220603942871094,grad_norm: 0.8351748155017147, iteration: 445329
loss: 0.976389467716217,grad_norm: 0.7844632745640632, iteration: 445330
loss: 0.9963095188140869,grad_norm: 0.7131848779729074, iteration: 445331
loss: 1.0354700088500977,grad_norm: 0.8125712339384871, iteration: 445332
loss: 0.9899730086326599,grad_norm: 0.6277160811848401, iteration: 445333
loss: 0.9862103462219238,grad_norm: 0.685675075821425, iteration: 445334
loss: 1.0274142026901245,grad_norm: 0.9321733164002677, iteration: 445335
loss: 0.9950165748596191,grad_norm: 0.9352275842388739, iteration: 445336
loss: 1.004686713218689,grad_norm: 0.7601186702153109, iteration: 445337
loss: 0.9921101927757263,grad_norm: 0.70346400228375, iteration: 445338
loss: 0.9727761149406433,grad_norm: 0.9745052560535306, iteration: 445339
loss: 1.0429385900497437,grad_norm: 0.7604554678103033, iteration: 445340
loss: 0.9890085458755493,grad_norm: 0.7200960317113713, iteration: 445341
loss: 1.0137004852294922,grad_norm: 0.7266624819706582, iteration: 445342
loss: 1.021251916885376,grad_norm: 0.8153844114510795, iteration: 445343
loss: 0.9872204661369324,grad_norm: 0.9027043938862577, iteration: 445344
loss: 1.0438920259475708,grad_norm: 0.7647063811227666, iteration: 445345
loss: 1.064104437828064,grad_norm: 0.8424945226230411, iteration: 445346
loss: 0.9730477929115295,grad_norm: 0.8110600361397564, iteration: 445347
loss: 0.98316890001297,grad_norm: 0.7107785469606899, iteration: 445348
loss: 0.9808990359306335,grad_norm: 0.701290600655713, iteration: 445349
loss: 1.0761969089508057,grad_norm: 0.9999997501783794, iteration: 445350
loss: 0.9957484006881714,grad_norm: 0.7670756228886938, iteration: 445351
loss: 0.9992967247962952,grad_norm: 0.6544146161383759, iteration: 445352
loss: 1.030870795249939,grad_norm: 0.691453412462329, iteration: 445353
loss: 1.012076735496521,grad_norm: 0.8123711312242955, iteration: 445354
loss: 1.0156329870224,grad_norm: 0.9400948444260357, iteration: 445355
loss: 0.9661164283752441,grad_norm: 0.7886203201088986, iteration: 445356
loss: 1.0066959857940674,grad_norm: 0.8338654699281458, iteration: 445357
loss: 0.9597795009613037,grad_norm: 0.8075156454833317, iteration: 445358
loss: 0.9912528395652771,grad_norm: 0.9322628848157272, iteration: 445359
loss: 1.0436376333236694,grad_norm: 0.8562404787458993, iteration: 445360
loss: 1.0346336364746094,grad_norm: 0.9999992364040644, iteration: 445361
loss: 0.9767189621925354,grad_norm: 0.7127456404189697, iteration: 445362
loss: 1.0026183128356934,grad_norm: 0.8946371025277082, iteration: 445363
loss: 1.0040110349655151,grad_norm: 0.7200816999873279, iteration: 445364
loss: 0.9727136492729187,grad_norm: 0.7974285112067314, iteration: 445365
loss: 1.0286240577697754,grad_norm: 0.9999992371076798, iteration: 445366
loss: 1.0377166271209717,grad_norm: 0.9003352194592318, iteration: 445367
loss: 0.9775470495223999,grad_norm: 0.8303340986511747, iteration: 445368
loss: 0.9877423048019409,grad_norm: 0.999999147196135, iteration: 445369
loss: 0.9644395709037781,grad_norm: 0.7589316304724587, iteration: 445370
loss: 0.9804587960243225,grad_norm: 0.7410300749361718, iteration: 445371
loss: 1.0064610242843628,grad_norm: 0.6350145735274788, iteration: 445372
loss: 0.9854428172111511,grad_norm: 0.7288234031543397, iteration: 445373
loss: 1.0251647233963013,grad_norm: 0.8604291638876947, iteration: 445374
loss: 0.9985303282737732,grad_norm: 0.7765733475866147, iteration: 445375
loss: 1.0252892971038818,grad_norm: 0.8500820125928601, iteration: 445376
loss: 1.026922583580017,grad_norm: 0.7574321247839477, iteration: 445377
loss: 1.0171809196472168,grad_norm: 0.8992328893734755, iteration: 445378
loss: 0.981658399105072,grad_norm: 0.7826162137074087, iteration: 445379
loss: 0.9495654702186584,grad_norm: 0.736800727690451, iteration: 445380
loss: 0.9680825471878052,grad_norm: 0.7835872865186231, iteration: 445381
loss: 1.0222684144973755,grad_norm: 0.6640127165449042, iteration: 445382
loss: 1.0151687860488892,grad_norm: 0.8626537455553479, iteration: 445383
loss: 1.0301755666732788,grad_norm: 0.8063807392303262, iteration: 445384
loss: 0.9938583970069885,grad_norm: 0.6835415819502483, iteration: 445385
loss: 0.9951282739639282,grad_norm: 0.9999997568820344, iteration: 445386
loss: 0.961622953414917,grad_norm: 0.7494353736056689, iteration: 445387
loss: 0.9862112998962402,grad_norm: 0.6775267391719844, iteration: 445388
loss: 1.1088950634002686,grad_norm: 0.9999994654995249, iteration: 445389
loss: 0.976287841796875,grad_norm: 0.7051369294098786, iteration: 445390
loss: 1.0291922092437744,grad_norm: 0.999999141266136, iteration: 445391
loss: 1.0054137706756592,grad_norm: 0.6803499558527274, iteration: 445392
loss: 0.947738528251648,grad_norm: 0.6300590652219287, iteration: 445393
loss: 1.0561734437942505,grad_norm: 0.811548271616774, iteration: 445394
loss: 1.0453137159347534,grad_norm: 0.7978211609289436, iteration: 445395
loss: 1.020967960357666,grad_norm: 0.9174190521749662, iteration: 445396
loss: 0.9837285876274109,grad_norm: 0.8661654747641471, iteration: 445397
loss: 1.0058521032333374,grad_norm: 0.8522557704429579, iteration: 445398
loss: 1.0226489305496216,grad_norm: 0.6986217282263725, iteration: 445399
loss: 1.0184017419815063,grad_norm: 0.542693393414587, iteration: 445400
loss: 1.0029406547546387,grad_norm: 0.9999990629341422, iteration: 445401
loss: 0.960116446018219,grad_norm: 0.925647585814042, iteration: 445402
loss: 1.0153204202651978,grad_norm: 0.9635066380915016, iteration: 445403
loss: 0.993297815322876,grad_norm: 0.8615460022784822, iteration: 445404
loss: 1.1190606355667114,grad_norm: 0.7813801962397895, iteration: 445405
loss: 0.9447463154792786,grad_norm: 0.8037600629706848, iteration: 445406
loss: 1.000794529914856,grad_norm: 0.7587930583161467, iteration: 445407
loss: 0.9955591559410095,grad_norm: 0.7798822732075689, iteration: 445408
loss: 1.001574993133545,grad_norm: 0.9999994734683451, iteration: 445409
loss: 1.0126934051513672,grad_norm: 0.6522438034995396, iteration: 445410
loss: 0.9914426803588867,grad_norm: 0.6766175743854858, iteration: 445411
loss: 0.9847260117530823,grad_norm: 0.9243158225053555, iteration: 445412
loss: 1.0670356750488281,grad_norm: 0.9999992654540979, iteration: 445413
loss: 1.0560719966888428,grad_norm: 0.9593240504120305, iteration: 445414
loss: 0.9771482944488525,grad_norm: 0.744028929122366, iteration: 445415
loss: 0.9981772303581238,grad_norm: 0.7531319184638947, iteration: 445416
loss: 1.0014415979385376,grad_norm: 0.802395394697962, iteration: 445417
loss: 0.9636707305908203,grad_norm: 0.7951609410769804, iteration: 445418
loss: 1.0188417434692383,grad_norm: 0.8168625758491967, iteration: 445419
loss: 1.0427778959274292,grad_norm: 0.7091227249142861, iteration: 445420
loss: 1.006470799446106,grad_norm: 0.7977336646600796, iteration: 445421
loss: 0.980294406414032,grad_norm: 0.7534961053395499, iteration: 445422
loss: 1.008522629737854,grad_norm: 0.8883205957424248, iteration: 445423
loss: 0.9923366904258728,grad_norm: 0.7010041359431558, iteration: 445424
loss: 1.0815223455429077,grad_norm: 0.8443479736128325, iteration: 445425
loss: 1.038390874862671,grad_norm: 0.7668559887133946, iteration: 445426
loss: 0.9910755753517151,grad_norm: 0.7433410258880611, iteration: 445427
loss: 1.0028990507125854,grad_norm: 0.9290861291767905, iteration: 445428
loss: 0.9830055236816406,grad_norm: 0.8602339226216472, iteration: 445429
loss: 0.9857727885246277,grad_norm: 0.6731294987776525, iteration: 445430
loss: 0.9712106585502625,grad_norm: 0.7321003728156491, iteration: 445431
loss: 0.9955064654350281,grad_norm: 0.6647677316299873, iteration: 445432
loss: 1.040569543838501,grad_norm: 0.8422162339874454, iteration: 445433
loss: 0.982894241809845,grad_norm: 0.7814735629443533, iteration: 445434
loss: 1.0408053398132324,grad_norm: 0.6846035659738186, iteration: 445435
loss: 1.0322636365890503,grad_norm: 0.8455319732622967, iteration: 445436
loss: 1.032479166984558,grad_norm: 0.6380202112206057, iteration: 445437
loss: 0.9964700937271118,grad_norm: 0.80487586687717, iteration: 445438
loss: 1.0396723747253418,grad_norm: 0.7227201020111093, iteration: 445439
loss: 1.0368517637252808,grad_norm: 0.7217860475690303, iteration: 445440
loss: 1.0010993480682373,grad_norm: 0.8667817080297335, iteration: 445441
loss: 1.022610068321228,grad_norm: 0.8805666469451118, iteration: 445442
loss: 0.9930711388587952,grad_norm: 0.8376980810771157, iteration: 445443
loss: 1.0921576023101807,grad_norm: 0.9999994865179902, iteration: 445444
loss: 0.9981714487075806,grad_norm: 0.6531538147181684, iteration: 445445
loss: 1.0467860698699951,grad_norm: 0.9999994938647788, iteration: 445446
loss: 1.0304805040359497,grad_norm: 0.7319937844121324, iteration: 445447
loss: 1.0136114358901978,grad_norm: 0.9565401549874226, iteration: 445448
loss: 0.9768388271331787,grad_norm: 0.9999992146609227, iteration: 445449
loss: 1.0235873460769653,grad_norm: 0.7943831432131304, iteration: 445450
loss: 0.9588474035263062,grad_norm: 0.9762268326606942, iteration: 445451
loss: 1.0253766775131226,grad_norm: 0.7280635057259358, iteration: 445452
loss: 0.9394983649253845,grad_norm: 0.6777668754404464, iteration: 445453
loss: 1.0214893817901611,grad_norm: 0.6817490963252024, iteration: 445454
loss: 1.0028866529464722,grad_norm: 0.9999991873213833, iteration: 445455
loss: 0.9901869893074036,grad_norm: 0.8647222112986008, iteration: 445456
loss: 0.9952421188354492,grad_norm: 0.6999567965095623, iteration: 445457
loss: 0.996817946434021,grad_norm: 0.735352950624586, iteration: 445458
loss: 0.9750803112983704,grad_norm: 0.7446846866694247, iteration: 445459
loss: 1.0076512098312378,grad_norm: 0.8195623445426409, iteration: 445460
loss: 1.0049891471862793,grad_norm: 0.610724591101066, iteration: 445461
loss: 0.9947946071624756,grad_norm: 0.7087390875135464, iteration: 445462
loss: 0.9595624208450317,grad_norm: 0.7154575629741496, iteration: 445463
loss: 1.0359119176864624,grad_norm: 0.8515254472014211, iteration: 445464
loss: 1.022787094116211,grad_norm: 0.9999995058366374, iteration: 445465
loss: 0.9993904829025269,grad_norm: 0.7448907343029232, iteration: 445466
loss: 0.9991594552993774,grad_norm: 0.7582000401723819, iteration: 445467
loss: 1.0007790327072144,grad_norm: 0.8442780219946113, iteration: 445468
loss: 1.0136735439300537,grad_norm: 0.8119292954276397, iteration: 445469
loss: 0.9576107263565063,grad_norm: 0.8366731265145608, iteration: 445470
loss: 1.0192365646362305,grad_norm: 0.6712674795081282, iteration: 445471
loss: 0.9917113184928894,grad_norm: 0.659365812944367, iteration: 445472
loss: 0.9568815231323242,grad_norm: 0.8340357104629523, iteration: 445473
loss: 0.9812291860580444,grad_norm: 0.5784452614451042, iteration: 445474
loss: 1.0732274055480957,grad_norm: 0.7324146505378778, iteration: 445475
loss: 0.9559035897254944,grad_norm: 0.6328784114084911, iteration: 445476
loss: 0.9716728329658508,grad_norm: 0.9150044978219359, iteration: 445477
loss: 0.9878793358802795,grad_norm: 0.9069466893594959, iteration: 445478
loss: 1.0133388042449951,grad_norm: 0.6672209223560298, iteration: 445479
loss: 0.9695382118225098,grad_norm: 0.746417088853577, iteration: 445480
loss: 1.0150680541992188,grad_norm: 0.9413159902321776, iteration: 445481
loss: 0.9531518220901489,grad_norm: 0.7135154604856734, iteration: 445482
loss: 1.0171188116073608,grad_norm: 0.7889270812211517, iteration: 445483
loss: 1.0003042221069336,grad_norm: 0.6931593669977364, iteration: 445484
loss: 1.0372182130813599,grad_norm: 0.6595232030254821, iteration: 445485
loss: 0.9776021242141724,grad_norm: 0.6633791092648015, iteration: 445486
loss: 1.0669804811477661,grad_norm: 0.999999903298226, iteration: 445487
loss: 0.9930030703544617,grad_norm: 0.8041953339998225, iteration: 445488
loss: 0.9402735829353333,grad_norm: 0.6619318650872125, iteration: 445489
loss: 0.991612434387207,grad_norm: 0.7958321631327043, iteration: 445490
loss: 0.9962844848632812,grad_norm: 0.8267067576853132, iteration: 445491
loss: 1.0780906677246094,grad_norm: 0.9695543828365885, iteration: 445492
loss: 0.9932282567024231,grad_norm: 0.7317511682594627, iteration: 445493
loss: 0.9854414463043213,grad_norm: 0.741347314620986, iteration: 445494
loss: 0.9796163439750671,grad_norm: 0.71671756325359, iteration: 445495
loss: 0.9786537885665894,grad_norm: 0.7838349955891759, iteration: 445496
loss: 0.9887356758117676,grad_norm: 0.8780923890597258, iteration: 445497
loss: 0.9889179468154907,grad_norm: 0.8262007201952414, iteration: 445498
loss: 0.9817296862602234,grad_norm: 0.7668776549624133, iteration: 445499
loss: 1.003040075302124,grad_norm: 0.9936762503246789, iteration: 445500
loss: 1.031902551651001,grad_norm: 0.9964356253664619, iteration: 445501
loss: 0.9942054152488708,grad_norm: 0.689100204485621, iteration: 445502
loss: 1.038626790046692,grad_norm: 0.7310909391672141, iteration: 445503
loss: 1.0077680349349976,grad_norm: 0.8304485876415563, iteration: 445504
loss: 1.0081337690353394,grad_norm: 0.7712798763047743, iteration: 445505
loss: 0.9447607398033142,grad_norm: 0.7740055338293977, iteration: 445506
loss: 1.0171538591384888,grad_norm: 0.8801373265091101, iteration: 445507
loss: 0.9788740277290344,grad_norm: 0.6011014423725148, iteration: 445508
loss: 1.0049498081207275,grad_norm: 0.6862892440989058, iteration: 445509
loss: 0.9996476173400879,grad_norm: 0.7275165365106248, iteration: 445510
loss: 1.0314050912857056,grad_norm: 0.9357887222412585, iteration: 445511
loss: 0.9811261892318726,grad_norm: 0.9999990837519894, iteration: 445512
loss: 1.0049690008163452,grad_norm: 0.886164151269238, iteration: 445513
loss: 0.9915028810501099,grad_norm: 0.7777080173755648, iteration: 445514
loss: 0.9995167255401611,grad_norm: 0.8224178054295037, iteration: 445515
loss: 1.013966679573059,grad_norm: 0.8327140430558304, iteration: 445516
loss: 1.061672568321228,grad_norm: 0.8762160575850216, iteration: 445517
loss: 1.0176959037780762,grad_norm: 0.7191943562865072, iteration: 445518
loss: 1.0086841583251953,grad_norm: 0.7401953488524459, iteration: 445519
loss: 0.9778993725776672,grad_norm: 0.8005066862851775, iteration: 445520
loss: 0.9984481334686279,grad_norm: 0.7593341918376992, iteration: 445521
loss: 1.0165654420852661,grad_norm: 0.6708924951577172, iteration: 445522
loss: 0.9934260845184326,grad_norm: 0.7189147724862319, iteration: 445523
loss: 1.0220474004745483,grad_norm: 0.7102631229445393, iteration: 445524
loss: 1.0180131196975708,grad_norm: 0.8061520004358808, iteration: 445525
loss: 1.0050194263458252,grad_norm: 0.9999999567430463, iteration: 445526
loss: 1.0163787603378296,grad_norm: 0.7611276060063276, iteration: 445527
loss: 1.0018976926803589,grad_norm: 0.6779525482496696, iteration: 445528
loss: 0.9869310259819031,grad_norm: 0.7983394205906292, iteration: 445529
loss: 1.1979259252548218,grad_norm: 0.9999999926573958, iteration: 445530
loss: 1.0724437236785889,grad_norm: 0.9030550234573688, iteration: 445531
loss: 1.0493251085281372,grad_norm: 0.9999999033851444, iteration: 445532
loss: 0.9834113717079163,grad_norm: 0.6936023407144394, iteration: 445533
loss: 1.0256837606430054,grad_norm: 0.8550429815127322, iteration: 445534
loss: 1.057465672492981,grad_norm: 0.7946018195427355, iteration: 445535
loss: 1.0029064416885376,grad_norm: 0.6497103303539503, iteration: 445536
loss: 1.070581078529358,grad_norm: 0.999999458138927, iteration: 445537
loss: 1.0028204917907715,grad_norm: 0.7766991495909694, iteration: 445538
loss: 1.0034774541854858,grad_norm: 0.7560832975560271, iteration: 445539
loss: 0.9543102979660034,grad_norm: 0.7756028330704069, iteration: 445540
loss: 0.9857248663902283,grad_norm: 0.7346070463826816, iteration: 445541
loss: 1.0183398723602295,grad_norm: 0.7674685856691795, iteration: 445542
loss: 0.9567912220954895,grad_norm: 0.750206348861066, iteration: 445543
loss: 0.9553095102310181,grad_norm: 0.7984935464853944, iteration: 445544
loss: 1.0745981931686401,grad_norm: 0.9999997288749418, iteration: 445545
loss: 1.141281008720398,grad_norm: 0.999999649290901, iteration: 445546
loss: 0.9948904514312744,grad_norm: 0.7610683911771015, iteration: 445547
loss: 1.1712322235107422,grad_norm: 0.9999991143811358, iteration: 445548
loss: 0.9851499795913696,grad_norm: 0.8745958919312641, iteration: 445549
loss: 0.9654757976531982,grad_norm: 0.7515578811184244, iteration: 445550
loss: 0.9804106950759888,grad_norm: 0.7440983871997706, iteration: 445551
loss: 0.987150251865387,grad_norm: 0.7228350470558602, iteration: 445552
loss: 1.0019550323486328,grad_norm: 0.8404300546690263, iteration: 445553
loss: 1.0272012948989868,grad_norm: 0.73878205780202, iteration: 445554
loss: 0.9880395531654358,grad_norm: 0.809664539123344, iteration: 445555
loss: 0.9840917587280273,grad_norm: 0.7032864650747574, iteration: 445556
loss: 1.0855685472488403,grad_norm: 0.9999991339881655, iteration: 445557
loss: 1.018186092376709,grad_norm: 0.758536703620186, iteration: 445558
loss: 1.1151032447814941,grad_norm: 0.9999997215050117, iteration: 445559
loss: 1.0317683219909668,grad_norm: 0.7165863556995188, iteration: 445560
loss: 1.1112593412399292,grad_norm: 0.9999993520488241, iteration: 445561
loss: 0.9705423712730408,grad_norm: 0.9357072214402263, iteration: 445562
loss: 1.0192991495132446,grad_norm: 0.795937052650495, iteration: 445563
loss: 1.0293735265731812,grad_norm: 0.8876019539149796, iteration: 445564
loss: 1.0014302730560303,grad_norm: 0.8230644166385367, iteration: 445565
loss: 0.9841739535331726,grad_norm: 0.6884392010731369, iteration: 445566
loss: 1.1146724224090576,grad_norm: 0.9999994343350291, iteration: 445567
loss: 1.0064096450805664,grad_norm: 0.9247279726823319, iteration: 445568
loss: 1.0076978206634521,grad_norm: 0.8317336347617156, iteration: 445569
loss: 0.9852041006088257,grad_norm: 0.7526750565498611, iteration: 445570
loss: 1.0305346250534058,grad_norm: 0.9357683419919008, iteration: 445571
loss: 0.972297728061676,grad_norm: 0.7850577781974332, iteration: 445572
loss: 1.0154951810836792,grad_norm: 0.9999991436254868, iteration: 445573
loss: 1.0209870338439941,grad_norm: 0.7832764474348796, iteration: 445574
loss: 0.9808436632156372,grad_norm: 0.7435052760224168, iteration: 445575
loss: 1.0007967948913574,grad_norm: 0.9305319757522473, iteration: 445576
loss: 1.0097692012786865,grad_norm: 0.7631782136496449, iteration: 445577
loss: 1.002942681312561,grad_norm: 0.7750232927077924, iteration: 445578
loss: 1.0073108673095703,grad_norm: 0.7315874012363793, iteration: 445579
loss: 0.9938784241676331,grad_norm: 0.6829015039465203, iteration: 445580
loss: 0.9518170952796936,grad_norm: 0.6852112809028256, iteration: 445581
loss: 1.025549054145813,grad_norm: 0.9999996191849628, iteration: 445582
loss: 1.0048271417617798,grad_norm: 0.7353885521203579, iteration: 445583
loss: 1.0204534530639648,grad_norm: 0.7806888518492984, iteration: 445584
loss: 0.9937512874603271,grad_norm: 0.8258972226889352, iteration: 445585
loss: 1.02267587184906,grad_norm: 0.8723152516683516, iteration: 445586
loss: 0.9930307269096375,grad_norm: 0.755712758435837, iteration: 445587
loss: 0.991030216217041,grad_norm: 0.7643464264769823, iteration: 445588
loss: 0.9615140557289124,grad_norm: 0.8651622933754977, iteration: 445589
loss: 0.9977515339851379,grad_norm: 0.6278843816559582, iteration: 445590
loss: 1.0023384094238281,grad_norm: 0.6268855493889023, iteration: 445591
loss: 0.9914289712905884,grad_norm: 0.8093701937660602, iteration: 445592
loss: 1.016310214996338,grad_norm: 0.6847468181807744, iteration: 445593
loss: 1.036985993385315,grad_norm: 0.8624744062729738, iteration: 445594
loss: 0.9969328045845032,grad_norm: 0.999999471294507, iteration: 445595
loss: 1.0186724662780762,grad_norm: 0.7643477024898999, iteration: 445596
loss: 0.9888612627983093,grad_norm: 0.999999170961294, iteration: 445597
loss: 0.9913797378540039,grad_norm: 0.7524112911659303, iteration: 445598
loss: 0.988446056842804,grad_norm: 0.6492574731982377, iteration: 445599
loss: 0.9803433418273926,grad_norm: 0.981992621269042, iteration: 445600
loss: 1.0002930164337158,grad_norm: 0.9184741886361382, iteration: 445601
loss: 1.0373023748397827,grad_norm: 0.8073298080571063, iteration: 445602
loss: 0.9683753252029419,grad_norm: 0.666413719328309, iteration: 445603
loss: 0.9705373048782349,grad_norm: 0.811334596742136, iteration: 445604
loss: 0.9888156056404114,grad_norm: 0.7145662084187149, iteration: 445605
loss: 1.010939598083496,grad_norm: 0.7183275255551904, iteration: 445606
loss: 0.9948681592941284,grad_norm: 0.7014507270388226, iteration: 445607
loss: 1.0190107822418213,grad_norm: 0.7513326025608124, iteration: 445608
loss: 1.0182400941848755,grad_norm: 0.8424269305208373, iteration: 445609
loss: 1.0178356170654297,grad_norm: 0.999999097228115, iteration: 445610
loss: 1.009749174118042,grad_norm: 0.7292858507639591, iteration: 445611
loss: 1.0036300420761108,grad_norm: 0.7574064425711655, iteration: 445612
loss: 1.0052634477615356,grad_norm: 0.8001015826909844, iteration: 445613
loss: 0.9831708669662476,grad_norm: 0.8144867520446436, iteration: 445614
loss: 0.9900877475738525,grad_norm: 0.8238737145797131, iteration: 445615
loss: 1.0274235010147095,grad_norm: 0.7563975824732492, iteration: 445616
loss: 0.9580647945404053,grad_norm: 0.8788220908421457, iteration: 445617
loss: 0.9794411659240723,grad_norm: 0.788539933597727, iteration: 445618
loss: 0.9970592260360718,grad_norm: 0.9999992223558588, iteration: 445619
loss: 0.9990664124488831,grad_norm: 0.851507639468286, iteration: 445620
loss: 1.0236268043518066,grad_norm: 0.7457489524512085, iteration: 445621
loss: 1.0045489072799683,grad_norm: 0.9999990703165021, iteration: 445622
loss: 1.0179548263549805,grad_norm: 0.6567004941365255, iteration: 445623
loss: 1.0242972373962402,grad_norm: 1.0000000191370988, iteration: 445624
loss: 1.0215303897857666,grad_norm: 0.8623606155498963, iteration: 445625
loss: 0.975278913974762,grad_norm: 0.691799170604626, iteration: 445626
loss: 0.9901827573776245,grad_norm: 0.6826343928265953, iteration: 445627
loss: 0.9996609687805176,grad_norm: 0.7721964049608566, iteration: 445628
loss: 0.9797661900520325,grad_norm: 0.785084201667295, iteration: 445629
loss: 0.9808979630470276,grad_norm: 0.8367079501453909, iteration: 445630
loss: 1.0607061386108398,grad_norm: 0.8322065159774716, iteration: 445631
loss: 1.0758687257766724,grad_norm: 0.9999991842779127, iteration: 445632
loss: 1.0122097730636597,grad_norm: 0.9999991415560275, iteration: 445633
loss: 1.0319844484329224,grad_norm: 0.738982108617223, iteration: 445634
loss: 1.006972074508667,grad_norm: 0.7853195923602475, iteration: 445635
loss: 0.9983221888542175,grad_norm: 0.9999996438673949, iteration: 445636
loss: 1.0486255884170532,grad_norm: 0.8206506495717765, iteration: 445637
loss: 0.9877015948295593,grad_norm: 0.8536056935896793, iteration: 445638
loss: 0.9649783372879028,grad_norm: 0.7559473337887294, iteration: 445639
loss: 1.003455638885498,grad_norm: 0.7738928011403007, iteration: 445640
loss: 1.0217405557632446,grad_norm: 0.7555996941315396, iteration: 445641
loss: 0.9751515984535217,grad_norm: 0.666861074012175, iteration: 445642
loss: 0.9881256222724915,grad_norm: 0.8401749498022578, iteration: 445643
loss: 1.0048134326934814,grad_norm: 0.7304801785821857, iteration: 445644
loss: 1.0250308513641357,grad_norm: 0.9999993353679012, iteration: 445645
loss: 0.9779307246208191,grad_norm: 0.8599109739727043, iteration: 445646
loss: 1.0169384479522705,grad_norm: 0.8365248218063204, iteration: 445647
loss: 1.0228458642959595,grad_norm: 0.9999992824267634, iteration: 445648
loss: 0.9960311651229858,grad_norm: 0.8097761227186384, iteration: 445649
loss: 1.0459046363830566,grad_norm: 0.9999996990331305, iteration: 445650
loss: 1.03543221950531,grad_norm: 0.8460471074415766, iteration: 445651
loss: 1.0048227310180664,grad_norm: 0.6930087269419014, iteration: 445652
loss: 0.9958599805831909,grad_norm: 0.7590805465052926, iteration: 445653
loss: 1.063920259475708,grad_norm: 0.9999989992019364, iteration: 445654
loss: 0.9905046224594116,grad_norm: 0.7466765223362533, iteration: 445655
loss: 0.98858642578125,grad_norm: 0.7341475184442964, iteration: 445656
loss: 1.0378426313400269,grad_norm: 0.9999996416064152, iteration: 445657
loss: 1.0037546157836914,grad_norm: 0.6102867129300232, iteration: 445658
loss: 0.9903758764266968,grad_norm: 0.9999990904703827, iteration: 445659
loss: 0.9908308386802673,grad_norm: 0.99999954462524, iteration: 445660
loss: 1.0433874130249023,grad_norm: 0.9999992292772443, iteration: 445661
loss: 1.0122759342193604,grad_norm: 0.7359660333952195, iteration: 445662
loss: 1.0261890888214111,grad_norm: 0.665217302289834, iteration: 445663
loss: 0.9534717202186584,grad_norm: 0.8249859017405974, iteration: 445664
loss: 1.0982229709625244,grad_norm: 0.7656109238299832, iteration: 445665
loss: 0.9758654832839966,grad_norm: 0.916305029775968, iteration: 445666
loss: 1.0061824321746826,grad_norm: 0.8496271124605568, iteration: 445667
loss: 1.0042390823364258,grad_norm: 0.8595943701807275, iteration: 445668
loss: 0.9934008121490479,grad_norm: 0.666837460603325, iteration: 445669
loss: 0.9960277676582336,grad_norm: 0.821835156990662, iteration: 445670
loss: 0.9794530272483826,grad_norm: 0.999999305647253, iteration: 445671
loss: 0.9429740309715271,grad_norm: 0.6794048802961322, iteration: 445672
loss: 1.033115267753601,grad_norm: 0.9503862315865238, iteration: 445673
loss: 1.012868881225586,grad_norm: 0.8354519956284012, iteration: 445674
loss: 0.9983066916465759,grad_norm: 0.8852868566214495, iteration: 445675
loss: 1.039987564086914,grad_norm: 0.8296662832685058, iteration: 445676
loss: 1.0164271593093872,grad_norm: 0.7233040740221, iteration: 445677
loss: 0.9796664118766785,grad_norm: 0.6662808665467368, iteration: 445678
loss: 1.0416691303253174,grad_norm: 0.776364098405247, iteration: 445679
loss: 1.0163099765777588,grad_norm: 0.7223534803264969, iteration: 445680
loss: 1.0006834268569946,grad_norm: 0.9090595536693289, iteration: 445681
loss: 1.0533952713012695,grad_norm: 0.9999999327857263, iteration: 445682
loss: 1.0247400999069214,grad_norm: 0.6923179557858707, iteration: 445683
loss: 0.9793779850006104,grad_norm: 0.9999991077578179, iteration: 445684
loss: 1.0486222505569458,grad_norm: 0.9999995464298448, iteration: 445685
loss: 1.017327070236206,grad_norm: 0.7525450133320607, iteration: 445686
loss: 1.0271461009979248,grad_norm: 0.5542410686580737, iteration: 445687
loss: 0.9734851717948914,grad_norm: 0.7112295234431345, iteration: 445688
loss: 0.9486696720123291,grad_norm: 0.7777870044135015, iteration: 445689
loss: 0.9609362483024597,grad_norm: 0.7444088982504288, iteration: 445690
loss: 0.9673887491226196,grad_norm: 0.9999992608164017, iteration: 445691
loss: 1.0171825885772705,grad_norm: 0.7274906383408994, iteration: 445692
loss: 0.991649866104126,grad_norm: 0.7765998675591146, iteration: 445693
loss: 1.0021703243255615,grad_norm: 0.7618334054375662, iteration: 445694
loss: 1.0098217725753784,grad_norm: 1.0000000085348495, iteration: 445695
loss: 1.0076087713241577,grad_norm: 0.7380514610763762, iteration: 445696
loss: 1.0306907892227173,grad_norm: 0.7221156415503136, iteration: 445697
loss: 1.0614333152770996,grad_norm: 0.9999998399876829, iteration: 445698
loss: 1.0317803621292114,grad_norm: 0.9999990850515599, iteration: 445699
loss: 1.059363842010498,grad_norm: 0.9570208223185319, iteration: 445700
loss: 0.9904091954231262,grad_norm: 0.9999991194338297, iteration: 445701
loss: 0.9994910955429077,grad_norm: 0.7263740352921167, iteration: 445702
loss: 1.0502865314483643,grad_norm: 0.9999991364195864, iteration: 445703
loss: 1.005509614944458,grad_norm: 0.7380195185944995, iteration: 445704
loss: 0.9919331073760986,grad_norm: 0.8205451029217231, iteration: 445705
loss: 0.994734525680542,grad_norm: 0.7232801225167046, iteration: 445706
loss: 1.009189486503601,grad_norm: 0.8870020927882549, iteration: 445707
loss: 1.0419120788574219,grad_norm: 0.9999990324931675, iteration: 445708
loss: 0.9859002828598022,grad_norm: 0.7634197822621934, iteration: 445709
loss: 0.985745906829834,grad_norm: 0.8182505917769369, iteration: 445710
loss: 0.9863100647926331,grad_norm: 0.7851024493324081, iteration: 445711
loss: 0.9913550615310669,grad_norm: 0.9303405586059207, iteration: 445712
loss: 1.0035213232040405,grad_norm: 0.9512662240454365, iteration: 445713
loss: 1.0422252416610718,grad_norm: 0.7890598243121809, iteration: 445714
loss: 1.0552284717559814,grad_norm: 0.7515226694868568, iteration: 445715
loss: 1.0478895902633667,grad_norm: 0.87652822690384, iteration: 445716
loss: 1.0188790559768677,grad_norm: 0.7131389413801905, iteration: 445717
loss: 0.9771131277084351,grad_norm: 0.9999991683849063, iteration: 445718
loss: 1.0045522451400757,grad_norm: 0.6923107367631454, iteration: 445719
loss: 1.0464648008346558,grad_norm: 0.6410167209878349, iteration: 445720
loss: 0.978264331817627,grad_norm: 0.8630576503743617, iteration: 445721
loss: 1.0273362398147583,grad_norm: 0.9064280919125707, iteration: 445722
loss: 0.9823155999183655,grad_norm: 0.9868216551604068, iteration: 445723
loss: 0.9701322913169861,grad_norm: 0.8458272111504652, iteration: 445724
loss: 1.0637714862823486,grad_norm: 0.9999993209346748, iteration: 445725
loss: 0.9835097193717957,grad_norm: 0.9999990216513339, iteration: 445726
loss: 0.9970664978027344,grad_norm: 0.7919082257535304, iteration: 445727
loss: 0.9744752645492554,grad_norm: 0.9063912852808866, iteration: 445728
loss: 1.002684473991394,grad_norm: 0.9999999153644025, iteration: 445729
loss: 1.0024986267089844,grad_norm: 0.7641305063920271, iteration: 445730
loss: 1.001781940460205,grad_norm: 0.8388127277198503, iteration: 445731
loss: 1.0132609605789185,grad_norm: 0.6969827301284511, iteration: 445732
loss: 1.054129958152771,grad_norm: 0.8770279455178789, iteration: 445733
loss: 0.9894751906394958,grad_norm: 0.7009586510713322, iteration: 445734
loss: 0.9767409563064575,grad_norm: 0.8324130945947998, iteration: 445735
loss: 1.0261207818984985,grad_norm: 0.756085422282658, iteration: 445736
loss: 1.0148687362670898,grad_norm: 0.9099825941406788, iteration: 445737
loss: 0.9779163002967834,grad_norm: 0.9793489394302474, iteration: 445738
loss: 0.9946142435073853,grad_norm: 0.7030582548434453, iteration: 445739
loss: 1.0708919763565063,grad_norm: 0.9999997730594807, iteration: 445740
loss: 0.9826756119728088,grad_norm: 0.685186566239887, iteration: 445741
loss: 1.0094820261001587,grad_norm: 0.9312437337036401, iteration: 445742
loss: 0.9934661984443665,grad_norm: 0.8775114542633016, iteration: 445743
loss: 0.9835615754127502,grad_norm: 0.6823956683516743, iteration: 445744
loss: 1.0091376304626465,grad_norm: 0.7407555362114835, iteration: 445745
loss: 1.0091122388839722,grad_norm: 0.6963532259702342, iteration: 445746
loss: 0.9801168441772461,grad_norm: 0.6941146143833711, iteration: 445747
loss: 0.9825917482376099,grad_norm: 0.727498506490631, iteration: 445748
loss: 1.024659514427185,grad_norm: 0.7796961779782612, iteration: 445749
loss: 1.0033127069473267,grad_norm: 0.6415956240773039, iteration: 445750
loss: 1.0266762971878052,grad_norm: 0.9999992336681433, iteration: 445751
loss: 0.9969630241394043,grad_norm: 0.8235903379866198, iteration: 445752
loss: 1.0011076927185059,grad_norm: 0.6855657756510091, iteration: 445753
loss: 1.0149105787277222,grad_norm: 0.9194914194132409, iteration: 445754
loss: 1.115527868270874,grad_norm: 0.999999226996203, iteration: 445755
loss: 1.0230062007904053,grad_norm: 0.8341387126110126, iteration: 445756
loss: 0.9572237730026245,grad_norm: 0.6887689074572005, iteration: 445757
loss: 1.1112480163574219,grad_norm: 0.8556285048429182, iteration: 445758
loss: 1.0227903127670288,grad_norm: 0.8046396605231958, iteration: 445759
loss: 1.0119446516036987,grad_norm: 0.7501634818357151, iteration: 445760
loss: 1.0248626470565796,grad_norm: 0.7323349746134606, iteration: 445761
loss: 0.9766837954521179,grad_norm: 0.9999995858505033, iteration: 445762
loss: 0.9918226003646851,grad_norm: 0.7469295844569859, iteration: 445763
loss: 0.998992383480072,grad_norm: 0.9388163585511787, iteration: 445764
loss: 0.9826068878173828,grad_norm: 0.7232597986867803, iteration: 445765
loss: 1.0226645469665527,grad_norm: 0.7806787834744048, iteration: 445766
loss: 1.0424386262893677,grad_norm: 0.798444804051302, iteration: 445767
loss: 1.0619317293167114,grad_norm: 0.9999992240880945, iteration: 445768
loss: 0.9621718525886536,grad_norm: 0.7515213542755442, iteration: 445769
loss: 1.0780888795852661,grad_norm: 0.7951046168540308, iteration: 445770
loss: 0.9930002093315125,grad_norm: 0.6607322748225463, iteration: 445771
loss: 1.0154391527175903,grad_norm: 0.8700189304608263, iteration: 445772
loss: 1.1688989400863647,grad_norm: 0.9191423938876846, iteration: 445773
loss: 1.0260348320007324,grad_norm: 0.7161582365984983, iteration: 445774
loss: 1.1257965564727783,grad_norm: 0.999999946739195, iteration: 445775
loss: 0.9846674203872681,grad_norm: 0.885129641705671, iteration: 445776
loss: 0.9987093806266785,grad_norm: 0.96445605970695, iteration: 445777
loss: 1.0279042720794678,grad_norm: 0.9999990334780855, iteration: 445778
loss: 1.0744233131408691,grad_norm: 0.9999996028360798, iteration: 445779
loss: 0.9888776540756226,grad_norm: 0.8983653942750953, iteration: 445780
loss: 1.1214736700057983,grad_norm: 0.9999992663972227, iteration: 445781
loss: 0.9869288206100464,grad_norm: 0.8187173539420913, iteration: 445782
loss: 0.9850220680236816,grad_norm: 0.6657375264970008, iteration: 445783
loss: 1.0887833833694458,grad_norm: 0.7518684192320569, iteration: 445784
loss: 1.0721389055252075,grad_norm: 0.734695570149554, iteration: 445785
loss: 0.9902758598327637,grad_norm: 0.7018457086185373, iteration: 445786
loss: 0.9591734409332275,grad_norm: 0.8981777968126365, iteration: 445787
loss: 1.0493110418319702,grad_norm: 0.9999991772303336, iteration: 445788
loss: 1.0328706502914429,grad_norm: 0.9245883891502815, iteration: 445789
loss: 0.971111536026001,grad_norm: 0.8121328237319999, iteration: 445790
loss: 1.0207340717315674,grad_norm: 0.7690800763346644, iteration: 445791
loss: 0.9820102453231812,grad_norm: 0.6788702280072931, iteration: 445792
loss: 0.9911025762557983,grad_norm: 0.9506631636837728, iteration: 445793
loss: 1.054714560508728,grad_norm: 0.9999991709170686, iteration: 445794
loss: 1.0533347129821777,grad_norm: 0.999999185875565, iteration: 445795
loss: 1.0200464725494385,grad_norm: 0.9690857389107809, iteration: 445796
loss: 1.0148909091949463,grad_norm: 0.9999990040408544, iteration: 445797
loss: 0.9652692079544067,grad_norm: 0.7158423732254072, iteration: 445798
loss: 0.9788276553153992,grad_norm: 0.7809897190263575, iteration: 445799
loss: 0.991041362285614,grad_norm: 0.7817503612478957, iteration: 445800
loss: 1.0082930326461792,grad_norm: 0.9999993150992208, iteration: 445801
loss: 1.0474048852920532,grad_norm: 0.7195163961532574, iteration: 445802
loss: 1.0131442546844482,grad_norm: 0.6659679103560066, iteration: 445803
loss: 1.0767590999603271,grad_norm: 0.9999991307911931, iteration: 445804
loss: 0.984976589679718,grad_norm: 0.8065132257727783, iteration: 445805
loss: 1.0909963846206665,grad_norm: 0.9731133461589444, iteration: 445806
loss: 0.9726377129554749,grad_norm: 0.886627698042777, iteration: 445807
loss: 1.0439401865005493,grad_norm: 0.7886486943767205, iteration: 445808
loss: 0.9334166049957275,grad_norm: 0.906053683096268, iteration: 445809
loss: 1.0263831615447998,grad_norm: 0.8686555003149331, iteration: 445810
loss: 0.9963316321372986,grad_norm: 0.8569867094754796, iteration: 445811
loss: 0.9956352114677429,grad_norm: 0.7271112316288569, iteration: 445812
loss: 1.0563850402832031,grad_norm: 0.9999993619710991, iteration: 445813
loss: 1.0215179920196533,grad_norm: 0.9994125773098524, iteration: 445814
loss: 1.0132663249969482,grad_norm: 0.7569824681980531, iteration: 445815
loss: 0.98269122838974,grad_norm: 0.852025269740784, iteration: 445816
loss: 0.9746110439300537,grad_norm: 0.7475198725620663, iteration: 445817
loss: 1.0187047719955444,grad_norm: 0.7824537807324068, iteration: 445818
loss: 0.9957415461540222,grad_norm: 0.8179763730249071, iteration: 445819
loss: 0.9778062701225281,grad_norm: 0.741616004144436, iteration: 445820
loss: 0.9959082007408142,grad_norm: 0.8160122006819197, iteration: 445821
loss: 0.9834027886390686,grad_norm: 0.8237556839502553, iteration: 445822
loss: 1.0116381645202637,grad_norm: 0.8248572216533119, iteration: 445823
loss: 1.0085904598236084,grad_norm: 0.8980008797511084, iteration: 445824
loss: 1.0295791625976562,grad_norm: 0.9999996005994388, iteration: 445825
loss: 1.037568211555481,grad_norm: 0.8065842592407838, iteration: 445826
loss: 1.010054349899292,grad_norm: 0.7081348510181941, iteration: 445827
loss: 1.0142625570297241,grad_norm: 0.8610379232764802, iteration: 445828
loss: 1.0895730257034302,grad_norm: 0.9999996262186339, iteration: 445829
loss: 1.0182116031646729,grad_norm: 0.901558182706584, iteration: 445830
loss: 1.0203169584274292,grad_norm: 0.9999992436363121, iteration: 445831
loss: 0.992716908454895,grad_norm: 0.7480854640753684, iteration: 445832
loss: 1.00257408618927,grad_norm: 0.7719140309113014, iteration: 445833
loss: 0.9774816632270813,grad_norm: 0.7319832925626512, iteration: 445834
loss: 0.9810613989830017,grad_norm: 0.9999994893373749, iteration: 445835
loss: 1.001570463180542,grad_norm: 0.7361049407788712, iteration: 445836
loss: 1.0819227695465088,grad_norm: 0.7657865402280817, iteration: 445837
loss: 1.003119945526123,grad_norm: 0.8777369135353833, iteration: 445838
loss: 1.0440431833267212,grad_norm: 0.8898601593123957, iteration: 445839
loss: 0.9979320764541626,grad_norm: 0.7550436546454506, iteration: 445840
loss: 1.0584092140197754,grad_norm: 0.7936342617669454, iteration: 445841
loss: 0.9916934967041016,grad_norm: 0.8351801581768581, iteration: 445842
loss: 0.9842788577079773,grad_norm: 0.8280573244910545, iteration: 445843
loss: 1.028894305229187,grad_norm: 0.8626383602042538, iteration: 445844
loss: 1.0149785280227661,grad_norm: 0.787376031369075, iteration: 445845
loss: 1.0414997339248657,grad_norm: 0.9507715126061786, iteration: 445846
loss: 1.025328278541565,grad_norm: 0.7130086295087756, iteration: 445847
loss: 0.9561300277709961,grad_norm: 0.6379837941682442, iteration: 445848
loss: 1.0032713413238525,grad_norm: 0.9010791394755637, iteration: 445849
loss: 1.0148733854293823,grad_norm: 0.7220044458223883, iteration: 445850
loss: 0.9862609505653381,grad_norm: 0.79097010474051, iteration: 445851
loss: 1.0392005443572998,grad_norm: 0.8696842822949373, iteration: 445852
loss: 1.0068179368972778,grad_norm: 0.8653890401243974, iteration: 445853
loss: 1.0147472620010376,grad_norm: 0.6473691748710803, iteration: 445854
loss: 0.9927510619163513,grad_norm: 0.821228606284506, iteration: 445855
loss: 0.9700920581817627,grad_norm: 0.7205875320739457, iteration: 445856
loss: 1.0230259895324707,grad_norm: 0.689542503798998, iteration: 445857
loss: 1.0480623245239258,grad_norm: 0.722353036857291, iteration: 445858
loss: 1.0510293245315552,grad_norm: 0.7702350685834295, iteration: 445859
loss: 1.02134108543396,grad_norm: 0.9332434810677669, iteration: 445860
loss: 0.9788264036178589,grad_norm: 0.7885567841609536, iteration: 445861
loss: 1.1118415594100952,grad_norm: 0.9999996702320996, iteration: 445862
loss: 1.0244001150131226,grad_norm: 0.8561801470053396, iteration: 445863
loss: 0.9753015637397766,grad_norm: 0.7259362213459295, iteration: 445864
loss: 1.0524402856826782,grad_norm: 0.9999991697437949, iteration: 445865
loss: 0.9832656979560852,grad_norm: 0.873038583404545, iteration: 445866
loss: 1.032693862915039,grad_norm: 0.9999993792556032, iteration: 445867
loss: 1.0151517391204834,grad_norm: 0.7683830825357535, iteration: 445868
loss: 1.0880464315414429,grad_norm: 0.7538113170711384, iteration: 445869
loss: 1.0102101564407349,grad_norm: 0.9999994376934863, iteration: 445870
loss: 1.0071632862091064,grad_norm: 0.7795953128967729, iteration: 445871
loss: 1.016596794128418,grad_norm: 0.7306191162628332, iteration: 445872
loss: 0.9950188994407654,grad_norm: 0.7882998850097478, iteration: 445873
loss: 1.0157802104949951,grad_norm: 0.773905957255225, iteration: 445874
loss: 0.9916196465492249,grad_norm: 0.6722432665254081, iteration: 445875
loss: 1.0086086988449097,grad_norm: 0.7356158259448952, iteration: 445876
loss: 1.0305572748184204,grad_norm: 0.8481428851988271, iteration: 445877
loss: 1.1089601516723633,grad_norm: 0.9999991727553493, iteration: 445878
loss: 1.0150704383850098,grad_norm: 0.9999996172229751, iteration: 445879
loss: 1.0721230506896973,grad_norm: 0.9999991298257794, iteration: 445880
loss: 0.9891080260276794,grad_norm: 0.7751456849152081, iteration: 445881
loss: 1.0499017238616943,grad_norm: 0.8068521507820049, iteration: 445882
loss: 1.0372530221939087,grad_norm: 0.8025749832999368, iteration: 445883
loss: 1.0742744207382202,grad_norm: 0.8070204696794687, iteration: 445884
loss: 1.074894666671753,grad_norm: 0.8608983383721386, iteration: 445885
loss: 1.0148727893829346,grad_norm: 0.8721735981144018, iteration: 445886
loss: 1.026684045791626,grad_norm: 0.7898601855349854, iteration: 445887
loss: 1.0724060535430908,grad_norm: 0.9999998623679298, iteration: 445888
loss: 0.9988703727722168,grad_norm: 0.8084451270008796, iteration: 445889
loss: 1.0024317502975464,grad_norm: 0.7749780951036186, iteration: 445890
loss: 0.9835860729217529,grad_norm: 0.809496021802175, iteration: 445891
loss: 1.0235685110092163,grad_norm: 0.7059692159768541, iteration: 445892
loss: 1.1619887351989746,grad_norm: 0.9999993739702793, iteration: 445893
loss: 1.0350443124771118,grad_norm: 0.9999998407766584, iteration: 445894
loss: 1.007265567779541,grad_norm: 0.8719859723905707, iteration: 445895
loss: 1.0096206665039062,grad_norm: 0.8333475782482538, iteration: 445896
loss: 1.0168458223342896,grad_norm: 0.9999994546881295, iteration: 445897
loss: 1.0326521396636963,grad_norm: 0.8236010292744838, iteration: 445898
loss: 1.030998945236206,grad_norm: 0.7683310304508556, iteration: 445899
loss: 1.0082379579544067,grad_norm: 0.7310008302768034, iteration: 445900
loss: 0.9658137559890747,grad_norm: 0.927775324157545, iteration: 445901
loss: 1.0642255544662476,grad_norm: 0.999999251915, iteration: 445902
loss: 1.0419647693634033,grad_norm: 0.9999992513978386, iteration: 445903
loss: 1.0814507007598877,grad_norm: 0.9999997881826354, iteration: 445904
loss: 0.9887614846229553,grad_norm: 0.8221815409932367, iteration: 445905
loss: 1.0631687641143799,grad_norm: 0.9999990896164505, iteration: 445906
loss: 0.9693340063095093,grad_norm: 0.7983738659318983, iteration: 445907
loss: 1.004203200340271,grad_norm: 0.9086769222720436, iteration: 445908
loss: 0.9713153839111328,grad_norm: 0.980433909881123, iteration: 445909
loss: 1.0340927839279175,grad_norm: 0.9999993156176009, iteration: 445910
loss: 0.9893929362297058,grad_norm: 0.9046500712228527, iteration: 445911
loss: 0.9990686774253845,grad_norm: 0.8936628099597186, iteration: 445912
loss: 1.0054792165756226,grad_norm: 0.7494578147008684, iteration: 445913
loss: 1.072275161743164,grad_norm: 0.9999998031095059, iteration: 445914
loss: 1.0093650817871094,grad_norm: 0.7359881538920223, iteration: 445915
loss: 1.0556938648223877,grad_norm: 0.7678428249881823, iteration: 445916
loss: 0.9792993068695068,grad_norm: 0.7409563532845798, iteration: 445917
loss: 1.0209078788757324,grad_norm: 0.7258748564382684, iteration: 445918
loss: 1.040569543838501,grad_norm: 0.9999998409954156, iteration: 445919
loss: 1.0069369077682495,grad_norm: 0.7299788194703957, iteration: 445920
loss: 1.0924519300460815,grad_norm: 0.9005280623292184, iteration: 445921
loss: 0.9903696179389954,grad_norm: 0.9999990866792084, iteration: 445922
loss: 1.0641447305679321,grad_norm: 0.8824298872611149, iteration: 445923
loss: 1.0250974893569946,grad_norm: 0.8385873141570884, iteration: 445924
loss: 1.0433799028396606,grad_norm: 0.7101475368736845, iteration: 445925
loss: 0.9829061031341553,grad_norm: 0.8531963577075238, iteration: 445926
loss: 1.063330888748169,grad_norm: 0.7428749395462878, iteration: 445927
loss: 1.008272409439087,grad_norm: 0.9475535629123868, iteration: 445928
loss: 1.1143559217453003,grad_norm: 0.8325371684375221, iteration: 445929
loss: 1.1235685348510742,grad_norm: 0.9999999117757764, iteration: 445930
loss: 1.0327084064483643,grad_norm: 0.9999991145047259, iteration: 445931
loss: 1.0651592016220093,grad_norm: 0.8052632450644337, iteration: 445932
loss: 1.0414997339248657,grad_norm: 0.8064396109514369, iteration: 445933
loss: 0.9486215114593506,grad_norm: 0.8744762529987911, iteration: 445934
loss: 0.9691892266273499,grad_norm: 0.8485308641268265, iteration: 445935
loss: 0.9966374039649963,grad_norm: 0.8005506967000846, iteration: 445936
loss: 1.1114561557769775,grad_norm: 0.9137571749175958, iteration: 445937
loss: 1.027510404586792,grad_norm: 0.999999646238131, iteration: 445938
loss: 1.0920817852020264,grad_norm: 0.999999198361666, iteration: 445939
loss: 1.0793429613113403,grad_norm: 0.9999994170662379, iteration: 445940
loss: 1.0337105989456177,grad_norm: 0.7649925031405492, iteration: 445941
loss: 1.0235875844955444,grad_norm: 0.9281562969914326, iteration: 445942
loss: 1.1031726598739624,grad_norm: 0.9625384978321169, iteration: 445943
loss: 1.0081573724746704,grad_norm: 0.8707179941546103, iteration: 445944
loss: 0.9884117841720581,grad_norm: 0.8113753424682758, iteration: 445945
loss: 0.9481184482574463,grad_norm: 0.7482577238952854, iteration: 445946
loss: 1.0744880437850952,grad_norm: 0.9999999411233386, iteration: 445947
loss: 1.1130633354187012,grad_norm: 0.9275534852640848, iteration: 445948
loss: 1.0378433465957642,grad_norm: 0.8360067896558208, iteration: 445949
loss: 1.0770620107650757,grad_norm: 0.9999997956664264, iteration: 445950
loss: 0.9783526659011841,grad_norm: 0.7322111332126935, iteration: 445951
loss: 1.0387431383132935,grad_norm: 0.7904158897774609, iteration: 445952
loss: 1.0193772315979004,grad_norm: 0.6984518455382408, iteration: 445953
loss: 1.0657986402511597,grad_norm: 0.9999996878161073, iteration: 445954
loss: 0.9864993095397949,grad_norm: 0.7377684167028898, iteration: 445955
loss: 1.0434463024139404,grad_norm: 0.7331787086705697, iteration: 445956
loss: 1.0032765865325928,grad_norm: 0.7106531295750246, iteration: 445957
loss: 1.0311131477355957,grad_norm: 0.8939326290376037, iteration: 445958
loss: 1.037429928779602,grad_norm: 0.7225160852854495, iteration: 445959
loss: 0.9714228510856628,grad_norm: 0.6955860778613119, iteration: 445960
loss: 0.9783475995063782,grad_norm: 0.8758825452150664, iteration: 445961
loss: 1.0404976606369019,grad_norm: 0.8170139491908516, iteration: 445962
loss: 1.1316797733306885,grad_norm: 0.9999995562085127, iteration: 445963
loss: 1.0062981843948364,grad_norm: 0.929347082301568, iteration: 445964
loss: 1.0396062135696411,grad_norm: 0.9999994582467906, iteration: 445965
loss: 1.0245399475097656,grad_norm: 0.9999991274721091, iteration: 445966
loss: 1.0098575353622437,grad_norm: 0.6412653691738204, iteration: 445967
loss: 0.9921008348464966,grad_norm: 0.763827092662536, iteration: 445968
loss: 1.0588445663452148,grad_norm: 0.7212966668563997, iteration: 445969
loss: 0.9911712408065796,grad_norm: 0.9999990826664542, iteration: 445970
loss: 0.9917441606521606,grad_norm: 0.9999993817975579, iteration: 445971
loss: 0.9821121692657471,grad_norm: 0.9077799602836656, iteration: 445972
loss: 1.0898685455322266,grad_norm: 0.9999995367140796, iteration: 445973
loss: 1.000836968421936,grad_norm: 0.9471635314059904, iteration: 445974
loss: 1.0263705253601074,grad_norm: 0.8203361343893302, iteration: 445975
loss: 0.9788398146629333,grad_norm: 0.8047650830242526, iteration: 445976
loss: 1.0403897762298584,grad_norm: 0.837546662040838, iteration: 445977
loss: 0.9786746501922607,grad_norm: 0.8678247511339144, iteration: 445978
loss: 1.0595752000808716,grad_norm: 0.9999997424789934, iteration: 445979
loss: 1.082010269165039,grad_norm: 0.8409281718752878, iteration: 445980
loss: 1.0524117946624756,grad_norm: 0.9736313552256306, iteration: 445981
loss: 1.0548335313796997,grad_norm: 0.9999990328960443, iteration: 445982
loss: 1.0500248670578003,grad_norm: 0.8699590811630146, iteration: 445983
loss: 1.2247735261917114,grad_norm: 0.9999999622439029, iteration: 445984
loss: 1.028482437133789,grad_norm: 0.9999992325329644, iteration: 445985
loss: 1.057509422302246,grad_norm: 0.9431131312799773, iteration: 445986
loss: 1.090676188468933,grad_norm: 0.9999993661713815, iteration: 445987
loss: 1.0258264541625977,grad_norm: 0.8056384364877485, iteration: 445988
loss: 1.0166293382644653,grad_norm: 0.999999142382137, iteration: 445989
loss: 1.0272247791290283,grad_norm: 0.9999996616885436, iteration: 445990
loss: 1.0186481475830078,grad_norm: 0.609732319007236, iteration: 445991
loss: 1.0821714401245117,grad_norm: 0.9999998957087854, iteration: 445992
loss: 0.9725279808044434,grad_norm: 0.6969067172910449, iteration: 445993
loss: 1.006600260734558,grad_norm: 0.7601689465245642, iteration: 445994
loss: 1.0173676013946533,grad_norm: 0.904636907281316, iteration: 445995
loss: 1.0838432312011719,grad_norm: 0.999999809340219, iteration: 445996
loss: 1.0404068231582642,grad_norm: 0.9999992674421616, iteration: 445997
loss: 1.0031843185424805,grad_norm: 0.8016587999925346, iteration: 445998
loss: 1.0203503370285034,grad_norm: 0.8629870163934047, iteration: 445999
loss: 1.0020482540130615,grad_norm: 0.7789381113843988, iteration: 446000
loss: 1.0423110723495483,grad_norm: 0.99999927184956, iteration: 446001
loss: 1.045775055885315,grad_norm: 0.9303896250326092, iteration: 446002
loss: 1.0061790943145752,grad_norm: 0.9123486984328089, iteration: 446003
loss: 1.0197426080703735,grad_norm: 0.9999991565593013, iteration: 446004
loss: 0.9706152677536011,grad_norm: 0.9999996645114992, iteration: 446005
loss: 0.992698609828949,grad_norm: 0.9269006414401448, iteration: 446006
loss: 0.9688534140586853,grad_norm: 0.7752063564299273, iteration: 446007
loss: 1.0288039445877075,grad_norm: 0.7076434339744669, iteration: 446008
loss: 0.9612496495246887,grad_norm: 0.8504509347236011, iteration: 446009
loss: 0.9978983402252197,grad_norm: 0.8132722473276346, iteration: 446010
loss: 0.9901452660560608,grad_norm: 0.7585236609845395, iteration: 446011
loss: 1.0724331140518188,grad_norm: 0.9999993718138429, iteration: 446012
loss: 0.9848710894584656,grad_norm: 0.6560694839026953, iteration: 446013
loss: 0.9824883937835693,grad_norm: 0.6908362499504901, iteration: 446014
loss: 1.0490927696228027,grad_norm: 0.9900526567152292, iteration: 446015
loss: 0.9912202954292297,grad_norm: 0.6380235335304855, iteration: 446016
loss: 1.011996865272522,grad_norm: 0.9999993652571071, iteration: 446017
loss: 0.9838621616363525,grad_norm: 0.6719774105078932, iteration: 446018
loss: 1.0104488134384155,grad_norm: 0.8308044943634577, iteration: 446019
loss: 0.9432130455970764,grad_norm: 0.8285562670198293, iteration: 446020
loss: 1.0607844591140747,grad_norm: 0.9999996400414287, iteration: 446021
loss: 0.9643437266349792,grad_norm: 0.7154408176572623, iteration: 446022
loss: 0.9918391108512878,grad_norm: 0.6998274634205666, iteration: 446023
loss: 1.0745148658752441,grad_norm: 0.71396023228563, iteration: 446024
loss: 0.9952889084815979,grad_norm: 0.7612061432579068, iteration: 446025
loss: 1.0026178359985352,grad_norm: 0.7255230295744486, iteration: 446026
loss: 1.0422829389572144,grad_norm: 0.9999996303634239, iteration: 446027
loss: 0.9834727644920349,grad_norm: 0.7989541286173346, iteration: 446028
loss: 0.9866994023323059,grad_norm: 0.6517094916916579, iteration: 446029
loss: 1.0376533269882202,grad_norm: 0.9019130393986352, iteration: 446030
loss: 1.0263344049453735,grad_norm: 0.9999992839268744, iteration: 446031
loss: 1.030347466468811,grad_norm: 0.8782321319735815, iteration: 446032
loss: 1.0112842321395874,grad_norm: 0.9999990590060139, iteration: 446033
loss: 0.9953792691230774,grad_norm: 0.7919082669538088, iteration: 446034
loss: 1.012831449508667,grad_norm: 0.9999993334719756, iteration: 446035
loss: 1.0055732727050781,grad_norm: 0.7597688861395746, iteration: 446036
loss: 1.0113052129745483,grad_norm: 0.732726811393427, iteration: 446037
loss: 1.024424433708191,grad_norm: 0.758687753719699, iteration: 446038
loss: 0.9610921144485474,grad_norm: 0.9389504981131828, iteration: 446039
loss: 1.069726824760437,grad_norm: 0.8676362622131049, iteration: 446040
loss: 0.9683347940444946,grad_norm: 0.7976886680830102, iteration: 446041
loss: 0.9965795278549194,grad_norm: 0.9245487565859192, iteration: 446042
loss: 1.0399972200393677,grad_norm: 0.7466337759270343, iteration: 446043
loss: 1.0094845294952393,grad_norm: 0.9216382472872392, iteration: 446044
loss: 1.0126967430114746,grad_norm: 0.7832034437074985, iteration: 446045
loss: 1.0618689060211182,grad_norm: 0.9999993815413172, iteration: 446046
loss: 1.0013107061386108,grad_norm: 0.75224362266424, iteration: 446047
loss: 1.0067107677459717,grad_norm: 0.7015292939096097, iteration: 446048
loss: 1.073901653289795,grad_norm: 0.7980914516578248, iteration: 446049
loss: 1.0166592597961426,grad_norm: 0.6010635940221948, iteration: 446050
loss: 1.0009113550186157,grad_norm: 0.8466406936607399, iteration: 446051
loss: 0.9794148802757263,grad_norm: 0.775390419524931, iteration: 446052
loss: 0.9974803924560547,grad_norm: 0.5768588762547077, iteration: 446053
loss: 0.98602294921875,grad_norm: 0.7440226743734962, iteration: 446054
loss: 0.972345769405365,grad_norm: 0.8581707271011088, iteration: 446055
loss: 0.9666919112205505,grad_norm: 0.8552145518094939, iteration: 446056
loss: 0.9925819635391235,grad_norm: 0.8268750140125992, iteration: 446057
loss: 1.0173377990722656,grad_norm: 0.7437092887498249, iteration: 446058
loss: 1.0494824647903442,grad_norm: 0.888542038724719, iteration: 446059
loss: 0.9829035401344299,grad_norm: 0.999999096787226, iteration: 446060
loss: 0.9726749658584595,grad_norm: 0.7624886316052956, iteration: 446061
loss: 0.982292890548706,grad_norm: 0.7721354173213613, iteration: 446062
loss: 1.0815354585647583,grad_norm: 0.9999993743581557, iteration: 446063
loss: 1.0065470933914185,grad_norm: 0.7965914411093872, iteration: 446064
loss: 1.0197421312332153,grad_norm: 0.9999997688770511, iteration: 446065
loss: 0.971038818359375,grad_norm: 0.8626543253345669, iteration: 446066
loss: 1.0138647556304932,grad_norm: 0.9999997518191979, iteration: 446067
loss: 0.9705486297607422,grad_norm: 0.8986530135414789, iteration: 446068
loss: 1.0454086065292358,grad_norm: 0.9999998919007068, iteration: 446069
loss: 1.0261586904525757,grad_norm: 0.7153443361335946, iteration: 446070
loss: 0.9745103716850281,grad_norm: 0.5757602784247107, iteration: 446071
loss: 1.044967532157898,grad_norm: 0.9999999374765947, iteration: 446072
loss: 1.001662015914917,grad_norm: 0.580866681949752, iteration: 446073
loss: 1.0418270826339722,grad_norm: 0.8670646179883618, iteration: 446074
loss: 1.0491430759429932,grad_norm: 0.7687353619716154, iteration: 446075
loss: 1.0851086378097534,grad_norm: 0.808244562319557, iteration: 446076
loss: 1.0343378782272339,grad_norm: 0.9337727583272186, iteration: 446077
loss: 0.9980601072311401,grad_norm: 0.8069239650551113, iteration: 446078
loss: 1.0223431587219238,grad_norm: 0.887441004779482, iteration: 446079
loss: 0.991402268409729,grad_norm: 0.7328149716630297, iteration: 446080
loss: 0.9618886113166809,grad_norm: 0.681625888230029, iteration: 446081
loss: 1.0041149854660034,grad_norm: 0.8208847888998012, iteration: 446082
loss: 1.137795090675354,grad_norm: 0.8527750554531088, iteration: 446083
loss: 0.9828224182128906,grad_norm: 0.8937567725418831, iteration: 446084
loss: 0.9960325360298157,grad_norm: 0.8501609907523684, iteration: 446085
loss: 1.008938193321228,grad_norm: 0.6701864406267488, iteration: 446086
loss: 0.9775143265724182,grad_norm: 0.876174235114057, iteration: 446087
loss: 0.9862084984779358,grad_norm: 0.8120563884807417, iteration: 446088
loss: 0.9788861274719238,grad_norm: 0.9684840534631537, iteration: 446089
loss: 1.022400140762329,grad_norm: 0.6635864542014577, iteration: 446090
loss: 0.9731945991516113,grad_norm: 0.7849821971191759, iteration: 446091
loss: 0.9859327673912048,grad_norm: 0.8594821439750555, iteration: 446092
loss: 1.0379860401153564,grad_norm: 0.7513627480663958, iteration: 446093
loss: 0.9937344193458557,grad_norm: 0.7250645743719428, iteration: 446094
loss: 1.0463829040527344,grad_norm: 0.8460995435562688, iteration: 446095
loss: 1.0050380229949951,grad_norm: 0.8478173449364379, iteration: 446096
loss: 0.9627551436424255,grad_norm: 0.7308534016382364, iteration: 446097
loss: 1.0169458389282227,grad_norm: 0.7419516560955592, iteration: 446098
loss: 0.9605039954185486,grad_norm: 0.7052144865292315, iteration: 446099
loss: 0.9980729818344116,grad_norm: 0.7114530628219813, iteration: 446100
loss: 0.9970625042915344,grad_norm: 0.7648593622423926, iteration: 446101
loss: 1.0050700902938843,grad_norm: 0.7901424233071229, iteration: 446102
loss: 1.0110771656036377,grad_norm: 0.9999992055772761, iteration: 446103
loss: 0.9657441973686218,grad_norm: 0.7817180075461951, iteration: 446104
loss: 0.9998236298561096,grad_norm: 0.7249386479010417, iteration: 446105
loss: 1.0083355903625488,grad_norm: 0.6785774574285817, iteration: 446106
loss: 1.006914734840393,grad_norm: 0.8897748139815442, iteration: 446107
loss: 1.0014771223068237,grad_norm: 0.7624316342377699, iteration: 446108
loss: 1.043848991394043,grad_norm: 0.7465868370297207, iteration: 446109
loss: 1.0116209983825684,grad_norm: 0.7429805216489548, iteration: 446110
loss: 0.9587749242782593,grad_norm: 0.7625829981546519, iteration: 446111
loss: 0.9834529161453247,grad_norm: 0.8471876333070668, iteration: 446112
loss: 1.06979501247406,grad_norm: 0.8022135431514382, iteration: 446113
loss: 0.9989768862724304,grad_norm: 0.9999996446850247, iteration: 446114
loss: 1.0489532947540283,grad_norm: 0.9383963754677638, iteration: 446115
loss: 0.9607889652252197,grad_norm: 0.8064543266570605, iteration: 446116
loss: 0.9557355642318726,grad_norm: 0.7282586769413577, iteration: 446117
loss: 1.0213059186935425,grad_norm: 0.7570001468343479, iteration: 446118
loss: 0.9895991086959839,grad_norm: 0.7641092281128183, iteration: 446119
loss: 0.9708768129348755,grad_norm: 0.809083747776321, iteration: 446120
loss: 1.000821828842163,grad_norm: 0.8694227343282982, iteration: 446121
loss: 0.9959452748298645,grad_norm: 0.6572683794261789, iteration: 446122
loss: 1.0072152614593506,grad_norm: 0.6920210397215777, iteration: 446123
loss: 0.9633373618125916,grad_norm: 0.7080837598531877, iteration: 446124
loss: 1.0745753049850464,grad_norm: 0.8923873087160481, iteration: 446125
loss: 0.9743295311927795,grad_norm: 0.7074111627298688, iteration: 446126
loss: 1.0201197862625122,grad_norm: 0.8144662178521124, iteration: 446127
loss: 0.9687362313270569,grad_norm: 0.7101138270596551, iteration: 446128
loss: 0.9955406785011292,grad_norm: 0.6651973195340245, iteration: 446129
loss: 0.9564774632453918,grad_norm: 0.7930026761866622, iteration: 446130
loss: 0.9818115234375,grad_norm: 0.724603729978449, iteration: 446131
loss: 1.0153188705444336,grad_norm: 0.8485047465053581, iteration: 446132
loss: 1.076816201210022,grad_norm: 0.9967594654769326, iteration: 446133
loss: 1.046047329902649,grad_norm: 0.8072741625105835, iteration: 446134
loss: 1.0165725946426392,grad_norm: 0.9999992983684363, iteration: 446135
loss: 0.9885556101799011,grad_norm: 0.9596080734678569, iteration: 446136
loss: 0.9904203414916992,grad_norm: 0.7985779039728066, iteration: 446137
loss: 1.0041333436965942,grad_norm: 0.8925993897461464, iteration: 446138
loss: 0.9882720708847046,grad_norm: 0.7347010943587337, iteration: 446139
loss: 1.0383977890014648,grad_norm: 0.7814834449022094, iteration: 446140
loss: 1.0116604566574097,grad_norm: 0.9999993191947396, iteration: 446141
loss: 1.0012365579605103,grad_norm: 0.695345940435883, iteration: 446142
loss: 0.9926347136497498,grad_norm: 0.9999992008707751, iteration: 446143
loss: 1.0836639404296875,grad_norm: 0.7405673196411131, iteration: 446144
loss: 1.0270928144454956,grad_norm: 0.9999998171937348, iteration: 446145
loss: 1.0098260641098022,grad_norm: 0.7738258984737524, iteration: 446146
loss: 1.051039695739746,grad_norm: 0.7581983439163813, iteration: 446147
loss: 0.9493632912635803,grad_norm: 0.6953251360195077, iteration: 446148
loss: 1.0396175384521484,grad_norm: 0.7655915485360998, iteration: 446149
loss: 0.9883759021759033,grad_norm: 0.7447363337990542, iteration: 446150
loss: 0.9792996644973755,grad_norm: 0.7507842746877692, iteration: 446151
loss: 1.0060584545135498,grad_norm: 0.9165437162584125, iteration: 446152
loss: 1.0227694511413574,grad_norm: 0.9682933305545349, iteration: 446153
loss: 1.0028259754180908,grad_norm: 0.7550247646712368, iteration: 446154
loss: 1.0035659074783325,grad_norm: 0.9999990558520978, iteration: 446155
loss: 0.9826040267944336,grad_norm: 0.6439037203532016, iteration: 446156
loss: 1.0027124881744385,grad_norm: 0.8562655925050122, iteration: 446157
loss: 1.08661687374115,grad_norm: 0.7931679285032267, iteration: 446158
loss: 1.0670514106750488,grad_norm: 0.9999994011354538, iteration: 446159
loss: 0.9877988696098328,grad_norm: 0.7526719240377947, iteration: 446160
loss: 1.0638445615768433,grad_norm: 0.77785312726018, iteration: 446161
loss: 0.9558016061782837,grad_norm: 0.812310742805685, iteration: 446162
loss: 0.9899197816848755,grad_norm: 0.9253281035993638, iteration: 446163
loss: 1.0015358924865723,grad_norm: 0.9999992767831452, iteration: 446164
loss: 0.9807170033454895,grad_norm: 0.7890252980210313, iteration: 446165
loss: 0.9789037704467773,grad_norm: 0.8834098189519222, iteration: 446166
loss: 0.9600275754928589,grad_norm: 0.7916388536890122, iteration: 446167
loss: 1.0117084980010986,grad_norm: 0.7316804783213497, iteration: 446168
loss: 0.9769976139068604,grad_norm: 0.7886655760967296, iteration: 446169
loss: 0.9755003452301025,grad_norm: 0.6329985042450118, iteration: 446170
loss: 0.9701715707778931,grad_norm: 0.6603152049442459, iteration: 446171
loss: 1.0375896692276,grad_norm: 0.9999998108942564, iteration: 446172
loss: 1.0372799634933472,grad_norm: 0.8954112828966821, iteration: 446173
loss: 1.0019686222076416,grad_norm: 0.794449136755628, iteration: 446174
loss: 0.9801194071769714,grad_norm: 0.9999991484467993, iteration: 446175
loss: 1.0251786708831787,grad_norm: 0.7737608872339742, iteration: 446176
loss: 1.0148905515670776,grad_norm: 0.8075902763199597, iteration: 446177
loss: 0.9924221634864807,grad_norm: 0.6642684997533093, iteration: 446178
loss: 0.9868528842926025,grad_norm: 0.7664315742855693, iteration: 446179
loss: 1.0140256881713867,grad_norm: 0.6774963639353231, iteration: 446180
loss: 1.013526439666748,grad_norm: 0.6918987532580421, iteration: 446181
loss: 1.0180351734161377,grad_norm: 0.8592572998967938, iteration: 446182
loss: 1.0676742792129517,grad_norm: 0.7749913325255116, iteration: 446183
loss: 1.015928864479065,grad_norm: 0.9507922470912075, iteration: 446184
loss: 1.0424014329910278,grad_norm: 0.9999992135829152, iteration: 446185
loss: 1.004021406173706,grad_norm: 0.6449851288204159, iteration: 446186
loss: 1.0057319402694702,grad_norm: 0.9999992662184477, iteration: 446187
loss: 0.970867931842804,grad_norm: 0.8205755599302978, iteration: 446188
loss: 1.0209554433822632,grad_norm: 0.6101688004088832, iteration: 446189
loss: 0.9958274960517883,grad_norm: 0.8416558191852898, iteration: 446190
loss: 0.984188437461853,grad_norm: 0.7893170484491748, iteration: 446191
loss: 0.9815736413002014,grad_norm: 0.633783084874022, iteration: 446192
loss: 1.0223287343978882,grad_norm: 0.9999992322353832, iteration: 446193
loss: 1.025771975517273,grad_norm: 0.6394801857596693, iteration: 446194
loss: 1.0868594646453857,grad_norm: 0.999999087848779, iteration: 446195
loss: 0.9748730659484863,grad_norm: 0.7563174885929552, iteration: 446196
loss: 1.0261892080307007,grad_norm: 0.8571037870123621, iteration: 446197
loss: 0.9790181517601013,grad_norm: 0.7389867308953408, iteration: 446198
loss: 0.966562032699585,grad_norm: 0.7537698443540198, iteration: 446199
loss: 0.9891335964202881,grad_norm: 0.9931215453001302, iteration: 446200
loss: 1.0151766538619995,grad_norm: 0.9999993426389456, iteration: 446201
loss: 1.015312910079956,grad_norm: 0.7032081797702863, iteration: 446202
loss: 1.0042260885238647,grad_norm: 0.877799736740305, iteration: 446203
loss: 1.0173356533050537,grad_norm: 0.701144637590816, iteration: 446204
loss: 1.0744134187698364,grad_norm: 0.9999995410867851, iteration: 446205
loss: 0.990909218788147,grad_norm: 0.7868094857030666, iteration: 446206
loss: 0.9806509017944336,grad_norm: 0.7160357059188429, iteration: 446207
loss: 1.0621522665023804,grad_norm: 0.9999999345617644, iteration: 446208
loss: 1.008165717124939,grad_norm: 0.7435068095667107, iteration: 446209
loss: 1.0414284467697144,grad_norm: 0.9276917509999091, iteration: 446210
loss: 1.0192385911941528,grad_norm: 0.659893294590362, iteration: 446211
loss: 1.071210265159607,grad_norm: 0.9046830429848646, iteration: 446212
loss: 0.9986071586608887,grad_norm: 0.8639214317608538, iteration: 446213
loss: 0.974784255027771,grad_norm: 0.7912234204888231, iteration: 446214
loss: 1.094375729560852,grad_norm: 0.999999929931224, iteration: 446215
loss: 1.0057642459869385,grad_norm: 0.881855612454528, iteration: 446216
loss: 1.0137653350830078,grad_norm: 0.9999996445706764, iteration: 446217
loss: 1.0614594221115112,grad_norm: 0.9999994321742424, iteration: 446218
loss: 1.0413058996200562,grad_norm: 0.8923258861390935, iteration: 446219
loss: 1.0580542087554932,grad_norm: 0.7795533171097668, iteration: 446220
loss: 0.986491858959198,grad_norm: 0.771078051682863, iteration: 446221
loss: 1.0110481977462769,grad_norm: 0.7515814573105968, iteration: 446222
loss: 1.0121253728866577,grad_norm: 0.7759200008512922, iteration: 446223
loss: 1.03390371799469,grad_norm: 0.8327171678141344, iteration: 446224
loss: 1.0430625677108765,grad_norm: 0.9999990672870936, iteration: 446225
loss: 1.037156343460083,grad_norm: 0.9307178449273207, iteration: 446226
loss: 0.9867270588874817,grad_norm: 0.9999991080462024, iteration: 446227
loss: 0.999762773513794,grad_norm: 0.6529493864930845, iteration: 446228
loss: 0.9928916692733765,grad_norm: 0.7217536193319911, iteration: 446229
loss: 0.9957007765769958,grad_norm: 0.7462814218906028, iteration: 446230
loss: 0.950569212436676,grad_norm: 0.7694066924382205, iteration: 446231
loss: 1.0052680969238281,grad_norm: 0.7522938010133592, iteration: 446232
loss: 0.995805561542511,grad_norm: 0.7283016283048348, iteration: 446233
loss: 1.0144282579421997,grad_norm: 0.9411542777781061, iteration: 446234
loss: 1.0232460498809814,grad_norm: 0.745364222235976, iteration: 446235
loss: 1.0143104791641235,grad_norm: 0.8482461621710118, iteration: 446236
loss: 1.0219532251358032,grad_norm: 0.6356672274343934, iteration: 446237
loss: 1.0482732057571411,grad_norm: 0.8784305142598249, iteration: 446238
loss: 0.9910640716552734,grad_norm: 0.7077573194328443, iteration: 446239
loss: 1.006030559539795,grad_norm: 0.9999994807875984, iteration: 446240
loss: 1.047742486000061,grad_norm: 0.999999986942705, iteration: 446241
loss: 1.0056196451187134,grad_norm: 0.6324892481403869, iteration: 446242
loss: 0.9986466765403748,grad_norm: 0.800834317554378, iteration: 446243
loss: 1.0164119005203247,grad_norm: 0.706860820158276, iteration: 446244
loss: 1.0187371969223022,grad_norm: 0.8511871237723613, iteration: 446245
loss: 1.048940896987915,grad_norm: 0.999999754363767, iteration: 446246
loss: 1.0227737426757812,grad_norm: 0.8681884186513174, iteration: 446247
loss: 1.0387914180755615,grad_norm: 0.9956638393641242, iteration: 446248
loss: 1.0376245975494385,grad_norm: 0.9348589923296481, iteration: 446249
loss: 0.9923533201217651,grad_norm: 0.6959109319555282, iteration: 446250
loss: 1.0159939527511597,grad_norm: 0.7702944902518097, iteration: 446251
loss: 1.0016553401947021,grad_norm: 0.6517467931442577, iteration: 446252
loss: 0.9938613772392273,grad_norm: 0.7436535600941093, iteration: 446253
loss: 1.010694980621338,grad_norm: 0.9074180567437518, iteration: 446254
loss: 0.9935397505760193,grad_norm: 0.7511776978957623, iteration: 446255
loss: 0.9838754534721375,grad_norm: 0.7065917434821903, iteration: 446256
loss: 1.0154824256896973,grad_norm: 0.8136023847373306, iteration: 446257
loss: 1.0060131549835205,grad_norm: 0.9999990833634606, iteration: 446258
loss: 0.9958453178405762,grad_norm: 0.9999991387424612, iteration: 446259
loss: 1.0061224699020386,grad_norm: 0.9523459930061158, iteration: 446260
loss: 1.3128679990768433,grad_norm: 0.9999997218432184, iteration: 446261
loss: 0.9781423807144165,grad_norm: 0.7559721413240865, iteration: 446262
loss: 1.0700911283493042,grad_norm: 0.999999191371023, iteration: 446263
loss: 1.0521878004074097,grad_norm: 0.8341245985513441, iteration: 446264
loss: 1.0207927227020264,grad_norm: 0.7645136484538195, iteration: 446265
loss: 1.0257710218429565,grad_norm: 0.7557234418801914, iteration: 446266
loss: 0.968370258808136,grad_norm: 0.7488176774617441, iteration: 446267
loss: 0.9915220141410828,grad_norm: 0.8174493786867484, iteration: 446268
loss: 1.0369315147399902,grad_norm: 0.7141679311890159, iteration: 446269
loss: 1.1211117506027222,grad_norm: 0.9999995514199691, iteration: 446270
loss: 0.9803940057754517,grad_norm: 0.9999991615968788, iteration: 446271
loss: 0.9770421385765076,grad_norm: 0.7194324937957569, iteration: 446272
loss: 1.0563405752182007,grad_norm: 0.7502787165338446, iteration: 446273
loss: 1.0318140983581543,grad_norm: 0.8536732528968792, iteration: 446274
loss: 0.9861144423484802,grad_norm: 0.804526528027661, iteration: 446275
loss: 1.038880467414856,grad_norm: 0.9999998836772478, iteration: 446276
loss: 1.0851401090621948,grad_norm: 0.999999482503578, iteration: 446277
loss: 1.0464603900909424,grad_norm: 0.9999998597080617, iteration: 446278
loss: 1.0709667205810547,grad_norm: 0.9999997818648785, iteration: 446279
loss: 1.0328933000564575,grad_norm: 0.6498473637377659, iteration: 446280
loss: 1.0133658647537231,grad_norm: 0.9999990027835441, iteration: 446281
loss: 1.1008414030075073,grad_norm: 0.9759299130092671, iteration: 446282
loss: 0.9913756847381592,grad_norm: 0.8757440289635942, iteration: 446283
loss: 0.9719721674919128,grad_norm: 0.7362818052883823, iteration: 446284
loss: 1.0029327869415283,grad_norm: 0.9999990196951279, iteration: 446285
loss: 1.0048325061798096,grad_norm: 0.8019597940428894, iteration: 446286
loss: 1.0166171789169312,grad_norm: 0.9999995450823836, iteration: 446287
loss: 0.9989572763442993,grad_norm: 0.9601968882245178, iteration: 446288
loss: 1.0352160930633545,grad_norm: 0.8522183391514917, iteration: 446289
loss: 1.011467456817627,grad_norm: 0.9999990968836963, iteration: 446290
loss: 1.0111931562423706,grad_norm: 0.9999991113151948, iteration: 446291
loss: 1.1523593664169312,grad_norm: 0.9999996853804596, iteration: 446292
loss: 1.1276031732559204,grad_norm: 0.9999997863648766, iteration: 446293
loss: 0.9918490052223206,grad_norm: 0.8529959105299609, iteration: 446294
loss: 1.0199604034423828,grad_norm: 0.9999999247408796, iteration: 446295
loss: 1.0319054126739502,grad_norm: 0.9780537791386652, iteration: 446296
loss: 1.148785948753357,grad_norm: 0.9433626619326007, iteration: 446297
loss: 0.9762389063835144,grad_norm: 0.8923958186343031, iteration: 446298
loss: 0.992484986782074,grad_norm: 0.8055680339581475, iteration: 446299
loss: 0.9883679747581482,grad_norm: 0.9834647498318623, iteration: 446300
loss: 0.993003249168396,grad_norm: 0.7211271910651038, iteration: 446301
loss: 1.0421111583709717,grad_norm: 0.9999990004538262, iteration: 446302
loss: 0.9948117733001709,grad_norm: 0.7672480216240419, iteration: 446303
loss: 1.0283302068710327,grad_norm: 0.9999994795770197, iteration: 446304
loss: 0.9821248650550842,grad_norm: 0.7920101671338862, iteration: 446305
loss: 1.0134069919586182,grad_norm: 0.8253227744856118, iteration: 446306
loss: 1.0051183700561523,grad_norm: 0.7681288466577851, iteration: 446307
loss: 0.9952415227890015,grad_norm: 0.9999990295447668, iteration: 446308
loss: 1.0085796117782593,grad_norm: 0.9999995524818169, iteration: 446309
loss: 0.9972613453865051,grad_norm: 0.8252256938986563, iteration: 446310
loss: 1.0165002346038818,grad_norm: 1.000000091825012, iteration: 446311
loss: 1.0413711071014404,grad_norm: 0.6784587921864691, iteration: 446312
loss: 1.0737404823303223,grad_norm: 0.9999999405974769, iteration: 446313
loss: 1.001266360282898,grad_norm: 0.9999992446978088, iteration: 446314
loss: 0.9627240300178528,grad_norm: 0.9260640612992606, iteration: 446315
loss: 0.9701547622680664,grad_norm: 0.6966071602078671, iteration: 446316
loss: 0.9808148741722107,grad_norm: 0.749830396200351, iteration: 446317
loss: 1.0069050788879395,grad_norm: 0.9999994984629198, iteration: 446318
loss: 1.0628384351730347,grad_norm: 0.9999999350231418, iteration: 446319
loss: 1.1243603229522705,grad_norm: 0.8303052719328795, iteration: 446320
loss: 1.0102756023406982,grad_norm: 0.9987677729073208, iteration: 446321
loss: 0.976040244102478,grad_norm: 0.8356821749248243, iteration: 446322
loss: 0.9913912415504456,grad_norm: 0.8875474431174476, iteration: 446323
loss: 1.0325309038162231,grad_norm: 0.7960678920517978, iteration: 446324
loss: 0.997195839881897,grad_norm: 0.9999990084275098, iteration: 446325
loss: 0.987594485282898,grad_norm: 0.8146840656853139, iteration: 446326
loss: 1.0637377500534058,grad_norm: 0.9999997358392378, iteration: 446327
loss: 1.0067814588546753,grad_norm: 0.8186472457620484, iteration: 446328
loss: 1.0045334100723267,grad_norm: 0.7656129283428224, iteration: 446329
loss: 1.0276988744735718,grad_norm: 0.7552646515386064, iteration: 446330
loss: 1.2823591232299805,grad_norm: 0.999999729991134, iteration: 446331
loss: 1.0865424871444702,grad_norm: 0.9999998176077268, iteration: 446332
loss: 0.9955851435661316,grad_norm: 0.9374125708300826, iteration: 446333
loss: 0.968023955821991,grad_norm: 0.751118728887282, iteration: 446334
loss: 1.052521824836731,grad_norm: 0.7418139301788739, iteration: 446335
loss: 1.0897690057754517,grad_norm: 0.8795531449924331, iteration: 446336
loss: 1.0029594898223877,grad_norm: 0.8201424027681538, iteration: 446337
loss: 0.9803805351257324,grad_norm: 0.8751819440054743, iteration: 446338
loss: 1.0022717714309692,grad_norm: 0.7472489703776752, iteration: 446339
loss: 0.9832594394683838,grad_norm: 0.6627752320052603, iteration: 446340
loss: 1.0143574476242065,grad_norm: 0.8468606339995495, iteration: 446341
loss: 1.082470178604126,grad_norm: 0.9748431403915105, iteration: 446342
loss: 1.0492539405822754,grad_norm: 0.9999994424372931, iteration: 446343
loss: 1.0399593114852905,grad_norm: 0.741250144563487, iteration: 446344
loss: 0.9684157967567444,grad_norm: 0.7850035816568741, iteration: 446345
loss: 1.0148378610610962,grad_norm: 0.8370734993675276, iteration: 446346
loss: 1.0357340574264526,grad_norm: 0.7943687888223856, iteration: 446347
loss: 1.0046496391296387,grad_norm: 0.6526138848322433, iteration: 446348
loss: 1.0536344051361084,grad_norm: 0.9999999132065427, iteration: 446349
loss: 1.0036637783050537,grad_norm: 0.7988000895886382, iteration: 446350
loss: 1.0089044570922852,grad_norm: 0.7954438350842528, iteration: 446351
loss: 0.9886438250541687,grad_norm: 0.8991703834769469, iteration: 446352
loss: 1.0819809436798096,grad_norm: 0.9999992918956684, iteration: 446353
loss: 1.004006028175354,grad_norm: 0.788660427948963, iteration: 446354
loss: 0.9673001766204834,grad_norm: 0.9999991745542649, iteration: 446355
loss: 0.9703338742256165,grad_norm: 0.7246508827369135, iteration: 446356
loss: 1.0434602499008179,grad_norm: 0.7666274392235315, iteration: 446357
loss: 0.9737660884857178,grad_norm: 0.6759262539135763, iteration: 446358
loss: 1.0086710453033447,grad_norm: 0.7144231756657801, iteration: 446359
loss: 0.9935621619224548,grad_norm: 0.7405753181513216, iteration: 446360
loss: 0.9778681397438049,grad_norm: 0.8187194569138279, iteration: 446361
loss: 1.0948424339294434,grad_norm: 0.9999993884969225, iteration: 446362
loss: 0.9807006120681763,grad_norm: 0.8680365611415441, iteration: 446363
loss: 0.9698194861412048,grad_norm: 0.9999995126420239, iteration: 446364
loss: 1.1178711652755737,grad_norm: 0.9999993421369757, iteration: 446365
loss: 1.0579110383987427,grad_norm: 0.9178429537014232, iteration: 446366
loss: 0.983668327331543,grad_norm: 0.8693775276481436, iteration: 446367
loss: 1.0067228078842163,grad_norm: 0.7365121032990956, iteration: 446368
loss: 1.0233287811279297,grad_norm: 0.999999410382726, iteration: 446369
loss: 0.9959002733230591,grad_norm: 0.7344029324577012, iteration: 446370
loss: 0.9872665405273438,grad_norm: 0.839421267684495, iteration: 446371
loss: 1.1040855646133423,grad_norm: 0.9999994811422832, iteration: 446372
loss: 0.9920998215675354,grad_norm: 0.696327873853844, iteration: 446373
loss: 1.0289556980133057,grad_norm: 0.727569177350279, iteration: 446374
loss: 0.969373345375061,grad_norm: 0.999999471356384, iteration: 446375
loss: 0.9854316115379333,grad_norm: 0.7594618807828507, iteration: 446376
loss: 0.970848560333252,grad_norm: 0.7065923925086961, iteration: 446377
loss: 0.9958522319793701,grad_norm: 0.839439848039929, iteration: 446378
loss: 0.9866509437561035,grad_norm: 0.762305795821956, iteration: 446379
loss: 1.0269801616668701,grad_norm: 0.8436779713106298, iteration: 446380
loss: 1.0008864402770996,grad_norm: 0.9999996591958011, iteration: 446381
loss: 1.041056752204895,grad_norm: 0.9684922181278321, iteration: 446382
loss: 0.9948557019233704,grad_norm: 0.7538493175941651, iteration: 446383
loss: 0.9811406135559082,grad_norm: 0.999999054835823, iteration: 446384
loss: 1.034054160118103,grad_norm: 0.9999993135961104, iteration: 446385
loss: 1.0284838676452637,grad_norm: 0.8126498028315818, iteration: 446386
loss: 1.0620770454406738,grad_norm: 0.9999998736035414, iteration: 446387
loss: 1.0610395669937134,grad_norm: 0.8022573095354284, iteration: 446388
loss: 1.0302648544311523,grad_norm: 0.7572474200659681, iteration: 446389
loss: 1.0012590885162354,grad_norm: 0.8572559064866221, iteration: 446390
loss: 1.0381635427474976,grad_norm: 0.7572366418512917, iteration: 446391
loss: 0.986750066280365,grad_norm: 0.6994895129134864, iteration: 446392
loss: 0.9729930758476257,grad_norm: 0.7846343725723076, iteration: 446393
loss: 0.9691635370254517,grad_norm: 0.7675456469858442, iteration: 446394
loss: 1.0149197578430176,grad_norm: 0.7930419799079113, iteration: 446395
loss: 1.0224202871322632,grad_norm: 0.7537615518883428, iteration: 446396
loss: 1.0350384712219238,grad_norm: 0.9999991908927278, iteration: 446397
loss: 1.044705867767334,grad_norm: 0.9999992449363934, iteration: 446398
loss: 1.019229531288147,grad_norm: 0.7188232978472239, iteration: 446399
loss: 0.9818535447120667,grad_norm: 0.7740892182197884, iteration: 446400
loss: 1.001835584640503,grad_norm: 0.6583517749911315, iteration: 446401
loss: 0.9993287920951843,grad_norm: 0.80331136426612, iteration: 446402
loss: 0.978888213634491,grad_norm: 0.8276952122473289, iteration: 446403
loss: 0.9789098501205444,grad_norm: 0.8918127029921069, iteration: 446404
loss: 1.136076807975769,grad_norm: 0.9999996254579384, iteration: 446405
loss: 0.9270704388618469,grad_norm: 0.7347484652975674, iteration: 446406
loss: 1.027145504951477,grad_norm: 0.8388843416262837, iteration: 446407
loss: 1.006453037261963,grad_norm: 0.7050672685413907, iteration: 446408
loss: 1.0533716678619385,grad_norm: 0.882122132633416, iteration: 446409
loss: 0.9940440058708191,grad_norm: 0.6872390109410663, iteration: 446410
loss: 1.0021809339523315,grad_norm: 0.7066401746436104, iteration: 446411
loss: 1.0493718385696411,grad_norm: 0.9999999052302851, iteration: 446412
loss: 0.9562689661979675,grad_norm: 0.6512032307120699, iteration: 446413
loss: 0.9777386784553528,grad_norm: 0.7901125432097068, iteration: 446414
loss: 1.016089677810669,grad_norm: 0.6688843652755209, iteration: 446415
loss: 0.9701736569404602,grad_norm: 0.7459650828365166, iteration: 446416
loss: 0.9972802400588989,grad_norm: 0.971693183773554, iteration: 446417
loss: 1.02117121219635,grad_norm: 0.7837794827411226, iteration: 446418
loss: 0.997653067111969,grad_norm: 0.7019209322948047, iteration: 446419
loss: 1.0085337162017822,grad_norm: 0.8555477261960196, iteration: 446420
loss: 1.085021734237671,grad_norm: 0.9999992890259649, iteration: 446421
loss: 0.9875319004058838,grad_norm: 0.9206802322067174, iteration: 446422
loss: 0.9504647254943848,grad_norm: 0.6720282675848043, iteration: 446423
loss: 1.0740748643875122,grad_norm: 0.9999990720864733, iteration: 446424
loss: 1.0710877180099487,grad_norm: 0.7991770552089551, iteration: 446425
loss: 0.9699570536613464,grad_norm: 0.7704628944924828, iteration: 446426
loss: 1.0181258916854858,grad_norm: 0.7413463015459322, iteration: 446427
loss: 1.0222748517990112,grad_norm: 0.6590047014804409, iteration: 446428
loss: 0.9742213487625122,grad_norm: 0.8262800651686436, iteration: 446429
loss: 1.0389394760131836,grad_norm: 0.7478722254797441, iteration: 446430
loss: 1.1974925994873047,grad_norm: 0.9999991060257563, iteration: 446431
loss: 1.0107660293579102,grad_norm: 0.672730493958381, iteration: 446432
loss: 1.029760718345642,grad_norm: 0.8035621063629051, iteration: 446433
loss: 1.0537893772125244,grad_norm: 0.9999997070706598, iteration: 446434
loss: 1.031247854232788,grad_norm: 0.8588125111947024, iteration: 446435
loss: 1.0032881498336792,grad_norm: 0.8124454199908663, iteration: 446436
loss: 1.05948007106781,grad_norm: 0.8712616205874603, iteration: 446437
loss: 1.0101208686828613,grad_norm: 0.8495257867472127, iteration: 446438
loss: 1.0078157186508179,grad_norm: 0.8604006757324553, iteration: 446439
loss: 1.0496587753295898,grad_norm: 0.9374747088108815, iteration: 446440
loss: 0.9890928268432617,grad_norm: 0.7472725455609784, iteration: 446441
loss: 1.0068551301956177,grad_norm: 0.744941713339196, iteration: 446442
loss: 0.9889450669288635,grad_norm: 0.7642098712916825, iteration: 446443
loss: 0.996753990650177,grad_norm: 0.6800567495611569, iteration: 446444
loss: 1.0112017393112183,grad_norm: 0.8367286831928542, iteration: 446445
loss: 0.9719696044921875,grad_norm: 0.9124335571304563, iteration: 446446
loss: 1.0229371786117554,grad_norm: 0.8375533116758117, iteration: 446447
loss: 1.0122047662734985,grad_norm: 0.706428658781566, iteration: 446448
loss: 1.0138293504714966,grad_norm: 0.6950377487973497, iteration: 446449
loss: 0.9860420823097229,grad_norm: 0.7771794580444026, iteration: 446450
loss: 1.0228687524795532,grad_norm: 0.999999862129687, iteration: 446451
loss: 0.9741992950439453,grad_norm: 0.7878677316508985, iteration: 446452
loss: 1.0028482675552368,grad_norm: 0.7662804280015942, iteration: 446453
loss: 0.97891765832901,grad_norm: 0.7767731688878617, iteration: 446454
loss: 0.9726261496543884,grad_norm: 0.6866523824606896, iteration: 446455
loss: 0.9981096982955933,grad_norm: 0.8852498221191818, iteration: 446456
loss: 0.9826974868774414,grad_norm: 0.7693649986884261, iteration: 446457
loss: 1.0168602466583252,grad_norm: 0.9999997967486355, iteration: 446458
loss: 1.0050771236419678,grad_norm: 0.7269257553541434, iteration: 446459
loss: 0.9950881600379944,grad_norm: 0.8196763965558064, iteration: 446460
loss: 0.9715395569801331,grad_norm: 0.7973356086969566, iteration: 446461
loss: 0.9780758619308472,grad_norm: 0.8767689197772444, iteration: 446462
loss: 1.0637224912643433,grad_norm: 0.8622937436189766, iteration: 446463
loss: 1.010310411453247,grad_norm: 0.6734814047396012, iteration: 446464
loss: 0.9871343374252319,grad_norm: 0.8010988276010907, iteration: 446465
loss: 0.9811909198760986,grad_norm: 0.7407328243866339, iteration: 446466
loss: 1.0013681650161743,grad_norm: 0.9999992786880562, iteration: 446467
loss: 1.0448882579803467,grad_norm: 0.9038433055280414, iteration: 446468
loss: 0.9491059184074402,grad_norm: 0.8119253708030125, iteration: 446469
loss: 1.0157393217086792,grad_norm: 0.9394631440775784, iteration: 446470
loss: 0.9873799085617065,grad_norm: 0.9999989887173845, iteration: 446471
loss: 1.0098958015441895,grad_norm: 0.7905436149036185, iteration: 446472
loss: 1.0121707916259766,grad_norm: 0.704199539639375, iteration: 446473
loss: 0.9918062090873718,grad_norm: 0.7922947383596294, iteration: 446474
loss: 1.0027382373809814,grad_norm: 0.7848010722893658, iteration: 446475
loss: 0.9946566820144653,grad_norm: 0.7522099679741235, iteration: 446476
loss: 0.9536560773849487,grad_norm: 0.7429699073342848, iteration: 446477
loss: 1.0203659534454346,grad_norm: 0.7601785516716097, iteration: 446478
loss: 0.9862962961196899,grad_norm: 0.7640078222767341, iteration: 446479
loss: 0.9757186770439148,grad_norm: 0.9257213175643849, iteration: 446480
loss: 1.0097922086715698,grad_norm: 0.8019870314455341, iteration: 446481
loss: 1.0444657802581787,grad_norm: 0.9999990707398826, iteration: 446482
loss: 1.0234380960464478,grad_norm: 0.8290159144735922, iteration: 446483
loss: 1.0079506635665894,grad_norm: 0.9999990281470056, iteration: 446484
loss: 0.9918813705444336,grad_norm: 0.9836316310756907, iteration: 446485
loss: 1.0350003242492676,grad_norm: 0.9999990997742826, iteration: 446486
loss: 0.9853693842887878,grad_norm: 0.628780409561724, iteration: 446487
loss: 1.115301489830017,grad_norm: 0.9999996961025485, iteration: 446488
loss: 1.013123869895935,grad_norm: 0.7011816645821032, iteration: 446489
loss: 1.021912932395935,grad_norm: 0.8245435237518685, iteration: 446490
loss: 1.0170745849609375,grad_norm: 0.8354044788514049, iteration: 446491
loss: 0.9914312958717346,grad_norm: 0.7081405225777097, iteration: 446492
loss: 0.9839991927146912,grad_norm: 0.8288076769964828, iteration: 446493
loss: 1.0295346975326538,grad_norm: 0.9749704187701179, iteration: 446494
loss: 1.0080757141113281,grad_norm: 0.7573109542465827, iteration: 446495
loss: 0.9921524524688721,grad_norm: 0.8462008627939565, iteration: 446496
loss: 0.955862820148468,grad_norm: 0.8028891863975597, iteration: 446497
loss: 0.9862263202667236,grad_norm: 0.8907837366754621, iteration: 446498
loss: 0.9500431418418884,grad_norm: 0.7551933144556366, iteration: 446499
loss: 0.9823622107505798,grad_norm: 0.7441396709198138, iteration: 446500
loss: 0.9593569040298462,grad_norm: 0.7674709434414951, iteration: 446501
loss: 1.022358775138855,grad_norm: 0.9435203093337852, iteration: 446502
loss: 0.9854403734207153,grad_norm: 0.7393425481190026, iteration: 446503
loss: 0.9702460765838623,grad_norm: 0.7614743730156449, iteration: 446504
loss: 1.0312714576721191,grad_norm: 0.9999999745131253, iteration: 446505
loss: 1.078197717666626,grad_norm: 0.9999993259038996, iteration: 446506
loss: 0.9946715235710144,grad_norm: 0.7054385338347786, iteration: 446507
loss: 1.008535385131836,grad_norm: 0.9999991374734917, iteration: 446508
loss: 0.9691585302352905,grad_norm: 0.7278794251482396, iteration: 446509
loss: 0.9915875196456909,grad_norm: 0.8808256708397703, iteration: 446510
loss: 1.040285587310791,grad_norm: 0.7842842991499984, iteration: 446511
loss: 0.9963669776916504,grad_norm: 0.8173660631774796, iteration: 446512
loss: 1.01430344581604,grad_norm: 0.8392408721151089, iteration: 446513
loss: 1.0643055438995361,grad_norm: 0.8332305756108181, iteration: 446514
loss: 0.9797644019126892,grad_norm: 0.7040992425517637, iteration: 446515
loss: 1.023937463760376,grad_norm: 0.873818233762286, iteration: 446516
loss: 0.9955326914787292,grad_norm: 0.8437762211574268, iteration: 446517
loss: 0.9693307876586914,grad_norm: 0.8131100782748761, iteration: 446518
loss: 0.9445488452911377,grad_norm: 0.7061458095451772, iteration: 446519
loss: 1.0076111555099487,grad_norm: 0.8178971248525324, iteration: 446520
loss: 1.0062087774276733,grad_norm: 0.7373095174524609, iteration: 446521
loss: 0.9913208484649658,grad_norm: 0.7552315807939419, iteration: 446522
loss: 1.0740599632263184,grad_norm: 0.7251313025795814, iteration: 446523
loss: 0.9894328713417053,grad_norm: 0.7069159688460133, iteration: 446524
loss: 0.9715434312820435,grad_norm: 0.9894881801742235, iteration: 446525
loss: 1.0283907651901245,grad_norm: 0.7679929767002451, iteration: 446526
loss: 0.9925656914710999,grad_norm: 0.7936917283744838, iteration: 446527
loss: 1.0001771450042725,grad_norm: 0.7798833545205207, iteration: 446528
loss: 1.0136563777923584,grad_norm: 0.8143522695151533, iteration: 446529
loss: 0.9654251337051392,grad_norm: 0.7464438198569503, iteration: 446530
loss: 0.9986046552658081,grad_norm: 0.8348549520761649, iteration: 446531
loss: 1.1033576726913452,grad_norm: 0.9999993295489558, iteration: 446532
loss: 1.0320186614990234,grad_norm: 0.9999996962304041, iteration: 446533
loss: 1.0393186807632446,grad_norm: 0.8979754347807898, iteration: 446534
loss: 1.0240507125854492,grad_norm: 0.807644050073511, iteration: 446535
loss: 1.0376144647598267,grad_norm: 0.8312377659958958, iteration: 446536
loss: 0.9600434899330139,grad_norm: 0.8365317142940724, iteration: 446537
loss: 0.9692368507385254,grad_norm: 0.7362010175316129, iteration: 446538
loss: 0.981630802154541,grad_norm: 0.9999992821364225, iteration: 446539
loss: 0.9915960431098938,grad_norm: 0.805454366219242, iteration: 446540
loss: 0.976543664932251,grad_norm: 0.6962391528939862, iteration: 446541
loss: 1.0243785381317139,grad_norm: 0.6052395180432744, iteration: 446542
loss: 1.030851125717163,grad_norm: 0.7350821171812861, iteration: 446543
loss: 0.9752587080001831,grad_norm: 0.7489491482511036, iteration: 446544
loss: 0.9933061599731445,grad_norm: 0.5820053281215403, iteration: 446545
loss: 0.9798746109008789,grad_norm: 0.7582828635402769, iteration: 446546
loss: 1.0020593404769897,grad_norm: 0.7489451643813024, iteration: 446547
loss: 1.0031200647354126,grad_norm: 0.7742107032076053, iteration: 446548
loss: 1.068697214126587,grad_norm: 0.9379063723331968, iteration: 446549
loss: 0.9950529336929321,grad_norm: 0.7358344689391725, iteration: 446550
loss: 1.0333735942840576,grad_norm: 0.7381878551182492, iteration: 446551
loss: 0.9652466177940369,grad_norm: 0.7337097653871911, iteration: 446552
loss: 0.9962151646614075,grad_norm: 0.8077716772181951, iteration: 446553
loss: 0.9809139966964722,grad_norm: 0.8368354674620366, iteration: 446554
loss: 1.0996227264404297,grad_norm: 0.9999995425460922, iteration: 446555
loss: 0.9968218207359314,grad_norm: 0.6475960180148628, iteration: 446556
loss: 1.1111869812011719,grad_norm: 0.9999996072053822, iteration: 446557
loss: 1.0107916593551636,grad_norm: 0.687103733418995, iteration: 446558
loss: 1.0460381507873535,grad_norm: 0.9191129064083705, iteration: 446559
loss: 0.9714142084121704,grad_norm: 0.7446115584214502, iteration: 446560
loss: 1.0175442695617676,grad_norm: 0.9336229564346309, iteration: 446561
loss: 1.0138583183288574,grad_norm: 0.7279890045674919, iteration: 446562
loss: 1.0291693210601807,grad_norm: 0.8506004623280913, iteration: 446563
loss: 1.0021183490753174,grad_norm: 0.8101597969079555, iteration: 446564
loss: 0.9516974687576294,grad_norm: 0.8681144344806156, iteration: 446565
loss: 1.0397443771362305,grad_norm: 0.999999677875735, iteration: 446566
loss: 0.9765745401382446,grad_norm: 0.7407225929834276, iteration: 446567
loss: 0.9556334018707275,grad_norm: 0.8925125480966727, iteration: 446568
loss: 1.0033825635910034,grad_norm: 0.8324312663390852, iteration: 446569
loss: 0.9988623261451721,grad_norm: 0.7786368417698525, iteration: 446570
loss: 1.0170674324035645,grad_norm: 0.8951042571457619, iteration: 446571
loss: 1.0000300407409668,grad_norm: 0.7436095735325653, iteration: 446572
loss: 1.0134528875350952,grad_norm: 0.7151257838007611, iteration: 446573
loss: 0.9794902801513672,grad_norm: 0.9999996655935531, iteration: 446574
loss: 1.000644564628601,grad_norm: 0.7591749869834922, iteration: 446575
loss: 1.0063549280166626,grad_norm: 0.7860422727578803, iteration: 446576
loss: 0.9717658758163452,grad_norm: 0.9813327362644152, iteration: 446577
loss: 1.040106177330017,grad_norm: 0.9999994504251807, iteration: 446578
loss: 1.039559006690979,grad_norm: 0.7646973810199608, iteration: 446579
loss: 0.9488587379455566,grad_norm: 0.8398823934806399, iteration: 446580
loss: 1.048268437385559,grad_norm: 0.8111579216836691, iteration: 446581
loss: 0.9647733569145203,grad_norm: 0.8736891177075758, iteration: 446582
loss: 0.997336745262146,grad_norm: 0.8182414616358997, iteration: 446583
loss: 0.9939197897911072,grad_norm: 0.7646730851677078, iteration: 446584
loss: 0.9882882833480835,grad_norm: 0.6350018820479034, iteration: 446585
loss: 0.9540261626243591,grad_norm: 0.8445817873019357, iteration: 446586
loss: 0.9952290058135986,grad_norm: 0.7647144513697164, iteration: 446587
loss: 0.9999330043792725,grad_norm: 0.844521677490135, iteration: 446588
loss: 1.026427149772644,grad_norm: 0.7783577969706565, iteration: 446589
loss: 0.9532628655433655,grad_norm: 0.7907746699012277, iteration: 446590
loss: 1.0002344846725464,grad_norm: 0.8127285711041763, iteration: 446591
loss: 0.9689566493034363,grad_norm: 0.877624085838885, iteration: 446592
loss: 1.0295246839523315,grad_norm: 0.8160284152246456, iteration: 446593
loss: 0.9876444339752197,grad_norm: 0.8749135619175413, iteration: 446594
loss: 0.9734810590744019,grad_norm: 0.7799513817383738, iteration: 446595
loss: 0.9816354513168335,grad_norm: 0.7408039279037599, iteration: 446596
loss: 0.9895836710929871,grad_norm: 0.7694898783988271, iteration: 446597
loss: 0.9903045892715454,grad_norm: 0.7930913020909888, iteration: 446598
loss: 0.9896634221076965,grad_norm: 0.8356236506595788, iteration: 446599
loss: 0.9810457229614258,grad_norm: 0.8004496784293264, iteration: 446600
loss: 0.9946457147598267,grad_norm: 0.7390223608511027, iteration: 446601
loss: 0.9741178750991821,grad_norm: 0.8688374814481583, iteration: 446602
loss: 1.0568968057632446,grad_norm: 0.7897488087580818, iteration: 446603
loss: 1.0148192644119263,grad_norm: 0.780224077138949, iteration: 446604
loss: 0.9915617108345032,grad_norm: 0.8278050374064954, iteration: 446605
loss: 1.0612705945968628,grad_norm: 0.999999056237639, iteration: 446606
loss: 1.0302683115005493,grad_norm: 0.8055029175758837, iteration: 446607
loss: 0.986354410648346,grad_norm: 0.8197079579881847, iteration: 446608
loss: 0.9556538462638855,grad_norm: 0.734558563798068, iteration: 446609
loss: 0.9870866537094116,grad_norm: 0.6236829134503377, iteration: 446610
loss: 0.9775198698043823,grad_norm: 0.9173000593480937, iteration: 446611
loss: 1.0087199211120605,grad_norm: 0.665658578001004, iteration: 446612
loss: 1.0143671035766602,grad_norm: 0.7444471039262838, iteration: 446613
loss: 0.9922128915786743,grad_norm: 0.838543624379695, iteration: 446614
loss: 0.9946538209915161,grad_norm: 0.9858450822178689, iteration: 446615
loss: 1.0042178630828857,grad_norm: 0.6964987363524036, iteration: 446616
loss: 1.0078226327896118,grad_norm: 0.8077848562461829, iteration: 446617
loss: 0.96321040391922,grad_norm: 0.9999989841032269, iteration: 446618
loss: 1.0063230991363525,grad_norm: 0.7225142998476964, iteration: 446619
loss: 0.9692013263702393,grad_norm: 0.750691105021725, iteration: 446620
loss: 0.970260739326477,grad_norm: 0.9418629728105373, iteration: 446621
loss: 0.9957426190376282,grad_norm: 0.737346422022258, iteration: 446622
loss: 1.024536371231079,grad_norm: 0.8873188691768797, iteration: 446623
loss: 1.0304995775222778,grad_norm: 0.6814281571396166, iteration: 446624
loss: 0.9754586219787598,grad_norm: 0.8060404798236881, iteration: 446625
loss: 0.9951440095901489,grad_norm: 0.7658756967520934, iteration: 446626
loss: 1.0109537839889526,grad_norm: 0.7959315892818161, iteration: 446627
loss: 0.9816849827766418,grad_norm: 0.8935118347966838, iteration: 446628
loss: 0.9627852439880371,grad_norm: 0.7700058928261346, iteration: 446629
loss: 0.9726114273071289,grad_norm: 0.6862169121138311, iteration: 446630
loss: 1.0039252042770386,grad_norm: 0.7967893947525451, iteration: 446631
loss: 1.020711898803711,grad_norm: 0.8101687274999536, iteration: 446632
loss: 1.0047032833099365,grad_norm: 0.7316336662620009, iteration: 446633
loss: 0.9512283802032471,grad_norm: 0.8147471418259701, iteration: 446634
loss: 1.0180761814117432,grad_norm: 0.827376096737128, iteration: 446635
loss: 0.9659253358840942,grad_norm: 0.8586737016055748, iteration: 446636
loss: 1.0120689868927002,grad_norm: 0.7305417871262841, iteration: 446637
loss: 0.9924076795578003,grad_norm: 0.8996211680496721, iteration: 446638
loss: 0.981723427772522,grad_norm: 0.8544630685015365, iteration: 446639
loss: 1.058995008468628,grad_norm: 0.9999991921822522, iteration: 446640
loss: 1.016790747642517,grad_norm: 0.9999995446279921, iteration: 446641
loss: 0.9693652391433716,grad_norm: 0.6433353409799275, iteration: 446642
loss: 0.9929235577583313,grad_norm: 0.9551241280274644, iteration: 446643
loss: 0.9935683012008667,grad_norm: 0.7475463350984041, iteration: 446644
loss: 1.0121346712112427,grad_norm: 0.9995748024184296, iteration: 446645
loss: 1.0096744298934937,grad_norm: 0.8871349930191943, iteration: 446646
loss: 0.947997510433197,grad_norm: 0.820174252780829, iteration: 446647
loss: 1.0584263801574707,grad_norm: 0.7958264258959086, iteration: 446648
loss: 1.028560757637024,grad_norm: 0.7321709665732823, iteration: 446649
loss: 0.9810969829559326,grad_norm: 0.8144870028739487, iteration: 446650
loss: 0.9859715700149536,grad_norm: 0.8267826622532709, iteration: 446651
loss: 0.9938880205154419,grad_norm: 0.7313841371630155, iteration: 446652
loss: 0.9737144708633423,grad_norm: 0.7589571265060796, iteration: 446653
loss: 0.9971612095832825,grad_norm: 0.7349459443653514, iteration: 446654
loss: 1.0099883079528809,grad_norm: 0.6431095394746007, iteration: 446655
loss: 1.002477765083313,grad_norm: 0.7733185799696336, iteration: 446656
loss: 0.9766217470169067,grad_norm: 0.7602988762798233, iteration: 446657
loss: 1.0047072172164917,grad_norm: 0.8284858255787092, iteration: 446658
loss: 1.0366777181625366,grad_norm: 0.7401721489065767, iteration: 446659
loss: 1.0190523862838745,grad_norm: 0.8261357255169487, iteration: 446660
loss: 1.022417664527893,grad_norm: 0.7306153135511282, iteration: 446661
loss: 1.0431039333343506,grad_norm: 0.7594995874262146, iteration: 446662
loss: 1.0476645231246948,grad_norm: 0.8615104653273676, iteration: 446663
loss: 1.0779073238372803,grad_norm: 0.999999207938356, iteration: 446664
loss: 0.9895683526992798,grad_norm: 0.7409740779482828, iteration: 446665
loss: 1.0024086236953735,grad_norm: 0.6937104976197569, iteration: 446666
loss: 1.046489953994751,grad_norm: 0.8244101499401865, iteration: 446667
loss: 0.9745216369628906,grad_norm: 0.8116851427003957, iteration: 446668
loss: 1.017924189567566,grad_norm: 0.7235068425136972, iteration: 446669
loss: 0.9633597731590271,grad_norm: 0.7717240360149488, iteration: 446670
loss: 0.9676753282546997,grad_norm: 0.6363313847223668, iteration: 446671
loss: 1.0206313133239746,grad_norm: 0.7727842947122846, iteration: 446672
loss: 0.99990314245224,grad_norm: 0.8400097271539883, iteration: 446673
loss: 0.9898661375045776,grad_norm: 0.7383980616056546, iteration: 446674
loss: 1.0366185903549194,grad_norm: 0.9999998534194808, iteration: 446675
loss: 1.020474910736084,grad_norm: 0.8436375889977941, iteration: 446676
loss: 1.0089662075042725,grad_norm: 0.7929040192801372, iteration: 446677
loss: 0.9790011048316956,grad_norm: 0.8226274805855067, iteration: 446678
loss: 1.0236717462539673,grad_norm: 0.9487664290556067, iteration: 446679
loss: 0.9802730679512024,grad_norm: 0.7528514357450528, iteration: 446680
loss: 1.016552209854126,grad_norm: 0.9999990688056994, iteration: 446681
loss: 1.0166168212890625,grad_norm: 0.6984800071037209, iteration: 446682
loss: 0.9483962059020996,grad_norm: 0.8664489819928434, iteration: 446683
loss: 0.9512660503387451,grad_norm: 0.7058854020876174, iteration: 446684
loss: 1.0456384420394897,grad_norm: 0.8173215640488379, iteration: 446685
loss: 0.9949670433998108,grad_norm: 0.8590627956830378, iteration: 446686
loss: 0.9850131869316101,grad_norm: 0.7612901772409612, iteration: 446687
loss: 1.0137368440628052,grad_norm: 0.7494880746531639, iteration: 446688
loss: 1.0066455602645874,grad_norm: 0.7378053143711601, iteration: 446689
loss: 1.029738426208496,grad_norm: 0.8575086961325933, iteration: 446690
loss: 0.9789912104606628,grad_norm: 0.6706832128704001, iteration: 446691
loss: 0.9718008041381836,grad_norm: 0.7351398892746747, iteration: 446692
loss: 1.0464996099472046,grad_norm: 0.9999996448852021, iteration: 446693
loss: 1.0626416206359863,grad_norm: 0.8237581928530082, iteration: 446694
loss: 1.033036470413208,grad_norm: 0.7158845452742633, iteration: 446695
loss: 0.9939819574356079,grad_norm: 0.7234693611492836, iteration: 446696
loss: 0.9981001019477844,grad_norm: 0.7447652074093738, iteration: 446697
loss: 1.0204942226409912,grad_norm: 0.8785435672401707, iteration: 446698
loss: 0.9544540047645569,grad_norm: 0.6873156859455308, iteration: 446699
loss: 0.9985765814781189,grad_norm: 0.6494831065027606, iteration: 446700
loss: 1.004183053970337,grad_norm: 0.8992651323124184, iteration: 446701
loss: 0.9880210161209106,grad_norm: 0.8542955083018181, iteration: 446702
loss: 1.0442988872528076,grad_norm: 1.0000000469903088, iteration: 446703
loss: 1.0146456956863403,grad_norm: 0.7341820169287478, iteration: 446704
loss: 1.0723646879196167,grad_norm: 0.9999990679643345, iteration: 446705
loss: 1.0296063423156738,grad_norm: 0.9999998988348315, iteration: 446706
loss: 1.030558466911316,grad_norm: 0.8423998992273418, iteration: 446707
loss: 1.0672924518585205,grad_norm: 0.7573001688574954, iteration: 446708
loss: 0.9822233319282532,grad_norm: 0.8784804541903847, iteration: 446709
loss: 1.0166997909545898,grad_norm: 0.8592443167367715, iteration: 446710
loss: 0.9870986342430115,grad_norm: 0.8735065842030436, iteration: 446711
loss: 0.9954233765602112,grad_norm: 0.7248970454217312, iteration: 446712
loss: 1.016304612159729,grad_norm: 0.7105807085511902, iteration: 446713
loss: 0.9927018284797668,grad_norm: 0.796165117406149, iteration: 446714
loss: 0.943777322769165,grad_norm: 0.747417468476124, iteration: 446715
loss: 0.9769846796989441,grad_norm: 0.6917156936024083, iteration: 446716
loss: 1.0033965110778809,grad_norm: 0.8227942444046831, iteration: 446717
loss: 0.9557146430015564,grad_norm: 0.9702269600609773, iteration: 446718
loss: 0.9637022614479065,grad_norm: 0.7069195337758378, iteration: 446719
loss: 0.9842654466629028,grad_norm: 0.7447902047335608, iteration: 446720
loss: 0.9848700761795044,grad_norm: 0.8745455478172773, iteration: 446721
loss: 1.0134326219558716,grad_norm: 0.747714560827363, iteration: 446722
loss: 1.0249592065811157,grad_norm: 0.7280260450814943, iteration: 446723
loss: 1.0042932033538818,grad_norm: 0.5890060699420535, iteration: 446724
loss: 1.033921241760254,grad_norm: 0.7813065625310069, iteration: 446725
loss: 0.9921042919158936,grad_norm: 0.7034112015228098, iteration: 446726
loss: 1.0051976442337036,grad_norm: 0.9999995764096612, iteration: 446727
loss: 0.9794055819511414,grad_norm: 0.7601299994953152, iteration: 446728
loss: 1.0402919054031372,grad_norm: 0.9152650575916532, iteration: 446729
loss: 0.9828765392303467,grad_norm: 0.6917588562556856, iteration: 446730
loss: 0.9922550916671753,grad_norm: 0.8216857198999103, iteration: 446731
loss: 0.9989383220672607,grad_norm: 0.7507332294857203, iteration: 446732
loss: 1.0983396768569946,grad_norm: 0.9999998804735674, iteration: 446733
loss: 0.963512122631073,grad_norm: 0.9534680871655152, iteration: 446734
loss: 1.0107626914978027,grad_norm: 0.6767624939995903, iteration: 446735
loss: 0.9933865070343018,grad_norm: 0.8489763154236669, iteration: 446736
loss: 0.99532151222229,grad_norm: 0.7263544510186748, iteration: 446737
loss: 0.9971474409103394,grad_norm: 0.9999998740810144, iteration: 446738
loss: 0.9806243777275085,grad_norm: 0.7220450526843392, iteration: 446739
loss: 0.9782752990722656,grad_norm: 0.7602291415398648, iteration: 446740
loss: 1.018644094467163,grad_norm: 0.682480003213877, iteration: 446741
loss: 0.9673193097114563,grad_norm: 0.8122536958915574, iteration: 446742
loss: 1.0346269607543945,grad_norm: 0.9757730079374055, iteration: 446743
loss: 1.0103204250335693,grad_norm: 0.7115170623264506, iteration: 446744
loss: 0.9863224029541016,grad_norm: 0.6638303007822505, iteration: 446745
loss: 0.9995858073234558,grad_norm: 0.999999849978664, iteration: 446746
loss: 0.9958112239837646,grad_norm: 0.6716424099720528, iteration: 446747
loss: 0.995637059211731,grad_norm: 0.732256642121407, iteration: 446748
loss: 0.9987167716026306,grad_norm: 0.9872004433156675, iteration: 446749
loss: 1.0160307884216309,grad_norm: 0.911067412098613, iteration: 446750
loss: 1.0076336860656738,grad_norm: 0.6686880031901307, iteration: 446751
loss: 1.0041089057922363,grad_norm: 0.6761113071386433, iteration: 446752
loss: 1.0171575546264648,grad_norm: 0.7332836350407179, iteration: 446753
loss: 0.9890897870063782,grad_norm: 0.6680693149423721, iteration: 446754
loss: 1.0231417417526245,grad_norm: 0.8973142269958776, iteration: 446755
loss: 1.0127313137054443,grad_norm: 0.8101163359477306, iteration: 446756
loss: 0.9994214773178101,grad_norm: 0.8523833402534424, iteration: 446757
loss: 1.0072062015533447,grad_norm: 0.9008938114062559, iteration: 446758
loss: 1.0421371459960938,grad_norm: 0.9999998883487102, iteration: 446759
loss: 1.0152980089187622,grad_norm: 0.830614595308697, iteration: 446760
loss: 0.9862651824951172,grad_norm: 0.7689459472941387, iteration: 446761
loss: 1.006852626800537,grad_norm: 0.9041367929630457, iteration: 446762
loss: 1.091565489768982,grad_norm: 0.8861335487720106, iteration: 446763
loss: 1.0167793035507202,grad_norm: 0.7644012549336923, iteration: 446764
loss: 0.9947338700294495,grad_norm: 0.8854095007886436, iteration: 446765
loss: 1.0514345169067383,grad_norm: 0.9999995511824347, iteration: 446766
loss: 1.0380425453186035,grad_norm: 0.8211397333987961, iteration: 446767
loss: 1.0162540674209595,grad_norm: 0.999999302432363, iteration: 446768
loss: 0.9912872314453125,grad_norm: 0.7787379232474921, iteration: 446769
loss: 0.9837927222251892,grad_norm: 0.8164143244728701, iteration: 446770
loss: 0.9822272658348083,grad_norm: 0.7747961747559419, iteration: 446771
loss: 0.976108193397522,grad_norm: 0.8477763209772947, iteration: 446772
loss: 1.0355461835861206,grad_norm: 0.9999993892510454, iteration: 446773
loss: 0.9962605237960815,grad_norm: 0.7672997596536227, iteration: 446774
loss: 1.0177907943725586,grad_norm: 0.7922617024542309, iteration: 446775
loss: 0.9824777841567993,grad_norm: 0.672829598914713, iteration: 446776
loss: 0.9804331660270691,grad_norm: 0.7694520758898357, iteration: 446777
loss: 1.0383023023605347,grad_norm: 0.7396971726495569, iteration: 446778
loss: 0.9766581058502197,grad_norm: 0.8064954429288681, iteration: 446779
loss: 0.9616519808769226,grad_norm: 0.6956405407101046, iteration: 446780
loss: 0.9898687601089478,grad_norm: 0.6948201520246469, iteration: 446781
loss: 1.0048415660858154,grad_norm: 0.858378394177518, iteration: 446782
loss: 1.0047812461853027,grad_norm: 0.7426429861858816, iteration: 446783
loss: 1.0217313766479492,grad_norm: 0.8312641229242249, iteration: 446784
loss: 1.0289504528045654,grad_norm: 0.7790674115113617, iteration: 446785
loss: 1.045900821685791,grad_norm: 0.9999998229865164, iteration: 446786
loss: 1.1131318807601929,grad_norm: 0.9999993085279193, iteration: 446787
loss: 1.020576000213623,grad_norm: 0.8693167543724568, iteration: 446788
loss: 0.9807623624801636,grad_norm: 0.7313526699142178, iteration: 446789
loss: 0.9829891324043274,grad_norm: 0.6294342118086969, iteration: 446790
loss: 0.9671400189399719,grad_norm: 0.8050041562999758, iteration: 446791
loss: 0.9792134165763855,grad_norm: 0.9999991293449656, iteration: 446792
loss: 0.9712129831314087,grad_norm: 0.8160643204986241, iteration: 446793
loss: 0.9594127535820007,grad_norm: 0.7903857708871752, iteration: 446794
loss: 1.0148614645004272,grad_norm: 0.9999995130961864, iteration: 446795
loss: 0.9973307251930237,grad_norm: 0.8063479713120707, iteration: 446796
loss: 1.021865725517273,grad_norm: 0.9069620203855661, iteration: 446797
loss: 1.0357950925827026,grad_norm: 0.803149765773074, iteration: 446798
loss: 1.0212221145629883,grad_norm: 0.7796013524390889, iteration: 446799
loss: 0.9991244077682495,grad_norm: 0.6215916171634169, iteration: 446800
loss: 0.9616775512695312,grad_norm: 0.8796330914608259, iteration: 446801
loss: 0.9755256772041321,grad_norm: 0.7147494189434086, iteration: 446802
loss: 1.0401592254638672,grad_norm: 0.9999993449020097, iteration: 446803
loss: 0.9845120310783386,grad_norm: 0.897866297727666, iteration: 446804
loss: 1.0428599119186401,grad_norm: 0.9999999149442069, iteration: 446805
loss: 0.9984604716300964,grad_norm: 0.8318020259481874, iteration: 446806
loss: 0.99908047914505,grad_norm: 0.7084716834848095, iteration: 446807
loss: 1.0065537691116333,grad_norm: 0.8547415875182737, iteration: 446808
loss: 0.9762239456176758,grad_norm: 0.7986424156185072, iteration: 446809
loss: 0.9969809055328369,grad_norm: 0.7772265505902485, iteration: 446810
loss: 0.9927700757980347,grad_norm: 0.7901670297190362, iteration: 446811
loss: 1.0277944803237915,grad_norm: 0.8075703888992339, iteration: 446812
loss: 0.9789024591445923,grad_norm: 0.7718599612937771, iteration: 446813
loss: 0.9845611453056335,grad_norm: 0.6958960390730738, iteration: 446814
loss: 1.0070576667785645,grad_norm: 0.9810663467172724, iteration: 446815
loss: 0.9765880703926086,grad_norm: 0.9465375697137298, iteration: 446816
loss: 0.96160888671875,grad_norm: 0.800335352283893, iteration: 446817
loss: 1.0101827383041382,grad_norm: 0.7040571066240291, iteration: 446818
loss: 1.1699795722961426,grad_norm: 0.9999996573894235, iteration: 446819
loss: 1.0335898399353027,grad_norm: 0.9454831486821095, iteration: 446820
loss: 1.0081439018249512,grad_norm: 0.8004792175873322, iteration: 446821
loss: 1.0134977102279663,grad_norm: 0.7801870653639432, iteration: 446822
loss: 1.0304814577102661,grad_norm: 0.996529926228663, iteration: 446823
loss: 1.0144319534301758,grad_norm: 0.7141464887804332, iteration: 446824
loss: 0.998838484287262,grad_norm: 0.8710038498146936, iteration: 446825
loss: 0.9942517280578613,grad_norm: 0.8621212210807817, iteration: 446826
loss: 0.9720338582992554,grad_norm: 0.7583387156697649, iteration: 446827
loss: 0.9706051349639893,grad_norm: 0.6294537923348822, iteration: 446828
loss: 0.9631636142730713,grad_norm: 0.609825681087923, iteration: 446829
loss: 0.994096040725708,grad_norm: 0.666248171201376, iteration: 446830
loss: 0.9739143252372742,grad_norm: 0.8832281842180413, iteration: 446831
loss: 0.9989860653877258,grad_norm: 0.6765218978830398, iteration: 446832
loss: 1.0145764350891113,grad_norm: 0.7636299672763996, iteration: 446833
loss: 1.0231631994247437,grad_norm: 0.7875876341301099, iteration: 446834
loss: 1.004814624786377,grad_norm: 0.6867943776167817, iteration: 446835
loss: 0.983065664768219,grad_norm: 0.8880376345069257, iteration: 446836
loss: 0.9688895344734192,grad_norm: 0.6034720048001871, iteration: 446837
loss: 1.0105098485946655,grad_norm: 0.747196810617891, iteration: 446838
loss: 1.003856897354126,grad_norm: 0.8936383268711784, iteration: 446839
loss: 1.0055949687957764,grad_norm: 0.7093056699120444, iteration: 446840
loss: 0.9692983627319336,grad_norm: 0.7231739993287779, iteration: 446841
loss: 1.0058363676071167,grad_norm: 0.7186464199388395, iteration: 446842
loss: 0.999403178691864,grad_norm: 0.8538435937425535, iteration: 446843
loss: 1.0709006786346436,grad_norm: 0.7430475923399491, iteration: 446844
loss: 0.9961190819740295,grad_norm: 0.7601002555041091, iteration: 446845
loss: 1.0020933151245117,grad_norm: 0.703530106137608, iteration: 446846
loss: 1.0041882991790771,grad_norm: 0.6638579868695875, iteration: 446847
loss: 0.9468162059783936,grad_norm: 0.8589014739702518, iteration: 446848
loss: 1.0073142051696777,grad_norm: 0.8457905429106659, iteration: 446849
loss: 0.9842644333839417,grad_norm: 0.6765281978507307, iteration: 446850
loss: 1.0206890106201172,grad_norm: 0.840816325550898, iteration: 446851
loss: 0.990570068359375,grad_norm: 0.7521675731161618, iteration: 446852
loss: 0.9701504707336426,grad_norm: 0.7644480975609361, iteration: 446853
loss: 1.0422449111938477,grad_norm: 0.8742406262124629, iteration: 446854
loss: 1.0319511890411377,grad_norm: 0.8408259532984346, iteration: 446855
loss: 1.002909779548645,grad_norm: 0.9999996804702849, iteration: 446856
loss: 0.9897360801696777,grad_norm: 0.7431756016952443, iteration: 446857
loss: 1.0229785442352295,grad_norm: 0.6746278121798401, iteration: 446858
loss: 0.9927332997322083,grad_norm: 0.8925380916113331, iteration: 446859
loss: 0.9911136031150818,grad_norm: 0.8795261290188061, iteration: 446860
loss: 0.981468915939331,grad_norm: 0.9204263688948641, iteration: 446861
loss: 1.0295405387878418,grad_norm: 0.7407115273152567, iteration: 446862
loss: 0.9568066596984863,grad_norm: 0.849356113259108, iteration: 446863
loss: 1.0088759660720825,grad_norm: 0.7295663566832324, iteration: 446864
loss: 1.0050665140151978,grad_norm: 0.7545937892669364, iteration: 446865
loss: 0.9917638301849365,grad_norm: 0.7905428933333647, iteration: 446866
loss: 0.998711109161377,grad_norm: 0.8928469266690806, iteration: 446867
loss: 0.9807761907577515,grad_norm: 0.6533211656150074, iteration: 446868
loss: 0.9665157794952393,grad_norm: 0.8341335315364362, iteration: 446869
loss: 0.9621149301528931,grad_norm: 0.6895607781299883, iteration: 446870
loss: 1.015183925628662,grad_norm: 0.9999990909657075, iteration: 446871
loss: 0.9528641104698181,grad_norm: 0.686454142528407, iteration: 446872
loss: 1.0121827125549316,grad_norm: 0.7878124515231055, iteration: 446873
loss: 0.9762938022613525,grad_norm: 0.8080026222561677, iteration: 446874
loss: 0.9669081568717957,grad_norm: 0.7431822094188486, iteration: 446875
loss: 0.9825900793075562,grad_norm: 0.7749916448205778, iteration: 446876
loss: 0.9610646963119507,grad_norm: 0.6920596669653624, iteration: 446877
loss: 0.9982523322105408,grad_norm: 0.7984949922375411, iteration: 446878
loss: 0.9799121618270874,grad_norm: 0.7005074125422543, iteration: 446879
loss: 1.076436161994934,grad_norm: 0.9173137850582719, iteration: 446880
loss: 1.0003952980041504,grad_norm: 0.8068981351828558, iteration: 446881
loss: 1.0590697526931763,grad_norm: 0.9999994745916936, iteration: 446882
loss: 0.9892637729644775,grad_norm: 0.6386450429262429, iteration: 446883
loss: 1.0433249473571777,grad_norm: 0.7920542241904592, iteration: 446884
loss: 1.031185269355774,grad_norm: 0.9999991319504642, iteration: 446885
loss: 0.9818939566612244,grad_norm: 0.6889633109725811, iteration: 446886
loss: 0.9581157565116882,grad_norm: 0.7640971442552782, iteration: 446887
loss: 0.9758384227752686,grad_norm: 0.7651611278283067, iteration: 446888
loss: 1.057378888130188,grad_norm: 0.9219684250295703, iteration: 446889
loss: 0.9994521141052246,grad_norm: 0.7077414971750203, iteration: 446890
loss: 1.0304622650146484,grad_norm: 0.7972801585033432, iteration: 446891
loss: 0.971340000629425,grad_norm: 0.8248071922699752, iteration: 446892
loss: 1.036799430847168,grad_norm: 0.6822147253524434, iteration: 446893
loss: 1.0225145816802979,grad_norm: 0.9248422194523598, iteration: 446894
loss: 1.0000782012939453,grad_norm: 0.8367926464341684, iteration: 446895
loss: 1.0150972604751587,grad_norm: 0.8595811004183783, iteration: 446896
loss: 1.0062963962554932,grad_norm: 0.7143771719133558, iteration: 446897
loss: 0.9768148064613342,grad_norm: 0.7538031895555874, iteration: 446898
loss: 0.9358490109443665,grad_norm: 0.6487315216641447, iteration: 446899
loss: 0.9962287545204163,grad_norm: 0.735310553231211, iteration: 446900
loss: 1.0015531778335571,grad_norm: 0.6420487142339155, iteration: 446901
loss: 0.9940418601036072,grad_norm: 0.7202414934734689, iteration: 446902
loss: 1.055928111076355,grad_norm: 0.9999999363298654, iteration: 446903
loss: 0.9951309561729431,grad_norm: 0.7688525191879801, iteration: 446904
loss: 0.9896272420883179,grad_norm: 0.9999996783180769, iteration: 446905
loss: 0.969135046005249,grad_norm: 0.824937684197881, iteration: 446906
loss: 1.040059208869934,grad_norm: 0.9999992347661576, iteration: 446907
loss: 1.0175398588180542,grad_norm: 0.6793956368171397, iteration: 446908
loss: 0.9769341349601746,grad_norm: 0.7237062679307903, iteration: 446909
loss: 0.9582710862159729,grad_norm: 0.7623694957779017, iteration: 446910
loss: 1.0158575773239136,grad_norm: 0.787046860003976, iteration: 446911
loss: 1.0324573516845703,grad_norm: 0.8228245769221241, iteration: 446912
loss: 1.0768028497695923,grad_norm: 0.9999997514449904, iteration: 446913
loss: 0.9791921377182007,grad_norm: 0.9540516274578588, iteration: 446914
loss: 1.00865638256073,grad_norm: 0.6766008960167057, iteration: 446915
loss: 1.01792573928833,grad_norm: 0.8715571127454871, iteration: 446916
loss: 0.9963676333427429,grad_norm: 0.9419147651236867, iteration: 446917
loss: 1.027852177619934,grad_norm: 0.8382831489475885, iteration: 446918
loss: 0.9565991759300232,grad_norm: 0.7952936710011884, iteration: 446919
loss: 0.9825929999351501,grad_norm: 0.7353707370083827, iteration: 446920
loss: 0.9958840012550354,grad_norm: 0.8579978243099106, iteration: 446921
loss: 0.9778940677642822,grad_norm: 0.6903318211615811, iteration: 446922
loss: 1.0000063180923462,grad_norm: 0.6931467373160962, iteration: 446923
loss: 1.0099115371704102,grad_norm: 0.7246751267325069, iteration: 446924
loss: 0.9793014526367188,grad_norm: 0.7714487233082792, iteration: 446925
loss: 1.026219367980957,grad_norm: 0.8792724844306644, iteration: 446926
loss: 0.9922125339508057,grad_norm: 0.9999990146315494, iteration: 446927
loss: 0.9970529079437256,grad_norm: 0.7512724660354446, iteration: 446928
loss: 1.0398426055908203,grad_norm: 0.9999991855671881, iteration: 446929
loss: 1.027643084526062,grad_norm: 0.999999120141123, iteration: 446930
loss: 1.0034854412078857,grad_norm: 0.70525717496626, iteration: 446931
loss: 1.0183078050613403,grad_norm: 0.8894711377708419, iteration: 446932
loss: 1.0057569742202759,grad_norm: 0.8886525983421499, iteration: 446933
loss: 1.0111606121063232,grad_norm: 0.8110728347395311, iteration: 446934
loss: 1.008958101272583,grad_norm: 0.7399574846110142, iteration: 446935
loss: 0.9996652007102966,grad_norm: 0.712166104225873, iteration: 446936
loss: 0.9601044654846191,grad_norm: 0.8842871611532063, iteration: 446937
loss: 1.02010977268219,grad_norm: 0.9999994248649335, iteration: 446938
loss: 1.0163178443908691,grad_norm: 0.8412785888031886, iteration: 446939
loss: 1.0179948806762695,grad_norm: 0.7608456690634726, iteration: 446940
loss: 1.0120590925216675,grad_norm: 0.8777008659064126, iteration: 446941
loss: 1.0131808519363403,grad_norm: 0.8246530967204468, iteration: 446942
loss: 1.001697063446045,grad_norm: 0.7924799363795579, iteration: 446943
loss: 0.9792174696922302,grad_norm: 0.7716195757277419, iteration: 446944
loss: 0.9800900220870972,grad_norm: 0.6485434504790033, iteration: 446945
loss: 0.9894987940788269,grad_norm: 0.7484260008823649, iteration: 446946
loss: 0.9885474443435669,grad_norm: 0.85376632263697, iteration: 446947
loss: 0.9956352114677429,grad_norm: 0.7142663215887698, iteration: 446948
loss: 0.9936701655387878,grad_norm: 0.8290375005261691, iteration: 446949
loss: 1.0126374959945679,grad_norm: 0.7660229035896918, iteration: 446950
loss: 0.9989246129989624,grad_norm: 0.6974540769022401, iteration: 446951
loss: 1.0132124423980713,grad_norm: 0.8786328925341395, iteration: 446952
loss: 1.021452784538269,grad_norm: 0.6258816359347747, iteration: 446953
loss: 0.9946777820587158,grad_norm: 0.8667739699710745, iteration: 446954
loss: 0.9877104759216309,grad_norm: 0.6965216696872247, iteration: 446955
loss: 1.016510248184204,grad_norm: 0.8291198837150645, iteration: 446956
loss: 1.006179690361023,grad_norm: 0.9999991282350698, iteration: 446957
loss: 1.0005007982254028,grad_norm: 0.9999990584372696, iteration: 446958
loss: 1.0152015686035156,grad_norm: 0.7351432023309709, iteration: 446959
loss: 0.9813997149467468,grad_norm: 0.7299508499098486, iteration: 446960
loss: 0.9868599772453308,grad_norm: 0.6804368882055168, iteration: 446961
loss: 0.9920021295547485,grad_norm: 0.786491364509766, iteration: 446962
loss: 0.9926250576972961,grad_norm: 0.8796749730841081, iteration: 446963
loss: 1.0120936632156372,grad_norm: 0.7600453546576839, iteration: 446964
loss: 0.9882830381393433,grad_norm: 0.8814454806814143, iteration: 446965
loss: 1.0224295854568481,grad_norm: 0.7687082102493205, iteration: 446966
loss: 1.035447597503662,grad_norm: 0.7411736164046696, iteration: 446967
loss: 0.9809319972991943,grad_norm: 0.6667976879243251, iteration: 446968
loss: 1.007830023765564,grad_norm: 0.7278093626984462, iteration: 446969
loss: 0.9756978154182434,grad_norm: 0.6961384204592622, iteration: 446970
loss: 1.0305747985839844,grad_norm: 0.9999998902836058, iteration: 446971
loss: 1.0142987966537476,grad_norm: 0.869332734139359, iteration: 446972
loss: 0.9744966626167297,grad_norm: 0.8042803460974581, iteration: 446973
loss: 0.9893495440483093,grad_norm: 0.7763532917296536, iteration: 446974
loss: 1.0046768188476562,grad_norm: 0.845862480162396, iteration: 446975
loss: 0.9985538125038147,grad_norm: 0.7227628830025808, iteration: 446976
loss: 1.0101069211959839,grad_norm: 0.9999992159263591, iteration: 446977
loss: 1.0517725944519043,grad_norm: 0.8029454012076221, iteration: 446978
loss: 0.9843591451644897,grad_norm: 0.6568794870078181, iteration: 446979
loss: 0.9717391729354858,grad_norm: 0.8650399506330549, iteration: 446980
loss: 0.9512717127799988,grad_norm: 0.7752962302331703, iteration: 446981
loss: 0.9802103042602539,grad_norm: 0.9655545783534158, iteration: 446982
loss: 0.9951369762420654,grad_norm: 0.9081042385843875, iteration: 446983
loss: 1.1321370601654053,grad_norm: 0.9999992537860599, iteration: 446984
loss: 0.9794150590896606,grad_norm: 0.722278031248615, iteration: 446985
loss: 1.0051182508468628,grad_norm: 0.7352972392235276, iteration: 446986
loss: 0.9729641079902649,grad_norm: 0.9361134057953906, iteration: 446987
loss: 0.9988234043121338,grad_norm: 0.7828559218801926, iteration: 446988
loss: 1.0155805349349976,grad_norm: 0.9713519817453392, iteration: 446989
loss: 1.075111746788025,grad_norm: 0.9999996570963384, iteration: 446990
loss: 1.0154393911361694,grad_norm: 0.9696747564055654, iteration: 446991
loss: 0.9792622923851013,grad_norm: 0.6772526833682004, iteration: 446992
loss: 0.975760281085968,grad_norm: 0.8100466558571336, iteration: 446993
loss: 1.0589350461959839,grad_norm: 0.8470492353087411, iteration: 446994
loss: 0.9980738759040833,grad_norm: 0.8597432039192415, iteration: 446995
loss: 1.0007407665252686,grad_norm: 0.9538288483409121, iteration: 446996
loss: 1.008823037147522,grad_norm: 0.7763972635045469, iteration: 446997
loss: 0.9698192477226257,grad_norm: 0.8466677855169331, iteration: 446998
loss: 1.1351114511489868,grad_norm: 0.9450044773805057, iteration: 446999
loss: 0.9875848889350891,grad_norm: 0.7512045882514448, iteration: 447000
loss: 1.0273163318634033,grad_norm: 0.9999994672137991, iteration: 447001
loss: 1.0109022855758667,grad_norm: 0.7078680775362657, iteration: 447002
loss: 1.029692530632019,grad_norm: 0.8425857745895355, iteration: 447003
loss: 0.9715344905853271,grad_norm: 0.9999995095505825, iteration: 447004
loss: 1.0333669185638428,grad_norm: 0.8196875943558233, iteration: 447005
loss: 1.0209591388702393,grad_norm: 0.7009944227410426, iteration: 447006
loss: 0.9889553785324097,grad_norm: 0.7916717089697696, iteration: 447007
loss: 1.0028547048568726,grad_norm: 0.7487627555907441, iteration: 447008
loss: 1.0874649286270142,grad_norm: 0.9532373533522903, iteration: 447009
loss: 1.0131847858428955,grad_norm: 0.8093046180426303, iteration: 447010
loss: 1.0295665264129639,grad_norm: 0.8245684311630318, iteration: 447011
loss: 0.9955940842628479,grad_norm: 0.75705501802571, iteration: 447012
loss: 0.9917367696762085,grad_norm: 0.678511187837869, iteration: 447013
loss: 1.1337816715240479,grad_norm: 0.890913624322196, iteration: 447014
loss: 1.0220558643341064,grad_norm: 0.900060022331057, iteration: 447015
loss: 1.0030971765518188,grad_norm: 0.7269920286245259, iteration: 447016
loss: 1.0359829664230347,grad_norm: 0.9999998375697845, iteration: 447017
loss: 1.0037943124771118,grad_norm: 0.8209075609905353, iteration: 447018
loss: 0.9911253452301025,grad_norm: 0.7048895265319345, iteration: 447019
loss: 1.034637689590454,grad_norm: 0.9375379207030343, iteration: 447020
loss: 0.9897202253341675,grad_norm: 0.6977941149554914, iteration: 447021
loss: 1.035579800605774,grad_norm: 0.9999990987505053, iteration: 447022
loss: 1.0312848091125488,grad_norm: 0.7687822494432826, iteration: 447023
loss: 1.0020912885665894,grad_norm: 0.772249339703939, iteration: 447024
loss: 0.9648231863975525,grad_norm: 0.8265417231674124, iteration: 447025
loss: 0.9829115867614746,grad_norm: 0.6863489681540715, iteration: 447026
loss: 1.0235682725906372,grad_norm: 0.6704536816561497, iteration: 447027
loss: 1.0551183223724365,grad_norm: 0.9999997533503745, iteration: 447028
loss: 1.030434250831604,grad_norm: 0.6630321859612195, iteration: 447029
loss: 1.0162888765335083,grad_norm: 0.9999993602305527, iteration: 447030
loss: 0.9932668805122375,grad_norm: 0.7423134504604406, iteration: 447031
loss: 1.0261577367782593,grad_norm: 0.8841533107895324, iteration: 447032
loss: 0.986041784286499,grad_norm: 0.7634718547436448, iteration: 447033
loss: 0.9885305166244507,grad_norm: 0.8878348122295532, iteration: 447034
loss: 0.9913781881332397,grad_norm: 0.6968081289478488, iteration: 447035
loss: 1.016274333000183,grad_norm: 0.9196292832482073, iteration: 447036
loss: 0.971542477607727,grad_norm: 0.8021542679287381, iteration: 447037
loss: 1.0104132890701294,grad_norm: 0.7540848274942356, iteration: 447038
loss: 1.0121629238128662,grad_norm: 0.7185531694228193, iteration: 447039
loss: 0.9869286417961121,grad_norm: 0.9999998325761746, iteration: 447040
loss: 0.9741506576538086,grad_norm: 0.7787192984284247, iteration: 447041
loss: 0.9664358496665955,grad_norm: 0.7313934848640654, iteration: 447042
loss: 1.018568515777588,grad_norm: 0.7242048816161901, iteration: 447043
loss: 0.990346372127533,grad_norm: 0.7678140561151192, iteration: 447044
loss: 0.9838755130767822,grad_norm: 0.7528020550681203, iteration: 447045
loss: 0.995613694190979,grad_norm: 0.8305168451504376, iteration: 447046
loss: 0.9921948909759521,grad_norm: 0.7076559453396394, iteration: 447047
loss: 1.0922991037368774,grad_norm: 0.9999996120142944, iteration: 447048
loss: 0.9756303429603577,grad_norm: 0.6980174101766895, iteration: 447049
loss: 0.9896641373634338,grad_norm: 0.960487540545598, iteration: 447050
loss: 0.9982352256774902,grad_norm: 0.9178654198151881, iteration: 447051
loss: 0.9891231060028076,grad_norm: 0.9534532725710081, iteration: 447052
loss: 0.9824472665786743,grad_norm: 0.7888199815180656, iteration: 447053
loss: 1.0129027366638184,grad_norm: 0.9303610511476613, iteration: 447054
loss: 1.0499674081802368,grad_norm: 0.8124248353875411, iteration: 447055
loss: 1.005071759223938,grad_norm: 0.8421191667342772, iteration: 447056
loss: 0.9986283183097839,grad_norm: 0.6777275138006411, iteration: 447057
loss: 1.0147182941436768,grad_norm: 0.7450230953982092, iteration: 447058
loss: 0.9989349842071533,grad_norm: 0.9745665837086119, iteration: 447059
loss: 0.9847949743270874,grad_norm: 0.657388645779771, iteration: 447060
loss: 1.0123621225357056,grad_norm: 0.7352198889137946, iteration: 447061
loss: 1.0380977392196655,grad_norm: 0.8389850147618095, iteration: 447062
loss: 1.0089385509490967,grad_norm: 0.8633697525789961, iteration: 447063
loss: 0.9965440034866333,grad_norm: 0.9123039211456789, iteration: 447064
loss: 1.0126123428344727,grad_norm: 0.9885317214324487, iteration: 447065
loss: 1.0003288984298706,grad_norm: 0.6908813453120599, iteration: 447066
loss: 1.0416077375411987,grad_norm: 0.7532137524661294, iteration: 447067
loss: 1.0158048868179321,grad_norm: 0.7172328344757944, iteration: 447068
loss: 1.0837346315383911,grad_norm: 0.78558460178068, iteration: 447069
loss: 1.0140599012374878,grad_norm: 0.9398685700144299, iteration: 447070
loss: 0.9802235960960388,grad_norm: 0.7205328383195062, iteration: 447071
loss: 0.9812749028205872,grad_norm: 0.8888857478557667, iteration: 447072
loss: 0.9617605805397034,grad_norm: 0.8418576937647323, iteration: 447073
loss: 0.982546329498291,grad_norm: 0.7080356971540172, iteration: 447074
loss: 1.008056402206421,grad_norm: 0.8673406100974769, iteration: 447075
loss: 1.1177215576171875,grad_norm: 0.9999999710739469, iteration: 447076
loss: 1.0088932514190674,grad_norm: 0.7397371784349815, iteration: 447077
loss: 1.012983798980713,grad_norm: 0.999999072202076, iteration: 447078
loss: 0.9843200445175171,grad_norm: 0.8626000797066704, iteration: 447079
loss: 1.0250591039657593,grad_norm: 0.7900257558926541, iteration: 447080
loss: 1.0020869970321655,grad_norm: 0.6796879045366869, iteration: 447081
loss: 0.9866749048233032,grad_norm: 0.7435474426081976, iteration: 447082
loss: 1.0172873735427856,grad_norm: 0.7456679217515199, iteration: 447083
loss: 0.9542502164840698,grad_norm: 0.7673435272121812, iteration: 447084
loss: 0.9919338822364807,grad_norm: 0.7593671441127651, iteration: 447085
loss: 1.0050740242004395,grad_norm: 0.9243142728813052, iteration: 447086
loss: 1.004253625869751,grad_norm: 0.9222092989002656, iteration: 447087
loss: 0.9958084225654602,grad_norm: 0.8246450788792736, iteration: 447088
loss: 1.0067871809005737,grad_norm: 0.7119544685402714, iteration: 447089
loss: 0.9768041968345642,grad_norm: 0.7089500041207937, iteration: 447090
loss: 0.9858912825584412,grad_norm: 0.999999186760606, iteration: 447091
loss: 1.081214189529419,grad_norm: 0.873907209950495, iteration: 447092
loss: 1.1380120515823364,grad_norm: 0.9999996102786898, iteration: 447093
loss: 1.0443364381790161,grad_norm: 0.8020536913764762, iteration: 447094
loss: 0.9988510608673096,grad_norm: 0.7810381916444932, iteration: 447095
loss: 0.9922453165054321,grad_norm: 0.8720649779996009, iteration: 447096
loss: 1.0276662111282349,grad_norm: 0.9188373978848944, iteration: 447097
loss: 1.0022270679473877,grad_norm: 0.7499661824536051, iteration: 447098
loss: 1.018317461013794,grad_norm: 0.8525449221586884, iteration: 447099
loss: 1.0282589197158813,grad_norm: 0.6634340627488201, iteration: 447100
loss: 0.9821181297302246,grad_norm: 0.7466712390945515, iteration: 447101
loss: 0.9633479118347168,grad_norm: 0.741859951893903, iteration: 447102
loss: 1.0209859609603882,grad_norm: 0.6896526574000726, iteration: 447103
loss: 0.9891920685768127,grad_norm: 0.8416596588874412, iteration: 447104
loss: 0.9692074060440063,grad_norm: 0.7638338559917628, iteration: 447105
loss: 1.0880546569824219,grad_norm: 0.713742430912772, iteration: 447106
loss: 0.9857527017593384,grad_norm: 0.7272451321519641, iteration: 447107
loss: 1.0152660608291626,grad_norm: 0.838543995590694, iteration: 447108
loss: 1.0235795974731445,grad_norm: 0.8663876749797488, iteration: 447109
loss: 0.9988614320755005,grad_norm: 0.8016505349572028, iteration: 447110
loss: 1.007283091545105,grad_norm: 0.9003372588244114, iteration: 447111
loss: 1.0079752206802368,grad_norm: 0.7229396304021949, iteration: 447112
loss: 0.9875425696372986,grad_norm: 0.8321409249683934, iteration: 447113
loss: 0.9808600544929504,grad_norm: 0.6852122306618686, iteration: 447114
loss: 0.9844991564750671,grad_norm: 0.7452793381559941, iteration: 447115
loss: 0.9746668934822083,grad_norm: 0.9999994810038975, iteration: 447116
loss: 1.023297905921936,grad_norm: 0.789330560140289, iteration: 447117
loss: 1.0421299934387207,grad_norm: 0.7704149492690144, iteration: 447118
loss: 0.975165069103241,grad_norm: 0.7143478380206734, iteration: 447119
loss: 1.0076427459716797,grad_norm: 0.9284492297165023, iteration: 447120
loss: 1.0006694793701172,grad_norm: 0.7732285694962724, iteration: 447121
loss: 0.9781249165534973,grad_norm: 0.7039586967842218, iteration: 447122
loss: 1.0002994537353516,grad_norm: 0.967658054502409, iteration: 447123
loss: 1.0080949068069458,grad_norm: 0.9999996249182103, iteration: 447124
loss: 1.012696623802185,grad_norm: 0.9147125937193913, iteration: 447125
loss: 1.00381600856781,grad_norm: 0.6906418975906632, iteration: 447126
loss: 0.9994792342185974,grad_norm: 0.7257515925427681, iteration: 447127
loss: 0.997174084186554,grad_norm: 0.7109886467244425, iteration: 447128
loss: 0.9970009922981262,grad_norm: 0.667374483547923, iteration: 447129
loss: 1.0263237953186035,grad_norm: 0.8605629910876915, iteration: 447130
loss: 0.9954089522361755,grad_norm: 0.7111018557040849, iteration: 447131
loss: 1.0113635063171387,grad_norm: 0.7580047923878883, iteration: 447132
loss: 1.0069565773010254,grad_norm: 0.8587101624620334, iteration: 447133
loss: 0.9823791980743408,grad_norm: 0.8801619693715896, iteration: 447134
loss: 1.0263804197311401,grad_norm: 0.8615801428053161, iteration: 447135
loss: 1.01117742061615,grad_norm: 0.876502538420876, iteration: 447136
loss: 0.9960403442382812,grad_norm: 0.7790910660697599, iteration: 447137
loss: 1.0132290124893188,grad_norm: 0.8621298776952256, iteration: 447138
loss: 1.0024173259735107,grad_norm: 0.7988060463197068, iteration: 447139
loss: 1.010317087173462,grad_norm: 0.7571255175102468, iteration: 447140
loss: 1.0231939554214478,grad_norm: 0.6823230227373921, iteration: 447141
loss: 0.9947816729545593,grad_norm: 0.8545745234578612, iteration: 447142
loss: 1.0155062675476074,grad_norm: 0.9702607598033852, iteration: 447143
loss: 0.9790406823158264,grad_norm: 0.8111868686843812, iteration: 447144
loss: 0.9692801833152771,grad_norm: 0.756376233948178, iteration: 447145
loss: 0.9930753111839294,grad_norm: 0.7102752031664109, iteration: 447146
loss: 1.0214253664016724,grad_norm: 0.7361794986896916, iteration: 447147
loss: 1.013293981552124,grad_norm: 0.7482731640231306, iteration: 447148
loss: 0.9627392888069153,grad_norm: 0.8010854534181839, iteration: 447149
loss: 0.988616406917572,grad_norm: 0.8768567376711441, iteration: 447150
loss: 0.9779011011123657,grad_norm: 0.7654156223851196, iteration: 447151
loss: 0.9887634515762329,grad_norm: 0.7480066957816042, iteration: 447152
loss: 0.9803411364555359,grad_norm: 0.8415438063576871, iteration: 447153
loss: 1.0100021362304688,grad_norm: 0.6940520888060122, iteration: 447154
loss: 1.0374031066894531,grad_norm: 0.7822939192581292, iteration: 447155
loss: 0.9999655485153198,grad_norm: 0.8604360949850957, iteration: 447156
loss: 0.9943975210189819,grad_norm: 0.6207200077312388, iteration: 447157
loss: 0.9817690849304199,grad_norm: 0.9999991705840012, iteration: 447158
loss: 1.0325636863708496,grad_norm: 0.8355162635334917, iteration: 447159
loss: 1.024754285812378,grad_norm: 0.9375875975699675, iteration: 447160
loss: 1.003509283065796,grad_norm: 0.696492061853411, iteration: 447161
loss: 0.962061882019043,grad_norm: 0.7103791157885094, iteration: 447162
loss: 0.9842933416366577,grad_norm: 0.6398744853783072, iteration: 447163
loss: 1.0191444158554077,grad_norm: 0.7966063757293672, iteration: 447164
loss: 0.9903255701065063,grad_norm: 0.9225760805845344, iteration: 447165
loss: 0.9925904870033264,grad_norm: 0.7482119046557578, iteration: 447166
loss: 1.010684847831726,grad_norm: 0.9146871525347197, iteration: 447167
loss: 0.9900657534599304,grad_norm: 0.7556741194785326, iteration: 447168
loss: 0.9677106142044067,grad_norm: 0.860704777815469, iteration: 447169
loss: 1.060139536857605,grad_norm: 0.9999992881858801, iteration: 447170
loss: 0.9561035633087158,grad_norm: 0.9458644409322129, iteration: 447171
loss: 0.9925287365913391,grad_norm: 0.7661108680333079, iteration: 447172
loss: 0.9917868375778198,grad_norm: 0.7248025166628462, iteration: 447173
loss: 1.0027267932891846,grad_norm: 0.886784066345995, iteration: 447174
loss: 1.013192892074585,grad_norm: 0.7890023654057788, iteration: 447175
loss: 0.994939386844635,grad_norm: 0.7170311417370964, iteration: 447176
loss: 0.9827718138694763,grad_norm: 0.7620070029166457, iteration: 447177
loss: 0.9900142550468445,grad_norm: 0.6484906360225395, iteration: 447178
loss: 1.032393217086792,grad_norm: 0.7378193345795254, iteration: 447179
loss: 0.9922597408294678,grad_norm: 0.7307063253111477, iteration: 447180
loss: 1.0022999048233032,grad_norm: 0.7842772839962071, iteration: 447181
loss: 0.98466557264328,grad_norm: 0.6897293768529195, iteration: 447182
loss: 0.9771355390548706,grad_norm: 0.9883216592606484, iteration: 447183
loss: 1.0308374166488647,grad_norm: 0.6658338860682561, iteration: 447184
loss: 0.9851928949356079,grad_norm: 0.7070084948929941, iteration: 447185
loss: 0.9759640097618103,grad_norm: 0.6707896138695549, iteration: 447186
loss: 1.0124073028564453,grad_norm: 0.6607286059734122, iteration: 447187
loss: 1.0008269548416138,grad_norm: 0.8054714062746694, iteration: 447188
loss: 0.9917632341384888,grad_norm: 0.7935846700309103, iteration: 447189
loss: 0.9931343793869019,grad_norm: 0.7357706426953454, iteration: 447190
loss: 1.0095899105072021,grad_norm: 0.7167216048177261, iteration: 447191
loss: 1.0092839002609253,grad_norm: 0.9999991302556215, iteration: 447192
loss: 1.0055066347122192,grad_norm: 0.6937524214725532, iteration: 447193
loss: 1.0036976337432861,grad_norm: 0.7593854551843414, iteration: 447194
loss: 1.039920449256897,grad_norm: 0.6817070897313652, iteration: 447195
loss: 0.9925056099891663,grad_norm: 0.8443504599666988, iteration: 447196
loss: 1.0090543031692505,grad_norm: 0.7583800986682684, iteration: 447197
loss: 1.0249594449996948,grad_norm: 0.8168277643813957, iteration: 447198
loss: 0.9770486354827881,grad_norm: 0.8506012918536036, iteration: 447199
loss: 1.0128068923950195,grad_norm: 0.7610233099367161, iteration: 447200
loss: 1.013985276222229,grad_norm: 0.7301864408874214, iteration: 447201
loss: 0.9583656787872314,grad_norm: 0.8709792628076876, iteration: 447202
loss: 1.015400767326355,grad_norm: 0.8601922273497942, iteration: 447203
loss: 0.9902443885803223,grad_norm: 0.7338552867171804, iteration: 447204
loss: 0.9582844376564026,grad_norm: 0.7352336137249541, iteration: 447205
loss: 1.1668872833251953,grad_norm: 0.9999994329315691, iteration: 447206
loss: 1.044305682182312,grad_norm: 0.7576001115985244, iteration: 447207
loss: 0.9933181405067444,grad_norm: 0.8483848857402897, iteration: 447208
loss: 0.9822753667831421,grad_norm: 0.6901260062242164, iteration: 447209
loss: 0.9889472126960754,grad_norm: 0.8773144518277307, iteration: 447210
loss: 1.0346598625183105,grad_norm: 0.7579899486738991, iteration: 447211
loss: 1.0137012004852295,grad_norm: 0.8390749239907584, iteration: 447212
loss: 1.0117758512496948,grad_norm: 0.8106859048524625, iteration: 447213
loss: 0.9753456115722656,grad_norm: 0.6615163805659322, iteration: 447214
loss: 1.003726840019226,grad_norm: 0.7383950380619345, iteration: 447215
loss: 0.9438791871070862,grad_norm: 0.7651095389626744, iteration: 447216
loss: 1.0122061967849731,grad_norm: 0.8667443555481095, iteration: 447217
loss: 1.0647300481796265,grad_norm: 0.9999996402770787, iteration: 447218
loss: 1.0955356359481812,grad_norm: 0.8307457439467986, iteration: 447219
loss: 1.0094876289367676,grad_norm: 0.711342801023289, iteration: 447220
loss: 1.0253323316574097,grad_norm: 0.912621110843304, iteration: 447221
loss: 1.092292070388794,grad_norm: 0.8919547677815621, iteration: 447222
loss: 0.9782397747039795,grad_norm: 0.6886742944116875, iteration: 447223
loss: 0.9910517334938049,grad_norm: 0.8965097038783577, iteration: 447224
loss: 1.0435981750488281,grad_norm: 0.9999998111726932, iteration: 447225
loss: 0.9776485562324524,grad_norm: 0.7122225917882594, iteration: 447226
loss: 0.9919945597648621,grad_norm: 0.8024137416359506, iteration: 447227
loss: 0.9786368012428284,grad_norm: 0.7795422133321935, iteration: 447228
loss: 0.986086905002594,grad_norm: 0.8354085444250589, iteration: 447229
loss: 0.9894176125526428,grad_norm: 0.775482578892254, iteration: 447230
loss: 1.054753065109253,grad_norm: 0.9385831296137056, iteration: 447231
loss: 1.0163235664367676,grad_norm: 0.8115814957840246, iteration: 447232
loss: 1.0091220140457153,grad_norm: 0.7917745791066704, iteration: 447233
loss: 0.9940214157104492,grad_norm: 0.7195639845993471, iteration: 447234
loss: 0.9977849721908569,grad_norm: 0.7219806290683124, iteration: 447235
loss: 0.9845408797264099,grad_norm: 0.7046433930989194, iteration: 447236
loss: 1.0239135026931763,grad_norm: 0.6153964239388245, iteration: 447237
loss: 1.0295377969741821,grad_norm: 0.7985600763439953, iteration: 447238
loss: 1.0125523805618286,grad_norm: 0.9114358057383685, iteration: 447239
loss: 0.9964860081672668,grad_norm: 0.720018406762187, iteration: 447240
loss: 0.9911690950393677,grad_norm: 0.7346309525742976, iteration: 447241
loss: 1.018540859222412,grad_norm: 0.8984222803925179, iteration: 447242
loss: 1.0131691694259644,grad_norm: 0.8330837980346231, iteration: 447243
loss: 0.9766041040420532,grad_norm: 0.8254336970806376, iteration: 447244
loss: 0.9928828477859497,grad_norm: 0.9152098405283491, iteration: 447245
loss: 1.0315614938735962,grad_norm: 0.9107089664551689, iteration: 447246
loss: 1.0270041227340698,grad_norm: 0.9719951644818638, iteration: 447247
loss: 1.0090620517730713,grad_norm: 0.7950800624884687, iteration: 447248
loss: 1.0055373907089233,grad_norm: 0.8118800505984844, iteration: 447249
loss: 0.9952044486999512,grad_norm: 0.7426534036577184, iteration: 447250
loss: 1.0104377269744873,grad_norm: 0.5665341246312389, iteration: 447251
loss: 1.0285784006118774,grad_norm: 0.764259439087342, iteration: 447252
loss: 1.031529426574707,grad_norm: 0.8529751854993068, iteration: 447253
loss: 1.008249282836914,grad_norm: 0.7705665111585087, iteration: 447254
loss: 1.0149043798446655,grad_norm: 0.692150540997573, iteration: 447255
loss: 0.9928156733512878,grad_norm: 0.7096465929606527, iteration: 447256
loss: 1.093125581741333,grad_norm: 0.9747297720316275, iteration: 447257
loss: 0.9928984045982361,grad_norm: 0.7400814145893219, iteration: 447258
loss: 0.9870809316635132,grad_norm: 0.6747255628222066, iteration: 447259
loss: 0.9805710315704346,grad_norm: 0.7175032989226164, iteration: 447260
loss: 1.0007972717285156,grad_norm: 0.744077941444407, iteration: 447261
loss: 1.0310311317443848,grad_norm: 0.8876745417215585, iteration: 447262
loss: 0.9950568079948425,grad_norm: 0.8794247457499492, iteration: 447263
loss: 1.0346035957336426,grad_norm: 0.7332227667127447, iteration: 447264
loss: 1.0067201852798462,grad_norm: 0.784827338431959, iteration: 447265
loss: 1.0056848526000977,grad_norm: 0.7871900939910932, iteration: 447266
loss: 0.9985722899436951,grad_norm: 0.732432008624248, iteration: 447267
loss: 0.9952395558357239,grad_norm: 0.792771880318058, iteration: 447268
loss: 1.0013636350631714,grad_norm: 0.8379571283596625, iteration: 447269
loss: 1.0030938386917114,grad_norm: 0.8501304187504182, iteration: 447270
loss: 1.0637868642807007,grad_norm: 0.7589333958721893, iteration: 447271
loss: 0.990851640701294,grad_norm: 0.8455941804011725, iteration: 447272
loss: 1.001491904258728,grad_norm: 0.8277858753150731, iteration: 447273
loss: 0.998376727104187,grad_norm: 0.8700039079073899, iteration: 447274
loss: 1.0099241733551025,grad_norm: 0.7020036406555208, iteration: 447275
loss: 0.9774430394172668,grad_norm: 0.7997008882246329, iteration: 447276
loss: 0.9548655152320862,grad_norm: 0.6809386555356421, iteration: 447277
loss: 1.0039619207382202,grad_norm: 0.7882792203828413, iteration: 447278
loss: 1.0457451343536377,grad_norm: 0.9999993343714164, iteration: 447279
loss: 1.0492737293243408,grad_norm: 0.8870171322366824, iteration: 447280
loss: 1.0024811029434204,grad_norm: 0.9999997599112085, iteration: 447281
loss: 0.987648069858551,grad_norm: 0.7535219960499482, iteration: 447282
loss: 1.003333330154419,grad_norm: 0.8535032707942405, iteration: 447283
loss: 0.9983093738555908,grad_norm: 0.6321129251114134, iteration: 447284
loss: 1.2266786098480225,grad_norm: 0.9999998807648073, iteration: 447285
loss: 0.9486832022666931,grad_norm: 0.8229028119986772, iteration: 447286
loss: 1.011491060256958,grad_norm: 0.8730374128413461, iteration: 447287
loss: 1.0113697052001953,grad_norm: 0.8876127489164503, iteration: 447288
loss: 0.9664783477783203,grad_norm: 0.6724191092218355, iteration: 447289
loss: 1.0109124183654785,grad_norm: 0.7897519596630765, iteration: 447290
loss: 1.007854700088501,grad_norm: 0.7632220645049754, iteration: 447291
loss: 1.0758378505706787,grad_norm: 0.8073594958787724, iteration: 447292
loss: 1.0157865285873413,grad_norm: 0.8337481143349285, iteration: 447293
loss: 1.0307121276855469,grad_norm: 0.9303165067359326, iteration: 447294
loss: 1.018489122390747,grad_norm: 0.6785556443703711, iteration: 447295
loss: 1.0253453254699707,grad_norm: 0.7877446888192415, iteration: 447296
loss: 0.9734212160110474,grad_norm: 0.9999994656391773, iteration: 447297
loss: 1.0716849565505981,grad_norm: 0.8293434819867942, iteration: 447298
loss: 1.032467246055603,grad_norm: 0.8847007493257169, iteration: 447299
loss: 1.0027318000793457,grad_norm: 0.786610494846881, iteration: 447300
loss: 0.992123007774353,grad_norm: 0.7379859192296211, iteration: 447301
loss: 0.9573451280593872,grad_norm: 0.7221762905126903, iteration: 447302
loss: 1.0362048149108887,grad_norm: 0.8851982222642405, iteration: 447303
loss: 0.9984776377677917,grad_norm: 0.7178256316341547, iteration: 447304
loss: 1.1241440773010254,grad_norm: 0.9999992592470408, iteration: 447305
loss: 1.0178532600402832,grad_norm: 0.7967746069412134, iteration: 447306
loss: 1.0045394897460938,grad_norm: 0.8174619813334263, iteration: 447307
loss: 1.0191502571105957,grad_norm: 0.8219943031094927, iteration: 447308
loss: 1.0269806385040283,grad_norm: 0.9178446895565576, iteration: 447309
loss: 1.0056840181350708,grad_norm: 0.7791501988660723, iteration: 447310
loss: 1.1334432363510132,grad_norm: 0.8359850169570634, iteration: 447311
loss: 0.9797202348709106,grad_norm: 0.7206070623070036, iteration: 447312
loss: 1.0054442882537842,grad_norm: 0.9781281073319118, iteration: 447313
loss: 1.0044026374816895,grad_norm: 0.8998865123923561, iteration: 447314
loss: 0.9701350927352905,grad_norm: 0.8310523564497366, iteration: 447315
loss: 0.9998086094856262,grad_norm: 0.7081617026146919, iteration: 447316
loss: 1.0219697952270508,grad_norm: 0.9170512686049346, iteration: 447317
loss: 0.9606584310531616,grad_norm: 0.7478451142260639, iteration: 447318
loss: 0.9784566760063171,grad_norm: 0.7898862588744081, iteration: 447319
loss: 0.9791662096977234,grad_norm: 0.9999992144667337, iteration: 447320
loss: 0.9817797541618347,grad_norm: 0.6661625791516397, iteration: 447321
loss: 1.0204616785049438,grad_norm: 0.7720970318836022, iteration: 447322
loss: 0.9790324568748474,grad_norm: 0.9273791041971569, iteration: 447323
loss: 1.0182467699050903,grad_norm: 0.8299074007817011, iteration: 447324
loss: 1.0114831924438477,grad_norm: 0.8260390137264363, iteration: 447325
loss: 1.0036154985427856,grad_norm: 0.902020994804638, iteration: 447326
loss: 0.986291766166687,grad_norm: 0.7443150820614296, iteration: 447327
loss: 1.005065679550171,grad_norm: 0.7976034064339895, iteration: 447328
loss: 0.9825761318206787,grad_norm: 0.8014053666681087, iteration: 447329
loss: 1.030888319015503,grad_norm: 0.6866199186159687, iteration: 447330
loss: 0.9729417562484741,grad_norm: 0.866782570336253, iteration: 447331
loss: 0.9740737676620483,grad_norm: 0.7607158923738504, iteration: 447332
loss: 0.9884219169616699,grad_norm: 0.824451702530829, iteration: 447333
loss: 0.9906711578369141,grad_norm: 0.7213514938668204, iteration: 447334
loss: 1.0112066268920898,grad_norm: 0.7818806772579588, iteration: 447335
loss: 0.9924225211143494,grad_norm: 0.7200668379776297, iteration: 447336
loss: 1.0002894401550293,grad_norm: 0.8564556997724508, iteration: 447337
loss: 0.9871787428855896,grad_norm: 0.7277166392808243, iteration: 447338
loss: 0.9866992831230164,grad_norm: 0.6707122629404012, iteration: 447339
loss: 0.964067816734314,grad_norm: 0.7089012181136901, iteration: 447340
loss: 1.010129690170288,grad_norm: 0.7080634210564276, iteration: 447341
loss: 0.9742507934570312,grad_norm: 0.7130019521126509, iteration: 447342
loss: 0.9752523303031921,grad_norm: 0.7731461972615261, iteration: 447343
loss: 0.9854729771614075,grad_norm: 0.7042967258076253, iteration: 447344
loss: 0.9766120910644531,grad_norm: 0.8546658180558669, iteration: 447345
loss: 0.9936837553977966,grad_norm: 0.8061379450999797, iteration: 447346
loss: 0.9768590927124023,grad_norm: 0.8716616953982187, iteration: 447347
loss: 0.994391143321991,grad_norm: 0.914583560035911, iteration: 447348
loss: 1.0109518766403198,grad_norm: 0.6296598779379025, iteration: 447349
loss: 0.9899202585220337,grad_norm: 0.7800270341516659, iteration: 447350
loss: 0.9936498999595642,grad_norm: 0.8518215126052946, iteration: 447351
loss: 0.9821063876152039,grad_norm: 0.7431034125353927, iteration: 447352
loss: 1.0203801393508911,grad_norm: 0.8084079825400523, iteration: 447353
loss: 1.005684733390808,grad_norm: 0.8537101812240557, iteration: 447354
loss: 1.0481094121932983,grad_norm: 0.9999992164629637, iteration: 447355
loss: 0.9931870102882385,grad_norm: 0.8052899457109922, iteration: 447356
loss: 0.981135904788971,grad_norm: 0.915444754990418, iteration: 447357
loss: 1.026516318321228,grad_norm: 0.7879143234402879, iteration: 447358
loss: 1.0447465181350708,grad_norm: 0.9999993954887246, iteration: 447359
loss: 0.9854407906532288,grad_norm: 0.7329419803686359, iteration: 447360
loss: 1.0291107892990112,grad_norm: 0.9086267822321652, iteration: 447361
loss: 1.0324610471725464,grad_norm: 0.9999994879223099, iteration: 447362
loss: 1.0023646354675293,grad_norm: 0.7875425727845161, iteration: 447363
loss: 1.0573076009750366,grad_norm: 0.8395832267369701, iteration: 447364
loss: 1.0182114839553833,grad_norm: 0.7721918576614853, iteration: 447365
loss: 1.0348551273345947,grad_norm: 0.8727793589037417, iteration: 447366
loss: 0.9782775640487671,grad_norm: 0.7704321171970638, iteration: 447367
loss: 0.9797282814979553,grad_norm: 0.7555986961276877, iteration: 447368
loss: 1.0769367218017578,grad_norm: 0.9999995524784954, iteration: 447369
loss: 1.0111011266708374,grad_norm: 0.7436571415658354, iteration: 447370
loss: 0.9934483170509338,grad_norm: 0.890215329267876, iteration: 447371
loss: 1.030471682548523,grad_norm: 0.9999998640472832, iteration: 447372
loss: 1.0162724256515503,grad_norm: 0.7163871770906167, iteration: 447373
loss: 1.0345467329025269,grad_norm: 0.7571248079526551, iteration: 447374
loss: 1.031630277633667,grad_norm: 0.7754090156544807, iteration: 447375
loss: 0.9798937439918518,grad_norm: 0.7272575268770018, iteration: 447376
loss: 1.0192227363586426,grad_norm: 0.8423809054457521, iteration: 447377
loss: 1.0866341590881348,grad_norm: 0.9999993904718607, iteration: 447378
loss: 1.0107592344284058,grad_norm: 0.9999991915530546, iteration: 447379
loss: 1.0089786052703857,grad_norm: 0.8389822190886764, iteration: 447380
loss: 0.9727374911308289,grad_norm: 0.8006642857135329, iteration: 447381
loss: 1.0236341953277588,grad_norm: 0.9695298606989325, iteration: 447382
loss: 0.9996439814567566,grad_norm: 0.8543137290415771, iteration: 447383
loss: 1.0058797597885132,grad_norm: 0.8512129121720835, iteration: 447384
loss: 0.9988446235656738,grad_norm: 0.6422025980923558, iteration: 447385
loss: 1.0349552631378174,grad_norm: 0.7709359881435164, iteration: 447386
loss: 1.0048168897628784,grad_norm: 0.6073481759246859, iteration: 447387
loss: 1.0247266292572021,grad_norm: 0.6990259224734829, iteration: 447388
loss: 0.9944531321525574,grad_norm: 0.8201711359816802, iteration: 447389
loss: 1.007322072982788,grad_norm: 0.7792931935380516, iteration: 447390
loss: 0.9659579992294312,grad_norm: 0.8608568204799593, iteration: 447391
loss: 1.1007875204086304,grad_norm: 0.9630908822272588, iteration: 447392
loss: 1.0200493335723877,grad_norm: 0.6457642174847351, iteration: 447393
loss: 1.0230952501296997,grad_norm: 0.734365142334348, iteration: 447394
loss: 0.9996712803840637,grad_norm: 0.7320241664014296, iteration: 447395
loss: 1.0009502172470093,grad_norm: 0.7789274033053477, iteration: 447396
loss: 1.0042206048965454,grad_norm: 0.8273805932485485, iteration: 447397
loss: 1.000265121459961,grad_norm: 0.9999991019365596, iteration: 447398
loss: 0.9964989423751831,grad_norm: 0.7986355347630723, iteration: 447399
loss: 0.9794455170631409,grad_norm: 0.7238538240014933, iteration: 447400
loss: 0.9964081645011902,grad_norm: 0.8028005408446303, iteration: 447401
loss: 0.9882233142852783,grad_norm: 0.6911693328634403, iteration: 447402
loss: 0.949737548828125,grad_norm: 0.8085097006662689, iteration: 447403
loss: 1.0199283361434937,grad_norm: 0.7085595830014214, iteration: 447404
loss: 1.0041686296463013,grad_norm: 0.8321316401380003, iteration: 447405
loss: 0.9819014668464661,grad_norm: 0.866868636632817, iteration: 447406
loss: 0.9882803559303284,grad_norm: 0.8213654659798412, iteration: 447407
loss: 1.009171962738037,grad_norm: 0.6991771302511357, iteration: 447408
loss: 1.047896146774292,grad_norm: 0.7900124326436829, iteration: 447409
loss: 0.9888289570808411,grad_norm: 0.728996171492442, iteration: 447410
loss: 1.0183377265930176,grad_norm: 0.9073654251110124, iteration: 447411
loss: 0.9908350706100464,grad_norm: 0.7529446201019994, iteration: 447412
loss: 0.9865317344665527,grad_norm: 0.8100377843065828, iteration: 447413
loss: 1.0057905912399292,grad_norm: 0.7195509027536764, iteration: 447414
loss: 0.9814409017562866,grad_norm: 0.804158161494352, iteration: 447415
loss: 1.0080350637435913,grad_norm: 0.728578219023374, iteration: 447416
loss: 0.9958508014678955,grad_norm: 0.7105312839190601, iteration: 447417
loss: 1.0023069381713867,grad_norm: 0.764342623555143, iteration: 447418
loss: 1.0191854238510132,grad_norm: 0.8424446802653216, iteration: 447419
loss: 1.0194859504699707,grad_norm: 0.6923833893199226, iteration: 447420
loss: 0.9835065007209778,grad_norm: 0.8641301042370577, iteration: 447421
loss: 0.98665851354599,grad_norm: 0.9034150523155146, iteration: 447422
loss: 1.0400855541229248,grad_norm: 0.8062300854779211, iteration: 447423
loss: 0.9923385381698608,grad_norm: 0.7207476132909267, iteration: 447424
loss: 1.0236726999282837,grad_norm: 0.7259023148772358, iteration: 447425
loss: 0.9514093399047852,grad_norm: 0.7725616604109927, iteration: 447426
loss: 1.0153934955596924,grad_norm: 0.8916329152853218, iteration: 447427
loss: 1.0170469284057617,grad_norm: 0.6910504255953982, iteration: 447428
loss: 0.9631883502006531,grad_norm: 0.6968807738657052, iteration: 447429
loss: 1.0043845176696777,grad_norm: 0.7490585528053849, iteration: 447430
loss: 1.0127930641174316,grad_norm: 0.7559056687768786, iteration: 447431
loss: 0.980949342250824,grad_norm: 0.7989579049018602, iteration: 447432
loss: 0.9818484783172607,grad_norm: 0.9999991819030807, iteration: 447433
loss: 1.0190824270248413,grad_norm: 0.6397653185432938, iteration: 447434
loss: 1.017843246459961,grad_norm: 0.6651604271930092, iteration: 447435
loss: 1.0022268295288086,grad_norm: 0.7317788441295728, iteration: 447436
loss: 1.0235834121704102,grad_norm: 0.6212481473233281, iteration: 447437
loss: 0.9774998426437378,grad_norm: 0.880702754579628, iteration: 447438
loss: 0.967197597026825,grad_norm: 0.7679440574965728, iteration: 447439
loss: 1.039394736289978,grad_norm: 0.7077125112852723, iteration: 447440
loss: 1.0865005254745483,grad_norm: 1.0000000135232647, iteration: 447441
loss: 1.002269983291626,grad_norm: 0.8572807347618127, iteration: 447442
loss: 0.9833700060844421,grad_norm: 0.6685397431047448, iteration: 447443
loss: 1.0146443843841553,grad_norm: 0.8831040323683744, iteration: 447444
loss: 0.9856280088424683,grad_norm: 0.8192456010127688, iteration: 447445
loss: 1.0231480598449707,grad_norm: 0.9999998297054516, iteration: 447446
loss: 1.0115114450454712,grad_norm: 0.8072898654765798, iteration: 447447
loss: 0.9985999464988708,grad_norm: 0.8287350368611989, iteration: 447448
loss: 1.006920337677002,grad_norm: 0.8311899045921025, iteration: 447449
loss: 0.9944210648536682,grad_norm: 0.8144076182108916, iteration: 447450
loss: 1.0268419981002808,grad_norm: 0.9999990491554295, iteration: 447451
loss: 1.1411324739456177,grad_norm: 1.0000000013677464, iteration: 447452
loss: 1.005781888961792,grad_norm: 0.6831110932015624, iteration: 447453
loss: 1.0326768159866333,grad_norm: 0.644592539557952, iteration: 447454
loss: 0.9643896818161011,grad_norm: 0.720119193670372, iteration: 447455
loss: 1.0007258653640747,grad_norm: 0.8034372606517768, iteration: 447456
loss: 1.0027929544448853,grad_norm: 0.7352372101314661, iteration: 447457
loss: 1.0033878087997437,grad_norm: 0.7740132516187717, iteration: 447458
loss: 1.0075055360794067,grad_norm: 0.7353718576987495, iteration: 447459
loss: 0.9948945045471191,grad_norm: 0.9041126145082548, iteration: 447460
loss: 0.987125813961029,grad_norm: 0.8770801221692287, iteration: 447461
loss: 1.0130473375320435,grad_norm: 0.768070584938211, iteration: 447462
loss: 1.0495429039001465,grad_norm: 0.9938543346366991, iteration: 447463
loss: 1.0035359859466553,grad_norm: 0.8441672404426444, iteration: 447464
loss: 1.0230307579040527,grad_norm: 0.9999994190824465, iteration: 447465
loss: 1.0215787887573242,grad_norm: 0.999999623842799, iteration: 447466
loss: 0.976860761642456,grad_norm: 0.767471204549002, iteration: 447467
loss: 1.0097863674163818,grad_norm: 0.8392454931341, iteration: 447468
loss: 0.9818742871284485,grad_norm: 0.8331996862552827, iteration: 447469
loss: 0.9859176278114319,grad_norm: 0.8186832338567973, iteration: 447470
loss: 1.0029971599578857,grad_norm: 0.7657345443962101, iteration: 447471
loss: 0.9859101176261902,grad_norm: 0.7185222444481864, iteration: 447472
loss: 0.9898988604545593,grad_norm: 0.760719679602339, iteration: 447473
loss: 1.0561344623565674,grad_norm: 0.8683116735850329, iteration: 447474
loss: 1.0109429359436035,grad_norm: 0.7806963962978705, iteration: 447475
loss: 0.9955299496650696,grad_norm: 0.7483274033566831, iteration: 447476
loss: 1.0119645595550537,grad_norm: 0.7205694760755192, iteration: 447477
loss: 0.9498230814933777,grad_norm: 0.8415857966883761, iteration: 447478
loss: 0.9807432293891907,grad_norm: 0.8192185858943075, iteration: 447479
loss: 1.0227720737457275,grad_norm: 0.7966023530899803, iteration: 447480
loss: 1.1220988035202026,grad_norm: 0.9999995883462038, iteration: 447481
loss: 1.0451381206512451,grad_norm: 0.9999992480670623, iteration: 447482
loss: 1.0175848007202148,grad_norm: 0.6775191754225406, iteration: 447483
loss: 1.0121935606002808,grad_norm: 0.7821328657528824, iteration: 447484
loss: 1.090181827545166,grad_norm: 0.9999992714262702, iteration: 447485
loss: 0.9790326952934265,grad_norm: 0.8122827082801509, iteration: 447486
loss: 0.9541968107223511,grad_norm: 0.8493212474987955, iteration: 447487
loss: 1.005959391593933,grad_norm: 0.7469995292535566, iteration: 447488
loss: 0.9959898591041565,grad_norm: 0.8960246750890355, iteration: 447489
loss: 1.0126961469650269,grad_norm: 0.9311839223522012, iteration: 447490
loss: 1.0063254833221436,grad_norm: 0.7421262461628465, iteration: 447491
loss: 1.006598711013794,grad_norm: 0.7820582241369262, iteration: 447492
loss: 0.9947749972343445,grad_norm: 0.6275462755114134, iteration: 447493
loss: 0.9589590430259705,grad_norm: 0.813970440830067, iteration: 447494
loss: 0.9495111703872681,grad_norm: 0.8482808276729588, iteration: 447495
loss: 0.9923261404037476,grad_norm: 0.7809917222285331, iteration: 447496
loss: 1.0336921215057373,grad_norm: 0.7613511902233128, iteration: 447497
loss: 1.0178735256195068,grad_norm: 0.7606846873308074, iteration: 447498
loss: 0.9373607635498047,grad_norm: 0.731288239615447, iteration: 447499
loss: 0.9806700348854065,grad_norm: 0.7471002476951107, iteration: 447500
loss: 0.988109827041626,grad_norm: 0.7656186789176707, iteration: 447501
loss: 1.0293298959732056,grad_norm: 0.7672620844430955, iteration: 447502
loss: 0.9871701598167419,grad_norm: 0.8079791643278809, iteration: 447503
loss: 0.9982136487960815,grad_norm: 0.8043807046676823, iteration: 447504
loss: 1.0190746784210205,grad_norm: 0.7894069582447948, iteration: 447505
loss: 1.0067055225372314,grad_norm: 0.9378581253528208, iteration: 447506
loss: 1.0070441961288452,grad_norm: 0.6641031776614638, iteration: 447507
loss: 0.9908168911933899,grad_norm: 0.8153490137108649, iteration: 447508
loss: 1.0108860731124878,grad_norm: 0.8710354974116802, iteration: 447509
loss: 0.9695009589195251,grad_norm: 0.9991471241182783, iteration: 447510
loss: 1.0351855754852295,grad_norm: 0.7464232244896092, iteration: 447511
loss: 0.9855965971946716,grad_norm: 0.7251565884539389, iteration: 447512
loss: 0.943222165107727,grad_norm: 0.747051948942408, iteration: 447513
loss: 0.9822355508804321,grad_norm: 0.8440906345902458, iteration: 447514
loss: 1.0001767873764038,grad_norm: 0.8495969252696438, iteration: 447515
loss: 1.0107901096343994,grad_norm: 0.9146770258020639, iteration: 447516
loss: 1.031550407409668,grad_norm: 0.8314506569190636, iteration: 447517
loss: 0.9882354736328125,grad_norm: 0.764587577177409, iteration: 447518
loss: 0.9768927097320557,grad_norm: 0.7991167242304861, iteration: 447519
loss: 0.9935192465782166,grad_norm: 0.8191264295883895, iteration: 447520
loss: 0.9901304841041565,grad_norm: 0.7132805142925204, iteration: 447521
loss: 1.0047892332077026,grad_norm: 0.6321276457233911, iteration: 447522
loss: 1.0069068670272827,grad_norm: 0.7510749105604202, iteration: 447523
loss: 0.9576033353805542,grad_norm: 0.958768514693148, iteration: 447524
loss: 1.0315804481506348,grad_norm: 0.8185101928910832, iteration: 447525
loss: 1.0302950143814087,grad_norm: 0.7423560105452811, iteration: 447526
loss: 1.1002649068832397,grad_norm: 0.8655658535174915, iteration: 447527
loss: 0.9970389008522034,grad_norm: 0.9289262335555869, iteration: 447528
loss: 1.025078296661377,grad_norm: 0.9235006593985112, iteration: 447529
loss: 0.9997656345367432,grad_norm: 0.8377692189901579, iteration: 447530
loss: 1.0174251794815063,grad_norm: 0.9165009936744931, iteration: 447531
loss: 0.9747633337974548,grad_norm: 0.8977255192022673, iteration: 447532
loss: 0.9883983731269836,grad_norm: 0.6785904636775761, iteration: 447533
loss: 0.955328643321991,grad_norm: 0.7962047417026719, iteration: 447534
loss: 0.9468495845794678,grad_norm: 0.8043342447453918, iteration: 447535
loss: 1.0050103664398193,grad_norm: 0.9046400396767849, iteration: 447536
loss: 1.0505952835083008,grad_norm: 0.7838527959238898, iteration: 447537
loss: 0.9949571490287781,grad_norm: 0.7216322116443903, iteration: 447538
loss: 0.9886859059333801,grad_norm: 0.9999992323274066, iteration: 447539
loss: 0.9890667200088501,grad_norm: 0.8749176139225477, iteration: 447540
loss: 0.9987990260124207,grad_norm: 0.9999992398634142, iteration: 447541
loss: 0.9732071757316589,grad_norm: 0.6795368347300085, iteration: 447542
loss: 1.127321481704712,grad_norm: 0.9999994537775421, iteration: 447543
loss: 1.0069419145584106,grad_norm: 0.8909222944823216, iteration: 447544
loss: 0.9646037817001343,grad_norm: 0.7533461086426141, iteration: 447545
loss: 0.9983124732971191,grad_norm: 0.6697747289746483, iteration: 447546
loss: 1.017212986946106,grad_norm: 0.8597605755018359, iteration: 447547
loss: 0.9977012276649475,grad_norm: 0.9654965014843019, iteration: 447548
loss: 0.9893671274185181,grad_norm: 0.7115184219775325, iteration: 447549
loss: 0.9957945346832275,grad_norm: 0.7437349107595813, iteration: 447550
loss: 0.9925821423530579,grad_norm: 0.9999993925509043, iteration: 447551
loss: 0.9945774674415588,grad_norm: 0.6143073676824161, iteration: 447552
loss: 1.0248302221298218,grad_norm: 0.9999993118542786, iteration: 447553
loss: 0.9902203679084778,grad_norm: 0.8237562358993172, iteration: 447554
loss: 0.9585757255554199,grad_norm: 0.8836278676653003, iteration: 447555
loss: 0.9788923859596252,grad_norm: 0.7615033850678041, iteration: 447556
loss: 1.0072894096374512,grad_norm: 0.8033353586395993, iteration: 447557
loss: 1.0132969617843628,grad_norm: 0.9447472089579378, iteration: 447558
loss: 1.019953966140747,grad_norm: 0.8349744617205468, iteration: 447559
loss: 1.029310941696167,grad_norm: 0.831522502014327, iteration: 447560
loss: 1.0734853744506836,grad_norm: 0.8991219353168921, iteration: 447561
loss: 0.9664040207862854,grad_norm: 0.8193229675485574, iteration: 447562
loss: 1.0737850666046143,grad_norm: 0.9498756874445216, iteration: 447563
loss: 0.953461766242981,grad_norm: 0.6593185201068132, iteration: 447564
loss: 1.0334808826446533,grad_norm: 0.658383507588574, iteration: 447565
loss: 0.9860391616821289,grad_norm: 0.6441265913262766, iteration: 447566
loss: 0.9453503489494324,grad_norm: 0.6977118525094366, iteration: 447567
loss: 1.0360894203186035,grad_norm: 0.7534619777629492, iteration: 447568
loss: 1.041498064994812,grad_norm: 0.6795833261818525, iteration: 447569
loss: 1.0124549865722656,grad_norm: 0.9999998937873075, iteration: 447570
loss: 1.0157870054244995,grad_norm: 0.9999997594485366, iteration: 447571
loss: 1.0251765251159668,grad_norm: 0.8664163347865674, iteration: 447572
loss: 1.0054041147232056,grad_norm: 0.9305866088961178, iteration: 447573
loss: 0.9789744019508362,grad_norm: 0.7671952165841956, iteration: 447574
loss: 0.9740915894508362,grad_norm: 0.9999989750833805, iteration: 447575
loss: 1.0116580724716187,grad_norm: 0.7497469016217594, iteration: 447576
loss: 0.9833081364631653,grad_norm: 0.8775620419713183, iteration: 447577
loss: 0.9784956574440002,grad_norm: 0.68771856934607, iteration: 447578
loss: 0.9649609327316284,grad_norm: 0.7171045033117832, iteration: 447579
loss: 0.9666836261749268,grad_norm: 0.7914321323139852, iteration: 447580
loss: 0.9882982969284058,grad_norm: 0.9999991728898217, iteration: 447581
loss: 1.0266304016113281,grad_norm: 0.9385219724568985, iteration: 447582
loss: 1.0296953916549683,grad_norm: 0.660556081206843, iteration: 447583
loss: 0.9866707921028137,grad_norm: 0.9128742761730082, iteration: 447584
loss: 0.9948868155479431,grad_norm: 0.7415418247690199, iteration: 447585
loss: 1.0962672233581543,grad_norm: 0.9999996273263084, iteration: 447586
loss: 0.9824102520942688,grad_norm: 0.8982657854858672, iteration: 447587
loss: 0.9859755039215088,grad_norm: 0.9336147297981322, iteration: 447588
loss: 1.0058925151824951,grad_norm: 0.7185234980721926, iteration: 447589
loss: 1.0276798009872437,grad_norm: 0.8712728559532237, iteration: 447590
loss: 1.0186347961425781,grad_norm: 0.7345625154413572, iteration: 447591
loss: 1.0596959590911865,grad_norm: 0.6801707620372419, iteration: 447592
loss: 0.9615771770477295,grad_norm: 0.81225336676253, iteration: 447593
loss: 1.0323385000228882,grad_norm: 0.7592096079052548, iteration: 447594
loss: 1.0604438781738281,grad_norm: 0.9999997070073305, iteration: 447595
loss: 1.0036808252334595,grad_norm: 0.8946677486515825, iteration: 447596
loss: 0.9951748251914978,grad_norm: 0.7934704166285302, iteration: 447597
loss: 1.0097200870513916,grad_norm: 0.6462467464088026, iteration: 447598
loss: 0.9796930551528931,grad_norm: 0.6684679656023716, iteration: 447599
loss: 1.017155647277832,grad_norm: 0.67128913742064, iteration: 447600
loss: 1.0022995471954346,grad_norm: 0.7634870901227762, iteration: 447601
loss: 1.096083641052246,grad_norm: 0.9950756471073993, iteration: 447602
loss: 0.9903152585029602,grad_norm: 0.732660927925549, iteration: 447603
loss: 1.061861515045166,grad_norm: 0.9999999270429165, iteration: 447604
loss: 1.0453683137893677,grad_norm: 0.9101337104750603, iteration: 447605
loss: 0.9965269565582275,grad_norm: 0.6934324941672431, iteration: 447606
loss: 0.9625982642173767,grad_norm: 0.7364569799097105, iteration: 447607
loss: 0.9886502027511597,grad_norm: 0.6856608253266332, iteration: 447608
loss: 0.9961844682693481,grad_norm: 0.7800197188162254, iteration: 447609
loss: 1.1054342985153198,grad_norm: 0.9999994513486743, iteration: 447610
loss: 0.9827624559402466,grad_norm: 0.7530153730387429, iteration: 447611
loss: 0.9978451728820801,grad_norm: 0.742693999609575, iteration: 447612
loss: 0.9742218255996704,grad_norm: 0.8751489721005234, iteration: 447613
loss: 0.9890989661216736,grad_norm: 0.9314126483137771, iteration: 447614
loss: 0.9638168215751648,grad_norm: 0.8358256302541537, iteration: 447615
loss: 0.9941927790641785,grad_norm: 0.7489067652820184, iteration: 447616
loss: 0.9967917203903198,grad_norm: 0.7938993869628477, iteration: 447617
loss: 1.0295379161834717,grad_norm: 0.7493536679849195, iteration: 447618
loss: 1.0026657581329346,grad_norm: 0.7558645461975699, iteration: 447619
loss: 0.9973039031028748,grad_norm: 0.8358175603236165, iteration: 447620
loss: 1.010653018951416,grad_norm: 0.6515254272624872, iteration: 447621
loss: 1.031217336654663,grad_norm: 0.9999992224564512, iteration: 447622
loss: 1.001996397972107,grad_norm: 0.8891384888499457, iteration: 447623
loss: 1.0865886211395264,grad_norm: 0.999999013911336, iteration: 447624
loss: 1.000307321548462,grad_norm: 0.7255795194034672, iteration: 447625
loss: 1.0209388732910156,grad_norm: 0.86346134477212, iteration: 447626
loss: 0.9768385291099548,grad_norm: 0.8779985928489402, iteration: 447627
loss: 1.033130407333374,grad_norm: 0.8610188653878814, iteration: 447628
loss: 0.9711875915527344,grad_norm: 0.6810708453443451, iteration: 447629
loss: 1.0148345232009888,grad_norm: 0.7520238229812717, iteration: 447630
loss: 1.1410744190216064,grad_norm: 0.9014912704737286, iteration: 447631
loss: 1.0072962045669556,grad_norm: 0.7636394733636686, iteration: 447632
loss: 0.985507071018219,grad_norm: 0.9999993041226951, iteration: 447633
loss: 0.9990249872207642,grad_norm: 0.7991437236071862, iteration: 447634
loss: 1.0489882230758667,grad_norm: 0.7822279706813273, iteration: 447635
loss: 1.019487738609314,grad_norm: 0.7173652339622172, iteration: 447636
loss: 0.9630125164985657,grad_norm: 0.9200306055688764, iteration: 447637
loss: 0.9516201615333557,grad_norm: 0.771052551063018, iteration: 447638
loss: 0.9797178506851196,grad_norm: 0.7398894371627176, iteration: 447639
loss: 0.9815185070037842,grad_norm: 0.8598380504286444, iteration: 447640
loss: 0.982005774974823,grad_norm: 0.6762897019861205, iteration: 447641
loss: 1.0240387916564941,grad_norm: 0.7648984288882531, iteration: 447642
loss: 0.9842946529388428,grad_norm: 0.8888902975141855, iteration: 447643
loss: 1.0322544574737549,grad_norm: 0.9211371412143683, iteration: 447644
loss: 0.9802742600440979,grad_norm: 0.9999991184379179, iteration: 447645
loss: 0.9821372628211975,grad_norm: 0.7042611427203509, iteration: 447646
loss: 0.9614810347557068,grad_norm: 0.921835325002954, iteration: 447647
loss: 1.0057953596115112,grad_norm: 0.7117797633201016, iteration: 447648
loss: 0.9932051301002502,grad_norm: 0.7427630612585184, iteration: 447649
loss: 1.0079805850982666,grad_norm: 0.7655889556751515, iteration: 447650
loss: 1.032339096069336,grad_norm: 0.770934640274886, iteration: 447651
loss: 1.0438377857208252,grad_norm: 0.7247529972832547, iteration: 447652
loss: 1.0506435632705688,grad_norm: 0.9999997422818248, iteration: 447653
loss: 1.0036101341247559,grad_norm: 0.930076499447296, iteration: 447654
loss: 1.0183337926864624,grad_norm: 0.6183709956684019, iteration: 447655
loss: 1.0389169454574585,grad_norm: 0.7747210436957696, iteration: 447656
loss: 0.986939549446106,grad_norm: 0.8256093697742988, iteration: 447657
loss: 0.9615081548690796,grad_norm: 0.7737642653544667, iteration: 447658
loss: 1.0191314220428467,grad_norm: 0.7537894256769998, iteration: 447659
loss: 0.9735199809074402,grad_norm: 0.8446626420930455, iteration: 447660
loss: 1.0091010332107544,grad_norm: 0.7672305161643207, iteration: 447661
loss: 1.0062857866287231,grad_norm: 0.986671625493463, iteration: 447662
loss: 0.9929907917976379,grad_norm: 0.7272219765655422, iteration: 447663
loss: 0.9807119369506836,grad_norm: 0.859758752488351, iteration: 447664
loss: 1.0143126249313354,grad_norm: 0.6635334698153749, iteration: 447665
loss: 1.0755561590194702,grad_norm: 0.9999995353045061, iteration: 447666
loss: 1.0122426748275757,grad_norm: 0.7911586585606496, iteration: 447667
loss: 1.0121756792068481,grad_norm: 0.8763586916989494, iteration: 447668
loss: 1.0074677467346191,grad_norm: 0.7858064746752492, iteration: 447669
loss: 1.011338710784912,grad_norm: 0.7255080432776814, iteration: 447670
loss: 1.0266121625900269,grad_norm: 0.9059781599243621, iteration: 447671
loss: 0.9989876747131348,grad_norm: 0.7344261186387575, iteration: 447672
loss: 0.9763386845588684,grad_norm: 0.9377323968780255, iteration: 447673
loss: 1.2665585279464722,grad_norm: 0.9999991848190358, iteration: 447674
loss: 0.9744391441345215,grad_norm: 0.9999997190931067, iteration: 447675
loss: 0.9756707549095154,grad_norm: 0.8489853070293456, iteration: 447676
loss: 1.004740834236145,grad_norm: 0.7703751637774535, iteration: 447677
loss: 0.9646434187889099,grad_norm: 0.862593786243812, iteration: 447678
loss: 0.9724781513214111,grad_norm: 0.8663808510137241, iteration: 447679
loss: 0.9791570901870728,grad_norm: 0.7424843302054908, iteration: 447680
loss: 1.0239812135696411,grad_norm: 0.821453067691781, iteration: 447681
loss: 1.0000075101852417,grad_norm: 0.8436593334627958, iteration: 447682
loss: 0.9982755780220032,grad_norm: 0.7005223486451216, iteration: 447683
loss: 1.004470705986023,grad_norm: 0.9264756142639697, iteration: 447684
loss: 0.9681307077407837,grad_norm: 0.7905468568904063, iteration: 447685
loss: 0.9762186408042908,grad_norm: 0.9999999054872492, iteration: 447686
loss: 0.9946742653846741,grad_norm: 0.6624846490028887, iteration: 447687
loss: 1.0401837825775146,grad_norm: 0.9322306096843588, iteration: 447688
loss: 0.9821295738220215,grad_norm: 0.8214204843589786, iteration: 447689
loss: 0.9916052222251892,grad_norm: 0.7891048786509952, iteration: 447690
loss: 0.9833556413650513,grad_norm: 0.8575390581167853, iteration: 447691
loss: 1.081826090812683,grad_norm: 0.9999994141302145, iteration: 447692
loss: 1.0110517740249634,grad_norm: 0.9333022628816532, iteration: 447693
loss: 1.00639009475708,grad_norm: 0.801114074605923, iteration: 447694
loss: 0.9868906736373901,grad_norm: 0.6957228788267132, iteration: 447695
loss: 1.005332350730896,grad_norm: 0.7099227423958258, iteration: 447696
loss: 1.0096412897109985,grad_norm: 0.7842508908158247, iteration: 447697
loss: 0.9822472929954529,grad_norm: 0.7517361379344157, iteration: 447698
loss: 1.0113345384597778,grad_norm: 0.9831274926779514, iteration: 447699
loss: 1.0110957622528076,grad_norm: 0.7696548991958421, iteration: 447700
loss: 1.1842910051345825,grad_norm: 0.9999992380096238, iteration: 447701
loss: 1.0151112079620361,grad_norm: 0.7681548645780188, iteration: 447702
loss: 1.0237489938735962,grad_norm: 0.8069714975556883, iteration: 447703
loss: 0.9941535592079163,grad_norm: 0.9220679382668953, iteration: 447704
loss: 0.9721505641937256,grad_norm: 0.7720833692279341, iteration: 447705
loss: 0.9498682022094727,grad_norm: 0.7287676517244386, iteration: 447706
loss: 0.9939777255058289,grad_norm: 0.6651919493730437, iteration: 447707
loss: 1.0041934251785278,grad_norm: 0.9645295260017296, iteration: 447708
loss: 0.9845900535583496,grad_norm: 0.7806421378027846, iteration: 447709
loss: 0.9882961511611938,grad_norm: 0.7932000888175, iteration: 447710
loss: 0.9751548767089844,grad_norm: 0.7414153663984796, iteration: 447711
loss: 0.9826794862747192,grad_norm: 0.9251640476848061, iteration: 447712
loss: 1.0678364038467407,grad_norm: 0.9999990324608118, iteration: 447713
loss: 1.0112777948379517,grad_norm: 0.9147839524122009, iteration: 447714
loss: 0.9847255945205688,grad_norm: 0.9999998839618691, iteration: 447715
loss: 1.0038511753082275,grad_norm: 0.7815093430929979, iteration: 447716
loss: 0.9804259538650513,grad_norm: 0.7320454494513231, iteration: 447717
loss: 1.0113240480422974,grad_norm: 0.9999992382971216, iteration: 447718
loss: 0.979083240032196,grad_norm: 0.763670168463262, iteration: 447719
loss: 0.9846680760383606,grad_norm: 0.9415079231982847, iteration: 447720
loss: 0.9806063771247864,grad_norm: 0.7569468487566643, iteration: 447721
loss: 1.0402034521102905,grad_norm: 0.7998264788487994, iteration: 447722
loss: 1.0023211240768433,grad_norm: 0.726084446160404, iteration: 447723
loss: 0.9764633774757385,grad_norm: 0.7728479818647583, iteration: 447724
loss: 0.989136815071106,grad_norm: 1.0000000049762137, iteration: 447725
loss: 0.9971371293067932,grad_norm: 0.6914811154276796, iteration: 447726
loss: 0.9807765483856201,grad_norm: 0.7737815231926599, iteration: 447727
loss: 1.0799190998077393,grad_norm: 0.9999990842103772, iteration: 447728
loss: 0.9477241635322571,grad_norm: 0.8512181144353558, iteration: 447729
loss: 1.017119288444519,grad_norm: 0.7311011356559695, iteration: 447730
loss: 0.9932777285575867,grad_norm: 0.6469859975876757, iteration: 447731
loss: 0.9725096225738525,grad_norm: 0.8490830875761571, iteration: 447732
loss: 1.038271427154541,grad_norm: 0.8293918977295052, iteration: 447733
loss: 1.0085231065750122,grad_norm: 0.755868539052787, iteration: 447734
loss: 0.987424910068512,grad_norm: 0.7121638649978379, iteration: 447735
loss: 1.029745101928711,grad_norm: 0.884702827860822, iteration: 447736
loss: 0.9754655361175537,grad_norm: 0.8520703163714368, iteration: 447737
loss: 1.0338081121444702,grad_norm: 0.9670815267703295, iteration: 447738
loss: 0.9769242405891418,grad_norm: 0.8068697447564638, iteration: 447739
loss: 0.9794767498970032,grad_norm: 0.8156630981163268, iteration: 447740
loss: 1.0162402391433716,grad_norm: 0.7862437653932359, iteration: 447741
loss: 1.0095735788345337,grad_norm: 0.9264674057592108, iteration: 447742
loss: 0.9968010783195496,grad_norm: 0.7458944155669402, iteration: 447743
loss: 1.0511622428894043,grad_norm: 0.7648307695542009, iteration: 447744
loss: 1.0117124319076538,grad_norm: 0.955660752984895, iteration: 447745
loss: 0.9915769696235657,grad_norm: 0.7892727805009455, iteration: 447746
loss: 1.0383504629135132,grad_norm: 0.9671204516268614, iteration: 447747
loss: 0.9615850448608398,grad_norm: 0.878188444518588, iteration: 447748
loss: 1.0685452222824097,grad_norm: 0.9999997925187342, iteration: 447749
loss: 1.0094271898269653,grad_norm: 0.921244742976391, iteration: 447750
loss: 0.9976344704627991,grad_norm: 0.7889036136373698, iteration: 447751
loss: 0.9788312315940857,grad_norm: 0.811913162871972, iteration: 447752
loss: 1.0000187158584595,grad_norm: 0.7682842723204792, iteration: 447753
loss: 0.9908090233802795,grad_norm: 0.9553904846705791, iteration: 447754
loss: 1.0006041526794434,grad_norm: 0.757265260415705, iteration: 447755
loss: 1.0114065408706665,grad_norm: 0.9544204344906668, iteration: 447756
loss: 1.0136053562164307,grad_norm: 0.9663930580108174, iteration: 447757
loss: 1.0131359100341797,grad_norm: 0.7175538211162384, iteration: 447758
loss: 1.041719675064087,grad_norm: 0.8986922265805721, iteration: 447759
loss: 0.980845034122467,grad_norm: 0.6803247246136969, iteration: 447760
loss: 0.9544903635978699,grad_norm: 0.6972991953010744, iteration: 447761
loss: 1.124153971672058,grad_norm: 0.9932595167653693, iteration: 447762
loss: 0.970345675945282,grad_norm: 0.9999990314579247, iteration: 447763
loss: 0.9694364666938782,grad_norm: 0.7219228176379695, iteration: 447764
loss: 1.001061201095581,grad_norm: 0.8423160406063953, iteration: 447765
loss: 0.9972679615020752,grad_norm: 0.6600313665559248, iteration: 447766
loss: 0.9548599123954773,grad_norm: 0.8064053561270511, iteration: 447767
loss: 0.9776043891906738,grad_norm: 0.7274670698749096, iteration: 447768
loss: 0.9667091965675354,grad_norm: 0.767580914333145, iteration: 447769
loss: 0.9974664449691772,grad_norm: 0.9999992811102297, iteration: 447770
loss: 0.9972792863845825,grad_norm: 0.6345170113264164, iteration: 447771
loss: 1.0381355285644531,grad_norm: 0.9289615492874053, iteration: 447772
loss: 1.0239955186843872,grad_norm: 0.6279846507075356, iteration: 447773
loss: 0.9599179625511169,grad_norm: 0.5836641164647163, iteration: 447774
loss: 0.9599025845527649,grad_norm: 0.7476923436395235, iteration: 447775
loss: 0.9963345527648926,grad_norm: 0.7725532751966931, iteration: 447776
loss: 0.9870381951332092,grad_norm: 0.7643325865801333, iteration: 447777
loss: 1.0006016492843628,grad_norm: 0.7125571236048873, iteration: 447778
loss: 0.997017502784729,grad_norm: 0.7958071221222357, iteration: 447779
loss: 0.984062910079956,grad_norm: 0.9353054414992926, iteration: 447780
loss: 0.9724203944206238,grad_norm: 0.6608337637233995, iteration: 447781
loss: 1.0319417715072632,grad_norm: 0.9999990185804711, iteration: 447782
loss: 0.9703500270843506,grad_norm: 0.7288708901646815, iteration: 447783
loss: 1.0310359001159668,grad_norm: 0.9999993890234131, iteration: 447784
loss: 0.9830721616744995,grad_norm: 0.7620467048620965, iteration: 447785
loss: 0.9947082996368408,grad_norm: 0.6849373621044782, iteration: 447786
loss: 0.9621200561523438,grad_norm: 0.7635166380267532, iteration: 447787
loss: 0.9730657339096069,grad_norm: 0.8006861187965437, iteration: 447788
loss: 0.979856014251709,grad_norm: 0.8975644718996414, iteration: 447789
loss: 0.9702526926994324,grad_norm: 0.6789710025207982, iteration: 447790
loss: 0.9889111518859863,grad_norm: 0.6879533235064482, iteration: 447791
loss: 1.052505373954773,grad_norm: 0.9282678492588305, iteration: 447792
loss: 1.0115785598754883,grad_norm: 0.9254015328766815, iteration: 447793
loss: 1.0733007192611694,grad_norm: 0.8985144057448508, iteration: 447794
loss: 0.9688023924827576,grad_norm: 0.9999993382301909, iteration: 447795
loss: 1.0028120279312134,grad_norm: 0.9999993397792708, iteration: 447796
loss: 0.9526631236076355,grad_norm: 0.7233915070129359, iteration: 447797
loss: 0.9835772514343262,grad_norm: 0.7040704869320248, iteration: 447798
loss: 1.019375205039978,grad_norm: 0.8019898251103647, iteration: 447799
loss: 1.0118303298950195,grad_norm: 0.8617153274136611, iteration: 447800
loss: 0.9878154993057251,grad_norm: 0.9999997752741887, iteration: 447801
loss: 0.989744246006012,grad_norm: 0.7481572102570897, iteration: 447802
loss: 1.0237150192260742,grad_norm: 0.6799654542328196, iteration: 447803
loss: 1.0250719785690308,grad_norm: 0.8409427101780023, iteration: 447804
loss: 1.0222622156143188,grad_norm: 0.9588783648118782, iteration: 447805
loss: 1.0166211128234863,grad_norm: 0.7258313802711405, iteration: 447806
loss: 1.039170265197754,grad_norm: 0.9257262188604549, iteration: 447807
loss: 1.0179554224014282,grad_norm: 0.7715135800937699, iteration: 447808
loss: 1.0144134759902954,grad_norm: 0.9193269114160801, iteration: 447809
loss: 1.0207208395004272,grad_norm: 0.8541016893880131, iteration: 447810
loss: 0.9995641112327576,grad_norm: 0.8651007864877859, iteration: 447811
loss: 0.9894171357154846,grad_norm: 0.8382919753409936, iteration: 447812
loss: 0.9983016848564148,grad_norm: 0.9400122425014439, iteration: 447813
loss: 1.0305588245391846,grad_norm: 0.6242423728207901, iteration: 447814
loss: 0.9726719260215759,grad_norm: 0.8238231269761809, iteration: 447815
loss: 0.9821024537086487,grad_norm: 0.8562150068518201, iteration: 447816
loss: 1.0527187585830688,grad_norm: 0.9007936211837244, iteration: 447817
loss: 1.0189813375473022,grad_norm: 0.7288093296816279, iteration: 447818
loss: 1.0210870504379272,grad_norm: 0.7392201224656969, iteration: 447819
loss: 1.003603219985962,grad_norm: 0.7476082021909946, iteration: 447820
loss: 1.0123000144958496,grad_norm: 0.6837891835924478, iteration: 447821
loss: 0.9743140935897827,grad_norm: 0.9999999095229352, iteration: 447822
loss: 1.020504117012024,grad_norm: 0.99999932890011, iteration: 447823
loss: 1.0074633359909058,grad_norm: 0.730068731091006, iteration: 447824
loss: 0.9558537602424622,grad_norm: 0.7213872479742018, iteration: 447825
loss: 0.9881503582000732,grad_norm: 0.6691916879961413, iteration: 447826
loss: 1.0283104181289673,grad_norm: 0.8128603538292067, iteration: 447827
loss: 1.0334548950195312,grad_norm: 0.849954731105147, iteration: 447828
loss: 1.049173355102539,grad_norm: 0.8977817463313161, iteration: 447829
loss: 1.0103538036346436,grad_norm: 0.8586513160529876, iteration: 447830
loss: 0.9922745823860168,grad_norm: 0.7069340355894773, iteration: 447831
loss: 1.0506855249404907,grad_norm: 0.9999992173238212, iteration: 447832
loss: 0.9981793761253357,grad_norm: 0.7592583367715554, iteration: 447833
loss: 1.0030853748321533,grad_norm: 0.7065484582489295, iteration: 447834
loss: 0.9758694767951965,grad_norm: 0.669739123775861, iteration: 447835
loss: 1.0094919204711914,grad_norm: 0.9999994127892593, iteration: 447836
loss: 1.0455936193466187,grad_norm: 0.8493780985929059, iteration: 447837
loss: 0.9942176938056946,grad_norm: 0.7079777837869163, iteration: 447838
loss: 1.058637261390686,grad_norm: 1.0000000709984915, iteration: 447839
loss: 1.0287342071533203,grad_norm: 0.7648248552246365, iteration: 447840
loss: 1.0144540071487427,grad_norm: 0.8573688659041836, iteration: 447841
loss: 0.9756126403808594,grad_norm: 0.7393321668379447, iteration: 447842
loss: 1.0056084394454956,grad_norm: 0.9839873252371859, iteration: 447843
loss: 1.0124238729476929,grad_norm: 0.6198870056359974, iteration: 447844
loss: 0.9705999493598938,grad_norm: 0.7728025802263467, iteration: 447845
loss: 0.996402382850647,grad_norm: 0.7121581432921678, iteration: 447846
loss: 1.0581419467926025,grad_norm: 0.7914736086194023, iteration: 447847
loss: 1.0896161794662476,grad_norm: 0.8336395572277248, iteration: 447848
loss: 1.048721194267273,grad_norm: 0.999999318256089, iteration: 447849
loss: 0.9725340008735657,grad_norm: 0.8011243785536922, iteration: 447850
loss: 1.0073299407958984,grad_norm: 0.7682501993911011, iteration: 447851
loss: 1.0399129390716553,grad_norm: 0.999999681973669, iteration: 447852
loss: 0.9780949950218201,grad_norm: 0.8776030241703682, iteration: 447853
loss: 0.9753599166870117,grad_norm: 0.9999994907609611, iteration: 447854
loss: 0.990752100944519,grad_norm: 0.7328870424802894, iteration: 447855
loss: 1.032404899597168,grad_norm: 0.8383680851349123, iteration: 447856
loss: 1.0259921550750732,grad_norm: 0.8352689269861845, iteration: 447857
loss: 1.0028706789016724,grad_norm: 0.8531510396948612, iteration: 447858
loss: 1.0299781560897827,grad_norm: 0.999999185824854, iteration: 447859
loss: 1.0214622020721436,grad_norm: 0.7859642088736225, iteration: 447860
loss: 1.0032655000686646,grad_norm: 0.7417066391486776, iteration: 447861
loss: 0.9899497032165527,grad_norm: 0.807513078116737, iteration: 447862
loss: 0.9876866340637207,grad_norm: 0.7308183648455282, iteration: 447863
loss: 0.9846885800361633,grad_norm: 0.7932331212690441, iteration: 447864
loss: 0.9936739206314087,grad_norm: 0.774959106182238, iteration: 447865
loss: 0.9758632779121399,grad_norm: 0.7477368395783539, iteration: 447866
loss: 1.0157569646835327,grad_norm: 0.8563140715594032, iteration: 447867
loss: 1.0152872800827026,grad_norm: 0.9999991399097198, iteration: 447868
loss: 1.0427749156951904,grad_norm: 0.999999591678437, iteration: 447869
loss: 0.993058443069458,grad_norm: 0.8468355148082783, iteration: 447870
loss: 0.9612738490104675,grad_norm: 0.6015441445950205, iteration: 447871
loss: 0.9674719572067261,grad_norm: 0.829449006642657, iteration: 447872
loss: 1.0003416538238525,grad_norm: 0.8638545208596167, iteration: 447873
loss: 0.981815755367279,grad_norm: 0.8389714696364782, iteration: 447874
loss: 1.0060597658157349,grad_norm: 0.7397732031671902, iteration: 447875
loss: 1.0466740131378174,grad_norm: 0.8122755096026769, iteration: 447876
loss: 0.9926676154136658,grad_norm: 0.8445392592922357, iteration: 447877
loss: 0.9746308326721191,grad_norm: 0.8364826283875928, iteration: 447878
loss: 1.0225746631622314,grad_norm: 0.7999857628639114, iteration: 447879
loss: 1.0258780717849731,grad_norm: 0.6984209285663361, iteration: 447880
loss: 0.9928205013275146,grad_norm: 0.857793270842016, iteration: 447881
loss: 1.0074068307876587,grad_norm: 0.7977253182604628, iteration: 447882
loss: 1.0099698305130005,grad_norm: 0.6928732498951004, iteration: 447883
loss: 1.0223397016525269,grad_norm: 0.8656001165327223, iteration: 447884
loss: 0.998287558555603,grad_norm: 0.7029529601607766, iteration: 447885
loss: 1.245462417602539,grad_norm: 0.9999994877437456, iteration: 447886
loss: 0.9854686856269836,grad_norm: 0.6990414220325127, iteration: 447887
loss: 0.9976814389228821,grad_norm: 0.8162750947437738, iteration: 447888
loss: 1.005242109298706,grad_norm: 0.8117407200942821, iteration: 447889
loss: 1.0161001682281494,grad_norm: 0.776802257759047, iteration: 447890
loss: 0.9693430662155151,grad_norm: 0.7363459792066415, iteration: 447891
loss: 0.9836574792861938,grad_norm: 0.6664487735138134, iteration: 447892
loss: 0.9922623038291931,grad_norm: 0.7872627170400779, iteration: 447893
loss: 0.970003068447113,grad_norm: 0.8156151323109525, iteration: 447894
loss: 0.9965493083000183,grad_norm: 0.6709977843668702, iteration: 447895
loss: 1.0672168731689453,grad_norm: 0.6740927275493518, iteration: 447896
loss: 1.0132328271865845,grad_norm: 0.7445644492054265, iteration: 447897
loss: 0.9966357946395874,grad_norm: 0.9283306610293689, iteration: 447898
loss: 1.0192400217056274,grad_norm: 0.664656236793329, iteration: 447899
loss: 1.057978630065918,grad_norm: 0.9999990447012548, iteration: 447900
loss: 0.9996803402900696,grad_norm: 0.5973496547993745, iteration: 447901
loss: 0.970065712928772,grad_norm: 0.7911222997118167, iteration: 447902
loss: 1.1161340475082397,grad_norm: 0.8899824063034402, iteration: 447903
loss: 0.9761995673179626,grad_norm: 0.7602354136903408, iteration: 447904
loss: 0.9603918790817261,grad_norm: 0.833147225131703, iteration: 447905
loss: 0.981940507888794,grad_norm: 0.8644861288960324, iteration: 447906
loss: 0.9811566472053528,grad_norm: 0.9999991434811291, iteration: 447907
loss: 1.0648229122161865,grad_norm: 0.9999998318736134, iteration: 447908
loss: 1.028018593788147,grad_norm: 0.9085837283162524, iteration: 447909
loss: 0.9849689602851868,grad_norm: 0.7517835434062282, iteration: 447910
loss: 0.9842832684516907,grad_norm: 0.7719749244672267, iteration: 447911
loss: 0.9977230429649353,grad_norm: 0.6716587723003258, iteration: 447912
loss: 1.013027310371399,grad_norm: 0.7862067467359499, iteration: 447913
loss: 1.0015053749084473,grad_norm: 0.7363974202642511, iteration: 447914
loss: 0.9751617312431335,grad_norm: 0.7528561599779939, iteration: 447915
loss: 1.016400933265686,grad_norm: 0.9152853889395569, iteration: 447916
loss: 0.9706403613090515,grad_norm: 0.7936638214002637, iteration: 447917
loss: 0.9634290933609009,grad_norm: 0.6790115778517448, iteration: 447918
loss: 1.018435001373291,grad_norm: 0.7814471930411395, iteration: 447919
loss: 1.0547293424606323,grad_norm: 0.8361170046843953, iteration: 447920
loss: 1.0337082147598267,grad_norm: 0.9999993961028241, iteration: 447921
loss: 0.9761649370193481,grad_norm: 0.8178666276495808, iteration: 447922
loss: 1.0112621784210205,grad_norm: 0.8696469078974123, iteration: 447923
loss: 0.9523670673370361,grad_norm: 0.7483881843062591, iteration: 447924
loss: 0.9955968260765076,grad_norm: 0.828707143620296, iteration: 447925
loss: 0.9965820908546448,grad_norm: 0.691594264639538, iteration: 447926
loss: 1.0312670469284058,grad_norm: 0.6244378014855616, iteration: 447927
loss: 0.9719489812850952,grad_norm: 0.7930421188294746, iteration: 447928
loss: 0.9635952711105347,grad_norm: 0.6710668709920674, iteration: 447929
loss: 0.9775805473327637,grad_norm: 0.7425436078358685, iteration: 447930
loss: 0.9797254204750061,grad_norm: 0.7087919117553404, iteration: 447931
loss: 1.113050937652588,grad_norm: 0.9999996268955761, iteration: 447932
loss: 1.0384107828140259,grad_norm: 0.9999999644861828, iteration: 447933
loss: 0.9857549667358398,grad_norm: 0.5873238335825526, iteration: 447934
loss: 0.9695892333984375,grad_norm: 0.8198689675934259, iteration: 447935
loss: 0.9981841444969177,grad_norm: 0.679884981656898, iteration: 447936
loss: 1.0112595558166504,grad_norm: 0.6702826993462448, iteration: 447937
loss: 0.9983229637145996,grad_norm: 0.9605432642244163, iteration: 447938
loss: 0.9943075776100159,grad_norm: 0.6736509800044224, iteration: 447939
loss: 1.0004596710205078,grad_norm: 0.8184139130301945, iteration: 447940
loss: 1.0295649766921997,grad_norm: 0.999999718096887, iteration: 447941
loss: 0.9851557016372681,grad_norm: 0.7049591418179577, iteration: 447942
loss: 1.1600568294525146,grad_norm: 0.917960120441705, iteration: 447943
loss: 1.008569359779358,grad_norm: 0.804440061236156, iteration: 447944
loss: 0.989536464214325,grad_norm: 0.8803085235379098, iteration: 447945
loss: 1.0086981058120728,grad_norm: 0.791287316244476, iteration: 447946
loss: 0.9781494140625,grad_norm: 0.7460324186851485, iteration: 447947
loss: 0.9828066825866699,grad_norm: 0.7710028000995873, iteration: 447948
loss: 1.0196009874343872,grad_norm: 0.7148830024285477, iteration: 447949
loss: 0.9976356625556946,grad_norm: 0.6838266211418261, iteration: 447950
loss: 0.9822558760643005,grad_norm: 0.7658584876674804, iteration: 447951
loss: 1.0152649879455566,grad_norm: 0.7082861264108349, iteration: 447952
loss: 1.0565319061279297,grad_norm: 0.8000482806786698, iteration: 447953
loss: 1.0361884832382202,grad_norm: 0.9999998653163756, iteration: 447954
loss: 1.0408834218978882,grad_norm: 0.9999991664822585, iteration: 447955
loss: 0.9942911863327026,grad_norm: 0.7157580907431785, iteration: 447956
loss: 1.0551249980926514,grad_norm: 1.0000000916203715, iteration: 447957
loss: 0.9928017854690552,grad_norm: 0.8182880751346425, iteration: 447958
loss: 1.007599115371704,grad_norm: 0.6622224415797345, iteration: 447959
loss: 1.0224770307540894,grad_norm: 0.8975070606165849, iteration: 447960
loss: 1.0500117540359497,grad_norm: 0.9999996111133008, iteration: 447961
loss: 0.9900504946708679,grad_norm: 0.8581179001957524, iteration: 447962
loss: 0.9962390661239624,grad_norm: 0.7490921002175248, iteration: 447963
loss: 0.9820475578308105,grad_norm: 0.8275974125136851, iteration: 447964
loss: 0.9967221617698669,grad_norm: 0.6800002877824205, iteration: 447965
loss: 1.007942795753479,grad_norm: 0.9196007132961382, iteration: 447966
loss: 0.9849972724914551,grad_norm: 0.7947685442373149, iteration: 447967
loss: 0.9844412207603455,grad_norm: 0.851131335022797, iteration: 447968
loss: 0.9854363203048706,grad_norm: 0.7372093943796935, iteration: 447969
loss: 1.0360394716262817,grad_norm: 0.8078963097550896, iteration: 447970
loss: 0.9934945702552795,grad_norm: 0.7384937055298386, iteration: 447971
loss: 0.9929364323616028,grad_norm: 0.7222852985961149, iteration: 447972
loss: 0.9904627799987793,grad_norm: 0.7603005011573352, iteration: 447973
loss: 1.0005673170089722,grad_norm: 0.805380173174038, iteration: 447974
loss: 1.0198488235473633,grad_norm: 0.8162063080690716, iteration: 447975
loss: 1.0068718194961548,grad_norm: 0.8078543649977892, iteration: 447976
loss: 1.0044617652893066,grad_norm: 0.7854493064977741, iteration: 447977
loss: 1.0250024795532227,grad_norm: 0.5925586191228087, iteration: 447978
loss: 0.9679841995239258,grad_norm: 0.8904943819511626, iteration: 447979
loss: 0.9674250483512878,grad_norm: 0.7198351961543474, iteration: 447980
loss: 1.008170485496521,grad_norm: 0.9999999538064858, iteration: 447981
loss: 1.0186737775802612,grad_norm: 0.7419320897238066, iteration: 447982
loss: 0.9679752588272095,grad_norm: 0.7728234647770097, iteration: 447983
loss: 1.017103910446167,grad_norm: 0.6522289936416926, iteration: 447984
loss: 0.9868879318237305,grad_norm: 0.8481009451683911, iteration: 447985
loss: 1.0366588830947876,grad_norm: 0.9999999576319365, iteration: 447986
loss: 0.9643614888191223,grad_norm: 0.8381023250279719, iteration: 447987
loss: 0.9871845841407776,grad_norm: 0.8312290512050186, iteration: 447988
loss: 0.9958359599113464,grad_norm: 0.7297869692554143, iteration: 447989
loss: 0.9879189133644104,grad_norm: 0.6504320786886256, iteration: 447990
loss: 0.9546191096305847,grad_norm: 0.6977798198866436, iteration: 447991
loss: 1.003785490989685,grad_norm: 0.8412684308812519, iteration: 447992
loss: 0.9977986216545105,grad_norm: 0.6226751036226428, iteration: 447993
loss: 0.9881947636604309,grad_norm: 0.7943864004856437, iteration: 447994
loss: 0.9845516681671143,grad_norm: 0.8040811931037313, iteration: 447995
loss: 1.0245187282562256,grad_norm: 0.9999992701852366, iteration: 447996
loss: 0.9889046549797058,grad_norm: 0.8187968450068374, iteration: 447997
loss: 1.0075032711029053,grad_norm: 0.7782801976095333, iteration: 447998
loss: 0.9837466478347778,grad_norm: 0.7649970954920557, iteration: 447999
loss: 0.9939419627189636,grad_norm: 0.7186723289148118, iteration: 448000
loss: 0.9936642646789551,grad_norm: 0.710079674487974, iteration: 448001
loss: 1.0087226629257202,grad_norm: 0.8867929128109343, iteration: 448002
loss: 0.9660264849662781,grad_norm: 0.7014722220825951, iteration: 448003
loss: 0.9881977438926697,grad_norm: 0.7878080697363018, iteration: 448004
loss: 0.9888792634010315,grad_norm: 0.6394852052525208, iteration: 448005
loss: 0.994306206703186,grad_norm: 0.7999448654989552, iteration: 448006
loss: 1.043070912361145,grad_norm: 0.8591808307488635, iteration: 448007
loss: 0.9938157200813293,grad_norm: 0.8597814783435083, iteration: 448008
loss: 1.0095388889312744,grad_norm: 0.7913767886410142, iteration: 448009
loss: 0.9859887361526489,grad_norm: 0.6431588301179848, iteration: 448010
loss: 1.0046247243881226,grad_norm: 0.7135230065443356, iteration: 448011
loss: 1.0033034086227417,grad_norm: 0.9999992184559927, iteration: 448012
loss: 0.98814457654953,grad_norm: 0.7469144236893184, iteration: 448013
loss: 1.006233811378479,grad_norm: 0.9999999988102558, iteration: 448014
loss: 1.028019666671753,grad_norm: 0.692349791380648, iteration: 448015
loss: 1.0085875988006592,grad_norm: 0.9038490788944189, iteration: 448016
loss: 1.0131938457489014,grad_norm: 0.7015949987613557, iteration: 448017
loss: 1.0447951555252075,grad_norm: 0.8261374876296368, iteration: 448018
loss: 1.014441967010498,grad_norm: 0.8641506950747343, iteration: 448019
loss: 0.9632187485694885,grad_norm: 0.7409584572053024, iteration: 448020
loss: 0.9994776844978333,grad_norm: 0.8256204418132724, iteration: 448021
loss: 0.9967498779296875,grad_norm: 0.8228901543633631, iteration: 448022
loss: 1.0223324298858643,grad_norm: 0.6652405706162883, iteration: 448023
loss: 0.9957798719406128,grad_norm: 0.7613852239276446, iteration: 448024
loss: 1.0179136991500854,grad_norm: 0.6120774068692025, iteration: 448025
loss: 0.9966433644294739,grad_norm: 0.7856888633595991, iteration: 448026
loss: 0.9798056483268738,grad_norm: 0.8588991424592631, iteration: 448027
loss: 0.9658724069595337,grad_norm: 0.8178336832938832, iteration: 448028
loss: 0.9802183508872986,grad_norm: 0.8022693915349507, iteration: 448029
loss: 0.994208812713623,grad_norm: 0.7479141481892152, iteration: 448030
loss: 0.986552357673645,grad_norm: 0.7291307114231331, iteration: 448031
loss: 1.0006434917449951,grad_norm: 0.7354580461568152, iteration: 448032
loss: 0.9770878553390503,grad_norm: 0.6925776801916274, iteration: 448033
loss: 0.9886271953582764,grad_norm: 0.8613821501847824, iteration: 448034
loss: 1.0779489278793335,grad_norm: 0.8477486649961213, iteration: 448035
loss: 1.06930410861969,grad_norm: 0.9999998386565937, iteration: 448036
loss: 1.035699725151062,grad_norm: 0.8403186883577161, iteration: 448037
loss: 1.007575511932373,grad_norm: 0.853200191765749, iteration: 448038
loss: 1.0009528398513794,grad_norm: 0.6630148657907879, iteration: 448039
loss: 1.0048843622207642,grad_norm: 0.7124981080563444, iteration: 448040
loss: 1.0638898611068726,grad_norm: 0.9757562039819889, iteration: 448041
loss: 0.9877652525901794,grad_norm: 0.8579118939781764, iteration: 448042
loss: 0.9661170244216919,grad_norm: 0.8845283450044094, iteration: 448043
loss: 0.9942507147789001,grad_norm: 0.8146202114447022, iteration: 448044
loss: 1.0188719034194946,grad_norm: 0.7365135508557387, iteration: 448045
loss: 0.9880300164222717,grad_norm: 0.6536381992044954, iteration: 448046
loss: 1.041650652885437,grad_norm: 0.8448047730524915, iteration: 448047
loss: 0.9871658682823181,grad_norm: 0.8020126455774923, iteration: 448048
loss: 1.0144764184951782,grad_norm: 0.7535665562160829, iteration: 448049
loss: 0.9888649582862854,grad_norm: 0.9999992503937638, iteration: 448050
loss: 1.0109598636627197,grad_norm: 0.8688010270289357, iteration: 448051
loss: 0.9922516942024231,grad_norm: 0.6888625992369141, iteration: 448052
loss: 0.9894561171531677,grad_norm: 0.6774715059603185, iteration: 448053
loss: 1.008880376815796,grad_norm: 0.7414607106185905, iteration: 448054
loss: 0.9827828407287598,grad_norm: 0.7277024644231445, iteration: 448055
loss: 0.9819118976593018,grad_norm: 0.7270807989798579, iteration: 448056
loss: 0.9895187616348267,grad_norm: 0.6996174522720927, iteration: 448057
loss: 0.974687933921814,grad_norm: 0.7000437259906944, iteration: 448058
loss: 1.020892858505249,grad_norm: 0.9919935444153526, iteration: 448059
loss: 1.0153601169586182,grad_norm: 0.760419908204096, iteration: 448060
loss: 0.9579784870147705,grad_norm: 0.8177150042089671, iteration: 448061
loss: 1.0124702453613281,grad_norm: 0.8456796775483241, iteration: 448062
loss: 1.005165934562683,grad_norm: 0.8320122080603539, iteration: 448063
loss: 0.9915438890457153,grad_norm: 0.7399208905036928, iteration: 448064
loss: 1.029089331626892,grad_norm: 0.9310615256922385, iteration: 448065
loss: 0.9447804093360901,grad_norm: 0.6703919495046317, iteration: 448066
loss: 0.9861435890197754,grad_norm: 0.7769850901252626, iteration: 448067
loss: 1.0246573686599731,grad_norm: 0.7936119875527298, iteration: 448068
loss: 1.0130490064620972,grad_norm: 0.8233283801353711, iteration: 448069
loss: 0.9777663350105286,grad_norm: 0.7153416190109672, iteration: 448070
loss: 1.0001537799835205,grad_norm: 0.6849022584614202, iteration: 448071
loss: 0.9964770078659058,grad_norm: 0.8490634894560809, iteration: 448072
loss: 1.0518524646759033,grad_norm: 0.7771242716998303, iteration: 448073
loss: 0.9843953847885132,grad_norm: 0.926466183239738, iteration: 448074
loss: 0.9714378714561462,grad_norm: 0.8219578968107266, iteration: 448075
loss: 1.0210845470428467,grad_norm: 0.7258852193456508, iteration: 448076
loss: 1.0027830600738525,grad_norm: 0.8067967717473077, iteration: 448077
loss: 1.0184400081634521,grad_norm: 0.8322790322646406, iteration: 448078
loss: 0.9858115315437317,grad_norm: 0.999999720062442, iteration: 448079
loss: 0.9897140860557556,grad_norm: 0.7265278280859284, iteration: 448080
loss: 0.9759673476219177,grad_norm: 0.7908179751093858, iteration: 448081
loss: 0.9961075186729431,grad_norm: 0.7168031618699565, iteration: 448082
loss: 0.987384021282196,grad_norm: 0.9999991544873684, iteration: 448083
loss: 0.9743643999099731,grad_norm: 0.630816241234058, iteration: 448084
loss: 0.9775073528289795,grad_norm: 0.8078603073935388, iteration: 448085
loss: 1.0040929317474365,grad_norm: 0.7140735871090077, iteration: 448086
loss: 1.0337724685668945,grad_norm: 0.7446577380268135, iteration: 448087
loss: 0.9742953181266785,grad_norm: 0.879158956657413, iteration: 448088
loss: 1.0026297569274902,grad_norm: 0.6483535435515362, iteration: 448089
loss: 0.9955385327339172,grad_norm: 0.6817318053239291, iteration: 448090
loss: 1.006759762763977,grad_norm: 0.8081743349800766, iteration: 448091
loss: 1.0082229375839233,grad_norm: 0.9470291407450392, iteration: 448092
loss: 1.1569123268127441,grad_norm: 0.9999996785433977, iteration: 448093
loss: 0.996919572353363,grad_norm: 0.7144145698746154, iteration: 448094
loss: 1.0049591064453125,grad_norm: 0.7088254114863658, iteration: 448095
loss: 0.99346524477005,grad_norm: 0.9534943897453173, iteration: 448096
loss: 0.9905280470848083,grad_norm: 0.9119446752671311, iteration: 448097
loss: 1.0076090097427368,grad_norm: 0.7906743881370599, iteration: 448098
loss: 1.0241365432739258,grad_norm: 0.8298424874388869, iteration: 448099
loss: 1.0001789331436157,grad_norm: 0.9029164899973307, iteration: 448100
loss: 0.9970729351043701,grad_norm: 0.790675749142438, iteration: 448101
loss: 0.9713788628578186,grad_norm: 0.7541357278113573, iteration: 448102
loss: 0.9899093508720398,grad_norm: 0.6733902847220461, iteration: 448103
loss: 0.9907178282737732,grad_norm: 0.7474200403555756, iteration: 448104
loss: 1.0151681900024414,grad_norm: 0.8325688239203574, iteration: 448105
loss: 1.015210747718811,grad_norm: 0.8027944697589788, iteration: 448106
loss: 1.0084069967269897,grad_norm: 0.791686642536321, iteration: 448107
loss: 1.0549650192260742,grad_norm: 0.8357029946619888, iteration: 448108
loss: 1.007530689239502,grad_norm: 0.9999998231383522, iteration: 448109
loss: 1.0084223747253418,grad_norm: 0.8016789444587319, iteration: 448110
loss: 0.9802172780036926,grad_norm: 0.7097405077783151, iteration: 448111
loss: 0.9932305812835693,grad_norm: 0.7419094317908387, iteration: 448112
loss: 1.00542151927948,grad_norm: 0.7082885382757056, iteration: 448113
loss: 1.0045496225357056,grad_norm: 0.9694222069004262, iteration: 448114
loss: 1.0081582069396973,grad_norm: 0.6912133023313793, iteration: 448115
loss: 1.0145984888076782,grad_norm: 0.9999993529627617, iteration: 448116
loss: 1.0158404111862183,grad_norm: 0.8225839075654877, iteration: 448117
loss: 1.0297690629959106,grad_norm: 0.8050462470480076, iteration: 448118
loss: 0.9810017943382263,grad_norm: 0.820114630846007, iteration: 448119
loss: 0.9676064252853394,grad_norm: 0.839413117219991, iteration: 448120
loss: 1.0193147659301758,grad_norm: 0.6609586156211547, iteration: 448121
loss: 0.9461385607719421,grad_norm: 0.6985915728725484, iteration: 448122
loss: 0.9646360874176025,grad_norm: 0.7510600896658655, iteration: 448123
loss: 1.000938892364502,grad_norm: 0.7811570547261173, iteration: 448124
loss: 1.0040549039840698,grad_norm: 0.6753045685592645, iteration: 448125
loss: 0.9712055921554565,grad_norm: 0.9999990522610787, iteration: 448126
loss: 1.0103294849395752,grad_norm: 0.8663708417766655, iteration: 448127
loss: 0.9749084711074829,grad_norm: 0.7582569973790543, iteration: 448128
loss: 1.0105242729187012,grad_norm: 0.7818839786512728, iteration: 448129
loss: 1.0116751194000244,grad_norm: 0.7707348229094815, iteration: 448130
loss: 1.0089311599731445,grad_norm: 0.7181285709772907, iteration: 448131
loss: 1.04776930809021,grad_norm: 0.693854950799415, iteration: 448132
loss: 1.007420539855957,grad_norm: 0.9999994550211803, iteration: 448133
loss: 0.9772312045097351,grad_norm: 0.87597971290132, iteration: 448134
loss: 1.0292350053787231,grad_norm: 0.9904925791960898, iteration: 448135
loss: 1.0082073211669922,grad_norm: 0.6894020106540273, iteration: 448136
loss: 1.0010647773742676,grad_norm: 0.6756227710126791, iteration: 448137
loss: 1.0049686431884766,grad_norm: 0.8807697537721489, iteration: 448138
loss: 0.9990039467811584,grad_norm: 0.7359147714919931, iteration: 448139
loss: 1.012858510017395,grad_norm: 0.9307894574154559, iteration: 448140
loss: 1.0332576036453247,grad_norm: 0.7145172398508981, iteration: 448141
loss: 1.004463791847229,grad_norm: 0.9191563932987064, iteration: 448142
loss: 0.9625979065895081,grad_norm: 0.6891859593162003, iteration: 448143
loss: 0.9898794889450073,grad_norm: 0.785402881893325, iteration: 448144
loss: 1.0082035064697266,grad_norm: 0.7254276144853387, iteration: 448145
loss: 0.9741656184196472,grad_norm: 0.726148949281889, iteration: 448146
loss: 0.9809558391571045,grad_norm: 0.6149796632343747, iteration: 448147
loss: 0.974724292755127,grad_norm: 0.5737972700766842, iteration: 448148
loss: 0.9904075860977173,grad_norm: 0.7363223030469558, iteration: 448149
loss: 1.0043234825134277,grad_norm: 0.9999992392159873, iteration: 448150
loss: 1.0099804401397705,grad_norm: 0.703431306660055, iteration: 448151
loss: 0.9764321446418762,grad_norm: 0.6799276223879973, iteration: 448152
loss: 0.990962028503418,grad_norm: 0.8157268279326493, iteration: 448153
loss: 0.9936434030532837,grad_norm: 0.8749122279362231, iteration: 448154
loss: 1.0077401399612427,grad_norm: 0.744929476100043, iteration: 448155
loss: 0.9742589592933655,grad_norm: 0.7516901209361468, iteration: 448156
loss: 1.024564266204834,grad_norm: 0.7292200609610091, iteration: 448157
loss: 1.0218710899353027,grad_norm: 0.9212230901240881, iteration: 448158
loss: 1.0127421617507935,grad_norm: 0.6992487442846508, iteration: 448159
loss: 0.9972429275512695,grad_norm: 0.7609991759705519, iteration: 448160
loss: 0.9978773593902588,grad_norm: 0.8048820567839977, iteration: 448161
loss: 0.9641535878181458,grad_norm: 0.7344814843895378, iteration: 448162
loss: 0.9660412073135376,grad_norm: 0.7938991544065066, iteration: 448163
loss: 0.9772523045539856,grad_norm: 0.933841002893331, iteration: 448164
loss: 1.0159542560577393,grad_norm: 0.6925102312863234, iteration: 448165
loss: 0.9893407821655273,grad_norm: 0.8037559108525273, iteration: 448166
loss: 1.0119774341583252,grad_norm: 0.8387118203163639, iteration: 448167
loss: 0.9903480410575867,grad_norm: 0.9236204879736943, iteration: 448168
loss: 0.9907222986221313,grad_norm: 0.6490428394633423, iteration: 448169
loss: 0.9549029469490051,grad_norm: 0.9426280656858517, iteration: 448170
loss: 0.9986699819564819,grad_norm: 0.7773225294363935, iteration: 448171
loss: 1.0071430206298828,grad_norm: 0.9999990451709073, iteration: 448172
loss: 0.9817651510238647,grad_norm: 0.9302734249886188, iteration: 448173
loss: 1.003958821296692,grad_norm: 0.9940756401893874, iteration: 448174
loss: 0.9816975593566895,grad_norm: 0.7786399082540454, iteration: 448175
loss: 1.0263341665267944,grad_norm: 0.6679884430962301, iteration: 448176
loss: 1.0019394159317017,grad_norm: 0.8514484775585272, iteration: 448177
loss: 0.9741501808166504,grad_norm: 0.6804263666996209, iteration: 448178
loss: 0.9919232726097107,grad_norm: 0.7251012890227809, iteration: 448179
loss: 1.024009108543396,grad_norm: 0.8665761769268461, iteration: 448180
loss: 0.9893072247505188,grad_norm: 0.7546324410985124, iteration: 448181
loss: 1.013641357421875,grad_norm: 0.8365314843587189, iteration: 448182
loss: 0.9791866540908813,grad_norm: 0.6755946680056688, iteration: 448183
loss: 0.9923941493034363,grad_norm: 0.7876818050245622, iteration: 448184
loss: 1.0591695308685303,grad_norm: 0.8848296190333441, iteration: 448185
loss: 0.9827295541763306,grad_norm: 0.8681388759420032, iteration: 448186
loss: 1.0177998542785645,grad_norm: 0.7713512492579634, iteration: 448187
loss: 1.0241618156433105,grad_norm: 0.9021195581424959, iteration: 448188
loss: 0.9885997176170349,grad_norm: 0.6997163048071267, iteration: 448189
loss: 0.9872477650642395,grad_norm: 0.7652195059241977, iteration: 448190
loss: 0.9751468896865845,grad_norm: 0.7940507467681976, iteration: 448191
loss: 1.0254356861114502,grad_norm: 0.7215858411060342, iteration: 448192
loss: 1.0095021724700928,grad_norm: 0.828527810211741, iteration: 448193
loss: 0.9864019751548767,grad_norm: 0.7919978526610008, iteration: 448194
loss: 1.0145411491394043,grad_norm: 0.7770278547498437, iteration: 448195
loss: 0.9830949902534485,grad_norm: 0.7978901184596113, iteration: 448196
loss: 1.0002046823501587,grad_norm: 0.9097779314936244, iteration: 448197
loss: 0.9757307171821594,grad_norm: 0.7555753418276879, iteration: 448198
loss: 1.0204075574874878,grad_norm: 0.7416191838504754, iteration: 448199
loss: 1.009568691253662,grad_norm: 0.9999990954577458, iteration: 448200
loss: 1.111647129058838,grad_norm: 0.9289701689991756, iteration: 448201
loss: 0.9984076619148254,grad_norm: 0.8686764221968551, iteration: 448202
loss: 0.9888617992401123,grad_norm: 0.7039465851721648, iteration: 448203
loss: 0.9476203322410583,grad_norm: 0.8508529847888925, iteration: 448204
loss: 0.9844803214073181,grad_norm: 0.7997935825352775, iteration: 448205
loss: 1.0103644132614136,grad_norm: 0.8864962390739412, iteration: 448206
loss: 0.9885765314102173,grad_norm: 0.8916059734835419, iteration: 448207
loss: 1.006351351737976,grad_norm: 0.9999990445424364, iteration: 448208
loss: 1.0071958303451538,grad_norm: 0.8903526865526074, iteration: 448209
loss: 0.975650429725647,grad_norm: 0.7620954062028313, iteration: 448210
loss: 0.9895269274711609,grad_norm: 0.9082168425777829, iteration: 448211
loss: 1.0399953126907349,grad_norm: 0.9312998850921608, iteration: 448212
loss: 0.9985641241073608,grad_norm: 0.752759678944331, iteration: 448213
loss: 0.9873099327087402,grad_norm: 0.769498776884937, iteration: 448214
loss: 0.9784029126167297,grad_norm: 0.6410767795153375, iteration: 448215
loss: 1.0057532787322998,grad_norm: 0.8410149946422502, iteration: 448216
loss: 1.0154203176498413,grad_norm: 0.7790668240968783, iteration: 448217
loss: 1.0035306215286255,grad_norm: 0.691028909840227, iteration: 448218
loss: 0.9839990139007568,grad_norm: 0.7370205013205333, iteration: 448219
loss: 0.9866859316825867,grad_norm: 0.9002798672899345, iteration: 448220
loss: 0.9908491373062134,grad_norm: 0.9999996531385248, iteration: 448221
loss: 0.984450101852417,grad_norm: 0.8677957328889102, iteration: 448222
loss: 0.9657233357429504,grad_norm: 0.8193447451358247, iteration: 448223
loss: 1.0557126998901367,grad_norm: 0.9999998376764677, iteration: 448224
loss: 1.0060679912567139,grad_norm: 0.9973931317393328, iteration: 448225
loss: 0.994204044342041,grad_norm: 0.8057669947039392, iteration: 448226
loss: 1.0295891761779785,grad_norm: 0.9362446733905528, iteration: 448227
loss: 0.9520928859710693,grad_norm: 0.6950053063662893, iteration: 448228
loss: 0.9908137917518616,grad_norm: 0.9888133670616146, iteration: 448229
loss: 1.0143945217132568,grad_norm: 0.884444953437225, iteration: 448230
loss: 0.959437370300293,grad_norm: 0.7817450889221389, iteration: 448231
loss: 1.017799735069275,grad_norm: 0.7001247122178291, iteration: 448232
loss: 0.9898545742034912,grad_norm: 0.9999989949335324, iteration: 448233
loss: 1.0215692520141602,grad_norm: 0.8056266989043597, iteration: 448234
loss: 0.9697662591934204,grad_norm: 0.7342631465289409, iteration: 448235
loss: 0.962780237197876,grad_norm: 0.8453683605790377, iteration: 448236
loss: 0.9897326827049255,grad_norm: 0.7403643936038408, iteration: 448237
loss: 0.9588862657546997,grad_norm: 0.8654053517869361, iteration: 448238
loss: 0.974949300289154,grad_norm: 0.7637460733062466, iteration: 448239
loss: 1.0088038444519043,grad_norm: 0.7290635202096659, iteration: 448240
loss: 0.999427318572998,grad_norm: 0.6682566129724669, iteration: 448241
loss: 1.0071675777435303,grad_norm: 0.7043123279620572, iteration: 448242
loss: 0.9793416261672974,grad_norm: 0.9999993441228692, iteration: 448243
loss: 1.0169332027435303,grad_norm: 0.8301645635850664, iteration: 448244
loss: 1.0130242109298706,grad_norm: 0.7237010831026259, iteration: 448245
loss: 0.9932726621627808,grad_norm: 0.9201852487470068, iteration: 448246
loss: 1.0207329988479614,grad_norm: 0.6382974721003212, iteration: 448247
loss: 1.0293946266174316,grad_norm: 0.6396911379000625, iteration: 448248
loss: 1.0028409957885742,grad_norm: 0.6315041036440737, iteration: 448249
loss: 0.953923225402832,grad_norm: 0.8031681463094066, iteration: 448250
loss: 0.9966064691543579,grad_norm: 0.6823376112997438, iteration: 448251
loss: 1.0461931228637695,grad_norm: 1.0000000288573008, iteration: 448252
loss: 0.999923050403595,grad_norm: 0.8259472396485991, iteration: 448253
loss: 1.0227221250534058,grad_norm: 0.6446011037933154, iteration: 448254
loss: 1.008961796760559,grad_norm: 0.8269251487324113, iteration: 448255
loss: 0.9971501231193542,grad_norm: 0.9962924713242248, iteration: 448256
loss: 0.9772344827651978,grad_norm: 0.7342075827498208, iteration: 448257
loss: 0.9783960580825806,grad_norm: 0.9022597347562386, iteration: 448258
loss: 1.0035194158554077,grad_norm: 0.6965966897710054, iteration: 448259
loss: 0.9882402420043945,grad_norm: 0.7101262655033758, iteration: 448260
loss: 1.0149494409561157,grad_norm: 0.7733371306077718, iteration: 448261
loss: 0.9777265191078186,grad_norm: 0.8209149119743712, iteration: 448262
loss: 0.9664087891578674,grad_norm: 0.7716966559244781, iteration: 448263
loss: 0.958989679813385,grad_norm: 0.8673142274451084, iteration: 448264
loss: 0.9993815422058105,grad_norm: 0.9339721440523155, iteration: 448265
loss: 1.0024511814117432,grad_norm: 0.623732049134722, iteration: 448266
loss: 0.99873286485672,grad_norm: 0.7708497569039445, iteration: 448267
loss: 0.9889044165611267,grad_norm: 0.7517831680297399, iteration: 448268
loss: 1.0055034160614014,grad_norm: 0.8677888663313206, iteration: 448269
loss: 0.9985119700431824,grad_norm: 0.7009471481963647, iteration: 448270
loss: 0.9899433851242065,grad_norm: 0.7047567043128966, iteration: 448271
loss: 1.0238298177719116,grad_norm: 0.794072644467727, iteration: 448272
loss: 0.9627543091773987,grad_norm: 0.8468589766599219, iteration: 448273
loss: 1.0016639232635498,grad_norm: 0.9783432807494599, iteration: 448274
loss: 0.981749415397644,grad_norm: 0.8488221937713936, iteration: 448275
loss: 1.051659345626831,grad_norm: 0.9999994481382032, iteration: 448276
loss: 0.971947193145752,grad_norm: 0.8034727248539423, iteration: 448277
loss: 0.9963513016700745,grad_norm: 0.8901679970987338, iteration: 448278
loss: 0.98576819896698,grad_norm: 0.66699105625076, iteration: 448279
loss: 0.9533109068870544,grad_norm: 0.8709184938877826, iteration: 448280
loss: 1.0148924589157104,grad_norm: 0.7104750733156006, iteration: 448281
loss: 1.0397945642471313,grad_norm: 0.9020462811918154, iteration: 448282
loss: 1.0030053853988647,grad_norm: 0.7521456427548684, iteration: 448283
loss: 0.9662638306617737,grad_norm: 0.6811277978350302, iteration: 448284
loss: 0.9981215000152588,grad_norm: 0.7148855798479827, iteration: 448285
loss: 1.0127520561218262,grad_norm: 0.9516983334841338, iteration: 448286
loss: 1.0008643865585327,grad_norm: 0.7188077781889014, iteration: 448287
loss: 0.9678862690925598,grad_norm: 0.805485059292573, iteration: 448288
loss: 0.9621841907501221,grad_norm: 0.7868134721120202, iteration: 448289
loss: 1.0135823488235474,grad_norm: 0.7867862923489825, iteration: 448290
loss: 0.9957587122917175,grad_norm: 0.8311980332043727, iteration: 448291
loss: 1.0276923179626465,grad_norm: 0.8890888650710241, iteration: 448292
loss: 0.9766272902488708,grad_norm: 0.7775258680937472, iteration: 448293
loss: 1.0005991458892822,grad_norm: 0.7604699560992781, iteration: 448294
loss: 0.9612338542938232,grad_norm: 0.8417055434265068, iteration: 448295
loss: 1.0291565656661987,grad_norm: 0.8247915909582952, iteration: 448296
loss: 1.010601282119751,grad_norm: 0.6443800816808462, iteration: 448297
loss: 0.9877055287361145,grad_norm: 0.6783541559441649, iteration: 448298
loss: 1.0001616477966309,grad_norm: 0.9999998012295109, iteration: 448299
loss: 0.9763246774673462,grad_norm: 0.6892875235542378, iteration: 448300
loss: 0.9643866419792175,grad_norm: 0.8284228215474739, iteration: 448301
loss: 0.9927462339401245,grad_norm: 0.7338602516019415, iteration: 448302
loss: 0.9824386239051819,grad_norm: 0.9049793659843642, iteration: 448303
loss: 1.0240914821624756,grad_norm: 0.9999997796367437, iteration: 448304
loss: 0.9953863024711609,grad_norm: 0.8535590235514429, iteration: 448305
loss: 0.9982839226722717,grad_norm: 0.7105473574451581, iteration: 448306
loss: 0.9617370367050171,grad_norm: 0.9999993924243201, iteration: 448307
loss: 1.0090175867080688,grad_norm: 0.8722326184132447, iteration: 448308
loss: 0.9842255711555481,grad_norm: 0.7560158487953691, iteration: 448309
loss: 1.0002325773239136,grad_norm: 0.8400920793298302, iteration: 448310
loss: 0.9625570178031921,grad_norm: 0.9999996694158311, iteration: 448311
loss: 1.0414767265319824,grad_norm: 0.9671669546890957, iteration: 448312
loss: 0.9832350611686707,grad_norm: 0.9999989680818502, iteration: 448313
loss: 0.99720698595047,grad_norm: 0.8616000018321002, iteration: 448314
loss: 1.0160512924194336,grad_norm: 0.9999994601772235, iteration: 448315
loss: 0.98569655418396,grad_norm: 0.6926319789463151, iteration: 448316
loss: 1.0118846893310547,grad_norm: 0.9999993196712251, iteration: 448317
loss: 0.9961370229721069,grad_norm: 0.661712610700539, iteration: 448318
loss: 1.0198695659637451,grad_norm: 0.8291296881536995, iteration: 448319
loss: 0.9945665597915649,grad_norm: 0.7365624395488346, iteration: 448320
loss: 1.0250308513641357,grad_norm: 0.7929928148642099, iteration: 448321
loss: 1.0412139892578125,grad_norm: 0.9999989838052706, iteration: 448322
loss: 0.9811934232711792,grad_norm: 0.7726222525602066, iteration: 448323
loss: 0.9762830138206482,grad_norm: 0.8787196054979608, iteration: 448324
loss: 1.0113911628723145,grad_norm: 0.8138245263644653, iteration: 448325
loss: 1.0003228187561035,grad_norm: 0.7817422857613954, iteration: 448326
loss: 1.025092363357544,grad_norm: 0.7675535153368488, iteration: 448327
loss: 1.0067734718322754,grad_norm: 0.855463533365542, iteration: 448328
loss: 1.0209064483642578,grad_norm: 0.7589609335356314, iteration: 448329
loss: 0.9905515313148499,grad_norm: 0.8253160741650422, iteration: 448330
loss: 1.0266907215118408,grad_norm: 0.8134950153522339, iteration: 448331
loss: 0.9792211651802063,grad_norm: 0.864492976437758, iteration: 448332
loss: 0.9926411509513855,grad_norm: 0.7416755592865072, iteration: 448333
loss: 1.1099430322647095,grad_norm: 0.9999991148673965, iteration: 448334
loss: 0.9909395575523376,grad_norm: 0.6897415486686059, iteration: 448335
loss: 1.030408501625061,grad_norm: 0.6486099964170758, iteration: 448336
loss: 1.032524585723877,grad_norm: 0.6858365141176311, iteration: 448337
loss: 1.0549134016036987,grad_norm: 0.9999998834418344, iteration: 448338
loss: 1.034449815750122,grad_norm: 0.7381011709292822, iteration: 448339
loss: 0.9895676970481873,grad_norm: 0.763454726802855, iteration: 448340
loss: 1.0430283546447754,grad_norm: 0.6412829819944564, iteration: 448341
loss: 1.0179662704467773,grad_norm: 0.9330004421301338, iteration: 448342
loss: 1.107163429260254,grad_norm: 0.9999993498237648, iteration: 448343
loss: 0.9796473383903503,grad_norm: 0.7686114841665399, iteration: 448344
loss: 0.9708776473999023,grad_norm: 0.6476798984433509, iteration: 448345
loss: 1.0034593343734741,grad_norm: 0.7992029718715072, iteration: 448346
loss: 1.0371192693710327,grad_norm: 0.9999992991277961, iteration: 448347
loss: 0.9958455562591553,grad_norm: 0.8007587360233125, iteration: 448348
loss: 0.9825418591499329,grad_norm: 0.7764262104002158, iteration: 448349
loss: 0.9611716866493225,grad_norm: 0.8793984697140909, iteration: 448350
loss: 0.9983623027801514,grad_norm: 0.629420562696777, iteration: 448351
loss: 0.9795765280723572,grad_norm: 0.8465505080008798, iteration: 448352
loss: 0.9903005361557007,grad_norm: 0.8393238567389756, iteration: 448353
loss: 1.0150395631790161,grad_norm: 0.7310697464547578, iteration: 448354
loss: 1.011413812637329,grad_norm: 0.7295067912772754, iteration: 448355
loss: 1.0043326616287231,grad_norm: 0.7202051616813006, iteration: 448356
loss: 1.0097064971923828,grad_norm: 0.6735059583430221, iteration: 448357
loss: 1.0395216941833496,grad_norm: 0.8369427378997576, iteration: 448358
loss: 1.008461356163025,grad_norm: 0.6703263090932934, iteration: 448359
loss: 1.0065969228744507,grad_norm: 0.7636944912360943, iteration: 448360
loss: 0.9739841222763062,grad_norm: 0.856147226614183, iteration: 448361
loss: 1.0193740129470825,grad_norm: 0.9999991126707523, iteration: 448362
loss: 1.0588322877883911,grad_norm: 0.8033058767973044, iteration: 448363
loss: 0.9960314631462097,grad_norm: 0.7810679711131093, iteration: 448364
loss: 1.011192798614502,grad_norm: 0.7487345501255759, iteration: 448365
loss: 1.0245411396026611,grad_norm: 0.7094965830447878, iteration: 448366
loss: 0.9948077201843262,grad_norm: 0.7469901109623478, iteration: 448367
loss: 1.0403989553451538,grad_norm: 0.8871956727038147, iteration: 448368
loss: 1.0050411224365234,grad_norm: 0.7961559877548321, iteration: 448369
loss: 0.9903756976127625,grad_norm: 0.7401633048295545, iteration: 448370
loss: 1.093342661857605,grad_norm: 0.9999998423905605, iteration: 448371
loss: 1.0454232692718506,grad_norm: 0.6491957841679514, iteration: 448372
loss: 1.000972867012024,grad_norm: 0.6478212833889346, iteration: 448373
loss: 1.0144559144973755,grad_norm: 0.867211395193079, iteration: 448374
loss: 1.01937997341156,grad_norm: 0.7301332664121363, iteration: 448375
loss: 1.002864956855774,grad_norm: 0.8494131665605298, iteration: 448376
loss: 1.077754259109497,grad_norm: 0.9999998434042904, iteration: 448377
loss: 0.9850638508796692,grad_norm: 0.6658085519332648, iteration: 448378
loss: 1.0689963102340698,grad_norm: 0.99999953702169, iteration: 448379
loss: 1.015499472618103,grad_norm: 0.8232722311180085, iteration: 448380
loss: 0.9610364437103271,grad_norm: 0.638124267349033, iteration: 448381
loss: 0.9364810585975647,grad_norm: 0.9033331427851191, iteration: 448382
loss: 1.0575988292694092,grad_norm: 0.8687812351321141, iteration: 448383
loss: 0.9837750196456909,grad_norm: 0.8779181779375674, iteration: 448384
loss: 0.9933012127876282,grad_norm: 0.6965103068793154, iteration: 448385
loss: 0.9883974194526672,grad_norm: 0.8265974377493865, iteration: 448386
loss: 1.0367316007614136,grad_norm: 0.9999994641361272, iteration: 448387
loss: 0.9816170334815979,grad_norm: 0.862997344157617, iteration: 448388
loss: 0.9546786546707153,grad_norm: 0.7524753620801171, iteration: 448389
loss: 0.9865662455558777,grad_norm: 0.7798647485662152, iteration: 448390
loss: 0.9915545582771301,grad_norm: 0.6752717979826685, iteration: 448391
loss: 0.9736365675926208,grad_norm: 0.7028211992960641, iteration: 448392
loss: 0.9796411991119385,grad_norm: 0.8552195844217648, iteration: 448393
loss: 1.00839102268219,grad_norm: 0.9468546975583695, iteration: 448394
loss: 0.9965495467185974,grad_norm: 0.7634738743861068, iteration: 448395
loss: 1.0515787601470947,grad_norm: 0.7097585974490298, iteration: 448396
loss: 1.0237469673156738,grad_norm: 0.7316948476212572, iteration: 448397
loss: 1.0026955604553223,grad_norm: 0.8044025012550685, iteration: 448398
loss: 0.9798623323440552,grad_norm: 0.7054079931507603, iteration: 448399
loss: 0.9927522540092468,grad_norm: 0.7266012392521937, iteration: 448400
loss: 1.024380087852478,grad_norm: 0.6782384561889356, iteration: 448401
loss: 1.1581974029541016,grad_norm: 0.9999994107835727, iteration: 448402
loss: 1.0170681476593018,grad_norm: 0.8412823757418281, iteration: 448403
loss: 0.9907771944999695,grad_norm: 0.8281846124799834, iteration: 448404
loss: 0.9978578686714172,grad_norm: 0.9691844416010218, iteration: 448405
loss: 1.0298209190368652,grad_norm: 0.8016492240204793, iteration: 448406
loss: 1.015344500541687,grad_norm: 0.6525831832604068, iteration: 448407
loss: 1.0135486125946045,grad_norm: 0.6533137890575487, iteration: 448408
loss: 0.9584320783615112,grad_norm: 0.7787658648752998, iteration: 448409
loss: 1.0634019374847412,grad_norm: 0.9999994540197931, iteration: 448410
loss: 0.9840351343154907,grad_norm: 0.6772639035213591, iteration: 448411
loss: 0.9818081259727478,grad_norm: 0.758973066613958, iteration: 448412
loss: 1.0847668647766113,grad_norm: 0.8587266730793469, iteration: 448413
loss: 0.9884248971939087,grad_norm: 0.6530369859006842, iteration: 448414
loss: 0.9786078929901123,grad_norm: 0.8436406129480626, iteration: 448415
loss: 1.0057600736618042,grad_norm: 0.7954225196381353, iteration: 448416
loss: 1.0071684122085571,grad_norm: 0.826909346856248, iteration: 448417
loss: 1.0207239389419556,grad_norm: 0.778017771197404, iteration: 448418
loss: 0.9810845851898193,grad_norm: 0.6753776436753327, iteration: 448419
loss: 1.0031154155731201,grad_norm: 0.9619761926795567, iteration: 448420
loss: 1.0312867164611816,grad_norm: 0.8139793291438301, iteration: 448421
loss: 0.9893637299537659,grad_norm: 0.6775534805039104, iteration: 448422
loss: 0.9726157188415527,grad_norm: 0.8260640688477282, iteration: 448423
loss: 0.9892098307609558,grad_norm: 0.6939566821813994, iteration: 448424
loss: 1.0086266994476318,grad_norm: 0.8136077878300828, iteration: 448425
loss: 1.0115407705307007,grad_norm: 0.9133970162347085, iteration: 448426
loss: 0.9795530438423157,grad_norm: 0.7749140938842027, iteration: 448427
loss: 0.9708932042121887,grad_norm: 0.7237200442937316, iteration: 448428
loss: 1.0230058431625366,grad_norm: 0.9999998739633981, iteration: 448429
loss: 0.9694731831550598,grad_norm: 0.7095178874398625, iteration: 448430
loss: 1.0304701328277588,grad_norm: 0.9873532463462606, iteration: 448431
loss: 0.9748976826667786,grad_norm: 0.7779388906873134, iteration: 448432
loss: 1.1668885946273804,grad_norm: 0.9999992303540853, iteration: 448433
loss: 0.9851517081260681,grad_norm: 0.7183136188773046, iteration: 448434
loss: 0.9810970425605774,grad_norm: 0.78044278662551, iteration: 448435
loss: 0.9821839928627014,grad_norm: 0.735138833311394, iteration: 448436
loss: 1.0162943601608276,grad_norm: 0.8208718071455798, iteration: 448437
loss: 0.9704962372779846,grad_norm: 0.6899488698596233, iteration: 448438
loss: 1.0131359100341797,grad_norm: 0.7666608027290353, iteration: 448439
loss: 1.060287356376648,grad_norm: 0.9999997622898499, iteration: 448440
loss: 0.9960033893585205,grad_norm: 0.7390989714191636, iteration: 448441
loss: 1.02930748462677,grad_norm: 0.6779851657838166, iteration: 448442
loss: 0.9963409304618835,grad_norm: 0.873223894090795, iteration: 448443
loss: 0.9792983531951904,grad_norm: 0.8711051968780312, iteration: 448444
loss: 0.9837100505828857,grad_norm: 0.74460466381327, iteration: 448445
loss: 1.0186549425125122,grad_norm: 0.9880648930857746, iteration: 448446
loss: 1.0021377801895142,grad_norm: 0.7669533366759366, iteration: 448447
loss: 1.0731405019760132,grad_norm: 0.9999998103695401, iteration: 448448
loss: 0.9796935319900513,grad_norm: 0.7874542742178827, iteration: 448449
loss: 1.006725788116455,grad_norm: 0.8676281273788427, iteration: 448450
loss: 0.9495111703872681,grad_norm: 0.8413295058224082, iteration: 448451
loss: 1.0114822387695312,grad_norm: 0.685255568234025, iteration: 448452
loss: 0.9565914273262024,grad_norm: 0.7616393488757089, iteration: 448453
loss: 0.9666297435760498,grad_norm: 0.7141496747741992, iteration: 448454
loss: 1.0071234703063965,grad_norm: 0.7493443820003169, iteration: 448455
loss: 1.002774715423584,grad_norm: 0.8566502870852711, iteration: 448456
loss: 0.9601064920425415,grad_norm: 0.7095899854237981, iteration: 448457
loss: 1.0231858491897583,grad_norm: 0.9999990005957649, iteration: 448458
loss: 0.9794453382492065,grad_norm: 0.7232670523922506, iteration: 448459
loss: 0.948117196559906,grad_norm: 0.7179884519733535, iteration: 448460
loss: 1.1227643489837646,grad_norm: 0.9999996612344514, iteration: 448461
loss: 1.0059916973114014,grad_norm: 0.7588243937790445, iteration: 448462
loss: 1.0140639543533325,grad_norm: 0.999999280874784, iteration: 448463
loss: 0.9988700747489929,grad_norm: 0.6597653485784917, iteration: 448464
loss: 0.9854960441589355,grad_norm: 0.9999989026348677, iteration: 448465
loss: 1.023673415184021,grad_norm: 0.8421987659402396, iteration: 448466
loss: 1.0062260627746582,grad_norm: 0.8214199659175988, iteration: 448467
loss: 0.9938019514083862,grad_norm: 0.767483553814388, iteration: 448468
loss: 1.0037070512771606,grad_norm: 0.7770574478640688, iteration: 448469
loss: 1.0187040567398071,grad_norm: 0.6657248497928941, iteration: 448470
loss: 1.0253323316574097,grad_norm: 0.7776464465859215, iteration: 448471
loss: 0.9802396893501282,grad_norm: 0.846147002968165, iteration: 448472
loss: 0.9746184349060059,grad_norm: 0.7258849250897185, iteration: 448473
loss: 1.049168586730957,grad_norm: 0.9999995515631551, iteration: 448474
loss: 0.9795635342597961,grad_norm: 0.9871024740286708, iteration: 448475
loss: 1.004747748374939,grad_norm: 0.7018366795725283, iteration: 448476
loss: 0.9954885840415955,grad_norm: 0.8082957117963742, iteration: 448477
loss: 0.9995164275169373,grad_norm: 0.7536533605289286, iteration: 448478
loss: 1.002026081085205,grad_norm: 0.9416311059890455, iteration: 448479
loss: 1.0746625661849976,grad_norm: 0.9999996244025185, iteration: 448480
loss: 0.9769488573074341,grad_norm: 0.8029048286419361, iteration: 448481
loss: 0.9913663864135742,grad_norm: 0.6681498794849742, iteration: 448482
loss: 1.0134742259979248,grad_norm: 0.7840186029670693, iteration: 448483
loss: 0.9965081810951233,grad_norm: 0.7317315407797472, iteration: 448484
loss: 1.0232517719268799,grad_norm: 0.7275651977079352, iteration: 448485
loss: 1.0283235311508179,grad_norm: 0.9100307528210914, iteration: 448486
loss: 0.9765727519989014,grad_norm: 0.9956222610537838, iteration: 448487
loss: 1.0287038087844849,grad_norm: 0.7384825717489759, iteration: 448488
loss: 0.9952848553657532,grad_norm: 0.8490619633265123, iteration: 448489
loss: 0.969631552696228,grad_norm: 0.8288022084004285, iteration: 448490
loss: 1.0457196235656738,grad_norm: 0.9999990989527907, iteration: 448491
loss: 1.0111733675003052,grad_norm: 0.8457088043808388, iteration: 448492
loss: 0.9873809814453125,grad_norm: 0.9999994532012543, iteration: 448493
loss: 1.0199707746505737,grad_norm: 0.7796164353847229, iteration: 448494
loss: 0.9901747703552246,grad_norm: 0.8250309201907557, iteration: 448495
loss: 1.013694167137146,grad_norm: 0.8286837149998774, iteration: 448496
loss: 1.0153484344482422,grad_norm: 0.844834696319748, iteration: 448497
loss: 1.0474498271942139,grad_norm: 0.7864799164228324, iteration: 448498
loss: 1.0162205696105957,grad_norm: 0.8297600289317494, iteration: 448499
loss: 1.0119572877883911,grad_norm: 0.7487281817439407, iteration: 448500
loss: 1.002013921737671,grad_norm: 0.8260278100083267, iteration: 448501
loss: 1.0157129764556885,grad_norm: 0.7600578135260426, iteration: 448502
loss: 1.0048881769180298,grad_norm: 0.8309400796996262, iteration: 448503
loss: 0.9904541969299316,grad_norm: 0.9053220883551801, iteration: 448504
loss: 1.0101518630981445,grad_norm: 0.8382487341404508, iteration: 448505
loss: 1.010040521621704,grad_norm: 0.681624335013269, iteration: 448506
loss: 0.9674234390258789,grad_norm: 0.7410504300707214, iteration: 448507
loss: 0.9945687651634216,grad_norm: 0.7947565695993914, iteration: 448508
loss: 1.0153353214263916,grad_norm: 0.7633181137051248, iteration: 448509
loss: 0.9948084354400635,grad_norm: 0.599655035378764, iteration: 448510
loss: 1.027753233909607,grad_norm: 0.7004474864387557, iteration: 448511
loss: 0.989599883556366,grad_norm: 0.7111575282280216, iteration: 448512
loss: 1.0121092796325684,grad_norm: 0.9287185789867832, iteration: 448513
loss: 0.9655224084854126,grad_norm: 0.7027557541646753, iteration: 448514
loss: 0.9878032207489014,grad_norm: 0.8430321619320574, iteration: 448515
loss: 1.0117982625961304,grad_norm: 0.8886149809564156, iteration: 448516
loss: 0.961797297000885,grad_norm: 0.898859767251996, iteration: 448517
loss: 0.988014280796051,grad_norm: 0.9999999863771002, iteration: 448518
loss: 0.9839156270027161,grad_norm: 0.8364685698482935, iteration: 448519
loss: 0.9650610089302063,grad_norm: 0.7884463895439057, iteration: 448520
loss: 0.9662275910377502,grad_norm: 0.7745859453284941, iteration: 448521
loss: 0.9935489296913147,grad_norm: 0.8718869386231977, iteration: 448522
loss: 0.991314709186554,grad_norm: 0.7544461842508045, iteration: 448523
loss: 1.009156584739685,grad_norm: 0.5961514833389064, iteration: 448524
loss: 1.0214742422103882,grad_norm: 0.9710586581997862, iteration: 448525
loss: 0.9829936027526855,grad_norm: 0.7635253720784284, iteration: 448526
loss: 1.07538640499115,grad_norm: 0.9999992622783803, iteration: 448527
loss: 0.9891197681427002,grad_norm: 0.6223187420137625, iteration: 448528
loss: 1.024176836013794,grad_norm: 0.7300554371110525, iteration: 448529
loss: 0.9566638469696045,grad_norm: 0.8014855986992429, iteration: 448530
loss: 1.0209132432937622,grad_norm: 0.9999994469022525, iteration: 448531
loss: 0.9823698401451111,grad_norm: 0.7761887256452696, iteration: 448532
loss: 0.9929352402687073,grad_norm: 0.665089265743425, iteration: 448533
loss: 1.0422812700271606,grad_norm: 0.8249208880504513, iteration: 448534
loss: 0.9908348321914673,grad_norm: 0.715748770016104, iteration: 448535
loss: 1.0839470624923706,grad_norm: 0.8961853246108004, iteration: 448536
loss: 0.9581190347671509,grad_norm: 0.9097275753498243, iteration: 448537
loss: 0.9757372140884399,grad_norm: 0.713703021585838, iteration: 448538
loss: 1.0244438648223877,grad_norm: 0.9999996458158508, iteration: 448539
loss: 1.0053578615188599,grad_norm: 0.9709496439441323, iteration: 448540
loss: 0.9780016541481018,grad_norm: 0.6474441482307823, iteration: 448541
loss: 0.9800221920013428,grad_norm: 0.683938301773656, iteration: 448542
loss: 1.0474450588226318,grad_norm: 0.9480325915370047, iteration: 448543
loss: 0.9708664417266846,grad_norm: 0.7762498131931891, iteration: 448544
loss: 1.0123894214630127,grad_norm: 0.7523342049590235, iteration: 448545
loss: 1.0007374286651611,grad_norm: 0.8554033273406576, iteration: 448546
loss: 0.9929157495498657,grad_norm: 0.9999993473318957, iteration: 448547
loss: 1.0737476348876953,grad_norm: 0.7908934907092698, iteration: 448548
loss: 1.177703857421875,grad_norm: 0.9999996047214856, iteration: 448549
loss: 0.997082531452179,grad_norm: 0.7781274001976917, iteration: 448550
loss: 1.0064245462417603,grad_norm: 0.6073761875923561, iteration: 448551
loss: 0.9967469573020935,grad_norm: 0.6915117681659471, iteration: 448552
loss: 0.9659125804901123,grad_norm: 0.783786368498787, iteration: 448553
loss: 1.0227919816970825,grad_norm: 0.7830312676381526, iteration: 448554
loss: 0.980522632598877,grad_norm: 0.8245493033897098, iteration: 448555
loss: 0.9840772747993469,grad_norm: 0.7755697645246259, iteration: 448556
loss: 1.0383280515670776,grad_norm: 0.9999992241979485, iteration: 448557
loss: 0.9743568301200867,grad_norm: 0.7893341154332987, iteration: 448558
loss: 1.0426394939422607,grad_norm: 0.9999991266692475, iteration: 448559
loss: 0.9875278472900391,grad_norm: 0.6065869445389314, iteration: 448560
loss: 0.9673207998275757,grad_norm: 0.8942672495412588, iteration: 448561
loss: 1.0355533361434937,grad_norm: 0.7056426933244603, iteration: 448562
loss: 0.9610137939453125,grad_norm: 0.866338785226502, iteration: 448563
loss: 0.9830084443092346,grad_norm: 0.6585351860467769, iteration: 448564
loss: 0.9956952333450317,grad_norm: 0.8437318427432803, iteration: 448565
loss: 1.0641342401504517,grad_norm: 0.9311171236808339, iteration: 448566
loss: 1.0181348323822021,grad_norm: 0.735330931579269, iteration: 448567
loss: 0.9944229125976562,grad_norm: 0.7638448513259743, iteration: 448568
loss: 1.0100054740905762,grad_norm: 0.6854607760179122, iteration: 448569
loss: 0.9766107797622681,grad_norm: 0.6261082144300315, iteration: 448570
loss: 0.9695596098899841,grad_norm: 0.9999991687760896, iteration: 448571
loss: 1.023073434829712,grad_norm: 0.7520329818747723, iteration: 448572
loss: 0.9562717080116272,grad_norm: 0.6816216042428166, iteration: 448573
loss: 1.049594759941101,grad_norm: 0.7757641832967537, iteration: 448574
loss: 1.003564715385437,grad_norm: 0.981746953029965, iteration: 448575
loss: 1.0106276273727417,grad_norm: 0.7154047099314389, iteration: 448576
loss: 1.0662457942962646,grad_norm: 0.9999998472254916, iteration: 448577
loss: 0.9606531262397766,grad_norm: 0.8398876092991634, iteration: 448578
loss: 0.9983432292938232,grad_norm: 0.775502480349921, iteration: 448579
loss: 0.9602413177490234,grad_norm: 0.7017670752642536, iteration: 448580
loss: 1.0386601686477661,grad_norm: 0.8571739011115064, iteration: 448581
loss: 1.0290896892547607,grad_norm: 0.8626492477850006, iteration: 448582
loss: 1.0653259754180908,grad_norm: 0.9999993062940234, iteration: 448583
loss: 0.9835940003395081,grad_norm: 0.9493103741980327, iteration: 448584
loss: 0.9875974655151367,grad_norm: 0.75530168810345, iteration: 448585
loss: 1.0109822750091553,grad_norm: 0.9414988124771999, iteration: 448586
loss: 0.98457932472229,grad_norm: 0.7248522270832135, iteration: 448587
loss: 1.0399326086044312,grad_norm: 0.9999993460163037, iteration: 448588
loss: 1.015149712562561,grad_norm: 0.7537482836373215, iteration: 448589
loss: 0.9988476634025574,grad_norm: 0.6400225449188705, iteration: 448590
loss: 1.0133318901062012,grad_norm: 0.7958418467004167, iteration: 448591
loss: 0.996526300907135,grad_norm: 0.8111827870919365, iteration: 448592
loss: 0.9774139523506165,grad_norm: 0.6542940661637333, iteration: 448593
loss: 1.0203607082366943,grad_norm: 0.9584247138918147, iteration: 448594
loss: 1.020227074623108,grad_norm: 0.8972180476236796, iteration: 448595
loss: 1.0114167928695679,grad_norm: 0.837235088550945, iteration: 448596
loss: 0.9781673550605774,grad_norm: 0.916789928190671, iteration: 448597
loss: 0.9822018146514893,grad_norm: 0.7108019435902794, iteration: 448598
loss: 1.0061852931976318,grad_norm: 0.7582426724708444, iteration: 448599
loss: 1.0111809968948364,grad_norm: 0.7739319440935492, iteration: 448600
loss: 0.9799589514732361,grad_norm: 0.6905041263109246, iteration: 448601
loss: 0.9781851768493652,grad_norm: 0.7203710138889244, iteration: 448602
loss: 1.001006007194519,grad_norm: 0.7652301209672695, iteration: 448603
loss: 1.022196888923645,grad_norm: 0.9999996861770087, iteration: 448604
loss: 1.0146396160125732,grad_norm: 0.8842903376865042, iteration: 448605
loss: 1.007055640220642,grad_norm: 0.7741343121298324, iteration: 448606
loss: 0.9836406111717224,grad_norm: 0.7656494538134955, iteration: 448607
loss: 1.0488542318344116,grad_norm: 0.9999993486077008, iteration: 448608
loss: 1.0297223329544067,grad_norm: 0.8141346428391221, iteration: 448609
loss: 0.9845892786979675,grad_norm: 0.9739773156397136, iteration: 448610
loss: 0.9953452348709106,grad_norm: 0.8878347918500713, iteration: 448611
loss: 1.0760022401809692,grad_norm: 0.6907089667419997, iteration: 448612
loss: 1.0108003616333008,grad_norm: 0.9500317707504619, iteration: 448613
loss: 0.9867030382156372,grad_norm: 0.7412644498584023, iteration: 448614
loss: 1.010616421699524,grad_norm: 0.8630331349851317, iteration: 448615
loss: 0.9987881183624268,grad_norm: 0.9999996987894717, iteration: 448616
loss: 1.0367913246154785,grad_norm: 0.9999991729815513, iteration: 448617
loss: 1.0237950086593628,grad_norm: 0.911663337874598, iteration: 448618
loss: 0.9773935675621033,grad_norm: 0.8140740031348105, iteration: 448619
loss: 0.980854868888855,grad_norm: 0.7973739959499525, iteration: 448620
loss: 1.053153395652771,grad_norm: 0.8466695970566477, iteration: 448621
loss: 0.9976115822792053,grad_norm: 0.9715485471341966, iteration: 448622
loss: 0.9825649261474609,grad_norm: 0.7084008199137756, iteration: 448623
loss: 0.9748480319976807,grad_norm: 0.7783890207333114, iteration: 448624
loss: 0.9951032400131226,grad_norm: 0.8016714611214192, iteration: 448625
loss: 1.053029179573059,grad_norm: 0.8893962976004047, iteration: 448626
loss: 1.0223957300186157,grad_norm: 0.744045838993912, iteration: 448627
loss: 1.0401989221572876,grad_norm: 0.9999994419869591, iteration: 448628
loss: 1.073683261871338,grad_norm: 0.9999990678831304, iteration: 448629
loss: 0.9447780847549438,grad_norm: 0.6677060802699533, iteration: 448630
loss: 0.9779670834541321,grad_norm: 0.8210758648819928, iteration: 448631
loss: 0.9884626865386963,grad_norm: 0.7672635562265117, iteration: 448632
loss: 1.0028432607650757,grad_norm: 0.8661848949198081, iteration: 448633
loss: 0.9843051433563232,grad_norm: 0.6611505254388734, iteration: 448634
loss: 1.0215327739715576,grad_norm: 0.6757561874048613, iteration: 448635
loss: 0.9781843423843384,grad_norm: 0.7802877053000005, iteration: 448636
loss: 1.0315091609954834,grad_norm: 0.8270235206110936, iteration: 448637
loss: 1.0201683044433594,grad_norm: 0.7077959040085156, iteration: 448638
loss: 1.0054932832717896,grad_norm: 0.7946040746022668, iteration: 448639
loss: 0.983573853969574,grad_norm: 0.7073258485635291, iteration: 448640
loss: 0.9984233379364014,grad_norm: 0.9643618882389262, iteration: 448641
loss: 1.0121744871139526,grad_norm: 0.7643361305627436, iteration: 448642
loss: 1.0112239122390747,grad_norm: 0.7579730942503375, iteration: 448643
loss: 1.0086320638656616,grad_norm: 0.7740822225476475, iteration: 448644
loss: 1.0529849529266357,grad_norm: 0.9999990433031417, iteration: 448645
loss: 1.008753776550293,grad_norm: 0.8306968109137254, iteration: 448646
loss: 0.9798184633255005,grad_norm: 0.8310167416770586, iteration: 448647
loss: 0.9732653498649597,grad_norm: 0.7840927271565883, iteration: 448648
loss: 1.0011515617370605,grad_norm: 0.7006880395748118, iteration: 448649
loss: 1.0183569192886353,grad_norm: 0.9999992584079334, iteration: 448650
loss: 0.9832810163497925,grad_norm: 0.7802064736609656, iteration: 448651
loss: 0.9655881524085999,grad_norm: 0.8462668765729817, iteration: 448652
loss: 1.0010066032409668,grad_norm: 0.6954293895091695, iteration: 448653
loss: 1.0240895748138428,grad_norm: 0.999999121016609, iteration: 448654
loss: 0.9918633103370667,grad_norm: 0.754865004162138, iteration: 448655
loss: 1.0101133584976196,grad_norm: 0.9999993842489504, iteration: 448656
loss: 0.9824441075325012,grad_norm: 0.8372652182007182, iteration: 448657
loss: 1.0047093629837036,grad_norm: 0.7807571906995542, iteration: 448658
loss: 1.0701485872268677,grad_norm: 0.9999994175833067, iteration: 448659
loss: 0.9935818314552307,grad_norm: 0.8151595075681822, iteration: 448660
loss: 1.0268429517745972,grad_norm: 0.7541022890859435, iteration: 448661
loss: 1.0071316957473755,grad_norm: 0.7929960790226184, iteration: 448662
loss: 1.0019094944000244,grad_norm: 0.943776989393588, iteration: 448663
loss: 0.971900999546051,grad_norm: 0.7351142058033978, iteration: 448664
loss: 0.9988706707954407,grad_norm: 0.5936300069839681, iteration: 448665
loss: 1.0034618377685547,grad_norm: 0.9999995278087788, iteration: 448666
loss: 1.0582306385040283,grad_norm: 0.8468778723904316, iteration: 448667
loss: 1.002682089805603,grad_norm: 0.8447485004008398, iteration: 448668
loss: 0.970859706401825,grad_norm: 0.7125949749171283, iteration: 448669
loss: 1.026161789894104,grad_norm: 0.8011794621454964, iteration: 448670
loss: 1.008545994758606,grad_norm: 0.9999991840373202, iteration: 448671
loss: 0.948647677898407,grad_norm: 0.7785367261846141, iteration: 448672
loss: 0.9939302206039429,grad_norm: 0.714250720623346, iteration: 448673
loss: 0.9876348972320557,grad_norm: 0.7236118162656806, iteration: 448674
loss: 1.0175188779830933,grad_norm: 0.8239819615381154, iteration: 448675
loss: 0.977716863155365,grad_norm: 0.6801248686807051, iteration: 448676
loss: 0.9882490038871765,grad_norm: 0.7924694146362367, iteration: 448677
loss: 1.0016580820083618,grad_norm: 0.7913845837293493, iteration: 448678
loss: 1.090378999710083,grad_norm: 0.6818387008166212, iteration: 448679
loss: 1.0157973766326904,grad_norm: 0.6920715800364482, iteration: 448680
loss: 0.9382315874099731,grad_norm: 0.913329804666383, iteration: 448681
loss: 1.0037403106689453,grad_norm: 0.8807667439656057, iteration: 448682
loss: 0.9889828562736511,grad_norm: 0.732037442455358, iteration: 448683
loss: 0.9903745055198669,grad_norm: 0.7361355703900234, iteration: 448684
loss: 1.0389149188995361,grad_norm: 0.6874915130498315, iteration: 448685
loss: 1.0014424324035645,grad_norm: 0.8680076510251951, iteration: 448686
loss: 0.9866434335708618,grad_norm: 0.7424966794091736, iteration: 448687
loss: 1.0415018796920776,grad_norm: 0.9618088966650786, iteration: 448688
loss: 0.9794875383377075,grad_norm: 0.7463569214583512, iteration: 448689
loss: 1.0253634452819824,grad_norm: 0.7319084891240142, iteration: 448690
loss: 0.9988930821418762,grad_norm: 0.721183888462438, iteration: 448691
loss: 0.9773368835449219,grad_norm: 0.7640677555892067, iteration: 448692
loss: 0.9960130453109741,grad_norm: 0.7507573677474062, iteration: 448693
loss: 1.0389323234558105,grad_norm: 0.8678251317805666, iteration: 448694
loss: 1.0134483575820923,grad_norm: 0.6565562139998682, iteration: 448695
loss: 1.1055161952972412,grad_norm: 0.9999992840751057, iteration: 448696
loss: 0.9997799396514893,grad_norm: 0.7529775129180954, iteration: 448697
loss: 0.9422550201416016,grad_norm: 0.7892322207012965, iteration: 448698
loss: 0.9873967170715332,grad_norm: 0.743301745846774, iteration: 448699
loss: 0.9781888723373413,grad_norm: 0.7493086173435614, iteration: 448700
loss: 1.010508418083191,grad_norm: 0.815412475498925, iteration: 448701
loss: 0.9892600178718567,grad_norm: 0.8529543713615214, iteration: 448702
loss: 0.9891848564147949,grad_norm: 0.654902312785547, iteration: 448703
loss: 1.007514238357544,grad_norm: 0.6855238790757776, iteration: 448704
loss: 0.9898653030395508,grad_norm: 0.7022331710817304, iteration: 448705
loss: 1.0343989133834839,grad_norm: 0.8821729956376873, iteration: 448706
loss: 0.9884245991706848,grad_norm: 0.861030364139882, iteration: 448707
loss: 0.9887741208076477,grad_norm: 0.7325663554516185, iteration: 448708
loss: 1.0210086107254028,grad_norm: 0.7590375536441927, iteration: 448709
loss: 1.0280872583389282,grad_norm: 0.9999993645053431, iteration: 448710
loss: 1.0289686918258667,grad_norm: 0.7823901034347934, iteration: 448711
loss: 0.9591160416603088,grad_norm: 0.773516196436712, iteration: 448712
loss: 1.0643627643585205,grad_norm: 0.8457712967342003, iteration: 448713
loss: 1.0166152715682983,grad_norm: 0.7359161926466246, iteration: 448714
loss: 0.9873653650283813,grad_norm: 0.7921353229113539, iteration: 448715
loss: 0.9922244548797607,grad_norm: 0.7890840759412147, iteration: 448716
loss: 1.038018822669983,grad_norm: 0.8839893690472436, iteration: 448717
loss: 0.9617982506752014,grad_norm: 0.6881257015784975, iteration: 448718
loss: 1.021295189857483,grad_norm: 0.8676621555492938, iteration: 448719
loss: 1.0349727869033813,grad_norm: 0.8588177342403666, iteration: 448720
loss: 1.0033607482910156,grad_norm: 0.831506909111202, iteration: 448721
loss: 0.9602867960929871,grad_norm: 0.6598590666321292, iteration: 448722
loss: 0.9927940368652344,grad_norm: 0.8465190022883485, iteration: 448723
loss: 0.9850188493728638,grad_norm: 0.7286048291563308, iteration: 448724
loss: 0.9844375252723694,grad_norm: 0.8163374585279967, iteration: 448725
loss: 0.975561261177063,grad_norm: 0.696439390054015, iteration: 448726
loss: 1.0775327682495117,grad_norm: 0.9999997530545246, iteration: 448727
loss: 0.9849026203155518,grad_norm: 0.7181577097747607, iteration: 448728
loss: 0.9845225811004639,grad_norm: 0.6813148380142875, iteration: 448729
loss: 1.0733345746994019,grad_norm: 0.9999995564309321, iteration: 448730
loss: 1.0032029151916504,grad_norm: 0.8084754805294148, iteration: 448731
loss: 1.0073128938674927,grad_norm: 0.710435021802863, iteration: 448732
loss: 0.9872894287109375,grad_norm: 0.640682382667367, iteration: 448733
loss: 0.9974501132965088,grad_norm: 0.7893088065838605, iteration: 448734
loss: 0.9907042980194092,grad_norm: 0.8403754073051403, iteration: 448735
loss: 1.0283409357070923,grad_norm: 0.7746545773744888, iteration: 448736
loss: 1.005379319190979,grad_norm: 0.8098693767321279, iteration: 448737
loss: 0.9970408082008362,grad_norm: 0.9999989639352799, iteration: 448738
loss: 0.9832355976104736,grad_norm: 0.7885970771201379, iteration: 448739
loss: 1.018580436706543,grad_norm: 0.999999592715402, iteration: 448740
loss: 0.9873062968254089,grad_norm: 0.9336673802228885, iteration: 448741
loss: 0.9652253985404968,grad_norm: 0.7645593352803759, iteration: 448742
loss: 1.0146082639694214,grad_norm: 0.6252264621181075, iteration: 448743
loss: 0.9794635772705078,grad_norm: 0.7215064309223898, iteration: 448744
loss: 1.034717321395874,grad_norm: 0.8802916447646194, iteration: 448745
loss: 0.9950539469718933,grad_norm: 0.7548453165530455, iteration: 448746
loss: 1.0358033180236816,grad_norm: 0.7517815286668647, iteration: 448747
loss: 1.0571955442428589,grad_norm: 0.9999997578147776, iteration: 448748
loss: 1.0267363786697388,grad_norm: 0.9663797760762731, iteration: 448749
loss: 0.9878145456314087,grad_norm: 0.9999997218550948, iteration: 448750
loss: 1.0688178539276123,grad_norm: 0.8455418368084723, iteration: 448751
loss: 1.0135815143585205,grad_norm: 0.7630207677394772, iteration: 448752
loss: 1.0072169303894043,grad_norm: 0.9999993407257578, iteration: 448753
loss: 1.0106889009475708,grad_norm: 0.8450560120195754, iteration: 448754
loss: 1.0429401397705078,grad_norm: 0.8069725184572805, iteration: 448755
loss: 0.9860696792602539,grad_norm: 0.6385071415083021, iteration: 448756
loss: 1.043832778930664,grad_norm: 0.7779343661276032, iteration: 448757
loss: 0.9865145087242126,grad_norm: 0.7820509894850852, iteration: 448758
loss: 1.2071069478988647,grad_norm: 0.9999995043963177, iteration: 448759
loss: 1.0008459091186523,grad_norm: 0.7534819813810608, iteration: 448760
loss: 1.0246654748916626,grad_norm: 0.7290339140905843, iteration: 448761
loss: 0.9785881638526917,grad_norm: 0.8946325354621835, iteration: 448762
loss: 1.0231268405914307,grad_norm: 0.9999993586415422, iteration: 448763
loss: 0.986951470375061,grad_norm: 0.688749410260869, iteration: 448764
loss: 1.02336847782135,grad_norm: 0.7780412145965044, iteration: 448765
loss: 0.9846667051315308,grad_norm: 0.8537499397756153, iteration: 448766
loss: 1.0067061185836792,grad_norm: 0.9999991865434262, iteration: 448767
loss: 1.0372697114944458,grad_norm: 0.9999992892256141, iteration: 448768
loss: 1.0254714488983154,grad_norm: 0.8974449880938933, iteration: 448769
loss: 0.9595567584037781,grad_norm: 0.886022870411145, iteration: 448770
loss: 0.9834678769111633,grad_norm: 0.7119186084007186, iteration: 448771
loss: 1.0227344036102295,grad_norm: 0.9999991169636805, iteration: 448772
loss: 0.9834914803504944,grad_norm: 0.9999991457575217, iteration: 448773
loss: 0.9637653231620789,grad_norm: 0.9337324027313474, iteration: 448774
loss: 1.0270987749099731,grad_norm: 0.7822730544338632, iteration: 448775
loss: 1.0407670736312866,grad_norm: 0.9999991185673174, iteration: 448776
loss: 0.9871207475662231,grad_norm: 0.7449570177700958, iteration: 448777
loss: 1.0526028871536255,grad_norm: 0.9999991592607836, iteration: 448778
loss: 1.0295625925064087,grad_norm: 0.9999991565416487, iteration: 448779
loss: 1.1672389507293701,grad_norm: 0.9999992756707363, iteration: 448780
loss: 1.011196255683899,grad_norm: 0.8046268974921393, iteration: 448781
loss: 0.9836919903755188,grad_norm: 0.9999994647000333, iteration: 448782
loss: 1.0140714645385742,grad_norm: 0.7303386962577234, iteration: 448783
loss: 0.95972740650177,grad_norm: 0.6927704422582895, iteration: 448784
loss: 1.0486767292022705,grad_norm: 0.760997156327873, iteration: 448785
loss: 0.9475454688072205,grad_norm: 0.8175921738293299, iteration: 448786
loss: 1.000659704208374,grad_norm: 0.7223191445784872, iteration: 448787
loss: 1.059861421585083,grad_norm: 0.9999996937351918, iteration: 448788
loss: 1.0245200395584106,grad_norm: 0.8635658824665993, iteration: 448789
loss: 0.9991465210914612,grad_norm: 0.9999996279223667, iteration: 448790
loss: 1.0049726963043213,grad_norm: 0.9999999008028848, iteration: 448791
loss: 0.9840487837791443,grad_norm: 0.8063319389647733, iteration: 448792
loss: 1.1177818775177002,grad_norm: 0.9999996552616959, iteration: 448793
loss: 0.9365428686141968,grad_norm: 0.7832633417461755, iteration: 448794
loss: 0.9861609935760498,grad_norm: 0.7900997498603222, iteration: 448795
loss: 0.9841629862785339,grad_norm: 0.6801027936241928, iteration: 448796
loss: 1.0498675107955933,grad_norm: 0.97042145055565, iteration: 448797
loss: 1.0136656761169434,grad_norm: 0.9999998449709856, iteration: 448798
loss: 0.9687373042106628,grad_norm: 0.7932812887594493, iteration: 448799
loss: 1.0690873861312866,grad_norm: 0.9999997989727136, iteration: 448800
loss: 1.0316799879074097,grad_norm: 0.9073193993344988, iteration: 448801
loss: 1.0068811178207397,grad_norm: 0.9527417534557634, iteration: 448802
loss: 0.9753327369689941,grad_norm: 0.6983224586424688, iteration: 448803
loss: 0.9777379631996155,grad_norm: 0.99999932070994, iteration: 448804
loss: 1.0364406108856201,grad_norm: 0.8426660720854617, iteration: 448805
loss: 1.0093603134155273,grad_norm: 0.7349900162663104, iteration: 448806
loss: 1.106904149055481,grad_norm: 0.9999996981713659, iteration: 448807
loss: 1.0072455406188965,grad_norm: 0.7849329010348348, iteration: 448808
loss: 1.0423756837844849,grad_norm: 0.8306649609508617, iteration: 448809
loss: 1.0006996393203735,grad_norm: 0.8438819689090232, iteration: 448810
loss: 1.0262186527252197,grad_norm: 0.8421792603705535, iteration: 448811
loss: 1.029528260231018,grad_norm: 0.9474570487827707, iteration: 448812
loss: 0.9813392162322998,grad_norm: 0.7758665944751288, iteration: 448813
loss: 0.999148428440094,grad_norm: 0.6919858995029214, iteration: 448814
loss: 1.0012930631637573,grad_norm: 0.9999991020889506, iteration: 448815
loss: 1.0943236351013184,grad_norm: 0.9999998663693904, iteration: 448816
loss: 1.0177578926086426,grad_norm: 0.8133805995869681, iteration: 448817
loss: 0.9868242144584656,grad_norm: 0.7850033634206902, iteration: 448818
loss: 1.0711884498596191,grad_norm: 0.8412648597389041, iteration: 448819
loss: 1.0642564296722412,grad_norm: 0.8682482938430203, iteration: 448820
loss: 1.0430229902267456,grad_norm: 0.9999999808447919, iteration: 448821
loss: 1.0181804895401,grad_norm: 0.7957766365650268, iteration: 448822
loss: 1.069929838180542,grad_norm: 0.9999991279666487, iteration: 448823
loss: 0.9937600493431091,grad_norm: 0.7833971669558377, iteration: 448824
loss: 1.0029454231262207,grad_norm: 0.9111317646348448, iteration: 448825
loss: 0.9856478571891785,grad_norm: 0.9207487612498869, iteration: 448826
loss: 1.1132341623306274,grad_norm: 0.9314853553320291, iteration: 448827
loss: 0.9670886993408203,grad_norm: 0.9051379422789658, iteration: 448828
loss: 1.0049775838851929,grad_norm: 0.8389002877415228, iteration: 448829
loss: 0.9988029599189758,grad_norm: 0.7008785717293275, iteration: 448830
loss: 0.9957239627838135,grad_norm: 0.9999997505501993, iteration: 448831
loss: 1.0161739587783813,grad_norm: 0.7712573857086676, iteration: 448832
loss: 0.9911430478096008,grad_norm: 0.880133845953504, iteration: 448833
loss: 0.9784006476402283,grad_norm: 0.916289122471195, iteration: 448834
loss: 1.035077691078186,grad_norm: 0.8132626347046155, iteration: 448835
loss: 0.9592429399490356,grad_norm: 0.9999998887100883, iteration: 448836
loss: 0.9786940813064575,grad_norm: 0.7482702102800548, iteration: 448837
loss: 0.9995437860488892,grad_norm: 0.7127116835658023, iteration: 448838
loss: 1.0284193754196167,grad_norm: 0.8289621993424566, iteration: 448839
loss: 0.9768796563148499,grad_norm: 0.9276223875806873, iteration: 448840
loss: 0.9736750721931458,grad_norm: 0.8033376506098844, iteration: 448841
loss: 1.0076667070388794,grad_norm: 0.7301567852585007, iteration: 448842
loss: 1.021351933479309,grad_norm: 0.9999992314810519, iteration: 448843
loss: 0.976912796497345,grad_norm: 0.6704028192227284, iteration: 448844
loss: 1.1129142045974731,grad_norm: 0.9999991539477041, iteration: 448845
loss: 0.9473410248756409,grad_norm: 0.7311828270060978, iteration: 448846
loss: 1.0088646411895752,grad_norm: 0.9391585041329854, iteration: 448847
loss: 1.0299559831619263,grad_norm: 0.9999990634830477, iteration: 448848
loss: 0.9982661604881287,grad_norm: 0.7648391602759251, iteration: 448849
loss: 1.0042461156845093,grad_norm: 0.8763527414072653, iteration: 448850
loss: 1.0254662036895752,grad_norm: 0.7480616170233344, iteration: 448851
loss: 0.9937930107116699,grad_norm: 0.9370096687114501, iteration: 448852
loss: 0.9872504472732544,grad_norm: 0.6637693595303886, iteration: 448853
loss: 1.0258857011795044,grad_norm: 0.8564999326418299, iteration: 448854
loss: 0.9673606157302856,grad_norm: 0.7121274621808518, iteration: 448855
loss: 1.06732976436615,grad_norm: 0.7287099639754847, iteration: 448856
loss: 0.9747782349586487,grad_norm: 0.9999999172040445, iteration: 448857
loss: 1.008890986442566,grad_norm: 0.6420725271631669, iteration: 448858
loss: 0.9973331093788147,grad_norm: 0.7299016010559779, iteration: 448859
loss: 1.0178624391555786,grad_norm: 0.7386203232699706, iteration: 448860
loss: 1.0802310705184937,grad_norm: 0.9999992911683203, iteration: 448861
loss: 1.027176022529602,grad_norm: 0.7634811640996901, iteration: 448862
loss: 1.0511194467544556,grad_norm: 0.9999992182439857, iteration: 448863
loss: 0.9816906452178955,grad_norm: 0.7259869799790774, iteration: 448864
loss: 0.9916640520095825,grad_norm: 0.7806444220531331, iteration: 448865
loss: 1.0052675008773804,grad_norm: 0.999999289791744, iteration: 448866
loss: 1.1004341840744019,grad_norm: 0.9999990222208931, iteration: 448867
loss: 1.0419622659683228,grad_norm: 0.9999994892304287, iteration: 448868
loss: 1.032010793685913,grad_norm: 0.8540658153257751, iteration: 448869
loss: 1.00846529006958,grad_norm: 0.8164982335730303, iteration: 448870
loss: 0.9803600311279297,grad_norm: 0.7953199802299469, iteration: 448871
loss: 1.011860966682434,grad_norm: 0.761334405227638, iteration: 448872
loss: 1.0608049631118774,grad_norm: 0.9999991337906717, iteration: 448873
loss: 0.9925761818885803,grad_norm: 0.6996343519279313, iteration: 448874
loss: 1.0695180892944336,grad_norm: 0.9005039261644623, iteration: 448875
loss: 0.9856739640235901,grad_norm: 0.999999687966893, iteration: 448876
loss: 0.992572009563446,grad_norm: 0.7109330140554474, iteration: 448877
loss: 1.0205289125442505,grad_norm: 0.9299934930805808, iteration: 448878
loss: 1.0503219366073608,grad_norm: 0.999999278489832, iteration: 448879
loss: 0.9982733726501465,grad_norm: 0.7599618298469281, iteration: 448880
loss: 1.0103394985198975,grad_norm: 0.9999995383672686, iteration: 448881
loss: 1.0919283628463745,grad_norm: 0.9999991479653336, iteration: 448882
loss: 1.0298367738723755,grad_norm: 0.7129918553135168, iteration: 448883
loss: 0.9988284111022949,grad_norm: 0.7122618748431537, iteration: 448884
loss: 0.9952456951141357,grad_norm: 0.8768618825060953, iteration: 448885
loss: 1.027529239654541,grad_norm: 0.7286646029715961, iteration: 448886
loss: 1.057626485824585,grad_norm: 0.9213656581605603, iteration: 448887
loss: 1.0085996389389038,grad_norm: 0.9562765049815696, iteration: 448888
loss: 0.9693593978881836,grad_norm: 0.7544299768628828, iteration: 448889
loss: 1.033677101135254,grad_norm: 0.9999996742118856, iteration: 448890
loss: 0.9417718648910522,grad_norm: 0.9999990466142419, iteration: 448891
loss: 1.003028154373169,grad_norm: 0.7491846334313024, iteration: 448892
loss: 0.9928853511810303,grad_norm: 0.673800247433401, iteration: 448893
loss: 0.9957267642021179,grad_norm: 0.6641131883325269, iteration: 448894
loss: 1.0163031816482544,grad_norm: 0.9999990455127866, iteration: 448895
loss: 1.223669409751892,grad_norm: 0.9999993733030133, iteration: 448896
loss: 1.0142735242843628,grad_norm: 0.9633586121039599, iteration: 448897
loss: 0.9553437829017639,grad_norm: 0.9999992785802595, iteration: 448898
loss: 0.9638428092002869,grad_norm: 0.8829362501480739, iteration: 448899
loss: 0.9972673654556274,grad_norm: 0.9999991021305968, iteration: 448900
loss: 0.9826687574386597,grad_norm: 0.8211048567561139, iteration: 448901
loss: 0.9888327121734619,grad_norm: 0.7347842118867577, iteration: 448902
loss: 0.9685192704200745,grad_norm: 0.8146822039211092, iteration: 448903
loss: 1.0455235242843628,grad_norm: 0.9999993278060384, iteration: 448904
loss: 1.0175000429153442,grad_norm: 0.9999991311654093, iteration: 448905
loss: 0.9947588443756104,grad_norm: 0.706696184493452, iteration: 448906
loss: 0.9724810123443604,grad_norm: 0.7035370343807793, iteration: 448907
loss: 1.0191097259521484,grad_norm: 0.6693928550754982, iteration: 448908
loss: 1.0029066801071167,grad_norm: 0.8130165379133136, iteration: 448909
loss: 1.092397928237915,grad_norm: 0.8821826928525163, iteration: 448910
loss: 1.0252848863601685,grad_norm: 0.8261873454575703, iteration: 448911
loss: 1.0450581312179565,grad_norm: 0.6859825247641985, iteration: 448912
loss: 1.1279346942901611,grad_norm: 0.9999998981822492, iteration: 448913
loss: 0.9664144515991211,grad_norm: 0.8633454009538624, iteration: 448914
loss: 0.9668480753898621,grad_norm: 0.719343298734411, iteration: 448915
loss: 0.9940780401229858,grad_norm: 0.7902050006257205, iteration: 448916
loss: 1.0636122226715088,grad_norm: 0.9999995192864392, iteration: 448917
loss: 0.9954710006713867,grad_norm: 0.9321635826422637, iteration: 448918
loss: 0.9967279434204102,grad_norm: 0.6812979111590396, iteration: 448919
loss: 1.0127803087234497,grad_norm: 0.9999990356311654, iteration: 448920
loss: 0.968669593334198,grad_norm: 0.721673143992436, iteration: 448921
loss: 1.0065970420837402,grad_norm: 0.8811770343792175, iteration: 448922
loss: 0.9998488426208496,grad_norm: 0.6297941337157906, iteration: 448923
loss: 0.9932640790939331,grad_norm: 0.8289565068962774, iteration: 448924
loss: 1.033834457397461,grad_norm: 0.9309972065109156, iteration: 448925
loss: 1.0153592824935913,grad_norm: 0.9999991548175838, iteration: 448926
loss: 0.9826577305793762,grad_norm: 0.6186291836993271, iteration: 448927
loss: 1.0273737907409668,grad_norm: 0.9999992933761126, iteration: 448928
loss: 1.0233345031738281,grad_norm: 0.999999097871851, iteration: 448929
loss: 1.0360420942306519,grad_norm: 0.9999997870301632, iteration: 448930
loss: 1.0723093748092651,grad_norm: 0.6372842156453026, iteration: 448931
loss: 1.029484510421753,grad_norm: 0.9999998052904044, iteration: 448932
loss: 1.0114381313323975,grad_norm: 0.8381139817881983, iteration: 448933
loss: 0.934880793094635,grad_norm: 0.9999994671085212, iteration: 448934
loss: 1.0378015041351318,grad_norm: 0.9999993561014354, iteration: 448935
loss: 1.0219690799713135,grad_norm: 0.9999996227042265, iteration: 448936
loss: 1.0724040269851685,grad_norm: 0.9398288128254593, iteration: 448937
loss: 0.9897949695587158,grad_norm: 0.6322124338843328, iteration: 448938
loss: 1.0448646545410156,grad_norm: 0.8463003864466004, iteration: 448939
loss: 0.9985150098800659,grad_norm: 0.9999995655768679, iteration: 448940
loss: 0.995831310749054,grad_norm: 0.9034323140330259, iteration: 448941
loss: 1.0594638586044312,grad_norm: 0.9999996263729426, iteration: 448942
loss: 0.9742684960365295,grad_norm: 0.8337808382759907, iteration: 448943
loss: 0.977722704410553,grad_norm: 0.8613823587446819, iteration: 448944
loss: 1.046545147895813,grad_norm: 0.8529060393619659, iteration: 448945
loss: 1.0440398454666138,grad_norm: 0.8640976098913381, iteration: 448946
loss: 1.0226958990097046,grad_norm: 0.8779674969171611, iteration: 448947
loss: 1.092219591140747,grad_norm: 0.9032820484400128, iteration: 448948
loss: 0.9734179377555847,grad_norm: 0.8204717544785222, iteration: 448949
loss: 1.0222874879837036,grad_norm: 0.9999991663464072, iteration: 448950
loss: 1.00822913646698,grad_norm: 0.7175786666618533, iteration: 448951
loss: 1.0202035903930664,grad_norm: 0.731412872874367, iteration: 448952
loss: 0.9885162711143494,grad_norm: 0.7531693160229578, iteration: 448953
loss: 0.9685423374176025,grad_norm: 0.7633559285112212, iteration: 448954
loss: 1.0161458253860474,grad_norm: 0.7861541839653872, iteration: 448955
loss: 1.0184104442596436,grad_norm: 0.8522789969008442, iteration: 448956
loss: 0.9913035035133362,grad_norm: 0.8347885119680151, iteration: 448957
loss: 1.000392198562622,grad_norm: 0.7468778976403492, iteration: 448958
loss: 1.0145593881607056,grad_norm: 0.6879413069519514, iteration: 448959
loss: 0.9868884086608887,grad_norm: 0.8451418066833374, iteration: 448960
loss: 1.063602089881897,grad_norm: 0.9999995610901123, iteration: 448961
loss: 0.9763543605804443,grad_norm: 0.912973136109062, iteration: 448962
loss: 1.0157850980758667,grad_norm: 0.734697605818936, iteration: 448963
loss: 1.0173025131225586,grad_norm: 0.7233127034966619, iteration: 448964
loss: 0.9700616598129272,grad_norm: 0.7205874636643479, iteration: 448965
loss: 0.9981467127799988,grad_norm: 0.7512277509550513, iteration: 448966
loss: 0.9891519546508789,grad_norm: 0.7380793714126304, iteration: 448967
loss: 0.9980337619781494,grad_norm: 0.8095727055037503, iteration: 448968
loss: 1.0537186861038208,grad_norm: 0.8826855896224837, iteration: 448969
loss: 0.9436177015304565,grad_norm: 0.8336677778033812, iteration: 448970
loss: 1.0201590061187744,grad_norm: 0.9999995382294368, iteration: 448971
loss: 0.9623664617538452,grad_norm: 0.7652111828133752, iteration: 448972
loss: 1.0529496669769287,grad_norm: 0.9999990881104192, iteration: 448973
loss: 0.967634379863739,grad_norm: 0.7071598016278625, iteration: 448974
loss: 0.9812716841697693,grad_norm: 0.8840535378174836, iteration: 448975
loss: 1.0433133840560913,grad_norm: 0.9430602842479737, iteration: 448976
loss: 0.99021977186203,grad_norm: 1.0000000944473932, iteration: 448977
loss: 1.030280351638794,grad_norm: 0.9102546784793358, iteration: 448978
loss: 0.9915294051170349,grad_norm: 0.7940639116868691, iteration: 448979
loss: 1.0209773778915405,grad_norm: 0.8446100998138037, iteration: 448980
loss: 0.9731336832046509,grad_norm: 0.8681118393213191, iteration: 448981
loss: 1.0168683528900146,grad_norm: 0.8742832185097681, iteration: 448982
loss: 0.9869831204414368,grad_norm: 0.7156769420900975, iteration: 448983
loss: 0.9983981251716614,grad_norm: 0.8862345768798724, iteration: 448984
loss: 1.0311293601989746,grad_norm: 0.6653385034143093, iteration: 448985
loss: 0.9726255536079407,grad_norm: 0.8510983259885141, iteration: 448986
loss: 1.003706455230713,grad_norm: 0.7670813857910255, iteration: 448987
loss: 1.001382827758789,grad_norm: 0.7167987246318868, iteration: 448988
loss: 0.9979074597358704,grad_norm: 0.9999997414247923, iteration: 448989
loss: 1.0079988241195679,grad_norm: 0.7854283600995565, iteration: 448990
loss: 1.0036323070526123,grad_norm: 0.9999992583706363, iteration: 448991
loss: 1.023816466331482,grad_norm: 0.9076193234814152, iteration: 448992
loss: 0.9635999798774719,grad_norm: 0.8731901869419388, iteration: 448993
loss: 1.0136773586273193,grad_norm: 0.75996792483961, iteration: 448994
loss: 1.0028117895126343,grad_norm: 0.7446776321319781, iteration: 448995
loss: 0.9898084402084351,grad_norm: 0.6993793197983816, iteration: 448996
loss: 0.9736964106559753,grad_norm: 0.7122658390552619, iteration: 448997
loss: 0.9872460961341858,grad_norm: 0.8712630141842761, iteration: 448998
loss: 1.0450857877731323,grad_norm: 0.999999210298511, iteration: 448999
loss: 0.9766635298728943,grad_norm: 0.74912232272064, iteration: 449000
loss: 0.9796598553657532,grad_norm: 0.7231198348097425, iteration: 449001
loss: 0.9981518387794495,grad_norm: 0.8664336717022884, iteration: 449002
loss: 1.0087136030197144,grad_norm: 0.70485550224695, iteration: 449003
loss: 0.9727404117584229,grad_norm: 0.6413132732355328, iteration: 449004
loss: 1.0728821754455566,grad_norm: 0.9999998887293045, iteration: 449005
loss: 0.9770192503929138,grad_norm: 0.760661534108092, iteration: 449006
loss: 0.9943991303443909,grad_norm: 0.8416917670525254, iteration: 449007
loss: 0.9898422360420227,grad_norm: 0.8765603123797735, iteration: 449008
loss: 0.9933663010597229,grad_norm: 0.99999967935377, iteration: 449009
loss: 1.001180648803711,grad_norm: 0.7608189578519418, iteration: 449010
loss: 1.017401933670044,grad_norm: 0.9999998192531718, iteration: 449011
loss: 0.9815394878387451,grad_norm: 0.7706365572607653, iteration: 449012
loss: 1.1106586456298828,grad_norm: 0.9999991488616892, iteration: 449013
loss: 0.9997597932815552,grad_norm: 0.9999990571108671, iteration: 449014
loss: 1.023061990737915,grad_norm: 0.783994352062616, iteration: 449015
loss: 1.0524295568466187,grad_norm: 0.9999995066984625, iteration: 449016
loss: 1.0370008945465088,grad_norm: 0.8313311015148886, iteration: 449017
loss: 1.105060338973999,grad_norm: 0.9999998756679873, iteration: 449018
loss: 0.995576798915863,grad_norm: 0.8889435343587568, iteration: 449019
loss: 0.9975040555000305,grad_norm: 0.8727535237090026, iteration: 449020
loss: 1.1023273468017578,grad_norm: 0.849678283603722, iteration: 449021
loss: 1.0430643558502197,grad_norm: 0.7809991623795954, iteration: 449022
loss: 1.1021569967269897,grad_norm: 0.7695792392792523, iteration: 449023
loss: 1.0421862602233887,grad_norm: 0.856131812416576, iteration: 449024
loss: 1.0098206996917725,grad_norm: 0.9999991946992878, iteration: 449025
loss: 0.9652184247970581,grad_norm: 0.773193362704168, iteration: 449026
loss: 0.9649515151977539,grad_norm: 0.7958155693219752, iteration: 449027
loss: 1.0016183853149414,grad_norm: 0.8245289607019223, iteration: 449028
loss: 1.0239064693450928,grad_norm: 0.7177257818321325, iteration: 449029
loss: 0.990058958530426,grad_norm: 0.7103720390960234, iteration: 449030
loss: 1.1002659797668457,grad_norm: 0.9999992852350236, iteration: 449031
loss: 1.1337817907333374,grad_norm: 0.9999992373356205, iteration: 449032
loss: 1.0465750694274902,grad_norm: 0.8220860138403099, iteration: 449033
loss: 1.0328211784362793,grad_norm: 0.9260275864643299, iteration: 449034
loss: 1.00907301902771,grad_norm: 0.89780071265581, iteration: 449035
loss: 0.970479428768158,grad_norm: 0.9249374822186384, iteration: 449036
loss: 1.0447016954421997,grad_norm: 0.8546153008134206, iteration: 449037
loss: 1.0077753067016602,grad_norm: 0.7952250794639772, iteration: 449038
loss: 1.0348165035247803,grad_norm: 0.9999997880577438, iteration: 449039
loss: 0.9969328045845032,grad_norm: 0.7819347303501624, iteration: 449040
loss: 1.0639313459396362,grad_norm: 0.8775279146972568, iteration: 449041
loss: 1.0653024911880493,grad_norm: 0.9624056425221394, iteration: 449042
loss: 0.9955892562866211,grad_norm: 0.7820443209229804, iteration: 449043
loss: 1.010852336883545,grad_norm: 0.8833499487464302, iteration: 449044
loss: 0.9773848056793213,grad_norm: 0.748337061963079, iteration: 449045
loss: 1.0153738260269165,grad_norm: 0.7815613347798053, iteration: 449046
loss: 1.0288333892822266,grad_norm: 0.8560398463956396, iteration: 449047
loss: 1.0109360218048096,grad_norm: 0.8027188254920694, iteration: 449048
loss: 1.0345971584320068,grad_norm: 0.8021233282801358, iteration: 449049
loss: 1.0060148239135742,grad_norm: 0.740183108276664, iteration: 449050
loss: 1.0176068544387817,grad_norm: 0.99999998148626, iteration: 449051
loss: 0.9778193831443787,grad_norm: 0.8013482293151305, iteration: 449052
loss: 1.0366086959838867,grad_norm: 0.9999999024672215, iteration: 449053
loss: 1.0741195678710938,grad_norm: 0.9999994375964127, iteration: 449054
loss: 1.0206393003463745,grad_norm: 0.8330016032549928, iteration: 449055
loss: 1.0028713941574097,grad_norm: 0.6931770401695225, iteration: 449056
loss: 1.064476728439331,grad_norm: 0.9999992086474403, iteration: 449057
loss: 1.0003745555877686,grad_norm: 0.7002200480550718, iteration: 449058
loss: 0.9594330787658691,grad_norm: 0.776036956556667, iteration: 449059
loss: 0.977345883846283,grad_norm: 0.7517648679376474, iteration: 449060
loss: 0.983353853225708,grad_norm: 0.8202670352322559, iteration: 449061
loss: 0.9994993805885315,grad_norm: 0.8412958577426016, iteration: 449062
loss: 1.0210896730422974,grad_norm: 0.8118585680931494, iteration: 449063
loss: 0.9877788424491882,grad_norm: 0.662326718110196, iteration: 449064
loss: 1.0384875535964966,grad_norm: 0.9999999797246645, iteration: 449065
loss: 1.0264126062393188,grad_norm: 0.7455536000982883, iteration: 449066
loss: 1.0020827054977417,grad_norm: 0.7845559913356757, iteration: 449067
loss: 0.9844708442687988,grad_norm: 0.9654089327671641, iteration: 449068
loss: 1.0023384094238281,grad_norm: 0.7620856026834324, iteration: 449069
loss: 1.002021074295044,grad_norm: 0.8775095995498217, iteration: 449070
loss: 1.0709400177001953,grad_norm: 0.9999996489908357, iteration: 449071
loss: 1.003116250038147,grad_norm: 0.8841525394596144, iteration: 449072
loss: 1.0123906135559082,grad_norm: 0.9999997164652846, iteration: 449073
loss: 0.9791796207427979,grad_norm: 0.7979207722093961, iteration: 449074
loss: 1.0045307874679565,grad_norm: 0.6967140964234704, iteration: 449075
loss: 1.0233458280563354,grad_norm: 0.9999997692154741, iteration: 449076
loss: 1.0722739696502686,grad_norm: 0.9999994727621833, iteration: 449077
loss: 0.9928092956542969,grad_norm: 0.7202023399801833, iteration: 449078
loss: 0.9907576441764832,grad_norm: 0.84441152453719, iteration: 449079
loss: 0.9719158411026001,grad_norm: 0.941828495134539, iteration: 449080
loss: 1.0223015546798706,grad_norm: 0.7334268536176602, iteration: 449081
loss: 0.9947195053100586,grad_norm: 0.8179365535511777, iteration: 449082
loss: 1.0035078525543213,grad_norm: 1.0000000107788016, iteration: 449083
loss: 0.9745050072669983,grad_norm: 0.8525384141772592, iteration: 449084
loss: 1.005387544631958,grad_norm: 0.9147882165935369, iteration: 449085
loss: 0.9971500039100647,grad_norm: 0.7115198490827865, iteration: 449086
loss: 1.0292104482650757,grad_norm: 0.809933453753867, iteration: 449087
loss: 0.9670267105102539,grad_norm: 0.9999998726264333, iteration: 449088
loss: 0.9717810750007629,grad_norm: 0.8112836552702603, iteration: 449089
loss: 1.0318138599395752,grad_norm: 0.7572662244522553, iteration: 449090
loss: 0.9914829134941101,grad_norm: 0.6530396196141823, iteration: 449091
loss: 1.0180209875106812,grad_norm: 0.7714973872273809, iteration: 449092
loss: 0.9984404444694519,grad_norm: 0.7273659063250286, iteration: 449093
loss: 1.0163073539733887,grad_norm: 0.8971250576682798, iteration: 449094
loss: 1.0879034996032715,grad_norm: 0.9999999338831442, iteration: 449095
loss: 0.968819260597229,grad_norm: 0.8318244141002211, iteration: 449096
loss: 0.9884278774261475,grad_norm: 0.8801420770324515, iteration: 449097
loss: 0.994868814945221,grad_norm: 0.9986372251756318, iteration: 449098
loss: 1.0389683246612549,grad_norm: 0.8339074090124508, iteration: 449099
loss: 1.0069636106491089,grad_norm: 0.6608788024719936, iteration: 449100
loss: 1.0143091678619385,grad_norm: 0.8108905860540534, iteration: 449101
loss: 0.9882274270057678,grad_norm: 0.9999997282703988, iteration: 449102
loss: 0.9802711606025696,grad_norm: 0.7245935047763493, iteration: 449103
loss: 1.0017282962799072,grad_norm: 0.6489667656951151, iteration: 449104
loss: 1.1112117767333984,grad_norm: 0.9999994534109968, iteration: 449105
loss: 0.9659889936447144,grad_norm: 0.822700950678638, iteration: 449106
loss: 1.0361238718032837,grad_norm: 0.9999995263072459, iteration: 449107
loss: 1.0066027641296387,grad_norm: 0.9366307562134645, iteration: 449108
loss: 1.1187430620193481,grad_norm: 0.9999996634902096, iteration: 449109
loss: 0.9848964214324951,grad_norm: 0.8575397150469706, iteration: 449110
loss: 1.0182642936706543,grad_norm: 0.9999996393035002, iteration: 449111
loss: 1.0181430578231812,grad_norm: 0.8812639918825846, iteration: 449112
loss: 1.015718936920166,grad_norm: 0.8777531819160825, iteration: 449113
loss: 1.0053514242172241,grad_norm: 0.8117762788175347, iteration: 449114
loss: 0.9918716549873352,grad_norm: 0.9244402523957957, iteration: 449115
loss: 1.0063753128051758,grad_norm: 0.9999989735409578, iteration: 449116
loss: 1.2260464429855347,grad_norm: 0.999999522419779, iteration: 449117
loss: 1.09474515914917,grad_norm: 0.7336466337925605, iteration: 449118
loss: 1.0760602951049805,grad_norm: 0.7782805507415079, iteration: 449119
loss: 0.9815935492515564,grad_norm: 0.999999271404088, iteration: 449120
loss: 0.9983317852020264,grad_norm: 0.865508827011364, iteration: 449121
loss: 1.043092131614685,grad_norm: 0.9999998562418365, iteration: 449122
loss: 0.9775299429893494,grad_norm: 0.7806899446621969, iteration: 449123
loss: 1.012744426727295,grad_norm: 0.8744598184482444, iteration: 449124
loss: 1.050335168838501,grad_norm: 0.782417042112342, iteration: 449125
loss: 1.0169953107833862,grad_norm: 0.9999997920442483, iteration: 449126
loss: 1.0085723400115967,grad_norm: 0.7865000904627626, iteration: 449127
loss: 1.0361143350601196,grad_norm: 0.8563361672908699, iteration: 449128
loss: 1.0427014827728271,grad_norm: 0.9999998021896342, iteration: 449129
loss: 1.161055326461792,grad_norm: 0.9999998795288233, iteration: 449130
loss: 1.0319627523422241,grad_norm: 0.7957298639989834, iteration: 449131
loss: 1.0019724369049072,grad_norm: 0.9999991958993885, iteration: 449132
loss: 1.0457839965820312,grad_norm: 0.9999999068589955, iteration: 449133
loss: 0.9720192551612854,grad_norm: 0.7717425910274798, iteration: 449134
loss: 1.0176818370819092,grad_norm: 0.9999990793391014, iteration: 449135
loss: 1.017389178276062,grad_norm: 0.8091341095454158, iteration: 449136
loss: 0.9920870065689087,grad_norm: 0.9636331811378497, iteration: 449137
loss: 1.041689395904541,grad_norm: 0.9215103804441378, iteration: 449138
loss: 1.0242927074432373,grad_norm: 0.8632161814759607, iteration: 449139
loss: 1.0021238327026367,grad_norm: 0.7881923617761086, iteration: 449140
loss: 1.037247896194458,grad_norm: 0.7928055675675578, iteration: 449141
loss: 1.06795334815979,grad_norm: 0.839302555600542, iteration: 449142
loss: 0.9340225458145142,grad_norm: 0.6996633766676401, iteration: 449143
loss: 0.9530412554740906,grad_norm: 0.753654713925521, iteration: 449144
loss: 1.0058335065841675,grad_norm: 0.7878891902929641, iteration: 449145
loss: 1.0258674621582031,grad_norm: 0.845771281583602, iteration: 449146
loss: 1.0947359800338745,grad_norm: 0.9999991907046971, iteration: 449147
loss: 1.1196261644363403,grad_norm: 0.786275214236974, iteration: 449148
loss: 0.9986023306846619,grad_norm: 0.6359667384311463, iteration: 449149
loss: 1.0819706916809082,grad_norm: 0.796463276270918, iteration: 449150
loss: 0.9699001312255859,grad_norm: 0.666343698570092, iteration: 449151
loss: 1.005176305770874,grad_norm: 0.8766002825813567, iteration: 449152
loss: 1.1216932535171509,grad_norm: 0.9999992259813539, iteration: 449153
loss: 1.0627269744873047,grad_norm: 0.7244400458317617, iteration: 449154
loss: 0.9959063529968262,grad_norm: 0.8199422738025552, iteration: 449155
loss: 1.069862723350525,grad_norm: 0.893480242900595, iteration: 449156
loss: 1.0484384298324585,grad_norm: 0.999999193506041, iteration: 449157
loss: 1.1467244625091553,grad_norm: 0.999999741870314, iteration: 449158
loss: 1.2088984251022339,grad_norm: 0.9999992828721923, iteration: 449159
loss: 1.0522133111953735,grad_norm: 0.8939683752523161, iteration: 449160
loss: 0.9796761870384216,grad_norm: 0.702944863003474, iteration: 449161
loss: 1.0124479532241821,grad_norm: 0.9999990455873426, iteration: 449162
loss: 1.0367146730422974,grad_norm: 0.9999995119048488, iteration: 449163
loss: 1.0780246257781982,grad_norm: 0.7977591442423839, iteration: 449164
loss: 1.101932168006897,grad_norm: 0.9999992550171767, iteration: 449165
loss: 1.0035017728805542,grad_norm: 0.9999994839705351, iteration: 449166
loss: 0.978190541267395,grad_norm: 0.9999994201182536, iteration: 449167
loss: 1.050728678703308,grad_norm: 0.8926021385783063, iteration: 449168
loss: 1.0891311168670654,grad_norm: 0.9470141072330978, iteration: 449169
loss: 1.0039994716644287,grad_norm: 0.904258733102936, iteration: 449170
loss: 1.017964482307434,grad_norm: 0.8311751459098676, iteration: 449171
loss: 0.9875255823135376,grad_norm: 0.7952445730176895, iteration: 449172
loss: 1.1036914587020874,grad_norm: 0.9023699697069042, iteration: 449173
loss: 1.0795648097991943,grad_norm: 0.9999991323055416, iteration: 449174
loss: 0.9809175133705139,grad_norm: 0.7916320121128162, iteration: 449175
loss: 1.033449649810791,grad_norm: 0.8289298995865186, iteration: 449176
loss: 1.06769597530365,grad_norm: 0.7842152260580623, iteration: 449177
loss: 1.1045024394989014,grad_norm: 0.9070703316812175, iteration: 449178
loss: 1.0985275506973267,grad_norm: 0.8730271769246949, iteration: 449179
loss: 1.0190415382385254,grad_norm: 0.8833854068630088, iteration: 449180
loss: 0.9619994163513184,grad_norm: 0.7644166440040727, iteration: 449181
loss: 1.0072945356369019,grad_norm: 0.727942621116113, iteration: 449182
loss: 1.0671054124832153,grad_norm: 0.7390311850289097, iteration: 449183
loss: 1.0831058025360107,grad_norm: 0.9826683434718482, iteration: 449184
loss: 1.0670199394226074,grad_norm: 0.7766341517519295, iteration: 449185
loss: 1.0273898839950562,grad_norm: 0.9999989882990378, iteration: 449186
loss: 1.0349640846252441,grad_norm: 0.9999992465392686, iteration: 449187
loss: 1.012528657913208,grad_norm: 0.9999996119250071, iteration: 449188
loss: 1.1631078720092773,grad_norm: 0.9999992296078382, iteration: 449189
loss: 1.1075513362884521,grad_norm: 0.7957749493424949, iteration: 449190
loss: 0.991088330745697,grad_norm: 0.9061173720201459, iteration: 449191
loss: 0.9827908277511597,grad_norm: 0.8602696452250191, iteration: 449192
loss: 1.0356543064117432,grad_norm: 0.7312507179568215, iteration: 449193
loss: 1.0156140327453613,grad_norm: 0.9999998114153471, iteration: 449194
loss: 1.0008573532104492,grad_norm: 0.7419613320906615, iteration: 449195
loss: 1.0001300573349,grad_norm: 0.9999990362041556, iteration: 449196
loss: 1.0113203525543213,grad_norm: 0.6861000390909052, iteration: 449197
loss: 1.0751733779907227,grad_norm: 0.8932965537148753, iteration: 449198
loss: 1.010140299797058,grad_norm: 0.8925692688132003, iteration: 449199
loss: 1.0311282873153687,grad_norm: 0.7147058662222063, iteration: 449200
loss: 1.0025527477264404,grad_norm: 0.8050426749070206, iteration: 449201
loss: 1.0655795335769653,grad_norm: 0.7894197124631983, iteration: 449202
loss: 1.0395472049713135,grad_norm: 0.9999989174625695, iteration: 449203
loss: 1.0700994729995728,grad_norm: 0.6168236827466193, iteration: 449204
loss: 1.0178956985473633,grad_norm: 0.8658583055955021, iteration: 449205
loss: 1.008013367652893,grad_norm: 0.9807868906128823, iteration: 449206
loss: 0.9842689633369446,grad_norm: 0.8664098314666653, iteration: 449207
loss: 1.003489375114441,grad_norm: 0.8043569972618763, iteration: 449208
loss: 1.007215976715088,grad_norm: 0.7851733868374411, iteration: 449209
loss: 1.048439860343933,grad_norm: 0.7989233347950778, iteration: 449210
loss: 1.1107558012008667,grad_norm: 0.9999991243826571, iteration: 449211
loss: 1.153978943824768,grad_norm: 0.8270136316806843, iteration: 449212
loss: 1.0373784303665161,grad_norm: 0.9368794211273522, iteration: 449213
loss: 0.9744264483451843,grad_norm: 0.7279018946963367, iteration: 449214
loss: 1.0015190839767456,grad_norm: 0.7605368433739205, iteration: 449215
loss: 0.9866296648979187,grad_norm: 0.7649146614204474, iteration: 449216
loss: 1.0087159872055054,grad_norm: 0.8756560229536194, iteration: 449217
loss: 0.9882474541664124,grad_norm: 0.8550869173047447, iteration: 449218
loss: 1.0095930099487305,grad_norm: 0.8014005376377423, iteration: 449219
loss: 1.0651628971099854,grad_norm: 0.870996162531129, iteration: 449220
loss: 1.0250999927520752,grad_norm: 0.9174437158236638, iteration: 449221
loss: 1.1015994548797607,grad_norm: 0.9999995531466804, iteration: 449222
loss: 1.0105171203613281,grad_norm: 0.7435056134916223, iteration: 449223
loss: 1.0419492721557617,grad_norm: 0.79496060965244, iteration: 449224
loss: 1.0572450160980225,grad_norm: 0.9999999407236286, iteration: 449225
loss: 1.0491282939910889,grad_norm: 0.8458644819500918, iteration: 449226
loss: 1.0038384199142456,grad_norm: 0.9999997695488905, iteration: 449227
loss: 1.0893851518630981,grad_norm: 0.8574409805362065, iteration: 449228
loss: 1.0010344982147217,grad_norm: 0.9901641865941115, iteration: 449229
loss: 1.0088499784469604,grad_norm: 0.7405976925359269, iteration: 449230
loss: 0.9836512207984924,grad_norm: 0.9831346568554125, iteration: 449231
loss: 1.121138334274292,grad_norm: 0.9999995902701014, iteration: 449232
loss: 1.0077214241027832,grad_norm: 0.8103808695879601, iteration: 449233
loss: 1.0134727954864502,grad_norm: 0.7307786057888118, iteration: 449234
loss: 1.0267482995986938,grad_norm: 0.931528158250576, iteration: 449235
loss: 0.9798479080200195,grad_norm: 0.8232226300457742, iteration: 449236
loss: 0.9684953093528748,grad_norm: 0.5780991825982599, iteration: 449237
loss: 0.9865435361862183,grad_norm: 0.7321256438388969, iteration: 449238
loss: 1.015104055404663,grad_norm: 0.9999996238103833, iteration: 449239
loss: 0.9937663674354553,grad_norm: 0.7883797505888229, iteration: 449240
loss: 0.986034095287323,grad_norm: 0.8008597769419843, iteration: 449241
loss: 0.9465873837471008,grad_norm: 0.7793122997987668, iteration: 449242
loss: 1.002894639968872,grad_norm: 0.7019188766269374, iteration: 449243
loss: 1.0254393815994263,grad_norm: 0.6682682807216817, iteration: 449244
loss: 0.981778085231781,grad_norm: 0.8447768256991808, iteration: 449245
loss: 1.036080241203308,grad_norm: 0.9457985959962193, iteration: 449246
loss: 0.9745914936065674,grad_norm: 0.9617362344751152, iteration: 449247
loss: 1.0070549249649048,grad_norm: 0.7711290689279056, iteration: 449248
loss: 1.1290255784988403,grad_norm: 0.9999992819608595, iteration: 449249
loss: 1.019408106803894,grad_norm: 0.6400783010314361, iteration: 449250
loss: 1.0343403816223145,grad_norm: 0.9556647955196862, iteration: 449251
loss: 0.9941026568412781,grad_norm: 0.9493239516878554, iteration: 449252
loss: 1.0277191400527954,grad_norm: 0.7277376467905825, iteration: 449253
loss: 0.9913845658302307,grad_norm: 0.7449378690218618, iteration: 449254
loss: 1.069035530090332,grad_norm: 0.9999997898928262, iteration: 449255
loss: 0.9960538148880005,grad_norm: 0.6654083150871443, iteration: 449256
loss: 0.9914767742156982,grad_norm: 0.6698901843205587, iteration: 449257
loss: 1.0935133695602417,grad_norm: 0.7772836878456119, iteration: 449258
loss: 1.01736581325531,grad_norm: 0.8005363869236521, iteration: 449259
loss: 0.9835721850395203,grad_norm: 0.8540803477920804, iteration: 449260
loss: 1.022567868232727,grad_norm: 0.9999993100325852, iteration: 449261
loss: 1.0074063539505005,grad_norm: 0.8016326502795675, iteration: 449262
loss: 1.0563448667526245,grad_norm: 0.9999991058673519, iteration: 449263
loss: 1.0451409816741943,grad_norm: 0.8585437644219155, iteration: 449264
loss: 1.0629713535308838,grad_norm: 0.8099436312422849, iteration: 449265
loss: 0.9987050890922546,grad_norm: 0.9075405873828879, iteration: 449266
loss: 1.0315074920654297,grad_norm: 0.8580802464708267, iteration: 449267
loss: 1.1952292919158936,grad_norm: 0.9999996231849368, iteration: 449268
loss: 0.99974524974823,grad_norm: 0.8325579945793261, iteration: 449269
loss: 0.9983989596366882,grad_norm: 0.8788858793105997, iteration: 449270
loss: 1.0210727453231812,grad_norm: 0.6685280970458515, iteration: 449271
loss: 1.0465394258499146,grad_norm: 0.9339035871009873, iteration: 449272
loss: 0.9917104840278625,grad_norm: 0.7831490993865837, iteration: 449273
loss: 1.1279572248458862,grad_norm: 0.8007016113961335, iteration: 449274
loss: 0.9713556170463562,grad_norm: 0.7420413953564285, iteration: 449275
loss: 1.0132768154144287,grad_norm: 0.931817987494785, iteration: 449276
loss: 1.0912060737609863,grad_norm: 0.9999992388059913, iteration: 449277
loss: 1.2862868309020996,grad_norm: 0.9999993171445543, iteration: 449278
loss: 0.9784953594207764,grad_norm: 0.8544664909799935, iteration: 449279
loss: 0.98405522108078,grad_norm: 0.9732910865173366, iteration: 449280
loss: 1.005388855934143,grad_norm: 0.8998141111026144, iteration: 449281
loss: 1.006619930267334,grad_norm: 0.9084957032256192, iteration: 449282
loss: 1.141623616218567,grad_norm: 0.9999998102953722, iteration: 449283
loss: 1.009351134300232,grad_norm: 0.7030731222004787, iteration: 449284
loss: 1.0131994485855103,grad_norm: 0.7973705293480509, iteration: 449285
loss: 1.0163590908050537,grad_norm: 0.6089325862338767, iteration: 449286
loss: 0.996313214302063,grad_norm: 0.9999992837199112, iteration: 449287
loss: 1.03598952293396,grad_norm: 0.8088093880244566, iteration: 449288
loss: 1.0697084665298462,grad_norm: 0.8724169453496665, iteration: 449289
loss: 1.0944554805755615,grad_norm: 0.9999999619246012, iteration: 449290
loss: 1.0375537872314453,grad_norm: 0.9999993464424426, iteration: 449291
loss: 1.135073184967041,grad_norm: 0.7105318832530271, iteration: 449292
loss: 1.027450442314148,grad_norm: 0.7798585923539062, iteration: 449293
loss: 1.085040807723999,grad_norm: 0.9999990719342358, iteration: 449294
loss: 0.9858462810516357,grad_norm: 0.9153885284608064, iteration: 449295
loss: 1.0563806295394897,grad_norm: 0.8502395640218955, iteration: 449296
loss: 1.204697608947754,grad_norm: 0.9999993285091976, iteration: 449297
loss: 1.0771139860153198,grad_norm: 0.9999991280595146, iteration: 449298
loss: 1.1346194744110107,grad_norm: 0.9999997051347778, iteration: 449299
loss: 1.0816258192062378,grad_norm: 0.9999994211727646, iteration: 449300
loss: 0.9888917803764343,grad_norm: 0.7800953023464323, iteration: 449301
loss: 1.0691912174224854,grad_norm: 0.7026285036528963, iteration: 449302
loss: 1.0085211992263794,grad_norm: 0.9999998260432167, iteration: 449303
loss: 1.0830053091049194,grad_norm: 0.814452242430281, iteration: 449304
loss: 1.0653488636016846,grad_norm: 0.9999993288639003, iteration: 449305
loss: 0.9741266965866089,grad_norm: 0.7735762151185399, iteration: 449306
loss: 1.0413384437561035,grad_norm: 0.7089189744887046, iteration: 449307
loss: 1.092788815498352,grad_norm: 0.9825380917597298, iteration: 449308
loss: 1.0210299491882324,grad_norm: 0.673630137308259, iteration: 449309
loss: 1.1019299030303955,grad_norm: 0.9470821481352145, iteration: 449310
loss: 1.0604885816574097,grad_norm: 0.7457600129599191, iteration: 449311
loss: 1.160715937614441,grad_norm: 0.9999999387466598, iteration: 449312
loss: 1.041854977607727,grad_norm: 0.8640653387220226, iteration: 449313
loss: 1.0784595012664795,grad_norm: 0.9999999229564206, iteration: 449314
loss: 1.0809279680252075,grad_norm: 0.9999998972231058, iteration: 449315
loss: 1.054189920425415,grad_norm: 0.8192737922450216, iteration: 449316
loss: 1.0997942686080933,grad_norm: 0.9025050840864137, iteration: 449317
loss: 1.032459020614624,grad_norm: 0.9429534192970258, iteration: 449318
loss: 1.0122947692871094,grad_norm: 0.9441016758055504, iteration: 449319
loss: 1.0445419549942017,grad_norm: 0.9999998345941052, iteration: 449320
loss: 1.075183629989624,grad_norm: 0.9999997970668836, iteration: 449321
loss: 1.0526599884033203,grad_norm: 0.9999992666569343, iteration: 449322
loss: 1.10752272605896,grad_norm: 0.9999991193372695, iteration: 449323
loss: 1.013022780418396,grad_norm: 0.9999992769864595, iteration: 449324
loss: 1.1218831539154053,grad_norm: 0.7630305166502855, iteration: 449325
loss: 1.026059865951538,grad_norm: 0.7593539058906456, iteration: 449326
loss: 1.08970046043396,grad_norm: 0.999999462147474, iteration: 449327
loss: 1.0097134113311768,grad_norm: 0.9999993861747724, iteration: 449328
loss: 1.0903058052062988,grad_norm: 0.8976785807319532, iteration: 449329
loss: 1.0035308599472046,grad_norm: 0.999999820020132, iteration: 449330
loss: 1.0886598825454712,grad_norm: 0.9999991747350836, iteration: 449331
loss: 1.2032512426376343,grad_norm: 0.9537778595320192, iteration: 449332
loss: 1.0738658905029297,grad_norm: 0.9999990863377075, iteration: 449333
loss: 1.1753946542739868,grad_norm: 0.9999999744282617, iteration: 449334
loss: 1.1119945049285889,grad_norm: 0.999999663840058, iteration: 449335
loss: 1.0217880010604858,grad_norm: 0.9371964763133497, iteration: 449336
loss: 1.1275758743286133,grad_norm: 0.9999993156023362, iteration: 449337
loss: 1.0159752368927002,grad_norm: 0.8818841697159299, iteration: 449338
loss: 1.2643604278564453,grad_norm: 0.9999994668873183, iteration: 449339
loss: 1.195022702217102,grad_norm: 0.9999997784536027, iteration: 449340
loss: 1.0749406814575195,grad_norm: 0.9999991340663619, iteration: 449341
loss: 1.0304685831069946,grad_norm: 0.9999991212888125, iteration: 449342
loss: 1.0953843593597412,grad_norm: 0.9999997026297242, iteration: 449343
loss: 1.1323975324630737,grad_norm: 0.9999994679358784, iteration: 449344
loss: 1.1326725482940674,grad_norm: 0.8041005377723389, iteration: 449345
loss: 1.1389364004135132,grad_norm: 0.8167681259724056, iteration: 449346
loss: 1.0375146865844727,grad_norm: 0.8121483135211417, iteration: 449347
loss: 1.0743647813796997,grad_norm: 0.9999991352195107, iteration: 449348
loss: 1.0835329294204712,grad_norm: 0.9999998511476443, iteration: 449349
loss: 1.0950148105621338,grad_norm: 0.7269164275855734, iteration: 449350
loss: 1.0407330989837646,grad_norm: 0.925895137562382, iteration: 449351
loss: 1.1314619779586792,grad_norm: 0.8026114575057532, iteration: 449352
loss: 1.0847804546356201,grad_norm: 0.7191365575676664, iteration: 449353
loss: 1.1750773191452026,grad_norm: 0.9238887391881184, iteration: 449354
loss: 1.005796194076538,grad_norm: 0.910378472645048, iteration: 449355
loss: 1.131781816482544,grad_norm: 0.9999991635008413, iteration: 449356
loss: 1.0145410299301147,grad_norm: 0.7987758056507412, iteration: 449357
loss: 1.090096116065979,grad_norm: 0.8373133103000868, iteration: 449358
loss: 1.0848397016525269,grad_norm: 0.9999995679295641, iteration: 449359
loss: 1.0466804504394531,grad_norm: 0.8660215363993302, iteration: 449360
loss: 1.165377140045166,grad_norm: 0.9999995409226949, iteration: 449361
loss: 1.1205428838729858,grad_norm: 0.9999997627629027, iteration: 449362
loss: 1.0725994110107422,grad_norm: 0.9999995024625908, iteration: 449363
loss: 1.1031197309494019,grad_norm: 0.9999991840679328, iteration: 449364
loss: 1.0428146123886108,grad_norm: 0.8482870040194311, iteration: 449365
loss: 1.0626330375671387,grad_norm: 0.8459144098484863, iteration: 449366
loss: 1.1027497053146362,grad_norm: 0.9999995075390401, iteration: 449367
loss: 1.0257683992385864,grad_norm: 0.7636949858133707, iteration: 449368
loss: 1.0254852771759033,grad_norm: 0.9999991819092391, iteration: 449369
loss: 1.0452044010162354,grad_norm: 0.7992781050075575, iteration: 449370
loss: 1.061281442642212,grad_norm: 0.9860343794524837, iteration: 449371
loss: 1.1301497220993042,grad_norm: 0.9999996073349992, iteration: 449372
loss: 1.0940794944763184,grad_norm: 0.8085748543045171, iteration: 449373
loss: 1.167214035987854,grad_norm: 0.9183601314988202, iteration: 449374
loss: 1.2229703664779663,grad_norm: 1.0000000978789485, iteration: 449375
loss: 1.0208559036254883,grad_norm: 0.9999995754857911, iteration: 449376
loss: 1.0554102659225464,grad_norm: 0.8938646082563758, iteration: 449377
loss: 1.0821138620376587,grad_norm: 0.9999991218816506, iteration: 449378
loss: 1.0920093059539795,grad_norm: 0.9419393965910245, iteration: 449379
loss: 1.0784802436828613,grad_norm: 0.7176809479175382, iteration: 449380
loss: 0.9804179072380066,grad_norm: 0.8820678273912234, iteration: 449381
loss: 1.1067869663238525,grad_norm: 0.9999995411450439, iteration: 449382
loss: 0.9918327331542969,grad_norm: 0.7837024140301381, iteration: 449383
loss: 1.4368622303009033,grad_norm: 0.9999995944985982, iteration: 449384
loss: 1.1179276704788208,grad_norm: 0.9999996134646013, iteration: 449385
loss: 1.1534286737442017,grad_norm: 0.9999991385642958, iteration: 449386
loss: 1.0398658514022827,grad_norm: 0.8713816026808506, iteration: 449387
loss: 1.018675446510315,grad_norm: 0.676928800281203, iteration: 449388
loss: 1.0704938173294067,grad_norm: 0.9999990878822484, iteration: 449389
loss: 1.1111817359924316,grad_norm: 0.8837907613731844, iteration: 449390
loss: 0.9589022397994995,grad_norm: 0.7512499259576477, iteration: 449391
loss: 1.0859155654907227,grad_norm: 0.9947990124622009, iteration: 449392
loss: 1.102164387702942,grad_norm: 0.9999995398791462, iteration: 449393
loss: 1.000875473022461,grad_norm: 0.8723792490449209, iteration: 449394
loss: 1.1489498615264893,grad_norm: 0.8538831350720076, iteration: 449395
loss: 1.1966164112091064,grad_norm: 0.9999994821982251, iteration: 449396
loss: 1.053598165512085,grad_norm: 0.9999991709238759, iteration: 449397
loss: 1.143673062324524,grad_norm: 0.9999996841627173, iteration: 449398
loss: 1.089721441268921,grad_norm: 0.9999991764921544, iteration: 449399
loss: 1.1123732328414917,grad_norm: 0.9926066465654059, iteration: 449400
loss: 1.1252127885818481,grad_norm: 0.9999995012962605, iteration: 449401
loss: 1.1602673530578613,grad_norm: 0.9999999810963474, iteration: 449402
loss: 1.0679858922958374,grad_norm: 0.9999997620408456, iteration: 449403
loss: 0.9671992659568787,grad_norm: 0.8071065327568812, iteration: 449404
loss: 0.9999902248382568,grad_norm: 0.7328321264538267, iteration: 449405
loss: 1.03011155128479,grad_norm: 0.9999995954884545, iteration: 449406
loss: 0.9946720600128174,grad_norm: 0.8927539203892765, iteration: 449407
loss: 1.0515000820159912,grad_norm: 0.9999994526925255, iteration: 449408
loss: 1.035134196281433,grad_norm: 0.7761352332219519, iteration: 449409
loss: 1.1074045896530151,grad_norm: 0.921985230963484, iteration: 449410
loss: 0.9991911053657532,grad_norm: 0.9999996062526424, iteration: 449411
loss: 1.0039910078048706,grad_norm: 0.6811458677020983, iteration: 449412
loss: 0.981708288192749,grad_norm: 0.9999993641998435, iteration: 449413
loss: 1.046287178993225,grad_norm: 0.9999990140222571, iteration: 449414
loss: 1.0568220615386963,grad_norm: 0.7807155483276896, iteration: 449415
loss: 1.1654598712921143,grad_norm: 0.999999056347996, iteration: 449416
loss: 1.031089425086975,grad_norm: 0.999999106804232, iteration: 449417
loss: 1.0853196382522583,grad_norm: 0.9999998434674177, iteration: 449418
loss: 1.033251404762268,grad_norm: 0.7787511263179845, iteration: 449419
loss: 1.0924673080444336,grad_norm: 0.9999993310965657, iteration: 449420
loss: 1.055862307548523,grad_norm: 0.8495581558844176, iteration: 449421
loss: 0.9924209117889404,grad_norm: 0.6109369316573243, iteration: 449422
loss: 1.033101201057434,grad_norm: 0.9999996571946989, iteration: 449423
loss: 1.042798399925232,grad_norm: 0.6572236787362634, iteration: 449424
loss: 1.006554365158081,grad_norm: 0.999999891836147, iteration: 449425
loss: 1.096102237701416,grad_norm: 0.9999993212369024, iteration: 449426
loss: 1.006848692893982,grad_norm: 0.7844104070119043, iteration: 449427
loss: 1.119080901145935,grad_norm: 0.8922682385423106, iteration: 449428
loss: 1.0313472747802734,grad_norm: 0.9999997619375012, iteration: 449429
loss: 1.0369154214859009,grad_norm: 0.7608718147409727, iteration: 449430
loss: 1.0240044593811035,grad_norm: 0.9999990303488058, iteration: 449431
loss: 1.0633467435836792,grad_norm: 0.9999990955549037, iteration: 449432
loss: 1.0246858596801758,grad_norm: 0.9999995470817016, iteration: 449433
loss: 1.0252869129180908,grad_norm: 0.7331351086235934, iteration: 449434
loss: 0.9604712724685669,grad_norm: 0.8951247154336258, iteration: 449435
loss: 0.9839917421340942,grad_norm: 0.672203548879038, iteration: 449436
loss: 0.9598101377487183,grad_norm: 0.8236856344142828, iteration: 449437
loss: 1.0370441675186157,grad_norm: 0.9999998452434822, iteration: 449438
loss: 1.0142520666122437,grad_norm: 0.8489880582233434, iteration: 449439
loss: 1.0735985040664673,grad_norm: 0.9999998144301739, iteration: 449440
loss: 1.1005918979644775,grad_norm: 0.9999995924595448, iteration: 449441
loss: 1.0176738500595093,grad_norm: 0.912345326131275, iteration: 449442
loss: 1.0054835081100464,grad_norm: 0.667333085745753, iteration: 449443
loss: 1.006789207458496,grad_norm: 0.9180542793424857, iteration: 449444
loss: 0.9570134878158569,grad_norm: 0.898958581556915, iteration: 449445
loss: 1.0318418741226196,grad_norm: 0.8024673283963611, iteration: 449446
loss: 0.9817377924919128,grad_norm: 0.708428317417726, iteration: 449447
loss: 1.0248174667358398,grad_norm: 0.8089843878571843, iteration: 449448
loss: 1.0069767236709595,grad_norm: 0.8230500593960074, iteration: 449449
loss: 1.0249948501586914,grad_norm: 0.9999998744331356, iteration: 449450
loss: 1.066339135169983,grad_norm: 0.9999995363375921, iteration: 449451
loss: 1.0874837636947632,grad_norm: 0.9999995432091792, iteration: 449452
loss: 1.0209214687347412,grad_norm: 0.774231110645257, iteration: 449453
loss: 0.9808337092399597,grad_norm: 0.7906694476585028, iteration: 449454
loss: 1.0000298023223877,grad_norm: 0.6968592026302456, iteration: 449455
loss: 1.0008219480514526,grad_norm: 0.8785499105873473, iteration: 449456
loss: 0.9994399547576904,grad_norm: 0.7365969369728962, iteration: 449457
loss: 1.0039664506912231,grad_norm: 0.6765175803131394, iteration: 449458
loss: 0.9958820939064026,grad_norm: 0.8343081656071816, iteration: 449459
loss: 1.0368891954421997,grad_norm: 0.9999992877425785, iteration: 449460
loss: 1.030134916305542,grad_norm: 0.9395544830594414, iteration: 449461
loss: 0.9927388429641724,grad_norm: 0.9999991838757769, iteration: 449462
loss: 1.1198092699050903,grad_norm: 0.9999991212945172, iteration: 449463
loss: 1.0268275737762451,grad_norm: 0.9644305420574043, iteration: 449464
loss: 0.9806567430496216,grad_norm: 0.8708025191892663, iteration: 449465
loss: 1.0263103246688843,grad_norm: 0.7373544199918801, iteration: 449466
loss: 1.0186448097229004,grad_norm: 0.9999991403152289, iteration: 449467
loss: 1.0238884687423706,grad_norm: 0.8701099489066066, iteration: 449468
loss: 0.9943432211875916,grad_norm: 0.9999997184408435, iteration: 449469
loss: 1.0103713274002075,grad_norm: 0.9999990527420369, iteration: 449470
loss: 0.9894564747810364,grad_norm: 0.7855276038679725, iteration: 449471
loss: 1.111539363861084,grad_norm: 0.9999998353318702, iteration: 449472
loss: 0.9789360761642456,grad_norm: 0.8149782062065815, iteration: 449473
loss: 0.997846245765686,grad_norm: 0.6678788170519991, iteration: 449474
loss: 0.9787735342979431,grad_norm: 0.8470200610509826, iteration: 449475
loss: 1.0026472806930542,grad_norm: 0.8180933936112268, iteration: 449476
loss: 1.0207984447479248,grad_norm: 0.7934427013290363, iteration: 449477
loss: 1.1572728157043457,grad_norm: 0.9999999598106106, iteration: 449478
loss: 1.0103754997253418,grad_norm: 0.6552724631370265, iteration: 449479
loss: 0.9787418246269226,grad_norm: 0.6903933978255938, iteration: 449480
loss: 1.1516549587249756,grad_norm: 0.9999997642597264, iteration: 449481
loss: 1.0119372606277466,grad_norm: 0.9999990627686677, iteration: 449482
loss: 1.0053985118865967,grad_norm: 0.7185312606385855, iteration: 449483
loss: 1.0400429964065552,grad_norm: 0.9999989470421806, iteration: 449484
loss: 1.0349102020263672,grad_norm: 0.9999997586047973, iteration: 449485
loss: 0.9811599254608154,grad_norm: 0.9999997700216934, iteration: 449486
loss: 1.0231127738952637,grad_norm: 0.6263439918823224, iteration: 449487
loss: 0.9782809615135193,grad_norm: 0.7783163922853179, iteration: 449488
loss: 1.0270576477050781,grad_norm: 0.8151795947023646, iteration: 449489
loss: 0.9895581603050232,grad_norm: 0.7310165783411847, iteration: 449490
loss: 1.0131932497024536,grad_norm: 0.7607907503206253, iteration: 449491
loss: 1.0019172430038452,grad_norm: 0.9999989878561304, iteration: 449492
loss: 0.9500104188919067,grad_norm: 0.7884918309819993, iteration: 449493
loss: 0.9900868535041809,grad_norm: 0.7686174382864597, iteration: 449494
loss: 0.9992994070053101,grad_norm: 0.9999994036799107, iteration: 449495
loss: 0.985940158367157,grad_norm: 0.7033862648285667, iteration: 449496
loss: 0.997683048248291,grad_norm: 0.8840830042086706, iteration: 449497
loss: 1.0455195903778076,grad_norm: 0.7836429105415853, iteration: 449498
loss: 1.0861164331436157,grad_norm: 0.9999997579139194, iteration: 449499
loss: 0.9994776844978333,grad_norm: 0.9999990115474015, iteration: 449500
loss: 0.9997006058692932,grad_norm: 0.7724035446269661, iteration: 449501
loss: 1.0025328397750854,grad_norm: 0.7908466830169233, iteration: 449502
loss: 0.9757700562477112,grad_norm: 0.8083790041355825, iteration: 449503
loss: 1.0177098512649536,grad_norm: 0.719662242305204, iteration: 449504
loss: 1.0096991062164307,grad_norm: 0.8673405341460593, iteration: 449505
loss: 0.9797716736793518,grad_norm: 0.7294576283688743, iteration: 449506
loss: 1.0538415908813477,grad_norm: 0.9999993183547795, iteration: 449507
loss: 1.0275332927703857,grad_norm: 0.7271953627468825, iteration: 449508
loss: 1.012897253036499,grad_norm: 0.8227871707805302, iteration: 449509
loss: 0.970617949962616,grad_norm: 0.7544021991548618, iteration: 449510
loss: 0.9868939518928528,grad_norm: 0.8329064585574288, iteration: 449511
loss: 1.0621514320373535,grad_norm: 0.9613697637072756, iteration: 449512
loss: 0.9570516347885132,grad_norm: 0.7455050912845309, iteration: 449513
loss: 1.0165290832519531,grad_norm: 0.7833832574467247, iteration: 449514
loss: 0.9751112461090088,grad_norm: 0.8245104500781891, iteration: 449515
loss: 0.9821141362190247,grad_norm: 0.7459380790656124, iteration: 449516
loss: 1.0049861669540405,grad_norm: 0.9177429172757529, iteration: 449517
loss: 0.9786170721054077,grad_norm: 0.6704138843282176, iteration: 449518
loss: 1.0061250925064087,grad_norm: 0.8107885743402713, iteration: 449519
loss: 0.9932815432548523,grad_norm: 0.9999991317054036, iteration: 449520
loss: 0.9739018678665161,grad_norm: 0.666579381400291, iteration: 449521
loss: 0.994918704032898,grad_norm: 0.9841275498008285, iteration: 449522
loss: 1.0280847549438477,grad_norm: 0.9341793886039721, iteration: 449523
loss: 1.1645534038543701,grad_norm: 0.7874026123878949, iteration: 449524
loss: 1.0338306427001953,grad_norm: 0.9999996308093948, iteration: 449525
loss: 1.0137875080108643,grad_norm: 0.8682013156781064, iteration: 449526
loss: 0.9820519685745239,grad_norm: 0.7579562883146327, iteration: 449527
loss: 1.040405511856079,grad_norm: 0.9801470031037516, iteration: 449528
loss: 1.0220867395401,grad_norm: 0.784785288911178, iteration: 449529
loss: 1.0010813474655151,grad_norm: 0.8861118945280999, iteration: 449530
loss: 1.0693293809890747,grad_norm: 0.9228834310374555, iteration: 449531
loss: 1.054226040840149,grad_norm: 0.999999205595506, iteration: 449532
loss: 0.9894731640815735,grad_norm: 0.7506786762881753, iteration: 449533
loss: 1.067849040031433,grad_norm: 0.8389183165373173, iteration: 449534
loss: 1.0292398929595947,grad_norm: 0.7959400596701913, iteration: 449535
loss: 1.0240074396133423,grad_norm: 0.9999997789127848, iteration: 449536
loss: 0.9959521293640137,grad_norm: 0.7521578193650181, iteration: 449537
loss: 1.0216643810272217,grad_norm: 0.8844547451334692, iteration: 449538
loss: 0.9957959651947021,grad_norm: 0.8169633348511512, iteration: 449539
loss: 1.0794917345046997,grad_norm: 0.9999992580669553, iteration: 449540
loss: 0.9753343462944031,grad_norm: 0.8838996591255538, iteration: 449541
loss: 0.9792131781578064,grad_norm: 0.7447712137714527, iteration: 449542
loss: 1.0802403688430786,grad_norm: 0.9170310093947097, iteration: 449543
loss: 1.0653587579727173,grad_norm: 0.999999885633105, iteration: 449544
loss: 1.0107306241989136,grad_norm: 0.7341437199134322, iteration: 449545
loss: 1.067224144935608,grad_norm: 0.8184969388875504, iteration: 449546
loss: 0.994492769241333,grad_norm: 0.7261106251811028, iteration: 449547
loss: 1.0435463190078735,grad_norm: 0.8413326179131531, iteration: 449548
loss: 1.0080913305282593,grad_norm: 0.8790477937178074, iteration: 449549
loss: 0.9602924585342407,grad_norm: 0.8294378488554399, iteration: 449550
loss: 0.9787901639938354,grad_norm: 0.8157918344938123, iteration: 449551
loss: 1.0088396072387695,grad_norm: 0.8190318064238202, iteration: 449552
loss: 0.9710067510604858,grad_norm: 0.8176479011621448, iteration: 449553
loss: 1.0736260414123535,grad_norm: 0.8483045633960546, iteration: 449554
loss: 1.0212178230285645,grad_norm: 0.6647552144106894, iteration: 449555
loss: 0.9758968353271484,grad_norm: 0.7120435626303788, iteration: 449556
loss: 1.0580135583877563,grad_norm: 1.0000000151386672, iteration: 449557
loss: 0.9841039180755615,grad_norm: 0.7251864888443187, iteration: 449558
loss: 1.1162939071655273,grad_norm: 0.9999999953285197, iteration: 449559
loss: 1.0624085664749146,grad_norm: 0.9999994780571776, iteration: 449560
loss: 1.03071928024292,grad_norm: 0.8298035943813281, iteration: 449561
loss: 0.9961410760879517,grad_norm: 0.832208453721213, iteration: 449562
loss: 1.0623135566711426,grad_norm: 0.8224992554764011, iteration: 449563
loss: 1.0464833974838257,grad_norm: 0.8955570995964998, iteration: 449564
loss: 1.0474998950958252,grad_norm: 0.9999990370777785, iteration: 449565
loss: 0.9666916728019714,grad_norm: 0.8841417434259069, iteration: 449566
loss: 1.0553256273269653,grad_norm: 0.9649612764517116, iteration: 449567
loss: 1.0295261144638062,grad_norm: 0.7711235609726992, iteration: 449568
loss: 1.0199170112609863,grad_norm: 0.855772926793127, iteration: 449569
loss: 1.1421035528182983,grad_norm: 0.9999997374058587, iteration: 449570
loss: 1.0239876508712769,grad_norm: 0.9470249339551952, iteration: 449571
loss: 0.9836394786834717,grad_norm: 0.6168882333147209, iteration: 449572
loss: 1.0520529747009277,grad_norm: 0.9240488169007507, iteration: 449573
loss: 1.089181661605835,grad_norm: 0.9999994811555502, iteration: 449574
loss: 1.0761951208114624,grad_norm: 0.8764501621766776, iteration: 449575
loss: 1.1151734590530396,grad_norm: 0.9999992738187804, iteration: 449576
loss: 1.0485988855361938,grad_norm: 0.8270809886897388, iteration: 449577
loss: 1.0864779949188232,grad_norm: 0.9999992175719516, iteration: 449578
loss: 0.9964816570281982,grad_norm: 0.999999325062417, iteration: 449579
loss: 1.019178867340088,grad_norm: 0.7413541353620349, iteration: 449580
loss: 1.0204713344573975,grad_norm: 0.9122842040873618, iteration: 449581
loss: 1.0483016967773438,grad_norm: 0.8138133963634524, iteration: 449582
loss: 1.3368430137634277,grad_norm: 0.9999998687176856, iteration: 449583
loss: 1.0734128952026367,grad_norm: 0.9999993325194814, iteration: 449584
loss: 1.1578339338302612,grad_norm: 0.8454641110959082, iteration: 449585
loss: 1.0577102899551392,grad_norm: 0.9999993952664918, iteration: 449586
loss: 1.0414519309997559,grad_norm: 0.9035925332192832, iteration: 449587
loss: 1.0841996669769287,grad_norm: 0.999999179492678, iteration: 449588
loss: 1.1085827350616455,grad_norm: 0.9999997977878844, iteration: 449589
loss: 1.1991382837295532,grad_norm: 0.9999998742262118, iteration: 449590
loss: 1.126910924911499,grad_norm: 0.8567495475368494, iteration: 449591
loss: 1.0608404874801636,grad_norm: 0.8183599225204047, iteration: 449592
loss: 1.0502657890319824,grad_norm: 0.9999994767789108, iteration: 449593
loss: 1.1081668138504028,grad_norm: 0.9999993144453375, iteration: 449594
loss: 1.187386155128479,grad_norm: 0.9999999165380417, iteration: 449595
loss: 1.2515130043029785,grad_norm: 0.999999650524628, iteration: 449596
loss: 1.2701215744018555,grad_norm: 0.9999991953218135, iteration: 449597
loss: 1.236639380455017,grad_norm: 0.9999997848144165, iteration: 449598
loss: 0.9750891923904419,grad_norm: 0.9999992668861947, iteration: 449599
loss: 1.1134713888168335,grad_norm: 0.9999998721486897, iteration: 449600
loss: 1.0225569009780884,grad_norm: 0.8121765188266972, iteration: 449601
loss: 1.0388109683990479,grad_norm: 0.9999996312159158, iteration: 449602
loss: 1.0306682586669922,grad_norm: 0.9999990762284193, iteration: 449603
loss: 1.1006633043289185,grad_norm: 0.8936051741823553, iteration: 449604
loss: 1.0398216247558594,grad_norm: 0.8555027588362878, iteration: 449605
loss: 1.0865213871002197,grad_norm: 0.9999991498613652, iteration: 449606
loss: 1.0688843727111816,grad_norm: 0.9999991765367383, iteration: 449607
loss: 1.0164870023727417,grad_norm: 0.7527868474687834, iteration: 449608
loss: 1.0801949501037598,grad_norm: 1.0000000203762915, iteration: 449609
loss: 1.048200249671936,grad_norm: 0.9999996550429793, iteration: 449610
loss: 1.0372828245162964,grad_norm: 0.7838416359101914, iteration: 449611
loss: 1.0665746927261353,grad_norm: 0.9999998606627758, iteration: 449612
loss: 1.0768412351608276,grad_norm: 0.820423996843575, iteration: 449613
loss: 1.1023168563842773,grad_norm: 0.9999992442111171, iteration: 449614
loss: 1.0869745016098022,grad_norm: 0.9999990749331242, iteration: 449615
loss: 0.9532821178436279,grad_norm: 0.9999991194412148, iteration: 449616
loss: 1.0396240949630737,grad_norm: 0.9999999325918696, iteration: 449617
loss: 1.0648747682571411,grad_norm: 0.9700697473397967, iteration: 449618
loss: 1.0731005668640137,grad_norm: 0.9999997207350867, iteration: 449619
loss: 1.0664033889770508,grad_norm: 0.9999998021774027, iteration: 449620
loss: 1.1239309310913086,grad_norm: 0.9999997401258834, iteration: 449621
loss: 1.0774626731872559,grad_norm: 0.9999997588989943, iteration: 449622
loss: 1.1077234745025635,grad_norm: 0.999999245008686, iteration: 449623
loss: 1.0565921068191528,grad_norm: 0.7747513541368782, iteration: 449624
loss: 1.1035010814666748,grad_norm: 0.9999992271128304, iteration: 449625
loss: 0.9361388087272644,grad_norm: 0.9999990915021236, iteration: 449626
loss: 1.0070818662643433,grad_norm: 0.7230530677466441, iteration: 449627
loss: 1.0026979446411133,grad_norm: 0.7517484048228208, iteration: 449628
loss: 1.0114458799362183,grad_norm: 0.9999990298724093, iteration: 449629
loss: 1.0801922082901,grad_norm: 0.9999996964916934, iteration: 449630
loss: 1.0367218255996704,grad_norm: 0.9239172292454645, iteration: 449631
loss: 1.0143098831176758,grad_norm: 0.7739908157107882, iteration: 449632
loss: 1.0196315050125122,grad_norm: 0.7783994822669922, iteration: 449633
loss: 1.126505970954895,grad_norm: 0.9999998512095669, iteration: 449634
loss: 1.0117998123168945,grad_norm: 0.6050800460951277, iteration: 449635
loss: 1.0114009380340576,grad_norm: 0.6697305498574698, iteration: 449636
loss: 1.0777047872543335,grad_norm: 0.9999999868453399, iteration: 449637
loss: 1.082991600036621,grad_norm: 0.9999989663650249, iteration: 449638
loss: 1.0338308811187744,grad_norm: 0.7584082233730822, iteration: 449639
loss: 1.2129554748535156,grad_norm: 0.9999992281359187, iteration: 449640
loss: 0.9936692714691162,grad_norm: 0.973671428899453, iteration: 449641
loss: 1.0808954238891602,grad_norm: 0.9999991996131753, iteration: 449642
loss: 1.0118045806884766,grad_norm: 0.8616961984001738, iteration: 449643
loss: 0.9892604351043701,grad_norm: 0.976916915447971, iteration: 449644
loss: 1.1631767749786377,grad_norm: 0.9999992572257038, iteration: 449645
loss: 1.0637568235397339,grad_norm: 0.9457158339024699, iteration: 449646
loss: 0.9772555828094482,grad_norm: 0.6839373948984623, iteration: 449647
loss: 0.9883333444595337,grad_norm: 0.999999904188014, iteration: 449648
loss: 1.1348589658737183,grad_norm: 0.9999999744327751, iteration: 449649
loss: 1.0942779779434204,grad_norm: 0.9999991457171203, iteration: 449650
loss: 1.036757230758667,grad_norm: 0.9999990688991501, iteration: 449651
loss: 1.0299644470214844,grad_norm: 0.9999994179971247, iteration: 449652
loss: 1.0619558095932007,grad_norm: 0.7677458693254315, iteration: 449653
loss: 1.075340986251831,grad_norm: 0.9890538457446735, iteration: 449654
loss: 1.050176739692688,grad_norm: 0.8562617957016748, iteration: 449655
loss: 1.074020266532898,grad_norm: 0.9999998935984089, iteration: 449656
loss: 1.0014435052871704,grad_norm: 0.9857040449481207, iteration: 449657
loss: 1.1309033632278442,grad_norm: 0.9999993332878482, iteration: 449658
loss: 1.1475039720535278,grad_norm: 0.9456686889323516, iteration: 449659
loss: 1.192231297492981,grad_norm: 0.9999998643658136, iteration: 449660
loss: 1.1807801723480225,grad_norm: 0.9999998957357068, iteration: 449661
loss: 0.9911914467811584,grad_norm: 0.8368197393296182, iteration: 449662
loss: 0.9816669821739197,grad_norm: 0.8733503612651735, iteration: 449663
loss: 1.2162566184997559,grad_norm: 0.8575696888466023, iteration: 449664
loss: 1.0059843063354492,grad_norm: 0.9999999498661922, iteration: 449665
loss: 1.1243882179260254,grad_norm: 0.9999996534985414, iteration: 449666
loss: 1.0599732398986816,grad_norm: 0.8448879099133494, iteration: 449667
loss: 1.0423662662506104,grad_norm: 0.6993001209663577, iteration: 449668
loss: 1.03214430809021,grad_norm: 0.9000129769649623, iteration: 449669
loss: 0.9761766791343689,grad_norm: 0.8483644702753731, iteration: 449670
loss: 1.022172212600708,grad_norm: 0.5884919487618897, iteration: 449671
loss: 0.9473900198936462,grad_norm: 0.7763281094995226, iteration: 449672
loss: 1.0226649045944214,grad_norm: 0.7422985113157333, iteration: 449673
loss: 1.0075327157974243,grad_norm: 0.6508606108987041, iteration: 449674
loss: 0.9978076815605164,grad_norm: 0.8236376889035073, iteration: 449675
loss: 1.0184725522994995,grad_norm: 0.8613252794497449, iteration: 449676
loss: 1.0163313150405884,grad_norm: 0.6511299858424956, iteration: 449677
loss: 0.974653959274292,grad_norm: 0.6939370891866774, iteration: 449678
loss: 1.1554745435714722,grad_norm: 0.9999996331558583, iteration: 449679
loss: 1.029685616493225,grad_norm: 0.8982534200015239, iteration: 449680
loss: 1.0750458240509033,grad_norm: 0.9999991530603155, iteration: 449681
loss: 1.0405042171478271,grad_norm: 0.8356953680279684, iteration: 449682
loss: 1.0342026948928833,grad_norm: 0.9999994434863717, iteration: 449683
loss: 1.0509008169174194,grad_norm: 0.9448128393210588, iteration: 449684
loss: 0.9598160982131958,grad_norm: 0.8325652535250753, iteration: 449685
loss: 1.0038260221481323,grad_norm: 0.7622000543176807, iteration: 449686
loss: 1.0757845640182495,grad_norm: 0.9999989973056095, iteration: 449687
loss: 0.9886406064033508,grad_norm: 0.9339868057826867, iteration: 449688
loss: 0.9896221160888672,grad_norm: 0.7752345255745399, iteration: 449689
loss: 1.0221400260925293,grad_norm: 0.8179210890506579, iteration: 449690
loss: 0.9859045147895813,grad_norm: 0.8254703690163868, iteration: 449691
loss: 1.04427170753479,grad_norm: 1.000000113637475, iteration: 449692
loss: 1.0105421543121338,grad_norm: 0.8240535873624948, iteration: 449693
loss: 1.0081310272216797,grad_norm: 0.8157295963362318, iteration: 449694
loss: 1.0565565824508667,grad_norm: 0.877360048399055, iteration: 449695
loss: 0.9800807237625122,grad_norm: 0.7222320546997709, iteration: 449696
loss: 1.0028197765350342,grad_norm: 0.8514245734682947, iteration: 449697
loss: 1.0666213035583496,grad_norm: 0.6985718907050916, iteration: 449698
loss: 0.9999301433563232,grad_norm: 0.7404744152522343, iteration: 449699
loss: 0.9790207147598267,grad_norm: 0.839782263335303, iteration: 449700
loss: 1.04037606716156,grad_norm: 0.9999994110144349, iteration: 449701
loss: 1.0459779500961304,grad_norm: 0.8600983822926201, iteration: 449702
loss: 1.020302176475525,grad_norm: 0.9999993367877491, iteration: 449703
loss: 1.032692313194275,grad_norm: 0.8243597812090532, iteration: 449704
loss: 1.018488883972168,grad_norm: 0.7204683041493054, iteration: 449705
loss: 1.2236378192901611,grad_norm: 0.9999999938408094, iteration: 449706
loss: 1.1061595678329468,grad_norm: 0.9999989509952854, iteration: 449707
loss: 0.9829130172729492,grad_norm: 0.8876246135151248, iteration: 449708
loss: 1.019589900970459,grad_norm: 0.9221273368025451, iteration: 449709
loss: 1.0213074684143066,grad_norm: 0.7436703784702902, iteration: 449710
loss: 0.9874312877655029,grad_norm: 0.7531205198569363, iteration: 449711
loss: 1.0719249248504639,grad_norm: 0.7525809840766268, iteration: 449712
loss: 1.0635336637496948,grad_norm: 0.9999994956126043, iteration: 449713
loss: 0.9648860096931458,grad_norm: 0.6911910861125498, iteration: 449714
loss: 1.03244948387146,grad_norm: 0.9999991189844657, iteration: 449715
loss: 0.9830015301704407,grad_norm: 0.7950042367796067, iteration: 449716
loss: 0.9915940761566162,grad_norm: 0.8249287399398398, iteration: 449717
loss: 1.0210521221160889,grad_norm: 0.8334553878932035, iteration: 449718
loss: 0.9784784913063049,grad_norm: 0.7981465238759724, iteration: 449719
loss: 1.040473461151123,grad_norm: 0.9999996197309038, iteration: 449720
loss: 1.0118786096572876,grad_norm: 0.9999990141564932, iteration: 449721
loss: 1.0222978591918945,grad_norm: 0.9999997039547261, iteration: 449722
loss: 1.003335952758789,grad_norm: 0.9999994690750952, iteration: 449723
loss: 1.0010855197906494,grad_norm: 0.7655868905055289, iteration: 449724
loss: 1.002373456954956,grad_norm: 0.9999999097767446, iteration: 449725
loss: 1.0141093730926514,grad_norm: 0.7235803965704298, iteration: 449726
loss: 1.0725607872009277,grad_norm: 0.9999992629693653, iteration: 449727
loss: 1.1263339519500732,grad_norm: 0.9999998304216244, iteration: 449728
loss: 1.0452316999435425,grad_norm: 0.9999995015552179, iteration: 449729
loss: 1.0227789878845215,grad_norm: 0.9999997248016649, iteration: 449730
loss: 0.9704048037528992,grad_norm: 0.7321482981859474, iteration: 449731
loss: 1.081214427947998,grad_norm: 0.7875791846467216, iteration: 449732
loss: 0.9795811176300049,grad_norm: 0.7404580355926839, iteration: 449733
loss: 1.0068690776824951,grad_norm: 0.6554184826752424, iteration: 449734
loss: 1.024205207824707,grad_norm: 0.7370175307736192, iteration: 449735
loss: 1.0044749975204468,grad_norm: 0.8654396155215397, iteration: 449736
loss: 1.0349808931350708,grad_norm: 0.999999267583186, iteration: 449737
loss: 1.0118162631988525,grad_norm: 0.8509885879665436, iteration: 449738
loss: 1.0316224098205566,grad_norm: 0.9029634413694402, iteration: 449739
loss: 1.0512566566467285,grad_norm: 0.8372097792531714, iteration: 449740
loss: 0.9777583479881287,grad_norm: 0.6691633805113333, iteration: 449741
loss: 1.019635796546936,grad_norm: 0.8168743672264251, iteration: 449742
loss: 1.1344295740127563,grad_norm: 0.999999929059743, iteration: 449743
loss: 0.9553027749061584,grad_norm: 0.8760193575150805, iteration: 449744
loss: 1.069015383720398,grad_norm: 0.9999989990240584, iteration: 449745
loss: 1.083307147026062,grad_norm: 0.937936002544093, iteration: 449746
loss: 0.9999150633811951,grad_norm: 0.9476816298099906, iteration: 449747
loss: 0.9577791094779968,grad_norm: 0.767067235447567, iteration: 449748
loss: 1.0193525552749634,grad_norm: 0.8482442584625708, iteration: 449749
loss: 1.0699043273925781,grad_norm: 0.9999991739304018, iteration: 449750
loss: 1.031214714050293,grad_norm: 0.7971554898544844, iteration: 449751
loss: 1.1171443462371826,grad_norm: 0.9999991515248925, iteration: 449752
loss: 0.9983883500099182,grad_norm: 0.7410148812228426, iteration: 449753
loss: 0.9882798790931702,grad_norm: 0.9343153255894314, iteration: 449754
loss: 1.0247979164123535,grad_norm: 0.7109662870534282, iteration: 449755
loss: 1.0139491558074951,grad_norm: 0.7989534675881355, iteration: 449756
loss: 1.091139554977417,grad_norm: 0.9999992421377525, iteration: 449757
loss: 1.008064866065979,grad_norm: 0.6540702833233404, iteration: 449758
loss: 1.085484266281128,grad_norm: 0.9683082369388106, iteration: 449759
loss: 0.9859834909439087,grad_norm: 0.8321529858090059, iteration: 449760
loss: 1.0318278074264526,grad_norm: 0.9999998717009893, iteration: 449761
loss: 1.0005236864089966,grad_norm: 0.8328965131709143, iteration: 449762
loss: 1.1029253005981445,grad_norm: 0.9999994589516198, iteration: 449763
loss: 1.0029228925704956,grad_norm: 0.7031028183142187, iteration: 449764
loss: 1.0056852102279663,grad_norm: 0.676771324690656, iteration: 449765
loss: 0.9556910395622253,grad_norm: 0.6807655653934376, iteration: 449766
loss: 0.988797664642334,grad_norm: 0.9937840751170253, iteration: 449767
loss: 0.992158830165863,grad_norm: 0.7416360238218208, iteration: 449768
loss: 0.9628672003746033,grad_norm: 0.7506781366000643, iteration: 449769
loss: 1.1792231798171997,grad_norm: 0.9999992302755973, iteration: 449770
loss: 1.0510965585708618,grad_norm: 0.8149771366189381, iteration: 449771
loss: 0.9970167279243469,grad_norm: 0.5951803003218188, iteration: 449772
loss: 1.0062721967697144,grad_norm: 0.8321663799701321, iteration: 449773
loss: 0.9810227751731873,grad_norm: 0.7211978462620025, iteration: 449774
loss: 0.9791276454925537,grad_norm: 0.9864040443979988, iteration: 449775
loss: 1.1820746660232544,grad_norm: 0.9999993870957565, iteration: 449776
loss: 0.9910931587219238,grad_norm: 0.9539454773421677, iteration: 449777
loss: 0.9883503317832947,grad_norm: 0.7452448727526497, iteration: 449778
loss: 1.0447524785995483,grad_norm: 0.8647073546381581, iteration: 449779
loss: 1.0509612560272217,grad_norm: 0.9818783052787686, iteration: 449780
loss: 1.0032380819320679,grad_norm: 0.6717895246555101, iteration: 449781
loss: 0.9729018211364746,grad_norm: 0.7681400829367735, iteration: 449782
loss: 1.057390570640564,grad_norm: 0.9999994150087946, iteration: 449783
loss: 1.2356361150741577,grad_norm: 0.9999990874268077, iteration: 449784
loss: 0.9717019200325012,grad_norm: 0.7228715501154253, iteration: 449785
loss: 0.9994889497756958,grad_norm: 0.8073555267842958, iteration: 449786
loss: 1.0016374588012695,grad_norm: 0.711364271670639, iteration: 449787
loss: 1.0558403730392456,grad_norm: 0.666309430183212, iteration: 449788
loss: 1.1595956087112427,grad_norm: 0.941256410168307, iteration: 449789
loss: 1.0018606185913086,grad_norm: 0.8793032160098148, iteration: 449790
loss: 0.9995125532150269,grad_norm: 0.6805237309593815, iteration: 449791
loss: 1.0898735523223877,grad_norm: 0.9999999182586635, iteration: 449792
loss: 1.1823086738586426,grad_norm: 0.9999997006839484, iteration: 449793
loss: 1.1297435760498047,grad_norm: 0.9999994961721764, iteration: 449794
loss: 1.0383397340774536,grad_norm: 0.9999996800745791, iteration: 449795
loss: 1.1809487342834473,grad_norm: 0.9999991125300992, iteration: 449796
loss: 0.9751335382461548,grad_norm: 0.854551368056558, iteration: 449797
loss: 0.9825484752655029,grad_norm: 0.999999471760668, iteration: 449798
loss: 0.9858266711235046,grad_norm: 0.8395288560598224, iteration: 449799
loss: 0.9997445940971375,grad_norm: 0.8149140338678309, iteration: 449800
loss: 1.0519819259643555,grad_norm: 0.9999992701904664, iteration: 449801
loss: 1.1143680810928345,grad_norm: 0.9122340932483084, iteration: 449802
loss: 1.0226093530654907,grad_norm: 0.7296817413465004, iteration: 449803
loss: 0.993046760559082,grad_norm: 1.0000000173398702, iteration: 449804
loss: 0.9529262185096741,grad_norm: 0.8888879605805033, iteration: 449805
loss: 1.011755347251892,grad_norm: 0.7502801950600553, iteration: 449806
loss: 1.0329593420028687,grad_norm: 0.71005487783797, iteration: 449807
loss: 1.0169910192489624,grad_norm: 0.8239024803185407, iteration: 449808
loss: 1.0053484439849854,grad_norm: 0.9999998793062032, iteration: 449809
loss: 1.072709083557129,grad_norm: 0.8702247945057442, iteration: 449810
loss: 1.0717475414276123,grad_norm: 0.9999998896381649, iteration: 449811
loss: 1.025274634361267,grad_norm: 0.7259371657378385, iteration: 449812
loss: 0.993495762348175,grad_norm: 0.8156117382925138, iteration: 449813
loss: 1.0054339170455933,grad_norm: 0.9070408760322595, iteration: 449814
loss: 1.0391899347305298,grad_norm: 0.9999998528618878, iteration: 449815
loss: 0.9831010699272156,grad_norm: 0.999999232485674, iteration: 449816
loss: 0.9798649549484253,grad_norm: 0.9372821383256151, iteration: 449817
loss: 0.9670965671539307,grad_norm: 0.7492695311578372, iteration: 449818
loss: 0.9863795042037964,grad_norm: 0.7665262773081551, iteration: 449819
loss: 1.0265910625457764,grad_norm: 0.7834830826338156, iteration: 449820
loss: 1.072625994682312,grad_norm: 0.9999999852591948, iteration: 449821
loss: 1.0090817213058472,grad_norm: 0.71492693842173, iteration: 449822
loss: 1.0129834413528442,grad_norm: 0.80753258421714, iteration: 449823
loss: 1.1175649166107178,grad_norm: 0.9999992247004115, iteration: 449824
loss: 1.0874271392822266,grad_norm: 0.9999991189055685, iteration: 449825
loss: 0.9968274831771851,grad_norm: 0.999998986011821, iteration: 449826
loss: 1.0629791021347046,grad_norm: 0.9999999324785249, iteration: 449827
loss: 1.0896607637405396,grad_norm: 0.9999999683675385, iteration: 449828
loss: 0.985919177532196,grad_norm: 0.7844499795235388, iteration: 449829
loss: 0.9914656281471252,grad_norm: 0.7189434587356123, iteration: 449830
loss: 1.164711833000183,grad_norm: 0.9999990597786497, iteration: 449831
loss: 1.021337628364563,grad_norm: 0.8035414924406954, iteration: 449832
loss: 1.0046885013580322,grad_norm: 0.8313012822183408, iteration: 449833
loss: 0.9852674603462219,grad_norm: 0.7844810053195981, iteration: 449834
loss: 1.1172000169754028,grad_norm: 0.7863876245577518, iteration: 449835
loss: 1.0418256521224976,grad_norm: 0.9999996224896731, iteration: 449836
loss: 1.2127444744110107,grad_norm: 0.9999998738568103, iteration: 449837
loss: 1.2179988622665405,grad_norm: 0.9999990363721697, iteration: 449838
loss: 0.9931892156600952,grad_norm: 0.8205043762141825, iteration: 449839
loss: 1.0018212795257568,grad_norm: 0.7650935341642492, iteration: 449840
loss: 1.0614144802093506,grad_norm: 0.9999991121423241, iteration: 449841
loss: 1.0011858940124512,grad_norm: 0.7255095731366811, iteration: 449842
loss: 1.0341205596923828,grad_norm: 0.9999995773842837, iteration: 449843
loss: 1.018835186958313,grad_norm: 0.9999990371298451, iteration: 449844
loss: 1.119846224784851,grad_norm: 0.9999996827978953, iteration: 449845
loss: 1.2082809209823608,grad_norm: 0.9999999065341355, iteration: 449846
loss: 1.2646734714508057,grad_norm: 0.9999999796383152, iteration: 449847
loss: 1.0413845777511597,grad_norm: 0.834209357689997, iteration: 449848
loss: 1.0083197355270386,grad_norm: 0.8668921280181253, iteration: 449849
loss: 1.2177022695541382,grad_norm: 0.9999996825540959, iteration: 449850
loss: 1.015297293663025,grad_norm: 0.9999994526795057, iteration: 449851
loss: 1.0784399509429932,grad_norm: 0.9999998915327498, iteration: 449852
loss: 1.2842116355895996,grad_norm: 1.0000000008686425, iteration: 449853
loss: 1.0616888999938965,grad_norm: 0.7591102498734521, iteration: 449854
loss: 1.0098389387130737,grad_norm: 0.8100586390359023, iteration: 449855
loss: 1.0650851726531982,grad_norm: 0.9999999974020068, iteration: 449856
loss: 1.021125078201294,grad_norm: 0.9407051844848807, iteration: 449857
loss: 0.9580432176589966,grad_norm: 0.7742495085320298, iteration: 449858
loss: 1.2270139455795288,grad_norm: 0.9999998680359917, iteration: 449859
loss: 0.9783840179443359,grad_norm: 0.6518738701630887, iteration: 449860
loss: 0.998719334602356,grad_norm: 0.7732291266426369, iteration: 449861
loss: 1.0269572734832764,grad_norm: 0.9999995716803879, iteration: 449862
loss: 1.0458321571350098,grad_norm: 0.8245086828024827, iteration: 449863
loss: 1.0021864175796509,grad_norm: 0.7436847676362293, iteration: 449864
loss: 1.0071357488632202,grad_norm: 0.8349787907482161, iteration: 449865
loss: 1.0203830003738403,grad_norm: 0.7450681423411208, iteration: 449866
loss: 0.9554036259651184,grad_norm: 0.8089881651494135, iteration: 449867
loss: 1.260141134262085,grad_norm: 0.9999998917332058, iteration: 449868
loss: 0.9928262829780579,grad_norm: 0.9710402995051093, iteration: 449869
loss: 1.0342172384262085,grad_norm: 0.9999992756568963, iteration: 449870
loss: 1.1710134744644165,grad_norm: 0.9999995453703127, iteration: 449871
loss: 1.074770450592041,grad_norm: 0.9999993566674124, iteration: 449872
loss: 1.161034345626831,grad_norm: 0.9999999170924252, iteration: 449873
loss: 1.0777209997177124,grad_norm: 1.0000000047761746, iteration: 449874
loss: 1.0373507738113403,grad_norm: 0.7018315517840398, iteration: 449875
loss: 1.0228103399276733,grad_norm: 0.8982182103947134, iteration: 449876
loss: 1.022279143333435,grad_norm: 0.8317154340860298, iteration: 449877
loss: 1.021751046180725,grad_norm: 0.7730498904152453, iteration: 449878
loss: 1.080805778503418,grad_norm: 0.9999996243584929, iteration: 449879
loss: 0.9872323274612427,grad_norm: 0.8052470251601037, iteration: 449880
loss: 0.9945282936096191,grad_norm: 0.8489516975465966, iteration: 449881
loss: 1.0308008193969727,grad_norm: 0.8265192093274414, iteration: 449882
loss: 0.9856406450271606,grad_norm: 0.8638955520566776, iteration: 449883
loss: 0.9944443106651306,grad_norm: 0.7281426744524554, iteration: 449884
loss: 0.9226058125495911,grad_norm: 0.633520070110482, iteration: 449885
loss: 1.0227344036102295,grad_norm: 0.8998454175285869, iteration: 449886
loss: 1.0434675216674805,grad_norm: 0.6745666531967784, iteration: 449887
loss: 1.0579652786254883,grad_norm: 0.888520285220574, iteration: 449888
loss: 1.0013177394866943,grad_norm: 0.9999996052844584, iteration: 449889
loss: 0.9833449721336365,grad_norm: 0.7601571889813477, iteration: 449890
loss: 1.0294793844223022,grad_norm: 0.8648719183953555, iteration: 449891
loss: 0.9610075354576111,grad_norm: 0.730460628876927, iteration: 449892
loss: 1.1332063674926758,grad_norm: 0.9999999778211639, iteration: 449893
loss: 1.0077911615371704,grad_norm: 0.9999995040724434, iteration: 449894
loss: 1.1830459833145142,grad_norm: 0.9999994720898352, iteration: 449895
loss: 1.0383273363113403,grad_norm: 0.8375332393485697, iteration: 449896
loss: 1.0684823989868164,grad_norm: 0.9999997195563064, iteration: 449897
loss: 1.2086238861083984,grad_norm: 0.9999998882116579, iteration: 449898
loss: 1.0273711681365967,grad_norm: 0.9487760072417827, iteration: 449899
loss: 1.0351182222366333,grad_norm: 0.9460575629745178, iteration: 449900
loss: 1.118818759918213,grad_norm: 0.9999993205530476, iteration: 449901
loss: 1.0340851545333862,grad_norm: 0.9999991737414167, iteration: 449902
loss: 1.0098390579223633,grad_norm: 0.7316838537313988, iteration: 449903
loss: 1.1422371864318848,grad_norm: 0.9999993179960415, iteration: 449904
loss: 1.092764139175415,grad_norm: 0.9054061029647433, iteration: 449905
loss: 1.0266015529632568,grad_norm: 0.9999990982184525, iteration: 449906
loss: 1.0681883096694946,grad_norm: 0.9999996931820012, iteration: 449907
loss: 1.025072693824768,grad_norm: 0.8138972244879875, iteration: 449908
loss: 1.0287303924560547,grad_norm: 0.712211789525506, iteration: 449909
loss: 1.0385671854019165,grad_norm: 0.9999998843348082, iteration: 449910
loss: 1.0571346282958984,grad_norm: 0.9999991921978575, iteration: 449911
loss: 1.0277702808380127,grad_norm: 0.7482682565115124, iteration: 449912
loss: 1.0190446376800537,grad_norm: 0.9155377458371529, iteration: 449913
loss: 0.9986700415611267,grad_norm: 0.6962861954761436, iteration: 449914
loss: 0.9942266941070557,grad_norm: 0.9176217898646624, iteration: 449915
loss: 1.011312484741211,grad_norm: 0.7604567886512283, iteration: 449916
loss: 1.0276144742965698,grad_norm: 0.9144404489012332, iteration: 449917
loss: 0.9958025217056274,grad_norm: 0.8709008704471639, iteration: 449918
loss: 1.0128449201583862,grad_norm: 0.7527435143158531, iteration: 449919
loss: 0.9847320318222046,grad_norm: 0.9590569583562596, iteration: 449920
loss: 1.0044779777526855,grad_norm: 0.7672462606284935, iteration: 449921
loss: 1.010900855064392,grad_norm: 0.670381659753934, iteration: 449922
loss: 1.020093560218811,grad_norm: 0.9403327697361825, iteration: 449923
loss: 1.0129672288894653,grad_norm: 0.9999991497573022, iteration: 449924
loss: 1.0068409442901611,grad_norm: 0.8365640867980987, iteration: 449925
loss: 1.0498151779174805,grad_norm: 0.9999991174437312, iteration: 449926
loss: 1.0123051404953003,grad_norm: 0.6640870921723994, iteration: 449927
loss: 0.9623607993125916,grad_norm: 0.8061164629651929, iteration: 449928
loss: 1.0023632049560547,grad_norm: 0.7041396822810576, iteration: 449929
loss: 1.0267579555511475,grad_norm: 0.7014146462362706, iteration: 449930
loss: 1.0414412021636963,grad_norm: 0.999999749114819, iteration: 449931
loss: 1.075731873512268,grad_norm: 0.9272752410122691, iteration: 449932
loss: 0.9930570721626282,grad_norm: 0.7746289599679823, iteration: 449933
loss: 0.9836146831512451,grad_norm: 0.7646824113604347, iteration: 449934
loss: 1.0159238576889038,grad_norm: 0.9999996127537588, iteration: 449935
loss: 0.9939935803413391,grad_norm: 0.8524853257175478, iteration: 449936
loss: 1.0201328992843628,grad_norm: 0.8606287299165519, iteration: 449937
loss: 1.0172194242477417,grad_norm: 0.8877374783233009, iteration: 449938
loss: 1.016663670539856,grad_norm: 0.6926383726060173, iteration: 449939
loss: 1.0171494483947754,grad_norm: 0.6597215716643277, iteration: 449940
loss: 1.0015814304351807,grad_norm: 0.9999989585586265, iteration: 449941
loss: 1.0926131010055542,grad_norm: 0.9999990471156307, iteration: 449942
loss: 1.1526539325714111,grad_norm: 0.9999999364139647, iteration: 449943
loss: 1.0253658294677734,grad_norm: 0.750301076736509, iteration: 449944
loss: 1.0594875812530518,grad_norm: 0.7954688027827242, iteration: 449945
loss: 1.059486985206604,grad_norm: 0.8447547770785063, iteration: 449946
loss: 0.9888443350791931,grad_norm: 0.8804882695490686, iteration: 449947
loss: 0.9863001108169556,grad_norm: 0.9155781508546597, iteration: 449948
loss: 1.0727437734603882,grad_norm: 0.9308842816554408, iteration: 449949
loss: 1.0255329608917236,grad_norm: 0.7029772651757621, iteration: 449950
loss: 0.9979589581489563,grad_norm: 0.9210646884538003, iteration: 449951
loss: 0.9964625835418701,grad_norm: 0.6986833384782973, iteration: 449952
loss: 1.0098603963851929,grad_norm: 0.9287609134965106, iteration: 449953
loss: 1.0098226070404053,grad_norm: 0.7697565684434986, iteration: 449954
loss: 1.072541356086731,grad_norm: 0.6778564235270144, iteration: 449955
loss: 1.0019608736038208,grad_norm: 0.8064204007333562, iteration: 449956
loss: 1.075930118560791,grad_norm: 0.9999990570288573, iteration: 449957
loss: 1.0261119604110718,grad_norm: 0.7147386810743481, iteration: 449958
loss: 0.9667376279830933,grad_norm: 0.7738168357176869, iteration: 449959
loss: 1.033859133720398,grad_norm: 0.7590815199875007, iteration: 449960
loss: 1.0218244791030884,grad_norm: 0.9999990963951245, iteration: 449961
loss: 1.0426524877548218,grad_norm: 0.8367153976263763, iteration: 449962
loss: 1.02626633644104,grad_norm: 0.8089408944708552, iteration: 449963
loss: 1.0239707231521606,grad_norm: 0.8227319043311518, iteration: 449964
loss: 0.9924166798591614,grad_norm: 0.7096415150719749, iteration: 449965
loss: 1.000124454498291,grad_norm: 0.8309304016795462, iteration: 449966
loss: 1.0216712951660156,grad_norm: 0.7761535898331628, iteration: 449967
loss: 1.0802541971206665,grad_norm: 0.7864920587698639, iteration: 449968
loss: 1.0028164386749268,grad_norm: 0.9730962363966218, iteration: 449969
loss: 1.0298020839691162,grad_norm: 0.842504804086129, iteration: 449970
loss: 0.9922304749488831,grad_norm: 0.870504107719695, iteration: 449971
loss: 0.9983782768249512,grad_norm: 0.9999998019811134, iteration: 449972
loss: 0.9990332126617432,grad_norm: 0.8449439323202746, iteration: 449973
loss: 0.9903659224510193,grad_norm: 0.7650327152400073, iteration: 449974
loss: 0.9704268574714661,grad_norm: 0.8648008560390404, iteration: 449975
loss: 1.0048803091049194,grad_norm: 0.6995571137985572, iteration: 449976
loss: 0.9545841813087463,grad_norm: 0.8321296298390071, iteration: 449977
loss: 1.0180542469024658,grad_norm: 0.9999994934533781, iteration: 449978
loss: 0.9854334592819214,grad_norm: 0.7459629413929719, iteration: 449979
loss: 1.042717456817627,grad_norm: 0.7438556406887398, iteration: 449980
loss: 1.0280026197433472,grad_norm: 0.9999999152193033, iteration: 449981
loss: 1.0772391557693481,grad_norm: 0.8351999550803184, iteration: 449982
loss: 1.0125232934951782,grad_norm: 0.7059351077959591, iteration: 449983
loss: 0.9641833901405334,grad_norm: 0.6929731951437005, iteration: 449984
loss: 1.0320780277252197,grad_norm: 0.8886877961141765, iteration: 449985
loss: 1.0706056356430054,grad_norm: 0.9999992032341118, iteration: 449986
loss: 1.0313053131103516,grad_norm: 0.7486249736748788, iteration: 449987
loss: 1.0284193754196167,grad_norm: 0.7435043316769938, iteration: 449988
loss: 0.9662219882011414,grad_norm: 0.8055153603311875, iteration: 449989
loss: 0.9932268261909485,grad_norm: 0.8308630474702512, iteration: 449990
loss: 0.9675120115280151,grad_norm: 0.8006157381749458, iteration: 449991
loss: 1.0000916719436646,grad_norm: 0.6413934000315199, iteration: 449992
loss: 1.0224578380584717,grad_norm: 0.7262123257776532, iteration: 449993
loss: 0.9712921380996704,grad_norm: 0.7245565039714168, iteration: 449994
loss: 1.0019451379776,grad_norm: 0.6908485704218914, iteration: 449995
loss: 0.9861624836921692,grad_norm: 0.8191697341364936, iteration: 449996
loss: 0.9697669744491577,grad_norm: 0.6756531561362344, iteration: 449997
loss: 0.9666192531585693,grad_norm: 0.8172472552499812, iteration: 449998
loss: 0.9738463759422302,grad_norm: 0.7393134143172948, iteration: 449999
loss: 1.0253655910491943,grad_norm: 0.8994593623535214, iteration: 450000
Evaluating at step 450000
{'val': 0.9967204183340073, 'test': 2.02496791647646}
loss: 0.9900413155555725,grad_norm: 0.9999997040033892, iteration: 450001
loss: 1.011986494064331,grad_norm: 0.9999998150797674, iteration: 450002
loss: 1.0095999240875244,grad_norm: 0.8267089691394998, iteration: 450003
loss: 1.008315920829773,grad_norm: 0.9819398891265441, iteration: 450004
loss: 1.0324628353118896,grad_norm: 0.8837963546679625, iteration: 450005
loss: 1.0304453372955322,grad_norm: 0.9526693243636354, iteration: 450006
loss: 1.0299917459487915,grad_norm: 0.8220875716809638, iteration: 450007
loss: 1.1662936210632324,grad_norm: 0.9999993375542698, iteration: 450008
loss: 1.101120114326477,grad_norm: 0.8036210576600697, iteration: 450009
loss: 0.982140064239502,grad_norm: 0.6177866009205522, iteration: 450010
loss: 0.9861523509025574,grad_norm: 0.82579034799753, iteration: 450011
loss: 1.0384361743927002,grad_norm: 0.7543395663183063, iteration: 450012
loss: 1.0529154539108276,grad_norm: 0.9999995840502622, iteration: 450013
loss: 1.0050183534622192,grad_norm: 0.9890567342472358, iteration: 450014
loss: 0.9993870854377747,grad_norm: 0.7662632555923278, iteration: 450015
loss: 1.0220706462860107,grad_norm: 0.7833590236101107, iteration: 450016
loss: 0.9727455377578735,grad_norm: 0.6777611029077626, iteration: 450017
loss: 0.9624707102775574,grad_norm: 0.7410559046934982, iteration: 450018
loss: 1.0109766721725464,grad_norm: 0.7684583757162657, iteration: 450019
loss: 1.0445659160614014,grad_norm: 0.9448221569588157, iteration: 450020
loss: 1.0439625978469849,grad_norm: 0.8591596911876288, iteration: 450021
loss: 1.004417061805725,grad_norm: 0.7351991268819573, iteration: 450022
loss: 1.0653867721557617,grad_norm: 0.7755529879152131, iteration: 450023
loss: 0.9820802807807922,grad_norm: 0.7556304272779862, iteration: 450024
loss: 0.969423770904541,grad_norm: 0.9456729230663768, iteration: 450025
loss: 1.0069273710250854,grad_norm: 0.8156168030297668, iteration: 450026
loss: 1.0502456426620483,grad_norm: 0.9999998968042346, iteration: 450027
loss: 0.9992442727088928,grad_norm: 0.7985701680040258, iteration: 450028
loss: 1.0275119543075562,grad_norm: 0.9999998200033471, iteration: 450029
loss: 1.011634349822998,grad_norm: 0.6176066825114156, iteration: 450030
loss: 0.9986919164657593,grad_norm: 0.8099737102567939, iteration: 450031
loss: 0.9868428111076355,grad_norm: 0.8850184697751622, iteration: 450032
loss: 1.007920265197754,grad_norm: 0.8345302490920287, iteration: 450033
loss: 1.007597804069519,grad_norm: 0.7179590466211967, iteration: 450034
loss: 1.0364171266555786,grad_norm: 0.8875866267994594, iteration: 450035
loss: 1.0742219686508179,grad_norm: 0.9331300989042265, iteration: 450036
loss: 1.0120962858200073,grad_norm: 0.85030085903475, iteration: 450037
loss: 0.9914727807044983,grad_norm: 0.7357372751726786, iteration: 450038
loss: 1.0331100225448608,grad_norm: 0.7847375783885996, iteration: 450039
loss: 1.0342451333999634,grad_norm: 0.9999991919534479, iteration: 450040
loss: 1.0054763555526733,grad_norm: 0.7618670248624395, iteration: 450041
loss: 1.0020861625671387,grad_norm: 0.7947045215814496, iteration: 450042
loss: 1.0331662893295288,grad_norm: 0.6499451238556089, iteration: 450043
loss: 0.9650702476501465,grad_norm: 0.7910503171718969, iteration: 450044
loss: 1.0055131912231445,grad_norm: 0.9353668245858919, iteration: 450045
loss: 0.9955206513404846,grad_norm: 0.7669478495527552, iteration: 450046
loss: 0.9849098920822144,grad_norm: 0.9499102612852052, iteration: 450047
loss: 1.0060316324234009,grad_norm: 0.7023713900143009, iteration: 450048
loss: 0.9685376882553101,grad_norm: 0.9763481673875909, iteration: 450049
loss: 1.0028597116470337,grad_norm: 0.7871451975292404, iteration: 450050
loss: 0.9795932769775391,grad_norm: 0.9095077659594506, iteration: 450051
loss: 1.006524920463562,grad_norm: 0.8865252027671054, iteration: 450052
loss: 0.9644877314567566,grad_norm: 0.7932190429238271, iteration: 450053
loss: 0.9846429824829102,grad_norm: 0.9752907072000173, iteration: 450054
loss: 1.0536702871322632,grad_norm: 0.9999994095976394, iteration: 450055
loss: 0.9783307313919067,grad_norm: 0.959818843760713, iteration: 450056
loss: 0.990043044090271,grad_norm: 0.7633156214710298, iteration: 450057
loss: 0.9867838621139526,grad_norm: 0.9054729352948205, iteration: 450058
loss: 1.0214375257492065,grad_norm: 0.6991793792807915, iteration: 450059
loss: 1.0139168500900269,grad_norm: 0.735317510410753, iteration: 450060
loss: 1.0062057971954346,grad_norm: 0.8842506701713825, iteration: 450061
loss: 0.9507332444190979,grad_norm: 0.7904728088724335, iteration: 450062
loss: 1.0084381103515625,grad_norm: 0.682854054066685, iteration: 450063
loss: 1.0191906690597534,grad_norm: 0.7504997613472615, iteration: 450064
loss: 0.9930616021156311,grad_norm: 0.7805815853519128, iteration: 450065
loss: 1.0184745788574219,grad_norm: 0.6722384357451087, iteration: 450066
loss: 0.9767345190048218,grad_norm: 0.7541907811690198, iteration: 450067
loss: 1.0058091878890991,grad_norm: 0.7163505189454397, iteration: 450068
loss: 0.9543983340263367,grad_norm: 0.965636276556787, iteration: 450069
loss: 1.0204811096191406,grad_norm: 0.7898017285394626, iteration: 450070
loss: 1.0002048015594482,grad_norm: 0.731501071576591, iteration: 450071
loss: 1.0172890424728394,grad_norm: 0.9999993999894771, iteration: 450072
loss: 0.9502145648002625,grad_norm: 0.9999992719213769, iteration: 450073
loss: 0.9379715919494629,grad_norm: 0.7655786002518347, iteration: 450074
loss: 0.9624626040458679,grad_norm: 0.7385983385686805, iteration: 450075
loss: 1.0448193550109863,grad_norm: 0.7188895147183487, iteration: 450076
loss: 1.0334864854812622,grad_norm: 0.999999979622933, iteration: 450077
loss: 1.1171294450759888,grad_norm: 0.9999990734324864, iteration: 450078
loss: 1.039036512374878,grad_norm: 0.8220375772580514, iteration: 450079
loss: 0.9973203539848328,grad_norm: 0.729561454670604, iteration: 450080
loss: 1.0016230344772339,grad_norm: 0.7686161431632449, iteration: 450081
loss: 0.9906074404716492,grad_norm: 0.9999992755831539, iteration: 450082
loss: 1.0261644124984741,grad_norm: 0.838303644111666, iteration: 450083
loss: 1.0045222043991089,grad_norm: 0.7884723650412113, iteration: 450084
loss: 1.0068039894104004,grad_norm: 0.7572439134134777, iteration: 450085
loss: 1.0468794107437134,grad_norm: 0.9999989667472214, iteration: 450086
loss: 1.0234713554382324,grad_norm: 0.9223149097396037, iteration: 450087
loss: 1.107129454612732,grad_norm: 0.9999998701367331, iteration: 450088
loss: 1.0755139589309692,grad_norm: 0.9999996619524226, iteration: 450089
loss: 0.9946723580360413,grad_norm: 0.7624338574642722, iteration: 450090
loss: 0.9977232813835144,grad_norm: 0.7927557845272901, iteration: 450091
loss: 0.9997926354408264,grad_norm: 0.7080158344761889, iteration: 450092
loss: 1.0001293420791626,grad_norm: 0.6708628996060099, iteration: 450093
loss: 1.0263493061065674,grad_norm: 0.9999995539676939, iteration: 450094
loss: 1.0343408584594727,grad_norm: 0.9999990983040612, iteration: 450095
loss: 1.0307594537734985,grad_norm: 0.799289844498658, iteration: 450096
loss: 0.9913703799247742,grad_norm: 0.7756965559424943, iteration: 450097
loss: 1.0513988733291626,grad_norm: 0.8922634868580199, iteration: 450098
loss: 1.0712196826934814,grad_norm: 0.9999991956178766, iteration: 450099
loss: 0.97978675365448,grad_norm: 0.877786368833144, iteration: 450100
loss: 0.9631867408752441,grad_norm: 0.757674847450584, iteration: 450101
loss: 1.0110273361206055,grad_norm: 0.7786637201573028, iteration: 450102
loss: 1.0691380500793457,grad_norm: 0.9999996366021336, iteration: 450103
loss: 0.9904316067695618,grad_norm: 0.7578212533031639, iteration: 450104
loss: 1.0130687952041626,grad_norm: 0.6491476962239302, iteration: 450105
loss: 0.9675064086914062,grad_norm: 0.9999990806684386, iteration: 450106
loss: 0.9532631635665894,grad_norm: 0.8241560003600175, iteration: 450107
loss: 1.020505428314209,grad_norm: 0.8489943599482446, iteration: 450108
loss: 0.98836350440979,grad_norm: 0.8184930552960055, iteration: 450109
loss: 0.9949232339859009,grad_norm: 0.7332307259017022, iteration: 450110
loss: 0.999041736125946,grad_norm: 0.8002224404724038, iteration: 450111
loss: 0.9955078363418579,grad_norm: 0.6160635078895262, iteration: 450112
loss: 0.9911917448043823,grad_norm: 0.6572959202541768, iteration: 450113
loss: 1.070534586906433,grad_norm: 0.8194958458936521, iteration: 450114
loss: 1.0381470918655396,grad_norm: 0.7259027500747575, iteration: 450115
loss: 1.077965497970581,grad_norm: 0.8131352462295819, iteration: 450116
loss: 0.9906190037727356,grad_norm: 0.7650094490408257, iteration: 450117
loss: 1.0197491645812988,grad_norm: 0.7833684411414853, iteration: 450118
loss: 0.9958528876304626,grad_norm: 0.7143598142738671, iteration: 450119
loss: 0.993949830532074,grad_norm: 0.752996982862877, iteration: 450120
loss: 1.0272189378738403,grad_norm: 0.9999990322931872, iteration: 450121
loss: 1.0227030515670776,grad_norm: 0.8351364501133679, iteration: 450122
loss: 0.9911046028137207,grad_norm: 0.8354378900703973, iteration: 450123
loss: 1.0356485843658447,grad_norm: 0.8652264625743381, iteration: 450124
loss: 1.0865765810012817,grad_norm: 0.8364324305600918, iteration: 450125
loss: 0.9910768270492554,grad_norm: 0.8703400633401467, iteration: 450126
loss: 0.9689729809761047,grad_norm: 0.6970646455881143, iteration: 450127
loss: 1.0006204843521118,grad_norm: 0.8002203414263123, iteration: 450128
loss: 1.0245143175125122,grad_norm: 0.6712253163039296, iteration: 450129
loss: 1.0659985542297363,grad_norm: 0.6836606203448584, iteration: 450130
loss: 1.057814359664917,grad_norm: 0.7459467678713646, iteration: 450131
loss: 0.9880290031433105,grad_norm: 0.8502252834506375, iteration: 450132
loss: 0.981066107749939,grad_norm: 0.6977458583588552, iteration: 450133
loss: 1.0437597036361694,grad_norm: 0.999999176406685, iteration: 450134
loss: 0.9733748435974121,grad_norm: 0.8327362911254456, iteration: 450135
loss: 1.044155240058899,grad_norm: 0.9999993073630941, iteration: 450136
loss: 0.9834021925926208,grad_norm: 0.6582665566822421, iteration: 450137
loss: 0.9998170137405396,grad_norm: 0.6806383649400727, iteration: 450138
loss: 1.0200711488723755,grad_norm: 0.7515530138340294, iteration: 450139
loss: 1.0152108669281006,grad_norm: 0.7450659547294639, iteration: 450140
loss: 0.9877555966377258,grad_norm: 0.753554826886257, iteration: 450141
loss: 0.9819183349609375,grad_norm: 0.9999990455154804, iteration: 450142
loss: 1.034738540649414,grad_norm: 0.752602042874635, iteration: 450143
loss: 1.0071419477462769,grad_norm: 0.9999996240639748, iteration: 450144
loss: 1.0273045301437378,grad_norm: 0.7288835363921323, iteration: 450145
loss: 1.000422477722168,grad_norm: 0.771371961046922, iteration: 450146
loss: 1.0255964994430542,grad_norm: 0.6582888939051279, iteration: 450147
loss: 0.9753013253211975,grad_norm: 0.8210573105642889, iteration: 450148
loss: 1.0628467798233032,grad_norm: 0.9999997427062399, iteration: 450149
loss: 0.9761881828308105,grad_norm: 0.8015820737198391, iteration: 450150
loss: 0.9761887192726135,grad_norm: 0.6568700352233627, iteration: 450151
loss: 1.021754503250122,grad_norm: 0.9999993990094626, iteration: 450152
loss: 1.0975309610366821,grad_norm: 0.9999999298248436, iteration: 450153
loss: 1.0103809833526611,grad_norm: 0.7339439152629771, iteration: 450154
loss: 1.035030722618103,grad_norm: 0.9999998837881173, iteration: 450155
loss: 0.9711518883705139,grad_norm: 0.8400927816224256, iteration: 450156
loss: 0.9933249354362488,grad_norm: 0.6975654364878329, iteration: 450157
loss: 1.0133904218673706,grad_norm: 0.6691404435154058, iteration: 450158
loss: 0.9895337224006653,grad_norm: 0.9376959789150476, iteration: 450159
loss: 0.9813603162765503,grad_norm: 0.7191087517404926, iteration: 450160
loss: 1.019789457321167,grad_norm: 0.8179919410195473, iteration: 450161
loss: 1.0219895839691162,grad_norm: 0.8696497626772776, iteration: 450162
loss: 1.024463415145874,grad_norm: 0.7488516842955736, iteration: 450163
loss: 1.0224339962005615,grad_norm: 0.9999997397965252, iteration: 450164
loss: 1.0059864521026611,grad_norm: 0.7877830140547128, iteration: 450165
loss: 0.9679871797561646,grad_norm: 0.787686409954433, iteration: 450166
loss: 1.0246199369430542,grad_norm: 0.8111069962642655, iteration: 450167
loss: 1.0234906673431396,grad_norm: 0.9999994254828739, iteration: 450168
loss: 1.0862637758255005,grad_norm: 0.6694168665560979, iteration: 450169
loss: 1.0015888214111328,grad_norm: 0.7798306880639644, iteration: 450170
loss: 1.0014970302581787,grad_norm: 0.9999998666283837, iteration: 450171
loss: 1.0387824773788452,grad_norm: 0.8683012287481012, iteration: 450172
loss: 1.0354104042053223,grad_norm: 0.7763470195468997, iteration: 450173
loss: 0.9894985556602478,grad_norm: 0.6240488981034791, iteration: 450174
loss: 1.0321409702301025,grad_norm: 0.7419273297606348, iteration: 450175
loss: 0.9565948247909546,grad_norm: 0.7771861536880788, iteration: 450176
loss: 0.9705270528793335,grad_norm: 0.9531973002235373, iteration: 450177
loss: 0.9748238921165466,grad_norm: 0.7822031446012392, iteration: 450178
loss: 1.0270971059799194,grad_norm: 0.9857207649366289, iteration: 450179
loss: 1.0131758451461792,grad_norm: 0.6421020623350678, iteration: 450180
loss: 0.9771512746810913,grad_norm: 0.8186879193712535, iteration: 450181
loss: 1.018318772315979,grad_norm: 0.9999996321372799, iteration: 450182
loss: 1.0112463235855103,grad_norm: 0.9843878866092245, iteration: 450183
loss: 1.0029582977294922,grad_norm: 0.7704434672905945, iteration: 450184
loss: 0.9904537200927734,grad_norm: 0.7055098604398767, iteration: 450185
loss: 1.0172984600067139,grad_norm: 0.7320897359093594, iteration: 450186
loss: 1.0284501314163208,grad_norm: 0.719959916069085, iteration: 450187
loss: 1.0127660036087036,grad_norm: 0.8662508195498918, iteration: 450188
loss: 0.9799615144729614,grad_norm: 0.9079439117400225, iteration: 450189
loss: 1.0140764713287354,grad_norm: 0.88223891087579, iteration: 450190
loss: 1.0715082883834839,grad_norm: 0.9236071200370457, iteration: 450191
loss: 1.004176378250122,grad_norm: 0.8076800941738926, iteration: 450192
loss: 1.0224056243896484,grad_norm: 0.8175909854090754, iteration: 450193
loss: 1.0151777267456055,grad_norm: 0.7694024968948093, iteration: 450194
loss: 1.0366084575653076,grad_norm: 0.864609734833844, iteration: 450195
loss: 1.0368010997772217,grad_norm: 0.7014975823906823, iteration: 450196
loss: 0.9915701746940613,grad_norm: 0.8101196942886913, iteration: 450197
loss: 1.1403812170028687,grad_norm: 0.8431786405898474, iteration: 450198
loss: 1.0291389226913452,grad_norm: 0.9999993337886847, iteration: 450199
loss: 1.032523274421692,grad_norm: 0.8310040234878179, iteration: 450200
loss: 1.0695617198944092,grad_norm: 0.8263377714832756, iteration: 450201
loss: 1.0344032049179077,grad_norm: 0.7568428826280269, iteration: 450202
loss: 1.0004892349243164,grad_norm: 0.7810227189758284, iteration: 450203
loss: 1.0358363389968872,grad_norm: 0.7188516511958897, iteration: 450204
loss: 1.0130797624588013,grad_norm: 0.7783098904474326, iteration: 450205
loss: 0.9798160195350647,grad_norm: 0.6738863851667642, iteration: 450206
loss: 0.9963609576225281,grad_norm: 0.899402616393785, iteration: 450207
loss: 1.0214732885360718,grad_norm: 0.9144493879242009, iteration: 450208
loss: 0.9878514409065247,grad_norm: 0.8920013439517454, iteration: 450209
loss: 1.0157068967819214,grad_norm: 0.9999998432189593, iteration: 450210
loss: 1.004697561264038,grad_norm: 0.9136681221326233, iteration: 450211
loss: 0.988609254360199,grad_norm: 0.8360415324943424, iteration: 450212
loss: 0.9752137660980225,grad_norm: 0.7172056791785386, iteration: 450213
loss: 0.9879902601242065,grad_norm: 0.9497138244862035, iteration: 450214
loss: 1.0281689167022705,grad_norm: 0.7475784460375874, iteration: 450215
loss: 0.9649642109870911,grad_norm: 0.6748685129488251, iteration: 450216
loss: 1.011183261871338,grad_norm: 0.999999475081552, iteration: 450217
loss: 1.0086874961853027,grad_norm: 0.8247954848221112, iteration: 450218
loss: 1.031851887702942,grad_norm: 0.8740284849915325, iteration: 450219
loss: 0.958249032497406,grad_norm: 0.6893270936532143, iteration: 450220
loss: 1.0029335021972656,grad_norm: 0.706705356836488, iteration: 450221
loss: 0.9946854114532471,grad_norm: 0.7386496125653084, iteration: 450222
loss: 1.0015172958374023,grad_norm: 0.938328943303574, iteration: 450223
loss: 0.9990907311439514,grad_norm: 0.7181240696085123, iteration: 450224
loss: 1.0050519704818726,grad_norm: 0.870061801323901, iteration: 450225
loss: 1.0266953706741333,grad_norm: 0.900773201891515, iteration: 450226
loss: 1.082741618156433,grad_norm: 0.9999991747920891, iteration: 450227
loss: 0.9722502827644348,grad_norm: 0.7890525516770294, iteration: 450228
loss: 0.9852907061576843,grad_norm: 0.8592541257665787, iteration: 450229
loss: 1.0587236881256104,grad_norm: 0.8168059545347739, iteration: 450230
loss: 1.0112625360488892,grad_norm: 0.6191219609003652, iteration: 450231
loss: 0.9862776398658752,grad_norm: 0.7336287326491701, iteration: 450232
loss: 1.0233361721038818,grad_norm: 0.6159929813929996, iteration: 450233
loss: 1.036959171295166,grad_norm: 0.9999990900060259, iteration: 450234
loss: 0.9811267852783203,grad_norm: 0.7763524070778162, iteration: 450235
loss: 1.0083768367767334,grad_norm: 0.7637277175593498, iteration: 450236
loss: 1.0203542709350586,grad_norm: 0.8024266966207447, iteration: 450237
loss: 0.9935435056686401,grad_norm: 0.7944035386807485, iteration: 450238
loss: 0.9649990797042847,grad_norm: 0.7544652172778783, iteration: 450239
loss: 0.9717317819595337,grad_norm: 0.8624124058797795, iteration: 450240
loss: 0.9854567646980286,grad_norm: 0.7435628546994361, iteration: 450241
loss: 0.9756500124931335,grad_norm: 0.8007805299567201, iteration: 450242
loss: 0.9896999001502991,grad_norm: 0.782266533449736, iteration: 450243
loss: 1.0315239429473877,grad_norm: 0.8321431288197156, iteration: 450244
loss: 1.0605130195617676,grad_norm: 0.9999998513493472, iteration: 450245
loss: 0.9710671901702881,grad_norm: 0.7358702430642737, iteration: 450246
loss: 0.9486211538314819,grad_norm: 0.8311618904673718, iteration: 450247
loss: 1.0543001890182495,grad_norm: 0.9999991651519755, iteration: 450248
loss: 1.010944128036499,grad_norm: 0.7529531613822322, iteration: 450249
loss: 1.0869191884994507,grad_norm: 0.7783315878351171, iteration: 450250
loss: 1.1392420530319214,grad_norm: 0.9999992150447079, iteration: 450251
loss: 1.0161120891571045,grad_norm: 0.6895460974164282, iteration: 450252
loss: 1.0019358396530151,grad_norm: 0.6850402943749319, iteration: 450253
loss: 1.0230154991149902,grad_norm: 0.9999999516192737, iteration: 450254
loss: 1.0769168138504028,grad_norm: 0.9999996587843593, iteration: 450255
loss: 0.9938609600067139,grad_norm: 0.7766723182235191, iteration: 450256
loss: 1.0197783708572388,grad_norm: 0.8971891039932565, iteration: 450257
loss: 1.08401620388031,grad_norm: 1.0000000776667306, iteration: 450258
loss: 1.0155293941497803,grad_norm: 0.9999990561154466, iteration: 450259
loss: 0.9881300926208496,grad_norm: 0.7327131963590938, iteration: 450260
loss: 1.0081068277359009,grad_norm: 0.7689192089964904, iteration: 450261
loss: 0.9842265844345093,grad_norm: 0.9551397318153889, iteration: 450262
loss: 0.9687100648880005,grad_norm: 0.9999993079409397, iteration: 450263
loss: 1.1011176109313965,grad_norm: 0.9999997664922955, iteration: 450264
loss: 1.0138533115386963,grad_norm: 0.9999996436608694, iteration: 450265
loss: 1.123812198638916,grad_norm: 0.9999992364213536, iteration: 450266
loss: 1.0178114175796509,grad_norm: 0.6868968688590846, iteration: 450267
loss: 1.0282058715820312,grad_norm: 0.8261303586689104, iteration: 450268
loss: 0.9544439911842346,grad_norm: 0.7249559733714197, iteration: 450269
loss: 0.9615767598152161,grad_norm: 0.7400768048786659, iteration: 450270
loss: 0.9836592078208923,grad_norm: 0.9380594713272432, iteration: 450271
loss: 1.0217089653015137,grad_norm: 0.9999999330016663, iteration: 450272
loss: 1.0731829404830933,grad_norm: 0.9099157293485668, iteration: 450273
loss: 0.9768324494361877,grad_norm: 0.7492511719389328, iteration: 450274
loss: 1.1232198476791382,grad_norm: 0.9999998084700935, iteration: 450275
loss: 1.2646489143371582,grad_norm: 0.9999998752224203, iteration: 450276
loss: 0.995725154876709,grad_norm: 0.9480521304262689, iteration: 450277
loss: 1.0894404649734497,grad_norm: 0.9999990170758872, iteration: 450278
loss: 0.9725310206413269,grad_norm: 0.8019439045117902, iteration: 450279
loss: 0.9646024107933044,grad_norm: 0.8982137976609312, iteration: 450280
loss: 0.9961307644844055,grad_norm: 0.7330493760554296, iteration: 450281
loss: 1.0197070837020874,grad_norm: 0.7316543992856773, iteration: 450282
loss: 0.9825926423072815,grad_norm: 0.9507849269349449, iteration: 450283
loss: 1.0524910688400269,grad_norm: 0.9999997578365057, iteration: 450284
loss: 1.0415583848953247,grad_norm: 0.7236365833643613, iteration: 450285
loss: 1.0236507654190063,grad_norm: 0.702638533461672, iteration: 450286
loss: 1.100382924079895,grad_norm: 0.7715151877574463, iteration: 450287
loss: 0.9753801822662354,grad_norm: 0.7840468674580374, iteration: 450288
loss: 0.9883447885513306,grad_norm: 0.6968660284115258, iteration: 450289
loss: 0.9996979832649231,grad_norm: 0.7934281554866132, iteration: 450290
loss: 0.9906982779502869,grad_norm: 0.7590708878671971, iteration: 450291
loss: 1.054081916809082,grad_norm: 0.9999990459455745, iteration: 450292
loss: 1.0337138175964355,grad_norm: 0.9999990828186566, iteration: 450293
loss: 0.9798575639724731,grad_norm: 0.7093427347651431, iteration: 450294
loss: 1.0064523220062256,grad_norm: 0.7892388120453109, iteration: 450295
loss: 0.9939190745353699,grad_norm: 0.932315224738165, iteration: 450296
loss: 1.042700171470642,grad_norm: 0.8405449036781307, iteration: 450297
loss: 1.0015580654144287,grad_norm: 0.7647957073082068, iteration: 450298
loss: 1.0353208780288696,grad_norm: 0.8546433866269302, iteration: 450299
loss: 0.9709655046463013,grad_norm: 0.7175888490129652, iteration: 450300
loss: 1.1042371988296509,grad_norm: 0.7542805644466029, iteration: 450301
loss: 0.9815619587898254,grad_norm: 0.649118756596168, iteration: 450302
loss: 0.9779520630836487,grad_norm: 0.8055477980764457, iteration: 450303
loss: 0.9865515828132629,grad_norm: 0.7898366781544199, iteration: 450304
loss: 1.0109434127807617,grad_norm: 0.6426274873601838, iteration: 450305
loss: 0.9761670827865601,grad_norm: 0.5824965996367559, iteration: 450306
loss: 0.9999412894248962,grad_norm: 0.8908378589698221, iteration: 450307
loss: 0.9951257109642029,grad_norm: 0.6388612983186941, iteration: 450308
loss: 1.0027326345443726,grad_norm: 0.7768043465439827, iteration: 450309
loss: 1.058127522468567,grad_norm: 0.9999991994577287, iteration: 450310
loss: 0.9908897280693054,grad_norm: 0.835023786211249, iteration: 450311
loss: 1.0234071016311646,grad_norm: 0.7440745808140068, iteration: 450312
loss: 1.031768798828125,grad_norm: 0.8164552371003451, iteration: 450313
loss: 1.0051072835922241,grad_norm: 0.9397926760350329, iteration: 450314
loss: 1.02122163772583,grad_norm: 0.8012308303292046, iteration: 450315
loss: 0.9958673119544983,grad_norm: 0.7731540883006817, iteration: 450316
loss: 1.0413552522659302,grad_norm: 0.9999997351003492, iteration: 450317
loss: 1.0460153818130493,grad_norm: 0.6817275319319083, iteration: 450318
loss: 0.9861211776733398,grad_norm: 0.9999998278871061, iteration: 450319
loss: 1.044204592704773,grad_norm: 0.8965033410663493, iteration: 450320
loss: 1.0905715227127075,grad_norm: 0.9999999200706255, iteration: 450321
loss: 0.9955211281776428,grad_norm: 0.7801855029663243, iteration: 450322
loss: 1.0127829313278198,grad_norm: 0.8764695950570606, iteration: 450323
loss: 1.0846689939498901,grad_norm: 0.9999992633247142, iteration: 450324
loss: 1.027795433998108,grad_norm: 0.8605385857024233, iteration: 450325
loss: 1.0036308765411377,grad_norm: 0.946194743406566, iteration: 450326
loss: 1.0068775415420532,grad_norm: 0.9999990053813892, iteration: 450327
loss: 0.9943026900291443,grad_norm: 0.8566212076718348, iteration: 450328
loss: 1.0664209127426147,grad_norm: 0.8487795906065446, iteration: 450329
loss: 0.9903076887130737,grad_norm: 0.6604461007343069, iteration: 450330
loss: 1.05155348777771,grad_norm: 0.9177574161591132, iteration: 450331
loss: 1.023490071296692,grad_norm: 0.7078879730741563, iteration: 450332
loss: 1.0203760862350464,grad_norm: 0.9652117760153756, iteration: 450333
loss: 1.0195281505584717,grad_norm: 0.7914966874029774, iteration: 450334
loss: 1.005820631980896,grad_norm: 0.8497109901110603, iteration: 450335
loss: 0.9525827765464783,grad_norm: 0.7573721969137746, iteration: 450336
loss: 1.025246024131775,grad_norm: 0.688763724547288, iteration: 450337
loss: 1.0441440343856812,grad_norm: 0.9071697715694548, iteration: 450338
loss: 0.9772307872772217,grad_norm: 0.6720401462692682, iteration: 450339
loss: 1.0303393602371216,grad_norm: 0.9999998409923787, iteration: 450340
loss: 1.035477638244629,grad_norm: 0.7672615156984605, iteration: 450341
loss: 1.0139907598495483,grad_norm: 0.7723455165312747, iteration: 450342
loss: 1.0432004928588867,grad_norm: 0.9999991828813133, iteration: 450343
loss: 1.15546715259552,grad_norm: 0.9999999623990208, iteration: 450344
loss: 1.0209347009658813,grad_norm: 0.9999997003020956, iteration: 450345
loss: 1.1368873119354248,grad_norm: 0.9999998460818715, iteration: 450346
loss: 0.9470127820968628,grad_norm: 0.9999990656697524, iteration: 450347
loss: 0.9733504056930542,grad_norm: 0.7192172966176948, iteration: 450348
loss: 0.9691944718360901,grad_norm: 0.8956971545029422, iteration: 450349
loss: 1.0357187986373901,grad_norm: 0.9999991862447448, iteration: 450350
loss: 1.0981611013412476,grad_norm: 0.9999997084107298, iteration: 450351
loss: 1.0201948881149292,grad_norm: 0.9999990563865587, iteration: 450352
loss: 1.0622742176055908,grad_norm: 0.671946341570266, iteration: 450353
loss: 0.964514434337616,grad_norm: 0.8172545944585204, iteration: 450354
loss: 1.1618874073028564,grad_norm: 0.9999999616308051, iteration: 450355
loss: 1.0132172107696533,grad_norm: 0.7209799488037959, iteration: 450356
loss: 0.9894987344741821,grad_norm: 0.7231567818933767, iteration: 450357
loss: 0.9882533550262451,grad_norm: 0.7365318363143453, iteration: 450358
loss: 0.9922269582748413,grad_norm: 0.706504222454945, iteration: 450359
loss: 1.105574607849121,grad_norm: 0.9999999590286989, iteration: 450360
loss: 1.0353517532348633,grad_norm: 0.8606419247821429, iteration: 450361
loss: 1.0042227506637573,grad_norm: 0.8804452720603372, iteration: 450362
loss: 1.0072863101959229,grad_norm: 0.8718370819157132, iteration: 450363
loss: 1.0207633972167969,grad_norm: 0.8038655784260315, iteration: 450364
loss: 0.9947067499160767,grad_norm: 0.8287361835706171, iteration: 450365
loss: 0.978378415107727,grad_norm: 0.9999996149370548, iteration: 450366
loss: 0.9645802974700928,grad_norm: 0.8535828433647465, iteration: 450367
loss: 1.0019335746765137,grad_norm: 0.7062419166936402, iteration: 450368
loss: 1.0424466133117676,grad_norm: 0.941594909553311, iteration: 450369
loss: 1.0015212297439575,grad_norm: 0.72032731003541, iteration: 450370
loss: 1.0195266008377075,grad_norm: 0.8287402040685273, iteration: 450371
loss: 1.1042145490646362,grad_norm: 0.9999992915218072, iteration: 450372
loss: 1.0040334463119507,grad_norm: 0.6018863250214291, iteration: 450373
loss: 0.9940652251243591,grad_norm: 0.9451302173457423, iteration: 450374
loss: 1.1609218120574951,grad_norm: 0.9999991485169372, iteration: 450375
loss: 1.0112653970718384,grad_norm: 0.8521936462849531, iteration: 450376
loss: 1.0303349494934082,grad_norm: 0.9999991816667175, iteration: 450377
loss: 1.0074958801269531,grad_norm: 0.9999996326446272, iteration: 450378
loss: 1.0262876749038696,grad_norm: 0.7478496937346782, iteration: 450379
loss: 0.9825621843338013,grad_norm: 0.7059265055535205, iteration: 450380
loss: 0.992517352104187,grad_norm: 0.8274287177091051, iteration: 450381
loss: 1.0085947513580322,grad_norm: 0.9999997009045878, iteration: 450382
loss: 1.0739243030548096,grad_norm: 0.9999990929075865, iteration: 450383
loss: 0.991789698600769,grad_norm: 0.8547562269984169, iteration: 450384
loss: 1.0546523332595825,grad_norm: 0.6830351642740292, iteration: 450385
loss: 1.0240602493286133,grad_norm: 0.9999997151925258, iteration: 450386
loss: 1.0039790868759155,grad_norm: 0.7398791568038514, iteration: 450387
loss: 1.0140745639801025,grad_norm: 0.7098852129252246, iteration: 450388
loss: 1.0883198976516724,grad_norm: 0.8747204638759254, iteration: 450389
loss: 1.0938926935195923,grad_norm: 0.8909887058349178, iteration: 450390
loss: 1.0053884983062744,grad_norm: 0.7049808094632122, iteration: 450391
loss: 1.0398783683776855,grad_norm: 0.8881385505608727, iteration: 450392
loss: 0.9673434495925903,grad_norm: 0.7681320301610062, iteration: 450393
loss: 1.0056281089782715,grad_norm: 0.8069974029801249, iteration: 450394
loss: 1.0345340967178345,grad_norm: 0.6891820092325217, iteration: 450395
loss: 1.0034877061843872,grad_norm: 0.6956487753752866, iteration: 450396
loss: 0.9731579422950745,grad_norm: 0.8991512965165979, iteration: 450397
loss: 1.0084331035614014,grad_norm: 0.7556812356600303, iteration: 450398
loss: 0.9968199729919434,grad_norm: 0.6858799950553259, iteration: 450399
loss: 1.0043727159500122,grad_norm: 0.6995078503139424, iteration: 450400
loss: 1.0372499227523804,grad_norm: 0.8089263211398702, iteration: 450401
loss: 1.0256558656692505,grad_norm: 0.7919083445398664, iteration: 450402
loss: 1.033724069595337,grad_norm: 0.9346399216962108, iteration: 450403
loss: 0.9919633269309998,grad_norm: 0.7308871921238289, iteration: 450404
loss: 1.0181483030319214,grad_norm: 0.698646178973505, iteration: 450405
loss: 1.0099458694458008,grad_norm: 0.9999999025614625, iteration: 450406
loss: 1.0049536228179932,grad_norm: 0.7604266055436077, iteration: 450407
loss: 1.016518473625183,grad_norm: 0.8857525975267972, iteration: 450408
loss: 0.9754182696342468,grad_norm: 0.8056034270983603, iteration: 450409
loss: 1.0255051851272583,grad_norm: 0.8581630255148321, iteration: 450410
loss: 1.0193259716033936,grad_norm: 0.9999994342394005, iteration: 450411
loss: 1.007846713066101,grad_norm: 0.6794649637697294, iteration: 450412
loss: 0.9823741912841797,grad_norm: 0.8056117043877549, iteration: 450413
loss: 1.019286870956421,grad_norm: 0.8335516771433887, iteration: 450414
loss: 1.0758147239685059,grad_norm: 0.9999992885997305, iteration: 450415
loss: 0.9734956622123718,grad_norm: 0.6450397382240809, iteration: 450416
loss: 0.9930559992790222,grad_norm: 0.8437809173457772, iteration: 450417
loss: 0.9953213930130005,grad_norm: 0.6588791710267027, iteration: 450418
loss: 1.0140970945358276,grad_norm: 0.5964961767840482, iteration: 450419
loss: 1.025774359703064,grad_norm: 0.9999994508845658, iteration: 450420
loss: 0.9866726994514465,grad_norm: 0.7246586876132143, iteration: 450421
loss: 1.0261865854263306,grad_norm: 0.827943730183709, iteration: 450422
loss: 1.009484052658081,grad_norm: 0.6768841603777506, iteration: 450423
loss: 1.0050880908966064,grad_norm: 0.8354391343468724, iteration: 450424
loss: 1.006135106086731,grad_norm: 0.929174820770026, iteration: 450425
loss: 1.0190925598144531,grad_norm: 0.9999998482681911, iteration: 450426
loss: 1.0177241563796997,grad_norm: 0.9587922428503605, iteration: 450427
loss: 1.017356514930725,grad_norm: 0.9220380036812795, iteration: 450428
loss: 0.9936856627464294,grad_norm: 0.696691511178488, iteration: 450429
loss: 0.9882352948188782,grad_norm: 0.6474654892413217, iteration: 450430
loss: 1.006149172782898,grad_norm: 0.9493575363678315, iteration: 450431
loss: 0.9859219789505005,grad_norm: 0.842694997302965, iteration: 450432
loss: 1.0069279670715332,grad_norm: 1.0000000178294808, iteration: 450433
loss: 1.0256929397583008,grad_norm: 0.8065439889720333, iteration: 450434
loss: 0.9536147713661194,grad_norm: 0.8838195071031274, iteration: 450435
loss: 0.9756107330322266,grad_norm: 0.8239555320471932, iteration: 450436
loss: 1.0215095281600952,grad_norm: 0.6513589797977145, iteration: 450437
loss: 0.9797031283378601,grad_norm: 0.8045972831275296, iteration: 450438
loss: 1.0090112686157227,grad_norm: 0.7977246840637916, iteration: 450439
loss: 1.0162017345428467,grad_norm: 0.9999996530138459, iteration: 450440
loss: 0.9998817443847656,grad_norm: 0.7837430553165733, iteration: 450441
loss: 0.9858088493347168,grad_norm: 0.7939222563509659, iteration: 450442
loss: 1.011491298675537,grad_norm: 0.6958229966655239, iteration: 450443
loss: 1.013479232788086,grad_norm: 0.7395770215643672, iteration: 450444
loss: 0.9903194308280945,grad_norm: 0.7630374135276611, iteration: 450445
loss: 0.9649337530136108,grad_norm: 0.7284147955762101, iteration: 450446
loss: 0.9733995199203491,grad_norm: 0.8056054475367255, iteration: 450447
loss: 1.190195083618164,grad_norm: 0.9999996719346623, iteration: 450448
loss: 1.1152002811431885,grad_norm: 0.9999992087474292, iteration: 450449
loss: 1.0064518451690674,grad_norm: 0.999999235983567, iteration: 450450
loss: 0.9933806657791138,grad_norm: 0.636204431695311, iteration: 450451
loss: 1.0349370241165161,grad_norm: 0.9999997435448439, iteration: 450452
loss: 1.0238484144210815,grad_norm: 0.5930774546724084, iteration: 450453
loss: 0.9707326889038086,grad_norm: 0.8636095654956848, iteration: 450454
loss: 1.0237221717834473,grad_norm: 0.7803583695769999, iteration: 450455
loss: 1.1124870777130127,grad_norm: 0.8148339275514936, iteration: 450456
loss: 1.02573561668396,grad_norm: 0.8905101090644204, iteration: 450457
loss: 0.9840055108070374,grad_norm: 0.6765886806423365, iteration: 450458
loss: 1.0807985067367554,grad_norm: 0.9999991971235814, iteration: 450459
loss: 1.03290855884552,grad_norm: 0.7933851798558872, iteration: 450460
loss: 1.007991075515747,grad_norm: 0.7458684570543208, iteration: 450461
loss: 1.0299497842788696,grad_norm: 0.6279973535291917, iteration: 450462
loss: 0.9893554449081421,grad_norm: 0.7568668462557434, iteration: 450463
loss: 0.9967297911643982,grad_norm: 0.8035665104411488, iteration: 450464
loss: 0.977482795715332,grad_norm: 0.7253074799264504, iteration: 450465
loss: 1.020102620124817,grad_norm: 0.9999990633523788, iteration: 450466
loss: 0.9927682280540466,grad_norm: 0.7575574016436896, iteration: 450467
loss: 1.0651413202285767,grad_norm: 0.9999992398770879, iteration: 450468
loss: 0.9691948890686035,grad_norm: 0.8039242963493692, iteration: 450469
loss: 1.0124815702438354,grad_norm: 0.671023329654246, iteration: 450470
loss: 1.0069524049758911,grad_norm: 0.877658983413846, iteration: 450471
loss: 0.9819329380989075,grad_norm: 0.7637867795907056, iteration: 450472
loss: 0.9805837869644165,grad_norm: 0.7696493819177184, iteration: 450473
loss: 1.0012669563293457,grad_norm: 0.7934235228360862, iteration: 450474
loss: 1.005486249923706,grad_norm: 0.7685390470434527, iteration: 450475
loss: 1.0203394889831543,grad_norm: 0.9999990552726673, iteration: 450476
loss: 1.028396487236023,grad_norm: 0.9999994231005173, iteration: 450477
loss: 0.9690877199172974,grad_norm: 0.7342567643446278, iteration: 450478
loss: 0.9618161916732788,grad_norm: 0.8640210914973774, iteration: 450479
loss: 1.0164830684661865,grad_norm: 0.7597242450595293, iteration: 450480
loss: 1.1477065086364746,grad_norm: 0.9999992194465934, iteration: 450481
loss: 0.9998675584793091,grad_norm: 0.7556492743808264, iteration: 450482
loss: 1.0163600444793701,grad_norm: 0.6804934739973401, iteration: 450483
loss: 1.0637826919555664,grad_norm: 0.8078511307728061, iteration: 450484
loss: 1.05720853805542,grad_norm: 0.999999494607213, iteration: 450485
loss: 0.9838559031486511,grad_norm: 0.8751060795183064, iteration: 450486
loss: 0.9829272627830505,grad_norm: 0.997414419843389, iteration: 450487
loss: 1.020218849182129,grad_norm: 0.9999992894645979, iteration: 450488
loss: 1.0579438209533691,grad_norm: 0.9999994251421087, iteration: 450489
loss: 1.0254582166671753,grad_norm: 0.80121421160565, iteration: 450490
loss: 0.9879748821258545,grad_norm: 0.9834538681521926, iteration: 450491
loss: 0.9648443460464478,grad_norm: 0.9999999207681936, iteration: 450492
loss: 1.0736173391342163,grad_norm: 0.9290776184669108, iteration: 450493
loss: 0.9858418107032776,grad_norm: 0.9047293577053396, iteration: 450494
loss: 1.0136234760284424,grad_norm: 0.8225950223018248, iteration: 450495
loss: 0.9679829478263855,grad_norm: 0.8142103599184557, iteration: 450496
loss: 1.0562812089920044,grad_norm: 0.781974665843088, iteration: 450497
loss: 0.959045946598053,grad_norm: 0.7095656300067811, iteration: 450498
loss: 0.9510905742645264,grad_norm: 0.7907567135061362, iteration: 450499
loss: 1.0202163457870483,grad_norm: 0.7318093242556387, iteration: 450500
loss: 0.9950663447380066,grad_norm: 0.654837956874715, iteration: 450501
loss: 1.0219309329986572,grad_norm: 0.766487574559202, iteration: 450502
loss: 1.0247167348861694,grad_norm: 0.7482888803158783, iteration: 450503
loss: 1.0108846426010132,grad_norm: 0.8841421563417722, iteration: 450504
loss: 1.003539800643921,grad_norm: 0.6914085076314245, iteration: 450505
loss: 1.003299593925476,grad_norm: 0.7200384679620226, iteration: 450506
loss: 1.0104957818984985,grad_norm: 0.7127419719076276, iteration: 450507
loss: 0.9969149231910706,grad_norm: 0.8521294130448676, iteration: 450508
loss: 0.9948524832725525,grad_norm: 0.7553293222150601, iteration: 450509
loss: 0.9503069519996643,grad_norm: 0.8875634254433595, iteration: 450510
loss: 1.0055696964263916,grad_norm: 0.9805929390984492, iteration: 450511
loss: 0.9740107655525208,grad_norm: 0.8285737822416164, iteration: 450512
loss: 1.0949313640594482,grad_norm: 0.7966382821464096, iteration: 450513
loss: 1.0003262758255005,grad_norm: 0.812589223136293, iteration: 450514
loss: 1.056084156036377,grad_norm: 0.9227036581551966, iteration: 450515
loss: 0.9986000061035156,grad_norm: 0.9999994652036748, iteration: 450516
loss: 1.0547711849212646,grad_norm: 0.9999994018711497, iteration: 450517
loss: 1.0116462707519531,grad_norm: 0.7555553324491081, iteration: 450518
loss: 0.9855791330337524,grad_norm: 0.6329097942448405, iteration: 450519
loss: 0.964310348033905,grad_norm: 0.9516587887134185, iteration: 450520
loss: 0.9915734529495239,grad_norm: 0.787322818602096, iteration: 450521
loss: 0.9867327213287354,grad_norm: 0.8137624710625708, iteration: 450522
loss: 1.026192307472229,grad_norm: 0.9825981457764211, iteration: 450523
loss: 1.0244345664978027,grad_norm: 0.9999993236581768, iteration: 450524
loss: 1.0391322374343872,grad_norm: 0.8221695571463267, iteration: 450525
loss: 0.9662833213806152,grad_norm: 0.6768619988322148, iteration: 450526
loss: 1.1438919305801392,grad_norm: 0.9999998270447802, iteration: 450527
loss: 0.9640593528747559,grad_norm: 0.6431354046545613, iteration: 450528
loss: 0.9903949499130249,grad_norm: 0.668389735942056, iteration: 450529
loss: 1.0108274221420288,grad_norm: 0.7611762725978823, iteration: 450530
loss: 0.9864890575408936,grad_norm: 0.7877346920385044, iteration: 450531
loss: 0.998222291469574,grad_norm: 0.7914182296727564, iteration: 450532
loss: 1.0063529014587402,grad_norm: 0.657756877097766, iteration: 450533
loss: 1.0170745849609375,grad_norm: 0.9999996104344515, iteration: 450534
loss: 1.000433325767517,grad_norm: 0.7544380708643, iteration: 450535
loss: 1.0033119916915894,grad_norm: 0.9064873025396111, iteration: 450536
loss: 0.990021824836731,grad_norm: 0.865054991434148, iteration: 450537
loss: 0.9562415480613708,grad_norm: 0.9086603193372083, iteration: 450538
loss: 1.0402852296829224,grad_norm: 0.8059321437229681, iteration: 450539
loss: 1.0211601257324219,grad_norm: 0.9999995705998037, iteration: 450540
loss: 1.1222851276397705,grad_norm: 0.9999994486921614, iteration: 450541
loss: 0.963972806930542,grad_norm: 0.6885311327561601, iteration: 450542
loss: 1.0018410682678223,grad_norm: 0.8705086814373731, iteration: 450543
loss: 1.0031520128250122,grad_norm: 0.8982647724935507, iteration: 450544
loss: 0.9705223441123962,grad_norm: 0.9999995195727845, iteration: 450545
loss: 1.024186611175537,grad_norm: 0.98491365667922, iteration: 450546
loss: 1.079531192779541,grad_norm: 0.7748394287877582, iteration: 450547
loss: 1.0045899152755737,grad_norm: 0.9999992495719368, iteration: 450548
loss: 0.995474636554718,grad_norm: 0.646535732431907, iteration: 450549
loss: 0.9999823570251465,grad_norm: 0.8887010680123256, iteration: 450550
loss: 0.9821033477783203,grad_norm: 0.9999992049332382, iteration: 450551
loss: 0.9781603813171387,grad_norm: 0.776722814029317, iteration: 450552
loss: 0.9653522968292236,grad_norm: 0.7348946879272631, iteration: 450553
loss: 0.996895432472229,grad_norm: 0.7739987426022563, iteration: 450554
loss: 0.9974287152290344,grad_norm: 0.5964596288798637, iteration: 450555
loss: 0.958227813243866,grad_norm: 0.7447249554012502, iteration: 450556
loss: 1.0983959436416626,grad_norm: 0.9999997722391457, iteration: 450557
loss: 0.9908672571182251,grad_norm: 0.8578749833157265, iteration: 450558
loss: 0.9650115966796875,grad_norm: 0.6651224411456982, iteration: 450559
loss: 0.990746259689331,grad_norm: 0.8034287004579024, iteration: 450560
loss: 1.0154039859771729,grad_norm: 0.8252751729659784, iteration: 450561
loss: 1.0061966180801392,grad_norm: 0.7413741317060708, iteration: 450562
loss: 0.9960684180259705,grad_norm: 0.7498929778799991, iteration: 450563
loss: 1.1098123788833618,grad_norm: 0.9999998012509443, iteration: 450564
loss: 1.0081393718719482,grad_norm: 0.9999991046717437, iteration: 450565
loss: 1.025044560432434,grad_norm: 0.8385107304848427, iteration: 450566
loss: 1.072052240371704,grad_norm: 0.8020097026267373, iteration: 450567
loss: 0.9898635745048523,grad_norm: 0.8100995837529166, iteration: 450568
loss: 0.9772055745124817,grad_norm: 0.999999178099004, iteration: 450569
loss: 0.9768210053443909,grad_norm: 0.7576265317157131, iteration: 450570
loss: 1.0124752521514893,grad_norm: 0.8033184775269055, iteration: 450571
loss: 0.9713379740715027,grad_norm: 0.7111938576069611, iteration: 450572
loss: 1.0846978425979614,grad_norm: 0.7983794298961335, iteration: 450573
loss: 1.012287974357605,grad_norm: 0.8468825470090011, iteration: 450574
loss: 1.0339438915252686,grad_norm: 0.8464544189534281, iteration: 450575
loss: 0.9914537668228149,grad_norm: 0.7195239169087336, iteration: 450576
loss: 0.9733718633651733,grad_norm: 0.860724756661392, iteration: 450577
loss: 1.0175492763519287,grad_norm: 0.630683312254721, iteration: 450578
loss: 1.083727478981018,grad_norm: 0.9293067026318215, iteration: 450579
loss: 0.9955408573150635,grad_norm: 0.8985514641756227, iteration: 450580
loss: 1.047343134880066,grad_norm: 0.9363810241686137, iteration: 450581
loss: 1.01034414768219,grad_norm: 0.8909213089751059, iteration: 450582
loss: 1.0071419477462769,grad_norm: 0.7016258189990974, iteration: 450583
loss: 1.075973629951477,grad_norm: 0.809986699833781, iteration: 450584
loss: 1.0132386684417725,grad_norm: 0.7012400581008313, iteration: 450585
loss: 0.9775246381759644,grad_norm: 0.8359659945723408, iteration: 450586
loss: 0.9902902245521545,grad_norm: 0.6461023831799778, iteration: 450587
loss: 1.024410367012024,grad_norm: 0.9999991498788157, iteration: 450588
loss: 1.0158244371414185,grad_norm: 0.7901710495868496, iteration: 450589
loss: 1.0100642442703247,grad_norm: 0.7261981273321956, iteration: 450590
loss: 0.9739404320716858,grad_norm: 0.9999991158590271, iteration: 450591
loss: 1.0131802558898926,grad_norm: 0.7833242267316657, iteration: 450592
loss: 1.0260426998138428,grad_norm: 0.7031175297671701, iteration: 450593
loss: 1.0066384077072144,grad_norm: 0.7543079033645578, iteration: 450594
loss: 1.0665627717971802,grad_norm: 0.9999990586316587, iteration: 450595
loss: 1.0173158645629883,grad_norm: 0.9999997941734137, iteration: 450596
loss: 1.0019913911819458,grad_norm: 0.7654056504772181, iteration: 450597
loss: 0.9876076579093933,grad_norm: 0.9999996929666857, iteration: 450598
loss: 1.0450644493103027,grad_norm: 0.7758951896177765, iteration: 450599
loss: 0.9875988364219666,grad_norm: 0.7735190900992971, iteration: 450600
loss: 1.0256295204162598,grad_norm: 0.6966874622715787, iteration: 450601
loss: 0.996255099773407,grad_norm: 0.7982329622209436, iteration: 450602
loss: 0.9940645098686218,grad_norm: 0.6937665766155106, iteration: 450603
loss: 1.035030484199524,grad_norm: 0.768234560703688, iteration: 450604
loss: 0.9855220317840576,grad_norm: 0.9068973902855619, iteration: 450605
loss: 1.0228307247161865,grad_norm: 0.9378516922552135, iteration: 450606
loss: 0.9522537589073181,grad_norm: 0.8335498194053657, iteration: 450607
loss: 1.0149118900299072,grad_norm: 0.789334751895876, iteration: 450608
loss: 1.024932861328125,grad_norm: 0.999999556412882, iteration: 450609
loss: 1.0149164199829102,grad_norm: 0.7880225260590976, iteration: 450610
loss: 1.036708116531372,grad_norm: 0.999999262256355, iteration: 450611
loss: 1.0130608081817627,grad_norm: 0.8668509235093238, iteration: 450612
loss: 1.0001702308654785,grad_norm: 0.9999991478236324, iteration: 450613
loss: 1.0147768259048462,grad_norm: 0.9999994235087658, iteration: 450614
loss: 0.9837292432785034,grad_norm: 0.7596172078215719, iteration: 450615
loss: 0.9891597032546997,grad_norm: 0.6871524086711603, iteration: 450616
loss: 1.0316964387893677,grad_norm: 0.6533612602425157, iteration: 450617
loss: 1.0314961671829224,grad_norm: 0.8039847954112583, iteration: 450618
loss: 1.0286051034927368,grad_norm: 0.7192739362074636, iteration: 450619
loss: 0.992132842540741,grad_norm: 0.7679339472772106, iteration: 450620
loss: 1.0425622463226318,grad_norm: 0.7705358065931076, iteration: 450621
loss: 1.008644461631775,grad_norm: 0.8350924984278905, iteration: 450622
loss: 0.9878237247467041,grad_norm: 0.6644262988265457, iteration: 450623
loss: 0.9967901110649109,grad_norm: 0.9982900305180451, iteration: 450624
loss: 1.0347647666931152,grad_norm: 0.7109178023564513, iteration: 450625
loss: 0.9969662427902222,grad_norm: 0.7058419548049335, iteration: 450626
loss: 1.007717251777649,grad_norm: 0.8612771334248294, iteration: 450627
loss: 0.9725891351699829,grad_norm: 0.6640832152509396, iteration: 450628
loss: 0.9975050687789917,grad_norm: 0.9999995739586082, iteration: 450629
loss: 0.9864872097969055,grad_norm: 0.7618894709131991, iteration: 450630
loss: 0.9732146859169006,grad_norm: 0.7195331721057215, iteration: 450631
loss: 0.9831969141960144,grad_norm: 0.6061693631330363, iteration: 450632
loss: 1.0049173831939697,grad_norm: 0.7030291969042849, iteration: 450633
loss: 0.9903845191001892,grad_norm: 0.7096640275759616, iteration: 450634
loss: 1.0549070835113525,grad_norm: 0.9999999033898018, iteration: 450635
loss: 0.9933755993843079,grad_norm: 0.9999998723305088, iteration: 450636
loss: 1.0043221712112427,grad_norm: 0.9162861620784762, iteration: 450637
loss: 0.954423189163208,grad_norm: 0.7030780451780295, iteration: 450638
loss: 1.0107723474502563,grad_norm: 0.9999990965781277, iteration: 450639
loss: 1.0537395477294922,grad_norm: 0.8529497232741655, iteration: 450640
loss: 1.2372910976409912,grad_norm: 0.9999996634961806, iteration: 450641
loss: 1.0682463645935059,grad_norm: 0.6941818828888507, iteration: 450642
loss: 1.0134170055389404,grad_norm: 0.7097833148986631, iteration: 450643
loss: 1.0194997787475586,grad_norm: 0.7056175720870534, iteration: 450644
loss: 0.9935140609741211,grad_norm: 0.6648791479945212, iteration: 450645
loss: 1.0170236825942993,grad_norm: 0.7938210483095027, iteration: 450646
loss: 0.9895085692405701,grad_norm: 0.7846281849200222, iteration: 450647
loss: 0.9662096500396729,grad_norm: 0.5847113456899368, iteration: 450648
loss: 0.9989949464797974,grad_norm: 0.7918726413071308, iteration: 450649
loss: 1.0177247524261475,grad_norm: 0.8758155459765489, iteration: 450650
loss: 1.029813528060913,grad_norm: 0.8902246202460941, iteration: 450651
loss: 0.9729575514793396,grad_norm: 0.8144496027159517, iteration: 450652
loss: 1.0256444215774536,grad_norm: 0.9999991747112291, iteration: 450653
loss: 1.0396665334701538,grad_norm: 0.8401273132902766, iteration: 450654
loss: 0.9984989166259766,grad_norm: 0.7226949003466236, iteration: 450655
loss: 1.0046608448028564,grad_norm: 0.7441777009554604, iteration: 450656
loss: 0.9874855875968933,grad_norm: 0.881941799498061, iteration: 450657
loss: 1.0168172121047974,grad_norm: 0.7255921596409631, iteration: 450658
loss: 1.0375112295150757,grad_norm: 0.7510822291275988, iteration: 450659
loss: 0.9913593530654907,grad_norm: 0.9999991332431986, iteration: 450660
loss: 1.0123291015625,grad_norm: 0.9999994434046741, iteration: 450661
loss: 0.9727008938789368,grad_norm: 0.8706679718470053, iteration: 450662
loss: 0.9663102626800537,grad_norm: 0.7210994750691867, iteration: 450663
loss: 0.9650551080703735,grad_norm: 0.6740021414038571, iteration: 450664
loss: 0.9812647104263306,grad_norm: 0.7875799529632238, iteration: 450665
loss: 1.0625567436218262,grad_norm: 0.9236641976402795, iteration: 450666
loss: 0.9671862125396729,grad_norm: 0.7403614219158132, iteration: 450667
loss: 0.9927587509155273,grad_norm: 0.9999991271297611, iteration: 450668
loss: 1.0014972686767578,grad_norm: 0.81213244212592, iteration: 450669
loss: 0.9923181533813477,grad_norm: 0.7646428832245428, iteration: 450670
loss: 1.0114256143569946,grad_norm: 0.8928497134895614, iteration: 450671
loss: 1.0171823501586914,grad_norm: 0.6489321633900377, iteration: 450672
loss: 1.0155727863311768,grad_norm: 0.6993805484453826, iteration: 450673
loss: 1.0184299945831299,grad_norm: 0.790257905639564, iteration: 450674
loss: 1.0026768445968628,grad_norm: 0.6697441067627106, iteration: 450675
loss: 0.9974019527435303,grad_norm: 0.7207954116416225, iteration: 450676
loss: 0.9787814617156982,grad_norm: 0.6772379137660453, iteration: 450677
loss: 0.9971475005149841,grad_norm: 0.7741556442526772, iteration: 450678
loss: 1.0069295167922974,grad_norm: 0.8531419820829431, iteration: 450679
loss: 1.0206204652786255,grad_norm: 0.7857461424364931, iteration: 450680
loss: 1.029181718826294,grad_norm: 0.8380333582584172, iteration: 450681
loss: 1.0402684211730957,grad_norm: 0.9999995418483403, iteration: 450682
loss: 1.0745856761932373,grad_norm: 0.8396091071791968, iteration: 450683
loss: 1.0200650691986084,grad_norm: 0.7704653043234766, iteration: 450684
loss: 0.9724590182304382,grad_norm: 0.82564318205561, iteration: 450685
loss: 0.9943088293075562,grad_norm: 0.8012984630674254, iteration: 450686
loss: 1.0169183015823364,grad_norm: 0.9323903923344438, iteration: 450687
loss: 1.1435017585754395,grad_norm: 0.9999996518638891, iteration: 450688
loss: 1.036336898803711,grad_norm: 0.8268423355878332, iteration: 450689
loss: 0.9959259033203125,grad_norm: 0.9999990808743549, iteration: 450690
loss: 1.001510500907898,grad_norm: 0.7065633322895801, iteration: 450691
loss: 0.9751113653182983,grad_norm: 0.8463560235521659, iteration: 450692
loss: 0.9893020391464233,grad_norm: 0.9999989461795773, iteration: 450693
loss: 1.0061699151992798,grad_norm: 0.7914602682114991, iteration: 450694
loss: 1.0031129121780396,grad_norm: 0.8615857660014007, iteration: 450695
loss: 1.0009946823120117,grad_norm: 0.7381089266040902, iteration: 450696
loss: 1.0405298471450806,grad_norm: 0.6468338487919345, iteration: 450697
loss: 0.9704559445381165,grad_norm: 0.8172553260004228, iteration: 450698
loss: 1.0082579851150513,grad_norm: 0.9588493409603936, iteration: 450699
loss: 1.0026987791061401,grad_norm: 0.7748249956129856, iteration: 450700
loss: 1.0304691791534424,grad_norm: 0.6239056876743279, iteration: 450701
loss: 1.01535964012146,grad_norm: 0.7931745652666563, iteration: 450702
loss: 0.9953185319900513,grad_norm: 0.8776699691019769, iteration: 450703
loss: 0.9808123111724854,grad_norm: 0.7719363990606405, iteration: 450704
loss: 0.9809496998786926,grad_norm: 0.7955853068026796, iteration: 450705
loss: 0.9881800413131714,grad_norm: 0.7199767044781851, iteration: 450706
loss: 1.0850852727890015,grad_norm: 0.9999998224042663, iteration: 450707
loss: 1.0122071504592896,grad_norm: 0.6399067537319796, iteration: 450708
loss: 1.0062364339828491,grad_norm: 0.8098998531264816, iteration: 450709
loss: 0.9845016598701477,grad_norm: 0.7036715201203928, iteration: 450710
loss: 1.0759199857711792,grad_norm: 1.0000000559756919, iteration: 450711
loss: 1.0381107330322266,grad_norm: 0.9073156096004956, iteration: 450712
loss: 1.0016217231750488,grad_norm: 0.6817606915595088, iteration: 450713
loss: 1.0072088241577148,grad_norm: 0.8027363340196998, iteration: 450714
loss: 0.972508430480957,grad_norm: 0.9067377928383259, iteration: 450715
loss: 1.0444841384887695,grad_norm: 0.7630955221568956, iteration: 450716
loss: 0.9737553000450134,grad_norm: 0.8379509840105525, iteration: 450717
loss: 0.9867068529129028,grad_norm: 0.78756632850642, iteration: 450718
loss: 1.0108219385147095,grad_norm: 0.7707681339951981, iteration: 450719
loss: 1.0215373039245605,grad_norm: 0.7392657956687859, iteration: 450720
loss: 0.9858285784721375,grad_norm: 0.7264038144589975, iteration: 450721
loss: 1.0015521049499512,grad_norm: 0.9999998210039976, iteration: 450722
loss: 0.9918232560157776,grad_norm: 0.8119769697636515, iteration: 450723
loss: 1.0079981088638306,grad_norm: 0.8394960170308204, iteration: 450724
loss: 1.0142836570739746,grad_norm: 0.7549395474095721, iteration: 450725
loss: 0.9922431707382202,grad_norm: 0.8097808709147287, iteration: 450726
loss: 0.9900128245353699,grad_norm: 0.6595131448683326, iteration: 450727
loss: 1.0197608470916748,grad_norm: 0.8564597025377787, iteration: 450728
loss: 1.024918556213379,grad_norm: 0.7267554997538568, iteration: 450729
loss: 1.0219876766204834,grad_norm: 0.795148960469049, iteration: 450730
loss: 0.9871229529380798,grad_norm: 0.722018024268893, iteration: 450731
loss: 0.997523844242096,grad_norm: 0.7217151171390385, iteration: 450732
loss: 1.0156099796295166,grad_norm: 0.6255552854385636, iteration: 450733
loss: 1.1989136934280396,grad_norm: 0.9307882445661417, iteration: 450734
loss: 0.9652828574180603,grad_norm: 0.7391990423502374, iteration: 450735
loss: 0.9929400682449341,grad_norm: 0.8129294413387268, iteration: 450736
loss: 0.9911166429519653,grad_norm: 0.6748071281571965, iteration: 450737
loss: 0.9952689409255981,grad_norm: 0.8071594392825551, iteration: 450738
loss: 1.001584529876709,grad_norm: 0.8654534944388349, iteration: 450739
loss: 0.99360191822052,grad_norm: 0.6512623067029866, iteration: 450740
loss: 1.0482895374298096,grad_norm: 0.999999737475027, iteration: 450741
loss: 0.9991334080696106,grad_norm: 0.6152591146165773, iteration: 450742
loss: 1.0227649211883545,grad_norm: 0.7118622331064566, iteration: 450743
loss: 1.0116392374038696,grad_norm: 0.9004521421257883, iteration: 450744
loss: 1.004592776298523,grad_norm: 0.6799382967743414, iteration: 450745
loss: 0.9879066348075867,grad_norm: 0.7822896970964189, iteration: 450746
loss: 0.9920057654380798,grad_norm: 0.8218714835487251, iteration: 450747
loss: 1.0391429662704468,grad_norm: 0.9999997415510945, iteration: 450748
loss: 0.9893144965171814,grad_norm: 0.7523968338104626, iteration: 450749
loss: 1.004217505455017,grad_norm: 0.7570311998208675, iteration: 450750
loss: 1.0004812479019165,grad_norm: 0.7769556803298948, iteration: 450751
loss: 1.0062006711959839,grad_norm: 0.7775607106314839, iteration: 450752
loss: 0.9816917777061462,grad_norm: 0.6984663806932693, iteration: 450753
loss: 1.009562611579895,grad_norm: 0.7301370084486859, iteration: 450754
loss: 1.0008224248886108,grad_norm: 0.6796394724214576, iteration: 450755
loss: 0.9988530278205872,grad_norm: 0.8222893996248254, iteration: 450756
loss: 1.011676549911499,grad_norm: 0.7033306537802472, iteration: 450757
loss: 1.0187073945999146,grad_norm: 0.7112617995677345, iteration: 450758
loss: 1.0497266054153442,grad_norm: 0.7466395740577368, iteration: 450759
loss: 1.0050410032272339,grad_norm: 0.7403952731073907, iteration: 450760
loss: 0.9854822158813477,grad_norm: 0.7455051571780911, iteration: 450761
loss: 0.989632785320282,grad_norm: 0.7697558841526156, iteration: 450762
loss: 1.0126043558120728,grad_norm: 0.9341831195198401, iteration: 450763
loss: 0.9877069592475891,grad_norm: 0.717584025789193, iteration: 450764
loss: 0.9995040893554688,grad_norm: 0.7108006722530278, iteration: 450765
loss: 1.0416536331176758,grad_norm: 0.83827522256491, iteration: 450766
loss: 1.002303123474121,grad_norm: 0.9332895142690966, iteration: 450767
loss: 1.0496692657470703,grad_norm: 0.9999992720321691, iteration: 450768
loss: 1.0143604278564453,grad_norm: 0.6174280842006814, iteration: 450769
loss: 0.9893348813056946,grad_norm: 0.9342948569816746, iteration: 450770
loss: 1.0068106651306152,grad_norm: 0.8837203968146127, iteration: 450771
loss: 1.005640983581543,grad_norm: 0.7517961931481177, iteration: 450772
loss: 0.9624550342559814,grad_norm: 0.6817270841792643, iteration: 450773
loss: 0.9555033445358276,grad_norm: 0.6514313485404305, iteration: 450774
loss: 1.094698190689087,grad_norm: 0.8314309946133315, iteration: 450775
loss: 1.0077478885650635,grad_norm: 0.9098751626167829, iteration: 450776
loss: 1.00924813747406,grad_norm: 0.8411475564805176, iteration: 450777
loss: 1.0271728038787842,grad_norm: 0.8330174933276998, iteration: 450778
loss: 1.0076571702957153,grad_norm: 0.7561622241682874, iteration: 450779
loss: 1.0021644830703735,grad_norm: 0.6087415429668213, iteration: 450780
loss: 1.0084736347198486,grad_norm: 0.7698702511013623, iteration: 450781
loss: 1.017494559288025,grad_norm: 0.7868543559374586, iteration: 450782
loss: 1.0006448030471802,grad_norm: 0.6254860372735065, iteration: 450783
loss: 1.0001099109649658,grad_norm: 0.807762516881986, iteration: 450784
loss: 1.0247466564178467,grad_norm: 0.7337408385667823, iteration: 450785
loss: 0.9625680446624756,grad_norm: 0.7470255442100893, iteration: 450786
loss: 1.0536329746246338,grad_norm: 0.7780814924613968, iteration: 450787
loss: 1.0162420272827148,grad_norm: 0.7971835792919608, iteration: 450788
loss: 0.9962872862815857,grad_norm: 0.7854963976309123, iteration: 450789
loss: 1.119584083557129,grad_norm: 0.9999998458723347, iteration: 450790
loss: 0.9734355211257935,grad_norm: 0.6716958725670049, iteration: 450791
loss: 1.0282378196716309,grad_norm: 0.7031555685156679, iteration: 450792
loss: 1.0023398399353027,grad_norm: 0.6042996285569691, iteration: 450793
loss: 1.0104280710220337,grad_norm: 0.7429433499065669, iteration: 450794
loss: 0.9717652797698975,grad_norm: 0.7463729766711512, iteration: 450795
loss: 0.980549156665802,grad_norm: 0.8547367725031552, iteration: 450796
loss: 0.9809247851371765,grad_norm: 0.7169093649544835, iteration: 450797
loss: 0.9829291701316833,grad_norm: 0.7635290718119297, iteration: 450798
loss: 0.9931382536888123,grad_norm: 0.8943904190265255, iteration: 450799
loss: 1.0021049976348877,grad_norm: 0.8726249620586408, iteration: 450800
loss: 0.967045783996582,grad_norm: 0.663310816650197, iteration: 450801
loss: 0.985187828540802,grad_norm: 0.9158968350090219, iteration: 450802
loss: 0.9935628175735474,grad_norm: 0.7607960037796885, iteration: 450803
loss: 0.9693790078163147,grad_norm: 0.6755772514008206, iteration: 450804
loss: 0.9964685440063477,grad_norm: 0.6892550314712683, iteration: 450805
loss: 0.97322678565979,grad_norm: 0.7658766108642511, iteration: 450806
loss: 1.0116316080093384,grad_norm: 0.7065585278696186, iteration: 450807
loss: 0.9951933026313782,grad_norm: 0.7356178634067436, iteration: 450808
loss: 0.9989715218544006,grad_norm: 0.6288740033136102, iteration: 450809
loss: 1.0104213953018188,grad_norm: 0.759766525356559, iteration: 450810
loss: 0.9948269128799438,grad_norm: 0.8115323409883015, iteration: 450811
loss: 1.0046823024749756,grad_norm: 0.8024561897207781, iteration: 450812
loss: 0.9815866351127625,grad_norm: 0.7912399013807628, iteration: 450813
loss: 1.0125346183776855,grad_norm: 0.6949482050797446, iteration: 450814
loss: 0.9807902574539185,grad_norm: 0.7015416084611921, iteration: 450815
loss: 0.9379215836524963,grad_norm: 0.8794705169590654, iteration: 450816
loss: 1.0040496587753296,grad_norm: 0.7472432049813933, iteration: 450817
loss: 1.017937421798706,grad_norm: 0.8336107616566875, iteration: 450818
loss: 0.9980319738388062,grad_norm: 0.7848619333458475, iteration: 450819
loss: 0.9994098544120789,grad_norm: 0.6705452027060863, iteration: 450820
loss: 1.002811312675476,grad_norm: 0.7503771920053143, iteration: 450821
loss: 0.9975467324256897,grad_norm: 0.7448655168562383, iteration: 450822
loss: 1.0015418529510498,grad_norm: 0.7227095899447111, iteration: 450823
loss: 1.0215877294540405,grad_norm: 0.8350286039851834, iteration: 450824
loss: 1.011032223701477,grad_norm: 0.8256992336017768, iteration: 450825
loss: 1.0476341247558594,grad_norm: 0.748450005240862, iteration: 450826
loss: 0.9968048930168152,grad_norm: 0.883147501450526, iteration: 450827
loss: 0.9715952277183533,grad_norm: 0.7731249497803421, iteration: 450828
loss: 1.1055938005447388,grad_norm: 0.8588459533279564, iteration: 450829
loss: 0.9957314729690552,grad_norm: 0.8126654334097481, iteration: 450830
loss: 1.1077510118484497,grad_norm: 0.9999992784072047, iteration: 450831
loss: 0.9976378083229065,grad_norm: 0.6568989938801618, iteration: 450832
loss: 1.0088608264923096,grad_norm: 0.6561909148422524, iteration: 450833
loss: 0.9855504035949707,grad_norm: 0.7818806076995493, iteration: 450834
loss: 1.0103263854980469,grad_norm: 0.6386035341509955, iteration: 450835
loss: 0.9859481453895569,grad_norm: 0.8274133753355302, iteration: 450836
loss: 1.0276206731796265,grad_norm: 0.7589203292612089, iteration: 450837
loss: 1.0149449110031128,grad_norm: 0.9999991575457191, iteration: 450838
loss: 1.0078773498535156,grad_norm: 0.8239482983683769, iteration: 450839
loss: 1.033203125,grad_norm: 0.9999994968425994, iteration: 450840
loss: 0.9893357753753662,grad_norm: 0.9999997418109939, iteration: 450841
loss: 1.0154980421066284,grad_norm: 0.6625202970835448, iteration: 450842
loss: 0.978230357170105,grad_norm: 0.7175512403854164, iteration: 450843
loss: 1.0221878290176392,grad_norm: 0.734593346876309, iteration: 450844
loss: 1.067049264907837,grad_norm: 0.8841336935387814, iteration: 450845
loss: 0.9567686319351196,grad_norm: 0.81733728179119, iteration: 450846
loss: 0.9686302542686462,grad_norm: 0.8552856915791023, iteration: 450847
loss: 1.00664484500885,grad_norm: 0.6925345974380159, iteration: 450848
loss: 1.055077314376831,grad_norm: 0.8240456961158036, iteration: 450849
loss: 1.0498793125152588,grad_norm: 0.8749000584011173, iteration: 450850
loss: 1.0396651029586792,grad_norm: 0.9412206142532686, iteration: 450851
loss: 0.9874591827392578,grad_norm: 0.6954027839845975, iteration: 450852
loss: 1.0244898796081543,grad_norm: 0.9678781575412085, iteration: 450853
loss: 0.9876449704170227,grad_norm: 0.882537469628692, iteration: 450854
loss: 1.186072826385498,grad_norm: 0.999999929010197, iteration: 450855
loss: 1.0214638710021973,grad_norm: 0.8306251548897547, iteration: 450856
loss: 0.9861906170845032,grad_norm: 0.6406046126702808, iteration: 450857
loss: 0.954816997051239,grad_norm: 0.8454677309634522, iteration: 450858
loss: 1.0286400318145752,grad_norm: 0.7469989205854827, iteration: 450859
loss: 1.025147795677185,grad_norm: 0.8250577593111055, iteration: 450860
loss: 0.9543110728263855,grad_norm: 0.7769928594219715, iteration: 450861
loss: 0.9959837198257446,grad_norm: 0.6757266128833742, iteration: 450862
loss: 1.0322856903076172,grad_norm: 0.8357918699795881, iteration: 450863
loss: 0.9972577095031738,grad_norm: 0.9999995113524127, iteration: 450864
loss: 1.0076568126678467,grad_norm: 0.6581871056802486, iteration: 450865
loss: 1.039318323135376,grad_norm: 0.9999998092357557, iteration: 450866
loss: 1.0014182329177856,grad_norm: 0.6960153981340115, iteration: 450867
loss: 0.9692808389663696,grad_norm: 0.6346044833717818, iteration: 450868
loss: 1.015777349472046,grad_norm: 0.6925829000232705, iteration: 450869
loss: 1.0359838008880615,grad_norm: 0.7943010487299094, iteration: 450870
loss: 1.022610068321228,grad_norm: 0.6927601751768967, iteration: 450871
loss: 1.0241175889968872,grad_norm: 0.9261677508433686, iteration: 450872
loss: 1.0159552097320557,grad_norm: 0.732060698506659, iteration: 450873
loss: 1.0411689281463623,grad_norm: 0.9999990686431225, iteration: 450874
loss: 1.0165660381317139,grad_norm: 0.7373553187838936, iteration: 450875
loss: 0.9949606657028198,grad_norm: 0.7725703913667422, iteration: 450876
loss: 1.0150306224822998,grad_norm: 0.7672596129890321, iteration: 450877
loss: 1.0328775644302368,grad_norm: 0.7386679635376892, iteration: 450878
loss: 0.983092725276947,grad_norm: 0.988002306700582, iteration: 450879
loss: 1.0207933187484741,grad_norm: 0.684712233970556, iteration: 450880
loss: 0.9724077582359314,grad_norm: 0.7306101868293668, iteration: 450881
loss: 1.0143015384674072,grad_norm: 0.6681155768447294, iteration: 450882
loss: 1.0382717847824097,grad_norm: 0.8369341345518782, iteration: 450883
loss: 1.0341002941131592,grad_norm: 0.7931067653598294, iteration: 450884
loss: 0.9985949397087097,grad_norm: 0.7099394585922207, iteration: 450885
loss: 0.9925029873847961,grad_norm: 0.7438264002361601, iteration: 450886
loss: 0.955044150352478,grad_norm: 1.0000001077335863, iteration: 450887
loss: 1.0299510955810547,grad_norm: 0.8284128011687145, iteration: 450888
loss: 0.9948920607566833,grad_norm: 0.6688555221863229, iteration: 450889
loss: 1.0425587892532349,grad_norm: 0.8796107993805755, iteration: 450890
loss: 0.9559274911880493,grad_norm: 0.8730299408107179, iteration: 450891
loss: 0.9886786937713623,grad_norm: 0.8508172174196423, iteration: 450892
loss: 0.9707464575767517,grad_norm: 0.8420142541577583, iteration: 450893
loss: 0.9938057065010071,grad_norm: 0.8091183673686253, iteration: 450894
loss: 1.0579242706298828,grad_norm: 0.999999069159008, iteration: 450895
loss: 1.00491201877594,grad_norm: 0.7708379345632047, iteration: 450896
loss: 1.0064035654067993,grad_norm: 0.7724890339458131, iteration: 450897
loss: 0.9859623312950134,grad_norm: 0.7525912382448079, iteration: 450898
loss: 1.056626796722412,grad_norm: 0.8351571034003821, iteration: 450899
loss: 1.1364250183105469,grad_norm: 0.999999618353771, iteration: 450900
loss: 1.0336471796035767,grad_norm: 0.7847146775533648, iteration: 450901
loss: 1.0334088802337646,grad_norm: 0.8109360223982139, iteration: 450902
loss: 1.0126348733901978,grad_norm: 0.7725124770002233, iteration: 450903
loss: 0.9894827604293823,grad_norm: 0.8065405068285378, iteration: 450904
loss: 0.9731510281562805,grad_norm: 0.923332036172096, iteration: 450905
loss: 1.043464183807373,grad_norm: 0.9999997997888435, iteration: 450906
loss: 1.0005364418029785,grad_norm: 0.8974320668032779, iteration: 450907
loss: 1.0456706285476685,grad_norm: 0.7368414513236825, iteration: 450908
loss: 1.0374451875686646,grad_norm: 0.7016215772812517, iteration: 450909
loss: 1.1152739524841309,grad_norm: 0.9999992182219888, iteration: 450910
loss: 0.9718338251113892,grad_norm: 0.7759387228652884, iteration: 450911
loss: 1.0055336952209473,grad_norm: 0.6901663588487663, iteration: 450912
loss: 0.9752365350723267,grad_norm: 0.7961006013814628, iteration: 450913
loss: 0.9914788007736206,grad_norm: 0.7029749644922952, iteration: 450914
loss: 0.9661127924919128,grad_norm: 0.7967299484852987, iteration: 450915
loss: 0.9987819790840149,grad_norm: 0.7107100055564635, iteration: 450916
loss: 0.992078959941864,grad_norm: 0.765974669632749, iteration: 450917
loss: 0.9785139560699463,grad_norm: 0.847584375994976, iteration: 450918
loss: 1.0093437433242798,grad_norm: 0.9279545272357802, iteration: 450919
loss: 1.0060350894927979,grad_norm: 0.9999996017048777, iteration: 450920
loss: 0.9901266098022461,grad_norm: 0.7986521979902655, iteration: 450921
loss: 0.9976279139518738,grad_norm: 0.8424970256112797, iteration: 450922
loss: 1.0090372562408447,grad_norm: 0.6132417788358764, iteration: 450923
loss: 0.9903764724731445,grad_norm: 0.7812245513214535, iteration: 450924
loss: 1.0114890336990356,grad_norm: 0.8199494905322884, iteration: 450925
loss: 1.0050935745239258,grad_norm: 0.6560171562068862, iteration: 450926
loss: 0.9956299066543579,grad_norm: 0.7584468347993896, iteration: 450927
loss: 0.992384135723114,grad_norm: 0.7900460839262861, iteration: 450928
loss: 1.0017173290252686,grad_norm: 0.7037052895188917, iteration: 450929
loss: 0.9945884943008423,grad_norm: 0.8505355654267432, iteration: 450930
loss: 1.0017691850662231,grad_norm: 0.7795184111652618, iteration: 450931
loss: 0.982248067855835,grad_norm: 0.8025473678688283, iteration: 450932
loss: 1.03391695022583,grad_norm: 0.9999998593524819, iteration: 450933
loss: 0.9817624092102051,grad_norm: 0.7872643672344234, iteration: 450934
loss: 0.936596155166626,grad_norm: 0.7450771370196791, iteration: 450935
loss: 1.0219237804412842,grad_norm: 0.8353935816269983, iteration: 450936
loss: 1.0387108325958252,grad_norm: 0.8617356983746232, iteration: 450937
loss: 0.9928678274154663,grad_norm: 0.7779569321899064, iteration: 450938
loss: 1.0124932527542114,grad_norm: 0.7626453305006857, iteration: 450939
loss: 0.9841943383216858,grad_norm: 0.9276906519598098, iteration: 450940
loss: 1.0050948858261108,grad_norm: 0.9999995065517371, iteration: 450941
loss: 1.0446001291275024,grad_norm: 0.999999278524699, iteration: 450942
loss: 1.0216751098632812,grad_norm: 0.8606087345860148, iteration: 450943
loss: 1.2029324769973755,grad_norm: 0.9999991733075768, iteration: 450944
loss: 0.9519258737564087,grad_norm: 0.9430414484877662, iteration: 450945
loss: 0.9907416701316833,grad_norm: 0.6956617842244475, iteration: 450946
loss: 1.166874647140503,grad_norm: 0.8277725469790811, iteration: 450947
loss: 1.0053157806396484,grad_norm: 0.7788790624682668, iteration: 450948
loss: 1.0196917057037354,grad_norm: 0.9171355542577483, iteration: 450949
loss: 1.0304830074310303,grad_norm: 0.733941380380013, iteration: 450950
loss: 1.0053988695144653,grad_norm: 0.7906734139699435, iteration: 450951
loss: 0.9968268275260925,grad_norm: 0.9901540883061246, iteration: 450952
loss: 0.9657105207443237,grad_norm: 0.8280401040167693, iteration: 450953
loss: 1.0367907285690308,grad_norm: 0.9686862844345377, iteration: 450954
loss: 0.9983364343643188,grad_norm: 0.7100804697650034, iteration: 450955
loss: 0.9945138692855835,grad_norm: 0.6963778983695047, iteration: 450956
loss: 1.0032787322998047,grad_norm: 0.7792610394148888, iteration: 450957
loss: 1.0114312171936035,grad_norm: 0.9999991014449252, iteration: 450958
loss: 0.9853119850158691,grad_norm: 0.7409485877847952, iteration: 450959
loss: 0.9906784296035767,grad_norm: 0.6390966547290774, iteration: 450960
loss: 1.0073925256729126,grad_norm: 0.8477909724548309, iteration: 450961
loss: 1.029144525527954,grad_norm: 0.819609998812475, iteration: 450962
loss: 0.9987220764160156,grad_norm: 0.9460905550745263, iteration: 450963
loss: 0.9954484105110168,grad_norm: 0.7771265090553076, iteration: 450964
loss: 0.9965223073959351,grad_norm: 0.8968435981871576, iteration: 450965
loss: 1.0969905853271484,grad_norm: 0.9103183846836973, iteration: 450966
loss: 1.0764156579971313,grad_norm: 0.9537618862379834, iteration: 450967
loss: 1.0301698446273804,grad_norm: 0.9999999547480898, iteration: 450968
loss: 0.9922585487365723,grad_norm: 0.7025161124846498, iteration: 450969
loss: 1.0135020017623901,grad_norm: 0.7879351897178504, iteration: 450970
loss: 0.9922588467597961,grad_norm: 0.7281651680310037, iteration: 450971
loss: 1.0105819702148438,grad_norm: 0.6814527193765771, iteration: 450972
loss: 1.013776183128357,grad_norm: 0.9999999744959701, iteration: 450973
loss: 0.9698992967605591,grad_norm: 0.7175473612403249, iteration: 450974
loss: 0.9705365300178528,grad_norm: 0.78188697451587, iteration: 450975
loss: 0.9671148657798767,grad_norm: 0.8370444465766689, iteration: 450976
loss: 1.0463976860046387,grad_norm: 0.9999993164286617, iteration: 450977
loss: 0.981011152267456,grad_norm: 0.8051224412614053, iteration: 450978
loss: 1.0280543565750122,grad_norm: 0.9224126732751508, iteration: 450979
loss: 1.007379412651062,grad_norm: 0.8418667558598926, iteration: 450980
loss: 1.078504204750061,grad_norm: 0.9999997959130816, iteration: 450981
loss: 0.999955415725708,grad_norm: 0.8073768588676336, iteration: 450982
loss: 0.9881882071495056,grad_norm: 0.7348160329068739, iteration: 450983
loss: 1.0494890213012695,grad_norm: 0.9999994517180177, iteration: 450984
loss: 0.9836537837982178,grad_norm: 0.6588375983755972, iteration: 450985
loss: 1.0688949823379517,grad_norm: 0.888032505677454, iteration: 450986
loss: 1.003812313079834,grad_norm: 0.7504355961691862, iteration: 450987
loss: 1.0736076831817627,grad_norm: 0.9464631655171922, iteration: 450988
loss: 1.110308051109314,grad_norm: 0.9999993249542896, iteration: 450989
loss: 0.9936644434928894,grad_norm: 0.7280644923604083, iteration: 450990
loss: 1.0849106311798096,grad_norm: 0.9277743786440319, iteration: 450991
loss: 1.068771481513977,grad_norm: 0.857554304432222, iteration: 450992
loss: 1.0347931385040283,grad_norm: 0.6944477009851019, iteration: 450993
loss: 1.0372177362442017,grad_norm: 0.6030715233348364, iteration: 450994
loss: 1.0134334564208984,grad_norm: 0.899854107344749, iteration: 450995
loss: 1.0475736856460571,grad_norm: 0.9999997312453485, iteration: 450996
loss: 1.093254566192627,grad_norm: 0.9999992909209355, iteration: 450997
loss: 1.1708072423934937,grad_norm: 0.7715305254902872, iteration: 450998
loss: 1.0537022352218628,grad_norm: 0.9999995133731165, iteration: 450999
loss: 0.9779719710350037,grad_norm: 0.6639564470587042, iteration: 451000
loss: 1.0394772291183472,grad_norm: 0.9999997091289765, iteration: 451001
loss: 1.0021734237670898,grad_norm: 0.954720041609672, iteration: 451002
loss: 1.0579066276550293,grad_norm: 0.999999329674814, iteration: 451003
loss: 0.9751972556114197,grad_norm: 0.8691022744918706, iteration: 451004
loss: 1.0288058519363403,grad_norm: 0.7408612788315576, iteration: 451005
loss: 1.0291814804077148,grad_norm: 0.9999990905158402, iteration: 451006
loss: 0.9562673568725586,grad_norm: 0.6937025407522024, iteration: 451007
loss: 1.0095661878585815,grad_norm: 0.8377015746648993, iteration: 451008
loss: 1.0354512929916382,grad_norm: 0.864367472337762, iteration: 451009
loss: 0.9968812465667725,grad_norm: 0.777085209758078, iteration: 451010
loss: 1.0124672651290894,grad_norm: 0.7065722411522635, iteration: 451011
loss: 0.9869110584259033,grad_norm: 0.9790220570369902, iteration: 451012
loss: 0.979721188545227,grad_norm: 0.8003198411579233, iteration: 451013
loss: 1.0763658285140991,grad_norm: 0.8307976879310673, iteration: 451014
loss: 0.9671656489372253,grad_norm: 0.8199194140165889, iteration: 451015
loss: 1.0991967916488647,grad_norm: 0.959323087448746, iteration: 451016
loss: 1.0276988744735718,grad_norm: 0.7828992273736506, iteration: 451017
loss: 1.0926593542099,grad_norm: 0.99999916098156, iteration: 451018
loss: 1.0224238634109497,grad_norm: 0.7791020277668942, iteration: 451019
loss: 0.9798755645751953,grad_norm: 0.7189717064462492, iteration: 451020
loss: 1.1327992677688599,grad_norm: 0.7734283166335235, iteration: 451021
loss: 0.9454625248908997,grad_norm: 0.8818164785111952, iteration: 451022
loss: 0.9944247603416443,grad_norm: 0.999999767224969, iteration: 451023
loss: 0.9741193056106567,grad_norm: 0.7066404674466575, iteration: 451024
loss: 0.9999354481697083,grad_norm: 0.7074936445651198, iteration: 451025
loss: 1.0121221542358398,grad_norm: 0.7453754437920269, iteration: 451026
loss: 0.967847466468811,grad_norm: 0.9006350434908994, iteration: 451027
loss: 1.0087933540344238,grad_norm: 0.8180956286683998, iteration: 451028
loss: 1.0048935413360596,grad_norm: 0.708488642318482, iteration: 451029
loss: 0.9968194961547852,grad_norm: 0.9999992228569197, iteration: 451030
loss: 0.9726141095161438,grad_norm: 0.6710171642480729, iteration: 451031
loss: 1.0992578268051147,grad_norm: 0.7795718259328694, iteration: 451032
loss: 1.0028332471847534,grad_norm: 0.725130890706068, iteration: 451033
loss: 0.999712347984314,grad_norm: 0.7718245753154486, iteration: 451034
loss: 1.112372636795044,grad_norm: 0.999999309228126, iteration: 451035
loss: 1.0249722003936768,grad_norm: 0.9999997748139676, iteration: 451036
loss: 0.9993309378623962,grad_norm: 0.999999298368647, iteration: 451037
loss: 0.9611517190933228,grad_norm: 0.9999996807504485, iteration: 451038
loss: 1.0288382768630981,grad_norm: 0.7583811979687425, iteration: 451039
loss: 0.9977824687957764,grad_norm: 0.8184822544426139, iteration: 451040
loss: 0.9388574361801147,grad_norm: 0.838959855104285, iteration: 451041
loss: 1.0567891597747803,grad_norm: 0.9999996927540595, iteration: 451042
loss: 0.9930753707885742,grad_norm: 0.7990394804570595, iteration: 451043
loss: 1.0411322116851807,grad_norm: 0.9999992425982163, iteration: 451044
loss: 0.9955918192863464,grad_norm: 0.6075600446385762, iteration: 451045
loss: 0.9866213798522949,grad_norm: 0.6530044164412714, iteration: 451046
loss: 0.9826727509498596,grad_norm: 0.8439164019771591, iteration: 451047
loss: 1.0060038566589355,grad_norm: 0.772901803358918, iteration: 451048
loss: 1.0399399995803833,grad_norm: 0.7807229461612488, iteration: 451049
loss: 0.9849330186843872,grad_norm: 0.9999996956999484, iteration: 451050
loss: 0.9863220453262329,grad_norm: 0.7735673776752972, iteration: 451051
loss: 1.0437641143798828,grad_norm: 0.8690852066619729, iteration: 451052
loss: 1.0365161895751953,grad_norm: 0.8599464658920939, iteration: 451053
loss: 1.0784329175949097,grad_norm: 0.8407659702628235, iteration: 451054
loss: 1.090536117553711,grad_norm: 0.9999991147371619, iteration: 451055
loss: 0.9763674736022949,grad_norm: 0.7752922641691469, iteration: 451056
loss: 1.010275959968567,grad_norm: 0.9892024714415806, iteration: 451057
loss: 0.9992085695266724,grad_norm: 0.6862303757497228, iteration: 451058
loss: 0.9986572861671448,grad_norm: 0.7319810965206798, iteration: 451059
loss: 1.059533715248108,grad_norm: 0.8462395458095774, iteration: 451060
loss: 0.999057412147522,grad_norm: 0.7382427272800648, iteration: 451061
loss: 1.1007839441299438,grad_norm: 0.999999217025628, iteration: 451062
loss: 0.9977335333824158,grad_norm: 0.7771582450396447, iteration: 451063
loss: 1.0227516889572144,grad_norm: 0.9999992274633157, iteration: 451064
loss: 0.9758042097091675,grad_norm: 0.7274843693264168, iteration: 451065
loss: 1.0554713010787964,grad_norm: 0.9999992225363242, iteration: 451066
loss: 0.9651400446891785,grad_norm: 0.6600810241487658, iteration: 451067
loss: 1.071002721786499,grad_norm: 0.9999998382081974, iteration: 451068
loss: 1.030690312385559,grad_norm: 0.9999992379541284, iteration: 451069
loss: 1.0231842994689941,grad_norm: 0.878032871279404, iteration: 451070
loss: 1.0251749753952026,grad_norm: 0.9999996957252894, iteration: 451071
loss: 0.9956883192062378,grad_norm: 0.7654892836887489, iteration: 451072
loss: 1.0195050239562988,grad_norm: 0.5876276286743537, iteration: 451073
loss: 0.9899570941925049,grad_norm: 0.7007165956044153, iteration: 451074
loss: 1.0539504289627075,grad_norm: 0.9999994202580312, iteration: 451075
loss: 0.9997848272323608,grad_norm: 0.7694149655490935, iteration: 451076
loss: 1.0005767345428467,grad_norm: 0.8727099943628603, iteration: 451077
loss: 1.0305641889572144,grad_norm: 0.8564502993031903, iteration: 451078
loss: 1.0298875570297241,grad_norm: 0.8550602520737061, iteration: 451079
loss: 1.017445683479309,grad_norm: 0.6989878890041512, iteration: 451080
loss: 1.0128308534622192,grad_norm: 0.7008362902727773, iteration: 451081
loss: 1.006408452987671,grad_norm: 0.8020489887595431, iteration: 451082
loss: 1.0545850992202759,grad_norm: 1.0000000150304291, iteration: 451083
loss: 1.0415672063827515,grad_norm: 0.7216205041660259, iteration: 451084
loss: 1.0101176500320435,grad_norm: 0.7096136464645068, iteration: 451085
loss: 1.0612021684646606,grad_norm: 0.9629811875954452, iteration: 451086
loss: 1.0091513395309448,grad_norm: 0.6721163152071883, iteration: 451087
loss: 1.0702229738235474,grad_norm: 0.8003973163815219, iteration: 451088
loss: 0.9909353256225586,grad_norm: 0.8463572137116383, iteration: 451089
loss: 0.9551660418510437,grad_norm: 0.7703294684565671, iteration: 451090
loss: 0.991070568561554,grad_norm: 0.7276850100033527, iteration: 451091
loss: 1.042621374130249,grad_norm: 0.9999993330500051, iteration: 451092
loss: 0.9815996885299683,grad_norm: 0.7282765274344208, iteration: 451093
loss: 1.081632375717163,grad_norm: 0.8992371178494329, iteration: 451094
loss: 0.987639844417572,grad_norm: 0.7788151600634399, iteration: 451095
loss: 0.9788033366203308,grad_norm: 0.8228397363890674, iteration: 451096
loss: 0.9750341176986694,grad_norm: 0.878457734614233, iteration: 451097
loss: 1.1085290908813477,grad_norm: 0.9999999539594326, iteration: 451098
loss: 1.0448031425476074,grad_norm: 0.9999993661888279, iteration: 451099
loss: 1.0155397653579712,grad_norm: 0.9999992679002639, iteration: 451100
loss: 1.0123003721237183,grad_norm: 0.9941991285729433, iteration: 451101
loss: 0.9890119433403015,grad_norm: 0.7112916948017509, iteration: 451102
loss: 1.066457748413086,grad_norm: 0.8211394754212478, iteration: 451103
loss: 1.003544569015503,grad_norm: 0.7131832636041135, iteration: 451104
loss: 0.996712327003479,grad_norm: 0.7809949570720683, iteration: 451105
loss: 1.100763201713562,grad_norm: 0.9999993013247059, iteration: 451106
loss: 1.031206488609314,grad_norm: 0.7155484047594092, iteration: 451107
loss: 0.9842405319213867,grad_norm: 0.9113983348336019, iteration: 451108
loss: 0.9863120913505554,grad_norm: 0.85801191645344, iteration: 451109
loss: 0.9681994915008545,grad_norm: 0.9222599402556728, iteration: 451110
loss: 0.9875423312187195,grad_norm: 0.75643843916955, iteration: 451111
loss: 0.9916850328445435,grad_norm: 0.801906548273974, iteration: 451112
loss: 1.0101839303970337,grad_norm: 0.7458479304497628, iteration: 451113
loss: 0.9822089076042175,grad_norm: 0.7580940871676016, iteration: 451114
loss: 1.000472068786621,grad_norm: 0.8882449155840076, iteration: 451115
loss: 1.014709234237671,grad_norm: 0.9028032547238325, iteration: 451116
loss: 0.9699795246124268,grad_norm: 0.6899099939914376, iteration: 451117
loss: 1.0465409755706787,grad_norm: 0.8129949923138845, iteration: 451118
loss: 0.9953534603118896,grad_norm: 0.7453194462402953, iteration: 451119
loss: 0.9640257358551025,grad_norm: 0.9999997901035697, iteration: 451120
loss: 0.9910668730735779,grad_norm: 0.9999992271507272, iteration: 451121
loss: 1.077131748199463,grad_norm: 0.9999994013588736, iteration: 451122
loss: 1.0733973979949951,grad_norm: 0.9999998117095737, iteration: 451123
loss: 1.0278469324111938,grad_norm: 0.7964036825339528, iteration: 451124
loss: 1.0104714632034302,grad_norm: 0.7162579013830364, iteration: 451125
loss: 0.9718720316886902,grad_norm: 0.7883425020323557, iteration: 451126
loss: 0.9542044997215271,grad_norm: 0.6795371657214507, iteration: 451127
loss: 1.0083513259887695,grad_norm: 0.9999998687786024, iteration: 451128
loss: 1.0266478061676025,grad_norm: 0.9302859537273216, iteration: 451129
loss: 1.0562466382980347,grad_norm: 0.7304077775222529, iteration: 451130
loss: 1.0350759029388428,grad_norm: 0.7260846394837456, iteration: 451131
loss: 1.1091374158859253,grad_norm: 0.9999997254788985, iteration: 451132
loss: 0.9941966533660889,grad_norm: 0.815814256851778, iteration: 451133
loss: 0.9724056720733643,grad_norm: 0.7449934862642175, iteration: 451134
loss: 1.0195493698120117,grad_norm: 0.8591626912656573, iteration: 451135
loss: 1.0199393033981323,grad_norm: 0.9280648702280667, iteration: 451136
loss: 1.013121247291565,grad_norm: 0.6871703053818454, iteration: 451137
loss: 1.0129109621047974,grad_norm: 0.7946450699017615, iteration: 451138
loss: 0.9791542887687683,grad_norm: 0.7465662715451092, iteration: 451139
loss: 1.0457379817962646,grad_norm: 0.6911018065371191, iteration: 451140
loss: 0.9787520170211792,grad_norm: 0.999999619249165, iteration: 451141
loss: 0.9816889762878418,grad_norm: 0.7412022358247099, iteration: 451142
loss: 1.0051060914993286,grad_norm: 0.7969084314823037, iteration: 451143
loss: 0.9874491095542908,grad_norm: 0.7804010198517654, iteration: 451144
loss: 0.9890562891960144,grad_norm: 0.6879222514011731, iteration: 451145
loss: 1.0953587293624878,grad_norm: 0.7717130794575524, iteration: 451146
loss: 0.997009813785553,grad_norm: 0.6191250466773158, iteration: 451147
loss: 0.997101902961731,grad_norm: 0.7025862541405442, iteration: 451148
loss: 1.0181795358657837,grad_norm: 0.768228043437644, iteration: 451149
loss: 0.9814120531082153,grad_norm: 0.9999998589556034, iteration: 451150
loss: 0.9834447503089905,grad_norm: 0.767543839822311, iteration: 451151
loss: 1.011142611503601,grad_norm: 0.7009112593334912, iteration: 451152
loss: 0.9884016513824463,grad_norm: 0.9999996021077583, iteration: 451153
loss: 1.019826054573059,grad_norm: 0.9028422452202011, iteration: 451154
loss: 1.0029996633529663,grad_norm: 0.7382955088278838, iteration: 451155
loss: 1.009149193763733,grad_norm: 0.9237293714343591, iteration: 451156
loss: 1.0485827922821045,grad_norm: 0.7219089023346259, iteration: 451157
loss: 1.0169832706451416,grad_norm: 0.7444187720678009, iteration: 451158
loss: 0.9789783954620361,grad_norm: 0.758301168339749, iteration: 451159
loss: 1.0526424646377563,grad_norm: 0.9999994563408352, iteration: 451160
loss: 1.01384437084198,grad_norm: 0.8385306108670761, iteration: 451161
loss: 0.9745774269104004,grad_norm: 0.7512711998698102, iteration: 451162
loss: 1.0132633447647095,grad_norm: 0.652737610761507, iteration: 451163
loss: 1.0079026222229004,grad_norm: 0.8391909253258971, iteration: 451164
loss: 0.9796456098556519,grad_norm: 0.6899806279007151, iteration: 451165
loss: 0.986741840839386,grad_norm: 0.7876072879727136, iteration: 451166
loss: 0.9874743819236755,grad_norm: 0.9999991533922745, iteration: 451167
loss: 0.983829915523529,grad_norm: 0.8023275744650761, iteration: 451168
loss: 0.9932517409324646,grad_norm: 0.846827391816431, iteration: 451169
loss: 0.9650300145149231,grad_norm: 0.8765462093596917, iteration: 451170
loss: 0.988899290561676,grad_norm: 0.7809333432959922, iteration: 451171
loss: 0.9884535670280457,grad_norm: 0.637783898595474, iteration: 451172
loss: 1.0185749530792236,grad_norm: 0.8122844838182225, iteration: 451173
loss: 1.0995118618011475,grad_norm: 0.9722598977564835, iteration: 451174
loss: 1.0257893800735474,grad_norm: 0.8199816209242012, iteration: 451175
loss: 0.9965985417366028,grad_norm: 0.8441266937094669, iteration: 451176
loss: 0.9729331731796265,grad_norm: 0.7633391479450121, iteration: 451177
loss: 1.008175015449524,grad_norm: 0.9999993993180687, iteration: 451178
loss: 1.0127179622650146,grad_norm: 0.6490735966148538, iteration: 451179
loss: 1.0028096437454224,grad_norm: 0.7320150463540154, iteration: 451180
loss: 0.9993459582328796,grad_norm: 0.8064234011447242, iteration: 451181
loss: 1.0214446783065796,grad_norm: 0.8634942854545393, iteration: 451182
loss: 1.0462650060653687,grad_norm: 0.7352470622150324, iteration: 451183
loss: 0.9790658950805664,grad_norm: 0.7643773406398269, iteration: 451184
loss: 1.0420124530792236,grad_norm: 0.999999159285707, iteration: 451185
loss: 0.9848672151565552,grad_norm: 0.6982551528276991, iteration: 451186
loss: 1.008919358253479,grad_norm: 0.8682381519899959, iteration: 451187
loss: 0.9967454671859741,grad_norm: 0.6978570912188102, iteration: 451188
loss: 0.9801025390625,grad_norm: 0.8801182281697174, iteration: 451189
loss: 1.015326738357544,grad_norm: 0.7148359878528279, iteration: 451190
loss: 0.9710848331451416,grad_norm: 0.6417129410423412, iteration: 451191
loss: 1.0058379173278809,grad_norm: 0.8565727514260381, iteration: 451192
loss: 0.9605052471160889,grad_norm: 0.6987471267695683, iteration: 451193
loss: 0.9963560700416565,grad_norm: 0.9999990599293456, iteration: 451194
loss: 1.0078221559524536,grad_norm: 0.984152458646561, iteration: 451195
loss: 1.1651010513305664,grad_norm: 0.9908080659977954, iteration: 451196
loss: 0.9955962896347046,grad_norm: 0.7241150311134833, iteration: 451197
loss: 1.0267561674118042,grad_norm: 0.6748961084530618, iteration: 451198
loss: 0.9898949265480042,grad_norm: 0.9295788173232197, iteration: 451199
loss: 1.075896143913269,grad_norm: 0.9944312801335995, iteration: 451200
loss: 0.9791899919509888,grad_norm: 0.6151456720437231, iteration: 451201
loss: 1.0153316259384155,grad_norm: 0.7178643652760334, iteration: 451202
loss: 1.011254072189331,grad_norm: 0.7906908033372049, iteration: 451203
loss: 0.9990398287773132,grad_norm: 0.9943494323598108, iteration: 451204
loss: 0.9914952516555786,grad_norm: 0.755967309488697, iteration: 451205
loss: 1.0018755197525024,grad_norm: 0.8923207288603184, iteration: 451206
loss: 0.9981552958488464,grad_norm: 0.6828885970776571, iteration: 451207
loss: 1.0154691934585571,grad_norm: 0.9043288593076307, iteration: 451208
loss: 1.0729137659072876,grad_norm: 0.9389605109489139, iteration: 451209
loss: 0.9750436544418335,grad_norm: 0.8688681445440825, iteration: 451210
loss: 0.9896717071533203,grad_norm: 0.7843687534558308, iteration: 451211
loss: 1.066786527633667,grad_norm: 0.9999991149918891, iteration: 451212
loss: 1.1073116064071655,grad_norm: 0.9999992141353239, iteration: 451213
loss: 0.9765220284461975,grad_norm: 0.7646269862919871, iteration: 451214
loss: 0.9937214255332947,grad_norm: 0.7527353485855169, iteration: 451215
loss: 1.0233776569366455,grad_norm: 0.7777054411382361, iteration: 451216
loss: 1.0326251983642578,grad_norm: 0.9999998879212312, iteration: 451217
loss: 1.0181047916412354,grad_norm: 0.646778957693757, iteration: 451218
loss: 1.0116080045700073,grad_norm: 0.6716619787244885, iteration: 451219
loss: 0.9958375692367554,grad_norm: 0.9418168602526559, iteration: 451220
loss: 0.995877206325531,grad_norm: 0.7513722811744747, iteration: 451221
loss: 1.0291351079940796,grad_norm: 0.9999997772452661, iteration: 451222
loss: 1.006866693496704,grad_norm: 0.6858036133850225, iteration: 451223
loss: 0.9965343475341797,grad_norm: 0.7426719225140722, iteration: 451224
loss: 1.0238169431686401,grad_norm: 0.750851090288885, iteration: 451225
loss: 1.0190693140029907,grad_norm: 0.6926232696954809, iteration: 451226
loss: 0.9993542432785034,grad_norm: 0.8865647953712342, iteration: 451227
loss: 1.0177663564682007,grad_norm: 0.7359181318525827, iteration: 451228
loss: 0.9736558198928833,grad_norm: 0.9190462825540399, iteration: 451229
loss: 1.0050090551376343,grad_norm: 0.8165011857416008, iteration: 451230
loss: 1.011543869972229,grad_norm: 0.8971464675653172, iteration: 451231
loss: 0.9975962042808533,grad_norm: 0.6121227243119135, iteration: 451232
loss: 0.9773165583610535,grad_norm: 0.7532807048582955, iteration: 451233
loss: 0.9783869981765747,grad_norm: 0.7647092274715068, iteration: 451234
loss: 1.0412570238113403,grad_norm: 0.9413973069619992, iteration: 451235
loss: 1.0799672603607178,grad_norm: 0.9999991844257315, iteration: 451236
loss: 1.0323257446289062,grad_norm: 0.7262166565610917, iteration: 451237
loss: 0.9930651783943176,grad_norm: 0.6999921173273808, iteration: 451238
loss: 1.0211834907531738,grad_norm: 0.573124463774879, iteration: 451239
loss: 1.0624560117721558,grad_norm: 0.685881171038533, iteration: 451240
loss: 0.9940908551216125,grad_norm: 0.8629369732994572, iteration: 451241
loss: 1.012558102607727,grad_norm: 0.7265566907750604, iteration: 451242
loss: 1.0192590951919556,grad_norm: 0.9999991673301917, iteration: 451243
loss: 1.011558175086975,grad_norm: 0.9543703885860019, iteration: 451244
loss: 1.002714991569519,grad_norm: 0.8117546619151269, iteration: 451245
loss: 1.0849331617355347,grad_norm: 0.9999998980231212, iteration: 451246
loss: 1.0429282188415527,grad_norm: 0.8645225048202526, iteration: 451247
loss: 1.0107074975967407,grad_norm: 0.6657260161117267, iteration: 451248
loss: 0.9975656867027283,grad_norm: 0.862640802651461, iteration: 451249
loss: 0.9579885601997375,grad_norm: 0.7675240226058977, iteration: 451250
loss: 1.019892930984497,grad_norm: 0.7660699619942838, iteration: 451251
loss: 0.9957327246665955,grad_norm: 0.8556549696893394, iteration: 451252
loss: 1.0265798568725586,grad_norm: 0.7861679673852069, iteration: 451253
loss: 0.9983227849006653,grad_norm: 0.7682940578722879, iteration: 451254
loss: 0.9658838510513306,grad_norm: 0.6543554926462407, iteration: 451255
loss: 0.995043933391571,grad_norm: 0.7734696213530791, iteration: 451256
loss: 1.0333259105682373,grad_norm: 0.9999993576981331, iteration: 451257
loss: 0.9902949929237366,grad_norm: 0.7826497769964526, iteration: 451258
loss: 0.9896726012229919,grad_norm: 0.780576312994046, iteration: 451259
loss: 0.9958422780036926,grad_norm: 0.8031347562458521, iteration: 451260
loss: 0.9612538814544678,grad_norm: 0.7428920060049202, iteration: 451261
loss: 0.9776342511177063,grad_norm: 0.7579211133310461, iteration: 451262
loss: 0.9847935438156128,grad_norm: 0.7453386494454058, iteration: 451263
loss: 0.9796283841133118,grad_norm: 0.6656286716704612, iteration: 451264
loss: 0.9736362099647522,grad_norm: 0.797590635956965, iteration: 451265
loss: 1.0322626829147339,grad_norm: 0.8313924448916424, iteration: 451266
loss: 1.0495933294296265,grad_norm: 0.8398030974632341, iteration: 451267
loss: 0.9735907316207886,grad_norm: 0.7894144172619453, iteration: 451268
loss: 1.0263259410858154,grad_norm: 0.7932074667694183, iteration: 451269
loss: 0.980870246887207,grad_norm: 0.7729977928924638, iteration: 451270
loss: 1.036050796508789,grad_norm: 0.6598097516608337, iteration: 451271
loss: 1.0210355520248413,grad_norm: 0.9999991050428375, iteration: 451272
loss: 0.9924271106719971,grad_norm: 0.7518578263930937, iteration: 451273
loss: 1.0669701099395752,grad_norm: 0.8227747262576653, iteration: 451274
loss: 0.9971314072608948,grad_norm: 0.6439565115172016, iteration: 451275
loss: 0.9863808155059814,grad_norm: 0.6444156842766493, iteration: 451276
loss: 1.0255000591278076,grad_norm: 0.5824428132574525, iteration: 451277
loss: 0.9716312885284424,grad_norm: 0.7614086911482794, iteration: 451278
loss: 1.0064024925231934,grad_norm: 0.7139809803399422, iteration: 451279
loss: 1.005348563194275,grad_norm: 0.8267419677183011, iteration: 451280
loss: 1.0046240091323853,grad_norm: 0.7483854716409061, iteration: 451281
loss: 1.02141273021698,grad_norm: 0.8283310483687153, iteration: 451282
loss: 1.0025758743286133,grad_norm: 0.6996867263681243, iteration: 451283
loss: 0.9679269194602966,grad_norm: 0.9105130283467003, iteration: 451284
loss: 1.0051851272583008,grad_norm: 0.6899108982250662, iteration: 451285
loss: 0.9900184273719788,grad_norm: 0.8951802843378248, iteration: 451286
loss: 0.9687487483024597,grad_norm: 0.844655594276942, iteration: 451287
loss: 0.9714552164077759,grad_norm: 0.7776006800571906, iteration: 451288
loss: 1.0232036113739014,grad_norm: 0.6772033856976496, iteration: 451289
loss: 1.001570701599121,grad_norm: 0.8913517705774285, iteration: 451290
loss: 1.0338373184204102,grad_norm: 0.6630502611795994, iteration: 451291
loss: 1.007066011428833,grad_norm: 0.9422568062322222, iteration: 451292
loss: 0.9963236451148987,grad_norm: 0.813653950504694, iteration: 451293
loss: 0.9873740673065186,grad_norm: 0.9999998689075409, iteration: 451294
loss: 1.0308895111083984,grad_norm: 0.764091687052667, iteration: 451295
loss: 1.0066866874694824,grad_norm: 0.708323562824875, iteration: 451296
loss: 1.011700987815857,grad_norm: 0.8912179691803912, iteration: 451297
loss: 1.0104331970214844,grad_norm: 0.9999990269658215, iteration: 451298
loss: 0.9950922727584839,grad_norm: 0.788208210769494, iteration: 451299
loss: 0.9821756482124329,grad_norm: 0.7897609190037144, iteration: 451300
loss: 1.0020718574523926,grad_norm: 0.9142689181945001, iteration: 451301
loss: 1.0117441415786743,grad_norm: 0.9283698044651285, iteration: 451302
loss: 1.016331434249878,grad_norm: 0.7455958904820241, iteration: 451303
loss: 0.9916258454322815,grad_norm: 0.9822302235683098, iteration: 451304
loss: 0.9910810589790344,grad_norm: 0.9274991002218366, iteration: 451305
loss: 0.9845004677772522,grad_norm: 0.5563410431920358, iteration: 451306
loss: 0.9815402030944824,grad_norm: 0.7189837845792708, iteration: 451307
loss: 0.9778603315353394,grad_norm: 0.6700700937726134, iteration: 451308
loss: 1.004225492477417,grad_norm: 0.8150676783209485, iteration: 451309
loss: 1.000618815422058,grad_norm: 0.6816343835010861, iteration: 451310
loss: 1.0095607042312622,grad_norm: 0.8794121890408849, iteration: 451311
loss: 1.0126287937164307,grad_norm: 0.8048028476814336, iteration: 451312
loss: 0.9743602871894836,grad_norm: 0.6916639936449652, iteration: 451313
loss: 1.0008924007415771,grad_norm: 0.7532583838665299, iteration: 451314
loss: 1.0719661712646484,grad_norm: 0.7144105927775356, iteration: 451315
loss: 0.9902586340904236,grad_norm: 0.6504259913886978, iteration: 451316
loss: 1.0226155519485474,grad_norm: 0.7149327270302017, iteration: 451317
loss: 1.0076674222946167,grad_norm: 0.8020830942786797, iteration: 451318
loss: 0.998546838760376,grad_norm: 0.7036844925472046, iteration: 451319
loss: 0.9713513255119324,grad_norm: 0.9165462177472951, iteration: 451320
loss: 0.9872668981552124,grad_norm: 0.7997424650598144, iteration: 451321
loss: 1.0170460939407349,grad_norm: 0.7736796031787666, iteration: 451322
loss: 0.9999206066131592,grad_norm: 0.7103784494766974, iteration: 451323
loss: 1.0035871267318726,grad_norm: 0.9415632180711211, iteration: 451324
loss: 1.0157119035720825,grad_norm: 0.8692337425928383, iteration: 451325
loss: 0.9637779593467712,grad_norm: 0.7859638628084301, iteration: 451326
loss: 0.9932882189750671,grad_norm: 0.7243556856666309, iteration: 451327
loss: 0.9804812073707581,grad_norm: 0.6557434590182643, iteration: 451328
loss: 1.024315595626831,grad_norm: 0.7421083418943275, iteration: 451329
loss: 1.0112779140472412,grad_norm: 0.7912737917015453, iteration: 451330
loss: 1.0543982982635498,grad_norm: 0.9252824873964506, iteration: 451331
loss: 1.0037325620651245,grad_norm: 0.8911895118132092, iteration: 451332
loss: 1.0280526876449585,grad_norm: 0.7840582603341869, iteration: 451333
loss: 0.9963663220405579,grad_norm: 0.7979750868016529, iteration: 451334
loss: 0.9730212688446045,grad_norm: 0.7160775447023674, iteration: 451335
loss: 0.9664142727851868,grad_norm: 0.7134430551461548, iteration: 451336
loss: 0.997288167476654,grad_norm: 0.7444573189077239, iteration: 451337
loss: 1.0183331966400146,grad_norm: 0.7473279887670192, iteration: 451338
loss: 1.0164049863815308,grad_norm: 0.7407197216989618, iteration: 451339
loss: 0.9440351724624634,grad_norm: 0.8909193358376877, iteration: 451340
loss: 0.9860633015632629,grad_norm: 0.877497204786071, iteration: 451341
loss: 0.9779754281044006,grad_norm: 0.7211613161589733, iteration: 451342
loss: 0.9910648465156555,grad_norm: 0.7053436809863578, iteration: 451343
loss: 0.9872861504554749,grad_norm: 0.6924697995608522, iteration: 451344
loss: 0.998901903629303,grad_norm: 0.7955559183051877, iteration: 451345
loss: 1.0111027956008911,grad_norm: 0.8506392780491745, iteration: 451346
loss: 0.9975742697715759,grad_norm: 0.8301145850664936, iteration: 451347
loss: 0.9747641682624817,grad_norm: 0.6931257993422767, iteration: 451348
loss: 0.9676613807678223,grad_norm: 0.7004190573110038, iteration: 451349
loss: 0.9418728351593018,grad_norm: 0.75642324806075, iteration: 451350
loss: 1.0442513227462769,grad_norm: 0.8354670317512226, iteration: 451351
loss: 1.0097376108169556,grad_norm: 0.7068725942366487, iteration: 451352
loss: 0.9834446907043457,grad_norm: 0.874981235912767, iteration: 451353
loss: 1.076120138168335,grad_norm: 0.9043807161707252, iteration: 451354
loss: 0.978528618812561,grad_norm: 0.7148803213510188, iteration: 451355
loss: 1.0077426433563232,grad_norm: 0.6802618595891408, iteration: 451356
loss: 1.0122177600860596,grad_norm: 0.8720033313362612, iteration: 451357
loss: 1.0183197259902954,grad_norm: 0.7288514792687604, iteration: 451358
loss: 0.9940804839134216,grad_norm: 0.7217251982241111, iteration: 451359
loss: 0.9656025171279907,grad_norm: 0.7336982429906895, iteration: 451360
loss: 1.0072438716888428,grad_norm: 0.7102337388743013, iteration: 451361
loss: 1.0271821022033691,grad_norm: 0.8658498089987566, iteration: 451362
loss: 1.0446627140045166,grad_norm: 0.9677531021130283, iteration: 451363
loss: 0.9941500425338745,grad_norm: 0.7317293649699659, iteration: 451364
loss: 1.0127919912338257,grad_norm: 0.5866698138944977, iteration: 451365
loss: 0.9847981929779053,grad_norm: 0.8432503767329185, iteration: 451366
loss: 1.0210193395614624,grad_norm: 0.8870561195525847, iteration: 451367
loss: 1.0127923488616943,grad_norm: 0.7955691907093678, iteration: 451368
loss: 1.0020323991775513,grad_norm: 0.804534633432008, iteration: 451369
loss: 0.9799602627754211,grad_norm: 0.7409133932066042, iteration: 451370
loss: 0.984534502029419,grad_norm: 0.7655108414156288, iteration: 451371
loss: 1.0023517608642578,grad_norm: 0.7812252992432284, iteration: 451372
loss: 0.9629674553871155,grad_norm: 0.8632032579308335, iteration: 451373
loss: 1.0241655111312866,grad_norm: 0.7392204423671497, iteration: 451374
loss: 1.0029124021530151,grad_norm: 0.7168178800879927, iteration: 451375
loss: 0.9726142287254333,grad_norm: 0.6924764217443456, iteration: 451376
loss: 1.0004897117614746,grad_norm: 0.7069288470483662, iteration: 451377
loss: 1.002719521522522,grad_norm: 0.9999993233800899, iteration: 451378
loss: 0.9847721457481384,grad_norm: 0.8129370185020995, iteration: 451379
loss: 0.9834859371185303,grad_norm: 0.7572323011719293, iteration: 451380
loss: 0.9886744618415833,grad_norm: 0.9729626185909945, iteration: 451381
loss: 1.0200358629226685,grad_norm: 0.7604827796128618, iteration: 451382
loss: 0.9668559432029724,grad_norm: 0.7565053887015162, iteration: 451383
loss: 0.9713821411132812,grad_norm: 0.896861620302046, iteration: 451384
loss: 1.018428921699524,grad_norm: 0.893592038938664, iteration: 451385
loss: 1.0181714296340942,grad_norm: 0.8212371569353336, iteration: 451386
loss: 0.9934784770011902,grad_norm: 0.8580991828580966, iteration: 451387
loss: 0.9785667061805725,grad_norm: 0.7076794757094479, iteration: 451388
loss: 0.9994845986366272,grad_norm: 0.9309530003702824, iteration: 451389
loss: 1.1064701080322266,grad_norm: 0.9999992728066857, iteration: 451390
loss: 0.9436088800430298,grad_norm: 0.8400522523134862, iteration: 451391
loss: 0.9979578852653503,grad_norm: 0.9144783587576297, iteration: 451392
loss: 1.0821255445480347,grad_norm: 0.9999993920847965, iteration: 451393
loss: 1.0019123554229736,grad_norm: 0.6691794339499684, iteration: 451394
loss: 0.995502769947052,grad_norm: 0.796764165773008, iteration: 451395
loss: 0.9793530702590942,grad_norm: 0.8487623852281614, iteration: 451396
loss: 0.9530318379402161,grad_norm: 0.8590568665517982, iteration: 451397
loss: 1.0282578468322754,grad_norm: 0.9195335359822353, iteration: 451398
loss: 0.9760417938232422,grad_norm: 0.7812648408716406, iteration: 451399
loss: 0.9893549084663391,grad_norm: 0.6851568270094645, iteration: 451400
loss: 0.9878689646720886,grad_norm: 0.6672820795730474, iteration: 451401
loss: 1.0456286668777466,grad_norm: 0.773340219605237, iteration: 451402
loss: 1.0171030759811401,grad_norm: 0.740629380765171, iteration: 451403
loss: 1.0216896533966064,grad_norm: 0.6611232305270544, iteration: 451404
loss: 0.9823541045188904,grad_norm: 0.9999993772259002, iteration: 451405
loss: 1.048041582107544,grad_norm: 0.7447741586486553, iteration: 451406
loss: 0.9927115440368652,grad_norm: 0.8646722068602399, iteration: 451407
loss: 0.967048168182373,grad_norm: 0.6437891502567457, iteration: 451408
loss: 1.0105100870132446,grad_norm: 0.6931826704432967, iteration: 451409
loss: 0.9801677465438843,grad_norm: 0.8625445326721202, iteration: 451410
loss: 1.0125826597213745,grad_norm: 0.7084791448282821, iteration: 451411
loss: 0.9479727745056152,grad_norm: 0.7052085417657388, iteration: 451412
loss: 1.0102410316467285,grad_norm: 0.9999996724958814, iteration: 451413
loss: 1.0095051527023315,grad_norm: 0.999999870287488, iteration: 451414
loss: 1.0259209871292114,grad_norm: 0.8164137488459351, iteration: 451415
loss: 1.0085371732711792,grad_norm: 0.7589109674033789, iteration: 451416
loss: 1.039549469947815,grad_norm: 0.6741142273260646, iteration: 451417
loss: 1.0010688304901123,grad_norm: 0.790501700582157, iteration: 451418
loss: 0.9847263097763062,grad_norm: 0.7453544261228168, iteration: 451419
loss: 1.0232545137405396,grad_norm: 0.8136223236416197, iteration: 451420
loss: 1.0086742639541626,grad_norm: 0.8275004191820448, iteration: 451421
loss: 0.9907211065292358,grad_norm: 0.6171903395341813, iteration: 451422
loss: 1.01491117477417,grad_norm: 0.7533604323200597, iteration: 451423
loss: 0.9860625863075256,grad_norm: 0.7666024705914, iteration: 451424
loss: 1.0329675674438477,grad_norm: 0.9999990401644707, iteration: 451425
loss: 0.9972515106201172,grad_norm: 0.8103730376307993, iteration: 451426
loss: 0.9878586530685425,grad_norm: 0.999999835854212, iteration: 451427
loss: 1.0208770036697388,grad_norm: 0.9999998702286499, iteration: 451428
loss: 1.0014036893844604,grad_norm: 0.8957949704840957, iteration: 451429
loss: 0.9979615807533264,grad_norm: 0.9999996712489773, iteration: 451430
loss: 0.9962565302848816,grad_norm: 0.7240285840438604, iteration: 451431
loss: 0.985508143901825,grad_norm: 0.7790568456034108, iteration: 451432
loss: 0.9798977971076965,grad_norm: 0.7029506257443356, iteration: 451433
loss: 1.0168918371200562,grad_norm: 0.8294254903457945, iteration: 451434
loss: 0.997337281703949,grad_norm: 0.6906420702216901, iteration: 451435
loss: 1.005628228187561,grad_norm: 0.6354676713196716, iteration: 451436
loss: 1.0185530185699463,grad_norm: 0.8711694185925958, iteration: 451437
loss: 0.9764487147331238,grad_norm: 0.69859769410103, iteration: 451438
loss: 0.9966163635253906,grad_norm: 0.7782562008501732, iteration: 451439
loss: 1.0253684520721436,grad_norm: 0.9999990909965432, iteration: 451440
loss: 1.060090184211731,grad_norm: 0.9652339860587914, iteration: 451441
loss: 0.9800283908843994,grad_norm: 0.6833586224589616, iteration: 451442
loss: 0.9948302507400513,grad_norm: 0.8280342151791552, iteration: 451443
loss: 1.0948911905288696,grad_norm: 0.9999999677278046, iteration: 451444
loss: 1.0174587965011597,grad_norm: 0.80578286008906, iteration: 451445
loss: 0.9978141188621521,grad_norm: 0.8029545747219288, iteration: 451446
loss: 0.9735342264175415,grad_norm: 0.6915586493433181, iteration: 451447
loss: 1.029728889465332,grad_norm: 0.7161408992216853, iteration: 451448
loss: 1.0170278549194336,grad_norm: 0.7621708733276472, iteration: 451449
loss: 1.0809106826782227,grad_norm: 0.9999998317530938, iteration: 451450
loss: 1.0386637449264526,grad_norm: 0.8197596665101053, iteration: 451451
loss: 1.017743468284607,grad_norm: 0.730566195383261, iteration: 451452
loss: 0.9896907806396484,grad_norm: 0.8138471293299004, iteration: 451453
loss: 1.0202065706253052,grad_norm: 0.765271713064995, iteration: 451454
loss: 1.00260329246521,grad_norm: 0.6883259049976487, iteration: 451455
loss: 1.0531902313232422,grad_norm: 0.7666309409486529, iteration: 451456
loss: 0.9674502015113831,grad_norm: 0.8191817405288934, iteration: 451457
loss: 1.0053225755691528,grad_norm: 0.7570551810526033, iteration: 451458
loss: 1.0082037448883057,grad_norm: 0.7555385112503535, iteration: 451459
loss: 1.038314938545227,grad_norm: 0.9969527883002182, iteration: 451460
loss: 1.0263841152191162,grad_norm: 0.6031958755794561, iteration: 451461
loss: 1.0062276124954224,grad_norm: 0.8259564433336145, iteration: 451462
loss: 1.0151195526123047,grad_norm: 0.9781968005661218, iteration: 451463
loss: 1.028732419013977,grad_norm: 0.999999416036518, iteration: 451464
loss: 0.9984662532806396,grad_norm: 0.6595596627864614, iteration: 451465
loss: 1.000347375869751,grad_norm: 0.9163110625482397, iteration: 451466
loss: 1.082557201385498,grad_norm: 0.999999843784607, iteration: 451467
loss: 1.000464916229248,grad_norm: 0.8727921787913616, iteration: 451468
loss: 1.0110660791397095,grad_norm: 0.9999993436196797, iteration: 451469
loss: 0.9887735247612,grad_norm: 0.9564134016203919, iteration: 451470
loss: 1.0174627304077148,grad_norm: 0.7482433899684046, iteration: 451471
loss: 0.9499450325965881,grad_norm: 0.6852962302689679, iteration: 451472
loss: 1.005076289176941,grad_norm: 0.7433152828376002, iteration: 451473
loss: 1.0138442516326904,grad_norm: 0.8937520451139361, iteration: 451474
loss: 0.9657594561576843,grad_norm: 0.7543773205867477, iteration: 451475
loss: 1.0028587579727173,grad_norm: 0.9999992227394932, iteration: 451476
loss: 0.9975473880767822,grad_norm: 0.7442222577944516, iteration: 451477
loss: 0.9664270877838135,grad_norm: 0.6543238236461114, iteration: 451478
loss: 1.0009950399398804,grad_norm: 0.7110513356903906, iteration: 451479
loss: 1.0017656087875366,grad_norm: 0.8836818815357642, iteration: 451480
loss: 1.0016504526138306,grad_norm: 0.7944026115792253, iteration: 451481
loss: 1.006826639175415,grad_norm: 0.7849607958375073, iteration: 451482
loss: 1.0085835456848145,grad_norm: 0.7647272026404357, iteration: 451483
loss: 1.0347504615783691,grad_norm: 0.9999995580441488, iteration: 451484
loss: 1.0181559324264526,grad_norm: 0.6814353803437652, iteration: 451485
loss: 1.0086793899536133,grad_norm: 0.9164823132125417, iteration: 451486
loss: 1.018609881401062,grad_norm: 0.7617901695358932, iteration: 451487
loss: 1.0232514142990112,grad_norm: 0.7017754230926126, iteration: 451488
loss: 0.9774782061576843,grad_norm: 0.6934836157477636, iteration: 451489
loss: 1.0558339357376099,grad_norm: 0.8706959932942908, iteration: 451490
loss: 0.9859967827796936,grad_norm: 0.6829419841041551, iteration: 451491
loss: 0.9777677655220032,grad_norm: 0.6695980623243656, iteration: 451492
loss: 0.9961623549461365,grad_norm: 0.7564154662373876, iteration: 451493
loss: 1.0105845928192139,grad_norm: 0.8177842709453865, iteration: 451494
loss: 1.0281691551208496,grad_norm: 0.6488009442865957, iteration: 451495
loss: 1.0683196783065796,grad_norm: 0.7481646212447934, iteration: 451496
loss: 0.9806162714958191,grad_norm: 0.6911309459198475, iteration: 451497
loss: 0.9994398951530457,grad_norm: 0.7842508601677012, iteration: 451498
loss: 1.0476747751235962,grad_norm: 0.7984833927238014, iteration: 451499
loss: 1.1139217615127563,grad_norm: 0.862773019514665, iteration: 451500
loss: 1.0142587423324585,grad_norm: 0.9767882014728086, iteration: 451501
loss: 0.984560489654541,grad_norm: 0.7645429976908616, iteration: 451502
loss: 0.9759687781333923,grad_norm: 0.9096486029962509, iteration: 451503
loss: 0.9692421555519104,grad_norm: 0.7809355950539437, iteration: 451504
loss: 1.000165581703186,grad_norm: 0.712698960200086, iteration: 451505
loss: 0.9977420568466187,grad_norm: 0.9999995774106272, iteration: 451506
loss: 1.0270951986312866,grad_norm: 0.9999995397305547, iteration: 451507
loss: 0.9959962368011475,grad_norm: 0.8559758014025746, iteration: 451508
loss: 1.0098901987075806,grad_norm: 0.703048406442585, iteration: 451509
loss: 1.0261499881744385,grad_norm: 0.9281966590986138, iteration: 451510
loss: 0.9704774022102356,grad_norm: 0.8095525113903853, iteration: 451511
loss: 0.9876370429992676,grad_norm: 0.6683771273784522, iteration: 451512
loss: 1.012770414352417,grad_norm: 0.6622676964123816, iteration: 451513
loss: 0.9987146854400635,grad_norm: 0.8739203848520675, iteration: 451514
loss: 1.0600523948669434,grad_norm: 0.7891075067320993, iteration: 451515
loss: 0.970937967300415,grad_norm: 0.744458323829765, iteration: 451516
loss: 1.0151294469833374,grad_norm: 0.6426529803910382, iteration: 451517
loss: 1.0217115879058838,grad_norm: 0.9710633192399983, iteration: 451518
loss: 0.9881414175033569,grad_norm: 0.7456551009012227, iteration: 451519
loss: 1.1357475519180298,grad_norm: 0.9999999986434145, iteration: 451520
loss: 1.059812068939209,grad_norm: 0.8358345616053946, iteration: 451521
loss: 1.0032283067703247,grad_norm: 0.7138664681083265, iteration: 451522
loss: 1.0084264278411865,grad_norm: 0.6823744683579324, iteration: 451523
loss: 0.9910496473312378,grad_norm: 0.763229126523298, iteration: 451524
loss: 1.1107958555221558,grad_norm: 0.9999998137853416, iteration: 451525
loss: 1.0054106712341309,grad_norm: 0.6663314905488296, iteration: 451526
loss: 0.9650638103485107,grad_norm: 0.7559450897998097, iteration: 451527
loss: 1.0016088485717773,grad_norm: 0.7567005083399311, iteration: 451528
loss: 0.9801792502403259,grad_norm: 0.6824354496951063, iteration: 451529
loss: 1.0492464303970337,grad_norm: 0.9999994210559157, iteration: 451530
loss: 1.049576997756958,grad_norm: 0.8059757978739138, iteration: 451531
loss: 1.0256253480911255,grad_norm: 0.8046206379911923, iteration: 451532
loss: 0.9755141735076904,grad_norm: 0.8650083202349623, iteration: 451533
loss: 1.02431321144104,grad_norm: 0.6977777033432557, iteration: 451534
loss: 1.0018563270568848,grad_norm: 0.7310810710385691, iteration: 451535
loss: 0.9998684525489807,grad_norm: 0.7225923881176618, iteration: 451536
loss: 0.9778661131858826,grad_norm: 0.7415752510606205, iteration: 451537
loss: 1.0105363130569458,grad_norm: 0.8298280694350765, iteration: 451538
loss: 0.9903525114059448,grad_norm: 0.650540262572231, iteration: 451539
loss: 1.0100103616714478,grad_norm: 0.700313915259081, iteration: 451540
loss: 1.0253633260726929,grad_norm: 0.9261966487311798, iteration: 451541
loss: 1.0114965438842773,grad_norm: 0.7751903018809821, iteration: 451542
loss: 0.9865268468856812,grad_norm: 0.9635814123484957, iteration: 451543
loss: 1.0145925283432007,grad_norm: 0.9712168646976622, iteration: 451544
loss: 1.0631053447723389,grad_norm: 0.8631840359692519, iteration: 451545
loss: 1.0198955535888672,grad_norm: 0.803049385293606, iteration: 451546
loss: 1.0175141096115112,grad_norm: 0.7231303899598579, iteration: 451547
loss: 1.0308853387832642,grad_norm: 0.6974237215745669, iteration: 451548
loss: 0.9736114144325256,grad_norm: 0.7453319611552085, iteration: 451549
loss: 0.9725314378738403,grad_norm: 0.7423975731018074, iteration: 451550
loss: 1.038928508758545,grad_norm: 0.8235611732626299, iteration: 451551
loss: 1.0225316286087036,grad_norm: 0.7699166271642435, iteration: 451552
loss: 1.0457861423492432,grad_norm: 0.8262493680127682, iteration: 451553
loss: 0.9948362112045288,grad_norm: 0.8864364733535739, iteration: 451554
loss: 0.9887819290161133,grad_norm: 0.7185933805003425, iteration: 451555
loss: 0.9926315546035767,grad_norm: 0.6000124022218261, iteration: 451556
loss: 1.0290042161941528,grad_norm: 0.716761488600729, iteration: 451557
loss: 1.0057566165924072,grad_norm: 0.7808171650873665, iteration: 451558
loss: 0.9983818531036377,grad_norm: 0.7748395605969797, iteration: 451559
loss: 0.9652155637741089,grad_norm: 0.6505157947149671, iteration: 451560
loss: 0.9977583289146423,grad_norm: 0.7733810432620144, iteration: 451561
loss: 0.9750921130180359,grad_norm: 0.9999999209445535, iteration: 451562
loss: 1.0420746803283691,grad_norm: 0.8322371824573211, iteration: 451563
loss: 0.9754305481910706,grad_norm: 0.8236327664836305, iteration: 451564
loss: 1.004906415939331,grad_norm: 0.9114409671475346, iteration: 451565
loss: 1.0095160007476807,grad_norm: 0.9999996893828482, iteration: 451566
loss: 1.0271087884902954,grad_norm: 0.7643850168267976, iteration: 451567
loss: 1.0097873210906982,grad_norm: 0.6939308805261271, iteration: 451568
loss: 0.9977195858955383,grad_norm: 0.8029124092329126, iteration: 451569
loss: 0.9958531260490417,grad_norm: 0.6567788945687227, iteration: 451570
loss: 1.0039442777633667,grad_norm: 0.7032529055385955, iteration: 451571
loss: 1.0159990787506104,grad_norm: 0.772469861819053, iteration: 451572
loss: 1.0562293529510498,grad_norm: 0.99999968447165, iteration: 451573
loss: 0.9901347756385803,grad_norm: 0.7119002851861347, iteration: 451574
loss: 1.0102516412734985,grad_norm: 0.8129386893326764, iteration: 451575
loss: 1.0158517360687256,grad_norm: 0.876706893119516, iteration: 451576
loss: 0.9978205561637878,grad_norm: 0.8124460362754076, iteration: 451577
loss: 1.0236190557479858,grad_norm: 0.8668623836353356, iteration: 451578
loss: 1.0001300573349,grad_norm: 0.6750665095639784, iteration: 451579
loss: 1.0090155601501465,grad_norm: 0.727547094579973, iteration: 451580
loss: 0.9907039999961853,grad_norm: 0.8187482973517235, iteration: 451581
loss: 1.012365460395813,grad_norm: 0.6906352009163542, iteration: 451582
loss: 0.9893860816955566,grad_norm: 0.8187388890545906, iteration: 451583
loss: 0.9601525068283081,grad_norm: 0.7263854861898782, iteration: 451584
loss: 0.980951189994812,grad_norm: 0.7914042234730875, iteration: 451585
loss: 1.0048755407333374,grad_norm: 0.7155538113023832, iteration: 451586
loss: 0.9819137454032898,grad_norm: 0.7538390018936536, iteration: 451587
loss: 1.018088936805725,grad_norm: 0.7706495740739954, iteration: 451588
loss: 0.9565883278846741,grad_norm: 0.8097510693862736, iteration: 451589
loss: 0.9962284564971924,grad_norm: 0.8007726359932867, iteration: 451590
loss: 1.0132888555526733,grad_norm: 0.7231243694352095, iteration: 451591
loss: 1.0020356178283691,grad_norm: 0.7241351044087762, iteration: 451592
loss: 0.9925084114074707,grad_norm: 0.9999991429903532, iteration: 451593
loss: 1.0007346868515015,grad_norm: 0.7151724952694896, iteration: 451594
loss: 1.0063621997833252,grad_norm: 0.8675135038839465, iteration: 451595
loss: 0.9864931106567383,grad_norm: 0.8534009327416238, iteration: 451596
loss: 1.0064542293548584,grad_norm: 0.9999995496492172, iteration: 451597
loss: 1.088105320930481,grad_norm: 0.7487932018317565, iteration: 451598
loss: 1.1209397315979004,grad_norm: 0.9999992370969638, iteration: 451599
loss: 1.0064480304718018,grad_norm: 0.7628582050927107, iteration: 451600
loss: 1.0223655700683594,grad_norm: 0.9999994307564056, iteration: 451601
loss: 1.0038038492202759,grad_norm: 0.7701691619420891, iteration: 451602
loss: 0.9814083576202393,grad_norm: 0.6767650153183693, iteration: 451603
loss: 0.983543872833252,grad_norm: 0.7109452850951016, iteration: 451604
loss: 0.9829088449478149,grad_norm: 0.6877536594162716, iteration: 451605
loss: 1.0122406482696533,grad_norm: 0.9999991474406398, iteration: 451606
loss: 0.9967315793037415,grad_norm: 0.9999992533793899, iteration: 451607
loss: 0.9726676940917969,grad_norm: 0.696024635693032, iteration: 451608
loss: 0.9746265411376953,grad_norm: 0.827227484230516, iteration: 451609
loss: 0.9823653697967529,grad_norm: 0.7871722569253511, iteration: 451610
loss: 1.0110645294189453,grad_norm: 0.7198917854268004, iteration: 451611
loss: 1.0412572622299194,grad_norm: 0.9999998401846095, iteration: 451612
loss: 0.983350932598114,grad_norm: 0.7594886085493259, iteration: 451613
loss: 1.0157557725906372,grad_norm: 0.7259409146491815, iteration: 451614
loss: 1.0029723644256592,grad_norm: 0.7152194746314305, iteration: 451615
loss: 0.9825345277786255,grad_norm: 0.7391275611136515, iteration: 451616
loss: 0.9972969889640808,grad_norm: 0.6793275545336576, iteration: 451617
loss: 0.9990483522415161,grad_norm: 0.6781888761488275, iteration: 451618
loss: 1.0725338459014893,grad_norm: 0.9115415712915849, iteration: 451619
loss: 1.0056781768798828,grad_norm: 0.785597143549102, iteration: 451620
loss: 0.9669355154037476,grad_norm: 0.7507859231861422, iteration: 451621
loss: 0.9991363883018494,grad_norm: 0.7035071341355673, iteration: 451622
loss: 1.052956461906433,grad_norm: 0.9073871958592739, iteration: 451623
loss: 1.0106176137924194,grad_norm: 0.9863387315677533, iteration: 451624
loss: 1.0734100341796875,grad_norm: 0.9999996725686771, iteration: 451625
loss: 1.0224144458770752,grad_norm: 0.7412899428083901, iteration: 451626
loss: 1.0263853073120117,grad_norm: 0.6910404251133075, iteration: 451627
loss: 1.0016285181045532,grad_norm: 0.847398568039427, iteration: 451628
loss: 1.017336368560791,grad_norm: 0.7433635394203069, iteration: 451629
loss: 1.0512852668762207,grad_norm: 0.7312299118945542, iteration: 451630
loss: 0.9883939027786255,grad_norm: 0.7306767540371452, iteration: 451631
loss: 1.0031296014785767,grad_norm: 0.7011775052416743, iteration: 451632
loss: 1.0707721710205078,grad_norm: 0.9999995495592667, iteration: 451633
loss: 0.9923803806304932,grad_norm: 0.6905196825066126, iteration: 451634
loss: 1.0140228271484375,grad_norm: 0.8722617391889743, iteration: 451635
loss: 0.9979074597358704,grad_norm: 0.7174130087825924, iteration: 451636
loss: 0.996090292930603,grad_norm: 0.8610039323415715, iteration: 451637
loss: 0.9808395504951477,grad_norm: 0.6606556484427872, iteration: 451638
loss: 0.9997589588165283,grad_norm: 0.8030554586660479, iteration: 451639
loss: 1.010976791381836,grad_norm: 0.6934209658559306, iteration: 451640
loss: 1.049385905265808,grad_norm: 0.9999994984193273, iteration: 451641
loss: 1.000731348991394,grad_norm: 0.8561790062650722, iteration: 451642
loss: 0.9856391549110413,grad_norm: 0.7650848607200113, iteration: 451643
loss: 1.0372567176818848,grad_norm: 0.9999996833897853, iteration: 451644
loss: 0.9387336373329163,grad_norm: 0.8459431145775608, iteration: 451645
loss: 1.0024809837341309,grad_norm: 0.722384707045184, iteration: 451646
loss: 1.0105791091918945,grad_norm: 0.730125505584632, iteration: 451647
loss: 0.9614723324775696,grad_norm: 0.911690078311978, iteration: 451648
loss: 0.9894996285438538,grad_norm: 0.8908870637762796, iteration: 451649
loss: 0.9983463883399963,grad_norm: 0.9107971941738497, iteration: 451650
loss: 0.9544920325279236,grad_norm: 0.8345937674653272, iteration: 451651
loss: 1.0477807521820068,grad_norm: 0.8243563677289274, iteration: 451652
loss: 1.0717878341674805,grad_norm: 0.9830116061717246, iteration: 451653
loss: 0.9676222205162048,grad_norm: 0.8792353501881632, iteration: 451654
loss: 1.0269813537597656,grad_norm: 0.7722696805190076, iteration: 451655
loss: 1.0021923780441284,grad_norm: 0.7616209302327738, iteration: 451656
loss: 1.0304930210113525,grad_norm: 0.7729556272481977, iteration: 451657
loss: 1.1284677982330322,grad_norm: 0.9999993891791144, iteration: 451658
loss: 0.9794780015945435,grad_norm: 0.6775518049631937, iteration: 451659
loss: 1.0089362859725952,grad_norm: 0.7821768032783772, iteration: 451660
loss: 1.0163328647613525,grad_norm: 0.7844797954436091, iteration: 451661
loss: 0.9853304624557495,grad_norm: 0.9999996511087972, iteration: 451662
loss: 0.9506595730781555,grad_norm: 0.7597681865000164, iteration: 451663
loss: 0.9723131656646729,grad_norm: 0.8176968257858243, iteration: 451664
loss: 0.9622853994369507,grad_norm: 0.9999989891752026, iteration: 451665
loss: 1.0085479021072388,grad_norm: 0.8798907443161286, iteration: 451666
loss: 0.9731432795524597,grad_norm: 0.7919709339591626, iteration: 451667
loss: 0.9907278418540955,grad_norm: 0.8332617341392039, iteration: 451668
loss: 1.0606497526168823,grad_norm: 0.9999997220950415, iteration: 451669
loss: 1.0259498357772827,grad_norm: 0.6712476727299987, iteration: 451670
loss: 1.0047658681869507,grad_norm: 0.8058022094319333, iteration: 451671
loss: 0.9991540312767029,grad_norm: 0.9999995129204724, iteration: 451672
loss: 0.9641570448875427,grad_norm: 0.9093721636410226, iteration: 451673
loss: 1.0495340824127197,grad_norm: 0.8484251838591153, iteration: 451674
loss: 0.9928351640701294,grad_norm: 0.9999993391105807, iteration: 451675
loss: 0.9974619746208191,grad_norm: 0.8493181483459707, iteration: 451676
loss: 0.9828950762748718,grad_norm: 0.7600482512915345, iteration: 451677
loss: 0.9739403128623962,grad_norm: 0.6813923358715902, iteration: 451678
loss: 1.0061895847320557,grad_norm: 0.7783320544558291, iteration: 451679
loss: 1.027718186378479,grad_norm: 0.8343726588624173, iteration: 451680
loss: 0.9989114999771118,grad_norm: 0.7943869242147829, iteration: 451681
loss: 0.9946532845497131,grad_norm: 0.757889224160389, iteration: 451682
loss: 0.962946355342865,grad_norm: 0.6844330442051159, iteration: 451683
loss: 0.9918659329414368,grad_norm: 0.7619609851934609, iteration: 451684
loss: 1.0338133573532104,grad_norm: 0.7793511820253596, iteration: 451685
loss: 1.08651864528656,grad_norm: 0.9999997144764889, iteration: 451686
loss: 1.0069353580474854,grad_norm: 0.7423800178211329, iteration: 451687
loss: 1.0448962450027466,grad_norm: 0.9999992415498741, iteration: 451688
loss: 1.0139095783233643,grad_norm: 0.8777601825610207, iteration: 451689
loss: 0.9859476089477539,grad_norm: 0.7475461667009017, iteration: 451690
loss: 1.0144381523132324,grad_norm: 0.9999992034357885, iteration: 451691
loss: 0.9835330247879028,grad_norm: 0.7640760028353882, iteration: 451692
loss: 0.9922124743461609,grad_norm: 0.769350225326652, iteration: 451693
loss: 1.0261950492858887,grad_norm: 0.8356943733596718, iteration: 451694
loss: 0.9919032454490662,grad_norm: 0.930365532156574, iteration: 451695
loss: 1.005583643913269,grad_norm: 0.7677317268141697, iteration: 451696
loss: 1.021652102470398,grad_norm: 0.9455717685199366, iteration: 451697
loss: 1.0045920610427856,grad_norm: 0.8537414186550502, iteration: 451698
loss: 1.0148370265960693,grad_norm: 0.6920812481606, iteration: 451699
loss: 1.0022938251495361,grad_norm: 0.7370095816441569, iteration: 451700
loss: 0.9722784757614136,grad_norm: 0.6872920116105369, iteration: 451701
loss: 0.9798254370689392,grad_norm: 0.8428639432699586, iteration: 451702
loss: 0.9986181855201721,grad_norm: 0.7258114344408128, iteration: 451703
loss: 1.002576231956482,grad_norm: 0.9047374383155307, iteration: 451704
loss: 1.0032246112823486,grad_norm: 0.9999991357060314, iteration: 451705
loss: 1.136359691619873,grad_norm: 0.9999999003052613, iteration: 451706
loss: 0.9741865396499634,grad_norm: 0.9477376751513207, iteration: 451707
loss: 0.9747692346572876,grad_norm: 0.7054009757627256, iteration: 451708
loss: 1.0558000802993774,grad_norm: 0.8358385743079537, iteration: 451709
loss: 1.0493888854980469,grad_norm: 0.9854595796944436, iteration: 451710
loss: 1.0244178771972656,grad_norm: 0.9399076659823081, iteration: 451711
loss: 1.028866171836853,grad_norm: 0.7111840566126627, iteration: 451712
loss: 0.982657790184021,grad_norm: 0.7410694539180024, iteration: 451713
loss: 0.9585422873497009,grad_norm: 0.8479783245252387, iteration: 451714
loss: 1.0070419311523438,grad_norm: 0.7868035022895216, iteration: 451715
loss: 1.011687159538269,grad_norm: 0.7539527181845777, iteration: 451716
loss: 1.2665859460830688,grad_norm: 0.9999997011159615, iteration: 451717
loss: 1.009316086769104,grad_norm: 0.7244158313123821, iteration: 451718
loss: 1.029297113418579,grad_norm: 0.6512197874649978, iteration: 451719
loss: 0.98446124792099,grad_norm: 0.6666135174424407, iteration: 451720
loss: 1.0099949836730957,grad_norm: 0.7761217232872489, iteration: 451721
loss: 1.2535873651504517,grad_norm: 1.000000013243653, iteration: 451722
loss: 0.9911104440689087,grad_norm: 0.7145188250273637, iteration: 451723
loss: 1.1911795139312744,grad_norm: 0.9999990228315436, iteration: 451724
loss: 0.9971164464950562,grad_norm: 0.855980841151924, iteration: 451725
loss: 0.9922949075698853,grad_norm: 0.6960700057854428, iteration: 451726
loss: 0.976843535900116,grad_norm: 0.7222940979754288, iteration: 451727
loss: 0.9918697476387024,grad_norm: 0.7034761537231045, iteration: 451728
loss: 0.9776812195777893,grad_norm: 0.6700968144132328, iteration: 451729
loss: 0.9742836952209473,grad_norm: 0.6814406297951676, iteration: 451730
loss: 1.0254045724868774,grad_norm: 0.9999990837700878, iteration: 451731
loss: 1.0025866031646729,grad_norm: 0.8514548439814354, iteration: 451732
loss: 1.0112584829330444,grad_norm: 0.6233781438977363, iteration: 451733
loss: 0.9756322503089905,grad_norm: 0.73744637219345, iteration: 451734
loss: 0.9868949055671692,grad_norm: 0.7386625081838832, iteration: 451735
loss: 1.0301425457000732,grad_norm: 0.9999993611566409, iteration: 451736
loss: 0.9840924739837646,grad_norm: 0.6478408269084499, iteration: 451737
loss: 1.3229451179504395,grad_norm: 0.9999998983517353, iteration: 451738
loss: 0.9739783406257629,grad_norm: 0.983535434510782, iteration: 451739
loss: 1.0245710611343384,grad_norm: 0.7697499897354445, iteration: 451740
loss: 0.995310366153717,grad_norm: 0.8693595692837023, iteration: 451741
loss: 1.010360598564148,grad_norm: 0.9999990644037335, iteration: 451742
loss: 1.0136382579803467,grad_norm: 0.9110021551515867, iteration: 451743
loss: 1.0178889036178589,grad_norm: 0.6772735299830411, iteration: 451744
loss: 1.0150831937789917,grad_norm: 0.8514778961318816, iteration: 451745
loss: 1.025526523590088,grad_norm: 0.780009787061158, iteration: 451746
loss: 1.0198569297790527,grad_norm: 0.8906093353727453, iteration: 451747
loss: 0.9879657030105591,grad_norm: 0.9999999354313662, iteration: 451748
loss: 1.0015863180160522,grad_norm: 0.9033841539236521, iteration: 451749
loss: 1.198563814163208,grad_norm: 0.8213453489020532, iteration: 451750
loss: 1.0166939496994019,grad_norm: 0.6966951030891797, iteration: 451751
loss: 0.9825932383537292,grad_norm: 0.6719493077303587, iteration: 451752
loss: 1.1489472389221191,grad_norm: 0.9999992277459605, iteration: 451753
loss: 1.0132724046707153,grad_norm: 0.7080471633275895, iteration: 451754
loss: 1.0154869556427002,grad_norm: 0.9999992172063743, iteration: 451755
loss: 1.0175738334655762,grad_norm: 0.7051646158292635, iteration: 451756
loss: 1.0289922952651978,grad_norm: 0.999999271480656, iteration: 451757
loss: 1.0111619234085083,grad_norm: 0.8904497441094437, iteration: 451758
loss: 0.9737523198127747,grad_norm: 0.6115833893865852, iteration: 451759
loss: 1.0313464403152466,grad_norm: 0.9999998211143271, iteration: 451760
loss: 0.9794293642044067,grad_norm: 0.898343660359664, iteration: 451761
loss: 1.0289826393127441,grad_norm: 0.8659921920028363, iteration: 451762
loss: 1.0180490016937256,grad_norm: 0.784942936185026, iteration: 451763
loss: 0.9794138073921204,grad_norm: 0.7444260199956922, iteration: 451764
loss: 0.9702146649360657,grad_norm: 0.706634883389671, iteration: 451765
loss: 0.9877706170082092,grad_norm: 0.7287036856650521, iteration: 451766
loss: 0.9886451363563538,grad_norm: 0.672001152121534, iteration: 451767
loss: 1.0277479887008667,grad_norm: 0.9999998476601138, iteration: 451768
loss: 0.9885427951812744,grad_norm: 0.8502892936853658, iteration: 451769
loss: 1.0043184757232666,grad_norm: 0.9999989826510456, iteration: 451770
loss: 1.0298534631729126,grad_norm: 0.8870804990178702, iteration: 451771
loss: 0.9933302402496338,grad_norm: 0.9394260679612227, iteration: 451772
loss: 0.993025541305542,grad_norm: 0.6326006170164387, iteration: 451773
loss: 1.0922119617462158,grad_norm: 0.8794162961597235, iteration: 451774
loss: 1.059043526649475,grad_norm: 0.9999995822557153, iteration: 451775
loss: 0.9678230881690979,grad_norm: 0.727046021842196, iteration: 451776
loss: 0.9889320135116577,grad_norm: 0.7385318181158592, iteration: 451777
loss: 1.02620530128479,grad_norm: 0.9999990980008795, iteration: 451778
loss: 0.9553691148757935,grad_norm: 0.8238730613848535, iteration: 451779
loss: 0.9732599854469299,grad_norm: 0.7677741436975993, iteration: 451780
loss: 1.0042387247085571,grad_norm: 0.7525286447175915, iteration: 451781
loss: 0.9693990349769592,grad_norm: 0.9610477523036826, iteration: 451782
loss: 1.008180856704712,grad_norm: 0.8748262472772449, iteration: 451783
loss: 1.014889121055603,grad_norm: 0.7450388005179012, iteration: 451784
loss: 0.9844421148300171,grad_norm: 0.790422743916559, iteration: 451785
loss: 0.9850299954414368,grad_norm: 0.7983977021784809, iteration: 451786
loss: 1.1153672933578491,grad_norm: 0.9999997670270449, iteration: 451787
loss: 1.0626121759414673,grad_norm: 0.7439262202721205, iteration: 451788
loss: 1.014400839805603,grad_norm: 0.9462044675844886, iteration: 451789
loss: 0.9836167097091675,grad_norm: 0.6325688088436984, iteration: 451790
loss: 1.03663170337677,grad_norm: 0.7369870590682459, iteration: 451791
loss: 0.965548038482666,grad_norm: 0.7809920506289579, iteration: 451792
loss: 1.0474745035171509,grad_norm: 0.9999998527507701, iteration: 451793
loss: 1.0148017406463623,grad_norm: 0.8590499079991744, iteration: 451794
loss: 0.9744174480438232,grad_norm: 0.8467003400826473, iteration: 451795
loss: 1.0050016641616821,grad_norm: 0.7804884066738896, iteration: 451796
loss: 0.999578595161438,grad_norm: 0.6554359451661018, iteration: 451797
loss: 0.9979824423789978,grad_norm: 0.6671634935696034, iteration: 451798
loss: 1.001528263092041,grad_norm: 0.7417157082419862, iteration: 451799
loss: 0.9997380971908569,grad_norm: 0.884531089597814, iteration: 451800
loss: 1.0414314270019531,grad_norm: 0.8841357662265941, iteration: 451801
loss: 1.0145174264907837,grad_norm: 0.9999991275897055, iteration: 451802
loss: 0.9895361065864563,grad_norm: 0.8367977921887039, iteration: 451803
loss: 1.0023882389068604,grad_norm: 0.6931246830932922, iteration: 451804
loss: 0.9961273670196533,grad_norm: 0.6094403809741311, iteration: 451805
loss: 0.9776288866996765,grad_norm: 0.7120165627666029, iteration: 451806
loss: 0.99680495262146,grad_norm: 0.8192948627837447, iteration: 451807
loss: 1.0415157079696655,grad_norm: 0.9999991497067372, iteration: 451808
loss: 1.0166808366775513,grad_norm: 0.7544617510173983, iteration: 451809
loss: 0.9931043982505798,grad_norm: 0.737524662494893, iteration: 451810
loss: 1.0275546312332153,grad_norm: 0.7646329803789729, iteration: 451811
loss: 1.0045627355575562,grad_norm: 0.6385416283143148, iteration: 451812
loss: 1.000434160232544,grad_norm: 0.8646696234555354, iteration: 451813
loss: 1.1196985244750977,grad_norm: 0.9999998262178686, iteration: 451814
loss: 0.9852355718612671,grad_norm: 0.8298874451043863, iteration: 451815
loss: 1.0522226095199585,grad_norm: 0.9792073779881957, iteration: 451816
loss: 1.0303603410720825,grad_norm: 0.8704007644074526, iteration: 451817
loss: 1.032867670059204,grad_norm: 0.8845372842875877, iteration: 451818
loss: 0.9802529215812683,grad_norm: 0.6800905350815404, iteration: 451819
loss: 1.0023261308670044,grad_norm: 0.7442575282626687, iteration: 451820
loss: 0.9988280534744263,grad_norm: 0.643551639389538, iteration: 451821
loss: 0.9596707224845886,grad_norm: 0.8448009959660892, iteration: 451822
loss: 0.9815281629562378,grad_norm: 0.6846577817813001, iteration: 451823
loss: 1.0050034523010254,grad_norm: 0.77044737256918, iteration: 451824
loss: 1.0178757905960083,grad_norm: 0.8330587657530417, iteration: 451825
loss: 0.9956958293914795,grad_norm: 0.7987555989312543, iteration: 451826
loss: 1.0207571983337402,grad_norm: 0.9004376133327672, iteration: 451827
loss: 1.1136856079101562,grad_norm: 0.777555888107262, iteration: 451828
loss: 0.9994862675666809,grad_norm: 0.8421895965358226, iteration: 451829
loss: 1.0042550563812256,grad_norm: 0.8754219970292114, iteration: 451830
loss: 1.0336617231369019,grad_norm: 0.7761333622805989, iteration: 451831
loss: 0.9886998534202576,grad_norm: 0.8033748683786224, iteration: 451832
loss: 0.9970014691352844,grad_norm: 0.9246735444604306, iteration: 451833
loss: 0.9762235879898071,grad_norm: 0.7473797418505883, iteration: 451834
loss: 1.030609130859375,grad_norm: 0.6406444948975165, iteration: 451835
loss: 1.017604112625122,grad_norm: 0.8768642514411235, iteration: 451836
loss: 1.0921311378479004,grad_norm: 0.919576128313836, iteration: 451837
loss: 0.9869382977485657,grad_norm: 0.8975925042919256, iteration: 451838
loss: 0.9769852757453918,grad_norm: 0.7994391880533938, iteration: 451839
loss: 1.00938880443573,grad_norm: 0.6938269906753635, iteration: 451840
loss: 1.0205711126327515,grad_norm: 0.7833900061022827, iteration: 451841
loss: 1.0144085884094238,grad_norm: 0.7628176811448462, iteration: 451842
loss: 1.0129029750823975,grad_norm: 0.7664007154959027, iteration: 451843
loss: 0.9972965121269226,grad_norm: 0.8585880428337909, iteration: 451844
loss: 1.0122965574264526,grad_norm: 0.851127516306949, iteration: 451845
loss: 1.0078186988830566,grad_norm: 0.7686246166292519, iteration: 451846
loss: 1.0087355375289917,grad_norm: 0.9999993712298882, iteration: 451847
loss: 1.0542196035385132,grad_norm: 0.6498417788036059, iteration: 451848
loss: 0.9697003960609436,grad_norm: 0.6651528258079659, iteration: 451849
loss: 1.0159268379211426,grad_norm: 0.8316464705250755, iteration: 451850
loss: 0.9985199570655823,grad_norm: 0.6565433256668259, iteration: 451851
loss: 1.0081290006637573,grad_norm: 0.7089488182456658, iteration: 451852
loss: 1.0238040685653687,grad_norm: 0.6595123192931155, iteration: 451853
loss: 1.0230278968811035,grad_norm: 0.8761513404746262, iteration: 451854
loss: 0.9815098643302917,grad_norm: 0.8263546559607076, iteration: 451855
loss: 1.0164337158203125,grad_norm: 0.6437974716101142, iteration: 451856
loss: 0.9933834671974182,grad_norm: 0.8452070052123355, iteration: 451857
loss: 1.0876045227050781,grad_norm: 0.9999994678628936, iteration: 451858
loss: 0.9836718440055847,grad_norm: 0.7308249921554846, iteration: 451859
loss: 1.0329337120056152,grad_norm: 0.8560222914901602, iteration: 451860
loss: 0.9658671617507935,grad_norm: 0.8025046331234914, iteration: 451861
loss: 0.9835236072540283,grad_norm: 0.9999991333986481, iteration: 451862
loss: 1.0067005157470703,grad_norm: 0.6523215222156645, iteration: 451863
loss: 1.0253597497940063,grad_norm: 0.9297257841549804, iteration: 451864
loss: 1.009136438369751,grad_norm: 0.9999992454329656, iteration: 451865
loss: 1.0178190469741821,grad_norm: 0.9126544899650871, iteration: 451866
loss: 0.9822771549224854,grad_norm: 0.7422022661933811, iteration: 451867
loss: 1.0357539653778076,grad_norm: 0.7713519214948051, iteration: 451868
loss: 1.0078935623168945,grad_norm: 0.8901759622450687, iteration: 451869
loss: 0.9780158400535583,grad_norm: 0.9999995065754254, iteration: 451870
loss: 0.9729668498039246,grad_norm: 0.6885760876265015, iteration: 451871
loss: 1.0192757844924927,grad_norm: 0.7424891196014992, iteration: 451872
loss: 1.0064878463745117,grad_norm: 0.7353336436841109, iteration: 451873
loss: 1.0536506175994873,grad_norm: 0.8271458534692366, iteration: 451874
loss: 1.0341989994049072,grad_norm: 0.8242613767275899, iteration: 451875
loss: 1.00798761844635,grad_norm: 0.6139792364061933, iteration: 451876
loss: 0.994287371635437,grad_norm: 0.6078623143415236, iteration: 451877
loss: 0.9675537347793579,grad_norm: 0.7898355654795034, iteration: 451878
loss: 1.0008865594863892,grad_norm: 0.7878599597631082, iteration: 451879
loss: 0.9882904291152954,grad_norm: 0.670949273384594, iteration: 451880
loss: 1.0468811988830566,grad_norm: 0.9999995330846525, iteration: 451881
loss: 1.0374348163604736,grad_norm: 0.7402179676543748, iteration: 451882
loss: 0.990158200263977,grad_norm: 0.7541847715884862, iteration: 451883
loss: 0.9871112704277039,grad_norm: 0.67135787002926, iteration: 451884
loss: 1.060903549194336,grad_norm: 0.9999995267403443, iteration: 451885
loss: 1.0005438327789307,grad_norm: 0.9999992666293415, iteration: 451886
loss: 0.9882856607437134,grad_norm: 0.8230861776998056, iteration: 451887
loss: 1.0528043508529663,grad_norm: 0.7918465815198846, iteration: 451888
loss: 1.0093529224395752,grad_norm: 0.7290679489734653, iteration: 451889
loss: 1.0255622863769531,grad_norm: 0.8161714866840294, iteration: 451890
loss: 0.9757652878761292,grad_norm: 0.7083709012098632, iteration: 451891
loss: 1.0207687616348267,grad_norm: 0.7747168119379992, iteration: 451892
loss: 1.0734878778457642,grad_norm: 0.999999671048977, iteration: 451893
loss: 1.0198991298675537,grad_norm: 0.8054870102885889, iteration: 451894
loss: 0.9701530337333679,grad_norm: 0.7356636441721217, iteration: 451895
loss: 1.0760904550552368,grad_norm: 0.9999997928056917, iteration: 451896
loss: 1.0039114952087402,grad_norm: 0.9999991896470719, iteration: 451897
loss: 0.9955363273620605,grad_norm: 0.999999154888721, iteration: 451898
loss: 1.100572109222412,grad_norm: 0.9999999487856017, iteration: 451899
loss: 0.9895657300949097,grad_norm: 0.7995907607898921, iteration: 451900
loss: 1.0024770498275757,grad_norm: 0.8090745788156675, iteration: 451901
loss: 1.0364410877227783,grad_norm: 0.8102044099911161, iteration: 451902
loss: 0.990006148815155,grad_norm: 0.9487798385191257, iteration: 451903
loss: 0.9787726402282715,grad_norm: 0.7491733386867173, iteration: 451904
loss: 0.9869006276130676,grad_norm: 0.832530051854077, iteration: 451905
loss: 1.003484845161438,grad_norm: 0.6416645562400041, iteration: 451906
loss: 0.9766258001327515,grad_norm: 0.8339233482921012, iteration: 451907
loss: 1.000393033027649,grad_norm: 0.9999996734100584, iteration: 451908
loss: 1.0061231851577759,grad_norm: 0.6927453730970152, iteration: 451909
loss: 1.0279170274734497,grad_norm: 0.7643952093099128, iteration: 451910
loss: 0.9798286557197571,grad_norm: 0.7328735980998994, iteration: 451911
loss: 0.9869126677513123,grad_norm: 0.7835829302027916, iteration: 451912
loss: 0.9815669059753418,grad_norm: 0.7256085755759771, iteration: 451913
loss: 0.9883694648742676,grad_norm: 0.7634521933365475, iteration: 451914
loss: 0.9822049140930176,grad_norm: 0.725157108632264, iteration: 451915
loss: 0.9834891557693481,grad_norm: 0.7993553309813342, iteration: 451916
loss: 1.047552227973938,grad_norm: 0.999999083580336, iteration: 451917
loss: 0.9546378254890442,grad_norm: 0.7327538204592721, iteration: 451918
loss: 0.9992936253547668,grad_norm: 0.8127854084435203, iteration: 451919
loss: 1.0061615705490112,grad_norm: 0.8369381113265086, iteration: 451920
loss: 0.9761279225349426,grad_norm: 0.7182979350510704, iteration: 451921
loss: 1.0264160633087158,grad_norm: 0.7159634191635178, iteration: 451922
loss: 1.0175962448120117,grad_norm: 0.6795078494073449, iteration: 451923
loss: 1.0182147026062012,grad_norm: 0.7180310683143887, iteration: 451924
loss: 0.9821821451187134,grad_norm: 0.9999994027828659, iteration: 451925
loss: 1.0305702686309814,grad_norm: 0.7218818238345635, iteration: 451926
loss: 1.055377721786499,grad_norm: 0.9896298612323969, iteration: 451927
loss: 1.0051453113555908,grad_norm: 0.712625476087763, iteration: 451928
loss: 1.0145047903060913,grad_norm: 0.999999774150528, iteration: 451929
loss: 1.0126959085464478,grad_norm: 0.7816838268373281, iteration: 451930
loss: 1.0133916139602661,grad_norm: 0.9195167662577398, iteration: 451931
loss: 0.9969847798347473,grad_norm: 0.8051632342429494, iteration: 451932
loss: 1.0213557481765747,grad_norm: 0.9999998225100346, iteration: 451933
loss: 1.0214333534240723,grad_norm: 0.982330089797721, iteration: 451934
loss: 1.0095477104187012,grad_norm: 0.8667097265806922, iteration: 451935
loss: 0.9924740791320801,grad_norm: 0.6372901139119175, iteration: 451936
loss: 1.0336652994155884,grad_norm: 0.8222488249308803, iteration: 451937
loss: 1.0208665132522583,grad_norm: 0.70486669655547, iteration: 451938
loss: 0.9873026609420776,grad_norm: 0.8852751269080938, iteration: 451939
loss: 1.0065280199050903,grad_norm: 0.6946408459271043, iteration: 451940
loss: 1.0067360401153564,grad_norm: 0.8183239981625909, iteration: 451941
loss: 1.085010290145874,grad_norm: 0.6996705266078662, iteration: 451942
loss: 0.9908803105354309,grad_norm: 0.7760644860233828, iteration: 451943
loss: 0.9937547445297241,grad_norm: 0.8057067658599588, iteration: 451944
loss: 1.0627551078796387,grad_norm: 0.6335072782985117, iteration: 451945
loss: 0.985286295413971,grad_norm: 0.8817840050079108, iteration: 451946
loss: 1.0058258771896362,grad_norm: 0.7577802499653662, iteration: 451947
loss: 0.9927970767021179,grad_norm: 0.7579752375285745, iteration: 451948
loss: 1.023444414138794,grad_norm: 0.9759353836908743, iteration: 451949
loss: 1.0211201906204224,grad_norm: 0.7799368476909814, iteration: 451950
loss: 1.0023200511932373,grad_norm: 0.6358214963111609, iteration: 451951
loss: 1.069555401802063,grad_norm: 0.8729905322631082, iteration: 451952
loss: 1.0158922672271729,grad_norm: 0.8198865272958981, iteration: 451953
loss: 0.9833441376686096,grad_norm: 0.7293623075819903, iteration: 451954
loss: 0.9830817580223083,grad_norm: 0.7700889081058513, iteration: 451955
loss: 1.0479166507720947,grad_norm: 0.9416853844588178, iteration: 451956
loss: 0.9941094517707825,grad_norm: 0.7163603090244531, iteration: 451957
loss: 1.0077048540115356,grad_norm: 0.9339590646981093, iteration: 451958
loss: 1.0701470375061035,grad_norm: 0.7955209111350598, iteration: 451959
loss: 0.9932608604431152,grad_norm: 0.7466466907470071, iteration: 451960
loss: 1.0806959867477417,grad_norm: 0.9999998236058688, iteration: 451961
loss: 1.0182790756225586,grad_norm: 0.9999993341578789, iteration: 451962
loss: 0.9756712913513184,grad_norm: 0.7069428669188107, iteration: 451963
loss: 1.0233254432678223,grad_norm: 0.6007302496417346, iteration: 451964
loss: 1.007280707359314,grad_norm: 0.647472620546154, iteration: 451965
loss: 1.154228687286377,grad_norm: 0.7556533110588971, iteration: 451966
loss: 0.9546647071838379,grad_norm: 0.7196255794607767, iteration: 451967
loss: 1.0041499137878418,grad_norm: 0.8724200670541646, iteration: 451968
loss: 1.0144851207733154,grad_norm: 0.6882301638232555, iteration: 451969
loss: 1.0180531740188599,grad_norm: 0.9999998982097122, iteration: 451970
loss: 0.9671266078948975,grad_norm: 0.7598610238943584, iteration: 451971
loss: 1.0040730237960815,grad_norm: 0.6907076357329409, iteration: 451972
loss: 0.9746444821357727,grad_norm: 0.617473942464618, iteration: 451973
loss: 1.0218656063079834,grad_norm: 0.7798139701570878, iteration: 451974
loss: 0.9807864427566528,grad_norm: 0.7429189814375361, iteration: 451975
loss: 0.9733719229698181,grad_norm: 0.7109888004220288, iteration: 451976
loss: 0.983544647693634,grad_norm: 0.6157435298062249, iteration: 451977
loss: 0.9943663477897644,grad_norm: 0.7122754634845926, iteration: 451978
loss: 1.0397530794143677,grad_norm: 0.7276720937713621, iteration: 451979
loss: 0.9494848251342773,grad_norm: 0.7422406771713885, iteration: 451980
loss: 0.9699429869651794,grad_norm: 0.8339859019714078, iteration: 451981
loss: 0.9958419799804688,grad_norm: 0.6499396747357696, iteration: 451982
loss: 1.0155956745147705,grad_norm: 0.8235510308069219, iteration: 451983
loss: 1.049599051475525,grad_norm: 0.9588779588349263, iteration: 451984
loss: 1.0079432725906372,grad_norm: 0.8052134071608762, iteration: 451985
loss: 1.0195826292037964,grad_norm: 0.8023147455919148, iteration: 451986
loss: 1.085970401763916,grad_norm: 0.8074169512132829, iteration: 451987
loss: 0.9952938556671143,grad_norm: 0.8487869499542923, iteration: 451988
loss: 1.0192255973815918,grad_norm: 0.7810615070285813, iteration: 451989
loss: 1.0060863494873047,grad_norm: 0.8377941656956929, iteration: 451990
loss: 1.0057240724563599,grad_norm: 0.8019771724411738, iteration: 451991
loss: 1.0232985019683838,grad_norm: 0.6968640720814773, iteration: 451992
loss: 0.9846336841583252,grad_norm: 0.7409751817209762, iteration: 451993
loss: 0.9637400507926941,grad_norm: 0.8367739380606465, iteration: 451994
loss: 0.972664475440979,grad_norm: 0.7892629450915217, iteration: 451995
loss: 1.079239845275879,grad_norm: 0.9999998644996686, iteration: 451996
loss: 0.9917160868644714,grad_norm: 0.855161618302237, iteration: 451997
loss: 1.0565462112426758,grad_norm: 0.8892636388538623, iteration: 451998
loss: 0.9802822470664978,grad_norm: 0.8130088812186608, iteration: 451999
loss: 0.9603226184844971,grad_norm: 0.7564253115343105, iteration: 452000
loss: 1.0375875234603882,grad_norm: 0.7486248882549124, iteration: 452001
loss: 1.0301015377044678,grad_norm: 0.6548716040858853, iteration: 452002
loss: 1.0266671180725098,grad_norm: 0.7836764772057356, iteration: 452003
loss: 1.10526442527771,grad_norm: 0.9999994765420376, iteration: 452004
loss: 0.9880831837654114,grad_norm: 0.7733983903579638, iteration: 452005
loss: 1.0136855840682983,grad_norm: 0.7353667426042821, iteration: 452006
loss: 0.9783990979194641,grad_norm: 0.9999991091709239, iteration: 452007
loss: 0.9950920939445496,grad_norm: 0.7209704976546074, iteration: 452008
loss: 1.0208055973052979,grad_norm: 0.7614043203850807, iteration: 452009
loss: 0.9771106243133545,grad_norm: 0.782849118304239, iteration: 452010
loss: 1.0372331142425537,grad_norm: 0.8876986781942616, iteration: 452011
loss: 0.9972572922706604,grad_norm: 0.6603218846234492, iteration: 452012
loss: 1.0200037956237793,grad_norm: 0.6845667528624784, iteration: 452013
loss: 1.0383411645889282,grad_norm: 0.7235459693768164, iteration: 452014
loss: 0.9651716351509094,grad_norm: 0.7242680622587095, iteration: 452015
loss: 1.0175604820251465,grad_norm: 0.8688335214297184, iteration: 452016
loss: 0.9980837106704712,grad_norm: 0.8451097092846741, iteration: 452017
loss: 1.0303431749343872,grad_norm: 1.000000055035983, iteration: 452018
loss: 0.9801639318466187,grad_norm: 0.8693722338468539, iteration: 452019
loss: 1.0025560855865479,grad_norm: 0.791564642201863, iteration: 452020
loss: 0.9570376873016357,grad_norm: 0.8801933245632225, iteration: 452021
loss: 1.0650763511657715,grad_norm: 0.7141900020407866, iteration: 452022
loss: 1.0039645433425903,grad_norm: 0.6338370463875246, iteration: 452023
loss: 0.970118522644043,grad_norm: 0.6684864729024281, iteration: 452024
loss: 0.9740589261054993,grad_norm: 0.7580871361274875, iteration: 452025
loss: 0.9838138818740845,grad_norm: 0.7396753852413765, iteration: 452026
loss: 0.9932568073272705,grad_norm: 0.8572412151353802, iteration: 452027
loss: 1.0099575519561768,grad_norm: 0.6363318837443543, iteration: 452028
loss: 1.030549168586731,grad_norm: 0.8098766484734035, iteration: 452029
loss: 1.1090562343597412,grad_norm: 0.779426495938004, iteration: 452030
loss: 1.0519163608551025,grad_norm: 0.9999997744468336, iteration: 452031
loss: 1.0010387897491455,grad_norm: 0.816568327805052, iteration: 452032
loss: 1.008879542350769,grad_norm: 0.8936523539598477, iteration: 452033
loss: 1.050188660621643,grad_norm: 0.748914822054978, iteration: 452034
loss: 0.9797136783599854,grad_norm: 0.8358385168141186, iteration: 452035
loss: 1.0162160396575928,grad_norm: 0.7268332061386645, iteration: 452036
loss: 0.9926646947860718,grad_norm: 0.6581278472464115, iteration: 452037
loss: 1.0092670917510986,grad_norm: 0.814207470764192, iteration: 452038
loss: 1.0326415300369263,grad_norm: 0.8076194763435359, iteration: 452039
loss: 0.9803860187530518,grad_norm: 0.7729810227454841, iteration: 452040
loss: 0.9957027435302734,grad_norm: 0.7142449059706095, iteration: 452041
loss: 1.0270785093307495,grad_norm: 0.8137082699858899, iteration: 452042
loss: 1.0228418111801147,grad_norm: 0.702422846218714, iteration: 452043
loss: 1.0143948793411255,grad_norm: 0.8581745335099962, iteration: 452044
loss: 1.0185437202453613,grad_norm: 0.747459021600754, iteration: 452045
loss: 0.9883416295051575,grad_norm: 0.7094510001078783, iteration: 452046
loss: 0.9963061809539795,grad_norm: 0.9014143425701415, iteration: 452047
loss: 1.0298011302947998,grad_norm: 0.7144196518596237, iteration: 452048
loss: 0.9752490520477295,grad_norm: 0.7887231083570845, iteration: 452049
loss: 1.0326679944992065,grad_norm: 0.6995935967318195, iteration: 452050
loss: 0.9708965420722961,grad_norm: 0.6916518123203823, iteration: 452051
loss: 0.9732298254966736,grad_norm: 0.772946484270093, iteration: 452052
loss: 1.0116779804229736,grad_norm: 0.715059673767368, iteration: 452053
loss: 1.0500283241271973,grad_norm: 0.9999994639892588, iteration: 452054
loss: 0.9920015931129456,grad_norm: 0.8341053226605422, iteration: 452055
loss: 0.9714174866676331,grad_norm: 0.7807139923033657, iteration: 452056
loss: 0.9917042851448059,grad_norm: 0.7510631955325053, iteration: 452057
loss: 1.0045162439346313,grad_norm: 0.6597614632334138, iteration: 452058
loss: 0.9749670028686523,grad_norm: 0.673429339282895, iteration: 452059
loss: 1.0059518814086914,grad_norm: 0.7564323138605052, iteration: 452060
loss: 0.9903955459594727,grad_norm: 0.7596294606644112, iteration: 452061
loss: 0.9927520751953125,grad_norm: 0.9445818068889613, iteration: 452062
loss: 0.9538281559944153,grad_norm: 0.7889756054874589, iteration: 452063
loss: 1.010359764099121,grad_norm: 0.7514818014316916, iteration: 452064
loss: 1.0048727989196777,grad_norm: 0.7124514348774524, iteration: 452065
loss: 1.0350953340530396,grad_norm: 0.6525498984276213, iteration: 452066
loss: 0.9807230830192566,grad_norm: 0.8062881621190563, iteration: 452067
loss: 0.9842716455459595,grad_norm: 0.7173990353388799, iteration: 452068
loss: 1.0798105001449585,grad_norm: 0.730266493033556, iteration: 452069
loss: 1.0265535116195679,grad_norm: 0.8255250753593614, iteration: 452070
loss: 1.0053761005401611,grad_norm: 0.7718342915062776, iteration: 452071
loss: 0.9954953789710999,grad_norm: 0.8069555930033582, iteration: 452072
loss: 0.9768304228782654,grad_norm: 0.8956985972318586, iteration: 452073
loss: 1.0123682022094727,grad_norm: 0.6689742644008486, iteration: 452074
loss: 1.0461547374725342,grad_norm: 0.999999267418915, iteration: 452075
loss: 0.9956051707267761,grad_norm: 0.7496723687330665, iteration: 452076
loss: 0.9807270765304565,grad_norm: 0.6775537063491852, iteration: 452077
loss: 0.992581307888031,grad_norm: 0.6735557292310869, iteration: 452078
loss: 1.001340627670288,grad_norm: 0.764657291768995, iteration: 452079
loss: 1.0179665088653564,grad_norm: 0.8787156200445722, iteration: 452080
loss: 1.02255117893219,grad_norm: 0.7454888469410433, iteration: 452081
loss: 0.9848105311393738,grad_norm: 0.6550353205782825, iteration: 452082
loss: 0.9937976598739624,grad_norm: 0.7192299948999157, iteration: 452083
loss: 1.0453914403915405,grad_norm: 0.7973132157414774, iteration: 452084
loss: 1.005876898765564,grad_norm: 0.855616308535324, iteration: 452085
loss: 1.008979082107544,grad_norm: 0.6051772695654596, iteration: 452086
loss: 1.010974407196045,grad_norm: 0.6272546062379692, iteration: 452087
loss: 1.0080527067184448,grad_norm: 0.9999994756899467, iteration: 452088
loss: 1.0031979084014893,grad_norm: 0.7926903353330347, iteration: 452089
loss: 0.9680360555648804,grad_norm: 0.8798128743971949, iteration: 452090
loss: 1.015013337135315,grad_norm: 0.8274862029349441, iteration: 452091
loss: 0.9823739528656006,grad_norm: 0.8258781307466827, iteration: 452092
loss: 0.9902535080909729,grad_norm: 0.6833691850990924, iteration: 452093
loss: 0.9676021933555603,grad_norm: 0.7800332490424271, iteration: 452094
loss: 1.0001180171966553,grad_norm: 0.6418775220105123, iteration: 452095
loss: 1.0413774251937866,grad_norm: 0.8292219891388657, iteration: 452096
loss: 1.0171520709991455,grad_norm: 0.8061274185561013, iteration: 452097
loss: 1.058484435081482,grad_norm: 0.7751368190035602, iteration: 452098
loss: 1.0223180055618286,grad_norm: 0.823660675863344, iteration: 452099
loss: 1.0564453601837158,grad_norm: 0.8045430484325868, iteration: 452100
loss: 0.9694374799728394,grad_norm: 0.7959666656875855, iteration: 452101
loss: 0.9591766595840454,grad_norm: 0.7896793651120406, iteration: 452102
loss: 0.9929920434951782,grad_norm: 0.7148462534573196, iteration: 452103
loss: 1.0638748407363892,grad_norm: 0.7247354843879864, iteration: 452104
loss: 1.009912133216858,grad_norm: 0.6609565176842108, iteration: 452105
loss: 0.9636979103088379,grad_norm: 0.6538382371672371, iteration: 452106
loss: 0.9879102110862732,grad_norm: 0.9257226125310648, iteration: 452107
loss: 1.0132094621658325,grad_norm: 0.6789171062682334, iteration: 452108
loss: 0.9876111149787903,grad_norm: 0.7947844041851327, iteration: 452109
loss: 1.0238865613937378,grad_norm: 0.8016338439765932, iteration: 452110
loss: 0.9849668741226196,grad_norm: 0.8099233241635779, iteration: 452111
loss: 0.9776961803436279,grad_norm: 0.782612973796797, iteration: 452112
loss: 1.0001085996627808,grad_norm: 0.7510418024831973, iteration: 452113
loss: 0.9810609221458435,grad_norm: 0.7219075309417717, iteration: 452114
loss: 0.986329972743988,grad_norm: 0.6672133010692184, iteration: 452115
loss: 1.0112568140029907,grad_norm: 0.6894915937360594, iteration: 452116
loss: 0.9979814291000366,grad_norm: 0.8654368438196648, iteration: 452117
loss: 0.9736728668212891,grad_norm: 0.7858694986126722, iteration: 452118
loss: 0.9932284951210022,grad_norm: 0.7743621878113658, iteration: 452119
loss: 0.9698560833930969,grad_norm: 0.7876389193948389, iteration: 452120
loss: 0.9989926815032959,grad_norm: 0.768241465045141, iteration: 452121
loss: 0.9712278842926025,grad_norm: 0.855796996389266, iteration: 452122
loss: 1.0066311359405518,grad_norm: 0.7267151499935202, iteration: 452123
loss: 0.9700180888175964,grad_norm: 0.8032611417201782, iteration: 452124
loss: 0.9551070928573608,grad_norm: 0.6240196515525887, iteration: 452125
loss: 1.0247331857681274,grad_norm: 0.9999997314308297, iteration: 452126
loss: 1.013225793838501,grad_norm: 0.7520949010030485, iteration: 452127
loss: 1.0535802841186523,grad_norm: 0.9999995110795051, iteration: 452128
loss: 1.0129590034484863,grad_norm: 0.7902966865058432, iteration: 452129
loss: 0.9979328513145447,grad_norm: 0.7791047195470467, iteration: 452130
loss: 0.9993820786476135,grad_norm: 0.7442488247958158, iteration: 452131
loss: 0.9995869398117065,grad_norm: 0.8273801370369707, iteration: 452132
loss: 1.017836093902588,grad_norm: 0.9999994008840333, iteration: 452133
loss: 1.0213944911956787,grad_norm: 0.7269254961603386, iteration: 452134
loss: 1.043684959411621,grad_norm: 0.8732828516885808, iteration: 452135
loss: 1.0206241607666016,grad_norm: 0.8411693429245038, iteration: 452136
loss: 1.0349270105361938,grad_norm: 0.8436387742594021, iteration: 452137
loss: 0.9612341523170471,grad_norm: 0.8714408422395754, iteration: 452138
loss: 1.0053530931472778,grad_norm: 0.7560815649042677, iteration: 452139
loss: 0.9640852808952332,grad_norm: 0.8165198476913114, iteration: 452140
loss: 0.9841989874839783,grad_norm: 0.808267414439231, iteration: 452141
loss: 1.0063477754592896,grad_norm: 0.7842023089586471, iteration: 452142
loss: 1.020998477935791,grad_norm: 0.7492371410153265, iteration: 452143
loss: 0.9706262946128845,grad_norm: 0.7359992017653844, iteration: 452144
loss: 0.9834362268447876,grad_norm: 0.7165110923663318, iteration: 452145
loss: 0.9908896088600159,grad_norm: 0.6822015341880073, iteration: 452146
loss: 1.0102190971374512,grad_norm: 0.9999998911652982, iteration: 452147
loss: 1.0099343061447144,grad_norm: 0.7989789280151365, iteration: 452148
loss: 1.014471173286438,grad_norm: 0.8004040642690226, iteration: 452149
loss: 0.983963668346405,grad_norm: 0.7939873431606027, iteration: 452150
loss: 0.9965042471885681,grad_norm: 0.8818112117903686, iteration: 452151
loss: 0.9930696487426758,grad_norm: 0.7288729248036219, iteration: 452152
loss: 1.024990439414978,grad_norm: 0.9999998865542471, iteration: 452153
loss: 0.9968242049217224,grad_norm: 0.7009366708497361, iteration: 452154
loss: 0.9902411103248596,grad_norm: 0.8027187447868392, iteration: 452155
loss: 1.001998782157898,grad_norm: 0.7459067139428008, iteration: 452156
loss: 0.9765343070030212,grad_norm: 0.8677147191011185, iteration: 452157
loss: 0.965217649936676,grad_norm: 0.9691817657811367, iteration: 452158
loss: 0.9931517243385315,grad_norm: 0.6603574596934392, iteration: 452159
loss: 1.0034314393997192,grad_norm: 0.6779104086287652, iteration: 452160
loss: 0.9967910647392273,grad_norm: 0.6074756297270553, iteration: 452161
loss: 1.0925326347351074,grad_norm: 0.9999989904965825, iteration: 452162
loss: 0.9842312335968018,grad_norm: 0.8073349049958447, iteration: 452163
loss: 1.0057021379470825,grad_norm: 0.9555186829930647, iteration: 452164
loss: 1.0282479524612427,grad_norm: 0.6341735390650372, iteration: 452165
loss: 0.9996263384819031,grad_norm: 0.741459200754015, iteration: 452166
loss: 0.9811419248580933,grad_norm: 0.6621121081063138, iteration: 452167
loss: 0.9974101781845093,grad_norm: 0.6949436560795578, iteration: 452168
loss: 0.9811502695083618,grad_norm: 0.6233081145164245, iteration: 452169
loss: 1.0486371517181396,grad_norm: 0.8206686700680845, iteration: 452170
loss: 0.9728760719299316,grad_norm: 0.7827558249377449, iteration: 452171
loss: 1.0247838497161865,grad_norm: 0.9999991703928575, iteration: 452172
loss: 0.9982973337173462,grad_norm: 0.9429518904310766, iteration: 452173
loss: 1.0134674310684204,grad_norm: 0.774757526590741, iteration: 452174
loss: 0.9902427792549133,grad_norm: 0.6815541231895474, iteration: 452175
loss: 0.9712281823158264,grad_norm: 0.8077471738709416, iteration: 452176
loss: 1.0270049571990967,grad_norm: 0.9999990255067552, iteration: 452177
loss: 0.9489212036132812,grad_norm: 0.7717456756369925, iteration: 452178
loss: 1.0511410236358643,grad_norm: 0.9704230590737829, iteration: 452179
loss: 1.0456665754318237,grad_norm: 0.8383390407700925, iteration: 452180
loss: 1.0069150924682617,grad_norm: 0.9999991835738105, iteration: 452181
loss: 0.9771082401275635,grad_norm: 0.6716185885277552, iteration: 452182
loss: 0.9948658347129822,grad_norm: 0.8447598654266166, iteration: 452183
loss: 0.9893642663955688,grad_norm: 0.7176430505778824, iteration: 452184
loss: 1.0395933389663696,grad_norm: 0.9999990874590834, iteration: 452185
loss: 1.003960371017456,grad_norm: 0.803413323052158, iteration: 452186
loss: 1.097025990486145,grad_norm: 0.7945795488677894, iteration: 452187
loss: 0.9825769066810608,grad_norm: 0.9999992755869695, iteration: 452188
loss: 0.9837176203727722,grad_norm: 0.6588104979158113, iteration: 452189
loss: 1.0110493898391724,grad_norm: 0.7963888829989305, iteration: 452190
loss: 0.9661535620689392,grad_norm: 0.7680853776771236, iteration: 452191
loss: 1.014296293258667,grad_norm: 0.7775234835011081, iteration: 452192
loss: 0.9394751191139221,grad_norm: 0.8232323261628258, iteration: 452193
loss: 1.074924349784851,grad_norm: 0.9999998656285947, iteration: 452194
loss: 1.0299686193466187,grad_norm: 0.6818463525500335, iteration: 452195
loss: 0.9752227663993835,grad_norm: 0.563925397077466, iteration: 452196
loss: 1.00948166847229,grad_norm: 0.9999994222600624, iteration: 452197
loss: 0.9813264012336731,grad_norm: 0.6627185868381424, iteration: 452198
loss: 0.9812778234481812,grad_norm: 0.6706236500326386, iteration: 452199
loss: 1.003892183303833,grad_norm: 0.7644800573302041, iteration: 452200
loss: 1.0331364870071411,grad_norm: 0.7533978493765907, iteration: 452201
loss: 0.9786034226417542,grad_norm: 0.8246993458854917, iteration: 452202
loss: 1.0238196849822998,grad_norm: 0.8049285851328162, iteration: 452203
loss: 0.9960595965385437,grad_norm: 0.7132157642333319, iteration: 452204
loss: 1.022524356842041,grad_norm: 0.8154343411262794, iteration: 452205
loss: 0.9937217235565186,grad_norm: 0.7438875717835958, iteration: 452206
loss: 0.9727054834365845,grad_norm: 0.6716949446943755, iteration: 452207
loss: 0.9629712700843811,grad_norm: 0.751880968800768, iteration: 452208
loss: 0.9729323387145996,grad_norm: 0.8079766913310107, iteration: 452209
loss: 1.0198169946670532,grad_norm: 0.718421400775972, iteration: 452210
loss: 0.9906213879585266,grad_norm: 0.980557381457389, iteration: 452211
loss: 1.0276563167572021,grad_norm: 0.8384605997188125, iteration: 452212
loss: 0.9625328183174133,grad_norm: 0.793281871991351, iteration: 452213
loss: 0.9910797476768494,grad_norm: 0.7674978221422163, iteration: 452214
loss: 1.0009628534317017,grad_norm: 0.7495323844437909, iteration: 452215
loss: 0.9686214923858643,grad_norm: 0.8175470053849448, iteration: 452216
loss: 1.0379629135131836,grad_norm: 0.8704319219564053, iteration: 452217
loss: 0.9601868987083435,grad_norm: 0.8315458102334696, iteration: 452218
loss: 1.007836103439331,grad_norm: 0.9999990987222549, iteration: 452219
loss: 1.0074654817581177,grad_norm: 0.7841867950443115, iteration: 452220
loss: 1.0278356075286865,grad_norm: 0.8689933773978051, iteration: 452221
loss: 0.9901228547096252,grad_norm: 0.771245804949974, iteration: 452222
loss: 1.011342167854309,grad_norm: 0.7407750044817114, iteration: 452223
loss: 1.0175620317459106,grad_norm: 0.8178987718347224, iteration: 452224
loss: 1.0100882053375244,grad_norm: 0.9861973782893156, iteration: 452225
loss: 1.000754475593567,grad_norm: 0.6783822492474004, iteration: 452226
loss: 1.0248823165893555,grad_norm: 0.6276637349530734, iteration: 452227
loss: 1.0146746635437012,grad_norm: 0.9999992959887191, iteration: 452228
loss: 0.9718266725540161,grad_norm: 0.6972256934552754, iteration: 452229
loss: 1.0092436075210571,grad_norm: 0.7648224999865931, iteration: 452230
loss: 1.0043764114379883,grad_norm: 0.6338635195160706, iteration: 452231
loss: 0.9919712543487549,grad_norm: 0.9106428575259766, iteration: 452232
loss: 1.0276204347610474,grad_norm: 0.8482138164159304, iteration: 452233
loss: 1.037682294845581,grad_norm: 0.7958084421393029, iteration: 452234
loss: 1.0025105476379395,grad_norm: 0.6315806820745914, iteration: 452235
loss: 1.0461103916168213,grad_norm: 0.8210213637793692, iteration: 452236
loss: 0.9691963791847229,grad_norm: 0.8962733397398741, iteration: 452237
loss: 0.9678473472595215,grad_norm: 0.8480957895087653, iteration: 452238
loss: 1.0100945234298706,grad_norm: 0.8487884597007458, iteration: 452239
loss: 1.0157803297042847,grad_norm: 0.6724237226596808, iteration: 452240
loss: 0.9774364829063416,grad_norm: 0.737263833246243, iteration: 452241
loss: 0.9952055215835571,grad_norm: 0.6812898748154019, iteration: 452242
loss: 1.006789207458496,grad_norm: 0.9017541828334851, iteration: 452243
loss: 0.999478816986084,grad_norm: 0.773796029630536, iteration: 452244
loss: 1.015343427658081,grad_norm: 0.7422056466105611, iteration: 452245
loss: 0.9820981621742249,grad_norm: 0.7138800078783375, iteration: 452246
loss: 0.9795867204666138,grad_norm: 0.7251393275739689, iteration: 452247
loss: 1.009111762046814,grad_norm: 0.9999990580613366, iteration: 452248
loss: 0.9990242123603821,grad_norm: 0.8485662752389179, iteration: 452249
loss: 0.9992131590843201,grad_norm: 0.7347196488014688, iteration: 452250
loss: 1.023795247077942,grad_norm: 0.7304680111289233, iteration: 452251
loss: 0.9808900952339172,grad_norm: 0.815406803256489, iteration: 452252
loss: 0.9836574196815491,grad_norm: 0.7958870364766399, iteration: 452253
loss: 1.0074799060821533,grad_norm: 0.7006229482717624, iteration: 452254
loss: 0.9844629168510437,grad_norm: 0.8763046166069678, iteration: 452255
loss: 0.984566867351532,grad_norm: 0.6120877489626263, iteration: 452256
loss: 0.9786102771759033,grad_norm: 0.7522325818458142, iteration: 452257
loss: 1.0209671258926392,grad_norm: 0.7621796371746595, iteration: 452258
loss: 0.9997115135192871,grad_norm: 0.8674382231800677, iteration: 452259
loss: 0.9829623103141785,grad_norm: 0.8120371787700499, iteration: 452260
loss: 1.0319883823394775,grad_norm: 0.867968676467864, iteration: 452261
loss: 0.9959811568260193,grad_norm: 0.6869254701108122, iteration: 452262
loss: 1.013883352279663,grad_norm: 0.8229894889355828, iteration: 452263
loss: 1.0178431272506714,grad_norm: 0.7992918489196826, iteration: 452264
loss: 1.0257755517959595,grad_norm: 0.7504254455717967, iteration: 452265
loss: 0.9791166186332703,grad_norm: 0.7795935059326701, iteration: 452266
loss: 0.9654651284217834,grad_norm: 0.7509120932123029, iteration: 452267
loss: 1.0327867269515991,grad_norm: 0.7577369588978561, iteration: 452268
loss: 1.0401298999786377,grad_norm: 0.999999013192174, iteration: 452269
loss: 1.0221421718597412,grad_norm: 0.7507145155363664, iteration: 452270
loss: 1.023200511932373,grad_norm: 0.7738984511956332, iteration: 452271
loss: 0.988292932510376,grad_norm: 0.7429922667231756, iteration: 452272
loss: 0.9884965419769287,grad_norm: 0.8770841156739075, iteration: 452273
loss: 1.0088998079299927,grad_norm: 0.8024805938183669, iteration: 452274
loss: 0.9709071516990662,grad_norm: 0.8101863764929114, iteration: 452275
loss: 1.0146524906158447,grad_norm: 0.8594426432683607, iteration: 452276
loss: 1.0410704612731934,grad_norm: 0.7864219725287172, iteration: 452277
loss: 0.9942552447319031,grad_norm: 0.8758651831008645, iteration: 452278
loss: 1.0043199062347412,grad_norm: 0.8281685007101369, iteration: 452279
loss: 1.0256813764572144,grad_norm: 0.9999991915497606, iteration: 452280
loss: 0.9828627705574036,grad_norm: 0.7377645347291016, iteration: 452281
loss: 0.9904794692993164,grad_norm: 0.7337606501755126, iteration: 452282
loss: 0.9711589813232422,grad_norm: 0.8583061413170574, iteration: 452283
loss: 1.0325461626052856,grad_norm: 0.789273142516162, iteration: 452284
loss: 0.996065080165863,grad_norm: 0.8640143746956644, iteration: 452285
loss: 0.9685540199279785,grad_norm: 0.6086655750104489, iteration: 452286
loss: 1.0132339000701904,grad_norm: 0.80946642414715, iteration: 452287
loss: 0.9884976148605347,grad_norm: 0.7388532496880782, iteration: 452288
loss: 1.0087405443191528,grad_norm: 0.8162562198324611, iteration: 452289
loss: 1.0307456254959106,grad_norm: 0.718731201598534, iteration: 452290
loss: 1.0526232719421387,grad_norm: 0.7969119800028113, iteration: 452291
loss: 1.0222599506378174,grad_norm: 0.7128390573317952, iteration: 452292
loss: 1.0276069641113281,grad_norm: 0.7768774716785538, iteration: 452293
loss: 1.0250316858291626,grad_norm: 0.9835608093513323, iteration: 452294
loss: 1.0270249843597412,grad_norm: 0.9999991873557733, iteration: 452295
loss: 1.0380535125732422,grad_norm: 0.8670311647209498, iteration: 452296
loss: 0.9890739917755127,grad_norm: 0.9999998712646221, iteration: 452297
loss: 1.0223896503448486,grad_norm: 0.9192082397772017, iteration: 452298
loss: 0.9710855484008789,grad_norm: 0.9580421627602018, iteration: 452299
loss: 1.008620023727417,grad_norm: 0.7657405420206653, iteration: 452300
loss: 0.9750326871871948,grad_norm: 0.8346062015313802, iteration: 452301
loss: 0.97963947057724,grad_norm: 0.7629605251025103, iteration: 452302
loss: 0.9675574898719788,grad_norm: 0.7097446878135588, iteration: 452303
loss: 1.006190299987793,grad_norm: 0.6644754408170916, iteration: 452304
loss: 1.0239648818969727,grad_norm: 0.6326553243094222, iteration: 452305
loss: 1.0158087015151978,grad_norm: 0.7850935427587158, iteration: 452306
loss: 1.0239295959472656,grad_norm: 0.7844427341660116, iteration: 452307
loss: 0.9885814189910889,grad_norm: 0.7123019799520615, iteration: 452308
loss: 1.034461259841919,grad_norm: 0.7165137981707239, iteration: 452309
loss: 1.0392661094665527,grad_norm: 0.7050124746961672, iteration: 452310
loss: 0.9541401267051697,grad_norm: 0.8909229078384177, iteration: 452311
loss: 1.008726954460144,grad_norm: 0.7474275356681735, iteration: 452312
loss: 1.0254604816436768,grad_norm: 0.9091917456586187, iteration: 452313
loss: 1.0459344387054443,grad_norm: 0.8265023927564893, iteration: 452314
loss: 0.9972913861274719,grad_norm: 0.8415031811841985, iteration: 452315
loss: 0.9783645868301392,grad_norm: 0.7502519824158667, iteration: 452316
loss: 0.9918978810310364,grad_norm: 0.9081735446820167, iteration: 452317
loss: 0.9833223223686218,grad_norm: 0.7738280365373013, iteration: 452318
loss: 0.9766363501548767,grad_norm: 0.7451749924418165, iteration: 452319
loss: 1.0031968355178833,grad_norm: 0.6909302601337433, iteration: 452320
loss: 0.9700485467910767,grad_norm: 0.7055369436124782, iteration: 452321
loss: 1.0218502283096313,grad_norm: 0.6202673709159754, iteration: 452322
loss: 0.9978601932525635,grad_norm: 0.8065024937731406, iteration: 452323
loss: 1.020676851272583,grad_norm: 0.6779950050719848, iteration: 452324
loss: 0.9770090579986572,grad_norm: 0.728516490507502, iteration: 452325
loss: 1.022516131401062,grad_norm: 0.9369143222055415, iteration: 452326
loss: 1.0114445686340332,grad_norm: 0.7075324493671007, iteration: 452327
loss: 1.0340207815170288,grad_norm: 0.7774739488422522, iteration: 452328
loss: 0.9726194143295288,grad_norm: 0.7597813975277175, iteration: 452329
loss: 0.9845609068870544,grad_norm: 0.7619343656693138, iteration: 452330
loss: 1.0258674621582031,grad_norm: 0.7038635446706168, iteration: 452331
loss: 0.9820778965950012,grad_norm: 0.7538978971368223, iteration: 452332
loss: 0.9737839698791504,grad_norm: 0.6931136842489045, iteration: 452333
loss: 1.0139223337173462,grad_norm: 0.8763306684854756, iteration: 452334
loss: 1.0140395164489746,grad_norm: 0.8576128161452684, iteration: 452335
loss: 1.0443673133850098,grad_norm: 0.9999993305024419, iteration: 452336
loss: 0.9783200621604919,grad_norm: 0.6887276944069765, iteration: 452337
loss: 0.9787652492523193,grad_norm: 0.7849040343031358, iteration: 452338
loss: 0.9916587471961975,grad_norm: 0.7345997077392004, iteration: 452339
loss: 1.0106399059295654,grad_norm: 0.8527830906118236, iteration: 452340
loss: 1.0038042068481445,grad_norm: 0.7854374834035122, iteration: 452341
loss: 0.9945403337478638,grad_norm: 0.8420096630411247, iteration: 452342
loss: 0.9777103662490845,grad_norm: 0.7694765862849131, iteration: 452343
loss: 0.9912223219871521,grad_norm: 0.8442699387984197, iteration: 452344
loss: 1.023860216140747,grad_norm: 0.8316331041116974, iteration: 452345
loss: 1.0278615951538086,grad_norm: 0.7433156682986857, iteration: 452346
loss: 0.9725624918937683,grad_norm: 0.8336902218658765, iteration: 452347
loss: 0.9442704319953918,grad_norm: 0.7872634736051382, iteration: 452348
loss: 1.037105917930603,grad_norm: 0.8973983279257638, iteration: 452349
loss: 1.030372142791748,grad_norm: 0.8185999203450584, iteration: 452350
loss: 1.0157160758972168,grad_norm: 0.681859397779948, iteration: 452351
loss: 0.9849568009376526,grad_norm: 0.7021117695410388, iteration: 452352
loss: 0.9550471901893616,grad_norm: 0.7757653361024716, iteration: 452353
loss: 1.0302129983901978,grad_norm: 0.7993431738411635, iteration: 452354
loss: 1.0442081689834595,grad_norm: 0.7429566815168596, iteration: 452355
loss: 0.9846832156181335,grad_norm: 0.660665186949654, iteration: 452356
loss: 0.9972461462020874,grad_norm: 0.723074572707472, iteration: 452357
loss: 0.9857679009437561,grad_norm: 0.87582679200835, iteration: 452358
loss: 1.0074857473373413,grad_norm: 0.83158525248523, iteration: 452359
loss: 1.013810396194458,grad_norm: 0.8283304642574013, iteration: 452360
loss: 1.028943419456482,grad_norm: 0.7905257940677384, iteration: 452361
loss: 1.0673472881317139,grad_norm: 0.8364312893250015, iteration: 452362
loss: 1.0027215480804443,grad_norm: 0.7033005733782023, iteration: 452363
loss: 0.9598838686943054,grad_norm: 0.7522114225482235, iteration: 452364
loss: 0.9921079874038696,grad_norm: 0.7226862912396268, iteration: 452365
loss: 0.9997530579566956,grad_norm: 0.7644972959896792, iteration: 452366
loss: 1.0150518417358398,grad_norm: 0.7469264951406103, iteration: 452367
loss: 0.9933820962905884,grad_norm: 0.7758109250862848, iteration: 452368
loss: 0.9470484256744385,grad_norm: 0.8006143099864771, iteration: 452369
loss: 1.0525920391082764,grad_norm: 0.7909000372024341, iteration: 452370
loss: 1.033966302871704,grad_norm: 0.7439767294722243, iteration: 452371
loss: 0.9632982015609741,grad_norm: 0.907589270320563, iteration: 452372
loss: 0.9876546859741211,grad_norm: 0.7146780536542466, iteration: 452373
loss: 0.9470177888870239,grad_norm: 0.7340913846010101, iteration: 452374
loss: 1.060207486152649,grad_norm: 0.7872470809724753, iteration: 452375
loss: 1.0667943954467773,grad_norm: 0.9918181238498247, iteration: 452376
loss: 1.0123014450073242,grad_norm: 0.7967875478887679, iteration: 452377
loss: 0.991817831993103,grad_norm: 0.7994851771249024, iteration: 452378
loss: 1.031218409538269,grad_norm: 0.6499297321197836, iteration: 452379
loss: 0.9825417995452881,grad_norm: 0.7969552460965798, iteration: 452380
loss: 1.0163512229919434,grad_norm: 0.720989460274082, iteration: 452381
loss: 0.9847403764724731,grad_norm: 0.78944234097979, iteration: 452382
loss: 1.0085225105285645,grad_norm: 0.9128444778123598, iteration: 452383
loss: 1.0277742147445679,grad_norm: 0.7494943510616288, iteration: 452384
loss: 1.0149980783462524,grad_norm: 0.8854085519652902, iteration: 452385
loss: 1.000074028968811,grad_norm: 0.8042673503471685, iteration: 452386
loss: 1.0108773708343506,grad_norm: 0.7049048528138621, iteration: 452387
loss: 0.9990258812904358,grad_norm: 0.7136716455431323, iteration: 452388
loss: 1.0354359149932861,grad_norm: 0.7451935428597535, iteration: 452389
loss: 1.0092365741729736,grad_norm: 0.8395995110979554, iteration: 452390
loss: 1.0245777368545532,grad_norm: 0.6449387318648402, iteration: 452391
loss: 0.9869586229324341,grad_norm: 0.7355576894493545, iteration: 452392
loss: 1.0156830549240112,grad_norm: 0.6948807934640244, iteration: 452393
loss: 0.9987210631370544,grad_norm: 0.6472027163491122, iteration: 452394
loss: 1.016433835029602,grad_norm: 0.8307897405129738, iteration: 452395
loss: 1.003913164138794,grad_norm: 0.8156273652023905, iteration: 452396
loss: 0.9924352169036865,grad_norm: 0.7232366402095928, iteration: 452397
loss: 0.9885675311088562,grad_norm: 0.7382109624953868, iteration: 452398
loss: 0.9787237048149109,grad_norm: 0.7923033071736181, iteration: 452399
loss: 0.9824000597000122,grad_norm: 0.882247644468281, iteration: 452400
loss: 0.9758725762367249,grad_norm: 0.8553883219793751, iteration: 452401
loss: 0.9758186936378479,grad_norm: 0.6697837768397995, iteration: 452402
loss: 0.9973284602165222,grad_norm: 0.8941335950010803, iteration: 452403
loss: 1.0238871574401855,grad_norm: 0.7360212993055868, iteration: 452404
loss: 0.9890709519386292,grad_norm: 0.8387385702259645, iteration: 452405
loss: 0.9729487299919128,grad_norm: 0.6544428203486671, iteration: 452406
loss: 0.9900957345962524,grad_norm: 0.7189688113193942, iteration: 452407
loss: 0.9940887689590454,grad_norm: 0.7391238079551454, iteration: 452408
loss: 1.0056641101837158,grad_norm: 0.7634861215587441, iteration: 452409
loss: 0.9908798336982727,grad_norm: 0.8571046601257495, iteration: 452410
loss: 0.9785447716712952,grad_norm: 0.6342171543484496, iteration: 452411
loss: 0.9963043332099915,grad_norm: 0.7848729737676394, iteration: 452412
loss: 0.9975818395614624,grad_norm: 0.720412829157736, iteration: 452413
loss: 0.9889810085296631,grad_norm: 0.7788816626941387, iteration: 452414
loss: 1.015286922454834,grad_norm: 0.8225792594702889, iteration: 452415
loss: 1.0111579895019531,grad_norm: 0.7798615495208181, iteration: 452416
loss: 1.0055148601531982,grad_norm: 0.9285683659542283, iteration: 452417
loss: 0.9985238909721375,grad_norm: 0.7634236794398269, iteration: 452418
loss: 1.0351169109344482,grad_norm: 0.9999992647459093, iteration: 452419
loss: 0.9876788258552551,grad_norm: 0.6891543292899627, iteration: 452420
loss: 1.0073802471160889,grad_norm: 0.7062456851860571, iteration: 452421
loss: 1.0173193216323853,grad_norm: 0.7615046779075948, iteration: 452422
loss: 0.9718449115753174,grad_norm: 0.8614122918920243, iteration: 452423
loss: 1.00557279586792,grad_norm: 0.7698086490300162, iteration: 452424
loss: 0.9908728003501892,grad_norm: 0.7563270150426886, iteration: 452425
loss: 1.0024232864379883,grad_norm: 0.6284531172408822, iteration: 452426
loss: 0.9775575995445251,grad_norm: 0.831456056634244, iteration: 452427
loss: 1.0197412967681885,grad_norm: 0.7365801981365383, iteration: 452428
loss: 0.9669239521026611,grad_norm: 0.8779455800767383, iteration: 452429
loss: 1.012927770614624,grad_norm: 0.9174361100651054, iteration: 452430
loss: 0.9479267597198486,grad_norm: 0.7867134090388541, iteration: 452431
loss: 0.9930827021598816,grad_norm: 0.833516852814479, iteration: 452432
loss: 0.9957183003425598,grad_norm: 0.7313419387026205, iteration: 452433
loss: 0.9885132312774658,grad_norm: 0.7234854381138963, iteration: 452434
loss: 1.0188432931900024,grad_norm: 0.9999993722798571, iteration: 452435
loss: 1.0518317222595215,grad_norm: 0.7916217441939889, iteration: 452436
loss: 0.9731405377388,grad_norm: 0.6455363120185553, iteration: 452437
loss: 1.0134886503219604,grad_norm: 0.8505649986788307, iteration: 452438
loss: 0.9949268698692322,grad_norm: 0.712844049850941, iteration: 452439
loss: 1.0317761898040771,grad_norm: 0.7667609201507696, iteration: 452440
loss: 1.1729604005813599,grad_norm: 0.9999997387352966, iteration: 452441
loss: 1.026161551475525,grad_norm: 0.7081189277274937, iteration: 452442
loss: 0.9966416954994202,grad_norm: 0.8578050899407011, iteration: 452443
loss: 1.0107042789459229,grad_norm: 0.9174924549669914, iteration: 452444
loss: 1.0048187971115112,grad_norm: 0.8052714003325824, iteration: 452445
loss: 1.0072444677352905,grad_norm: 0.7669809981425945, iteration: 452446
loss: 1.0141851902008057,grad_norm: 0.910792766871792, iteration: 452447
loss: 0.9900832176208496,grad_norm: 0.8493492552587912, iteration: 452448
loss: 1.0333690643310547,grad_norm: 0.820340400727855, iteration: 452449
loss: 0.9462212920188904,grad_norm: 0.7324571508311716, iteration: 452450
loss: 1.0026609897613525,grad_norm: 0.9514560673129124, iteration: 452451
loss: 1.0128132104873657,grad_norm: 0.7228732592947946, iteration: 452452
loss: 0.9929919242858887,grad_norm: 0.6661713749240874, iteration: 452453
loss: 0.9993817210197449,grad_norm: 0.8161823104766713, iteration: 452454
loss: 1.0587217807769775,grad_norm: 0.7013879559423729, iteration: 452455
loss: 0.9989489912986755,grad_norm: 0.674008665455062, iteration: 452456
loss: 1.0323081016540527,grad_norm: 0.9304824915963616, iteration: 452457
loss: 1.0103479623794556,grad_norm: 0.9051436949383374, iteration: 452458
loss: 0.9718351364135742,grad_norm: 0.8857369346128676, iteration: 452459
loss: 1.039631962776184,grad_norm: 0.9999994271355694, iteration: 452460
loss: 0.9851194024085999,grad_norm: 0.8579260912378497, iteration: 452461
loss: 0.9692056179046631,grad_norm: 0.7364911291006639, iteration: 452462
loss: 1.006140947341919,grad_norm: 0.8705856395649303, iteration: 452463
loss: 1.0175551176071167,grad_norm: 0.774737401944756, iteration: 452464
loss: 0.9485173225402832,grad_norm: 0.8563337524219481, iteration: 452465
loss: 1.036954641342163,grad_norm: 0.8265726953839534, iteration: 452466
loss: 1.0198545455932617,grad_norm: 0.9999996269635061, iteration: 452467
loss: 1.0742931365966797,grad_norm: 0.841233682897928, iteration: 452468
loss: 0.995316207408905,grad_norm: 0.7274720693291465, iteration: 452469
loss: 1.0120208263397217,grad_norm: 0.7441070036916082, iteration: 452470
loss: 1.0199202299118042,grad_norm: 0.9999992674247106, iteration: 452471
loss: 1.021774411201477,grad_norm: 0.7888949242902709, iteration: 452472
loss: 1.0031646490097046,grad_norm: 0.7949837636634803, iteration: 452473
loss: 0.9838535189628601,grad_norm: 0.8356008471418691, iteration: 452474
loss: 1.0496540069580078,grad_norm: 0.9475326582108883, iteration: 452475
loss: 1.0108169317245483,grad_norm: 0.7486807970880907, iteration: 452476
loss: 0.9920885562896729,grad_norm: 0.7548768572141732, iteration: 452477
loss: 1.0020313262939453,grad_norm: 0.7899877340245979, iteration: 452478
loss: 0.9897693991661072,grad_norm: 0.6679403235460798, iteration: 452479
loss: 0.9924411773681641,grad_norm: 0.6878624090279558, iteration: 452480
loss: 1.010257601737976,grad_norm: 0.7956558622567466, iteration: 452481
loss: 1.007081389427185,grad_norm: 0.7662341827787136, iteration: 452482
loss: 1.0139472484588623,grad_norm: 0.9999991565547168, iteration: 452483
loss: 0.9923860430717468,grad_norm: 0.9999997276034414, iteration: 452484
loss: 0.9548036456108093,grad_norm: 0.8551194229190214, iteration: 452485
loss: 1.030158281326294,grad_norm: 0.7387890500281503, iteration: 452486
loss: 1.0075390338897705,grad_norm: 0.9999990891172335, iteration: 452487
loss: 0.9699649214744568,grad_norm: 0.7642901934288439, iteration: 452488
loss: 0.958595335483551,grad_norm: 0.7131023926596021, iteration: 452489
loss: 0.9946122169494629,grad_norm: 0.8119350676934487, iteration: 452490
loss: 0.9875772595405579,grad_norm: 0.6765348142439376, iteration: 452491
loss: 1.0094337463378906,grad_norm: 0.8626491426061892, iteration: 452492
loss: 1.0020473003387451,grad_norm: 0.691320018545127, iteration: 452493
loss: 1.007115125656128,grad_norm: 0.8676978684712846, iteration: 452494
loss: 1.0354212522506714,grad_norm: 0.7043798125226169, iteration: 452495
loss: 1.0377248525619507,grad_norm: 0.7534707405517602, iteration: 452496
loss: 1.0080163478851318,grad_norm: 0.8837040549726228, iteration: 452497
loss: 0.9906731247901917,grad_norm: 0.8221614680815541, iteration: 452498
loss: 0.9904254674911499,grad_norm: 0.9457813918370553, iteration: 452499
loss: 0.9971580505371094,grad_norm: 0.7088133243578745, iteration: 452500
loss: 1.0053330659866333,grad_norm: 0.6962346753568125, iteration: 452501
loss: 1.0129883289337158,grad_norm: 0.7813379137987654, iteration: 452502
loss: 0.9897887110710144,grad_norm: 0.7402549389076245, iteration: 452503
loss: 1.0064778327941895,grad_norm: 0.9068679662011508, iteration: 452504
loss: 1.104841709136963,grad_norm: 0.9906553042083754, iteration: 452505
loss: 0.999155580997467,grad_norm: 0.7415236764025176, iteration: 452506
loss: 0.9702998399734497,grad_norm: 0.7356409192429, iteration: 452507
loss: 0.9650359749794006,grad_norm: 0.6642196595053743, iteration: 452508
loss: 0.9758144617080688,grad_norm: 0.6691100244378356, iteration: 452509
loss: 0.9946670532226562,grad_norm: 0.8182844316072723, iteration: 452510
loss: 1.0383278131484985,grad_norm: 0.7333198984665361, iteration: 452511
loss: 0.9573206901550293,grad_norm: 0.8141904756753136, iteration: 452512
loss: 0.9866137504577637,grad_norm: 0.7725293790990225, iteration: 452513
loss: 0.9823261499404907,grad_norm: 0.7600724090843916, iteration: 452514
loss: 1.0031373500823975,grad_norm: 0.7291735806032684, iteration: 452515
loss: 0.9702684879302979,grad_norm: 0.8107156641811767, iteration: 452516
loss: 1.0501048564910889,grad_norm: 0.8721917207266002, iteration: 452517
loss: 0.9441916346549988,grad_norm: 0.9140780411099536, iteration: 452518
loss: 1.0053434371948242,grad_norm: 0.9844556987567614, iteration: 452519
loss: 1.0174773931503296,grad_norm: 0.7276275772341511, iteration: 452520
loss: 0.9987796545028687,grad_norm: 0.8252130851497418, iteration: 452521
loss: 0.979350745677948,grad_norm: 0.6846756451754107, iteration: 452522
loss: 1.060767650604248,grad_norm: 0.8308804367498106, iteration: 452523
loss: 0.9918215870857239,grad_norm: 0.7417605369681423, iteration: 452524
loss: 0.989018976688385,grad_norm: 0.6978483387901442, iteration: 452525
loss: 0.972633957862854,grad_norm: 0.7019417900357674, iteration: 452526
loss: 0.9774327874183655,grad_norm: 0.871029427718534, iteration: 452527
loss: 1.033479928970337,grad_norm: 0.8170143603137683, iteration: 452528
loss: 1.0135966539382935,grad_norm: 0.8591497960407702, iteration: 452529
loss: 1.010162353515625,grad_norm: 0.8097336558044891, iteration: 452530
loss: 1.0400996208190918,grad_norm: 0.8495991598009411, iteration: 452531
loss: 1.0165373086929321,grad_norm: 0.9143453266418917, iteration: 452532
loss: 1.0127272605895996,grad_norm: 0.9999990064580336, iteration: 452533
loss: 1.0374385118484497,grad_norm: 0.938031984958898, iteration: 452534
loss: 0.9881215691566467,grad_norm: 0.7807204286693167, iteration: 452535
loss: 1.0028462409973145,grad_norm: 0.7456962664743331, iteration: 452536
loss: 1.0024248361587524,grad_norm: 0.721968270192626, iteration: 452537
loss: 1.0212581157684326,grad_norm: 0.7794737930785796, iteration: 452538
loss: 1.0089139938354492,grad_norm: 0.8184473734490707, iteration: 452539
loss: 0.9820008277893066,grad_norm: 0.7556683351903195, iteration: 452540
loss: 0.9995759129524231,grad_norm: 0.6915089152274995, iteration: 452541
loss: 1.0137072801589966,grad_norm: 0.9999996008541471, iteration: 452542
loss: 1.0348047018051147,grad_norm: 0.9999998814961659, iteration: 452543
loss: 1.0087127685546875,grad_norm: 0.7296257897451686, iteration: 452544
loss: 1.035646915435791,grad_norm: 0.8579134658354663, iteration: 452545
loss: 0.9871916174888611,grad_norm: 0.9272352878253014, iteration: 452546
loss: 1.0021005868911743,grad_norm: 0.8502333221812272, iteration: 452547
loss: 1.041978359222412,grad_norm: 0.6806011636971455, iteration: 452548
loss: 1.0349085330963135,grad_norm: 0.8483396616433383, iteration: 452549
loss: 0.9648056030273438,grad_norm: 0.8467473686742197, iteration: 452550
loss: 1.018466830253601,grad_norm: 0.7798199380963831, iteration: 452551
loss: 0.9974980354309082,grad_norm: 0.7872091277168735, iteration: 452552
loss: 1.0060175657272339,grad_norm: 0.8243533723203115, iteration: 452553
loss: 1.0243924856185913,grad_norm: 0.8070659712615865, iteration: 452554
loss: 0.9987484812736511,grad_norm: 0.7861171669444931, iteration: 452555
loss: 0.9855719804763794,grad_norm: 0.700792616081946, iteration: 452556
loss: 0.9994558095932007,grad_norm: 0.752244325739595, iteration: 452557
loss: 1.0150671005249023,grad_norm: 0.6594653262198797, iteration: 452558
loss: 1.0073026418685913,grad_norm: 0.7493128473106057, iteration: 452559
loss: 1.0716484785079956,grad_norm: 0.8597870031999258, iteration: 452560
loss: 0.9781515598297119,grad_norm: 0.8123649719696319, iteration: 452561
loss: 0.9996346831321716,grad_norm: 0.8498293169117997, iteration: 452562
loss: 0.9828781485557556,grad_norm: 0.8629271303767339, iteration: 452563
loss: 1.0245333909988403,grad_norm: 0.6722514782967245, iteration: 452564
loss: 0.9936749339103699,grad_norm: 0.7179391413929724, iteration: 452565
loss: 0.960472583770752,grad_norm: 0.9120205416046108, iteration: 452566
loss: 1.0260368585586548,grad_norm: 0.8446945850415976, iteration: 452567
loss: 0.9840051531791687,grad_norm: 0.8516040942274112, iteration: 452568
loss: 0.9818249940872192,grad_norm: 0.7453872560965014, iteration: 452569
loss: 0.9646856784820557,grad_norm: 0.9068021229375537, iteration: 452570
loss: 1.0265029668807983,grad_norm: 0.7246004070223784, iteration: 452571
loss: 0.9758438467979431,grad_norm: 0.7717238956063751, iteration: 452572
loss: 1.005199909210205,grad_norm: 0.8035384300838165, iteration: 452573
loss: 1.0490177869796753,grad_norm: 0.9999998512363176, iteration: 452574
loss: 0.9734973311424255,grad_norm: 0.6551150351721975, iteration: 452575
loss: 0.9915793538093567,grad_norm: 0.7346691694851817, iteration: 452576
loss: 1.0101374387741089,grad_norm: 0.7771616455468591, iteration: 452577
loss: 1.06803297996521,grad_norm: 0.9999992542140752, iteration: 452578
loss: 1.0198683738708496,grad_norm: 0.6989890462479363, iteration: 452579
loss: 0.9865021705627441,grad_norm: 0.8252141104572206, iteration: 452580
loss: 0.9979696273803711,grad_norm: 0.6765841218158, iteration: 452581
loss: 1.0186409950256348,grad_norm: 0.9999991792021566, iteration: 452582
loss: 0.9943572282791138,grad_norm: 0.8168956155712094, iteration: 452583
loss: 1.010555624961853,grad_norm: 0.9550495843997793, iteration: 452584
loss: 0.9928002953529358,grad_norm: 0.809809547693228, iteration: 452585
loss: 0.9721108078956604,grad_norm: 0.8520047925168026, iteration: 452586
loss: 1.012044072151184,grad_norm: 0.9999998193111138, iteration: 452587
loss: 1.0204933881759644,grad_norm: 0.6684343694449355, iteration: 452588
loss: 1.0116461515426636,grad_norm: 0.9956214728665669, iteration: 452589
loss: 1.0075769424438477,grad_norm: 0.7818022476611948, iteration: 452590
loss: 0.9733027219772339,grad_norm: 0.7289436532719963, iteration: 452591
loss: 1.0037368535995483,grad_norm: 0.7472770934223593, iteration: 452592
loss: 1.0093194246292114,grad_norm: 0.7287463156604176, iteration: 452593
loss: 1.010040044784546,grad_norm: 0.8093397635714442, iteration: 452594
loss: 0.95906662940979,grad_norm: 0.8331337257761943, iteration: 452595
loss: 0.9920353889465332,grad_norm: 0.7001749694627728, iteration: 452596
loss: 0.9905501008033752,grad_norm: 0.9999998882370524, iteration: 452597
loss: 0.9557989835739136,grad_norm: 0.8101957834202999, iteration: 452598
loss: 0.9957371950149536,grad_norm: 0.8310481462875615, iteration: 452599
loss: 0.9826925992965698,grad_norm: 0.8126622496889373, iteration: 452600
loss: 1.0256773233413696,grad_norm: 0.9055544057821979, iteration: 452601
loss: 1.0206983089447021,grad_norm: 0.6962736814479354, iteration: 452602
loss: 0.9828613996505737,grad_norm: 0.8422676914560755, iteration: 452603
loss: 0.956050455570221,grad_norm: 0.8182983336837054, iteration: 452604
loss: 1.0105284452438354,grad_norm: 0.7609862142887877, iteration: 452605
loss: 1.027582049369812,grad_norm: 0.9999998976630556, iteration: 452606
loss: 0.9886833429336548,grad_norm: 0.7231052268818692, iteration: 452607
loss: 0.9948855638504028,grad_norm: 0.8303109120680738, iteration: 452608
loss: 1.0711005926132202,grad_norm: 0.7662508631859837, iteration: 452609
loss: 1.0480724573135376,grad_norm: 0.9999993359455288, iteration: 452610
loss: 1.0156193971633911,grad_norm: 1.000000011508406, iteration: 452611
loss: 0.9826058745384216,grad_norm: 0.8196185989092373, iteration: 452612
loss: 1.0139437913894653,grad_norm: 0.6653187813777685, iteration: 452613
loss: 1.0007779598236084,grad_norm: 0.9709942759298243, iteration: 452614
loss: 1.0092421770095825,grad_norm: 0.7006238640456487, iteration: 452615
loss: 1.0007562637329102,grad_norm: 0.86852066391202, iteration: 452616
loss: 0.9921587109565735,grad_norm: 0.8719761585072541, iteration: 452617
loss: 0.9762305021286011,grad_norm: 0.7829997165210236, iteration: 452618
loss: 1.0236254930496216,grad_norm: 0.7476314732041031, iteration: 452619
loss: 1.0103307962417603,grad_norm: 0.7983948294532947, iteration: 452620
loss: 0.9922998547554016,grad_norm: 0.85672405011701, iteration: 452621
loss: 1.0103483200073242,grad_norm: 0.7150841194833711, iteration: 452622
loss: 1.0011117458343506,grad_norm: 0.7397014434899302, iteration: 452623
loss: 1.0438259840011597,grad_norm: 0.7032401796590693, iteration: 452624
loss: 1.0036157369613647,grad_norm: 0.7206510154985379, iteration: 452625
loss: 1.1002264022827148,grad_norm: 0.7677915076416856, iteration: 452626
loss: 1.0458574295043945,grad_norm: 0.9591958303495356, iteration: 452627
loss: 1.0146851539611816,grad_norm: 0.7674828820494709, iteration: 452628
loss: 1.0075197219848633,grad_norm: 0.7462591268397463, iteration: 452629
loss: 1.0121806859970093,grad_norm: 0.825184951853525, iteration: 452630
loss: 0.9459074139595032,grad_norm: 0.7606240408816157, iteration: 452631
loss: 1.0031954050064087,grad_norm: 0.8062417896842572, iteration: 452632
loss: 0.9834707379341125,grad_norm: 0.6764772335635358, iteration: 452633
loss: 0.9786251783370972,grad_norm: 0.7509674848340239, iteration: 452634
loss: 1.0203708410263062,grad_norm: 0.6405791388055261, iteration: 452635
loss: 0.9832012057304382,grad_norm: 0.7615557179502985, iteration: 452636
loss: 1.0010393857955933,grad_norm: 0.8109692704472893, iteration: 452637
loss: 0.9775993824005127,grad_norm: 0.7407877233831522, iteration: 452638
loss: 1.0322744846343994,grad_norm: 0.804802826805016, iteration: 452639
loss: 0.9996883869171143,grad_norm: 0.8892506425960267, iteration: 452640
loss: 1.0035606622695923,grad_norm: 0.7405322969397884, iteration: 452641
loss: 1.0383591651916504,grad_norm: 0.7398236521963376, iteration: 452642
loss: 0.9960693717002869,grad_norm: 0.9241720653597416, iteration: 452643
loss: 1.0159226655960083,grad_norm: 0.6073667029691167, iteration: 452644
loss: 0.9529919624328613,grad_norm: 0.8180022113223865, iteration: 452645
loss: 1.0304349660873413,grad_norm: 0.9700509609140399, iteration: 452646
loss: 1.0183358192443848,grad_norm: 0.8585756470043608, iteration: 452647
loss: 1.0522069931030273,grad_norm: 0.7118361879955999, iteration: 452648
loss: 1.0262842178344727,grad_norm: 0.9999990029748117, iteration: 452649
loss: 0.9698970317840576,grad_norm: 0.6813176318935871, iteration: 452650
loss: 1.0267112255096436,grad_norm: 0.8664346491034376, iteration: 452651
loss: 1.078006625175476,grad_norm: 0.9999992075557153, iteration: 452652
loss: 1.018723726272583,grad_norm: 0.9999998931437185, iteration: 452653
loss: 0.9984549880027771,grad_norm: 0.7012727322442391, iteration: 452654
loss: 0.9837790131568909,grad_norm: 0.7871666305083067, iteration: 452655
loss: 0.9811555743217468,grad_norm: 0.705482585703522, iteration: 452656
loss: 0.9887135028839111,grad_norm: 0.8386088823363684, iteration: 452657
loss: 0.9797729253768921,grad_norm: 0.8193793266699776, iteration: 452658
loss: 1.03927481174469,grad_norm: 0.7433282045048393, iteration: 452659
loss: 1.0062450170516968,grad_norm: 0.6574512183251813, iteration: 452660
loss: 1.0682073831558228,grad_norm: 0.8970463586817911, iteration: 452661
loss: 0.9912915825843811,grad_norm: 0.8098823458407644, iteration: 452662
loss: 1.029171347618103,grad_norm: 0.8507530719701407, iteration: 452663
loss: 1.0465381145477295,grad_norm: 0.9999994134322259, iteration: 452664
loss: 0.9651825428009033,grad_norm: 0.9261059684519815, iteration: 452665
loss: 0.9832125306129456,grad_norm: 0.6956137546988622, iteration: 452666
loss: 1.0140528678894043,grad_norm: 0.9999994753393119, iteration: 452667
loss: 1.0028473138809204,grad_norm: 0.7861404102608348, iteration: 452668
loss: 0.9841281771659851,grad_norm: 0.7743649486289993, iteration: 452669
loss: 0.9796101450920105,grad_norm: 0.6830351023666356, iteration: 452670
loss: 1.0158292055130005,grad_norm: 0.7450892937602328, iteration: 452671
loss: 1.022679090499878,grad_norm: 0.8194354696288392, iteration: 452672
loss: 0.980542778968811,grad_norm: 0.8951353960190078, iteration: 452673
loss: 1.0404788255691528,grad_norm: 0.9999991984377932, iteration: 452674
loss: 1.002013921737671,grad_norm: 0.8279431427220438, iteration: 452675
loss: 1.0997503995895386,grad_norm: 0.6716739987102707, iteration: 452676
loss: 0.9941359758377075,grad_norm: 0.7773009983069439, iteration: 452677
loss: 0.9697761535644531,grad_norm: 0.8279372050749494, iteration: 452678
loss: 1.0202745199203491,grad_norm: 0.7013686872221628, iteration: 452679
loss: 1.0109713077545166,grad_norm: 0.6586977449743613, iteration: 452680
loss: 1.0110118389129639,grad_norm: 0.7207899686332765, iteration: 452681
loss: 0.985917866230011,grad_norm: 0.7603567829617149, iteration: 452682
loss: 0.987561047077179,grad_norm: 0.7141855010512402, iteration: 452683
loss: 0.9664742350578308,grad_norm: 0.9005938492909198, iteration: 452684
loss: 0.9827204346656799,grad_norm: 0.9195378755243448, iteration: 452685
loss: 1.0028446912765503,grad_norm: 0.6909646976978757, iteration: 452686
loss: 1.0085933208465576,grad_norm: 0.7619021671495089, iteration: 452687
loss: 0.9953727126121521,grad_norm: 0.8319066563911514, iteration: 452688
loss: 0.9964966177940369,grad_norm: 0.8671741892747252, iteration: 452689
loss: 1.009592056274414,grad_norm: 0.7866072036362356, iteration: 452690
loss: 1.0254405736923218,grad_norm: 0.7012850358758063, iteration: 452691
loss: 0.9930281043052673,grad_norm: 0.7341229831892125, iteration: 452692
loss: 0.9543542861938477,grad_norm: 0.9225424509064684, iteration: 452693
loss: 0.9534732699394226,grad_norm: 0.9626213467944598, iteration: 452694
loss: 1.0196436643600464,grad_norm: 0.8619846987637831, iteration: 452695
loss: 0.9827622175216675,grad_norm: 0.7395483797091382, iteration: 452696
loss: 1.060666561126709,grad_norm: 0.9999990670184269, iteration: 452697
loss: 0.937110424041748,grad_norm: 0.8617662179442893, iteration: 452698
loss: 0.9869964718818665,grad_norm: 0.7839478535860133, iteration: 452699
loss: 1.0026044845581055,grad_norm: 0.738663716951405, iteration: 452700
loss: 0.9945639967918396,grad_norm: 0.8817597373531267, iteration: 452701
loss: 0.9874175190925598,grad_norm: 0.9999991734100651, iteration: 452702
loss: 1.0006189346313477,grad_norm: 0.7976613099873763, iteration: 452703
loss: 0.9810553193092346,grad_norm: 0.7362721096480546, iteration: 452704
loss: 0.987151563167572,grad_norm: 0.9501065209476093, iteration: 452705
loss: 0.9787424206733704,grad_norm: 0.8574046836291639, iteration: 452706
loss: 0.9956748485565186,grad_norm: 0.9999991551125463, iteration: 452707
loss: 0.9813370704650879,grad_norm: 0.9660407822373223, iteration: 452708
loss: 0.9772941470146179,grad_norm: 0.8074012632316104, iteration: 452709
loss: 1.0001734495162964,grad_norm: 0.7335089912754567, iteration: 452710
loss: 0.9607614278793335,grad_norm: 0.8430668318291338, iteration: 452711
loss: 0.9992441534996033,grad_norm: 0.735310103088736, iteration: 452712
loss: 1.0255964994430542,grad_norm: 0.7936890369389507, iteration: 452713
loss: 1.0194624662399292,grad_norm: 0.8211115326592887, iteration: 452714
loss: 1.0351990461349487,grad_norm: 0.8225403931370892, iteration: 452715
loss: 0.9365604519844055,grad_norm: 0.6822823321732642, iteration: 452716
loss: 0.9750561118125916,grad_norm: 0.7410278159819916, iteration: 452717
loss: 1.0038025379180908,grad_norm: 0.999999745829282, iteration: 452718
loss: 0.9887790083885193,grad_norm: 0.8497979819944785, iteration: 452719
loss: 0.9792122840881348,grad_norm: 0.7650775751907519, iteration: 452720
loss: 1.007745385169983,grad_norm: 0.9999991146681143, iteration: 452721
loss: 1.0933860540390015,grad_norm: 0.9999999106054799, iteration: 452722
loss: 0.9645782709121704,grad_norm: 0.9999995314730509, iteration: 452723
loss: 1.0117305517196655,grad_norm: 0.7794463957773301, iteration: 452724
loss: 1.025659441947937,grad_norm: 0.7150679040388143, iteration: 452725
loss: 1.017297387123108,grad_norm: 0.777286226343793, iteration: 452726
loss: 0.9786903262138367,grad_norm: 0.6624205902200696, iteration: 452727
loss: 0.95339035987854,grad_norm: 0.6310593772902294, iteration: 452728
loss: 1.0359179973602295,grad_norm: 0.7865984214481062, iteration: 452729
loss: 1.005314826965332,grad_norm: 0.9999995705070808, iteration: 452730
loss: 1.0287896394729614,grad_norm: 0.967447619272684, iteration: 452731
loss: 0.969923198223114,grad_norm: 0.7854747465276479, iteration: 452732
loss: 0.9910615086555481,grad_norm: 0.7761208000980117, iteration: 452733
loss: 0.9715554714202881,grad_norm: 0.7759143472845902, iteration: 452734
loss: 1.0051770210266113,grad_norm: 0.7409271677126232, iteration: 452735
loss: 1.0061792135238647,grad_norm: 0.779919366053744, iteration: 452736
loss: 1.0091466903686523,grad_norm: 0.6784751817601741, iteration: 452737
loss: 0.9789063930511475,grad_norm: 0.7156657041809446, iteration: 452738
loss: 0.9911876916885376,grad_norm: 0.6017911058798094, iteration: 452739
loss: 0.9393478631973267,grad_norm: 0.8639776263403905, iteration: 452740
loss: 0.9842705726623535,grad_norm: 0.787219404647411, iteration: 452741
loss: 1.0355154275894165,grad_norm: 1.0000000665529534, iteration: 452742
loss: 0.9974139928817749,grad_norm: 0.8269628755375354, iteration: 452743
loss: 1.0018651485443115,grad_norm: 0.8708799165170337, iteration: 452744
loss: 0.9969341158866882,grad_norm: 0.6758439511217599, iteration: 452745
loss: 0.9947395920753479,grad_norm: 0.7190891851582804, iteration: 452746
loss: 0.9930206537246704,grad_norm: 0.7306417643982447, iteration: 452747
loss: 0.9618353247642517,grad_norm: 0.7642064346092479, iteration: 452748
loss: 1.0597575902938843,grad_norm: 0.8871549915893415, iteration: 452749
loss: 0.9819895029067993,grad_norm: 0.6695954060001499, iteration: 452750
loss: 0.9901875257492065,grad_norm: 0.7241643482510366, iteration: 452751
loss: 0.9670926332473755,grad_norm: 0.9016152898483762, iteration: 452752
loss: 0.9955453872680664,grad_norm: 0.6853792893474951, iteration: 452753
loss: 1.0009410381317139,grad_norm: 0.6981670139237052, iteration: 452754
loss: 1.006657600402832,grad_norm: 0.7502793339632245, iteration: 452755
loss: 1.0179874897003174,grad_norm: 0.7678528354414158, iteration: 452756
loss: 0.9863362908363342,grad_norm: 0.7214723499736421, iteration: 452757
loss: 1.019434928894043,grad_norm: 0.9997353432151421, iteration: 452758
loss: 1.013418436050415,grad_norm: 0.7940388061502628, iteration: 452759
loss: 1.0453728437423706,grad_norm: 0.9999999841121324, iteration: 452760
loss: 1.013887643814087,grad_norm: 0.7903686818823162, iteration: 452761
loss: 0.970612108707428,grad_norm: 0.999999002559658, iteration: 452762
loss: 1.0060328245162964,grad_norm: 0.7328981159618408, iteration: 452763
loss: 1.0585365295410156,grad_norm: 0.7837185640423118, iteration: 452764
loss: 0.9628802537918091,grad_norm: 0.9888213732649601, iteration: 452765
loss: 1.001173734664917,grad_norm: 0.8121277296560854, iteration: 452766
loss: 0.9981762170791626,grad_norm: 0.844843867551925, iteration: 452767
loss: 0.9998806118965149,grad_norm: 0.8843262362359804, iteration: 452768
loss: 0.9830636382102966,grad_norm: 0.7364654469373353, iteration: 452769
loss: 0.9809858202934265,grad_norm: 0.698636410501887, iteration: 452770
loss: 0.9853551983833313,grad_norm: 0.7629079475019718, iteration: 452771
loss: 0.9780576825141907,grad_norm: 0.8688520482365216, iteration: 452772
loss: 1.0215654373168945,grad_norm: 0.6898319127673003, iteration: 452773
loss: 0.9777951836585999,grad_norm: 0.8374525498342811, iteration: 452774
loss: 0.9965822100639343,grad_norm: 0.7508713672330296, iteration: 452775
loss: 1.009855031967163,grad_norm: 0.6843306958153316, iteration: 452776
loss: 1.0328795909881592,grad_norm: 0.667403204300078, iteration: 452777
loss: 1.015663981437683,grad_norm: 0.7249592642334971, iteration: 452778
loss: 0.9821895956993103,grad_norm: 0.8090862048770232, iteration: 452779
loss: 1.014256238937378,grad_norm: 0.9999999453678728, iteration: 452780
loss: 0.9885447025299072,grad_norm: 0.8597334451211649, iteration: 452781
loss: 0.9648953676223755,grad_norm: 0.733866808326477, iteration: 452782
loss: 0.9859468340873718,grad_norm: 0.7901223627381823, iteration: 452783
loss: 0.9900844097137451,grad_norm: 0.8103149849306148, iteration: 452784
loss: 1.0925127267837524,grad_norm: 0.8654977935622873, iteration: 452785
loss: 0.9968199133872986,grad_norm: 0.6715447703063732, iteration: 452786
loss: 0.9894561767578125,grad_norm: 0.6512484288162527, iteration: 452787
loss: 1.0670313835144043,grad_norm: 0.7816087662994874, iteration: 452788
loss: 0.9824061989784241,grad_norm: 0.6152773369570246, iteration: 452789
loss: 1.0357524156570435,grad_norm: 0.9999991626356223, iteration: 452790
loss: 0.9890141487121582,grad_norm: 0.8036569494191858, iteration: 452791
loss: 1.0158804655075073,grad_norm: 0.7177998475226973, iteration: 452792
loss: 0.9901846051216125,grad_norm: 0.8156777416846245, iteration: 452793
loss: 0.9763714671134949,grad_norm: 0.7203597952015, iteration: 452794
loss: 1.0655517578125,grad_norm: 0.9999997506249186, iteration: 452795
loss: 0.976884126663208,grad_norm: 0.8607444773348424, iteration: 452796
loss: 0.9960687160491943,grad_norm: 0.8235348681597555, iteration: 452797
loss: 0.9955334663391113,grad_norm: 0.899258849082269, iteration: 452798
loss: 0.9964890480041504,grad_norm: 0.8636047377239343, iteration: 452799
loss: 1.021154522895813,grad_norm: 0.8392678976380638, iteration: 452800
loss: 1.0255519151687622,grad_norm: 0.7603947632497554, iteration: 452801
loss: 0.9995496273040771,grad_norm: 0.7537071328274036, iteration: 452802
loss: 0.9816053509712219,grad_norm: 0.786361872774892, iteration: 452803
loss: 1.0315080881118774,grad_norm: 0.5975262280090563, iteration: 452804
loss: 0.9939860105514526,grad_norm: 0.7185989698215882, iteration: 452805
loss: 0.9644197225570679,grad_norm: 0.9999994875842032, iteration: 452806
loss: 0.9859910011291504,grad_norm: 0.7946776070319225, iteration: 452807
loss: 0.993808925151825,grad_norm: 0.755371318666224, iteration: 452808
loss: 0.9924780130386353,grad_norm: 0.5561505453449824, iteration: 452809
loss: 1.0167524814605713,grad_norm: 0.8662629346751776, iteration: 452810
loss: 1.0133488178253174,grad_norm: 0.7501452208158327, iteration: 452811
loss: 1.0568890571594238,grad_norm: 0.9999990471972672, iteration: 452812
loss: 1.0560276508331299,grad_norm: 0.7684144618172917, iteration: 452813
loss: 1.0219470262527466,grad_norm: 0.999999123526576, iteration: 452814
loss: 0.9738191962242126,grad_norm: 0.9094437652627423, iteration: 452815
loss: 0.9737920761108398,grad_norm: 0.909703345020255, iteration: 452816
loss: 1.0025904178619385,grad_norm: 0.6939095626730255, iteration: 452817
loss: 0.995679497718811,grad_norm: 0.7092249000856391, iteration: 452818
loss: 1.0463335514068604,grad_norm: 0.9999992929750192, iteration: 452819
loss: 1.016442894935608,grad_norm: 0.7416394069519475, iteration: 452820
loss: 1.0371555089950562,grad_norm: 0.7579375204878476, iteration: 452821
loss: 0.9973921179771423,grad_norm: 0.7162545365235623, iteration: 452822
loss: 0.9943410754203796,grad_norm: 0.7026919304873074, iteration: 452823
loss: 0.9925065040588379,grad_norm: 0.8667530976538059, iteration: 452824
loss: 0.9822909235954285,grad_norm: 0.6103390495605361, iteration: 452825
loss: 1.001242995262146,grad_norm: 0.7499726326590382, iteration: 452826
loss: 1.0024374723434448,grad_norm: 0.9177205302482224, iteration: 452827
loss: 0.9946844577789307,grad_norm: 0.7213840724191197, iteration: 452828
loss: 0.9818699359893799,grad_norm: 0.6664159832051175, iteration: 452829
loss: 1.0123493671417236,grad_norm: 0.7405250995137456, iteration: 452830
loss: 1.001601219177246,grad_norm: 0.8056004954932205, iteration: 452831
loss: 0.9817019701004028,grad_norm: 0.7956611680451289, iteration: 452832
loss: 0.9784387946128845,grad_norm: 0.851297294060406, iteration: 452833
loss: 1.092790126800537,grad_norm: 0.9999996729432877, iteration: 452834
loss: 1.0437679290771484,grad_norm: 0.9805384430143416, iteration: 452835
loss: 0.9879931807518005,grad_norm: 0.7743466655182575, iteration: 452836
loss: 1.0373289585113525,grad_norm: 0.9999991853705208, iteration: 452837
loss: 0.9890727996826172,grad_norm: 0.6555428218459295, iteration: 452838
loss: 1.0211166143417358,grad_norm: 0.8644458462942577, iteration: 452839
loss: 0.9772049784660339,grad_norm: 0.8194197603441407, iteration: 452840
loss: 1.012696623802185,grad_norm: 0.7840554374313027, iteration: 452841
loss: 0.966126561164856,grad_norm: 0.7014185925177534, iteration: 452842
loss: 1.0017077922821045,grad_norm: 0.7993438283654145, iteration: 452843
loss: 1.0086266994476318,grad_norm: 0.763444764576354, iteration: 452844
loss: 1.0037163496017456,grad_norm: 0.6558165367298021, iteration: 452845
loss: 0.9998007416725159,grad_norm: 0.691997156835854, iteration: 452846
loss: 0.9965987205505371,grad_norm: 0.6871874548570559, iteration: 452847
loss: 1.0278774499893188,grad_norm: 0.9999991519511571, iteration: 452848
loss: 0.9939070343971252,grad_norm: 0.7773171114701595, iteration: 452849
loss: 0.9702860116958618,grad_norm: 0.6350957897515178, iteration: 452850
loss: 1.026175856590271,grad_norm: 0.8128677840698382, iteration: 452851
loss: 0.9861666560173035,grad_norm: 0.9415180108120395, iteration: 452852
loss: 1.0189807415008545,grad_norm: 0.9999990865135565, iteration: 452853
loss: 1.008380651473999,grad_norm: 0.779643479543952, iteration: 452854
loss: 0.9941858649253845,grad_norm: 0.7130911058791397, iteration: 452855
loss: 1.0446555614471436,grad_norm: 1.000000043616965, iteration: 452856
loss: 1.0325921773910522,grad_norm: 0.7995474658674316, iteration: 452857
loss: 1.0229977369308472,grad_norm: 0.7544504299389796, iteration: 452858
loss: 0.988623857498169,grad_norm: 0.6797271414667904, iteration: 452859
loss: 0.9709658622741699,grad_norm: 0.68975942878378, iteration: 452860
loss: 1.0119333267211914,grad_norm: 0.7610121228533463, iteration: 452861
loss: 0.9706764221191406,grad_norm: 0.8387762656926118, iteration: 452862
loss: 0.9754210710525513,grad_norm: 0.7993765154705277, iteration: 452863
loss: 1.0306291580200195,grad_norm: 0.8634181602737963, iteration: 452864
loss: 1.0126320123672485,grad_norm: 0.7194748470324164, iteration: 452865
loss: 0.9937700629234314,grad_norm: 0.8443526821691749, iteration: 452866
loss: 1.078773021697998,grad_norm: 0.9999998577745655, iteration: 452867
loss: 1.0295212268829346,grad_norm: 0.7982474264983239, iteration: 452868
loss: 0.9622597694396973,grad_norm: 0.6737221479043659, iteration: 452869
loss: 1.0015119314193726,grad_norm: 0.7090258371751292, iteration: 452870
loss: 1.0070624351501465,grad_norm: 0.9997676682190824, iteration: 452871
loss: 1.0142403841018677,grad_norm: 0.887281987823607, iteration: 452872
loss: 1.069818377494812,grad_norm: 0.8751405601049708, iteration: 452873
loss: 0.9740654230117798,grad_norm: 0.9307390914429939, iteration: 452874
loss: 0.9849154949188232,grad_norm: 0.9287707070065145, iteration: 452875
loss: 1.0202986001968384,grad_norm: 0.9999992462175061, iteration: 452876
loss: 1.0598242282867432,grad_norm: 0.8523994828505942, iteration: 452877
loss: 1.009812593460083,grad_norm: 0.8169763895582742, iteration: 452878
loss: 0.9967801570892334,grad_norm: 0.7175764129285217, iteration: 452879
loss: 1.021648645401001,grad_norm: 0.7733005432106758, iteration: 452880
loss: 0.991084098815918,grad_norm: 0.6123351699789807, iteration: 452881
loss: 1.0035200119018555,grad_norm: 0.9999990595779292, iteration: 452882
loss: 1.0207141637802124,grad_norm: 0.6915970239505211, iteration: 452883
loss: 1.0223275423049927,grad_norm: 0.6525408726588371, iteration: 452884
loss: 0.955453634262085,grad_norm: 0.7124647323659059, iteration: 452885
loss: 1.001892328262329,grad_norm: 0.8309763601723446, iteration: 452886
loss: 1.0001004934310913,grad_norm: 0.721355100739684, iteration: 452887
loss: 0.9449719190597534,grad_norm: 0.7122319947462725, iteration: 452888
loss: 0.9815429449081421,grad_norm: 0.623721290283344, iteration: 452889
loss: 1.0035566091537476,grad_norm: 0.744100700141224, iteration: 452890
loss: 1.0272548198699951,grad_norm: 0.9999991640977186, iteration: 452891
loss: 1.0056060552597046,grad_norm: 0.69309751931253, iteration: 452892
loss: 1.0005868673324585,grad_norm: 0.7263705994268813, iteration: 452893
loss: 1.0448144674301147,grad_norm: 0.9124309713337956, iteration: 452894
loss: 0.9861546754837036,grad_norm: 0.9460557423385747, iteration: 452895
loss: 1.0830845832824707,grad_norm: 0.722930817123365, iteration: 452896
loss: 0.9716954827308655,grad_norm: 0.7946605871104676, iteration: 452897
loss: 0.9487117528915405,grad_norm: 0.6992115237010921, iteration: 452898
loss: 0.990461528301239,grad_norm: 0.9127159519321417, iteration: 452899
loss: 1.023199439048767,grad_norm: 0.759892291882874, iteration: 452900
loss: 0.988255500793457,grad_norm: 0.7751671156470561, iteration: 452901
loss: 0.9683142900466919,grad_norm: 0.7830577634834307, iteration: 452902
loss: 0.9926744103431702,grad_norm: 0.7575211228168488, iteration: 452903
loss: 1.0155216455459595,grad_norm: 0.7132831863586728, iteration: 452904
loss: 0.9936183094978333,grad_norm: 0.672616691290522, iteration: 452905
loss: 0.9941834211349487,grad_norm: 0.7562122821323247, iteration: 452906
loss: 0.9796937704086304,grad_norm: 0.726710268067679, iteration: 452907
loss: 1.1164551973342896,grad_norm: 0.9999996951715816, iteration: 452908
loss: 1.00917387008667,grad_norm: 0.6860155224931888, iteration: 452909
loss: 1.0909000635147095,grad_norm: 0.9999995852641713, iteration: 452910
loss: 1.016514539718628,grad_norm: 0.7251562065257946, iteration: 452911
loss: 0.9880006313323975,grad_norm: 0.7046261262030362, iteration: 452912
loss: 1.0684847831726074,grad_norm: 0.9999999034667022, iteration: 452913
loss: 0.9898830652236938,grad_norm: 0.9659621584590666, iteration: 452914
loss: 0.9673203825950623,grad_norm: 0.7757747840378907, iteration: 452915
loss: 1.034921407699585,grad_norm: 0.8698164688758909, iteration: 452916
loss: 1.0262359380722046,grad_norm: 0.9551929076349968, iteration: 452917
loss: 1.0091944932937622,grad_norm: 0.8497560976006342, iteration: 452918
loss: 0.9900500774383545,grad_norm: 0.7996166586267365, iteration: 452919
loss: 0.9889598488807678,grad_norm: 0.7221190219940178, iteration: 452920
loss: 1.0302177667617798,grad_norm: 0.9999994317095816, iteration: 452921
loss: 1.0194857120513916,grad_norm: 0.9999991676609957, iteration: 452922
loss: 0.9904176592826843,grad_norm: 0.6806329167444268, iteration: 452923
loss: 1.01170814037323,grad_norm: 0.9999995611118376, iteration: 452924
loss: 1.005330204963684,grad_norm: 0.8251332239148201, iteration: 452925
loss: 1.0130053758621216,grad_norm: 0.7813188985044225, iteration: 452926
loss: 0.9788188338279724,grad_norm: 0.7052213896924455, iteration: 452927
loss: 1.0312120914459229,grad_norm: 0.7943046600502041, iteration: 452928
loss: 1.0049532651901245,grad_norm: 0.6563481776467122, iteration: 452929
loss: 1.0073609352111816,grad_norm: 0.999999484819074, iteration: 452930
loss: 1.0684454441070557,grad_norm: 0.7845079426336454, iteration: 452931
loss: 1.1550182104110718,grad_norm: 0.99999996854973, iteration: 452932
loss: 0.9939337968826294,grad_norm: 0.7036071419024492, iteration: 452933
loss: 0.969427764415741,grad_norm: 0.8079150932361889, iteration: 452934
loss: 1.0137290954589844,grad_norm: 0.8069310864184597, iteration: 452935
loss: 1.0212539434432983,grad_norm: 0.8612235003057563, iteration: 452936
loss: 1.0111140012741089,grad_norm: 0.8403970311994998, iteration: 452937
loss: 1.0418063402175903,grad_norm: 0.9765012463590108, iteration: 452938
loss: 0.9899675846099854,grad_norm: 0.7454571514083864, iteration: 452939
loss: 1.0307549238204956,grad_norm: 0.6921569709674361, iteration: 452940
loss: 0.968299925327301,grad_norm: 0.8334885842108892, iteration: 452941
loss: 0.9668854475021362,grad_norm: 0.7021256723179773, iteration: 452942
loss: 1.0126116275787354,grad_norm: 0.8678053331031991, iteration: 452943
loss: 0.9715737700462341,grad_norm: 0.9999995101718011, iteration: 452944
loss: 1.0043822526931763,grad_norm: 0.7236829964353121, iteration: 452945
loss: 1.00180983543396,grad_norm: 0.8536331986796868, iteration: 452946
loss: 1.0151163339614868,grad_norm: 0.8466974000245924, iteration: 452947
loss: 1.0506314039230347,grad_norm: 0.937918723004518, iteration: 452948
loss: 1.0418617725372314,grad_norm: 0.9999996621191097, iteration: 452949
loss: 0.9837912917137146,grad_norm: 0.6759629170057995, iteration: 452950
loss: 0.9955897331237793,grad_norm: 0.719806883720812, iteration: 452951
loss: 1.068928837776184,grad_norm: 0.7519147271748517, iteration: 452952
loss: 0.9651442170143127,grad_norm: 0.9584510937033782, iteration: 452953
loss: 0.985888659954071,grad_norm: 0.9191954579957915, iteration: 452954
loss: 1.0049821138381958,grad_norm: 0.7972766204524238, iteration: 452955
loss: 1.033793568611145,grad_norm: 0.80867336662798, iteration: 452956
loss: 0.9853217005729675,grad_norm: 0.7634325067805016, iteration: 452957
loss: 0.9871120452880859,grad_norm: 0.7166985397971434, iteration: 452958
loss: 0.9851344227790833,grad_norm: 0.6417620834321723, iteration: 452959
loss: 1.0132521390914917,grad_norm: 0.8414956124792169, iteration: 452960
loss: 1.0542795658111572,grad_norm: 0.9999991254859989, iteration: 452961
loss: 1.0367201566696167,grad_norm: 0.8609074802420956, iteration: 452962
loss: 0.9898050427436829,grad_norm: 0.9999996444534122, iteration: 452963
loss: 1.0190083980560303,grad_norm: 0.9915868326633213, iteration: 452964
loss: 0.9499174952507019,grad_norm: 0.8124178844459305, iteration: 452965
loss: 0.9724542498588562,grad_norm: 0.7280723314320469, iteration: 452966
loss: 1.013623595237732,grad_norm: 0.7827598805192892, iteration: 452967
loss: 0.992321789264679,grad_norm: 0.7789857683214951, iteration: 452968
loss: 0.9937484264373779,grad_norm: 0.8663178033746137, iteration: 452969
loss: 1.0390647649765015,grad_norm: 0.9999995597467267, iteration: 452970
loss: 1.0323406457901,grad_norm: 0.9999993752531562, iteration: 452971
loss: 0.9450512528419495,grad_norm: 0.6712143629880877, iteration: 452972
loss: 0.9931973814964294,grad_norm: 0.9999993115440993, iteration: 452973
loss: 0.9692878723144531,grad_norm: 0.7717210481247287, iteration: 452974
loss: 0.9550588130950928,grad_norm: 0.794719699279806, iteration: 452975
loss: 0.9771326184272766,grad_norm: 0.7259220167072011, iteration: 452976
loss: 0.9982765316963196,grad_norm: 0.7865436808354654, iteration: 452977
loss: 1.0567785501480103,grad_norm: 0.9999992101094357, iteration: 452978
loss: 0.9972594976425171,grad_norm: 0.8154043352191532, iteration: 452979
loss: 1.0324093103408813,grad_norm: 0.7726197410976603, iteration: 452980
loss: 1.0057563781738281,grad_norm: 0.7921140043415349, iteration: 452981
loss: 0.9542328715324402,grad_norm: 0.7553693216319506, iteration: 452982
loss: 1.0194283723831177,grad_norm: 0.999999287149562, iteration: 452983
loss: 1.0330435037612915,grad_norm: 0.924850646597047, iteration: 452984
loss: 1.006068229675293,grad_norm: 0.6751041995735129, iteration: 452985
loss: 1.021363615989685,grad_norm: 0.833879329285808, iteration: 452986
loss: 1.0264242887496948,grad_norm: 0.9999993700311787, iteration: 452987
loss: 1.0133945941925049,grad_norm: 0.849351312951977, iteration: 452988
loss: 1.062421441078186,grad_norm: 0.9999994590028375, iteration: 452989
loss: 1.0067495107650757,grad_norm: 0.9039711079132008, iteration: 452990
loss: 0.9853275418281555,grad_norm: 0.8341277660063796, iteration: 452991
loss: 1.0081895589828491,grad_norm: 0.9999992204525879, iteration: 452992
loss: 0.987212598323822,grad_norm: 0.7801065833952042, iteration: 452993
loss: 1.0341483354568481,grad_norm: 0.9047449426407432, iteration: 452994
loss: 0.9952853918075562,grad_norm: 0.773585908217314, iteration: 452995
loss: 0.9540196657180786,grad_norm: 0.7709196798950426, iteration: 452996
loss: 0.9941820502281189,grad_norm: 0.9999998294001338, iteration: 452997
loss: 0.9915364384651184,grad_norm: 0.7126570658627368, iteration: 452998
loss: 0.9966582655906677,grad_norm: 0.8684655929196862, iteration: 452999
loss: 1.0194718837738037,grad_norm: 0.7839080743826189, iteration: 453000
loss: 0.9820278286933899,grad_norm: 0.8185343048986294, iteration: 453001
loss: 1.0306366682052612,grad_norm: 0.7725173390001363, iteration: 453002
loss: 1.015385389328003,grad_norm: 0.8260595736623193, iteration: 453003
loss: 1.020682692527771,grad_norm: 0.7703083362571818, iteration: 453004
loss: 0.9594592452049255,grad_norm: 0.8785926912663107, iteration: 453005
loss: 1.0092973709106445,grad_norm: 0.9999991756590139, iteration: 453006
loss: 1.0266667604446411,grad_norm: 0.9999995083910377, iteration: 453007
loss: 1.009162187576294,grad_norm: 0.700755193932537, iteration: 453008
loss: 1.0163674354553223,grad_norm: 0.8228475326175054, iteration: 453009
loss: 0.999363124370575,grad_norm: 0.6736656709275055, iteration: 453010
loss: 1.0173181295394897,grad_norm: 0.69494657369768, iteration: 453011
loss: 0.977515697479248,grad_norm: 0.965417298962766, iteration: 453012
loss: 1.0164700746536255,grad_norm: 0.7914906124885053, iteration: 453013
loss: 0.9745089411735535,grad_norm: 0.8282663435791464, iteration: 453014
loss: 0.9778256416320801,grad_norm: 0.7065188931498461, iteration: 453015
loss: 1.0181775093078613,grad_norm: 0.7535935302196962, iteration: 453016
loss: 0.9677327275276184,grad_norm: 0.7046107712756089, iteration: 453017
loss: 1.016900658607483,grad_norm: 0.9999999040933754, iteration: 453018
loss: 0.9828863143920898,grad_norm: 0.8737207220666185, iteration: 453019
loss: 0.9826110601425171,grad_norm: 0.6952054425606149, iteration: 453020
loss: 0.9772658944129944,grad_norm: 0.5951178439720736, iteration: 453021
loss: 1.0441741943359375,grad_norm: 0.9999990637980207, iteration: 453022
loss: 1.018203854560852,grad_norm: 0.7786920427809441, iteration: 453023
loss: 1.0236784219741821,grad_norm: 0.6898616686518786, iteration: 453024
loss: 0.9648740887641907,grad_norm: 0.8723343701899403, iteration: 453025
loss: 1.005267858505249,grad_norm: 0.8995178947948597, iteration: 453026
loss: 1.0115832090377808,grad_norm: 0.8510896095431316, iteration: 453027
loss: 1.007383108139038,grad_norm: 0.9040091305533047, iteration: 453028
loss: 0.9836196303367615,grad_norm: 0.9999996734204754, iteration: 453029
loss: 1.016594409942627,grad_norm: 0.7906144663571414, iteration: 453030
loss: 1.0213947296142578,grad_norm: 0.9999998180431392, iteration: 453031
loss: 0.9986415505409241,grad_norm: 0.9478979511210343, iteration: 453032
loss: 0.9901223182678223,grad_norm: 0.7176656288048798, iteration: 453033
loss: 0.9968370199203491,grad_norm: 0.707781271292296, iteration: 453034
loss: 1.0827069282531738,grad_norm: 0.9999991154544402, iteration: 453035
loss: 0.9920505285263062,grad_norm: 0.7393754052255582, iteration: 453036
loss: 1.0058372020721436,grad_norm: 0.660129139686009, iteration: 453037
loss: 0.9758511781692505,grad_norm: 0.7977512583716984, iteration: 453038
loss: 1.05485999584198,grad_norm: 0.9999991781154498, iteration: 453039
loss: 1.0199965238571167,grad_norm: 0.768920374752493, iteration: 453040
loss: 0.9684081673622131,grad_norm: 0.7869903593464586, iteration: 453041
loss: 0.9921252727508545,grad_norm: 0.7503383847867771, iteration: 453042
loss: 1.09529709815979,grad_norm: 0.8338169938208229, iteration: 453043
loss: 1.0248411893844604,grad_norm: 0.7607350446450213, iteration: 453044
loss: 0.9623093605041504,grad_norm: 0.8336410845555204, iteration: 453045
loss: 1.0236676931381226,grad_norm: 0.7954111542487913, iteration: 453046
loss: 1.0543689727783203,grad_norm: 0.9999990673894351, iteration: 453047
loss: 1.0386337041854858,grad_norm: 0.9999996746801719, iteration: 453048
loss: 1.0599957704544067,grad_norm: 0.9999990266851256, iteration: 453049
loss: 0.9701176881790161,grad_norm: 0.7460105271379944, iteration: 453050
loss: 1.0135904550552368,grad_norm: 0.6243572782787448, iteration: 453051
loss: 0.9734430313110352,grad_norm: 0.719684515280671, iteration: 453052
loss: 0.9778760075569153,grad_norm: 0.7170040225125305, iteration: 453053
loss: 0.9636591076850891,grad_norm: 0.9297432381799522, iteration: 453054
loss: 1.024598240852356,grad_norm: 0.8596984467576759, iteration: 453055
loss: 1.015580415725708,grad_norm: 0.8223554343587601, iteration: 453056
loss: 1.021263837814331,grad_norm: 0.9371637793553913, iteration: 453057
loss: 0.9979695081710815,grad_norm: 0.7971678246209393, iteration: 453058
loss: 1.0102555751800537,grad_norm: 0.7587269794165078, iteration: 453059
loss: 0.9981849193572998,grad_norm: 0.8098558101555443, iteration: 453060
loss: 0.9804235696792603,grad_norm: 0.7616899648638374, iteration: 453061
loss: 1.0051157474517822,grad_norm: 0.9999998126981837, iteration: 453062
loss: 0.9764624834060669,grad_norm: 0.6743113321491554, iteration: 453063
loss: 0.9710690975189209,grad_norm: 0.7399807940622162, iteration: 453064
loss: 1.0141345262527466,grad_norm: 0.9999997103058733, iteration: 453065
loss: 1.0454542636871338,grad_norm: 0.7576676545998402, iteration: 453066
loss: 1.0297112464904785,grad_norm: 0.8333792883443055, iteration: 453067
loss: 0.9975402355194092,grad_norm: 0.6232161031025326, iteration: 453068
loss: 1.082115888595581,grad_norm: 0.9999995957940669, iteration: 453069
loss: 1.0487022399902344,grad_norm: 0.8280887957725508, iteration: 453070
loss: 1.008058786392212,grad_norm: 0.7797843339757331, iteration: 453071
loss: 1.0079691410064697,grad_norm: 0.818039129398665, iteration: 453072
loss: 1.0482759475708008,grad_norm: 0.8830639838424288, iteration: 453073
loss: 1.006470799446106,grad_norm: 0.9166397353144762, iteration: 453074
loss: 0.9932001233100891,grad_norm: 0.672252842407329, iteration: 453075
loss: 0.9541072249412537,grad_norm: 0.7497729722264368, iteration: 453076
loss: 0.9840386509895325,grad_norm: 0.9195722977598166, iteration: 453077
loss: 1.0227341651916504,grad_norm: 0.7117865110772135, iteration: 453078
loss: 0.9958581924438477,grad_norm: 0.7774533062700472, iteration: 453079
loss: 1.0402231216430664,grad_norm: 0.99999973633377, iteration: 453080
loss: 1.0638384819030762,grad_norm: 0.9768131727095, iteration: 453081
loss: 1.0136899948120117,grad_norm: 0.8805227405957107, iteration: 453082
loss: 0.983059823513031,grad_norm: 0.6468842411230606, iteration: 453083
loss: 1.0187515020370483,grad_norm: 0.6789185410932079, iteration: 453084
loss: 1.0235956907272339,grad_norm: 0.8765046347619334, iteration: 453085
loss: 1.126110315322876,grad_norm: 0.9999994083037113, iteration: 453086
loss: 1.121679663658142,grad_norm: 0.9999999409290923, iteration: 453087
loss: 1.0495693683624268,grad_norm: 0.863448175489185, iteration: 453088
loss: 0.9940133094787598,grad_norm: 0.803279716719236, iteration: 453089
loss: 1.0842114686965942,grad_norm: 0.9999993845241205, iteration: 453090
loss: 1.0952450037002563,grad_norm: 0.8544023816188845, iteration: 453091
loss: 0.9825038313865662,grad_norm: 0.8694473000259845, iteration: 453092
loss: 1.0342055559158325,grad_norm: 0.9999990443333686, iteration: 453093
loss: 0.9664614200592041,grad_norm: 0.9999994621732258, iteration: 453094
loss: 1.0840281248092651,grad_norm: 0.9966149605554957, iteration: 453095
loss: 1.0845530033111572,grad_norm: 0.8917797994115747, iteration: 453096
loss: 0.9868727922439575,grad_norm: 0.8731982511110423, iteration: 453097
loss: 1.0135258436203003,grad_norm: 0.999999218825464, iteration: 453098
loss: 1.0270284414291382,grad_norm: 0.9999993601362879, iteration: 453099
loss: 1.0153145790100098,grad_norm: 0.6983718413302331, iteration: 453100
loss: 0.9976224303245544,grad_norm: 0.8636006808955303, iteration: 453101
loss: 0.9698982834815979,grad_norm: 0.8940419316297125, iteration: 453102
loss: 1.0564582347869873,grad_norm: 0.7381488621929903, iteration: 453103
loss: 0.9670044183731079,grad_norm: 0.6615423207048551, iteration: 453104
loss: 0.9558185338973999,grad_norm: 0.6970236311839199, iteration: 453105
loss: 1.0037157535552979,grad_norm: 0.9803587315794348, iteration: 453106
loss: 0.9747382402420044,grad_norm: 0.7135010375670202, iteration: 453107
loss: 1.0282803773880005,grad_norm: 0.9999999288503487, iteration: 453108
loss: 1.0431491136550903,grad_norm: 0.8730889211317462, iteration: 453109
loss: 0.968745768070221,grad_norm: 0.8314087514226219, iteration: 453110
loss: 1.1167141199111938,grad_norm: 1.0000000867136472, iteration: 453111
loss: 1.0805561542510986,grad_norm: 0.9999999598955989, iteration: 453112
loss: 1.009170413017273,grad_norm: 0.9999990966674802, iteration: 453113
loss: 1.05852210521698,grad_norm: 0.8112284199322938, iteration: 453114
loss: 1.0198782682418823,grad_norm: 0.9999989931289025, iteration: 453115
loss: 1.034920334815979,grad_norm: 0.782182266580955, iteration: 453116
loss: 1.0087300539016724,grad_norm: 0.6712138517055986, iteration: 453117
loss: 1.0881136655807495,grad_norm: 0.9999999022337166, iteration: 453118
loss: 1.00162672996521,grad_norm: 0.6929086634335425, iteration: 453119
loss: 0.9871170520782471,grad_norm: 0.6595871108659263, iteration: 453120
loss: 1.0245410203933716,grad_norm: 0.9064045423419302, iteration: 453121
loss: 1.0273209810256958,grad_norm: 0.8553568233602505, iteration: 453122
loss: 1.0285834074020386,grad_norm: 0.8010199417028407, iteration: 453123
loss: 0.9930527806282043,grad_norm: 0.7382732637816181, iteration: 453124
loss: 1.0282702445983887,grad_norm: 0.9999996960602292, iteration: 453125
loss: 1.0664829015731812,grad_norm: 0.8423039887250905, iteration: 453126
loss: 1.0374678373336792,grad_norm: 0.7688050836550805, iteration: 453127
loss: 1.0252338647842407,grad_norm: 0.9999995806994565, iteration: 453128
loss: 1.0037821531295776,grad_norm: 0.6729404058961403, iteration: 453129
loss: 1.0348461866378784,grad_norm: 0.9999991564120063, iteration: 453130
loss: 1.028080940246582,grad_norm: 0.8952465924539793, iteration: 453131
loss: 0.9467778205871582,grad_norm: 0.8090871126302868, iteration: 453132
loss: 1.0163023471832275,grad_norm: 0.9302753548733185, iteration: 453133
loss: 1.0600908994674683,grad_norm: 0.9999994726767016, iteration: 453134
loss: 1.049081802368164,grad_norm: 0.8110239698885078, iteration: 453135
loss: 1.045371651649475,grad_norm: 0.9999991462207474, iteration: 453136
loss: 1.0371556282043457,grad_norm: 0.9999994077622522, iteration: 453137
loss: 0.9876528978347778,grad_norm: 0.8197477664237862, iteration: 453138
loss: 1.0031055212020874,grad_norm: 0.9999997215634371, iteration: 453139
loss: 1.1042133569717407,grad_norm: 0.9999996620821608, iteration: 453140
loss: 1.047377347946167,grad_norm: 0.9999994101361276, iteration: 453141
loss: 1.035856008529663,grad_norm: 0.9999995950283481, iteration: 453142
loss: 0.9867143630981445,grad_norm: 0.7651627027873955, iteration: 453143
loss: 0.9949015378952026,grad_norm: 0.7359789005748361, iteration: 453144
loss: 1.0812711715698242,grad_norm: 0.9867395230878713, iteration: 453145
loss: 0.9935023188591003,grad_norm: 0.6900853064026267, iteration: 453146
loss: 1.0334347486495972,grad_norm: 0.9999999615254376, iteration: 453147
loss: 0.9818011522293091,grad_norm: 0.816808630432554, iteration: 453148
loss: 1.010823130607605,grad_norm: 0.9346264313589413, iteration: 453149
loss: 1.00583016872406,grad_norm: 0.7515741200635528, iteration: 453150
loss: 1.0694928169250488,grad_norm: 0.999999543949257, iteration: 453151
loss: 0.9677214026451111,grad_norm: 0.7123833599823461, iteration: 453152
loss: 0.9758568406105042,grad_norm: 0.7236406291277148, iteration: 453153
loss: 1.067801833152771,grad_norm: 0.9999994447868401, iteration: 453154
loss: 1.0451180934906006,grad_norm: 0.9999994527163711, iteration: 453155
loss: 1.0227665901184082,grad_norm: 0.6598281863151769, iteration: 453156
loss: 1.0219690799713135,grad_norm: 0.9999998115045522, iteration: 453157
loss: 1.0049283504486084,grad_norm: 0.9391366879489637, iteration: 453158
loss: 0.9923509955406189,grad_norm: 0.6967523384759211, iteration: 453159
loss: 1.0050371885299683,grad_norm: 0.736847474704328, iteration: 453160
loss: 1.033746600151062,grad_norm: 0.8281265259820446, iteration: 453161
loss: 0.9903137683868408,grad_norm: 0.7421423438550039, iteration: 453162
loss: 1.0070881843566895,grad_norm: 0.7785522574936384, iteration: 453163
loss: 0.9555082321166992,grad_norm: 0.7550719930682521, iteration: 453164
loss: 1.005627989768982,grad_norm: 0.8943703666258559, iteration: 453165
loss: 1.0225694179534912,grad_norm: 0.9999998119454311, iteration: 453166
loss: 0.9874157309532166,grad_norm: 0.9999997880784205, iteration: 453167
loss: 1.0266681909561157,grad_norm: 0.7754699325805473, iteration: 453168
loss: 1.0147000551223755,grad_norm: 0.9198844664884758, iteration: 453169
loss: 0.9812307953834534,grad_norm: 0.7533545217980241, iteration: 453170
loss: 0.9720503091812134,grad_norm: 0.7949910944270813, iteration: 453171
loss: 0.9564359784126282,grad_norm: 0.8472540637711898, iteration: 453172
loss: 0.995087742805481,grad_norm: 0.999999072201841, iteration: 453173
loss: 1.0698333978652954,grad_norm: 0.999999114239128, iteration: 453174
loss: 1.0285218954086304,grad_norm: 0.7676695843054374, iteration: 453175
loss: 1.0696146488189697,grad_norm: 0.9999991516834058, iteration: 453176
loss: 0.9832229018211365,grad_norm: 0.9339944614809519, iteration: 453177
loss: 1.0202133655548096,grad_norm: 0.9999993262110058, iteration: 453178
loss: 1.0119657516479492,grad_norm: 0.7883385436613954, iteration: 453179
loss: 0.9865200519561768,grad_norm: 0.7538362268030068, iteration: 453180
loss: 1.0091614723205566,grad_norm: 0.8818125775256356, iteration: 453181
loss: 1.004045844078064,grad_norm: 0.9156728560825623, iteration: 453182
loss: 1.021122932434082,grad_norm: 0.9101182582972089, iteration: 453183
loss: 0.9811935424804688,grad_norm: 0.7010426599735053, iteration: 453184
loss: 0.9917789101600647,grad_norm: 0.7598771482346852, iteration: 453185
loss: 1.0582586526870728,grad_norm: 0.9999996896068257, iteration: 453186
loss: 0.9582135081291199,grad_norm: 0.8589209095748386, iteration: 453187
loss: 1.0274617671966553,grad_norm: 0.7422355562540662, iteration: 453188
loss: 1.0204906463623047,grad_norm: 0.999999657540104, iteration: 453189
loss: 1.0015041828155518,grad_norm: 0.781301803885024, iteration: 453190
loss: 1.0291714668273926,grad_norm: 0.9999996668252141, iteration: 453191
loss: 1.032720685005188,grad_norm: 0.7190198211019524, iteration: 453192
loss: 1.0081039667129517,grad_norm: 0.8595365586359877, iteration: 453193
loss: 0.9938660860061646,grad_norm: 0.9999990930956147, iteration: 453194
loss: 1.0049762725830078,grad_norm: 0.9999995774404549, iteration: 453195
loss: 1.0845221281051636,grad_norm: 0.9999995745641562, iteration: 453196
loss: 0.9793440103530884,grad_norm: 0.8210666941697635, iteration: 453197
loss: 0.9789888262748718,grad_norm: 0.6531483174456176, iteration: 453198
loss: 1.0018157958984375,grad_norm: 0.6885961932155112, iteration: 453199
loss: 1.028897762298584,grad_norm: 0.9999990286598, iteration: 453200
loss: 1.0068670511245728,grad_norm: 0.9999994294900448, iteration: 453201
loss: 0.9845100045204163,grad_norm: 0.8686660890332203, iteration: 453202
loss: 1.026872158050537,grad_norm: 0.9069145383229401, iteration: 453203
loss: 1.0027436017990112,grad_norm: 0.7917963448194885, iteration: 453204
loss: 0.9675846099853516,grad_norm: 0.6993861276769854, iteration: 453205
loss: 0.9957343339920044,grad_norm: 0.8992217908528564, iteration: 453206
loss: 0.972935140132904,grad_norm: 0.7896654515018121, iteration: 453207
loss: 1.0137693881988525,grad_norm: 0.7947775048000744, iteration: 453208
loss: 0.984341561794281,grad_norm: 0.7632599004617793, iteration: 453209
loss: 0.9693068265914917,grad_norm: 0.9474460769016078, iteration: 453210
loss: 1.0133062601089478,grad_norm: 0.972472796586214, iteration: 453211
loss: 1.0407809019088745,grad_norm: 0.7927947145282137, iteration: 453212
loss: 1.0021899938583374,grad_norm: 0.8718632029768902, iteration: 453213
loss: 1.0168743133544922,grad_norm: 0.7926299813295468, iteration: 453214
loss: 1.0376120805740356,grad_norm: 0.8145770322232337, iteration: 453215
loss: 0.9721970558166504,grad_norm: 0.7965536387341967, iteration: 453216
loss: 1.014908790588379,grad_norm: 0.59843925675207, iteration: 453217
loss: 1.0129671096801758,grad_norm: 0.727490929955331, iteration: 453218
loss: 1.0094523429870605,grad_norm: 0.8343704184179456, iteration: 453219
loss: 0.9785388112068176,grad_norm: 0.8357675558771952, iteration: 453220
loss: 1.0105592012405396,grad_norm: 0.7457902982632362, iteration: 453221
loss: 1.0044926404953003,grad_norm: 0.7660821021845472, iteration: 453222
loss: 1.0301679372787476,grad_norm: 0.9999990960401651, iteration: 453223
loss: 1.040893316268921,grad_norm: 0.875899468927217, iteration: 453224
loss: 0.9740217924118042,grad_norm: 0.7216174397743635, iteration: 453225
loss: 1.1268441677093506,grad_norm: 0.7432874341192682, iteration: 453226
loss: 1.0703006982803345,grad_norm: 0.8065313956550362, iteration: 453227
loss: 0.988736629486084,grad_norm: 0.9530622250305942, iteration: 453228
loss: 1.0283236503601074,grad_norm: 0.7443959820648829, iteration: 453229
loss: 0.9805195927619934,grad_norm: 0.9999997537772762, iteration: 453230
loss: 0.9770219326019287,grad_norm: 0.8030425158053178, iteration: 453231
loss: 1.0466190576553345,grad_norm: 0.7388581715566883, iteration: 453232
loss: 0.9699914455413818,grad_norm: 0.8467918472795114, iteration: 453233
loss: 1.034618854522705,grad_norm: 0.9259841768819406, iteration: 453234
loss: 0.9996740221977234,grad_norm: 0.7882372557158481, iteration: 453235
loss: 0.9859825968742371,grad_norm: 0.8135754664837609, iteration: 453236
loss: 0.9922771453857422,grad_norm: 0.9077690221675812, iteration: 453237
loss: 1.3410285711288452,grad_norm: 0.9999996720254263, iteration: 453238
loss: 0.9757749438285828,grad_norm: 0.6912816587557954, iteration: 453239
loss: 1.0163135528564453,grad_norm: 0.8630949651075601, iteration: 453240
loss: 0.9970004558563232,grad_norm: 0.7637327242589614, iteration: 453241
loss: 0.9667915105819702,grad_norm: 0.681026516607587, iteration: 453242
loss: 0.9573290348052979,grad_norm: 0.8528699452042541, iteration: 453243
loss: 0.9771081209182739,grad_norm: 0.8511543858079798, iteration: 453244
loss: 0.9936773777008057,grad_norm: 0.796794885864415, iteration: 453245
loss: 1.0463945865631104,grad_norm: 0.7976833095895834, iteration: 453246
loss: 0.9876276850700378,grad_norm: 0.7899240297119839, iteration: 453247
loss: 0.991430938243866,grad_norm: 0.8383004046529947, iteration: 453248
loss: 1.027115821838379,grad_norm: 0.7024659845266258, iteration: 453249
loss: 0.9914043545722961,grad_norm: 0.8350604344367021, iteration: 453250
loss: 0.9793424010276794,grad_norm: 0.7307654447411059, iteration: 453251
loss: 1.0174052715301514,grad_norm: 0.712611393365846, iteration: 453252
loss: 0.9867364168167114,grad_norm: 0.6584789330468765, iteration: 453253
loss: 0.9662028551101685,grad_norm: 0.797843411580067, iteration: 453254
loss: 0.9639424681663513,grad_norm: 0.7010653840782101, iteration: 453255
loss: 1.0372357368469238,grad_norm: 0.6758519929252732, iteration: 453256
loss: 1.0160107612609863,grad_norm: 0.6818451246089732, iteration: 453257
loss: 1.0404939651489258,grad_norm: 0.8566945093960939, iteration: 453258
loss: 0.9832164645195007,grad_norm: 0.7719140854605716, iteration: 453259
loss: 0.988956093788147,grad_norm: 0.727407787613956, iteration: 453260
loss: 1.0152921676635742,grad_norm: 0.890527318354083, iteration: 453261
loss: 1.0194703340530396,grad_norm: 0.9999998734099603, iteration: 453262
loss: 1.0148652791976929,grad_norm: 0.9999992705527265, iteration: 453263
loss: 1.0096933841705322,grad_norm: 0.7006746957688149, iteration: 453264
loss: 1.0281860828399658,grad_norm: 0.8701684304878914, iteration: 453265
loss: 0.9724757671356201,grad_norm: 0.91085126552669, iteration: 453266
loss: 0.9867033958435059,grad_norm: 0.7925821452196563, iteration: 453267
loss: 1.011728286743164,grad_norm: 0.7946741875708367, iteration: 453268
loss: 0.9635607004165649,grad_norm: 0.9999998227378288, iteration: 453269
loss: 1.0251368284225464,grad_norm: 0.8533375820095642, iteration: 453270
loss: 0.9767915606498718,grad_norm: 0.7586799697704857, iteration: 453271
loss: 0.9661979675292969,grad_norm: 0.9741084537437453, iteration: 453272
loss: 1.0332037210464478,grad_norm: 0.9999991612100239, iteration: 453273
loss: 1.0012226104736328,grad_norm: 0.7524288898969116, iteration: 453274
loss: 0.9994449615478516,grad_norm: 0.708804263114947, iteration: 453275
loss: 0.9964702129364014,grad_norm: 0.9486306882792542, iteration: 453276
loss: 0.996903121471405,grad_norm: 0.666807869550289, iteration: 453277
loss: 0.9526475071907043,grad_norm: 0.6119911030567423, iteration: 453278
loss: 1.0550484657287598,grad_norm: 0.9999991626891084, iteration: 453279
loss: 1.0055723190307617,grad_norm: 0.7496468474603544, iteration: 453280
loss: 0.9951059818267822,grad_norm: 0.791365986793571, iteration: 453281
loss: 1.059789776802063,grad_norm: 0.6876028014387341, iteration: 453282
loss: 0.996030867099762,grad_norm: 0.9999991680754877, iteration: 453283
loss: 1.011914849281311,grad_norm: 0.9999991283337663, iteration: 453284
loss: 1.0143998861312866,grad_norm: 0.9169934183861319, iteration: 453285
loss: 0.9504621624946594,grad_norm: 0.8583781993423375, iteration: 453286
loss: 0.9857403635978699,grad_norm: 0.8417360359814448, iteration: 453287
loss: 0.994331955909729,grad_norm: 0.7979289950317358, iteration: 453288
loss: 1.0251156091690063,grad_norm: 0.763748894108105, iteration: 453289
loss: 0.9873522520065308,grad_norm: 0.8061046396319127, iteration: 453290
loss: 1.0245448350906372,grad_norm: 0.8679487085798498, iteration: 453291
loss: 1.0164624452590942,grad_norm: 0.6647182560865758, iteration: 453292
loss: 1.0180244445800781,grad_norm: 0.8406322961519909, iteration: 453293
loss: 0.9936179518699646,grad_norm: 0.6736907732309959, iteration: 453294
loss: 0.997571587562561,grad_norm: 0.6495359517882512, iteration: 453295
loss: 1.0310372114181519,grad_norm: 0.9999990327799843, iteration: 453296
loss: 0.98677659034729,grad_norm: 0.6416413044356932, iteration: 453297
loss: 0.9855992197990417,grad_norm: 0.7373105072503119, iteration: 453298
loss: 1.0139052867889404,grad_norm: 0.721747299585612, iteration: 453299
loss: 1.0017856359481812,grad_norm: 0.8797788934989108, iteration: 453300
loss: 1.0236172676086426,grad_norm: 0.8257772370249945, iteration: 453301
loss: 1.0446548461914062,grad_norm: 0.9999994541687046, iteration: 453302
loss: 1.0139344930648804,grad_norm: 0.9999993068503908, iteration: 453303
loss: 1.0094687938690186,grad_norm: 0.6605261921799176, iteration: 453304
loss: 1.043451189994812,grad_norm: 0.9999996382002779, iteration: 453305
loss: 1.0237423181533813,grad_norm: 0.8338264315461531, iteration: 453306
loss: 0.9958665370941162,grad_norm: 0.767057525891705, iteration: 453307
loss: 1.0267256498336792,grad_norm: 0.9999994679444449, iteration: 453308
loss: 1.0005601644515991,grad_norm: 0.7396021325905798, iteration: 453309
loss: 0.9653476476669312,grad_norm: 0.8363590562005692, iteration: 453310
loss: 1.0176085233688354,grad_norm: 0.7037247162899042, iteration: 453311
loss: 1.071858525276184,grad_norm: 0.8780358179690145, iteration: 453312
loss: 0.9915539026260376,grad_norm: 0.794993994121204, iteration: 453313
loss: 1.0300657749176025,grad_norm: 0.8051382575191276, iteration: 453314
loss: 1.1434401273727417,grad_norm: 0.7996518872490869, iteration: 453315
loss: 0.9789649248123169,grad_norm: 0.8705099745786655, iteration: 453316
loss: 1.0944175720214844,grad_norm: 0.8224200010695462, iteration: 453317
loss: 1.0092597007751465,grad_norm: 0.7740928468699697, iteration: 453318
loss: 0.994719386100769,grad_norm: 0.9532114642718055, iteration: 453319
loss: 1.0276463031768799,grad_norm: 0.9152804452196691, iteration: 453320
loss: 1.043252944946289,grad_norm: 0.799479325967231, iteration: 453321
loss: 0.9541633725166321,grad_norm: 0.7675691670629482, iteration: 453322
loss: 0.9983801245689392,grad_norm: 0.8182137739382748, iteration: 453323
loss: 0.9978951215744019,grad_norm: 0.7412251112591118, iteration: 453324
loss: 0.9991349577903748,grad_norm: 0.6525743834446215, iteration: 453325
loss: 1.0271363258361816,grad_norm: 0.7515551167174633, iteration: 453326
loss: 0.9944394826889038,grad_norm: 0.9999990290785102, iteration: 453327
loss: 1.0104409456253052,grad_norm: 0.8524556250393684, iteration: 453328
loss: 1.013687252998352,grad_norm: 0.7581088048676373, iteration: 453329
loss: 1.0072709321975708,grad_norm: 0.999999187030184, iteration: 453330
loss: 1.0252957344055176,grad_norm: 0.5875518596202527, iteration: 453331
loss: 1.0794494152069092,grad_norm: 0.9973272451082258, iteration: 453332
loss: 0.9885920286178589,grad_norm: 0.999999465878043, iteration: 453333
loss: 1.0752426385879517,grad_norm: 0.9999997900843766, iteration: 453334
loss: 0.9922417402267456,grad_norm: 0.8877509622900175, iteration: 453335
loss: 1.0038715600967407,grad_norm: 0.7339568425892473, iteration: 453336
loss: 0.9534350037574768,grad_norm: 0.8557244867702831, iteration: 453337
loss: 0.9768813252449036,grad_norm: 0.7031903452431856, iteration: 453338
loss: 0.9795160293579102,grad_norm: 0.8259548903892653, iteration: 453339
loss: 0.9599355459213257,grad_norm: 0.7541635680045177, iteration: 453340
loss: 0.9949365854263306,grad_norm: 0.8266707714677566, iteration: 453341
loss: 1.0087909698486328,grad_norm: 0.6962294908265775, iteration: 453342
loss: 1.0503504276275635,grad_norm: 0.7829735858043451, iteration: 453343
loss: 0.9953653216362,grad_norm: 0.7210784684669589, iteration: 453344
loss: 0.9800405502319336,grad_norm: 0.8635673196652932, iteration: 453345
loss: 1.008684754371643,grad_norm: 0.7719006846417267, iteration: 453346
loss: 0.9967597126960754,grad_norm: 0.7788042031437348, iteration: 453347
loss: 0.9883315563201904,grad_norm: 0.7836354073764177, iteration: 453348
loss: 0.9845336675643921,grad_norm: 0.8572141894844245, iteration: 453349
loss: 1.0547868013381958,grad_norm: 0.8512326046665266, iteration: 453350
loss: 0.9785764813423157,grad_norm: 0.6537065067094628, iteration: 453351
loss: 1.0281546115875244,grad_norm: 0.9999997514091798, iteration: 453352
loss: 1.0228049755096436,grad_norm: 0.6658434171365508, iteration: 453353
loss: 1.0021092891693115,grad_norm: 0.6616670123047481, iteration: 453354
loss: 0.9797073006629944,grad_norm: 0.8116743161237984, iteration: 453355
loss: 0.9747446775436401,grad_norm: 0.7237833318307674, iteration: 453356
loss: 0.9916096925735474,grad_norm: 0.8395191124192877, iteration: 453357
loss: 1.0076185464859009,grad_norm: 0.6995980362907702, iteration: 453358
loss: 0.9968183040618896,grad_norm: 0.8770418883698423, iteration: 453359
loss: 0.9775227308273315,grad_norm: 0.8681352507593354, iteration: 453360
loss: 0.9900932312011719,grad_norm: 0.9033579625498254, iteration: 453361
loss: 1.0055400133132935,grad_norm: 0.7980454879109152, iteration: 453362
loss: 0.9478797912597656,grad_norm: 0.7277748661738436, iteration: 453363
loss: 1.0095183849334717,grad_norm: 0.956444136538696, iteration: 453364
loss: 0.9791293740272522,grad_norm: 0.7386893691631594, iteration: 453365
loss: 0.9589653015136719,grad_norm: 0.8876580200612982, iteration: 453366
loss: 0.9915781617164612,grad_norm: 0.8509246472210052, iteration: 453367
loss: 1.0187188386917114,grad_norm: 0.7735298742593061, iteration: 453368
loss: 0.9824280142784119,grad_norm: 0.7705304692208151, iteration: 453369
loss: 1.0587576627731323,grad_norm: 0.9687491854228719, iteration: 453370
loss: 0.9951058626174927,grad_norm: 0.8476543052822233, iteration: 453371
loss: 0.9791818261146545,grad_norm: 0.7949660194309098, iteration: 453372
loss: 1.045228362083435,grad_norm: 0.7279419443447064, iteration: 453373
loss: 1.0014832019805908,grad_norm: 0.7739941578835156, iteration: 453374
loss: 0.9711413383483887,grad_norm: 0.7440479101888064, iteration: 453375
loss: 0.9828774929046631,grad_norm: 0.7201916304527431, iteration: 453376
loss: 1.0374585390090942,grad_norm: 0.816090221670999, iteration: 453377
loss: 0.999909520149231,grad_norm: 0.7107920790190417, iteration: 453378
loss: 0.9974396228790283,grad_norm: 0.8446292364159557, iteration: 453379
loss: 1.0019115209579468,grad_norm: 0.8347879102783111, iteration: 453380
loss: 1.0306246280670166,grad_norm: 0.9999993485915526, iteration: 453381
loss: 1.0179089307785034,grad_norm: 0.8031721846263563, iteration: 453382
loss: 0.993211030960083,grad_norm: 0.7762365347750415, iteration: 453383
loss: 0.983961284160614,grad_norm: 0.7253009153164134, iteration: 453384
loss: 1.0595734119415283,grad_norm: 0.877200514582452, iteration: 453385
loss: 1.0034064054489136,grad_norm: 0.5916310863119844, iteration: 453386
loss: 1.0161064863204956,grad_norm: 0.6634969672464325, iteration: 453387
loss: 1.0073678493499756,grad_norm: 0.7149941106808797, iteration: 453388
loss: 1.0204092264175415,grad_norm: 0.9620119649450954, iteration: 453389
loss: 1.0207562446594238,grad_norm: 0.7440257754355625, iteration: 453390
loss: 0.9958352446556091,grad_norm: 0.8201179660189046, iteration: 453391
loss: 0.9946548342704773,grad_norm: 0.8168673504455289, iteration: 453392
loss: 0.9989266395568848,grad_norm: 0.6202085794197302, iteration: 453393
loss: 1.027891993522644,grad_norm: 0.6849700302994449, iteration: 453394
loss: 1.059409260749817,grad_norm: 0.8462620364640299, iteration: 453395
loss: 1.039138674736023,grad_norm: 0.75660146937691, iteration: 453396
loss: 1.0121146440505981,grad_norm: 0.9999994111098516, iteration: 453397
loss: 0.9940325021743774,grad_norm: 0.9115069106704528, iteration: 453398
loss: 1.0437108278274536,grad_norm: 0.8157966307251334, iteration: 453399
loss: 0.9517750144004822,grad_norm: 0.8766568736715598, iteration: 453400
loss: 0.9824524521827698,grad_norm: 0.7150131769293896, iteration: 453401
loss: 1.1109020709991455,grad_norm: 0.8825857988226615, iteration: 453402
loss: 0.9732509851455688,grad_norm: 0.829056864168184, iteration: 453403
loss: 0.9868229627609253,grad_norm: 0.9646649137184332, iteration: 453404
loss: 1.0159070491790771,grad_norm: 0.9999989976841935, iteration: 453405
loss: 0.9657332301139832,grad_norm: 0.7293839975113717, iteration: 453406
loss: 1.0365793704986572,grad_norm: 0.6395023309406711, iteration: 453407
loss: 0.9719552993774414,grad_norm: 0.8243642233637271, iteration: 453408
loss: 0.953594446182251,grad_norm: 0.8390636163838222, iteration: 453409
loss: 1.0104928016662598,grad_norm: 0.7986069545213133, iteration: 453410
loss: 1.0131391286849976,grad_norm: 0.8270875906207599, iteration: 453411
loss: 0.9934501051902771,grad_norm: 0.6559221416329363, iteration: 453412
loss: 1.0207122564315796,grad_norm: 0.7127252825948174, iteration: 453413
loss: 0.9959406852722168,grad_norm: 0.8398524633212286, iteration: 453414
loss: 0.9741826057434082,grad_norm: 0.7376924029886789, iteration: 453415
loss: 0.9835412502288818,grad_norm: 0.6900261927335523, iteration: 453416
loss: 1.0289326906204224,grad_norm: 0.7248206901611601, iteration: 453417
loss: 1.008276104927063,grad_norm: 0.6811409024297865, iteration: 453418
loss: 0.9969331622123718,grad_norm: 0.954742013000806, iteration: 453419
loss: 1.038386583328247,grad_norm: 0.6306053200420152, iteration: 453420
loss: 0.9984016418457031,grad_norm: 0.9200525197382211, iteration: 453421
loss: 1.0386028289794922,grad_norm: 0.8955862537479259, iteration: 453422
loss: 1.0226260423660278,grad_norm: 0.8520139739581116, iteration: 453423
loss: 0.9581778049468994,grad_norm: 0.6119186132551506, iteration: 453424
loss: 1.0155154466629028,grad_norm: 0.6349636949976539, iteration: 453425
loss: 1.0151809453964233,grad_norm: 0.6813884184452133, iteration: 453426
loss: 1.0458745956420898,grad_norm: 0.8542108176067484, iteration: 453427
loss: 1.0421525239944458,grad_norm: 0.6859614576523811, iteration: 453428
loss: 0.9821249842643738,grad_norm: 0.6272734767996327, iteration: 453429
loss: 1.0165307521820068,grad_norm: 0.8497868128424906, iteration: 453430
loss: 0.9988181591033936,grad_norm: 0.6580174306740864, iteration: 453431
loss: 0.9943658709526062,grad_norm: 0.6730059326320833, iteration: 453432
loss: 1.0028244256973267,grad_norm: 0.9891503162390131, iteration: 453433
loss: 1.0069799423217773,grad_norm: 0.9774248401464083, iteration: 453434
loss: 1.032809853553772,grad_norm: 0.913863951999398, iteration: 453435
loss: 0.9606974720954895,grad_norm: 0.77511877668862, iteration: 453436
loss: 0.9697480797767639,grad_norm: 0.8161953059382466, iteration: 453437
loss: 1.0314160585403442,grad_norm: 0.7717737944331197, iteration: 453438
loss: 1.0226175785064697,grad_norm: 0.7162914843186812, iteration: 453439
loss: 1.0214518308639526,grad_norm: 0.9999996650379158, iteration: 453440
loss: 0.9956645965576172,grad_norm: 0.6462663721236843, iteration: 453441
loss: 0.9606897234916687,grad_norm: 0.7453746788622785, iteration: 453442
loss: 1.020660638809204,grad_norm: 0.9661216021682435, iteration: 453443
loss: 0.9801834225654602,grad_norm: 0.7455384536730034, iteration: 453444
loss: 0.9790012240409851,grad_norm: 0.8216357868176601, iteration: 453445
loss: 0.987082839012146,grad_norm: 0.6824778415359185, iteration: 453446
loss: 0.9906234741210938,grad_norm: 0.8315532962593031, iteration: 453447
loss: 0.9982990622520447,grad_norm: 0.7016816518877379, iteration: 453448
loss: 1.009787678718567,grad_norm: 0.999999515566578, iteration: 453449
loss: 1.0094505548477173,grad_norm: 0.9133298327147802, iteration: 453450
loss: 1.0263941287994385,grad_norm: 0.999999069715463, iteration: 453451
loss: 0.9733672738075256,grad_norm: 0.7058680309492138, iteration: 453452
loss: 0.9800509214401245,grad_norm: 0.9999989403282622, iteration: 453453
loss: 1.0117778778076172,grad_norm: 0.7633137199186533, iteration: 453454
loss: 0.9740020036697388,grad_norm: 0.6776188600163892, iteration: 453455
loss: 0.9772500991821289,grad_norm: 0.7394328512551469, iteration: 453456
loss: 0.9898704290390015,grad_norm: 0.7008520322497084, iteration: 453457
loss: 1.0139917135238647,grad_norm: 0.6046977876910845, iteration: 453458
loss: 0.9861559867858887,grad_norm: 0.697666780808848, iteration: 453459
loss: 0.9864622354507446,grad_norm: 0.9999991747904426, iteration: 453460
loss: 1.027681589126587,grad_norm: 0.6993569402865174, iteration: 453461
loss: 1.001118779182434,grad_norm: 0.7797451279580467, iteration: 453462
loss: 1.026313066482544,grad_norm: 0.7336358843504094, iteration: 453463
loss: 0.980602502822876,grad_norm: 0.7780254348873052, iteration: 453464
loss: 0.9912973046302795,grad_norm: 0.9999999567758935, iteration: 453465
loss: 0.9815970063209534,grad_norm: 0.661095740431587, iteration: 453466
loss: 1.0242104530334473,grad_norm: 0.9999998650098347, iteration: 453467
loss: 0.9837323427200317,grad_norm: 0.7685639552502449, iteration: 453468
loss: 0.9711380004882812,grad_norm: 0.7046208970135929, iteration: 453469
loss: 0.9488433599472046,grad_norm: 0.7688553707046141, iteration: 453470
loss: 0.9858798980712891,grad_norm: 0.7583992498972921, iteration: 453471
loss: 0.9717766642570496,grad_norm: 0.7814170698073076, iteration: 453472
loss: 0.9894711375236511,grad_norm: 0.7245333225235367, iteration: 453473
loss: 1.0052098035812378,grad_norm: 0.9246105003990216, iteration: 453474
loss: 0.9925323724746704,grad_norm: 0.8894289179147168, iteration: 453475
loss: 1.0104228258132935,grad_norm: 0.8498191706655639, iteration: 453476
loss: 0.9917353391647339,grad_norm: 0.9999996230352937, iteration: 453477
loss: 0.9767268896102905,grad_norm: 0.7811862392188766, iteration: 453478
loss: 0.9614946842193604,grad_norm: 0.8515208597715759, iteration: 453479
loss: 1.012095332145691,grad_norm: 0.7313349435715685, iteration: 453480
loss: 0.9835580587387085,grad_norm: 0.7519981815524899, iteration: 453481
loss: 1.0107946395874023,grad_norm: 0.6749328262657069, iteration: 453482
loss: 1.032266616821289,grad_norm: 0.8416577036045138, iteration: 453483
loss: 1.046970009803772,grad_norm: 0.7948702884795861, iteration: 453484
loss: 1.0298750400543213,grad_norm: 0.7880777694691308, iteration: 453485
loss: 1.018998384475708,grad_norm: 0.9999997158812418, iteration: 453486
loss: 1.0025516748428345,grad_norm: 0.8723511322642988, iteration: 453487
loss: 1.0119022130966187,grad_norm: 0.665012360213845, iteration: 453488
loss: 1.0170108079910278,grad_norm: 0.8315148406168188, iteration: 453489
loss: 1.021181344985962,grad_norm: 0.6351996453288967, iteration: 453490
loss: 1.0228049755096436,grad_norm: 0.8323430782962197, iteration: 453491
loss: 0.993039608001709,grad_norm: 0.7314727683298408, iteration: 453492
loss: 1.075104832649231,grad_norm: 0.9377262942694965, iteration: 453493
loss: 0.9930158853530884,grad_norm: 0.6824498283245504, iteration: 453494
loss: 1.0242456197738647,grad_norm: 0.8561345675540859, iteration: 453495
loss: 1.0133787393569946,grad_norm: 0.8583022112242764, iteration: 453496
loss: 0.950573742389679,grad_norm: 0.9807118121674009, iteration: 453497
loss: 0.9768049716949463,grad_norm: 0.7669845789620536, iteration: 453498
loss: 1.0209959745407104,grad_norm: 0.645538231207619, iteration: 453499
loss: 0.9857627153396606,grad_norm: 0.855078127382593, iteration: 453500
loss: 0.9974178671836853,grad_norm: 0.7820989500944949, iteration: 453501
loss: 0.9929892420768738,grad_norm: 0.7903190377650683, iteration: 453502
loss: 0.9800606966018677,grad_norm: 0.6694184991002383, iteration: 453503
loss: 1.0098412036895752,grad_norm: 0.8046087016817658, iteration: 453504
loss: 0.9520218968391418,grad_norm: 0.7783566055665971, iteration: 453505
loss: 1.0152510404586792,grad_norm: 0.7320996436990296, iteration: 453506
loss: 1.013651728630066,grad_norm: 0.8662678684836135, iteration: 453507
loss: 0.9871355295181274,grad_norm: 0.7445042050860773, iteration: 453508
loss: 1.0026179552078247,grad_norm: 0.6978327510960903, iteration: 453509
loss: 0.9665712714195251,grad_norm: 0.7175572704482083, iteration: 453510
loss: 0.9437787532806396,grad_norm: 0.7744235385334446, iteration: 453511
loss: 0.9877802133560181,grad_norm: 0.6653870920112096, iteration: 453512
loss: 1.0053375959396362,grad_norm: 0.6958206372743653, iteration: 453513
loss: 1.0930485725402832,grad_norm: 0.961839209194386, iteration: 453514
loss: 1.0046037435531616,grad_norm: 0.8222484256202497, iteration: 453515
loss: 1.0547661781311035,grad_norm: 0.6504785350113039, iteration: 453516
loss: 1.018467903137207,grad_norm: 0.6604496006231487, iteration: 453517
loss: 0.9910029172897339,grad_norm: 0.664336544398777, iteration: 453518
loss: 0.986272931098938,grad_norm: 0.8475259711029417, iteration: 453519
loss: 1.0102484226226807,grad_norm: 0.8189221825308566, iteration: 453520
loss: 1.072568416595459,grad_norm: 0.781680687770167, iteration: 453521
loss: 1.009017825126648,grad_norm: 0.7575276786025105, iteration: 453522
loss: 1.0379177331924438,grad_norm: 0.9999998818238937, iteration: 453523
loss: 1.0250252485275269,grad_norm: 0.6913047984025195, iteration: 453524
loss: 1.0040606260299683,grad_norm: 0.8085910998842585, iteration: 453525
loss: 0.9813748002052307,grad_norm: 0.6305914249253564, iteration: 453526
loss: 1.008318543434143,grad_norm: 0.6872900552798729, iteration: 453527
loss: 1.0314478874206543,grad_norm: 0.9999991183407829, iteration: 453528
loss: 1.0391216278076172,grad_norm: 0.9999999422365617, iteration: 453529
loss: 0.98480623960495,grad_norm: 0.6993435105364137, iteration: 453530
loss: 0.9840536713600159,grad_norm: 0.7973760291479534, iteration: 453531
loss: 0.9712170362472534,grad_norm: 0.8693735550303296, iteration: 453532
loss: 1.0018755197525024,grad_norm: 0.7108273899185662, iteration: 453533
loss: 1.032989740371704,grad_norm: 0.7780187979910862, iteration: 453534
loss: 0.9996141791343689,grad_norm: 0.8669128353338671, iteration: 453535
loss: 1.029252052307129,grad_norm: 0.7196635337181956, iteration: 453536
loss: 1.0077643394470215,grad_norm: 0.7317134046671575, iteration: 453537
loss: 0.9465480446815491,grad_norm: 0.7903010936542418, iteration: 453538
loss: 0.9854887127876282,grad_norm: 0.7366991822305389, iteration: 453539
loss: 1.0425124168395996,grad_norm: 0.8958463111799088, iteration: 453540
loss: 1.1021792888641357,grad_norm: 0.9999995374787151, iteration: 453541
loss: 0.9642433524131775,grad_norm: 0.8175058456056999, iteration: 453542
loss: 1.0284438133239746,grad_norm: 0.7238574222467422, iteration: 453543
loss: 0.9995936155319214,grad_norm: 0.923582213871143, iteration: 453544
loss: 0.993933379650116,grad_norm: 0.9999995222338213, iteration: 453545
loss: 1.0023926496505737,grad_norm: 0.8774643942327367, iteration: 453546
loss: 1.0267469882965088,grad_norm: 0.6373994381953004, iteration: 453547
loss: 0.9989343285560608,grad_norm: 0.6979568039751742, iteration: 453548
loss: 0.9861875772476196,grad_norm: 0.7299270322448699, iteration: 453549
loss: 1.0049247741699219,grad_norm: 0.9999997384858929, iteration: 453550
loss: 0.9552156329154968,grad_norm: 0.8746998037724034, iteration: 453551
loss: 0.9839029312133789,grad_norm: 0.7293212556173891, iteration: 453552
loss: 0.9798569083213806,grad_norm: 0.7924058354221514, iteration: 453553
loss: 1.001909852027893,grad_norm: 0.764388548861443, iteration: 453554
loss: 0.9796429872512817,grad_norm: 0.7703269131003636, iteration: 453555
loss: 0.9879420399665833,grad_norm: 0.8092709237544149, iteration: 453556
loss: 1.0192683935165405,grad_norm: 0.8590077241803449, iteration: 453557
loss: 1.001084327697754,grad_norm: 0.8988940280640653, iteration: 453558
loss: 1.0029504299163818,grad_norm: 0.7158926504207727, iteration: 453559
loss: 1.0014668703079224,grad_norm: 0.825621153476219, iteration: 453560
loss: 0.982483983039856,grad_norm: 0.7193376617163512, iteration: 453561
loss: 0.9717698097229004,grad_norm: 0.7175349158566622, iteration: 453562
loss: 1.036979079246521,grad_norm: 0.9999991461544545, iteration: 453563
loss: 0.9634426832199097,grad_norm: 0.7844519944198933, iteration: 453564
loss: 1.0027555227279663,grad_norm: 0.874060986814392, iteration: 453565
loss: 1.0003650188446045,grad_norm: 0.7170114547276037, iteration: 453566
loss: 1.00845468044281,grad_norm: 0.8364834028688747, iteration: 453567
loss: 1.0355221033096313,grad_norm: 0.847341991762545, iteration: 453568
loss: 1.0306336879730225,grad_norm: 0.8622450380219248, iteration: 453569
loss: 0.9858872294425964,grad_norm: 0.770769738681165, iteration: 453570
loss: 0.9847514629364014,grad_norm: 0.6481990386275821, iteration: 453571
loss: 0.9904718399047852,grad_norm: 0.6561331458990867, iteration: 453572
loss: 1.0006107091903687,grad_norm: 0.7951503981337753, iteration: 453573
loss: 0.9896275997161865,grad_norm: 0.9053371238189032, iteration: 453574
loss: 1.003234624862671,grad_norm: 0.8509741357083853, iteration: 453575
loss: 1.0202585458755493,grad_norm: 0.6759216694802657, iteration: 453576
loss: 0.9955171942710876,grad_norm: 0.9242988670526735, iteration: 453577
loss: 0.9751622676849365,grad_norm: 0.8141877665447069, iteration: 453578
loss: 0.9887351989746094,grad_norm: 0.7333865903556579, iteration: 453579
loss: 0.9631297588348389,grad_norm: 0.8477641228396379, iteration: 453580
loss: 0.9920923709869385,grad_norm: 0.7059803384789439, iteration: 453581
loss: 0.9890478253364563,grad_norm: 0.7418331513788464, iteration: 453582
loss: 1.000754714012146,grad_norm: 0.8055843557621839, iteration: 453583
loss: 0.976895272731781,grad_norm: 0.7749518292812962, iteration: 453584
loss: 0.9766674637794495,grad_norm: 0.6774944719438837, iteration: 453585
loss: 0.9998621344566345,grad_norm: 0.9115967202235766, iteration: 453586
loss: 1.0185438394546509,grad_norm: 0.7377157609534077, iteration: 453587
loss: 1.021377682685852,grad_norm: 0.7501411501349119, iteration: 453588
loss: 1.0173102617263794,grad_norm: 0.6878243644068239, iteration: 453589
loss: 0.9832103848457336,grad_norm: 0.8899349083883301, iteration: 453590
loss: 1.00493586063385,grad_norm: 0.690328825961787, iteration: 453591
loss: 0.9759109020233154,grad_norm: 0.9999991822243535, iteration: 453592
loss: 0.9799959063529968,grad_norm: 0.8208902964906681, iteration: 453593
loss: 0.9574905037879944,grad_norm: 0.8373470142229088, iteration: 453594
loss: 1.0068999528884888,grad_norm: 0.6391929304989784, iteration: 453595
loss: 0.9891065359115601,grad_norm: 0.7840270269702888, iteration: 453596
loss: 1.0095999240875244,grad_norm: 0.7449025838240718, iteration: 453597
loss: 0.9637011289596558,grad_norm: 0.7914025277106895, iteration: 453598
loss: 1.021523118019104,grad_norm: 0.7539331752232986, iteration: 453599
loss: 1.0661962032318115,grad_norm: 0.9999992665639644, iteration: 453600
loss: 0.9721536636352539,grad_norm: 0.7756893431204405, iteration: 453601
loss: 0.9968004822731018,grad_norm: 0.8283022369479851, iteration: 453602
loss: 1.0125129222869873,grad_norm: 0.7870367613151534, iteration: 453603
loss: 0.9968885779380798,grad_norm: 0.7779866434281577, iteration: 453604
loss: 0.991715669631958,grad_norm: 0.6232780442237409, iteration: 453605
loss: 0.9824583530426025,grad_norm: 0.694616950988142, iteration: 453606
loss: 1.0230741500854492,grad_norm: 0.8094250529009555, iteration: 453607
loss: 0.9748339653015137,grad_norm: 0.79719347096741, iteration: 453608
loss: 0.9824758172035217,grad_norm: 0.5939244153926194, iteration: 453609
loss: 0.9500194787979126,grad_norm: 0.7050138238660787, iteration: 453610
loss: 0.9973671436309814,grad_norm: 0.8354600904368662, iteration: 453611
loss: 0.9938246011734009,grad_norm: 0.9999990543601579, iteration: 453612
loss: 0.998744785785675,grad_norm: 0.7465296001892552, iteration: 453613
loss: 1.0256580114364624,grad_norm: 0.6994459839208774, iteration: 453614
loss: 1.0084363222122192,grad_norm: 0.7805462063601974, iteration: 453615
loss: 1.0051740407943726,grad_norm: 0.7050584735657575, iteration: 453616
loss: 0.9783144593238831,grad_norm: 0.8224003235034758, iteration: 453617
loss: 0.9770522713661194,grad_norm: 0.8308048704112243, iteration: 453618
loss: 0.9899763464927673,grad_norm: 0.8654491696929694, iteration: 453619
loss: 1.002077341079712,grad_norm: 0.7241449200780627, iteration: 453620
loss: 1.0105708837509155,grad_norm: 0.6780878594268479, iteration: 453621
loss: 1.013157844543457,grad_norm: 0.7644716842096307, iteration: 453622
loss: 1.0148051977157593,grad_norm: 0.7622587184465256, iteration: 453623
loss: 0.9895786046981812,grad_norm: 0.9999991011743629, iteration: 453624
loss: 1.0054078102111816,grad_norm: 0.9085194759782815, iteration: 453625
loss: 0.9835671186447144,grad_norm: 0.6116513691177393, iteration: 453626
loss: 0.9941447973251343,grad_norm: 0.9806792576427539, iteration: 453627
loss: 0.9867034554481506,grad_norm: 0.8701393920999924, iteration: 453628
loss: 0.9767091870307922,grad_norm: 0.7923154840031618, iteration: 453629
loss: 1.0124794244766235,grad_norm: 0.9999998429689498, iteration: 453630
loss: 0.9905799627304077,grad_norm: 0.893262074065923, iteration: 453631
loss: 0.9734665155410767,grad_norm: 0.8789789182706245, iteration: 453632
loss: 0.9828007817268372,grad_norm: 0.6640245293287979, iteration: 453633
loss: 1.0088976621627808,grad_norm: 0.7050754888513939, iteration: 453634
loss: 1.0128926038742065,grad_norm: 0.7783606305127417, iteration: 453635
loss: 0.9956809878349304,grad_norm: 0.6598660387976283, iteration: 453636
loss: 0.9648423194885254,grad_norm: 0.7897897077562606, iteration: 453637
loss: 0.9895245432853699,grad_norm: 0.6325801072255844, iteration: 453638
loss: 0.9995889663696289,grad_norm: 0.7279326793346287, iteration: 453639
loss: 0.9939239025115967,grad_norm: 0.797144590601531, iteration: 453640
loss: 1.0156482458114624,grad_norm: 0.7844215909140247, iteration: 453641
loss: 0.9930317401885986,grad_norm: 0.7046010750015512, iteration: 453642
loss: 0.9987717270851135,grad_norm: 0.7393996911314242, iteration: 453643
loss: 1.0022855997085571,grad_norm: 0.715776402371877, iteration: 453644
loss: 1.057699203491211,grad_norm: 0.9999996247058317, iteration: 453645
loss: 0.9839963912963867,grad_norm: 0.72210749913614, iteration: 453646
loss: 0.9845253229141235,grad_norm: 0.7772684884181988, iteration: 453647
loss: 0.9975442886352539,grad_norm: 0.7844820101394668, iteration: 453648
loss: 1.004393219947815,grad_norm: 0.6553755675405425, iteration: 453649
loss: 0.9943737387657166,grad_norm: 0.6638954083751425, iteration: 453650
loss: 0.9976021647453308,grad_norm: 0.6819528403530357, iteration: 453651
loss: 1.0213735103607178,grad_norm: 0.7775476003597995, iteration: 453652
loss: 1.0085028409957886,grad_norm: 0.7251607368604782, iteration: 453653
loss: 0.9952961802482605,grad_norm: 0.81602580432643, iteration: 453654
loss: 1.0143158435821533,grad_norm: 0.9999992199255688, iteration: 453655
loss: 0.9926603436470032,grad_norm: 0.8281583557430138, iteration: 453656
loss: 1.0130029916763306,grad_norm: 0.9441760283260319, iteration: 453657
loss: 1.019562005996704,grad_norm: 0.7254763536978808, iteration: 453658
loss: 0.9871472120285034,grad_norm: 0.9999991119185682, iteration: 453659
loss: 0.9937416315078735,grad_norm: 0.7275707345750376, iteration: 453660
loss: 0.9875185489654541,grad_norm: 0.7468421896472675, iteration: 453661
loss: 0.9772974252700806,grad_norm: 0.8255554819671485, iteration: 453662
loss: 1.016459584236145,grad_norm: 0.8038836064857185, iteration: 453663
loss: 0.97890704870224,grad_norm: 0.8553600620864735, iteration: 453664
loss: 1.0411852598190308,grad_norm: 0.8734453007403401, iteration: 453665
loss: 0.993866503238678,grad_norm: 0.8097137212944191, iteration: 453666
loss: 0.9726381897926331,grad_norm: 0.6379760692761138, iteration: 453667
loss: 1.0269124507904053,grad_norm: 0.786082239287785, iteration: 453668
loss: 0.9999310374259949,grad_norm: 0.6598808733515906, iteration: 453669
loss: 0.9940935969352722,grad_norm: 0.7980643200935691, iteration: 453670
loss: 0.9941209554672241,grad_norm: 0.724223484745965, iteration: 453671
loss: 1.0030503273010254,grad_norm: 0.7648778632602548, iteration: 453672
loss: 0.9723438620567322,grad_norm: 0.8110261952779183, iteration: 453673
loss: 0.9609816670417786,grad_norm: 0.9999995962909168, iteration: 453674
loss: 0.997550368309021,grad_norm: 0.8446742118768819, iteration: 453675
loss: 1.0042024850845337,grad_norm: 0.9999999347277235, iteration: 453676
loss: 1.02274751663208,grad_norm: 0.7774116817718454, iteration: 453677
loss: 0.9862528443336487,grad_norm: 0.7481533968513242, iteration: 453678
loss: 1.022621750831604,grad_norm: 0.6964346719651706, iteration: 453679
loss: 1.012464165687561,grad_norm: 0.8389399311257996, iteration: 453680
loss: 1.0020884275436401,grad_norm: 0.7444668952474512, iteration: 453681
loss: 1.0090651512145996,grad_norm: 0.8072587884366065, iteration: 453682
loss: 0.9660293459892273,grad_norm: 0.708840940763511, iteration: 453683
loss: 0.9712153077125549,grad_norm: 0.9296922699217154, iteration: 453684
loss: 0.995501697063446,grad_norm: 0.9534448944922633, iteration: 453685
loss: 1.009535789489746,grad_norm: 0.9174108687318612, iteration: 453686
loss: 1.009192943572998,grad_norm: 0.8441396047384322, iteration: 453687
loss: 1.0156874656677246,grad_norm: 0.6877812247297306, iteration: 453688
loss: 1.025748610496521,grad_norm: 0.8600512755145341, iteration: 453689
loss: 0.9996369481086731,grad_norm: 0.7101031965407949, iteration: 453690
loss: 0.9816378355026245,grad_norm: 0.9999995392126441, iteration: 453691
loss: 1.039476752281189,grad_norm: 0.7035380693866861, iteration: 453692
loss: 1.013566493988037,grad_norm: 0.7437072379783071, iteration: 453693
loss: 0.9747772812843323,grad_norm: 0.7471163711987837, iteration: 453694
loss: 1.0066560506820679,grad_norm: 0.6305723736610381, iteration: 453695
loss: 1.0204013586044312,grad_norm: 0.7092233755565474, iteration: 453696
loss: 1.0179036855697632,grad_norm: 0.8458905922510851, iteration: 453697
loss: 0.9886792898178101,grad_norm: 0.9999991072196847, iteration: 453698
loss: 1.017435908317566,grad_norm: 0.9999996301586062, iteration: 453699
loss: 1.0084134340286255,grad_norm: 0.6666618026875796, iteration: 453700
loss: 0.9696665406227112,grad_norm: 0.8419779736970406, iteration: 453701
loss: 0.9429786205291748,grad_norm: 0.8514183194782161, iteration: 453702
loss: 1.009647250175476,grad_norm: 0.8317826931813266, iteration: 453703
loss: 1.002565622329712,grad_norm: 0.6845583090177774, iteration: 453704
loss: 1.0040242671966553,grad_norm: 0.8126064889220516, iteration: 453705
loss: 1.01274836063385,grad_norm: 0.8983780504532031, iteration: 453706
loss: 0.9864169955253601,grad_norm: 0.8044933547847682, iteration: 453707
loss: 1.0087016820907593,grad_norm: 0.6045512953478683, iteration: 453708
loss: 1.0650767087936401,grad_norm: 0.9999998681623731, iteration: 453709
loss: 0.9779994487762451,grad_norm: 0.7619461613945023, iteration: 453710
loss: 0.9890518188476562,grad_norm: 0.9013515456152041, iteration: 453711
loss: 1.0116573572158813,grad_norm: 0.8947615997131018, iteration: 453712
loss: 0.9766606688499451,grad_norm: 0.777902320121402, iteration: 453713
loss: 1.0065866708755493,grad_norm: 0.9999994804660106, iteration: 453714
loss: 1.0021347999572754,grad_norm: 0.6900499787597966, iteration: 453715
loss: 1.0112587213516235,grad_norm: 0.6786600460533273, iteration: 453716
loss: 1.0402249097824097,grad_norm: 0.9999994431858671, iteration: 453717
loss: 0.9803453683853149,grad_norm: 0.7507255162030756, iteration: 453718
loss: 0.9996717572212219,grad_norm: 0.6924335321468491, iteration: 453719
loss: 0.9993413686752319,grad_norm: 0.7200990578048378, iteration: 453720
loss: 1.0183875560760498,grad_norm: 0.8218607753856307, iteration: 453721
loss: 0.9467374086380005,grad_norm: 0.6276852554793926, iteration: 453722
loss: 1.040417194366455,grad_norm: 0.7713289271763154, iteration: 453723
loss: 0.9744012355804443,grad_norm: 0.6831673265958117, iteration: 453724
loss: 0.9817408919334412,grad_norm: 0.799294583683977, iteration: 453725
loss: 0.9928025603294373,grad_norm: 0.7418189581795184, iteration: 453726
loss: 0.9577056765556335,grad_norm: 0.7286693081388453, iteration: 453727
loss: 1.0521103143692017,grad_norm: 0.9999991102280427, iteration: 453728
loss: 1.0036298036575317,grad_norm: 0.7772808385832825, iteration: 453729
loss: 1.0888780355453491,grad_norm: 0.9999992810089291, iteration: 453730
loss: 0.9999982118606567,grad_norm: 0.566511251738028, iteration: 453731
loss: 0.9827166199684143,grad_norm: 0.782067266929994, iteration: 453732
loss: 0.9884968400001526,grad_norm: 0.720174794383672, iteration: 453733
loss: 0.993817150592804,grad_norm: 0.8596255136175412, iteration: 453734
loss: 0.9945849776268005,grad_norm: 0.6688193980253194, iteration: 453735
loss: 0.9864826798439026,grad_norm: 0.7467078849033505, iteration: 453736
loss: 0.9884719848632812,grad_norm: 0.6280523096050001, iteration: 453737
loss: 1.0154210329055786,grad_norm: 0.8038271735399979, iteration: 453738
loss: 1.0343167781829834,grad_norm: 0.8907352990029247, iteration: 453739
loss: 0.9848664999008179,grad_norm: 0.7699410541745209, iteration: 453740
loss: 0.9670467376708984,grad_norm: 0.7961360532310976, iteration: 453741
loss: 1.0047377347946167,grad_norm: 0.7353043397481246, iteration: 453742
loss: 0.9696460366249084,grad_norm: 0.6925408100657838, iteration: 453743
loss: 0.9805130362510681,grad_norm: 0.7124616126650146, iteration: 453744
loss: 0.9704185724258423,grad_norm: 0.7115786375346193, iteration: 453745
loss: 1.0062181949615479,grad_norm: 0.7117461529660357, iteration: 453746
loss: 0.997681736946106,grad_norm: 0.73944624382646, iteration: 453747
loss: 1.1388391256332397,grad_norm: 0.9999998004276305, iteration: 453748
loss: 0.9835498332977295,grad_norm: 0.7151775498069459, iteration: 453749
loss: 0.968158483505249,grad_norm: 0.825387976003342, iteration: 453750
loss: 0.9691664576530457,grad_norm: 0.6931508996557263, iteration: 453751
loss: 1.3770217895507812,grad_norm: 0.9999996872443805, iteration: 453752
loss: 0.9988217353820801,grad_norm: 0.742273704780922, iteration: 453753
loss: 0.968920886516571,grad_norm: 0.757366532716857, iteration: 453754
loss: 1.0079741477966309,grad_norm: 0.762501909514368, iteration: 453755
loss: 0.9684579372406006,grad_norm: 0.900731547001724, iteration: 453756
loss: 1.0124289989471436,grad_norm: 0.6426023854071986, iteration: 453757
loss: 1.0045013427734375,grad_norm: 0.7498747789350488, iteration: 453758
loss: 1.0227984189987183,grad_norm: 0.8538653075522681, iteration: 453759
loss: 1.0055793523788452,grad_norm: 0.654475601487635, iteration: 453760
loss: 1.073776364326477,grad_norm: 0.9999989698945817, iteration: 453761
loss: 0.9774250984191895,grad_norm: 0.792664969307082, iteration: 453762
loss: 0.9993669390678406,grad_norm: 0.7000517618113559, iteration: 453763
loss: 0.9962210655212402,grad_norm: 0.7443077570904911, iteration: 453764
loss: 1.0405678749084473,grad_norm: 0.9999990765795603, iteration: 453765
loss: 0.9775223731994629,grad_norm: 0.6987890323116794, iteration: 453766
loss: 0.9919607043266296,grad_norm: 0.932649300464048, iteration: 453767
loss: 1.0114850997924805,grad_norm: 0.6055278678292552, iteration: 453768
loss: 0.9989356994628906,grad_norm: 0.8565354515879249, iteration: 453769
loss: 1.0116509199142456,grad_norm: 0.9999994260452756, iteration: 453770
loss: 1.001274585723877,grad_norm: 0.9999994112510774, iteration: 453771
loss: 1.0407646894454956,grad_norm: 0.7887239609736829, iteration: 453772
loss: 1.0452169179916382,grad_norm: 0.9166651060212022, iteration: 453773
loss: 1.0179721117019653,grad_norm: 0.6267511973866567, iteration: 453774
loss: 0.9993931651115417,grad_norm: 0.9812027215429533, iteration: 453775
loss: 0.9802203178405762,grad_norm: 0.826615264954517, iteration: 453776
loss: 1.0314757823944092,grad_norm: 0.9999996276301013, iteration: 453777
loss: 1.0352495908737183,grad_norm: 0.9490384857805089, iteration: 453778
loss: 0.9955629110336304,grad_norm: 0.6745597634916644, iteration: 453779
loss: 0.9716165661811829,grad_norm: 0.8649484428172209, iteration: 453780
loss: 1.020822286605835,grad_norm: 0.6639136907139829, iteration: 453781
loss: 1.025524616241455,grad_norm: 0.9999992644089806, iteration: 453782
loss: 1.0035462379455566,grad_norm: 0.9342723065744134, iteration: 453783
loss: 0.9885413646697998,grad_norm: 0.7119612637766056, iteration: 453784
loss: 1.0220931768417358,grad_norm: 0.6610978076953496, iteration: 453785
loss: 0.9828808307647705,grad_norm: 0.7576887117390585, iteration: 453786
loss: 0.9833353161811829,grad_norm: 0.7669170209455386, iteration: 453787
loss: 0.9823388457298279,grad_norm: 0.8363608644777166, iteration: 453788
loss: 0.997576117515564,grad_norm: 0.8712345275829555, iteration: 453789
loss: 0.9700107574462891,grad_norm: 0.80233532070508, iteration: 453790
loss: 1.007217526435852,grad_norm: 0.883743224472082, iteration: 453791
loss: 1.020841121673584,grad_norm: 0.6528588764882649, iteration: 453792
loss: 0.9900652170181274,grad_norm: 0.7628065717280432, iteration: 453793
loss: 0.9964005947113037,grad_norm: 0.7067652127114809, iteration: 453794
loss: 0.9989516139030457,grad_norm: 0.9125378086856677, iteration: 453795
loss: 0.9788492321968079,grad_norm: 0.8723670804136076, iteration: 453796
loss: 0.9909579157829285,grad_norm: 0.8360868089395707, iteration: 453797
loss: 1.0288407802581787,grad_norm: 0.7489569679208927, iteration: 453798
loss: 0.9960712790489197,grad_norm: 0.9511356656680225, iteration: 453799
loss: 0.9778206944465637,grad_norm: 0.8423004312865227, iteration: 453800
loss: 1.026177167892456,grad_norm: 0.7266425409758187, iteration: 453801
loss: 1.0255926847457886,grad_norm: 0.7552837785452571, iteration: 453802
loss: 0.9697104692459106,grad_norm: 0.779326142029192, iteration: 453803
loss: 1.0244579315185547,grad_norm: 0.8473439852225885, iteration: 453804
loss: 0.9981725215911865,grad_norm: 0.7628260403491073, iteration: 453805
loss: 1.013162612915039,grad_norm: 0.7124199056204212, iteration: 453806
loss: 0.9989443421363831,grad_norm: 0.6971881402888396, iteration: 453807
loss: 0.9728989005088806,grad_norm: 0.6662528114755568, iteration: 453808
loss: 1.0060042142868042,grad_norm: 0.6861495430055781, iteration: 453809
loss: 0.9977996945381165,grad_norm: 0.75993205979715, iteration: 453810
loss: 1.0230028629302979,grad_norm: 0.9999990987223523, iteration: 453811
loss: 0.9994012713432312,grad_norm: 0.6729645704829358, iteration: 453812
loss: 1.0128058195114136,grad_norm: 0.856518474463004, iteration: 453813
loss: 0.985499918460846,grad_norm: 0.5913208419504509, iteration: 453814
loss: 1.0950586795806885,grad_norm: 0.9732514158622192, iteration: 453815
loss: 1.0120269060134888,grad_norm: 0.999999451729579, iteration: 453816
loss: 0.9921841025352478,grad_norm: 0.7878615627072332, iteration: 453817
loss: 1.012190580368042,grad_norm: 0.7446134004289582, iteration: 453818
loss: 0.9885457754135132,grad_norm: 0.9198624752778419, iteration: 453819
loss: 1.0308589935302734,grad_norm: 0.7351430066266137, iteration: 453820
loss: 1.029189109802246,grad_norm: 0.9999991305795235, iteration: 453821
loss: 0.9680100083351135,grad_norm: 0.6659037467113711, iteration: 453822
loss: 1.0602028369903564,grad_norm: 0.7540814501394808, iteration: 453823
loss: 1.0083705186843872,grad_norm: 0.9320951847514616, iteration: 453824
loss: 0.9982233643531799,grad_norm: 0.9999995469660844, iteration: 453825
loss: 1.0035696029663086,grad_norm: 0.7641661735295128, iteration: 453826
loss: 0.9852033257484436,grad_norm: 0.8310945290972782, iteration: 453827
loss: 1.0366747379302979,grad_norm: 0.8002097003239886, iteration: 453828
loss: 0.9987846612930298,grad_norm: 0.8464730168850598, iteration: 453829
loss: 1.0020335912704468,grad_norm: 0.6795004508526017, iteration: 453830
loss: 0.9971176981925964,grad_norm: 0.7527315944747806, iteration: 453831
loss: 1.0135598182678223,grad_norm: 0.999999094901279, iteration: 453832
loss: 1.0229898691177368,grad_norm: 0.729967764749839, iteration: 453833
loss: 0.979367733001709,grad_norm: 0.7029053889965785, iteration: 453834
loss: 0.9826284646987915,grad_norm: 0.7521727004314805, iteration: 453835
loss: 1.0136581659317017,grad_norm: 0.7866800264306809, iteration: 453836
loss: 1.0250837802886963,grad_norm: 0.9236627981819795, iteration: 453837
loss: 0.9873126745223999,grad_norm: 0.8214337560920179, iteration: 453838
loss: 1.0270167589187622,grad_norm: 0.9451344726050689, iteration: 453839
loss: 1.0240530967712402,grad_norm: 0.6756649179798165, iteration: 453840
loss: 1.0374503135681152,grad_norm: 0.8645569398862799, iteration: 453841
loss: 1.0022262334823608,grad_norm: 0.7697095187603945, iteration: 453842
loss: 1.0048719644546509,grad_norm: 0.7822953049033959, iteration: 453843
loss: 0.9819451570510864,grad_norm: 0.8548794473298036, iteration: 453844
loss: 1.004648208618164,grad_norm: 0.7733222304314179, iteration: 453845
loss: 0.9639293551445007,grad_norm: 0.7085756653088487, iteration: 453846
loss: 0.9656254053115845,grad_norm: 0.7736767996360022, iteration: 453847
loss: 1.0020042657852173,grad_norm: 0.7895290792700946, iteration: 453848
loss: 0.9973312616348267,grad_norm: 0.730306672697929, iteration: 453849
loss: 0.969810426235199,grad_norm: 0.7339008928671241, iteration: 453850
loss: 0.9875258803367615,grad_norm: 0.7944831945247459, iteration: 453851
loss: 0.9583790898323059,grad_norm: 0.8166816956400845, iteration: 453852
loss: 0.9601156115531921,grad_norm: 0.9439967710381029, iteration: 453853
loss: 0.9950319528579712,grad_norm: 0.8170606697373746, iteration: 453854
loss: 1.0020458698272705,grad_norm: 0.7995734252792219, iteration: 453855
loss: 0.991586446762085,grad_norm: 0.746706642320237, iteration: 453856
loss: 0.9836294651031494,grad_norm: 0.8032953223620999, iteration: 453857
loss: 1.0189976692199707,grad_norm: 0.6196294960132936, iteration: 453858
loss: 0.9845231175422668,grad_norm: 0.7461363112529387, iteration: 453859
loss: 1.004446029663086,grad_norm: 0.7597859559928928, iteration: 453860
loss: 0.9994610548019409,grad_norm: 0.7853754692565439, iteration: 453861
loss: 0.9761988520622253,grad_norm: 0.9521403381519953, iteration: 453862
loss: 1.037270188331604,grad_norm: 0.7984584599439637, iteration: 453863
loss: 0.9907177686691284,grad_norm: 0.7204122200982486, iteration: 453864
loss: 0.9774413108825684,grad_norm: 0.7620561292713536, iteration: 453865
loss: 1.0111751556396484,grad_norm: 0.8348977830702443, iteration: 453866
loss: 1.0061122179031372,grad_norm: 0.8014659427535628, iteration: 453867
loss: 0.9747224450111389,grad_norm: 0.7503861387517516, iteration: 453868
loss: 1.027601957321167,grad_norm: 0.7646564678249812, iteration: 453869
loss: 0.9908598065376282,grad_norm: 0.6893406993724609, iteration: 453870
loss: 1.0017844438552856,grad_norm: 0.7054711126465317, iteration: 453871
loss: 1.0114250183105469,grad_norm: 0.739302624232686, iteration: 453872
loss: 0.9883180856704712,grad_norm: 0.8159072765967859, iteration: 453873
loss: 0.9921485781669617,grad_norm: 0.6651320153309421, iteration: 453874
loss: 1.043427586555481,grad_norm: 0.6948033942470537, iteration: 453875
loss: 0.9743878245353699,grad_norm: 0.7033452981927395, iteration: 453876
loss: 0.9759619235992432,grad_norm: 0.6689045779527194, iteration: 453877
loss: 1.0586771965026855,grad_norm: 0.9474528384473472, iteration: 453878
loss: 0.9714651107788086,grad_norm: 0.5974055821967718, iteration: 453879
loss: 1.012522578239441,grad_norm: 0.6858659838987718, iteration: 453880
loss: 1.1430538892745972,grad_norm: 0.9999997702234994, iteration: 453881
loss: 0.9895827770233154,grad_norm: 0.7395532411159718, iteration: 453882
loss: 0.9902172088623047,grad_norm: 0.742028357247325, iteration: 453883
loss: 1.0171840190887451,grad_norm: 0.7529477633062968, iteration: 453884
loss: 1.0056713819503784,grad_norm: 0.8542919108077466, iteration: 453885
loss: 0.9791860580444336,grad_norm: 0.754512568376523, iteration: 453886
loss: 0.9738197922706604,grad_norm: 0.7307850867058514, iteration: 453887
loss: 0.9948363900184631,grad_norm: 0.6516258862652358, iteration: 453888
loss: 0.9783070683479309,grad_norm: 0.713616910330826, iteration: 453889
loss: 0.999207615852356,grad_norm: 0.8274678470445116, iteration: 453890
loss: 0.9836780428886414,grad_norm: 0.7044509707876185, iteration: 453891
loss: 0.9982549548149109,grad_norm: 0.7739417896009999, iteration: 453892
loss: 0.986592710018158,grad_norm: 0.9440058740167467, iteration: 453893
loss: 0.9948316216468811,grad_norm: 0.923224778332121, iteration: 453894
loss: 0.9898567795753479,grad_norm: 0.7377318290592035, iteration: 453895
loss: 1.0248470306396484,grad_norm: 0.7311047266917052, iteration: 453896
loss: 0.9935770630836487,grad_norm: 0.7905811426288015, iteration: 453897
loss: 1.0187879800796509,grad_norm: 0.69766531987416, iteration: 453898
loss: 0.9878304600715637,grad_norm: 0.7201142137401212, iteration: 453899
loss: 0.991517186164856,grad_norm: 0.7168839762491032, iteration: 453900
loss: 1.0405148267745972,grad_norm: 0.8260267523619478, iteration: 453901
loss: 1.0257681608200073,grad_norm: 0.8580085070445144, iteration: 453902
loss: 0.9811506867408752,grad_norm: 0.6789637352861102, iteration: 453903
loss: 0.9806708097457886,grad_norm: 0.8989502641696067, iteration: 453904
loss: 0.9874770045280457,grad_norm: 0.6980517669435964, iteration: 453905
loss: 0.9767035841941833,grad_norm: 0.8927097594386912, iteration: 453906
loss: 0.9877237677574158,grad_norm: 0.7651761877218851, iteration: 453907
loss: 1.0303443670272827,grad_norm: 0.7258224620420499, iteration: 453908
loss: 1.0991584062576294,grad_norm: 0.9999991346467199, iteration: 453909
loss: 0.9986941814422607,grad_norm: 0.7209015967437656, iteration: 453910
loss: 0.9885528683662415,grad_norm: 0.8998202655941054, iteration: 453911
loss: 1.006973385810852,grad_norm: 0.7675047093409528, iteration: 453912
loss: 1.0071086883544922,grad_norm: 0.7171579749442021, iteration: 453913
loss: 1.040617823600769,grad_norm: 0.9999997388000288, iteration: 453914
loss: 1.0043147802352905,grad_norm: 0.8197941833584103, iteration: 453915
loss: 0.997947096824646,grad_norm: 0.8013526202890408, iteration: 453916
loss: 0.9807770848274231,grad_norm: 0.8610992950897092, iteration: 453917
loss: 0.9678766131401062,grad_norm: 0.7326824267859597, iteration: 453918
loss: 0.9786832928657532,grad_norm: 0.815505525721008, iteration: 453919
loss: 1.0059967041015625,grad_norm: 0.8255169493266389, iteration: 453920
loss: 0.9822924733161926,grad_norm: 0.6371644102558943, iteration: 453921
loss: 1.003011703491211,grad_norm: 0.688163163926488, iteration: 453922
loss: 1.0324547290802002,grad_norm: 0.7198417742314528, iteration: 453923
loss: 1.0175037384033203,grad_norm: 0.7328604669180496, iteration: 453924
loss: 1.0231610536575317,grad_norm: 0.9999996417286701, iteration: 453925
loss: 1.004931926727295,grad_norm: 0.8572059320752285, iteration: 453926
loss: 1.0805039405822754,grad_norm: 0.8178407124601327, iteration: 453927
loss: 0.9942242503166199,grad_norm: 0.635520454632179, iteration: 453928
loss: 1.0401725769042969,grad_norm: 0.6956527394861587, iteration: 453929
loss: 1.0015912055969238,grad_norm: 0.9025171511185693, iteration: 453930
loss: 1.0245497226715088,grad_norm: 0.8758853523794847, iteration: 453931
loss: 0.9991803169250488,grad_norm: 0.9078959814543371, iteration: 453932
loss: 1.0003135204315186,grad_norm: 0.7636937003564035, iteration: 453933
loss: 0.9827969074249268,grad_norm: 0.7240569553091256, iteration: 453934
loss: 0.9845975041389465,grad_norm: 0.6965716841658994, iteration: 453935
loss: 1.0222707986831665,grad_norm: 0.7286025048674205, iteration: 453936
loss: 1.0039870738983154,grad_norm: 0.7978743940943243, iteration: 453937
loss: 1.1068390607833862,grad_norm: 0.8518953540608478, iteration: 453938
loss: 0.9841412901878357,grad_norm: 0.7247996894598328, iteration: 453939
loss: 1.033400297164917,grad_norm: 0.8316111069687009, iteration: 453940
loss: 1.013086199760437,grad_norm: 0.8442916923071846, iteration: 453941
loss: 1.0078099966049194,grad_norm: 0.7884843860474939, iteration: 453942
loss: 1.0251120328903198,grad_norm: 0.8664429116969348, iteration: 453943
loss: 1.0172559022903442,grad_norm: 0.7654288834575179, iteration: 453944
loss: 1.01279878616333,grad_norm: 0.6793771078725113, iteration: 453945
loss: 0.9839431047439575,grad_norm: 0.7404805567113651, iteration: 453946
loss: 1.0106050968170166,grad_norm: 0.7945002842352626, iteration: 453947
loss: 0.9963509440422058,grad_norm: 0.7088479424750931, iteration: 453948
loss: 1.0194721221923828,grad_norm: 0.7618575451416437, iteration: 453949
loss: 0.9981917142868042,grad_norm: 0.9062273504595848, iteration: 453950
loss: 0.9957407116889954,grad_norm: 0.7877125717379739, iteration: 453951
loss: 0.9953650832176208,grad_norm: 0.7562272100295386, iteration: 453952
loss: 1.009893536567688,grad_norm: 0.952202725982827, iteration: 453953
loss: 0.9816608428955078,grad_norm: 0.6899190138793826, iteration: 453954
loss: 0.9824906587600708,grad_norm: 0.8281357396082216, iteration: 453955
loss: 0.9743641018867493,grad_norm: 0.8411165453603389, iteration: 453956
loss: 1.047284722328186,grad_norm: 0.8177040837268805, iteration: 453957
loss: 1.003127932548523,grad_norm: 0.8798929357401445, iteration: 453958
loss: 1.0238122940063477,grad_norm: 0.7904099589142001, iteration: 453959
loss: 1.0193511247634888,grad_norm: 0.7993451055308505, iteration: 453960
loss: 0.9965845346450806,grad_norm: 0.7924043908897984, iteration: 453961
loss: 1.0035756826400757,grad_norm: 0.8517499158850105, iteration: 453962
loss: 0.9960442185401917,grad_norm: 0.7627108029691944, iteration: 453963
loss: 0.996789276599884,grad_norm: 0.7388094387000117, iteration: 453964
loss: 1.0901901721954346,grad_norm: 0.7732913735315033, iteration: 453965
loss: 0.9876933693885803,grad_norm: 0.7180656628554, iteration: 453966
loss: 1.0030075311660767,grad_norm: 0.9035683434602005, iteration: 453967
loss: 0.993810772895813,grad_norm: 0.7968549517541079, iteration: 453968
loss: 1.0129361152648926,grad_norm: 0.6253105255051301, iteration: 453969
loss: 1.0050265789031982,grad_norm: 0.8056782367017987, iteration: 453970
loss: 1.0035619735717773,grad_norm: 0.7574875693885977, iteration: 453971
loss: 0.9947752952575684,grad_norm: 0.9077423114390769, iteration: 453972
loss: 1.0008671283721924,grad_norm: 0.9999990209735499, iteration: 453973
loss: 0.9924275875091553,grad_norm: 0.7636630735230768, iteration: 453974
loss: 0.9721602201461792,grad_norm: 0.7596524360709555, iteration: 453975
loss: 0.992656946182251,grad_norm: 0.8208157805451052, iteration: 453976
loss: 1.0251588821411133,grad_norm: 0.7108511172634968, iteration: 453977
loss: 0.9807722568511963,grad_norm: 0.7446894280657267, iteration: 453978
loss: 1.0211397409439087,grad_norm: 0.7058350819200653, iteration: 453979
loss: 0.9928869009017944,grad_norm: 0.7531017967614837, iteration: 453980
loss: 1.0184301137924194,grad_norm: 0.8777585535096932, iteration: 453981
loss: 1.0208791494369507,grad_norm: 0.802122733624139, iteration: 453982
loss: 1.0037351846694946,grad_norm: 0.6450392019118554, iteration: 453983
loss: 0.9686421751976013,grad_norm: 0.8275272198240087, iteration: 453984
loss: 1.0312632322311401,grad_norm: 0.8434711583650336, iteration: 453985
loss: 1.013061285018921,grad_norm: 0.6283341257178935, iteration: 453986
loss: 0.968188464641571,grad_norm: 0.8602446747640577, iteration: 453987
loss: 1.0037893056869507,grad_norm: 0.7091418227360864, iteration: 453988
loss: 0.9964159727096558,grad_norm: 0.7575183857088612, iteration: 453989
loss: 0.9897095561027527,grad_norm: 0.8745980397542575, iteration: 453990
loss: 1.0089352130889893,grad_norm: 0.6742237688562971, iteration: 453991
loss: 1.0084494352340698,grad_norm: 0.748862879145005, iteration: 453992
loss: 1.0180552005767822,grad_norm: 0.9820034178180549, iteration: 453993
loss: 0.9837953448295593,grad_norm: 0.7148240876785877, iteration: 453994
loss: 0.9829183220863342,grad_norm: 0.9031206186838443, iteration: 453995
loss: 0.9437691569328308,grad_norm: 0.7877588762062171, iteration: 453996
loss: 1.0304545164108276,grad_norm: 0.7668585119657334, iteration: 453997
loss: 0.9955400228500366,grad_norm: 0.8874333640102532, iteration: 453998
loss: 0.9975598454475403,grad_norm: 0.7345742482217388, iteration: 453999
loss: 1.007059097290039,grad_norm: 0.9999990246206062, iteration: 454000
loss: 1.0119357109069824,grad_norm: 0.7589946047975006, iteration: 454001
loss: 1.009196400642395,grad_norm: 0.8052685831267177, iteration: 454002
loss: 1.0011709928512573,grad_norm: 0.6766764194743867, iteration: 454003
loss: 1.0221201181411743,grad_norm: 0.6746976739837447, iteration: 454004
loss: 1.0261183977127075,grad_norm: 0.7475475672218471, iteration: 454005
loss: 0.9890689253807068,grad_norm: 0.8088949238719297, iteration: 454006
loss: 0.9993257522583008,grad_norm: 0.6997040881650007, iteration: 454007
loss: 0.9866775870323181,grad_norm: 0.9149883862072117, iteration: 454008
loss: 1.006292700767517,grad_norm: 0.734880401788992, iteration: 454009
loss: 0.9978022575378418,grad_norm: 0.8320125526249734, iteration: 454010
loss: 0.9885866641998291,grad_norm: 0.7624372127149376, iteration: 454011
loss: 1.009446382522583,grad_norm: 0.922389006336978, iteration: 454012
loss: 0.9759665727615356,grad_norm: 0.9385575147468814, iteration: 454013
loss: 0.9805107712745667,grad_norm: 0.7106038422592791, iteration: 454014
loss: 1.020222544670105,grad_norm: 0.7137386359163727, iteration: 454015
loss: 1.0076305866241455,grad_norm: 0.7815992642181055, iteration: 454016
loss: 0.9996380805969238,grad_norm: 0.8159263468256693, iteration: 454017
loss: 0.9879671335220337,grad_norm: 0.8069135638919845, iteration: 454018
loss: 0.9806054830551147,grad_norm: 0.8787503733067507, iteration: 454019
loss: 0.9961354732513428,grad_norm: 0.5666485917228844, iteration: 454020
loss: 0.994961678981781,grad_norm: 0.6771562007836651, iteration: 454021
loss: 0.9988062381744385,grad_norm: 0.6689862671490538, iteration: 454022
loss: 0.9968076944351196,grad_norm: 0.7253028180843397, iteration: 454023
loss: 0.9789384603500366,grad_norm: 0.9999999003095517, iteration: 454024
loss: 1.034134864807129,grad_norm: 0.7447691590675225, iteration: 454025
loss: 1.010191559791565,grad_norm: 0.7593945272822573, iteration: 454026
loss: 0.9933083653450012,grad_norm: 0.6771598533575794, iteration: 454027
loss: 1.0427188873291016,grad_norm: 0.9999996444581413, iteration: 454028
loss: 0.9914436340332031,grad_norm: 0.918299309242909, iteration: 454029
loss: 0.9694109559059143,grad_norm: 0.6792057668774402, iteration: 454030
loss: 0.9815536141395569,grad_norm: 0.7260887135676464, iteration: 454031
loss: 1.003340244293213,grad_norm: 0.9999991159937973, iteration: 454032
loss: 0.9964027404785156,grad_norm: 0.9999998781176646, iteration: 454033
loss: 0.9902448058128357,grad_norm: 0.6627652503399649, iteration: 454034
loss: 1.0009870529174805,grad_norm: 0.6918272733224422, iteration: 454035
loss: 0.9775218963623047,grad_norm: 0.7104227404742648, iteration: 454036
loss: 0.9724367260932922,grad_norm: 0.8206867529838435, iteration: 454037
loss: 0.9552983045578003,grad_norm: 0.7160565654831167, iteration: 454038
loss: 1.0102009773254395,grad_norm: 0.7600360656529571, iteration: 454039
loss: 0.9916510581970215,grad_norm: 0.7914095152623402, iteration: 454040
loss: 1.0123934745788574,grad_norm: 0.7317702077601957, iteration: 454041
loss: 0.9859294891357422,grad_norm: 0.7625164007151837, iteration: 454042
loss: 1.0226153135299683,grad_norm: 0.7288214883820294, iteration: 454043
loss: 0.9382187128067017,grad_norm: 0.8364059561628401, iteration: 454044
loss: 0.968505859375,grad_norm: 0.8058259107679374, iteration: 454045
loss: 0.979362428188324,grad_norm: 0.7452629631336868, iteration: 454046
loss: 0.995098888874054,grad_norm: 0.7841892396335884, iteration: 454047
loss: 0.9794691205024719,grad_norm: 0.7307662070130643, iteration: 454048
loss: 0.9765989184379578,grad_norm: 0.7700759257977073, iteration: 454049
loss: 1.0491187572479248,grad_norm: 0.818772219941324, iteration: 454050
loss: 0.9936388731002808,grad_norm: 0.8328145378398994, iteration: 454051
loss: 1.0109596252441406,grad_norm: 0.692660238205205, iteration: 454052
loss: 0.9801425337791443,grad_norm: 0.7393581645561554, iteration: 454053
loss: 0.9640331864356995,grad_norm: 0.7471664748456439, iteration: 454054
loss: 0.9997057914733887,grad_norm: 0.780951195083393, iteration: 454055
loss: 0.964560329914093,grad_norm: 0.965057323063033, iteration: 454056
loss: 1.0000828504562378,grad_norm: 0.716233665444807, iteration: 454057
loss: 1.047484040260315,grad_norm: 0.9999992102608504, iteration: 454058
loss: 1.0222777128219604,grad_norm: 0.8539784809200847, iteration: 454059
loss: 0.9980039000511169,grad_norm: 0.7947861122525578, iteration: 454060
loss: 1.0058695077896118,grad_norm: 0.7417626355253042, iteration: 454061
loss: 0.9813429117202759,grad_norm: 0.8287038155140503, iteration: 454062
loss: 1.0032720565795898,grad_norm: 0.7794120396705897, iteration: 454063
loss: 0.9348343014717102,grad_norm: 0.648782423261929, iteration: 454064
loss: 0.9815594553947449,grad_norm: 0.7310646508152935, iteration: 454065
loss: 0.9857452511787415,grad_norm: 0.761912225210247, iteration: 454066
loss: 1.025668740272522,grad_norm: 0.6915328087569695, iteration: 454067
loss: 1.0059982538223267,grad_norm: 0.9999993247973252, iteration: 454068
loss: 1.0057004690170288,grad_norm: 0.8945573677705971, iteration: 454069
loss: 0.983243465423584,grad_norm: 0.7721534501152031, iteration: 454070
loss: 0.9861323237419128,grad_norm: 0.6708841184830157, iteration: 454071
loss: 1.0264766216278076,grad_norm: 0.6703897668625322, iteration: 454072
loss: 1.0220950841903687,grad_norm: 0.8264952290333585, iteration: 454073
loss: 0.9844375848770142,grad_norm: 0.7411598907540063, iteration: 454074
loss: 1.0629514455795288,grad_norm: 0.7167554666371454, iteration: 454075
loss: 0.9702687859535217,grad_norm: 0.7239095531456299, iteration: 454076
loss: 1.0098859071731567,grad_norm: 0.7448467543461396, iteration: 454077
loss: 0.9982668161392212,grad_norm: 0.884918151038799, iteration: 454078
loss: 0.9775909781455994,grad_norm: 0.9197017628127485, iteration: 454079
loss: 1.0360625982284546,grad_norm: 0.8166742535151456, iteration: 454080
loss: 0.9788317680358887,grad_norm: 0.7822266444611131, iteration: 454081
loss: 0.9641903042793274,grad_norm: 0.7792222652547631, iteration: 454082
loss: 0.9719697833061218,grad_norm: 0.8087077534725509, iteration: 454083
loss: 0.9692332148551941,grad_norm: 0.8920898599497019, iteration: 454084
loss: 1.0060734748840332,grad_norm: 0.9999990680635328, iteration: 454085
loss: 0.9888540506362915,grad_norm: 0.7183897858124575, iteration: 454086
loss: 0.9812564849853516,grad_norm: 0.9637736976089789, iteration: 454087
loss: 1.0320961475372314,grad_norm: 0.7234927513107328, iteration: 454088
loss: 1.023984432220459,grad_norm: 0.9343723088198805, iteration: 454089
loss: 0.9799525141716003,grad_norm: 0.7581071279245939, iteration: 454090
loss: 1.001631498336792,grad_norm: 0.7932895807094027, iteration: 454091
loss: 1.0086811780929565,grad_norm: 0.6676885155916795, iteration: 454092
loss: 0.9917741417884827,grad_norm: 0.7636606787423728, iteration: 454093
loss: 1.1316810846328735,grad_norm: 0.8728520921489296, iteration: 454094
loss: 0.9964350461959839,grad_norm: 0.7514138670424273, iteration: 454095
loss: 0.9743921756744385,grad_norm: 0.9999999370327127, iteration: 454096
loss: 0.9584938883781433,grad_norm: 0.7212199347593053, iteration: 454097
loss: 0.9987077116966248,grad_norm: 0.6636010937554759, iteration: 454098
loss: 0.9787344932556152,grad_norm: 0.8593941873859344, iteration: 454099
loss: 0.9866910576820374,grad_norm: 0.8283654187073541, iteration: 454100
loss: 0.9645682573318481,grad_norm: 0.6771448240941975, iteration: 454101
loss: 0.9989469647407532,grad_norm: 0.664362136284165, iteration: 454102
loss: 0.9837761521339417,grad_norm: 0.7007507534121653, iteration: 454103
loss: 0.9988868832588196,grad_norm: 0.9199250341894945, iteration: 454104
loss: 1.0082570314407349,grad_norm: 0.6764810894943012, iteration: 454105
loss: 0.9605413675308228,grad_norm: 0.8184989518089337, iteration: 454106
loss: 1.0089808702468872,grad_norm: 0.819868500320899, iteration: 454107
loss: 1.0062168836593628,grad_norm: 0.7748905278503543, iteration: 454108
loss: 1.0127477645874023,grad_norm: 0.6733949823229138, iteration: 454109
loss: 1.028555154800415,grad_norm: 0.7259843034892666, iteration: 454110
loss: 1.0459357500076294,grad_norm: 0.8333461968720636, iteration: 454111
loss: 0.9825612902641296,grad_norm: 0.7221022469315681, iteration: 454112
loss: 0.9988658428192139,grad_norm: 0.7893538841544481, iteration: 454113
loss: 1.0011991262435913,grad_norm: 0.6349264626366914, iteration: 454114
loss: 1.056207299232483,grad_norm: 1.0000000832601799, iteration: 454115
loss: 0.9876960515975952,grad_norm: 0.7877589196703425, iteration: 454116
loss: 0.9689996242523193,grad_norm: 0.696992796577979, iteration: 454117
loss: 0.9866726994514465,grad_norm: 0.645764255136258, iteration: 454118
loss: 0.9947668313980103,grad_norm: 0.7006902862743702, iteration: 454119
loss: 1.0030611753463745,grad_norm: 0.6402924165887292, iteration: 454120
loss: 1.0109660625457764,grad_norm: 0.6676021353174871, iteration: 454121
loss: 1.0024648904800415,grad_norm: 0.7025339262412147, iteration: 454122
loss: 0.9957449436187744,grad_norm: 0.8866761965538583, iteration: 454123
loss: 1.002928614616394,grad_norm: 0.613570354607588, iteration: 454124
loss: 0.9907708764076233,grad_norm: 0.7857047501198504, iteration: 454125
loss: 1.0115879774093628,grad_norm: 0.8742222421522498, iteration: 454126
loss: 0.9932502508163452,grad_norm: 0.7660161566623219, iteration: 454127
loss: 0.9738203287124634,grad_norm: 0.7463844010348629, iteration: 454128
loss: 0.9605101346969604,grad_norm: 0.8647311836993682, iteration: 454129
loss: 1.0214126110076904,grad_norm: 0.739794149454985, iteration: 454130
loss: 0.9971427917480469,grad_norm: 0.7463459067972669, iteration: 454131
loss: 0.978302001953125,grad_norm: 0.8284734618818437, iteration: 454132
loss: 0.9950860142707825,grad_norm: 0.8415044144114623, iteration: 454133
loss: 1.0325063467025757,grad_norm: 0.9999994400674234, iteration: 454134
loss: 0.9956436157226562,grad_norm: 0.7655274066596127, iteration: 454135
loss: 0.9232836365699768,grad_norm: 0.7363251882802163, iteration: 454136
loss: 0.9752102494239807,grad_norm: 0.6673266947555961, iteration: 454137
loss: 1.0614444017410278,grad_norm: 0.9999995139693639, iteration: 454138
loss: 1.0074846744537354,grad_norm: 0.7977565994787286, iteration: 454139
loss: 0.9742915630340576,grad_norm: 0.749177040843706, iteration: 454140
loss: 1.0028202533721924,grad_norm: 0.8404656089145514, iteration: 454141
loss: 1.0052440166473389,grad_norm: 0.7558387301958049, iteration: 454142
loss: 0.9748650789260864,grad_norm: 0.809596606426708, iteration: 454143
loss: 1.0050723552703857,grad_norm: 0.87312890152172, iteration: 454144
loss: 0.9498447775840759,grad_norm: 0.7384154067735685, iteration: 454145
loss: 0.9864677786827087,grad_norm: 0.7822572871610458, iteration: 454146
loss: 1.0509579181671143,grad_norm: 0.6919684126104517, iteration: 454147
loss: 0.9919695854187012,grad_norm: 0.745356636619177, iteration: 454148
loss: 1.005298137664795,grad_norm: 0.847279277817232, iteration: 454149
loss: 1.0063624382019043,grad_norm: 0.865955294960429, iteration: 454150
loss: 0.989923894405365,grad_norm: 0.6976923185313209, iteration: 454151
loss: 0.9807959198951721,grad_norm: 0.7555466556993172, iteration: 454152
loss: 0.9972333908081055,grad_norm: 0.7057430730917711, iteration: 454153
loss: 0.9898603558540344,grad_norm: 0.8073887875209474, iteration: 454154
loss: 0.9735514521598816,grad_norm: 0.8971151298777447, iteration: 454155
loss: 1.0189619064331055,grad_norm: 0.8280677301973052, iteration: 454156
loss: 1.1431984901428223,grad_norm: 0.9999991874192729, iteration: 454157
loss: 1.0095922946929932,grad_norm: 0.9219314234341025, iteration: 454158
loss: 1.0064533948898315,grad_norm: 0.949892284776916, iteration: 454159
loss: 0.9933556914329529,grad_norm: 0.8154036146103528, iteration: 454160
loss: 0.9977190494537354,grad_norm: 0.762179089892424, iteration: 454161
loss: 1.0720359086990356,grad_norm: 0.7887821141118154, iteration: 454162
loss: 1.0062209367752075,grad_norm: 0.795413011536383, iteration: 454163
loss: 0.984345018863678,grad_norm: 0.7580299756959328, iteration: 454164
loss: 1.080161213874817,grad_norm: 0.8455721674004828, iteration: 454165
loss: 0.9933085441589355,grad_norm: 0.6983514847345287, iteration: 454166
loss: 0.9706934094429016,grad_norm: 0.6399894587100379, iteration: 454167
loss: 1.00999116897583,grad_norm: 0.7427593389520897, iteration: 454168
loss: 0.9539479613304138,grad_norm: 0.7554520931365458, iteration: 454169
loss: 0.9442594647407532,grad_norm: 0.7181680655126733, iteration: 454170
loss: 0.964242696762085,grad_norm: 0.8274179430489538, iteration: 454171
loss: 0.9824268221855164,grad_norm: 0.7558540894345724, iteration: 454172
loss: 0.9823623895645142,grad_norm: 0.7051967168860592, iteration: 454173
loss: 0.9685385227203369,grad_norm: 0.6861242304299268, iteration: 454174
loss: 1.0351753234863281,grad_norm: 0.6996455101831023, iteration: 454175
loss: 1.0305092334747314,grad_norm: 0.9203734413785482, iteration: 454176
loss: 1.0117285251617432,grad_norm: 0.6543133290563852, iteration: 454177
loss: 0.9410144090652466,grad_norm: 0.9999994381224547, iteration: 454178
loss: 0.9811831712722778,grad_norm: 0.7309405634850951, iteration: 454179
loss: 1.0131169557571411,grad_norm: 0.835218924308472, iteration: 454180
loss: 1.0079500675201416,grad_norm: 0.8220520732863212, iteration: 454181
loss: 1.0353608131408691,grad_norm: 0.9895128152960839, iteration: 454182
loss: 0.9661006927490234,grad_norm: 0.7543907844843709, iteration: 454183
loss: 0.9793615341186523,grad_norm: 0.7188840047012228, iteration: 454184
loss: 0.9795878529548645,grad_norm: 0.7031324616937035, iteration: 454185
loss: 0.9868690967559814,grad_norm: 0.7223658635550149, iteration: 454186
loss: 0.9758350253105164,grad_norm: 0.7754114715808609, iteration: 454187
loss: 0.979353129863739,grad_norm: 0.8281937852288589, iteration: 454188
loss: 1.231082558631897,grad_norm: 0.9999995020130932, iteration: 454189
loss: 1.0457903146743774,grad_norm: 0.9999996320870437, iteration: 454190
loss: 0.9975683093070984,grad_norm: 0.615175805520334, iteration: 454191
loss: 1.0080095529556274,grad_norm: 0.9501631341940638, iteration: 454192
loss: 1.0074143409729004,grad_norm: 0.8156774006853649, iteration: 454193
loss: 1.0085605382919312,grad_norm: 0.8429445750672919, iteration: 454194
loss: 0.9759010076522827,grad_norm: 0.9999990852014625, iteration: 454195
loss: 0.9940547347068787,grad_norm: 0.8751833655084826, iteration: 454196
loss: 1.0191237926483154,grad_norm: 0.8134965433317276, iteration: 454197
loss: 0.9981480836868286,grad_norm: 0.9999994023574723, iteration: 454198
loss: 1.020145058631897,grad_norm: 0.8124083333342359, iteration: 454199
loss: 0.9559300541877747,grad_norm: 0.8912543773931194, iteration: 454200
loss: 1.003755807876587,grad_norm: 0.6954685946405542, iteration: 454201
loss: 0.9908632040023804,grad_norm: 0.6857344547141242, iteration: 454202
loss: 0.9873595237731934,grad_norm: 0.8663597223385715, iteration: 454203
loss: 1.2085983753204346,grad_norm: 0.9999991977883362, iteration: 454204
loss: 1.007574200630188,grad_norm: 0.7188182350685642, iteration: 454205
loss: 0.9969688653945923,grad_norm: 0.8601672965916493, iteration: 454206
loss: 0.9861282706260681,grad_norm: 0.9999996991123604, iteration: 454207
loss: 1.001821756362915,grad_norm: 0.9586183583908726, iteration: 454208
loss: 0.9669961929321289,grad_norm: 0.9999991821992336, iteration: 454209
loss: 1.0636979341506958,grad_norm: 0.735651331691626, iteration: 454210
loss: 1.073477029800415,grad_norm: 0.9999992690376371, iteration: 454211
loss: 0.9813933968544006,grad_norm: 0.6415333824039676, iteration: 454212
loss: 1.1157232522964478,grad_norm: 0.9999999601451045, iteration: 454213
loss: 0.9988589882850647,grad_norm: 0.7076485887961216, iteration: 454214
loss: 0.9903092980384827,grad_norm: 0.9253522996060234, iteration: 454215
loss: 0.9999480247497559,grad_norm: 0.8633399481932632, iteration: 454216
loss: 0.9792413115501404,grad_norm: 0.7833272153353655, iteration: 454217
loss: 0.9661068916320801,grad_norm: 0.9999995323985992, iteration: 454218
loss: 1.0795001983642578,grad_norm: 0.8793686126611523, iteration: 454219
loss: 1.0126093626022339,grad_norm: 0.7575581660459947, iteration: 454220
loss: 0.9892436861991882,grad_norm: 0.7355929666258978, iteration: 454221
loss: 1.0598433017730713,grad_norm: 0.9999994423645068, iteration: 454222
loss: 1.0172678232192993,grad_norm: 0.7253792046454902, iteration: 454223
loss: 1.014338493347168,grad_norm: 0.891675922329274, iteration: 454224
loss: 0.9955623149871826,grad_norm: 0.9999996680026272, iteration: 454225
loss: 1.0440860986709595,grad_norm: 0.9999995742405727, iteration: 454226
loss: 1.0180779695510864,grad_norm: 0.7032574114948094, iteration: 454227
loss: 1.0117367506027222,grad_norm: 0.594368590106649, iteration: 454228
loss: 0.9578672647476196,grad_norm: 0.8149335024116927, iteration: 454229
loss: 1.0161570310592651,grad_norm: 0.6920601477631405, iteration: 454230
loss: 0.973901629447937,grad_norm: 0.7515058921316897, iteration: 454231
loss: 1.0074717998504639,grad_norm: 0.7067567790126965, iteration: 454232
loss: 1.0142978429794312,grad_norm: 0.8268524952677775, iteration: 454233
loss: 1.0034700632095337,grad_norm: 0.861482115369312, iteration: 454234
loss: 0.9853300452232361,grad_norm: 0.9259642679110935, iteration: 454235
loss: 1.0373106002807617,grad_norm: 0.6962041377532922, iteration: 454236
loss: 0.9741121530532837,grad_norm: 0.7653797070310705, iteration: 454237
loss: 0.9894047379493713,grad_norm: 0.8091986748505661, iteration: 454238
loss: 1.021848201751709,grad_norm: 0.8134529188569812, iteration: 454239
loss: 1.0455539226531982,grad_norm: 0.9999990551162037, iteration: 454240
loss: 1.0366243124008179,grad_norm: 0.8409809004401642, iteration: 454241
loss: 1.0101126432418823,grad_norm: 0.8592526716729411, iteration: 454242
loss: 0.9660443663597107,grad_norm: 0.8438412696290007, iteration: 454243
loss: 0.9934810400009155,grad_norm: 0.7427528925961082, iteration: 454244
loss: 0.9625084400177002,grad_norm: 0.7849547965781988, iteration: 454245
loss: 0.9764482378959656,grad_norm: 0.709691042647401, iteration: 454246
loss: 0.9986487030982971,grad_norm: 0.8188719465690434, iteration: 454247
loss: 1.0508506298065186,grad_norm: 0.9999990563204291, iteration: 454248
loss: 1.0055630207061768,grad_norm: 0.8370606274078974, iteration: 454249
loss: 1.0121958255767822,grad_norm: 0.8654695155009837, iteration: 454250
loss: 1.0023363828659058,grad_norm: 0.9133328996248422, iteration: 454251
loss: 1.0392887592315674,grad_norm: 0.8817117397377874, iteration: 454252
loss: 0.9687221050262451,grad_norm: 0.642810846779268, iteration: 454253
loss: 1.0063819885253906,grad_norm: 0.6569347379548152, iteration: 454254
loss: 1.0253849029541016,grad_norm: 0.9999995314230489, iteration: 454255
loss: 1.028902530670166,grad_norm: 0.8654951628907125, iteration: 454256
loss: 0.9865267872810364,grad_norm: 0.6715832785307351, iteration: 454257
loss: 0.9890421032905579,grad_norm: 0.6802578105700121, iteration: 454258
loss: 1.011616826057434,grad_norm: 0.7551155383792494, iteration: 454259
loss: 0.9965966939926147,grad_norm: 0.7739020786998798, iteration: 454260
loss: 1.0717607736587524,grad_norm: 0.7634697070282552, iteration: 454261
loss: 1.0073899030685425,grad_norm: 0.6010257769116688, iteration: 454262
loss: 0.986717939376831,grad_norm: 0.821055833628624, iteration: 454263
loss: 1.0750280618667603,grad_norm: 0.6879729920602388, iteration: 454264
loss: 1.0235071182250977,grad_norm: 0.9999995855387485, iteration: 454265
loss: 1.0440372228622437,grad_norm: 0.9999995236312945, iteration: 454266
loss: 1.0370293855667114,grad_norm: 0.6465748241029283, iteration: 454267
loss: 0.9619808197021484,grad_norm: 0.7846997903051715, iteration: 454268
loss: 1.0008125305175781,grad_norm: 0.7827456005290567, iteration: 454269
loss: 1.0815116167068481,grad_norm: 0.9999998437664871, iteration: 454270
loss: 0.9995846748352051,grad_norm: 0.7583040222866062, iteration: 454271
loss: 0.9986665844917297,grad_norm: 0.8052008094742156, iteration: 454272
loss: 1.0133705139160156,grad_norm: 0.7445129622333801, iteration: 454273
loss: 1.004960298538208,grad_norm: 0.7305363445007677, iteration: 454274
loss: 0.9800741076469421,grad_norm: 0.9203411438503577, iteration: 454275
loss: 0.9613550305366516,grad_norm: 0.8039584602329529, iteration: 454276
loss: 0.9872332811355591,grad_norm: 0.8190128184242853, iteration: 454277
loss: 0.9754530191421509,grad_norm: 0.8311137495620703, iteration: 454278
loss: 1.0239681005477905,grad_norm: 0.7734849854706055, iteration: 454279
loss: 0.9884856939315796,grad_norm: 0.5654840729231858, iteration: 454280
loss: 0.9998133182525635,grad_norm: 0.6951756725037068, iteration: 454281
loss: 1.0093284845352173,grad_norm: 0.7975166884491575, iteration: 454282
loss: 1.0570955276489258,grad_norm: 0.667957673888169, iteration: 454283
loss: 1.0191359519958496,grad_norm: 0.6913152135121032, iteration: 454284
loss: 0.9800988435745239,grad_norm: 0.7573984654151219, iteration: 454285
loss: 0.9878823161125183,grad_norm: 0.8240321563869699, iteration: 454286
loss: 0.9733565449714661,grad_norm: 0.7061614662661702, iteration: 454287
loss: 0.9495086669921875,grad_norm: 0.765264799105099, iteration: 454288
loss: 1.0155162811279297,grad_norm: 0.8681170992007896, iteration: 454289
loss: 0.9857038855552673,grad_norm: 0.8732647831036234, iteration: 454290
loss: 0.9411018490791321,grad_norm: 0.7341168832455167, iteration: 454291
loss: 0.9971173405647278,grad_norm: 0.6764680385236514, iteration: 454292
loss: 0.9548541307449341,grad_norm: 0.7966921161164249, iteration: 454293
loss: 1.0072839260101318,grad_norm: 0.9999992698110692, iteration: 454294
loss: 0.9742414355278015,grad_norm: 0.6864137601065095, iteration: 454295
loss: 0.9980932474136353,grad_norm: 0.7791201526470084, iteration: 454296
loss: 0.9952026009559631,grad_norm: 0.7086572415164705, iteration: 454297
loss: 1.01497483253479,grad_norm: 0.7478214035924863, iteration: 454298
loss: 0.9763156175613403,grad_norm: 0.8575190320112756, iteration: 454299
loss: 1.0635926723480225,grad_norm: 0.7237348351300579, iteration: 454300
loss: 1.0117847919464111,grad_norm: 0.8006830752157084, iteration: 454301
loss: 1.010530710220337,grad_norm: 0.7506510777976692, iteration: 454302
loss: 0.9675878286361694,grad_norm: 0.8757568917696089, iteration: 454303
loss: 0.9991433024406433,grad_norm: 0.7661506482566297, iteration: 454304
loss: 1.0122027397155762,grad_norm: 0.7033946142407278, iteration: 454305
loss: 0.991700291633606,grad_norm: 0.7167236004644784, iteration: 454306
loss: 0.9928465485572815,grad_norm: 0.7050033258147327, iteration: 454307
loss: 1.0044300556182861,grad_norm: 0.807089761877876, iteration: 454308
loss: 0.9898840188980103,grad_norm: 0.8192306202840779, iteration: 454309
loss: 1.061090111732483,grad_norm: 1.000000024753308, iteration: 454310
loss: 0.9650516510009766,grad_norm: 0.7291299191902129, iteration: 454311
loss: 1.0247135162353516,grad_norm: 0.8272389702351051, iteration: 454312
loss: 0.9885753393173218,grad_norm: 0.8794817539655514, iteration: 454313
loss: 0.9949773550033569,grad_norm: 0.8118416358389562, iteration: 454314
loss: 1.0046989917755127,grad_norm: 0.9999998584884658, iteration: 454315
loss: 0.9789601564407349,grad_norm: 0.9720043815556136, iteration: 454316
loss: 1.0122407674789429,grad_norm: 0.8641524878651183, iteration: 454317
loss: 1.0084205865859985,grad_norm: 0.7063286853147703, iteration: 454318
loss: 1.0148359537124634,grad_norm: 0.8789083513020849, iteration: 454319
loss: 1.016677737236023,grad_norm: 0.8250596225948424, iteration: 454320
loss: 1.0033890008926392,grad_norm: 0.7198863166659719, iteration: 454321
loss: 1.0113935470581055,grad_norm: 0.8970124747636333, iteration: 454322
loss: 0.9914557933807373,grad_norm: 0.7757186988229213, iteration: 454323
loss: 1.0189025402069092,grad_norm: 0.6355126514248032, iteration: 454324
loss: 0.9909286499023438,grad_norm: 0.7664136206993412, iteration: 454325
loss: 0.9888221025466919,grad_norm: 0.7166117004747294, iteration: 454326
loss: 1.0018514394760132,grad_norm: 0.637077338828447, iteration: 454327
loss: 1.0073983669281006,grad_norm: 0.8896271691135811, iteration: 454328
loss: 1.0156763792037964,grad_norm: 0.7343687511941797, iteration: 454329
loss: 1.0681242942810059,grad_norm: 0.9999993543664102, iteration: 454330
loss: 1.0314013957977295,grad_norm: 0.7611886093166258, iteration: 454331
loss: 1.0314432382583618,grad_norm: 0.9999996609116847, iteration: 454332
loss: 1.009278655052185,grad_norm: 0.6910771687504846, iteration: 454333
loss: 1.0042598247528076,grad_norm: 0.7173247701596743, iteration: 454334
loss: 0.9968551397323608,grad_norm: 0.7826656852830337, iteration: 454335
loss: 1.0200625658035278,grad_norm: 0.6780112094482525, iteration: 454336
loss: 1.0068519115447998,grad_norm: 0.7386352184585435, iteration: 454337
loss: 1.048205018043518,grad_norm: 0.7805258671415756, iteration: 454338
loss: 0.9844226837158203,grad_norm: 0.7740710261832564, iteration: 454339
loss: 1.0513912439346313,grad_norm: 0.9999997745888504, iteration: 454340
loss: 0.9895716309547424,grad_norm: 0.7924869732256953, iteration: 454341
loss: 1.0199867486953735,grad_norm: 0.7824564437475915, iteration: 454342
loss: 1.0090502500534058,grad_norm: 0.8093723760276385, iteration: 454343
loss: 0.983807384967804,grad_norm: 0.6659968928263129, iteration: 454344
loss: 0.9978989958763123,grad_norm: 0.7075931681555275, iteration: 454345
loss: 0.9848778247833252,grad_norm: 0.8489464391897208, iteration: 454346
loss: 1.001692295074463,grad_norm: 0.7379038488933072, iteration: 454347
loss: 0.9780768752098083,grad_norm: 0.7034362789082288, iteration: 454348
loss: 1.0207948684692383,grad_norm: 0.9999991533280472, iteration: 454349
loss: 1.0575499534606934,grad_norm: 0.9999994564213792, iteration: 454350
loss: 1.0084278583526611,grad_norm: 0.862023901364608, iteration: 454351
loss: 0.9957897663116455,grad_norm: 0.9686858258309446, iteration: 454352
loss: 0.9957179427146912,grad_norm: 0.7918048749888431, iteration: 454353
loss: 1.030598759651184,grad_norm: 0.8610954329549301, iteration: 454354
loss: 1.0152885913848877,grad_norm: 0.6688750830010685, iteration: 454355
loss: 1.0416011810302734,grad_norm: 0.709371932215367, iteration: 454356
loss: 0.9878338575363159,grad_norm: 0.7922238401645544, iteration: 454357
loss: 0.9915452003479004,grad_norm: 0.8085291802520848, iteration: 454358
loss: 0.9997828006744385,grad_norm: 0.8536357301003593, iteration: 454359
loss: 1.023058295249939,grad_norm: 0.781613098368528, iteration: 454360
loss: 1.038496971130371,grad_norm: 0.696329278276454, iteration: 454361
loss: 1.0127683877944946,grad_norm: 0.7931712615431342, iteration: 454362
loss: 0.950612485408783,grad_norm: 0.6407681053655776, iteration: 454363
loss: 0.9741811752319336,grad_norm: 0.7109750739041464, iteration: 454364
loss: 0.9538294672966003,grad_norm: 0.692551107410802, iteration: 454365
loss: 0.9933726191520691,grad_norm: 0.7452842102700451, iteration: 454366
loss: 1.008897304534912,grad_norm: 0.6947759710504406, iteration: 454367
loss: 0.9694107174873352,grad_norm: 0.7605445358864952, iteration: 454368
loss: 1.0046390295028687,grad_norm: 0.6897093664704393, iteration: 454369
loss: 1.0279264450073242,grad_norm: 0.7845036722984695, iteration: 454370
loss: 0.9920029044151306,grad_norm: 0.8387678425046559, iteration: 454371
loss: 1.293305516242981,grad_norm: 0.9999997165864248, iteration: 454372
loss: 0.9890674948692322,grad_norm: 0.7643568774865689, iteration: 454373
loss: 1.0512648820877075,grad_norm: 0.7222831052133637, iteration: 454374
loss: 1.015570878982544,grad_norm: 0.8891575580668952, iteration: 454375
loss: 0.995421826839447,grad_norm: 0.7437674668113121, iteration: 454376
loss: 0.9515672326087952,grad_norm: 0.8159258787898654, iteration: 454377
loss: 0.9951688647270203,grad_norm: 0.8147524117812395, iteration: 454378
loss: 1.0251458883285522,grad_norm: 0.8013947597377709, iteration: 454379
loss: 0.9594631791114807,grad_norm: 0.8125856643550126, iteration: 454380
loss: 1.036200761795044,grad_norm: 0.9999991824998755, iteration: 454381
loss: 0.9812281727790833,grad_norm: 0.845785128972097, iteration: 454382
loss: 1.0136406421661377,grad_norm: 0.7614087034553058, iteration: 454383
loss: 1.0377516746520996,grad_norm: 0.999999739170465, iteration: 454384
loss: 1.0348505973815918,grad_norm: 0.7398667972388634, iteration: 454385
loss: 0.9987772703170776,grad_norm: 0.6930550554656478, iteration: 454386
loss: 0.9746314883232117,grad_norm: 0.6986867157880927, iteration: 454387
loss: 1.0125257968902588,grad_norm: 0.8601833039919315, iteration: 454388
loss: 1.0090687274932861,grad_norm: 0.7326420603659993, iteration: 454389
loss: 0.9813418984413147,grad_norm: 0.9403287379061082, iteration: 454390
loss: 0.9840720891952515,grad_norm: 0.8066440530016129, iteration: 454391
loss: 0.9515887498855591,grad_norm: 0.747186571623994, iteration: 454392
loss: 1.0104426145553589,grad_norm: 0.7184942123872173, iteration: 454393
loss: 1.047898530960083,grad_norm: 0.9999993886853054, iteration: 454394
loss: 0.9821611046791077,grad_norm: 0.770281276667231, iteration: 454395
loss: 0.9966002702713013,grad_norm: 0.7761921578598582, iteration: 454396
loss: 0.9938054084777832,grad_norm: 0.6543021289879978, iteration: 454397
loss: 1.0125789642333984,grad_norm: 0.9999993000959225, iteration: 454398
loss: 0.995279848575592,grad_norm: 0.7868818735078061, iteration: 454399
loss: 1.0106720924377441,grad_norm: 0.7474560463893282, iteration: 454400
loss: 1.0343186855316162,grad_norm: 0.824970277161014, iteration: 454401
loss: 1.0172749757766724,grad_norm: 0.9999994959167393, iteration: 454402
loss: 0.9704967737197876,grad_norm: 0.6907121793261676, iteration: 454403
loss: 0.9842680096626282,grad_norm: 0.8447620492818751, iteration: 454404
loss: 1.0100345611572266,grad_norm: 0.7175927970289706, iteration: 454405
loss: 1.0048248767852783,grad_norm: 0.9242149270260277, iteration: 454406
loss: 1.00630784034729,grad_norm: 0.7442804745090642, iteration: 454407
loss: 0.9731441140174866,grad_norm: 0.6731298097239545, iteration: 454408
loss: 1.0017187595367432,grad_norm: 0.8290437465523783, iteration: 454409
loss: 1.033601999282837,grad_norm: 0.6639103022101442, iteration: 454410
loss: 0.9900531768798828,grad_norm: 0.85615475456172, iteration: 454411
loss: 1.0118714570999146,grad_norm: 0.863830337262919, iteration: 454412
loss: 0.9801086187362671,grad_norm: 0.6533659254685815, iteration: 454413
loss: 0.996441662311554,grad_norm: 0.8593692576902605, iteration: 454414
loss: 0.9773483276367188,grad_norm: 0.7566835936203915, iteration: 454415
loss: 0.9672548174858093,grad_norm: 0.8574241168127541, iteration: 454416
loss: 1.0525085926055908,grad_norm: 0.9999991552250151, iteration: 454417
loss: 0.9993813037872314,grad_norm: 0.6465443840463737, iteration: 454418
loss: 0.965671181678772,grad_norm: 0.6736065393290671, iteration: 454419
loss: 0.9954763054847717,grad_norm: 0.694167663413377, iteration: 454420
loss: 1.0133486986160278,grad_norm: 0.8313024809464933, iteration: 454421
loss: 0.9966477751731873,grad_norm: 0.8465807924995407, iteration: 454422
loss: 0.9860185980796814,grad_norm: 0.8642557901790159, iteration: 454423
loss: 0.9926424622535706,grad_norm: 0.7543870128504385, iteration: 454424
loss: 0.9887322187423706,grad_norm: 0.6223261249796176, iteration: 454425
loss: 1.0082589387893677,grad_norm: 0.7625504861425916, iteration: 454426
loss: 1.0040929317474365,grad_norm: 0.6570175881867083, iteration: 454427
loss: 0.9834316968917847,grad_norm: 0.7999842964605844, iteration: 454428
loss: 1.024734616279602,grad_norm: 0.8473860703301949, iteration: 454429
loss: 1.0057923793792725,grad_norm: 0.9240999570540038, iteration: 454430
loss: 0.9991016983985901,grad_norm: 0.9293285820856848, iteration: 454431
loss: 1.0085015296936035,grad_norm: 0.7146579848285911, iteration: 454432
loss: 1.0289489030838013,grad_norm: 0.8900401521191741, iteration: 454433
loss: 1.0020371675491333,grad_norm: 0.8115453494419842, iteration: 454434
loss: 1.0035539865493774,grad_norm: 0.7136921881849839, iteration: 454435
loss: 0.9476906061172485,grad_norm: 0.8324444784299886, iteration: 454436
loss: 0.9637821316719055,grad_norm: 0.9318639431453982, iteration: 454437
loss: 0.9932687282562256,grad_norm: 0.9999990809806505, iteration: 454438
loss: 0.9960588216781616,grad_norm: 0.9840912842244629, iteration: 454439
loss: 1.0364768505096436,grad_norm: 0.8745519921947515, iteration: 454440
loss: 0.9853091239929199,grad_norm: 0.9999989524523638, iteration: 454441
loss: 0.9879688620567322,grad_norm: 0.7565730646335745, iteration: 454442
loss: 0.9617642164230347,grad_norm: 0.6635724109609994, iteration: 454443
loss: 1.0011082887649536,grad_norm: 0.8140752915921141, iteration: 454444
loss: 1.017670750617981,grad_norm: 0.8789978815206875, iteration: 454445
loss: 0.971309244632721,grad_norm: 0.6854989145146688, iteration: 454446
loss: 1.0024714469909668,grad_norm: 0.8410505786649414, iteration: 454447
loss: 0.9958432912826538,grad_norm: 0.77724753785203, iteration: 454448
loss: 0.9765973091125488,grad_norm: 0.7130812214569637, iteration: 454449
loss: 0.9633575677871704,grad_norm: 0.8335046721040589, iteration: 454450
loss: 0.9621580243110657,grad_norm: 0.7337268019258711, iteration: 454451
loss: 0.9799740314483643,grad_norm: 0.7710657517251299, iteration: 454452
loss: 0.9933492541313171,grad_norm: 0.687041367062228, iteration: 454453
loss: 1.0159900188446045,grad_norm: 0.7097404366125993, iteration: 454454
loss: 0.9658925533294678,grad_norm: 0.6719742103551334, iteration: 454455
loss: 0.9586766362190247,grad_norm: 0.7774495129304984, iteration: 454456
loss: 1.0060935020446777,grad_norm: 0.7497440535342882, iteration: 454457
loss: 1.0192844867706299,grad_norm: 0.9520986220541919, iteration: 454458
loss: 1.0324530601501465,grad_norm: 0.6438448094339501, iteration: 454459
loss: 0.9900025129318237,grad_norm: 0.7912883731891194, iteration: 454460
loss: 1.0048571825027466,grad_norm: 0.6414633730163016, iteration: 454461
loss: 1.0244537591934204,grad_norm: 0.9999996345570096, iteration: 454462
loss: 0.9957987070083618,grad_norm: 0.7741997333977487, iteration: 454463
loss: 1.020397663116455,grad_norm: 0.7054532103564282, iteration: 454464
loss: 0.9901841282844543,grad_norm: 0.7290674044711737, iteration: 454465
loss: 0.9853507280349731,grad_norm: 0.7254733933535046, iteration: 454466
loss: 0.9923782348632812,grad_norm: 0.9074825757987074, iteration: 454467
loss: 1.0232030153274536,grad_norm: 0.7013535400201331, iteration: 454468
loss: 1.0015202760696411,grad_norm: 0.8790928089154075, iteration: 454469
loss: 1.0038100481033325,grad_norm: 0.999999232523457, iteration: 454470
loss: 0.9789553880691528,grad_norm: 0.7371070762898059, iteration: 454471
loss: 1.0760644674301147,grad_norm: 0.9999997556462095, iteration: 454472
loss: 1.019607424736023,grad_norm: 0.8987099730763805, iteration: 454473
loss: 1.07828950881958,grad_norm: 0.9999990745830646, iteration: 454474
loss: 1.0262515544891357,grad_norm: 0.8536048992075265, iteration: 454475
loss: 1.0525922775268555,grad_norm: 0.7267976200836282, iteration: 454476
loss: 1.0036779642105103,grad_norm: 0.7913783720191772, iteration: 454477
loss: 1.0123733282089233,grad_norm: 0.6946602575986025, iteration: 454478
loss: 1.0203832387924194,grad_norm: 0.7411579157089758, iteration: 454479
loss: 0.9972554445266724,grad_norm: 0.7698327738596651, iteration: 454480
loss: 0.9885166883468628,grad_norm: 0.7306643150267363, iteration: 454481
loss: 1.0110576152801514,grad_norm: 0.9999991222759815, iteration: 454482
loss: 0.9963944554328918,grad_norm: 0.7620985471738174, iteration: 454483
loss: 1.0127679109573364,grad_norm: 0.7948155758081128, iteration: 454484
loss: 1.1057738065719604,grad_norm: 1.0000000036308003, iteration: 454485
loss: 1.009912133216858,grad_norm: 0.6358938113898988, iteration: 454486
loss: 1.00393807888031,grad_norm: 0.7318897226990232, iteration: 454487
loss: 0.9903303384780884,grad_norm: 0.802520208520383, iteration: 454488
loss: 0.9911276698112488,grad_norm: 0.7560463095010198, iteration: 454489
loss: 1.015811800956726,grad_norm: 0.6984301269577428, iteration: 454490
loss: 1.0544224977493286,grad_norm: 0.8379616373297009, iteration: 454491
loss: 0.9591195583343506,grad_norm: 0.9437946983345298, iteration: 454492
loss: 0.9845810532569885,grad_norm: 0.7779374939343859, iteration: 454493
loss: 0.9582619071006775,grad_norm: 0.6670818071563722, iteration: 454494
loss: 0.9538560509681702,grad_norm: 0.7721758610306139, iteration: 454495
loss: 0.9570831060409546,grad_norm: 0.6092528161431155, iteration: 454496
loss: 1.0135010480880737,grad_norm: 0.8146075757440212, iteration: 454497
loss: 1.0391496419906616,grad_norm: 0.703806313685995, iteration: 454498
loss: 1.0159542560577393,grad_norm: 0.7714710651970396, iteration: 454499
loss: 0.988102912902832,grad_norm: 0.7892003406509176, iteration: 454500
loss: 0.9811260104179382,grad_norm: 0.7489341890943205, iteration: 454501
loss: 1.0613206624984741,grad_norm: 0.9999990539161968, iteration: 454502
loss: 1.0264739990234375,grad_norm: 0.8564838287815868, iteration: 454503
loss: 0.9896360039710999,grad_norm: 0.8623455853473242, iteration: 454504
loss: 1.0094105005264282,grad_norm: 0.7257653699371154, iteration: 454505
loss: 0.9822078943252563,grad_norm: 0.7425523607460165, iteration: 454506
loss: 1.0096867084503174,grad_norm: 0.7801765857019052, iteration: 454507
loss: 1.0401581525802612,grad_norm: 0.7867012047322571, iteration: 454508
loss: 1.031103491783142,grad_norm: 0.8016275669467152, iteration: 454509
loss: 1.017113447189331,grad_norm: 0.8124701809145859, iteration: 454510
loss: 0.9873469471931458,grad_norm: 0.739015881614056, iteration: 454511
loss: 0.9709081053733826,grad_norm: 0.7995768834306713, iteration: 454512
loss: 1.0378957986831665,grad_norm: 0.9999996132035675, iteration: 454513
loss: 0.9904280304908752,grad_norm: 0.8152799529648516, iteration: 454514
loss: 0.9910035133361816,grad_norm: 0.8926531417804955, iteration: 454515
loss: 1.0220096111297607,grad_norm: 0.8022330107632436, iteration: 454516
loss: 1.0824458599090576,grad_norm: 0.9141555725196976, iteration: 454517
loss: 1.0425052642822266,grad_norm: 0.6700160560726288, iteration: 454518
loss: 1.1180115938186646,grad_norm: 0.9999994015695537, iteration: 454519
loss: 0.9822768568992615,grad_norm: 0.7603850535500204, iteration: 454520
loss: 0.9992845058441162,grad_norm: 0.7593971328958136, iteration: 454521
loss: 1.0115611553192139,grad_norm: 0.6975015091448513, iteration: 454522
loss: 0.998548150062561,grad_norm: 0.8393066989132255, iteration: 454523
loss: 0.9383861422538757,grad_norm: 0.7738674538633178, iteration: 454524
loss: 1.007642149925232,grad_norm: 0.9491317672665595, iteration: 454525
loss: 1.0224609375,grad_norm: 0.8616271053038327, iteration: 454526
loss: 1.0702953338623047,grad_norm: 0.9999991529471088, iteration: 454527
loss: 0.9978863596916199,grad_norm: 0.678699386490879, iteration: 454528
loss: 1.0217928886413574,grad_norm: 0.706851956490386, iteration: 454529
loss: 0.9653264284133911,grad_norm: 0.6365597124593381, iteration: 454530
loss: 1.0004876852035522,grad_norm: 0.7608151199574982, iteration: 454531
loss: 1.0214449167251587,grad_norm: 0.9999993452904412, iteration: 454532
loss: 1.0004587173461914,grad_norm: 0.8825641434354033, iteration: 454533
loss: 0.9890161752700806,grad_norm: 0.9999990463023295, iteration: 454534
loss: 1.001548171043396,grad_norm: 0.8143117567721232, iteration: 454535
loss: 0.9526640772819519,grad_norm: 0.715953356106559, iteration: 454536
loss: 0.9913053512573242,grad_norm: 0.9508601711727226, iteration: 454537
loss: 0.9535602927207947,grad_norm: 0.9310512377932751, iteration: 454538
loss: 0.9864247441291809,grad_norm: 0.7163582430666013, iteration: 454539
loss: 1.0205892324447632,grad_norm: 0.8184977568707817, iteration: 454540
loss: 1.0577504634857178,grad_norm: 0.7206325347018121, iteration: 454541
loss: 1.0531160831451416,grad_norm: 0.7335446716037771, iteration: 454542
loss: 0.9858824014663696,grad_norm: 0.6788192398695866, iteration: 454543
loss: 0.994317352771759,grad_norm: 0.7905069408000608, iteration: 454544
loss: 0.989957332611084,grad_norm: 0.7469549395650875, iteration: 454545
loss: 1.0226633548736572,grad_norm: 0.999999289963094, iteration: 454546
loss: 1.0117013454437256,grad_norm: 0.7280586015784861, iteration: 454547
loss: 1.0016676187515259,grad_norm: 0.7590417921225268, iteration: 454548
loss: 1.0216045379638672,grad_norm: 0.666540848501694, iteration: 454549
loss: 1.0642215013504028,grad_norm: 0.9999990192237396, iteration: 454550
loss: 1.0031057596206665,grad_norm: 0.9187756881308939, iteration: 454551
loss: 1.0088555812835693,grad_norm: 0.8030633485886208, iteration: 454552
loss: 1.0153982639312744,grad_norm: 0.9094363822127959, iteration: 454553
loss: 1.0785393714904785,grad_norm: 0.9999999821718515, iteration: 454554
loss: 1.004746913909912,grad_norm: 0.999999703956242, iteration: 454555
loss: 1.0367032289505005,grad_norm: 0.6248866029047555, iteration: 454556
loss: 1.0684595108032227,grad_norm: 0.8081211227970068, iteration: 454557
loss: 0.9628899693489075,grad_norm: 0.900715339411253, iteration: 454558
loss: 0.9743741154670715,grad_norm: 0.8637508316141735, iteration: 454559
loss: 1.0391026735305786,grad_norm: 0.9084506740980303, iteration: 454560
loss: 1.0180058479309082,grad_norm: 0.7182984562389398, iteration: 454561
loss: 1.0007600784301758,grad_norm: 0.673351773264212, iteration: 454562
loss: 0.9973503351211548,grad_norm: 0.8066373359745039, iteration: 454563
loss: 1.0315661430358887,grad_norm: 0.7162342962215315, iteration: 454564
loss: 0.9617553949356079,grad_norm: 0.8762810482599024, iteration: 454565
loss: 0.9782175421714783,grad_norm: 0.6796839067069449, iteration: 454566
loss: 1.0105360746383667,grad_norm: 0.732211116969032, iteration: 454567
loss: 0.9753631949424744,grad_norm: 0.7611540166675658, iteration: 454568
loss: 0.9530124068260193,grad_norm: 0.7467970457832204, iteration: 454569
loss: 1.117106318473816,grad_norm: 0.9157891471717183, iteration: 454570
loss: 1.0475460290908813,grad_norm: 0.8091889426224075, iteration: 454571
loss: 1.0381171703338623,grad_norm: 0.8456052240331242, iteration: 454572
loss: 1.0761879682540894,grad_norm: 0.7457802156574656, iteration: 454573
loss: 0.9977987408638,grad_norm: 0.9999994867757434, iteration: 454574
loss: 1.0144566297531128,grad_norm: 0.7876563081591164, iteration: 454575
loss: 1.167584776878357,grad_norm: 0.9999997621983246, iteration: 454576
loss: 0.9742887020111084,grad_norm: 0.7041673473481032, iteration: 454577
loss: 1.097579836845398,grad_norm: 0.9999990897110793, iteration: 454578
loss: 1.0034786462783813,grad_norm: 0.860256874134667, iteration: 454579
loss: 1.061605453491211,grad_norm: 0.8709999305238553, iteration: 454580
loss: 1.0277295112609863,grad_norm: 0.842040591317495, iteration: 454581
loss: 1.0443729162216187,grad_norm: 0.7064387911996108, iteration: 454582
loss: 1.0347647666931152,grad_norm: 0.846437973587662, iteration: 454583
loss: 1.0234781503677368,grad_norm: 0.9933904807656132, iteration: 454584
loss: 0.9702320098876953,grad_norm: 0.7950228709555401, iteration: 454585
loss: 0.9581138491630554,grad_norm: 0.7684106353231274, iteration: 454586
loss: 1.1312776803970337,grad_norm: 0.9999994638718832, iteration: 454587
loss: 1.10884428024292,grad_norm: 0.9999995970029687, iteration: 454588
loss: 0.9923386573791504,grad_norm: 0.8468413585528626, iteration: 454589
loss: 1.0443167686462402,grad_norm: 0.9999996263974842, iteration: 454590
loss: 1.0223811864852905,grad_norm: 0.7319393606752955, iteration: 454591
loss: 1.0716549158096313,grad_norm: 0.9809760300841222, iteration: 454592
loss: 1.0662323236465454,grad_norm: 0.9999998990008313, iteration: 454593
loss: 1.2394466400146484,grad_norm: 0.9999998264607455, iteration: 454594
loss: 1.003078579902649,grad_norm: 0.7748288319962245, iteration: 454595
loss: 1.0095020532608032,grad_norm: 0.9999994963051024, iteration: 454596
loss: 1.029600977897644,grad_norm: 0.7229927648307545, iteration: 454597
loss: 1.0548092126846313,grad_norm: 0.9999992048319023, iteration: 454598
loss: 1.0460715293884277,grad_norm: 0.9121760494226238, iteration: 454599
loss: 0.9622020125389099,grad_norm: 0.7842118159269627, iteration: 454600
loss: 1.0123498439788818,grad_norm: 0.778852381994769, iteration: 454601
loss: 0.989558219909668,grad_norm: 0.760664349664064, iteration: 454602
loss: 1.0947580337524414,grad_norm: 0.9999992225822161, iteration: 454603
loss: 1.0003447532653809,grad_norm: 0.8376308722112826, iteration: 454604
loss: 1.1263706684112549,grad_norm: 0.999999206063819, iteration: 454605
loss: 1.094810128211975,grad_norm: 0.999999395168011, iteration: 454606
loss: 1.0285720825195312,grad_norm: 0.8708335628031072, iteration: 454607
loss: 0.969820499420166,grad_norm: 0.8744101305030677, iteration: 454608
loss: 1.004984736442566,grad_norm: 0.9999994858769728, iteration: 454609
loss: 1.1053844690322876,grad_norm: 0.8773418939722112, iteration: 454610
loss: 1.0393809080123901,grad_norm: 0.999999142833275, iteration: 454611
loss: 1.132688283920288,grad_norm: 0.9999991449232576, iteration: 454612
loss: 1.0202275514602661,grad_norm: 0.5961910439530639, iteration: 454613
loss: 1.0734474658966064,grad_norm: 0.7515965244362423, iteration: 454614
loss: 1.010299563407898,grad_norm: 0.8659316171239988, iteration: 454615
loss: 1.0751596689224243,grad_norm: 0.9999992194231124, iteration: 454616
loss: 0.9953050017356873,grad_norm: 0.701856731271441, iteration: 454617
loss: 1.0127630233764648,grad_norm: 0.7178239510960637, iteration: 454618
loss: 0.9876766800880432,grad_norm: 0.7186969132064873, iteration: 454619
loss: 0.9715138673782349,grad_norm: 0.898101542628043, iteration: 454620
loss: 1.049098014831543,grad_norm: 0.9999996171257269, iteration: 454621
loss: 0.9800868034362793,grad_norm: 0.6459584253332253, iteration: 454622
loss: 1.0845928192138672,grad_norm: 0.8140028458755231, iteration: 454623
loss: 1.1738444566726685,grad_norm: 0.9999996582245858, iteration: 454624
loss: 1.0224206447601318,grad_norm: 0.826090594390387, iteration: 454625
loss: 1.0189707279205322,grad_norm: 0.8842343045845436, iteration: 454626
loss: 1.026553750038147,grad_norm: 0.9999992728854801, iteration: 454627
loss: 1.009940266609192,grad_norm: 0.7029280966043437, iteration: 454628
loss: 1.0184444189071655,grad_norm: 0.9999991481364667, iteration: 454629
loss: 1.0988523960113525,grad_norm: 0.9999994000869024, iteration: 454630
loss: 0.9716103076934814,grad_norm: 0.775514789842836, iteration: 454631
loss: 1.018149495124817,grad_norm: 0.761165218762514, iteration: 454632
loss: 1.0211493968963623,grad_norm: 0.9529237238994707, iteration: 454633
loss: 1.009585976600647,grad_norm: 0.7670845231635148, iteration: 454634
loss: 1.0841155052185059,grad_norm: 0.754797017572445, iteration: 454635
loss: 1.137955665588379,grad_norm: 0.999999933748229, iteration: 454636
loss: 1.0631932020187378,grad_norm: 0.8066329718978476, iteration: 454637
loss: 1.0428084135055542,grad_norm: 0.8793440028599793, iteration: 454638
loss: 0.9769126176834106,grad_norm: 0.8260662926214243, iteration: 454639
loss: 1.0253117084503174,grad_norm: 0.9375414456556472, iteration: 454640
loss: 1.011178731918335,grad_norm: 0.9673149294274682, iteration: 454641
loss: 1.1398690938949585,grad_norm: 0.9999999139380017, iteration: 454642
loss: 1.0336610078811646,grad_norm: 0.9999994991172738, iteration: 454643
loss: 1.0108627080917358,grad_norm: 0.9999998449055156, iteration: 454644
loss: 1.0359374284744263,grad_norm: 0.7122108786273741, iteration: 454645
loss: 0.9879109263420105,grad_norm: 0.7388336977136117, iteration: 454646
loss: 1.03075110912323,grad_norm: 0.9999998353121957, iteration: 454647
loss: 1.0287818908691406,grad_norm: 0.8842176563911899, iteration: 454648
loss: 1.0214604139328003,grad_norm: 0.9999990839866525, iteration: 454649
loss: 0.9899685382843018,grad_norm: 0.999999488847349, iteration: 454650
loss: 0.9809322357177734,grad_norm: 0.9999992014672204, iteration: 454651
loss: 1.0253328084945679,grad_norm: 0.9663014261734567, iteration: 454652
loss: 1.003470540046692,grad_norm: 0.9999990865936754, iteration: 454653
loss: 1.0310989618301392,grad_norm: 0.8957262128026187, iteration: 454654
loss: 0.9820325970649719,grad_norm: 0.9075622562113796, iteration: 454655
loss: 1.0169388055801392,grad_norm: 0.6785783250102789, iteration: 454656
loss: 1.0148590803146362,grad_norm: 0.9999992059121157, iteration: 454657
loss: 1.0024933815002441,grad_norm: 0.8160069900909496, iteration: 454658
loss: 1.0041407346725464,grad_norm: 0.6904114275223745, iteration: 454659
loss: 1.0566411018371582,grad_norm: 0.9060943947982727, iteration: 454660
loss: 1.0112570524215698,grad_norm: 0.70640324098091, iteration: 454661
loss: 0.9680868983268738,grad_norm: 0.9312603408089106, iteration: 454662
loss: 1.0200321674346924,grad_norm: 0.7271296665830431, iteration: 454663
loss: 0.9624897837638855,grad_norm: 0.7347968309563043, iteration: 454664
loss: 0.9873178601264954,grad_norm: 0.9101284511269587, iteration: 454665
loss: 0.9586580395698547,grad_norm: 0.8613966543255344, iteration: 454666
loss: 1.0671032667160034,grad_norm: 0.8077829962719251, iteration: 454667
loss: 0.9646540284156799,grad_norm: 0.6228503676594515, iteration: 454668
loss: 0.9995781183242798,grad_norm: 0.7100555149900478, iteration: 454669
loss: 1.0076624155044556,grad_norm: 0.8516987575413144, iteration: 454670
loss: 1.0177381038665771,grad_norm: 0.9999989572282253, iteration: 454671
loss: 0.9790611267089844,grad_norm: 0.7996230943763188, iteration: 454672
loss: 0.9909172654151917,grad_norm: 0.718669171774308, iteration: 454673
loss: 1.0045872926712036,grad_norm: 0.8565439123751354, iteration: 454674
loss: 1.0048834085464478,grad_norm: 0.8061480485218961, iteration: 454675
loss: 0.9754551649093628,grad_norm: 0.8976942915284196, iteration: 454676
loss: 1.0887717008590698,grad_norm: 0.8529244556667119, iteration: 454677
loss: 1.0013443231582642,grad_norm: 0.8909946447545403, iteration: 454678
loss: 0.9767019152641296,grad_norm: 0.680787368871114, iteration: 454679
loss: 0.9975899457931519,grad_norm: 0.785159612993511, iteration: 454680
loss: 1.0213505029678345,grad_norm: 0.6502352132893795, iteration: 454681
loss: 0.9995179772377014,grad_norm: 0.7913766209898263, iteration: 454682
loss: 1.013299822807312,grad_norm: 0.8386445309401579, iteration: 454683
loss: 1.028050184249878,grad_norm: 0.6513856451309861, iteration: 454684
loss: 1.0177044868469238,grad_norm: 0.7255760979873656, iteration: 454685
loss: 1.0161159038543701,grad_norm: 0.8009389015895614, iteration: 454686
loss: 1.0540260076522827,grad_norm: 0.8293501157424261, iteration: 454687
loss: 0.9877128005027771,grad_norm: 0.8561091701854712, iteration: 454688
loss: 0.9999872446060181,grad_norm: 0.6914578476668508, iteration: 454689
loss: 1.0110541582107544,grad_norm: 0.8008300288681052, iteration: 454690
loss: 0.9930131435394287,grad_norm: 0.7405854408557152, iteration: 454691
loss: 1.0345823764801025,grad_norm: 0.8572435287868148, iteration: 454692
loss: 0.9995006322860718,grad_norm: 0.999999468403627, iteration: 454693
loss: 0.9816148281097412,grad_norm: 0.6499071513566981, iteration: 454694
loss: 0.9854453802108765,grad_norm: 0.8161896646441432, iteration: 454695
loss: 0.997618556022644,grad_norm: 0.7857242069545111, iteration: 454696
loss: 1.0098286867141724,grad_norm: 0.682049301172721, iteration: 454697
loss: 0.9802204370498657,grad_norm: 0.696940338115768, iteration: 454698
loss: 0.9835560917854309,grad_norm: 0.9580189061935891, iteration: 454699
loss: 1.0332170724868774,grad_norm: 0.9318607331992489, iteration: 454700
loss: 1.0124812126159668,grad_norm: 0.8749584837032617, iteration: 454701
loss: 1.0021106004714966,grad_norm: 0.7246588622756749, iteration: 454702
loss: 1.0057982206344604,grad_norm: 0.7202174243269183, iteration: 454703
loss: 0.9936975836753845,grad_norm: 0.9999998415641213, iteration: 454704
loss: 1.0170658826828003,grad_norm: 0.6901634123844993, iteration: 454705
loss: 0.9868388772010803,grad_norm: 0.7950599575661734, iteration: 454706
loss: 0.9987185001373291,grad_norm: 0.9999998302435693, iteration: 454707
loss: 1.0168297290802002,grad_norm: 0.8061865433665114, iteration: 454708
loss: 1.0308231115341187,grad_norm: 0.9999995427176722, iteration: 454709
loss: 0.9704805612564087,grad_norm: 0.9947808245165783, iteration: 454710
loss: 1.02370285987854,grad_norm: 0.7579499153357946, iteration: 454711
loss: 0.9959404468536377,grad_norm: 0.7322167536035135, iteration: 454712
loss: 0.9977392554283142,grad_norm: 0.8323994364201227, iteration: 454713
loss: 1.031324028968811,grad_norm: 0.7641801675640091, iteration: 454714
loss: 0.9767783284187317,grad_norm: 0.8199132129170035, iteration: 454715
loss: 0.9823404550552368,grad_norm: 0.6624039665211242, iteration: 454716
loss: 1.0095560550689697,grad_norm: 0.6716123987805045, iteration: 454717
loss: 0.9613685011863708,grad_norm: 0.8834776547898062, iteration: 454718
loss: 1.0284740924835205,grad_norm: 0.6857685231239651, iteration: 454719
loss: 1.0089911222457886,grad_norm: 0.868384481500827, iteration: 454720
loss: 1.0105178356170654,grad_norm: 0.7455510685163305, iteration: 454721
loss: 1.1041371822357178,grad_norm: 0.9999996404073126, iteration: 454722
loss: 0.9997928738594055,grad_norm: 0.6796736700212259, iteration: 454723
loss: 0.9993457794189453,grad_norm: 0.86135430679419, iteration: 454724
loss: 1.091630220413208,grad_norm: 0.9999991034116847, iteration: 454725
loss: 1.0217933654785156,grad_norm: 0.6752835436985511, iteration: 454726
loss: 1.015267014503479,grad_norm: 0.6857209639920457, iteration: 454727
loss: 0.9476372599601746,grad_norm: 0.9249651330984366, iteration: 454728
loss: 1.0531541109085083,grad_norm: 0.7778419098145268, iteration: 454729
loss: 1.0119562149047852,grad_norm: 0.780826629534574, iteration: 454730
loss: 1.0072486400604248,grad_norm: 0.8004477357705099, iteration: 454731
loss: 1.0013028383255005,grad_norm: 0.7000213230292597, iteration: 454732
loss: 1.0069198608398438,grad_norm: 0.9351980284520057, iteration: 454733
loss: 1.0061825513839722,grad_norm: 0.9999998664152863, iteration: 454734
loss: 0.9970439672470093,grad_norm: 0.817266330737486, iteration: 454735
loss: 1.076143741607666,grad_norm: 0.9999998774144492, iteration: 454736
loss: 0.9872192144393921,grad_norm: 0.7314773651607973, iteration: 454737
loss: 1.0071974992752075,grad_norm: 0.7914966677364128, iteration: 454738
loss: 1.0036693811416626,grad_norm: 0.999999103605959, iteration: 454739
loss: 1.022694706916809,grad_norm: 0.9565725921575067, iteration: 454740
loss: 1.0375874042510986,grad_norm: 0.8527365820444365, iteration: 454741
loss: 0.9811057448387146,grad_norm: 0.8375290455547684, iteration: 454742
loss: 0.9503116011619568,grad_norm: 0.69387589553527, iteration: 454743
loss: 1.0019502639770508,grad_norm: 0.6461348964398614, iteration: 454744
loss: 1.0226991176605225,grad_norm: 0.846269579444054, iteration: 454745
loss: 0.9518449902534485,grad_norm: 0.9999998539384759, iteration: 454746
loss: 1.0483262538909912,grad_norm: 0.7502964100474988, iteration: 454747
loss: 1.1189934015274048,grad_norm: 0.999999774901965, iteration: 454748
loss: 0.9919303059577942,grad_norm: 0.8691269367316158, iteration: 454749
loss: 1.0417410135269165,grad_norm: 0.9999994401062089, iteration: 454750
loss: 1.0149312019348145,grad_norm: 0.9637627562545533, iteration: 454751
loss: 1.0216699838638306,grad_norm: 0.9555596223158059, iteration: 454752
loss: 0.9989297389984131,grad_norm: 0.8041305902256444, iteration: 454753
loss: 1.001111388206482,grad_norm: 0.7686979208574137, iteration: 454754
loss: 0.9693284630775452,grad_norm: 0.8194898224360665, iteration: 454755
loss: 0.9964917302131653,grad_norm: 0.8431705968076225, iteration: 454756
loss: 0.9713058471679688,grad_norm: 0.6573621649347804, iteration: 454757
loss: 1.0138370990753174,grad_norm: 0.7080107389498439, iteration: 454758
loss: 1.0576635599136353,grad_norm: 0.9060485638849278, iteration: 454759
loss: 1.0299948453903198,grad_norm: 0.7690905822185918, iteration: 454760
loss: 0.9812653064727783,grad_norm: 0.7511035262332048, iteration: 454761
loss: 0.9837172031402588,grad_norm: 0.7649126821538784, iteration: 454762
loss: 1.1523433923721313,grad_norm: 0.9999992368055057, iteration: 454763
loss: 1.0008442401885986,grad_norm: 0.8213636572737848, iteration: 454764
loss: 0.9861050248146057,grad_norm: 0.8301225588859248, iteration: 454765
loss: 1.0278581380844116,grad_norm: 0.9999991992332031, iteration: 454766
loss: 0.9743021130561829,grad_norm: 0.7244283238855971, iteration: 454767
loss: 0.9861025214195251,grad_norm: 0.7173295979462408, iteration: 454768
loss: 0.9856644868850708,grad_norm: 0.7756803317081747, iteration: 454769
loss: 0.9840919971466064,grad_norm: 0.7150794949023338, iteration: 454770
loss: 1.041853666305542,grad_norm: 0.8430640892467642, iteration: 454771
loss: 0.9562545418739319,grad_norm: 0.8773298064165982, iteration: 454772
loss: 1.0160293579101562,grad_norm: 0.6777836198306492, iteration: 454773
loss: 0.97527015209198,grad_norm: 0.8413915640198188, iteration: 454774
loss: 1.0288805961608887,grad_norm: 0.9999990334130775, iteration: 454775
loss: 1.0436536073684692,grad_norm: 0.999999405161151, iteration: 454776
loss: 0.9698553681373596,grad_norm: 0.6567803350961476, iteration: 454777
loss: 1.1191610097885132,grad_norm: 0.9999999018279231, iteration: 454778
loss: 1.024537205696106,grad_norm: 0.6682924438045393, iteration: 454779
loss: 1.0303717851638794,grad_norm: 0.9999989910935769, iteration: 454780
loss: 0.9710381627082825,grad_norm: 0.8447535182362661, iteration: 454781
loss: 0.9739519953727722,grad_norm: 0.7896054717724669, iteration: 454782
loss: 1.0147390365600586,grad_norm: 0.9173412761772033, iteration: 454783
loss: 0.9666088223457336,grad_norm: 0.7335237063450059, iteration: 454784
loss: 0.984123170375824,grad_norm: 0.7970046756154465, iteration: 454785
loss: 0.9730026125907898,grad_norm: 0.7079378915233657, iteration: 454786
loss: 1.0238932371139526,grad_norm: 0.7017507209106814, iteration: 454787
loss: 1.0275806188583374,grad_norm: 0.7874112083924634, iteration: 454788
loss: 1.004789113998413,grad_norm: 0.9999990974344712, iteration: 454789
loss: 0.9676075577735901,grad_norm: 0.677844652769928, iteration: 454790
loss: 1.000739336013794,grad_norm: 0.688128903131431, iteration: 454791
loss: 1.013502836227417,grad_norm: 0.7400432000132054, iteration: 454792
loss: 1.0125677585601807,grad_norm: 0.9727029111048484, iteration: 454793
loss: 1.0000545978546143,grad_norm: 0.7909664780393201, iteration: 454794
loss: 1.0316112041473389,grad_norm: 0.7716792652100247, iteration: 454795
loss: 1.0238821506500244,grad_norm: 0.6764814609358866, iteration: 454796
loss: 1.0401458740234375,grad_norm: 0.9429349762469708, iteration: 454797
loss: 1.0553046464920044,grad_norm: 0.6709955451180336, iteration: 454798
loss: 1.014337182044983,grad_norm: 0.6965104078125278, iteration: 454799
loss: 0.986415445804596,grad_norm: 0.9999992397552945, iteration: 454800
loss: 1.0175811052322388,grad_norm: 0.9257099617542287, iteration: 454801
loss: 0.9834352731704712,grad_norm: 0.7439482341353383, iteration: 454802
loss: 1.0125823020935059,grad_norm: 0.7112380362210918, iteration: 454803
loss: 1.02176034450531,grad_norm: 0.8732718355059053, iteration: 454804
loss: 0.9498322606086731,grad_norm: 0.8692042570986003, iteration: 454805
loss: 1.017020344734192,grad_norm: 0.9399710697883088, iteration: 454806
loss: 0.9998136758804321,grad_norm: 0.8187432824129869, iteration: 454807
loss: 0.9977952837944031,grad_norm: 0.8465067722109835, iteration: 454808
loss: 1.0841033458709717,grad_norm: 0.9999998734490116, iteration: 454809
loss: 0.9979130029678345,grad_norm: 0.7717351506900856, iteration: 454810
loss: 0.9812161326408386,grad_norm: 0.796138837869162, iteration: 454811
loss: 1.0248717069625854,grad_norm: 0.7365718480152288, iteration: 454812
loss: 1.0309360027313232,grad_norm: 0.7853422266040712, iteration: 454813
loss: 1.0106309652328491,grad_norm: 0.80653122903262, iteration: 454814
loss: 0.9680965542793274,grad_norm: 0.8243974802798539, iteration: 454815
loss: 0.9895358681678772,grad_norm: 0.7355479110832353, iteration: 454816
loss: 1.003096103668213,grad_norm: 0.8412427616773193, iteration: 454817
loss: 1.0217363834381104,grad_norm: 0.7816118264994305, iteration: 454818
loss: 1.0014573335647583,grad_norm: 0.8191932081963748, iteration: 454819
loss: 1.0240885019302368,grad_norm: 0.9999996608836148, iteration: 454820
loss: 1.0635566711425781,grad_norm: 0.9999991475581945, iteration: 454821
loss: 0.9959426522254944,grad_norm: 0.6287913131529228, iteration: 454822
loss: 0.9909118413925171,grad_norm: 0.6923595091825757, iteration: 454823
loss: 1.003676176071167,grad_norm: 0.7370931187643402, iteration: 454824
loss: 1.0170539617538452,grad_norm: 0.7618463630257846, iteration: 454825
loss: 0.9747790098190308,grad_norm: 0.7284614209597499, iteration: 454826
loss: 1.0084388256072998,grad_norm: 0.6080419727144725, iteration: 454827
loss: 1.0157021284103394,grad_norm: 0.7279536908663009, iteration: 454828
loss: 0.9698802828788757,grad_norm: 0.8221826686730731, iteration: 454829
loss: 1.0093125104904175,grad_norm: 0.7611911474284501, iteration: 454830
loss: 0.9968526363372803,grad_norm: 0.9999992419980855, iteration: 454831
loss: 1.0137770175933838,grad_norm: 0.8916838728774029, iteration: 454832
loss: 0.9826583862304688,grad_norm: 0.7129809629708639, iteration: 454833
loss: 0.979350209236145,grad_norm: 0.776613958873087, iteration: 454834
loss: 1.0062888860702515,grad_norm: 0.9117947040824571, iteration: 454835
loss: 0.9784896969795227,grad_norm: 0.719691673169786, iteration: 454836
loss: 0.9993865489959717,grad_norm: 0.709840631322716, iteration: 454837
loss: 1.031309962272644,grad_norm: 0.9999991885019114, iteration: 454838
loss: 0.9903748631477356,grad_norm: 0.8731438339516981, iteration: 454839
loss: 1.1100225448608398,grad_norm: 0.9999992020955649, iteration: 454840
loss: 1.0256149768829346,grad_norm: 0.9488507212656739, iteration: 454841
loss: 0.9892706871032715,grad_norm: 0.8016615972541317, iteration: 454842
loss: 1.0068141222000122,grad_norm: 0.9999990864434797, iteration: 454843
loss: 1.011211633682251,grad_norm: 0.708858079386203, iteration: 454844
loss: 0.9938125014305115,grad_norm: 0.6611329152773419, iteration: 454845
loss: 1.0042842626571655,grad_norm: 0.9010076208549559, iteration: 454846
loss: 1.0440646409988403,grad_norm: 0.9613141817412894, iteration: 454847
loss: 1.013335108757019,grad_norm: 0.9999993056934445, iteration: 454848
loss: 1.0749415159225464,grad_norm: 0.9999991687549167, iteration: 454849
loss: 0.9934678673744202,grad_norm: 0.8352395176993519, iteration: 454850
loss: 0.9496118426322937,grad_norm: 0.7377461439671543, iteration: 454851
loss: 0.9960559010505676,grad_norm: 0.9690369536021719, iteration: 454852
loss: 0.9872283935546875,grad_norm: 0.8121576503299021, iteration: 454853
loss: 1.024147868156433,grad_norm: 0.9722970090002817, iteration: 454854
loss: 1.0291335582733154,grad_norm: 0.9999994755746364, iteration: 454855
loss: 1.0477404594421387,grad_norm: 0.9635955629070723, iteration: 454856
loss: 0.9754005670547485,grad_norm: 0.7265633757069923, iteration: 454857
loss: 0.9923796653747559,grad_norm: 0.827376129167231, iteration: 454858
loss: 1.0199638605117798,grad_norm: 0.9056871934305976, iteration: 454859
loss: 0.971405029296875,grad_norm: 0.8950506043444649, iteration: 454860
loss: 0.9773849248886108,grad_norm: 0.8163180225225896, iteration: 454861
loss: 1.0336838960647583,grad_norm: 0.7622630483810705, iteration: 454862
loss: 1.0198452472686768,grad_norm: 0.9999996335495842, iteration: 454863
loss: 0.9920356869697571,grad_norm: 0.672143980636317, iteration: 454864
loss: 1.00657320022583,grad_norm: 0.9528112384657696, iteration: 454865
loss: 1.0110886096954346,grad_norm: 0.8182582287376243, iteration: 454866
loss: 1.001839518547058,grad_norm: 0.6017596356523235, iteration: 454867
loss: 0.9942548871040344,grad_norm: 0.7494184404892926, iteration: 454868
loss: 0.9648603200912476,grad_norm: 0.7622492062333605, iteration: 454869
loss: 1.0115083456039429,grad_norm: 0.7215425003650061, iteration: 454870
loss: 1.0202854871749878,grad_norm: 0.7163440787265021, iteration: 454871
loss: 1.0018261671066284,grad_norm: 0.8038043628341243, iteration: 454872
loss: 0.9482225775718689,grad_norm: 0.9980711413495332, iteration: 454873
loss: 1.027312159538269,grad_norm: 0.7488979415317567, iteration: 454874
loss: 0.9847007393836975,grad_norm: 0.8072312264259118, iteration: 454875
loss: 1.119953989982605,grad_norm: 0.9999993259739602, iteration: 454876
loss: 0.9698514342308044,grad_norm: 0.881993127657325, iteration: 454877
loss: 0.9867888689041138,grad_norm: 0.9999992214337743, iteration: 454878
loss: 1.0006240606307983,grad_norm: 0.7301903410559358, iteration: 454879
loss: 0.9787636995315552,grad_norm: 0.9763733470485955, iteration: 454880
loss: 1.011169195175171,grad_norm: 0.7979886620254235, iteration: 454881
loss: 1.002797245979309,grad_norm: 0.7490862347263957, iteration: 454882
loss: 0.963233232498169,grad_norm: 0.7337320855331755, iteration: 454883
loss: 0.9600266218185425,grad_norm: 0.7519290753458853, iteration: 454884
loss: 0.959568977355957,grad_norm: 0.8225189598803611, iteration: 454885
loss: 1.0094817876815796,grad_norm: 0.7660161320937048, iteration: 454886
loss: 0.944736659526825,grad_norm: 0.7887631596473966, iteration: 454887
loss: 1.0091654062271118,grad_norm: 0.8918533486273076, iteration: 454888
loss: 1.0003900527954102,grad_norm: 0.9817642649471403, iteration: 454889
loss: 0.9701603055000305,grad_norm: 0.7304647641412958, iteration: 454890
loss: 1.0283125638961792,grad_norm: 0.76927763338662, iteration: 454891
loss: 0.9769923686981201,grad_norm: 0.846715541505717, iteration: 454892
loss: 1.001695990562439,grad_norm: 0.703934284742874, iteration: 454893
loss: 0.969654381275177,grad_norm: 0.9927249867977997, iteration: 454894
loss: 0.9987389445304871,grad_norm: 0.8284072547959448, iteration: 454895
loss: 1.0066388845443726,grad_norm: 0.7616501912146729, iteration: 454896
loss: 0.9997758865356445,grad_norm: 0.7140973126385696, iteration: 454897
loss: 1.039997935295105,grad_norm: 0.75006004097337, iteration: 454898
loss: 0.9940802454948425,grad_norm: 0.7891425681865339, iteration: 454899
loss: 0.9756765961647034,grad_norm: 0.8092837492373162, iteration: 454900
loss: 1.003264307975769,grad_norm: 0.7278688697117992, iteration: 454901
loss: 1.0013892650604248,grad_norm: 0.7206281669273871, iteration: 454902
loss: 1.041678547859192,grad_norm: 0.838089623789497, iteration: 454903
loss: 1.0015881061553955,grad_norm: 0.6827241480954606, iteration: 454904
loss: 1.0231575965881348,grad_norm: 0.9999993089468213, iteration: 454905
loss: 0.9679768681526184,grad_norm: 0.7862497353395256, iteration: 454906
loss: 1.071649193763733,grad_norm: 0.712029392227203, iteration: 454907
loss: 0.9897326827049255,grad_norm: 0.6905552728824823, iteration: 454908
loss: 1.0632617473602295,grad_norm: 0.8127528247989115, iteration: 454909
loss: 0.9953391551971436,grad_norm: 0.8310015171708248, iteration: 454910
loss: 0.9694969058036804,grad_norm: 0.6707417438499277, iteration: 454911
loss: 0.9808058142662048,grad_norm: 0.8178440683299213, iteration: 454912
loss: 0.9856252074241638,grad_norm: 0.7531080053899096, iteration: 454913
loss: 1.0108851194381714,grad_norm: 0.7327662348463024, iteration: 454914
loss: 1.0531256198883057,grad_norm: 0.702081588419673, iteration: 454915
loss: 1.0115562677383423,grad_norm: 0.7616714456424442, iteration: 454916
loss: 0.9905205368995667,grad_norm: 0.8584073188552155, iteration: 454917
loss: 0.9653592109680176,grad_norm: 0.8439761590193615, iteration: 454918
loss: 0.9939550161361694,grad_norm: 0.7781267837868682, iteration: 454919
loss: 0.9939097166061401,grad_norm: 0.7545253093003744, iteration: 454920
loss: 1.0021194219589233,grad_norm: 0.6835895495156703, iteration: 454921
loss: 0.9908248782157898,grad_norm: 0.6652898362444588, iteration: 454922
loss: 1.0025964975357056,grad_norm: 0.6983898430981831, iteration: 454923
loss: 0.977841317653656,grad_norm: 0.7174716354543925, iteration: 454924
loss: 1.0441542863845825,grad_norm: 0.9999992233350528, iteration: 454925
loss: 1.019405722618103,grad_norm: 0.79697456844764, iteration: 454926
loss: 1.0168505907058716,grad_norm: 0.8934255573526331, iteration: 454927
loss: 0.9863027930259705,grad_norm: 0.6552784130317565, iteration: 454928
loss: 0.982286274433136,grad_norm: 0.6838087686480494, iteration: 454929
loss: 1.0789670944213867,grad_norm: 0.8571180935579144, iteration: 454930
loss: 0.9962193369865417,grad_norm: 0.848872849967105, iteration: 454931
loss: 1.083192229270935,grad_norm: 0.9999999720562598, iteration: 454932
loss: 0.9971861243247986,grad_norm: 0.9640839103118986, iteration: 454933
loss: 1.0808908939361572,grad_norm: 0.6619031195907992, iteration: 454934
loss: 0.9675314426422119,grad_norm: 0.9136438452852799, iteration: 454935
loss: 1.0460927486419678,grad_norm: 0.9747971042385031, iteration: 454936
loss: 1.0392411947250366,grad_norm: 0.6505785656390214, iteration: 454937
loss: 0.9873015284538269,grad_norm: 0.8630414197860694, iteration: 454938
loss: 1.0795749425888062,grad_norm: 0.8597204057525215, iteration: 454939
loss: 0.985331118106842,grad_norm: 0.8641021590690373, iteration: 454940
loss: 1.008662223815918,grad_norm: 0.7859768351056206, iteration: 454941
loss: 0.9999958872795105,grad_norm: 0.8158818231671582, iteration: 454942
loss: 1.0002433061599731,grad_norm: 0.8411994582659786, iteration: 454943
loss: 0.9899013042449951,grad_norm: 0.6212566794855433, iteration: 454944
loss: 0.9649990200996399,grad_norm: 0.8732256812615135, iteration: 454945
loss: 1.0060421228408813,grad_norm: 0.6994184791763755, iteration: 454946
loss: 1.0002669095993042,grad_norm: 0.9999994074592687, iteration: 454947
loss: 1.0202736854553223,grad_norm: 0.8500960112903921, iteration: 454948
loss: 1.0215250253677368,grad_norm: 0.9018286543838886, iteration: 454949
loss: 0.964626133441925,grad_norm: 0.705458545906906, iteration: 454950
loss: 1.0019891262054443,grad_norm: 0.7209628457797645, iteration: 454951
loss: 0.9382280111312866,grad_norm: 0.6784020087883917, iteration: 454952
loss: 1.0167691707611084,grad_norm: 0.9999991592950518, iteration: 454953
loss: 1.0135478973388672,grad_norm: 0.7780224411914622, iteration: 454954
loss: 1.0131248235702515,grad_norm: 0.7365420707456868, iteration: 454955
loss: 1.0455690622329712,grad_norm: 0.9999994573914309, iteration: 454956
loss: 1.014427900314331,grad_norm: 0.7872856205199761, iteration: 454957
loss: 0.9690677523612976,grad_norm: 0.9113734450837259, iteration: 454958
loss: 1.0191781520843506,grad_norm: 0.8531209521255806, iteration: 454959
loss: 1.0137871503829956,grad_norm: 0.7871518099634577, iteration: 454960
loss: 1.0156787633895874,grad_norm: 0.7187301378290673, iteration: 454961
loss: 0.9732980132102966,grad_norm: 0.9649345381641549, iteration: 454962
loss: 1.01032292842865,grad_norm: 0.9248671935332564, iteration: 454963
loss: 1.0260802507400513,grad_norm: 0.7665362523625806, iteration: 454964
loss: 1.0087902545928955,grad_norm: 0.7332879088351718, iteration: 454965
loss: 1.0022449493408203,grad_norm: 0.9072148822759915, iteration: 454966
loss: 1.0267767906188965,grad_norm: 0.6640959302934341, iteration: 454967
loss: 1.011328935623169,grad_norm: 0.678906353494882, iteration: 454968
loss: 1.0322750806808472,grad_norm: 0.9999990296103225, iteration: 454969
loss: 0.9701212048530579,grad_norm: 0.7687683509886764, iteration: 454970
loss: 0.9935172200202942,grad_norm: 0.7035799001260292, iteration: 454971
loss: 1.0077741146087646,grad_norm: 0.9136166125144041, iteration: 454972
loss: 1.0198805332183838,grad_norm: 0.7406001051725927, iteration: 454973
loss: 1.0092068910598755,grad_norm: 0.723174588904395, iteration: 454974
loss: 0.9710991382598877,grad_norm: 0.7374815316692284, iteration: 454975
loss: 0.9748774170875549,grad_norm: 0.7245362004222825, iteration: 454976
loss: 0.9896603226661682,grad_norm: 0.8733677239738524, iteration: 454977
loss: 1.0330320596694946,grad_norm: 0.6971773020296765, iteration: 454978
loss: 1.0229856967926025,grad_norm: 0.8644181726153081, iteration: 454979
loss: 0.9982084035873413,grad_norm: 0.8575519512079246, iteration: 454980
loss: 0.9777204990386963,grad_norm: 0.9353143220998799, iteration: 454981
loss: 0.9836633801460266,grad_norm: 0.9999994746691385, iteration: 454982
loss: 1.0271047353744507,grad_norm: 0.9999992239638119, iteration: 454983
loss: 0.9512483477592468,grad_norm: 0.833855096256163, iteration: 454984
loss: 1.0190142393112183,grad_norm: 0.7716424160694036, iteration: 454985
loss: 1.0085078477859497,grad_norm: 0.8173493054417896, iteration: 454986
loss: 0.9844439625740051,grad_norm: 0.7408128743501478, iteration: 454987
loss: 0.9977095127105713,grad_norm: 0.8332157696810792, iteration: 454988
loss: 0.9923123717308044,grad_norm: 0.7312790609523302, iteration: 454989
loss: 0.9562447667121887,grad_norm: 0.7594832465356415, iteration: 454990
loss: 1.0090514421463013,grad_norm: 0.6842225251281063, iteration: 454991
loss: 1.0141990184783936,grad_norm: 0.9999993475277261, iteration: 454992
loss: 1.0062512159347534,grad_norm: 0.8467384322318381, iteration: 454993
loss: 1.010154128074646,grad_norm: 0.9999990536592083, iteration: 454994
loss: 0.988064169883728,grad_norm: 0.768598510417807, iteration: 454995
loss: 1.011942982673645,grad_norm: 0.6795571771113749, iteration: 454996
loss: 1.0038400888442993,grad_norm: 0.7583773313437223, iteration: 454997
loss: 0.9893508553504944,grad_norm: 0.7267135890258967, iteration: 454998
loss: 1.0129971504211426,grad_norm: 0.7973195182125318, iteration: 454999
loss: 1.0070019960403442,grad_norm: 0.7820904335073006, iteration: 455000
loss: 1.0050956010818481,grad_norm: 0.7439347778895155, iteration: 455001
loss: 0.9845555424690247,grad_norm: 0.7317828048209222, iteration: 455002
loss: 1.0411245822906494,grad_norm: 0.9059565050523825, iteration: 455003
loss: 0.9969004392623901,grad_norm: 0.6942891655139468, iteration: 455004
loss: 1.0156564712524414,grad_norm: 0.7520890595766105, iteration: 455005
loss: 1.139704942703247,grad_norm: 0.9999993674665755, iteration: 455006
loss: 0.9546723961830139,grad_norm: 0.8776309378695617, iteration: 455007
loss: 0.9549806714057922,grad_norm: 0.8286743160325092, iteration: 455008
loss: 1.0118627548217773,grad_norm: 0.6953836568693512, iteration: 455009
loss: 0.9892334938049316,grad_norm: 0.6588667099647357, iteration: 455010
loss: 1.014384388923645,grad_norm: 0.7376595345774767, iteration: 455011
loss: 1.03257155418396,grad_norm: 0.6695055702669137, iteration: 455012
loss: 0.9929192066192627,grad_norm: 0.8654732002002102, iteration: 455013
loss: 0.9711484313011169,grad_norm: 0.7547257004243441, iteration: 455014
loss: 1.007567286491394,grad_norm: 0.6052940876606078, iteration: 455015
loss: 1.0017787218093872,grad_norm: 0.8876137168107735, iteration: 455016
loss: 1.1143301725387573,grad_norm: 0.9999997869880921, iteration: 455017
loss: 1.000045657157898,grad_norm: 0.8344452010460021, iteration: 455018
loss: 1.025588035583496,grad_norm: 0.915455830639018, iteration: 455019
loss: 0.9754090905189514,grad_norm: 0.7751641910849969, iteration: 455020
loss: 0.9952387809753418,grad_norm: 0.6911083761751738, iteration: 455021
loss: 1.0611474514007568,grad_norm: 0.9999993868257278, iteration: 455022
loss: 1.0128124952316284,grad_norm: 0.7831571770268271, iteration: 455023
loss: 0.9580599069595337,grad_norm: 0.6879442014389173, iteration: 455024
loss: 0.9863439798355103,grad_norm: 0.7430570049369678, iteration: 455025
loss: 1.0103224515914917,grad_norm: 0.9999995332001955, iteration: 455026
loss: 0.9774439334869385,grad_norm: 0.7373127177495155, iteration: 455027
loss: 1.0306525230407715,grad_norm: 0.8657935385906953, iteration: 455028
loss: 1.0124748945236206,grad_norm: 0.9009073703459921, iteration: 455029
loss: 1.0102720260620117,grad_norm: 0.8628455182649762, iteration: 455030
loss: 1.1070117950439453,grad_norm: 0.8521128749465257, iteration: 455031
loss: 0.9753511548042297,grad_norm: 0.7714892453993527, iteration: 455032
loss: 0.9845973253250122,grad_norm: 0.9184984573214006, iteration: 455033
loss: 0.9830485582351685,grad_norm: 0.8503509260560573, iteration: 455034
loss: 0.9751588702201843,grad_norm: 0.957128919172544, iteration: 455035
loss: 1.0939699411392212,grad_norm: 0.7577560623928071, iteration: 455036
loss: 1.0073003768920898,grad_norm: 0.7805331149995445, iteration: 455037
loss: 1.0127863883972168,grad_norm: 0.6591494834072278, iteration: 455038
loss: 0.9774190187454224,grad_norm: 0.6096963144274098, iteration: 455039
loss: 1.0002485513687134,grad_norm: 0.8644225903997398, iteration: 455040
loss: 1.009853720664978,grad_norm: 0.9330549122552236, iteration: 455041
loss: 0.9526633024215698,grad_norm: 0.6634520932231588, iteration: 455042
loss: 0.9895775318145752,grad_norm: 0.6921560108309004, iteration: 455043
loss: 0.9670306444168091,grad_norm: 0.7035130080927462, iteration: 455044
loss: 0.9638731479644775,grad_norm: 0.8046464026055015, iteration: 455045
loss: 0.9748258590698242,grad_norm: 0.7678271443932714, iteration: 455046
loss: 0.9817303419113159,grad_norm: 0.8293528193464708, iteration: 455047
loss: 0.9911519885063171,grad_norm: 0.632574028261791, iteration: 455048
loss: 0.9825460314750671,grad_norm: 0.8601254476053916, iteration: 455049
loss: 1.0107378959655762,grad_norm: 0.9074121925747479, iteration: 455050
loss: 1.0250582695007324,grad_norm: 0.7814755654545348, iteration: 455051
loss: 0.9986571669578552,grad_norm: 0.6939786082306257, iteration: 455052
loss: 0.9779409170150757,grad_norm: 0.8292271305763232, iteration: 455053
loss: 0.9529288411140442,grad_norm: 0.8457933691193794, iteration: 455054
loss: 1.0019265413284302,grad_norm: 0.7736220578111113, iteration: 455055
loss: 1.0057029724121094,grad_norm: 0.6363685025759712, iteration: 455056
loss: 0.9895710945129395,grad_norm: 0.7751342119666219, iteration: 455057
loss: 1.028814673423767,grad_norm: 0.772571275947721, iteration: 455058
loss: 1.0219861268997192,grad_norm: 0.9204311918914564, iteration: 455059
loss: 0.9714428186416626,grad_norm: 0.7347187969726023, iteration: 455060
loss: 1.0179134607315063,grad_norm: 0.8056532515996694, iteration: 455061
loss: 0.9778557419776917,grad_norm: 0.7165863626553881, iteration: 455062
loss: 1.038373351097107,grad_norm: 0.6734776315244284, iteration: 455063
loss: 1.0301735401153564,grad_norm: 0.9999992341856274, iteration: 455064
loss: 0.9954795241355896,grad_norm: 0.8748615883750782, iteration: 455065
loss: 0.9882966876029968,grad_norm: 0.7927613616069928, iteration: 455066
loss: 1.0044283866882324,grad_norm: 0.6744642036801936, iteration: 455067
loss: 1.0209088325500488,grad_norm: 0.7369427868162427, iteration: 455068
loss: 0.9903854131698608,grad_norm: 0.869744007119955, iteration: 455069
loss: 0.9840843677520752,grad_norm: 0.7299154247149143, iteration: 455070
loss: 0.98936927318573,grad_norm: 0.7284826949689357, iteration: 455071
loss: 0.9959548115730286,grad_norm: 0.999999117707461, iteration: 455072
loss: 0.9922674298286438,grad_norm: 0.9999993488731365, iteration: 455073
loss: 1.0908757448196411,grad_norm: 0.9160586449163675, iteration: 455074
loss: 1.0092166662216187,grad_norm: 0.884859286504924, iteration: 455075
loss: 1.0257467031478882,grad_norm: 0.8425449278601541, iteration: 455076
loss: 0.9778971076011658,grad_norm: 0.9726396721448326, iteration: 455077
loss: 0.9575382471084595,grad_norm: 0.7104145106692341, iteration: 455078
loss: 0.9740148186683655,grad_norm: 0.649979829485213, iteration: 455079
loss: 1.0208736658096313,grad_norm: 0.9999995880898495, iteration: 455080
loss: 0.9839758276939392,grad_norm: 0.8439880698201995, iteration: 455081
loss: 0.9706143140792847,grad_norm: 0.6949446627453983, iteration: 455082
loss: 0.9960967302322388,grad_norm: 0.9873635850319126, iteration: 455083
loss: 1.0178191661834717,grad_norm: 0.7038403549616351, iteration: 455084
loss: 1.0615698099136353,grad_norm: 0.9853466819762017, iteration: 455085
loss: 0.9787418842315674,grad_norm: 0.8771339320181357, iteration: 455086
loss: 1.0310578346252441,grad_norm: 0.6798879056872399, iteration: 455087
loss: 1.0066043138504028,grad_norm: 0.7465917768040269, iteration: 455088
loss: 0.9937763214111328,grad_norm: 0.7253640955998191, iteration: 455089
loss: 0.992405354976654,grad_norm: 0.8607386740234282, iteration: 455090
loss: 0.9703158736228943,grad_norm: 0.9906521309635027, iteration: 455091
loss: 1.0125283002853394,grad_norm: 0.8316676659885816, iteration: 455092
loss: 0.9982048273086548,grad_norm: 0.7091924279881111, iteration: 455093
loss: 0.9871068000793457,grad_norm: 0.7177503263913259, iteration: 455094
loss: 0.9919024109840393,grad_norm: 0.8096153309989036, iteration: 455095
loss: 1.0013346672058105,grad_norm: 0.7729971212662718, iteration: 455096
loss: 1.008055329322815,grad_norm: 0.6646025660081551, iteration: 455097
loss: 1.0396592617034912,grad_norm: 0.9999997159255088, iteration: 455098
loss: 1.0073559284210205,grad_norm: 0.7816910335983313, iteration: 455099
loss: 0.9815349578857422,grad_norm: 0.9999995162820295, iteration: 455100
loss: 1.0177778005599976,grad_norm: 0.7682056591144024, iteration: 455101
loss: 0.9823442101478577,grad_norm: 0.9999991394850457, iteration: 455102
loss: 0.9939321279525757,grad_norm: 0.812030455508869, iteration: 455103
loss: 0.9804555773735046,grad_norm: 0.8370260355040038, iteration: 455104
loss: 1.056043267250061,grad_norm: 0.7314970293987105, iteration: 455105
loss: 0.9906068444252014,grad_norm: 0.7148937568432846, iteration: 455106
loss: 1.0124582052230835,grad_norm: 0.7405554643452377, iteration: 455107
loss: 1.0256918668746948,grad_norm: 0.7865455354282387, iteration: 455108
loss: 1.0305962562561035,grad_norm: 0.9999989716768154, iteration: 455109
loss: 0.9979103207588196,grad_norm: 0.9587794472068274, iteration: 455110
loss: 0.9781883358955383,grad_norm: 0.7487430005588428, iteration: 455111
loss: 0.9594002962112427,grad_norm: 0.8835415255911921, iteration: 455112
loss: 0.9902735352516174,grad_norm: 0.8084594200666232, iteration: 455113
loss: 0.9462828636169434,grad_norm: 0.567640890340705, iteration: 455114
loss: 1.0188498497009277,grad_norm: 0.723997614076539, iteration: 455115
loss: 0.9822608232498169,grad_norm: 0.6910526768210457, iteration: 455116
loss: 0.9765862822532654,grad_norm: 0.9999990632074237, iteration: 455117
loss: 0.9913163781166077,grad_norm: 0.9574410492862305, iteration: 455118
loss: 0.9645774960517883,grad_norm: 0.7582819586277278, iteration: 455119
loss: 0.9934180974960327,grad_norm: 0.837492042989556, iteration: 455120
loss: 1.0354101657867432,grad_norm: 0.8419976340929763, iteration: 455121
loss: 0.9992766976356506,grad_norm: 0.6755018445490699, iteration: 455122
loss: 0.9771543145179749,grad_norm: 0.737923756324611, iteration: 455123
loss: 0.9698544144630432,grad_norm: 0.7661580728377356, iteration: 455124
loss: 1.0042647123336792,grad_norm: 0.5745244273654848, iteration: 455125
loss: 0.9591887593269348,grad_norm: 0.6945289943589862, iteration: 455126
loss: 1.0026413202285767,grad_norm: 0.8589973884967484, iteration: 455127
loss: 0.9490275382995605,grad_norm: 0.828918175503041, iteration: 455128
loss: 1.0788966417312622,grad_norm: 0.947638014126731, iteration: 455129
loss: 0.9930309653282166,grad_norm: 0.70646504756997, iteration: 455130
loss: 0.9806957840919495,grad_norm: 0.8370901736711368, iteration: 455131
loss: 0.9916893243789673,grad_norm: 0.8047153525527341, iteration: 455132
loss: 1.0145595073699951,grad_norm: 0.7500247222544868, iteration: 455133
loss: 1.0310665369033813,grad_norm: 0.8603141732712274, iteration: 455134
loss: 0.9967277646064758,grad_norm: 0.7225503945502723, iteration: 455135
loss: 0.9875718951225281,grad_norm: 0.7321205492839589, iteration: 455136
loss: 0.988449215888977,grad_norm: 0.7980092695158569, iteration: 455137
loss: 1.0129183530807495,grad_norm: 0.8142768236486463, iteration: 455138
loss: 0.985896646976471,grad_norm: 0.8424363666603875, iteration: 455139
loss: 1.031697154045105,grad_norm: 0.6810710096100197, iteration: 455140
loss: 0.9873703122138977,grad_norm: 0.7138758341372401, iteration: 455141
loss: 1.0482748746871948,grad_norm: 0.999999045226715, iteration: 455142
loss: 0.9830930829048157,grad_norm: 0.7810529783165986, iteration: 455143
loss: 0.9746718406677246,grad_norm: 0.9613633603631805, iteration: 455144
loss: 0.9728203415870667,grad_norm: 0.8907668029247326, iteration: 455145
loss: 0.9829321503639221,grad_norm: 0.9005111100188856, iteration: 455146
loss: 0.9612941741943359,grad_norm: 0.7078449031079239, iteration: 455147
loss: 1.0198229551315308,grad_norm: 0.9041693369512388, iteration: 455148
loss: 1.0796079635620117,grad_norm: 0.9999994491999464, iteration: 455149
loss: 1.0063477754592896,grad_norm: 0.6531983842391841, iteration: 455150
loss: 1.0005524158477783,grad_norm: 0.6655512850160473, iteration: 455151
loss: 0.9944518208503723,grad_norm: 0.6308487390440333, iteration: 455152
loss: 0.9961318969726562,grad_norm: 0.5992612511824672, iteration: 455153
loss: 1.0283730030059814,grad_norm: 0.8372645185870641, iteration: 455154
loss: 0.9446425437927246,grad_norm: 0.8226761300935985, iteration: 455155
loss: 0.97034752368927,grad_norm: 0.8658035300967498, iteration: 455156
loss: 0.98423832654953,grad_norm: 0.7770790030059458, iteration: 455157
loss: 1.0405948162078857,grad_norm: 0.7574766188648396, iteration: 455158
loss: 1.0041710138320923,grad_norm: 0.9110027527196499, iteration: 455159
loss: 0.9779441356658936,grad_norm: 0.850454926035319, iteration: 455160
loss: 0.9841412901878357,grad_norm: 0.8144935725073489, iteration: 455161
loss: 0.9751331210136414,grad_norm: 0.7059127382651761, iteration: 455162
loss: 1.0079814195632935,grad_norm: 0.7386919758033479, iteration: 455163
loss: 0.9691389799118042,grad_norm: 0.7511061289972798, iteration: 455164
loss: 0.9971903562545776,grad_norm: 0.7515720605343704, iteration: 455165
loss: 0.9874534606933594,grad_norm: 0.9999993709439274, iteration: 455166
loss: 1.0001221895217896,grad_norm: 0.7532199600659109, iteration: 455167
loss: 1.0505191087722778,grad_norm: 0.7321629902789641, iteration: 455168
loss: 0.9805247783660889,grad_norm: 0.9636763745383579, iteration: 455169
loss: 0.9880437254905701,grad_norm: 0.8286153223358451, iteration: 455170
loss: 0.9942091107368469,grad_norm: 0.7333025624877192, iteration: 455171
loss: 1.0583312511444092,grad_norm: 0.9999992773880418, iteration: 455172
loss: 1.00751531124115,grad_norm: 0.8234281864545951, iteration: 455173
loss: 1.0059152841567993,grad_norm: 0.8045391775067648, iteration: 455174
loss: 0.9996742606163025,grad_norm: 0.7886205325875765, iteration: 455175
loss: 1.0158123970031738,grad_norm: 0.7920160477400597, iteration: 455176
loss: 0.9864134788513184,grad_norm: 0.6739423786424713, iteration: 455177
loss: 1.0393848419189453,grad_norm: 0.8170662225090074, iteration: 455178
loss: 1.0187402963638306,grad_norm: 0.7702536233750902, iteration: 455179
loss: 0.9560440182685852,grad_norm: 0.8561262286960025, iteration: 455180
loss: 0.9949554204940796,grad_norm: 0.7278409313193634, iteration: 455181
loss: 1.0002014636993408,grad_norm: 0.7487565214700908, iteration: 455182
loss: 1.0349866151809692,grad_norm: 0.6522656092706325, iteration: 455183
loss: 0.9637943506240845,grad_norm: 0.8610085452734929, iteration: 455184
loss: 1.0068877935409546,grad_norm: 0.739033991707565, iteration: 455185
loss: 0.9821100234985352,grad_norm: 0.791450959205069, iteration: 455186
loss: 1.0108201503753662,grad_norm: 0.8578938322255568, iteration: 455187
loss: 1.0107293128967285,grad_norm: 0.781859928052123, iteration: 455188
loss: 1.0197957754135132,grad_norm: 0.6580390901431492, iteration: 455189
loss: 0.9787698984146118,grad_norm: 0.8012950165865431, iteration: 455190
loss: 1.0062870979309082,grad_norm: 0.690415031893656, iteration: 455191
loss: 1.014192819595337,grad_norm: 0.7121011901016392, iteration: 455192
loss: 0.9615654349327087,grad_norm: 0.6247991053001065, iteration: 455193
loss: 1.0097123384475708,grad_norm: 0.7025039566586242, iteration: 455194
loss: 0.9994516372680664,grad_norm: 0.8723593973121019, iteration: 455195
loss: 0.9705381393432617,grad_norm: 0.7450906312670125, iteration: 455196
loss: 0.9867181181907654,grad_norm: 0.8499946245061422, iteration: 455197
loss: 0.9871944785118103,grad_norm: 0.8447801348777209, iteration: 455198
loss: 1.0181554555892944,grad_norm: 0.6499578435998393, iteration: 455199
loss: 0.9951215982437134,grad_norm: 0.9079571000037525, iteration: 455200
loss: 1.1199232339859009,grad_norm: 0.9999992908965928, iteration: 455201
loss: 0.9579139947891235,grad_norm: 0.623043027462277, iteration: 455202
loss: 1.018868088722229,grad_norm: 0.7300156942912274, iteration: 455203
loss: 0.9757755994796753,grad_norm: 0.6890343338972775, iteration: 455204
loss: 1.0050324201583862,grad_norm: 0.6981888444655646, iteration: 455205
loss: 1.0428334474563599,grad_norm: 0.742558311869524, iteration: 455206
loss: 0.9982272386550903,grad_norm: 0.7247184824113061, iteration: 455207
loss: 1.0642940998077393,grad_norm: 0.9991325578541687, iteration: 455208
loss: 0.9817311763763428,grad_norm: 0.7939469930703521, iteration: 455209
loss: 0.9947289228439331,grad_norm: 0.7538946803042229, iteration: 455210
loss: 1.0046725273132324,grad_norm: 0.6890297524296317, iteration: 455211
loss: 1.0309197902679443,grad_norm: 0.836277329032757, iteration: 455212
loss: 1.0360292196273804,grad_norm: 0.9999992200903464, iteration: 455213
loss: 1.0223164558410645,grad_norm: 0.6685184149802468, iteration: 455214
loss: 1.0287827253341675,grad_norm: 0.8536408162377216, iteration: 455215
loss: 0.9587342739105225,grad_norm: 0.7586786778697302, iteration: 455216
loss: 1.0044033527374268,grad_norm: 0.7942264960719462, iteration: 455217
loss: 1.007668137550354,grad_norm: 0.8730482603459259, iteration: 455218
loss: 0.967018187046051,grad_norm: 0.6464722520481972, iteration: 455219
loss: 1.0070232152938843,grad_norm: 0.765995732845642, iteration: 455220
loss: 1.0102871656417847,grad_norm: 0.7642681319175904, iteration: 455221
loss: 0.9903791546821594,grad_norm: 0.7003960102212079, iteration: 455222
loss: 1.0251853466033936,grad_norm: 0.8426862907287768, iteration: 455223
loss: 0.9850412607192993,grad_norm: 0.7740105426819401, iteration: 455224
loss: 1.0137149095535278,grad_norm: 0.9145485924009327, iteration: 455225
loss: 1.0192939043045044,grad_norm: 0.6974979766672484, iteration: 455226
loss: 1.0416134595870972,grad_norm: 0.7925191419939415, iteration: 455227
loss: 1.1240543127059937,grad_norm: 0.9468725209223245, iteration: 455228
loss: 1.0106480121612549,grad_norm: 0.8762588668248353, iteration: 455229
loss: 1.0042399168014526,grad_norm: 0.7233896774869653, iteration: 455230
loss: 0.9749268889427185,grad_norm: 0.6471022256723713, iteration: 455231
loss: 1.017892599105835,grad_norm: 0.8071336258835012, iteration: 455232
loss: 0.9762091040611267,grad_norm: 0.9121803364026085, iteration: 455233
loss: 0.9876866936683655,grad_norm: 0.7655952958489672, iteration: 455234
loss: 0.9804883599281311,grad_norm: 0.6536161745082592, iteration: 455235
loss: 0.9711589217185974,grad_norm: 0.7163418071065102, iteration: 455236
loss: 1.0197992324829102,grad_norm: 0.9999998259226891, iteration: 455237
loss: 1.0062159299850464,grad_norm: 0.8889167383707518, iteration: 455238
loss: 1.0087398290634155,grad_norm: 0.6875000653627026, iteration: 455239
loss: 1.0255098342895508,grad_norm: 0.8310848151668091, iteration: 455240
loss: 0.9913762211799622,grad_norm: 0.8837893892803027, iteration: 455241
loss: 0.9738104939460754,grad_norm: 0.7447328558180449, iteration: 455242
loss: 0.9769411087036133,grad_norm: 0.6648604150203247, iteration: 455243
loss: 0.9955153465270996,grad_norm: 0.7043551983669765, iteration: 455244
loss: 0.9756914377212524,grad_norm: 0.685685010798266, iteration: 455245
loss: 0.994130551815033,grad_norm: 0.8070839627997664, iteration: 455246
loss: 0.9791472554206848,grad_norm: 0.8323324495230754, iteration: 455247
loss: 0.9688816070556641,grad_norm: 0.7735814018571817, iteration: 455248
loss: 1.0173778533935547,grad_norm: 0.7333582808508723, iteration: 455249
loss: 1.003321647644043,grad_norm: 0.7778476046276221, iteration: 455250
loss: 1.0071520805358887,grad_norm: 0.6810932231513405, iteration: 455251
loss: 1.0054246187210083,grad_norm: 0.8026672039470205, iteration: 455252
loss: 0.979106605052948,grad_norm: 0.6307631805811627, iteration: 455253
loss: 0.9616805911064148,grad_norm: 0.7140748723988086, iteration: 455254
loss: 0.9955915212631226,grad_norm: 0.9999994677245185, iteration: 455255
loss: 1.0413802862167358,grad_norm: 0.729057829272399, iteration: 455256
loss: 0.9519556760787964,grad_norm: 0.6447289374271692, iteration: 455257
loss: 1.0319730043411255,grad_norm: 0.6557953880680648, iteration: 455258
loss: 0.9519103169441223,grad_norm: 0.7788529363168065, iteration: 455259
loss: 1.0309536457061768,grad_norm: 0.8083314694820938, iteration: 455260
loss: 1.0411841869354248,grad_norm: 0.7952830039596744, iteration: 455261
loss: 1.0150376558303833,grad_norm: 0.6233305315152285, iteration: 455262
loss: 1.0067209005355835,grad_norm: 0.903892391616265, iteration: 455263
loss: 0.958618700504303,grad_norm: 0.6519980294678805, iteration: 455264
loss: 0.963654100894928,grad_norm: 0.7026627480612682, iteration: 455265
loss: 1.0139216184616089,grad_norm: 0.801552279291616, iteration: 455266
loss: 1.0127662420272827,grad_norm: 0.6480680944540534, iteration: 455267
loss: 0.9445962905883789,grad_norm: 0.9515254635492502, iteration: 455268
loss: 1.044019103050232,grad_norm: 0.7898680392827044, iteration: 455269
loss: 0.9818479418754578,grad_norm: 0.6892692989264797, iteration: 455270
loss: 1.0670812129974365,grad_norm: 0.9630736131031421, iteration: 455271
loss: 0.9655805826187134,grad_norm: 0.6920910930990789, iteration: 455272
loss: 1.0327820777893066,grad_norm: 0.7721540302605446, iteration: 455273
loss: 1.0425137281417847,grad_norm: 0.6508168728957465, iteration: 455274
loss: 0.9934639930725098,grad_norm: 0.8296901892175828, iteration: 455275
loss: 1.012833833694458,grad_norm: 0.9999989307000633, iteration: 455276
loss: 1.003219485282898,grad_norm: 0.8624400569135712, iteration: 455277
loss: 1.023937702178955,grad_norm: 0.7810160868222809, iteration: 455278
loss: 0.9943217039108276,grad_norm: 0.7707871492868331, iteration: 455279
loss: 0.9883004426956177,grad_norm: 0.7380157572300878, iteration: 455280
loss: 1.0564974546432495,grad_norm: 0.8697387804329618, iteration: 455281
loss: 1.005207896232605,grad_norm: 0.7127399630839567, iteration: 455282
loss: 0.9811269640922546,grad_norm: 0.9999990391894438, iteration: 455283
loss: 0.9960145950317383,grad_norm: 0.8869336744840322, iteration: 455284
loss: 0.9782479405403137,grad_norm: 0.9489979624840512, iteration: 455285
loss: 1.0068398714065552,grad_norm: 0.7626950395032475, iteration: 455286
loss: 0.9977902770042419,grad_norm: 0.9999998975938739, iteration: 455287
loss: 0.972579300403595,grad_norm: 0.8183932699565397, iteration: 455288
loss: 1.0341814756393433,grad_norm: 0.9439057900465643, iteration: 455289
loss: 1.0096718072891235,grad_norm: 0.8796356669457024, iteration: 455290
loss: 1.0183531045913696,grad_norm: 0.9999996102182774, iteration: 455291
loss: 0.9772899150848389,grad_norm: 0.7210487906541814, iteration: 455292
loss: 0.989021897315979,grad_norm: 0.7538312218633226, iteration: 455293
loss: 1.0100829601287842,grad_norm: 0.8047081952722841, iteration: 455294
loss: 0.982003390789032,grad_norm: 0.8813011067035517, iteration: 455295
loss: 1.014281988143921,grad_norm: 0.7487392000184895, iteration: 455296
loss: 1.0135647058486938,grad_norm: 0.6911676059897742, iteration: 455297
loss: 1.0262610912322998,grad_norm: 0.8059439763155004, iteration: 455298
loss: 1.0266612768173218,grad_norm: 0.67617779634474, iteration: 455299
loss: 0.9860862493515015,grad_norm: 0.9125528337497224, iteration: 455300
loss: 0.9980698227882385,grad_norm: 0.9999993236761215, iteration: 455301
loss: 0.9683368802070618,grad_norm: 0.7590589519639734, iteration: 455302
loss: 0.99543696641922,grad_norm: 0.8302952712791731, iteration: 455303
loss: 1.042610764503479,grad_norm: 0.7659138450343769, iteration: 455304
loss: 1.009913682937622,grad_norm: 0.7593229156702539, iteration: 455305
loss: 0.9876515865325928,grad_norm: 0.7931051691924165, iteration: 455306
loss: 1.000564455986023,grad_norm: 0.7647529628549754, iteration: 455307
loss: 1.0089106559753418,grad_norm: 0.7603679299062349, iteration: 455308
loss: 0.9932640194892883,grad_norm: 0.761421838657504, iteration: 455309
loss: 0.9874774813652039,grad_norm: 0.7737159194726048, iteration: 455310
loss: 0.9987817406654358,grad_norm: 0.8873987243461035, iteration: 455311
loss: 0.9753605723381042,grad_norm: 0.739217415874997, iteration: 455312
loss: 0.987745463848114,grad_norm: 0.8147383557929364, iteration: 455313
loss: 0.9966232180595398,grad_norm: 0.727363322820616, iteration: 455314
loss: 0.9913094639778137,grad_norm: 0.7730181147683942, iteration: 455315
loss: 0.9783421754837036,grad_norm: 0.7361198589434882, iteration: 455316
loss: 1.024789571762085,grad_norm: 0.8601511902294617, iteration: 455317
loss: 0.9732812643051147,grad_norm: 0.7084479818587902, iteration: 455318
loss: 0.9679526686668396,grad_norm: 0.8955781815822121, iteration: 455319
loss: 1.0063700675964355,grad_norm: 0.8421659457870058, iteration: 455320
loss: 1.0020806789398193,grad_norm: 0.6818303737809032, iteration: 455321
loss: 1.0062335729599,grad_norm: 0.6503375435477808, iteration: 455322
loss: 0.9770846366882324,grad_norm: 0.7795554684092815, iteration: 455323
loss: 0.9885025024414062,grad_norm: 0.7532136290839405, iteration: 455324
loss: 0.9665814638137817,grad_norm: 0.685662108422221, iteration: 455325
loss: 0.987714946269989,grad_norm: 0.8395406565376496, iteration: 455326
loss: 1.0199211835861206,grad_norm: 0.9999999518951855, iteration: 455327
loss: 1.0209823846817017,grad_norm: 0.7374325460744846, iteration: 455328
loss: 0.9840392470359802,grad_norm: 0.7696060218429, iteration: 455329
loss: 1.010118007659912,grad_norm: 0.7362333338421525, iteration: 455330
loss: 1.0118905305862427,grad_norm: 0.7095338466629953, iteration: 455331
loss: 1.0150302648544312,grad_norm: 0.7353495366726281, iteration: 455332
loss: 0.9833297729492188,grad_norm: 0.7307114807636274, iteration: 455333
loss: 0.9880169034004211,grad_norm: 0.8270259949134, iteration: 455334
loss: 1.0061976909637451,grad_norm: 0.7783387101298637, iteration: 455335
loss: 1.0051922798156738,grad_norm: 0.7128793684375805, iteration: 455336
loss: 0.99538254737854,grad_norm: 0.7949239886940166, iteration: 455337
loss: 0.9733876585960388,grad_norm: 0.8929956947365669, iteration: 455338
loss: 0.975888192653656,grad_norm: 0.8399382445680618, iteration: 455339
loss: 1.0092189311981201,grad_norm: 0.8582187644065348, iteration: 455340
loss: 1.032683253288269,grad_norm: 0.8915852470668216, iteration: 455341
loss: 0.9900243878364563,grad_norm: 0.7305173465911551, iteration: 455342
loss: 1.032976508140564,grad_norm: 0.781974110210447, iteration: 455343
loss: 0.9962490797042847,grad_norm: 0.7853439310612222, iteration: 455344
loss: 0.9795030355453491,grad_norm: 0.7280239931792224, iteration: 455345
loss: 0.9886714220046997,grad_norm: 0.6875055554204329, iteration: 455346
loss: 0.9723287224769592,grad_norm: 0.7782412754536158, iteration: 455347
loss: 0.9834721684455872,grad_norm: 0.7196698091691592, iteration: 455348
loss: 1.0279340744018555,grad_norm: 0.7417464458941647, iteration: 455349
loss: 1.0004650354385376,grad_norm: 0.7797854684662386, iteration: 455350
loss: 1.0123441219329834,grad_norm: 0.7391214387044337, iteration: 455351
loss: 1.0007532835006714,grad_norm: 0.7483127111075115, iteration: 455352
loss: 1.0440378189086914,grad_norm: 0.7960980979388337, iteration: 455353
loss: 0.946384072303772,grad_norm: 0.7838771388662749, iteration: 455354
loss: 1.0375540256500244,grad_norm: 0.8432547143354678, iteration: 455355
loss: 0.970317006111145,grad_norm: 0.9081071364129621, iteration: 455356
loss: 1.0443729162216187,grad_norm: 0.6630548879063471, iteration: 455357
loss: 0.9814055562019348,grad_norm: 0.7754665741222532, iteration: 455358
loss: 1.0119565725326538,grad_norm: 0.8055284114789628, iteration: 455359
loss: 0.9876155257225037,grad_norm: 0.7940354581543556, iteration: 455360
loss: 1.024033546447754,grad_norm: 0.9999999798285176, iteration: 455361
loss: 1.0118544101715088,grad_norm: 0.831539199991234, iteration: 455362
loss: 0.9685750603675842,grad_norm: 0.7738044562160061, iteration: 455363
loss: 1.0003767013549805,grad_norm: 0.7446512613203717, iteration: 455364
loss: 0.9989550709724426,grad_norm: 0.7197108178995044, iteration: 455365
loss: 1.021874189376831,grad_norm: 0.9999991448936616, iteration: 455366
loss: 0.9778034687042236,grad_norm: 0.6606112484882837, iteration: 455367
loss: 1.0127898454666138,grad_norm: 0.920937563486363, iteration: 455368
loss: 1.0050612688064575,grad_norm: 0.7649085955610362, iteration: 455369
loss: 0.9784196615219116,grad_norm: 0.7233295361609346, iteration: 455370
loss: 1.02040696144104,grad_norm: 0.712024523675056, iteration: 455371
loss: 1.012674331665039,grad_norm: 0.7017558520083228, iteration: 455372
loss: 0.9899596571922302,grad_norm: 0.7689025538238558, iteration: 455373
loss: 1.0180760622024536,grad_norm: 0.9999990394449901, iteration: 455374
loss: 0.9671708941459656,grad_norm: 0.9251607901580866, iteration: 455375
loss: 1.026324987411499,grad_norm: 0.8424573747490696, iteration: 455376
loss: 1.0377399921417236,grad_norm: 0.6894841016501506, iteration: 455377
loss: 1.022220253944397,grad_norm: 0.857112441812411, iteration: 455378
loss: 1.0255166292190552,grad_norm: 0.8499289710514353, iteration: 455379
loss: 0.9830211400985718,grad_norm: 0.8582983823065623, iteration: 455380
loss: 1.0099116563796997,grad_norm: 0.7706606244114655, iteration: 455381
loss: 1.003811240196228,grad_norm: 0.7297079711022825, iteration: 455382
loss: 0.9886835813522339,grad_norm: 0.8762374300372362, iteration: 455383
loss: 0.992080807685852,grad_norm: 0.8695851452983175, iteration: 455384
loss: 1.0128705501556396,grad_norm: 0.7574807909865793, iteration: 455385
loss: 0.9898101091384888,grad_norm: 0.8119376221091122, iteration: 455386
loss: 1.0517592430114746,grad_norm: 0.9999997028835601, iteration: 455387
loss: 1.0416851043701172,grad_norm: 0.9400847986618189, iteration: 455388
loss: 1.0238069295883179,grad_norm: 0.7210022607620018, iteration: 455389
loss: 0.9914087653160095,grad_norm: 0.8295265672522136, iteration: 455390
loss: 1.0444400310516357,grad_norm: 0.8476610917347945, iteration: 455391
loss: 1.0060410499572754,grad_norm: 0.8361257417847027, iteration: 455392
loss: 1.0279135704040527,grad_norm: 0.8058504805885045, iteration: 455393
loss: 0.9759126901626587,grad_norm: 0.8174216924101969, iteration: 455394
loss: 1.0156598091125488,grad_norm: 0.873038357744154, iteration: 455395
loss: 1.0036866664886475,grad_norm: 0.7485350533306457, iteration: 455396
loss: 0.9914405345916748,grad_norm: 0.6591006893773469, iteration: 455397
loss: 0.9934315085411072,grad_norm: 0.637891530611974, iteration: 455398
loss: 1.0262566804885864,grad_norm: 0.8915172907148717, iteration: 455399
loss: 1.009512186050415,grad_norm: 0.7788185305745202, iteration: 455400
loss: 0.9990673661231995,grad_norm: 0.7408964034531222, iteration: 455401
loss: 0.9686952829360962,grad_norm: 0.9999990915271026, iteration: 455402
loss: 1.0161420106887817,grad_norm: 0.7448380910054382, iteration: 455403
loss: 1.0358861684799194,grad_norm: 0.725104783621708, iteration: 455404
loss: 1.0349476337432861,grad_norm: 0.8000800861635844, iteration: 455405
loss: 1.0046496391296387,grad_norm: 0.9999991147745522, iteration: 455406
loss: 1.0238807201385498,grad_norm: 0.8480609588978321, iteration: 455407
loss: 1.0039376020431519,grad_norm: 0.6084155069023461, iteration: 455408
loss: 0.9960739016532898,grad_norm: 0.927785201080036, iteration: 455409
loss: 1.0055959224700928,grad_norm: 0.7282564363360823, iteration: 455410
loss: 1.0019975900650024,grad_norm: 0.7200361507603575, iteration: 455411
loss: 1.0067964792251587,grad_norm: 0.778545492537871, iteration: 455412
loss: 0.9767343401908875,grad_norm: 0.6201732587662359, iteration: 455413
loss: 0.9896104335784912,grad_norm: 0.7340533513649479, iteration: 455414
loss: 0.9692931175231934,grad_norm: 0.9333899350243804, iteration: 455415
loss: 1.0292346477508545,grad_norm: 0.71851918403839, iteration: 455416
loss: 1.0079923868179321,grad_norm: 0.8131295250404637, iteration: 455417
loss: 1.002806305885315,grad_norm: 0.8282378800408688, iteration: 455418
loss: 0.9970159530639648,grad_norm: 0.9999999200555044, iteration: 455419
loss: 1.0220725536346436,grad_norm: 0.7194940202916846, iteration: 455420
loss: 1.0062178373336792,grad_norm: 0.7904476936272519, iteration: 455421
loss: 1.0039945840835571,grad_norm: 0.7954482961291683, iteration: 455422
loss: 0.9983295202255249,grad_norm: 0.8504785023850117, iteration: 455423
loss: 0.9569358229637146,grad_norm: 0.6553684817511234, iteration: 455424
loss: 0.9666917324066162,grad_norm: 0.6100024142794501, iteration: 455425
loss: 0.9485877752304077,grad_norm: 0.7199770622442772, iteration: 455426
loss: 0.9917778372764587,grad_norm: 0.8023846756273785, iteration: 455427
loss: 1.013272762298584,grad_norm: 0.8890661007673939, iteration: 455428
loss: 1.0141395330429077,grad_norm: 0.817441092309251, iteration: 455429
loss: 0.9873720407485962,grad_norm: 0.7009477270650833, iteration: 455430
loss: 0.995887041091919,grad_norm: 0.673257958573512, iteration: 455431
loss: 0.9842787981033325,grad_norm: 0.9655372844268688, iteration: 455432
loss: 0.9997314810752869,grad_norm: 0.7981661629064856, iteration: 455433
loss: 1.0148069858551025,grad_norm: 0.8479326204727853, iteration: 455434
loss: 1.001625895500183,grad_norm: 0.6871456985376928, iteration: 455435
loss: 1.0116175413131714,grad_norm: 0.6950331664286676, iteration: 455436
loss: 0.9947196841239929,grad_norm: 0.7712887429006078, iteration: 455437
loss: 0.9740118980407715,grad_norm: 0.7229964445795286, iteration: 455438
loss: 1.0040756464004517,grad_norm: 0.6922421080817287, iteration: 455439
loss: 1.0035523176193237,grad_norm: 0.7620291297459267, iteration: 455440
loss: 1.0122710466384888,grad_norm: 0.9999998845850918, iteration: 455441
loss: 0.9790228009223938,grad_norm: 0.7815008222470053, iteration: 455442
loss: 1.0069177150726318,grad_norm: 0.6414487544932339, iteration: 455443
loss: 0.9825151562690735,grad_norm: 0.7079900778638432, iteration: 455444
loss: 0.9677955508232117,grad_norm: 0.84645282313484, iteration: 455445
loss: 0.9900438785552979,grad_norm: 0.7715684208294414, iteration: 455446
loss: 0.975868821144104,grad_norm: 0.7698562802294219, iteration: 455447
loss: 0.9944381713867188,grad_norm: 0.875019933398139, iteration: 455448
loss: 1.0036669969558716,grad_norm: 0.621007850886988, iteration: 455449
loss: 1.0023750066757202,grad_norm: 0.7690767050371163, iteration: 455450
loss: 0.9860894083976746,grad_norm: 0.7381288470947719, iteration: 455451
loss: 0.9948524832725525,grad_norm: 0.7814848484276811, iteration: 455452
loss: 0.9910663962364197,grad_norm: 0.7639719237617926, iteration: 455453
loss: 1.0174270868301392,grad_norm: 0.7213152256264455, iteration: 455454
loss: 1.0086003541946411,grad_norm: 0.8963083026240918, iteration: 455455
loss: 0.9633715152740479,grad_norm: 0.8116954893016999, iteration: 455456
loss: 0.966594398021698,grad_norm: 0.6343949449739243, iteration: 455457
loss: 1.0097862482070923,grad_norm: 0.8497795923567459, iteration: 455458
loss: 0.9840569496154785,grad_norm: 0.7473737554791902, iteration: 455459
loss: 0.9795227646827698,grad_norm: 0.7071492553677751, iteration: 455460
loss: 0.9675501585006714,grad_norm: 0.8440422422564776, iteration: 455461
loss: 1.052004337310791,grad_norm: 0.8597086588940417, iteration: 455462
loss: 0.9782307147979736,grad_norm: 0.8253023053526104, iteration: 455463
loss: 0.9876251220703125,grad_norm: 0.9781403817030451, iteration: 455464
loss: 0.9803804755210876,grad_norm: 0.6367431081733015, iteration: 455465
loss: 1.0104700326919556,grad_norm: 0.9999993817785053, iteration: 455466
loss: 0.9825013279914856,grad_norm: 0.7551611407731282, iteration: 455467
loss: 0.9948040843009949,grad_norm: 0.845613663279556, iteration: 455468
loss: 1.0043741464614868,grad_norm: 0.9999991436823038, iteration: 455469
loss: 0.9571442008018494,grad_norm: 0.7224441183917033, iteration: 455470
loss: 1.0067662000656128,grad_norm: 0.794290644821049, iteration: 455471
loss: 0.9622963070869446,grad_norm: 0.7350733910886377, iteration: 455472
loss: 0.9945969581604004,grad_norm: 0.8198747497901123, iteration: 455473
loss: 0.9958890080451965,grad_norm: 0.6482016152753681, iteration: 455474
loss: 1.00216805934906,grad_norm: 0.6697291670018878, iteration: 455475
loss: 0.993274450302124,grad_norm: 0.9020544954078441, iteration: 455476
loss: 1.0287721157073975,grad_norm: 0.738130913671989, iteration: 455477
loss: 1.0001801252365112,grad_norm: 0.7071876828698355, iteration: 455478
loss: 0.9988386034965515,grad_norm: 0.6843937837469103, iteration: 455479
loss: 1.0193192958831787,grad_norm: 0.7833832819982189, iteration: 455480
loss: 0.9759652614593506,grad_norm: 0.7250306268854814, iteration: 455481
loss: 1.0079060792922974,grad_norm: 0.7353381281012085, iteration: 455482
loss: 1.000113606452942,grad_norm: 0.7277521731483754, iteration: 455483
loss: 1.0332258939743042,grad_norm: 0.7148261288602437, iteration: 455484
loss: 0.9999327063560486,grad_norm: 0.736876011660287, iteration: 455485
loss: 1.0141521692276,grad_norm: 0.796743226135256, iteration: 455486
loss: 1.004154920578003,grad_norm: 0.8448868737656342, iteration: 455487
loss: 0.9728325605392456,grad_norm: 0.6849164643974827, iteration: 455488
loss: 1.0161861181259155,grad_norm: 0.8675241094441164, iteration: 455489
loss: 1.001945972442627,grad_norm: 0.6872267993449003, iteration: 455490
loss: 1.0088427066802979,grad_norm: 0.7132229184472703, iteration: 455491
loss: 1.034159779548645,grad_norm: 0.6702340029221596, iteration: 455492
loss: 0.9825581908226013,grad_norm: 0.7338116731736183, iteration: 455493
loss: 0.997465968132019,grad_norm: 0.7471652516353238, iteration: 455494
loss: 1.0139989852905273,grad_norm: 0.7719868517597338, iteration: 455495
loss: 1.0144814252853394,grad_norm: 0.8151804391378756, iteration: 455496
loss: 0.9581512212753296,grad_norm: 0.7325736913226125, iteration: 455497
loss: 0.984539806842804,grad_norm: 0.9060797345720706, iteration: 455498
loss: 0.9805318117141724,grad_norm: 0.7537403780673037, iteration: 455499
loss: 0.9697410464286804,grad_norm: 0.8636669397377206, iteration: 455500
loss: 0.9766960144042969,grad_norm: 0.6506816200586243, iteration: 455501
loss: 1.011146903038025,grad_norm: 0.7353403112264365, iteration: 455502
loss: 0.9803504347801208,grad_norm: 0.6668690794487806, iteration: 455503
loss: 1.0181093215942383,grad_norm: 0.9999994805834537, iteration: 455504
loss: 1.021401286125183,grad_norm: 0.7936669917590845, iteration: 455505
loss: 0.9880780577659607,grad_norm: 0.7695088452993785, iteration: 455506
loss: 0.9935764670372009,grad_norm: 0.7727994118876134, iteration: 455507
loss: 1.0126827955245972,grad_norm: 0.74012684659744, iteration: 455508
loss: 0.9813866019248962,grad_norm: 0.8438475085114474, iteration: 455509
loss: 1.0074853897094727,grad_norm: 0.7755505832276632, iteration: 455510
loss: 1.0129014253616333,grad_norm: 0.8429086214291444, iteration: 455511
loss: 0.9559939503669739,grad_norm: 0.7682859855972181, iteration: 455512
loss: 0.9885132908821106,grad_norm: 0.7132122820918578, iteration: 455513
loss: 0.9753171801567078,grad_norm: 0.7900498090794603, iteration: 455514
loss: 1.0076849460601807,grad_norm: 0.5834710189344183, iteration: 455515
loss: 1.0058729648590088,grad_norm: 0.7780326639436383, iteration: 455516
loss: 0.9688701033592224,grad_norm: 0.6149617841584631, iteration: 455517
loss: 0.9645304083824158,grad_norm: 0.680896023141322, iteration: 455518
loss: 1.015779972076416,grad_norm: 0.8861441639947311, iteration: 455519
loss: 0.9655448198318481,grad_norm: 0.8172138889012374, iteration: 455520
loss: 0.9945793747901917,grad_norm: 0.8889088132715198, iteration: 455521
loss: 1.0078421831130981,grad_norm: 0.6771847785306632, iteration: 455522
loss: 0.9922882318496704,grad_norm: 0.7446578869493955, iteration: 455523
loss: 1.0078620910644531,grad_norm: 0.7894483791616002, iteration: 455524
loss: 1.0412187576293945,grad_norm: 0.7930533458917151, iteration: 455525
loss: 0.9998633861541748,grad_norm: 0.7571335966295931, iteration: 455526
loss: 1.0280787944793701,grad_norm: 0.9999999470409079, iteration: 455527
loss: 1.0190786123275757,grad_norm: 0.8773304280293247, iteration: 455528
loss: 1.0144381523132324,grad_norm: 0.6590699383220117, iteration: 455529
loss: 1.0043952465057373,grad_norm: 0.7743149474436983, iteration: 455530
loss: 0.9865583181381226,grad_norm: 0.7073317384518026, iteration: 455531
loss: 0.9630905985832214,grad_norm: 0.9574367261193655, iteration: 455532
loss: 1.0165003538131714,grad_norm: 0.8845342690453011, iteration: 455533
loss: 1.0050380229949951,grad_norm: 0.8103931845873796, iteration: 455534
loss: 1.002852439880371,grad_norm: 0.7714435171384428, iteration: 455535
loss: 0.9935888648033142,grad_norm: 0.6943540218875768, iteration: 455536
loss: 0.9841094017028809,grad_norm: 0.8064307021140769, iteration: 455537
loss: 0.9804929494857788,grad_norm: 0.9081802849112957, iteration: 455538
loss: 1.008620262145996,grad_norm: 0.8293548286222542, iteration: 455539
loss: 1.0318708419799805,grad_norm: 0.7741540238142907, iteration: 455540
loss: 1.012308955192566,grad_norm: 0.7262665192687999, iteration: 455541
loss: 0.9868063926696777,grad_norm: 0.8290761172833109, iteration: 455542
loss: 0.9775241017341614,grad_norm: 0.7710443472397721, iteration: 455543
loss: 0.9919232130050659,grad_norm: 0.6684089116785533, iteration: 455544
loss: 1.0037009716033936,grad_norm: 0.9999999379312688, iteration: 455545
loss: 1.0078539848327637,grad_norm: 0.7176578991997608, iteration: 455546
loss: 0.9944453239440918,grad_norm: 0.717665462761363, iteration: 455547
loss: 0.9668805599212646,grad_norm: 0.9470970613372083, iteration: 455548
loss: 0.939243495464325,grad_norm: 0.9657099101337433, iteration: 455549
loss: 0.9811021089553833,grad_norm: 0.705588247573598, iteration: 455550
loss: 1.0004866123199463,grad_norm: 0.716903287039602, iteration: 455551
loss: 1.04072105884552,grad_norm: 0.7325690027580041, iteration: 455552
loss: 0.9884880185127258,grad_norm: 0.736859652330581, iteration: 455553
loss: 0.9713693261146545,grad_norm: 0.7086028226626916, iteration: 455554
loss: 1.0120333433151245,grad_norm: 0.7300117803290886, iteration: 455555
loss: 1.0083314180374146,grad_norm: 0.6977729851989067, iteration: 455556
loss: 0.977581262588501,grad_norm: 0.8525871002740367, iteration: 455557
loss: 0.997991681098938,grad_norm: 0.7416364021479728, iteration: 455558
loss: 1.0038913488388062,grad_norm: 0.7546042432300992, iteration: 455559
loss: 0.9831348657608032,grad_norm: 0.6858870240640215, iteration: 455560
loss: 0.9983122944831848,grad_norm: 0.7957579304475659, iteration: 455561
loss: 1.0002384185791016,grad_norm: 0.8657956251079542, iteration: 455562
loss: 1.0249911546707153,grad_norm: 0.6748727083550512, iteration: 455563
loss: 0.9761126041412354,grad_norm: 0.6862086098370938, iteration: 455564
loss: 1.0129497051239014,grad_norm: 0.7465534740657347, iteration: 455565
loss: 0.9712382555007935,grad_norm: 0.6769595588808536, iteration: 455566
loss: 0.9575003385543823,grad_norm: 0.7049781628431074, iteration: 455567
loss: 0.9705773591995239,grad_norm: 0.8013331518634473, iteration: 455568
loss: 0.9966957569122314,grad_norm: 0.8070598347505639, iteration: 455569
loss: 0.958404541015625,grad_norm: 0.7494742216687545, iteration: 455570
loss: 1.0096982717514038,grad_norm: 0.913861431967425, iteration: 455571
loss: 1.0402107238769531,grad_norm: 0.9999998006104392, iteration: 455572
loss: 1.0211576223373413,grad_norm: 0.7836257184727216, iteration: 455573
loss: 0.9931663870811462,grad_norm: 0.656545621122979, iteration: 455574
loss: 1.02668297290802,grad_norm: 0.8862275375657976, iteration: 455575
loss: 1.0037031173706055,grad_norm: 0.7025674315098387, iteration: 455576
loss: 0.9755237102508545,grad_norm: 0.9999990626028589, iteration: 455577
loss: 1.0546823740005493,grad_norm: 0.9053631818829565, iteration: 455578
loss: 0.9709089398384094,grad_norm: 0.6878327125264672, iteration: 455579
loss: 0.9812730550765991,grad_norm: 0.7679349172099984, iteration: 455580
loss: 0.9918001890182495,grad_norm: 0.9999989592172152, iteration: 455581
loss: 1.0286509990692139,grad_norm: 0.6917729677481917, iteration: 455582
loss: 1.0098097324371338,grad_norm: 0.6969271687012422, iteration: 455583
loss: 1.0367226600646973,grad_norm: 0.9626385363502191, iteration: 455584
loss: 1.000289797782898,grad_norm: 0.9757882622612943, iteration: 455585
loss: 0.9904168248176575,grad_norm: 0.6962763573277054, iteration: 455586
loss: 1.016658067703247,grad_norm: 0.8268806317834794, iteration: 455587
loss: 1.0041707754135132,grad_norm: 0.8189368639860676, iteration: 455588
loss: 0.9797614216804504,grad_norm: 0.9781729681433418, iteration: 455589
loss: 1.0023705959320068,grad_norm: 0.7965376184410059, iteration: 455590
loss: 1.0606629848480225,grad_norm: 0.889678529408865, iteration: 455591
loss: 0.9991999268531799,grad_norm: 0.8322718741467562, iteration: 455592
loss: 0.9875127673149109,grad_norm: 0.7018729040077334, iteration: 455593
loss: 0.9952823519706726,grad_norm: 0.8660467370237204, iteration: 455594
loss: 0.9646100997924805,grad_norm: 0.6254470544200502, iteration: 455595
loss: 1.0398346185684204,grad_norm: 0.8342970702247547, iteration: 455596
loss: 1.0033154487609863,grad_norm: 0.621219648326791, iteration: 455597
loss: 0.9719468951225281,grad_norm: 0.7461482563502937, iteration: 455598
loss: 1.0212006568908691,grad_norm: 0.6615695776094981, iteration: 455599
loss: 1.0225143432617188,grad_norm: 0.7499841554694795, iteration: 455600
loss: 1.0193604230880737,grad_norm: 0.9890088654951095, iteration: 455601
loss: 1.0128675699234009,grad_norm: 0.7543899604799188, iteration: 455602
loss: 0.9907117486000061,grad_norm: 0.920954551571508, iteration: 455603
loss: 0.9658595323562622,grad_norm: 0.7363637013245807, iteration: 455604
loss: 1.0427886247634888,grad_norm: 0.7525897619450909, iteration: 455605
loss: 1.0231456756591797,grad_norm: 0.7386735470093484, iteration: 455606
loss: 0.9970600605010986,grad_norm: 0.6161686801230499, iteration: 455607
loss: 0.9759431481361389,grad_norm: 0.8057151622698647, iteration: 455608
loss: 1.008872151374817,grad_norm: 0.750295309240611, iteration: 455609
loss: 1.0271364450454712,grad_norm: 0.6496332956941209, iteration: 455610
loss: 0.9906269907951355,grad_norm: 0.6177945830566594, iteration: 455611
loss: 0.9609358906745911,grad_norm: 0.6703816318065247, iteration: 455612
loss: 0.9868215322494507,grad_norm: 0.7911078051179332, iteration: 455613
loss: 0.9873712062835693,grad_norm: 0.6913700629633188, iteration: 455614
loss: 0.9988840818405151,grad_norm: 0.7592488897107352, iteration: 455615
loss: 0.9888516068458557,grad_norm: 0.7031544655928477, iteration: 455616
loss: 1.0054737329483032,grad_norm: 0.6767039891611885, iteration: 455617
loss: 1.052972435951233,grad_norm: 0.9999991943401567, iteration: 455618
loss: 0.9653097987174988,grad_norm: 0.7653589224786064, iteration: 455619
loss: 0.9870676398277283,grad_norm: 0.7490221771846038, iteration: 455620
loss: 0.9597722291946411,grad_norm: 0.7572896636728549, iteration: 455621
loss: 0.9507015943527222,grad_norm: 0.8002898946319166, iteration: 455622
loss: 1.0016322135925293,grad_norm: 0.7285072884116186, iteration: 455623
loss: 1.0130892992019653,grad_norm: 0.9026252250759051, iteration: 455624
loss: 0.9965143799781799,grad_norm: 0.6915407867787822, iteration: 455625
loss: 0.9896206259727478,grad_norm: 0.6703436335489854, iteration: 455626
loss: 1.0177589654922485,grad_norm: 0.6845730511150331, iteration: 455627
loss: 1.0110255479812622,grad_norm: 0.6290690805819998, iteration: 455628
loss: 1.0059475898742676,grad_norm: 0.7346105510575002, iteration: 455629
loss: 0.996555507183075,grad_norm: 0.8092010920923252, iteration: 455630
loss: 0.9560797810554504,grad_norm: 0.884983750875155, iteration: 455631
loss: 1.024209976196289,grad_norm: 0.9999998433874271, iteration: 455632
loss: 0.9872549772262573,grad_norm: 0.6361620411633143, iteration: 455633
loss: 1.0253477096557617,grad_norm: 0.961698378073934, iteration: 455634
loss: 1.0036638975143433,grad_norm: 0.7482105923886824, iteration: 455635
loss: 0.995917797088623,grad_norm: 0.9719621479267609, iteration: 455636
loss: 1.0104540586471558,grad_norm: 0.648997552951853, iteration: 455637
loss: 0.9326243996620178,grad_norm: 0.7604586792846015, iteration: 455638
loss: 0.9906285405158997,grad_norm: 0.8249599264225751, iteration: 455639
loss: 1.0052443742752075,grad_norm: 0.8904126106520963, iteration: 455640
loss: 1.0080817937850952,grad_norm: 0.8374798000309198, iteration: 455641
loss: 1.0109710693359375,grad_norm: 0.6778443999163002, iteration: 455642
loss: 1.004248857498169,grad_norm: 0.710507560056743, iteration: 455643
loss: 0.96630859375,grad_norm: 0.7633862591200654, iteration: 455644
loss: 1.0337646007537842,grad_norm: 0.855430960293286, iteration: 455645
loss: 0.9767826199531555,grad_norm: 0.9999990485683808, iteration: 455646
loss: 0.9932335019111633,grad_norm: 0.6910856452034844, iteration: 455647
loss: 0.9799320697784424,grad_norm: 0.7184764349153703, iteration: 455648
loss: 1.0197027921676636,grad_norm: 0.6764789603461969, iteration: 455649
loss: 0.9721677303314209,grad_norm: 0.6721874528564554, iteration: 455650
loss: 0.9968357682228088,grad_norm: 0.6539233639268925, iteration: 455651
loss: 0.9936532378196716,grad_norm: 0.7053856928812829, iteration: 455652
loss: 1.0056827068328857,grad_norm: 0.727891964427018, iteration: 455653
loss: 1.0017167329788208,grad_norm: 0.7653030956150629, iteration: 455654
loss: 1.0245081186294556,grad_norm: 0.883287628857378, iteration: 455655
loss: 0.9774331450462341,grad_norm: 0.9925912202061405, iteration: 455656
loss: 1.0036238431930542,grad_norm: 0.7593304190055853, iteration: 455657
loss: 0.9806787967681885,grad_norm: 0.8768844438568713, iteration: 455658
loss: 0.9818947315216064,grad_norm: 0.6697821099236009, iteration: 455659
loss: 0.9761141538619995,grad_norm: 0.7657468977514095, iteration: 455660
loss: 1.0160095691680908,grad_norm: 0.790872692732381, iteration: 455661
loss: 0.9949857592582703,grad_norm: 0.830131499401329, iteration: 455662
loss: 1.0306247472763062,grad_norm: 0.6664822272246719, iteration: 455663
loss: 1.1131870746612549,grad_norm: 0.9405655255526565, iteration: 455664
loss: 0.9910616278648376,grad_norm: 0.7586553358037343, iteration: 455665
loss: 0.9598686695098877,grad_norm: 0.7626488990756672, iteration: 455666
loss: 0.9801060557365417,grad_norm: 0.8777610007398896, iteration: 455667
loss: 1.0435892343521118,grad_norm: 0.7550889849061174, iteration: 455668
loss: 0.9942021369934082,grad_norm: 0.6349782588543078, iteration: 455669
loss: 0.9654766321182251,grad_norm: 0.7552600566502726, iteration: 455670
loss: 1.0660821199417114,grad_norm: 0.8197784777163037, iteration: 455671
loss: 1.034400463104248,grad_norm: 0.7430342153936447, iteration: 455672
loss: 0.9819493889808655,grad_norm: 0.6493288781891217, iteration: 455673
loss: 1.0294291973114014,grad_norm: 0.9999999371592596, iteration: 455674
loss: 0.9808235168457031,grad_norm: 0.765856605955493, iteration: 455675
loss: 0.998260498046875,grad_norm: 0.7736897671992544, iteration: 455676
loss: 0.9843403697013855,grad_norm: 0.818351692755247, iteration: 455677
loss: 0.9699894785881042,grad_norm: 0.7353971407467744, iteration: 455678
loss: 0.9904046058654785,grad_norm: 0.9999991781785176, iteration: 455679
loss: 0.9937974214553833,grad_norm: 0.971186031858239, iteration: 455680
loss: 1.0643503665924072,grad_norm: 0.9598826499717101, iteration: 455681
loss: 0.9952807426452637,grad_norm: 0.7024246890696357, iteration: 455682
loss: 1.0096274614334106,grad_norm: 0.6676920323156802, iteration: 455683
loss: 0.9650036096572876,grad_norm: 0.7888981219947773, iteration: 455684
loss: 1.0078648328781128,grad_norm: 0.6433042219650972, iteration: 455685
loss: 0.9909244179725647,grad_norm: 0.7656447019205659, iteration: 455686
loss: 1.030329942703247,grad_norm: 0.7333977411561107, iteration: 455687
loss: 0.9907736778259277,grad_norm: 0.7553508935331108, iteration: 455688
loss: 0.9769212603569031,grad_norm: 0.7255953933971209, iteration: 455689
loss: 0.994698703289032,grad_norm: 0.8055964775763942, iteration: 455690
loss: 0.9864194989204407,grad_norm: 0.7616568699977966, iteration: 455691
loss: 0.9995566606521606,grad_norm: 0.7675148691541822, iteration: 455692
loss: 0.9633910655975342,grad_norm: 0.8189398161993156, iteration: 455693
loss: 1.0056387186050415,grad_norm: 0.7975210008702903, iteration: 455694
loss: 0.9812743067741394,grad_norm: 0.7978731013273852, iteration: 455695
loss: 1.0197486877441406,grad_norm: 0.6894170791432985, iteration: 455696
loss: 1.0025967359542847,grad_norm: 0.9036906034216012, iteration: 455697
loss: 1.009039282798767,grad_norm: 0.8372518528899545, iteration: 455698
loss: 1.0284297466278076,grad_norm: 0.8085008674118138, iteration: 455699
loss: 0.9923372864723206,grad_norm: 0.7479300761551382, iteration: 455700
loss: 1.0126001834869385,grad_norm: 0.6990719316382874, iteration: 455701
loss: 0.9971697330474854,grad_norm: 0.7635751830154653, iteration: 455702
loss: 0.9909718632698059,grad_norm: 0.8270445594632947, iteration: 455703
loss: 0.9379176497459412,grad_norm: 0.9999991760032254, iteration: 455704
loss: 1.020979881286621,grad_norm: 0.8361595290722875, iteration: 455705
loss: 1.0036876201629639,grad_norm: 0.6966917253774848, iteration: 455706
loss: 1.0068082809448242,grad_norm: 0.7829450504314805, iteration: 455707
loss: 0.9851770401000977,grad_norm: 0.788688780941537, iteration: 455708
loss: 1.0301998853683472,grad_norm: 0.9650204959205667, iteration: 455709
loss: 1.0039539337158203,grad_norm: 0.9999992652444263, iteration: 455710
loss: 1.006126880645752,grad_norm: 0.9999998909318163, iteration: 455711
loss: 0.9536350965499878,grad_norm: 0.7784201652310697, iteration: 455712
loss: 1.0274475812911987,grad_norm: 0.8118813590715811, iteration: 455713
loss: 1.0218987464904785,grad_norm: 0.84752496721369, iteration: 455714
loss: 1.0131160020828247,grad_norm: 0.7483699860012406, iteration: 455715
loss: 0.9851686358451843,grad_norm: 0.8301500680970392, iteration: 455716
loss: 1.0164724588394165,grad_norm: 0.7436777770227112, iteration: 455717
loss: 0.986165463924408,grad_norm: 0.8658663192908334, iteration: 455718
loss: 0.9777438640594482,grad_norm: 0.7506343454579045, iteration: 455719
loss: 0.9781433939933777,grad_norm: 0.687941968429813, iteration: 455720
loss: 1.0030945539474487,grad_norm: 0.9036056771412794, iteration: 455721
loss: 0.99462890625,grad_norm: 0.9999990218597059, iteration: 455722
loss: 0.9639254808425903,grad_norm: 0.6928862872285119, iteration: 455723
loss: 0.9921041131019592,grad_norm: 0.6513358339298743, iteration: 455724
loss: 0.9970787167549133,grad_norm: 0.9999991213131764, iteration: 455725
loss: 0.9860666990280151,grad_norm: 0.6417370461810373, iteration: 455726
loss: 1.0209951400756836,grad_norm: 0.9719674710867382, iteration: 455727
loss: 1.0062074661254883,grad_norm: 0.7163028514643939, iteration: 455728
loss: 0.9836943745613098,grad_norm: 0.9010691567849652, iteration: 455729
loss: 0.9869673252105713,grad_norm: 0.7872475621175735, iteration: 455730
loss: 1.02977454662323,grad_norm: 0.8141664691813905, iteration: 455731
loss: 1.0025634765625,grad_norm: 0.8261700376844524, iteration: 455732
loss: 1.0092275142669678,grad_norm: 0.8372584353855045, iteration: 455733
loss: 1.0141807794570923,grad_norm: 0.8664544601777393, iteration: 455734
loss: 0.9776898622512817,grad_norm: 0.6695795485226652, iteration: 455735
loss: 1.0392054319381714,grad_norm: 0.742149870120467, iteration: 455736
loss: 0.9385979771614075,grad_norm: 0.9379072826462574, iteration: 455737
loss: 1.042786955833435,grad_norm: 0.8800770298113967, iteration: 455738
loss: 1.0198405981063843,grad_norm: 0.8375846613566048, iteration: 455739
loss: 1.0353926420211792,grad_norm: 0.6213171523267589, iteration: 455740
loss: 1.1013034582138062,grad_norm: 0.999999212003992, iteration: 455741
loss: 0.9521586894989014,grad_norm: 0.7023846062323352, iteration: 455742
loss: 1.031114101409912,grad_norm: 0.673859352348381, iteration: 455743
loss: 0.9806275963783264,grad_norm: 0.7566122958530344, iteration: 455744
loss: 1.0293229818344116,grad_norm: 0.7957227911060721, iteration: 455745
loss: 1.0620226860046387,grad_norm: 0.999999962805766, iteration: 455746
loss: 1.0286320447921753,grad_norm: 0.7273506681327653, iteration: 455747
loss: 1.0270227193832397,grad_norm: 0.7302744721026172, iteration: 455748
loss: 1.0125974416732788,grad_norm: 0.8050333569949218, iteration: 455749
loss: 0.9989812970161438,grad_norm: 0.6918295864294547, iteration: 455750
loss: 0.9818007946014404,grad_norm: 0.6962283912273416, iteration: 455751
loss: 0.9872735142707825,grad_norm: 0.8991864139361783, iteration: 455752
loss: 1.005653738975525,grad_norm: 0.7212530284094503, iteration: 455753
loss: 0.9704834222793579,grad_norm: 0.6947276181169004, iteration: 455754
loss: 1.011839509010315,grad_norm: 0.7835661529106955, iteration: 455755
loss: 1.0078577995300293,grad_norm: 0.6590602067684583, iteration: 455756
loss: 1.0437932014465332,grad_norm: 0.8100745280618522, iteration: 455757
loss: 1.0056062936782837,grad_norm: 0.7482955722242328, iteration: 455758
loss: 0.9788069725036621,grad_norm: 0.6945951575583185, iteration: 455759
loss: 0.9691804051399231,grad_norm: 0.9999991926140492, iteration: 455760
loss: 0.9851222038269043,grad_norm: 0.8091683317284734, iteration: 455761
loss: 1.0051645040512085,grad_norm: 0.9507065146925445, iteration: 455762
loss: 0.972533643245697,grad_norm: 0.7898586095160914, iteration: 455763
loss: 0.9975380301475525,grad_norm: 0.6988888656167936, iteration: 455764
loss: 1.0114684104919434,grad_norm: 0.6965328126080542, iteration: 455765
loss: 1.0388904809951782,grad_norm: 0.8652197178217722, iteration: 455766
loss: 0.9924742579460144,grad_norm: 0.7338901338231589, iteration: 455767
loss: 1.0030063390731812,grad_norm: 0.7644045813003448, iteration: 455768
loss: 0.9820641875267029,grad_norm: 0.7708614258668333, iteration: 455769
loss: 1.0316160917282104,grad_norm: 0.7953915407113583, iteration: 455770
loss: 0.9622219800949097,grad_norm: 0.6633104779226014, iteration: 455771
loss: 0.9924244284629822,grad_norm: 0.9235893928430101, iteration: 455772
loss: 1.0113457441329956,grad_norm: 0.9612801423707876, iteration: 455773
loss: 1.001306414604187,grad_norm: 0.8102345651685374, iteration: 455774
loss: 1.0183944702148438,grad_norm: 0.6953884052771722, iteration: 455775
loss: 1.0402891635894775,grad_norm: 0.701123520816704, iteration: 455776
loss: 1.018682599067688,grad_norm: 0.6881480544425783, iteration: 455777
loss: 0.9939443469047546,grad_norm: 0.7097621036553772, iteration: 455778
loss: 1.007619023323059,grad_norm: 0.8556073745581616, iteration: 455779
loss: 0.9836065173149109,grad_norm: 0.7732746340912402, iteration: 455780
loss: 1.0221246480941772,grad_norm: 0.8862810844360496, iteration: 455781
loss: 1.035710096359253,grad_norm: 0.8890639586745562, iteration: 455782
loss: 0.9772477746009827,grad_norm: 0.7828223975085058, iteration: 455783
loss: 0.9921278953552246,grad_norm: 0.6411785638554182, iteration: 455784
loss: 0.9834203124046326,grad_norm: 0.8739765878356659, iteration: 455785
loss: 0.987925112247467,grad_norm: 0.6849380823376875, iteration: 455786
loss: 1.0302867889404297,grad_norm: 0.7230334643459806, iteration: 455787
loss: 1.0209356546401978,grad_norm: 0.7965866544695115, iteration: 455788
loss: 1.0093549489974976,grad_norm: 0.6342243377612076, iteration: 455789
loss: 0.9727405309677124,grad_norm: 0.6910177112303176, iteration: 455790
loss: 1.0032895803451538,grad_norm: 0.7871157238933392, iteration: 455791
loss: 0.9968279600143433,grad_norm: 0.7839497836544161, iteration: 455792
loss: 0.9963327646255493,grad_norm: 0.8377915813020598, iteration: 455793
loss: 0.9834921360015869,grad_norm: 0.9380111138259223, iteration: 455794
loss: 1.0327601432800293,grad_norm: 0.8698421235329848, iteration: 455795
loss: 0.9802753925323486,grad_norm: 0.8106260292794237, iteration: 455796
loss: 0.9677843451499939,grad_norm: 0.6467919649659603, iteration: 455797
loss: 0.9890011548995972,grad_norm: 0.7711668776779065, iteration: 455798
loss: 1.0039637088775635,grad_norm: 0.8642982098399747, iteration: 455799
loss: 0.9693346619606018,grad_norm: 0.6669724196391273, iteration: 455800
loss: 0.9970226883888245,grad_norm: 0.7238175860047938, iteration: 455801
loss: 1.0106656551361084,grad_norm: 0.6389117277953299, iteration: 455802
loss: 0.996705174446106,grad_norm: 0.7173378576259274, iteration: 455803
loss: 0.9906824827194214,grad_norm: 0.7297276391413839, iteration: 455804
loss: 1.0027178525924683,grad_norm: 0.6344450205502314, iteration: 455805
loss: 1.0591727495193481,grad_norm: 0.9128459636540673, iteration: 455806
loss: 1.0013841390609741,grad_norm: 0.6891371276309011, iteration: 455807
loss: 1.0131752490997314,grad_norm: 0.81964889661258, iteration: 455808
loss: 1.0061805248260498,grad_norm: 0.6167067039118957, iteration: 455809
loss: 0.9983155727386475,grad_norm: 0.7474455101242843, iteration: 455810
loss: 1.0080268383026123,grad_norm: 0.8142016974988122, iteration: 455811
loss: 0.9961562156677246,grad_norm: 0.7869614503387244, iteration: 455812
loss: 1.026903510093689,grad_norm: 0.8464425581435109, iteration: 455813
loss: 0.9803242683410645,grad_norm: 0.7193951182199569, iteration: 455814
loss: 0.9419422745704651,grad_norm: 0.7285021368045024, iteration: 455815
loss: 1.0201244354248047,grad_norm: 0.8256766075789347, iteration: 455816
loss: 1.0025118589401245,grad_norm: 0.835383245106064, iteration: 455817
loss: 1.0007792711257935,grad_norm: 0.7736024476867486, iteration: 455818
loss: 1.0148344039916992,grad_norm: 0.8292440386883717, iteration: 455819
loss: 0.9773550033569336,grad_norm: 0.8378937388848297, iteration: 455820
loss: 1.0159127712249756,grad_norm: 0.7139581777053924, iteration: 455821
loss: 1.036867618560791,grad_norm: 0.6492521334172991, iteration: 455822
loss: 0.9717183113098145,grad_norm: 0.9999991341925344, iteration: 455823
loss: 1.0039775371551514,grad_norm: 0.6890831609709183, iteration: 455824
loss: 1.027125358581543,grad_norm: 0.6750695044725384, iteration: 455825
loss: 0.9933608770370483,grad_norm: 0.8849228690654712, iteration: 455826
loss: 0.9953948259353638,grad_norm: 0.9492931704788125, iteration: 455827
loss: 0.9666424989700317,grad_norm: 0.9999991451867906, iteration: 455828
loss: 0.9600623250007629,grad_norm: 0.7366430353106717, iteration: 455829
loss: 0.9914062023162842,grad_norm: 0.6787203215040545, iteration: 455830
loss: 1.0189213752746582,grad_norm: 0.916319750726369, iteration: 455831
loss: 0.9850049614906311,grad_norm: 0.8045644293772548, iteration: 455832
loss: 1.0004700422286987,grad_norm: 0.7289263852494543, iteration: 455833
loss: 0.9793358445167542,grad_norm: 0.6291787170820404, iteration: 455834
loss: 1.026848554611206,grad_norm: 0.6868914621310439, iteration: 455835
loss: 0.9992380142211914,grad_norm: 0.6540694784698006, iteration: 455836
loss: 1.0027602910995483,grad_norm: 0.6993791734751597, iteration: 455837
loss: 0.998497724533081,grad_norm: 0.6806566492135641, iteration: 455838
loss: 0.9756489396095276,grad_norm: 0.827711071034937, iteration: 455839
loss: 0.9746049642562866,grad_norm: 0.8340534782110958, iteration: 455840
loss: 0.9976453185081482,grad_norm: 0.8339442319811025, iteration: 455841
loss: 1.0005073547363281,grad_norm: 0.7675994378868289, iteration: 455842
loss: 0.9932374954223633,grad_norm: 0.8078038490880141, iteration: 455843
loss: 0.9991521239280701,grad_norm: 0.8454247964575898, iteration: 455844
loss: 1.010888695716858,grad_norm: 0.7586733960741496, iteration: 455845
loss: 0.9969264268875122,grad_norm: 0.7109733892587735, iteration: 455846
loss: 1.0127077102661133,grad_norm: 0.7724035678482968, iteration: 455847
loss: 0.972661554813385,grad_norm: 0.9427489782101297, iteration: 455848
loss: 1.0117707252502441,grad_norm: 0.9602968077511908, iteration: 455849
loss: 0.9784792065620422,grad_norm: 0.8413581174110555, iteration: 455850
loss: 1.0003849267959595,grad_norm: 0.6696940068467483, iteration: 455851
loss: 1.016117811203003,grad_norm: 0.7957271614151865, iteration: 455852
loss: 1.003862738609314,grad_norm: 0.8407271527130832, iteration: 455853
loss: 1.0098507404327393,grad_norm: 0.8266215802757088, iteration: 455854
loss: 1.0127675533294678,grad_norm: 0.8603527238684089, iteration: 455855
loss: 0.9641092419624329,grad_norm: 0.7767989969544604, iteration: 455856
loss: 0.9975470304489136,grad_norm: 0.7896634365491858, iteration: 455857
loss: 0.9866510033607483,grad_norm: 0.7670708095982502, iteration: 455858
loss: 0.988159716129303,grad_norm: 0.7778653679661471, iteration: 455859
loss: 1.0235284566879272,grad_norm: 0.6924450271542718, iteration: 455860
loss: 0.9841240644454956,grad_norm: 0.8318315744589618, iteration: 455861
loss: 0.9765684604644775,grad_norm: 0.9266675369346719, iteration: 455862
loss: 0.9739686846733093,grad_norm: 0.8647737306856628, iteration: 455863
loss: 1.0336370468139648,grad_norm: 0.7972401191416628, iteration: 455864
loss: 0.9675108194351196,grad_norm: 0.8385791932316582, iteration: 455865
loss: 1.0036555528640747,grad_norm: 0.8070442437049834, iteration: 455866
loss: 1.0264949798583984,grad_norm: 0.8291600342236375, iteration: 455867
loss: 0.9788812398910522,grad_norm: 0.747168193170262, iteration: 455868
loss: 1.011106252670288,grad_norm: 0.9325691762882794, iteration: 455869
loss: 1.0175933837890625,grad_norm: 0.7967995710799604, iteration: 455870
loss: 1.034044861793518,grad_norm: 0.641513344294167, iteration: 455871
loss: 1.0059431791305542,grad_norm: 0.7062455862300223, iteration: 455872
loss: 0.9952597618103027,grad_norm: 0.6896653837446833, iteration: 455873
loss: 0.984330415725708,grad_norm: 0.7303799719126689, iteration: 455874
loss: 0.9795336723327637,grad_norm: 0.8013075987156925, iteration: 455875
loss: 0.9557153582572937,grad_norm: 0.9335155465923205, iteration: 455876
loss: 1.0413519144058228,grad_norm: 0.9999993008586405, iteration: 455877
loss: 0.9973099231719971,grad_norm: 0.9999991130565876, iteration: 455878
loss: 1.001734972000122,grad_norm: 0.7754927666521068, iteration: 455879
loss: 0.9771345853805542,grad_norm: 0.9871532511669057, iteration: 455880
loss: 0.974297285079956,grad_norm: 0.851743283483028, iteration: 455881
loss: 0.9752949476242065,grad_norm: 0.7255077882941833, iteration: 455882
loss: 1.0222234725952148,grad_norm: 0.7812505768710777, iteration: 455883
loss: 0.9871609807014465,grad_norm: 0.7531307565104319, iteration: 455884
loss: 1.079296588897705,grad_norm: 0.9999999706182022, iteration: 455885
loss: 1.0112431049346924,grad_norm: 0.718676943903846, iteration: 455886
loss: 0.9884999990463257,grad_norm: 0.6099223160212168, iteration: 455887
loss: 1.0033026933670044,grad_norm: 0.7858404875224019, iteration: 455888
loss: 0.996426522731781,grad_norm: 0.7919273804697233, iteration: 455889
loss: 0.985382080078125,grad_norm: 0.8928406271402041, iteration: 455890
loss: 0.9839447736740112,grad_norm: 0.89767540190973, iteration: 455891
loss: 1.0040278434753418,grad_norm: 0.9818738007892089, iteration: 455892
loss: 1.0116420984268188,grad_norm: 0.7173351292194737, iteration: 455893
loss: 1.0034918785095215,grad_norm: 0.7738415787379253, iteration: 455894
loss: 1.0158497095108032,grad_norm: 0.7323537600499856, iteration: 455895
loss: 0.9921939373016357,grad_norm: 0.7265319817691402, iteration: 455896
loss: 1.0283225774765015,grad_norm: 0.9999995272866844, iteration: 455897
loss: 1.0451241731643677,grad_norm: 0.7600766401010485, iteration: 455898
loss: 0.9481351971626282,grad_norm: 0.6724212604153927, iteration: 455899
loss: 1.0596325397491455,grad_norm: 0.8739630697679339, iteration: 455900
loss: 1.0052404403686523,grad_norm: 0.777836719713867, iteration: 455901
loss: 0.9909603595733643,grad_norm: 0.7328397673838489, iteration: 455902
loss: 0.9907276630401611,grad_norm: 0.6824807197066989, iteration: 455903
loss: 1.0059932470321655,grad_norm: 0.762789508263877, iteration: 455904
loss: 1.008612871170044,grad_norm: 0.7214727917787912, iteration: 455905
loss: 0.9723867774009705,grad_norm: 0.7924020648271278, iteration: 455906
loss: 1.0079947710037231,grad_norm: 0.7450495724014813, iteration: 455907
loss: 1.0012773275375366,grad_norm: 0.9149848331143328, iteration: 455908
loss: 0.9576392769813538,grad_norm: 0.9012708242229661, iteration: 455909
loss: 0.986813485622406,grad_norm: 0.8736142558378318, iteration: 455910
loss: 1.0014582872390747,grad_norm: 0.9048806139486312, iteration: 455911
loss: 0.9982978701591492,grad_norm: 0.6317453760636645, iteration: 455912
loss: 1.0644077062606812,grad_norm: 0.9454234331380084, iteration: 455913
loss: 1.0267740488052368,grad_norm: 0.7161224871973371, iteration: 455914
loss: 0.9834818243980408,grad_norm: 0.9999995274330725, iteration: 455915
loss: 0.9970289468765259,grad_norm: 0.8639648931054954, iteration: 455916
loss: 0.9919646382331848,grad_norm: 0.7490331896892564, iteration: 455917
loss: 0.9917612075805664,grad_norm: 0.740933345407037, iteration: 455918
loss: 1.0288480520248413,grad_norm: 0.6609161152793158, iteration: 455919
loss: 0.996002197265625,grad_norm: 0.7626255344032393, iteration: 455920
loss: 1.0034215450286865,grad_norm: 0.799876556212966, iteration: 455921
loss: 0.9700637459754944,grad_norm: 0.7954539285583394, iteration: 455922
loss: 0.9688277840614319,grad_norm: 0.7742953366668429, iteration: 455923
loss: 0.9687114953994751,grad_norm: 0.674630917746812, iteration: 455924
loss: 0.9857588410377502,grad_norm: 0.7510168028487545, iteration: 455925
loss: 0.9878135323524475,grad_norm: 0.8242185430405032, iteration: 455926
loss: 0.9653388261795044,grad_norm: 0.6979242544090892, iteration: 455927
loss: 0.9985643625259399,grad_norm: 0.7324572076677185, iteration: 455928
loss: 1.0851914882659912,grad_norm: 0.9573882132823075, iteration: 455929
loss: 1.014999270439148,grad_norm: 0.9999992693948074, iteration: 455930
loss: 0.9988883137702942,grad_norm: 0.9168643058249449, iteration: 455931
loss: 0.979262113571167,grad_norm: 0.6201172664521809, iteration: 455932
loss: 0.9862801432609558,grad_norm: 0.6814476727814648, iteration: 455933
loss: 0.9981979131698608,grad_norm: 0.7764192441334965, iteration: 455934
loss: 1.008059024810791,grad_norm: 0.6918344474745318, iteration: 455935
loss: 0.9892736673355103,grad_norm: 0.8130026184607692, iteration: 455936
loss: 1.003015160560608,grad_norm: 0.7721099521167747, iteration: 455937
loss: 0.9621041417121887,grad_norm: 0.7200775486233989, iteration: 455938
loss: 0.993930459022522,grad_norm: 0.7663974968682626, iteration: 455939
loss: 1.021189570426941,grad_norm: 0.6961441447801747, iteration: 455940
loss: 1.013847827911377,grad_norm: 0.6169278936477389, iteration: 455941
loss: 1.005204677581787,grad_norm: 0.865248591796967, iteration: 455942
loss: 0.9595704674720764,grad_norm: 0.731216221697193, iteration: 455943
loss: 1.0250937938690186,grad_norm: 0.7112105411590812, iteration: 455944
loss: 1.0071362257003784,grad_norm: 0.6597941112872088, iteration: 455945
loss: 1.0018584728240967,grad_norm: 0.8861278468546057, iteration: 455946
loss: 0.9821329712867737,grad_norm: 0.7762193740818122, iteration: 455947
loss: 0.9709919095039368,grad_norm: 0.7240202307603111, iteration: 455948
loss: 0.9856711030006409,grad_norm: 0.7110327846453526, iteration: 455949
loss: 1.0115472078323364,grad_norm: 0.7163799563833517, iteration: 455950
loss: 0.9985697269439697,grad_norm: 0.6613917582944837, iteration: 455951
loss: 0.9498110413551331,grad_norm: 0.7791350107943446, iteration: 455952
loss: 0.9818149209022522,grad_norm: 0.8015498790440859, iteration: 455953
loss: 0.9615819454193115,grad_norm: 0.7167351506589628, iteration: 455954
loss: 0.9963513612747192,grad_norm: 0.6444622754082315, iteration: 455955
loss: 0.9642772078514099,grad_norm: 0.7114156741940983, iteration: 455956
loss: 1.0130481719970703,grad_norm: 0.7897186785744927, iteration: 455957
loss: 0.9736043214797974,grad_norm: 0.7795965460813493, iteration: 455958
loss: 0.9817827939987183,grad_norm: 0.7069074907252961, iteration: 455959
loss: 0.9809686541557312,grad_norm: 0.8011807409007141, iteration: 455960
loss: 0.974172055721283,grad_norm: 0.7062922153186285, iteration: 455961
loss: 1.0273852348327637,grad_norm: 0.8128126352910298, iteration: 455962
loss: 1.0046863555908203,grad_norm: 0.6514833490938882, iteration: 455963
loss: 0.971525251865387,grad_norm: 0.6214546024965255, iteration: 455964
loss: 0.9653172492980957,grad_norm: 0.6315434336028465, iteration: 455965
loss: 0.9738857746124268,grad_norm: 0.7443341378602174, iteration: 455966
loss: 0.9828774333000183,grad_norm: 0.8170117188260301, iteration: 455967
loss: 1.0024585723876953,grad_norm: 0.6681774986829139, iteration: 455968
loss: 1.0156346559524536,grad_norm: 0.8162693706529105, iteration: 455969
loss: 1.03451406955719,grad_norm: 0.7866044264326448, iteration: 455970
loss: 1.0031706094741821,grad_norm: 0.7545603443769897, iteration: 455971
loss: 0.9727434515953064,grad_norm: 0.7469821682980307, iteration: 455972
loss: 0.9772035479545593,grad_norm: 0.7204900337123061, iteration: 455973
loss: 1.0189685821533203,grad_norm: 0.7455779990779158, iteration: 455974
loss: 1.0340255498886108,grad_norm: 0.999999429151012, iteration: 455975
loss: 0.9949526190757751,grad_norm: 0.6546711145965457, iteration: 455976
loss: 1.0160671472549438,grad_norm: 0.708143640242431, iteration: 455977
loss: 1.0832892656326294,grad_norm: 0.9227574719080107, iteration: 455978
loss: 0.9772205352783203,grad_norm: 0.7308825560234875, iteration: 455979
loss: 0.9923120141029358,grad_norm: 0.7324941264058366, iteration: 455980
loss: 0.9873778223991394,grad_norm: 0.68570164551254, iteration: 455981
loss: 1.057725191116333,grad_norm: 0.8649824605623425, iteration: 455982
loss: 0.9768440127372742,grad_norm: 0.964104753630775, iteration: 455983
loss: 1.0035309791564941,grad_norm: 0.7536342227974172, iteration: 455984
loss: 1.0018101930618286,grad_norm: 0.71819340045752, iteration: 455985
loss: 0.9677198529243469,grad_norm: 0.7867168475489985, iteration: 455986
loss: 0.9790388345718384,grad_norm: 0.7939380789837001, iteration: 455987
loss: 0.9891473650932312,grad_norm: 0.6591646809276062, iteration: 455988
loss: 0.9915612936019897,grad_norm: 0.6384251632567676, iteration: 455989
loss: 0.9964861869812012,grad_norm: 0.9405328098362151, iteration: 455990
loss: 0.9739965200424194,grad_norm: 0.6590790870402061, iteration: 455991
loss: 0.9726349115371704,grad_norm: 0.884004542452352, iteration: 455992
loss: 0.975083589553833,grad_norm: 0.7060574716364993, iteration: 455993
loss: 1.0104730129241943,grad_norm: 0.700825805423154, iteration: 455994
loss: 0.9556064605712891,grad_norm: 0.7737435489110114, iteration: 455995
loss: 0.9996671676635742,grad_norm: 0.8802686418767782, iteration: 455996
loss: 1.0055145025253296,grad_norm: 0.6679866305568612, iteration: 455997
loss: 0.9841514229774475,grad_norm: 0.6927640316804128, iteration: 455998
loss: 0.9731096029281616,grad_norm: 0.7674259680484132, iteration: 455999
loss: 0.9700056910514832,grad_norm: 0.7120504226880551, iteration: 456000
loss: 1.0406060218811035,grad_norm: 0.8849513622843872, iteration: 456001
loss: 0.9984877705574036,grad_norm: 0.9202642915740641, iteration: 456002
loss: 0.9767667651176453,grad_norm: 0.7423812937003705, iteration: 456003
loss: 1.0110012292861938,grad_norm: 0.849900253168673, iteration: 456004
loss: 1.0107091665267944,grad_norm: 0.9999995378850528, iteration: 456005
loss: 1.012332797050476,grad_norm: 0.801982277588368, iteration: 456006
loss: 0.9933875203132629,grad_norm: 0.7838855016895191, iteration: 456007
loss: 1.0123313665390015,grad_norm: 0.9999999806781407, iteration: 456008
loss: 0.9869493842124939,grad_norm: 0.798333560188858, iteration: 456009
loss: 0.9744338989257812,grad_norm: 0.765439607790128, iteration: 456010
loss: 0.9978731870651245,grad_norm: 0.8255856275784861, iteration: 456011
loss: 1.0216795206069946,grad_norm: 0.9999991566054448, iteration: 456012
loss: 0.9751278758049011,grad_norm: 0.6719761990621885, iteration: 456013
loss: 0.9922378063201904,grad_norm: 0.9325924810461635, iteration: 456014
loss: 1.0170894861221313,grad_norm: 0.791021546456614, iteration: 456015
loss: 0.9777562022209167,grad_norm: 0.8611650405589856, iteration: 456016
loss: 0.9853756427764893,grad_norm: 0.8625185185893641, iteration: 456017
loss: 0.9657270908355713,grad_norm: 0.7672043145385391, iteration: 456018
loss: 1.0093423128128052,grad_norm: 1.0000000943456315, iteration: 456019
loss: 1.065229058265686,grad_norm: 0.9999991842881545, iteration: 456020
loss: 0.9975670576095581,grad_norm: 0.8426712362681218, iteration: 456021
loss: 0.9874007701873779,grad_norm: 0.7443031042469692, iteration: 456022
loss: 0.9367582201957703,grad_norm: 0.7414195672203278, iteration: 456023
loss: 0.9992356300354004,grad_norm: 0.6633064113481115, iteration: 456024
loss: 0.9666515588760376,grad_norm: 0.8049633322045898, iteration: 456025
loss: 0.980933427810669,grad_norm: 0.8936667828559518, iteration: 456026
loss: 0.994223415851593,grad_norm: 0.7217511261764147, iteration: 456027
loss: 1.0010008811950684,grad_norm: 0.7687066834515922, iteration: 456028
loss: 1.014379620552063,grad_norm: 0.9999995873294739, iteration: 456029
loss: 0.9706968069076538,grad_norm: 0.8113330386491447, iteration: 456030
loss: 0.9976277947425842,grad_norm: 0.755486465431694, iteration: 456031
loss: 1.008323311805725,grad_norm: 0.8557755683333198, iteration: 456032
loss: 0.9755478501319885,grad_norm: 0.6608925732146392, iteration: 456033
loss: 1.0041199922561646,grad_norm: 0.7643238471671578, iteration: 456034
loss: 1.011364459991455,grad_norm: 0.9148655771659148, iteration: 456035
loss: 0.982757031917572,grad_norm: 0.8042267589697694, iteration: 456036
loss: 1.027080774307251,grad_norm: 0.8527582037244917, iteration: 456037
loss: 0.9878387451171875,grad_norm: 0.602828589079878, iteration: 456038
loss: 0.9881733655929565,grad_norm: 0.9999995934076367, iteration: 456039
loss: 1.0126289129257202,grad_norm: 0.684903173453262, iteration: 456040
loss: 1.0005143880844116,grad_norm: 0.8502174747021929, iteration: 456041
loss: 1.0126546621322632,grad_norm: 0.6804767822107032, iteration: 456042
loss: 1.031630039215088,grad_norm: 0.9096428227828154, iteration: 456043
loss: 0.9957119226455688,grad_norm: 0.7765629703008763, iteration: 456044
loss: 0.9533030986785889,grad_norm: 0.6956307402156726, iteration: 456045
loss: 0.9597562551498413,grad_norm: 0.6795446323157337, iteration: 456046
loss: 0.9679064750671387,grad_norm: 0.7608613129834013, iteration: 456047
loss: 0.9942259192466736,grad_norm: 0.7952354442193585, iteration: 456048
loss: 1.0013158321380615,grad_norm: 0.7656791614977301, iteration: 456049
loss: 1.0127737522125244,grad_norm: 0.6713635525692409, iteration: 456050
loss: 1.019142985343933,grad_norm: 0.6843508521414886, iteration: 456051
loss: 1.0084116458892822,grad_norm: 0.6331711449009627, iteration: 456052
loss: 1.00165593624115,grad_norm: 0.7876054494052093, iteration: 456053
loss: 0.9771901369094849,grad_norm: 0.6452699650604944, iteration: 456054
loss: 1.0068868398666382,grad_norm: 0.5919918813506115, iteration: 456055
loss: 1.0393213033676147,grad_norm: 0.7208144298484388, iteration: 456056
loss: 1.0123971700668335,grad_norm: 0.8650478209599244, iteration: 456057
loss: 0.9947925806045532,grad_norm: 0.7881828668811715, iteration: 456058
loss: 1.037308931350708,grad_norm: 0.8993321357477835, iteration: 456059
loss: 0.9873883128166199,grad_norm: 0.7691897163540033, iteration: 456060
loss: 1.1062581539154053,grad_norm: 0.9999996333491435, iteration: 456061
loss: 0.9917592406272888,grad_norm: 0.779793000186159, iteration: 456062
loss: 0.9597923159599304,grad_norm: 0.6807200701836341, iteration: 456063
loss: 0.9661933183670044,grad_norm: 0.8154052500900185, iteration: 456064
loss: 0.9915615916252136,grad_norm: 0.7728387939414801, iteration: 456065
loss: 1.0006781816482544,grad_norm: 0.6967710900825704, iteration: 456066
loss: 1.0166393518447876,grad_norm: 0.8545435641684214, iteration: 456067
loss: 1.0304527282714844,grad_norm: 0.9999991609033626, iteration: 456068
loss: 1.0149081945419312,grad_norm: 0.8087303257168812, iteration: 456069
loss: 0.9740260243415833,grad_norm: 0.8140394796966889, iteration: 456070
loss: 0.9827277660369873,grad_norm: 0.6831757617532541, iteration: 456071
loss: 0.9922275543212891,grad_norm: 0.7283701417170568, iteration: 456072
loss: 0.9746465682983398,grad_norm: 0.6241985439517843, iteration: 456073
loss: 1.010768175125122,grad_norm: 0.8859440344443442, iteration: 456074
loss: 1.0109671354293823,grad_norm: 0.778000884775595, iteration: 456075
loss: 0.9878999590873718,grad_norm: 0.7324831890324484, iteration: 456076
loss: 0.9383808374404907,grad_norm: 0.7659863824750787, iteration: 456077
loss: 0.9952194690704346,grad_norm: 0.814536655613043, iteration: 456078
loss: 0.9702271223068237,grad_norm: 0.7672954777363792, iteration: 456079
loss: 1.089666724205017,grad_norm: 0.9999991357708314, iteration: 456080
loss: 1.040016531944275,grad_norm: 0.961067122323861, iteration: 456081
loss: 0.9912289977073669,grad_norm: 0.7438119351827464, iteration: 456082
loss: 0.9899469017982483,grad_norm: 0.8285570121993404, iteration: 456083
loss: 0.9737117290496826,grad_norm: 0.8155744272944327, iteration: 456084
loss: 1.0083658695220947,grad_norm: 0.8121910193264898, iteration: 456085
loss: 0.994844913482666,grad_norm: 0.7368334133189264, iteration: 456086
loss: 1.0214905738830566,grad_norm: 0.7522472308071934, iteration: 456087
loss: 0.9921453595161438,grad_norm: 1.0000000515368697, iteration: 456088
loss: 1.0213027000427246,grad_norm: 0.7783594375914895, iteration: 456089
loss: 1.0106538534164429,grad_norm: 0.9854135447911273, iteration: 456090
loss: 1.011263132095337,grad_norm: 0.6022306496357352, iteration: 456091
loss: 1.1148983240127563,grad_norm: 0.8496663705018058, iteration: 456092
loss: 1.0155378580093384,grad_norm: 0.8134201873660313, iteration: 456093
loss: 1.0727341175079346,grad_norm: 0.9999995213086466, iteration: 456094
loss: 1.002897024154663,grad_norm: 0.8605981202376464, iteration: 456095
loss: 0.9935218095779419,grad_norm: 0.7486338917274167, iteration: 456096
loss: 1.0192893743515015,grad_norm: 0.9999992084775419, iteration: 456097
loss: 1.0190590620040894,grad_norm: 0.950178931724865, iteration: 456098
loss: 0.9640772938728333,grad_norm: 0.98159720593408, iteration: 456099
loss: 0.9850029945373535,grad_norm: 0.9376874573531843, iteration: 456100
loss: 0.9855700731277466,grad_norm: 0.7081981238678057, iteration: 456101
loss: 0.9521584510803223,grad_norm: 0.9228627375057435, iteration: 456102
loss: 0.9965652823448181,grad_norm: 0.7345167173693888, iteration: 456103
loss: 0.9750942587852478,grad_norm: 0.7611852012901078, iteration: 456104
loss: 1.0168182849884033,grad_norm: 0.8792999633360776, iteration: 456105
loss: 1.0128567218780518,grad_norm: 0.7934404014570636, iteration: 456106
loss: 0.9694114923477173,grad_norm: 0.9108598392790281, iteration: 456107
loss: 0.9807830452919006,grad_norm: 0.8131525699648803, iteration: 456108
loss: 0.955624520778656,grad_norm: 0.8128422001353415, iteration: 456109
loss: 1.0067566633224487,grad_norm: 0.8105101597539796, iteration: 456110
loss: 0.9877834320068359,grad_norm: 0.8028874819715335, iteration: 456111
loss: 1.0105022192001343,grad_norm: 0.7225781579629627, iteration: 456112
loss: 0.9868497848510742,grad_norm: 0.8144233271848299, iteration: 456113
loss: 0.9755680561065674,grad_norm: 0.7556417336837308, iteration: 456114
loss: 0.971638560295105,grad_norm: 0.8548671603900312, iteration: 456115
loss: 1.0393129587173462,grad_norm: 0.999999205818957, iteration: 456116
loss: 0.9769694805145264,grad_norm: 0.7383518485597313, iteration: 456117
loss: 0.9965470433235168,grad_norm: 0.6158092258998964, iteration: 456118
loss: 0.996680736541748,grad_norm: 0.7436682296569272, iteration: 456119
loss: 0.9990906119346619,grad_norm: 0.6953320682545322, iteration: 456120
loss: 1.0190068483352661,grad_norm: 0.863076558626685, iteration: 456121
loss: 0.9897150993347168,grad_norm: 0.7799773311883744, iteration: 456122
loss: 0.9746710062026978,grad_norm: 0.6627071812232886, iteration: 456123
loss: 1.0101051330566406,grad_norm: 0.6663069667656645, iteration: 456124
loss: 0.9885590672492981,grad_norm: 0.6793702591138326, iteration: 456125
loss: 1.0101362466812134,grad_norm: 0.7118143266443476, iteration: 456126
loss: 1.0202199220657349,grad_norm: 0.8483135504799022, iteration: 456127
loss: 1.0032345056533813,grad_norm: 0.9514967041832453, iteration: 456128
loss: 1.0026944875717163,grad_norm: 0.9999990257337141, iteration: 456129
loss: 0.9899594783782959,grad_norm: 0.6250563381021992, iteration: 456130
loss: 0.9825505018234253,grad_norm: 0.6735755471963827, iteration: 456131
loss: 0.9658530354499817,grad_norm: 0.7068802117365026, iteration: 456132
loss: 0.9896634817123413,grad_norm: 0.7660675849338966, iteration: 456133
loss: 0.9779994487762451,grad_norm: 0.6993559185526896, iteration: 456134
loss: 1.019877552986145,grad_norm: 0.7428517571130141, iteration: 456135
loss: 0.9984304904937744,grad_norm: 0.5938308953089535, iteration: 456136
loss: 0.9755473136901855,grad_norm: 0.9999990374655292, iteration: 456137
loss: 1.0286964178085327,grad_norm: 0.7520188281658169, iteration: 456138
loss: 0.9889076352119446,grad_norm: 0.7509425050638152, iteration: 456139
loss: 1.0322074890136719,grad_norm: 0.69595862118112, iteration: 456140
loss: 0.9863821268081665,grad_norm: 0.8918341632239425, iteration: 456141
loss: 1.0340372323989868,grad_norm: 0.9431769452051045, iteration: 456142
loss: 0.9795925617218018,grad_norm: 0.9129598413700966, iteration: 456143
loss: 0.9529144763946533,grad_norm: 0.7654035520296767, iteration: 456144
loss: 1.0192453861236572,grad_norm: 0.9240412143571944, iteration: 456145
loss: 1.0515167713165283,grad_norm: 0.8004432251926206, iteration: 456146
loss: 1.026240587234497,grad_norm: 0.674239912200149, iteration: 456147
loss: 0.9078390598297119,grad_norm: 0.7271609208748163, iteration: 456148
loss: 0.9941821098327637,grad_norm: 0.9999991331285714, iteration: 456149
loss: 0.9914977550506592,grad_norm: 0.744388909726328, iteration: 456150
loss: 0.9967745542526245,grad_norm: 0.8150255877160624, iteration: 456151
loss: 1.0280767679214478,grad_norm: 0.8462312279107298, iteration: 456152
loss: 0.9785565733909607,grad_norm: 0.999998976017312, iteration: 456153
loss: 1.0021110773086548,grad_norm: 0.8016893095178853, iteration: 456154
loss: 1.02335524559021,grad_norm: 0.7556747084331537, iteration: 456155
loss: 0.9989007711410522,grad_norm: 0.888698944177715, iteration: 456156
loss: 1.04265296459198,grad_norm: 0.9999997675762593, iteration: 456157
loss: 0.9992818832397461,grad_norm: 0.9458358913747819, iteration: 456158
loss: 1.0397028923034668,grad_norm: 0.7636468473945491, iteration: 456159
loss: 0.9833311438560486,grad_norm: 0.9999990361562442, iteration: 456160
loss: 1.0143721103668213,grad_norm: 0.8230230406983448, iteration: 456161
loss: 1.0579160451889038,grad_norm: 0.9999999088638793, iteration: 456162
loss: 1.0301265716552734,grad_norm: 0.7187275098704095, iteration: 456163
loss: 0.9901310205459595,grad_norm: 0.8990779907870801, iteration: 456164
loss: 1.0192337036132812,grad_norm: 0.7500672021718597, iteration: 456165
loss: 0.996878445148468,grad_norm: 0.74769422303215, iteration: 456166
loss: 1.0064549446105957,grad_norm: 0.7618578517692687, iteration: 456167
loss: 0.997948944568634,grad_norm: 0.804376184939911, iteration: 456168
loss: 0.9263305068016052,grad_norm: 0.8719025994870878, iteration: 456169
loss: 0.9788588285446167,grad_norm: 0.8038589677043662, iteration: 456170
loss: 0.9482687711715698,grad_norm: 0.7394160317335029, iteration: 456171
loss: 0.9895904660224915,grad_norm: 0.7173561794159121, iteration: 456172
loss: 0.9881462454795837,grad_norm: 0.7613304904990241, iteration: 456173
loss: 1.0082378387451172,grad_norm: 0.8157466795734094, iteration: 456174
loss: 1.03066086769104,grad_norm: 0.999999073007758, iteration: 456175
loss: 0.990422785282135,grad_norm: 0.7390443267938449, iteration: 456176
loss: 0.9709616303443909,grad_norm: 0.7584485472727, iteration: 456177
loss: 0.9904710054397583,grad_norm: 0.8578663521685208, iteration: 456178
loss: 1.0106821060180664,grad_norm: 0.8823988842621501, iteration: 456179
loss: 0.9859052300453186,grad_norm: 0.8954381080704902, iteration: 456180
loss: 1.0242996215820312,grad_norm: 0.87203084078665, iteration: 456181
loss: 0.9865409135818481,grad_norm: 0.7300770812763785, iteration: 456182
loss: 1.0190463066101074,grad_norm: 0.9502544011610403, iteration: 456183
loss: 0.9873782992362976,grad_norm: 0.7548401352286278, iteration: 456184
loss: 1.001265525817871,grad_norm: 0.674232353754115, iteration: 456185
loss: 0.997037947177887,grad_norm: 0.804788687212606, iteration: 456186
loss: 0.9946588277816772,grad_norm: 0.679861186488675, iteration: 456187
loss: 0.9595245122909546,grad_norm: 0.7023118784947494, iteration: 456188
loss: 1.0197619199752808,grad_norm: 0.9178603866447475, iteration: 456189
loss: 1.0424681901931763,grad_norm: 0.7796039319093057, iteration: 456190
loss: 1.0113447904586792,grad_norm: 0.7677657982159022, iteration: 456191
loss: 1.0058919191360474,grad_norm: 0.9180484174913062, iteration: 456192
loss: 1.0126229524612427,grad_norm: 0.835810069336275, iteration: 456193
loss: 0.99477618932724,grad_norm: 0.8264375406189408, iteration: 456194
loss: 0.9949897527694702,grad_norm: 0.8328333635775105, iteration: 456195
loss: 1.0003623962402344,grad_norm: 0.8263244594319049, iteration: 456196
loss: 1.0048834085464478,grad_norm: 0.8386337290483843, iteration: 456197
loss: 1.0112004280090332,grad_norm: 0.8226651195951094, iteration: 456198
loss: 0.9708841443061829,grad_norm: 0.7992541093798061, iteration: 456199
loss: 0.9908079504966736,grad_norm: 0.8607970737491134, iteration: 456200
loss: 0.9884510040283203,grad_norm: 0.8516507446181844, iteration: 456201
loss: 1.010096549987793,grad_norm: 0.8214219635488728, iteration: 456202
loss: 1.0146021842956543,grad_norm: 0.9158507357549418, iteration: 456203
loss: 0.9710687398910522,grad_norm: 0.7875113092530072, iteration: 456204
loss: 1.0269616842269897,grad_norm: 0.9999991892837946, iteration: 456205
loss: 1.0059798955917358,grad_norm: 0.695973453154868, iteration: 456206
loss: 0.9980328679084778,grad_norm: 0.7679337131169652, iteration: 456207
loss: 0.9776864051818848,grad_norm: 0.7723141585642082, iteration: 456208
loss: 0.9843915104866028,grad_norm: 0.7564497127712012, iteration: 456209
loss: 0.9528160691261292,grad_norm: 0.7667180934553137, iteration: 456210
loss: 1.000855565071106,grad_norm: 0.6868163447230969, iteration: 456211
loss: 0.9900995492935181,grad_norm: 0.8051009710820028, iteration: 456212
loss: 1.0171074867248535,grad_norm: 0.9090176088083635, iteration: 456213
loss: 1.010429859161377,grad_norm: 0.9999998745817141, iteration: 456214
loss: 0.9795430898666382,grad_norm: 0.8030985025269542, iteration: 456215
loss: 1.027533769607544,grad_norm: 0.7135194368471717, iteration: 456216
loss: 1.0219786167144775,grad_norm: 0.728921077653887, iteration: 456217
loss: 1.0881764888763428,grad_norm: 0.9999990840088229, iteration: 456218
loss: 1.0384689569473267,grad_norm: 0.7558863662737222, iteration: 456219
loss: 0.9875448942184448,grad_norm: 0.8650308139587476, iteration: 456220
loss: 1.0261791944503784,grad_norm: 0.7539798216544424, iteration: 456221
loss: 0.9996559619903564,grad_norm: 0.7082056690685196, iteration: 456222
loss: 0.9845932722091675,grad_norm: 0.9217839319277897, iteration: 456223
loss: 1.0038269758224487,grad_norm: 0.8544835960360078, iteration: 456224
loss: 1.0117120742797852,grad_norm: 0.7686652485577049, iteration: 456225
loss: 1.028835415840149,grad_norm: 0.9999991192157507, iteration: 456226
loss: 1.0526567697525024,grad_norm: 0.637561524719051, iteration: 456227
loss: 0.9575148820877075,grad_norm: 0.6384483465442966, iteration: 456228
loss: 1.0060386657714844,grad_norm: 0.9999998401115731, iteration: 456229
loss: 1.054549217224121,grad_norm: 0.9382444961294972, iteration: 456230
loss: 1.012070655822754,grad_norm: 0.7501479184144786, iteration: 456231
loss: 1.0033646821975708,grad_norm: 0.8437582698267949, iteration: 456232
loss: 0.9796595573425293,grad_norm: 0.7430006934968738, iteration: 456233
loss: 1.0247198343276978,grad_norm: 0.7216616388839066, iteration: 456234
loss: 1.0252984762191772,grad_norm: 0.8289574136044894, iteration: 456235
loss: 0.9803687930107117,grad_norm: 0.7825557569008359, iteration: 456236
loss: 1.0333421230316162,grad_norm: 0.8449436467135764, iteration: 456237
loss: 0.9965333342552185,grad_norm: 0.6917113289148347, iteration: 456238
loss: 0.9575129151344299,grad_norm: 0.775406472259318, iteration: 456239
loss: 0.969240665435791,grad_norm: 0.839909468899128, iteration: 456240
loss: 1.0028784275054932,grad_norm: 0.7702393221816254, iteration: 456241
loss: 0.989498496055603,grad_norm: 0.8642536619375155, iteration: 456242
loss: 1.0160294771194458,grad_norm: 0.7269024182941789, iteration: 456243
loss: 1.0266183614730835,grad_norm: 0.9999993818485481, iteration: 456244
loss: 0.9750813841819763,grad_norm: 0.7850201071358883, iteration: 456245
loss: 1.0295287370681763,grad_norm: 0.8476365704422532, iteration: 456246
loss: 1.0066736936569214,grad_norm: 0.7482090822334075, iteration: 456247
loss: 0.9988208413124084,grad_norm: 0.7072868461306598, iteration: 456248
loss: 0.9930163025856018,grad_norm: 0.7503962707108149, iteration: 456249
loss: 1.0101996660232544,grad_norm: 0.8650861390988472, iteration: 456250
loss: 0.9772485494613647,grad_norm: 0.6447970465270303, iteration: 456251
loss: 0.957072913646698,grad_norm: 0.8747652890357197, iteration: 456252
loss: 1.177583932876587,grad_norm: 0.9999995266524797, iteration: 456253
loss: 0.9571384191513062,grad_norm: 0.7254801198213148, iteration: 456254
loss: 0.9885490536689758,grad_norm: 0.8134519908095289, iteration: 456255
loss: 1.0216810703277588,grad_norm: 0.999999077732426, iteration: 456256
loss: 0.9811745882034302,grad_norm: 0.642748384530085, iteration: 456257
loss: 0.96481853723526,grad_norm: 0.7187907962436398, iteration: 456258
loss: 0.9246863126754761,grad_norm: 0.8175592074036567, iteration: 456259
loss: 0.9984554052352905,grad_norm: 0.7802031931336164, iteration: 456260
loss: 1.0330891609191895,grad_norm: 0.8406712099396907, iteration: 456261
loss: 0.9665843844413757,grad_norm: 0.6646927522641766, iteration: 456262
loss: 0.9726994633674622,grad_norm: 0.7414801824900843, iteration: 456263
loss: 0.9800027012825012,grad_norm: 0.6636529127366921, iteration: 456264
loss: 0.9914988279342651,grad_norm: 0.8895598800584514, iteration: 456265
loss: 0.9774379730224609,grad_norm: 0.8656800505190008, iteration: 456266
loss: 1.0161287784576416,grad_norm: 0.8791290414556145, iteration: 456267
loss: 0.9853114485740662,grad_norm: 0.7210326474808493, iteration: 456268
loss: 0.9978227019309998,grad_norm: 0.6510807289000586, iteration: 456269
loss: 1.010588526725769,grad_norm: 0.8379108171078321, iteration: 456270
loss: 0.9958515167236328,grad_norm: 0.8666553844341865, iteration: 456271
loss: 0.9907993078231812,grad_norm: 0.9999991737096844, iteration: 456272
loss: 0.9926915168762207,grad_norm: 0.7827511052931144, iteration: 456273
loss: 1.0102176666259766,grad_norm: 0.8252322987304765, iteration: 456274
loss: 0.9830884337425232,grad_norm: 0.6749269625067922, iteration: 456275
loss: 0.9913289546966553,grad_norm: 0.7363687436906334, iteration: 456276
loss: 1.0363783836364746,grad_norm: 0.6748782892222719, iteration: 456277
loss: 0.9948328137397766,grad_norm: 0.9999989985668978, iteration: 456278
loss: 1.04084312915802,grad_norm: 0.9999995885628116, iteration: 456279
loss: 0.9991066455841064,grad_norm: 0.736434951485725, iteration: 456280
loss: 1.0021578073501587,grad_norm: 0.9531746098060176, iteration: 456281
loss: 0.9332781434059143,grad_norm: 0.8088777311511363, iteration: 456282
loss: 1.0587149858474731,grad_norm: 0.9999991762292832, iteration: 456283
loss: 1.0020389556884766,grad_norm: 0.6980456108758794, iteration: 456284
loss: 1.0380545854568481,grad_norm: 0.8381680445357913, iteration: 456285
loss: 1.0247000455856323,grad_norm: 0.9999994288059936, iteration: 456286
loss: 0.9804455637931824,grad_norm: 0.7557960851721273, iteration: 456287
loss: 1.0573219060897827,grad_norm: 0.9999990777466692, iteration: 456288
loss: 1.0374767780303955,grad_norm: 0.9057291601761636, iteration: 456289
loss: 1.1655664443969727,grad_norm: 0.9999998124481049, iteration: 456290
loss: 0.968130886554718,grad_norm: 0.5745851440882802, iteration: 456291
loss: 1.0074982643127441,grad_norm: 0.8165075623935226, iteration: 456292
loss: 1.0328034162521362,grad_norm: 0.9999989946248903, iteration: 456293
loss: 1.0176039934158325,grad_norm: 0.9187988944702136, iteration: 456294
loss: 1.0238211154937744,grad_norm: 0.7326913187867229, iteration: 456295
loss: 0.9979358315467834,grad_norm: 0.7123827170111469, iteration: 456296
loss: 0.9813124537467957,grad_norm: 0.703145025807149, iteration: 456297
loss: 1.0048348903656006,grad_norm: 0.8758857775726803, iteration: 456298
loss: 1.0065125226974487,grad_norm: 0.7287050407023002, iteration: 456299
loss: 1.002090573310852,grad_norm: 0.9999993763307194, iteration: 456300
loss: 1.0115277767181396,grad_norm: 0.7889652239810109, iteration: 456301
loss: 1.0211013555526733,grad_norm: 0.8585494520292312, iteration: 456302
loss: 1.0111656188964844,grad_norm: 0.6613404187478699, iteration: 456303
loss: 1.0158439874649048,grad_norm: 0.9523895626441077, iteration: 456304
loss: 1.0026086568832397,grad_norm: 0.8017123021854397, iteration: 456305
loss: 1.0171823501586914,grad_norm: 0.9575626914478463, iteration: 456306
loss: 0.9770526885986328,grad_norm: 0.7655666479827654, iteration: 456307
loss: 1.0049593448638916,grad_norm: 0.7723549560064785, iteration: 456308
loss: 0.9862908720970154,grad_norm: 0.685960660943037, iteration: 456309
loss: 1.0192320346832275,grad_norm: 0.999999305139462, iteration: 456310
loss: 1.019604206085205,grad_norm: 0.8276578138700347, iteration: 456311
loss: 0.9890046119689941,grad_norm: 0.6852868933825343, iteration: 456312
loss: 1.0289087295532227,grad_norm: 0.7582821755867838, iteration: 456313
loss: 0.9616422057151794,grad_norm: 0.7554269707090988, iteration: 456314
loss: 0.9836832284927368,grad_norm: 0.8899741241594746, iteration: 456315
loss: 1.036984920501709,grad_norm: 0.7188940813823634, iteration: 456316
loss: 0.9902219176292419,grad_norm: 0.7351948144305471, iteration: 456317
loss: 0.9800217151641846,grad_norm: 0.6331930655728459, iteration: 456318
loss: 0.9775984883308411,grad_norm: 0.8580062702669901, iteration: 456319
loss: 1.011471152305603,grad_norm: 0.7576273953186058, iteration: 456320
loss: 1.0134137868881226,grad_norm: 0.9999992554835768, iteration: 456321
loss: 0.9824342131614685,grad_norm: 0.7199196294600129, iteration: 456322
loss: 0.9860159754753113,grad_norm: 0.9301937911795894, iteration: 456323
loss: 1.0212066173553467,grad_norm: 0.5916203537539575, iteration: 456324
loss: 0.9813293814659119,grad_norm: 0.7268319007318335, iteration: 456325
loss: 0.9909341335296631,grad_norm: 0.8893824211560079, iteration: 456326
loss: 1.0265799760818481,grad_norm: 0.732001107062947, iteration: 456327
loss: 1.024627447128296,grad_norm: 0.8397385291960014, iteration: 456328
loss: 1.0531573295593262,grad_norm: 0.8764053541098176, iteration: 456329
loss: 1.0017902851104736,grad_norm: 0.7814243545607886, iteration: 456330
loss: 0.9946567416191101,grad_norm: 0.7188858814162993, iteration: 456331
loss: 0.9834757447242737,grad_norm: 0.8139528039108412, iteration: 456332
loss: 1.0318145751953125,grad_norm: 0.7211407657600465, iteration: 456333
loss: 1.093688726425171,grad_norm: 0.7416601140415507, iteration: 456334
loss: 1.0343507528305054,grad_norm: 0.7038608177545449, iteration: 456335
loss: 0.9872891902923584,grad_norm: 0.8248865125824504, iteration: 456336
loss: 1.0413657426834106,grad_norm: 0.7887401940935521, iteration: 456337
loss: 1.0280081033706665,grad_norm: 0.9999993154855495, iteration: 456338
loss: 1.002573013305664,grad_norm: 0.900501655563528, iteration: 456339
loss: 1.0046303272247314,grad_norm: 0.8844699424408213, iteration: 456340
loss: 0.9844162464141846,grad_norm: 0.7201841947592373, iteration: 456341
loss: 0.9916995167732239,grad_norm: 0.8592611824033877, iteration: 456342
loss: 1.0384770631790161,grad_norm: 0.755947411013245, iteration: 456343
loss: 0.9756230711936951,grad_norm: 0.7602321236740963, iteration: 456344
loss: 1.0091509819030762,grad_norm: 0.7922039031530711, iteration: 456345
loss: 1.0053616762161255,grad_norm: 0.7194432484118791, iteration: 456346
loss: 1.0179160833358765,grad_norm: 0.8115735968307716, iteration: 456347
loss: 0.9880463480949402,grad_norm: 0.8886785420552716, iteration: 456348
loss: 0.9660587906837463,grad_norm: 0.6822463841394231, iteration: 456349
loss: 1.0147781372070312,grad_norm: 0.7517840311682135, iteration: 456350
loss: 1.0357922315597534,grad_norm: 0.9999996720391273, iteration: 456351
loss: 1.0052963495254517,grad_norm: 0.8233582536871515, iteration: 456352
loss: 1.0233513116836548,grad_norm: 0.6486177001685872, iteration: 456353
loss: 1.0071239471435547,grad_norm: 0.8548124769071702, iteration: 456354
loss: 0.968231737613678,grad_norm: 0.7775076414472012, iteration: 456355
loss: 0.9910394549369812,grad_norm: 0.9999992826961461, iteration: 456356
loss: 1.0226795673370361,grad_norm: 0.8442112382471503, iteration: 456357
loss: 1.010790467262268,grad_norm: 0.7408209713033214, iteration: 456358
loss: 1.0743058919906616,grad_norm: 0.9999993713421106, iteration: 456359
loss: 1.0264232158660889,grad_norm: 0.7474657239344876, iteration: 456360
loss: 1.0923434495925903,grad_norm: 0.99999981353711, iteration: 456361
loss: 0.9864488840103149,grad_norm: 0.616307624402221, iteration: 456362
loss: 1.0090636014938354,grad_norm: 0.7509397227129249, iteration: 456363
loss: 1.0088506937026978,grad_norm: 0.6493833927305576, iteration: 456364
loss: 1.014854907989502,grad_norm: 0.7339435871940312, iteration: 456365
loss: 0.9816446900367737,grad_norm: 0.7461833975161659, iteration: 456366
loss: 1.0537760257720947,grad_norm: 0.9999998626463734, iteration: 456367
loss: 1.0251771211624146,grad_norm: 0.9999991094438822, iteration: 456368
loss: 1.0501264333724976,grad_norm: 0.7010266677916671, iteration: 456369
loss: 1.0565639734268188,grad_norm: 0.9282077311465465, iteration: 456370
loss: 0.9608862400054932,grad_norm: 0.712820867634006, iteration: 456371
loss: 1.0581214427947998,grad_norm: 0.9999993290543606, iteration: 456372
loss: 1.0160640478134155,grad_norm: 0.8737841968340825, iteration: 456373
loss: 1.022735595703125,grad_norm: 0.6485448453936912, iteration: 456374
loss: 0.9809027314186096,grad_norm: 0.682746403560126, iteration: 456375
loss: 1.027746319770813,grad_norm: 0.9999997257510427, iteration: 456376
loss: 1.0165765285491943,grad_norm: 0.8391026575127658, iteration: 456377
loss: 0.9972641468048096,grad_norm: 0.7630551735029741, iteration: 456378
loss: 1.0127129554748535,grad_norm: 0.8053386433578467, iteration: 456379
loss: 1.004327654838562,grad_norm: 0.6124303216043412, iteration: 456380
loss: 1.0087970495224,grad_norm: 0.724910972568518, iteration: 456381
loss: 1.004520297050476,grad_norm: 0.907575457637897, iteration: 456382
loss: 1.0158932209014893,grad_norm: 0.8745659107478976, iteration: 456383
loss: 0.9818786382675171,grad_norm: 0.7828327620671385, iteration: 456384
loss: 1.0286186933517456,grad_norm: 0.9999991106479823, iteration: 456385
loss: 0.9623101949691772,grad_norm: 0.6239516758796192, iteration: 456386
loss: 0.9968669414520264,grad_norm: 0.764434765050299, iteration: 456387
loss: 0.9768451452255249,grad_norm: 0.6936110985232403, iteration: 456388
loss: 1.005370855331421,grad_norm: 0.8050002845016424, iteration: 456389
loss: 1.009534478187561,grad_norm: 0.7601929622103909, iteration: 456390
loss: 1.0567485094070435,grad_norm: 0.9999992626939609, iteration: 456391
loss: 0.9944323897361755,grad_norm: 0.7341206244987726, iteration: 456392
loss: 0.9998315572738647,grad_norm: 0.7465946357918406, iteration: 456393
loss: 1.0036468505859375,grad_norm: 0.6786266517563738, iteration: 456394
loss: 1.0317511558532715,grad_norm: 0.7166782667909161, iteration: 456395
loss: 1.0157445669174194,grad_norm: 0.9999993408382549, iteration: 456396
loss: 1.0686029195785522,grad_norm: 0.9059920755645259, iteration: 456397
loss: 1.0386625528335571,grad_norm: 0.7194676265817863, iteration: 456398
loss: 1.0045331716537476,grad_norm: 0.6663133174212706, iteration: 456399
loss: 1.0040124654769897,grad_norm: 0.8447918162044691, iteration: 456400
loss: 1.0399374961853027,grad_norm: 0.9999991120972, iteration: 456401
loss: 0.970510721206665,grad_norm: 0.7915043461765642, iteration: 456402
loss: 1.0089561939239502,grad_norm: 0.9781875297479461, iteration: 456403
loss: 0.9838158488273621,grad_norm: 0.8283917905820494, iteration: 456404
loss: 0.9795635342597961,grad_norm: 0.8009706997222134, iteration: 456405
loss: 1.0033323764801025,grad_norm: 0.7347046264617425, iteration: 456406
loss: 1.015344262123108,grad_norm: 0.75139899895151, iteration: 456407
loss: 1.0207725763320923,grad_norm: 0.6626143926844856, iteration: 456408
loss: 0.9887593388557434,grad_norm: 0.880801281222853, iteration: 456409
loss: 1.0359116792678833,grad_norm: 0.672037703263172, iteration: 456410
loss: 1.0254688262939453,grad_norm: 0.8105140664883812, iteration: 456411
loss: 0.9989499449729919,grad_norm: 0.6472048423831409, iteration: 456412
loss: 0.9921088814735413,grad_norm: 0.711958410868433, iteration: 456413
loss: 1.0077303647994995,grad_norm: 0.6699398724650176, iteration: 456414
loss: 1.0396119356155396,grad_norm: 0.9999991517921126, iteration: 456415
loss: 1.0187172889709473,grad_norm: 0.8927665105361281, iteration: 456416
loss: 1.0010027885437012,grad_norm: 0.722960344378222, iteration: 456417
loss: 1.0014814138412476,grad_norm: 0.9401015184822933, iteration: 456418
loss: 1.011220097541809,grad_norm: 0.7350761573736905, iteration: 456419
loss: 0.988592267036438,grad_norm: 0.8458084024070257, iteration: 456420
loss: 0.9944236874580383,grad_norm: 0.9201749947409322, iteration: 456421
loss: 1.0331783294677734,grad_norm: 0.7627333403434605, iteration: 456422
loss: 1.028266429901123,grad_norm: 0.7915763083643038, iteration: 456423
loss: 1.0434014797210693,grad_norm: 0.7816083606580249, iteration: 456424
loss: 0.9612196683883667,grad_norm: 0.7821001943984204, iteration: 456425
loss: 0.9780911207199097,grad_norm: 0.8735396171751192, iteration: 456426
loss: 0.9789162278175354,grad_norm: 0.7149505192260934, iteration: 456427
loss: 1.0941237211227417,grad_norm: 0.9999990628037501, iteration: 456428
loss: 0.9963542819023132,grad_norm: 0.8163722671217009, iteration: 456429
loss: 1.0100841522216797,grad_norm: 0.8927035552109956, iteration: 456430
loss: 1.0241585969924927,grad_norm: 0.9999993327081335, iteration: 456431
loss: 0.9977210164070129,grad_norm: 0.9468775863373255, iteration: 456432
loss: 0.9973748922348022,grad_norm: 0.7824799278506807, iteration: 456433
loss: 1.0324435234069824,grad_norm: 0.9439155914082771, iteration: 456434
loss: 0.9945034384727478,grad_norm: 0.9000648918929862, iteration: 456435
loss: 0.9846643209457397,grad_norm: 0.8504289039724544, iteration: 456436
loss: 0.9978935718536377,grad_norm: 0.6989089540158938, iteration: 456437
loss: 1.041085124015808,grad_norm: 0.9425185744414425, iteration: 456438
loss: 1.012771725654602,grad_norm: 0.5966921129485507, iteration: 456439
loss: 0.997987687587738,grad_norm: 0.6369722909629225, iteration: 456440
loss: 0.9808842539787292,grad_norm: 0.7811605580296774, iteration: 456441
loss: 1.0085784196853638,grad_norm: 0.6974690232670283, iteration: 456442
loss: 1.0339255332946777,grad_norm: 0.7721267818424951, iteration: 456443
loss: 1.0067286491394043,grad_norm: 0.8763980170800229, iteration: 456444
loss: 1.0163065195083618,grad_norm: 0.9999991759197153, iteration: 456445
loss: 0.9710357785224915,grad_norm: 0.8105876571930903, iteration: 456446
loss: 1.044905185699463,grad_norm: 0.7750754964794212, iteration: 456447
loss: 0.98488450050354,grad_norm: 0.65513104231994, iteration: 456448
loss: 1.0508434772491455,grad_norm: 0.7427821473006411, iteration: 456449
loss: 0.9993525147438049,grad_norm: 0.8512006185186454, iteration: 456450
loss: 0.9562076926231384,grad_norm: 0.693796784708398, iteration: 456451
loss: 0.9561397433280945,grad_norm: 0.8412868835891077, iteration: 456452
loss: 1.0354735851287842,grad_norm: 0.6971865601212021, iteration: 456453
loss: 0.9975512027740479,grad_norm: 0.6299638065394673, iteration: 456454
loss: 1.0231536626815796,grad_norm: 0.7528808320614148, iteration: 456455
loss: 0.9957263469696045,grad_norm: 0.8190619859273345, iteration: 456456
loss: 1.086303472518921,grad_norm: 0.8973757950893926, iteration: 456457
loss: 0.9959782958030701,grad_norm: 0.9999992936037387, iteration: 456458
loss: 0.9542685151100159,grad_norm: 0.8714640634421825, iteration: 456459
loss: 0.975642204284668,grad_norm: 0.7081930579623699, iteration: 456460
loss: 1.064124584197998,grad_norm: 1.0000000002337845, iteration: 456461
loss: 1.0157197713851929,grad_norm: 0.7566687565382705, iteration: 456462
loss: 1.011336088180542,grad_norm: 0.6757043402157845, iteration: 456463
loss: 1.017706036567688,grad_norm: 0.7283934894432273, iteration: 456464
loss: 1.037745714187622,grad_norm: 0.9999998854196213, iteration: 456465
loss: 0.9836388826370239,grad_norm: 0.999999297671591, iteration: 456466
loss: 1.0959827899932861,grad_norm: 0.9999991311488389, iteration: 456467
loss: 1.0370687246322632,grad_norm: 0.999999496322439, iteration: 456468
loss: 1.021147608757019,grad_norm: 0.8242994672678153, iteration: 456469
loss: 1.0143629312515259,grad_norm: 0.7849471579821945, iteration: 456470
loss: 0.9782937169075012,grad_norm: 0.7400274704328659, iteration: 456471
loss: 1.1142338514328003,grad_norm: 0.9999989785180409, iteration: 456472
loss: 1.054795265197754,grad_norm: 0.994620613614432, iteration: 456473
loss: 1.056409239768982,grad_norm: 0.8917380657264359, iteration: 456474
loss: 0.9997496604919434,grad_norm: 0.874207001595117, iteration: 456475
loss: 0.9785141944885254,grad_norm: 0.7096450432106794, iteration: 456476
loss: 0.9898401498794556,grad_norm: 0.826360731731241, iteration: 456477
loss: 1.0041139125823975,grad_norm: 0.999999481861118, iteration: 456478
loss: 0.9989174008369446,grad_norm: 0.9999990576647038, iteration: 456479
loss: 1.036388635635376,grad_norm: 0.8073939569517412, iteration: 456480
loss: 1.180006504058838,grad_norm: 0.9999992421142554, iteration: 456481
loss: 1.0004216432571411,grad_norm: 0.7345775493739019, iteration: 456482
loss: 1.189587950706482,grad_norm: 0.8178471879490268, iteration: 456483
loss: 1.1401180028915405,grad_norm: 0.8648525302662586, iteration: 456484
loss: 1.037805199623108,grad_norm: 0.8904178328879808, iteration: 456485
loss: 1.0181416273117065,grad_norm: 0.7010095504739834, iteration: 456486
loss: 0.9594869017601013,grad_norm: 0.9999991525499843, iteration: 456487
loss: 0.9716764688491821,grad_norm: 0.9646793518452428, iteration: 456488
loss: 1.0259675979614258,grad_norm: 0.8497446056167043, iteration: 456489
loss: 0.9911887645721436,grad_norm: 0.6801915433960747, iteration: 456490
loss: 0.9803949594497681,grad_norm: 0.8506684321426364, iteration: 456491
loss: 0.983116626739502,grad_norm: 0.719792365734491, iteration: 456492
loss: 1.0466495752334595,grad_norm: 0.7529372725695793, iteration: 456493
loss: 1.0052614212036133,grad_norm: 0.8782030700836025, iteration: 456494
loss: 1.027915596961975,grad_norm: 0.9006023280424347, iteration: 456495
loss: 0.9810132384300232,grad_norm: 0.6583425649581623, iteration: 456496
loss: 1.000712275505066,grad_norm: 0.9780690304652581, iteration: 456497
loss: 1.099867343902588,grad_norm: 0.9999992077197866, iteration: 456498
loss: 1.011968970298767,grad_norm: 0.94389080384003, iteration: 456499
loss: 0.9716524481773376,grad_norm: 0.7558873642497327, iteration: 456500
loss: 0.9776615500450134,grad_norm: 0.671631924513231, iteration: 456501
loss: 1.0540053844451904,grad_norm: 0.680973775534581, iteration: 456502
loss: 1.0271968841552734,grad_norm: 0.6922841360546577, iteration: 456503
loss: 1.0073844194412231,grad_norm: 0.9999997904514042, iteration: 456504
loss: 0.9898010492324829,grad_norm: 0.8499290994327849, iteration: 456505
loss: 0.9993710517883301,grad_norm: 0.9323473677222726, iteration: 456506
loss: 1.013975739479065,grad_norm: 0.7245731991058121, iteration: 456507
loss: 1.0174366235733032,grad_norm: 0.6727955728231421, iteration: 456508
loss: 0.9902198910713196,grad_norm: 0.7201724626810859, iteration: 456509
loss: 1.0390801429748535,grad_norm: 0.9999991568398898, iteration: 456510
loss: 1.0212187767028809,grad_norm: 0.8095225599010268, iteration: 456511
loss: 0.9933027625083923,grad_norm: 0.7902885340655296, iteration: 456512
loss: 1.0173860788345337,grad_norm: 0.6539815245620609, iteration: 456513
loss: 1.0151023864746094,grad_norm: 0.6990293163003045, iteration: 456514
loss: 1.0099568367004395,grad_norm: 0.8020077211123747, iteration: 456515
loss: 0.9973058700561523,grad_norm: 0.6615394955009413, iteration: 456516
loss: 0.9951626658439636,grad_norm: 0.7662046026713398, iteration: 456517
loss: 1.0031883716583252,grad_norm: 0.715643203269339, iteration: 456518
loss: 1.057647705078125,grad_norm: 0.6803435737453198, iteration: 456519
loss: 1.037940263748169,grad_norm: 0.7413534626876916, iteration: 456520
loss: 0.99363112449646,grad_norm: 0.6859331313645343, iteration: 456521
loss: 1.009548306465149,grad_norm: 0.9101492232666812, iteration: 456522
loss: 0.9877059459686279,grad_norm: 0.8292954089601791, iteration: 456523
loss: 0.9796516299247742,grad_norm: 0.8545551529481086, iteration: 456524
loss: 0.948816180229187,grad_norm: 0.681438612608001, iteration: 456525
loss: 1.0886831283569336,grad_norm: 0.9999992914762831, iteration: 456526
loss: 0.9930106997489929,grad_norm: 0.7244743923271323, iteration: 456527
loss: 1.0499247312545776,grad_norm: 0.9999992349311447, iteration: 456528
loss: 1.0153155326843262,grad_norm: 0.7889307640824439, iteration: 456529
loss: 1.0020880699157715,grad_norm: 0.771013790495389, iteration: 456530
loss: 0.9804974794387817,grad_norm: 0.8669549383549234, iteration: 456531
loss: 0.9917003512382507,grad_norm: 0.7315588337899408, iteration: 456532
loss: 1.0468220710754395,grad_norm: 0.9284708900718462, iteration: 456533
loss: 1.0132834911346436,grad_norm: 0.6586887253089944, iteration: 456534
loss: 1.0086196660995483,grad_norm: 0.7812228792219064, iteration: 456535
loss: 0.967767059803009,grad_norm: 0.9999990964799459, iteration: 456536
loss: 0.9869835376739502,grad_norm: 0.7670557973326614, iteration: 456537
loss: 1.028221607208252,grad_norm: 0.9999997824158586, iteration: 456538
loss: 0.9982671737670898,grad_norm: 0.6476038684436575, iteration: 456539
loss: 1.0031752586364746,grad_norm: 0.8067645419554061, iteration: 456540
loss: 1.0205769538879395,grad_norm: 0.75584557133178, iteration: 456541
loss: 1.0447858572006226,grad_norm: 0.9999990285078004, iteration: 456542
loss: 1.0159449577331543,grad_norm: 0.7994764825440589, iteration: 456543
loss: 1.0537309646606445,grad_norm: 0.6965008997011982, iteration: 456544
loss: 0.9971141815185547,grad_norm: 0.7881491925692762, iteration: 456545
loss: 1.0133140087127686,grad_norm: 0.7658906002084067, iteration: 456546
loss: 0.983159065246582,grad_norm: 0.9999990726893392, iteration: 456547
loss: 1.0047156810760498,grad_norm: 0.8158778568881203, iteration: 456548
loss: 1.0028198957443237,grad_norm: 0.7678136892956962, iteration: 456549
loss: 1.0163861513137817,grad_norm: 0.8950764495446112, iteration: 456550
loss: 0.9835278391838074,grad_norm: 0.9999998448148295, iteration: 456551
loss: 1.0102795362472534,grad_norm: 0.8912599162045279, iteration: 456552
loss: 0.9923692941665649,grad_norm: 0.7576468566515616, iteration: 456553
loss: 1.0202608108520508,grad_norm: 0.941004364962382, iteration: 456554
loss: 0.9787530303001404,grad_norm: 0.884020627561009, iteration: 456555
loss: 1.0975819826126099,grad_norm: 0.8254846693619833, iteration: 456556
loss: 1.019684910774231,grad_norm: 0.9999994429874353, iteration: 456557
loss: 1.0952311754226685,grad_norm: 0.9999997452125257, iteration: 456558
loss: 1.0057867765426636,grad_norm: 0.8584864947740558, iteration: 456559
loss: 1.0965774059295654,grad_norm: 0.8147278392424339, iteration: 456560
loss: 1.0080866813659668,grad_norm: 0.7730210905321222, iteration: 456561
loss: 0.991130530834198,grad_norm: 0.6888178174000431, iteration: 456562
loss: 0.9950301051139832,grad_norm: 0.8743352966725577, iteration: 456563
loss: 1.144124150276184,grad_norm: 0.9999994877750569, iteration: 456564
loss: 1.1725190877914429,grad_norm: 0.9999999875200873, iteration: 456565
loss: 1.0170862674713135,grad_norm: 0.8265974786166209, iteration: 456566
loss: 1.0837841033935547,grad_norm: 0.9999995204245548, iteration: 456567
loss: 1.0003211498260498,grad_norm: 0.7022576349773519, iteration: 456568
loss: 1.035129189491272,grad_norm: 0.9724665770940708, iteration: 456569
loss: 1.084732174873352,grad_norm: 0.9587689870437319, iteration: 456570
loss: 1.0216107368469238,grad_norm: 0.6988524729341347, iteration: 456571
loss: 1.0780636072158813,grad_norm: 0.999999314759399, iteration: 456572
loss: 0.9822397828102112,grad_norm: 0.9999991873936852, iteration: 456573
loss: 1.0071561336517334,grad_norm: 0.7360037120156029, iteration: 456574
loss: 0.9949178099632263,grad_norm: 0.7073153862772839, iteration: 456575
loss: 1.0060019493103027,grad_norm: 0.5660071626775575, iteration: 456576
loss: 0.992583692073822,grad_norm: 0.8110805619513778, iteration: 456577
loss: 1.0153167247772217,grad_norm: 0.6820457521190246, iteration: 456578
loss: 1.0007715225219727,grad_norm: 0.7279270093257378, iteration: 456579
loss: 1.0157159566879272,grad_norm: 0.8474661980789289, iteration: 456580
loss: 0.9886572360992432,grad_norm: 0.8579673422792651, iteration: 456581
loss: 1.024399995803833,grad_norm: 0.8050782101842858, iteration: 456582
loss: 1.0625718832015991,grad_norm: 0.9999998326426254, iteration: 456583
loss: 1.0156866312026978,grad_norm: 0.8201691343459906, iteration: 456584
loss: 1.0535385608673096,grad_norm: 0.7218748341632374, iteration: 456585
loss: 1.025095820426941,grad_norm: 0.6698069157687885, iteration: 456586
loss: 1.0956456661224365,grad_norm: 0.8120776242898682, iteration: 456587
loss: 0.976677417755127,grad_norm: 0.7701735581474304, iteration: 456588
loss: 0.995525598526001,grad_norm: 0.7168862524081361, iteration: 456589
loss: 0.998953640460968,grad_norm: 0.8843996041918522, iteration: 456590
loss: 1.0069997310638428,grad_norm: 0.7317409131151651, iteration: 456591
loss: 0.9634401202201843,grad_norm: 0.8762686788876388, iteration: 456592
loss: 1.0177453756332397,grad_norm: 0.8185635896867931, iteration: 456593
loss: 0.9754307270050049,grad_norm: 0.8931263263982631, iteration: 456594
loss: 0.9860758781433105,grad_norm: 0.7437892165309822, iteration: 456595
loss: 1.0263159275054932,grad_norm: 0.8411874915895157, iteration: 456596
loss: 1.0544357299804688,grad_norm: 0.7932879464533422, iteration: 456597
loss: 1.011661171913147,grad_norm: 0.8073856454745252, iteration: 456598
loss: 1.0417296886444092,grad_norm: 0.8989087814561382, iteration: 456599
loss: 1.000712513923645,grad_norm: 0.6834635252617541, iteration: 456600
loss: 1.0158088207244873,grad_norm: 0.999999890089505, iteration: 456601
loss: 1.023755431175232,grad_norm: 0.8234349285965394, iteration: 456602
loss: 0.986604630947113,grad_norm: 0.7545252656372824, iteration: 456603
loss: 0.9977057576179504,grad_norm: 0.8219387554495208, iteration: 456604
loss: 0.9906194806098938,grad_norm: 0.9977616911887581, iteration: 456605
loss: 1.014483094215393,grad_norm: 0.8437799701184879, iteration: 456606
loss: 0.9960174560546875,grad_norm: 0.8917020017391046, iteration: 456607
loss: 0.9948155283927917,grad_norm: 0.7146902994091903, iteration: 456608
loss: 0.9539580345153809,grad_norm: 0.8667189372438161, iteration: 456609
loss: 1.0434651374816895,grad_norm: 0.8268025147854895, iteration: 456610
loss: 0.9762383699417114,grad_norm: 0.7232439885196691, iteration: 456611
loss: 1.0006625652313232,grad_norm: 0.8263557726684961, iteration: 456612
loss: 1.0995182991027832,grad_norm: 0.999999012450206, iteration: 456613
loss: 1.082302212715149,grad_norm: 0.9999996644122725, iteration: 456614
loss: 1.0083872079849243,grad_norm: 0.7863567141360762, iteration: 456615
loss: 1.0052814483642578,grad_norm: 0.9711047608118333, iteration: 456616
loss: 0.989281177520752,grad_norm: 0.7594404055245186, iteration: 456617
loss: 0.9751504063606262,grad_norm: 0.7654935164022926, iteration: 456618
loss: 1.1421504020690918,grad_norm: 0.9999998829774234, iteration: 456619
loss: 1.0830541849136353,grad_norm: 0.999999279166671, iteration: 456620
loss: 0.9833192825317383,grad_norm: 0.676730971294001, iteration: 456621
loss: 1.0142619609832764,grad_norm: 0.7985258537913817, iteration: 456622
loss: 1.0287902355194092,grad_norm: 0.8551065930097447, iteration: 456623
loss: 0.9359430074691772,grad_norm: 0.8658979518596789, iteration: 456624
loss: 0.9907681345939636,grad_norm: 0.8647835245210456, iteration: 456625
loss: 1.0045617818832397,grad_norm: 0.7221947017004016, iteration: 456626
loss: 1.017858862876892,grad_norm: 0.7354658665579846, iteration: 456627
loss: 1.096846103668213,grad_norm: 0.7992447070933998, iteration: 456628
loss: 1.0083212852478027,grad_norm: 0.8215714598291649, iteration: 456629
loss: 0.9961822628974915,grad_norm: 0.7626279318655951, iteration: 456630
loss: 0.9847580194473267,grad_norm: 0.772187322277015, iteration: 456631
loss: 1.0201045274734497,grad_norm: 0.9244958812165369, iteration: 456632
loss: 1.0413585901260376,grad_norm: 0.9908190352519044, iteration: 456633
loss: 1.0395489931106567,grad_norm: 0.6463717157278527, iteration: 456634
loss: 1.1237902641296387,grad_norm: 0.9999992332373514, iteration: 456635
loss: 0.9630394577980042,grad_norm: 0.8157972236326196, iteration: 456636
loss: 0.9898579120635986,grad_norm: 0.7939125810825467, iteration: 456637
loss: 1.0180168151855469,grad_norm: 0.8129566608074756, iteration: 456638
loss: 0.9901651740074158,grad_norm: 0.61331635689459, iteration: 456639
loss: 0.9978888630867004,grad_norm: 0.7933221647107214, iteration: 456640
loss: 1.0055526494979858,grad_norm: 0.8631653986803784, iteration: 456641
loss: 0.9865941405296326,grad_norm: 0.9999994000438035, iteration: 456642
loss: 0.9876810312271118,grad_norm: 0.6174622214928905, iteration: 456643
loss: 0.9963737726211548,grad_norm: 0.6931489304106115, iteration: 456644
loss: 1.028417944908142,grad_norm: 0.8041494740326238, iteration: 456645
loss: 0.9821226596832275,grad_norm: 0.7978554765237467, iteration: 456646
loss: 1.0233893394470215,grad_norm: 0.8054242426273432, iteration: 456647
loss: 1.0037946701049805,grad_norm: 0.6815347833047295, iteration: 456648
loss: 1.025436282157898,grad_norm: 0.8039348967215286, iteration: 456649
loss: 1.0090453624725342,grad_norm: 0.7682684948152513, iteration: 456650
loss: 1.0378143787384033,grad_norm: 0.7751265907031328, iteration: 456651
loss: 0.988544762134552,grad_norm: 0.7988789791696395, iteration: 456652
loss: 0.9990981221199036,grad_norm: 0.9481295596577177, iteration: 456653
loss: 1.0002214908599854,grad_norm: 0.9993866782216693, iteration: 456654
loss: 1.017844796180725,grad_norm: 0.8383045324397556, iteration: 456655
loss: 0.9759858250617981,grad_norm: 0.7074248827710381, iteration: 456656
loss: 0.9918437004089355,grad_norm: 0.6847322049740431, iteration: 456657
loss: 0.95896315574646,grad_norm: 0.7494984241559546, iteration: 456658
loss: 1.0035349130630493,grad_norm: 0.7891427575707378, iteration: 456659
loss: 0.986518383026123,grad_norm: 0.7211676075125847, iteration: 456660
loss: 0.9864214062690735,grad_norm: 0.7208593613199272, iteration: 456661
loss: 0.9899013042449951,grad_norm: 0.9999998815698923, iteration: 456662
loss: 1.1091642379760742,grad_norm: 0.7768552693260241, iteration: 456663
loss: 1.0208187103271484,grad_norm: 0.7037508568482223, iteration: 456664
loss: 0.9875336289405823,grad_norm: 0.9268706467583706, iteration: 456665
loss: 1.0527740716934204,grad_norm: 0.7526865401174198, iteration: 456666
loss: 0.9939866065979004,grad_norm: 0.9999999368216199, iteration: 456667
loss: 0.9653354287147522,grad_norm: 0.8091896638774454, iteration: 456668
loss: 0.9853622317314148,grad_norm: 0.7991145062488887, iteration: 456669
loss: 1.0438112020492554,grad_norm: 0.9999997760503405, iteration: 456670
loss: 1.0431311130523682,grad_norm: 0.7718468663511343, iteration: 456671
loss: 0.9738211631774902,grad_norm: 0.8718424238525959, iteration: 456672
loss: 0.9469719529151917,grad_norm: 0.8756414632377924, iteration: 456673
loss: 0.9991192817687988,grad_norm: 0.7186643376622572, iteration: 456674
loss: 1.0813438892364502,grad_norm: 0.7962168359442814, iteration: 456675
loss: 0.9630418419837952,grad_norm: 0.7352791658399258, iteration: 456676
loss: 0.953925371170044,grad_norm: 0.7173234406297561, iteration: 456677
loss: 1.0528188943862915,grad_norm: 0.9999996916150952, iteration: 456678
loss: 0.9669066667556763,grad_norm: 0.676732867576908, iteration: 456679
loss: 0.9754904508590698,grad_norm: 0.8267052513586356, iteration: 456680
loss: 1.0335131883621216,grad_norm: 0.8553639072893491, iteration: 456681
loss: 0.9757741689682007,grad_norm: 0.7451523759328764, iteration: 456682
loss: 1.0045816898345947,grad_norm: 0.679473305363141, iteration: 456683
loss: 1.0054388046264648,grad_norm: 0.8314241957623928, iteration: 456684
loss: 0.9934799671173096,grad_norm: 0.6834974015806833, iteration: 456685
loss: 1.0283442735671997,grad_norm: 0.9896848677311431, iteration: 456686
loss: 1.0034116506576538,grad_norm: 0.8247930145766501, iteration: 456687
loss: 1.000707745552063,grad_norm: 0.9023632864112286, iteration: 456688
loss: 1.0889215469360352,grad_norm: 0.933141849819532, iteration: 456689
loss: 1.081121802330017,grad_norm: 0.9999993482713847, iteration: 456690
loss: 1.0146050453186035,grad_norm: 0.9486145758331336, iteration: 456691
loss: 1.015797734260559,grad_norm: 0.728693626174138, iteration: 456692
loss: 0.9806929230690002,grad_norm: 0.9999989903465824, iteration: 456693
loss: 1.0001693964004517,grad_norm: 0.99105645824448, iteration: 456694
loss: 1.0111383199691772,grad_norm: 0.6115360695049182, iteration: 456695
loss: 0.9969400763511658,grad_norm: 0.7396902967755938, iteration: 456696
loss: 0.9830793738365173,grad_norm: 0.849399353328387, iteration: 456697
loss: 1.019120693206787,grad_norm: 0.7579671793144145, iteration: 456698
loss: 1.0178123712539673,grad_norm: 0.8441106866442154, iteration: 456699
loss: 0.9882908463478088,grad_norm: 0.6595925329328421, iteration: 456700
loss: 1.0168054103851318,grad_norm: 0.9782627669336385, iteration: 456701
loss: 1.0071544647216797,grad_norm: 0.8212972820098754, iteration: 456702
loss: 1.0024700164794922,grad_norm: 0.6899291440093357, iteration: 456703
loss: 0.983720064163208,grad_norm: 0.7758092912271458, iteration: 456704
loss: 1.0025382041931152,grad_norm: 0.6319043835056789, iteration: 456705
loss: 0.9551495909690857,grad_norm: 0.8625516359623374, iteration: 456706
loss: 0.9669724106788635,grad_norm: 0.7879032155961748, iteration: 456707
loss: 0.9844520092010498,grad_norm: 0.828742459583144, iteration: 456708
loss: 1.0068858861923218,grad_norm: 0.6682184422955371, iteration: 456709
loss: 1.0801308155059814,grad_norm: 0.7962395439300893, iteration: 456710
loss: 1.0096482038497925,grad_norm: 0.6929796305871204, iteration: 456711
loss: 1.0121546983718872,grad_norm: 0.8007972856057349, iteration: 456712
loss: 1.0052381753921509,grad_norm: 0.9240025920104232, iteration: 456713
loss: 0.9892274141311646,grad_norm: 0.9902762108685006, iteration: 456714
loss: 0.9988141655921936,grad_norm: 0.835432156922369, iteration: 456715
loss: 0.9995445609092712,grad_norm: 0.8486487999205349, iteration: 456716
loss: 0.983119010925293,grad_norm: 0.7364662296944233, iteration: 456717
loss: 0.9972023367881775,grad_norm: 0.9365689341317178, iteration: 456718
loss: 0.9454922080039978,grad_norm: 0.8882480690996339, iteration: 456719
loss: 1.0366705656051636,grad_norm: 0.7485531611106314, iteration: 456720
loss: 0.9904589653015137,grad_norm: 0.8358344088523335, iteration: 456721
loss: 0.9941953420639038,grad_norm: 0.682291815005893, iteration: 456722
loss: 0.9727554321289062,grad_norm: 0.7421048990310465, iteration: 456723
loss: 0.9847814440727234,grad_norm: 0.9999996216408783, iteration: 456724
loss: 0.9931776523590088,grad_norm: 0.7931486258050239, iteration: 456725
loss: 1.0308585166931152,grad_norm: 0.9999992877697682, iteration: 456726
loss: 0.9998186826705933,grad_norm: 0.6778600219993673, iteration: 456727
loss: 0.9722464680671692,grad_norm: 0.7977571680085004, iteration: 456728
loss: 0.9903225898742676,grad_norm: 0.9999990826286523, iteration: 456729
loss: 0.9937695264816284,grad_norm: 0.7598187025429458, iteration: 456730
loss: 1.0279043912887573,grad_norm: 0.9999994138569691, iteration: 456731
loss: 1.0080350637435913,grad_norm: 0.8164037605675987, iteration: 456732
loss: 1.0157854557037354,grad_norm: 0.81113273454765, iteration: 456733
loss: 1.0217424631118774,grad_norm: 0.8171701811195631, iteration: 456734
loss: 1.1165317296981812,grad_norm: 0.7563860052638953, iteration: 456735
loss: 1.0104871988296509,grad_norm: 0.7632183274439116, iteration: 456736
loss: 1.0089300870895386,grad_norm: 0.7807618507841072, iteration: 456737
loss: 1.0105690956115723,grad_norm: 0.8925925634834008, iteration: 456738
loss: 1.003914475440979,grad_norm: 0.7577836585379745, iteration: 456739
loss: 0.9976104497909546,grad_norm: 0.6806644706472539, iteration: 456740
loss: 0.9753766655921936,grad_norm: 0.78318382952014, iteration: 456741
loss: 1.019178867340088,grad_norm: 0.9999995159343422, iteration: 456742
loss: 0.9917477369308472,grad_norm: 0.8371061024118588, iteration: 456743
loss: 1.0015599727630615,grad_norm: 0.782891265473998, iteration: 456744
loss: 0.9928648471832275,grad_norm: 0.6720139496569372, iteration: 456745
loss: 1.0101432800292969,grad_norm: 0.8368213118818767, iteration: 456746
loss: 1.0270103216171265,grad_norm: 0.6855326521076681, iteration: 456747
loss: 0.9891387820243835,grad_norm: 0.8116905496035958, iteration: 456748
loss: 1.0227930545806885,grad_norm: 0.7258707343171057, iteration: 456749
loss: 1.0038175582885742,grad_norm: 0.6773264105835588, iteration: 456750
loss: 1.113461971282959,grad_norm: 0.8568983502421794, iteration: 456751
loss: 1.011812448501587,grad_norm: 0.9999992499109691, iteration: 456752
loss: 0.9959429502487183,grad_norm: 0.8961746671299662, iteration: 456753
loss: 0.9573186635971069,grad_norm: 0.8474125917653937, iteration: 456754
loss: 0.9942334890365601,grad_norm: 0.7723768374815261, iteration: 456755
loss: 0.9836287498474121,grad_norm: 0.9999990399726154, iteration: 456756
loss: 0.9830729365348816,grad_norm: 0.762427366808139, iteration: 456757
loss: 1.0346447229385376,grad_norm: 0.8616405131676219, iteration: 456758
loss: 0.95762038230896,grad_norm: 0.6367189857789849, iteration: 456759
loss: 1.0363876819610596,grad_norm: 0.720870620226958, iteration: 456760
loss: 0.9505763649940491,grad_norm: 0.7480894870243661, iteration: 456761
loss: 1.0086252689361572,grad_norm: 0.7364805929308048, iteration: 456762
loss: 0.9916701912879944,grad_norm: 0.7366587546832939, iteration: 456763
loss: 1.0253419876098633,grad_norm: 0.7470982634692336, iteration: 456764
loss: 1.0114467144012451,grad_norm: 0.8275480273207783, iteration: 456765
loss: 1.0010509490966797,grad_norm: 0.8705092649770557, iteration: 456766
loss: 0.9808375835418701,grad_norm: 0.717676696495171, iteration: 456767
loss: 1.0203914642333984,grad_norm: 0.6877712404305779, iteration: 456768
loss: 0.9810042977333069,grad_norm: 0.7485245899556551, iteration: 456769
loss: 0.9929125905036926,grad_norm: 0.811996037597952, iteration: 456770
loss: 0.981762707233429,grad_norm: 0.8494962585830695, iteration: 456771
loss: 1.0149418115615845,grad_norm: 0.7797292102477125, iteration: 456772
loss: 0.9678703546524048,grad_norm: 0.6852972176760808, iteration: 456773
loss: 1.0013691186904907,grad_norm: 0.7449297882773424, iteration: 456774
loss: 1.0122172832489014,grad_norm: 0.8693459519322554, iteration: 456775
loss: 1.0194156169891357,grad_norm: 0.8409801544184792, iteration: 456776
loss: 1.0395535230636597,grad_norm: 0.7558282857850079, iteration: 456777
loss: 0.9715301394462585,grad_norm: 0.7235398311759259, iteration: 456778
loss: 1.0176728963851929,grad_norm: 0.7757126953787393, iteration: 456779
loss: 0.9645304679870605,grad_norm: 0.7779069900388887, iteration: 456780
loss: 0.9803102612495422,grad_norm: 0.7365679168266364, iteration: 456781
loss: 1.0014889240264893,grad_norm: 0.7582798150128472, iteration: 456782
loss: 1.0196282863616943,grad_norm: 0.9272757849547415, iteration: 456783
loss: 1.000931739807129,grad_norm: 0.8557201004355766, iteration: 456784
loss: 1.0367125272750854,grad_norm: 0.9999996018838644, iteration: 456785
loss: 1.0231212377548218,grad_norm: 0.8792146922524698, iteration: 456786
loss: 0.9838798642158508,grad_norm: 0.7406201812859418, iteration: 456787
loss: 0.980816662311554,grad_norm: 0.7946880661049641, iteration: 456788
loss: 1.0183771848678589,grad_norm: 0.795686446638222, iteration: 456789
loss: 0.9978488683700562,grad_norm: 0.7915380405131649, iteration: 456790
loss: 1.0126621723175049,grad_norm: 0.8547399764009306, iteration: 456791
loss: 1.0253636837005615,grad_norm: 0.9999999685227303, iteration: 456792
loss: 1.0045088529586792,grad_norm: 0.8537095501894897, iteration: 456793
loss: 1.0192619562149048,grad_norm: 0.7397540270582927, iteration: 456794
loss: 0.9392786026000977,grad_norm: 0.8301693472480444, iteration: 456795
loss: 0.9865916967391968,grad_norm: 0.7174502247847038, iteration: 456796
loss: 1.0177268981933594,grad_norm: 0.824061833355398, iteration: 456797
loss: 0.9650953412055969,grad_norm: 0.7902043377353309, iteration: 456798
loss: 0.9938011765480042,grad_norm: 0.7838592185093624, iteration: 456799
loss: 1.000322699546814,grad_norm: 0.618848608208617, iteration: 456800
loss: 0.997722864151001,grad_norm: 0.7908824864550272, iteration: 456801
loss: 0.9869546294212341,grad_norm: 0.7295600652516147, iteration: 456802
loss: 0.9926722645759583,grad_norm: 0.7407182676943099, iteration: 456803
loss: 1.0426825284957886,grad_norm: 0.8776787042036285, iteration: 456804
loss: 0.986032247543335,grad_norm: 0.999998992366772, iteration: 456805
loss: 0.9954971671104431,grad_norm: 0.69175200710878, iteration: 456806
loss: 0.9865815043449402,grad_norm: 0.6456959222343605, iteration: 456807
loss: 1.0263099670410156,grad_norm: 0.7114809908594013, iteration: 456808
loss: 1.0764302015304565,grad_norm: 0.7422911435495743, iteration: 456809
loss: 0.9954051375389099,grad_norm: 0.6745606524262007, iteration: 456810
loss: 1.0375093221664429,grad_norm: 0.796837391021179, iteration: 456811
loss: 1.0138932466506958,grad_norm: 0.6204884725314178, iteration: 456812
loss: 1.0916860103607178,grad_norm: 0.7198174026202938, iteration: 456813
loss: 1.0179678201675415,grad_norm: 0.8950635892110947, iteration: 456814
loss: 0.9732913970947266,grad_norm: 0.7510476211692451, iteration: 456815
loss: 1.0123332738876343,grad_norm: 0.6337033829380376, iteration: 456816
loss: 0.9980894327163696,grad_norm: 0.8918342878336568, iteration: 456817
loss: 0.9861487746238708,grad_norm: 0.7596024420223123, iteration: 456818
loss: 0.9903745651245117,grad_norm: 0.81630454784801, iteration: 456819
loss: 1.0408916473388672,grad_norm: 0.9999996027266248, iteration: 456820
loss: 1.0127074718475342,grad_norm: 0.9999998700446495, iteration: 456821
loss: 0.9811224341392517,grad_norm: 0.7145456764475168, iteration: 456822
loss: 1.0204038619995117,grad_norm: 0.7565008284975795, iteration: 456823
loss: 0.9939873814582825,grad_norm: 0.8135612033975549, iteration: 456824
loss: 1.00229811668396,grad_norm: 0.7656160413144424, iteration: 456825
loss: 1.0136818885803223,grad_norm: 0.6910509823442988, iteration: 456826
loss: 1.0321769714355469,grad_norm: 0.8056944594889514, iteration: 456827
loss: 1.0155664682388306,grad_norm: 0.8584648680634153, iteration: 456828
loss: 0.980053186416626,grad_norm: 0.6838481775337681, iteration: 456829
loss: 0.9983799457550049,grad_norm: 0.7370315704712413, iteration: 456830
loss: 0.9692565202713013,grad_norm: 0.9776604009899884, iteration: 456831
loss: 0.9923345446586609,grad_norm: 0.9999999336325105, iteration: 456832
loss: 1.0067180395126343,grad_norm: 0.6817075476755692, iteration: 456833
loss: 0.9952461123466492,grad_norm: 0.7609353555954148, iteration: 456834
loss: 1.0145829916000366,grad_norm: 0.936594537401923, iteration: 456835
loss: 0.9672035574913025,grad_norm: 0.7535744977706077, iteration: 456836
loss: 1.0247740745544434,grad_norm: 0.7268936292406671, iteration: 456837
loss: 0.9893288016319275,grad_norm: 0.7851267185022357, iteration: 456838
loss: 0.9888165593147278,grad_norm: 0.7850428396316337, iteration: 456839
loss: 0.9962774515151978,grad_norm: 0.7802236453149402, iteration: 456840
loss: 0.999798059463501,grad_norm: 0.6813085663379244, iteration: 456841
loss: 0.9812173843383789,grad_norm: 0.8360956878054132, iteration: 456842
loss: 0.9855785369873047,grad_norm: 0.6478148938529976, iteration: 456843
loss: 0.9492197036743164,grad_norm: 0.7504097031913598, iteration: 456844
loss: 0.9976736903190613,grad_norm: 0.8493714312897821, iteration: 456845
loss: 1.0235532522201538,grad_norm: 0.9707640844516223, iteration: 456846
loss: 0.9796119928359985,grad_norm: 0.8105803248813652, iteration: 456847
loss: 1.018620491027832,grad_norm: 0.7805557879917173, iteration: 456848
loss: 0.9526167511940002,grad_norm: 0.8489608084474491, iteration: 456849
loss: 0.9806032776832581,grad_norm: 0.7983956572335837, iteration: 456850
loss: 0.997799277305603,grad_norm: 0.8767201215246009, iteration: 456851
loss: 0.9865257143974304,grad_norm: 0.923623445091252, iteration: 456852
loss: 0.9758537411689758,grad_norm: 0.6602307927206572, iteration: 456853
loss: 1.0049420595169067,grad_norm: 0.713503156216387, iteration: 456854
loss: 0.9607949256896973,grad_norm: 0.8626319324683552, iteration: 456855
loss: 0.9625931978225708,grad_norm: 0.6473505673980441, iteration: 456856
loss: 1.0122748613357544,grad_norm: 0.8727721230882967, iteration: 456857
loss: 0.9962625503540039,grad_norm: 0.6078231670555665, iteration: 456858
loss: 0.9599238038063049,grad_norm: 0.7729629971977202, iteration: 456859
loss: 1.021992802619934,grad_norm: 0.7153647516421613, iteration: 456860
loss: 1.024175763130188,grad_norm: 0.7406688027052407, iteration: 456861
loss: 1.0095912218093872,grad_norm: 0.8145063221904877, iteration: 456862
loss: 1.0370805263519287,grad_norm: 0.6763985934254283, iteration: 456863
loss: 0.9760745167732239,grad_norm: 0.9149718576822625, iteration: 456864
loss: 1.0131001472473145,grad_norm: 0.6586967830244576, iteration: 456865
loss: 0.979985773563385,grad_norm: 0.7659661430978877, iteration: 456866
loss: 1.0177022218704224,grad_norm: 0.6837105194493281, iteration: 456867
loss: 0.9990912079811096,grad_norm: 0.6429065410181971, iteration: 456868
loss: 1.0012457370758057,grad_norm: 0.651274878741852, iteration: 456869
loss: 0.9886641502380371,grad_norm: 0.8323251014361213, iteration: 456870
loss: 1.0280166864395142,grad_norm: 0.6839226231855141, iteration: 456871
loss: 0.9573349356651306,grad_norm: 0.7305083751201498, iteration: 456872
loss: 0.997680127620697,grad_norm: 0.802386137656585, iteration: 456873
loss: 1.001962423324585,grad_norm: 0.8013392712362127, iteration: 456874
loss: 1.0173557996749878,grad_norm: 0.7757193610432872, iteration: 456875
loss: 1.0408188104629517,grad_norm: 0.898211267486097, iteration: 456876
loss: 1.0227112770080566,grad_norm: 0.8681272969422337, iteration: 456877
loss: 1.0349185466766357,grad_norm: 0.7465873444010326, iteration: 456878
loss: 0.9802840948104858,grad_norm: 0.7409725369565858, iteration: 456879
loss: 0.9888965487480164,grad_norm: 0.7362468335612822, iteration: 456880
loss: 1.0056828260421753,grad_norm: 0.8107504006299933, iteration: 456881
loss: 0.9862868785858154,grad_norm: 0.6725763525189415, iteration: 456882
loss: 0.9976100325584412,grad_norm: 0.7502049268785983, iteration: 456883
loss: 0.9866487383842468,grad_norm: 0.7574251532596189, iteration: 456884
loss: 1.006962776184082,grad_norm: 0.9999992843579874, iteration: 456885
loss: 1.0184684991836548,grad_norm: 0.790992870976814, iteration: 456886
loss: 1.0838780403137207,grad_norm: 0.9999992587520685, iteration: 456887
loss: 1.0561113357543945,grad_norm: 0.9999993456367003, iteration: 456888
loss: 1.0419378280639648,grad_norm: 0.853259205612371, iteration: 456889
loss: 0.9880111813545227,grad_norm: 0.7358246555006818, iteration: 456890
loss: 1.001833438873291,grad_norm: 0.7599634859765512, iteration: 456891
loss: 1.0083732604980469,grad_norm: 0.8702000366419831, iteration: 456892
loss: 1.0520360469818115,grad_norm: 0.7894974862828537, iteration: 456893
loss: 1.0039020776748657,grad_norm: 0.7192096488380854, iteration: 456894
loss: 0.9872938394546509,grad_norm: 0.7482225772997884, iteration: 456895
loss: 1.0160884857177734,grad_norm: 0.8191962404458337, iteration: 456896
loss: 1.0102527141571045,grad_norm: 0.8401085715294885, iteration: 456897
loss: 1.0373327732086182,grad_norm: 0.9421800831749277, iteration: 456898
loss: 0.9621404409408569,grad_norm: 0.7146814201031004, iteration: 456899
loss: 1.0056984424591064,grad_norm: 0.8393526395313667, iteration: 456900
loss: 0.9810206890106201,grad_norm: 0.7743183295888024, iteration: 456901
loss: 1.007333755493164,grad_norm: 0.6167973384045783, iteration: 456902
loss: 1.0039540529251099,grad_norm: 0.693318494736427, iteration: 456903
loss: 1.013230562210083,grad_norm: 0.7593714083266898, iteration: 456904
loss: 0.9397203922271729,grad_norm: 0.7532782168101196, iteration: 456905
loss: 0.9981303811073303,grad_norm: 0.7619173417145467, iteration: 456906
loss: 1.0205410718917847,grad_norm: 0.9999990612920132, iteration: 456907
loss: 0.9799185395240784,grad_norm: 0.8385635096550178, iteration: 456908
loss: 0.9885711669921875,grad_norm: 0.8154868070646122, iteration: 456909
loss: 0.9773090481758118,grad_norm: 0.7695192463082204, iteration: 456910
loss: 1.0094243288040161,grad_norm: 0.6753666764895117, iteration: 456911
loss: 1.0070379972457886,grad_norm: 0.8128773221446394, iteration: 456912
loss: 1.026754379272461,grad_norm: 0.7181062070955301, iteration: 456913
loss: 1.0333820581436157,grad_norm: 0.6897528634435021, iteration: 456914
loss: 0.9912351965904236,grad_norm: 0.9999997594979958, iteration: 456915
loss: 1.0031616687774658,grad_norm: 0.6404346696275739, iteration: 456916
loss: 1.0061665773391724,grad_norm: 0.7785272508805642, iteration: 456917
loss: 0.9887467622756958,grad_norm: 0.7258247240606063, iteration: 456918
loss: 0.9995353817939758,grad_norm: 0.6815867227391793, iteration: 456919
loss: 0.9967706799507141,grad_norm: 0.9032792083245396, iteration: 456920
loss: 1.0216418504714966,grad_norm: 0.839982692526687, iteration: 456921
loss: 1.0154838562011719,grad_norm: 0.8362106966142021, iteration: 456922
loss: 0.9970150589942932,grad_norm: 0.7433594306727042, iteration: 456923
loss: 0.9943681955337524,grad_norm: 0.9999992388772605, iteration: 456924
loss: 0.9850327968597412,grad_norm: 0.7033893742508692, iteration: 456925
loss: 1.0321061611175537,grad_norm: 0.8525710432065131, iteration: 456926
loss: 0.9916141033172607,grad_norm: 0.7141118936004215, iteration: 456927
loss: 1.0207977294921875,grad_norm: 0.6700083303148641, iteration: 456928
loss: 0.9709170460700989,grad_norm: 0.6932126696748733, iteration: 456929
loss: 1.0245126485824585,grad_norm: 0.928487907510268, iteration: 456930
loss: 1.0048660039901733,grad_norm: 0.8601917746705161, iteration: 456931
loss: 1.0197389125823975,grad_norm: 0.6610555136622936, iteration: 456932
loss: 1.0111922025680542,grad_norm: 0.713471038295689, iteration: 456933
loss: 0.9973999857902527,grad_norm: 0.9999995272765733, iteration: 456934
loss: 0.9896328449249268,grad_norm: 0.7173313216311519, iteration: 456935
loss: 1.009574294090271,grad_norm: 0.7448634224749093, iteration: 456936
loss: 1.0215586423873901,grad_norm: 0.8316640617167271, iteration: 456937
loss: 0.9984416961669922,grad_norm: 0.7350051784705706, iteration: 456938
loss: 1.015217661857605,grad_norm: 0.7115401890621952, iteration: 456939
loss: 1.014541745185852,grad_norm: 0.8992051327091242, iteration: 456940
loss: 1.0007215738296509,grad_norm: 0.8367365168775569, iteration: 456941
loss: 1.029371976852417,grad_norm: 0.7884102480213813, iteration: 456942
loss: 0.9991121292114258,grad_norm: 0.7168264838227554, iteration: 456943
loss: 0.9939988255500793,grad_norm: 0.7823945134016111, iteration: 456944
loss: 0.9559431076049805,grad_norm: 0.7892749025750108, iteration: 456945
loss: 0.9801021814346313,grad_norm: 0.8267544652242964, iteration: 456946
loss: 0.9943145513534546,grad_norm: 0.7442126041813506, iteration: 456947
loss: 1.023872971534729,grad_norm: 0.8940940780823389, iteration: 456948
loss: 0.9973096251487732,grad_norm: 0.6434963474988431, iteration: 456949
loss: 1.0246491432189941,grad_norm: 0.8202840011183561, iteration: 456950
loss: 0.9857364892959595,grad_norm: 0.704414271409124, iteration: 456951
loss: 0.9923547506332397,grad_norm: 0.7690483966416158, iteration: 456952
loss: 1.0223041772842407,grad_norm: 0.7384315913589597, iteration: 456953
loss: 0.9966633319854736,grad_norm: 0.8676536640194025, iteration: 456954
loss: 0.974163293838501,grad_norm: 0.9527353526022159, iteration: 456955
loss: 1.0259801149368286,grad_norm: 0.8925194910190776, iteration: 456956
loss: 1.0121159553527832,grad_norm: 0.8278428248721682, iteration: 456957
loss: 1.0464719533920288,grad_norm: 0.8097919313211995, iteration: 456958
loss: 1.0009900331497192,grad_norm: 0.7934846098295403, iteration: 456959
loss: 1.019885540008545,grad_norm: 0.7611783226826436, iteration: 456960
loss: 1.0114316940307617,grad_norm: 0.7135954572189809, iteration: 456961
loss: 1.002463459968567,grad_norm: 0.6514866434808945, iteration: 456962
loss: 0.997201144695282,grad_norm: 0.6398961497842018, iteration: 456963
loss: 1.0138325691223145,grad_norm: 0.6553866051996475, iteration: 456964
loss: 1.0079127550125122,grad_norm: 0.719275460892466, iteration: 456965
loss: 1.0638004541397095,grad_norm: 0.6876202456102067, iteration: 456966
loss: 1.0261402130126953,grad_norm: 0.711821857301086, iteration: 456967
loss: 0.969677746295929,grad_norm: 0.7820520013420785, iteration: 456968
loss: 1.0580569505691528,grad_norm: 0.6717089466511036, iteration: 456969
loss: 1.0038279294967651,grad_norm: 0.8390167715729839, iteration: 456970
loss: 0.9967740178108215,grad_norm: 0.8846562737619673, iteration: 456971
loss: 0.9894589781761169,grad_norm: 0.59635737655177, iteration: 456972
loss: 1.0188606977462769,grad_norm: 0.7167305689634057, iteration: 456973
loss: 1.0386338233947754,grad_norm: 0.7410638648229229, iteration: 456974
loss: 0.9945397973060608,grad_norm: 0.8566699197050899, iteration: 456975
loss: 0.9918461441993713,grad_norm: 0.7705648671810266, iteration: 456976
loss: 0.9887035489082336,grad_norm: 0.832123212366362, iteration: 456977
loss: 1.0073705911636353,grad_norm: 0.8466617675725984, iteration: 456978
loss: 0.9727471470832825,grad_norm: 0.8059019912175692, iteration: 456979
loss: 0.9646894335746765,grad_norm: 0.655854482320836, iteration: 456980
loss: 1.0024585723876953,grad_norm: 0.7090134489978228, iteration: 456981
loss: 0.9694550037384033,grad_norm: 0.799636553903665, iteration: 456982
loss: 0.9897505640983582,grad_norm: 0.7505251901337923, iteration: 456983
loss: 0.9940910935401917,grad_norm: 0.8158754249725891, iteration: 456984
loss: 1.0181483030319214,grad_norm: 0.8177854880415352, iteration: 456985
loss: 1.0074702501296997,grad_norm: 0.7566592219618605, iteration: 456986
loss: 0.9869615435600281,grad_norm: 0.7410733939386873, iteration: 456987
loss: 1.0085172653198242,grad_norm: 0.7496976399683284, iteration: 456988
loss: 0.9987962245941162,grad_norm: 0.7387981726515312, iteration: 456989
loss: 0.9650790095329285,grad_norm: 0.9999991366082105, iteration: 456990
loss: 0.9836393594741821,grad_norm: 0.845179069087622, iteration: 456991
loss: 0.9947828650474548,grad_norm: 0.7739323643478738, iteration: 456992
loss: 0.9966098666191101,grad_norm: 0.6466746830674498, iteration: 456993
loss: 1.0069979429244995,grad_norm: 0.7980582069237957, iteration: 456994
loss: 1.0002306699752808,grad_norm: 0.9999997588962524, iteration: 456995
loss: 0.9858484268188477,grad_norm: 0.697324204883426, iteration: 456996
loss: 0.9877621531486511,grad_norm: 0.9305293101358666, iteration: 456997
loss: 1.0307128429412842,grad_norm: 0.6618771029636588, iteration: 456998
loss: 1.0137410163879395,grad_norm: 0.6901950556397087, iteration: 456999
loss: 1.042272925376892,grad_norm: 0.7140406284709985, iteration: 457000
loss: 1.0364043712615967,grad_norm: 0.8312345919903202, iteration: 457001
loss: 1.007889986038208,grad_norm: 0.8810688481888238, iteration: 457002
loss: 0.985039472579956,grad_norm: 0.8117554182498726, iteration: 457003
loss: 0.996117889881134,grad_norm: 0.6922287132986773, iteration: 457004
loss: 1.0235377550125122,grad_norm: 0.921273837917262, iteration: 457005
loss: 0.9643391370773315,grad_norm: 0.7385005809819803, iteration: 457006
loss: 0.9532766938209534,grad_norm: 0.826059712666766, iteration: 457007
loss: 1.007308006286621,grad_norm: 0.8415137117103317, iteration: 457008
loss: 1.021914005279541,grad_norm: 0.8997010599462817, iteration: 457009
loss: 0.9930331707000732,grad_norm: 0.8821792684200666, iteration: 457010
loss: 0.9875396490097046,grad_norm: 0.7047742211008377, iteration: 457011
loss: 0.9678090810775757,grad_norm: 0.8234796994287954, iteration: 457012
loss: 0.9909428358078003,grad_norm: 0.7217535338220312, iteration: 457013
loss: 1.0031886100769043,grad_norm: 0.7283440201762836, iteration: 457014
loss: 0.9406858086585999,grad_norm: 0.974553177565187, iteration: 457015
loss: 0.9945300221443176,grad_norm: 0.8308408884566318, iteration: 457016
loss: 1.0373742580413818,grad_norm: 0.8281142353185402, iteration: 457017
loss: 1.0048868656158447,grad_norm: 0.8078145487157216, iteration: 457018
loss: 0.9866704940795898,grad_norm: 0.9455962645233023, iteration: 457019
loss: 0.9690411686897278,grad_norm: 0.6909074486890912, iteration: 457020
loss: 0.981147289276123,grad_norm: 0.8479598261718326, iteration: 457021
loss: 0.9890666604042053,grad_norm: 0.7189032226601163, iteration: 457022
loss: 0.9901458621025085,grad_norm: 0.7484958447564953, iteration: 457023
loss: 0.9969623684883118,grad_norm: 0.6297723014056825, iteration: 457024
loss: 0.9905346035957336,grad_norm: 0.6907968807701934, iteration: 457025
loss: 1.020634412765503,grad_norm: 0.9199325695616648, iteration: 457026
loss: 1.0271378755569458,grad_norm: 0.7219411476660542, iteration: 457027
loss: 1.0087149143218994,grad_norm: 0.7571959602078245, iteration: 457028
loss: 0.9637491106987,grad_norm: 0.7182797343767848, iteration: 457029
loss: 0.9822289943695068,grad_norm: 0.8231427626940272, iteration: 457030
loss: 0.9989057183265686,grad_norm: 0.9097519162305288, iteration: 457031
loss: 1.0043089389801025,grad_norm: 0.73167209668441, iteration: 457032
loss: 1.0194354057312012,grad_norm: 0.7151284735731441, iteration: 457033
loss: 0.9948179125785828,grad_norm: 0.5911827698448318, iteration: 457034
loss: 1.0038012266159058,grad_norm: 0.622065022064602, iteration: 457035
loss: 1.0184937715530396,grad_norm: 0.9999997898262319, iteration: 457036
loss: 0.978119969367981,grad_norm: 0.6845306679927934, iteration: 457037
loss: 1.0051132440567017,grad_norm: 0.727344988453876, iteration: 457038
loss: 1.0240689516067505,grad_norm: 0.7078574382226143, iteration: 457039
loss: 0.9619743227958679,grad_norm: 0.9643795877043151, iteration: 457040
loss: 1.0505796670913696,grad_norm: 0.8224079285162659, iteration: 457041
loss: 1.0103398561477661,grad_norm: 0.720045248768461, iteration: 457042
loss: 0.9994028210639954,grad_norm: 0.7807249149068063, iteration: 457043
loss: 0.9749248027801514,grad_norm: 0.7227742618738773, iteration: 457044
loss: 0.9699718356132507,grad_norm: 0.7097368256873094, iteration: 457045
loss: 0.9733407497406006,grad_norm: 0.7543550029113512, iteration: 457046
loss: 1.019504427909851,grad_norm: 0.8155253704710115, iteration: 457047
loss: 0.9826501607894897,grad_norm: 0.7818557966449313, iteration: 457048
loss: 0.9840342998504639,grad_norm: 0.7806599270966101, iteration: 457049
loss: 0.9778782725334167,grad_norm: 0.7032413879611483, iteration: 457050
loss: 1.0098345279693604,grad_norm: 0.7679388378486369, iteration: 457051
loss: 0.9807425737380981,grad_norm: 0.8124202381670675, iteration: 457052
loss: 0.9703244566917419,grad_norm: 0.728348567202725, iteration: 457053
loss: 1.021594524383545,grad_norm: 0.8060060714278556, iteration: 457054
loss: 0.9793012142181396,grad_norm: 0.8516053777200076, iteration: 457055
loss: 0.9966597557067871,grad_norm: 0.8609638454625554, iteration: 457056
loss: 0.9703841805458069,grad_norm: 0.8418331210653508, iteration: 457057
loss: 0.9963890314102173,grad_norm: 0.8175363154945388, iteration: 457058
loss: 0.983921468257904,grad_norm: 0.6414869296586699, iteration: 457059
loss: 1.0086909532546997,grad_norm: 0.8348697930387019, iteration: 457060
loss: 1.0405895709991455,grad_norm: 0.8857420411383206, iteration: 457061
loss: 1.017601490020752,grad_norm: 0.8577674926870883, iteration: 457062
loss: 0.9719569683074951,grad_norm: 0.6212569467659562, iteration: 457063
loss: 1.0101581811904907,grad_norm: 0.9383900153369109, iteration: 457064
loss: 1.0155541896820068,grad_norm: 0.5447317317677071, iteration: 457065
loss: 1.0346782207489014,grad_norm: 0.5952921187561208, iteration: 457066
loss: 0.990875244140625,grad_norm: 0.7108841502732367, iteration: 457067
loss: 1.138893723487854,grad_norm: 0.7810559766237934, iteration: 457068
loss: 1.0319410562515259,grad_norm: 0.7007088517583511, iteration: 457069
loss: 1.0508487224578857,grad_norm: 0.9999992343382331, iteration: 457070
loss: 1.0038431882858276,grad_norm: 0.6349819098003228, iteration: 457071
loss: 0.9972776174545288,grad_norm: 0.8029768859989332, iteration: 457072
loss: 1.0028363466262817,grad_norm: 0.9999993843554504, iteration: 457073
loss: 1.015493392944336,grad_norm: 0.7182488163540548, iteration: 457074
loss: 1.0084586143493652,grad_norm: 0.7093954654604505, iteration: 457075
loss: 1.008836030960083,grad_norm: 0.8630853787248415, iteration: 457076
loss: 0.9799535870552063,grad_norm: 0.7477634202457202, iteration: 457077
loss: 0.9877119660377502,grad_norm: 0.9999990773284432, iteration: 457078
loss: 0.9927223324775696,grad_norm: 0.8908532727815793, iteration: 457079
loss: 1.0627219676971436,grad_norm: 0.9516578371710805, iteration: 457080
loss: 0.972272515296936,grad_norm: 0.8143764070771409, iteration: 457081
loss: 1.0242830514907837,grad_norm: 0.999999493838466, iteration: 457082
loss: 1.0978444814682007,grad_norm: 0.9999998956505357, iteration: 457083
loss: 1.0614005327224731,grad_norm: 0.9999996868233227, iteration: 457084
loss: 0.9989131093025208,grad_norm: 0.7084043013487307, iteration: 457085
loss: 0.9628234505653381,grad_norm: 0.7720082773140045, iteration: 457086
loss: 0.9513208866119385,grad_norm: 0.7547273830699989, iteration: 457087
loss: 0.976250946521759,grad_norm: 0.8640440168433006, iteration: 457088
loss: 1.017545223236084,grad_norm: 0.676482057561801, iteration: 457089
loss: 1.0148123502731323,grad_norm: 0.7212556395448029, iteration: 457090
loss: 0.9951029419898987,grad_norm: 0.9014094883234237, iteration: 457091
loss: 1.0128740072250366,grad_norm: 0.7080964032124523, iteration: 457092
loss: 0.9876963496208191,grad_norm: 0.7229856139059087, iteration: 457093
loss: 1.1070126295089722,grad_norm: 0.9999996403502951, iteration: 457094
loss: 1.0025391578674316,grad_norm: 0.7658945164333195, iteration: 457095
loss: 0.9806877374649048,grad_norm: 0.7352348777695525, iteration: 457096
loss: 1.0426281690597534,grad_norm: 0.9999990446386526, iteration: 457097
loss: 1.0053805112838745,grad_norm: 0.726013862208353, iteration: 457098
loss: 0.9763277173042297,grad_norm: 0.6826638352725836, iteration: 457099
loss: 0.9722639322280884,grad_norm: 0.7231998713772936, iteration: 457100
loss: 0.9764400124549866,grad_norm: 0.8189492130466641, iteration: 457101
loss: 1.002854824066162,grad_norm: 0.9999999009258549, iteration: 457102
loss: 0.9973293542861938,grad_norm: 0.7827288912568809, iteration: 457103
loss: 1.0122178792953491,grad_norm: 0.8818688354288516, iteration: 457104
loss: 0.9818612337112427,grad_norm: 0.6954466061743934, iteration: 457105
loss: 1.0224229097366333,grad_norm: 0.9999990851045063, iteration: 457106
loss: 1.0171177387237549,grad_norm: 0.7401664609106557, iteration: 457107
loss: 0.9775964617729187,grad_norm: 0.6901454678825355, iteration: 457108
loss: 0.9856363534927368,grad_norm: 0.769261745838591, iteration: 457109
loss: 0.9505942463874817,grad_norm: 0.7771850691563326, iteration: 457110
loss: 1.0183793306350708,grad_norm: 0.8533908949831162, iteration: 457111
loss: 0.9859417080879211,grad_norm: 0.7593732334894809, iteration: 457112
loss: 1.0395771265029907,grad_norm: 0.8212070684412192, iteration: 457113
loss: 1.0008151531219482,grad_norm: 0.7795404330024792, iteration: 457114
loss: 1.0265079736709595,grad_norm: 0.7386463772832311, iteration: 457115
loss: 0.9714903831481934,grad_norm: 0.69653758928203, iteration: 457116
loss: 1.007627010345459,grad_norm: 0.860258801295612, iteration: 457117
loss: 0.9867293238639832,grad_norm: 0.7862027538946736, iteration: 457118
loss: 1.0045561790466309,grad_norm: 0.7217870792331669, iteration: 457119
loss: 1.0138540267944336,grad_norm: 0.6853468324108724, iteration: 457120
loss: 1.0433763265609741,grad_norm: 0.8605566375468919, iteration: 457121
loss: 0.9889447093009949,grad_norm: 0.5807991095147769, iteration: 457122
loss: 1.0064393281936646,grad_norm: 0.7620633113005874, iteration: 457123
loss: 1.1049104928970337,grad_norm: 1.0000000050937154, iteration: 457124
loss: 1.0255751609802246,grad_norm: 0.7805041411146075, iteration: 457125
loss: 1.0020577907562256,grad_norm: 0.7040044730264704, iteration: 457126
loss: 0.9896947741508484,grad_norm: 0.8354792773881355, iteration: 457127
loss: 1.0467197895050049,grad_norm: 0.8071052755816481, iteration: 457128
loss: 1.0764271020889282,grad_norm: 0.999999169407989, iteration: 457129
loss: 0.9837645292282104,grad_norm: 0.6825676936321665, iteration: 457130
loss: 0.956874668598175,grad_norm: 0.7591234330146135, iteration: 457131
loss: 1.0486128330230713,grad_norm: 0.7879632505961285, iteration: 457132
loss: 0.9898883104324341,grad_norm: 0.7851306489589903, iteration: 457133
loss: 1.0026218891143799,grad_norm: 0.8239428139005721, iteration: 457134
loss: 0.9778437614440918,grad_norm: 0.8202907652487867, iteration: 457135
loss: 0.9908426403999329,grad_norm: 0.9999991146540592, iteration: 457136
loss: 0.9448966979980469,grad_norm: 0.7014670569686866, iteration: 457137
loss: 1.0003976821899414,grad_norm: 0.7358922032456914, iteration: 457138
loss: 1.0450085401535034,grad_norm: 0.9999998400139166, iteration: 457139
loss: 0.9732822775840759,grad_norm: 0.6447688007539603, iteration: 457140
loss: 0.9671030640602112,grad_norm: 0.7925074579729768, iteration: 457141
loss: 0.9622789621353149,grad_norm: 0.7149628998888782, iteration: 457142
loss: 0.9929800629615784,grad_norm: 0.6568699751279853, iteration: 457143
loss: 0.9989407062530518,grad_norm: 0.6470299271369804, iteration: 457144
loss: 0.9739850759506226,grad_norm: 0.7816412875514259, iteration: 457145
loss: 0.9907985925674438,grad_norm: 0.7411772192187169, iteration: 457146
loss: 0.9907277822494507,grad_norm: 0.6985345171691687, iteration: 457147
loss: 1.0163843631744385,grad_norm: 0.856369550140922, iteration: 457148
loss: 0.9793104529380798,grad_norm: 0.6698484708239415, iteration: 457149
loss: 1.1035926342010498,grad_norm: 0.9999995827570706, iteration: 457150
loss: 1.0181491374969482,grad_norm: 0.7054284059080785, iteration: 457151
loss: 0.9738819599151611,grad_norm: 0.8645993564322907, iteration: 457152
loss: 0.9650369882583618,grad_norm: 0.7838257566180465, iteration: 457153
loss: 0.9934739470481873,grad_norm: 0.7935010249322136, iteration: 457154
loss: 1.0174133777618408,grad_norm: 0.9999992899950688, iteration: 457155
loss: 0.9707404375076294,grad_norm: 0.7110147066609356, iteration: 457156
loss: 1.0478986501693726,grad_norm: 0.881784320176862, iteration: 457157
loss: 0.9808034896850586,grad_norm: 0.7954097691931451, iteration: 457158
loss: 1.0007288455963135,grad_norm: 0.6242964894258562, iteration: 457159
loss: 1.002632975578308,grad_norm: 0.6713113520977856, iteration: 457160
loss: 1.0145221948623657,grad_norm: 0.7724798948743125, iteration: 457161
loss: 0.9915540814399719,grad_norm: 0.7728440819438369, iteration: 457162
loss: 0.976254403591156,grad_norm: 0.8781072542290949, iteration: 457163
loss: 0.9880084991455078,grad_norm: 0.7784679282584784, iteration: 457164
loss: 1.0032601356506348,grad_norm: 0.7184963852097993, iteration: 457165
loss: 1.0027674436569214,grad_norm: 0.7185803691946575, iteration: 457166
loss: 1.0117199420928955,grad_norm: 0.8258573389184165, iteration: 457167
loss: 0.9707765579223633,grad_norm: 0.8432694708249332, iteration: 457168
loss: 1.003211498260498,grad_norm: 0.7756817735397239, iteration: 457169
loss: 0.9669600129127502,grad_norm: 0.8009821610216666, iteration: 457170
loss: 1.0740771293640137,grad_norm: 0.865827076947146, iteration: 457171
loss: 0.9914422035217285,grad_norm: 0.99999912626588, iteration: 457172
loss: 0.9890283942222595,grad_norm: 0.7848678625988814, iteration: 457173
loss: 0.9808163046836853,grad_norm: 0.7846008918483705, iteration: 457174
loss: 0.9686814546585083,grad_norm: 0.9033962842874099, iteration: 457175
loss: 0.9674234986305237,grad_norm: 0.8480018272815136, iteration: 457176
loss: 1.0159876346588135,grad_norm: 0.783425345985475, iteration: 457177
loss: 0.9583964347839355,grad_norm: 0.6257379222036192, iteration: 457178
loss: 0.995146632194519,grad_norm: 0.7115547646154299, iteration: 457179
loss: 0.9786586761474609,grad_norm: 0.6492067857256755, iteration: 457180
loss: 0.9666555523872375,grad_norm: 0.9999996552408639, iteration: 457181
loss: 1.0273512601852417,grad_norm: 0.8066493184482916, iteration: 457182
loss: 0.9641503095626831,grad_norm: 0.8527624833119817, iteration: 457183
loss: 1.0308691263198853,grad_norm: 0.9159981067569124, iteration: 457184
loss: 1.0109058618545532,grad_norm: 0.5714010041126233, iteration: 457185
loss: 0.963462769985199,grad_norm: 0.6860336850462907, iteration: 457186
loss: 1.026005744934082,grad_norm: 0.9999999085909266, iteration: 457187
loss: 0.9684227705001831,grad_norm: 0.7363654639410991, iteration: 457188
loss: 1.0173169374465942,grad_norm: 0.648690083338575, iteration: 457189
loss: 1.0363558530807495,grad_norm: 0.6782747921176027, iteration: 457190
loss: 0.9664604067802429,grad_norm: 0.834069011659348, iteration: 457191
loss: 0.980481743812561,grad_norm: 0.7700899211977914, iteration: 457192
loss: 1.0602362155914307,grad_norm: 0.7828817873003879, iteration: 457193
loss: 0.966021716594696,grad_norm: 0.768545764531925, iteration: 457194
loss: 0.9598553776741028,grad_norm: 0.7363716723087346, iteration: 457195
loss: 0.9878440499305725,grad_norm: 0.7284887603025259, iteration: 457196
loss: 1.01676607131958,grad_norm: 0.8196224790894308, iteration: 457197
loss: 1.0310444831848145,grad_norm: 0.8682258237195951, iteration: 457198
loss: 0.9523927569389343,grad_norm: 0.8128413427779351, iteration: 457199
loss: 1.0070058107376099,grad_norm: 0.7895553739180778, iteration: 457200
loss: 0.9926543235778809,grad_norm: 0.7279285516291711, iteration: 457201
loss: 0.981153666973114,grad_norm: 0.8445430014268028, iteration: 457202
loss: 0.9961351752281189,grad_norm: 0.7694931751114362, iteration: 457203
loss: 1.004917860031128,grad_norm: 0.8488783702764092, iteration: 457204
loss: 0.9972301125526428,grad_norm: 0.6612233082995465, iteration: 457205
loss: 1.0005437135696411,grad_norm: 0.7419615821464776, iteration: 457206
loss: 1.0004874467849731,grad_norm: 0.6983952652678479, iteration: 457207
loss: 1.0203092098236084,grad_norm: 0.829451898222436, iteration: 457208
loss: 1.020336627960205,grad_norm: 0.6877734971633288, iteration: 457209
loss: 1.0204682350158691,grad_norm: 0.8926702069068128, iteration: 457210
loss: 0.9761542677879333,grad_norm: 0.7800764762190886, iteration: 457211
loss: 1.1332929134368896,grad_norm: 0.9130455006695469, iteration: 457212
loss: 1.0235822200775146,grad_norm: 0.8142494883163892, iteration: 457213
loss: 0.9681482911109924,grad_norm: 0.7094420468714091, iteration: 457214
loss: 1.0113343000411987,grad_norm: 0.7920938389366348, iteration: 457215
loss: 0.9984714388847351,grad_norm: 0.9095545615967461, iteration: 457216
loss: 0.9808679819107056,grad_norm: 0.9999991855414676, iteration: 457217
loss: 1.0366419553756714,grad_norm: 0.8647844946061921, iteration: 457218
loss: 0.9674403071403503,grad_norm: 0.8322823741417824, iteration: 457219
loss: 1.0205613374710083,grad_norm: 0.7154633442559525, iteration: 457220
loss: 0.982214629650116,grad_norm: 0.7906798488181638, iteration: 457221
loss: 0.9976406097412109,grad_norm: 0.6964315013517075, iteration: 457222
loss: 0.9880960583686829,grad_norm: 0.7530348396766016, iteration: 457223
loss: 1.0050344467163086,grad_norm: 0.7808073366051762, iteration: 457224
loss: 0.9861437082290649,grad_norm: 0.7179510083338759, iteration: 457225
loss: 0.9736694693565369,grad_norm: 0.7961921562558334, iteration: 457226
loss: 0.9791378974914551,grad_norm: 0.8655773829280379, iteration: 457227
loss: 0.9998731017112732,grad_norm: 0.9999992612742744, iteration: 457228
loss: 1.0351938009262085,grad_norm: 0.7995642221201918, iteration: 457229
loss: 0.9943010210990906,grad_norm: 0.9277305949729874, iteration: 457230
loss: 1.022091269493103,grad_norm: 0.8049955805011245, iteration: 457231
loss: 1.0355894565582275,grad_norm: 0.9992838669342028, iteration: 457232
loss: 1.1008832454681396,grad_norm: 0.9260578944223798, iteration: 457233
loss: 0.9597533941268921,grad_norm: 0.8133883557642427, iteration: 457234
loss: 1.0387492179870605,grad_norm: 0.739281558350021, iteration: 457235
loss: 1.073659896850586,grad_norm: 0.6019910923701867, iteration: 457236
loss: 1.0146340131759644,grad_norm: 0.9301539300236445, iteration: 457237
loss: 0.9914323687553406,grad_norm: 0.7466135705690828, iteration: 457238
loss: 1.0462850332260132,grad_norm: 0.9123198658175897, iteration: 457239
loss: 0.9919061660766602,grad_norm: 0.8682426145324177, iteration: 457240
loss: 1.0108386278152466,grad_norm: 0.7002900854628927, iteration: 457241
loss: 1.0076929330825806,grad_norm: 0.7051384907131503, iteration: 457242
loss: 1.028236985206604,grad_norm: 0.730766568695357, iteration: 457243
loss: 1.011396050453186,grad_norm: 0.7974553218914963, iteration: 457244
loss: 1.0061219930648804,grad_norm: 0.923787718759362, iteration: 457245
loss: 0.9703901410102844,grad_norm: 0.7710296076729813, iteration: 457246
loss: 0.98735111951828,grad_norm: 0.6313013071699843, iteration: 457247
loss: 0.9770325422286987,grad_norm: 0.6802799197206659, iteration: 457248
loss: 0.9879446029663086,grad_norm: 0.7543999459493504, iteration: 457249
loss: 1.0111076831817627,grad_norm: 0.9281180401403425, iteration: 457250
loss: 1.0002883672714233,grad_norm: 0.8715729474426577, iteration: 457251
loss: 0.9844350814819336,grad_norm: 0.8174976700708163, iteration: 457252
loss: 0.9870425462722778,grad_norm: 0.6738903229460392, iteration: 457253
loss: 1.0049086809158325,grad_norm: 0.9999998163432107, iteration: 457254
loss: 1.094933271408081,grad_norm: 0.9999993848989441, iteration: 457255
loss: 1.036185383796692,grad_norm: 0.7393571224489183, iteration: 457256
loss: 0.9910835027694702,grad_norm: 0.7326450758128461, iteration: 457257
loss: 1.0211896896362305,grad_norm: 0.7508042403810853, iteration: 457258
loss: 1.0270038843154907,grad_norm: 0.7220024563190797, iteration: 457259
loss: 0.9702779054641724,grad_norm: 0.7548902412031606, iteration: 457260
loss: 1.0278515815734863,grad_norm: 0.9999992866477526, iteration: 457261
loss: 1.0037096738815308,grad_norm: 0.6327841906747146, iteration: 457262
loss: 1.0132678747177124,grad_norm: 0.632654293777869, iteration: 457263
loss: 1.0124149322509766,grad_norm: 0.8032766675762341, iteration: 457264
loss: 0.9825482964515686,grad_norm: 0.8579730851602783, iteration: 457265
loss: 1.013576865196228,grad_norm: 0.8318348773303512, iteration: 457266
loss: 1.0684906244277954,grad_norm: 0.8766803019639904, iteration: 457267
loss: 0.9890084266662598,grad_norm: 0.7405960393972574, iteration: 457268
loss: 0.974104106426239,grad_norm: 0.8710145346438178, iteration: 457269
loss: 0.9955819249153137,grad_norm: 0.7057816202985057, iteration: 457270
loss: 1.0285555124282837,grad_norm: 0.706409841563403, iteration: 457271
loss: 0.97484290599823,grad_norm: 0.8051665771060552, iteration: 457272
loss: 1.0391546487808228,grad_norm: 0.7713595240405069, iteration: 457273
loss: 1.0335477590560913,grad_norm: 0.8052464506797627, iteration: 457274
loss: 0.9796432852745056,grad_norm: 0.722918439742777, iteration: 457275
loss: 0.9912208318710327,grad_norm: 0.8378263407730057, iteration: 457276
loss: 1.0206528902053833,grad_norm: 0.8427794186573113, iteration: 457277
loss: 1.010124921798706,grad_norm: 0.6965053945852558, iteration: 457278
loss: 0.9910619854927063,grad_norm: 0.9999989935548692, iteration: 457279
loss: 1.0124403238296509,grad_norm: 0.8524241705792973, iteration: 457280
loss: 1.005611777305603,grad_norm: 0.7098121746105958, iteration: 457281
loss: 0.9821314811706543,grad_norm: 0.742373604106981, iteration: 457282
loss: 1.0310522317886353,grad_norm: 0.80490505928245, iteration: 457283
loss: 0.9953351616859436,grad_norm: 0.9152767808829911, iteration: 457284
loss: 1.0151907205581665,grad_norm: 0.7376628563735318, iteration: 457285
loss: 0.9515310525894165,grad_norm: 0.6770084337467585, iteration: 457286
loss: 1.0353477001190186,grad_norm: 0.8355714111981347, iteration: 457287
loss: 1.0403178930282593,grad_norm: 0.6298476771582343, iteration: 457288
loss: 0.9677444696426392,grad_norm: 0.8130461226209672, iteration: 457289
loss: 1.0287423133850098,grad_norm: 0.805546792044192, iteration: 457290
loss: 1.0146657228469849,grad_norm: 0.8401221321172413, iteration: 457291
loss: 1.0084866285324097,grad_norm: 0.8150108401918089, iteration: 457292
loss: 0.9856366515159607,grad_norm: 0.8614333689924152, iteration: 457293
loss: 1.037201166152954,grad_norm: 0.9999992225712335, iteration: 457294
loss: 1.0063502788543701,grad_norm: 0.6521511149878089, iteration: 457295
loss: 0.9734488725662231,grad_norm: 0.7797758462282655, iteration: 457296
loss: 1.0029782056808472,grad_norm: 0.763027450745935, iteration: 457297
loss: 0.9820348620414734,grad_norm: 0.8845329977541486, iteration: 457298
loss: 1.009645700454712,grad_norm: 0.6910182108987482, iteration: 457299
loss: 1.009441614151001,grad_norm: 0.7720212189424633, iteration: 457300
loss: 1.0469447374343872,grad_norm: 0.7749286372138356, iteration: 457301
loss: 1.0254400968551636,grad_norm: 0.7248936751350749, iteration: 457302
loss: 0.9986758828163147,grad_norm: 0.8192870887261874, iteration: 457303
loss: 1.0017950534820557,grad_norm: 0.8720206980046472, iteration: 457304
loss: 1.017024040222168,grad_norm: 0.7653595039059254, iteration: 457305
loss: 0.9808193445205688,grad_norm: 0.7330278651359436, iteration: 457306
loss: 1.0016942024230957,grad_norm: 0.7857154986382833, iteration: 457307
loss: 0.9958881139755249,grad_norm: 0.6984679902608207, iteration: 457308
loss: 0.9845706820487976,grad_norm: 0.9796646129241836, iteration: 457309
loss: 0.9960165023803711,grad_norm: 0.728199616937006, iteration: 457310
loss: 0.9825115203857422,grad_norm: 0.6489076744268861, iteration: 457311
loss: 1.0422184467315674,grad_norm: 0.9712192829909329, iteration: 457312
loss: 1.0027564764022827,grad_norm: 0.8919637375037917, iteration: 457313
loss: 0.9853132367134094,grad_norm: 0.7396209227826327, iteration: 457314
loss: 1.028841495513916,grad_norm: 0.7491786163907665, iteration: 457315
loss: 0.9972715973854065,grad_norm: 0.8300616162423128, iteration: 457316
loss: 0.9903451204299927,grad_norm: 0.8487967038273887, iteration: 457317
loss: 1.0103074312210083,grad_norm: 0.6972431650324991, iteration: 457318
loss: 1.0298956632614136,grad_norm: 0.9999995153107382, iteration: 457319
loss: 0.9838405847549438,grad_norm: 0.8231227250214705, iteration: 457320
loss: 0.9672648310661316,grad_norm: 0.746270018786898, iteration: 457321
loss: 0.9883661270141602,grad_norm: 0.9732414598452921, iteration: 457322
loss: 0.9877899289131165,grad_norm: 0.6034048034139482, iteration: 457323
loss: 0.9960805773735046,grad_norm: 0.9641165405410166, iteration: 457324
loss: 0.9843308925628662,grad_norm: 0.8157317992705757, iteration: 457325
loss: 0.9828909635543823,grad_norm: 0.9085859966187704, iteration: 457326
loss: 0.9885777831077576,grad_norm: 0.6910823931210375, iteration: 457327
loss: 0.9652285575866699,grad_norm: 0.8449141176802507, iteration: 457328
loss: 0.9922270774841309,grad_norm: 0.6679410063631346, iteration: 457329
loss: 1.0468193292617798,grad_norm: 0.7553682193896931, iteration: 457330
loss: 0.9931925535202026,grad_norm: 0.7806820170616715, iteration: 457331
loss: 1.0146567821502686,grad_norm: 0.8193723286946675, iteration: 457332
loss: 1.0018056631088257,grad_norm: 0.774773813271648, iteration: 457333
loss: 1.0259003639221191,grad_norm: 0.722740054068565, iteration: 457334
loss: 1.0110132694244385,grad_norm: 0.696276475826198, iteration: 457335
loss: 1.038210153579712,grad_norm: 0.8201728580357142, iteration: 457336
loss: 1.0650286674499512,grad_norm: 0.9999992247835048, iteration: 457337
loss: 1.010942816734314,grad_norm: 0.8194407845843421, iteration: 457338
loss: 0.9725915789604187,grad_norm: 0.8026705122376767, iteration: 457339
loss: 0.9752702116966248,grad_norm: 0.8560172011715388, iteration: 457340
loss: 0.9733092188835144,grad_norm: 0.6784951070450714, iteration: 457341
loss: 1.0194391012191772,grad_norm: 0.7717513052055729, iteration: 457342
loss: 1.0008701086044312,grad_norm: 0.678903294102473, iteration: 457343
loss: 1.0022950172424316,grad_norm: 0.8615810534216062, iteration: 457344
loss: 0.9739542603492737,grad_norm: 0.9268829099211638, iteration: 457345
loss: 0.9803547263145447,grad_norm: 0.8046608742612757, iteration: 457346
loss: 0.9974381923675537,grad_norm: 0.6844692467394045, iteration: 457347
loss: 1.013451099395752,grad_norm: 0.999999404528512, iteration: 457348
loss: 1.0178894996643066,grad_norm: 0.8071884688903035, iteration: 457349
loss: 0.9670838117599487,grad_norm: 0.8170458776783096, iteration: 457350
loss: 1.011488437652588,grad_norm: 0.7132999638076567, iteration: 457351
loss: 1.1087367534637451,grad_norm: 0.9999992803088309, iteration: 457352
loss: 1.0198696851730347,grad_norm: 0.7608219231776466, iteration: 457353
loss: 0.9946836829185486,grad_norm: 0.7810218538084086, iteration: 457354
loss: 1.0061575174331665,grad_norm: 0.8391412337210166, iteration: 457355
loss: 1.006176233291626,grad_norm: 0.8439230834807085, iteration: 457356
loss: 1.0860975980758667,grad_norm: 0.999999689250599, iteration: 457357
loss: 0.9542043805122375,grad_norm: 0.9999991506545507, iteration: 457358
loss: 0.990749180316925,grad_norm: 0.7364106098485278, iteration: 457359
loss: 0.9853924512863159,grad_norm: 0.7784590130127037, iteration: 457360
loss: 0.9837302565574646,grad_norm: 0.719876649430417, iteration: 457361
loss: 1.0085219144821167,grad_norm: 0.8268496120327653, iteration: 457362
loss: 0.9817970991134644,grad_norm: 0.6263820763899202, iteration: 457363
loss: 0.9961196780204773,grad_norm: 0.7712915251772257, iteration: 457364
loss: 1.0037511587142944,grad_norm: 0.6467689152981493, iteration: 457365
loss: 0.9973821640014648,grad_norm: 0.8363851640141362, iteration: 457366
loss: 0.9951148629188538,grad_norm: 0.6334670909162518, iteration: 457367
loss: 1.0104628801345825,grad_norm: 0.6009356817949947, iteration: 457368
loss: 0.96917325258255,grad_norm: 0.7353675381953599, iteration: 457369
loss: 1.0524022579193115,grad_norm: 0.8129163320058558, iteration: 457370
loss: 1.045515775680542,grad_norm: 0.944300265321814, iteration: 457371
loss: 1.0075381994247437,grad_norm: 0.64681162306389, iteration: 457372
loss: 1.0230774879455566,grad_norm: 0.792449613039435, iteration: 457373
loss: 0.9675423502922058,grad_norm: 0.8516681833511778, iteration: 457374
loss: 1.0085035562515259,grad_norm: 0.8134675074661953, iteration: 457375
loss: 0.9729121923446655,grad_norm: 0.7876528722397688, iteration: 457376
loss: 0.9998213052749634,grad_norm: 0.7297603752145804, iteration: 457377
loss: 1.0217158794403076,grad_norm: 0.7576888991777239, iteration: 457378
loss: 1.0732115507125854,grad_norm: 0.674780386753747, iteration: 457379
loss: 1.0248162746429443,grad_norm: 0.8435454594417037, iteration: 457380
loss: 0.9988447427749634,grad_norm: 0.7185725689310581, iteration: 457381
loss: 0.9685020446777344,grad_norm: 0.7978884718234315, iteration: 457382
loss: 0.9983657598495483,grad_norm: 0.7965544016871169, iteration: 457383
loss: 1.0286701917648315,grad_norm: 0.6938804418186734, iteration: 457384
loss: 0.9765916466712952,grad_norm: 0.6487259718939846, iteration: 457385
loss: 1.0139973163604736,grad_norm: 0.8282282345480599, iteration: 457386
loss: 0.9859071969985962,grad_norm: 0.7630782661088519, iteration: 457387
loss: 0.9978530406951904,grad_norm: 0.8474875563863484, iteration: 457388
loss: 0.9945300221443176,grad_norm: 0.6830883562172332, iteration: 457389
loss: 1.02155339717865,grad_norm: 0.7032757893669924, iteration: 457390
loss: 1.0165663957595825,grad_norm: 0.8926472499395387, iteration: 457391
loss: 0.9752358198165894,grad_norm: 0.7439262966988208, iteration: 457392
loss: 0.9976954460144043,grad_norm: 0.7802586098109854, iteration: 457393
loss: 1.0236438512802124,grad_norm: 0.8179503670357212, iteration: 457394
loss: 0.9829591512680054,grad_norm: 0.7211535704781349, iteration: 457395
loss: 1.0240089893341064,grad_norm: 0.9932458688407724, iteration: 457396
loss: 1.0471675395965576,grad_norm: 0.9060140439698794, iteration: 457397
loss: 1.0070780515670776,grad_norm: 0.7476196817578721, iteration: 457398
loss: 1.083709716796875,grad_norm: 0.8421408547340808, iteration: 457399
loss: 1.011674165725708,grad_norm: 0.6948715285281338, iteration: 457400
loss: 1.0484957695007324,grad_norm: 0.8652962373813418, iteration: 457401
loss: 0.9993093013763428,grad_norm: 0.8550983565873557, iteration: 457402
loss: 1.0334874391555786,grad_norm: 0.9999991293094778, iteration: 457403
loss: 0.9933808445930481,grad_norm: 0.892549151772816, iteration: 457404
loss: 1.023597002029419,grad_norm: 0.9999990147327619, iteration: 457405
loss: 0.9930818676948547,grad_norm: 0.7659706289471085, iteration: 457406
loss: 1.0243628025054932,grad_norm: 0.7901592833694695, iteration: 457407
loss: 0.9987539052963257,grad_norm: 0.6268034438327146, iteration: 457408
loss: 1.0121941566467285,grad_norm: 0.8458607485307155, iteration: 457409
loss: 0.9921185374259949,grad_norm: 0.999999126414503, iteration: 457410
loss: 0.986842155456543,grad_norm: 0.668500657573083, iteration: 457411
loss: 0.9895943999290466,grad_norm: 0.6854033106730534, iteration: 457412
loss: 1.0154097080230713,grad_norm: 0.9999990556784649, iteration: 457413
loss: 1.0051277875900269,grad_norm: 0.8215447993427452, iteration: 457414
loss: 1.0188612937927246,grad_norm: 0.9177478208560397, iteration: 457415
loss: 0.994602382183075,grad_norm: 0.7139254155773745, iteration: 457416
loss: 1.0330311059951782,grad_norm: 0.7620915532933615, iteration: 457417
loss: 1.006779670715332,grad_norm: 0.9999994044689873, iteration: 457418
loss: 1.0183556079864502,grad_norm: 0.8969235821910716, iteration: 457419
loss: 0.9881635308265686,grad_norm: 0.7958861042864736, iteration: 457420
loss: 0.9848081469535828,grad_norm: 0.7271430235354015, iteration: 457421
loss: 1.0102622509002686,grad_norm: 0.9164760104940067, iteration: 457422
loss: 1.0126084089279175,grad_norm: 0.8446904093950297, iteration: 457423
loss: 1.0229469537734985,grad_norm: 0.997717728623097, iteration: 457424
loss: 0.979215145111084,grad_norm: 0.6786009035649816, iteration: 457425
loss: 1.0075745582580566,grad_norm: 0.8535293170583783, iteration: 457426
loss: 1.000027060508728,grad_norm: 0.9999990726470982, iteration: 457427
loss: 0.9994406700134277,grad_norm: 0.7732096060346315, iteration: 457428
loss: 0.9567037224769592,grad_norm: 0.7177729657787651, iteration: 457429
loss: 1.0126703977584839,grad_norm: 0.6876310850051568, iteration: 457430
loss: 1.0001863241195679,grad_norm: 0.656468878263861, iteration: 457431
loss: 0.9900287389755249,grad_norm: 0.7973346176672698, iteration: 457432
loss: 0.9765816330909729,grad_norm: 0.7478370408806209, iteration: 457433
loss: 1.010215401649475,grad_norm: 0.7396945796661717, iteration: 457434
loss: 0.9565606117248535,grad_norm: 0.7164473529336743, iteration: 457435
loss: 1.036438226699829,grad_norm: 0.6890635496262665, iteration: 457436
loss: 1.0300050973892212,grad_norm: 0.6794591592077757, iteration: 457437
loss: 0.9674031734466553,grad_norm: 0.838354859956767, iteration: 457438
loss: 1.0265790224075317,grad_norm: 0.6826737965524993, iteration: 457439
loss: 1.0460370779037476,grad_norm: 0.7211479758349221, iteration: 457440
loss: 0.9761333465576172,grad_norm: 0.7698029253370106, iteration: 457441
loss: 0.9814508557319641,grad_norm: 0.7624952678146512, iteration: 457442
loss: 0.9925562143325806,grad_norm: 0.7630556736669023, iteration: 457443
loss: 1.0307976007461548,grad_norm: 0.7543066868430004, iteration: 457444
loss: 1.0504136085510254,grad_norm: 0.999999625627564, iteration: 457445
loss: 0.9511154294013977,grad_norm: 0.7783835232992539, iteration: 457446
loss: 1.0055785179138184,grad_norm: 0.904927559787097, iteration: 457447
loss: 1.0082342624664307,grad_norm: 0.7460107837670801, iteration: 457448
loss: 1.0306730270385742,grad_norm: 0.8833696249511896, iteration: 457449
loss: 1.0069702863693237,grad_norm: 0.6756727761332639, iteration: 457450
loss: 0.9707908630371094,grad_norm: 0.7621336468902314, iteration: 457451
loss: 0.990626335144043,grad_norm: 0.9471074817560511, iteration: 457452
loss: 0.9943529367446899,grad_norm: 0.8081093229235127, iteration: 457453
loss: 1.0141059160232544,grad_norm: 0.999999232748893, iteration: 457454
loss: 1.0866059064865112,grad_norm: 0.9999993581632544, iteration: 457455
loss: 0.9449537396430969,grad_norm: 0.8800032947705072, iteration: 457456
loss: 1.070609450340271,grad_norm: 0.8403048741019681, iteration: 457457
loss: 1.0082942247390747,grad_norm: 0.7649392188104959, iteration: 457458
loss: 0.9912080764770508,grad_norm: 0.7188043362903777, iteration: 457459
loss: 0.9840253591537476,grad_norm: 0.8261263240338044, iteration: 457460
loss: 1.010087013244629,grad_norm: 0.7718352620009855, iteration: 457461
loss: 1.009213924407959,grad_norm: 0.6720243603274184, iteration: 457462
loss: 1.0092196464538574,grad_norm: 0.6661108000779474, iteration: 457463
loss: 1.0217503309249878,grad_norm: 0.8229080814307204, iteration: 457464
loss: 0.9977393746376038,grad_norm: 0.7145087728953431, iteration: 457465
loss: 0.982765793800354,grad_norm: 0.8009656844866619, iteration: 457466
loss: 0.9976097941398621,grad_norm: 0.9999989660981567, iteration: 457467
loss: 1.009689211845398,grad_norm: 0.6928928129757164, iteration: 457468
loss: 0.971902072429657,grad_norm: 0.6356332473560614, iteration: 457469
loss: 0.9919653534889221,grad_norm: 0.7361593501401986, iteration: 457470
loss: 0.9790855646133423,grad_norm: 0.6675472688644686, iteration: 457471
loss: 0.9877251982688904,grad_norm: 0.9019447700459294, iteration: 457472
loss: 1.0627073049545288,grad_norm: 0.699368904087074, iteration: 457473
loss: 1.0557814836502075,grad_norm: 0.9315568678797296, iteration: 457474
loss: 1.0389326810836792,grad_norm: 0.7673268239697755, iteration: 457475
loss: 0.967991828918457,grad_norm: 0.7705694485849383, iteration: 457476
loss: 0.9671259522438049,grad_norm: 0.8097535662651478, iteration: 457477
loss: 0.979238748550415,grad_norm: 0.8130693272468433, iteration: 457478
loss: 0.9972657561302185,grad_norm: 0.697518542579918, iteration: 457479
loss: 1.0182603597640991,grad_norm: 0.7687808012222215, iteration: 457480
loss: 0.9892476201057434,grad_norm: 0.6907916936119469, iteration: 457481
loss: 0.9863370656967163,grad_norm: 0.8205644933603259, iteration: 457482
loss: 0.9677028059959412,grad_norm: 0.6688437083794658, iteration: 457483
loss: 0.9567809700965881,grad_norm: 0.7041132037466786, iteration: 457484
loss: 1.0311708450317383,grad_norm: 0.6049151965099406, iteration: 457485
loss: 1.008748173713684,grad_norm: 0.5625319790095157, iteration: 457486
loss: 0.9708278775215149,grad_norm: 0.8649711836155243, iteration: 457487
loss: 1.0016340017318726,grad_norm: 0.7538627260186537, iteration: 457488
loss: 0.9876817464828491,grad_norm: 0.7408371926110545, iteration: 457489
loss: 0.9984193444252014,grad_norm: 0.6514597545896078, iteration: 457490
loss: 0.972048819065094,grad_norm: 0.8386741078196578, iteration: 457491
loss: 1.010696530342102,grad_norm: 0.6987418826206376, iteration: 457492
loss: 1.030854344367981,grad_norm: 0.7184134597974624, iteration: 457493
loss: 0.9957985877990723,grad_norm: 0.9256490018830952, iteration: 457494
loss: 0.9838178157806396,grad_norm: 0.6969476630969332, iteration: 457495
loss: 0.9982414841651917,grad_norm: 0.7431392444580429, iteration: 457496
loss: 0.9915620684623718,grad_norm: 0.8599130002783557, iteration: 457497
loss: 0.9717540144920349,grad_norm: 0.7146376847188666, iteration: 457498
loss: 0.9735147953033447,grad_norm: 0.781410825049761, iteration: 457499
loss: 1.0552809238433838,grad_norm: 0.950417247176445, iteration: 457500
loss: 1.0105807781219482,grad_norm: 0.691556407514775, iteration: 457501
loss: 0.9792726039886475,grad_norm: 0.7205444488699819, iteration: 457502
loss: 1.0142567157745361,grad_norm: 0.7117562322205945, iteration: 457503
loss: 1.0378550291061401,grad_norm: 0.9999990307940295, iteration: 457504
loss: 1.0079482793807983,grad_norm: 0.8030328326870728, iteration: 457505
loss: 0.9801992774009705,grad_norm: 0.7338318378480674, iteration: 457506
loss: 0.9814392924308777,grad_norm: 0.9313215399787542, iteration: 457507
loss: 0.9721758365631104,grad_norm: 0.7311270010208282, iteration: 457508
loss: 0.9820078015327454,grad_norm: 0.7520931801206681, iteration: 457509
loss: 1.0325734615325928,grad_norm: 0.7699742475164504, iteration: 457510
loss: 1.0576320886611938,grad_norm: 0.9329161754679121, iteration: 457511
loss: 1.0094999074935913,grad_norm: 0.7687871934154389, iteration: 457512
loss: 1.0200175046920776,grad_norm: 0.7473238485767116, iteration: 457513
loss: 0.9943073987960815,grad_norm: 0.8324749042317487, iteration: 457514
loss: 0.963070273399353,grad_norm: 0.7313107061103469, iteration: 457515
loss: 1.031916618347168,grad_norm: 0.7870345920144031, iteration: 457516
loss: 0.9931755065917969,grad_norm: 0.7143081720611948, iteration: 457517
loss: 1.017428994178772,grad_norm: 0.7307203223937868, iteration: 457518
loss: 0.9974184036254883,grad_norm: 0.7412820968794523, iteration: 457519
loss: 1.007309913635254,grad_norm: 0.9127918597913321, iteration: 457520
loss: 0.9465189576148987,grad_norm: 0.8608917700183727, iteration: 457521
loss: 0.976152777671814,grad_norm: 0.8147843805643913, iteration: 457522
loss: 1.0228444337844849,grad_norm: 0.9223971774576943, iteration: 457523
loss: 0.9715012907981873,grad_norm: 0.7511076720495603, iteration: 457524
loss: 1.0436619520187378,grad_norm: 0.999999853428014, iteration: 457525
loss: 0.9953880906105042,grad_norm: 0.8311598113886173, iteration: 457526
loss: 1.0443512201309204,grad_norm: 0.7673084068386263, iteration: 457527
loss: 1.0459353923797607,grad_norm: 0.8661664889210728, iteration: 457528
loss: 1.0332077741622925,grad_norm: 0.9999992936608717, iteration: 457529
loss: 0.9819096326828003,grad_norm: 0.7735808763772366, iteration: 457530
loss: 1.0361888408660889,grad_norm: 0.8439353021867764, iteration: 457531
loss: 0.9859691262245178,grad_norm: 0.8581528738298306, iteration: 457532
loss: 1.0488992929458618,grad_norm: 0.7164675162572289, iteration: 457533
loss: 0.9689309597015381,grad_norm: 0.6531211584548777, iteration: 457534
loss: 1.11039400100708,grad_norm: 0.945315270797942, iteration: 457535
loss: 1.0140609741210938,grad_norm: 0.7077419762425197, iteration: 457536
loss: 0.9813815951347351,grad_norm: 0.7950770432751746, iteration: 457537
loss: 1.0040005445480347,grad_norm: 0.6663674012338403, iteration: 457538
loss: 1.0062910318374634,grad_norm: 0.7074863619770034, iteration: 457539
loss: 0.9725449681282043,grad_norm: 0.7559758888696924, iteration: 457540
loss: 1.0179252624511719,grad_norm: 0.8311350947696773, iteration: 457541
loss: 1.006035327911377,grad_norm: 0.8636217046371482, iteration: 457542
loss: 0.9796136021614075,grad_norm: 0.8417837300685808, iteration: 457543
loss: 0.9712979197502136,grad_norm: 0.9072701057473962, iteration: 457544
loss: 0.993851363658905,grad_norm: 0.6778540442115176, iteration: 457545
loss: 1.0020753145217896,grad_norm: 0.7438811945237334, iteration: 457546
loss: 1.0042715072631836,grad_norm: 0.6619829381445007, iteration: 457547
loss: 1.1493310928344727,grad_norm: 0.9999997834673897, iteration: 457548
loss: 1.0003819465637207,grad_norm: 0.6906608252458244, iteration: 457549
loss: 0.9913115501403809,grad_norm: 0.7630552261171905, iteration: 457550
loss: 0.9813833832740784,grad_norm: 0.7009925018369322, iteration: 457551
loss: 1.0090948343276978,grad_norm: 0.7193433339827097, iteration: 457552
loss: 1.048584222793579,grad_norm: 0.892953721530409, iteration: 457553
loss: 1.0949608087539673,grad_norm: 0.933367441858279, iteration: 457554
loss: 0.9709087610244751,grad_norm: 0.8999622562377998, iteration: 457555
loss: 0.9930768013000488,grad_norm: 0.6621126601267832, iteration: 457556
loss: 1.0343501567840576,grad_norm: 0.7844498921646501, iteration: 457557
loss: 0.9731415510177612,grad_norm: 0.7925953936167941, iteration: 457558
loss: 0.9955714344978333,grad_norm: 0.7630540231036583, iteration: 457559
loss: 1.0158354043960571,grad_norm: 0.6953448952132556, iteration: 457560
loss: 1.0211888551712036,grad_norm: 0.7087329585683407, iteration: 457561
loss: 1.0044313669204712,grad_norm: 0.7334517624091392, iteration: 457562
loss: 1.0225796699523926,grad_norm: 0.7158358219656756, iteration: 457563
loss: 1.0159863233566284,grad_norm: 0.7412587831701386, iteration: 457564
loss: 1.0024718046188354,grad_norm: 0.7295374397960193, iteration: 457565
loss: 0.9997266530990601,grad_norm: 0.7241751729426332, iteration: 457566
loss: 1.0427781343460083,grad_norm: 0.7418432817794098, iteration: 457567
loss: 1.01800537109375,grad_norm: 0.7522776681999113, iteration: 457568
loss: 1.0006041526794434,grad_norm: 0.9391168354167493, iteration: 457569
loss: 0.9702157378196716,grad_norm: 0.9391252016079191, iteration: 457570
loss: 1.0228697061538696,grad_norm: 0.637097678630268, iteration: 457571
loss: 1.0078262090682983,grad_norm: 0.6383678197233518, iteration: 457572
loss: 0.9872942566871643,grad_norm: 0.7636730329980759, iteration: 457573
loss: 1.049615740776062,grad_norm: 0.8581413377312425, iteration: 457574
loss: 1.0120378732681274,grad_norm: 0.6657351655523249, iteration: 457575
loss: 0.9833115339279175,grad_norm: 0.7821135425360078, iteration: 457576
loss: 0.9994112253189087,grad_norm: 0.9999994187264878, iteration: 457577
loss: 1.0355147123336792,grad_norm: 0.8192522031547316, iteration: 457578
loss: 0.9775375723838806,grad_norm: 0.7936337569095282, iteration: 457579
loss: 1.008095145225525,grad_norm: 0.8150659046864691, iteration: 457580
loss: 0.9908224940299988,grad_norm: 0.7471346600492645, iteration: 457581
loss: 1.0091543197631836,grad_norm: 0.7059453543635869, iteration: 457582
loss: 0.9762024283409119,grad_norm: 0.8603640982355727, iteration: 457583
loss: 1.040945291519165,grad_norm: 0.8420559476890894, iteration: 457584
loss: 0.9814845323562622,grad_norm: 0.7002044168138264, iteration: 457585
loss: 1.0056818723678589,grad_norm: 0.8544191735759195, iteration: 457586
loss: 1.015324354171753,grad_norm: 0.77820113728932, iteration: 457587
loss: 1.012283444404602,grad_norm: 0.7388254876158494, iteration: 457588
loss: 0.9649845957756042,grad_norm: 0.8254693590384132, iteration: 457589
loss: 1.0164989233016968,grad_norm: 0.7438435975909409, iteration: 457590
loss: 0.995466947555542,grad_norm: 0.8638493422406401, iteration: 457591
loss: 1.0915948152542114,grad_norm: 0.9999993146188558, iteration: 457592
loss: 1.0144851207733154,grad_norm: 0.9575617487451356, iteration: 457593
loss: 1.0321720838546753,grad_norm: 0.9359208116175732, iteration: 457594
loss: 1.0195878744125366,grad_norm: 0.999999562770714, iteration: 457595
loss: 0.9784815311431885,grad_norm: 0.6991600091546105, iteration: 457596
loss: 1.0262495279312134,grad_norm: 0.6278763727410973, iteration: 457597
loss: 0.9955525994300842,grad_norm: 0.7670642217511155, iteration: 457598
loss: 0.9821059107780457,grad_norm: 0.7595335995611838, iteration: 457599
loss: 1.025307297706604,grad_norm: 0.9331467707707677, iteration: 457600
loss: 1.065306544303894,grad_norm: 0.8326597915303655, iteration: 457601
loss: 1.014395833015442,grad_norm: 0.9999996427387461, iteration: 457602
loss: 0.983484148979187,grad_norm: 0.6697578966525563, iteration: 457603
loss: 0.9860504865646362,grad_norm: 0.7678475195611215, iteration: 457604
loss: 0.9920567870140076,grad_norm: 0.7546657225634817, iteration: 457605
loss: 1.0056027173995972,grad_norm: 0.870332451846248, iteration: 457606
loss: 0.9971683621406555,grad_norm: 0.883219085035742, iteration: 457607
loss: 0.9832558631896973,grad_norm: 0.7204442519040962, iteration: 457608
loss: 0.9785619974136353,grad_norm: 0.7765116588392321, iteration: 457609
loss: 0.96983802318573,grad_norm: 0.8073827407514215, iteration: 457610
loss: 1.0102077722549438,grad_norm: 0.7193957637508518, iteration: 457611
loss: 1.0044054985046387,grad_norm: 0.7385184286739831, iteration: 457612
loss: 0.9825939536094666,grad_norm: 0.7238663667456383, iteration: 457613
loss: 1.0152714252471924,grad_norm: 0.8152734109858142, iteration: 457614
loss: 1.017327904701233,grad_norm: 0.7316619944702747, iteration: 457615
loss: 0.9862313866615295,grad_norm: 0.7386614458197337, iteration: 457616
loss: 0.9859124422073364,grad_norm: 0.9999992052358319, iteration: 457617
loss: 1.0264077186584473,grad_norm: 0.9999996161199564, iteration: 457618
loss: 0.9802795648574829,grad_norm: 0.7317323355056081, iteration: 457619
loss: 1.0019479990005493,grad_norm: 0.5322320789005263, iteration: 457620
loss: 0.986644446849823,grad_norm: 0.8667693737504539, iteration: 457621
loss: 0.9809039235115051,grad_norm: 0.6841648646225583, iteration: 457622
loss: 1.0612448453903198,grad_norm: 0.9999991551700386, iteration: 457623
loss: 0.9889119267463684,grad_norm: 0.837006805068059, iteration: 457624
loss: 0.9929134249687195,grad_norm: 0.7428770883997117, iteration: 457625
loss: 0.9937152862548828,grad_norm: 0.6881531270906913, iteration: 457626
loss: 1.0164875984191895,grad_norm: 0.764792780740959, iteration: 457627
loss: 1.0187958478927612,grad_norm: 0.7556840003364326, iteration: 457628
loss: 1.0190883874893188,grad_norm: 0.9529692264221188, iteration: 457629
loss: 1.0167542695999146,grad_norm: 0.8762191024237194, iteration: 457630
loss: 0.9922726154327393,grad_norm: 0.8595192585668274, iteration: 457631
loss: 0.9988446235656738,grad_norm: 0.9287495641967008, iteration: 457632
loss: 0.9749740362167358,grad_norm: 0.7157494045293822, iteration: 457633
loss: 1.0044078826904297,grad_norm: 0.7121442817718062, iteration: 457634
loss: 0.9783559441566467,grad_norm: 0.755922194787941, iteration: 457635
loss: 1.0137711763381958,grad_norm: 0.72590036272457, iteration: 457636
loss: 0.9887987971305847,grad_norm: 0.8666198907650614, iteration: 457637
loss: 0.9618253707885742,grad_norm: 0.8012616175410046, iteration: 457638
loss: 0.9734731912612915,grad_norm: 0.7166399158632718, iteration: 457639
loss: 0.9953368902206421,grad_norm: 0.9982521357044131, iteration: 457640
loss: 0.9821575880050659,grad_norm: 0.7755384383828182, iteration: 457641
loss: 0.9672404527664185,grad_norm: 0.6277158203605888, iteration: 457642
loss: 1.027653455734253,grad_norm: 0.7325002670369329, iteration: 457643
loss: 1.008004903793335,grad_norm: 0.7398025425520587, iteration: 457644
loss: 1.0010349750518799,grad_norm: 0.83989939029377, iteration: 457645
loss: 1.0119986534118652,grad_norm: 0.9999990523278254, iteration: 457646
loss: 1.033902883529663,grad_norm: 0.8831272871413279, iteration: 457647
loss: 0.9851821660995483,grad_norm: 0.8753222627265578, iteration: 457648
loss: 0.9494946002960205,grad_norm: 0.7334346759577548, iteration: 457649
loss: 1.003928780555725,grad_norm: 0.7406676214744649, iteration: 457650
loss: 0.9986627101898193,grad_norm: 0.7025129365936819, iteration: 457651
loss: 0.9891241192817688,grad_norm: 0.7612769118421735, iteration: 457652
loss: 0.9540510773658752,grad_norm: 0.7847869647126818, iteration: 457653
loss: 1.026105523109436,grad_norm: 0.9999991272955284, iteration: 457654
loss: 1.0056418180465698,grad_norm: 0.7860013624486009, iteration: 457655
loss: 1.0144953727722168,grad_norm: 0.9707694257426398, iteration: 457656
loss: 1.0239758491516113,grad_norm: 0.6979198242001161, iteration: 457657
loss: 0.9840032458305359,grad_norm: 0.8391108745617744, iteration: 457658
loss: 0.9863747954368591,grad_norm: 0.7518706643044343, iteration: 457659
loss: 1.0213755369186401,grad_norm: 0.7216251628300305, iteration: 457660
loss: 1.0839675664901733,grad_norm: 0.9999998281921169, iteration: 457661
loss: 1.0147652626037598,grad_norm: 0.6902031933216151, iteration: 457662
loss: 0.9942923784255981,grad_norm: 0.7217312930210631, iteration: 457663
loss: 1.0125595331192017,grad_norm: 0.8488145062338941, iteration: 457664
loss: 0.9824139475822449,grad_norm: 0.8126679908040862, iteration: 457665
loss: 0.9397298097610474,grad_norm: 0.7973785267951149, iteration: 457666
loss: 1.0112634897232056,grad_norm: 0.6916706861276359, iteration: 457667
loss: 1.012698769569397,grad_norm: 0.5924609995835407, iteration: 457668
loss: 0.9749683737754822,grad_norm: 0.5340251206877142, iteration: 457669
loss: 1.0139797925949097,grad_norm: 0.6712175270104674, iteration: 457670
loss: 0.9713578224182129,grad_norm: 0.7011421502460832, iteration: 457671
loss: 1.026166319847107,grad_norm: 0.8727741943683586, iteration: 457672
loss: 1.018601655960083,grad_norm: 0.8897299952905333, iteration: 457673
loss: 0.9981251358985901,grad_norm: 0.6591023869412463, iteration: 457674
loss: 0.9936738014221191,grad_norm: 0.5997752218308354, iteration: 457675
loss: 0.9823320508003235,grad_norm: 0.771819620261043, iteration: 457676
loss: 1.061401128768921,grad_norm: 0.6903937851558298, iteration: 457677
loss: 1.0225142240524292,grad_norm: 0.6786834415760942, iteration: 457678
loss: 1.0815757513046265,grad_norm: 0.9999992930182516, iteration: 457679
loss: 0.9862098097801208,grad_norm: 0.736523587273085, iteration: 457680
loss: 1.042615532875061,grad_norm: 0.874843675362237, iteration: 457681
loss: 1.0150490999221802,grad_norm: 0.8999905711039602, iteration: 457682
loss: 1.0236393213272095,grad_norm: 0.8067134971470089, iteration: 457683
loss: 0.9881831407546997,grad_norm: 0.7215986113713897, iteration: 457684
loss: 0.9956946969032288,grad_norm: 0.6785107798735225, iteration: 457685
loss: 1.011411428451538,grad_norm: 0.6944051752795567, iteration: 457686
loss: 0.9830565452575684,grad_norm: 0.735778070154706, iteration: 457687
loss: 1.0116674900054932,grad_norm: 0.9868663187794317, iteration: 457688
loss: 0.9815762639045715,grad_norm: 0.819768789257212, iteration: 457689
loss: 0.9934965968132019,grad_norm: 0.7456972266050784, iteration: 457690
loss: 1.0303410291671753,grad_norm: 0.6673521240797895, iteration: 457691
loss: 1.012570858001709,grad_norm: 0.9999992016731712, iteration: 457692
loss: 1.0245308876037598,grad_norm: 0.9105984332961747, iteration: 457693
loss: 1.0157628059387207,grad_norm: 0.7474264342013335, iteration: 457694
loss: 1.0165979862213135,grad_norm: 0.8557972873101355, iteration: 457695
loss: 1.0123893022537231,grad_norm: 0.7670359349930416, iteration: 457696
loss: 0.9653765559196472,grad_norm: 0.7347000170230553, iteration: 457697
loss: 0.9971379041671753,grad_norm: 0.6131408682660527, iteration: 457698
loss: 1.0203626155853271,grad_norm: 0.8269446875123211, iteration: 457699
loss: 1.025018334388733,grad_norm: 0.6972988945200276, iteration: 457700
loss: 1.025399088859558,grad_norm: 0.734585896672781, iteration: 457701
loss: 1.0097976922988892,grad_norm: 0.8340676995338646, iteration: 457702
loss: 1.0014804601669312,grad_norm: 0.8785803568914551, iteration: 457703
loss: 1.0057436227798462,grad_norm: 0.7555311133571568, iteration: 457704
loss: 1.003480076789856,grad_norm: 0.8013664838601327, iteration: 457705
loss: 1.0057381391525269,grad_norm: 0.6425778958029252, iteration: 457706
loss: 0.9839014410972595,grad_norm: 0.8808543601381466, iteration: 457707
loss: 1.0073812007904053,grad_norm: 0.9999996247631993, iteration: 457708
loss: 0.9479713439941406,grad_norm: 0.7696972145650078, iteration: 457709
loss: 0.9901067018508911,grad_norm: 0.6744894111897752, iteration: 457710
loss: 1.009070873260498,grad_norm: 0.7743899616409886, iteration: 457711
loss: 1.014077067375183,grad_norm: 0.6183105574244909, iteration: 457712
loss: 0.9740098714828491,grad_norm: 0.7662853757954311, iteration: 457713
loss: 0.9847636818885803,grad_norm: 0.8083770737076891, iteration: 457714
loss: 1.0060036182403564,grad_norm: 0.8815392150592788, iteration: 457715
loss: 0.9987200498580933,grad_norm: 0.7212972757637185, iteration: 457716
loss: 0.945685088634491,grad_norm: 0.7086165094094384, iteration: 457717
loss: 0.9969814419746399,grad_norm: 0.8068325281193554, iteration: 457718
loss: 1.0155032873153687,grad_norm: 0.8351548245986475, iteration: 457719
loss: 0.9888840913772583,grad_norm: 0.7031568951751671, iteration: 457720
loss: 1.0158799886703491,grad_norm: 0.7361339917366831, iteration: 457721
loss: 0.9760133624076843,grad_norm: 0.8039553667523508, iteration: 457722
loss: 1.0199874639511108,grad_norm: 0.7817284550096103, iteration: 457723
loss: 0.9748241901397705,grad_norm: 0.7541332032381212, iteration: 457724
loss: 0.9978881478309631,grad_norm: 0.8411361184246093, iteration: 457725
loss: 1.007128357887268,grad_norm: 0.6891031534008736, iteration: 457726
loss: 0.9733014702796936,grad_norm: 0.8405564376934276, iteration: 457727
loss: 1.002453327178955,grad_norm: 0.9741153422210379, iteration: 457728
loss: 1.0099653005599976,grad_norm: 0.6454464101516331, iteration: 457729
loss: 0.982937753200531,grad_norm: 0.8622469696699467, iteration: 457730
loss: 0.9734961986541748,grad_norm: 0.8033167282539612, iteration: 457731
loss: 0.9784475564956665,grad_norm: 0.73005291924078, iteration: 457732
loss: 0.9851232767105103,grad_norm: 0.7888903175485535, iteration: 457733
loss: 0.9849656820297241,grad_norm: 0.647237620738961, iteration: 457734
loss: 0.9641330242156982,grad_norm: 0.9015658098202588, iteration: 457735
loss: 0.9907001852989197,grad_norm: 0.7318164389429206, iteration: 457736
loss: 0.9741165041923523,grad_norm: 0.7318058757401743, iteration: 457737
loss: 1.028421401977539,grad_norm: 0.700526740426041, iteration: 457738
loss: 1.0140271186828613,grad_norm: 0.6224562502388483, iteration: 457739
loss: 1.0018614530563354,grad_norm: 0.8057097979557016, iteration: 457740
loss: 1.0223525762557983,grad_norm: 0.7727921008114936, iteration: 457741
loss: 1.0478477478027344,grad_norm: 0.7368456695917509, iteration: 457742
loss: 1.023155689239502,grad_norm: 0.7848717685806299, iteration: 457743
loss: 1.0122522115707397,grad_norm: 0.6840182619926125, iteration: 457744
loss: 1.0310900211334229,grad_norm: 0.8043996331295584, iteration: 457745
loss: 0.9853049516677856,grad_norm: 0.6587649862158046, iteration: 457746
loss: 0.9994924664497375,grad_norm: 0.7061434239034229, iteration: 457747
loss: 0.9973282217979431,grad_norm: 0.8498582800488299, iteration: 457748
loss: 1.003943681716919,grad_norm: 0.6778161720916349, iteration: 457749
loss: 0.9959630966186523,grad_norm: 0.6346537523123461, iteration: 457750
loss: 0.9983634352684021,grad_norm: 0.7139348079017893, iteration: 457751
loss: 1.005589485168457,grad_norm: 0.8710080612864398, iteration: 457752
loss: 1.0076406002044678,grad_norm: 0.7364813039340797, iteration: 457753
loss: 0.9799705743789673,grad_norm: 0.8021555929762806, iteration: 457754
loss: 0.9959594011306763,grad_norm: 0.6665220316888186, iteration: 457755
loss: 1.0119887590408325,grad_norm: 0.8068311417973505, iteration: 457756
loss: 0.9859129786491394,grad_norm: 0.731573554155422, iteration: 457757
loss: 0.9809929728507996,grad_norm: 0.731360422350667, iteration: 457758
loss: 0.9865369200706482,grad_norm: 0.6845521581162607, iteration: 457759
loss: 1.0398597717285156,grad_norm: 0.9999991014200212, iteration: 457760
loss: 0.9597840905189514,grad_norm: 0.8522437445304331, iteration: 457761
loss: 0.9910544753074646,grad_norm: 0.839207476211916, iteration: 457762
loss: 1.015579104423523,grad_norm: 0.7285808187286306, iteration: 457763
loss: 1.0054805278778076,grad_norm: 0.7772802328756119, iteration: 457764
loss: 0.9878915548324585,grad_norm: 0.7372307090923644, iteration: 457765
loss: 0.9835091233253479,grad_norm: 0.7536540172305225, iteration: 457766
loss: 0.9960660934448242,grad_norm: 0.7528289779339786, iteration: 457767
loss: 1.0485212802886963,grad_norm: 0.8458985347585286, iteration: 457768
loss: 1.001963496208191,grad_norm: 0.6288854032545079, iteration: 457769
loss: 1.0027412176132202,grad_norm: 0.7245270021274444, iteration: 457770
loss: 0.9898794293403625,grad_norm: 0.7021020213277932, iteration: 457771
loss: 0.9949356317520142,grad_norm: 0.7537228527900024, iteration: 457772
loss: 1.0521266460418701,grad_norm: 0.9867327935288907, iteration: 457773
loss: 0.9786840677261353,grad_norm: 0.7946459643659323, iteration: 457774
loss: 1.006670355796814,grad_norm: 0.7548345054943769, iteration: 457775
loss: 0.9523457288742065,grad_norm: 0.6235250889716227, iteration: 457776
loss: 1.005245327949524,grad_norm: 0.7217916141004509, iteration: 457777
loss: 1.025791049003601,grad_norm: 0.9104099570426522, iteration: 457778
loss: 1.0188888311386108,grad_norm: 0.879058078004557, iteration: 457779
loss: 1.0200533866882324,grad_norm: 0.770634606018116, iteration: 457780
loss: 0.9738064408302307,grad_norm: 0.6334361216881597, iteration: 457781
loss: 0.9604365229606628,grad_norm: 0.8286524175771277, iteration: 457782
loss: 1.0313940048217773,grad_norm: 0.699826077768963, iteration: 457783
loss: 0.983269453048706,grad_norm: 0.6925157199573984, iteration: 457784
loss: 1.0196620225906372,grad_norm: 0.8136461124004611, iteration: 457785
loss: 0.991448163986206,grad_norm: 0.7950270476712907, iteration: 457786
loss: 0.9775910973548889,grad_norm: 0.9802149823737772, iteration: 457787
loss: 1.0031136274337769,grad_norm: 0.7385233803800273, iteration: 457788
loss: 0.991344153881073,grad_norm: 0.6931161509027592, iteration: 457789
loss: 1.0030481815338135,grad_norm: 0.7529843695110474, iteration: 457790
loss: 0.965740978717804,grad_norm: 0.8804624263555617, iteration: 457791
loss: 0.9501227140426636,grad_norm: 0.8885596670395504, iteration: 457792
loss: 0.9679340124130249,grad_norm: 0.6720045310179592, iteration: 457793
loss: 1.0278548002243042,grad_norm: 0.740519318807722, iteration: 457794
loss: 1.0091310739517212,grad_norm: 0.9933590591054229, iteration: 457795
loss: 1.0307502746582031,grad_norm: 0.776812941187427, iteration: 457796
loss: 0.9959372878074646,grad_norm: 0.6899372253545175, iteration: 457797
loss: 0.9742393493652344,grad_norm: 0.861123204659689, iteration: 457798
loss: 1.0399775505065918,grad_norm: 0.7332192658970308, iteration: 457799
loss: 0.9966611862182617,grad_norm: 0.7805480924649465, iteration: 457800
loss: 1.0242793560028076,grad_norm: 0.7355778971336961, iteration: 457801
loss: 0.9664002060890198,grad_norm: 0.8005121195417859, iteration: 457802
loss: 1.0057624578475952,grad_norm: 0.6081844529226538, iteration: 457803
loss: 0.9720903635025024,grad_norm: 0.6832357729955401, iteration: 457804
loss: 1.022559404373169,grad_norm: 0.7558195597732196, iteration: 457805
loss: 1.0030113458633423,grad_norm: 0.8241659970663786, iteration: 457806
loss: 0.9841285943984985,grad_norm: 0.7261905209240618, iteration: 457807
loss: 1.0064902305603027,grad_norm: 0.9060117043269046, iteration: 457808
loss: 0.972753643989563,grad_norm: 0.7166997027261586, iteration: 457809
loss: 0.9728448987007141,grad_norm: 0.6807743461153895, iteration: 457810
loss: 0.9770930409431458,grad_norm: 0.6520926106611592, iteration: 457811
loss: 1.0446027517318726,grad_norm: 0.6766346318551086, iteration: 457812
loss: 1.0086032152175903,grad_norm: 0.6470141055439315, iteration: 457813
loss: 0.9784968495368958,grad_norm: 0.7457774216287941, iteration: 457814
loss: 1.0126676559448242,grad_norm: 0.7582123383016758, iteration: 457815
loss: 0.9993780255317688,grad_norm: 0.6522146280011394, iteration: 457816
loss: 1.027076244354248,grad_norm: 0.7059960024836, iteration: 457817
loss: 1.0244543552398682,grad_norm: 0.7351078106909099, iteration: 457818
loss: 0.9747810959815979,grad_norm: 0.7148130878064175, iteration: 457819
loss: 0.9868801236152649,grad_norm: 0.7650463198992419, iteration: 457820
loss: 0.9545734524726868,grad_norm: 0.7703983108046794, iteration: 457821
loss: 1.0071992874145508,grad_norm: 0.8065587176974442, iteration: 457822
loss: 1.0221103429794312,grad_norm: 0.7008526688828239, iteration: 457823
loss: 0.9837303757667542,grad_norm: 0.7624070581552426, iteration: 457824
loss: 1.014960527420044,grad_norm: 0.9189586862951467, iteration: 457825
loss: 0.9934090375900269,grad_norm: 0.9999990775381925, iteration: 457826
loss: 0.9623335003852844,grad_norm: 0.8386989174772254, iteration: 457827
loss: 0.9927388429641724,grad_norm: 0.8144274972853934, iteration: 457828
loss: 1.0081273317337036,grad_norm: 0.687663914644351, iteration: 457829
loss: 0.9767917394638062,grad_norm: 0.7376183839434582, iteration: 457830
loss: 1.0124430656433105,grad_norm: 0.8153510619246099, iteration: 457831
loss: 0.9700489044189453,grad_norm: 0.7021371238122605, iteration: 457832
loss: 1.0288530588150024,grad_norm: 0.9999991713926574, iteration: 457833
loss: 0.959820032119751,grad_norm: 0.57304450801824, iteration: 457834
loss: 1.0092589855194092,grad_norm: 0.6324591578627519, iteration: 457835
loss: 0.985319197177887,grad_norm: 0.7593400214747053, iteration: 457836
loss: 0.9705454707145691,grad_norm: 0.8118923964139115, iteration: 457837
loss: 1.0185743570327759,grad_norm: 0.8656031589857573, iteration: 457838
loss: 0.974341869354248,grad_norm: 0.7442849031491271, iteration: 457839
loss: 0.99347984790802,grad_norm: 0.6981571022014785, iteration: 457840
loss: 1.0079927444458008,grad_norm: 0.7822478801991435, iteration: 457841
loss: 1.0180498361587524,grad_norm: 0.8875768530071222, iteration: 457842
loss: 1.0342422723770142,grad_norm: 0.8995103576199562, iteration: 457843
loss: 0.9854093790054321,grad_norm: 0.7327626525158926, iteration: 457844
loss: 0.9841539859771729,grad_norm: 0.7174131708011019, iteration: 457845
loss: 0.9791066646575928,grad_norm: 0.8148578685296793, iteration: 457846
loss: 1.0245615243911743,grad_norm: 0.9421411856644711, iteration: 457847
loss: 0.9838519096374512,grad_norm: 0.6767587049774437, iteration: 457848
loss: 1.0106064081192017,grad_norm: 0.691323240625623, iteration: 457849
loss: 0.9816179871559143,grad_norm: 0.8283369658904275, iteration: 457850
loss: 1.0028563737869263,grad_norm: 0.8746034935512609, iteration: 457851
loss: 0.9704967737197876,grad_norm: 0.7753895269426547, iteration: 457852
loss: 0.9623361229896545,grad_norm: 0.7568943777845601, iteration: 457853
loss: 0.9816684126853943,grad_norm: 0.7613312531544895, iteration: 457854
loss: 1.0222177505493164,grad_norm: 0.7407385508892455, iteration: 457855
loss: 0.9884036779403687,grad_norm: 0.7336064342162858, iteration: 457856
loss: 1.0388315916061401,grad_norm: 0.9999994710342442, iteration: 457857
loss: 1.0207868814468384,grad_norm: 0.7762768264640793, iteration: 457858
loss: 1.0319335460662842,grad_norm: 0.8032004197278274, iteration: 457859
loss: 1.0196533203125,grad_norm: 0.7627018741980754, iteration: 457860
loss: 0.9922045469284058,grad_norm: 0.7958991686574141, iteration: 457861
loss: 0.9768859148025513,grad_norm: 0.8469893763241729, iteration: 457862
loss: 0.9761223196983337,grad_norm: 0.76405953407791, iteration: 457863
loss: 1.009635329246521,grad_norm: 0.7339759769649972, iteration: 457864
loss: 0.9752914309501648,grad_norm: 0.825950560763496, iteration: 457865
loss: 1.0141500234603882,grad_norm: 0.9999992609474889, iteration: 457866
loss: 0.9594044089317322,grad_norm: 0.811701414917981, iteration: 457867
loss: 1.0467218160629272,grad_norm: 0.9999990954837732, iteration: 457868
loss: 0.9985566735267639,grad_norm: 0.8110167669819109, iteration: 457869
loss: 0.9930288195610046,grad_norm: 0.7918756965208938, iteration: 457870
loss: 0.9806195497512817,grad_norm: 0.7527157109964967, iteration: 457871
loss: 0.977400541305542,grad_norm: 0.8485931256486513, iteration: 457872
loss: 0.9675357341766357,grad_norm: 0.7009614354295767, iteration: 457873
loss: 1.0297600030899048,grad_norm: 0.7428510033919944, iteration: 457874
loss: 0.9973307847976685,grad_norm: 0.7780201888667508, iteration: 457875
loss: 1.0082119703292847,grad_norm: 0.7908859959272999, iteration: 457876
loss: 1.0260978937149048,grad_norm: 0.7445874197409279, iteration: 457877
loss: 0.9911309480667114,grad_norm: 0.7058948166455263, iteration: 457878
loss: 0.9894022941589355,grad_norm: 0.7396984288867711, iteration: 457879
loss: 0.9858073592185974,grad_norm: 0.816859601548046, iteration: 457880
loss: 1.0112719535827637,grad_norm: 0.7913003096430018, iteration: 457881
loss: 0.9899806976318359,grad_norm: 0.8049580022675892, iteration: 457882
loss: 1.0122675895690918,grad_norm: 0.7515675765696939, iteration: 457883
loss: 0.9966127276420593,grad_norm: 0.8921047912218858, iteration: 457884
loss: 1.0016757249832153,grad_norm: 0.6896384303460051, iteration: 457885
loss: 0.9893128871917725,grad_norm: 0.731003747011631, iteration: 457886
loss: 1.0273767709732056,grad_norm: 0.7916089293245804, iteration: 457887
loss: 1.011343240737915,grad_norm: 0.763714440522231, iteration: 457888
loss: 0.976347029209137,grad_norm: 0.7066439724622066, iteration: 457889
loss: 1.0160092115402222,grad_norm: 0.8617196562681829, iteration: 457890
loss: 0.9825496673583984,grad_norm: 0.8389432683604279, iteration: 457891
loss: 1.0050376653671265,grad_norm: 0.83200111866484, iteration: 457892
loss: 0.9876101016998291,grad_norm: 0.8225465085353162, iteration: 457893
loss: 0.9628809094429016,grad_norm: 0.8187570932970991, iteration: 457894
loss: 0.9835349321365356,grad_norm: 0.6878699768070468, iteration: 457895
loss: 0.9711649417877197,grad_norm: 0.7066331539994017, iteration: 457896
loss: 1.0031980276107788,grad_norm: 0.999999523025731, iteration: 457897
loss: 0.990001380443573,grad_norm: 0.7915966284734158, iteration: 457898
loss: 1.0122802257537842,grad_norm: 0.8864812928805651, iteration: 457899
loss: 0.9991585612297058,grad_norm: 0.7515403164349125, iteration: 457900
loss: 0.9732726216316223,grad_norm: 0.843142652128938, iteration: 457901
loss: 0.9943116903305054,grad_norm: 0.7580690373772072, iteration: 457902
loss: 1.0254087448120117,grad_norm: 0.6835227820351941, iteration: 457903
loss: 0.9792689085006714,grad_norm: 0.8366934368190603, iteration: 457904
loss: 0.9926454424858093,grad_norm: 0.7577687910548638, iteration: 457905
loss: 1.012555480003357,grad_norm: 0.722257151371924, iteration: 457906
loss: 1.0059376955032349,grad_norm: 0.6914269263333608, iteration: 457907
loss: 1.0159037113189697,grad_norm: 0.7835077204749243, iteration: 457908
loss: 1.0036730766296387,grad_norm: 0.8056754003372499, iteration: 457909
loss: 0.9779036045074463,grad_norm: 0.7181829720917158, iteration: 457910
loss: 0.9858050346374512,grad_norm: 0.753250046234536, iteration: 457911
loss: 1.0049464702606201,grad_norm: 0.9999994300834398, iteration: 457912
loss: 1.0176783800125122,grad_norm: 0.7466446298844575, iteration: 457913
loss: 0.9898169040679932,grad_norm: 0.6880610434174402, iteration: 457914
loss: 0.9949201941490173,grad_norm: 0.8324605137876762, iteration: 457915
loss: 0.9852818250656128,grad_norm: 0.7734892781126492, iteration: 457916
loss: 1.0582618713378906,grad_norm: 0.783575264969383, iteration: 457917
loss: 1.0247247219085693,grad_norm: 0.712971901757527, iteration: 457918
loss: 1.014821171760559,grad_norm: 0.7389497076999417, iteration: 457919
loss: 0.9812853336334229,grad_norm: 0.7097743624212934, iteration: 457920
loss: 0.985526978969574,grad_norm: 0.9999994465676778, iteration: 457921
loss: 1.0071755647659302,grad_norm: 0.8018658471444162, iteration: 457922
loss: 1.0454615354537964,grad_norm: 0.8008549818928642, iteration: 457923
loss: 1.0086385011672974,grad_norm: 0.8567227836755309, iteration: 457924
loss: 0.9986922144889832,grad_norm: 0.6756903911503015, iteration: 457925
loss: 0.9848951101303101,grad_norm: 0.65033070849151, iteration: 457926
loss: 0.9858453273773193,grad_norm: 0.643250888921001, iteration: 457927
loss: 0.9969815015792847,grad_norm: 0.659115608864726, iteration: 457928
loss: 1.089112401008606,grad_norm: 0.9999991804442436, iteration: 457929
loss: 1.0196772813796997,grad_norm: 0.8048360822376731, iteration: 457930
loss: 0.9982979893684387,grad_norm: 0.8491101510862222, iteration: 457931
loss: 1.0131803750991821,grad_norm: 0.8289551309016479, iteration: 457932
loss: 1.0012813806533813,grad_norm: 0.7022665162472324, iteration: 457933
loss: 1.0094636678695679,grad_norm: 0.9999993620685454, iteration: 457934
loss: 1.0036673545837402,grad_norm: 0.6596334130053993, iteration: 457935
loss: 0.9860022664070129,grad_norm: 0.6663022857121895, iteration: 457936
loss: 0.9694932699203491,grad_norm: 0.781495948167916, iteration: 457937
loss: 1.0231947898864746,grad_norm: 0.8265034085710515, iteration: 457938
loss: 1.0046370029449463,grad_norm: 0.8731980614017876, iteration: 457939
loss: 0.9945893883705139,grad_norm: 0.9999990536504706, iteration: 457940
loss: 0.9782368540763855,grad_norm: 0.7544819761982846, iteration: 457941
loss: 1.0026887655258179,grad_norm: 0.6770177646412568, iteration: 457942
loss: 1.0291767120361328,grad_norm: 0.8342443162931836, iteration: 457943
loss: 0.9789027571678162,grad_norm: 0.8067482148186135, iteration: 457944
loss: 0.9662540555000305,grad_norm: 0.7316691272672273, iteration: 457945
loss: 0.9841187000274658,grad_norm: 0.6717895691899005, iteration: 457946
loss: 1.0019547939300537,grad_norm: 0.8972218637760521, iteration: 457947
loss: 0.9624215960502625,grad_norm: 0.8367513854797872, iteration: 457948
loss: 1.0232073068618774,grad_norm: 0.9999989771243645, iteration: 457949
loss: 1.0118697881698608,grad_norm: 0.7796769811300437, iteration: 457950
loss: 0.9688966870307922,grad_norm: 0.727223292613469, iteration: 457951
loss: 0.9761530160903931,grad_norm: 0.7306737992700217, iteration: 457952
loss: 1.1060168743133545,grad_norm: 0.9999990561844644, iteration: 457953
loss: 1.0155256986618042,grad_norm: 0.830294243922066, iteration: 457954
loss: 1.0078130960464478,grad_norm: 0.7862709173907529, iteration: 457955
loss: 1.028880000114441,grad_norm: 0.9311132999879566, iteration: 457956
loss: 0.9928817749023438,grad_norm: 0.6614507153143891, iteration: 457957
loss: 0.9864714741706848,grad_norm: 0.7438940725375784, iteration: 457958
loss: 1.02165949344635,grad_norm: 0.7254206533872225, iteration: 457959
loss: 0.972317636013031,grad_norm: 0.7471994216456378, iteration: 457960
loss: 1.0026134252548218,grad_norm: 0.6337058596820729, iteration: 457961
loss: 1.0022305250167847,grad_norm: 0.695737255128892, iteration: 457962
loss: 1.013067603111267,grad_norm: 0.8072693760926657, iteration: 457963
loss: 0.9914854168891907,grad_norm: 0.8859843792134936, iteration: 457964
loss: 0.9749822616577148,grad_norm: 0.7173030611300842, iteration: 457965
loss: 0.9879763722419739,grad_norm: 0.9293461660427915, iteration: 457966
loss: 1.0139509439468384,grad_norm: 0.6403924929195577, iteration: 457967
loss: 0.9925585389137268,grad_norm: 0.6628945024697828, iteration: 457968
loss: 1.0092488527297974,grad_norm: 0.8893842209561572, iteration: 457969
loss: 0.9863264560699463,grad_norm: 0.8256198289779987, iteration: 457970
loss: 0.993173360824585,grad_norm: 0.7295996888815457, iteration: 457971
loss: 1.0186349153518677,grad_norm: 0.7451704261082767, iteration: 457972
loss: 0.9940032958984375,grad_norm: 0.8700911579844055, iteration: 457973
loss: 1.021112084388733,grad_norm: 0.9056350263718942, iteration: 457974
loss: 0.9647508263587952,grad_norm: 0.805873407073503, iteration: 457975
loss: 0.9736905097961426,grad_norm: 0.7338737470582902, iteration: 457976
loss: 1.0294095277786255,grad_norm: 0.7726835182001219, iteration: 457977
loss: 0.9906581044197083,grad_norm: 0.8880855019469847, iteration: 457978
loss: 0.9899067282676697,grad_norm: 0.7451919446905452, iteration: 457979
loss: 1.0295299291610718,grad_norm: 0.7752058822895737, iteration: 457980
loss: 1.001750111579895,grad_norm: 0.7228857316842594, iteration: 457981
loss: 0.9880422949790955,grad_norm: 0.9999989312702844, iteration: 457982
loss: 0.9854915142059326,grad_norm: 0.6950719153093519, iteration: 457983
loss: 1.014062523841858,grad_norm: 0.6270557937078903, iteration: 457984
loss: 0.9964268803596497,grad_norm: 0.6315113421278555, iteration: 457985
loss: 1.0062507390975952,grad_norm: 0.9999991476674481, iteration: 457986
loss: 0.9891058802604675,grad_norm: 0.914178980382236, iteration: 457987
loss: 0.9964431524276733,grad_norm: 0.7048089653436875, iteration: 457988
loss: 0.9992101192474365,grad_norm: 0.7444231418450756, iteration: 457989
loss: 1.0583258867263794,grad_norm: 0.7788855399426224, iteration: 457990
loss: 0.9664241075515747,grad_norm: 0.8422171052981037, iteration: 457991
loss: 1.030321478843689,grad_norm: 0.8274700267105128, iteration: 457992
loss: 0.9984002113342285,grad_norm: 0.7006335140046226, iteration: 457993
loss: 0.9864442944526672,grad_norm: 0.7551105097217145, iteration: 457994
loss: 1.0157816410064697,grad_norm: 0.7601222285995859, iteration: 457995
loss: 0.953153669834137,grad_norm: 0.7785321721533456, iteration: 457996
loss: 0.9820047616958618,grad_norm: 0.778865311660581, iteration: 457997
loss: 1.021659255027771,grad_norm: 0.7254283100972181, iteration: 457998
loss: 0.9824864268302917,grad_norm: 0.7678925293893558, iteration: 457999
loss: 0.9763494729995728,grad_norm: 0.6722741238009957, iteration: 458000
loss: 0.9989742636680603,grad_norm: 0.9002094152089751, iteration: 458001
loss: 0.9898195266723633,grad_norm: 0.7125255170197548, iteration: 458002
loss: 0.9893770813941956,grad_norm: 0.8634767839853034, iteration: 458003
loss: 0.9928541779518127,grad_norm: 0.6941881904706717, iteration: 458004
loss: 1.015095829963684,grad_norm: 0.9365699186992266, iteration: 458005
loss: 1.0122647285461426,grad_norm: 0.802928147227602, iteration: 458006
loss: 1.0151853561401367,grad_norm: 0.6534754781221198, iteration: 458007
loss: 0.9537324905395508,grad_norm: 0.8525386006552039, iteration: 458008
loss: 1.0247522592544556,grad_norm: 0.756649004462971, iteration: 458009
loss: 1.0059270858764648,grad_norm: 0.7818762297753856, iteration: 458010
loss: 1.0227057933807373,grad_norm: 0.7114778079462104, iteration: 458011
loss: 0.9822059273719788,grad_norm: 0.7685361062184658, iteration: 458012
loss: 0.9834475517272949,grad_norm: 0.864124404117484, iteration: 458013
loss: 0.9827245473861694,grad_norm: 0.7257008482784435, iteration: 458014
loss: 0.9822070002555847,grad_norm: 0.7169375043856591, iteration: 458015
loss: 0.956369936466217,grad_norm: 0.8138621218032703, iteration: 458016
loss: 0.994311511516571,grad_norm: 0.7404124645640776, iteration: 458017
loss: 1.0036466121673584,grad_norm: 0.8082796083302787, iteration: 458018
loss: 0.9873177409172058,grad_norm: 0.7108831738827154, iteration: 458019
loss: 1.0247048139572144,grad_norm: 0.8569154980165502, iteration: 458020
loss: 1.0052772760391235,grad_norm: 0.7074820605507811, iteration: 458021
loss: 1.0043118000030518,grad_norm: 0.8218500952912916, iteration: 458022
loss: 1.0113940238952637,grad_norm: 0.7904650546910957, iteration: 458023
loss: 0.9820380806922913,grad_norm: 0.6955195923439422, iteration: 458024
loss: 1.000866413116455,grad_norm: 0.756445684526334, iteration: 458025
loss: 1.0232689380645752,grad_norm: 0.6712514102146584, iteration: 458026
loss: 1.0083690881729126,grad_norm: 0.7221974870934117, iteration: 458027
loss: 0.9832792282104492,grad_norm: 0.7605101892753895, iteration: 458028
loss: 0.945841372013092,grad_norm: 0.7838684377411187, iteration: 458029
loss: 1.0091556310653687,grad_norm: 0.9284008767683588, iteration: 458030
loss: 0.9941627383232117,grad_norm: 0.7661073561955791, iteration: 458031
loss: 1.0109968185424805,grad_norm: 0.7065146286807997, iteration: 458032
loss: 0.9664506912231445,grad_norm: 0.7388459958293313, iteration: 458033
loss: 1.000970721244812,grad_norm: 0.7138275331699233, iteration: 458034
loss: 0.9400712847709656,grad_norm: 0.8612934097627148, iteration: 458035
loss: 1.0684312582015991,grad_norm: 0.7675691547150405, iteration: 458036
loss: 0.9552624821662903,grad_norm: 0.7237067843471957, iteration: 458037
loss: 1.0231086015701294,grad_norm: 0.7547779884686938, iteration: 458038
loss: 0.9616986513137817,grad_norm: 0.7285560934911742, iteration: 458039
loss: 1.1095811128616333,grad_norm: 1.0000000216663327, iteration: 458040
loss: 1.0008598566055298,grad_norm: 0.8192916235069884, iteration: 458041
loss: 1.0283920764923096,grad_norm: 0.7612777551778745, iteration: 458042
loss: 0.9688975214958191,grad_norm: 0.6994216607303713, iteration: 458043
loss: 1.0195729732513428,grad_norm: 0.8842273177616851, iteration: 458044
loss: 1.017823576927185,grad_norm: 0.8313709774444566, iteration: 458045
loss: 0.9661127924919128,grad_norm: 0.845454024755691, iteration: 458046
loss: 0.9697898030281067,grad_norm: 0.6860804150423889, iteration: 458047
loss: 0.9670897126197815,grad_norm: 0.6583042290062208, iteration: 458048
loss: 1.00674569606781,grad_norm: 0.9122411379870218, iteration: 458049
loss: 1.0798566341400146,grad_norm: 0.9999997718404223, iteration: 458050
loss: 1.021970272064209,grad_norm: 0.7983846463755708, iteration: 458051
loss: 1.0032925605773926,grad_norm: 0.8767450573451073, iteration: 458052
loss: 1.0099871158599854,grad_norm: 0.7993294692078143, iteration: 458053
loss: 1.0000299215316772,grad_norm: 0.9485309575802617, iteration: 458054
loss: 0.9720916748046875,grad_norm: 0.7952089116389874, iteration: 458055
loss: 0.9884305000305176,grad_norm: 0.8310150196469871, iteration: 458056
loss: 1.0062600374221802,grad_norm: 0.7331090692627646, iteration: 458057
loss: 1.0083061456680298,grad_norm: 0.8194506611474728, iteration: 458058
loss: 0.9962608218193054,grad_norm: 0.6935345109845384, iteration: 458059
loss: 1.0145779848098755,grad_norm: 0.6708619404039241, iteration: 458060
loss: 0.9663964509963989,grad_norm: 0.6351829801618909, iteration: 458061
loss: 1.00301194190979,grad_norm: 0.7307415306972678, iteration: 458062
loss: 1.0784438848495483,grad_norm: 0.9999997048787076, iteration: 458063
loss: 0.9978749752044678,grad_norm: 0.8633271329082569, iteration: 458064
loss: 1.0044010877609253,grad_norm: 0.7573697267073002, iteration: 458065
loss: 0.9938326478004456,grad_norm: 0.8272804451196402, iteration: 458066
loss: 1.0101888179779053,grad_norm: 0.6834449272399216, iteration: 458067
loss: 1.017525315284729,grad_norm: 0.7142369086550019, iteration: 458068
loss: 0.9562250971794128,grad_norm: 0.7425457993503346, iteration: 458069
loss: 0.9691017866134644,grad_norm: 0.9301321791539615, iteration: 458070
loss: 1.0289613008499146,grad_norm: 0.939304374724843, iteration: 458071
loss: 1.0051400661468506,grad_norm: 0.6843205636551114, iteration: 458072
loss: 1.0079021453857422,grad_norm: 0.8336262429952032, iteration: 458073
loss: 1.0234085321426392,grad_norm: 0.9387749121249709, iteration: 458074
loss: 1.181993842124939,grad_norm: 0.9999994259977699, iteration: 458075
loss: 1.0536547899246216,grad_norm: 0.9988188327940091, iteration: 458076
loss: 1.0206226110458374,grad_norm: 0.9999990751808139, iteration: 458077
loss: 0.9769278168678284,grad_norm: 0.8578473793440227, iteration: 458078
loss: 1.00324547290802,grad_norm: 0.7744107842612719, iteration: 458079
loss: 1.040284514427185,grad_norm: 0.8905409950621462, iteration: 458080
loss: 0.9935218691825867,grad_norm: 0.828746001034819, iteration: 458081
loss: 1.0269804000854492,grad_norm: 0.8692867129771836, iteration: 458082
loss: 1.0407480001449585,grad_norm: 0.7949940798095204, iteration: 458083
loss: 1.197110891342163,grad_norm: 0.9999997140885184, iteration: 458084
loss: 1.0033084154129028,grad_norm: 0.9576238354212343, iteration: 458085
loss: 1.069726586341858,grad_norm: 0.9999997480566845, iteration: 458086
loss: 0.9901115298271179,grad_norm: 0.813430123873016, iteration: 458087
loss: 0.9883771538734436,grad_norm: 0.8240165835534574, iteration: 458088
loss: 1.0179499387741089,grad_norm: 0.6693200349269866, iteration: 458089
loss: 0.9972337484359741,grad_norm: 0.9047190833116192, iteration: 458090
loss: 1.018660545349121,grad_norm: 0.9896703229741247, iteration: 458091
loss: 0.985658586025238,grad_norm: 0.8225954604512978, iteration: 458092
loss: 1.0128767490386963,grad_norm: 0.7844139675990683, iteration: 458093
loss: 0.9945381879806519,grad_norm: 0.7130506665450885, iteration: 458094
loss: 0.9903839230537415,grad_norm: 0.6921094075094395, iteration: 458095
loss: 1.0390090942382812,grad_norm: 0.8074236383297926, iteration: 458096
loss: 1.0111538171768188,grad_norm: 0.7021217818137313, iteration: 458097
loss: 1.003322720527649,grad_norm: 0.7425008177697339, iteration: 458098
loss: 1.0145502090454102,grad_norm: 0.8531396771429457, iteration: 458099
loss: 0.9921011328697205,grad_norm: 0.7943981824355493, iteration: 458100
loss: 1.0149749517440796,grad_norm: 0.9999991495963912, iteration: 458101
loss: 0.9901652932167053,grad_norm: 0.8442631718095339, iteration: 458102
loss: 0.9844827055931091,grad_norm: 0.8342042897923002, iteration: 458103
loss: 0.9998995661735535,grad_norm: 0.7248800795445026, iteration: 458104
loss: 1.0179272890090942,grad_norm: 0.7425206221882877, iteration: 458105
loss: 0.9613714218139648,grad_norm: 0.7627846607408598, iteration: 458106
loss: 1.015641212463379,grad_norm: 0.7822045484437349, iteration: 458107
loss: 1.0859105587005615,grad_norm: 0.7414012716408279, iteration: 458108
loss: 0.987818717956543,grad_norm: 0.9752968701271512, iteration: 458109
loss: 0.9913979768753052,grad_norm: 0.8940804494957926, iteration: 458110
loss: 0.9922096729278564,grad_norm: 0.802816233098205, iteration: 458111
loss: 0.9691457748413086,grad_norm: 0.8338607900622935, iteration: 458112
loss: 1.0228222608566284,grad_norm: 0.7018837553709198, iteration: 458113
loss: 0.9861615896224976,grad_norm: 0.8750554186271471, iteration: 458114
loss: 1.002455234527588,grad_norm: 0.8980642549465703, iteration: 458115
loss: 0.9757535457611084,grad_norm: 0.9166232454125783, iteration: 458116
loss: 0.9874528050422668,grad_norm: 0.7823935772863089, iteration: 458117
loss: 0.9762604832649231,grad_norm: 0.7120602945164932, iteration: 458118
loss: 1.0485190153121948,grad_norm: 0.8546154784852696, iteration: 458119
loss: 1.014889121055603,grad_norm: 0.8851583890740847, iteration: 458120
loss: 0.9861293435096741,grad_norm: 0.7288952648524484, iteration: 458121
loss: 1.0086313486099243,grad_norm: 0.7735657368032359, iteration: 458122
loss: 0.9849603176116943,grad_norm: 0.7147834957388699, iteration: 458123
loss: 0.9866737127304077,grad_norm: 0.8243605746814122, iteration: 458124
loss: 1.0139559507369995,grad_norm: 0.8558414216932579, iteration: 458125
loss: 1.0218517780303955,grad_norm: 0.6011681208101025, iteration: 458126
loss: 1.0291633605957031,grad_norm: 0.7441221029862406, iteration: 458127
loss: 1.0924148559570312,grad_norm: 0.999999605262659, iteration: 458128
loss: 1.0350821018218994,grad_norm: 0.5722079590382544, iteration: 458129
loss: 0.9829345941543579,grad_norm: 0.7398311063028388, iteration: 458130
loss: 0.9908166527748108,grad_norm: 0.948810805820683, iteration: 458131
loss: 0.9928053617477417,grad_norm: 0.9258138744527524, iteration: 458132
loss: 1.0527065992355347,grad_norm: 0.833786500478634, iteration: 458133
loss: 0.9992983937263489,grad_norm: 0.6586857559112901, iteration: 458134
loss: 1.0222140550613403,grad_norm: 0.8939357586345272, iteration: 458135
loss: 1.0261002779006958,grad_norm: 0.6977562071118486, iteration: 458136
loss: 0.9872018098831177,grad_norm: 0.8162191934306144, iteration: 458137
loss: 0.9763773083686829,grad_norm: 0.727359118089067, iteration: 458138
loss: 0.9969238638877869,grad_norm: 0.8410457532588226, iteration: 458139
loss: 1.0861984491348267,grad_norm: 0.9999996273491789, iteration: 458140
loss: 1.000671148300171,grad_norm: 0.6700806357191789, iteration: 458141
loss: 0.9691438674926758,grad_norm: 0.6833496203681932, iteration: 458142
loss: 1.0851633548736572,grad_norm: 0.7459352892182572, iteration: 458143
loss: 1.0094801187515259,grad_norm: 0.7663375887295729, iteration: 458144
loss: 1.0006225109100342,grad_norm: 0.906870966379857, iteration: 458145
loss: 0.9994111061096191,grad_norm: 0.6991500275469205, iteration: 458146
loss: 0.995940089225769,grad_norm: 0.6813216542799273, iteration: 458147
loss: 0.9894111752510071,grad_norm: 0.6638501695659306, iteration: 458148
loss: 0.992439329624176,grad_norm: 0.6985683952723805, iteration: 458149
loss: 0.9880703091621399,grad_norm: 0.8546750993990339, iteration: 458150
loss: 1.0179039239883423,grad_norm: 0.7659733511581096, iteration: 458151
loss: 0.9834666848182678,grad_norm: 0.6328552144860436, iteration: 458152
loss: 0.9852410554885864,grad_norm: 0.9077942059853049, iteration: 458153
loss: 0.9789819717407227,grad_norm: 0.8170154830910298, iteration: 458154
loss: 1.0387567281723022,grad_norm: 0.9782352170356152, iteration: 458155
loss: 1.005342960357666,grad_norm: 0.8641313355892125, iteration: 458156
loss: 1.0298532247543335,grad_norm: 1.0000000171336803, iteration: 458157
loss: 0.9741278886795044,grad_norm: 0.9067103088108553, iteration: 458158
loss: 1.0001686811447144,grad_norm: 0.7812108788410778, iteration: 458159
loss: 0.9948686361312866,grad_norm: 1.0000000460034237, iteration: 458160
loss: 0.9967436194419861,grad_norm: 0.7281946157384018, iteration: 458161
loss: 0.9674038290977478,grad_norm: 0.7012836610174317, iteration: 458162
loss: 1.0100505352020264,grad_norm: 0.6572192174285107, iteration: 458163
loss: 1.0208219289779663,grad_norm: 0.6393693117543383, iteration: 458164
loss: 0.9821627736091614,grad_norm: 0.7457325838396371, iteration: 458165
loss: 0.9888096451759338,grad_norm: 0.6891394468373717, iteration: 458166
loss: 0.9876956343650818,grad_norm: 0.7960063689754265, iteration: 458167
loss: 1.0492786169052124,grad_norm: 0.8024935097365459, iteration: 458168
loss: 0.9666976928710938,grad_norm: 0.8462491595972715, iteration: 458169
loss: 0.9999547600746155,grad_norm: 0.7450134609816669, iteration: 458170
loss: 0.9767817258834839,grad_norm: 0.6763168832104898, iteration: 458171
loss: 0.9815284013748169,grad_norm: 0.7465859185884758, iteration: 458172
loss: 1.0661468505859375,grad_norm: 0.7570656900749408, iteration: 458173
loss: 0.9616596102714539,grad_norm: 0.7213949744245246, iteration: 458174
loss: 0.9991763830184937,grad_norm: 0.6665334608223098, iteration: 458175
loss: 0.9704394936561584,grad_norm: 0.6635378310529443, iteration: 458176
loss: 1.0001537799835205,grad_norm: 0.6491866327373135, iteration: 458177
loss: 1.0057235956192017,grad_norm: 0.7306554968039668, iteration: 458178
loss: 1.0032751560211182,grad_norm: 0.7362237613111233, iteration: 458179
loss: 0.9715845584869385,grad_norm: 0.6440116447666134, iteration: 458180
loss: 0.9910237193107605,grad_norm: 0.8471697802783901, iteration: 458181
loss: 1.0147976875305176,grad_norm: 0.9999996508051843, iteration: 458182
loss: 1.0016123056411743,grad_norm: 0.9999998902305333, iteration: 458183
loss: 0.9619923830032349,grad_norm: 0.8222164613351445, iteration: 458184
loss: 0.9850682616233826,grad_norm: 0.6892208985314614, iteration: 458185
loss: 1.0034445524215698,grad_norm: 0.7219948994943858, iteration: 458186
loss: 0.9981212019920349,grad_norm: 0.7779640968764008, iteration: 458187
loss: 0.976042628288269,grad_norm: 0.9999990979005304, iteration: 458188
loss: 1.008612871170044,grad_norm: 0.9999990583156213, iteration: 458189
loss: 0.9842920899391174,grad_norm: 0.8241020849557839, iteration: 458190
loss: 1.070603847503662,grad_norm: 0.9999992189865253, iteration: 458191
loss: 1.063240647315979,grad_norm: 0.8053434693214306, iteration: 458192
loss: 1.0094693899154663,grad_norm: 0.7125068519908011, iteration: 458193
loss: 0.9593532085418701,grad_norm: 0.7242951581374644, iteration: 458194
loss: 0.9654727578163147,grad_norm: 0.839391874854991, iteration: 458195
loss: 1.1199043989181519,grad_norm: 0.746611711838185, iteration: 458196
loss: 1.0307245254516602,grad_norm: 0.6724380276982433, iteration: 458197
loss: 1.0050309896469116,grad_norm: 0.8476767538680794, iteration: 458198
loss: 1.029394507408142,grad_norm: 0.7209247962834378, iteration: 458199
loss: 0.9675889015197754,grad_norm: 0.7085900672280926, iteration: 458200
loss: 1.024487018585205,grad_norm: 0.9999990410843971, iteration: 458201
loss: 1.0089930295944214,grad_norm: 0.7498076092280275, iteration: 458202
loss: 0.9885894060134888,grad_norm: 0.7056862982291263, iteration: 458203
loss: 0.9575821161270142,grad_norm: 0.7400589775758146, iteration: 458204
loss: 1.0007469654083252,grad_norm: 0.9999994357943213, iteration: 458205
loss: 1.0454579591751099,grad_norm: 0.9999990786831682, iteration: 458206
loss: 0.9731877446174622,grad_norm: 0.7787791447754872, iteration: 458207
loss: 1.0239648818969727,grad_norm: 0.7653739781216956, iteration: 458208
loss: 0.9819600582122803,grad_norm: 0.8023076834715259, iteration: 458209
loss: 1.0050631761550903,grad_norm: 0.6570722712448505, iteration: 458210
loss: 0.9788085222244263,grad_norm: 0.7357519215407393, iteration: 458211
loss: 1.0238560438156128,grad_norm: 0.8182635433688688, iteration: 458212
loss: 1.0092201232910156,grad_norm: 0.6809282801364932, iteration: 458213
loss: 1.0083527565002441,grad_norm: 0.9718781723221753, iteration: 458214
loss: 0.9918103218078613,grad_norm: 0.8112703676834448, iteration: 458215
loss: 1.0504158735275269,grad_norm: 0.8495157674949537, iteration: 458216
loss: 1.012357234954834,grad_norm: 0.7972007281181291, iteration: 458217
loss: 1.2079429626464844,grad_norm: 0.9999997316728544, iteration: 458218
loss: 1.004888892173767,grad_norm: 0.9229613448054983, iteration: 458219
loss: 0.9759624600410461,grad_norm: 0.9265731035058241, iteration: 458220
loss: 0.9978619813919067,grad_norm: 0.8155878712112067, iteration: 458221
loss: 0.9997170567512512,grad_norm: 0.7954696246093976, iteration: 458222
loss: 0.9951416254043579,grad_norm: 0.8477244079071624, iteration: 458223
loss: 0.9948867559432983,grad_norm: 0.716927293907153, iteration: 458224
loss: 1.0195387601852417,grad_norm: 0.8054352225960866, iteration: 458225
loss: 0.9860190153121948,grad_norm: 0.8257267173005104, iteration: 458226
loss: 1.042057752609253,grad_norm: 0.7498571253736384, iteration: 458227
loss: 0.9866628050804138,grad_norm: 0.6763987738635541, iteration: 458228
loss: 1.0105170011520386,grad_norm: 0.7482574278006466, iteration: 458229
loss: 1.0402907133102417,grad_norm: 0.999999981699523, iteration: 458230
loss: 1.0969269275665283,grad_norm: 0.9923173532741858, iteration: 458231
loss: 0.9853348135948181,grad_norm: 0.8534466174686752, iteration: 458232
loss: 1.001821756362915,grad_norm: 0.6440824382864455, iteration: 458233
loss: 1.0216097831726074,grad_norm: 0.7689326083682132, iteration: 458234
loss: 0.9566335082054138,grad_norm: 0.92977828739532, iteration: 458235
loss: 0.980871319770813,grad_norm: 0.7379520841991701, iteration: 458236
loss: 1.0115150213241577,grad_norm: 0.6853137719374168, iteration: 458237
loss: 0.972514808177948,grad_norm: 0.6414984918961459, iteration: 458238
loss: 1.005789875984192,grad_norm: 0.7617777408721877, iteration: 458239
loss: 1.0043160915374756,grad_norm: 0.7760861123306061, iteration: 458240
loss: 1.0219043493270874,grad_norm: 0.7998575482147788, iteration: 458241
loss: 1.0131382942199707,grad_norm: 0.7722421867669063, iteration: 458242
loss: 0.967097282409668,grad_norm: 0.7914556519214453, iteration: 458243
loss: 0.9871699810028076,grad_norm: 0.7045614004625254, iteration: 458244
loss: 0.974351167678833,grad_norm: 0.9749360604381815, iteration: 458245
loss: 0.9983424544334412,grad_norm: 0.7233069031566103, iteration: 458246
loss: 0.9935894012451172,grad_norm: 0.6946171160548028, iteration: 458247
loss: 1.014527678489685,grad_norm: 0.767130090179965, iteration: 458248
loss: 1.0057048797607422,grad_norm: 0.7772919289827543, iteration: 458249
loss: 1.0204182863235474,grad_norm: 0.7750893610055645, iteration: 458250
loss: 0.9934060573577881,grad_norm: 0.9178890005763127, iteration: 458251
loss: 1.0288254022598267,grad_norm: 0.79245474364588, iteration: 458252
loss: 1.0183316469192505,grad_norm: 0.8116582812184652, iteration: 458253
loss: 1.0007219314575195,grad_norm: 0.7651445423723652, iteration: 458254
loss: 1.0023788213729858,grad_norm: 0.9999992362259187, iteration: 458255
loss: 1.0079231262207031,grad_norm: 0.7804614504767409, iteration: 458256
loss: 0.9915529489517212,grad_norm: 0.6799434117495348, iteration: 458257
loss: 1.0432133674621582,grad_norm: 0.9999998554525346, iteration: 458258
loss: 1.026340365409851,grad_norm: 0.823341689724618, iteration: 458259
loss: 0.982921838760376,grad_norm: 0.9999993994538499, iteration: 458260
loss: 1.0130536556243896,grad_norm: 0.7944102569974444, iteration: 458261
loss: 1.0757819414138794,grad_norm: 0.6983149639459341, iteration: 458262
loss: 0.9995459318161011,grad_norm: 0.8158842817030516, iteration: 458263
loss: 1.0123512744903564,grad_norm: 0.7149065856673933, iteration: 458264
loss: 0.9955226182937622,grad_norm: 0.710667542501718, iteration: 458265
loss: 0.988109290599823,grad_norm: 0.9999997526261222, iteration: 458266
loss: 0.9588918089866638,grad_norm: 0.6736995541208197, iteration: 458267
loss: 1.0746324062347412,grad_norm: 0.7828629167156569, iteration: 458268
loss: 0.991399347782135,grad_norm: 0.8050024338179, iteration: 458269
loss: 1.06423819065094,grad_norm: 0.9999997182004055, iteration: 458270
loss: 0.9991689920425415,grad_norm: 0.683670919589474, iteration: 458271
loss: 1.04133141040802,grad_norm: 0.7467252843308073, iteration: 458272
loss: 0.9909631609916687,grad_norm: 0.8778154079995805, iteration: 458273
loss: 0.9719560146331787,grad_norm: 0.7395889877809704, iteration: 458274
loss: 1.0126526355743408,grad_norm: 0.9999996399594357, iteration: 458275
loss: 1.0098477602005005,grad_norm: 0.9999991924188646, iteration: 458276
loss: 1.0559924840927124,grad_norm: 0.7768099289496585, iteration: 458277
loss: 1.057251214981079,grad_norm: 0.8183499318745665, iteration: 458278
loss: 0.9780547618865967,grad_norm: 0.705618900651036, iteration: 458279
loss: 0.9870065450668335,grad_norm: 0.8783067827218748, iteration: 458280
loss: 0.98768550157547,grad_norm: 0.7337238914134291, iteration: 458281
loss: 0.9666005373001099,grad_norm: 0.7436900432500076, iteration: 458282
loss: 1.0061845779418945,grad_norm: 0.9391664450512016, iteration: 458283
loss: 0.9856336712837219,grad_norm: 0.735142118901858, iteration: 458284
loss: 0.967420756816864,grad_norm: 0.7713938550745255, iteration: 458285
loss: 0.9931482076644897,grad_norm: 0.6560077192069652, iteration: 458286
loss: 1.0068680047988892,grad_norm: 0.7583993018083833, iteration: 458287
loss: 0.997043251991272,grad_norm: 0.7470722847218253, iteration: 458288
loss: 1.0107200145721436,grad_norm: 0.7692308620605645, iteration: 458289
loss: 1.0487935543060303,grad_norm: 0.9999992060375478, iteration: 458290
loss: 1.000279426574707,grad_norm: 0.7965594242688578, iteration: 458291
loss: 0.9664306640625,grad_norm: 0.7053511630991373, iteration: 458292
loss: 1.0168660879135132,grad_norm: 0.8032659304284541, iteration: 458293
loss: 0.9935743808746338,grad_norm: 0.8508216887159386, iteration: 458294
loss: 0.9749541878700256,grad_norm: 0.7713378483337834, iteration: 458295
loss: 1.0065504312515259,grad_norm: 0.7930631515117501, iteration: 458296
loss: 1.0219866037368774,grad_norm: 0.9033098272831418, iteration: 458297
loss: 1.0076336860656738,grad_norm: 0.7101820868157616, iteration: 458298
loss: 0.9805587530136108,grad_norm: 0.801392607330396, iteration: 458299
loss: 0.9950100779533386,grad_norm: 0.7558094299867717, iteration: 458300
loss: 1.055282711982727,grad_norm: 0.9561624133527917, iteration: 458301
loss: 1.0154982805252075,grad_norm: 0.7663274163440023, iteration: 458302
loss: 0.9562771916389465,grad_norm: 0.7231589316945721, iteration: 458303
loss: 1.0000718832015991,grad_norm: 0.7039583128759328, iteration: 458304
loss: 0.9915054440498352,grad_norm: 0.9325039044758254, iteration: 458305
loss: 1.0081769227981567,grad_norm: 0.9999995912943095, iteration: 458306
loss: 0.957085907459259,grad_norm: 0.7525322430003891, iteration: 458307
loss: 0.9904659390449524,grad_norm: 0.9999990235269275, iteration: 458308
loss: 0.9888812899589539,grad_norm: 0.8752667477701807, iteration: 458309
loss: 1.0175495147705078,grad_norm: 0.6795032760520133, iteration: 458310
loss: 0.9800716042518616,grad_norm: 0.7401986833153877, iteration: 458311
loss: 0.9940637946128845,grad_norm: 0.8115758417957484, iteration: 458312
loss: 1.01859712600708,grad_norm: 0.7481564432544353, iteration: 458313
loss: 0.9918528199195862,grad_norm: 0.8271813787420419, iteration: 458314
loss: 0.9836699366569519,grad_norm: 0.8806388875231739, iteration: 458315
loss: 1.085039496421814,grad_norm: 0.9999997559951145, iteration: 458316
loss: 1.0034112930297852,grad_norm: 0.7886430869858628, iteration: 458317
loss: 1.0064572095870972,grad_norm: 0.9378654562041218, iteration: 458318
loss: 1.0079734325408936,grad_norm: 0.729228536086402, iteration: 458319
loss: 1.0072842836380005,grad_norm: 0.6440697914071715, iteration: 458320
loss: 1.0351980924606323,grad_norm: 0.9999996613081481, iteration: 458321
loss: 1.0324046611785889,grad_norm: 0.7942106107464582, iteration: 458322
loss: 1.0029925107955933,grad_norm: 0.7634212611465249, iteration: 458323
loss: 1.0048792362213135,grad_norm: 0.6615708674815164, iteration: 458324
loss: 0.9623026847839355,grad_norm: 0.8016607308505935, iteration: 458325
loss: 1.0018302202224731,grad_norm: 0.6894208170513281, iteration: 458326
loss: 1.0405457019805908,grad_norm: 0.9211793800081755, iteration: 458327
loss: 1.0078176259994507,grad_norm: 0.7165985057253672, iteration: 458328
loss: 0.9855401515960693,grad_norm: 0.8047280824661059, iteration: 458329
loss: 0.9702157974243164,grad_norm: 0.7575472012430841, iteration: 458330
loss: 1.0069022178649902,grad_norm: 0.9999995367778778, iteration: 458331
loss: 0.9969598650932312,grad_norm: 0.9263898392055167, iteration: 458332
loss: 1.0262285470962524,grad_norm: 0.9999996677753538, iteration: 458333
loss: 1.0087324380874634,grad_norm: 0.7871285913441015, iteration: 458334
loss: 1.0934404134750366,grad_norm: 0.757366201318648, iteration: 458335
loss: 0.988531231880188,grad_norm: 0.6703793755566929, iteration: 458336
loss: 0.975263237953186,grad_norm: 0.6517560468980723, iteration: 458337
loss: 0.987224817276001,grad_norm: 0.6680650297826259, iteration: 458338
loss: 1.0629875659942627,grad_norm: 0.9603887221580777, iteration: 458339
loss: 1.0708134174346924,grad_norm: 0.9999996681427223, iteration: 458340
loss: 0.9937931299209595,grad_norm: 0.8226955307494815, iteration: 458341
loss: 1.0132908821105957,grad_norm: 0.7185145353406335, iteration: 458342
loss: 0.9879507422447205,grad_norm: 0.9999989916160927, iteration: 458343
loss: 1.0264554023742676,grad_norm: 0.8030495266882216, iteration: 458344
loss: 0.9882344603538513,grad_norm: 0.6918114417539438, iteration: 458345
loss: 0.9907174706459045,grad_norm: 0.9143205822843995, iteration: 458346
loss: 0.9704245328903198,grad_norm: 0.8076745337382134, iteration: 458347
loss: 0.9821353554725647,grad_norm: 0.873916142329821, iteration: 458348
loss: 0.981724202632904,grad_norm: 0.7714836458756988, iteration: 458349
loss: 0.9960087537765503,grad_norm: 0.791880682195106, iteration: 458350
loss: 1.0299848318099976,grad_norm: 0.8257136519347021, iteration: 458351
loss: 1.0109450817108154,grad_norm: 0.9769328845418602, iteration: 458352
loss: 1.0249518156051636,grad_norm: 0.74733003709572, iteration: 458353
loss: 0.965383768081665,grad_norm: 0.7273980119518955, iteration: 458354
loss: 1.020298719406128,grad_norm: 0.8120121518131137, iteration: 458355
loss: 1.0041618347167969,grad_norm: 0.6563723931359355, iteration: 458356
loss: 0.9964228272438049,grad_norm: 0.7269534097164067, iteration: 458357
loss: 1.0443899631500244,grad_norm: 0.9999992537981643, iteration: 458358
loss: 1.1428921222686768,grad_norm: 0.999999957583732, iteration: 458359
loss: 0.9949827790260315,grad_norm: 0.6429337115125409, iteration: 458360
loss: 0.9821043610572815,grad_norm: 0.8704909145904385, iteration: 458361
loss: 0.9797207713127136,grad_norm: 0.7161279046230148, iteration: 458362
loss: 1.081595540046692,grad_norm: 0.9307977778580557, iteration: 458363
loss: 0.971398115158081,grad_norm: 0.6417482188425178, iteration: 458364
loss: 0.9698317050933838,grad_norm: 0.7949341270482513, iteration: 458365
loss: 1.00434410572052,grad_norm: 0.7696718208394364, iteration: 458366
loss: 0.9884427189826965,grad_norm: 0.8765551898705213, iteration: 458367
loss: 1.0513112545013428,grad_norm: 0.8861950607638901, iteration: 458368
loss: 1.0105777978897095,grad_norm: 0.8358945243848989, iteration: 458369
loss: 0.9759984016418457,grad_norm: 0.7447539173970679, iteration: 458370
loss: 1.0194519758224487,grad_norm: 0.9023719756442159, iteration: 458371
loss: 1.0677485466003418,grad_norm: 0.9999999829611036, iteration: 458372
loss: 1.0046485662460327,grad_norm: 0.7741953005341432, iteration: 458373
loss: 0.9908565878868103,grad_norm: 0.6962874390230417, iteration: 458374
loss: 1.0143241882324219,grad_norm: 0.7499769293965322, iteration: 458375
loss: 1.0228055715560913,grad_norm: 0.775534602214071, iteration: 458376
loss: 0.961726188659668,grad_norm: 0.7994118817222009, iteration: 458377
loss: 0.967978298664093,grad_norm: 0.7685067452709837, iteration: 458378
loss: 1.0361418724060059,grad_norm: 0.7901007181537616, iteration: 458379
loss: 1.0316632986068726,grad_norm: 0.6749032082918222, iteration: 458380
loss: 1.0054675340652466,grad_norm: 0.7232617346693349, iteration: 458381
loss: 0.9799116253852844,grad_norm: 0.8386219039610836, iteration: 458382
loss: 0.9935612678527832,grad_norm: 0.6960515822870345, iteration: 458383
loss: 0.9688642621040344,grad_norm: 0.7065177800213172, iteration: 458384
loss: 0.9816592335700989,grad_norm: 0.7233638384683955, iteration: 458385
loss: 1.0312244892120361,grad_norm: 0.8849426472212263, iteration: 458386
loss: 0.9978765249252319,grad_norm: 0.7784923402484593, iteration: 458387
loss: 1.0683406591415405,grad_norm: 0.940761779324457, iteration: 458388
loss: 0.995281457901001,grad_norm: 0.8715416344463444, iteration: 458389
loss: 0.9991809725761414,grad_norm: 0.9999996264004655, iteration: 458390
loss: 0.9987850189208984,grad_norm: 0.7583753222209705, iteration: 458391
loss: 0.9850573539733887,grad_norm: 0.6501575777352455, iteration: 458392
loss: 1.0020837783813477,grad_norm: 0.9999999497865304, iteration: 458393
loss: 1.000640630722046,grad_norm: 0.7440868873111958, iteration: 458394
loss: 0.9963353276252747,grad_norm: 0.7183221731484449, iteration: 458395
loss: 1.0277423858642578,grad_norm: 0.9467864468563213, iteration: 458396
loss: 1.1525324583053589,grad_norm: 0.9999998548728936, iteration: 458397
loss: 0.9889826774597168,grad_norm: 0.7500070475502417, iteration: 458398
loss: 0.9631635546684265,grad_norm: 0.863413151574428, iteration: 458399
loss: 0.9790137410163879,grad_norm: 0.8475543201010034, iteration: 458400
loss: 0.9810007810592651,grad_norm: 0.7367364750969673, iteration: 458401
loss: 1.0014675855636597,grad_norm: 0.999999020961438, iteration: 458402
loss: 1.0381214618682861,grad_norm: 0.9999991293066348, iteration: 458403
loss: 0.9750399589538574,grad_norm: 0.9114867868552391, iteration: 458404
loss: 1.0456467866897583,grad_norm: 0.7458234685979063, iteration: 458405
loss: 1.0837914943695068,grad_norm: 0.9999996999290088, iteration: 458406
loss: 0.9769848585128784,grad_norm: 0.8715430101058288, iteration: 458407
loss: 1.0005983114242554,grad_norm: 0.8249195417912447, iteration: 458408
loss: 0.9910740256309509,grad_norm: 0.9999998368905051, iteration: 458409
loss: 0.9850427508354187,grad_norm: 0.7697641197788715, iteration: 458410
loss: 0.9887755513191223,grad_norm: 0.7949104781819978, iteration: 458411
loss: 1.027361512184143,grad_norm: 0.8476392744037978, iteration: 458412
loss: 1.0239111185073853,grad_norm: 0.9999992642755804, iteration: 458413
loss: 1.0108153820037842,grad_norm: 0.6279353071129823, iteration: 458414
loss: 0.9838208556175232,grad_norm: 0.995586431979769, iteration: 458415
loss: 1.026848316192627,grad_norm: 0.6755902098138766, iteration: 458416
loss: 1.1952229738235474,grad_norm: 1.0000000514356204, iteration: 458417
loss: 0.9524984359741211,grad_norm: 0.8154346060834846, iteration: 458418
loss: 1.003071904182434,grad_norm: 0.992601188993203, iteration: 458419
loss: 1.0014400482177734,grad_norm: 0.7943502846708173, iteration: 458420
loss: 0.9862857460975647,grad_norm: 0.8695648864397468, iteration: 458421
loss: 1.0292211771011353,grad_norm: 0.6632289211272053, iteration: 458422
loss: 1.0395808219909668,grad_norm: 0.7791839830608991, iteration: 458423
loss: 1.021899938583374,grad_norm: 0.9999993209222615, iteration: 458424
loss: 0.9781510233879089,grad_norm: 0.7111933254733459, iteration: 458425
loss: 0.9782924652099609,grad_norm: 0.7488166489972687, iteration: 458426
loss: 1.006877064704895,grad_norm: 0.744948938968635, iteration: 458427
loss: 1.023752212524414,grad_norm: 0.7646031797855385, iteration: 458428
loss: 1.1944135427474976,grad_norm: 0.9999991619966254, iteration: 458429
loss: 0.9778960347175598,grad_norm: 0.6421546209963142, iteration: 458430
loss: 1.0014017820358276,grad_norm: 0.7798999048608518, iteration: 458431
loss: 1.0099865198135376,grad_norm: 0.8225479018783963, iteration: 458432
loss: 0.9957293272018433,grad_norm: 0.686747170931398, iteration: 458433
loss: 1.0210167169570923,grad_norm: 0.7948482603655234, iteration: 458434
loss: 1.1149874925613403,grad_norm: 0.9999999855629598, iteration: 458435
loss: 1.0663976669311523,grad_norm: 0.9999996756086044, iteration: 458436
loss: 0.9543853402137756,grad_norm: 0.6766190044847805, iteration: 458437
loss: 1.029617428779602,grad_norm: 0.9069305209052978, iteration: 458438
loss: 1.0206845998764038,grad_norm: 0.8415964913115511, iteration: 458439
loss: 1.0094009637832642,grad_norm: 0.8714146858490738, iteration: 458440
loss: 1.0306503772735596,grad_norm: 0.8560052410279704, iteration: 458441
loss: 1.0074403285980225,grad_norm: 0.6662042746434917, iteration: 458442
loss: 1.0130289793014526,grad_norm: 0.8420869117229776, iteration: 458443
loss: 0.9823824167251587,grad_norm: 0.7749098044976739, iteration: 458444
loss: 1.0098878145217896,grad_norm: 0.6009160521095667, iteration: 458445
loss: 1.0600647926330566,grad_norm: 0.9999998779682542, iteration: 458446
loss: 0.9338324666023254,grad_norm: 0.6494780362474204, iteration: 458447
loss: 1.0267199277877808,grad_norm: 0.6310990562429571, iteration: 458448
loss: 0.9981235265731812,grad_norm: 0.9661062075813792, iteration: 458449
loss: 1.0145964622497559,grad_norm: 0.745165044927069, iteration: 458450
loss: 1.0199748277664185,grad_norm: 0.8944757300174389, iteration: 458451
loss: 1.0274866819381714,grad_norm: 0.6812623312797841, iteration: 458452
loss: 1.019791841506958,grad_norm: 0.6853346680003228, iteration: 458453
loss: 1.0005582571029663,grad_norm: 0.7807300331466597, iteration: 458454
loss: 0.9859590530395508,grad_norm: 0.9059765671419899, iteration: 458455
loss: 0.9997608661651611,grad_norm: 0.8970031518540109, iteration: 458456
loss: 1.0230180025100708,grad_norm: 0.9999991936183873, iteration: 458457
loss: 0.9844212532043457,grad_norm: 0.7865901015771471, iteration: 458458
loss: 0.9948117136955261,grad_norm: 0.8777167044828519, iteration: 458459
loss: 0.9948542714118958,grad_norm: 0.9152865560469442, iteration: 458460
loss: 1.027199149131775,grad_norm: 0.8215037817088439, iteration: 458461
loss: 1.0217727422714233,grad_norm: 0.8083717544451826, iteration: 458462
loss: 1.004597783088684,grad_norm: 0.7051685457021731, iteration: 458463
loss: 0.9780452847480774,grad_norm: 0.6924636988732426, iteration: 458464
loss: 1.0119222402572632,grad_norm: 0.8548956386698608, iteration: 458465
loss: 1.0242078304290771,grad_norm: 0.783101809159273, iteration: 458466
loss: 0.9684768915176392,grad_norm: 0.7904719039213467, iteration: 458467
loss: 1.0041532516479492,grad_norm: 0.9275702936063297, iteration: 458468
loss: 1.019019603729248,grad_norm: 0.8213354139588419, iteration: 458469
loss: 0.9798904061317444,grad_norm: 0.7477242508745361, iteration: 458470
loss: 0.9884926676750183,grad_norm: 0.8039233545821832, iteration: 458471
loss: 1.0030531883239746,grad_norm: 0.80915426887901, iteration: 458472
loss: 0.9947473406791687,grad_norm: 0.6611261690064384, iteration: 458473
loss: 0.9726853370666504,grad_norm: 0.8763594021469557, iteration: 458474
loss: 0.9725738763809204,grad_norm: 0.7641345423666308, iteration: 458475
loss: 0.9834834337234497,grad_norm: 0.8132810505585617, iteration: 458476
loss: 1.001798152923584,grad_norm: 0.7671374683408101, iteration: 458477
loss: 0.9970158338546753,grad_norm: 0.8826005819551849, iteration: 458478
loss: 0.9990654587745667,grad_norm: 0.77561731283388, iteration: 458479
loss: 1.0310403108596802,grad_norm: 0.9999991527781099, iteration: 458480
loss: 1.0416035652160645,grad_norm: 0.7676573282786131, iteration: 458481
loss: 0.9960837364196777,grad_norm: 0.8912464534904694, iteration: 458482
loss: 1.0032771825790405,grad_norm: 0.7515775054728212, iteration: 458483
loss: 0.9887367486953735,grad_norm: 0.8612161266789936, iteration: 458484
loss: 0.9994648098945618,grad_norm: 0.9999997286388577, iteration: 458485
loss: 1.0060876607894897,grad_norm: 0.7629679648066674, iteration: 458486
loss: 0.9655081033706665,grad_norm: 0.8453364202099466, iteration: 458487
loss: 0.9594032168388367,grad_norm: 0.6775468683340938, iteration: 458488
loss: 0.9906165599822998,grad_norm: 0.7949523903550904, iteration: 458489
loss: 0.9726828336715698,grad_norm: 0.9731145367151189, iteration: 458490
loss: 0.989613950252533,grad_norm: 0.8400398331352634, iteration: 458491
loss: 1.011712670326233,grad_norm: 0.9423774043728803, iteration: 458492
loss: 1.0807058811187744,grad_norm: 1.0000000121620114, iteration: 458493
loss: 1.004868745803833,grad_norm: 0.7448430370912928, iteration: 458494
loss: 1.0464781522750854,grad_norm: 0.694870383684929, iteration: 458495
loss: 1.0152453184127808,grad_norm: 0.9999994497127328, iteration: 458496
loss: 1.011720895767212,grad_norm: 0.7587588342619908, iteration: 458497
loss: 0.9896432757377625,grad_norm: 0.7353426753709309, iteration: 458498
loss: 0.9896939992904663,grad_norm: 0.8226522609080343, iteration: 458499
loss: 1.0272738933563232,grad_norm: 0.999999022611707, iteration: 458500
loss: 1.0152863264083862,grad_norm: 0.999999461853152, iteration: 458501
loss: 1.0162603855133057,grad_norm: 0.8024046127893177, iteration: 458502
loss: 0.9972082376480103,grad_norm: 0.7745059404367352, iteration: 458503
loss: 0.9963338375091553,grad_norm: 0.8678809523667391, iteration: 458504
loss: 0.9998438358306885,grad_norm: 0.7490735629052127, iteration: 458505
loss: 1.028947114944458,grad_norm: 0.7145407371310634, iteration: 458506
loss: 0.9798427224159241,grad_norm: 0.8205456608227066, iteration: 458507
loss: 0.986668586730957,grad_norm: 0.6635562520946301, iteration: 458508
loss: 0.9759424328804016,grad_norm: 0.8037743508491024, iteration: 458509
loss: 1.1806902885437012,grad_norm: 1.0000000069672528, iteration: 458510
loss: 1.0201243162155151,grad_norm: 0.8640688165088365, iteration: 458511
loss: 1.0448414087295532,grad_norm: 0.9999996065810508, iteration: 458512
loss: 0.9816534519195557,grad_norm: 0.6889978164930916, iteration: 458513
loss: 0.9744102954864502,grad_norm: 0.9216412762510146, iteration: 458514
loss: 1.0102148056030273,grad_norm: 0.9999991502571474, iteration: 458515
loss: 1.0064585208892822,grad_norm: 0.8112485997248782, iteration: 458516
loss: 0.9689642190933228,grad_norm: 0.7924236352181281, iteration: 458517
loss: 1.0035871267318726,grad_norm: 0.7620736536206593, iteration: 458518
loss: 0.9954433441162109,grad_norm: 0.7806937383814201, iteration: 458519
loss: 1.0012315511703491,grad_norm: 0.6969815159841437, iteration: 458520
loss: 0.9991632103919983,grad_norm: 0.8353611013548711, iteration: 458521
loss: 1.0367146730422974,grad_norm: 0.7387015277257671, iteration: 458522
loss: 1.0001055002212524,grad_norm: 0.7076604412692067, iteration: 458523
loss: 0.9696016907691956,grad_norm: 0.8233615034681616, iteration: 458524
loss: 0.9827615022659302,grad_norm: 0.9999996383480753, iteration: 458525
loss: 0.9505565762519836,grad_norm: 0.692516180934396, iteration: 458526
loss: 1.0306977033615112,grad_norm: 0.9999996167520773, iteration: 458527
loss: 0.9897037744522095,grad_norm: 0.7483439904403926, iteration: 458528
loss: 1.0371105670928955,grad_norm: 0.728114635531222, iteration: 458529
loss: 0.9837216734886169,grad_norm: 0.8598175465183336, iteration: 458530
loss: 0.9818256497383118,grad_norm: 0.7010535476034525, iteration: 458531
loss: 1.0072706937789917,grad_norm: 0.7837564090909034, iteration: 458532
loss: 0.9963042736053467,grad_norm: 0.999999887859325, iteration: 458533
loss: 0.9541231393814087,grad_norm: 0.715218133272419, iteration: 458534
loss: 1.0220224857330322,grad_norm: 0.770842993986233, iteration: 458535
loss: 0.995659589767456,grad_norm: 0.6732374824701249, iteration: 458536
loss: 0.9871882200241089,grad_norm: 0.9843549049075855, iteration: 458537
loss: 1.0182853937149048,grad_norm: 0.8265468622887446, iteration: 458538
loss: 1.0057308673858643,grad_norm: 0.8135309171693653, iteration: 458539
loss: 1.0141980648040771,grad_norm: 0.6005923187814297, iteration: 458540
loss: 1.0892870426177979,grad_norm: 0.9999998927189289, iteration: 458541
loss: 1.0270353555679321,grad_norm: 0.7323547602834487, iteration: 458542
loss: 1.1920095682144165,grad_norm: 0.8279014137610161, iteration: 458543
loss: 1.0259642601013184,grad_norm: 0.7401116650424094, iteration: 458544
loss: 0.9816581010818481,grad_norm: 0.7705669363947528, iteration: 458545
loss: 1.0073513984680176,grad_norm: 0.7277532697501836, iteration: 458546
loss: 0.9894667863845825,grad_norm: 0.8738264450515557, iteration: 458547
loss: 1.0184383392333984,grad_norm: 0.7956453151489404, iteration: 458548
loss: 0.9941661357879639,grad_norm: 0.7271007754692268, iteration: 458549
loss: 0.9656552672386169,grad_norm: 0.6754625754826651, iteration: 458550
loss: 1.0328786373138428,grad_norm: 0.8873975793975675, iteration: 458551
loss: 1.1109567880630493,grad_norm: 0.999999848508494, iteration: 458552
loss: 0.9601099491119385,grad_norm: 0.7648833473976588, iteration: 458553
loss: 1.0052425861358643,grad_norm: 0.9999996831663143, iteration: 458554
loss: 1.030073881149292,grad_norm: 0.853004868805312, iteration: 458555
loss: 0.9766275882720947,grad_norm: 0.7935749773863464, iteration: 458556
loss: 0.9833728671073914,grad_norm: 0.8902322331057722, iteration: 458557
loss: 0.9590048789978027,grad_norm: 0.7079379496081619, iteration: 458558
loss: 1.0910158157348633,grad_norm: 0.813015474810541, iteration: 458559
loss: 1.005454421043396,grad_norm: 0.8047472906156424, iteration: 458560
loss: 0.9763179421424866,grad_norm: 0.8458382962555359, iteration: 458561
loss: 0.9771065711975098,grad_norm: 0.728472105526157, iteration: 458562
loss: 0.9890400171279907,grad_norm: 0.6152687376229177, iteration: 458563
loss: 0.9794209599494934,grad_norm: 0.8984688122478189, iteration: 458564
loss: 0.9367749691009521,grad_norm: 0.7507739616578565, iteration: 458565
loss: 0.9936915040016174,grad_norm: 0.9815670842399554, iteration: 458566
loss: 1.0173218250274658,grad_norm: 0.8501918998220914, iteration: 458567
loss: 0.9936299324035645,grad_norm: 0.9999995236795711, iteration: 458568
loss: 1.0069305896759033,grad_norm: 0.9096964798862996, iteration: 458569
loss: 1.0064971446990967,grad_norm: 0.8628090468132354, iteration: 458570
loss: 0.990110456943512,grad_norm: 0.6757223039136736, iteration: 458571
loss: 0.9577529430389404,grad_norm: 0.7450997133810268, iteration: 458572
loss: 0.9896129965782166,grad_norm: 0.9171607878764986, iteration: 458573
loss: 0.9635406732559204,grad_norm: 0.7197538674863918, iteration: 458574
loss: 1.097216010093689,grad_norm: 0.999999601157503, iteration: 458575
loss: 1.0307936668395996,grad_norm: 0.7532608410438498, iteration: 458576
loss: 1.0007225275039673,grad_norm: 0.6104865299940607, iteration: 458577
loss: 0.9584702849388123,grad_norm: 0.721173120006057, iteration: 458578
loss: 1.05070161819458,grad_norm: 0.9999992716817654, iteration: 458579
loss: 1.0163401365280151,grad_norm: 0.7150522393371855, iteration: 458580
loss: 0.9961876273155212,grad_norm: 0.7344230203493498, iteration: 458581
loss: 0.9947184324264526,grad_norm: 0.8471079477830933, iteration: 458582
loss: 1.029468059539795,grad_norm: 0.8197714431247328, iteration: 458583
loss: 1.0203323364257812,grad_norm: 0.7856746788363205, iteration: 458584
loss: 1.0658150911331177,grad_norm: 0.9999997515815888, iteration: 458585
loss: 0.9746553897857666,grad_norm: 0.9166444516914415, iteration: 458586
loss: 1.016223669052124,grad_norm: 0.9999991040693706, iteration: 458587
loss: 0.99741131067276,grad_norm: 0.6121115434923216, iteration: 458588
loss: 1.0110529661178589,grad_norm: 0.693185273917312, iteration: 458589
loss: 0.9945327639579773,grad_norm: 0.7031794789559948, iteration: 458590
loss: 1.0065010786056519,grad_norm: 0.8888264250465282, iteration: 458591
loss: 0.9084283709526062,grad_norm: 0.8128739331917755, iteration: 458592
loss: 0.9930016398429871,grad_norm: 0.6669625994517749, iteration: 458593
loss: 0.9829180240631104,grad_norm: 0.8325261819137261, iteration: 458594
loss: 0.9985882043838501,grad_norm: 0.6903203698498712, iteration: 458595
loss: 0.9944604635238647,grad_norm: 0.7663397507899761, iteration: 458596
loss: 0.9938977360725403,grad_norm: 0.833289246076384, iteration: 458597
loss: 1.012323021888733,grad_norm: 0.7073285695245115, iteration: 458598
loss: 0.9854806065559387,grad_norm: 0.8935217167546782, iteration: 458599
loss: 0.9973618388175964,grad_norm: 0.7769497635577985, iteration: 458600
loss: 1.1822220087051392,grad_norm: 0.999999208312316, iteration: 458601
loss: 0.9726033210754395,grad_norm: 0.7981642090000058, iteration: 458602
loss: 0.9863495826721191,grad_norm: 0.9500028974136103, iteration: 458603
loss: 1.00100576877594,grad_norm: 0.7036099305727267, iteration: 458604
loss: 0.9901694655418396,grad_norm: 0.9999991547184863, iteration: 458605
loss: 1.010962724685669,grad_norm: 0.6807278094807767, iteration: 458606
loss: 1.0030303001403809,grad_norm: 0.7273564942701446, iteration: 458607
loss: 0.9989883303642273,grad_norm: 0.838971771237795, iteration: 458608
loss: 1.0207891464233398,grad_norm: 0.7854004696228569, iteration: 458609
loss: 0.9587689638137817,grad_norm: 0.9633859768271166, iteration: 458610
loss: 0.9990895390510559,grad_norm: 0.7383118113760081, iteration: 458611
loss: 1.0078060626983643,grad_norm: 0.7688224800253772, iteration: 458612
loss: 1.0686371326446533,grad_norm: 0.7718483602705115, iteration: 458613
loss: 0.9886243939399719,grad_norm: 0.7604505797599427, iteration: 458614
loss: 0.9862338304519653,grad_norm: 0.7373387158575023, iteration: 458615
loss: 1.0243998765945435,grad_norm: 0.8097218715387431, iteration: 458616
loss: 1.0881876945495605,grad_norm: 0.7193763999436625, iteration: 458617
loss: 0.9990625381469727,grad_norm: 0.7768648902850996, iteration: 458618
loss: 0.9986122250556946,grad_norm: 0.8879174255032048, iteration: 458619
loss: 0.9981306195259094,grad_norm: 0.7974316606015434, iteration: 458620
loss: 1.010198712348938,grad_norm: 0.8205760224216184, iteration: 458621
loss: 0.9893352389335632,grad_norm: 0.8663299166249752, iteration: 458622
loss: 1.0030678510665894,grad_norm: 0.771864709666606, iteration: 458623
loss: 1.0036611557006836,grad_norm: 0.7693426363184431, iteration: 458624
loss: 0.9916432499885559,grad_norm: 0.6965616282959771, iteration: 458625
loss: 0.9958792328834534,grad_norm: 0.7795830498235851, iteration: 458626
loss: 1.0508155822753906,grad_norm: 0.9999996759246902, iteration: 458627
loss: 1.000567078590393,grad_norm: 0.8148248287203541, iteration: 458628
loss: 1.031446933746338,grad_norm: 0.6437150417591763, iteration: 458629
loss: 1.0051363706588745,grad_norm: 0.7977779607676654, iteration: 458630
loss: 0.9719624519348145,grad_norm: 0.8865271702342153, iteration: 458631
loss: 0.9868184328079224,grad_norm: 0.8608738993757763, iteration: 458632
loss: 0.9907457828521729,grad_norm: 0.8670284421732811, iteration: 458633
loss: 0.9761703014373779,grad_norm: 0.7496013713261729, iteration: 458634
loss: 0.9979715943336487,grad_norm: 0.6924449523664652, iteration: 458635
loss: 0.9871577620506287,grad_norm: 0.7604369608397087, iteration: 458636
loss: 1.0064258575439453,grad_norm: 0.7955333915674851, iteration: 458637
loss: 1.020616054534912,grad_norm: 0.9999992799525533, iteration: 458638
loss: 1.0132216215133667,grad_norm: 0.6894281156339984, iteration: 458639
loss: 1.010185956954956,grad_norm: 0.8018947652913935, iteration: 458640
loss: 0.9636793732643127,grad_norm: 0.7327972913765144, iteration: 458641
loss: 1.006861686706543,grad_norm: 0.9050183236999094, iteration: 458642
loss: 1.0068914890289307,grad_norm: 0.9292083197272222, iteration: 458643
loss: 0.9694931507110596,grad_norm: 0.7087659517854543, iteration: 458644
loss: 0.9887329936027527,grad_norm: 0.8697347620398985, iteration: 458645
loss: 0.9804509878158569,grad_norm: 0.7648150486424014, iteration: 458646
loss: 0.9944134950637817,grad_norm: 0.7574136375568088, iteration: 458647
loss: 0.9638420939445496,grad_norm: 0.7570205307523022, iteration: 458648
loss: 1.1381890773773193,grad_norm: 0.999999649899381, iteration: 458649
loss: 0.9562543630599976,grad_norm: 0.7616636670259614, iteration: 458650
loss: 0.9950752854347229,grad_norm: 0.8776676327722485, iteration: 458651
loss: 1.0396864414215088,grad_norm: 0.9999992272895976, iteration: 458652
loss: 1.012223720550537,grad_norm: 0.7090776409525349, iteration: 458653
loss: 0.9894340634346008,grad_norm: 0.8473419811853069, iteration: 458654
loss: 0.9938040971755981,grad_norm: 0.879683252760727, iteration: 458655
loss: 0.9747369885444641,grad_norm: 0.8336825129867791, iteration: 458656
loss: 0.9900447726249695,grad_norm: 0.6589198911016106, iteration: 458657
loss: 1.005811333656311,grad_norm: 0.9999990791295295, iteration: 458658
loss: 0.9947970509529114,grad_norm: 0.7004612095240988, iteration: 458659
loss: 0.9705449938774109,grad_norm: 0.6592191921866678, iteration: 458660
loss: 1.0301666259765625,grad_norm: 0.8754081473993558, iteration: 458661
loss: 0.991918683052063,grad_norm: 0.6982961454766979, iteration: 458662
loss: 0.9566424489021301,grad_norm: 0.8503228549847728, iteration: 458663
loss: 1.0083016157150269,grad_norm: 0.9999991805222267, iteration: 458664
loss: 1.001802921295166,grad_norm: 0.8354491592188058, iteration: 458665
loss: 0.9853869676589966,grad_norm: 0.6654807025418583, iteration: 458666
loss: 0.9732621908187866,grad_norm: 0.6251115659638978, iteration: 458667
loss: 0.9880334138870239,grad_norm: 0.8878276912782248, iteration: 458668
loss: 0.9843805432319641,grad_norm: 0.7043990790201383, iteration: 458669
loss: 1.0385161638259888,grad_norm: 0.8433747236365513, iteration: 458670
loss: 1.0146539211273193,grad_norm: 0.7102378534600194, iteration: 458671
loss: 1.0069197416305542,grad_norm: 0.8068922411271936, iteration: 458672
loss: 1.0173341035842896,grad_norm: 0.6480518592603198, iteration: 458673
loss: 1.0394638776779175,grad_norm: 0.9999993584379155, iteration: 458674
loss: 0.992483913898468,grad_norm: 0.7549811861460977, iteration: 458675
loss: 0.9856572151184082,grad_norm: 0.7727080132995449, iteration: 458676
loss: 1.0186488628387451,grad_norm: 0.8045750365357123, iteration: 458677
loss: 1.0006933212280273,grad_norm: 0.9999991764862597, iteration: 458678
loss: 1.000009298324585,grad_norm: 0.7575051266413194, iteration: 458679
loss: 1.018777847290039,grad_norm: 0.6817254712588509, iteration: 458680
loss: 0.9982485175132751,grad_norm: 0.7591878248647448, iteration: 458681
loss: 1.0346146821975708,grad_norm: 0.9999999865284034, iteration: 458682
loss: 1.03190279006958,grad_norm: 0.6962234011402162, iteration: 458683
loss: 1.0363824367523193,grad_norm: 0.8542192893753052, iteration: 458684
loss: 1.052293062210083,grad_norm: 0.999998960373643, iteration: 458685
loss: 1.051712989807129,grad_norm: 0.7030835300402584, iteration: 458686
loss: 0.9926131367683411,grad_norm: 0.7789371283518953, iteration: 458687
loss: 0.9866989850997925,grad_norm: 0.7959815479033274, iteration: 458688
loss: 0.9997391700744629,grad_norm: 0.8417672748722744, iteration: 458689
loss: 0.9933072924613953,grad_norm: 0.6007131982424984, iteration: 458690
loss: 0.9767458438873291,grad_norm: 0.9999991768378038, iteration: 458691
loss: 0.9985396265983582,grad_norm: 0.8054460849626272, iteration: 458692
loss: 1.0115786790847778,grad_norm: 0.6983055491660726, iteration: 458693
loss: 1.005185842514038,grad_norm: 0.6947034152158829, iteration: 458694
loss: 0.9864981770515442,grad_norm: 0.7768432022903111, iteration: 458695
loss: 0.9835913777351379,grad_norm: 0.7781096183328012, iteration: 458696
loss: 1.0379416942596436,grad_norm: 0.8239246650722838, iteration: 458697
loss: 0.9909337162971497,grad_norm: 0.9042218715773496, iteration: 458698
loss: 0.9937876462936401,grad_norm: 0.7199845787056038, iteration: 458699
loss: 0.9948372840881348,grad_norm: 0.885599139074974, iteration: 458700
loss: 1.0296472311019897,grad_norm: 0.8356171408063163, iteration: 458701
loss: 0.9999252557754517,grad_norm: 0.7962260684193844, iteration: 458702
loss: 1.005501389503479,grad_norm: 0.81565084518399, iteration: 458703
loss: 1.001471757888794,grad_norm: 0.9468915878280476, iteration: 458704
loss: 1.0072307586669922,grad_norm: 0.8489451665326858, iteration: 458705
loss: 1.0019257068634033,grad_norm: 0.700919809954764, iteration: 458706
loss: 1.0178399085998535,grad_norm: 0.8029722200643525, iteration: 458707
loss: 0.9654279351234436,grad_norm: 0.7141862059496852, iteration: 458708
loss: 1.0144261121749878,grad_norm: 0.999999113690752, iteration: 458709
loss: 1.0082401037216187,grad_norm: 0.7197797585880847, iteration: 458710
loss: 1.0654629468917847,grad_norm: 0.9999994889253855, iteration: 458711
loss: 1.0343234539031982,grad_norm: 0.9969999681593421, iteration: 458712
loss: 1.0256469249725342,grad_norm: 0.7465157991084757, iteration: 458713
loss: 0.9796142578125,grad_norm: 0.8869641955568178, iteration: 458714
loss: 0.9993221163749695,grad_norm: 0.7714574537863719, iteration: 458715
loss: 0.9653563499450684,grad_norm: 0.8633599557850222, iteration: 458716
loss: 1.060001015663147,grad_norm: 0.9999991563166304, iteration: 458717
loss: 1.0086145401000977,grad_norm: 0.803184406986488, iteration: 458718
loss: 1.0201082229614258,grad_norm: 0.8965497518153679, iteration: 458719
loss: 0.9882177710533142,grad_norm: 0.7910570384744914, iteration: 458720
loss: 0.9948165416717529,grad_norm: 0.7609831794200231, iteration: 458721
loss: 0.9935645461082458,grad_norm: 0.7254825483208803, iteration: 458722
loss: 1.0011299848556519,grad_norm: 0.6475404171545803, iteration: 458723
loss: 0.9954034686088562,grad_norm: 0.6676306359329387, iteration: 458724
loss: 1.016534686088562,grad_norm: 0.9999997791978694, iteration: 458725
loss: 1.016402006149292,grad_norm: 0.8957406456463097, iteration: 458726
loss: 1.0206279754638672,grad_norm: 0.8149356951588684, iteration: 458727
loss: 1.0137851238250732,grad_norm: 0.7063883211943446, iteration: 458728
loss: 1.013238787651062,grad_norm: 0.919392847140169, iteration: 458729
loss: 0.9584211707115173,grad_norm: 0.812107515853729, iteration: 458730
loss: 1.008500337600708,grad_norm: 0.7799738326300247, iteration: 458731
loss: 0.9782419204711914,grad_norm: 0.7220122537394862, iteration: 458732
loss: 1.0317620038986206,grad_norm: 0.7703370471212481, iteration: 458733
loss: 1.0221024751663208,grad_norm: 0.9999999954954447, iteration: 458734
loss: 0.9905096888542175,grad_norm: 0.65344960953618, iteration: 458735
loss: 1.0059309005737305,grad_norm: 0.8373545303756491, iteration: 458736
loss: 1.0163460969924927,grad_norm: 0.9999991237316199, iteration: 458737
loss: 0.980525553226471,grad_norm: 0.8719197646677328, iteration: 458738
loss: 0.9946954250335693,grad_norm: 0.9999993440827252, iteration: 458739
loss: 1.0288166999816895,grad_norm: 0.8820162493769085, iteration: 458740
loss: 1.0293822288513184,grad_norm: 0.6925153898276388, iteration: 458741
loss: 0.970223605632782,grad_norm: 0.6963859779834423, iteration: 458742
loss: 1.012961745262146,grad_norm: 0.9999993363134245, iteration: 458743
loss: 0.9708133339881897,grad_norm: 0.7369742102700267, iteration: 458744
loss: 0.9838284254074097,grad_norm: 0.8823644107533585, iteration: 458745
loss: 0.9931527376174927,grad_norm: 0.9999993753670339, iteration: 458746
loss: 0.9769686460494995,grad_norm: 0.8755100853585099, iteration: 458747
loss: 1.0110282897949219,grad_norm: 0.7189965463242014, iteration: 458748
loss: 0.9801308512687683,grad_norm: 0.9999996061110296, iteration: 458749
loss: 1.0198495388031006,grad_norm: 0.9999996245639095, iteration: 458750
loss: 0.9996138215065002,grad_norm: 0.9082999813684056, iteration: 458751
loss: 1.0852800607681274,grad_norm: 0.9999990866536024, iteration: 458752
loss: 0.9862110018730164,grad_norm: 0.7163654695942865, iteration: 458753
loss: 0.973095178604126,grad_norm: 0.7986954253944286, iteration: 458754
loss: 0.9664791822433472,grad_norm: 0.6783246768976056, iteration: 458755
loss: 1.0345737934112549,grad_norm: 0.9999994312757124, iteration: 458756
loss: 0.9866990447044373,grad_norm: 0.7416183976357313, iteration: 458757
loss: 0.9854164719581604,grad_norm: 0.7919299957889031, iteration: 458758
loss: 1.0068472623825073,grad_norm: 0.7435268534239888, iteration: 458759
loss: 1.0028741359710693,grad_norm: 0.907704166514814, iteration: 458760
loss: 0.9852307438850403,grad_norm: 0.7206264029878381, iteration: 458761
loss: 0.9897615313529968,grad_norm: 0.6311373984757523, iteration: 458762
loss: 1.0122472047805786,grad_norm: 0.6995861411417595, iteration: 458763
loss: 0.9863536357879639,grad_norm: 0.9749627564139297, iteration: 458764
loss: 1.0058906078338623,grad_norm: 0.6622850825785835, iteration: 458765
loss: 1.0179104804992676,grad_norm: 0.5999555617657173, iteration: 458766
loss: 0.9600114822387695,grad_norm: 0.7223439378049269, iteration: 458767
loss: 1.0116997957229614,grad_norm: 0.7801022428542352, iteration: 458768
loss: 1.011380910873413,grad_norm: 0.9999995793423023, iteration: 458769
loss: 1.0162967443466187,grad_norm: 0.9134154464891036, iteration: 458770
loss: 1.0090749263763428,grad_norm: 0.9945654212667357, iteration: 458771
loss: 1.0260436534881592,grad_norm: 0.9999995192979348, iteration: 458772
loss: 0.968745768070221,grad_norm: 0.6800704745987368, iteration: 458773
loss: 1.0026494264602661,grad_norm: 0.7458680447457604, iteration: 458774
loss: 1.0192872285842896,grad_norm: 0.7625701203966438, iteration: 458775
loss: 0.9874343276023865,grad_norm: 0.6182193457538507, iteration: 458776
loss: 0.9810874462127686,grad_norm: 0.8367716564532619, iteration: 458777
loss: 1.0049878358840942,grad_norm: 0.5421451640490171, iteration: 458778
loss: 0.9927806854248047,grad_norm: 0.7682829863951323, iteration: 458779
loss: 0.9725155830383301,grad_norm: 0.826499921370822, iteration: 458780
loss: 1.026943564414978,grad_norm: 0.9999999392881166, iteration: 458781
loss: 1.0109606981277466,grad_norm: 0.7861904943162741, iteration: 458782
loss: 0.953463613986969,grad_norm: 0.9999991286252246, iteration: 458783
loss: 1.014449119567871,grad_norm: 0.9999993553152793, iteration: 458784
loss: 1.0097627639770508,grad_norm: 0.9338719428880882, iteration: 458785
loss: 1.0157133340835571,grad_norm: 0.914659716188749, iteration: 458786
loss: 1.0082634687423706,grad_norm: 0.8710961460692404, iteration: 458787
loss: 0.9733076095581055,grad_norm: 0.8960997785873003, iteration: 458788
loss: 1.0019619464874268,grad_norm: 0.6757920389899481, iteration: 458789
loss: 0.996861457824707,grad_norm: 0.8737879165977811, iteration: 458790
loss: 1.0238077640533447,grad_norm: 0.8859372445655865, iteration: 458791
loss: 0.9996311664581299,grad_norm: 0.8863144575473375, iteration: 458792
loss: 0.959142804145813,grad_norm: 0.6274208164259427, iteration: 458793
loss: 1.0194120407104492,grad_norm: 0.7594357746278831, iteration: 458794
loss: 1.005706787109375,grad_norm: 0.8589026947917302, iteration: 458795
loss: 0.9920873641967773,grad_norm: 0.6930075976112557, iteration: 458796
loss: 0.9905242919921875,grad_norm: 0.9999994202669069, iteration: 458797
loss: 0.9758818745613098,grad_norm: 0.6199555964083893, iteration: 458798
loss: 0.9933134317398071,grad_norm: 0.6945446754036698, iteration: 458799
loss: 0.9951295852661133,grad_norm: 0.6960168902672534, iteration: 458800
loss: 1.0047496557235718,grad_norm: 0.6581249573760073, iteration: 458801
loss: 0.9905152916908264,grad_norm: 0.7561440005063809, iteration: 458802
loss: 0.9956361651420593,grad_norm: 0.7694648420845916, iteration: 458803
loss: 1.000800371170044,grad_norm: 0.9442855742765632, iteration: 458804
loss: 1.0061575174331665,grad_norm: 0.826356545654251, iteration: 458805
loss: 1.0201261043548584,grad_norm: 0.8267179267685857, iteration: 458806
loss: 0.9684712290763855,grad_norm: 0.8400263113798897, iteration: 458807
loss: 0.9916181564331055,grad_norm: 0.9218827375244707, iteration: 458808
loss: 1.020551323890686,grad_norm: 0.9999997997084242, iteration: 458809
loss: 1.0834686756134033,grad_norm: 0.7815804319813466, iteration: 458810
loss: 1.0183906555175781,grad_norm: 0.7282883916694303, iteration: 458811
loss: 1.0175594091415405,grad_norm: 0.815473047235921, iteration: 458812
loss: 0.9725951552391052,grad_norm: 0.8567689884980791, iteration: 458813
loss: 0.9807741641998291,grad_norm: 0.8013056361797448, iteration: 458814
loss: 1.0478190183639526,grad_norm: 0.9278182144482536, iteration: 458815
loss: 1.0044968128204346,grad_norm: 0.9999991460866771, iteration: 458816
loss: 0.9911738038063049,grad_norm: 0.774001713825018, iteration: 458817
loss: 1.0355526208877563,grad_norm: 0.8789117330996128, iteration: 458818
loss: 1.0042554140090942,grad_norm: 0.835222209831868, iteration: 458819
loss: 1.0025204420089722,grad_norm: 0.8874578581657181, iteration: 458820
loss: 1.0008333921432495,grad_norm: 0.7799331936598696, iteration: 458821
loss: 1.0025299787521362,grad_norm: 0.7666775212475773, iteration: 458822
loss: 1.0257173776626587,grad_norm: 0.8778067238994485, iteration: 458823
loss: 1.0292391777038574,grad_norm: 0.7543052405479936, iteration: 458824
loss: 1.0089555978775024,grad_norm: 0.7041615989023218, iteration: 458825
loss: 0.9940373301506042,grad_norm: 0.7728513865822022, iteration: 458826
loss: 0.9868627190589905,grad_norm: 0.5889901749623966, iteration: 458827
loss: 1.1015154123306274,grad_norm: 0.999999212541667, iteration: 458828
loss: 1.0454933643341064,grad_norm: 0.7297706257438583, iteration: 458829
loss: 0.9926338195800781,grad_norm: 0.7045909799717864, iteration: 458830
loss: 1.003543734550476,grad_norm: 0.8478751666024276, iteration: 458831
loss: 0.993815004825592,grad_norm: 0.8510975080888205, iteration: 458832
loss: 1.0288527011871338,grad_norm: 0.8678172554881176, iteration: 458833
loss: 0.9891017079353333,grad_norm: 0.7526321284041806, iteration: 458834
loss: 0.9861050248146057,grad_norm: 0.6485850234740727, iteration: 458835
loss: 1.002851963043213,grad_norm: 0.6716151705428247, iteration: 458836
loss: 0.981441080570221,grad_norm: 0.7361092203430286, iteration: 458837
loss: 0.9970888495445251,grad_norm: 0.735811513281076, iteration: 458838
loss: 0.9592400789260864,grad_norm: 0.7281960574875076, iteration: 458839
loss: 0.9781118631362915,grad_norm: 0.6418750539943312, iteration: 458840
loss: 0.989798903465271,grad_norm: 0.9999997770538128, iteration: 458841
loss: 0.9772121906280518,grad_norm: 0.7290020793991008, iteration: 458842
loss: 1.0074009895324707,grad_norm: 0.9999994162073178, iteration: 458843
loss: 1.029744267463684,grad_norm: 0.7168398178842553, iteration: 458844
loss: 0.9887367486953735,grad_norm: 0.6250298663033883, iteration: 458845
loss: 1.0046350955963135,grad_norm: 0.8030988973567447, iteration: 458846
loss: 1.0177310705184937,grad_norm: 0.8505648426007829, iteration: 458847
loss: 1.0179938077926636,grad_norm: 0.8280865977859448, iteration: 458848
loss: 0.9997486472129822,grad_norm: 0.7389402396563848, iteration: 458849
loss: 1.0042345523834229,grad_norm: 0.7827848599270062, iteration: 458850
loss: 0.9824729561805725,grad_norm: 0.9654507766078032, iteration: 458851
loss: 0.9270519614219666,grad_norm: 0.7367248429433274, iteration: 458852
loss: 0.9928514361381531,grad_norm: 0.9999990890501778, iteration: 458853
loss: 1.0299930572509766,grad_norm: 0.74812626970732, iteration: 458854
loss: 1.0071543455123901,grad_norm: 0.783355060406435, iteration: 458855
loss: 1.0004851818084717,grad_norm: 0.7639793009363672, iteration: 458856
loss: 1.0193347930908203,grad_norm: 0.9999999299794156, iteration: 458857
loss: 1.0180054903030396,grad_norm: 0.6908648392342076, iteration: 458858
loss: 0.9684169888496399,grad_norm: 0.7759135462452592, iteration: 458859
loss: 0.9807428121566772,grad_norm: 0.8493965589863504, iteration: 458860
loss: 1.0248732566833496,grad_norm: 0.9999990104441357, iteration: 458861
loss: 1.0092428922653198,grad_norm: 0.6736876656783787, iteration: 458862
loss: 1.0312879085540771,grad_norm: 0.9999996800293793, iteration: 458863
loss: 1.0189670324325562,grad_norm: 0.8190130879402184, iteration: 458864
loss: 1.0101441144943237,grad_norm: 0.6988471395069619, iteration: 458865
loss: 1.0658501386642456,grad_norm: 0.9906820716987281, iteration: 458866
loss: 1.0108246803283691,grad_norm: 0.6950110010206315, iteration: 458867
loss: 1.0060333013534546,grad_norm: 0.6492052354675903, iteration: 458868
loss: 0.9883801341056824,grad_norm: 0.7246295282344141, iteration: 458869
loss: 0.9586415886878967,grad_norm: 0.8687012399119098, iteration: 458870
loss: 1.0084046125411987,grad_norm: 0.7730977471279339, iteration: 458871
loss: 1.071221113204956,grad_norm: 0.8373973130734994, iteration: 458872
loss: 1.0018532276153564,grad_norm: 0.6761853663622702, iteration: 458873
loss: 1.0307403802871704,grad_norm: 0.7071215116229128, iteration: 458874
loss: 0.9915561676025391,grad_norm: 0.6937815378791964, iteration: 458875
loss: 1.0103375911712646,grad_norm: 0.7821016538340395, iteration: 458876
loss: 0.9869975447654724,grad_norm: 0.8516941064414902, iteration: 458877
loss: 1.0393012762069702,grad_norm: 0.7512172373523793, iteration: 458878
loss: 0.974101722240448,grad_norm: 0.7342069679009766, iteration: 458879
loss: 0.9969280958175659,grad_norm: 0.9196253561324726, iteration: 458880
loss: 0.9883468747138977,grad_norm: 0.7893930715288225, iteration: 458881
loss: 1.0233995914459229,grad_norm: 0.769011637437042, iteration: 458882
loss: 1.0180513858795166,grad_norm: 0.7259229823702096, iteration: 458883
loss: 0.9942797422409058,grad_norm: 0.7899481327266318, iteration: 458884
loss: 0.9717888832092285,grad_norm: 0.8725171501078086, iteration: 458885
loss: 1.0512404441833496,grad_norm: 0.9999999553616976, iteration: 458886
loss: 1.0087827444076538,grad_norm: 0.8050658708143719, iteration: 458887
loss: 0.9872260689735413,grad_norm: 0.6311489942109451, iteration: 458888
loss: 1.0250576734542847,grad_norm: 0.5705424948545935, iteration: 458889
loss: 0.988764226436615,grad_norm: 0.7665719225383116, iteration: 458890
loss: 1.0339051485061646,grad_norm: 0.905336174833547, iteration: 458891
loss: 0.9960498809814453,grad_norm: 0.7221874989039974, iteration: 458892
loss: 0.9759002327919006,grad_norm: 0.6155077947729801, iteration: 458893
loss: 0.982203483581543,grad_norm: 0.8412434215917513, iteration: 458894
loss: 0.9588167071342468,grad_norm: 0.6768556917184164, iteration: 458895
loss: 0.997259259223938,grad_norm: 0.7836066884928728, iteration: 458896
loss: 1.0255318880081177,grad_norm: 0.999999500163708, iteration: 458897
loss: 1.0260679721832275,grad_norm: 0.6995244225960677, iteration: 458898
loss: 0.9611369967460632,grad_norm: 0.8566788476029328, iteration: 458899
loss: 0.9920005202293396,grad_norm: 0.8952335325423176, iteration: 458900
loss: 0.9788694977760315,grad_norm: 0.9072855967536428, iteration: 458901
loss: 1.002077579498291,grad_norm: 0.675426636500172, iteration: 458902
loss: 0.9812383055686951,grad_norm: 0.7955936728256516, iteration: 458903
loss: 1.0209720134735107,grad_norm: 0.7286245133225112, iteration: 458904
loss: 0.933987021446228,grad_norm: 0.6519498660719832, iteration: 458905
loss: 1.0173351764678955,grad_norm: 0.8119904045916504, iteration: 458906
loss: 0.9997949004173279,grad_norm: 0.7425080933780274, iteration: 458907
loss: 1.0548087358474731,grad_norm: 0.9999990802772086, iteration: 458908
loss: 0.9875198006629944,grad_norm: 0.7431422313205648, iteration: 458909
loss: 0.9762172698974609,grad_norm: 0.6229489309597939, iteration: 458910
loss: 0.9918113946914673,grad_norm: 0.6051501091737775, iteration: 458911
loss: 0.9978001117706299,grad_norm: 0.9975648617771063, iteration: 458912
loss: 0.9743151664733887,grad_norm: 0.8525332101613121, iteration: 458913
loss: 1.0070738792419434,grad_norm: 0.7160256660596777, iteration: 458914
loss: 1.149880290031433,grad_norm: 0.9957515839998029, iteration: 458915
loss: 0.9803763628005981,grad_norm: 0.7326962823955215, iteration: 458916
loss: 1.008939504623413,grad_norm: 0.727980689998187, iteration: 458917
loss: 1.0067905187606812,grad_norm: 0.8352196743910562, iteration: 458918
loss: 0.9698345065116882,grad_norm: 0.7085567903356293, iteration: 458919
loss: 0.966376543045044,grad_norm: 0.7885540158707154, iteration: 458920
loss: 1.0108846426010132,grad_norm: 0.8746584983632598, iteration: 458921
loss: 1.0098694562911987,grad_norm: 0.7168562884745663, iteration: 458922
loss: 0.9982566237449646,grad_norm: 0.6410502320798329, iteration: 458923
loss: 0.9904618263244629,grad_norm: 0.7601352860350179, iteration: 458924
loss: 1.0189377069473267,grad_norm: 0.9999993927489831, iteration: 458925
loss: 0.9949312210083008,grad_norm: 0.8097154941958996, iteration: 458926
loss: 0.9804962277412415,grad_norm: 0.766443050528019, iteration: 458927
loss: 0.9849026203155518,grad_norm: 0.7702408145887578, iteration: 458928
loss: 0.9591172337532043,grad_norm: 0.8344870362866007, iteration: 458929
loss: 1.0284974575042725,grad_norm: 0.7248456334864155, iteration: 458930
loss: 1.0095716714859009,grad_norm: 0.8512392455547926, iteration: 458931
loss: 0.9787849187850952,grad_norm: 0.6947086052828989, iteration: 458932
loss: 0.9642232656478882,grad_norm: 0.7772472332703408, iteration: 458933
loss: 1.0227595567703247,grad_norm: 0.7142578030497422, iteration: 458934
loss: 0.9887693524360657,grad_norm: 0.6837285601673069, iteration: 458935
loss: 1.0230607986450195,grad_norm: 0.8734248856070885, iteration: 458936
loss: 1.0106894969940186,grad_norm: 0.9999992796629189, iteration: 458937
loss: 1.0215909481048584,grad_norm: 0.7952574637562564, iteration: 458938
loss: 1.0200634002685547,grad_norm: 0.7565820812156061, iteration: 458939
loss: 1.037887692451477,grad_norm: 0.8921900725461892, iteration: 458940
loss: 0.996832549571991,grad_norm: 0.7724953801631049, iteration: 458941
loss: 0.995311439037323,grad_norm: 0.9999995933146483, iteration: 458942
loss: 1.0410511493682861,grad_norm: 0.7069466419105245, iteration: 458943
loss: 1.0002825260162354,grad_norm: 0.7821138076321781, iteration: 458944
loss: 1.0131639242172241,grad_norm: 0.9999997840324406, iteration: 458945
loss: 1.004035234451294,grad_norm: 0.7763925496381084, iteration: 458946
loss: 1.1140100955963135,grad_norm: 0.9999994790553763, iteration: 458947
loss: 0.9726408123970032,grad_norm: 0.729174918707179, iteration: 458948
loss: 1.0005033016204834,grad_norm: 0.7948948660538785, iteration: 458949
loss: 1.040930151939392,grad_norm: 0.7114751549747589, iteration: 458950
loss: 1.0019314289093018,grad_norm: 0.5999114828934891, iteration: 458951
loss: 0.9632938504219055,grad_norm: 0.6539053603322929, iteration: 458952
loss: 0.9943111538887024,grad_norm: 0.9253138216574974, iteration: 458953
loss: 1.0094455480575562,grad_norm: 0.9345828725999619, iteration: 458954
loss: 1.0338479280471802,grad_norm: 0.8422221285087698, iteration: 458955
loss: 0.9823649525642395,grad_norm: 0.7075561891642604, iteration: 458956
loss: 0.9935027956962585,grad_norm: 0.7317928219592429, iteration: 458957
loss: 1.0045892000198364,grad_norm: 0.8928967335584654, iteration: 458958
loss: 1.017433524131775,grad_norm: 0.8806829671610616, iteration: 458959
loss: 1.015974998474121,grad_norm: 0.6891488627632404, iteration: 458960
loss: 0.9352602958679199,grad_norm: 0.7451183250575869, iteration: 458961
loss: 1.0105996131896973,grad_norm: 0.8494259031930913, iteration: 458962
loss: 1.002859354019165,grad_norm: 0.7505355246870147, iteration: 458963
loss: 0.9781426787376404,grad_norm: 0.8667145203973927, iteration: 458964
loss: 1.0429316759109497,grad_norm: 0.832081782127593, iteration: 458965
loss: 1.0279215574264526,grad_norm: 0.9999993480825459, iteration: 458966
loss: 1.1348756551742554,grad_norm: 0.9999999034282903, iteration: 458967
loss: 0.972834587097168,grad_norm: 0.803191536017295, iteration: 458968
loss: 1.0097516775131226,grad_norm: 0.7393027282115219, iteration: 458969
loss: 1.000418782234192,grad_norm: 0.8789808412121677, iteration: 458970
loss: 1.0088869333267212,grad_norm: 0.9999991713828987, iteration: 458971
loss: 1.0114223957061768,grad_norm: 0.8342435690461198, iteration: 458972
loss: 0.9853312373161316,grad_norm: 0.856405195781288, iteration: 458973
loss: 1.0030747652053833,grad_norm: 0.8577475800521311, iteration: 458974
loss: 1.001130223274231,grad_norm: 0.8050349995145173, iteration: 458975
loss: 0.9881241917610168,grad_norm: 0.7674313488035143, iteration: 458976
loss: 1.0079224109649658,grad_norm: 0.6365327139393486, iteration: 458977
loss: 0.9627964496612549,grad_norm: 0.7643057904492847, iteration: 458978
loss: 1.0003809928894043,grad_norm: 0.8804991370311253, iteration: 458979
loss: 1.0238125324249268,grad_norm: 0.9999993338963089, iteration: 458980
loss: 1.0118352174758911,grad_norm: 0.6753480597811016, iteration: 458981
loss: 1.0175278186798096,grad_norm: 0.9912654392804429, iteration: 458982
loss: 1.029360055923462,grad_norm: 0.9999999010795755, iteration: 458983
loss: 1.0030053853988647,grad_norm: 0.692653145409801, iteration: 458984
loss: 1.0065311193466187,grad_norm: 0.7858150318475384, iteration: 458985
loss: 1.0130665302276611,grad_norm: 0.8050417161225462, iteration: 458986
loss: 1.03152334690094,grad_norm: 0.8035545944560221, iteration: 458987
loss: 1.0882601737976074,grad_norm: 0.999999811125812, iteration: 458988
loss: 0.984657883644104,grad_norm: 0.808842504629773, iteration: 458989
loss: 0.9752079248428345,grad_norm: 0.788220081418371, iteration: 458990
loss: 1.044984221458435,grad_norm: 0.7246674609759622, iteration: 458991
loss: 0.9975136518478394,grad_norm: 0.7960176133013491, iteration: 458992
loss: 0.9776870608329773,grad_norm: 0.7124909337316363, iteration: 458993
loss: 1.0257269144058228,grad_norm: 0.79110676547055, iteration: 458994
loss: 1.020878791809082,grad_norm: 0.9266871337699095, iteration: 458995
loss: 1.006494164466858,grad_norm: 0.7964161229678326, iteration: 458996
loss: 1.0721937417984009,grad_norm: 0.9999990611465167, iteration: 458997
loss: 0.9801524877548218,grad_norm: 0.6258925409510274, iteration: 458998
loss: 1.0430456399917603,grad_norm: 0.6843382767904449, iteration: 458999
loss: 1.0167982578277588,grad_norm: 0.8527949625375886, iteration: 459000
loss: 0.9976826310157776,grad_norm: 0.9999997443967937, iteration: 459001
loss: 1.0059192180633545,grad_norm: 0.795394064503507, iteration: 459002
loss: 0.959453821182251,grad_norm: 0.7946530004499451, iteration: 459003
loss: 1.0302999019622803,grad_norm: 0.9751483838722799, iteration: 459004
loss: 1.0031658411026,grad_norm: 0.6834663636340065, iteration: 459005
loss: 0.9463418126106262,grad_norm: 0.7027428071114893, iteration: 459006
loss: 1.0670695304870605,grad_norm: 0.9999993098122043, iteration: 459007
loss: 0.9759772419929504,grad_norm: 0.8091537648982129, iteration: 459008
loss: 0.9880577325820923,grad_norm: 0.7022376479870133, iteration: 459009
loss: 1.0165197849273682,grad_norm: 0.742391333582044, iteration: 459010
loss: 0.9922424554824829,grad_norm: 0.7938915974963394, iteration: 459011
loss: 1.0185648202896118,grad_norm: 0.796281704832761, iteration: 459012
loss: 1.035646677017212,grad_norm: 0.812967830133173, iteration: 459013
loss: 1.1146267652511597,grad_norm: 0.9999995944391629, iteration: 459014
loss: 1.007885456085205,grad_norm: 0.6188722567853289, iteration: 459015
loss: 0.9569437503814697,grad_norm: 0.8032308379800392, iteration: 459016
loss: 0.9652290940284729,grad_norm: 0.8059237799811376, iteration: 459017
loss: 1.009804368019104,grad_norm: 0.6206261796481205, iteration: 459018
loss: 1.030518651008606,grad_norm: 0.9327727758324278, iteration: 459019
loss: 0.9955275058746338,grad_norm: 0.7185868847848452, iteration: 459020
loss: 0.9988263845443726,grad_norm: 0.7675326846768848, iteration: 459021
loss: 1.005706548690796,grad_norm: 0.8861561788865114, iteration: 459022
loss: 0.9880625009536743,grad_norm: 0.7070445070770993, iteration: 459023
loss: 0.979709804058075,grad_norm: 0.7301483060088054, iteration: 459024
loss: 1.0088552236557007,grad_norm: 0.624611684946186, iteration: 459025
loss: 0.9777779579162598,grad_norm: 0.8003240497130829, iteration: 459026
loss: 0.9997838139533997,grad_norm: 0.772922485312149, iteration: 459027
loss: 0.9989109039306641,grad_norm: 0.9999992722900597, iteration: 459028
loss: 0.9934737086296082,grad_norm: 0.7660636745214568, iteration: 459029
loss: 0.9938958883285522,grad_norm: 0.6185151744979833, iteration: 459030
loss: 0.9984363317489624,grad_norm: 0.7881274655656012, iteration: 459031
loss: 0.9831751585006714,grad_norm: 0.8108441128795217, iteration: 459032
loss: 0.9970823526382446,grad_norm: 0.799326808920095, iteration: 459033
loss: 0.9793143272399902,grad_norm: 0.8573656057141568, iteration: 459034
loss: 0.9874703884124756,grad_norm: 0.7787636571173205, iteration: 459035
loss: 0.9939830899238586,grad_norm: 0.8811509532994514, iteration: 459036
loss: 1.0319676399230957,grad_norm: 0.7403932734394846, iteration: 459037
loss: 0.9720868468284607,grad_norm: 0.7499621868861593, iteration: 459038
loss: 0.9958739280700684,grad_norm: 0.6993581324408134, iteration: 459039
loss: 1.0180964469909668,grad_norm: 0.9999992563887881, iteration: 459040
loss: 1.022117257118225,grad_norm: 0.7444174855442289, iteration: 459041
loss: 1.0016692876815796,grad_norm: 0.7491020828203102, iteration: 459042
loss: 1.0152279138565063,grad_norm: 0.7859920454002208, iteration: 459043
loss: 1.1133856773376465,grad_norm: 0.9999990888816985, iteration: 459044
loss: 1.0232797861099243,grad_norm: 0.7146642119438968, iteration: 459045
loss: 1.010990023612976,grad_norm: 0.7789140791533743, iteration: 459046
loss: 1.0030434131622314,grad_norm: 0.7676194259178564, iteration: 459047
loss: 1.0117194652557373,grad_norm: 0.7049633205927957, iteration: 459048
loss: 1.051377296447754,grad_norm: 0.9999990123096034, iteration: 459049
loss: 1.0264896154403687,grad_norm: 0.7458965450785415, iteration: 459050
loss: 1.0298256874084473,grad_norm: 0.9999999095779222, iteration: 459051
loss: 0.9754931926727295,grad_norm: 0.9591598338596219, iteration: 459052
loss: 0.9960783123970032,grad_norm: 0.8438371683713394, iteration: 459053
loss: 1.0241438150405884,grad_norm: 0.9999994241120168, iteration: 459054
loss: 0.9974556565284729,grad_norm: 0.698069379296881, iteration: 459055
loss: 0.9865963459014893,grad_norm: 0.7202327980109262, iteration: 459056
loss: 1.0138741731643677,grad_norm: 0.7488800747185022, iteration: 459057
loss: 1.0206159353256226,grad_norm: 0.6987490410732582, iteration: 459058
loss: 1.01139235496521,grad_norm: 0.7537173990785205, iteration: 459059
loss: 0.9869207143783569,grad_norm: 0.8503035772010147, iteration: 459060
loss: 0.9792736172676086,grad_norm: 0.8518445955371129, iteration: 459061
loss: 1.030472755432129,grad_norm: 0.7801226183233217, iteration: 459062
loss: 1.020104169845581,grad_norm: 0.822849203882515, iteration: 459063
loss: 0.9652003049850464,grad_norm: 0.7300735400906431, iteration: 459064
loss: 0.9785906076431274,grad_norm: 0.7290244055532124, iteration: 459065
loss: 0.9976189136505127,grad_norm: 0.6986380581552201, iteration: 459066
loss: 0.9949443936347961,grad_norm: 0.7543039673749604, iteration: 459067
loss: 1.0091733932495117,grad_norm: 0.8319968844219533, iteration: 459068
loss: 0.9975061416625977,grad_norm: 0.9999991634476764, iteration: 459069
loss: 1.0226871967315674,grad_norm: 0.679894231053972, iteration: 459070
loss: 1.0159813165664673,grad_norm: 0.8763530668945256, iteration: 459071
loss: 0.9954071044921875,grad_norm: 0.8734631344963586, iteration: 459072
loss: 0.9950246214866638,grad_norm: 0.7072085010921805, iteration: 459073
loss: 0.9575000405311584,grad_norm: 0.8138597031514577, iteration: 459074
loss: 1.0314587354660034,grad_norm: 0.8981345702451503, iteration: 459075
loss: 1.018679141998291,grad_norm: 0.7742595062402186, iteration: 459076
loss: 1.0978504419326782,grad_norm: 0.9999990757961622, iteration: 459077
loss: 0.9833502173423767,grad_norm: 0.7170123435198905, iteration: 459078
loss: 1.000467300415039,grad_norm: 0.9999994256106601, iteration: 459079
loss: 0.9903987050056458,grad_norm: 0.6838001911370429, iteration: 459080
loss: 1.0274593830108643,grad_norm: 0.7804870557313862, iteration: 459081
loss: 0.9924861788749695,grad_norm: 0.8669639297157185, iteration: 459082
loss: 1.0058220624923706,grad_norm: 0.8328144935282251, iteration: 459083
loss: 1.0231826305389404,grad_norm: 0.9999992203710073, iteration: 459084
loss: 0.9662643074989319,grad_norm: 0.7411532656682172, iteration: 459085
loss: 1.050887942314148,grad_norm: 0.7941016539373407, iteration: 459086
loss: 1.0356992483139038,grad_norm: 0.8482541956143521, iteration: 459087
loss: 0.9786151647567749,grad_norm: 0.8519060210103393, iteration: 459088
loss: 0.9806772470474243,grad_norm: 0.9199439594122165, iteration: 459089
loss: 0.9616780281066895,grad_norm: 0.9039774396301271, iteration: 459090
loss: 1.011415958404541,grad_norm: 0.7535343701010083, iteration: 459091
loss: 0.982117772102356,grad_norm: 0.7832928817082361, iteration: 459092
loss: 1.0368144512176514,grad_norm: 0.7357560765812817, iteration: 459093
loss: 1.0129833221435547,grad_norm: 0.6535748162481289, iteration: 459094
loss: 1.001105546951294,grad_norm: 0.8167427512021762, iteration: 459095
loss: 0.9963527917861938,grad_norm: 0.8068266878955324, iteration: 459096
loss: 1.0222597122192383,grad_norm: 0.701774077079607, iteration: 459097
loss: 1.0111443996429443,grad_norm: 0.7886868656976336, iteration: 459098
loss: 0.9854822754859924,grad_norm: 0.7945736861292583, iteration: 459099
loss: 1.024160385131836,grad_norm: 0.9999991319451397, iteration: 459100
loss: 0.9918021559715271,grad_norm: 0.8090209028354148, iteration: 459101
loss: 1.0513609647750854,grad_norm: 0.9999995835467937, iteration: 459102
loss: 1.0109201669692993,grad_norm: 0.7202794294241655, iteration: 459103
loss: 0.999816358089447,grad_norm: 0.7364792403624351, iteration: 459104
loss: 0.9687013030052185,grad_norm: 0.9300971330695469, iteration: 459105
loss: 1.0126434564590454,grad_norm: 0.8437909533539212, iteration: 459106
loss: 0.956712543964386,grad_norm: 0.7710918885286216, iteration: 459107
loss: 0.9969401359558105,grad_norm: 0.7158829323729912, iteration: 459108
loss: 1.0311107635498047,grad_norm: 0.901590413938288, iteration: 459109
loss: 0.9899889230728149,grad_norm: 0.9999994634853048, iteration: 459110
loss: 0.9911358952522278,grad_norm: 0.8580786601811349, iteration: 459111
loss: 0.9960691928863525,grad_norm: 0.9379877476487083, iteration: 459112
loss: 0.9810743927955627,grad_norm: 0.8616236791800052, iteration: 459113
loss: 0.9901574850082397,grad_norm: 0.6529572769875713, iteration: 459114
loss: 1.0018943548202515,grad_norm: 0.6847280757528417, iteration: 459115
loss: 1.0022021532058716,grad_norm: 0.7412391582436745, iteration: 459116
loss: 1.0173122882843018,grad_norm: 0.803194354831246, iteration: 459117
loss: 1.0230201482772827,grad_norm: 0.836201387638669, iteration: 459118
loss: 0.9920129776000977,grad_norm: 0.7580430363720845, iteration: 459119
loss: 0.9753512144088745,grad_norm: 0.8006672857513703, iteration: 459120
loss: 1.0300183296203613,grad_norm: 0.7968458167147213, iteration: 459121
loss: 0.9724838733673096,grad_norm: 0.7543480420067912, iteration: 459122
loss: 1.0147788524627686,grad_norm: 0.7975251299054466, iteration: 459123
loss: 1.0220603942871094,grad_norm: 0.8875067526836814, iteration: 459124
loss: 1.01576566696167,grad_norm: 0.823372363386615, iteration: 459125
loss: 1.0497843027114868,grad_norm: 0.967247066647854, iteration: 459126
loss: 0.9595114588737488,grad_norm: 0.6924533362845636, iteration: 459127
loss: 1.026468276977539,grad_norm: 0.8205013255654541, iteration: 459128
loss: 1.0024572610855103,grad_norm: 0.6500941086212447, iteration: 459129
loss: 1.0170453786849976,grad_norm: 0.7435811397096007, iteration: 459130
loss: 1.043558955192566,grad_norm: 0.9764152261494244, iteration: 459131
loss: 1.0127090215682983,grad_norm: 0.9636334074925371, iteration: 459132
loss: 1.0140265226364136,grad_norm: 0.7809560726724105, iteration: 459133
loss: 1.0373222827911377,grad_norm: 0.9999999479653722, iteration: 459134
loss: 1.038913369178772,grad_norm: 0.7016257252074397, iteration: 459135
loss: 0.9896573424339294,grad_norm: 0.7687478263058948, iteration: 459136
loss: 0.9954536557197571,grad_norm: 0.6238826536230717, iteration: 459137
loss: 0.996358335018158,grad_norm: 0.8454714528619189, iteration: 459138
loss: 1.0023741722106934,grad_norm: 0.7483530279208821, iteration: 459139
loss: 0.989841639995575,grad_norm: 0.6794863401054448, iteration: 459140
loss: 0.9694916009902954,grad_norm: 0.7608081264066628, iteration: 459141
loss: 0.9813625812530518,grad_norm: 0.8612897728996769, iteration: 459142
loss: 1.033623218536377,grad_norm: 0.9999993264036889, iteration: 459143
loss: 1.028429388999939,grad_norm: 0.7449390990714837, iteration: 459144
loss: 1.0614567995071411,grad_norm: 0.9999994210108569, iteration: 459145
loss: 1.016593098640442,grad_norm: 0.7479038207645323, iteration: 459146
loss: 0.9885219931602478,grad_norm: 0.8340134552065954, iteration: 459147
loss: 1.0178238153457642,grad_norm: 0.919332334880975, iteration: 459148
loss: 1.0029107332229614,grad_norm: 0.946109696595796, iteration: 459149
loss: 0.9877748489379883,grad_norm: 0.9769694617611927, iteration: 459150
loss: 0.995220422744751,grad_norm: 0.6895142608942554, iteration: 459151
loss: 0.9982284307479858,grad_norm: 0.8849390255454512, iteration: 459152
loss: 1.0824350118637085,grad_norm: 0.8599892691425101, iteration: 459153
loss: 1.0108445882797241,grad_norm: 0.7474189912095838, iteration: 459154
loss: 0.9997622966766357,grad_norm: 0.8309257349354813, iteration: 459155
loss: 1.0171358585357666,grad_norm: 0.9999993944068618, iteration: 459156
loss: 1.0315098762512207,grad_norm: 0.9999996901285856, iteration: 459157
loss: 0.9782783389091492,grad_norm: 0.8133182448006694, iteration: 459158
loss: 1.0098185539245605,grad_norm: 0.9999991199505982, iteration: 459159
loss: 1.0023313760757446,grad_norm: 0.7221654382736414, iteration: 459160
loss: 1.005843162536621,grad_norm: 0.727514144109056, iteration: 459161
loss: 1.0245953798294067,grad_norm: 0.7240673322857937, iteration: 459162
loss: 1.022638201713562,grad_norm: 0.8693830429993323, iteration: 459163
loss: 0.9827530980110168,grad_norm: 0.6804962178313405, iteration: 459164
loss: 1.0624332427978516,grad_norm: 0.6913442799059382, iteration: 459165
loss: 1.044665813446045,grad_norm: 0.9999994122014892, iteration: 459166
loss: 0.9813404679298401,grad_norm: 0.6599175494163604, iteration: 459167
loss: 0.9888746738433838,grad_norm: 0.721468508176417, iteration: 459168
loss: 1.0030733346939087,grad_norm: 0.7293810443003907, iteration: 459169
loss: 0.963413655757904,grad_norm: 0.7228162000912471, iteration: 459170
loss: 1.0281745195388794,grad_norm: 0.7361286044228939, iteration: 459171
loss: 1.0317583084106445,grad_norm: 0.86899557431998, iteration: 459172
loss: 0.961978018283844,grad_norm: 0.7567349839950336, iteration: 459173
loss: 0.9871808886528015,grad_norm: 0.8235296754030236, iteration: 459174
loss: 1.017986536026001,grad_norm: 0.726040937056247, iteration: 459175
loss: 1.0133658647537231,grad_norm: 0.8126251870014017, iteration: 459176
loss: 0.9848487377166748,grad_norm: 0.8986712341826433, iteration: 459177
loss: 1.0197923183441162,grad_norm: 0.7796675639214744, iteration: 459178
loss: 1.0023088455200195,grad_norm: 0.7018797019382288, iteration: 459179
loss: 1.0028117895126343,grad_norm: 0.7108478246024916, iteration: 459180
loss: 1.043644905090332,grad_norm: 0.9999993048774186, iteration: 459181
loss: 0.9794870615005493,grad_norm: 0.7855228145799323, iteration: 459182
loss: 0.9772970676422119,grad_norm: 0.7921001709878664, iteration: 459183
loss: 0.9872840642929077,grad_norm: 0.7261899140194054, iteration: 459184
loss: 1.0034340620040894,grad_norm: 0.7171263769661559, iteration: 459185
loss: 1.0310790538787842,grad_norm: 0.8178898163096997, iteration: 459186
loss: 1.0198845863342285,grad_norm: 0.9999993641150801, iteration: 459187
loss: 0.9609280824661255,grad_norm: 0.738929939463534, iteration: 459188
loss: 0.9830839037895203,grad_norm: 0.722042748715579, iteration: 459189
loss: 0.9956384301185608,grad_norm: 0.7723245225911257, iteration: 459190
loss: 1.0162272453308105,grad_norm: 0.7643411171046117, iteration: 459191
loss: 1.0077775716781616,grad_norm: 0.7989341224136931, iteration: 459192
loss: 1.0225088596343994,grad_norm: 0.6176238544903343, iteration: 459193
loss: 0.9643242359161377,grad_norm: 0.8301314838759333, iteration: 459194
loss: 1.0145673751831055,grad_norm: 0.921306079157583, iteration: 459195
loss: 1.0120443105697632,grad_norm: 0.8191835158583827, iteration: 459196
loss: 0.987790584564209,grad_norm: 0.7353079345084041, iteration: 459197
loss: 1.0103843212127686,grad_norm: 0.9999996874467416, iteration: 459198
loss: 0.9937015175819397,grad_norm: 0.8467722631079272, iteration: 459199
loss: 0.9938157200813293,grad_norm: 0.8057151584838669, iteration: 459200
loss: 1.0119518041610718,grad_norm: 0.9999992834769188, iteration: 459201
loss: 0.9804862141609192,grad_norm: 0.7777453297063049, iteration: 459202
loss: 0.999449610710144,grad_norm: 0.7073686135568441, iteration: 459203
loss: 0.9991649985313416,grad_norm: 0.7491291724590816, iteration: 459204
loss: 1.0097585916519165,grad_norm: 0.7522294834025665, iteration: 459205
loss: 1.0368353128433228,grad_norm: 0.9999996114446871, iteration: 459206
loss: 0.981451153755188,grad_norm: 0.8044464390685178, iteration: 459207
loss: 0.994111180305481,grad_norm: 0.7397049526528793, iteration: 459208
loss: 1.0753110647201538,grad_norm: 0.9999992969046879, iteration: 459209
loss: 0.9717517495155334,grad_norm: 0.7445045038320739, iteration: 459210
loss: 0.9806255102157593,grad_norm: 0.7099899684070454, iteration: 459211
loss: 0.9762070178985596,grad_norm: 0.7524591041279982, iteration: 459212
loss: 0.9734483957290649,grad_norm: 0.7399443887159584, iteration: 459213
loss: 0.9925474524497986,grad_norm: 0.8081860809575833, iteration: 459214
loss: 0.9905504584312439,grad_norm: 0.8750283779653063, iteration: 459215
loss: 1.0371061563491821,grad_norm: 0.9989020674318941, iteration: 459216
loss: 0.9911485314369202,grad_norm: 0.6531992413959619, iteration: 459217
loss: 0.9976537227630615,grad_norm: 0.6887857808247133, iteration: 459218
loss: 0.9987390637397766,grad_norm: 0.7038803611907868, iteration: 459219
loss: 0.9619148373603821,grad_norm: 0.7634711393167368, iteration: 459220
loss: 0.9597466588020325,grad_norm: 0.7908205081930999, iteration: 459221
loss: 0.9936674237251282,grad_norm: 0.8027840982350968, iteration: 459222
loss: 0.9948869943618774,grad_norm: 0.9024769271662724, iteration: 459223
loss: 0.9994685649871826,grad_norm: 0.8129000675783755, iteration: 459224
loss: 0.9711098074913025,grad_norm: 0.9037964163258926, iteration: 459225
loss: 1.0028942823410034,grad_norm: 0.7204945333176618, iteration: 459226
loss: 1.043959379196167,grad_norm: 0.9999992016163637, iteration: 459227
loss: 0.9701403379440308,grad_norm: 0.8723501410738355, iteration: 459228
loss: 1.0112237930297852,grad_norm: 0.7647422338549295, iteration: 459229
loss: 1.0295110940933228,grad_norm: 0.8427682019818737, iteration: 459230
loss: 0.9769940376281738,grad_norm: 0.7998118192046911, iteration: 459231
loss: 1.0324050188064575,grad_norm: 0.9999993301556284, iteration: 459232
loss: 1.027563214302063,grad_norm: 0.835420187168584, iteration: 459233
loss: 1.0565043687820435,grad_norm: 0.9146356239691821, iteration: 459234
loss: 1.0080305337905884,grad_norm: 0.9999993107233968, iteration: 459235
loss: 0.98930823802948,grad_norm: 0.7504713403311806, iteration: 459236
loss: 0.9752815961837769,grad_norm: 0.9999991380535316, iteration: 459237
loss: 1.0124092102050781,grad_norm: 0.6897550512618199, iteration: 459238
loss: 0.9996367692947388,grad_norm: 0.7974809973889108, iteration: 459239
loss: 1.0917483568191528,grad_norm: 0.9999991860987024, iteration: 459240
loss: 0.9850409030914307,grad_norm: 0.8013722793134362, iteration: 459241
loss: 1.019382357597351,grad_norm: 0.7848130466775555, iteration: 459242
loss: 1.0083657503128052,grad_norm: 0.6462974460874911, iteration: 459243
loss: 0.9895793199539185,grad_norm: 0.7519408619030483, iteration: 459244
loss: 0.9571805596351624,grad_norm: 0.7215026985049684, iteration: 459245
loss: 1.019854187965393,grad_norm: 0.7527334797885382, iteration: 459246
loss: 1.016783595085144,grad_norm: 0.9866099780147563, iteration: 459247
loss: 1.0212547779083252,grad_norm: 0.8369504791433142, iteration: 459248
loss: 0.963539183139801,grad_norm: 0.7405636929192279, iteration: 459249
loss: 1.0070927143096924,grad_norm: 0.999999715057976, iteration: 459250
loss: 0.9963476657867432,grad_norm: 0.8203141028878473, iteration: 459251
loss: 1.0083671808242798,grad_norm: 0.8003277869452813, iteration: 459252
loss: 1.0193525552749634,grad_norm: 0.7141139729247536, iteration: 459253
loss: 0.9745107293128967,grad_norm: 0.7798058504169008, iteration: 459254
loss: 1.011272668838501,grad_norm: 0.9999999647145632, iteration: 459255
loss: 1.0097206830978394,grad_norm: 0.6703934415263042, iteration: 459256
loss: 1.002332091331482,grad_norm: 0.8810543813557746, iteration: 459257
loss: 0.9938438534736633,grad_norm: 0.6153846219642456, iteration: 459258
loss: 1.0192315578460693,grad_norm: 0.8561927214546292, iteration: 459259
loss: 0.9911231398582458,grad_norm: 0.8754727513668653, iteration: 459260
loss: 1.0210891962051392,grad_norm: 0.7567141920899729, iteration: 459261
loss: 0.9707667827606201,grad_norm: 0.9649834417352117, iteration: 459262
loss: 1.032536268234253,grad_norm: 0.9999995958283815, iteration: 459263
loss: 1.0241312980651855,grad_norm: 0.8731232625949344, iteration: 459264
loss: 1.0199683904647827,grad_norm: 0.9206728310910167, iteration: 459265
loss: 0.9902124404907227,grad_norm: 0.7722863929198615, iteration: 459266
loss: 1.0305970907211304,grad_norm: 0.7999415591531674, iteration: 459267
loss: 1.0328675508499146,grad_norm: 0.8878316929718167, iteration: 459268
loss: 0.9874585270881653,grad_norm: 0.9352450237582414, iteration: 459269
loss: 0.9980912208557129,grad_norm: 0.9999992906691993, iteration: 459270
loss: 1.016584038734436,grad_norm: 0.9999998913652851, iteration: 459271
loss: 1.045253038406372,grad_norm: 0.8398422231274202, iteration: 459272
loss: 1.0051932334899902,grad_norm: 0.8236568636420518, iteration: 459273
loss: 1.009192943572998,grad_norm: 0.8444911919532536, iteration: 459274
loss: 1.027005672454834,grad_norm: 0.9999997482762447, iteration: 459275
loss: 1.0228391885757446,grad_norm: 0.764549109992576, iteration: 459276
loss: 0.9929468035697937,grad_norm: 0.6726954953528491, iteration: 459277
loss: 1.0380628108978271,grad_norm: 0.7008899677665461, iteration: 459278
loss: 0.9675324559211731,grad_norm: 0.7534657664842789, iteration: 459279
loss: 0.9821942448616028,grad_norm: 0.6877957020275528, iteration: 459280
loss: 0.9811835885047913,grad_norm: 0.7458805903284426, iteration: 459281
loss: 1.0051510334014893,grad_norm: 0.9999989541531291, iteration: 459282
loss: 1.0328929424285889,grad_norm: 0.7108756928354019, iteration: 459283
loss: 0.9879224896430969,grad_norm: 0.7614361191169554, iteration: 459284
loss: 1.0092170238494873,grad_norm: 0.7257091889209766, iteration: 459285
loss: 0.983102023601532,grad_norm: 0.8861104162465026, iteration: 459286
loss: 0.9874888062477112,grad_norm: 0.6300902586738467, iteration: 459287
loss: 1.0163507461547852,grad_norm: 0.7003843716521366, iteration: 459288
loss: 0.9811966419219971,grad_norm: 0.7281047417100177, iteration: 459289
loss: 0.9758370518684387,grad_norm: 0.5896746777329971, iteration: 459290
loss: 0.9861473441123962,grad_norm: 0.8927748857691798, iteration: 459291
loss: 0.9963760375976562,grad_norm: 0.9065520239315864, iteration: 459292
loss: 0.9904793500900269,grad_norm: 0.7043790257178176, iteration: 459293
loss: 1.0553925037384033,grad_norm: 0.9999999426080286, iteration: 459294
loss: 0.9879342913627625,grad_norm: 0.6977183470726113, iteration: 459295
loss: 1.0057706832885742,grad_norm: 0.9999992452377592, iteration: 459296
loss: 1.0193489789962769,grad_norm: 0.6485031166689327, iteration: 459297
loss: 0.9640251994132996,grad_norm: 0.8338017561613741, iteration: 459298
loss: 1.0173572301864624,grad_norm: 0.9999991424640344, iteration: 459299
loss: 0.9740978479385376,grad_norm: 0.7396708277837494, iteration: 459300
loss: 0.9877281785011292,grad_norm: 0.849515575258323, iteration: 459301
loss: 1.0160096883773804,grad_norm: 0.7422277434252248, iteration: 459302
loss: 1.0117167234420776,grad_norm: 0.6828365333711462, iteration: 459303
loss: 1.0210472345352173,grad_norm: 0.6911089272097687, iteration: 459304
loss: 1.0297770500183105,grad_norm: 0.8208797211738478, iteration: 459305
loss: 0.9987139701843262,grad_norm: 0.697705810946034, iteration: 459306
loss: 0.9678258895874023,grad_norm: 0.8023265557058017, iteration: 459307
loss: 1.0310786962509155,grad_norm: 0.7364926567232795, iteration: 459308
loss: 1.0065065622329712,grad_norm: 0.8919138884127606, iteration: 459309
loss: 0.9884855151176453,grad_norm: 0.68185679887836, iteration: 459310
loss: 1.052161693572998,grad_norm: 0.8524522819463247, iteration: 459311
loss: 0.9666731953620911,grad_norm: 0.999999633609696, iteration: 459312
loss: 0.9890764355659485,grad_norm: 0.6477107712310944, iteration: 459313
loss: 1.0122042894363403,grad_norm: 0.8522590734689101, iteration: 459314
loss: 0.9714680910110474,grad_norm: 0.8914862566125331, iteration: 459315
loss: 1.0168105363845825,grad_norm: 0.8409598719895716, iteration: 459316
loss: 1.0145256519317627,grad_norm: 0.8718891480363401, iteration: 459317
loss: 1.0071450471878052,grad_norm: 0.7423863934844974, iteration: 459318
loss: 0.9955593943595886,grad_norm: 0.912083054553055, iteration: 459319
loss: 1.0195646286010742,grad_norm: 0.7943948529029677, iteration: 459320
loss: 1.0111660957336426,grad_norm: 0.7301302494045052, iteration: 459321
loss: 1.0092711448669434,grad_norm: 0.6986905687070223, iteration: 459322
loss: 0.9950841665267944,grad_norm: 0.9718511949004839, iteration: 459323
loss: 0.958392858505249,grad_norm: 0.9453917728410304, iteration: 459324
loss: 0.9957104325294495,grad_norm: 0.6899995578359003, iteration: 459325
loss: 0.9787681698799133,grad_norm: 0.8134347993191579, iteration: 459326
loss: 1.0165554285049438,grad_norm: 0.8325409481260633, iteration: 459327
loss: 1.0128371715545654,grad_norm: 0.7177364697255406, iteration: 459328
loss: 1.0238580703735352,grad_norm: 0.7056873849744493, iteration: 459329
loss: 1.0129754543304443,grad_norm: 0.845419107346936, iteration: 459330
loss: 0.9916974306106567,grad_norm: 0.7297873843062486, iteration: 459331
loss: 0.993344783782959,grad_norm: 0.9999991753518958, iteration: 459332
loss: 0.9474461674690247,grad_norm: 0.7521293947211556, iteration: 459333
loss: 0.9946466684341431,grad_norm: 0.7457563059730743, iteration: 459334
loss: 0.9545118808746338,grad_norm: 0.8323279935690797, iteration: 459335
loss: 1.001336932182312,grad_norm: 0.6379486379410935, iteration: 459336
loss: 1.0423740148544312,grad_norm: 0.691133559433847, iteration: 459337
loss: 1.0071732997894287,grad_norm: 0.8137878700883886, iteration: 459338
loss: 1.0215662717819214,grad_norm: 0.7396801789377184, iteration: 459339
loss: 0.9998231530189514,grad_norm: 0.7231118442632968, iteration: 459340
loss: 1.0251940488815308,grad_norm: 0.9999991289095924, iteration: 459341
loss: 1.0001533031463623,grad_norm: 0.8388364620393198, iteration: 459342
loss: 1.0072757005691528,grad_norm: 0.9999989816502447, iteration: 459343
loss: 1.122146487236023,grad_norm: 0.9855033077365681, iteration: 459344
loss: 0.9990772008895874,grad_norm: 0.717808342626748, iteration: 459345
loss: 1.0393610000610352,grad_norm: 0.8330811237353162, iteration: 459346
loss: 0.9976533055305481,grad_norm: 0.8169311607978875, iteration: 459347
loss: 1.018394947052002,grad_norm: 0.9128156247345055, iteration: 459348
loss: 1.0291831493377686,grad_norm: 0.8053960140899892, iteration: 459349
loss: 1.000375747680664,grad_norm: 0.6924227649647725, iteration: 459350
loss: 0.9575583934783936,grad_norm: 0.776435658395677, iteration: 459351
loss: 0.973105788230896,grad_norm: 0.8456087280295408, iteration: 459352
loss: 1.0531624555587769,grad_norm: 0.9999997264850822, iteration: 459353
loss: 1.0647096633911133,grad_norm: 0.7385895923352509, iteration: 459354
loss: 0.9695191383361816,grad_norm: 0.8984064699562547, iteration: 459355
loss: 0.9802959561347961,grad_norm: 0.8271734753756762, iteration: 459356
loss: 1.010909080505371,grad_norm: 0.8540335047901852, iteration: 459357
loss: 0.9746498465538025,grad_norm: 0.879562173723829, iteration: 459358
loss: 1.0307649374008179,grad_norm: 0.6802836498184384, iteration: 459359
loss: 1.0217934846878052,grad_norm: 0.999999477090565, iteration: 459360
loss: 0.9417259097099304,grad_norm: 0.8031371160574353, iteration: 459361
loss: 1.0115910768508911,grad_norm: 0.8484089172394563, iteration: 459362
loss: 1.0306533575057983,grad_norm: 0.8462126392235542, iteration: 459363
loss: 1.011525273323059,grad_norm: 0.7418092585125429, iteration: 459364
loss: 0.9838892817497253,grad_norm: 0.8478864370314745, iteration: 459365
loss: 1.0243191719055176,grad_norm: 0.9513084284152319, iteration: 459366
loss: 1.0121076107025146,grad_norm: 0.7717641683669144, iteration: 459367
loss: 0.9715214967727661,grad_norm: 0.7722792396215048, iteration: 459368
loss: 1.0211427211761475,grad_norm: 0.8489701478484813, iteration: 459369
loss: 0.9796561002731323,grad_norm: 0.7482994682905825, iteration: 459370
loss: 0.9473283290863037,grad_norm: 0.7372109354805293, iteration: 459371
loss: 1.0066319704055786,grad_norm: 0.8904772083282048, iteration: 459372
loss: 1.0245953798294067,grad_norm: 0.7259611752406359, iteration: 459373
loss: 0.9985713362693787,grad_norm: 0.8066707142178849, iteration: 459374
loss: 1.0364179611206055,grad_norm: 0.9999992418963705, iteration: 459375
loss: 0.9695383906364441,grad_norm: 0.8227653737336962, iteration: 459376
loss: 0.9844463467597961,grad_norm: 0.783771905076546, iteration: 459377
loss: 1.0228707790374756,grad_norm: 0.9634887248392291, iteration: 459378
loss: 1.0016347169876099,grad_norm: 0.7578980190133491, iteration: 459379
loss: 0.9713311791419983,grad_norm: 0.6754165809531253, iteration: 459380
loss: 0.9742534160614014,grad_norm: 0.7892254009742866, iteration: 459381
loss: 0.9942543506622314,grad_norm: 0.9999989493454196, iteration: 459382
loss: 1.0129231214523315,grad_norm: 0.8281959901214859, iteration: 459383
loss: 1.0071827173233032,grad_norm: 0.8578835674239194, iteration: 459384
loss: 1.0139265060424805,grad_norm: 0.7548996601658492, iteration: 459385
loss: 0.9611977934837341,grad_norm: 0.5875474446543388, iteration: 459386
loss: 1.0136781930923462,grad_norm: 0.9999995473595298, iteration: 459387
loss: 0.9938695430755615,grad_norm: 0.8221913079937514, iteration: 459388
loss: 0.9906483888626099,grad_norm: 0.8038934183524327, iteration: 459389
loss: 1.0124009847640991,grad_norm: 0.7566274126588214, iteration: 459390
loss: 0.9969117641448975,grad_norm: 0.7800573129014432, iteration: 459391
loss: 1.0146211385726929,grad_norm: 0.7422060782571317, iteration: 459392
loss: 0.985273540019989,grad_norm: 0.7562220307346099, iteration: 459393
loss: 0.9892008304595947,grad_norm: 0.7593241388135915, iteration: 459394
loss: 1.0672411918640137,grad_norm: 0.9305814198558059, iteration: 459395
loss: 1.0121229887008667,grad_norm: 0.6193224926878129, iteration: 459396
loss: 0.9961677193641663,grad_norm: 0.8505576000897874, iteration: 459397
loss: 0.965483546257019,grad_norm: 0.7262796840899618, iteration: 459398
loss: 0.9979289174079895,grad_norm: 0.7120543312230425, iteration: 459399
loss: 1.0208122730255127,grad_norm: 0.9908740818764986, iteration: 459400
loss: 0.9633721709251404,grad_norm: 0.713916368683368, iteration: 459401
loss: 0.9908515810966492,grad_norm: 0.7769043481076886, iteration: 459402
loss: 1.0500787496566772,grad_norm: 0.7100526617414726, iteration: 459403
loss: 1.0042879581451416,grad_norm: 0.9545641517088755, iteration: 459404
loss: 0.9930023550987244,grad_norm: 0.677635573363543, iteration: 459405
loss: 1.029409408569336,grad_norm: 0.7709483497312803, iteration: 459406
loss: 1.0729957818984985,grad_norm: 0.999999464471528, iteration: 459407
loss: 1.0954011678695679,grad_norm: 0.6821437496421656, iteration: 459408
loss: 0.9623862504959106,grad_norm: 0.9239867373981931, iteration: 459409
loss: 1.0228917598724365,grad_norm: 0.7238140626052995, iteration: 459410
loss: 1.0105332136154175,grad_norm: 0.7371192415039421, iteration: 459411
loss: 1.013890266418457,grad_norm: 0.7702361549778307, iteration: 459412
loss: 1.1078921556472778,grad_norm: 0.9999993703441361, iteration: 459413
loss: 0.9842894077301025,grad_norm: 0.8269774494539023, iteration: 459414
loss: 1.0347298383712769,grad_norm: 0.768765893519282, iteration: 459415
loss: 0.9710366129875183,grad_norm: 0.7075251209792803, iteration: 459416
loss: 1.0144200325012207,grad_norm: 0.786046974366382, iteration: 459417
loss: 1.0458883047103882,grad_norm: 0.8766949527996306, iteration: 459418
loss: 1.0227174758911133,grad_norm: 0.7515356740673044, iteration: 459419
loss: 1.0202460289001465,grad_norm: 0.799990300497631, iteration: 459420
loss: 1.0004607439041138,grad_norm: 0.8193574715388374, iteration: 459421
loss: 0.9818804264068604,grad_norm: 0.7666845113373083, iteration: 459422
loss: 1.030572772026062,grad_norm: 0.8181196616547267, iteration: 459423
loss: 0.9867459535598755,grad_norm: 0.892349995811615, iteration: 459424
loss: 1.0361577272415161,grad_norm: 0.8171706665451229, iteration: 459425
loss: 1.0224320888519287,grad_norm: 0.9999991479134384, iteration: 459426
loss: 0.9897615313529968,grad_norm: 0.7744080996971107, iteration: 459427
loss: 0.9836909770965576,grad_norm: 0.6767729563353836, iteration: 459428
loss: 1.0117712020874023,grad_norm: 0.7890473777235306, iteration: 459429
loss: 1.0068390369415283,grad_norm: 0.9999991330942782, iteration: 459430
loss: 1.0322656631469727,grad_norm: 0.7314128900605522, iteration: 459431
loss: 0.9781927466392517,grad_norm: 0.7490858341187111, iteration: 459432
loss: 0.9645840525627136,grad_norm: 0.8108140816965498, iteration: 459433
loss: 0.9832100868225098,grad_norm: 0.9999992380791549, iteration: 459434
loss: 0.9479585886001587,grad_norm: 0.8852530924955146, iteration: 459435
loss: 0.9885509014129639,grad_norm: 0.6444836641518338, iteration: 459436
loss: 0.9649505615234375,grad_norm: 0.7570599263576729, iteration: 459437
loss: 0.965214192867279,grad_norm: 0.6912218422948916, iteration: 459438
loss: 0.9925525188446045,grad_norm: 0.7679441592524872, iteration: 459439
loss: 1.0121712684631348,grad_norm: 0.700699326904407, iteration: 459440
loss: 0.9970748424530029,grad_norm: 0.7597741612052551, iteration: 459441
loss: 1.0179229974746704,grad_norm: 0.9999991421364027, iteration: 459442
loss: 0.9762774705886841,grad_norm: 0.8043510344065228, iteration: 459443
loss: 1.0171093940734863,grad_norm: 0.7653294098363291, iteration: 459444
loss: 1.0009664297103882,grad_norm: 0.8996943886869704, iteration: 459445
loss: 0.9895532727241516,grad_norm: 0.7875075926862004, iteration: 459446
loss: 1.0098421573638916,grad_norm: 0.8238854078346269, iteration: 459447
loss: 1.01958167552948,grad_norm: 0.758914319951838, iteration: 459448
loss: 1.0157220363616943,grad_norm: 0.7262916289368044, iteration: 459449
loss: 1.0174932479858398,grad_norm: 0.8194604131515251, iteration: 459450
loss: 0.9705055356025696,grad_norm: 0.7053900475490977, iteration: 459451
loss: 1.0217775106430054,grad_norm: 0.710873668890743, iteration: 459452
loss: 0.9737356305122375,grad_norm: 0.8666916580370665, iteration: 459453
loss: 0.9981569647789001,grad_norm: 0.7672497330296218, iteration: 459454
loss: 1.0012028217315674,grad_norm: 0.7763521329267223, iteration: 459455
loss: 1.0008665323257446,grad_norm: 0.7910857948587986, iteration: 459456
loss: 0.9882766008377075,grad_norm: 0.7548415373580896, iteration: 459457
loss: 0.9760383367538452,grad_norm: 0.6939070460454803, iteration: 459458
loss: 1.0284571647644043,grad_norm: 0.874452404095828, iteration: 459459
loss: 0.964188277721405,grad_norm: 0.9174984220191725, iteration: 459460
loss: 0.9715537428855896,grad_norm: 0.77579051522925, iteration: 459461
loss: 1.0399353504180908,grad_norm: 0.7944612608179192, iteration: 459462
loss: 1.0341105461120605,grad_norm: 0.8166570085485425, iteration: 459463
loss: 1.0031437873840332,grad_norm: 0.8963903672048356, iteration: 459464
loss: 0.9719167947769165,grad_norm: 0.7863634674151891, iteration: 459465
loss: 1.024564266204834,grad_norm: 0.8703500524129448, iteration: 459466
loss: 1.0361639261245728,grad_norm: 0.7974098368855814, iteration: 459467
loss: 0.923965334892273,grad_norm: 0.7234760490897189, iteration: 459468
loss: 0.9566347002983093,grad_norm: 0.7020713196565652, iteration: 459469
loss: 0.9866682291030884,grad_norm: 0.7222624041744286, iteration: 459470
loss: 0.9644755721092224,grad_norm: 0.765723421515081, iteration: 459471
loss: 1.0270923376083374,grad_norm: 0.6774349026684157, iteration: 459472
loss: 1.002839207649231,grad_norm: 0.9520969566810515, iteration: 459473
loss: 1.0233628749847412,grad_norm: 0.724272926403195, iteration: 459474
loss: 1.0471744537353516,grad_norm: 0.6858079792028929, iteration: 459475
loss: 0.9676594138145447,grad_norm: 0.7058568095226602, iteration: 459476
loss: 0.970575213432312,grad_norm: 0.8254366020893585, iteration: 459477
loss: 0.992784321308136,grad_norm: 0.8524312226786678, iteration: 459478
loss: 1.0208791494369507,grad_norm: 0.7409939620689937, iteration: 459479
loss: 0.9881871342658997,grad_norm: 0.6366203953484105, iteration: 459480
loss: 1.0198173522949219,grad_norm: 0.729856463903269, iteration: 459481
loss: 0.964023768901825,grad_norm: 0.7300107725906431, iteration: 459482
loss: 1.0054823160171509,grad_norm: 0.6706257642179358, iteration: 459483
loss: 1.0337120294570923,grad_norm: 0.8683480641771559, iteration: 459484
loss: 1.0233758687973022,grad_norm: 0.8432541726054703, iteration: 459485
loss: 1.0199308395385742,grad_norm: 0.9999990057072728, iteration: 459486
loss: 0.9892382025718689,grad_norm: 0.7592177624807872, iteration: 459487
loss: 0.9967911243438721,grad_norm: 0.8566386776654052, iteration: 459488
loss: 1.1920135021209717,grad_norm: 0.9999999597275123, iteration: 459489
loss: 0.98656165599823,grad_norm: 0.6819081171344475, iteration: 459490
loss: 0.9813024401664734,grad_norm: 0.7582247598230228, iteration: 459491
loss: 0.9836596250534058,grad_norm: 0.7961894891254779, iteration: 459492
loss: 1.0051145553588867,grad_norm: 0.8538883717490893, iteration: 459493
loss: 0.9961426258087158,grad_norm: 0.729843661481031, iteration: 459494
loss: 1.0201460123062134,grad_norm: 0.6690943115761002, iteration: 459495
loss: 0.9541818499565125,grad_norm: 0.9999990029959628, iteration: 459496
loss: 0.9839927554130554,grad_norm: 0.6827370159791323, iteration: 459497
loss: 1.0244709253311157,grad_norm: 0.7367265043976196, iteration: 459498
loss: 0.9719625115394592,grad_norm: 0.8398427395030394, iteration: 459499
loss: 1.0003708600997925,grad_norm: 0.6413004895137312, iteration: 459500
loss: 0.9446651339530945,grad_norm: 0.8322366944754688, iteration: 459501
loss: 1.027475357055664,grad_norm: 0.9035301492886875, iteration: 459502
loss: 0.9404112696647644,grad_norm: 0.6944323929061529, iteration: 459503
loss: 0.9629818201065063,grad_norm: 0.8933918946818841, iteration: 459504
loss: 0.9971439242362976,grad_norm: 0.6443997371002904, iteration: 459505
loss: 0.9826322197914124,grad_norm: 0.9533197459401428, iteration: 459506
loss: 0.9967344403266907,grad_norm: 0.8063102736536616, iteration: 459507
loss: 0.9854034185409546,grad_norm: 0.8180992530727159, iteration: 459508
loss: 0.9893943071365356,grad_norm: 0.687678522557354, iteration: 459509
loss: 0.9934910535812378,grad_norm: 0.6694989209557146, iteration: 459510
loss: 0.9872491955757141,grad_norm: 0.8102633822769276, iteration: 459511
loss: 1.0195008516311646,grad_norm: 0.7692094613805218, iteration: 459512
loss: 1.0195960998535156,grad_norm: 0.9999992473459183, iteration: 459513
loss: 0.9741778373718262,grad_norm: 0.644833369951585, iteration: 459514
loss: 1.0096226930618286,grad_norm: 0.8530674805856063, iteration: 459515
loss: 1.0013375282287598,grad_norm: 0.6819375131206336, iteration: 459516
loss: 1.0657604932785034,grad_norm: 0.999999883793715, iteration: 459517
loss: 0.9815348386764526,grad_norm: 0.774351660347316, iteration: 459518
loss: 0.9747087359428406,grad_norm: 0.7903161012512152, iteration: 459519
loss: 0.9377045035362244,grad_norm: 0.9261313965424118, iteration: 459520
loss: 0.9903653860092163,grad_norm: 0.7422362186918143, iteration: 459521
loss: 0.9719541072845459,grad_norm: 0.7748011192032432, iteration: 459522
loss: 1.0003491640090942,grad_norm: 0.804501483668889, iteration: 459523
loss: 0.9629560708999634,grad_norm: 0.7834573778410834, iteration: 459524
loss: 1.0136840343475342,grad_norm: 0.7466915344923709, iteration: 459525
loss: 1.076512098312378,grad_norm: 0.7728651481729426, iteration: 459526
loss: 1.0132744312286377,grad_norm: 0.7637487538363597, iteration: 459527
loss: 1.0129361152648926,grad_norm: 0.9147571702734456, iteration: 459528
loss: 0.9922700524330139,grad_norm: 0.8487605482111515, iteration: 459529
loss: 0.9651476144790649,grad_norm: 0.7802716369771627, iteration: 459530
loss: 0.9792078137397766,grad_norm: 0.7468070313556969, iteration: 459531
loss: 1.0176093578338623,grad_norm: 0.845569140637846, iteration: 459532
loss: 1.004796028137207,grad_norm: 0.9999993531476499, iteration: 459533
loss: 1.0189623832702637,grad_norm: 0.825043544835556, iteration: 459534
loss: 0.9720662236213684,grad_norm: 0.7330117178278057, iteration: 459535
loss: 1.0494945049285889,grad_norm: 0.9999998830252137, iteration: 459536
loss: 1.0104411840438843,grad_norm: 0.7224657319834153, iteration: 459537
loss: 0.9992377758026123,grad_norm: 0.7670711866073606, iteration: 459538
loss: 0.9929381012916565,grad_norm: 0.7593404097888673, iteration: 459539
loss: 0.9901409149169922,grad_norm: 0.7246597615054272, iteration: 459540
loss: 0.9707410335540771,grad_norm: 0.8982216705349841, iteration: 459541
loss: 0.960114061832428,grad_norm: 0.9893740734352464, iteration: 459542
loss: 0.9826580286026001,grad_norm: 0.9380945634399398, iteration: 459543
loss: 0.9344530701637268,grad_norm: 0.8415616330586196, iteration: 459544
loss: 1.0563275814056396,grad_norm: 0.9999998823223125, iteration: 459545
loss: 1.0071525573730469,grad_norm: 0.8068421794989799, iteration: 459546
loss: 0.962617039680481,grad_norm: 0.687784874055026, iteration: 459547
loss: 1.0096287727355957,grad_norm: 0.8384468268233518, iteration: 459548
loss: 0.9829376339912415,grad_norm: 0.8009246462824582, iteration: 459549
loss: 0.9958731532096863,grad_norm: 0.7854125212971038, iteration: 459550
loss: 1.0137104988098145,grad_norm: 0.737637074655669, iteration: 459551
loss: 1.0010768175125122,grad_norm: 0.9633747236082971, iteration: 459552
loss: 0.9932863712310791,grad_norm: 0.7424183631647401, iteration: 459553
loss: 0.9657742977142334,grad_norm: 0.8085937968573746, iteration: 459554
loss: 1.0153238773345947,grad_norm: 0.7622393619423232, iteration: 459555
loss: 1.0277667045593262,grad_norm: 0.7306798245426634, iteration: 459556
loss: 0.972201943397522,grad_norm: 0.6950865954846227, iteration: 459557
loss: 1.0137733221054077,grad_norm: 0.8097555443221227, iteration: 459558
loss: 0.978488564491272,grad_norm: 0.7661838744407007, iteration: 459559
loss: 0.9910197257995605,grad_norm: 0.616616717082115, iteration: 459560
loss: 0.9936453700065613,grad_norm: 0.9068700554078044, iteration: 459561
loss: 1.007689118385315,grad_norm: 0.7516943658596776, iteration: 459562
loss: 1.0029199123382568,grad_norm: 0.8089391365690958, iteration: 459563
loss: 0.9962578415870667,grad_norm: 0.7669380483428397, iteration: 459564
loss: 1.0626417398452759,grad_norm: 0.9999992414312172, iteration: 459565
loss: 0.9917561411857605,grad_norm: 0.8429424514610901, iteration: 459566
loss: 1.0100573301315308,grad_norm: 0.7069896257926249, iteration: 459567
loss: 0.9792569279670715,grad_norm: 0.7579651320277984, iteration: 459568
loss: 1.0051041841506958,grad_norm: 0.8226361063995347, iteration: 459569
loss: 0.9817197918891907,grad_norm: 0.8162031002343603, iteration: 459570
loss: 1.0310407876968384,grad_norm: 0.9221560512556815, iteration: 459571
loss: 0.9766829013824463,grad_norm: 0.822241257697712, iteration: 459572
loss: 1.0199130773544312,grad_norm: 0.7486837229639095, iteration: 459573
loss: 0.9882602095603943,grad_norm: 0.7565948095614028, iteration: 459574
loss: 1.0708489418029785,grad_norm: 0.7956160506268153, iteration: 459575
loss: 0.9918461441993713,grad_norm: 0.5840266965092972, iteration: 459576
loss: 1.043619990348816,grad_norm: 0.768295398734141, iteration: 459577
loss: 0.9789377450942993,grad_norm: 0.7913628702044058, iteration: 459578
loss: 1.079787254333496,grad_norm: 0.9914964546176578, iteration: 459579
loss: 1.0658295154571533,grad_norm: 0.9999996651804341, iteration: 459580
loss: 0.959932804107666,grad_norm: 0.9999991311359754, iteration: 459581
loss: 0.9918944835662842,grad_norm: 0.7506783572466784, iteration: 459582
loss: 1.0024818181991577,grad_norm: 0.7717023357933442, iteration: 459583
loss: 1.0045830011367798,grad_norm: 0.7318336540920798, iteration: 459584
loss: 1.0087242126464844,grad_norm: 0.7594122971326112, iteration: 459585
loss: 1.0318684577941895,grad_norm: 0.9999998540812676, iteration: 459586
loss: 1.0361313819885254,grad_norm: 0.9999992443489998, iteration: 459587
loss: 1.0034466981887817,grad_norm: 0.7533834069910945, iteration: 459588
loss: 0.9859976172447205,grad_norm: 0.7579448362840161, iteration: 459589
loss: 0.9886367321014404,grad_norm: 0.8199452283154318, iteration: 459590
loss: 0.9855660200119019,grad_norm: 0.6624112517693893, iteration: 459591
loss: 1.0050684213638306,grad_norm: 0.8651892580585712, iteration: 459592
loss: 1.0151615142822266,grad_norm: 0.7857115540458537, iteration: 459593
loss: 0.9495204091072083,grad_norm: 0.8108060765595939, iteration: 459594
loss: 0.9941638708114624,grad_norm: 0.7745688079076618, iteration: 459595
loss: 1.0221023559570312,grad_norm: 0.7304808135053008, iteration: 459596
loss: 0.9836430549621582,grad_norm: 0.7189948524336045, iteration: 459597
loss: 1.0354382991790771,grad_norm: 0.8593604236670509, iteration: 459598
loss: 0.9807276725769043,grad_norm: 0.7662688075968688, iteration: 459599
loss: 1.0177149772644043,grad_norm: 0.7341832546664819, iteration: 459600
loss: 0.9938141703605652,grad_norm: 0.7567469284652801, iteration: 459601
loss: 0.9904744625091553,grad_norm: 0.8819983044426902, iteration: 459602
loss: 1.0863440036773682,grad_norm: 0.9999995101710949, iteration: 459603
loss: 0.974746584892273,grad_norm: 0.8558229624323206, iteration: 459604
loss: 0.9824496507644653,grad_norm: 0.8827780854724356, iteration: 459605
loss: 1.0080076456069946,grad_norm: 0.6797280203021485, iteration: 459606
loss: 0.9579773545265198,grad_norm: 0.9999998999066846, iteration: 459607
loss: 0.9818251729011536,grad_norm: 0.950404630252478, iteration: 459608
loss: 0.9754664301872253,grad_norm: 0.6883029076522387, iteration: 459609
loss: 0.9624994993209839,grad_norm: 0.7477366143018109, iteration: 459610
loss: 1.0205754041671753,grad_norm: 0.7960053397867368, iteration: 459611
loss: 1.0003106594085693,grad_norm: 0.6252372556506558, iteration: 459612
loss: 0.9748597741127014,grad_norm: 0.8851064999106923, iteration: 459613
loss: 1.1380704641342163,grad_norm: 0.9999995701785106, iteration: 459614
loss: 1.018032431602478,grad_norm: 0.9439044529698538, iteration: 459615
loss: 1.0042603015899658,grad_norm: 0.7367774905520705, iteration: 459616
loss: 1.0381799936294556,grad_norm: 0.6837298322643108, iteration: 459617
loss: 1.0051088333129883,grad_norm: 0.8636468333996062, iteration: 459618
loss: 1.0130687952041626,grad_norm: 0.7325979652493558, iteration: 459619
loss: 0.9445287585258484,grad_norm: 0.9557745010079014, iteration: 459620
loss: 0.9873332381248474,grad_norm: 0.9999999236969187, iteration: 459621
loss: 1.0622272491455078,grad_norm: 0.9999998353889307, iteration: 459622
loss: 0.9815689921379089,grad_norm: 0.7492384939503166, iteration: 459623
loss: 0.9911817312240601,grad_norm: 0.8795760182904112, iteration: 459624
loss: 1.019136905670166,grad_norm: 0.8159150828072639, iteration: 459625
loss: 0.9557231664657593,grad_norm: 0.7143002042326346, iteration: 459626
loss: 1.0213189125061035,grad_norm: 0.7441706243631051, iteration: 459627
loss: 1.0981779098510742,grad_norm: 0.9999999160678712, iteration: 459628
loss: 0.9817923903465271,grad_norm: 0.8199382665741384, iteration: 459629
loss: 1.0000702142715454,grad_norm: 0.7722453936022462, iteration: 459630
loss: 1.002616047859192,grad_norm: 0.7054109331284737, iteration: 459631
loss: 1.0814827680587769,grad_norm: 0.8559577822947206, iteration: 459632
loss: 1.0042856931686401,grad_norm: 0.6907039601161536, iteration: 459633
loss: 1.0188840627670288,grad_norm: 0.7269400037335139, iteration: 459634
loss: 0.9686650037765503,grad_norm: 0.680764928322968, iteration: 459635
loss: 0.9863592386245728,grad_norm: 0.8049371932654932, iteration: 459636
loss: 0.9774349331855774,grad_norm: 0.7166573234052863, iteration: 459637
loss: 1.0259253978729248,grad_norm: 0.7789316885349411, iteration: 459638
loss: 0.958599328994751,grad_norm: 0.7320666501897389, iteration: 459639
loss: 0.9718179106712341,grad_norm: 0.6804897363282998, iteration: 459640
loss: 1.022539496421814,grad_norm: 0.8849617014926079, iteration: 459641
loss: 1.026562213897705,grad_norm: 0.782830584630293, iteration: 459642
loss: 1.0056755542755127,grad_norm: 0.810954558702491, iteration: 459643
loss: 1.0231122970581055,grad_norm: 0.8757845535817891, iteration: 459644
loss: 1.0185599327087402,grad_norm: 0.7575656736329697, iteration: 459645
loss: 1.0110875368118286,grad_norm: 0.8314089215308715, iteration: 459646
loss: 0.9638335704803467,grad_norm: 0.8417990847067224, iteration: 459647
loss: 0.9855612516403198,grad_norm: 0.8546871179930101, iteration: 459648
loss: 1.010790228843689,grad_norm: 0.9132866319928454, iteration: 459649
loss: 0.9999316334724426,grad_norm: 0.9609299717573156, iteration: 459650
loss: 0.991550624370575,grad_norm: 0.7675459707781981, iteration: 459651
loss: 1.1063469648361206,grad_norm: 0.9999999125844433, iteration: 459652
loss: 0.992942750453949,grad_norm: 0.9741533359493062, iteration: 459653
loss: 1.001861810684204,grad_norm: 0.7378035192647914, iteration: 459654
loss: 1.0096690654754639,grad_norm: 0.8043523597295406, iteration: 459655
loss: 1.0052739381790161,grad_norm: 0.7854818451020532, iteration: 459656
loss: 1.0706508159637451,grad_norm: 0.7079917770953754, iteration: 459657
loss: 1.0749225616455078,grad_norm: 0.99999907130463, iteration: 459658
loss: 1.0138596296310425,grad_norm: 0.6642524125762452, iteration: 459659
loss: 1.007672905921936,grad_norm: 0.7758499346954647, iteration: 459660
loss: 1.025899052619934,grad_norm: 0.9999994865582896, iteration: 459661
loss: 0.9964039921760559,grad_norm: 0.8479278227226696, iteration: 459662
loss: 1.0798698663711548,grad_norm: 0.9999991661786072, iteration: 459663
loss: 1.025631070137024,grad_norm: 0.9999995024393981, iteration: 459664
loss: 1.004762053489685,grad_norm: 0.9468892055808789, iteration: 459665
loss: 1.1119844913482666,grad_norm: 0.9999996079688445, iteration: 459666
loss: 1.0058032274246216,grad_norm: 0.8265654820947148, iteration: 459667
loss: 1.04111647605896,grad_norm: 0.9176480480054864, iteration: 459668
loss: 0.9896920323371887,grad_norm: 0.8196142458994475, iteration: 459669
loss: 0.9757205247879028,grad_norm: 0.6950462625133227, iteration: 459670
loss: 1.0072540044784546,grad_norm: 0.8769846220737044, iteration: 459671
loss: 0.9947199821472168,grad_norm: 0.7196898561271599, iteration: 459672
loss: 1.0096933841705322,grad_norm: 0.8319001333197262, iteration: 459673
loss: 1.1227586269378662,grad_norm: 1.0000000331651189, iteration: 459674
loss: 0.9921025633811951,grad_norm: 0.668159258698803, iteration: 459675
loss: 0.9868090152740479,grad_norm: 0.8309538104339573, iteration: 459676
loss: 1.052626609802246,grad_norm: 0.9999991843448623, iteration: 459677
loss: 1.239572286605835,grad_norm: 0.9999997942314807, iteration: 459678
loss: 1.2755517959594727,grad_norm: 0.9999998818209177, iteration: 459679
loss: 0.9637022018432617,grad_norm: 0.6322399717511873, iteration: 459680
loss: 1.0941828489303589,grad_norm: 0.7297678205253154, iteration: 459681
loss: 1.0288034677505493,grad_norm: 0.8293224215403077, iteration: 459682
loss: 0.9991026520729065,grad_norm: 0.7708927599365101, iteration: 459683
loss: 1.1999151706695557,grad_norm: 0.9999996309455066, iteration: 459684
loss: 1.1449618339538574,grad_norm: 0.8294815510359623, iteration: 459685
loss: 1.1024452447891235,grad_norm: 0.8718431744557774, iteration: 459686
loss: 1.0194945335388184,grad_norm: 0.9505056842933042, iteration: 459687
loss: 0.9733781814575195,grad_norm: 0.8886896459030539, iteration: 459688
loss: 1.0166438817977905,grad_norm: 0.8614895135519051, iteration: 459689
loss: 0.995762050151825,grad_norm: 0.6810084749864999, iteration: 459690
loss: 1.0104193687438965,grad_norm: 0.892169880972908, iteration: 459691
loss: 0.98308265209198,grad_norm: 0.9999993626956896, iteration: 459692
loss: 1.1044907569885254,grad_norm: 0.9999999844203237, iteration: 459693
loss: 1.0148578882217407,grad_norm: 0.7258865289221463, iteration: 459694
loss: 1.0078431367874146,grad_norm: 0.9999991381921602, iteration: 459695
loss: 0.9784684181213379,grad_norm: 0.6410863604024505, iteration: 459696
loss: 0.9528433680534363,grad_norm: 0.9999996274250326, iteration: 459697
loss: 1.0420929193496704,grad_norm: 0.8820415588485685, iteration: 459698
loss: 1.1421226263046265,grad_norm: 0.9980717448432197, iteration: 459699
loss: 1.0313736200332642,grad_norm: 0.6775467490979403, iteration: 459700
loss: 1.068009614944458,grad_norm: 0.9999994077740806, iteration: 459701
loss: 1.0419416427612305,grad_norm: 0.673255599200257, iteration: 459702
loss: 1.057255744934082,grad_norm: 0.9999992717510613, iteration: 459703
loss: 0.9579752087593079,grad_norm: 0.7980360527484228, iteration: 459704
loss: 1.0333205461502075,grad_norm: 0.7261548242409125, iteration: 459705
loss: 0.9960376024246216,grad_norm: 0.7088922726826258, iteration: 459706
loss: 1.0612375736236572,grad_norm: 0.8243711777243126, iteration: 459707
loss: 1.145268440246582,grad_norm: 1.000000036819697, iteration: 459708
loss: 1.022875189781189,grad_norm: 0.9999992679216432, iteration: 459709
loss: 0.9764098525047302,grad_norm: 0.7056498086550625, iteration: 459710
loss: 0.9849478602409363,grad_norm: 0.7743062843446673, iteration: 459711
loss: 1.144490122795105,grad_norm: 0.8916393570672689, iteration: 459712
loss: 1.1225407123565674,grad_norm: 0.9999997397552018, iteration: 459713
loss: 0.9941076040267944,grad_norm: 0.6916407545808517, iteration: 459714
loss: 1.0220094919204712,grad_norm: 1.0000000094782433, iteration: 459715
loss: 1.033518671989441,grad_norm: 0.7911410876821494, iteration: 459716
loss: 0.9974806308746338,grad_norm: 0.7583240046053771, iteration: 459717
loss: 1.0451115369796753,grad_norm: 0.8935982527526063, iteration: 459718
loss: 1.0139479637145996,grad_norm: 0.7419080330949414, iteration: 459719
loss: 1.00740385055542,grad_norm: 0.9999996064313367, iteration: 459720
loss: 1.0443171262741089,grad_norm: 0.8231452674795183, iteration: 459721
loss: 1.031775951385498,grad_norm: 0.9183253623964003, iteration: 459722
loss: 1.0007902383804321,grad_norm: 0.6751720028245043, iteration: 459723
loss: 1.0299018621444702,grad_norm: 0.729499536278353, iteration: 459724
loss: 0.9789189696311951,grad_norm: 0.8473447618930894, iteration: 459725
loss: 0.9748691320419312,grad_norm: 0.7203280018360148, iteration: 459726
loss: 1.0177291631698608,grad_norm: 0.6177041454437836, iteration: 459727
loss: 0.9830663800239563,grad_norm: 0.879376343765676, iteration: 459728
loss: 1.0121325254440308,grad_norm: 0.7498957058785194, iteration: 459729
loss: 1.0742801427841187,grad_norm: 0.7206835044572053, iteration: 459730
loss: 1.0897345542907715,grad_norm: 0.999999402913068, iteration: 459731
loss: 1.0167834758758545,grad_norm: 0.7862034270847039, iteration: 459732
loss: 1.0109955072402954,grad_norm: 0.9999990670358964, iteration: 459733
loss: 1.0128252506256104,grad_norm: 0.6613813023333607, iteration: 459734
loss: 0.9874160289764404,grad_norm: 0.6264572391991933, iteration: 459735
loss: 1.0579947233200073,grad_norm: 0.9999996701182529, iteration: 459736
loss: 0.9894617795944214,grad_norm: 0.6794971113008451, iteration: 459737
loss: 1.0417006015777588,grad_norm: 0.7039893171599461, iteration: 459738
loss: 1.0206303596496582,grad_norm: 0.8487302966152863, iteration: 459739
loss: 1.03533136844635,grad_norm: 0.7818310859078652, iteration: 459740
loss: 1.0715034008026123,grad_norm: 0.9068010196137305, iteration: 459741
loss: 0.9963396787643433,grad_norm: 0.7262762135368718, iteration: 459742
loss: 1.0811110734939575,grad_norm: 0.9999994677004926, iteration: 459743
loss: 0.9741697311401367,grad_norm: 0.9999994537928861, iteration: 459744
loss: 0.9923948645591736,grad_norm: 0.7622234373686915, iteration: 459745
loss: 0.9953172206878662,grad_norm: 0.833755424321058, iteration: 459746
loss: 1.0215314626693726,grad_norm: 0.8334576852848014, iteration: 459747
loss: 1.0053741931915283,grad_norm: 0.6329218496477554, iteration: 459748
loss: 1.0295755863189697,grad_norm: 0.8259372291526139, iteration: 459749
loss: 1.0007742643356323,grad_norm: 0.7318539592801545, iteration: 459750
loss: 0.9508032202720642,grad_norm: 0.6757906282500538, iteration: 459751
loss: 1.023271918296814,grad_norm: 0.6723706264087871, iteration: 459752
loss: 1.0262961387634277,grad_norm: 0.7293640527766365, iteration: 459753
loss: 0.9605442881584167,grad_norm: 0.9122668030364742, iteration: 459754
loss: 0.9748488664627075,grad_norm: 0.7407015828428745, iteration: 459755
loss: 1.007804274559021,grad_norm: 0.8722942649333032, iteration: 459756
loss: 0.9915922284126282,grad_norm: 0.7258474445385428, iteration: 459757
loss: 0.9750568866729736,grad_norm: 0.7012459592310832, iteration: 459758
loss: 0.9973046779632568,grad_norm: 0.651851343340994, iteration: 459759
loss: 0.9635654091835022,grad_norm: 0.7992072909838769, iteration: 459760
loss: 0.9971638917922974,grad_norm: 0.9999997991093553, iteration: 459761
loss: 1.0061126947402954,grad_norm: 0.9999991622456422, iteration: 459762
loss: 0.9901766180992126,grad_norm: 0.9477880951784022, iteration: 459763
loss: 1.0235158205032349,grad_norm: 0.9395853766601663, iteration: 459764
loss: 0.9841554760932922,grad_norm: 0.7584751847558575, iteration: 459765
loss: 0.9764031767845154,grad_norm: 0.7647596441954223, iteration: 459766
loss: 1.0305110216140747,grad_norm: 0.7602098657878432, iteration: 459767
loss: 0.9997508525848389,grad_norm: 0.9999997174264221, iteration: 459768
loss: 1.009145736694336,grad_norm: 0.7725020067235608, iteration: 459769
loss: 1.0092830657958984,grad_norm: 0.7968489142655539, iteration: 459770
loss: 1.0083749294281006,grad_norm: 0.9226440582815861, iteration: 459771
loss: 0.9886384010314941,grad_norm: 0.7496960745309323, iteration: 459772
loss: 0.9802880883216858,grad_norm: 0.8135119973952749, iteration: 459773
loss: 1.0191285610198975,grad_norm: 0.7974946556095118, iteration: 459774
loss: 0.9732112288475037,grad_norm: 0.7730187757061913, iteration: 459775
loss: 0.9844886064529419,grad_norm: 0.7081867451917264, iteration: 459776
loss: 1.0484455823898315,grad_norm: 0.7818553255707963, iteration: 459777
loss: 0.9916097521781921,grad_norm: 0.7789197017610694, iteration: 459778
loss: 0.9957286715507507,grad_norm: 0.8175906431068788, iteration: 459779
loss: 0.997704803943634,grad_norm: 0.7257004339363597, iteration: 459780
loss: 0.9876621961593628,grad_norm: 0.7580322178026535, iteration: 459781
loss: 1.0916658639907837,grad_norm: 0.9401146709469749, iteration: 459782
loss: 0.9827718138694763,grad_norm: 0.801446390763262, iteration: 459783
loss: 0.9930712580680847,grad_norm: 0.999999165104623, iteration: 459784
loss: 1.0439833402633667,grad_norm: 0.7062422998390119, iteration: 459785
loss: 1.0537817478179932,grad_norm: 0.9999992690882973, iteration: 459786
loss: 1.0753329992294312,grad_norm: 0.8618910109300985, iteration: 459787
loss: 0.9898532032966614,grad_norm: 0.7551572820298597, iteration: 459788
loss: 1.1275713443756104,grad_norm: 0.9999996378425283, iteration: 459789
loss: 1.029752254486084,grad_norm: 0.7214784898058477, iteration: 459790
loss: 0.9648146033287048,grad_norm: 0.884768154236488, iteration: 459791
loss: 1.0457876920700073,grad_norm: 0.9999998888204422, iteration: 459792
loss: 0.9754992723464966,grad_norm: 0.8268860755152206, iteration: 459793
loss: 0.9690081477165222,grad_norm: 0.6763487149525, iteration: 459794
loss: 0.9847200512886047,grad_norm: 0.7169238637377047, iteration: 459795
loss: 0.9872671961784363,grad_norm: 0.9319697383243473, iteration: 459796
loss: 1.0410529375076294,grad_norm: 0.9999997682107643, iteration: 459797
loss: 1.073852777481079,grad_norm: 0.9404155968708151, iteration: 459798
loss: 1.0042389631271362,grad_norm: 0.7910044981536866, iteration: 459799
loss: 1.0114113092422485,grad_norm: 0.8941279697297496, iteration: 459800
loss: 1.023094892501831,grad_norm: 0.9649394875146154, iteration: 459801
loss: 0.9876308441162109,grad_norm: 0.682473058806051, iteration: 459802
loss: 1.0189310312271118,grad_norm: 0.8445658369435007, iteration: 459803
loss: 1.0668774843215942,grad_norm: 0.9999995138859255, iteration: 459804
loss: 1.0667619705200195,grad_norm: 0.9999996363861671, iteration: 459805
loss: 1.059481143951416,grad_norm: 0.9999994660581067, iteration: 459806
loss: 1.0013986825942993,grad_norm: 0.7485309547000515, iteration: 459807
loss: 1.0143132209777832,grad_norm: 0.6311818617514933, iteration: 459808
loss: 0.959136426448822,grad_norm: 0.8468595458989528, iteration: 459809
loss: 1.0294771194458008,grad_norm: 0.9999994417280145, iteration: 459810
loss: 0.9962713122367859,grad_norm: 0.7181362351040979, iteration: 459811
loss: 1.0047688484191895,grad_norm: 0.8937091211337308, iteration: 459812
loss: 0.9809914231300354,grad_norm: 0.9999996647013335, iteration: 459813
loss: 1.0060955286026,grad_norm: 0.6586823981769174, iteration: 459814
loss: 1.0038254261016846,grad_norm: 0.9999990973706459, iteration: 459815
loss: 1.0168190002441406,grad_norm: 0.6970939500160366, iteration: 459816
loss: 0.9871429800987244,grad_norm: 0.9999999996828287, iteration: 459817
loss: 1.037209391593933,grad_norm: 0.83945530816093, iteration: 459818
loss: 0.9623767137527466,grad_norm: 0.8969517878514537, iteration: 459819
loss: 1.0192046165466309,grad_norm: 0.766902465903654, iteration: 459820
loss: 0.9545946717262268,grad_norm: 0.7253767054744716, iteration: 459821
loss: 0.9827502369880676,grad_norm: 0.6935788866681477, iteration: 459822
loss: 0.9842020273208618,grad_norm: 0.7150634368749788, iteration: 459823
loss: 1.0434671640396118,grad_norm: 0.9999995373012038, iteration: 459824
loss: 1.0000897645950317,grad_norm: 0.8224530801521884, iteration: 459825
loss: 0.9431862235069275,grad_norm: 0.7584869333283522, iteration: 459826
loss: 1.0259276628494263,grad_norm: 0.999999348458167, iteration: 459827
loss: 0.9809426665306091,grad_norm: 0.8476031106314158, iteration: 459828
loss: 0.9503058195114136,grad_norm: 0.7540338406976856, iteration: 459829
loss: 0.9978629350662231,grad_norm: 0.816275924187486, iteration: 459830
loss: 0.9884585738182068,grad_norm: 0.8260412051457687, iteration: 459831
loss: 1.0128288269042969,grad_norm: 0.7648098941479746, iteration: 459832
loss: 0.9857357144355774,grad_norm: 0.7677482577205069, iteration: 459833
loss: 1.1275485754013062,grad_norm: 0.8568700251047471, iteration: 459834
loss: 1.03891122341156,grad_norm: 1.0000000120754124, iteration: 459835
loss: 1.0317293405532837,grad_norm: 0.6286420408551385, iteration: 459836
loss: 0.9888796210289001,grad_norm: 0.8637388660977221, iteration: 459837
loss: 1.0166376829147339,grad_norm: 0.7962410680389103, iteration: 459838
loss: 1.0741221904754639,grad_norm: 0.9999992636819102, iteration: 459839
loss: 1.0250393152236938,grad_norm: 0.7610959850542567, iteration: 459840
loss: 0.9799168705940247,grad_norm: 0.7218727689349995, iteration: 459841
loss: 1.1050723791122437,grad_norm: 0.999999770793341, iteration: 459842
loss: 1.0151325464248657,grad_norm: 0.8762661866549851, iteration: 459843
loss: 1.022328495979309,grad_norm: 0.6861628682875035, iteration: 459844
loss: 0.9821241497993469,grad_norm: 0.683483104863881, iteration: 459845
loss: 0.9960207939147949,grad_norm: 0.8775752655988505, iteration: 459846
loss: 0.9652228355407715,grad_norm: 0.9999991593968588, iteration: 459847
loss: 0.969567060470581,grad_norm: 0.7503114751159429, iteration: 459848
loss: 1.013614535331726,grad_norm: 0.866700487976037, iteration: 459849
loss: 1.0005160570144653,grad_norm: 0.6752577571969529, iteration: 459850
loss: 0.9843780994415283,grad_norm: 0.73294535569591, iteration: 459851
loss: 0.9739099144935608,grad_norm: 0.8253191786377169, iteration: 459852
loss: 1.0280112028121948,grad_norm: 0.6115658544893385, iteration: 459853
loss: 1.0206642150878906,grad_norm: 0.6662025965200553, iteration: 459854
loss: 0.9959708452224731,grad_norm: 0.6341141292297074, iteration: 459855
loss: 1.0132278203964233,grad_norm: 0.8416443997702341, iteration: 459856
loss: 0.9978522062301636,grad_norm: 0.8120086803945141, iteration: 459857
loss: 0.9998437166213989,grad_norm: 0.7713656464076274, iteration: 459858
loss: 1.0207409858703613,grad_norm: 0.9999991279795855, iteration: 459859
loss: 0.9850015640258789,grad_norm: 0.7642159002530148, iteration: 459860
loss: 1.0190129280090332,grad_norm: 0.8003470380888437, iteration: 459861
loss: 0.9907788634300232,grad_norm: 0.7179948238811749, iteration: 459862
loss: 1.0016018152236938,grad_norm: 0.7710129224446178, iteration: 459863
loss: 0.9820916056632996,grad_norm: 0.8606664788338484, iteration: 459864
loss: 1.0274535417556763,grad_norm: 0.6983692269259227, iteration: 459865
loss: 0.9817601442337036,grad_norm: 0.6978814803745235, iteration: 459866
loss: 0.9860233068466187,grad_norm: 0.7287689690537588, iteration: 459867
loss: 1.0508736371994019,grad_norm: 0.9632775806530206, iteration: 459868
loss: 0.9780881404876709,grad_norm: 0.8232367109874605, iteration: 459869
loss: 0.9943271279335022,grad_norm: 0.7691191289863775, iteration: 459870
loss: 1.0449049472808838,grad_norm: 0.9999991728641706, iteration: 459871
loss: 0.9973334074020386,grad_norm: 0.8000381202964431, iteration: 459872
loss: 0.9973644018173218,grad_norm: 0.9999995093084842, iteration: 459873
loss: 0.9947632551193237,grad_norm: 0.9999990423153788, iteration: 459874
loss: 0.9816600680351257,grad_norm: 0.7993439495609815, iteration: 459875
loss: 1.001782774925232,grad_norm: 0.8057251025318457, iteration: 459876
loss: 0.9941523671150208,grad_norm: 0.7785836083996945, iteration: 459877
loss: 1.0075055360794067,grad_norm: 0.892215730713659, iteration: 459878
loss: 1.0080351829528809,grad_norm: 0.7189279873587928, iteration: 459879
loss: 0.9941542744636536,grad_norm: 0.9999998854464781, iteration: 459880
loss: 1.0042202472686768,grad_norm: 0.999999729778892, iteration: 459881
loss: 0.9699168801307678,grad_norm: 0.9394809692308067, iteration: 459882
loss: 1.043928623199463,grad_norm: 0.7243171604586777, iteration: 459883
loss: 0.9995386004447937,grad_norm: 0.8559425810356727, iteration: 459884
loss: 0.986070454120636,grad_norm: 0.6350335999783588, iteration: 459885
loss: 1.0467109680175781,grad_norm: 0.9397207563624437, iteration: 459886
loss: 1.071972370147705,grad_norm: 0.858666045653174, iteration: 459887
loss: 1.0191947221755981,grad_norm: 0.8669075807074276, iteration: 459888
loss: 1.0287470817565918,grad_norm: 0.9999999099724777, iteration: 459889
loss: 0.9937329292297363,grad_norm: 0.7913157123032768, iteration: 459890
loss: 1.0309892892837524,grad_norm: 0.9216319305397516, iteration: 459891
loss: 0.9856395721435547,grad_norm: 0.8419976534202012, iteration: 459892
loss: 1.0396887063980103,grad_norm: 0.9999996011325127, iteration: 459893
loss: 0.9781890511512756,grad_norm: 0.7417990118378559, iteration: 459894
loss: 0.9967389106750488,grad_norm: 0.819561244792668, iteration: 459895
loss: 0.9896870255470276,grad_norm: 0.7109928831857443, iteration: 459896
loss: 0.9899373650550842,grad_norm: 0.6215842249210417, iteration: 459897
loss: 1.2675507068634033,grad_norm: 0.9999996485585246, iteration: 459898
loss: 1.0377764701843262,grad_norm: 0.7935595019927792, iteration: 459899
loss: 1.0534412860870361,grad_norm: 0.9999991376353723, iteration: 459900
loss: 1.0560845136642456,grad_norm: 0.7013317077734611, iteration: 459901
loss: 1.0047872066497803,grad_norm: 0.8091887538815997, iteration: 459902
loss: 1.0626572370529175,grad_norm: 0.8012138293393873, iteration: 459903
loss: 1.010703206062317,grad_norm: 0.9519049509816015, iteration: 459904
loss: 1.005585789680481,grad_norm: 0.7999874041727941, iteration: 459905
loss: 1.0287216901779175,grad_norm: 0.7099317625014974, iteration: 459906
loss: 1.0332250595092773,grad_norm: 0.771133642833519, iteration: 459907
loss: 0.9669572710990906,grad_norm: 0.9999995028076276, iteration: 459908
loss: 1.0213760137557983,grad_norm: 0.7738881829255327, iteration: 459909
loss: 0.9998939037322998,grad_norm: 0.7266173282342271, iteration: 459910
loss: 1.0077629089355469,grad_norm: 0.8244809966231326, iteration: 459911
loss: 1.0032497644424438,grad_norm: 0.68954113654608, iteration: 459912
loss: 1.0161828994750977,grad_norm: 0.7844581690057816, iteration: 459913
loss: 1.0501186847686768,grad_norm: 0.9246548732402229, iteration: 459914
loss: 0.9806384444236755,grad_norm: 0.7849734499854211, iteration: 459915
loss: 0.9915376901626587,grad_norm: 0.9999997787787108, iteration: 459916
loss: 0.9467968940734863,grad_norm: 0.7616300680499369, iteration: 459917
loss: 1.0079628229141235,grad_norm: 0.8904430672267862, iteration: 459918
loss: 1.0092179775238037,grad_norm: 0.7330260405377191, iteration: 459919
loss: 1.0166105031967163,grad_norm: 0.7109273473233967, iteration: 459920
loss: 1.006708025932312,grad_norm: 0.8036053930097858, iteration: 459921
loss: 0.9840500354766846,grad_norm: 0.750426831862092, iteration: 459922
loss: 1.0255579948425293,grad_norm: 0.8300570703373826, iteration: 459923
loss: 0.9569279551506042,grad_norm: 0.7140567976884055, iteration: 459924
loss: 1.020437479019165,grad_norm: 0.9999997753837901, iteration: 459925
loss: 1.0093241930007935,grad_norm: 0.8603706323427439, iteration: 459926
loss: 1.099990963935852,grad_norm: 0.999999186525166, iteration: 459927
loss: 1.0350793600082397,grad_norm: 0.8252264927908924, iteration: 459928
loss: 0.9781289100646973,grad_norm: 0.8093711393991012, iteration: 459929
loss: 1.0156813859939575,grad_norm: 0.7529904281568598, iteration: 459930
loss: 0.9974321126937866,grad_norm: 0.8315074064213992, iteration: 459931
loss: 0.9943124055862427,grad_norm: 0.8511976176684062, iteration: 459932
loss: 1.0028904676437378,grad_norm: 0.7730788556408884, iteration: 459933
loss: 0.9625186324119568,grad_norm: 0.6482481992064864, iteration: 459934
loss: 0.9314854741096497,grad_norm: 0.6949649945088959, iteration: 459935
loss: 0.9923524856567383,grad_norm: 0.7809964851125801, iteration: 459936
loss: 1.0132086277008057,grad_norm: 0.6714853422168207, iteration: 459937
loss: 0.9889662861824036,grad_norm: 0.6707356308336813, iteration: 459938
loss: 0.9808406829833984,grad_norm: 0.7767669256834537, iteration: 459939
loss: 1.0425159931182861,grad_norm: 0.7943665865355327, iteration: 459940
loss: 1.0088188648223877,grad_norm: 0.7957131583301541, iteration: 459941
loss: 1.0778733491897583,grad_norm: 0.7558111133009937, iteration: 459942
loss: 1.0075353384017944,grad_norm: 0.7624004880562464, iteration: 459943
loss: 1.06052827835083,grad_norm: 0.8652787264932073, iteration: 459944
loss: 1.0060060024261475,grad_norm: 0.7923504279100971, iteration: 459945
loss: 0.9985607862472534,grad_norm: 0.8156760069895722, iteration: 459946
loss: 0.9474495649337769,grad_norm: 0.6904796105762011, iteration: 459947
loss: 1.0105297565460205,grad_norm: 0.7837354471264958, iteration: 459948
loss: 1.0204014778137207,grad_norm: 0.7838656301705397, iteration: 459949
loss: 1.002410888671875,grad_norm: 0.7914750430465399, iteration: 459950
loss: 0.9936668872833252,grad_norm: 0.7425945710898627, iteration: 459951
loss: 1.0065436363220215,grad_norm: 0.8121069190721365, iteration: 459952
loss: 1.0332505702972412,grad_norm: 0.6960920681521757, iteration: 459953
loss: 0.9875903129577637,grad_norm: 0.8242616369179294, iteration: 459954
loss: 0.9898919463157654,grad_norm: 0.6569383022370958, iteration: 459955
loss: 1.022721529006958,grad_norm: 0.9553368174563835, iteration: 459956
loss: 0.9949398636817932,grad_norm: 0.6703034188346743, iteration: 459957
loss: 0.9765439629554749,grad_norm: 0.7078742285798895, iteration: 459958
loss: 1.0143718719482422,grad_norm: 0.8720948712045061, iteration: 459959
loss: 0.9804691076278687,grad_norm: 0.7435967439220954, iteration: 459960
loss: 1.017213225364685,grad_norm: 0.7952280629594894, iteration: 459961
loss: 1.0274897813796997,grad_norm: 0.803897813749248, iteration: 459962
loss: 0.9573600888252258,grad_norm: 0.6544899204540919, iteration: 459963
loss: 1.0107589960098267,grad_norm: 0.8737455513428578, iteration: 459964
loss: 1.0025328397750854,grad_norm: 0.6974647220631902, iteration: 459965
loss: 1.0360145568847656,grad_norm: 0.9104351047424164, iteration: 459966
loss: 0.984523594379425,grad_norm: 0.8823697522037965, iteration: 459967
loss: 1.0352436304092407,grad_norm: 0.8233074826539312, iteration: 459968
loss: 1.0137825012207031,grad_norm: 0.7181738961586339, iteration: 459969
loss: 0.9831536412239075,grad_norm: 0.6685945943558984, iteration: 459970
loss: 1.0070685148239136,grad_norm: 0.7351320466893312, iteration: 459971
loss: 0.9913662672042847,grad_norm: 0.9782051756606874, iteration: 459972
loss: 0.9928997159004211,grad_norm: 0.7163754267103142, iteration: 459973
loss: 1.0657520294189453,grad_norm: 0.8715566876740356, iteration: 459974
loss: 0.9990125298500061,grad_norm: 0.8718635666907332, iteration: 459975
loss: 0.9835666418075562,grad_norm: 0.7721000741193887, iteration: 459976
loss: 1.0046511888504028,grad_norm: 0.9999994444564261, iteration: 459977
loss: 1.0158361196517944,grad_norm: 0.684668673308375, iteration: 459978
loss: 1.007660150527954,grad_norm: 0.8388500858295023, iteration: 459979
loss: 0.9777495265007019,grad_norm: 0.8150011082711822, iteration: 459980
loss: 1.0137643814086914,grad_norm: 0.7632669379014212, iteration: 459981
loss: 1.0100408792495728,grad_norm: 0.8600663176340845, iteration: 459982
loss: 0.9915404319763184,grad_norm: 0.9999998046478057, iteration: 459983
loss: 0.9852989315986633,grad_norm: 0.7263007365408803, iteration: 459984
loss: 1.0045630931854248,grad_norm: 0.6401585157252211, iteration: 459985
loss: 1.0679527521133423,grad_norm: 0.7741391979261669, iteration: 459986
loss: 0.9822812080383301,grad_norm: 0.7662554691163117, iteration: 459987
loss: 1.0069694519042969,grad_norm: 0.7040866241836131, iteration: 459988
loss: 1.0080915689468384,grad_norm: 0.6411967206206549, iteration: 459989
loss: 1.017938494682312,grad_norm: 0.6354036728863981, iteration: 459990
loss: 1.0612822771072388,grad_norm: 0.9999998524858612, iteration: 459991
loss: 1.0131248235702515,grad_norm: 0.9194976655427911, iteration: 459992
loss: 0.9994860887527466,grad_norm: 0.9255614746173997, iteration: 459993
loss: 1.0083398818969727,grad_norm: 0.7855664890947619, iteration: 459994
loss: 1.0509980916976929,grad_norm: 0.9999997185703842, iteration: 459995
loss: 1.0083646774291992,grad_norm: 0.89575823689689, iteration: 459996
loss: 1.0383076667785645,grad_norm: 0.7552939035478755, iteration: 459997
loss: 1.0144060850143433,grad_norm: 0.9999999095028206, iteration: 459998
loss: 1.0110381841659546,grad_norm: 0.8349864059768813, iteration: 459999
loss: 0.964296281337738,grad_norm: 0.6622790394401858, iteration: 460000
Evaluating at step 460000
{'val': 0.9965213742107153, 'test': 2.102532504084773}
loss: 0.9824846386909485,grad_norm: 0.803414627274894, iteration: 460001
loss: 0.9703417420387268,grad_norm: 0.9469300291681839, iteration: 460002
loss: 1.0965989828109741,grad_norm: 0.9999998107952273, iteration: 460003
loss: 0.9784096479415894,grad_norm: 0.7971767405925141, iteration: 460004
loss: 1.146649718284607,grad_norm: 0.9528802156158231, iteration: 460005
loss: 0.9995149374008179,grad_norm: 0.7189481884144612, iteration: 460006
loss: 0.9950072169303894,grad_norm: 0.7591156577160423, iteration: 460007
loss: 0.976791262626648,grad_norm: 0.8342961494513834, iteration: 460008
loss: 1.0030066967010498,grad_norm: 0.6307085082868323, iteration: 460009
loss: 1.0217996835708618,grad_norm: 0.9999995514563945, iteration: 460010
loss: 1.03770911693573,grad_norm: 0.7961711130919609, iteration: 460011
loss: 0.984649658203125,grad_norm: 0.7927383033279306, iteration: 460012
loss: 0.9808648824691772,grad_norm: 0.7682695969403036, iteration: 460013
loss: 0.9922391772270203,grad_norm: 0.8765492601912128, iteration: 460014
loss: 1.0049195289611816,grad_norm: 0.7314215668214713, iteration: 460015
loss: 1.0813791751861572,grad_norm: 0.9999999981489481, iteration: 460016
loss: 0.9716386795043945,grad_norm: 0.6480375704414439, iteration: 460017
loss: 1.0099313259124756,grad_norm: 0.78980735872506, iteration: 460018
loss: 1.0306514501571655,grad_norm: 0.9999999006969534, iteration: 460019
loss: 0.9958785772323608,grad_norm: 0.6842443614964612, iteration: 460020
loss: 1.0950698852539062,grad_norm: 0.9999991613457707, iteration: 460021
loss: 1.0135774612426758,grad_norm: 0.7795172379698084, iteration: 460022
loss: 1.081719994544983,grad_norm: 0.6675483905765371, iteration: 460023
loss: 0.9781132936477661,grad_norm: 0.8026794218484357, iteration: 460024
loss: 1.1071133613586426,grad_norm: 0.8718671668418202, iteration: 460025
loss: 1.003507137298584,grad_norm: 0.8234234981902461, iteration: 460026
loss: 1.0356439352035522,grad_norm: 0.8705350843127625, iteration: 460027
loss: 0.9942935705184937,grad_norm: 0.8698759032594949, iteration: 460028
loss: 0.9860435724258423,grad_norm: 0.7047689437551009, iteration: 460029
loss: 1.0175855159759521,grad_norm: 0.8112583662362783, iteration: 460030
loss: 1.0902653932571411,grad_norm: 0.9999992624326032, iteration: 460031
loss: 0.9805616140365601,grad_norm: 0.7220539674134109, iteration: 460032
loss: 1.052619218826294,grad_norm: 0.9999997079997878, iteration: 460033
loss: 1.000816822052002,grad_norm: 0.6499948856586478, iteration: 460034
loss: 1.037925124168396,grad_norm: 0.6995943030262471, iteration: 460035
loss: 0.9720516800880432,grad_norm: 0.6938838194900848, iteration: 460036
loss: 0.9752574563026428,grad_norm: 0.8116188516736744, iteration: 460037
loss: 1.0365277528762817,grad_norm: 0.7450963086519027, iteration: 460038
loss: 1.004361867904663,grad_norm: 0.796649733823888, iteration: 460039
loss: 1.0621787309646606,grad_norm: 0.9523881225592018, iteration: 460040
loss: 1.02615225315094,grad_norm: 0.8049360661812696, iteration: 460041
loss: 1.0434505939483643,grad_norm: 0.9999992758664353, iteration: 460042
loss: 1.0721704959869385,grad_norm: 0.9999992357264591, iteration: 460043
loss: 0.990234375,grad_norm: 0.9506949616474979, iteration: 460044
loss: 1.0000725984573364,grad_norm: 0.7898689186786537, iteration: 460045
loss: 0.9873285889625549,grad_norm: 0.6635818710941153, iteration: 460046
loss: 1.021956443786621,grad_norm: 0.776667946282588, iteration: 460047
loss: 0.9898549318313599,grad_norm: 0.8442623881864991, iteration: 460048
loss: 0.9869384169578552,grad_norm: 0.6899074730444119, iteration: 460049
loss: 1.065401315689087,grad_norm: 0.8951477630368007, iteration: 460050
loss: 0.9854849576950073,grad_norm: 0.8269086039174389, iteration: 460051
loss: 1.020249366760254,grad_norm: 0.898100295152082, iteration: 460052
loss: 1.061363935470581,grad_norm: 0.8861231436360985, iteration: 460053
loss: 1.0336682796478271,grad_norm: 0.7028248537353197, iteration: 460054
loss: 1.0789589881896973,grad_norm: 0.9999997104628968, iteration: 460055
loss: 1.004041075706482,grad_norm: 0.7629596200203226, iteration: 460056
loss: 1.0008301734924316,grad_norm: 0.7223351991745478, iteration: 460057
loss: 0.9945100545883179,grad_norm: 0.6494101056176095, iteration: 460058
loss: 1.0044318437576294,grad_norm: 0.7404063698803858, iteration: 460059
loss: 1.0014952421188354,grad_norm: 0.8502408068693946, iteration: 460060
loss: 1.0331032276153564,grad_norm: 0.6959699414755475, iteration: 460061
loss: 1.0127993822097778,grad_norm: 0.6206365356251365, iteration: 460062
loss: 0.9689521193504333,grad_norm: 0.5746730380759789, iteration: 460063
loss: 1.0266528129577637,grad_norm: 0.907562596453811, iteration: 460064
loss: 1.0394700765609741,grad_norm: 0.6469403274149462, iteration: 460065
loss: 1.0067157745361328,grad_norm: 0.7045091812982585, iteration: 460066
loss: 1.0271652936935425,grad_norm: 0.5950794859339072, iteration: 460067
loss: 1.0203922986984253,grad_norm: 0.737576124930608, iteration: 460068
loss: 1.0026297569274902,grad_norm: 0.8363570729360968, iteration: 460069
loss: 0.9699229598045349,grad_norm: 0.5969916316327313, iteration: 460070
loss: 0.9924630522727966,grad_norm: 0.8191139951463754, iteration: 460071
loss: 0.970814049243927,grad_norm: 0.7977586072225822, iteration: 460072
loss: 1.012048602104187,grad_norm: 0.7244194005112815, iteration: 460073
loss: 1.0192233324050903,grad_norm: 0.7553935237499603, iteration: 460074
loss: 0.9879299402236938,grad_norm: 0.8496904680539853, iteration: 460075
loss: 1.171345829963684,grad_norm: 0.9999993479592126, iteration: 460076
loss: 0.9917871952056885,grad_norm: 0.7193189803789112, iteration: 460077
loss: 0.9932789206504822,grad_norm: 0.9999990656102281, iteration: 460078
loss: 0.9900273680686951,grad_norm: 0.77847110135721, iteration: 460079
loss: 0.9954571723937988,grad_norm: 0.6469039403059887, iteration: 460080
loss: 1.026665449142456,grad_norm: 0.7133996279349208, iteration: 460081
loss: 0.9969545006752014,grad_norm: 0.8437305334813358, iteration: 460082
loss: 1.021390676498413,grad_norm: 0.7927088670354113, iteration: 460083
loss: 1.0041234493255615,grad_norm: 0.7734720751848029, iteration: 460084
loss: 0.9842919707298279,grad_norm: 0.6861150953856042, iteration: 460085
loss: 1.015832543373108,grad_norm: 0.8962109884698211, iteration: 460086
loss: 0.9778167605400085,grad_norm: 0.7680781750712188, iteration: 460087
loss: 1.0769121646881104,grad_norm: 0.8763504756827168, iteration: 460088
loss: 1.0124750137329102,grad_norm: 0.7547411054071833, iteration: 460089
loss: 0.9962226748466492,grad_norm: 0.8058442873397819, iteration: 460090
loss: 1.0186957120895386,grad_norm: 0.869111472328957, iteration: 460091
loss: 1.0248926877975464,grad_norm: 0.9999997915347931, iteration: 460092
loss: 0.9875547289848328,grad_norm: 0.709169272938733, iteration: 460093
loss: 0.9967257976531982,grad_norm: 0.7650321666647273, iteration: 460094
loss: 1.0061177015304565,grad_norm: 0.7422221070038494, iteration: 460095
loss: 0.9817588925361633,grad_norm: 0.9999998626311518, iteration: 460096
loss: 1.0659397840499878,grad_norm: 0.9999992241219493, iteration: 460097
loss: 0.9986123442649841,grad_norm: 0.7846292915537938, iteration: 460098
loss: 1.038751482963562,grad_norm: 0.7251975812576312, iteration: 460099
loss: 0.9899319410324097,grad_norm: 0.6750846233670822, iteration: 460100
loss: 0.9853047728538513,grad_norm: 0.7334380578816028, iteration: 460101
loss: 1.0275920629501343,grad_norm: 0.999999066242602, iteration: 460102
loss: 0.9998995661735535,grad_norm: 0.7727526784050709, iteration: 460103
loss: 0.9702979326248169,grad_norm: 0.9999990074536453, iteration: 460104
loss: 0.9860864877700806,grad_norm: 0.6245491167820183, iteration: 460105
loss: 0.9637951254844666,grad_norm: 0.9999990830630813, iteration: 460106
loss: 1.0560109615325928,grad_norm: 0.8956008266686201, iteration: 460107
loss: 1.0537824630737305,grad_norm: 0.9999990717235734, iteration: 460108
loss: 1.0022449493408203,grad_norm: 0.8220431365686038, iteration: 460109
loss: 1.0074750185012817,grad_norm: 0.8047868643267957, iteration: 460110
loss: 1.0315452814102173,grad_norm: 0.7265078310005921, iteration: 460111
loss: 0.9472147822380066,grad_norm: 0.8187832804598266, iteration: 460112
loss: 1.0368515253067017,grad_norm: 0.6912720738649125, iteration: 460113
loss: 1.015783429145813,grad_norm: 0.7223788241537479, iteration: 460114
loss: 1.0005370378494263,grad_norm: 0.8143602969437193, iteration: 460115
loss: 0.9872514605522156,grad_norm: 0.7592861622352379, iteration: 460116
loss: 1.024704098701477,grad_norm: 0.9149310697868359, iteration: 460117
loss: 0.964195191860199,grad_norm: 0.6697140656153059, iteration: 460118
loss: 0.9867774844169617,grad_norm: 0.8467211444111732, iteration: 460119
loss: 0.9570122957229614,grad_norm: 0.7686653416746564, iteration: 460120
loss: 1.0040587186813354,grad_norm: 0.9999998415780268, iteration: 460121
loss: 1.0026954412460327,grad_norm: 0.8780089416744009, iteration: 460122
loss: 0.9893637895584106,grad_norm: 0.8444958294930238, iteration: 460123
loss: 1.014979600906372,grad_norm: 0.6657392400295691, iteration: 460124
loss: 1.0146235227584839,grad_norm: 0.6716208856593567, iteration: 460125
loss: 0.9933633208274841,grad_norm: 0.7918071514743902, iteration: 460126
loss: 0.9823121428489685,grad_norm: 0.7993952077681407, iteration: 460127
loss: 1.0090200901031494,grad_norm: 0.6570599574090327, iteration: 460128
loss: 0.986924409866333,grad_norm: 0.851466200363358, iteration: 460129
loss: 0.9440327286720276,grad_norm: 0.6959593408790352, iteration: 460130
loss: 1.0237562656402588,grad_norm: 0.8351907562432225, iteration: 460131
loss: 1.011753797531128,grad_norm: 0.9999996408444366, iteration: 460132
loss: 1.0025241374969482,grad_norm: 0.6962606675058534, iteration: 460133
loss: 1.0259745121002197,grad_norm: 0.9072746441111738, iteration: 460134
loss: 0.9856107831001282,grad_norm: 0.694428767870617, iteration: 460135
loss: 1.010224461555481,grad_norm: 0.7209231490556202, iteration: 460136
loss: 0.9572116732597351,grad_norm: 0.705680227226039, iteration: 460137
loss: 0.9762623310089111,grad_norm: 0.68800728105899, iteration: 460138
loss: 1.0059829950332642,grad_norm: 0.7084094074044697, iteration: 460139
loss: 1.016686201095581,grad_norm: 0.6735896138569523, iteration: 460140
loss: 1.0011793375015259,grad_norm: 0.9577861339405765, iteration: 460141
loss: 0.9974859356880188,grad_norm: 0.7750100243826933, iteration: 460142
loss: 0.9557836651802063,grad_norm: 0.8042416559607475, iteration: 460143
loss: 1.0075470209121704,grad_norm: 0.7834327894239397, iteration: 460144
loss: 1.052347183227539,grad_norm: 0.9999993491609349, iteration: 460145
loss: 0.9915783405303955,grad_norm: 0.7651560998449363, iteration: 460146
loss: 1.070317268371582,grad_norm: 0.9999990732106894, iteration: 460147
loss: 1.05766761302948,grad_norm: 0.699262362119943, iteration: 460148
loss: 0.9833266139030457,grad_norm: 0.9999997303837005, iteration: 460149
loss: 1.0077760219573975,grad_norm: 0.9153463817579434, iteration: 460150
loss: 0.9728066921234131,grad_norm: 0.7050610912065509, iteration: 460151
loss: 1.0271857976913452,grad_norm: 0.7139782082482738, iteration: 460152
loss: 0.9845426678657532,grad_norm: 0.6510957552852433, iteration: 460153
loss: 1.002463459968567,grad_norm: 0.7242488601697904, iteration: 460154
loss: 0.9790764451026917,grad_norm: 0.6398662583638183, iteration: 460155
loss: 0.9955077767372131,grad_norm: 0.6937042285315482, iteration: 460156
loss: 1.0424113273620605,grad_norm: 0.7852489530579475, iteration: 460157
loss: 1.1234455108642578,grad_norm: 0.9230466498862214, iteration: 460158
loss: 1.0041245222091675,grad_norm: 0.740679186119211, iteration: 460159
loss: 0.9549195170402527,grad_norm: 0.7211624704678375, iteration: 460160
loss: 1.0025293827056885,grad_norm: 0.8227076447519107, iteration: 460161
loss: 0.9848574995994568,grad_norm: 0.7393995614302447, iteration: 460162
loss: 1.0439614057540894,grad_norm: 0.7572197278702256, iteration: 460163
loss: 0.975730299949646,grad_norm: 0.7444802605063361, iteration: 460164
loss: 0.9642907381057739,grad_norm: 0.9999992878955539, iteration: 460165
loss: 0.9669917821884155,grad_norm: 0.8534265461790921, iteration: 460166
loss: 1.000083088874817,grad_norm: 0.8157290480943623, iteration: 460167
loss: 0.99549400806427,grad_norm: 0.8590316814899053, iteration: 460168
loss: 1.0214277505874634,grad_norm: 0.9139655057411518, iteration: 460169
loss: 0.9774289727210999,grad_norm: 0.9999989847313226, iteration: 460170
loss: 1.0168637037277222,grad_norm: 0.9086315532315676, iteration: 460171
loss: 1.028480887413025,grad_norm: 0.7601641756471265, iteration: 460172
loss: 1.026462435722351,grad_norm: 0.6717756631695692, iteration: 460173
loss: 0.9952792525291443,grad_norm: 0.7782926904281992, iteration: 460174
loss: 1.0705705881118774,grad_norm: 0.7873909448528896, iteration: 460175
loss: 1.0001888275146484,grad_norm: 0.6776218448061616, iteration: 460176
loss: 0.9811654090881348,grad_norm: 0.7765366809621123, iteration: 460177
loss: 0.9837920069694519,grad_norm: 0.8386684362016448, iteration: 460178
loss: 0.9627633690834045,grad_norm: 0.7914727825700696, iteration: 460179
loss: 1.0492953062057495,grad_norm: 0.8940815561058145, iteration: 460180
loss: 0.9766366481781006,grad_norm: 0.7352170581576561, iteration: 460181
loss: 1.0118262767791748,grad_norm: 0.9999998257283111, iteration: 460182
loss: 1.0353282690048218,grad_norm: 0.9999990201769767, iteration: 460183
loss: 1.009493112564087,grad_norm: 0.7911507318814451, iteration: 460184
loss: 1.0102626085281372,grad_norm: 0.7008941887342593, iteration: 460185
loss: 1.0143656730651855,grad_norm: 0.8131740472285502, iteration: 460186
loss: 0.9982944130897522,grad_norm: 0.8690506275572353, iteration: 460187
loss: 1.0936733484268188,grad_norm: 0.9999999030666921, iteration: 460188
loss: 0.9938825368881226,grad_norm: 0.7790136852378822, iteration: 460189
loss: 0.9994483590126038,grad_norm: 0.7800637526913218, iteration: 460190
loss: 1.0238398313522339,grad_norm: 0.7771727903000785, iteration: 460191
loss: 0.9675153493881226,grad_norm: 0.8402842914111476, iteration: 460192
loss: 1.017037034034729,grad_norm: 0.8859144465978007, iteration: 460193
loss: 0.9966756701469421,grad_norm: 0.8025097532595524, iteration: 460194
loss: 0.9944427609443665,grad_norm: 0.8192838977193944, iteration: 460195
loss: 1.0041413307189941,grad_norm: 0.7213022477208172, iteration: 460196
loss: 0.9873881340026855,grad_norm: 0.8595432609901229, iteration: 460197
loss: 0.9657120108604431,grad_norm: 0.7606616861708899, iteration: 460198
loss: 1.0575028657913208,grad_norm: 0.7879218323255098, iteration: 460199
loss: 1.114627718925476,grad_norm: 0.9589640915443786, iteration: 460200
loss: 1.0417189598083496,grad_norm: 0.9433885593658964, iteration: 460201
loss: 1.0372179746627808,grad_norm: 0.8150071200131191, iteration: 460202
loss: 1.0307072401046753,grad_norm: 0.8176900386720318, iteration: 460203
loss: 1.007837176322937,grad_norm: 0.8207862760538387, iteration: 460204
loss: 0.9991162419319153,grad_norm: 0.7436226384829582, iteration: 460205
loss: 1.0147521495819092,grad_norm: 0.6659834977408176, iteration: 460206
loss: 0.9879581332206726,grad_norm: 0.6664293672497602, iteration: 460207
loss: 1.0314793586730957,grad_norm: 0.8738748076334089, iteration: 460208
loss: 1.0097417831420898,grad_norm: 0.9999992058674022, iteration: 460209
loss: 1.0685572624206543,grad_norm: 0.8767292244478767, iteration: 460210
loss: 1.0155245065689087,grad_norm: 0.8773248430662011, iteration: 460211
loss: 1.0066008567810059,grad_norm: 0.9999992289579078, iteration: 460212
loss: 1.0185712575912476,grad_norm: 0.8315127003381648, iteration: 460213
loss: 1.000619649887085,grad_norm: 0.7921665519095207, iteration: 460214
loss: 0.9809102416038513,grad_norm: 0.7123346835570611, iteration: 460215
loss: 1.063309669494629,grad_norm: 0.9999996501454291, iteration: 460216
loss: 1.0149445533752441,grad_norm: 0.8348267075578235, iteration: 460217
loss: 1.0123226642608643,grad_norm: 0.8743312298514809, iteration: 460218
loss: 1.0096181631088257,grad_norm: 0.6334622003938838, iteration: 460219
loss: 0.9622608423233032,grad_norm: 0.9999991384193767, iteration: 460220
loss: 0.9793165326118469,grad_norm: 0.7713126430310292, iteration: 460221
loss: 0.9734612703323364,grad_norm: 0.905239627195794, iteration: 460222
loss: 0.98216313123703,grad_norm: 0.606049319411041, iteration: 460223
loss: 1.0122013092041016,grad_norm: 0.711705543389526, iteration: 460224
loss: 1.0226279497146606,grad_norm: 0.8253696398407525, iteration: 460225
loss: 1.008438229560852,grad_norm: 0.7569558256895356, iteration: 460226
loss: 1.0154798030853271,grad_norm: 0.9769209592234394, iteration: 460227
loss: 0.9732317328453064,grad_norm: 0.714334650377429, iteration: 460228
loss: 0.9777134656906128,grad_norm: 0.6701873475262021, iteration: 460229
loss: 1.019256591796875,grad_norm: 0.7912134530747553, iteration: 460230
loss: 0.9985843896865845,grad_norm: 0.760532273551872, iteration: 460231
loss: 0.959998607635498,grad_norm: 0.7959310714373107, iteration: 460232
loss: 1.0613157749176025,grad_norm: 0.9999992364953458, iteration: 460233
loss: 0.9851680994033813,grad_norm: 0.7421135192468612, iteration: 460234
loss: 0.9637442231178284,grad_norm: 0.9999991335707338, iteration: 460235
loss: 0.9922525882720947,grad_norm: 0.868384977185458, iteration: 460236
loss: 1.0489596128463745,grad_norm: 0.9831085221321856, iteration: 460237
loss: 0.9905579090118408,grad_norm: 0.8471717138743905, iteration: 460238
loss: 1.0450072288513184,grad_norm: 0.7382209155387773, iteration: 460239
loss: 0.9687281847000122,grad_norm: 0.6916686068648775, iteration: 460240
loss: 1.0048129558563232,grad_norm: 0.8435359060387645, iteration: 460241
loss: 1.0177392959594727,grad_norm: 0.7689734170114972, iteration: 460242
loss: 1.018859624862671,grad_norm: 0.6558557602881585, iteration: 460243
loss: 0.9652354121208191,grad_norm: 0.7548099603016318, iteration: 460244
loss: 0.988877534866333,grad_norm: 0.6922285949053107, iteration: 460245
loss: 0.9894971251487732,grad_norm: 0.7483864632032177, iteration: 460246
loss: 1.0056824684143066,grad_norm: 0.7382664665080171, iteration: 460247
loss: 1.0004525184631348,grad_norm: 0.7340773759208025, iteration: 460248
loss: 1.0132811069488525,grad_norm: 0.7986388538984307, iteration: 460249
loss: 1.008418321609497,grad_norm: 0.8815169328768914, iteration: 460250
loss: 0.9979885816574097,grad_norm: 0.7961528649911785, iteration: 460251
loss: 1.0062875747680664,grad_norm: 0.6696496931170655, iteration: 460252
loss: 1.0195313692092896,grad_norm: 0.7100146479977439, iteration: 460253
loss: 1.0106372833251953,grad_norm: 0.8898882756390931, iteration: 460254
loss: 1.0076904296875,grad_norm: 0.8401428708051947, iteration: 460255
loss: 0.9788360595703125,grad_norm: 0.7207579679872874, iteration: 460256
loss: 1.0080722570419312,grad_norm: 0.7173212999867251, iteration: 460257
loss: 0.9914742112159729,grad_norm: 0.7354595576746674, iteration: 460258
loss: 0.9871315956115723,grad_norm: 0.8109596178819436, iteration: 460259
loss: 1.0077699422836304,grad_norm: 0.7205789201381102, iteration: 460260
loss: 0.9707539677619934,grad_norm: 0.8268329558556696, iteration: 460261
loss: 0.9879938364028931,grad_norm: 0.6623280726803215, iteration: 460262
loss: 1.0372194051742554,grad_norm: 0.7526944447021724, iteration: 460263
loss: 1.0169132947921753,grad_norm: 0.8114557732854445, iteration: 460264
loss: 0.996730625629425,grad_norm: 0.7554756039410357, iteration: 460265
loss: 1.009716272354126,grad_norm: 0.7643535623389013, iteration: 460266
loss: 1.0090620517730713,grad_norm: 0.9051550653960326, iteration: 460267
loss: 1.0086678266525269,grad_norm: 0.7436198965871752, iteration: 460268
loss: 0.9865392446517944,grad_norm: 0.7766342345805094, iteration: 460269
loss: 0.9882023334503174,grad_norm: 0.853572407608749, iteration: 460270
loss: 0.9909151792526245,grad_norm: 0.7270249684665492, iteration: 460271
loss: 1.0233135223388672,grad_norm: 0.6297908707323181, iteration: 460272
loss: 1.0114976167678833,grad_norm: 0.734577095082141, iteration: 460273
loss: 1.0002418756484985,grad_norm: 0.7182048348511811, iteration: 460274
loss: 1.0119026899337769,grad_norm: 0.913156735559527, iteration: 460275
loss: 1.0066633224487305,grad_norm: 0.9243986278982932, iteration: 460276
loss: 0.9746764302253723,grad_norm: 0.8169751323594094, iteration: 460277
loss: 0.9867867827415466,grad_norm: 0.7017067283802915, iteration: 460278
loss: 1.0475772619247437,grad_norm: 0.84750129608952, iteration: 460279
loss: 0.997569739818573,grad_norm: 0.6635926534864762, iteration: 460280
loss: 1.0221107006072998,grad_norm: 0.7011581935868144, iteration: 460281
loss: 1.0342867374420166,grad_norm: 0.8526160891118361, iteration: 460282
loss: 0.9946836829185486,grad_norm: 0.9999995769007037, iteration: 460283
loss: 0.9632471799850464,grad_norm: 0.8380462577289586, iteration: 460284
loss: 0.9851312041282654,grad_norm: 0.7387361854146278, iteration: 460285
loss: 0.9919811487197876,grad_norm: 0.7212996420232901, iteration: 460286
loss: 0.974129319190979,grad_norm: 0.9224513506440083, iteration: 460287
loss: 1.022985577583313,grad_norm: 0.7271283198667158, iteration: 460288
loss: 0.9901493787765503,grad_norm: 0.8166800206950815, iteration: 460289
loss: 1.0098140239715576,grad_norm: 0.9662065472862065, iteration: 460290
loss: 0.9927434325218201,grad_norm: 0.7915341991718514, iteration: 460291
loss: 0.9727720022201538,grad_norm: 0.6731438283929567, iteration: 460292
loss: 0.9774266481399536,grad_norm: 0.6695148935673397, iteration: 460293
loss: 1.0363091230392456,grad_norm: 0.6881410155921632, iteration: 460294
loss: 1.035395622253418,grad_norm: 0.7869331581001787, iteration: 460295
loss: 0.9872838854789734,grad_norm: 0.6659063819614975, iteration: 460296
loss: 0.986655592918396,grad_norm: 0.8200076624059229, iteration: 460297
loss: 1.0373684167861938,grad_norm: 0.8010214804059335, iteration: 460298
loss: 1.000943899154663,grad_norm: 0.8128471718727476, iteration: 460299
loss: 0.9791167378425598,grad_norm: 0.9339520771289841, iteration: 460300
loss: 0.9957923293113708,grad_norm: 0.7834554925017375, iteration: 460301
loss: 0.9380635619163513,grad_norm: 0.7740224993885448, iteration: 460302
loss: 0.9905003905296326,grad_norm: 0.8712899841713312, iteration: 460303
loss: 1.0078717470169067,grad_norm: 0.7731260769850782, iteration: 460304
loss: 1.0222007036209106,grad_norm: 0.7144709164544567, iteration: 460305
loss: 0.9797120690345764,grad_norm: 0.8475049703571998, iteration: 460306
loss: 1.0249216556549072,grad_norm: 0.747001242442745, iteration: 460307
loss: 0.9747868180274963,grad_norm: 0.8006057875147882, iteration: 460308
loss: 0.9880647659301758,grad_norm: 0.6492434270787601, iteration: 460309
loss: 1.0181256532669067,grad_norm: 0.8009099463504702, iteration: 460310
loss: 0.9980014562606812,grad_norm: 0.9084016053402527, iteration: 460311
loss: 0.9639649987220764,grad_norm: 0.7345761901766988, iteration: 460312
loss: 0.9469665288925171,grad_norm: 0.6344296125686862, iteration: 460313
loss: 0.9898288249969482,grad_norm: 0.9848525812274447, iteration: 460314
loss: 1.0233436822891235,grad_norm: 0.7181071155366967, iteration: 460315
loss: 1.0072649717330933,grad_norm: 0.8635170492575555, iteration: 460316
loss: 1.0321803092956543,grad_norm: 0.7192097144434471, iteration: 460317
loss: 0.9871754050254822,grad_norm: 0.732400684730554, iteration: 460318
loss: 0.9827650785446167,grad_norm: 0.8548958844376195, iteration: 460319
loss: 0.9926868081092834,grad_norm: 0.8804018417372595, iteration: 460320
loss: 0.9790025353431702,grad_norm: 0.6533061384262262, iteration: 460321
loss: 1.007825255393982,grad_norm: 0.9999990383439483, iteration: 460322
loss: 1.0115362405776978,grad_norm: 0.7115624932959246, iteration: 460323
loss: 1.0035667419433594,grad_norm: 0.6976008468524442, iteration: 460324
loss: 0.9997844696044922,grad_norm: 0.7541261428010309, iteration: 460325
loss: 0.9984484910964966,grad_norm: 0.7948738027062251, iteration: 460326
loss: 0.9851046204566956,grad_norm: 0.7733363252930635, iteration: 460327
loss: 1.0389031171798706,grad_norm: 0.7706428019267758, iteration: 460328
loss: 0.9852380752563477,grad_norm: 0.6725310699863263, iteration: 460329
loss: 0.9807947874069214,grad_norm: 0.7957995865289142, iteration: 460330
loss: 0.9984481334686279,grad_norm: 0.7224346269330089, iteration: 460331
loss: 1.0185606479644775,grad_norm: 0.7599114616521908, iteration: 460332
loss: 1.0074586868286133,grad_norm: 0.7284686641148108, iteration: 460333
loss: 0.9995051622390747,grad_norm: 0.7715008893908467, iteration: 460334
loss: 1.005359172821045,grad_norm: 0.7920814402506354, iteration: 460335
loss: 1.001393437385559,grad_norm: 0.8464459387646257, iteration: 460336
loss: 0.9670286774635315,grad_norm: 0.7739486555997444, iteration: 460337
loss: 0.9941555857658386,grad_norm: 0.9999994267108916, iteration: 460338
loss: 0.985029399394989,grad_norm: 0.7382919323486771, iteration: 460339
loss: 1.0028979778289795,grad_norm: 0.9999990262571232, iteration: 460340
loss: 0.9878008365631104,grad_norm: 0.7833901971864438, iteration: 460341
loss: 1.1711022853851318,grad_norm: 0.7923284337989196, iteration: 460342
loss: 0.9773571491241455,grad_norm: 0.7841510598046834, iteration: 460343
loss: 0.9859975576400757,grad_norm: 0.6742998098113315, iteration: 460344
loss: 1.0039894580841064,grad_norm: 0.8234345360545319, iteration: 460345
loss: 1.0202916860580444,grad_norm: 0.7117157987830386, iteration: 460346
loss: 1.0246306657791138,grad_norm: 0.8237534590471293, iteration: 460347
loss: 0.9918704628944397,grad_norm: 0.8615126823375632, iteration: 460348
loss: 0.9784317016601562,grad_norm: 0.9999990055257604, iteration: 460349
loss: 1.0970430374145508,grad_norm: 0.9999990826504269, iteration: 460350
loss: 1.0182063579559326,grad_norm: 0.9003114572980572, iteration: 460351
loss: 1.0291712284088135,grad_norm: 0.7486122598499957, iteration: 460352
loss: 1.0068321228027344,grad_norm: 0.8107707236881851, iteration: 460353
loss: 1.0417132377624512,grad_norm: 0.8328749206867744, iteration: 460354
loss: 1.0159883499145508,grad_norm: 0.9093544620637541, iteration: 460355
loss: 1.0613651275634766,grad_norm: 0.8444402599547297, iteration: 460356
loss: 1.0118651390075684,grad_norm: 0.7759949996210722, iteration: 460357
loss: 0.9981722831726074,grad_norm: 0.9999994394181716, iteration: 460358
loss: 0.9898087382316589,grad_norm: 0.9067003847154221, iteration: 460359
loss: 0.9866994619369507,grad_norm: 0.7395009897947817, iteration: 460360
loss: 0.9959282875061035,grad_norm: 0.6569402369509035, iteration: 460361
loss: 1.008328914642334,grad_norm: 0.8625922169483334, iteration: 460362
loss: 1.0030268430709839,grad_norm: 0.7427618941797889, iteration: 460363
loss: 0.9882422685623169,grad_norm: 0.7396921226489099, iteration: 460364
loss: 0.9744982123374939,grad_norm: 0.567299370973676, iteration: 460365
loss: 0.9777497053146362,grad_norm: 0.9715209454767575, iteration: 460366
loss: 1.0416457653045654,grad_norm: 0.7512677665822561, iteration: 460367
loss: 0.9847763776779175,grad_norm: 0.7595132899179337, iteration: 460368
loss: 1.0015519857406616,grad_norm: 0.9999990742561717, iteration: 460369
loss: 1.0028166770935059,grad_norm: 0.7492026257032236, iteration: 460370
loss: 0.9843838810920715,grad_norm: 0.8897264023914669, iteration: 460371
loss: 0.9991961121559143,grad_norm: 0.7121672472407657, iteration: 460372
loss: 1.0084776878356934,grad_norm: 0.6946816842060196, iteration: 460373
loss: 1.0073717832565308,grad_norm: 0.741123518582868, iteration: 460374
loss: 0.9630231857299805,grad_norm: 0.7032068906863728, iteration: 460375
loss: 0.9940940737724304,grad_norm: 0.6313060498973843, iteration: 460376
loss: 0.9812790155410767,grad_norm: 0.6951104782114959, iteration: 460377
loss: 1.0038434267044067,grad_norm: 0.8031984105159813, iteration: 460378
loss: 0.9924987554550171,grad_norm: 0.9999995423242376, iteration: 460379
loss: 1.0242860317230225,grad_norm: 0.7912766156488533, iteration: 460380
loss: 1.0526169538497925,grad_norm: 0.9999990866926074, iteration: 460381
loss: 1.0089508295059204,grad_norm: 0.6252490221316971, iteration: 460382
loss: 0.9800685048103333,grad_norm: 0.6667706558891928, iteration: 460383
loss: 1.0035213232040405,grad_norm: 0.6520380283718742, iteration: 460384
loss: 1.0337128639221191,grad_norm: 0.9999995034765824, iteration: 460385
loss: 1.0346896648406982,grad_norm: 0.9999997041107979, iteration: 460386
loss: 1.0085541009902954,grad_norm: 0.8097476428809776, iteration: 460387
loss: 1.0214025974273682,grad_norm: 0.9999992877495614, iteration: 460388
loss: 0.9727926254272461,grad_norm: 0.7881489870722521, iteration: 460389
loss: 0.9971003532409668,grad_norm: 0.5727207423448686, iteration: 460390
loss: 0.9886590838432312,grad_norm: 0.8345235931026399, iteration: 460391
loss: 0.9980166554450989,grad_norm: 0.7139472127215201, iteration: 460392
loss: 0.9980089664459229,grad_norm: 0.73323906670243, iteration: 460393
loss: 1.0731135606765747,grad_norm: 0.7674470169257174, iteration: 460394
loss: 1.0383961200714111,grad_norm: 0.7522516982035007, iteration: 460395
loss: 0.9842448830604553,grad_norm: 0.8355147763015263, iteration: 460396
loss: 1.0484631061553955,grad_norm: 0.7192013431117558, iteration: 460397
loss: 1.0232961177825928,grad_norm: 0.6354763900486375, iteration: 460398
loss: 0.9932246208190918,grad_norm: 0.786968459154823, iteration: 460399
loss: 0.9628899097442627,grad_norm: 0.7351994183097874, iteration: 460400
loss: 0.9738045334815979,grad_norm: 0.6995577788497679, iteration: 460401
loss: 1.0195252895355225,grad_norm: 0.8188542267554817, iteration: 460402
loss: 0.9654263854026794,grad_norm: 0.6987683596326995, iteration: 460403
loss: 1.014907717704773,grad_norm: 0.700655202331401, iteration: 460404
loss: 0.9663538932800293,grad_norm: 0.9999992560476104, iteration: 460405
loss: 1.002628207206726,grad_norm: 0.8216736174689387, iteration: 460406
loss: 1.012130618095398,grad_norm: 0.9999997119013904, iteration: 460407
loss: 1.028738260269165,grad_norm: 0.6826740150009741, iteration: 460408
loss: 0.9935094714164734,grad_norm: 0.7735471453397554, iteration: 460409
loss: 1.0646297931671143,grad_norm: 0.8132565956784537, iteration: 460410
loss: 1.000594973564148,grad_norm: 0.6397463123331895, iteration: 460411
loss: 1.0085475444793701,grad_norm: 0.9999990223506374, iteration: 460412
loss: 0.997747540473938,grad_norm: 0.751379608115328, iteration: 460413
loss: 1.01461923122406,grad_norm: 0.7824688194717867, iteration: 460414
loss: 1.0877856016159058,grad_norm: 0.8343137191621772, iteration: 460415
loss: 1.0140790939331055,grad_norm: 0.7436867483165716, iteration: 460416
loss: 0.9904388785362244,grad_norm: 0.7387494878134576, iteration: 460417
loss: 1.0116368532180786,grad_norm: 0.7518054402339709, iteration: 460418
loss: 0.9786970615386963,grad_norm: 0.7332927456692216, iteration: 460419
loss: 0.9888949394226074,grad_norm: 0.8119356001714029, iteration: 460420
loss: 1.0299110412597656,grad_norm: 0.9999990578759939, iteration: 460421
loss: 1.0085248947143555,grad_norm: 0.8295777484300739, iteration: 460422
loss: 0.9769973158836365,grad_norm: 0.777034386670477, iteration: 460423
loss: 1.0035419464111328,grad_norm: 0.8130690038859016, iteration: 460424
loss: 0.9672792553901672,grad_norm: 0.8492499630783336, iteration: 460425
loss: 1.014609456062317,grad_norm: 0.6930784837870887, iteration: 460426
loss: 0.9840651750564575,grad_norm: 0.8925423965851732, iteration: 460427
loss: 1.009352445602417,grad_norm: 0.7954724442198549, iteration: 460428
loss: 0.9797412157058716,grad_norm: 0.7191573754073277, iteration: 460429
loss: 0.9930988550186157,grad_norm: 0.7432754701956074, iteration: 460430
loss: 0.9674919247627258,grad_norm: 0.9295277417398441, iteration: 460431
loss: 1.0161014795303345,grad_norm: 0.9999994436540629, iteration: 460432
loss: 1.0152631998062134,grad_norm: 0.6520292841547336, iteration: 460433
loss: 0.9743480086326599,grad_norm: 0.7565607389597738, iteration: 460434
loss: 1.0034034252166748,grad_norm: 0.8381884485372404, iteration: 460435
loss: 0.979418158531189,grad_norm: 0.7553198774481281, iteration: 460436
loss: 0.9985731840133667,grad_norm: 0.850545125578962, iteration: 460437
loss: 0.9679596424102783,grad_norm: 0.7370251893039558, iteration: 460438
loss: 1.0188504457473755,grad_norm: 0.7104079186225151, iteration: 460439
loss: 1.003383994102478,grad_norm: 0.715691595652118, iteration: 460440
loss: 0.9997844099998474,grad_norm: 0.8177855648951654, iteration: 460441
loss: 1.0704325437545776,grad_norm: 0.9999999149507023, iteration: 460442
loss: 1.0292969942092896,grad_norm: 0.7418611398769467, iteration: 460443
loss: 1.025322437286377,grad_norm: 0.858488221831401, iteration: 460444
loss: 1.0204746723175049,grad_norm: 0.8916112116316702, iteration: 460445
loss: 1.0104104280471802,grad_norm: 0.8060201506684426, iteration: 460446
loss: 0.9432582259178162,grad_norm: 0.8557713808008804, iteration: 460447
loss: 0.9720024466514587,grad_norm: 0.7538481322295169, iteration: 460448
loss: 0.9809088706970215,grad_norm: 0.6894997838413987, iteration: 460449
loss: 0.9981442093849182,grad_norm: 0.7173994939081189, iteration: 460450
loss: 1.0139837265014648,grad_norm: 0.7908370020467409, iteration: 460451
loss: 0.9925023317337036,grad_norm: 0.818843143748215, iteration: 460452
loss: 0.967700719833374,grad_norm: 0.7072587797812595, iteration: 460453
loss: 1.1771525144577026,grad_norm: 0.9582346130183669, iteration: 460454
loss: 0.9718723893165588,grad_norm: 0.7717543230174412, iteration: 460455
loss: 1.0190719366073608,grad_norm: 0.8299751852821196, iteration: 460456
loss: 0.9803434610366821,grad_norm: 0.6415197450642866, iteration: 460457
loss: 1.0592749118804932,grad_norm: 0.9999990659771633, iteration: 460458
loss: 1.0226970911026,grad_norm: 0.717912091583354, iteration: 460459
loss: 1.024924635887146,grad_norm: 0.7841168093174539, iteration: 460460
loss: 1.006850004196167,grad_norm: 0.76463457222534, iteration: 460461
loss: 0.9909715056419373,grad_norm: 0.7608363777477807, iteration: 460462
loss: 1.0111324787139893,grad_norm: 0.6707234196758923, iteration: 460463
loss: 0.9742861390113831,grad_norm: 0.7136341956076296, iteration: 460464
loss: 0.9945769906044006,grad_norm: 0.7776257135237011, iteration: 460465
loss: 1.0293290615081787,grad_norm: 0.7912660059558541, iteration: 460466
loss: 0.9862537980079651,grad_norm: 0.9999995883987216, iteration: 460467
loss: 0.969091534614563,grad_norm: 0.765413600203961, iteration: 460468
loss: 1.0170587301254272,grad_norm: 0.9999990850050537, iteration: 460469
loss: 0.9566648602485657,grad_norm: 0.6866265561698062, iteration: 460470
loss: 0.9687724113464355,grad_norm: 0.8966004761294887, iteration: 460471
loss: 1.0763957500457764,grad_norm: 0.8063657936538229, iteration: 460472
loss: 0.997248113155365,grad_norm: 0.6963007082158635, iteration: 460473
loss: 1.0295627117156982,grad_norm: 0.8373653113433728, iteration: 460474
loss: 1.0042330026626587,grad_norm: 0.9999997046247728, iteration: 460475
loss: 0.9927094578742981,grad_norm: 0.8779515891718587, iteration: 460476
loss: 1.0259743928909302,grad_norm: 0.9999998812890728, iteration: 460477
loss: 0.9905449748039246,grad_norm: 0.8093321687958905, iteration: 460478
loss: 1.0301473140716553,grad_norm: 0.7193491809345236, iteration: 460479
loss: 0.9976199865341187,grad_norm: 0.8316197853096465, iteration: 460480
loss: 1.0433831214904785,grad_norm: 0.7477201247728475, iteration: 460481
loss: 1.0005133152008057,grad_norm: 0.7588602948038199, iteration: 460482
loss: 1.0334572792053223,grad_norm: 0.6864548977282673, iteration: 460483
loss: 0.9905639290809631,grad_norm: 0.629578717768906, iteration: 460484
loss: 0.9760668873786926,grad_norm: 0.837551529044475, iteration: 460485
loss: 0.9919862151145935,grad_norm: 0.8053886436895948, iteration: 460486
loss: 1.03963303565979,grad_norm: 0.7732037757769223, iteration: 460487
loss: 1.01264226436615,grad_norm: 0.8554644548119719, iteration: 460488
loss: 0.9778184294700623,grad_norm: 0.7354876770780736, iteration: 460489
loss: 1.0337549448013306,grad_norm: 0.9348547503442429, iteration: 460490
loss: 0.9923679828643799,grad_norm: 0.6857069728119541, iteration: 460491
loss: 0.9798319339752197,grad_norm: 0.8708719977805173, iteration: 460492
loss: 0.9653214812278748,grad_norm: 0.5959930462571541, iteration: 460493
loss: 1.0236870050430298,grad_norm: 0.956894135901759, iteration: 460494
loss: 0.9902553558349609,grad_norm: 0.7939291287983871, iteration: 460495
loss: 0.9839361906051636,grad_norm: 0.7546004724155039, iteration: 460496
loss: 1.007147192955017,grad_norm: 0.8502157889403249, iteration: 460497
loss: 0.9790706634521484,grad_norm: 0.7886499127433395, iteration: 460498
loss: 1.0079683065414429,grad_norm: 0.7339804271215624, iteration: 460499
loss: 1.0125453472137451,grad_norm: 0.6208373552627052, iteration: 460500
loss: 1.0075472593307495,grad_norm: 0.9861070363981804, iteration: 460501
loss: 1.0203295946121216,grad_norm: 0.724773387233695, iteration: 460502
loss: 1.0048274993896484,grad_norm: 0.9435150626418979, iteration: 460503
loss: 1.0153921842575073,grad_norm: 0.8577889780767564, iteration: 460504
loss: 0.9985568523406982,grad_norm: 0.9590598573303504, iteration: 460505
loss: 0.9790536165237427,grad_norm: 0.6986185595029267, iteration: 460506
loss: 1.0158607959747314,grad_norm: 0.6355059901181167, iteration: 460507
loss: 0.9726617932319641,grad_norm: 0.80976864333401, iteration: 460508
loss: 1.022003173828125,grad_norm: 0.5853077464597025, iteration: 460509
loss: 1.0449354648590088,grad_norm: 0.7743945769811619, iteration: 460510
loss: 1.002390742301941,grad_norm: 0.7215115422026198, iteration: 460511
loss: 1.0701254606246948,grad_norm: 0.9999995295023483, iteration: 460512
loss: 0.9903884530067444,grad_norm: 0.6891171404592447, iteration: 460513
loss: 0.9707028269767761,grad_norm: 0.8036667164194877, iteration: 460514
loss: 0.9525858759880066,grad_norm: 0.7565713384943276, iteration: 460515
loss: 0.9876189231872559,grad_norm: 0.9123908519067366, iteration: 460516
loss: 0.9776214957237244,grad_norm: 0.800585461616996, iteration: 460517
loss: 0.9866877198219299,grad_norm: 0.711228794467204, iteration: 460518
loss: 0.9795393943786621,grad_norm: 0.83572553246653, iteration: 460519
loss: 0.9946528077125549,grad_norm: 0.8145809691388057, iteration: 460520
loss: 1.016638159751892,grad_norm: 0.7148167578521609, iteration: 460521
loss: 0.9874330163002014,grad_norm: 0.6902253330105628, iteration: 460522
loss: 0.9599935412406921,grad_norm: 0.6821804410699166, iteration: 460523
loss: 1.012440800666809,grad_norm: 0.7878559486381045, iteration: 460524
loss: 0.9977463483810425,grad_norm: 0.7682261911858801, iteration: 460525
loss: 1.0197386741638184,grad_norm: 0.7549093207462492, iteration: 460526
loss: 1.0118883848190308,grad_norm: 0.8871935853298085, iteration: 460527
loss: 1.0098943710327148,grad_norm: 0.7437733225141869, iteration: 460528
loss: 1.0219225883483887,grad_norm: 0.9999994115917835, iteration: 460529
loss: 0.9964826107025146,grad_norm: 0.9999998821348813, iteration: 460530
loss: 0.967778205871582,grad_norm: 0.9523924064645904, iteration: 460531
loss: 0.9979270696640015,grad_norm: 0.695613712740645, iteration: 460532
loss: 1.003169059753418,grad_norm: 0.8435770603418802, iteration: 460533
loss: 1.0177807807922363,grad_norm: 0.8578233145203115, iteration: 460534
loss: 1.0201104879379272,grad_norm: 0.6818636272556405, iteration: 460535
loss: 1.0130267143249512,grad_norm: 0.7203566863818699, iteration: 460536
loss: 1.0128017663955688,grad_norm: 0.6090106320102494, iteration: 460537
loss: 1.0133399963378906,grad_norm: 0.9748277360493233, iteration: 460538
loss: 1.0175038576126099,grad_norm: 0.7015729859355948, iteration: 460539
loss: 1.0335453748703003,grad_norm: 0.8692952388134615, iteration: 460540
loss: 1.0257223844528198,grad_norm: 0.869856246438161, iteration: 460541
loss: 0.970361053943634,grad_norm: 0.703088037057463, iteration: 460542
loss: 0.9464824199676514,grad_norm: 0.8417402885817817, iteration: 460543
loss: 1.0796319246292114,grad_norm: 0.9999991542648757, iteration: 460544
loss: 0.9858740568161011,grad_norm: 0.7342806364372751, iteration: 460545
loss: 1.0160578489303589,grad_norm: 0.7637961276493686, iteration: 460546
loss: 0.9981104731559753,grad_norm: 0.7693100585249971, iteration: 460547
loss: 1.0078617334365845,grad_norm: 0.9999990894804502, iteration: 460548
loss: 1.0014053583145142,grad_norm: 0.6177433841014774, iteration: 460549
loss: 1.0155162811279297,grad_norm: 0.9335637014883094, iteration: 460550
loss: 0.983495831489563,grad_norm: 0.7810786241979264, iteration: 460551
loss: 0.9839252233505249,grad_norm: 0.7162521510899493, iteration: 460552
loss: 0.9591928124427795,grad_norm: 0.6735063989786698, iteration: 460553
loss: 0.9758353233337402,grad_norm: 0.7095777037056941, iteration: 460554
loss: 0.9800739288330078,grad_norm: 0.6722400285829833, iteration: 460555
loss: 0.9771270751953125,grad_norm: 0.8635582913026405, iteration: 460556
loss: 0.9968414902687073,grad_norm: 0.6470903857756686, iteration: 460557
loss: 1.0238381624221802,grad_norm: 0.9758383944801821, iteration: 460558
loss: 0.996922492980957,grad_norm: 0.6832152691391772, iteration: 460559
loss: 1.0004900693893433,grad_norm: 0.8043802354156337, iteration: 460560
loss: 1.0354700088500977,grad_norm: 0.8506731083989285, iteration: 460561
loss: 1.0260835886001587,grad_norm: 0.7969361919985832, iteration: 460562
loss: 1.0352035760879517,grad_norm: 0.8294615732202555, iteration: 460563
loss: 0.9933279752731323,grad_norm: 0.7283996047245487, iteration: 460564
loss: 0.9865348935127258,grad_norm: 0.6654515071890801, iteration: 460565
loss: 0.9808638095855713,grad_norm: 0.6971548453758598, iteration: 460566
loss: 0.9868771433830261,grad_norm: 0.8164505550125573, iteration: 460567
loss: 0.9712396860122681,grad_norm: 0.6389409323895109, iteration: 460568
loss: 1.0480585098266602,grad_norm: 0.7436939753665157, iteration: 460569
loss: 1.1905206441879272,grad_norm: 0.9999999965147733, iteration: 460570
loss: 0.9980489015579224,grad_norm: 0.7582312522830867, iteration: 460571
loss: 0.9968444108963013,grad_norm: 0.6694471828996886, iteration: 460572
loss: 0.9857536554336548,grad_norm: 0.7292876904933012, iteration: 460573
loss: 1.01405668258667,grad_norm: 0.6899449353046861, iteration: 460574
loss: 1.2104158401489258,grad_norm: 0.9999997447687424, iteration: 460575
loss: 1.1542872190475464,grad_norm: 0.9999999220408009, iteration: 460576
loss: 1.0310362577438354,grad_norm: 0.7750477272256725, iteration: 460577
loss: 1.0255656242370605,grad_norm: 0.7568391957316312, iteration: 460578
loss: 1.0695301294326782,grad_norm: 0.9999998837050202, iteration: 460579
loss: 0.9857901930809021,grad_norm: 0.6871779956554115, iteration: 460580
loss: 1.437586784362793,grad_norm: 0.9999999584137693, iteration: 460581
loss: 1.0106592178344727,grad_norm: 0.8528237445795809, iteration: 460582
loss: 1.184153437614441,grad_norm: 0.9999996189449158, iteration: 460583
loss: 1.0057116746902466,grad_norm: 0.9861089397990743, iteration: 460584
loss: 0.9845337867736816,grad_norm: 0.7929496936306022, iteration: 460585
loss: 0.9986271858215332,grad_norm: 0.7981233719608062, iteration: 460586
loss: 1.0067979097366333,grad_norm: 0.9111822675033273, iteration: 460587
loss: 1.0500181913375854,grad_norm: 0.9999989695711121, iteration: 460588
loss: 1.0198801755905151,grad_norm: 0.7852010212351388, iteration: 460589
loss: 1.0365172624588013,grad_norm: 0.7456882866992389, iteration: 460590
loss: 0.9857014417648315,grad_norm: 0.7185373109046594, iteration: 460591
loss: 0.9860050082206726,grad_norm: 0.9149231811509453, iteration: 460592
loss: 0.9882654547691345,grad_norm: 0.7467614213271576, iteration: 460593
loss: 1.0539506673812866,grad_norm: 0.9999999091544037, iteration: 460594
loss: 0.997643768787384,grad_norm: 0.7990547383958002, iteration: 460595
loss: 0.982387363910675,grad_norm: 0.690415337083117, iteration: 460596
loss: 1.0028417110443115,grad_norm: 0.6049849553753248, iteration: 460597
loss: 0.9663296937942505,grad_norm: 0.871249391119223, iteration: 460598
loss: 0.9843516945838928,grad_norm: 0.8145498872828589, iteration: 460599
loss: 1.0067670345306396,grad_norm: 0.7026140199296724, iteration: 460600
loss: 1.0286128520965576,grad_norm: 0.9999995075699843, iteration: 460601
loss: 1.0040068626403809,grad_norm: 0.9999999176141869, iteration: 460602
loss: 0.9993281364440918,grad_norm: 0.9415987376831056, iteration: 460603
loss: 0.9966787099838257,grad_norm: 0.9027127137794607, iteration: 460604
loss: 1.0333927869796753,grad_norm: 0.9999994349310809, iteration: 460605
loss: 0.9900506138801575,grad_norm: 0.7006979993512967, iteration: 460606
loss: 0.9934465289115906,grad_norm: 0.9999991560602586, iteration: 460607
loss: 1.0156135559082031,grad_norm: 0.9728944742148763, iteration: 460608
loss: 0.9776052236557007,grad_norm: 0.9999996793839362, iteration: 460609
loss: 1.0091184377670288,grad_norm: 0.8280604303705325, iteration: 460610
loss: 1.0150102376937866,grad_norm: 0.9518214438920083, iteration: 460611
loss: 0.978972315788269,grad_norm: 0.8495608053964804, iteration: 460612
loss: 0.9927417039871216,grad_norm: 0.71609834783247, iteration: 460613
loss: 1.0469872951507568,grad_norm: 0.9999990797348088, iteration: 460614
loss: 1.0098568201065063,grad_norm: 0.9999991243202727, iteration: 460615
loss: 0.951644241809845,grad_norm: 0.999999107328349, iteration: 460616
loss: 0.9879946112632751,grad_norm: 0.7514866980437155, iteration: 460617
loss: 1.0035003423690796,grad_norm: 0.7800631271857275, iteration: 460618
loss: 0.9842529892921448,grad_norm: 0.7436543696583708, iteration: 460619
loss: 0.9975259304046631,grad_norm: 0.7396276267887022, iteration: 460620
loss: 1.0189064741134644,grad_norm: 0.8596012112389637, iteration: 460621
loss: 0.9663301110267639,grad_norm: 0.7511015163196508, iteration: 460622
loss: 1.0490607023239136,grad_norm: 0.8766634736941489, iteration: 460623
loss: 1.0031471252441406,grad_norm: 0.7919749110873764, iteration: 460624
loss: 1.0010056495666504,grad_norm: 0.7848526749646128, iteration: 460625
loss: 1.0375785827636719,grad_norm: 0.9999995972649692, iteration: 460626
loss: 0.9810842275619507,grad_norm: 0.7309007466582995, iteration: 460627
loss: 0.9886274933815002,grad_norm: 0.7561807438909751, iteration: 460628
loss: 1.0319656133651733,grad_norm: 0.8274877251244624, iteration: 460629
loss: 0.9662184119224548,grad_norm: 0.8303506562674033, iteration: 460630
loss: 0.9925087094306946,grad_norm: 0.9194082065870132, iteration: 460631
loss: 0.9660210609436035,grad_norm: 0.6995727186788366, iteration: 460632
loss: 0.958171010017395,grad_norm: 0.9549772668673735, iteration: 460633
loss: 0.9782503247261047,grad_norm: 0.7921231234367043, iteration: 460634
loss: 1.0505374670028687,grad_norm: 0.9999990112765726, iteration: 460635
loss: 1.0190088748931885,grad_norm: 0.7256739389260286, iteration: 460636
loss: 0.9944719672203064,grad_norm: 0.9178261982716568, iteration: 460637
loss: 1.0187026262283325,grad_norm: 0.846956373909254, iteration: 460638
loss: 0.9730737209320068,grad_norm: 0.7754716621082204, iteration: 460639
loss: 1.0171990394592285,grad_norm: 0.8257354645613225, iteration: 460640
loss: 1.0115547180175781,grad_norm: 0.7748359095330384, iteration: 460641
loss: 0.959185779094696,grad_norm: 0.7227145757388856, iteration: 460642
loss: 0.9736711382865906,grad_norm: 0.7540494216478236, iteration: 460643
loss: 0.9661298990249634,grad_norm: 0.9143252102159468, iteration: 460644
loss: 1.0193430185317993,grad_norm: 0.7140002133439334, iteration: 460645
loss: 1.0026756525039673,grad_norm: 0.7639755652235898, iteration: 460646
loss: 1.0184149742126465,grad_norm: 0.9999990669365664, iteration: 460647
loss: 1.023102879524231,grad_norm: 0.7514288995946066, iteration: 460648
loss: 0.9991208910942078,grad_norm: 0.6780414351442671, iteration: 460649
loss: 0.9848960041999817,grad_norm: 0.8069212974333833, iteration: 460650
loss: 0.9799605011940002,grad_norm: 0.9999991432863808, iteration: 460651
loss: 0.9552319049835205,grad_norm: 0.7228803903807904, iteration: 460652
loss: 0.9865654110908508,grad_norm: 0.7186627508664772, iteration: 460653
loss: 1.0100672245025635,grad_norm: 0.7794040608187106, iteration: 460654
loss: 1.0014618635177612,grad_norm: 0.6866698439380596, iteration: 460655
loss: 1.0056548118591309,grad_norm: 0.8020123796995547, iteration: 460656
loss: 0.9791643023490906,grad_norm: 0.774387139450398, iteration: 460657
loss: 1.032052755355835,grad_norm: 0.8256794236546766, iteration: 460658
loss: 0.98995041847229,grad_norm: 0.9776068311468651, iteration: 460659
loss: 1.0053023099899292,grad_norm: 0.9999992495183299, iteration: 460660
loss: 1.007228136062622,grad_norm: 0.7895093306340172, iteration: 460661
loss: 0.9977770447731018,grad_norm: 0.7257527741901357, iteration: 460662
loss: 0.9967743158340454,grad_norm: 0.6521117182799112, iteration: 460663
loss: 0.9614683389663696,grad_norm: 0.7520574542848928, iteration: 460664
loss: 1.028993844985962,grad_norm: 0.7433690480327921, iteration: 460665
loss: 0.9493497014045715,grad_norm: 0.8023352591644178, iteration: 460666
loss: 1.000496745109558,grad_norm: 0.6548515959026332, iteration: 460667
loss: 1.0347408056259155,grad_norm: 0.8751066968509792, iteration: 460668
loss: 1.031476378440857,grad_norm: 0.999999430808322, iteration: 460669
loss: 1.0051764249801636,grad_norm: 0.7402257797128932, iteration: 460670
loss: 0.9937568306922913,grad_norm: 0.8384064421111963, iteration: 460671
loss: 1.0173707008361816,grad_norm: 0.845280745030815, iteration: 460672
loss: 0.9967385530471802,grad_norm: 0.7873752528504928, iteration: 460673
loss: 1.013736605644226,grad_norm: 0.8979351440726143, iteration: 460674
loss: 1.0001235008239746,grad_norm: 0.7882015429052148, iteration: 460675
loss: 0.9788936972618103,grad_norm: 0.7214139908235493, iteration: 460676
loss: 1.0186148881912231,grad_norm: 0.7135224765120135, iteration: 460677
loss: 1.0214132070541382,grad_norm: 0.7794169052563196, iteration: 460678
loss: 0.9959309697151184,grad_norm: 0.6764119306442805, iteration: 460679
loss: 1.0003300905227661,grad_norm: 0.78549719585691, iteration: 460680
loss: 1.004652976989746,grad_norm: 0.8866400223629698, iteration: 460681
loss: 1.0276466608047485,grad_norm: 0.7847271647164189, iteration: 460682
loss: 1.0436736345291138,grad_norm: 0.9494540968288028, iteration: 460683
loss: 1.0230971574783325,grad_norm: 0.8962785293891576, iteration: 460684
loss: 0.9680539965629578,grad_norm: 0.7197441563401301, iteration: 460685
loss: 1.003454566001892,grad_norm: 0.8644563064945833, iteration: 460686
loss: 0.9862167835235596,grad_norm: 0.8200505021687983, iteration: 460687
loss: 1.018119215965271,grad_norm: 0.7498329118071737, iteration: 460688
loss: 0.9953086376190186,grad_norm: 0.97607932538204, iteration: 460689
loss: 0.9978537559509277,grad_norm: 0.8092920722659884, iteration: 460690
loss: 1.0078624486923218,grad_norm: 0.9808076824489496, iteration: 460691
loss: 0.9757041335105896,grad_norm: 0.9086915004245795, iteration: 460692
loss: 1.0039269924163818,grad_norm: 0.6965244995543086, iteration: 460693
loss: 0.9930916428565979,grad_norm: 0.8863792829043339, iteration: 460694
loss: 1.0285264253616333,grad_norm: 0.8467179374823586, iteration: 460695
loss: 1.0057650804519653,grad_norm: 0.7788084824675169, iteration: 460696
loss: 1.003737211227417,grad_norm: 0.7067558702339123, iteration: 460697
loss: 0.9972390532493591,grad_norm: 0.999999717610023, iteration: 460698
loss: 1.0110859870910645,grad_norm: 0.7468378466459191, iteration: 460699
loss: 1.017927646636963,grad_norm: 0.6455966471054104, iteration: 460700
loss: 0.9953984618186951,grad_norm: 0.9659951315513908, iteration: 460701
loss: 1.0185397863388062,grad_norm: 0.6866950565200189, iteration: 460702
loss: 0.9967241287231445,grad_norm: 0.8154067175042415, iteration: 460703
loss: 0.9749141335487366,grad_norm: 0.6119885855062349, iteration: 460704
loss: 0.9856491684913635,grad_norm: 0.8170669260348364, iteration: 460705
loss: 1.01789391040802,grad_norm: 0.9554822399300354, iteration: 460706
loss: 0.9878844618797302,grad_norm: 0.6570549955532449, iteration: 460707
loss: 0.9846271276473999,grad_norm: 0.7823870589488489, iteration: 460708
loss: 1.1852720975875854,grad_norm: 0.8956208085847158, iteration: 460709
loss: 0.9955455660820007,grad_norm: 0.7119654005525061, iteration: 460710
loss: 0.9585342407226562,grad_norm: 0.9198694948534873, iteration: 460711
loss: 0.9937727451324463,grad_norm: 0.6609161904877368, iteration: 460712
loss: 1.0137553215026855,grad_norm: 0.679466351635645, iteration: 460713
loss: 1.0196887254714966,grad_norm: 0.6908836485414438, iteration: 460714
loss: 0.9991996884346008,grad_norm: 0.794734797815658, iteration: 460715
loss: 1.0083677768707275,grad_norm: 0.7472164661256165, iteration: 460716
loss: 0.9797666668891907,grad_norm: 0.9752309425572666, iteration: 460717
loss: 1.0046319961547852,grad_norm: 0.8825983401903671, iteration: 460718
loss: 1.0080268383026123,grad_norm: 0.8225534885708314, iteration: 460719
loss: 1.0203200578689575,grad_norm: 0.8111328814030305, iteration: 460720
loss: 1.0320595502853394,grad_norm: 0.6844440382477505, iteration: 460721
loss: 1.0075864791870117,grad_norm: 0.6711015682003556, iteration: 460722
loss: 1.001740574836731,grad_norm: 0.6611593106590141, iteration: 460723
loss: 0.9613116383552551,grad_norm: 0.729745134752984, iteration: 460724
loss: 1.0095431804656982,grad_norm: 0.8926332289167922, iteration: 460725
loss: 0.9775378704071045,grad_norm: 0.8113594957747973, iteration: 460726
loss: 0.9880490303039551,grad_norm: 0.8075887937500416, iteration: 460727
loss: 0.9578523635864258,grad_norm: 0.8716828282182848, iteration: 460728
loss: 1.0287399291992188,grad_norm: 0.6926565645335009, iteration: 460729
loss: 0.9941602349281311,grad_norm: 0.7440164693651392, iteration: 460730
loss: 1.024454951286316,grad_norm: 0.9721774555976161, iteration: 460731
loss: 0.99195396900177,grad_norm: 0.8076377490163634, iteration: 460732
loss: 0.9462981820106506,grad_norm: 0.7135033146279233, iteration: 460733
loss: 1.019038200378418,grad_norm: 0.727880052106485, iteration: 460734
loss: 0.9412495493888855,grad_norm: 0.8116951315468067, iteration: 460735
loss: 1.0095360279083252,grad_norm: 0.7509873597104353, iteration: 460736
loss: 0.9638216495513916,grad_norm: 0.7463444242341895, iteration: 460737
loss: 1.0253010988235474,grad_norm: 0.7669925000606802, iteration: 460738
loss: 1.0159294605255127,grad_norm: 0.9999997571516679, iteration: 460739
loss: 1.0908477306365967,grad_norm: 0.9999997137363599, iteration: 460740
loss: 0.9813459515571594,grad_norm: 0.7226139681410297, iteration: 460741
loss: 1.0034397840499878,grad_norm: 0.7612813622504326, iteration: 460742
loss: 1.0097198486328125,grad_norm: 0.7880301953831095, iteration: 460743
loss: 1.0071173906326294,grad_norm: 0.8859712645257402, iteration: 460744
loss: 0.9908302426338196,grad_norm: 0.8613313338658644, iteration: 460745
loss: 1.023446798324585,grad_norm: 0.6945762158121382, iteration: 460746
loss: 1.0780110359191895,grad_norm: 0.7630430353596597, iteration: 460747
loss: 1.00289785861969,grad_norm: 0.7854353433909183, iteration: 460748
loss: 0.971560001373291,grad_norm: 0.7984965378310239, iteration: 460749
loss: 1.002641201019287,grad_norm: 0.622133014554823, iteration: 460750
loss: 1.0597537755966187,grad_norm: 0.999999094590459, iteration: 460751
loss: 0.9982274770736694,grad_norm: 0.6140112333622958, iteration: 460752
loss: 0.9834614396095276,grad_norm: 0.6654725903923772, iteration: 460753
loss: 1.0029290914535522,grad_norm: 0.708576265752097, iteration: 460754
loss: 0.97779780626297,grad_norm: 0.7218946962621703, iteration: 460755
loss: 0.9977906346321106,grad_norm: 0.6768568493473038, iteration: 460756
loss: 1.0037620067596436,grad_norm: 0.6809346842347529, iteration: 460757
loss: 1.106840968132019,grad_norm: 0.9999999394179644, iteration: 460758
loss: 1.0245823860168457,grad_norm: 0.6984253184348581, iteration: 460759
loss: 0.9667831063270569,grad_norm: 0.8760170285976946, iteration: 460760
loss: 0.9647524952888489,grad_norm: 0.704318404100286, iteration: 460761
loss: 0.9889732599258423,grad_norm: 0.8946722591988471, iteration: 460762
loss: 1.0175813436508179,grad_norm: 0.7924912808560369, iteration: 460763
loss: 1.0212126970291138,grad_norm: 0.9056913361895002, iteration: 460764
loss: 1.0102707147598267,grad_norm: 0.7691680168145805, iteration: 460765
loss: 1.0066758394241333,grad_norm: 0.6745147979780582, iteration: 460766
loss: 0.9701789617538452,grad_norm: 0.9008265122803008, iteration: 460767
loss: 1.007068157196045,grad_norm: 0.750683407059899, iteration: 460768
loss: 0.9533355236053467,grad_norm: 0.7848698689489202, iteration: 460769
loss: 0.9934750199317932,grad_norm: 0.7364551679940369, iteration: 460770
loss: 1.0226991176605225,grad_norm: 0.743069949123589, iteration: 460771
loss: 1.0243440866470337,grad_norm: 0.7234823833866144, iteration: 460772
loss: 0.9924956560134888,grad_norm: 0.7548250598106035, iteration: 460773
loss: 0.9656504988670349,grad_norm: 0.8271211896865512, iteration: 460774
loss: 0.973764181137085,grad_norm: 0.6887356985530864, iteration: 460775
loss: 1.0170633792877197,grad_norm: 0.8186234173864488, iteration: 460776
loss: 1.0600390434265137,grad_norm: 0.886911295176173, iteration: 460777
loss: 0.9677892327308655,grad_norm: 0.7377884774995184, iteration: 460778
loss: 1.0348551273345947,grad_norm: 0.8126396740100974, iteration: 460779
loss: 0.9731249213218689,grad_norm: 0.9007843740640809, iteration: 460780
loss: 0.9937117695808411,grad_norm: 0.741355799372834, iteration: 460781
loss: 0.9886467456817627,grad_norm: 0.8140795287163297, iteration: 460782
loss: 0.9841547608375549,grad_norm: 0.733709198426032, iteration: 460783
loss: 1.013817310333252,grad_norm: 0.8549962750532688, iteration: 460784
loss: 0.9926042556762695,grad_norm: 0.6723914836281594, iteration: 460785
loss: 0.997107744216919,grad_norm: 0.6278966621216449, iteration: 460786
loss: 1.0346912145614624,grad_norm: 0.8203591107768501, iteration: 460787
loss: 1.0229977369308472,grad_norm: 0.7797994380772906, iteration: 460788
loss: 0.9971591830253601,grad_norm: 0.6405733023368658, iteration: 460789
loss: 1.0207080841064453,grad_norm: 0.7362780635244733, iteration: 460790
loss: 1.0113145112991333,grad_norm: 0.8265796588644586, iteration: 460791
loss: 0.9762498736381531,grad_norm: 0.727544097059529, iteration: 460792
loss: 0.9918686747550964,grad_norm: 0.9999991297373054, iteration: 460793
loss: 1.049228549003601,grad_norm: 0.9023991226646577, iteration: 460794
loss: 0.9770844578742981,grad_norm: 0.7349470378932257, iteration: 460795
loss: 0.9975458383560181,grad_norm: 0.7627173471518102, iteration: 460796
loss: 0.98944091796875,grad_norm: 0.6718866528862729, iteration: 460797
loss: 1.0229077339172363,grad_norm: 0.8329847430757596, iteration: 460798
loss: 1.0572090148925781,grad_norm: 0.8790796326487983, iteration: 460799
loss: 1.0411299467086792,grad_norm: 0.7297629876699429, iteration: 460800
loss: 0.9810940027236938,grad_norm: 0.8087102428116016, iteration: 460801
loss: 0.9995264410972595,grad_norm: 0.6623186484962689, iteration: 460802
loss: 1.0510369539260864,grad_norm: 0.7740162622117093, iteration: 460803
loss: 0.9698633551597595,grad_norm: 0.7087217672943937, iteration: 460804
loss: 0.9894554615020752,grad_norm: 0.7808700324933041, iteration: 460805
loss: 1.0411869287490845,grad_norm: 0.8858320383442396, iteration: 460806
loss: 1.0120244026184082,grad_norm: 0.7875709525869014, iteration: 460807
loss: 0.9903706908226013,grad_norm: 0.7331562795138842, iteration: 460808
loss: 1.0038845539093018,grad_norm: 0.8927874750193382, iteration: 460809
loss: 1.0034022331237793,grad_norm: 0.7009066533332547, iteration: 460810
loss: 0.9776462912559509,grad_norm: 0.7647998817133638, iteration: 460811
loss: 0.9904119372367859,grad_norm: 0.8946202836988278, iteration: 460812
loss: 1.0070871114730835,grad_norm: 0.8356783207617235, iteration: 460813
loss: 1.0153559446334839,grad_norm: 0.7352214827289842, iteration: 460814
loss: 1.0302246809005737,grad_norm: 0.8977711682773826, iteration: 460815
loss: 0.9651360511779785,grad_norm: 0.7023133558133507, iteration: 460816
loss: 1.0068767070770264,grad_norm: 0.7978194189316105, iteration: 460817
loss: 0.9756817817687988,grad_norm: 0.7171177784471662, iteration: 460818
loss: 0.9957647919654846,grad_norm: 0.8594068550176959, iteration: 460819
loss: 0.9432817697525024,grad_norm: 0.9999992463445849, iteration: 460820
loss: 1.0152757167816162,grad_norm: 0.6726878009471412, iteration: 460821
loss: 0.995337188243866,grad_norm: 0.7801952540917654, iteration: 460822
loss: 1.0201222896575928,grad_norm: 0.8947978001702301, iteration: 460823
loss: 1.010200023651123,grad_norm: 0.774614189211742, iteration: 460824
loss: 0.9889324307441711,grad_norm: 0.7284070048768839, iteration: 460825
loss: 1.0011788606643677,grad_norm: 0.7127893917639103, iteration: 460826
loss: 0.9952821135520935,grad_norm: 0.8291122390535036, iteration: 460827
loss: 0.9809511303901672,grad_norm: 0.999999679659157, iteration: 460828
loss: 1.000441551208496,grad_norm: 0.996811676429635, iteration: 460829
loss: 1.0499241352081299,grad_norm: 0.9999990714634137, iteration: 460830
loss: 0.9883134961128235,grad_norm: 0.6921840921348247, iteration: 460831
loss: 1.0408387184143066,grad_norm: 0.999999783195847, iteration: 460832
loss: 1.014320731163025,grad_norm: 0.712739263450246, iteration: 460833
loss: 1.0165008306503296,grad_norm: 0.9038891266674931, iteration: 460834
loss: 1.0089446306228638,grad_norm: 0.7826981970999857, iteration: 460835
loss: 1.0534693002700806,grad_norm: 0.8479048893733179, iteration: 460836
loss: 1.0417948961257935,grad_norm: 0.9999992883064793, iteration: 460837
loss: 0.9971385598182678,grad_norm: 0.6571195883623815, iteration: 460838
loss: 1.018273949623108,grad_norm: 0.9363138758837415, iteration: 460839
loss: 1.0246615409851074,grad_norm: 0.7966153902870453, iteration: 460840
loss: 0.9693698883056641,grad_norm: 0.7244820807764217, iteration: 460841
loss: 1.0014621019363403,grad_norm: 0.6601062716701644, iteration: 460842
loss: 1.0077091455459595,grad_norm: 0.8107085277882343, iteration: 460843
loss: 1.0095030069351196,grad_norm: 0.9379528263733278, iteration: 460844
loss: 1.0084741115570068,grad_norm: 0.7449331289991014, iteration: 460845
loss: 1.0184530019760132,grad_norm: 0.7376518778879707, iteration: 460846
loss: 1.0107643604278564,grad_norm: 0.7772751652636876, iteration: 460847
loss: 1.036936640739441,grad_norm: 0.8389278594928256, iteration: 460848
loss: 0.9959465265274048,grad_norm: 0.7564750870051462, iteration: 460849
loss: 0.9923973083496094,grad_norm: 0.8788839174373058, iteration: 460850
loss: 0.9499189853668213,grad_norm: 0.7638023144396275, iteration: 460851
loss: 0.9816262125968933,grad_norm: 0.7611194962224438, iteration: 460852
loss: 0.9931038022041321,grad_norm: 0.769472447015338, iteration: 460853
loss: 1.0124380588531494,grad_norm: 0.7495252384502863, iteration: 460854
loss: 0.9920728802680969,grad_norm: 0.6367919888586048, iteration: 460855
loss: 0.9794811010360718,grad_norm: 0.6331155546845197, iteration: 460856
loss: 0.97181236743927,grad_norm: 0.8477263695988904, iteration: 460857
loss: 0.987432062625885,grad_norm: 0.7621080218589641, iteration: 460858
loss: 1.0046617984771729,grad_norm: 0.6674083153059411, iteration: 460859
loss: 1.000778317451477,grad_norm: 0.9999998994136783, iteration: 460860
loss: 0.9875325560569763,grad_norm: 0.8481492583819162, iteration: 460861
loss: 1.0066626071929932,grad_norm: 0.749393621232291, iteration: 460862
loss: 1.007165789604187,grad_norm: 0.6939879489275546, iteration: 460863
loss: 0.9845211505889893,grad_norm: 0.9999991089059843, iteration: 460864
loss: 1.0175786018371582,grad_norm: 0.7693891452014475, iteration: 460865
loss: 1.0068575143814087,grad_norm: 0.68777510436775, iteration: 460866
loss: 0.9887501001358032,grad_norm: 0.8547783158331171, iteration: 460867
loss: 1.005035161972046,grad_norm: 0.8534800592643266, iteration: 460868
loss: 0.994598388671875,grad_norm: 0.7571807582332418, iteration: 460869
loss: 1.011898159980774,grad_norm: 0.7788639312947818, iteration: 460870
loss: 1.0110641717910767,grad_norm: 0.9999996083517408, iteration: 460871
loss: 0.9867369532585144,grad_norm: 0.8386263410532258, iteration: 460872
loss: 0.967487633228302,grad_norm: 0.6590721438794697, iteration: 460873
loss: 1.0124914646148682,grad_norm: 0.9184040140553509, iteration: 460874
loss: 0.9705122113227844,grad_norm: 0.6774966096903803, iteration: 460875
loss: 0.9756546020507812,grad_norm: 0.9010253186460048, iteration: 460876
loss: 1.0079604387283325,grad_norm: 0.6900897050869027, iteration: 460877
loss: 0.9781338572502136,grad_norm: 0.8037832513145985, iteration: 460878
loss: 1.0139967203140259,grad_norm: 0.704424861404805, iteration: 460879
loss: 0.9856129288673401,grad_norm: 0.7684284952367149, iteration: 460880
loss: 0.9714098572731018,grad_norm: 0.8172906630505569, iteration: 460881
loss: 0.9661037921905518,grad_norm: 0.6899869892562123, iteration: 460882
loss: 0.9794028997421265,grad_norm: 0.8237962365484632, iteration: 460883
loss: 1.0112112760543823,grad_norm: 0.845236016438699, iteration: 460884
loss: 0.9827015995979309,grad_norm: 0.7369711798096363, iteration: 460885
loss: 0.9629595279693604,grad_norm: 0.8118751115007954, iteration: 460886
loss: 0.9677706956863403,grad_norm: 0.676089519788469, iteration: 460887
loss: 0.9909121990203857,grad_norm: 0.6443841974345252, iteration: 460888
loss: 1.0211836099624634,grad_norm: 0.8540321871807975, iteration: 460889
loss: 0.9851442575454712,grad_norm: 0.7707864214181331, iteration: 460890
loss: 0.9864416122436523,grad_norm: 0.7000583569642675, iteration: 460891
loss: 0.9679661989212036,grad_norm: 0.8223960636648786, iteration: 460892
loss: 0.9291756749153137,grad_norm: 0.6776874499928728, iteration: 460893
loss: 1.0284847021102905,grad_norm: 0.9232162394989126, iteration: 460894
loss: 1.0145560503005981,grad_norm: 0.7133270431720725, iteration: 460895
loss: 0.9831278324127197,grad_norm: 0.7389731730113582, iteration: 460896
loss: 0.9910223484039307,grad_norm: 0.7650421091694233, iteration: 460897
loss: 0.9935048222541809,grad_norm: 0.9273643708034114, iteration: 460898
loss: 0.9971845149993896,grad_norm: 0.8080521351455375, iteration: 460899
loss: 1.0229843854904175,grad_norm: 0.8516199894217941, iteration: 460900
loss: 1.0035797357559204,grad_norm: 0.8161542028440986, iteration: 460901
loss: 0.972572386264801,grad_norm: 0.7824790781457092, iteration: 460902
loss: 0.9867495894432068,grad_norm: 0.7046846036539295, iteration: 460903
loss: 1.0161129236221313,grad_norm: 0.7607779643089675, iteration: 460904
loss: 1.013312816619873,grad_norm: 0.7144206065275475, iteration: 460905
loss: 1.0255086421966553,grad_norm: 0.8491267374964283, iteration: 460906
loss: 1.0180613994598389,grad_norm: 0.7632496850210075, iteration: 460907
loss: 0.9996564984321594,grad_norm: 0.8100778640893711, iteration: 460908
loss: 1.009242057800293,grad_norm: 0.6523635698526298, iteration: 460909
loss: 1.0001112222671509,grad_norm: 0.7113639043158431, iteration: 460910
loss: 1.0771688222885132,grad_norm: 0.9188565675527296, iteration: 460911
loss: 1.0631722211837769,grad_norm: 0.9999990987561719, iteration: 460912
loss: 1.0124003887176514,grad_norm: 0.7347239077282044, iteration: 460913
loss: 1.0226991176605225,grad_norm: 0.7570756002358325, iteration: 460914
loss: 1.0113972425460815,grad_norm: 0.7388253763637055, iteration: 460915
loss: 1.0300272703170776,grad_norm: 0.731503629157212, iteration: 460916
loss: 1.0409691333770752,grad_norm: 0.8663182110636461, iteration: 460917
loss: 0.9916926026344299,grad_norm: 0.680706253107057, iteration: 460918
loss: 0.9936522841453552,grad_norm: 0.7584106060024189, iteration: 460919
loss: 1.0273882150650024,grad_norm: 0.8108981989806183, iteration: 460920
loss: 1.0298036336898804,grad_norm: 0.80144173929847, iteration: 460921
loss: 0.9799737334251404,grad_norm: 0.821835810024429, iteration: 460922
loss: 0.9968530535697937,grad_norm: 0.6781321936490717, iteration: 460923
loss: 0.9885547161102295,grad_norm: 0.7702796085895358, iteration: 460924
loss: 1.0380899906158447,grad_norm: 1.00000001133039, iteration: 460925
loss: 0.9950358867645264,grad_norm: 0.7282139649409769, iteration: 460926
loss: 0.9780992865562439,grad_norm: 0.6261532753713673, iteration: 460927
loss: 0.9986836910247803,grad_norm: 0.8655006606963641, iteration: 460928
loss: 0.9717608094215393,grad_norm: 0.6760560546791856, iteration: 460929
loss: 0.9898728132247925,grad_norm: 0.6756242112776014, iteration: 460930
loss: 0.9967997670173645,grad_norm: 0.8881486069967723, iteration: 460931
loss: 0.9970252513885498,grad_norm: 0.9093981262098866, iteration: 460932
loss: 1.0106136798858643,grad_norm: 0.758328080172154, iteration: 460933
loss: 1.0180106163024902,grad_norm: 0.7101073913576803, iteration: 460934
loss: 0.9763094186782837,grad_norm: 0.8643404621543482, iteration: 460935
loss: 0.9718886017799377,grad_norm: 0.6758293799614898, iteration: 460936
loss: 0.9819389581680298,grad_norm: 0.6950485734305325, iteration: 460937
loss: 1.0268372297286987,grad_norm: 0.8508171698699425, iteration: 460938
loss: 1.0149164199829102,grad_norm: 0.6639571644508514, iteration: 460939
loss: 1.0078346729278564,grad_norm: 0.7425848394549394, iteration: 460940
loss: 1.0180106163024902,grad_norm: 0.6743184202765901, iteration: 460941
loss: 1.0512666702270508,grad_norm: 0.726690593200221, iteration: 460942
loss: 0.990425169467926,grad_norm: 0.6718103506363172, iteration: 460943
loss: 1.0041730403900146,grad_norm: 0.9155129209907382, iteration: 460944
loss: 1.0239886045455933,grad_norm: 0.8700027490959226, iteration: 460945
loss: 1.0032801628112793,grad_norm: 0.7973993560975764, iteration: 460946
loss: 0.980353593826294,grad_norm: 0.6860369211739833, iteration: 460947
loss: 1.0532793998718262,grad_norm: 0.9999990613677086, iteration: 460948
loss: 0.998843789100647,grad_norm: 0.8281848386011418, iteration: 460949
loss: 1.017149806022644,grad_norm: 0.6950370258309684, iteration: 460950
loss: 1.097669005393982,grad_norm: 0.9999999289828969, iteration: 460951
loss: 1.0166290998458862,grad_norm: 0.7405195817781558, iteration: 460952
loss: 1.0255287885665894,grad_norm: 0.7776106952177881, iteration: 460953
loss: 1.0120176076889038,grad_norm: 0.7507120204233039, iteration: 460954
loss: 1.0353695154190063,grad_norm: 0.851395046092381, iteration: 460955
loss: 0.9905815124511719,grad_norm: 0.8618620527777174, iteration: 460956
loss: 0.9674887657165527,grad_norm: 0.7315378389544099, iteration: 460957
loss: 0.9849281907081604,grad_norm: 0.7296180236290735, iteration: 460958
loss: 0.9747847318649292,grad_norm: 0.8291529706441733, iteration: 460959
loss: 0.9946035742759705,grad_norm: 0.8072683450335835, iteration: 460960
loss: 0.9799180626869202,grad_norm: 0.7209337727794861, iteration: 460961
loss: 0.9536215662956238,grad_norm: 0.7450130177486227, iteration: 460962
loss: 1.0146055221557617,grad_norm: 0.771515701742806, iteration: 460963
loss: 1.0812735557556152,grad_norm: 0.9959522368040138, iteration: 460964
loss: 1.02198326587677,grad_norm: 0.6833144554888028, iteration: 460965
loss: 0.9679813981056213,grad_norm: 0.723668231926989, iteration: 460966
loss: 1.0230780839920044,grad_norm: 0.999998985999321, iteration: 460967
loss: 1.0554081201553345,grad_norm: 0.8138116873709008, iteration: 460968
loss: 0.9656341671943665,grad_norm: 0.6649520167029025, iteration: 460969
loss: 0.972998857498169,grad_norm: 0.8241040424579614, iteration: 460970
loss: 1.023775339126587,grad_norm: 0.8118666949325829, iteration: 460971
loss: 0.9777137637138367,grad_norm: 0.8191879616797506, iteration: 460972
loss: 1.024880051612854,grad_norm: 0.8588419960576913, iteration: 460973
loss: 1.0140200853347778,grad_norm: 0.7761188577721898, iteration: 460974
loss: 0.9673028588294983,grad_norm: 0.7638698291500821, iteration: 460975
loss: 1.0753791332244873,grad_norm: 0.9999992815593745, iteration: 460976
loss: 1.0059173107147217,grad_norm: 0.7657746092584055, iteration: 460977
loss: 0.9965213537216187,grad_norm: 0.7029633653182955, iteration: 460978
loss: 1.0035300254821777,grad_norm: 0.8353706862818951, iteration: 460979
loss: 0.9967027902603149,grad_norm: 0.7785439606571894, iteration: 460980
loss: 0.991718590259552,grad_norm: 0.6452336150798521, iteration: 460981
loss: 1.0285967588424683,grad_norm: 0.8136568432725012, iteration: 460982
loss: 1.0225728750228882,grad_norm: 0.8145033481944998, iteration: 460983
loss: 1.0107483863830566,grad_norm: 0.703057463652027, iteration: 460984
loss: 1.02243173122406,grad_norm: 0.9086777854296538, iteration: 460985
loss: 1.0282236337661743,grad_norm: 0.862229856008628, iteration: 460986
loss: 1.1083203554153442,grad_norm: 0.8088549142394378, iteration: 460987
loss: 0.9948300123214722,grad_norm: 0.7941235913250981, iteration: 460988
loss: 1.0040335655212402,grad_norm: 0.7001900593078034, iteration: 460989
loss: 1.036152720451355,grad_norm: 0.7521680331857645, iteration: 460990
loss: 1.068964958190918,grad_norm: 0.999999645178143, iteration: 460991
loss: 1.0985126495361328,grad_norm: 0.912140423577892, iteration: 460992
loss: 1.0456280708312988,grad_norm: 0.9999999577404575, iteration: 460993
loss: 1.0056079626083374,grad_norm: 0.9999994873982256, iteration: 460994
loss: 1.000233769416809,grad_norm: 0.9999990964431559, iteration: 460995
loss: 0.9890652298927307,grad_norm: 0.8043204779552869, iteration: 460996
loss: 1.013912558555603,grad_norm: 0.6566424299321015, iteration: 460997
loss: 1.1154147386550903,grad_norm: 0.755398756934253, iteration: 460998
loss: 0.9893606305122375,grad_norm: 0.9999999048310887, iteration: 460999
loss: 0.9693503975868225,grad_norm: 0.8293758108908719, iteration: 461000
loss: 0.9924763441085815,grad_norm: 0.8111556876398551, iteration: 461001
loss: 0.9672315120697021,grad_norm: 0.781841156553665, iteration: 461002
loss: 0.9869664907455444,grad_norm: 0.6895043618735122, iteration: 461003
loss: 0.9717805981636047,grad_norm: 0.8230963983046783, iteration: 461004
loss: 0.9681881070137024,grad_norm: 0.9126776600670227, iteration: 461005
loss: 1.017369270324707,grad_norm: 0.6564833472897583, iteration: 461006
loss: 1.0020215511322021,grad_norm: 0.817599283517646, iteration: 461007
loss: 1.0002702474594116,grad_norm: 0.7713811249052132, iteration: 461008
loss: 0.9909363389015198,grad_norm: 0.6300346330319188, iteration: 461009
loss: 0.961295485496521,grad_norm: 0.7918782577198409, iteration: 461010
loss: 0.9764267802238464,grad_norm: 0.7668506986333318, iteration: 461011
loss: 0.9576091766357422,grad_norm: 0.7795720293281678, iteration: 461012
loss: 0.9950610399246216,grad_norm: 0.8412179088494641, iteration: 461013
loss: 1.029089331626892,grad_norm: 0.6895625003821779, iteration: 461014
loss: 0.973848283290863,grad_norm: 0.8110533792090239, iteration: 461015
loss: 0.9939106702804565,grad_norm: 0.8854874800895031, iteration: 461016
loss: 1.0278403759002686,grad_norm: 0.798770327289089, iteration: 461017
loss: 0.9805476665496826,grad_norm: 0.7178420085201563, iteration: 461018
loss: 0.9587405323982239,grad_norm: 0.7427369937848248, iteration: 461019
loss: 1.025718331336975,grad_norm: 0.9999999729346175, iteration: 461020
loss: 1.019366979598999,grad_norm: 0.7959377268548489, iteration: 461021
loss: 0.9657015204429626,grad_norm: 0.8964308968429859, iteration: 461022
loss: 1.1000896692276,grad_norm: 0.7896466246511056, iteration: 461023
loss: 0.9842817187309265,grad_norm: 0.8101242040057534, iteration: 461024
loss: 1.0338432788848877,grad_norm: 0.6695588673537533, iteration: 461025
loss: 0.9968369603157043,grad_norm: 0.7360753238202026, iteration: 461026
loss: 0.97630774974823,grad_norm: 0.6415248106342055, iteration: 461027
loss: 0.9911950826644897,grad_norm: 0.8877094315072745, iteration: 461028
loss: 0.9846848845481873,grad_norm: 0.6236048827020637, iteration: 461029
loss: 0.9968978762626648,grad_norm: 0.7000268839963607, iteration: 461030
loss: 1.0256212949752808,grad_norm: 0.7831165449113672, iteration: 461031
loss: 1.0105180740356445,grad_norm: 0.7999147921720293, iteration: 461032
loss: 0.9538370370864868,grad_norm: 0.734605241602252, iteration: 461033
loss: 1.0014729499816895,grad_norm: 0.646073009706491, iteration: 461034
loss: 0.9929079413414001,grad_norm: 0.9398225427835942, iteration: 461035
loss: 0.9872145056724548,grad_norm: 0.8094738074931276, iteration: 461036
loss: 0.982276201248169,grad_norm: 0.9999990070415445, iteration: 461037
loss: 0.9867991209030151,grad_norm: 0.7470694004230838, iteration: 461038
loss: 1.009424090385437,grad_norm: 0.6978924846608594, iteration: 461039
loss: 1.0481706857681274,grad_norm: 0.8885896697180065, iteration: 461040
loss: 0.9714294672012329,grad_norm: 0.7519199414755376, iteration: 461041
loss: 0.9858722686767578,grad_norm: 0.829758868976459, iteration: 461042
loss: 0.9628915190696716,grad_norm: 0.7463136365098367, iteration: 461043
loss: 1.019579291343689,grad_norm: 0.8263393965921256, iteration: 461044
loss: 0.9618188738822937,grad_norm: 0.6368549179338896, iteration: 461045
loss: 1.0104788541793823,grad_norm: 0.8254842239891171, iteration: 461046
loss: 1.0722886323928833,grad_norm: 0.9999999216476466, iteration: 461047
loss: 1.0098742246627808,grad_norm: 0.7049522684897459, iteration: 461048
loss: 0.9787776470184326,grad_norm: 0.7168391328702747, iteration: 461049
loss: 1.0174437761306763,grad_norm: 0.82612417428232, iteration: 461050
loss: 1.020554780960083,grad_norm: 0.843981499046941, iteration: 461051
loss: 1.0011401176452637,grad_norm: 0.8901076785211421, iteration: 461052
loss: 0.9833176732063293,grad_norm: 0.6426399731292752, iteration: 461053
loss: 1.0247293710708618,grad_norm: 0.7506372587132808, iteration: 461054
loss: 0.9621784687042236,grad_norm: 0.7735804402185381, iteration: 461055
loss: 1.0003418922424316,grad_norm: 0.714063561536611, iteration: 461056
loss: 0.9779990315437317,grad_norm: 0.6340260609660361, iteration: 461057
loss: 0.9895154237747192,grad_norm: 0.8070521777886851, iteration: 461058
loss: 1.0017284154891968,grad_norm: 0.6752401479995769, iteration: 461059
loss: 0.9666826128959656,grad_norm: 0.787293839236103, iteration: 461060
loss: 1.007901906967163,grad_norm: 0.8741807962752218, iteration: 461061
loss: 0.9972987174987793,grad_norm: 0.7912186913683728, iteration: 461062
loss: 1.0400035381317139,grad_norm: 0.8370893984212247, iteration: 461063
loss: 1.035014271736145,grad_norm: 0.6583974421706705, iteration: 461064
loss: 0.9807304739952087,grad_norm: 0.7090698920771592, iteration: 461065
loss: 0.9952177405357361,grad_norm: 0.9379383799319199, iteration: 461066
loss: 1.0139648914337158,grad_norm: 0.7055613742791044, iteration: 461067
loss: 1.0042253732681274,grad_norm: 0.8080707902016695, iteration: 461068
loss: 1.021284580230713,grad_norm: 0.8286717803199115, iteration: 461069
loss: 0.9793702960014343,grad_norm: 0.8229794928612281, iteration: 461070
loss: 1.0004115104675293,grad_norm: 0.6628941114618436, iteration: 461071
loss: 0.9983536005020142,grad_norm: 0.745339549869679, iteration: 461072
loss: 1.026333212852478,grad_norm: 0.8137936583520345, iteration: 461073
loss: 1.0060423612594604,grad_norm: 0.8100341852028762, iteration: 461074
loss: 0.9814529418945312,grad_norm: 0.6972485538349933, iteration: 461075
loss: 1.0042840242385864,grad_norm: 0.7549240155462081, iteration: 461076
loss: 1.0249837636947632,grad_norm: 0.7705908100755549, iteration: 461077
loss: 0.9787242412567139,grad_norm: 0.6452411779381021, iteration: 461078
loss: 0.9934064149856567,grad_norm: 0.6280266185314993, iteration: 461079
loss: 0.9946173429489136,grad_norm: 0.7300188693035176, iteration: 461080
loss: 1.021131157875061,grad_norm: 0.7145840103788663, iteration: 461081
loss: 1.0129444599151611,grad_norm: 0.7759598297754664, iteration: 461082
loss: 0.9739312529563904,grad_norm: 0.8853808773965461, iteration: 461083
loss: 1.1184210777282715,grad_norm: 0.9999991319465465, iteration: 461084
loss: 0.9956204891204834,grad_norm: 0.9999988861272133, iteration: 461085
loss: 1.0969346761703491,grad_norm: 0.99999912489354, iteration: 461086
loss: 0.975881814956665,grad_norm: 0.9999991186976542, iteration: 461087
loss: 1.0068029165267944,grad_norm: 0.6833440920566121, iteration: 461088
loss: 0.9751139283180237,grad_norm: 0.745464912646867, iteration: 461089
loss: 0.979081928730011,grad_norm: 0.706129436049541, iteration: 461090
loss: 1.0137748718261719,grad_norm: 0.7350694417065791, iteration: 461091
loss: 1.0098137855529785,grad_norm: 0.7237446295712849, iteration: 461092
loss: 1.0065075159072876,grad_norm: 0.709519561842162, iteration: 461093
loss: 1.0123345851898193,grad_norm: 0.9999990783511894, iteration: 461094
loss: 0.9944472908973694,grad_norm: 0.7571627443339213, iteration: 461095
loss: 1.0139755010604858,grad_norm: 0.8180891073660037, iteration: 461096
loss: 0.9741044044494629,grad_norm: 0.7293947012600476, iteration: 461097
loss: 1.0305871963500977,grad_norm: 0.779305592012606, iteration: 461098
loss: 0.9781480431556702,grad_norm: 0.7473642402275132, iteration: 461099
loss: 1.0015976428985596,grad_norm: 0.6378500193386688, iteration: 461100
loss: 1.0189342498779297,grad_norm: 0.6347069593049749, iteration: 461101
loss: 0.9812899827957153,grad_norm: 0.7125761412406311, iteration: 461102
loss: 1.0789138078689575,grad_norm: 0.8307938069953158, iteration: 461103
loss: 0.9645360708236694,grad_norm: 0.7282774361859304, iteration: 461104
loss: 0.9981247782707214,grad_norm: 0.8595759220807098, iteration: 461105
loss: 0.9804885387420654,grad_norm: 0.7466152396327627, iteration: 461106
loss: 1.030541181564331,grad_norm: 0.9307591713540602, iteration: 461107
loss: 0.9835941791534424,grad_norm: 0.6404230910235548, iteration: 461108
loss: 1.007063865661621,grad_norm: 0.6746026329345582, iteration: 461109
loss: 1.0046671628952026,grad_norm: 0.7551648997751678, iteration: 461110
loss: 0.9840214848518372,grad_norm: 0.9999999482561667, iteration: 461111
loss: 1.0890053510665894,grad_norm: 0.9999998747187765, iteration: 461112
loss: 0.9667849540710449,grad_norm: 0.9640681253446195, iteration: 461113
loss: 0.9881601333618164,grad_norm: 0.9999991516535807, iteration: 461114
loss: 0.9924383163452148,grad_norm: 0.7161417362496972, iteration: 461115
loss: 0.9986104965209961,grad_norm: 0.7874594698084969, iteration: 461116
loss: 0.9891095161437988,grad_norm: 0.6613561082390577, iteration: 461117
loss: 1.0139497518539429,grad_norm: 0.8152281172857072, iteration: 461118
loss: 1.0144526958465576,grad_norm: 0.7662945445480305, iteration: 461119
loss: 0.9997088313102722,grad_norm: 0.9210352831189194, iteration: 461120
loss: 1.0045015811920166,grad_norm: 0.6795325704178663, iteration: 461121
loss: 1.0130938291549683,grad_norm: 0.7399387463515624, iteration: 461122
loss: 0.9688757658004761,grad_norm: 0.7690242415196635, iteration: 461123
loss: 1.0160162448883057,grad_norm: 0.6827443303037204, iteration: 461124
loss: 1.0110533237457275,grad_norm: 0.7317892142465416, iteration: 461125
loss: 0.9826064109802246,grad_norm: 0.8601683557125062, iteration: 461126
loss: 0.9610736966133118,grad_norm: 0.7134713127827186, iteration: 461127
loss: 0.9540754556655884,grad_norm: 0.8021886345692205, iteration: 461128
loss: 1.0045883655548096,grad_norm: 0.788354605098936, iteration: 461129
loss: 1.0079624652862549,grad_norm: 0.844633941800692, iteration: 461130
loss: 1.0037200450897217,grad_norm: 0.8320980271367412, iteration: 461131
loss: 1.0103578567504883,grad_norm: 0.9999993625347463, iteration: 461132
loss: 0.9638311862945557,grad_norm: 0.7421354532585738, iteration: 461133
loss: 0.9937797784805298,grad_norm: 0.7045812813071723, iteration: 461134
loss: 1.0368016958236694,grad_norm: 0.6420654653750562, iteration: 461135
loss: 1.0051445960998535,grad_norm: 0.9362793752115589, iteration: 461136
loss: 1.1838561296463013,grad_norm: 0.9999991773552919, iteration: 461137
loss: 1.0061179399490356,grad_norm: 0.892612592970793, iteration: 461138
loss: 0.9960172772407532,grad_norm: 0.8246269893850465, iteration: 461139
loss: 1.0645442008972168,grad_norm: 0.9999995922818529, iteration: 461140
loss: 0.9848484396934509,grad_norm: 0.7041316786645778, iteration: 461141
loss: 1.0064964294433594,grad_norm: 0.8433631917080384, iteration: 461142
loss: 1.0282899141311646,grad_norm: 0.7632572564967324, iteration: 461143
loss: 0.9931190609931946,grad_norm: 0.8631945697557388, iteration: 461144
loss: 0.9959529042243958,grad_norm: 0.9518661022219276, iteration: 461145
loss: 1.0017344951629639,grad_norm: 0.6945964595239402, iteration: 461146
loss: 1.016357421875,grad_norm: 0.7040333734778534, iteration: 461147
loss: 0.9737582802772522,grad_norm: 0.7361276361123297, iteration: 461148
loss: 0.9929594397544861,grad_norm: 0.6777760354765485, iteration: 461149
loss: 1.0158964395523071,grad_norm: 0.7936724958695027, iteration: 461150
loss: 0.9469438791275024,grad_norm: 0.7267232993540946, iteration: 461151
loss: 0.9776400327682495,grad_norm: 0.9999991937517899, iteration: 461152
loss: 0.9781147837638855,grad_norm: 0.7406124217492145, iteration: 461153
loss: 0.976403534412384,grad_norm: 0.7299438566653077, iteration: 461154
loss: 0.9615527987480164,grad_norm: 0.7153182075812353, iteration: 461155
loss: 1.0011591911315918,grad_norm: 0.7355177230901296, iteration: 461156
loss: 0.9852416515350342,grad_norm: 0.7069259308974147, iteration: 461157
loss: 1.036232829093933,grad_norm: 0.9999995139119744, iteration: 461158
loss: 1.0025110244750977,grad_norm: 0.8696950826686083, iteration: 461159
loss: 0.9770815372467041,grad_norm: 0.7631658185201547, iteration: 461160
loss: 0.9689362049102783,grad_norm: 0.9365959463776905, iteration: 461161
loss: 0.9628474712371826,grad_norm: 0.7478335320476636, iteration: 461162
loss: 0.9852039217948914,grad_norm: 0.6294503639253158, iteration: 461163
loss: 1.011803388595581,grad_norm: 0.6520110843117334, iteration: 461164
loss: 0.9749008417129517,grad_norm: 0.616425249642835, iteration: 461165
loss: 1.0290848016738892,grad_norm: 0.7302159772201974, iteration: 461166
loss: 1.0424925088882446,grad_norm: 0.7506021418226901, iteration: 461167
loss: 0.9943271279335022,grad_norm: 0.8504766610173016, iteration: 461168
loss: 1.0216548442840576,grad_norm: 0.7396473854691131, iteration: 461169
loss: 1.053676724433899,grad_norm: 0.7647479674639274, iteration: 461170
loss: 1.0405967235565186,grad_norm: 0.7824954443330253, iteration: 461171
loss: 0.9438232779502869,grad_norm: 0.9088981949462027, iteration: 461172
loss: 0.9865193367004395,grad_norm: 0.6673138053029655, iteration: 461173
loss: 0.9989151954650879,grad_norm: 0.7736869863867082, iteration: 461174
loss: 0.9775429964065552,grad_norm: 0.6664781465079618, iteration: 461175
loss: 1.0383543968200684,grad_norm: 0.8564046736095835, iteration: 461176
loss: 1.0179814100265503,grad_norm: 0.9999990195237066, iteration: 461177
loss: 1.0108869075775146,grad_norm: 0.7115461882100098, iteration: 461178
loss: 0.9816654920578003,grad_norm: 0.7601389607980266, iteration: 461179
loss: 0.9933688640594482,grad_norm: 0.7028465495243795, iteration: 461180
loss: 0.9892399907112122,grad_norm: 0.8040976813042847, iteration: 461181
loss: 1.0028891563415527,grad_norm: 0.7444706841187528, iteration: 461182
loss: 0.9801335334777832,grad_norm: 0.7088322614911626, iteration: 461183
loss: 0.9852215051651001,grad_norm: 0.9916750526443913, iteration: 461184
loss: 0.9937792420387268,grad_norm: 0.7491806773546356, iteration: 461185
loss: 1.0158683061599731,grad_norm: 0.7591671950899411, iteration: 461186
loss: 0.9554357528686523,grad_norm: 0.9924525269153852, iteration: 461187
loss: 1.0651001930236816,grad_norm: 0.9999995110269703, iteration: 461188
loss: 0.9743559956550598,grad_norm: 0.7008234280617863, iteration: 461189
loss: 0.9908263087272644,grad_norm: 0.7881727272240054, iteration: 461190
loss: 1.0081721544265747,grad_norm: 0.8655938927486224, iteration: 461191
loss: 0.9982670545578003,grad_norm: 0.7193424053672325, iteration: 461192
loss: 1.017676830291748,grad_norm: 0.7925665809926392, iteration: 461193
loss: 0.985198974609375,grad_norm: 0.9100446644777821, iteration: 461194
loss: 0.9981921911239624,grad_norm: 0.6690328939925203, iteration: 461195
loss: 1.0202386379241943,grad_norm: 0.7171525222334355, iteration: 461196
loss: 0.9747005105018616,grad_norm: 0.7423023731170889, iteration: 461197
loss: 1.005750060081482,grad_norm: 0.8083873749469833, iteration: 461198
loss: 1.0094424486160278,grad_norm: 0.7838958191458252, iteration: 461199
loss: 1.012799620628357,grad_norm: 0.821277861009016, iteration: 461200
loss: 1.0044960975646973,grad_norm: 0.8286426066599665, iteration: 461201
loss: 1.0031397342681885,grad_norm: 0.6804358012884902, iteration: 461202
loss: 0.9977400898933411,grad_norm: 0.7339180972931387, iteration: 461203
loss: 1.0258896350860596,grad_norm: 0.999999077057673, iteration: 461204
loss: 0.9889358878135681,grad_norm: 0.790892725965864, iteration: 461205
loss: 1.0304027795791626,grad_norm: 0.6850828142737531, iteration: 461206
loss: 0.9976611137390137,grad_norm: 0.9461652362694001, iteration: 461207
loss: 1.0098919868469238,grad_norm: 0.938741401497052, iteration: 461208
loss: 0.9938148260116577,grad_norm: 0.7082289419608881, iteration: 461209
loss: 1.0037896633148193,grad_norm: 0.7974686752009346, iteration: 461210
loss: 1.0053373575210571,grad_norm: 0.7781729261452307, iteration: 461211
loss: 1.014987826347351,grad_norm: 0.6945050895438771, iteration: 461212
loss: 0.9946066737174988,grad_norm: 0.8517927976314007, iteration: 461213
loss: 1.0265730619430542,grad_norm: 0.9310797469590002, iteration: 461214
loss: 0.9770260453224182,grad_norm: 0.9999991352009728, iteration: 461215
loss: 1.0005773305892944,grad_norm: 0.8048916347181682, iteration: 461216
loss: 1.0167871713638306,grad_norm: 0.8371758910295415, iteration: 461217
loss: 0.9617310166358948,grad_norm: 0.6978813448127213, iteration: 461218
loss: 1.000415563583374,grad_norm: 0.8415789852086354, iteration: 461219
loss: 0.9996857643127441,grad_norm: 0.999999066896919, iteration: 461220
loss: 0.9568719267845154,grad_norm: 0.7698621087421642, iteration: 461221
loss: 1.0075277090072632,grad_norm: 0.6461933764688641, iteration: 461222
loss: 0.9951825141906738,grad_norm: 0.7830706856855579, iteration: 461223
loss: 0.9796172976493835,grad_norm: 0.9999998976386965, iteration: 461224
loss: 1.0225841999053955,grad_norm: 0.8677904547855794, iteration: 461225
loss: 1.0676140785217285,grad_norm: 0.9999992128071583, iteration: 461226
loss: 1.0208382606506348,grad_norm: 0.999999100108012, iteration: 461227
loss: 1.020188570022583,grad_norm: 0.9301591116407769, iteration: 461228
loss: 0.9590651392936707,grad_norm: 0.7797264681233345, iteration: 461229
loss: 1.1099783182144165,grad_norm: 0.9999995035930394, iteration: 461230
loss: 1.031568169593811,grad_norm: 0.8125010511392196, iteration: 461231
loss: 1.0124881267547607,grad_norm: 0.6066104061956926, iteration: 461232
loss: 0.9980517029762268,grad_norm: 0.8746272365347018, iteration: 461233
loss: 1.0370168685913086,grad_norm: 0.9999997901615026, iteration: 461234
loss: 1.0095998048782349,grad_norm: 0.7817448802789989, iteration: 461235
loss: 0.9988406896591187,grad_norm: 0.6338824324770286, iteration: 461236
loss: 0.9644483327865601,grad_norm: 0.8075808327922879, iteration: 461237
loss: 1.0112701654434204,grad_norm: 0.6877682092496632, iteration: 461238
loss: 1.0614014863967896,grad_norm: 0.658688011489715, iteration: 461239
loss: 0.9910266399383545,grad_norm: 0.7756676471380717, iteration: 461240
loss: 0.9804701209068298,grad_norm: 0.7712635721238502, iteration: 461241
loss: 1.0254714488983154,grad_norm: 0.8067629862415712, iteration: 461242
loss: 0.9929093718528748,grad_norm: 0.8629914570995961, iteration: 461243
loss: 0.9699783325195312,grad_norm: 0.9999991649691051, iteration: 461244
loss: 1.0041372776031494,grad_norm: 0.709037777332928, iteration: 461245
loss: 0.9625080227851868,grad_norm: 0.8916069345899292, iteration: 461246
loss: 1.0180004835128784,grad_norm: 0.6360732311600489, iteration: 461247
loss: 0.9741522669792175,grad_norm: 0.8079062988146084, iteration: 461248
loss: 1.0238010883331299,grad_norm: 0.9350555010272492, iteration: 461249
loss: 1.0140169858932495,grad_norm: 0.7625436445784494, iteration: 461250
loss: 0.974514901638031,grad_norm: 0.7655729485053409, iteration: 461251
loss: 0.9739868640899658,grad_norm: 0.715954210783102, iteration: 461252
loss: 0.9995107054710388,grad_norm: 0.6896752295144715, iteration: 461253
loss: 1.0011831521987915,grad_norm: 0.6839844514482855, iteration: 461254
loss: 1.0358152389526367,grad_norm: 0.7961026984424007, iteration: 461255
loss: 1.0134639739990234,grad_norm: 0.7980058364130436, iteration: 461256
loss: 1.0226079225540161,grad_norm: 0.6408765334174226, iteration: 461257
loss: 1.0137461423873901,grad_norm: 0.9226750404012231, iteration: 461258
loss: 1.031630516052246,grad_norm: 0.7660274396567281, iteration: 461259
loss: 0.9694036245346069,grad_norm: 0.8250176984302874, iteration: 461260
loss: 0.9916964173316956,grad_norm: 0.810206045764143, iteration: 461261
loss: 1.0366625785827637,grad_norm: 0.8570558161645155, iteration: 461262
loss: 1.0011850595474243,grad_norm: 0.8125341057183222, iteration: 461263
loss: 1.0241074562072754,grad_norm: 0.7306887068195836, iteration: 461264
loss: 1.0079838037490845,grad_norm: 0.7217160746715799, iteration: 461265
loss: 0.9837706089019775,grad_norm: 0.7808618357114869, iteration: 461266
loss: 0.962865948677063,grad_norm: 0.7020077872638592, iteration: 461267
loss: 1.008528232574463,grad_norm: 0.6314498981274238, iteration: 461268
loss: 0.994525671005249,grad_norm: 0.7111405922197129, iteration: 461269
loss: 0.9705814123153687,grad_norm: 0.8917803180877605, iteration: 461270
loss: 0.9911547899246216,grad_norm: 0.9999992828536926, iteration: 461271
loss: 1.0026190280914307,grad_norm: 0.9999992366436118, iteration: 461272
loss: 1.0227729082107544,grad_norm: 0.7861376859678091, iteration: 461273
loss: 1.0024404525756836,grad_norm: 0.7018095628314732, iteration: 461274
loss: 1.0568883419036865,grad_norm: 0.9366566289022329, iteration: 461275
loss: 1.0169233083724976,grad_norm: 0.6911056767055537, iteration: 461276
loss: 0.9834075570106506,grad_norm: 0.9578758923950407, iteration: 461277
loss: 1.0606385469436646,grad_norm: 0.752940560472973, iteration: 461278
loss: 1.0018278360366821,grad_norm: 0.7664571257898588, iteration: 461279
loss: 0.9937246441841125,grad_norm: 0.6534387993101437, iteration: 461280
loss: 0.9566634893417358,grad_norm: 0.8553015041831656, iteration: 461281
loss: 1.012391209602356,grad_norm: 0.8228543594805181, iteration: 461282
loss: 0.9698308706283569,grad_norm: 0.7459946457134645, iteration: 461283
loss: 0.9976395964622498,grad_norm: 0.8606239869792162, iteration: 461284
loss: 1.0012027025222778,grad_norm: 0.6611993589071282, iteration: 461285
loss: 1.0307905673980713,grad_norm: 0.6462896903845592, iteration: 461286
loss: 1.0097646713256836,grad_norm: 0.6834646489822767, iteration: 461287
loss: 1.014910340309143,grad_norm: 0.7525205109490793, iteration: 461288
loss: 0.952334463596344,grad_norm: 0.7142349332687875, iteration: 461289
loss: 0.999344527721405,grad_norm: 0.6607866426741407, iteration: 461290
loss: 1.0013115406036377,grad_norm: 0.844387497963303, iteration: 461291
loss: 1.0037199258804321,grad_norm: 0.7465295024059421, iteration: 461292
loss: 1.0388191938400269,grad_norm: 0.943217375834159, iteration: 461293
loss: 0.9628267884254456,grad_norm: 0.984123327743957, iteration: 461294
loss: 0.9845540523529053,grad_norm: 0.6914535683196995, iteration: 461295
loss: 1.023219108581543,grad_norm: 0.8729037026703588, iteration: 461296
loss: 1.0190316438674927,grad_norm: 0.8630355339740363, iteration: 461297
loss: 1.0074748992919922,grad_norm: 0.8549372265966098, iteration: 461298
loss: 1.0476467609405518,grad_norm: 0.8569167300472101, iteration: 461299
loss: 0.9604776501655579,grad_norm: 0.6960378102669335, iteration: 461300
loss: 0.9799914360046387,grad_norm: 0.8087599983582731, iteration: 461301
loss: 1.0152844190597534,grad_norm: 0.823709795775662, iteration: 461302
loss: 0.9942460060119629,grad_norm: 0.9999992757252633, iteration: 461303
loss: 1.0024418830871582,grad_norm: 0.798685922745577, iteration: 461304
loss: 0.9768586754798889,grad_norm: 0.9380455778199508, iteration: 461305
loss: 0.9848249554634094,grad_norm: 0.7883372246991093, iteration: 461306
loss: 0.9644436836242676,grad_norm: 0.6998375647732352, iteration: 461307
loss: 1.0169111490249634,grad_norm: 0.6382963023407809, iteration: 461308
loss: 0.9957478046417236,grad_norm: 0.6895040916717081, iteration: 461309
loss: 0.9662792682647705,grad_norm: 0.9661478204661728, iteration: 461310
loss: 1.00265634059906,grad_norm: 0.6318098707883144, iteration: 461311
loss: 1.0106768608093262,grad_norm: 0.8576820887624313, iteration: 461312
loss: 0.9923580288887024,grad_norm: 0.711411665967396, iteration: 461313
loss: 1.0371370315551758,grad_norm: 0.7640564671965202, iteration: 461314
loss: 1.0016958713531494,grad_norm: 0.8177448692154112, iteration: 461315
loss: 0.9905598163604736,grad_norm: 0.7546313184480847, iteration: 461316
loss: 1.0209076404571533,grad_norm: 0.8669093055761419, iteration: 461317
loss: 1.0110104084014893,grad_norm: 0.8295656261102875, iteration: 461318
loss: 1.1027404069900513,grad_norm: 0.9525806325160003, iteration: 461319
loss: 0.9961370825767517,grad_norm: 0.7041841319773072, iteration: 461320
loss: 0.9769206643104553,grad_norm: 0.9999992020031653, iteration: 461321
loss: 0.9778077006340027,grad_norm: 0.7717526716613183, iteration: 461322
loss: 1.0091370344161987,grad_norm: 0.7528338305702799, iteration: 461323
loss: 1.0047929286956787,grad_norm: 0.6767126145474882, iteration: 461324
loss: 1.0737618207931519,grad_norm: 0.9999997426372134, iteration: 461325
loss: 1.0193891525268555,grad_norm: 0.6722586950011744, iteration: 461326
loss: 1.007821798324585,grad_norm: 0.669897061960598, iteration: 461327
loss: 1.0004302263259888,grad_norm: 0.7268858901706072, iteration: 461328
loss: 1.0024123191833496,grad_norm: 0.9579322310283749, iteration: 461329
loss: 0.9782302975654602,grad_norm: 0.7990041686021105, iteration: 461330
loss: 1.0632705688476562,grad_norm: 0.7988770166291217, iteration: 461331
loss: 0.9641306400299072,grad_norm: 0.688524847590334, iteration: 461332
loss: 0.9747897386550903,grad_norm: 0.7812136824446383, iteration: 461333
loss: 0.9832654595375061,grad_norm: 0.8402878810077787, iteration: 461334
loss: 0.982840359210968,grad_norm: 0.7353958040767122, iteration: 461335
loss: 0.9883447289466858,grad_norm: 0.7687617911152513, iteration: 461336
loss: 1.0762900114059448,grad_norm: 0.9999995461931012, iteration: 461337
loss: 0.9845097064971924,grad_norm: 0.764361866325121, iteration: 461338
loss: 0.9773152470588684,grad_norm: 0.8127203353874356, iteration: 461339
loss: 0.9782153367996216,grad_norm: 0.7722844872144471, iteration: 461340
loss: 0.9868115782737732,grad_norm: 0.7765049837705016, iteration: 461341
loss: 1.015102744102478,grad_norm: 0.618462216407836, iteration: 461342
loss: 1.0206325054168701,grad_norm: 0.9999998760282883, iteration: 461343
loss: 1.0135551691055298,grad_norm: 0.7991062432233044, iteration: 461344
loss: 0.971679151058197,grad_norm: 0.9999990621172283, iteration: 461345
loss: 1.1013169288635254,grad_norm: 0.7884795407726263, iteration: 461346
loss: 1.0135951042175293,grad_norm: 0.6931204502770139, iteration: 461347
loss: 1.0553892850875854,grad_norm: 0.7792205819882461, iteration: 461348
loss: 1.0191564559936523,grad_norm: 0.7764823417970996, iteration: 461349
loss: 0.9841205477714539,grad_norm: 0.7271507041549958, iteration: 461350
loss: 0.9823116064071655,grad_norm: 0.8181354356432273, iteration: 461351
loss: 0.9979246258735657,grad_norm: 0.7959775926311272, iteration: 461352
loss: 0.9717614650726318,grad_norm: 0.8124023596240993, iteration: 461353
loss: 0.9919468760490417,grad_norm: 0.6998201259427405, iteration: 461354
loss: 1.0710755586624146,grad_norm: 0.9999997304111579, iteration: 461355
loss: 1.017593264579773,grad_norm: 0.7267249977165838, iteration: 461356
loss: 0.9773774147033691,grad_norm: 0.6561189866665671, iteration: 461357
loss: 0.9946023225784302,grad_norm: 0.8208931476089499, iteration: 461358
loss: 0.9933139085769653,grad_norm: 0.9999992974231935, iteration: 461359
loss: 0.9623514413833618,grad_norm: 0.6702426399577569, iteration: 461360
loss: 1.0268710851669312,grad_norm: 0.8780288550350667, iteration: 461361
loss: 1.0180407762527466,grad_norm: 0.6518353762604204, iteration: 461362
loss: 1.0169693231582642,grad_norm: 0.8296142248684963, iteration: 461363
loss: 1.0567861795425415,grad_norm: 0.9999999089811826, iteration: 461364
loss: 1.012168526649475,grad_norm: 0.8678571161055557, iteration: 461365
loss: 0.9927899837493896,grad_norm: 0.681381564566251, iteration: 461366
loss: 0.9403688311576843,grad_norm: 0.9224021224209298, iteration: 461367
loss: 0.9598674774169922,grad_norm: 0.8413794637008475, iteration: 461368
loss: 1.0120282173156738,grad_norm: 0.8424429067842156, iteration: 461369
loss: 1.1120250225067139,grad_norm: 0.9999991293227412, iteration: 461370
loss: 0.9780924320220947,grad_norm: 0.8993853915958998, iteration: 461371
loss: 0.9944710731506348,grad_norm: 0.7908685431474306, iteration: 461372
loss: 1.0077598094940186,grad_norm: 0.9391517628408624, iteration: 461373
loss: 0.9681957960128784,grad_norm: 0.9999989802315494, iteration: 461374
loss: 0.986176073551178,grad_norm: 0.6855843495654759, iteration: 461375
loss: 0.9649632573127747,grad_norm: 0.8080571861355632, iteration: 461376
loss: 0.9819191098213196,grad_norm: 0.9999991578107359, iteration: 461377
loss: 1.0052294731140137,grad_norm: 0.6283738974489661, iteration: 461378
loss: 0.9866083264350891,grad_norm: 0.7825411982672242, iteration: 461379
loss: 0.9428883194923401,grad_norm: 0.887584927389662, iteration: 461380
loss: 0.9988155364990234,grad_norm: 0.7798165588389565, iteration: 461381
loss: 0.9832792282104492,grad_norm: 0.7975833267015016, iteration: 461382
loss: 1.0152989625930786,grad_norm: 0.6499934222414598, iteration: 461383
loss: 1.0517734289169312,grad_norm: 0.8063434952648909, iteration: 461384
loss: 0.9594845771789551,grad_norm: 0.6987612471672382, iteration: 461385
loss: 0.9927380681037903,grad_norm: 0.7772569750947571, iteration: 461386
loss: 0.9702205061912537,grad_norm: 0.7515660721209774, iteration: 461387
loss: 0.9816912412643433,grad_norm: 0.999999094036227, iteration: 461388
loss: 1.0302599668502808,grad_norm: 0.7338070261671428, iteration: 461389
loss: 1.0144433975219727,grad_norm: 0.725833810258971, iteration: 461390
loss: 1.109514832496643,grad_norm: 0.7780051186728585, iteration: 461391
loss: 0.9949309825897217,grad_norm: 0.8480710991930781, iteration: 461392
loss: 1.020979881286621,grad_norm: 0.694685570381523, iteration: 461393
loss: 0.9859108328819275,grad_norm: 0.8220361877975283, iteration: 461394
loss: 1.0497945547103882,grad_norm: 0.9999990815162123, iteration: 461395
loss: 0.9671549797058105,grad_norm: 0.6877545544478822, iteration: 461396
loss: 1.0761693716049194,grad_norm: 0.8024464840955833, iteration: 461397
loss: 0.9756409525871277,grad_norm: 0.7119057486188345, iteration: 461398
loss: 1.0981800556182861,grad_norm: 0.9999998633289161, iteration: 461399
loss: 0.9959605932235718,grad_norm: 0.6590492419062369, iteration: 461400
loss: 0.9819990992546082,grad_norm: 0.7707793360259837, iteration: 461401
loss: 0.9805352687835693,grad_norm: 0.6752237454447749, iteration: 461402
loss: 0.9675928354263306,grad_norm: 0.7617809843640014, iteration: 461403
loss: 1.0244264602661133,grad_norm: 0.8594949789438695, iteration: 461404
loss: 1.1310489177703857,grad_norm: 0.8815428988291546, iteration: 461405
loss: 1.033669352531433,grad_norm: 0.8386646495151616, iteration: 461406
loss: 0.9647230505943298,grad_norm: 0.8424933817396504, iteration: 461407
loss: 1.079441785812378,grad_norm: 0.8183543682965837, iteration: 461408
loss: 1.0125651359558105,grad_norm: 0.7352918082000934, iteration: 461409
loss: 0.9836452603340149,grad_norm: 0.849809982042145, iteration: 461410
loss: 1.1137617826461792,grad_norm: 0.9999998151168946, iteration: 461411
loss: 1.1144920587539673,grad_norm: 0.999999348624763, iteration: 461412
loss: 1.0244030952453613,grad_norm: 0.8248727527484808, iteration: 461413
loss: 1.1045010089874268,grad_norm: 0.7316335075100943, iteration: 461414
loss: 1.0166937112808228,grad_norm: 0.9358926444539866, iteration: 461415
loss: 1.023618459701538,grad_norm: 0.8677445317919461, iteration: 461416
loss: 1.0356009006500244,grad_norm: 0.7585534286866611, iteration: 461417
loss: 1.0124950408935547,grad_norm: 0.9999991856597837, iteration: 461418
loss: 1.098218321800232,grad_norm: 0.9999997447603866, iteration: 461419
loss: 1.0557752847671509,grad_norm: 0.9999992922892663, iteration: 461420
loss: 1.0070654153823853,grad_norm: 0.9999992371289407, iteration: 461421
loss: 1.0783494710922241,grad_norm: 0.9999998663344101, iteration: 461422
loss: 1.0402203798294067,grad_norm: 0.9999996029863537, iteration: 461423
loss: 1.0182427167892456,grad_norm: 0.7714327589580722, iteration: 461424
loss: 1.4163354635238647,grad_norm: 0.9999996473584619, iteration: 461425
loss: 1.0226603746414185,grad_norm: 0.9999997734775903, iteration: 461426
loss: 0.9866530895233154,grad_norm: 0.6582932810876432, iteration: 461427
loss: 1.0085721015930176,grad_norm: 0.8484171594122462, iteration: 461428
loss: 1.0198280811309814,grad_norm: 0.6998241029847938, iteration: 461429
loss: 1.1175645589828491,grad_norm: 0.999999923092183, iteration: 461430
loss: 1.0424047708511353,grad_norm: 0.9999999146820729, iteration: 461431
loss: 1.1306127309799194,grad_norm: 0.6725866969515523, iteration: 461432
loss: 1.15746009349823,grad_norm: 0.9999994206493968, iteration: 461433
loss: 1.0063365697860718,grad_norm: 0.7025999828291639, iteration: 461434
loss: 1.0869016647338867,grad_norm: 0.9999994233227631, iteration: 461435
loss: 1.057848334312439,grad_norm: 0.7765639474168331, iteration: 461436
loss: 1.3602906465530396,grad_norm: 0.9999999484461459, iteration: 461437
loss: 1.0157626867294312,grad_norm: 0.8343483690610229, iteration: 461438
loss: 1.1295220851898193,grad_norm: 0.9999992887034396, iteration: 461439
loss: 1.1140056848526,grad_norm: 0.9999990540496926, iteration: 461440
loss: 1.278765320777893,grad_norm: 0.9999998793893583, iteration: 461441
loss: 1.1767158508300781,grad_norm: 0.9999995827824157, iteration: 461442
loss: 1.1013705730438232,grad_norm: 0.9999994130615609, iteration: 461443
loss: 1.0106091499328613,grad_norm: 0.9999992885088751, iteration: 461444
loss: 1.0425158739089966,grad_norm: 0.9999990199738615, iteration: 461445
loss: 1.3335434198379517,grad_norm: 0.9999997870269614, iteration: 461446
loss: 1.0157129764556885,grad_norm: 0.9825621982068958, iteration: 461447
loss: 1.0868401527404785,grad_norm: 0.9979367068588874, iteration: 461448
loss: 1.0700680017471313,grad_norm: 0.9999994952500046, iteration: 461449
loss: 1.2450417280197144,grad_norm: 0.9999993302831973, iteration: 461450
loss: 1.115832805633545,grad_norm: 0.9999999173884897, iteration: 461451
loss: 1.1080801486968994,grad_norm: 0.9999995815735875, iteration: 461452
loss: 1.0176230669021606,grad_norm: 0.9999994374172945, iteration: 461453
loss: 1.2339471578598022,grad_norm: 0.999999586490435, iteration: 461454
loss: 1.0761297941207886,grad_norm: 0.9548548709665202, iteration: 461455
loss: 1.0452699661254883,grad_norm: 0.9999999230998001, iteration: 461456
loss: 1.1818686723709106,grad_norm: 0.9999991549837651, iteration: 461457
loss: 1.2217366695404053,grad_norm: 0.9999995398170854, iteration: 461458
loss: 1.094525933265686,grad_norm: 0.9999994398378408, iteration: 461459
loss: 0.9922838807106018,grad_norm: 0.9999992917323319, iteration: 461460
loss: 0.9838547110557556,grad_norm: 0.999999046665656, iteration: 461461
loss: 1.0633869171142578,grad_norm: 0.9999990190511573, iteration: 461462
loss: 1.2043571472167969,grad_norm: 0.9999994696124407, iteration: 461463
loss: 1.0707634687423706,grad_norm: 0.99999938249616, iteration: 461464
loss: 1.1775859594345093,grad_norm: 0.9999999251628874, iteration: 461465
loss: 1.3004382848739624,grad_norm: 0.9999993387396255, iteration: 461466
loss: 0.9800813794136047,grad_norm: 0.6387283207644602, iteration: 461467
loss: 1.1380634307861328,grad_norm: 0.9999999655728176, iteration: 461468
loss: 1.2264251708984375,grad_norm: 0.999999600072735, iteration: 461469
loss: 1.2001500129699707,grad_norm: 0.9999997239640759, iteration: 461470
loss: 1.1395114660263062,grad_norm: 0.7493715585722172, iteration: 461471
loss: 1.2298319339752197,grad_norm: 0.999999499276449, iteration: 461472
loss: 0.9827986359596252,grad_norm: 0.9999999224889836, iteration: 461473
loss: 1.2324761152267456,grad_norm: 0.9999996315773483, iteration: 461474
loss: 1.0565803050994873,grad_norm: 0.9999992174794351, iteration: 461475
loss: 1.09228515625,grad_norm: 0.9999996114287671, iteration: 461476
loss: 1.0570979118347168,grad_norm: 0.9999991419616364, iteration: 461477
loss: 0.9930225610733032,grad_norm: 0.8375469622621702, iteration: 461478
loss: 1.099652647972107,grad_norm: 0.9999999291936811, iteration: 461479
loss: 1.0011956691741943,grad_norm: 0.9999992571929502, iteration: 461480
loss: 1.0731406211853027,grad_norm: 0.9999994232097091, iteration: 461481
loss: 1.1850653886795044,grad_norm: 0.9999996931634534, iteration: 461482
loss: 1.0620981454849243,grad_norm: 0.9999995469168773, iteration: 461483
loss: 1.013385534286499,grad_norm: 0.6625981161033576, iteration: 461484
loss: 1.0383750200271606,grad_norm: 0.9999990352765269, iteration: 461485
loss: 1.1597508192062378,grad_norm: 0.9999996566081865, iteration: 461486
loss: 1.0132765769958496,grad_norm: 0.8415617833471815, iteration: 461487
loss: 1.0292690992355347,grad_norm: 0.8895976345337716, iteration: 461488
loss: 1.0472317934036255,grad_norm: 0.9999992641321602, iteration: 461489
loss: 1.285365343093872,grad_norm: 0.9999994379683358, iteration: 461490
loss: 1.0862212181091309,grad_norm: 0.9999997993184632, iteration: 461491
loss: 1.1414908170700073,grad_norm: 0.999999931024905, iteration: 461492
loss: 1.148614764213562,grad_norm: 0.9999996431941671, iteration: 461493
loss: 1.0935007333755493,grad_norm: 0.9999995117780763, iteration: 461494
loss: 1.1471173763275146,grad_norm: 0.9999997681474181, iteration: 461495
loss: 1.0719921588897705,grad_norm: 0.999999984064709, iteration: 461496
loss: 1.239591360092163,grad_norm: 0.9999999359384132, iteration: 461497
loss: 1.0557807683944702,grad_norm: 0.8672154617485336, iteration: 461498
loss: 1.1361898183822632,grad_norm: 0.9999995936449504, iteration: 461499
loss: 1.292972445487976,grad_norm: 0.9999998240934953, iteration: 461500
loss: 1.0939985513687134,grad_norm: 0.999999884824954, iteration: 461501
loss: 1.0946811437606812,grad_norm: 0.9999994052688542, iteration: 461502
loss: 1.3626810312271118,grad_norm: 0.9999999702631919, iteration: 461503
loss: 1.2559704780578613,grad_norm: 0.9999998118904365, iteration: 461504
loss: 1.2266247272491455,grad_norm: 0.9999998613581397, iteration: 461505
loss: 1.0349794626235962,grad_norm: 0.9999995247297675, iteration: 461506
loss: 1.0793100595474243,grad_norm: 0.999999750709958, iteration: 461507
loss: 1.184802532196045,grad_norm: 0.9999997849231842, iteration: 461508
loss: 1.1043225526809692,grad_norm: 0.9999997314427077, iteration: 461509
loss: 1.2980259656906128,grad_norm: 0.9999992028060852, iteration: 461510
loss: 1.230906367301941,grad_norm: 0.9999996889883365, iteration: 461511
loss: 1.2251677513122559,grad_norm: 0.9999999208593695, iteration: 461512
loss: 1.2262568473815918,grad_norm: 0.9999994332424584, iteration: 461513
loss: 1.1580955982208252,grad_norm: 0.9999994736466328, iteration: 461514
loss: 1.019038438796997,grad_norm: 0.9999993137211765, iteration: 461515
loss: 1.042218804359436,grad_norm: 0.9999997801583492, iteration: 461516
loss: 1.1935136318206787,grad_norm: 0.9999991647237991, iteration: 461517
loss: 1.107467532157898,grad_norm: 0.9999999533172552, iteration: 461518
loss: 1.1361325979232788,grad_norm: 0.9999997720856375, iteration: 461519
loss: 1.3427056074142456,grad_norm: 0.999999773114567, iteration: 461520
loss: 1.0280680656433105,grad_norm: 0.8456339878244742, iteration: 461521
loss: 1.1921690702438354,grad_norm: 0.9999997364067882, iteration: 461522
loss: 1.121616005897522,grad_norm: 0.9999991920188801, iteration: 461523
loss: 1.0071568489074707,grad_norm: 0.8842802492091412, iteration: 461524
loss: 1.0074687004089355,grad_norm: 0.8031698368020795, iteration: 461525
loss: 0.9890106916427612,grad_norm: 0.8517705909528003, iteration: 461526
loss: 1.091099739074707,grad_norm: 0.9999996335034116, iteration: 461527
loss: 1.2484439611434937,grad_norm: 0.9999993969376015, iteration: 461528
loss: 1.0009946823120117,grad_norm: 0.8678134360514576, iteration: 461529
loss: 1.0616565942764282,grad_norm: 0.9999991221744948, iteration: 461530
loss: 1.089601993560791,grad_norm: 0.852740732438865, iteration: 461531
loss: 1.2365212440490723,grad_norm: 0.9999994346204899, iteration: 461532
loss: 1.0394586324691772,grad_norm: 0.9188390860542616, iteration: 461533
loss: 1.1675561666488647,grad_norm: 0.9999998289893738, iteration: 461534
loss: 1.0214645862579346,grad_norm: 0.6520927417259741, iteration: 461535
loss: 1.0213942527770996,grad_norm: 0.999999364598868, iteration: 461536
loss: 1.1246925592422485,grad_norm: 0.9999999296321943, iteration: 461537
loss: 1.0638453960418701,grad_norm: 0.9999994554110427, iteration: 461538
loss: 1.1519923210144043,grad_norm: 0.999999881414386, iteration: 461539
loss: 1.1117322444915771,grad_norm: 0.9999991121304003, iteration: 461540
loss: 1.1265382766723633,grad_norm: 0.9999995834980768, iteration: 461541
loss: 1.1499154567718506,grad_norm: 0.999999375891159, iteration: 461542
loss: 1.0055729150772095,grad_norm: 0.7350989887097434, iteration: 461543
loss: 1.0160378217697144,grad_norm: 0.8244885781575092, iteration: 461544
loss: 0.9803756475448608,grad_norm: 0.7763504617549611, iteration: 461545
loss: 1.0011935234069824,grad_norm: 0.9999997313812204, iteration: 461546
loss: 1.0858726501464844,grad_norm: 0.9424846740036837, iteration: 461547
loss: 0.9895320534706116,grad_norm: 0.9999990732338128, iteration: 461548
loss: 0.9765180945396423,grad_norm: 0.8870654464304104, iteration: 461549
loss: 1.167092204093933,grad_norm: 0.9999998269866273, iteration: 461550
loss: 1.027035117149353,grad_norm: 0.9999992356229499, iteration: 461551
loss: 0.9944530725479126,grad_norm: 0.9999996903832931, iteration: 461552
loss: 1.000512719154358,grad_norm: 0.8264095976819241, iteration: 461553
loss: 1.0229518413543701,grad_norm: 0.8323427013267541, iteration: 461554
loss: 1.028310775756836,grad_norm: 0.9999990788476599, iteration: 461555
loss: 1.077100396156311,grad_norm: 0.9999996747738064, iteration: 461556
loss: 1.083424687385559,grad_norm: 0.9999994905608057, iteration: 461557
loss: 1.0139172077178955,grad_norm: 0.7883821311176865, iteration: 461558
loss: 1.0140657424926758,grad_norm: 0.7802747239831864, iteration: 461559
loss: 1.1708135604858398,grad_norm: 0.9999992552760327, iteration: 461560
loss: 1.023372769355774,grad_norm: 0.9999998535503453, iteration: 461561
loss: 1.0381933450698853,grad_norm: 0.9999990697879337, iteration: 461562
loss: 1.1564769744873047,grad_norm: 0.9999998876374269, iteration: 461563
loss: 0.9518693089485168,grad_norm: 0.7958611141661636, iteration: 461564
loss: 1.0979828834533691,grad_norm: 0.91488769122419, iteration: 461565
loss: 1.0240806341171265,grad_norm: 0.63558197789469, iteration: 461566
loss: 1.000407338142395,grad_norm: 0.9999994792023282, iteration: 461567
loss: 1.0184296369552612,grad_norm: 0.7773398598399796, iteration: 461568
loss: 1.031798005104065,grad_norm: 0.9999991774802617, iteration: 461569
loss: 1.3002792596817017,grad_norm: 0.9999998637373423, iteration: 461570
loss: 0.9785245656967163,grad_norm: 0.7581883887867529, iteration: 461571
loss: 1.0225194692611694,grad_norm: 0.9999991955058702, iteration: 461572
loss: 1.000344157218933,grad_norm: 0.8079589270099727, iteration: 461573
loss: 1.0133036375045776,grad_norm: 0.7374386024771078, iteration: 461574
loss: 0.9623081684112549,grad_norm: 0.7801284249330955, iteration: 461575
loss: 0.9717626571655273,grad_norm: 0.9520951143423396, iteration: 461576
loss: 1.0632742643356323,grad_norm: 0.9999994960587695, iteration: 461577
loss: 0.9872071743011475,grad_norm: 0.6883919389863508, iteration: 461578
loss: 1.0235804319381714,grad_norm: 0.6898916811125874, iteration: 461579
loss: 0.992677628993988,grad_norm: 0.9999998445964596, iteration: 461580
loss: 0.985505223274231,grad_norm: 0.964122025188768, iteration: 461581
loss: 0.9516054391860962,grad_norm: 0.9999991333885173, iteration: 461582
loss: 0.9564335942268372,grad_norm: 0.7612954543916978, iteration: 461583
loss: 0.9527431130409241,grad_norm: 0.7403365692468763, iteration: 461584
loss: 0.9984896779060364,grad_norm: 0.789435552823267, iteration: 461585
loss: 1.0329036712646484,grad_norm: 0.9999991104813081, iteration: 461586
loss: 1.053650975227356,grad_norm: 0.9999992583190846, iteration: 461587
loss: 0.9889109134674072,grad_norm: 0.7934008615963728, iteration: 461588
loss: 0.9836142659187317,grad_norm: 0.8436543492735029, iteration: 461589
loss: 0.9898760914802551,grad_norm: 0.7770387686517662, iteration: 461590
loss: 0.9942101240158081,grad_norm: 0.7832165970652608, iteration: 461591
loss: 1.0160895586013794,grad_norm: 0.7368043140855901, iteration: 461592
loss: 1.0530427694320679,grad_norm: 0.7780197271887942, iteration: 461593
loss: 1.0498509407043457,grad_norm: 0.9999990636760631, iteration: 461594
loss: 1.1585952043533325,grad_norm: 0.9999999252836934, iteration: 461595
loss: 0.9718029499053955,grad_norm: 0.8827764342208637, iteration: 461596
loss: 1.005935788154602,grad_norm: 0.7771755181450154, iteration: 461597
loss: 0.9277822375297546,grad_norm: 0.7455267472144047, iteration: 461598
loss: 1.030483365058899,grad_norm: 0.9109269471716552, iteration: 461599
loss: 1.0516356229782104,grad_norm: 0.9547786202434378, iteration: 461600
loss: 1.1191703081130981,grad_norm: 0.9999996577359006, iteration: 461601
loss: 0.9923873543739319,grad_norm: 0.7740873438707987, iteration: 461602
loss: 1.0146172046661377,grad_norm: 0.9999998897699549, iteration: 461603
loss: 1.1049604415893555,grad_norm: 0.9999993733523563, iteration: 461604
loss: 0.9825708866119385,grad_norm: 0.8773836281417927, iteration: 461605
loss: 1.0007518529891968,grad_norm: 0.9999991766863947, iteration: 461606
loss: 1.041988730430603,grad_norm: 0.9999993380694397, iteration: 461607
loss: 0.9716148972511292,grad_norm: 0.9999992215624252, iteration: 461608
loss: 1.0105345249176025,grad_norm: 0.8752827651795814, iteration: 461609
loss: 0.9791097640991211,grad_norm: 0.7232233256558738, iteration: 461610
loss: 1.04041588306427,grad_norm: 0.9999997917018361, iteration: 461611
loss: 1.052446961402893,grad_norm: 0.9999992572450653, iteration: 461612
loss: 0.964232861995697,grad_norm: 0.7473003286357938, iteration: 461613
loss: 1.0656991004943848,grad_norm: 0.9999990383133889, iteration: 461614
loss: 1.124638319015503,grad_norm: 0.9999998404028067, iteration: 461615
loss: 0.9537217020988464,grad_norm: 0.6888479162981737, iteration: 461616
loss: 0.9979938864707947,grad_norm: 0.5920676719385574, iteration: 461617
loss: 1.0516510009765625,grad_norm: 0.9452197083136092, iteration: 461618
loss: 1.0059257745742798,grad_norm: 0.8743984285347286, iteration: 461619
loss: 1.185395359992981,grad_norm: 0.9999990918359912, iteration: 461620
loss: 1.0715765953063965,grad_norm: 0.8956123028204037, iteration: 461621
loss: 1.052139401435852,grad_norm: 0.9999998010917663, iteration: 461622
loss: 0.9873695373535156,grad_norm: 0.8890906620764355, iteration: 461623
loss: 1.057984709739685,grad_norm: 0.9544085324594789, iteration: 461624
loss: 1.055564522743225,grad_norm: 0.8471725500187234, iteration: 461625
loss: 1.0290075540542603,grad_norm: 0.8549065877818246, iteration: 461626
loss: 0.9627370834350586,grad_norm: 0.6577569780473196, iteration: 461627
loss: 1.0216213464736938,grad_norm: 0.7628191745018328, iteration: 461628
loss: 1.0097033977508545,grad_norm: 0.8454476386497443, iteration: 461629
loss: 1.0720232725143433,grad_norm: 0.9999995362182931, iteration: 461630
loss: 1.0007362365722656,grad_norm: 0.8078747705799268, iteration: 461631
loss: 0.9701860547065735,grad_norm: 0.758049257733243, iteration: 461632
loss: 1.1303958892822266,grad_norm: 0.9419890543858791, iteration: 461633
loss: 0.9464555978775024,grad_norm: 0.7791984542770634, iteration: 461634
loss: 0.9990550875663757,grad_norm: 0.8001543816331058, iteration: 461635
loss: 1.0033103227615356,grad_norm: 0.9999991142795516, iteration: 461636
loss: 1.013419270515442,grad_norm: 0.7586266047861077, iteration: 461637
loss: 1.0196483135223389,grad_norm: 0.9999995482911553, iteration: 461638
loss: 1.0081764459609985,grad_norm: 0.9999996817305945, iteration: 461639
loss: 0.9465261101722717,grad_norm: 0.6428462153457255, iteration: 461640
loss: 0.9820823073387146,grad_norm: 0.8506001357007078, iteration: 461641
loss: 1.0205671787261963,grad_norm: 0.9948344280058776, iteration: 461642
loss: 1.0018740892410278,grad_norm: 0.6573513412391068, iteration: 461643
loss: 1.0064553022384644,grad_norm: 0.7267553719846919, iteration: 461644
loss: 1.0514987707138062,grad_norm: 0.9999991963191575, iteration: 461645
loss: 0.9985202550888062,grad_norm: 0.7769496391436189, iteration: 461646
loss: 1.0556799173355103,grad_norm: 0.7969285441713025, iteration: 461647
loss: 0.9966749548912048,grad_norm: 0.9040378155948606, iteration: 461648
loss: 0.9508423805236816,grad_norm: 0.6231948409759921, iteration: 461649
loss: 1.1502666473388672,grad_norm: 0.9999995429834558, iteration: 461650
loss: 0.9877774715423584,grad_norm: 0.7338973103941294, iteration: 461651
loss: 1.0316392183303833,grad_norm: 0.9999991257610531, iteration: 461652
loss: 1.0252223014831543,grad_norm: 0.7947951602363511, iteration: 461653
loss: 0.9648512601852417,grad_norm: 0.7842697763374749, iteration: 461654
loss: 0.977624773979187,grad_norm: 0.8751371149696185, iteration: 461655
loss: 1.070330023765564,grad_norm: 0.9999990469544228, iteration: 461656
loss: 0.9833981394767761,grad_norm: 0.7455641631009159, iteration: 461657
loss: 1.0633183717727661,grad_norm: 0.9279174278716951, iteration: 461658
loss: 1.0037133693695068,grad_norm: 0.9999996714788558, iteration: 461659
loss: 0.9905315637588501,grad_norm: 0.8820511702462247, iteration: 461660
loss: 1.0880862474441528,grad_norm: 0.9999990177550384, iteration: 461661
loss: 1.008497953414917,grad_norm: 0.7355588833740246, iteration: 461662
loss: 1.0115864276885986,grad_norm: 0.783262720185234, iteration: 461663
loss: 0.9874204993247986,grad_norm: 0.797165822707095, iteration: 461664
loss: 1.0471951961517334,grad_norm: 0.9999993488428508, iteration: 461665
loss: 1.1165673732757568,grad_norm: 0.904281828983437, iteration: 461666
loss: 0.9725133776664734,grad_norm: 0.676261189054942, iteration: 461667
loss: 0.9884123206138611,grad_norm: 0.7377076773489086, iteration: 461668
loss: 0.9881157279014587,grad_norm: 0.7127466855202856, iteration: 461669
loss: 0.9922630786895752,grad_norm: 0.8838115279539801, iteration: 461670
loss: 0.9790588021278381,grad_norm: 0.9903689042221687, iteration: 461671
loss: 0.9849920868873596,grad_norm: 0.6320327216256649, iteration: 461672
loss: 1.0208897590637207,grad_norm: 0.9999996142708338, iteration: 461673
loss: 0.9792565703392029,grad_norm: 0.8163258977226772, iteration: 461674
loss: 1.0056848526000977,grad_norm: 0.6452880980442831, iteration: 461675
loss: 1.0008471012115479,grad_norm: 0.7639567144307869, iteration: 461676
loss: 1.0200707912445068,grad_norm: 0.7235512181640537, iteration: 461677
loss: 0.9902676343917847,grad_norm: 0.9999991254784584, iteration: 461678
loss: 0.980210542678833,grad_norm: 0.7600640598444534, iteration: 461679
loss: 1.0226197242736816,grad_norm: 0.8251285278645165, iteration: 461680
loss: 1.0547540187835693,grad_norm: 0.8619926115846585, iteration: 461681
loss: 1.0530478954315186,grad_norm: 0.7292208447448967, iteration: 461682
loss: 0.9977990984916687,grad_norm: 0.8351164718235, iteration: 461683
loss: 0.9962968826293945,grad_norm: 0.9999991957248519, iteration: 461684
loss: 0.9841063022613525,grad_norm: 0.7396959818696371, iteration: 461685
loss: 1.0587035417556763,grad_norm: 0.8043299391811765, iteration: 461686
loss: 1.0029479265213013,grad_norm: 0.8157018071266747, iteration: 461687
loss: 0.9676074385643005,grad_norm: 0.7402701728111858, iteration: 461688
loss: 1.0593578815460205,grad_norm: 0.8490280117599855, iteration: 461689
loss: 1.0109268426895142,grad_norm: 0.6539337531241987, iteration: 461690
loss: 1.1731358766555786,grad_norm: 0.9999998767460034, iteration: 461691
loss: 1.051634430885315,grad_norm: 0.7929616204922538, iteration: 461692
loss: 1.0018153190612793,grad_norm: 0.9524410693674059, iteration: 461693
loss: 0.9642020463943481,grad_norm: 0.8476394070049801, iteration: 461694
loss: 1.016680359840393,grad_norm: 0.999999297353439, iteration: 461695
loss: 1.011964201927185,grad_norm: 0.6041693603559387, iteration: 461696
loss: 1.1041380167007446,grad_norm: 0.9999999219584749, iteration: 461697
loss: 1.0087708234786987,grad_norm: 0.8322803446693039, iteration: 461698
loss: 0.9483097195625305,grad_norm: 0.6192004678969877, iteration: 461699
loss: 1.013296365737915,grad_norm: 0.8731861551783552, iteration: 461700
loss: 1.0286756753921509,grad_norm: 0.88034399972938, iteration: 461701
loss: 0.9844237565994263,grad_norm: 0.7272066098574567, iteration: 461702
loss: 1.0615723133087158,grad_norm: 0.9999996787944202, iteration: 461703
loss: 1.002785563468933,grad_norm: 0.9860602856724372, iteration: 461704
loss: 0.9807587265968323,grad_norm: 0.9204146239287578, iteration: 461705
loss: 0.9928746223449707,grad_norm: 0.7391355293645304, iteration: 461706
loss: 1.042547583580017,grad_norm: 0.8755239155836877, iteration: 461707
loss: 1.0089123249053955,grad_norm: 0.7053098791126405, iteration: 461708
loss: 1.0003836154937744,grad_norm: 0.7042803803016947, iteration: 461709
loss: 0.9932706952095032,grad_norm: 0.6856128359908232, iteration: 461710
loss: 1.0531940460205078,grad_norm: 0.8194890498895482, iteration: 461711
loss: 1.0659584999084473,grad_norm: 0.9999997398307532, iteration: 461712
loss: 0.9808893203735352,grad_norm: 0.9313083764856472, iteration: 461713
loss: 0.99293452501297,grad_norm: 0.9999997358201114, iteration: 461714
loss: 1.0529979467391968,grad_norm: 0.9999993250746553, iteration: 461715
loss: 0.9997465014457703,grad_norm: 0.7917322905859863, iteration: 461716
loss: 0.9943997263908386,grad_norm: 0.8857817308467144, iteration: 461717
loss: 0.9758957028388977,grad_norm: 0.7314879675082471, iteration: 461718
loss: 1.0863564014434814,grad_norm: 0.8608152048491732, iteration: 461719
loss: 0.9866545796394348,grad_norm: 0.8976373586126167, iteration: 461720
loss: 0.9739883542060852,grad_norm: 0.7356051450463607, iteration: 461721
loss: 1.0092960596084595,grad_norm: 0.6765683501214734, iteration: 461722
loss: 1.0394606590270996,grad_norm: 0.9999998906608948, iteration: 461723
loss: 1.063057780265808,grad_norm: 0.9551405749385039, iteration: 461724
loss: 1.0067813396453857,grad_norm: 0.8322967281302645, iteration: 461725
loss: 1.0100936889648438,grad_norm: 0.7966411601112006, iteration: 461726
loss: 0.9939135909080505,grad_norm: 0.6391155079890929, iteration: 461727
loss: 0.9923007488250732,grad_norm: 0.7908213101928044, iteration: 461728
loss: 1.043953537940979,grad_norm: 0.9451396100209938, iteration: 461729
loss: 1.0533629655838013,grad_norm: 0.7835495164787247, iteration: 461730
loss: 0.9996547102928162,grad_norm: 0.6799910412723111, iteration: 461731
loss: 0.9841629862785339,grad_norm: 0.7460584835381198, iteration: 461732
loss: 0.9499728679656982,grad_norm: 0.8607215651738238, iteration: 461733
loss: 1.0666738748550415,grad_norm: 0.9999994628671245, iteration: 461734
loss: 1.0024185180664062,grad_norm: 0.6551592301614396, iteration: 461735
loss: 1.0413763523101807,grad_norm: 0.683441387945974, iteration: 461736
loss: 1.0969990491867065,grad_norm: 0.9999992751948827, iteration: 461737
loss: 1.0174485445022583,grad_norm: 0.6548733670554797, iteration: 461738
loss: 0.9651033282279968,grad_norm: 0.759819409920545, iteration: 461739
loss: 0.9737942814826965,grad_norm: 0.8414730603992242, iteration: 461740
loss: 1.0108985900878906,grad_norm: 0.7849486696069415, iteration: 461741
loss: 1.0149396657943726,grad_norm: 0.6405209043241934, iteration: 461742
loss: 0.9786161780357361,grad_norm: 0.8061653058666707, iteration: 461743
loss: 0.9955028891563416,grad_norm: 0.7823528602302832, iteration: 461744
loss: 0.9527875185012817,grad_norm: 0.825221505522992, iteration: 461745
loss: 1.0316072702407837,grad_norm: 0.8425124488594534, iteration: 461746
loss: 1.014267086982727,grad_norm: 0.6893008450829555, iteration: 461747
loss: 1.0310348272323608,grad_norm: 0.7833668251900744, iteration: 461748
loss: 0.98096764087677,grad_norm: 1.000000068430035, iteration: 461749
loss: 0.983452320098877,grad_norm: 0.918569351457124, iteration: 461750
loss: 1.0061691999435425,grad_norm: 0.9172604984799343, iteration: 461751
loss: 1.0286272764205933,grad_norm: 0.7789927622977725, iteration: 461752
loss: 0.9966577887535095,grad_norm: 0.685985924591177, iteration: 461753
loss: 1.0292134284973145,grad_norm: 0.9999996247883959, iteration: 461754
loss: 1.1082134246826172,grad_norm: 0.9999993991390949, iteration: 461755
loss: 0.9960678815841675,grad_norm: 0.9567363117290433, iteration: 461756
loss: 1.0427272319793701,grad_norm: 0.8946401360879708, iteration: 461757
loss: 0.9932984709739685,grad_norm: 0.7827852533533722, iteration: 461758
loss: 1.022689938545227,grad_norm: 0.954901022357269, iteration: 461759
loss: 1.0955288410186768,grad_norm: 0.8163510021639484, iteration: 461760
loss: 1.0642588138580322,grad_norm: 0.99999952895264, iteration: 461761
loss: 0.9923284649848938,grad_norm: 0.7879821640161183, iteration: 461762
loss: 0.9581736326217651,grad_norm: 0.8269697303487167, iteration: 461763
loss: 1.1835182905197144,grad_norm: 0.9999995080191321, iteration: 461764
loss: 1.0096098184585571,grad_norm: 0.7895868355189846, iteration: 461765
loss: 0.9894474148750305,grad_norm: 0.7887200267025413, iteration: 461766
loss: 0.9818950891494751,grad_norm: 0.823168548411015, iteration: 461767
loss: 1.0536044836044312,grad_norm: 0.999999013582116, iteration: 461768
loss: 1.0117390155792236,grad_norm: 0.9999993970385059, iteration: 461769
loss: 1.0418758392333984,grad_norm: 0.9999999285968452, iteration: 461770
loss: 0.9819328784942627,grad_norm: 0.9999991922123808, iteration: 461771
loss: 1.0549640655517578,grad_norm: 0.9999996478480697, iteration: 461772
loss: 0.9791202545166016,grad_norm: 0.7737907049113945, iteration: 461773
loss: 1.0241739749908447,grad_norm: 0.9999990560415775, iteration: 461774
loss: 0.9937906265258789,grad_norm: 0.8778598176702744, iteration: 461775
loss: 1.0385631322860718,grad_norm: 0.99999995125967, iteration: 461776
loss: 0.9808199405670166,grad_norm: 0.7117022563361635, iteration: 461777
loss: 1.0212193727493286,grad_norm: 0.6973208974040314, iteration: 461778
loss: 1.0105780363082886,grad_norm: 0.7768655425366816, iteration: 461779
loss: 1.0061644315719604,grad_norm: 0.8532324258001145, iteration: 461780
loss: 0.9706893563270569,grad_norm: 0.9061711836957659, iteration: 461781
loss: 1.1569571495056152,grad_norm: 0.9999999668163952, iteration: 461782
loss: 1.0045676231384277,grad_norm: 0.9999993984394053, iteration: 461783
loss: 0.9898872375488281,grad_norm: 0.7564510770429808, iteration: 461784
loss: 0.979893684387207,grad_norm: 0.7980610549735694, iteration: 461785
loss: 0.969196081161499,grad_norm: 0.8078035265166673, iteration: 461786
loss: 0.9902474880218506,grad_norm: 0.7487704663350746, iteration: 461787
loss: 0.9787700176239014,grad_norm: 0.666988333808239, iteration: 461788
loss: 0.9822267889976501,grad_norm: 0.7316529571009628, iteration: 461789
loss: 1.0401333570480347,grad_norm: 0.8494441428139317, iteration: 461790
loss: 0.9663383960723877,grad_norm: 0.6357618546036414, iteration: 461791
loss: 0.997800886631012,grad_norm: 0.999999617194377, iteration: 461792
loss: 0.9582452774047852,grad_norm: 0.7572089556651733, iteration: 461793
loss: 0.9632892608642578,grad_norm: 0.7841632510423013, iteration: 461794
loss: 0.992500364780426,grad_norm: 0.6417672462480756, iteration: 461795
loss: 0.9825756549835205,grad_norm: 0.8028203511710872, iteration: 461796
loss: 1.0291845798492432,grad_norm: 0.9999998107207955, iteration: 461797
loss: 1.0802559852600098,grad_norm: 0.7574488960323009, iteration: 461798
loss: 0.9737349152565002,grad_norm: 0.7526348447847776, iteration: 461799
loss: 0.9846555590629578,grad_norm: 0.7753724527049747, iteration: 461800
loss: 1.0612084865570068,grad_norm: 0.9285940125927455, iteration: 461801
loss: 0.9691839814186096,grad_norm: 0.7542300395204884, iteration: 461802
loss: 1.0005868673324585,grad_norm: 0.8217867688380784, iteration: 461803
loss: 0.9723493456840515,grad_norm: 0.7531388075887225, iteration: 461804
loss: 1.0276856422424316,grad_norm: 0.78626477213061, iteration: 461805
loss: 1.0042996406555176,grad_norm: 0.8234705486448788, iteration: 461806
loss: 1.0209074020385742,grad_norm: 0.8682428666212677, iteration: 461807
loss: 1.0617103576660156,grad_norm: 1.0000000283843622, iteration: 461808
loss: 1.0128871202468872,grad_norm: 0.8262316071568724, iteration: 461809
loss: 0.9927306175231934,grad_norm: 0.7181750583647952, iteration: 461810
loss: 0.9635228514671326,grad_norm: 0.9999991381947869, iteration: 461811
loss: 0.9766244292259216,grad_norm: 0.7436652514975057, iteration: 461812
loss: 1.051850438117981,grad_norm: 0.9292298505016348, iteration: 461813
loss: 1.3813714981079102,grad_norm: 0.9999997505734365, iteration: 461814
loss: 1.014588713645935,grad_norm: 0.9999997347315754, iteration: 461815
loss: 1.1254029273986816,grad_norm: 0.9999993831968927, iteration: 461816
loss: 0.9976668953895569,grad_norm: 0.7294466517655096, iteration: 461817
loss: 1.0234293937683105,grad_norm: 0.738754450915745, iteration: 461818
loss: 0.9852174520492554,grad_norm: 0.748746140122394, iteration: 461819
loss: 0.9823885560035706,grad_norm: 0.7343157257337015, iteration: 461820
loss: 0.9865630269050598,grad_norm: 0.6676055277963852, iteration: 461821
loss: 1.0021916627883911,grad_norm: 0.9999996562785199, iteration: 461822
loss: 1.0345171689987183,grad_norm: 0.9902563027117398, iteration: 461823
loss: 1.0268349647521973,grad_norm: 0.7077415820073701, iteration: 461824
loss: 1.0205888748168945,grad_norm: 0.8746368007006968, iteration: 461825
loss: 0.9752635359764099,grad_norm: 0.836971875082549, iteration: 461826
loss: 0.9682943224906921,grad_norm: 0.6617682836645605, iteration: 461827
loss: 1.0144928693771362,grad_norm: 0.8793040203607194, iteration: 461828
loss: 1.0097781419754028,grad_norm: 0.9999993502441585, iteration: 461829
loss: 1.0058162212371826,grad_norm: 0.7180387396557993, iteration: 461830
loss: 1.0171654224395752,grad_norm: 0.7483959326776463, iteration: 461831
loss: 0.9766219854354858,grad_norm: 0.8372159575547315, iteration: 461832
loss: 1.0539730787277222,grad_norm: 0.7096179911257009, iteration: 461833
loss: 0.9892984628677368,grad_norm: 0.7075405175729595, iteration: 461834
loss: 1.0044831037521362,grad_norm: 0.7613616402695234, iteration: 461835
loss: 0.9966478943824768,grad_norm: 0.9114338683076169, iteration: 461836
loss: 0.9638805389404297,grad_norm: 0.7172849348654687, iteration: 461837
loss: 1.0319631099700928,grad_norm: 0.9533129486346749, iteration: 461838
loss: 0.9953270554542542,grad_norm: 0.7466574331094533, iteration: 461839
loss: 1.0058321952819824,grad_norm: 0.9999995966528855, iteration: 461840
loss: 1.0022870302200317,grad_norm: 0.8911600527580323, iteration: 461841
loss: 1.0055806636810303,grad_norm: 0.9999992497403404, iteration: 461842
loss: 1.0570411682128906,grad_norm: 0.99999958160807, iteration: 461843
loss: 1.0408437252044678,grad_norm: 0.9999998656335879, iteration: 461844
loss: 1.0266262292861938,grad_norm: 0.6891013776426655, iteration: 461845
loss: 0.9858100414276123,grad_norm: 0.6515633915570745, iteration: 461846
loss: 0.9769385457038879,grad_norm: 0.9566486251340732, iteration: 461847
loss: 1.033023715019226,grad_norm: 0.9668526370693364, iteration: 461848
loss: 1.0056424140930176,grad_norm: 0.6472430826213881, iteration: 461849
loss: 1.0179214477539062,grad_norm: 0.7286720706035652, iteration: 461850
loss: 0.9993977546691895,grad_norm: 0.8231888231723401, iteration: 461851
loss: 1.01600182056427,grad_norm: 0.9999992555048866, iteration: 461852
loss: 0.9857315421104431,grad_norm: 0.9999991357633521, iteration: 461853
loss: 0.9726776480674744,grad_norm: 0.7895170386975401, iteration: 461854
loss: 1.1037204265594482,grad_norm: 0.9999992220844869, iteration: 461855
loss: 1.1560059785842896,grad_norm: 0.999999769565447, iteration: 461856
loss: 1.0478260517120361,grad_norm: 0.9999991890047375, iteration: 461857
loss: 0.9713379144668579,grad_norm: 0.8084582469120593, iteration: 461858
loss: 0.9859241843223572,grad_norm: 0.8049145618195396, iteration: 461859
loss: 1.0694202184677124,grad_norm: 0.9856470047795267, iteration: 461860
loss: 0.9876993894577026,grad_norm: 0.9157246884812847, iteration: 461861
loss: 0.9992913007736206,grad_norm: 0.6823701272260394, iteration: 461862
loss: 1.0343211889266968,grad_norm: 0.9999989457820764, iteration: 461863
loss: 1.1857835054397583,grad_norm: 0.9999994537590534, iteration: 461864
loss: 1.0533219575881958,grad_norm: 0.8426371436240714, iteration: 461865
loss: 0.9957711696624756,grad_norm: 0.7639254577415303, iteration: 461866
loss: 1.0113725662231445,grad_norm: 0.9999997467629892, iteration: 461867
loss: 1.0517668724060059,grad_norm: 0.9999991868450315, iteration: 461868
loss: 1.036698818206787,grad_norm: 0.9814786330941626, iteration: 461869
loss: 1.011428713798523,grad_norm: 0.7487677579949085, iteration: 461870
loss: 1.2156567573547363,grad_norm: 0.9999996114434787, iteration: 461871
loss: 1.0210886001586914,grad_norm: 0.8158027131517999, iteration: 461872
loss: 1.0143202543258667,grad_norm: 0.6877396336062914, iteration: 461873
loss: 0.9797701239585876,grad_norm: 0.7987178669780381, iteration: 461874
loss: 0.9904085993766785,grad_norm: 0.6892813328970744, iteration: 461875
loss: 1.0125722885131836,grad_norm: 0.9537178268090978, iteration: 461876
loss: 1.0449004173278809,grad_norm: 0.9999998014904499, iteration: 461877
loss: 1.0888779163360596,grad_norm: 0.8822451394512097, iteration: 461878
loss: 0.9624257683753967,grad_norm: 0.7312040240742871, iteration: 461879
loss: 1.0074248313903809,grad_norm: 0.7885487618785936, iteration: 461880
loss: 0.974849283695221,grad_norm: 0.870232261239731, iteration: 461881
loss: 0.9763379693031311,grad_norm: 0.7682562205184269, iteration: 461882
loss: 1.0238800048828125,grad_norm: 0.999999205415781, iteration: 461883
loss: 0.9934489727020264,grad_norm: 0.7912696106144673, iteration: 461884
loss: 0.9999911189079285,grad_norm: 0.6197569947170106, iteration: 461885
loss: 1.0823036432266235,grad_norm: 0.9999990652775994, iteration: 461886
loss: 0.9681614637374878,grad_norm: 0.8064542005814561, iteration: 461887
loss: 1.0489805936813354,grad_norm: 0.9034849361948437, iteration: 461888
loss: 1.0549602508544922,grad_norm: 0.8055179206957936, iteration: 461889
loss: 1.0450658798217773,grad_norm: 0.999999645355534, iteration: 461890
loss: 1.011554479598999,grad_norm: 0.999999377602136, iteration: 461891
loss: 1.0461466312408447,grad_norm: 0.9999993272605711, iteration: 461892
loss: 1.0253986120224,grad_norm: 0.999999165734772, iteration: 461893
loss: 1.0463740825653076,grad_norm: 0.9279284016105658, iteration: 461894
loss: 1.032406210899353,grad_norm: 0.9627379271473573, iteration: 461895
loss: 0.982282817363739,grad_norm: 0.7444454337399374, iteration: 461896
loss: 0.9792730808258057,grad_norm: 0.9999991848586048, iteration: 461897
loss: 0.985156238079071,grad_norm: 0.9999996630160499, iteration: 461898
loss: 0.9482673406600952,grad_norm: 0.7189586341700964, iteration: 461899
loss: 0.9891483187675476,grad_norm: 0.7137333539718391, iteration: 461900
loss: 0.978800356388092,grad_norm: 0.9999993627519467, iteration: 461901
loss: 1.1173816919326782,grad_norm: 0.9999999592307013, iteration: 461902
loss: 1.0108898878097534,grad_norm: 0.7800628523317844, iteration: 461903
loss: 1.1545820236206055,grad_norm: 0.9999992764003576, iteration: 461904
loss: 0.9785277843475342,grad_norm: 0.8925200457716971, iteration: 461905
loss: 1.0311050415039062,grad_norm: 0.9999993321653318, iteration: 461906
loss: 1.0653724670410156,grad_norm: 0.9999998454720745, iteration: 461907
loss: 1.0607162714004517,grad_norm: 0.7717376430081204, iteration: 461908
loss: 1.097788691520691,grad_norm: 0.9999991086860379, iteration: 461909
loss: 1.117681622505188,grad_norm: 0.9999996578657872, iteration: 461910
loss: 0.9628757238388062,grad_norm: 0.8192182609062537, iteration: 461911
loss: 1.0764275789260864,grad_norm: 0.9999996356687402, iteration: 461912
loss: 0.9707161784172058,grad_norm: 0.8255993264660636, iteration: 461913
loss: 1.0200896263122559,grad_norm: 0.9999998303302583, iteration: 461914
loss: 1.1072899103164673,grad_norm: 1.0000000205093837, iteration: 461915
loss: 1.0332157611846924,grad_norm: 0.758859673591154, iteration: 461916
loss: 1.1350181102752686,grad_norm: 0.9862421140456594, iteration: 461917
loss: 1.012650489807129,grad_norm: 0.7207173908092502, iteration: 461918
loss: 1.0096721649169922,grad_norm: 0.7362059351081762, iteration: 461919
loss: 1.0028377771377563,grad_norm: 0.9291369219052421, iteration: 461920
loss: 1.0233609676361084,grad_norm: 0.8028525799025114, iteration: 461921
loss: 0.9906154870986938,grad_norm: 0.7752688663129776, iteration: 461922
loss: 1.0380618572235107,grad_norm: 0.9999997137715236, iteration: 461923
loss: 1.0507665872573853,grad_norm: 0.7729297107917416, iteration: 461924
loss: 1.0009074211120605,grad_norm: 0.9999991624759186, iteration: 461925
loss: 1.0186896324157715,grad_norm: 0.9999998493817733, iteration: 461926
loss: 0.9650285243988037,grad_norm: 0.9999991023257877, iteration: 461927
loss: 1.1589683294296265,grad_norm: 0.9999996821051476, iteration: 461928
loss: 1.047207236289978,grad_norm: 0.8477021031712861, iteration: 461929
loss: 1.0296989679336548,grad_norm: 0.9999993982402765, iteration: 461930
loss: 0.9926959276199341,grad_norm: 0.9165298129601842, iteration: 461931
loss: 1.0774379968643188,grad_norm: 0.9999998168524863, iteration: 461932
loss: 1.0184260606765747,grad_norm: 0.8735610750910926, iteration: 461933
loss: 1.0397735834121704,grad_norm: 0.9900347167919917, iteration: 461934
loss: 1.0307656526565552,grad_norm: 0.7734414746479319, iteration: 461935
loss: 1.0137821435928345,grad_norm: 0.9999991010650643, iteration: 461936
loss: 0.998596727848053,grad_norm: 0.8333021126065582, iteration: 461937
loss: 0.9787980914115906,grad_norm: 0.6992252154459595, iteration: 461938
loss: 0.9870755076408386,grad_norm: 0.6452922955765596, iteration: 461939
loss: 0.9806934595108032,grad_norm: 0.9999992503437996, iteration: 461940
loss: 1.1110538244247437,grad_norm: 0.9999997993917088, iteration: 461941
loss: 0.9917644262313843,grad_norm: 0.8128701661654363, iteration: 461942
loss: 1.0311349630355835,grad_norm: 0.7752476205555268, iteration: 461943
loss: 1.081722617149353,grad_norm: 0.8853300740958112, iteration: 461944
loss: 1.1013857126235962,grad_norm: 0.999999733968768, iteration: 461945
loss: 0.9990613460540771,grad_norm: 0.6659203520755476, iteration: 461946
loss: 1.196370005607605,grad_norm: 0.9999995072955888, iteration: 461947
loss: 1.0658189058303833,grad_norm: 0.9999996212025255, iteration: 461948
loss: 1.0281972885131836,grad_norm: 0.8312679814046005, iteration: 461949
loss: 1.0182058811187744,grad_norm: 0.7560108440409201, iteration: 461950
loss: 1.0236623287200928,grad_norm: 0.9135308443874297, iteration: 461951
loss: 1.0085371732711792,grad_norm: 0.8553073944092177, iteration: 461952
loss: 1.064894676208496,grad_norm: 0.9999998346301898, iteration: 461953
loss: 1.032461404800415,grad_norm: 0.8098590664025425, iteration: 461954
loss: 0.9917670488357544,grad_norm: 0.7665199023433888, iteration: 461955
loss: 1.0029770135879517,grad_norm: 0.6717153816918155, iteration: 461956
loss: 1.0255638360977173,grad_norm: 0.7293290027536096, iteration: 461957
loss: 0.9850465655326843,grad_norm: 0.9071360462025843, iteration: 461958
loss: 0.9641468524932861,grad_norm: 0.8099720521695366, iteration: 461959
loss: 0.9947815537452698,grad_norm: 0.7643775011392173, iteration: 461960
loss: 0.9994614720344543,grad_norm: 0.6828762550961807, iteration: 461961
loss: 0.9953221678733826,grad_norm: 0.9999994537077262, iteration: 461962
loss: 1.0631215572357178,grad_norm: 1.0000000512539629, iteration: 461963
loss: 1.0895565748214722,grad_norm: 0.9999993481407649, iteration: 461964
loss: 0.9994750022888184,grad_norm: 0.9787561641536315, iteration: 461965
loss: 1.1440850496292114,grad_norm: 0.989880969806147, iteration: 461966
loss: 1.014967441558838,grad_norm: 0.738514699096571, iteration: 461967
loss: 1.0144436359405518,grad_norm: 0.7292431546535796, iteration: 461968
loss: 1.0184593200683594,grad_norm: 0.7366743323138907, iteration: 461969
loss: 1.0069472789764404,grad_norm: 0.726288068828985, iteration: 461970
loss: 1.1140856742858887,grad_norm: 0.999999194313668, iteration: 461971
loss: 1.0216305255889893,grad_norm: 0.9999996397740817, iteration: 461972
loss: 1.0239700078964233,grad_norm: 0.9999991876627372, iteration: 461973
loss: 1.0019482374191284,grad_norm: 0.604538491470085, iteration: 461974
loss: 1.0068978071212769,grad_norm: 0.7550480874376962, iteration: 461975
loss: 1.0074392557144165,grad_norm: 0.6638653813342148, iteration: 461976
loss: 1.0765808820724487,grad_norm: 0.999999561285418, iteration: 461977
loss: 1.0036170482635498,grad_norm: 0.9012941887645332, iteration: 461978
loss: 1.0387835502624512,grad_norm: 1.0000000021538162, iteration: 461979
loss: 1.0446696281433105,grad_norm: 0.8791571517921456, iteration: 461980
loss: 1.0123951435089111,grad_norm: 0.9999996029847628, iteration: 461981
loss: 1.0154017210006714,grad_norm: 0.7957329126274401, iteration: 461982
loss: 1.02985417842865,grad_norm: 0.859855261945478, iteration: 461983
loss: 0.9783077836036682,grad_norm: 0.6564636880324249, iteration: 461984
loss: 1.0941721200942993,grad_norm: 0.9999998974747362, iteration: 461985
loss: 1.026220440864563,grad_norm: 0.9257869291414427, iteration: 461986
loss: 1.0161350965499878,grad_norm: 0.6818405423243936, iteration: 461987
loss: 1.0091873407363892,grad_norm: 0.9144142486610001, iteration: 461988
loss: 1.0331543684005737,grad_norm: 0.9999994250487589, iteration: 461989
loss: 0.9978508353233337,grad_norm: 0.9999998458797608, iteration: 461990
loss: 1.0722565650939941,grad_norm: 0.9999996352790718, iteration: 461991
loss: 0.9695984721183777,grad_norm: 0.917016724626443, iteration: 461992
loss: 1.0336649417877197,grad_norm: 0.9718787870014213, iteration: 461993
loss: 1.066849946975708,grad_norm: 0.9999993225527096, iteration: 461994
loss: 1.0942692756652832,grad_norm: 0.9999990496906058, iteration: 461995
loss: 1.0117696523666382,grad_norm: 0.7769475218956264, iteration: 461996
loss: 1.030929684638977,grad_norm: 0.9999993625329697, iteration: 461997
loss: 1.063053846359253,grad_norm: 0.9999990512692576, iteration: 461998
loss: 1.0036089420318604,grad_norm: 1.000000019972462, iteration: 461999
loss: 1.0356638431549072,grad_norm: 0.9999992028412044, iteration: 462000
loss: 1.1245497465133667,grad_norm: 0.9999992199897826, iteration: 462001
loss: 1.0523797273635864,grad_norm: 0.748667671001023, iteration: 462002
loss: 1.0291526317596436,grad_norm: 0.8287084733167581, iteration: 462003
loss: 0.9623371362686157,grad_norm: 0.7537662591118323, iteration: 462004
loss: 1.0229213237762451,grad_norm: 0.6628345173027501, iteration: 462005
loss: 1.1329092979431152,grad_norm: 0.9614783540884599, iteration: 462006
loss: 1.0585951805114746,grad_norm: 0.9999995309147067, iteration: 462007
loss: 0.9793265461921692,grad_norm: 0.662102455443343, iteration: 462008
loss: 1.002181053161621,grad_norm: 0.7523880416263974, iteration: 462009
loss: 1.027229905128479,grad_norm: 0.751056792128535, iteration: 462010
loss: 1.0261096954345703,grad_norm: 0.9999992356957871, iteration: 462011
loss: 1.0072369575500488,grad_norm: 0.8521235470203036, iteration: 462012
loss: 1.0640958547592163,grad_norm: 0.9999999247127025, iteration: 462013
loss: 1.0383156538009644,grad_norm: 0.9999991615904358, iteration: 462014
loss: 1.1182109117507935,grad_norm: 0.9999994847330731, iteration: 462015
loss: 0.9834270477294922,grad_norm: 0.7987697472569656, iteration: 462016
loss: 0.9994674921035767,grad_norm: 0.9999991226497907, iteration: 462017
loss: 0.9722449779510498,grad_norm: 0.77499260794391, iteration: 462018
loss: 0.9507585167884827,grad_norm: 0.9642359887014119, iteration: 462019
loss: 1.06333327293396,grad_norm: 0.9220988932055901, iteration: 462020
loss: 1.0104444026947021,grad_norm: 1.0000000233918713, iteration: 462021
loss: 1.119316816329956,grad_norm: 0.9999992634663061, iteration: 462022
loss: 1.0842859745025635,grad_norm: 0.9999993365367917, iteration: 462023
loss: 0.98271244764328,grad_norm: 0.7335847698612599, iteration: 462024
loss: 0.9899184107780457,grad_norm: 0.8586562675028624, iteration: 462025
loss: 1.0224735736846924,grad_norm: 0.8266037428221299, iteration: 462026
loss: 0.992859959602356,grad_norm: 0.829974194542296, iteration: 462027
loss: 1.0984117984771729,grad_norm: 0.8508424318536968, iteration: 462028
loss: 1.078506350517273,grad_norm: 0.9999999319851205, iteration: 462029
loss: 0.9892908930778503,grad_norm: 0.7866898439427138, iteration: 462030
loss: 0.9952951669692993,grad_norm: 0.753730375973634, iteration: 462031
loss: 1.0223432779312134,grad_norm: 0.8810314493989261, iteration: 462032
loss: 0.9779167175292969,grad_norm: 0.9130156529702578, iteration: 462033
loss: 0.9774161577224731,grad_norm: 0.6361557744944676, iteration: 462034
loss: 1.0157437324523926,grad_norm: 0.8474152243404648, iteration: 462035
loss: 1.0197030305862427,grad_norm: 0.9999991171772894, iteration: 462036
loss: 1.0362402200698853,grad_norm: 0.8350816847316854, iteration: 462037
loss: 0.993980884552002,grad_norm: 0.7067061503560653, iteration: 462038
loss: 1.0372107028961182,grad_norm: 0.8542082073625978, iteration: 462039
loss: 0.9899146556854248,grad_norm: 0.7671311387567012, iteration: 462040
loss: 0.9633811712265015,grad_norm: 0.6708339441754657, iteration: 462041
loss: 1.0766494274139404,grad_norm: 0.8702626702964043, iteration: 462042
loss: 0.9696348309516907,grad_norm: 0.7754010050827738, iteration: 462043
loss: 0.9981213808059692,grad_norm: 0.9840965570834047, iteration: 462044
loss: 1.0484259128570557,grad_norm: 0.9999993238680409, iteration: 462045
loss: 1.0415009260177612,grad_norm: 0.8993220733370484, iteration: 462046
loss: 0.9974565505981445,grad_norm: 0.7487796770043528, iteration: 462047
loss: 1.0157370567321777,grad_norm: 0.9845030077932589, iteration: 462048
loss: 1.028070330619812,grad_norm: 0.6955373640210082, iteration: 462049
loss: 1.022937297821045,grad_norm: 0.6963967902888696, iteration: 462050
loss: 0.9865603446960449,grad_norm: 0.6424637258769141, iteration: 462051
loss: 1.0027337074279785,grad_norm: 0.74323130648955, iteration: 462052
loss: 1.0100293159484863,grad_norm: 0.745700587525456, iteration: 462053
loss: 0.9892504811286926,grad_norm: 0.8260272448440434, iteration: 462054
loss: 1.0328363180160522,grad_norm: 0.78808816619542, iteration: 462055
loss: 1.0044379234313965,grad_norm: 0.8518772205731898, iteration: 462056
loss: 0.9846060872077942,grad_norm: 0.8452971312618595, iteration: 462057
loss: 1.0084688663482666,grad_norm: 0.7056784406677982, iteration: 462058
loss: 0.9780253767967224,grad_norm: 0.8926026301891113, iteration: 462059
loss: 1.0502915382385254,grad_norm: 0.7221555865254474, iteration: 462060
loss: 1.056137204170227,grad_norm: 0.9039604828293236, iteration: 462061
loss: 1.1496776342391968,grad_norm: 0.9999998238153698, iteration: 462062
loss: 1.0386031866073608,grad_norm: 0.9999991221955059, iteration: 462063
loss: 0.98073810338974,grad_norm: 0.800031817380293, iteration: 462064
loss: 1.0261304378509521,grad_norm: 0.7985374164552081, iteration: 462065
loss: 1.0055066347122192,grad_norm: 0.7191862230257788, iteration: 462066
loss: 0.9871635437011719,grad_norm: 0.7006137441004248, iteration: 462067
loss: 0.9869139194488525,grad_norm: 0.9999990645704474, iteration: 462068
loss: 1.0162417888641357,grad_norm: 0.7523511990233237, iteration: 462069
loss: 1.0882563591003418,grad_norm: 0.7293984975254942, iteration: 462070
loss: 1.031257152557373,grad_norm: 0.728834162314316, iteration: 462071
loss: 1.065787434577942,grad_norm: 0.8084435303916456, iteration: 462072
loss: 1.0637080669403076,grad_norm: 0.8627804886233352, iteration: 462073
loss: 0.9861809611320496,grad_norm: 0.7004590239944738, iteration: 462074
loss: 1.0803574323654175,grad_norm: 0.9451809705110936, iteration: 462075
loss: 1.0098739862442017,grad_norm: 0.9022268324827941, iteration: 462076
loss: 0.9932126402854919,grad_norm: 0.9563811310993293, iteration: 462077
loss: 0.9842625260353088,grad_norm: 0.84022189449927, iteration: 462078
loss: 0.960809588432312,grad_norm: 0.735621258152464, iteration: 462079
loss: 1.0218578577041626,grad_norm: 0.9999995632004474, iteration: 462080
loss: 1.1232928037643433,grad_norm: 0.9999999117864425, iteration: 462081
loss: 1.17472505569458,grad_norm: 0.9999991797907661, iteration: 462082
loss: 0.9971737265586853,grad_norm: 0.7904245474638866, iteration: 462083
loss: 1.0665990114212036,grad_norm: 0.9999997843963757, iteration: 462084
loss: 0.9878442883491516,grad_norm: 0.9999990319133785, iteration: 462085
loss: 1.081998348236084,grad_norm: 0.9999994407159509, iteration: 462086
loss: 0.9841132760047913,grad_norm: 0.8353246248909265, iteration: 462087
loss: 1.2064327001571655,grad_norm: 0.9999999290409393, iteration: 462088
loss: 1.0043014287948608,grad_norm: 0.733613362079675, iteration: 462089
loss: 1.1003023386001587,grad_norm: 0.9302286488079053, iteration: 462090
loss: 0.9878155589103699,grad_norm: 0.6704869991683178, iteration: 462091
loss: 0.9902639389038086,grad_norm: 0.7668341997911027, iteration: 462092
loss: 1.0232460498809814,grad_norm: 0.7803128077082926, iteration: 462093
loss: 1.020931363105774,grad_norm: 0.9249408736931364, iteration: 462094
loss: 1.039724588394165,grad_norm: 0.9999993503796855, iteration: 462095
loss: 1.0074303150177002,grad_norm: 0.8037441388518554, iteration: 462096
loss: 1.0270041227340698,grad_norm: 0.7384201015205518, iteration: 462097
loss: 1.016027808189392,grad_norm: 0.7864285192824588, iteration: 462098
loss: 1.0040611028671265,grad_norm: 0.6909927161912481, iteration: 462099
loss: 1.0816651582717896,grad_norm: 0.9999991696135899, iteration: 462100
loss: 0.929330587387085,grad_norm: 0.8190990443573792, iteration: 462101
loss: 1.1170926094055176,grad_norm: 0.9999993119929141, iteration: 462102
loss: 1.064591646194458,grad_norm: 0.9999993301736714, iteration: 462103
loss: 1.0434484481811523,grad_norm: 0.9999993354552048, iteration: 462104
loss: 0.9658059477806091,grad_norm: 0.9524541635756124, iteration: 462105
loss: 1.0158977508544922,grad_norm: 0.9999998093983579, iteration: 462106
loss: 0.9992027282714844,grad_norm: 0.7757708749000495, iteration: 462107
loss: 1.1743687391281128,grad_norm: 0.9999997632126559, iteration: 462108
loss: 1.0606191158294678,grad_norm: 0.7339792892360477, iteration: 462109
loss: 0.9924995303153992,grad_norm: 0.8162908754871262, iteration: 462110
loss: 1.0084772109985352,grad_norm: 0.8962049629774471, iteration: 462111
loss: 1.0219004154205322,grad_norm: 0.631484500717289, iteration: 462112
loss: 0.9910731315612793,grad_norm: 0.7493645675110044, iteration: 462113
loss: 1.0103981494903564,grad_norm: 0.7898039384617843, iteration: 462114
loss: 0.9963737726211548,grad_norm: 0.640975423892564, iteration: 462115
loss: 0.9953315854072571,grad_norm: 0.6519083554011341, iteration: 462116
loss: 1.0284377336502075,grad_norm: 0.9999996687798197, iteration: 462117
loss: 1.0995591878890991,grad_norm: 0.9999999543271574, iteration: 462118
loss: 1.007869839668274,grad_norm: 0.8036752513171802, iteration: 462119
loss: 1.064684510231018,grad_norm: 0.9999997826067286, iteration: 462120
loss: 1.0685328245162964,grad_norm: 0.962626826259087, iteration: 462121
loss: 0.9771702289581299,grad_norm: 0.8486202711660833, iteration: 462122
loss: 0.9709943532943726,grad_norm: 0.8995363158537334, iteration: 462123
loss: 0.9804244041442871,grad_norm: 0.8711494552422046, iteration: 462124
loss: 0.9990680813789368,grad_norm: 0.9999991732739806, iteration: 462125
loss: 0.9881395697593689,grad_norm: 0.8596360129092672, iteration: 462126
loss: 0.9761131405830383,grad_norm: 0.7900882647465978, iteration: 462127
loss: 1.062022089958191,grad_norm: 0.6582317522795081, iteration: 462128
loss: 0.986407458782196,grad_norm: 0.7600741853381024, iteration: 462129
loss: 1.0107207298278809,grad_norm: 0.7482608327317921, iteration: 462130
loss: 0.9794795513153076,grad_norm: 0.9999997910410043, iteration: 462131
loss: 1.024073839187622,grad_norm: 0.80410403748639, iteration: 462132
loss: 1.0281649827957153,grad_norm: 0.8371731648025782, iteration: 462133
loss: 1.0406959056854248,grad_norm: 0.696906337668129, iteration: 462134
loss: 1.065482497215271,grad_norm: 0.9999998836544705, iteration: 462135
loss: 0.9580962061882019,grad_norm: 0.6525835226300183, iteration: 462136
loss: 0.9865076541900635,grad_norm: 0.7495802123034885, iteration: 462137
loss: 1.027347207069397,grad_norm: 0.5695303034632878, iteration: 462138
loss: 1.0236018896102905,grad_norm: 0.9999992023074641, iteration: 462139
loss: 1.0041770935058594,grad_norm: 0.7777157335041482, iteration: 462140
loss: 1.0382081270217896,grad_norm: 0.8598854396283347, iteration: 462141
loss: 0.9981167316436768,grad_norm: 0.6452987635649021, iteration: 462142
loss: 0.9914067387580872,grad_norm: 0.6592711275452589, iteration: 462143
loss: 1.0018194913864136,grad_norm: 0.6403297404318069, iteration: 462144
loss: 1.0237863063812256,grad_norm: 0.5521295076313227, iteration: 462145
loss: 1.0269286632537842,grad_norm: 0.9038576625985264, iteration: 462146
loss: 1.0410956144332886,grad_norm: 0.9999995636583613, iteration: 462147
loss: 1.0494437217712402,grad_norm: 0.9846357365867614, iteration: 462148
loss: 0.9825233817100525,grad_norm: 0.7029972436255147, iteration: 462149
loss: 1.0571293830871582,grad_norm: 0.9999991398624246, iteration: 462150
loss: 1.0043030977249146,grad_norm: 0.7299040645325329, iteration: 462151
loss: 1.0080435276031494,grad_norm: 0.7315529492708492, iteration: 462152
loss: 0.9743573069572449,grad_norm: 0.7475590442088598, iteration: 462153
loss: 1.0342011451721191,grad_norm: 0.8912969810067118, iteration: 462154
loss: 1.0072455406188965,grad_norm: 0.9999992177806215, iteration: 462155
loss: 1.0162001848220825,grad_norm: 0.6976375860259879, iteration: 462156
loss: 1.0130797624588013,grad_norm: 0.7644607707044406, iteration: 462157
loss: 0.9592057466506958,grad_norm: 0.7468758131620079, iteration: 462158
loss: 0.9758968353271484,grad_norm: 0.7846046924582316, iteration: 462159
loss: 0.9753236770629883,grad_norm: 0.8281001196666171, iteration: 462160
loss: 1.0077710151672363,grad_norm: 0.64413170505604, iteration: 462161
loss: 1.0068202018737793,grad_norm: 0.9022851798467265, iteration: 462162
loss: 0.9802976846694946,grad_norm: 0.7890769089866269, iteration: 462163
loss: 1.0513683557510376,grad_norm: 0.9999993077293549, iteration: 462164
loss: 1.0168511867523193,grad_norm: 0.7686378171124081, iteration: 462165
loss: 1.017087697982788,grad_norm: 0.8322124524908266, iteration: 462166
loss: 0.9813469052314758,grad_norm: 0.7909232184450855, iteration: 462167
loss: 1.0776015520095825,grad_norm: 0.9999998744528891, iteration: 462168
loss: 1.1318695545196533,grad_norm: 0.9999992151302424, iteration: 462169
loss: 1.0756852626800537,grad_norm: 0.7649342746094987, iteration: 462170
loss: 1.0159084796905518,grad_norm: 0.7557290781959285, iteration: 462171
loss: 0.9924056529998779,grad_norm: 0.9999996397876998, iteration: 462172
loss: 0.9711807370185852,grad_norm: 0.7337687743473741, iteration: 462173
loss: 1.0074777603149414,grad_norm: 0.99999953497241, iteration: 462174
loss: 0.995583713054657,grad_norm: 0.8803148904276391, iteration: 462175
loss: 0.9552218914031982,grad_norm: 0.6651949754367834, iteration: 462176
loss: 0.9799957275390625,grad_norm: 0.6966707796206908, iteration: 462177
loss: 1.002173900604248,grad_norm: 0.8619514853384873, iteration: 462178
loss: 1.010172963142395,grad_norm: 0.9999993276122199, iteration: 462179
loss: 1.0135340690612793,grad_norm: 0.9999996565783817, iteration: 462180
loss: 1.0560898780822754,grad_norm: 0.829443332892794, iteration: 462181
loss: 0.9490453600883484,grad_norm: 0.6949616234709485, iteration: 462182
loss: 0.9938017725944519,grad_norm: 0.6945283861077878, iteration: 462183
loss: 1.0016560554504395,grad_norm: 0.8040849025726456, iteration: 462184
loss: 0.9937599301338196,grad_norm: 0.9163307116273341, iteration: 462185
loss: 0.9936349987983704,grad_norm: 0.7665527004703899, iteration: 462186
loss: 0.9822548627853394,grad_norm: 0.7473133816211049, iteration: 462187
loss: 1.030637264251709,grad_norm: 0.726890737225115, iteration: 462188
loss: 1.006612777709961,grad_norm: 0.6535158398297366, iteration: 462189
loss: 1.0040663480758667,grad_norm: 0.9144876637823008, iteration: 462190
loss: 1.0640513896942139,grad_norm: 0.9999998149277527, iteration: 462191
loss: 1.0023490190505981,grad_norm: 0.9404099416179219, iteration: 462192
loss: 1.0039212703704834,grad_norm: 0.999999950608717, iteration: 462193
loss: 0.98325514793396,grad_norm: 0.8075158393332982, iteration: 462194
loss: 1.0229511260986328,grad_norm: 0.7216008334005609, iteration: 462195
loss: 0.9879257082939148,grad_norm: 0.9999997399932473, iteration: 462196
loss: 1.0675667524337769,grad_norm: 0.9999997004908988, iteration: 462197
loss: 1.0013924837112427,grad_norm: 0.9999997632223874, iteration: 462198
loss: 1.0326980352401733,grad_norm: 0.7044784212448398, iteration: 462199
loss: 1.0051631927490234,grad_norm: 0.831098597585863, iteration: 462200
loss: 1.0229121446609497,grad_norm: 0.7647014877699286, iteration: 462201
loss: 0.9957883358001709,grad_norm: 0.8857770506476977, iteration: 462202
loss: 0.980965256690979,grad_norm: 0.8601347522157379, iteration: 462203
loss: 1.0126703977584839,grad_norm: 0.8013925755775758, iteration: 462204
loss: 1.0090243816375732,grad_norm: 0.7058394448095175, iteration: 462205
loss: 1.0365909337997437,grad_norm: 0.9378931065937686, iteration: 462206
loss: 0.9746812582015991,grad_norm: 0.5908750496757622, iteration: 462207
loss: 0.9829936623573303,grad_norm: 0.7115655956647888, iteration: 462208
loss: 1.0149669647216797,grad_norm: 0.8797675246385044, iteration: 462209
loss: 1.045880675315857,grad_norm: 0.9999998564976664, iteration: 462210
loss: 1.0293642282485962,grad_norm: 0.75642849366935, iteration: 462211
loss: 1.071605920791626,grad_norm: 0.9999995277877959, iteration: 462212
loss: 0.9948551654815674,grad_norm: 0.7520565810697959, iteration: 462213
loss: 1.0043538808822632,grad_norm: 0.798520042276727, iteration: 462214
loss: 0.9863318204879761,grad_norm: 0.7799633262163608, iteration: 462215
loss: 0.9995163083076477,grad_norm: 0.9595276397807411, iteration: 462216
loss: 1.0094761848449707,grad_norm: 0.7167265787025626, iteration: 462217
loss: 0.9971190094947815,grad_norm: 0.762763176125714, iteration: 462218
loss: 1.020605206489563,grad_norm: 0.9999999333623796, iteration: 462219
loss: 1.0185221433639526,grad_norm: 0.7694812332042656, iteration: 462220
loss: 0.962850034236908,grad_norm: 0.7715384884842434, iteration: 462221
loss: 1.0193171501159668,grad_norm: 0.8859095987829002, iteration: 462222
loss: 1.0150887966156006,grad_norm: 0.8351409713112725, iteration: 462223
loss: 0.9859363436698914,grad_norm: 0.6850008831799708, iteration: 462224
loss: 1.0260287523269653,grad_norm: 0.7088663831836775, iteration: 462225
loss: 0.9937635064125061,grad_norm: 0.818359522345992, iteration: 462226
loss: 0.9733217358589172,grad_norm: 0.7944500660131221, iteration: 462227
loss: 1.0122826099395752,grad_norm: 0.9521472658093503, iteration: 462228
loss: 0.9872013926506042,grad_norm: 0.9267396473252064, iteration: 462229
loss: 0.9698099493980408,grad_norm: 0.8387426930443579, iteration: 462230
loss: 1.1018966436386108,grad_norm: 0.9999999281010592, iteration: 462231
loss: 1.0053541660308838,grad_norm: 0.9999994891785285, iteration: 462232
loss: 1.057736873626709,grad_norm: 0.757521184610737, iteration: 462233
loss: 1.047447681427002,grad_norm: 0.999999802915907, iteration: 462234
loss: 1.0089390277862549,grad_norm: 0.9123259275372859, iteration: 462235
loss: 1.085601806640625,grad_norm: 0.9759111069692569, iteration: 462236
loss: 1.0054975748062134,grad_norm: 0.8446695239951748, iteration: 462237
loss: 0.9994230270385742,grad_norm: 0.9999996909219614, iteration: 462238
loss: 1.0541187524795532,grad_norm: 1.0000001081899954, iteration: 462239
loss: 1.023296594619751,grad_norm: 0.9036216434758827, iteration: 462240
loss: 1.0555167198181152,grad_norm: 0.999999412759419, iteration: 462241
loss: 0.9694221615791321,grad_norm: 0.7533768044278639, iteration: 462242
loss: 0.9922901391983032,grad_norm: 0.8393799418519889, iteration: 462243
loss: 1.0803440809249878,grad_norm: 0.999999947373859, iteration: 462244
loss: 1.080365777015686,grad_norm: 0.9999989901674937, iteration: 462245
loss: 1.0943654775619507,grad_norm: 0.9999995450369878, iteration: 462246
loss: 1.000462293624878,grad_norm: 0.8088718005079215, iteration: 462247
loss: 0.9967829585075378,grad_norm: 0.6610972578050208, iteration: 462248
loss: 1.0099914073944092,grad_norm: 0.8585565913460611, iteration: 462249
loss: 1.0500825643539429,grad_norm: 0.8855719128955983, iteration: 462250
loss: 0.980780303478241,grad_norm: 0.9999992105172942, iteration: 462251
loss: 1.1141517162322998,grad_norm: 0.9758748966125284, iteration: 462252
loss: 1.108935832977295,grad_norm: 0.8879430327604708, iteration: 462253
loss: 1.1135742664337158,grad_norm: 0.9999994470701572, iteration: 462254
loss: 1.1009970903396606,grad_norm: 0.9999999648348918, iteration: 462255
loss: 0.9889652729034424,grad_norm: 0.8093801303389805, iteration: 462256
loss: 0.9966306686401367,grad_norm: 0.7431480621542529, iteration: 462257
loss: 0.9942731857299805,grad_norm: 0.9999990065050686, iteration: 462258
loss: 0.9520174860954285,grad_norm: 0.999999698108635, iteration: 462259
loss: 1.143629550933838,grad_norm: 0.9999997978186835, iteration: 462260
loss: 0.9841201901435852,grad_norm: 0.781827021818136, iteration: 462261
loss: 1.00057053565979,grad_norm: 0.9208750628519583, iteration: 462262
loss: 1.0107825994491577,grad_norm: 0.8643816223032205, iteration: 462263
loss: 0.9936858415603638,grad_norm: 0.9999995038646956, iteration: 462264
loss: 1.00758957862854,grad_norm: 0.753473123528586, iteration: 462265
loss: 0.9789037704467773,grad_norm: 0.9999991780418802, iteration: 462266
loss: 1.0407681465148926,grad_norm: 0.7620906673225059, iteration: 462267
loss: 1.0446016788482666,grad_norm: 0.9999995580610751, iteration: 462268
loss: 0.9875589609146118,grad_norm: 0.6452866017669512, iteration: 462269
loss: 1.0124704837799072,grad_norm: 0.7376357042386108, iteration: 462270
loss: 1.0021969079971313,grad_norm: 0.853312785820016, iteration: 462271
loss: 1.0092670917510986,grad_norm: 0.7894741177264136, iteration: 462272
loss: 1.0206290483474731,grad_norm: 0.6985744992365492, iteration: 462273
loss: 1.0198729038238525,grad_norm: 0.9999994391991285, iteration: 462274
loss: 1.0582711696624756,grad_norm: 0.9999993944700414, iteration: 462275
loss: 0.9493094682693481,grad_norm: 0.7444778293781523, iteration: 462276
loss: 1.0509042739868164,grad_norm: 0.9999995636917394, iteration: 462277
loss: 1.0181304216384888,grad_norm: 0.9264447534064569, iteration: 462278
loss: 1.0029605627059937,grad_norm: 0.6927071218883256, iteration: 462279
loss: 1.0084530115127563,grad_norm: 0.8455591043005238, iteration: 462280
loss: 1.0264322757720947,grad_norm: 0.9999997890722206, iteration: 462281
loss: 1.0143800973892212,grad_norm: 0.9999991540037435, iteration: 462282
loss: 0.9871378540992737,grad_norm: 0.8618769031260698, iteration: 462283
loss: 1.0063855648040771,grad_norm: 0.8433335207371998, iteration: 462284
loss: 0.9952443838119507,grad_norm: 0.9999999055036621, iteration: 462285
loss: 1.1059836149215698,grad_norm: 0.9810652604316726, iteration: 462286
loss: 1.0101056098937988,grad_norm: 0.8615439082460027, iteration: 462287
loss: 0.9762617349624634,grad_norm: 0.9999997562938712, iteration: 462288
loss: 0.9905622005462646,grad_norm: 0.7877014297815143, iteration: 462289
loss: 1.0081367492675781,grad_norm: 0.9999994644730383, iteration: 462290
loss: 1.0745011568069458,grad_norm: 0.9999999343848723, iteration: 462291
loss: 1.0759456157684326,grad_norm: 0.9999992969092213, iteration: 462292
loss: 1.1027116775512695,grad_norm: 0.9999998254331732, iteration: 462293
loss: 0.9780958294868469,grad_norm: 0.7312155332469266, iteration: 462294
loss: 0.9954971075057983,grad_norm: 0.5617964241338657, iteration: 462295
loss: 1.0061951875686646,grad_norm: 0.6265105666581395, iteration: 462296
loss: 0.9942532181739807,grad_norm: 0.8547068168653175, iteration: 462297
loss: 1.0108897686004639,grad_norm: 0.9999992447810306, iteration: 462298
loss: 0.9918871521949768,grad_norm: 0.7291490140270069, iteration: 462299
loss: 1.0026191473007202,grad_norm: 0.9999994832803218, iteration: 462300
loss: 1.071738362312317,grad_norm: 0.6890507486735369, iteration: 462301
loss: 0.9368462562561035,grad_norm: 0.7873285856076364, iteration: 462302
loss: 1.0039833784103394,grad_norm: 0.8001525171876174, iteration: 462303
loss: 0.9916709065437317,grad_norm: 0.7851262049877761, iteration: 462304
loss: 1.0205656290054321,grad_norm: 0.7403756563835941, iteration: 462305
loss: 1.0468932390213013,grad_norm: 0.999999404794787, iteration: 462306
loss: 1.0007177591323853,grad_norm: 0.7651984777808605, iteration: 462307
loss: 0.9897474646568298,grad_norm: 0.6140716497041996, iteration: 462308
loss: 1.0854339599609375,grad_norm: 0.9999998066556244, iteration: 462309
loss: 0.9990469217300415,grad_norm: 0.9999996786725354, iteration: 462310
loss: 1.039604902267456,grad_norm: 0.8343190612060902, iteration: 462311
loss: 0.9604135751724243,grad_norm: 0.812660707234219, iteration: 462312
loss: 0.9870820045471191,grad_norm: 0.6478358701437221, iteration: 462313
loss: 1.059556007385254,grad_norm: 0.9999997838575483, iteration: 462314
loss: 0.9994727969169617,grad_norm: 0.9999997788272844, iteration: 462315
loss: 0.9835147261619568,grad_norm: 0.6432247390124968, iteration: 462316
loss: 0.996262788772583,grad_norm: 0.9999992768812236, iteration: 462317
loss: 1.0055633783340454,grad_norm: 0.750922115613014, iteration: 462318
loss: 1.1128675937652588,grad_norm: 0.9999999275427108, iteration: 462319
loss: 1.0090336799621582,grad_norm: 0.999999293177869, iteration: 462320
loss: 0.9714315533638,grad_norm: 0.8389697962220032, iteration: 462321
loss: 1.053746223449707,grad_norm: 0.9999998904417327, iteration: 462322
loss: 0.9786576628684998,grad_norm: 0.7742009674874998, iteration: 462323
loss: 0.9885271191596985,grad_norm: 0.7633944193837399, iteration: 462324
loss: 1.0011634826660156,grad_norm: 0.8037982235016008, iteration: 462325
loss: 0.9526972770690918,grad_norm: 0.8857287219682561, iteration: 462326
loss: 0.996517539024353,grad_norm: 0.7123966089838907, iteration: 462327
loss: 1.0235931873321533,grad_norm: 0.7559834197327889, iteration: 462328
loss: 1.0283288955688477,grad_norm: 0.9999996838524873, iteration: 462329
loss: 1.0021333694458008,grad_norm: 0.9999996615187797, iteration: 462330
loss: 1.0320570468902588,grad_norm: 0.9999994829256873, iteration: 462331
loss: 1.022792935371399,grad_norm: 0.9999999586334063, iteration: 462332
loss: 1.0402379035949707,grad_norm: 0.8852445805346528, iteration: 462333
loss: 1.0068501234054565,grad_norm: 0.648285469584083, iteration: 462334
loss: 0.9886271357536316,grad_norm: 0.7587500347415325, iteration: 462335
loss: 1.0208356380462646,grad_norm: 0.8033508969128247, iteration: 462336
loss: 1.0449678897857666,grad_norm: 0.7417559951566174, iteration: 462337
loss: 0.9734414219856262,grad_norm: 0.727600511982781, iteration: 462338
loss: 1.1253662109375,grad_norm: 0.9999998579715776, iteration: 462339
loss: 0.9786405563354492,grad_norm: 0.8375945031361776, iteration: 462340
loss: 1.0655187368392944,grad_norm: 0.9999999789213877, iteration: 462341
loss: 1.0320849418640137,grad_norm: 0.9999998297941096, iteration: 462342
loss: 1.0036500692367554,grad_norm: 0.9999991915887226, iteration: 462343
loss: 1.0428169965744019,grad_norm: 0.8599084580814533, iteration: 462344
loss: 0.9741670489311218,grad_norm: 0.6203630109323836, iteration: 462345
loss: 1.011691689491272,grad_norm: 0.9999992283711181, iteration: 462346
loss: 1.0084099769592285,grad_norm: 0.8335643190782568, iteration: 462347
loss: 1.0210011005401611,grad_norm: 0.9370052426729981, iteration: 462348
loss: 0.9760489463806152,grad_norm: 0.713189676320968, iteration: 462349
loss: 0.9910857081413269,grad_norm: 0.7699967841514245, iteration: 462350
loss: 1.1292555332183838,grad_norm: 1.0000000042989639, iteration: 462351
loss: 1.0201770067214966,grad_norm: 0.9626628608985038, iteration: 462352
loss: 1.0302859544754028,grad_norm: 0.6671540682270288, iteration: 462353
loss: 1.119062066078186,grad_norm: 0.9999999072182104, iteration: 462354
loss: 1.0198616981506348,grad_norm: 0.8836078048370158, iteration: 462355
loss: 0.9597073793411255,grad_norm: 0.7141429290066774, iteration: 462356
loss: 1.011868953704834,grad_norm: 0.9730266867414994, iteration: 462357
loss: 1.0288259983062744,grad_norm: 0.8741565372410582, iteration: 462358
loss: 1.0172120332717896,grad_norm: 0.7938532442183472, iteration: 462359
loss: 0.9942833781242371,grad_norm: 0.8293980042374899, iteration: 462360
loss: 1.005890965461731,grad_norm: 0.8671212465127573, iteration: 462361
loss: 1.0399829149246216,grad_norm: 0.9999990449958975, iteration: 462362
loss: 1.0533584356307983,grad_norm: 0.999999557311916, iteration: 462363
loss: 1.0490739345550537,grad_norm: 0.9999999535433328, iteration: 462364
loss: 1.0100756883621216,grad_norm: 0.8624479012267275, iteration: 462365
loss: 1.0214918851852417,grad_norm: 0.9999995466182856, iteration: 462366
loss: 1.0164430141448975,grad_norm: 0.7828212821834645, iteration: 462367
loss: 1.0724477767944336,grad_norm: 0.9094249650447593, iteration: 462368
loss: 0.9494062662124634,grad_norm: 0.7877745659998991, iteration: 462369
loss: 1.0212938785552979,grad_norm: 0.999999298958196, iteration: 462370
loss: 0.9923564195632935,grad_norm: 0.8668434532016153, iteration: 462371
loss: 1.0099238157272339,grad_norm: 0.9999994106340031, iteration: 462372
loss: 0.9734508991241455,grad_norm: 0.7903555709339455, iteration: 462373
loss: 0.9857555627822876,grad_norm: 0.730962345698202, iteration: 462374
loss: 0.9824934601783752,grad_norm: 0.6192048682195328, iteration: 462375
loss: 1.0072287321090698,grad_norm: 0.7658969606597535, iteration: 462376
loss: 1.1383466720581055,grad_norm: 0.9999995286745952, iteration: 462377
loss: 0.9439073801040649,grad_norm: 0.8626810629590095, iteration: 462378
loss: 0.9785361289978027,grad_norm: 0.6954258746224794, iteration: 462379
loss: 1.005297303199768,grad_norm: 0.9999990426688945, iteration: 462380
loss: 1.0122588872909546,grad_norm: 0.7890904682973572, iteration: 462381
loss: 1.1413516998291016,grad_norm: 0.999999942403068, iteration: 462382
loss: 1.0041946172714233,grad_norm: 0.6650079529983496, iteration: 462383
loss: 1.012581467628479,grad_norm: 1.0000000805459894, iteration: 462384
loss: 1.2866004705429077,grad_norm: 0.9999992715875364, iteration: 462385
loss: 1.0463452339172363,grad_norm: 0.9109928481320689, iteration: 462386
loss: 1.1513853073120117,grad_norm: 0.8352111516907398, iteration: 462387
loss: 1.0766738653182983,grad_norm: 0.7866456067868971, iteration: 462388
loss: 0.9551201462745667,grad_norm: 0.8266375774648426, iteration: 462389
loss: 1.0057239532470703,grad_norm: 0.9999990368340663, iteration: 462390
loss: 1.0461276769638062,grad_norm: 0.9287226622475185, iteration: 462391
loss: 0.9879065155982971,grad_norm: 0.7076008896391448, iteration: 462392
loss: 1.0016556978225708,grad_norm: 0.8117135932085574, iteration: 462393
loss: 1.1387097835540771,grad_norm: 0.9999993062267029, iteration: 462394
loss: 1.1404632329940796,grad_norm: 0.8911461422768925, iteration: 462395
loss: 1.0219346284866333,grad_norm: 0.9328956194114104, iteration: 462396
loss: 1.002516508102417,grad_norm: 0.8061186778757228, iteration: 462397
loss: 1.0287442207336426,grad_norm: 0.9433068494317275, iteration: 462398
loss: 1.0440528392791748,grad_norm: 0.8377346317112391, iteration: 462399
loss: 1.0690971612930298,grad_norm: 0.8131529666761254, iteration: 462400
loss: 0.99974125623703,grad_norm: 0.8902484468461045, iteration: 462401
loss: 1.1050331592559814,grad_norm: 0.9999998989429275, iteration: 462402
loss: 0.9904425740242004,grad_norm: 0.6986835496378678, iteration: 462403
loss: 0.9914249777793884,grad_norm: 0.807300491318974, iteration: 462404
loss: 1.0441343784332275,grad_norm: 0.9999993628090703, iteration: 462405
loss: 0.9803971648216248,grad_norm: 0.7675143460545057, iteration: 462406
loss: 1.008172869682312,grad_norm: 0.7107314704793333, iteration: 462407
loss: 1.1032558679580688,grad_norm: 0.999999424464996, iteration: 462408
loss: 1.159438133239746,grad_norm: 0.7322934691963708, iteration: 462409
loss: 0.998956024646759,grad_norm: 0.9999997948141836, iteration: 462410
loss: 1.008803129196167,grad_norm: 0.8522759481915617, iteration: 462411
loss: 1.0689265727996826,grad_norm: 0.9999990999984586, iteration: 462412
loss: 1.035780906677246,grad_norm: 0.9999989854616997, iteration: 462413
loss: 1.045192003250122,grad_norm: 0.8884240777148692, iteration: 462414
loss: 1.0207477807998657,grad_norm: 0.643362686675938, iteration: 462415
loss: 1.0272020101547241,grad_norm: 0.7952866979207983, iteration: 462416
loss: 1.0207977294921875,grad_norm: 0.9955306505426693, iteration: 462417
loss: 0.9887769818305969,grad_norm: 0.7072060459125984, iteration: 462418
loss: 0.9773465394973755,grad_norm: 0.9939924422948285, iteration: 462419
loss: 0.9559668898582458,grad_norm: 0.7120772493625747, iteration: 462420
loss: 0.9929559230804443,grad_norm: 0.6727360922142563, iteration: 462421
loss: 1.0663102865219116,grad_norm: 0.999998995077635, iteration: 462422
loss: 0.9923524260520935,grad_norm: 0.7077020146257786, iteration: 462423
loss: 1.02908456325531,grad_norm: 0.9999995925886643, iteration: 462424
loss: 1.0176867246627808,grad_norm: 0.8220821256514369, iteration: 462425
loss: 1.0873342752456665,grad_norm: 0.999999842132377, iteration: 462426
loss: 1.0230034589767456,grad_norm: 0.637214026547082, iteration: 462427
loss: 1.0478503704071045,grad_norm: 0.9999990723595651, iteration: 462428
loss: 0.9952013492584229,grad_norm: 0.5769112841033639, iteration: 462429
loss: 1.064755916595459,grad_norm: 0.8063670391550548, iteration: 462430
loss: 1.009797215461731,grad_norm: 0.9731841141526228, iteration: 462431
loss: 0.9973744750022888,grad_norm: 0.8340320409183846, iteration: 462432
loss: 1.0251717567443848,grad_norm: 0.9999994199532208, iteration: 462433
loss: 1.0647776126861572,grad_norm: 0.9134759839039311, iteration: 462434
loss: 1.1050325632095337,grad_norm: 0.999999565248726, iteration: 462435
loss: 1.0046746730804443,grad_norm: 0.8424265310468008, iteration: 462436
loss: 1.0844534635543823,grad_norm: 0.9999998657056586, iteration: 462437
loss: 0.9932176470756531,grad_norm: 0.9283968043138412, iteration: 462438
loss: 1.0112234354019165,grad_norm: 0.9323458479184671, iteration: 462439
loss: 0.9879011511802673,grad_norm: 0.8888920971867287, iteration: 462440
loss: 0.9930546879768372,grad_norm: 0.7107744294287279, iteration: 462441
loss: 1.0004390478134155,grad_norm: 0.8350411739989123, iteration: 462442
loss: 1.0114431381225586,grad_norm: 0.7141215968066476, iteration: 462443
loss: 0.9815645217895508,grad_norm: 0.9123708987136379, iteration: 462444
loss: 1.0270153284072876,grad_norm: 0.9999996851735169, iteration: 462445
loss: 1.051411747932434,grad_norm: 0.8978185613538926, iteration: 462446
loss: 1.0164369344711304,grad_norm: 0.8253589264230992, iteration: 462447
loss: 1.1159617900848389,grad_norm: 0.9999990804768789, iteration: 462448
loss: 1.1288256645202637,grad_norm: 0.9999993207791883, iteration: 462449
loss: 1.057003378868103,grad_norm: 0.8565311152464997, iteration: 462450
loss: 0.9780478477478027,grad_norm: 0.8830935369618983, iteration: 462451
loss: 0.9850746393203735,grad_norm: 0.7157133918817139, iteration: 462452
loss: 1.022959589958191,grad_norm: 0.5972337798952869, iteration: 462453
loss: 1.0180774927139282,grad_norm: 0.7425804088299102, iteration: 462454
loss: 1.016369342803955,grad_norm: 0.7906713131560906, iteration: 462455
loss: 1.0058387517929077,grad_norm: 0.7082506911251314, iteration: 462456
loss: 1.0233891010284424,grad_norm: 0.7811940006210463, iteration: 462457
loss: 1.0629545450210571,grad_norm: 0.9999999248948469, iteration: 462458
loss: 0.9703819751739502,grad_norm: 0.8076824814746366, iteration: 462459
loss: 0.9964938163757324,grad_norm: 0.8186208766922427, iteration: 462460
loss: 0.9609752893447876,grad_norm: 0.8904564055270356, iteration: 462461
loss: 1.0334128141403198,grad_norm: 0.9999997614578889, iteration: 462462
loss: 1.0430841445922852,grad_norm: 0.8949159348689366, iteration: 462463
loss: 0.9898196458816528,grad_norm: 0.9567296329889208, iteration: 462464
loss: 1.0356189012527466,grad_norm: 0.7814010765765449, iteration: 462465
loss: 1.0906872749328613,grad_norm: 0.858836686892078, iteration: 462466
loss: 0.9721992015838623,grad_norm: 0.7356616450335147, iteration: 462467
loss: 1.2860791683197021,grad_norm: 0.9999998887233452, iteration: 462468
loss: 1.0287692546844482,grad_norm: 0.7557479862215661, iteration: 462469
loss: 1.0202791690826416,grad_norm: 0.9999993958598306, iteration: 462470
loss: 1.0045688152313232,grad_norm: 0.7454281778036496, iteration: 462471
loss: 0.9953780770301819,grad_norm: 0.8742048909069712, iteration: 462472
loss: 1.0131216049194336,grad_norm: 0.994484265683441, iteration: 462473
loss: 1.0089646577835083,grad_norm: 0.7985774452184456, iteration: 462474
loss: 0.9842140078544617,grad_norm: 0.8170869859783113, iteration: 462475
loss: 1.0813905000686646,grad_norm: 0.804654323559383, iteration: 462476
loss: 1.0530718564987183,grad_norm: 0.9999999097349593, iteration: 462477
loss: 1.0301384925842285,grad_norm: 0.7745319865683791, iteration: 462478
loss: 0.9863124489784241,grad_norm: 0.7906089229319261, iteration: 462479
loss: 0.997156023979187,grad_norm: 0.7373854301986289, iteration: 462480
loss: 1.0346570014953613,grad_norm: 0.9999993144601366, iteration: 462481
loss: 1.1019649505615234,grad_norm: 0.999999512347034, iteration: 462482
loss: 0.9755602478981018,grad_norm: 0.8303872810152015, iteration: 462483
loss: 1.0073109865188599,grad_norm: 0.6487517113497346, iteration: 462484
loss: 1.0759466886520386,grad_norm: 0.9999991755612907, iteration: 462485
loss: 1.156767725944519,grad_norm: 0.9999993458876106, iteration: 462486
loss: 0.9995388984680176,grad_norm: 0.7024957685709711, iteration: 462487
loss: 1.0189977884292603,grad_norm: 0.7316116888599687, iteration: 462488
loss: 0.9642990827560425,grad_norm: 0.9999993071540024, iteration: 462489
loss: 1.0217009782791138,grad_norm: 0.8042797673563886, iteration: 462490
loss: 1.0959945917129517,grad_norm: 0.9999998697248091, iteration: 462491
loss: 1.0030299425125122,grad_norm: 0.9999999579035189, iteration: 462492
loss: 1.0565292835235596,grad_norm: 1.0000000221455638, iteration: 462493
loss: 1.014835238456726,grad_norm: 0.7459184523699466, iteration: 462494
loss: 1.0519044399261475,grad_norm: 0.9999993180233449, iteration: 462495
loss: 1.0185705423355103,grad_norm: 0.99999916957324, iteration: 462496
loss: 1.0094095468521118,grad_norm: 0.74845538392736, iteration: 462497
loss: 1.0340174436569214,grad_norm: 0.6353983936527412, iteration: 462498
loss: 1.0100722312927246,grad_norm: 0.9127386016639377, iteration: 462499
loss: 0.9903050065040588,grad_norm: 0.9999993780060802, iteration: 462500
loss: 1.0969233512878418,grad_norm: 0.7383897522937763, iteration: 462501
loss: 0.9605324268341064,grad_norm: 0.8276712512644386, iteration: 462502
loss: 1.0695745944976807,grad_norm: 0.9940969523536203, iteration: 462503
loss: 1.052575707435608,grad_norm: 0.9263597425768806, iteration: 462504
loss: 1.0712544918060303,grad_norm: 0.8554065001898585, iteration: 462505
loss: 0.9882027506828308,grad_norm: 0.6885436780092264, iteration: 462506
loss: 1.0176982879638672,grad_norm: 0.8847432469897307, iteration: 462507
loss: 1.0254172086715698,grad_norm: 0.8764115744402049, iteration: 462508
loss: 1.023758888244629,grad_norm: 0.8777223543123124, iteration: 462509
loss: 1.0563452243804932,grad_norm: 0.8374885701146777, iteration: 462510
loss: 1.0145572423934937,grad_norm: 0.8096669859205011, iteration: 462511
loss: 0.9891188144683838,grad_norm: 0.7091961303211493, iteration: 462512
loss: 1.007694125175476,grad_norm: 0.999999830572194, iteration: 462513
loss: 1.0018024444580078,grad_norm: 0.7150176053088181, iteration: 462514
loss: 1.0667431354522705,grad_norm: 0.8392794571753329, iteration: 462515
loss: 0.9829714894294739,grad_norm: 0.7845339124201473, iteration: 462516
loss: 0.9825600385665894,grad_norm: 0.8207921303866644, iteration: 462517
loss: 1.0407168865203857,grad_norm: 0.9999999444562268, iteration: 462518
loss: 1.0395597219467163,grad_norm: 0.8634971182980303, iteration: 462519
loss: 1.045428991317749,grad_norm: 0.9206647742249109, iteration: 462520
loss: 1.0180397033691406,grad_norm: 0.6698924058832721, iteration: 462521
loss: 1.048704743385315,grad_norm: 0.9999995255824536, iteration: 462522
loss: 1.014407753944397,grad_norm: 0.8997592359217672, iteration: 462523
loss: 0.9676979780197144,grad_norm: 0.7603708419905938, iteration: 462524
loss: 0.9846327900886536,grad_norm: 0.9986407911723084, iteration: 462525
loss: 1.085841417312622,grad_norm: 0.999999591208982, iteration: 462526
loss: 0.9874386787414551,grad_norm: 0.7152058419920512, iteration: 462527
loss: 1.0991913080215454,grad_norm: 0.9999990926584245, iteration: 462528
loss: 1.0018893480300903,grad_norm: 0.7380125059769299, iteration: 462529
loss: 0.9939888715744019,grad_norm: 0.7503884665308529, iteration: 462530
loss: 0.9775763750076294,grad_norm: 0.8589301751556159, iteration: 462531
loss: 1.0278786420822144,grad_norm: 0.7307710997943611, iteration: 462532
loss: 1.0831156969070435,grad_norm: 0.9999991067448867, iteration: 462533
loss: 0.9753637313842773,grad_norm: 0.9043706874280982, iteration: 462534
loss: 1.0528488159179688,grad_norm: 0.8024350915680943, iteration: 462535
loss: 1.0155386924743652,grad_norm: 0.9210085537311274, iteration: 462536
loss: 1.0775456428527832,grad_norm: 0.8151427795570438, iteration: 462537
loss: 0.9700043797492981,grad_norm: 0.7450029794669677, iteration: 462538
loss: 1.0377155542373657,grad_norm: 0.9999991017072567, iteration: 462539
loss: 1.03327214717865,grad_norm: 0.8925103146267024, iteration: 462540
loss: 1.0026769638061523,grad_norm: 0.7874321561143108, iteration: 462541
loss: 1.05359947681427,grad_norm: 0.9999996698173281, iteration: 462542
loss: 0.9946724772453308,grad_norm: 0.6370681027158329, iteration: 462543
loss: 1.0236777067184448,grad_norm: 0.9042194733779566, iteration: 462544
loss: 0.9994454383850098,grad_norm: 0.6770111870811233, iteration: 462545
loss: 0.9860150814056396,grad_norm: 0.8320655034519823, iteration: 462546
loss: 1.0746009349822998,grad_norm: 0.8305477423962453, iteration: 462547
loss: 1.0840411186218262,grad_norm: 0.9999992734869853, iteration: 462548
loss: 0.9859390258789062,grad_norm: 0.8520449384281971, iteration: 462549
loss: 0.9881895184516907,grad_norm: 0.6472264037476368, iteration: 462550
loss: 1.0277998447418213,grad_norm: 0.9329991342719176, iteration: 462551
loss: 1.0246070623397827,grad_norm: 0.8144243512091681, iteration: 462552
loss: 1.0093728303909302,grad_norm: 0.9999991286926979, iteration: 462553
loss: 0.9632797241210938,grad_norm: 0.7853142418344038, iteration: 462554
loss: 0.9833627939224243,grad_norm: 0.7795713832032235, iteration: 462555
loss: 1.0088175535202026,grad_norm: 0.7271361955597041, iteration: 462556
loss: 1.1870026588439941,grad_norm: 0.9999998063446613, iteration: 462557
loss: 1.0971742868423462,grad_norm: 0.8523151534877217, iteration: 462558
loss: 1.0694751739501953,grad_norm: 0.9999991638213674, iteration: 462559
loss: 0.9654172658920288,grad_norm: 0.7448840603986825, iteration: 462560
loss: 0.9788211584091187,grad_norm: 0.7682450081954565, iteration: 462561
loss: 1.0255426168441772,grad_norm: 0.7842702943975975, iteration: 462562
loss: 1.0553350448608398,grad_norm: 0.9999994385000677, iteration: 462563
loss: 1.0444438457489014,grad_norm: 0.9032316155695786, iteration: 462564
loss: 1.006858229637146,grad_norm: 0.9999999479399598, iteration: 462565
loss: 0.989105761051178,grad_norm: 0.6574975070671509, iteration: 462566
loss: 0.9951255917549133,grad_norm: 0.6730713361512636, iteration: 462567
loss: 1.026105523109436,grad_norm: 0.8997062104282849, iteration: 462568
loss: 1.0102159976959229,grad_norm: 0.7649283561702924, iteration: 462569
loss: 0.9815797805786133,grad_norm: 0.999999001678241, iteration: 462570
loss: 1.0096553564071655,grad_norm: 0.8445283627709005, iteration: 462571
loss: 1.0677363872528076,grad_norm: 0.999999162525491, iteration: 462572
loss: 0.9776404500007629,grad_norm: 0.999999095524138, iteration: 462573
loss: 1.0015504360198975,grad_norm: 0.7037592542514339, iteration: 462574
loss: 1.0099217891693115,grad_norm: 0.9457977528015006, iteration: 462575
loss: 1.0065219402313232,grad_norm: 0.7484375994164975, iteration: 462576
loss: 1.0060964822769165,grad_norm: 0.7056394833482785, iteration: 462577
loss: 1.0076806545257568,grad_norm: 0.721637944752195, iteration: 462578
loss: 0.985079288482666,grad_norm: 0.814267113532088, iteration: 462579
loss: 0.9812402725219727,grad_norm: 0.7063678117454273, iteration: 462580
loss: 1.0035537481307983,grad_norm: 0.7826672555075637, iteration: 462581
loss: 0.9871650338172913,grad_norm: 0.8076644826963626, iteration: 462582
loss: 1.0084222555160522,grad_norm: 0.9140358291744952, iteration: 462583
loss: 0.9895246028900146,grad_norm: 0.7305721712124111, iteration: 462584
loss: 1.0226752758026123,grad_norm: 0.9999999159846853, iteration: 462585
loss: 0.9867918491363525,grad_norm: 0.8742228736219859, iteration: 462586
loss: 1.015175461769104,grad_norm: 0.8825508907223923, iteration: 462587
loss: 1.1022377014160156,grad_norm: 0.9999997677440574, iteration: 462588
loss: 0.9799246191978455,grad_norm: 0.7839974755937861, iteration: 462589
loss: 1.000977873802185,grad_norm: 0.7398415738177199, iteration: 462590
loss: 1.0226690769195557,grad_norm: 0.7800546645538966, iteration: 462591
loss: 0.9951369166374207,grad_norm: 0.7662341227171056, iteration: 462592
loss: 1.032494306564331,grad_norm: 0.9999989692371641, iteration: 462593
loss: 1.0110989809036255,grad_norm: 0.9883234188449314, iteration: 462594
loss: 0.9920923709869385,grad_norm: 0.806559047394322, iteration: 462595
loss: 1.02653169631958,grad_norm: 0.8158265109564193, iteration: 462596
loss: 1.0235776901245117,grad_norm: 0.9999998953460298, iteration: 462597
loss: 1.053537130355835,grad_norm: 0.7725995448181044, iteration: 462598
loss: 0.9758782982826233,grad_norm: 0.8359236711041991, iteration: 462599
loss: 0.968138575553894,grad_norm: 0.8029134750381961, iteration: 462600
loss: 1.0217524766921997,grad_norm: 0.9999995803500609, iteration: 462601
loss: 0.9646525979042053,grad_norm: 0.8454026792216179, iteration: 462602
loss: 1.1143286228179932,grad_norm: 0.9999990837472426, iteration: 462603
loss: 0.9930335879325867,grad_norm: 0.6264821847027954, iteration: 462604
loss: 1.0066429376602173,grad_norm: 0.9999996108830184, iteration: 462605
loss: 0.9981645345687866,grad_norm: 0.8228563056951163, iteration: 462606
loss: 0.9926974177360535,grad_norm: 0.7116108500692172, iteration: 462607
loss: 1.0427089929580688,grad_norm: 0.9999993828040243, iteration: 462608
loss: 0.9887104630470276,grad_norm: 0.7164224946060507, iteration: 462609
loss: 0.9762457609176636,grad_norm: 0.6609032861850603, iteration: 462610
loss: 1.035149097442627,grad_norm: 0.912685636680653, iteration: 462611
loss: 1.04694664478302,grad_norm: 0.8293654786676321, iteration: 462612
loss: 1.0081863403320312,grad_norm: 0.9999995262633398, iteration: 462613
loss: 0.9554582834243774,grad_norm: 0.7104980422690163, iteration: 462614
loss: 1.0155720710754395,grad_norm: 0.7185866203042613, iteration: 462615
loss: 1.0147444009780884,grad_norm: 0.7987296375647502, iteration: 462616
loss: 0.9926854968070984,grad_norm: 0.7067511118132774, iteration: 462617
loss: 1.0798009634017944,grad_norm: 0.9146577889879345, iteration: 462618
loss: 1.0000144243240356,grad_norm: 0.7627577288016593, iteration: 462619
loss: 1.0241446495056152,grad_norm: 0.9999993435701986, iteration: 462620
loss: 1.1443991661071777,grad_norm: 0.9999998016113634, iteration: 462621
loss: 0.9797627329826355,grad_norm: 0.8216928746477213, iteration: 462622
loss: 1.0373501777648926,grad_norm: 0.9999999563059162, iteration: 462623
loss: 1.0070277452468872,grad_norm: 0.7576683581931544, iteration: 462624
loss: 1.0042895078659058,grad_norm: 0.8820939475275509, iteration: 462625
loss: 0.9747968912124634,grad_norm: 0.6725854975665667, iteration: 462626
loss: 1.148270845413208,grad_norm: 0.9999995369029678, iteration: 462627
loss: 0.9964970946311951,grad_norm: 0.7210917620704324, iteration: 462628
loss: 1.0023375749588013,grad_norm: 0.8499421600474222, iteration: 462629
loss: 1.0386959314346313,grad_norm: 0.9999990725401604, iteration: 462630
loss: 0.9925188422203064,grad_norm: 0.755267193926798, iteration: 462631
loss: 0.9615879654884338,grad_norm: 0.6938835776157865, iteration: 462632
loss: 1.0420219898223877,grad_norm: 0.999999547808777, iteration: 462633
loss: 0.9918414354324341,grad_norm: 0.8227475916216721, iteration: 462634
loss: 0.9838194251060486,grad_norm: 0.7536169188322773, iteration: 462635
loss: 1.0157663822174072,grad_norm: 0.7192494735188182, iteration: 462636
loss: 0.9995457530021667,grad_norm: 0.807933593669225, iteration: 462637
loss: 0.960432231426239,grad_norm: 0.6416143056753378, iteration: 462638
loss: 1.030267596244812,grad_norm: 0.9999994376922203, iteration: 462639
loss: 1.0022649765014648,grad_norm: 0.7764107838358563, iteration: 462640
loss: 0.9882310628890991,grad_norm: 0.8464446923402433, iteration: 462641
loss: 1.0158007144927979,grad_norm: 0.8417448410594436, iteration: 462642
loss: 0.9813594222068787,grad_norm: 0.6043361322067089, iteration: 462643
loss: 0.995781421661377,grad_norm: 0.8116294228938937, iteration: 462644
loss: 1.0245190858840942,grad_norm: 0.7820967266887457, iteration: 462645
loss: 1.0532491207122803,grad_norm: 0.6626838434037855, iteration: 462646
loss: 0.9483593702316284,grad_norm: 0.7745884506291725, iteration: 462647
loss: 0.9941438436508179,grad_norm: 0.7725433624462094, iteration: 462648
loss: 1.0217169523239136,grad_norm: 0.7935366891502322, iteration: 462649
loss: 1.0542471408843994,grad_norm: 0.9999998258570131, iteration: 462650
loss: 0.9998589158058167,grad_norm: 0.6100778157530237, iteration: 462651
loss: 0.9890605807304382,grad_norm: 0.7033312213448637, iteration: 462652
loss: 1.0169984102249146,grad_norm: 0.6758254599515846, iteration: 462653
loss: 1.0878854990005493,grad_norm: 0.9999992067943141, iteration: 462654
loss: 1.0136512517929077,grad_norm: 0.8869539251108802, iteration: 462655
loss: 0.9932517409324646,grad_norm: 0.7676555611132335, iteration: 462656
loss: 0.9987866878509521,grad_norm: 0.7789954829528701, iteration: 462657
loss: 1.0065308809280396,grad_norm: 0.7739274856194445, iteration: 462658
loss: 1.0653682947158813,grad_norm: 0.9999998782981414, iteration: 462659
loss: 0.994675874710083,grad_norm: 0.9999991336568146, iteration: 462660
loss: 1.037095546722412,grad_norm: 0.9677309855067405, iteration: 462661
loss: 1.0388200283050537,grad_norm: 0.7283369915302457, iteration: 462662
loss: 1.0336668491363525,grad_norm: 0.9999996514677695, iteration: 462663
loss: 0.9858977794647217,grad_norm: 0.7645090333181447, iteration: 462664
loss: 0.994221568107605,grad_norm: 0.7324424857282344, iteration: 462665
loss: 1.0274543762207031,grad_norm: 0.721280755101972, iteration: 462666
loss: 1.0360007286071777,grad_norm: 0.6195004994082836, iteration: 462667
loss: 1.0399144887924194,grad_norm: 0.9999998493413687, iteration: 462668
loss: 1.014542818069458,grad_norm: 0.8194292750345478, iteration: 462669
loss: 0.9961402416229248,grad_norm: 0.9307535602383745, iteration: 462670
loss: 1.001127004623413,grad_norm: 0.7655371348779962, iteration: 462671
loss: 0.9850630760192871,grad_norm: 0.6992300819439363, iteration: 462672
loss: 0.9648619890213013,grad_norm: 0.8139071309041901, iteration: 462673
loss: 1.043468713760376,grad_norm: 0.7917513417394966, iteration: 462674
loss: 1.1348882913589478,grad_norm: 0.9999993012403734, iteration: 462675
loss: 1.029102087020874,grad_norm: 0.9062126580266155, iteration: 462676
loss: 0.9893413186073303,grad_norm: 0.9944976213720014, iteration: 462677
loss: 1.0333210229873657,grad_norm: 0.8090890310614005, iteration: 462678
loss: 1.0292390584945679,grad_norm: 0.8571633006413677, iteration: 462679
loss: 1.0311251878738403,grad_norm: 0.9999993466365502, iteration: 462680
loss: 1.063429355621338,grad_norm: 0.9999994687890652, iteration: 462681
loss: 1.0023316144943237,grad_norm: 0.6030812252931552, iteration: 462682
loss: 0.981810986995697,grad_norm: 0.8192889732064547, iteration: 462683
loss: 0.9642911553382874,grad_norm: 0.8023867335851749, iteration: 462684
loss: 0.9916859269142151,grad_norm: 0.6719365526085759, iteration: 462685
loss: 0.9847326874732971,grad_norm: 0.7404439754587239, iteration: 462686
loss: 1.0179882049560547,grad_norm: 0.6941372652086377, iteration: 462687
loss: 1.0296344757080078,grad_norm: 0.9999993134441599, iteration: 462688
loss: 1.0073906183242798,grad_norm: 0.7610143104584094, iteration: 462689
loss: 1.0067553520202637,grad_norm: 0.7502163801939182, iteration: 462690
loss: 0.9829921722412109,grad_norm: 0.9999993437996736, iteration: 462691
loss: 1.0382944345474243,grad_norm: 0.9999998620235224, iteration: 462692
loss: 0.9936233758926392,grad_norm: 0.7616340565547031, iteration: 462693
loss: 1.0253939628601074,grad_norm: 0.9999998749573692, iteration: 462694
loss: 1.0631170272827148,grad_norm: 0.9458441743052652, iteration: 462695
loss: 0.9854245185852051,grad_norm: 0.8663289359849042, iteration: 462696
loss: 1.005100965499878,grad_norm: 0.6572525086027362, iteration: 462697
loss: 1.003804326057434,grad_norm: 0.8265639927172055, iteration: 462698
loss: 0.9967043399810791,grad_norm: 0.7820353759324641, iteration: 462699
loss: 1.0106077194213867,grad_norm: 0.7802708539597103, iteration: 462700
loss: 1.000974178314209,grad_norm: 0.6417423423950219, iteration: 462701
loss: 0.9844664931297302,grad_norm: 0.75022435355396, iteration: 462702
loss: 1.0048954486846924,grad_norm: 0.7690417601949164, iteration: 462703
loss: 1.0153166055679321,grad_norm: 0.8296595007089584, iteration: 462704
loss: 0.9675562381744385,grad_norm: 0.8263866707556298, iteration: 462705
loss: 0.9754745364189148,grad_norm: 0.9474863888004161, iteration: 462706
loss: 1.038846492767334,grad_norm: 0.9999995838909701, iteration: 462707
loss: 0.9956174492835999,grad_norm: 0.8966915465663783, iteration: 462708
loss: 1.0216052532196045,grad_norm: 0.8730762118100485, iteration: 462709
loss: 1.0195350646972656,grad_norm: 0.8587128915994432, iteration: 462710
loss: 1.0240654945373535,grad_norm: 0.8803945365252301, iteration: 462711
loss: 1.0177878141403198,grad_norm: 0.8184677515366132, iteration: 462712
loss: 0.9939987063407898,grad_norm: 0.7929944946169837, iteration: 462713
loss: 1.000590443611145,grad_norm: 0.8310038652668253, iteration: 462714
loss: 0.9535305500030518,grad_norm: 0.7563679035026709, iteration: 462715
loss: 0.9507631063461304,grad_norm: 0.7264712086084713, iteration: 462716
loss: 0.9373298287391663,grad_norm: 0.7344214051730369, iteration: 462717
loss: 1.030659794807434,grad_norm: 0.999999106016766, iteration: 462718
loss: 1.02226984500885,grad_norm: 0.8005441246931471, iteration: 462719
loss: 1.0334978103637695,grad_norm: 0.9999998895018545, iteration: 462720
loss: 1.0385140180587769,grad_norm: 0.9999995044347318, iteration: 462721
loss: 1.0310848951339722,grad_norm: 0.7833282498643858, iteration: 462722
loss: 0.9876636266708374,grad_norm: 0.765333822304899, iteration: 462723
loss: 0.9712201952934265,grad_norm: 0.6994884368432985, iteration: 462724
loss: 1.0409566164016724,grad_norm: 0.9999994613803022, iteration: 462725
loss: 0.9793040156364441,grad_norm: 0.7460537702501859, iteration: 462726
loss: 0.9871704578399658,grad_norm: 0.7215850110458584, iteration: 462727
loss: 0.9921713471412659,grad_norm: 0.677066035584527, iteration: 462728
loss: 1.014306664466858,grad_norm: 0.8420116025116168, iteration: 462729
loss: 0.9829587340354919,grad_norm: 0.6137751200259682, iteration: 462730
loss: 0.9928446412086487,grad_norm: 0.5917051467635233, iteration: 462731
loss: 1.035471796989441,grad_norm: 0.9023580292425737, iteration: 462732
loss: 0.9702549576759338,grad_norm: 0.8981674423921752, iteration: 462733
loss: 1.0135703086853027,grad_norm: 0.6974639848715282, iteration: 462734
loss: 0.9889726638793945,grad_norm: 0.7871331822404238, iteration: 462735
loss: 1.0038702487945557,grad_norm: 0.8299833952442418, iteration: 462736
loss: 1.04056978225708,grad_norm: 0.9183212066446605, iteration: 462737
loss: 1.0492364168167114,grad_norm: 0.9999998315157915, iteration: 462738
loss: 1.0062657594680786,grad_norm: 0.9999995882099219, iteration: 462739
loss: 0.9751467704772949,grad_norm: 0.780789191841935, iteration: 462740
loss: 0.9931657314300537,grad_norm: 0.9999997755670903, iteration: 462741
loss: 1.0223978757858276,grad_norm: 0.7603204335228413, iteration: 462742
loss: 1.021983027458191,grad_norm: 0.9999997568648642, iteration: 462743
loss: 1.031628966331482,grad_norm: 0.9999996249431682, iteration: 462744
loss: 0.9772478938102722,grad_norm: 0.7829919640909487, iteration: 462745
loss: 0.9836550951004028,grad_norm: 0.8145334207138774, iteration: 462746
loss: 0.9419285655021667,grad_norm: 0.7852482858802191, iteration: 462747
loss: 0.9899448752403259,grad_norm: 0.8883872285773444, iteration: 462748
loss: 1.0137232542037964,grad_norm: 0.7250440857437613, iteration: 462749
loss: 0.9653974771499634,grad_norm: 0.7314471920647744, iteration: 462750
loss: 0.9824087619781494,grad_norm: 0.6924158563425473, iteration: 462751
loss: 1.0349916219711304,grad_norm: 0.824669725981869, iteration: 462752
loss: 0.9962223768234253,grad_norm: 0.7347266949800333, iteration: 462753
loss: 1.0026812553405762,grad_norm: 0.6917407509391224, iteration: 462754
loss: 1.0355299711227417,grad_norm: 0.9999993881068278, iteration: 462755
loss: 0.9589356780052185,grad_norm: 0.8056159827987214, iteration: 462756
loss: 1.0293744802474976,grad_norm: 0.7324173465794422, iteration: 462757
loss: 1.0443929433822632,grad_norm: 0.7979916950167556, iteration: 462758
loss: 1.012478232383728,grad_norm: 0.6489203768177232, iteration: 462759
loss: 1.0228718519210815,grad_norm: 0.8795482120067294, iteration: 462760
loss: 1.0333501100540161,grad_norm: 0.9999999001065365, iteration: 462761
loss: 1.0108023881912231,grad_norm: 0.8688619857626937, iteration: 462762
loss: 1.0073412656784058,grad_norm: 0.8497572661404389, iteration: 462763
loss: 1.0079452991485596,grad_norm: 0.9871859397029371, iteration: 462764
loss: 1.0901609659194946,grad_norm: 0.9999998097544991, iteration: 462765
loss: 1.053000569343567,grad_norm: 0.6817941691208611, iteration: 462766
loss: 0.9838714003562927,grad_norm: 0.8267271594036671, iteration: 462767
loss: 1.002076268196106,grad_norm: 0.9433004384018407, iteration: 462768
loss: 1.047078013420105,grad_norm: 0.7383007518091684, iteration: 462769
loss: 0.9801129102706909,grad_norm: 0.7795329068136336, iteration: 462770
loss: 0.9838029742240906,grad_norm: 0.8111076866393903, iteration: 462771
loss: 1.0128306150436401,grad_norm: 0.9999995547243827, iteration: 462772
loss: 0.9714695811271667,grad_norm: 0.8165843934284222, iteration: 462773
loss: 0.9845839738845825,grad_norm: 0.7856143855253894, iteration: 462774
loss: 0.9685916304588318,grad_norm: 0.6919957881030168, iteration: 462775
loss: 0.997901976108551,grad_norm: 0.6593110102851736, iteration: 462776
loss: 1.0408114194869995,grad_norm: 0.9999991387170889, iteration: 462777
loss: 1.0061405897140503,grad_norm: 0.7013586131299585, iteration: 462778
loss: 1.0038148164749146,grad_norm: 0.9218419276223309, iteration: 462779
loss: 1.0103517770767212,grad_norm: 0.8573747083700421, iteration: 462780
loss: 0.9956654906272888,grad_norm: 0.9423745765221455, iteration: 462781
loss: 1.0006444454193115,grad_norm: 0.7702086002104716, iteration: 462782
loss: 0.9817450642585754,grad_norm: 0.7518043161935922, iteration: 462783
loss: 0.9897230267524719,grad_norm: 0.7957034250066783, iteration: 462784
loss: 1.0141010284423828,grad_norm: 0.6530646208943836, iteration: 462785
loss: 0.9254187345504761,grad_norm: 0.8173322510794493, iteration: 462786
loss: 0.9925210475921631,grad_norm: 0.7834539036237619, iteration: 462787
loss: 1.1368814706802368,grad_norm: 1.0000000023906015, iteration: 462788
loss: 1.0318740606307983,grad_norm: 0.7761628908505354, iteration: 462789
loss: 0.9983024001121521,grad_norm: 0.661564475810566, iteration: 462790
loss: 1.012087106704712,grad_norm: 0.8754661819431199, iteration: 462791
loss: 1.0167502164840698,grad_norm: 0.9999991785784472, iteration: 462792
loss: 1.0196397304534912,grad_norm: 0.8104265523875932, iteration: 462793
loss: 1.0260071754455566,grad_norm: 0.8229768723018681, iteration: 462794
loss: 0.999203622341156,grad_norm: 0.843569299060708, iteration: 462795
loss: 0.995093584060669,grad_norm: 0.6941343396306675, iteration: 462796
loss: 1.0104118585586548,grad_norm: 0.8145625006987905, iteration: 462797
loss: 0.9914395213127136,grad_norm: 0.7138947116017796, iteration: 462798
loss: 0.9975051879882812,grad_norm: 0.7131656591040915, iteration: 462799
loss: 0.9897012710571289,grad_norm: 0.809514723857009, iteration: 462800
loss: 0.9941157698631287,grad_norm: 0.5993423258869324, iteration: 462801
loss: 1.0224257707595825,grad_norm: 0.7278976436960698, iteration: 462802
loss: 1.0086588859558105,grad_norm: 0.9999991096245321, iteration: 462803
loss: 1.0372858047485352,grad_norm: 0.797133887748714, iteration: 462804
loss: 1.0456231832504272,grad_norm: 0.9999990524804724, iteration: 462805
loss: 0.9791185855865479,grad_norm: 0.9538006494807773, iteration: 462806
loss: 1.0017671585083008,grad_norm: 0.7523777575623304, iteration: 462807
loss: 1.005621075630188,grad_norm: 0.805957727431503, iteration: 462808
loss: 0.9642916321754456,grad_norm: 0.691348141736294, iteration: 462809
loss: 1.0971415042877197,grad_norm: 0.9999997365984987, iteration: 462810
loss: 0.9788609743118286,grad_norm: 0.7874783597125475, iteration: 462811
loss: 1.0320942401885986,grad_norm: 0.999999356310745, iteration: 462812
loss: 1.02231764793396,grad_norm: 0.7510625007314652, iteration: 462813
loss: 1.0694764852523804,grad_norm: 0.9999991954817559, iteration: 462814
loss: 1.036350965499878,grad_norm: 0.858765466338585, iteration: 462815
loss: 0.9806969165802002,grad_norm: 0.9438483584142944, iteration: 462816
loss: 1.001084566116333,grad_norm: 0.6830426727531331, iteration: 462817
loss: 0.9829854369163513,grad_norm: 0.6895111031936828, iteration: 462818
loss: 1.0627089738845825,grad_norm: 0.9999999214700457, iteration: 462819
loss: 0.980591893196106,grad_norm: 0.7412032493696008, iteration: 462820
loss: 1.039884328842163,grad_norm: 0.9239843395804753, iteration: 462821
loss: 0.9733597636222839,grad_norm: 0.6910484159840287, iteration: 462822
loss: 0.9798980355262756,grad_norm: 0.7115376062165532, iteration: 462823
loss: 1.0183364152908325,grad_norm: 0.844282084000332, iteration: 462824
loss: 1.0679116249084473,grad_norm: 0.8842218332560672, iteration: 462825
loss: 1.0082461833953857,grad_norm: 0.6889491353059277, iteration: 462826
loss: 0.9788336753845215,grad_norm: 0.7771578546594107, iteration: 462827
loss: 1.0244731903076172,grad_norm: 1.0000000442343597, iteration: 462828
loss: 1.0684442520141602,grad_norm: 0.9999999222974857, iteration: 462829
loss: 0.97795569896698,grad_norm: 0.7268262480145451, iteration: 462830
loss: 1.0246385335922241,grad_norm: 0.9323561309811125, iteration: 462831
loss: 0.9877025485038757,grad_norm: 0.7548541253501563, iteration: 462832
loss: 1.0122743844985962,grad_norm: 0.8662880240167193, iteration: 462833
loss: 0.9663640260696411,grad_norm: 0.8504427508566994, iteration: 462834
loss: 1.0069243907928467,grad_norm: 0.7324878114625548, iteration: 462835
loss: 1.0586506128311157,grad_norm: 0.9094371969154239, iteration: 462836
loss: 1.0293394327163696,grad_norm: 0.7736608214524717, iteration: 462837
loss: 1.0016170740127563,grad_norm: 0.8585996888970903, iteration: 462838
loss: 0.9869568943977356,grad_norm: 0.939013111135517, iteration: 462839
loss: 0.9999202489852905,grad_norm: 0.8336557983448061, iteration: 462840
loss: 0.9907053112983704,grad_norm: 0.6846715428979538, iteration: 462841
loss: 1.0031541585922241,grad_norm: 0.6108505942095643, iteration: 462842
loss: 0.9935300946235657,grad_norm: 0.6485459605662227, iteration: 462843
loss: 0.9826305508613586,grad_norm: 0.6731095574120379, iteration: 462844
loss: 0.9347371459007263,grad_norm: 0.7486969678765484, iteration: 462845
loss: 1.0357639789581299,grad_norm: 0.9999998667639755, iteration: 462846
loss: 1.0222015380859375,grad_norm: 0.7667577824925371, iteration: 462847
loss: 0.998075544834137,grad_norm: 0.9999994021380485, iteration: 462848
loss: 0.9561140537261963,grad_norm: 0.7923265496138154, iteration: 462849
loss: 1.0548818111419678,grad_norm: 0.9999999373373124, iteration: 462850
loss: 1.019005537033081,grad_norm: 0.9999998599695337, iteration: 462851
loss: 0.983676552772522,grad_norm: 0.8018222648563468, iteration: 462852
loss: 0.9816542863845825,grad_norm: 0.8285986645723491, iteration: 462853
loss: 0.9950945377349854,grad_norm: 0.6712603722053162, iteration: 462854
loss: 1.0818697214126587,grad_norm: 0.9999991748697103, iteration: 462855
loss: 1.0439351797103882,grad_norm: 0.7714299227348974, iteration: 462856
loss: 0.9891132116317749,grad_norm: 0.6526225662275178, iteration: 462857
loss: 0.9571888446807861,grad_norm: 0.798605407400485, iteration: 462858
loss: 0.9658204913139343,grad_norm: 0.7570958117133871, iteration: 462859
loss: 0.9879963994026184,grad_norm: 0.6858357787409624, iteration: 462860
loss: 0.9960643649101257,grad_norm: 0.6407522564790783, iteration: 462861
loss: 1.0038834810256958,grad_norm: 0.6957700370203634, iteration: 462862
loss: 1.0100204944610596,grad_norm: 0.8194187985780282, iteration: 462863
loss: 0.9756803512573242,grad_norm: 0.6508551528472827, iteration: 462864
loss: 1.0403045415878296,grad_norm: 0.840366945368147, iteration: 462865
loss: 0.9210277199745178,grad_norm: 0.8219878385295235, iteration: 462866
loss: 1.024143099784851,grad_norm: 0.9999990252586484, iteration: 462867
loss: 1.0447999238967896,grad_norm: 0.8636041328999765, iteration: 462868
loss: 0.9789999127388,grad_norm: 0.7536548650134074, iteration: 462869
loss: 1.0231881141662598,grad_norm: 0.9044538523445732, iteration: 462870
loss: 1.0063539743423462,grad_norm: 0.8826924743659132, iteration: 462871
loss: 0.9825011491775513,grad_norm: 0.6707304835118393, iteration: 462872
loss: 0.9688876867294312,grad_norm: 0.6819156716939471, iteration: 462873
loss: 0.9992839097976685,grad_norm: 0.7044304156176843, iteration: 462874
loss: 1.0035957098007202,grad_norm: 0.8977970116635022, iteration: 462875
loss: 1.0095585584640503,grad_norm: 0.8465599139555497, iteration: 462876
loss: 0.9998244047164917,grad_norm: 0.8536202067673181, iteration: 462877
loss: 0.986445426940918,grad_norm: 0.7386096589719454, iteration: 462878
loss: 1.0185346603393555,grad_norm: 0.9639583032328048, iteration: 462879
loss: 1.050400733947754,grad_norm: 0.999999886285553, iteration: 462880
loss: 0.9626083970069885,grad_norm: 0.809686865956433, iteration: 462881
loss: 0.9875357747077942,grad_norm: 0.7414904476241101, iteration: 462882
loss: 1.0614675283432007,grad_norm: 0.999999245507524, iteration: 462883
loss: 1.0186084508895874,grad_norm: 0.5885977074318128, iteration: 462884
loss: 0.9934422373771667,grad_norm: 0.8272051593105699, iteration: 462885
loss: 0.9812477231025696,grad_norm: 0.9390646411097998, iteration: 462886
loss: 1.0160514116287231,grad_norm: 0.9999991391062308, iteration: 462887
loss: 1.0034080743789673,grad_norm: 0.6955370657172113, iteration: 462888
loss: 1.0191067457199097,grad_norm: 0.7710106358786821, iteration: 462889
loss: 1.0286316871643066,grad_norm: 0.7895424446679713, iteration: 462890
loss: 1.0606036186218262,grad_norm: 0.7631979126933884, iteration: 462891
loss: 0.9925293326377869,grad_norm: 0.8704655457145435, iteration: 462892
loss: 1.015880823135376,grad_norm: 0.7969627164868217, iteration: 462893
loss: 0.9928140044212341,grad_norm: 0.8228417990366627, iteration: 462894
loss: 1.004793405532837,grad_norm: 0.6983018286068339, iteration: 462895
loss: 1.011954426765442,grad_norm: 0.9999996758293612, iteration: 462896
loss: 1.0627037286758423,grad_norm: 0.9999999787291306, iteration: 462897
loss: 0.9826294183731079,grad_norm: 0.8654698602017458, iteration: 462898
loss: 1.033331036567688,grad_norm: 0.875643620210943, iteration: 462899
loss: 1.033074975013733,grad_norm: 0.9510796927239535, iteration: 462900
loss: 0.9980528950691223,grad_norm: 0.8493898281786283, iteration: 462901
loss: 0.9799531698226929,grad_norm: 0.8689556305912446, iteration: 462902
loss: 0.9712545275688171,grad_norm: 0.7243456608394688, iteration: 462903
loss: 0.9921153783798218,grad_norm: 0.8223058666028634, iteration: 462904
loss: 0.9851203560829163,grad_norm: 0.8625571434042939, iteration: 462905
loss: 1.0212721824645996,grad_norm: 0.9137413974299843, iteration: 462906
loss: 0.9461435675621033,grad_norm: 0.743252886419088, iteration: 462907
loss: 1.0635478496551514,grad_norm: 0.7083543266790079, iteration: 462908
loss: 1.0238664150238037,grad_norm: 0.7119870967440385, iteration: 462909
loss: 0.9920758605003357,grad_norm: 0.7172739408302093, iteration: 462910
loss: 0.9926573038101196,grad_norm: 0.9999993194633712, iteration: 462911
loss: 0.9974731206893921,grad_norm: 0.7904461212352835, iteration: 462912
loss: 0.9954084753990173,grad_norm: 0.8014561530428277, iteration: 462913
loss: 0.9543269872665405,grad_norm: 0.7760046206245317, iteration: 462914
loss: 1.0960910320281982,grad_norm: 0.7155540576516337, iteration: 462915
loss: 1.0276721715927124,grad_norm: 0.7057540137191731, iteration: 462916
loss: 1.0075558423995972,grad_norm: 0.6803180465245453, iteration: 462917
loss: 0.9836884140968323,grad_norm: 0.8846923055336642, iteration: 462918
loss: 0.9663633108139038,grad_norm: 0.9999994582646284, iteration: 462919
loss: 1.0189003944396973,grad_norm: 0.999998999257826, iteration: 462920
loss: 0.9576334953308105,grad_norm: 0.6983266051821712, iteration: 462921
loss: 1.0084805488586426,grad_norm: 0.6361278275789459, iteration: 462922
loss: 0.9893532395362854,grad_norm: 0.8287135722297304, iteration: 462923
loss: 0.9847676753997803,grad_norm: 0.828529469262979, iteration: 462924
loss: 1.0308369398117065,grad_norm: 0.7380678453247497, iteration: 462925
loss: 1.0464738607406616,grad_norm: 0.7624193280642226, iteration: 462926
loss: 1.0655786991119385,grad_norm: 0.8253828414112887, iteration: 462927
loss: 0.9987234473228455,grad_norm: 0.6045294948324643, iteration: 462928
loss: 0.9959125518798828,grad_norm: 0.8867481411889147, iteration: 462929
loss: 1.0427175760269165,grad_norm: 0.9999994812004861, iteration: 462930
loss: 0.9884735345840454,grad_norm: 0.940564534268557, iteration: 462931
loss: 0.9445791840553284,grad_norm: 0.7836369404269294, iteration: 462932
loss: 0.9921429753303528,grad_norm: 0.9334297600350733, iteration: 462933
loss: 1.0325433015823364,grad_norm: 0.9999997518360514, iteration: 462934
loss: 1.0354794263839722,grad_norm: 0.831581085364115, iteration: 462935
loss: 1.0039509534835815,grad_norm: 0.7938979560940757, iteration: 462936
loss: 1.0369668006896973,grad_norm: 0.6510561093557803, iteration: 462937
loss: 0.9909666180610657,grad_norm: 0.6751921797658236, iteration: 462938
loss: 1.0358514785766602,grad_norm: 0.7271249480655795, iteration: 462939
loss: 0.9927722811698914,grad_norm: 0.9347754371820799, iteration: 462940
loss: 0.9934483170509338,grad_norm: 0.8113889119662471, iteration: 462941
loss: 1.0185024738311768,grad_norm: 0.8264047207204467, iteration: 462942
loss: 0.9528586864471436,grad_norm: 0.7262416169960481, iteration: 462943
loss: 1.043225884437561,grad_norm: 0.9999998247792949, iteration: 462944
loss: 1.0005536079406738,grad_norm: 0.6089625389351352, iteration: 462945
loss: 0.9978310465812683,grad_norm: 0.782310392618567, iteration: 462946
loss: 1.0063731670379639,grad_norm: 0.5978889639053551, iteration: 462947
loss: 1.0094139575958252,grad_norm: 0.7633715252520445, iteration: 462948
loss: 0.9695862531661987,grad_norm: 0.9164238098075771, iteration: 462949
loss: 1.019989252090454,grad_norm: 0.7953649182980904, iteration: 462950
loss: 0.9379379749298096,grad_norm: 0.7525574844009741, iteration: 462951
loss: 1.025596261024475,grad_norm: 0.7848621258971975, iteration: 462952
loss: 1.027920126914978,grad_norm: 0.863937776036171, iteration: 462953
loss: 1.0339924097061157,grad_norm: 0.6475223542080049, iteration: 462954
loss: 1.1972026824951172,grad_norm: 0.9999991974284058, iteration: 462955
loss: 0.9831274151802063,grad_norm: 0.8317168953174287, iteration: 462956
loss: 1.023437738418579,grad_norm: 0.7633590263090072, iteration: 462957
loss: 1.015207290649414,grad_norm: 0.8288849976474354, iteration: 462958
loss: 0.9933069944381714,grad_norm: 0.6635614217131655, iteration: 462959
loss: 0.9991676211357117,grad_norm: 0.821333544688567, iteration: 462960
loss: 1.0127053260803223,grad_norm: 0.8410359896717557, iteration: 462961
loss: 0.9970263838768005,grad_norm: 0.760185251579842, iteration: 462962
loss: 0.9987795352935791,grad_norm: 0.8748160744523177, iteration: 462963
loss: 0.9918461441993713,grad_norm: 0.7102518610185067, iteration: 462964
loss: 1.000732183456421,grad_norm: 0.9109601270336982, iteration: 462965
loss: 0.9744430780410767,grad_norm: 0.8414839086231547, iteration: 462966
loss: 0.9846111536026001,grad_norm: 0.6946826388117892, iteration: 462967
loss: 1.0152238607406616,grad_norm: 0.8376383891446607, iteration: 462968
loss: 0.9716682434082031,grad_norm: 0.8337339077734426, iteration: 462969
loss: 1.0698966979980469,grad_norm: 0.9999994161007495, iteration: 462970
loss: 1.0084264278411865,grad_norm: 0.8131098098868315, iteration: 462971
loss: 0.994807779788971,grad_norm: 0.9332113747820059, iteration: 462972
loss: 0.9942865967750549,grad_norm: 0.7540212681123855, iteration: 462973
loss: 0.9806652069091797,grad_norm: 0.7347159759718401, iteration: 462974
loss: 0.9928737878799438,grad_norm: 0.7391805632383827, iteration: 462975
loss: 1.0404469966888428,grad_norm: 0.8345083408176603, iteration: 462976
loss: 1.063700556755066,grad_norm: 0.7980535768081969, iteration: 462977
loss: 1.006219744682312,grad_norm: 0.8857645233710093, iteration: 462978
loss: 1.083430528640747,grad_norm: 0.75989475453409, iteration: 462979
loss: 1.002088189125061,grad_norm: 0.5793050116155763, iteration: 462980
loss: 1.0439260005950928,grad_norm: 0.7919797814171096, iteration: 462981
loss: 1.0133211612701416,grad_norm: 0.7299939305534634, iteration: 462982
loss: 0.9736204147338867,grad_norm: 0.7990591533277646, iteration: 462983
loss: 0.9789302349090576,grad_norm: 0.9301867823710048, iteration: 462984
loss: 0.9923282265663147,grad_norm: 0.7392464302668754, iteration: 462985
loss: 1.0217902660369873,grad_norm: 0.7852588925135096, iteration: 462986
loss: 1.0369157791137695,grad_norm: 0.7241420622639516, iteration: 462987
loss: 1.0263237953186035,grad_norm: 0.9999990648034622, iteration: 462988
loss: 0.9792993068695068,grad_norm: 0.7313805706485741, iteration: 462989
loss: 0.9941619634628296,grad_norm: 0.6844593785028538, iteration: 462990
loss: 0.9501153826713562,grad_norm: 0.7430196797947365, iteration: 462991
loss: 0.9913344383239746,grad_norm: 0.7411469080393052, iteration: 462992
loss: 1.0452680587768555,grad_norm: 0.8446991010915625, iteration: 462993
loss: 1.02010977268219,grad_norm: 0.8363324211221254, iteration: 462994
loss: 0.9588229656219482,grad_norm: 0.6806286823591343, iteration: 462995
loss: 0.9951158761978149,grad_norm: 0.7596473811895872, iteration: 462996
loss: 1.0043833255767822,grad_norm: 0.8594484720606506, iteration: 462997
loss: 1.0246981382369995,grad_norm: 0.9999994383325609, iteration: 462998
loss: 1.0160510540008545,grad_norm: 0.7723012635834493, iteration: 462999
loss: 1.0114558935165405,grad_norm: 0.7162251851717133, iteration: 463000
loss: 1.0024418830871582,grad_norm: 0.8315844553444622, iteration: 463001
loss: 0.9782664179801941,grad_norm: 0.9999998465391026, iteration: 463002
loss: 0.9991037249565125,grad_norm: 0.7937384655097566, iteration: 463003
loss: 0.9995611906051636,grad_norm: 0.9232888162390643, iteration: 463004
loss: 1.0244868993759155,grad_norm: 0.7422146821864204, iteration: 463005
loss: 0.9906025528907776,grad_norm: 0.7542547700656589, iteration: 463006
loss: 1.0074501037597656,grad_norm: 0.7608394177541563, iteration: 463007
loss: 0.9879249930381775,grad_norm: 0.9524023075152828, iteration: 463008
loss: 0.9738809466362,grad_norm: 0.7607188782406529, iteration: 463009
loss: 1.011290192604065,grad_norm: 0.7467499940497996, iteration: 463010
loss: 0.9790840744972229,grad_norm: 0.6630835530845517, iteration: 463011
loss: 0.9798746109008789,grad_norm: 0.7878196361515165, iteration: 463012
loss: 0.9320416450500488,grad_norm: 0.8277947136218683, iteration: 463013
loss: 0.9885151386260986,grad_norm: 0.7283985368020918, iteration: 463014
loss: 1.0089908838272095,grad_norm: 0.6531059352207751, iteration: 463015
loss: 1.0052212476730347,grad_norm: 0.9175213613210708, iteration: 463016
loss: 1.0214024782180786,grad_norm: 0.7860467917440273, iteration: 463017
loss: 1.0129746198654175,grad_norm: 0.6964864186334407, iteration: 463018
loss: 0.9919875860214233,grad_norm: 0.8289324191751833, iteration: 463019
loss: 0.9906385540962219,grad_norm: 0.7682218203054645, iteration: 463020
loss: 0.9497407674789429,grad_norm: 0.7681124846870703, iteration: 463021
loss: 0.9693649411201477,grad_norm: 0.8232618706444117, iteration: 463022
loss: 1.0167640447616577,grad_norm: 0.9320607268026367, iteration: 463023
loss: 0.9720655679702759,grad_norm: 0.6482720807339006, iteration: 463024
loss: 1.0437102317810059,grad_norm: 0.8537554471810065, iteration: 463025
loss: 0.9396602511405945,grad_norm: 0.7459765740909395, iteration: 463026
loss: 1.0698591470718384,grad_norm: 0.9999996649272161, iteration: 463027
loss: 1.0211939811706543,grad_norm: 0.6891950263718687, iteration: 463028
loss: 0.9817755818367004,grad_norm: 0.7918891293543605, iteration: 463029
loss: 0.9677977561950684,grad_norm: 0.7088707306982227, iteration: 463030
loss: 0.9881713390350342,grad_norm: 0.6409206952771508, iteration: 463031
loss: 1.1657007932662964,grad_norm: 0.9999998695580197, iteration: 463032
loss: 1.0161151885986328,grad_norm: 0.7543884148617779, iteration: 463033
loss: 0.9897242784500122,grad_norm: 0.9999990797879822, iteration: 463034
loss: 1.0355368852615356,grad_norm: 0.8940653135063512, iteration: 463035
loss: 0.9842758178710938,grad_norm: 0.7300361655592628, iteration: 463036
loss: 1.0161405801773071,grad_norm: 0.8377954670844435, iteration: 463037
loss: 1.0243455171585083,grad_norm: 0.7941973822684143, iteration: 463038
loss: 1.0022839307785034,grad_norm: 0.7104219780715055, iteration: 463039
loss: 1.0343647003173828,grad_norm: 0.6173496576821043, iteration: 463040
loss: 1.0213589668273926,grad_norm: 0.777237759671388, iteration: 463041
loss: 1.0104005336761475,grad_norm: 0.7976555000754247, iteration: 463042
loss: 1.0222764015197754,grad_norm: 0.6410350512644846, iteration: 463043
loss: 0.9902827143669128,grad_norm: 0.7743289400188867, iteration: 463044
loss: 1.0052316188812256,grad_norm: 0.6512021968889031, iteration: 463045
loss: 0.9712497591972351,grad_norm: 0.885877347785982, iteration: 463046
loss: 1.0060216188430786,grad_norm: 0.7990125097470546, iteration: 463047
loss: 0.967133641242981,grad_norm: 0.8778601830492301, iteration: 463048
loss: 1.0156618356704712,grad_norm: 0.9999991026903874, iteration: 463049
loss: 1.0181574821472168,grad_norm: 0.6683703224198246, iteration: 463050
loss: 1.0166652202606201,grad_norm: 0.8099043176925104, iteration: 463051
loss: 0.9892332553863525,grad_norm: 0.9843742425554938, iteration: 463052
loss: 1.0170782804489136,grad_norm: 0.82479554578849, iteration: 463053
loss: 1.0337793827056885,grad_norm: 0.6823741236560035, iteration: 463054
loss: 0.9857082962989807,grad_norm: 0.7332759021795501, iteration: 463055
loss: 1.0176116228103638,grad_norm: 0.9999993160414181, iteration: 463056
loss: 1.019665002822876,grad_norm: 0.9172845706755384, iteration: 463057
loss: 0.9854222536087036,grad_norm: 0.8548156527328249, iteration: 463058
loss: 1.0179924964904785,grad_norm: 0.7575349711170266, iteration: 463059
loss: 1.073620080947876,grad_norm: 0.9999999946070931, iteration: 463060
loss: 1.0291271209716797,grad_norm: 0.8706289233792649, iteration: 463061
loss: 0.9880713224411011,grad_norm: 0.6512243594114255, iteration: 463062
loss: 0.9987649917602539,grad_norm: 0.8983554721953657, iteration: 463063
loss: 0.9997759461402893,grad_norm: 0.9999993983495453, iteration: 463064
loss: 0.9923596382141113,grad_norm: 0.9999999398195494, iteration: 463065
loss: 0.9721113443374634,grad_norm: 0.7024198247923162, iteration: 463066
loss: 1.0013494491577148,grad_norm: 0.7124327999056767, iteration: 463067
loss: 0.9927968382835388,grad_norm: 0.7226847778613172, iteration: 463068
loss: 1.022903323173523,grad_norm: 0.8396487062773255, iteration: 463069
loss: 0.9868790507316589,grad_norm: 0.7741218492491609, iteration: 463070
loss: 0.9710869789123535,grad_norm: 0.7614486777412989, iteration: 463071
loss: 1.0642881393432617,grad_norm: 0.7617083492824732, iteration: 463072
loss: 0.9762886166572571,grad_norm: 0.74965521421221, iteration: 463073
loss: 0.9739662408828735,grad_norm: 0.8665523579917235, iteration: 463074
loss: 0.9947210550308228,grad_norm: 0.9213996600957891, iteration: 463075
loss: 1.028846263885498,grad_norm: 0.7633291896436298, iteration: 463076
loss: 1.040769100189209,grad_norm: 0.6936461578666008, iteration: 463077
loss: 0.990852415561676,grad_norm: 0.7282588374193915, iteration: 463078
loss: 0.9850894212722778,grad_norm: 0.8888595288618141, iteration: 463079
loss: 0.9979880452156067,grad_norm: 0.9120994714415427, iteration: 463080
loss: 1.171805739402771,grad_norm: 0.9985248497053727, iteration: 463081
loss: 1.0167524814605713,grad_norm: 0.9551467645149332, iteration: 463082
loss: 1.0291966199874878,grad_norm: 0.8054606031451678, iteration: 463083
loss: 1.0402432680130005,grad_norm: 0.9999992729369694, iteration: 463084
loss: 0.9612541794776917,grad_norm: 0.6701496037705994, iteration: 463085
loss: 0.9914058446884155,grad_norm: 0.9705960783181157, iteration: 463086
loss: 0.9962384700775146,grad_norm: 0.7424604410581966, iteration: 463087
loss: 1.0060088634490967,grad_norm: 0.732045054738932, iteration: 463088
loss: 1.001769781112671,grad_norm: 0.795249750998379, iteration: 463089
loss: 0.9986971020698547,grad_norm: 0.8241454147972068, iteration: 463090
loss: 0.9689820408821106,grad_norm: 0.8011731972666669, iteration: 463091
loss: 1.0113753080368042,grad_norm: 0.7842286189115572, iteration: 463092
loss: 1.0118695497512817,grad_norm: 0.9398645323836663, iteration: 463093
loss: 1.0129055976867676,grad_norm: 0.7575192247202036, iteration: 463094
loss: 0.9654078483581543,grad_norm: 0.7745215174965756, iteration: 463095
loss: 0.9740901589393616,grad_norm: 0.7157445531164993, iteration: 463096
loss: 1.0553593635559082,grad_norm: 0.8385971294713288, iteration: 463097
loss: 1.051185965538025,grad_norm: 0.7566376239306117, iteration: 463098
loss: 1.0490639209747314,grad_norm: 0.999999987023192, iteration: 463099
loss: 0.9605894088745117,grad_norm: 0.9999992006734417, iteration: 463100
loss: 0.9861339330673218,grad_norm: 0.6802347668445917, iteration: 463101
loss: 1.00428307056427,grad_norm: 0.7023562874631781, iteration: 463102
loss: 0.9773320555686951,grad_norm: 0.7741290893567293, iteration: 463103
loss: 1.0205199718475342,grad_norm: 0.9999994516306093, iteration: 463104
loss: 0.9805300235748291,grad_norm: 0.7676396013319434, iteration: 463105
loss: 0.9984790086746216,grad_norm: 0.9999992270568131, iteration: 463106
loss: 1.008581519126892,grad_norm: 0.6383782880391746, iteration: 463107
loss: 1.0653473138809204,grad_norm: 0.9999999195031091, iteration: 463108
loss: 1.030440330505371,grad_norm: 0.9481037486777071, iteration: 463109
loss: 0.9827427268028259,grad_norm: 0.9999991050942681, iteration: 463110
loss: 1.0028159618377686,grad_norm: 0.7172530278077048, iteration: 463111
loss: 0.9633647203445435,grad_norm: 0.7674473930400783, iteration: 463112
loss: 0.9790965914726257,grad_norm: 0.7308086558486887, iteration: 463113
loss: 0.9957627654075623,grad_norm: 0.8005158747127846, iteration: 463114
loss: 1.0507707595825195,grad_norm: 0.7856216218217166, iteration: 463115
loss: 0.9913988709449768,grad_norm: 0.6829782325333305, iteration: 463116
loss: 0.9861468076705933,grad_norm: 0.6561941305399679, iteration: 463117
loss: 1.0141925811767578,grad_norm: 0.9999992302372942, iteration: 463118
loss: 0.9943897128105164,grad_norm: 0.9999997605436804, iteration: 463119
loss: 0.9845712184906006,grad_norm: 0.8411151279017073, iteration: 463120
loss: 0.9964065551757812,grad_norm: 0.7792236595700216, iteration: 463121
loss: 0.9903692007064819,grad_norm: 0.8107504060595556, iteration: 463122
loss: 0.975841760635376,grad_norm: 0.7178711731663976, iteration: 463123
loss: 1.0159202814102173,grad_norm: 0.8383682397906349, iteration: 463124
loss: 1.0204566717147827,grad_norm: 0.8836547208984077, iteration: 463125
loss: 0.974924623966217,grad_norm: 0.7903503914647744, iteration: 463126
loss: 1.0015746355056763,grad_norm: 0.7427543871713771, iteration: 463127
loss: 0.9781093001365662,grad_norm: 0.7606543519010537, iteration: 463128
loss: 1.0026578903198242,grad_norm: 0.6455029342721218, iteration: 463129
loss: 1.0102177858352661,grad_norm: 0.8256052029179779, iteration: 463130
loss: 1.0113518238067627,grad_norm: 0.8586158689268759, iteration: 463131
loss: 1.00246262550354,grad_norm: 0.8523152666352573, iteration: 463132
loss: 0.9836382865905762,grad_norm: 0.7517610937974931, iteration: 463133
loss: 1.0534287691116333,grad_norm: 0.9458279864102928, iteration: 463134
loss: 1.0056577920913696,grad_norm: 0.9447645421059251, iteration: 463135
loss: 0.9813767671585083,grad_norm: 0.7102348417001152, iteration: 463136
loss: 1.013775110244751,grad_norm: 0.7668440329435378, iteration: 463137
loss: 0.9783786535263062,grad_norm: 0.9999991007204364, iteration: 463138
loss: 1.02437424659729,grad_norm: 0.8205526916384912, iteration: 463139
loss: 0.9519450068473816,grad_norm: 0.7645961220790904, iteration: 463140
loss: 0.9587405920028687,grad_norm: 0.7301076654264742, iteration: 463141
loss: 0.9896484613418579,grad_norm: 0.9999992130270113, iteration: 463142
loss: 0.9989210963249207,grad_norm: 0.6542335711152215, iteration: 463143
loss: 1.0120042562484741,grad_norm: 0.8703671444875855, iteration: 463144
loss: 1.0059406757354736,grad_norm: 0.7539838130804601, iteration: 463145
loss: 0.9978192448616028,grad_norm: 0.7341190107149032, iteration: 463146
loss: 0.9841746091842651,grad_norm: 0.9999993647015843, iteration: 463147
loss: 1.0105280876159668,grad_norm: 0.9999995375034064, iteration: 463148
loss: 1.038143515586853,grad_norm: 0.999999151836809, iteration: 463149
loss: 0.9932381510734558,grad_norm: 0.9593666708545597, iteration: 463150
loss: 0.9933790564537048,grad_norm: 0.6935418575948575, iteration: 463151
loss: 0.9722591638565063,grad_norm: 0.7960932269373826, iteration: 463152
loss: 1.0032628774642944,grad_norm: 0.7595233025896729, iteration: 463153
loss: 0.9924878478050232,grad_norm: 0.7384509013985276, iteration: 463154
loss: 0.9743782877922058,grad_norm: 0.9999991005858123, iteration: 463155
loss: 1.009323239326477,grad_norm: 0.9833889518226285, iteration: 463156
loss: 1.007918357849121,grad_norm: 0.6804025445525897, iteration: 463157
loss: 1.0156241655349731,grad_norm: 0.999999464584626, iteration: 463158
loss: 1.0187560319900513,grad_norm: 0.8509844915872765, iteration: 463159
loss: 0.9584711790084839,grad_norm: 0.8289321049460076, iteration: 463160
loss: 0.9817305207252502,grad_norm: 0.7665796961584838, iteration: 463161
loss: 1.0426619052886963,grad_norm: 0.7969445811859256, iteration: 463162
loss: 0.9960748553276062,grad_norm: 0.6351535004535461, iteration: 463163
loss: 1.0126010179519653,grad_norm: 0.632989390006392, iteration: 463164
loss: 1.0020151138305664,grad_norm: 0.6980116451191954, iteration: 463165
loss: 0.9941532611846924,grad_norm: 0.9388335812875224, iteration: 463166
loss: 1.0203919410705566,grad_norm: 0.6225065083999333, iteration: 463167
loss: 0.9669405817985535,grad_norm: 0.7944631401010623, iteration: 463168
loss: 0.9803286790847778,grad_norm: 0.7274893795344688, iteration: 463169
loss: 1.0348317623138428,grad_norm: 0.99999972351234, iteration: 463170
loss: 1.0126250982284546,grad_norm: 0.7493744253268753, iteration: 463171
loss: 1.0209062099456787,grad_norm: 0.78078164906322, iteration: 463172
loss: 1.0102167129516602,grad_norm: 0.9999992447705709, iteration: 463173
loss: 0.9612962603569031,grad_norm: 0.6365828754278993, iteration: 463174
loss: 0.9964389204978943,grad_norm: 0.9999990067334141, iteration: 463175
loss: 0.98259037733078,grad_norm: 0.9999994437222957, iteration: 463176
loss: 0.9777560234069824,grad_norm: 0.7374792443256408, iteration: 463177
loss: 0.9973714351654053,grad_norm: 0.7178348409374173, iteration: 463178
loss: 0.9561317563056946,grad_norm: 0.7879653328528283, iteration: 463179
loss: 1.0235435962677002,grad_norm: 0.8243033725991976, iteration: 463180
loss: 0.9634604454040527,grad_norm: 0.7203209968379255, iteration: 463181
loss: 0.9959968328475952,grad_norm: 0.7641003510514943, iteration: 463182
loss: 1.0196338891983032,grad_norm: 0.7537466568759703, iteration: 463183
loss: 0.9901345372200012,grad_norm: 0.8035304983786649, iteration: 463184
loss: 1.0411311388015747,grad_norm: 0.8086146225339834, iteration: 463185
loss: 1.0185467004776,grad_norm: 0.8204507151348063, iteration: 463186
loss: 0.9863762259483337,grad_norm: 0.8135711370312853, iteration: 463187
loss: 0.9723634123802185,grad_norm: 0.7636507552904231, iteration: 463188
loss: 1.0182712078094482,grad_norm: 0.9999996555497321, iteration: 463189
loss: 1.0117853879928589,grad_norm: 0.7495320768948732, iteration: 463190
loss: 1.0134187936782837,grad_norm: 0.9999999476578876, iteration: 463191
loss: 1.0290229320526123,grad_norm: 0.9999990800895514, iteration: 463192
loss: 1.014176845550537,grad_norm: 0.6982130687827828, iteration: 463193
loss: 1.026728868484497,grad_norm: 0.8166651351142984, iteration: 463194
loss: 0.9541475176811218,grad_norm: 0.7959006268468891, iteration: 463195
loss: 1.045456051826477,grad_norm: 0.9999993313878216, iteration: 463196
loss: 1.0412284135818481,grad_norm: 0.7040906057629348, iteration: 463197
loss: 1.0319620370864868,grad_norm: 0.7859222718233724, iteration: 463198
loss: 1.015796422958374,grad_norm: 0.9999999830469875, iteration: 463199
loss: 0.9756739735603333,grad_norm: 0.6918960450315852, iteration: 463200
loss: 1.0243823528289795,grad_norm: 0.843338723240055, iteration: 463201
loss: 0.9900293946266174,grad_norm: 0.7530704985641534, iteration: 463202
loss: 1.0098472833633423,grad_norm: 0.8569769870799364, iteration: 463203
loss: 0.9736486077308655,grad_norm: 0.7636624828989103, iteration: 463204
loss: 0.9948293566703796,grad_norm: 0.7238602621849931, iteration: 463205
loss: 1.0248136520385742,grad_norm: 0.8120753194324324, iteration: 463206
loss: 1.1169625520706177,grad_norm: 0.9999998951790827, iteration: 463207
loss: 0.983144223690033,grad_norm: 0.8063440175851865, iteration: 463208
loss: 0.982036292552948,grad_norm: 0.8335175130084506, iteration: 463209
loss: 1.0097101926803589,grad_norm: 0.9154187089279355, iteration: 463210
loss: 0.9859614372253418,grad_norm: 0.6928919275799902, iteration: 463211
loss: 1.0105717182159424,grad_norm: 0.83432790564263, iteration: 463212
loss: 1.0473648309707642,grad_norm: 0.6795111738727172, iteration: 463213
loss: 1.027622938156128,grad_norm: 0.6853725296613408, iteration: 463214
loss: 0.9939980506896973,grad_norm: 0.6417290369369345, iteration: 463215
loss: 0.974088191986084,grad_norm: 0.7438527556938113, iteration: 463216
loss: 1.0576496124267578,grad_norm: 0.999999418606527, iteration: 463217
loss: 1.0310393571853638,grad_norm: 0.9999997305493147, iteration: 463218
loss: 1.0063008069992065,grad_norm: 0.7406781273695433, iteration: 463219
loss: 0.9913167357444763,grad_norm: 0.7511883373990025, iteration: 463220
loss: 0.9595694541931152,grad_norm: 0.7666296172984566, iteration: 463221
loss: 0.9957135319709778,grad_norm: 0.7656057228403624, iteration: 463222
loss: 1.0235246419906616,grad_norm: 0.999999510964484, iteration: 463223
loss: 1.0407052040100098,grad_norm: 0.8540458549206298, iteration: 463224
loss: 1.0176138877868652,grad_norm: 0.9760412015846343, iteration: 463225
loss: 0.9629030227661133,grad_norm: 0.744663104516027, iteration: 463226
loss: 1.0394586324691772,grad_norm: 0.9999991250519497, iteration: 463227
loss: 0.9771925806999207,grad_norm: 0.6581290131363425, iteration: 463228
loss: 0.9595900177955627,grad_norm: 0.7171926317299505, iteration: 463229
loss: 0.9787968993186951,grad_norm: 0.681232886924007, iteration: 463230
loss: 0.982914388179779,grad_norm: 0.6174636579979932, iteration: 463231
loss: 1.0059126615524292,grad_norm: 0.8138109641961404, iteration: 463232
loss: 0.9815865159034729,grad_norm: 0.759601655696312, iteration: 463233
loss: 1.007907748222351,grad_norm: 0.7292678501318806, iteration: 463234
loss: 1.011118769645691,grad_norm: 0.7778361563165564, iteration: 463235
loss: 0.9531174898147583,grad_norm: 0.6652736353099343, iteration: 463236
loss: 1.0395426750183105,grad_norm: 0.9999994489262048, iteration: 463237
loss: 1.0017579793930054,grad_norm: 0.9999994693627157, iteration: 463238
loss: 1.0155621767044067,grad_norm: 0.7562539685510237, iteration: 463239
loss: 1.0038635730743408,grad_norm: 0.7327291505615533, iteration: 463240
loss: 1.0192476511001587,grad_norm: 0.8654367609713166, iteration: 463241
loss: 1.007961630821228,grad_norm: 0.7223178695608482, iteration: 463242
loss: 0.9844480752944946,grad_norm: 0.8120351010508239, iteration: 463243
loss: 1.0124194622039795,grad_norm: 0.6536122591990581, iteration: 463244
loss: 1.086254596710205,grad_norm: 0.9999998703648473, iteration: 463245
loss: 1.0141324996948242,grad_norm: 0.9258509634311413, iteration: 463246
loss: 1.027670979499817,grad_norm: 0.9999990735296175, iteration: 463247
loss: 1.0055533647537231,grad_norm: 0.7812193819473036, iteration: 463248
loss: 1.0713557004928589,grad_norm: 0.9999992745984856, iteration: 463249
loss: 0.9905053377151489,grad_norm: 0.6382228162728012, iteration: 463250
loss: 0.979361891746521,grad_norm: 0.6676000268135415, iteration: 463251
loss: 1.0368090867996216,grad_norm: 0.9586113356154276, iteration: 463252
loss: 1.0022715330123901,grad_norm: 0.9999991916432411, iteration: 463253
loss: 1.006552815437317,grad_norm: 0.8376763763461139, iteration: 463254
loss: 0.9556625485420227,grad_norm: 0.6699588342376424, iteration: 463255
loss: 0.996976375579834,grad_norm: 0.7100114442833817, iteration: 463256
loss: 0.9827954173088074,grad_norm: 0.8073462016747258, iteration: 463257
loss: 0.9853557348251343,grad_norm: 0.703550307171718, iteration: 463258
loss: 0.9981706738471985,grad_norm: 0.7244336192486917, iteration: 463259
loss: 0.995714545249939,grad_norm: 0.7219943526214332, iteration: 463260
loss: 0.9985063672065735,grad_norm: 0.9527455106678855, iteration: 463261
loss: 1.0175879001617432,grad_norm: 0.8182274252761678, iteration: 463262
loss: 1.018027901649475,grad_norm: 0.659286142948809, iteration: 463263
loss: 1.0435677766799927,grad_norm: 0.7064982059764706, iteration: 463264
loss: 0.9960485100746155,grad_norm: 0.6700867184914123, iteration: 463265
loss: 1.057594656944275,grad_norm: 0.9999991855033717, iteration: 463266
loss: 0.9867614507675171,grad_norm: 0.7170294707716316, iteration: 463267
loss: 1.041730284690857,grad_norm: 0.9999996858108242, iteration: 463268
loss: 0.9639551043510437,grad_norm: 0.79864387626752, iteration: 463269
loss: 0.9777835011482239,grad_norm: 0.7022445687331546, iteration: 463270
loss: 1.0054099559783936,grad_norm: 0.7839719449725386, iteration: 463271
loss: 0.9687895178794861,grad_norm: 0.7547960255677346, iteration: 463272
loss: 0.9828663468360901,grad_norm: 0.7478666824956495, iteration: 463273
loss: 1.0073336362838745,grad_norm: 0.8158431073417962, iteration: 463274
loss: 0.9937640428543091,grad_norm: 0.71856643959855, iteration: 463275
loss: 0.9947516918182373,grad_norm: 0.625580264449303, iteration: 463276
loss: 0.9946500062942505,grad_norm: 0.9214971384748203, iteration: 463277
loss: 1.0031815767288208,grad_norm: 0.6860557438444312, iteration: 463278
loss: 1.005313515663147,grad_norm: 0.6566835943563751, iteration: 463279
loss: 0.9712182283401489,grad_norm: 0.8113357060869594, iteration: 463280
loss: 0.9757222533226013,grad_norm: 0.8182976751128958, iteration: 463281
loss: 1.0126352310180664,grad_norm: 0.845097868264507, iteration: 463282
loss: 1.0206258296966553,grad_norm: 0.7085735075783247, iteration: 463283
loss: 1.0368280410766602,grad_norm: 0.8669605515584032, iteration: 463284
loss: 1.0592120885849,grad_norm: 0.9999992613226062, iteration: 463285
loss: 0.9953011870384216,grad_norm: 0.7073237125001943, iteration: 463286
loss: 0.9933540225028992,grad_norm: 0.7325444629033425, iteration: 463287
loss: 1.0098274946212769,grad_norm: 0.7037997820413098, iteration: 463288
loss: 0.9932931065559387,grad_norm: 0.7768741551717248, iteration: 463289
loss: 1.001179814338684,grad_norm: 0.6832315201813478, iteration: 463290
loss: 0.9791228175163269,grad_norm: 0.7955337528622117, iteration: 463291
loss: 1.0292648077011108,grad_norm: 0.7871073889498186, iteration: 463292
loss: 0.9888741970062256,grad_norm: 0.8278497237381405, iteration: 463293
loss: 0.9859517216682434,grad_norm: 0.778451621005764, iteration: 463294
loss: 0.9445641040802002,grad_norm: 0.8110654441026346, iteration: 463295
loss: 0.9731660485267639,grad_norm: 0.8341639905461276, iteration: 463296
loss: 0.9841391444206238,grad_norm: 0.7252041752281744, iteration: 463297
loss: 0.9967909455299377,grad_norm: 0.8437707557985545, iteration: 463298
loss: 0.9877461791038513,grad_norm: 0.751745889525547, iteration: 463299
loss: 1.00753915309906,grad_norm: 0.7563385383634506, iteration: 463300
loss: 1.0221136808395386,grad_norm: 0.8941230373794141, iteration: 463301
loss: 1.0475924015045166,grad_norm: 0.9999998869882768, iteration: 463302
loss: 1.0131213665008545,grad_norm: 0.8881481418892913, iteration: 463303
loss: 1.0112268924713135,grad_norm: 0.9112810225083439, iteration: 463304
loss: 0.980695366859436,grad_norm: 0.6423231823815871, iteration: 463305
loss: 1.0314273834228516,grad_norm: 0.7663249550159295, iteration: 463306
loss: 0.9574303030967712,grad_norm: 0.6898401566433188, iteration: 463307
loss: 0.9937835335731506,grad_norm: 0.7573464456594301, iteration: 463308
loss: 1.0062439441680908,grad_norm: 0.8123733073604927, iteration: 463309
loss: 1.0971194505691528,grad_norm: 0.6891712346853367, iteration: 463310
loss: 1.0150803327560425,grad_norm: 0.8801328480106166, iteration: 463311
loss: 0.9950101375579834,grad_norm: 0.820253036600447, iteration: 463312
loss: 0.9785476922988892,grad_norm: 0.736881681035698, iteration: 463313
loss: 0.9973518252372742,grad_norm: 0.8324633605839244, iteration: 463314
loss: 0.9597413539886475,grad_norm: 0.6375631694240405, iteration: 463315
loss: 1.0193523168563843,grad_norm: 0.7768494518987388, iteration: 463316
loss: 0.9984606504440308,grad_norm: 0.8435377449032118, iteration: 463317
loss: 0.9757224917411804,grad_norm: 0.6798401964347093, iteration: 463318
loss: 1.0168685913085938,grad_norm: 0.9019389348170846, iteration: 463319
loss: 1.0080262422561646,grad_norm: 0.9999993465160584, iteration: 463320
loss: 1.0022350549697876,grad_norm: 0.8197459505065925, iteration: 463321
loss: 0.9781999588012695,grad_norm: 0.8816332499546583, iteration: 463322
loss: 1.022918462753296,grad_norm: 0.8011973016268072, iteration: 463323
loss: 0.9829808473587036,grad_norm: 0.7990343376017509, iteration: 463324
loss: 1.0193860530853271,grad_norm: 0.8646848662061242, iteration: 463325
loss: 1.0184706449508667,grad_norm: 0.8081703708690554, iteration: 463326
loss: 1.018911600112915,grad_norm: 0.7653633136207716, iteration: 463327
loss: 0.9988061189651489,grad_norm: 0.7676750919922316, iteration: 463328
loss: 0.9960824251174927,grad_norm: 0.6741212312031997, iteration: 463329
loss: 1.023954153060913,grad_norm: 0.7289656797318053, iteration: 463330
loss: 0.9721423983573914,grad_norm: 0.8041108673519277, iteration: 463331
loss: 1.0055663585662842,grad_norm: 0.8279101808872886, iteration: 463332
loss: 1.0050309896469116,grad_norm: 0.7400068909885303, iteration: 463333
loss: 0.9970166683197021,grad_norm: 0.9999994892803143, iteration: 463334
loss: 1.0254459381103516,grad_norm: 0.7886768143667484, iteration: 463335
loss: 1.0005017518997192,grad_norm: 0.8496729734381647, iteration: 463336
loss: 0.9972058534622192,grad_norm: 0.747321260582843, iteration: 463337
loss: 1.008114218711853,grad_norm: 0.8665178024611001, iteration: 463338
loss: 0.9898742437362671,grad_norm: 0.717539799281793, iteration: 463339
loss: 0.9851059317588806,grad_norm: 0.78199595551877, iteration: 463340
loss: 0.9314149022102356,grad_norm: 0.7233273426297174, iteration: 463341
loss: 1.111464023590088,grad_norm: 0.999999560851963, iteration: 463342
loss: 0.9808008670806885,grad_norm: 0.747993279187475, iteration: 463343
loss: 0.9931955337524414,grad_norm: 0.7548403232229772, iteration: 463344
loss: 0.9979668855667114,grad_norm: 0.999999505288422, iteration: 463345
loss: 0.9965128302574158,grad_norm: 0.865154070805118, iteration: 463346
loss: 0.9794620871543884,grad_norm: 0.9579237293801851, iteration: 463347
loss: 0.9959360361099243,grad_norm: 0.7797358291275874, iteration: 463348
loss: 1.0067721605300903,grad_norm: 0.8446291464121208, iteration: 463349
loss: 1.039455771446228,grad_norm: 0.6938429758747766, iteration: 463350
loss: 1.0587342977523804,grad_norm: 0.8091396003398847, iteration: 463351
loss: 1.0118502378463745,grad_norm: 0.7396288595854369, iteration: 463352
loss: 1.0057871341705322,grad_norm: 0.6784577225624595, iteration: 463353
loss: 0.993780255317688,grad_norm: 0.8536860955861826, iteration: 463354
loss: 0.9686237573623657,grad_norm: 0.7563558291228899, iteration: 463355
loss: 1.0194735527038574,grad_norm: 0.7378482022630035, iteration: 463356
loss: 0.9802072644233704,grad_norm: 0.8856801764669544, iteration: 463357
loss: 0.9988793730735779,grad_norm: 0.8123140509887622, iteration: 463358
loss: 1.0035902261734009,grad_norm: 0.8896198439349944, iteration: 463359
loss: 0.9953861236572266,grad_norm: 0.9442982766851176, iteration: 463360
loss: 1.019966959953308,grad_norm: 0.9999990702274278, iteration: 463361
loss: 0.9417810440063477,grad_norm: 0.7383221020884424, iteration: 463362
loss: 1.013268232345581,grad_norm: 0.7528479412966068, iteration: 463363
loss: 1.0034337043762207,grad_norm: 0.9999992662534057, iteration: 463364
loss: 1.0016905069351196,grad_norm: 0.7591677239374632, iteration: 463365
loss: 1.010094404220581,grad_norm: 0.7425815196893697, iteration: 463366
loss: 1.0191620588302612,grad_norm: 0.8284213435876855, iteration: 463367
loss: 1.0036203861236572,grad_norm: 0.728198472631543, iteration: 463368
loss: 0.9593771696090698,grad_norm: 0.920666394564387, iteration: 463369
loss: 0.9852833151817322,grad_norm: 0.7087309803143522, iteration: 463370
loss: 1.0167906284332275,grad_norm: 0.8259653922816987, iteration: 463371
loss: 1.0096509456634521,grad_norm: 0.6452032982113797, iteration: 463372
loss: 1.0163941383361816,grad_norm: 0.8532453221521182, iteration: 463373
loss: 1.0593057870864868,grad_norm: 0.999999563521778, iteration: 463374
loss: 0.9978435635566711,grad_norm: 0.6721582113256708, iteration: 463375
loss: 0.9815975427627563,grad_norm: 0.7649289549307122, iteration: 463376
loss: 0.9612712860107422,grad_norm: 0.7029055045717942, iteration: 463377
loss: 0.9878917336463928,grad_norm: 0.7662036192469626, iteration: 463378
loss: 1.010597825050354,grad_norm: 0.8057944719960097, iteration: 463379
loss: 0.9830725789070129,grad_norm: 0.6634375479055736, iteration: 463380
loss: 0.9738499522209167,grad_norm: 0.6826911637885381, iteration: 463381
loss: 0.9703963398933411,grad_norm: 0.696611715453584, iteration: 463382
loss: 0.9764113426208496,grad_norm: 0.8687523862648712, iteration: 463383
loss: 1.0266801118850708,grad_norm: 0.9999990362822246, iteration: 463384
loss: 1.0013028383255005,grad_norm: 0.8122914813146568, iteration: 463385
loss: 0.989716112613678,grad_norm: 0.6245818877142898, iteration: 463386
loss: 0.9812915921211243,grad_norm: 0.6338932811284722, iteration: 463387
loss: 0.9851285219192505,grad_norm: 0.7230072113162532, iteration: 463388
loss: 1.013967514038086,grad_norm: 0.7152881346567507, iteration: 463389
loss: 0.9640055298805237,grad_norm: 0.7412174031341, iteration: 463390
loss: 1.0302777290344238,grad_norm: 0.7285858257664717, iteration: 463391
loss: 1.018204927444458,grad_norm: 0.774592612641453, iteration: 463392
loss: 0.9977428913116455,grad_norm: 0.7065463808702891, iteration: 463393
loss: 1.0150461196899414,grad_norm: 0.7799377756519247, iteration: 463394
loss: 0.98465496301651,grad_norm: 0.9999990701369583, iteration: 463395
loss: 1.0012547969818115,grad_norm: 0.7211089135922438, iteration: 463396
loss: 1.2701393365859985,grad_norm: 0.9999995784124002, iteration: 463397
loss: 0.9637835621833801,grad_norm: 0.7155249560584513, iteration: 463398
loss: 1.0126011371612549,grad_norm: 0.8479659564682103, iteration: 463399
loss: 0.991268515586853,grad_norm: 0.8006705612159952, iteration: 463400
loss: 1.0658668279647827,grad_norm: 0.9999998985570593, iteration: 463401
loss: 1.01487398147583,grad_norm: 0.7704852964984502, iteration: 463402
loss: 1.0093882083892822,grad_norm: 0.7007577866916965, iteration: 463403
loss: 0.9544070959091187,grad_norm: 0.7924442685828863, iteration: 463404
loss: 1.08126699924469,grad_norm: 0.9999998935154729, iteration: 463405
loss: 0.9745757579803467,grad_norm: 0.9999991061120865, iteration: 463406
loss: 0.9950507879257202,grad_norm: 0.862206085740654, iteration: 463407
loss: 1.031712293624878,grad_norm: 0.7382831090474635, iteration: 463408
loss: 0.9901596307754517,grad_norm: 0.7186331036740903, iteration: 463409
loss: 1.0990478992462158,grad_norm: 0.9999999543541247, iteration: 463410
loss: 0.9729938507080078,grad_norm: 0.7715095000545927, iteration: 463411
loss: 1.0151753425598145,grad_norm: 0.8278432615402042, iteration: 463412
loss: 0.9809132218360901,grad_norm: 0.8390993673493358, iteration: 463413
loss: 1.1225818395614624,grad_norm: 0.7229212404645567, iteration: 463414
loss: 1.0034407377243042,grad_norm: 0.7339791632159153, iteration: 463415
loss: 1.0086166858673096,grad_norm: 0.6889358993310484, iteration: 463416
loss: 0.980377197265625,grad_norm: 0.7237944940384862, iteration: 463417
loss: 1.0024670362472534,grad_norm: 0.8663808530117973, iteration: 463418
loss: 0.9598502516746521,grad_norm: 0.8754114674048329, iteration: 463419
loss: 1.02761709690094,grad_norm: 0.7187473117208771, iteration: 463420
loss: 0.9747785925865173,grad_norm: 0.7156468226159897, iteration: 463421
loss: 1.0128709077835083,grad_norm: 0.8854972512316657, iteration: 463422
loss: 1.0283085107803345,grad_norm: 0.5505866581359212, iteration: 463423
loss: 1.02629816532135,grad_norm: 0.7472745255239424, iteration: 463424
loss: 1.0072768926620483,grad_norm: 0.6868012164317224, iteration: 463425
loss: 1.0178006887435913,grad_norm: 0.675872440999686, iteration: 463426
loss: 0.9963449239730835,grad_norm: 0.9999997454102076, iteration: 463427
loss: 0.9764573574066162,grad_norm: 0.781440404141269, iteration: 463428
loss: 1.020845890045166,grad_norm: 0.9925373198158598, iteration: 463429
loss: 0.9950262308120728,grad_norm: 0.7497836742912638, iteration: 463430
loss: 1.0083409547805786,grad_norm: 0.8095569223001076, iteration: 463431
loss: 1.0028890371322632,grad_norm: 0.761044710113857, iteration: 463432
loss: 1.0185514688491821,grad_norm: 0.8285268666367771, iteration: 463433
loss: 1.0045360326766968,grad_norm: 0.6313537482416491, iteration: 463434
loss: 0.9974167346954346,grad_norm: 0.6770205197069559, iteration: 463435
loss: 1.0010606050491333,grad_norm: 0.7074175288485255, iteration: 463436
loss: 0.9871596097946167,grad_norm: 0.6731715967502322, iteration: 463437
loss: 0.9709989428520203,grad_norm: 0.7314143568363237, iteration: 463438
loss: 0.9752536416053772,grad_norm: 0.8164219308878288, iteration: 463439
loss: 1.0129320621490479,grad_norm: 0.9999998156166408, iteration: 463440
loss: 1.0244836807250977,grad_norm: 0.8922576119581724, iteration: 463441
loss: 0.9890682101249695,grad_norm: 0.6210545581765147, iteration: 463442
loss: 1.0441296100616455,grad_norm: 0.9999999547803183, iteration: 463443
loss: 1.0384730100631714,grad_norm: 0.7693610148948854, iteration: 463444
loss: 0.9954145550727844,grad_norm: 0.7363240841805925, iteration: 463445
loss: 1.039934754371643,grad_norm: 0.8149678562746983, iteration: 463446
loss: 1.0270837545394897,grad_norm: 0.6916256696185671, iteration: 463447
loss: 0.957553505897522,grad_norm: 0.799484707730999, iteration: 463448
loss: 0.9849017858505249,grad_norm: 0.9203046657814723, iteration: 463449
loss: 0.9799449443817139,grad_norm: 0.9776062748991, iteration: 463450
loss: 1.0014327764511108,grad_norm: 0.7813821478298731, iteration: 463451
loss: 1.0014094114303589,grad_norm: 0.860335151290682, iteration: 463452
loss: 0.990015983581543,grad_norm: 0.7260823019088934, iteration: 463453
loss: 0.9941025972366333,grad_norm: 0.8814234013683462, iteration: 463454
loss: 1.005661964416504,grad_norm: 0.8117628265621967, iteration: 463455
loss: 0.9708261489868164,grad_norm: 0.7919507347686277, iteration: 463456
loss: 0.9941953420639038,grad_norm: 0.999999059014165, iteration: 463457
loss: 1.005794644355774,grad_norm: 0.8556678226424915, iteration: 463458
loss: 1.024951457977295,grad_norm: 0.7093663946927878, iteration: 463459
loss: 0.9605612754821777,grad_norm: 0.8417879067499313, iteration: 463460
loss: 0.9856806993484497,grad_norm: 0.7793407280304444, iteration: 463461
loss: 1.0374253988265991,grad_norm: 0.8926517945434841, iteration: 463462
loss: 1.0251007080078125,grad_norm: 0.9999990614199612, iteration: 463463
loss: 0.9945871233940125,grad_norm: 0.8692351028115574, iteration: 463464
loss: 1.022912621498108,grad_norm: 0.8118354568498046, iteration: 463465
loss: 0.9959629774093628,grad_norm: 0.8776182581340916, iteration: 463466
loss: 0.9881036281585693,grad_norm: 0.7702575172294597, iteration: 463467
loss: 1.0763100385665894,grad_norm: 0.81916416327742, iteration: 463468
loss: 1.002468228340149,grad_norm: 0.7587387921031868, iteration: 463469
loss: 1.0090504884719849,grad_norm: 0.8090361223378832, iteration: 463470
loss: 1.0448248386383057,grad_norm: 0.9999996129435744, iteration: 463471
loss: 1.0222198963165283,grad_norm: 0.7485472447237546, iteration: 463472
loss: 1.021968126296997,grad_norm: 0.8086125432392715, iteration: 463473
loss: 1.018725037574768,grad_norm: 0.621602430962324, iteration: 463474
loss: 1.04414701461792,grad_norm: 0.6862624270333707, iteration: 463475
loss: 1.0054484605789185,grad_norm: 0.9999993805908194, iteration: 463476
loss: 1.0217597484588623,grad_norm: 0.6731792114920276, iteration: 463477
loss: 1.0056909322738647,grad_norm: 0.7677937540486305, iteration: 463478
loss: 1.0352628231048584,grad_norm: 0.9999994230722508, iteration: 463479
loss: 0.989077627658844,grad_norm: 0.6905377486226064, iteration: 463480
loss: 0.9416223168373108,grad_norm: 0.7030282516609415, iteration: 463481
loss: 0.967559814453125,grad_norm: 0.769314757745237, iteration: 463482
loss: 0.9900491833686829,grad_norm: 0.7294699588784102, iteration: 463483
loss: 1.0141794681549072,grad_norm: 0.6919142053555293, iteration: 463484
loss: 1.0440751314163208,grad_norm: 0.9733288316854329, iteration: 463485
loss: 1.0157115459442139,grad_norm: 0.6911769811514838, iteration: 463486
loss: 1.0327435731887817,grad_norm: 0.6688257010271965, iteration: 463487
loss: 0.9859802722930908,grad_norm: 0.8323074749230854, iteration: 463488
loss: 0.9993301033973694,grad_norm: 0.9233276655090299, iteration: 463489
loss: 0.960476279258728,grad_norm: 0.9999990440325097, iteration: 463490
loss: 1.058121681213379,grad_norm: 0.9999993813993744, iteration: 463491
loss: 1.0140093564987183,grad_norm: 0.7395778902333383, iteration: 463492
loss: 1.0201783180236816,grad_norm: 0.6566215634075182, iteration: 463493
loss: 1.002840518951416,grad_norm: 0.658124884756827, iteration: 463494
loss: 1.0005278587341309,grad_norm: 0.9999999324131305, iteration: 463495
loss: 1.09574294090271,grad_norm: 0.9999993980174097, iteration: 463496
loss: 1.0152924060821533,grad_norm: 0.9999999109369595, iteration: 463497
loss: 0.9498452544212341,grad_norm: 0.7542039647241904, iteration: 463498
loss: 1.0158085823059082,grad_norm: 0.9999991053234478, iteration: 463499
loss: 1.017713189125061,grad_norm: 0.8813279054955663, iteration: 463500
loss: 1.0130082368850708,grad_norm: 0.6479597656381544, iteration: 463501
loss: 1.013656735420227,grad_norm: 0.7443285359314576, iteration: 463502
loss: 0.9698724150657654,grad_norm: 0.6955485704614298, iteration: 463503
loss: 1.0676884651184082,grad_norm: 0.999999962507324, iteration: 463504
loss: 0.954754650592804,grad_norm: 0.99640417098431, iteration: 463505
loss: 1.0139455795288086,grad_norm: 0.8773374503122857, iteration: 463506
loss: 1.0843586921691895,grad_norm: 0.814490807815592, iteration: 463507
loss: 0.9848117828369141,grad_norm: 0.8076730400956013, iteration: 463508
loss: 0.9923260807991028,grad_norm: 0.7992955707735617, iteration: 463509
loss: 0.9538490772247314,grad_norm: 0.7772305043189794, iteration: 463510
loss: 1.012434482574463,grad_norm: 0.8188461709765553, iteration: 463511
loss: 1.0201607942581177,grad_norm: 0.821752851542956, iteration: 463512
loss: 1.0174812078475952,grad_norm: 0.7394876996731986, iteration: 463513
loss: 0.9993516802787781,grad_norm: 0.8967138953625664, iteration: 463514
loss: 0.9551769495010376,grad_norm: 0.812533664300781, iteration: 463515
loss: 1.0055574178695679,grad_norm: 0.8401644600227156, iteration: 463516
loss: 0.9685431122779846,grad_norm: 0.763203060685951, iteration: 463517
loss: 0.9857854247093201,grad_norm: 0.7844478762422121, iteration: 463518
loss: 1.0539687871932983,grad_norm: 0.7969347618052262, iteration: 463519
loss: 1.000449299812317,grad_norm: 0.853072055365615, iteration: 463520
loss: 0.9751673936843872,grad_norm: 0.8962100669644139, iteration: 463521
loss: 0.9820419549942017,grad_norm: 0.7381837804883051, iteration: 463522
loss: 0.9762381911277771,grad_norm: 0.790745779426518, iteration: 463523
loss: 1.0192378759384155,grad_norm: 0.799512854753342, iteration: 463524
loss: 0.999854326248169,grad_norm: 0.8302855896663709, iteration: 463525
loss: 0.9929946660995483,grad_norm: 0.7817366144689365, iteration: 463526
loss: 1.0092819929122925,grad_norm: 0.9999998959238572, iteration: 463527
loss: 1.0142394304275513,grad_norm: 0.6661156635010835, iteration: 463528
loss: 0.9790371656417847,grad_norm: 0.6875860124306961, iteration: 463529
loss: 0.9801467061042786,grad_norm: 0.8798686714543857, iteration: 463530
loss: 1.1533801555633545,grad_norm: 0.9999994679507715, iteration: 463531
loss: 0.9883877038955688,grad_norm: 0.677101357806991, iteration: 463532
loss: 0.9883023500442505,grad_norm: 0.8220719448561133, iteration: 463533
loss: 1.0141987800598145,grad_norm: 0.7804984035172137, iteration: 463534
loss: 0.9463424682617188,grad_norm: 0.8509358167989526, iteration: 463535
loss: 1.0125067234039307,grad_norm: 0.9999998829125009, iteration: 463536
loss: 0.9479085803031921,grad_norm: 0.7821965415011546, iteration: 463537
loss: 1.0077213048934937,grad_norm: 0.6637048158019714, iteration: 463538
loss: 1.0026953220367432,grad_norm: 0.7887771546571319, iteration: 463539
loss: 0.9852113127708435,grad_norm: 0.8499102883970667, iteration: 463540
loss: 1.0238301753997803,grad_norm: 0.9743759839067898, iteration: 463541
loss: 0.9778897762298584,grad_norm: 0.7209070880153667, iteration: 463542
loss: 0.9943432807922363,grad_norm: 0.670044187231781, iteration: 463543
loss: 0.9948740601539612,grad_norm: 0.7865117913846903, iteration: 463544
loss: 0.9994831085205078,grad_norm: 0.7857790252882906, iteration: 463545
loss: 0.9835482239723206,grad_norm: 0.703104432588475, iteration: 463546
loss: 1.0751925706863403,grad_norm: 0.7283664332079195, iteration: 463547
loss: 0.9828035831451416,grad_norm: 0.8205218812478865, iteration: 463548
loss: 0.9889302253723145,grad_norm: 0.7355495687678916, iteration: 463549
loss: 1.0161876678466797,grad_norm: 0.7777262749551946, iteration: 463550
loss: 1.03708016872406,grad_norm: 0.773866606584543, iteration: 463551
loss: 0.9769662022590637,grad_norm: 0.7542503801869654, iteration: 463552
loss: 1.0238341093063354,grad_norm: 0.729312065768142, iteration: 463553
loss: 0.9526084065437317,grad_norm: 0.9999998974256585, iteration: 463554
loss: 1.0174623727798462,grad_norm: 0.840423500229083, iteration: 463555
loss: 1.0027755498886108,grad_norm: 0.962299792136614, iteration: 463556
loss: 0.9634025692939758,grad_norm: 0.7363275847701741, iteration: 463557
loss: 0.9779704809188843,grad_norm: 0.8213525225236601, iteration: 463558
loss: 0.9888784885406494,grad_norm: 0.6868964785294319, iteration: 463559
loss: 0.9907516241073608,grad_norm: 0.7641922097430571, iteration: 463560
loss: 1.0287830829620361,grad_norm: 0.9999996995336328, iteration: 463561
loss: 1.0273613929748535,grad_norm: 0.9999993522449638, iteration: 463562
loss: 1.0039408206939697,grad_norm: 0.8450938564867526, iteration: 463563
loss: 0.9902263283729553,grad_norm: 0.7639788329285337, iteration: 463564
loss: 0.9997718930244446,grad_norm: 0.6977074274297097, iteration: 463565
loss: 1.0146172046661377,grad_norm: 0.7574535378338519, iteration: 463566
loss: 1.0450814962387085,grad_norm: 0.7985486419465646, iteration: 463567
loss: 1.022168755531311,grad_norm: 0.7173939449580049, iteration: 463568
loss: 1.0558140277862549,grad_norm: 0.9689862533851109, iteration: 463569
loss: 0.9857158660888672,grad_norm: 0.7379732100763899, iteration: 463570
loss: 1.0976213216781616,grad_norm: 0.9999992314788563, iteration: 463571
loss: 1.0270484685897827,grad_norm: 0.7180252246325086, iteration: 463572
loss: 1.0483382940292358,grad_norm: 0.9999990751789377, iteration: 463573
loss: 1.0109132528305054,grad_norm: 0.8761329631135396, iteration: 463574
loss: 0.9971417784690857,grad_norm: 0.7770937795591831, iteration: 463575
loss: 0.9661781787872314,grad_norm: 0.7902539384467998, iteration: 463576
loss: 1.025168776512146,grad_norm: 0.7943909563467733, iteration: 463577
loss: 0.9727432727813721,grad_norm: 0.9699896364363728, iteration: 463578
loss: 1.0161012411117554,grad_norm: 0.5947747222630532, iteration: 463579
loss: 1.0278034210205078,grad_norm: 0.7119707727461234, iteration: 463580
loss: 0.9609917402267456,grad_norm: 0.6436588897021865, iteration: 463581
loss: 1.0005005598068237,grad_norm: 0.7185549599287988, iteration: 463582
loss: 0.9789686799049377,grad_norm: 0.6825073277108338, iteration: 463583
loss: 0.9859707355499268,grad_norm: 0.6871481636329417, iteration: 463584
loss: 1.013623595237732,grad_norm: 0.9375189973967178, iteration: 463585
loss: 1.0039163827896118,grad_norm: 0.7942972309281741, iteration: 463586
loss: 1.0358059406280518,grad_norm: 0.8666097796487179, iteration: 463587
loss: 1.0046075582504272,grad_norm: 0.8428221675409675, iteration: 463588
loss: 1.0220845937728882,grad_norm: 0.777114817100805, iteration: 463589
loss: 1.0027986764907837,grad_norm: 0.7522229289546357, iteration: 463590
loss: 1.0071101188659668,grad_norm: 0.6617183209830315, iteration: 463591
loss: 0.9894179701805115,grad_norm: 0.7587855646160125, iteration: 463592
loss: 1.0248711109161377,grad_norm: 0.7182103921838461, iteration: 463593
loss: 0.9988927841186523,grad_norm: 0.8028321606602511, iteration: 463594
loss: 0.9889122843742371,grad_norm: 0.7300285676804855, iteration: 463595
loss: 1.0355920791625977,grad_norm: 0.7620139330509672, iteration: 463596
loss: 1.024910807609558,grad_norm: 0.7154795291024982, iteration: 463597
loss: 1.003401279449463,grad_norm: 0.9999992649409223, iteration: 463598
loss: 0.9682973027229309,grad_norm: 0.7231099094326571, iteration: 463599
loss: 1.0153576135635376,grad_norm: 0.8525288938199652, iteration: 463600
loss: 1.018425703048706,grad_norm: 0.7755536498961001, iteration: 463601
loss: 0.9825855493545532,grad_norm: 0.6512781921146732, iteration: 463602
loss: 1.0078431367874146,grad_norm: 0.9425314840921881, iteration: 463603
loss: 1.004726529121399,grad_norm: 0.7477248156676289, iteration: 463604
loss: 1.02654230594635,grad_norm: 0.7978739843158053, iteration: 463605
loss: 1.0163079500198364,grad_norm: 0.6763988263267304, iteration: 463606
loss: 1.0017335414886475,grad_norm: 0.9999990914337584, iteration: 463607
loss: 1.0023517608642578,grad_norm: 0.9999990953361121, iteration: 463608
loss: 1.0318704843521118,grad_norm: 0.7376087006663984, iteration: 463609
loss: 0.9886458516120911,grad_norm: 0.9423141444948874, iteration: 463610
loss: 0.9938845038414001,grad_norm: 0.6854597158614908, iteration: 463611
loss: 0.9912482500076294,grad_norm: 0.782492210660723, iteration: 463612
loss: 1.0263217687606812,grad_norm: 0.8041545805918301, iteration: 463613
loss: 1.0011333227157593,grad_norm: 0.8253533279535096, iteration: 463614
loss: 0.9932096004486084,grad_norm: 0.7993145136750462, iteration: 463615
loss: 0.996715784072876,grad_norm: 0.7482288583806067, iteration: 463616
loss: 1.0022777318954468,grad_norm: 0.8057586328062644, iteration: 463617
loss: 0.9874783754348755,grad_norm: 0.9999989593760498, iteration: 463618
loss: 0.9708823561668396,grad_norm: 0.7537885259745383, iteration: 463619
loss: 0.9882261157035828,grad_norm: 0.7461620452322842, iteration: 463620
loss: 0.9987003207206726,grad_norm: 0.8478042765595587, iteration: 463621
loss: 0.9877710938453674,grad_norm: 0.9532102999484852, iteration: 463622
loss: 1.020684003829956,grad_norm: 0.7702615350989256, iteration: 463623
loss: 0.9846578240394592,grad_norm: 0.8293038095011828, iteration: 463624
loss: 1.0099486112594604,grad_norm: 0.7180934852395578, iteration: 463625
loss: 0.9931473135948181,grad_norm: 0.6159852617762166, iteration: 463626
loss: 1.024280309677124,grad_norm: 0.9999991918395654, iteration: 463627
loss: 1.0483232736587524,grad_norm: 0.8124191754729021, iteration: 463628
loss: 1.055227279663086,grad_norm: 0.9904315792283072, iteration: 463629
loss: 1.0215789079666138,grad_norm: 0.8746050133005847, iteration: 463630
loss: 1.0142385959625244,grad_norm: 0.5793579658503427, iteration: 463631
loss: 0.9533503651618958,grad_norm: 0.824245280378402, iteration: 463632
loss: 0.9786810874938965,grad_norm: 0.6487224747416472, iteration: 463633
loss: 1.0000332593917847,grad_norm: 0.7944958138290817, iteration: 463634
loss: 1.0066722631454468,grad_norm: 0.8518015853903463, iteration: 463635
loss: 1.0039072036743164,grad_norm: 0.7715409032026467, iteration: 463636
loss: 1.0093839168548584,grad_norm: 0.7778402729734567, iteration: 463637
loss: 1.0011895895004272,grad_norm: 0.8589809449936395, iteration: 463638
loss: 1.0325353145599365,grad_norm: 0.8171152317452268, iteration: 463639
loss: 0.9857478141784668,grad_norm: 0.7429362685383697, iteration: 463640
loss: 1.0312033891677856,grad_norm: 0.6092018383899548, iteration: 463641
loss: 1.0083484649658203,grad_norm: 0.7328735660714417, iteration: 463642
loss: 0.9962025284767151,grad_norm: 0.8568243498654842, iteration: 463643
loss: 1.0040987730026245,grad_norm: 0.9999990205090056, iteration: 463644
loss: 1.0036693811416626,grad_norm: 0.9460660066186065, iteration: 463645
loss: 0.9992504119873047,grad_norm: 0.8860587203653836, iteration: 463646
loss: 0.9962899684906006,grad_norm: 0.9999994936302076, iteration: 463647
loss: 0.9975649118423462,grad_norm: 0.7455625183169005, iteration: 463648
loss: 1.0150858163833618,grad_norm: 0.6351877624951073, iteration: 463649
loss: 1.0169198513031006,grad_norm: 0.999999298417487, iteration: 463650
loss: 0.9998745918273926,grad_norm: 0.7046206431379904, iteration: 463651
loss: 1.0148974657058716,grad_norm: 0.8072780793190844, iteration: 463652
loss: 0.9850112199783325,grad_norm: 0.8100855813507504, iteration: 463653
loss: 0.9722200036048889,grad_norm: 0.7480145805285157, iteration: 463654
loss: 1.0127973556518555,grad_norm: 0.7193416824969843, iteration: 463655
loss: 0.9985290765762329,grad_norm: 0.6809011291577247, iteration: 463656
loss: 1.00570547580719,grad_norm: 0.7477380590737436, iteration: 463657
loss: 1.003342628479004,grad_norm: 0.7403996115130527, iteration: 463658
loss: 1.0051660537719727,grad_norm: 0.9999990369516616, iteration: 463659
loss: 1.0014598369598389,grad_norm: 0.7051336131587255, iteration: 463660
loss: 0.9615651965141296,grad_norm: 0.7845791630210492, iteration: 463661
loss: 0.9709055423736572,grad_norm: 0.7066021225882542, iteration: 463662
loss: 1.0087306499481201,grad_norm: 0.8059618941677922, iteration: 463663
loss: 1.0022165775299072,grad_norm: 0.8059378004114616, iteration: 463664
loss: 1.007216215133667,grad_norm: 0.7718609272660211, iteration: 463665
loss: 0.9905061721801758,grad_norm: 0.6188909886098124, iteration: 463666
loss: 0.9939014315605164,grad_norm: 0.7589778441512391, iteration: 463667
loss: 0.9870050549507141,grad_norm: 0.7306131295963156, iteration: 463668
loss: 1.014533281326294,grad_norm: 0.8326272823948859, iteration: 463669
loss: 1.0005334615707397,grad_norm: 0.9999994212919575, iteration: 463670
loss: 0.9570735096931458,grad_norm: 0.7712501946817062, iteration: 463671
loss: 0.9950842261314392,grad_norm: 0.7219526395416881, iteration: 463672
loss: 0.9688493013381958,grad_norm: 0.704568371497104, iteration: 463673
loss: 1.0394691228866577,grad_norm: 0.781263484151471, iteration: 463674
loss: 0.9867761135101318,grad_norm: 0.8350916792122047, iteration: 463675
loss: 0.9829668402671814,grad_norm: 0.730438575426243, iteration: 463676
loss: 1.008845329284668,grad_norm: 0.8987598478908048, iteration: 463677
loss: 0.9889057874679565,grad_norm: 0.5965253066504295, iteration: 463678
loss: 1.0011727809906006,grad_norm: 0.702943578589982, iteration: 463679
loss: 1.005690574645996,grad_norm: 0.7135858343014031, iteration: 463680
loss: 1.0016443729400635,grad_norm: 0.7304023121769304, iteration: 463681
loss: 0.9734987616539001,grad_norm: 0.6064860115256024, iteration: 463682
loss: 1.0093050003051758,grad_norm: 0.7490356458290913, iteration: 463683
loss: 1.0247671604156494,grad_norm: 0.9999990750425951, iteration: 463684
loss: 1.0091356039047241,grad_norm: 0.6617698093693856, iteration: 463685
loss: 1.0155439376831055,grad_norm: 0.7657718777714242, iteration: 463686
loss: 1.0632075071334839,grad_norm: 0.9999992659864166, iteration: 463687
loss: 0.9921063184738159,grad_norm: 0.9999994505486478, iteration: 463688
loss: 1.0082427263259888,grad_norm: 0.7972196763489737, iteration: 463689
loss: 1.0181111097335815,grad_norm: 0.7298719177931212, iteration: 463690
loss: 1.0381951332092285,grad_norm: 0.684352163955612, iteration: 463691
loss: 0.9975392818450928,grad_norm: 0.7349706957403137, iteration: 463692
loss: 1.0151818990707397,grad_norm: 0.7024931610210834, iteration: 463693
loss: 1.0013644695281982,grad_norm: 0.7485751136493151, iteration: 463694
loss: 0.9752944111824036,grad_norm: 0.6160776819697852, iteration: 463695
loss: 1.0096204280853271,grad_norm: 0.7146633043022936, iteration: 463696
loss: 0.9909090995788574,grad_norm: 0.6857929916996217, iteration: 463697
loss: 1.0206619501113892,grad_norm: 0.7849011711532451, iteration: 463698
loss: 1.0079630613327026,grad_norm: 0.7406714571435876, iteration: 463699
loss: 1.0171411037445068,grad_norm: 0.6872796125329321, iteration: 463700
loss: 0.9990211129188538,grad_norm: 0.655091062016887, iteration: 463701
loss: 1.0070990324020386,grad_norm: 0.9692653866505084, iteration: 463702
loss: 0.9800955653190613,grad_norm: 0.9999992699747473, iteration: 463703
loss: 1.0218297243118286,grad_norm: 0.6789928724770388, iteration: 463704
loss: 0.9781073927879333,grad_norm: 0.9999993802427207, iteration: 463705
loss: 0.9765419960021973,grad_norm: 0.8590290508860481, iteration: 463706
loss: 0.9973636269569397,grad_norm: 0.6603153421185644, iteration: 463707
loss: 1.013024091720581,grad_norm: 0.6492164665939408, iteration: 463708
loss: 0.9914997816085815,grad_norm: 0.8276596567306975, iteration: 463709
loss: 1.0238945484161377,grad_norm: 0.8089111618151884, iteration: 463710
loss: 1.0280367136001587,grad_norm: 0.6796037332734777, iteration: 463711
loss: 1.0004751682281494,grad_norm: 0.7556481516091974, iteration: 463712
loss: 1.0007150173187256,grad_norm: 0.8027967562497346, iteration: 463713
loss: 0.9923476576805115,grad_norm: 0.7313571987643456, iteration: 463714
loss: 0.9932151436805725,grad_norm: 0.8871667860922628, iteration: 463715
loss: 1.0083394050598145,grad_norm: 0.6527154063682176, iteration: 463716
loss: 1.0093742609024048,grad_norm: 0.6430580697159335, iteration: 463717
loss: 0.983156144618988,grad_norm: 0.7659344952690422, iteration: 463718
loss: 0.9775829911231995,grad_norm: 0.7659839227768914, iteration: 463719
loss: 0.9373958110809326,grad_norm: 0.762769119162157, iteration: 463720
loss: 1.0275181531906128,grad_norm: 0.6715164926505497, iteration: 463721
loss: 1.0040510892868042,grad_norm: 0.7604865757559259, iteration: 463722
loss: 1.0239160060882568,grad_norm: 0.909081809247649, iteration: 463723
loss: 1.0141627788543701,grad_norm: 0.6943403406428844, iteration: 463724
loss: 1.0616239309310913,grad_norm: 0.9999999552032016, iteration: 463725
loss: 1.0100576877593994,grad_norm: 0.704609356771927, iteration: 463726
loss: 0.9964360594749451,grad_norm: 0.887975846528807, iteration: 463727
loss: 0.9965341687202454,grad_norm: 0.8144471121075101, iteration: 463728
loss: 0.9868103861808777,grad_norm: 0.9346998742422915, iteration: 463729
loss: 1.0274263620376587,grad_norm: 0.9798747167621089, iteration: 463730
loss: 1.0190038681030273,grad_norm: 0.7729584917796324, iteration: 463731
loss: 1.0327646732330322,grad_norm: 0.8006208450690834, iteration: 463732
loss: 0.9905449748039246,grad_norm: 0.9352033148809669, iteration: 463733
loss: 1.0252163410186768,grad_norm: 0.7106801402065841, iteration: 463734
loss: 1.0014851093292236,grad_norm: 0.7308883359198438, iteration: 463735
loss: 1.023756504058838,grad_norm: 0.8629515793981672, iteration: 463736
loss: 0.9965343475341797,grad_norm: 0.7241853540964703, iteration: 463737
loss: 0.963093101978302,grad_norm: 0.9381341246496953, iteration: 463738
loss: 0.972845196723938,grad_norm: 0.8106399095323885, iteration: 463739
loss: 1.0382964611053467,grad_norm: 0.9923337596258197, iteration: 463740
loss: 0.9909437894821167,grad_norm: 0.6680181162116492, iteration: 463741
loss: 0.9720181822776794,grad_norm: 0.7607424022645661, iteration: 463742
loss: 1.032996416091919,grad_norm: 0.940073348229817, iteration: 463743
loss: 0.9864657521247864,grad_norm: 0.7358579139965381, iteration: 463744
loss: 1.0212198495864868,grad_norm: 0.8246450007267945, iteration: 463745
loss: 1.0012551546096802,grad_norm: 0.8130529664033872, iteration: 463746
loss: 0.9772652387619019,grad_norm: 0.7715453468697779, iteration: 463747
loss: 0.9992604851722717,grad_norm: 0.760363634852245, iteration: 463748
loss: 0.9688628315925598,grad_norm: 0.7020682146480935, iteration: 463749
loss: 0.9940655827522278,grad_norm: 0.98134081834793, iteration: 463750
loss: 0.9977006912231445,grad_norm: 0.6629838195769144, iteration: 463751
loss: 0.9925380945205688,grad_norm: 0.8616877535941082, iteration: 463752
loss: 0.9787964224815369,grad_norm: 0.7792977157026907, iteration: 463753
loss: 0.9966741800308228,grad_norm: 0.7423578324236011, iteration: 463754
loss: 0.9517695903778076,grad_norm: 0.873220671111607, iteration: 463755
loss: 0.9761075973510742,grad_norm: 0.7453701030244293, iteration: 463756
loss: 0.9565604329109192,grad_norm: 0.7163253444265915, iteration: 463757
loss: 1.0044625997543335,grad_norm: 0.7959764889535772, iteration: 463758
loss: 1.0906401872634888,grad_norm: 0.9999991573454013, iteration: 463759
loss: 1.0151067972183228,grad_norm: 0.6894851369968202, iteration: 463760
loss: 1.0335488319396973,grad_norm: 0.6502724282702332, iteration: 463761
loss: 0.9582891464233398,grad_norm: 0.6569554985572206, iteration: 463762
loss: 1.0302807092666626,grad_norm: 0.9077768867854332, iteration: 463763
loss: 0.9880951046943665,grad_norm: 0.9657633318468616, iteration: 463764
loss: 0.9905071258544922,grad_norm: 0.6441527334541862, iteration: 463765
loss: 0.97769695520401,grad_norm: 0.6902611167916285, iteration: 463766
loss: 1.001287579536438,grad_norm: 0.7063639342651602, iteration: 463767
loss: 1.0112518072128296,grad_norm: 0.9951113307010824, iteration: 463768
loss: 0.9800437092781067,grad_norm: 0.6807697151682675, iteration: 463769
loss: 0.970695436000824,grad_norm: 0.7925338179780903, iteration: 463770
loss: 1.0199655294418335,grad_norm: 0.7534882684504499, iteration: 463771
loss: 0.9605523943901062,grad_norm: 0.7550503384869849, iteration: 463772
loss: 0.9960861802101135,grad_norm: 0.7625179638467389, iteration: 463773
loss: 1.0031793117523193,grad_norm: 0.742488164099909, iteration: 463774
loss: 1.105440378189087,grad_norm: 0.9999990206962767, iteration: 463775
loss: 1.0260062217712402,grad_norm: 0.6747308048126784, iteration: 463776
loss: 1.0122132301330566,grad_norm: 0.7669384629430408, iteration: 463777
loss: 0.977703332901001,grad_norm: 0.7100275331114319, iteration: 463778
loss: 0.9885805249214172,grad_norm: 0.8608931814687243, iteration: 463779
loss: 1.0060547590255737,grad_norm: 0.745328042123613, iteration: 463780
loss: 0.9918505549430847,grad_norm: 0.8527784042780533, iteration: 463781
loss: 1.001930832862854,grad_norm: 0.6838743053871574, iteration: 463782
loss: 0.9880955219268799,grad_norm: 0.6855764668867134, iteration: 463783
loss: 1.0125341415405273,grad_norm: 0.7913988487196733, iteration: 463784
loss: 0.9949438571929932,grad_norm: 0.7249619206433052, iteration: 463785
loss: 0.9807481169700623,grad_norm: 0.8643766720032651, iteration: 463786
loss: 1.0089672803878784,grad_norm: 0.7466679103101661, iteration: 463787
loss: 1.0107311010360718,grad_norm: 0.8079236132591675, iteration: 463788
loss: 1.0437356233596802,grad_norm: 0.7393654373843349, iteration: 463789
loss: 1.0300548076629639,grad_norm: 0.8368287347066488, iteration: 463790
loss: 0.9947992563247681,grad_norm: 0.6657824078722483, iteration: 463791
loss: 0.9844651818275452,grad_norm: 0.6855168115665116, iteration: 463792
loss: 0.990930438041687,grad_norm: 0.7193981738140317, iteration: 463793
loss: 0.9798851013183594,grad_norm: 0.8480393138442457, iteration: 463794
loss: 0.9893303513526917,grad_norm: 0.6476009016751868, iteration: 463795
loss: 0.9800387024879456,grad_norm: 0.6656735345439304, iteration: 463796
loss: 1.0034924745559692,grad_norm: 0.7919952541469706, iteration: 463797
loss: 0.9721638560295105,grad_norm: 0.8261200513016409, iteration: 463798
loss: 1.0405012369155884,grad_norm: 0.7541716908473761, iteration: 463799
loss: 0.9782853126525879,grad_norm: 0.7857146037866924, iteration: 463800
loss: 1.0098756551742554,grad_norm: 0.581327022451555, iteration: 463801
loss: 0.957916796207428,grad_norm: 0.8750235696574616, iteration: 463802
loss: 0.9687562584877014,grad_norm: 0.6889597908769878, iteration: 463803
loss: 1.006171703338623,grad_norm: 0.8104932699634596, iteration: 463804
loss: 1.0527780055999756,grad_norm: 0.9999996373723058, iteration: 463805
loss: 1.0003268718719482,grad_norm: 0.8121330465319114, iteration: 463806
loss: 1.0073330402374268,grad_norm: 0.6907158760490647, iteration: 463807
loss: 0.9958817958831787,grad_norm: 0.78613337966182, iteration: 463808
loss: 1.0103354454040527,grad_norm: 0.799663542357555, iteration: 463809
loss: 0.9747228026390076,grad_norm: 0.9999994779597327, iteration: 463810
loss: 1.0333952903747559,grad_norm: 0.9999992194554974, iteration: 463811
loss: 0.9848795533180237,grad_norm: 0.7105071276460273, iteration: 463812
loss: 1.019851565361023,grad_norm: 0.7448546519699268, iteration: 463813
loss: 1.0316696166992188,grad_norm: 0.7537094053338289, iteration: 463814
loss: 1.0016756057739258,grad_norm: 0.8026960365385438, iteration: 463815
loss: 1.021483302116394,grad_norm: 0.8659026777902092, iteration: 463816
loss: 1.01362144947052,grad_norm: 0.7147178940172352, iteration: 463817
loss: 1.0094804763793945,grad_norm: 0.7749510805028645, iteration: 463818
loss: 0.9810037016868591,grad_norm: 0.7750143668551728, iteration: 463819
loss: 1.0454767942428589,grad_norm: 0.6951567751438527, iteration: 463820
loss: 1.0130040645599365,grad_norm: 0.9999996916397708, iteration: 463821
loss: 1.0041099786758423,grad_norm: 0.7617540805182179, iteration: 463822
loss: 1.0282593965530396,grad_norm: 0.6907748702778175, iteration: 463823
loss: 0.9780519008636475,grad_norm: 0.8597556955740199, iteration: 463824
loss: 1.0009994506835938,grad_norm: 0.6370880031446449, iteration: 463825
loss: 0.9966992735862732,grad_norm: 0.7646084630852603, iteration: 463826
loss: 0.9961300492286682,grad_norm: 0.6927105225838764, iteration: 463827
loss: 0.9963204264640808,grad_norm: 0.7546417576654881, iteration: 463828
loss: 1.0141383409500122,grad_norm: 0.9558853379554602, iteration: 463829
loss: 1.016561508178711,grad_norm: 0.7952402673815052, iteration: 463830
loss: 0.976741373538971,grad_norm: 0.7023955014638287, iteration: 463831
loss: 0.9749737977981567,grad_norm: 0.8060354394328217, iteration: 463832
loss: 0.9999017119407654,grad_norm: 0.914476921969385, iteration: 463833
loss: 1.031187891960144,grad_norm: 0.8114784984517698, iteration: 463834
loss: 0.9881309866905212,grad_norm: 0.7464646690469833, iteration: 463835
loss: 0.990755021572113,grad_norm: 0.7248331598733665, iteration: 463836
loss: 0.9997559785842896,grad_norm: 0.7896518909014631, iteration: 463837
loss: 1.0030286312103271,grad_norm: 0.7132296133676341, iteration: 463838
loss: 0.9834911823272705,grad_norm: 0.7824113589871362, iteration: 463839
loss: 1.0045263767242432,grad_norm: 0.7815804125261574, iteration: 463840
loss: 1.0390520095825195,grad_norm: 0.6940663474301991, iteration: 463841
loss: 0.9935043454170227,grad_norm: 0.8278913149110099, iteration: 463842
loss: 0.9910050630569458,grad_norm: 0.7537879505292711, iteration: 463843
loss: 0.9988599419593811,grad_norm: 0.6451794591785674, iteration: 463844
loss: 1.0184166431427002,grad_norm: 0.8677936482562889, iteration: 463845
loss: 1.1063331365585327,grad_norm: 0.7463723112878876, iteration: 463846
loss: 0.9914256930351257,grad_norm: 0.8343461114613088, iteration: 463847
loss: 1.18868088722229,grad_norm: 1.0000000089583148, iteration: 463848
loss: 1.0023881196975708,grad_norm: 0.7704721030952296, iteration: 463849
loss: 0.9954000115394592,grad_norm: 0.8319125643582661, iteration: 463850
loss: 0.9724944233894348,grad_norm: 0.7751225001778035, iteration: 463851
loss: 1.0532978773117065,grad_norm: 0.6148993857585144, iteration: 463852
loss: 0.9949936866760254,grad_norm: 0.7664021505665924, iteration: 463853
loss: 1.0266263484954834,grad_norm: 0.8649895654314886, iteration: 463854
loss: 0.9733311533927917,grad_norm: 0.7653268021509143, iteration: 463855
loss: 1.0180160999298096,grad_norm: 0.6980501804324666, iteration: 463856
loss: 0.9989560842514038,grad_norm: 0.6358540736261045, iteration: 463857
loss: 1.0003597736358643,grad_norm: 0.6961119639482711, iteration: 463858
loss: 0.9829097986221313,grad_norm: 0.7547562265925659, iteration: 463859
loss: 0.9615333676338196,grad_norm: 0.746291005828548, iteration: 463860
loss: 1.0232821702957153,grad_norm: 0.9098203863353244, iteration: 463861
loss: 0.9796345829963684,grad_norm: 0.8331432356532207, iteration: 463862
loss: 0.9777012467384338,grad_norm: 0.7020835040259193, iteration: 463863
loss: 0.9589149355888367,grad_norm: 0.666000887539351, iteration: 463864
loss: 1.0317566394805908,grad_norm: 0.7716797616494191, iteration: 463865
loss: 0.9494619965553284,grad_norm: 0.7366204072693743, iteration: 463866
loss: 0.9750906825065613,grad_norm: 0.5753118520987405, iteration: 463867
loss: 0.9856607913970947,grad_norm: 0.6669166197867554, iteration: 463868
loss: 0.9805123805999756,grad_norm: 0.7441226969281365, iteration: 463869
loss: 0.954315185546875,grad_norm: 0.9193541690630306, iteration: 463870
loss: 1.0157215595245361,grad_norm: 0.676645292223812, iteration: 463871
loss: 0.9918659925460815,grad_norm: 0.6948588439148181, iteration: 463872
loss: 0.9644804000854492,grad_norm: 0.8896404281531096, iteration: 463873
loss: 1.0304971933364868,grad_norm: 0.7014825020028091, iteration: 463874
loss: 0.9973459243774414,grad_norm: 0.982546246699884, iteration: 463875
loss: 0.992333173751831,grad_norm: 0.7164212716148965, iteration: 463876
loss: 1.0031991004943848,grad_norm: 0.7007679752772146, iteration: 463877
loss: 1.0017635822296143,grad_norm: 0.9079531950536119, iteration: 463878
loss: 1.000905156135559,grad_norm: 0.7184061364118343, iteration: 463879
loss: 0.9753504395484924,grad_norm: 0.9999991067344849, iteration: 463880
loss: 0.9863309860229492,grad_norm: 0.8573419029330112, iteration: 463881
loss: 0.9919321537017822,grad_norm: 0.6728225633322017, iteration: 463882
loss: 1.0123296976089478,grad_norm: 0.8122044512044096, iteration: 463883
loss: 1.0082521438598633,grad_norm: 0.6607773167621166, iteration: 463884
loss: 0.992893397808075,grad_norm: 0.7468589820992673, iteration: 463885
loss: 1.016603708267212,grad_norm: 0.8980378271125665, iteration: 463886
loss: 1.0059080123901367,grad_norm: 0.7932466286269096, iteration: 463887
loss: 0.994566023349762,grad_norm: 0.9283016817666268, iteration: 463888
loss: 1.0034807920455933,grad_norm: 0.9196256334090501, iteration: 463889
loss: 1.0192140340805054,grad_norm: 0.9041397890684341, iteration: 463890
loss: 0.9904962182044983,grad_norm: 0.6335286582224116, iteration: 463891
loss: 1.016128420829773,grad_norm: 0.8788387348110281, iteration: 463892
loss: 0.9668995141983032,grad_norm: 0.7380452580760543, iteration: 463893
loss: 1.2890315055847168,grad_norm: 0.9999993101363075, iteration: 463894
loss: 1.001700758934021,grad_norm: 0.8324545856545398, iteration: 463895
loss: 1.0002564191818237,grad_norm: 0.9999998234077633, iteration: 463896
loss: 0.9665389060974121,grad_norm: 0.7238827219458187, iteration: 463897
loss: 0.9773691296577454,grad_norm: 0.7733331004217748, iteration: 463898
loss: 0.9691510796546936,grad_norm: 0.7961695311865044, iteration: 463899
loss: 0.9834901690483093,grad_norm: 0.7895636461754099, iteration: 463900
loss: 1.016048789024353,grad_norm: 0.9999990083753906, iteration: 463901
loss: 1.0507067441940308,grad_norm: 1.0000000084567107, iteration: 463902
loss: 1.0891749858856201,grad_norm: 0.8527095488169154, iteration: 463903
loss: 1.0098172426223755,grad_norm: 0.6079564477168812, iteration: 463904
loss: 1.0122780799865723,grad_norm: 0.7988294304291942, iteration: 463905
loss: 1.0078953504562378,grad_norm: 0.8246319652251044, iteration: 463906
loss: 1.0043470859527588,grad_norm: 0.7454228290274039, iteration: 463907
loss: 1.0198580026626587,grad_norm: 0.7792402839841707, iteration: 463908
loss: 1.005218744277954,grad_norm: 0.9999992368684449, iteration: 463909
loss: 1.0628373622894287,grad_norm: 0.7602426343014876, iteration: 463910
loss: 1.0126601457595825,grad_norm: 0.729280404080232, iteration: 463911
loss: 1.0144562721252441,grad_norm: 0.9999994854096746, iteration: 463912
loss: 0.9922330379486084,grad_norm: 0.6259025391910615, iteration: 463913
loss: 0.9871736168861389,grad_norm: 0.8134089322092424, iteration: 463914
loss: 0.969519853591919,grad_norm: 0.8027093868008611, iteration: 463915
loss: 1.0721126794815063,grad_norm: 0.7781300548585589, iteration: 463916
loss: 0.9942465424537659,grad_norm: 0.999999344329971, iteration: 463917
loss: 1.0689473152160645,grad_norm: 0.999999794156104, iteration: 463918
loss: 0.9577399492263794,grad_norm: 0.712717233620791, iteration: 463919
loss: 1.023217797279358,grad_norm: 0.6817401297789508, iteration: 463920
loss: 0.9807069301605225,grad_norm: 0.7809976648532811, iteration: 463921
loss: 1.006492257118225,grad_norm: 0.781880291306308, iteration: 463922
loss: 1.0131419897079468,grad_norm: 0.6752888763274552, iteration: 463923
loss: 1.011962652206421,grad_norm: 0.6815884638502675, iteration: 463924
loss: 1.0004793405532837,grad_norm: 0.7804366415489641, iteration: 463925
loss: 0.9745641350746155,grad_norm: 0.7801942888473512, iteration: 463926
loss: 1.0264265537261963,grad_norm: 0.6734768473963226, iteration: 463927
loss: 1.0202711820602417,grad_norm: 0.7279019205780267, iteration: 463928
loss: 0.9848692417144775,grad_norm: 0.6950512157625167, iteration: 463929
loss: 0.9848561882972717,grad_norm: 0.7318976800491975, iteration: 463930
loss: 1.0615911483764648,grad_norm: 0.9999990562403729, iteration: 463931
loss: 1.0245051383972168,grad_norm: 0.9433743366719759, iteration: 463932
loss: 0.9911311864852905,grad_norm: 0.6988881011526684, iteration: 463933
loss: 0.9937800765037537,grad_norm: 0.7611587745997604, iteration: 463934
loss: 1.003745436668396,grad_norm: 0.664573973058398, iteration: 463935
loss: 0.9768185615539551,grad_norm: 0.775984586435938, iteration: 463936
loss: 0.9576816558837891,grad_norm: 0.9999991786186716, iteration: 463937
loss: 0.9571018218994141,grad_norm: 0.8264374161376079, iteration: 463938
loss: 1.0041698217391968,grad_norm: 0.7580875096815337, iteration: 463939
loss: 0.9883474111557007,grad_norm: 0.8364965320666415, iteration: 463940
loss: 0.9931846261024475,grad_norm: 0.7411505979512085, iteration: 463941
loss: 0.9934549331665039,grad_norm: 0.9789078519794376, iteration: 463942
loss: 1.047188401222229,grad_norm: 0.8718801373968699, iteration: 463943
loss: 0.9910027980804443,grad_norm: 0.67772524803727, iteration: 463944
loss: 1.0232174396514893,grad_norm: 0.9999996466110389, iteration: 463945
loss: 0.9724203944206238,grad_norm: 0.6936899672116522, iteration: 463946
loss: 1.0136990547180176,grad_norm: 0.7399886863479543, iteration: 463947
loss: 1.001976490020752,grad_norm: 0.6259417929710812, iteration: 463948
loss: 0.9610564708709717,grad_norm: 0.7596962252618414, iteration: 463949
loss: 0.9994868040084839,grad_norm: 0.7240466055091982, iteration: 463950
loss: 1.0366580486297607,grad_norm: 0.9025926193009263, iteration: 463951
loss: 1.0251492261886597,grad_norm: 0.756119401513678, iteration: 463952
loss: 0.9841998815536499,grad_norm: 0.773780004132809, iteration: 463953
loss: 0.9723067879676819,grad_norm: 0.8163639963460958, iteration: 463954
loss: 1.0009452104568481,grad_norm: 0.9999991265514706, iteration: 463955
loss: 1.0262757539749146,grad_norm: 0.715485446672008, iteration: 463956
loss: 1.0118372440338135,grad_norm: 0.7064536399960136, iteration: 463957
loss: 0.9872903823852539,grad_norm: 0.8795432843296301, iteration: 463958
loss: 0.9680721163749695,grad_norm: 0.7654659172642206, iteration: 463959
loss: 0.9896659255027771,grad_norm: 0.7441316895133514, iteration: 463960
loss: 0.992335319519043,grad_norm: 0.7381344994866305, iteration: 463961
loss: 1.0149627923965454,grad_norm: 0.7830459916888074, iteration: 463962
loss: 0.9642775058746338,grad_norm: 0.85490082093666, iteration: 463963
loss: 1.0234673023223877,grad_norm: 0.8033495886553996, iteration: 463964
loss: 1.0043538808822632,grad_norm: 0.6337455310765925, iteration: 463965
loss: 0.9427842497825623,grad_norm: 0.8341845814894288, iteration: 463966
loss: 1.0092518329620361,grad_norm: 0.9118018671466828, iteration: 463967
loss: 0.9795413017272949,grad_norm: 0.8672130814875681, iteration: 463968
loss: 0.9956436157226562,grad_norm: 0.7316616137650821, iteration: 463969
loss: 0.9805368185043335,grad_norm: 0.8313028485793154, iteration: 463970
loss: 1.0234315395355225,grad_norm: 0.688666380191958, iteration: 463971
loss: 1.0022571086883545,grad_norm: 0.8331096626003988, iteration: 463972
loss: 0.9720601439476013,grad_norm: 0.8262823286030817, iteration: 463973
loss: 0.9645939469337463,grad_norm: 0.6696024952942093, iteration: 463974
loss: 1.0296252965927124,grad_norm: 0.9021408941021762, iteration: 463975
loss: 0.9534268975257874,grad_norm: 0.6276099769030228, iteration: 463976
loss: 0.9914755821228027,grad_norm: 0.7006038928795754, iteration: 463977
loss: 0.9827393889427185,grad_norm: 0.7661693083222388, iteration: 463978
loss: 1.0202661752700806,grad_norm: 0.8440749616610564, iteration: 463979
loss: 1.0003387928009033,grad_norm: 0.9219539074491229, iteration: 463980
loss: 0.9911253452301025,grad_norm: 0.7651498076658407, iteration: 463981
loss: 0.9970855712890625,grad_norm: 0.9075902145446192, iteration: 463982
loss: 0.970166802406311,grad_norm: 0.6359709185031914, iteration: 463983
loss: 1.0209057331085205,grad_norm: 0.8630695405907689, iteration: 463984
loss: 1.0350192785263062,grad_norm: 0.7684443463300116, iteration: 463985
loss: 0.9942424297332764,grad_norm: 0.79669396893231, iteration: 463986
loss: 1.0065054893493652,grad_norm: 0.9999997664485084, iteration: 463987
loss: 1.0772616863250732,grad_norm: 0.7740574548962675, iteration: 463988
loss: 0.9782435894012451,grad_norm: 0.8196945514557604, iteration: 463989
loss: 0.9984499216079712,grad_norm: 0.7562118228088877, iteration: 463990
loss: 0.9973901510238647,grad_norm: 0.730191719011937, iteration: 463991
loss: 1.013295292854309,grad_norm: 0.7484914772083815, iteration: 463992
loss: 1.0175256729125977,grad_norm: 0.8329869494372794, iteration: 463993
loss: 1.0104849338531494,grad_norm: 0.7671348525121117, iteration: 463994
loss: 0.9818354845046997,grad_norm: 0.711797584272938, iteration: 463995
loss: 1.017776608467102,grad_norm: 0.687278295571635, iteration: 463996
loss: 1.0418261289596558,grad_norm: 0.707139718688472, iteration: 463997
loss: 0.9602782726287842,grad_norm: 0.7925005050010341, iteration: 463998
loss: 1.006502628326416,grad_norm: 0.7342341757137161, iteration: 463999
loss: 0.996039628982544,grad_norm: 0.7264103851592768, iteration: 464000
loss: 1.0170621871948242,grad_norm: 0.7810036161967341, iteration: 464001
loss: 1.0542923212051392,grad_norm: 0.7155868365507798, iteration: 464002
loss: 0.9632484912872314,grad_norm: 0.7597033583092858, iteration: 464003
loss: 0.9726220965385437,grad_norm: 0.7733072204757396, iteration: 464004
loss: 1.008537769317627,grad_norm: 0.9999994522462653, iteration: 464005
loss: 0.9851034283638,grad_norm: 0.6756776505764464, iteration: 464006
loss: 1.0142621994018555,grad_norm: 0.9513614751649672, iteration: 464007
loss: 0.9520032405853271,grad_norm: 0.7717834036130496, iteration: 464008
loss: 0.9992062449455261,grad_norm: 0.8601136746207058, iteration: 464009
loss: 1.0433100461959839,grad_norm: 0.7944401110182849, iteration: 464010
loss: 1.0019348859786987,grad_norm: 0.7652064293930683, iteration: 464011
loss: 0.9912604689598083,grad_norm: 0.7506697077955606, iteration: 464012
loss: 1.0429456233978271,grad_norm: 0.8298510809057926, iteration: 464013
loss: 1.0131925344467163,grad_norm: 0.8881784812621358, iteration: 464014
loss: 1.01663076877594,grad_norm: 0.8713381720641163, iteration: 464015
loss: 1.002648949623108,grad_norm: 0.7604713201446117, iteration: 464016
loss: 0.9993873238563538,grad_norm: 0.8226831248894848, iteration: 464017
loss: 0.9813779592514038,grad_norm: 0.607307863616825, iteration: 464018
loss: 1.0872056484222412,grad_norm: 0.9999992282617552, iteration: 464019
loss: 0.9749815464019775,grad_norm: 0.7013711763955287, iteration: 464020
loss: 1.0245565176010132,grad_norm: 0.7961222902463909, iteration: 464021
loss: 0.9816163778305054,grad_norm: 0.7332374299250446, iteration: 464022
loss: 1.0093756914138794,grad_norm: 0.99999998857659, iteration: 464023
loss: 1.0120919942855835,grad_norm: 0.8838200461440884, iteration: 464024
loss: 1.014344573020935,grad_norm: 0.6539828351825958, iteration: 464025
loss: 0.9952439665794373,grad_norm: 0.7753127727563593, iteration: 464026
loss: 1.0038774013519287,grad_norm: 0.7370210779420274, iteration: 464027
loss: 0.9983747601509094,grad_norm: 0.5776619321389951, iteration: 464028
loss: 0.9355610609054565,grad_norm: 0.772000231196536, iteration: 464029
loss: 0.9858656525611877,grad_norm: 0.7720089769053534, iteration: 464030
loss: 0.9953290820121765,grad_norm: 0.8414291159946047, iteration: 464031
loss: 1.0177254676818848,grad_norm: 0.8839946159209963, iteration: 464032
loss: 0.9919933080673218,grad_norm: 0.8269257445111098, iteration: 464033
loss: 0.9492269158363342,grad_norm: 0.6259560200760332, iteration: 464034
loss: 1.0114902257919312,grad_norm: 0.7708429141176946, iteration: 464035
loss: 0.9783886671066284,grad_norm: 0.773163800803063, iteration: 464036
loss: 1.0048739910125732,grad_norm: 0.7103749972343443, iteration: 464037
loss: 0.9563964009284973,grad_norm: 0.6833636669894712, iteration: 464038
loss: 0.9754350781440735,grad_norm: 0.6452228541116829, iteration: 464039
loss: 0.9956812858581543,grad_norm: 0.8167648746800833, iteration: 464040
loss: 1.040477991104126,grad_norm: 0.786165977067618, iteration: 464041
loss: 1.0265740156173706,grad_norm: 0.6997420421381366, iteration: 464042
loss: 1.012295126914978,grad_norm: 0.777000637302595, iteration: 464043
loss: 0.9991005659103394,grad_norm: 0.6951013015464645, iteration: 464044
loss: 1.0147498846054077,grad_norm: 0.8000018097179403, iteration: 464045
loss: 0.9488126635551453,grad_norm: 0.6888604943646865, iteration: 464046
loss: 0.9827211499214172,grad_norm: 0.8018615333800998, iteration: 464047
loss: 1.0051212310791016,grad_norm: 0.8251790720195685, iteration: 464048
loss: 1.0659129619598389,grad_norm: 0.7779225467926462, iteration: 464049
loss: 0.9891769886016846,grad_norm: 0.8538824300862417, iteration: 464050
loss: 1.0044574737548828,grad_norm: 0.8108915830090024, iteration: 464051
loss: 1.0222082138061523,grad_norm: 0.77872765492549, iteration: 464052
loss: 0.9840131402015686,grad_norm: 0.6029543722748617, iteration: 464053
loss: 1.0136122703552246,grad_norm: 0.7230337588052997, iteration: 464054
